author    Dimitry Andric <dim@FreeBSD.org>    2023-12-18 20:30:12 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2024-04-06 20:11:55 +0000
commit    5f757f3ff9144b609b3c433dfd370cc6bdc191ad (patch)
tree      1b4e980b866cd26a00af34c0a653eb640bd09caf /contrib/llvm-project/llvm/lib
parent    3e1c8a35f741a5d114d0ba670b15191355711fe9 (diff)
parent    312c0ed19cc5276a17bacf2120097bec4515b0f1 (diff)
download  src-5f757f3ff9144b609b3c433dfd370cc6bdc191ad.tar.gz
          src-5f757f3ff9144b609b3c433dfd370cc6bdc191ad.zip
Merge llvm-project main llvmorg-18-init-15088-gd14ee76181fb
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp to llvm-project main llvmorg-18-init-15088-gd14ee76181fb.

PR:          276104
MFC after:   1 month
Diffstat (limited to 'contrib/llvm-project/llvm/lib')
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp  27
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp  44
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp  15
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/Analysis.cpp  10
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/AssumeBundleQueries.cpp  7
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp  47
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp  123
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfo.cpp  60
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp  79
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp  12
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp  141
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp  30
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp  60
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp  189
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp  73
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp  46
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/DomConditionCache.cpp  67
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp  202
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp  9
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/GuardUtils.cpp  66
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp  47
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp  126
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/InlineOrder.cpp  25
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp  31
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp  802
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp  441
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/Lint.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/Local.cpp  70
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp  285
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/MLInlineAdvisor.cpp  15
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp  58
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp  63
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp  63
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/ProfileSummaryInfo.cpp  10
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/RegionPrinter.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp  376
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp  9
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp  13
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp  47
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/StructuralHash.cpp  33
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/TFLiteUtils.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp  89
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp  27
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/TrainingLogger.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp  27
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/UniformityAnalysis.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp  231
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp  2010
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp  131
-rw-r--r--  contrib/llvm-project/llvm/lib/AsmParser/LLLexer.cpp  21
-rw-r--r--  contrib/llvm-project/llvm/lib/AsmParser/LLParser.cpp  259
-rw-r--r--  contrib/llvm-project/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/BinaryFormat/DXContainer.cpp  61
-rw-r--r--  contrib/llvm-project/llvm/lib/BinaryFormat/ELF.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/BinaryFormat/Magic.cpp  25
-rw-r--r--  contrib/llvm-project/llvm/lib/BinaryFormat/MsgPackDocument.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp  156
-rw-r--r--  contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp  46
-rw-r--r--  contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.h  1
-rw-r--r--  contrib/llvm-project/llvm/lib/Bitcode/Reader/ValueList.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp  230
-rw-r--r--  contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp  19
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp  41
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h  5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp  26
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp  219
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp  255
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp  14
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp  203
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h  22
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h  6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp  373
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h  56
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp  252
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h  263
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp  13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h  3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp  26
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h  21
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp  60
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp  66
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BasicBlockPathCloning.cpp  245
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp  93
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp  325
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp  143
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CFIFixup.cpp  67
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp  34
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CallBrPrepare.cpp  47
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp  436
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp  44
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp  50
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp  6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp  219
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp  133
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GCEmptyBasicBlocks.cpp  100
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp  113
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp  6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp  27
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp  63
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp  659
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp  15
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp  16
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  132
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp  134
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp  14
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp  622
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp  11
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp  81
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp  7
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp  42
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp  13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp  71
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp  78
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp  100
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp  22
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp  15
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp  19
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp  57
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h  28
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp  9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp  17
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp  41
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp  16
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp  18
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp  90
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp  20
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp (renamed from contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp)  6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.h (renamed from contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h)  0
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocPriorityAdvisor.cpp (renamed from contrib/llvm-project/llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp)  0
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp  99
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp  31
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp  210
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp  10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp  41
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp  18
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp  23
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp  149
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp  340
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp  49
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp  13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp  24
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp  6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp  52
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp  159
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp  351
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp  26
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp  246
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp  37
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp  66
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp  123
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp  14
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp  156
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RDFRegisters.cpp  33
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp  573
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp  322
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h  12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterBank.cpp  33
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp  10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp  126
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp  173
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp  41
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp  106
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp  182
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp  1364
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp  12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp  26
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp  147
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp  68
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp  338
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h  16
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp  6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp  163
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp  146
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp  16
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp  34
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp  11
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp  894
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp  692
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h  86
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp  9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp  624
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp  24
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp  489
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp  20
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp  93
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp  24
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.h  4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp  36
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h  13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp  9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp  17
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp  19
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp  12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp  195
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp  48
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp  134
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp  71
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp  22
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp  42
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp  202
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp  15
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp  31
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp  10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp  53
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp  112
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinker.cpp  323
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinker/DWARFStreamer.cpp  103
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/AcceleratorRecordsSaver.cpp  295
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/AcceleratorRecordsSaver.h  70
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/ArrayList.h  165
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DIEAttributeCloner.cpp  655
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DIEAttributeCloner.h  184
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DIEGenerator.h  180
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.cpp  166
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.h  221
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFFile.cpp  17
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinker.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerCompileUnit.cpp  1879
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerCompileUnit.h  644
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerGlobalData.h  159
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerImpl.cpp  1440
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerImpl.h  313
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerTypeUnit.cpp  391
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerTypeUnit.h  138
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerUnit.cpp  250
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerUnit.h  223
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DebugLineSectionEmitter.h  384
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DependencyTracker.cpp  839
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DependencyTracker.h  272
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/IndexedValuesMap.h  49
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/OutputSections.cpp  526
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/OutputSections.h  505
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/StringEntryToDwarfStringPoolEntryMap.h  72
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/SyntheticTypeNameBuilder.cpp  767
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/SyntheticTypeNameBuilder.h  155
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/TypePool.h  177
-rw-r--r--  contrib/llvm-project/llvm/lib/DWARFLinkerParallel/Utils.h  40
-rw-r--r--  contrib/llvm-project/llvm/lib/DWP/DWP.cpp  62
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/BTF/BTFContext.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/BTF/BTFParser.cpp  620
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/CodeView/EnumTables.cpp  18
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/CodeView/RecordName.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp  31
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp  13
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp  88
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp  1148
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp  14
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp  11
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp  40
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp  11
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp  83
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp  48
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp  247
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp  220
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymReader.cpp  130
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp  11
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/GSYM/LineTable.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp  15
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Core/LVCompare.cpp  9
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Core/LVElement.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp  10
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp  21
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp  55
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Readers/LVELFReader.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp  7
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/FormatUtil.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp  26
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/SymbolStream.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp  40
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp  12
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp  15
-rw-r--r--  contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp  50
-rw-r--r--  contrib/llvm-project/llvm/lib/Debuginfod/Debuginfod.cpp  37
-rw-r--r--  contrib/llvm-project/llvm/lib/Debuginfod/HTTPClient.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Debuginfod/HTTPServer.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Demangle/Demangle.cpp  14
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngine.cpp  20
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp  10
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFF.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.cpp  16
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.h  2
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp  11
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h  3
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp  145
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h  22
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF.cpp  17
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h  31
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp  32
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_loongarch.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp  209
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp  19
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp  11
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp  36
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h  25
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp  23
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp  11
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h  2
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp  390
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ppc64.cpp  58
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/riscv.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp  185
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp  472
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/DebugInfoSupport.cpp  121
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/DebuggerSupport.cpp  61
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.cpp  423
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/LLJITUtilsCBindings.cpp  22
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/PerfSupportPlugin.cpp  303
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp  20
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp  65
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp  16
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp  35
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp  191
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp  615
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp  94
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/ObjectFormats.cpp  23
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Speculation.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp  457
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp  6
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp  219
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h  28
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h  6
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h  20
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFAArch64.h  4
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h  2
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h  24
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h  9
-rw-r--r--  contrib/llvm-project/llvm/lib/FileCheck/FileCheck.cpp  382
-rw-r--r--  contrib/llvm-project/llvm/lib/FileCheck/FileCheckImpl.h  64
-rw-r--r--  contrib/llvm-project/llvm/lib/Frontend/Driver/CodeGenOptions.cpp  55
-rw-r--r--  contrib/llvm-project/llvm/lib/Frontend/HLSL/HLSLResource.cpp  19
-rw-r--r--  contrib/llvm-project/llvm/lib/Frontend/Offloading/Utility.cpp  110
-rw-r--r--  contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp  1860
-rw-r--r--  contrib/llvm-project/llvm/lib/FuzzMutate/FuzzerCLI.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/AsmWriter.cpp  231
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/Attributes.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp  2229
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/BasicBlock.cpp  650
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp  618
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/ConstantRange.cpp  135
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/Constants.cpp  269
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/ConstantsContext.h  2
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/ConvergenceVerifier.cpp  69
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/Core.cpp  323
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/DIBuilder.cpp  32
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/DataLayout.cpp  14
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp  114
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/DebugInfoMetadata.cpp  189
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/DebugProgramInstruction.cpp  388
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/Dominators.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/EHPersonalities.cpp  6
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/Function.cpp  68
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/Globals.cpp  11
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/IRBuilder.cpp  165
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/IRPrintingPasses.cpp  20
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/Instruction.cpp  236
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/Instructions.cpp  392
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp  81
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.cpp  24
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.h  79
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/Mangler.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/Metadata.cpp  130
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/Module.cpp  35
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/ModuleSummaryIndex.cpp  16
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/Operator.cpp  16
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/Pass.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/PassInstrumentation.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/PassManager.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/PassTimingInfo.cpp  24
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/PrintPasses.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/ProfDataUtils.cpp  48
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/ReplaceConstant.cpp  26
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/SSAContext.cpp  46
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/StructuralHash.cpp  105
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/SymbolTableListTraitsImpl.h  20
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/Type.cpp  117
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/TypeFinder.cpp  13
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/Value.cpp  13
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/Verifier.cpp  414
-rw-r--r--  contrib/llvm-project/llvm/lib/IRPrinter/IRPrintingPasses.cpp  21
-rw-r--r--  contrib/llvm-project/llvm/lib/InterfaceStub/ELFObjHandler.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/InterfaceStub/IFSHandler.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/LTO/LTO.cpp  179
-rw-r--r--  contrib/llvm-project/llvm/lib/LTO/LTOBackend.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/LTO/LTOCodeGenerator.cpp  20
-rw-r--r--  contrib/llvm-project/llvm/lib/LTO/LTOModule.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/LTO/ThinLTOCodeGenerator.cpp  59
-rw-r--r--  contrib/llvm-project/llvm/lib/Linker/IRMover.cpp  10
-rw-r--r--  contrib/llvm-project/llvm/lib/Linker/LinkModules.cpp  21
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/DXContainerPSVInfo.cpp  181
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp  33
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/GOFFObjectWriter.cpp  297
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCAsmBackend.cpp  16
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCAsmInfo.cpp  14
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCAsmInfoELF.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp  25
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCAssembler.cpp  61
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCCodeEmitter.cpp  9
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCCodeView.cpp  90
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCContext.cpp  13
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCDXContainerWriter.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCDisassembler/Disassembler.cpp  11
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCDwarf.cpp  30
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCELFObjectTargetWriter.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCELFStreamer.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCExpr.cpp  41
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCGOFFStreamer.cpp  34
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCInstPrinter.cpp  58
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCObjectFileInfo.cpp  13
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCObjectStreamer.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCParser/AsmLexer.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCParser/AsmParser.cpp  29
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCParser/COFFMasmParser.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCParser/DarwinAsmParser.cpp  15
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCParser/ELFAsmParser.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCParser/MasmParser.cpp  16
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCPseudoProbe.cpp  23
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCSchedule.cpp  10
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCSectionELF.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCSectionXCOFF.cpp  13
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCStreamer.cpp  7
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCSubtargetInfo.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCTargetOptions.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCWin64EH.cpp  9
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MCXCOFFStreamer.cpp  7
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/MachObjectWriter.cpp  12
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/SPIRVObjectWriter.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/StringTableBuilder.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/TargetRegistry.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp  18
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp  10
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/XCOFFObjectWriter.cpp  7
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp  6
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/HardwareUnits/Scheduler.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/Stages/EntryStage.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/Stages/ExecuteStage.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/Stages/InstructionTables.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/MCA/Support.cpp  10
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjCopy/Archive.cpp  11
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjCopy/COFF/COFFObjcopy.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjCopy/CommonConfig.cpp  7
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjCopy/ConfigManager.cpp  16
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp  53
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.cpp  42
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.h  5
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp  18
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOObject.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOObject.h  4
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOReader.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h  1
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjCopy/wasm/WasmObjcopy.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/Archive.cpp  22
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/ArchiveWriter.cpp  286
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/Binary.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/COFFImportFile.cpp  52
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/COFFModuleDefinition.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/COFFObjectFile.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/DXContainer.cpp  195
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/ELF.cpp  151
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp  29
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/IRObjectFile.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/IRSymtab.cpp  9
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/MachOObjectFile.cpp  82
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/MachOUniversalWriter.cpp  171
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/ModuleSymbolTable.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/ObjectFile.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/OffloadBinary.cpp  7
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/RecordStreamer.cpp  7
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/RecordStreamer.h  1
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/RelocationResolver.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/SymbolSize.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/TapiFile.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp  24
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/WindowsResource.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/XCOFFObjectFile.cpp  97
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjectYAML/COFFEmitter.cpp  6
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjectYAML/COFFYAML.cpp  11
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp  21
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLTypeHashing.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjectYAML/DXContainerEmitter.cpp  57
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjectYAML/DXContainerYAML.cpp  103
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjectYAML/ELFEmitter.cpp  74
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjectYAML/ELFYAML.cpp  33
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjectYAML/GOFFEmitter.cpp  282
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjectYAML/GOFFYAML.cpp  46
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjectYAML/MachOEmitter.cpp  7
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjectYAML/MachOYAML.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjectYAML/ObjectYAML.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjectYAML/OffloadEmitter.cpp  10
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjectYAML/WasmYAML.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjectYAML/XCOFFEmitter.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjectYAML/XCOFFYAML.cpp  36
-rw-r--r--  contrib/llvm-project/llvm/lib/ObjectYAML/yaml2obj.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Option/Arg.cpp  6
-rw-r--r--  contrib/llvm-project/llvm/lib/Option/ArgList.cpp  13
-rw-r--r--  contrib/llvm-project/llvm/lib/Option/OptTable.cpp  178
-rw-r--r--  contrib/llvm-project/llvm/lib/Option/Option.cpp  10
-rw-r--r--  contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp  114
-rw-r--r--  contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp  221
-rw-r--r--  contrib/llvm-project/llvm/lib/Passes/PassRegistry.def  656
-rw-r--r--  contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp  361
-rw-r--r--  contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp  533
-rw-r--r--  contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp  333
-rw-r--r--  contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp  51
-rw-r--r--  contrib/llvm-project/llvm/lib/ProfileData/GCOV.cpp  90
-rw-r--r--  contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp  344
-rw-r--r--  contrib/llvm-project/llvm/lib/ProfileData/InstrProfCorrelator.cpp  278
-rw-r--r--  contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp  313
-rw-r--r--  contrib/llvm-project/llvm/lib/ProfileData/InstrProfWriter.cpp  48
-rw-r--r--  contrib/llvm-project/llvm/lib/ProfileData/ItaniumManglingCanonicalizer.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/ProfileData/MemProf.cpp  41
-rw-r--r--  contrib/llvm-project/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/ProfileData/RawMemProfReader.cpp  55
-rw-r--r--  contrib/llvm-project/llvm/lib/ProfileData/SampleProf.cpp  112
-rw-r--r--  contrib/llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp  249
-rw-r--r--  contrib/llvm-project/llvm/lib/ProfileData/SampleProfWriter.cpp  66
-rw-r--r--  contrib/llvm-project/llvm/lib/ProfileData/SymbolRemappingReader.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Remarks/Remark.cpp  11
-rw-r--r--  contrib/llvm-project/llvm/lib/Remarks/YAMLRemarkParser.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/APFloat.cpp  41
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/ARMBuildAttrs.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/AutoConvert.cpp  73
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/BLAKE3/blake3_impl.h  5
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/BalancedPartitioning.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/BinaryStreamReader.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/BinaryStreamRef.cpp  9
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/BinaryStreamWriter.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/BlockFrequency.cpp  9
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/CachePruning.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/Caching.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/Chrono.cpp  45
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/CodeGenCoverage.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/CommandLine.cpp  84
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/DataExtractor.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/DebugCounter.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/ELFAttributeParser.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/ELFAttributes.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/FileOutputBuffer.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/FloatingPointMode.cpp  15
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp  273
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/InitLLVM.cpp  41
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/KnownBits.cpp  12
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/PGOOptions.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/Path.cpp  10
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp  162
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/Regex.cpp  23
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/SHA1.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/SHA256.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/Signals.cpp  24
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/SourceMgr.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/SpecialCaseList.cpp  198
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/StringExtras.cpp  17
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/StringRef.cpp  6
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/StringSaver.cpp  12
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/Twine.cpp  7
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/UnicodeNameToCodepoint.cpp  46
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/UnicodeNameToCodepointGenerated.cpp  41701
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/Unix/Memory.inc  3
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/Unix/Path.inc  21
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/Unix/Process.inc  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/Unix/Program.inc  18
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/Unix/Signals.inc  123
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/Unix/Threading.inc  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/VirtualFileSystem.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/Windows/Path.inc  58
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/Windows/Process.inc  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/Windows/Signals.inc  5
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/YAMLParser.cpp  447
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/YAMLTraits.cpp  45
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp  172
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/regcomp.c  32
-rw-r--r--  contrib/llvm-project/llvm/lib/TableGen/Error.cpp  7
-rw-r--r--  contrib/llvm-project/llvm/lib/TableGen/JSONBackend.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/TableGen/Main.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/TableGen/Record.cpp  86
-rw-r--r--  contrib/llvm-project/llvm/lib/TableGen/TGLexer.cpp  147
-rw-r--r--  contrib/llvm-project/llvm/lib/TableGen/TGLexer.h  90
-rw-r--r--  contrib/llvm-project/llvm/lib/TableGen/TGParser.cpp  385
-rw-r--r--  contrib/llvm-project/llvm/lib/TableGen/TGParser.h  9
-rw-r--r--  contrib/llvm-project/llvm/lib/TableGen/TableGenBackend.cpp  12
-rw-r--r--  contrib/llvm-project/llvm/lib/TableGen/TableGenBackendSkeleton.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.h  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td  298
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp  252
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.h  9
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.td  57
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td  58
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp  103
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp  184
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp  18
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp  818
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h  18
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def  6
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp  14
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp  480
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp  1994
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h  98
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrAtomics.td  35
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td  361
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrGISel.td  194
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  1772
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h  64
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td  394
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp  367
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp  91
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MCInstLower.h  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp  68
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h  15
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp  363
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PointerAuth.h  116
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp  35
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td  323
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td  463
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td  389
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA510.td  118
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA53.td  90
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA55.td  110
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td  164
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57WriteRes.td  12
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td  133
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td  10
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedCyclone.td  10
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td  60
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td  66
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td  58
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td  100
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td  130
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td  100
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td  192
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td  27
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedTSV110.td  102
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX.td  34
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td  40
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td  48
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp  27
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTagging.cpp  6
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp  6
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp  75
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h  68
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td  19
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp  115
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.h  10
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp  232
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h  14
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp  275
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp  32
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp  19
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp  1215
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp  616
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h  9
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp  82
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp  328
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp  213
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp  283
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp  174
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h  16
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h  23
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp  45
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp  14
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp  185
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h  5
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp  44
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h  41
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/SMEABIPass.cpp  9
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td  673
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td  469
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp  10
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h  1
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp  22
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h  8
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h  176
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td  258
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h  28
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp  135
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h  5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp233
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp132
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp160
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp355
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp41
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp68
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp548
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h102
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp186
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp298
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp257
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp336
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp122
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp111
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp646
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td50
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp885
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp1234
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp92
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp84
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp77
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp138
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp107
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp29
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp85
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp556
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h53
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp118
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp180
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp29
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp734
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/BUFInstructions.td1339
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td385
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp324
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/EXPInstructions.td26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td386
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNProcessors.td16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp320
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.h101
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp68
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp31
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h173
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp192
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp53
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp68
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MIMGInstructions.td759
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp42
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h122
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp94
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp310
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp274
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp2070
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h31
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp220
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td84
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp1301
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h157
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td211
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td235
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp93
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp67
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp388
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.h97
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp136
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp141
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp195
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h103
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp154
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h63
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp41
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp41
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp119
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h39
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td158
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SISchedule.td64
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td157
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td1148
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp49
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp302
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h80
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VINTERPInstructions.td38
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td469
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td691
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td404
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td380
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td597
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPDInstructions.td141
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td414
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARC.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCISelDAGToDAG.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARM.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARM.td16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp63
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp222
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h76
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMBasicBlockInfo.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMFastISel.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp92
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMHazardRecognizer.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp88
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp169
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrNEON.td24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMMacroFusion.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMSLSHardening.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMSchedule.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA9.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleM55.td30
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleM85.td981
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleR52.td48
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleSwift.td28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp62
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.h14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp147
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp242
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp71
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp268
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp42
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp31
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp104
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVR.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRAsmPrinter.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRDevices.td42
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRFrameLowering.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrFormats.td16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.h52
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp43
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp87
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPF.h30
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPF.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp132
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFCORE.h22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp60
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.cpp200
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrFormats.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.td277
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp371
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp49
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFPreserveDIType.cpp26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFPreserveStaticOffset.cpp680
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.cpp64
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.h30
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.cpp48
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp43
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFCallLowering.cpp46
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFCallLowering.h39
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFInstructionSelector.cpp93
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFLegalizerInfo.cpp22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFLegalizerInfo.h28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFRegisterBankInfo.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFRegisterBankInfo.h39
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFRegisterBanks.td15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCFixups.h27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp40
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/CSKY.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp57
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelLowering.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelLowering.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfo.td52
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfoF1.td66
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfoF2.td66
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/CSKYTargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp50
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/DirectX/CBufferDataLayout.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/DirectX/DXILOpBuilder.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/DirectX/DXILPrepare.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/DirectX/DirectXTargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp36
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td1118
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td484
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp29
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp61
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td85
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp103
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiAluCode.h28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp39
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiRegisterInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetObjectFile.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArch.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp50
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp161
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp1770
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.h28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td206
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td936
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td1057
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchSubtarget.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp79
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp63
-rw-r--r--contrib/llvm-project/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp40
-rw-r--r--contrib/llvm-project/llvm/lib/Target/M68k/GISel/M68kCallLowering.h22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/M68k/M68kExpandPseudo.cpp47
-rw-r--r--contrib/llvm-project/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrAtomics.td3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrBits.td160
-rw-r--r--contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrControl.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrData.td46
-rw-r--r--contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.td10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/M68k/M68kRegisterInfo.td5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/M68k/M68kTargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/M68k/M68kTargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp39
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.td3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp29
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp39
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips16FrameLowering.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelLowering.cpp26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.cpp30
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsCombine.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.h13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsMCInstLower.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsMachineFunction.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsPostLegalizerCombiner.cpp81
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp89
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleGeneric.td22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleP5600.td24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp33
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.h17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.td19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp161
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp193
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp71
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp680
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.h22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td403
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td363
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerUnreachable.cpp48
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.h17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXUtilities.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMReflect.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp55
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp114
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp87
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h89
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/P10InstrResources.td22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.h76
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp101
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoopsVerify.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp40
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCGenRegisterBankInfo.def6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp132
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp266
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td67
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td37
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp300
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h261
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td67
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrP10.td240
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp55
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp277
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMergeStringPool.cpp365
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h52
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.td3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP10.td66
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP7.td661
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP8.td716
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP9.td40
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp54
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp126
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp84
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp446
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp1177
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp439
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVO0PreLegalizerCombiner.cpp155
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp173
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVPreLegalizerCombiner.cpp169
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp442
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.h23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBanks.td9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp63
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp118
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h31
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp44
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp59
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h59
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp98
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp40
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp79
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp202
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp136
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp70
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCombine.td27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp103
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp93
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp46
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td258
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp216
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp146
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGISel.td159
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp56
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp484
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp4756
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h99
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp87
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp473
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp458
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td103
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrGISel.td26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp981
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h44
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td371
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td137
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td46
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td65
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td90
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td824
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td2114
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td140
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td294
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td665
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td256
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td329
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td260
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td135
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td710
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp125
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp101
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp116
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td81
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp157
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp74
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td67
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td542
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVScheduleV.td58
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h90
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp218
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp284
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp50
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRV.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp127
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVBuiltins.h12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp81
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h38
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp65
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp185
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp133
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp104
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp43
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVRegularizer.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp46
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVSubtarget.h21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td66
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVUtils.cpp36
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVUtils.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp517
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp33
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/Sparc.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcASITags.td54
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp57
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstr64Bit.td116
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrAliases.td85
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrFormats.td8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.td213
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcRegisterInfo.td497
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.cpp27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.h12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp310
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZELFObjectWriter.cpp (renamed from contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp)27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGOFFObjectWriter.cpp27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp46
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCExpr.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZ.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp253
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.td14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFeatures.td5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp56
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp108
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp1004
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h59
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td87
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td271
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperands.td91
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td38
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp55
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetStreamer.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/ZOSLibcallNames.def100
-rw-r--r--contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp76
-rw-r--r--contrib/llvm-project/llvm/lib/Target/TargetMachineC.cpp202
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp199
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VE.h92
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VE.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h90
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VVPISelLowering.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/Utils/WasmAddressSpaces.h48
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp40
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp68
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp (renamed from contrib/llvm-project/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp)2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h (renamed from contrib/llvm-project/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h)0
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp149
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86Operand.h16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp154
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h505
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/GISel/X86CallLowering.cpp (renamed from contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.cpp)10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/GISel/X86CallLowering.h (renamed from contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.h)0
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp (renamed from contrib/llvm-project/llvm/lib/Target/X86/X86InstructionSelector.cpp)48
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp (renamed from contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.cpp)18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h (renamed from contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.h)0
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp (renamed from contrib/llvm-project/llvm/lib/Target/X86/X86RegisterBankInfo.cpp)2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h (renamed from contrib/llvm-project/llvm/lib/Target/X86/X86RegisterBankInfo.h)3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h2412
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp44
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp275
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp324
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MnemonicTables.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86.td80
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp86
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.td84
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp58
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp175
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FastPreTileConfig.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp80
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FixupVectorConstants.cpp157
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp576
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp406
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp4817
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h40
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ISelLoweringCall.cpp2950
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86Instr3DNow.td12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td432
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td108
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td155
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrControl.td3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td123
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp456
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.h20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td728
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragments.td841
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td117
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp4600
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h30
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td1395
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td211
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrOperands.td497
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrPredicates.td207
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td49
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrShiftRotate.td59
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrUtils.td1015
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrVMX.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86LowerAMXType.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp67
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86MachineFunctionInfo.h24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86PreAMXConfig.cpp415
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86PreTileConfig.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.cpp57
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.td254
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ReplaceableInstrs.def426
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86SchedAlderlakeP.td294
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86SchedBroadwell.td286
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86SchedHaswell.td308
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86SchedIceLake.td448
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86SchedSandyBridge.td214
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86SchedSapphireRapids.td706
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeClient.td300
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td448
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86Schedule.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleAtom.td136
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleBdVer2.td136
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleBtVer2.td52
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleSLM.td16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver1.td48
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver2.td36
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver3.td138
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver4.td213
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp61
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp40
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86WinEHState.cpp57
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCore.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp103
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.h17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp93
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaAsmBackend.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaELFObjectWriter.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Xtensa/XtensaTargetMachine.h4
-rw-r--r--contrib/llvm-project/llvm/lib/TargetParser/AArch64TargetParser.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParser.cpp98
-rw-r--r--contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParserCommon.cpp27
-rw-r--r--contrib/llvm-project/llvm/lib/TargetParser/CSKYTargetParser.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/TargetParser/Host.cpp68
-rw-r--r--contrib/llvm-project/llvm/lib/TargetParser/RISCVTargetParser.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/TargetParser/SubtargetFeature.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/TargetParser/TargetParser.cpp162
-rw-r--r--contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp118
-rw-r--r--contrib/llvm-project/llvm/lib/TargetParser/X86TargetParser.cpp217
-rw-r--r--contrib/llvm-project/llvm/lib/Testing/Support/Error.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/TextAPI/InterfaceFile.cpp280
-rw-r--r--contrib/llvm-project/llvm/lib/TextAPI/PackedVersion.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/TextAPI/Platform.cpp41
-rw-r--r--contrib/llvm-project/llvm/lib/TextAPI/RecordsSlice.cpp224
-rw-r--r--contrib/llvm-project/llvm/lib/TextAPI/Symbol.cpp26
-rw-r--r--contrib/llvm-project/llvm/lib/TextAPI/SymbolSet.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/TextAPI/Target.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/TextAPI/TextAPIError.cpp39
-rw-r--r--contrib/llvm-project/llvm/lib/TextAPI/TextStub.cpp136
-rw-r--r--contrib/llvm-project/llvm/lib/TextAPI/TextStubCommon.h6
-rw-r--r--contrib/llvm-project/llvm/lib/TextAPI/TextStubV5.cpp43
-rw-r--r--contrib/llvm-project/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/CFGuard/CFGuard.cpp98
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp328
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInstr.h48
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h19
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp145
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp22
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp312
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp396
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp1719
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp105
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp154
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp391
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp441
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp60
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/HotColdSplitting.cpp76
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp112
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp46
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/MergeFunctions.cpp62
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp936
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/SCCP.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/SampleContextTracker.cpp72
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp576
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/StripSymbols.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp129
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp274
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp634
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp256
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp140
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp1145
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h91
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp210
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp499
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp80
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp133
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp354
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp163
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp257
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp26
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp632
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp153
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CGProfile.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp22
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp110
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp53
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp328
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp758
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp85
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp611
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp87
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp101
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp86
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp543
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp88
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/DCE.cpp33
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp30
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp78
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp189
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp60
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp397
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp1139
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp83
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAlignment.cpp91
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp142
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp175
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp40
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp57
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp173
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp51
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp67
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp59
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp47
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp93
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp36
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp46
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp27
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp395
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp53
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp198
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp54
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/Reg2Mem.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp133
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp440
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp63
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp293
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/Sink.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/TLSVariableHoist.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp29
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/AddDiscriminators.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp157
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp171
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp66
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp123
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/CodeLayout.cpp882
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/CtorUtils.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/DXILUpgrade.cpp36
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp73
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp94
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp305
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp790
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopConstrainer.cpp904
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp47
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp234
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp125
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp210
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp52
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/MoveAutoInit.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp33
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp92
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/SCCPSolver.cpp78
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp37
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/SanitizerStats.cpp29
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp390
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp618
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp187
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp41
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/SymbolRewriter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp46
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp38
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h62
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp2062
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp4231
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp230
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h585
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp237
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h61
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp257
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp575
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp484
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.h64
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h30
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp270
-rw-r--r--contrib/llvm-project/llvm/lib/XRay/BlockVerifier.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/XRay/FDRTraceWriter.cpp2
1928 files changed, 171933 insertions, 91521 deletions
diff --git a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp
index 7b2f91f5392a..da18279ae9b9 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -896,7 +896,7 @@ bool llvm::isNotVisibleOnUnwind(const Value *Object,
// Byval goes out of scope on unwind.
if (auto *A = dyn_cast<Argument>(Object))
- return A->hasByValAttr();
+ return A->hasByValAttr() || A->hasAttribute(Attribute::DeadOnUnwind);
// A noalias return is not accessible from any other code. If the pointer
// does not escape prior to the unwind, then the caller cannot access the
@@ -908,3 +908,28 @@ bool llvm::isNotVisibleOnUnwind(const Value *Object,
return false;
}
+
+// We don't consider globals as writable: While the physical memory is writable,
+// we may not have provenance to perform the write.
+bool llvm::isWritableObject(const Value *Object,
+ bool &ExplicitlyDereferenceableOnly) {
+ ExplicitlyDereferenceableOnly = false;
+
+ // TODO: Alloca might not be writable after its lifetime ends.
+ // See https://github.com/llvm/llvm-project/issues/51838.
+ if (isa<AllocaInst>(Object))
+ return true;
+
+ if (auto *A = dyn_cast<Argument>(Object)) {
+ if (A->hasAttribute(Attribute::Writable)) {
+ ExplicitlyDereferenceableOnly = true;
+ return true;
+ }
+
+ return A->hasByValAttr();
+ }
+
+ // TODO: Noalias shouldn't imply writability, this should check for an
+ // allocator function instead.
+ return isNoAliasCall(Object);
+}
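The helper added above is meant to be consumed by transforms that introduce or widen stores. A minimal sketch of such a caller, assuming the declaration lands in llvm/Analysis/AliasAnalysis.h alongside isNotVisibleOnUnwind; canSpeculateStore and the dereferenceability flag are illustrative, not part of this patch:

#include "llvm/Analysis/AliasAnalysis.h"
using namespace llvm;

// Hypothetical transform-side check built on the new helper.
static bool canSpeculateStore(const Value *Object, bool KnownDereferenceable) {
  bool ExplicitlyDereferenceableOnly;
  if (!isWritableObject(Object, ExplicitlyDereferenceableOnly))
    return false;
  // For `writable` arguments, only the explicitly dereferenceable part of
  // the object may be written speculatively.
  return !ExplicitlyDereferenceableOnly || KnownDereferenceable;
}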
diff --git a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
index a551ea6b69c5..f4b4d8888a54 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -14,8 +14,6 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -319,45 +317,3 @@ AAEvaluator::~AAEvaluator() {
<< "%/" << ModRefCount * 100 / ModRefSum << "%\n";
}
}
-
-namespace llvm {
-class AAEvalLegacyPass : public FunctionPass {
- std::unique_ptr<AAEvaluator> P;
-
-public:
- static char ID; // Pass identification, replacement for typeid
- AAEvalLegacyPass() : FunctionPass(ID) {
- initializeAAEvalLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AAResultsWrapperPass>();
- AU.setPreservesAll();
- }
-
- bool doInitialization(Module &M) override {
- P.reset(new AAEvaluator());
- return false;
- }
-
- bool runOnFunction(Function &F) override {
- P->runInternal(F, getAnalysis<AAResultsWrapperPass>().getAAResults());
- return false;
- }
- bool doFinalization(Module &M) override {
- P.reset();
- return false;
- }
-};
-}
-
-char AAEvalLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(AAEvalLegacyPass, "aa-eval",
- "Exhaustive Alias Analysis Precision Evaluator", false,
- true)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(AAEvalLegacyPass, "aa-eval",
- "Exhaustive Alias Analysis Precision Evaluator", false,
- true)
-
-FunctionPass *llvm::createAAEvalPass() { return new AAEvalLegacyPass(); }
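With the legacy wrapper deleted, the evaluator remains reachable only through the new pass manager; assuming the registered pass name is unchanged by this merge, something like `opt -passes=aa-eval -disable-output input.ll` still exercises the same statistics path, printed when the AAEvaluator is destroyed.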
diff --git a/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp
index 91b889116dfa..99d20c7bef3b 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -348,8 +348,16 @@ AliasSet &AliasSetTracker::getAliasSetFor(const MemoryLocation &MemLoc) {
// due to a quirk of alias analysis behavior. Since alias(undef, undef)
// is NoAlias, mergeAliasSetsForPointer(undef, ...) will not find the
// the right set for undef, even if it exists.
- if (Entry.updateSizeAndAAInfo(Size, AAInfo))
+ if (Entry.updateSizeAndAAInfo(Size, AAInfo)) {
mergeAliasSetsForPointer(Pointer, Size, AAInfo, MustAliasAll);
+
+ // For MustAlias sets, also update Size/AAInfo of the representative
+ // pointer.
+ AliasSet &AS = *Entry.getAliasSet(*this);
+ if (AS.isMustAlias())
+ if (AliasSet::PointerRec *P = AS.getSomePointer())
+ P->updateSizeAndAAInfo(Size, AAInfo);
+ }
// Return the set!
return *Entry.getAliasSet(*this)->getForwardedTarget(*this);
}
@@ -367,9 +375,8 @@ AliasSet &AliasSetTracker::getAliasSetFor(const MemoryLocation &MemLoc) {
return AliasSets.back();
}
-void AliasSetTracker::add(Value *Ptr, LocationSize Size,
- const AAMDNodes &AAInfo) {
- addPointer(MemoryLocation(Ptr, Size, AAInfo), AliasSet::NoAccess);
+void AliasSetTracker::add(const MemoryLocation &Loc) {
+ addPointer(Loc, AliasSet::NoAccess);
}
void AliasSetTracker::add(LoadInst *LI) {
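Callers of the removed three-argument overload now build the MemoryLocation themselves. A sketch of the caller-side migration, assuming an existing tracker AST and the old Ptr/Size/AAInfo triple:

// Before this change: AST.add(Ptr, Size, AAInfo);
// After: package the triple into a MemoryLocation at the call site.
AST.add(MemoryLocation(Ptr, Size, AAInfo));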
diff --git a/contrib/llvm-project/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/Analysis.cpp
index 5461ce07af0b..44d2ff18a694 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/Analysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/Analysis.cpp
@@ -18,22 +18,14 @@ using namespace llvm;
/// initializeAnalysis - Initialize all passes linked into the Analysis library.
void llvm::initializeAnalysis(PassRegistry &Registry) {
- initializeAAEvalLegacyPassPass(Registry);
initializeBasicAAWrapperPassPass(Registry);
initializeBlockFrequencyInfoWrapperPassPass(Registry);
initializeBranchProbabilityInfoWrapperPassPass(Registry);
initializeCallGraphWrapperPassPass(Registry);
initializeCallGraphDOTPrinterPass(Registry);
- initializeCallGraphPrinterLegacyPassPass(Registry);
initializeCallGraphViewerPass(Registry);
- initializeCostModelAnalysisPass(Registry);
- initializeCFGViewerLegacyPassPass(Registry);
- initializeCFGPrinterLegacyPassPass(Registry);
- initializeCFGOnlyViewerLegacyPassPass(Registry);
- initializeCFGOnlyPrinterLegacyPassPass(Registry);
initializeCycleInfoWrapperPassPass(Registry);
initializeDependenceAnalysisWrapperPassPass(Registry);
- initializeDelinearizationPass(Registry);
initializeDominanceFrontierWrapperPassPass(Registry);
initializeDomViewerWrapperPassPass(Registry);
initializeDomPrinterWrapperPassPass(Registry);
@@ -46,13 +38,11 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeAAResultsWrapperPassPass(Registry);
initializeGlobalsAAWrapperPassPass(Registry);
initializeIVUsersWrapperPassPass(Registry);
- initializeInstCountLegacyPassPass(Registry);
initializeIntervalPartitionPass(Registry);
initializeIRSimilarityIdentifierWrapperPassPass(Registry);
initializeLazyBranchProbabilityInfoPassPass(Registry);
initializeLazyBlockFrequencyInfoPassPass(Registry);
initializeLazyValueInfoWrapperPassPass(Registry);
- initializeLazyValueInfoPrinterPass(Registry);
initializeLoopInfoWrapperPassPass(Registry);
initializeMemoryDependenceWrapperPassPass(Registry);
initializeModuleSummaryIndexWrapperPassPass(Registry);
diff --git a/contrib/llvm-project/llvm/lib/Analysis/AssumeBundleQueries.cpp b/contrib/llvm-project/llvm/lib/Analysis/AssumeBundleQueries.cpp
index 7440dbd29ccf..21530693c5f1 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/AssumeBundleQueries.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/AssumeBundleQueries.cpp
@@ -99,6 +99,9 @@ RetainedKnowledge
llvm::getKnowledgeFromBundle(AssumeInst &Assume,
const CallBase::BundleOpInfo &BOI) {
RetainedKnowledge Result;
+ if (!DebugCounter::shouldExecute(AssumeQueryCounter))
+ return Result;
+
Result.AttrKind = Attribute::getAttrKindFromName(BOI.Tag->getKey());
if (bundleHasArgument(BOI, ABA_WasOn))
Result.WasOn = getValueFromBundleOpInfo(Assume, BOI, ABA_WasOn);
@@ -122,7 +125,7 @@ RetainedKnowledge llvm::getKnowledgeFromOperandInAssume(AssumeInst &Assume,
return getKnowledgeFromBundle(Assume, BOI);
}
-bool llvm::isAssumeWithEmptyBundle(AssumeInst &Assume) {
+bool llvm::isAssumeWithEmptyBundle(const AssumeInst &Assume) {
return none_of(Assume.bundle_op_infos(),
[](const CallBase::BundleOpInfo &BOI) {
return BOI.Tag->getKey() != IgnoreBundleTag;
@@ -158,8 +161,6 @@ llvm::getKnowledgeForValue(const Value *V,
const CallBase::BundleOpInfo *)>
Filter) {
NumAssumeQueries++;
- if (!DebugCounter::shouldExecute(AssumeQueryCounter))
- return RetainedKnowledge::none();
if (AC) {
for (AssumptionCache::ResultElem &Elem : AC->assumptionsFor(V)) {
auto *II = cast_or_null<AssumeInst>(Elem.Assume);
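Hoisting the DebugCounter check into getKnowledgeFromBundle gates every bundle decode rather than only top-level value queries, so counter-based bisection now covers both entry points. The general DebugCounter pattern, as a standalone sketch with hypothetical names:

#include "llvm/Support/DebugCounter.h"
using namespace llvm;

DEBUG_COUNTER(MyCounter, "my-transform",
              "Controls which transformations are performed");

static bool maybeTransform() {
  // Returns false once the counter's budget (configured with the
  // -debug-counter command-line option) is exhausted, letting a miscompile
  // be bisected down to a single decision.
  if (!DebugCounter::shouldExecute(MyCounter))
    return false;
  // ... perform the transformation ...
  return true;
}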
diff --git a/contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp b/contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp
index b439dc1e6a76..fb3a6f8de2d6 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp
@@ -62,15 +62,14 @@ findAffectedValues(CallBase *CI, TargetTransformInfo *TTI,
auto AddAffected = [&Affected](Value *V, unsigned Idx =
AssumptionCache::ExprResultIdx) {
- if (isa<Argument>(V)) {
+ if (isa<Argument>(V) || isa<GlobalValue>(V)) {
Affected.push_back({V, Idx});
} else if (auto *I = dyn_cast<Instruction>(V)) {
Affected.push_back({I, Idx});
// Peek through unary operators to find the source of the condition.
Value *Op;
- if (match(I, m_BitCast(m_Value(Op))) ||
- match(I, m_PtrToInt(m_Value(Op))) || match(I, m_Not(m_Value(Op)))) {
+ if (match(I, m_PtrToInt(m_Value(Op)))) {
if (isa<Instruction>(Op) || isa<Argument>(Op))
Affected.push_back({Op, Idx});
}
@@ -85,6 +84,8 @@ findAffectedValues(CallBase *CI, TargetTransformInfo *TTI,
Value *Cond = CI->getArgOperand(0), *A, *B;
AddAffected(Cond);
+ if (match(Cond, m_Not(m_Value(A))))
+ AddAffected(A);
CmpInst::Predicate Pred;
if (match(Cond, m_Cmp(Pred, m_Value(A), m_Value(B)))) {
@@ -92,35 +93,19 @@ findAffectedValues(CallBase *CI, TargetTransformInfo *TTI,
AddAffected(B);
if (Pred == ICmpInst::ICMP_EQ) {
- // For equality comparisons, we handle the case of bit inversion.
- auto AddAffectedFromEq = [&AddAffected](Value *V) {
- Value *A;
- if (match(V, m_Not(m_Value(A)))) {
- AddAffected(A);
- V = A;
- }
-
- Value *B;
- // (A & B) or (A | B) or (A ^ B).
- if (match(V, m_BitwiseLogic(m_Value(A), m_Value(B)))) {
- AddAffected(A);
- AddAffected(B);
- // (A << C) or (A >>_s C) or (A >>_u C) where C is some constant.
- } else if (match(V, m_Shift(m_Value(A), m_ConstantInt()))) {
- AddAffected(A);
- }
- };
-
- AddAffectedFromEq(A);
- AddAffectedFromEq(B);
+ if (match(B, m_ConstantInt())) {
+ Value *X;
+ // (X & C) or (X | C) or (X ^ C).
+ // (X << C) or (X >>_s C) or (X >>_u C).
+ if (match(A, m_BitwiseLogic(m_Value(X), m_ConstantInt())) ||
+ match(A, m_Shift(m_Value(X), m_ConstantInt())))
+ AddAffected(X);
+ }
} else if (Pred == ICmpInst::ICMP_NE) {
- Value *X, *Y;
- // Handle (a & b != 0). If a/b is a power of 2 we can use this
- // information.
- if (match(A, m_And(m_Value(X), m_Value(Y))) && match(B, m_Zero())) {
+ Value *X;
+ // Handle (X & pow2 != 0).
+ if (match(A, m_And(m_Value(X), m_Power2())) && match(B, m_Zero()))
AddAffected(X);
- AddAffected(Y);
- }
} else if (Pred == ICmpInst::ICMP_ULT) {
Value *X;
// Handle (A + C1) u< C2, which is the canonical form of A > C3 && A < C4,
@@ -188,7 +173,7 @@ void AssumptionCache::unregisterAssumption(AssumeInst *CI) {
AffectedValues.erase(AVI);
}
- erase_value(AssumeHandles, CI);
+ llvm::erase(AssumeHandles, CI);
}
void AssumptionCache::AffectedValueCallbackVH::deleted() {
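The rewritten ICMP_EQ/ICMP_NE handling reads more easily as a standalone PatternMatch example; A and B stand for the already-matched compare operands, and the helper name is hypothetical:

using namespace llvm;
using namespace llvm::PatternMatch;

// Mirrors the narrowed logic: (X op C) == C2 with both constants, or
// (X & pow2) != 0; anything else no longer feeds the affected-value list.
static Value *affectedOperand(Value *A, Value *B, bool IsEq) {
  Value *X;
  if (IsEq && match(B, m_ConstantInt()) &&
      (match(A, m_BitwiseLogic(m_Value(X), m_ConstantInt())) ||
       match(A, m_Shift(m_Value(X), m_ConstantInt()))))
    return X;
  if (!IsEq && match(A, m_And(m_Value(X), m_Power2())) && match(B, m_Zero()))
    return X;
  return nullptr;
}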
diff --git a/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 16e0e1f66524..3de147368f23 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -101,22 +101,23 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
//===----------------------------------------------------------------------===//
/// Returns the size of the object specified by V or UnknownSize if unknown.
-static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
- const TargetLibraryInfo &TLI,
- bool NullIsValidLoc,
- bool RoundToAlign = false) {
+static std::optional<TypeSize> getObjectSize(const Value *V,
+ const DataLayout &DL,
+ const TargetLibraryInfo &TLI,
+ bool NullIsValidLoc,
+ bool RoundToAlign = false) {
uint64_t Size;
ObjectSizeOpts Opts;
Opts.RoundToAlign = RoundToAlign;
Opts.NullIsUnknownSize = NullIsValidLoc;
if (getObjectSize(V, Size, DL, &TLI, Opts))
- return Size;
- return MemoryLocation::UnknownSize;
+ return TypeSize::getFixed(Size);
+ return std::nullopt;
}
/// Returns true if we can prove that the object specified by V is smaller than
/// Size.
-static bool isObjectSmallerThan(const Value *V, uint64_t Size,
+static bool isObjectSmallerThan(const Value *V, TypeSize Size,
const DataLayout &DL,
const TargetLibraryInfo &TLI,
bool NullIsValidLoc) {
@@ -151,16 +152,16 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
// This function needs to use the aligned object size because we allow
// reads a bit past the end given sufficient alignment.
- uint64_t ObjectSize = getObjectSize(V, DL, TLI, NullIsValidLoc,
- /*RoundToAlign*/ true);
+ std::optional<TypeSize> ObjectSize = getObjectSize(V, DL, TLI, NullIsValidLoc,
+ /*RoundToAlign*/ true);
- return ObjectSize != MemoryLocation::UnknownSize && ObjectSize < Size;
+ return ObjectSize && TypeSize::isKnownLT(*ObjectSize, Size);
}
/// Return the minimal extent from \p V to the end of the underlying object,
/// assuming the result is used in an aliasing query. E.g., we do use the query
/// location size and the fact that null pointers cannot alias here.
-static uint64_t getMinimalExtentFrom(const Value &V,
+static TypeSize getMinimalExtentFrom(const Value &V,
const LocationSize &LocSize,
const DataLayout &DL,
bool NullIsValidLoc) {
@@ -175,15 +176,16 @@ static uint64_t getMinimalExtentFrom(const Value &V,
// If queried with a precise location size, we assume that location size to be
// accessed, thus valid.
if (LocSize.isPrecise())
- DerefBytes = std::max(DerefBytes, LocSize.getValue());
- return DerefBytes;
+ DerefBytes = std::max(DerefBytes, LocSize.getValue().getKnownMinValue());
+ return TypeSize::getFixed(DerefBytes);
}
/// Returns true if we can prove that the object specified by V has size Size.
-static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
+static bool isObjectSize(const Value *V, TypeSize Size, const DataLayout &DL,
const TargetLibraryInfo &TLI, bool NullIsValidLoc) {
- uint64_t ObjectSize = getObjectSize(V, DL, TLI, NullIsValidLoc);
- return ObjectSize != MemoryLocation::UnknownSize && ObjectSize == Size;
+ std::optional<TypeSize> ObjectSize =
+ getObjectSize(V, DL, TLI, NullIsValidLoc);
+ return ObjectSize && *ObjectSize == Size;
}
//===----------------------------------------------------------------------===//
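Replacing the UnknownSize sentinel with std::optional<TypeSize> also makes scalable sizes first-class: comparisons must state what is provable for every vscale. A small illustration, assuming llvm/Support/TypeSize.h:

using llvm::TypeSize;

TypeSize Fixed = TypeSize::getFixed(16);    // 16 bytes
TypeSize Scaled = TypeSize::getScalable(4); // 4 * vscale bytes
// isKnownLT asks "provably smaller for every vscale"; 16 < 4 * vscale only
// holds for vscale > 4, so the answer is conservatively false.
bool Lt = TypeSize::isKnownLT(Fixed, Scaled);                 // false
bool Lt2 = TypeSize::isKnownLT(TypeSize::getFixed(8), Fixed); // true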
@@ -192,13 +194,21 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
CaptureInfo::~CaptureInfo() = default;
-bool SimpleCaptureInfo::isNotCapturedBeforeOrAt(const Value *Object,
- const Instruction *I) {
+bool SimpleCaptureInfo::isNotCapturedBefore(const Value *Object,
+ const Instruction *I, bool OrAt) {
return isNonEscapingLocalObject(Object, &IsCapturedCache);
}
-bool EarliestEscapeInfo::isNotCapturedBeforeOrAt(const Value *Object,
- const Instruction *I) {
+static bool isNotInCycle(const Instruction *I, const DominatorTree *DT,
+ const LoopInfo *LI) {
+ BasicBlock *BB = const_cast<BasicBlock *>(I->getParent());
+ SmallVector<BasicBlock *> Succs(successors(BB));
+ return Succs.empty() ||
+ !isPotentiallyReachableFromMany(Succs, BB, nullptr, DT, LI);
+}
+
+bool EarliestEscapeInfo::isNotCapturedBefore(const Value *Object,
+ const Instruction *I, bool OrAt) {
if (!isIdentifiedFunctionLocal(Object))
return false;
@@ -206,7 +216,7 @@ bool EarliestEscapeInfo::isNotCapturedBeforeOrAt(const Value *Object,
if (Iter.second) {
Instruction *EarliestCapture = FindEarliestCapture(
Object, *const_cast<Function *>(I->getFunction()),
- /*ReturnCaptures=*/false, /*StoreCaptures=*/true, DT, EphValues);
+ /*ReturnCaptures=*/false, /*StoreCaptures=*/true, DT);
if (EarliestCapture) {
auto Ins = Inst2Obj.insert({EarliestCapture, {}});
Ins.first->second.push_back(Object);
@@ -218,8 +228,13 @@ bool EarliestEscapeInfo::isNotCapturedBeforeOrAt(const Value *Object,
if (!Iter.first->second)
return true;
- return I != Iter.first->second &&
- !isPotentiallyReachable(Iter.first->second, I, nullptr, &DT, &LI);
+ if (I == Iter.first->second) {
+ if (OrAt)
+ return false;
+ return isNotInCycle(I, &DT, LI);
+ }
+
+ return !isPotentiallyReachable(Iter.first->second, I, nullptr, &DT, LI);
}
void EarliestEscapeInfo::removeInstruction(Instruction *I) {
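The OrAt split is subtle: when I is itself the earliest capture, the object is still uncaptured strictly before I, unless I can execute again. A condensed restatement of the decision, with EarliestCapture standing in for the cached instruction; this is a sketch, not a drop-in replacement:

static bool notCapturedBefore(const Instruction *I,
                              const Instruction *EarliestCapture, bool OrAt,
                              const DominatorTree *DT, const LoopInfo *LI) {
  if (I == EarliestCapture)
    // "Before or at" fails outright; "strictly before" fails only if I sits
    // in a cycle, where an earlier iteration already performed the capture.
    return !OrAt && isNotInCycle(I, DT, LI);
  return !isPotentiallyReachable(EarliestCapture, I, nullptr, DT, LI);
}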
@@ -380,8 +395,8 @@ static LinearExpression GetLinearExpression(
case Instruction::Or:
// X|C == X+C if all the bits in C are unset in X. Otherwise we can't
// analyze it.
- if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), DL, 0, AC,
- BOp, DT))
+ if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(),
+ SimplifyQuery(DL, DT, AC, BOp)))
return Val;
[[fallthrough]];
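Two details meet in this hunk: the X|C == X+C rewrite is only valid when the set bits are disjoint (each OR'd bit is then an addition with no carry, e.g. 0b10100000 | 0b00000101 == 0b10100000 + 0b00000101), and the known-bits query now travels as a SimplifyQuery bundle. The new call shape, with the surrounding locals assumed from the context above:

// DL, DT, AC and BOp come from the enclosing analysis.
SimplifyQuery Q(DL, DT, AC, BOp);
bool Disjoint = MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), Q);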
@@ -442,10 +457,13 @@ static LinearExpression GetLinearExpression(
/// an issue, for example, in particular for 32b pointers with negative indices
/// that rely on two's complement wrap-arounds for precise alias information
/// where the maximum index size is 64b.
-static APInt adjustToIndexSize(const APInt &Offset, unsigned IndexSize) {
+static void adjustToIndexSize(APInt &Offset, unsigned IndexSize) {
assert(IndexSize <= Offset.getBitWidth() && "Invalid IndexSize!");
unsigned ShiftBits = Offset.getBitWidth() - IndexSize;
- return (Offset << ShiftBits).ashr(ShiftBits);
+ if (ShiftBits != 0) {
+ Offset <<= ShiftBits;
+ Offset.ashrInPlace(ShiftBits);
+ }
}
namespace {
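The in-place shift pair performs sign extension from the index width, now skipped entirely when no truncation is needed. A worked example, assuming a 64-bit APInt offset and a 32-bit pointer index size:

llvm::APInt Offset(64, 0xFFFFFFFFULL); // 2^32 - 1 seen as a 64-bit offset
unsigned ShiftBits = 64 - 32;
Offset <<= ShiftBits;          // 0xFFFFFFFF00000000
Offset.ashrInPlace(ShiftBits); // 0xFFFFFFFFFFFFFFFF: the arithmetic shift
                               // drags the sign bit down, yielding -1 in
                               // the 32-bit index space.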
@@ -662,6 +680,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
if (Decomposed.VarIndices[i].Val.V == LE.Val.V &&
Decomposed.VarIndices[i].Val.hasSameCastsAs(LE.Val)) {
Scale += Decomposed.VarIndices[i].Scale;
+ LE.IsNSW = false; // We cannot guarantee nsw for the merge.
Decomposed.VarIndices.erase(Decomposed.VarIndices.begin() + i);
break;
}
@@ -669,7 +688,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
// Make sure that we have a scale that makes sense for this target's
// index size.
- Scale = adjustToIndexSize(Scale, IndexSize);
+ adjustToIndexSize(Scale, IndexSize);
if (!!Scale) {
VariableGEPIndex Entry = {LE.Val, Scale, CxtI, LE.IsNSW,
@@ -680,7 +699,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
// Take care of wrap-arounds
if (GepHasConstantOffset)
- Decomposed.Offset = adjustToIndexSize(Decomposed.Offset, IndexSize);
+ adjustToIndexSize(Decomposed.Offset, IndexSize);
// Analyze the base pointer next.
V = GEPOp->getOperand(0);
@@ -731,7 +750,7 @@ ModRefInfo BasicAAResult::getModRefInfoMask(const MemoryLocation &Loc,
// global to be marked constant in some modules and non-constant in
// others. GV may even be a declaration, not a definition.
if (!GV->isConstant())
- return AAResultBase::getModRefInfoMask(Loc, AAQI, IgnoreLocals);
+ return ModRefInfo::ModRef;
continue;
}
@@ -747,18 +766,18 @@ ModRefInfo BasicAAResult::getModRefInfoMask(const MemoryLocation &Loc,
if (const PHINode *PN = dyn_cast<PHINode>(V)) {
// Don't bother inspecting phi nodes with many operands.
if (PN->getNumIncomingValues() > MaxLookup)
- return AAResultBase::getModRefInfoMask(Loc, AAQI, IgnoreLocals);
+ return ModRefInfo::ModRef;
append_range(Worklist, PN->incoming_values());
continue;
}
// Otherwise be conservative.
- return AAResultBase::getModRefInfoMask(Loc, AAQI, IgnoreLocals);
+ return ModRefInfo::ModRef;
} while (!Worklist.empty() && --MaxLookup);
// If we hit the maximum number of instructions to examine, be conservative.
if (!Worklist.empty())
- return AAResultBase::getModRefInfoMask(Loc, AAQI, IgnoreLocals);
+ return ModRefInfo::ModRef;
return Result;
}
@@ -813,7 +832,7 @@ ModRefInfo BasicAAResult::getArgModRefInfo(const CallBase *Call,
if (Call->paramHasAttr(ArgIdx, Attribute::ReadNone))
return ModRefInfo::NoModRef;
- return AAResultBase::getArgModRefInfo(Call, ArgIdx);
+ return ModRefInfo::ModRef;
}
#ifndef NDEBUG
@@ -884,7 +903,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
// Make sure the object has not escaped here, and then check that none of the
// call arguments alias the object below.
if (!isa<Constant>(Object) && Call != Object &&
- AAQI.CI->isNotCapturedBeforeOrAt(Object, Call)) {
+ AAQI.CI->isNotCapturedBefore(Object, Call, /*OrAt*/ false)) {
// Optimistically assume that call doesn't touch Object and check this
// assumption in the following loop.
@@ -972,8 +991,8 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
if (isIntrinsicCall(Call, Intrinsic::invariant_start))
return ModRefInfo::Ref;
- // The AAResultBase base class has some smarts, lets use them.
- return AAResultBase::getModRefInfo(Call, Loc, AAQI);
+ // Be conservative.
+ return ModRefInfo::ModRef;
}
ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call1,
@@ -1000,8 +1019,8 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call1,
? ModRefInfo::Mod
: ModRefInfo::NoModRef;
- // The AAResultBase base class has some smarts, lets use them.
- return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
+ // Be conservative.
+ return ModRefInfo::ModRef;
}
/// Return true if we know V to the base address of the corresponding memory
@@ -1055,15 +1074,19 @@ AliasResult BasicAAResult::aliasGEP(
// If an inbounds GEP would have to start from an out of bounds address
// for the two to alias, then we can assume noalias.
+ // TODO: Remove !isScalable() once BasicAA fully supports scalable location
+ // sizes.
if (*DecompGEP1.InBounds && DecompGEP1.VarIndices.empty() &&
- V2Size.hasValue() && DecompGEP1.Offset.sge(V2Size.getValue()) &&
+ V2Size.hasValue() && !V2Size.isScalable() &&
+ DecompGEP1.Offset.sge(V2Size.getValue()) &&
isBaseOfObject(DecompGEP2.Base))
return AliasResult::NoAlias;
if (isa<GEPOperator>(V2)) {
// Symmetric case to above.
if (*DecompGEP2.InBounds && DecompGEP1.VarIndices.empty() &&
- V1Size.hasValue() && DecompGEP1.Offset.sle(-V1Size.getValue()) &&
+ V1Size.hasValue() && !V1Size.isScalable() &&
+ DecompGEP1.Offset.sle(-V1Size.getValue()) &&
isBaseOfObject(DecompGEP1.Base))
return AliasResult::NoAlias;
}
@@ -1087,6 +1110,10 @@ AliasResult BasicAAResult::aliasGEP(
return BaseAlias;
}
+ // Bail on analysing scalable LocationSize
+ if (V1Size.isScalable() || V2Size.isScalable())
+ return AliasResult::MayAlias;
+
// If there is a constant difference between the pointers, but the difference
// is less than the size of the associated memory object, then we know
// that the objects are partially overlapping. If the difference is
@@ -1206,9 +1233,6 @@ AliasResult BasicAAResult::aliasGEP(
const VariableGEPIndex &Var = DecompGEP1.VarIndices[0];
if (Var.Val.TruncBits == 0 &&
isKnownNonZero(Var.Val.V, DL, 0, &AC, Var.CxtI, DT)) {
- // If V != 0, then abs(VarIndex) > 0.
- MinAbsVarIndex = APInt(Var.Scale.getBitWidth(), 1);
-
// Check if abs(V*Scale) >= abs(Scale) holds in the presence of
// potentially wrapping math.
auto MultiplyByScaleNoWrap = [](const VariableGEPIndex &Var) {
@@ -1501,10 +1525,10 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
// location if that memory location doesn't escape. Or it may pass a
// nocapture value to other functions as long as they don't capture it.
if (isEscapeSource(O1) &&
- AAQI.CI->isNotCapturedBeforeOrAt(O2, cast<Instruction>(O1)))
+ AAQI.CI->isNotCapturedBefore(O2, cast<Instruction>(O1), /*OrAt*/ true))
return AliasResult::NoAlias;
if (isEscapeSource(O2) &&
- AAQI.CI->isNotCapturedBeforeOrAt(O1, cast<Instruction>(O2)))
+ AAQI.CI->isNotCapturedBefore(O1, cast<Instruction>(O2), /*OrAt*/ true))
return AliasResult::NoAlias;
}
@@ -1697,12 +1721,7 @@ bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V,
if (!Inst || Inst->getParent()->isEntryBlock())
return true;
- // Check whether the instruction is part of a cycle, by checking whether the
- // block can (non-trivially) reach itself.
- BasicBlock *BB = const_cast<BasicBlock *>(Inst->getParent());
- SmallVector<BasicBlock *> Succs(successors(BB));
- return !Succs.empty() &&
- !isPotentiallyReachableFromMany(Succs, BB, nullptr, DT);
+ return isNotInCycle(Inst, DT, /*LI*/ nullptr);
}
/// Computes the symbolic difference between two de-composed GEPs.
diff --git a/contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfo.cpp
index b18d04cc73db..96c9bfa0e372 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfo.cpp
@@ -78,14 +78,13 @@ cl::opt<PGOViewCountsType> PGOViewCounts(
clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
clEnumValN(PGOVCT_Text, "text", "show in text.")));
-static cl::opt<bool> PrintBlockFreq(
- "print-bfi", cl::init(false), cl::Hidden,
- cl::desc("Print the block frequency info."));
-
-cl::opt<std::string> PrintBlockFreqFuncName(
- "print-bfi-func-name", cl::Hidden,
- cl::desc("The option to specify the name of the function "
- "whose block frequency info is printed."));
+static cl::opt<bool> PrintBFI("print-bfi", cl::init(false), cl::Hidden,
+ cl::desc("Print the block frequency info."));
+
+cl::opt<std::string>
+ PrintBFIFuncName("print-bfi-func-name", cl::Hidden,
+ cl::desc("The option to specify the name of the function "
+ "whose block frequency info is printed."));
} // namespace llvm
namespace llvm {
@@ -193,15 +192,14 @@ void BlockFrequencyInfo::calculate(const Function &F,
F.getName().equals(ViewBlockFreqFuncName))) {
view();
}
- if (PrintBlockFreq &&
- (PrintBlockFreqFuncName.empty() ||
- F.getName().equals(PrintBlockFreqFuncName))) {
+ if (PrintBFI &&
+ (PrintBFIFuncName.empty() || F.getName().equals(PrintBFIFuncName))) {
print(dbgs());
}
}
BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const {
- return BFI ? BFI->getBlockFreq(BB) : 0;
+ return BFI ? BFI->getBlockFreq(BB) : BlockFrequency(0);
}
std::optional<uint64_t>
@@ -214,7 +212,7 @@ BlockFrequencyInfo::getBlockProfileCount(const BasicBlock *BB,
}
std::optional<uint64_t>
-BlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const {
+BlockFrequencyInfo::getProfileCountFromFreq(BlockFrequency Freq) const {
if (!BFI)
return std::nullopt;
return BFI->getProfileCountFromFreq(*getFunction(), Freq);
@@ -225,17 +223,18 @@ bool BlockFrequencyInfo::isIrrLoopHeader(const BasicBlock *BB) {
return BFI->isIrrLoopHeader(BB);
}
-void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB, uint64_t Freq) {
+void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB,
+ BlockFrequency Freq) {
assert(BFI && "Expected analysis to be available");
BFI->setBlockFreq(BB, Freq);
}
void BlockFrequencyInfo::setBlockFreqAndScale(
- const BasicBlock *ReferenceBB, uint64_t Freq,
+ const BasicBlock *ReferenceBB, BlockFrequency Freq,
SmallPtrSetImpl<BasicBlock *> &BlocksToScale) {
assert(BFI && "Expected analysis to be available");
// Use 128 bits APInt to avoid overflow.
- APInt NewFreq(128, Freq);
+ APInt NewFreq(128, Freq.getFrequency());
APInt OldFreq(128, BFI->getBlockFreq(ReferenceBB).getFrequency());
APInt BBFreq(128, 0);
for (auto *BB : BlocksToScale) {
@@ -247,7 +246,7 @@ void BlockFrequencyInfo::setBlockFreqAndScale(
// a hot spot, one of the options proposed in
// https://reviews.llvm.org/D28535#650071 could be used to avoid this.
BBFreq = BBFreq.udiv(OldFreq);
- BFI->setBlockFreq(BB, BBFreq.getLimitedValue());
+ BFI->setBlockFreq(BB, BlockFrequency(BBFreq.getLimitedValue()));
}
BFI->setBlockFreq(ReferenceBB, Freq);
}
@@ -266,19 +265,8 @@ const BranchProbabilityInfo *BlockFrequencyInfo::getBPI() const {
return BFI ? &BFI->getBPI() : nullptr;
}
-raw_ostream &BlockFrequencyInfo::
-printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const {
- return BFI ? BFI->printBlockFreq(OS, Freq) : OS;
-}
-
-raw_ostream &
-BlockFrequencyInfo::printBlockFreq(raw_ostream &OS,
- const BasicBlock *BB) const {
- return BFI ? BFI->printBlockFreq(OS, BB) : OS;
-}
-
-uint64_t BlockFrequencyInfo::getEntryFreq() const {
- return BFI ? BFI->getEntryFreq() : 0;
+BlockFrequency BlockFrequencyInfo::getEntryFreq() const {
+ return BFI ? BFI->getEntryFreq() : BlockFrequency(0);
}
void BlockFrequencyInfo::releaseMemory() { BFI.reset(); }
@@ -293,6 +281,18 @@ void BlockFrequencyInfo::verifyMatch(BlockFrequencyInfo &Other) const {
BFI->verifyMatch(*Other.BFI);
}
+Printable llvm::printBlockFreq(const BlockFrequencyInfo &BFI,
+ BlockFrequency Freq) {
+ return Printable([&BFI, Freq](raw_ostream &OS) {
+ printBlockFreqImpl(OS, BFI.getEntryFreq(), Freq);
+ });
+}
+
+Printable llvm::printBlockFreq(const BlockFrequencyInfo &BFI,
+ const BasicBlock &BB) {
+ return printBlockFreq(BFI, BFI.getBlockFreq(&BB));
+}
+
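
With the member printBlockFreq functions gone, callers stream the new Printable helpers instead; a hedged usage sketch:

    #include "llvm/Analysis/BlockFrequencyInfo.h"
    #include "llvm/Support/Debug.h"

    // Prints BB's frequency as a ratio of the entry frequency,
    // e.g. "2.0" for a block expected to run twice per function entry.
    void dumpFreq(const llvm::BlockFrequencyInfo &BFI, const llvm::BasicBlock &BB) {
      llvm::dbgs() << llvm::printBlockFreq(BFI, BB) << "\n";
    }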
INITIALIZE_PASS_BEGIN(BlockFrequencyInfoWrapperPass, "block-freq",
"Block Frequency Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
diff --git a/contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 82b1e3b9eede..ae08d56ef098 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -481,30 +481,24 @@ void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
const Scaled64 &Min, const Scaled64 &Max) {
- // Scale the Factor to a size that creates integers. Ideally, integers would
- // be scaled so that Max == UINT64_MAX so that they can be best
- // differentiated. However, in the presence of large frequency values, small
- // frequencies are scaled down to 1, making it impossible to differentiate
- // small, unequal numbers. When the spread between Min and Max frequencies
- // fits well within MaxBits, we make the scale be at least 8.
- const unsigned MaxBits = 64;
- const unsigned SpreadBits = (Max / Min).lg();
- Scaled64 ScalingFactor;
- if (SpreadBits <= MaxBits - 3) {
- // If the values are small enough, make the scaling factor at least 8 to
- // allow distinguishing small values.
- ScalingFactor = Min.inverse();
- ScalingFactor <<= 3;
- } else {
- // If the values need more than MaxBits to be represented, saturate small
- // frequency values down to 1 by using a scaling factor that benefits large
- // frequency values.
- ScalingFactor = Scaled64(1, MaxBits) / Max;
- }
+ // Scale the Factor to a size that creates integers. If possible, scale
+ // integers so that Max == UINT64_MAX so that they can be best differentiated.
+ // It is possible that the range between min and max cannot be accurately
+ // represented in a 64-bit integer without either losing precision for small
+ // values (so small unequal numbers all map to 1) or saturating big numbers
+ // (so unequal big numbers may map to UINT64_MAX). We choose to lose
+ // precision for small numbers.
+ const unsigned MaxBits = sizeof(Scaled64::DigitsType) * CHAR_BIT;
+ // Users often add up multiple BlockFrequency values or multiply them with
+ // things like instruction costs. Leave some headroom so that saturating
+ // operations do not reach UINT64_MAX too early.
+ const unsigned Slack = 10;
+ Scaled64 ScalingFactor = Scaled64(1, MaxBits - Slack) / Max;
// Translate the floats to integers.
LLVM_DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
<< ", factor = " << ScalingFactor << "\n");
+ (void)Min;
for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
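
Worked example (numbers invented): with MaxBits = 64 and Slack = 10 the factor is 2^54 / Max, so for Max = 12.5 the hottest block maps to 2^54 and a block with scaled frequency 0.5 maps to 2^54 / 25. Both sit a factor of 2^10 below UINT64_MAX, so sums and cost multiplications have room before saturating, at the cost of mapping frequencies below Max / 2^54 to 1.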
@@ -581,30 +575,27 @@ BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
report_fatal_error(OS.str());
}
#endif
- return 0;
+ return BlockFrequency(0);
}
- return Freqs[Node.Index].Integer;
+ return BlockFrequency(Freqs[Node.Index].Integer);
}
std::optional<uint64_t>
BlockFrequencyInfoImplBase::getBlockProfileCount(const Function &F,
const BlockNode &Node,
bool AllowSynthetic) const {
- return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency(),
- AllowSynthetic);
+ return getProfileCountFromFreq(F, getBlockFreq(Node), AllowSynthetic);
}
-std::optional<uint64_t>
-BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F,
- uint64_t Freq,
- bool AllowSynthetic) const {
+std::optional<uint64_t> BlockFrequencyInfoImplBase::getProfileCountFromFreq(
+ const Function &F, BlockFrequency Freq, bool AllowSynthetic) const {
auto EntryCount = F.getEntryCount(AllowSynthetic);
if (!EntryCount)
return std::nullopt;
// Use 128 bit APInt to do the arithmetic to avoid overflow.
APInt BlockCount(128, EntryCount->getCount());
- APInt BlockFreq(128, Freq);
- APInt EntryFreq(128, getEntryFreq());
+ APInt BlockFreq(128, Freq.getFrequency());
+ APInt EntryFreq(128, getEntryFreq().getFrequency());
BlockCount *= BlockFreq;
// Rounded division of BlockCount by EntryFreq. Since EntryFreq is unsigned
// lshr by 1 gives EntryFreq/2.
@@ -627,10 +618,10 @@ BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
}
void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node,
- uint64_t Freq) {
+ BlockFrequency Freq) {
assert(Node.isValid() && "Expected valid node");
assert(Node.Index < Freqs.size() && "Expected legal index");
- Freqs[Node.Index].Integer = Freq;
+ Freqs[Node.Index].Integer = Freq.getFrequency();
}
std::string
@@ -643,19 +634,19 @@ BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const {
return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*");
}
-raw_ostream &
-BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
- const BlockNode &Node) const {
- return OS << getFloatingBlockFreq(Node);
-}
-
-raw_ostream &
-BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
- const BlockFrequency &Freq) const {
+void llvm::printBlockFreqImpl(raw_ostream &OS, BlockFrequency EntryFreq,
+ BlockFrequency Freq) {
+ if (Freq == BlockFrequency(0)) {
+ OS << "0";
+ return;
+ }
+ if (EntryFreq == BlockFrequency(0)) {
+ OS << "<invalid BFI>";
+ return;
+ }
Scaled64 Block(Freq.getFrequency(), 0);
- Scaled64 Entry(getEntryFreq(), 0);
-
- return OS << Block / Entry;
+ Scaled64 Entry(EntryFreq.getFrequency(), 0);
+ OS << Block / Entry;
}
void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) {
diff --git a/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index b45deccd913d..6448ed66dc51 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -1188,8 +1188,11 @@ BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS,
const BasicBlock *Src,
const BasicBlock *Dst) const {
const BranchProbability Prob = getEdgeProbability(Src, Dst);
- OS << "edge " << Src->getName() << " -> " << Dst->getName()
- << " probability is " << Prob
+ OS << "edge ";
+ Src->printAsOperand(OS, false, Src->getModule());
+ OS << " -> ";
+ Dst->printAsOperand(OS, false, Dst->getModule());
+ OS << " probability is " << Prob
<< (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
return OS;
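
Using printAsOperand means unnamed blocks print with their %N operand numbers rather than empty names; an illustrative output line (values invented, format per BranchProbability's printer):

    edge %entry -> %5 probability is 0x30000000 / 0x80000000 = 37.50%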
@@ -1322,9 +1325,8 @@ BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
PreservedAnalyses
BranchProbabilityPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
- OS << "Printing analysis results of BPI for function "
- << "'" << F.getName() << "':"
- << "\n";
+ OS << "Printing analysis 'Branch Probability Analysis' for function '"
+ << F.getName() << "':\n";
AM.getResult<BranchProbabilityAnalysis>(F).print(OS);
return PreservedAnalyses::all();
}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp
index f05dd6852d6d..67a15197058b 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp
@@ -19,8 +19,6 @@
#include "llvm/Analysis/CFGPrinter.h"
#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/GraphWriter.h"
@@ -92,37 +90,6 @@ static void viewCFG(Function &F, const BlockFrequencyInfo *BFI,
ViewGraph(&CFGInfo, "cfg." + F.getName(), CFGOnly);
}
-namespace {
-struct CFGViewerLegacyPass : public FunctionPass {
- static char ID; // Pass identifcation, replacement for typeid
- CFGViewerLegacyPass() : FunctionPass(ID) {
- initializeCFGViewerLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- if (!CFGFuncName.empty() && !F.getName().contains(CFGFuncName))
- return false;
- auto *BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
- auto *BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
- viewCFG(F, BFI, BPI, getMaxFreq(F, BFI));
- return false;
- }
-
- void print(raw_ostream &OS, const Module * = nullptr) const override {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- FunctionPass::getAnalysisUsage(AU);
- AU.addRequired<BlockFrequencyInfoWrapperPass>();
- AU.addRequired<BranchProbabilityInfoWrapperPass>();
- AU.setPreservesAll();
- }
-};
-} // namespace
-
-char CFGViewerLegacyPass::ID = 0;
-INITIALIZE_PASS(CFGViewerLegacyPass, "view-cfg", "View CFG of function", false,
- true)
-
PreservedAnalyses CFGViewerPass::run(Function &F, FunctionAnalysisManager &AM) {
if (!CFGFuncName.empty() && !F.getName().contains(CFGFuncName))
return PreservedAnalyses::all();
@@ -132,37 +99,6 @@ PreservedAnalyses CFGViewerPass::run(Function &F, FunctionAnalysisManager &AM) {
return PreservedAnalyses::all();
}
-namespace {
-struct CFGOnlyViewerLegacyPass : public FunctionPass {
- static char ID; // Pass identifcation, replacement for typeid
- CFGOnlyViewerLegacyPass() : FunctionPass(ID) {
- initializeCFGOnlyViewerLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- if (!CFGFuncName.empty() && !F.getName().contains(CFGFuncName))
- return false;
- auto *BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
- auto *BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
- viewCFG(F, BFI, BPI, getMaxFreq(F, BFI), /*CFGOnly=*/true);
- return false;
- }
-
- void print(raw_ostream &OS, const Module * = nullptr) const override {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- FunctionPass::getAnalysisUsage(AU);
- AU.addRequired<BlockFrequencyInfoWrapperPass>();
- AU.addRequired<BranchProbabilityInfoWrapperPass>();
- AU.setPreservesAll();
- }
-};
-} // namespace
-
-char CFGOnlyViewerLegacyPass::ID = 0;
-INITIALIZE_PASS(CFGOnlyViewerLegacyPass, "view-cfg-only",
- "View CFG of function (with no function bodies)", false, true)
-
PreservedAnalyses CFGOnlyViewerPass::run(Function &F,
FunctionAnalysisManager &AM) {
if (!CFGFuncName.empty() && !F.getName().contains(CFGFuncName))
@@ -173,37 +109,6 @@ PreservedAnalyses CFGOnlyViewerPass::run(Function &F,
return PreservedAnalyses::all();
}
-namespace {
-struct CFGPrinterLegacyPass : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- CFGPrinterLegacyPass() : FunctionPass(ID) {
- initializeCFGPrinterLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- if (!CFGFuncName.empty() && !F.getName().contains(CFGFuncName))
- return false;
- auto *BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
- auto *BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
- writeCFGToDotFile(F, BFI, BPI, getMaxFreq(F, BFI));
- return false;
- }
-
- void print(raw_ostream &OS, const Module * = nullptr) const override {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- FunctionPass::getAnalysisUsage(AU);
- AU.addRequired<BlockFrequencyInfoWrapperPass>();
- AU.addRequired<BranchProbabilityInfoWrapperPass>();
- AU.setPreservesAll();
- }
-};
-} // namespace
-
-char CFGPrinterLegacyPass::ID = 0;
-INITIALIZE_PASS(CFGPrinterLegacyPass, "dot-cfg",
- "Print CFG of function to 'dot' file", false, true)
-
PreservedAnalyses CFGPrinterPass::run(Function &F,
FunctionAnalysisManager &AM) {
if (!CFGFuncName.empty() && !F.getName().contains(CFGFuncName))
@@ -214,37 +119,6 @@ PreservedAnalyses CFGPrinterPass::run(Function &F,
return PreservedAnalyses::all();
}
-namespace {
-struct CFGOnlyPrinterLegacyPass : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- CFGOnlyPrinterLegacyPass() : FunctionPass(ID) {
- initializeCFGOnlyPrinterLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- if (!CFGFuncName.empty() && !F.getName().contains(CFGFuncName))
- return false;
- auto *BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
- auto *BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
- writeCFGToDotFile(F, BFI, BPI, getMaxFreq(F, BFI), /*CFGOnly=*/true);
- return false;
- }
- void print(raw_ostream &OS, const Module * = nullptr) const override {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- FunctionPass::getAnalysisUsage(AU);
- AU.addRequired<BlockFrequencyInfoWrapperPass>();
- AU.addRequired<BranchProbabilityInfoWrapperPass>();
- AU.setPreservesAll();
- }
-};
-} // namespace
-
-char CFGOnlyPrinterLegacyPass::ID = 0;
-INITIALIZE_PASS(CFGOnlyPrinterLegacyPass, "dot-cfg-only",
- "Print CFG of function to 'dot' file (with no function bodies)",
- false, true)
-
PreservedAnalyses CFGOnlyPrinterPass::run(Function &F,
FunctionAnalysisManager &AM) {
if (!CFGFuncName.empty() && !F.getName().contains(CFGFuncName))
@@ -282,14 +156,6 @@ void Function::viewCFGOnly(const BlockFrequencyInfo *BFI,
viewCFG(true, BFI, BPI);
}
-FunctionPass *llvm::createCFGPrinterLegacyPassPass() {
- return new CFGPrinterLegacyPass();
-}
-
-FunctionPass *llvm::createCFGOnlyPrinterLegacyPassPass() {
- return new CFGOnlyPrinterLegacyPass();
-}
-
/// Find all blocks on the paths which terminate with a deoptimize or
/// unreachable (i.e. all blocks which are post-dominated by a deoptimize
/// or unreachable). These paths are hidden if the corresponding cl::opts
@@ -318,10 +184,11 @@ bool DOTGraphTraits<DOTFuncInfo *>::isNodeHidden(const BasicBlock *Node,
const DOTFuncInfo *CFGInfo) {
if (HideColdPaths.getNumOccurrences() > 0)
if (auto *BFI = CFGInfo->getBFI()) {
- uint64_t NodeFreq = BFI->getBlockFreq(Node).getFrequency();
- uint64_t EntryFreq = BFI->getEntryFreq();
+ BlockFrequency NodeFreq = BFI->getBlockFreq(Node);
+ BlockFrequency EntryFreq = BFI->getEntryFreq();
// Hide blocks with relative frequency below HideColdPaths threshold.
- if ((double)NodeFreq / EntryFreq < HideColdPaths)
+ if ((double)NodeFreq.getFrequency() / EntryFreq.getFrequency() <
+ HideColdPaths)
return true;
}
if (HideUnreachablePaths || HideDeoptimizePaths) {
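
For example (threshold invented): with the hide-cold-paths threshold set to 0.01, a node with frequency 4 against an entry frequency of 1024 has relative frequency 4/1024 ≈ 0.0039 and is hidden, while a node at frequency 64 (≈ 0.0625) is kept.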
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp b/contrib/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp
index facb9c897da3..2246887afe68 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp
@@ -235,7 +235,7 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
// rather one pass of the RefSCC creating one child RefSCC at a time.
// Ensure we can proxy analysis updates from the CGSCC analysis manager
- // into the the Function analysis manager by getting a proxy here.
+ // into the Function analysis manager by getting a proxy here.
// This also needs to update the FunctionAnalysisManager, as this may be
// the first time we see this SCC.
CGAM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG).updateFAM(
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp b/contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp
index 58ccf2bd664b..20efa2b4ff64 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp
@@ -382,33 +382,3 @@ void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const {
LLVM_DUMP_METHOD
void CallGraphWrapperPass::dump() const { print(dbgs(), nullptr); }
#endif
-
-namespace {
-
-struct CallGraphPrinterLegacyPass : public ModulePass {
- static char ID; // Pass ID, replacement for typeid
-
- CallGraphPrinterLegacyPass() : ModulePass(ID) {
- initializeCallGraphPrinterLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- AU.addRequiredTransitive<CallGraphWrapperPass>();
- }
-
- bool runOnModule(Module &M) override {
- getAnalysis<CallGraphWrapperPass>().print(errs(), &M);
- return false;
- }
-};
-
-} // end anonymous namespace
-
-char CallGraphPrinterLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(CallGraphPrinterLegacyPass, "print-callgraph",
- "Print a call graph", true, true)
-INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_END(CallGraphPrinterLegacyPass, "print-callgraph",
- "Print a call graph", true, true)
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp
index 00e096af3110..7f8f7b26f8fe 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp
@@ -16,7 +16,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -74,10 +73,8 @@ bool CaptureTracker::isDereferenceableOrNull(Value *O, const DataLayout &DL) {
namespace {
struct SimpleCaptureTracker : public CaptureTracker {
- explicit SimpleCaptureTracker(
-
- const SmallPtrSetImpl<const Value *> &EphValues, bool ReturnCaptures)
- : EphValues(EphValues), ReturnCaptures(ReturnCaptures) {}
+ explicit SimpleCaptureTracker(bool ReturnCaptures)
+ : ReturnCaptures(ReturnCaptures) {}
void tooManyUses() override {
LLVM_DEBUG(dbgs() << "Captured due to too many uses\n");
@@ -88,17 +85,12 @@ namespace {
if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures)
return false;
- if (EphValues.contains(U->getUser()))
- return false;
-
LLVM_DEBUG(dbgs() << "Captured by: " << *U->getUser() << "\n");
Captured = true;
return true;
}
- const SmallPtrSetImpl<const Value *> &EphValues;
-
bool ReturnCaptures;
bool Captured = false;
@@ -166,9 +158,8 @@ namespace {
// escape are not in a cycle.
struct EarliestCaptures : public CaptureTracker {
- EarliestCaptures(bool ReturnCaptures, Function &F, const DominatorTree &DT,
- const SmallPtrSetImpl<const Value *> &EphValues)
- : EphValues(EphValues), DT(DT), ReturnCaptures(ReturnCaptures), F(F) {}
+ EarliestCaptures(bool ReturnCaptures, Function &F, const DominatorTree &DT)
+ : DT(DT), ReturnCaptures(ReturnCaptures), F(F) {}
void tooManyUses() override {
Captured = true;
@@ -180,9 +171,6 @@ namespace {
if (isa<ReturnInst>(I) && !ReturnCaptures)
return false;
- if (EphValues.contains(I))
- return false;
-
if (!EarliestCapture)
EarliestCapture = I;
else
@@ -194,8 +182,6 @@ namespace {
return false;
}
- const SmallPtrSetImpl<const Value *> &EphValues;
-
Instruction *EarliestCapture = nullptr;
const DominatorTree &DT;
@@ -217,17 +203,6 @@ namespace {
/// counts as capturing it or not.
bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures,
bool StoreCaptures, unsigned MaxUsesToExplore) {
- SmallPtrSet<const Value *, 1> Empty;
- return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures, Empty,
- MaxUsesToExplore);
-}
-
-/// Variant of the above function which accepts a set of Values that are
-/// ephemeral and cannot cause pointers to escape.
-bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures,
- bool StoreCaptures,
- const SmallPtrSetImpl<const Value *> &EphValues,
- unsigned MaxUsesToExplore) {
assert(!isa<GlobalValue>(V) &&
"It doesn't make sense to ask whether a global is captured.");
@@ -239,7 +214,7 @@ bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures,
LLVM_DEBUG(dbgs() << "Captured?: " << *V << " = ");
- SimpleCaptureTracker SCT(EphValues, ReturnCaptures);
+ SimpleCaptureTracker SCT(ReturnCaptures);
PointerMayBeCaptured(V, &SCT, MaxUsesToExplore);
if (SCT.Captured)
++NumCaptured;
@@ -283,16 +258,14 @@ bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
return CB.Captured;
}
-Instruction *
-llvm::FindEarliestCapture(const Value *V, Function &F, bool ReturnCaptures,
- bool StoreCaptures, const DominatorTree &DT,
-
- const SmallPtrSetImpl<const Value *> &EphValues,
- unsigned MaxUsesToExplore) {
+Instruction *llvm::FindEarliestCapture(const Value *V, Function &F,
+ bool ReturnCaptures, bool StoreCaptures,
+ const DominatorTree &DT,
+ unsigned MaxUsesToExplore) {
assert(!isa<GlobalValue>(V) &&
"It doesn't make sense to ask whether a global is captured.");
- EarliestCaptures CB(ReturnCaptures, F, DT, EphValues);
+ EarliestCaptures CB(ReturnCaptures, F, DT);
PointerMayBeCaptured(V, &CB, MaxUsesToExplore);
if (CB.Captured)
++NumCapturedBefore;
@@ -304,7 +277,11 @@ llvm::FindEarliestCapture(const Value *V, Function &F, bool ReturnCaptures,
UseCaptureKind llvm::DetermineUseCaptureKind(
const Use &U,
function_ref<bool(Value *, const DataLayout &)> IsDereferenceableOrNull) {
- Instruction *I = cast<Instruction>(U.getUser());
+ Instruction *I = dyn_cast<Instruction>(U.getUser());
+
+ // TODO: Investigate non-instruction uses.
+ if (!I)
+ return UseCaptureKind::MAY_CAPTURE;
switch (I->getOpcode()) {
case Instruction::Call:
@@ -384,8 +361,13 @@ UseCaptureKind llvm::DetermineUseCaptureKind(
return UseCaptureKind::MAY_CAPTURE;
return UseCaptureKind::NO_CAPTURE;
}
- case Instruction::BitCast:
case Instruction::GetElementPtr:
+ // AA does not support pointers of vectors, so GEP vector splats need to
+ // be considered as captures.
+ if (I->getType()->isVectorTy())
+ return UseCaptureKind::MAY_CAPTURE;
+ return UseCaptureKind::PASSTHROUGH;
+ case Instruction::BitCast:
case Instruction::PHI:
case Instruction::Select:
case Instruction::AddrSpaceCast:
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp
index 38cccb3ea3c2..90da3390eab3 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp
@@ -227,11 +227,16 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
return ConstantExpr::getBitCast(C, DestTy);
// Zero extend the element to the right size.
- Src = ConstantExpr::getZExt(Src, Elt->getType());
+ Src = ConstantFoldCastOperand(Instruction::ZExt, Src, Elt->getType(),
+ DL);
+ assert(Src && "Constant folding cannot fail on plain integers");
// Shift it to the right place, depending on endianness.
- Src = ConstantExpr::getShl(Src,
- ConstantInt::get(Src->getType(), ShiftAmt));
+ Src = ConstantFoldBinaryOpOperands(
+ Instruction::Shl, Src, ConstantInt::get(Src->getType(), ShiftAmt),
+ DL);
+ assert(Src && "Constant folding cannot fail on plain integers");
+
ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
// Mix it in.
@@ -268,21 +273,11 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
for (unsigned j = 0; j != Ratio; ++j) {
// Shift the piece of the value into the right place, depending on
// endianness.
- Constant *Elt = ConstantExpr::getLShr(Src,
- ConstantInt::get(Src->getType(), ShiftAmt));
+ APInt Elt = Src->getValue().lshr(ShiftAmt);
ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
- // Truncate the element to an integer with the same pointer size and
- // convert the element back to a pointer using a inttoptr.
- if (DstEltTy->isPointerTy()) {
- IntegerType *DstIntTy = Type::getIntNTy(C->getContext(), DstBitSize);
- Constant *CE = ConstantExpr::getTrunc(Elt, DstIntTy);
- Result.push_back(ConstantExpr::getIntToPtr(CE, DstEltTy));
- continue;
- }
-
// Truncate and remember this piece.
- Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
+ Result.push_back(ConstantInt::get(DstEltTy, Elt.trunc(DstBitSize)));
}
}
@@ -378,7 +373,7 @@ Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
Cast = Instruction::PtrToInt;
if (CastInst::castIsValid(Cast, C, DestTy))
- return ConstantExpr::getCast(Cast, C, DestTy);
+ return ConstantFoldCastOperand(Cast, C, DestTy, DL);
}
// If this isn't an aggregate type, there is nothing we can do to drill down
@@ -583,7 +578,7 @@ Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy,
if (DL.isNonIntegralPointerType(LoadTy->getScalarType()))
// Be careful not to replace a load of an addrspace value with an inttoptr here
return nullptr;
- Res = ConstantExpr::getCast(Instruction::IntToPtr, Res, LoadTy);
+ Res = ConstantExpr::getIntToPtr(Res, LoadTy);
}
return Res;
}
@@ -841,14 +836,14 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
SrcElemTy, Ops.slice(1, i - 1)))) &&
Ops[i]->getType()->getScalarType() != IntIdxScalarTy) {
Any = true;
- Type *NewType = Ops[i]->getType()->isVectorTy()
- ? IntIdxTy
- : IntIdxScalarTy;
- NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
- true,
- NewType,
- true),
- Ops[i], NewType));
+ Type *NewType =
+ Ops[i]->getType()->isVectorTy() ? IntIdxTy : IntIdxScalarTy;
+ Constant *NewIdx = ConstantFoldCastOperand(
+ CastInst::getCastOpcode(Ops[i], true, NewType, true), Ops[i], NewType,
+ DL);
+ if (!NewIdx)
+ return nullptr;
+ NewIdxs.push_back(NewIdx);
} else
NewIdxs.push_back(Ops[i]);
}
@@ -861,20 +856,6 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
return ConstantFoldConstant(C, DL, TLI);
}
-/// Strip the pointer casts, but preserve the address space information.
-// TODO: This probably doesn't make sense with opaque pointers.
-static Constant *StripPtrCastKeepAS(Constant *Ptr) {
- assert(Ptr->getType()->isPointerTy() && "Not a pointer type");
- auto *OldPtrTy = cast<PointerType>(Ptr->getType());
- Ptr = cast<Constant>(Ptr->stripPointerCasts());
- auto *NewPtrTy = cast<PointerType>(Ptr->getType());
-
- // Preserve the address space number of the pointer.
- if (NewPtrTy->getAddressSpace() != OldPtrTy->getAddressSpace())
- Ptr = ConstantExpr::getPointerCast(Ptr, OldPtrTy);
- return Ptr;
-}
-
/// If we can symbolically evaluate the GEP constant expression, do so.
Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
ArrayRef<Constant *> Ops,
@@ -909,7 +890,6 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
BitWidth,
DL.getIndexedOffsetInType(
SrcElemTy, ArrayRef((Value *const *)Ops.data() + 1, Ops.size() - 1)));
- Ptr = StripPtrCastKeepAS(Ptr);
// If this is a GEP of a GEP, fold it all into a single GEP.
while (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
@@ -931,7 +911,6 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
Ptr = cast<Constant>(GEP->getOperand(0));
SrcElemTy = GEP->getSourceElementType();
Offset += APInt(BitWidth, DL.getIndexedOffsetInType(SrcElemTy, NestedOps));
- Ptr = StripPtrCastKeepAS(Ptr);
}
// If the base value for this address is a literal integer value, fold the
@@ -1228,10 +1207,11 @@ Constant *llvm::ConstantFoldCompareInstOperands(
Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
- Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
- IntPtrTy, false);
- Constant *Null = Constant::getNullValue(C->getType());
- return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
+ if (Constant *C = ConstantFoldIntegerCast(CE0->getOperand(0), IntPtrTy,
+ /*IsSigned*/ false, DL)) {
+ Constant *Null = Constant::getNullValue(C->getType());
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
+ }
}
// Only do this transformation if the int is intptrty in size, otherwise
@@ -1253,11 +1233,12 @@ Constant *llvm::ConstantFoldCompareInstOperands(
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
- Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0),
- IntPtrTy, false);
- Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
- IntPtrTy, false);
- return ConstantFoldCompareInstOperands(Predicate, C0, C1, DL, TLI);
+ Constant *C0 = ConstantFoldIntegerCast(CE0->getOperand(0), IntPtrTy,
+ /*IsSigned*/ false, DL);
+ Constant *C1 = ConstantFoldIntegerCast(CE1->getOperand(0), IntPtrTy,
+ /*IsSigned*/ false, DL);
+ if (C0 && C1)
+ return ConstantFoldCompareInstOperands(Predicate, C0, C1, DL, TLI);
}
// Only do this transformation if the int is intptrty in size, otherwise
@@ -1273,19 +1254,6 @@ Constant *llvm::ConstantFoldCompareInstOperands(
}
}
- // icmp eq (or x, y), 0 -> (icmp eq x, 0) & (icmp eq y, 0)
- // icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0)
- if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
- CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
- Constant *LHS = ConstantFoldCompareInstOperands(
- Predicate, CE0->getOperand(0), Ops1, DL, TLI);
- Constant *RHS = ConstantFoldCompareInstOperands(
- Predicate, CE0->getOperand(1), Ops1, DL, TLI);
- unsigned OpC =
- Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
- return ConstantFoldBinaryOpOperands(OpC, LHS, RHS, DL);
- }
-
// Convert pointer comparison (base+offset1) pred (base+offset2) into
// offset1 pred offset2, for the case where the offset is inbounds. This
// only works for equality and unsigned comparison, as inbounds permits
@@ -1425,9 +1393,9 @@ Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
// the width of a pointer, so it can't be done in ConstantExpr::getCast.
if (CE->getOpcode() == Instruction::IntToPtr) {
// zext/trunc the inttoptr to pointer size.
- FoldedValue = ConstantExpr::getIntegerCast(
- CE->getOperand(0), DL.getIntPtrType(CE->getType()),
- /*IsSigned=*/false);
+ FoldedValue = ConstantFoldIntegerCast(CE->getOperand(0),
+ DL.getIntPtrType(CE->getType()),
+ /*IsSigned=*/false, DL);
} else if (auto *GEP = dyn_cast<GEPOperator>(CE)) {
// If we have GEP, we can perform the following folds:
// (ptrtoint (gep null, x)) -> x
@@ -1455,11 +1423,11 @@ Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
}
if (FoldedValue) {
// Do a zext or trunc to get to the ptrtoint dest size.
- return ConstantExpr::getIntegerCast(FoldedValue, DestTy,
- /*IsSigned=*/false);
+ return ConstantFoldIntegerCast(FoldedValue, DestTy, /*IsSigned=*/false,
+ DL);
}
}
- return ConstantExpr::getCast(Opcode, C, DestTy);
+ break;
case Instruction::IntToPtr:
// If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
// the int size is >= the ptr size and the address spaces are the same.
@@ -1478,8 +1446,7 @@ Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
}
}
}
-
- return ConstantExpr::getCast(Opcode, C, DestTy);
+ break;
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
@@ -1490,10 +1457,26 @@ Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::AddrSpaceCast:
- return ConstantExpr::getCast(Opcode, C, DestTy);
+ break;
case Instruction::BitCast:
return FoldBitCast(C, DestTy, DL);
}
+
+ if (ConstantExpr::isDesirableCastOp(Opcode))
+ return ConstantExpr::getCast(Opcode, C, DestTy);
+ return ConstantFoldCastInstruction(Opcode, C, DestTy);
+}
+
+Constant *llvm::ConstantFoldIntegerCast(Constant *C, Type *DestTy,
+ bool IsSigned, const DataLayout &DL) {
+ Type *SrcTy = C->getType();
+ if (SrcTy == DestTy)
+ return C;
+ if (SrcTy->getScalarSizeInBits() > DestTy->getScalarSizeInBits())
+ return ConstantFoldCastOperand(Instruction::Trunc, C, DestTy, DL);
+ if (IsSigned)
+ return ConstantFoldCastOperand(Instruction::SExt, C, DestTy, DL);
+ return ConstantFoldCastOperand(Instruction::ZExt, C, DestTy, DL);
}
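
A small usage sketch of the new helper (wrapper name illustrative): it picks Trunc, SExt, or ZExt from the relative bit widths, so callers no longer spell out the opcode:

    #include "llvm/Analysis/ConstantFolding.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DataLayout.h"

    // Widen or narrow C to DestTy; with IsSigned=true a wider DestTy takes
    // the SExt path, a narrower one truncates, and an equal width returns C.
    llvm::Constant *recast(llvm::Constant *C, llvm::Type *DestTy,
                           const llvm::DataLayout &DL) {
      return llvm::ConstantFoldIntegerCast(C, DestTy, /*IsSigned=*/true, DL);
    }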
//===----------------------------------------------------------------------===//
@@ -1548,6 +1531,11 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::vector_reduce_umax:
// Target intrinsics
case Intrinsic::amdgcn_perm:
+ case Intrinsic::amdgcn_wave_reduce_umin:
+ case Intrinsic::amdgcn_wave_reduce_umax:
+ case Intrinsic::amdgcn_s_wqm:
+ case Intrinsic::amdgcn_s_quadmask:
+ case Intrinsic::amdgcn_s_bitreplicate:
case Intrinsic::arm_mve_vctp8:
case Intrinsic::arm_mve_vctp16:
case Intrinsic::arm_mve_vctp32:
@@ -1569,11 +1557,13 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::log10:
case Intrinsic::exp:
case Intrinsic::exp2:
+ case Intrinsic::exp10:
case Intrinsic::sqrt:
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::pow:
case Intrinsic::powi:
+ case Intrinsic::ldexp:
case Intrinsic::fma:
case Intrinsic::fmuladd:
case Intrinsic::frexp:
@@ -1589,7 +1579,6 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::amdgcn_fmul_legacy:
case Intrinsic::amdgcn_fma_legacy:
case Intrinsic::amdgcn_fract:
- case Intrinsic::amdgcn_ldexp:
case Intrinsic::amdgcn_sin:
// The intrinsics below depend on rounding mode in MXCSR.
case Intrinsic::x86_sse_cvtss2si:
@@ -2227,6 +2216,9 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
case Intrinsic::exp2:
// Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.
return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty);
+ case Intrinsic::exp10:
+ // Fold exp10(x) as pow(10, x), in case the host lacks a C99 library.
+ return ConstantFoldBinaryFP(pow, APFloat(10.0), APF, Ty);
case Intrinsic::sin:
return ConstantFoldFP(sin, APF, Ty);
case Intrinsic::cos:
@@ -2433,6 +2425,39 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
return ConstantFP::get(Ty->getContext(), Val);
}
+
+ case Intrinsic::amdgcn_s_wqm: {
+ uint64_t Val = Op->getZExtValue();
+ Val |= (Val & 0x5555555555555555ULL) << 1 |
+ ((Val >> 1) & 0x5555555555555555ULL);
+ Val |= (Val & 0x3333333333333333ULL) << 2 |
+ ((Val >> 2) & 0x3333333333333333ULL);
+ return ConstantInt::get(Ty, Val);
+ }
+
+ case Intrinsic::amdgcn_s_quadmask: {
+ uint64_t Val = Op->getZExtValue();
+ uint64_t QuadMask = 0;
+ for (unsigned I = 0; I < Op->getBitWidth() / 4; ++I, Val >>= 4) {
+ if (!(Val & 0xF))
+ continue;
+
+ QuadMask |= (1ULL << I);
+ }
+ return ConstantInt::get(Ty, QuadMask);
+ }
+
+ case Intrinsic::amdgcn_s_bitreplicate: {
+ uint64_t Val = Op->getZExtValue();
+ Val = (Val & 0x000000000000FFFFULL) | (Val & 0x00000000FFFF0000ULL) << 16;
+ Val = (Val & 0x000000FF000000FFULL) | (Val & 0x0000FF000000FF00ULL) << 8;
+ Val = (Val & 0x000F000F000F000FULL) | (Val & 0x00F000F000F000F0ULL) << 4;
+ Val = (Val & 0x0303030303030303ULL) | (Val & 0x0C0C0C0C0C0C0C0CULL) << 2;
+ Val = (Val & 0x1111111111111111ULL) | (Val & 0x2222222222222222ULL) << 1;
+ Val = Val | Val << 1;
+ return ConstantInt::get(Ty, Val);
+ }
+
default:
return nullptr;
}
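
Plain-C++ sketches of the two simpler folds above, to make the bit manipulation concrete (standalone re-implementations, not the LLVM entry points):

    #include <cassert>
    #include <cstdint>

    // s_wqm: any set bit lights up its whole 4-bit quad.
    uint64_t wqm(uint64_t V) {
      V |= (V & 0x5555555555555555ULL) << 1 | ((V >> 1) & 0x5555555555555555ULL);
      V |= (V & 0x3333333333333333ULL) << 2 | ((V >> 2) & 0x3333333333333333ULL);
      return V;
    }

    // s_quadmask: one output bit per non-zero 4-bit quad.
    uint64_t quadmask(uint64_t V, unsigned Bits) {
      uint64_t Q = 0;
      for (unsigned I = 0; I < Bits / 4; ++I, V >>= 4)
        if (V & 0xF)
          Q |= 1ULL << I;
      return Q;
    }

    int main() {
      assert(wqm(0x2) == 0xF);             // one live lane -> full quad
      assert(wqm(0x21) == 0xFF);           // two quads touched
      assert(quadmask(0x00F2, 64) == 0x3); // quads 0 and 1 are live
    }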
@@ -2650,6 +2675,11 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
}
} else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
switch (IntrinsicID) {
+ case Intrinsic::ldexp: {
+ return ConstantFP::get(
+ Ty->getContext(),
+ scalbn(Op1V, Op2C->getSExtValue(), APFloat::rmNearestTiesToEven));
+ }
case Intrinsic::is_fpclass: {
FPClassTest Mask = static_cast<FPClassTest>(Op2C->getZExtValue());
bool Result =
@@ -2686,16 +2716,6 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
Ty->getContext(),
APFloat((double)std::pow(Op1V.convertToDouble(),
(int)Op2C->getZExtValue())));
-
- if (IntrinsicID == Intrinsic::amdgcn_ldexp) {
- // FIXME: Should flush denorms depending on FP mode, but that's ignored
- // everywhere else.
-
- // scalbn is equivalent to ldexp with float radix 2
- APFloat Result = scalbn(Op1->getValueAPF(), Op2C->getSExtValue(),
- APFloat::rmNearestTiesToEven);
- return ConstantFP::get(Ty->getContext(), Result);
- }
}
return nullptr;
}
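
The new ldexp fold leans on scalbn, which for radix-2 floating point is exactly ldexp; a quick check:

    #include <cassert>
    #include <cmath>

    int main() {
      assert(std::ldexp(1.5, 3) == 12.0);                // 1.5 * 2^3
      assert(std::scalbn(1.5, 3) == std::ldexp(1.5, 3)); // identical for radix 2
    }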
@@ -2839,6 +2859,9 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
return Constant::getNullValue(Ty);
return ConstantInt::get(Ty, C0->abs());
+ case Intrinsic::amdgcn_wave_reduce_umin:
+ case Intrinsic::amdgcn_wave_reduce_umax:
+ return dyn_cast<Constant>(Operands[0]);
}
return nullptr;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp
index 1782b399e7fd..7e2ddea3bbfb 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp
@@ -47,79 +47,6 @@ static cl::opt<bool> TypeBasedIntrinsicCost("type-based-intrinsic-cost",
#define CM_NAME "cost-model"
#define DEBUG_TYPE CM_NAME
-namespace {
- class CostModelAnalysis : public FunctionPass {
-
- public:
- static char ID; // Class identification, replacement for typeinfo
- CostModelAnalysis() : FunctionPass(ID) {
- initializeCostModelAnalysisPass(
- *PassRegistry::getPassRegistry());
- }
-
- private:
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnFunction(Function &F) override;
- void print(raw_ostream &OS, const Module*) const override;
-
- /// The function that we analyze.
- Function *F = nullptr;
- /// Target information.
- const TargetTransformInfo *TTI = nullptr;
- };
-} // End of anonymous namespace
-
-// Register this pass.
-char CostModelAnalysis::ID = 0;
-static const char cm_name[] = "Cost Model Analysis";
-INITIALIZE_PASS_BEGIN(CostModelAnalysis, CM_NAME, cm_name, false, true)
-INITIALIZE_PASS_END (CostModelAnalysis, CM_NAME, cm_name, false, true)
-
-FunctionPass *llvm::createCostModelAnalysisPass() {
- return new CostModelAnalysis();
-}
-
-void
-CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
-}
-
-bool
-CostModelAnalysis::runOnFunction(Function &F) {
- this->F = &F;
- auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
- TTI = TTIWP ? &TTIWP->getTTI(F) : nullptr;
-
- return false;
-}
-
-void CostModelAnalysis::print(raw_ostream &OS, const Module*) const {
- if (!F)
- return;
-
- for (BasicBlock &B : *F) {
- for (Instruction &Inst : B) {
- InstructionCost Cost;
- auto *II = dyn_cast<IntrinsicInst>(&Inst);
- if (II && TypeBasedIntrinsicCost) {
- IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II,
- InstructionCost::getInvalid(), true);
- Cost = TTI->getIntrinsicInstrCost(ICA, CostKind);
- }
- else {
- Cost = TTI->getInstructionCost(&Inst, CostKind);
- }
-
- if (auto CostVal = Cost.getValue())
- OS << "Cost Model: Found an estimated cost of " << *CostVal;
- else
- OS << "Cost Model: Invalid cost";
-
- OS << " for instruction: " << Inst << "\n";
- }
- }
-}
-
PreservedAnalyses CostModelPrinterPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
diff --git a/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp b/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp
index 7ab91b9eaea4..a45d8815e54c 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp
@@ -25,8 +25,6 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -561,24 +559,6 @@ bool llvm::tryDelinearizeFixedSizeImpl(
namespace {
-class Delinearization : public FunctionPass {
- Delinearization(const Delinearization &); // do not implement
-protected:
- Function *F;
- LoopInfo *LI;
- ScalarEvolution *SE;
-
-public:
- static char ID; // Pass identification, replacement for typeid
-
- Delinearization() : FunctionPass(ID) {
- initializeDelinearizationPass(*PassRegistry::getPassRegistry());
- }
- bool runOnFunction(Function &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- void print(raw_ostream &O, const Module *M = nullptr) const override;
-};
-
void printDelinearization(raw_ostream &O, Function *F, LoopInfo *LI,
ScalarEvolution *SE) {
O << "Delinearization on function " << F->getName() << ":\n";
@@ -631,32 +611,6 @@ void printDelinearization(raw_ostream &O, Function *F, LoopInfo *LI,
} // end anonymous namespace
-void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
-}
-
-bool Delinearization::runOnFunction(Function &F) {
- this->F = &F;
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- return false;
-}
-
-void Delinearization::print(raw_ostream &O, const Module *) const {
- printDelinearization(O, F, LI, SE);
-}
-
-char Delinearization::ID = 0;
-static const char delinearization_name[] = "Delinearization";
-INITIALIZE_PASS_BEGIN(Delinearization, DL_NAME, delinearization_name, true,
- true)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_END(Delinearization, DL_NAME, delinearization_name, true, true)
-
-FunctionPass *llvm::createDelinearizationPass() { return new Delinearization; }
-
DelinearizationPrinterPass::DelinearizationPrinterPass(raw_ostream &OS)
: OS(OS) {}
PreservedAnalyses DelinearizationPrinterPass::run(Function &F,
diff --git a/contrib/llvm-project/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp b/contrib/llvm-project/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
index 456d58660680..7d51302bcc1a 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements a model runner using Tensorflow C APIs, allowing the
+// This file implements a model runner using TFLite, allowing the
// loading of a model from a command line option.
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Analysis/DomConditionCache.cpp b/contrib/llvm-project/llvm/lib/Analysis/DomConditionCache.cpp
new file mode 100644
index 000000000000..c7f4cab41588
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Analysis/DomConditionCache.cpp
@@ -0,0 +1,67 @@
+//===- DomConditionCache.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DomConditionCache.h"
+#include "llvm/IR/PatternMatch.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+// TODO: This code is very similar to findAffectedValues() in
+// AssumptionCache, but currently specialized to just the patterns that
+// computeKnownBits() supports, and without the notion of result elem indices
+// that are AC specific. Deduplicate this code once we have a clearer picture
+// of how much they can be shared.
+static void findAffectedValues(Value *Cond,
+ SmallVectorImpl<Value *> &Affected) {
+ auto AddAffected = [&Affected](Value *V) {
+ if (isa<Argument>(V) || isa<GlobalValue>(V)) {
+ Affected.push_back(V);
+ } else if (auto *I = dyn_cast<Instruction>(V)) {
+ Affected.push_back(I);
+
+ // Peek through unary operators to find the source of the condition.
+ Value *Op;
+ if (match(I, m_PtrToInt(m_Value(Op)))) {
+ if (isa<Instruction>(Op) || isa<Argument>(Op))
+ Affected.push_back(Op);
+ }
+ }
+ };
+
+ ICmpInst::Predicate Pred;
+ Value *A;
+ if (match(Cond, m_ICmp(Pred, m_Value(A), m_Constant()))) {
+ AddAffected(A);
+
+ if (ICmpInst::isEquality(Pred)) {
+ Value *X;
+ // (X & C) or (X | C) or (X ^ C).
+ // (X << C) or (X >>_s C) or (X >>_u C).
+ if (match(A, m_BitwiseLogic(m_Value(X), m_ConstantInt())) ||
+ match(A, m_Shift(m_Value(X), m_ConstantInt())))
+ AddAffected(X);
+ } else {
+ Value *X;
+ // Handle (A + C1) u< C2, which is the canonical form of A > C3 && A < C4.
+ if (match(A, m_Add(m_Value(X), m_ConstantInt())))
+ AddAffected(X);
+ }
+ }
+}
+
+void DomConditionCache::registerBranch(BranchInst *BI) {
+ assert(BI->isConditional() && "Must be conditional branch");
+ SmallVector<Value *, 16> Affected;
+ findAffectedValues(BI->getCondition(), Affected);
+ for (Value *V : Affected) {
+ auto &AV = AffectedValues[V];
+ if (!is_contained(AV, BI))
+ AV.push_back(BI);
+ }
+}
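
A hedged usage sketch of the new cache (driver code invented): registering a branch such as `br i1 (icmp eq (and i32 %x, 7), 0), ...` records the branch for both the `and` and `%x`, so later queries can find the conditions that constrain a given value:

    #include "llvm/Analysis/DomConditionCache.h"
    #include "llvm/IR/Instructions.h"

    void record(llvm::DomConditionCache &DC, llvm::BranchInst *BI) {
      // registerBranch asserts that the branch is conditional.
      if (BI->isConditional())
        DC.registerBranch(BI);
    }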
diff --git a/contrib/llvm-project/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
index 6094f22a17fd..e27db66710a1 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
@@ -16,12 +16,36 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CommandLine.h"
#include <deque>
using namespace llvm;
+namespace llvm {
+cl::opt<bool> EnableDetailedFunctionProperties(
+ "enable-detailed-function-properties", cl::Hidden, cl::init(false),
+ cl::desc("Whether or not to compute detailed function properties."));
+
+cl::opt<unsigned> BigBasicBlockInstructionThreshold(
+ "big-basic-block-instruction-threshold", cl::Hidden, cl::init(500),
+ cl::desc("The minimum number of instructions a basic block should contain "
+ "before being considered big."));
+
+cl::opt<unsigned> MediumBasicBlockInstructionThreshold(
+ "medium-basic-block-instruction-threshold", cl::Hidden, cl::init(15),
+ cl::desc("The minimum number of instructions a basic block should contain "
+ "before being considered medium-sized."));
+} // namespace llvm
+
+static cl::opt<unsigned> CallWithManyArgumentsThreshold(
+ "call-with-many-arguments-threshold", cl::Hidden, cl::init(4),
+ cl::desc("The minimum number of arguments a function call must have before "
+ "it is considered having many arguments."));
+
namespace {
int64_t getNrBlocksFromCond(const BasicBlock &BB) {
int64_t Ret = 0;
@@ -62,6 +86,118 @@ void FunctionPropertiesInfo::updateForBB(const BasicBlock &BB,
}
}
TotalInstructionCount += Direction * BB.sizeWithoutDebug();
+
+ if (EnableDetailedFunctionProperties) {
+ unsigned SuccessorCount = succ_size(&BB);
+ if (SuccessorCount == 1)
+ BasicBlocksWithSingleSuccessor += Direction;
+ else if (SuccessorCount == 2)
+ BasicBlocksWithTwoSuccessors += Direction;
+ else if (SuccessorCount > 2)
+ BasicBlocksWithMoreThanTwoSuccessors += Direction;
+
+ unsigned PredecessorCount = pred_size(&BB);
+ if (PredecessorCount == 1)
+ BasicBlocksWithSinglePredecessor += Direction;
+ else if (PredecessorCount == 2)
+ BasicBlocksWithTwoPredecessors += Direction;
+ else if (PredecessorCount > 2)
+ BasicBlocksWithMoreThanTwoPredecessors += Direction;
+
+ uint64_t BlockSize = BB.sizeWithoutDebug();
+ if (BlockSize > BigBasicBlockInstructionThreshold)
+ BigBasicBlocks += Direction;
+ else if (BlockSize > MediumBasicBlockInstructionThreshold)
+ MediumBasicBlocks += Direction;
+ else
+ SmallBasicBlocks += Direction;
+
+ // Calculate critical edges by looking through all successors of a basic
+ // block that has multiple successors and finding ones that have multiple
+ // predecessors, which represent critical edges.
+ if (SuccessorCount > 1) {
+ for (const auto *Successor : successors(&BB)) {
+ if (pred_size(Successor) > 1)
+ CriticalEdgeCount += Direction;
+ }
+ }
+
+ ControlFlowEdgeCount += Direction * SuccessorCount;
+
+ if (const auto *BI = dyn_cast<BranchInst>(BB.getTerminator())) {
+ if (!BI->isConditional())
+ UnconditionalBranchCount += Direction;
+ }
+
+ for (const Instruction &I : BB.instructionsWithoutDebug()) {
+ if (I.isCast())
+ CastInstructionCount += Direction;
+
+ if (I.getType()->isFloatTy())
+ FloatingPointInstructionCount += Direction;
+ else if (I.getType()->isIntegerTy())
+ IntegerInstructionCount += Direction;
+
+ if (isa<IntrinsicInst>(I))
+ IntrinsicCount += Direction;
+
+ if (const auto *Call = dyn_cast<CallInst>(&I)) {
+ if (Call->isIndirectCall())
+ IndirectCallCount += Direction;
+ else
+ DirectCallCount += Direction;
+
+ if (Call->getType()->isIntegerTy())
+ CallReturnsIntegerCount += Direction;
+ else if (Call->getType()->isFloatingPointTy())
+ CallReturnsFloatCount += Direction;
+ else if (Call->getType()->isPointerTy())
+ CallReturnsPointerCount += Direction;
+ else if (Call->getType()->isVectorTy()) {
+ if (Call->getType()->getScalarType()->isIntegerTy())
+ CallReturnsVectorIntCount += Direction;
+ else if (Call->getType()->getScalarType()->isFloatingPointTy())
+ CallReturnsVectorFloatCount += Direction;
+ else if (Call->getType()->getScalarType()->isPointerTy())
+ CallReturnsVectorPointerCount += Direction;
+ }
+
+ if (Call->arg_size() > CallWithManyArgumentsThreshold)
+ CallWithManyArgumentsCount += Direction;
+
+ for (const auto &Arg : Call->args()) {
+ if (Arg->getType()->isPointerTy()) {
+ CallWithPointerArgumentCount += Direction;
+ break;
+ }
+ }
+ }
+
+#define COUNT_OPERAND(OPTYPE) \
+ if (isa<OPTYPE>(Operand)) { \
+ OPTYPE##OperandCount += Direction; \
+ continue; \
+ }
+
+ for (unsigned int OperandIndex = 0; OperandIndex < I.getNumOperands();
+ ++OperandIndex) {
+ Value *Operand = I.getOperand(OperandIndex);
+ COUNT_OPERAND(GlobalValue)
+ COUNT_OPERAND(ConstantInt)
+ COUNT_OPERAND(ConstantFP)
+ COUNT_OPERAND(Constant)
+ COUNT_OPERAND(Instruction)
+ COUNT_OPERAND(BasicBlock)
+ COUNT_OPERAND(InlineAsm)
+ COUNT_OPERAND(Argument)
+
+ // We only get to this point if we haven't matched any of the other
+ // operand types.
+ UnknownOperandCount += Direction;
+ }
+
+#undef COUNT_OPERAND
+ }
+ }
}
void FunctionPropertiesInfo::updateAggregateStats(const Function &F,
@@ -99,17 +235,59 @@ FunctionPropertiesInfo FunctionPropertiesInfo::getFunctionPropertiesInfo(
}
void FunctionPropertiesInfo::print(raw_ostream &OS) const {
- OS << "BasicBlockCount: " << BasicBlockCount << "\n"
- << "BlocksReachedFromConditionalInstruction: "
- << BlocksReachedFromConditionalInstruction << "\n"
- << "Uses: " << Uses << "\n"
- << "DirectCallsToDefinedFunctions: " << DirectCallsToDefinedFunctions
- << "\n"
- << "LoadInstCount: " << LoadInstCount << "\n"
- << "StoreInstCount: " << StoreInstCount << "\n"
- << "MaxLoopDepth: " << MaxLoopDepth << "\n"
- << "TopLevelLoopCount: " << TopLevelLoopCount << "\n"
- << "TotalInstructionCount: " << TotalInstructionCount << "\n\n";
+#define PRINT_PROPERTY(PROP_NAME) OS << #PROP_NAME ": " << PROP_NAME << "\n";
+
+ PRINT_PROPERTY(BasicBlockCount)
+ PRINT_PROPERTY(BlocksReachedFromConditionalInstruction)
+ PRINT_PROPERTY(Uses)
+ PRINT_PROPERTY(DirectCallsToDefinedFunctions)
+ PRINT_PROPERTY(LoadInstCount)
+ PRINT_PROPERTY(StoreInstCount)
+ PRINT_PROPERTY(MaxLoopDepth)
+ PRINT_PROPERTY(TopLevelLoopCount)
+ PRINT_PROPERTY(TotalInstructionCount)
+
+ if (EnableDetailedFunctionProperties) {
+ PRINT_PROPERTY(BasicBlocksWithSingleSuccessor)
+ PRINT_PROPERTY(BasicBlocksWithTwoSuccessors)
+ PRINT_PROPERTY(BasicBlocksWithMoreThanTwoSuccessors)
+ PRINT_PROPERTY(BasicBlocksWithSinglePredecessor)
+ PRINT_PROPERTY(BasicBlocksWithTwoPredecessors)
+ PRINT_PROPERTY(BasicBlocksWithMoreThanTwoPredecessors)
+ PRINT_PROPERTY(BigBasicBlocks)
+ PRINT_PROPERTY(MediumBasicBlocks)
+ PRINT_PROPERTY(SmallBasicBlocks)
+ PRINT_PROPERTY(CastInstructionCount)
+ PRINT_PROPERTY(FloatingPointInstructionCount)
+ PRINT_PROPERTY(IntegerInstructionCount)
+ PRINT_PROPERTY(ConstantIntOperandCount)
+ PRINT_PROPERTY(ConstantFPOperandCount)
+ PRINT_PROPERTY(ConstantOperandCount)
+ PRINT_PROPERTY(InstructionOperandCount)
+ PRINT_PROPERTY(BasicBlockOperandCount)
+ PRINT_PROPERTY(GlobalValueOperandCount)
+ PRINT_PROPERTY(InlineAsmOperandCount)
+ PRINT_PROPERTY(ArgumentOperandCount)
+ PRINT_PROPERTY(UnknownOperandCount)
+ PRINT_PROPERTY(CriticalEdgeCount)
+ PRINT_PROPERTY(ControlFlowEdgeCount)
+ PRINT_PROPERTY(UnconditionalBranchCount)
+ PRINT_PROPERTY(IntrinsicCount)
+ PRINT_PROPERTY(DirectCallCount)
+ PRINT_PROPERTY(IndirectCallCount)
+ PRINT_PROPERTY(CallReturnsIntegerCount)
+ PRINT_PROPERTY(CallReturnsFloatCount)
+ PRINT_PROPERTY(CallReturnsPointerCount)
+ PRINT_PROPERTY(CallReturnsVectorIntCount)
+ PRINT_PROPERTY(CallReturnsVectorFloatCount)
+ PRINT_PROPERTY(CallReturnsVectorPointerCount)
+ PRINT_PROPERTY(CallWithManyArgumentsCount)
+ PRINT_PROPERTY(CallWithPointerArgumentCount)
+ }
+
+#undef PRINT_PROPERTY
+
+ OS << "\n";
}
AnalysisKey FunctionPropertiesAnalysis::Key;
@@ -258,4 +436,4 @@ bool FunctionPropertiesUpdater::isUpdateValid(Function &F,
LoopInfo LI(DT);
auto Fresh = FunctionPropertiesInfo::getFunctionPropertiesInfo(F, DT, LI);
return FPI == Fresh;
-} \ No newline at end of file
+}
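A note on the Direction parameter threading through updateForBB above: the same routine adds a block's contribution with Direction = +1 and backs it out with Direction = -1 (for example when re-analyzing after inlining), which is why every counter must scale by Direction rather than use a bare increment. A minimal standalone sketch of the pattern, not the LLVM implementation:

    #include <cassert>

    // Toy version of the +1/-1 delta bookkeeping used by updateForBB.
    struct Counters {
      long Blocks = 0;
      long Instructions = 0;

      void updateForBlock(int InstructionCount, long Direction) {
        Blocks += Direction;                          // one block added/removed
        Instructions += Direction * InstructionCount; // scaled contribution
      }
    };

    int main() {
      Counters C;
      C.updateForBlock(10, +1); // account for a new block
      C.updateForBlock(10, -1); // back it out again before re-analysis
      assert(C.Blocks == 0 && C.Instructions == 0);
      return 0;
    }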
diff --git a/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp
index c218b3d511e2..527f19b194ee 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp
@@ -241,7 +241,7 @@ MemoryEffects GlobalsAAResult::getMemoryEffects(const Function *F) {
if (FunctionInfo *FI = getFunctionInfo(F))
return MemoryEffects(FI->getModRefInfo());
- return AAResultBase::getMemoryEffects(F);
+ return MemoryEffects::unknown();
}
/// Returns the function info for the function, or null if we don't have
@@ -791,10 +791,7 @@ bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV,
// FIXME: It would be good to handle other obvious no-alias cases here, but
// it isn't clear how to do so reasonably without building a small version
- // of BasicAA into this code. We could recurse into AAResultBase::alias
- // here but that seems likely to go poorly as we're inside the
- // implementation of such a query. Until then, just conservatively return
- // false.
+ // of BasicAA into this code.
return false;
} while (!Inputs.empty());
@@ -892,7 +889,7 @@ AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA,
if ((GV1 || GV2) && GV1 != GV2)
return AliasResult::NoAlias;
- return AAResultBase::alias(LocA, LocB, AAQI, nullptr);
+ return AliasResult::MayAlias;
}
ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call,
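The GlobalsModRef changes above drop the old chain to AAResultBase: when this analysis has no answer it now returns the conservative default directly (MemoryEffects::unknown(), AliasResult::MayAlias), and combining answers across analyses is left to the AAResults aggregation layer. A toy sketch of that division of labor, with hypothetical names, assuming only that MayAlias acts as the "don't know" element:

    #include <cassert>

    enum class AliasResult { NoAlias, MayAlias, MustAlias };

    // Each individual analysis answers MayAlias when it cannot decide ...
    AliasResult analysisA() { return AliasResult::MayAlias; }
    AliasResult analysisB() { return AliasResult::NoAlias; }

    // ... and the aggregation layer keeps the first definite verdict.
    AliasResult combined() {
      for (AliasResult R : {analysisA(), analysisB()})
        if (R != AliasResult::MayAlias)
          return R;
      return AliasResult::MayAlias;
    }

    int main() {
      assert(combined() == AliasResult::NoAlias);
      return 0;
    }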
diff --git a/contrib/llvm-project/llvm/lib/Analysis/GuardUtils.cpp b/contrib/llvm-project/llvm/lib/Analysis/GuardUtils.cpp
index 40b898e96f3b..b872286fb939 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/GuardUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/GuardUtils.cpp
@@ -19,6 +19,10 @@ bool llvm::isGuard(const User *U) {
return match(U, m_Intrinsic<Intrinsic::experimental_guard>());
}
+bool llvm::isWidenableCondition(const Value *V) {
+ return match(V, m_Intrinsic<Intrinsic::experimental_widenable_condition>());
+}
+
bool llvm::isWidenableBranch(const User *U) {
Value *Condition, *WidenableCondition;
BasicBlock *GuardedBB, *DeoptBB;
@@ -27,11 +31,9 @@ bool llvm::isWidenableBranch(const User *U) {
}
bool llvm::isGuardAsWidenableBranch(const User *U) {
- Value *Condition, *WidenableCondition;
- BasicBlock *GuardedBB, *DeoptBB;
- if (!parseWidenableBranch(U, Condition, WidenableCondition, GuardedBB,
- DeoptBB))
+ if (!isWidenableBranch(U))
return false;
+ BasicBlock *DeoptBB = cast<BranchInst>(U)->getSuccessor(1);
SmallPtrSet<const BasicBlock *, 2> Visited;
Visited.insert(DeoptBB);
do {
@@ -111,3 +113,59 @@ bool llvm::parseWidenableBranch(User *U, Use *&C,Use *&WC,
}
return false;
}
+
+template <typename CallbackType>
+static void parseCondition(Value *Condition,
+ CallbackType RecordCheckOrWidenableCond) {
+ SmallVector<Value *, 4> Worklist(1, Condition);
+ SmallPtrSet<Value *, 4> Visited;
+ Visited.insert(Condition);
+ do {
+ Value *Check = Worklist.pop_back_val();
+ Value *LHS, *RHS;
+ if (match(Check, m_And(m_Value(LHS), m_Value(RHS)))) {
+ if (Visited.insert(LHS).second)
+ Worklist.push_back(LHS);
+ if (Visited.insert(RHS).second)
+ Worklist.push_back(RHS);
+ continue;
+ }
+ if (!RecordCheckOrWidenableCond(Check))
+ break;
+ } while (!Worklist.empty());
+}
+
+void llvm::parseWidenableGuard(const User *U,
+ llvm::SmallVectorImpl<Value *> &Checks) {
+ assert((isGuard(U) || isWidenableBranch(U)) && "Should be");
+ Value *Condition = isGuard(U) ? cast<IntrinsicInst>(U)->getArgOperand(0)
+ : cast<BranchInst>(U)->getCondition();
+
+ parseCondition(Condition, [&](Value *Check) {
+ if (!isWidenableCondition(Check))
+ Checks.push_back(Check);
+ return true;
+ });
+}
+
+Value *llvm::extractWidenableCondition(const User *U) {
+ auto *BI = dyn_cast<BranchInst>(U);
+ if (!BI || !BI->isConditional())
+ return nullptr;
+
+ auto Condition = BI->getCondition();
+ if (!Condition->hasOneUse())
+ return nullptr;
+
+ Value *WidenableCondition = nullptr;
+ parseCondition(Condition, [&](Value *Check) {
+    // We require that widenable_condition have only one use; otherwise we
+    // don't consider the branch widenable.
+ if (isWidenableCondition(Check) && Check->hasOneUse()) {
+ WidenableCondition = Check;
+ return false;
+ }
+ return true;
+ });
+ return WidenableCondition;
+}
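parseCondition above is a plain worklist walk over a tree of `and` nodes, with a visited set for deduplication and a callback that can cut the walk short. A self-contained sketch of the same traversal over a hypothetical Node type rather than LLVM's Value hierarchy:

    #include <functional>
    #include <unordered_set>
    #include <vector>

    struct Node {
      bool IsAnd = false;
      Node *LHS = nullptr, *RHS = nullptr; // only meaningful when IsAnd
    };

    // Visit every non-`and` leaf under Condition; RecordLeaf returning false
    // stops the walk early, mirroring RecordCheckOrWidenableCond above.
    void parseCondition(Node *Condition,
                        const std::function<bool(Node *)> &RecordLeaf) {
      std::vector<Node *> Worklist{Condition};
      std::unordered_set<Node *> Visited{Condition};
      do {
        Node *Check = Worklist.back();
        Worklist.pop_back();
        if (Check->IsAnd) {
          if (Visited.insert(Check->LHS).second)
            Worklist.push_back(Check->LHS);
          if (Visited.insert(Check->RHS).second)
            Worklist.push_back(Check->RHS);
          continue;
        }
        if (!RecordLeaf(Check))
          break;
      } while (!Worklist.empty());
    }

    int main() {
      Node A, B, C;
      Node Inner{true, &A, &B}, Root{true, &Inner, &C};
      int Leaves = 0;
      parseCondition(&Root, [&](Node *) { ++Leaves; return true; });
      return Leaves == 3 ? 0 : 1; // all three leaves visited exactly once
    }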
diff --git a/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp b/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp
index 6c750b7baa40..1aa324c6b5f3 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp
@@ -24,8 +24,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
-#include <set>
-
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -52,8 +50,8 @@ bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) {
case RecurKind::SMin:
case RecurKind::UMax:
case RecurKind::UMin:
- case RecurKind::SelectICmp:
- case RecurKind::SelectFCmp:
+ case RecurKind::IAnyOf:
+ case RecurKind::FAnyOf:
return true;
}
return false;
@@ -123,7 +121,7 @@ static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
// meaning that we will use sext instructions instead of zext
// instructions to restore the original type.
IsSigned = true;
- // Make sure at at least one sign bit is included in the result, so it
+ // Make sure at least one sign bit is included in the result, so it
// will get properly sign-extended.
++MaxBitWidth;
}
@@ -411,18 +409,17 @@ bool RecurrenceDescriptor::AddReductionVar(
// A reduction operation must only have one use of the reduction value.
if (!IsAPhi && !IsASelect && !isMinMaxRecurrenceKind(Kind) &&
- !isSelectCmpRecurrenceKind(Kind) &&
- hasMultipleUsesOf(Cur, VisitedInsts, 1))
+ !isAnyOfRecurrenceKind(Kind) && hasMultipleUsesOf(Cur, VisitedInsts, 1))
return false;
// All inputs to a PHI node must be a reduction value.
if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
return false;
- if ((isIntMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectICmp) &&
+ if ((isIntMinMaxRecurrenceKind(Kind) || Kind == RecurKind::IAnyOf) &&
(isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
++NumCmpSelectPatternInst;
- if ((isFPMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectFCmp) &&
+ if ((isFPMinMaxRecurrenceKind(Kind) || Kind == RecurKind::FAnyOf) &&
(isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
++NumCmpSelectPatternInst;
@@ -488,7 +485,7 @@ bool RecurrenceDescriptor::AddReductionVar(
((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
!isa<SelectInst>(UI)) ||
(!isConditionalRdxPattern(Kind, UI).isRecurrence() &&
- !isSelectCmpPattern(TheLoop, Phi, UI, IgnoredVal)
+ !isAnyOfPattern(TheLoop, Phi, UI, IgnoredVal)
.isRecurrence() &&
!isMinMaxPattern(UI, Kind, IgnoredVal).isRecurrence())))
return false;
@@ -508,7 +505,7 @@ bool RecurrenceDescriptor::AddReductionVar(
NumCmpSelectPatternInst != 0)
return false;
- if (isSelectCmpRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1)
+ if (isAnyOfRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1)
return false;
if (IntermediateStore) {
@@ -628,8 +625,8 @@ bool RecurrenceDescriptor::AddReductionVar(
// value if nothing changed (0 in the example above) or the other selected
// value (3 in the example above).
RecurrenceDescriptor::InstDesc
-RecurrenceDescriptor::isSelectCmpPattern(Loop *Loop, PHINode *OrigPhi,
- Instruction *I, InstDesc &Prev) {
+RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
+ Instruction *I, InstDesc &Prev) {
// We must handle the select(cmp(),x,y) as a single instruction. Advance to
// the select.
CmpInst::Predicate Pred;
@@ -659,8 +656,8 @@ RecurrenceDescriptor::isSelectCmpPattern(Loop *Loop, PHINode *OrigPhi,
if (!Loop->isLoopInvariant(NonPhi))
return InstDesc(false, I);
- return InstDesc(I, isa<ICmpInst>(I->getOperand(0)) ? RecurKind::SelectICmp
- : RecurKind::SelectFCmp);
+ return InstDesc(I, isa<ICmpInst>(I->getOperand(0)) ? RecurKind::IAnyOf
+ : RecurKind::FAnyOf);
}
RecurrenceDescriptor::InstDesc
@@ -803,8 +800,8 @@ RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi,
case Instruction::FCmp:
case Instruction::ICmp:
case Instruction::Call:
- if (isSelectCmpRecurrenceKind(Kind))
- return isSelectCmpPattern(L, OrigPhi, I, Prev);
+ if (isAnyOfRecurrenceKind(Kind))
+ return isAnyOfPattern(L, OrigPhi, I, Prev);
auto HasRequiredFMF = [&]() {
if (FuncFMF.noNaNs() && FuncFMF.noSignedZeros())
return true;
@@ -897,8 +894,8 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::SelectICmp, TheLoop, FMF, RedDes, DB, AC,
- DT, SE)) {
+ if (AddReductionVar(Phi, RecurKind::IAnyOf, TheLoop, FMF, RedDes, DB, AC, DT,
+ SE)) {
LLVM_DEBUG(dbgs() << "Found an integer conditional select reduction PHI."
<< *Phi << "\n");
return true;
@@ -923,8 +920,8 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::SelectFCmp, TheLoop, FMF, RedDes, DB, AC,
- DT, SE)) {
+ if (AddReductionVar(Phi, RecurKind::FAnyOf, TheLoop, FMF, RedDes, DB, AC, DT,
+ SE)) {
LLVM_DEBUG(dbgs() << "Found a float conditional select reduction PHI."
<< " PHI." << *Phi << "\n");
return true;
@@ -1088,8 +1085,8 @@ Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
return ConstantFP::getInfinity(Tp, false /*Negative*/);
case RecurKind::FMaximum:
return ConstantFP::getInfinity(Tp, true /*Negative*/);
- case RecurKind::SelectICmp:
- case RecurKind::SelectFCmp:
+ case RecurKind::IAnyOf:
+ case RecurKind::FAnyOf:
return getRecurrenceStartValue();
break;
default:
@@ -1118,13 +1115,13 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
case RecurKind::SMin:
case RecurKind::UMax:
case RecurKind::UMin:
- case RecurKind::SelectICmp:
+ case RecurKind::IAnyOf:
return Instruction::ICmp;
case RecurKind::FMax:
case RecurKind::FMin:
case RecurKind::FMaximum:
case RecurKind::FMinimum:
- case RecurKind::SelectFCmp:
+ case RecurKind::FAnyOf:
return Instruction::FCmp;
default:
llvm_unreachable("Unknown recurrence operation");
diff --git a/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp
index a2f46edcf5ef..7096e06d925a 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp
@@ -88,10 +88,21 @@ static cl::opt<bool> InlineEnableCostBenefitAnalysis(
"inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false),
cl::desc("Enable the cost-benefit analysis for the inliner"));
+// InlineSavingsMultiplier overrides per TTI multipliers iff it is
+// specified explicitly in command line options. This option is exposed
+// for tuning and testing.
static cl::opt<int> InlineSavingsMultiplier(
"inline-savings-multiplier", cl::Hidden, cl::init(8),
cl::desc("Multiplier to multiply cycle savings by during inlining"));
+// InlineSavingsProfitableMultiplier overrides per TTI multipliers iff it is
+// specified explicitly in command line options. This option is exposed
+// for tuning and testing.
+static cl::opt<int> InlineSavingsProfitableMultiplier(
+ "inline-savings-profitable-multiplier", cl::Hidden, cl::init(4),
+ cl::desc("A multiplier on top of cycle savings to decide whether the "
+ "savings won't justify the cost"));
+
static cl::opt<int>
InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100),
cl::desc("The maximum size of a callee that get's "
@@ -118,7 +129,7 @@ static cl::opt<int> ColdCallSiteRelFreq(
"entry frequency, for a callsite to be cold in the absence of "
"profile information."));
-static cl::opt<int> HotCallSiteRelFreq(
+static cl::opt<uint64_t> HotCallSiteRelFreq(
"hot-callsite-rel-freq", cl::Hidden, cl::init(60),
cl::desc("Minimum block frequency, expressed as a multiple of caller's "
"entry frequency, for a callsite to be hot in the absence of "
@@ -612,8 +623,8 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
/// Handle a capped 'int' increment for Cost.
void addCost(int64_t Inc) {
- Inc = std::max<int64_t>(std::min<int64_t>(INT_MAX, Inc), INT_MIN);
- Cost = std::max<int64_t>(std::min<int64_t>(INT_MAX, Inc + Cost), INT_MIN);
+ Inc = std::clamp<int64_t>(Inc, INT_MIN, INT_MAX);
+ Cost = std::clamp<int64_t>(Inc + Cost, INT_MIN, INT_MAX);
}
void onDisableSROA(AllocaInst *Arg) override {
@@ -684,7 +695,8 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
}
} else
// Otherwise simply add the cost for merely making the call.
- addCost(CallPenalty);
+ addCost(TTI.getInlineCallPenalty(CandidateCall.getCaller(), Call,
+ CallPenalty));
}
void onFinalizeSwitch(unsigned JumpTableSize,
@@ -787,7 +799,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
return false;
} else {
// Otherwise, require instrumentation profile.
- if (!PSI->hasInstrumentationProfile())
+ if (!(PSI->hasInstrumentationProfile() || PSI->hasSampleProfile()))
return false;
}
@@ -815,9 +827,35 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
return true;
}
+ // A helper function to choose between command line override and default.
+ unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const {
+ if (InlineSavingsMultiplier.getNumOccurrences())
+ return InlineSavingsMultiplier;
+ return TTI.getInliningCostBenefitAnalysisSavingsMultiplier();
+ }
+
+ // A helper function to choose between command line override and default.
+ unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
+ if (InlineSavingsProfitableMultiplier.getNumOccurrences())
+ return InlineSavingsProfitableMultiplier;
+ return TTI.getInliningCostBenefitAnalysisProfitableMultiplier();
+ }
+
+ void OverrideCycleSavingsAndSizeForTesting(APInt &CycleSavings, int &Size) {
+ if (std::optional<int> AttrCycleSavings = getStringFnAttrAsInt(
+ CandidateCall, "inline-cycle-savings-for-test")) {
+ CycleSavings = *AttrCycleSavings;
+ }
+
+ if (std::optional<int> AttrRuntimeCost = getStringFnAttrAsInt(
+ CandidateCall, "inline-runtime-cost-for-test")) {
+ Size = *AttrRuntimeCost;
+ }
+ }
+
// Determine whether we should inline the given call site, taking into account
// both the size cost and the cycle savings. Return std::nullopt if we don't
- // have suficient profiling information to determine.
+ // have sufficient profiling information to determine.
std::optional<bool> costBenefitAnalysis() {
if (!CostBenefitAnalysisEnabled)
return std::nullopt;
@@ -855,6 +893,9 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
SimplifiedValues.lookup(BI->getCondition()))) {
CurrentSavings += InstrCost;
}
+      } else if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
+        if (isa_and_present<ConstantInt>(
+                SimplifiedValues.lookup(SI->getCondition())))
+          CurrentSavings += InstrCost;
} else if (Value *V = dyn_cast<Value>(&I)) {
// Count an instruction as savings if we can fold it.
if (SimplifiedValues.count(V)) {
@@ -878,32 +919,58 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
// Compute the total savings for the call site.
auto *CallerBB = CandidateCall.getParent();
BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent())));
- CycleSavings += getCallsiteCost(this->CandidateCall, DL);
+ CycleSavings += getCallsiteCost(TTI, this->CandidateCall, DL);
CycleSavings *= *CallerBFI->getBlockProfileCount(CallerBB);
- // Remove the cost of the cold basic blocks.
+ // Remove the cost of the cold basic blocks to model the runtime cost more
+ // accurately. Both machine block placement and function splitting could
+ // place cold blocks further from hot blocks.
int Size = Cost - ColdSize;
// Allow tiny callees to be inlined regardless of whether they meet the
// savings threshold.
Size = Size > InlineSizeAllowance ? Size - InlineSizeAllowance : 1;
+ OverrideCycleSavingsAndSizeForTesting(CycleSavings, Size);
CostBenefit.emplace(APInt(128, Size), CycleSavings);
- // Return true if the savings justify the cost of inlining. Specifically,
- // we evaluate the following inequality:
+ // Let R be the ratio of CycleSavings to Size. We accept the inlining
+ // opportunity if R is really high and reject if R is really low. If R is
+ // somewhere in the middle, we fall back to the cost-based analysis.
//
- // CycleSavings PSI->getOrCompHotCountThreshold()
- // -------------- >= -----------------------------------
- // Size InlineSavingsMultiplier
+ // Specifically, let R = CycleSavings / Size, we accept the inlining
+ // opportunity if:
//
- // Note that the left hand side is specific to a call site. The right hand
- // side is a constant for the entire executable.
- APInt LHS = CycleSavings;
- LHS *= InlineSavingsMultiplier;
- APInt RHS(128, PSI->getOrCompHotCountThreshold());
- RHS *= Size;
- return LHS.uge(RHS);
+ // PSI->getOrCompHotCountThreshold()
+ // R > -------------------------------------------------
+ // getInliningCostBenefitAnalysisSavingsMultiplier()
+ //
+ // and reject the inlining opportunity if:
+ //
+ // PSI->getOrCompHotCountThreshold()
+ // R <= ----------------------------------------------------
+ // getInliningCostBenefitAnalysisProfitableMultiplier()
+ //
+ // Otherwise, we fall back to the cost-based analysis.
+ //
+ // Implementation-wise, use multiplication (CycleSavings * Multiplier,
+ // HotCountThreshold * Size) rather than division to avoid precision loss.
+ APInt Threshold(128, PSI->getOrCompHotCountThreshold());
+ Threshold *= Size;
+
+ APInt UpperBoundCycleSavings = CycleSavings;
+ UpperBoundCycleSavings *= getInliningCostBenefitAnalysisSavingsMultiplier();
+ if (UpperBoundCycleSavings.uge(Threshold))
+ return true;
+
+ APInt LowerBoundCycleSavings = CycleSavings;
+ LowerBoundCycleSavings *=
+ getInliningCostBenefitAnalysisProfitableMultiplier();
+ if (LowerBoundCycleSavings.ult(Threshold))
+ return false;
+
+ // Otherwise, fall back to the cost-based analysis.
+ return std::nullopt;
}
InlineResult finalizeAnalysis() override {
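The accept/reject bands in costBenefitAnalysis above compare the ratio CycleSavings/Size against threshold/multiplier by cross-multiplying, as the comment notes. A standalone sketch of the same three-way decision in plain integers (128-bit products stand in for APInt; the names are illustrative):

    #include <cstdint>
    #include <optional>

    // true: savings clearly justify the size cost; false: clearly not;
    // nullopt: middle ground, defer to the cost-based analysis.
    std::optional<bool> costBenefitDecision(uint64_t CycleSavings, uint64_t Size,
                                            uint64_t HotCountThreshold,
                                            uint64_t SavingsMultiplier,
                                            uint64_t ProfitableMultiplier) {
      // Cross-multiply instead of dividing to avoid precision loss.
      __uint128_t Threshold = (__uint128_t)HotCountThreshold * Size;
      if ((__uint128_t)CycleSavings * SavingsMultiplier >= Threshold)
        return true;
      if ((__uint128_t)CycleSavings * ProfitableMultiplier < Threshold)
        return false;
      return std::nullopt;
    }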
@@ -1010,7 +1077,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
// Give out bonuses for the callsite, as the instructions setting them up
// will be gone after inlining.
- addCost(-getCallsiteCost(this->CandidateCall, DL));
+ addCost(-getCallsiteCost(TTI, this->CandidateCall, DL));
// If this function uses the coldcc calling convention, prefer not to inline
// it.
@@ -1249,7 +1316,7 @@ private:
InlineResult onAnalysisStart() override {
increment(InlineCostFeatureIndex::callsite_cost,
- -1 * getCallsiteCost(this->CandidateCall, DL));
+ -1 * getCallsiteCost(TTI, this->CandidateCall, DL));
set(InlineCostFeatureIndex::cold_cc_penalty,
(F.getCallingConv() == CallingConv::Cold));
@@ -1820,10 +1887,11 @@ InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
// potentially cache the computation of scaled entry frequency, but the added
// complexity is not worth it unless this scaling shows up high in the
// profiles.
- auto CallSiteBB = Call.getParent();
- auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
- auto CallerEntryFreq = CallerBFI->getEntryFreq();
- if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
+ const BasicBlock *CallSiteBB = Call.getParent();
+ BlockFrequency CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
+ BlockFrequency CallerEntryFreq = CallerBFI->getEntryFreq();
+ std::optional<BlockFrequency> Limit = CallerEntryFreq.mul(HotCallSiteRelFreq);
+ if (Limit && CallSiteFreq >= *Limit)
return Params.LocallyHotCallSiteThreshold;
// Otherwise treat it normally.
@@ -2820,7 +2888,8 @@ static bool functionsHaveCompatibleAttributes(
AttributeFuncs::areInlineCompatible(*Caller, *Callee);
}
-int llvm::getCallsiteCost(const CallBase &Call, const DataLayout &DL) {
+int llvm::getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call,
+ const DataLayout &DL) {
int64_t Cost = 0;
for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
if (Call.isByValArgument(I)) {
@@ -2850,7 +2919,8 @@ int llvm::getCallsiteCost(const CallBase &Call, const DataLayout &DL) {
}
// The call instruction also disappears after inlining.
Cost += InstrCost;
- Cost += CallPenalty;
+ Cost += TTI.getInlineCallPenalty(Call.getCaller(), Call, CallPenalty);
+
return std::min<int64_t>(Cost, INT_MAX);
}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/InlineOrder.cpp b/contrib/llvm-project/llvm/lib/Analysis/InlineOrder.cpp
index 3b85820d7b8f..d6acafdc6ab8 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/InlineOrder.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/InlineOrder.cpp
@@ -36,7 +36,7 @@ static cl::opt<InlinePriorityMode> UseInlinePriority(
clEnumValN(InlinePriorityMode::ML, "ml", "Use ML.")));
static cl::opt<int> ModuleInlinerTopPriorityThreshold(
- "moudle-inliner-top-priority-threshold", cl::Hidden, cl::init(0),
+ "module-inliner-top-priority-threshold", cl::Hidden, cl::init(0),
cl::desc("The cost threshold for call sites that get inlined without the "
"cost-benefit analysis"));
@@ -218,14 +218,15 @@ class PriorityInlineOrder : public InlineOrder<std::pair<CallBase *, int>> {
// A call site could become less desirable for inlining because of the size
// growth from prior inlining into the callee. This method is used to lazily
// update the desirability of a call site if it's decreasing. It is only
- // called on pop() or front(), not every time the desirability changes. When
- // the desirability of the front call site decreases, an updated one would be
- // pushed right back into the heap. For simplicity, those cases where
- // the desirability of a call site increases are ignored here.
- void adjust() {
- while (updateAndCheckDecreased(Heap.front())) {
- std::pop_heap(Heap.begin(), Heap.end(), isLess);
+ // called on pop(), not every time the desirability changes. When the
+ // desirability of the front call site decreases, an updated one would be
+ // pushed right back into the heap. For simplicity, those cases where the
+ // desirability of a call site increases are ignored here.
+ void pop_heap_adjust() {
+ std::pop_heap(Heap.begin(), Heap.end(), isLess);
+ while (updateAndCheckDecreased(Heap.back())) {
std::push_heap(Heap.begin(), Heap.end(), isLess);
+ std::pop_heap(Heap.begin(), Heap.end(), isLess);
}
}
@@ -251,13 +252,11 @@ public:
T pop() override {
assert(size() > 0);
- adjust();
+ pop_heap_adjust();
- CallBase *CB = Heap.front();
+ CallBase *CB = Heap.pop_back_val();
T Result = std::make_pair(CB, InlineHistoryMap[CB]);
InlineHistoryMap.erase(CB);
- std::pop_heap(Heap.begin(), Heap.end(), isLess);
- Heap.pop_back();
return Result;
}
@@ -317,4 +316,4 @@ llvm::getInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params,
M);
}
return getDefaultInlineOrder(FAM, Params, MAM, M);
-} \ No newline at end of file
+}
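pop_heap_adjust above implements a lazy priority queue: an element's possibly stale priority is re-evaluated only when it reaches the front, and it is pushed back down if it decreased (increases are deliberately ignored). A self-contained sketch of the same idea with std::push_heap/std::pop_heap; LazyMaxQueue and its scoring callback are hypothetical, not LLVM's InlineOrder:

    #include <algorithm>
    #include <cstdio>
    #include <functional>
    #include <utility>
    #include <vector>

    class LazyMaxQueue {
      std::vector<std::pair<int /*priority*/, int /*id*/>> Heap;
      std::function<int(int)> Score; // recomputes an id's current priority

    public:
      explicit LazyMaxQueue(std::function<int(int)> S) : Score(std::move(S)) {}

      void push(int Id) {
        Heap.emplace_back(Score(Id), Id);
        std::push_heap(Heap.begin(), Heap.end());
      }

      int pop() {
        // Move the current front to the back, then keep re-scoring it: if
        // its priority decreased, reinsert it and pull the new front instead.
        std::pop_heap(Heap.begin(), Heap.end());
        while (true) {
          int Fresh = Score(Heap.back().second);
          if (Fresh >= Heap.back().first)
            break;                   // not decreased; safe to hand out
          Heap.back().first = Fresh; // decreased: fix and reinsert
          std::push_heap(Heap.begin(), Heap.end());
          std::pop_heap(Heap.begin(), Heap.end());
        }
        int Id = Heap.back().second;
        Heap.pop_back();
        return Id;
      }

      bool empty() const { return Heap.empty(); }
    };

    int main() {
      int Priority[3] = {5, 9, 7};
      LazyMaxQueue Q([&](int Id) { return Priority[Id]; });
      for (int Id = 0; Id < 3; ++Id)
        Q.push(Id);
      Priority[1] = 1;              // item 1 went stale while queued
      std::printf("%d\n", Q.pop()); // prints 2, not 1
      return 0;
    }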
diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp
index 8366bee083f2..d427d3eeaa9e 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp
@@ -15,8 +15,6 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstVisitor.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -63,32 +61,3 @@ PreservedAnalyses InstCountPass::run(Function &F,
return PreservedAnalyses::all();
}
-
-namespace {
-class InstCountLegacyPass : public FunctionPass {
-public:
- static char ID; // Pass identification, replacement for typeid
- InstCountLegacyPass() : FunctionPass(ID) {
- initializeInstCountLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- LLVM_DEBUG(dbgs() << "INSTCOUNT: running on function " << F.getName()
- << "\n");
- InstCount().visit(F);
- return false;
- };
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
-
- void print(raw_ostream &O, const Module *M) const override {}
-};
-} // namespace
-
-char InstCountLegacyPass::ID = 0;
-INITIALIZE_PASS(InstCountLegacyPass, "instcount",
- "Counts the various types of Instructions", false, true)
-
-FunctionPass *llvm::createInstCountPass() { return new InstCountLegacyPass(); }
diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
index 0bfea6140ab5..2a45acf63aa2 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -811,7 +811,7 @@ static Value *simplifySubInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
if (IsNUW)
return Constant::getNullValue(Op0->getType());
- KnownBits Known = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ KnownBits Known = computeKnownBits(Op1, /* Depth */ 0, Q);
if (Known.Zero.isMaxSignedValue()) {
// Op1 is either 0 or the minimum signed value. If the sub is NSW, then
// Op1 must be 0 because negating the minimum signed value is undefined.
@@ -895,7 +895,8 @@ static Value *simplifySubInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
// Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...).
if (match(Op0, m_PtrToInt(m_Value(X))) && match(Op1, m_PtrToInt(m_Value(Y))))
if (Constant *Result = computePointerDifference(Q.DL, X, Y))
- return ConstantExpr::getIntegerCast(Result, Op0->getType(), true);
+ return ConstantFoldIntegerCast(Result, Op0->getType(), /*IsSigned*/ true,
+ Q.DL);
// i1 sub -> xor.
if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1))
@@ -1062,7 +1063,7 @@ static bool isDivZero(Value *X, Value *Y, const SimplifyQuery &Q,
// ("computeConstantRangeIncludingKnownBits")?
const APInt *C;
if (match(Y, m_APInt(C)) &&
- computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI, Q.DT).getMaxValue().ult(*C))
+ computeKnownBits(X, /* Depth */ 0, Q).getMaxValue().ult(*C))
return true;
// Try again for any divisor:
@@ -1124,8 +1125,7 @@ static Value *simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0,
if (Op0 == Op1)
return IsDiv ? ConstantInt::get(Ty, 1) : Constant::getNullValue(Ty);
-
- KnownBits Known = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ KnownBits Known = computeKnownBits(Op1, /* Depth */ 0, Q);
// X / 0 -> poison
// X % 0 -> poison
// If the divisor is known to be zero, just return poison. This can happen in
@@ -1194,7 +1194,7 @@ static Value *simplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
// less trailing zeros, then the result must be poison.
const APInt *DivC;
if (IsExact && match(Op1, m_APInt(DivC)) && DivC->countr_zero()) {
- KnownBits KnownOp0 = computeKnownBits(Op0, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ KnownBits KnownOp0 = computeKnownBits(Op0, /* Depth */ 0, Q);
if (KnownOp0.countMaxTrailingZeros() < DivC->countr_zero())
return PoisonValue::get(Op0->getType());
}
@@ -1354,7 +1354,7 @@ static Value *simplifyShift(Instruction::BinaryOps Opcode, Value *Op0,
// If any bits in the shift amount make that value greater than or equal to
// the number of bits in the type, the shift is undefined.
- KnownBits KnownAmt = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ KnownBits KnownAmt = computeKnownBits(Op1, /* Depth */ 0, Q);
if (KnownAmt.getMinValue().uge(KnownAmt.getBitWidth()))
return PoisonValue::get(Op0->getType());
@@ -1367,7 +1367,7 @@ static Value *simplifyShift(Instruction::BinaryOps Opcode, Value *Op0,
// Check for nsw shl leading to a poison value.
if (IsNSW) {
assert(Opcode == Instruction::Shl && "Expected shl for nsw instruction");
- KnownBits KnownVal = computeKnownBits(Op0, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ KnownBits KnownVal = computeKnownBits(Op0, /* Depth */ 0, Q);
KnownBits KnownShl = KnownBits::shl(KnownVal, KnownAmt);
if (KnownVal.Zero.isSignBitSet())
@@ -1403,8 +1403,7 @@ static Value *simplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0,
// The low bit cannot be shifted out of an exact shift if it is set.
// TODO: Generalize by counting trailing zeros (see fold for exact division).
if (IsExact) {
- KnownBits Op0Known =
- computeKnownBits(Op0, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
+ KnownBits Op0Known = computeKnownBits(Op0, /* Depth */ 0, Q);
if (Op0Known.One[0])
return Op0;
}
@@ -1463,7 +1462,7 @@ static Value *simplifyLShrInst(Value *Op0, Value *Op1, bool IsExact,
// (X << A) >> A -> X
Value *X;
- if (match(Op0, m_NUWShl(m_Value(X), m_Specific(Op1))))
+ if (Q.IIQ.UseInstrInfo && match(Op0, m_NUWShl(m_Value(X), m_Specific(Op1))))
return X;
// ((X << A) | Y) >> A -> X if effective width of Y is not larger than A.
@@ -1473,10 +1472,10 @@ static Value *simplifyLShrInst(Value *Op0, Value *Op1, bool IsExact,
// optimizers by supporting a simple but common case in InstSimplify.
Value *Y;
const APInt *ShRAmt, *ShLAmt;
- if (match(Op1, m_APInt(ShRAmt)) &&
+ if (Q.IIQ.UseInstrInfo && match(Op1, m_APInt(ShRAmt)) &&
match(Op0, m_c_Or(m_NUWShl(m_Value(X), m_APInt(ShLAmt)), m_Value(Y))) &&
*ShRAmt == *ShLAmt) {
- const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ const KnownBits YKnown = computeKnownBits(Y, /* Depth */ 0, Q);
const unsigned EffWidthY = YKnown.countMaxActiveBits();
if (ShRAmt->uge(EffWidthY))
return X;
@@ -1673,43 +1672,6 @@ static Value *simplifyAndOrOfICmpsWithConstants(ICmpInst *Cmp0, ICmpInst *Cmp1,
return nullptr;
}
-static Value *simplifyAndOrOfICmpsWithZero(ICmpInst *Cmp0, ICmpInst *Cmp1,
- bool IsAnd) {
- ICmpInst::Predicate P0 = Cmp0->getPredicate(), P1 = Cmp1->getPredicate();
- if (!match(Cmp0->getOperand(1), m_Zero()) ||
- !match(Cmp1->getOperand(1), m_Zero()) || P0 != P1)
- return nullptr;
-
- if ((IsAnd && P0 != ICmpInst::ICMP_NE) || (!IsAnd && P1 != ICmpInst::ICMP_EQ))
- return nullptr;
-
- // We have either "(X == 0 || Y == 0)" or "(X != 0 && Y != 0)".
- Value *X = Cmp0->getOperand(0);
- Value *Y = Cmp1->getOperand(0);
-
- // If one of the compares is a masked version of a (not) null check, then
- // that compare implies the other, so we eliminate the other. Optionally, look
- // through a pointer-to-int cast to match a null check of a pointer type.
-
- // (X == 0) || (([ptrtoint] X & ?) == 0) --> ([ptrtoint] X & ?) == 0
- // (X == 0) || ((? & [ptrtoint] X) == 0) --> (? & [ptrtoint] X) == 0
- // (X != 0) && (([ptrtoint] X & ?) != 0) --> ([ptrtoint] X & ?) != 0
- // (X != 0) && ((? & [ptrtoint] X) != 0) --> (? & [ptrtoint] X) != 0
- if (match(Y, m_c_And(m_Specific(X), m_Value())) ||
- match(Y, m_c_And(m_PtrToInt(m_Specific(X)), m_Value())))
- return Cmp1;
-
- // (([ptrtoint] Y & ?) == 0) || (Y == 0) --> ([ptrtoint] Y & ?) == 0
- // ((? & [ptrtoint] Y) == 0) || (Y == 0) --> (? & [ptrtoint] Y) == 0
- // (([ptrtoint] Y & ?) != 0) && (Y != 0) --> ([ptrtoint] Y & ?) != 0
- // ((? & [ptrtoint] Y) != 0) && (Y != 0) --> (? & [ptrtoint] Y) != 0
- if (match(X, m_c_And(m_Specific(Y), m_Value())) ||
- match(X, m_c_And(m_PtrToInt(m_Specific(Y)), m_Value())))
- return Cmp0;
-
- return nullptr;
-}
-
static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1,
const InstrInfoQuery &IIQ) {
// (icmp (add V, C0), C1) & (icmp V, C0)
@@ -1757,66 +1719,6 @@ static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1,
return nullptr;
}
-/// Try to eliminate compares with signed or unsigned min/max constants.
-static Value *simplifyAndOrOfICmpsWithLimitConst(ICmpInst *Cmp0, ICmpInst *Cmp1,
- bool IsAnd) {
- // Canonicalize an equality compare as Cmp0.
- if (Cmp1->isEquality())
- std::swap(Cmp0, Cmp1);
- if (!Cmp0->isEquality())
- return nullptr;
-
- // The non-equality compare must include a common operand (X). Canonicalize
- // the common operand as operand 0 (the predicate is swapped if the common
- // operand was operand 1).
- ICmpInst::Predicate Pred0 = Cmp0->getPredicate();
- Value *X = Cmp0->getOperand(0);
- ICmpInst::Predicate Pred1;
- bool HasNotOp = match(Cmp1, m_c_ICmp(Pred1, m_Not(m_Specific(X)), m_Value()));
- if (!HasNotOp && !match(Cmp1, m_c_ICmp(Pred1, m_Specific(X), m_Value())))
- return nullptr;
- if (ICmpInst::isEquality(Pred1))
- return nullptr;
-
- // The equality compare must be against a constant. Flip bits if we matched
- // a bitwise not. Convert a null pointer constant to an integer zero value.
- APInt MinMaxC;
- const APInt *C;
- if (match(Cmp0->getOperand(1), m_APInt(C)))
- MinMaxC = HasNotOp ? ~*C : *C;
- else if (isa<ConstantPointerNull>(Cmp0->getOperand(1)))
- MinMaxC = APInt::getZero(8);
- else
- return nullptr;
-
- // DeMorganize if this is 'or': P0 || P1 --> !P0 && !P1.
- if (!IsAnd) {
- Pred0 = ICmpInst::getInversePredicate(Pred0);
- Pred1 = ICmpInst::getInversePredicate(Pred1);
- }
-
- // Normalize to unsigned compare and unsigned min/max value.
- // Example for 8-bit: -128 + 128 -> 0; 127 + 128 -> 255
- if (ICmpInst::isSigned(Pred1)) {
- Pred1 = ICmpInst::getUnsignedPredicate(Pred1);
- MinMaxC += APInt::getSignedMinValue(MinMaxC.getBitWidth());
- }
-
- // (X != MAX) && (X < Y) --> X < Y
- // (X == MAX) || (X >= Y) --> X >= Y
- if (MinMaxC.isMaxValue())
- if (Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_ULT)
- return Cmp1;
-
- // (X != MIN) && (X > Y) --> X > Y
- // (X == MIN) || (X <= Y) --> X <= Y
- if (MinMaxC.isMinValue())
- if (Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_UGT)
- return Cmp1;
-
- return nullptr;
-}
-
/// Try to simplify and/or of icmp with ctpop intrinsic.
static Value *simplifyAndOrOfICmpsWithCtpop(ICmpInst *Cmp0, ICmpInst *Cmp1,
bool IsAnd) {
@@ -1848,12 +1750,6 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1,
if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, true))
return X;
- if (Value *X = simplifyAndOrOfICmpsWithLimitConst(Op0, Op1, true))
- return X;
-
- if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, true))
- return X;
-
if (Value *X = simplifyAndOrOfICmpsWithCtpop(Op0, Op1, true))
return X;
if (Value *X = simplifyAndOrOfICmpsWithCtpop(Op1, Op0, true))
@@ -1924,12 +1820,6 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1,
if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, false))
return X;
- if (Value *X = simplifyAndOrOfICmpsWithLimitConst(Op0, Op1, false))
- return X;
-
- if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, false))
- return X;
-
if (Value *X = simplifyAndOrOfICmpsWithCtpop(Op0, Op1, false))
return X;
if (Value *X = simplifyAndOrOfICmpsWithCtpop(Op1, Op0, false))
@@ -2019,7 +1909,60 @@ static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q, Value *Op0,
// If we looked through casts, we can only handle a constant simplification
// because we are not allowed to create a cast instruction here.
if (auto *C = dyn_cast<Constant>(V))
- return ConstantExpr::getCast(Cast0->getOpcode(), C, Cast0->getType());
+ return ConstantFoldCastOperand(Cast0->getOpcode(), C, Cast0->getType(),
+ Q.DL);
+
+ return nullptr;
+}
+
+static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
+ const SimplifyQuery &Q,
+ bool AllowRefinement,
+ SmallVectorImpl<Instruction *> *DropFlags,
+ unsigned MaxRecurse);
+
+static Value *simplifyAndOrWithICmpEq(unsigned Opcode, Value *Op0, Value *Op1,
+ const SimplifyQuery &Q,
+ unsigned MaxRecurse) {
+ assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
+ "Must be and/or");
+ ICmpInst::Predicate Pred;
+ Value *A, *B;
+ if (!match(Op0, m_ICmp(Pred, m_Value(A), m_Value(B))) ||
+ !ICmpInst::isEquality(Pred))
+ return nullptr;
+
+ auto Simplify = [&](Value *Res) -> Value * {
+ Constant *Absorber = ConstantExpr::getBinOpAbsorber(Opcode, Res->getType());
+
+ // and (icmp eq a, b), x implies (a==b) inside x.
+ // or (icmp ne a, b), x implies (a==b) inside x.
+ // If x simplifies to true/false, we can simplify the and/or.
+ if (Pred ==
+ (Opcode == Instruction::And ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
+ if (Res == Absorber)
+ return Absorber;
+ if (Res == ConstantExpr::getBinOpIdentity(Opcode, Res->getType()))
+ return Op0;
+ return nullptr;
+ }
+
+ // If we have and (icmp ne a, b), x and for a==b we can simplify x to false,
+ // then we can drop the icmp, as x will already be false in the case where
+ // the icmp is false. Similar for or and true.
+ if (Res == Absorber)
+ return Op1;
+ return nullptr;
+ };
+
+ if (Value *Res =
+ simplifyWithOpReplaced(Op1, A, B, Q, /* AllowRefinement */ true,
+ /* DropFlags */ nullptr, MaxRecurse))
+ return Simplify(Res);
+ if (Value *Res =
+ simplifyWithOpReplaced(Op1, B, A, Q, /* AllowRefinement */ true,
+ /* DropFlags */ nullptr, MaxRecurse))
+ return Simplify(Res);
return nullptr;
}
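The new simplifyAndOrWithICmpEq exploits that inside x of (a == b) & x the equality a == b may be assumed (dually for (a != b) | x): if substituting one operand for the other folds x to the absorber or identity of the logic op, the whole expression simplifies. Two concrete instances, checked exhaustively over a small range in plain C++ (illustration only, not LLVM code):

    #include <cassert>

    bool lhsFold(int a, int b) {
      // (a == b) & (a != b): substituting a := b turns the RHS into
      // (b != b), i.e. false, the absorber of `and`, so always false.
      return (a == b) & (a != b);
    }

    bool rhsFold(int a, int b) {
      // (a == b) & ((a ^ b) == 0): substituting a := b turns the RHS into
      // ((b ^ b) == 0), i.e. true, the identity, so this reduces to a == b.
      return (a == b) & ((a ^ b) == 0);
    }

    int main() {
      for (int a = -2; a <= 2; ++a)
        for (int b = -2; b <= 2; ++b) {
          assert(lhsFold(a, b) == false);
          assert(rhsFold(a, b) == (a == b));
        }
      return 0;
    }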
@@ -2048,6 +1991,58 @@ static Value *simplifyLogicOfAddSub(Value *Op0, Value *Op1,
return nullptr;
}
+// Commutative patterns for and that will be tried with both operand orders.
+static Value *simplifyAndCommutative(Value *Op0, Value *Op1,
+ const SimplifyQuery &Q,
+ unsigned MaxRecurse) {
+ // ~A & A = 0
+ if (match(Op0, m_Not(m_Specific(Op1))))
+ return Constant::getNullValue(Op0->getType());
+
+ // (A | ?) & A = A
+ if (match(Op0, m_c_Or(m_Specific(Op1), m_Value())))
+ return Op1;
+
+ // (X | ~Y) & (X | Y) --> X
+ Value *X, *Y;
+ if (match(Op0, m_c_Or(m_Value(X), m_Not(m_Value(Y)))) &&
+ match(Op1, m_c_Or(m_Deferred(X), m_Deferred(Y))))
+ return X;
+
+ // If we have a multiplication overflow check that is being 'and'ed with a
+ // check that one of the multipliers is not zero, we can omit the 'and', and
+ // only keep the overflow check.
+ if (isCheckForZeroAndMulWithOverflow(Op0, Op1, true))
+ return Op1;
+
+ // -A & A = A if A is a power of two or zero.
+ if (match(Op0, m_Neg(m_Specific(Op1))) &&
+ isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
+ return Op1;
+
+ // This is a similar pattern used for checking if a value is a power-of-2:
+ // (A - 1) & A --> 0 (if A is a power-of-2 or 0)
+ if (match(Op0, m_Add(m_Specific(Op1), m_AllOnes())) &&
+ isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
+ return Constant::getNullValue(Op1->getType());
+
+ // (x << N) & ((x << M) - 1) --> 0, where x is known to be a power of 2 and
+ // M <= N.
+ const APInt *Shift1, *Shift2;
+ if (match(Op0, m_Shl(m_Value(X), m_APInt(Shift1))) &&
+ match(Op1, m_Add(m_Shl(m_Specific(X), m_APInt(Shift2)), m_AllOnes())) &&
+ isKnownToBeAPowerOfTwo(X, Q.DL, /*OrZero*/ true, /*Depth*/ 0, Q.AC,
+ Q.CxtI) &&
+ Shift1->uge(*Shift2))
+ return Constant::getNullValue(Op0->getType());
+
+ if (Value *V =
+ simplifyAndOrWithICmpEq(Instruction::And, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ return nullptr;
+}
+
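Two of the identities gathered into simplifyAndCommutative can be sanity-checked numerically; the sketch below (not LLVM code) verifies them over all 16-bit values, including the boundary case A == 0 that the OrZero flag permits.

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t A = 0; A < (1u << 16); ++A) {
        assert((~A & A) == 0); // ~A & A --> 0
        bool PowerOfTwoOrZero = (A & (A - 1)) == 0;
        // -A & A --> A exactly when A is a power of two or zero.
        assert((((0u - A) & A) == A) == PowerOfTwoOrZero);
      }
      return 0;
    }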
/// Given operands for an And, see if we can fold the result.
/// If not, this returns null.
static Value *simplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
@@ -2075,26 +2070,10 @@ static Value *simplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
if (match(Op1, m_AllOnes()))
return Op0;
- // A & ~A = ~A & A = 0
- if (match(Op0, m_Not(m_Specific(Op1))) || match(Op1, m_Not(m_Specific(Op0))))
- return Constant::getNullValue(Op0->getType());
-
- // (A | ?) & A = A
- if (match(Op0, m_c_Or(m_Specific(Op1), m_Value())))
- return Op1;
-
- // A & (A | ?) = A
- if (match(Op1, m_c_Or(m_Specific(Op0), m_Value())))
- return Op0;
-
- // (X | Y) & (X | ~Y) --> X (commuted 8 ways)
- Value *X, *Y;
- if (match(Op0, m_c_Or(m_Value(X), m_Not(m_Value(Y)))) &&
- match(Op1, m_c_Or(m_Deferred(X), m_Deferred(Y))))
- return X;
- if (match(Op1, m_c_Or(m_Value(X), m_Not(m_Value(Y)))) &&
- match(Op0, m_c_Or(m_Deferred(X), m_Deferred(Y))))
- return X;
+ if (Value *Res = simplifyAndCommutative(Op0, Op1, Q, MaxRecurse))
+ return Res;
+ if (Value *Res = simplifyAndCommutative(Op1, Op0, Q, MaxRecurse))
+ return Res;
if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::And))
return V;
@@ -2102,6 +2081,7 @@ static Value *simplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
// A mask that only clears known zeros of a shifted value is a no-op.
const APInt *Mask;
const APInt *ShAmt;
+ Value *X, *Y;
if (match(Op1, m_APInt(Mask))) {
// If all bits in the inverted and shifted mask are clear:
// and (shl X, ShAmt), Mask --> shl X, ShAmt
@@ -2116,35 +2096,19 @@ static Value *simplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return Op0;
}
- // If we have a multiplication overflow check that is being 'and'ed with a
- // check that one of the multipliers is not zero, we can omit the 'and', and
- // only keep the overflow check.
- if (isCheckForZeroAndMulWithOverflow(Op0, Op1, true))
- return Op1;
- if (isCheckForZeroAndMulWithOverflow(Op1, Op0, true))
- return Op0;
-
- // A & (-A) = A if A is a power of two or zero.
- if (match(Op0, m_Neg(m_Specific(Op1))) ||
- match(Op1, m_Neg(m_Specific(Op0)))) {
- if (isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI,
- Q.DT))
- return Op0;
- if (isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI,
- Q.DT))
- return Op1;
+ // and 2^x-1, 2^C --> 0 where x <= C.
+ const APInt *PowerC;
+ Value *Shift;
+ if (match(Op1, m_Power2(PowerC)) &&
+ match(Op0, m_Add(m_Value(Shift), m_AllOnes())) &&
+ isKnownToBeAPowerOfTwo(Shift, Q.DL, /*OrZero*/ false, 0, Q.AC, Q.CxtI,
+ Q.DT)) {
+ KnownBits Known = computeKnownBits(Shift, /* Depth */ 0, Q);
+ // Use getActiveBits() to make use of the additional power of two knowledge
+ if (PowerC->getActiveBits() >= Known.getMaxValue().getActiveBits())
+ return ConstantInt::getNullValue(Op1->getType());
}
- // This is a similar pattern used for checking if a value is a power-of-2:
- // (A - 1) & A --> 0 (if A is a power-of-2 or 0)
- // A & (A - 1) --> 0 (if A is a power-of-2 or 0)
- if (match(Op0, m_Add(m_Specific(Op1), m_AllOnes())) &&
- isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
- return Constant::getNullValue(Op1->getType());
- if (match(Op1, m_Add(m_Specific(Op0), m_AllOnes())) &&
- isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
- return Constant::getNullValue(Op0->getType());
-
if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, true))
return V;
@@ -2197,16 +2161,16 @@ static Value *simplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
// SimplifyDemandedBits in InstCombine can optimize the general case.
// This pattern aims to help other passes for a common case.
Value *XShifted;
- if (match(Op1, m_APInt(Mask)) &&
+ if (Q.IIQ.UseInstrInfo && match(Op1, m_APInt(Mask)) &&
match(Op0, m_c_Or(m_CombineAnd(m_NUWShl(m_Value(X), m_APInt(ShAmt)),
m_Value(XShifted)),
m_Value(Y)))) {
const unsigned Width = Op0->getType()->getScalarSizeInBits();
const unsigned ShftCnt = ShAmt->getLimitedValue(Width);
- const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ const KnownBits YKnown = computeKnownBits(Y, /* Depth */ 0, Q);
const unsigned EffWidthY = YKnown.countMaxActiveBits();
if (EffWidthY <= ShftCnt) {
- const KnownBits XKnown = computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ const KnownBits XKnown = computeKnownBits(X, /* Depth */ 0, Q);
const unsigned EffWidthX = XKnown.countMaxActiveBits();
const APInt EffBitsY = APInt::getLowBitsSet(Width, EffWidthY);
const APInt EffBitsX = APInt::getLowBitsSet(Width, EffWidthX) << ShftCnt;
@@ -2421,6 +2385,13 @@ static Value *simplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
match(Op0, m_LShr(m_Specific(X), m_Specific(Y))))
return Op1;
+ if (Value *V =
+ simplifyAndOrWithICmpEq(Instruction::Or, Op0, Op1, Q, MaxRecurse))
+ return V;
+ if (Value *V =
+ simplifyAndOrWithICmpEq(Instruction::Or, Op1, Op0, Q, MaxRecurse))
+ return V;
+
if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false))
return V;
@@ -2472,13 +2443,13 @@ static Value *simplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
if (C2->isMask() && // C2 == 0+1+
match(A, m_c_Add(m_Specific(B), m_Value(N)))) {
// Add commutes, try both ways.
- if (MaskedValueIsZero(N, *C2, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ if (MaskedValueIsZero(N, *C2, Q))
return A;
}
// Or commutes, try both ways.
if (C1->isMask() && match(B, m_c_Add(m_Specific(A), m_Value(N)))) {
// Add commutes, try both ways.
- if (MaskedValueIsZero(N, *C1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ if (MaskedValueIsZero(N, *C1, Q))
return B;
}
}
@@ -2722,13 +2693,6 @@ static Constant *computePointerICmp(CmpInst::Predicate Pred, Value *LHS,
const TargetLibraryInfo *TLI = Q.TLI;
const DominatorTree *DT = Q.DT;
const Instruction *CxtI = Q.CxtI;
- const InstrInfoQuery &IIQ = Q.IIQ;
-
- // A non-null pointer is not equal to a null pointer.
- if (isa<ConstantPointerNull>(RHS) && ICmpInst::isEquality(Pred) &&
- llvm::isKnownNonZero(LHS, DL, 0, nullptr, nullptr, nullptr,
- IIQ.UseInstrInfo))
- return ConstantInt::get(getCompareTy(LHS), !CmpInst::isTrueWhenEqual(Pred));
// We can only fold certain predicates on pointer comparisons.
switch (Pred) {
@@ -3002,7 +2966,7 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
return getTrue(ITy);
break;
case ICmpInst::ICMP_SLT: {
- KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ KnownBits LHSKnown = computeKnownBits(LHS, /* Depth */ 0, Q);
if (LHSKnown.isNegative())
return getTrue(ITy);
if (LHSKnown.isNonNegative())
@@ -3010,7 +2974,7 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
break;
}
case ICmpInst::ICMP_SLE: {
- KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ KnownBits LHSKnown = computeKnownBits(LHS, /* Depth */ 0, Q);
if (LHSKnown.isNegative())
return getTrue(ITy);
if (LHSKnown.isNonNegative() &&
@@ -3019,7 +2983,7 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
break;
}
case ICmpInst::ICMP_SGE: {
- KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ KnownBits LHSKnown = computeKnownBits(LHS, /* Depth */ 0, Q);
if (LHSKnown.isNegative())
return getFalse(ITy);
if (LHSKnown.isNonNegative())
@@ -3027,7 +2991,7 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
break;
}
case ICmpInst::ICMP_SGT: {
- KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ KnownBits LHSKnown = computeKnownBits(LHS, /* Depth */ 0, Q);
if (LHSKnown.isNegative())
return getFalse(ITy);
if (LHSKnown.isNonNegative() &&
@@ -3079,7 +3043,7 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
// (mul nuw/nsw X, MulC) != C --> true (if C is not a multiple of MulC)
// (mul nuw/nsw X, MulC) == C --> false (if C is not a multiple of MulC)
const APInt *MulC;
- if (ICmpInst::isEquality(Pred) &&
+ if (IIQ.UseInstrInfo && ICmpInst::isEquality(Pred) &&
((match(LHS, m_NUWMul(m_Value(), m_APIntAllowUndef(MulC))) &&
*MulC != 0 && C->urem(*MulC) != 0) ||
(match(LHS, m_NSWMul(m_Value(), m_APIntAllowUndef(MulC))) &&
@@ -3104,8 +3068,8 @@ static Value *simplifyICmpWithBinOpOnLHS(CmpInst::Predicate Pred,
return getTrue(ITy);
if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) {
- KnownBits RHSKnown = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
- KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ KnownBits RHSKnown = computeKnownBits(RHS, /* Depth */ 0, Q);
+ KnownBits YKnown = computeKnownBits(Y, /* Depth */ 0, Q);
if (RHSKnown.isNonNegative() && YKnown.isNegative())
return Pred == ICmpInst::ICMP_SLT ? getTrue(ITy) : getFalse(ITy);
if (RHSKnown.isNegative() || YKnown.isNonNegative())
@@ -3128,7 +3092,7 @@ static Value *simplifyICmpWithBinOpOnLHS(CmpInst::Predicate Pred,
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE: {
- KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ KnownBits Known = computeKnownBits(RHS, /* Depth */ 0, Q);
if (!Known.isNonNegative())
break;
[[fallthrough]];
@@ -3139,7 +3103,7 @@ static Value *simplifyICmpWithBinOpOnLHS(CmpInst::Predicate Pred,
return getFalse(ITy);
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE: {
- KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ KnownBits Known = computeKnownBits(RHS, /* Depth */ 0, Q);
if (!Known.isNonNegative())
break;
[[fallthrough]];
@@ -3247,9 +3211,9 @@ static Value *simplifyICmpWithBinOpOnLHS(CmpInst::Predicate Pred,
// *) C2 < C1 && C1 <= 0.
//
static bool trySimplifyICmpWithAdds(CmpInst::Predicate Pred, Value *LHS,
- Value *RHS) {
+ Value *RHS, const InstrInfoQuery &IIQ) {
// TODO: only support icmp slt for now.
- if (Pred != CmpInst::ICMP_SLT)
+ if (Pred != CmpInst::ICMP_SLT || !IIQ.UseInstrInfo)
return false;
// Canonicalize nsw add as RHS.
@@ -3318,7 +3282,7 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
// icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow.
bool CanSimplify = (NoLHSWrapProblem && NoRHSWrapProblem) ||
- trySimplifyICmpWithAdds(Pred, LHS, RHS);
+ trySimplifyICmpWithAdds(Pred, LHS, RHS, Q.IIQ);
if (A && C && (A == C || A == D || B == C || B == D) && CanSimplify) {
// Determine Y and Z in the form icmp (X+Y), (X+Z).
Value *Y, *Z;
@@ -3397,10 +3361,10 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
}
}
- // TODO: This is overly constrained. LHS can be any power-of-2.
- // (1 << X) >u 0x8000 --> false
- // (1 << X) <=u 0x8000 --> true
- if (match(LHS, m_Shl(m_One(), m_Value())) && match(RHS, m_SignMask())) {
+ // If C is a power-of-2:
+ // (C << X) >u 0x8000 --> false
+ // (C << X) <=u 0x8000 --> true
+ if (match(LHS, m_Shl(m_Power2(), m_Value())) && match(RHS, m_SignMask())) {
if (Pred == ICmpInst::ICMP_UGT)
return ConstantInt::getFalse(getCompareTy(RHS));
if (Pred == ICmpInst::ICMP_ULE)
@@ -3414,7 +3378,7 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
switch (LBO->getOpcode()) {
default:
break;
- case Instruction::Shl:
+ case Instruction::Shl: {
bool NUW = Q.IIQ.hasNoUnsignedWrap(LBO) && Q.IIQ.hasNoUnsignedWrap(RBO);
bool NSW = Q.IIQ.hasNoSignedWrap(LBO) && Q.IIQ.hasNoSignedWrap(RBO);
if (!NUW || (ICmpInst::isSigned(Pred) && !NSW) ||
@@ -3423,6 +3387,38 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
if (Value *V = simplifyICmpInst(Pred, LBO->getOperand(1),
RBO->getOperand(1), Q, MaxRecurse - 1))
return V;
+ break;
+ }
+ // If C1 & C2 == C1, A = X and/or C1, B = X and/or C2:
+ // icmp ule A, B -> true
+ // icmp ugt A, B -> false
+ // icmp sle A, B -> true (C1 and C2 are the same sign)
+ // icmp sgt A, B -> false (C1 and C2 are the same sign)
+ case Instruction::And:
+ case Instruction::Or: {
+ const APInt *C1, *C2;
+ if (ICmpInst::isRelational(Pred) &&
+ match(LBO->getOperand(1), m_APInt(C1)) &&
+ match(RBO->getOperand(1), m_APInt(C2))) {
+ if (!C1->isSubsetOf(*C2)) {
+ std::swap(C1, C2);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+ if (C1->isSubsetOf(*C2)) {
+ if (Pred == ICmpInst::ICMP_ULE)
+ return ConstantInt::getTrue(getCompareTy(LHS));
+ if (Pred == ICmpInst::ICMP_UGT)
+ return ConstantInt::getFalse(getCompareTy(LHS));
+ if (C1->isNonNegative() == C2->isNonNegative()) {
+ if (Pred == ICmpInst::ICMP_SLE)
+ return ConstantInt::getTrue(getCompareTy(LHS));
+ if (Pred == ICmpInst::ICMP_SGT)
+ return ConstantInt::getFalse(getCompareTy(LHS));
+ }
+ }
+ }
+ break;
+ }
}
}
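The new And/Or case rests on the observation that if C1 is a bit-subset of C2 (C1 & C2 == C1), then X & C1 and X | C1 are bitwise subsets of X & C2 and X | C2 respectively, forcing the unsigned ordering. A standalone check with an arbitrarily chosen constant pair:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t C1 = 0x21, C2 = 0xA3;
      assert((C1 & C2) == C1); // C1 is a bit-subset of C2
      for (unsigned X = 0; X < 256; ++X) {
        assert((X & C1) <= (X & C2)); // icmp ule (X & C1), (X & C2) --> true
        assert((X | C1) <= (X | C2)); // icmp ule (X | C1), (X | C2) --> true
      }
      return 0;
    }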
@@ -3831,9 +3827,15 @@ static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Compute the constant that would happen if we truncated to SrcTy then
// reextended to DstTy.
- Constant *Trunc = ConstantExpr::getTrunc(C, SrcTy);
- Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy);
- Constant *AnyEq = ConstantExpr::getICmp(ICmpInst::ICMP_EQ, RExt, C);
+ Constant *Trunc =
+ ConstantFoldCastOperand(Instruction::Trunc, C, SrcTy, Q.DL);
+ assert(Trunc && "Constant-fold of ImmConstant should not fail");
+ Constant *RExt =
+ ConstantFoldCastOperand(CastInst::ZExt, Trunc, DstTy, Q.DL);
+ assert(RExt && "Constant-fold of ImmConstant should not fail");
+ Constant *AnyEq =
+ ConstantFoldCompareInstOperands(ICmpInst::ICMP_EQ, RExt, C, Q.DL);
+ assert(AnyEq && "Constant-fold of ImmConstant should not fail");
// If the re-extended constant didn't change any of the elements then
// this is effectively also a case of comparing two zero-extended
@@ -3864,12 +3866,14 @@ static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// is non-negative then LHS <s RHS.
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- return ConstantExpr::getICmp(ICmpInst::ICMP_SLT, C,
- Constant::getNullValue(C->getType()));
+ return ConstantFoldCompareInstOperands(
+ ICmpInst::ICMP_SLT, C, Constant::getNullValue(C->getType()),
+ Q.DL);
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- return ConstantExpr::getICmp(ICmpInst::ICMP_SGE, C,
- Constant::getNullValue(C->getType()));
+ return ConstantFoldCompareInstOperands(
+ ICmpInst::ICMP_SGE, C, Constant::getNullValue(C->getType()),
+ Q.DL);
}
}
}
@@ -3897,14 +3901,19 @@ static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
// too. If not, then try to deduce the result of the comparison.
else if (match(RHS, m_ImmConstant())) {
- Constant *C = dyn_cast<Constant>(RHS);
- assert(C != nullptr);
+ Constant *C = cast<Constant>(RHS);
      // Compute the constant that would result if we truncated to SrcTy then
      // re-extended to DstTy.
- Constant *Trunc = ConstantExpr::getTrunc(C, SrcTy);
- Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy);
- Constant *AnyEq = ConstantExpr::getICmp(ICmpInst::ICMP_EQ, RExt, C);
+ Constant *Trunc =
+ ConstantFoldCastOperand(Instruction::Trunc, C, SrcTy, Q.DL);
+ assert(Trunc && "Constant-fold of ImmConstant should not fail");
+ Constant *RExt =
+ ConstantFoldCastOperand(CastInst::SExt, Trunc, DstTy, Q.DL);
+ assert(RExt && "Constant-fold of ImmConstant should not fail");
+ Constant *AnyEq =
+ ConstantFoldCompareInstOperands(ICmpInst::ICMP_EQ, RExt, C, Q.DL);
+ assert(AnyEq && "Constant-fold of ImmConstant should not fail");
// If the re-extended constant didn't change then this is effectively
// also a case of comparing two sign-extended values.
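The sext path is symmetric; a hypothetical example where the constant lies
outside the sign-extended range [-128, 127], so the relational compare should
fold without looking at %x:

    define i1 @sext_slt_oob(i8 %x) {
      %s = sext i8 %x to i32            ; %s is always in [-128, 128)
      %c = icmp slt i32 %s, 200
      ret i1 %c                         ; simplifies to true
    }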
@@ -4047,19 +4056,6 @@ static Value *simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Pred == FCmpInst::FCMP_TRUE)
return getTrue(RetTy);
- // Fold (un)ordered comparison if we can determine there are no NaNs.
- if (Pred == FCmpInst::FCMP_UNO || Pred == FCmpInst::FCMP_ORD)
- if (FMF.noNaNs() ||
- (isKnownNeverNaN(LHS, Q.DL, Q.TLI, 0, Q.AC, Q.CxtI, Q.DT) &&
- isKnownNeverNaN(RHS, Q.DL, Q.TLI, 0, Q.AC, Q.CxtI, Q.DT)))
- return ConstantInt::get(RetTy, Pred == FCmpInst::FCMP_ORD);
-
- // NaN is unordered; NaN is not ordered.
- assert((FCmpInst::isOrdered(Pred) || FCmpInst::isUnordered(Pred)) &&
- "Comparison must be either ordered or unordered");
- if (match(RHS, m_NaN()))
- return ConstantInt::get(RetTy, CmpInst::isUnordered(Pred));
-
// fcmp pred x, poison and fcmp pred poison, x
// fold to poison
if (isa<PoisonValue>(LHS) || isa<PoisonValue>(RHS))
@@ -4081,80 +4077,88 @@ static Value *simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getFalse(RetTy);
}
- // Handle fcmp with constant RHS.
- // TODO: Use match with a specific FP value, so these work with vectors with
- // undef lanes.
- const APFloat *C;
- if (match(RHS, m_APFloat(C))) {
- // Check whether the constant is an infinity.
- if (C->isInfinity()) {
- if (C->isNegative()) {
- switch (Pred) {
- case FCmpInst::FCMP_OLT:
- // No value is ordered and less than negative infinity.
- return getFalse(RetTy);
- case FCmpInst::FCMP_UGE:
- // All values are unordered with or at least negative infinity.
- return getTrue(RetTy);
- default:
- break;
- }
- } else {
- switch (Pred) {
- case FCmpInst::FCMP_OGT:
- // No value is ordered and greater than infinity.
- return getFalse(RetTy);
- case FCmpInst::FCMP_ULE:
- // All values are unordered with and at most infinity.
- return getTrue(RetTy);
- default:
- break;
- }
- }
+ // Fold (un)ordered comparison if we can determine there are no NaNs.
+ //
+  // This catches the two-variable input case; constants are handled below as
+  // a class-like compare.
+ if (Pred == FCmpInst::FCMP_ORD || Pred == FCmpInst::FCMP_UNO) {
+ if (FMF.noNaNs() ||
+ (isKnownNeverNaN(RHS, Q.DL, Q.TLI, 0, Q.AC, Q.CxtI, Q.DT) &&
+ isKnownNeverNaN(LHS, Q.DL, Q.TLI, 0, Q.AC, Q.CxtI, Q.DT)))
+ return ConstantInt::get(RetTy, Pred == FCmpInst::FCMP_ORD);
+ }
- // LHS == Inf
- if (Pred == FCmpInst::FCMP_OEQ &&
- isKnownNeverInfinity(LHS, Q.DL, Q.TLI, 0, Q.AC, Q.CxtI, Q.DT))
- return getFalse(RetTy);
- // LHS != Inf
- if (Pred == FCmpInst::FCMP_UNE &&
- isKnownNeverInfinity(LHS, Q.DL, Q.TLI, 0, Q.AC, Q.CxtI, Q.DT))
- return getTrue(RetTy);
- // LHS == Inf || LHS == NaN
- if (Pred == FCmpInst::FCMP_UEQ &&
- isKnownNeverInfOrNaN(LHS, Q.DL, Q.TLI, 0, Q.AC, Q.CxtI, Q.DT))
+ const APFloat *C = nullptr;
+ match(RHS, m_APFloatAllowUndef(C));
+ std::optional<KnownFPClass> FullKnownClassLHS;
+
+ // Lazily compute the possible classes for LHS. Avoid computing it twice if
+ // RHS is a 0.
+ auto computeLHSClass = [=, &FullKnownClassLHS](FPClassTest InterestedFlags =
+ fcAllFlags) {
+ if (FullKnownClassLHS)
+ return *FullKnownClassLHS;
+ return computeKnownFPClass(LHS, FMF, Q.DL, InterestedFlags, 0, Q.TLI, Q.AC,
+ Q.CxtI, Q.DT, Q.IIQ.UseInstrInfo);
+ };
+
+ if (C && Q.CxtI) {
+ // Fold out compares that express a class test.
+ //
+ // FIXME: Should be able to perform folds without context
+ // instruction. Always pass in the context function?
+
+ const Function *ParentF = Q.CxtI->getFunction();
+ auto [ClassVal, ClassTest] = fcmpToClassTest(Pred, *ParentF, LHS, C);
+ if (ClassVal) {
+ FullKnownClassLHS = computeLHSClass();
+ if ((FullKnownClassLHS->KnownFPClasses & ClassTest) == fcNone)
return getFalse(RetTy);
- // LHS != Inf && LHS != NaN
- if (Pred == FCmpInst::FCMP_ONE &&
- isKnownNeverInfOrNaN(LHS, Q.DL, Q.TLI, 0, Q.AC, Q.CxtI, Q.DT))
+ if ((FullKnownClassLHS->KnownFPClasses & ~ClassTest) == fcNone)
return getTrue(RetTy);
}
+ }
+
+ // Handle fcmp with constant RHS.
+ if (C) {
+ // TODO: If we always required a context function, we wouldn't need to
+ // special case nans.
+ if (C->isNaN())
+ return ConstantInt::get(RetTy, CmpInst::isUnordered(Pred));
+
+  // TODO: Need a version of fcmpToClassTest which returns the implied class
+  // when the compare isn't a complete class test, e.g. > 1.0 implies
+  // fcPositive but isn't implementable as a class call.
if (C->isNegative() && !C->isNegZero()) {
- assert(!C->isNaN() && "Unexpected NaN constant!");
+ FPClassTest Interested = KnownFPClass::OrderedLessThanZeroMask;
+
// TODO: We can catch more cases by using a range check rather than
// relying on CannotBeOrderedLessThanZero.
switch (Pred) {
case FCmpInst::FCMP_UGE:
case FCmpInst::FCMP_UGT:
- case FCmpInst::FCMP_UNE:
+ case FCmpInst::FCMP_UNE: {
+ KnownFPClass KnownClass = computeLHSClass(Interested);
+
// (X >= 0) implies (X > C) when (C < 0)
- if (cannotBeOrderedLessThanZero(LHS, Q.DL, Q.TLI, 0,
- Q.AC, Q.CxtI, Q.DT))
+ if (KnownClass.cannotBeOrderedLessThanZero())
return getTrue(RetTy);
break;
+ }
case FCmpInst::FCMP_OEQ:
case FCmpInst::FCMP_OLE:
- case FCmpInst::FCMP_OLT:
+ case FCmpInst::FCMP_OLT: {
+ KnownFPClass KnownClass = computeLHSClass(Interested);
+
// (X >= 0) implies !(X < C) when (C < 0)
- if (cannotBeOrderedLessThanZero(LHS, Q.DL, Q.TLI, 0, Q.AC, Q.CxtI,
- Q.DT))
+ if (KnownClass.cannotBeOrderedLessThanZero())
return getFalse(RetTy);
break;
+ }
default:
break;
}
}
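One concrete case the class-based reasoning covers (name hypothetical): fabs
never returns a value ordered less than zero, so an unordered-or-greater
compare against a negative constant is always true:

    define i1 @fabs_uge_neg(float %a) {
      %x = call float @llvm.fabs.f32(float %a)
      %c = fcmp uge float %x, -1.0      ; (X >= 0 or nan) and C < 0
      ret i1 %c                         ; simplifies to true
    }
    declare float @llvm.fabs.f32(float)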
-
// Check comparison of [minnum/maxnum with constant] with other constant.
const APFloat *C2;
if ((match(LHS, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_APFloat(C2))) &&
@@ -4201,13 +4205,17 @@ static Value *simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
+  // TODO: Could fold this with the case above if there were a matcher which
+  // returned all classes in a non-splat vector.
if (match(RHS, m_AnyZeroFP())) {
switch (Pred) {
case FCmpInst::FCMP_OGE:
case FCmpInst::FCMP_ULT: {
- FPClassTest Interested = FMF.noNaNs() ? fcNegative : fcNegative | fcNan;
- KnownFPClass Known = computeKnownFPClass(LHS, Q.DL, Interested, 0,
- Q.TLI, Q.AC, Q.CxtI, Q.DT);
+ FPClassTest Interested = KnownFPClass::OrderedLessThanZeroMask;
+ if (!FMF.noNaNs())
+ Interested |= fcNan;
+
+ KnownFPClass Known = computeLHSClass(Interested);
// Positive or zero X >= 0.0 --> true
// Positive or zero X < 0.0 --> false
@@ -4217,12 +4225,16 @@ static Value *simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
break;
}
case FCmpInst::FCMP_UGE:
- case FCmpInst::FCMP_OLT:
+ case FCmpInst::FCMP_OLT: {
+ FPClassTest Interested = KnownFPClass::OrderedLessThanZeroMask;
+ KnownFPClass Known = computeLHSClass(Interested);
+
// Positive or zero or nan X >= 0.0 --> true
// Positive or zero or nan X < 0.0 --> false
- if (cannotBeOrderedLessThanZero(LHS, Q.DL, Q.TLI, 0, Q.AC, Q.CxtI, Q.DT))
+ if (Known.cannotBeOrderedLessThanZero())
return Pred == FCmpInst::FCMP_UGE ? getTrue(RetTy) : getFalse(RetTy);
break;
+ }
default:
break;
}
@@ -4251,6 +4263,7 @@ Value *llvm::simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
const SimplifyQuery &Q,
bool AllowRefinement,
+ SmallVectorImpl<Instruction *> *DropFlags,
unsigned MaxRecurse) {
// Trivial replacement.
if (V == Op)
@@ -4280,12 +4293,16 @@ static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
return nullptr;
}
+ // Don't fold away llvm.is.constant checks based on assumptions.
+ if (match(I, m_Intrinsic<Intrinsic::is_constant>()))
+ return nullptr;
+
// Replace Op with RepOp in instruction operands.
SmallVector<Value *, 8> NewOps;
bool AnyReplaced = false;
for (Value *InstOp : I->operands()) {
if (Value *NewInstOp = simplifyWithOpReplaced(
- InstOp, Op, RepOp, Q, AllowRefinement, MaxRecurse)) {
+ InstOp, Op, RepOp, Q, AllowRefinement, DropFlags, MaxRecurse)) {
NewOps.push_back(NewInstOp);
AnyReplaced = InstOp != NewInstOp;
} else {
@@ -4312,8 +4329,17 @@ static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
// x & x -> x, x | x -> x
if ((Opcode == Instruction::And || Opcode == Instruction::Or) &&
- NewOps[0] == NewOps[1])
+ NewOps[0] == NewOps[1]) {
+ // or disjoint x, x results in poison.
+ if (auto *PDI = dyn_cast<PossiblyDisjointInst>(BO)) {
+ if (PDI->isDisjoint()) {
+ if (!DropFlags)
+ return nullptr;
+ DropFlags->push_back(BO);
+ }
+ }
return NewOps[0];
+ }
// x - x -> 0, x ^ x -> 0. This is non-refining, because x is non-poison
// by assumption and this case never wraps, so nowrap flags can be
@@ -4379,16 +4405,30 @@ static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
// will be done in InstCombine).
// TODO: This may be unsound, because it only catches some forms of
// refinement.
- if (!AllowRefinement && canCreatePoison(cast<Operator>(I)))
- return nullptr;
+ if (!AllowRefinement) {
+ if (canCreatePoison(cast<Operator>(I), !DropFlags)) {
+ // abs cannot create poison if the value is known to never be int_min.
+ if (auto *II = dyn_cast<IntrinsicInst>(I);
+ II && II->getIntrinsicID() == Intrinsic::abs) {
+ if (!ConstOps[0]->isNotMinSignedValue())
+ return nullptr;
+ } else
+ return nullptr;
+ }
+ Constant *Res = ConstantFoldInstOperands(I, ConstOps, Q.DL, Q.TLI);
+ if (DropFlags && Res && I->hasPoisonGeneratingFlagsOrMetadata())
+ DropFlags->push_back(I);
+ return Res;
+ }
return ConstantFoldInstOperands(I, ConstOps, Q.DL, Q.TLI);
}
Value *llvm::simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
const SimplifyQuery &Q,
- bool AllowRefinement) {
- return ::simplifyWithOpReplaced(V, Op, RepOp, Q, AllowRefinement,
+ bool AllowRefinement,
+ SmallVectorImpl<Instruction *> *DropFlags) {
+ return ::simplifyWithOpReplaced(V, Op, RepOp, Q, AllowRefinement, DropFlags,
RecursionLimit);
}
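A sketch of why the DropFlags plumbing matters (name hypothetical). Under the
equality, %x is replaced by %y in the true arm, turning the or into
"or disjoint %y, %y", which is poison whenever %y is nonzero; the x|x -> x
fold is therefore only sound if the disjoint flag is dropped, and is refused
when no DropFlags vector is supplied:

    define i8 @or_disjoint_subst(i8 %x, i8 %y, i8 %z) {
      %o = or disjoint i8 %x, %y
      %c = icmp eq i8 %x, %y
      %s = select i1 %c, i8 %o, i8 %z
      ret i8 %s
    }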
@@ -4414,14 +4454,22 @@ static Value *simplifySelectBitTest(Value *TrueVal, Value *FalseVal, Value *X,
// (X & Y) == 0 ? X | Y : X --> X | Y
// (X & Y) != 0 ? X | Y : X --> X
if (FalseVal == X && match(TrueVal, m_Or(m_Specific(X), m_APInt(C))) &&
- *Y == *C)
+ *Y == *C) {
+ // We can't return the or if it has the disjoint flag.
+ if (TrueWhenUnset && cast<PossiblyDisjointInst>(TrueVal)->isDisjoint())
+ return nullptr;
return TrueWhenUnset ? TrueVal : FalseVal;
+ }
// (X & Y) == 0 ? X : X | Y --> X
// (X & Y) != 0 ? X : X | Y --> X | Y
if (TrueVal == X && match(FalseVal, m_Or(m_Specific(X), m_APInt(C))) &&
- *Y == *C)
+ *Y == *C) {
+ // We can't return the or if it has the disjoint flag.
+ if (!TrueWhenUnset && cast<PossiblyDisjointInst>(FalseVal)->isDisjoint())
+ return nullptr;
return TrueWhenUnset ? TrueVal : FalseVal;
+ }
}
return nullptr;
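A minimal example of the case the new guard rejects (name hypothetical).
Folding the select to %o would be wrong: on the path where bit 0 of %x is set,
the disjoint or is poison while the select yields plain %x:

    define i8 @bit_test_disjoint(i8 %x) {
      %a = and i8 %x, 1
      %c = icmp eq i8 %a, 0
      %o = or disjoint i8 %x, 1
      %s = select i1 %c, i8 %o, i8 %x
      ret i8 %s                         ; no longer folded to %o
    }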
@@ -4521,11 +4569,11 @@ static Value *simplifySelectWithICmpEq(Value *CmpLHS, Value *CmpRHS,
unsigned MaxRecurse) {
if (simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q,
/* AllowRefinement */ false,
- MaxRecurse) == TrueVal)
+ /* DropFlags */ nullptr, MaxRecurse) == TrueVal)
return FalseVal;
if (simplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q,
/* AllowRefinement */ true,
- MaxRecurse) == FalseVal)
+ /* DropFlags */ nullptr, MaxRecurse) == FalseVal)
return FalseVal;
return nullptr;
@@ -4888,10 +4936,8 @@ static Value *simplifyGEPInst(Type *SrcTy, Value *Ptr,
// Compute the (pointer) type returned by the GEP instruction.
Type *LastType = GetElementPtrInst::getIndexedType(SrcTy, Indices);
- Type *GEPTy = PointerType::get(LastType, AS);
- if (VectorType *VT = dyn_cast<VectorType>(Ptr->getType()))
- GEPTy = VectorType::get(GEPTy, VT->getElementCount());
- else {
+ Type *GEPTy = Ptr->getType();
+ if (!GEPTy->isVectorTy()) {
for (Value *Op : Indices) {
// If one of the operands is a vector, the result type is a vector of
// pointers. All vector operands must have the same number of elements.
@@ -4918,15 +4964,11 @@ static Value *simplifyGEPInst(Type *SrcTy, Value *Ptr,
return UndefValue::get(GEPTy);
bool IsScalableVec =
- isa<ScalableVectorType>(SrcTy) || any_of(Indices, [](const Value *V) {
+ SrcTy->isScalableTy() || any_of(Indices, [](const Value *V) {
return isa<ScalableVectorType>(V->getType());
});
if (Indices.size() == 1) {
- // getelementptr P, 0 -> P.
- if (match(Indices[0], m_Zero()) && Ptr->getType() == GEPTy)
- return Ptr;
-
Type *Ty = SrcTy;
if (!IsScalableVec && Ty->isSized()) {
Value *P;
@@ -6034,23 +6076,18 @@ static Value *simplifyRelativeLoad(Constant *Ptr, Constant *Offset,
if (!IsConstantOffsetFromGlobal(Ptr, PtrSym, PtrOffset, DL))
return nullptr;
- Type *Int8PtrTy = Type::getInt8PtrTy(Ptr->getContext());
Type *Int32Ty = Type::getInt32Ty(Ptr->getContext());
- Type *Int32PtrTy = Int32Ty->getPointerTo();
- Type *Int64Ty = Type::getInt64Ty(Ptr->getContext());
auto *OffsetConstInt = dyn_cast<ConstantInt>(Offset);
if (!OffsetConstInt || OffsetConstInt->getType()->getBitWidth() > 64)
return nullptr;
- uint64_t OffsetInt = OffsetConstInt->getSExtValue();
- if (OffsetInt % 4 != 0)
+ APInt OffsetInt = OffsetConstInt->getValue().sextOrTrunc(
+ DL.getIndexTypeSizeInBits(Ptr->getType()));
+ if (OffsetInt.srem(4) != 0)
return nullptr;
- Constant *C = ConstantExpr::getGetElementPtr(
- Int32Ty, ConstantExpr::getBitCast(Ptr, Int32PtrTy),
- ConstantInt::get(Int64Ty, OffsetInt / 4));
- Constant *Loaded = ConstantFoldLoadFromConstPtr(C, Int32Ty, DL);
+ Constant *Loaded = ConstantFoldLoadFromConstPtr(Ptr, Int32Ty, OffsetInt, DL);
if (!Loaded)
return nullptr;
@@ -6080,11 +6117,62 @@ static Value *simplifyRelativeLoad(Constant *Ptr, Constant *Offset,
PtrSym != LoadedRHSSym || PtrOffset != LoadedRHSOffset)
return nullptr;
- return ConstantExpr::getBitCast(LoadedLHSPtr, Int8PtrTy);
+ return LoadedLHSPtr;
+}
+
+// TODO: Need to pass in FastMathFlags
+static Value *simplifyLdexp(Value *Op0, Value *Op1, const SimplifyQuery &Q,
+ bool IsStrict) {
+ // ldexp(poison, x) -> poison
+ // ldexp(x, poison) -> poison
+ if (isa<PoisonValue>(Op0) || isa<PoisonValue>(Op1))
+ return Op0;
+
+ // ldexp(undef, x) -> nan
+ if (Q.isUndefValue(Op0))
+ return ConstantFP::getNaN(Op0->getType());
+
+ if (!IsStrict) {
+ // TODO: Could insert a canonicalize for strict
+
+ // ldexp(x, undef) -> x
+ if (Q.isUndefValue(Op1))
+ return Op0;
+ }
+
+ const APFloat *C = nullptr;
+ match(Op0, PatternMatch::m_APFloat(C));
+
+ // These cases should be safe, even with strictfp.
+ // ldexp(0.0, x) -> 0.0
+ // ldexp(-0.0, x) -> -0.0
+ // ldexp(inf, x) -> inf
+ // ldexp(-inf, x) -> -inf
+ if (C && (C->isZero() || C->isInfinity()))
+ return Op0;
+
+  // These folds drop canonicalization; we could perform them if we knew how
+  // to ignore denormal flushes and target handling of nan payload bits.
+ if (IsStrict)
+ return nullptr;
+
+ // TODO: Could quiet this with strictfp if the exception mode isn't strict.
+ if (C && C->isNaN())
+ return ConstantFP::get(Op0->getType(), C->makeQuiet());
+
+ // ldexp(x, 0) -> x
+
+  // TODO: Could fold this if we knew the exception mode isn't strict and we
+  // knew the denormal mode and other target modes.
+ if (match(Op1, PatternMatch::m_ZeroInt()))
+ return Op0;
+
+ return nullptr;
}
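Two of the new ldexp folds in IR form, assuming the standard llvm.ldexp
intrinsic signature (function names hypothetical):

    define float @ldexp_zero(i32 %e) {
      %r = call float @llvm.ldexp.f32.i32(float 0.0, i32 %e)
      ret float %r                      ; ldexp(0.0, x) -> 0.0
    }
    define float @ldexp_exp0(float %x) {
      %r = call float @llvm.ldexp.f32.i32(float %x, i32 0)
      ret float %r                      ; ldexp(x, 0) -> x
    }
    declare float @llvm.ldexp.f32.i32(float, i32)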
static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
- const SimplifyQuery &Q) {
+ const SimplifyQuery &Q,
+ const CallBase *Call) {
// Idempotent functions return the same result when called repeatedly.
Intrinsic::ID IID = F->getIntrinsicID();
if (isIdempotent(IID))
@@ -6129,31 +6217,37 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
// ctpop(and X, 1) --> and X, 1
unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
if (MaskedValueIsZero(Op0, APInt::getHighBitsSet(BitWidth, BitWidth - 1),
- Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ Q))
return Op0;
break;
}
case Intrinsic::exp:
// exp(log(x)) -> x
- if (Q.CxtI->hasAllowReassoc() &&
+ if (Call->hasAllowReassoc() &&
match(Op0, m_Intrinsic<Intrinsic::log>(m_Value(X))))
return X;
break;
case Intrinsic::exp2:
// exp2(log2(x)) -> x
- if (Q.CxtI->hasAllowReassoc() &&
+ if (Call->hasAllowReassoc() &&
match(Op0, m_Intrinsic<Intrinsic::log2>(m_Value(X))))
return X;
break;
+ case Intrinsic::exp10:
+ // exp10(log10(x)) -> x
+ if (Call->hasAllowReassoc() &&
+ match(Op0, m_Intrinsic<Intrinsic::log10>(m_Value(X))))
+ return X;
+ break;
case Intrinsic::log:
// log(exp(x)) -> x
- if (Q.CxtI->hasAllowReassoc() &&
+ if (Call->hasAllowReassoc() &&
match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X))))
return X;
break;
case Intrinsic::log2:
// log2(exp2(x)) -> x
- if (Q.CxtI->hasAllowReassoc() &&
+ if (Call->hasAllowReassoc() &&
(match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X))) ||
match(Op0,
m_Intrinsic<Intrinsic::pow>(m_SpecificFP(2.0), m_Value(X)))))
@@ -6161,8 +6255,11 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
break;
case Intrinsic::log10:
// log10(pow(10.0, x)) -> x
- if (Q.CxtI->hasAllowReassoc() &&
- match(Op0, m_Intrinsic<Intrinsic::pow>(m_SpecificFP(10.0), m_Value(X))))
+ // log10(exp10(x)) -> x
+ if (Call->hasAllowReassoc() &&
+ (match(Op0, m_Intrinsic<Intrinsic::exp10>(m_Value(X))) ||
+ match(Op0,
+ m_Intrinsic<Intrinsic::pow>(m_SpecificFP(10.0), m_Value(X)))))
return X;
break;
case Intrinsic::experimental_vector_reverse:
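The new exp10 handling mirrors exp/exp2; a hedged sketch assuming the
llvm.exp10 intrinsic and reassoc on the outer call (names hypothetical):

    define float @exp10_log10(float %x) {
      %l = call float @llvm.log10.f32(float %x)
      %r = call reassoc float @llvm.exp10.f32(float %l)
      ret float %r                      ; exp10(log10(x)) -> x
    }
    declare float @llvm.log10.f32(float)
    declare float @llvm.exp10.f32(float)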
@@ -6260,7 +6357,8 @@ static Value *foldMinimumMaximumSharedOp(Intrinsic::ID IID, Value *Op0,
}
static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
- const SimplifyQuery &Q) {
+ const SimplifyQuery &Q,
+ const CallBase *Call) {
Intrinsic::ID IID = F->getIntrinsicID();
Type *ReturnType = F->getReturnType();
unsigned BitWidth = ReturnType->getScalarSizeInBits();
@@ -6287,6 +6385,44 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
return Constant::getNullValue(ReturnType);
break;
}
+ case Intrinsic::ptrmask: {
+ if (isa<PoisonValue>(Op0) || isa<PoisonValue>(Op1))
+ return PoisonValue::get(Op0->getType());
+
+  // NOTE: We can't apply these simplifications based on the value of Op1
+  // because we need to preserve provenance.
+ if (Q.isUndefValue(Op0) || match(Op0, m_Zero()))
+ return Constant::getNullValue(Op0->getType());
+
+ assert(Op1->getType()->getScalarSizeInBits() ==
+ Q.DL.getIndexTypeSizeInBits(Op0->getType()) &&
+ "Invalid mask width");
+  // If the index width (mask size) is less than the pointer size, then the
+  // mask is 1-extended.
+ if (match(Op1, m_PtrToInt(m_Specific(Op0))))
+ return Op0;
+
+ // NOTE: We may have attributes associated with the return value of the
+ // llvm.ptrmask intrinsic that will be lost when we just return the
+ // operand. We should try to preserve them.
+ if (match(Op1, m_AllOnes()) || Q.isUndefValue(Op1))
+ return Op0;
+
+ Constant *C;
+ if (match(Op1, m_ImmConstant(C))) {
+ KnownBits PtrKnown = computeKnownBits(Op0, /*Depth=*/0, Q);
+    // See if we are only masking off bits we know are already zero due to
+ // alignment.
+ APInt IrrelevantPtrBits =
+ PtrKnown.Zero.zextOrTrunc(C->getType()->getScalarSizeInBits());
+ C = ConstantFoldBinaryOpOperands(
+ Instruction::Or, C, ConstantInt::get(C->getType(), IrrelevantPtrBits),
+ Q.DL);
+ if (C != nullptr && C->isAllOnesValue())
+ return Op0;
+ }
+ break;
+ }
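Two of the ptrmask folds sketched in IR (names hypothetical): an all-ones mask
keeps the pointer, and a mask that only clears bits already known zero from
alignment is likewise a no-op:

    define ptr @ptrmask_allones(ptr %p) {
      %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -1)
      ret ptr %r                        ; simplifies to %p
    }
    define ptr @ptrmask_aligned(ptr align 8 %p) {
      %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -8)
      ret ptr %r                        ; low 3 bits already zero -> %p
    }
    declare ptr @llvm.ptrmask.p0.i64(ptr, i64)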
case Intrinsic::smax:
case Intrinsic::smin:
case Intrinsic::umax:
@@ -6426,6 +6562,8 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
return Op0;
}
break;
+ case Intrinsic::ldexp:
+ return simplifyLdexp(Op0, Op1, Q, false);
case Intrinsic::copysign:
// copysign X, X --> X
if (Op0 == Op1)
@@ -6480,19 +6618,19 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
// float, if the ninf flag is set.
const APFloat *C;
if (match(Op1, m_APFloat(C)) &&
- (C->isInfinity() || (Q.CxtI->hasNoInfs() && C->isLargest()))) {
+ (C->isInfinity() || (Call->hasNoInfs() && C->isLargest()))) {
// minnum(X, -inf) -> -inf
// maxnum(X, +inf) -> +inf
// minimum(X, -inf) -> -inf if nnan
// maximum(X, +inf) -> +inf if nnan
- if (C->isNegative() == IsMin && (!PropagateNaN || Q.CxtI->hasNoNaNs()))
+ if (C->isNegative() == IsMin && (!PropagateNaN || Call->hasNoNaNs()))
return ConstantFP::get(ReturnType, *C);
// minnum(X, +inf) -> X if nnan
// maxnum(X, -inf) -> X if nnan
// minimum(X, +inf) -> X
// maximum(X, -inf) -> X
- if (C->isNegative() != IsMin && (PropagateNaN || Q.CxtI->hasNoNaNs()))
+ if (C->isNegative() != IsMin && (PropagateNaN || Call->hasNoNaNs()))
return Op0;
}
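For instance (name hypothetical), maxnum does not propagate NaN, so the +inf
operand wins even without the nnan flag:

    define float @maxnum_pinf(float %x) {
      %r = call float @llvm.maxnum.f32(float %x, float 0x7FF0000000000000)
      ret float %r                      ; maxnum(X, +inf) -> +inf
    }
    declare float @llvm.maxnum.f32(float, float)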
@@ -6539,13 +6677,10 @@ static Value *simplifyIntrinsic(CallBase *Call, Value *Callee,
if (!NumOperands) {
switch (IID) {
case Intrinsic::vscale: {
- auto Attr = Call->getFunction()->getFnAttribute(Attribute::VScaleRange);
- if (!Attr.isValid())
- return nullptr;
- unsigned VScaleMin = Attr.getVScaleRangeMin();
- std::optional<unsigned> VScaleMax = Attr.getVScaleRangeMax();
- if (VScaleMax && VScaleMin == VScaleMax)
- return ConstantInt::get(F->getReturnType(), VScaleMin);
+ Type *RetTy = F->getReturnType();
+ ConstantRange CR = getVScaleRange(Call->getFunction(), 64);
+ if (const APInt *C = CR.getSingleElement())
+ return ConstantInt::get(RetTy, C->getZExtValue());
return nullptr;
}
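With the range-based helper, any function whose vscale_range attribute pins
vscale to a single value folds the call to that constant, e.g. (name
hypothetical):

    define i64 @vscale_const() vscale_range(2,2) {
      %v = call i64 @llvm.vscale.i64()
      ret i64 %v                        ; simplifies to 2
    }
    declare i64 @llvm.vscale.i64()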
default:
@@ -6554,10 +6689,10 @@ static Value *simplifyIntrinsic(CallBase *Call, Value *Callee,
}
if (NumOperands == 1)
- return simplifyUnaryIntrinsic(F, Args[0], Q);
+ return simplifyUnaryIntrinsic(F, Args[0], Q, Call);
if (NumOperands == 2)
- return simplifyBinaryIntrinsic(F, Args[0], Args[1], Q);
+ return simplifyBinaryIntrinsic(F, Args[0], Args[1], Q, Call);
// Handle intrinsics with 3 or more arguments.
switch (IID) {
@@ -6692,6 +6827,8 @@ static Value *simplifyIntrinsic(CallBase *Call, Value *Callee,
*FPI->getExceptionBehavior(),
*FPI->getRoundingMode());
}
+ case Intrinsic::experimental_constrained_ldexp:
+ return simplifyLdexp(Args[0], Args[1], Q, true);
default:
return nullptr;
}
@@ -6811,6 +6948,9 @@ static Value *simplifyInstructionWithOperands(Instruction *I,
const SimplifyQuery &SQ,
unsigned MaxRecurse) {
assert(I->getFunction() && "instruction should be inserted in a function");
+ assert((!SQ.CxtI || SQ.CxtI->getFunction() == I->getFunction()) &&
+ "context instruction should be in the same function");
+
const SimplifyQuery Q = SQ.CxtI ? SQ : SQ.getWithInstruction(I);
switch (I->getOpcode()) {
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp
index b66c2378b72a..910f6b72afef 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
@@ -336,11 +337,10 @@ void LazyValueInfoCache::threadEdgeImpl(BasicBlock *OldSucc,
}
}
-
+namespace llvm {
namespace {
/// An assembly annotator class to print LazyValueCache information in
/// comments.
-class LazyValueInfoImpl;
class LazyValueInfoAnnotatedWriter : public AssemblyAnnotationWriter {
LazyValueInfoImpl *LVIImpl;
// While analyzing which blocks we can solve values for, we need the dominator
@@ -357,8 +357,7 @@ public:
void emitInstructionAnnot(const Instruction *I,
formatted_raw_ostream &OS) override;
};
-}
-namespace {
+} // namespace
// The actual implementation of the lazy analysis and update. Note that the
// inheritance from LazyValueInfoCache is intended to be temporary while
// splitting the code and then transitioning to a has-a relationship.
@@ -454,6 +453,8 @@ public:
BasicBlock *ToBB,
Instruction *CxtI = nullptr);
+ ValueLatticeElement getValueAtUse(const Use &U);
+
/// Complete flush all previously computed values
void clear() {
TheCache.clear();
@@ -483,8 +484,7 @@ public:
Function *GuardDecl)
: AC(AC), DL(DL), GuardDecl(GuardDecl) {}
};
-} // end anonymous namespace
-
+} // namespace llvm
void LazyValueInfoImpl::solve() {
SmallVector<std::pair<BasicBlock *, Value *>, 8> StartingStack(
@@ -756,7 +756,8 @@ LazyValueInfoImpl::solveBlockValuePHINode(PHINode *PN, BasicBlock *BB) {
}
static ValueLatticeElement getValueFromCondition(Value *Val, Value *Cond,
- bool isTrueDest = true);
+ bool isTrueDest = true,
+ unsigned Depth = 0);
// If we can determine a constraint on the value given conditions assumed by
// the program, intersect those constraints with BBLV
@@ -802,11 +803,15 @@ void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange(
}
}
-static ConstantRange getConstantRangeOrFull(const ValueLatticeElement &Val,
- Type *Ty, const DataLayout &DL) {
- if (Val.isConstantRange(/*UndefAllowed*/ false))
+static ConstantRange toConstantRange(const ValueLatticeElement &Val,
+ Type *Ty, bool UndefAllowed = false) {
+ assert(Ty->isIntOrIntVectorTy() && "Must be integer type");
+ if (Val.isConstantRange(UndefAllowed))
return Val.getConstantRange();
- return ConstantRange::getFull(DL.getTypeSizeInBits(Ty));
+ unsigned BW = Ty->getScalarSizeInBits();
+ if (Val.isUnknown())
+ return ConstantRange::getEmpty(BW);
+ return ConstantRange::getFull(BW);
}
std::optional<ValueLatticeElement>
@@ -825,10 +830,8 @@ LazyValueInfoImpl::solveBlockValueSelect(SelectInst *SI, BasicBlock *BB) {
ValueLatticeElement &FalseVal = *OptFalseVal;
if (TrueVal.isConstantRange() || FalseVal.isConstantRange()) {
- const ConstantRange &TrueCR =
- getConstantRangeOrFull(TrueVal, SI->getType(), DL);
- const ConstantRange &FalseCR =
- getConstantRangeOrFull(FalseVal, SI->getType(), DL);
+ const ConstantRange &TrueCR = toConstantRange(TrueVal, SI->getType());
+ const ConstantRange &FalseCR = toConstantRange(FalseVal, SI->getType());
Value *LHS = nullptr;
Value *RHS = nullptr;
SelectPatternResult SPR = matchSelectPattern(SI, LHS, RHS);
@@ -882,7 +885,7 @@ LazyValueInfoImpl::solveBlockValueSelect(SelectInst *SI, BasicBlock *BB) {
Value *Cond = SI->getCondition();
// If the value is undef, a different value may be chosen in
// the select condition.
- if (isGuaranteedNotToBeUndefOrPoison(Cond, AC)) {
+ if (isGuaranteedNotToBeUndef(Cond, AC)) {
TrueVal = intersect(TrueVal,
getValueFromCondition(SI->getTrueValue(), Cond, true));
FalseVal = intersect(
@@ -899,16 +902,11 @@ LazyValueInfoImpl::getRangeFor(Value *V, Instruction *CxtI, BasicBlock *BB) {
std::optional<ValueLatticeElement> OptVal = getBlockValue(V, BB, CxtI);
if (!OptVal)
return std::nullopt;
- return getConstantRangeOrFull(*OptVal, V->getType(), DL);
+ return toConstantRange(*OptVal, V->getType());
}
std::optional<ValueLatticeElement>
LazyValueInfoImpl::solveBlockValueCast(CastInst *CI, BasicBlock *BB) {
- // Without knowing how wide the input is, we can't analyze it in any useful
- // way.
- if (!CI->getOperand(0)->getType()->isSized())
- return ValueLatticeElement::getOverdefined();
-
// Filter out casts we don't know how to reason about before attempting to
// recurse on our operand. This can cut a long search short if we know we're
   // not going to be able to get any useful information anyway.
@@ -916,7 +914,6 @@ LazyValueInfoImpl::solveBlockValueCast(CastInst *CI, BasicBlock *BB) {
case Instruction::Trunc:
case Instruction::SExt:
case Instruction::ZExt:
- case Instruction::BitCast:
break;
default:
// Unhandled instructions are overdefined.
@@ -1086,6 +1083,26 @@ static ValueLatticeElement getValueFromSimpleICmpCondition(
return ValueLatticeElement::getRange(TrueValues.subtract(Offset));
}
+static std::optional<ConstantRange>
+getRangeViaSLT(CmpInst::Predicate Pred, APInt RHS,
+ function_ref<std::optional<ConstantRange>(const APInt &)> Fn) {
+ bool Invert = false;
+ if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) {
+ Pred = ICmpInst::getInversePredicate(Pred);
+ Invert = true;
+ }
+ if (Pred == ICmpInst::ICMP_SLE) {
+ Pred = ICmpInst::ICMP_SLT;
+ if (RHS.isMaxSignedValue())
+ return std::nullopt; // Could also return full/empty here, if we wanted.
+ ++RHS;
+ }
+ assert(Pred == ICmpInst::ICMP_SLT && "Must be signed predicate");
+ if (auto CR = Fn(RHS))
+ return Invert ? CR->inverse() : CR;
+ return std::nullopt;
+}
+
static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
bool isTrueDest) {
Value *LHS = ICI->getOperand(0);
@@ -1151,6 +1168,25 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
CR.getUnsignedMin().zext(BitWidth), APInt(BitWidth, 0)));
}
+ // Recognize:
+ // icmp slt (ashr X, ShAmtC), C --> icmp slt X, C << ShAmtC
+ // Preconditions: (C << ShAmtC) >> ShAmtC == C
+ const APInt *ShAmtC;
+ if (CmpInst::isSigned(EdgePred) &&
+ match(LHS, m_AShr(m_Specific(Val), m_APInt(ShAmtC))) &&
+ match(RHS, m_APInt(C))) {
+ auto CR = getRangeViaSLT(
+ EdgePred, *C, [&](const APInt &RHS) -> std::optional<ConstantRange> {
+ APInt New = RHS << *ShAmtC;
+ if ((New.ashr(*ShAmtC)) != RHS)
+ return std::nullopt;
+ return ConstantRange::getNonEmpty(
+ APInt::getSignedMinValue(New.getBitWidth()), New);
+ });
+ if (CR)
+ return ValueLatticeElement::getRange(*CR);
+ }
+
return ValueLatticeElement::getOverdefined();
}
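A hypothetical example of the new ashr recognition: -2 << 4 = -32 and
(-32 >> 4) == -2, so on the true edge LVI concludes %x is in [-128, -32),
which lets a consumer such as CorrelatedValuePropagation fold the second
compare:

    define i1 @ashr_slt_range(i8 %x) {
      %s = ashr i8 %x, 4
      %c = icmp slt i8 %s, -2
      br i1 %c, label %t, label %f
    t:                                  ; %x is in [-128, -32) here
      %d = icmp slt i8 %x, -32
      ret i1 %d                         ; foldable to true
    f:
      ret i1 false
    }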
@@ -1176,36 +1212,22 @@ static ValueLatticeElement getValueFromOverflowCondition(
return ValueLatticeElement::getRange(NWR);
}
-// Tracks a Value * condition and whether we're interested in it or its inverse
-typedef PointerIntPair<Value *, 1, bool> CondValue;
-
-static std::optional<ValueLatticeElement> getValueFromConditionImpl(
- Value *Val, CondValue CondVal, bool isRevisit,
- SmallDenseMap<CondValue, ValueLatticeElement> &Visited,
- SmallVectorImpl<CondValue> &Worklist) {
+static ValueLatticeElement getValueFromCondition(
+ Value *Val, Value *Cond, bool IsTrueDest, unsigned Depth) {
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cond))
+ return getValueFromICmpCondition(Val, ICI, IsTrueDest);
- Value *Cond = CondVal.getPointer();
- bool isTrueDest = CondVal.getInt();
- if (!isRevisit) {
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cond))
- return getValueFromICmpCondition(Val, ICI, isTrueDest);
+ if (auto *EVI = dyn_cast<ExtractValueInst>(Cond))
+ if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand()))
+ if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 1)
+ return getValueFromOverflowCondition(Val, WO, IsTrueDest);
- if (auto *EVI = dyn_cast<ExtractValueInst>(Cond))
- if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand()))
- if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 1)
- return getValueFromOverflowCondition(Val, WO, isTrueDest);
- }
+ if (++Depth == MaxAnalysisRecursionDepth)
+ return ValueLatticeElement::getOverdefined();
Value *N;
- if (match(Cond, m_Not(m_Value(N)))) {
- CondValue NKey(N, !isTrueDest);
- auto NV = Visited.find(NKey);
- if (NV == Visited.end()) {
- Worklist.push_back(NKey);
- return std::nullopt;
- }
- return NV->second;
- }
+ if (match(Cond, m_Not(m_Value(N))))
+ return getValueFromCondition(Val, N, !IsTrueDest, Depth);
Value *L, *R;
bool IsAnd;
@@ -1216,64 +1238,19 @@ static std::optional<ValueLatticeElement> getValueFromConditionImpl(
else
return ValueLatticeElement::getOverdefined();
- auto LV = Visited.find(CondValue(L, isTrueDest));
- auto RV = Visited.find(CondValue(R, isTrueDest));
+ ValueLatticeElement LV = getValueFromCondition(Val, L, IsTrueDest, Depth);
+ ValueLatticeElement RV = getValueFromCondition(Val, R, IsTrueDest, Depth);
// if (L && R) -> intersect L and R
// if (!(L || R)) -> intersect !L and !R
// if (L || R) -> union L and R
// if (!(L && R)) -> union !L and !R
- if ((isTrueDest ^ IsAnd) && (LV != Visited.end())) {
- ValueLatticeElement V = LV->second;
- if (V.isOverdefined())
- return V;
- if (RV != Visited.end()) {
- V.mergeIn(RV->second);
- return V;
- }
- }
-
- if (LV == Visited.end() || RV == Visited.end()) {
- assert(!isRevisit);
- if (LV == Visited.end())
- Worklist.push_back(CondValue(L, isTrueDest));
- if (RV == Visited.end())
- Worklist.push_back(CondValue(R, isTrueDest));
- return std::nullopt;
+ if (IsTrueDest ^ IsAnd) {
+ LV.mergeIn(RV);
+ return LV;
}
- return intersect(LV->second, RV->second);
-}
-
-ValueLatticeElement getValueFromCondition(Value *Val, Value *Cond,
- bool isTrueDest) {
- assert(Cond && "precondition");
- SmallDenseMap<CondValue, ValueLatticeElement> Visited;
- SmallVector<CondValue> Worklist;
-
- CondValue CondKey(Cond, isTrueDest);
- Worklist.push_back(CondKey);
- do {
- CondValue CurrentCond = Worklist.back();
- // Insert an Overdefined placeholder into the set to prevent
- // infinite recursion if there exists IRs that use not
- // dominated by its def as in this example:
- // "%tmp3 = or i1 undef, %tmp4"
- // "%tmp4 = or i1 undef, %tmp3"
- auto Iter =
- Visited.try_emplace(CurrentCond, ValueLatticeElement::getOverdefined());
- bool isRevisit = !Iter.second;
- std::optional<ValueLatticeElement> Result = getValueFromConditionImpl(
- Val, CurrentCond, isRevisit, Visited, Worklist);
- if (Result) {
- Visited[CurrentCond] = *Result;
- Worklist.pop_back();
- }
- } while (!Worklist.empty());
-
- auto Result = Visited.find(CondKey);
- assert(Result != Visited.end());
- return Result->second;
+ return intersect(LV, RV);
}
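The rewritten recursion handles conjunctions directly with a depth limit
instead of the old worklist; a hypothetical example of the intersection case:

    define i1 @and_cond(i8 %a) {
      %c1 = icmp sgt i8 %a, 5
      %c2 = icmp slt i8 %a, 10
      %c = and i1 %c1, %c2
      br i1 %c, label %t, label %f
    t:                                  ; %a is in [6, 10) here
      %d = icmp eq i8 %a, 20
      ret i1 %d                         ; foldable to false
    f:
      ret i1 false
    }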
// Return true if Usr has Op as an operand, otherwise false.
@@ -1324,12 +1301,9 @@ static ValueLatticeElement constantFoldUser(User *Usr, Value *Op,
return ValueLatticeElement::getOverdefined();
}
-/// Compute the value of Val on the edge BBFrom -> BBTo. Returns false if
-/// Val is not constrained on the edge. Result is unspecified if return value
-/// is false.
-static std::optional<ValueLatticeElement> getEdgeValueLocal(Value *Val,
- BasicBlock *BBFrom,
- BasicBlock *BBTo) {
+/// Compute the value of Val on the edge BBFrom -> BBTo.
+static ValueLatticeElement getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
+ BasicBlock *BBTo) {
// TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we
// know that v != 0.
if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) {
@@ -1403,7 +1377,7 @@ static std::optional<ValueLatticeElement> getEdgeValueLocal(Value *Val,
if (SwitchInst *SI = dyn_cast<SwitchInst>(BBFrom->getTerminator())) {
Value *Condition = SI->getCondition();
if (!isa<IntegerType>(Val->getType()))
- return std::nullopt;
+ return ValueLatticeElement::getOverdefined();
bool ValUsesConditionAndMayBeFoldable = false;
if (Condition != Val) {
// Check if Val has Condition as an operand.
@@ -1411,7 +1385,7 @@ static std::optional<ValueLatticeElement> getEdgeValueLocal(Value *Val,
ValUsesConditionAndMayBeFoldable = isOperationFoldable(Usr) &&
usesOperand(Usr, Condition);
if (!ValUsesConditionAndMayBeFoldable)
- return std::nullopt;
+ return ValueLatticeElement::getOverdefined();
}
assert((Condition == Val || ValUsesConditionAndMayBeFoldable) &&
"Condition != Val nor Val doesn't use Condition");
@@ -1429,7 +1403,7 @@ static std::optional<ValueLatticeElement> getEdgeValueLocal(Value *Val,
ValueLatticeElement EdgeLatticeVal =
constantFoldUser(Usr, Condition, CaseValue, DL);
if (EdgeLatticeVal.isOverdefined())
- return std::nullopt;
+ return ValueLatticeElement::getOverdefined();
EdgeVal = EdgeLatticeVal.getConstantRange();
}
if (DefaultCase) {
@@ -1446,7 +1420,7 @@ static std::optional<ValueLatticeElement> getEdgeValueLocal(Value *Val,
}
return ValueLatticeElement::getRange(std::move(EdgesVals));
}
- return std::nullopt;
+ return ValueLatticeElement::getOverdefined();
}
/// Compute the value of Val on the edge BBFrom -> BBTo or the value at
@@ -1458,9 +1432,7 @@ LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom,
if (Constant *VC = dyn_cast<Constant>(Val))
return ValueLatticeElement::get(VC);
- ValueLatticeElement LocalResult =
- getEdgeValueLocal(Val, BBFrom, BBTo)
- .value_or(ValueLatticeElement::getOverdefined());
+ ValueLatticeElement LocalResult = getEdgeValueLocal(Val, BBFrom, BBTo);
if (hasSingleValue(LocalResult))
// Can't get any more precise here
return LocalResult;
@@ -1537,6 +1509,52 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB,
return *Result;
}
+ValueLatticeElement LazyValueInfoImpl::getValueAtUse(const Use &U) {
+ Value *V = U.get();
+ auto *CxtI = cast<Instruction>(U.getUser());
+ ValueLatticeElement VL = getValueInBlock(V, CxtI->getParent(), CxtI);
+
+ // Check whether the only (possibly transitive) use of the value is in a
+ // position where V can be constrained by a select or branch condition.
+ const Use *CurrU = &U;
+ // TODO: Increase limit?
+ const unsigned MaxUsesToInspect = 3;
+ for (unsigned I = 0; I < MaxUsesToInspect; ++I) {
+ std::optional<ValueLatticeElement> CondVal;
+ auto *CurrI = cast<Instruction>(CurrU->getUser());
+ if (auto *SI = dyn_cast<SelectInst>(CurrI)) {
+ // If the value is undef, a different value may be chosen in
+ // the select condition and at use.
+ if (!isGuaranteedNotToBeUndef(SI->getCondition(), AC))
+ break;
+ if (CurrU->getOperandNo() == 1)
+ CondVal = getValueFromCondition(V, SI->getCondition(), true);
+ else if (CurrU->getOperandNo() == 2)
+ CondVal = getValueFromCondition(V, SI->getCondition(), false);
+ } else if (auto *PHI = dyn_cast<PHINode>(CurrI)) {
+ // TODO: Use non-local query?
+ CondVal =
+ getEdgeValueLocal(V, PHI->getIncomingBlock(*CurrU), PHI->getParent());
+ }
+ if (CondVal)
+ VL = intersect(VL, *CondVal);
+
+ // Only follow one-use chain, to allow direct intersection of conditions.
+ // If there are multiple uses, we would have to intersect with the union of
+ // all conditions at different uses.
+ // Stop walking if we hit a non-speculatable instruction. Even if the
+ // result is only used under a specific condition, executing the
+ // instruction itself may cause side effects or UB already.
+ // This also disallows looking through phi nodes: If the phi node is part
+ // of a cycle, we might end up reasoning about values from different cycle
+ // iterations (PR60629).
+ if (!CurrI->hasOneUse() || !isSafeToSpeculativelyExecute(CurrI))
+ break;
+ CurrU = &*CurrI->use_begin();
+ }
+ return VL;
+}
+
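The extracted getValueAtUse keeps the old one-use-chain walk but now returns a
full lattice value; a hypothetical example of the select refinement it
performs:

    define i8 @range_at_use(i8 %x) {
      %c = icmp ult i8 %x, 10
      %s = select i1 %c, i8 %x, i8 9
      ret i8 %s                         ; at the true arm's use, %x is in [0, 10)
    }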
void LazyValueInfoImpl::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
BasicBlock *NewSucc) {
TheCache.threadEdgeImpl(OldSucc, NewSucc);
@@ -1546,25 +1564,12 @@ void LazyValueInfoImpl::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
// LazyValueInfo Impl
//===----------------------------------------------------------------------===//
-/// This lazily constructs the LazyValueInfoImpl.
-static LazyValueInfoImpl &getImpl(void *&PImpl, AssumptionCache *AC,
- const Module *M) {
- if (!PImpl) {
- assert(M && "getCache() called with a null Module");
- const DataLayout &DL = M->getDataLayout();
- Function *GuardDecl = M->getFunction(
- Intrinsic::getName(Intrinsic::experimental_guard));
- PImpl = new LazyValueInfoImpl(AC, DL, GuardDecl);
- }
- return *static_cast<LazyValueInfoImpl*>(PImpl);
-}
-
bool LazyValueInfoWrapperPass::runOnFunction(Function &F) {
Info.AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
Info.TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- if (Info.PImpl)
- getImpl(Info.PImpl, Info.AC, F.getParent()).clear();
+ if (auto *Impl = Info.getImpl())
+ Impl->clear();
// Fully lazy.
return false;
@@ -1578,12 +1583,30 @@ void LazyValueInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
LazyValueInfo &LazyValueInfoWrapperPass::getLVI() { return Info; }
+/// This lazily constructs the LazyValueInfoImpl.
+LazyValueInfoImpl &LazyValueInfo::getOrCreateImpl(const Module *M) {
+ if (!PImpl) {
+ assert(M && "getCache() called with a null Module");
+ const DataLayout &DL = M->getDataLayout();
+ Function *GuardDecl =
+ M->getFunction(Intrinsic::getName(Intrinsic::experimental_guard));
+ PImpl = new LazyValueInfoImpl(AC, DL, GuardDecl);
+ }
+ return *static_cast<LazyValueInfoImpl *>(PImpl);
+}
+
+LazyValueInfoImpl *LazyValueInfo::getImpl() {
+ if (!PImpl)
+ return nullptr;
+ return static_cast<LazyValueInfoImpl *>(PImpl);
+}
+
LazyValueInfo::~LazyValueInfo() { releaseMemory(); }
void LazyValueInfo::releaseMemory() {
// If the cache was allocated, free it.
- if (PImpl) {
- delete &getImpl(PImpl, AC, nullptr);
+ if (auto *Impl = getImpl()) {
+ delete &*Impl;
PImpl = nullptr;
}
}
@@ -1630,7 +1653,7 @@ Constant *LazyValueInfo::getConstant(Value *V, Instruction *CxtI) {
BasicBlock *BB = CxtI->getParent();
ValueLatticeElement Result =
- getImpl(PImpl, AC, BB->getModule()).getValueInBlock(V, BB, CxtI);
+ getOrCreateImpl(BB->getModule()).getValueInBlock(V, BB, CxtI);
if (Result.isConstant())
return Result.getConstant();
@@ -1645,66 +1668,18 @@ Constant *LazyValueInfo::getConstant(Value *V, Instruction *CxtI) {
ConstantRange LazyValueInfo::getConstantRange(Value *V, Instruction *CxtI,
bool UndefAllowed) {
assert(V->getType()->isIntegerTy());
- unsigned Width = V->getType()->getIntegerBitWidth();
BasicBlock *BB = CxtI->getParent();
ValueLatticeElement Result =
- getImpl(PImpl, AC, BB->getModule()).getValueInBlock(V, BB, CxtI);
- if (Result.isUnknown())
- return ConstantRange::getEmpty(Width);
- if (Result.isConstantRange(UndefAllowed))
- return Result.getConstantRange(UndefAllowed);
- // We represent ConstantInt constants as constant ranges but other kinds
- // of integer constants, i.e. ConstantExpr will be tagged as constants
- assert(!(Result.isConstant() && isa<ConstantInt>(Result.getConstant())) &&
- "ConstantInt value must be represented as constantrange");
- return ConstantRange::getFull(Width);
+ getOrCreateImpl(BB->getModule()).getValueInBlock(V, BB, CxtI);
+ return toConstantRange(Result, V->getType(), UndefAllowed);
}
ConstantRange LazyValueInfo::getConstantRangeAtUse(const Use &U,
bool UndefAllowed) {
- Value *V = U.get();
- ConstantRange CR =
- getConstantRange(V, cast<Instruction>(U.getUser()), UndefAllowed);
-
- // Check whether the only (possibly transitive) use of the value is in a
- // position where V can be constrained by a select or branch condition.
- const Use *CurrU = &U;
- // TODO: Increase limit?
- const unsigned MaxUsesToInspect = 3;
- for (unsigned I = 0; I < MaxUsesToInspect; ++I) {
- std::optional<ValueLatticeElement> CondVal;
- auto *CurrI = cast<Instruction>(CurrU->getUser());
- if (auto *SI = dyn_cast<SelectInst>(CurrI)) {
- // If the value is undef, a different value may be chosen in
- // the select condition and at use.
- if (!isGuaranteedNotToBeUndefOrPoison(SI->getCondition(), AC))
- break;
- if (CurrU->getOperandNo() == 1)
- CondVal = getValueFromCondition(V, SI->getCondition(), true);
- else if (CurrU->getOperandNo() == 2)
- CondVal = getValueFromCondition(V, SI->getCondition(), false);
- } else if (auto *PHI = dyn_cast<PHINode>(CurrI)) {
- // TODO: Use non-local query?
- CondVal =
- getEdgeValueLocal(V, PHI->getIncomingBlock(*CurrU), PHI->getParent());
- }
- if (CondVal && CondVal->isConstantRange())
- CR = CR.intersectWith(CondVal->getConstantRange());
-
- // Only follow one-use chain, to allow direct intersection of conditions.
- // If there are multiple uses, we would have to intersect with the union of
- // all conditions at different uses.
- // Stop walking if we hit a non-speculatable instruction. Even if the
- // result is only used under a specific condition, executing the
- // instruction itself may cause side effects or UB already.
- // This also disallows looking through phi nodes: If the phi node is part
- // of a cycle, we might end up reasoning about values from different cycle
- // iterations (PR60629).
- if (!CurrI->hasOneUse() || !isSafeToSpeculativelyExecute(CurrI))
- break;
- CurrU = &*CurrI->use_begin();
- }
- return CR;
+ auto *Inst = cast<Instruction>(U.getUser());
+ ValueLatticeElement Result =
+ getOrCreateImpl(Inst->getModule()).getValueAtUse(U);
+ return toConstantRange(Result, U->getType(), UndefAllowed);
}
/// Determine whether the specified value is known to be a
@@ -1714,7 +1689,7 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
Instruction *CxtI) {
Module *M = FromBB->getModule();
ValueLatticeElement Result =
- getImpl(PImpl, AC, M).getValueOnEdge(V, FromBB, ToBB, CxtI);
+ getOrCreateImpl(M).getValueOnEdge(V, FromBB, ToBB, CxtI);
if (Result.isConstant())
return Result.getConstant();
@@ -1730,20 +1705,11 @@ ConstantRange LazyValueInfo::getConstantRangeOnEdge(Value *V,
BasicBlock *FromBB,
BasicBlock *ToBB,
Instruction *CxtI) {
- unsigned Width = V->getType()->getIntegerBitWidth();
Module *M = FromBB->getModule();
ValueLatticeElement Result =
- getImpl(PImpl, AC, M).getValueOnEdge(V, FromBB, ToBB, CxtI);
-
- if (Result.isUnknown())
- return ConstantRange::getEmpty(Width);
- if (Result.isConstantRange())
- return Result.getConstantRange();
- // We represent ConstantInt constants as constant ranges but other kinds
- // of integer constants, i.e. ConstantExpr will be tagged as constants
- assert(!(Result.isConstant() && isa<ConstantInt>(Result.getConstant())) &&
- "ConstantInt value must be represented as constantrange");
- return ConstantRange::getFull(Width);
+ getOrCreateImpl(M).getValueOnEdge(V, FromBB, ToBB, CxtI);
+ // TODO: Should undef be allowed here?
+ return toConstantRange(Result, V->getType(), /*UndefAllowed*/ true);
}
static LazyValueInfo::Tristate
@@ -1819,7 +1785,7 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
Instruction *CxtI) {
Module *M = FromBB->getModule();
ValueLatticeElement Result =
- getImpl(PImpl, AC, M).getValueOnEdge(V, FromBB, ToBB, CxtI);
+ getOrCreateImpl(M).getValueOnEdge(V, FromBB, ToBB, CxtI);
return getPredicateResult(Pred, C, Result, M->getDataLayout(), TLI);
}
@@ -1841,9 +1807,10 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
return LazyValueInfo::True;
}
- ValueLatticeElement Result = UseBlockValue
- ? getImpl(PImpl, AC, M).getValueInBlock(V, CxtI->getParent(), CxtI)
- : getImpl(PImpl, AC, M).getValueAt(V, CxtI);
+ auto &Impl = getOrCreateImpl(M);
+ ValueLatticeElement Result =
+ UseBlockValue ? Impl.getValueInBlock(V, CxtI->getParent(), CxtI)
+ : Impl.getValueAt(V, CxtI);
Tristate Ret = getPredicateResult(Pred, C, Result, DL, TLI);
if (Ret != Unknown)
return Ret;
@@ -1947,12 +1914,12 @@ LazyValueInfo::Tristate LazyValueInfo::getPredicateAt(unsigned P, Value *LHS,
if (UseBlockValue) {
Module *M = CxtI->getModule();
ValueLatticeElement L =
- getImpl(PImpl, AC, M).getValueInBlock(LHS, CxtI->getParent(), CxtI);
+ getOrCreateImpl(M).getValueInBlock(LHS, CxtI->getParent(), CxtI);
if (L.isOverdefined())
return LazyValueInfo::Unknown;
ValueLatticeElement R =
- getImpl(PImpl, AC, M).getValueInBlock(RHS, CxtI->getParent(), CxtI);
+ getOrCreateImpl(M).getValueInBlock(RHS, CxtI->getParent(), CxtI);
Type *Ty = CmpInst::makeCmpResultType(LHS->getType());
if (Constant *Res = L.getCompare((CmpInst::Predicate)P, Ty, R,
M->getDataLayout())) {
@@ -1967,33 +1934,28 @@ LazyValueInfo::Tristate LazyValueInfo::getPredicateAt(unsigned P, Value *LHS,
void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
BasicBlock *NewSucc) {
- if (PImpl) {
- getImpl(PImpl, AC, PredBB->getModule())
- .threadEdge(PredBB, OldSucc, NewSucc);
- }
+ if (auto *Impl = getImpl())
+ Impl->threadEdge(PredBB, OldSucc, NewSucc);
}
void LazyValueInfo::forgetValue(Value *V) {
- if (PImpl)
- getImpl(PImpl, AC, nullptr).forgetValue(V);
+ if (auto *Impl = getImpl())
+ Impl->forgetValue(V);
}
void LazyValueInfo::eraseBlock(BasicBlock *BB) {
- if (PImpl) {
- getImpl(PImpl, AC, BB->getModule()).eraseBlock(BB);
- }
+ if (auto *Impl = getImpl())
+ Impl->eraseBlock(BB);
}
-void LazyValueInfo::clear(const Module *M) {
- if (PImpl) {
- getImpl(PImpl, AC, M).clear();
- }
+void LazyValueInfo::clear() {
+ if (auto *Impl = getImpl())
+ Impl->clear();
}
void LazyValueInfo::printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS) {
- if (PImpl) {
- getImpl(PImpl, AC, F.getParent()).printLVI(F, DTree, OS);
- }
+ if (auto *Impl = getImpl())
+ Impl->printLVI(F, DTree, OS);
}
// Print the LVI for the function arguments at the start of each basic block.
@@ -2049,36 +2011,11 @@ void LazyValueInfoAnnotatedWriter::emitInstructionAnnot(
}
-namespace {
-// Printer class for LazyValueInfo results.
-class LazyValueInfoPrinter : public FunctionPass {
-public:
- static char ID; // Pass identification, replacement for typeid
- LazyValueInfoPrinter() : FunctionPass(ID) {
- initializeLazyValueInfoPrinterPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- AU.addRequired<LazyValueInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- }
-
- // Get the mandatory dominator tree analysis and pass this in to the
- // LVIPrinter. We cannot rely on the LVI's DT, since it's optional.
- bool runOnFunction(Function &F) override {
- dbgs() << "LVI for function '" << F.getName() << "':\n";
- auto &LVI = getAnalysis<LazyValueInfoWrapperPass>().getLVI();
- auto &DTree = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LVI.printLVI(F, DTree, dbgs());
- return false;
- }
-};
+PreservedAnalyses LazyValueInfoPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ OS << "LVI for function '" << F.getName() << "':\n";
+ auto &LVI = AM.getResult<LazyValueAnalysis>(F);
+ auto &DTree = AM.getResult<DominatorTreeAnalysis>(F);
+ LVI.printLVI(F, DTree, OS);
+ return PreservedAnalyses::all();
}
-
-char LazyValueInfoPrinter::ID = 0;
-INITIALIZE_PASS_BEGIN(LazyValueInfoPrinter, "print-lazy-value-info",
- "Lazy Value Info Printer Pass", false, false)
-INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
-INITIALIZE_PASS_END(LazyValueInfoPrinter, "print-lazy-value-info",
- "Lazy Value Info Printer Pass", false, false)
diff --git a/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp b/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp
index ff022006df65..1ebc593016bc 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp
@@ -235,6 +235,10 @@ void Lint::visitCallBase(CallBase &I) {
// If both arguments are readonly, they have no dependence.
if (Formal->onlyReadsMemory() && I.onlyReadsMemory(ArgNo))
continue;
+ // Skip readnone arguments since those are guaranteed not to be
+ // dereferenced anyway.
+ if (I.doesNotAccessMemory(ArgNo))
+ continue;
if (AI != BI && (*BI)->getType()->isPointerTy()) {
AliasResult Result = AA->alias(*AI, *BI);
Check(Result != AliasResult::MustAlias &&
diff --git a/contrib/llvm-project/llvm/lib/Analysis/Local.cpp b/contrib/llvm-project/llvm/lib/Analysis/Local.cpp
index 5d558de516d3..30757abeb098 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/Local.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/Local.cpp
@@ -28,18 +28,18 @@ Value *llvm::emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL,
// If the GEP is inbounds, we know that none of the addressing operations will
// overflow in a signed sense.
bool isInBounds = GEPOp->isInBounds() && !NoAssumptions;
-
- // Build a mask for high order bits.
- unsigned IntPtrWidth = IntIdxTy->getScalarType()->getIntegerBitWidth();
- uint64_t PtrSizeMask =
- std::numeric_limits<uint64_t>::max() >> (64 - IntPtrWidth);
+ auto AddOffset = [&](Value *Offset) {
+ if (Result)
+ Result = Builder->CreateAdd(Result, Offset, GEP->getName() + ".offs",
+ false /*NUW*/, isInBounds /*NSW*/);
+ else
+ Result = Offset;
+ };
gep_type_iterator GTI = gep_type_begin(GEP);
for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
++i, ++GTI) {
Value *Op = *i;
- uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
- Value *Offset;
if (Constant *OpC = dyn_cast<Constant>(Op)) {
if (OpC->isZeroValue())
continue;
@@ -47,46 +47,34 @@ Value *llvm::emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL,
// Handle a struct index, which adds its field offset to the pointer.
if (StructType *STy = GTI.getStructTypeOrNull()) {
uint64_t OpValue = OpC->getUniqueInteger().getZExtValue();
- Size = DL.getStructLayout(STy)->getElementOffset(OpValue);
+ uint64_t Size = DL.getStructLayout(STy)->getElementOffset(OpValue);
if (!Size)
continue;
- Offset = ConstantInt::get(IntIdxTy, Size);
- } else {
- // Splat the constant if needed.
- if (IntIdxTy->isVectorTy() && !OpC->getType()->isVectorTy())
- OpC = ConstantVector::getSplat(
- cast<VectorType>(IntIdxTy)->getElementCount(), OpC);
-
- Constant *Scale = ConstantInt::get(IntIdxTy, Size);
- Constant *OC =
- ConstantExpr::getIntegerCast(OpC, IntIdxTy, true /*SExt*/);
- Offset =
- ConstantExpr::getMul(OC, Scale, false /*NUW*/, isInBounds /*NSW*/);
- }
- } else {
- // Splat the index if needed.
- if (IntIdxTy->isVectorTy() && !Op->getType()->isVectorTy())
- Op = Builder->CreateVectorSplat(
- cast<FixedVectorType>(IntIdxTy)->getNumElements(), Op);
-
- // Convert to correct type.
- if (Op->getType() != IntIdxTy)
- Op = Builder->CreateIntCast(Op, IntIdxTy, true, Op->getName() + ".c");
- if (Size != 1) {
- // We'll let instcombine(mul) convert this to a shl if possible.
- Op = Builder->CreateMul(Op, ConstantInt::get(IntIdxTy, Size),
- GEP->getName() + ".idx", false /*NUW*/,
- isInBounds /*NSW*/);
+ AddOffset(ConstantInt::get(IntIdxTy, Size));
+ continue;
}
- Offset = Op;
}
- if (Result)
- Result = Builder->CreateAdd(Result, Offset, GEP->getName() + ".offs",
- false /*NUW*/, isInBounds /*NSW*/);
- else
- Result = Offset;
+ // Splat the index if needed.
+ if (IntIdxTy->isVectorTy() && !Op->getType()->isVectorTy())
+ Op = Builder->CreateVectorSplat(
+ cast<VectorType>(IntIdxTy)->getElementCount(), Op);
+
+ // Convert to correct type.
+ if (Op->getType() != IntIdxTy)
+ Op = Builder->CreateIntCast(Op, IntIdxTy, true, Op->getName() + ".c");
+ TypeSize TSize = DL.getTypeAllocSize(GTI.getIndexedType());
+ if (TSize != TypeSize::getFixed(1)) {
+ Value *Scale = Builder->CreateTypeSize(IntIdxTy->getScalarType(), TSize);
+ if (IntIdxTy->isVectorTy())
+ Scale = Builder->CreateVectorSplat(
+ cast<VectorType>(IntIdxTy)->getElementCount(), Scale);
+ // We'll let instcombine(mul) convert this to a shl if possible.
+ Op = Builder->CreateMul(Op, Scale, GEP->getName() + ".idx", false /*NUW*/,
+ isInBounds /*NSW*/);
+ }
+ AddOffset(Op);
}
return Result ? Result : Constant::getNullValue(IntIdxTy);
}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index fd0e81c51ac8..0894560fd078 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -14,7 +14,6 @@
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
@@ -22,7 +21,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
@@ -53,8 +51,6 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -65,6 +61,7 @@
#include <cstdint>
#include <iterator>
#include <utility>
+#include <variant>
#include <vector>
using namespace llvm;
@@ -142,6 +139,13 @@ static cl::opt<bool> SpeculateUnitStride(
cl::desc("Speculate that non-constant strides are unit in LAA"),
cl::init(true));
+static cl::opt<bool, true> HoistRuntimeChecks(
+ "hoist-runtime-checks", cl::Hidden,
+ cl::desc(
+ "Hoist inner loop runtime memory checks to outer loop if possible"),
+ cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(false));
+bool VectorizerParams::HoistRuntimeChecks;
+
bool VectorizerParams::isInterleaveForced() {
return ::VectorizationInterleave.getNumOccurrences() > 0;
}
@@ -331,6 +335,34 @@ void RuntimePointerChecking::tryToCreateDiffCheck(
CanUseDiffCheck = false;
return;
}
+
+ const Loop *InnerLoop = SrcAR->getLoop();
+ // If the start values for both Src and Sink also vary according to an outer
+ // loop, then it's probably better to avoid creating diff checks because
+ // they may not be hoisted. We should instead let llvm::addRuntimeChecks
+ // do the expanded full range overlap checks, which can be hoisted.
+ if (HoistRuntimeChecks && InnerLoop->getParentLoop() &&
+ isa<SCEVAddRecExpr>(SinkStartInt) && isa<SCEVAddRecExpr>(SrcStartInt)) {
+ auto *SrcStartAR = cast<SCEVAddRecExpr>(SrcStartInt);
+ auto *SinkStartAR = cast<SCEVAddRecExpr>(SinkStartInt);
+ const Loop *StartARLoop = SrcStartAR->getLoop();
+ if (StartARLoop == SinkStartAR->getLoop() &&
+ StartARLoop == InnerLoop->getParentLoop() &&
+ // If the diff check would already be loop invariant (due to the
+ // recurrences being the same), then we prefer to keep the diff checks
+ // because they are cheaper.
+ SrcStartAR->getStepRecurrence(*SE) !=
+ SinkStartAR->getStepRecurrence(*SE)) {
+ LLVM_DEBUG(dbgs() << "LAA: Not creating diff runtime check, since these "
+ "cannot be hoisted out of the outer loop\n");
+ CanUseDiffCheck = false;
+ return;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "LAA: Creating diff runtime check for:\n"
+ << "SrcStart: " << *SrcStartInt << '\n'
+ << "SinkStartInt: " << *SinkStartInt << '\n');
DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,
Src->NeedsFreeze || Sink->NeedsFreeze);
}
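
A hedged illustration of the pattern the HoistRuntimeChecks logic above
targets (hypothetical C++, not taken from the patch): the start addresses of
both inner-loop accesses advance with the outer induction variable, so a
per-iteration diff check would not be invariant in the outer loop, while the
expanded full range-overlap check for the whole inner trip count can be
hoisted out of it.

    // Dst and Src may overlap; their inner-loop start addresses depend
    // on I, so only the range-overlap style of check hoists past I.
    void copyRows(int *Dst, int *Src, int N, int M) {
      for (int I = 0; I < N; ++I)   // outer loop
        for (int J = 0; J < M; ++J) // inner loop being vectorized
          Dst[I * M + J] = Src[I * M + J] + 1;
    }
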
@@ -634,7 +666,7 @@ public:
/// Register a load and whether it is only read from.
void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
- AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
+ AST.add(Loc.getWithNewSize(LocationSize::beforeOrAfterPointer()));
Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
if (IsReadOnly)
ReadOnlyPtr.insert(Ptr);
@@ -643,7 +675,7 @@ public:
/// Register a store.
void addStore(MemoryLocation &Loc, Type *AccessTy) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
- AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
+ AST.add(Loc.getWithNewSize(LocationSize::beforeOrAfterPointer()));
Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
}
@@ -691,6 +723,11 @@ public:
MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
+ const DenseMap<Value *, SmallVector<const Value *, 16>> &
+ getUnderlyingObjects() {
+ return UnderlyingObjects;
+ }
+
private:
typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
@@ -736,6 +773,8 @@ private:
/// The SCEV predicate containing all the SCEV-related assumptions.
PredicatedScalarEvolution &PSE;
+
+ DenseMap<Value *, SmallVector<const Value *, 16>> UnderlyingObjects;
};
} // end anonymous namespace
@@ -914,6 +953,22 @@ static void findForkedSCEVs(
ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
break;
}
+ case Instruction::PHI: {
+ SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> ChildScevs;
+ // A phi means we've found a forked pointer, but we currently only
+      // support a single phi per pointer; if there's another behind this
+      // one, we just bail out and return the generic SCEV.
+ if (I->getNumOperands() == 2) {
+ findForkedSCEVs(SE, L, I->getOperand(0), ChildScevs, Depth);
+ findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
+ }
+ if (ChildScevs.size() == 2) {
+ ScevList.push_back(ChildScevs[0]);
+ ScevList.push_back(ChildScevs[1]);
+ } else
+ ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
+ break;
+ }
case Instruction::Add:
case Instruction::Sub: {
SmallVector<PointerIntPair<const SCEV *, 1, bool>> LScevs;
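
A hypothetical source shape for the forked-pointer phi case added above: the
store address is one of two affine pointers chosen per iteration, which
reaches LAA as a two-input phi (or a select, handled by the earlier case).

    void forked(int *A, int *B, const int *C, int N) {
      for (int I = 0; I < N; ++I) {
        int *P;
        if (C[I] > 0) // both incoming pointers feed a phi for P
          P = A;
        else
          P = B;
        P[I] = 0;     // each candidate SCEV is analyzed separately
      }
    }
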
@@ -1074,7 +1129,6 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
for (const auto &A : AS) {
Value *Ptr = A.getValue();
bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
-
if (IsWrite)
++NumWritePtrChecks;
else
@@ -1289,10 +1343,12 @@ void AccessAnalysis::processMemAccesses() {
typedef SmallVector<const Value *, 16> ValueVector;
ValueVector TempObjects;
- getUnderlyingObjects(Ptr, TempObjects, LI);
+ UnderlyingObjects[Ptr] = {};
+ SmallVector<const Value *, 16> &UOs = UnderlyingObjects[Ptr];
+ ::getUnderlyingObjects(Ptr, UOs, LI);
LLVM_DEBUG(dbgs()
<< "Underlying objects for pointer " << *Ptr << "\n");
- for (const Value *UnderlyingObj : TempObjects) {
+ for (const Value *UnderlyingObj : UOs) {
      // nullptr never aliases; don't join sets for pointers that have "null"
      // in their UnderlyingObjects list.
if (isa<ConstantPointerNull>(UnderlyingObj) &&
@@ -1620,6 +1676,7 @@ MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
case ForwardButPreventsForwarding:
case Backward:
case BackwardVectorizableButPreventsForwarding:
+ case IndirectUnsafe:
return VectorizationSafetyStatus::Unsafe;
}
llvm_unreachable("unexpected DepType!");
@@ -1631,6 +1688,7 @@ bool MemoryDepChecker::Dependence::isBackward() const {
case Forward:
case ForwardButPreventsForwarding:
case Unknown:
+ case IndirectUnsafe:
return false;
case BackwardVectorizable:
@@ -1656,6 +1714,7 @@ bool MemoryDepChecker::Dependence::isForward() const {
case BackwardVectorizable:
case Backward:
case BackwardVectorizableButPreventsForwarding:
+ case IndirectUnsafe:
return false;
}
llvm_unreachable("unexpected DepType!");
@@ -1678,7 +1737,7 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
// Maximum vector factor.
uint64_t MaxVFWithoutSLForwardIssues = std::min(
- VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);
+ VectorizerParams::MaxVectorWidth * TypeByteSize, MinDepDistBytes);
// Compute the smallest VF at which the store and load would be misaligned.
for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
@@ -1698,10 +1757,10 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
return true;
}
- if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
+ if (MaxVFWithoutSLForwardIssues < MinDepDistBytes &&
MaxVFWithoutSLForwardIssues !=
VectorizerParams::MaxVectorWidth * TypeByteSize)
- MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
+ MinDepDistBytes = MaxVFWithoutSLForwardIssues;
return false;
}
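
An illustrative loop for the store-to-load forwarding limit above (invented
example, not from the patch): the load reads what the store wrote four
iterations earlier, a 16-byte backward dependence with i32 elements. That
distance caps the safe vector width, and the check above may shrink the cap
further when the resulting store/load layout could stall the CPU's
store-to-load forwarding fast path.

    void slf(int *A, int N) {
      for (int I = 4; I < N; ++I)
        A[I] = A[I - 4] + 1; // 16-byte backward dependence
    }
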
@@ -1813,67 +1872,116 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
return ScaledDist % Stride;
}
-MemoryDepChecker::Dependence::DepType
-MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
- const MemAccessInfo &B, unsigned BIdx,
- const DenseMap<Value *, const SCEV *> &Strides) {
- assert (AIdx < BIdx && "Must pass arguments in program order");
+/// Returns true if any of the underlying objects has a loop varying address,
+/// i.e. may change in \p L.
+static bool
+isLoopVariantIndirectAddress(ArrayRef<const Value *> UnderlyingObjects,
+ ScalarEvolution &SE, const Loop *L) {
+ return any_of(UnderlyingObjects, [&SE, L](const Value *UO) {
+ return !SE.isLoopInvariant(SE.getSCEV(const_cast<Value *>(UO)), L);
+ });
+}
+// Get the dependence distance, stride, type size and whether each access is a
+// write for the dependence between A and B. Outlined to a helper to limit the
+// scope of various temporary variables, like A/BPtr, StrideA/BPtr and others.
+// Returns either the dependence result, if it could already be determined, or
+// a tuple with (Distance, Stride, TypeSize, AIsWrite, BIsWrite).
+static std::variant<MemoryDepChecker::Dependence::DepType,
+ std::tuple<const SCEV *, uint64_t, uint64_t, bool, bool>>
+getDependenceDistanceStrideAndSize(
+ const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,
+ const AccessAnalysis::MemAccessInfo &B, Instruction *BInst,
+ const DenseMap<Value *, const SCEV *> &Strides,
+ const DenseMap<Value *, SmallVector<const Value *, 16>> &UnderlyingObjects,
+ PredicatedScalarEvolution &PSE, const Loop *InnermostLoop) {
+ auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
+ auto &SE = *PSE.getSE();
auto [APtr, AIsWrite] = A;
auto [BPtr, BIsWrite] = B;
- Type *ATy = getLoadStoreType(InstMap[AIdx]);
- Type *BTy = getLoadStoreType(InstMap[BIdx]);
// Two reads are independent.
if (!AIsWrite && !BIsWrite)
- return Dependence::NoDep;
+ return MemoryDepChecker::Dependence::NoDep;
+
+ Type *ATy = getLoadStoreType(AInst);
+ Type *BTy = getLoadStoreType(BInst);
// We cannot check pointers in different address spaces.
if (APtr->getType()->getPointerAddressSpace() !=
BPtr->getType()->getPointerAddressSpace())
- return Dependence::Unknown;
+ return MemoryDepChecker::Dependence::Unknown;
int64_t StrideAPtr =
- getPtrStride(PSE, ATy, APtr, InnermostLoop, Strides, true).value_or(0);
+ getPtrStride(PSE, ATy, APtr, InnermostLoop, Strides, true).value_or(0);
int64_t StrideBPtr =
- getPtrStride(PSE, BTy, BPtr, InnermostLoop, Strides, true).value_or(0);
+ getPtrStride(PSE, BTy, BPtr, InnermostLoop, Strides, true).value_or(0);
const SCEV *Src = PSE.getSCEV(APtr);
const SCEV *Sink = PSE.getSCEV(BPtr);
// If the induction step is negative we have to invert source and sink of the
- // dependence.
+ // dependence when measuring the distance between them. We should not swap
+ // AIsWrite with BIsWrite, as their uses expect them in program order.
if (StrideAPtr < 0) {
- std::swap(APtr, BPtr);
- std::swap(ATy, BTy);
std::swap(Src, Sink);
- std::swap(AIsWrite, BIsWrite);
- std::swap(AIdx, BIdx);
- std::swap(StrideAPtr, StrideBPtr);
+ std::swap(AInst, BInst);
}
- ScalarEvolution &SE = *PSE.getSE();
const SCEV *Dist = SE.getMinusSCEV(Sink, Src);
LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
<< "(Induction step: " << StrideAPtr << ")\n");
- LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
- << *InstMap[BIdx] << ": " << *Dist << "\n");
+ LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
+ << ": " << *Dist << "\n");
+
+  // We need accesses where the addresses of the accessed underlying objects
+  // do not change within the loop.
+ if (isLoopVariantIndirectAddress(UnderlyingObjects.find(APtr)->second, SE,
+ InnermostLoop) ||
+ isLoopVariantIndirectAddress(UnderlyingObjects.find(BPtr)->second, SE,
+ InnermostLoop))
+ return MemoryDepChecker::Dependence::IndirectUnsafe;
// Need accesses with constant stride. We don't want to vectorize
- // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
- // the address space.
- if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
+ // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap
+ // in the address space.
+ if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
- return Dependence::Unknown;
+ return MemoryDepChecker::Dependence::Unknown;
}
- auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
bool HasSameSize =
DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
+ if (!HasSameSize)
+ TypeByteSize = 0;
uint64_t Stride = std::abs(StrideAPtr);
+ return std::make_tuple(Dist, Stride, TypeByteSize, AIsWrite, BIsWrite);
+}
+MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
+ const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
+ unsigned BIdx, const DenseMap<Value *, const SCEV *> &Strides,
+ const DenseMap<Value *, SmallVector<const Value *, 16>>
+ &UnderlyingObjects) {
+ assert(AIdx < BIdx && "Must pass arguments in program order");
+
+ // Get the dependence distance, stride, type size and what access writes for
+ // the dependence between A and B.
+ auto Res = getDependenceDistanceStrideAndSize(
+ A, InstMap[AIdx], B, InstMap[BIdx], Strides, UnderlyingObjects, PSE,
+ InnermostLoop);
+ if (std::holds_alternative<Dependence::DepType>(Res))
+ return std::get<Dependence::DepType>(Res);
+
+ const auto &[Dist, Stride, TypeByteSize, AIsWrite, BIsWrite] =
+ std::get<std::tuple<const SCEV *, uint64_t, uint64_t, bool, bool>>(Res);
+ bool HasSameSize = TypeByteSize > 0;
+
+ ScalarEvolution &SE = *PSE.getSE();
+ auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize &&
isSafeDependenceDistance(DL, SE, *(PSE.getBackedgeTakenCount()), *Dist,
Stride, TypeByteSize))
@@ -1899,9 +2007,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// Negative distances are not plausible dependencies.
if (Val.isNegative()) {
bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
+ // There is no need to update MaxSafeVectorWidthInBits after call to
+ // couldPreventStoreLoadForward, even if it changed MinDepDistBytes,
+ // since a forward dependency will allow vectorization using any width.
if (IsTrueDataDependence && EnableForwardingConflictDetection &&
- (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
- !HasSameSize)) {
+ (!HasSameSize || couldPreventStoreLoadForward(Val.abs().getZExtValue(),
+ TypeByteSize))) {
LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
return Dependence::ForwardButPreventsForwarding;
}
@@ -1969,8 +2080,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
return Dependence::Backward;
}
- // Unsafe if the minimum distance needed is greater than max safe distance.
- if (MinDistanceNeeded > MaxSafeDepDistBytes) {
+  // Unsafe if the minimum distance needed is greater than the smallest
+  // dependence distance.
+ if (MinDistanceNeeded > MinDepDistBytes) {
LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
<< MinDistanceNeeded << " size in bytes\n");
return Dependence::Backward;
@@ -1992,15 +2104,25 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// is 2. Then we analyze the accesses on array A, the minimum distance needed
   // is 8, which is less than 2 and forbids vectorization. But actually
// both A and B could be vectorized by 2 iterations.
- MaxSafeDepDistBytes =
- std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
+ MinDepDistBytes =
+ std::min(static_cast<uint64_t>(Distance), MinDepDistBytes);
bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
+ uint64_t MinDepDistBytesOld = MinDepDistBytes;
if (IsTrueDataDependence && EnableForwardingConflictDetection &&
- couldPreventStoreLoadForward(Distance, TypeByteSize))
+ couldPreventStoreLoadForward(Distance, TypeByteSize)) {
+ // Sanity check that we didn't update MinDepDistBytes when calling
+    // couldPreventStoreLoadForward.
+ assert(MinDepDistBytes == MinDepDistBytesOld &&
+ "An update to MinDepDistBytes requires an update to "
+ "MaxSafeVectorWidthInBits");
+ (void)MinDepDistBytesOld;
return Dependence::BackwardVectorizableButPreventsForwarding;
+ }
- uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
+ // An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
+ // since there is a backwards dependency.
+ uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * Stride);
LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
<< " with max VF = " << MaxVF << '\n');
uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
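
The width computation is easy to sanity-check with concrete values; a minimal
self-contained sketch (numbers invented for illustration):

    #include <cassert>
    #include <cstdint>

    // A 16-byte minimum dependence distance between stride-1 i32
    // accesses allows 4 lanes, i.e. a 128-bit maximum safe width.
    int main() {
      uint64_t MinDepDistBytes = 16, TypeByteSize = 4, Stride = 1;
      uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * Stride);
      uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
      assert(MaxVF == 4 && MaxVFInBits == 128);
      return 0;
    }
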
@@ -2008,11 +2130,13 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
return Dependence::BackwardVectorizable;
}
-bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
- MemAccessInfoList &CheckDeps,
- const DenseMap<Value *, const SCEV *> &Strides) {
+bool MemoryDepChecker::areDepsSafe(
+ DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
+ const DenseMap<Value *, const SCEV *> &Strides,
+ const DenseMap<Value *, SmallVector<const Value *, 16>>
+ &UnderlyingObjects) {
- MaxSafeDepDistBytes = -1;
+ MinDepDistBytes = -1;
SmallPtrSet<MemAccessInfo, 8> Visited;
for (MemAccessInfo CurAccess : CheckDeps) {
if (Visited.count(CurAccess))
@@ -2054,7 +2178,8 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
std::swap(A, B);
Dependence::DepType Type =
- isDependent(*A.first, A.second, *B.first, B.second, Strides);
+ isDependent(*A.first, A.second, *B.first, B.second, Strides,
+ UnderlyingObjects);
mergeInStatus(Dependence::isSafeForVectorization(Type));
// Gather dependences unless we accumulated MaxDependences
@@ -2098,8 +2223,14 @@ MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
}
const char *MemoryDepChecker::Dependence::DepName[] = {
- "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
- "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
+ "NoDep",
+ "Unknown",
+ "IndidrectUnsafe",
+ "Forward",
+ "ForwardButPreventsForwarding",
+ "Backward",
+ "BackwardVectorizable",
+ "BackwardVectorizableButPreventsForwarding"};
void MemoryDepChecker::Dependence::print(
raw_ostream &OS, unsigned Depth,
@@ -2192,17 +2323,17 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
if (HasComplexMemInst)
continue;
+ // Many math library functions read the rounding mode. We will only
+ // vectorize a loop if it contains known function calls that don't set
+ // the flag. Therefore, it is safe to ignore this read from memory.
+ auto *Call = dyn_cast<CallInst>(&I);
+ if (Call && getVectorIntrinsicIDForCall(Call, TLI))
+ continue;
+
// If this is a load, save it. If this instruction can read from memory
// but is not a load, then we quit. Notice that we don't handle function
// calls that read or write.
if (I.mayReadFromMemory()) {
- // Many math library functions read the rounding mode. We will only
- // vectorize a loop if it contains known function calls that don't set
- // the flag. Therefore, it is safe to ignore this read from memory.
- auto *Call = dyn_cast<CallInst>(&I);
- if (Call && getVectorIntrinsicIDForCall(Call, TLI))
- continue;
-
// If the function has an explicit vectorized counterpart, we can safely
// assume that it can be vectorized.
if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
@@ -2400,8 +2531,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
if (Accesses.isDependencyCheckNeeded()) {
LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
CanVecMem = DepChecker->areDepsSafe(
- DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
- MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
+ DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides,
+ Accesses.getUnderlyingObjects());
if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
@@ -2464,12 +2595,24 @@ void LoopAccessInfo::emitUnsafeDependenceRemark() {
LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
// Emit remark for first unsafe dependence
+ bool HasForcedDistribution = false;
+ std::optional<const MDOperand *> Value =
+ findStringMetadataForLoop(TheLoop, "llvm.loop.distribute.enable");
+ if (Value) {
+ const MDOperand *Op = *Value;
+ assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
+ HasForcedDistribution = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
+ }
+
+ const std::string Info =
+ HasForcedDistribution
+ ? "unsafe dependent memory operations in loop."
+ : "unsafe dependent memory operations in loop. Use "
+ "#pragma clang loop distribute(enable) to allow loop distribution "
+ "to attempt to isolate the offending operations into a separate "
+ "loop";
OptimizationRemarkAnalysis &R =
- recordAnalysis("UnsafeDep", Dep.getDestination(*this))
- << "unsafe dependent memory operations in loop. Use "
- "#pragma loop distribute(enable) to allow loop distribution "
- "to attempt to isolate the offending operations into a separate "
- "loop";
+ recordAnalysis("UnsafeDep", Dep.getDestination(*this)) << Info;
switch (Dep.Type) {
case MemoryDepChecker::Dependence::NoDep:
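
The pragma suggested by the remark above is applied in user source like this
(hypothetical example; the spelling of the pragma is Clang's real syntax):

    void g(int *A, int *B, int N) {
    #pragma clang loop distribute(enable)
      for (int I = 3; I < N; ++I) {
        A[I] = B[I] + 1; // independently vectorizable
        B[I] = A[I - 3]; // offending dependence may be isolated
      }
    }
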
@@ -2487,6 +2630,9 @@ void LoopAccessInfo::emitUnsafeDependenceRemark() {
R << "\nBackward loop carried data dependence that prevents "
"store-to-load forwarding.";
break;
+ case MemoryDepChecker::Dependence::IndirectUnsafe:
+ R << "\nUnsafe indirect dependence.";
+ break;
case MemoryDepChecker::Dependence::Unknown:
R << "\nUnknown data dependence.";
break;
@@ -2766,9 +2912,10 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
if (CanVecMem) {
OS.indent(Depth) << "Memory dependences are safe";
- if (MaxSafeDepDistBytes != -1ULL)
- OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
- << " bytes";
+ const MemoryDepChecker &DC = getDepChecker();
+ if (!DC.isSafeForAnyVectorWidth())
+ OS << " with a maximum safe vector width of "
+ << DC.getMaxSafeVectorWidthInBits() << " bits";
if (PtrRtChecking->Need)
OS << " with run-time checks";
OS << "\n";
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp
index 60a72079e864..87ddfe3e92ae 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp
@@ -1143,7 +1143,7 @@ MDNode *llvm::makePostTransformationMetadata(LLVMContext &Context,
if (S)
IsVectorMetadata =
llvm::any_of(RemovePrefixes, [S](StringRef Prefix) -> bool {
- return S->getString().startswith(Prefix);
+ return S->getString().starts_with(Prefix);
});
}
if (!IsVectorMetadata)
@@ -1218,7 +1218,7 @@ PreservedAnalyses LoopVerifierPass::run(Function &F,
/// Traverse the loop blocks and store the DFS result.
/// Useful for clients that just want the final DFS result and don't need to
/// visit blocks during the initial traversal.
-void LoopBlocksDFS::perform(LoopInfo *LI) {
+void LoopBlocksDFS::perform(const LoopInfo *LI) {
LoopBlocksTraversal Traversal(*this, LI);
for (LoopBlocksTraversal::POTIterator POI = Traversal.begin(),
POE = Traversal.end();
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp
index 294dfd9d41c1..61d3a270d653 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp
@@ -114,7 +114,7 @@ void LPPassManager::markLoopAsDeleted(Loop &L) {
// there. However, we have to be careful to not remove the back of the queue
// as that is assumed to match the current loop.
assert(LQ.back() == CurrentLoop && "Loop queue back isn't the current loop!");
- llvm::erase_value(LQ, &L);
+ llvm::erase(LQ, &L);
if (&L == CurrentLoop) {
CurrentLoopDeleted = true;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MLInlineAdvisor.cpp b/contrib/llvm-project/llvm/lib/Analysis/MLInlineAdvisor.cpp
index 0660a9993b6d..75eb8ece2e44 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -192,7 +192,9 @@ void MLInlineAdvisor::onPassEntry(LazyCallGraph::SCC *LastSCC) {
// - in addition, if new Nodes were created by a pass (e.g. CoroSplit),
// they'd be adjacent to Nodes in the last SCC. So we just need to check the
// boundary of Nodes in NodesInLastSCC for Nodes we haven't seen. We don't
- // care about the nature of the Edge (call or ref).
+ // care about the nature of the Edge (call or ref). `FunctionLevels`-wise, we
+ // record them at the same level as the original node (this is a choice, may
+ // need revisiting).
NodeCount -= static_cast<int64_t>(NodesInLastSCC.size());
while (!NodesInLastSCC.empty()) {
const auto *N = *NodesInLastSCC.begin();
@@ -204,12 +206,15 @@ void MLInlineAdvisor::onPassEntry(LazyCallGraph::SCC *LastSCC) {
}
++NodeCount;
EdgeCount += getLocalCalls(N->getFunction());
+ const auto NLevel = FunctionLevels.at(N);
for (const auto &E : *(*N)) {
const auto *AdjNode = &E.getNode();
assert(!AdjNode->isDead() && !AdjNode->getFunction().isDeclaration());
auto I = AllNodes.insert(AdjNode);
- if (I.second)
+ if (I.second) {
NodesInLastSCC.insert(AdjNode);
+ FunctionLevels[AdjNode] = NLevel;
+ }
}
}
@@ -461,6 +466,12 @@ void MLInlineAdvisor::print(raw_ostream &OS) const {
OS << "\n";
}
OS << "\n";
+ OS << "[MLInlineAdvisor] FuncLevels:\n";
+ for (auto I : FunctionLevels)
+ OS << (I.first->isDead() ? "<deleted>" : I.first->getFunction().getName())
+ << " : " << I.second << "\n";
+
+ OS << "\n";
}
MLInlineAdvice::MLInlineAdvice(MLInlineAdvisor *Advisor, CallBase &CB,
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp
index 53e089ba1fea..9e6811f3bf88 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -50,6 +51,12 @@ using namespace llvm;
#define DEBUG_TYPE "memory-builtins"
+static cl::opt<unsigned> ObjectSizeOffsetVisitorMaxVisitInstructions(
+ "object-size-offset-visitor-max-visit-instructions",
+ cl::desc("Maximum number of instructions for ObjectSizeOffsetVisitor to "
+ "look at"),
+ cl::init(100));
+
enum AllocType : uint8_t {
OpNewLike = 1<<0, // allocates; never returns null
MallocLike = 1<<1, // allocates; may return null
@@ -275,10 +282,7 @@ static AllocFnKind getAllocFnKind(const Value *V) {
}
static AllocFnKind getAllocFnKind(const Function *F) {
- Attribute Attr = F->getFnAttribute(Attribute::AllocKind);
- if (Attr.isValid())
- return AllocFnKind(Attr.getValueAsInt());
- return AllocFnKind::Unknown;
+ return F->getAttributes().getAllocKind();
}
static bool checkFnAllocKind(const Value *V, AllocFnKind Wanted) {
@@ -694,6 +698,11 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout &DL,
}
SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
+ InstructionsVisited = 0;
+ return computeImpl(V);
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::computeImpl(Value *V) {
unsigned InitialIntTyBits = DL.getIndexTypeSizeInBits(V->getType());
// Stripping pointer casts can strip address space casts which can change the
@@ -710,14 +719,15 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
IntTyBits = DL.getIndexTypeSizeInBits(V->getType());
Zero = APInt::getZero(IntTyBits);
+ SizeOffsetType SOT = computeValue(V);
+
bool IndexTypeSizeChanged = InitialIntTyBits != IntTyBits;
if (!IndexTypeSizeChanged && Offset.isZero())
- return computeImpl(V);
+ return SOT;
// We stripped an address space cast that changed the index type size or we
// accumulated some constant offset (or both). Readjust the bit width to match
// the argument index type size and apply the offset, as required.
- SizeOffsetType SOT = computeImpl(V);
if (IndexTypeSizeChanged) {
if (knownSize(SOT) && !::CheckedZextOrTrunc(SOT.first, InitialIntTyBits))
SOT.first = APInt();
@@ -729,14 +739,21 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
SOT.second.getBitWidth() > 1 ? SOT.second + Offset : SOT.second};
}
-SizeOffsetType ObjectSizeOffsetVisitor::computeImpl(Value *V) {
+SizeOffsetType ObjectSizeOffsetVisitor::computeValue(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V)) {
// If we have already seen this instruction, bail out. Cycles can happen in
// unreachable code after constant propagation.
- if (!SeenInsts.insert(I).second)
+ auto P = SeenInsts.try_emplace(I, unknown());
+ if (!P.second)
+ return P.first->second;
+ ++InstructionsVisited;
+ if (InstructionsVisited > ObjectSizeOffsetVisitorMaxVisitInstructions)
return unknown();
-
- return visit(*I);
+ SizeOffsetType Res = visit(*I);
+ // Cache the result for later visits. If we happened to visit this during
+ // the above recursion, we would consider it unknown until now.
+ SeenInsts[I] = Res;
+ return Res;
}
if (Argument *A = dyn_cast<Argument>(V))
return visitArgument(*A);
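
A minimal sketch of the caching scheme above, with hypothetical Node/Result
stand-ins for the real instruction and SizeOffsetType types: seed the cache
with "unknown" before recursing so cycles terminate, store the final result
afterwards, and give up once the visit budget is exhausted.

    #include <unordered_map>

    struct Node;
    using Result = int;              // stand-in for SizeOffsetType
    constexpr Result Unknown = -1;
    Result visitNode(const Node *N); // the real per-node analysis

    struct Visitor {
      std::unordered_map<const Node *, Result> Seen;
      unsigned Visited = 0;
      static constexpr unsigned MaxVisit = 100;

      Result computeValue(const Node *N) {
        auto [It, Inserted] = Seen.try_emplace(N, Unknown);
        if (!Inserted)
          return It->second; // cycle or revisit: use cached value
        if (++Visited > MaxVisit)
          return Unknown;    // budget exhausted
        Result R = visitNode(N);
        Seen[N] = R;         // cache the final answer for later visits
        return R;
      }
    };
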
@@ -826,7 +843,7 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) {
SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalAlias(GlobalAlias &GA) {
if (GA.isInterposable())
return unknown();
- return compute(GA.getAliasee());
+ return computeImpl(GA.getAliasee());
}
SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){
@@ -881,7 +898,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::findLoadSizeOffset(
continue;
case AliasResult::MustAlias:
if (SI->getValueOperand()->getType()->isPointerTy())
- return Known(compute(SI->getValueOperand()));
+ return Known(computeImpl(SI->getValueOperand()));
else
return Unknown(); // No handling of non-pointer values by `compute`.
default:
@@ -984,7 +1001,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::combineSizeOffset(SizeOffsetType LHS,
return (getSizeWithOverflow(LHS).eq(getSizeWithOverflow(RHS))) ? LHS
: unknown();
case ObjectSizeOpts::Mode::ExactUnderlyingSizeAndOffset:
- return LHS == RHS && LHS.second.eq(RHS.second) ? LHS : unknown();
+ return LHS == RHS ? LHS : unknown();
}
llvm_unreachable("missing an eval mode");
}
@@ -994,15 +1011,15 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode &PN) {
return unknown();
auto IncomingValues = PN.incoming_values();
return std::accumulate(IncomingValues.begin() + 1, IncomingValues.end(),
- compute(*IncomingValues.begin()),
+ computeImpl(*IncomingValues.begin()),
[this](SizeOffsetType LHS, Value *VRHS) {
- return combineSizeOffset(LHS, compute(VRHS));
+ return combineSizeOffset(LHS, computeImpl(VRHS));
});
}
SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) {
- return combineSizeOffset(compute(I.getTrueValue()),
- compute(I.getFalseValue()));
+ return combineSizeOffset(computeImpl(I.getTrueValue()),
+ computeImpl(I.getFalseValue()));
}
SizeOffsetType ObjectSizeOffsetVisitor::visitUndefValue(UndefValue&) {
@@ -1191,7 +1208,8 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) {
// Compute offset/size for each PHI incoming pointer.
for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i) {
- Builder.SetInsertPoint(&*PHI.getIncomingBlock(i)->getFirstInsertionPt());
+ BasicBlock *IncomingBlock = PHI.getIncomingBlock(i);
+ Builder.SetInsertPoint(IncomingBlock, IncomingBlock->getFirstInsertionPt());
SizeOffsetEvalType EdgeData = compute_(PHI.getIncomingValue(i));
if (!bothKnown(EdgeData)) {
@@ -1203,8 +1221,8 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) {
InsertedInstructions.erase(SizePHI);
return unknown();
}
- SizePHI->addIncoming(EdgeData.first, PHI.getIncomingBlock(i));
- OffsetPHI->addIncoming(EdgeData.second, PHI.getIncomingBlock(i));
+ SizePHI->addIncoming(EdgeData.first, IncomingBlock);
+ OffsetPHI->addIncoming(EdgeData.second, IncomingBlock);
}
Value *Size = SizePHI, *Offset = OffsetPHI;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 071ecdba8a54..49eccde45f31 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -268,7 +268,7 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
- BatchAAResults BatchAA(AA);
+ BatchAAResults BatchAA(AA, &EII);
return getPointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst, Limit,
BatchAA);
}
@@ -360,11 +360,46 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
return MemDepResult::getNonLocal();
}
+// Check if SI, which may alias with MemLoc, can be safely skipped. This is
+// possible when SI can only must-alias or no-alias MemLoc (no partial
+// overlap possible) and it stores the same value that MemLoc currently
+// contains (the value was loaded before this store and was not modified
+// in between).
+static bool canSkipClobberingStore(const StoreInst *SI,
+ const MemoryLocation &MemLoc,
+ Align MemLocAlign, BatchAAResults &BatchAA,
+ unsigned ScanLimit) {
+ if (!MemLoc.Size.hasValue())
+ return false;
+ if (MemoryLocation::get(SI).Size != MemLoc.Size)
+ return false;
+ if (MemLoc.Size.isScalable())
+ return false;
+ if (std::min(MemLocAlign, SI->getAlign()).value() <
+ MemLoc.Size.getValue().getKnownMinValue())
+ return false;
+
+ auto *LI = dyn_cast<LoadInst>(SI->getValueOperand());
+ if (!LI || LI->getParent() != SI->getParent())
+ return false;
+ if (BatchAA.alias(MemoryLocation::get(LI), MemLoc) != AliasResult::MustAlias)
+ return false;
+ unsigned NumVisitedInsts = 0;
+ for (const Instruction *I = LI; I != SI; I = I->getNextNonDebugInstruction())
+ if (++NumVisitedInsts > ScanLimit ||
+ isModSet(BatchAA.getModRefInfo(I, MemLoc)))
+ return false;
+
+ return true;
+}
+
MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
BatchAAResults &BatchAA) {
bool isInvariantLoad = false;
+ Align MemLocAlign =
+ MemLoc.Ptr->getPointerAlignment(BB->getModule()->getDataLayout());
unsigned DefaultLimit = getDefaultBlockScanLimit();
if (!Limit)
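
A hypothetical source pattern that canSkipClobberingStore above recognizes:
the intervening store writes back exactly the value just loaded from the same
location, so it cannot change the queried bytes and the dependence walk may
continue past it.

    int h(int *P) {
      int V = *P; // load
      *P = V;     // stores the same, unmodified value: skippable
      return *P;  // may be answered by the first load
    }
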
@@ -402,11 +437,12 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
// do want to respect mustalias results since defs are useful for value
// forwarding, but any mayalias write can be assumed to be noalias.
// Arguably, this logic should be pushed inside AliasAnalysis itself.
- if (isLoad && QueryInst) {
- LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
- if (LI && LI->hasMetadata(LLVMContext::MD_invariant_load))
- isInvariantLoad = true;
- }
+ if (isLoad && QueryInst)
+ if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) {
+ if (LI->hasMetadata(LLVMContext::MD_invariant_load))
+ isInvariantLoad = true;
+ MemLocAlign = LI->getAlign();
+ }
// True for volatile instruction.
// For Load/Store return true if atomic ordering is stronger than AO,
@@ -577,6 +613,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
return MemDepResult::getDef(Inst);
if (isInvariantLoad)
continue;
+ if (canSkipClobberingStore(SI, MemLoc, MemLocAlign, BatchAA, *Limit))
+ continue;
return MemDepResult::getClobber(Inst);
}
@@ -610,11 +648,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
continue;
// See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
- ModRefInfo MR = BatchAA.getModRefInfo(Inst, MemLoc);
- // If necessary, perform additional analysis.
- if (isModAndRefSet(MR))
- MR = BatchAA.callCapturesBefore(Inst, MemLoc, &DT);
- switch (MR) {
+ switch (BatchAA.getModRefInfo(Inst, MemLoc)) {
case ModRefInfo::NoModRef:
// If the call has no effect on the queried pointer, just ignore it.
continue;
@@ -1068,7 +1102,8 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
// be conservative.
ThrowOutEverything =
CacheInfo->Size.isPrecise() != Loc.Size.isPrecise() ||
- CacheInfo->Size.getValue() < Loc.Size.getValue();
+ !TypeSize::isKnownGE(CacheInfo->Size.getValue(),
+ Loc.Size.getValue());
} else {
// For our purposes, unknown size > all others.
ThrowOutEverything = !Loc.Size.hasValue();
@@ -1192,7 +1227,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
bool GotWorklistLimit = false;
LLVM_DEBUG(AssertSorted(*Cache));
- BatchAAResults BatchAA(AA);
+ BatchAAResults BatchAA(AA, &EII);
while (!Worklist.empty()) {
BasicBlock *BB = Worklist.pop_back_val();
@@ -1504,6 +1539,8 @@ void MemoryDependenceResults::invalidateCachedPredecessors() {
}
void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
+ EII.removeInstruction(RemInst);
+
// Walk through the Non-local dependencies, removing this one as the value
// for any cached queries.
NonLocalDepMapType::iterator NLDI = NonLocalDepsMap.find(RemInst);
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp
index d16658028266..2cf92ceba010 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp
@@ -2390,6 +2390,10 @@ MemoryAccess *MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(
BatchAAResults &BAA, unsigned &UpwardWalkLimit) {
assert(!isa<MemoryUse>(StartingAccess) && "Use cannot be defining access");
+ // If location is undefined, conservatively return starting access.
+ if (Loc.Ptr == nullptr)
+ return StartingAccess;
+
Instruction *I = nullptr;
if (auto *StartingUseOrDef = dyn_cast<MemoryUseOrDef>(StartingAccess)) {
if (MSSA->isLiveOnEntryDef(StartingUseOrDef))
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index 2076ed48ea34..1f15e9478324 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -93,7 +93,7 @@ extern cl::opt<bool> ScalePartialSampleProfileWorkingSetSize;
// instruction in it takes an address of any basic block, because instruction
// can only take an address of basic block located in the same function.
static bool findRefEdges(ModuleSummaryIndex &Index, const User *CurUser,
- SetVector<ValueInfo> &RefEdges,
+ SetVector<ValueInfo, std::vector<ValueInfo>> &RefEdges,
SmallPtrSet<const User *, 8> &Visited) {
bool HasBlockAddress = false;
SmallVector<const User *, 32> Worklist;
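
The signature change above only swaps SetVector's backing container; a small
sketch of the same idea (assuming LLVM's ADT headers), where the second
template parameter selects the sequence type:

    #include "llvm/ADT/SetVector.h"
    #include <vector>

    // Deduplicated, insertion-ordered elements stored in a plain
    // std::vector instead of the default SmallVector.
    using IntSetVector = llvm::SetVector<int, std::vector<int>>;

    void demo() {
      IntSetVector S;
      S.insert(1);
      S.insert(1); // duplicate: ignored
      S.insert(2); // iteration order stays 1, 2
    }
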
@@ -144,9 +144,12 @@ static bool isNonRenamableLocal(const GlobalValue &GV) {
/// Determine whether this call has all constant integer arguments (excluding
/// "this") and summarize it to VCalls or ConstVCalls as appropriate.
-static void addVCallToSet(DevirtCallSite Call, GlobalValue::GUID Guid,
- SetVector<FunctionSummary::VFuncId> &VCalls,
- SetVector<FunctionSummary::ConstVCall> &ConstVCalls) {
+static void addVCallToSet(
+ DevirtCallSite Call, GlobalValue::GUID Guid,
+ SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>>
+ &VCalls,
+ SetVector<FunctionSummary::ConstVCall,
+ std::vector<FunctionSummary::ConstVCall>> &ConstVCalls) {
std::vector<uint64_t> Args;
// Start from the second argument to skip the "this" pointer.
for (auto &Arg : drop_begin(Call.CB.args())) {
@@ -163,11 +166,18 @@ static void addVCallToSet(DevirtCallSite Call, GlobalValue::GUID Guid,
/// If this intrinsic call requires that we add information to the function
/// summary, do so via the non-constant reference arguments.
static void addIntrinsicToSummary(
- const CallInst *CI, SetVector<GlobalValue::GUID> &TypeTests,
- SetVector<FunctionSummary::VFuncId> &TypeTestAssumeVCalls,
- SetVector<FunctionSummary::VFuncId> &TypeCheckedLoadVCalls,
- SetVector<FunctionSummary::ConstVCall> &TypeTestAssumeConstVCalls,
- SetVector<FunctionSummary::ConstVCall> &TypeCheckedLoadConstVCalls,
+ const CallInst *CI,
+ SetVector<GlobalValue::GUID, std::vector<GlobalValue::GUID>> &TypeTests,
+ SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>>
+ &TypeTestAssumeVCalls,
+ SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>>
+ &TypeCheckedLoadVCalls,
+ SetVector<FunctionSummary::ConstVCall,
+ std::vector<FunctionSummary::ConstVCall>>
+ &TypeTestAssumeConstVCalls,
+ SetVector<FunctionSummary::ConstVCall,
+ std::vector<FunctionSummary::ConstVCall>>
+ &TypeCheckedLoadConstVCalls,
DominatorTree &DT) {
switch (CI->getCalledFunction()->getIntrinsicID()) {
case Intrinsic::type_test:
@@ -269,12 +279,14 @@ static void computeFunctionSummary(
MapVector<ValueInfo, CalleeInfo, DenseMap<ValueInfo, unsigned>,
std::vector<std::pair<ValueInfo, CalleeInfo>>>
CallGraphEdges;
- SetVector<ValueInfo> RefEdges, LoadRefEdges, StoreRefEdges;
- SetVector<GlobalValue::GUID> TypeTests;
- SetVector<FunctionSummary::VFuncId> TypeTestAssumeVCalls,
- TypeCheckedLoadVCalls;
- SetVector<FunctionSummary::ConstVCall> TypeTestAssumeConstVCalls,
- TypeCheckedLoadConstVCalls;
+ SetVector<ValueInfo, std::vector<ValueInfo>> RefEdges, LoadRefEdges,
+ StoreRefEdges;
+ SetVector<GlobalValue::GUID, std::vector<GlobalValue::GUID>> TypeTests;
+ SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>>
+ TypeTestAssumeVCalls, TypeCheckedLoadVCalls;
+ SetVector<FunctionSummary::ConstVCall,
+ std::vector<FunctionSummary::ConstVCall>>
+ TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls;
ICallPromotionAnalysis ICallAnalysis;
SmallPtrSet<const User *, 8> Visited;
@@ -293,6 +305,7 @@ static void computeFunctionSummary(
bool HasInlineAsmMaybeReferencingInternal = false;
bool HasIndirBranchToBlockAddress = false;
+ bool HasIFuncCall = false;
bool HasUnknownCall = false;
bool MayThrow = false;
for (const BasicBlock &BB : F) {
@@ -396,15 +409,27 @@ static void computeFunctionSummary(
auto &ValueInfo = CallGraphEdges[Index.getOrInsertValueInfo(
cast<GlobalValue>(CalledValue))];
ValueInfo.updateHotness(Hotness);
+ if (CB->isTailCall())
+ ValueInfo.setHasTailCall(true);
// Add the relative block frequency to CalleeInfo if there is no profile
// information.
if (BFI != nullptr && Hotness == CalleeInfo::HotnessType::Unknown) {
uint64_t BBFreq = BFI->getBlockFreq(&BB).getFrequency();
- uint64_t EntryFreq = BFI->getEntryFreq();
+ uint64_t EntryFreq = BFI->getEntryFreq().getFrequency();
ValueInfo.updateRelBlockFreq(BBFreq, EntryFreq);
}
} else {
HasUnknownCall = true;
+ // If F is imported, a local linkage ifunc (e.g. target_clones on a
+ // static function) called by F will be cloned. Since summaries don't
+      // track ifuncs, we do not know that implementation functions referenced
+      // by the ifunc resolver need to be promoted in the exporter, and we will
+ // get linker errors due to cloned declarations for implementation
+ // functions. As a simple fix, just mark F as not eligible for import.
+ // Non-local ifunc is not cloned and does not have the issue.
+ if (auto *GI = dyn_cast_if_present<GlobalIFunc>(CalledValue))
+ if (GI->hasLocalLinkage())
+ HasIFuncCall = true;
// Skip inline assembly calls.
if (CI && CI->isInlineAsm())
continue;
@@ -505,7 +530,7 @@ static void computeFunctionSummary(
std::vector<ValueInfo> Refs;
if (IsThinLTO) {
auto AddRefEdges = [&](const std::vector<const Instruction *> &Instrs,
- SetVector<ValueInfo> &Edges,
+ SetVector<ValueInfo, std::vector<ValueInfo>> &Edges,
SmallPtrSet<const User *, 8> &Cache) {
for (const auto *I : Instrs) {
Cache.erase(I);
@@ -587,7 +612,7 @@ static void computeFunctionSummary(
bool NonRenamableLocal = isNonRenamableLocal(F);
bool NotEligibleForImport = NonRenamableLocal ||
HasInlineAsmMaybeReferencingInternal ||
- HasIndirBranchToBlockAddress;
+ HasIndirBranchToBlockAddress || HasIFuncCall;
GlobalValueSummary::GVFlags Flags(
F.getLinkage(), F.getVisibility(), NotEligibleForImport,
/* Live = */ false, F.isDSOLocal(), F.canBeOmittedFromSymbolTable());
@@ -710,7 +735,7 @@ static void computeVariableSummary(ModuleSummaryIndex &Index,
DenseSet<GlobalValue::GUID> &CantBePromoted,
const Module &M,
SmallVectorImpl<MDNode *> &Types) {
- SetVector<ValueInfo> RefEdges;
+ SetVector<ValueInfo, std::vector<ValueInfo>> RefEdges;
SmallPtrSet<const User *, 8> Visited;
bool HasBlockAddress = findRefEdges(Index, &V, RefEdges, Visited);
bool NonRenamableLocal = isNonRenamableLocal(V);
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
index 1ccf792d2f8c..ca87bc3c78be 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
@@ -20,6 +20,11 @@
/// TODO: Theoretically we could check for dependencies between objc_* calls
/// and FMRB_OnlyAccessesArgumentPointees calls or other well-behaved calls.
///
+/// TODO: The calls here to AAResultBase member functions are all effectively
+/// no-ops that just return a conservative result. The original intent was to
+/// chain to another analysis for a recursive query, but this was lost in a
+/// refactor. These should instead be rephrased in terms of queries to AAQI.AAR.
+///
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ObjCARCAliasAnalysis.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index 203f1e42733f..fdad14571dfe 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -23,16 +23,6 @@
#include <optional>
using namespace llvm;
-// Knobs for profile summary based thresholds.
-namespace llvm {
-extern cl::opt<int> ProfileSummaryCutoffHot;
-extern cl::opt<int> ProfileSummaryCutoffCold;
-extern cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold;
-extern cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold;
-extern cl::opt<int> ProfileSummaryHotCount;
-extern cl::opt<int> ProfileSummaryColdCount;
-} // namespace llvm
-
static cl::opt<bool> PartialProfile(
"partial-profile", cl::Hidden, cl::init(false),
cl::desc("Specify the current profile is used as a partial profile."));
diff --git a/contrib/llvm-project/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/RegionPrinter.cpp
index fbd3d17febff..ad59c79f4075 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/RegionPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/RegionPrinter.cpp
@@ -9,7 +9,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/RegionPrinter.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/Analysis/DOTGraphTraitsPass.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
index 39ab48b4a48e..580fe112fcd7 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -652,7 +652,7 @@ CompareValueComplexity(EquivalenceClasses<const Value *> &EqCacheValue,
if (LNumOps != RNumOps)
return (int)LNumOps - (int)RNumOps;
- for (unsigned Idx : seq(0u, LNumOps)) {
+ for (unsigned Idx : seq(LNumOps)) {
int Result =
CompareValueComplexity(EqCacheValue, LI, LInst->getOperand(Idx),
RInst->getOperand(Idx), Depth + 1);
@@ -1335,11 +1335,14 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
// Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
// subtraction is expensive. For this purpose, perform a quick and dirty
- // difference, by checking for Step in the operand list.
- SmallVector<const SCEV *, 4> DiffOps;
- for (const SCEV *Op : SA->operands())
- if (Op != Step)
- DiffOps.push_back(Op);
+ // difference, by checking for Step in the operand list. Note, that
+ // SA might have repeated ops, like %a + %a + ..., so only remove one.
+ SmallVector<const SCEV *, 4> DiffOps(SA->operands());
+ for (auto It = DiffOps.begin(); It != DiffOps.end(); ++It)
+ if (*It == Step) {
+ DiffOps.erase(It);
+ break;
+ }
if (DiffOps.size() == SA->getNumOperands())
return nullptr;
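
The fix above is careful to drop only one occurrence of Step; a std::find +
erase sketch of the same idea (removing every copy, e.g. via std::erase,
would miscount repeated operands such as %a + %a):

    #include <algorithm>
    #include <vector>

    void eraseFirstOccurrence(std::vector<int> &Ops, int Step) {
      auto It = std::find(Ops.begin(), Ops.end(), Step);
      if (It != Ops.end())
        Ops.erase(It); // first match only
    }
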
@@ -1571,8 +1574,7 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
- return getConstant(
- cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
+ return getConstant(SC->getAPInt().zext(getTypeSizeInBits(Ty)));
// zext(zext(x)) --> zext(x)
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
@@ -1715,9 +1717,9 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
Step = getZeroExtendExpr(Step, Ty, Depth + 1);
return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags());
}
-
+
// For a negative step, we can extend the operands iff doing so only
- // traverses values in the range zext([0,UINT_MAX]).
+ // traverses values in the range zext([0,UINT_MAX]).
if (isKnownNegative(Step)) {
const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
getSignedRangeMin(Step));
@@ -1908,8 +1910,7 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
- return getConstant(
- cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
+ return getConstant(SC->getAPInt().sext(getTypeSizeInBits(Ty)));
// sext(sext(x)) --> sext(x)
if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
@@ -3269,18 +3270,28 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(AddRec->getNumOperands());
const SCEV *Scale = getMulExpr(LIOps, SCEV::FlagAnyWrap, Depth + 1);
- for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
+
+ // If both the mul and addrec are nuw, we can preserve nuw.
+ // If both the mul and addrec are nsw, we can only preserve nsw if either
+ // a) they are also nuw, or
+ // b) all multiplications of addrec operands with scale are nsw.
+ SCEV::NoWrapFlags Flags =
+ AddRec->getNoWrapFlags(ComputeFlags({Scale, AddRec}));
+
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i),
SCEV::FlagAnyWrap, Depth + 1));
- // Build the new addrec. Propagate the NUW and NSW flags if both the
- // outer mul and the inner addrec are guaranteed to have no overflow.
- //
- // No self-wrap cannot be guaranteed after changing the step size, but
- // will be inferred if either NUW or NSW is true.
- SCEV::NoWrapFlags Flags = ComputeFlags({Scale, AddRec});
- const SCEV *NewRec = getAddRecExpr(
- NewOps, AddRec->getLoop(), AddRec->getNoWrapFlags(Flags));
+ if (hasFlags(Flags, SCEV::FlagNSW) && !hasFlags(Flags, SCEV::FlagNUW)) {
+ ConstantRange NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
+ Instruction::Mul, getSignedRange(Scale),
+ OverflowingBinaryOperator::NoSignedWrap);
+ if (!NSWRegion.contains(getSignedRange(AddRec->getOperand(i))))
+ Flags = clearFlags(Flags, SCEV::FlagNSW);
+ }
+ }
+
+ const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop(), Flags);
// If all of the other operands were loop invariant, we are done.
if (Ops.size() == 1) return NewRec;
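
Why clause (b) above is needed, with an invented i8 example: even when the
multiply and the addrec are individually nsw, scaling an addrec operand can
leave the signed range, so nsw must be dropped unless each Scale * operand
product is provably in range.

    #include <cstdint>
    #include <iostream>

    int main() {
      int8_t Operand = -128, Scale = -1;
      int16_t Wide = int16_t(Operand) * int16_t(Scale); // 128
      std::cout << "overflows i8: " << (Wide > INT8_MAX) << '\n';
      return 0;
    }
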
@@ -3661,8 +3672,8 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
assert(!Operands[i]->getType()->isPointerTy() && "Step must be integer");
}
for (unsigned i = 0, e = Operands.size(); i != e; ++i)
- assert(isLoopInvariant(Operands[i], L) &&
- "SCEVAddRecExpr operand is not loop-invariant!");
+ assert(isAvailableAtLoopEntry(Operands[i], L) &&
+ "SCEVAddRecExpr operand is not available at loop entry!");
#endif
if (Operands.back()->isZero()) {
@@ -4108,36 +4119,38 @@ static bool scevUnconditionallyPropagatesPoisonFromOperands(SCEVTypes Kind) {
llvm_unreachable("Unknown SCEV kind!");
}
-/// Return true if V is poison given that AssumedPoison is already poison.
-static bool impliesPoison(const SCEV *AssumedPoison, const SCEV *S) {
- // The only way poison may be introduced in a SCEV expression is from a
- // poison SCEVUnknown (ConstantExprs are also represented as SCEVUnknown,
- // not SCEVConstant). Notably, nowrap flags in SCEV nodes can *not*
- // introduce poison -- they encode guaranteed, non-speculated knowledge.
- //
- // Additionally, all SCEV nodes propagate poison from inputs to outputs,
- // with the notable exception of umin_seq, where only poison from the first
- // operand is (unconditionally) propagated.
- struct SCEVPoisonCollector {
- bool LookThroughMaybePoisonBlocking;
- SmallPtrSet<const SCEV *, 4> MaybePoison;
- SCEVPoisonCollector(bool LookThroughMaybePoisonBlocking)
- : LookThroughMaybePoisonBlocking(LookThroughMaybePoisonBlocking) {}
-
- bool follow(const SCEV *S) {
- if (!LookThroughMaybePoisonBlocking &&
- !scevUnconditionallyPropagatesPoisonFromOperands(S->getSCEVType()))
- return false;
+namespace {
+// The only way poison may be introduced in a SCEV expression is from a
+// poison SCEVUnknown (ConstantExprs are also represented as SCEVUnknown,
+// not SCEVConstant). Notably, nowrap flags in SCEV nodes can *not*
+// introduce poison -- they encode guaranteed, non-speculated knowledge.
+//
+// Additionally, all SCEV nodes propagate poison from inputs to outputs,
+// with the notable exception of umin_seq, where only poison from the first
+// operand is (unconditionally) propagated.
+struct SCEVPoisonCollector {
+ bool LookThroughMaybePoisonBlocking;
+ SmallPtrSet<const SCEVUnknown *, 4> MaybePoison;
+ SCEVPoisonCollector(bool LookThroughMaybePoisonBlocking)
+ : LookThroughMaybePoisonBlocking(LookThroughMaybePoisonBlocking) {}
+
+ bool follow(const SCEV *S) {
+ if (!LookThroughMaybePoisonBlocking &&
+ !scevUnconditionallyPropagatesPoisonFromOperands(S->getSCEVType()))
+ return false;
- if (auto *SU = dyn_cast<SCEVUnknown>(S)) {
- if (!isGuaranteedNotToBePoison(SU->getValue()))
- MaybePoison.insert(S);
- }
- return true;
+ if (auto *SU = dyn_cast<SCEVUnknown>(S)) {
+ if (!isGuaranteedNotToBePoison(SU->getValue()))
+ MaybePoison.insert(SU);
}
- bool isDone() const { return false; }
- };
+ return true;
+ }
+ bool isDone() const { return false; }
+};
+} // namespace
+/// Return true if V is poison given that AssumedPoison is already poison.
+static bool impliesPoison(const SCEV *AssumedPoison, const SCEV *S) {
// First collect all SCEVs that might result in AssumedPoison to be poison.
// We need to look through potentially poison-blocking operations here,
// because we want to find all SCEVs that *might* result in poison, not only
@@ -4158,8 +4171,17 @@ static bool impliesPoison(const SCEV *AssumedPoison, const SCEV *S) {
// Make sure that no matter which SCEV in PC1.MaybePoison is actually poison,
// it will also make S poison by being part of PC2.MaybePoison.
- return all_of(PC1.MaybePoison,
- [&](const SCEV *S) { return PC2.MaybePoison.contains(S); });
+ return all_of(PC1.MaybePoison, [&](const SCEVUnknown *S) {
+ return PC2.MaybePoison.contains(S);
+ });
+}
+
+void ScalarEvolution::getPoisonGeneratingValues(
+ SmallPtrSetImpl<const Value *> &Result, const SCEV *S) {
+ SCEVPoisonCollector PC(/* LookThroughMaybePoisonBlocking */ false);
+ visitAll(S, PC);
+ for (const SCEVUnknown *SU : PC.MaybePoison)
+ Result.insert(SU->getValue());
}
const SCEV *
@@ -4403,8 +4425,8 @@ Type *ScalarEvolution::getWiderType(Type *T1, Type *T2) const {
return getTypeSizeInBits(T1) >= getTypeSizeInBits(T2) ? T1 : T2;
}
-bool ScalarEvolution::instructionCouldExistWitthOperands(const SCEV *A,
- const SCEV *B) {
+bool ScalarEvolution::instructionCouldExistWithOperands(const SCEV *A,
+ const SCEV *B) {
/// For a valid use point to exist, the defining scope of one operand
/// must dominate the other.
bool PreciseA, PreciseB;
@@ -4417,7 +4439,6 @@ bool ScalarEvolution::instructionCouldExistWitthOperands(const SCEV *A,
DT.dominates(ScopeB, ScopeA);
}
-
const SCEV *ScalarEvolution::getCouldNotCompute() {
return CouldNotCompute.get();
}
@@ -4476,18 +4497,6 @@ void ScalarEvolution::insertValueToMap(Value *V, const SCEV *S) {
}
}
-/// Determine whether this instruction is either not SCEVable or will always
-/// produce a SCEVUnknown. We do not have to walk past such instructions when
-/// invalidating.
-static bool isAlwaysUnknown(const Instruction *I) {
- switch (I->getOpcode()) {
- case Instruction::Load:
- return true;
- default:
- return false;
- }
-}
-
/// Return an existing SCEV if it exists, otherwise analyze the expression and
/// create a new one.
const SCEV *ScalarEvolution::getSCEV(Value *V) {
@@ -4495,11 +4504,7 @@ const SCEV *ScalarEvolution::getSCEV(Value *V) {
if (const SCEV *S = getExistingSCEV(V))
return S;
- const SCEV *S = createSCEVIter(V);
- assert((!isa<Instruction>(V) || !isAlwaysUnknown(cast<Instruction>(V)) ||
- isa<SCEVUnknown>(S)) &&
- "isAlwaysUnknown() instruction is not SCEVUnknown");
- return S;
+ return createSCEVIter(V);
}
const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
@@ -4800,8 +4805,6 @@ static void PushDefUseChildren(Instruction *I,
// Push the def-use children onto the Worklist stack.
for (User *U : I->users()) {
auto *UserInsn = cast<Instruction>(U);
- if (isAlwaysUnknown(UserInsn))
- continue;
if (Visited.insert(UserInsn).second)
Worklist.push_back(UserInsn);
}
@@ -5220,11 +5223,8 @@ static std::optional<BinaryOp> MatchBinaryOp(Value *V, const DataLayout &DL,
return BinaryOp(Op);
case Instruction::Or: {
- // LLVM loves to convert `add` of operands with no common bits
- // into an `or`. But SCEV really doesn't deal with `or` that well,
- // so try extra hard to recognize this `or` as an `add`.
- if (haveNoCommonBitsSet(Op->getOperand(0), Op->getOperand(1), DL, &AC, CxtI,
- &DT, /*UseInstrInfo=*/true))
+ // Convert or disjoint into add nuw nsw.
+ if (cast<PossiblyDisjointInst>(Op)->isDisjoint())
return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1),
/*IsNSW=*/true, /*IsNUW=*/true);
return BinaryOp(Op);
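A minimal sketch of the flag consulted above (the helper name is hypothetical; the PossiblyDisjointInst interface is the one the patch itself uses):

  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // An 'or disjoint' guarantees its operands share no set bits, so it can be
  // treated as 'add nuw nsw' without re-running haveNoCommonBitsSet.
  static bool isAddLikeOr(const Instruction *I) {
    if (auto *PD = dyn_cast<PossiblyDisjointInst>(I))
      return PD->isDisjoint();
    return false;
  }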
@@ -6521,8 +6521,7 @@ ScalarEvolution::getRangeRefIter(const SCEV *S,
// Use getRangeRef to compute ranges for items in the worklist in reverse
// order. This will force ranges for earlier operands to be computed before
// their users in most cases.
- for (const SCEV *P :
- reverse(make_range(WorkList.begin() + 1, WorkList.end()))) {
+ for (const SCEV *P : reverse(drop_begin(WorkList))) {
getRangeRef(P, SignHint);
if (auto *UnknownS = dyn_cast<SCEVUnknown>(P))
@@ -7844,7 +7843,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
}
break;
- case Instruction::AShr: {
+ case Instruction::AShr:
// AShr X, C, where C is a constant.
ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS);
if (!CI)
@@ -7866,37 +7865,68 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
Type *TruncTy = IntegerType::get(getContext(), BitWidth - AShrAmt);
Operator *L = dyn_cast<Operator>(BO->LHS);
- if (L && L->getOpcode() == Instruction::Shl) {
+ const SCEV *AddTruncateExpr = nullptr;
+ ConstantInt *ShlAmtCI = nullptr;
+ const SCEV *AddConstant = nullptr;
+
+ if (L && L->getOpcode() == Instruction::Add) {
+ // X = Shl A, n
+ // Y = Add X, c
+ // Z = AShr Y, m
+ // n, c and m are constants.
+
+ Operator *LShift = dyn_cast<Operator>(L->getOperand(0));
+ ConstantInt *AddOperandCI = dyn_cast<ConstantInt>(L->getOperand(1));
+ if (LShift && LShift->getOpcode() == Instruction::Shl) {
+ if (AddOperandCI) {
+ const SCEV *ShlOp0SCEV = getSCEV(LShift->getOperand(0));
+ ShlAmtCI = dyn_cast<ConstantInt>(LShift->getOperand(1));
+          // Since we truncate to TruncTy, the AddConstant should be of the
+          // same type, so create a new constant of type TruncTy.
+          // Also, the Add constant should be shifted right by the AShr amount.
+ APInt AddOperand = AddOperandCI->getValue().ashr(AShrAmt);
+ AddConstant = getConstant(AddOperand.trunc(BitWidth - AShrAmt));
+          // We model the expression as sext(add(mul(trunc(A), 2^(n-m)),
+          // c >> m)). The sext(trunc) part is already handled below, so here
+          // we only create the truncate expression, which is combined with
+          // the constant later.
+ AddTruncateExpr = getTruncateExpr(ShlOp0SCEV, TruncTy);
+ }
+ }
+ } else if (L && L->getOpcode() == Instruction::Shl) {
// X = Shl A, n
// Y = AShr X, m
// Both n and m are constant.
const SCEV *ShlOp0SCEV = getSCEV(L->getOperand(0));
- if (L->getOperand(1) == BO->RHS)
- // For a two-shift sext-inreg, i.e. n = m,
- // use sext(trunc(x)) as the SCEV expression.
- return getSignExtendExpr(
- getTruncateExpr(ShlOp0SCEV, TruncTy), OuterTy);
-
- ConstantInt *ShlAmtCI = dyn_cast<ConstantInt>(L->getOperand(1));
- if (ShlAmtCI && ShlAmtCI->getValue().ult(BitWidth)) {
- uint64_t ShlAmt = ShlAmtCI->getZExtValue();
- if (ShlAmt > AShrAmt) {
- // When n > m, use sext(mul(trunc(x), 2^(n-m)))) as the SCEV
- // expression. We already checked that ShlAmt < BitWidth, so
- // the multiplier, 1 << (ShlAmt - AShrAmt), fits into TruncTy as
- // ShlAmt - AShrAmt < Amt.
- APInt Mul = APInt::getOneBitSet(BitWidth - AShrAmt,
- ShlAmt - AShrAmt);
- return getSignExtendExpr(
- getMulExpr(getTruncateExpr(ShlOp0SCEV, TruncTy),
- getConstant(Mul)), OuterTy);
- }
+ ShlAmtCI = dyn_cast<ConstantInt>(L->getOperand(1));
+ AddTruncateExpr = getTruncateExpr(ShlOp0SCEV, TruncTy);
+ }
+
+ if (AddTruncateExpr && ShlAmtCI) {
+    // We can merge the two given cases into a single SCEV expression;
+    // in case n = m, the mul expression will be 2^0, so it resolves to
+    // the simpler case. The following code handles the two cases:
+ //
+ // 1) For a two-shift sext-inreg, i.e. n = m,
+ // use sext(trunc(x)) as the SCEV expression.
+ //
+    // 2) When n > m, use sext(mul(trunc(x), 2^(n-m))) as the SCEV
+    //    expression. We already checked that ShlAmt < BitWidth, so
+    //    the multiplier, 1 << (ShlAmt - AShrAmt), fits into TruncTy as
+    //    ShlAmt - AShrAmt < BitWidth - AShrAmt.
+ uint64_t ShlAmt = ShlAmtCI->getZExtValue();
+ if (ShlAmtCI->getValue().ult(BitWidth) && ShlAmt >= AShrAmt) {
+ APInt Mul = APInt::getOneBitSet(BitWidth - AShrAmt, ShlAmt - AShrAmt);
+ const SCEV *CompositeExpr =
+ getMulExpr(AddTruncateExpr, getConstant(Mul));
+ if (L->getOpcode() != Instruction::Shl)
+ CompositeExpr = getAddExpr(CompositeExpr, AddConstant);
+
+ return getSignExtendExpr(CompositeExpr, OuterTy);
}
}
break;
}
- }
}
switch (U->getOpcode()) {
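A hedged worked example for the merged AShr handling above, with hypothetical numbers: BitWidth = 32, n = ShlAmt = 8, m = AShrAmt = 3, c = 0x700. TruncTy is then i29, the multiplier is 2^(8-3) = 32, and the folded constant is 0x700 >> 3 = 224:

  #include "llvm/ADT/APInt.h"
  using namespace llvm;

  unsigned BitWidth = 32, ShlAmt = 8, AShrAmt = 3;
  APInt AddOperand(BitWidth, 0x700);
  // Constant term, shifted right by the AShr amount and truncated to i29.
  APInt ShiftedC = AddOperand.ashr(AShrAmt).trunc(BitWidth - AShrAmt); // 224
  // Multiplier 1 << (ShlAmt - AShrAmt), built directly in the i29 width.
  APInt Mul = APInt::getOneBitSet(BitWidth - AShrAmt, ShlAmt - AShrAmt); // 32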
@@ -8377,6 +8407,44 @@ void ScalarEvolution::forgetValue(Value *V) {
forgetMemoizedResults(ToForget);
}
+void ScalarEvolution::forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V) {
+ if (!isSCEVable(V->getType()))
+ return;
+
+  // If SCEV looked through a trivial LCSSA phi node, we might have SCEVs
+  // directly using a SCEVUnknown/SCEVAddRec defined in the loop. After an
+  // extra predecessor is added, this is no longer valid. Find all Unknowns and
+  // AddRecs defined in the loop and invalidate any SCEVs making use of them.
+ if (const SCEV *S = getExistingSCEV(V)) {
+ struct InvalidationRootCollector {
+ Loop *L;
+ SmallVector<const SCEV *, 8> Roots;
+
+ InvalidationRootCollector(Loop *L) : L(L) {}
+
+ bool follow(const SCEV *S) {
+ if (auto *SU = dyn_cast<SCEVUnknown>(S)) {
+ if (auto *I = dyn_cast<Instruction>(SU->getValue()))
+ if (L->contains(I))
+ Roots.push_back(S);
+ } else if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
+ if (L->contains(AddRec->getLoop()))
+ Roots.push_back(S);
+ }
+ return true;
+ }
+ bool isDone() const { return false; }
+ };
+
+ InvalidationRootCollector C(L);
+ visitAll(S, C);
+ forgetMemoizedResults(C.Roots);
+ }
+
+ // Also perform the normal invalidation.
+ forgetValue(V);
+}
+
void ScalarEvolution::forgetLoopDispositions() { LoopDispositions.clear(); }
void ScalarEvolution::forgetBlockAndLoopDispositions(Value *V) {
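A hedged sketch of the intended call site for forgetLcssaPhiWithNewPredecessor; ExitBlock, L, and SE come from an assumed loop-transform context:

  // After wiring a new incoming edge into a loop's exit phis, drop any
  // cached SCEVs that may have looked through them.
  for (llvm::PHINode &PN : ExitBlock->phis())
    SE.forgetLcssaPhiWithNewPredecessor(&L, &PN);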
@@ -9007,7 +9075,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromICmp(
InnerLHS = ZExt->getOperand();
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(InnerLHS)) {
auto *StrideC = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this));
- if (!AR->hasNoSelfWrap() && AR->getLoop() == L && AR->isAffine() &&
+ if (!AR->hasNoSelfWrap() && AR->getLoop() == L && AR->isAffine() &&
StrideC && StrideC->getAPInt().isPowerOf2()) {
auto Flags = AR->getNoWrapFlags();
Flags = setFlags(Flags, SCEV::FlagNW);
@@ -9617,18 +9685,6 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
return cast<SCEVConstant>(V)->getValue();
case scUnknown:
return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
- case scSignExtend: {
- const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
- if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
- return ConstantExpr::getSExt(CastOp, SS->getType());
- return nullptr;
- }
- case scZeroExtend: {
- const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
- if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
- return ConstantExpr::getZExt(CastOp, SZ->getType());
- return nullptr;
- }
case scPtrToInt: {
const SCEVPtrToIntExpr *P2I = cast<SCEVPtrToIntExpr>(V);
if (Constant *CastOp = BuildConstantFromSCEV(P2I->getOperand()))
@@ -9655,12 +9711,9 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
}
assert(!C->getType()->isPointerTy() &&
"Can only have one pointer, and it must be last");
- if (auto *PT = dyn_cast<PointerType>(OpC->getType())) {
- // The offsets have been converted to bytes. We can add bytes to an
- // i8* by GEP with the byte count in the first index.
- Type *DestPtrTy =
- Type::getInt8PtrTy(PT->getContext(), PT->getAddressSpace());
- OpC = ConstantExpr::getBitCast(OpC, DestPtrTy);
+ if (OpC->getType()->isPointerTy()) {
+ // The offsets have been converted to bytes. We can add bytes using
+ // an i8 GEP.
C = ConstantExpr::getGetElementPtr(Type::getInt8Ty(C->getContext()),
OpC, C);
} else {
@@ -9669,25 +9722,16 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
}
return C;
}
- case scMulExpr: {
- const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
- Constant *C = nullptr;
- for (const SCEV *Op : SM->operands()) {
- assert(!Op->getType()->isPointerTy() && "Can't multiply pointers");
- Constant *OpC = BuildConstantFromSCEV(Op);
- if (!OpC)
- return nullptr;
- C = C ? ConstantExpr::getMul(C, OpC) : OpC;
- }
- return C;
- }
+ case scMulExpr:
+ case scSignExtend:
+ case scZeroExtend:
case scUDivExpr:
case scSMaxExpr:
case scUMaxExpr:
case scSMinExpr:
case scUMinExpr:
case scSequentialUMinExpr:
- return nullptr; // TODO: smax, umax, smin, umax, umin_seq.
+ return nullptr;
}
llvm_unreachable("Unknown SCEV kind!");
}
@@ -9855,7 +9899,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
// Do we have a loop invariant value flowing around the backedge
// for a loop which must execute the backedge?
if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
- isKnownPositive(BackedgeTakenCount) &&
+ isKnownNonZero(BackedgeTakenCount) &&
PN->getNumIncomingValues() == 2) {
unsigned InLoopPred =
@@ -9905,10 +9949,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
Constant *C = BuildConstantFromSCEV(OpV);
if (!C)
return V;
- if (C->getType() != Op->getType())
- C = ConstantExpr::getCast(
- CastInst::getCastOpcode(C, false, Op->getType(), false), C,
- Op->getType());
+ assert(C->getType() == Op->getType() && "Type mismatch");
Operands.push_back(C);
}
@@ -11735,6 +11776,9 @@ bool ScalarEvolution::isImpliedCondBalancedTypes(
if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS, CtxI))
return true;
+  if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundPred, FoundLHS,
+                                     FoundRHS))
+ return true;
+
// Otherwise assume the worst.
return false;
}
@@ -11992,7 +12036,7 @@ bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred,
auto ProvedEasily = [&](const SCEV *S1, const SCEV *S2) {
return isKnownViaNonRecursiveReasoning(Pred, S1, S2) ||
- isImpliedCondOperandsViaRanges(Pred, S1, S2, FoundLHS, FoundRHS) ||
+           isImpliedCondOperandsViaRanges(Pred, S1, S2, Pred, FoundLHS,
+                                          FoundRHS) ||
isImpliedViaOperations(Pred, S1, S2, FoundLHS, FoundRHS, Depth);
};
@@ -12094,7 +12138,7 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
const SCEV *FoundLHS,
const SCEV *FoundRHS,
const Instruction *CtxI) {
- if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))
+ if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, Pred, FoundLHS, FoundRHS))
return true;
if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS))
@@ -12446,6 +12490,7 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
const SCEV *LHS,
const SCEV *RHS,
+ ICmpInst::Predicate FoundPred,
const SCEV *FoundLHS,
const SCEV *FoundRHS) {
if (!isa<SCEVConstant>(RHS) || !isa<SCEVConstant>(FoundRHS))
@@ -12460,9 +12505,9 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
const APInt &ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt();
// `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the
- // antecedent "`FoundLHS` `Pred` `FoundRHS`".
+ // antecedent "`FoundLHS` `FoundPred` `FoundRHS`".
ConstantRange FoundLHSRange =
- ConstantRange::makeExactICmpRegion(Pred, ConstFoundRHS);
+ ConstantRange::makeExactICmpRegion(FoundPred, ConstFoundRHS);
// Since `LHS` is `FoundLHS` + `Addend`, we can compute a range for `LHS`:
ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(*Addend));
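A hedged numeric illustration of the range reasoning above, exercising the ConstantRange API directly: the antecedent x <u 8 places x in [0, 8); with Addend = 2, LHS = x + 2 lies in [2, 10), which is enough to prove LHS <u 10:

  #include "llvm/ADT/APInt.h"
  #include "llvm/IR/ConstantRange.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  ConstantRange FoundLHSRange =
      ConstantRange::makeExactICmpRegion(ICmpInst::ICMP_ULT, APInt(32, 8));
  ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(APInt(32, 2)));
  bool Implied =
      LHSRange.icmp(ICmpInst::ICMP_ULT, ConstantRange(APInt(32, 10))); // true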
@@ -12501,7 +12546,7 @@ bool ScalarEvolution::canIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
bool ScalarEvolution::canIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
bool IsSigned) {
-
+
unsigned BitWidth = getTypeSizeInBits(RHS->getType());
const SCEV *One = getOne(Stride->getType());
@@ -12622,6 +12667,11 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ZExt->getOperand());
if (AR && AR->getLoop() == L && AR->isAffine()) {
auto canProveNUW = [&]() {
+ // We can use the comparison to infer no-wrap flags only if it fully
+ // controls the loop exit.
+ if (!ControlsOnlyExit)
+ return false;
+
if (!isLoopInvariant(RHS, L))
return false;
@@ -12860,7 +12910,11 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
if (!BECount) {
auto canProveRHSGreaterThanEqualStart = [&]() {
auto CondGE = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
- if (isLoopEntryGuardedByCond(L, CondGE, OrigRHS, OrigStart))
+ const SCEV *GuardedRHS = applyLoopGuards(OrigRHS, L);
+ const SCEV *GuardedStart = applyLoopGuards(OrigStart, L);
+
+ if (isLoopEntryGuardedByCond(L, CondGE, OrigRHS, OrigStart) ||
+ isKnownPredicate(CondGE, GuardedRHS, GuardedStart))
return true;
// (RHS > Start - 1) implies RHS >= Start.
@@ -13251,26 +13305,7 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
// Forget all the expressions associated with users of the old value,
// so that future queries will recompute the expressions using the new
// value.
- Value *Old = getValPtr();
- SmallVector<User *, 16> Worklist(Old->users());
- SmallPtrSet<User *, 8> Visited;
- while (!Worklist.empty()) {
- User *U = Worklist.pop_back_val();
- // Deleting the Old value will cause this to dangle. Postpone
- // that until everything else is done.
- if (U == Old)
- continue;
- if (!Visited.insert(U).second)
- continue;
- if (PHINode *PN = dyn_cast<PHINode>(U))
- SE->ConstantEvolutionLoopExitValue.erase(PN);
- SE->eraseValueFromMap(U);
- llvm::append_range(Worklist, U->users());
- }
- // Delete the Old value.
- if (PHINode *PN = dyn_cast<PHINode>(Old))
- SE->ConstantEvolutionLoopExitValue.erase(PN);
- SE->eraseValueFromMap(Old);
+ SE->forgetValue(getValPtr());
// this now dangles!
}
@@ -13433,9 +13468,8 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
for (const auto *P : Preds)
P->print(OS, 4);
} else {
- OS << "Unpredictable predicated backedge-taken count. ";
+ OS << "Unpredictable predicated backedge-taken count.\n";
}
- OS << "\n";
if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
OS << "Loop ";
@@ -13825,7 +13859,7 @@ void ScalarEvolution::forgetMemoizedResultsImpl(const SCEV *S) {
if (ScopeIt != ValuesAtScopes.end()) {
for (const auto &Pair : ScopeIt->second)
if (!isa_and_nonnull<SCEVConstant>(Pair.second))
- erase_value(ValuesAtScopesUsers[Pair.second],
+ llvm::erase(ValuesAtScopesUsers[Pair.second],
std::make_pair(Pair.first, S));
ValuesAtScopes.erase(ScopeIt);
}
@@ -13833,7 +13867,7 @@ void ScalarEvolution::forgetMemoizedResultsImpl(const SCEV *S) {
auto ScopeUserIt = ValuesAtScopesUsers.find(S);
if (ScopeUserIt != ValuesAtScopesUsers.end()) {
for (const auto &Pair : ScopeUserIt->second)
- erase_value(ValuesAtScopes[Pair.second], std::make_pair(Pair.first, S));
+ llvm::erase(ValuesAtScopes[Pair.second], std::make_pair(Pair.first, S));
ValuesAtScopesUsers.erase(ScopeUserIt);
}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 08f7a91ff9b2..af8232b03f1e 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -30,7 +30,7 @@ static bool canComputePointerDiff(ScalarEvolution &SE,
SE.getEffectiveSCEVType(B->getType()))
return false;
- return SE.instructionCouldExistWitthOperands(A, B);
+ return SE.instructionCouldExistWithOperands(A, B);
}
AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
@@ -55,10 +55,10 @@ AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
if (canComputePointerDiff(SE, AS, BS)) {
unsigned BitWidth = SE.getTypeSizeInBits(AS->getType());
APInt ASizeInt(BitWidth, LocA.Size.hasValue()
- ? LocA.Size.getValue()
+ ? static_cast<uint64_t>(LocA.Size.getValue())
: MemoryLocation::UnknownSize);
APInt BSizeInt(BitWidth, LocB.Size.hasValue()
- ? LocB.Size.getValue()
+ ? static_cast<uint64_t>(LocB.Size.getValue())
: MemoryLocation::UnknownSize);
// Compute the difference between the two pointers.
@@ -105,8 +105,7 @@ AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
AAQI, nullptr) == AliasResult::NoAlias)
return AliasResult::NoAlias;
- // Forward the query to the next analysis.
- return AAResultBase::alias(LocA, LocB, AAQI, nullptr);
+ return AliasResult::MayAlias;
}
/// Given an expression, try to find a base value.
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp
index f16ac0ab0416..3815bdf49d59 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp
@@ -56,7 +56,7 @@ AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA,
AAQueryInfo &AAQI,
const Instruction *) {
if (!EnableScopedNoAlias)
- return AAResultBase::alias(LocA, LocB, AAQI, nullptr);
+ return AliasResult::MayAlias;
// Get the attached MDNodes.
const MDNode *AScopes = LocA.AATags.Scope, *BScopes = LocB.AATags.Scope;
@@ -69,15 +69,14 @@ AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA,
if (!mayAliasInScopes(BScopes, ANoAlias))
return AliasResult::NoAlias;
- // If they may alias, chain to the next AliasAnalysis.
- return AAResultBase::alias(LocA, LocB, AAQI, nullptr);
+ return AliasResult::MayAlias;
}
ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc,
AAQueryInfo &AAQI) {
if (!EnableScopedNoAlias)
- return AAResultBase::getModRefInfo(Call, Loc, AAQI);
+ return ModRefInfo::ModRef;
if (!mayAliasInScopes(Loc.AATags.Scope,
Call->getMetadata(LLVMContext::MD_noalias)))
@@ -87,14 +86,14 @@ ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call,
Loc.AATags.NoAlias))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(Call, Loc, AAQI);
+ return ModRefInfo::ModRef;
}
ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call1,
const CallBase *Call2,
AAQueryInfo &AAQI) {
if (!EnableScopedNoAlias)
- return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
+ return ModRefInfo::ModRef;
if (!mayAliasInScopes(Call1->getMetadata(LLVMContext::MD_alias_scope),
Call2->getMetadata(LLVMContext::MD_noalias)))
@@ -104,7 +103,7 @@ ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call1,
Call1->getMetadata(LLVMContext::MD_noalias)))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
+ return ModRefInfo::ModRef;
}
static void collectMDInDomain(const MDNode *List, const MDNode *Domain,
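A hedged, schematic sketch of why returning MayAlias/ModRef is now sufficient here and in the other AA results in this patch: the AAResults aggregator already queries every registered result and keeps the first precise answer, so an individual result no longer needs to chain to AAResultBase (loop simplified, not the verbatim implementation):

  AliasResult Result = AliasResult::MayAlias;
  for (const auto &AA : AAs) {           // registered AA results (assumed)
    Result = AA->alias(LocA, LocB, AAQI, CtxI);
    if (Result != AliasResult::MayAlias) // first precise answer wins
      return Result;
  }
  return Result;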
diff --git a/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp
index 606397727b01..da21e3f28e78 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp
@@ -272,7 +272,7 @@ ConstantRange StackSafetyLocalAnalysis::offsetFrom(Value *Addr, Value *Base) {
if (!SE.isSCEVable(Addr->getType()) || !SE.isSCEVable(Base->getType()))
return UnknownRange;
- auto *PtrTy = IntegerType::getInt8PtrTy(SE.getContext());
+ auto *PtrTy = PointerType::getUnqual(SE.getContext());
const SCEV *AddrExp = SE.getTruncateOrZeroExtend(SE.getSCEV(Addr), PtrTy);
const SCEV *BaseExp = SE.getTruncateOrZeroExtend(SE.getSCEV(Base), PtrTy);
const SCEV *Diff = SE.getMinusSCEV(AddrExp, BaseExp);
@@ -356,14 +356,14 @@ bool StackSafetyLocalAnalysis::isSafeAccess(const Use &U, AllocaInst *AI,
const SCEV *AccessSize) {
if (!AI)
- return true;
+ return true; // This only judges whether it is a safe *stack* access.
if (isa<SCEVCouldNotCompute>(AccessSize))
return false;
const auto *I = cast<Instruction>(U.getUser());
auto ToCharPtr = [&](const SCEV *V) {
- auto *PtrTy = IntegerType::getInt8PtrTy(SE.getContext());
+ auto *PtrTy = PointerType::getUnqual(SE.getContext());
return SE.getTruncateOrZeroExtend(V, PtrTy);
};
@@ -408,6 +408,23 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
assert(V == UI.get());
+  auto RecordStore = [&](const Value *StoredVal) {
+ if (V == StoredVal) {
+ // Stored the pointer - conservatively assume it may be unsafe.
+ US.addRange(I, UnknownRange, /*IsSafe=*/false);
+ return;
+ }
+ if (AI && !SL.isAliveAfter(AI, I)) {
+ US.addRange(I, UnknownRange, /*IsSafe=*/false);
+ return;
+ }
+ auto TypeSize = DL.getTypeStoreSize(StoredVal->getType());
+ auto AccessRange = getAccessRange(UI, Ptr, TypeSize);
+ bool Safe = isSafeAccess(UI, AI, TypeSize);
+ US.addRange(I, AccessRange, Safe);
+ return;
+ };
+
switch (I->getOpcode()) {
case Instruction::Load: {
if (AI && !SL.isAliveAfter(AI, I)) {
@@ -424,22 +441,15 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
case Instruction::VAArg:
// "va-arg" from a pointer is safe.
break;
- case Instruction::Store: {
- if (V == I->getOperand(0)) {
- // Stored the pointer - conservatively assume it may be unsafe.
- US.addRange(I, UnknownRange, /*IsSafe=*/false);
- break;
- }
- if (AI && !SL.isAliveAfter(AI, I)) {
- US.addRange(I, UnknownRange, /*IsSafe=*/false);
- break;
- }
- auto TypeSize = DL.getTypeStoreSize(I->getOperand(0)->getType());
- auto AccessRange = getAccessRange(UI, Ptr, TypeSize);
- bool Safe = isSafeAccess(UI, AI, TypeSize);
- US.addRange(I, AccessRange, Safe);
+ case Instruction::Store:
+ RecordStore(cast<StoreInst>(I)->getValueOperand());
+ break;
+ case Instruction::AtomicCmpXchg:
+ RecordStore(cast<AtomicCmpXchgInst>(I)->getNewValOperand());
+ break;
+ case Instruction::AtomicRMW:
+ RecordStore(cast<AtomicRMWInst>(I)->getValOperand());
break;
- }
case Instruction::Ret:
// Information leak.
@@ -986,6 +996,7 @@ void StackSafetyGlobalInfo::print(raw_ostream &O) const {
for (const auto &I : instructions(F)) {
const CallInst *Call = dyn_cast<CallInst>(&I);
if ((isa<StoreInst>(I) || isa<LoadInst>(I) || isa<MemIntrinsic>(I) ||
+ isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I) ||
(Call && Call->hasByValArgument())) &&
stackAccessIsSafe(I)) {
O << " " << I << "\n";
diff --git a/contrib/llvm-project/llvm/lib/Analysis/StructuralHash.cpp b/contrib/llvm-project/llvm/lib/Analysis/StructuralHash.cpp
new file mode 100644
index 000000000000..244ed5d55f3f
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Analysis/StructuralHash.cpp
@@ -0,0 +1,33 @@
+//===- StructuralHash.cpp - Function Hash Printing ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the StructuralHashPrinterPass which is used to show
+// the structural hash of all functions in a module and the module itself.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/StructuralHash.h"
+#include "llvm/IR/StructuralHash.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+PreservedAnalyses StructuralHashPrinterPass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
+ OS << "Module Hash: "
+ << Twine::utohexstr(StructuralHash(M, EnableDetailedStructuralHash))
+ << "\n";
+ for (Function &F : M) {
+ if (F.isDeclaration())
+ continue;
+ OS << "Function " << F.getName() << " Hash: "
+ << Twine::utohexstr(StructuralHash(F, EnableDetailedStructuralHash))
+ << "\n";
+ }
+ return PreservedAnalyses::all();
+}
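A hedged usage note, not part of the patch: assuming the usual pass-registry spelling, the printer can be exercised with "opt -passes='print<structural-hash>' -disable-output input.ll", which emits the "Module Hash:" line followed by one "Function ... Hash:" line per defined function.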
diff --git a/contrib/llvm-project/llvm/lib/Analysis/TFLiteUtils.cpp b/contrib/llvm-project/llvm/lib/Analysis/TFLiteUtils.cpp
index b2862033e9cf..2762e22f28ce 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/TFLiteUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/TFLiteUtils.cpp
@@ -1,4 +1,4 @@
-//===- TFUtils.cpp - tensorflow evaluation utilities ----------------------===//
+//===- TFUtils.cpp - TFLite-based evaluation utilities --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements utilities for interfacing with tensorflow C APIs.
+// This file implements utilities for interfacing with TFLite.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 05fa67d0bbf1..20959cf6948f 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -11,6 +11,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Constants.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
@@ -43,6 +45,14 @@ StringLiteral const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] =
#include "llvm/Analysis/TargetLibraryInfo.def"
};
+std::string VecDesc::getVectorFunctionABIVariantString() const {
+ assert(!VectorFnName.empty() && "Vector function name must not be empty.");
+ SmallString<256> Buffer;
+ llvm::raw_svector_ostream Out(Buffer);
+ Out << VABIPrefix << "_" << ScalarFnName << "(" << VectorFnName << ")";
+ return std::string(Out.str());
+}
+
// Recognized types of library function arguments and return types.
enum FuncArgTypeID : char {
Void = 0, // Must be zero.
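A hedged example of the getVectorFunctionABIVariantString helper added above, using an illustrative ArmPL-style mapping (values assumed; the brace-init mirrors how the VecFuncs.def entries are built later in this file):

  #include "llvm/Analysis/TargetLibraryInfo.h"
  using namespace llvm;

  VecDesc VD = {"sinf", "armpl_vsinq_f32", ElementCount::getFixed(4),
                /*Masked=*/false, /*VABIPrefix=*/"_ZGVnN4v"};
  // Yields "_ZGVnN4v_sinf(armpl_vsinq_f32)".
  std::string Variant = VD.getVectorFunctionABIVariantString();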
@@ -203,6 +213,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setAvailable(LibFunc_getchar_unlocked);
TLI.setAvailable(LibFunc_putc_unlocked);
TLI.setAvailable(LibFunc_putchar_unlocked);
+ TLI.setUnavailable(LibFunc_memrchr);
if (T.isMacOSXVersionLT(10, 5)) {
TLI.setUnavailable(LibFunc_memset_pattern4);
@@ -939,16 +950,26 @@ static StringRef sanitizeFunctionName(StringRef funcName) {
return GlobalValue::dropLLVMManglingEscape(funcName);
}
+static DenseMap<StringRef, LibFunc>
+buildIndexMap(ArrayRef<StringLiteral> StandardNames) {
+ DenseMap<StringRef, LibFunc> Indices;
+ unsigned Idx = 0;
+ Indices.reserve(LibFunc::NumLibFuncs);
+ for (const auto &Func : StandardNames)
+ Indices[Func] = static_cast<LibFunc>(Idx++);
+ return Indices;
+}
+
bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, LibFunc &F) const {
funcName = sanitizeFunctionName(funcName);
if (funcName.empty())
return false;
- const auto *Start = std::begin(StandardNames);
- const auto *End = std::end(StandardNames);
- const auto *I = std::lower_bound(Start, End, funcName);
- if (I != End && *I == funcName) {
- F = (LibFunc)(I - Start);
+ static const DenseMap<StringRef, LibFunc> Indices =
+ buildIndexMap(StandardNames);
+
+ if (auto Loc = Indices.find(funcName); Loc != Indices.end()) {
+ F = Loc->second;
return true;
}
return false;
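A hedged sketch of a typical caller, unchanged by this patch; only the lookup behind getLibFunc moved from a binary search over StandardNames to a one-time hashed index (TLI is an assumed TargetLibraryInfo):

  llvm::LibFunc LF;
  if (TLI.getLibFunc("memcpy", LF) && TLI.has(LF))
    ; // callee is a recognized, available library routine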
@@ -1117,8 +1138,15 @@ bool TargetLibraryInfoImpl::getLibFunc(const Function &FDecl,
const Module *M = FDecl.getParent();
assert(M && "Expecting FDecl to be connected to a Module.");
- return getLibFunc(FDecl.getName(), F) &&
- isValidProtoForLibFunc(*FDecl.getFunctionType(), F, *M);
+ if (FDecl.LibFuncCache == Function::UnknownLibFunc)
+ if (!getLibFunc(FDecl.getName(), FDecl.LibFuncCache))
+ FDecl.LibFuncCache = NotLibFunc;
+
+ if (FDecl.LibFuncCache == NotLibFunc)
+ return false;
+
+ F = FDecl.LibFuncCache;
+ return isValidProtoForLibFunc(*FDecl.getFunctionType(), F, *M);
}
void TargetLibraryInfoImpl::disableAllFunctions() {
@@ -1126,15 +1154,15 @@ void TargetLibraryInfoImpl::disableAllFunctions() {
}
static bool compareByScalarFnName(const VecDesc &LHS, const VecDesc &RHS) {
- return LHS.ScalarFnName < RHS.ScalarFnName;
+ return LHS.getScalarFnName() < RHS.getScalarFnName();
}
static bool compareByVectorFnName(const VecDesc &LHS, const VecDesc &RHS) {
- return LHS.VectorFnName < RHS.VectorFnName;
+ return LHS.getVectorFnName() < RHS.getVectorFnName();
}
static bool compareWithScalarFnName(const VecDesc &LHS, StringRef S) {
- return LHS.ScalarFnName < S;
+ return LHS.getScalarFnName() < S;
}
void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef<VecDesc> Fns) {
@@ -1191,17 +1219,20 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
case SLEEFGNUABI: {
const VecDesc VecFuncs_VF2[] = {
#define TLI_DEFINE_SLEEFGNUABI_VF2_VECFUNCS
-#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF, /* MASK = */ false},
+#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, VABI_PREFIX) \
+ {SCAL, VEC, VF, /* MASK = */ false, VABI_PREFIX},
#include "llvm/Analysis/VecFuncs.def"
};
const VecDesc VecFuncs_VF4[] = {
#define TLI_DEFINE_SLEEFGNUABI_VF4_VECFUNCS
-#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF, /* MASK = */ false},
+#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, VABI_PREFIX) \
+ {SCAL, VEC, VF, /* MASK = */ false, VABI_PREFIX},
#include "llvm/Analysis/VecFuncs.def"
};
const VecDesc VecFuncs_VFScalable[] = {
#define TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS
-#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK) {SCAL, VEC, VF, MASK},
+#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK, VABI_PREFIX) \
+ {SCAL, VEC, VF, MASK, VABI_PREFIX},
#include "llvm/Analysis/VecFuncs.def"
};
@@ -1220,7 +1251,8 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
case ArmPL: {
const VecDesc VecFuncs[] = {
#define TLI_DEFINE_ARMPL_VECFUNCS
-#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK) {SCAL, VEC, VF, MASK},
+#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK, VABI_PREFIX) \
+ {SCAL, VEC, VF, MASK, VABI_PREFIX},
#include "llvm/Analysis/VecFuncs.def"
};
@@ -1246,23 +1278,32 @@ bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const {
std::vector<VecDesc>::const_iterator I =
llvm::lower_bound(VectorDescs, funcName, compareWithScalarFnName);
- return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName;
+ return I != VectorDescs.end() && StringRef(I->getScalarFnName()) == funcName;
}
StringRef TargetLibraryInfoImpl::getVectorizedFunction(StringRef F,
const ElementCount &VF,
bool Masked) const {
+ const VecDesc *VD = getVectorMappingInfo(F, VF, Masked);
+ if (VD)
+ return VD->getVectorFnName();
+ return StringRef();
+}
+
+const VecDesc *
+TargetLibraryInfoImpl::getVectorMappingInfo(StringRef F, const ElementCount &VF,
+ bool Masked) const {
F = sanitizeFunctionName(F);
if (F.empty())
- return F;
+ return nullptr;
std::vector<VecDesc>::const_iterator I =
llvm::lower_bound(VectorDescs, F, compareWithScalarFnName);
- while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) {
- if ((I->VectorizationFactor == VF) && (I->Masked == Masked))
- return I->VectorFnName;
+ while (I != VectorDescs.end() && StringRef(I->getScalarFnName()) == F) {
+ if ((I->getVectorizationFactor() == VF) && (I->isMasked() == Masked))
+ return &(*I);
++I;
}
- return StringRef();
+ return nullptr;
}
TargetLibraryInfo TargetLibraryAnalysis::run(const Function &F,
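A hedged sketch of the new getVectorMappingInfo query, assuming the TargetLibraryInfo wrapper forwards it the same way it forwards getVectorizedFunction; unlike the name-only query, it exposes the whole descriptor, including the VABI prefix:

  if (const llvm::VecDesc *VD = TLI.getVectorMappingInfo(
          "sinf", llvm::ElementCount::getFixed(4), /*Masked=*/false)) {
    llvm::StringRef VecName = VD->getVectorFnName();
    std::string Variant = VD->getVectorFunctionABIVariantString();
  }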
@@ -1334,11 +1375,11 @@ void TargetLibraryInfoImpl::getWidestVF(StringRef ScalarF,
std::vector<VecDesc>::const_iterator I =
llvm::lower_bound(VectorDescs, ScalarF, compareWithScalarFnName);
- while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == ScalarF) {
+ while (I != VectorDescs.end() && StringRef(I->getScalarFnName()) == ScalarF) {
ElementCount *VF =
- I->VectorizationFactor.isScalable() ? &ScalableVF : &FixedVF;
- if (ElementCount::isKnownGT(I->VectorizationFactor, *VF))
- *VF = I->VectorizationFactor;
+ I->getVectorizationFactor().isScalable() ? &ScalableVF : &FixedVF;
+ if (ElementCount::isKnownGT(I->getVectorizationFactor(), *VF))
+ *VF = I->getVectorizationFactor();
++I;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
index c751d174a48a..3f76dfdaac31 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -213,6 +213,17 @@ unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
}
unsigned
+TargetTransformInfo::getInliningCostBenefitAnalysisSavingsMultiplier() const {
+ return TTIImpl->getInliningCostBenefitAnalysisSavingsMultiplier();
+}
+
+unsigned
+TargetTransformInfo::getInliningCostBenefitAnalysisProfitableMultiplier()
+ const {
+ return TTIImpl->getInliningCostBenefitAnalysisProfitableMultiplier();
+}
+
+unsigned
TargetTransformInfo::adjustInliningThreshold(const CallBase *CB) const {
return TTIImpl->adjustInliningThreshold(CB);
}
@@ -402,6 +413,10 @@ bool TargetTransformInfo::isNumRegsMajorCostOfLSR() const {
return TTIImpl->isNumRegsMajorCostOfLSR();
}
+bool TargetTransformInfo::shouldFoldTerminatingConditionAfterLSR() const {
+ return TTIImpl->shouldFoldTerminatingConditionAfterLSR();
+}
+
bool TargetTransformInfo::isProfitableLSRChainElement(Instruction *I) const {
return TTIImpl->isProfitableLSRChainElement(I);
}
@@ -667,6 +682,11 @@ TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
return Cost;
}
+bool TargetTransformInfo::preferToKeepConstantsAttached(
+ const Instruction &Inst, const Function &Fn) const {
+ return TTIImpl->preferToKeepConstantsAttached(Inst, Fn);
+}
+
unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const {
return TTIImpl->getNumberOfRegisters(ClassID);
}
@@ -1122,6 +1142,13 @@ bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
return TTIImpl->areInlineCompatible(Caller, Callee);
}
+unsigned
+TargetTransformInfo::getInlineCallPenalty(const Function *F,
+ const CallBase &Call,
+ unsigned DefaultCallPenalty) const {
+ return TTIImpl->getInlineCallPenalty(F, Call, DefaultCallPenalty);
+}
+
bool TargetTransformInfo::areTypesABICompatible(
const Function *Caller, const Function *Callee,
const ArrayRef<Type *> &Types) const {
diff --git a/contrib/llvm-project/llvm/lib/Analysis/TrainingLogger.cpp b/contrib/llvm-project/llvm/lib/Analysis/TrainingLogger.cpp
index e236890aa2bc..344ca92e18b5 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/TrainingLogger.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/TrainingLogger.cpp
@@ -27,11 +27,6 @@
using namespace llvm;
-// FIXME(mtrofin): remove the flag altogether
-static cl::opt<bool>
- UseSimpleLogger("tfutils-use-simplelogger", cl::init(true), cl::Hidden,
- cl::desc("Output simple (non-protobuf) log."));
-
void Logger::writeHeader(std::optional<TensorSpec> AdviceSpec) {
json::OStream JOS(*OS);
JOS.object([&]() {
diff --git a/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 529f3a76d23e..e4dc1a867f6f 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -375,11 +375,10 @@ AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA,
const MemoryLocation &LocB,
AAQueryInfo &AAQI, const Instruction *) {
if (!EnableTBAA)
- return AAResultBase::alias(LocA, LocB, AAQI, nullptr);
+ return AliasResult::MayAlias;
- // If accesses may alias, chain to the next AliasAnalysis.
if (Aliases(LocA.AATags.TBAA, LocB.AATags.TBAA))
- return AAResultBase::alias(LocA, LocB, AAQI, nullptr);
+ return AliasResult::MayAlias;
// Otherwise return a definitive result.
return AliasResult::NoAlias;
@@ -389,11 +388,11 @@ ModRefInfo TypeBasedAAResult::getModRefInfoMask(const MemoryLocation &Loc,
AAQueryInfo &AAQI,
bool IgnoreLocals) {
if (!EnableTBAA)
- return AAResultBase::getModRefInfoMask(Loc, AAQI, IgnoreLocals);
+ return ModRefInfo::ModRef;
const MDNode *M = Loc.AATags.TBAA;
if (!M)
- return AAResultBase::getModRefInfoMask(Loc, AAQI, IgnoreLocals);
+ return ModRefInfo::ModRef;
// If this is an "immutable" type, we can assume the pointer is pointing
// to constant memory.
@@ -401,13 +400,13 @@ ModRefInfo TypeBasedAAResult::getModRefInfoMask(const MemoryLocation &Loc,
(isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable()))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfoMask(Loc, AAQI, IgnoreLocals);
+ return ModRefInfo::ModRef;
}
MemoryEffects TypeBasedAAResult::getMemoryEffects(const CallBase *Call,
AAQueryInfo &AAQI) {
if (!EnableTBAA)
- return AAResultBase::getMemoryEffects(Call, AAQI);
+ return MemoryEffects::unknown();
// If this is an "immutable" type, the access is not observable.
if (const MDNode *M = Call->getMetadata(LLVMContext::MD_tbaa))
@@ -415,40 +414,40 @@ MemoryEffects TypeBasedAAResult::getMemoryEffects(const CallBase *Call,
(isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable()))
return MemoryEffects::none();
- return AAResultBase::getMemoryEffects(Call, AAQI);
+ return MemoryEffects::unknown();
}
MemoryEffects TypeBasedAAResult::getMemoryEffects(const Function *F) {
- // Functions don't have metadata. Just chain to the next implementation.
- return AAResultBase::getMemoryEffects(F);
+ // Functions don't have metadata.
+ return MemoryEffects::unknown();
}
ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc,
AAQueryInfo &AAQI) {
if (!EnableTBAA)
- return AAResultBase::getModRefInfo(Call, Loc, AAQI);
+ return ModRefInfo::ModRef;
if (const MDNode *L = Loc.AATags.TBAA)
if (const MDNode *M = Call->getMetadata(LLVMContext::MD_tbaa))
if (!Aliases(L, M))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(Call, Loc, AAQI);
+ return ModRefInfo::ModRef;
}
ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call1,
const CallBase *Call2,
AAQueryInfo &AAQI) {
if (!EnableTBAA)
- return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
+ return ModRefInfo::ModRef;
if (const MDNode *M1 = Call1->getMetadata(LLVMContext::MD_tbaa))
if (const MDNode *M2 = Call2->getMetadata(LLVMContext::MD_tbaa))
if (!Aliases(M1, M2))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
+ return ModRefInfo::ModRef;
}
bool MDNode::isTBAAVtableAccess() const {
diff --git a/contrib/llvm-project/llvm/lib/Analysis/UniformityAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/UniformityAnalysis.cpp
index bf0b194dcd70..2d617db431c5 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/UniformityAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/UniformityAnalysis.cpp
@@ -118,7 +118,7 @@ llvm::UniformityInfo UniformityInfoAnalysis::run(Function &F,
auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
auto &CI = FAM.getResult<CycleAnalysis>(F);
- UniformityInfo UI{F, DT, CI, &TTI};
+ UniformityInfo UI{DT, CI, &TTI};
// Skip computation if we can assume everything is uniform.
if (TTI.hasBranchDivergence(&F))
UI.compute();
@@ -171,8 +171,7 @@ bool UniformityInfoWrapperPass::runOnFunction(Function &F) {
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
m_function = &F;
- m_uniformityInfo =
- UniformityInfo{F, domTree, cycleInfo, &targetTransformInfo};
+ m_uniformityInfo = UniformityInfo{domTree, cycleInfo, &targetTransformInfo};
// Skip computation if we can assume everything is uniform.
if (targetTransformInfo.hasBranchDivergence(m_function))
diff --git a/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp b/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp
index 1e2d1db4e44b..ad918ef7245b 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp
@@ -7,9 +7,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <limits>
using namespace llvm;
+#define DEBUG_TYPE "vfabi-demangling"
+
namespace {
/// Utilities for the Vector Function ABI name parser.
@@ -21,12 +26,13 @@ enum class ParseRet {
};
/// Extracts the `<isa>` information from the mangled string, and
-/// sets the `ISA` accordingly.
-ParseRet tryParseISA(StringRef &MangledName, VFISAKind &ISA) {
+/// sets the `ISA` accordingly. If successful, the <isa> token is removed
+/// from the input string `MangledName`.
+static ParseRet tryParseISA(StringRef &MangledName, VFISAKind &ISA) {
if (MangledName.empty())
return ParseRet::Error;
- if (MangledName.startswith(VFABI::_LLVM_)) {
+ if (MangledName.starts_with(VFABI::_LLVM_)) {
MangledName = MangledName.drop_front(strlen(VFABI::_LLVM_));
ISA = VFISAKind::LLVM;
} else {
@@ -45,9 +51,9 @@ ParseRet tryParseISA(StringRef &MangledName, VFISAKind &ISA) {
}
/// Extracts the `<mask>` information from the mangled string, and
-/// sets `IsMasked` accordingly. The input string `MangledName` is
-/// left unmodified.
-ParseRet tryParseMask(StringRef &MangledName, bool &IsMasked) {
+/// sets `IsMasked` accordingly. If successful, the <mask> token is removed
+/// from the input string `MangledName`.
+static ParseRet tryParseMask(StringRef &MangledName, bool &IsMasked) {
if (MangledName.consume_front("M")) {
IsMasked = true;
return ParseRet::OK;
@@ -62,20 +68,28 @@ ParseRet tryParseMask(StringRef &MangledName, bool &IsMasked) {
}
/// Extract the `<vlen>` information from the mangled string, and
-/// sets `VF` accordingly. A `<vlen> == "x"` token is interpreted as a scalable
-/// vector length. On success, the `<vlen>` token is removed from
-/// the input string `ParseString`.
-///
-ParseRet tryParseVLEN(StringRef &ParseString, unsigned &VF, bool &IsScalable) {
+/// sets `ParsedVF` accordingly. A `<vlen> == "x"` token is interpreted as a
+/// scalable vector length and the boolean is set to true; otherwise a nonzero
+/// unsigned integer is used directly as the VF. On success, the `<vlen>`
+/// token is removed from the input string `ParseString`.
+static ParseRet tryParseVLEN(StringRef &ParseString, VFISAKind ISA,
+ std::pair<unsigned, bool> &ParsedVF) {
if (ParseString.consume_front("x")) {
- // Set VF to 0, to be later adjusted to a value grater than zero
- // by looking at the signature of the vector function with
- // `getECFromSignature`.
- VF = 0;
- IsScalable = true;
+ // SVE is the only scalable ISA currently supported.
+ if (ISA != VFISAKind::SVE) {
+ LLVM_DEBUG(dbgs() << "Vector function variant declared with scalable VF "
+ << "but ISA is not SVE\n");
+ return ParseRet::Error;
+ }
+    // We can't determine the VF of a scalable vector by looking at the vlen
+    // string (just 'x'), so report a successful parse with the scalable flag
+    // set and an invalid VF field; the actual VF is looked up later from the
+    // element types of the parameters or the return value.
+ ParsedVF = {0, true};
return ParseRet::OK;
}
+ unsigned VF = 0;
if (ParseString.consumeInteger(10, VF))
return ParseRet::Error;
@@ -83,7 +97,7 @@ ParseRet tryParseVLEN(StringRef &ParseString, unsigned &VF, bool &IsScalable) {
if (VF == 0)
return ParseRet::Error;
- IsScalable = false;
+ ParsedVF = {VF, false};
return ParseRet::OK;
}
@@ -99,9 +113,9 @@ ParseRet tryParseVLEN(StringRef &ParseString, unsigned &VF, bool &IsScalable) {
///
/// The function expects <token> to be one of "ls", "Rs", "Us" or
/// "Ls".
-ParseRet tryParseLinearTokenWithRuntimeStep(StringRef &ParseString,
- VFParamKind &PKind, int &Pos,
- const StringRef Token) {
+static ParseRet tryParseLinearTokenWithRuntimeStep(StringRef &ParseString,
+ VFParamKind &PKind, int &Pos,
+ const StringRef Token) {
if (ParseString.consume_front(Token)) {
PKind = VFABI::getVFParamKindFromString(Token);
if (ParseString.consumeInteger(10, Pos))
@@ -123,8 +137,9 @@ ParseRet tryParseLinearTokenWithRuntimeStep(StringRef &ParseString,
/// sets `PKind` to the correspondent enum value, sets `StepOrPos` to
/// <number>, and return success. On a syntax error, it return a
/// parsing error. If nothing is parsed, it returns std::nullopt.
-ParseRet tryParseLinearWithRuntimeStep(StringRef &ParseString,
- VFParamKind &PKind, int &StepOrPos) {
+static ParseRet tryParseLinearWithRuntimeStep(StringRef &ParseString,
+ VFParamKind &PKind,
+ int &StepOrPos) {
ParseRet Ret;
// "ls" <RuntimeStepPos>
@@ -162,9 +177,10 @@ ParseRet tryParseLinearWithRuntimeStep(StringRef &ParseString,
///
/// The function expects <token> to be one of "l", "R", "U" or
/// "L".
-ParseRet tryParseCompileTimeLinearToken(StringRef &ParseString,
- VFParamKind &PKind, int &LinearStep,
- const StringRef Token) {
+static ParseRet tryParseCompileTimeLinearToken(StringRef &ParseString,
+ VFParamKind &PKind,
+ int &LinearStep,
+ const StringRef Token) {
if (ParseString.consume_front(Token)) {
PKind = VFABI::getVFParamKindFromString(Token);
const bool Negate = ParseString.consume_front("n");
@@ -187,8 +203,9 @@ ParseRet tryParseCompileTimeLinearToken(StringRef &ParseString,
/// sets `PKind` to the correspondent enum value, sets `LinearStep` to
/// <number>, and return success. On a syntax error, it return a
/// parsing error. If nothing is parsed, it returns std::nullopt.
-ParseRet tryParseLinearWithCompileTimeStep(StringRef &ParseString,
- VFParamKind &PKind, int &StepOrPos) {
+static ParseRet tryParseLinearWithCompileTimeStep(StringRef &ParseString,
+ VFParamKind &PKind,
+ int &StepOrPos) {
// "l" {"n"} <CompileTimeStep>
if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "l") ==
ParseRet::OK)
@@ -220,8 +237,8 @@ ParseRet tryParseLinearWithCompileTimeStep(StringRef &ParseString,
/// sets `PKind` to the correspondent enum value, sets `StepOrPos`
/// accordingly, and return success. On a syntax error, it return a
/// parsing error. If nothing is parsed, it returns std::nullopt.
-ParseRet tryParseParameter(StringRef &ParseString, VFParamKind &PKind,
- int &StepOrPos) {
+static ParseRet tryParseParameter(StringRef &ParseString, VFParamKind &PKind,
+ int &StepOrPos) {
if (ParseString.consume_front("v")) {
PKind = VFParamKind::Vector;
StepOrPos = 0;
@@ -255,7 +272,7 @@ ParseRet tryParseParameter(StringRef &ParseString, VFParamKind &PKind,
/// sets `PKind` to the correspondent enum value, sets `StepOrPos`
/// accordingly, and return success. On a syntax error, it return a
/// parsing error. If nothing is parsed, it returns std::nullopt.
-ParseRet tryParseAlign(StringRef &ParseString, Align &Alignment) {
+static ParseRet tryParseAlign(StringRef &ParseString, Align &Alignment) {
uint64_t Val;
// "a" <number>
if (ParseString.consume_front("a")) {
@@ -273,49 +290,86 @@ ParseRet tryParseAlign(StringRef &ParseString, Align &Alignment) {
return ParseRet::None;
}
-#ifndef NDEBUG
-// Verify the assumtion that all vectors in the signature of a vector
-// function have the same number of elements.
-bool verifyAllVectorsHaveSameWidth(FunctionType *Signature) {
- SmallVector<VectorType *, 2> VecTys;
- if (auto *RetTy = dyn_cast<VectorType>(Signature->getReturnType()))
- VecTys.push_back(RetTy);
- for (auto *Ty : Signature->params())
- if (auto *VTy = dyn_cast<VectorType>(Ty))
- VecTys.push_back(VTy);
-
- if (VecTys.size() <= 1)
- return true;
-
- assert(VecTys.size() > 1 && "Invalid number of elements.");
- const ElementCount EC = VecTys[0]->getElementCount();
- return llvm::all_of(llvm::drop_begin(VecTys), [&EC](VectorType *VTy) {
- return (EC == VTy->getElementCount());
- });
+// Returns the 'natural' VF for a given scalar element type, based on the
+// given ISA.
+//
+// For SVE (currently the only scalable architecture with a defined name
+// mangling), we assume a minimum vector size of 128 bits and return a VF
+// based on the number of elements of the given type that would fit in such
+// a vector.
+static std::optional<ElementCount> getElementCountForTy(const VFISAKind ISA,
+ const Type *Ty) {
+ // Only AArch64 SVE is supported at present.
+ assert(ISA == VFISAKind::SVE &&
+ "Scalable VF decoding only implemented for SVE\n");
+
+ if (Ty->isIntegerTy(64) || Ty->isDoubleTy() || Ty->isPointerTy())
+ return ElementCount::getScalable(2);
+ if (Ty->isIntegerTy(32) || Ty->isFloatTy())
+ return ElementCount::getScalable(4);
+ if (Ty->isIntegerTy(16) || Ty->is16bitFPTy())
+ return ElementCount::getScalable(8);
+ if (Ty->isIntegerTy(8))
+ return ElementCount::getScalable(16);
+
+ return std::nullopt;
}
-#endif // NDEBUG
-
-// Extract the VectorizationFactor from a given function signature,
-// under the assumtion that all vectors have the same number of
-// elements, i.e. same ElementCount.Min.
-ElementCount getECFromSignature(FunctionType *Signature) {
- assert(verifyAllVectorsHaveSameWidth(Signature) &&
- "Invalid vector signature.");
-
- if (auto *RetTy = dyn_cast<VectorType>(Signature->getReturnType()))
- return RetTy->getElementCount();
- for (auto *Ty : Signature->params())
- if (auto *VTy = dyn_cast<VectorType>(Ty))
- return VTy->getElementCount();
-
- return ElementCount::getFixed(/*Min=*/1);
+
+// Extract the VectorizationFactor from a given function signature, based
+// on the widest scalar element types that will become vector parameters.
+static std::optional<ElementCount>
+getScalableECFromSignature(const FunctionType *Signature, const VFISAKind ISA,
+ const SmallVectorImpl<VFParameter> &Params) {
+ // Start with a very wide EC and drop when we find smaller ECs based on type.
+ ElementCount MinEC =
+ ElementCount::getScalable(std::numeric_limits<unsigned int>::max());
+ for (auto &Param : Params) {
+ // Only vector parameters are used when determining the VF; uniform or
+ // linear are left as scalars, so do not affect VF.
+ if (Param.ParamKind == VFParamKind::Vector) {
+ // If the scalar function doesn't actually have a corresponding argument,
+ // reject the mapping.
+ if (Param.ParamPos >= Signature->getNumParams())
+ return std::nullopt;
+ Type *PTy = Signature->getParamType(Param.ParamPos);
+
+ std::optional<ElementCount> EC = getElementCountForTy(ISA, PTy);
+      // If we have an unknown scalar element type, we can't find a
+      // reasonable VF.
+ if (!EC)
+ return std::nullopt;
+
+ // Find the smallest VF, based on the widest scalar type.
+ if (ElementCount::isKnownLT(*EC, MinEC))
+ MinEC = *EC;
+ }
+ }
+
+ // Also check the return type if not void.
+ Type *RetTy = Signature->getReturnType();
+ if (!RetTy->isVoidTy()) {
+ std::optional<ElementCount> ReturnEC = getElementCountForTy(ISA, RetTy);
+    // If we have an unknown scalar element type, we can't find a reasonable VF.
+ if (!ReturnEC)
+ return std::nullopt;
+ if (ElementCount::isKnownLT(*ReturnEC, MinEC))
+ MinEC = *ReturnEC;
+ }
+
+ // The SVE Vector function call ABI bases the VF on the widest element types
+ // present, and vector arguments containing types of that width are always
+ // considered to be packed. Arguments with narrower elements are considered
+ // to be unpacked.
+ if (MinEC.getKnownMinValue() < std::numeric_limits<unsigned int>::max())
+ return MinEC;
+
+ return std::nullopt;
}
} // namespace
// Format of the ABI name:
// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]
std::optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName,
- const Module &M) {
+ const FunctionType *FTy) {
const StringRef OriginalName = MangledName;
// Assume there is no custom name <redirection>, and therefore the
// vector name consists of
@@ -338,9 +392,8 @@ std::optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName,
return std::nullopt;
// Parse the variable size, starting from <vlen>.
- unsigned VF;
- bool IsScalable;
- if (tryParseVLEN(MangledName, VF, IsScalable) != ParseRet::OK)
+ std::pair<unsigned, bool> ParsedVF;
+ if (tryParseVLEN(MangledName, ISA, ParsedVF) != ParseRet::OK)
return std::nullopt;
// Parse the <parameters>.
@@ -374,6 +427,19 @@ std::optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName,
if (Parameters.empty())
return std::nullopt;
+ // Figure out the number of lanes in vectors for this function variant. This
+ // is easy for fixed length, as the vlen encoding just gives us the value
+ // directly. However, if the vlen mangling indicated that this function
+  // variant expects scalable vectors, we need to work it out based on the
+ // demangled parameter types and the scalar function signature.
+ std::optional<ElementCount> EC;
+ if (ParsedVF.second) {
+ EC = getScalableECFromSignature(FTy, ISA, Parameters);
+ if (!EC)
+ return std::nullopt;
+ } else
+ EC = ElementCount::getFixed(ParsedVF.first);
+
// Check for the <scalarname> and the optional <redirection>, which
// are separated from the prefix with "_"
if (!MangledName.consume_front("_"))
@@ -426,32 +492,7 @@ std::optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName,
assert(Parameters.back().ParamKind == VFParamKind::GlobalPredicate &&
"The global predicate must be the last parameter");
- // Adjust the VF for scalable signatures. The EC.Min is not encoded
- // in the name of the function, but it is encoded in the IR
- // signature of the function. We need to extract this information
- // because it is needed by the loop vectorizer, which reasons in
- // terms of VectorizationFactor or ElementCount. In particular, we
- // need to make sure that the VF field of the VFShape class is never
- // set to 0.
- if (IsScalable) {
- const Function *F = M.getFunction(VectorName);
- // The declaration of the function must be present in the module
- // to be able to retrieve its signature.
- if (!F)
- return std::nullopt;
- const ElementCount EC = getECFromSignature(F->getFunctionType());
- VF = EC.getKnownMinValue();
- }
-
- // 1. We don't accept a zero lanes vectorization factor.
- // 2. We don't accept the demangling if the vector function is not
- // present in the module.
- if (VF == 0)
- return std::nullopt;
- if (!M.getFunction(VectorName))
- return std::nullopt;
-
- const VFShape Shape({ElementCount::get(VF, IsScalable), Parameters});
+ const VFShape Shape({*EC, Parameters});
return VFInfo({Shape, std::string(ScalarName), std::string(VectorName), ISA});
}
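A hedged end-to-end example of the new interface (Ctx is an assumed LLVMContext): demangling the SVE variant "_ZGVsMxv_sinf" against the scalar prototype float(float) no longer requires the vector declaration to be present in a module; the widest element type is 32 bits wide, so the derived VF is vscale x 4:

  #include "llvm/Analysis/VectorUtils.h"
  #include "llvm/IR/DerivedTypes.h"
  #include <cassert>
  #include <optional>
  using namespace llvm;

  FunctionType *FTy = FunctionType::get(
      Type::getFloatTy(Ctx), {Type::getFloatTy(Ctx)}, /*isVarArg=*/false);
  if (std::optional<VFInfo> Info =
          VFABI::tryDemangleForVFABI("_ZGVsMxv_sinf", FTy))
    assert(Info->Shape.VF == ElementCount::getScalable(4));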
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
index 410f93b1c215..9ae05a4b5ccc 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomConditionCache.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
@@ -33,6 +34,7 @@
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/Analysis/WithCache.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -148,117 +150,105 @@ static void computeKnownBits(const Value *V, const APInt &DemandedElts,
KnownBits &Known, unsigned Depth,
const SimplifyQuery &Q);
-static void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
- const SimplifyQuery &Q) {
+void llvm::computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
+ const SimplifyQuery &Q) {
// Since the number of lanes in a scalable vector is unknown at compile time,
// we track one bit which is implicitly broadcast to all lanes. This means
// that all lanes in a scalable vector are considered demanded.
auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
APInt DemandedElts =
FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
- computeKnownBits(V, DemandedElts, Known, Depth, Q);
+ ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
}
void llvm::computeKnownBits(const Value *V, KnownBits &Known,
const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT, bool UseInstrInfo) {
- ::computeKnownBits(V, Known, Depth,
- SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC,
- safeCxtI(V, CxtI), UseInstrInfo));
-}
-
-void llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
- KnownBits &Known, const DataLayout &DL,
- unsigned Depth, AssumptionCache *AC,
- const Instruction *CxtI, const DominatorTree *DT,
- bool UseInstrInfo) {
- ::computeKnownBits(V, DemandedElts, Known, Depth,
- SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC,
- safeCxtI(V, CxtI), UseInstrInfo));
+ computeKnownBits(
+ V, Known, Depth,
+ SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}
-static KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts,
- unsigned Depth, const SimplifyQuery &Q);
-
-static KnownBits computeKnownBits(const Value *V, unsigned Depth,
- const SimplifyQuery &Q);
-
KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT, bool UseInstrInfo) {
- return ::computeKnownBits(V, Depth,
- SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC,
- safeCxtI(V, CxtI), UseInstrInfo));
+ return computeKnownBits(
+ V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}
KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT, bool UseInstrInfo) {
- return ::computeKnownBits(V, DemandedElts, Depth,
- SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC,
- safeCxtI(V, CxtI), UseInstrInfo));
+ return computeKnownBits(
+ V, DemandedElts, Depth,
+ SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}
-bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
- const DataLayout &DL, AssumptionCache *AC,
- const Instruction *CxtI, const DominatorTree *DT,
- bool UseInstrInfo) {
- assert(LHS->getType() == RHS->getType() &&
- "LHS and RHS should have the same type");
- assert(LHS->getType()->isIntOrIntVectorTy() &&
- "LHS and RHS should be integers");
+static bool haveNoCommonBitsSetSpecialCases(const Value *LHS, const Value *RHS,
+ const SimplifyQuery &SQ) {
// Look for an inverted mask: (X & ~M) op (Y & M).
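+  // M must be known not-undef here: each use of an undef M may take a
+  // different value, in which case the two masks need not be disjoint.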
{
Value *M;
if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
- match(RHS, m_c_And(m_Specific(M), m_Value())))
- return true;
- if (match(RHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
- match(LHS, m_c_And(m_Specific(M), m_Value())))
+ match(RHS, m_c_And(m_Specific(M), m_Value())) &&
+ isGuaranteedNotToBeUndef(M, SQ.AC, SQ.CxtI, SQ.DT))
return true;
}
// X op (Y & ~X)
- if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) ||
- match(LHS, m_c_And(m_Not(m_Specific(RHS)), m_Value())))
+ if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) &&
+ isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
return true;
// X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern
// for constant Y.
Value *Y;
if (match(RHS,
- m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) ||
- match(LHS, m_c_Xor(m_c_And(m_Specific(RHS), m_Value(Y)), m_Deferred(Y))))
+ m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) &&
+ isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT) &&
+ isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
return true;
// Peek through extends to find a 'not' of the other side:
// (ext Y) op ext(~Y)
- // (ext ~Y) op ext(Y)
- if ((match(LHS, m_ZExtOrSExt(m_Value(Y))) &&
- match(RHS, m_ZExtOrSExt(m_Not(m_Specific(Y))))) ||
- (match(RHS, m_ZExtOrSExt(m_Value(Y))) &&
- match(LHS, m_ZExtOrSExt(m_Not(m_Specific(Y))))))
+ if (match(LHS, m_ZExtOrSExt(m_Value(Y))) &&
+ match(RHS, m_ZExtOrSExt(m_Not(m_Specific(Y)))) &&
+ isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
return true;
// Look for: (A & B) op ~(A | B)
{
Value *A, *B;
if (match(LHS, m_And(m_Value(A), m_Value(B))) &&
- match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
- return true;
- if (match(RHS, m_And(m_Value(A), m_Value(B))) &&
- match(LHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
+ match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))) &&
+ isGuaranteedNotToBeUndef(A, SQ.AC, SQ.CxtI, SQ.DT) &&
+ isGuaranteedNotToBeUndef(B, SQ.AC, SQ.CxtI, SQ.DT))
return true;
}
- IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType());
- KnownBits LHSKnown(IT->getBitWidth());
- KnownBits RHSKnown(IT->getBitWidth());
- computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT, UseInstrInfo);
- computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT, UseInstrInfo);
- return KnownBits::haveNoCommonBitsSet(LHSKnown, RHSKnown);
+
+ return false;
+}
+
+bool llvm::haveNoCommonBitsSet(const WithCache<const Value *> &LHSCache,
+ const WithCache<const Value *> &RHSCache,
+ const SimplifyQuery &SQ) {
+ const Value *LHS = LHSCache.getValue();
+ const Value *RHS = RHSCache.getValue();
+
+ assert(LHS->getType() == RHS->getType() &&
+ "LHS and RHS should have the same type");
+ assert(LHS->getType()->isIntOrIntVectorTy() &&
+ "LHS and RHS should be integers");
+
+ if (haveNoCommonBitsSetSpecialCases(LHS, RHS, SQ) ||
+ haveNoCommonBitsSetSpecialCases(RHS, LHS, SQ))
+ return true;
+
+ return KnownBits::haveNoCommonBitsSet(LHSCache.getKnownBits(SQ),
+ RHSCache.getKnownBits(SQ));
}
bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
@@ -275,10 +265,9 @@ bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
bool OrZero, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT, bool UseInstrInfo) {
- return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth,
- SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC,
- safeCxtI(V, CxtI),
- UseInstrInfo));
+ return ::isKnownToBeAPowerOfTwo(
+ V, OrZero, Depth,
+ SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}
static bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
@@ -290,36 +279,28 @@ static bool isKnownNonZero(const Value *V, unsigned Depth,
bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT, bool UseInstrInfo) {
- return ::isKnownNonZero(V, Depth,
- SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC,
- safeCxtI(V, CxtI), UseInstrInfo));
+ return ::isKnownNonZero(
+ V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}
-bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL,
- unsigned Depth, AssumptionCache *AC,
- const Instruction *CxtI, const DominatorTree *DT,
- bool UseInstrInfo) {
- KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT, UseInstrInfo);
- return Known.isNonNegative();
+bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
+ unsigned Depth) {
+ return computeKnownBits(V, Depth, SQ).isNonNegative();
}
-bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth,
- AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT, bool UseInstrInfo) {
+bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
+ unsigned Depth) {
if (auto *CI = dyn_cast<ConstantInt>(V))
return CI->getValue().isStrictlyPositive();
  // TODO: We're doing two recursive queries here. We should factor this such
  // that only a single query is needed.
- return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT, UseInstrInfo) &&
- isKnownNonZero(V, DL, Depth, AC, CxtI, DT, UseInstrInfo);
+ return isKnownNonNegative(V, SQ, Depth) && ::isKnownNonZero(V, Depth, SQ);
}
-bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth,
- AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT, bool UseInstrInfo) {
- KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT, UseInstrInfo);
- return Known.isNegative();
+bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ,
+ unsigned Depth) {
+ return computeKnownBits(V, Depth, SQ).isNegative();
}
static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
@@ -329,21 +310,16 @@ bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
const DataLayout &DL, AssumptionCache *AC,
const Instruction *CxtI, const DominatorTree *DT,
bool UseInstrInfo) {
- return ::isKnownNonEqual(V1, V2, 0,
- SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC,
- safeCxtI(V2, V1, CxtI), UseInstrInfo));
+ return ::isKnownNonEqual(
+ V1, V2, 0,
+ SimplifyQuery(DL, DT, AC, safeCxtI(V2, V1, CxtI), UseInstrInfo));
}
-static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
- const SimplifyQuery &Q);
-
bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
- const DataLayout &DL, unsigned Depth,
- AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT, bool UseInstrInfo) {
- return ::MaskedValueIsZero(V, Mask, Depth,
- SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC,
- safeCxtI(V, CxtI), UseInstrInfo));
+ const SimplifyQuery &SQ, unsigned Depth) {
+ KnownBits Known(Mask.getBitWidth());
+ computeKnownBits(V, Known, Depth, SQ);
+ return Mask.isSubsetOf(Known.Zero);
}
static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
@@ -361,9 +337,8 @@ unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT, bool UseInstrInfo) {
- return ::ComputeNumSignBits(V, Depth,
- SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC,
- safeCxtI(V, CxtI), UseInstrInfo));
+ return ::ComputeNumSignBits(
+ V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}
unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL,
@@ -422,10 +397,9 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
}
bool SelfMultiply = Op0 == Op1;
- // TODO: SelfMultiply can be poison, but not undef.
if (SelfMultiply)
SelfMultiply &=
- isGuaranteedNotToBeUndefOrPoison(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1);
+ isGuaranteedNotToBeUndef(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1);
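+  // Poison Op0 is fine here (the product is then poison too), but an undef
+  // Op0 could take a different value at each use, so require not-undef.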
Known = KnownBits::mul(Known, Known2, SelfMultiply);
// Only make use of no-wrap flags if we failed to compute the sign bit
@@ -573,11 +547,24 @@ static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
// All other predicates - rely on generic ConstantRange handling.
const APInt *C;
- if (!match(RHS, m_APInt(C)))
+ auto Zero = APInt::getZero(RHS->getType()->getScalarSizeInBits());
+ if (match(RHS, m_APInt(C))) {
+ ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C);
+ return !TrueValues.contains(Zero);
+ }
+
+ auto *VC = dyn_cast<ConstantDataVector>(RHS);
+ if (VC == nullptr)
return false;
- ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C);
- return !TrueValues.contains(APInt::getZero(C->getBitWidth()));
+ for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem;
+ ++ElemIdx) {
+ ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
+ Pred, VC->getElementAsAPInt(ElemIdx));
+ if (TrueValues.contains(Zero))
+ return false;
+ }
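+  // E.g. for "icmp ugt %x, <i32 0, i32 2>" the exact regions [1, UMAX] and
+  // [3, UMAX] both exclude zero, so every lane of %x must be non-zero.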
+ return true;
}
static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) {
@@ -586,30 +573,34 @@ static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) {
if (!Q.AC || !Q.CxtI)
return false;
- if (Q.CxtI && V->getType()->isPointerTy()) {
- SmallVector<Attribute::AttrKind, 2> AttrKinds{Attribute::NonNull};
- if (!NullPointerIsDefined(Q.CxtI->getFunction(),
- V->getType()->getPointerAddressSpace()))
- AttrKinds.push_back(Attribute::Dereferenceable);
-
- if (getKnowledgeValidInContext(V, AttrKinds, Q.CxtI, Q.DT, Q.AC))
- return true;
- }
-
- for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
- if (!AssumeVH)
+ for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
+ if (!Elem.Assume)
continue;
- CallInst *I = cast<CallInst>(AssumeVH);
+
+ AssumeInst *I = cast<AssumeInst>(Elem.Assume);
assert(I->getFunction() == Q.CxtI->getFunction() &&
"Got assumption for the wrong function!");
+ if (Elem.Index != AssumptionCache::ExprResultIdx) {
+ if (!V->getType()->isPointerTy())
+ continue;
+ if (RetainedKnowledge RK = getKnowledgeFromBundle(
+ *I, I->bundle_op_info_begin()[Elem.Index])) {
+ if (RK.WasOn == V &&
+ (RK.AttrKind == Attribute::NonNull ||
+ (RK.AttrKind == Attribute::Dereferenceable &&
+ !NullPointerIsDefined(Q.CxtI->getFunction(),
+ V->getType()->getPointerAddressSpace()))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT))
+ return true;
+ }
+ continue;
+ }
+
// Warning: This loop can end up being somewhat performance sensitive.
    // We're running this loop once for each value queried, resulting in a
// runtime of ~O(#assumes * #values).
- assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
- "must be an assume intrinsic");
-
Value *RHS;
CmpInst::Predicate Pred;
auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
@@ -623,157 +614,89 @@ static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) {
return false;
}
-static void computeKnownBitsFromCmp(const Value *V, const ICmpInst *Cmp,
- KnownBits &Known, unsigned Depth,
+static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred,
+ Value *LHS, Value *RHS, KnownBits &Known,
const SimplifyQuery &Q) {
+ if (RHS->getType()->isPointerTy()) {
+ // Handle comparison of pointer to null explicitly, as it will not be
+ // covered by the m_APInt() logic below.
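+    // E.g. "V == null" makes every bit of V known zero, while "V s> null"
+    // makes V known non-negative.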
+ if (LHS == V && match(RHS, m_Zero())) {
+ switch (Pred) {
+ case ICmpInst::ICMP_EQ:
+ Known.setAllZero();
+ break;
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_SGT:
+ Known.makeNonNegative();
+ break;
+ case ICmpInst::ICMP_SLT:
+ Known.makeNegative();
+ break;
+ default:
+ break;
+ }
+ }
+ return;
+ }
+
unsigned BitWidth = Known.getBitWidth();
- // We are attempting to compute known bits for the operands of an assume.
- // Do not try to use other assumptions for those recursive calls because
- // that can lead to mutual recursion and a compile-time explosion.
- // An example of the mutual recursion: computeKnownBits can call
- // isKnownNonZero which calls computeKnownBitsFromAssume (this function)
- // and so on.
- SimplifyQuery QueryNoAC = Q;
- QueryNoAC.AC = nullptr;
-
- // Note that ptrtoint may change the bitwidth.
- Value *A, *B;
auto m_V =
m_CombineOr(m_Specific(V), m_PtrToIntSameSize(Q.DL, m_Specific(V)));
- CmpInst::Predicate Pred;
- uint64_t C;
- switch (Cmp->getPredicate()) {
+ const APInt *Mask, *C;
+ uint64_t ShAmt;
+ switch (Pred) {
case ICmpInst::ICMP_EQ:
- // assume(v = a)
- if (match(Cmp, m_c_ICmp(Pred, m_V, m_Value(A)))) {
- KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
- Known = Known.unionWith(RHSKnown);
- // assume(v & b = a)
- } else if (match(Cmp,
- m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A)))) {
- KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
- KnownBits MaskKnown = computeKnownBits(B, Depth + 1, QueryNoAC);
-
- // For those bits in the mask that are known to be one, we can propagate
- // known bits from the RHS to V.
- Known.Zero |= RHSKnown.Zero & MaskKnown.One;
- Known.One |= RHSKnown.One & MaskKnown.One;
- // assume(~(v & b) = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
- m_Value(A)))) {
- KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
- KnownBits MaskKnown = computeKnownBits(B, Depth + 1, QueryNoAC);
-
- // For those bits in the mask that are known to be one, we can propagate
- // inverted known bits from the RHS to V.
- Known.Zero |= RHSKnown.One & MaskKnown.One;
- Known.One |= RHSKnown.Zero & MaskKnown.One;
- // assume(v | b = a)
- } else if (match(Cmp,
- m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A)))) {
- KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
- KnownBits BKnown = computeKnownBits(B, Depth + 1, QueryNoAC);
-
- // For those bits in B that are known to be zero, we can propagate known
- // bits from the RHS to V.
- Known.Zero |= RHSKnown.Zero & BKnown.Zero;
- Known.One |= RHSKnown.One & BKnown.Zero;
- // assume(~(v | b) = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
- m_Value(A)))) {
- KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
- KnownBits BKnown = computeKnownBits(B, Depth + 1, QueryNoAC);
-
- // For those bits in B that are known to be zero, we can propagate
- // inverted known bits from the RHS to V.
- Known.Zero |= RHSKnown.One & BKnown.Zero;
- Known.One |= RHSKnown.Zero & BKnown.Zero;
- // assume(v ^ b = a)
- } else if (match(Cmp,
- m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A)))) {
- KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
- KnownBits BKnown = computeKnownBits(B, Depth + 1, QueryNoAC);
-
- // For those bits in B that are known to be zero, we can propagate known
- // bits from the RHS to V. For those bits in B that are known to be one,
- // we can propagate inverted known bits from the RHS to V.
- Known.Zero |= RHSKnown.Zero & BKnown.Zero;
- Known.One |= RHSKnown.One & BKnown.Zero;
- Known.Zero |= RHSKnown.One & BKnown.One;
- Known.One |= RHSKnown.Zero & BKnown.One;
- // assume(~(v ^ b) = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
- m_Value(A)))) {
- KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
- KnownBits BKnown = computeKnownBits(B, Depth + 1, QueryNoAC);
-
- // For those bits in B that are known to be zero, we can propagate
- // inverted known bits from the RHS to V. For those bits in B that are
- // known to be one, we can propagate known bits from the RHS to V.
- Known.Zero |= RHSKnown.One & BKnown.Zero;
- Known.One |= RHSKnown.Zero & BKnown.Zero;
- Known.Zero |= RHSKnown.Zero & BKnown.One;
- Known.One |= RHSKnown.One & BKnown.One;
- // assume(v << c = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
- m_Value(A))) &&
- C < BitWidth) {
- KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
-
- // For those bits in RHS that are known, we can propagate them to known
- // bits in V shifted to the right by C.
- RHSKnown.Zero.lshrInPlace(C);
- RHSKnown.One.lshrInPlace(C);
+ // assume(V = C)
+ if (match(LHS, m_V) && match(RHS, m_APInt(C))) {
+ Known = Known.unionWith(KnownBits::makeConstant(*C));
+ // assume(V & Mask = C)
+ } else if (match(LHS, m_And(m_V, m_APInt(Mask))) &&
+ match(RHS, m_APInt(C))) {
+ // For one bits in Mask, we can propagate bits from C to V.
+ Known.Zero |= ~*C & *Mask;
+ Known.One |= *C & *Mask;
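+    // E.g. assume((V & 0xF0) == 0x30) pins the high nibble of V:
+    // Known.Zero |= 0xC0 and Known.One |= 0x30.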
+ // assume(V | Mask = C)
+ } else if (match(LHS, m_Or(m_V, m_APInt(Mask))) && match(RHS, m_APInt(C))) {
+ // For zero bits in Mask, we can propagate bits from C to V.
+ Known.Zero |= ~*C & ~*Mask;
+ Known.One |= *C & ~*Mask;
+ // assume(V ^ Mask = C)
+ } else if (match(LHS, m_Xor(m_V, m_APInt(Mask))) &&
+ match(RHS, m_APInt(C))) {
+ // Equivalent to assume(V == Mask ^ C)
+ Known = Known.unionWith(KnownBits::makeConstant(*C ^ *Mask));
+ // assume(V << ShAmt = C)
+ } else if (match(LHS, m_Shl(m_V, m_ConstantInt(ShAmt))) &&
+ match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
+ // For those bits in C that are known, we can propagate them to known
+ // bits in V shifted to the right by ShAmt.
+ KnownBits RHSKnown = KnownBits::makeConstant(*C);
+ RHSKnown.Zero.lshrInPlace(ShAmt);
+ RHSKnown.One.lshrInPlace(ShAmt);
Known = Known.unionWith(RHSKnown);
- // assume(~(v << c) = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
- m_Value(A))) &&
- C < BitWidth) {
- KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
- // For those bits in RHS that are known, we can propagate them inverted
- // to known bits in V shifted to the right by C.
- RHSKnown.One.lshrInPlace(C);
- Known.Zero |= RHSKnown.One;
- RHSKnown.Zero.lshrInPlace(C);
- Known.One |= RHSKnown.Zero;
- // assume(v >> c = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)),
- m_Value(A))) &&
- C < BitWidth) {
- KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
+ // assume(V >> ShAmt = C)
+ } else if (match(LHS, m_Shr(m_V, m_ConstantInt(ShAmt))) &&
+ match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
+ KnownBits RHSKnown = KnownBits::makeConstant(*C);
// For those bits in RHS that are known, we can propagate them to known
// bits in V shifted to the right by C.
- Known.Zero |= RHSKnown.Zero << C;
- Known.One |= RHSKnown.One << C;
- // assume(~(v >> c) = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))),
- m_Value(A))) &&
- C < BitWidth) {
- KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
- // For those bits in RHS that are known, we can propagate them inverted
- // to known bits in V shifted to the right by C.
- Known.Zero |= RHSKnown.One << C;
- Known.One |= RHSKnown.Zero << C;
+ Known.Zero |= RHSKnown.Zero << ShAmt;
+ Known.One |= RHSKnown.One << ShAmt;
}
break;
case ICmpInst::ICMP_NE: {
- // assume (v & b != 0) where b is a power of 2
+ // assume (V & B != 0) where B is a power of 2
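+    // E.g. assume((V & 8) != 0) proves bit 3 of V is one.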
const APInt *BPow2;
- if (match(Cmp, m_ICmp(Pred, m_c_And(m_V, m_Power2(BPow2)), m_Zero()))) {
+ if (match(LHS, m_And(m_V, m_Power2(BPow2))) && match(RHS, m_Zero()))
Known.One |= *BPow2;
- }
break;
}
default:
const APInt *Offset = nullptr;
- if (match(Cmp, m_ICmp(Pred, m_CombineOr(m_V, m_Add(m_V, m_APInt(Offset))),
- m_Value(A)))) {
- KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
- ConstantRange RHSRange =
- ConstantRange::fromKnownBits(RHSKnown, Cmp->isSigned());
- ConstantRange LHSRange =
- ConstantRange::makeAllowedICmpRegion(Pred, RHSRange);
+ if (match(LHS, m_CombineOr(m_V, m_Add(m_V, m_APInt(Offset)))) &&
+ match(RHS, m_APInt(C))) {
+ ConstantRange LHSRange = ConstantRange::makeAllowedICmpRegion(Pred, *C);
if (Offset)
LHSRange = LHSRange.sub(*Offset);
Known = Known.unionWith(LHSRange.toKnownBits());
@@ -782,41 +705,67 @@ static void computeKnownBitsFromCmp(const Value *V, const ICmpInst *Cmp,
}
}
-void llvm::computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
+void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known,
unsigned Depth, const SimplifyQuery &Q) {
- // Use of assumptions is context-sensitive. If we don't have a context, we
- // cannot use them!
- if (!Q.AC || !Q.CxtI)
+ if (!Q.CxtI)
return;
- unsigned BitWidth = Known.getBitWidth();
+ if (Q.DC && Q.DT) {
+ // Handle dominating conditions.
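+    // E.g. if the true edge of "br (icmp ult %x, 8)" dominates Q.CxtI, all
+    // bits of %x above bit 2 are known zero there.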
+ for (BranchInst *BI : Q.DC->conditionsFor(V)) {
+ auto *Cmp = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!Cmp)
+ continue;
- // Refine Known set if the pointer alignment is set by assume bundles.
- if (V->getType()->isPointerTy()) {
- if (RetainedKnowledge RK = getKnowledgeValidInContext(
- V, { Attribute::Alignment }, Q.CxtI, Q.DT, Q.AC)) {
- if (isPowerOf2_64(RK.ArgValue))
- Known.Zero.setLowBits(Log2_64(RK.ArgValue));
+ BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
+ if (Q.DT->dominates(Edge0, Q.CxtI->getParent()))
+ computeKnownBitsFromCmp(V, Cmp->getPredicate(), Cmp->getOperand(0),
+ Cmp->getOperand(1), Known, Q);
+
+ BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
+ if (Q.DT->dominates(Edge1, Q.CxtI->getParent()))
+ computeKnownBitsFromCmp(V, Cmp->getInversePredicate(),
+ Cmp->getOperand(0), Cmp->getOperand(1), Known,
+ Q);
}
+
+ if (Known.hasConflict())
+ Known.resetAll();
}
+ if (!Q.AC)
+ return;
+
+ unsigned BitWidth = Known.getBitWidth();
+
// Note that the patterns below need to be kept in sync with the code
// in AssumptionCache::updateAffectedValues.
- for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
- if (!AssumeVH)
+ for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
+ if (!Elem.Assume)
continue;
- CallInst *I = cast<CallInst>(AssumeVH);
+
+ AssumeInst *I = cast<AssumeInst>(Elem.Assume);
assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
"Got assumption for the wrong function!");
+ if (Elem.Index != AssumptionCache::ExprResultIdx) {
+ if (!V->getType()->isPointerTy())
+ continue;
+ if (RetainedKnowledge RK = getKnowledgeFromBundle(
+ *I, I->bundle_op_info_begin()[Elem.Index])) {
+ if (RK.WasOn == V && RK.AttrKind == Attribute::Alignment &&
+ isPowerOf2_64(RK.ArgValue) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT))
+ Known.Zero.setLowBits(Log2_64(RK.ArgValue));
+ }
+ continue;
+ }
+
// Warning: This loop can end up being somewhat performance sensitive.
    // We're running this loop once for each value queried, resulting in a
// runtime of ~O(#assumes * #values).
- assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
- "must be an assume intrinsic");
-
Value *Arg = I->getArgOperand(0);
if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
@@ -844,7 +793,8 @@ void llvm::computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
continue;
- computeKnownBitsFromCmp(V, Cmp, Known, Depth, Q);
+ computeKnownBitsFromCmp(V, Cmp->getPredicate(), Cmp->getOperand(0),
+ Cmp->getOperand(1), Known, Q);
}
// Conflicting assumption: Undefined behavior will occur on this execution
@@ -948,18 +898,17 @@ getKnownBitsFromAndXorOr(const Operator *I, const APInt &DemandedElts,
}
// Public so this can be used in `SimplifyDemandedUseBits`.
-KnownBits llvm::analyzeKnownBitsFromAndXorOr(
- const Operator *I, const KnownBits &KnownLHS, const KnownBits &KnownRHS,
- unsigned Depth, const DataLayout &DL, AssumptionCache *AC,
- const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo) {
+KnownBits llvm::analyzeKnownBitsFromAndXorOr(const Operator *I,
+ const KnownBits &KnownLHS,
+ const KnownBits &KnownRHS,
+ unsigned Depth,
+ const SimplifyQuery &SQ) {
auto *FVTy = dyn_cast<FixedVectorType>(I->getType());
APInt DemandedElts =
FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
return getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS, KnownRHS, Depth,
- SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC,
- safeCxtI(I, CxtI),
- UseInstrInfo));
+ SQ);
}
ConstantRange llvm::getVScaleRange(const Function *F, unsigned BitWidth) {
@@ -1101,6 +1050,9 @@ static void computeKnownBitsFromOperator(const Operator *I,
assert(SrcBitWidth && "SrcBitWidth can't be zero");
Known = Known.anyextOrTrunc(SrcBitWidth);
computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ if (auto *Inst = dyn_cast<PossiblyNonNegInst>(I);
+ Inst && Inst->hasNonNeg() && !Known.isNegative())
+ Known.makeNonNegative();
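+    // E.g. "zext nneg i8 %x to i32" guarantees the sign bit of %x is zero,
+    // so bits 7..31 of the i32 result are known zero.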
Known = Known.zextOrTrunc(BitWidth);
break;
}
@@ -1459,11 +1411,13 @@ static void computeKnownBitsFromOperator(const Operator *I,
// Recurse, but cap the recursion to one level, because we don't
// want to waste time spinning around in loops.
+  // TODO: See if we can base the recursion limiter on the number of incoming
+  // phi edges so we don't overly clamp the analysis.
computeKnownBits(IncValue, Known2, MaxAnalysisRecursionDepth - 1, RecQ);
- // If this failed, see if we can use a conditional branch into the phi
+ // See if we can further use a conditional branch into the phi
// to help us determine the range of the value.
- if (Known2.isUnknown()) {
+ if (!Known2.isConstant()) {
ICmpInst::Predicate Pred;
const APInt *RHSC;
BasicBlock *TrueSucc, *FalseSucc;
@@ -1476,21 +1430,19 @@ static void computeKnownBitsFromOperator(const Operator *I,
// If we're using the false successor, invert the predicate.
if (FalseSucc == P->getParent())
Pred = CmpInst::getInversePredicate(Pred);
-
- switch (Pred) {
- case CmpInst::Predicate::ICMP_EQ:
- Known2 = KnownBits::makeConstant(*RHSC);
- break;
- case CmpInst::Predicate::ICMP_ULE:
- Known2.Zero.setHighBits(RHSC->countl_zero());
- break;
- case CmpInst::Predicate::ICMP_ULT:
- Known2.Zero.setHighBits((*RHSC - 1).countl_zero());
- break;
- default:
- // TODO - add additional integer predicate handling.
+ // Get the knownbits implied by the incoming phi condition.
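+              // E.g. an edge guarded by "icmp ult %iv, 16" contributes the
+              // known bits of the range [0, 16) for %iv.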
+ auto CR = ConstantRange::makeExactICmpRegion(Pred, *RHSC);
+ KnownBits KnownUnion = Known2.unionWith(CR.toKnownBits());
+              // We can have conflicts here if we are analyzing dead code
+              // (it's impossible for us to reach this BB based on the icmp).
+ if (KnownUnion.hasConflict()) {
+ // No reason to continue analyzing in a known dead region, so
+ // just resetAll and break. This will cause us to also exit the
+ // outer loop.
+ Known.resetAll();
break;
}
+ Known2 = KnownUnion;
}
}
}
@@ -1513,8 +1465,10 @@ static void computeKnownBitsFromOperator(const Operator *I,
Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range))
computeKnownBitsFromRangeMetadata(*MD, Known);
if (const Value *RV = cast<CallBase>(I)->getReturnedArgOperand()) {
- computeKnownBits(RV, Known2, Depth + 1, Q);
- Known = Known.unionWith(Known2);
+ if (RV->getType() == I->getType()) {
+ computeKnownBits(RV, Known2, Depth + 1, Q);
+ Known = Known.unionWith(Known2);
+ }
}
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
@@ -1635,8 +1589,8 @@ static void computeKnownBitsFromOperator(const Operator *I,
const Value *Mask = I->getOperand(1);
Known2 = KnownBits(Mask->getType()->getScalarSizeInBits());
computeKnownBits(Mask, Known2, Depth + 1, Q);
- // This is basically a pointer typed and.
- Known &= Known2.zextOrTrunc(Known.getBitWidth());
+ // TODO: 1-extend would be more precise.
+ Known &= Known2.anyextOrTrunc(BitWidth);
break;
}
case Intrinsic::x86_sse42_crc32_64_64:
@@ -1644,7 +1598,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
break;
case Intrinsic::riscv_vsetvli:
case Intrinsic::riscv_vsetvlimax:
- // Assume that VL output is >= 65536.
+ // Assume that VL output is <= 65536.
// TODO: Take SEW and LMUL into account.
if (BitWidth > 17)
Known.Zero.setBitsFrom(17);
@@ -1778,17 +1732,17 @@ static void computeKnownBitsFromOperator(const Operator *I,
/// Determine which bits of V are known to be either zero or one and return
/// them.
-KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts,
- unsigned Depth, const SimplifyQuery &Q) {
+KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
+ unsigned Depth, const SimplifyQuery &Q) {
KnownBits Known(getBitWidth(V->getType(), Q.DL));
- computeKnownBits(V, DemandedElts, Known, Depth, Q);
+ ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
return Known;
}
/// Determine which bits of V are known to be either zero or one and return
/// them.
-KnownBits computeKnownBits(const Value *V, unsigned Depth,
- const SimplifyQuery &Q) {
+KnownBits llvm::computeKnownBits(const Value *V, unsigned Depth,
+ const SimplifyQuery &Q) {
KnownBits Known(getBitWidth(V->getType(), Q.DL));
computeKnownBits(V, Known, Depth, Q);
return Known;
@@ -1884,6 +1838,8 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
if (!DemandedElts[i])
continue;
Constant *Element = CV->getAggregateElement(i);
+ if (isa<PoisonValue>(Element))
+ continue;
auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
if (!ElementCI) {
Known.resetAll();
@@ -1932,11 +1888,11 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
Known.Zero.setLowBits(Log2(Alignment));
}
- // computeKnownBitsFromAssume strictly refines Known.
+ // computeKnownBitsFromContext strictly refines Known.
// Therefore, we run them after computeKnownBitsFromOperator.
- // Check whether a nearby assume intrinsic can determine some known bits.
- computeKnownBitsFromAssume(V, Known, Depth, Q);
+ // Check whether we can determine known bits from context such as assumes.
+ computeKnownBitsFromContext(V, Known, Depth, Q);
assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
}
@@ -2006,11 +1962,17 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
const SimplifyQuery &Q) {
assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
- // Attempt to match against constants.
- if (OrZero && match(V, m_Power2OrZero()))
- return true;
- if (match(V, m_Power2()))
- return true;
+ if (isa<Constant>(V))
+ return OrZero ? match(V, m_Power2OrZero()) : match(V, m_Power2());
+
+ // i1 is by definition a power of 2 or zero.
+ if (OrZero && V->getType()->getScalarSizeInBits() == 1)
+ return true;
+
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
if (Q.CxtI && match(V, m_VScale())) {
const Function *F = Q.CxtI->getFunction();
// The vscale_range indicates vscale is a power-of-two.
@@ -2019,70 +1981,71 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
// 1 << X is clearly a power of two if the one is not shifted off the end. If
// it is shifted off the end then the result is undefined.
- if (match(V, m_Shl(m_One(), m_Value())))
+ if (match(I, m_Shl(m_One(), m_Value())))
return true;
// (signmask) >>l X is clearly a power of two if the one is not shifted off
// the bottom. If it is shifted off the bottom then the result is undefined.
- if (match(V, m_LShr(m_SignMask(), m_Value())))
+ if (match(I, m_LShr(m_SignMask(), m_Value())))
return true;
// The remaining tests are all recursive, so bail out if we hit the limit.
if (Depth++ == MaxAnalysisRecursionDepth)
return false;
- Value *X = nullptr, *Y = nullptr;
- // A shift left or a logical shift right of a power of two is a power of two
- // or zero.
- if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
- match(V, m_LShr(m_Value(X), m_Value()))))
- return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q);
-
- if (const ZExtInst *ZI = dyn_cast<ZExtInst>(V))
- return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q);
-
- if (const SelectInst *SI = dyn_cast<SelectInst>(V))
- return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) &&
- isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q);
-
- // Peek through min/max.
- if (match(V, m_MaxOrMin(m_Value(X), m_Value(Y)))) {
- return isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q) &&
- isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q);
- }
-
- if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) {
+ switch (I->getOpcode()) {
+ case Instruction::ZExt:
+ return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
+ case Instruction::Trunc:
+ return OrZero && isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
+ case Instruction::Shl:
+ if (OrZero || Q.IIQ.hasNoUnsignedWrap(I) || Q.IIQ.hasNoSignedWrap(I))
+ return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
+ return false;
+ case Instruction::LShr:
+ if (OrZero || Q.IIQ.isExact(cast<BinaryOperator>(I)))
+ return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
+ return false;
+ case Instruction::UDiv:
+ if (Q.IIQ.isExact(cast<BinaryOperator>(I)))
+ return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
+ return false;
+ case Instruction::Mul:
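+    // 2^a * 2^b is 2^(a+b), which is either still a power of two or wraps
+    // to zero; the isKnownNonZero check rules out the wrap when !OrZero.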
+ return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) &&
+ isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q) &&
+ (OrZero || isKnownNonZero(I, Depth, Q));
+ case Instruction::And:
// A power of two and'd with anything is a power of two or zero.
- if (isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q) ||
- isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, Depth, Q))
+ if (OrZero &&
+ (isKnownToBeAPowerOfTwo(I->getOperand(1), /*OrZero*/ true, Depth, Q) ||
+ isKnownToBeAPowerOfTwo(I->getOperand(0), /*OrZero*/ true, Depth, Q)))
return true;
// X & (-X) is always a power of two or zero.
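+    // (-X == ~X + 1, so X & -X isolates the lowest set bit of X.)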
- if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X))))
- return true;
+ if (match(I->getOperand(0), m_Neg(m_Specific(I->getOperand(1)))) ||
+ match(I->getOperand(1), m_Neg(m_Specific(I->getOperand(0)))))
+ return OrZero || isKnownNonZero(I->getOperand(0), Depth, Q);
return false;
- }
-
- // Adding a power-of-two or zero to the same power-of-two or zero yields
- // either the original power-of-two, a larger power-of-two or zero.
- if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
+ case Instruction::Add: {
+ // Adding a power-of-two or zero to the same power-of-two or zero yields
+ // either the original power-of-two, a larger power-of-two or zero.
const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) ||
Q.IIQ.hasNoSignedWrap(VOBO)) {
- if (match(X, m_And(m_Specific(Y), m_Value())) ||
- match(X, m_And(m_Value(), m_Specific(Y))))
- if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q))
- return true;
- if (match(Y, m_And(m_Specific(X), m_Value())) ||
- match(Y, m_And(m_Value(), m_Specific(X))))
- if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q))
- return true;
+ if (match(I->getOperand(0),
+ m_c_And(m_Specific(I->getOperand(1)), m_Value())) &&
+ isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q))
+ return true;
+ if (match(I->getOperand(1),
+ m_c_And(m_Specific(I->getOperand(0)), m_Value())) &&
+ isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q))
+ return true;
unsigned BitWidth = V->getType()->getScalarSizeInBits();
KnownBits LHSBits(BitWidth);
- computeKnownBits(X, LHSBits, Depth, Q);
+ computeKnownBits(I->getOperand(0), LHSBits, Depth, Q);
KnownBits RHSBits(BitWidth);
- computeKnownBits(Y, RHSBits, Depth, Q);
+ computeKnownBits(I->getOperand(1), RHSBits, Depth, Q);
// If i8 V is a power of two or zero:
// ZeroBits: 1 1 1 0 1 1 1 1
// ~ZeroBits: 0 0 0 1 0 0 0 0
@@ -2092,11 +2055,16 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue())
return true;
}
+ return false;
}
-
- // A PHI node is power of two if all incoming values are power of two, or if
- // it is an induction variable where in each step its value is a power of two.
- if (const PHINode *PN = dyn_cast<PHINode>(V)) {
+ case Instruction::Select:
+ return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) &&
+ isKnownToBeAPowerOfTwo(I->getOperand(2), OrZero, Depth, Q);
+ case Instruction::PHI: {
+    // A PHI node is a power of two if all incoming values are powers of two,
+    // or if it is an induction variable whose value is a power of two at
+    // each step.
+ auto *PN = cast<PHINode>(I);
SimplifyQuery RecQ = Q;
// Check if it is an induction variable and always power of two.
@@ -2117,17 +2085,36 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ);
});
}
-
- // An exact divide or right shift can only shift off zero bits, so the result
- // is a power of two only if the first operand is a power of two and not
- // copying a sign bit (sdiv int_min, 2).
- if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) ||
- match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) {
- return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero,
- Depth, Q);
+ case Instruction::Invoke:
+ case Instruction::Call: {
+ if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::umax:
+ case Intrinsic::smax:
+ case Intrinsic::umin:
+ case Intrinsic::smin:
+ return isKnownToBeAPowerOfTwo(II->getArgOperand(1), OrZero, Depth, Q) &&
+ isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
+      // bswap/bitreverse just move bits around without changing any 1s/0s,
+      // and thus don't change pow2/non-pow2 status.
+ case Intrinsic::bitreverse:
+ case Intrinsic::bswap:
+ return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
+ case Intrinsic::fshr:
+ case Intrinsic::fshl:
+ // If Op0 == Op1, this is a rotate. is_pow2(rotate(x, y)) == is_pow2(x)
+ if (II->getArgOperand(0) == II->getArgOperand(1))
+ return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
+ break;
+ default:
+ break;
+ }
+ }
+ return false;
+ }
+ default:
+ return false;
}
-
- return false;
}
/// Test whether a GEP's result is known to be non-null.
@@ -2231,6 +2218,11 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V,
return true;
}
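+  // A udiv/sdiv/urem/srem that uses V as its divisor and is known to execute
+  // on the way to CtxI proves V is non-zero, since division by zero is UB.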
+ if ((match(U, m_IDiv(m_Value(), m_Specific(V))) ||
+ match(U, m_IRem(m_Value(), m_Specific(V)))) &&
+ isValidAssumeForContext(cast<Instruction>(U), CtxI, DT))
+ return true;
+
// Consider only compare instructions uniquely controlling a branch
Value *RHS;
CmpInst::Predicate Pred;
@@ -2447,6 +2439,9 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
unsigned Depth, const SimplifyQuery &Q) {
unsigned BitWidth = getBitWidth(I->getType()->getScalarType(), Q.DL);
switch (I->getOpcode()) {
+ case Instruction::Alloca:
+ // Alloca never returns null, malloc might.
+ return I->getType()->getPointerAddressSpace() == 0;
case Instruction::GetElementPtr:
if (I->getType()->isPointerTy())
return isGEPKnownNonNull(cast<GEPOperator>(I), Depth, Q);
@@ -2545,26 +2540,33 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
}
case Instruction::UDiv:
- case Instruction::SDiv:
+ case Instruction::SDiv: {
// X / Y
// div exact can only produce a zero if the dividend is zero.
if (cast<PossiblyExactOperator>(I)->isExact())
return isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q);
- if (I->getOpcode() == Instruction::UDiv) {
- std::optional<bool> XUgeY;
- KnownBits XKnown =
- computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
- if (!XKnown.isUnknown()) {
- KnownBits YKnown =
- computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
- // If X u>= Y then div is non zero (0/0 is UB).
- XUgeY = KnownBits::uge(XKnown, YKnown);
- }
- // If X is total unknown or X u< Y we won't be able to prove non-zero
- // with compute known bits so just return early.
- return XUgeY && *XUgeY;
+
+ std::optional<bool> XUgeY;
+ KnownBits XKnown =
+ computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
+  // If X is fully unknown, we won't be able to figure anything out, so don't
+  // bother computing known bits for Y.
+ if (XKnown.isUnknown())
+ return false;
+
+ KnownBits YKnown =
+ computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
+ if (I->getOpcode() == Instruction::SDiv) {
+      // For signed division we need to compare the absolute values of the
+      // operands.
+ XKnown = XKnown.abs(/*IntMinIsPoison*/ false);
+ YKnown = YKnown.abs(/*IntMinIsPoison*/ false);
}
- break;
+    // If X u>= Y then the div is non-zero (0/0 is UB).
+    XUgeY = KnownBits::uge(XKnown, YKnown);
+    // If X is totally unknown, or X u< Y, we won't be able to prove non-zero
+    // with computeKnownBits, so just return early.
+ return XUgeY && *XUgeY;
+ }
case Instruction::Add: {
// X + Y.
@@ -2651,6 +2653,23 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
if (U.get() == PN)
return true;
RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
+ // Check if the branch on the phi excludes zero.
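+        // E.g. an incoming edge taken only under "icmp ugt %v, 7" proves
+        // this incoming value is at least 8, hence non-zero.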
+ ICmpInst::Predicate Pred;
+ Value *X;
+ BasicBlock *TrueSucc, *FalseSucc;
+ if (match(RecQ.CxtI,
+ m_Br(m_c_ICmp(Pred, m_Specific(U.get()), m_Value(X)),
+ m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
+ // Check for cases of duplicate successors.
+ if ((TrueSucc == PN->getParent()) != (FalseSucc == PN->getParent())) {
+ // If we're using the false successor, invert the predicate.
+ if (FalseSucc == PN->getParent())
+ Pred = CmpInst::getInversePredicate(Pred);
+ if (cmpExcludesZero(Pred, X))
+ return true;
+ }
+ }
+ // Finally recurse on the edge and check it directly.
return isKnownNonZero(U.get(), DemandedElts, NewDepth, RecQ);
});
}
@@ -2672,15 +2691,33 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
return isKnownNonZero(I->getOperand(0), Depth, Q) &&
isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
Depth);
- case Instruction::Load:
- // A Load tagged with nonnull metadata is never null.
- if (Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_nonnull))
- return true;
+ case Instruction::Load: {
+ auto *LI = cast<LoadInst>(I);
+ // A Load tagged with nonnull or dereferenceable with null pointer undefined
+ // is never null.
+ if (auto *PtrT = dyn_cast<PointerType>(I->getType()))
+ if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull) ||
+ (Q.IIQ.getMetadata(LI, LLVMContext::MD_dereferenceable) &&
+ !NullPointerIsDefined(LI->getFunction(), PtrT->getAddressSpace())))
+ return true;
// No need to fall through to computeKnownBits as range metadata is already
// handled in isKnownNonZero.
return false;
+ }
case Instruction::Call:
+ case Instruction::Invoke:
+ if (I->getType()->isPointerTy()) {
+ const auto *Call = cast<CallBase>(I);
+ if (Call->isReturnNonNull())
+ return true;
+ if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true))
+ return isKnownNonZero(RP, Depth, Q);
+ } else if (const Value *RV = cast<CallBase>(I)->getReturnedArgOperand()) {
+ if (RV->getType() == I->getType() && isKnownNonZero(RV, Depth, Q))
+ return true;
+ }
+
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
case Intrinsic::sshl_sat:
@@ -2741,8 +2778,10 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
default:
break;
}
+ break;
}
- break;
+
+ return false;
}
KnownBits Known(BitWidth);
@@ -2831,10 +2870,6 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
// Check for pointer simplifications.
if (PointerType *PtrTy = dyn_cast<PointerType>(V->getType())) {
- // Alloca never returns null, malloc might.
- if (isa<AllocaInst>(V) && PtrTy->getAddressSpace() == 0)
- return true;
-
    // A byval or inalloca argument may not be null in a non-default address
    // space. A nonnull argument is assumed never 0.
if (const Argument *A = dyn_cast<Argument>(V)) {
@@ -2843,13 +2878,6 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
A->hasNonNullAttr()))
return true;
}
-
- if (const auto *Call = dyn_cast<CallBase>(V)) {
- if (Call->isReturnNonNull())
- return true;
- if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true))
- return isKnownNonZero(RP, Depth, Q);
- }
}
if (const auto *I = dyn_cast<Operator>(V))
@@ -3049,6 +3077,77 @@ static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2,
return true;
}
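+// Two selects with a shared condition are known non-equal if their true
+// values are non-equal and their false values are non-equal; otherwise a
+// select is known non-equal to V2 if both of its arms are non-equal to V2.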
+static bool isNonEqualSelect(const Value *V1, const Value *V2, unsigned Depth,
+ const SimplifyQuery &Q) {
+ const SelectInst *SI1 = dyn_cast<SelectInst>(V1);
+ if (!SI1)
+ return false;
+
+ if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) {
+ const Value *Cond1 = SI1->getCondition();
+ const Value *Cond2 = SI2->getCondition();
+ if (Cond1 == Cond2)
+ return isKnownNonEqual(SI1->getTrueValue(), SI2->getTrueValue(),
+ Depth + 1, Q) &&
+ isKnownNonEqual(SI1->getFalseValue(), SI2->getFalseValue(),
+ Depth + 1, Q);
+ }
+ return isKnownNonEqual(SI1->getTrueValue(), V2, Depth + 1, Q) &&
+ isKnownNonEqual(SI1->getFalseValue(), V2, Depth + 1, Q);
+}
+
+// Check whether A is a GEP that is also the incoming value for a PHI in the
+// loop, and B is either a ptr or another GEP. If the PHI has 2 incoming
+// values, one of them being the recursive GEP A and the other a ptr with the
+// same base and a same-or-higher offset than B, then the loop only increments
+// the pointer further when the offset of the recursive GEP is greater than 0.
+static bool isNonEqualPointersWithRecursiveGEP(const Value *A, const Value *B,
+ const SimplifyQuery &Q) {
+ if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy())
+ return false;
+
+ auto *GEPA = dyn_cast<GEPOperator>(A);
+ if (!GEPA || GEPA->getNumIndices() != 1 || !isa<Constant>(GEPA->idx_begin()))
+ return false;
+
+ // Handle 2 incoming PHI values with one being a recursive GEP.
+ auto *PN = dyn_cast<PHINode>(GEPA->getPointerOperand());
+ if (!PN || PN->getNumIncomingValues() != 2)
+ return false;
+
+ // Search for the recursive GEP as an incoming operand, and record that as
+ // Step.
+ Value *Start = nullptr;
+ Value *Step = const_cast<Value *>(A);
+ if (PN->getIncomingValue(0) == Step)
+ Start = PN->getIncomingValue(1);
+ else if (PN->getIncomingValue(1) == Step)
+ Start = PN->getIncomingValue(0);
+ else
+ return false;
+
+  // The other incoming value's base should match B's base. The pointers are
+  // non-equal if either
+  //   StartOffset >= OffsetB && StepOffset > 0, or
+  //   StartOffset <= OffsetB && StepOffset < 0.
+  // We use stripAndAccumulateInBoundsConstantOffsets to restrict the
+  // optimization to inbounds GEPs only.
+ unsigned IndexWidth = Q.DL.getIndexTypeSizeInBits(Start->getType());
+ APInt StartOffset(IndexWidth, 0);
+ Start = Start->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StartOffset);
+ APInt StepOffset(IndexWidth, 0);
+ Step = Step->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StepOffset);
+
+ // Check if Base Pointer of Step matches the PHI.
+ if (Step != PN)
+ return false;
+ APInt OffsetB(IndexWidth, 0);
+ B = B->stripAndAccumulateInBoundsConstantOffsets(Q.DL, OffsetB);
+ return Start == B &&
+ ((StartOffset.sge(OffsetB) && StepOffset.isStrictlyPositive()) ||
+ (StartOffset.sle(OffsetB) && StepOffset.isNegative()));
+}
+
/// Return true if it is known that V1 != V2.
static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
const SimplifyQuery &Q) {
@@ -3098,23 +3197,15 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
Known2.Zero.intersects(Known1.One))
return true;
}
- return false;
-}
-/// Return true if 'V & Mask' is known to be zero. We use this predicate to
-/// simplify operations downstream. Mask is known to be zero for bits that V
-/// cannot have.
-///
-/// This function is defined on values with integer type, values with pointer
-/// type, and vectors of integers. In the case
-/// where V is a vector, the mask, known zero, and known one values are the
-/// same width as the vector element, and the bit is set only if it is true
-/// for all of the elements in the vector.
-bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
- const SimplifyQuery &Q) {
- KnownBits Known(Mask.getBitWidth());
- computeKnownBits(V, Known, Depth, Q);
- return Mask.isSubsetOf(Known.Zero);
+ if (isNonEqualSelect(V1, V2, Depth, Q) || isNonEqualSelect(V2, V1, Depth, Q))
+ return true;
+
+ if (isNonEqualPointersWithRecursiveGEP(V1, V2, Q) ||
+ isNonEqualPointersWithRecursiveGEP(V2, V1, Q))
+ return true;
+
+ return false;
}
// Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow).
@@ -3641,6 +3732,8 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
return Intrinsic::not_intrinsic;
}
+/// Deprecated, use computeKnownFPClass instead.
+///
/// If \p SignBitOnly is true, test for a known 0 sign bit rather than a
/// standard ordered compare. e.g. make -0.0 olt 0.0 be true because of the sign
/// bit despite comparing equal.
@@ -3832,13 +3925,9 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
return false;
}
-bool llvm::CannotBeOrderedLessThanZero(const Value *V, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
- return cannotBeOrderedLessThanZeroImpl(V, DL, TLI, false, 0);
-}
-
bool llvm::SignBitMustBeZero(const Value *V, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
+ // FIXME: Use computeKnownFPClass and pass all arguments
return cannotBeOrderedLessThanZeroImpl(V, DL, TLI, true, 0);
}
@@ -3941,9 +4030,15 @@ std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred,
Value *LHS, Value *RHS,
bool LookThroughSrc) {
const APFloat *ConstRHS;
- if (!match(RHS, m_APFloat(ConstRHS)))
- return {nullptr, fcNone};
+ if (!match(RHS, m_APFloatAllowUndef(ConstRHS)))
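+    // fcAllFlags is the conservative "no information" result; fcNone would
+    // instead claim the compare is never true of any value.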
+ return {nullptr, fcAllFlags};
+ return fcmpToClassTest(Pred, F, LHS, ConstRHS, LookThroughSrc);
+}
+
+std::pair<Value *, FPClassTest>
+llvm::fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS,
+ const APFloat *ConstRHS, bool LookThroughSrc) {
// fcmp ord x, zero|normal|subnormal|inf -> ~fcNan
if (Pred == FCmpInst::FCMP_ORD && !ConstRHS->isNaN())
return {LHS, ~fcNan};
@@ -3958,7 +4053,7 @@ std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred,
// TODO: Handle DAZ by expanding masks to cover subnormal cases.
if (Pred != FCmpInst::FCMP_ORD && Pred != FCmpInst::FCMP_UNO &&
!inputDenormalIsIEEE(F, LHS->getType()))
- return {nullptr, fcNone};
+ return {nullptr, fcAllFlags};
switch (Pred) {
case FCmpInst::FCMP_OEQ: // Match x == 0.0
@@ -3995,7 +4090,7 @@ std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred,
break;
}
- return {nullptr, fcNone};
+ return {nullptr, fcAllFlags};
}
Value *Src = LHS;
@@ -4078,8 +4173,14 @@ std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred,
}
case FCmpInst::FCMP_OGE:
case FCmpInst::FCMP_ULT: {
- if (ConstRHS->isNegative()) // TODO
- return {nullptr, fcNone};
+ if (ConstRHS->isNegative()) {
+ // fcmp oge x, -inf -> ~fcNan
+ // fcmp oge fabs(x), -inf -> ~fcNan
+ // fcmp ult x, -inf -> fcNan
+ // fcmp ult fabs(x), -inf -> fcNan
+ Mask = ~fcNan;
+ break;
+ }
// fcmp oge fabs(x), +inf -> fcInf
// fcmp oge x, +inf -> fcPosInf
@@ -4092,15 +4193,21 @@ std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred,
}
case FCmpInst::FCMP_OGT:
case FCmpInst::FCMP_ULE: {
- if (ConstRHS->isNegative())
- return {nullptr, fcNone};
+ if (ConstRHS->isNegative()) {
+ // fcmp ogt x, -inf -> fcmp one x, -inf
+ // fcmp ogt fabs(x), -inf -> fcmp ord x, x
+ // fcmp ule x, -inf -> fcmp ueq x, -inf
+ // fcmp ule fabs(x), -inf -> fcmp uno x, x
+ Mask = IsFabs ? ~fcNan : ~(fcNegInf | fcNan);
+ break;
+ }
// No value is ordered and greater than infinity.
Mask = fcNone;
break;
}
default:
- return {nullptr, fcNone};
+ return {nullptr, fcAllFlags};
}
} else if (ConstRHS->isSmallestNormalized() && !ConstRHS->isNegative()) {
// Match pattern that's used in __builtin_isnormal.
@@ -4129,14 +4236,14 @@ std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred,
break;
}
default:
- return {nullptr, fcNone};
+ return {nullptr, fcAllFlags};
}
} else if (ConstRHS->isNaN()) {
// fcmp o__ x, nan -> false
// fcmp u__ x, nan -> true
Mask = fcNone;
} else
- return {nullptr, fcNone};
+ return {nullptr, fcAllFlags};
// Invert the comparison for the unordered cases.
if (FCmpInst::isUnordered(Pred))
@@ -4369,427 +4476,421 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
break;
}
case Instruction::Call: {
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op)) {
- const Intrinsic::ID IID = II->getIntrinsicID();
- switch (IID) {
- case Intrinsic::fabs: {
- if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) {
- // If we only care about the sign bit we don't need to inspect the
- // operand.
- computeKnownFPClass(II->getArgOperand(0), DemandedElts,
- InterestedClasses, Known, Depth + 1, Q);
- }
-
- Known.fabs();
- break;
- }
- case Intrinsic::copysign: {
- KnownFPClass KnownSign;
-
+ const CallInst *II = cast<CallInst>(Op);
+ const Intrinsic::ID IID = II->getIntrinsicID();
+ switch (IID) {
+ case Intrinsic::fabs: {
+ if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) {
+ // If we only care about the sign bit we don't need to inspect the
+ // operand.
computeKnownFPClass(II->getArgOperand(0), DemandedElts,
InterestedClasses, Known, Depth + 1, Q);
- computeKnownFPClass(II->getArgOperand(1), DemandedElts,
- InterestedClasses, KnownSign, Depth + 1, Q);
- Known.copysign(KnownSign);
- break;
}
- case Intrinsic::fma:
- case Intrinsic::fmuladd: {
- if ((InterestedClasses & fcNegative) == fcNone)
- break;
- if (II->getArgOperand(0) != II->getArgOperand(1))
- break;
-
- // The multiply cannot be -0 and therefore the add can't be -0
- Known.knownNot(fcNegZero);
+ Known.fabs();
+ break;
+ }
+ case Intrinsic::copysign: {
+ KnownFPClass KnownSign;
- // x * x + y is non-negative if y is non-negative.
- KnownFPClass KnownAddend;
- computeKnownFPClass(II->getArgOperand(2), DemandedElts,
- InterestedClasses, KnownAddend, Depth + 1, Q);
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
+ Known, Depth + 1, Q);
+ computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses,
+ KnownSign, Depth + 1, Q);
+ Known.copysign(KnownSign);
+ break;
+ }
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd: {
+ if ((InterestedClasses & fcNegative) == fcNone)
+ break;
- // TODO: Known sign bit with no nans
- if (KnownAddend.cannotBeOrderedLessThanZero())
- Known.knownNot(fcNegative);
+ if (II->getArgOperand(0) != II->getArgOperand(1))
break;
- }
- case Intrinsic::sqrt:
- case Intrinsic::experimental_constrained_sqrt: {
- KnownFPClass KnownSrc;
- FPClassTest InterestedSrcs = InterestedClasses;
- if (InterestedClasses & fcNan)
- InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
- computeKnownFPClass(II->getArgOperand(0), DemandedElts,
- InterestedSrcs, KnownSrc, Depth + 1, Q);
+ // The multiply cannot be -0 and therefore the add can't be -0
+ Known.knownNot(fcNegZero);
- if (KnownSrc.isKnownNeverPosInfinity())
- Known.knownNot(fcPosInf);
- if (KnownSrc.isKnownNever(fcSNan))
- Known.knownNot(fcSNan);
-
- // Any negative value besides -0 returns a nan.
- if (KnownSrc.isKnownNeverNaN() &&
- KnownSrc.cannotBeOrderedLessThanZero())
- Known.knownNot(fcNan);
-
- // The only negative value that can be returned is -0 for -0 inputs.
- Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal);
-
- // If the input denormal mode could be PreserveSign, a negative
- // subnormal input could produce a negative zero output.
- const Function *F = II->getFunction();
- if (Q.IIQ.hasNoSignedZeros(II) ||
- (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType()))) {
- Known.knownNot(fcNegZero);
- if (KnownSrc.isKnownNeverNaN())
- Known.SignBit = false;
- }
+ // x * x + y is non-negative if y is non-negative.
+ KnownFPClass KnownAddend;
+ computeKnownFPClass(II->getArgOperand(2), DemandedElts, InterestedClasses,
+ KnownAddend, Depth + 1, Q);
- break;
- }
- case Intrinsic::sin:
- case Intrinsic::cos: {
- // Return NaN on infinite inputs.
- KnownFPClass KnownSrc;
- computeKnownFPClass(II->getArgOperand(0), DemandedElts,
- InterestedClasses, KnownSrc, Depth + 1, Q);
- Known.knownNot(fcInf);
- if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity())
- Known.knownNot(fcNan);
- break;
+ // TODO: Known sign bit with no nans
+ if (KnownAddend.cannotBeOrderedLessThanZero())
+ Known.knownNot(fcNegative);
+ break;
+ }
+ case Intrinsic::sqrt:
+ case Intrinsic::experimental_constrained_sqrt: {
+ KnownFPClass KnownSrc;
+ FPClassTest InterestedSrcs = InterestedClasses;
+ if (InterestedClasses & fcNan)
+ InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
+
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs,
+ KnownSrc, Depth + 1, Q);
+
+ if (KnownSrc.isKnownNeverPosInfinity())
+ Known.knownNot(fcPosInf);
+ if (KnownSrc.isKnownNever(fcSNan))
+ Known.knownNot(fcSNan);
+
+ // Any negative value besides -0 returns a nan.
+ if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
+ Known.knownNot(fcNan);
+
+ // The only negative value that can be returned is -0 for -0 inputs.
+ Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal);
+
+ // If the input denormal mode could be PreserveSign, a negative
+ // subnormal input could produce a negative zero output.
+ const Function *F = II->getFunction();
+ if (Q.IIQ.hasNoSignedZeros(II) ||
+ (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType()))) {
+ Known.knownNot(fcNegZero);
+ if (KnownSrc.isKnownNeverNaN())
+ Known.SignBit = false;
}
- case Intrinsic::maxnum:
- case Intrinsic::minnum:
- case Intrinsic::minimum:
- case Intrinsic::maximum: {
- KnownFPClass KnownLHS, KnownRHS;
- computeKnownFPClass(II->getArgOperand(0), DemandedElts,
- InterestedClasses, KnownLHS, Depth + 1, Q);
- computeKnownFPClass(II->getArgOperand(1), DemandedElts,
- InterestedClasses, KnownRHS, Depth + 1, Q);
-
- bool NeverNaN =
- KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN();
- Known = KnownLHS | KnownRHS;
-
- // If either operand is not NaN, the result is not NaN.
- if (NeverNaN && (IID == Intrinsic::minnum || IID == Intrinsic::maxnum))
- Known.knownNot(fcNan);
-
- if (IID == Intrinsic::maxnum) {
- // If at least one operand is known to be positive, the result must be
- // positive.
- if ((KnownLHS.cannotBeOrderedLessThanZero() &&
- KnownLHS.isKnownNeverNaN()) ||
- (KnownRHS.cannotBeOrderedLessThanZero() &&
- KnownRHS.isKnownNeverNaN()))
- Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
- } else if (IID == Intrinsic::maximum) {
- // If at least one operand is known to be positive, the result must be
- // positive.
- if (KnownLHS.cannotBeOrderedLessThanZero() ||
- KnownRHS.cannotBeOrderedLessThanZero())
- Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
- } else if (IID == Intrinsic::minnum) {
- // If at least one operand is known to be negative, the result must be
- // negative.
- if ((KnownLHS.cannotBeOrderedGreaterThanZero() &&
- KnownLHS.isKnownNeverNaN()) ||
- (KnownRHS.cannotBeOrderedGreaterThanZero() &&
- KnownRHS.isKnownNeverNaN()))
- Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
- } else {
- // If at least one operand is known to be negative, the result must be
- // negative.
- if (KnownLHS.cannotBeOrderedGreaterThanZero() ||
- KnownRHS.cannotBeOrderedGreaterThanZero())
- Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
- }
+ break;
+ }
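The sqrt facts used above, checked directly (illustrative host C++, IEEE-754 assumed):

#include <cassert>
#include <cmath>

int main() {
  assert(std::isnan(std::sqrt(-1.0)));     // any negative input besides -0 -> NaN
  assert(std::signbit(std::sqrt(-0.0)));   // -0 is the only negative result
  assert(std::isinf(std::sqrt(INFINITY))); // +inf passes through
  return 0;
}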
+ case Intrinsic::sin:
+ case Intrinsic::cos: {
+ // Return NaN on infinite inputs.
+ KnownFPClass KnownSrc;
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
+ KnownSrc, Depth + 1, Q);
+ Known.knownNot(fcInf);
+ if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity())
+ Known.knownNot(fcNan);
+ break;
+ }
+ case Intrinsic::maxnum:
+ case Intrinsic::minnum:
+ case Intrinsic::minimum:
+ case Intrinsic::maximum: {
+ KnownFPClass KnownLHS, KnownRHS;
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
+ KnownLHS, Depth + 1, Q);
+ computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses,
+ KnownRHS, Depth + 1, Q);
- // Fixup zero handling if denormals could be returned as a zero.
- //
- // As there's no spec for denormal flushing, be conservative with the
- // treatment of denormals that could be flushed to zero. For older
- // subtargets on AMDGPU the min/max instructions would not flush the
- // output and return the original value.
- //
- // TODO: This could be refined based on the sign
- if ((Known.KnownFPClasses & fcZero) != fcNone &&
- !Known.isKnownNeverSubnormal()) {
- const Function *Parent = II->getFunction();
- if (!Parent)
- break;
-
- DenormalMode Mode = Parent->getDenormalMode(
- II->getType()->getScalarType()->getFltSemantics());
- if (Mode != DenormalMode::getIEEE())
- Known.KnownFPClasses |= fcZero;
- }
+ bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN();
+ Known = KnownLHS | KnownRHS;
- break;
+ // If either operand is not NaN, the result is not NaN.
+ if (NeverNaN && (IID == Intrinsic::minnum || IID == Intrinsic::maxnum))
+ Known.knownNot(fcNan);
+
+ if (IID == Intrinsic::maxnum) {
+ // If at least one operand is known to be positive, the result must be
+ // positive.
+ if ((KnownLHS.cannotBeOrderedLessThanZero() &&
+ KnownLHS.isKnownNeverNaN()) ||
+ (KnownRHS.cannotBeOrderedLessThanZero() &&
+ KnownRHS.isKnownNeverNaN()))
+ Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
+ } else if (IID == Intrinsic::maximum) {
+ // If at least one operand is known to be positive, the result must be
+ // positive.
+ if (KnownLHS.cannotBeOrderedLessThanZero() ||
+ KnownRHS.cannotBeOrderedLessThanZero())
+ Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
+ } else if (IID == Intrinsic::minnum) {
+ // If at least one operand is known to be negative, the result must be
+ // negative.
+ if ((KnownLHS.cannotBeOrderedGreaterThanZero() &&
+ KnownLHS.isKnownNeverNaN()) ||
+ (KnownRHS.cannotBeOrderedGreaterThanZero() &&
+ KnownRHS.isKnownNeverNaN()))
+ Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
+ } else {
+ // If at least one operand is known to be negative, the result must be
+ // negative.
+ if (KnownLHS.cannotBeOrderedGreaterThanZero() ||
+ KnownRHS.cannotBeOrderedGreaterThanZero())
+ Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
}
- case Intrinsic::canonicalize: {
- KnownFPClass KnownSrc;
- computeKnownFPClass(II->getArgOperand(0), DemandedElts,
- InterestedClasses, KnownSrc, Depth + 1, Q);
- // This is essentially a stronger form of
- // propagateCanonicalizingSrc. Other "canonicalizing" operations don't
- // actually have an IR canonicalization guarantee.
+ // Fixup zero handling if denormals could be returned as a zero.
+ //
+ // As there's no spec for denormal flushing, be conservative with the
+ // treatment of denormals that could be flushed to zero. For older
+ // subtargets on AMDGPU the min/max instructions would not flush the
+ // output and return the original value.
+ //
+ // TODO: This could be refined based on the sign
+ if ((Known.KnownFPClasses & fcZero) != fcNone &&
+ !Known.isKnownNeverSubnormal()) {
+ const Function *Parent = II->getFunction();
+ if (!Parent)
+ break;
+
+ DenormalMode Mode = Parent->getDenormalMode(
+ II->getType()->getScalarType()->getFltSemantics());
+ if (Mode != DenormalMode::getIEEE())
+ Known.KnownFPClasses |= fcZero;
+ }
- // Canonicalize may flush denormals to zero, so we have to consider the
- // denormal mode to preserve known-not-0 knowledge.
- Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan;
+ break;
+ }
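The NaN behavior that distinguishes the four cases above, in standalone form (illustrative host C++; std::fmin/std::fmax follow minnum/maxnum semantics):

#include <cassert>
#include <cmath>

int main() {
  // minnum/maxnum drop a lone NaN operand, so one never-NaN input is
  // enough to rule out a NaN result.
  assert(std::fmax(NAN, 1.0) == 1.0);
  assert(std::fmin(2.0, NAN) == 2.0);
  // maxnum with a non-NaN, non-negative operand is never negative.
  assert(std::fmax(-5.0, 0.5) == 0.5);
  return 0;
}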
+ case Intrinsic::canonicalize: {
+ KnownFPClass KnownSrc;
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
+ KnownSrc, Depth + 1, Q);
- // Stronger version of propagateNaN
- // Canonicalize is guaranteed to quiet signaling nans.
- if (KnownSrc.isKnownNeverNaN())
- Known.knownNot(fcNan);
- else
- Known.knownNot(fcSNan);
+ // This is essentially a stronger form of
+ // propagateCanonicalizingSrc. Other "canonicalizing" operations don't
+ // actually have an IR canonicalization guarantee.
- const Function *F = II->getFunction();
- if (!F)
- break;
+ // Canonicalize may flush denormals to zero, so we have to consider the
+ // denormal mode to preserve known-not-0 knowledge.
+ Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan;
- // If the parent function flushes denormals, the canonical output cannot
- // be a denormal.
- const fltSemantics &FPType =
- II->getType()->getScalarType()->getFltSemantics();
- DenormalMode DenormMode = F->getDenormalMode(FPType);
- if (DenormMode == DenormalMode::getIEEE()) {
- if (KnownSrc.isKnownNever(fcPosZero))
- Known.knownNot(fcPosZero);
- if (KnownSrc.isKnownNever(fcNegZero))
- Known.knownNot(fcNegZero);
- break;
- }
+ // Stronger version of propagateNaN
+ // Canonicalize is guaranteed to quiet signaling nans.
+ if (KnownSrc.isKnownNeverNaN())
+ Known.knownNot(fcNan);
+ else
+ Known.knownNot(fcSNan);
- if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero())
- Known.knownNot(fcSubnormal);
+ const Function *F = II->getFunction();
+ if (!F)
+ break;
- if (DenormMode.Input == DenormalMode::PositiveZero ||
- (DenormMode.Output == DenormalMode::PositiveZero &&
- DenormMode.Input == DenormalMode::IEEE))
+ // If the parent function flushes denormals, the canonical output cannot
+ // be a denormal.
+ const fltSemantics &FPType =
+ II->getType()->getScalarType()->getFltSemantics();
+ DenormalMode DenormMode = F->getDenormalMode(FPType);
+ if (DenormMode == DenormalMode::getIEEE()) {
+ if (KnownSrc.isKnownNever(fcPosZero))
+ Known.knownNot(fcPosZero);
+ if (KnownSrc.isKnownNever(fcNegZero))
Known.knownNot(fcNegZero);
-
break;
}
- case Intrinsic::trunc:
- case Intrinsic::floor:
- case Intrinsic::ceil:
- case Intrinsic::rint:
- case Intrinsic::nearbyint:
- case Intrinsic::round:
- case Intrinsic::roundeven: {
- KnownFPClass KnownSrc;
- FPClassTest InterestedSrcs = InterestedClasses;
- if (InterestedSrcs & fcPosFinite)
- InterestedSrcs |= fcPosFinite;
- if (InterestedSrcs & fcNegFinite)
- InterestedSrcs |= fcNegFinite;
- computeKnownFPClass(II->getArgOperand(0), DemandedElts,
- InterestedSrcs, KnownSrc, Depth + 1, Q);
- // Integer results cannot be subnormal.
+ if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero())
Known.knownNot(fcSubnormal);
- Known.propagateNaN(KnownSrc, true);
+ if (DenormMode.Input == DenormalMode::PositiveZero ||
+ (DenormMode.Output == DenormalMode::PositiveZero &&
+ DenormMode.Input == DenormalMode::IEEE))
+ Known.knownNot(fcNegZero);
- // Pass through infinities, except PPC_FP128 is a special case for
- // intrinsics other than trunc.
- if (IID == Intrinsic::trunc || !V->getType()->isMultiUnitFPType()) {
- if (KnownSrc.isKnownNeverPosInfinity())
- Known.knownNot(fcPosInf);
- if (KnownSrc.isKnownNeverNegInfinity())
- Known.knownNot(fcNegInf);
- }
+ break;
+ }
+ case Intrinsic::trunc:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
+ case Intrinsic::round:
+ case Intrinsic::roundeven: {
+ KnownFPClass KnownSrc;
+ FPClassTest InterestedSrcs = InterestedClasses;
+ if (InterestedSrcs & fcPosFinite)
+ InterestedSrcs |= fcPosFinite;
+ if (InterestedSrcs & fcNegFinite)
+ InterestedSrcs |= fcNegFinite;
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs,
+ KnownSrc, Depth + 1, Q);
+
+ // Integer results cannot be subnormal.
+ Known.knownNot(fcSubnormal);
- // Negative round ups to 0 produce -0
- if (KnownSrc.isKnownNever(fcPosFinite))
- Known.knownNot(fcPosFinite);
- if (KnownSrc.isKnownNever(fcNegFinite))
- Known.knownNot(fcNegFinite);
+ Known.propagateNaN(KnownSrc, true);
- break;
+ // Pass through infinities, except PPC_FP128 is a special case for
+ // intrinsics other than trunc.
+ if (IID == Intrinsic::trunc || !V->getType()->isMultiUnitFPType()) {
+ if (KnownSrc.isKnownNeverPosInfinity())
+ Known.knownNot(fcPosInf);
+ if (KnownSrc.isKnownNeverNegInfinity())
+ Known.knownNot(fcNegInf);
}
- case Intrinsic::exp:
- case Intrinsic::exp2: {
- Known.knownNot(fcNegative);
- if ((InterestedClasses & fcNan) == fcNone)
- break;
- KnownFPClass KnownSrc;
- computeKnownFPClass(II->getArgOperand(0), DemandedElts,
- InterestedClasses, KnownSrc, Depth + 1, Q);
- if (KnownSrc.isKnownNeverNaN()) {
- Known.knownNot(fcNan);
- Known.SignBit = false;
- }
+ // Negative values that round up to 0 produce -0
+ if (KnownSrc.isKnownNever(fcPosFinite))
+ Known.knownNot(fcPosFinite);
+ if (KnownSrc.isKnownNever(fcNegFinite))
+ Known.knownNot(fcNegFinite);
+ break;
+ }
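A standalone check of the -0 corner case noted above (illustrative host C++, IEEE-754 assumed):

#include <cassert>
#include <cmath>

int main() {
  // A negative value that rounds up to zero yields -0.0, so knowing the
  // input is negative does not rule out a zero result.
  double R = std::ceil(-0.5);
  assert(R == 0.0 && std::signbit(R));
  // Rounded results are integral and therefore never subnormal.
  assert(std::trunc(1e-310) == 0.0);
  return 0;
}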
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::exp10: {
+ Known.knownNot(fcNegative);
+ if ((InterestedClasses & fcNan) == fcNone)
break;
+
+ KnownFPClass KnownSrc;
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
+ KnownSrc, Depth + 1, Q);
+ if (KnownSrc.isKnownNeverNaN()) {
+ Known.knownNot(fcNan);
+ Known.SignBit = false;
}
- case Intrinsic::fptrunc_round: {
- computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses,
- Known, Depth, Q);
+
+ break;
+ }
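The exp-family facts used above, checked directly (illustrative host C++):

#include <cassert>
#include <cmath>

int main() {
  // exp/exp2/exp10 never produce a negative value; underflow goes to +0.
  assert(!std::signbit(std::exp(-1000.0)));
  // A NaN result requires a NaN input.
  assert(std::isnan(std::exp(NAN)));
  assert(std::exp2(10.0) == 1024.0);
  return 0;
}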
+ case Intrinsic::fptrunc_round: {
+ computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known,
+ Depth, Q);
+ break;
+ }
+ case Intrinsic::log:
+ case Intrinsic::log10:
+ case Intrinsic::log2:
+ case Intrinsic::experimental_constrained_log:
+ case Intrinsic::experimental_constrained_log10:
+ case Intrinsic::experimental_constrained_log2: {
+ // log(+inf) -> +inf
+ // log([+-]0.0) -> -inf
+ // log(-inf) -> nan
+ // log(-x) -> nan
+ if ((InterestedClasses & (fcNan | fcInf)) == fcNone)
break;
- }
- case Intrinsic::log:
- case Intrinsic::log10:
- case Intrinsic::log2:
- case Intrinsic::experimental_constrained_log:
- case Intrinsic::experimental_constrained_log10:
- case Intrinsic::experimental_constrained_log2: {
- // log(+inf) -> +inf
- // log([+-]0.0) -> -inf
- // log(-inf) -> nan
- // log(-x) -> nan
- if ((InterestedClasses & (fcNan | fcInf)) == fcNone)
- break;
- FPClassTest InterestedSrcs = InterestedClasses;
- if ((InterestedClasses & fcNegInf) != fcNone)
- InterestedSrcs |= fcZero | fcSubnormal;
- if ((InterestedClasses & fcNan) != fcNone)
- InterestedSrcs |= fcNan | (fcNegative & ~fcNan);
+ FPClassTest InterestedSrcs = InterestedClasses;
+ if ((InterestedClasses & fcNegInf) != fcNone)
+ InterestedSrcs |= fcZero | fcSubnormal;
+ if ((InterestedClasses & fcNan) != fcNone)
+ InterestedSrcs |= fcNan | (fcNegative & ~fcNan);
- KnownFPClass KnownSrc;
- computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs,
- KnownSrc, Depth + 1, Q);
+ KnownFPClass KnownSrc;
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs,
+ KnownSrc, Depth + 1, Q);
- if (KnownSrc.isKnownNeverPosInfinity())
- Known.knownNot(fcPosInf);
+ if (KnownSrc.isKnownNeverPosInfinity())
+ Known.knownNot(fcPosInf);
- if (KnownSrc.isKnownNeverNaN() &&
- KnownSrc.cannotBeOrderedLessThanZero())
- Known.knownNot(fcNan);
+ if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
+ Known.knownNot(fcNan);
- const Function *F = II->getFunction();
- if (F && KnownSrc.isKnownNeverLogicalZero(*F, II->getType()))
- Known.knownNot(fcNegInf);
+ const Function *F = II->getFunction();
+ if (F && KnownSrc.isKnownNeverLogicalZero(*F, II->getType()))
+ Known.knownNot(fcNegInf);
+ break;
+ }
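The special values enumerated in the comment above, verified standalone (illustrative host C++):

#include <cassert>
#include <cmath>

int main() {
  assert(std::log(INFINITY) > 0 && std::isinf(std::log(INFINITY))); // +inf
  assert(std::log(0.0) < 0 && std::isinf(std::log(0.0)));           // -inf
  assert(std::isnan(std::log(-1.0)));                               // NaN
  return 0;
}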
+ case Intrinsic::powi: {
+ if ((InterestedClasses & fcNegative) == fcNone)
break;
- }
- case Intrinsic::powi: {
- if ((InterestedClasses & fcNegative) == fcNone)
- break;
- const Value *Exp = II->getArgOperand(1);
- Type *ExpTy = Exp->getType();
- unsigned BitWidth = ExpTy->getScalarType()->getIntegerBitWidth();
- KnownBits ExponentKnownBits(BitWidth);
- computeKnownBits(Exp,
- isa<VectorType>(ExpTy) ? DemandedElts : APInt(1, 1),
- ExponentKnownBits, Depth + 1, Q);
+ const Value *Exp = II->getArgOperand(1);
+ Type *ExpTy = Exp->getType();
+ unsigned BitWidth = ExpTy->getScalarType()->getIntegerBitWidth();
+ KnownBits ExponentKnownBits(BitWidth);
+ computeKnownBits(Exp, isa<VectorType>(ExpTy) ? DemandedElts : APInt(1, 1),
+ ExponentKnownBits, Depth + 1, Q);
- if (ExponentKnownBits.Zero[0]) { // Is even
- Known.knownNot(fcNegative);
- break;
- }
-
- // Given that exp is an integer, here are the
- // ways that pow can return a negative value:
- //
- // pow(-x, exp) --> negative if exp is odd and x is negative.
- // pow(-0, exp) --> -inf if exp is negative odd.
- // pow(-0, exp) --> -0 if exp is positive odd.
- // pow(-inf, exp) --> -0 if exp is negative odd.
- // pow(-inf, exp) --> -inf if exp is positive odd.
- KnownFPClass KnownSrc;
- computeKnownFPClass(II->getArgOperand(0), DemandedElts, fcNegative,
- KnownSrc, Depth + 1, Q);
- if (KnownSrc.isKnownNever(fcNegative))
- Known.knownNot(fcNegative);
+ if (ExponentKnownBits.Zero[0]) { // Is even
+ Known.knownNot(fcNegative);
break;
}
- case Intrinsic::ldexp: {
- KnownFPClass KnownSrc;
- computeKnownFPClass(II->getArgOperand(0), DemandedElts,
- InterestedClasses, KnownSrc, Depth + 1, Q);
- Known.propagateNaN(KnownSrc, /*PropagateSign=*/true);
- // Sign is preserved, but underflows may produce zeroes.
- if (KnownSrc.isKnownNever(fcNegative))
- Known.knownNot(fcNegative);
- else if (KnownSrc.cannotBeOrderedLessThanZero())
- Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
+ // Given that exp is an integer, here are the
+ // ways that pow can return a negative value:
+ //
+ // pow(-x, exp) --> negative if exp is odd and x is negative.
+ // pow(-0, exp) --> -inf if exp is negative odd.
+ // pow(-0, exp) --> -0 if exp is positive odd.
+ // pow(-inf, exp) --> -0 if exp is negative odd.
+ // pow(-inf, exp) --> -inf if exp is positive odd.
+ KnownFPClass KnownSrc;
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts, fcNegative,
+ KnownSrc, Depth + 1, Q);
+ if (KnownSrc.isKnownNever(fcNegative))
+ Known.knownNot(fcNegative);
+ break;
+ }
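Why a known-even exponent is enough to rule out a negative result (illustrative host C++ with integral exponents):

#include <cassert>
#include <cmath>

int main() {
  // An even exponent forces a non-negative result for any base...
  assert(std::pow(-3.0, 2.0) == 9.0);
  assert(std::pow(-0.5, 4.0) > 0.0);
  // ...while an odd exponent can preserve the sign of the base.
  assert(std::pow(-3.0, 3.0) == -27.0);
  return 0;
}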
+ case Intrinsic::ldexp: {
+ KnownFPClass KnownSrc;
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
+ KnownSrc, Depth + 1, Q);
+ Known.propagateNaN(KnownSrc, /*PropagateSign=*/true);
- if (KnownSrc.isKnownNever(fcPositive))
- Known.knownNot(fcPositive);
- else if (KnownSrc.cannotBeOrderedGreaterThanZero())
- Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
+ // Sign is preserved, but underflows may produce zeroes.
+ if (KnownSrc.isKnownNever(fcNegative))
+ Known.knownNot(fcNegative);
+ else if (KnownSrc.cannotBeOrderedLessThanZero())
+ Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
- // Can refine inf/zero handling based on the exponent operand.
- const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf;
- if ((InterestedClasses & ExpInfoMask) == fcNone)
- break;
- if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone)
- break;
+ if (KnownSrc.isKnownNever(fcPositive))
+ Known.knownNot(fcPositive);
+ else if (KnownSrc.cannotBeOrderedGreaterThanZero())
+ Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
- const fltSemantics &Flt
- = II->getType()->getScalarType()->getFltSemantics();
- unsigned Precision = APFloat::semanticsPrecision(Flt);
- const Value *ExpArg = II->getArgOperand(1);
- ConstantRange ExpRange = computeConstantRange(
- ExpArg, true, Q.IIQ.UseInstrInfo, Q.AC, Q.CxtI, Q.DT, Depth + 1);
+ // Can refine inf/zero handling based on the exponent operand.
+ const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf;
+ if ((InterestedClasses & ExpInfoMask) == fcNone)
+ break;
+ if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone)
+ break;
- const int MantissaBits = Precision - 1;
- if (ExpRange.getSignedMin().sge(static_cast<int64_t>(MantissaBits)))
- Known.knownNot(fcSubnormal);
+ const fltSemantics &Flt =
+ II->getType()->getScalarType()->getFltSemantics();
+ unsigned Precision = APFloat::semanticsPrecision(Flt);
+ const Value *ExpArg = II->getArgOperand(1);
+ ConstantRange ExpRange = computeConstantRange(
+ ExpArg, true, Q.IIQ.UseInstrInfo, Q.AC, Q.CxtI, Q.DT, Depth + 1);
- const Function *F = II->getFunction();
- const APInt *ConstVal = ExpRange.getSingleElement();
- if (ConstVal && ConstVal->isZero()) {
- // ldexp(x, 0) -> x, so propagate everything.
- Known.propagateCanonicalizingSrc(KnownSrc, *F,
- II->getType());
- } else if (ExpRange.isAllNegative()) {
- // If we know the power is <= 0, can't introduce inf
- if (KnownSrc.isKnownNeverPosInfinity())
- Known.knownNot(fcPosInf);
- if (KnownSrc.isKnownNeverNegInfinity())
- Known.knownNot(fcNegInf);
- } else if (ExpRange.isAllNonNegative()) {
- // If we know the power is >= 0, can't introduce subnormal or zero
- if (KnownSrc.isKnownNeverPosSubnormal())
- Known.knownNot(fcPosSubnormal);
- if (KnownSrc.isKnownNeverNegSubnormal())
- Known.knownNot(fcNegSubnormal);
- if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, II->getType()))
- Known.knownNot(fcPosZero);
- if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType()))
- Known.knownNot(fcNegZero);
- }
+ const int MantissaBits = Precision - 1;
+ if (ExpRange.getSignedMin().sge(static_cast<int64_t>(MantissaBits)))
+ Known.knownNot(fcSubnormal);
- break;
- }
- case Intrinsic::arithmetic_fence: {
- computeKnownFPClass(II->getArgOperand(0), DemandedElts,
- InterestedClasses, Known, Depth + 1, Q);
- break;
+ const Function *F = II->getFunction();
+ const APInt *ConstVal = ExpRange.getSingleElement();
+ if (ConstVal && ConstVal->isZero()) {
+ // ldexp(x, 0) -> x, so propagate everything.
+ Known.propagateCanonicalizingSrc(KnownSrc, *F, II->getType());
+ } else if (ExpRange.isAllNegative()) {
+ // If we know the power is <= 0, can't introduce inf
+ if (KnownSrc.isKnownNeverPosInfinity())
+ Known.knownNot(fcPosInf);
+ if (KnownSrc.isKnownNeverNegInfinity())
+ Known.knownNot(fcNegInf);
+ } else if (ExpRange.isAllNonNegative()) {
+ // If we know the power is >= 0, can't introduce subnormal or zero
+ if (KnownSrc.isKnownNeverPosSubnormal())
+ Known.knownNot(fcPosSubnormal);
+ if (KnownSrc.isKnownNeverNegSubnormal())
+ Known.knownNot(fcNegSubnormal);
+ if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, II->getType()))
+ Known.knownNot(fcPosZero);
+ if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType()))
+ Known.knownNot(fcNegZero);
}
- case Intrinsic::experimental_constrained_sitofp:
- case Intrinsic::experimental_constrained_uitofp:
- // Cannot produce nan
- Known.knownNot(fcNan);
- // sitofp and uitofp turn into +0.0 for zero.
- Known.knownNot(fcNegZero);
+ break;
+ }
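The ldexp properties the refinement above depends on (illustrative host C++):

#include <cassert>
#include <cmath>

int main() {
  assert(std::ldexp(1.5, 0) == 1.5);          // ldexp(x, 0) == x
  assert(std::ldexp(-3.0, 2) == -12.0);       // sign is preserved
  assert(!std::isinf(std::ldexp(1.0, -100))); // exp <= 0 cannot create inf
  return 0;
}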
+ case Intrinsic::arithmetic_fence: {
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
+ Known, Depth + 1, Q);
+ break;
+ }
+ case Intrinsic::experimental_constrained_sitofp:
+ case Intrinsic::experimental_constrained_uitofp:
+ // Cannot produce nan
+ Known.knownNot(fcNan);
- // Integers cannot be subnormal
- Known.knownNot(fcSubnormal);
+ // sitofp and uitofp turn into +0.0 for zero.
+ Known.knownNot(fcNegZero);
- if (IID == Intrinsic::experimental_constrained_uitofp)
- Known.signBitMustBeZero();
+ // Integers cannot be subnormal
+ Known.knownNot(fcSubnormal);
- // TODO: Copy inf handling from instructions
- break;
- default:
- break;
- }
+ if (IID == Intrinsic::experimental_constrained_uitofp)
+ Known.signBitMustBeZero();
+
+ // TODO: Copy inf handling from instructions
+ break;
+ default:
+ break;
}
break;
@@ -5249,7 +5350,7 @@ Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) {
return UndefInt8;
// Return Undef for zero-sized type.
- if (!DL.getTypeStoreSize(V->getType()).isNonZero())
+ if (DL.getTypeStoreSize(V->getType()).isZero())
return UndefInt8;
Constant *C = dyn_cast<Constant>(V);
@@ -5296,10 +5397,9 @@ Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) {
if (CE->getOpcode() == Instruction::IntToPtr) {
if (auto *PtrTy = dyn_cast<PointerType>(CE->getType())) {
unsigned BitWidth = DL.getPointerSizeInBits(PtrTy->getAddressSpace());
- return isBytewiseValue(
- ConstantExpr::getIntegerCast(CE->getOperand(0),
- Type::getIntNTy(Ctx, BitWidth), false),
- DL);
+ if (Constant *Op = ConstantFoldIntegerCast(
+ CE->getOperand(0), Type::getIntNTy(Ctx, BitWidth), false, DL))
+ return isBytewiseValue(Op, DL);
}
}
}
@@ -6184,36 +6284,31 @@ static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
}
/// Combine constant ranges from computeConstantRange() and computeKnownBits().
-static ConstantRange computeConstantRangeIncludingKnownBits(
- const Value *V, bool ForSigned, const DataLayout &DL, unsigned Depth,
- AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
- bool UseInstrInfo = true) {
- KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT, UseInstrInfo);
- ConstantRange CR1 = ConstantRange::fromKnownBits(Known, ForSigned);
- ConstantRange CR2 = computeConstantRange(V, ForSigned, UseInstrInfo);
+static ConstantRange
+computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
+ bool ForSigned,
+ const SimplifyQuery &SQ) {
+ ConstantRange CR1 =
+ ConstantRange::fromKnownBits(V.getKnownBits(SQ), ForSigned);
+ ConstantRange CR2 = computeConstantRange(V, ForSigned, SQ.IIQ.UseInstrInfo);
ConstantRange::PreferredRangeType RangeType =
ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned;
return CR1.intersectWith(CR2, RangeType);
}
-OverflowResult llvm::computeOverflowForUnsignedMul(
- const Value *LHS, const Value *RHS, const DataLayout &DL,
- AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
- bool UseInstrInfo) {
- KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
- UseInstrInfo);
- KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
- UseInstrInfo);
+OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
+ const Value *RHS,
+ const SimplifyQuery &SQ) {
+ KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ);
+ KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ);
ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false);
ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false);
return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange));
}
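A minimal sketch of the range check above in plain integer arithmetic (the helper name is hypothetical; assumes an i32 multiply with 64-bit scratch space):

#include <cassert>
#include <cstdint>

// If the product of the operands' known upper bounds fits in 32 bits, the
// unsigned multiply can never overflow.
bool unsignedMulNeverOverflows(uint32_t LHSMax, uint32_t RHSMax) {
  return (uint64_t)LHSMax * RHSMax <= UINT32_MAX;
}

int main() {
  assert(unsignedMulNeverOverflows(0xFFFFu, 0xFFFFu));    // 0xFFFE0001 fits
  assert(!unsignedMulNeverOverflows(0x10000u, 0x10000u)); // 2^32 does not
  return 0;
}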
-OverflowResult
-llvm::computeOverflowForSignedMul(const Value *LHS, const Value *RHS,
- const DataLayout &DL, AssumptionCache *AC,
- const Instruction *CxtI,
- const DominatorTree *DT, bool UseInstrInfo) {
+OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS,
+ const Value *RHS,
+ const SimplifyQuery &SQ) {
// Multiplying n * m significant bits yields a result of n + m significant
// bits. If the total number of significant bits does not exceed the
// result bit width (minus 1), there is no overflow.
@@ -6224,8 +6319,8 @@ llvm::computeOverflowForSignedMul(const Value *LHS, const Value *RHS,
// Note that underestimating the number of sign bits gives a more
// conservative answer.
- unsigned SignBits = ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) +
- ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT);
+ unsigned SignBits =
+ ::ComputeNumSignBits(LHS, 0, SQ) + ::ComputeNumSignBits(RHS, 0, SQ);
// First handle the easy case: if we have enough sign bits there's
// definitely no overflow.
@@ -6242,34 +6337,29 @@ llvm::computeOverflowForSignedMul(const Value *LHS, const Value *RHS,
// product is exactly the minimum negative number.
// E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
// For simplicity we just check if at least one side is not negative.
- KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
- UseInstrInfo);
- KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
- UseInstrInfo);
+ KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ);
+ KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ);
if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative())
return OverflowResult::NeverOverflows;
}
return OverflowResult::MayOverflow;
}
-OverflowResult llvm::computeOverflowForUnsignedAdd(
- const Value *LHS, const Value *RHS, const DataLayout &DL,
- AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
- bool UseInstrInfo) {
- ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(
- LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT, UseInstrInfo);
- ConstantRange RHSRange = computeConstantRangeIncludingKnownBits(
- RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT, UseInstrInfo);
+OverflowResult
+llvm::computeOverflowForUnsignedAdd(const WithCache<const Value *> &LHS,
+ const WithCache<const Value *> &RHS,
+ const SimplifyQuery &SQ) {
+ ConstantRange LHSRange =
+ computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ);
+ ConstantRange RHSRange =
+ computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ);
return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange));
}
-static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
- const Value *RHS,
- const AddOperator *Add,
- const DataLayout &DL,
- AssumptionCache *AC,
- const Instruction *CxtI,
- const DominatorTree *DT) {
+static OverflowResult
+computeOverflowForSignedAdd(const WithCache<const Value *> &LHS,
+ const WithCache<const Value *> &RHS,
+ const AddOperator *Add, const SimplifyQuery &SQ) {
if (Add && Add->hasNoSignedWrap()) {
return OverflowResult::NeverOverflows;
}
@@ -6288,14 +6378,14 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
//
// Since the carry into the most significant position is always equal to
// the carry out of the addition, there is no signed overflow.
- if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 &&
- ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1)
+ if (::ComputeNumSignBits(LHS, 0, SQ) > 1 &&
+ ::ComputeNumSignBits(RHS, 0, SQ) > 1)
return OverflowResult::NeverOverflows;
- ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(
- LHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT);
- ConstantRange RHSRange = computeConstantRangeIncludingKnownBits(
- RHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT);
+ ConstantRange LHSRange =
+ computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/true, SQ);
+ ConstantRange RHSRange =
+ computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/true, SQ);
OverflowResult OR =
mapOverflowResult(LHSRange.signedAddMayOverflow(RHSRange));
if (OR != OverflowResult::MayOverflow)
@@ -6309,16 +6399,14 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
// CANNOT overflow. If this can be determined from the known bits of the
// operands the above signedAddMayOverflow() check will have already done so.
// The only other way to improve on the known bits is from an assumption, so
- // call computeKnownBitsFromAssume() directly.
+ // call computeKnownBitsFromContext() directly.
bool LHSOrRHSKnownNonNegative =
(LHSRange.isAllNonNegative() || RHSRange.isAllNonNegative());
bool LHSOrRHSKnownNegative =
(LHSRange.isAllNegative() || RHSRange.isAllNegative());
if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
KnownBits AddKnown(LHSRange.getBitWidth());
- computeKnownBitsFromAssume(
- Add, AddKnown, /*Depth=*/0,
- SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC, CxtI, DT));
+ computeKnownBitsFromContext(Add, AddKnown, /*Depth=*/0, SQ);
if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) ||
(AddKnown.isNegative() && LHSOrRHSKnownNegative))
return OverflowResult::NeverOverflows;
@@ -6329,10 +6417,7 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS,
const Value *RHS,
- const DataLayout &DL,
- AssumptionCache *AC,
- const Instruction *CxtI,
- const DominatorTree *DT) {
+ const SimplifyQuery &SQ) {
// X - (X % ?)
// The remainder of a value can't have greater magnitude than itself,
// so the subtraction can't overflow.
@@ -6346,32 +6431,29 @@ OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS,
// See simplifyICmpWithBinOpOnLHS() for candidates.
if (match(RHS, m_URem(m_Specific(LHS), m_Value())) ||
match(RHS, m_NUWSub(m_Specific(LHS), m_Value())))
- if (isGuaranteedNotToBeUndefOrPoison(LHS, AC, CxtI, DT))
+ if (isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
return OverflowResult::NeverOverflows;
// Checking for conditions implied by dominating conditions may be expensive.
// Limit it to usub_with_overflow calls for now.
- if (match(CxtI,
+ if (match(SQ.CxtI,
m_Intrinsic<Intrinsic::usub_with_overflow>(m_Value(), m_Value())))
- if (auto C =
- isImpliedByDomCondition(CmpInst::ICMP_UGE, LHS, RHS, CxtI, DL)) {
+ if (auto C = isImpliedByDomCondition(CmpInst::ICMP_UGE, LHS, RHS, SQ.CxtI,
+ SQ.DL)) {
if (*C)
return OverflowResult::NeverOverflows;
return OverflowResult::AlwaysOverflowsLow;
}
- ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(
- LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT);
- ConstantRange RHSRange = computeConstantRangeIncludingKnownBits(
- RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT);
+ ConstantRange LHSRange =
+ computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ);
+ ConstantRange RHSRange =
+ computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ);
return mapOverflowResult(LHSRange.unsignedSubMayOverflow(RHSRange));
}
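The X - (X % ?) identity used above, spelled out (illustrative host C++):

#include <cassert>
#include <cstdint>

int main() {
  // The remainder never exceeds X, so X - (X % D) cannot wrap.
  uint32_t X = 7u, D = 3u;
  assert(X - (X % D) == 6u);
  X = 0u;
  assert(X - (X % D) == 0u); // holds at the boundary too
  return 0;
}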
OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS,
const Value *RHS,
- const DataLayout &DL,
- AssumptionCache *AC,
- const Instruction *CxtI,
- const DominatorTree *DT) {
+ const SimplifyQuery &SQ) {
// X - (X % ?)
// The remainder of a value can't have greater magnitude than itself,
// so the subtraction can't overflow.
@@ -6382,19 +6464,19 @@ OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS,
// then determining no-overflow may allow other transforms.
if (match(RHS, m_SRem(m_Specific(LHS), m_Value())) ||
match(RHS, m_NSWSub(m_Specific(LHS), m_Value())))
- if (isGuaranteedNotToBeUndefOrPoison(LHS, AC, CxtI, DT))
+ if (isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
return OverflowResult::NeverOverflows;
// If LHS and RHS each have at least two sign bits, the subtraction
// cannot overflow.
- if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 &&
- ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1)
+ if (::ComputeNumSignBits(LHS, 0, SQ) > 1 &&
+ ::ComputeNumSignBits(RHS, 0, SQ) > 1)
return OverflowResult::NeverOverflows;
- ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(
- LHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT);
- ConstantRange RHSRange = computeConstantRangeIncludingKnownBits(
- RHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT);
+ ConstantRange LHSRange =
+ computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/true, SQ);
+ ConstantRange RHSRange =
+ computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/true, SQ);
return mapOverflowResult(LHSRange.signedSubMayOverflow(RHSRange));
}
@@ -6540,6 +6622,7 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly,
case Intrinsic::log2:
case Intrinsic::exp:
case Intrinsic::exp2:
+ case Intrinsic::exp10:
case Intrinsic::fabs:
case Intrinsic::copysign:
case Intrinsic::floor:
@@ -6557,6 +6640,8 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly,
case Intrinsic::minimum:
case Intrinsic::maximum:
case Intrinsic::is_fpclass:
+ case Intrinsic::ldexp:
+ case Intrinsic::frexp:
return false;
case Intrinsic::lround:
case Intrinsic::llround:
@@ -6748,7 +6833,9 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V,
return true;
if (const auto *CB = dyn_cast<CallBase>(V)) {
- if (CB->hasRetAttr(Attribute::NoUndef))
+ if (CB->hasRetAttr(Attribute::NoUndef) ||
+ CB->hasRetAttr(Attribute::Dereferenceable) ||
+ CB->hasRetAttr(Attribute::DereferenceableOrNull))
return true;
}
@@ -6838,6 +6925,13 @@ bool llvm::isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC,
return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, true);
}
+bool llvm::isGuaranteedNotToBeUndef(const Value *V, AssumptionCache *AC,
+ const Instruction *CtxI,
+ const DominatorTree *DT, unsigned Depth) {
+ // TODO: This is currently equivalent to isGuaranteedNotToBeUndefOrPoison().
+ return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, false);
+}
+
/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result. Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
@@ -6883,21 +6977,16 @@ bool llvm::mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
}
OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add,
- const DataLayout &DL,
- AssumptionCache *AC,
- const Instruction *CxtI,
- const DominatorTree *DT) {
+ const SimplifyQuery &SQ) {
return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1),
- Add, DL, AC, CxtI, DT);
+ Add, SQ);
}
-OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS,
- const Value *RHS,
- const DataLayout &DL,
- AssumptionCache *AC,
- const Instruction *CxtI,
- const DominatorTree *DT) {
- return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT);
+OverflowResult
+llvm::computeOverflowForSignedAdd(const WithCache<const Value *> &LHS,
+ const WithCache<const Value *> &RHS,
+ const SimplifyQuery &SQ) {
+ return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, SQ);
}
bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
@@ -7114,6 +7203,8 @@ static bool programUndefinedIfUndefOrPoison(const Value *V,
Begin = Inst->getIterator();
Begin++;
} else if (const auto *Arg = dyn_cast<Argument>(V)) {
+ if (Arg->getParent()->isDeclaration())
+ return false;
BB = &Arg->getParent()->getEntryBlock();
Begin = BB->begin();
} else {
@@ -7760,6 +7851,7 @@ static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
if (!C)
return nullptr;
+ const DataLayout &DL = CmpI->getModule()->getDataLayout();
Constant *CastedTo = nullptr;
switch (*CastOp) {
case Instruction::ZExt:
@@ -7797,26 +7889,27 @@ static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
// CmpConst == C is checked below.
CastedTo = CmpConst;
} else {
- CastedTo = ConstantExpr::getIntegerCast(C, SrcTy, CmpI->isSigned());
+ unsigned ExtOp = CmpI->isSigned() ? Instruction::SExt : Instruction::ZExt;
+ CastedTo = ConstantFoldCastOperand(ExtOp, C, SrcTy, DL);
}
break;
case Instruction::FPTrunc:
- CastedTo = ConstantExpr::getFPExtend(C, SrcTy, true);
+ CastedTo = ConstantFoldCastOperand(Instruction::FPExt, C, SrcTy, DL);
break;
case Instruction::FPExt:
- CastedTo = ConstantExpr::getFPTrunc(C, SrcTy, true);
+ CastedTo = ConstantFoldCastOperand(Instruction::FPTrunc, C, SrcTy, DL);
break;
case Instruction::FPToUI:
- CastedTo = ConstantExpr::getUIToFP(C, SrcTy, true);
+ CastedTo = ConstantFoldCastOperand(Instruction::UIToFP, C, SrcTy, DL);
break;
case Instruction::FPToSI:
- CastedTo = ConstantExpr::getSIToFP(C, SrcTy, true);
+ CastedTo = ConstantFoldCastOperand(Instruction::SIToFP, C, SrcTy, DL);
break;
case Instruction::UIToFP:
- CastedTo = ConstantExpr::getFPToUI(C, SrcTy, true);
+ CastedTo = ConstantFoldCastOperand(Instruction::FPToUI, C, SrcTy, DL);
break;
case Instruction::SIToFP:
- CastedTo = ConstantExpr::getFPToSI(C, SrcTy, true);
+ CastedTo = ConstantFoldCastOperand(Instruction::FPToSI, C, SrcTy, DL);
break;
default:
break;
@@ -7827,8 +7920,8 @@ static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
// Make sure the cast doesn't lose any information.
Constant *CastedBack =
- ConstantExpr::getCast(*CastOp, CastedTo, C->getType(), true);
- if (CastedBack != C)
+ ConstantFoldCastOperand(*CastOp, CastedTo, C->getType(), DL);
+ if (CastedBack && CastedBack != C)
return nullptr;
return CastedTo;
@@ -7989,7 +8082,7 @@ bool llvm::matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO,
for (unsigned i = 0; i != 2; ++i) {
Value *L = P->getIncomingValue(i);
Value *R = P->getIncomingValue(!i);
- Operator *LU = dyn_cast<Operator>(L);
+ auto *LU = dyn_cast<BinaryOperator>(L);
if (!LU)
continue;
unsigned Opcode = LU->getOpcode();
@@ -8027,7 +8120,7 @@ bool llvm::matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO,
// OR
// %iv = [R, %entry], [%iv.next, %backedge]
// %iv.next = binop L, %iv
- BO = cast<BinaryOperator>(LU);
+ BO = LU;
Start = R;
Step = L;
return true;
@@ -8065,10 +8158,9 @@ static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS,
}
case CmpInst::ICMP_ULE: {
- const APInt *C;
-
- // LHS u<= LHS +_{nuw} C for any C
- if (match(RHS, m_NUWAdd(m_Specific(LHS), m_APInt(C))))
+ // LHS u<= LHS +_{nuw} V for any V
+ if (match(RHS, m_c_Add(m_Specific(LHS), m_Value())) &&
+ cast<OverflowingBinaryOperator>(RHS)->hasNoUnsignedWrap())
return true;
// RHS >> V u<= RHS for any V
@@ -8207,17 +8299,40 @@ static std::optional<bool> isImpliedCondICmps(const ICmpInst *LHS,
CmpInst::Predicate LPred =
LHSIsTrue ? LHS->getPredicate() : LHS->getInversePredicate();
- // Can we infer anything when the two compares have matching operands?
- bool AreSwappedOps;
- if (areMatchingOperands(L0, L1, R0, R1, AreSwappedOps))
- return isImpliedCondMatchingOperands(LPred, RPred, AreSwappedOps);
-
// Can we infer anything when the 0-operands match and the 1-operands are
// constants (not necessarily matching)?
const APInt *LC, *RC;
if (L0 == R0 && match(L1, m_APInt(LC)) && match(R1, m_APInt(RC)))
return isImpliedCondCommonOperandWithConstants(LPred, *LC, RPred, *RC);
+ // Can we infer anything when the two compares have matching operands?
+ bool AreSwappedOps;
+ if (areMatchingOperands(L0, L1, R0, R1, AreSwappedOps))
+ return isImpliedCondMatchingOperands(LPred, RPred, AreSwappedOps);
+
+ // L0 = R0 = L1 + R1, L0 >=u L1 implies R0 >=u R1, L0 <u L1 implies R0 <u R1
+ if (ICmpInst::isUnsigned(LPred) && ICmpInst::isUnsigned(RPred)) {
+ if (L0 == R1) {
+ std::swap(R0, R1);
+ RPred = ICmpInst::getSwappedPredicate(RPred);
+ }
+ if (L1 == R0) {
+ std::swap(L0, L1);
+ LPred = ICmpInst::getSwappedPredicate(LPred);
+ }
+ if (L1 == R1) {
+ std::swap(L0, L1);
+ LPred = ICmpInst::getSwappedPredicate(LPred);
+ std::swap(R0, R1);
+ RPred = ICmpInst::getSwappedPredicate(RPred);
+ }
+ if (L0 == R0 &&
+ (LPred == ICmpInst::ICMP_ULT || LPred == ICmpInst::ICMP_UGE) &&
+ (RPred == ICmpInst::ICMP_ULT || RPred == ICmpInst::ICMP_UGE) &&
+ match(L0, m_c_Add(m_Specific(L1), m_Specific(R1))))
+ return LPred == RPred;
+ }
+
if (LPred == RPred)
return isImpliedCondOperands(LPred, L0, L1, R0, R1, DL, Depth);
@@ -8427,6 +8542,11 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
if (match(BO.getOperand(1), m_APInt(C)))
// 'and x, C' produces [0, C].
Upper = *C + 1;
+ // X & -X is a power of two or zero, so we can cap the value at the
+ // maximum power of two.
+ if (match(BO.getOperand(0), m_Neg(m_Specific(BO.getOperand(1)))) ||
+ match(BO.getOperand(1), m_Neg(m_Specific(BO.getOperand(0)))))
+ Upper = APInt::getSignedMinValue(Width) + 1;
break;
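Why X & -X is capped at the sign bit (illustrative host C++ for i32):

#include <cassert>
#include <cstdint>

int main() {
  // x & -x keeps only the lowest set bit: a power of two or zero.
  uint32_t X = 0x58u;
  assert((X & (0u - X)) == 0x8u);
  // The largest possible value is the top bit (0x80000000 for i32),
  // matching the getSignedMinValue(Width) + 1 exclusive bound above.
  uint32_t Top = 0x80000000u;
  assert((Top & (0u - Top)) == Top);
  return 0;
}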
case Instruction::Or:
@@ -8488,7 +8608,20 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
Lower = *C;
Upper = C->shl(ShiftAmount) + 1;
}
+ } else {
+ // If the low bit is set, the value can never be zero.
+ if ((*C)[0])
+ Lower = APInt::getOneBitSet(Width, 0);
+ // If we are shifting a constant, the largest it can be is when the longest
+ // sequence of consecutive ones is shifted into the high bits (breaking ties
+ // toward the higher sequence). At the moment we take a liberal upper bound
+ // on this by just popcounting the constant.
+ // TODO: There may be a bitwise trick for finding the longest/highest
+ // consecutive sequence of ones (the naive method is an O(Width) loop).
+ Upper = APInt::getHighBitsSet(Width, C->popcount()) + 1;
}
+ } else if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
+ Upper = APInt::getBitsSetFrom(Width, C->getZExtValue()) + 1;
}
break;
@@ -8659,56 +8792,50 @@ static ConstantRange getRangeForIntrinsic(const IntrinsicInst &II) {
return ConstantRange::getFull(Width);
}
-static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower,
- APInt &Upper, const InstrInfoQuery &IIQ) {
+static ConstantRange getRangeForSelectPattern(const SelectInst &SI,
+ const InstrInfoQuery &IIQ) {
+ unsigned BitWidth = SI.getType()->getScalarSizeInBits();
const Value *LHS = nullptr, *RHS = nullptr;
SelectPatternResult R = matchSelectPattern(&SI, LHS, RHS);
if (R.Flavor == SPF_UNKNOWN)
- return;
-
- unsigned BitWidth = SI.getType()->getScalarSizeInBits();
+ return ConstantRange::getFull(BitWidth);
if (R.Flavor == SelectPatternFlavor::SPF_ABS) {
// If the negation part of the abs (in RHS) has the NSW flag,
// then the result of abs(X) is [0..SIGNED_MAX],
// otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
- Lower = APInt::getZero(BitWidth);
if (match(RHS, m_Neg(m_Specific(LHS))) &&
IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
- Upper = APInt::getSignedMaxValue(BitWidth) + 1;
- else
- Upper = APInt::getSignedMinValue(BitWidth) + 1;
- return;
+ return ConstantRange::getNonEmpty(APInt::getZero(BitWidth),
+ APInt::getSignedMaxValue(BitWidth) + 1);
+
+ return ConstantRange::getNonEmpty(APInt::getZero(BitWidth),
+ APInt::getSignedMinValue(BitWidth) + 1);
}
if (R.Flavor == SelectPatternFlavor::SPF_NABS) {
// The result of -abs(X) is <= 0.
- Lower = APInt::getSignedMinValue(BitWidth);
- Upper = APInt(BitWidth, 1);
- return;
+ return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth),
+ APInt(BitWidth, 1));
}
const APInt *C;
if (!match(LHS, m_APInt(C)) && !match(RHS, m_APInt(C)))
- return;
+ return ConstantRange::getFull(BitWidth);
switch (R.Flavor) {
- case SPF_UMIN:
- Upper = *C + 1;
- break;
- case SPF_UMAX:
- Lower = *C;
- break;
- case SPF_SMIN:
- Lower = APInt::getSignedMinValue(BitWidth);
- Upper = *C + 1;
- break;
- case SPF_SMAX:
- Lower = *C;
- Upper = APInt::getSignedMaxValue(BitWidth) + 1;
- break;
- default:
- break;
+ case SPF_UMIN:
+ return ConstantRange::getNonEmpty(APInt::getZero(BitWidth), *C + 1);
+ case SPF_UMAX:
+ return ConstantRange::getNonEmpty(*C, APInt::getZero(BitWidth));
+ case SPF_SMIN:
+ return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth),
+ *C + 1);
+ case SPF_SMAX:
+ return ConstantRange::getNonEmpty(*C,
+ APInt::getSignedMaxValue(BitWidth) + 1);
+ default:
+ return ConstantRange::getFull(BitWidth);
}
}
@@ -8742,9 +8869,17 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
const APInt *C;
if (match(V, m_APInt(C)))
return ConstantRange(*C);
+ unsigned BitWidth = V->getType()->getScalarSizeInBits();
+
+ if (auto *VC = dyn_cast<ConstantDataVector>(V)) {
+ ConstantRange CR = ConstantRange::getEmpty(BitWidth);
+ for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem;
+ ++ElemIdx)
+ CR = CR.unionWith(VC->getElementAsAPInt(ElemIdx));
+ return CR;
+ }
InstrInfoQuery IIQ(UseInstrInfo);
- unsigned BitWidth = V->getType()->getScalarSizeInBits();
ConstantRange CR = ConstantRange::getFull(BitWidth);
if (auto *BO = dyn_cast<BinaryOperator>(V)) {
APInt Lower = APInt(BitWidth, 0);
@@ -8755,11 +8890,12 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
} else if (auto *II = dyn_cast<IntrinsicInst>(V))
CR = getRangeForIntrinsic(*II);
else if (auto *SI = dyn_cast<SelectInst>(V)) {
- APInt Lower = APInt(BitWidth, 0);
- APInt Upper = APInt(BitWidth, 0);
- // TODO: Return ConstantRange.
- setLimitsForSelectPattern(*SI, Lower, Upper, IIQ);
- CR = ConstantRange::getNonEmpty(Lower, Upper);
+ ConstantRange CRTrue = computeConstantRange(
+ SI->getTrueValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1);
+ ConstantRange CRFalse = computeConstantRange(
+ SI->getFalseValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1);
+ CR = CRTrue.unionWith(CRFalse);
+ CR = CR.intersectWith(getRangeForSelectPattern(*SI, IIQ));
} else if (isa<FPToUIInst>(V) || isa<FPToSIInst>(V)) {
APInt Lower = APInt(BitWidth, 0);
APInt Upper = APInt(BitWidth, 0);
diff --git a/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp
index 87f0bb690477..91d8c31fa062 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp
@@ -12,7 +12,6 @@
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/ADT/EquivalenceClasses.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
@@ -92,6 +91,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::canonicalize:
case Intrinsic::fptosi_sat:
case Intrinsic::fptoui_sat:
+ case Intrinsic::lrint:
+ case Intrinsic::llrint:
return true;
default:
return false;
@@ -123,6 +124,8 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
switch (ID) {
case Intrinsic::fptosi_sat:
case Intrinsic::fptoui_sat:
+ case Intrinsic::lrint:
+ case Intrinsic::llrint:
return OpdIdx == -1 || OpdIdx == 0;
case Intrinsic::is_fpclass:
return OpdIdx == 0;
@@ -1158,14 +1161,11 @@ void InterleavedAccessInfo::analyzeInterleaving(
LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
<< '\n');
GroupB = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
- } else if (CompletedLoadGroups.contains(GroupB)) {
- // Skip B if no new instructions can be added to its load group.
- continue;
+ if (B->mayWriteToMemory())
+ StoreGroups.insert(GroupB);
+ else
+ LoadGroups.insert(GroupB);
}
- if (B->mayWriteToMemory())
- StoreGroups.insert(GroupB);
- else
- LoadGroups.insert(GroupB);
}
for (auto AI = std::next(BI); AI != E; ++AI) {
@@ -1191,38 +1191,62 @@ void InterleavedAccessInfo::analyzeInterleaving(
// Because accesses (2) and (3) are dependent, we can group (2) with (1)
// but not with (4). If we did, the dependent access (3) would be within
// the boundaries of the (2, 4) group.
- if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
- // If a dependence exists and A is already in a group, we know that A
- // must be a store since A precedes B and WAR dependences are allowed.
- // Thus, A would be sunk below B. We release A's group to prevent this
- // illegal code motion. A will then be free to form another group with
- // instructions that precede it.
- if (isInterleaved(A)) {
- InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
-
- LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
- "dependence between " << *A << " and "<< *B << '\n');
-
- StoreGroups.remove(StoreGroup);
- releaseGroup(StoreGroup);
- }
- // If B is a load and part of an interleave group, no earlier loads can
- // be added to B's interleave group, because this would mean the load B
- // would need to be moved across store A. Mark the interleave group as
- // complete.
- if (GroupB && isa<LoadInst>(B)) {
- LLVM_DEBUG(dbgs() << "LV: Marking interleave group for " << *B
- << " as complete.\n");
-
- CompletedLoadGroups.insert(GroupB);
+ auto DependentMember = [&](InterleaveGroup<Instruction> *Group,
+ StrideEntry *A) -> Instruction * {
+ for (uint32_t Index = 0; Index < Group->getFactor(); ++Index) {
+ Instruction *MemberOfGroupB = Group->getMember(Index);
+ if (MemberOfGroupB && !canReorderMemAccessesForInterleavedGroups(
+ A, &*AccessStrideInfo.find(MemberOfGroupB)))
+ return MemberOfGroupB;
}
+ return nullptr;
+ };
- // If a dependence exists and A is not already in a group (or it was
- // and we just released it), B might be hoisted above A (if B is a
- // load) or another store might be sunk below A (if B is a store). In
- // either case, we can't add additional instructions to B's group. B
- // will only form a group with instructions that it precedes.
- break;
+ auto GroupA = getInterleaveGroup(A);
+ // If A is a load, dependencies are tolerable, so there's nothing to do here.
+ // If both A and B belong to the same (store) group, they are independent,
+ // even if dependencies have not been recorded.
+ // If both GroupA and GroupB are null, there's nothing to do here.
+ if (A->mayWriteToMemory() && GroupA != GroupB) {
+ Instruction *DependentInst = nullptr;
+ // If GroupB is a load group, we have to compare AI against all
+ // members of GroupB because if any load within GroupB has a dependency
+ // on AI, we need to mark GroupB as complete and also release the
+ // store GroupA (if A belongs to one). The former prevents incorrect
+ // hoisting of load B above store A while the latter prevents incorrect
+ // sinking of store A below load B.
+ if (GroupB && LoadGroups.contains(GroupB))
+ DependentInst = DependentMember(GroupB, &*AI);
+ else if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI))
+ DependentInst = B;
+
+ if (DependentInst) {
+ // A has a store dependence on B (or on some load within GroupB) and
+ // is part of a store group. Release A's group to prevent illegal
+ // sinking of A below B. A will then be free to form another group
+ // with instructions that precede it.
+ if (GroupA && StoreGroups.contains(GroupA)) {
+ LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
+ "dependence between "
+ << *A << " and " << *DependentInst << '\n');
+ StoreGroups.remove(GroupA);
+ releaseGroup(GroupA);
+ }
+ // If B is a load and part of an interleave group, no earlier loads
+ // can be added to B's interleave group, because this would mean the
+ // DependentInst would move across store A. Mark the interleave group
+ // as complete.
+ if (GroupB && LoadGroups.contains(GroupB)) {
+ LLVM_DEBUG(dbgs() << "LV: Marking interleave group for " << *B
+ << " as complete.\n");
+ CompletedLoadGroups.insert(GroupB);
+ }
+ }
+ }
+ if (CompletedLoadGroups.contains(GroupB)) {
+ // Skip trying to add A to B; continue looking for other conflicting A's
+ // in groups to be released.
+ continue;
}
// At this point, we've checked for illegal code motion. If either A or B
@@ -1432,22 +1456,6 @@ void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
}
}
-std::string VFABI::mangleTLIVectorName(StringRef VectorName,
- StringRef ScalarName, unsigned numArgs,
- ElementCount VF, bool Masked) {
- SmallString<256> Buffer;
- llvm::raw_svector_ostream Out(Buffer);
- Out << "_ZGV" << VFABI::_LLVM_ << (Masked ? "M" : "N");
- if (VF.isScalable())
- Out << 'x';
- else
- Out << VF.getFixedValue();
- for (unsigned I = 0; I < numArgs; ++I)
- Out << "v";
- Out << "_" << ScalarName << "(" << VectorName << ")";
- return std::string(Out.str());
-}
-
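For reference, a minimal re-creation of what the deleted helper produced (illustrative only; the fixed "_ZGV_LLVM_" prefix and the example names are assumptions, and scalable VFs, which the original encoded as 'x', are omitted):

#include <cassert>
#include <string>

std::string mangleName(const std::string &Vector, const std::string &Scalar,
                       unsigned NumArgs, unsigned VF, bool Masked) {
  std::string Out = "_ZGV_LLVM_";
  Out += Masked ? "M" : "N";        // mask token
  Out += std::to_string(VF);        // fixed vectorization factor
  Out.append(NumArgs, 'v');         // one 'v' per vector argument
  Out += "_" + Scalar + "(" + Vector + ")";
  return Out;
}

int main() {
  assert(mangleName("vec_sin", "sin", 1, 4, false) ==
         "_ZGV_LLVM_N4v_sin(vec_sin)");
  return 0;
}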
void VFABI::getVectorVariantNames(
const CallInst &CI, SmallVectorImpl<std::string> &VariantMappings) {
const StringRef S = CI.getFnAttr(VFABI::MappingsAttrName).getValueAsString();
@@ -1458,15 +1466,14 @@ void VFABI::getVectorVariantNames(
S.split(ListAttr, ",");
for (const auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) {
-#ifndef NDEBUG
- LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n");
std::optional<VFInfo> Info =
- VFABI::tryDemangleForVFABI(S, *(CI.getModule()));
- assert(Info && "Invalid name for a VFABI variant.");
- assert(CI.getModule()->getFunction(Info->VectorName) &&
- "Vector function is missing.");
-#endif
- VariantMappings.push_back(std::string(S));
+ VFABI::tryDemangleForVFABI(S, CI.getFunctionType());
+ if (Info && CI.getModule()->getFunction(Info->VectorName)) {
+ LLVM_DEBUG(dbgs() << "VFABI: Adding mapping '" << S << "' for " << CI
+ << "\n");
+ VariantMappings.push_back(std::string(S));
+ } else
+ LLVM_DEBUG(dbgs() << "VFABI: Invalid mapping '" << S << "'\n");
}
}
diff --git a/contrib/llvm-project/llvm/lib/AsmParser/LLLexer.cpp b/contrib/llvm-project/llvm/lib/AsmParser/LLLexer.cpp
index 466bdebc001f..c8da3efbb68a 100644
--- a/contrib/llvm-project/llvm/lib/AsmParser/LLLexer.cpp
+++ b/contrib/llvm-project/llvm/lib/AsmParser/LLLexer.cpp
@@ -279,7 +279,7 @@ lltok::Kind LLLexer::LexDollar() {
if (CurChar == '"') {
StrVal.assign(TokStart + 2, CurPtr - 1);
UnEscapeLexed(StrVal);
- if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
+ if (StringRef(StrVal).contains(0)) {
Error("Null bytes are not allowed in names");
return lltok::Error;
}
@@ -362,7 +362,7 @@ lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
if (CurChar == '"') {
StrVal.assign(TokStart+2, CurPtr-1);
UnEscapeLexed(StrVal);
- if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
+ if (StringRef(StrVal).contains(0)) {
Error("Null bytes are not allowed in names");
return lltok::Error;
}
@@ -397,7 +397,7 @@ lltok::Kind LLLexer::LexQuote() {
if (CurPtr[0] == ':') {
++CurPtr;
- if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
+ if (StringRef(StrVal).contains(0)) {
Error("Null bytes are not allowed in names");
kind = lltok::Error;
} else {
@@ -564,11 +564,14 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(nuw);
KEYWORD(nsw);
KEYWORD(exact);
+ KEYWORD(disjoint);
KEYWORD(inbounds);
+ KEYWORD(nneg);
KEYWORD(inrange);
KEYWORD(addrspace);
KEYWORD(section);
KEYWORD(partition);
+ KEYWORD(code_model);
KEYWORD(alias);
KEYWORD(ifunc);
KEYWORD(module);
@@ -609,7 +612,6 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(x86_64_sysvcc);
KEYWORD(win64cc);
KEYWORD(x86_regcallcc);
- KEYWORD(webkit_jscc);
KEYWORD(swiftcc);
KEYWORD(swifttailcc);
KEYWORD(anyregcc);
@@ -632,6 +634,8 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(amdgpu_kernel);
KEYWORD(amdgpu_gfx);
KEYWORD(tailcc);
+ KEYWORD(m68k_rtdcc);
+ KEYWORD(graalcc);
KEYWORD(cc);
KEYWORD(c);
@@ -694,6 +698,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(uinc_wrap);
KEYWORD(udec_wrap);
+ KEYWORD(splat);
KEYWORD(vscale);
KEYWORD(x);
KEYWORD(blockaddress);
@@ -901,7 +906,7 @@ lltok::Kind LLLexer::LexIdentifier() {
#define DWKEYWORD(TYPE, TOKEN) \
do { \
- if (Keyword.startswith("DW_" #TYPE "_")) { \
+ if (Keyword.starts_with("DW_" #TYPE "_")) { \
StrVal.assign(Keyword.begin(), Keyword.end()); \
return lltok::TOKEN; \
} \
@@ -917,17 +922,17 @@ lltok::Kind LLLexer::LexIdentifier() {
#undef DWKEYWORD
- if (Keyword.startswith("DIFlag")) {
+ if (Keyword.starts_with("DIFlag")) {
StrVal.assign(Keyword.begin(), Keyword.end());
return lltok::DIFlag;
}
- if (Keyword.startswith("DISPFlag")) {
+ if (Keyword.starts_with("DISPFlag")) {
StrVal.assign(Keyword.begin(), Keyword.end());
return lltok::DISPFlag;
}
- if (Keyword.startswith("CSK_")) {
+ if (Keyword.starts_with("CSK_")) {
StrVal.assign(Keyword.begin(), Keyword.end());
return lltok::ChecksumKind;
}
diff --git a/contrib/llvm-project/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm-project/llvm/lib/AsmParser/LLParser.cpp
index 5f0d1a76de79..fb9e1ba875e1 100644
--- a/contrib/llvm-project/llvm/lib/AsmParser/LLParser.cpp
+++ b/contrib/llvm-project/llvm/lib/AsmParser/LLParser.cpp
@@ -1286,6 +1286,11 @@ bool LLParser::parseGlobal(const std::string &Name, LocTy NameLoc,
return true;
if (Alignment)
GV->setAlignment(*Alignment);
+ } else if (Lex.getKind() == lltok::kw_code_model) {
+ CodeModel::Model CodeModel;
+ if (parseOptionalCodeModel(CodeModel))
+ return true;
+ GV->setCodeModel(CodeModel);
} else if (Lex.getKind() == lltok::MetadataVar) {
if (parseGlobalObjectMetadataAttachment(*GV))
return true;
@@ -1977,7 +1982,6 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'spir_kernel'
/// ::= 'x86_64_sysvcc'
/// ::= 'win64cc'
-/// ::= 'webkit_jscc'
/// ::= 'anyregcc'
/// ::= 'preserve_mostcc'
/// ::= 'preserve_allcc'
@@ -1999,6 +2003,8 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'amdgpu_cs_chain_preserve'
/// ::= 'amdgpu_kernel'
/// ::= 'tailcc'
+/// ::= 'm68k_rtdcc'
+/// ::= 'graalcc'
/// ::= 'cc' UINT
///
bool LLParser::parseOptionalCallingConv(unsigned &CC) {
@@ -2036,7 +2042,6 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) {
case lltok::kw_intel_ocl_bicc: CC = CallingConv::Intel_OCL_BI; break;
case lltok::kw_x86_64_sysvcc: CC = CallingConv::X86_64_SysV; break;
case lltok::kw_win64cc: CC = CallingConv::Win64; break;
- case lltok::kw_webkit_jscc: CC = CallingConv::WebKit_JS; break;
case lltok::kw_anyregcc: CC = CallingConv::AnyReg; break;
case lltok::kw_preserve_mostcc:CC = CallingConv::PreserveMost; break;
case lltok::kw_preserve_allcc: CC = CallingConv::PreserveAll; break;
@@ -2067,6 +2072,8 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) {
break;
case lltok::kw_amdgpu_kernel: CC = CallingConv::AMDGPU_KERNEL; break;
case lltok::kw_tailcc: CC = CallingConv::Tail; break;
+ case lltok::kw_m68k_rtdcc: CC = CallingConv::M68k_RTD; break;
+ case lltok::kw_graalcc: CC = CallingConv::GRAAL; break;
case lltok::kw_cc: {
Lex.Lex();
return parseUInt32(CC);
@@ -2166,6 +2173,30 @@ bool LLParser::parseOptionalAlignment(MaybeAlign &Alignment, bool AllowParens) {
return false;
}
+/// parseOptionalCodeModel
+/// ::= /* empty */
+/// ::= 'code_model' "large"
+bool LLParser::parseOptionalCodeModel(CodeModel::Model &model) {
+ Lex.Lex();
+ auto StrVal = Lex.getStrVal();
+ auto ErrMsg = "expected global code model string";
+ if (StrVal == "tiny")
+ model = CodeModel::Tiny;
+ else if (StrVal == "small")
+ model = CodeModel::Small;
+ else if (StrVal == "kernel")
+ model = CodeModel::Kernel;
+ else if (StrVal == "medium")
+ model = CodeModel::Medium;
+ else if (StrVal == "large")
+ model = CodeModel::Large;
+ else
+ return tokError(ErrMsg);
+ if (parseToken(lltok::StringConstant, ErrMsg))
+ return true;
+ return false;
+}
+
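
As a rough standalone sketch of the string-to-code-model mapping the new parser accepts (the enum here is an illustrative stand-in, not LLVM's CodeModel::Model):

#include <optional>
#include <string>

enum class CM { Tiny, Small, Kernel, Medium, Large };

// Map the accepted code model strings; anything else is a parse error
// upstream ("expected global code model string").
static std::optional<CM> parseCM(const std::string &S) {
  if (S == "tiny")   return CM::Tiny;
  if (S == "small")  return CM::Small;
  if (S == "kernel") return CM::Kernel;
  if (S == "medium") return CM::Medium;
  if (S == "large")  return CM::Large;
  return std::nullopt;
}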
/// parseOptionalDerefAttrBytes
/// ::= /* empty */
/// ::= AttrKind '(' 4 ')'
@@ -3803,16 +3834,8 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) {
}
case lltok::kw_trunc:
- case lltok::kw_zext:
- case lltok::kw_sext:
- case lltok::kw_fptrunc:
- case lltok::kw_fpext:
case lltok::kw_bitcast:
case lltok::kw_addrspacecast:
- case lltok::kw_uitofp:
- case lltok::kw_sitofp:
- case lltok::kw_fptoui:
- case lltok::kw_fptosi:
case lltok::kw_inttoptr:
case lltok::kw_ptrtoint: {
unsigned Opc = Lex.getUIntVal();
@@ -3856,10 +3879,34 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) {
return error(ID.Loc, "fdiv constexprs are no longer supported");
case lltok::kw_frem:
return error(ID.Loc, "frem constexprs are no longer supported");
+ case lltok::kw_and:
+ return error(ID.Loc, "and constexprs are no longer supported");
+ case lltok::kw_or:
+ return error(ID.Loc, "or constexprs are no longer supported");
+ case lltok::kw_lshr:
+ return error(ID.Loc, "lshr constexprs are no longer supported");
+ case lltok::kw_ashr:
+ return error(ID.Loc, "ashr constexprs are no longer supported");
case lltok::kw_fneg:
return error(ID.Loc, "fneg constexprs are no longer supported");
case lltok::kw_select:
return error(ID.Loc, "select constexprs are no longer supported");
+ case lltok::kw_zext:
+ return error(ID.Loc, "zext constexprs are no longer supported");
+ case lltok::kw_sext:
+ return error(ID.Loc, "sext constexprs are no longer supported");
+ case lltok::kw_fptrunc:
+ return error(ID.Loc, "fptrunc constexprs are no longer supported");
+ case lltok::kw_fpext:
+ return error(ID.Loc, "fpext constexprs are no longer supported");
+ case lltok::kw_uitofp:
+ return error(ID.Loc, "uitofp constexprs are no longer supported");
+ case lltok::kw_sitofp:
+ return error(ID.Loc, "sitofp constexprs are no longer supported");
+ case lltok::kw_fptoui:
+ return error(ID.Loc, "fptoui constexprs are no longer supported");
+ case lltok::kw_fptosi:
+ return error(ID.Loc, "fptosi constexprs are no longer supported");
case lltok::kw_icmp:
case lltok::kw_fcmp: {
unsigned PredVal, Opc = Lex.getUIntVal();
@@ -3898,11 +3945,9 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) {
case lltok::kw_sub:
case lltok::kw_mul:
case lltok::kw_shl:
- case lltok::kw_lshr:
- case lltok::kw_ashr: {
+ case lltok::kw_xor: {
bool NUW = false;
bool NSW = false;
- bool Exact = false;
unsigned Opc = Lex.getUIntVal();
Constant *Val0, *Val1;
Lex.Lex();
@@ -3915,10 +3960,6 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) {
if (EatIfPresent(lltok::kw_nuw))
NUW = true;
}
- } else if (Opc == Instruction::SDiv || Opc == Instruction::UDiv ||
- Opc == Instruction::LShr || Opc == Instruction::AShr) {
- if (EatIfPresent(lltok::kw_exact))
- Exact = true;
}
if (parseToken(lltok::lparen, "expected '(' in binary constantexpr") ||
parseGlobalTypeAndValue(Val0) ||
@@ -3929,60 +3970,29 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) {
if (Val0->getType() != Val1->getType())
return error(ID.Loc, "operands of constexpr must have same type");
// Check that the type is valid for the operator.
- switch (Opc) {
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::Shl:
- case Instruction::AShr:
- case Instruction::LShr:
- if (!Val0->getType()->isIntOrIntVectorTy())
- return error(ID.Loc, "constexpr requires integer operands");
- break;
- case Instruction::FAdd:
- case Instruction::FSub:
- case Instruction::FMul:
- case Instruction::FDiv:
- case Instruction::FRem:
- if (!Val0->getType()->isFPOrFPVectorTy())
- return error(ID.Loc, "constexpr requires fp operands");
- break;
- default: llvm_unreachable("Unknown binary operator!");
- }
+ if (!Val0->getType()->isIntOrIntVectorTy())
+ return error(ID.Loc,
+ "constexpr requires integer or integer vector operands");
unsigned Flags = 0;
if (NUW) Flags |= OverflowingBinaryOperator::NoUnsignedWrap;
if (NSW) Flags |= OverflowingBinaryOperator::NoSignedWrap;
- if (Exact) Flags |= PossiblyExactOperator::IsExact;
- Constant *C = ConstantExpr::get(Opc, Val0, Val1, Flags);
- ID.ConstantVal = C;
+ ID.ConstantVal = ConstantExpr::get(Opc, Val0, Val1, Flags);
ID.Kind = ValID::t_Constant;
return false;
}
- // Logical Operations
- case lltok::kw_and:
- case lltok::kw_or:
- case lltok::kw_xor: {
- unsigned Opc = Lex.getUIntVal();
- Constant *Val0, *Val1;
+ case lltok::kw_splat: {
Lex.Lex();
- if (parseToken(lltok::lparen, "expected '(' in logical constantexpr") ||
- parseGlobalTypeAndValue(Val0) ||
- parseToken(lltok::comma, "expected comma in logical constantexpr") ||
- parseGlobalTypeAndValue(Val1) ||
- parseToken(lltok::rparen, "expected ')' in logical constantexpr"))
+ if (parseToken(lltok::lparen, "expected '(' after vector splat"))
return true;
- if (Val0->getType() != Val1->getType())
- return error(ID.Loc, "operands of constexpr must have same type");
- if (!Val0->getType()->isIntOrIntVectorTy())
- return error(ID.Loc,
- "constexpr requires integer or integer vector operands");
- ID.ConstantVal = ConstantExpr::get(Opc, Val0, Val1);
- ID.Kind = ValID::t_Constant;
+ Constant *C;
+ if (parseGlobalTypeAndValue(C))
+ return true;
+ if (parseToken(lltok::rparen, "expected ')' at end of vector splat"))
+ return true;
+
+ ID.ConstantVal = C;
+ ID.Kind = ValID::t_ConstantSplat;
return false;
}
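
The new t_ConstantSplat path replicates one scalar constant across every lane of the requested vector type. A hedged sketch of that expansion, with plain ints standing in for llvm::Constant (getSplat below is illustrative, not LLVM's API):

#include <cassert>
#include <vector>

// Replicate one scalar across all lanes of a fixed-width vector,
// as ConstantVector::getSplat does for the 'splat (...)' syntax above.
static std::vector<int> getSplat(unsigned NumElements, int Scalar) {
  return std::vector<int>(NumElements, Scalar);
}

int main() {
  auto V = getSplat(4, 7); // roughly '<4 x i32> splat (i32 7)' in IR text
  assert(V.size() == 4 && V.front() == 7 && V.back() == 7);
  return 0;
}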
@@ -5522,13 +5532,9 @@ bool LLParser::parseDIExpression(MDNode *&Result, bool IsDistinct) {
return false;
}
-bool LLParser::parseDIArgList(MDNode *&Result, bool IsDistinct) {
- return parseDIArgList(Result, IsDistinct, nullptr);
-}
/// ParseDIArgList:
/// ::= !DIArgList(i32 7, i64 %0)
-bool LLParser::parseDIArgList(MDNode *&Result, bool IsDistinct,
- PerFunctionState *PFS) {
+bool LLParser::parseDIArgList(Metadata *&MD, PerFunctionState *PFS) {
assert(PFS && "Expected valid function state");
assert(Lex.getKind() == lltok::MetadataVar && "Expected metadata type name");
Lex.Lex();
@@ -5548,7 +5554,7 @@ bool LLParser::parseDIArgList(MDNode *&Result, bool IsDistinct,
if (parseToken(lltok::rparen, "expected ')' here"))
return true;
- Result = GET_OR_DISTINCT(DIArgList, (Context, Args));
+ MD = DIArgList::get(Context, Args);
return false;
}
@@ -5662,13 +5668,17 @@ bool LLParser::parseValueAsMetadata(Metadata *&MD, const Twine &TypeMsg,
/// ::= !DILocation(...)
bool LLParser::parseMetadata(Metadata *&MD, PerFunctionState *PFS) {
if (Lex.getKind() == lltok::MetadataVar) {
- MDNode *N;
// DIArgLists are a special case, as they are a list of ValueAsMetadata and
// so parsing this requires a Function State.
if (Lex.getStrVal() == "DIArgList") {
- if (parseDIArgList(N, false, PFS))
+ Metadata *AL;
+ if (parseDIArgList(AL, PFS))
return true;
- } else if (parseSpecializedMDNode(N)) {
+ MD = AL;
+ return false;
+ }
+ MDNode *N;
+ if (parseSpecializedMDNode(N)) {
return true;
}
MD = N;
@@ -5829,6 +5839,17 @@ bool LLParser::convertValIDToValue(Type *Ty, ValID &ID, Value *&V,
"' but expected '" + getTypeString(Ty) + "'");
V = ID.ConstantVal;
return false;
+ case ValID::t_ConstantSplat:
+ if (!Ty->isVectorTy())
+ return error(ID.Loc, "vector constant must have vector type");
+ if (ID.ConstantVal->getType() != Ty->getScalarType())
+ return error(ID.Loc, "constant expression type mismatch: got type '" +
+ getTypeString(ID.ConstantVal->getType()) +
+ "' but expected '" +
+ getTypeString(Ty->getScalarType()) + "'");
+ V = ConstantVector::getSplat(cast<VectorType>(Ty)->getElementCount(),
+ ID.ConstantVal);
+ return false;
case ValID::t_ConstantStruct:
case ValID::t_PackedConstantStruct:
if (StructType *ST = dyn_cast<StructType>(Ty)) {
@@ -5866,6 +5887,7 @@ bool LLParser::parseConstantValue(Type *Ty, Constant *&C) {
case ValID::t_APFloat:
case ValID::t_Undef:
case ValID::t_Constant:
+ case ValID::t_ConstantSplat:
case ValID::t_ConstantStruct:
case ValID::t_PackedConstantStruct: {
Value *V;
@@ -6404,8 +6426,15 @@ int LLParser::parseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_srem:
return parseArithmetic(Inst, PFS, KeywordVal,
/*IsFP*/ false);
+ case lltok::kw_or: {
+ bool Disjoint = EatIfPresent(lltok::kw_disjoint);
+ if (parseLogical(Inst, PFS, KeywordVal))
+ return true;
+ if (Disjoint)
+ cast<PossiblyDisjointInst>(Inst)->setIsDisjoint(true);
+ return false;
+ }
case lltok::kw_and:
- case lltok::kw_or:
case lltok::kw_xor:
return parseLogical(Inst, PFS, KeywordVal);
case lltok::kw_icmp:
@@ -6421,8 +6450,16 @@ int LLParser::parseInstruction(Instruction *&Inst, BasicBlock *BB,
}
// Casts.
+ case lltok::kw_zext: {
+ bool NonNeg = EatIfPresent(lltok::kw_nneg);
+ bool Res = parseCast(Inst, PFS, KeywordVal);
+ if (Res != 0)
+ return Res;
+ if (NonNeg)
+ Inst->setNonNeg();
+ return 0;
+ }
case lltok::kw_trunc:
- case lltok::kw_zext:
case lltok::kw_sext:
case lltok::kw_fptrunc:
case lltok::kw_fpext:
@@ -8179,7 +8216,7 @@ bool LLParser::parseModuleEntry(unsigned ID) {
parseToken(lltok::rparen, "expected ')' here"))
return true;
- auto ModuleEntry = Index->addModule(Path, ID, Hash);
+ auto ModuleEntry = Index->addModule(Path, Hash);
ModuleIdMap[ID] = ModuleEntry->first();
return false;
@@ -8612,9 +8649,9 @@ static void resolveFwdRef(ValueInfo *Fwd, ValueInfo &Resolved) {
/// Stores the given Name/GUID and associated summary into the Index.
/// Also updates any forward references to the associated entry ID.
-void LLParser::addGlobalValueToIndex(
+bool LLParser::addGlobalValueToIndex(
std::string Name, GlobalValue::GUID GUID, GlobalValue::LinkageTypes Linkage,
- unsigned ID, std::unique_ptr<GlobalValueSummary> Summary) {
+ unsigned ID, std::unique_ptr<GlobalValueSummary> Summary, LocTy Loc) {
// First create the ValueInfo utilizing the Name or GUID.
ValueInfo VI;
if (GUID != 0) {
@@ -8624,7 +8661,9 @@ void LLParser::addGlobalValueToIndex(
assert(!Name.empty());
if (M) {
auto *GV = M->getNamedValue(Name);
- assert(GV);
+ if (!GV)
+ return error(Loc, "Reference to undefined global \"" + Name + "\"");
+
VI = Index->getOrInsertValueInfo(GV);
} else {
assert(
@@ -8672,6 +8711,8 @@ void LLParser::addGlobalValueToIndex(
NumberedValueInfos.resize(ID + 1);
NumberedValueInfos[ID] = VI;
}
+
+ return false;
}
/// parseSummaryIndexFlags
@@ -8718,6 +8759,7 @@ bool LLParser::parseGVEntry(unsigned ID) {
parseToken(lltok::lparen, "expected '(' here"))
return true;
+ LocTy Loc = Lex.getLoc();
std::string Name;
GlobalValue::GUID GUID = 0;
switch (Lex.getKind()) {
@@ -8747,9 +8789,8 @@ bool LLParser::parseGVEntry(unsigned ID) {
// an external definition. We pass ExternalLinkage since that is only
// used when the GUID must be computed from Name, and in that case
// the symbol must have external linkage.
- addGlobalValueToIndex(Name, GUID, GlobalValue::ExternalLinkage, ID,
- nullptr);
- return false;
+ return addGlobalValueToIndex(Name, GUID, GlobalValue::ExternalLinkage, ID,
+ nullptr, Loc);
}
// Have a list of summaries
@@ -8790,6 +8831,7 @@ bool LLParser::parseGVEntry(unsigned ID) {
/// [',' OptionalRefs]? ')'
bool LLParser::parseFunctionSummary(std::string Name, GlobalValue::GUID GUID,
unsigned ID) {
+ LocTy Loc = Lex.getLoc();
assert(Lex.getKind() == lltok::kw_function);
Lex.Lex();
@@ -8866,10 +8908,9 @@ bool LLParser::parseFunctionSummary(std::string Name, GlobalValue::GUID GUID,
FS->setModulePath(ModulePath);
- addGlobalValueToIndex(Name, GUID, (GlobalValue::LinkageTypes)GVFlags.Linkage,
- ID, std::move(FS));
-
- return false;
+ return addGlobalValueToIndex(Name, GUID,
+ (GlobalValue::LinkageTypes)GVFlags.Linkage, ID,
+ std::move(FS), Loc);
}
/// VariableSummary
@@ -8877,6 +8918,7 @@ bool LLParser::parseFunctionSummary(std::string Name, GlobalValue::GUID GUID,
/// [',' OptionalRefs]? ')'
bool LLParser::parseVariableSummary(std::string Name, GlobalValue::GUID GUID,
unsigned ID) {
+ LocTy Loc = Lex.getLoc();
assert(Lex.getKind() == lltok::kw_variable);
Lex.Lex();
@@ -8924,10 +8966,9 @@ bool LLParser::parseVariableSummary(std::string Name, GlobalValue::GUID GUID,
GS->setModulePath(ModulePath);
GS->setVTableFuncs(std::move(VTableFuncs));
- addGlobalValueToIndex(Name, GUID, (GlobalValue::LinkageTypes)GVFlags.Linkage,
- ID, std::move(GS));
-
- return false;
+ return addGlobalValueToIndex(Name, GUID,
+ (GlobalValue::LinkageTypes)GVFlags.Linkage, ID,
+ std::move(GS), Loc);
}
/// AliasSummary
@@ -8974,10 +9015,9 @@ bool LLParser::parseAliasSummary(std::string Name, GlobalValue::GUID GUID,
AS->setAliasee(AliaseeVI, Summary);
}
- addGlobalValueToIndex(Name, GUID, (GlobalValue::LinkageTypes)GVFlags.Linkage,
- ID, std::move(AS));
-
- return false;
+ return addGlobalValueToIndex(Name, GUID,
+ (GlobalValue::LinkageTypes)GVFlags.Linkage, ID,
+ std::move(AS), Loc);
}
/// Flag
@@ -9086,7 +9126,8 @@ bool LLParser::parseOptionalFFlags(FunctionSummary::FFlags &FFlags) {
/// OptionalCalls
/// := 'calls' ':' '(' Call [',' Call]* ')'
/// Call ::= '(' 'callee' ':' GVReference
-/// [( ',' 'hotness' ':' Hotness | ',' 'relbf' ':' UInt32 )]? ')'
+/// [( ',' 'hotness' ':' Hotness | ',' 'relbf' ':' UInt32 )]?
+/// [ ',' 'tail' ]? ')'
bool LLParser::parseOptionalCalls(std::vector<FunctionSummary::EdgeTy> &Calls) {
assert(Lex.getKind() == lltok::kw_calls);
Lex.Lex();
@@ -9111,23 +9152,39 @@ bool LLParser::parseOptionalCalls(std::vector<FunctionSummary::EdgeTy> &Calls) {
CalleeInfo::HotnessType Hotness = CalleeInfo::HotnessType::Unknown;
unsigned RelBF = 0;
- if (EatIfPresent(lltok::comma)) {
- // Expect either hotness or relbf
- if (EatIfPresent(lltok::kw_hotness)) {
+ unsigned HasTailCall = false;
+
+ // parse optional fields
+ while (EatIfPresent(lltok::comma)) {
+ switch (Lex.getKind()) {
+ case lltok::kw_hotness:
+ Lex.Lex();
if (parseToken(lltok::colon, "expected ':'") || parseHotness(Hotness))
return true;
- } else {
- if (parseToken(lltok::kw_relbf, "expected relbf") ||
- parseToken(lltok::colon, "expected ':'") || parseUInt32(RelBF))
+ break;
+ case lltok::kw_relbf:
+ Lex.Lex();
+ if (parseToken(lltok::colon, "expected ':'") || parseUInt32(RelBF))
+ return true;
+ break;
+ case lltok::kw_tail:
+ Lex.Lex();
+ if (parseToken(lltok::colon, "expected ':'") || parseFlag(HasTailCall))
return true;
+ break;
+ default:
+ return error(Lex.getLoc(), "expected hotness, relbf, or tail");
}
}
+ if (Hotness != CalleeInfo::HotnessType::Unknown && RelBF > 0)
+ return tokError("Expected only one of hotness or relbf");
// Keep track of the Call array index needing a forward reference.
// We will save the location of the ValueInfo needing an update, but
// can only do so once the std::vector is finalized.
if (VI.getRef() == FwdVIRef)
IdToIndexMap[GVId].push_back(std::make_pair(Calls.size(), Loc));
- Calls.push_back(FunctionSummary::EdgeTy{VI, CalleeInfo(Hotness, RelBF)});
+ Calls.push_back(
+ FunctionSummary::EdgeTy{VI, CalleeInfo(Hotness, HasTailCall, RelBF)});
if (parseToken(lltok::rparen, "expected ')' in call"))
return true;
@@ -9801,7 +9858,7 @@ bool LLParser::parseGVReference(ValueInfo &VI, unsigned &GVId) {
GVId = Lex.getUIntVal();
// Check if we already have a VI for this GV
- if (GVId < NumberedValueInfos.size()) {
+ if (GVId < NumberedValueInfos.size() && NumberedValueInfos[GVId]) {
assert(NumberedValueInfos[GVId].getRef() != FwdVIRef);
VI = NumberedValueInfos[GVId];
} else
diff --git a/contrib/llvm-project/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/contrib/llvm-project/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
index 35a79ec04b6e..dda3380c04ea 100644
--- a/contrib/llvm-project/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
@@ -17,7 +17,6 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/MsgPackDocument.h"
-#include <map>
#include <utility>
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/BinaryFormat/DXContainer.cpp b/contrib/llvm-project/llvm/lib/BinaryFormat/DXContainer.cpp
index 60a89c66d28c..9c0e657b0696 100644
--- a/contrib/llvm-project/llvm/lib/BinaryFormat/DXContainer.cpp
+++ b/contrib/llvm-project/llvm/lib/BinaryFormat/DXContainer.cpp
@@ -13,6 +13,7 @@
#include "llvm/BinaryFormat/DXContainer.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/ScopedPrinter.h"
using namespace llvm;
using namespace llvm::dxbc;
@@ -28,3 +29,63 @@ bool ShaderHash::isPopulated() {
static uint8_t Zeros[16] = {0};
return Flags > 0 || 0 != memcmp(&Digest, &Zeros, 16);
}
+
+#define COMPONENT_PRECISION(Val, Enum) {#Enum, SigMinPrecision::Enum},
+
+static const EnumEntry<SigMinPrecision> SigMinPrecisionNames[] = {
+#include "llvm/BinaryFormat/DXContainerConstants.def"
+};
+
+ArrayRef<EnumEntry<SigMinPrecision>> dxbc::getSigMinPrecisions() {
+ return ArrayRef(SigMinPrecisionNames);
+}
+
+#define D3D_SYSTEM_VALUE(Val, Enum) {#Enum, D3DSystemValue::Enum},
+
+static const EnumEntry<D3DSystemValue> D3DSystemValueNames[] = {
+#include "llvm/BinaryFormat/DXContainerConstants.def"
+};
+
+ArrayRef<EnumEntry<D3DSystemValue>> dxbc::getD3DSystemValues() {
+ return ArrayRef(D3DSystemValueNames);
+}
+
+#define COMPONENT_TYPE(Val, Enum) {#Enum, SigComponentType::Enum},
+
+static const EnumEntry<SigComponentType> SigComponentTypes[] = {
+#include "llvm/BinaryFormat/DXContainerConstants.def"
+};
+
+ArrayRef<EnumEntry<SigComponentType>> dxbc::getSigComponentTypes() {
+ return ArrayRef(SigComponentTypes);
+}
+
+#define SEMANTIC_KIND(Val, Enum) {#Enum, PSV::SemanticKind::Enum},
+
+static const EnumEntry<PSV::SemanticKind> SemanticKindNames[] = {
+#include "llvm/BinaryFormat/DXContainerConstants.def"
+};
+
+ArrayRef<EnumEntry<PSV::SemanticKind>> PSV::getSemanticKinds() {
+ return ArrayRef(SemanticKindNames);
+}
+
+#define COMPONENT_TYPE(Val, Enum) {#Enum, PSV::ComponentType::Enum},
+
+static const EnumEntry<PSV::ComponentType> ComponentTypeNames[] = {
+#include "llvm/BinaryFormat/DXContainerConstants.def"
+};
+
+ArrayRef<EnumEntry<PSV::ComponentType>> PSV::getComponentTypes() {
+ return ArrayRef(ComponentTypeNames);
+}
+
+#define INTERPOLATION_MODE(Val, Enum) {#Enum, PSV::InterpolationMode::Enum},
+
+static const EnumEntry<PSV::InterpolationMode> InterpolationModeNames[] = {
+#include "llvm/BinaryFormat/DXContainerConstants.def"
+};
+
+ArrayRef<EnumEntry<PSV::InterpolationMode>> PSV::getInterpolationModes() {
+ return ArrayRef(InterpolationModeNames);
+}
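+
These tables all use the X-macro pattern: each #define selects one entry shape, and re-including the .def file expands every entry through it. A self-contained sketch of the same pattern (MY_ENTRIES and the names below are made up for illustration):

#include <cstdio>

#define MY_ENTRIES(X) X(0, Foo) X(1, Bar)

enum class MyEnum { Foo = 0, Bar = 1 };
struct Entry { const char *Name; MyEnum Value; };

// Expand each entry into a {name, value} initializer, as the
// DXContainerConstants.def includes above do.
#define ENTRY(Val, Enum) {#Enum, MyEnum::Enum},
static const Entry Table[] = { MY_ENTRIES(ENTRY) };
#undef ENTRY

int main() {
  for (const auto &E : Table)
    std::printf("%s = %d\n", E.Name, static_cast<int>(E.Value));
  return 0;
}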
diff --git a/contrib/llvm-project/llvm/lib/BinaryFormat/ELF.cpp b/contrib/llvm-project/llvm/lib/BinaryFormat/ELF.cpp
index dc8f3051a149..f4cedffa8b45 100644
--- a/contrib/llvm-project/llvm/lib/BinaryFormat/ELF.cpp
+++ b/contrib/llvm-project/llvm/lib/BinaryFormat/ELF.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/BinaryFormat/Magic.cpp b/contrib/llvm-project/llvm/lib/BinaryFormat/Magic.cpp
index 025334f9f3f4..45a0b7e11452 100644
--- a/contrib/llvm-project/llvm/lib/BinaryFormat/Magic.cpp
+++ b/contrib/llvm-project/llvm/lib/BinaryFormat/Magic.cpp
@@ -26,7 +26,7 @@ using namespace llvm::sys::fs;
template <size_t N>
static bool startswith(StringRef Magic, const char (&S)[N]) {
- return Magic.startswith(StringRef(S, N - 1));
+ return Magic.starts_with(StringRef(S, N - 1));
}
/// Identify the magic in magic.
@@ -72,6 +72,14 @@ file_magic llvm::identify_magic(StringRef Magic) {
case 0x03:
if (startswith(Magic, "\x03\xF0\x00"))
return file_magic::goff_object;
+ // SPIR-V format in little-endian mode.
+ if (startswith(Magic, "\x03\x02\x23\x07"))
+ return file_magic::spirv_object;
+ break;
+
+ case 0x07: // SPIR-V format in big-endian mode.
+ if (startswith(Magic, "\x07\x23\x02\x03"))
+ return file_magic::spirv_object;
break;
case 0x10:
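
The two new cases recognize the SPIR-V magic word in either byte order. A small illustrative check equivalent to the added branches (not LLVM's identify_magic itself):

#include <cstddef>
#include <cstring>

// Match the 4-byte SPIR-V magic in little- or big-endian layout,
// mirroring the two cases added above.
static bool isSpirvMagic(const char *Buf, std::size_t N) {
  if (N < 4)
    return false;
  return std::memcmp(Buf, "\x03\x02\x23\x07", 4) == 0 ||  // little-endian
         std::memcmp(Buf, "\x07\x23\x02\x03", 4) == 0;    // big-endian
}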
@@ -87,6 +95,10 @@ file_magic llvm::identify_magic(StringRef Magic) {
if (startswith(Magic, "BC\xC0\xDE"))
return file_magic::bitcode;
break;
+ case 'C':
+ if (startswith(Magic, "CCOB"))
+ return file_magic::offload_bundle_compressed;
+ break;
case '!':
if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
return file_magic::archive;
@@ -213,11 +225,11 @@ file_magic llvm::identify_magic(StringRef Magic) {
if (startswith(Magic, "MZ") && Magic.size() >= 0x3c + 4) {
uint32_t off = read32le(Magic.data() + 0x3c);
// PE/COFF file, either EXE or DLL.
- if (Magic.substr(off).startswith(
+ if (Magic.substr(off).starts_with(
StringRef(COFF::PEMagic, sizeof(COFF::PEMagic))))
return file_magic::pecoff_executable;
}
- if (Magic.startswith("Microsoft C/C++ MSF 7.00\r\n"))
+ if (Magic.starts_with("Microsoft C/C++ MSF 7.00\r\n"))
return file_magic::pdb;
if (startswith(Magic, "MDMP"))
return file_magic::minidump;
@@ -251,6 +263,13 @@ file_magic llvm::identify_magic(StringRef Magic) {
return file_magic::coff_object;
break;
+ case '_': {
+ const char OBMagic[] = "__CLANG_OFFLOAD_BUNDLE__";
+ if (Magic.size() >= sizeof(OBMagic) && startswith(Magic, OBMagic))
+ return file_magic::offload_bundle;
+ break;
+ }
+
default:
break;
}
diff --git a/contrib/llvm-project/llvm/lib/BinaryFormat/MsgPackDocument.cpp b/contrib/llvm-project/llvm/lib/BinaryFormat/MsgPackDocument.cpp
index 21ffa35dfb6e..11598ee24d6f 100644
--- a/contrib/llvm-project/llvm/lib/BinaryFormat/MsgPackDocument.cpp
+++ b/contrib/llvm-project/llvm/lib/BinaryFormat/MsgPackDocument.cpp
@@ -143,7 +143,13 @@ bool Document::readFromBlob(
// On to next element (or key if doing a map key next).
// Read the value.
Object Obj;
- if (!MPReader.read(Obj)) {
+ Expected<bool> ReadObj = MPReader.read(Obj);
+ if (!ReadObj) {
+ // FIXME: Propagate the Error to the caller.
+ consumeError(ReadObj.takeError());
+ return false;
+ }
+ if (!ReadObj.get()) {
if (Multi && Stack.size() == 1) {
// OK to finish here as we've just done a top-level element with Multi
break;
diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 3797a44c1793..8907f6fa4ff3 100644
--- a/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -904,10 +904,6 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
/// path to the bitcode file.
StringRef ModulePath;
- /// For per-module summary indexes, the unique numerical identifier given to
- /// this module by the client.
- unsigned ModuleId;
-
/// Callback to ask whether a symbol is the prevailing copy when invoked
/// during combined index building.
std::function<bool(GlobalValue::GUID)> IsPrevailing;
@@ -919,7 +915,7 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
public:
ModuleSummaryIndexBitcodeReader(
BitstreamCursor Stream, StringRef Strtab, ModuleSummaryIndex &TheIndex,
- StringRef ModulePath, unsigned ModuleId,
+ StringRef ModulePath,
std::function<bool(GlobalValue::GUID)> IsPrevailing = nullptr);
Error parseModule();
@@ -1121,6 +1117,22 @@ static GlobalVarSummary::GVarFlags getDecodedGVarFlags(uint64_t RawFlags) {
(GlobalObject::VCallVisibility)(RawFlags >> 3));
}
+static std::pair<CalleeInfo::HotnessType, bool>
+getDecodedHotnessCallEdgeInfo(uint64_t RawFlags) {
+ CalleeInfo::HotnessType Hotness =
+ static_cast<CalleeInfo::HotnessType>(RawFlags & 0x7); // 3 bits
+ bool HasTailCall = (RawFlags & 0x8); // 1 bit
+ return {Hotness, HasTailCall};
+}
+
+static void getDecodedRelBFCallEdgeInfo(uint64_t RawFlags, uint64_t &RelBF,
+ bool &HasTailCall) {
+ static constexpr uint64_t RelBlockFreqMask =
+ (1 << CalleeInfo::RelBlockFreqBits) - 1;
+ RelBF = RawFlags & RelBlockFreqMask; // RelBlockFreqBits bits
+ HasTailCall = (RawFlags & (1 << CalleeInfo::RelBlockFreqBits)); // 1 bit
+}
+
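
A hedged round-trip sketch of the hotness edge layout decoded above (low three bits carry the hotness value, bit 3 the tail-call flag; the enum is an illustrative stand-in for CalleeInfo::HotnessType):

#include <cstdint>
#include <utility>

enum class Hotness : std::uint8_t { Unknown = 0, Cold, None, Hot, Critical };

// Unpack a raw call-edge word: 3 bits of hotness, then 1 tail-call bit.
static std::pair<Hotness, bool> decodeHotnessEdge(std::uint64_t Raw) {
  auto H = static_cast<Hotness>(Raw & 0x7);
  bool HasTailCall = (Raw & 0x8) != 0;
  return {H, HasTailCall};
}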
static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) {
switch (Val) {
default: // Map unknown visibilities to default.
@@ -1148,6 +1160,23 @@ static bool getDecodedDSOLocal(unsigned Val) {
}
}
+static std::optional<CodeModel::Model> getDecodedCodeModel(unsigned Val) {
+ switch (Val) {
+ case 1:
+ return CodeModel::Tiny;
+ case 2:
+ return CodeModel::Small;
+ case 3:
+ return CodeModel::Kernel;
+ case 4:
+ return CodeModel::Medium;
+ case 5:
+ return CodeModel::Large;
+ }
+
+ return {};
+}
+
static GlobalVariable::ThreadLocalMode getDecodedThreadLocalMode(unsigned Val) {
switch (Val) {
case 0: return GlobalVariable::NotThreadLocal;
@@ -1398,6 +1427,9 @@ static bool isConstExprSupported(const BitcodeConstant *BC) {
if (Instruction::isBinaryOp(Opcode))
return ConstantExpr::isSupportedBinOp(Opcode);
+ if (Instruction::isCast(Opcode))
+ return ConstantExpr::isSupportedCastOp(Opcode);
+
if (Opcode == Instruction::GetElementPtr)
return ConstantExpr::isSupportedGetElementPtr(BC->SrcElemTy);
@@ -1984,6 +2016,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::NoSanitizeCoverage;
case bitc::ATTR_KIND_NULL_POINTER_IS_VALID:
return Attribute::NullPointerIsValid;
+ case bitc::ATTR_KIND_OPTIMIZE_FOR_DEBUGGING:
+ return Attribute::OptimizeForDebugging;
case bitc::ATTR_KIND_OPT_FOR_FUZZING:
return Attribute::OptForFuzzing;
case bitc::ATTR_KIND_OPTIMIZE_FOR_SIZE:
@@ -2060,6 +2094,12 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::Hot;
case bitc::ATTR_KIND_PRESPLIT_COROUTINE:
return Attribute::PresplitCoroutine;
+ case bitc::ATTR_KIND_WRITABLE:
+ return Attribute::Writable;
+ case bitc::ATTR_KIND_CORO_ONLY_DESTROY_WHEN_COMPLETE:
+ return Attribute::CoroDestroyOnlyWhenComplete;
+ case bitc::ATTR_KIND_DEAD_ON_UNWIND:
+ return Attribute::DeadOnUnwind;
}
}
@@ -2648,7 +2688,7 @@ Expected<Value *> BitcodeReader::recordValue(SmallVectorImpl<uint64_t> &Record,
Value *V = ValueList[ValueID];
StringRef NameStr(ValueName.data(), ValueName.size());
- if (NameStr.find_first_of(0) != StringRef::npos)
+ if (NameStr.contains(0))
return error("Invalid value name");
V->setName(NameStr);
auto *GO = dyn_cast<GlobalObject>(V);
@@ -2894,11 +2934,7 @@ Error BitcodeReader::resolveGlobalAndIndirectSymbolInits() {
return error("Alias and aliasee types don't match");
GA->setAliasee(C);
} else if (auto *GI = dyn_cast<GlobalIFunc>(GV)) {
- Type *ResolverFTy =
- GlobalIFunc::getResolverFunctionType(GI->getValueType());
- // Transparently fix up the type for compatibility with older bitcode
- GI->setResolver(ConstantExpr::getBitCast(
- C, ResolverFTy->getPointerTo(GI->getAddressSpace())));
+ GI->setResolver(C);
} else {
return error("Expected an alias or an ifunc");
}
@@ -3196,7 +3232,7 @@ Error BitcodeReader::parseConstants() {
Opc == Instruction::LShr ||
Opc == Instruction::AShr) {
if (Record[3] & (1 << bitc::PEO_EXACT))
- Flags |= SDivOperator::IsExact;
+ Flags |= PossiblyExactOperator::IsExact;
}
}
V = BitcodeConstant::create(Alloc, CurTy, {(uint8_t)Opc, Flags},
@@ -3804,6 +3840,7 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
// dllstorageclass, comdat, attributes, preemption specifier,
// partition strtab offset, partition strtab size] (name in VST)
// v2: [strtab_offset, strtab_size, v1]
+ // v3: [v2, code_model]
StringRef Name;
std::tie(Name, Record) = readNameFromStrtab(Record);
@@ -3912,6 +3949,13 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
NewGV->setSanitizerMetadata(Meta);
}
+ if (Record.size() > 17 && Record[17]) {
+ if (auto CM = getDecodedCodeModel(Record[17]))
+ NewGV->setCodeModel(*CM);
+ else
+ return error("Invalid global variable code model");
+ }
+
return Error::success();
}
@@ -4865,12 +4909,14 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
Opc == Instruction::AShr) {
if (Record[OpNum] & (1 << bitc::PEO_EXACT))
cast<BinaryOperator>(I)->setIsExact(true);
+ } else if (Opc == Instruction::Or) {
+ if (Record[OpNum] & (1 << bitc::PDI_DISJOINT))
+ cast<PossiblyDisjointInst>(I)->setIsDisjoint(true);
} else if (isa<FPMathOperator>(I)) {
FastMathFlags FMF = getDecodedFastMathFlags(Record[OpNum]);
if (FMF.any())
I->setFastMathFlags(FMF);
}
-
}
break;
}
@@ -4879,12 +4925,13 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
Value *Op;
unsigned OpTypeID;
if (getValueTypePair(Record, OpNum, NextValueNo, Op, OpTypeID, CurBB) ||
- OpNum+2 != Record.size())
+ OpNum + 1 > Record.size())
return error("Invalid record");
- ResTypeID = Record[OpNum];
+ ResTypeID = Record[OpNum++];
Type *ResTy = getTypeByID(ResTypeID);
- int Opc = getDecodedCastOpcode(Record[OpNum + 1]);
+ int Opc = getDecodedCastOpcode(Record[OpNum++]);
+
if (Opc == -1 || !ResTy)
return error("Invalid record");
Instruction *Temp = nullptr;
@@ -4900,6 +4947,9 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
return error("Invalid cast");
I = CastInst::Create(CastOp, Op, ResTy);
}
+ if (OpNum < Record.size() && isa<PossiblyNonNegInst>(I) &&
+ (Record[OpNum] & (1 << bitc::PNNI_NON_NEG)))
+ I->setNonNeg(true);
InstructionList.push_back(I);
break;
}
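
Older bitcode ends a cast record at the opcode; the new nneg flag rides in an optional trailing word. A sketch of that optional-field read (bit 0 here mirrors the PNNI_NON_NEG check above; the helper itself is illustrative):

#include <cstddef>
#include <cstdint>
#include <vector>

// Return the non-negative flag from an optional trailing flags word.
static bool readOptionalNonNeg(const std::vector<std::uint64_t> &Record,
                               std::size_t OpNum) {
  if (OpNum >= Record.size())
    return false;                    // old-style record: no flags word
  return (Record[OpNum] & 1) != 0;   // bit 0: nneg
}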
@@ -5200,7 +5250,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
return error(
"Invalid record: operand number exceeded available operands");
- unsigned PredVal = Record[OpNum];
+ CmpInst::Predicate PredVal = CmpInst::Predicate(Record[OpNum]);
bool IsFP = LHS->getType()->isFPOrFPVectorTy();
FastMathFlags FMF;
if (IsFP && Record.size() > OpNum+1)
@@ -5209,10 +5259,15 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
if (OpNum+1 != Record.size())
return error("Invalid record");
- if (LHS->getType()->isFPOrFPVectorTy())
- I = new FCmpInst((FCmpInst::Predicate)PredVal, LHS, RHS);
- else
- I = new ICmpInst((ICmpInst::Predicate)PredVal, LHS, RHS);
+ if (IsFP) {
+ if (!CmpInst::isFPPredicate(PredVal))
+ return error("Invalid fcmp predicate");
+ I = new FCmpInst(PredVal, LHS, RHS);
+ } else {
+ if (!CmpInst::isIntPredicate(PredVal))
+ return error("Invalid icmp predicate");
+ I = new ICmpInst(PredVal, LHS, RHS);
+ }
ResTypeID = getVirtualTypeID(I->getType()->getScalarType());
if (LHS->getType()->isVectorTy())
@@ -5315,6 +5370,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
Type *TokenTy = Type::getTokenTy(Context);
Value *ParentPad = getValue(Record, Idx++, NextValueNo, TokenTy,
getVirtualTypeID(TokenTy), CurBB);
+ if (!ParentPad)
+ return error("Invalid record");
unsigned NumHandlers = Record[Idx++];
@@ -5356,6 +5413,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
Type *TokenTy = Type::getTokenTy(Context);
Value *ParentPad = getValue(Record, Idx++, NextValueNo, TokenTy,
getVirtualTypeID(TokenTy), CurBB);
+ if (!ParentPad)
+        return error("Invalid record");
unsigned NumArgOperands = Record[Idx++];
@@ -5910,6 +5969,9 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
if (!Align)
Align = DL.getPrefTypeAlign(Ty);
+ if (!Size->getType()->isIntegerTy())
+ return error("alloca element count must have integer type");
+
AllocaInst *AI = new AllocaInst(Ty, AS, Size, *Align);
AI->setUsedWithInAlloca(InAlloca);
AI->setSwiftError(SwiftError);
@@ -5936,10 +5998,11 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
} else {
ResTypeID = getContainedTypeID(OpTypeID);
Ty = getTypeByID(ResTypeID);
- if (!Ty)
- return error("Missing element type for old-style load");
}
+ if (!Ty)
+ return error("Missing load type");
+
if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType()))
return Err;
@@ -5974,10 +6037,11 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
} else {
ResTypeID = getContainedTypeID(OpTypeID);
Ty = getTypeByID(ResTypeID);
- if (!Ty)
- return error("Missing element type for old style atomic load");
}
+ if (!Ty)
+ return error("Missing atomic load type");
+
if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType()))
return Err;
@@ -6370,7 +6434,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
cast<CallInst>(I)->setCallingConv(
static_cast<CallingConv::ID>((0x7ff & CCInfo) >> bitc::CALL_CCONV));
CallInst::TailCallKind TCK = CallInst::TCK_None;
- if (CCInfo & 1 << bitc::CALL_TAIL)
+ if (CCInfo & (1 << bitc::CALL_TAIL))
TCK = CallInst::TCK_Tail;
if (CCInfo & (1 << bitc::CALL_MUSTTAIL))
TCK = CallInst::TCK_MustTail;
@@ -6699,13 +6763,12 @@ std::vector<StructType *> BitcodeReader::getIdentifiedStructTypes() const {
ModuleSummaryIndexBitcodeReader::ModuleSummaryIndexBitcodeReader(
BitstreamCursor Cursor, StringRef Strtab, ModuleSummaryIndex &TheIndex,
- StringRef ModulePath, unsigned ModuleId,
- std::function<bool(GlobalValue::GUID)> IsPrevailing)
+ StringRef ModulePath, std::function<bool(GlobalValue::GUID)> IsPrevailing)
: BitcodeReaderBase(std::move(Cursor), Strtab), TheIndex(TheIndex),
- ModulePath(ModulePath), ModuleId(ModuleId), IsPrevailing(IsPrevailing) {}
+ ModulePath(ModulePath), IsPrevailing(IsPrevailing) {}
void ModuleSummaryIndexBitcodeReader::addThisModule() {
- TheIndex.addModule(ModulePath, ModuleId);
+ TheIndex.addModule(ModulePath);
}
ModuleSummaryIndex::ModuleInfo *
@@ -6936,7 +6999,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() {
case bitc::MODULE_CODE_HASH: {
if (Record.size() != 5)
return error("Invalid hash length " + Twine(Record.size()).str());
- auto &Hash = getThisModule()->second.second;
+ auto &Hash = getThisModule()->second;
int Pos = 0;
for (auto &Val : Record) {
assert(!(Val >> 32) && "Unexpected high bits set");
@@ -6999,6 +7062,7 @@ ModuleSummaryIndexBitcodeReader::makeCallList(ArrayRef<uint64_t> Record,
Ret.reserve(Record.size());
for (unsigned I = 0, E = Record.size(); I != E; ++I) {
CalleeInfo::HotnessType Hotness = CalleeInfo::HotnessType::Unknown;
+ bool HasTailCall = false;
uint64_t RelBF = 0;
ValueInfo Callee = std::get<0>(getValueInfoFromValueId(Record[I]));
if (IsOldProfileFormat) {
@@ -7006,10 +7070,12 @@ ModuleSummaryIndexBitcodeReader::makeCallList(ArrayRef<uint64_t> Record,
if (HasProfile)
I += 1; // Skip old profilecount field
} else if (HasProfile)
- Hotness = static_cast<CalleeInfo::HotnessType>(Record[++I]);
+ std::tie(Hotness, HasTailCall) =
+ getDecodedHotnessCallEdgeInfo(Record[++I]);
else if (HasRelBF)
- RelBF = Record[++I];
- Ret.push_back(FunctionSummary::EdgeTy{Callee, CalleeInfo(Hotness, RelBF)});
+ getDecodedRelBFCallEdgeInfo(Record[++I], RelBF, HasTailCall);
+ Ret.push_back(FunctionSummary::EdgeTy{
+ Callee, CalleeInfo(Hotness, HasTailCall, RelBF)});
}
return Ret;
}
@@ -7223,14 +7289,15 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
TheIndex.getOrInsertValueInfo(RefGUID), RefGUID, RefGUID);
break;
}
+ // FS_PERMODULE is legacy and does not have support for the tail call flag.
// FS_PERMODULE: [valueid, flags, instcount, fflags, numrefs,
// numrefs x valueid, n x (valueid)]
// FS_PERMODULE_PROFILE: [valueid, flags, instcount, fflags, numrefs,
// numrefs x valueid,
- // n x (valueid, hotness)]
+ // n x (valueid, hotness+tailcall flags)]
// FS_PERMODULE_RELBF: [valueid, flags, instcount, fflags, numrefs,
// numrefs x valueid,
- // n x (valueid, relblockfreq)]
+ // n x (valueid, relblockfreq+tailcall)]
case bitc::FS_PERMODULE:
case bitc::FS_PERMODULE_RELBF:
case bitc::FS_PERMODULE_PROFILE: {
@@ -7377,10 +7444,12 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
TheIndex.addGlobalValueSummary(std::get<0>(GUID), std::move(VS));
break;
}
+ // FS_COMBINED is legacy and does not have support for the tail call flag.
// FS_COMBINED: [valueid, modid, flags, instcount, fflags, numrefs,
// numrefs x valueid, n x (valueid)]
// FS_COMBINED_PROFILE: [valueid, modid, flags, instcount, fflags, numrefs,
- // numrefs x valueid, n x (valueid, hotness)]
+ // numrefs x valueid,
+ // n x (valueid, hotness+tailcall flags)]
case bitc::FS_COMBINED:
case bitc::FS_COMBINED_PROFILE: {
unsigned ValueID = Record[0];
@@ -7697,7 +7766,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
if (convertToString(Record, 1, ModulePath))
return error("Invalid record");
- LastSeenModule = TheIndex.addModule(ModulePath, ModuleId);
+ LastSeenModule = TheIndex.addModule(ModulePath);
ModuleIdMap[ModuleId] = LastSeenModule->first();
ModulePath.clear();
@@ -7712,7 +7781,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
int Pos = 0;
for (auto &Val : Record) {
assert(!(Val >> 32) && "Unexpected high bits set");
- LastSeenModule->second.second[Pos++] = Val;
+ LastSeenModule->second[Pos++] = Val;
}
// Reset LastSeenModule to avoid overriding the hash unexpectedly.
LastSeenModule = nullptr;
@@ -7970,14 +8039,14 @@ BitcodeModule::getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata,
// module path used in the combined summary (e.g. when reading summaries for
// regular LTO modules).
Error BitcodeModule::readSummary(
- ModuleSummaryIndex &CombinedIndex, StringRef ModulePath, uint64_t ModuleId,
+ ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
std::function<bool(GlobalValue::GUID)> IsPrevailing) {
BitstreamCursor Stream(Buffer);
if (Error JumpFailed = Stream.JumpToBit(ModuleBit))
return JumpFailed;
ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, CombinedIndex,
- ModulePath, ModuleId, IsPrevailing);
+ ModulePath, IsPrevailing);
return R.parseModule();
}
@@ -8183,13 +8252,12 @@ Expected<std::string> llvm::getBitcodeProducerString(MemoryBufferRef Buffer) {
}
Error llvm::readModuleSummaryIndex(MemoryBufferRef Buffer,
- ModuleSummaryIndex &CombinedIndex,
- uint64_t ModuleId) {
+ ModuleSummaryIndex &CombinedIndex) {
Expected<BitcodeModule> BM = getSingleModule(Buffer);
if (!BM)
return BM.takeError();
- return BM->readSummary(CombinedIndex, BM->getModuleIdentifier(), ModuleId);
+ return BM->readSummary(CombinedIndex, BM->getModuleIdentifier());
}
Expected<std::unique_ptr<ModuleSummaryIndex>>
diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index 0a9a80688a41..910e97489dbb 100644
--- a/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -22,7 +22,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist_iterator.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
@@ -555,12 +554,12 @@ class MetadataLoader::MetadataLoaderImpl {
if (!CU)
continue;
- if (auto *RawImported = CU->getRawImportedEntities()) {
+ if (CU->getRawImportedEntities()) {
// Collect a set of imported entities to be moved.
SetVector<Metadata *> EntitiesToRemove;
for (Metadata *Op : CU->getImportedEntities()->operands()) {
auto *IE = cast<DIImportedEntity>(Op);
- if (auto *S = dyn_cast_or_null<DILocalScope>(IE->getScope())) {
+ if (dyn_cast_or_null<DILocalScope>(IE->getScope())) {
EntitiesToRemove.insert(IE);
}
}
@@ -705,10 +704,11 @@ class MetadataLoader::MetadataLoaderImpl {
return Error::success();
}
- void upgradeDebugInfo() {
+ void upgradeDebugInfo(bool ModuleLevel) {
upgradeCUSubprograms();
upgradeCUVariables();
- upgradeCULocals();
+ if (ModuleLevel)
+ upgradeCULocals();
}
void callMDTypeCallback(Metadata **Val, unsigned TypeID);
@@ -1085,7 +1085,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
// Reading the named metadata created forward references and/or
// placeholders, that we flush here.
resolveForwardRefsAndPlaceholders(Placeholders);
- upgradeDebugInfo();
+ upgradeDebugInfo(ModuleLevel);
// Return at the beginning of the block, since it is easy to skip it
// entirely from there.
Stream.ReadBlockEnd(); // Pop the abbrev block context.
@@ -1116,7 +1116,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
return error("Malformed block");
case BitstreamEntry::EndBlock:
resolveForwardRefsAndPlaceholders(Placeholders);
- upgradeDebugInfo();
+ upgradeDebugInfo(ModuleLevel);
return Error::success();
case BitstreamEntry::Record:
// The interesting case.
@@ -1213,6 +1213,26 @@ void MetadataLoader::MetadataLoaderImpl::resolveForwardRefsAndPlaceholders(
Placeholders.flush(MetadataList);
}
+static Value *getValueFwdRef(BitcodeReaderValueList &ValueList, unsigned Idx,
+ Type *Ty, unsigned TyID) {
+ Value *V = ValueList.getValueFwdRef(Idx, Ty, TyID,
+ /*ConstExprInsertBB*/ nullptr);
+ if (V)
+ return V;
+
+ // This is a reference to a no longer supported constant expression.
+ // Pretend that the constant was deleted, which will replace metadata
+ // references with undef.
+ // TODO: This is a rather indirect check. It would be more elegant to use
+ // a separate ErrorInfo for constant materialization failure and thread
+ // the error reporting through getValueFwdRef().
+ if (Idx < ValueList.size() && ValueList[Idx] &&
+ ValueList[Idx]->getType() == Ty)
+ return UndefValue::get(Ty);
+
+ return nullptr;
+}
+
Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
SmallVectorImpl<uint64_t> &Record, unsigned Code,
PlaceholderQueue &Placeholders, StringRef Blob, unsigned &NextMetadataNo) {
@@ -1315,7 +1335,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
unsigned TyID = Record[0];
Type *Ty = Callbacks.GetTypeByID(TyID);
- if (Ty->isMetadataTy() || Ty->isVoidTy()) {
+ if (!Ty || Ty->isMetadataTy() || Ty->isVoidTy()) {
dropRecord();
break;
}
@@ -1344,8 +1364,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
if (Ty->isMetadataTy())
Elts.push_back(getMD(Record[i + 1]));
else if (!Ty->isVoidTy()) {
- Value *V = ValueList.getValueFwdRef(Record[i + 1], Ty, TyID,
- /*ConstExprInsertBB*/ nullptr);
+ Value *V = getValueFwdRef(ValueList, Record[i + 1], Ty, TyID);
if (!V)
return error("Invalid value reference from old metadata");
Metadata *MD = ValueAsMetadata::get(V);
@@ -1366,11 +1385,10 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
unsigned TyID = Record[0];
Type *Ty = Callbacks.GetTypeByID(TyID);
- if (Ty->isMetadataTy() || Ty->isVoidTy())
+ if (!Ty || Ty->isMetadataTy() || Ty->isVoidTy())
return error("Invalid record");
- Value *V = ValueList.getValueFwdRef(Record[1], Ty, TyID,
- /*ConstExprInsertBB*/ nullptr);
+ Value *V = getValueFwdRef(ValueList, Record[1], Ty, TyID);
if (!V)
return error("Invalid value reference from metadata");
@@ -1615,7 +1633,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
// DICompositeType flag specifying whether template parameters are
// required on declarations of this type.
StringRef NameStr = Name->getString();
- if (!NameStr.contains('<') || NameStr.startswith("_STN|"))
+ if (!NameStr.contains('<') || NameStr.starts_with("_STN|"))
TemplateParams = getMDOrNull(Record[14]);
}
} else {
diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.h b/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.h
index fbee7e49f8df..bab855ca6359 100644
--- a/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.h
+++ b/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.h
@@ -19,6 +19,7 @@
#include <memory>
namespace llvm {
+class BasicBlock;
class BitcodeReaderValueList;
class BitstreamCursor;
class DISubprogram;
diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Reader/ValueList.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Reader/ValueList.cpp
index b9dbf904c89e..f5568a923b11 100644
--- a/contrib/llvm-project/llvm/lib/Bitcode/Reader/ValueList.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitcode/Reader/ValueList.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "ValueList.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 9416c7f5a03e..8fca569a391b 100644
--- a/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -123,6 +123,7 @@ enum {
FUNCTION_INST_BINOP_ABBREV,
FUNCTION_INST_BINOP_FLAGS_ABBREV,
FUNCTION_INST_CAST_ABBREV,
+ FUNCTION_INST_CAST_FLAGS_ABBREV,
FUNCTION_INST_RET_VOID_ABBREV,
FUNCTION_INST_RET_VAL_ABBREV,
FUNCTION_INST_UNREACHABLE_ABBREV,
@@ -335,8 +336,7 @@ private:
unsigned Abbrev);
void writeDIMacroFile(const DIMacroFile *N, SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev);
- void writeDIArgList(const DIArgList *N, SmallVectorImpl<uint64_t> &Record,
- unsigned Abbrev);
+ void writeDIArgList(const DIArgList *N, SmallVectorImpl<uint64_t> &Record);
void writeDIModule(const DIModule *N, SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev);
void writeDIAssignID(const DIAssignID *N, SmallVectorImpl<uint64_t> &Record,
@@ -431,6 +431,10 @@ class IndexBitcodeWriter : public BitcodeWriterBase {
/// Tracks the last value id recorded in the GUIDToValueMap.
unsigned GlobalValueId = 0;
+ /// Tracks the assignment of module paths in the module path string table to
+ /// an id assigned for use in summary references to the module path.
+ DenseMap<StringRef, uint64_t> ModuleIdMap;
+
public:
/// Constructs a IndexBitcodeWriter object for the given combined index,
/// writing to the provided \p Buffer. When writing a subset of the index
@@ -512,8 +516,16 @@ public:
Callback(*MPI);
}
} else {
- for (const auto &MPSE : Index.modulePaths())
- Callback(MPSE);
+ // Since StringMap iteration order isn't guaranteed, order by path string
+ // first.
+ // FIXME: Make this a vector of StringMapEntry instead to avoid the later
+ // map lookup.
+ std::vector<StringRef> ModulePaths;
+ for (auto &[ModPath, _] : Index.modulePaths())
+ ModulePaths.push_back(ModPath);
+ llvm::sort(ModulePaths.begin(), ModulePaths.end());
+ for (auto &ModPath : ModulePaths)
+ Callback(*Index.modulePaths().find(ModPath));
}
}
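
Since StringMap iteration order is unspecified, the rewritten branch sorts the module paths before invoking the callback. The same determinism trick in plain STL terms (an unordered_map stands in for StringMap):

#include <algorithm>
#include <string>
#include <unordered_map>
#include <vector>

// Visit map entries in a stable, path-sorted order; note the second
// lookup per key, which the FIXME in the patch wants to avoid.
template <typename Fn>
static void forEachSorted(const std::unordered_map<std::string, int> &M,
                          Fn Callback) {
  std::vector<std::string> Keys;
  Keys.reserve(M.size());
  for (const auto &KV : M)
    Keys.push_back(KV.first);
  std::sort(Keys.begin(), Keys.end());
  for (const auto &K : Keys)
    Callback(K, M.at(K));
}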
@@ -735,6 +747,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_NO_SANITIZE_COVERAGE;
case Attribute::NullPointerIsValid:
return bitc::ATTR_KIND_NULL_POINTER_IS_VALID;
+ case Attribute::OptimizeForDebugging:
+ return bitc::ATTR_KIND_OPTIMIZE_FOR_DEBUGGING;
case Attribute::OptForFuzzing:
return bitc::ATTR_KIND_OPT_FOR_FUZZING;
case Attribute::OptimizeForSize:
@@ -809,6 +823,12 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_MUSTPROGRESS;
case Attribute::PresplitCoroutine:
return bitc::ATTR_KIND_PRESPLIT_COROUTINE;
+ case Attribute::Writable:
+ return bitc::ATTR_KIND_WRITABLE;
+ case Attribute::CoroDestroyOnlyWhenComplete:
+ return bitc::ATTR_KIND_CORO_ONLY_DESTROY_WHEN_COMPLETE;
+ case Attribute::DeadOnUnwind:
+ return bitc::ATTR_KIND_DEAD_ON_UNWIND;
case Attribute::EndAttrKinds:
llvm_unreachable("Can not encode end-attribute kinds marker.");
case Attribute::None:
@@ -1139,6 +1159,24 @@ static uint64_t getEncodedGVarFlags(GlobalVarSummary::GVarFlags Flags) {
return RawFlags;
}
+static uint64_t getEncodedHotnessCallEdgeInfo(const CalleeInfo &CI) {
+ uint64_t RawFlags = 0;
+
+ RawFlags |= CI.Hotness; // 3 bits
+ RawFlags |= (CI.HasTailCall << 3); // 1 bit
+
+ return RawFlags;
+}
+
+static uint64_t getEncodedRelBFCallEdgeInfo(const CalleeInfo &CI) {
+ uint64_t RawFlags = 0;
+
+ RawFlags |= CI.RelBlockFreq; // CalleeInfo::RelBlockFreqBits bits
+ RawFlags |= (CI.HasTailCall << CalleeInfo::RelBlockFreqBits); // 1 bit
+
+ return RawFlags;
+}
+
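
These writers are the encode side of the reader helpers seen earlier; a quick round-trip check under the same assumed layout (3 hotness bits, then the tail-call bit):

#include <cassert>
#include <cstdint>

// Pack hotness and the tail-call flag exactly as the decoder expects.
static std::uint64_t encodeHotnessEdge(std::uint8_t Hotness, bool HasTailCall) {
  return (Hotness & 0x7) | (std::uint64_t(HasTailCall) << 3);
}

int main() {
  std::uint64_t Raw = encodeHotnessEdge(/*Hotness=*/3, /*HasTailCall=*/true);
  assert((Raw & 0x7) == 3 && (Raw & 0x8) != 0);
  return 0;
}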
static unsigned getEncodedVisibility(const GlobalValue &GV) {
switch (GV.getVisibility()) {
case GlobalValue::DefaultVisibility: return 0;
@@ -1385,7 +1423,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// GLOBALVAR: [strtab offset, strtab size, type, isconst, initid,
// linkage, alignment, section, visibility, threadlocal,
// unnamed_addr, externally_initialized, dllstorageclass,
- // comdat, attributes, DSO_Local, GlobalSanitizer]
+ // comdat, attributes, DSO_Local, GlobalSanitizer, code_model]
Vals.push_back(addToStrtab(GV.getName()));
Vals.push_back(GV.getName().size());
Vals.push_back(VE.getTypeID(GV.getValueType()));
@@ -1402,7 +1440,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
GV.isExternallyInitialized() ||
GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass ||
GV.hasComdat() || GV.hasAttributes() || GV.isDSOLocal() ||
- GV.hasPartition() || GV.hasSanitizerMetadata()) {
+ GV.hasPartition() || GV.hasSanitizerMetadata() || GV.getCodeModel()) {
Vals.push_back(getEncodedVisibility(GV));
Vals.push_back(getEncodedThreadLocalMode(GV));
Vals.push_back(getEncodedUnnamedAddr(GV));
@@ -1420,6 +1458,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
Vals.push_back((GV.hasSanitizerMetadata() ? serializeSanitizerMetadata(
GV.getSanitizerMetadata())
: 0));
+ Vals.push_back(GV.getCodeModelRaw());
} else {
AbbrevToUse = SimpleGVarAbbrev;
}
@@ -1522,6 +1561,9 @@ static uint64_t getOptimizationFlags(const Value *V) {
} else if (const auto *PEO = dyn_cast<PossiblyExactOperator>(V)) {
if (PEO->isExact())
Flags |= 1 << bitc::PEO_EXACT;
+ } else if (const auto *PDI = dyn_cast<PossiblyDisjointInst>(V)) {
+ if (PDI->isDisjoint())
+ Flags |= 1 << bitc::PDI_DISJOINT;
} else if (const auto *FPMO = dyn_cast<FPMathOperator>(V)) {
if (FPMO->hasAllowReassoc())
Flags |= bitc::AllowReassoc;
@@ -1537,6 +1579,9 @@ static uint64_t getOptimizationFlags(const Value *V) {
Flags |= bitc::AllowContract;
if (FPMO->hasApproxFunc())
Flags |= bitc::ApproxFunc;
+ } else if (const auto *NNI = dyn_cast<PossiblyNonNegInst>(V)) {
+ if (NNI->hasNonNeg())
+ Flags |= 1 << bitc::PNNI_NON_NEG;
}
return Flags;
@@ -1953,13 +1998,12 @@ void ModuleBitcodeWriter::writeDIMacroFile(const DIMacroFile *N,
}
void ModuleBitcodeWriter::writeDIArgList(const DIArgList *N,
- SmallVectorImpl<uint64_t> &Record,
- unsigned Abbrev) {
+ SmallVectorImpl<uint64_t> &Record) {
Record.reserve(N->getArgs().size());
for (ValueAsMetadata *MD : N->getArgs())
Record.push_back(VE.getMetadataID(MD));
- Stream.EmitRecord(bitc::METADATA_ARG_LIST, Record, Abbrev);
+ Stream.EmitRecord(bitc::METADATA_ARG_LIST, Record);
Record.clear();
}
@@ -2242,6 +2286,10 @@ void ModuleBitcodeWriter::writeMetadataRecords(
#include "llvm/IR/Metadata.def"
}
}
+ if (auto *AL = dyn_cast<DIArgList>(MD)) {
+ writeDIArgList(AL, Record);
+ continue;
+ }
writeValueAsMetadata(cast<ValueAsMetadata>(MD), Record);
}
}
@@ -2813,6 +2861,12 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
AbbrevToUse = FUNCTION_INST_CAST_ABBREV;
Vals.push_back(VE.getTypeID(I.getType()));
Vals.push_back(getEncodedCastOpcode(I.getOpcode()));
+ uint64_t Flags = getOptimizationFlags(&I);
+ if (Flags != 0) {
+ if (AbbrevToUse == FUNCTION_INST_CAST_ABBREV)
+ AbbrevToUse = FUNCTION_INST_CAST_FLAGS_ABBREV;
+ Vals.push_back(Flags);
+ }
} else {
assert(isa<BinaryOperator>(I) && "Unknown instruction!");
Code = bitc::FUNC_CODE_INST_BINOP;
@@ -3634,6 +3688,18 @@ void ModuleBitcodeWriter::writeBlockInfo() {
FUNCTION_INST_CAST_ABBREV)
llvm_unreachable("Unexpected abbrev ordering!");
}
+ { // INST_CAST_FLAGS abbrev for FUNCTION_BLOCK.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_CAST));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // OpVal
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty
+ VE.computeBitsRequiredForTypeIndicies()));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); // flags
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) !=
+ FUNCTION_INST_CAST_FLAGS_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
{ // INST_RET abbrev for FUNCTION_BLOCK.
auto Abbv = std::make_shared<BitCodeAbbrev>();
@@ -3715,33 +3781,33 @@ void IndexBitcodeWriter::writeModStrings() {
unsigned AbbrevHash = Stream.EmitAbbrev(std::move(Abbv));
SmallVector<unsigned, 64> Vals;
- forEachModule(
- [&](const StringMapEntry<std::pair<uint64_t, ModuleHash>> &MPSE) {
- StringRef Key = MPSE.getKey();
- const auto &Value = MPSE.getValue();
- StringEncoding Bits = getStringEncoding(Key);
- unsigned AbbrevToUse = Abbrev8Bit;
- if (Bits == SE_Char6)
- AbbrevToUse = Abbrev6Bit;
- else if (Bits == SE_Fixed7)
- AbbrevToUse = Abbrev7Bit;
-
- Vals.push_back(Value.first);
- Vals.append(Key.begin(), Key.end());
-
- // Emit the finished record.
- Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse);
-
- // Emit an optional hash for the module now
- const auto &Hash = Value.second;
- if (llvm::any_of(Hash, [](uint32_t H) { return H; })) {
- Vals.assign(Hash.begin(), Hash.end());
- // Emit the hash record.
- Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash);
- }
+ forEachModule([&](const StringMapEntry<ModuleHash> &MPSE) {
+ StringRef Key = MPSE.getKey();
+ const auto &Hash = MPSE.getValue();
+ StringEncoding Bits = getStringEncoding(Key);
+ unsigned AbbrevToUse = Abbrev8Bit;
+ if (Bits == SE_Char6)
+ AbbrevToUse = Abbrev6Bit;
+ else if (Bits == SE_Fixed7)
+ AbbrevToUse = Abbrev7Bit;
- Vals.clear();
- });
+ auto ModuleId = ModuleIdMap.size();
+ ModuleIdMap[Key] = ModuleId;
+ Vals.push_back(ModuleId);
+ Vals.append(Key.begin(), Key.end());
+
+ // Emit the finished record.
+ Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse);
+
+ // Emit an optional hash for the module now
+ if (llvm::any_of(Hash, [](uint32_t H) { return H; })) {
+ Vals.assign(Hash.begin(), Hash.end());
+ // Emit the hash record.
+ Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash);
+ }
+
+ Vals.clear();
+ });
Stream.ExitBlock();
}
@@ -3963,8 +4029,9 @@ static void writeFunctionHeapProfileRecords(
// Helper to emit a single function summary record.
void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
- unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev,
- unsigned CallsiteAbbrev, unsigned AllocAbbrev, const Function &F) {
+ unsigned ValueID, unsigned FSCallsRelBFAbbrev,
+ unsigned FSCallsProfileAbbrev, unsigned CallsiteAbbrev,
+ unsigned AllocAbbrev, const Function &F) {
NameVals.push_back(ValueID);
FunctionSummary *FS = cast<FunctionSummary>(Summary);
@@ -3991,21 +4058,21 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
for (auto &RI : FS->refs())
NameVals.push_back(VE.getValueID(RI.getValue()));
- bool HasProfileData =
- F.hasProfileData() || ForceSummaryEdgesCold != FunctionSummary::FSHT_None;
+ const bool UseRelBFRecord =
+ WriteRelBFToSummary && !F.hasProfileData() &&
+ ForceSummaryEdgesCold == FunctionSummary::FSHT_None;
for (auto &ECI : FS->calls()) {
NameVals.push_back(getValueId(ECI.first));
- if (HasProfileData)
- NameVals.push_back(static_cast<uint8_t>(ECI.second.Hotness));
- else if (WriteRelBFToSummary)
- NameVals.push_back(ECI.second.RelBlockFreq);
+ if (UseRelBFRecord)
+ NameVals.push_back(getEncodedRelBFCallEdgeInfo(ECI.second));
+ else
+ NameVals.push_back(getEncodedHotnessCallEdgeInfo(ECI.second));
}
- unsigned FSAbbrev = (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev);
+ unsigned FSAbbrev =
+ (UseRelBFRecord ? FSCallsRelBFAbbrev : FSCallsProfileAbbrev);
unsigned Code =
- (HasProfileData ? bitc::FS_PERMODULE_PROFILE
- : (WriteRelBFToSummary ? bitc::FS_PERMODULE_RELBF
- : bitc::FS_PERMODULE));
+ (UseRelBFRecord ? bitc::FS_PERMODULE_RELBF : bitc::FS_PERMODULE_PROFILE);
// Emit the finished record.
Stream.EmitRecord(Code, NameVals, FSAbbrev);
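The record selection above reduces to a small decision table (a restatement of the logic in this hunk, not new behavior):

  //  WriteRelBFToSummary | F.hasProfileData() | record emitted
  //  --------------------+--------------------+----------------------
  //  true                | false              | FS_PERMODULE_RELBF
  //  true                | true               | FS_PERMODULE_PROFILE
  //  false               | either             | FS_PERMODULE_PROFILE
  // ForceSummaryEdgesCold != FSHT_None likewise forces the PROFILE form;
  // plain FS_PERMODULE is no longer emitted on this path.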
@@ -4114,17 +4181,14 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // rorefcnt
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // worefcnt
- // numrefs x valueid, n x (valueid, hotness)
+ // numrefs x valueid, n x (valueid, hotness+tailcall flags)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(std::move(Abbv));
- // Abbrev for FS_PERMODULE or FS_PERMODULE_RELBF.
+ // Abbrev for FS_PERMODULE_RELBF.
Abbv = std::make_shared<BitCodeAbbrev>();
- if (WriteRelBFToSummary)
- Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_RELBF));
- else
- Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE));
+ Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_RELBF));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
@@ -4132,10 +4196,10 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // rorefcnt
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // worefcnt
- // numrefs x valueid, n x (valueid [, rel_block_freq])
+  // numrefs x valueid, n x (valueid, rel_block_freq+tailcall flags)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
- unsigned FSCallsAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+ unsigned FSCallsRelBFAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for FS_PERMODULE_GLOBALVAR_INIT_REFS.
Abbv = std::make_shared<BitCodeAbbrev>();
@@ -4207,9 +4271,9 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
continue;
}
auto *Summary = VI.getSummaryList()[0].get();
- writePerModuleFunctionSummaryRecord(NameVals, Summary, VE.getValueID(&F),
- FSCallsAbbrev, FSCallsProfileAbbrev,
- CallsiteAbbrev, AllocAbbrev, F);
+ writePerModuleFunctionSummaryRecord(
+ NameVals, Summary, VE.getValueID(&F), FSCallsRelBFAbbrev,
+ FSCallsProfileAbbrev, CallsiteAbbrev, AllocAbbrev, F);
}
// Capture references from GlobalVariable initializers, which are outside
@@ -4280,25 +4344,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Stream.EmitRecord(bitc::FS_STACK_IDS, StackIds, StackIdAbbvId);
}
- // Abbrev for FS_COMBINED.
- auto Abbv = std::make_shared<BitCodeAbbrev>();
- Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // entrycount
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // rorefcnt
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // worefcnt
- // numrefs x valueid, n x (valueid)
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
- unsigned FSCallsAbbrev = Stream.EmitAbbrev(std::move(Abbv));
-
// Abbrev for FS_COMBINED_PROFILE.
- Abbv = std::make_shared<BitCodeAbbrev>();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_PROFILE));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid
@@ -4309,7 +4356,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // rorefcnt
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // worefcnt
- // numrefs x valueid, n x (valueid, hotness)
+ // numrefs x valueid, n x (valueid, hotness+tailcall flags)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(std::move(Abbv));
@@ -4410,7 +4457,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
if (auto *VS = dyn_cast<GlobalVarSummary>(S)) {
NameVals.push_back(*ValueId);
- NameVals.push_back(Index.getModuleId(VS->modulePath()));
+ assert(ModuleIdMap.count(VS->modulePath()));
+ NameVals.push_back(ModuleIdMap[VS->modulePath()]);
NameVals.push_back(getEncodedGVSummaryFlags(VS->flags()));
NameVals.push_back(getEncodedGVarFlags(VS->varflags()));
for (auto &RI : VS->refs()) {
@@ -4460,7 +4508,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
});
NameVals.push_back(*ValueId);
- NameVals.push_back(Index.getModuleId(FS->modulePath()));
+ assert(ModuleIdMap.count(FS->modulePath()));
+ NameVals.push_back(ModuleIdMap[FS->modulePath()]);
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
NameVals.push_back(FS->instCount());
NameVals.push_back(getEncodedFFlags(FS->fflags()));
@@ -4487,14 +4536,6 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
NameVals[7] = RORefCnt;
NameVals[8] = WORefCnt;
- bool HasProfileData = false;
- for (auto &EI : FS->calls()) {
- HasProfileData |=
- EI.second.getHotness() != CalleeInfo::HotnessType::Unknown;
- if (HasProfileData)
- break;
- }
-
for (auto &EI : FS->calls()) {
// If this GUID doesn't have a value id, it doesn't have a function
// summary and we don't need to record any calls to it.
@@ -4502,16 +4543,12 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
if (!CallValueId)
continue;
NameVals.push_back(*CallValueId);
- if (HasProfileData)
- NameVals.push_back(static_cast<uint8_t>(EI.second.Hotness));
+ NameVals.push_back(getEncodedHotnessCallEdgeInfo(EI.second));
}
- unsigned FSAbbrev = (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev);
- unsigned Code =
- (HasProfileData ? bitc::FS_COMBINED_PROFILE : bitc::FS_COMBINED);
-
// Emit the finished record.
- Stream.EmitRecord(Code, NameVals, FSAbbrev);
+ Stream.EmitRecord(bitc::FS_COMBINED_PROFILE, NameVals,
+ FSCallsProfileAbbrev);
NameVals.clear();
MaybeEmitOriginalName(*S);
});
@@ -4520,7 +4557,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
auto AliasValueId = SummaryToValueIdMap[AS];
assert(AliasValueId);
NameVals.push_back(AliasValueId);
- NameVals.push_back(Index.getModuleId(AS->modulePath()));
+ assert(ModuleIdMap.count(AS->modulePath()));
+ NameVals.push_back(ModuleIdMap[AS->modulePath()]);
NameVals.push_back(getEncodedGVSummaryFlags(AS->flags()));
auto AliaseeValueId = SummaryToValueIdMap[&AS->getAliasee()];
assert(AliaseeValueId);
@@ -5137,7 +5175,7 @@ void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf,
// Save llvm.compiler.used and remove it.
SmallVector<Constant *, 2> UsedArray;
SmallVector<GlobalValue *, 4> UsedGlobals;
- Type *UsedElementType = Type::getInt8Ty(M.getContext())->getPointerTo(0);
+ Type *UsedElementType = PointerType::getUnqual(M.getContext());
GlobalVariable *Used = collectUsedGlobalVariables(M, UsedGlobals, true);
for (auto *GV : UsedGlobals) {
if (GV->getName() != "llvm.embedded.module" &&
diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
index 536d04f2fe26..28941d6c41cf 100644
--- a/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -19,10 +19,20 @@
using namespace llvm;
PreservedAnalyses BitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) {
+  // RemoveDIs: there's no bitcode representation of the DPValue debug-info;
+  // convert to dbg.values before writing out.
+ bool IsNewDbgInfoFormat = M.IsNewDbgInfoFormat;
+ if (IsNewDbgInfoFormat)
+ M.convertFromNewDbgValues();
+
const ModuleSummaryIndex *Index =
EmitSummaryIndex ? &(AM.getResult<ModuleSummaryIndexAnalysis>(M))
: nullptr;
WriteBitcodeToFile(M, OS, ShouldPreserveUseListOrder, Index, EmitModuleHash);
+
+ if (IsNewDbgInfoFormat)
+ M.convertToNewDbgValues();
+
return PreservedAnalyses::all();
}
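The save/convert/restore pattern added here (and again in the legacy pass below) could also be packaged as an RAII helper; a minimal sketch, assuming only the Module APIs the patch itself calls (the guard type is hypothetical):

  namespace {
  struct DebugInfoFormatGuard {
    Module &M;
    bool WasNewFormat;
    DebugInfoFormatGuard(Module &M)
        : M(M), WasNewFormat(M.IsNewDbgInfoFormat) {
      if (WasNewFormat)
        M.convertFromNewDbgValues(); // bitcode has no DPValue representation
    }
    ~DebugInfoFormatGuard() {
      if (WasNewFormat)
        M.convertToNewDbgValues(); // restore the in-memory format on exit
    }
  };
  } // namespace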
@@ -54,8 +64,17 @@ namespace {
EmitSummaryIndex
? &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex())
: nullptr;
+    // RemoveDIs: there's no bitcode representation of the DPValue debug-info;
+    // convert to dbg.values before writing out.
+ bool IsNewDbgInfoFormat = M.IsNewDbgInfoFormat;
+ if (IsNewDbgInfoFormat)
+ M.convertFromNewDbgValues();
+
WriteBitcodeToFile(M, OS, ShouldPreserveUseListOrder, Index,
EmitModuleHash);
+
+ if (IsNewDbgInfoFormat)
+ M.convertToNewDbgValues();
return false;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 886c4db069f1..c5367221cae7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -533,9 +533,8 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) {
}
bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
- unsigned AntiDepGroupIndex,
- RenameOrderType& RenameOrder,
- std::map<unsigned, unsigned> &RenameMap) {
+ unsigned SuperReg, unsigned AntiDepGroupIndex, RenameOrderType &RenameOrder,
+ std::map<unsigned, unsigned> &RenameMap) {
std::vector<unsigned> &KillIndices = State->GetKillIndices();
std::vector<unsigned> &DefIndices = State->GetDefIndices();
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
@@ -550,17 +549,12 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
if (Regs.empty())
return false;
- // Find the "superest" register in the group. At the same time,
- // collect the BitVector of registers that can be used to rename
+ // Collect the BitVector of registers that can be used to rename
// each register.
LLVM_DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex
<< ":\n");
std::map<unsigned, BitVector> RenameRegisterMap;
- unsigned SuperReg = 0;
for (unsigned Reg : Regs) {
- if ((SuperReg == 0) || TRI->isSuperRegister(SuperReg, Reg))
- SuperReg = Reg;
-
// If Reg has any references, then collect possible rename regs
if (RegRefs.count(Reg) > 0) {
LLVM_DEBUG(dbgs() << "\t\t" << printReg(Reg, TRI) << ":");
@@ -892,30 +886,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
}
}
- if (AntiDepReg == 0) continue;
-
- // If the definition of the anti-dependency register does not start
- // a new live range, bail out. This can happen if the anti-dep
- // register is a sub-register of another register whose live range
- // spans over PathSU. In such case, PathSU defines only a part of
- // the larger register.
- RegAliases.reset();
- for (MCRegAliasIterator AI(AntiDepReg, TRI, true); AI.isValid(); ++AI)
- RegAliases.set(*AI);
- for (SDep S : PathSU->Succs) {
- SDep::Kind K = S.getKind();
- if (K != SDep::Data && K != SDep::Output && K != SDep::Anti)
- continue;
- unsigned R = S.getReg();
- if (!RegAliases[R])
- continue;
- if (R == AntiDepReg || TRI->isSubRegister(AntiDepReg, R))
- continue;
- AntiDepReg = 0;
- break;
- }
-
- if (AntiDepReg == 0) continue;
+ if (AntiDepReg == 0)
+ continue;
}
assert(AntiDepReg != 0);
@@ -931,7 +903,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// Look for a suitable register to use to break the anti-dependence.
std::map<unsigned, unsigned> RenameMap;
- if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) {
+ if (FindSuitableFreeRegisters(AntiDepReg, GroupIndex, RenameOrder,
+ RenameMap)) {
LLVM_DEBUG(dbgs() << "\tBreaking anti-dependence edge on "
<< printReg(AntiDepReg, TRI) << ":");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
index cece217e645c..06c4c6957ba0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -176,8 +176,9 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState {
std::set<unsigned> &PassthruRegs);
void ScanInstruction(MachineInstr &MI, unsigned Count);
BitVector GetRenameRegisters(unsigned Reg);
- bool FindSuitableFreeRegisters(unsigned AntiDepGroupIndex,
- RenameOrderType& RenameOrder,
+ bool FindSuitableFreeRegisters(unsigned SuperReg,
+ unsigned AntiDepGroupIndex,
+ RenameOrderType &RenameOrder,
std::map<unsigned, unsigned> &RenameMap);
};
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
index 2065bfbd1c44..1994e6aec84b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
@@ -140,15 +140,14 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
SmallVectorImpl<uint64_t> *FixedOffsets,
uint64_t StartingOffset) {
TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
- SmallVector<TypeSize, 4> Offsets;
- if (FixedOffsets)
+ if (FixedOffsets) {
+ SmallVector<TypeSize, 4> Offsets;
ComputeValueVTs(TLI, DL, Ty, ValueVTs, &Offsets, Offset);
- else
- ComputeValueVTs(TLI, DL, Ty, ValueVTs, nullptr, Offset);
-
- if (FixedOffsets)
for (TypeSize Offset : Offsets)
- FixedOffsets->push_back(Offset.getKnownMinValue());
+ FixedOffsets->push_back(Offset.getFixedValue());
+ } else {
+ ComputeValueVTs(TLI, DL, Ty, ValueVTs, nullptr, Offset);
+ }
}
void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
@@ -166,15 +165,14 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
SmallVectorImpl<uint64_t> *FixedOffsets,
uint64_t StartingOffset) {
TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
- SmallVector<TypeSize, 4> Offsets;
- if (FixedOffsets)
+ if (FixedOffsets) {
+ SmallVector<TypeSize, 4> Offsets;
ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, &Offsets, Offset);
- else
- ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, nullptr, Offset);
-
- if (FixedOffsets)
for (TypeSize Offset : Offsets)
- FixedOffsets->push_back(Offset.getKnownMinValue());
+ FixedOffsets->push_back(Offset.getFixedValue());
+ } else {
+ ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, nullptr, Offset);
+ }
}
void llvm::computeValueLLTs(const DataLayout &DL, Type &Ty,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index aab3c2681339..d6f487c18b03 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -12,8 +12,8 @@
#include "llvm/CodeGen/AccelTable.h"
#include "DwarfCompileUnit.h"
+#include "DwarfUnit.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
@@ -200,32 +200,35 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
uint32_t AugmentationStringSize = sizeof(AugmentationString);
char AugmentationString[8] = {'L', 'L', 'V', 'M', '0', '7', '0', '0'};
- Header(uint32_t CompUnitCount, uint32_t BucketCount, uint32_t NameCount)
- : CompUnitCount(CompUnitCount), BucketCount(BucketCount),
+ Header(uint32_t CompUnitCount, uint32_t LocalTypeUnitCount,
+ uint32_t ForeignTypeUnitCount, uint32_t BucketCount,
+ uint32_t NameCount)
+ : CompUnitCount(CompUnitCount), LocalTypeUnitCount(LocalTypeUnitCount),
+ ForeignTypeUnitCount(ForeignTypeUnitCount), BucketCount(BucketCount),
NameCount(NameCount) {}
void emit(Dwarf5AccelTableWriter &Ctx);
};
- struct AttributeEncoding {
- dwarf::Index Index;
- dwarf::Form Form;
- };
Header Header;
- DenseMap<uint32_t, SmallVector<AttributeEncoding, 2>> Abbreviations;
- ArrayRef<MCSymbol *> CompUnits;
- llvm::function_ref<unsigned(const DataT &)> getCUIndexForEntry;
+ DenseMap<uint32_t, SmallVector<DWARF5AccelTableData::AttributeEncoding, 2>>
+ Abbreviations;
+ ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits;
+ ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits;
+ llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
+ const DataT &)>
+ getIndexForEntry;
MCSymbol *ContributionEnd = nullptr;
MCSymbol *AbbrevStart = Asm->createTempSymbol("names_abbrev_start");
MCSymbol *AbbrevEnd = Asm->createTempSymbol("names_abbrev_end");
MCSymbol *EntryPool = Asm->createTempSymbol("names_entries");
+ // Indicates if this module is built with Split Dwarf enabled.
+ bool IsSplitDwarf = false;
- DenseSet<uint32_t> getUniqueTags() const;
-
- // Right now, we emit uniform attributes for all tags.
- SmallVector<AttributeEncoding, 2> getUniformAttributes() const;
+ void populateAbbrevsMap();
void emitCUList() const;
+ void emitTUList() const;
void emitBuckets() const;
void emitStringOffsets() const;
void emitAbbrevs() const;
@@ -235,8 +238,12 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
public:
Dwarf5AccelTableWriter(
AsmPrinter *Asm, const AccelTableBase &Contents,
- ArrayRef<MCSymbol *> CompUnits,
- llvm::function_ref<unsigned(const DataT &)> GetCUIndexForEntry);
+ ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits,
+ ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits,
+ llvm::function_ref<
+ std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(const DataT &)>
+ getIndexForEntry,
+ bool IsSplitDwarf);
void emit();
};
@@ -358,6 +365,11 @@ void AppleAccelTableWriter::emit() const {
emitData();
}
+DWARF5AccelTableData::DWARF5AccelTableData(const DIE &Die,
+ const uint32_t UnitID,
+ const bool IsTU)
+ : OffsetVal(&Die), DieTag(Die.getTag()), UnitID(UnitID), IsTU(IsTU) {}
+
template <typename DataT>
void Dwarf5AccelTableWriter<DataT>::Header::emit(Dwarf5AccelTableWriter &Ctx) {
assert(CompUnitCount > 0 && "Index must have at least one CU.");
@@ -388,38 +400,65 @@ void Dwarf5AccelTableWriter<DataT>::Header::emit(Dwarf5AccelTableWriter &Ctx) {
Asm->OutStreamer->emitBytes({AugmentationString, AugmentationStringSize});
}
+static uint32_t constexpr LowerBitSize = dwarf::DW_IDX_type_hash;
+static uint32_t getTagFromAbbreviationTag(const uint32_t AbbrvTag) {
+ return AbbrvTag >> LowerBitSize;
+}
+
+/// Constructs a unique AbbrevTag that captures what a DIE accesses.
+/// Using this tag we can emit a unique abbreviation for each DIE.
+static uint32_t constructAbbreviationTag(
+ const unsigned Tag,
+ const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> &EntryRet) {
+ uint32_t AbbrvTag = 0;
+ if (EntryRet)
+ AbbrvTag |= 1 << EntryRet->Endoding.Index;
+ AbbrvTag |= 1 << dwarf::DW_IDX_die_offset;
+ AbbrvTag |= Tag << LowerBitSize;
+ return AbbrvTag;
+}
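A worked example of the tag construction (using the DWARF v5 index-attribute numbering, where DW_IDX_compile_unit = 1, DW_IDX_die_offset = 3 and DW_IDX_type_hash = 5):

  // For a DW_TAG_subprogram (0x2e) entry that also records its CU index:
  //   AbbrvTag = (1 << 1)    // DW_IDX_compile_unit
  //            | (1 << 3)    // DW_IDX_die_offset (always set)
  //            | (0x2e << 5) // Tag << LowerBitSize
  //            = 0x5CA
  // Two entries therefore share an abbreviation exactly when they have the
  // same DIE tag and the same set of index attributes.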
template <typename DataT>
-DenseSet<uint32_t> Dwarf5AccelTableWriter<DataT>::getUniqueTags() const {
- DenseSet<uint32_t> UniqueTags;
+void Dwarf5AccelTableWriter<DataT>::populateAbbrevsMap() {
for (auto &Bucket : Contents.getBuckets()) {
for (auto *Hash : Bucket) {
for (auto *Value : Hash->Values) {
+ std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
+ getIndexForEntry(*static_cast<const DataT *>(Value));
unsigned Tag = static_cast<const DataT *>(Value)->getDieTag();
- UniqueTags.insert(Tag);
+ uint32_t AbbrvTag = constructAbbreviationTag(Tag, EntryRet);
+ if (Abbreviations.count(AbbrvTag) == 0) {
+ SmallVector<DWARF5AccelTableData::AttributeEncoding, 2> UA;
+ if (EntryRet)
+ UA.push_back(EntryRet->Endoding);
+ UA.push_back({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4});
+ Abbreviations.try_emplace(AbbrvTag, UA);
+ }
}
}
}
- return UniqueTags;
}
template <typename DataT>
-SmallVector<typename Dwarf5AccelTableWriter<DataT>::AttributeEncoding, 2>
-Dwarf5AccelTableWriter<DataT>::getUniformAttributes() const {
- SmallVector<AttributeEncoding, 2> UA;
- if (CompUnits.size() > 1) {
- size_t LargestCUIndex = CompUnits.size() - 1;
- dwarf::Form Form = DIEInteger::BestForm(/*IsSigned*/ false, LargestCUIndex);
- UA.push_back({dwarf::DW_IDX_compile_unit, Form});
+void Dwarf5AccelTableWriter<DataT>::emitCUList() const {
+ for (const auto &CU : enumerate(CompUnits)) {
+ Asm->OutStreamer->AddComment("Compilation unit " + Twine(CU.index()));
+ if (std::holds_alternative<MCSymbol *>(CU.value()))
+ Asm->emitDwarfSymbolReference(std::get<MCSymbol *>(CU.value()));
+ else
+ Asm->emitDwarfLengthOrOffset(std::get<uint64_t>(CU.value()));
}
- UA.push_back({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4});
- return UA;
}
template <typename DataT>
-void Dwarf5AccelTableWriter<DataT>::emitCUList() const {
- for (const auto &CU : enumerate(CompUnits)) {
- Asm->OutStreamer->AddComment("Compilation unit " + Twine(CU.index()));
- Asm->emitDwarfSymbolReference(CU.value());
+void Dwarf5AccelTableWriter<DataT>::emitTUList() const {
+ for (const auto &TU : enumerate(TypeUnits)) {
+ Asm->OutStreamer->AddComment("Type unit " + Twine(TU.index()));
+ if (std::holds_alternative<MCSymbol *>(TU.value()))
+ Asm->emitDwarfSymbolReference(std::get<MCSymbol *>(TU.value()));
+ else if (IsSplitDwarf)
+ Asm->emitInt64(std::get<uint64_t>(TU.value()));
+ else
+ Asm->emitDwarfLengthOrOffset(std::get<uint64_t>(TU.value()));
}
}
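The asymmetry between the two branches mirrors the DWARF v5 .debug_names layout (a reading of the code above, not behavior added beyond it):

  // Local type units are referenced by a section offset -- 4 or 8 bytes
  // depending on DWARF32/DWARF64, hence emitDwarfLengthOrOffset() -- while
  // under Split DWARF each foreign type-unit entry is the 8-byte type
  // signature itself, hence the emitInt64() branch.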
@@ -450,10 +489,11 @@ void Dwarf5AccelTableWriter<DataT>::emitAbbrevs() const {
Asm->OutStreamer->emitLabel(AbbrevStart);
for (const auto &Abbrev : Abbreviations) {
Asm->OutStreamer->AddComment("Abbrev code");
- assert(Abbrev.first != 0);
- Asm->emitULEB128(Abbrev.first);
- Asm->OutStreamer->AddComment(dwarf::TagString(Abbrev.first));
+ uint32_t Tag = getTagFromAbbreviationTag(Abbrev.first);
+ assert(Tag != 0);
Asm->emitULEB128(Abbrev.first);
+ Asm->OutStreamer->AddComment(dwarf::TagString(Tag));
+ Asm->emitULEB128(Tag);
for (const auto &AttrEnc : Abbrev.second) {
Asm->emitULEB128(AttrEnc.Index, dwarf::IndexString(AttrEnc.Index).data());
Asm->emitULEB128(AttrEnc.Form,
@@ -468,16 +508,22 @@ void Dwarf5AccelTableWriter<DataT>::emitAbbrevs() const {
template <typename DataT>
void Dwarf5AccelTableWriter<DataT>::emitEntry(const DataT &Entry) const {
- auto AbbrevIt = Abbreviations.find(Entry.getDieTag());
+ std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
+ getIndexForEntry(Entry);
+ uint32_t AbbrvTag = constructAbbreviationTag(Entry.getDieTag(), EntryRet);
+ auto AbbrevIt = Abbreviations.find(AbbrvTag);
assert(AbbrevIt != Abbreviations.end() &&
"Why wasn't this abbrev generated?");
-
+ assert(getTagFromAbbreviationTag(AbbrevIt->first) == Entry.getDieTag() &&
+ "Invalid Tag");
Asm->emitULEB128(AbbrevIt->first, "Abbreviation code");
+
for (const auto &AttrEnc : AbbrevIt->second) {
Asm->OutStreamer->AddComment(dwarf::IndexString(AttrEnc.Index));
switch (AttrEnc.Index) {
- case dwarf::DW_IDX_compile_unit: {
- DIEInteger ID(getCUIndexForEntry(Entry));
+ case dwarf::DW_IDX_compile_unit:
+ case dwarf::DW_IDX_type_unit: {
+ DIEInteger ID(EntryRet->Index);
ID.emitValue(Asm, AttrEnc.Form);
break;
}
@@ -508,23 +554,26 @@ template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emitData() const {
template <typename DataT>
Dwarf5AccelTableWriter<DataT>::Dwarf5AccelTableWriter(
AsmPrinter *Asm, const AccelTableBase &Contents,
- ArrayRef<MCSymbol *> CompUnits,
- llvm::function_ref<unsigned(const DataT &)> getCUIndexForEntry)
+ ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits,
+ ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits,
+ llvm::function_ref<
+ std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(const DataT &)>
+ getIndexForEntry,
+ bool IsSplitDwarf)
: AccelTableWriter(Asm, Contents, false),
- Header(CompUnits.size(), Contents.getBucketCount(),
+ Header(CompUnits.size(), IsSplitDwarf ? 0 : TypeUnits.size(),
+ IsSplitDwarf ? TypeUnits.size() : 0, Contents.getBucketCount(),
Contents.getUniqueNameCount()),
- CompUnits(CompUnits), getCUIndexForEntry(std::move(getCUIndexForEntry)) {
- DenseSet<uint32_t> UniqueTags = getUniqueTags();
- SmallVector<AttributeEncoding, 2> UniformAttributes = getUniformAttributes();
-
- Abbreviations.reserve(UniqueTags.size());
- for (uint32_t Tag : UniqueTags)
- Abbreviations.try_emplace(Tag, UniformAttributes);
+ CompUnits(CompUnits), TypeUnits(TypeUnits),
+ getIndexForEntry(std::move(getIndexForEntry)),
+ IsSplitDwarf(IsSplitDwarf) {
+ populateAbbrevsMap();
}
template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emit() {
Header.emit(*this);
emitCUList();
+ emitTUList();
emitBuckets();
emitHashes();
emitStringOffsets();
@@ -543,11 +592,15 @@ void llvm::emitAppleAccelTableImpl(AsmPrinter *Asm, AccelTableBase &Contents,
}
void llvm::emitDWARF5AccelTable(
- AsmPrinter *Asm, AccelTable<DWARF5AccelTableData> &Contents,
- const DwarfDebug &DD, ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs) {
- std::vector<MCSymbol *> CompUnits;
+ AsmPrinter *Asm, DWARF5AccelTable &Contents, const DwarfDebug &DD,
+ ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs) {
+ TUVectorTy TUSymbols = Contents.getTypeUnitsSymbols();
+ std::vector<std::variant<MCSymbol *, uint64_t>> CompUnits;
+ std::vector<std::variant<MCSymbol *, uint64_t>> TypeUnits;
SmallVector<unsigned, 1> CUIndex(CUs.size());
- int Count = 0;
+ DenseMap<unsigned, unsigned> TUIndex(TUSymbols.size());
+ int CUCount = 0;
+ int TUCount = 0;
for (const auto &CU : enumerate(CUs)) {
switch (CU.value()->getCUNode()->getNameTableKind()) {
case DICompileUnit::DebugNameTableKind::Default:
@@ -556,13 +609,21 @@ void llvm::emitDWARF5AccelTable(
default:
continue;
}
- CUIndex[CU.index()] = Count++;
+ CUIndex[CU.index()] = CUCount++;
assert(CU.index() == CU.value()->getUniqueID());
const DwarfCompileUnit *MainCU =
DD.useSplitDwarf() ? CU.value()->getSkeleton() : CU.value().get();
CompUnits.push_back(MainCU->getLabelBegin());
}
+ for (const auto &TU : TUSymbols) {
+ TUIndex[TU.UniqueID] = TUCount++;
+ if (DD.useSplitDwarf())
+ TypeUnits.push_back(std::get<uint64_t>(TU.LabelOrSignature));
+ else
+ TypeUnits.push_back(std::get<MCSymbol *>(TU.LabelOrSignature));
+ }
+
if (CompUnits.empty())
return;
@@ -570,23 +631,44 @@ void llvm::emitDWARF5AccelTable(
Asm->getObjFileLowering().getDwarfDebugNamesSection());
Contents.finalize(Asm, "names");
+ dwarf::Form CUIndexForm =
+ DIEInteger::BestForm(/*IsSigned*/ false, CompUnits.size() - 1);
+ dwarf::Form TUIndexForm =
+ DIEInteger::BestForm(/*IsSigned*/ false, TypeUnits.size() - 1);
Dwarf5AccelTableWriter<DWARF5AccelTableData>(
- Asm, Contents, CompUnits,
- [&](const DWARF5AccelTableData &Entry) {
- const DIE *CUDie = Entry.getDie().getUnitDie();
- return CUIndex[DD.lookupCU(CUDie)->getUniqueID()];
- })
+ Asm, Contents, CompUnits, TypeUnits,
+ [&](const DWARF5AccelTableData &Entry)
+ -> std::optional<DWARF5AccelTable::UnitIndexAndEncoding> {
+ if (Entry.isTU())
+ return {{TUIndex[Entry.getUnitID()],
+ {dwarf::DW_IDX_type_unit, TUIndexForm}}};
+ if (CUIndex.size() > 1)
+ return {{CUIndex[Entry.getUnitID()],
+ {dwarf::DW_IDX_compile_unit, CUIndexForm}}};
+ return std::nullopt;
+ },
+ DD.useSplitDwarf())
.emit();
}
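DIEInteger::BestForm picks the narrowest unsigned form that can hold the largest index, so for example (illustrative sizes):

  // CompUnits.size() == 256 -> largest index 255 -> DW_FORM_data1
  // CompUnits.size() == 257 -> largest index 256 -> DW_FORM_data2
  // The chosen form travels with each entry via UnitIndexAndEncoding, so the
  // abbreviation for DW_IDX_compile_unit / DW_IDX_type_unit records it once.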
+void DWARF5AccelTable::addTypeUnitSymbol(DwarfTypeUnit &U) {
+ TUSymbolsOrHashes.push_back({U.getLabelBegin(), U.getUniqueID()});
+}
+
+void DWARF5AccelTable::addTypeUnitSignature(DwarfTypeUnit &U) {
+ TUSymbolsOrHashes.push_back({U.getTypeSignature(), U.getUniqueID()});
+}
+
void llvm::emitDWARF5AccelTable(
- AsmPrinter *Asm, AccelTable<DWARF5AccelTableStaticData> &Contents,
- ArrayRef<MCSymbol *> CUs,
- llvm::function_ref<unsigned(const DWARF5AccelTableStaticData &)>
- getCUIndexForEntry) {
+ AsmPrinter *Asm, DWARF5AccelTable &Contents,
+ ArrayRef<std::variant<MCSymbol *, uint64_t>> CUs,
+ llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
+ const DWARF5AccelTableData &)>
+ getIndexForEntry) {
+ std::vector<std::variant<MCSymbol *, uint64_t>> TypeUnits;
Contents.finalize(Asm, "names");
- Dwarf5AccelTableWriter<DWARF5AccelTableStaticData>(Asm, Contents, CUs,
- getCUIndexForEntry)
+ Dwarf5AccelTableWriter<DWARF5AccelTableData>(Asm, Contents, CUs, TypeUnits,
+ getIndexForEntry, false)
.emit();
}
@@ -685,11 +767,6 @@ void DWARF5AccelTableData::print(raw_ostream &OS) const {
OS << " Tag: " << dwarf::TagString(getDieTag()) << "\n";
}
-void DWARF5AccelTableStaticData::print(raw_ostream &OS) const {
- OS << " Offset: " << getDieOffset() << "\n";
- OS << " Tag: " << dwarf::TagString(getDieTag()) << "\n";
-}
-
void AppleAccelTableOffsetData::print(raw_ostream &OS) const {
OS << " Offset: " << Die.getOffset() << "\n";
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 5381dfdd184c..61309c51336e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -59,6 +59,7 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Comdat.h"
@@ -93,6 +94,7 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
@@ -383,6 +385,7 @@ const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
}
const DataLayout &AsmPrinter::getDataLayout() const {
+ assert(MMI && "MMI could not be nullptr!");
return MMI->getModule()->getDataLayout();
}
@@ -442,7 +445,12 @@ bool AsmPrinter::doInitialization(Module &M) {
const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
.getModuleMetadata(M);
- OutStreamer->initSections(false, *TM.getMCSubtargetInfo());
+ // On AIX, we delay emitting any section information until
+ // after emitting the .file pseudo-op. This allows additional
+ // information (such as the embedded command line) to be associated
+ // with all sections in the object file rather than a single section.
+ if (!TM.getTargetTriple().isOSBinFormatXCOFF())
+ OutStreamer->initSections(false, *TM.getMCSubtargetInfo());
// Emit the version-min deployment target directive if needed.
//
@@ -488,8 +496,21 @@ bool AsmPrinter::doInitialization(Module &M) {
// On AIX, emit bytes for llvm.commandline metadata after .file so that the
// C_INFO symbol is preserved if any csect is kept by the linker.
- if (TM.getTargetTriple().isOSBinFormatXCOFF())
+ if (TM.getTargetTriple().isOSBinFormatXCOFF()) {
emitModuleCommandLines(M);
+ // Now we can generate section information.
+ OutStreamer->initSections(false, *TM.getMCSubtargetInfo());
+
+ // To work around an AIX assembler and/or linker bug, generate
+ // a rename for the default text-section symbol name. This call has
+ // no effect when generating object code directly.
+ MCSection *TextSection =
+ OutStreamer->getContext().getObjectFileInfo()->getTextSection();
+ MCSymbolXCOFF *XSym =
+ static_cast<MCSectionXCOFF *>(TextSection)->getQualNameSymbol();
+ if (XSym->hasRename())
+ OutStreamer->emitXCOFFRenameDirective(XSym, XSym->getSymbolTableName());
+ }
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
assert(MI && "AsmPrinter didn't require GCModuleInfo?");
@@ -516,6 +537,7 @@ bool AsmPrinter::doInitialization(Module &M) {
CodeViewLineTablesGroupDescription);
}
if (!EmitCodeView || M.getDwarfVersion()) {
+ assert(MMI && "MMI could not be nullptr here!");
if (MMI->hasDebugInfo()) {
DD = new DwarfDebug(this);
Handlers.emplace_back(std::unique_ptr<DwarfDebug>(DD), DbgTimerName,
@@ -1370,7 +1392,11 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
if (BBAddrMapVersion > 1) {
OutStreamer->AddComment("BB id");
// Emit the BB ID for this basic block.
- OutStreamer->emitULEB128IntValue(*MBB.getBBID());
+ // We only emit BaseID since CloneID is unset for
+ // basic-block-sections=labels.
+ // TODO: Emit the full BBID when labels and sections can be mixed
+ // together.
+ OutStreamer->emitULEB128IntValue(MBB.getBBID()->BaseID);
}
// Emit the basic block offset relative to the end of the previous block.
// This is zero unless the block is padded due to alignment.
@@ -1467,9 +1493,10 @@ void AsmPrinter::emitStackUsage(const MachineFunction &MF) {
}
}
- *StackUsageStream << MF.getFunction().getParent()->getName();
if (const DISubprogram *DSP = MF.getFunction().getSubprogram())
- *StackUsageStream << ':' << DSP->getLine();
+ *StackUsageStream << DSP->getFilename() << ':' << DSP->getLine();
+ else
+ *StackUsageStream << MF.getFunction().getParent()->getName();
*StackUsageStream << ':' << MF.getName() << '\t' << StackSize << '\t';
if (FrameInfo.hasVarSizedObjects())
@@ -1522,7 +1549,7 @@ void AsmPrinter::emitPCSections(const MachineFunction &MF) {
const size_t OptStart = SecWithOpt.find('!'); // likely npos
const StringRef Sec = SecWithOpt.substr(0, OptStart);
const StringRef Opts = SecWithOpt.substr(OptStart); // likely empty
- ConstULEB128 = Opts.find('C') != StringRef::npos;
+ ConstULEB128 = Opts.contains('C');
#ifndef NDEBUG
for (char O : Opts)
assert((O == '!' || O == 'C') && "Invalid !pcsections options");
@@ -1724,6 +1751,10 @@ void AsmPrinter::emitFunctionBody() {
case TargetOpcode::MEMBARRIER:
OutStreamer->emitRawComment("MEMBARRIER");
break;
+ case TargetOpcode::JUMP_TABLE_DEBUG_INFO:
+ // This instruction is only used to note jump table debug info, it's
+ // purely meta information.
+ break;
default:
emitInstruction(&MI);
if (CanDoExtraAnalysis) {
@@ -1923,18 +1954,35 @@ void AsmPrinter::emitFunctionBody() {
// Output MBB ids, function names, and frequencies if the flag to dump
// MBB profile information has been set
- if (MBBProfileDumpFileOutput) {
- if (!MF->hasBBLabels())
+ if (MBBProfileDumpFileOutput && !MF->empty() &&
+ MF->getFunction().getEntryCount()) {
+ if (!MF->hasBBLabels()) {
MF->getContext().reportError(
SMLoc(),
"Unable to find BB labels for MBB profile dump. -mbb-profile-dump "
"must be called with -basic-block-sections=labels");
- MachineBlockFrequencyInfo &MBFI =
- getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI();
- for (const auto &MBB : *MF) {
- *MBBProfileDumpFileOutput.get()
- << MF->getName() << "," << MBB.getBBID() << ","
- << MBFI.getBlockFreqRelativeToEntryBlock(&MBB) << "\n";
+ } else {
+ MachineBlockFrequencyInfo &MBFI =
+ getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI();
+      // The entry count and the entry basic block frequency aren't the same.
+      // We want to capture "absolute" frequencies, i.e. the frequency with
+      // which an MBB is executed when the program runs. From there, we can
+      // derive function-relative frequencies (divide by the value for the
+      // first MBB). We also know the frequency with which each function was
+      // called. This helps, for example, in a type of integration test where
+      // we want to cross-validate the compiler's profile with a real profile.
+      // Using double precision because the uint64 values used to encode MBB
+      // "frequencies" may be quite large.
+ const double EntryCount =
+ static_cast<double>(MF->getFunction().getEntryCount()->getCount());
+ for (const auto &MBB : *MF) {
+ const double MBBRelFreq = MBFI.getBlockFreqRelativeToEntryBlock(&MBB);
+ const double AbsMBBFreq = MBBRelFreq * EntryCount;
+ *MBBProfileDumpFileOutput.get()
+ << MF->getName() << "," << MBB.getBBID()->BaseID << ","
+ << AbsMBBFreq << "\n";
+ }
}
}
}
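A worked example of the dump format (all numbers illustrative): with getEntryCount() == 1000 and a block whose relative frequency is 0.25, the emitted line would be

  //   <function-name>,<bb-base-id>,250
  // since AbsMBBFreq = MBBRelFreq * EntryCount = 0.25 * 1000; dividing a
  // function's column by its entry block's value recovers the
  // function-relative frequencies again.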
@@ -2100,24 +2148,80 @@ void AsmPrinter::emitGlobalIFunc(Module &M, const GlobalIFunc &GI) {
assert(!TM.getTargetTriple().isOSBinFormatXCOFF() &&
"IFunc is not supported on AIX.");
- MCSymbol *Name = getSymbol(&GI);
+ auto EmitLinkage = [&](MCSymbol *Sym) {
+ if (GI.hasExternalLinkage() || !MAI->getWeakRefDirective())
+ OutStreamer->emitSymbolAttribute(Sym, MCSA_Global);
+ else if (GI.hasWeakLinkage() || GI.hasLinkOnceLinkage())
+ OutStreamer->emitSymbolAttribute(Sym, MCSA_WeakReference);
+ else
+ assert(GI.hasLocalLinkage() && "Invalid ifunc linkage");
+ };
- if (GI.hasExternalLinkage() || !MAI->getWeakRefDirective())
- OutStreamer->emitSymbolAttribute(Name, MCSA_Global);
- else if (GI.hasWeakLinkage() || GI.hasLinkOnceLinkage())
- OutStreamer->emitSymbolAttribute(Name, MCSA_WeakReference);
- else
- assert(GI.hasLocalLinkage() && "Invalid ifunc linkage");
+ if (TM.getTargetTriple().isOSBinFormatELF()) {
+ MCSymbol *Name = getSymbol(&GI);
+ EmitLinkage(Name);
+ OutStreamer->emitSymbolAttribute(Name, MCSA_ELF_TypeIndFunction);
+ emitVisibility(Name, GI.getVisibility());
+
+ // Emit the directives as assignments aka .set:
+ const MCExpr *Expr = lowerConstant(GI.getResolver());
+ OutStreamer->emitAssignment(Name, Expr);
+ MCSymbol *LocalAlias = getSymbolPreferLocal(GI);
+ if (LocalAlias != Name)
+ OutStreamer->emitAssignment(LocalAlias, Expr);
- OutStreamer->emitSymbolAttribute(Name, MCSA_ELF_TypeIndFunction);
- emitVisibility(Name, GI.getVisibility());
+ return;
+ }
- // Emit the directives as assignments aka .set:
- const MCExpr *Expr = lowerConstant(GI.getResolver());
- OutStreamer->emitAssignment(Name, Expr);
- MCSymbol *LocalAlias = getSymbolPreferLocal(GI);
- if (LocalAlias != Name)
- OutStreamer->emitAssignment(LocalAlias, Expr);
+ if (!TM.getTargetTriple().isOSBinFormatMachO() || !getIFuncMCSubtargetInfo())
+ llvm::report_fatal_error("IFuncs are not supported on this platform");
+
+ // On Darwin platforms, emit a manually-constructed .symbol_resolver that
+ // implements the symbol resolution duties of the IFunc.
+ //
+ // Normally, this would be handled by linker magic, but unfortunately there
+ // are a few limitations in ld64 and ld-prime's implementation of
+ // .symbol_resolver that mean we can't always use them:
+ //
+ // * resolvers cannot be the target of an alias
+ // * resolvers cannot have private linkage
+ // * resolvers cannot have linkonce linkage
+ // * resolvers cannot appear in executables
+ // * resolvers cannot appear in bundles
+ //
+ // This works around that by emitting a close approximation of what the
+ // linker would have done.
+
+ MCSymbol *LazyPointer =
+ GetExternalSymbolSymbol(GI.getName() + ".lazy_pointer");
+ MCSymbol *StubHelper = GetExternalSymbolSymbol(GI.getName() + ".stub_helper");
+
+ OutStreamer->switchSection(OutContext.getObjectFileInfo()->getDataSection());
+
+ const DataLayout &DL = M.getDataLayout();
+ emitAlignment(Align(DL.getPointerSize()));
+ OutStreamer->emitLabel(LazyPointer);
+ emitVisibility(LazyPointer, GI.getVisibility());
+ OutStreamer->emitValue(MCSymbolRefExpr::create(StubHelper, OutContext), 8);
+
+ OutStreamer->switchSection(OutContext.getObjectFileInfo()->getTextSection());
+
+ const TargetSubtargetInfo *STI =
+ TM.getSubtargetImpl(*GI.getResolverFunction());
+ const TargetLowering *TLI = STI->getTargetLowering();
+ Align TextAlign(TLI->getMinFunctionAlignment());
+
+ MCSymbol *Stub = getSymbol(&GI);
+ EmitLinkage(Stub);
+ OutStreamer->emitCodeAlignment(TextAlign, getIFuncMCSubtargetInfo());
+ OutStreamer->emitLabel(Stub);
+ emitVisibility(Stub, GI.getVisibility());
+ emitMachOIFuncStubBody(M, GI, LazyPointer);
+
+ OutStreamer->emitCodeAlignment(TextAlign, getIFuncMCSubtargetInfo());
+ OutStreamer->emitLabel(StubHelper);
+ emitVisibility(StubHelper, GI.getVisibility());
+ emitMachOIFuncStubHelperBody(M, GI, LazyPointer);
}
void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) {
@@ -2264,6 +2368,32 @@ bool AsmPrinter::doFinalization(Module &M) {
// through user plugins.
emitStackMaps();
+ // Print aliases in topological order, that is, for each alias a = b,
+ // b must be printed before a.
+ // This is because on some targets (e.g. PowerPC) linker expects aliases in
+ // such an order to generate correct TOC information.
+ SmallVector<const GlobalAlias *, 16> AliasStack;
+ SmallPtrSet<const GlobalAlias *, 16> AliasVisited;
+ for (const auto &Alias : M.aliases()) {
+ if (Alias.hasAvailableExternallyLinkage())
+ continue;
+ for (const GlobalAlias *Cur = &Alias; Cur;
+ Cur = dyn_cast<GlobalAlias>(Cur->getAliasee())) {
+ if (!AliasVisited.insert(Cur).second)
+ break;
+ AliasStack.push_back(Cur);
+ }
+ for (const GlobalAlias *AncestorAlias : llvm::reverse(AliasStack))
+ emitGlobalAlias(M, *AncestorAlias);
+ AliasStack.clear();
+ }
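A worked example of the chain walk (illustrative IR):

  // @c = global i32 0
  // @b = alias i32, ptr @c
  // @a = alias i32, ptr @b
  // Visiting @a pushes [a, b] onto AliasStack (the walk stops at @c, which
  // is not a GlobalAlias); the reversed traversal then emits @b before @a,
  // the order the comment above calls for. When the outer loop later
  // reaches @b, AliasVisited already contains it, so nothing is emitted
  // twice.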
+
+  // IFuncs must come before debug info in case the backend decides to emit
+ // as actual functions, since on Mach-O targets, we cannot create regular
+ // sections after DWARF.
+ for (const auto &IFunc : M.ifuncs())
+ emitGlobalIFunc(M, IFunc);
+
// Finalize debug and EH information.
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
@@ -2294,7 +2424,7 @@ bool AsmPrinter::doFinalization(Module &M) {
auto SymbolName = "swift_async_extendedFramePointerFlags";
auto Global = M.getGlobalVariable(SymbolName);
if (!Global) {
- auto Int8PtrTy = Type::getInt8PtrTy(M.getContext());
+ auto Int8PtrTy = PointerType::getUnqual(M.getContext());
Global = new GlobalVariable(M, Int8PtrTy, false,
GlobalValue::ExternalWeakLinkage, nullptr,
SymbolName);
@@ -2303,28 +2433,6 @@ bool AsmPrinter::doFinalization(Module &M) {
}
}
- // Print aliases in topological order, that is, for each alias a = b,
- // b must be printed before a.
- // This is because on some targets (e.g. PowerPC) linker expects aliases in
- // such an order to generate correct TOC information.
- SmallVector<const GlobalAlias *, 16> AliasStack;
- SmallPtrSet<const GlobalAlias *, 16> AliasVisited;
- for (const auto &Alias : M.aliases()) {
- if (Alias.hasAvailableExternallyLinkage())
- continue;
- for (const GlobalAlias *Cur = &Alias; Cur;
- Cur = dyn_cast<GlobalAlias>(Cur->getAliasee())) {
- if (!AliasVisited.insert(Cur).second)
- break;
- AliasStack.push_back(Cur);
- }
- for (const GlobalAlias *AncestorAlias : llvm::reverse(AliasStack))
- emitGlobalAlias(M, *AncestorAlias);
- AliasStack.clear();
- }
- for (const auto &IFunc : M.ifuncs())
- emitGlobalIFunc(M, IFunc);
-
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
assert(MI && "AsmPrinter didn't require GCModuleInfo?");
for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; )
@@ -2361,7 +2469,8 @@ bool AsmPrinter::doFinalization(Module &M) {
OutStreamer->emitAddrsig();
for (const GlobalValue &GV : M.global_values()) {
if (!GV.use_empty() && !GV.isThreadLocal() &&
- !GV.hasDLLImportStorageClass() && !GV.getName().startswith("llvm.") &&
+ !GV.hasDLLImportStorageClass() &&
+ !GV.getName().starts_with("llvm.") &&
!GV.hasAtLeastLocalUnnamedAddr())
OutStreamer->emitAddrsigSym(getSymbol(&GV));
}
@@ -2565,7 +2674,8 @@ void AsmPrinter::emitJumpTableInfo() {
const Function &F = MF->getFunction();
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
bool JTInDiffSection = !TLOF.shouldPutJumpTableInFunctionSection(
- MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32,
+ MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 ||
+ MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference64,
F);
if (JTInDiffSection) {
// Drop it in the readonly section.
@@ -2663,7 +2773,8 @@ void AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
return;
}
- case MachineJumpTableInfo::EK_LabelDifference32: {
+ case MachineJumpTableInfo::EK_LabelDifference32:
+ case MachineJumpTableInfo::EK_LabelDifference64: {
// Each entry is the address of the block minus the address of the jump
// table. This is used for PIC jump tables where gprel32 is not supported.
// e.g.:
@@ -2671,7 +2782,8 @@ void AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
// If the .set directive avoids relocations, this is emitted as:
// .set L4_5_set_123, LBB123 - LJTI1_2
// .word L4_5_set_123
- if (MAI->doesSetDirectiveSuppressReloc()) {
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
+ MAI->doesSetDirectiveSuppressReloc()) {
Value = MCSymbolRefExpr::create(GetJTSetSymbol(UID, MBB->getNumber()),
OutContext);
break;
@@ -3021,9 +3133,12 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
// Handle casts to pointers by changing them into casts to the appropriate
// integer type. This promotes constant folding and simplifies this code.
Constant *Op = CE->getOperand(0);
- Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
- false/*ZExt*/);
- return lowerConstant(Op);
+ Op = ConstantFoldIntegerCast(Op, DL.getIntPtrType(CV->getType()),
+ /*IsSigned*/ false, DL);
+ if (Op)
+ return lowerConstant(Op);
+
+ break; // Error
}
case Instruction::PtrToInt: {
@@ -3470,12 +3585,7 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
//
// gotpcrelcst := <offset from @foo base> + <cst>
//
- // If gotpcrelcst is positive it means that we can safely fold the pc rel
- // displacement into the GOTPCREL. We can also can have an extra offset <cst>
- // if the target knows how to encode it.
int64_t GOTPCRelCst = Offset + MV.getConstant();
- if (GOTPCRelCst < 0)
- return;
if (!AP.getObjFileLowering().supportGOTPCRelWithOffset() && GOTPCRelCst != 0)
return;
@@ -3696,7 +3806,7 @@ MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV,
}
/// Return the MCSymbol for the specified ExternalSymbol.
-MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const {
+MCSymbol *AsmPrinter::GetExternalSymbolSymbol(Twine Sym) const {
SmallString<60> NameStr;
Mangler::getNameWithPrefix(NameStr, Sym, getDataLayout());
return OutContext.getOrCreateSymbol(NameStr);
@@ -4164,3 +4274,18 @@ unsigned int AsmPrinter::getUnitLengthFieldByteSize() const {
return dwarf::getUnitLengthFieldByteSize(
OutStreamer->getContext().getDwarfFormat());
}
+
+std::tuple<const MCSymbol *, uint64_t, const MCSymbol *,
+ codeview::JumpTableEntrySize>
+AsmPrinter::getCodeViewJumpTableInfo(int JTI, const MachineInstr *BranchInstr,
+ const MCSymbol *BranchLabel) const {
+ const auto TLI = MF->getSubtarget().getTargetLowering();
+ const auto BaseExpr =
+ TLI->getPICJumpTableRelocBaseExpr(MF, JTI, MMI->getContext());
+ const auto Base = &cast<MCSymbolRefExpr>(BaseExpr)->getSymbol();
+
+ // By default, for the architectures that support CodeView,
+ // EK_LabelDifference32 is implemented as an Int32 from the base address.
+ return std::make_tuple(Base, 0, BranchLabel,
+ codeview::JumpTableEntrySize::Int32);
+}
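How the default tuple is meant to be consumed (a reading of the code; that targets override this hook is an assumption based on its use, not shown in this hunk):

  // Each entry of a label-difference jump table is described to CodeView as
  // an Int32 relative to Base -- the PIC jump-table base symbol extracted
  // from getPICJumpTableRelocBaseExpr() -- with a zero base offset and the
  // branch label passed through unchanged. Targets whose tables use a
  // different base or entry width would override getCodeViewJumpTableInfo().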
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 32674bbeb061..d0ef3e5a1939 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -278,8 +278,8 @@ static void EmitInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
for (; Val; --Val) {
if (OpNo >= MI->getNumOperands())
break;
- unsigned OpFlags = MI->getOperand(OpNo).getImm();
- OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ const InlineAsm::Flag F(MI->getOperand(OpNo).getImm());
+ OpNo += F.getNumOperandRegisters() + 1;
}
// We may have a location metadata attached to the end of the
@@ -288,7 +288,7 @@ static void EmitInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
if (OpNo >= MI->getNumOperands() || MI->getOperand(OpNo).isMetadata()) {
Error = true;
} else {
- unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ const InlineAsm::Flag F(MI->getOperand(OpNo).getImm());
++OpNo; // Skip over the ID number.
// FIXME: Shouldn't arch-independent output template handling go into
@@ -302,7 +302,7 @@ static void EmitInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
} else if (MI->getOperand(OpNo).isMBB()) {
const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
Sym->print(OS, AP->MAI);
- } else if (InlineAsm::isMemKind(OpFlags)) {
+ } else if (F.isMemKind()) {
Error = AP->PrintAsmMemoryOperand(
MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
} else {
@@ -379,14 +379,14 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
const MachineOperand &MO = MI->getOperand(I);
if (!MO.isImm())
continue;
- unsigned Flags = MO.getImm();
- if (InlineAsm::getKind(Flags) == InlineAsm::Kind_Clobber) {
+ const InlineAsm::Flag F(MO.getImm());
+ if (F.isClobberKind()) {
Register Reg = MI->getOperand(I + 1).getReg();
if (!TRI->isAsmClobberable(*MF, Reg))
RestrRegs.push_back(Reg);
}
// Skip to one before the next operand descriptor, if it exists.
- I += InlineAsm::getNumOperandRegisters(Flags);
+ I += F.getNumOperandRegisters();
}
if (!RestrRegs.empty()) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 8161de57b58e..dddc08b3bc01 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -13,6 +13,7 @@
#include "CodeViewDebug.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TinyPtrVector.h"
@@ -26,6 +27,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
@@ -54,7 +56,6 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
@@ -141,7 +142,7 @@ StringRef CodeViewDebug::getFullFilepath(const DIFile *File) {
// If this is a Unix-style path, just use it as is. Don't try to canonicalize
// it textually because one of the path components could be a symlink.
- if (Dir.startswith("/") || Filename.startswith("/")) {
+ if (Dir.starts_with("/") || Filename.starts_with("/")) {
if (llvm::sys::path::is_absolute(Filename, llvm::sys::path::Style::posix))
return Filename;
Filepath = std::string(Dir);
@@ -248,7 +249,10 @@ CodeViewDebug::getInlineSite(const DILocation *InlinedAt,
InlinedAt->getLine(), InlinedAt->getColumn(), SMLoc());
Site->Inlinee = Inlinee;
InlinedSubprograms.insert(Inlinee);
- getFuncIdForSubprogram(Inlinee);
+ auto InlineeIdx = getFuncIdForSubprogram(Inlinee);
+
+ if (InlinedAt->getInlinedAt() == nullptr)
+ CurFn->Inlinees.insert(InlineeIdx);
}
return *Site;
}
@@ -906,10 +910,10 @@ static std::string flattenCommandLine(ArrayRef<std::string> Args,
i++; // Skip this argument and next one.
continue;
}
- if (Arg.startswith("-object-file-name") || Arg == MainFilename)
+ if (Arg.starts_with("-object-file-name") || Arg == MainFilename)
continue;
// Skip fmessage-length for reproducibility.
- if (Arg.startswith("-fmessage-length"))
+ if (Arg.starts_with("-fmessage-length"))
continue;
if (PrintedOneArg)
OS << " ";
@@ -1192,6 +1196,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
OS.emitInt32(uint32_t(FI.FrameProcOpts));
endSymbolRecord(FrameProcEnd);
+ emitInlinees(FI.Inlinees);
emitLocalVariableList(FI, FI.Locals);
emitGlobalVariableList(FI.Globals);
emitLexicalBlockList(FI.ChildBlocks, FI);
@@ -1243,6 +1248,8 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
if (SP != nullptr)
emitDebugInfoForUDTs(LocalUDTs);
+ emitDebugInfoForJumpTables(FI);
+
// We're done with this function.
emitEndSymbolRecord(SymbolKind::S_PROC_ID_END);
}
@@ -1390,6 +1397,12 @@ void CodeViewDebug::calculateRanges(
if (Location->Register == 0 || Location->LoadChain.size() > 1)
continue;
+      // CodeView can only express byte-aligned offsets, so make sure the
+      // location falls on a byte boundary.
+ if (Location->FragmentInfo)
+ if (Location->FragmentInfo->OffsetInBits % 8)
+ continue;
+
LocalVarDef DR;
DR.CVRegister = TRI->getCodeViewRegNum(Location->Register);
DR.InMemory = !Location->LoadChain.empty();
@@ -1533,8 +1546,8 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
}
FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedLocalFramePtrReg) << 14U);
FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedParamFramePtrReg) << 16U);
- if (Asm->TM.getOptLevel() != CodeGenOpt::None &&
- !GV.hasOptSize() && !GV.hasOptNone())
+ if (Asm->TM.getOptLevel() != CodeGenOptLevel::None && !GV.hasOptSize() &&
+ !GV.hasOptNone())
FPO |= FrameProcedureOptions::OptimizedForSpeed;
if (GV.hasProfileData()) {
FPO |= FrameProcedureOptions::ValidProfileCounts;
@@ -1578,6 +1591,11 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
}
}
}
+
+  // Mark branches that may be using jump tables with labels.
+ bool isThumb = Triple(MMI->getModule()->getTargetTriple()).getArch() ==
+ llvm::Triple::ArchType::thumb;
+ discoverJumpTableBranches(MF, isThumb);
}
static bool shouldEmitUdt(const DIType *T) {
@@ -2571,7 +2589,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
// Virtual function pointer member.
if ((Member->getFlags() & DINode::FlagArtificial) &&
- Member->getName().startswith("_vptr$")) {
+ Member->getName().starts_with("_vptr$")) {
VFPtrRecord VFPR(getTypeIndex(Member->getBaseType()));
ContinuationBuilder.writeMemberType(VFPR);
MemberCount++;
@@ -3024,7 +3042,7 @@ void CodeViewDebug::collectLexicalBlockInfo(
if (!BlockInsertion.second)
return;
- // Create a lexical block containing the variables and collect the the
+ // Create a lexical block containing the variables and collect the
// lexical block information for the children.
const InsnRange &Range = Ranges.front();
assert(Range.first && Range.second);
@@ -3083,6 +3101,10 @@ void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
}
}
+ bool isThumb = Triple(MMI->getModule()->getTargetTriple()).getArch() ==
+ llvm::Triple::ArchType::thumb;
+ collectDebugInfoForJumpTables(MF, isThumb);
+
CurFn->Annotations = MF->getCodeViewAnnotations();
CurFn->End = Asm->getFunctionEnd();
@@ -3336,7 +3358,7 @@ void CodeViewDebug::emitConstantSymbolRecord(const DIType *DTy, APSInt &Value,
// Encoded integers shouldn't need more than 10 bytes.
uint8_t Data[10];
- BinaryStreamWriter Writer(Data, llvm::support::endianness::little);
+ BinaryStreamWriter Writer(Data, llvm::endianness::little);
CodeViewRecordIO IO(Writer);
cantFail(IO.mapEncodedInteger(Value));
StringRef SRef((char *)Data, Writer.getOffset());
@@ -3442,3 +3464,164 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
emitConstantSymbolRecord(DIGV->getType(), Value, QualifiedName);
}
}
+
+void forEachJumpTableBranch(
+ const MachineFunction *MF, bool isThumb,
+ const std::function<void(const MachineJumpTableInfo &, const MachineInstr &,
+ int64_t)> &Callback) {
+ auto JTI = MF->getJumpTableInfo();
+ if (JTI && !JTI->isEmpty()) {
+#ifndef NDEBUG
+ auto UsedJTs = llvm::SmallBitVector(JTI->getJumpTables().size());
+#endif
+ for (const auto &MBB : *MF) {
+ // Search for indirect branches...
+ const auto LastMI = MBB.getFirstTerminator();
+ if (LastMI != MBB.end() && LastMI->isIndirectBranch()) {
+ if (isThumb) {
+ // ... that directly use jump table operands.
+ // NOTE: ARM uses pattern matching to lower its BR_JT SDNode to
+ // machine instructions, hence inserting a JUMP_TABLE_DEBUG_INFO node
+ // interferes with this process *but* the resulting pseudo-instruction
+ // uses a Jump Table operand, so extract the jump table index directly
+ // from that.
+ for (const auto &MO : LastMI->operands()) {
+ if (MO.isJTI()) {
+ unsigned Index = MO.getIndex();
+#ifndef NDEBUG
+ UsedJTs.set(Index);
+#endif
+ Callback(*JTI, *LastMI, Index);
+ break;
+ }
+ }
+ } else {
+ // ... that have jump table debug info.
+ // NOTE: The debug info is inserted as a JUMP_TABLE_DEBUG_INFO node
+ // when lowering the BR_JT SDNode to an indirect branch.
+ for (auto I = MBB.instr_rbegin(), E = MBB.instr_rend(); I != E; ++I) {
+ if (I->isJumpTableDebugInfo()) {
+ unsigned Index = I->getOperand(0).getImm();
+#ifndef NDEBUG
+ UsedJTs.set(Index);
+#endif
+ Callback(*JTI, *LastMI, Index);
+ break;
+ }
+ }
+ }
+ }
+ }
+#ifndef NDEBUG
+ assert(UsedJTs.all() &&
+ "Some of jump tables were not used in a debug info instruction");
+#endif
+ }
+}
+
+void CodeViewDebug::discoverJumpTableBranches(const MachineFunction *MF,
+ bool isThumb) {
+ forEachJumpTableBranch(
+ MF, isThumb,
+ [this](const MachineJumpTableInfo &, const MachineInstr &BranchMI,
+ int64_t) { requestLabelBeforeInsn(&BranchMI); });
+}
+
+void CodeViewDebug::collectDebugInfoForJumpTables(const MachineFunction *MF,
+ bool isThumb) {
+ forEachJumpTableBranch(
+ MF, isThumb,
+ [this, MF](const MachineJumpTableInfo &JTI, const MachineInstr &BranchMI,
+ int64_t JumpTableIndex) {
+ // For label-difference jump tables, find the base expression.
+ // Otherwise the jump table uses an absolute address (so no base
+ // is required).
+ const MCSymbol *Base;
+ uint64_t BaseOffset = 0;
+ const MCSymbol *Branch = getLabelBeforeInsn(&BranchMI);
+ JumpTableEntrySize EntrySize;
+ switch (JTI.getEntryKind()) {
+ case MachineJumpTableInfo::EK_Custom32:
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress:
+ llvm_unreachable(
+ "EK_Custom32, EK_GPRel32BlockAddress, and "
+ "EK_GPRel64BlockAddress should never be emitted for COFF");
+ case MachineJumpTableInfo::EK_BlockAddress:
+ // Each entry is an absolute address.
+ EntrySize = JumpTableEntrySize::Pointer;
+ Base = nullptr;
+ break;
+ case MachineJumpTableInfo::EK_Inline:
+ case MachineJumpTableInfo::EK_LabelDifference32:
+ case MachineJumpTableInfo::EK_LabelDifference64:
+ // Ask the AsmPrinter.
+ std::tie(Base, BaseOffset, Branch, EntrySize) =
+ Asm->getCodeViewJumpTableInfo(JumpTableIndex, &BranchMI, Branch);
+ break;
+ }
+
+ CurFn->JumpTables.push_back(
+ {EntrySize, Base, BaseOffset, Branch,
+ MF->getJTISymbol(JumpTableIndex, MMI->getContext()),
+ JTI.getJumpTables()[JumpTableIndex].MBBs.size()});
+ });
+}
+
+void CodeViewDebug::emitDebugInfoForJumpTables(const FunctionInfo &FI) {
+ for (auto JumpTable : FI.JumpTables) {
+ MCSymbol *JumpTableEnd = beginSymbolRecord(SymbolKind::S_ARMSWITCHTABLE);
+ if (JumpTable.Base) {
+ OS.AddComment("Base offset");
+ OS.emitCOFFSecRel32(JumpTable.Base, JumpTable.BaseOffset);
+ OS.AddComment("Base section index");
+ OS.emitCOFFSectionIndex(JumpTable.Base);
+ } else {
+ OS.AddComment("Base offset");
+ OS.emitInt32(0);
+ OS.AddComment("Base section index");
+ OS.emitInt16(0);
+ }
+ OS.AddComment("Switch type");
+ OS.emitInt16(static_cast<uint16_t>(JumpTable.EntrySize));
+ OS.AddComment("Branch offset");
+ OS.emitCOFFSecRel32(JumpTable.Branch, /*Offset=*/0);
+ OS.AddComment("Table offset");
+ OS.emitCOFFSecRel32(JumpTable.Table, /*Offset=*/0);
+ OS.AddComment("Branch section index");
+ OS.emitCOFFSectionIndex(JumpTable.Branch);
+ OS.AddComment("Table section index");
+ OS.emitCOFFSectionIndex(JumpTable.Table);
+ OS.AddComment("Entries count");
+ OS.emitInt32(JumpTable.TableSize);
+ endSymbolRecord(JumpTableEnd);
+ }
+}
+
+void CodeViewDebug::emitInlinees(
+ const SmallSet<codeview::TypeIndex, 1> &Inlinees) {
+ // Divide the list of inlinees into chunks such that each chunk fits within
+ // one record.
+ constexpr size_t ChunkSize =
+ (MaxRecordLength - sizeof(SymbolKind) - sizeof(uint32_t)) /
+ sizeof(uint32_t);
+
+ SmallVector<TypeIndex> SortedInlinees{Inlinees.begin(), Inlinees.end()};
+ llvm::sort(SortedInlinees);
+
+ size_t CurrentIndex = 0;
+ while (CurrentIndex < SortedInlinees.size()) {
+ auto Symbol = beginSymbolRecord(SymbolKind::S_INLINEES);
+ auto CurrentChunkSize =
+ std::min(ChunkSize, SortedInlinees.size() - CurrentIndex);
+ OS.AddComment("Count");
+ OS.emitInt32(CurrentChunkSize);
+
+ const size_t CurrentChunkEnd = CurrentIndex + CurrentChunkSize;
+ for (; CurrentIndex < CurrentChunkEnd; ++CurrentIndex) {
+ OS.AddComment("Inlinee");
+ OS.emitInt32(SortedInlinees[CurrentIndex].getIndex());
+ }
+ endSymbolRecord(Symbol);
+ }
+}
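
The chunking arithmetic in emitInlinees above is worth seeing in isolation. Below is a minimal standalone sketch, not LLVM code: the MaxRecordLength cap, the 16-bit record-kind field, and the 32-bit count field are assumptions mirroring the expression in the patch.

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Assumed cap on a CodeView symbol record's size; illustrative only.
  constexpr size_t MaxRecordLength = 0xFF00;
  // Same arithmetic as emitInlinees: subtract the record-kind and 32-bit
  // count fields, then divide by the 4-byte TypeIndex size.
  constexpr size_t ChunkSize =
      (MaxRecordLength - sizeof(uint16_t) - sizeof(uint32_t)) /
      sizeof(uint32_t);

  std::vector<uint32_t> Inlinees(40000); // more than one record's worth
  for (size_t I = 0; I < Inlinees.size(); ++I)
    Inlinees[I] = static_cast<uint32_t>(I);

  size_t Current = 0, Records = 0;
  while (Current < Inlinees.size()) {
    size_t N = std::min(ChunkSize, Inlinees.size() - Current);
    // A real emitter would open an S_INLINEES record here and write the
    // count followed by N TypeIndex values; we only account for them.
    Current += N;
    ++Records;
  }
  std::printf("emitted %zu records of up to %zu entries each\n", Records,
              ChunkSize);
}
```
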
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 1455ac417824..4c03bf79d04d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -20,9 +20,11 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
#include "llvm/CodeGen/DebugHandlerBase.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
@@ -133,6 +135,15 @@ private:
StringRef Name;
};
+ struct JumpTableInfo {
+ codeview::JumpTableEntrySize EntrySize;
+ const MCSymbol *Base;
+ uint64_t BaseOffset;
+ const MCSymbol *Branch;
+ const MCSymbol *Table;
+ size_t TableSize;
+ };
+
// For each function, store a vector of labels to its instructions, as well as
// to the end of the function.
struct FunctionInfo {
@@ -148,6 +159,9 @@ private:
/// Ordered list of top-level inlined call sites.
SmallVector<const DILocation *, 1> ChildSites;
+ /// Set of all functions directly inlined into this one.
+ SmallSet<codeview::TypeIndex, 1> Inlinees;
+
SmallVector<LocalVariable, 1> Locals;
SmallVector<CVGlobalVariable, 1> Globals;
@@ -160,6 +174,8 @@ private:
std::vector<std::tuple<const MCSymbol *, const MCSymbol *, const DIType *>>
HeapAllocSites;
+ std::vector<JumpTableInfo> JumpTables;
+
const MCSymbol *Begin = nullptr;
const MCSymbol *End = nullptr;
unsigned FuncId = 0;
@@ -359,6 +375,8 @@ private:
void emitInlinedCallSite(const FunctionInfo &FI, const DILocation *InlinedAt,
const InlineSite &Site);
+ void emitInlinees(const SmallSet<codeview::TypeIndex, 1> &Inlinees);
+
using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
void collectGlobalVariableInfo();
@@ -478,6 +496,10 @@ private:
unsigned getPointerSizeInBytes();
+ void discoverJumpTableBranches(const MachineFunction *MF, bool isThumb);
+ void collectDebugInfoForJumpTables(const MachineFunction *MF, bool isThumb);
+ void emitDebugInfoForJumpTables(const FunctionInfo &FI);
+
protected:
/// Gather pre-function debug information.
void beginFunctionImpl(const MachineFunction *MF) override;
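
The JumpTableInfo fields added above map one-to-one onto the S_ARMSWITCHTABLE payload emitted in CodeViewDebug.cpp. As a hypothetical mirror of that layout (not an LLVM type; the 4-byte secrel32 and 2-byte section-index sizes follow the emit calls in the diff):

```cpp
#include <cstdint>

#pragma pack(push, 1)
struct ArmSwitchTableSym {
  uint32_t BaseOffset;    // secrel32 of Base, or 0 when entries are absolute
  uint16_t BaseSection;   // section index of Base, or 0
  uint16_t SwitchType;    // JumpTableEntrySize as a uint16_t
  uint32_t BranchOffset;  // secrel32 of the label before the branch
  uint32_t TableOffset;   // secrel32 of the jump table symbol
  uint16_t BranchSection; // section index of the branch
  uint16_t TableSection;  // section index of the table
  uint32_t EntryCount;    // number of MBB targets in the table
};
#pragma pack(pop)

static_assert(sizeof(ArmSwitchTableSym) == 24, "packed payload size");
```
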
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
index 8c6109880afc..700e24a08b5d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
@@ -40,8 +40,5 @@ void DebugLocStream::finalizeEntry() {
DebugLocStream::ListBuilder::~ListBuilder() {
if (!Locs.finalizeList(Asm))
return;
- V.initializeDbgValue(&MI);
- V.setDebugLocListIndex(ListIndex);
- if (TagOffset)
- V.setDebugLocListTagOffset(*TagOffset);
+ V.emplace<Loc::Multi>(ListIndex, TagOffset);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
index a96bdd034918..6f553dc85c64 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
@@ -18,7 +18,6 @@ namespace llvm {
class AsmPrinter;
class DbgVariable;
class DwarfCompileUnit;
-class MachineInstr;
class MCSymbol;
/// Byte stream of .debug_loc entries.
@@ -156,14 +155,13 @@ class DebugLocStream::ListBuilder {
DebugLocStream &Locs;
AsmPrinter &Asm;
DbgVariable &V;
- const MachineInstr &MI;
size_t ListIndex;
std::optional<uint8_t> TagOffset;
public:
ListBuilder(DebugLocStream &Locs, DwarfCompileUnit &CU, AsmPrinter &Asm,
- DbgVariable &V, const MachineInstr &MI)
- : Locs(Locs), Asm(Asm), V(V), MI(MI), ListIndex(Locs.startList(&CU)),
+ DbgVariable &V)
+ : Locs(Locs), Asm(Asm), V(V), ListIndex(Locs.startList(&CU)),
TagOffset(std::nullopt) {}
void setTagOffset(uint8_t TO) {
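
The ListBuilder change above, together with the destructor in DebugLocStream.cpp, moves finalization to a single variant emplace. A simplified, standalone sketch of that RAII shape, with stand-in types rather than the LLVM classes:

```cpp
#include <cstddef>
#include <cstdint>
#include <optional>
#include <variant>
#include <vector>

struct Multi {
  size_t ListIndex;
  std::optional<uint8_t> TagOffset;
};
using VarLoc = std::variant<std::monostate, Multi>;

class ListBuilder {
  VarLoc &V;
  std::vector<int> Entries; // stand-in for the DebugLocStream list
  size_t ListIndex;
  std::optional<uint8_t> TagOffset;

public:
  ListBuilder(VarLoc &V, size_t Index) : V(V), ListIndex(Index) {}
  void setTagOffset(uint8_t TO) { TagOffset = TO; }
  void add(int E) { Entries.push_back(E); }
  ~ListBuilder() {
    if (Entries.empty()) // mirrors the early return when finalizeList fails
      return;
    V.emplace<Multi>(Multi{ListIndex, TagOffset});
  }
};

int main() {
  VarLoc V;
  {
    ListBuilder B(V, 7);
    B.add(1);
    B.setTagOffset(0x2a);
  } // destructor installs the multi-location alternative
  return std::holds_alternative<Multi>(V) ? 0 : 1;
}
```
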
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 58ed21379d29..14f2a363f9be 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -58,7 +58,7 @@ static dwarf::Tag GetCompileUnitType(UnitKind Kind, DwarfDebug *DW) {
DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node,
AsmPrinter *A, DwarfDebug *DW,
DwarfFile *DWU, UnitKind Kind)
- : DwarfUnit(GetCompileUnitType(Kind, DW), Node, A, DW, DWU), UniqueID(UID) {
+ : DwarfUnit(GetCompileUnitType(Kind, DW), Node, A, DW, DWU, UID) {
insertDIE(Node, &getUnitDie());
MacroLabelBegin = Asm->createTempSymbol("cu_macro_begin");
}
@@ -367,13 +367,15 @@ void DwarfCompileUnit::addLocationAttribute(
addLinkageName(*VariableDIE, GV->getLinkageName());
if (addToAccelTable) {
- DD->addAccelName(*CUNode, GV->getName(), *VariableDIE);
+ DD->addAccelName(*this, CUNode->getNameTableKind(), GV->getName(),
+ *VariableDIE);
// If the linkage name is different than the name, go ahead and output
// that as well into the name table.
if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName() &&
DD->useAllLinkageNames())
- DD->addAccelName(*CUNode, GV->getLinkageName(), *VariableDIE);
+ DD->addAccelName(*this, CUNode->getNameTableKind(), GV->getLinkageName(),
+ *VariableDIE);
}
}
@@ -458,15 +460,6 @@ void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin,
addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin);
}
-// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
-// and DW_AT_high_pc attributes. If there are global variables in this
-// scope then create and insert DIEs for these variables.
-DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
- DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes());
- auto *ContextCU = static_cast<DwarfCompileUnit *>(SPDie->getUnit());
- return ContextCU->updateSubprogramScopeDIEImpl(SP, SPDie);
-}
-
// Add info for Wasm-global-based relocation.
// 'GlobalIndex' is used for split dwarf, which currently relies on a few
// assumptions that are not guaranteed in a formal way but work in practice.
@@ -500,8 +493,11 @@ void DwarfCompileUnit::addWasmRelocBaseGlobal(DIELoc *Loc, StringRef GlobalName,
}
}
-DIE &DwarfCompileUnit::updateSubprogramScopeDIEImpl(const DISubprogram *SP,
- DIE *SPDie) {
+// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
+// and DW_AT_high_pc attributes. If there are global variables in this
+// scope then create and insert DIEs for these variables.
+DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
+ DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes());
SmallVector<RangeSpan, 2> BB_List;
// If basic block sections are on, ranges for each basic block section have
// to be emitted separately.
@@ -567,7 +563,7 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIEImpl(const DISubprogram *SP,
// Add name to the name table, we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_subprogram nodes.
- DD->addSubprogramNames(*CUNode, SP, *SPDie);
+ DD->addSubprogramNames(*this, CUNode->getNameTableKind(), SP, *SPDie);
return *SPDie;
}
@@ -716,7 +712,8 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope,
// Add name to the name table, we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_inlined_subprogram nodes.
- DD->addSubprogramNames(*CUNode, InlinedSP, *ScopeDIE);
+ DD->addSubprogramNames(*this, CUNode->getNameTableKind(), InlinedSP,
+ *ScopeDIE);
return ScopeDIE;
}
@@ -746,156 +743,139 @@ DIE *DwarfCompileUnit::constructLexicalScopeDIE(LexicalScope *Scope) {
return ScopeDIE;
}
-/// constructVariableDIE - Construct a DIE for the given DbgVariable.
DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV, bool Abstract) {
- auto D = constructVariableDIEImpl(DV, Abstract);
- DV.setDIE(*D);
- return D;
-}
-
-DIE *DwarfCompileUnit::constructLabelDIE(DbgLabel &DL,
- const LexicalScope &Scope) {
- auto LabelDie = DIE::get(DIEValueAllocator, DL.getTag());
- insertDIE(DL.getLabel(), LabelDie);
- DL.setDIE(*LabelDie);
-
- if (Scope.isAbstractScope())
- applyLabelAttributes(DL, *LabelDie);
-
- return LabelDie;
-}
-
-DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
- bool Abstract) {
- // Define variable debug information entry.
- auto VariableDie = DIE::get(DIEValueAllocator, DV.getTag());
+ auto *VariableDie = DIE::get(DIEValueAllocator, DV.getTag());
insertDIE(DV.getVariable(), VariableDie);
-
+ DV.setDIE(*VariableDie);
+ // Abstract variables don't get common attributes later, so apply them now.
if (Abstract) {
- applyVariableAttributes(DV, *VariableDie);
- return VariableDie;
- }
-
- // Add variable address.
-
- unsigned Index = DV.getDebugLocListIndex();
- if (Index != ~0U) {
- addLocationList(*VariableDie, dwarf::DW_AT_location, Index);
- auto TagOffset = DV.getDebugLocListTagOffset();
- if (TagOffset)
- addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
- *TagOffset);
- return VariableDie;
+ applyCommonDbgVariableAttributes(DV, *VariableDie);
+ } else {
+ std::visit(
+ [&](const auto &V) {
+ applyConcreteDbgVariableAttributes(V, DV, *VariableDie);
+ },
+ DV.asVariant());
}
+ return VariableDie;
+}
- // Check if variable has a single location description.
- if (auto *DVal = DV.getValueLoc()) {
- if (!DVal->isVariadic()) {
- const DbgValueLocEntry *Entry = DVal->getLocEntries().begin();
- if (Entry->isLocation()) {
- addVariableAddress(DV, *VariableDie, Entry->getLoc());
- } else if (Entry->isInt()) {
- auto *Expr = DV.getSingleExpression();
- if (Expr && Expr->getNumElements()) {
- DIELoc *Loc = new (DIEValueAllocator) DIELoc;
- DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
- // If there is an expression, emit raw unsigned bytes.
- DwarfExpr.addFragmentOffset(Expr);
- DwarfExpr.addUnsignedConstant(Entry->getInt());
- DwarfExpr.addExpression(Expr);
- addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
- if (DwarfExpr.TagOffset)
- addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset,
- dwarf::DW_FORM_data1, *DwarfExpr.TagOffset);
- } else
- addConstantValue(*VariableDie, Entry->getInt(), DV.getType());
- } else if (Entry->isConstantFP()) {
- addConstantFPValue(*VariableDie, Entry->getConstantFP());
- } else if (Entry->isConstantInt()) {
- addConstantValue(*VariableDie, Entry->getConstantInt(), DV.getType());
- } else if (Entry->isTargetIndexLocation()) {
+void DwarfCompileUnit::applyConcreteDbgVariableAttributes(
+ const Loc::Single &Single, const DbgVariable &DV, DIE &VariableDie) {
+ const DbgValueLoc *DVal = &Single.getValueLoc();
+ if (!DVal->isVariadic()) {
+ const DbgValueLocEntry *Entry = DVal->getLocEntries().begin();
+ if (Entry->isLocation()) {
+ addVariableAddress(DV, VariableDie, Entry->getLoc());
+ } else if (Entry->isInt()) {
+ auto *Expr = Single.getExpr();
+ if (Expr && Expr->getNumElements()) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
- const DIBasicType *BT = dyn_cast<DIBasicType>(
- static_cast<const Metadata *>(DV.getVariable()->getType()));
- DwarfDebug::emitDebugLocValue(*Asm, BT, *DVal, DwarfExpr);
- addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
- }
- return VariableDie;
- }
- // If any of the location entries are registers with the value 0, then the
- // location is undefined.
- if (any_of(DVal->getLocEntries(), [](const DbgValueLocEntry &Entry) {
- return Entry.isLocation() && !Entry.getLoc().getReg();
- }))
- return VariableDie;
- const DIExpression *Expr = DV.getSingleExpression();
- assert(Expr && "Variadic Debug Value must have an Expression.");
- DIELoc *Loc = new (DIEValueAllocator) DIELoc;
- DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
- DwarfExpr.addFragmentOffset(Expr);
- DIExpressionCursor Cursor(Expr);
- const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
-
- auto AddEntry = [&](const DbgValueLocEntry &Entry,
- DIExpressionCursor &Cursor) {
- if (Entry.isLocation()) {
- if (!DwarfExpr.addMachineRegExpression(TRI, Cursor,
- Entry.getLoc().getReg()))
- return false;
- } else if (Entry.isInt()) {
// If there is an expression, emit raw unsigned bytes.
- DwarfExpr.addUnsignedConstant(Entry.getInt());
- } else if (Entry.isConstantFP()) {
- // DwarfExpression does not support arguments wider than 64 bits
- // (see PR52584).
- // TODO: Consider chunking expressions containing overly wide
- // arguments into separate pointer-sized fragment expressions.
- APInt RawBytes = Entry.getConstantFP()->getValueAPF().bitcastToAPInt();
- if (RawBytes.getBitWidth() > 64)
- return false;
- DwarfExpr.addUnsignedConstant(RawBytes.getZExtValue());
- } else if (Entry.isConstantInt()) {
- APInt RawBytes = Entry.getConstantInt()->getValue();
- if (RawBytes.getBitWidth() > 64)
- return false;
- DwarfExpr.addUnsignedConstant(RawBytes.getZExtValue());
- } else if (Entry.isTargetIndexLocation()) {
- TargetIndexLocation Loc = Entry.getTargetIndexLocation();
- // TODO TargetIndexLocation is a target-independent. Currently only the
- // WebAssembly-specific encoding is supported.
- assert(Asm->TM.getTargetTriple().isWasm());
- DwarfExpr.addWasmLocation(Loc.Index, static_cast<uint64_t>(Loc.Offset));
- } else {
- llvm_unreachable("Unsupported Entry type.");
- }
- return true;
- };
-
- if (!DwarfExpr.addExpression(
- std::move(Cursor),
- [&](unsigned Idx, DIExpressionCursor &Cursor) -> bool {
- return AddEntry(DVal->getLocEntries()[Idx], Cursor);
- }))
- return VariableDie;
-
- // Now attach the location information to the DIE.
- addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
- if (DwarfExpr.TagOffset)
- addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
- *DwarfExpr.TagOffset);
-
- return VariableDie;
+ DwarfExpr.addFragmentOffset(Expr);
+ DwarfExpr.addUnsignedConstant(Entry->getInt());
+ DwarfExpr.addExpression(Expr);
+ addBlock(VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
+ if (DwarfExpr.TagOffset)
+ addUInt(VariableDie, dwarf::DW_AT_LLVM_tag_offset,
+ dwarf::DW_FORM_data1, *DwarfExpr.TagOffset);
+ } else
+ addConstantValue(VariableDie, Entry->getInt(), DV.getType());
+ } else if (Entry->isConstantFP()) {
+ addConstantFPValue(VariableDie, Entry->getConstantFP());
+ } else if (Entry->isConstantInt()) {
+ addConstantValue(VariableDie, Entry->getConstantInt(), DV.getType());
+ } else if (Entry->isTargetIndexLocation()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ const DIBasicType *BT = dyn_cast<DIBasicType>(
+ static_cast<const Metadata *>(DV.getVariable()->getType()));
+ DwarfDebug::emitDebugLocValue(*Asm, BT, *DVal, DwarfExpr);
+ addBlock(VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
+ }
+ return;
}
+ // If any of the location entries are registers with the value 0,
+ // then the location is undefined.
+ if (any_of(DVal->getLocEntries(), [](const DbgValueLocEntry &Entry) {
+ return Entry.isLocation() && !Entry.getLoc().getReg();
+ }))
+ return;
+ const DIExpression *Expr = Single.getExpr();
+ assert(Expr && "Variadic Debug Value must have an Expression.");
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ DwarfExpr.addFragmentOffset(Expr);
+ DIExpressionCursor Cursor(Expr);
+ const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
+
+ auto AddEntry = [&](const DbgValueLocEntry &Entry,
+ DIExpressionCursor &Cursor) {
+ if (Entry.isLocation()) {
+ if (!DwarfExpr.addMachineRegExpression(TRI, Cursor,
+ Entry.getLoc().getReg()))
+ return false;
+ } else if (Entry.isInt()) {
+ // If there is an expression, emit raw unsigned bytes.
+ DwarfExpr.addUnsignedConstant(Entry.getInt());
+ } else if (Entry.isConstantFP()) {
+ // DwarfExpression does not support arguments wider than 64 bits
+ // (see PR52584).
+ // TODO: Consider chunking expressions containing overly wide
+ // arguments into separate pointer-sized fragment expressions.
+ APInt RawBytes = Entry.getConstantFP()->getValueAPF().bitcastToAPInt();
+ if (RawBytes.getBitWidth() > 64)
+ return false;
+ DwarfExpr.addUnsignedConstant(RawBytes.getZExtValue());
+ } else if (Entry.isConstantInt()) {
+ APInt RawBytes = Entry.getConstantInt()->getValue();
+ if (RawBytes.getBitWidth() > 64)
+ return false;
+ DwarfExpr.addUnsignedConstant(RawBytes.getZExtValue());
+ } else if (Entry.isTargetIndexLocation()) {
+ TargetIndexLocation Loc = Entry.getTargetIndexLocation();
+ // TODO TargetIndexLocation is target-independent. Currently
+ // only the WebAssembly-specific encoding is supported.
+ assert(Asm->TM.getTargetTriple().isWasm());
+ DwarfExpr.addWasmLocation(Loc.Index, static_cast<uint64_t>(Loc.Offset));
+ } else {
+ llvm_unreachable("Unsupported Entry type.");
+ }
+ return true;
+ };
+
+ if (!DwarfExpr.addExpression(
+ std::move(Cursor),
+ [&](unsigned Idx, DIExpressionCursor &Cursor) -> bool {
+ return AddEntry(DVal->getLocEntries()[Idx], Cursor);
+ }))
+ return;
+
+ // Now attach the location information to the DIE.
+ addBlock(VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
+ if (DwarfExpr.TagOffset)
+ addUInt(VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
+ *DwarfExpr.TagOffset);
+}
- // .. else use frame index.
- if (!DV.hasFrameIndexExprs())
- return VariableDie;
+void DwarfCompileUnit::applyConcreteDbgVariableAttributes(
+ const Loc::Multi &Multi, const DbgVariable &DV, DIE &VariableDie) {
+ addLocationList(VariableDie, dwarf::DW_AT_location,
+ Multi.getDebugLocListIndex());
+ auto TagOffset = Multi.getDebugLocListTagOffset();
+ if (TagOffset)
+ addUInt(VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
+ *TagOffset);
+}
+void DwarfCompileUnit::applyConcreteDbgVariableAttributes(const Loc::MMI &MMI,
+ const DbgVariable &DV,
+ DIE &VariableDie) {
std::optional<unsigned> NVPTXAddressSpace;
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
- for (const auto &Fragment : DV.getFrameIndexExprs()) {
+ for (const auto &Fragment : MMI.getFrameIndexExprs()) {
Register FrameReg;
const DIExpression *Expr = Fragment.Expr;
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
@@ -909,10 +889,10 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
// According to
// https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
- // cuda-gdb requires DW_AT_address_class for all variables to be able to
- // correctly interpret address space of the variable address.
- // Decode DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef
- // sequence for the NVPTX + gdb target.
+ // cuda-gdb requires DW_AT_address_class for all variables to be
+ // able to correctly interpret the address space of the variable
+ // address. Decode the DW_OP_constu <DWARF Address Space> DW_OP_swap
+ // DW_OP_xderef sequence for the NVPTX + gdb target.
unsigned LocalNVPTXAddressSpace;
if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
const DIExpression *NewExpr =
@@ -936,20 +916,39 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
// According to
// https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
- // cuda-gdb requires DW_AT_address_class for all variables to be able to
- // correctly interpret address space of the variable address.
+ // cuda-gdb requires DW_AT_address_class for all variables to be
+ // able to correctly interpret the address space of the variable
+ // address.
const unsigned NVPTX_ADDR_local_space = 6;
- addUInt(*VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
+ addUInt(VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
NVPTXAddressSpace.value_or(NVPTX_ADDR_local_space));
}
- addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
+ addBlock(VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
if (DwarfExpr.TagOffset)
- addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
+ addUInt(VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
*DwarfExpr.TagOffset);
+}
- return VariableDie;
+void DwarfCompileUnit::applyConcreteDbgVariableAttributes(
+ const Loc::EntryValue &EntryValue, const DbgVariable &DV,
+ DIE &VariableDie) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ // Emit each expression as: EntryValue(Register) <other ops> <Fragment>.
+ for (auto [Register, Expr] : EntryValue.EntryValues) {
+ DwarfExpr.addFragmentOffset(&Expr);
+ DIExpressionCursor Cursor(Expr.getElements());
+ DwarfExpr.beginEntryValueExpression(Cursor);
+ DwarfExpr.addMachineRegExpression(
+ *Asm->MF->getSubtarget().getRegisterInfo(), Cursor, Register);
+ DwarfExpr.addExpression(std::move(Cursor));
+ }
+ addBlock(VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
}
+void DwarfCompileUnit::applyConcreteDbgVariableAttributes(
+ const std::monostate &, const DbgVariable &DV, DIE &VariableDie) {}
+
DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV,
const LexicalScope &Scope,
DIE *&ObjectPointer) {
@@ -959,6 +958,18 @@ DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV,
return Var;
}
+DIE *DwarfCompileUnit::constructLabelDIE(DbgLabel &DL,
+ const LexicalScope &Scope) {
+ auto LabelDie = DIE::get(DIEValueAllocator, DL.getTag());
+ insertDIE(DL.getLabel(), LabelDie);
+ DL.setDIE(*LabelDie);
+
+ if (Scope.isAbstractScope())
+ applyLabelAttributes(DL, *LabelDie);
+
+ return LabelDie;
+}
+
/// Return all DIVariables that appear in count: expressions.
static SmallVector<const DIVariable *, 2> dependencies(DbgVariable *Var) {
SmallVector<const DIVariable *, 2> Result;
@@ -1065,7 +1076,6 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) {
DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub,
LexicalScope *Scope) {
DIE &ScopeDIE = updateSubprogramScopeDIE(Sub);
- auto *ContextCU = static_cast<DwarfCompileUnit *>(ScopeDIE.getUnit());
if (Scope) {
assert(!Scope->getInlinedAt());
@@ -1073,10 +1083,8 @@ DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub,
// Collect lexical scope children first.
// ObjectPointer might be a local (non-argument) local variable if it's a
// block's synthetic this pointer.
- if (DIE *ObjectPointer =
- ContextCU->createAndAddScopeChildren(Scope, ScopeDIE))
- ContextCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer,
- *ObjectPointer);
+ if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, ScopeDIE))
+ addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer);
}
// If this is a variadic function, add an unspecified parameter.
@@ -1353,7 +1361,7 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE(
// or `using namespace std::ranges`, we could add the
// import declaration into the accelerator table with the
// name being the one of the entity being imported.
- DD->addAccelNamespace(*CUNode, Name, *IMDie);
+ DD->addAccelNamespace(*this, CUNode->getNameTableKind(), Name, *IMDie);
}
// This is for imported module with renamed entities (such as variables and
@@ -1410,16 +1418,25 @@ void DwarfCompileUnit::finishEntityDefinition(const DbgEntity *Entity) {
Label = dyn_cast<const DbgLabel>(Entity);
} else {
if (const DbgVariable *Var = dyn_cast<const DbgVariable>(Entity))
- applyVariableAttributes(*Var, *Die);
+ applyCommonDbgVariableAttributes(*Var, *Die);
else if ((Label = dyn_cast<const DbgLabel>(Entity)))
applyLabelAttributes(*Label, *Die);
else
llvm_unreachable("DbgEntity must be DbgVariable or DbgLabel.");
}
- if (Label)
- if (const auto *Sym = Label->getSymbol())
- addLabelAddress(*Die, dwarf::DW_AT_low_pc, Sym);
+ if (!Label)
+ return;
+
+ const auto *Sym = Label->getSymbol();
+ if (!Sym)
+ return;
+
+ addLabelAddress(*Die, dwarf::DW_AT_low_pc, Sym);
+
+ // A TAG_label with a name and an AT_low_pc must be placed in debug_names.
+ if (StringRef Name = Label->getName(); !Name.empty())
+ getDwarfDebug().addAccelName(*this, CUNode->getNameTableKind(), Name, *Die);
}
DbgEntity *DwarfCompileUnit::getExistingAbstractEntity(const DINode *Node) {
@@ -1523,8 +1540,9 @@ void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty,
void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
MachineLocation Location) {
- if (DV.hasComplexAddress())
- addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
+ auto *Single = std::get_if<Loc::Single>(&DV);
+ if (Single && Single->getExpr())
+ addComplexAddress(Single->getExpr(), Die, dwarf::DW_AT_location, Location);
else
addAddress(Die, dwarf::DW_AT_location, Location);
}
@@ -1555,12 +1573,11 @@ void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
/// DWARF information necessary to find the actual variable given the extra
/// address information encoded in the DbgVariable, starting from the starting
/// location. Add the DWARF information to the die.
-void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
+void DwarfCompileUnit::addComplexAddress(const DIExpression *DIExpr, DIE &Die,
dwarf::Attribute Attribute,
const MachineLocation &Location) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
- const DIExpression *DIExpr = DV.getSingleExpression();
DwarfExpr.addFragmentOffset(DIExpr);
DwarfExpr.setLocation(Location, DIExpr);
@@ -1591,8 +1608,8 @@ void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute,
addAttribute(Die, Attribute, Form, DIELocList(Index));
}
-void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,
- DIE &VariableDie) {
+void DwarfCompileUnit::applyCommonDbgVariableAttributes(const DbgVariable &Var,
+ DIE &VariableDie) {
StringRef Name = Var.getName();
if (!Name.empty())
addString(VariableDie, dwarf::DW_AT_name, Name);
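
The core of the constructVariableDIE rewrite above is dispatching on the active variant alternative through an overload set via std::visit. A self-contained sketch of that pattern, with placeholder alternative types in place of the Loc classes:

```cpp
#include <cstdio>
#include <variant>

struct Single {};
struct Multi {};
struct MMI {};
using Location = std::variant<std::monostate, Single, Multi, MMI>;

struct CompileUnit {
  void apply(const Single &) { std::puts("single-value location"); }
  void apply(const Multi &) { std::puts("location list"); }
  void apply(const MMI &) { std::puts("frame-index location"); }
  void apply(const std::monostate &) { std::puts("optimized out"); }

  void construct(const Location &Loc) {
    // Each alternative routes to the matching overload, just as the
    // applyConcreteDbgVariableAttributes calls do in the diff.
    std::visit([&](const auto &V) { apply(V); }, Loc);
  }
};

int main() {
  CompileUnit CU;
  CU.construct(Location{Multi{}});
  CU.construct(Location{}); // monostate: nothing to emit
}
```
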
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 6ef73ebd4f7f..dc772bb459c9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -25,7 +25,6 @@
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/Casting.h"
-#include <cassert>
#include <cstdint>
#include <memory>
@@ -44,8 +43,6 @@ class MDNode;
enum class UnitKind { Skeleton, Full };
class DwarfCompileUnit final : public DwarfUnit {
- /// A numeric ID unique among all CUs in the module
- unsigned UniqueID;
bool HasRangeLists = false;
/// The start of the unit line section, this is also
@@ -55,9 +52,6 @@ class DwarfCompileUnit final : public DwarfUnit {
/// Skeleton unit associated with this unit.
DwarfCompileUnit *Skeleton = nullptr;
- /// The start of the unit within its section.
- MCSymbol *LabelBegin = nullptr;
-
/// The start of the unit macro info within macro section.
MCSymbol *MacroLabelBegin;
@@ -96,9 +90,34 @@ class DwarfCompileUnit final : public DwarfUnit {
const DIFile *LastFile = nullptr;
unsigned LastFileID;
- /// Construct a DIE for the given DbgVariable without initializing the
- /// DbgVariable's DIE reference.
- DIE *constructVariableDIEImpl(const DbgVariable &DV, bool Abstract);
+ /// \anchor applyConcreteDbgVariableAttribute
+ /// \name applyConcreteDbgVariableAttribute
+ /// Overload set which applies attributes to \c VariableDie based on
+ /// the active variant of \c DV, which is passed as the first argument.
+ ///@{
+
+ /// See \ref applyConcreteDbgVariableAttribute
+ void applyConcreteDbgVariableAttributes(const Loc::Single &Single,
+ const DbgVariable &DV,
+ DIE &VariableDie);
+ /// See \ref applyConcreteDbgVariableAttribute
+ void applyConcreteDbgVariableAttributes(const Loc::Multi &Multi,
+ const DbgVariable &DV,
+ DIE &VariableDie);
+ /// See \ref applyConcreteDbgVariableAttribute
+ void applyConcreteDbgVariableAttributes(const Loc::MMI &MMI,
+ const DbgVariable &DV,
+ DIE &VariableDie);
+ /// See \ref applyConcreteDbgVariableAttribute
+ void applyConcreteDbgVariableAttributes(const Loc::EntryValue &EntryValue,
+ const DbgVariable &DV,
+ DIE &VariableDie);
+ /// See \ref applyConcreteDbgVariableAttribute
+ void applyConcreteDbgVariableAttributes(const std::monostate &,
+ const DbgVariable &DV,
+ DIE &VariableDie);
+
+ ///@}
bool isDwoUnit() const override;
@@ -126,7 +145,6 @@ public:
UnitKind Kind = UnitKind::Full);
bool hasRangeLists() const { return HasRangeLists; }
- unsigned getUniqueID() const { return UniqueID; }
DwarfCompileUnit *getSkeleton() const {
return Skeleton;
@@ -193,7 +211,6 @@ public:
/// variables in this scope then create and insert DIEs for these
/// variables.
DIE &updateSubprogramScopeDIE(const DISubprogram *SP);
- DIE &updateSubprogramScopeDIEImpl(const DISubprogram *SP, DIE *SPDie);
void constructScopeDIE(LexicalScope *Scope, DIE &ParentScopeDIE);
@@ -219,9 +236,11 @@ public:
/// and it's an error, if it hasn't.
DIE *getLexicalBlockDIE(const DILexicalBlock *LB);
- /// constructVariableDIE - Construct a DIE for the given DbgVariable.
+ /// Construct a DIE for the given DbgVariable.
DIE *constructVariableDIE(DbgVariable &DV, bool Abstract = false);
+ /// Convenience overload which writes the DIE pointer into an out variable
+ /// ObjectPointer in addition to returning it.
DIE *constructVariableDIE(DbgVariable &DV, const LexicalScope &Scope,
DIE *&ObjectPointer);
@@ -304,11 +323,6 @@ public:
/// Add the DW_AT_addr_base attribute to the unit DIE.
void addAddrTableBase();
- MCSymbol *getLabelBegin() const {
- assert(LabelBegin && "LabelBegin is not initialized");
- return LabelBegin;
- }
-
MCSymbol *getMacroLabelBegin() const {
return MacroLabelBegin;
}
@@ -342,13 +356,17 @@ public:
/// DWARF information necessary to find the actual variable (navigating the
/// extra location information encoded in the type) based on the starting
/// location. Add the DWARF information to the die.
- void addComplexAddress(const DbgVariable &DV, DIE &Die,
+ void addComplexAddress(const DIExpression *DIExpr, DIE &Die,
dwarf::Attribute Attribute,
const MachineLocation &Location);
/// Add a Dwarf loclistptr attribute data and value.
void addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index);
- void applyVariableAttributes(const DbgVariable &Var, DIE &VariableDie);
+
+ /// Add attributes to \p VariableDie which reflect the common attributes of
+ /// \p Var, namely those which are not dependent on the active variant.
+ void applyCommonDbgVariableAttributes(const DbgVariable &Var,
+ DIE &VariableDie);
/// Add a Dwarf expression attribute data and value.
void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr);
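
addVariableAddress in the .cpp diff probes the location with std::get_if applied to the DbgVariable itself, which works because DbgVariable derives publicly from Loc::Variant. A minimal illustration of that shape, with stand-in types:

```cpp
#include <cassert>
#include <string>
#include <variant>

struct Single { std::string Expr; };
struct Multi {};
using LocVariant = std::variant<std::monostate, Single, Multi>;

// Simplified stand-in for DbgVariable, which derives from Loc::Variant.
struct Variable : LocVariant {
  using LocVariant::LocVariant;
  using LocVariant::operator=;
};

int main() {
  Variable V;
  V.emplace<Single>(Single{"DW_OP_plus_uconst 8"});
  // get_if accepts a pointer to the variant base class of Variable.
  if (auto *S = std::get_if<Single>(&V))
    assert(!S->Expr.empty() && "complex address path");
  return 0;
}
```
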
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 1ae17ec9b874..41afbea45614 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -234,7 +234,15 @@ const DIType *DbgVariable::getType() const {
/// Get .debug_loc entry for the instruction range starting at MI.
static DbgValueLoc getDebugLocValue(const MachineInstr *MI) {
const DIExpression *Expr = MI->getDebugExpression();
- const bool IsVariadic = MI->isDebugValueList();
+ auto SingleLocExprOpt = DIExpression::convertToNonVariadicExpression(Expr);
+ const bool IsVariadic = !SingleLocExprOpt;
+ // If we have a variadic debug value instruction that is equivalent to a
+ // non-variadic instruction, then convert it to non-variadic form here.
+ if (!IsVariadic && !MI->isNonListDebugValue()) {
+ assert(MI->getNumDebugOperands() == 1 &&
+ "Mismatched DIExpression and debug operands for debug instruction.");
+ Expr = *SingleLocExprOpt;
+ }
assert(MI->getNumOperands() >= 3);
SmallVector<DbgValueLocEntry, 4> DbgValueLocEntries;
for (const MachineOperand &Op : MI->debug_operands()) {
@@ -257,67 +265,39 @@ static DbgValueLoc getDebugLocValue(const MachineInstr *MI) {
return DbgValueLoc(Expr, DbgValueLocEntries, IsVariadic);
}
-void DbgVariable::initializeDbgValue(const MachineInstr *DbgValue) {
- assert(FrameIndexExprs.empty() && "Already initialized?");
- assert(!ValueLoc.get() && "Already initialized?");
+static uint64_t getFragmentOffsetInBits(const DIExpression &Expr) {
+ std::optional<DIExpression::FragmentInfo> Fragment = Expr.getFragmentInfo();
+ return Fragment ? Fragment->OffsetInBits : 0;
+}
- assert(getVariable() == DbgValue->getDebugVariable() && "Wrong variable");
- assert(getInlinedAt() == DbgValue->getDebugLoc()->getInlinedAt() &&
- "Wrong inlined-at");
+bool llvm::operator<(const FrameIndexExpr &LHS, const FrameIndexExpr &RHS) {
+ return getFragmentOffsetInBits(*LHS.Expr) <
+ getFragmentOffsetInBits(*RHS.Expr);
+}
- ValueLoc = std::make_unique<DbgValueLoc>(getDebugLocValue(DbgValue));
- if (auto *E = DbgValue->getDebugExpression())
- if (E->getNumElements())
- FrameIndexExprs.push_back({0, E});
+bool llvm::operator<(const EntryValueInfo &LHS, const EntryValueInfo &RHS) {
+ return getFragmentOffsetInBits(LHS.Expr) < getFragmentOffsetInBits(RHS.Expr);
}
-ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
- if (FrameIndexExprs.size() == 1)
- return FrameIndexExprs;
+Loc::Single::Single(DbgValueLoc ValueLoc)
+ : ValueLoc(std::make_unique<DbgValueLoc>(ValueLoc)),
+ Expr(ValueLoc.getExpression()) {
+ if (!Expr->getNumElements())
+ Expr = nullptr;
+}
- assert(llvm::all_of(FrameIndexExprs,
- [](const FrameIndexExpr &A) {
- return A.Expr->isFragment();
- }) &&
- "multiple FI expressions without DW_OP_LLVM_fragment");
- llvm::sort(FrameIndexExprs,
- [](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool {
- return A.Expr->getFragmentInfo()->OffsetInBits <
- B.Expr->getFragmentInfo()->OffsetInBits;
- });
+Loc::Single::Single(const MachineInstr *DbgValue)
+ : Single(getDebugLocValue(DbgValue)) {}
+const std::set<FrameIndexExpr> &Loc::MMI::getFrameIndexExprs() const {
return FrameIndexExprs;
}
-void DbgVariable::addMMIEntry(const DbgVariable &V) {
- assert(DebugLocListIndex == ~0U && !ValueLoc.get() && "not an MMI entry");
- assert(V.DebugLocListIndex == ~0U && !V.ValueLoc.get() && "not an MMI entry");
- assert(V.getVariable() == getVariable() && "conflicting variable");
- assert(V.getInlinedAt() == getInlinedAt() && "conflicting inlined-at location");
-
- assert(!FrameIndexExprs.empty() && "Expected an MMI entry");
- assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry");
-
- // FIXME: This logic should not be necessary anymore, as we now have proper
- // deduplication. However, without it, we currently run into the assertion
- // below, which means that we are likely dealing with broken input, i.e. two
- // non-fragment entries for the same variable at different frame indices.
- if (FrameIndexExprs.size()) {
- auto *Expr = FrameIndexExprs.back().Expr;
- if (!Expr || !Expr->isFragment())
- return;
- }
-
- for (const auto &FIE : V.FrameIndexExprs)
- // Ignore duplicate entries.
- if (llvm::none_of(FrameIndexExprs, [&](const FrameIndexExpr &Other) {
- return FIE.FI == Other.FI && FIE.Expr == Other.Expr;
- }))
- FrameIndexExprs.push_back(FIE);
-
+void Loc::MMI::addFrameIndexExpr(const DIExpression *Expr, int FI) {
+ FrameIndexExprs.insert({FI, Expr});
assert((FrameIndexExprs.size() == 1 ||
llvm::all_of(FrameIndexExprs,
- [](FrameIndexExpr &FIE) {
+ [](const FrameIndexExpr &FIE) {
return FIE.Expr && FIE.Expr->isFragment();
})) &&
"conflicting locations for variable");
@@ -331,8 +311,9 @@ static AccelTableKind computeAccelTableKind(unsigned DwarfVersion,
if (AccelTables != AccelTableKind::Default)
return AccelTables;
- // Accelerator tables with type units are currently not supported.
- if (GenerateTypeUnits)
+ // Generating the DWARF5 acceleration table.
+ // Split DWARF and non-ELF formats are currently not supported.
+ if (GenerateTypeUnits && (DwarfVersion < 5 || !TT.isOSBinFormatELF()))
return AccelTableKind::None;
// Accelerator tables get emitted if targetting DWARF v5 or LLDB. DWARF v5
@@ -465,7 +446,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A)
DwarfDebug::~DwarfDebug() = default;
static bool isObjCClass(StringRef Name) {
- return Name.startswith("+") || Name.startswith("-");
+ return Name.starts_with("+") || Name.starts_with("-");
}
static bool hasObjCCategory(StringRef Name) {
@@ -492,36 +473,38 @@ static StringRef getObjCMethodName(StringRef In) {
}
// Add the various names to the Dwarf accelerator table names.
-void DwarfDebug::addSubprogramNames(const DICompileUnit &CU,
- const DISubprogram *SP, DIE &Die) {
+void DwarfDebug::addSubprogramNames(
+ const DwarfUnit &Unit,
+ const DICompileUnit::DebugNameTableKind NameTableKind,
+ const DISubprogram *SP, DIE &Die) {
if (getAccelTableKind() != AccelTableKind::Apple &&
- CU.getNameTableKind() != DICompileUnit::DebugNameTableKind::Apple &&
- CU.getNameTableKind() == DICompileUnit::DebugNameTableKind::None)
+ NameTableKind != DICompileUnit::DebugNameTableKind::Apple &&
+ NameTableKind == DICompileUnit::DebugNameTableKind::None)
return;
if (!SP->isDefinition())
return;
if (SP->getName() != "")
- addAccelName(CU, SP->getName(), Die);
+ addAccelName(Unit, NameTableKind, SP->getName(), Die);
// If the linkage name is different than the name, go ahead and output that as
// well into the name table. Only do that if we are going to actually emit
// that name.
if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName() &&
(useAllLinkageNames() || InfoHolder.getAbstractScopeDIEs().lookup(SP)))
- addAccelName(CU, SP->getLinkageName(), Die);
+ addAccelName(Unit, NameTableKind, SP->getLinkageName(), Die);
// If this is an Objective-C selector name add it to the ObjC accelerator
// too.
if (isObjCClass(SP->getName())) {
StringRef Class, Category;
getObjCClassCategory(SP->getName(), Class, Category);
- addAccelObjC(CU, Class, Die);
+ addAccelObjC(Unit, NameTableKind, Class, Die);
if (Category != "")
- addAccelObjC(CU, Category, Die);
+ addAccelObjC(Unit, NameTableKind, Category, Die);
// Also add the base method name to the name table.
- addAccelName(CU, getObjCMethodName(SP->getName()), Die);
+ addAccelName(Unit, NameTableKind, getObjCMethodName(SP->getName()), Die);
}
}
@@ -603,7 +586,7 @@ static const DIExpression *combineDIExpressions(const DIExpression *Original,
std::vector<uint64_t> Elts = Addition->getElements().vec();
// Avoid multiple DW_OP_stack_values.
if (Original->isImplicit() && Addition->isImplicit())
- erase_value(Elts, dwarf::DW_OP_stack_value);
+ llvm::erase(Elts, dwarf::DW_OP_stack_value);
const DIExpression *CombinedExpr =
(Elts.size() > 0) ? DIExpression::append(Original, Elts) : Original;
return CombinedExpr;
@@ -1409,6 +1392,10 @@ void DwarfDebug::finalizeModuleInfo() {
InfoHolder.computeSizeAndOffsets();
if (useSplitDwarf())
SkeletonHolder.computeSizeAndOffsets();
+
+ // Now that offsets are computed, we can replace the DIEs in debug_names
+ // entries with their actual offsets.
+ AccelDebugNames.convertDieToOffset();
}
// Emit all Dwarf sections that should come after the content.
@@ -1560,29 +1547,42 @@ void DwarfDebug::collectVariableInfoFromMFTable(
}
ensureAbstractEntityIsCreatedIfScoped(TheCU, Var.first, Scope->getScopeNode());
+
+ // If we have already seen information for this variable, add to what we
+ // already know.
+ if (DbgVariable *PreviousLoc = MFVars.lookup(Var)) {
+ auto *PreviousMMI = std::get_if<Loc::MMI>(PreviousLoc);
+ auto *PreviousEntryValue = std::get_if<Loc::EntryValue>(PreviousLoc);
+ // Previous and new locations are both stack slots (MMI).
+ if (PreviousMMI && VI.inStackSlot())
+ PreviousMMI->addFrameIndexExpr(VI.Expr, VI.getStackSlot());
+ // Previous and new locations are both entry values.
+ else if (PreviousEntryValue && VI.inEntryValueRegister())
+ PreviousEntryValue->addExpr(VI.getEntryValueRegister(), *VI.Expr);
+ else {
+ // Locations differ; this should (rarely) happen in optimized async
+ // coroutines.
+ // Prefer whichever location has an EntryValue.
+ if (PreviousLoc->holds<Loc::MMI>())
+ PreviousLoc->emplace<Loc::EntryValue>(VI.getEntryValueRegister(),
+ *VI.Expr);
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << VI.Var->getName()
+ << ", conflicting fragment location types\n");
+ }
+ continue;
+ }
+
auto RegVar = std::make_unique<DbgVariable>(
cast<DILocalVariable>(Var.first), Var.second);
if (VI.inStackSlot())
- RegVar->initializeMMI(VI.Expr, VI.getStackSlot());
- else {
- MachineLocation MLoc(VI.getEntryValueRegister(), /*IsIndirect*/ true);
- auto LocEntry = DbgValueLocEntry(MLoc);
- RegVar->initializeDbgValue(DbgValueLoc(VI.Expr, LocEntry));
- }
+ RegVar->emplace<Loc::MMI>(VI.Expr, VI.getStackSlot());
+ else
+ RegVar->emplace<Loc::EntryValue>(VI.getEntryValueRegister(), *VI.Expr);
LLVM_DEBUG(dbgs() << "Created DbgVariable for " << VI.Var->getName()
<< "\n");
-
- if (DbgVariable *DbgVar = MFVars.lookup(Var)) {
- if (DbgVar->getValueLoc())
- LLVM_DEBUG(dbgs() << "Dropping repeated entry value debug info for "
- "variable "
- << VI.Var->getName() << "\n");
- else
- DbgVar->addMMIEntry(*RegVar);
- } else if (InfoHolder.addScopeVariable(Scope, RegVar.get())) {
- MFVars.insert({Var, RegVar.get()});
- ConcreteEntities.push_back(std::move(RegVar));
- }
+ InfoHolder.addScopeVariable(Scope, RegVar.get());
+ MFVars.insert({Var, RegVar.get()});
+ ConcreteEntities.push_back(std::move(RegVar));
}
}
@@ -1916,7 +1916,7 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
const auto *End =
SingleValueWithClobber ? HistoryMapEntries[1].getInstr() : nullptr;
if (validThroughout(LScopes, MInsn, End, getInstOrdering())) {
- RegVar->initializeDbgValue(MInsn);
+ RegVar->emplace<Loc::Single>(MInsn);
continue;
}
}
@@ -1926,7 +1926,7 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
continue;
// Handle multiple DBG_VALUE instructions describing one variable.
- DebugLocStream::ListBuilder List(DebugLocs, TheCU, *Asm, *RegVar, *MInsn);
+ DebugLocStream::ListBuilder List(DebugLocs, TheCU, *Asm, *RegVar);
// Build the location list for this variable.
SmallVector<DebugLocEntry, 8> Entries;
@@ -1936,7 +1936,7 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
// that is valid throughout the variable's scope. If so, produce single
// value location.
if (isValidSingleLocation) {
- RegVar->initializeDbgValue(Entries[0].getValues()[0]);
+ RegVar->emplace<Loc::Single>(Entries[0].getValues()[0]);
continue;
}
@@ -2639,7 +2639,7 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
DIExpressionCursor ExprCursor(DIExpr);
DwarfExpr.addFragmentOffset(DIExpr);
- // If the DIExpr is is an Entry Value, we want to follow the same code path
+ // If the DIExpr is an Entry Value, we want to follow the same code path
// regardless of whether the DBG_VALUE is variadic or not.
if (DIExpr && DIExpr->isEntryValue()) {
// Entry values can only be a single register with no additional DIExpr,
@@ -3448,6 +3448,7 @@ uint64_t DwarfDebug::makeTypeSignature(StringRef Identifier) {
void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
StringRef Identifier, DIE &RefDie,
const DICompositeType *CTy) {
+ setCurrentDWARF5AccelTable(DWARF5AccelTableKind::TU);
// Fast path if we're building some type units and one has already used the
// address pool we know we're going to throw away all this work anyway, so
// don't bother building dependent types.
@@ -3463,8 +3464,8 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
bool TopLevelType = TypeUnitsUnderConstruction.empty();
AddrPool.resetUsedFlag();
- auto OwnedUnit = std::make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder,
- getDwoLineTable(CU));
+ auto OwnedUnit = std::make_unique<DwarfTypeUnit>(
+ CU, Asm, this, &InfoHolder, NumTypeUnitsCreated++, getDwoLineTable(CU));
DwarfTypeUnit &NewTU = *OwnedUnit;
DIE &UnitDie = NewTU.getUnitDie();
TypeUnitsUnderConstruction.emplace_back(std::move(OwnedUnit), CTy);
@@ -3477,6 +3478,16 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
Ins.first->second = Signature;
if (useSplitDwarf()) {
+ // Although multiple type units can have the same signature, they are not
+ // guaranteed to be bit identical. When LLDB uses .debug_names it needs to
+ // know which CU a type unit came from. These two attributes help it to
+ // figure that out.
+ if (getDwarfVersion() >= 5) {
+ if (!CompilationDir.empty())
+ NewTU.addString(UnitDie, dwarf::DW_AT_comp_dir, CompilationDir);
+ NewTU.addString(UnitDie, dwarf::DW_AT_dwo_name,
+ Asm->TM.Options.MCOptions.SplitDwarfFile);
+ }
MCSection *Section =
getDwarfVersion() <= 4
? Asm->getObjFileLowering().getDwarfTypesDWOSection()
@@ -3506,7 +3517,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
// Types referencing entries in the address table cannot be placed in type
// units.
if (AddrPool.hasBeenUsed()) {
-
+ AccelTypeUnitsDebugNames.clear();
// Remove all the types built while building this type.
// This is pessimistic as some of these types might not be dependent on
// the type that used an address.
@@ -3517,6 +3528,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
// This is inefficient because all the dependent types will be rebuilt
// from scratch, including building them in type units, discovering that
// they depend on addresses, throwing them out and rebuilding them.
+ setCurrentDWARF5AccelTable(DWARF5AccelTableKind::CU);
CU.constructTypeDIE(RefDie, cast<DICompositeType>(CTy));
return;
}
@@ -3526,9 +3538,20 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
for (auto &TU : TypeUnitsToAdd) {
InfoHolder.computeSizeAndOffsetsForUnit(TU.first.get());
InfoHolder.emitUnit(TU.first.get(), useSplitDwarf());
+ if (getDwarfVersion() >= 5 &&
+ getAccelTableKind() == AccelTableKind::Dwarf) {
+ if (useSplitDwarf())
+ AccelDebugNames.addTypeUnitSignature(*TU.first);
+ else
+ AccelDebugNames.addTypeUnitSymbol(*TU.first);
+ }
}
+ AccelTypeUnitsDebugNames.convertDieToOffset();
+ AccelDebugNames.addTypeEntries(AccelTypeUnitsDebugNames);
+ AccelTypeUnitsDebugNames.clear();
}
CU.addDIETypeSignature(RefDie, Signature);
+ setCurrentDWARF5AccelTable(DWARF5AccelTableKind::CU);
}
// Add the Name along with its companion DIE to the appropriate accelerator
@@ -3536,15 +3559,16 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
// AccelTableKind::Apple, we use the table we got as an argument). If
// accelerator tables are disabled, this function does nothing.
template <typename DataT>
-void DwarfDebug::addAccelNameImpl(const DICompileUnit &CU,
- AccelTable<DataT> &AppleAccel, StringRef Name,
- const DIE &Die) {
+void DwarfDebug::addAccelNameImpl(
+ const DwarfUnit &Unit,
+ const DICompileUnit::DebugNameTableKind NameTableKind,
+ AccelTable<DataT> &AppleAccel, StringRef Name, const DIE &Die) {
if (getAccelTableKind() == AccelTableKind::None || Name.empty())
return;
if (getAccelTableKind() != AccelTableKind::Apple &&
- CU.getNameTableKind() != DICompileUnit::DebugNameTableKind::Apple &&
- CU.getNameTableKind() != DICompileUnit::DebugNameTableKind::Default)
+ NameTableKind != DICompileUnit::DebugNameTableKind::Apple &&
+ NameTableKind != DICompileUnit::DebugNameTableKind::Default)
return;
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
@@ -3554,9 +3578,13 @@ void DwarfDebug::addAccelNameImpl(const DICompileUnit &CU,
case AccelTableKind::Apple:
AppleAccel.addName(Ref, Die);
break;
- case AccelTableKind::Dwarf:
- AccelDebugNames.addName(Ref, Die);
+ case AccelTableKind::Dwarf: {
+ DWARF5AccelTable &Current = getCurrentDWARF5AccelTable();
+ // The type unit can be discarded, so we need to add references to the
+ // final acceleration table once we know it's complete and we emit it.
+ Current.addName(Ref, Die, Unit.getUniqueID());
break;
+ }
case AccelTableKind::Default:
llvm_unreachable("Default should have already been resolved.");
case AccelTableKind::None:
@@ -3564,26 +3592,34 @@ void DwarfDebug::addAccelNameImpl(const DICompileUnit &CU,
}
}
-void DwarfDebug::addAccelName(const DICompileUnit &CU, StringRef Name,
- const DIE &Die) {
- addAccelNameImpl(CU, AccelNames, Name, Die);
+void DwarfDebug::addAccelName(
+ const DwarfUnit &Unit,
+ const DICompileUnit::DebugNameTableKind NameTableKind, StringRef Name,
+ const DIE &Die) {
+ addAccelNameImpl(Unit, NameTableKind, AccelNames, Name, Die);
}
-void DwarfDebug::addAccelObjC(const DICompileUnit &CU, StringRef Name,
- const DIE &Die) {
+void DwarfDebug::addAccelObjC(
+ const DwarfUnit &Unit,
+ const DICompileUnit::DebugNameTableKind NameTableKind, StringRef Name,
+ const DIE &Die) {
// ObjC names go only into the Apple accelerator tables.
if (getAccelTableKind() == AccelTableKind::Apple)
- addAccelNameImpl(CU, AccelObjC, Name, Die);
+ addAccelNameImpl(Unit, NameTableKind, AccelObjC, Name, Die);
}
-void DwarfDebug::addAccelNamespace(const DICompileUnit &CU, StringRef Name,
- const DIE &Die) {
- addAccelNameImpl(CU, AccelNamespace, Name, Die);
+void DwarfDebug::addAccelNamespace(
+ const DwarfUnit &Unit,
+ const DICompileUnit::DebugNameTableKind NameTableKind, StringRef Name,
+ const DIE &Die) {
+ addAccelNameImpl(Unit, NameTableKind, AccelNamespace, Name, Die);
}
-void DwarfDebug::addAccelType(const DICompileUnit &CU, StringRef Name,
- const DIE &Die, char Flags) {
- addAccelNameImpl(CU, AccelTypes, Name, Die);
+void DwarfDebug::addAccelType(
+ const DwarfUnit &Unit,
+ const DICompileUnit::DebugNameTableKind NameTableKind, StringRef Name,
+ const DIE &Die, char Flags) {
+ addAccelNameImpl(Unit, NameTableKind, AccelTypes, Name, Die);
}
uint16_t DwarfDebug::getDwarfVersion() const {
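
The Loc::MMI rework above replaces addMMIEntry's manual sort-and-dedup with a std::set ordered by a free operator< on fragment offset. A simplified model of that behavior (the fragment representation here is an assumption standing in for DIExpression fragments):

```cpp
#include <cstdint>
#include <cstdio>
#include <optional>
#include <set>

struct Expr {
  std::optional<uint64_t> FragmentOffsetInBits; // nullopt => whole variable
};

struct FrameIndexExpr {
  int FI;
  const Expr *E;
};

static uint64_t fragmentOffset(const Expr &E) {
  return E.FragmentOffsetInBits.value_or(0);
}

// As in the patch: ordering considers only the fragment offset, so entries
// with equal offsets compare equivalent and the set drops later duplicates.
bool operator<(const FrameIndexExpr &LHS, const FrameIndexExpr &RHS) {
  return fragmentOffset(*LHS.E) < fragmentOffset(*RHS.E);
}

int main() {
  Expr Lo{0}, Hi{32};
  std::set<FrameIndexExpr> Exprs;
  Exprs.insert({2, &Hi});
  Exprs.insert({1, &Lo});
  Exprs.insert({2, &Hi}); // equivalent key: silently ignored
  for (const auto &FIE : Exprs)
    std::printf("FI %d at fragment offset %llu\n", FIE.FI,
                (unsigned long long)fragmentOffset(*FIE.E));
}
```
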
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 1af4b643eb17..452485b632c4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -17,7 +17,6 @@
#include "DebugLocEntry.h"
#include "DebugLocStream.h"
#include "DwarfFile.h"
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
@@ -41,6 +40,7 @@
#include <limits>
#include <memory>
#include <utility>
+#include <variant>
#include <vector>
namespace llvm {
@@ -100,90 +100,154 @@ public:
}
};
-//===----------------------------------------------------------------------===//
-/// This class is used to track local variable information.
-///
-/// Variables can be created from allocas, in which case they're generated from
-/// the MMI table. Such variables can have multiple expressions and frame
-/// indices.
-///
-/// Variables can be created from \c DBG_VALUE instructions. Those whose
-/// location changes over time use \a DebugLocListIndex, while those with a
-/// single location use \a ValueLoc and (optionally) a single entry of \a Expr.
-///
-/// Variables that have been optimized out use none of these fields.
-class DbgVariable : public DbgEntity {
- /// Index of the entry list in DebugLocs.
- unsigned DebugLocListIndex = ~0u;
- /// DW_OP_LLVM_tag_offset value from DebugLocs.
- std::optional<uint8_t> DebugLocListTagOffset;
+class DbgVariable;
- /// Single value location description.
- std::unique_ptr<DbgValueLoc> ValueLoc = nullptr;
+bool operator<(const struct FrameIndexExpr &LHS,
+ const struct FrameIndexExpr &RHS);
+bool operator<(const struct EntryValueInfo &LHS,
+ const struct EntryValueInfo &RHS);
- struct FrameIndexExpr {
- int FI;
- const DIExpression *Expr;
- };
- mutable SmallVector<FrameIndexExpr, 1>
- FrameIndexExprs; /// Frame index + expression.
+/// Proxy for one MMI entry.
+struct FrameIndexExpr {
+ int FI;
+ const DIExpression *Expr;
+
+ /// Operator enabling sorting based on fragment offset.
+ friend bool operator<(const FrameIndexExpr &LHS, const FrameIndexExpr &RHS);
+};
+
+/// Represents an entry-value location, or a fragment of one.
+struct EntryValueInfo {
+ MCRegister Reg;
+ const DIExpression &Expr;
+
+ /// Operator enabling sorting based on fragment offset.
+ friend bool operator<(const EntryValueInfo &LHS, const EntryValueInfo &RHS);
+};
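The operator< friends above order fragments by their offset so that the std::set containers used by Loc::MMI and Loc::EntryValue keep entries sorted for emission. A minimal sketch of such a comparator, assuming the definitions live in DwarfDebug.cpp and that an expression without fragment info is treated as a fragment at offset 0:

    // Illustrative only; the real definitions may differ.
    bool llvm::operator<(const FrameIndexExpr &LHS, const FrameIndexExpr &RHS) {
      auto Off = [](const FrameIndexExpr &FIE) {
        return FIE.Expr->getFragmentInfo()
            .value_or(DIExpression::FragmentInfo(0, 0))
            .OffsetInBits;
      };
      return Off(LHS) < Off(RHS);
    }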
+
+// Namespace for alternatives of a DbgVariable.
+namespace Loc {
+/// Single value location description.
+class Single {
+ std::unique_ptr<DbgValueLoc> ValueLoc;
+ const DIExpression *Expr;
public:
- /// Construct a DbgVariable.
- ///
- /// Creates a variable without any DW_AT_location. Call \a initializeMMI()
- /// for MMI entries, or \a initializeDbgValue() for DBG_VALUE instructions.
- DbgVariable(const DILocalVariable *V, const DILocation *IA)
- : DbgEntity(V, IA, DbgVariableKind) {}
+ explicit Single(DbgValueLoc ValueLoc);
+ explicit Single(const MachineInstr *DbgValue);
+ const DbgValueLoc &getValueLoc() const { return *ValueLoc; }
+ const DIExpression *getExpr() const { return Expr; }
+};
+/// Multi-value location description.
+class Multi {
+ /// Index of the entry list in DebugLocs.
+ unsigned DebugLocListIndex;
+ /// DW_OP_LLVM_tag_offset value from DebugLocs.
+ std::optional<uint8_t> DebugLocListTagOffset;
- /// Initialize from the MMI table.
- void initializeMMI(const DIExpression *E, int FI) {
- assert(FrameIndexExprs.empty() && "Already initialized?");
- assert(!ValueLoc.get() && "Already initialized?");
+public:
+ explicit Multi(unsigned DebugLocListIndex,
+ std::optional<uint8_t> DebugLocListTagOffset)
+ : DebugLocListIndex(DebugLocListIndex),
+ DebugLocListTagOffset(DebugLocListTagOffset) {}
+ unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
+ std::optional<uint8_t> getDebugLocListTagOffset() const {
+ return DebugLocListTagOffset;
+ }
+};
+/// Single location defined by (potentially multiple) MMI entries.
+struct MMI {
+ std::set<FrameIndexExpr> FrameIndexExprs;
+public:
+ explicit MMI(const DIExpression *E, int FI) : FrameIndexExprs({{FI, E}}) {
assert((!E || E->isValid()) && "Expected valid expression");
assert(FI != std::numeric_limits<int>::max() && "Expected valid index");
-
- FrameIndexExprs.push_back({FI, E});
}
+ void addFrameIndexExpr(const DIExpression *Expr, int FI);
+ /// Get the FI entries, sorted by fragment offset.
+ const std::set<FrameIndexExpr> &getFrameIndexExprs() const;
+};
+/// Single location defined by (potentially multiple) EntryValueInfo entries.
+struct EntryValue {
+ std::set<EntryValueInfo> EntryValues;
+ explicit EntryValue(MCRegister Reg, const DIExpression &Expr) {
+ addExpr(Reg, Expr);
+  }
+ // Add the pair Reg, Expr to the list of entry values describing the variable.
+  // If multiple expressions are added, it is the caller's responsibility to
+ // ensure they are all non-overlapping fragments.
+ void addExpr(MCRegister Reg, const DIExpression &Expr) {
+ std::optional<const DIExpression *> NonVariadicExpr =
+ DIExpression::convertToNonVariadicExpression(&Expr);
+ assert(NonVariadicExpr && *NonVariadicExpr);
+
+ EntryValues.insert({Reg, **NonVariadicExpr});
+ }
+};
+/// Alias for the std::variant specialization base class of DbgVariable.
+using Variant = std::variant<std::monostate, Loc::Single, Loc::Multi, Loc::MMI,
+ Loc::EntryValue>;
+} // namespace Loc
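The Loc namespace turns what used to be a tangle of optionally populated fields into a closed set of mutually exclusive alternatives. A self-contained sketch of the same pattern (hypothetical names, not the LLVM types) shows how std::variant plus std::visit makes the old invalid mixed states unrepresentable:

    #include <type_traits>
    #include <variant>

    struct Single { int Value; };          // exactly one known location
    struct Multi { unsigned ListIndex; };  // index into a location list
    using Location = std::variant<std::monostate, Single, Multi>;

    const char *describe(const Location &L) {
      // Exactly one alternative is active; if constexpr forces a case per kind.
      return std::visit(
          [](const auto &Alt) -> const char * {
            using T = std::decay_t<decltype(Alt)>;
            if constexpr (std::is_same_v<T, std::monostate>)
              return "optimized out";
            else if constexpr (std::is_same_v<T, Single>)
              return "single location";
            else
              return "location list";
          },
          L);
    }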
- // Initialize variable's location.
- void initializeDbgValue(DbgValueLoc Value) {
- assert(FrameIndexExprs.empty() && "Already initialized?");
- assert(!ValueLoc && "Already initialized?");
- assert(!Value.getExpression()->isFragment() && "Fragments not supported.");
+//===----------------------------------------------------------------------===//
+/// This class is used to track local variable information.
+///
+/// Variables that have been optimized out hold the \c monostate alternative.
+/// This is not distinguished from the case of a constructed \c DbgVariable
+/// which has not been initialized yet.
+///
+/// Variables can be created from allocas, in which case they're generated from
+/// the MMI table. Such variables hold the \c Loc::MMI alternative which can
+/// have multiple expressions and frame indices.
+///
+/// Variables can be created from the entry value of registers, in which case
+/// they're generated from the MMI table. Such variables hold the \c
+/// Loc::EntryValue alternative which can either have a single expression or
+/// multiple *fragment* expressions.
+///
+/// Variables can be created from \c DBG_VALUE instructions. Those whose
+/// location changes over time hold a \c Loc::Multi alternative which uses \c
+/// DebugLocListIndex and (optionally) \c DebugLocListTagOffset, while those
+/// with a single location hold a \c Loc::Single alternative which uses \c
+/// ValueLoc and (optionally) a single \c Expr.
+class DbgVariable : public DbgEntity, public Loc::Variant {
- ValueLoc = std::make_unique<DbgValueLoc>(Value);
- if (auto *E = ValueLoc->getExpression())
- if (E->getNumElements())
- FrameIndexExprs.push_back({0, E});
+public:
+  /// To work around P2162R0 (https://github.com/cplusplus/papers/issues/873) the
+ /// base class subobject needs to be passed directly to std::visit, so expose
+ /// it directly here.
+ Loc::Variant &asVariant() { return *static_cast<Loc::Variant *>(this); }
+ const Loc::Variant &asVariant() const {
+ return *static_cast<const Loc::Variant *>(this);
+ }
+ /// Member shorthand for std::holds_alternative
+ template <typename T> bool holds() const {
+ return std::holds_alternative<T>(*this);
+ }
+ /// Asserting, noexcept member alternative to std::get
+ template <typename T> auto &get() noexcept {
+ assert(holds<T>());
+ return *std::get_if<T>(this);
+ }
+ /// Asserting, noexcept member alternative to std::get
+ template <typename T> const auto &get() const noexcept {
+ assert(holds<T>());
+ return *std::get_if<T>(this);
}
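Given asVariant(), call sites can keep the usual visitation idiom despite DbgVariable deriving from the variant. A hedged usage sketch (Var is a DbgVariable; the branch bodies are placeholders):

    // Passing the base-class subobject explicitly sidesteps the pre-P2162R0
    // restriction on visiting types derived from std::variant.
    std::visit(
        [](auto &&Alt) {
          using T = std::decay_t<decltype(Alt)>;
          if constexpr (std::is_same_v<T, Loc::MMI>)
            ; // e.g. iterate Alt.getFrameIndexExprs()
          else if constexpr (std::is_same_v<T, Loc::Single>)
            ; // e.g. emit Alt.getValueLoc()
        },
        Var.asVariant());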
- /// Initialize from a DBG_VALUE instruction.
- void initializeDbgValue(const MachineInstr *DbgValue);
+ /// Construct a DbgVariable.
+ ///
+ /// Creates a variable without any DW_AT_location.
+ DbgVariable(const DILocalVariable *V, const DILocation *IA)
+ : DbgEntity(V, IA, DbgVariableKind) {}
// Accessors.
const DILocalVariable *getVariable() const {
return cast<DILocalVariable>(getEntity());
}
- const DIExpression *getSingleExpression() const {
- assert(ValueLoc.get() && FrameIndexExprs.size() <= 1);
- return FrameIndexExprs.size() ? FrameIndexExprs[0].Expr : nullptr;
- }
-
- void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
- unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
- void setDebugLocListTagOffset(uint8_t O) { DebugLocListTagOffset = O; }
- std::optional<uint8_t> getDebugLocListTagOffset() const {
- return DebugLocListTagOffset;
- }
StringRef getName() const { return getVariable()->getName(); }
- const DbgValueLoc *getValueLoc() const { return ValueLoc.get(); }
- /// Get the FI entries, sorted by fragment offset.
- ArrayRef<FrameIndexExpr> getFrameIndexExprs() const;
- bool hasFrameIndexExprs() const { return !FrameIndexExprs.empty(); }
- void addMMIEntry(const DbgVariable &V);
// Translate tag to proper Dwarf tag.
dwarf::Tag getTag() const {
@@ -211,15 +275,6 @@ public:
return false;
}
- bool hasComplexAddress() const {
- assert(ValueLoc.get() && "Expected DBG_VALUE, not MMI variable");
- assert((FrameIndexExprs.empty() ||
- (FrameIndexExprs.size() == 1 &&
- FrameIndexExprs[0].Expr->getNumElements())) &&
- "Invalid Expr for DBG_VALUE");
- return !FrameIndexExprs.empty();
- }
-
const DIType *getType() const;
static bool classof(const DbgEntity *N) {
@@ -353,6 +408,11 @@ class DwarfDebug : public DebugHandlerBase {
std::pair<std::unique_ptr<DwarfTypeUnit>, const DICompositeType *>, 1>
TypeUnitsUnderConstruction;
+  /// Used to set a unique ID for a Type Unit.
+  /// This counter represents the number of DwarfTypeUnits created, not
+  /// necessarily the number of type units that will be emitted.
+ unsigned NumTypeUnitsCreated = 0;
+
/// Whether to use the GNU TLS opcode (instead of the standard opcode).
bool UseGNUTLSOpcode;
@@ -393,6 +453,11 @@ public:
Form,
};
+ enum class DWARF5AccelTableKind {
+ CU = 0,
+ TU = 1,
+ };
+
private:
/// Force the use of DW_AT_ranges even for single-entry range lists.
MinimizeAddrInV5 MinimizeAddr = MinimizeAddrInV5::Disabled;
@@ -440,7 +505,10 @@ private:
AddressPool AddrPool;
/// Accelerator tables.
- AccelTable<DWARF5AccelTableData> AccelDebugNames;
+ DWARF5AccelTable AccelDebugNames;
+ DWARF5AccelTable AccelTypeUnitsDebugNames;
+  /// Points to whichever DWARF5AccelTable is currently in use.
+ DWARF5AccelTable *CurrentDebugNames = &AccelDebugNames;
AccelTable<AppleAccelTableOffsetData> AccelNames;
AccelTable<AppleAccelTableOffsetData> AccelObjC;
AccelTable<AppleAccelTableOffsetData> AccelNamespace;
@@ -479,8 +547,10 @@ private:
DIE &ScopeDIE, const MachineFunction &MF);
template <typename DataT>
- void addAccelNameImpl(const DICompileUnit &CU, AccelTable<DataT> &AppleAccel,
- StringRef Name, const DIE &Die);
+ void addAccelNameImpl(const DwarfUnit &Unit,
+ const DICompileUnit::DebugNameTableKind NameTableKind,
+ AccelTable<DataT> &AppleAccel, StringRef Name,
+ const DIE &Die);
void finishEntityDefinitions();
@@ -724,6 +794,9 @@ public:
/// Returns what kind (if any) of accelerator tables to emit.
AccelTableKind getAccelTableKind() const { return TheAccelTableKind; }
+  /// Set TheAccelTableKind.
+  void setTheAccelTableKind(AccelTableKind K) { TheAccelTableKind = K; }
+
bool useAppleExtensionAttributes() const {
return HasAppleExtensionAttributes;
}
@@ -781,20 +854,27 @@ public:
void emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry,
const DwarfCompileUnit *CU);
- void addSubprogramNames(const DICompileUnit &CU, const DISubprogram *SP,
- DIE &Die);
+ void addSubprogramNames(const DwarfUnit &Unit,
+ const DICompileUnit::DebugNameTableKind NameTableKind,
+ const DISubprogram *SP, DIE &Die);
AddressPool &getAddressPool() { return AddrPool; }
- void addAccelName(const DICompileUnit &CU, StringRef Name, const DIE &Die);
+ void addAccelName(const DwarfUnit &Unit,
+ const DICompileUnit::DebugNameTableKind NameTableKind,
+ StringRef Name, const DIE &Die);
- void addAccelObjC(const DICompileUnit &CU, StringRef Name, const DIE &Die);
+ void addAccelObjC(const DwarfUnit &Unit,
+ const DICompileUnit::DebugNameTableKind NameTableKind,
+ StringRef Name, const DIE &Die);
- void addAccelNamespace(const DICompileUnit &CU, StringRef Name,
- const DIE &Die);
+ void addAccelNamespace(const DwarfUnit &Unit,
+ const DICompileUnit::DebugNameTableKind NameTableKind,
+ StringRef Name, const DIE &Die);
- void addAccelType(const DICompileUnit &CU, StringRef Name, const DIE &Die,
- char Flags);
+ void addAccelType(const DwarfUnit &Unit,
+ const DICompileUnit::DebugNameTableKind NameTableKind,
+ StringRef Name, const DIE &Die, char Flags);
const MachineFunction *getCurrentFunction() const { return CurFn; }
@@ -842,6 +922,19 @@ public:
MDNodeSet &getLocalDeclsForScope(const DILocalScope *S) {
return LocalDeclsPerLS[S];
}
+
+ /// Sets the current DWARF5AccelTable to use.
+ void setCurrentDWARF5AccelTable(const DWARF5AccelTableKind Kind) {
+ switch (Kind) {
+ case DWARF5AccelTableKind::CU:
+ CurrentDebugNames = &AccelDebugNames;
+ break;
+ case DWARF5AccelTableKind::TU:
+ CurrentDebugNames = &AccelTypeUnitsDebugNames;
+ }
+ }
+  /// Returns the currently selected CU or TU DWARF5AccelTable.
+ DWARF5AccelTable &getCurrentDWARF5AccelTable() { return *CurrentDebugNames; }
};
} // end namespace llvm
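Note how the CU/TU switch above is meant to be used: CurrentDebugNames lets a single addAccelNameImpl path feed either table. A hedged sketch of the intended call sequence, assuming a DwarfDebug &DD in scope:

    // Route accelerator entries to the TU table while a type unit is being
    // built, then restore the CU table for subsequent compile-unit entries.
    DD.setCurrentDWARF5AccelTable(DwarfDebug::DWARF5AccelTableKind::TU);
    // ... construct the type unit's DIEs; addAccelNameImpl writes into
    // DD.getCurrentDWARF5AccelTable() ...
    DD.setCurrentDWARF5AccelTable(DwarfDebug::DWARF5AccelTableKind::CU);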
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 7623b7fb7c5d..a74d43897d45 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -414,6 +414,7 @@ void DwarfExpression::beginEntryValueExpression(
SavedLocationKind = LocationKind;
LocationKind = Register;
+ LocationFlags |= EntryValue;
IsEmittingEntryValue = true;
enableTemporaryBuffer();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 3fe437a07c92..eab798c0da78 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -102,21 +102,16 @@ void DwarfFile::emitStrings(MCSection *StrSection, MCSection *OffsetSection,
StrPool.emit(*Asm, StrSection, OffsetSection, UseRelativeOffsets);
}
-bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
+void DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
auto &ScopeVars = ScopeVariables[LS];
const DILocalVariable *DV = Var->getVariable();
if (unsigned ArgNum = DV->getArg()) {
- auto Cached = ScopeVars.Args.find(ArgNum);
- if (Cached == ScopeVars.Args.end())
- ScopeVars.Args[ArgNum] = Var;
- else {
- Cached->second->addMMIEntry(*Var);
- return false;
- }
+ auto Ret = ScopeVars.Args.insert({ArgNum, Var});
+ assert(Ret.second);
+ (void)Ret;
} else {
ScopeVars.Locals.push_back(Var);
}
- return true;
}
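With MMI merging now handled inside Loc::MMI::addFrameIndexExpr, a second DbgVariable arriving for the same argument number would indicate a bug upstream, which the insert-then-assert idiom checks at no release-build cost. A generic sketch of that idiom, independent of the LLVM types:

    #include <cassert>
    #include <map>

    void addArg(std::map<unsigned, int *> &Args, unsigned ArgNum, int *Var) {
      auto Ret = Args.insert({ArgNum, Var});  // fails if ArgNum is present
      assert(Ret.second && "duplicate argument entry");
      (void)Ret; // silence the unused-variable warning in release builds
    }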
void DwarfFile::addScopeLabel(LexicalScope *LS, DbgLabel *Label) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
index 464f4f048016..f76858fc2f36 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -150,8 +150,7 @@ public:
MCSymbol *getRnglistsTableBaseSym() const { return RnglistsTableBaseSym; }
void setRnglistsTableBaseSym(MCSymbol *Sym) { RnglistsTableBaseSym = Sym; }
- /// \returns false if the variable was merged with a previous one.
- bool addScopeVariable(LexicalScope *LS, DbgVariable *Var);
+ void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
void addScopeLabel(LexicalScope *LS, DbgLabel *Label);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index d30f0ef7af34..d462859e4894 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -16,7 +16,6 @@
#include "DwarfExpression.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -81,15 +80,16 @@ bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
}
DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node,
- AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU)
- : DIEUnit(UnitTag), CUNode(Node), Asm(A), DD(DW), DU(DWU) {}
+ AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU,
+ unsigned UniqueID)
+ : DIEUnit(UnitTag), UniqueID(UniqueID), CUNode(Node), Asm(A), DD(DW),
+ DU(DWU) {}
DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A,
- DwarfDebug *DW, DwarfFile *DWU,
+ DwarfDebug *DW, DwarfFile *DWU, unsigned UniqueID,
MCDwarfDwoLineTable *SplitLineTable)
- : DwarfUnit(dwarf::DW_TAG_type_unit, CU.getCUNode(), A, DW, DWU), CU(CU),
- SplitLineTable(SplitLineTable) {
-}
+ : DwarfUnit(dwarf::DW_TAG_type_unit, CU.getCUNode(), A, DW, DWU, UniqueID),
+ CU(CU), SplitLineTable(SplitLineTable) {}
DwarfUnit::~DwarfUnit() {
for (DIEBlock *B : DIEBlocks)
@@ -640,7 +640,8 @@ void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete();
}
unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0;
- DD->addAccelType(*CUNode, Ty->getName(), TyDIE, Flags);
+ DD->addAccelType(*this, CUNode->getNameTableKind(), Ty->getName(), TyDIE,
+ Flags);
if (!Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) ||
isa<DINamespace>(Context) || isa<DICommonBlock>(Context))
@@ -1112,7 +1113,7 @@ DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) {
addString(NDie, dwarf::DW_AT_name, NS->getName());
else
Name = "(anonymous namespace)";
- DD->addAccelNamespace(*CUNode, Name, NDie);
+ DD->addAccelNamespace(*this, CUNode->getNameTableKind(), Name, NDie);
addGlobalName(Name, NDie, NS->getScope());
if (NS->getExportSymbols())
addFlag(NDie, dwarf::DW_AT_export_symbols);
@@ -1439,7 +1440,8 @@ DIE *DwarfUnit::getIndexTyDie() {
addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
dwarf::getArrayIndexTypeEncoding(
(dwarf::SourceLanguage)getLanguage()));
- DD->addAccelType(*CUNode, Name, *IndexTyDie, /*Flags*/ 0);
+ DD->addAccelType(*this, CUNode->getNameTableKind(), Name, *IndexTyDie,
+ /*Flags*/ 0);
return IndexTyDie;
}
@@ -1778,6 +1780,10 @@ void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
}
void DwarfTypeUnit::emitHeader(bool UseOffsets) {
+ if (!DD->useSplitDwarf()) {
+ LabelBegin = Asm->createTempSymbol("tu_begin");
+ Asm->OutStreamer->emitLabel(LabelBegin);
+ }
DwarfUnit::emitCommonHeader(UseOffsets,
DD->useSplitDwarf() ? dwarf::DW_UT_split_type
: dwarf::DW_UT_type);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 8f17e94c2d1c..18f50f86ec87 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -34,6 +34,8 @@ class MCSymbol;
/// source file.
class DwarfUnit : public DIEUnit {
protected:
+ /// A numeric ID unique among all CUs in the module
+ unsigned UniqueID;
/// MDNode for the compile unit.
const DICompileUnit *CUNode;
@@ -43,6 +45,9 @@ protected:
/// Target of Dwarf emission.
AsmPrinter *Asm;
+ /// The start of the unit within its section.
+ MCSymbol *LabelBegin = nullptr;
+
/// Emitted at the end of the CU and used to compute the CU Length field.
MCSymbol *EndLabel = nullptr;
@@ -68,8 +73,8 @@ protected:
/// corresponds to the MDNode mapped with the subprogram DIE.
DenseMap<DIE *, const DINode *> ContainingTypeMap;
- DwarfUnit(dwarf::Tag, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW,
- DwarfFile *DWU);
+ DwarfUnit(dwarf::Tag, const DICompileUnit *Node, AsmPrinter *A,
+ DwarfDebug *DW, DwarfFile *DWU, unsigned UniqueID = 0);
bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie, bool Minimal);
@@ -92,8 +97,15 @@ protected:
}
public:
+ /// Gets Unique ID for this unit.
+ unsigned getUniqueID() const { return UniqueID; }
// Accessors.
AsmPrinter* getAsmPrinter() const { return Asm; }
+  /// Get the symbol for the start of the section for this unit.
+ MCSymbol *getLabelBegin() const {
+ assert(LabelBegin && "LabelBegin is not initialized");
+ return LabelBegin;
+ }
MCSymbol *getEndLabel() const { return EndLabel; }
uint16_t getLanguage() const { return CUNode->getSourceLanguage(); }
const DICompileUnit *getCUNode() const { return CUNode; }
@@ -369,9 +381,12 @@ class DwarfTypeUnit final : public DwarfUnit {
public:
DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A, DwarfDebug *DW,
- DwarfFile *DWU, MCDwarfDwoLineTable *SplitLineTable = nullptr);
+ DwarfFile *DWU, unsigned UniqueID,
+ MCDwarfDwoLineTable *SplitLineTable = nullptr);
void setTypeSignature(uint64_t Signature) { TypeSignature = Signature; }
+  /// Returns the type signature.
+ uint64_t getTypeSignature() const { return TypeSignature; }
void setType(const DIE *Ty) { this->Ty = Ty; }
/// Emit the header for this unit, not including the initial length field.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index eef6b1d93f36..32239535e4d0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -13,7 +13,6 @@
#include "EHStreamer.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
index 5d813b72c0b7..cd18703b359e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
@@ -75,7 +75,7 @@ static bool isPossibleIndirectCallTarget(const Function *F) {
}
MCSymbol *WinCFGuard::lookupImpSymbol(const MCSymbol *Sym) {
- if (Sym->getName().startswith("__imp_"))
+ if (Sym->getName().starts_with("__imp_"))
return nullptr;
return Asm->OutContext.lookupSymbol(Twine("__imp_") + Sym->getName());
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
index 5ef850d09d92..ad3ad9928987 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
@@ -5,7 +5,6 @@
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/UniqueVector.h"
#include "llvm/Analysis/Interval.h"
@@ -26,6 +25,7 @@
#include <assert.h>
#include <cstdint>
#include <optional>
+#include <queue>
#include <sstream>
#include <unordered_map>
@@ -1979,20 +1979,23 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares(
I, Fn.getParent()->getDataLayout())) {
// Find markers linked to this alloca.
for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(Info->Base)) {
- // Discard the fragment if it covers the entire variable.
- std::optional<DIExpression::FragmentInfo> FragInfo =
- [&Info, DAI]() -> std::optional<DIExpression::FragmentInfo> {
- DIExpression::FragmentInfo F;
- F.OffsetInBits = Info->OffsetInBits;
- F.SizeInBits = Info->SizeInBits;
- if (auto ExistingFrag = DAI->getExpression()->getFragmentInfo())
- F.OffsetInBits += ExistingFrag->OffsetInBits;
- if (auto Sz = DAI->getVariable()->getSizeInBits()) {
- if (F.OffsetInBits == 0 && F.SizeInBits == *Sz)
- return std::nullopt;
- }
- return F;
- }();
+ std::optional<DIExpression::FragmentInfo> FragInfo;
+
+ // Skip this assignment if the affected bits are outside of the
+ // variable fragment.
+ if (!at::calculateFragmentIntersect(
+ I.getModule()->getDataLayout(), Info->Base,
+ Info->OffsetInBits, Info->SizeInBits, DAI, FragInfo) ||
+ (FragInfo && FragInfo->SizeInBits == 0))
+ continue;
+
+          // FragInfo from calculateFragmentIntersect is nullopt if the
+          // resultant fragment matches DAI's fragment or the entire variable,
+          // in which case we copy the fragment info from DAI. If FragInfo is
+          // still nullopt after the copy, it means "no fragment info", which
+          // is how it is usually interpreted.
+ if (!FragInfo)
+ FragInfo = DAI->getExpression()->getFragmentInfo();
DebugVariable DV = DebugVariable(DAI->getVariable(), FragInfo,
DAI->getDebugLoc().getInlinedAt());
@@ -2266,14 +2269,14 @@ static bool
removeRedundantDbgLocsUsingBackwardScan(const BasicBlock *BB,
FunctionVarLocsBuilder &FnVarLocs) {
bool Changed = false;
- SmallDenseMap<DebugAggregate, BitVector> VariableDefinedBits;
+ SmallDenseMap<DebugAggregate, BitVector> VariableDefinedBytes;
// Scan over the entire block, not just over the instructions mapped by
  // FnVarLocs, because wedges in FnVarLocs may only be separated by debug
// instructions.
for (const Instruction &I : reverse(*BB)) {
if (!isa<DbgVariableIntrinsic>(I)) {
// Sequence of consecutive defs ended. Clear map for the next one.
- VariableDefinedBits.clear();
+ VariableDefinedBytes.clear();
}
// Get the location defs that start just before this instruction.
@@ -2292,9 +2295,15 @@ removeRedundantDbgLocsUsingBackwardScan(const BasicBlock *BB,
DebugAggregate Aggr =
getAggregate(FnVarLocs.getVariable(RIt->VariableID));
uint64_t SizeInBits = Aggr.first->getSizeInBits().value_or(0);
+ uint64_t SizeInBytes = divideCeil(SizeInBits, 8);
+
+ // Cutoff for large variables to prevent expensive bitvector operations.
+ const uint64_t MaxSizeBytes = 2048;
- if (SizeInBits == 0) {
+ if (SizeInBytes == 0 || SizeInBytes > MaxSizeBytes) {
// If the size is unknown (0) then keep this location def to be safe.
+ // Do the same for defs of large variables, which would be expensive
+ // to represent with a BitVector.
NewDefsReversed.push_back(*RIt);
continue;
}
@@ -2302,23 +2311,24 @@ removeRedundantDbgLocsUsingBackwardScan(const BasicBlock *BB,
// Only keep this location definition if it is not fully eclipsed by
// other definitions in this wedge that come after it
- // Inert the bits the location definition defines.
+      // Insert the bytes the location definition defines.
auto InsertResult =
- VariableDefinedBits.try_emplace(Aggr, BitVector(SizeInBits));
+ VariableDefinedBytes.try_emplace(Aggr, BitVector(SizeInBytes));
bool FirstDefinition = InsertResult.second;
- BitVector &DefinedBits = InsertResult.first->second;
+ BitVector &DefinedBytes = InsertResult.first->second;
DIExpression::FragmentInfo Fragment =
RIt->Expr->getFragmentInfo().value_or(
DIExpression::FragmentInfo(SizeInBits, 0));
bool InvalidFragment = Fragment.endInBits() > SizeInBits;
+ uint64_t StartInBytes = Fragment.startInBits() / 8;
+ uint64_t EndInBytes = divideCeil(Fragment.endInBits(), 8);
- // If this defines any previously undefined bits, keep it.
+ // If this defines any previously undefined bytes, keep it.
if (FirstDefinition || InvalidFragment ||
- DefinedBits.find_first_unset_in(Fragment.startInBits(),
- Fragment.endInBits()) != -1) {
+ DefinedBytes.find_first_unset_in(StartInBytes, EndInBytes) != -1) {
if (!InvalidFragment)
- DefinedBits.set(Fragment.startInBits(), Fragment.endInBits());
+ DefinedBytes.set(StartInBytes, EndInBytes);
NewDefsReversed.push_back(*RIt);
continue;
}
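The byte-granularity rewrite above shrinks the tracking BitVector eightfold at the cost of slight imprecision for fragments that are not byte-aligned. A small sketch of the rounding it relies on, mirroring the StartInBytes/EndInBytes computation (divideCeil rounds up):

    #include <cstdint>
    #include <utility>

    // Round a bit range [StartBits, EndBits) out to the smallest byte range
    // containing it.
    static std::pair<uint64_t, uint64_t> bitsToByteSpan(uint64_t StartBits,
                                                        uint64_t EndBits) {
      uint64_t StartByte = StartBits / 8;   // round down
      uint64_t EndByte = (EndBits + 7) / 8; // divideCeil(EndBits, 8)
      return {StartByte, EndByte};
    }
    // bitsToByteSpan(4, 12) == {0, 2}: bits 4..11 touch bytes 0 and 1.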
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 80a0bb957cfc..ccf3e9ec6492 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -373,10 +373,8 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
ReplacementIRBuilder Builder(LI, *DL);
Value *Addr = LI->getPointerOperand();
- Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
- Value *NewAddr = Builder.CreateBitCast(Addr, PT);
- auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
+ auto *NewLI = Builder.CreateLoad(NewTy, Addr);
NewLI->setAlignment(LI->getAlign());
NewLI->setVolatile(LI->isVolatile());
NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
@@ -398,14 +396,12 @@ AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
Value *Addr = RMWI->getPointerOperand();
Value *Val = RMWI->getValOperand();
- Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace());
- Value *NewAddr = Builder.CreateBitCast(Addr, PT);
Value *NewVal = Val->getType()->isPointerTy()
? Builder.CreatePtrToInt(Val, NewTy)
: Builder.CreateBitCast(Val, NewTy);
auto *NewRMWI =
- Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal,
+ Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
RMWI->getAlign(), RMWI->getOrdering());
NewRMWI->setVolatile(RMWI->isVolatile());
LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
@@ -508,10 +504,8 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
Value *Addr = SI->getPointerOperand();
- Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
- Value *NewAddr = Builder.CreateBitCast(Addr, PT);
- StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
+ StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
NewSI->setAlignment(SI->getAlign());
NewSI->setVolatile(SI->isVolatile());
NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
@@ -553,8 +547,6 @@ static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
bool NeedBitcast = OrigTy->isFloatingPointTy();
if (NeedBitcast) {
IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
- unsigned AS = Addr->getType()->getPointerAddressSpace();
- Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS));
NewVal = Builder.CreateBitCast(NewVal, IntTy);
Loaded = Builder.CreateBitCast(Loaded, IntTy);
}
@@ -727,7 +719,6 @@ static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
assert(ValueSize < MinWordSize);
PointerType *PtrTy = cast<PointerType>(Addr->getType());
- Type *WordPtrType = PMV.WordType->getPointerTo(PtrTy->getAddressSpace());
IntegerType *IntTy = DL.getIntPtrType(Ctx, PtrTy->getAddressSpace());
Value *PtrLSB;
@@ -761,10 +752,6 @@ static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
- // Cast for typed pointers.
- PMV.AlignedAddr =
- Builder.CreateBitCast(PMV.AlignedAddr, WordPtrType, "AlignedAddr");
-
return PMV;
}
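Every deletion in this file follows the same opaque-pointer logic: a pointer value no longer carries a pointee type, so the bitcasts that used to retype an address are dead weight. A before/after fragment illustrating the shape of the change (Builder, Addr, and NewTy as in the surrounding code):

    // Before, with typed pointers: the address had to be retyped first.
    //   Type *PT = PointerType::get(NewTy, AS);
    //   Value *NewAddr = Builder.CreateBitCast(Addr, PT);
    //   LoadInst *NewLI = Builder.CreateLoad(NewTy, NewAddr);
    // After, with opaque pointers: the load itself names the loaded type.
    LoadInst *NewLI = Builder.CreateLoad(NewTy, Addr);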
@@ -924,9 +911,10 @@ AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
else
NewOperand = ValOperand_Shifted;
- AtomicRMWInst *NewAI =
- Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, NewOperand,
- PMV.AlignedAddrAlignment, AI->getOrdering());
+ AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
+ Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
+ AI->getOrdering(), AI->getSyncScopeID());
+ // TODO: Preserve metadata
Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
AI->replaceAllUsesWith(FinalOldResult);
@@ -1188,14 +1176,12 @@ AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
ReplacementIRBuilder Builder(CI, *DL);
Value *Addr = CI->getPointerOperand();
- Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
- Value *NewAddr = Builder.CreateBitCast(Addr, PT);
Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
auto *NewCI = Builder.CreateAtomicCmpXchg(
- NewAddr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
+ Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
CI->getFailureOrdering(), CI->getSyncScopeID());
NewCI->setVolatile(CI->isVolatile());
NewCI->setWeak(CI->isWeak());
@@ -1848,11 +1834,8 @@ bool AtomicExpand::expandAtomicOpToLibcall(
// variables.
AllocaInst *AllocaCASExpected = nullptr;
- Value *AllocaCASExpected_i8 = nullptr;
AllocaInst *AllocaValue = nullptr;
- Value *AllocaValue_i8 = nullptr;
AllocaInst *AllocaResult = nullptr;
- Value *AllocaResult_i8 = nullptr;
Type *ResultTy;
SmallVector<Value *, 6> Args;
@@ -1869,23 +1852,17 @@ bool AtomicExpand::expandAtomicOpToLibcall(
  // implementation and that addresses are convertible. For systems without
// that property, we'd need to extend this mechanism to support AS-specific
// families of atomic intrinsics.
- auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace();
- Value *PtrVal =
- Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx, PtrTypeAS));
- PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx));
+ Value *PtrVal = PointerOperand;
+ PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
Args.push_back(PtrVal);
// 'expected' argument, if present.
if (CASExpected) {
AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
AllocaCASExpected->setAlignment(AllocaAlignment);
- unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace();
-
- AllocaCASExpected_i8 = Builder.CreateBitCast(
- AllocaCASExpected, Type::getInt8PtrTy(Ctx, AllocaAS));
- Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
+ Builder.CreateLifetimeStart(AllocaCASExpected, SizeVal64);
Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
- Args.push_back(AllocaCASExpected_i8);
+ Args.push_back(AllocaCASExpected);
}
// 'val' argument ('desired' for cas), if present.
@@ -1897,11 +1874,9 @@ bool AtomicExpand::expandAtomicOpToLibcall(
} else {
AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
AllocaValue->setAlignment(AllocaAlignment);
- AllocaValue_i8 =
- Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
- Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
+ Builder.CreateLifetimeStart(AllocaValue, SizeVal64);
Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
- Args.push_back(AllocaValue_i8);
+ Args.push_back(AllocaValue);
}
}
@@ -1909,11 +1884,8 @@ bool AtomicExpand::expandAtomicOpToLibcall(
if (!CASExpected && HasResult && !UseSizedLibcall) {
AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
AllocaResult->setAlignment(AllocaAlignment);
- unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace();
- AllocaResult_i8 =
- Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
- Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
- Args.push_back(AllocaResult_i8);
+ Builder.CreateLifetimeStart(AllocaResult, SizeVal64);
+ Args.push_back(AllocaResult);
}
// 'ordering' ('success_order' for cas) argument.
@@ -1945,7 +1917,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
// And then, extract the results...
if (ValueOperand && !UseSizedLibcall)
- Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);
+ Builder.CreateLifetimeEnd(AllocaValue, SizeVal64);
if (CASExpected) {
// The final result from the CAS is {load of 'expected' alloca, bool result
@@ -1954,7 +1926,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
Value *V = PoisonValue::get(FinalResultTy);
Value *ExpectedOut = Builder.CreateAlignedLoad(
CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
- Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
+ Builder.CreateLifetimeEnd(AllocaCASExpected, SizeVal64);
V = Builder.CreateInsertValue(V, ExpectedOut, 0);
V = Builder.CreateInsertValue(V, Result, 1);
I->replaceAllUsesWith(V);
@@ -1965,7 +1937,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
else {
V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
AllocaAlignment);
- Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
+ Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
}
I->replaceAllUsesWith(V);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockPathCloning.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockPathCloning.cpp
new file mode 100644
index 000000000000..5d5f3c3da481
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockPathCloning.cpp
@@ -0,0 +1,245 @@
+//===-- BasicBlockPathCloning.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// BasicBlockPathCloning implementation.
+///
+/// The purpose of this pass is to clone basic block paths based on information
+/// provided by the -fbasic-block-sections=list option.
+/// Please refer to BasicBlockSectionsProfileReader.cpp to see a path cloning
+/// example.
+//===----------------------------------------------------------------------===//
+// This pass clones the machine basic blocks along the given paths and sets up
+// the CFG. It assigns BBIDs to the cloned blocks so that the
+// `BasicBlockSections` pass can correctly map the cluster information to the
+// blocks. The cloned block's BBID will have the same BaseID as the original
+// block, but will get a unique non-zero CloneID (original blocks all have zero
+// CloneIDs). This pass applies a path cloning if it satisfies the following
+// conditions:
+// 1. All BBIDs in the path should be mapped to existing blocks.
+// 2. Each two consecutive BBIDs in the path must have a successor
+// relationship in the CFG.
+// 3. The path should not include a block with indirect branches, except for
+// the last block.
+// If a path does not satisfy all three conditions, it is rejected, but the
+// CloneIDs reserved for its would-be clones are still consumed, so that the
+// `BasicBlockSections` pass can map cluster info correctly to the blocks that
+// were actually cloned.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/BasicBlockSectionUtils.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+// Clones the given block and assigns the given `CloneID` to its BBID. Copies
+// the instructions into the new block and sets up its successors.
+MachineBasicBlock *CloneMachineBasicBlock(MachineBasicBlock &OrigBB,
+ unsigned CloneID) {
+ auto &MF = *OrigBB.getParent();
+ auto TII = MF.getSubtarget().getInstrInfo();
+ // Create the clone block and set its BBID based on the original block.
+ MachineBasicBlock *CloneBB = MF.CreateMachineBasicBlock(
+ OrigBB.getBasicBlock(), UniqueBBID{OrigBB.getBBID()->BaseID, CloneID});
+ MF.push_back(CloneBB);
+
+ // Copy the instructions.
+ for (auto &I : OrigBB.instrs()) {
+ // Bundled instructions are duplicated together.
+ if (I.isBundledWithPred())
+ continue;
+ TII->duplicate(*CloneBB, CloneBB->end(), I);
+ }
+
+ // Add the successors of the original block as the new block's successors.
+ // We set the predecessor after returning from this call.
+ for (auto SI = OrigBB.succ_begin(), SE = OrigBB.succ_end(); SI != SE; ++SI)
+ CloneBB->copySuccessor(&OrigBB, SI);
+
+ if (auto FT = OrigBB.getFallThrough(/*JumpToFallThrough=*/false)) {
+ // The original block has an implicit fall through.
+ // Insert an explicit unconditional jump from the cloned block to the
+ // fallthrough block. Technically, this is only needed for the last block
+ // of the path, but we do it for all clones for consistency.
+ TII->insertUnconditionalBranch(*CloneBB, FT, CloneBB->findBranchDebugLoc());
+ }
+ return CloneBB;
+}
+
+// Returns true if we can legally apply the cloning represented by `ClonePath`.
+// `BBIDToBlock` contains the original basic blocks in function `MF` keyed by
+// their `BBID::BaseID`.
+bool IsValidCloning(const MachineFunction &MF,
+ const DenseMap<unsigned, MachineBasicBlock *> &BBIDToBlock,
+ const SmallVector<unsigned> &ClonePath) {
+ const MachineBasicBlock *PrevBB = nullptr;
+ for (size_t I = 0; I < ClonePath.size(); ++I) {
+ unsigned BBID = ClonePath[I];
+ const MachineBasicBlock *PathBB = BBIDToBlock.lookup(BBID);
+ if (!PathBB) {
+ WithColor::warning() << "no block with id " << BBID << " in function "
+ << MF.getName() << "\n";
+ return false;
+ }
+
+ if (PrevBB) {
+ if (!PrevBB->isSuccessor(PathBB)) {
+ WithColor::warning()
+ << "block #" << BBID << " is not a successor of block #"
+ << PrevBB->getBBID()->BaseID << " in function " << MF.getName()
+ << "\n";
+ return false;
+ }
+
+ for (auto &MI : *PathBB) {
+ // Avoid cloning when the block contains non-duplicable instructions.
+ // CFI instructions are marked as non-duplicable only because of Darwin,
+ // so we exclude them from this check.
+ if (MI.isNotDuplicable() && !MI.isCFIInstruction()) {
+ WithColor::warning()
+ << "block #" << BBID
+ << " has non-duplicable instructions in function " << MF.getName()
+ << "\n";
+ return false;
+ }
+ }
+ }
+
+ if (I != ClonePath.size() - 1 && !PathBB->empty() &&
+ PathBB->back().isIndirectBranch()) {
+ WithColor::warning()
+ << "block #" << BBID
+ << " has indirect branch and appears as the non-tail block of a "
+ "path in function "
+ << MF.getName() << "\n";
+ return false;
+ }
+ PrevBB = PathBB;
+ }
+ return true;
+}
+
+// Applies all clonings specified in `ClonePaths` to `MF`. Returns true
+// if any clonings have been applied.
+bool ApplyCloning(MachineFunction &MF,
+ const SmallVector<SmallVector<unsigned>> &ClonePaths) {
+ if (ClonePaths.empty())
+ return false;
+ bool AnyPathsCloned = false;
+ // Map from the final BB IDs to the `MachineBasicBlock`s.
+ DenseMap<unsigned, MachineBasicBlock *> BBIDToBlock;
+ for (auto &BB : MF)
+ BBIDToBlock.try_emplace(BB.getBBID()->BaseID, &BB);
+
+ DenseMap<unsigned, unsigned> NClonesForBBID;
+ auto TII = MF.getSubtarget().getInstrInfo();
+ for (const auto &ClonePath : ClonePaths) {
+ if (!IsValidCloning(MF, BBIDToBlock, ClonePath)) {
+ // We still need to increment the number of clones so we can map
+ // to the cluster info correctly.
+ for (unsigned BBID : ClonePath)
+ ++NClonesForBBID[BBID];
+ continue;
+ }
+ MachineBasicBlock *PrevBB = nullptr;
+ for (unsigned BBID : ClonePath) {
+ MachineBasicBlock *OrigBB = BBIDToBlock.at(BBID);
+ if (PrevBB == nullptr) {
+ // The first block in the path is not cloned. We only need to make it
+ // branch to the next cloned block in the path. Here, we make its
+ // fallthrough explicit so we can change it later.
+ if (auto FT = OrigBB->getFallThrough(/*JumpToFallThrough=*/false)) {
+ TII->insertUnconditionalBranch(*OrigBB, FT,
+ OrigBB->findBranchDebugLoc());
+ }
+ PrevBB = OrigBB;
+ continue;
+ }
+ MachineBasicBlock *CloneBB =
+ CloneMachineBasicBlock(*OrigBB, ++NClonesForBBID[BBID]);
+
+ // Set up the previous block in the path to jump to the clone. This also
+ // transfers the successor/predecessor relationship of PrevBB and OrigBB
+ // to that of PrevBB and CloneBB.
+ PrevBB->ReplaceUsesOfBlockWith(OrigBB, CloneBB);
+
+ // Copy the livein set.
+ for (auto &LiveIn : OrigBB->liveins())
+ CloneBB->addLiveIn(LiveIn);
+
+ PrevBB = CloneBB;
+ }
+ AnyPathsCloned = true;
+ }
+ return AnyPathsCloned;
+}
+} // end anonymous namespace
+
+namespace llvm {
+class BasicBlockPathCloning : public MachineFunctionPass {
+public:
+ static char ID;
+
+ BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
+
+ BasicBlockPathCloning() : MachineFunctionPass(ID) {
+ initializeBasicBlockPathCloningPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "Basic Block Path Cloning"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  /// Apply the path clonings specified in the profile by cloning machine
+  /// basic blocks within this function accordingly.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // namespace llvm
+
+char BasicBlockPathCloning::ID = 0;
+INITIALIZE_PASS_BEGIN(
+ BasicBlockPathCloning, "bb-path-cloning",
+ "Applies path clonings for the -basic-block-sections=list option", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader)
+INITIALIZE_PASS_END(
+ BasicBlockPathCloning, "bb-path-cloning",
+ "Applies path clonings for the -basic-block-sections=list option", false,
+ false)
+
+bool BasicBlockPathCloning::runOnMachineFunction(MachineFunction &MF) {
+ assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
+ "BB Sections list not enabled!");
+ if (hasInstrProfHashMismatch(MF))
+ return false;
+
+ return ApplyCloning(MF, getAnalysis<BasicBlockSectionsProfileReader>()
+ .getClonePathsForFunction(MF.getName()));
+}
+
+void BasicBlockPathCloning::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<BasicBlockSectionsProfileReader>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineFunctionPass *llvm::createBasicBlockPathCloningPass() {
+ return new BasicBlockPathCloning();
+}
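For context on wiring, createBasicBlockPathCloningPass is the factory a target pass pipeline would call; the pass only makes sense under -basic-block-sections=list and must run before BasicBlockSections so that pass can consume the cloned BBIDs. A hedged sketch of such registration inside a TargetPassConfig-style hook:

    // Illustrative only; the exact hook and guard live in the target-independent
    // codegen pipeline setup.
    if (TM->getBBSectionsType() == BasicBlockSection::List)
      addPass(llvm::createBasicBlockPathCloningPass());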
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp
index 6967ca5160c0..42997d2287d6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -168,31 +168,6 @@ updateBranches(MachineFunction &MF,
}
}
-// This function provides the BBCluster information associated with a function.
-// Returns true if a valid association exists and false otherwise.
-bool getBBClusterInfoForFunction(
- const MachineFunction &MF,
- BasicBlockSectionsProfileReader *BBSectionsProfileReader,
- DenseMap<unsigned, BBClusterInfo> &V) {
-
- // Find the assoicated cluster information.
- std::pair<bool, SmallVector<BBClusterInfo, 4>> P =
- BBSectionsProfileReader->getBBClusterInfoForFunction(MF.getName());
- if (!P.first)
- return false;
-
- if (P.second.empty()) {
- // This indicates that sections are desired for all basic blocks of this
- // function. We clear the BBClusterInfo vector to denote this.
- V.clear();
- return true;
- }
-
- for (const BBClusterInfo &BBCI : P.second)
- V[BBCI.BBID] = BBCI;
- return true;
-}
-
// This function sorts basic blocks according to the cluster's information.
// All explicitly specified clusters of basic blocks will be ordered
// accordingly. All non-specified BBs go into a separate "Cold" section.
@@ -200,12 +175,12 @@ bool getBBClusterInfoForFunction(
// clusters, they are moved into a single "Exception" section. Eventually,
// clusters are ordered in increasing order of their IDs, with the "Exception"
// and "Cold" succeeding all other clusters.
-// FuncBBClusterInfo represent the cluster information for basic blocks. It
+// FuncClusterInfo represents the cluster information for basic blocks. It
// maps from BBID of basic blocks to their cluster information. If this is
// empty, it means unique sections for all basic blocks in the function.
static void
assignSections(MachineFunction &MF,
- const DenseMap<unsigned, BBClusterInfo> &FuncBBClusterInfo) {
+ const DenseMap<UniqueBBID, BBClusterInfo> &FuncClusterInfo) {
assert(MF.hasBBSections() && "BB Sections is not set for function.");
// This variable stores the section ID of the cluster containing eh_pads (if
// all eh_pads are one cluster). If more than one cluster contain eh_pads, we
@@ -216,19 +191,17 @@ assignSections(MachineFunction &MF,
// With the 'all' option, every basic block is placed in a unique section.
// With the 'list' option, every basic block is placed in a section
// associated with its cluster, unless we want individual unique sections
- // for every basic block in this function (if FuncBBClusterInfo is empty).
+ // for every basic block in this function (if FuncClusterInfo is empty).
if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All ||
- FuncBBClusterInfo.empty()) {
+ FuncClusterInfo.empty()) {
// If unique sections are desired for all basic blocks of the function, we
// set every basic block's section ID equal to its original position in
// the layout (which is equal to its number). This ensures that basic
// blocks are ordered canonically.
MBB.setSectionID(MBB.getNumber());
} else {
- // TODO: Replace `getBBIDOrNumber` with `getBBID` once version 1 is
- // deprecated.
- auto I = FuncBBClusterInfo.find(MBB.getBBIDOrNumber());
- if (I != FuncBBClusterInfo.end()) {
+ auto I = FuncClusterInfo.find(*MBB.getBBID());
+ if (I != FuncClusterInfo.end()) {
MBB.setSectionID(I->second.ClusterID);
} else {
// BB goes into the special cold section if it is not specified in the
@@ -260,7 +233,8 @@ void llvm::sortBasicBlocksAndUpdateBranches(
[[maybe_unused]] const MachineBasicBlock *EntryBlock = &MF.front();
SmallVector<MachineBasicBlock *> PreLayoutFallThroughs(MF.getNumBlockIDs());
for (auto &MBB : MF)
- PreLayoutFallThroughs[MBB.getNumber()] = MBB.getFallThrough();
+ PreLayoutFallThroughs[MBB.getNumber()] =
+ MBB.getFallThrough(/*JumpToFallThrough=*/false);
MF.sort(MBBCmp);
assert(&MF.front() == EntryBlock &&
@@ -285,19 +259,12 @@ void llvm::avoidZeroOffsetLandingPad(MachineFunction &MF) {
MachineBasicBlock::iterator MI = MBB.begin();
while (!MI->isEHLabel())
++MI;
- MCInst Nop = MF.getSubtarget().getInstrInfo()->getNop();
- BuildMI(MBB, MI, DebugLoc(),
- MF.getSubtarget().getInstrInfo()->get(Nop.getOpcode()));
+ MF.getSubtarget().getInstrInfo()->insertNoop(MBB, MI);
}
}
}
-// This checks if the source of this function has drifted since this binary was
-// profiled previously. For now, we are piggy backing on what PGO does to
-// detect this with instrumented profiles. PGO emits an hash of the IR and
-// checks if the hash has changed. Advanced basic block layout is usually done
-// on top of PGO optimized binaries and hence this check works well in practice.
-static bool hasInstrProfHashMismatch(MachineFunction &MF) {
+bool llvm::hasInstrProfHashMismatch(MachineFunction &MF) {
if (!BBSectionsDetectSourceDrift)
return false;
@@ -318,7 +285,7 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
assert(BBSectionsType != BasicBlockSection::None &&
"BB Sections not enabled!");
- // Check for source drift. If the source has changed since the profiles
+ // Check for source drift. If the source has changed since the profiles
// were obtained, optimizing basic blocks might be sub-optimal.
// This only applies to BasicBlockSection::List as it creates
// clusters of basic blocks using basic block ids. Source drift can
@@ -326,32 +293,30 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
// regards to performance.
if (BBSectionsType == BasicBlockSection::List &&
hasInstrProfHashMismatch(MF))
- return true;
- // Renumber blocks before sorting them. This is useful during sorting,
- // basic blocks in the same section will retain the default order.
- // This renumbering should also be done for basic block labels to match the
- // profiles with the correct blocks.
- // For LLVM_BB_ADDR_MAP versions 2 and higher, this renumbering serves
- // the different purpose of accessing the original layout positions and
- // finding the original fallthroughs.
- // TODO: Change the above comment accordingly when version 1 is deprecated.
+ return false;
+ // Renumber blocks before sorting them. This is useful for accessing the
+ // original layout positions and finding the original fallthroughs.
MF.RenumberBlocks();
if (BBSectionsType == BasicBlockSection::Labels) {
MF.setBBSectionsType(BBSectionsType);
- return true;
+ return false;
}
- BBSectionsProfileReader = &getAnalysis<BasicBlockSectionsProfileReader>();
+ DenseMap<UniqueBBID, BBClusterInfo> FuncClusterInfo;
+ if (BBSectionsType == BasicBlockSection::List) {
+ auto [HasProfile, ClusterInfo] =
+ getAnalysis<BasicBlockSectionsProfileReader>()
+ .getClusterInfoForFunction(MF.getName());
+ if (!HasProfile)
+ return false;
+ for (auto &BBClusterInfo : ClusterInfo) {
+ FuncClusterInfo.try_emplace(BBClusterInfo.BBID, BBClusterInfo);
+ }
+ }
- // Map from BBID of blocks to their cluster information.
- DenseMap<unsigned, BBClusterInfo> FuncBBClusterInfo;
- if (BBSectionsType == BasicBlockSection::List &&
- !getBBClusterInfoForFunction(MF, BBSectionsProfileReader,
- FuncBBClusterInfo))
- return true;
MF.setBBSectionsType(BBSectionsType);
- assignSections(MF, FuncBBClusterInfo);
+ assignSections(MF, FuncClusterInfo);
// We make sure that the cluster including the entry basic block precedes all
// other clusters.
@@ -385,8 +350,8 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
// If the two basic block are in the same section, the order is decided by
// their position within the section.
if (XSectionID.Type == MBBSectionID::SectionType::Default)
- return FuncBBClusterInfo.lookup(X.getBBIDOrNumber()).PositionInCluster <
- FuncBBClusterInfo.lookup(Y.getBBIDOrNumber()).PositionInCluster;
+ return FuncClusterInfo.lookup(*X.getBBID()).PositionInCluster <
+ FuncClusterInfo.lookup(*Y.getBBID()).PositionInCluster;
return X.getNumber() < Y.getNumber();
};
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 5dede452ec34..15b6f63e8632 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -13,14 +13,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
@@ -33,49 +35,224 @@ INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader",
"Reads and parses a basic block sections profile.", false,
false)
+Expected<UniqueBBID>
+BasicBlockSectionsProfileReader::parseUniqueBBID(StringRef S) const {
+ SmallVector<StringRef, 2> Parts;
+ S.split(Parts, '.');
+ if (Parts.size() > 2)
+ return createProfileParseError(Twine("unable to parse basic block id: '") +
+ S + "'");
+ unsigned long long BaseBBID;
+ if (getAsUnsignedInteger(Parts[0], 10, BaseBBID))
+ return createProfileParseError(
+ Twine("unable to parse BB id: '" + Parts[0]) +
+ "': unsigned integer expected");
+ unsigned long long CloneID = 0;
+ if (Parts.size() > 1 && getAsUnsignedInteger(Parts[1], 10, CloneID))
+ return createProfileParseError(Twine("unable to parse clone id: '") +
+ Parts[1] + "': unsigned integer expected");
+ return UniqueBBID{static_cast<unsigned>(BaseBBID),
+ static_cast<unsigned>(CloneID)};
+}
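parseUniqueBBID accepts both the plain and the dotted clone form that cluster lines use. Illustrative inputs and outcomes for the parser above:

    // "3"     -> UniqueBBID{3, 0}   (original block; CloneID defaults to 0)
    // "3.1"   -> UniqueBBID{3, 1}   (first clone of block 3)
    // "3.x"   -> parse error: "unable to parse clone id"
    // "1.2.3" -> parse error: more than one '.' separator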
+
bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const {
- return getBBClusterInfoForFunction(FuncName).first;
+ return getClusterInfoForFunction(FuncName).first;
}
std::pair<bool, SmallVector<BBClusterInfo>>
-BasicBlockSectionsProfileReader::getBBClusterInfoForFunction(
+BasicBlockSectionsProfileReader::getClusterInfoForFunction(
StringRef FuncName) const {
- auto R = ProgramBBClusterInfo.find(getAliasName(FuncName));
- return R != ProgramBBClusterInfo.end()
- ? std::pair(true, R->second)
- : std::pair(false, SmallVector<BBClusterInfo>{});
+ auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName));
+ return R != ProgramPathAndClusterInfo.end()
+ ? std::pair(true, R->second.ClusterInfo)
+ : std::pair(false, SmallVector<BBClusterInfo>());
}
-// Basic Block Sections can be enabled for a subset of machine basic blocks.
-// This is done by passing a file containing names of functions for which basic
-// block sections are desired. Additionally, machine basic block ids of the
-// functions can also be specified for a finer granularity. Moreover, a cluster
-// of basic blocks could be assigned to the same section.
-// Optionally, a debug-info filename can be specified for each function to allow
-// distinguishing internal-linkage functions of the same name.
-// A file with basic block sections for all of function main and three blocks
-// for function foo (of which 1 and 2 are placed in a cluster) looks like this:
-// (Profile for function foo is only loaded when its debug-info filename
-// matches 'path/to/foo_file.cc').
-// ----------------------------
-// list.txt:
-// !main
-// !foo M=path/to/foo_file.cc
-// !!1 2
-// !!4
-Error BasicBlockSectionsProfileReader::ReadProfile() {
- assert(MBuf);
- line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
+SmallVector<SmallVector<unsigned>>
+BasicBlockSectionsProfileReader::getClonePathsForFunction(
+ StringRef FuncName) const {
+ return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).ClonePaths;
+}
+
+// Reads the version 1 basic block sections profile. Profile for each function
+// is encoded as follows:
+// m <module_name>
+// f <function_name_1> <function_name_2> ...
+// c <bb_id_1> <bb_id_2> <bb_id_3>
+// c <bb_id_4> <bb_id_5>
+// ...
+// Module name specifier (starting with 'm') is optional and allows
+// distinguishing profiles for internal-linkage functions with the same name.
+// If not specified, the profile applies to any function with that name. The
+// function name specifier (starting with 'f') can list multiple aliases for
+// the function. Each cluster line (starting with 'c') specifies one cluster
+// of basic blocks and the internal order in which they must be placed within
+// the same section.
+// This profile can also specify cloning paths which instruct the compiler to
+// clone basic blocks along a path. The cloned blocks are then specified in the
+// cluster information.
+// The following profile lists two cloning paths (starting with 'p') for
+// function bar and places the resulting 9 blocks within two clusters. The first two
+// blocks of a cloning path specify the edge along which the path is cloned. For
+// instance, path 1 (1 -> 3 -> 4) instructs that 3 and 4 must be cloned along
+// the edge 1->3. Within the given clusters, each cloned block is identified by
+// "<original block id>.<clone id>". For instance, 3.1 represents the first
+// clone of block 3. Original blocks are specified just with their block ids. A
+// block cloned multiple times appears with distinct clone ids. The CFG for bar
+// is shown below before and after cloning with its final clusters labeled.
+//
+// f main
+// f bar
+// p 1 3 4 # cloning path 1
+// p 4 2 # cloning path 2
+// c 1 3.1 4.1 6 # basic block cluster 1
+// c 0 2 3 4 2.1 5 # basic block cluster 2
+// ****************************************************************************
+// function bar before and after cloning with basic block clusters shown.
+// ****************************************************************************
+// .... ..............
+// 0 -------+ : 0 :---->: 1 ---> 3.1 :
+// | | : | : :........ | :
+// v v : v : : v :
+// +--> 2 --> 5 1 ~~~~~~> +---: 2 : : 4.1: cluster 1
+// | | | | : | : : | :
+// | v | | : v ....... : v :
+// | 3 <------+ | : 3 <--+ : : 6 :
+// | | | : | | : :....:
+// | v | : v | :
+// +--- 4 ---> 6 | : 4 | :
+// | : | | :
+// | : v | :
+// | :2.1---+ : cluster 2
+// | : | ......:
+// | : v :
+// +-->: 5 :
+// ....
+// ****************************************************************************
+Error BasicBlockSectionsProfileReader::ReadV1Profile() {
+ auto FI = ProgramPathAndClusterInfo.end();
+
+ // Current cluster ID corresponding to this function.
+ unsigned CurrentCluster = 0;
+ // Current position in the current cluster.
+ unsigned CurrentPosition = 0;
- auto invalidProfileError = [&](auto Message) {
- return make_error<StringError>(
- Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " +
- Twine(LineIt.line_number()) + ": " + Message),
- inconvertibleErrorCode());
- };
+ // Temporary set to ensure every basic block ID appears once in the clusters
+ // of a function.
+ DenseSet<UniqueBBID> FuncBBIDs;
- auto FI = ProgramBBClusterInfo.end();
+ // Debug-info-based module filename for the current function. Empty string
+ // means no filename.
+ StringRef DIFilename;
+ for (; !LineIt.is_at_eof(); ++LineIt) {
+ StringRef S(*LineIt);
+ char Specifier = S[0];
+ S = S.drop_front().trim();
+ SmallVector<StringRef, 4> Values;
+ S.split(Values, ' ');
+ switch (Specifier) {
+ case '@':
+ continue;
+ case 'm': // Module name specifier.
+ if (Values.size() != 1) {
+ return createProfileParseError(Twine("invalid module name value: '") +
+ S + "'");
+ }
+ DIFilename = sys::path::remove_leading_dotslash(Values[0]);
+ continue;
+ case 'f': { // Function names specifier.
+ bool FunctionFound = any_of(Values, [&](StringRef Alias) {
+ auto It = FunctionNameToDIFilename.find(Alias);
+ // No match if this function name is not found in this module.
+ if (It == FunctionNameToDIFilename.end())
+ return false;
+ // Return a match if debug-info-filename is not specified. Otherwise,
+ // check for equality.
+ return DIFilename.empty() || It->second.equals(DIFilename);
+ });
+ if (!FunctionFound) {
+ // Skip the following profile by setting the profile iterator (FI) to
+ // the past-the-end element.
+ FI = ProgramPathAndClusterInfo.end();
+ DIFilename = "";
+ continue;
+ }
+ for (size_t i = 1; i < Values.size(); ++i)
+ FuncAliasMap.try_emplace(Values[i], Values.front());
+
+ // Prepare for parsing clusters of this function name.
+ // Start a new cluster map for this function name.
+ auto R = ProgramPathAndClusterInfo.try_emplace(Values.front());
+ // Report error when multiple profiles have been specified for the same
+ // function.
+ if (!R.second)
+ return createProfileParseError("duplicate profile for function '" +
+ Values.front() + "'");
+ FI = R.first;
+ CurrentCluster = 0;
+ FuncBBIDs.clear();
+ // We won't need DIFilename anymore. Clear it to avoid applying it to the
+ // next function.
+ DIFilename = "";
+ continue;
+ }
+ case 'c': // Basic block cluster specifier.
+ // Skip the profile when the profile iterator (FI) refers to the
+ // past-the-end element.
+ if (FI == ProgramPathAndClusterInfo.end())
+ continue;
+ // Reset current cluster position.
+ CurrentPosition = 0;
+ for (auto BasicBlockIDStr : Values) {
+ auto BasicBlockID = parseUniqueBBID(BasicBlockIDStr);
+ if (!BasicBlockID)
+ return BasicBlockID.takeError();
+ if (!FuncBBIDs.insert(*BasicBlockID).second)
+ return createProfileParseError(
+ Twine("duplicate basic block id found '") + BasicBlockIDStr +
+ "'");
+
+ if (!BasicBlockID->BaseID && CurrentPosition)
+ return createProfileParseError(
+ "entry BB (0) does not begin a cluster.");
+
+ FI->second.ClusterInfo.emplace_back(BBClusterInfo{
+ *std::move(BasicBlockID), CurrentCluster, CurrentPosition++});
+ }
+ CurrentCluster++;
+ continue;
+ case 'p': { // Basic block cloning path specifier.
+ // Skip the profile when the profile iterator (FI) refers to the
+ // past-the-end element.
+ if (FI == ProgramPathAndClusterInfo.end())
+ continue;
+ SmallSet<unsigned, 5> BBsInPath;
+ FI->second.ClonePaths.push_back({});
+ for (size_t I = 0; I < Values.size(); ++I) {
+ auto BaseBBIDStr = Values[I];
+ unsigned long long BaseBBID = 0;
+ if (getAsUnsignedInteger(BaseBBIDStr, 10, BaseBBID))
+ return createProfileParseError(Twine("unsigned integer expected: '") +
+ BaseBBIDStr + "'");
+ if (I != 0 && !BBsInPath.insert(BaseBBID).second)
+ return createProfileParseError(
+ Twine("duplicate cloned block in path: '") + BaseBBIDStr + "'");
+ FI->second.ClonePaths.back().push_back(BaseBBID);
+ }
+ continue;
+ }
+ default:
+ return createProfileParseError(Twine("invalid specifier: '") +
+ Twine(Specifier) + "'");
+ }
+ llvm_unreachable("should not break from this switch statement");
+ }
+ return Error::success();
+}
+
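
Putting the grammar above together, a complete version-1 profile for the bar example could look like the listing below; the file path is illustrative only. The leading "v1" line is the version header consumed by ReadProfile further down.

    v1
    m path/to/bar_file.cc
    f bar
    p 1 3 4
    c 1 3.1 4.1 6
    c 0 2 3 4 2.1 5
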
+Error BasicBlockSectionsProfileReader::ReadV0Profile() {
+ auto FI = ProgramPathAndClusterInfo.end();
// Current cluster ID corresponding to this function.
unsigned CurrentCluster = 0;
// Current position in the current cluster.
@@ -96,7 +273,7 @@ Error BasicBlockSectionsProfileReader::ReadProfile() {
if (S.consume_front("!")) {
// Skip the profile when the profile iterator (FI) refers to the
// past-the-end element.
- if (FI == ProgramBBClusterInfo.end())
+ if (FI == ProgramPathAndClusterInfo.end())
continue;
SmallVector<StringRef, 4> BBIDs;
S.split(BBIDs, ' ');
@@ -105,16 +282,19 @@ Error BasicBlockSectionsProfileReader::ReadProfile() {
for (auto BBIDStr : BBIDs) {
unsigned long long BBID;
if (getAsUnsignedInteger(BBIDStr, 10, BBID))
- return invalidProfileError(Twine("Unsigned integer expected: '") +
- BBIDStr + "'.");
+ return createProfileParseError(Twine("unsigned integer expected: '") +
+ BBIDStr + "'");
if (!FuncBBIDs.insert(BBID).second)
- return invalidProfileError(Twine("Duplicate basic block id found '") +
- BBIDStr + "'.");
+ return createProfileParseError(
+ Twine("duplicate basic block id found '") + BBIDStr + "'");
if (BBID == 0 && CurrentPosition)
- return invalidProfileError("Entry BB (0) does not begin a cluster.");
+ return createProfileParseError(
+ "entry BB (0) does not begin a cluster");
- FI->second.emplace_back(
- BBClusterInfo{((unsigned)BBID), CurrentCluster, CurrentPosition++});
+ FI->second.ClusterInfo.emplace_back(
+ BBClusterInfo({{static_cast<unsigned>(BBID), 0},
+ CurrentCluster,
+ CurrentPosition++}));
}
CurrentCluster++;
} else {
@@ -122,14 +302,14 @@ Error BasicBlockSectionsProfileReader::ReadProfile() {
// specifier starting with `M=`.
auto [AliasesStr, DIFilenameStr] = S.split(' ');
SmallString<128> DIFilename;
- if (DIFilenameStr.startswith("M=")) {
+ if (DIFilenameStr.starts_with("M=")) {
DIFilename =
sys::path::remove_leading_dotslash(DIFilenameStr.substr(2));
if (DIFilename.empty())
- return invalidProfileError("Empty module name specifier.");
+ return createProfileParseError("empty module name specifier");
} else if (!DIFilenameStr.empty()) {
- return invalidProfileError("Unknown string found: '" + DIFilenameStr +
- "'.");
+ return createProfileParseError("unknown string found: '" +
+ DIFilenameStr + "'");
}
// Function aliases are separated using '/'. We use the first function
// name for the cluster info mapping and delegate all other aliases to
@@ -148,7 +328,7 @@ Error BasicBlockSectionsProfileReader::ReadProfile() {
if (!FunctionFound) {
// Skip the following profile by setting the profile iterator (FI) to
// the past-the-end element.
- FI = ProgramBBClusterInfo.end();
+ FI = ProgramPathAndClusterInfo.end();
continue;
}
for (size_t i = 1; i < Aliases.size(); ++i)
@@ -156,12 +336,12 @@ Error BasicBlockSectionsProfileReader::ReadProfile() {
// Prepare for parsing clusters of this function name.
// Start a new cluster map for this function name.
- auto R = ProgramBBClusterInfo.try_emplace(Aliases.front());
+ auto R = ProgramPathAndClusterInfo.try_emplace(Aliases.front());
// Report error when multiple profiles have been specified for the same
// function.
if (!R.second)
- return invalidProfileError("Duplicate profile for function '" +
- Aliases.front() + "'.");
+ return createProfileParseError("duplicate profile for function '" +
+ Aliases.front() + "'");
FI = R.first;
CurrentCluster = 0;
FuncBBIDs.clear();
@@ -170,6 +350,51 @@ Error BasicBlockSectionsProfileReader::ReadProfile() {
return Error::success();
}
+// Basic Block Sections can be enabled for a subset of machine basic blocks.
+// This is done by passing a file containing names of functions for which basic
+// block sections are desired. Additionally, machine basic block ids of the
+// functions can also be specified for a finer granularity. Moreover, a cluster
+// of basic blocks could be assigned to the same section.
+// Optionally, a debug-info filename can be specified for each function to allow
+// distinguishing internal-linkage functions of the same name.
+// A file with basic block sections for all of function main and three blocks
+// for function foo (of which 1 and 2 are placed in a cluster) looks like this:
+// (Profile for function foo is only loaded when its debug-info filename
+// matches 'path/to/foo_file.cc').
+// ----------------------------
+// list.txt:
+// !main
+// !foo M=path/to/foo_file.cc
+// !!1 2
+// !!4
+Error BasicBlockSectionsProfileReader::ReadProfile() {
+ assert(MBuf);
+
+ unsigned long long Version = 0;
+ StringRef FirstLine(*LineIt);
+ if (FirstLine.consume_front("v")) {
+ if (getAsUnsignedInteger(FirstLine, 10, Version)) {
+ return createProfileParseError(Twine("version number expected: '") +
+ FirstLine + "'");
+ }
+ if (Version > 1) {
+ return createProfileParseError(Twine("invalid profile version: ") +
+ Twine(Version));
+ }
+ ++LineIt;
+ }
+
+ switch (Version) {
+ case 0:
+ // TODO: Deprecate V0 once V1 is fully integrated downstream.
+ return ReadV0Profile();
+ case 1:
+ return ReadV1Profile();
+ default:
+ llvm_unreachable("Invalid profile version.");
+ }
+}
+
bool BasicBlockSectionsProfileReader::doInitialization(Module &M) {
if (!MBuf)
return false;
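
A sketch of how a client might query the reader once the profile has been parsed. It assumes a constructed BasicBlockSectionsProfileReader named Reader; the usage pattern is inferred from the accessors above rather than taken from an in-tree caller.

    // Query the cluster and cloning information recorded for function "bar".
    if (Reader.isFunctionHot("bar")) {
      auto [Found, Clusters] = Reader.getClusterInfoForFunction("bar");
      // Each BBClusterInfo pairs a UniqueBBID with its cluster and position.
      for (const BBClusterInfo &CI : Clusters)
        (void)CI;
      // Paths along which blocks should be cloned before layout.
      SmallVector<SmallVector<unsigned>> Paths =
          Reader.getClonePathsForFunction("bar");
      (void)Found;
      (void)Paths;
    }
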
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
index 05494f1ddc67..f50eb5e1730a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -26,6 +26,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -78,12 +79,19 @@ class BranchRelaxation : public MachineFunctionPass {
};
SmallVector<BasicBlockInfo, 16> BlockInfo;
+
+ // The basic block after which trampolines are inserted. This is the last
+ // basic block that isn't in the cold section.
+ MachineBasicBlock *TrampolineInsertionPoint = nullptr;
+ SmallDenseSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>>
+ RelaxedUnconditionals;
std::unique_ptr<RegScavenger> RS;
LivePhysRegs LiveRegs;
MachineFunction *MF = nullptr;
const TargetRegisterInfo *TRI = nullptr;
const TargetInstrInfo *TII = nullptr;
+ const TargetMachine *TM = nullptr;
bool relaxBranchInstructions();
void scanFunction();
@@ -142,7 +150,8 @@ void BranchRelaxation::verify() {
if (MI.getOpcode() == TargetOpcode::FAULTING_OP)
continue;
MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
- assert(isBlockInRange(MI, *DestBB));
+ assert(isBlockInRange(MI, *DestBB) ||
+ RelaxedUnconditionals.contains({&MBB, DestBB}));
}
}
#endif
@@ -165,15 +174,28 @@ void BranchRelaxation::scanFunction() {
BlockInfo.clear();
BlockInfo.resize(MF->getNumBlockIDs());
+ TrampolineInsertionPoint = nullptr;
+ RelaxedUnconditionals.clear();
+
// First thing, compute the size of all basic blocks, and see if the function
// has any inline assembly in it. If so, we have to be conservative about
// alignment assumptions, as we don't know for sure the size of any
- // instructions in the inline assembly.
- for (MachineBasicBlock &MBB : *MF)
+ // instructions in the inline assembly. At the same time, place the
+ // trampoline insertion point at the end of the hot portion of the function.
+ for (MachineBasicBlock &MBB : *MF) {
BlockInfo[MBB.getNumber()].Size = computeBlockSize(MBB);
+ if (MBB.getSectionID() != MBBSectionID::ColdSectionID)
+ TrampolineInsertionPoint = &MBB;
+ }
+
// Compute block offsets and known bits.
adjustBlockOffsets(*MF->begin());
+
+ if (TrampolineInsertionPoint == nullptr) {
+ LLVM_DEBUG(dbgs() << " No suitable trampoline insertion point found in "
+ << MF->getName() << ".\n");
+ }
}
/// computeBlockSize - Compute the size for MBB.
@@ -232,6 +254,11 @@ BranchRelaxation::createNewBlockAfter(MachineBasicBlock &OrigMBB,
MachineBasicBlock *NewBB = MF->CreateMachineBasicBlock(BB);
MF->insert(++OrigMBB.getIterator(), NewBB);
+ // Place the new block in the same section as OrigMBB.
+ NewBB->setSectionID(OrigMBB.getSectionID());
+ NewBB->setIsEndSection(OrigMBB.isEndSection());
+ OrigMBB.setIsEndSection(false);
+
// Insert an entry into BlockInfo to align it properly with the block numbers.
BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
@@ -241,8 +268,9 @@ BranchRelaxation::createNewBlockAfter(MachineBasicBlock &OrigMBB,
/// Split the basic block containing MI into two blocks, which are joined by
/// an unconditional branch. Update data structures and renumber blocks to
/// account for this change and returns the newly created block.
-MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI,
- MachineBasicBlock *DestBB) {
+MachineBasicBlock *
+BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI,
+ MachineBasicBlock *DestBB) {
MachineBasicBlock *OrigBB = MI.getParent();
// Create a new MBB for the code after the OrigBB.
@@ -250,6 +278,11 @@ MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI,
MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
MF->insert(++OrigBB->getIterator(), NewBB);
+ // Place the new block in the same section as OrigBB.
+ NewBB->setSectionID(OrigBB->getSectionID());
+ NewBB->setIsEndSection(OrigBB->isEndSection());
+ OrigBB->setIsEndSection(false);
+
// Splice the instructions starting with MI over to NewBB.
NewBB->splice(NewBB->end(), OrigBB, MI.getIterator(), OrigBB->end());
@@ -300,7 +333,12 @@ bool BranchRelaxation::isBlockInRange(
int64_t BrOffset = getInstrOffset(MI);
int64_t DestOffset = BlockInfo[DestBB.getNumber()].Offset;
- if (TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - BrOffset))
+ const MachineBasicBlock *SrcBB = MI.getParent();
+
+ if (TII->isBranchOffsetInRange(MI.getOpcode(),
+ SrcBB->getSectionID() != DestBB.getSectionID()
+ ? TM->getMaxCodeSize()
+ : DestOffset - BrOffset))
return true;
LLVM_DEBUG(dbgs() << "Out of range branch to destination "
@@ -358,6 +396,50 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
assert(!Fail && "branches to be relaxed must be analyzable");
(void)Fail;
+ // Since cross-section conditional branches to the cold section are rarely
+ // taken, try to avoid inverting the condition. Instead, add a "trampoline
+ // branch", which unconditionally branches to the branch destination. Place
+ // the trampoline branch at the end of the function and retarget the
+ // conditional branch to the trampoline.
+ // tbz L1
+ // =>
+ // tbz L1Trampoline
+ // ...
+ // L1Trampoline: b L1
+ if (MBB->getSectionID() != TBB->getSectionID() &&
+ TBB->getSectionID() == MBBSectionID::ColdSectionID &&
+ TrampolineInsertionPoint != nullptr) {
+ // If the insertion point is out of range, we can't put a trampoline there.
+ NewBB =
+ createNewBlockAfter(*TrampolineInsertionPoint, MBB->getBasicBlock());
+
+ if (isBlockInRange(MI, *NewBB)) {
+ LLVM_DEBUG(dbgs() << " Retarget destination to trampoline at "
+ << NewBB->back());
+
+ insertUncondBranch(NewBB, TBB);
+
+ // Update the successor lists to include the trampoline.
+ MBB->replaceSuccessor(TBB, NewBB);
+ NewBB->addSuccessor(TBB);
+
+ // Replace branch in the current (MBB) block.
+ removeBranch(MBB);
+ insertBranch(MBB, NewBB, FBB, Cond);
+
+ TrampolineInsertionPoint = NewBB;
+ finalizeBlockChanges(MBB, NewBB);
+ return true;
+ }
+
+ LLVM_DEBUG(
+ dbgs() << " Trampoline insertion point out of range for Bcc from "
+ << printMBBReference(*MBB) << " to " << printMBBReference(*TBB)
+ << ".\n");
+ TrampolineInsertionPoint->setIsEndSection(NewBB->isEndSection());
+ MF->erase(NewBB);
+ }
+
// Add an unconditional branch to the destination and invert the branch
// condition to jump over it:
// tbz L1
@@ -462,7 +544,10 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {
int64_t DestOffset = BlockInfo[DestBB->getNumber()].Offset;
int64_t SrcOffset = getInstrOffset(MI);
- assert(!TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - SrcOffset));
+ assert(!TII->isBranchOffsetInRange(
+ MI.getOpcode(), MBB->getSectionID() != DestBB->getSectionID()
+ ? TM->getMaxCodeSize()
+ : DestOffset - SrcOffset));
BlockInfo[MBB->getNumber()].Size -= OldBrSize;
@@ -482,6 +567,8 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {
BranchBB->sortUniqueLiveIns();
BranchBB->addSuccessor(DestBB);
MBB->replaceSuccessor(DestBB, BranchBB);
+ if (TrampolineInsertionPoint == MBB)
+ TrampolineInsertionPoint = BranchBB;
}
DebugLoc DL = MI.getDebugLoc();
@@ -492,15 +579,41 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {
// be erased.
MachineBasicBlock *RestoreBB = createNewBlockAfter(MF->back(),
DestBB->getBasicBlock());
+ std::prev(RestoreBB->getIterator())
+ ->setIsEndSection(RestoreBB->isEndSection());
+ RestoreBB->setIsEndSection(false);
TII->insertIndirectBranch(*BranchBB, *DestBB, *RestoreBB, DL,
- DestOffset - SrcOffset, RS.get());
+ BranchBB->getSectionID() != DestBB->getSectionID()
+ ? TM->getMaxCodeSize()
+ : DestOffset - SrcOffset,
+ RS.get());
BlockInfo[BranchBB->getNumber()].Size = computeBlockSize(*BranchBB);
adjustBlockOffsets(*MBB);
- // If RestoreBB is required, try to place just before DestBB.
+ // If RestoreBB is required, place it appropriately.
if (!RestoreBB->empty()) {
+ // If the jump is Cold -> Hot, don't place the restore block (which is
+ // cold) in the middle of the function. Place it at the end.
+ if (MBB->getSectionID() == MBBSectionID::ColdSectionID &&
+ DestBB->getSectionID() != MBBSectionID::ColdSectionID) {
+ MachineBasicBlock *NewBB = createNewBlockAfter(*TrampolineInsertionPoint);
+ TII->insertUnconditionalBranch(*NewBB, DestBB, DebugLoc());
+ BlockInfo[NewBB->getNumber()].Size = computeBlockSize(*NewBB);
+
+ // New trampolines should be inserted after NewBB.
+ TrampolineInsertionPoint = NewBB;
+
+ // Retarget the unconditional branch to the trampoline block.
+ BranchBB->replaceSuccessor(DestBB, NewBB);
+ NewBB->addSuccessor(DestBB);
+
+ DestBB = NewBB;
+ }
+
+ // In all other cases, try to place just before DestBB.
+
// TODO: For multiple far branches to the same destination, there are
// chances that some restore blocks could be shared if they clobber the
// same registers and share the same restore sequence. So far, those
@@ -525,9 +638,16 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {
BlockInfo[RestoreBB->getNumber()].Size = computeBlockSize(*RestoreBB);
// Update the offset starting from the previous block.
adjustBlockOffsets(*PrevBB);
+
+ // Fix up section information for RestoreBB and DestBB.
+ RestoreBB->setSectionID(DestBB->getSectionID());
+ RestoreBB->setIsBeginSection(DestBB->isBeginSection());
+ DestBB->setIsBeginSection(false);
+ RelaxedUnconditionals.insert({BranchBB, RestoreBB});
} else {
// Remove restore block if it's not required.
MF->erase(RestoreBB);
+ RelaxedUnconditionals.insert({BranchBB, DestBB});
}
return true;
@@ -553,7 +673,8 @@ bool BranchRelaxation::relaxBranchInstructions() {
// Unconditional branch destination might be unanalyzable, assume these
// are OK.
if (MachineBasicBlock *DestBB = TII->getBranchDestBlock(*Last)) {
- if (!isBlockInRange(*Last, *DestBB)) {
+ if (!isBlockInRange(*Last, *DestBB) && !TII->isTailCall(*Last) &&
+ !RelaxedUnconditionals.contains({&MBB, DestBB})) {
fixupUnconditionalBranch(*Last);
++NumUnconditionalRelaxed;
Changed = true;
@@ -607,6 +728,7 @@ bool BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
const TargetSubtargetInfo &ST = MF->getSubtarget();
TII = ST.getInstrInfo();
+ TM = &MF->getTarget();
TRI = ST.getRegisterInfo();
if (TRI->trackLivenessAfterRegAlloc(*MF))
@@ -632,6 +754,7 @@ bool BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
LLVM_DEBUG(dbgs() << " Basic blocks after relaxation\n\n"; dumpBBs());
BlockInfo.clear();
+ RelaxedUnconditionals.clear();
return MadeChange;
}
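
The recurring idiom in the hunks above: when the source and destination blocks sit in different sections, their final distance is unknown at this stage, so the pass substitutes the most conservative offset the target must handle. A minimal sketch of that decision, factored into a hypothetical helper (the pass itself performs the check inline):

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/TargetInstrInfo.h"
    #include "llvm/Target/TargetMachine.h"
    using namespace llvm;

    // Conservative cross-section range test: same section -> use the measured
    // offset; different sections -> assume the worst case, TM->getMaxCodeSize().
    static bool branchReaches(const TargetInstrInfo *TII, const TargetMachine *TM,
                              const MachineInstr &MI, const MachineBasicBlock &Src,
                              const MachineBasicBlock &Dest, int64_t Offset) {
      int64_t Dist = Src.getSectionID() != Dest.getSectionID()
                         ? static_cast<int64_t>(TM->getMaxCodeSize())
                         : Offset;
      return TII->isBranchOffsetInRange(MI.getOpcode(), Dist);
    }
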
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CFIFixup.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CFIFixup.cpp
index 837dbd77d073..61888a426665 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CFIFixup.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CFIFixup.cpp
@@ -10,20 +10,27 @@
// This pass inserts the necessary instructions to adjust for the inconsistency
// of the call-frame information caused by final machine basic block layout.
// The pass relies on constraints LLVM imposes on the placement of
-// save/restore points (cf. ShrinkWrap):
-// * there is a single basic block, containing the function prologue
-// * possibly multiple epilogue blocks, where each epilogue block is
-// complete and self-contained, i.e. CSR restore instructions (and the
-// corresponding CFI instructions are not split across two or more blocks.
-// * prologue and epilogue blocks are outside of any loops
-// Thus, during execution, at the beginning and at the end of each basic block
-// the function can be in one of two states:
+// save/restore points (cf. ShrinkWrap) and has certain preconditions about
+// placement of CFI instructions:
+// * For any two CFI instructions of the function prologue, one dominates
+// and is post-dominated by the other.
+// * The function possibly contains multiple epilogue blocks, where each
+// epilogue block is complete and self-contained, i.e. CSR restore
+// instructions (and the corresponding CFI instructions)
+// are not split across two or more blocks.
+// * CFI instructions are not contained in any loops.
+
+// Thus, during execution, at the beginning and at the end of each basic block,
+// following the prologue, the function can be in one of two states:
// - "has a call frame", if the function has executed the prologue, and
// has not executed any epilogue
// - "does not have a call frame", if the function has not executed the
// prologue, or has executed an epilogue
// which can be computed by a single RPO traversal.
+// The location of the prologue is determined by finding the first block in the
+// reverse traversal which contains CFI instructions.
+
// In order to accommodate backends which do not generate unwind info in
// epilogues we compute an additional property "strong no call frame on entry",
// which is set for the entry point of the function and for every block
@@ -85,10 +92,6 @@ static bool isPrologueCFIInstruction(const MachineInstr &MI) {
MI.getFlag(MachineInstr::FrameSetup);
}
-static bool containsPrologue(const MachineBasicBlock &MBB) {
- return llvm::any_of(MBB.instrs(), isPrologueCFIInstruction);
-}
-
static bool containsEpilogue(const MachineBasicBlock &MBB) {
return llvm::any_of(llvm::reverse(MBB), [](const auto &MI) {
return MI.getOpcode() == TargetOpcode::CFI_INSTRUCTION &&
@@ -96,6 +99,23 @@ static bool containsEpilogue(const MachineBasicBlock &MBB) {
});
}
+static MachineBasicBlock *
+findPrologueEnd(MachineFunction &MF, MachineBasicBlock::iterator &PrologueEnd) {
+ // Even though we should theoretically traverse the blocks in post-order, we
+ // can't encode correctly cases where prologue blocks are not laid out in
+ // topological order. Then, assuming topological order, we can just traverse
+ // the function in reverse.
+ for (MachineBasicBlock &MBB : reverse(MF)) {
+ for (MachineInstr &MI : reverse(MBB.instrs())) {
+ if (!isPrologueCFIInstruction(MI))
+ continue;
+ PrologueEnd = std::next(MI.getIterator());
+ return &MBB;
+ }
+ }
+ return nullptr;
+}
+
bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
const TargetFrameLowering &TFL = *MF.getSubtarget().getFrameLowering();
if (!TFL.enableCFIFixup(MF))
@@ -105,6 +125,13 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
if (NumBlocks < 2)
return false;
+ // Find the prologue and the point where we can issue the first
+ // `.cfi_remember_state`.
+ MachineBasicBlock::iterator PrologueEnd;
+ MachineBasicBlock *PrologueBlock = findPrologueEnd(MF, PrologueEnd);
+ if (PrologueBlock == nullptr)
+ return false;
+
struct BlockFlags {
bool Reachable : 1;
bool StrongNoFrameOnEntry : 1;
@@ -116,21 +143,15 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
BlockInfo[0].StrongNoFrameOnEntry = true;
// Compute the presence/absence of frame at each basic block.
- MachineBasicBlock *PrologueBlock = nullptr;
ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
for (MachineBasicBlock *MBB : RPOT) {
BlockFlags &Info = BlockInfo[MBB->getNumber()];
// Set to true if the current block contains the prologue or the epilogue,
// respectively.
- bool HasPrologue = false;
+ bool HasPrologue = MBB == PrologueBlock;
bool HasEpilogue = false;
- if (!PrologueBlock && !Info.HasFrameOnEntry && containsPrologue(*MBB)) {
- PrologueBlock = MBB;
- HasPrologue = true;
- }
-
if (Info.HasFrameOnEntry || HasPrologue)
HasEpilogue = containsEpilogue(*MBB);
@@ -149,9 +170,6 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
}
}
- if (!PrologueBlock)
- return false;
-
// Walk the blocks of the function in "physical" order.
// Every block inherits the frame state (as recorded in the unwind tables)
// of the previous block. If the intended frame state is different, insert
@@ -162,10 +180,7 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
// insert a `.cfi_remember_state`, in the case that the current block needs a
// `.cfi_restore_state`.
MachineBasicBlock *InsertMBB = PrologueBlock;
- MachineBasicBlock::iterator InsertPt = PrologueBlock->begin();
- for (MachineInstr &MI : *PrologueBlock)
- if (isPrologueCFIInstruction(MI))
- InsertPt = std::next(MI.getIterator());
+ MachineBasicBlock::iterator InsertPt = PrologueEnd;
assert(InsertPt != PrologueBlock->begin() &&
"Inconsistent notion of \"prologue block\"");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
index 6a024287f002..87b062a16df1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -151,7 +151,7 @@ void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) {
Register InitialRegister =
MF.getSubtarget().getFrameLowering()->getInitialCFARegister(MF);
InitialRegister = TRI.getDwarfRegNum(InitialRegister, true);
- unsigned NumRegs = TRI.getNumRegs();
+ unsigned NumRegs = TRI.getNumSupportedRegs(MF);
// Initialize MBBMap.
for (MachineBasicBlock &MBB : MF) {
@@ -181,7 +181,7 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
MachineFunction *MF = MBBInfo.MBB->getParent();
const std::vector<MCCFIInstruction> &Instrs = MF->getFrameInstructions();
const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
- unsigned NumRegs = TRI.getNumRegs();
+ unsigned NumRegs = TRI.getNumSupportedRegs(*MF);
BitVector CSRSaved(NumRegs), CSRRestored(NumRegs);
// Determine cfa offset and register set by the block.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 5a005ba7b414..f3cb7fa5af61 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -8,6 +8,7 @@
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -97,7 +98,7 @@ bool VirtRegAuxInfo::isRematerializable(const LiveInterval &LI,
// Trace copies introduced by live range splitting. The inline
// spiller can rematerialize through these copies, so the spill
// weight must reflect this.
- while (MI->isFullCopy()) {
+ while (TII.isFullCopyInstr(*MI)) {
// The copy destination must match the interval register.
if (MI->getOperand(0).getReg() != Reg)
return false;
@@ -145,14 +146,23 @@ void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &LI) {
LI.setWeight(Weight);
}
+static bool canMemFoldInlineAsm(LiveInterval &LI,
+ const MachineRegisterInfo &MRI) {
+ for (const MachineOperand &MO : MRI.reg_operands(LI.reg())) {
+ const MachineInstr *MI = MO.getParent();
+ if (MI->isInlineAsm() && MI->mayFoldInlineAsmRegOp(MI->getOperandNo(&MO)))
+ return true;
+ }
+
+ return false;
+}
+
float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
SlotIndex *End) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineBasicBlock *MBB = nullptr;
- MachineLoop *Loop = nullptr;
- bool IsExiting = false;
float TotalWeight = 0;
unsigned NumInstr = 0; // Number of instructions using LI
SmallPtrSet<MachineInstr *, 8> Visited;
@@ -209,6 +219,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
}
};
+ bool IsExiting = false;
std::set<CopyHint> CopyHints;
DenseMap<unsigned, float> Hint;
for (MachineRegisterInfo::reg_instr_nodbg_iterator
@@ -224,7 +235,16 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
continue;
NumInstr++;
- if (MI->isIdentityCopy() || MI->isImplicitDef())
+ bool IdentityCopy = false;
+ auto DestSrc = TII.isCopyInstr(*MI);
+ if (DestSrc) {
+ const MachineOperand *DestRegOp = DestSrc->Destination;
+ const MachineOperand *SrcRegOp = DestSrc->Source;
+ IdentityCopy = DestRegOp->getReg() == SrcRegOp->getReg() &&
+ DestRegOp->getSubReg() == SrcRegOp->getSubReg();
+ }
+
+ if (IdentityCopy || MI->isImplicitDef())
continue;
if (!Visited.insert(MI).second)
continue;
@@ -241,7 +261,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
// Get loop info for mi.
if (MI->getParent() != MBB) {
MBB = MI->getParent();
- Loop = Loops.getLoopFor(MBB);
+ const MachineLoop *Loop = Loops.getLoopFor(MBB);
IsExiting = Loop ? Loop->isLoopExiting(MBB) : false;
}
@@ -258,7 +278,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
}
// Get allocation hints from copies.
- if (!MI->isCopy())
+ if (!TII.isCopyInstr(*MI))
continue;
Register HintReg = copyHint(MI, LI.reg(), TRI, MRI);
if (!HintReg)
@@ -305,7 +325,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
// into instruction itself makes perfect sense.
if (ShouldUpdateLI && LI.isZeroLength(LIS.getSlotIndexes()) &&
!LI.isLiveAtIndexes(LIS.getRegMaskSlots()) &&
- !isLiveAtStatepointVarArg(LI)) {
+ !isLiveAtStatepointVarArg(LI) && !canMemFoldInlineAsm(LI, MRI)) {
LI.markNotSpillable();
return -1.0;
}
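
The common thread in this file is replacing generic-opcode checks (isFullCopy, isCopy, isIdentityCopy) with the TargetInstrInfo hook, so target-specific copy-like instructions get the same spill-weight treatment as plain COPYs. A minimal sketch of the identity test built on that hook, factored into a hypothetical helper:

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/TargetInstrInfo.h"
    using namespace llvm;

    // isCopyInstr returns a DestSourcePair for generic COPYs and for
    // target-specific copy-like instructions alike.
    static bool isIdentityCopyLike(const TargetInstrInfo &TII,
                                   const MachineInstr &MI) {
      auto DestSrc = TII.isCopyInstr(MI);
      if (!DestSrc)
        return false;
      return DestSrc->Destination->getReg() == DestSrc->Source->getReg() &&
             DestSrc->Destination->getSubReg() == DestSrc->Source->getSubReg();
    }
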
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CallBrPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CallBrPrepare.cpp
index db243a0bfebe..fddc4d74b2da 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CallBrPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CallBrPrepare.cpp
@@ -31,6 +31,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/CallBrPrepare.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -53,15 +54,16 @@ using namespace llvm;
#define DEBUG_TYPE "callbrprepare"
+static bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT);
+static bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs,
+ DominatorTree &DT);
+static void UpdateSSA(DominatorTree &DT, CallBrInst *CBR, CallInst *Intrinsic,
+ SSAUpdater &SSAUpdate);
+static SmallVector<CallBrInst *, 2> FindCallBrs(Function &Fn);
+
namespace {
class CallBrPrepare : public FunctionPass {
- bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT);
- bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs,
- DominatorTree &DT) const;
- void UpdateSSA(DominatorTree &DT, CallBrInst *CBR, CallInst *Intrinsic,
- SSAUpdater &SSAUpdate) const;
-
public:
CallBrPrepare() : FunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override;
@@ -71,6 +73,26 @@ public:
} // end anonymous namespace
+PreservedAnalyses CallBrPreparePass::run(Function &Fn,
+ FunctionAnalysisManager &FAM) {
+ bool Changed = false;
+ SmallVector<CallBrInst *, 2> CBRs = FindCallBrs(Fn);
+
+ if (CBRs.empty())
+ return PreservedAnalyses::all();
+
+ auto &DT = FAM.getResult<DominatorTreeAnalysis>(Fn);
+
+ Changed |= SplitCriticalEdges(CBRs, DT);
+ Changed |= InsertIntrinsicCalls(CBRs, DT);
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+
char CallBrPrepare::ID = 0;
INITIALIZE_PASS_BEGIN(CallBrPrepare, DEBUG_TYPE, "Prepare callbr", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
@@ -82,7 +104,7 @@ void CallBrPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTreeWrapperPass>();
}
-static SmallVector<CallBrInst *, 2> FindCallBrs(Function &Fn) {
+SmallVector<CallBrInst *, 2> FindCallBrs(Function &Fn) {
SmallVector<CallBrInst *, 2> CBRs;
for (BasicBlock &BB : Fn)
if (auto *CBR = dyn_cast<CallBrInst>(BB.getTerminator()))
@@ -91,8 +113,7 @@ static SmallVector<CallBrInst *, 2> FindCallBrs(Function &Fn) {
return CBRs;
}
-bool CallBrPrepare::SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs,
- DominatorTree &DT) {
+bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT) {
bool Changed = false;
CriticalEdgeSplittingOptions Options(&DT);
Options.setMergeIdenticalEdges();
@@ -114,8 +135,7 @@ bool CallBrPrepare::SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs,
return Changed;
}
-bool CallBrPrepare::InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs,
- DominatorTree &DT) const {
+bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT) {
bool Changed = false;
SmallPtrSet<const BasicBlock *, 4> Visited;
IRBuilder<> Builder(CBRs[0]->getContext());
@@ -160,9 +180,8 @@ static void PrintDebugDomInfo(const DominatorTree &DT, const Use &U,
}
#endif
-void CallBrPrepare::UpdateSSA(DominatorTree &DT, CallBrInst *CBR,
- CallInst *Intrinsic,
- SSAUpdater &SSAUpdate) const {
+void UpdateSSA(DominatorTree &DT, CallBrInst *CBR, CallInst *Intrinsic,
+ SSAUpdater &SSAUpdate) {
SmallPtrSet<Use *, 4> Visited;
BasicBlock *DefaultDest = CBR->getDefaultDest();
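
With the pass now exposed as CallBrPreparePass for the new pass manager, a client could schedule it roughly as follows. This is a sketch showing only the registration step; it assumes the standard new-pass-manager setup around it.

    #include "llvm/CodeGen/CallBrPrepare.h"
    #include "llvm/IR/PassManager.h"

    void addCallBrPrepare(llvm::FunctionPassManager &FPM) {
      // run() splits critical edges off callbr terminators and inserts the
      // bookkeeping intrinsic calls, preserving DominatorTreeAnalysis.
      FPM.addPass(llvm::CallBrPreparePass());
    }
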
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
index 6272b654b329..7b73a7b11ddf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
@@ -20,6 +20,7 @@ using namespace llvm;
void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeAssignmentTrackingAnalysisPass(Registry);
initializeAtomicExpandPass(Registry);
+ initializeBasicBlockPathCloningPass(Registry);
initializeBasicBlockSectionsPass(Registry);
initializeBranchFolderPassPass(Registry);
initializeBranchRelaxationPass(Registry);
@@ -40,7 +41,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeEarlyTailDuplicatePass(Registry);
initializeExpandLargeDivRemLegacyPassPass(Registry);
initializeExpandLargeFpConvertLegacyPassPass(Registry);
- initializeExpandMemCmpPassPass(Registry);
+ initializeExpandMemCmpLegacyPassPass(Registry);
initializeExpandPostRAPass(Registry);
initializeFEntryInserterPass(Registry);
initializeFinalizeISelPass(Registry);
@@ -52,7 +53,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeHardwareLoopsLegacyPass(Registry);
initializeIfConverterPass(Registry);
initializeImplicitNullChecksPass(Registry);
- initializeIndirectBrExpandPassPass(Registry);
+ initializeIndirectBrExpandLegacyPassPass(Registry);
initializeInterleavedLoadCombinePass(Registry);
initializeInterleavedAccessPass(Registry);
initializeJMCInstrumenterPass(Registry);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp
index 7f37f2069a3b..82945528e768 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp
@@ -17,9 +17,11 @@ using namespace llvm;
namespace llvm {
#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
- AnalysisKey PASS_NAME::Key;
+ MachinePassKey PASS_NAME::Key;
#include "llvm/CodeGen/MachinePassRegistry.def"
#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ MachinePassKey PASS_NAME::Key;
+#define DUMMY_MACHINE_FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) \
AnalysisKey PASS_NAME::Key;
#include "llvm/CodeGen/MachinePassRegistry.def"
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
index b00df0b6c6cb..6e99fb133e26 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -198,7 +198,7 @@ static cl::opt<bool> BBSectionsGuidedSectionPrefix(
"impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
"profiles."));
-static cl::opt<unsigned> FreqRatioToSkipMerge(
+static cl::opt<uint64_t> FreqRatioToSkipMerge(
"cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
cl::desc("Skip merging empty blocks if (frequency of empty block) / "
"(frequency of destination block) is greater than this ratio"));
@@ -268,6 +268,11 @@ static cl::opt<unsigned>
MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
cl::Hidden,
cl::desc("Max number of address users to look at"));
+
+static cl::opt<bool>
+ DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false),
+ cl::desc("Disable elimination of dead PHI nodes."));
+
namespace {
enum ExtType {
@@ -454,6 +459,8 @@ private:
bool optimizeExtractElementInst(Instruction *Inst);
bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
bool fixupDbgValue(Instruction *I);
+ bool fixupDPValue(DPValue &I);
+ bool fixupDPValuesOnInst(Instruction &I);
bool placeDbgValues(Function &F);
bool placePseudoProbes(Function &F);
bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
@@ -878,8 +885,12 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
// as we remove them.
// Note that this intentionally skips the entry block.
SmallVector<WeakTrackingVH, 16> Blocks;
- for (auto &Block : llvm::drop_begin(F))
+ for (auto &Block : llvm::drop_begin(F)) {
+ // Delete phi nodes that could block deleting other empty blocks.
+ if (!DisableDeletePHIs)
+ MadeChange |= DeleteDeadPHIs(&Block, TLInfo);
Blocks.push_back(&Block);
+ }
for (auto &Block : Blocks) {
BasicBlock *BB = cast_or_null<BasicBlock>(Block);
@@ -977,8 +988,8 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
BBFreq += BFI->getBlockFreq(SameValueBB);
- return PredFreq.getFrequency() <=
- BBFreq.getFrequency() * FreqRatioToSkipMerge;
+ std::optional<BlockFrequency> Limit = BBFreq.mul(FreqRatioToSkipMerge);
+ return !Limit || PredFreq <= *Limit;
}
/// Return true if we can merge BB into DestBB if there is a single
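
The rewritten comparison above guards against overflow: BlockFrequency::mul returns std::nullopt when the product would overflow, and the code then treats the limit as unbounded. The same check as a standalone helper (hypothetical name, for illustration only):

    #include "llvm/Support/BlockFrequency.h"
    #include <cstdint>
    #include <optional>

    // True when PredFreq <= BBFreq * Ratio; an overflowing product counts as
    // "no limit", so merging is still considered acceptable.
    static bool withinFreqRatio(llvm::BlockFrequency PredFreq,
                                llvm::BlockFrequency BBFreq, uint64_t Ratio) {
      std::optional<llvm::BlockFrequency> Limit = BBFreq.mul(Ratio);
      return !Limit || PredFreq <= *Limit;
    }
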
@@ -1200,6 +1211,7 @@ simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
if (RI->getStatepoint() == RelocatedBase->getStatepoint())
if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
RelocatedBase->moveBefore(RI);
+ MadeChange = true;
break;
}
@@ -1372,7 +1384,8 @@ static bool SinkCast(CastInst *CI) {
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
assert(InsertPt != UserBB->end());
InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),
- CI->getType(), "", &*InsertPt);
+ CI->getType(), "");
+ InsertedCast->insertBefore(*UserBB, InsertPt);
InsertedCast->setDebugLoc(CI->getDebugLoc());
}
@@ -1743,8 +1756,8 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
assert(InsertPt != UserBB->end());
InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
- Cmp->getOperand(0), Cmp->getOperand(1), "",
- &*InsertPt);
+ Cmp->getOperand(0), Cmp->getOperand(1), "");
+ InsertedCmp->insertBefore(*UserBB, InsertPt);
// Propagate the debug info.
InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
}
@@ -2046,20 +2059,24 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
assert(InsertPt != TruncUserBB->end());
// Sink the shift
if (ShiftI->getOpcode() == Instruction::AShr)
- InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
- "", &*InsertPt);
+ InsertedShift =
+ BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
else
- InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
- "", &*InsertPt);
+ InsertedShift =
+ BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
+ InsertedShift->insertBefore(*TruncUserBB, InsertPt);
// Sink the trunc
BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
TruncInsertPt++;
+ // It will go ahead of any debug-info.
+ TruncInsertPt.setHeadBit(true);
assert(TruncInsertPt != TruncUserBB->end());
InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
- TruncI->getType(), "", &*TruncInsertPt);
+ TruncI->getType(), "");
+ InsertedTrunc->insertBefore(*TruncUserBB, TruncInsertPt);
InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
MadeChange = true;
@@ -2147,11 +2164,12 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
assert(InsertPt != UserBB->end());
if (ShiftI->getOpcode() == Instruction::AShr)
- InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
- "", &*InsertPt);
+ InsertedShift =
+ BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
else
- InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
- "", &*InsertPt);
+ InsertedShift =
+ BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
+ InsertedShift->insertBefore(*UserBB, InsertPt);
InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
MadeChange = true;
@@ -2224,7 +2242,9 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
// Create another block after the count zero intrinsic. A PHI will be added
// in this block to select the result of the intrinsic or the bit-width
// constant if the input to the intrinsic is zero.
- BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros));
+ BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(CountZeros));
+ // Any debug-info after CountZeros should not be included.
+ SplitPt.setHeadBit(true);
BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
if (IsHugeFunc)
FreshBBs.insert(EndBlock);
@@ -2253,7 +2273,7 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
// Create a PHI in the end block to select either the output of the intrinsic
// or the bit width of the operand.
- Builder.SetInsertPoint(&EndBlock->front());
+ Builder.SetInsertPoint(EndBlock, EndBlock->begin());
PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc);
Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
@@ -2581,9 +2601,8 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
(void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
assert(!VerifyBFIUpdates ||
BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
- BFI->setBlockFreq(
- BB,
- (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)).getFrequency());
+ BFI->setBlockFreq(BB,
+ (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)));
ModifiedDT = ModifyDT::ModifyBBDT;
Changed = true;
++NumRetsDup;
@@ -2820,6 +2839,7 @@ class TypePromotionTransaction {
Instruction *PrevInst;
BasicBlock *BB;
} Point;
+ std::optional<DPValue::self_iterator> BeforeDPValue = std::nullopt;
/// Remember whether or not the instruction had a previous instruction.
bool HasPrevInstruction;
@@ -2827,12 +2847,19 @@ class TypePromotionTransaction {
public:
/// Record the position of \p Inst.
InsertionHandler(Instruction *Inst) {
- BasicBlock::iterator It = Inst->getIterator();
- HasPrevInstruction = (It != (Inst->getParent()->begin()));
- if (HasPrevInstruction)
- Point.PrevInst = &*--It;
- else
- Point.BB = Inst->getParent();
+ HasPrevInstruction = (Inst != &*(Inst->getParent()->begin()));
+ BasicBlock *BB = Inst->getParent();
+
+ // Record where we would have to re-insert the instruction in the sequence
+ // of DPValues, if we ended up reinserting.
+ if (BB->IsNewDbgInfoFormat)
+ BeforeDPValue = Inst->getDbgReinsertionPosition();
+
+ if (HasPrevInstruction) {
+ Point.PrevInst = &*std::prev(Inst->getIterator());
+ } else {
+ Point.BB = BB;
+ }
}
/// Insert \p Inst at the recorded position.
@@ -2840,14 +2867,16 @@ class TypePromotionTransaction {
if (HasPrevInstruction) {
if (Inst->getParent())
Inst->removeFromParent();
- Inst->insertAfter(Point.PrevInst);
+ Inst->insertAfter(&*Point.PrevInst);
} else {
- Instruction *Position = &*Point.BB->getFirstInsertionPt();
+ BasicBlock::iterator Position = Point.BB->getFirstInsertionPt();
if (Inst->getParent())
- Inst->moveBefore(Position);
+ Inst->moveBefore(*Point.BB, Position);
else
- Inst->insertBefore(Position);
+ Inst->insertBefore(*Point.BB, Position);
}
+
+ Inst->getParent()->reinsertInstInDPValues(Inst, BeforeDPValue);
}
};
@@ -3050,6 +3079,8 @@ class TypePromotionTransaction {
SmallVector<InstructionAndIdx, 4> OriginalUses;
/// Keep track of the debug users.
SmallVector<DbgValueInst *, 1> DbgValues;
+ /// And non-instruction debug-users too.
+ SmallVector<DPValue *, 1> DPValues;
/// Keep track of the new value so that we can undo it by replacing
/// instances of the new value with the original value.
@@ -3070,7 +3101,7 @@ class TypePromotionTransaction {
}
// Record the debug uses separately. They are not in the instruction's
// use list, but they are replaced by RAUW.
- findDbgValues(DbgValues, Inst);
+ findDbgValues(DbgValues, Inst, &DPValues);
// Now, we can replace the uses.
Inst->replaceAllUsesWith(New);
@@ -3087,6 +3118,10 @@ class TypePromotionTransaction {
// correctness and utility of debug value instructions.
for (auto *DVI : DbgValues)
DVI->replaceVariableLocationOp(New, Inst);
+ // The same applies to DPValues, the non-instruction representation of
+ // dbg.values.
+ for (DPValue *DPV : DPValues)
+ DPV->replaceVariableLocationOp(New, Inst);
}
};
@@ -3183,10 +3218,6 @@ public:
/// Same as IRBuilder::createZExt.
Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
- /// Same as Instruction::moveBefore.
- void moveBefore(Instruction *Inst, Instruction *Before);
- /// @}
-
private:
/// The ordered list of actions made so far.
SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
@@ -3246,13 +3277,6 @@ Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
return Val;
}
-void TypePromotionTransaction::moveBefore(Instruction *Inst,
- Instruction *Before) {
- Actions.push_back(
- std::make_unique<TypePromotionTransaction::InstructionMoveBefore>(
- Inst, Before));
-}
-
TypePromotionTransaction::ConstRestorationPt
TypePromotionTransaction::getRestorationPoint() const {
return !Actions.empty() ? Actions.back().get() : nullptr;
@@ -4559,8 +4583,6 @@ Value *TypePromotionHelper::promoteOperandForOther(
// Step #2.
TPT.replaceAllUsesWith(Ext, ExtOpnd);
// Step #3.
- Instruction *ExtForOpnd = Ext;
-
LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
++OpIdx) {
@@ -4588,33 +4610,21 @@ Value *TypePromotionHelper::promoteOperandForOther(
}
// Otherwise we have to explicitly sign extend the operand.
- // Check if Ext was reused to extend an operand.
- if (!ExtForOpnd) {
- // If yes, create a new one.
- LLVM_DEBUG(dbgs() << "More operands to ext\n");
- Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
- : TPT.createZExt(Ext, Opnd, Ext->getType());
- if (!isa<Instruction>(ValForExtOpnd)) {
- TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
- continue;
- }
- ExtForOpnd = cast<Instruction>(ValForExtOpnd);
- }
+ Value *ValForExtOpnd = IsSExt
+ ? TPT.createSExt(ExtOpnd, Opnd, Ext->getType())
+ : TPT.createZExt(ExtOpnd, Opnd, Ext->getType());
+ TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
+ Instruction *InstForExtOpnd = dyn_cast<Instruction>(ValForExtOpnd);
+ if (!InstForExtOpnd)
+ continue;
+
if (Exts)
- Exts->push_back(ExtForOpnd);
- TPT.setOperand(ExtForOpnd, 0, Opnd);
+ Exts->push_back(InstForExtOpnd);
- // Move the sign extension before the insertion point.
- TPT.moveBefore(ExtForOpnd, ExtOpnd);
- TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
- CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
- // If more sext are required, new instructions will have to be created.
- ExtForOpnd = nullptr;
- }
- if (ExtForOpnd == Ext) {
- LLVM_DEBUG(dbgs() << "Extension is useless now\n");
- TPT.eraseInstruction(Ext);
+ CreatedInstsCost += !TLI.isExtFree(InstForExtOpnd);
}
+ LLVM_DEBUG(dbgs() << "Extension is useless now\n");
+ TPT.eraseInstruction(Ext);
return ExtOpnd;
}
@@ -5493,7 +5503,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
return Modified;
} else {
Type *I8PtrTy =
- Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
+ Builder.getPtrTy(Addr->getType()->getPointerAddressSpace());
Type *I8Ty = Builder.getInt8Ty();
// Start with the base register. Do this first so that subsequent address
@@ -6104,6 +6114,55 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
Value *NewBaseGEP = nullptr;
+ auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
+ GetElementPtrInst *GEP) {
+ LLVMContext &Ctx = GEP->getContext();
+ Type *PtrIdxTy = DL->getIndexType(GEP->getType());
+ Type *I8PtrTy =
+ PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace());
+ Type *I8Ty = Type::getInt8Ty(Ctx);
+
+ BasicBlock::iterator NewBaseInsertPt;
+ BasicBlock *NewBaseInsertBB;
+ if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
+ // If the base of the struct is an instruction, the new base will be
+ // inserted close to it.
+ NewBaseInsertBB = BaseI->getParent();
+ if (isa<PHINode>(BaseI))
+ NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+ else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
+ NewBaseInsertBB =
+ SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
+ NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+ } else
+ NewBaseInsertPt = std::next(BaseI->getIterator());
+ } else {
+ // If the current base is an argument or global value, the new base
+ // will be inserted to the entry block.
+ NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
+ NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+ }
+ IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
+ // Create a new base.
+ Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
+ NewBaseGEP = OldBase;
+ if (NewBaseGEP->getType() != I8PtrTy)
+ NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
+ NewBaseGEP =
+ NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
+ NewGEPBases.insert(NewBaseGEP);
+ return;
+ };
+
+ // Check whether all the offsets can be encoded with prefered common base.
+ if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset(
+ LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
+ BaseOffset = PreferBase;
+ // Create a new base if the offset of the BaseGEP can be decoded with one
+ // instruction.
+ createNewBase(BaseOffset, OldBase, BaseGEP);
+ }
+
auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
GetElementPtrInst *GEP = LargeOffsetGEP->first;
@@ -6129,56 +6188,20 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
// Generate a new GEP to replace the current one.
LLVMContext &Ctx = GEP->getContext();
Type *PtrIdxTy = DL->getIndexType(GEP->getType());
- Type *I8PtrTy =
- Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace());
Type *I8Ty = Type::getInt8Ty(Ctx);
if (!NewBaseGEP) {
// Create a new base if we don't have one yet. Find the insertion
// pointer for the new base first.
- BasicBlock::iterator NewBaseInsertPt;
- BasicBlock *NewBaseInsertBB;
- if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
- // If the base of the struct is an instruction, the new base will be
- // inserted close to it.
- NewBaseInsertBB = BaseI->getParent();
- if (isa<PHINode>(BaseI))
- NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
- else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
- NewBaseInsertBB =
- SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
- NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
- } else
- NewBaseInsertPt = std::next(BaseI->getIterator());
- } else {
- // If the current base is an argument or global value, the new base
- // will be inserted to the entry block.
- NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
- NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
- }
- IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
- // Create a new base.
- Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
- NewBaseGEP = OldBase;
- if (NewBaseGEP->getType() != I8PtrTy)
- NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
- NewBaseGEP =
- NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
- NewGEPBases.insert(NewBaseGEP);
+ createNewBase(BaseOffset, OldBase, GEP);
}
IRBuilder<> Builder(GEP);
Value *NewGEP = NewBaseGEP;
- if (Offset == BaseOffset) {
- if (GEP->getType() != I8PtrTy)
- NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
- } else {
+ if (Offset != BaseOffset) {
// Calculate the new offset for the new GEP.
Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset);
NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index);
-
- if (GEP->getType() != I8PtrTy)
- NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
}
replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc);
LargeOffsetGEPID.erase(GEP);
@@ -6295,7 +6318,7 @@ bool CodeGenPrepare::optimizePhiType(
// correct type.
ValueToValueMap ValMap;
for (ConstantData *C : Constants)
- ValMap[C] = ConstantExpr::getCast(Instruction::BitCast, C, ConvertTy);
+ ValMap[C] = ConstantExpr::getBitCast(C, ConvertTy);
for (Instruction *D : Defs) {
if (isa<BitCastInst>(D)) {
ValMap[D] = D->getOperand(0);
@@ -6589,7 +6612,8 @@ bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
if (!InsertedTrunc) {
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
assert(InsertPt != UserBB->end());
- InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt);
+ InsertedTrunc = new TruncInst(I, Src->getType(), "");
+ InsertedTrunc->insertBefore(*UserBB, InsertPt);
InsertedInsts.insert(InsertedTrunc);
}
@@ -6754,7 +6778,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
!TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
return false;
- IRBuilder<> Builder(Load->getNextNode());
+ IRBuilder<> Builder(Load->getNextNonDebugInstruction());
auto *NewAnd = cast<Instruction>(
Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
// Mark this instruction as "inserted by CGP", so that other
@@ -6948,6 +6972,11 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
// Increment the current iterator to skip all the rest of select instructions
// because they will be either "not lowered" or "all lowered" to branch.
CurInstIterator = std::next(LastSI->getIterator());
+ // Examine debug-info attached to the consecutive select instructions. They
+ // won't be individually optimized by optimizeInst, so we need to perform
+ // DPValue maintenance here instead.
+ for (SelectInst *SI : ArrayRef(ASI).drop_front())
+ fixupDPValuesOnInst(*SI);
bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
@@ -7010,7 +7039,9 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
// Split the select block, according to how many (if any) values go on each
// side.
BasicBlock *StartBlock = SI->getParent();
- BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
+ BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(LastSI));
+ // We should split before any debug-info.
+ SplitPt.setHeadBit(true);
IRBuilder<> IB(SI);
auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
@@ -7022,18 +7053,18 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
BranchInst *FalseBranch = nullptr;
if (TrueInstrs.size() == 0) {
FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
- CondFr, &*SplitPt, false, nullptr, nullptr, LI));
+ CondFr, SplitPt, false, nullptr, nullptr, LI));
FalseBlock = FalseBranch->getParent();
EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
} else if (FalseInstrs.size() == 0) {
TrueBranch = cast<BranchInst>(SplitBlockAndInsertIfThen(
- CondFr, &*SplitPt, false, nullptr, nullptr, LI));
+ CondFr, SplitPt, false, nullptr, nullptr, LI));
TrueBlock = TrueBranch->getParent();
EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
} else {
Instruction *ThenTerm = nullptr;
Instruction *ElseTerm = nullptr;
- SplitBlockAndInsertIfThenElse(CondFr, &*SplitPt, &ThenTerm, &ElseTerm,
+ SplitBlockAndInsertIfThenElse(CondFr, SplitPt, &ThenTerm, &ElseTerm,
nullptr, nullptr, LI);
TrueBranch = cast<BranchInst>(ThenTerm);
FalseBranch = cast<BranchInst>(ElseTerm);
@@ -7057,7 +7088,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
FreshBBs.insert(EndBlock);
}
- BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
+ BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
static const unsigned MD[] = {
LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
@@ -7087,7 +7118,8 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
// to get the PHI operand.
for (SelectInst *SI : llvm::reverse(ASI)) {
// The select itself is replaced with a PHI Node.
- PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
+ PHINode *PN = PHINode::Create(SI->getType(), 2, "");
+ PN->insertBefore(EndBlock->begin());
PN->takeName(SI);
PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
@@ -7841,9 +7873,7 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
bool IsLE = SI.getModule()->getDataLayout().isLittleEndian();
auto CreateSplitStore = [&](Value *V, bool Upper) {
V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
- Value *Addr = Builder.CreateBitCast(
- SI.getOperand(1),
- SplitStoreType->getPointerTo(SI.getPointerAddressSpace()));
+ Value *Addr = SI.getPointerOperand();
Align Alignment = SI.getAlign();
const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
if (IsOffsetStore) {
@@ -7991,6 +8021,8 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
return false;
if (UGEPI->getOperand(0) != GEPIOp)
return false;
+ if (UGEPI->getSourceElementType() != GEPI->getSourceElementType())
+ return false;
if (GEPIIdx->getType() !=
cast<ConstantInt>(UGEPI->getOperand(1))->getType())
return false;
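
The new getSourceElementType() check above matters because two GEPs can agree on pointer operand and index yet compute different addresses when they step by different element sizes. A standalone sketch of the stride arithmetic (gepModel is a hypothetical helper, not an LLVM API):

    #include <cstdint>

    // `gep i32, ptr %p, i64 1` advances 4 bytes while `gep i64, ptr %p, i64 1`
    // advances 8: same operands, different source element type, different
    // address, so the two GEPs must not be unmerged into one.
    const char *gepModel(const char *P, uint64_t Idx, uint64_t EltSize) {
      return P + Idx * EltSize; // i32 -> EltSize 4, i64 -> EltSize 8
    }
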
@@ -8099,10 +8131,13 @@ static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
}
bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
+ bool AnyChange = false;
+ AnyChange = fixupDPValuesOnInst(*I);
+
// Bail out if we inserted the instruction to prevent optimizations from
// stepping on each other's toes.
if (InsertedInsts.count(I))
- return false;
+ return AnyChange;
// TODO: Move into the switch on opcode below here.
if (PHINode *P = dyn_cast<PHINode>(I)) {
@@ -8116,7 +8151,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
++NumPHIsElim;
return true;
}
- return false;
+ return AnyChange;
}
if (CastInst *CI = dyn_cast<CastInst>(I)) {
@@ -8127,7 +8162,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
// the address of globals out of a loop). If this is the case, we don't
// want to forward-subst the cast.
if (isa<Constant>(CI->getOperand(0)))
- return false;
+ return AnyChange;
if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
return true;
@@ -8153,7 +8188,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
return MadeChange | optimizeExtUses(I);
}
}
- return false;
+ return AnyChange;
}
if (auto *Cmp = dyn_cast<CmpInst>(I))
@@ -8220,7 +8255,6 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
return true;
}
- return false;
}
if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
@@ -8249,7 +8283,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
return true;
}
}
- return false;
+ return AnyChange;
}
if (tryToSinkFreeOperands(I))
@@ -8274,7 +8308,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
return optimizeBranch(cast<BranchInst>(I), *TLI, FreshBBs, IsHugeFunc);
}
- return false;
+ return AnyChange;
}
/// Given an OR instruction, check to see if this is a bitreverse
@@ -8364,6 +8398,56 @@ bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
return AnyChange;
}
+bool CodeGenPrepare::fixupDPValuesOnInst(Instruction &I) {
+ bool AnyChange = false;
+ for (DPValue &DPV : I.getDbgValueRange())
+ AnyChange |= fixupDPValue(DPV);
+ return AnyChange;
+}
+
+// FIXME: should updating debug-info really cause the "changed" flag to fire,
+// which can cause a function to be reprocessed?
+bool CodeGenPrepare::fixupDPValue(DPValue &DPV) {
+ if (DPV.Type != DPValue::LocationType::Value)
+ return false;
+
+ // Does this DPValue refer to a sunk address calculation?
+ bool AnyChange = false;
+ SmallDenseSet<Value *> LocationOps(DPV.location_ops().begin(),
+ DPV.location_ops().end());
+ for (Value *Location : LocationOps) {
+ WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
+ Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
+ if (SunkAddr) {
+ // Point the dbg.value at the locally computed address, which should give
+ // it the best opportunity to be accurately lowered. This update may change
+ // the type of pointer being referred to; however, this makes no difference
+ // to the debugging information, and we can't generate bitcasts that may
+ // affect codegen.
+ DPV.replaceVariableLocationOp(Location, SunkAddr);
+ AnyChange = true;
+ }
+ }
+ return AnyChange;
+}
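
A toy model of the SunkAddrs lookup above; WeakHandle is a stand-in for WeakTrackingVH, assuming only that it knows whether its value is still alive:

    #include <map>

    struct WeakHandle {
      void *Ptr = nullptr;
      bool Alive = false; // cleared when the referenced value dies
    };

    // A dead handle yields no replacement, so only live sunk addresses are
    // used to retarget the debug-value operand.
    void *lookupSunkAddr(std::map<void *, WeakHandle> &SunkAddrs, void *Loc) {
      WeakHandle &H = SunkAddrs[Loc];
      return H.Alive ? H.Ptr : nullptr;
    }
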
+
+static void DbgInserterHelper(DbgValueInst *DVI, Instruction *VI) {
+ DVI->removeFromParent();
+ if (isa<PHINode>(VI))
+ DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
+ else
+ DVI->insertAfter(VI);
+}
+
+static void DbgInserterHelper(DPValue *DPV, Instruction *VI) {
+ DPV->removeFromParent();
+ BasicBlock *VIBB = VI->getParent();
+ if (isa<PHINode>(VI))
+ VIBB->insertDPValueBefore(DPV, VIBB->getFirstInsertionPt());
+ else
+ VIBB->insertDPValueAfter(DPV, VI);
+}
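
The two overloads above exist so that one generic lambda (the DbgProcessor introduced below) can service both debug-info representations. A minimal, compilable sketch of that dispatch pattern, with stand-in types rather than the LLVM classes:

    #include <iostream>

    struct DbgValueInst {}; // stand-in for the intrinsic form
    struct DPValue {};      // stand-in for the record form

    static void DbgInserterHelper(DbgValueInst *) { std::cout << "intrinsic\n"; }
    static void DbgInserterHelper(DPValue *) { std::cout << "record\n"; }

    int main() {
      DbgValueInst DVI;
      DPValue DPV;
      // A generic lambda picks the right overload for whatever it is handed.
      auto Process = [](auto *Item) { DbgInserterHelper(Item); };
      Process(&DVI); // prints "intrinsic"
      Process(&DPV); // prints "record"
    }
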
+
// An llvm.dbg.value may be using a value before its definition, due to
// optimizations in this pass and others. Scan for such dbg.values, and rescue
// them by moving the dbg.value to immediately after the value definition.
@@ -8373,59 +8457,69 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
bool MadeChange = false;
DominatorTree DT(F);
- for (BasicBlock &BB : F) {
- for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
- DbgValueInst *DVI = dyn_cast<DbgValueInst>(&Insn);
- if (!DVI)
+ auto DbgProcessor = [&](auto *DbgItem, Instruction *Position) {
+ SmallVector<Instruction *, 4> VIs;
+ for (Value *V : DbgItem->location_ops())
+ if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
+ VIs.push_back(VI);
+
+ // This item may depend on multiple instructions, complicating any
+ // potential sink. This block takes the defensive approach, opting to
+ // "undef" the item if it has more than one instruction and any of them do
+ // not dominate it.
+ for (Instruction *VI : VIs) {
+ if (VI->isTerminator())
continue;
- SmallVector<Instruction *, 4> VIs;
- for (Value *V : DVI->getValues())
- if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
- VIs.push_back(VI);
-
- // This DVI may depend on multiple instructions, complicating any
- // potential sink. This block takes the defensive approach, opting to
- // "undef" the DVI if it has more than one instruction and any of them do
- // not dominate DVI.
- for (Instruction *VI : VIs) {
- if (VI->isTerminator())
- continue;
+ // If VI is a phi in a block with an EHPad terminator, we can't insert
+ // after it.
+ if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
+ continue;
- // If VI is a phi in a block with an EHPad terminator, we can't insert
- // after it.
- if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
- continue;
+ // If the defining instruction dominates the dbg.value, we do not need
+ // to move the dbg.value.
+ if (DT.dominates(VI, Position))
+ continue;
- // If the defining instruction dominates the dbg.value, we do not need
- // to move the dbg.value.
- if (DT.dominates(VI, DVI))
- continue;
+ // If we depend on multiple instructions and any of them doesn't
+ // dominate this debug item, we probably can't salvage it: moving it to
+ // after any of the instructions could cause us to lose the others.
+ if (VIs.size() > 1) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Unable to find valid location for Debug Value, undefing:\n"
+ << *DbgItem);
+ DbgItem->setKillLocation();
+ break;
+ }
- // If we depend on multiple instructions and any of them doesn't
- // dominate this DVI, we probably can't salvage it: moving it to
- // after any of the instructions could cause us to lose the others.
- if (VIs.size() > 1) {
- LLVM_DEBUG(
- dbgs()
- << "Unable to find valid location for Debug Value, undefing:\n"
- << *DVI);
- DVI->setKillLocation();
- break;
- }
+ LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
+ << *DbgItem << ' ' << *VI);
+ DbgInserterHelper(DbgItem, VI);
+ MadeChange = true;
+ ++NumDbgValueMoved;
+ }
+ };
- LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
- << *DVI << ' ' << *VI);
- DVI->removeFromParent();
- if (isa<PHINode>(VI))
- DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
- else
- DVI->insertAfter(VI);
- MadeChange = true;
- ++NumDbgValueMoved;
+ for (BasicBlock &BB : F) {
+ for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
+ // Process dbg.value intrinsics.
+ DbgValueInst *DVI = dyn_cast<DbgValueInst>(&Insn);
+ if (DVI) {
+ DbgProcessor(DVI, DVI);
+ continue;
+ }
+
+ // If this isn't a dbg.value, process any DPValue records attached to
+ // this instruction.
+ for (DPValue &DPV : llvm::make_early_inc_range(Insn.getDbgValueRange())) {
+ if (DPV.Type != DPValue::LocationType::Value)
+ continue;
+ DbgProcessor(&DPV, &Insn);
}
}
}
+
return MadeChange;
}
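
A toy model of the rescue performed above, assuming a block is just an ordered list of named entries; only the move-after-definition mechanics are modelled:

    #include <iterator>
    #include <list>
    #include <string>

    // A debug record that refers to a value defined later in the block is
    // detached and reinserted immediately after the definition, so it never
    // observes the value before it exists.
    void hoistAfterDef(std::list<std::string> &Block,
                       std::list<std::string>::iterator DbgIt,
                       std::list<std::string>::iterator DefIt) {
      std::string Rec = *DbgIt;
      Block.erase(DbgIt);
      Block.insert(std::next(DefIt), Rec);
    }
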
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
index c34a52a6f2de..c6d7827f36df 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
@@ -18,8 +18,10 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCTargetOptionsCommandFlags.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/TargetParser/Host.h"
#include "llvm/TargetParser/SubtargetFeature.h"
#include "llvm/TargetParser/Triple.h"
@@ -58,6 +60,7 @@ CGLIST(std::string, MAttrs)
CGOPT_EXP(Reloc::Model, RelocModel)
CGOPT(ThreadModel::Model, ThreadModel)
CGOPT_EXP(CodeModel::Model, CodeModel)
+CGOPT_EXP(uint64_t, LargeDataThreshold)
CGOPT(ExceptionHandling, ExceptionModel)
CGOPT_EXP(CodeGenFileType, FileType)
CGOPT(FramePointerKind, FramePointerUsage)
@@ -162,6 +165,12 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
clEnumValN(CodeModel::Large, "large", "Large code model")));
CGBINDOPT(CodeModel);
+ static cl::opt<uint64_t> LargeDataThreshold(
+ "large-data-threshold",
+ cl::desc("Choose large data threshold for x86_64 medium code model"),
+ cl::init(0));
+ CGBINDOPT(LargeDataThreshold);
+
static cl::opt<ExceptionHandling> ExceptionModel(
"exception-model", cl::desc("exception model"),
cl::init(ExceptionHandling::None),
@@ -180,15 +189,15 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
CGBINDOPT(ExceptionModel);
static cl::opt<CodeGenFileType> FileType(
- "filetype", cl::init(CGFT_AssemblyFile),
+ "filetype", cl::init(CodeGenFileType::AssemblyFile),
cl::desc(
"Choose a file type (not all types are supported by all targets):"),
- cl::values(
- clEnumValN(CGFT_AssemblyFile, "asm", "Emit an assembly ('.s') file"),
- clEnumValN(CGFT_ObjectFile, "obj",
- "Emit a native object ('.o') file"),
- clEnumValN(CGFT_Null, "null",
- "Emit nothing, for performance testing")));
+ cl::values(clEnumValN(CodeGenFileType::AssemblyFile, "asm",
+ "Emit an assembly ('.s') file"),
+ clEnumValN(CodeGenFileType::ObjectFile, "obj",
+ "Emit a native object ('.o') file"),
+ clEnumValN(CodeGenFileType::Null, "null",
+ "Emit nothing, for performance testing")));
CGBINDOPT(FileType);
static cl::opt<FramePointerKind> FramePointerUsage(
@@ -725,3 +734,24 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
for (Function &F : M)
setFunctionAttributes(CPU, Features, F);
}
+
+Expected<std::unique_ptr<TargetMachine>>
+codegen::createTargetMachineForTriple(StringRef TargetTriple,
+ CodeGenOptLevel OptLevel) {
+ Triple TheTriple(TargetTriple);
+ std::string Error;
+ const auto *TheTarget =
+ TargetRegistry::lookupTarget(codegen::getMArch(), TheTriple, Error);
+ if (!TheTarget)
+ return createStringError(inconvertibleErrorCode(), Error);
+ auto *Target = TheTarget->createTargetMachine(
+ TheTriple.getTriple(), codegen::getCPUStr(), codegen::getFeaturesStr(),
+ codegen::InitTargetOptionsFromCodeGenFlags(TheTriple),
+ codegen::getExplicitRelocModel(), codegen::getExplicitCodeModel(),
+ OptLevel);
+ if (!Target)
+ return createStringError(inconvertibleErrorCode(),
+ Twine("could not allocate target machine for ") +
+ TargetTriple);
+ return std::unique_ptr<TargetMachine>(Target);
+}
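
A hypothetical caller of the new helper, assuming the native target has been registered and linked in; makeHostTM is an illustrative name, not part of this patch:

    #include "llvm/CodeGen/CommandFlags.h"
    #include "llvm/Support/CodeGen.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/TargetSelect.h"
    #include "llvm/Target/TargetMachine.h"
    #include "llvm/TargetParser/Host.h"

    llvm::Expected<std::unique_ptr<llvm::TargetMachine>> makeHostTM() {
      llvm::InitializeNativeTarget(); // the registry must know the target
      return llvm::codegen::createTargetMachineForTriple(
          llvm::sys::getDefaultTargetTriple(), llvm::CodeGenOptLevel::Default);
    }
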
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
index 7979ac9a5fb7..a6cacf874bdc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
@@ -60,6 +60,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ComplexDeinterleavingPass.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -258,7 +259,7 @@ private:
///
/// %OutsideUser can be `llvm.vector.reduce.fadd` or `fadd` preceding
/// `llvm.vector.reduce.fadd` when unroll factor isn't one.
- std::map<Instruction *, std::pair<PHINode *, Instruction *>> ReductionInfo;
+ MapVector<Instruction *, std::pair<PHINode *, Instruction *>> ReductionInfo;
/// In the process of detecting a reduction, we consider a pair of
/// %ReductionOP, which we refer to as real and imag (or vice versa), and
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index 32c94de7280c..e7eb34d8e651 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/DwarfEHPrepare.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -41,7 +42,7 @@
using namespace llvm;
-#define DEBUG_TYPE "dwarfehprepare"
+#define DEBUG_TYPE "dwarf-eh-prepare"
STATISTIC(NumResumesLowered, "Number of resume calls lowered");
STATISTIC(NumCleanupLandingPadsUnreachable,
@@ -54,7 +55,7 @@ STATISTIC(NumUnwind, "Number of functions with unwind");
namespace {
class DwarfEHPrepare {
- CodeGenOpt::Level OptLevel;
+ CodeGenOptLevel OptLevel;
Function &F;
const TargetLowering &TLI;
@@ -78,7 +79,7 @@ class DwarfEHPrepare {
bool InsertUnwindResumeCalls();
public:
- DwarfEHPrepare(CodeGenOpt::Level OptLevel_, Function &F_,
+ DwarfEHPrepare(CodeGenOptLevel OptLevel_, Function &F_,
const TargetLowering &TLI_, DomTreeUpdater *DTU_,
const TargetTransformInfo *TTI_, const Triple &TargetTriple_)
: OptLevel(OptLevel_), F(F_), TLI(TLI_), DTU(DTU_), TTI(TTI_),
@@ -194,7 +195,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
LLVMContext &Ctx = F.getContext();
size_t ResumesLeft = Resumes.size();
- if (OptLevel != CodeGenOpt::None) {
+ if (OptLevel != CodeGenOptLevel::None) {
ResumesLeft = pruneUnreachableResumes(Resumes, CleanupLPads);
#if LLVM_ENABLE_STATS
unsigned NumRemainingLPs = 0;
@@ -227,8 +228,8 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
DoesRewindFunctionNeedExceptionObject = false;
} else {
RewindName = TLI.getLibcallName(RTLIB::UNWIND_RESUME);
- FTy =
- FunctionType::get(Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), false);
+ FTy = FunctionType::get(Type::getVoidTy(Ctx), PointerType::getUnqual(Ctx),
+ false);
RewindFunctionCallingConv = TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME);
DoesRewindFunctionNeedExceptionObject = true;
}
@@ -269,8 +270,8 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
llvm::SmallVector<Value *, 1> RewindFunctionArgs;
BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &F);
- PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesLeft, "exn.obj",
- UnwindBB);
+ PHINode *PN = PHINode::Create(PointerType::getUnqual(Ctx), ResumesLeft,
+ "exn.obj", UnwindBB);
// Extract the exception object from the ResumeInst and add it to the PHI node
// that feeds the _Unwind_Resume call.
@@ -309,7 +310,7 @@ bool DwarfEHPrepare::run() {
return Changed;
}
-static bool prepareDwarfEH(CodeGenOpt::Level OptLevel, Function &F,
+static bool prepareDwarfEH(CodeGenOptLevel OptLevel, Function &F,
const TargetLowering &TLI, DominatorTree *DT,
const TargetTransformInfo *TTI,
const Triple &TargetTriple) {
@@ -324,12 +325,12 @@ namespace {
class DwarfEHPrepareLegacyPass : public FunctionPass {
- CodeGenOpt::Level OptLevel;
+ CodeGenOptLevel OptLevel;
public:
static char ID; // Pass identification, replacement for typeid.
- DwarfEHPrepareLegacyPass(CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
+ DwarfEHPrepareLegacyPass(CodeGenOptLevel OptLevel = CodeGenOptLevel::Default)
: FunctionPass(ID), OptLevel(OptLevel) {}
bool runOnFunction(Function &F) override {
@@ -340,7 +341,7 @@ public:
const TargetTransformInfo *TTI = nullptr;
if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
DT = &DTWP->getDomTree();
- if (OptLevel != CodeGenOpt::None) {
+ if (OptLevel != CodeGenOptLevel::None) {
if (!DT)
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
@@ -351,7 +352,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetPassConfig>();
AU.addRequired<TargetTransformInfoWrapperPass>();
- if (OptLevel != CodeGenOpt::None) {
+ if (OptLevel != CodeGenOptLevel::None) {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
}
@@ -365,6 +366,27 @@ public:
} // end anonymous namespace
+PreservedAnalyses DwarfEHPreparePass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ const auto &TLI = *TM->getSubtargetImpl(F)->getTargetLowering();
+ auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
+ const TargetTransformInfo *TTI = nullptr;
+ auto OptLevel = TM->getOptLevel();
+ if (OptLevel != CodeGenOptLevel::None) {
+ if (!DT)
+ DT = &FAM.getResult<DominatorTreeAnalysis>(F);
+ TTI = &FAM.getResult<TargetIRAnalysis>(F);
+ }
+ bool Changed =
+ prepareDwarfEH(OptLevel, F, TLI, DT, TTI, TM->getTargetTriple());
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+
char DwarfEHPrepareLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(DwarfEHPrepareLegacyPass, DEBUG_TYPE,
@@ -375,6 +397,6 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(DwarfEHPrepareLegacyPass, DEBUG_TYPE,
"Prepare DWARF exceptions", false, false)
-FunctionPass *llvm::createDwarfEHPass(CodeGenOpt::Level OptLevel) {
+FunctionPass *llvm::createDwarfEHPass(CodeGenOptLevel OptLevel) {
return new DwarfEHPrepareLegacyPass(OptLevel);
}
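
With the new-PM run() entry point above, the pass can also be scheduled directly rather than through createDwarfEHPass(); a sketch, assuming DwarfEHPreparePass is constructed from the TargetMachine as declared in the new header:

    #include "llvm/CodeGen/DwarfEHPrepare.h"
    #include "llvm/IR/PassManager.h"

    void scheduleDwarfEHPrepare(llvm::FunctionPassManager &FPM,
                                const llvm::TargetMachine *TM) {
      FPM.addPass(llvm::DwarfEHPreparePass(TM));
    }
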
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 61867d74bfa2..31e107ade1cc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -828,8 +828,6 @@ void updateDomTree(MachineDominatorTree *DomTree, const SSAIfConv &IfConv,
/// Update LoopInfo after if-conversion.
void updateLoops(MachineLoopInfo *Loops,
ArrayRef<MachineBasicBlock *> Removed) {
- if (!Loops)
- return;
// If-conversion doesn't change loop structure, and it doesn't mess with back
// edges, so updating LoopInfo is simply removing the dead blocks.
for (auto *B : Removed)
@@ -1092,7 +1090,7 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
SchedModel = STI.getSchedModel();
MRI = &MF.getRegInfo();
DomTree = &getAnalysis<MachineDominatorTree>();
- Loops = getAnalysisIfAvailable<MachineLoopInfo>();
+ Loops = &getAnalysis<MachineLoopInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
@@ -1226,7 +1224,7 @@ bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
SchedModel.init(&STI);
DomTree = &getAnalysis<MachineDominatorTree>();
- Loops = getAnalysisIfAvailable<MachineLoopInfo>();
+ Loops = &getAnalysis<MachineLoopInfo>();
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
bool Changed = false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp
index 057b5311db70..973c814604b3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp
@@ -14,6 +14,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/ExpandLargeDivRem.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/GlobalsModRef.h"
@@ -128,6 +129,13 @@ public:
};
} // namespace
+PreservedAnalyses ExpandLargeDivRemPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F);
+ return runImpl(F, *STI->getTargetLowering()) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
+
char ExpandLargeDivRemLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(ExpandLargeDivRemLegacyPass, "expand-large-div-rem",
"Expand large div/rem", false, false)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp
index ca8056a53139..78ad2a25d0e4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp
@@ -14,6 +14,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/ExpandLargeFpConvert.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/GlobalsModRef.h"
@@ -653,6 +654,13 @@ public:
};
} // namespace
+PreservedAnalyses ExpandLargeFpConvertPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F);
+ return runImpl(F, *STI->getTargetLowering()) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
+
char ExpandLargeFpConvertLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert",
"Expand large fp convert", false, false)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 500f31bd8e89..bb84813569f4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/ExpandMemCmp.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
@@ -23,6 +24,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -31,12 +33,13 @@
#include <optional>
using namespace llvm;
+using namespace llvm::PatternMatch;
namespace llvm {
class TargetLowering;
}
-#define DEBUG_TYPE "expandmemcmp"
+#define DEBUG_TYPE "expand-memcmp"
STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
@@ -117,8 +120,8 @@ class MemCmpExpansion {
Value *Lhs = nullptr;
Value *Rhs = nullptr;
};
- LoadPair getLoadPair(Type *LoadSizeType, bool NeedsBSwap, Type *CmpSizeType,
- unsigned OffsetBytes);
+ LoadPair getLoadPair(Type *LoadSizeType, Type *BSwapSizeType,
+ Type *CmpSizeType, unsigned OffsetBytes);
static LoadEntryVector
computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
@@ -128,6 +131,11 @@ class MemCmpExpansion {
unsigned MaxNumLoads,
unsigned &NumLoadsNonOneByte);
+ static void optimiseLoadSequence(
+ LoadEntryVector &LoadSequence,
+ const TargetTransformInfo::MemCmpExpansionOptions &Options,
+ bool IsUsedForZeroCmp);
+
public:
MemCmpExpansion(CallInst *CI, uint64_t Size,
const TargetTransformInfo::MemCmpExpansionOptions &Options,
@@ -210,6 +218,37 @@ MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
return LoadSequence;
}
+void MemCmpExpansion::optimiseLoadSequence(
+ LoadEntryVector &LoadSequence,
+ const TargetTransformInfo::MemCmpExpansionOptions &Options,
+ bool IsUsedForZeroCmp) {
+ // This code attempts to optimize the LoadSequence by merging contiguous
+ // trailing loads into a single load of a size allowed by
+ // `MemCmpExpansionOptions::AllowedTailExpansions`. If the expansion is used
+ // for a zero comparison, or no allowed tail expansions are specified, exit
+ // early.
+ if (IsUsedForZeroCmp || Options.AllowedTailExpansions.empty())
+ return;
+
+ while (LoadSequence.size() >= 2) {
+ auto Last = LoadSequence[LoadSequence.size() - 1];
+ auto PreLast = LoadSequence[LoadSequence.size() - 2];
+
+ // Exit the loop if the two loads are not contiguous.
+ if (PreLast.Offset + PreLast.LoadSize != Last.Offset)
+ break;
+
+ auto LoadSize = Last.LoadSize + PreLast.LoadSize;
+ if (find(Options.AllowedTailExpansions, LoadSize) ==
+ Options.AllowedTailExpansions.end())
+ break;
+
+ // Remove the last two loads and replace them with the combined load.
+ LoadSequence.pop_back();
+ LoadSequence.pop_back();
+ LoadSequence.emplace_back(PreLast.Offset, LoadSize);
+ }
+}
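
The merging loop above is self-contained enough to model without LLVM types. A standalone sketch (mergeTail and this LoadEntry are hypothetical stand-ins for the pass's internals):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct LoadEntry {
      uint64_t Offset;   // byte offset of this load from the base pointer
      uint64_t LoadSize; // size of this load in bytes
    };

    // Repeatedly fuse the last two contiguous loads when the fused size is an
    // allowed tail expansion. E.g. loads (4@0, 2@4, 1@6) with an allowed
    // 3-byte tail become (4@0, 3@4).
    void mergeTail(std::vector<LoadEntry> &Seq,
                   const std::vector<uint64_t> &AllowedTail) {
      while (Seq.size() >= 2) {
        LoadEntry Last = Seq[Seq.size() - 1];
        LoadEntry PreLast = Seq[Seq.size() - 2];
        if (PreLast.Offset + PreLast.LoadSize != Last.Offset)
          break; // not contiguous
        uint64_t Fused = Last.LoadSize + PreLast.LoadSize;
        if (std::find(AllowedTail.begin(), AllowedTail.end(), Fused) ==
            AllowedTail.end())
          break; // fused size is not an allowed tail expansion
        Seq.pop_back();
        Seq.pop_back();
        Seq.push_back({PreLast.Offset, Fused});
      }
    }
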
+
// Initialize the basic block structure required for expansion of memcmp call
// with given maximum load size and memcmp size parameter.
// This structure includes:
@@ -255,6 +294,7 @@ MemCmpExpansion::MemCmpExpansion(
}
}
assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
+ optimiseLoadSequence(LoadSequence, Options, IsUsedForZeroCmp);
}
unsigned MemCmpExpansion::getNumBlocks() {
@@ -278,7 +318,7 @@ void MemCmpExpansion::createResultBlock() {
}
MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
- bool NeedsBSwap,
+ Type *BSwapSizeType,
Type *CmpSizeType,
unsigned OffsetBytes) {
// Get the memory source at offset `OffsetBytes`.
@@ -307,16 +347,22 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
if (!Rhs)
Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign);
+ // Zero-extend the loads if the byte-swap intrinsic uses a wider type.
+ if (BSwapSizeType && LoadSizeType != BSwapSizeType) {
+ Lhs = Builder.CreateZExt(Lhs, BSwapSizeType);
+ Rhs = Builder.CreateZExt(Rhs, BSwapSizeType);
+ }
+
// Swap bytes if required.
- if (NeedsBSwap) {
- Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
- Intrinsic::bswap, LoadSizeType);
+ if (BSwapSizeType) {
+ Function *Bswap = Intrinsic::getDeclaration(
+ CI->getModule(), Intrinsic::bswap, BSwapSizeType);
Lhs = Builder.CreateCall(Bswap, Lhs);
Rhs = Builder.CreateCall(Bswap, Rhs);
}
// Zero extend if required.
- if (CmpSizeType != nullptr && CmpSizeType != LoadSizeType) {
+ if (CmpSizeType != nullptr && CmpSizeType != Lhs->getType()) {
Lhs = Builder.CreateZExt(Lhs, CmpSizeType);
Rhs = Builder.CreateZExt(Rhs, CmpSizeType);
}
@@ -332,7 +378,7 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
BasicBlock *BB = LoadCmpBlocks[BlockIndex];
Builder.SetInsertPoint(BB);
const LoadPair Loads =
- getLoadPair(Type::getInt8Ty(CI->getContext()), /*NeedsBSwap=*/false,
+ getLoadPair(Type::getInt8Ty(CI->getContext()), nullptr,
Type::getInt32Ty(CI->getContext()), OffsetBytes);
Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);
@@ -385,11 +431,12 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
IntegerType *const MaxLoadType =
NumLoads == 1 ? nullptr
: IntegerType::get(CI->getContext(), MaxLoadSize * 8);
+
for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {
const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
const LoadPair Loads = getLoadPair(
- IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8),
- /*NeedsBSwap=*/false, MaxLoadType, CurLoadEntry.Offset);
+ IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8), nullptr,
+ MaxLoadType, CurLoadEntry.Offset);
if (NumLoads != 1) {
// If we have multiple loads per block, we need to generate a composite
@@ -475,14 +522,20 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
Type *LoadSizeType =
IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
- Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
+ Type *BSwapSizeType =
+ DL.isLittleEndian()
+ ? IntegerType::get(CI->getContext(),
+ PowerOf2Ceil(CurLoadEntry.LoadSize * 8))
+ : nullptr;
+ Type *MaxLoadType = IntegerType::get(
+ CI->getContext(),
+ std::max(MaxLoadSize, (unsigned)PowerOf2Ceil(CurLoadEntry.LoadSize)) * 8);
assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");
Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
- const LoadPair Loads =
- getLoadPair(LoadSizeType, /*NeedsBSwap=*/DL.isLittleEndian(), MaxLoadType,
- CurLoadEntry.Offset);
+ const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,
+ CurLoadEntry.Offset);
// Add the loaded values to the phi nodes for calculating memcmp result only
// if result is not used in a zero equality.
@@ -558,7 +611,7 @@ void MemCmpExpansion::setupResultBlockPHINodes() {
}
void MemCmpExpansion::setupEndBlockPHINodes() {
- Builder.SetInsertPoint(&EndBlock->front());
+ Builder.SetInsertPoint(EndBlock, EndBlock->begin());
PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
}
@@ -586,21 +639,63 @@ Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
/// A memcmp expansion that only has one block of load and compare can bypass
/// the compare, branch, and phi IR that is required in the general case.
+/// This function also analyses the users of the memcmp; when there is a single
+/// user from which we can conclude that only two of the three memcmp outcomes
+/// really matter, it generates more efficient code with a single comparison.
Value *MemCmpExpansion::getMemCmpOneBlock() {
- Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
bool NeedsBSwap = DL.isLittleEndian() && Size != 1;
+ Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
+ Type *BSwapSizeType =
+ NeedsBSwap ? IntegerType::get(CI->getContext(), PowerOf2Ceil(Size * 8))
+ : nullptr;
+ Type *MaxLoadType =
+ IntegerType::get(CI->getContext(),
+ std::max(MaxLoadSize, (unsigned)PowerOf2Ceil(Size)) * 8);
// The i8 and i16 cases don't need compares. We zext the loaded values and
// subtract them to get the suitable negative, zero, or positive i32 result.
- if (Size < 4) {
- const LoadPair Loads =
- getLoadPair(LoadSizeType, NeedsBSwap, Builder.getInt32Ty(),
- /*Offset*/ 0);
+ if (Size == 1 || Size == 2) {
+ const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType,
+ Builder.getInt32Ty(), /*Offset*/ 0);
return Builder.CreateSub(Loads.Lhs, Loads.Rhs);
}
- const LoadPair Loads = getLoadPair(LoadSizeType, NeedsBSwap, LoadSizeType,
+ const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,
/*Offset*/ 0);
+
+ // If a user of memcmp cares only about two outcomes, for example:
+ // bool result = memcmp(a, b, NBYTES) > 0;
+ // we can generate better code with fewer operations.
+ if (CI->hasOneUser()) {
+ auto *UI = cast<Instruction>(*CI->user_begin());
+ ICmpInst::Predicate Pred = ICmpInst::Predicate::BAD_ICMP_PREDICATE;
+ uint64_t Shift;
+ bool NeedsZExt = false;
+ // This is a special case because instead of checking if the result is less
+ // than zero:
+ // bool result = memcmp(a, b, NBYTES) < 0;
+ // the compiler is clever enough to generate the following code:
+ // bool result = memcmp(a, b, NBYTES) >> 31;
+ if (match(UI, m_LShr(m_Value(), m_ConstantInt(Shift))) &&
+ Shift == (CI->getType()->getIntegerBitWidth() - 1)) {
+ Pred = ICmpInst::ICMP_SLT;
+ NeedsZExt = true;
+ } else {
+ // On a successful match, this call sets the `Pred` variable.
+ match(UI, m_ICmp(Pred, m_Specific(CI), m_Zero()));
+ }
+ // Generate the new code, then remove the original memcmp call and its user.
+ if (ICmpInst::isSigned(Pred)) {
+ Value *Cmp = Builder.CreateICmp(CmpInst::getUnsignedPredicate(Pred),
+ Loads.Lhs, Loads.Rhs);
+ auto *Result = NeedsZExt ? Builder.CreateZExt(Cmp, UI->getType()) : Cmp;
+ UI->replaceAllUsesWith(Result);
+ UI->eraseFromParent();
+ CI->eraseFromParent();
+ return nullptr;
+ }
+ }
+
// The result of memcmp is negative, zero, or positive, so produce that by
// subtracting 2 extended compare bits: sub (ugt, ult).
// If a target prefers to use selects to get -1/0/1, they should be able
@@ -615,7 +710,7 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
}
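
A host-level sketch of what the two-outcome expansion buys for `memcmp(a, b, 8) < 0`, assuming a little-endian host and the GCC/Clang `__builtin_bswap64`; this models the IR the pass emits, it is not the pass itself:

    #include <cstdint>
    #include <cstring>

    // One wide load per side, byte-swapped so the integer compare matches
    // lexicographic byte order, then a single unsigned compare; the full
    // -1/0/1 memcmp result is never materialized.
    bool lessThan8(const void *A, const void *B) {
      uint64_t La, Lb;
      std::memcpy(&La, A, sizeof(La));
      std::memcpy(&Lb, B, sizeof(Lb));
      La = __builtin_bswap64(La);
      Lb = __builtin_bswap64(Lb);
      return La < Lb;
    }
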
// This function expands the memcmp call into an inline expansion and returns
-// the memcmp result.
+// the memcmp result. Returns nullptr if the memcmp is already replaced.
Value *MemCmpExpansion::getMemCmpExpansion() {
// Create the basic block framework for a multi-block expansion.
if (getNumBlocks() != 1) {
@@ -783,21 +878,33 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
NumMemCmpInlined++;
- Value *Res = Expansion.getMemCmpExpansion();
-
- // Replace call with result of expansion and erase call.
- CI->replaceAllUsesWith(Res);
- CI->eraseFromParent();
+ if (Value *Res = Expansion.getMemCmpExpansion()) {
+ // Replace call with result of expansion and erase call.
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+ }
return true;
}
-class ExpandMemCmpPass : public FunctionPass {
+// Returns true if a change was made.
+static bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI, const TargetLowering *TL,
+ const DataLayout &DL, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI, DomTreeUpdater *DTU);
+
+static PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI,
+ const TargetLowering *TL,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI, DominatorTree *DT);
+
+class ExpandMemCmpLegacyPass : public FunctionPass {
public:
static char ID;
- ExpandMemCmpPass() : FunctionPass(ID) {
- initializeExpandMemCmpPassPass(*PassRegistry::getPassRegistry());
+ ExpandMemCmpLegacyPass() : FunctionPass(ID) {
+ initializeExpandMemCmpLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
@@ -834,25 +941,13 @@ private:
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
FunctionPass::getAnalysisUsage(AU);
}
-
- PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI,
- const TargetLowering *TL, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI, DominatorTree *DT);
- // Returns true if a change was made.
- bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI, const TargetLowering *TL,
- const DataLayout &DL, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI, DomTreeUpdater *DTU);
};
-bool ExpandMemCmpPass::runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI,
- const TargetLowering *TL,
- const DataLayout &DL, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI,
- DomTreeUpdater *DTU) {
- for (Instruction& I : BB) {
+bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI, const TargetLowering *TL,
+ const DataLayout &DL, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI, DomTreeUpdater *DTU) {
+ for (Instruction &I : BB) {
CallInst *CI = dyn_cast<CallInst>(&I);
if (!CI) {
continue;
@@ -867,8 +962,7 @@ bool ExpandMemCmpPass::runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
return false;
}
-PreservedAnalyses
-ExpandMemCmpPass::runImpl(Function &F, const TargetLibraryInfo *TLI,
+PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
const TargetLowering *TL, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI, DominatorTree *DT) {
@@ -900,17 +994,32 @@ ExpandMemCmpPass::runImpl(Function &F, const TargetLibraryInfo *TLI,
} // namespace
-char ExpandMemCmpPass::ID = 0;
-INITIALIZE_PASS_BEGIN(ExpandMemCmpPass, "expandmemcmp",
+PreservedAnalyses ExpandMemCmpPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ const auto *TL = TM->getSubtargetImpl(F)->getTargetLowering();
+ const auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
+ const auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
+ auto *PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F)
+ .getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ BlockFrequencyInfo *BFI = (PSI && PSI->hasProfileSummary())
+ ? &FAM.getResult<BlockFrequencyAnalysis>(F)
+ : nullptr;
+ auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
+
+ return runImpl(F, &TLI, &TTI, TL, PSI, BFI, DT);
+}
+
+char ExpandMemCmpLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ExpandMemCmpLegacyPass, DEBUG_TYPE,
"Expand memcmp() to load/stores", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp",
+INITIALIZE_PASS_END(ExpandMemCmpLegacyPass, DEBUG_TYPE,
"Expand memcmp() to load/stores", false, false)
-FunctionPass *llvm::createExpandMemCmpPass() {
- return new ExpandMemCmpPass();
+FunctionPass *llvm::createExpandMemCmpLegacyPass() {
+ return new ExpandMemCmpLegacyPass();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index 9807be0bea39..6c873a9aee27 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -123,9 +123,12 @@ static bool maySpeculateLanes(VPIntrinsic &VPI) {
if (isa<VPReductionIntrinsic>(VPI))
return false;
// Fallback to whether the intrinsic is speculatable.
- std::optional<unsigned> OpcOpt = VPI.getFunctionalOpcode();
- unsigned FunctionalOpc = OpcOpt.value_or((unsigned)Instruction::Call);
- return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc, &VPI);
+ if (auto IntrID = VPI.getFunctionalIntrinsicID())
+ return Intrinsic::getAttributes(VPI.getContext(), *IntrID)
+ .hasFnAttr(Attribute::AttrKind::Speculatable);
+ if (auto Opc = VPI.getFunctionalOpcode())
+ return isSafeToSpeculativelyExecuteWithOpcode(*Opc, &VPI);
+ return false;
}
//// } Helpers
@@ -171,6 +174,10 @@ struct CachingVPExpander {
Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
VPIntrinsic &PI);
+ /// Lower this VP int call to an unpredicated int call.
+ Value *expandPredicationToIntCall(IRBuilder<> &Builder, VPIntrinsic &PI,
+ unsigned UnpredicatedIntrinsicID);
+
/// Lower this VP fp call to an unpredicated fp call.
Value *expandPredicationToFPCall(IRBuilder<> &Builder, VPIntrinsic &PI,
unsigned UnpredicatedIntrinsicID);
@@ -179,6 +186,10 @@ struct CachingVPExpander {
Value *expandPredicationInReduction(IRBuilder<> &Builder,
VPReductionIntrinsic &PI);
+ /// Lower this VP cast operation to a non-VP intrinsic.
+ Value *expandPredicationToCastIntrinsic(IRBuilder<> &Builder,
+ VPIntrinsic &VPI);
+
/// Lower this VP memory operation to a non-VP intrinsic.
Value *expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
VPIntrinsic &VPI);
@@ -275,6 +286,35 @@ CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
return NewBinOp;
}
+Value *CachingVPExpander::expandPredicationToIntCall(
+ IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) {
+ switch (UnpredicatedIntrinsicID) {
+ case Intrinsic::abs:
+ case Intrinsic::smax:
+ case Intrinsic::smin:
+ case Intrinsic::umax:
+ case Intrinsic::umin: {
+ Value *Op0 = VPI.getOperand(0);
+ Value *Op1 = VPI.getOperand(1);
+ Function *Fn = Intrinsic::getDeclaration(
+ VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
+ Value *NewOp = Builder.CreateCall(Fn, {Op0, Op1}, VPI.getName());
+ replaceOperation(*NewOp, VPI);
+ return NewOp;
+ }
+ case Intrinsic::bswap:
+ case Intrinsic::bitreverse: {
+ Value *Op = VPI.getOperand(0);
+ Function *Fn = Intrinsic::getDeclaration(
+ VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
+ Value *NewOp = Builder.CreateCall(Fn, {Op}, VPI.getName());
+ replaceOperation(*NewOp, VPI);
+ return NewOp;
+ }
+ }
+ return nullptr;
+}
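
A scalar model of the integer-call expansion above, assuming every lane may be speculated: llvm.vp.smax drops its mask and EVL operands and becomes plain llvm.smax, which per lane is an ordinary signed max:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Per-lane effect of rewriting vp.smax(a, b, mask, evl) to smax(a, b);
    // the inactive lanes' results are never observed, so mask and evl can go.
    std::vector<int32_t> modelVpSmax(const std::vector<int32_t> &A,
                                     const std::vector<int32_t> &B) {
      std::vector<int32_t> Out(A.size());
      for (std::size_t I = 0; I < A.size(); ++I)
        Out[I] = std::max(A[I], B[I]);
      return Out;
    }
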
+
Value *CachingVPExpander::expandPredicationToFPCall(
IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) {
assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
@@ -290,6 +330,16 @@ Value *CachingVPExpander::expandPredicationToFPCall(
replaceOperation(*NewOp, VPI);
return NewOp;
}
+ case Intrinsic::maxnum:
+ case Intrinsic::minnum: {
+ Value *Op0 = VPI.getOperand(0);
+ Value *Op1 = VPI.getOperand(1);
+ Function *Fn = Intrinsic::getDeclaration(
+ VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
+ Value *NewOp = Builder.CreateCall(Fn, {Op0, Op1}, VPI.getName());
+ replaceOperation(*NewOp, VPI);
+ return NewOp;
+ }
case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_fmuladd: {
Value *Op0 = VPI.getOperand(0);
@@ -436,6 +486,62 @@ CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
return Reduction;
}
+Value *CachingVPExpander::expandPredicationToCastIntrinsic(IRBuilder<> &Builder,
+ VPIntrinsic &VPI) {
+ Value *CastOp = nullptr;
+ switch (VPI.getIntrinsicID()) {
+ default:
+ llvm_unreachable("Not a VP cast intrinsic");
+ case Intrinsic::vp_sext:
+ CastOp =
+ Builder.CreateSExt(VPI.getOperand(0), VPI.getType(), VPI.getName());
+ break;
+ case Intrinsic::vp_zext:
+ CastOp =
+ Builder.CreateZExt(VPI.getOperand(0), VPI.getType(), VPI.getName());
+ break;
+ case Intrinsic::vp_trunc:
+ CastOp =
+ Builder.CreateTrunc(VPI.getOperand(0), VPI.getType(), VPI.getName());
+ break;
+ case Intrinsic::vp_inttoptr:
+ CastOp =
+ Builder.CreateIntToPtr(VPI.getOperand(0), VPI.getType(), VPI.getName());
+ break;
+ case Intrinsic::vp_ptrtoint:
+ CastOp =
+ Builder.CreatePtrToInt(VPI.getOperand(0), VPI.getType(), VPI.getName());
+ break;
+ case Intrinsic::vp_fptosi:
+ CastOp =
+ Builder.CreateFPToSI(VPI.getOperand(0), VPI.getType(), VPI.getName());
+ break;
+
+ case Intrinsic::vp_fptoui:
+ CastOp =
+ Builder.CreateFPToUI(VPI.getOperand(0), VPI.getType(), VPI.getName());
+ break;
+ case Intrinsic::vp_sitofp:
+ CastOp =
+ Builder.CreateSIToFP(VPI.getOperand(0), VPI.getType(), VPI.getName());
+ break;
+ case Intrinsic::vp_uitofp:
+ CastOp =
+ Builder.CreateUIToFP(VPI.getOperand(0), VPI.getType(), VPI.getName());
+ break;
+ case Intrinsic::vp_fptrunc:
+ CastOp =
+ Builder.CreateFPTrunc(VPI.getOperand(0), VPI.getType(), VPI.getName());
+ break;
+ case Intrinsic::vp_fpext:
+ CastOp =
+ Builder.CreateFPExt(VPI.getOperand(0), VPI.getType(), VPI.getName());
+ break;
+ }
+ replaceOperation(*CastOp, VPI);
+ return CastOp;
+}
+
Value *
CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
VPIntrinsic &VPI) {
@@ -598,18 +704,33 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
if (auto *VPCmp = dyn_cast<VPCmpIntrinsic>(&VPI))
return expandPredicationInComparison(Builder, *VPCmp);
+ if (VPCastIntrinsic::isVPCast(VPI.getIntrinsicID())) {
+ return expandPredicationToCastIntrinsic(Builder, VPI);
+ }
+
switch (VPI.getIntrinsicID()) {
default:
break;
case Intrinsic::vp_fneg: {
Value *NewNegOp = Builder.CreateFNeg(VPI.getOperand(0), VPI.getName());
replaceOperation(*NewNegOp, VPI);
- return NewNegOp;
+ return NewNegOp;
}
+ case Intrinsic::vp_abs:
+ case Intrinsic::vp_smax:
+ case Intrinsic::vp_smin:
+ case Intrinsic::vp_umax:
+ case Intrinsic::vp_umin:
+ case Intrinsic::vp_bswap:
+ case Intrinsic::vp_bitreverse:
+ return expandPredicationToIntCall(Builder, VPI,
+ VPI.getFunctionalIntrinsicID().value());
case Intrinsic::vp_fabs:
- return expandPredicationToFPCall(Builder, VPI, Intrinsic::fabs);
case Intrinsic::vp_sqrt:
- return expandPredicationToFPCall(Builder, VPI, Intrinsic::sqrt);
+ case Intrinsic::vp_maxnum:
+ case Intrinsic::vp_minnum:
+ return expandPredicationToFPCall(Builder, VPI,
+ VPI.getFunctionalIntrinsicID().value());
case Intrinsic::vp_load:
case Intrinsic::vp_store:
case Intrinsic::vp_gather:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
index 75504ef32250..4d668c53f715 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -461,7 +461,8 @@ public:
if (EHPad && !RC.hasReload(Reg, RegToSlotIdx[Reg], EHPad)) {
RC.recordReload(Reg, RegToSlotIdx[Reg], EHPad);
- auto EHPadInsertPoint = EHPad->SkipPHIsLabelsAndDebug(EHPad->begin());
+ auto EHPadInsertPoint =
+ EHPad->SkipPHIsLabelsAndDebug(EHPad->begin(), Reg);
insertReloadBefore(Reg, EHPadInsertPoint, EHPad);
LLVM_DEBUG(dbgs() << "...also reload at EHPad "
<< printMBBReference(*EHPad) << "\n");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCEmptyBasicBlocks.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCEmptyBasicBlocks.cpp
new file mode 100644
index 000000000000..598be26e40c8
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GCEmptyBasicBlocks.cpp
@@ -0,0 +1,100 @@
+//===-- GCEmptyBasicBlocks.cpp ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the implementation of the empty-basic-block garbage
+/// collection pass.
+///
+//===----------------------------------------------------------------------===//
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "gc-empty-basic-blocks"
+
+STATISTIC(NumEmptyBlocksRemoved, "Number of empty blocks removed");
+
+class GCEmptyBasicBlocks : public MachineFunctionPass {
+public:
+ static char ID;
+
+ GCEmptyBasicBlocks() : MachineFunctionPass(ID) {
+ initializeGCEmptyBasicBlocksPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "Remove Empty Basic Blocks.";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+bool GCEmptyBasicBlocks::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.size() < 2)
+ return false;
+ MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
+ int NumRemoved = 0;
+
+ // Iterate over all blocks except the last one. We can't remove the last block
+ // since it has no fallthrough block to rewire its predecessors to.
+ for (MachineFunction::iterator MBB = MF.begin(),
+ LastMBB = MachineFunction::iterator(MF.back()),
+ NextMBB;
+ MBB != LastMBB; MBB = NextMBB) {
+ NextMBB = std::next(MBB);
+ // TODO: If a block is an EH pad or has its address taken, we don't remove
+ // it. Removing such blocks is possible, but would probably require more
+ // complex logic.
+ if (MBB->isEHPad() || MBB->hasAddressTaken())
+ continue;
+ // Skip blocks with real code.
+ bool HasAnyRealCode = llvm::any_of(*MBB, [](const MachineInstr &MI) {
+ return !MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
+ !MI.isDebugInstr();
+ });
+ if (HasAnyRealCode)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Removing basic block " << MBB->getName()
+ << " in function " << MF.getName() << ":\n"
+ << *MBB << "\n");
+ SmallVector<MachineBasicBlock *, 8> Preds(MBB->predecessors());
+ // Rewire the predecessors of this block to use the next block.
+ for (auto &Pred : Preds)
+ Pred->ReplaceUsesOfBlockWith(&*MBB, &*NextMBB);
+ // Update the jump tables.
+ if (JTI)
+ JTI->ReplaceMBBInJumpTables(&*MBB, &*NextMBB);
+ // Remove this block from predecessors of all its successors.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_end() - 1);
+ // Finally, remove the block from the function.
+ MBB->eraseFromParent();
+ ++NumRemoved;
+ }
+ NumEmptyBlocksRemoved += NumRemoved;
+ return NumRemoved != 0;
+}
+
+char GCEmptyBasicBlocks::ID = 0;
+INITIALIZE_PASS(GCEmptyBasicBlocks, "gc-empty-basic-blocks",
+ "Removes empty basic blocks and redirects their uses to their "
+ "fallthrough blocks.",
+ false, false)
+
+MachineFunctionPass *llvm::createGCEmptyBasicBlocksPass() {
+ return new GCEmptyBasicBlocks();
+}
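
The removability test above boils down to a simple predicate; a standalone model with stand-in types (Instr and Block are not the LLVM classes):

    #include <vector>

    struct Instr { bool IsPosition, IsImplicitDef, IsKill, IsDebug; };
    struct Block { std::vector<Instr> Insts; };

    // A block can be garbage-collected only if nothing in it is "real code";
    // position metadata, implicit defs, kills and debug instructions don't
    // count. The last block is always kept, since there is no fallthrough
    // block to rewire its predecessors to.
    bool hasRealCode(const Block &B) {
      for (const Instr &I : B.Insts)
        if (!I.IsPosition && !I.IsImplicitDef && !I.IsKill && !I.IsDebug)
          return true;
      return false;
    }
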
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
index 4d27143c5298..cad7d1f1137b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
@@ -24,24 +24,49 @@
using namespace llvm;
-namespace {
-
-class Printer : public FunctionPass {
- static char ID;
-
- raw_ostream &OS;
-
-public:
- explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {}
+bool GCStrategyMap::invalidate(Module &M, const PreservedAnalyses &PA,
+ ModuleAnalysisManager::Invalidator &) {
+ for (const auto &F : M) {
+ if (F.isDeclaration() || !F.hasGC())
+ continue;
+ if (!StrategyMap.contains(F.getGC()))
+ return true;
+ }
+ return false;
+}
- StringRef getPassName() const override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
+AnalysisKey CollectorMetadataAnalysis::Key;
+
+CollectorMetadataAnalysis::Result
+CollectorMetadataAnalysis::run(Module &M, ModuleAnalysisManager &MAM) {
+ Result R;
+ auto &Map = R.StrategyMap;
+ for (auto &F : M) {
+ if (F.isDeclaration() || !F.hasGC())
+ continue;
+ if (auto GCName = F.getGC(); !Map.contains(GCName))
+ Map[GCName] = getGCStrategy(GCName);
+ }
+ return R;
+}
- bool runOnFunction(Function &F) override;
- bool doFinalization(Module &M) override;
-};
+AnalysisKey GCFunctionAnalysis::Key;
-} // end anonymous namespace
+GCFunctionAnalysis::Result
+GCFunctionAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
+ assert(!F.isDeclaration() && "Can only get GCFunctionInfo for a definition!");
+ assert(F.hasGC() && "Function doesn't have GC!");
+
+ auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
+ assert(
+ MAMProxy.cachedResultExists<CollectorMetadataAnalysis>(*F.getParent()) &&
+ "This pass need module analysis `collector-metadata`!");
+ auto &Map =
+ MAMProxy.getCachedResult<CollectorMetadataAnalysis>(*F.getParent())
+ ->StrategyMap;
+ GCFunctionInfo Info(F, *Map[F.getGC()]);
+ return Info;
+}
INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
"Create Garbage Collector Module Metadata", false, false)
@@ -53,6 +78,12 @@ GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S)
GCFunctionInfo::~GCFunctionInfo() = default;
+bool GCFunctionInfo::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &) {
+ auto PAC = PA.getChecker<GCFunctionAnalysis>();
+ return !PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>();
+}
+
// -----------------------------------------------------------------------------
char GCModuleInfo::ID = 0;
@@ -84,58 +115,6 @@ void GCModuleInfo::clear() {
// -----------------------------------------------------------------------------
-char Printer::ID = 0;
-
-FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) {
- return new Printer(OS);
-}
-
-StringRef Printer::getPassName() const {
- return "Print Garbage Collector Information";
-}
-
-void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
- FunctionPass::getAnalysisUsage(AU);
- AU.setPreservesAll();
- AU.addRequired<GCModuleInfo>();
-}
-
-bool Printer::runOnFunction(Function &F) {
- if (F.hasGC())
- return false;
-
- GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
-
- OS << "GC roots for " << FD->getFunction().getName() << ":\n";
- for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
- RE = FD->roots_end();
- RI != RE; ++RI)
- OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
-
- OS << "GC safe points for " << FD->getFunction().getName() << ":\n";
- for (GCFunctionInfo::iterator PI = FD->begin(), PE = FD->end(); PI != PE;
- ++PI) {
-
- OS << "\t" << PI->Label->getName() << ": " << "post-call"
- << ", live = {";
-
- ListSeparator LS(",");
- for (const GCRoot &R : make_range(FD->live_begin(PI), FD->live_end(PI)))
- OS << LS << " " << R.Num;
-
- OS << " }\n";
- }
-
- return false;
-}
-
-bool Printer::doFinalization(Module &M) {
- GCModuleInfo *GMI = getAnalysisIfAvailable<GCModuleInfo>();
- assert(GMI && "Printer didn't require GCModuleInfo?!");
- GMI->clear();
- return false;
-}
-
GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) {
// TODO: Arguably, just doing a linear search would be faster for small N
auto NMI = GCStrategyMap.find(Name);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
index e047996f9aa8..ca4d0986b442 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -76,9 +76,9 @@ bool CSEConfigConstantOnly::shouldCSEOpc(unsigned Opc) {
}
std::unique_ptr<CSEConfigBase>
-llvm::getStandardCSEConfigForOpt(CodeGenOpt::Level Level) {
+llvm::getStandardCSEConfigForOpt(CodeGenOptLevel Level) {
std::unique_ptr<CSEConfigBase> Config;
- if (Level == CodeGenOpt::None)
+ if (Level == CodeGenOptLevel::None)
Config = std::make_unique<CSEConfigConstantOnly>();
else
Config = std::make_unique<CSEConfigFull>();
@@ -244,8 +244,6 @@ void GISelCSEInfo::changedInstr(MachineInstr &MI) { changingInstr(MI); }
void GISelCSEInfo::analyze(MachineFunction &MF) {
setMF(MF);
for (auto &MBB : MF) {
- if (MBB.empty())
- continue;
for (MachineInstr &MI : MBB) {
if (!shouldCSE(MI.getOpcode()))
continue;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 28c33e2038e4..6858e030c2c7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -110,6 +110,8 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
getReturnInfo(CallConv, RetTy, CB.getAttributes(), SplitArgs, DL);
Info.CanLowerReturn = canLowerReturn(MF, CallConv, SplitArgs, IsVarArg);
+ Info.IsConvergent = CB.isConvergent();
+
if (!Info.CanLowerReturn) {
// Callee requires sret demotion.
insertSRetOutgoingArgument(MIRBuilder, CB, Info);
@@ -144,7 +146,12 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
const Value *CalleeV = CB.getCalledOperand()->stripPointerCasts();
if (const Function *F = dyn_cast<Function>(CalleeV))
Info.Callee = MachineOperand::CreateGA(F, 0);
- else
+ else if (isa<GlobalIFunc>(CalleeV) || isa<GlobalAlias>(CalleeV)) {
+ // IR IFuncs and Aliases can't be forward declared (only defined), so the
+ // callee must be in the same TU and therefore we can direct-call it without
+ // worrying about it being out of range.
+ Info.Callee = MachineOperand::CreateGA(cast<GlobalValue>(CalleeV), 0);
+ } else
Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
Register ReturnHintAlignReg;
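Editor's note: the comment above relies on a module-structure fact worth spelling out — unlike plain functions, ifuncs and aliases always carry a definition, so the callee is known to live in the current TU. A hedged illustration in IR form (the symbols are made up):

    // @f is an alias to a local definition, so the translated call can
    // reference @f directly instead of materializing its address:
    //   @f = alias void (), ptr @impl
    //   define void @impl() { ret void }
    //   define void @caller() { call void @f()  ; becomes a direct call
    //                           ret void }
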
@@ -356,7 +363,7 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
if (PartLLT.isVector() == LLTy.isVector() &&
PartLLT.getScalarSizeInBits() > LLTy.getScalarSizeInBits() &&
(!PartLLT.isVector() ||
- PartLLT.getNumElements() == LLTy.getNumElements()) &&
+ PartLLT.getElementCount() == LLTy.getElementCount()) &&
OrigRegs.size() == 1 && Regs.size() == 1) {
Register SrcReg = Regs[0];
@@ -404,6 +411,7 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
// If PartLLT is a mismatched vector in both number of elements and element
// size, e.g. PartLLT == v2s64 and LLTy is v3s32, then first coerce it to
// have the same elt type, i.e. v4s32.
+ // TODO: Extend this coercion to element multiples other than just 2.
if (PartLLT.getSizeInBits() > LLTy.getSizeInBits() &&
PartLLT.getScalarSizeInBits() == LLTy.getScalarSizeInBits() * 2 &&
Regs.size() == 1) {
@@ -845,7 +853,8 @@ void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
unsigned NumValues = SplitVTs.size();
Align BaseAlign = DL.getPrefTypeAlign(RetTy);
- Type *RetPtrTy = RetTy->getPointerTo(DL.getAllocaAddrSpace());
+ Type *RetPtrTy =
+ PointerType::get(RetTy->getContext(), DL.getAllocaAddrSpace());
LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetPtrTy), DL);
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
@@ -1132,7 +1141,7 @@ void CallLowering::ValueHandler::copyArgumentMemory(
}
Register CallLowering::ValueHandler::extendRegister(Register ValReg,
- CCValAssign &VA,
+ const CCValAssign &VA,
unsigned MaxSizeBits) {
LLT LocTy{VA.getLocVT()};
LLT ValTy{VA.getValVT()};
@@ -1181,9 +1190,8 @@ Register CallLowering::ValueHandler::extendRegister(Register ValReg,
void CallLowering::ValueAssigner::anchor() {}
-Register CallLowering::IncomingValueHandler::buildExtensionHint(CCValAssign &VA,
- Register SrcReg,
- LLT NarrowTy) {
+Register CallLowering::IncomingValueHandler::buildExtensionHint(
+ const CCValAssign &VA, Register SrcReg, LLT NarrowTy) {
switch (VA.getLocInfo()) {
case CCValAssign::LocInfo::ZExt: {
return MIRBuilder
@@ -1223,9 +1231,8 @@ static bool isCopyCompatibleType(LLT SrcTy, LLT DstTy) {
(DstTy.isPointer() && SrcTy.isScalar());
}
-void CallLowering::IncomingValueHandler::assignValueToReg(Register ValVReg,
- Register PhysReg,
- CCValAssign VA) {
+void CallLowering::IncomingValueHandler::assignValueToReg(
+ Register ValVReg, Register PhysReg, const CCValAssign &VA) {
const MVT LocVT = VA.getLocVT();
const LLT LocTy(LocVT);
const LLT RegTy = MRI.getType(ValVReg);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index 748fa273d499..d18e65a83484 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -39,7 +39,6 @@ cl::OptionCategory GICombinerOptionCategory(
);
} // end namespace llvm
-namespace {
/// This class acts as the glue that joins the CombinerHelper to the overall
/// Combine algorithm. The CombinerHelper is intended to report the
/// modifications it makes to the MIR to the GISelChangeObserver and the
@@ -48,7 +47,7 @@ namespace {
/// instruction creation will schedule that instruction for a future visit.
/// Other Combiner implementations may require more complex behaviour from
/// their GISelChangeObserver subclass.
-class WorkListMaintainer : public GISelChangeObserver {
+class Combiner::WorkListMaintainer : public GISelChangeObserver {
using WorkListTy = GISelWorkList<512>;
WorkListTy &WorkList;
/// The instructions that have been created but we want to report once they
@@ -88,27 +87,46 @@ public:
LLVM_DEBUG(CreatedInstrs.clear());
}
};
-}
-Combiner::Combiner(CombinerInfo &Info, const TargetPassConfig *TPC)
- : CInfo(Info), TPC(TPC) {
+Combiner::Combiner(MachineFunction &MF, CombinerInfo &CInfo,
+ const TargetPassConfig *TPC, GISelKnownBits *KB,
+ GISelCSEInfo *CSEInfo)
+ : Builder(CSEInfo ? std::make_unique<CSEMIRBuilder>()
+ : std::make_unique<MachineIRBuilder>()),
+ WLObserver(std::make_unique<WorkListMaintainer>(WorkList)),
+ ObserverWrapper(std::make_unique<GISelObserverWrapper>()), CInfo(CInfo),
+ Observer(*ObserverWrapper), B(*Builder), MF(MF), MRI(MF.getRegInfo()),
+ KB(KB), TPC(TPC), CSEInfo(CSEInfo) {
(void)this->TPC; // FIXME: Remove when used.
+
+ // Setup builder.
+ B.setMF(MF);
+ if (CSEInfo)
+ B.setCSEInfo(CSEInfo);
+
+ // Setup observer.
+ ObserverWrapper->addObserver(WLObserver.get());
+ if (CSEInfo)
+ ObserverWrapper->addObserver(CSEInfo);
+
+ B.setChangeObserver(*ObserverWrapper);
}
-bool Combiner::combineMachineInstrs(MachineFunction &MF,
- GISelCSEInfo *CSEInfo) {
+Combiner::~Combiner() = default;
+
+bool Combiner::combineMachineInstrs() {
// If the ISel pipeline failed, do not bother running this pass.
// FIXME: Should this be here or in individual combiner passes.
if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel))
return false;
- Builder =
- CSEInfo ? std::make_unique<CSEMIRBuilder>() : std::make_unique<MachineIRBuilder>();
- MRI = &MF.getRegInfo();
- Builder->setMF(MF);
- if (CSEInfo)
- Builder->setCSEInfo(CSEInfo);
+ // We can't call this in the constructor because the derived class is
+ // uninitialized at that time.
+ if (!HasSetupMF) {
+ HasSetupMF = true;
+ setupMF(MF, KB);
+ }
LLVM_DEBUG(dbgs() << "Generic MI Combiner for: " << MF.getName() << '\n');
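Editor's note: this refactor turns Combiner from a helper driven per call (the old combineMachineInstrs(MF, CSEInfo)) into a class carrying per-function state, with tryCombineAll as the per-instruction hook. A minimal sketch of a derived combiner under the new API (MyCombiner is hypothetical):

    // Derived combiners now subclass Combiner and override tryCombineAll;
    // the base class owns the builder, observers, and worklist.
    class MyCombiner : public Combiner {
    public:
      MyCombiner(MachineFunction &MF, CombinerInfo &CInfo,
                 const TargetPassConfig *TPC, GISelKnownBits *KB,
                 GISelCSEInfo *CSEInfo)
          : Combiner(MF, CInfo, TPC, KB, CSEInfo) {}
      bool tryCombineAll(MachineInstr &I) const override;
    };
    // Usage from a pass: MyCombiner C(MF, CInfo, TPC, KB, CSEInfo);
    //                    bool Changed = C.combineMachineInstrs();
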
@@ -116,26 +134,23 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
bool MFChanged = false;
bool Changed;
- MachineIRBuilder &B = *Builder;
do {
+ WorkList.clear();
+
// Collect all instructions. Do a post order traversal for basic blocks and
// insert with list bottom up, so while we pop_back_val, we'll traverse top
// down RPOT.
Changed = false;
- GISelWorkList<512> WorkList;
- WorkListMaintainer Observer(WorkList);
- GISelObserverWrapper WrapperObserver(&Observer);
- if (CSEInfo)
- WrapperObserver.addObserver(CSEInfo);
- RAIIDelegateInstaller DelInstall(MF, &WrapperObserver);
+
+ RAIIDelegateInstaller DelInstall(MF, ObserverWrapper.get());
for (MachineBasicBlock *MBB : post_order(&MF)) {
for (MachineInstr &CurMI :
llvm::make_early_inc_range(llvm::reverse(*MBB))) {
// Erase dead insts before even adding to the list.
- if (isTriviallyDead(CurMI, *MRI)) {
+ if (isTriviallyDead(CurMI, MRI)) {
LLVM_DEBUG(dbgs() << CurMI << "Is dead; erasing.\n");
- llvm::salvageDebugInfo(*MRI, CurMI);
+ llvm::salvageDebugInfo(MRI, CurMI);
CurMI.eraseFromParent();
continue;
}
@@ -147,8 +162,8 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
while (!WorkList.empty()) {
MachineInstr *CurrInst = WorkList.pop_back_val();
LLVM_DEBUG(dbgs() << "\nTry combining " << *CurrInst;);
- Changed |= CInfo.combine(WrapperObserver, *CurrInst, B);
- Observer.reportFullyCreatedInstrs();
+ Changed |= tryCombineAll(*CurrInst);
+ WLObserver->reportFullyCreatedInstrs();
}
MFChanged |= Changed;
} while (Changed);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index cc7fb3ee1109..91a64d59e154 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6,6 +6,8 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
@@ -395,6 +397,39 @@ void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
replaceRegWith(MRI, DstReg, NewDstReg);
}
+bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
+ "Invalid instruction kind");
+
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+ return Mask.size() == 1;
+}
+
+void CombinerHelper::applyShuffleToExtract(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.setInsertPt(*MI.getParent(), MI);
+
+ int I = MI.getOperand(3).getShuffleMask()[0];
+ Register Src1 = MI.getOperand(1).getReg();
+ LLT Src1Ty = MRI.getType(Src1);
+ int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
+ Register SrcReg;
+ if (I >= Src1NumElts) {
+ SrcReg = MI.getOperand(2).getReg();
+ I -= Src1NumElts;
+ } else if (I >= 0)
+ SrcReg = Src1;
+
+ if (I < 0)
+ Builder.buildUndef(DstReg);
+ else if (!MRI.getType(SrcReg).isVector())
+ Builder.buildCopy(DstReg, SrcReg);
+ else
+ Builder.buildExtractVectorElementConstant(DstReg, SrcReg, I);
+
+ MI.eraseFromParent();
+}
+
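Editor's note: the match/apply pair above handles a G_SHUFFLE_VECTOR whose mask picks a single lane; the index is rebased into whichever source it falls in, and a -1 lane degrades to undef. Roughly, in MIR terms (types illustrative):

    // Before (lane 5 lives in %b at rebased index 5 - 4 = 1):
    //   %d:_(s32) = G_SHUFFLE_VECTOR %a:_(<4 x s32>), %b:_(<4 x s32>),
    //               shufflemask(5)
    // After:
    //   %d:_(s32) = G_EXTRACT_VECTOR_ELT %b:_(<4 x s32>), %idx:_(s64)
    // A mask of -1 yields G_IMPLICIT_DEF; a scalar source becomes a COPY.
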
namespace {
/// Select a preference between two uses. CurrentUse is the current preference
@@ -910,160 +945,332 @@ void CombinerHelper::applySextInRegOfLoad(
MI.eraseFromParent();
}
-bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
- Register &Base, Register &Offset) {
- auto &MF = *MI.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
+static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
+ if (Ty.isVector())
+ return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
+ Ty.getNumElements());
+ return IntegerType::get(C, Ty.getSizeInBits());
+}
-#ifndef NDEBUG
- unsigned Opcode = MI.getOpcode();
- assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
- Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
-#endif
+/// Return true if 'MI' is a load or a store whose address operand may be
+/// folded into its addressing mode.
+static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
+ MachineRegisterInfo &MRI) {
+ TargetLowering::AddrMode AM;
+ auto *MF = MI->getMF();
+ auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
+ if (!Addr)
+ return false;
+
+ AM.HasBaseReg = true;
+ if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
+ AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
+ else
+ AM.Scale = 1; // [reg +/- reg]
- Base = MI.getOperand(1).getReg();
- MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
- if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
+ return TLI.isLegalAddressingMode(
+ MF->getDataLayout(), AM,
+ getTypeForLLT(MI->getMMO().getMemoryType(),
+ MF->getFunction().getContext()),
+ MI->getMMO().getAddrSpace());
+}
+
+static unsigned getIndexedOpc(unsigned LdStOpc) {
+ switch (LdStOpc) {
+ case TargetOpcode::G_LOAD:
+ return TargetOpcode::G_INDEXED_LOAD;
+ case TargetOpcode::G_STORE:
+ return TargetOpcode::G_INDEXED_STORE;
+ case TargetOpcode::G_ZEXTLOAD:
+ return TargetOpcode::G_INDEXED_ZEXTLOAD;
+ case TargetOpcode::G_SEXTLOAD:
+ return TargetOpcode::G_INDEXED_SEXTLOAD;
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+}
+
+bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
+ // Check for legality.
+ LLT PtrTy = MRI.getType(LdSt.getPointerReg());
+ LLT Ty = MRI.getType(LdSt.getReg(0));
+ LLT MemTy = LdSt.getMMO().getMemoryType();
+ SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
+ {{MemTy, MemTy.getSizeInBits(), AtomicOrdering::NotAtomic}});
+ unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
+ SmallVector<LLT> OpTys;
+ if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
+ OpTys = {PtrTy, Ty, Ty};
+ else
+ OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
+
+ LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
+ return isLegal(Q);
+}
+
+static cl::opt<unsigned> PostIndexUseThreshold(
+ "post-index-use-threshold", cl::Hidden, cl::init(32),
+ cl::desc("Number of uses of a base pointer to check before it is no longer "
+ "considered for post-indexing."));
+
+bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
+ Register &Base, Register &Offset,
+ bool &RematOffset) {
+ // We're looking for the following pattern, for either load or store:
+ // %baseptr:_(p0) = ...
+ // G_STORE %val(s64), %baseptr(p0)
+ // %offset:_(s64) = G_CONSTANT i64 -256
+ // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
+ const auto &TLI = getTargetLowering();
+
+ Register Ptr = LdSt.getPointerReg();
+ // If the store is the only use, don't bother.
+ if (MRI.hasOneNonDBGUse(Ptr))
+ return false;
+
+ if (!isIndexedLoadStoreLegal(LdSt))
return false;
- LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
- // FIXME: The following use traversal needs a bail out for patholigical cases.
- for (auto &Use : MRI.use_nodbg_instructions(Base)) {
- if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
+ if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
+ return false;
+
+ MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
+ auto *PtrDef = MRI.getVRegDef(Ptr);
+
+ unsigned NumUsesChecked = 0;
+ for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
+ if (++NumUsesChecked > PostIndexUseThreshold)
+ return false; // Try to avoid exploding compile time.
+
+ auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
+ // The use itself might be dead. This can happen during combines if DCE
+ // hasn't had a chance to run yet. Don't allow it to form an indexed op.
+ if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
continue;
- Offset = Use.getOperand(2).getReg();
+ // Check that the user of this isn't the store, otherwise we'd be generating
+ // an indexed store defining its own use.
+ if (StoredValDef == &Use)
+ continue;
+
+ Offset = PtrAdd->getOffsetReg();
if (!ForceLegalIndexing &&
- !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
- LLVM_DEBUG(dbgs() << " Ignoring candidate with illegal addrmode: "
- << Use);
+ !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
+ /*IsPre*/ false, MRI))
continue;
- }
// Make sure the offset calculation is before the potentially indexed op.
- // FIXME: we really care about dependency here. The offset calculation might
- // be movable.
- MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
- if (!OffsetDef || !dominates(*OffsetDef, MI)) {
- LLVM_DEBUG(dbgs() << " Ignoring candidate with offset after mem-op: "
- << Use);
- continue;
+ MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
+ RematOffset = false;
+ if (!dominates(*OffsetDef, LdSt)) {
+ // If the offset however is just a G_CONSTANT, we can always just
+ // rematerialize it where we need it.
+ if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
+ continue;
+ RematOffset = true;
}
- // FIXME: check whether all uses of Base are load/store with foldable
- // addressing modes. If so, using the normal addr-modes is better than
- // forming an indexed one.
+ for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
+ if (&BasePtrUse == PtrDef)
+ continue;
- bool MemOpDominatesAddrUses = true;
- for (auto &PtrAddUse :
- MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
- if (!dominates(MI, PtrAddUse)) {
- MemOpDominatesAddrUses = false;
- break;
- }
- }
+ // If the user is a later load/store that can be post-indexed, then don't
+ // combine this one.
+ auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
+ if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
+ dominates(LdSt, *BasePtrLdSt) &&
+ isIndexedLoadStoreLegal(*BasePtrLdSt))
+ return false;
- if (!MemOpDominatesAddrUses) {
- LLVM_DEBUG(
- dbgs() << " Ignoring candidate as memop does not dominate uses: "
- << Use);
- continue;
+ // Now we're looking for the key G_PTR_ADD instruction, which contains
+ // the offset add that we want to fold.
+ if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
+ Register PtrAddDefReg = BasePtrUseDef->getReg(0);
+ for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
+ // If the use is in a different block, then we may produce worse code
+ // due to the extra register pressure.
+ if (BaseUseUse.getParent() != LdSt.getParent())
+ return false;
+
+ if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
+ if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
+ return false;
+ }
+ if (!dominates(LdSt, BasePtrUse))
+ return false; // All uses must be dominated by the load/store.
+ }
}
- LLVM_DEBUG(dbgs() << " Found match: " << Use);
- Addr = Use.getOperand(0).getReg();
+ Addr = PtrAdd->getReg(0);
+ Base = PtrAdd->getBaseReg();
return true;
}
return false;
}
-bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
+bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
Register &Base, Register &Offset) {
- auto &MF = *MI.getParent()->getParent();
+ auto &MF = *LdSt.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
-#ifndef NDEBUG
- unsigned Opcode = MI.getOpcode();
- assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
- Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
-#endif
-
- Addr = MI.getOperand(1).getReg();
- MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
- if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
+ Addr = LdSt.getPointerReg();
+ if (!mi_match(Addr, MRI, m_GPtrAdd(m_Reg(Base), m_Reg(Offset))) ||
+ MRI.hasOneNonDBGUse(Addr))
return false;
- Base = AddrDef->getOperand(1).getReg();
- Offset = AddrDef->getOperand(2).getReg();
-
- LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
-
if (!ForceLegalIndexing &&
- !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
- LLVM_DEBUG(dbgs() << " Skipping, not legal for target");
+ !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
+ return false;
+
+ if (!isIndexedLoadStoreLegal(LdSt))
return false;
- }
MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
- if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
- LLVM_DEBUG(dbgs() << " Skipping, frame index would need copy anyway.");
+ if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
return false;
- }
- if (MI.getOpcode() == TargetOpcode::G_STORE) {
+ if (auto *St = dyn_cast<GStore>(&LdSt)) {
// Would require a copy.
- if (Base == MI.getOperand(0).getReg()) {
- LLVM_DEBUG(dbgs() << " Skipping, storing base so need copy anyway.");
+ if (Base == St->getValueReg())
return false;
- }
// We're expecting one use of Addr in MI, but it could also be the
// value stored, which isn't actually dominated by the instruction.
- if (MI.getOperand(0).getReg() == Addr) {
- LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses");
+ if (St->getValueReg() == Addr)
return false;
- }
}
+ // Avoid increasing cross-block register pressure.
+ for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
+ if (AddrUse.getParent() != LdSt.getParent())
+ return false;
+
// FIXME: check whether all uses of the base pointer are constant PtrAdds.
// That might allow us to end base's liveness here by adjusting the constant.
-
- for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
- if (!dominates(MI, UseMI)) {
- LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses.");
- return false;
+ bool RealUse = false;
+ for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
+ if (!dominates(LdSt, AddrUse))
+ return false; // All uses must be dominated by the load/store.
+
+ // If Ptr can be folded into the addressing mode of another use, then it's
+ // not profitable to do this transformation.
+ if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
+ if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
+ RealUse = true;
+ } else {
+ RealUse = true;
}
}
-
- return true;
+ return RealUse;
}
-bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
- IndexedLoadStoreMatchInfo MatchInfo;
- if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
- applyCombineIndexedLoadStore(MI, MatchInfo);
- return true;
+bool CombinerHelper::matchCombineExtractedVectorLoad(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
+
+ // Check if there is a load that defines the vector being extracted from.
+ auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
+ if (!LoadMI)
+ return false;
+
+ Register Vector = MI.getOperand(1).getReg();
+ LLT VecEltTy = MRI.getType(Vector).getElementType();
+
+ assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
+
+ // Only narrow the load if this extract is the vector's sole non-debug use.
+ if (!MRI.hasOneNonDBGUse(Vector))
+ return false;
+
+ // Check if the defining load is simple.
+ if (!LoadMI->isSimple())
+ return false;
+
+ // If the vector element type is not a multiple of a byte then we are unable
+ // to correctly compute an address to load only the extracted element as a
+ // scalar.
+ if (!VecEltTy.isByteSized())
+ return false;
+
+ // Check if the new load that we are going to create is legal
+ // if we are in the post-legalization phase.
+ MachineMemOperand MMO = LoadMI->getMMO();
+ Align Alignment = MMO.getAlign();
+ MachinePointerInfo PtrInfo;
+ uint64_t Offset;
+
+ // Find the appropriate PtrInfo if the offset is a known constant.
+ // This is required to create the memory operand for the narrowed load.
+ // This machine memory operand object helps us infer legality
+ // before we proceed to combine the instruction.
+ if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
+ int Elt = CVal->getZExtValue();
+ // FIXME: should be (ABI size)*Elt.
+ Offset = VecEltTy.getSizeInBits() * Elt / 8;
+ PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
+ } else {
+ // Discard the pointer info except the address space because the memory
+ // operand can't represent this new access since the offset is variable.
+ Offset = VecEltTy.getSizeInBits() / 8;
+ PtrInfo = MachinePointerInfo(MMO.getPointerInfo().getAddrSpace());
}
- return false;
-}
-bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
- unsigned Opcode = MI.getOpcode();
- if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
- Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
+ Alignment = commonAlignment(Alignment, Offset);
+
+ Register VecPtr = LoadMI->getPointerReg();
+ LLT PtrTy = MRI.getType(VecPtr);
+
+ MachineFunction &MF = *MI.getMF();
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
+
+ LegalityQuery::MemDesc MMDesc(*NewMMO);
+
+ LegalityQuery Q = {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}};
+
+ if (!isLegalOrBeforeLegalizer(Q))
+ return false;
+
+ // Load must be allowed and fast on the target.
+ LLVMContext &C = MF.getFunction().getContext();
+ auto &DL = MF.getDataLayout();
+ unsigned Fast = 0;
+ if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
+ &Fast) ||
+ !Fast)
return false;
- // For now, no targets actually support these opcodes so don't waste time
- // running these unless we're forced to for testing.
- if (!ForceLegalIndexing)
+ Register Result = MI.getOperand(0).getReg();
+ Register Index = MI.getOperand(2).getReg();
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ GISelObserverWrapper DummyObserver;
+ LegalizerHelper Helper(B.getMF(), DummyObserver, B);
+ // Get a pointer to the extracted vector element.
+ Register FinalPtr = Helper.getVectorElementPointer(
+ LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
+ Index);
+ // Build the new scalar G_LOAD of the element.
+ B.buildLoad(Result, FinalPtr, PtrInfo, Alignment);
+ // Erase the original G_LOAD instruction.
+ LoadMI->eraseFromParent();
+ };
+
+ return true;
+}
+
+bool CombinerHelper::matchCombineIndexedLoadStore(
+ MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
+ auto &LdSt = cast<GLoadStore>(MI);
+
+ if (LdSt.isAtomic())
return false;
- MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
+ MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
MatchInfo.Offset);
if (!MatchInfo.IsPre &&
- !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
- MatchInfo.Offset))
+ !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
+ MatchInfo.Offset, MatchInfo.RematOffset))
return false;
return true;
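Editor's note: together with the apply function below, a successful post-index match folds the base-plus-offset pattern from the comment in findPostIndexCandidate into one writeback operation. Roughly (illustrative MIR; the trailing immediate is 1 for pre-, 0 for post-indexing):

    // Before:
    //   G_STORE %val(s64), %baseptr(p0)
    //   %offset:_(s64) = G_CONSTANT i64 -256
    //   %newaddr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
    // After (post-indexed):
    //   %newaddr:_(p0) = G_INDEXED_STORE %val(s64), %baseptr(p0),
    //                                    %offset(s64), 0
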
@@ -1072,28 +1279,21 @@ bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadS
void CombinerHelper::applyCombineIndexedLoadStore(
MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
- MachineIRBuilder MIRBuilder(MI);
+ Builder.setInstrAndDebugLoc(MI);
unsigned Opcode = MI.getOpcode();
bool IsStore = Opcode == TargetOpcode::G_STORE;
- unsigned NewOpcode;
- switch (Opcode) {
- case TargetOpcode::G_LOAD:
- NewOpcode = TargetOpcode::G_INDEXED_LOAD;
- break;
- case TargetOpcode::G_SEXTLOAD:
- NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
- break;
- case TargetOpcode::G_ZEXTLOAD:
- NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
- break;
- case TargetOpcode::G_STORE:
- NewOpcode = TargetOpcode::G_INDEXED_STORE;
- break;
- default:
- llvm_unreachable("Unknown load/store opcode");
+ unsigned NewOpcode = getIndexedOpc(Opcode);
+
+ // If the offset constant didn't happen to dominate the load/store, we can
+ // just clone it as needed.
+ if (MatchInfo.RematOffset) {
+ auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
+ auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
+ *OldCst->getOperand(1).getCImm());
+ MatchInfo.Offset = NewCst.getReg(0);
}
- auto MIB = MIRBuilder.buildInstr(NewOpcode);
+ auto MIB = Builder.buildInstr(NewOpcode);
if (IsStore) {
MIB.addDef(MatchInfo.Addr);
MIB.addUse(MI.getOperand(0).getReg());
@@ -1105,6 +1305,7 @@ void CombinerHelper::applyCombineIndexedLoadStore(
MIB.addUse(MatchInfo.Base);
MIB.addUse(MatchInfo.Offset);
MIB.addImm(MatchInfo.IsPre);
+ MIB->cloneMemRefs(*MI.getMF(), MI);
MI.eraseFromParent();
AddrDef.eraseFromParent();
@@ -1271,13 +1472,7 @@ void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
Observer.changedInstr(*BrCond);
}
-static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
- if (Ty.isVector())
- return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
- Ty.getNumElements());
- return IntegerType::get(C, Ty.getSizeInBits());
-}
-
+
bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
MachineIRBuilder HelperBuilder(MI);
GISelObserverWrapper DummyObserver;
@@ -1394,7 +1589,7 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
if (AccessTy) {
AMNew.HasBaseReg = true;
TargetLoweringBase::AddrMode AMOld;
- AMOld.BaseOffs = MaybeImm2Val->Value.getSExtValue();
+ AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
AMOld.HasBaseReg = true;
unsigned AS = MRI.getType(Add2).getAddressSpace();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
@@ -1456,7 +1651,7 @@ bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
// Pass the combined immediate to the apply function.
MatchInfo.Imm =
- (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue();
+ (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
MatchInfo.Reg = Base;
// There is no simple replacement for a saturating unsigned left shift that
@@ -1535,7 +1730,7 @@ bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
// Find a matching one-use shift by constant.
const Register C1 = MI.getOperand(2).getReg();
auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
- if (!MaybeImmVal)
+ if (!MaybeImmVal || MaybeImmVal->Value == 0)
return false;
const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
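Editor's note: the new MaybeImmVal->Value == 0 bail-out above appears to guard against a degenerate outer shift — shifting by zero is a no-op, so rewriting it would rebuild an equivalent pattern that the worklist immediately re-queues. A hedged illustration of the input it now rejects:

    // Outer shift amount 0 (previously accepted, now skipped):
    //   %zero:_(s32) = G_CONSTANT i32 0
    //   %t:_(s32) = G_AND %shl1, %shl2
    //   %d:_(s32) = G_SHL %t, %zero(s32)
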
@@ -1685,6 +1880,8 @@ void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
RegisterImmPair &MatchData) {
assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
+ if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
+ return false;
Register LHS = MI.getOperand(1).getReg();
@@ -2248,35 +2445,6 @@ void CombinerHelper::applyCombineExtOfExt(
}
}
-void CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
-
- Builder.setInstrAndDebugLoc(MI);
- Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg,
- MI.getFlags());
- MI.eraseFromParent();
-}
-
-bool CombinerHelper::matchCombineFAbsOfFNeg(MachineInstr &MI,
- BuildFnTy &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
- Register Src = MI.getOperand(1).getReg();
- Register NegSrc;
-
- if (!mi_match(Src, MRI, m_GFNeg(m_Reg(NegSrc))))
- return false;
-
- MatchInfo = [=, &MI](MachineIRBuilder &B) {
- Observer.changingInstr(MI);
- MI.getOperand(1).setReg(NegSrc);
- Observer.changedInstr(MI);
- };
- return true;
-}
-
bool CombinerHelper::matchCombineTruncOfExt(
MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
@@ -2580,6 +2748,16 @@ bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
MaybeCst->getSExtValue() == C;
}
+bool CombinerHelper::matchConstantFPOp(const MachineOperand &MOP, double C) {
+ if (!MOP.isReg())
+ return false;
+ std::optional<FPValueAndVReg> MaybeCst;
+ if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
+ return false;
+
+ return MaybeCst->Value.isExactlyValue(C);
+}
+
void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
unsigned OpIdx) {
assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
@@ -2599,6 +2777,45 @@ void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
replaceRegWith(MRI, OldReg, Replacement);
}
+bool CombinerHelper::matchConstantLargerBitWidth(MachineInstr &MI,
+ unsigned ConstIdx) {
+ Register ConstReg = MI.getOperand(ConstIdx).getReg();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ // Get the shift amount
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
+ if (!VRegAndVal)
+ return false;
+
+ // Return true if the shift amount is >= the bitwidth.
+ return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
+}
+
+void CombinerHelper::applyFunnelShiftConstantModulo(MachineInstr &MI) {
+ assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
+ MI.getOpcode() == TargetOpcode::G_FSHR) &&
+ "This is not a funnel shift operation");
+
+ Register ConstReg = MI.getOperand(3).getReg();
+ LLT ConstTy = MRI.getType(ConstReg);
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
+ assert((VRegAndVal) && "Value is not a constant");
+
+ // Calculate the new Shift Amount = Old Shift Amount % BitWidth
+ APInt NewConst = VRegAndVal->Value.urem(
+ APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
+
+ Builder.setInstrAndDebugLoc(MI);
+ auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
+ Builder.buildInstr(
+ MI.getOpcode(), {MI.getOperand(0)},
+ {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
+
+ MI.eraseFromParent();
+}
+
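Editor's note: a worked instance of the modulo rewrite above, on s32 (values illustrative). A funnel-shift amount of 70 is first flagged by matchConstantLargerBitWidth (70 >= 32), then reduced to 70 % 32 = 6:

    // Before:
    //   %amt:_(s32) = G_CONSTANT i32 70
    //   %d:_(s32) = G_FSHL %a:_(s32), %b:_(s32), %amt(s32)
    // After:
    //   %c:_(s32) = G_CONSTANT i32 6
    //   %d:_(s32) = G_FSHL %a:_(s32), %b:_(s32), %c(s32)
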
bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_SELECT);
// Match (cond ? x : x)
@@ -2652,6 +2869,13 @@ void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
MI.eraseFromParent();
}
+void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI,
+                                              ConstantFP *CFP) {
+ assert(MI.getNumDefs() == 1 && "Expected only one def?");
+ Builder.setInstr(MI);
+ Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
+ MI.eraseFromParent();
+}
+
void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
@@ -3246,7 +3470,7 @@ bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
unsigned BinOpcode = MI.getOpcode();
- // We know know one of the operands is a select of constants. Now verify that
+ // We know that one of the operands is a select of constants. Now verify that
// the other binary operator operand is either a constant, or we can handle a
// variable.
bool CanFoldNonConst =
@@ -4141,8 +4365,7 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
Register Dst = MI.getOperand(0).getReg();
LLT Ty = MRI.getType(Dst);
LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
- if (!getTargetLowering().isConstantUnsignedBitfieldExtractLegal(
- TargetOpcode::G_UBFX, Ty, ExtractTy))
+ if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
return false;
int64_t AndImm, LSBImm;
@@ -4228,8 +4451,7 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd(
const Register Dst = MI.getOperand(0).getReg();
LLT Ty = MRI.getType(Dst);
LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
- if (!getTargetLowering().isConstantUnsignedBitfieldExtractLegal(
- TargetOpcode::G_UBFX, Ty, ExtractTy))
+ if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
return false;
// Try to match shr (and x, c1), c2
@@ -4279,20 +4501,20 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd(
}
bool CombinerHelper::reassociationCanBreakAddressingModePattern(
- MachineInstr &PtrAdd) {
- assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);
+ MachineInstr &MI) {
+ auto &PtrAdd = cast<GPtrAdd>(MI);
- Register Src1Reg = PtrAdd.getOperand(1).getReg();
- MachineInstr *Src1Def = getOpcodeDef(TargetOpcode::G_PTR_ADD, Src1Reg, MRI);
+ Register Src1Reg = PtrAdd.getBaseReg();
+ auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
if (!Src1Def)
return false;
- Register Src2Reg = PtrAdd.getOperand(2).getReg();
+ Register Src2Reg = PtrAdd.getOffsetReg();
if (MRI.hasOneNonDBGUse(Src1Reg))
return false;
- auto C1 = getIConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
+ auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
if (!C1)
return false;
auto C2 = getIConstantVRegVal(Src2Reg, MRI);
@@ -4303,7 +4525,7 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern(
const APInt &C2APIntVal = *C2;
const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
- for (auto &UseMI : MRI.use_nodbg_instructions(Src1Reg)) {
+ for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
// This combine may end up running before ptrtoint/inttoptr combines
// manage to eliminate redundant conversions, so try to look through them.
MachineInstr *ConvUseMI = &UseMI;
@@ -4316,9 +4538,8 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern(
ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
ConvUseOpc = ConvUseMI->getOpcode();
}
- auto LoadStore = ConvUseOpc == TargetOpcode::G_LOAD ||
- ConvUseOpc == TargetOpcode::G_STORE;
- if (!LoadStore)
+ auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
+ if (!LdStMI)
continue;
// Is x[offset2] already not a legal addressing mode? If so then
// reassociating the constants breaks nothing (we test offset2 because
@@ -4326,11 +4547,9 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern(
TargetLoweringBase::AddrMode AM;
AM.HasBaseReg = true;
AM.BaseOffs = C2APIntVal.getSExtValue();
- unsigned AS =
- MRI.getType(ConvUseMI->getOperand(1).getReg()).getAddressSpace();
- Type *AccessTy =
- getTypeForLLT(MRI.getType(ConvUseMI->getOperand(0).getReg()),
- PtrAdd.getMF()->getFunction().getContext());
+ unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
+ Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
+ PtrAdd.getMF()->getFunction().getContext());
const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
AccessTy, AS))
@@ -4519,7 +4738,19 @@ bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI,
return false;
}
-bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
+bool CombinerHelper::matchConstantFoldCastOp(MachineInstr &MI,
+                                             APInt &MatchInfo) {
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Register SrcOp = MI.getOperand(1).getReg();
+
+ if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
+ MatchInfo = *MaybeCst;
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchConstantFoldBinOp(MachineInstr &MI,
+                                            APInt &MatchInfo) {
Register Op1 = MI.getOperand(1).getReg();
Register Op2 = MI.getOperand(2).getReg();
auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
@@ -4529,6 +4760,42 @@ bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
return true;
}
+bool CombinerHelper::matchConstantFoldFPBinOp(MachineInstr &MI,
+                                              ConstantFP *&MatchInfo) {
+ Register Op1 = MI.getOperand(1).getReg();
+ Register Op2 = MI.getOperand(2).getReg();
+ auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
+ if (!MaybeCst)
+ return false;
+ MatchInfo =
+ ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
+ return true;
+}
+
+bool CombinerHelper::matchConstantFoldFMA(MachineInstr &MI,
+ ConstantFP *&MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FMA ||
+ MI.getOpcode() == TargetOpcode::G_FMAD);
+ auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
+
+ const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
+ if (!Op3Cst)
+ return false;
+
+ const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
+ if (!Op2Cst)
+ return false;
+
+ const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
+ if (!Op1Cst)
+ return false;
+
+ APFloat Op1F = Op1Cst->getValueAPF();
+ Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
+ APFloat::rmNearestTiesToEven);
+ MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
+ return true;
+}
+
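Editor's note: the FMA fold above evaluates an all-constant G_FMA/G_FMAD with APFloat::fusedMultiplyAdd, i.e. one multiply-add with a single rounding. A standalone numeric check of that helper (sketch, not part of the patch):

    #include "llvm/ADT/APFloat.h"
    #include <cassert>
    using namespace llvm;

    void checkFusedFold() {
      APFloat A(2.0), B(3.0), C(1.0);
      // A := A * B + C, rounded once to nearest-even: 2*3+1 == 7.
      A.fusedMultiplyAdd(B, C, APFloat::rmNearestTiesToEven);
      assert(A.isExactlyValue(7.0));
    }
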
bool CombinerHelper::matchNarrowBinopFeedingAnd(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
// Look for a binop feeding into an AND with a mask:
@@ -6018,12 +6285,36 @@ bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
}
-bool CombinerHelper::tryCombine(MachineInstr &MI) {
- if (tryCombineCopy(MI))
- return true;
- if (tryCombineExtendingLoads(MI))
- return true;
- if (tryCombineIndexedLoadStore(MI))
+bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) {
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ auto *LHSDef = MRI.getVRegDef(LHS);
+ if (getIConstantVRegVal(LHS, MRI).has_value())
return true;
- return false;
+
+ // LHS may be a G_CONSTANT_FOLD_BARRIER. If so we commute
+ // as long as we don't already have a constant on the RHS.
+ if (LHSDef->getOpcode() != TargetOpcode::G_CONSTANT_FOLD_BARRIER)
+ return false;
+ return MRI.getVRegDef(RHS)->getOpcode() !=
+ TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
+ !getIConstantVRegVal(RHS, MRI);
+}
+
+bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) {
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ std::optional<FPValueAndVReg> ValAndVReg;
+ if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
+ return false;
+ return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
+}
+
+void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
+ Observer.changingInstr(MI);
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+ MI.getOperand(1).setReg(RHSReg);
+ MI.getOperand(2).setReg(LHSReg);
+ Observer.changedInstr(MI);
}
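Editor's note: these three hooks canonicalize commutative operations so constants (and constant-fold barriers) end up on the RHS, which downstream matchers assume. Roughly (illustrative MIR):

    // Before:
    //   %c:_(s32) = G_CONSTANT i32 42
    //   %d:_(s32) = G_ADD %c, %x
    // After applyCommuteBinOpOperands:
    //   %d:_(s32) = G_ADD %x, %c
    // The match refuses to fire when the RHS is already a constant or a
    // G_CONSTANT_FOLD_BARRIER, so the rewrite cannot oscillate.
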
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
index d747cbf5aadc..26752369a771 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
@@ -26,12 +26,19 @@ GIMatchTableExecutor::MatcherState::MatcherState(unsigned MaxRenderers)
GIMatchTableExecutor::GIMatchTableExecutor() = default;
-bool GIMatchTableExecutor::isOperandImmEqual(
- const MachineOperand &MO, int64_t Value,
- const MachineRegisterInfo &MRI) const {
- if (MO.isReg() && MO.getReg())
+bool GIMatchTableExecutor::isOperandImmEqual(const MachineOperand &MO,
+ int64_t Value,
+ const MachineRegisterInfo &MRI,
+ bool Splat) const {
+ if (MO.isReg() && MO.getReg()) {
if (auto VRegVal = getIConstantVRegValWithLookThrough(MO.getReg(), MRI))
return VRegVal->Value.getSExtValue() == Value;
+
+ if (Splat) {
+ if (auto VRegVal = getIConstantSplatVal(MO.getReg(), MRI))
+ return VRegVal->getSExtValue() == Value;
+ }
+ }
return false;
}
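Editor's note: with the new Splat flag, an immediate predicate can also accept a splat vector of the requested constant, not just a scalar vreg. Illustratively:

    // Now matches Value = 1 when Splat is set:
    //   %c:_(s32) = G_CONSTANT i32 1
    //   %v:_(<4 x s32>) = G_BUILD_VECTOR %c, %c, %c, %c
    // getIConstantSplatVal(%v) recovers the splatted 1; previously only
    // the plain %c operand compared equal.
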
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 363ffbfa90b5..ea8c20cdcd45 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Module.h"
+#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "gisel-known-bits"
@@ -48,6 +49,8 @@ Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
}
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
default:
return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
}
@@ -72,7 +75,7 @@ KnownBits GISelKnownBits::getKnownBits(Register R, const APInt &DemandedElts,
assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared");
KnownBits Known;
- computeKnownBitsImpl(R, Known, DemandedElts);
+ computeKnownBitsImpl(R, Known, DemandedElts, Depth);
ComputeKnownBitsCache.clear();
return Known;
}
@@ -726,6 +729,8 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
}
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
default: {
unsigned NumBits =
TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);
@@ -769,3 +774,12 @@ void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) {
return false;
}
+
+GISelKnownBits &GISelKnownBitsAnalysis::get(MachineFunction &MF) {
+ if (!Info) {
+ unsigned MaxDepth =
+ MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6;
+ Info = std::make_unique<GISelKnownBits>(MF, MaxDepth);
+ }
+ return *Info.get();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 9a67a8d05a4d..bea29642cd00 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -62,6 +62,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
@@ -80,6 +81,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/MemoryOpRemark.h"
#include <algorithm>
#include <cassert>
@@ -127,7 +129,7 @@ static void reportTranslationError(MachineFunction &MF,
ORE.emit(R);
}
-IRTranslator::IRTranslator(CodeGenOpt::Level optlevel)
+IRTranslator::IRTranslator(CodeGenOptLevel optlevel)
: MachineFunctionPass(ID), OptLevel(optlevel) {}
#ifndef NDEBUG
@@ -173,7 +175,7 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
AU.addRequired<GISelCSEAnalysisWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
- if (OptLevel != CodeGenOpt::None) {
+ if (OptLevel != CodeGenOptLevel::None) {
AU.addRequired<BranchProbabilityInfoWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
}
@@ -358,7 +360,7 @@ bool IRTranslator::translateCompare(const User &U,
bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
const ReturnInst &RI = cast<ReturnInst>(U);
const Value *Ret = RI.getReturnValue();
- if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0)
+ if (Ret && DL->getTypeStoreSize(Ret->getType()).isZero())
Ret = nullptr;
ArrayRef<Register> VRegs;
@@ -578,7 +580,8 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
if (BrInst.isUnconditional()) {
// If the unconditional target is the layout successor, fallthrough.
- if (OptLevel == CodeGenOpt::None || !CurMBB.isLayoutSuccessor(Succ0MBB))
+ if (OptLevel == CodeGenOptLevel::None ||
+ !CurMBB.isLayoutSuccessor(Succ0MBB))
MIRBuilder.buildBr(*Succ0MBB);
// Link successors.
@@ -720,7 +723,7 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
return true;
}
- SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr);
+ SL->findJumpTables(Clusters, &SI, std::nullopt, DefaultMBB, nullptr, nullptr);
SL->findBitTestClusters(Clusters, &SI);
LLVM_DEBUG({
@@ -766,7 +769,7 @@ void IRTranslator::emitJumpTable(SwitchCG::JumpTable &JT,
MIB.setMBB(*MBB);
MIB.setDebugLoc(CurBuilder->getDebugLoc());
- Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
+ Type *PtrIRTy = PointerType::getUnqual(MF->getFunction().getContext());
const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
auto Table = MIB.buildJumpTable(PtrTy, JT.JTI);
@@ -789,7 +792,7 @@ bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT,
// This value may be smaller or larger than the target's pointer type, and
// therefore require extension or truncating.
- Type *PtrIRTy = SValue.getType()->getPointerTo();
+ auto *PtrIRTy = PointerType::getUnqual(SValue.getContext());
const LLT PtrScalarTy = LLT::scalar(DL->getTypeSizeInBits(PtrIRTy));
Sub = MIB.buildZExtOrTrunc(PtrScalarTy, Sub);
@@ -1014,7 +1017,7 @@ void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0);
auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg);
- Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
+ Type *PtrIRTy = PointerType::getUnqual(MF->getFunction().getContext());
const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
LLT MaskTy = SwitchOpTy;
@@ -1483,6 +1486,9 @@ bool IRTranslator::translateBitCast(const User &U,
bool IRTranslator::translateCast(unsigned Opcode, const User &U,
MachineIRBuilder &MIRBuilder) {
+ if (U.getType()->getScalarType()->isBFloatTy() ||
+ U.getOperand(0)->getType()->getScalarType()->isBFloatTy())
+ return false;
Register Op = getOrCreateVReg(*U.getOperand(0));
Register Res = getOrCreateVReg(U);
MIRBuilder.buildInstr(Opcode, {Res}, {Op});
@@ -1498,6 +1504,12 @@ bool IRTranslator::translateGetElementPtr(const User &U,
Type *OffsetIRTy = DL->getIndexType(PtrIRTy);
LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
+ uint32_t Flags = 0;
+ if (isa<Instruction>(U)) {
+ const Instruction &I = cast<Instruction>(U);
+ Flags = MachineInstr::copyFlagsFromInstruction(I);
+ }
+
// Normalize Vector GEP - all scalar operands should be converted to the
// splat vector.
unsigned VectorWidth = 0;
@@ -1538,8 +1550,10 @@ bool IRTranslator::translateGetElementPtr(const User &U,
// If this is a scalar constant or a splat vector of constants,
// handle it quickly.
if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
- Offset += ElementSize * CI->getSExtValue();
- continue;
+ if (std::optional<int64_t> Val = CI->getValue().trySExtValue()) {
+ Offset += ElementSize * *Val;
+ continue;
+ }
}
if (Offset != 0) {
@@ -1578,7 +1592,12 @@ bool IRTranslator::translateGetElementPtr(const User &U,
if (Offset != 0) {
auto OffsetMIB =
MIRBuilder.buildConstant(OffsetTy, Offset);
- MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0));
+
+ if (int64_t(Offset) >= 0 && cast<GEPOperator>(U).isInBounds())
+ Flags |= MachineInstr::MIFlag::NoUWrap;
+
+ MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0),
+ Flags);
return true;
}
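Editor's note: beyond forwarding the IR flags, the inbounds + non-negative-offset case now marks the trailing G_PTR_ADD as nuw, recording that the address arithmetic cannot wrap. For example (illustrative):

    // IR:   %p = getelementptr inbounds i32, ptr %base, i64 3
    // MIR:  %off:_(s64) = G_CONSTANT i64 12
    //       %p:_(p0) = nuw G_PTR_ADD %base, %off(s64)
    // A negative constant offset keeps the plain, unflagged G_PTR_ADD.
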
@@ -1742,6 +1761,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_FEXP;
case Intrinsic::exp2:
return TargetOpcode::G_FEXP2;
+ case Intrinsic::exp10:
+ return TargetOpcode::G_FEXP10;
case Intrinsic::fabs:
return TargetOpcode::G_FABS;
case Intrinsic::copysign:
@@ -1797,6 +1818,10 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_VECREDUCE_FMIN;
case Intrinsic::vector_reduce_fmax:
return TargetOpcode::G_VECREDUCE_FMAX;
+ case Intrinsic::vector_reduce_fminimum:
+ return TargetOpcode::G_VECREDUCE_FMINIMUM;
+ case Intrinsic::vector_reduce_fmaximum:
+ return TargetOpcode::G_VECREDUCE_FMAXIMUM;
case Intrinsic::vector_reduce_add:
return TargetOpcode::G_VECREDUCE_ADD;
case Intrinsic::vector_reduce_mul:
@@ -1819,6 +1844,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_LROUND;
case Intrinsic::llround:
return TargetOpcode::G_LLROUND;
+ case Intrinsic::get_fpmode:
+ return TargetOpcode::G_GET_FPMODE;
}
return Intrinsic::not_intrinsic;
}
@@ -1939,6 +1966,8 @@ bool IRTranslator::translateIfEntryValueArgument(
if (!PhysReg)
return false;
+ // Append an op deref to account for the fact that this is a dbg_declare.
+ Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
MF->setVariableDbgInfo(DebugInst.getVariable(), Expr, *PhysReg,
DebugInst.getDebugLoc());
return true;
@@ -1966,7 +1995,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end: {
// No stack colouring in O0, discard region information.
- if (MF->getTarget().getOptLevel() == CodeGenOpt::None)
+ if (MF->getTarget().getOptLevel() == CodeGenOptLevel::None)
return true;
unsigned Op = ID == Intrinsic::lifetime_start ? TargetOpcode::LIFETIME_START
@@ -2041,12 +2070,12 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
auto &TLI = *MF->getSubtarget().getTargetLowering();
Value *Ptr = CI.getArgOperand(0);
unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8;
+ Align Alignment = getKnownAlignment(Ptr, *DL);
- // FIXME: Get alignment
MIRBuilder.buildInstr(TargetOpcode::G_VASTART, {}, {getOrCreateVReg(*Ptr)})
.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Ptr),
MachineMemOperand::MOStore,
- ListSize, Align(1)));
+ ListSize, Alignment));
return true;
}
case Intrinsic::dbg_value: {
@@ -2229,31 +2258,12 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return true;
}
case Intrinsic::stacksave: {
- // Save the stack pointer to the location provided by the intrinsic.
- Register Reg = getOrCreateVReg(CI);
- Register StackPtr = MF->getSubtarget()
- .getTargetLowering()
- ->getStackPointerRegisterToSaveRestore();
-
- // If the target doesn't specify a stack pointer, then fall back.
- if (!StackPtr)
- return false;
-
- MIRBuilder.buildCopy(Reg, StackPtr);
+ MIRBuilder.buildInstr(TargetOpcode::G_STACKSAVE, {getOrCreateVReg(CI)}, {});
return true;
}
case Intrinsic::stackrestore: {
- // Restore the stack pointer from the location provided by the intrinsic.
- Register Reg = getOrCreateVReg(*CI.getArgOperand(0));
- Register StackPtr = MF->getSubtarget()
- .getTargetLowering()
- ->getStackPointerRegisterToSaveRestore();
-
- // If the target doesn't specify a stack pointer, then fall back.
- if (!StackPtr)
- return false;
-
- MIRBuilder.buildCopy(StackPtr, Reg);
+ MIRBuilder.buildInstr(TargetOpcode::G_STACKRESTORE, {},
+ {getOrCreateVReg(*CI.getArgOperand(0))});
return true;
}
case Intrinsic::cttz:
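Editor's note: rather than copying the target's stack-pointer register during translation (and forcing a fallback when none is declared), stacksave/stackrestore now emit generic opcodes and leave the choice to legalization. Sketch of the resulting MIR (illustrative):

    //   %sp:_(p0) = G_STACKSAVE
    //   ...
    //   G_STACKRESTORE %sp(p0)
    // Targets lower these during legalization, e.g. back to SP copies.
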
@@ -2387,6 +2397,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0};
return CLI->lowerCall(MIRBuilder, Info);
}
+ case Intrinsic::amdgcn_cs_chain:
+ return translateCallBase(CI, MIRBuilder);
case Intrinsic::fptrunc_round: {
uint32_t Flags = MachineInstr::copyFlagsFromInstruction(CI);
@@ -2415,6 +2427,31 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return true;
}
+ case Intrinsic::set_fpmode: {
+ Value *FPState = CI.getOperand(0);
+ MIRBuilder.buildInstr(TargetOpcode::G_SET_FPMODE, {},
+ {getOrCreateVReg(*FPState)});
+ return true;
+ }
+ case Intrinsic::reset_fpmode: {
+ MIRBuilder.buildInstr(TargetOpcode::G_RESET_FPMODE, {}, {});
+ return true;
+ }
+ case Intrinsic::prefetch: {
+ Value *Addr = CI.getOperand(0);
+ unsigned RW = cast<ConstantInt>(CI.getOperand(1))->getZExtValue();
+ unsigned Locality = cast<ConstantInt>(CI.getOperand(2))->getZExtValue();
+ unsigned CacheType = cast<ConstantInt>(CI.getOperand(3))->getZExtValue();
+
+ auto Flags = RW ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
+ auto &MMO = *MF->getMachineMemOperand(MachinePointerInfo(Addr), Flags,
+ LLT(), Align());
+
+ MIRBuilder.buildPrefetch(getOrCreateVReg(*Addr), RW, Locality, CacheType,
+ MMO);
+
+ return true;
+ }
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
@@ -2493,7 +2530,8 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
auto TII = MF->getTarget().getIntrinsicInfo();
const Function *F = CI.getCalledFunction();
- // FIXME: support Windows dllimport function calls.
+ // FIXME: support Windows dllimport function calls and calls through
+ // weak symbols.
if (F && (F->hasDLLImportStorageClass() ||
(MF->getTarget().getTargetTriple().isOSWindows() &&
F->hasExternalWeakLinkage())))
@@ -2533,8 +2571,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
// Ignore the callsite attributes. Backend code is most likely not expecting
// an intrinsic to sometimes have side effects and sometimes not.
- MachineInstrBuilder MIB =
- MIRBuilder.buildIntrinsic(ID, ResultRegs, !F->doesNotAccessMemory());
+ MachineInstrBuilder MIB = MIRBuilder.buildIntrinsic(ID, ResultRegs);
if (isa<FPMathOperator>(CI))
MIB->copyIRFlags(CI);
@@ -2676,6 +2713,13 @@ bool IRTranslator::translateInvoke(const User &U,
if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI()))
return false;
+ // FIXME: support Windows dllimport function calls and calls through
+ // weak symbols.
+ if (Fn && (Fn->hasDLLImportStorageClass() ||
+ (MF->getTarget().getTargetTriple().isOSWindows() &&
+ Fn->hasExternalWeakLinkage())))
+ return false;
+
bool LowerInlineAsm = I.isInlineAsm();
bool NeedEHLabel = true;
@@ -2868,7 +2912,7 @@ bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) {
}
bool IRTranslator::translateUnreachable(const User &U, MachineIRBuilder &MIRBuilder) {
- if (!MF->getTarget().Options.TrapUnreachable)
+ if (!MF->getTarget().Options.TrapUnreachable)
return true;
auto &UI = cast<UnreachableInst>(U);
@@ -2885,7 +2929,7 @@ bool IRTranslator::translateUnreachable(const User &U, MachineIRBuilder &MIRBuil
}
}
- MIRBuilder.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>(), true);
+ MIRBuilder.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>());
return true;
}
@@ -3321,7 +3365,7 @@ bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD,
CurBuilder->setInsertPt(*ParentBB, ParentBB->end());
// First create the loads to the guard/stack slot for the comparison.
const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
- Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
+ Type *PtrIRTy = PointerType::getUnqual(MF->getFunction().getContext());
const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
LLT PtrMemTy = getLLTForMVT(TLI.getPointerMemTy(*DL));
@@ -3331,7 +3375,7 @@ bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD,
Register Guard;
Register StackSlotPtr = CurBuilder->buildFrameIndex(PtrTy, FI).getReg(0);
const Module &M = *ParentBB->getParent()->getFunction().getParent();
- Align Align = DL->getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));
+ Align Align = DL->getPrefTypeAlign(PointerType::getUnqual(M.getContext()));
// Generate code to load the content of the guard slot.
Register GuardVal =
@@ -3500,7 +3544,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
ORE = std::make_unique<OptimizationRemarkEmitter>(&F);
const TargetMachine &TM = MF->getTarget();
TM.resetTargetOptions(F);
- EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F);
+ EnableOpts = OptLevel != CodeGenOptLevel::None && !skipFunction(F);
FuncInfo.MF = MF;
if (EnableOpts) {
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index 3925611f1485..4089a5e941b0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -133,71 +133,6 @@ static void getRegistersForValue(MachineFunction &MF,
}
}
-/// Return an integer indicating how general CT is.
-static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
- switch (CT) {
- case TargetLowering::C_Immediate:
- case TargetLowering::C_Other:
- case TargetLowering::C_Unknown:
- return 0;
- case TargetLowering::C_Register:
- return 1;
- case TargetLowering::C_RegisterClass:
- return 2;
- case TargetLowering::C_Memory:
- case TargetLowering::C_Address:
- return 3;
- }
- llvm_unreachable("Invalid constraint type");
-}
-
-static void chooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
- const TargetLowering *TLI) {
- assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
- unsigned BestIdx = 0;
- TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
- int BestGenerality = -1;
-
- // Loop over the options, keeping track of the most general one.
- for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
- TargetLowering::ConstraintType CType =
- TLI->getConstraintType(OpInfo.Codes[i]);
-
- // Indirect 'other' or 'immediate' constraints are not allowed.
- if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
- CType == TargetLowering::C_Register ||
- CType == TargetLowering::C_RegisterClass))
- continue;
-
- // If this is an 'other' or 'immediate' constraint, see if the operand is
- // valid for it. For example, on X86 we might have an 'rI' constraint. If
- // the operand is an integer in the range [0..31] we want to use I (saving a
- // load of a register), otherwise we must use 'r'.
- if (CType == TargetLowering::C_Other ||
- CType == TargetLowering::C_Immediate) {
- assert(OpInfo.Codes[i].size() == 1 &&
- "Unhandled multi-letter 'other' constraint");
- // FIXME: prefer immediate constraints if the target allows it
- }
-
- // Things with matching constraints can only be registers, per gcc
- // documentation. This mainly affects "g" constraints.
- if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
- continue;
-
- // This constraint letter is more general than the previous one, use it.
- int Generality = getConstraintGenerality(CType);
- if (Generality > BestGenerality) {
- BestType = CType;
- BestIdx = i;
- BestGenerality = Generality;
- }
- }
-
- OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
- OpInfo.ConstraintType = BestType;
-}
-
static void computeConstraintToUse(const TargetLowering *TLI,
TargetLowering::AsmOperandInfo &OpInfo) {
assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
@@ -207,7 +142,18 @@ static void computeConstraintToUse(const TargetLowering *TLI,
OpInfo.ConstraintCode = OpInfo.Codes[0];
OpInfo.ConstraintType = TLI->getConstraintType(OpInfo.ConstraintCode);
} else {
- chooseConstraint(OpInfo, TLI);
+ TargetLowering::ConstraintGroup G = TLI->getConstraintPreferences(OpInfo);
+ if (G.empty())
+ return;
+ // FIXME: prefer immediate constraints if the target allows it
+ unsigned BestIdx = 0;
+ for (const unsigned E = G.size();
+ BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
+ G[BestIdx].second == TargetLowering::C_Immediate);
+ ++BestIdx)
+ ;
+ OpInfo.ConstraintCode = G[BestIdx].first;
+ OpInfo.ConstraintType = G[BestIdx].second;
}
// 'X' matches anything.
@@ -229,8 +175,8 @@ static void computeConstraintToUse(const TargetLowering *TLI,
}
static unsigned getNumOpRegs(const MachineInstr &I, unsigned OpIdx) {
- unsigned Flag = I.getOperand(OpIdx).getImm();
- return InlineAsm::getNumOperandRegisters(Flag);
+ const InlineAsm::Flag F(I.getOperand(OpIdx).getImm());
+ return F.getNumOperandRegisters();
}
static bool buildAnyextOrCopy(Register Dst, Register Src,
@@ -373,16 +319,16 @@ bool InlineAsmLowering::lowerInlineAsm(
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
- unsigned ConstraintID =
+ const InlineAsm::ConstraintCode ConstraintID =
TLI->getInlineAsmMemConstraint(OpInfo.ConstraintCode);
- assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ assert(ConstraintID != InlineAsm::ConstraintCode::Unknown &&
"Failed to convert memory constraint code to constraint id.");
// Add information to the INLINEASM instruction to know about this
// output.
- unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
- OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
- Inst.addImm(OpFlags);
+ InlineAsm::Flag Flag(InlineAsm::Kind::Mem, 1);
+ Flag.setMemConstraint(ConstraintID);
+ Inst.addImm(Flag);
ArrayRef<Register> SourceRegs =
GetOrCreateVRegs(*OpInfo.CallOperandVal);
assert(
@@ -405,17 +351,17 @@ bool InlineAsmLowering::lowerInlineAsm(
// Add information to the INLINEASM instruction to know that this
// register is set.
- unsigned Flag = InlineAsm::getFlagWord(
- OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber
- : InlineAsm::Kind_RegDef,
- OpInfo.Regs.size());
+ InlineAsm::Flag Flag(OpInfo.isEarlyClobber
+ ? InlineAsm::Kind::RegDefEarlyClobber
+ : InlineAsm::Kind::RegDef,
+ OpInfo.Regs.size());
if (OpInfo.Regs.front().isVirtual()) {
// Put the register class of the virtual registers in the flag word.
// That way, later passes can recompute register class constraints for
// inline assembly as well as normal instructions. Don't do this for
// tied operands that can use the regclass information from the def.
const TargetRegisterClass *RC = MRI->getRegClass(OpInfo.Regs.front());
- Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
+ Flag.setRegClass(RC->getID());
}
Inst.addImm(Flag);
@@ -441,14 +387,13 @@ bool InlineAsmLowering::lowerInlineAsm(
InstFlagIdx += getNumOpRegs(*Inst, InstFlagIdx) + 1;
assert(getNumOpRegs(*Inst, InstFlagIdx) == 1 && "Wrong flag");
- unsigned MatchedOperandFlag = Inst->getOperand(InstFlagIdx).getImm();
- if (InlineAsm::isMemKind(MatchedOperandFlag)) {
+ const InlineAsm::Flag MatchedOperandFlag(
+     Inst->getOperand(InstFlagIdx).getImm());
+ if (MatchedOperandFlag.isMemKind()) {
LLVM_DEBUG(dbgs() << "Matching input constraint to mem operand not "
"supported. This should be target specific.\n");
return false;
}
- if (!InlineAsm::isRegDefKind(MatchedOperandFlag) &&
- !InlineAsm::isRegDefEarlyClobberKind(MatchedOperandFlag)) {
+ if (!MatchedOperandFlag.isRegDefKind() &&
+     !MatchedOperandFlag.isRegDefEarlyClobberKind()) {
LLVM_DEBUG(dbgs() << "Unknown matching constraint\n");
return false;
}
@@ -470,9 +415,9 @@ bool InlineAsmLowering::lowerInlineAsm(
}
// Add Flag and input register operand (In) to Inst. Tie In to Def.
- unsigned UseFlag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, 1);
- unsigned Flag = InlineAsm::getFlagWordForMatchingOp(UseFlag, DefIdx);
- Inst.addImm(Flag);
+ InlineAsm::Flag UseFlag(InlineAsm::Kind::RegUse, 1);
+ UseFlag.setMatchingOp(DefIdx);
+ Inst.addImm(UseFlag);
Inst.addReg(In);
Inst->tieOperands(DefRegIdx, Inst->getNumOperands() - 1);
break;
@@ -501,8 +446,8 @@ bool InlineAsmLowering::lowerInlineAsm(
"Expected constraint to be lowered to at least one operand");
// Add information to the INLINEASM node to know about this input.
- unsigned OpFlags =
- InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
+ const unsigned OpFlags =
+ InlineAsm::Flag(InlineAsm::Kind::Imm, Ops.size());
Inst.addImm(OpFlags);
Inst.add(Ops);
break;
@@ -518,10 +463,10 @@ bool InlineAsmLowering::lowerInlineAsm(
assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
- unsigned ConstraintID =
+ const InlineAsm::ConstraintCode ConstraintID =
TLI->getInlineAsmMemConstraint(OpInfo.ConstraintCode);
- unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
- OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
+ InlineAsm::Flag OpFlags(InlineAsm::Kind::Mem, 1);
+ OpFlags.setMemConstraint(ConstraintID);
Inst.addImm(OpFlags);
ArrayRef<Register> SourceRegs =
GetOrCreateVRegs(*OpInfo.CallOperandVal);
@@ -563,11 +508,11 @@ bool InlineAsmLowering::lowerInlineAsm(
return false;
}
- unsigned Flag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, NumRegs);
+ InlineAsm::Flag Flag(InlineAsm::Kind::RegUse, NumRegs);
if (OpInfo.Regs.front().isVirtual()) {
// Put the register class of the virtual registers in the flag word.
const TargetRegisterClass *RC = MRI->getRegClass(OpInfo.Regs.front());
- Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
+ Flag.setRegClass(RC->getID());
}
Inst.addImm(Flag);
if (!buildAnyextOrCopy(OpInfo.Regs[0], SourceRegs[0], MIRBuilder))
@@ -578,10 +523,9 @@ bool InlineAsmLowering::lowerInlineAsm(
case InlineAsm::isClobber: {
- unsigned NumRegs = OpInfo.Regs.size();
+ const unsigned NumRegs = OpInfo.Regs.size();
if (NumRegs > 0) {
- unsigned Flag =
- InlineAsm::getFlagWord(InlineAsm::Kind_Clobber, NumRegs);
+ unsigned Flag = InlineAsm::Flag(InlineAsm::Kind::Clobber, NumRegs);
Inst.addImm(Flag);
for (Register Reg : OpInfo.Regs) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 9bbef11067ae..baea773cf528 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -58,21 +58,21 @@ INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE,
"Select target instructions out of generic instructions",
false, false)
-InstructionSelect::InstructionSelect(CodeGenOpt::Level OL)
+InstructionSelect::InstructionSelect(CodeGenOptLevel OL)
: MachineFunctionPass(ID), OptLevel(OL) {}
// In order not to crash when calling getAnalysis during testing with -run-pass
// we use the default opt level here instead of None, so that the addRequired()
// calls are made in getAnalysisUsage().
InstructionSelect::InstructionSelect()
- : MachineFunctionPass(ID), OptLevel(CodeGenOpt::Default) {}
+ : MachineFunctionPass(ID), OptLevel(CodeGenOptLevel::Default) {}
void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
AU.addRequired<GISelKnownBitsAnalysis>();
AU.addPreserved<GISelKnownBitsAnalysis>();
- if (OptLevel != CodeGenOpt::None) {
+ if (OptLevel != CodeGenOptLevel::None) {
AU.addRequired<ProfileSummaryInfoWrapperPass>();
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
}
@@ -90,14 +90,15 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector();
+ ISel->setTargetPassConfig(&TPC);
- CodeGenOpt::Level OldOptLevel = OptLevel;
+ CodeGenOptLevel OldOptLevel = OptLevel;
auto RestoreOptLevel = make_scope_exit([=]() { OptLevel = OldOptLevel; });
- OptLevel = MF.getFunction().hasOptNone() ? CodeGenOpt::None
+ OptLevel = MF.getFunction().hasOptNone() ? CodeGenOptLevel::None
: MF.getTarget().getOptLevel();
GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
- if (OptLevel != CodeGenOpt::None) {
+ if (OptLevel != CodeGenOptLevel::None) {
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
if (PSI && PSI->hasProfileSummary())
BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
@@ -109,6 +110,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
// An optimization remark emitter. Used to report failures.
MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
+ ISel->setRemarkEmitter(&MORE);
// FIXME: There are many other MF/MFI fields we need to initialize.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
index 8cfb1b786c24..45b403bdd076 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
@@ -76,6 +76,9 @@ LegacyLegalizerInfo::LegacyLegalizerInfo() {
setScalarAction(TargetOpcode::G_INTRINSIC, 0, {{1, Legal}});
setScalarAction(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS, 0, {{1, Legal}});
+ setScalarAction(TargetOpcode::G_INTRINSIC_CONVERGENT, 0, {{1, Legal}});
+ setScalarAction(TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS, 0,
+ {{1, Legal}});
setLegalizeScalarToDifferentSizeStrategy(
TargetOpcode::G_IMPLICIT_DEF, 0, narrowToSmallerAndUnsupportedIfTooSmall);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index aecbe0b7604c..6d75258c1041 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -218,7 +218,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
// This will keep all the observers notified about new insertions/deletions.
RAIIMFObsDelInstaller Installer(MF, WrapperObserver);
LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder, KB);
- LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI);
+ LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI, KB);
bool Changed = false;
SmallVector<MachineInstr *, 128> RetryList;
do {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index f0da0d88140f..37e7153be572 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -119,8 +119,7 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
MIRBuilder.setInstrAndDebugLoc(MI);
- if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
- MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
+ if (isa<GIntrinsic>(MI))
return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
auto Step = LI.getAction(MI, MRI);
switch (Step.Action) {
@@ -526,6 +525,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(EXP_F);
case TargetOpcode::G_FEXP2:
RTLIBCASE(EXP2_F);
+ case TargetOpcode::G_FEXP10:
+ RTLIBCASE(EXP10_F);
case TargetOpcode::G_FREM:
RTLIBCASE(REM_F);
case TargetOpcode::G_FPOW:
@@ -690,7 +691,7 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
LLT OpLLT = MRI.getType(Reg);
Type *OpTy = nullptr;
if (OpLLT.isPointer())
- OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
+ OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
else
OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
Args.push_back({Reg, OpTy, 0});
@@ -795,10 +796,134 @@ conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
{{MI.getOperand(1).getReg(), FromType, 0}});
}
+static RTLIB::Libcall
+getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
+ RTLIB::Libcall RTLibcall;
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_GET_FPMODE:
+ RTLibcall = RTLIB::FEGETMODE;
+ break;
+ case TargetOpcode::G_SET_FPMODE:
+ case TargetOpcode::G_RESET_FPMODE:
+ RTLibcall = RTLIB::FESETMODE;
+ break;
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+ return RTLibcall;
+}
+
+// Some library functions that read FP state (fegetmode, fegetenv) write the
+// state into a region in memory. IR intrinsics that do the same operations
+// (get_fpmode, get_fpenv) return the state as an integer value. To implement
+// these intrinsics via the library functions, we need to use a temporary
+// variable, for example:
+//
+// %0:_(s32) = G_GET_FPMODE
+//
+// is transformed to:
+//
+// %1:_(p0) = G_FRAME_INDEX %stack.0
+// BL &fegetmode
+// %0:_(s32) = G_LOAD %1
+//
+LegalizerHelper::LegalizeResult
+LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
+ MachineInstr &MI) {
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ auto &MF = MIRBuilder.getMF();
+ auto &MRI = *MIRBuilder.getMRI();
+ auto &Ctx = MF.getFunction().getContext();
+
+ // Create a temporary where the library function will put the read state.
+ Register Dst = MI.getOperand(0).getReg();
+ LLT StateTy = MRI.getType(Dst);
+ TypeSize StateSize = StateTy.getSizeInBytes();
+ Align TempAlign = getStackTemporaryAlignment(StateTy);
+ MachinePointerInfo TempPtrInfo;
+ auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
+
+ // Create a call to the library function, with the temporary as an argument.
+ unsigned TempAddrSpace = DL.getAllocaAddrSpace();
+ Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
+ RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
+ auto Res =
+ createLibcall(MIRBuilder, RTLibcall,
+ CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
+ CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}));
+ if (Res != LegalizerHelper::Legalized)
+ return Res;
+
+ // Create a load from the temporary.
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
+ MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
+
+ return LegalizerHelper::Legalized;
+}
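A C-level model of what this lowering produces, assuming glibc's fegetmode extension and a state type at least as wide as the destination register (a sketch, not the committed code):

#include <cstring>
#include <fenv.h>

unsigned getFPModeViaLibcall() {
  femode_t Tmp;                  // the stack temporary (G_FRAME_INDEX)
  fegetmode(&Tmp);               // the libcall writes the state to memory
  unsigned State;
  std::memcpy(&State, &Tmp, sizeof(State)); // the G_LOAD of the temporary
  return State;
}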
+
+// Similar to `createGetStateLibcall`, this function calls a library function
+// using transient space on the stack. In this case the library function reads
+// the content of the memory region.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
+ MachineInstr &MI) {
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ auto &MF = MIRBuilder.getMF();
+ auto &MRI = *MIRBuilder.getMRI();
+ auto &Ctx = MF.getFunction().getContext();
+
+ // Create a temporary where the library function will get the new state.
+ Register Src = MI.getOperand(0).getReg();
+ LLT StateTy = MRI.getType(Src);
+ TypeSize StateSize = StateTy.getSizeInBytes();
+ Align TempAlign = getStackTemporaryAlignment(StateTy);
+ MachinePointerInfo TempPtrInfo;
+ auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
+
+ // Put the new state into the temporary.
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
+ MIRBuilder.buildStore(Src, Temp, *MMO);
+
+ // Create a call to the library function, with the temporary as an argument.
+ unsigned TempAddrSpace = DL.getAllocaAddrSpace();
+ Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
+ RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
+ return createLibcall(MIRBuilder, RTLibcall,
+ CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
+ CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}));
+}
+
+// This function is used to legalize operations that set the default
+// environment state. In the C library a call like `fesetmode(FE_DFL_MODE)` is
+// used for that. On most targets supported by glibc, FE_DFL_MODE is defined
+// as `((const femode_t *) -1)`, and that assumption is used here. If it does
+// not hold for some target, the target must provide custom lowering.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
+ MachineInstr &MI) {
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ auto &MF = MIRBuilder.getMF();
+ auto &Ctx = MF.getFunction().getContext();
+
+ // Create an argument for the library function.
+ unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
+ Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
+ unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
+ LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
+ auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
+ DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
+ MIRBuilder.buildIntToPtr(Dest, DefValue);
+
+ RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
+ return createLibcall(MIRBuilder, RTLibcall,
+ CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
+ CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}));
+}
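The glibc definition behind the comment above is `#define FE_DFL_MODE ((const femode_t *) -1L)` (an ISO TS 18661-1 extension), so the emitted libcall amounts to this sketch:

#include <fenv.h>

// Sketch, assuming glibc: what the lowered G_RESET_FPMODE call boils down to.
void resetFPModeViaLibcall() { fesetmode(FE_DFL_MODE); }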
+
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
- LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
- unsigned Size = LLTy.getSizeInBits();
auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
switch (MI.getOpcode()) {
@@ -810,6 +935,8 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
case TargetOpcode::G_SREM:
case TargetOpcode::G_UREM:
case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
+ LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
+ unsigned Size = LLTy.getSizeInBits();
Type *HLTy = IntegerType::get(Ctx, Size);
auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
if (Status != Legalized)
@@ -831,6 +958,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
case TargetOpcode::G_FLDEXP:
case TargetOpcode::G_FEXP:
case TargetOpcode::G_FEXP2:
+ case TargetOpcode::G_FEXP10:
case TargetOpcode::G_FCEIL:
case TargetOpcode::G_FFLOOR:
case TargetOpcode::G_FMINNUM:
@@ -839,6 +967,8 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
case TargetOpcode::G_FRINT:
case TargetOpcode::G_FNEARBYINT:
case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
+ LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
+ unsigned Size = LLTy.getSizeInBits();
Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
@@ -901,6 +1031,24 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
MI.eraseFromParent();
return Result;
}
+ case TargetOpcode::G_GET_FPMODE: {
+ LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI);
+ if (Result != Legalized)
+ return Result;
+ break;
+ }
+ case TargetOpcode::G_SET_FPMODE: {
+ LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI);
+ if (Result != Legalized)
+ return Result;
+ break;
+ }
+ case TargetOpcode::G_RESET_FPMODE: {
+ LegalizeResult Result = createResetStateLibcall(MIRBuilder, MI);
+ if (Result != Legalized)
+ return Result;
+ break;
+ }
}
MI.eraseFromParent();
@@ -1297,7 +1445,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
// So long as the new type has more bits than the bits we're extending we
// don't need to break it apart.
- if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
+ if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
Observer.changingInstr(MI);
// We don't lose any non-extension bits by truncating the src and
// sign-extending the dst.
@@ -1340,14 +1488,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Register AshrCstReg =
MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
.getReg(0);
- Register FullExtensionReg = 0;
- Register PartialExtensionReg = 0;
+ Register FullExtensionReg;
+ Register PartialExtensionReg;
// Do the operation on each small part.
for (int i = 0; i < NumParts; ++i) {
- if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
+ if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
DstRegs.push_back(SrcRegs[i]);
- else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
+ PartialExtensionReg = DstRegs.back();
+ } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
assert(PartialExtensionReg &&
"Expected to visit partial extension before full");
if (FullExtensionReg) {
@@ -1993,8 +2142,20 @@ LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
- auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
- {LeftOperand, RightOperand});
+ // Multiplication cannot overflow if WideTy is >= 2 * the original width,
+ // so we don't need to check the overflow result of the larger-type Mulo.
+ bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
+
+ unsigned MulOpc =
+ WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
+
+ MachineInstrBuilder Mulo;
+ if (WideMulCanOverflow)
+ Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
+ {LeftOperand, RightOperand});
+ else
+ Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
+
auto Mul = Mulo->getOperand(0);
MIRBuilder.buildTrunc(Result, Mul);
@@ -2012,9 +2173,7 @@ LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
}
- // Multiplication cannot overflow if the WideTy is >= 2 * original width,
- // so we don't need to check the overflow result of larger type Mulo.
- if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) {
+ if (WideMulCanOverflow) {
auto Overflow =
MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
// Finally check if the multiplication in the larger type itself overflowed.
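A worked example of the reasoning, assuming an s8 G_UMULO widened to s16 (a sketch in plain C++, not LLVM API): the maximal product 255 * 255 = 65025 fits in 16 bits, so the wide G_MUL cannot wrap, WideMulCanOverflow is false, and overflow is recovered afterwards by re-extending the truncated result.

#include <cstdint>

uint8_t wideUMulo8(uint8_t A, uint8_t B, bool &Overflow) {
  uint16_t Wide = uint16_t(A) * uint16_t(B);   // G_MUL on the wide type
  uint8_t Res = uint8_t(Wide);                 // G_TRUNC back to s8
  Overflow = uint16_t(Res) != Wide;            // G_ZEXT + G_ICMP ne
  return Res;
}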
@@ -2247,6 +2406,16 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
+ case TargetOpcode::G_ROTR:
+ case TargetOpcode::G_ROTL:
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+
case TargetOpcode::G_SDIV:
case TargetOpcode::G_SREM:
case TargetOpcode::G_SMIN:
@@ -2325,6 +2494,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_IS_FPCLASS:
Observer.changingInstr(MI);
if (TypeIdx == 0)
@@ -2494,6 +2664,17 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return Legalized;
}
case TargetOpcode::G_INSERT_VECTOR_ELT: {
+ if (TypeIdx == 0) {
+ Observer.changingInstr(MI);
+ const LLT WideEltTy = WideTy.getElementType();
+
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
if (TypeIdx == 1) {
Observer.changingInstr(MI);
@@ -2546,6 +2727,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FSQRT:
case TargetOpcode::G_FEXP:
case TargetOpcode::G_FEXP2:
+ case TargetOpcode::G_FEXP10:
case TargetOpcode::G_FPOW:
case TargetOpcode::G_INTRINSIC_TRUNC:
case TargetOpcode::G_INTRINSIC_ROUND:
@@ -2648,6 +2830,23 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_VECREDUCE_FADD:
+ case TargetOpcode::G_VECREDUCE_FMIN:
+ case TargetOpcode::G_VECREDUCE_FMAX:
+ case TargetOpcode::G_VECREDUCE_FMINIMUM:
+ case TargetOpcode::G_VECREDUCE_FMAXIMUM:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ Register VecReg = MI.getOperand(1).getReg();
+ LLT VecTy = MRI.getType(VecReg);
+ LLT WideVecTy = VecTy.isVector()
+ ? LLT::vector(VecTy.getElementCount(), WideTy)
+ : WideTy;
+ widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ Observer.changedInstr(MI);
+ return Legalized;
}
}
@@ -3384,10 +3583,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerFFloor(MI);
case TargetOpcode::G_INTRINSIC_ROUND:
return lowerIntrinsicRound(MI);
- case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
+ case TargetOpcode::G_FRINT: {
// Since round even is the assumed rounding mode for unconstrained FP
// operations, rint and roundeven are the same operation.
- changeOpcode(MI, TargetOpcode::G_FRINT);
+ changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
return Legalized;
}
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
@@ -3421,12 +3620,25 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
}
case G_UADDE: {
auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
- LLT Ty = MRI.getType(Res);
+ const LLT CondTy = MRI.getType(CarryOut);
+ const LLT Ty = MRI.getType(Res);
+ // Initial add of the two operands.
auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
+
+ // Initial check for carry.
+ auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
+
+ // Add the sum and the carry.
auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
- MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
+
+ // Second check for carry. We can only carry if the initial sum is all 1s
+ // and the carry is set, resulting in a new sum of 0.
+ auto Zero = MIRBuilder.buildConstant(Ty, 0);
+ auto ResEqZero = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, Res, Zero);
+ auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
+ MIRBuilder.buildOr(CarryOut, Carry, Carry2);
MI.eraseFromParent();
return Legalized;
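An 8-bit model of the G_UADDE lowering above (a sketch, not LLVM API): the second carry can only fire when LHS + RHS == 0xFF and the carry-in is set, in which case the final sum wraps to 0.

#include <cstdint>

uint8_t uadde8(uint8_t LHS, uint8_t RHS, bool CarryIn, bool &CarryOut) {
  uint8_t Tmp = LHS + RHS;                   // initial add
  bool Carry = Tmp < LHS;                    // first carry check
  uint8_t Res = Tmp + CarryIn;               // add the carry-in
  CarryOut = Carry | (Res == 0 && CarryIn);  // second carry check
  return Res;
}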
@@ -3445,13 +3657,23 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
const LLT CondTy = MRI.getType(BorrowOut);
const LLT Ty = MRI.getType(Res);
+ // Initial subtract of the two operands.
auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
+
+ // Initial check for borrow.
+ auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
+
+ // Subtract the borrow from the first subtract.
auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
- auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
- auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
- MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
+ // Second check for borrow. We can only borrow if the initial difference is
+ // 0 and the borrow is set, resulting in a new difference of all 1s.
+ auto Zero = MIRBuilder.buildConstant(Ty, 0);
+ auto TmpResEqZero =
+ MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
+ auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
+ MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
MI.eraseFromParent();
return Legalized;
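The matching 8-bit model of the G_USUBE lowering (sketch): the second borrow can only fire when LHS - RHS == 0 and the borrow-in is set, wrapping the difference to 0xFF.

#include <cstdint>

uint8_t usube8(uint8_t LHS, uint8_t RHS, bool BorrowIn, bool &BorrowOut) {
  uint8_t Tmp = LHS - RHS;                    // initial subtract
  bool Borrow = Tmp > LHS;                    // first borrow check
  uint8_t Res = Tmp - BorrowIn;               // subtract the borrow-in
  BorrowOut = Borrow | (Tmp == 0 && BorrowIn); // second borrow check
  return Res;
}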
@@ -3503,6 +3725,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerShuffleVector(MI);
case G_DYN_STACKALLOC:
return lowerDynStackAlloc(MI);
+ case G_STACKSAVE:
+ return lowerStackSave(MI);
+ case G_STACKRESTORE:
+ return lowerStackRestore(MI);
case G_EXTRACT:
return lowerExtract(MI);
case G_INSERT:
@@ -3559,8 +3785,16 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerMemCpyFamily(MI);
case G_MEMCPY_INLINE:
return lowerMemcpyInline(MI);
+ case G_ZEXT:
+ case G_SEXT:
+ case G_ANYEXT:
+ return lowerEXT(MI);
+ case G_TRUNC:
+ return lowerTRUNC(MI);
GISEL_VECREDUCE_CASES_NONSEQ
return lowerVectorReduction(MI);
+ case G_VAARG:
+ return lowerVAArg(MI);
}
}
@@ -4168,6 +4402,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FPOW:
case G_FEXP:
case G_FEXP2:
+ case G_FEXP10:
case G_FLOG:
case G_FLOG2:
case G_FLOG10:
@@ -4425,73 +4660,22 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
return Legalized;
}
-static unsigned getScalarOpcForReduction(unsigned Opc) {
- unsigned ScalarOpc;
- switch (Opc) {
- case TargetOpcode::G_VECREDUCE_FADD:
- ScalarOpc = TargetOpcode::G_FADD;
- break;
- case TargetOpcode::G_VECREDUCE_FMUL:
- ScalarOpc = TargetOpcode::G_FMUL;
- break;
- case TargetOpcode::G_VECREDUCE_FMAX:
- ScalarOpc = TargetOpcode::G_FMAXNUM;
- break;
- case TargetOpcode::G_VECREDUCE_FMIN:
- ScalarOpc = TargetOpcode::G_FMINNUM;
- break;
- case TargetOpcode::G_VECREDUCE_ADD:
- ScalarOpc = TargetOpcode::G_ADD;
- break;
- case TargetOpcode::G_VECREDUCE_MUL:
- ScalarOpc = TargetOpcode::G_MUL;
- break;
- case TargetOpcode::G_VECREDUCE_AND:
- ScalarOpc = TargetOpcode::G_AND;
- break;
- case TargetOpcode::G_VECREDUCE_OR:
- ScalarOpc = TargetOpcode::G_OR;
- break;
- case TargetOpcode::G_VECREDUCE_XOR:
- ScalarOpc = TargetOpcode::G_XOR;
- break;
- case TargetOpcode::G_VECREDUCE_SMAX:
- ScalarOpc = TargetOpcode::G_SMAX;
- break;
- case TargetOpcode::G_VECREDUCE_SMIN:
- ScalarOpc = TargetOpcode::G_SMIN;
- break;
- case TargetOpcode::G_VECREDUCE_UMAX:
- ScalarOpc = TargetOpcode::G_UMAX;
- break;
- case TargetOpcode::G_VECREDUCE_UMIN:
- ScalarOpc = TargetOpcode::G_UMIN;
- break;
- default:
- llvm_unreachable("Unhandled reduction");
- }
- return ScalarOpc;
-}
-
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
- unsigned Opc = MI.getOpcode();
- assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
- Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
- "Sequential reductions not expected");
+ auto &RdxMI = cast<GVecReduce>(MI);
if (TypeIdx != 1)
return UnableToLegalize;
// The semantics of the normal non-sequential reductions allow us to freely
// re-associate the operation.
- auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+ auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
if (NarrowTy.isVector() &&
(SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
return UnableToLegalize;
- unsigned ScalarOpc = getScalarOpcForReduction(Opc);
+ unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
SmallVector<Register> SplitSrcs;
// If NarrowTy is a scalar then we're being asked to scalarize.
const unsigned NumParts =
@@ -4536,10 +4720,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
SmallVector<Register> PartialReductions;
for (unsigned Part = 0; Part < NumParts; ++Part) {
PartialReductions.push_back(
- MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
+ MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
+ .getReg(0));
}
-
// If the types involved are powers of 2, we can generate intermediate vector
// ops, before generating a final reduction operation.
if (isPowerOf2_32(SrcTy.getNumElements()) &&
@@ -4836,7 +5020,9 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
case TargetOpcode::G_SUB:
case TargetOpcode::G_MUL:
case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
case TargetOpcode::G_UADDSAT:
case TargetOpcode::G_USUBSAT:
case TargetOpcode::G_SADDSAT:
@@ -4886,6 +5072,14 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
case TargetOpcode::G_FREEZE:
case TargetOpcode::G_FNEG:
case TargetOpcode::G_FABS:
+ case TargetOpcode::G_FSQRT:
+ case TargetOpcode::G_FCEIL:
+ case TargetOpcode::G_FFLOOR:
+ case TargetOpcode::G_FNEARBYINT:
+ case TargetOpcode::G_FRINT:
+ case TargetOpcode::G_INTRINSIC_ROUND:
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
+ case TargetOpcode::G_INTRINSIC_TRUNC:
case TargetOpcode::G_BSWAP:
case TargetOpcode::G_FCANONICALIZE:
case TargetOpcode::G_SEXT_INREG:
@@ -4943,15 +5137,13 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
MI.eraseFromParent();
return Legalized;
}
- case TargetOpcode::G_TRUNC: {
- Observer.changingInstr(MI);
- moreElementsVectorSrc(MI, MoreTy, 1);
- moreElementsVectorDst(MI, MoreTy, 0);
- Observer.changedInstr(MI);
- return Legalized;
- }
+ case TargetOpcode::G_TRUNC:
case TargetOpcode::G_FPTRUNC:
- case TargetOpcode::G_FPEXT: {
+ case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_UITOFP: {
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
@@ -5765,8 +5957,10 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
MI.eraseFromParent();
return Legalized;
}
+ Observer.changingInstr(MI);
MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
MI.getOperand(1).setReg(MIBTmp.getReg(0));
+ Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_CTPOP: {
@@ -5956,6 +6150,105 @@ LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
return Result;
}
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
+ auto [Dst, Src] = MI.getFirst2Regs();
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+
+ uint32_t DstTySize = DstTy.getSizeInBits();
+ uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
+ uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
+
+ if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
+ !isPowerOf2_32(SrcTyScalarSize))
+ return UnableToLegalize;
+
+ // The step between extend is too large, split it by creating an intermediate
+ // extend instruction
+ if (SrcTyScalarSize * 2 < DstTyScalarSize) {
+ LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
+ // If the destination type is illegal, split it into multiple statements
+ // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
+ auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
+ // Unmerge the vector
+ LLT EltTy = MidTy.changeElementCount(
+ MidTy.getElementCount().divideCoefficientBy(2));
+ auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
+
+ // Extend the two halves with the original opcode (zext/sext/anyext).
+ LLT ZExtResTy = DstTy.changeElementCount(
+ DstTy.getElementCount().divideCoefficientBy(2));
+ auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
+ {UnmergeSrc.getReg(0)});
+ auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
+ {UnmergeSrc.getReg(1)});
+
+ // Merge the ending vectors
+ MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ return UnableToLegalize;
+}
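An expansion sketch for assumed types, in the same MIR-in-comments style the file already uses, for %d:_(<8 x s32>) = G_ZEXT %s:_(<8 x s8>):

// %m:_(<8 x s16>)                  = G_ZEXT %s             ; MidTy step
// %l:_(<4 x s16>), %h:_(<4 x s16>) = G_UNMERGE_VALUES %m
// %le:_(<4 x s32>)                 = G_ZEXT %l
// %he:_(<4 x s32>)                 = G_ZEXT %h
// %d:_(<8 x s32>)                  = G_CONCAT_VECTORS %le, %he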
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+ // Similar to how operand splitting is done in SelectionDAG, we can handle
+ // %res(<8 x s8>) = G_TRUNC %in(<8 x s32>) by generating:
+ // %inlo(<4 x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
+ // %lo16(<4 x s16>) = G_TRUNC %inlo
+ // %hi16(<4 x s16>) = G_TRUNC %inhi
+ // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
+ // %res(<8 x s8>) = G_TRUNC %in16
+
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
+ isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
+ isPowerOf2_32(SrcTy.getNumElements()) &&
+ isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
+ // Split input type.
+ LLT SplitSrcTy = SrcTy.changeElementCount(
+ SrcTy.getElementCount().divideCoefficientBy(2));
+
+ // First, split the source into two smaller vectors.
+ SmallVector<Register, 2> SplitSrcs;
+ extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs);
+
+ // Truncate the splits into intermediate narrower elements.
+ LLT InterTy;
+ if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
+ InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
+ else
+ InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
+ for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
+ SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
+ }
+
+ // Combine the new truncates into one vector
+ auto Merge = MIRBuilder.buildMergeLikeInstr(
+ DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
+
+ // Truncate the new vector to the final result type
+ if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
+ else
+ MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
+
+ MI.eraseFromParent();
+
+ return Legalized;
+ }
+ return UnableToLegalize;
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
@@ -6523,23 +6816,25 @@ LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
// round(x) =>
// t = trunc(x);
// d = fabs(x - t);
- // o = copysign(1.0f, x);
- // return t + (d >= 0.5 ? o : 0.0);
+ // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
+ // return t + o;
auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
- auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
- auto One = MIRBuilder.buildFConstant(Ty, 1.0);
+
auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
- auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X);
+ auto Cmp =
+ MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
- auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half,
- Flags);
- auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags);
+ // Could emit G_UITOFP instead
+ auto One = MIRBuilder.buildFConstant(Ty, 1.0);
+ auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
+ auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
+ auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
- MIRBuilder.buildFAdd(DstReg, T, Sel, Flags);
+ MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
MI.eraseFromParent();
return Legalized;
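A scalar model of the new sequence (sketch). For X = -2.5: T = -2.0, AbsDiff = 0.5, the select yields 1.0, copysign makes it -1.0, and the result is -3.0, the round-half-away-from-zero behavior llvm.round requires.

#include <cmath>

double roundLowered(double X) {
  double T = std::trunc(X);
  double AbsDiff = std::fabs(X - T);
  double Sel = AbsDiff >= 0.5 ? 1.0 : 0.0;
  return T + std::copysign(Sel, X);
}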
@@ -6688,8 +6983,8 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
Align EltAlign;
MachinePointerInfo PtrInfo;
- auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()),
- VecAlign, PtrInfo);
+ auto StackTemp = createStackTemporary(
+ TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
// Get the pointer to the element, and be sure not to hit undefined behavior
@@ -6727,26 +7022,9 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
LLT IdxTy = LLT::scalar(32);
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
-
- if (DstTy.isScalar()) {
- if (Src0Ty.isVector())
- return UnableToLegalize;
-
- // This is just a SELECT.
- assert(Mask.size() == 1 && "Expected a single mask element");
- Register Val;
- if (Mask[0] < 0 || Mask[0] > 1)
- Val = MIRBuilder.buildUndef(DstTy).getReg(0);
- else
- Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
- MIRBuilder.buildCopy(DstReg, Val);
- MI.eraseFromParent();
- return Legalized;
- }
-
Register Undef;
SmallVector<Register, 32> BuildVec;
- LLT EltTy = DstTy.getElementType();
+ LLT EltTy = DstTy.getScalarType();
for (int Idx : Mask) {
if (Idx < 0) {
@@ -6768,26 +7046,20 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
}
}
- MIRBuilder.buildBuildVector(DstReg, BuildVec);
+ if (DstTy.isScalar())
+ MIRBuilder.buildCopy(DstReg, BuildVec[0]);
+ else
+ MIRBuilder.buildBuildVector(DstReg, BuildVec);
MI.eraseFromParent();
return Legalized;
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
- const auto &MF = *MI.getMF();
- const auto &TFI = *MF.getSubtarget().getFrameLowering();
- if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
- return UnableToLegalize;
-
- Register Dst = MI.getOperand(0).getReg();
- Register AllocSize = MI.getOperand(1).getReg();
- Align Alignment = assumeAligned(MI.getOperand(2).getImm());
-
- LLT PtrTy = MRI.getType(Dst);
+Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
+ Register AllocSize,
+ Align Alignment,
+ LLT PtrTy) {
LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
- Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
@@ -6802,7 +7074,25 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
}
- SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
+ return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
+ const auto &MF = *MI.getMF();
+ const auto &TFI = *MF.getSubtarget().getFrameLowering();
+ if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
+ return UnableToLegalize;
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register AllocSize = MI.getOperand(1).getReg();
+ Align Alignment = assumeAligned(MI.getOperand(2).getImm());
+
+ LLT PtrTy = MRI.getType(Dst);
+ Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ Register SPTmp =
+ getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
+
MIRBuilder.buildCopy(SPReg, SPTmp);
MIRBuilder.buildCopy(Dst, SPTmp);
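An integer model of getDynStackAllocTargetPtr (a sketch; the mask constant built by the elided AlignCst is assumed to be ~(Alignment - 1)): the stack grows down, so the new SP is the old SP minus the allocation size, masked down to the requested power-of-two alignment.

#include <cstdint>

uintptr_t dynStackAllocTarget(uintptr_t SP, uintptr_t Size, uintptr_t Align) {
  uintptr_t P = SP - Size;   // G_SUB on the integer-typed SP copy
  if (Align > 1)
    P &= ~(Align - 1);       // G_AND with the alignment mask
  return P;                  // cast back to the pointer type
}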
@@ -6811,6 +7101,28 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerStackSave(MachineInstr &MI) {
+ Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
+ if (!StackPtr)
+ return UnableToLegalize;
+
+ MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
+ Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
+ if (!StackPtr)
+ return UnableToLegalize;
+
+ MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtract(MachineInstr &MI) {
auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned Offset = MI.getOperand(2).getImm();
@@ -7577,6 +7889,56 @@ LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
return UnableToLegalize;
}
+static Type *getTypeForLLT(LLT Ty, LLVMContext &C);
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
+ MachineFunction &MF = *MI.getMF();
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ Register ListPtr = MI.getOperand(1).getReg();
+ LLT PtrTy = MRI.getType(ListPtr);
+
+ // ListPtr is a pointer to the head of the list. Get the address of the
+ // head of the list.
+ Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
+ MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
+ MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
+ auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
+
+ const Align A(MI.getOperand(2).getImm());
+ LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
+ if (A > TLI.getMinStackArgumentAlignment()) {
+ Register AlignAmt =
+ MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
+ auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
+ auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
+ VAList = AndDst.getReg(0);
+ }
+
+ // Increment the pointer, VAList, to the next vaarg.
+ // The list should be bumped by the size of the element at the current head
+ // of the list.
+ Register Dst = MI.getOperand(0).getReg();
+ LLT LLTTy = MRI.getType(Dst);
+ Type *Ty = getTypeForLLT(LLTTy, Ctx);
+ auto IncAmt =
+ MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
+ auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
+
+ // Store the incremented VAList to the legalized pointer
+ MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
+ MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
+ MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
+ // Load the actual argument out of the pointer VAList
+ Align EltAlignment = DL.getABITypeAlign(Ty);
+ MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
+ MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
+ MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
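A C-level model of the expansion above (sketch; the align-up step only happens when A exceeds the target's minimum stack argument alignment):

#include <cstdint>

void *vaArgLowered(void **ListPtr, uintptr_t Size, uintptr_t A) {
  char *P = static_cast<char *>(*ListPtr);            // load the list head
  uintptr_t Aligned = (uintptr_t(P) + A - 1) & ~(A - 1);
  P = reinterpret_cast<char *>(Aligned);              // align the head up
  *ListPtr = P + Size;                                // store the bumped head
  return P;                                           // caller loads the value
}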
+
static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
// On Darwin, -Os means optimize for size without hurting performance, so
// only really optimize for size when -Oz (MinSize) is used.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 1f2e481c63e0..de9931d1c240 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -77,13 +77,11 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, LegalizeAction Action) {
}
raw_ostream &LegalityQuery::print(raw_ostream &OS) const {
- OS << Opcode << ", Tys={";
+ OS << "Opcode=" << Opcode << ", Tys={";
for (const auto &Type : Types) {
OS << Type << ", ";
}
- OS << "}, Opcode=";
-
- OS << Opcode << ", MMOs={";
+ OS << "}, MMOs={";
for (const auto &MMODescr : MMODescrs) {
OS << MMODescr.MemoryTy << ", ";
}
@@ -102,6 +100,7 @@ static bool hasNoSimpleLoops(const LegalizeRule &Rule, const LegalityQuery &Q,
case Lower:
case MoreElements:
case FewerElements:
+ case Libcall:
break;
default:
return Q.Types[Mutation.first] != Mutation.second;
@@ -118,6 +117,10 @@ static bool mutationIsSane(const LegalizeRule &Rule,
if (Rule.getAction() == Custom || Rule.getAction() == Legal)
return true;
+ // Skip null mutation.
+ if (!Mutation.second.isValid())
+ return true;
+
const unsigned TypeIdx = Mutation.first;
const LLT OldTy = Q.Types[TypeIdx];
const LLT NewTy = Mutation.second;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
index 49f40495d6fc..246aa88b09ac 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -934,9 +934,8 @@ void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) {
BitVector LegalSizes(MaxStoreSizeToForm * 2);
const auto &LI = *MF->getSubtarget().getLegalizerInfo();
const auto &DL = MF->getFunction().getParent()->getDataLayout();
- Type *IntPtrIRTy =
- DL.getIntPtrType(MF->getFunction().getContext(), AddrSpace);
- LLT PtrTy = getLLTForType(*IntPtrIRTy->getPointerTo(AddrSpace), DL);
+ Type *IRPtrTy = PointerType::get(MF->getFunction().getContext(), AddrSpace);
+ LLT PtrTy = getLLTForType(*IRPtrTy, DL);
// We assume that we're not going to be generating any stores wider than
// MaxStoreSizeToForm bits for now.
for (unsigned Size = 2; Size <= MaxStoreSizeToForm; Size *= 2) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 962b54ec5d6b..a5827c26c04f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -196,14 +196,14 @@ void MachineIRBuilder::validateShiftOp(const LLT Res, const LLT Op0,
assert((Res == Op0) && "type mismatch");
}
-MachineInstrBuilder MachineIRBuilder::buildPtrAdd(const DstOp &Res,
- const SrcOp &Op0,
- const SrcOp &Op1) {
+MachineInstrBuilder
+MachineIRBuilder::buildPtrAdd(const DstOp &Res, const SrcOp &Op0,
+ const SrcOp &Op1, std::optional<unsigned> Flags) {
assert(Res.getLLTTy(*getMRI()).getScalarType().isPointer() &&
Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch");
assert(Op1.getLLTTy(*getMRI()).getScalarType().isScalar() && "invalid offset type");
- return buildInstr(TargetOpcode::G_PTR_ADD, {Res}, {Op0, Op1});
+ return buildInstr(TargetOpcode::G_PTR_ADD, {Res}, {Op0, Op1}, Flags);
}
std::optional<MachineInstrBuilder>
@@ -314,7 +314,10 @@ MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
assert(EltTy.getScalarSizeInBits() == Val.getBitWidth() &&
"creating constant with the wrong size");
- if (Ty.isVector()) {
+ assert(!Ty.isScalableVector() &&
+ "unexpected scalable vector in buildConstant");
+
+ if (Ty.isFixedVector()) {
auto Const = buildInstr(TargetOpcode::G_CONSTANT)
.addDef(getMRI()->createGenericVirtualRegister(EltTy))
.addCImm(&Val);
@@ -347,7 +350,10 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
assert(!Ty.isPointer() && "invalid operand type");
- if (Ty.isVector()) {
+ assert(!Ty.isScalableVector() &&
+ "unexpected scalable vector in buildFConstant");
+
+ if (Ty.isFixedVector()) {
auto Const = buildInstr(TargetOpcode::G_FCONSTANT)
.addDef(getMRI()->createGenericVirtualRegister(EltTy))
.addFPImm(&Val);
@@ -775,30 +781,55 @@ MachineInstrBuilder MachineIRBuilder::buildInsert(const DstOp &Res,
return buildInstr(TargetOpcode::G_INSERT, Res, {Src, Op, uint64_t(Index)});
}
-MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
- ArrayRef<Register> ResultRegs,
- bool HasSideEffects) {
- auto MIB =
- buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
- : TargetOpcode::G_INTRINSIC);
+static unsigned getIntrinsicOpcode(bool HasSideEffects, bool IsConvergent) {
+ if (HasSideEffects && IsConvergent)
+ return TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS;
+ if (HasSideEffects)
+ return TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS;
+ if (IsConvergent)
+ return TargetOpcode::G_INTRINSIC_CONVERGENT;
+ return TargetOpcode::G_INTRINSIC;
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
+ ArrayRef<Register> ResultRegs,
+ bool HasSideEffects, bool isConvergent) {
+ auto MIB = buildInstr(getIntrinsicOpcode(HasSideEffects, isConvergent));
for (unsigned ResultReg : ResultRegs)
MIB.addDef(ResultReg);
MIB.addIntrinsicID(ID);
return MIB;
}
+MachineInstrBuilder
+MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
+ ArrayRef<Register> ResultRegs) {
+ auto Attrs = Intrinsic::getAttributes(getContext(), ID);
+ bool HasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory();
+ bool isConvergent = Attrs.hasFnAttr(Attribute::Convergent);
+ return buildIntrinsic(ID, ResultRegs, HasSideEffects, isConvergent);
+}
+
MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
ArrayRef<DstOp> Results,
- bool HasSideEffects) {
- auto MIB =
- buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
- : TargetOpcode::G_INTRINSIC);
+ bool HasSideEffects,
+ bool isConvergent) {
+ auto MIB = buildInstr(getIntrinsicOpcode(HasSideEffects, isConvergent));
for (DstOp Result : Results)
Result.addDefToMIB(*getMRI(), MIB);
MIB.addIntrinsicID(ID);
return MIB;
}
+MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
+ ArrayRef<DstOp> Results) {
+ auto Attrs = Intrinsic::getAttributes(getContext(), ID);
+ bool HasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory();
+ bool isConvergent = Attrs.hasFnAttr(Attribute::Convergent);
+ return buildIntrinsic(ID, Results, HasSideEffects, isConvergent);
+}
+
MachineInstrBuilder MachineIRBuilder::buildTrunc(const DstOp &Res,
const SrcOp &Op) {
return buildInstr(TargetOpcode::G_TRUNC, Res, Op);
@@ -1026,6 +1057,18 @@ MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) {
.addImm(Scope);
}
+MachineInstrBuilder MachineIRBuilder::buildPrefetch(const SrcOp &Addr,
+ unsigned RW,
+ unsigned Locality,
+ unsigned CacheType,
+ MachineMemOperand &MMO) {
+ auto MIB = buildInstr(TargetOpcode::G_PREFETCH);
+ Addr.addSrcToMIB(MIB);
+ MIB.addImm(RW).addImm(Locality).addImm(CacheType);
+ MIB.addMemOperand(&MMO);
+ return MIB;
+}
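A usage sketch of the new builder method, mirroring the IRTranslator case earlier in this patch; MF, MIRBuilder, AddrReg, and Addr are assumed to be in scope:

// Assumed: MachineFunction &MF, MachineIRBuilder &MIRBuilder,
// Register AddrReg, const Value *Addr. A read prefetch, locality 3,
// into the data cache (CacheType 1).
MachineMemOperand &MMO = *MF.getMachineMemOperand(
    MachinePointerInfo(Addr), MachineMemOperand::MOLoad, LLT(), Align());
MIRBuilder.buildPrefetch(AddrReg, /*RW=*/0, /*Locality=*/3, /*CacheType=*/1,
                         MMO);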
+
MachineInstrBuilder
MachineIRBuilder::buildBlockAddress(Register Res, const BlockAddress *BA) {
#ifndef NDEBUG
@@ -1040,16 +1083,16 @@ void MachineIRBuilder::validateTruncExt(const LLT DstTy, const LLT SrcTy,
#ifndef NDEBUG
if (DstTy.isVector()) {
assert(SrcTy.isVector() && "mismatched cast between vector and non-vector");
- assert(SrcTy.getNumElements() == DstTy.getNumElements() &&
+ assert(SrcTy.getElementCount() == DstTy.getElementCount() &&
"different number of elements in a trunc/ext");
} else
assert(DstTy.isScalar() && SrcTy.isScalar() && "invalid extend/trunc");
if (IsExtend)
- assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() &&
+ assert(TypeSize::isKnownGT(DstTy.getSizeInBits(), SrcTy.getSizeInBits()) &&
"invalid narrowing extend");
else
- assert(DstTy.getSizeInBits() < SrcTy.getSizeInBits() &&
+ assert(TypeSize::isKnownLT(DstTy.getSizeInBits(), SrcTy.getSizeInBits()) &&
"invalid widening trunc");
#endif
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 885a1056b2ea..bb5363fb2527 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -449,7 +449,8 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
return MappingCost::ImpossibleCost();
// If mapped with InstrMapping, MI will have the recorded cost.
- MappingCost Cost(MBFI ? MBFI->getBlockFreq(MI.getParent()) : 1);
+ MappingCost Cost(MBFI ? MBFI->getBlockFreq(MI.getParent())
+ : BlockFrequency(1));
bool Saturated = Cost.addLocalCost(InstrMapping.getCost());
assert(!Saturated && "Possible mapping saturated the cost");
LLVM_DEBUG(dbgs() << "Evaluating mapping cost for: " << MI);
@@ -623,7 +624,7 @@ bool RegBankSelect::applyMapping(
// Second, rewrite the instruction.
LLVM_DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n');
- RBI->applyMapping(OpdMapper);
+ RBI->applyMapping(MIRBuilder, OpdMapper);
return true;
}
@@ -971,7 +972,7 @@ bool RegBankSelect::EdgeInsertPoint::canMaterialize() const {
return Src.canSplitCriticalEdge(DstOrSplit);
}
-RegBankSelect::MappingCost::MappingCost(const BlockFrequency &LocalFreq)
+RegBankSelect::MappingCost::MappingCost(BlockFrequency LocalFreq)
: LocalFreq(LocalFreq.getFrequency()) {}
bool RegBankSelect::MappingCost::addLocalCost(uint64_t Cost) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 080600d3cc98..eaf829f562b2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -205,8 +205,15 @@ bool llvm::canReplaceReg(Register DstReg, Register SrcReg,
return false;
// Replace if either DstReg has no constraints or the register
// constraints match.
- return !MRI.getRegClassOrRegBank(DstReg) ||
- MRI.getRegClassOrRegBank(DstReg) == MRI.getRegClassOrRegBank(SrcReg);
+ const auto &DstRBC = MRI.getRegClassOrRegBank(DstReg);
+ if (!DstRBC || DstRBC == MRI.getRegClassOrRegBank(SrcReg))
+ return true;
+
+ // Otherwise match if the Src is already a regclass that is covered by the Dst
+ // RegBank.
+ return DstRBC.is<const RegisterBank *>() && MRI.getRegClassOrNull(SrcReg) &&
+ DstRBC.get<const RegisterBank *>()->covers(
+ *MRI.getRegClassOrNull(SrcReg));
}
bool llvm::isTriviallyDead(const MachineInstr &MI,
@@ -773,6 +780,29 @@ std::optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode,
return std::nullopt;
}
+std::optional<APInt> llvm::ConstantFoldCastOp(unsigned Opcode, LLT DstTy,
+ const Register Op0,
+ const MachineRegisterInfo &MRI) {
+ std::optional<APInt> Val = getIConstantVRegVal(Op0, MRI);
+ if (!Val)
+ return Val;
+
+ const unsigned DstSize = DstTy.getScalarSizeInBits();
+
+ switch (Opcode) {
+ case TargetOpcode::G_SEXT:
+ return Val->sext(DstSize);
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ // TODO: DAG considers target preference when constant folding any_extend.
+ return Val->zext(DstSize);
+ default:
+ break;
+ }
+
+ llvm_unreachable("unexpected cast opcode to constant fold");
+}
+
std::optional<APFloat>
llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy, Register Src,
const MachineRegisterInfo &MRI) {
@@ -1086,9 +1116,9 @@ std::optional<APInt>
llvm::getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI) {
if (auto SplatValAndReg =
getAnyConstantSplat(Reg, MRI, /* AllowUndef */ false)) {
- std::optional<ValueAndVReg> ValAndVReg =
- getIConstantVRegValWithLookThrough(SplatValAndReg->VReg, MRI);
- return ValAndVReg->Value;
+ if (std::optional<ValueAndVReg> ValAndVReg =
+ getIConstantVRegValWithLookThrough(SplatValAndReg->VReg, MRI))
+ return ValAndVReg->Value;
}
return std::nullopt;
@@ -1143,7 +1173,7 @@ llvm::getVectorSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI) {
if (auto Splat = getIConstantSplatSExtVal(MI, MRI))
return RegOrConstant(*Splat);
auto Reg = MI.getOperand(1).getReg();
- if (any_of(make_range(MI.operands_begin() + 2, MI.operands_end()),
+ if (any_of(drop_begin(MI.operands(), 2),
[&Reg](const MachineOperand &Op) { return Op.getReg() != Reg; }))
return std::nullopt;
return RegOrConstant(Reg);
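
The new ConstantFoldCastOp above folds G_SEXT/G_ZEXT/G_ANYEXT of a constant operand by extending the APInt. A sketch of the underlying arithmetic, assuming a 16-bit constant widened to 32 bits:

  APInt Val(16, 0x8000);   // i16 -32768
  APInt S = Val.sext(32);  // G_SEXT   -> 0xFFFF8000
  APInt Z = Val.zext(32);  // G_ZEXT   -> 0x00008000
  // G_ANYEXT currently folds like G_ZEXT; the TODO above notes that
  // SelectionDAG instead consults the target's preference.
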
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
index f259cbc1d788..22b6d31d0634 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -63,7 +63,6 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
@@ -644,8 +643,7 @@ bool GlobalMerge::doInitialization(Module &M) {
StringRef Section = GV.getSection();
// Ignore all 'special' globals.
- if (GV.getName().startswith("llvm.") ||
- GV.getName().startswith(".llvm."))
+ if (GV.getName().starts_with("llvm.") || GV.getName().starts_with(".llvm."))
continue;
// Ignore all "required" globals:
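
The startswith -> starts_with rename, repeated throughout this merge, tracks StringRef adopting the C++20 std::string_view spellings; the camel-case forms were deprecated. A trivial sketch:

  StringRef Name(".llvm.foo");
  bool A = Name.starts_with(".llvm.");  // new spelling used by these hunks
  bool B = Name.ends_with("foo");       // endswith was renamed the same way
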
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
index 2ad5820bd9fb..e8e276a8558d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
@@ -541,13 +541,12 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
RetVal = IfConvertTriangle(BBI, Kind);
LLVM_DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
if (RetVal) {
- if (isFalse) {
- if (isRev) ++NumTriangleFRev;
- else ++NumTriangleFalse;
- } else {
- if (isRev) ++NumTriangleRev;
- else ++NumTriangle;
- }
+ if (isFalse)
+ ++NumTriangleFalse;
+ else if (isRev)
+ ++NumTriangleRev;
+ else
+ ++NumTriangle;
}
break;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index b2a7aad73411..5ad003ed3180 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -372,7 +372,7 @@ ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI,
if (!MI.mayLoadOrStore() || MI.isPredicable())
return SR_Unsuitable;
auto AM = TII->getAddrModeFromMemoryOp(MI, TRI);
- if (!AM)
+ if (!AM || AM->Form != ExtAddrMode::Formula::Basic)
return SR_Unsuitable;
auto AddrMode = *AM;
const Register BaseReg = AddrMode.BaseReg, ScaledReg = AddrMode.ScaledReg;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
index 012892166ae7..f7b931a3bdac 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
@@ -29,6 +29,7 @@
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/CodeGen/IndirectBrExpand.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
@@ -48,14 +49,12 @@ using namespace llvm;
namespace {
-class IndirectBrExpandPass : public FunctionPass {
- const TargetLowering *TLI = nullptr;
-
+class IndirectBrExpandLegacyPass : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- IndirectBrExpandPass() : FunctionPass(ID) {
- initializeIndirectBrExpandPassPass(*PassRegistry::getPassRegistry());
+ IndirectBrExpandLegacyPass() : FunctionPass(ID) {
+ initializeIndirectBrExpandLegacyPassPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -67,33 +66,41 @@ public:
} // end anonymous namespace
-char IndirectBrExpandPass::ID = 0;
+static bool runImpl(Function &F, const TargetLowering *TLI,
+ DomTreeUpdater *DTU);
+
+PreservedAnalyses IndirectBrExpandPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ auto *STI = TM->getSubtargetImpl(F);
+ if (!STI->enableIndirectBrExpand())
+ return PreservedAnalyses::all();
+
+ auto *TLI = STI->getTargetLowering();
+ auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+
+ bool Changed = runImpl(F, TLI, DT ? &DTU : nullptr);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+
+char IndirectBrExpandLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(IndirectBrExpandPass, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(IndirectBrExpandLegacyPass, DEBUG_TYPE,
"Expand indirectbr instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(IndirectBrExpandPass, DEBUG_TYPE,
+INITIALIZE_PASS_END(IndirectBrExpandLegacyPass, DEBUG_TYPE,
"Expand indirectbr instructions", false, false)
FunctionPass *llvm::createIndirectBrExpandPass() {
- return new IndirectBrExpandPass();
+ return new IndirectBrExpandLegacyPass();
}
-bool IndirectBrExpandPass::runOnFunction(Function &F) {
+bool runImpl(Function &F, const TargetLowering *TLI, DomTreeUpdater *DTU) {
auto &DL = F.getParent()->getDataLayout();
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- if (!TPC)
- return false;
-
- auto &TM = TPC->getTM<TargetMachine>();
- auto &STI = *TM.getSubtargetImpl(F);
- if (!STI.enableIndirectBrExpand())
- return false;
- TLI = STI.getTargetLowering();
-
- std::optional<DomTreeUpdater> DTU;
- if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
- DTU.emplace(DTWP->getDomTree(), DomTreeUpdater::UpdateStrategy::Lazy);
SmallVector<IndirectBrInst *, 1> IndirectBrs;
@@ -268,3 +275,21 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) {
return true;
}
+
+bool IndirectBrExpandLegacyPass::runOnFunction(Function &F) {
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ auto &TM = TPC->getTM<TargetMachine>();
+ auto &STI = *TM.getSubtargetImpl(F);
+ if (!STI.enableIndirectBrExpand())
+ return false;
+ auto *TLI = STI.getTargetLowering();
+
+ std::optional<DomTreeUpdater> DTU;
+ if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DTU.emplace(DTWP->getDomTree(), DomTreeUpdater::UpdateStrategy::Lazy);
+
+ return runImpl(F, TLI, DTU ? &*DTU : nullptr);
+}
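
This rework follows the usual recipe for porting a codegen pass to the new pass manager: the body moves into a file-local runImpl, the legacy pass becomes a thin wrapper, and a new-PM class calls the same worker. A compilable sketch of the shape, using a hypothetical pass name ("Demo") rather than anything from this patch:

  #include "llvm/IR/PassManager.h"
  #include "llvm/Pass.h"
  using namespace llvm;

  static bool runImpl(Function &F) { return false; }  // shared worker

  namespace {
  struct DemoLegacyPass : FunctionPass {              // legacy-PM wrapper
    static char ID;
    DemoLegacyPass() : FunctionPass(ID) {}
    bool runOnFunction(Function &F) override { return runImpl(F); }
  };
  } // end anonymous namespace
  char DemoLegacyPass::ID = 0;

  struct DemoPass : PassInfoMixin<DemoPass> {         // new-PM wrapper
    PreservedAnalyses run(Function &F, FunctionAnalysisManager &) {
      return runImpl(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
    }
  };
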
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
index 277c6be418c5..c46b1fe18ca7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -33,7 +33,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
@@ -56,7 +55,6 @@
#include <iterator>
#include <tuple>
#include <utility>
-#include <vector>
using namespace llvm;
@@ -86,7 +84,6 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate {
LiveIntervals &LIS;
LiveStacks &LSS;
MachineDominatorTree &MDT;
- MachineLoopInfo &Loops;
VirtRegMap &VRM;
MachineRegisterInfo &MRI;
const TargetInstrInfo &TII;
@@ -138,8 +135,7 @@ public:
VirtRegMap &vrm)
: MF(mf), LIS(pass.getAnalysis<LiveIntervals>()),
LSS(pass.getAnalysis<LiveStacks>()),
- MDT(pass.getAnalysis<MachineDominatorTree>()),
- Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
+ MDT(pass.getAnalysis<MachineDominatorTree>()), VRM(vrm),
MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()),
TRI(*mf.getSubtarget().getRegisterInfo()),
MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
@@ -157,7 +153,6 @@ class InlineSpiller : public Spiller {
LiveIntervals &LIS;
LiveStacks &LSS;
MachineDominatorTree &MDT;
- MachineLoopInfo &Loops;
VirtRegMap &VRM;
MachineRegisterInfo &MRI;
const TargetInstrInfo &TII;
@@ -197,8 +192,7 @@ public:
VirtRegAuxInfo &VRAI)
: MF(MF), LIS(Pass.getAnalysis<LiveIntervals>()),
LSS(Pass.getAnalysis<LiveStacks>()),
- MDT(Pass.getAnalysis<MachineDominatorTree>()),
- Loops(Pass.getAnalysis<MachineLoopInfo>()), VRM(VRM),
+ MDT(Pass.getAnalysis<MachineDominatorTree>()), VRM(VRM),
MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()),
TRI(*MF.getSubtarget().getRegisterInfo()),
MBFI(Pass.getAnalysis<MachineBlockFrequencyInfo>()),
@@ -256,11 +250,11 @@ Spiller *llvm::createInlineSpiller(MachineFunctionPass &Pass,
// This minimizes register pressure and maximizes the store-to-load distance for
// spill slots which can be important in tight loops.
-/// If MI is a COPY to or from Reg, return the other register, otherwise return
-/// 0.
-static Register isCopyOf(const MachineInstr &MI, Register Reg) {
- assert(!MI.isBundled());
- if (!MI.isCopy())
+/// isCopyOf - If MI is a COPY to or from Reg, return the other register,
+/// otherwise return 0.
+static Register isCopyOf(const MachineInstr &MI, Register Reg,
+ const TargetInstrInfo &TII) {
+ if (!TII.isCopyInstr(MI))
return Register();
const MachineOperand &DstOp = MI.getOperand(0);
@@ -277,9 +271,10 @@ static Register isCopyOf(const MachineInstr &MI, Register Reg) {
}
/// Check for a copy bundle as formed by SplitKit.
-static Register isCopyOfBundle(const MachineInstr &FirstMI, Register Reg) {
+static Register isCopyOfBundle(const MachineInstr &FirstMI, Register Reg,
+ const TargetInstrInfo &TII) {
if (!FirstMI.isBundled())
- return isCopyOf(FirstMI, Reg);
+ return isCopyOf(FirstMI, Reg, TII);
assert(!FirstMI.isBundledWithPred() && FirstMI.isBundledWithSucc() &&
"expected to see first instruction in bundle");
@@ -288,11 +283,12 @@ static Register isCopyOfBundle(const MachineInstr &FirstMI, Register Reg) {
MachineBasicBlock::const_instr_iterator I = FirstMI.getIterator();
while (I->isBundledWithSucc()) {
const MachineInstr &MI = *I;
- if (!MI.isCopy())
+ auto CopyInst = TII.isCopyInstr(MI);
+ if (!CopyInst)
return Register();
- const MachineOperand &DstOp = MI.getOperand(0);
- const MachineOperand &SrcOp = MI.getOperand(1);
+ const MachineOperand &DstOp = *CopyInst->Destination;
+ const MachineOperand &SrcOp = *CopyInst->Source;
if (DstOp.getReg() == Reg) {
if (!SnipReg)
SnipReg = SrcOp.getReg();
@@ -358,7 +354,7 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
MachineInstr &MI = *RI++;
// Allow copies to/from Reg.
- if (isCopyOfBundle(MI, Reg))
+ if (isCopyOfBundle(MI, Reg, TII))
continue;
// Allow stack slot loads.
@@ -396,7 +392,7 @@ void InlineSpiller::collectRegsToSpill() {
return;
for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) {
- Register SnipReg = isCopyOfBundle(MI, Reg);
+ Register SnipReg = isCopyOfBundle(MI, Reg, TII);
if (!isSibling(SnipReg))
continue;
LiveInterval &SnipLI = LIS.getInterval(SnipReg);
@@ -467,7 +463,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
MachineBasicBlock *MBB = LIS.getMBBFromIndex(SrcVNI->def);
MachineBasicBlock::iterator MII;
if (SrcVNI->isPHIDef())
- MII = MBB->SkipPHIsLabelsAndDebug(MBB->begin());
+ MII = MBB->SkipPHIsLabelsAndDebug(MBB->begin(), SrcReg);
else {
MachineInstr *DefMI = LIS.getInstructionFromIndex(SrcVNI->def);
assert(DefMI && "Defining instruction disappeared");
@@ -519,14 +515,14 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
// Find all spills and copies of VNI.
for (MachineInstr &MI :
llvm::make_early_inc_range(MRI.use_nodbg_bundles(Reg))) {
- if (!MI.isCopy() && !MI.mayStore())
+ if (!MI.mayStore() && !TII.isCopyInstr(MI))
continue;
SlotIndex Idx = LIS.getInstructionIndex(MI);
if (LI->getVNInfoAt(Idx) != VNI)
continue;
// Follow sibling copies down the dominator tree.
- if (Register DstReg = isCopyOfBundle(MI, Reg)) {
+ if (Register DstReg = isCopyOfBundle(MI, Reg, TII)) {
if (isSibling(DstReg)) {
LiveInterval &DstLI = LIS.getInterval(DstReg);
VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot());
@@ -751,6 +747,35 @@ void InlineSpiller::reMaterializeAll() {
continue;
LLVM_DEBUG(dbgs() << "All defs dead: " << *MI);
DeadDefs.push_back(MI);
+ // If MI is a bundle header, also try removing copies inside the bundle,
+ // otherwise the verifier would complain "live range continues after dead
+ // def flag".
+ if (MI->isBundledWithSucc() && !MI->isBundledWithPred()) {
+ MachineBasicBlock::instr_iterator BeginIt = MI->getIterator(),
+ EndIt = MI->getParent()->instr_end();
+ ++BeginIt; // Skip MI that was already handled.
+
+ bool OnlyDeadCopies = true;
+ for (MachineBasicBlock::instr_iterator It = BeginIt;
+ It != EndIt && It->isBundledWithPred(); ++It) {
+
+ auto DestSrc = TII.isCopyInstr(*It);
+ bool IsCopyToDeadReg =
+ DestSrc && DestSrc->Destination->getReg() == Reg;
+ if (!IsCopyToDeadReg) {
+ OnlyDeadCopies = false;
+ break;
+ }
+ }
+ if (OnlyDeadCopies) {
+ for (MachineBasicBlock::instr_iterator It = BeginIt;
+ It != EndIt && It->isBundledWithPred(); ++It) {
+ It->addRegisterDead(Reg, &TRI);
+ LLVM_DEBUG(dbgs() << "All defs dead: " << *It);
+ DeadDefs.push_back(&*It);
+ }
+ }
+ }
}
}
@@ -870,7 +895,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
if (Ops.back().first != MI || MI->isBundled())
return false;
- bool WasCopy = MI->isCopy();
+ bool WasCopy = TII.isCopyInstr(*MI).has_value();
Register ImpReg;
// TII::foldMemoryOperand will do what we need here for statepoint
@@ -1069,8 +1094,7 @@ void InlineSpiller::insertReload(Register NewVReg,
static bool isRealSpill(const MachineInstr &Def) {
if (!Def.isImplicitDef())
return true;
- assert(Def.getNumOperands() == 1 &&
- "Implicit def with more than one definition");
+
// We can say that the VReg defined by Def is undef, only if it is
// fully defined by Def. Otherwise, some of the lanes may not be
// undef and the value of the VReg matters.
@@ -1155,7 +1179,7 @@ void InlineSpiller::spillAroundUses(Register Reg) {
Idx = VNI->def;
// Check for a sibling copy.
- Register SibReg = isCopyOfBundle(MI, Reg);
+ Register SibReg = isCopyOfBundle(MI, Reg, TII);
if (SibReg && isSibling(SibReg)) {
// This may actually be a copy between snippets.
if (isRegToSpill(SibReg)) {
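
The repeated isCopy -> TII.isCopyInstr changes in this file widen the checks from the target-independent COPY opcode to anything the target reports as behaving like a copy, with the matched operands returned as a DestSourcePair. A sketch of the idiom, assuming a MachineInstr MI and TargetInstrInfo TII in scope:

  if (MI.isCopy()) {
    // matches only the generic COPY opcode
  }
  if (auto DestSrc = TII.isCopyInstr(MI)) {
    // also matches target-specific copy-like instructions
    Register Dst = DestSrc->Destination->getReg();
    Register Src = DestSrc->Source->getReg();
  }
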
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 6b3848531569..2a0daf404c97 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -48,6 +48,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/InterleavedAccess.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -82,22 +83,14 @@ static cl::opt<bool> LowerInterleavedAccesses(
namespace {
-class InterleavedAccess : public FunctionPass {
-public:
- static char ID;
-
- InterleavedAccess() : FunctionPass(ID) {
- initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
- }
+class InterleavedAccessImpl {
+ friend class InterleavedAccess;
- StringRef getPassName() const override { return "Interleaved Access Pass"; }
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.setPreservesCFG();
- }
+public:
+ InterleavedAccessImpl() = default;
+ InterleavedAccessImpl(DominatorTree *DT, const TargetLowering *TLI)
+ : DT(DT), TLI(TLI), MaxFactor(TLI->getMaxSupportedInterleaveFactor()) {}
+ bool runOnFunction(Function &F);
private:
DominatorTree *DT = nullptr;
@@ -141,10 +134,60 @@ private:
LoadInst *LI);
};
+class InterleavedAccess : public FunctionPass {
+ InterleavedAccessImpl Impl;
+
+public:
+ static char ID;
+
+ InterleavedAccess() : FunctionPass(ID) {
+ initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "Interleaved Access Pass"; }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.setPreservesCFG();
+ }
+};
+
} // end anonymous namespace.
+PreservedAnalyses InterleavedAccessPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ auto *DT = &FAM.getResult<DominatorTreeAnalysis>(F);
+ auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+ InterleavedAccessImpl Impl(DT, TLI);
+ bool Changed = Impl.runOnFunction(F);
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
char InterleavedAccess::ID = 0;
+bool InterleavedAccess::runOnFunction(Function &F) {
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC || !LowerInterleavedAccesses)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");
+
+ Impl.DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &TM = TPC->getTM<TargetMachine>();
+ Impl.TLI = TM.getSubtargetImpl(F)->getTargetLowering();
+ Impl.MaxFactor = Impl.TLI->getMaxSupportedInterleaveFactor();
+
+ return Impl.runOnFunction(F);
+}
+
INITIALIZE_PASS_BEGIN(InterleavedAccess, DEBUG_TYPE,
"Lower interleaved memory accesses to target specific intrinsics", false,
false)
@@ -228,7 +271,7 @@ static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
return false;
}
-bool InterleavedAccess::lowerInterleavedLoad(
+bool InterleavedAccessImpl::lowerInterleavedLoad(
LoadInst *LI, SmallVector<Instruction *, 32> &DeadInsts) {
if (!LI->isSimple() || isa<ScalableVectorType>(LI->getType()))
return false;
@@ -251,7 +294,7 @@ bool InterleavedAccess::lowerInterleavedLoad(
continue;
}
if (auto *BI = dyn_cast<BinaryOperator>(User)) {
- if (all_of(BI->users(), [](auto *U) {
+ if (!BI->user_empty() && all_of(BI->users(), [](auto *U) {
auto *SVI = dyn_cast<ShuffleVectorInst>(U);
return SVI && isa<UndefValue>(SVI->getOperand(1));
})) {
@@ -334,7 +377,7 @@ bool InterleavedAccess::lowerInterleavedLoad(
return true;
}
-bool InterleavedAccess::replaceBinOpShuffles(
+bool InterleavedAccessImpl::replaceBinOpShuffles(
ArrayRef<ShuffleVectorInst *> BinOpShuffles,
SmallVectorImpl<ShuffleVectorInst *> &Shuffles, LoadInst *LI) {
for (auto *SVI : BinOpShuffles) {
@@ -367,7 +410,7 @@ bool InterleavedAccess::replaceBinOpShuffles(
return !BinOpShuffles.empty();
}
-bool InterleavedAccess::tryReplaceExtracts(
+bool InterleavedAccessImpl::tryReplaceExtracts(
ArrayRef<ExtractElementInst *> Extracts,
ArrayRef<ShuffleVectorInst *> Shuffles) {
// If there aren't any extractelement instructions to modify, there's nothing
@@ -431,7 +474,7 @@ bool InterleavedAccess::tryReplaceExtracts(
return true;
}
-bool InterleavedAccess::lowerInterleavedStore(
+bool InterleavedAccessImpl::lowerInterleavedStore(
StoreInst *SI, SmallVector<Instruction *, 32> &DeadInsts) {
if (!SI->isSimple())
return false;
@@ -457,7 +500,7 @@ bool InterleavedAccess::lowerInterleavedStore(
return true;
}
-bool InterleavedAccess::lowerDeinterleaveIntrinsic(
+bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
IntrinsicInst *DI, SmallVector<Instruction *, 32> &DeadInsts) {
LoadInst *LI = dyn_cast<LoadInst>(DI->getOperand(0));
@@ -476,7 +519,7 @@ bool InterleavedAccess::lowerDeinterleaveIntrinsic(
return true;
}
-bool InterleavedAccess::lowerInterleaveIntrinsic(
+bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
IntrinsicInst *II, SmallVector<Instruction *, 32> &DeadInsts) {
if (!II->hasOneUse())
return false;
@@ -498,18 +541,7 @@ bool InterleavedAccess::lowerInterleaveIntrinsic(
return true;
}
-bool InterleavedAccess::runOnFunction(Function &F) {
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- if (!TPC || !LowerInterleavedAccesses)
- return false;
-
- LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");
-
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &TM = TPC->getTM<TargetMachine>();
- TLI = TM.getSubtargetImpl(F)->getTargetLowering();
- MaxFactor = TLI->getMaxSupportedInterleaveFactor();
-
+bool InterleavedAccessImpl::runOnFunction(Function &F) {
// Holds dead instructions that will be erased later.
SmallVector<Instruction *, 32> DeadInsts;
bool Changed = false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index d0ad6e45b4d3..f2d5c3c867c2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/InterleavedLoadCombine.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -63,7 +64,7 @@ struct VectorInfo;
struct InterleavedLoadCombineImpl {
public:
InterleavedLoadCombineImpl(Function &F, DominatorTree &DT, MemorySSA &MSSA,
- TargetMachine &TM)
+ const TargetMachine &TM)
: F(F), DT(DT), MSSA(MSSA),
TLI(*TM.getSubtargetImpl(F)->getTargetLowering()),
TTI(TM.getTargetTransformInfo(F)) {}
@@ -628,7 +629,7 @@ static raw_ostream &operator<<(raw_ostream &OS, const Polynomial &S) {
/// VectorInfo stores the following abstract information for each vector
/// element:
///
-/// 1) The the memory address loaded into the element as Polynomial
+/// 1) The memory address loaded into the element as Polynomial
/// 2) a set of load instructions necessary to construct the vector,
/// 3) a set of all other instructions that are necessary to create the vector and
/// 4) a pointer value that can be used as relative base for all elements.
@@ -1215,13 +1216,9 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
return false;
}
- // Create a pointer cast for the wide load.
- auto CI = Builder.CreatePointerCast(InsertionPoint->getOperand(0),
- ILTy->getPointerTo(),
- "interleaved.wide.ptrcast");
-
// Create the wide load and update the MemorySSA.
- auto LI = Builder.CreateAlignedLoad(ILTy, CI, InsertionPoint->getAlign(),
+ auto Ptr = InsertionPoint->getPointerOperand();
+ auto LI = Builder.CreateAlignedLoad(ILTy, Ptr, InsertionPoint->getAlign(),
"interleaved.wide.load");
auto MSSAU = MemorySSAUpdater(&MSSA);
MemoryUse *MSSALoad = cast<MemoryUse>(MSSAU.createMemoryAccessBefore(
@@ -1343,6 +1340,15 @@ private:
};
} // anonymous namespace
+PreservedAnalyses
+InterleavedLoadCombinePass::run(Function &F, FunctionAnalysisManager &FAM) {
+
+ auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+ auto &MemSSA = FAM.getResult<MemorySSAAnalysis>(F).getMSSA();
+ bool Changed = InterleavedLoadCombineImpl(F, DT, MemSSA, *TM).run();
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
char InterleavedLoadCombine::ID = 0;
INITIALIZE_PASS_BEGIN(
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp
index f1953c363b59..62a381918875 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp
@@ -20,6 +20,7 @@
// weak symbol.
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/JMCInstrumenter.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/Passes.h"
@@ -39,19 +40,25 @@
using namespace llvm;
-#define DEBUG_TYPE "jmc-instrument"
+#define DEBUG_TYPE "jmc-instrumenter"
+static bool runImpl(Module &M);
namespace {
struct JMCInstrumenter : public ModulePass {
static char ID;
JMCInstrumenter() : ModulePass(ID) {
initializeJMCInstrumenterPass(*PassRegistry::getPassRegistry());
}
- bool runOnModule(Module &M) override;
+ bool runOnModule(Module &M) override { return runImpl(M); }
};
char JMCInstrumenter::ID = 0;
} // namespace
+PreservedAnalyses JMCInstrumenterPass::run(Module &M, ModuleAnalysisManager &) {
+ bool Changed = runImpl(M);
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
INITIALIZE_PASS(
JMCInstrumenter, DEBUG_TYPE,
"Instrument function entry with call to __CheckForDebuggerJustMyCode",
@@ -120,7 +127,7 @@ void attachDebugInfo(GlobalVariable &GV, DISubprogram &SP) {
FunctionType *getCheckFunctionType(LLVMContext &Ctx) {
Type *VoidTy = Type::getVoidTy(Ctx);
- PointerType *VoidPtrTy = Type::getInt8PtrTy(Ctx);
+ PointerType *VoidPtrTy = PointerType::getUnqual(Ctx);
return FunctionType::get(VoidTy, VoidPtrTy, false);
}
@@ -143,7 +150,7 @@ Function *createDefaultCheckFunction(Module &M, bool UseX86FastCall) {
}
} // namespace
-bool JMCInstrumenter::runOnModule(Module &M) {
+bool runImpl(Module &M) {
bool Changed = false;
LLVMContext &Ctx = M.getContext();
Triple ModuleTriple(M.getTargetTriple());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index d02ec1db1165..42cabb58e518 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -37,6 +37,11 @@ static cl::opt<bool>
EnableTrapUnreachable("trap-unreachable", cl::Hidden,
cl::desc("Enable generating trap for unreachable"));
+static cl::opt<bool> EnableNoTrapAfterNoreturn(
+ "no-trap-after-noreturn", cl::Hidden,
+ cl::desc("Do not emit a trap instruction for 'unreachable' IR instructions "
+ "after noreturn calls, even if --trap-unreachable is set."));
+
void LLVMTargetMachine::initAsmInfo() {
MRI.reset(TheTarget.createMCRegInfo(getTargetTriple().str()));
assert(MRI && "Unable to create reg info");
@@ -76,6 +81,8 @@ void LLVMTargetMachine::initAsmInfo() {
TmpAsmInfo->setRelaxELFRelocations(Options.RelaxELFRelocations);
+ TmpAsmInfo->setFullRegisterNames(Options.MCOptions.PPCUseFullRegisterNames);
+
if (Options.ExceptionModel != ExceptionHandling::None)
TmpAsmInfo->setExceptionsType(Options.ExceptionModel);
@@ -87,7 +94,7 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T,
const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
+ CodeGenOptLevel OL)
: TargetMachine(T, DataLayoutString, TT, CPU, FS, Options) {
this->RM = RM;
this->CMModel = CM;
@@ -95,6 +102,8 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T,
if (EnableTrapUnreachable)
this->Options.TrapUnreachable = true;
+ if (EnableNoTrapAfterNoreturn)
+ this->Options.NoTrapAfterNoreturn = true;
}
TargetTransformInfo
@@ -156,7 +165,7 @@ Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer(
std::unique_ptr<MCStreamer> AsmStreamer;
switch (FileType) {
- case CGFT_AssemblyFile: {
+ case CodeGenFileType::AssemblyFile: {
MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter(
getTargetTriple(), MAI.getAssemblerDialect(), MAI, MII, MRI);
@@ -188,7 +197,7 @@ Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer(
AsmStreamer.reset(S);
break;
}
- case CGFT_ObjectFile: {
+ case CodeGenFileType::ObjectFile: {
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, Context);
@@ -211,7 +220,7 @@ Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer(
/*DWARFMustBeAtTheEnd*/ true));
break;
}
- case CGFT_Null:
+ case CodeGenFileType::Null:
// The Null output is intended for use for performance analysis and testing,
// not real users.
AsmStreamer.reset(getTarget().createNullStreamer(Context));
@@ -238,7 +247,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(
return true;
} else {
// MIR printing is redundant with -filetype=null.
- if (FileType != CGFT_Null)
+ if (FileType != CodeGenFileType::Null)
PM.add(createPrintMIRPass(Out));
}
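
The CGFT_* and CodeGenOpt::Level enums were converted to the scoped enums CodeGenFileType and CodeGenOptLevel in this merge. A sketch of the new spellings at a call site:

  CodeGenFileType FT = CodeGenFileType::ObjectFile;  // was CGFT_ObjectFile
  CodeGenOptLevel OL = CodeGenOptLevel::Aggressive;  // was CodeGenOpt::Aggressive
  switch (FT) {
  case CodeGenFileType::AssemblyFile: /* textual .s */ break;
  case CodeGenFileType::ObjectFile:   /* binary .o  */ break;
  case CodeGenFileType::Null:         /* no output  */ break;
  }
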
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index 57df9b67fd02..87a0ba58b14c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -1429,7 +1429,7 @@ bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) {
std::optional<ValueIDNum> InstrRefBasedLDV::getValueForInstrRef(
unsigned InstNo, unsigned OpNo, MachineInstr &MI,
- const ValueTable *MLiveOuts, const ValueTable *MLiveIns) {
+ const FuncValueTable *MLiveOuts, const FuncValueTable *MLiveIns) {
// Various optimizations may have happened to the value during codegen,
// recorded in the value substitution table. Apply any substitutions to
// the instruction / operand number in this DBG_INSTR_REF, and collect
@@ -1495,7 +1495,8 @@ std::optional<ValueIDNum> InstrRefBasedLDV::getValueForInstrRef(
} else if (PHIIt != DebugPHINumToValue.end() && PHIIt->InstrNum == InstNo) {
// It's actually a PHI value. Which value it is might not be obvious, use
// the resolver helper to find out.
- NewID = resolveDbgPHIs(*MI.getParent()->getParent(), MLiveOuts, MLiveIns,
+ assert(MLiveOuts && MLiveIns);
+ NewID = resolveDbgPHIs(*MI.getParent()->getParent(), *MLiveOuts, *MLiveIns,
MI, InstNo);
}
@@ -1574,8 +1575,8 @@ std::optional<ValueIDNum> InstrRefBasedLDV::getValueForInstrRef(
}
bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
- const ValueTable *MLiveOuts,
- const ValueTable *MLiveIns) {
+ const FuncValueTable *MLiveOuts,
+ const FuncValueTable *MLiveIns) {
if (!MI.isDebugRef())
return false;
@@ -2116,7 +2117,7 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
}
bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) {
- auto DestSrc = TII->isCopyInstr(MI);
+ auto DestSrc = TII->isCopyLikeInstr(MI);
if (!DestSrc)
return false;
@@ -2245,8 +2246,9 @@ void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) {
AllSeenFragments.insert(ThisFragment);
}
-void InstrRefBasedLDV::process(MachineInstr &MI, const ValueTable *MLiveOuts,
- const ValueTable *MLiveIns) {
+void InstrRefBasedLDV::process(MachineInstr &MI,
+ const FuncValueTable *MLiveOuts,
+ const FuncValueTable *MLiveIns) {
// Try to interpret an MI as a debug or transfer instruction. Only if it's
// none of these should we interpret its register defs as new value
// definitions.
@@ -2543,7 +2545,7 @@ void InstrRefBasedLDV::placeMLocPHIs(
for (unsigned int I = 0; I < OrderToBB.size(); ++I) {
MachineBasicBlock *MBB = OrderToBB[I];
const auto &TransferFunc = MLocTransfer[MBB->getNumber()];
- if (TransferFunc.find(L) != TransferFunc.end())
+ if (TransferFunc.contains(L))
DefBlocks.insert(MBB);
}
@@ -3503,7 +3505,10 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit(
// Helper lambda for ejecting a block -- if nothing is going to use the block,
// we can translate the variable location information into DBG_VALUEs and then
// free all of InstrRefBasedLDV's data structures.
+ SmallPtrSet<const MachineBasicBlock *, 8> EjectedBBs;
auto EjectBlock = [&](MachineBasicBlock &MBB) -> void {
+ if (!EjectedBBs.insert(&MBB).second)
+ return;
unsigned BBNum = MBB.getNumber();
AllTheVLocs[BBNum].clear();
@@ -3517,14 +3522,14 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit(
CurBB = BBNum;
CurInst = 1;
for (auto &MI : MBB) {
- process(MI, MOutLocs.get(), MInLocs.get());
+ process(MI, &MOutLocs, &MInLocs);
TTracker->checkInstForNewValues(CurInst, MI.getIterator());
++CurInst;
}
// Free machine-location tables for this block.
- MInLocs[BBNum].reset();
- MOutLocs[BBNum].reset();
+ MInLocs[BBNum] = ValueTable();
+ MOutLocs[BBNum] = ValueTable();
// We don't need live-in variable values for this block either.
Output[BBNum].clear();
AllTheVLocs[BBNum].clear();
@@ -3589,8 +3594,7 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit(
// anything for such out-of-scope blocks, but for the sake of being similar
// to VarLocBasedLDV, eject these too.
for (auto *MBB : ArtificialBlocks)
- if (MOutLocs[MBB->getNumber()])
- EjectBlock(*MBB);
+ EjectBlock(*MBB);
return emitTransfers(AllVarsNumbering);
}
@@ -3688,14 +3692,9 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
// Allocate and initialize two array-of-arrays for the live-in and live-out
// machine values. The outer dimension is the block number, while the inner
// dimension is a LocIdx from MLocTracker.
- FuncValueTable MOutLocs = std::make_unique<ValueTable[]>(MaxNumBlocks);
- FuncValueTable MInLocs = std::make_unique<ValueTable[]>(MaxNumBlocks);
unsigned NumLocs = MTracker->getNumLocs();
- for (int i = 0; i < MaxNumBlocks; ++i) {
- // These all auto-initialize to ValueIDNum::EmptyValue
- MOutLocs[i] = std::make_unique<ValueIDNum[]>(NumLocs);
- MInLocs[i] = std::make_unique<ValueIDNum[]>(NumLocs);
- }
+ FuncValueTable MOutLocs(MaxNumBlocks, ValueTable(NumLocs));
+ FuncValueTable MInLocs(MaxNumBlocks, ValueTable(NumLocs));
// Solve the machine value dataflow problem using the MLocTransfer function,
// storing the computed live-ins / live-outs into the array-of-arrays. We use
@@ -3736,7 +3735,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
MTracker->loadFromArray(MInLocs[CurBB], CurBB);
CurInst = 1;
for (auto &MI : MBB) {
- process(MI, MOutLocs.get(), MInLocs.get());
+ process(MI, &MOutLocs, &MInLocs);
++CurInst;
}
MTracker->reset();
@@ -3917,9 +3916,9 @@ public:
/// Machine location where any PHI must occur.
LocIdx Loc;
/// Table of live-in machine value numbers for blocks / locations.
- const ValueTable *MLiveIns;
+ const FuncValueTable &MLiveIns;
- LDVSSAUpdater(LocIdx L, const ValueTable *MLiveIns)
+ LDVSSAUpdater(LocIdx L, const FuncValueTable &MLiveIns)
: Loc(L), MLiveIns(MLiveIns) {}
void reset() {
@@ -4075,12 +4074,8 @@ public:
} // end namespace llvm
std::optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs(
- MachineFunction &MF, const ValueTable *MLiveOuts,
- const ValueTable *MLiveIns, MachineInstr &Here, uint64_t InstrNum) {
- assert(MLiveOuts && MLiveIns &&
- "Tried to resolve DBG_PHI before location "
- "tables allocated?");
-
+ MachineFunction &MF, const FuncValueTable &MLiveOuts,
+ const FuncValueTable &MLiveIns, MachineInstr &Here, uint64_t InstrNum) {
// This function will be called twice per DBG_INSTR_REF, and might end up
// computing lots of SSA information: memoize it.
auto SeenDbgPHIIt = SeenDbgPHIs.find(std::make_pair(&Here, InstrNum));
@@ -4094,8 +4089,8 @@ std::optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs(
}
std::optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl(
- MachineFunction &MF, const ValueTable *MLiveOuts,
- const ValueTable *MLiveIns, MachineInstr &Here, uint64_t InstrNum) {
+ MachineFunction &MF, const FuncValueTable &MLiveOuts,
+ const FuncValueTable &MLiveIns, MachineInstr &Here, uint64_t InstrNum) {
// Pick out records of DBG_PHI instructions that have been observed. If there
// are none, then we cannot compute a value number.
auto RangePair = std::equal_range(DebugPHINumToValue.begin(),
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
index 30de18e53c4f..d6dbb1feda3e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
@@ -205,11 +205,11 @@ namespace LiveDebugValues {
using namespace llvm;
/// Type for a table of values in a block.
-using ValueTable = std::unique_ptr<ValueIDNum[]>;
+using ValueTable = SmallVector<ValueIDNum, 0>;
/// Type for a table-of-table-of-values, i.e., the collection of either
/// live-in or live-out values for each block in the function.
-using FuncValueTable = std::unique_ptr<ValueTable[]>;
+using FuncValueTable = SmallVector<ValueTable, 0>;
/// Thin wrapper around an integer -- designed to give more type safety to
/// spill location numbers.
@@ -602,8 +602,8 @@ public:
/// Slot Num (%stack.0) /
/// FrameIdx => SpillNum /
/// \ /
-/// SpillID (int) Register number (int)
-/// \ /
+/// SpillID (int) Register number (int)
+/// \ /
/// LocationID => LocIdx
/// |
/// LocIdx => ValueIDNum
@@ -1200,12 +1200,12 @@ private:
/// exists, otherwise returns std::nullopt.
std::optional<ValueIDNum> getValueForInstrRef(unsigned InstNo, unsigned OpNo,
MachineInstr &MI,
- const ValueTable *MLiveOuts,
- const ValueTable *MLiveIns);
+ const FuncValueTable *MLiveOuts,
+ const FuncValueTable *MLiveIns);
/// Observe a single instruction while stepping through a block.
- void process(MachineInstr &MI, const ValueTable *MLiveOuts,
- const ValueTable *MLiveIns);
+ void process(MachineInstr &MI, const FuncValueTable *MLiveOuts,
+ const FuncValueTable *MLiveIns);
/// Examines whether \p MI is a DBG_VALUE and notifies trackers.
/// \returns true if MI was recognized and processed.
@@ -1213,8 +1213,8 @@ private:
/// Examines whether \p MI is a DBG_INSTR_REF and notifies trackers.
/// \returns true if MI was recognized and processed.
- bool transferDebugInstrRef(MachineInstr &MI, const ValueTable *MLiveOuts,
- const ValueTable *MLiveIns);
+ bool transferDebugInstrRef(MachineInstr &MI, const FuncValueTable *MLiveOuts,
+ const FuncValueTable *MLiveIns);
/// Stores value-information about where this PHI occurred, and what
/// instruction number is associated with it.
@@ -1246,14 +1246,14 @@ private:
/// \p InstrNum Debug instruction number defined by DBG_PHI instructions.
/// \returns The machine value number at position Here, or std::nullopt.
std::optional<ValueIDNum> resolveDbgPHIs(MachineFunction &MF,
- const ValueTable *MLiveOuts,
- const ValueTable *MLiveIns,
+ const FuncValueTable &MLiveOuts,
+ const FuncValueTable &MLiveIns,
MachineInstr &Here,
uint64_t InstrNum);
std::optional<ValueIDNum> resolveDbgPHIsImpl(MachineFunction &MF,
- const ValueTable *MLiveOuts,
- const ValueTable *MLiveIns,
+ const FuncValueTable &MLiveOuts,
+ const FuncValueTable &MLiveIns,
MachineInstr &Here,
uint64_t InstrNum);
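
Switching ValueTable/FuncValueTable from owning raw arrays to SmallVectors gives the tables value semantics: they carry their own size, default-initialize their elements on construction, and a block's table can be dropped early by assigning a fresh ValueTable(), as EjectBlock now does. The container shape, sketched with plain ints so it stands alone (NumBlocks, NumLocs and BB are placeholders):

  SmallVector<SmallVector<int, 0>, 0> Table(NumBlocks,
                                            SmallVector<int, 0>(NumLocs, 0));
  Table[BB] = SmallVector<int, 0>();  // drop that block's contents early
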
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
index 116c6b7e2d19..bf730be00a9a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
@@ -1364,7 +1364,7 @@ void VarLocBasedLDV::removeEntryValue(const MachineInstr &MI,
// TODO: Try to keep tracking of an entry value if we encounter a propagated
// DBG_VALUE describing the copy of the entry value. (Propagated entry value
// does not indicate the parameter modification.)
- auto DestSrc = TII->isCopyInstr(*TransferInst);
+ auto DestSrc = TII->isCopyLikeInstr(*TransferInst);
if (DestSrc) {
const MachineOperand *SrcRegOp, *DestRegOp;
SrcRegOp = DestSrc->Source;
@@ -1840,7 +1840,7 @@ void VarLocBasedLDV::transferRegisterCopy(MachineInstr &MI,
OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs,
TransferMap &Transfers) {
- auto DestSrc = TII->isCopyInstr(MI);
+ auto DestSrc = TII->isCopyLikeInstr(MI);
if (!DestSrc)
return;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 9603c1f01e08..7cb90af5ff17 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -58,6 +58,7 @@
#include <algorithm>
#include <cassert>
#include <iterator>
+#include <map>
#include <memory>
#include <optional>
#include <utility>
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
index 1cf354349c56..c81540602f59 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
@@ -563,13 +563,15 @@ VNInfo *LiveRange::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) {
return CalcLiveRangeUtilVector(this).extendInBlock(StartIdx, Kill);
}
-/// Remove the specified segment from this range. Note that the segment must
-/// be in a single Segment in its entirety.
void LiveRange::removeSegment(SlotIndex Start, SlotIndex End,
bool RemoveDeadValNo) {
// Find the Segment containing this span.
iterator I = find(Start);
- assert(I != end() && "Segment is not in range!");
+
+ // No Segment found, so nothing to do.
+ if (I == end())
+ return;
+
assert(I->containsInterval(Start, End)
&& "Segment is not entirely in range!");
@@ -629,6 +631,7 @@ void LiveRange::join(LiveRange &Other,
const int *RHSValNoAssignments,
SmallVectorImpl<VNInfo *> &NewVNInfo) {
verify();
+ Other.verify();
// Determine if any of our values are mapped. This is uncommon, so we want
// to avoid the range scan if not.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
index ccc5ae98086e..ba4ea32e6851 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
@@ -12,7 +12,6 @@
#include "llvm/CodeGen/LiveIntervalCalc.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp
index 11a4ecf0bef9..bfaa3bf9a694 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp
@@ -14,7 +14,6 @@
#include "llvm/CodeGen/LiveIntervalUnion.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SparseBitVector.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
index da55e7f7284b..68fff9bc221d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -862,7 +862,7 @@ float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
LiveRange::Segment
LiveIntervals::addSegmentToEndOfBlock(Register Reg, MachineInstr &startInst) {
- LiveInterval &Interval = createEmptyInterval(Reg);
+ LiveInterval &Interval = getOrCreateEmptyInterval(Reg);
VNInfo *VN = Interval.getNextValue(
SlotIndex(getInstructionIndex(startInst).getRegSlot()),
getVNInfoAllocator());
@@ -1676,7 +1676,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
if (!hasInterval(Reg)) {
createAndComputeVirtRegInterval(Reg);
// Don't bother to repair a freshly calculated live interval.
- erase_value(RegsToRepair, Reg);
+ llvm::erase(RegsToRepair, Reg);
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp
index 26f6e1ede1ad..f7d9e5c44ac2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -217,13 +217,18 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
report_fatal_error("Use not jointly dominated by defs.");
}
- if (Register::isPhysicalRegister(PhysReg) && !MBB->isLiveIn(PhysReg)) {
- MBB->getParent()->verify();
+ if (Register::isPhysicalRegister(PhysReg)) {
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
- errs() << "The register " << printReg(PhysReg, TRI)
- << " needs to be live in to " << printMBBReference(*MBB)
- << ", but is missing from the live-in list.\n";
- report_fatal_error("Invalid global physical register");
+ bool IsLiveIn = MBB->isLiveIn(PhysReg);
+ for (MCRegAliasIterator Alias(PhysReg, TRI, false); !IsLiveIn && Alias.isValid(); ++Alias)
+ IsLiveIn = MBB->isLiveIn(*Alias);
+ if (!IsLiveIn) {
+ MBB->getParent()->verify();
+ errs() << "The register " << printReg(PhysReg, TRI)
+ << " needs to be live in to " << printMBBReference(*MBB)
+ << ", but is missing from the live-in list.\n";
+ report_fatal_error("Invalid global physical register");
+ }
}
#endif
FoundUndef |= MBB->pred_empty();
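
The check above now also accepts a live-in recorded via an aliasing register; for example, if $eax is on the live-in list, a use of $ax no longer trips the fatal error. The iteration pattern, assuming MBB, PhysReg and TRI as in the surrounding code:

  bool IsLiveIn = MBB->isLiveIn(PhysReg);
  for (MCRegAliasIterator AI(PhysReg, TRI, /*IncludeSelf=*/false);
       !IsLiveIn && AI.isValid(); ++AI)
    IsLiveIn = MBB->isLiveIn(*AI);
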
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
index c3477cd8ce34..0203034b5a01 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -190,7 +190,7 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
// DestReg of the cloned instruction cannot be Dead. Set isDead of DestReg
// to false anyway in case the isDead flag of RM.OrigMI's dest register
// is true.
- (*--MI).getOperand(0).setIsDead(false);
+ (*--MI).clearRegisterDeads(DestReg);
Rematted.insert(RM.ParentVNI);
++NumReMaterialization;
@@ -352,7 +352,8 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
// unlikely to change anything. We typically don't want to shrink the
// PIC base register that has lots of uses everywhere.
// Always shrink COPY uses that probably come from live range splitting.
- if ((MI->readsVirtualRegister(Reg) && (MI->isCopy() || MO.isDef())) ||
+ if ((MI->readsVirtualRegister(Reg) &&
+ (MO.isDef() || TII.isCopyInstr(*MI))) ||
(MO.readsReg() && (MRI.hasOneNonDBGUse(Reg) || useIsKill(LI, MO))))
ToShrink.insert(&LI);
else if (MO.readsReg())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
index 93f5314539cd..af7d6c4403b8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@@ -109,6 +110,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
return false;
MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
@@ -197,7 +199,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
// is because it needs more accurate model to handle register
// pressure correctly.
MachineInstr &DefInstr = *MRI.def_instr_begin(Reg);
- if (!DefInstr.isCopy())
+ if (!TII.isCopyInstr(DefInstr))
NumEligibleUse++;
Insert = FindDominatedInstruction(DefInstr, Insert, IOM);
} else {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
index 9cd74689ba10..b85526cfb380 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
@@ -406,11 +406,11 @@ bool LiveVariables::HandlePhysRegKill(Register Reg, MachineInstr *MI) {
return true;
}
-void LiveVariables::HandleRegMask(const MachineOperand &MO) {
+void LiveVariables::HandleRegMask(const MachineOperand &MO, unsigned NumRegs) {
// Call HandlePhysRegKill() for all live registers clobbered by Mask.
// Clobbered registers are always dead, so there is no need to use
// HandlePhysRegDef().
- for (unsigned Reg = 1, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg) {
+ for (unsigned Reg = 1; Reg != NumRegs; ++Reg) {
// Skip dead regs.
if (!PhysRegDef[Reg] && !PhysRegUse[Reg])
continue;
@@ -421,7 +421,8 @@ void LiveVariables::HandleRegMask(const MachineOperand &MO) {
// This avoids needless implicit operands.
unsigned Super = Reg;
for (MCPhysReg SR : TRI->superregs(Reg))
- if ((PhysRegDef[SR] || PhysRegUse[SR]) && MO.clobbersPhysReg(SR))
+ if (SR < NumRegs && (PhysRegDef[SR] || PhysRegUse[SR]) &&
+ MO.clobbersPhysReg(SR))
Super = SR;
HandlePhysRegKill(Super, nullptr);
}
@@ -478,7 +479,8 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI,
}
void LiveVariables::runOnInstr(MachineInstr &MI,
- SmallVectorImpl<unsigned> &Defs) {
+ SmallVectorImpl<unsigned> &Defs,
+ unsigned NumRegs) {
assert(!MI.isDebugOrPseudoInstr());
// Process all of the operands of the instruction...
unsigned NumOperandsToProcess = MI.getNumOperands();
@@ -527,7 +529,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
// Process all masked registers. (Call clobbers).
for (unsigned Mask : RegMasks)
- HandleRegMask(MI.getOperand(Mask));
+ HandleRegMask(MI.getOperand(Mask), NumRegs);
// Process all defs.
for (unsigned MOReg : DefRegs) {
@@ -539,7 +541,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
UpdatePhysRegDefs(MI, Defs);
}
-void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) {
+void LiveVariables::runOnBlock(MachineBasicBlock *MBB, unsigned NumRegs) {
// Mark live-in registers as live-in.
SmallVector<unsigned, 4> Defs;
for (const auto &LI : MBB->liveins()) {
@@ -556,7 +558,7 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) {
continue;
DistanceMap.insert(std::make_pair(&MI, Dist++));
- runOnInstr(MI, Defs);
+ runOnInstr(MI, Defs, NumRegs);
}
// Handle any virtual assignments from PHI nodes which might be at the
@@ -597,11 +599,10 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
MRI = &mf.getRegInfo();
TRI = MF->getSubtarget().getRegisterInfo();
- const unsigned NumRegs = TRI->getNumRegs();
+ const unsigned NumRegs = TRI->getNumSupportedRegs(mf);
PhysRegDef.assign(NumRegs, nullptr);
PhysRegUse.assign(NumRegs, nullptr);
PHIVarInfo.resize(MF->getNumBlockIDs());
- PHIJoins.clear();
// FIXME: LiveIntervals will be updated to remove its dependence on
// LiveVariables to improve compilation time and eliminate bizarre pass
@@ -661,22 +662,18 @@ void LiveVariables::recomputeForSingleDefVirtReg(Register Reg) {
MachineInstr &DefMI = *MRI->getUniqueVRegDef(Reg);
MachineBasicBlock &DefBB = *DefMI.getParent();
- // Handle the case where all uses have been removed.
- if (MRI->use_nodbg_empty(Reg)) {
- VI.Kills.push_back(&DefMI);
- DefMI.addRegisterDead(Reg, nullptr);
- return;
- }
- DefMI.clearRegisterDeads(Reg);
-
// Initialize a worklist of BBs that Reg is live-to-end of. (Here
// "live-to-end" means Reg is live at the end of a block even if it is only
// live because of phi uses in a successor. This is different from isLiveOut()
// which does not consider phi uses.)
SmallVector<MachineBasicBlock *> LiveToEndBlocks;
SparseBitVector<> UseBlocks;
+ unsigned NumRealUses = 0;
for (auto &UseMO : MRI->use_nodbg_operands(Reg)) {
UseMO.setIsKill(false);
+ if (!UseMO.readsReg())
+ continue;
+ ++NumRealUses;
MachineInstr &UseMI = *UseMO.getParent();
MachineBasicBlock &UseBB = *UseMI.getParent();
UseBlocks.set(UseBB.getNumber());
@@ -693,6 +690,14 @@ void LiveVariables::recomputeForSingleDefVirtReg(Register Reg) {
}
}
+ // Handle the case where all uses have been removed.
+ if (NumRealUses == 0) {
+ VI.Kills.push_back(&DefMI);
+ DefMI.addRegisterDead(Reg, nullptr);
+ return;
+ }
+ DefMI.clearRegisterDeads(Reg);
+
// Iterate over the worklist adding blocks to AliveBlocks.
bool LiveToEndOfDefBB = false;
while (!LiveToEndBlocks.empty()) {
@@ -721,7 +726,7 @@ void LiveVariables::recomputeForSingleDefVirtReg(Register Reg) {
continue;
if (MI.isPHI())
break;
- if (MI.readsRegister(Reg)) {
+ if (MI.readsVirtualRegister(Reg)) {
assert(!MI.killsRegister(Reg));
MI.addRegisterKilled(Reg, nullptr);
VI.Kills.push_back(&MI);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
index 24c30b756737..cd85bf606989 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
@@ -17,7 +17,7 @@ using namespace llvm;
LLT::LLT(MVT VT) {
if (VT.isVector()) {
- bool asVector = VT.getVectorMinNumElements() > 1;
+ bool asVector = VT.getVectorMinNumElements() > 1 || VT.isScalableVector();
init(/*IsPointer=*/false, asVector, /*IsScalar=*/!asVector,
VT.getVectorElementCount(), VT.getVectorElementType().getSizeInBits(),
/*AddressSpace=*/0);
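
The fix above keeps single-element scalable MVTs as vector LLTs instead of collapsing them to scalars; only fixed-length one-element vectors still decay. A sketch, assuming the usual CodeGen headers:

  LLT A = LLT(MVT::v1i32);    // fixed <1 x i32> -> scalar s32 (unchanged)
  LLT B = LLT(MVT::nxv1i32);  // scalable <vscale x 1 x i32> -> vector LLT now
  assert(!A.isVector() && B.isVector());
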
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
index a517ee3794ca..f3b5069d351b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
@@ -85,7 +85,7 @@ bool LowerEmuTLS::runOnModule(Module &M) {
bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) {
LLVMContext &C = M.getContext();
- PointerType *VoidPtrType = Type::getInt8PtrTy(C);
+ PointerType *VoidPtrType = PointerType::getUnqual(C);
std::string EmuTlsVarName = ("__emutls_v." + GV->getName()).str();
GlobalVariable *EmuTlsVar = M.getNamedGlobal(EmuTlsVarName);
@@ -114,8 +114,7 @@ bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) {
// void *templ; // 0 or point to __emutls_t.*
// sizeof(word) should be the same as sizeof(void*) on target.
IntegerType *WordType = DL.getIntPtrType(C);
- PointerType *InitPtrType = InitValue ?
- PointerType::getUnqual(InitValue->getType()) : VoidPtrType;
+ PointerType *InitPtrType = PointerType::getUnqual(C);
Type *ElementTypes[4] = {WordType, WordType, VoidPtrType, InitPtrType};
ArrayRef<Type*> ElementTypeArray(ElementTypes, 4);
StructType *EmuTlsVarType = StructType::create(ElementTypeArray);
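Both hunks are opaque-pointer cleanups: with opaque pointers there is only one unqualified pointer type per context, so the element-typed PointerType::getUnqual(InitValue->getType()) and Type::getInt8PtrTy(C) collapse to the same ptr type. A sketch of the equivalence under LLVM 17+ semantics:

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/LLVMContext.h"
  #include <cassert>

  using namespace llvm;

  void opaquePtrEquivalence() {
    LLVMContext C;
    PointerType *A = PointerType::getUnqual(C);
    // The element type is ignored now that pointers are opaque, so this
    // yields the very same type object.
    PointerType *B = PointerType::getUnqual(Type::getInt32Ty(C));
    assert(A == B);
  }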
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp
index 5b388be27839..e9f16329b57f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp
@@ -38,25 +38,13 @@ MBFIWrapper::getBlockProfileCount(const MachineBasicBlock *MBB) const {
// Modified block frequency also impacts profile count. So we should compute
// profile count from new block frequency if it has been changed.
if (I != MergedBBFreq.end())
- return MBFI.getProfileCountFromFreq(I->second.getFrequency());
+ return MBFI.getProfileCountFromFreq(I->second);
return MBFI.getBlockProfileCount(MBB);
}
-raw_ostream & MBFIWrapper::printBlockFreq(raw_ostream &OS,
- const MachineBasicBlock *MBB) const {
- return MBFI.printBlockFreq(OS, getBlockFreq(MBB));
-}
-
-raw_ostream & MBFIWrapper::printBlockFreq(raw_ostream &OS,
- const BlockFrequency Freq) const {
- return MBFI.printBlockFreq(OS, Freq);
-}
-
void MBFIWrapper::view(const Twine &Name, bool isSimple) {
MBFI.view(Name, isSimple);
}
-uint64_t MBFIWrapper::getEntryFreq() const {
- return MBFI.getEntryFreq();
-}
+BlockFrequency MBFIWrapper::getEntryFreq() const { return MBFI.getEntryFreq(); }
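getEntryFreq switching from uint64_t to BlockFrequency (and the printBlockFreq members giving way to a free function defined later in this diff) is part of a tree-wide move to keep frequencies in a strong type. BlockFrequency already supports the comparisons and saturating arithmetic call sites need, so the raw getFrequency() unwrapping can disappear; a trivial sketch:

  #include "llvm/Support/BlockFrequency.h"

  using namespace llvm;

  bool hotterThanEntry(BlockFrequency Freq, BlockFrequency EntryFreq) {
    // No getFrequency() needed on either side; BlockFrequency compares and
    // subtracts (saturating) directly.
    return Freq > EntryFreq;
  }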
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
index 8d17cceeb3cd..f5146f5feeec 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
@@ -35,10 +35,12 @@ using namespace sampleprofutil;
// TODO(xur): Remove this option and related code once we make true the
// default.
+namespace llvm {
cl::opt<bool> ImprovedFSDiscriminator(
"improved-fs-discriminator", cl::Hidden, cl::init(false),
cl::desc("New FS discriminators encoding (incompatible with the original "
"encoding)"));
+}
char MIRAddFSDiscriminators::ID = 0;
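The namespace wrapper is what lets MIRSampleProfile.cpp (further down in this diff) redeclare the option with extern: a declaration only refers to the definition when both live in the same namespace. The shape of the pairing, compressed into one sketch with a hypothetical option name:

  #include "llvm/Support/CommandLine.h"

  // Defining TU: the option object itself, now in namespace llvm.
  namespace llvm {
  cl::opt<bool> ExampleOption("example-option", cl::Hidden, cl::init(false),
                              cl::desc("Illustrative option (hypothetical)"));
  } // namespace llvm

  // Consuming TU: the redeclaration must also sit inside namespace llvm; an
  // extern declaration at global scope would name a different, never-defined
  // variable and fail to link.
  namespace llvm {
  extern cl::opt<bool> ExampleOption;
  }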
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index a4c1ba340e46..870611248466 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -281,6 +281,8 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("ir-block-address-taken", MIToken::kw_ir_block_address_taken)
.Case("machine-block-address-taken",
MIToken::kw_machine_block_address_taken)
+ .Case("call-frame-size", MIToken::kw_call_frame_size)
+ .Case("noconvergent", MIToken::kw_noconvergent)
.Default(MIToken::Identifier);
}
@@ -298,8 +300,8 @@ static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) {
static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token,
ErrorCallbackType ErrorCallback) {
- bool IsReference = C.remaining().startswith("%bb.");
- if (!IsReference && !C.remaining().startswith("bb."))
+ bool IsReference = C.remaining().starts_with("%bb.");
+ if (!IsReference && !C.remaining().starts_with("bb."))
return std::nullopt;
auto Range = C;
unsigned PrefixLength = IsReference ? 4 : 3;
@@ -333,7 +335,7 @@ static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token,
static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule,
MIToken::TokenKind Kind) {
- if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size())))
+ if (!C.remaining().starts_with(Rule) || !isdigit(C.peek(Rule.size())))
return std::nullopt;
auto Range = C;
C.advance(Rule.size());
@@ -346,7 +348,7 @@ static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule,
static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule,
MIToken::TokenKind Kind) {
- if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size())))
+ if (!C.remaining().starts_with(Rule) || !isdigit(C.peek(Rule.size())))
return std::nullopt;
auto Range = C;
C.advance(Rule.size());
@@ -386,7 +388,7 @@ static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) {
static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token,
ErrorCallbackType ErrorCallback) {
const StringRef Rule = "%subreg.";
- if (!C.remaining().startswith(Rule))
+ if (!C.remaining().starts_with(Rule))
return std::nullopt;
return lexName(C, Token, MIToken::SubRegisterIndex, Rule.size(),
ErrorCallback);
@@ -395,7 +397,7 @@ static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token,
static Cursor maybeLexIRBlock(Cursor C, MIToken &Token,
ErrorCallbackType ErrorCallback) {
const StringRef Rule = "%ir-block.";
- if (!C.remaining().startswith(Rule))
+ if (!C.remaining().starts_with(Rule))
return std::nullopt;
if (isdigit(C.peek(Rule.size())))
return maybeLexIndex(C, Token, Rule, MIToken::IRBlock);
@@ -405,7 +407,7 @@ static Cursor maybeLexIRBlock(Cursor C, MIToken &Token,
static Cursor maybeLexIRValue(Cursor C, MIToken &Token,
ErrorCallbackType ErrorCallback) {
const StringRef Rule = "%ir.";
- if (!C.remaining().startswith(Rule))
+ if (!C.remaining().starts_with(Rule))
return std::nullopt;
if (isdigit(C.peek(Rule.size())))
return maybeLexIndex(C, Token, Rule, MIToken::IRValue);
@@ -499,7 +501,7 @@ static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token,
static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token,
ErrorCallbackType ErrorCallback) {
const StringRef Rule = "<mcsymbol ";
- if (!C.remaining().startswith(Rule))
+ if (!C.remaining().starts_with(Rule))
return std::nullopt;
auto Start = C;
C.advance(Rule.size());
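All of the startswith changes in this file are mechanical renames: LLVM 18 deprecates StringRef::startswith/endswith in favor of the std::string_view-style starts_with/ends_with. For example:

  #include "llvm/ADT/StringRef.h"

  using namespace llvm;

  bool isBlockReference(StringRef S) {
    // Same semantics as the old S.startswith("%bb."), new spelling.
    return S.starts_with("%bb.");
  }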
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
index 7149c29d6ba7..0f344da52182 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -135,6 +135,8 @@ struct MIToken {
kw_unknown_address,
kw_ir_block_address_taken,
kw_machine_block_address_taken,
+ kw_call_frame_size,
+ kw_noconvergent,
// Metadata types.
kw_distinct,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index bfd9286ff59c..ede4291fe26d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -35,6 +35,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValueManager.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -500,7 +501,8 @@ public:
bool parseAlignment(uint64_t &Alignment);
bool parseAddrspace(unsigned &Addrspace);
bool parseSectionID(std::optional<MBBSectionID> &SID);
- bool parseBBID(std::optional<unsigned> &BBID);
+ bool parseBBID(std::optional<UniqueBBID> &BBID);
+ bool parseCallFrameSize(unsigned &CallFrameSize);
bool parseOperandsOffset(MachineOperand &Op);
bool parseIRValue(const Value *&V);
bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags);
@@ -665,13 +667,31 @@ bool MIParser::parseSectionID(std::optional<MBBSectionID> &SID) {
}
// Parse Machine Basic Block ID.
-bool MIParser::parseBBID(std::optional<unsigned> &BBID) {
+bool MIParser::parseBBID(std::optional<UniqueBBID> &BBID) {
assert(Token.is(MIToken::kw_bb_id));
lex();
+ unsigned BaseID = 0;
+ unsigned CloneID = 0;
+ if (getUnsigned(BaseID))
+ return error("Unknown BB ID");
+ lex();
+ if (Token.is(MIToken::IntegerLiteral)) {
+ if (getUnsigned(CloneID))
+ return error("Unknown Clone ID");
+ lex();
+ }
+ BBID = {BaseID, CloneID};
+ return false;
+}
+
+// Parse basic block call frame size.
+bool MIParser::parseCallFrameSize(unsigned &CallFrameSize) {
+ assert(Token.is(MIToken::kw_call_frame_size));
+ lex();
unsigned Value = 0;
if (getUnsigned(Value))
- return error("Unknown BB ID");
- BBID = Value;
+ return error("Unknown call frame size");
+ CallFrameSize = Value;
lex();
return false;
}
@@ -692,7 +712,8 @@ bool MIParser::parseBasicBlockDefinition(
bool IsEHFuncletEntry = false;
std::optional<MBBSectionID> SectionID;
uint64_t Alignment = 0;
- std::optional<unsigned> BBID;
+ std::optional<UniqueBBID> BBID;
+ unsigned CallFrameSize = 0;
BasicBlock *BB = nullptr;
if (consumeIfPresent(MIToken::lparen)) {
do {
@@ -737,6 +758,10 @@ bool MIParser::parseBasicBlockDefinition(
if (parseBBID(BBID))
return true;
break;
+ case MIToken::kw_call_frame_size:
+ if (parseCallFrameSize(CallFrameSize))
+ return true;
+ break;
default:
break;
}
@@ -781,6 +806,7 @@ bool MIParser::parseBasicBlockDefinition(
MF.setBBSectionsType(BasicBlockSection::Labels);
MBB->setBBID(BBID.value());
}
+ MBB->setCallFrameSize(CallFrameSize);
return false;
}
@@ -1150,19 +1176,10 @@ bool MIParser::parse(MachineInstr *&MI) {
MI = MF.CreateMachineInstr(MCID, DebugLocation, /*NoImplicit=*/true);
MI->setFlags(Flags);
- unsigned NumExplicitOps = 0;
- for (const auto &Operand : Operands) {
- bool IsImplicitOp = Operand.Operand.isReg() && Operand.Operand.isImplicit();
- if (!IsImplicitOp) {
- if (!MCID.isVariadic() && NumExplicitOps >= MCID.getNumOperands() &&
- !Operand.Operand.isValidExcessOperand())
- return error(Operand.Begin, "too many operands for instruction");
-
- ++NumExplicitOps;
- }
-
+  // Don't check that the operands make sense; let the verifier catch any
+  // improprieties.
+ for (const auto &Operand : Operands)
MI->addOperand(MF, Operand.Operand);
- }
if (assignRegisterTies(*MI, Operands))
return true;
@@ -1439,6 +1456,7 @@ bool MIParser::verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands,
bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
// Allow frame and fast math flags for OPCODE
+ // clang-format off
while (Token.is(MIToken::kw_frame_setup) ||
Token.is(MIToken::kw_frame_destroy) ||
Token.is(MIToken::kw_nnan) ||
@@ -1452,7 +1470,9 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Token.is(MIToken::kw_nsw) ||
Token.is(MIToken::kw_exact) ||
Token.is(MIToken::kw_nofpexcept) ||
+ Token.is(MIToken::kw_noconvergent) ||
Token.is(MIToken::kw_unpredictable)) {
+ // clang-format on
// Mine frame and fast math flags
if (Token.is(MIToken::kw_frame_setup))
Flags |= MachineInstr::FrameSetup;
@@ -1482,6 +1502,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Flags |= MachineInstr::NoFPExcept;
if (Token.is(MIToken::kw_unpredictable))
Flags |= MachineInstr::Unpredictable;
+ if (Token.is(MIToken::kw_noconvergent))
+ Flags |= MachineInstr::NoConvergent;
lex();
}
@@ -1916,12 +1938,28 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
// Now we're looking for a vector.
if (Token.isNot(MIToken::less))
- return error(Loc,
- "expected sN, pA, <M x sN>, or <M x pA> for GlobalISel type");
+ return error(Loc, "expected sN, pA, <M x sN>, <M x pA>, <vscale x M x sN>, "
+ "or <vscale x M x pA> for GlobalISel type");
lex();
- if (Token.isNot(MIToken::IntegerLiteral))
+ bool HasVScale =
+ Token.is(MIToken::Identifier) && Token.stringValue() == "vscale";
+ if (HasVScale) {
+ lex();
+ if (Token.isNot(MIToken::Identifier) || Token.stringValue() != "x")
+ return error("expected <vscale x M x sN> or <vscale x M x pA>");
+ lex();
+ }
+
+ auto GetError = [this, &HasVScale, Loc]() {
+ if (HasVScale)
+ return error(
+          Loc, "expected <vscale x M x sN> or <vscale x M x pA> for vector type");
return error(Loc, "expected <M x sN> or <M x pA> for vector type");
+ };
+
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return GetError();
uint64_t NumElements = Token.integerValue().getZExtValue();
if (!verifyVectorElementCount(NumElements))
return error("invalid number of vector elements");
@@ -1929,11 +1967,12 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
lex();
if (Token.isNot(MIToken::Identifier) || Token.stringValue() != "x")
- return error(Loc, "expected <M x sN> or <M x pA> for vector type");
+ return GetError();
lex();
if (Token.range().front() != 's' && Token.range().front() != 'p')
- return error(Loc, "expected <M x sN> or <M x pA> for vector type");
+ return GetError();
+
StringRef SizeStr = Token.range().drop_front();
if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit))
return error("expected integers after 's'/'p' type character");
@@ -1951,14 +1990,15 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS));
} else
- return error(Loc, "expected <M x sN> or <M x pA> for vector type");
+ return GetError();
lex();
if (Token.isNot(MIToken::greater))
- return error(Loc, "expected <M x sN> or <M x pA> for vector type");
+ return GetError();
+
lex();
- Ty = LLT::fixed_vector(NumElements, Ty);
+ Ty = LLT::vector(ElementCount::get(NumElements, HasVScale), Ty);
return false;
}
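With this change MIR can spell scalable GlobalISel vector types, e.g. <vscale x 4 x s32> alongside <4 x s32>. The final LLT::vector call encodes the vscale prefix in the ElementCount's scalable bit; a sketch of the two constructions:

  #include "llvm/CodeGen/LowLevelType.h"

  using namespace llvm;

  void buildVectorLLTs() {
    LLT S32 = LLT::scalar(32);
    // <4 x s32>
    LLT Fixed = LLT::vector(ElementCount::getFixed(4), S32);
    // <vscale x 4 x s32>
    LLT Scalable = LLT::vector(ElementCount::getScalable(4), S32);
    (void)Fixed;
    (void)Scalable;
  }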
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index b2e570c5e67e..78d7e62797ce 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -13,7 +13,6 @@
#include "llvm/CodeGen/MIRParser/MIRParser.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
index b91d9c4727fc..fee237104022 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -803,6 +803,8 @@ void MIPrinter::print(const MachineInstr &MI) {
OS << "nomerge ";
if (MI.getFlag(MachineInstr::Unpredictable))
OS << "unpredictable ";
+ if (MI.getFlag(MachineInstr::NoConvergent))
+ OS << "noconvergent ";
OS << TII->getName(MI.getOpcode());
if (I < E)
@@ -979,11 +981,29 @@ void MIRFormatter::printIRValue(raw_ostream &OS, const Value &V,
}
void llvm::printMIR(raw_ostream &OS, const Module &M) {
+ // RemoveDIs: as there's no textual form for DPValues yet, print debug-info
+ // in dbg.value format.
+ bool IsNewDbgInfoFormat = M.IsNewDbgInfoFormat;
+ if (IsNewDbgInfoFormat)
+ const_cast<Module &>(M).convertFromNewDbgValues();
+
yaml::Output Out(OS);
Out << const_cast<Module &>(M);
+
+ if (IsNewDbgInfoFormat)
+ const_cast<Module &>(M).convertToNewDbgValues();
}
void llvm::printMIR(raw_ostream &OS, const MachineFunction &MF) {
+ // RemoveDIs: as there's no textual form for DPValues yet, print debug-info
+ // in dbg.value format.
+ bool IsNewDbgInfoFormat = MF.getFunction().IsNewDbgInfoFormat;
+ if (IsNewDbgInfoFormat)
+ const_cast<Function &>(MF.getFunction()).convertFromNewDbgValues();
+
MIRPrinter Printer(OS);
Printer.print(MF);
+
+ if (IsNewDbgInfoFormat)
+ const_cast<Function &>(MF.getFunction()).convertToNewDbgValues();
}
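Both printMIR overloads use the same temporary-downgrade pattern: convert the module or function out of the new DPValue-based debug-info representation, print, then convert back so downstream passes still see the format they were handed. A standalone sketch over a Module (the member names are the real LLVM 18 ones; the wrapper is hypothetical):

  #include "llvm/IR/Module.h"

  using namespace llvm;

  template <typename PrintFn>
  void withOldDebugFormat(Module &M, PrintFn Print) {
    bool WasNewFormat = M.IsNewDbgInfoFormat;
    if (WasNewFormat)
      M.convertFromNewDbgValues(); // materialize dbg.value intrinsics
    Print(M);
    if (WasNewFormat)
      M.convertToNewDbgValues(); // restore the intrinsic-free form
  }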
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp
index 96f8589e682d..42d0aba4b166 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp
@@ -61,7 +61,9 @@ static cl::opt<bool> ViewBFIAfter("fs-viewbfi-after", cl::Hidden,
cl::init(false),
cl::desc("View BFI after MIR loader"));
+namespace llvm {
extern cl::opt<bool> ImprovedFSDiscriminator;
+}
char MIRProfileLoaderPass::ID = 0;
INITIALIZE_PASS_BEGIN(MIRProfileLoaderPass, DEBUG_TYPE,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp
index 7b3746fde503..114e7910dc27 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp
@@ -21,7 +21,7 @@
#include "llvm/Analysis/NoInferenceModelRunner.h"
#include "llvm/Analysis/Utils/TrainingLogger.h"
#endif
-#include "MLRegallocEvictAdvisor.h"
+#include "MLRegAllocEvictAdvisor.h"
#include "llvm/Analysis/ReleaseModeModelRunner.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
@@ -48,8 +48,8 @@ using namespace llvm;
// Generated header in release (AOT) mode
#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
-#include "RegallocEvictModel.h"
-using CompiledModelType = RegallocEvictModel;
+#include "RegAllocEvictModel.h"
+using CompiledModelType = RegAllocEvictModel;
#else
using CompiledModelType = NoopSavedModelImpl;
#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h b/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.h
index e36a41154096..e36a41154096 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.h
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocPriorityAdvisor.cpp
index 422781593a9c..422781593a9c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocPriorityAdvisor.cpp
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 231544494c32..4410fb7ecd23 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -223,13 +223,13 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
MachineBasicBlock::iterator
MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I,
- bool SkipPseudoOp) {
+ Register Reg, bool SkipPseudoOp) {
const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
iterator E = end();
while (I != E && (I->isPHI() || I->isPosition() || I->isDebugInstr() ||
(SkipPseudoOp && I->isPseudoProbe()) ||
- TII->isBasicBlockPrologue(*I)))
+ TII->isBasicBlockPrologue(*I, Reg)))
++I;
// FIXME: This needs to change if we wish to bundle labels / dbg_values
// inside the bundle.
@@ -567,7 +567,14 @@ void MachineBasicBlock::printName(raw_ostream &os, unsigned printNameFlags,
}
if (getBBID().has_value()) {
os << (hasAttributes ? ", " : " (");
- os << "bb_id " << *getBBID();
+ os << "bb_id " << getBBID()->BaseID;
+ if (getBBID()->CloneID != 0)
+ os << " " << getBBID()->CloneID;
+ hasAttributes = true;
+ }
+ if (CallFrameSize != 0) {
+ os << (hasAttributes ? ", " : " (");
+ os << "call-frame-size " << CallFrameSize;
hasAttributes = true;
}
}
@@ -881,7 +888,7 @@ void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
removeSuccessor(OldI);
}
-void MachineBasicBlock::copySuccessor(MachineBasicBlock *Orig,
+void MachineBasicBlock::copySuccessor(const MachineBasicBlock *Orig,
succ_iterator I) {
if (!Orig->Probs.empty())
addSuccessor(*I, Orig->getSuccProbability(I));
@@ -955,6 +962,10 @@ const MachineBasicBlock *MachineBasicBlock::getSingleSuccessor() const {
return Successors.size() == 1 ? Successors[0] : nullptr;
}
+const MachineBasicBlock *MachineBasicBlock::getSinglePredecessor() const {
+ return Predecessors.size() == 1 ? Predecessors[0] : nullptr;
+}
+
MachineBasicBlock *MachineBasicBlock::getFallThrough(bool JumpToFallThrough) {
MachineFunction::iterator Fallthrough = getIterator();
++Fallthrough;
@@ -1088,6 +1099,36 @@ static bool jumpTableHasOtherUses(const MachineFunction &MF,
return false;
}
+class SlotIndexUpdateDelegate : public MachineFunction::Delegate {
+private:
+ MachineFunction &MF;
+ SlotIndexes *Indexes;
+ SmallSetVector<MachineInstr *, 2> Insertions;
+
+public:
+ SlotIndexUpdateDelegate(MachineFunction &MF, SlotIndexes *Indexes)
+ : MF(MF), Indexes(Indexes) {
+ MF.setDelegate(this);
+ }
+
+ ~SlotIndexUpdateDelegate() {
+ MF.resetDelegate(this);
+ for (auto MI : Insertions)
+ Indexes->insertMachineInstrInMaps(*MI);
+ }
+
+ void MF_HandleInsertion(MachineInstr &MI) override {
+    // This is called before MI is inserted into a block, so defer the index
+    // update.
+ if (Indexes)
+ Insertions.insert(&MI);
+ }
+
+ void MF_HandleRemoval(MachineInstr &MI) override {
+ if (Indexes && !Insertions.remove(&MI))
+ Indexes->removeMachineInstrFromMaps(MI);
+ }
+};
+
MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
MachineBasicBlock *Succ, Pass &P,
std::vector<SparseBitVector<>> *LiveInSets) {
@@ -1099,6 +1140,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
DebugLoc DL; // FIXME: this is nowhere
MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
+ NMBB->setCallFrameSize(Succ->getCallFrameSize());
// Is there an indirect jump with jump table?
bool ChangedIndirectJump = false;
@@ -1160,51 +1202,23 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
ReplaceUsesOfBlockWith(Succ, NMBB);
- // If updateTerminator() removes instructions, we need to remove them from
- // SlotIndexes.
- SmallVector<MachineInstr*, 4> Terminators;
- if (Indexes) {
- for (MachineInstr &MI :
- llvm::make_range(getFirstInstrTerminator(), instr_end()))
- Terminators.push_back(&MI);
- }
-
// Since we replaced all uses of Succ with NMBB, that should also be treated
// as the fallthrough successor
if (Succ == PrevFallthrough)
PrevFallthrough = NMBB;
- if (!ChangedIndirectJump)
+ if (!ChangedIndirectJump) {
+ SlotIndexUpdateDelegate SlotUpdater(*MF, Indexes);
updateTerminator(PrevFallthrough);
-
- if (Indexes) {
- SmallVector<MachineInstr*, 4> NewTerminators;
- for (MachineInstr &MI :
- llvm::make_range(getFirstInstrTerminator(), instr_end()))
- NewTerminators.push_back(&MI);
-
- for (MachineInstr *Terminator : Terminators) {
- if (!is_contained(NewTerminators, Terminator))
- Indexes->removeMachineInstrFromMaps(*Terminator);
- }
}
// Insert unconditional "jump Succ" instruction in NMBB if necessary.
NMBB->addSuccessor(Succ);
if (!NMBB->isLayoutSuccessor(Succ)) {
+ SlotIndexUpdateDelegate SlotUpdater(*MF, Indexes);
SmallVector<MachineOperand, 4> Cond;
const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
TII->insertBranch(*NMBB, Succ, nullptr, Cond, DL);
-
- if (Indexes) {
- for (MachineInstr &MI : NMBB->instrs()) {
- // Some instructions may have been moved to NMBB by updateTerminator(),
- // so we first remove any instruction that already has an index.
- if (Indexes->hasIndex(MI))
- Indexes->removeMachineInstrFromMaps(MI);
- Indexes->insertMachineInstrInMaps(MI);
- }
- }
}
// Fix PHI nodes in Succ so they refer to NMBB instead of this.
@@ -1269,6 +1283,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
assert(VNI &&
"PHI sources should be live out of their predecessors.");
LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
+ for (auto &SR : LI.subranges())
+ SR.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
}
}
}
@@ -1288,8 +1304,16 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
assert(VNI && "LiveInterval should have VNInfo where it is live.");
LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
+ // Update subranges with live values
+ for (auto &SR : LI.subranges()) {
+ VNInfo *VNI = SR.getVNInfoAt(PrevIndex);
+ if (VNI)
+ SR.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
+ }
} else if (!isLiveOut && !isLastMBB) {
LI.removeSegment(StartIndex, EndIndex);
+ for (auto &SR : LI.subranges())
+ SR.removeSegment(StartIndex, EndIndex);
}
}
@@ -1730,11 +1754,6 @@ bool MachineBasicBlock::sizeWithoutDebugLargerThan(unsigned Limit) const {
return false;
}
-unsigned MachineBasicBlock::getBBIDOrNumber() const {
- uint8_t BBAddrMapVersion = getParent()->getContext().getBBAddrMapVersion();
- return BBAddrMapVersion < 2 ? getNumber() : *getBBID();
-}
-
const MBBSectionID MBBSectionID::ColdSectionID(MBBSectionID::SectionType::Cold);
const MBBSectionID
MBBSectionID::ExceptionSectionID(MBBSectionID::SectionType::Exception);
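SlotIndexUpdateDelegate replaces the two hand-rolled snapshot-and-diff loops that SplitCriticalEdge used to run around updateTerminator() and insertBranch(). The class is file-local to MachineBasicBlock.cpp, so the fragment below only illustrates the RAII shape, not a public API: removals are forwarded to SlotIndexes immediately, while insertions are buffered and committed in the destructor, once the instructions are actually in a block.

  // Illustrative only; SlotIndexUpdateDelegate is not exported.
  {
    SlotIndexUpdateDelegate SlotUpdater(*MF, Indexes); // registers delegate
    updateTerminator(PrevFallthrough); // may insert/remove terminators
  } // ~SlotIndexUpdateDelegate: surviving insertions get slot indexes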
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index b1cbe525d7e6..7ee72e214426 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -75,7 +75,7 @@ static cl::opt<bool> PrintMachineBlockFreq(
// Command line option to specify the name of the function for block frequency
// dump. Defined in Analysis/BlockFrequencyInfo.cpp.
-extern cl::opt<std::string> PrintBlockFreqFuncName;
+extern cl::opt<std::string> PrintBFIFuncName;
} // namespace llvm
static GVDAGType getGVDT() {
@@ -203,8 +203,7 @@ void MachineBlockFrequencyInfo::calculate(
view("MachineBlockFrequencyDAGS." + F.getName());
}
if (PrintMachineBlockFreq &&
- (PrintBlockFreqFuncName.empty() ||
- F.getName().equals(PrintBlockFreqFuncName))) {
+ (PrintBFIFuncName.empty() || F.getName().equals(PrintBFIFuncName))) {
MBFI->print(dbgs());
}
}
@@ -228,7 +227,7 @@ void MachineBlockFrequencyInfo::view(const Twine &Name, bool isSimple) const {
BlockFrequency
MachineBlockFrequencyInfo::getBlockFreq(const MachineBasicBlock *MBB) const {
- return MBFI ? MBFI->getBlockFreq(MBB) : 0;
+ return MBFI ? MBFI->getBlockFreq(MBB) : BlockFrequency(0);
}
std::optional<uint64_t> MachineBlockFrequencyInfo::getBlockProfileCount(
@@ -241,7 +240,7 @@ std::optional<uint64_t> MachineBlockFrequencyInfo::getBlockProfileCount(
}
std::optional<uint64_t>
-MachineBlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const {
+MachineBlockFrequencyInfo::getProfileCountFromFreq(BlockFrequency Freq) const {
if (!MBFI)
return std::nullopt;
@@ -263,7 +262,7 @@ void MachineBlockFrequencyInfo::onEdgeSplit(
auto NewSuccFreq = MBFI->getBlockFreq(&NewPredecessor) *
MBPI.getEdgeProbability(&NewPredecessor, &NewSuccessor);
- MBFI->setBlockFreq(&NewSuccessor, NewSuccFreq.getFrequency());
+ MBFI->setBlockFreq(&NewSuccessor, NewSuccFreq);
}
const MachineFunction *MachineBlockFrequencyInfo::getFunction() const {
@@ -274,18 +273,18 @@ const MachineBranchProbabilityInfo *MachineBlockFrequencyInfo::getMBPI() const {
return MBFI ? &MBFI->getBPI() : nullptr;
}
-raw_ostream &
-MachineBlockFrequencyInfo::printBlockFreq(raw_ostream &OS,
- const BlockFrequency Freq) const {
- return MBFI ? MBFI->printBlockFreq(OS, Freq) : OS;
+BlockFrequency MachineBlockFrequencyInfo::getEntryFreq() const {
+ return MBFI ? MBFI->getEntryFreq() : BlockFrequency(0);
}
-raw_ostream &
-MachineBlockFrequencyInfo::printBlockFreq(raw_ostream &OS,
- const MachineBasicBlock *MBB) const {
- return MBFI ? MBFI->printBlockFreq(OS, MBB) : OS;
+Printable llvm::printBlockFreq(const MachineBlockFrequencyInfo &MBFI,
+ BlockFrequency Freq) {
+ return Printable([&MBFI, Freq](raw_ostream &OS) {
+ printBlockFreqImpl(OS, MBFI.getEntryFreq(), Freq);
+ });
}
-uint64_t MachineBlockFrequencyInfo::getEntryFreq() const {
- return MBFI ? MBFI->getEntryFreq() : 0;
+Printable llvm::printBlockFreq(const MachineBlockFrequencyInfo &MBFI,
+ const MachineBasicBlock &MBB) {
+ return printBlockFreq(MBFI, MBFI.getBlockFreq(&MBB));
}
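This pair of free functions returning Printable is the replacement for the removed printBlockFreq members; they format lazily, relative to the entry frequency, and compose with any raw_ostream. A typical call site (DEBUG_TYPE is whatever the using pass defines):

  #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/raw_ostream.h"

  #define DEBUG_TYPE "block-freq-example"

  using namespace llvm;

  void dumpBlockFreq(const MachineBlockFrequencyInfo &MBFI,
                     const MachineBasicBlock &MBB) {
    // Printable defers formatting until stream insertion, so this costs
    // nothing when debug output is disabled.
    LLVM_DEBUG(dbgs() << "freq: " << printBlockFreq(MBFI, MBB) << "\n");
  }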
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 912e9ec993e3..a7a839688ddf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -444,9 +444,9 @@ class MachineBlockPlacement : public MachineFunctionPass {
if (UseProfileCount) {
auto Count = MBFI->getBlockProfileCount(BB);
if (Count)
- return *Count;
+ return BlockFrequency(*Count);
else
- return 0;
+ return BlockFrequency(0);
} else
return MBFI->getBlockFreq(BB);
}
@@ -795,10 +795,10 @@ bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) {
/// penalty is less than 100%
/// TODO(iteratee): Use 64-bit fixed point edge frequencies everywhere.
static bool greaterWithBias(BlockFrequency A, BlockFrequency B,
- uint64_t EntryFreq) {
+ BlockFrequency EntryFreq) {
BranchProbability ThresholdProb(TailDupPlacementPenalty, 100);
BlockFrequency Gain = A - B;
- return (Gain / ThresholdProb).getFrequency() >= EntryFreq;
+ return (Gain / ThresholdProb) >= EntryFreq;
}
/// Check the edge frequencies to see if tail duplication will increase
@@ -843,7 +843,7 @@ bool MachineBlockPlacement::isProfitableToTailDup(
auto SuccFreq = MBFI->getBlockFreq(Succ);
BlockFrequency P = BBFreq * PProb;
BlockFrequency Qout = BBFreq * QProb;
- uint64_t EntryFreq = MBFI->getEntryFreq();
+ BlockFrequency EntryFreq = MBFI->getEntryFreq();
// If there are no more successors, it is profitable to copy, as it strictly
// increases fallthrough.
if (SuccSuccs.size() == 0)
@@ -1729,8 +1729,9 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
"Found CFG-violating block");
BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
- LLVM_DEBUG(dbgs() << " " << getBlockName(MBB) << " -> ";
- MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
+ LLVM_DEBUG(dbgs() << " " << getBlockName(MBB) << " -> "
+ << printBlockFreq(MBFI->getMBFI(), CandidateFreq)
+ << " (freq)\n");
// For ehpad, we layout the least probable first as to avoid jumping back
// from least probable landingpads to more probable ones.
@@ -1927,7 +1928,7 @@ BlockFrequency
MachineBlockPlacement::TopFallThroughFreq(
const MachineBasicBlock *Top,
const BlockFilterSet &LoopBlockSet) {
- BlockFrequency MaxFreq = 0;
+ BlockFrequency MaxFreq = BlockFrequency(0);
for (MachineBasicBlock *Pred : Top->predecessors()) {
BlockChain *PredChain = BlockToChain[Pred];
if (!LoopBlockSet.count(Pred) &&
@@ -1986,7 +1987,7 @@ MachineBlockPlacement::FallThroughGains(
const MachineBasicBlock *ExitBB,
const BlockFilterSet &LoopBlockSet) {
BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
- BlockFrequency FallThrough2Exit = 0;
+ BlockFrequency FallThrough2Exit = BlockFrequency(0);
if (ExitBB)
FallThrough2Exit = MBFI->getBlockFreq(NewTop) *
MBPI->getEdgeProbability(NewTop, ExitBB);
@@ -1994,58 +1995,58 @@ MachineBlockPlacement::FallThroughGains(
MBPI->getEdgeProbability(NewTop, OldTop);
// Find the best Pred of NewTop.
- MachineBasicBlock *BestPred = nullptr;
- BlockFrequency FallThroughFromPred = 0;
- for (MachineBasicBlock *Pred : NewTop->predecessors()) {
- if (!LoopBlockSet.count(Pred))
- continue;
- BlockChain *PredChain = BlockToChain[Pred];
- if (!PredChain || Pred == *std::prev(PredChain->end())) {
- BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) *
- MBPI->getEdgeProbability(Pred, NewTop);
- if (EdgeFreq > FallThroughFromPred) {
- FallThroughFromPred = EdgeFreq;
- BestPred = Pred;
- }
- }
- }
-
- // If NewTop is not placed after Pred, another successor can be placed
- // after Pred.
- BlockFrequency NewFreq = 0;
- if (BestPred) {
- for (MachineBasicBlock *Succ : BestPred->successors()) {
- if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
- continue;
- if (ComputedEdges.contains(Succ))
- continue;
- BlockChain *SuccChain = BlockToChain[Succ];
- if ((SuccChain && (Succ != *SuccChain->begin())) ||
- (SuccChain == BlockToChain[BestPred]))
- continue;
- BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) *
- MBPI->getEdgeProbability(BestPred, Succ);
- if (EdgeFreq > NewFreq)
- NewFreq = EdgeFreq;
- }
- BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) *
- MBPI->getEdgeProbability(BestPred, NewTop);
- if (NewFreq > OrigEdgeFreq) {
- // If NewTop is not the best successor of Pred, then Pred doesn't
- // fallthrough to NewTop. So there is no FallThroughFromPred and
- // NewFreq.
- NewFreq = 0;
- FallThroughFromPred = 0;
- }
- }
-
- BlockFrequency Result = 0;
- BlockFrequency Gains = BackEdgeFreq + NewFreq;
- BlockFrequency Lost = FallThrough2Top + FallThrough2Exit +
- FallThroughFromPred;
- if (Gains > Lost)
- Result = Gains - Lost;
- return Result;
+ MachineBasicBlock *BestPred = nullptr;
+ BlockFrequency FallThroughFromPred = BlockFrequency(0);
+ for (MachineBasicBlock *Pred : NewTop->predecessors()) {
+ if (!LoopBlockSet.count(Pred))
+ continue;
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (!PredChain || Pred == *std::prev(PredChain->end())) {
+ BlockFrequency EdgeFreq =
+ MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, NewTop);
+ if (EdgeFreq > FallThroughFromPred) {
+ FallThroughFromPred = EdgeFreq;
+ BestPred = Pred;
+ }
+ }
+ }
+
+ // If NewTop is not placed after Pred, another successor can be placed
+ // after Pred.
+ BlockFrequency NewFreq = BlockFrequency(0);
+ if (BestPred) {
+ for (MachineBasicBlock *Succ : BestPred->successors()) {
+ if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
+ continue;
+ if (ComputedEdges.contains(Succ))
+ continue;
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if ((SuccChain && (Succ != *SuccChain->begin())) ||
+ (SuccChain == BlockToChain[BestPred]))
+ continue;
+ BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) *
+ MBPI->getEdgeProbability(BestPred, Succ);
+ if (EdgeFreq > NewFreq)
+ NewFreq = EdgeFreq;
+ }
+ BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) *
+ MBPI->getEdgeProbability(BestPred, NewTop);
+ if (NewFreq > OrigEdgeFreq) {
+      // If NewTop is not the best successor of Pred, then Pred doesn't fall
+      // through to NewTop, so there is no FallThroughFromPred and no NewFreq.
+ NewFreq = BlockFrequency(0);
+ FallThroughFromPred = BlockFrequency(0);
+ }
+ }
+
+ BlockFrequency Result = BlockFrequency(0);
+ BlockFrequency Gains = BackEdgeFreq + NewFreq;
+ BlockFrequency Lost =
+ FallThrough2Top + FallThrough2Exit + FallThroughFromPred;
+ if (Gains > Lost)
+ Result = Gains - Lost;
+ return Result;
}
/// Helper function of findBestLoopTop. Find the best loop top block
@@ -2087,7 +2088,7 @@ MachineBlockPlacement::findBestLoopTopHelper(
LLVM_DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(OldTop)
<< "\n");
- BlockFrequency BestGains = 0;
+ BlockFrequency BestGains = BlockFrequency(0);
MachineBasicBlock *BestPred = nullptr;
for (MachineBasicBlock *Pred : OldTop->predecessors()) {
if (!LoopBlockSet.count(Pred))
@@ -2095,8 +2096,8 @@ MachineBlockPlacement::findBestLoopTopHelper(
if (Pred == L.getHeader())
continue;
LLVM_DEBUG(dbgs() << " old top pred: " << getBlockName(Pred) << ", has "
- << Pred->succ_size() << " successors, ";
- MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
+ << Pred->succ_size() << " successors, "
+ << printBlockFreq(MBFI->getMBFI(), *Pred) << " freq\n");
if (Pred->succ_size() > 2)
continue;
@@ -2112,8 +2113,9 @@ MachineBlockPlacement::findBestLoopTopHelper(
BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB,
LoopBlockSet);
- if ((Gains > 0) && (Gains > BestGains ||
- ((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) {
+ if ((Gains > BlockFrequency(0)) &&
+ (Gains > BestGains ||
+ ((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) {
BestPred = Pred;
BestGains = Gains;
}
@@ -2239,10 +2241,10 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
}
BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;
- LLVM_DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
- << getBlockName(Succ) << " [L:" << SuccLoopDepth
- << "] (";
- MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");
+ LLVM_DEBUG(
+ dbgs() << " exiting: " << getBlockName(MBB) << " -> "
+ << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] ("
+ << printBlockFreq(MBFI->getMBFI(), ExitEdgeFreq) << ")\n");
// Note that we bias this toward an existing layout successor to retain
// incoming order in the absence of better information. The exit must have
// a frequency higher than the current exit before we consider breaking
@@ -2425,14 +2427,14 @@ void MachineBlockPlacement::rotateLoopWithProfile(
if (ChainHeaderBB->isEntryBlock())
return;
- BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency();
+ BlockFrequency SmallestRotationCost = BlockFrequency::max();
// A utility lambda that scales up a block frequency by dividing it by a
// branch probability which is the reciprocal of the scale.
auto ScaleBlockFrequency = [](BlockFrequency Freq,
unsigned Scale) -> BlockFrequency {
if (Scale == 0)
- return 0;
+ return BlockFrequency(0);
// Use operator / between BlockFrequency and BranchProbability to implement
// saturating multiplication.
return Freq / BranchProbability(1, Scale);
@@ -2492,7 +2494,7 @@ void MachineBlockPlacement::rotateLoopWithProfile(
auto TailBB = *TailIter;
// Calculate the cost by putting this BB to the top.
- BlockFrequency Cost = 0;
+ BlockFrequency Cost = BlockFrequency(0);
// If the current BB is the loop header, we need to take into account the
// cost of the missed fall through edge from outside of the loop to the
@@ -2523,8 +2525,7 @@ void MachineBlockPlacement::rotateLoopWithProfile(
if (TailBB->isSuccessor(*Iter)) {
auto TailBBFreq = MBFI->getBlockFreq(TailBB);
if (TailBB->succ_size() == 1)
- Cost += ScaleBlockFrequency(TailBBFreq.getFrequency(),
- MisfetchCost + JumpInstCost);
+ Cost += ScaleBlockFrequency(TailBBFreq, MisfetchCost + JumpInstCost);
else if (TailBB->succ_size() == 2) {
auto TailToHeadProb = MBPI->getEdgeProbability(TailBB, *Iter);
auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
@@ -2537,8 +2538,8 @@ void MachineBlockPlacement::rotateLoopWithProfile(
}
LLVM_DEBUG(dbgs() << "The cost of loop rotation by making "
- << getBlockName(*Iter)
- << " to the top: " << Cost.getFrequency() << "\n");
+ << getBlockName(*Iter) << " to the top: "
+ << printBlockFreq(MBFI->getMBFI(), Cost) << "\n");
if (Cost < SmallestRotationCost) {
SmallestRotationCost = Cost;
@@ -2918,8 +2919,30 @@ void MachineBlockPlacement::alignBlocks() {
if (!L)
continue;
- const Align Align = TLI->getPrefLoopAlignment(L);
- if (Align == 1)
+ const Align TLIAlign = TLI->getPrefLoopAlignment(L);
+ unsigned MDAlign = 1;
+ MDNode *LoopID = L->getLoopID();
+ if (LoopID) {
+ for (unsigned I = 1, E = LoopID->getNumOperands(); I < E; ++I) {
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(I));
+ if (MD == nullptr)
+ continue;
+ MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ if (S == nullptr)
+ continue;
+ if (S->getString() == "llvm.loop.align") {
+ assert(MD->getNumOperands() == 2 &&
+ "per-loop align metadata should have two operands.");
+ MDAlign =
+ mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
+ assert(MDAlign >= 1 && "per-loop align value must be positive.");
+ }
+ }
+ }
+
+  // Use the max of TLIAlign and MDAlign.
+ const Align LoopAlign = std::max(TLIAlign, Align(MDAlign));
+ if (LoopAlign == 1)
continue; // Don't care about loop alignment.
// If the block is cold relative to the function entry don't waste space
@@ -2958,7 +2981,7 @@ void MachineBlockPlacement::alignBlocks() {
// Force alignment if all the predecessors are jumps. We already checked
// that the block isn't cold above.
if (!LayoutPred->isSuccessor(ChainBB)) {
- ChainBB->setAlignment(Align);
+ ChainBB->setAlignment(LoopAlign);
DetermineMaxAlignmentPadding();
continue;
}
@@ -2971,7 +2994,7 @@ void MachineBlockPlacement::alignBlocks() {
MBPI->getEdgeProbability(LayoutPred, ChainBB);
BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
if (LayoutEdgeFreq <= (Freq * ColdProb)) {
- ChainBB->setAlignment(Align);
+ ChainBB->setAlignment(LoopAlign);
DetermineMaxAlignmentPadding();
}
}
@@ -3090,7 +3113,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
SmallVectorImpl<MachineBasicBlock *> &RemoveList = BlockWorkList;
if (RemBB->isEHPad())
RemoveList = EHPadWorkList;
- llvm::erase_value(RemoveList, RemBB);
+ llvm::erase(RemoveList, RemBB);
}
// Handle the filter set
@@ -3159,7 +3182,7 @@ static uint64_t countMBBInstruction(MachineBasicBlock *MBB) {
// So we should scale the threshold accordingly. But the instruction size is not
// available on all targets, so we use the number of instructions instead.
BlockFrequency MachineBlockPlacement::scaleThreshold(MachineBasicBlock *BB) {
- return DupThreshold.getFrequency() * countMBBInstruction(BB);
+ return BlockFrequency(DupThreshold.getFrequency() * countMBBInstruction(BB));
}
// Returns true if BB is Pred's best successor.
@@ -3313,7 +3336,7 @@ void MachineBlockPlacement::findDuplicateCandidates(
}
void MachineBlockPlacement::initDupThreshold() {
- DupThreshold = 0;
+ DupThreshold = BlockFrequency(0);
if (!F->getFunction().hasProfileData())
return;
@@ -3321,12 +3344,13 @@ void MachineBlockPlacement::initDupThreshold() {
uint64_t HotThreshold = PSI->getOrCompHotCountThreshold();
if (HotThreshold != UINT64_MAX) {
UseProfileCount = true;
- DupThreshold = HotThreshold * TailDupProfilePercentThreshold / 100;
+ DupThreshold =
+ BlockFrequency(HotThreshold * TailDupProfilePercentThreshold / 100);
return;
}
// Profile count is not available, we can use block frequency instead.
- BlockFrequency MaxFreq = 0;
+ BlockFrequency MaxFreq = BlockFrequency(0);
for (MachineBasicBlock &MBB : *F) {
BlockFrequency Freq = MBFI->getBlockFreq(&MBB);
if (Freq > MaxFreq)
@@ -3334,7 +3358,7 @@ void MachineBlockPlacement::initDupThreshold() {
}
BranchProbability ThresholdProb(TailDupPlacementPenalty, 100);
- DupThreshold = MaxFreq * ThresholdProb;
+ DupThreshold = BlockFrequency(MaxFreq * ThresholdProb);
UseProfileCount = false;
}
@@ -3376,7 +3400,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
// For aggressive optimization, we can adjust some thresholds to be less
// conservative.
- if (PassConfig->getOptLevel() >= CodeGenOpt::Aggressive) {
+ if (PassConfig->getOptLevel() >= CodeGenOptLevel::Aggressive) {
// At O3 we should be more willing to copy blocks for tail duplication. This
// increases size pressure, so we only do it at O3
// Do this unless only the regular threshold is explicitly set.
@@ -3388,7 +3412,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
// If there's no threshold provided through options, query the target
// information for a threshold instead.
if (TailDupPlacementThreshold.getNumOccurrences() == 0 &&
- (PassConfig->getOptLevel() < CodeGenOpt::Aggressive ||
+ (PassConfig->getOptLevel() < CodeGenOptLevel::Aggressive ||
TailDupPlacementAggressiveThreshold.getNumOccurrences() == 0))
TailDupSize = TII->getTailDuplicateSize(PassConfig->getOptLevel());
@@ -3501,7 +3525,7 @@ void MachineBlockPlacement::applyExtTsp() {
auto BlockSizes = std::vector<uint64_t>(F->size());
auto BlockCounts = std::vector<uint64_t>(F->size());
- std::vector<EdgeCountT> JumpCounts;
+ std::vector<codelayout::EdgeCount> JumpCounts;
for (MachineBasicBlock &MBB : *F) {
// Getting the block frequency.
BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB);
@@ -3520,8 +3544,8 @@ void MachineBlockPlacement::applyExtTsp() {
for (MachineBasicBlock *Succ : MBB.successors()) {
auto EP = MBPI->getEdgeProbability(&MBB, Succ);
BlockFrequency JumpFreq = BlockFreq * EP;
- auto Jump = std::make_pair(BlockIndex[&MBB], BlockIndex[Succ]);
- JumpCounts.push_back(std::make_pair(Jump, JumpFreq.getFrequency()));
+ JumpCounts.push_back(
+ {BlockIndex[&MBB], BlockIndex[Succ], JumpFreq.getFrequency()});
}
}
@@ -3534,7 +3558,7 @@ void MachineBlockPlacement::applyExtTsp() {
calcExtTspScore(BlockSizes, BlockCounts, JumpCounts)));
// Run the layout algorithm.
- auto NewOrder = applyExtTspLayout(BlockSizes, BlockCounts, JumpCounts);
+ auto NewOrder = computeExtTspLayout(BlockSizes, BlockCounts, JumpCounts);
std::vector<const MachineBasicBlock *> NewBlockOrder;
NewBlockOrder.reserve(F->size());
for (uint64_t Node : NewOrder) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
index f879c5fcf20c..26a8d00e6626 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
@@ -44,7 +44,6 @@
#include <cassert>
#include <iterator>
#include <utility>
-#include <vector>
using namespace llvm;
@@ -65,6 +64,10 @@ static cl::opt<int>
CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024),
cl::desc("Threshold for the size of CSUses"));
+static cl::opt<bool> AggressiveMachineCSE(
+ "aggressive-machine-cse", cl::Hidden, cl::init(false),
+ cl::desc("Override the profitability heuristics for Machine CSE"));
+
namespace {
class MachineCSE : public MachineFunctionPass {
@@ -403,7 +406,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
bool MachineCSE::isCSECandidate(MachineInstr *MI) {
if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
- MI->isInlineAsm() || MI->isDebugInstr())
+ MI->isInlineAsm() || MI->isDebugInstr() || MI->isJumpTableDebugInfo())
return false;
// Ignore copies.
@@ -439,6 +442,9 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
/// defined.
bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg,
MachineBasicBlock *CSBB, MachineInstr *MI) {
+ if (AggressiveMachineCSE)
+ return true;
+
  // FIXME: Heuristics that work around the lack of live range splitting.
// If CSReg is used at all uses of Reg, CSE should not increase register
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 3453e6c0b8be..a032b31a1fc7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -134,28 +134,31 @@ public:
const TargetInstrInfo &TII, bool UseCopyInstr) {
    // Since Reg might be a subreg of some registers, invalidating only Reg is
    // not enough. We have to find the COPY that defines Reg, or registers defined by Reg,
- // and invalidate all of them.
- SmallSet<MCRegister, 8> RegsToInvalidate;
- RegsToInvalidate.insert(Reg);
+ // and invalidate all of them. Similarly, we must invalidate all of the
+    // subregisters used in the source of the COPY.
+ SmallSet<MCRegUnit, 8> RegUnitsToInvalidate;
+ auto InvalidateCopy = [&](MachineInstr *MI) {
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*MI, TII, UseCopyInstr);
+ assert(CopyOperands && "Expect copy");
+
+ auto Dest = TRI.regunits(CopyOperands->Destination->getReg().asMCReg());
+ auto Src = TRI.regunits(CopyOperands->Source->getReg().asMCReg());
+ RegUnitsToInvalidate.insert(Dest.begin(), Dest.end());
+ RegUnitsToInvalidate.insert(Src.begin(), Src.end());
+ };
+
for (MCRegUnit Unit : TRI.regunits(Reg)) {
auto I = Copies.find(Unit);
if (I != Copies.end()) {
- if (MachineInstr *MI = I->second.MI) {
- std::optional<DestSourcePair> CopyOperands =
- isCopyInstr(*MI, TII, UseCopyInstr);
- assert(CopyOperands && "Expect copy");
-
- RegsToInvalidate.insert(
- CopyOperands->Destination->getReg().asMCReg());
- RegsToInvalidate.insert(CopyOperands->Source->getReg().asMCReg());
- }
- RegsToInvalidate.insert(I->second.DefRegs.begin(),
- I->second.DefRegs.end());
+ if (MachineInstr *MI = I->second.MI)
+ InvalidateCopy(MI);
+ if (MachineInstr *MI = I->second.LastSeenUseInCopy)
+ InvalidateCopy(MI);
}
}
- for (MCRegister InvalidReg : RegsToInvalidate)
- for (MCRegUnit Unit : TRI.regunits(InvalidReg))
- Copies.erase(Unit);
+ for (MCRegUnit Unit : RegUnitsToInvalidate)
+ Copies.erase(Unit);
}
/// Clobber a single register, removing it from the tracker's copy maps.
@@ -1144,11 +1147,11 @@ void MachineCopyPropagation::EliminateSpillageCopies(MachineBasicBlock &MBB) {
return;
// If violate property#2, we don't fold the chain.
- for (const MachineInstr *Spill : make_range(SC.begin() + 1, SC.end()))
+ for (const MachineInstr *Spill : drop_begin(SC))
if (CopySourceInvalid.count(Spill))
return;
- for (const MachineInstr *Reload : make_range(RC.begin(), RC.end() - 1))
+ for (const MachineInstr *Reload : drop_end(RC))
if (CopySourceInvalid.count(Reload))
return;
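drop_begin/drop_end are the STLExtras idioms for "all but the first" and "all but the last" element, replacing the explicit make_range arithmetic above. A self-contained example:

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"

  using namespace llvm;

  int sumEndsOnce(const SmallVectorImpl<int> &V) {
    int Sum = 0;
    for (int X : drop_begin(V)) // skips V[0]
      Sum += X;
    for (int X : drop_end(V)) // skips V.back()
      Sum += X;
    return Sum; // interior elements counted twice, the two ends once each
  }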
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
index 88939e96e07f..57af571ed9bf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/PseudoSourceValueManager.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -178,6 +179,12 @@ void MachineFunction::handleRemoval(MachineInstr &MI) {
TheDelegate->MF_HandleRemoval(MI);
}
+void MachineFunction::handleChangeDesc(MachineInstr &MI,
+ const MCInstrDesc &TID) {
+ if (TheDelegate)
+ TheDelegate->MF_HandleChangeDesc(MI, TID);
+}
+
void MachineFunction::init() {
// Assume the function starts in SSA form with correct liveness.
Properties.set(MachineFunctionProperties::Property::IsSSA);
@@ -451,16 +458,17 @@ void MachineFunction::deleteMachineInstr(MachineInstr *MI) {
/// Allocate a new MachineBasicBlock. Use this instead of
/// `new MachineBasicBlock'.
MachineBasicBlock *
-MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) {
+MachineFunction::CreateMachineBasicBlock(const BasicBlock *BB,
+ std::optional<UniqueBBID> BBID) {
MachineBasicBlock *MBB =
new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator))
- MachineBasicBlock(*this, bb);
+ MachineBasicBlock(*this, BB);
  // Set BBID for `-basic-block-sections=labels` and
// `-basic-block-sections=list` to allow robust mapping of profiles to basic
// blocks.
if (Target.getBBSectionsType() == BasicBlockSection::Labels ||
Target.getBBSectionsType() == BasicBlockSection::List)
- MBB->setBBID(NextBBID++);
+ MBB->setBBID(BBID.has_value() ? *BBID : UniqueBBID{NextBBID++, 0});
return MBB;
}
@@ -1206,7 +1214,7 @@ bool MachineFunction::shouldUseDebugInstrRef() const {
// have optimized code inlined into this unoptimized code, however with
// fewer and less aggressive optimizations happening, coverage and accuracy
// should not suffer.
- if (getTarget().getOptLevel() == CodeGenOpt::None)
+ if (getTarget().getOptLevel() == CodeGenOptLevel::None)
return false;
// Don't use instr-ref if this function is marked optnone.
@@ -1244,6 +1252,7 @@ unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const {
case MachineJumpTableInfo::EK_BlockAddress:
return TD.getPointerSize();
case MachineJumpTableInfo::EK_GPRel64BlockAddress:
+ case MachineJumpTableInfo::EK_LabelDifference64:
return 8;
case MachineJumpTableInfo::EK_GPRel32BlockAddress:
case MachineJumpTableInfo::EK_LabelDifference32:
@@ -1264,6 +1273,7 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const {
case MachineJumpTableInfo::EK_BlockAddress:
return TD.getPointerABIAlignment(0).value();
case MachineJumpTableInfo::EK_GPRel64BlockAddress:
+ case MachineJumpTableInfo::EK_LabelDifference64:
return TD.getABIIntegerTypeAlignment(64).value();
case MachineJumpTableInfo::EK_GPRel32BlockAddress:
case MachineJumpTableInfo::EK_LabelDifference32:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp
index 3a1e1720be9c..d57a912f418b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -88,6 +88,8 @@ bool MachineFunctionPass::runOnFunction(Function &F) {
MF.print(OS);
}
+ MFProps.reset(ClearedProperties);
+
bool RV = runOnMachineFunction(MF);
if (ShouldEmitSizeRemarks) {
@@ -114,7 +116,6 @@ bool MachineFunctionPass::runOnFunction(Function &F) {
}
MFProps.set(SetProperties);
- MFProps.reset(ClearedProperties);
// For --print-changed, print if the serialized MF has changed. Modes other
// than quiet/verbose are unimplemented and treated the same as 'quiet'.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
index fbc071536d22..38c1c56d2823 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -35,6 +35,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
@@ -108,6 +109,12 @@ static bool isColdBlock(const MachineBasicBlock &MBB,
const MachineBlockFrequencyInfo *MBFI,
ProfileSummaryInfo *PSI) {
std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
+
+ // Temporary hack to cope with AArch64's jump table encoding
+ const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo();
+ if (!TII.isMBBSafeToSplitToCold(MBB))
+ return false;
+
// For instrumentation profiles and sample profiles, we use different ways
// to judge whether a block is cold and should be split.
if (PSI->hasInstrumentationProfile() || PSI->hasCSInstrumentationProfile()) {
@@ -135,22 +142,10 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
if (!UseProfileData && !SplitAllEHCode)
return false;
- // TODO: We don't split functions where a section attribute has been set
- // since the split part may not be placed in a contiguous region. It may also
- // be more beneficial to augment the linker to ensure contiguous layout of
- // split functions within the same section as specified by the attribute.
- if (MF.getFunction().hasSection() ||
- MF.getFunction().hasFnAttribute("implicit-section-name"))
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ if (!TII.isFunctionSafeToSplit(MF))
return false;
- // We don't want to proceed further for cold functions
- // or functions of unknown hotness. Lukewarm functions have no prefix.
- std::optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
- if (SectionPrefix &&
- (*SectionPrefix == "unlikely" || *SectionPrefix == "unknown")) {
- return false;
- }
-
// Renumbering blocks here preserves the order of the blocks as
// sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
// blocks. Preserving the order of blocks is essential to retaining decisions
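The heuristics deleted from the pass did not disappear: they moved behind the TargetInstrInfo::isFunctionSafeToSplit / isMBBSafeToSplitToCold hooks so a target can veto splitting (AArch64 uses the block-level hook for its jump-table encoding, per the comment above). A hypothetical target override might look like this (the class name and attribute string are made up):

  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/TargetInstrInfo.h"

  using namespace llvm;

  class ExampleInstrInfo : public TargetInstrInfo {
  public:
    bool isFunctionSafeToSplit(const MachineFunction &MF) const override {
      // Layer a target-specific veto on top of whatever generic checks the
      // base implementation performs.
      if (MF.getFunction().hasFnAttribute("example-no-split"))
        return false;
      return TargetInstrInfo::isFunctionSafeToSplit(MF);
    }
  };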
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
index a9309487a7a7..27eae372f8ad 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
@@ -138,6 +138,12 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
setFlags(MI.Flags);
}
+void MachineInstr::setDesc(const MCInstrDesc &TID) {
+ if (getParent())
+ getMF()->handleChangeDesc(*this, TID);
+ MCID = &TID;
+}
+
void MachineInstr::moveBefore(MachineInstr *MovePos) {
MovePos->getParent()->splice(MovePos, getParent(), getIterator());
}
@@ -225,10 +231,6 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
// OpNo now points as the desired insertion point. Unless this is a variadic
// instruction, only implicit regs are allowed beyond MCID->getNumOperands().
// RegMask operands go between the explicit and implicit operands.
- assert((MCID->isVariadic() || OpNo < MCID->getNumOperands() ||
- Op.isValidExcessOperand()) &&
- "Trying to add an operand to a machine instr that is already done!");
-
MachineRegisterInfo *MRI = getRegInfo();
// Determine if the Operands array needs to be reallocated.
@@ -845,7 +847,8 @@ int MachineInstr::findInlineAsmFlagIdx(unsigned OpIdx,
// If we reach the implicit register operands, stop looking.
if (!FlagMO.isImm())
return -1;
- NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm());
+ const InlineAsm::Flag F(FlagMO.getImm());
+ NumOps = 1 + F.getNumOperandRegisters();
if (i + NumOps > OpIdx) {
if (GroupNo)
*GroupNo = Group;
@@ -922,16 +925,14 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
if (FlagIdx < 0)
return nullptr;
- unsigned Flag = getOperand(FlagIdx).getImm();
+ const InlineAsm::Flag F(getOperand(FlagIdx).getImm());
unsigned RCID;
- if ((InlineAsm::getKind(Flag) == InlineAsm::Kind_RegUse ||
- InlineAsm::getKind(Flag) == InlineAsm::Kind_RegDef ||
- InlineAsm::getKind(Flag) == InlineAsm::Kind_RegDefEarlyClobber) &&
- InlineAsm::hasRegClassConstraint(Flag, RCID))
+ if ((F.isRegUseKind() || F.isRegDefKind() || F.isRegDefEarlyClobberKind()) &&
+ F.hasRegClassConstraint(RCID))
return TRI->getRegClass(RCID);
// Assume that all registers in a memory operand are pointers.
- if (InlineAsm::getKind(Flag) == InlineAsm::Kind_Mem)
+ if (F.isMemKind())
return TRI->getPointerRegClass(MF);
return nullptr;
@@ -1196,12 +1197,13 @@ unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const {
assert(FlagMO.isImm() && "Invalid tied operand on inline asm");
unsigned CurGroup = GroupIdx.size();
GroupIdx.push_back(i);
- NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm());
+ const InlineAsm::Flag F(FlagMO.getImm());
+ NumOps = 1 + F.getNumOperandRegisters();
// OpIdx belongs to this operand group.
if (OpIdx > i && OpIdx < i + NumOps)
OpIdxGroup = CurGroup;
unsigned TiedGroup;
- if (!InlineAsm::isUseOperandTiedToDef(FlagMO.getImm(), TiedGroup))
+ if (!F.isUseOperandTiedToDef(TiedGroup))
continue;
// Operands in this group are tied to operands in TiedGroup which must be
// earlier. Find the number of operands between the two groups.
@@ -1263,7 +1265,8 @@ bool MachineInstr::isSafeToMove(AAResults *AA, bool &SawStore) const {
}
if (isPosition() || isDebugInstr() || isTerminator() ||
- mayRaiseFPException() || hasUnmodeledSideEffects())
+ mayRaiseFPException() || hasUnmodeledSideEffects() ||
+ isJumpTableDebugInfo())
return false;
// See if this instruction does a load. If so, we have to guarantee that the
@@ -1497,6 +1500,16 @@ bool MachineInstr::allDefsAreDead() const {
return true;
}
+bool MachineInstr::allImplicitDefsAreDead() const {
+ for (const MachineOperand &MO : implicit_operands()) {
+ if (!MO.isReg() || MO.isUse())
+ continue;
+ if (!MO.isDead())
+ return false;
+ }
+ return true;
+}
+
/// copyImplicitOps - Copy implicit register operands from specified
/// instruction to this instruction.
void MachineInstr::copyImplicitOps(MachineFunction &MF,
@@ -1754,31 +1767,37 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
// Pretty print the inline asm operand descriptor.
OS << '$' << AsmOpCount++;
unsigned Flag = MO.getImm();
+ const InlineAsm::Flag F(Flag);
OS << ":[";
- OS << InlineAsm::getKindName(InlineAsm::getKind(Flag));
+ OS << F.getKindName();
- unsigned RCID = 0;
- if (!InlineAsm::isImmKind(Flag) && !InlineAsm::isMemKind(Flag) &&
- InlineAsm::hasRegClassConstraint(Flag, RCID)) {
+ unsigned RCID;
+ if (!F.isImmKind() && !F.isMemKind() && F.hasRegClassConstraint(RCID)) {
if (TRI) {
OS << ':' << TRI->getRegClassName(TRI->getRegClass(RCID));
} else
OS << ":RC" << RCID;
}
- if (InlineAsm::isMemKind(Flag)) {
- unsigned MCID = InlineAsm::getMemoryConstraintID(Flag);
+ if (F.isMemKind()) {
+ const InlineAsm::ConstraintCode MCID = F.getMemoryConstraintID();
OS << ":" << InlineAsm::getMemConstraintName(MCID);
}
- unsigned TiedTo = 0;
- if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo))
+ unsigned TiedTo;
+ if (F.isUseOperandTiedToDef(TiedTo))
OS << " tiedto:$" << TiedTo;
+ if ((F.isRegDefKind() || F.isRegDefEarlyClobberKind() ||
+ F.isRegUseKind()) &&
+ F.getRegMayBeFolded()) {
+ OS << " foldable";
+ }
+
OS << ']';
// Compute the index of the next operand descriptor.
- AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag);
+ AsmDescOp += 1 + F.getNumOperandRegisters();
} else {
LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{};
unsigned TiedOperandIdx = getTiedOperandIdx(i);
@@ -1883,16 +1902,20 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
DL.print(OS);
}
- // Print extra comments for DEBUG_VALUE.
- if (isDebugValueLike() && getDebugVariableOp().isMetadata()) {
- if (!HaveSemi) {
- OS << ";";
- HaveSemi = true;
+ // Print extra comments for DEBUG_VALUE and friends if they are well-formed.
+ if ((isNonListDebugValue() && getNumOperands() >= 4) ||
+ (isDebugValueList() && getNumOperands() >= 2) ||
+ (isDebugRef() && getNumOperands() >= 3)) {
+ if (getDebugVariableOp().isMetadata()) {
+ if (!HaveSemi) {
+ OS << ";";
+ HaveSemi = true;
+ }
+ auto *DV = getDebugVariable();
+ OS << " line no:" << DV->getLine();
+ if (isIndirectDebugValue())
+ OS << " indirect";
}
- auto *DV = getDebugVariable();
- OS << " line no:" << DV->getLine();
- if (isIndirectDebugValue())
- OS << " indirect";
}
// TODO: DBG_LABEL
@@ -2460,3 +2483,65 @@ MachineInstr::getFirst5RegLLTs() const {
Reg2, getRegInfo()->getType(Reg2), Reg3, getRegInfo()->getType(Reg3),
Reg4, getRegInfo()->getType(Reg4));
}
+
+void MachineInstr::insert(mop_iterator InsertBefore,
+ ArrayRef<MachineOperand> Ops) {
+ assert(InsertBefore != nullptr && "invalid iterator");
+ assert(InsertBefore->getParent() == this &&
+ "iterator points to operand of other inst");
+ if (Ops.empty())
+ return;
+
+ // Do one pass to untie operands.
+ SmallDenseMap<unsigned, unsigned> TiedOpIndices;
+ for (const MachineOperand &MO : operands()) {
+ if (MO.isReg() && MO.isTied()) {
+ unsigned OpNo = getOperandNo(&MO);
+ unsigned TiedTo = findTiedOperandIdx(OpNo);
+ TiedOpIndices[OpNo] = TiedTo;
+ untieRegOperand(OpNo);
+ }
+ }
+
+ unsigned OpIdx = getOperandNo(InsertBefore);
+ unsigned NumOperands = getNumOperands();
+ unsigned OpsToMove = NumOperands - OpIdx;
+
+ SmallVector<MachineOperand> MovingOps;
+ MovingOps.reserve(OpsToMove);
+
+ for (unsigned I = 0; I < OpsToMove; ++I) {
+ MovingOps.emplace_back(getOperand(OpIdx));
+ removeOperand(OpIdx);
+ }
+ for (const MachineOperand &MO : Ops)
+ addOperand(MO);
+ for (const MachineOperand &OpMoved : MovingOps)
+ addOperand(OpMoved);
+
+ // Re-tie operands.
+ for (auto [Tie1, Tie2] : TiedOpIndices) {
+ if (Tie1 >= OpIdx)
+ Tie1 += Ops.size();
+ if (Tie2 >= OpIdx)
+ Tie2 += Ops.size();
+ tieOperands(Tie1, Tie2);
+ }
+}
+
+bool MachineInstr::mayFoldInlineAsmRegOp(unsigned OpId) const {
+ assert(OpId && "expected non-zero operand id");
+ assert(isInlineAsm() && "should only be used on inline asm");
+
+ if (!getOperand(OpId).isReg())
+ return false;
+
+ const MachineOperand &MD = getOperand(OpId - 1);
+ if (!MD.isImm())
+ return false;
+
+ InlineAsm::Flag F(MD.getImm());
+ if (F.isRegUseKind() || F.isRegDefKind() || F.isRegDefEarlyClobberKind())
+ return F.getRegMayBeFolded();
+ return false;
+}
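
MachineInstr::insert above unties tied operands, splices the new ones in, and re-ties with shifted indices: any recorded index at or beyond the insertion point moves up by Ops.size(), while earlier indices are untouched. A standalone arithmetic illustration (not LLVM code):

    #include <cstdio>
    #include <utility>
    #include <vector>

    int main() {
      unsigned OpIdx = 2;       // insertion point
      unsigned NumInserted = 2; // Ops.size()
      // (Tie1, Tie2) pairs recorded before untying.
      std::vector<std::pair<unsigned, unsigned>> Ties = {{0, 3}, {1, 4}};
      for (auto [Tie1, Tie2] : Ties) {
        if (Tie1 >= OpIdx) Tie1 += NumInserted;
        if (Tie2 >= OpIdx) Tie2 += NumInserted;
        std::printf("re-tie %u <-> %u\n", Tie1, Tie2); // 0<->5, then 1<->6
      }
    }
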
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
index 523e077fd9a2..efc19f8fdbf8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
@@ -72,6 +72,11 @@ static cl::opt<bool>
HoistConstStores("hoist-const-stores",
cl::desc("Hoist invariant stores"),
cl::init(true), cl::Hidden);
+
+static cl::opt<bool> HoistConstLoads("hoist-const-loads",
+ cl::desc("Hoist invariant loads"),
+ cl::init(true), cl::Hidden);
+
// The default threshold of 100 (i.e. if target block is 100 times hotter)
// is based on empirical data on a single target and is subject to tuning.
static cl::opt<unsigned>
@@ -110,6 +115,7 @@ STATISTIC(NumNotHoistedDueToHotness,
"Number of instructions not hoisted due to block frequency");
namespace {
+ enum HoistResult { NotHoisted = 1, Hoisted = 2, ErasedMI = 4 };
class MachineLICMBase : public MachineFunctionPass {
const TargetInstrInfo *TII = nullptr;
@@ -130,13 +136,21 @@ namespace {
// State that is updated as we process loops
bool Changed = false; // True if a loop is changed.
bool FirstInLoop = false; // True if it's the first LICM in the loop.
- MachineLoop *CurLoop = nullptr; // The current loop we are working on.
- MachineBasicBlock *CurPreheader = nullptr; // The preheader for CurLoop.
- // Exit blocks for CurLoop.
- SmallVector<MachineBasicBlock *, 8> ExitBlocks;
+ // Holds information about whether it is allowed to move load instructions
+ // out of the loop
+ SmallDenseMap<MachineLoop *, bool> AllowedToHoistLoads;
+
+ // Exit blocks of each Loop.
+ DenseMap<MachineLoop *, SmallVector<MachineBasicBlock *, 8>> ExitBlockMap;
+
-  bool isExitBlock(const MachineBasicBlock *MBB) const {
+  bool isExitBlock(MachineLoop *CurLoop, const MachineBasicBlock *MBB) {
+    if (ExitBlockMap.contains(CurLoop))
+      return is_contained(ExitBlockMap[CurLoop], MBB);
+
+    SmallVector<MachineBasicBlock *, 8> ExitBlocks;
+    CurLoop->getExitBlocks(ExitBlocks);
+    ExitBlockMap[CurLoop] = ExitBlocks;
     return is_contained(ExitBlocks, MBB);
}
@@ -151,8 +165,10 @@ namespace {
// Register pressure on path leading from loop preheader to current BB.
SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
- // For each opcode, keep a list of potential CSE instructions.
- DenseMap<unsigned, std::vector<MachineInstr *>> CSEMap;
+ // For each opcode per preheader, keep a list of potential CSE instructions.
+ DenseMap<MachineBasicBlock *,
+ DenseMap<unsigned, std::vector<MachineInstr *>>>
+ CSEMap;
enum {
SpeculateFalse = 0,
@@ -187,6 +203,7 @@ namespace {
RegLimit.clear();
BackTrace.clear();
CSEMap.clear();
+ ExitBlockMap.clear();
}
private:
@@ -200,24 +217,27 @@ namespace {
: MI(mi), Def(def), FI(fi) {}
};
- void HoistRegionPostRA();
+ void HoistRegionPostRA(MachineLoop *CurLoop,
+ MachineBasicBlock *CurPreheader);
- void HoistPostRA(MachineInstr *MI, unsigned Def);
+ void HoistPostRA(MachineInstr *MI, unsigned Def, MachineLoop *CurLoop,
+ MachineBasicBlock *CurPreheader);
void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs,
- SmallVectorImpl<CandidateInfo> &Candidates);
+ SmallVectorImpl<CandidateInfo> &Candidates,
+ MachineLoop *CurLoop);
- void AddToLiveIns(MCRegister Reg);
+ void AddToLiveIns(MCRegister Reg, MachineLoop *CurLoop);
- bool IsLICMCandidate(MachineInstr &I);
+ bool IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop);
- bool IsLoopInvariantInst(MachineInstr &I);
+ bool IsLoopInvariantInst(MachineInstr &I, MachineLoop *CurLoop);
- bool HasLoopPHIUse(const MachineInstr *MI) const;
+ bool HasLoopPHIUse(const MachineInstr *MI, MachineLoop *CurLoop);
- bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
- Register Reg) const;
+ bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, Register Reg,
+ MachineLoop *CurLoop) const;
bool IsCheapInstruction(MachineInstr &MI) const;
@@ -226,9 +246,9 @@ namespace {
void UpdateBackTraceRegPressure(const MachineInstr *MI);
- bool IsProfitableToHoist(MachineInstr &MI);
+ bool IsProfitableToHoist(MachineInstr &MI, MachineLoop *CurLoop);
- bool IsGuaranteedToExecute(MachineBasicBlock *BB);
+ bool IsGuaranteedToExecute(MachineBasicBlock *BB, MachineLoop *CurLoop);
bool isTriviallyReMaterializable(const MachineInstr &MI) const;
@@ -241,7 +261,8 @@ namespace {
DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);
- void HoistOutOfLoop(MachineDomTreeNode *HeaderN);
+ void HoistOutOfLoop(MachineDomTreeNode *HeaderN, MachineLoop *CurLoop,
+ MachineBasicBlock *CurPreheader);
void InitRegPressure(MachineBasicBlock *BB);
@@ -252,7 +273,7 @@ namespace {
void UpdateRegPressure(const MachineInstr *MI,
bool ConsiderUnseenAsDef = false);
- MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
+ MachineInstr *ExtractHoistableLoad(MachineInstr *MI, MachineLoop *CurLoop);
MachineInstr *LookForDuplicate(const MachineInstr *MI,
std::vector<MachineInstr *> &PrevMIs);
@@ -263,13 +284,17 @@ namespace {
bool MayCSE(MachineInstr *MI);
- bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
+ unsigned Hoist(MachineInstr *MI, MachineBasicBlock *Preheader,
+ MachineLoop *CurLoop);
void InitCSEMap(MachineBasicBlock *BB);
+ void InitializeLoadsHoistableLoops();
+
bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
MachineBasicBlock *TgtBlock);
- MachineBasicBlock *getCurPreheader();
+ MachineBasicBlock *getCurPreheader(MachineLoop *CurLoop,
+ MachineBasicBlock *CurPreheader);
};
class MachineLICM : public MachineLICMBase {
@@ -314,19 +339,6 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm",
"Early Machine Loop Invariant Code Motion", false, false)
-/// Test if the given loop is the outer-most loop that has a unique predecessor.
-static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
- // Check whether this loop even has a unique predecessor.
- if (!CurLoop->getLoopPredecessor())
- return false;
- // Ok, now check to see if any of its outer loops do.
- for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
- if (L->getLoopPredecessor())
- return false;
- // None of them did, so this is the outermost with a unique predecessor.
- return true;
-}
-
bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -366,29 +378,22 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
DT = &getAnalysis<MachineDominatorTree>();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ if (HoistConstLoads)
+ InitializeLoadsHoistableLoops();
+
SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
while (!Worklist.empty()) {
- CurLoop = Worklist.pop_back_val();
- CurPreheader = nullptr;
- ExitBlocks.clear();
-
- // If this is done before regalloc, only visit outer-most preheader-sporting
- // loops.
- if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
- Worklist.append(CurLoop->begin(), CurLoop->end());
- continue;
- }
-
- CurLoop->getExitBlocks(ExitBlocks);
+ MachineLoop *CurLoop = Worklist.pop_back_val();
+ MachineBasicBlock *CurPreheader = nullptr;
if (!PreRegAlloc)
- HoistRegionPostRA();
+ HoistRegionPostRA(CurLoop, CurPreheader);
else {
// CSEMap is initialized for loop header when the first instruction is
// being hoisted.
MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
FirstInLoop = true;
- HoistOutOfLoop(N);
+ HoistOutOfLoop(N, CurLoop, CurPreheader);
CSEMap.clear();
}
}
@@ -420,11 +425,11 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
/// Examine the instruction as a potential LICM candidate. Also
/// gather register def and frame object update information.
-void MachineLICMBase::ProcessMI(MachineInstr *MI,
- BitVector &PhysRegDefs,
+void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
BitVector &PhysRegClobbers,
SmallSet<int, 32> &StoredFIs,
- SmallVectorImpl<CandidateInfo> &Candidates) {
+ SmallVectorImpl<CandidateInfo> &Candidates,
+ MachineLoop *CurLoop) {
bool RuledOut = false;
bool HasNonInvariantUse = false;
unsigned Def = 0;
@@ -502,7 +507,7 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI,
// operands. FIXME: Consider unfold load folding instructions.
if (Def && !RuledOut) {
int FI = std::numeric_limits<int>::min();
- if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
+ if ((!HasNonInvariantUse && IsLICMCandidate(*MI, CurLoop)) ||
(TII->isLoadFromStackSlot(*MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
Candidates.push_back(CandidateInfo(MI, Def, FI));
}
@@ -510,8 +515,9 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI,
/// Walk the specified region of the CFG and hoist loop invariants out to the
/// preheader.
-void MachineLICMBase::HoistRegionPostRA() {
- MachineBasicBlock *Preheader = getCurPreheader();
+void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop,
+ MachineBasicBlock *CurPreheader) {
+ MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader);
if (!Preheader)
return;
@@ -544,7 +550,8 @@ void MachineLICMBase::HoistRegionPostRA() {
SpeculationState = SpeculateUnknown;
for (MachineInstr &MI : *BB)
- ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
+ ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates,
+ CurLoop);
}
// Gather the registers read / clobbered by the terminator.
@@ -592,14 +599,14 @@ void MachineLICMBase::HoistRegionPostRA() {
}
}
if (Safe)
- HoistPostRA(MI, Candidate.Def);
+ HoistPostRA(MI, Candidate.Def, CurLoop, CurPreheader);
}
}
}
/// Add register 'Reg' to the livein sets of BBs in the current loop, and make
/// sure it is not killed by any instructions in the loop.
-void MachineLICMBase::AddToLiveIns(MCRegister Reg) {
+void MachineLICMBase::AddToLiveIns(MCRegister Reg, MachineLoop *CurLoop) {
for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
if (!BB->isLiveIn(Reg))
BB->addLiveIn(Reg);
@@ -607,7 +614,7 @@ void MachineLICMBase::AddToLiveIns(MCRegister Reg) {
for (MachineOperand &MO : MI.all_uses()) {
if (!MO.getReg())
continue;
- if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
+ if (TRI->regsOverlap(Reg, MO.getReg()))
MO.setIsKill(false);
}
}
@@ -616,8 +623,10 @@ void MachineLICMBase::AddToLiveIns(MCRegister Reg) {
/// When an instruction is found to only use loop invariant operands that is
/// safe to hoist, this instruction is called to do the dirty work.
-void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) {
- MachineBasicBlock *Preheader = getCurPreheader();
+void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def,
+ MachineLoop *CurLoop,
+ MachineBasicBlock *CurPreheader) {
+ MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader);
// Now move the instructions to the predecessor, inserting it before any
// terminator instructions.
@@ -638,7 +647,7 @@ void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) {
// Add register to livein list to all the BBs in the current loop since a
// loop invariant must be kept live throughout the whole loop. This is
// important to ensure later passes do not scavenge the def register.
- AddToLiveIns(Def);
+ AddToLiveIns(Def, CurLoop);
++NumPostRAHoisted;
Changed = true;
@@ -646,7 +655,8 @@ void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) {
/// Check if this mbb is guaranteed to execute. If not then a load from this mbb
/// may not be safe to hoist.
-bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) {
+bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB,
+ MachineLoop *CurLoop) {
if (SpeculationState != SpeculateUnknown)
return SpeculationState == SpeculateFalse;
@@ -717,8 +727,10 @@ void MachineLICMBase::ExitScopeIfDone(MachineDomTreeNode *Node,
/// specified header block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
/// uses, allowing us to hoist a loop body in one pass without iteration.
-void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
- MachineBasicBlock *Preheader = getCurPreheader();
+void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN,
+ MachineLoop *CurLoop,
+ MachineBasicBlock *CurPreheader) {
+ MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader);
if (!Preheader)
return;
@@ -782,10 +794,31 @@ void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
// Process the block
SpeculationState = SpeculateUnknown;
for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
- if (!Hoist(&MI, Preheader))
- UpdateRegPressure(&MI);
- // If we have hoisted an instruction that may store, it can only be a
- // constant store.
+ unsigned HoistRes = HoistResult::NotHoisted;
+ HoistRes = Hoist(&MI, Preheader, CurLoop);
+ if (HoistRes & HoistResult::NotHoisted) {
+ // We have failed to hoist MI to outermost loop's preheader. If MI is in
+ // a subloop, try to hoist it to subloop's preheader.
+ SmallVector<MachineLoop *> InnerLoopWorkList;
+ for (MachineLoop *L = MLI->getLoopFor(MI.getParent()); L != CurLoop;
+ L = L->getParentLoop())
+ InnerLoopWorkList.push_back(L);
+
+ while (!InnerLoopWorkList.empty()) {
+ MachineLoop *InnerLoop = InnerLoopWorkList.pop_back_val();
+ MachineBasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
+ if (InnerLoopPreheader) {
+ HoistRes = Hoist(&MI, InnerLoopPreheader, InnerLoop);
+ if (HoistRes & HoistResult::Hoisted)
+ break;
+ }
+ }
+ }
+
+ if (HoistRes & HoistResult::ErasedMI)
+ continue;
+
+ UpdateRegPressure(&MI);
}
// If it's a leaf node, it's done. Traverse upwards to pop ancestors.
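
Hoist now returns a bitmask rather than a bool, so the walk above can distinguish "not hoisted" (retry against an inner loop's preheader), "hoisted" (update register pressure), and "hoisted and erased" (the pointer is dead, e.g. after CSE or load unfolding). A standalone illustration of the flag checks:

    #include <cstdio>

    enum HoistResult { NotHoisted = 1, Hoisted = 2, ErasedMI = 4 };

    int main() {
      unsigned HoistRes = Hoisted | ErasedMI; // e.g. hoisted, then CSE'd away
      if (HoistRes & HoistResult::ErasedMI)
        std::puts("instruction erased: do not touch MI again");
      else if (HoistRes & HoistResult::Hoisted)
        std::puts("instruction moved: pointer still valid");
      return 0;
    }
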
@@ -970,9 +1003,9 @@ static bool isCopyFeedingInvariantStore(const MachineInstr &MI,
/// Returns true if the instruction may be a suitable candidate for LICM.
/// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
-bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
+bool MachineLICMBase::IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop) {
// Check if it's safe to move the instruction.
- bool DontMoveAcrossStore = true;
+ bool DontMoveAcrossStore = !HoistConstLoads || !AllowedToHoistLoads[CurLoop];
if ((!I.isSafeToMove(AA, DontMoveAcrossStore)) &&
!(HoistConstStores && isInvariantStore(I, TRI, MRI))) {
LLVM_DEBUG(dbgs() << "LICM: Instruction not safe to move.\n");
@@ -986,7 +1019,7 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
// from a jump table.
// Stores and side effects are already checked by isSafeToMove.
if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) &&
- !IsGuaranteedToExecute(I.getParent())) {
+ !IsGuaranteedToExecute(I.getParent(), CurLoop)) {
LLVM_DEBUG(dbgs() << "LICM: Load not guaranteed to execute.\n");
return false;
}
@@ -1005,8 +1038,9 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
}
/// Returns true if the instruction is loop invariant.
-bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) {
- if (!IsLICMCandidate(I)) {
+bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I,
+ MachineLoop *CurLoop) {
+ if (!IsLICMCandidate(I, CurLoop)) {
LLVM_DEBUG(dbgs() << "LICM: Instruction not a LICM candidate\n");
return false;
}
@@ -1015,8 +1049,9 @@ bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) {
/// Return true if the specified instruction is used by a phi node and hoisting
/// it could cause a copy to be inserted.
-bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const {
- SmallVector<const MachineInstr*, 8> Work(1, MI);
+bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI,
+ MachineLoop *CurLoop) {
+ SmallVector<const MachineInstr *, 8> Work(1, MI);
do {
MI = Work.pop_back_val();
for (const MachineOperand &MO : MI->all_defs()) {
@@ -1033,7 +1068,7 @@ bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const {
// A PHI in an exit block can cause a copy to be inserted if the PHI
// has multiple predecessors in the loop with different values.
// For now, approximate by rejecting all exit blocks.
- if (isExitBlock(UseMI.getParent()))
+ if (isExitBlock(CurLoop, UseMI.getParent()))
return true;
continue;
}
@@ -1049,7 +1084,8 @@ bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const {
/// Compute operand latency between a def of 'Reg' and an use in the current
/// loop, return true if the target considered it high.
bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
- Register Reg) const {
+ Register Reg,
+ MachineLoop *CurLoop) const {
if (MRI->use_nodbg_empty(Reg))
return false;
@@ -1144,7 +1180,8 @@ void MachineLICMBase::UpdateBackTraceRegPressure(const MachineInstr *MI) {
/// Return true if it is potentially profitable to hoist the given loop
/// invariant.
-bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
+bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI,
+ MachineLoop *CurLoop) {
if (MI.isImplicitDef())
return true;
@@ -1164,7 +1201,7 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
return true;
bool CheapInstr = IsCheapInstruction(MI);
- bool CreatesCopy = HasLoopPHIUse(&MI);
+ bool CreatesCopy = HasLoopPHIUse(&MI, CurLoop);
// Don't hoist a cheap instruction if it would create a copy in the loop.
if (CheapInstr && CreatesCopy) {
@@ -1186,7 +1223,7 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
Register Reg = MO.getReg();
if (!Reg.isVirtual())
continue;
- if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) {
+ if (MO.isDef() && HasHighOperandLatency(MI, i, Reg, CurLoop)) {
LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI);
++NumHighLatency;
return true;
@@ -1220,11 +1257,23 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
// instruction is not guaranteed to be executed in the loop, it's best to be
// conservative.
if (AvoidSpeculation &&
- (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
+ (!IsGuaranteedToExecute(MI.getParent(), CurLoop) && !MayCSE(&MI))) {
LLVM_DEBUG(dbgs() << "Won't speculate: " << MI);
return false;
}
+ // If we have a COPY with other uses in the loop, hoist to allow the users to
+ // also be hoisted.
+ if (MI.isCopy() && MI.getOperand(0).isReg() &&
+ MI.getOperand(0).getReg().isVirtual() && MI.getOperand(1).isReg() &&
+ MI.getOperand(1).getReg().isVirtual() &&
+ IsLoopInvariantInst(MI, CurLoop) &&
+ any_of(MRI->use_nodbg_instructions(MI.getOperand(0).getReg()),
+ [&CurLoop](MachineInstr &UseMI) {
+ return CurLoop->contains(&UseMI);
+ }))
+ return true;
+
// High register pressure situation, only hoist if the instruction is going
// to be remat'ed.
if (!isTriviallyReMaterializable(MI) &&
@@ -1239,7 +1288,8 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
/// Unfold a load from the given machineinstr if the load itself could be
/// hoisted. Return the unfolded and hoistable load, or null if the load
/// couldn't be unfolded or if it wouldn't be hoistable.
-MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) {
+MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI,
+ MachineLoop *CurLoop) {
// Don't unfold simple loads.
if (MI->canFoldAsLoad())
return nullptr;
@@ -1280,7 +1330,8 @@ MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) {
MBB->insert(Pos, NewMIs[1]);
// If unfolding produced a load that wasn't loop-invariant or profitable to
// hoist, discard the new instructions and bail.
- if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
+ if (!IsLoopInvariantInst(*NewMIs[0], CurLoop) ||
+ !IsProfitableToHoist(*NewMIs[0], CurLoop)) {
NewMIs[0]->eraseFromParent();
NewMIs[1]->eraseFromParent();
return nullptr;
@@ -1304,7 +1355,47 @@ MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) {
/// out of the loop.
void MachineLICMBase::InitCSEMap(MachineBasicBlock *BB) {
for (MachineInstr &MI : *BB)
- CSEMap[MI.getOpcode()].push_back(&MI);
+ CSEMap[BB][MI.getOpcode()].push_back(&MI);
+}
+
+/// Initialize AllowedToHoistLoads with information about whether invariant
+/// loads can be moved outside a given loop
+void MachineLICMBase::InitializeLoadsHoistableLoops() {
+ SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
+ SmallVector<MachineLoop *, 8> LoopsInPreOrder;
+
+ // Mark all loops as hoistable initially and prepare a list of loops in
+ // pre-order DFS.
+ while (!Worklist.empty()) {
+ auto *L = Worklist.pop_back_val();
+ AllowedToHoistLoads[L] = true;
+ LoopsInPreOrder.push_back(L);
+ Worklist.insert(Worklist.end(), L->getSubLoops().begin(),
+ L->getSubLoops().end());
+ }
+
+ // Going from the innermost to outermost loops, check if a loop has
+ // instructions preventing invariant load hoisting. If such instruction is
+ // found, mark this loop and its parent as non-hoistable and continue
+ // investigating the next loop.
+ // Visiting in a reversed pre-ordered DFS manner
+ // allows us to not process all the instructions of the outer loop if the
+ // inner loop is proved to be non-load-hoistable.
+ for (auto *Loop : reverse(LoopsInPreOrder)) {
+ for (auto *MBB : Loop->blocks()) {
+ // If this loop has already been marked as non-hoistable, skip it.
+ if (!AllowedToHoistLoads[Loop])
+ continue;
+ for (auto &MI : *MBB) {
+ if (!MI.mayStore() && !MI.isCall() &&
+ !(MI.mayLoad() && MI.hasOrderedMemoryRef()))
+ continue;
+ for (MachineLoop *L = Loop; L != nullptr; L = L->getParentLoop())
+ AllowedToHoistLoads[L] = false;
+ break;
+ }
+ }
+ }
}
/// Find an instruction among PrevMIs that is a duplicate of MI.
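
InitializeLoadsHoistableLoops above walks the loop forest in reverse pre-order, so inner loops are classified first and one unsafe memory operation poisons the loop plus every ancestor in a single pass. A toy model of the propagation, with HasUnsafeMemOp standing in for the per-instruction scan:

    #include <map>
    #include <vector>

    struct Loop {
      Loop *Parent = nullptr;
      bool HasUnsafeMemOp = false; // store, call, or ordered load seen
    };

    static void initHoistableLoops(const std::vector<Loop *> &PreOrder,
                                   std::map<Loop *, bool> &Allowed) {
      for (Loop *L : PreOrder)
        Allowed[L] = true;
      // Innermost first: iterate the pre-order list backwards.
      for (auto It = PreOrder.rbegin(); It != PreOrder.rend(); ++It) {
        Loop *L = *It;
        if (!Allowed[L] || !L->HasUnsafeMemOp)
          continue; // already poisoned by a subloop, or nothing unsafe
        for (Loop *P = L; P; P = P->Parent)
          Allowed[P] = false; // poison this loop and all ancestors
      }
    }
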
@@ -1328,7 +1419,12 @@ bool MachineLICMBase::EliminateCSE(
DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI) {
// Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
// the undef property onto uses.
- if (CI == CSEMap.end() || MI->isImplicitDef())
+ if (MI->isImplicitDef())
+ return false;
+
+  // Do not CSE normal loads: store instructions between them could change
+  // the loaded value.
+ if (MI->mayLoad() && !MI->isDereferenceableInvariantLoad())
return false;
if (MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
@@ -1384,21 +1480,32 @@ bool MachineLICMBase::EliminateCSE(
/// Return true if the given instruction will be CSE'd if it's hoisted out of
/// the loop.
bool MachineLICMBase::MayCSE(MachineInstr *MI) {
- unsigned Opcode = MI->getOpcode();
- DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
- CSEMap.find(Opcode);
- // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
- // the undef property onto uses.
- if (CI == CSEMap.end() || MI->isImplicitDef())
+ if (MI->mayLoad() && !MI->isDereferenceableInvariantLoad())
return false;
- return LookForDuplicate(MI, CI->second) != nullptr;
+ unsigned Opcode = MI->getOpcode();
+ for (auto &Map : CSEMap) {
+ // Check this CSEMap's preheader dominates MI's basic block.
+ if (DT->dominates(Map.first, MI->getParent())) {
+ DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
+ Map.second.find(Opcode);
+ // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
+ // the undef property onto uses.
+ if (CI == Map.second.end() || MI->isImplicitDef())
+ continue;
+ if (LookForDuplicate(MI, CI->second) != nullptr)
+ return true;
+ }
+ }
+
+ return false;
}
/// When an instruction is found to use only loop invariant operands
/// that are safe to hoist, this instruction is called to do the dirty work.
/// It returns true if the instruction is hoisted.
-bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
+unsigned MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader,
+ MachineLoop *CurLoop) {
MachineBasicBlock *SrcBlock = MI->getParent();
// Disable the instruction hoisting due to block hotness
@@ -1406,13 +1513,17 @@ bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
(DisableHoistingToHotterBlocks == UseBFI::PGO && HasProfileData)) &&
isTgtHotterThanSrc(SrcBlock, Preheader)) {
++NumNotHoistedDueToHotness;
- return false;
+ return HoistResult::NotHoisted;
}
// First check whether we should hoist this instruction.
- if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
+ bool HasExtractHoistableLoad = false;
+ if (!IsLoopInvariantInst(*MI, CurLoop) ||
+ !IsProfitableToHoist(*MI, CurLoop)) {
// If not, try unfolding a hoistable load.
- MI = ExtractHoistableLoad(MI);
- if (!MI) return false;
+ MI = ExtractHoistableLoad(MI, CurLoop);
+ if (!MI)
+ return HoistResult::NotHoisted;
+ HasExtractHoistableLoad = true;
}
// If we have hoisted an instruction that may store, it can only be a constant
@@ -1440,9 +1551,22 @@ bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
// Look for opportunity to CSE the hoisted instruction.
unsigned Opcode = MI->getOpcode();
- DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
- CSEMap.find(Opcode);
- if (!EliminateCSE(MI, CI)) {
+ bool HasCSEDone = false;
+ for (auto &Map : CSEMap) {
+ // Check this CSEMap's preheader dominates MI's basic block.
+ if (DT->dominates(Map.first, MI->getParent())) {
+ DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
+ Map.second.find(Opcode);
+ if (CI != Map.second.end()) {
+ if (EliminateCSE(MI, CI)) {
+ HasCSEDone = true;
+ break;
+ }
+ }
+ }
+ }
+
+ if (!HasCSEDone) {
// Otherwise, splice the instruction to the preheader.
Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
@@ -1462,21 +1586,21 @@ bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
if (!MO.isDead())
MRI->clearKillFlags(MO.getReg());
- // Add to the CSE map.
- if (CI != CSEMap.end())
- CI->second.push_back(MI);
- else
- CSEMap[Opcode].push_back(MI);
+ CSEMap[Preheader][Opcode].push_back(MI);
}
++NumHoisted;
Changed = true;
- return true;
+ if (HasCSEDone || HasExtractHoistableLoad)
+ return HoistResult::Hoisted | HoistResult::ErasedMI;
+ return HoistResult::Hoisted;
}
/// Get the preheader for the current loop, splitting a critical edge if needed.
-MachineBasicBlock *MachineLICMBase::getCurPreheader() {
+MachineBasicBlock *
+MachineLICMBase::getCurPreheader(MachineLoop *CurLoop,
+ MachineBasicBlock *CurPreheader) {
// Determine the block to which to hoist instructions. If we can't find a
// suitable loop predecessor, we can't do any hoisting.
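
With the CSE map now keyed by preheader, both MayCSE and Hoist above only consult candidate lists whose preheader dominates the block being examined; a value hoisted to a non-dominating preheader cannot be reused. A toy model of that lookup, where dominates stands in for MachineDominatorTree::dominates:

    #include <functional>
    #include <map>
    #include <vector>

    struct Block {};
    struct Instr { int Opcode; Block *Parent; };
    using OpcodeMap = std::map<int, std::vector<Instr *>>;

    static Instr *findCSECandidate(
        const Instr &MI, std::map<Block *, OpcodeMap> &CSEMap,
        const std::function<bool(Block *, Block *)> &dominates) {
      for (auto &[Preheader, ByOpcode] : CSEMap) {
        if (!dominates(Preheader, MI.Parent))
          continue; // a non-dominating preheader cannot supply the value
        auto It = ByOpcode.find(MI.Opcode);
        if (It != ByOpcode.end() && !It->second.empty())
          return It->second.front(); // stand-in for LookForDuplicate
      }
      return nullptr;
    }
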
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
index c44b968b317d..aa1eb7c35425 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 37a0ff3d71c8..bdbc57099aa8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -88,7 +88,7 @@ MachineBasicBlock *MachineLoop::getBottomBlock() {
return BotMBB;
}
-MachineBasicBlock *MachineLoop::findLoopControlBlock() {
+MachineBasicBlock *MachineLoop::findLoopControlBlock() const {
if (MachineBasicBlock *Latch = getLoopLatch()) {
if (isLoopExiting(Latch))
return Latch;
@@ -151,6 +151,53 @@ MachineLoopInfo::findLoopPreheader(MachineLoop *L, bool SpeculativePreheader,
return Preheader;
}
+MDNode *MachineLoop::getLoopID() const {
+ MDNode *LoopID = nullptr;
+ if (const auto *MBB = findLoopControlBlock()) {
+ // If there is a single latch block, then the metadata
+ // node is attached to its terminating instruction.
+ const auto *BB = MBB->getBasicBlock();
+ if (!BB)
+ return nullptr;
+ if (const auto *TI = BB->getTerminator())
+ LoopID = TI->getMetadata(LLVMContext::MD_loop);
+ } else if (const auto *MBB = getHeader()) {
+ // There seem to be multiple latch blocks, so we have to
+ // visit all predecessors of the loop header and check
+ // their terminating instructions for the metadata.
+ if (const auto *Header = MBB->getBasicBlock()) {
+ // Walk over all blocks in the loop.
+ for (const auto *MBB : this->blocks()) {
+ const auto *BB = MBB->getBasicBlock();
+ if (!BB)
+ return nullptr;
+ const auto *TI = BB->getTerminator();
+ if (!TI)
+ return nullptr;
+ MDNode *MD = nullptr;
+ // Check if this terminating instruction jumps to the loop header.
+ for (const auto *Succ : successors(TI)) {
+ if (Succ == Header) {
+ // This is a jump to the header - gather the metadata from it.
+ MD = TI->getMetadata(LLVMContext::MD_loop);
+ break;
+ }
+ }
+ if (!MD)
+ return nullptr;
+ if (!LoopID)
+ LoopID = MD;
+ else if (MD != LoopID)
+ return nullptr;
+ }
+ }
+ }
+ if (LoopID &&
+ (LoopID->getNumOperands() == 0 || LoopID->getOperand(0) != LoopID))
+ LoopID = nullptr;
+ return LoopID;
+}
+
bool MachineLoop::isLoopInvariant(MachineInstr &I) const {
MachineFunction *MF = I.getParent()->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
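
The final check in getLoopID relies on the llvm.loop convention that a loop-ID node is distinct and self-referential: operand 0 is the node itself. A sketch of how such a node is built with the IR metadata API, following the construction pattern used by LLVM's loop-metadata utilities (shown for illustration; it is not part of this diff):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"
    using namespace llvm;

    static MDNode *makeSelfReferentialLoopID(LLVMContext &Ctx) {
      SmallVector<Metadata *, 1> MDs;
      MDs.push_back(nullptr); // placeholder for the self-reference
      MDNode *LoopID = MDNode::getDistinct(Ctx, MDs);
      // Now LoopID->getOperand(0) == LoopID, which is exactly what the
      // validity check at the end of getLoopID requires.
      LoopID->replaceOperandWith(0, LoopID);
      return LoopID;
    }
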
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
index 788c134b6ee8..12d6b79f735d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
@@ -11,13 +11,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/ADT/StableHashing.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/CodeGen/MIRFormatter.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/StableHashing.h"
+#include "llvm/CodeGen/PseudoSourceValueManager.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Config/llvm-config.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp
index 439ff8babcc6..914e6b19fde9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp
@@ -33,16 +33,17 @@ Error MachineFunctionPassManager::run(Module &M,
(void)RequireCodeGenSCCOrder;
assert(!RequireCodeGenSCCOrder && "not implemented");
+ // M is unused here
+ PassInstrumentation PI = MFAM.getResult<PassInstrumentationAnalysis>(M);
+
// Add a PIC to verify machine functions.
if (VerifyMachineFunction) {
- PassInstrumentation PI = MFAM.getResult<PassInstrumentationAnalysis>(M);
-
// No need to pop this callback later since MIR pipeline is flat which means
// current pipeline is the top-level pipeline. Callbacks are not used after
// current pipeline.
PI.pushBeforeNonSkippedPassCallback([&MFAM](StringRef PassID, Any IR) {
- assert(any_cast<const MachineFunction *>(&IR));
- const MachineFunction *MF = any_cast<const MachineFunction *>(IR);
+ assert(llvm::any_cast<const MachineFunction *>(&IR));
+ const MachineFunction *MF = llvm::any_cast<const MachineFunction *>(IR);
assert(MF && "Machine function should be valid for printing");
std::string Banner = std::string("After ") + std::string(PassID);
verifyMachineFunction(&MFAM, Banner, *MF);
@@ -59,8 +60,11 @@ Error MachineFunctionPassManager::run(Module &M,
do {
// Run machine module passes
for (; MachineModulePasses.count(Idx) && Idx != Size; ++Idx) {
+ if (!PI.runBeforePass<Module>(*Passes[Idx], M))
+ continue;
if (auto Err = MachineModulePasses.at(Idx)(M, MFAM))
return Err;
+ PI.runAfterPass(*Passes[Idx], M, PreservedAnalyses::all());
}
// Finish running all passes.
@@ -81,7 +85,6 @@ Error MachineFunctionPassManager::run(Module &M,
continue;
MachineFunction &MF = MMI.getOrCreateMachineFunction(F);
- PassInstrumentation PI = MFAM.getResult<PassInstrumentationAnalysis>(MF);
for (unsigned I = Begin, E = Idx; I != E; ++I) {
auto *P = Passes[I].get();
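
The pass-manager hunks bracket each machine module pass with runBeforePass/runAfterPass, letting instrumentation veto a pass run entirely. A toy stand-in for that protocol (the real PassInstrumentation API is templated over IR units; this only models the veto-and-report flow):

    #include <cstdio>
    #include <functional>
    #include <vector>

    struct Module {};
    struct Pass { const char *Name; };

    struct Instrumentation {
      std::vector<std::function<bool(const Pass &, Module &)>> Before;
      std::vector<std::function<void(const Pass &, Module &)>> After;
      bool runBeforePass(const Pass &P, Module &M) {
        for (auto &F : Before)
          if (!F(P, M))
            return false; // vetoed: caller skips the pass, as in the hunk
        return true;
      }
      void runAfterPass(const Pass &P, Module &M) {
        for (auto &F : After)
          F(P, M);
      }
    };

    int main() {
      Module M;
      Instrumentation PI;
      PI.Before.push_back(
          [](const Pass &P, Module &) { return P.Name[0] != 'x'; });
      for (Pass P : {Pass{"a"}, Pass{"x-skip"}, Pass{"b"}}) {
        if (!PI.runBeforePass(P, M))
          continue;
        std::printf("running %s\n", P.Name);
        PI.runAfterPass(P, M);
      }
    }
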
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
index c7e7497dab36..8cd7f4ebe88d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -1039,7 +1039,7 @@ struct FuncUnitSorter {
for (const MCWriteProcResEntry &PRE :
make_range(STI->getWriteProcResBegin(SCDesc),
STI->getWriteProcResEnd(SCDesc))) {
- if (!PRE.Cycles)
+ if (!PRE.ReleaseAtCycle)
continue;
const MCProcResourceDesc *ProcResource =
STI->getSchedModel().getProcResource(PRE.ProcResourceIdx);
@@ -1082,7 +1082,7 @@ struct FuncUnitSorter {
for (const MCWriteProcResEntry &PRE :
make_range(STI->getWriteProcResBegin(SCDesc),
STI->getWriteProcResEnd(SCDesc))) {
- if (!PRE.Cycles)
+ if (!PRE.ReleaseAtCycle)
continue;
Resources[PRE.ProcResourceIdx]++;
}
@@ -2225,7 +2225,7 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
}
/// Return true for an order or output dependence that is loop carried
-/// potentially. A dependence is loop carried if the destination defines a valu
+/// potentially. A dependence is loop carried if the destination defines a value
/// that may be used or defined by the source in a subsequent iteration.
bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
bool isSucc) {
@@ -2251,10 +2251,12 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
return true;
- // Only chain dependences between a load and store can be loop carried.
- if (!DI->mayStore() || !SI->mayLoad())
+ if (!DI->mayLoadOrStore() || !SI->mayLoadOrStore())
return false;
+ // The conservative assumption is that a dependence between memory operations
+ // may be loop carried. The following code checks when it can be proved that
+ // there is no loop carried dependence.
unsigned DeltaS, DeltaD;
if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
return true;
@@ -2635,7 +2637,7 @@ bool SMSchedule::isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi) {
/// v1 = phi(v2, v3)
/// (Def) v3 = op v1
/// (MO) = v1
-/// If MO appears before Def, then then v1 and v3 may get assigned to the same
+/// If MO appears before Def, then v1 and v3 may get assigned to the same
/// register.
bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD,
MachineInstr *Def, MachineOperand &MO) {
@@ -2706,7 +2708,7 @@ bool SMSchedule::normalizeNonPipelinedInstructions(
if (OldCycle != NewCycle) {
InstrToCycle[&SU] = NewCycle;
auto &OldS = getInstructions(OldCycle);
- llvm::erase_value(OldS, &SU);
+ llvm::erase(OldS, &SU);
getInstructions(NewCycle).emplace_back(&SU);
LLVM_DEBUG(dbgs() << "SU(" << SU.NodeNum
<< ") is not pipelined; moving from cycle " << OldCycle
@@ -3092,7 +3094,7 @@ void ResourceManager::reserveResources(const MCSchedClassDesc *SCDesc,
assert(!UseDFA);
for (const MCWriteProcResEntry &PRE : make_range(
STI->getWriteProcResBegin(SCDesc), STI->getWriteProcResEnd(SCDesc)))
- for (int C = Cycle; C < Cycle + PRE.Cycles; ++C)
+ for (int C = Cycle; C < Cycle + PRE.ReleaseAtCycle; ++C)
++MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];
for (int C = Cycle; C < Cycle + SCDesc->NumMicroOps; ++C)
@@ -3104,7 +3106,7 @@ void ResourceManager::unreserveResources(const MCSchedClassDesc *SCDesc,
assert(!UseDFA);
for (const MCWriteProcResEntry &PRE : make_range(
STI->getWriteProcResBegin(SCDesc), STI->getWriteProcResEnd(SCDesc)))
- for (int C = Cycle; C < Cycle + PRE.Cycles; ++C)
+ for (int C = Cycle; C < Cycle + PRE.ReleaseAtCycle; ++C)
--MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];
for (int C = Cycle; C < Cycle + SCDesc->NumMicroOps; ++C)
@@ -3220,10 +3222,10 @@ int ResourceManager::calculateResMII() const {
if (SwpDebugResource) {
const MCProcResourceDesc *Desc =
SM.getProcResource(PRE.ProcResourceIdx);
- dbgs() << Desc->Name << ": " << PRE.Cycles << ", ";
+ dbgs() << Desc->Name << ": " << PRE.ReleaseAtCycle << ", ";
}
});
- ResourceCount[PRE.ProcResourceIdx] += PRE.Cycles;
+ ResourceCount[PRE.ProcResourceIdx] += PRE.ReleaseAtCycle;
}
LLVM_DEBUG(if (SwpDebugResource) dbgs() << "\n");
}
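
After the rename, each write entry charges ReleaseAtCycle cycles against its processor resource, and the resource-limited minimum initiation interval is the worst ratio of charged cycles to available units. A simplified worked example (the real calculateResMII also folds sub-resources into their super-resources):

    #include <algorithm>
    #include <cstdio>

    int main() {
      // 3 ALU ops, 1 cycle each, on 2 ALU units; 4 memory ops, 2 cycles
      // each, on 1 load/store unit.
      unsigned ALUCycles = 3 * 1, ALUUnits = 2;
      unsigned MemCycles = 4 * 2, MemUnits = 1;
      unsigned ResMII = 0;
      ResMII = std::max(ResMII, (ALUCycles + ALUUnits - 1) / ALUUnits); // 2
      ResMII = std::max(ResMII, (MemCycles + MemUnits - 1) / MemUnits); // 8
      std::printf("ResMII = %u\n", ResMII); // the memory unit is the bottleneck
    }
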
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 0048918fc53b..087604af6a71 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -96,9 +96,9 @@ MachineRegisterInfo::constrainRegAttrs(Register Reg,
if (RegTy.isValid() && ConstrainingRegTy.isValid() &&
RegTy != ConstrainingRegTy)
return false;
- const auto ConstrainingRegCB = getRegClassOrRegBank(ConstrainingReg);
+ const auto &ConstrainingRegCB = getRegClassOrRegBank(ConstrainingReg);
if (!ConstrainingRegCB.isNull()) {
- const auto RegCB = getRegClassOrRegBank(Reg);
+ const auto &RegCB = getRegClassOrRegBank(Reg);
if (RegCB.isNull())
setRegClassOrRegBank(Reg, ConstrainingRegCB);
else if (isa<const TargetRegisterClass *>(RegCB) !=
@@ -619,7 +619,7 @@ void MachineRegisterInfo::disableCalleeSavedRegister(MCRegister Reg) {
// Remove the register (and its aliases from the list).
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- llvm::erase_value(UpdatedCSRs, *AI);
+ llvm::erase(UpdatedCSRs, *AI);
}
const MCPhysReg *MachineRegisterInfo::getCalleeSavedRegs() const {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp
index 324084fb9c32..e384187b6e85 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineSSAContext.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -21,15 +22,23 @@
using namespace llvm;
-void MachineSSAContext::setFunction(MachineFunction &Fn) {
- MF = &Fn;
- RegInfo = &MF->getRegInfo();
+template <>
+void MachineSSAContext::appendBlockDefs(SmallVectorImpl<Register> &defs,
+ const MachineBasicBlock &block) {
+ for (auto &instr : block.instrs()) {
+ for (auto &op : instr.all_defs())
+ defs.push_back(op.getReg());
+ }
}
-MachineBasicBlock *MachineSSAContext::getEntryBlock(MachineFunction &F) {
- return &F.front();
+template <>
+void MachineSSAContext::appendBlockTerms(SmallVectorImpl<MachineInstr *> &terms,
+ MachineBasicBlock &block) {
+ for (auto &T : block.terminators())
+ terms.push_back(&T);
}
+template <>
void MachineSSAContext::appendBlockTerms(
SmallVectorImpl<const MachineInstr *> &terms,
const MachineBasicBlock &block) {
@@ -37,37 +46,39 @@ void MachineSSAContext::appendBlockTerms(
terms.push_back(&T);
}
-void MachineSSAContext::appendBlockDefs(SmallVectorImpl<Register> &defs,
- const MachineBasicBlock &block) {
- for (const MachineInstr &instr : block.instrs()) {
- for (const MachineOperand &op : instr.all_defs())
- defs.push_back(op.getReg());
- }
-}
-
/// Get the defining block of a value.
-MachineBasicBlock *MachineSSAContext::getDefBlock(Register value) const {
+template <>
+const MachineBasicBlock *MachineSSAContext::getDefBlock(Register value) const {
if (!value)
return nullptr;
- return RegInfo->getVRegDef(value)->getParent();
+ return F->getRegInfo().getVRegDef(value)->getParent();
}
+template <>
bool MachineSSAContext::isConstantOrUndefValuePhi(const MachineInstr &Phi) {
return Phi.isConstantValuePHI();
}
+template <>
+Intrinsic::ID MachineSSAContext::getIntrinsicID(const MachineInstr &MI) {
+ if (auto *GI = dyn_cast<GIntrinsic>(&MI))
+ return GI->getIntrinsicID();
+ return Intrinsic::not_intrinsic;
+}
+
+template <>
Printable MachineSSAContext::print(const MachineBasicBlock *Block) const {
if (!Block)
return Printable([](raw_ostream &Out) { Out << "<nullptr>"; });
return Printable([Block](raw_ostream &Out) { Block->printName(Out); });
}
-Printable MachineSSAContext::print(const MachineInstr *I) const {
+template <> Printable MachineSSAContext::print(const MachineInstr *I) const {
return Printable([I](raw_ostream &Out) { I->print(Out); });
}
-Printable MachineSSAContext::print(Register Value) const {
- auto *MRI = RegInfo;
+template <> Printable MachineSSAContext::print(Register Value) const {
+ auto *MRI = &F->getRegInfo();
return Printable([MRI, Value](raw_ostream &Out) {
Out << printReg(Value, MRI->getTargetRegisterInfo(), 0, MRI);
@@ -80,3 +91,8 @@ Printable MachineSSAContext::print(Register Value) const {
}
});
}
+
+template <>
+Printable MachineSSAContext::printAsOperand(const MachineBasicBlock *BB) const {
+ return Printable([BB](raw_ostream &Out) { BB->printAsOperand(Out); });
+}
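
This file's free functions become explicit specializations of a generic SSA-context template, so IR-agnostic analyses can be written once against the template and instantiated for both LLVM IR and Machine IR. A toy reduction of the pattern (the real template is parameterized differently; this only shows the specialize-per-IR mechanics):

    #include <vector>

    template <typename FunctionT, typename BlockT, typename ValueT>
    class GenericContext {
      FunctionT *F = nullptr;
    public:
      void setFunction(FunctionT &Fn) { F = &Fn; }
      // Declared once; each IR supplies a definition via `template <>`.
      void appendBlockDefs(std::vector<ValueT> &Defs, const BlockT &Block);
    };

    struct MyBlock { std::vector<int> Defs; };
    struct MyFunction { std::vector<MyBlock> Blocks; };

    // IR-specific body, mirroring MachineSSAContext::appendBlockDefs above.
    template <>
    void GenericContext<MyFunction, MyBlock, int>::appendBlockDefs(
        std::vector<int> &Defs, const MyBlock &Block) {
      for (int D : Block.Defs)
        Defs.push_back(D);
    }
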
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
index ba5432459d12..886137d86f87 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -747,9 +747,9 @@ void ScheduleDAGMI::finishBlock() {
ScheduleDAGInstrs::finishBlock();
}
-/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
-/// crossing a scheduling boundary. [begin, end) includes all instructions in
-/// the region, including the boundary itself and single-instruction regions
+/// enterRegion - Called back from PostMachineScheduler::runOnMachineFunction
+/// after crossing a scheduling boundary. [begin, end) includes all instructions
+/// in the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
MachineBasicBlock::iterator begin,
@@ -793,9 +793,9 @@ bool ScheduleDAGMI::checkSchedLimit() {
}
/// Per-region scheduling driver, called back from
-/// MachineScheduler::runOnMachineFunction. This is a simplified driver that
-/// does not consider liveness or register pressure. It is useful for PostRA
-/// scheduling and potentially other custom schedulers.
+/// PostMachineScheduler::runOnMachineFunction. This is a simplified driver
+/// that does not consider liveness or register pressure. It is useful for
+/// PostRA scheduling and potentially other custom schedulers.
void ScheduleDAGMI::schedule() {
LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
LLVM_DEBUG(SchedImpl->dumpPolicy());
@@ -980,8 +980,8 @@ LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceTopDown() const {
for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
PE = SchedModel.getWriteProcResEnd(SC);
PI != PE; ++PI) {
- if (SU->TopReadyCycle + PI->Cycles - 1 > LastCycle)
- LastCycle = SU->TopReadyCycle + PI->Cycles - 1;
+ if (SU->TopReadyCycle + PI->ReleaseAtCycle - 1 > LastCycle)
+ LastCycle = SU->TopReadyCycle + PI->ReleaseAtCycle - 1;
}
}
// Print the header with the cycles
@@ -1017,19 +1017,20 @@ LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceTopDown() const {
llvm::stable_sort(ResourcesIt,
[](const MCWriteProcResEntry &LHS,
const MCWriteProcResEntry &RHS) -> bool {
- return LHS.StartAtCycle < RHS.StartAtCycle ||
- (LHS.StartAtCycle == RHS.StartAtCycle &&
- LHS.Cycles < RHS.Cycles);
+ return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||
+ (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
+ LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
});
for (const MCWriteProcResEntry &PI : ResourcesIt) {
C = FirstCycle;
const std::string ResName =
SchedModel.getResourceName(PI.ProcResourceIdx);
dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
- for (; C < SU->TopReadyCycle + PI.StartAtCycle; ++C) {
+ for (; C < SU->TopReadyCycle + PI.AcquireAtCycle; ++C) {
dbgs() << llvm::left_justify("|", ColWidth);
}
- for (unsigned I = 0, E = PI.Cycles - PI.StartAtCycle; I != E; ++I, ++C)
+ for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
+ ++I, ++C)
dbgs() << llvm::left_justify("| x", ColWidth);
while (C++ <= LastCycle)
dbgs() << llvm::left_justify("|", ColWidth);
@@ -1061,8 +1062,8 @@ LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const {
for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
PE = SchedModel.getWriteProcResEnd(SC);
PI != PE; ++PI) {
- if ((int)SU->BotReadyCycle - PI->Cycles + 1 < LastCycle)
- LastCycle = (int)SU->BotReadyCycle - PI->Cycles + 1;
+ if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle)
+ LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1;
}
}
// Print the header with the cycles
@@ -1097,19 +1098,20 @@ LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const {
llvm::stable_sort(ResourcesIt,
[](const MCWriteProcResEntry &LHS,
const MCWriteProcResEntry &RHS) -> bool {
- return LHS.StartAtCycle < RHS.StartAtCycle ||
- (LHS.StartAtCycle == RHS.StartAtCycle &&
- LHS.Cycles < RHS.Cycles);
+ return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||
+ (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
+ LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
});
for (const MCWriteProcResEntry &PI : ResourcesIt) {
C = FirstCycle;
const std::string ResName =
SchedModel.getResourceName(PI.ProcResourceIdx);
dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
- for (; C > ((int)SU->BotReadyCycle - (int)PI.StartAtCycle); --C) {
+ for (; C > ((int)SU->BotReadyCycle - (int)PI.AcquireAtCycle); --C) {
dbgs() << llvm::left_justify("|", ColWidth);
}
- for (unsigned I = 0, E = PI.Cycles - PI.StartAtCycle; I != E; ++I, --C)
+ for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
+ ++I, --C)
dbgs() << llvm::left_justify("| x", ColWidth);
while (C-- >= LastCycle)
dbgs() << llvm::left_justify("|", ColWidth);
@@ -1696,11 +1698,12 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
SmallVector<const MachineOperand *, 4> BaseOps;
int64_t Offset;
unsigned Width;
+ bool OffsetIsScalable;
MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
- int64_t Offset, unsigned Width)
+ int64_t Offset, bool OffsetIsScalable, unsigned Width)
: SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
- Width(Width) {}
+ Width(Width), OffsetIsScalable(OffsetIsScalable) {}
static bool Compare(const MachineOperand *const &A,
const MachineOperand *const &B) {
@@ -1829,8 +1832,10 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;
}
- if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
- CurrentClusterBytes))
+ if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,
+ MemOpa.OffsetIsScalable, MemOpb.BaseOps,
+ MemOpb.Offset, MemOpb.OffsetIsScalable,
+ ClusterLength, CurrentClusterBytes))
continue;
SUnit *SUa = MemOpa.SU;
@@ -1897,7 +1902,8 @@ void BaseMemOpClusterMutation::collectMemOpRecords(
unsigned Width;
if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
OffsetIsScalable, Width, TRI)) {
- MemOpRecords.push_back(MemOpInfo(&SU, BaseOps, Offset, Width));
+ MemOpRecords.push_back(
+ MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width));
LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
<< Offset << ", OffsetIsScalable: " << OffsetIsScalable
@@ -2237,8 +2243,9 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
unsigned PIdx = PI->ProcResourceIdx;
unsigned Factor = SchedModel->getResourceFactor(PIdx);
- assert(PI->Cycles >= PI->StartAtCycle);
- RemainingCounts[PIdx] += (Factor * (PI->Cycles - PI->StartAtCycle));
+ assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle);
+ RemainingCounts[PIdx] +=
+ (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle));
}
}
}
@@ -2291,15 +2298,15 @@ unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
/// Compute the next cycle at which the given processor resource unit
/// can be scheduled.
unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
- unsigned Cycles,
- unsigned StartAtCycle) {
+ unsigned ReleaseAtCycle,
+ unsigned AcquireAtCycle) {
if (SchedModel && SchedModel->enableIntervals()) {
if (isTop())
return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(
- CurrCycle, StartAtCycle, Cycles);
+ CurrCycle, AcquireAtCycle, ReleaseAtCycle);
return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(
- CurrCycle, StartAtCycle, Cycles);
+ CurrCycle, AcquireAtCycle, ReleaseAtCycle);
}
unsigned NextUnreserved = ReservedCycles[InstanceIdx];
@@ -2308,7 +2315,7 @@ unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
return CurrCycle;
// For bottom-up scheduling add the cycles needed for the current operation.
if (!isTop())
- NextUnreserved = std::max(CurrCycle, NextUnreserved + Cycles);
+ NextUnreserved = std::max(CurrCycle, NextUnreserved + ReleaseAtCycle);
return NextUnreserved;
}
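
Outside the segment-interval path, the bottom-up branch above still models occupancy with a single reservation counter: an instance becomes free only ReleaseAtCycle cycles after its previous reservation. A standalone numeric illustration of that branch:

    #include <algorithm>
    #include <cstdio>

    int main() {
      unsigned CurrCycle = 10;
      unsigned ReservedUntil = 9;  // ReservedCycles[InstanceIdx]
      unsigned ReleaseAtCycle = 3; // occupancy of the candidate instruction
      // Mirrors the bottom-up case in getNextResourceCycleByInstance:
      unsigned Next = std::max(CurrCycle, ReservedUntil + ReleaseAtCycle);
      std::printf("next free cycle: %u\n", Next); // 12
    }
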
@@ -2317,7 +2324,8 @@ unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
/// instance in the reserved cycles vector.
std::pair<unsigned, unsigned>
SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
- unsigned Cycles, unsigned StartAtCycle) {
+ unsigned ReleaseAtCycle,
+ unsigned AcquireAtCycle) {
if (MischedDetailResourceBooking) {
LLVM_DEBUG(dbgs() << " Resource booking (@" << CurrCycle << "c): \n");
LLVM_DEBUG(dumpReservedCycles());
@@ -2331,26 +2339,30 @@ SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
"Cannot have zero instances of a ProcResource");
if (isUnbufferedGroup(PIdx)) {
- // If any subunits are used by the instruction, report that the resource
- // group is available at 0, effectively removing the group record from
- // hazarding and basing the hazarding decisions on the subunit records.
- // Otherwise, choose the first available instance from among the subunits.
- // Specifications which assign cycles to both the subunits and the group or
- // which use an unbuffered group with buffered subunits will appear to
- // schedule strangely. In the first case, the additional cycles for the
- // group will be ignored. In the second, the group will be ignored
- // entirely.
+ // If any subunits are used by the instruction, report that the
+ // subunits of the resource group are available at the first cycle
+ // in which the unit is available, effectively removing the group
+ // record from hazarding and basing the hazarding decisions on the
+ // subunit records. Otherwise, choose the first available instance
+ // from among the subunits. Specifications which assign cycles to
+ // both the subunits and the group or which use an unbuffered
+ // group with buffered subunits will appear to schedule
+ // strangely. In the first case, the additional cycles for the
+ // group will be ignored. In the second, the group will be
+ // ignored entirely.
for (const MCWriteProcResEntry &PE :
make_range(SchedModel->getWriteProcResBegin(SC),
SchedModel->getWriteProcResEnd(SC)))
if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
- return std::make_pair(0u, StartIndex);
+ return std::make_pair(getNextResourceCycleByInstance(
+ StartIndex, ReleaseAtCycle, AcquireAtCycle),
+ StartIndex);
auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
unsigned NextUnreserved, NextInstanceIdx;
std::tie(NextUnreserved, NextInstanceIdx) =
- getNextResourceCycle(SC, SubUnits[I], Cycles, StartAtCycle);
+ getNextResourceCycle(SC, SubUnits[I], ReleaseAtCycle, AcquireAtCycle);
if (MinNextUnreserved > NextUnreserved) {
InstanceIdx = NextInstanceIdx;
MinNextUnreserved = NextUnreserved;
@@ -2362,7 +2374,7 @@ SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
++I) {
unsigned NextUnreserved =
- getNextResourceCycleByInstance(I, Cycles, StartAtCycle);
+ getNextResourceCycleByInstance(I, ReleaseAtCycle, AcquireAtCycle);
if (MischedDetailResourceBooking)
LLVM_DEBUG(dbgs() << " Instance " << I - StartIndex << " available @"
<< NextUnreserved << "c\n");
@@ -2419,14 +2431,14 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
make_range(SchedModel->getWriteProcResBegin(SC),
SchedModel->getWriteProcResEnd(SC))) {
unsigned ResIdx = PE.ProcResourceIdx;
- unsigned Cycles = PE.Cycles;
- unsigned StartAtCycle = PE.StartAtCycle;
+ unsigned ReleaseAtCycle = PE.ReleaseAtCycle;
+ unsigned AcquireAtCycle = PE.AcquireAtCycle;
unsigned NRCycle, InstanceIdx;
std::tie(NRCycle, InstanceIdx) =
- getNextResourceCycle(SC, ResIdx, Cycles, StartAtCycle);
+ getNextResourceCycle(SC, ResIdx, ReleaseAtCycle, AcquireAtCycle);
if (NRCycle > CurrCycle) {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
- MaxObservedStall = std::max(Cycles, MaxObservedStall);
+ MaxObservedStall = std::max(ReleaseAtCycle, MaxObservedStall);
#endif
LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
<< SchedModel->getResourceName(ResIdx)
@@ -2568,18 +2580,22 @@ void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
/// Add the given processor resource to this scheduled zone.
///
-/// \param Cycles indicates the number of consecutive (non-pipelined) cycles
-/// during which this resource is consumed.
+/// \param ReleaseAtCycle indicates the number of consecutive (non-pipelined)
+/// cycles during which this resource is released.
+///
+/// \param AcquireAtCycle indicates the cycle at which the resource is
+/// acquired after issue (assuming no stalls).
///
/// \return the next cycle at which the instruction may execute without
/// oversubscribing resources.
unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
- unsigned Cycles, unsigned NextCycle,
- unsigned StartAtCycle) {
+ unsigned ReleaseAtCycle,
+ unsigned NextCycle,
+ unsigned AcquireAtCycle) {
unsigned Factor = SchedModel->getResourceFactor(PIdx);
- unsigned Count = Factor * (Cycles - StartAtCycle);
+ unsigned Count = Factor * (ReleaseAtCycle - AcquireAtCycle);
LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
- << Cycles << "x" << Factor << "u\n");
+ << ReleaseAtCycle << "x" << Factor << "u\n");
// Update Executed resources counts.
incExecutedResources(PIdx, Count);
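
A worked instance of the count formula above, with made-up numbers: a resource acquired at cycle 1 and released at cycle 4, on a unit whose normalization factor is 2, contributes (4 - 1) * 2 = 6 resource units.

#include <cstdio>

int main() {
  unsigned Factor = 2, ReleaseAtCycle = 4, AcquireAtCycle = 1;
  unsigned Count = Factor * (ReleaseAtCycle - AcquireAtCycle);
  std::printf("%u\n", Count); // prints 6
}
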
@@ -2598,7 +2614,7 @@ unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
// For reserved resources, record the highest cycle using the resource.
unsigned NextAvailable, InstanceIdx;
std::tie(NextAvailable, InstanceIdx) =
- getNextResourceCycle(SC, PIdx, Cycles, StartAtCycle);
+ getNextResourceCycle(SC, PIdx, ReleaseAtCycle, AcquireAtCycle);
if (NextAvailable > CurrCycle) {
LLVM_DEBUG(dbgs() << " Resource conflict: "
<< SchedModel->getResourceName(PIdx)
@@ -2677,8 +2693,9 @@ void SchedBoundary::bumpNode(SUnit *SU) {
for (TargetSchedModel::ProcResIter
PI = SchedModel->getWriteProcResBegin(SC),
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
- unsigned RCycle = countResource(SC, PI->ProcResourceIdx, PI->Cycles,
- NextCycle, PI->StartAtCycle);
+ unsigned RCycle =
+ countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, NextCycle,
+ PI->AcquireAtCycle);
if (RCycle > NextCycle)
NextCycle = RCycle;
}
@@ -2695,27 +2712,27 @@ void SchedBoundary::bumpNode(SUnit *SU) {
if (SchedModel && SchedModel->enableIntervals()) {
unsigned ReservedUntil, InstanceIdx;
- std::tie(ReservedUntil, InstanceIdx) =
- getNextResourceCycle(SC, PIdx, PI->Cycles, PI->StartAtCycle);
+ std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
+ SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
if (isTop()) {
ReservedResourceSegments[InstanceIdx].add(
ResourceSegments::getResourceIntervalTop(
- NextCycle, PI->StartAtCycle, PI->Cycles),
+ NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
MIResourceCutOff);
} else {
ReservedResourceSegments[InstanceIdx].add(
ResourceSegments::getResourceIntervalBottom(
- NextCycle, PI->StartAtCycle, PI->Cycles),
+ NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
MIResourceCutOff);
}
} else {
unsigned ReservedUntil, InstanceIdx;
- std::tie(ReservedUntil, InstanceIdx) =
- getNextResourceCycle(SC, PIdx, PI->Cycles, PI->StartAtCycle);
+ std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
+ SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
if (isTop()) {
ReservedCycles[InstanceIdx] =
- std::max(ReservedUntil, NextCycle + PI->Cycles);
+ std::max(ReservedUntil, NextCycle + PI->ReleaseAtCycle);
} else
ReservedCycles[InstanceIdx] = NextCycle;
}
@@ -2913,9 +2930,9 @@ initResourceDelta(const ScheduleDAGMI *DAG,
PI = SchedModel->getWriteProcResBegin(SC),
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
if (PI->ProcResourceIdx == Policy.ReduceResIdx)
- ResDelta.CritResources += PI->Cycles;
+ ResDelta.CritResources += PI->ReleaseAtCycle;
if (PI->ProcResourceIdx == Policy.DemandResIdx)
- ResDelta.DemandedResources += PI->Cycles;
+ ResDelta.DemandedResources += PI->ReleaseAtCycle;
}
}
@@ -3956,7 +3973,7 @@ struct ILPOrder {
if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
return ScheduledTrees->test(SchedTreeB);
- // Trees with shallower connections have have lower priority.
+ // Trees with shallower connections have lower priority.
if (DFSResult->getSubtreeLevel(SchedTreeA)
!= DFSResult->getSubtreeLevel(SchedTreeB)) {
return DFSResult->getSubtreeLevel(SchedTreeA)
@@ -4243,7 +4260,7 @@ static bool sortIntervals(const ResourceSegments::IntervalTy &A,
}
unsigned ResourceSegments::getFirstAvailableAt(
- unsigned CurrCycle, unsigned StartAtCycle, unsigned Cycle,
+ unsigned CurrCycle, unsigned AcquireAtCycle, unsigned Cycle,
std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)>
IntervalBuilder) const {
assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals),
@@ -4251,7 +4268,7 @@ unsigned ResourceSegments::getFirstAvailableAt(
"Cannot execute on an un-sorted set of intervals.");
unsigned RetCycle = CurrCycle;
ResourceSegments::IntervalTy NewInterval =
- IntervalBuilder(RetCycle, StartAtCycle, Cycle);
+ IntervalBuilder(RetCycle, AcquireAtCycle, Cycle);
for (auto &Interval : _Intervals) {
if (!intersects(NewInterval, Interval))
continue;
@@ -4261,7 +4278,7 @@ unsigned ResourceSegments::getFirstAvailableAt(
assert(Interval.second > NewInterval.first &&
"Invalid intervals configuration.");
RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;
- NewInterval = IntervalBuilder(RetCycle, StartAtCycle, Cycle);
+ NewInterval = IntervalBuilder(RetCycle, AcquireAtCycle, Cycle);
}
return RetCycle;
}
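
The interval logic above can be illustrated with a self-contained sketch (standard C++ only; intersects and the shifting rule are simplified stand-ins for the LLVM versions): a candidate busy interval [Curr + AcquireAt, Curr + ReleaseAt) is slid forward past each overlapping reservation in the sorted list.

#include <cstdio>
#include <utility>
#include <vector>

using Interval = std::pair<unsigned, unsigned>; // [start, end)

static bool intersects(Interval A, Interval B) {
  return A.first < B.second && B.first < A.second;
}

unsigned firstAvailableAt(unsigned CurrCycle, unsigned AcquireAt,
                          unsigned ReleaseAt,
                          const std::vector<Interval> &Reserved) {
  unsigned Ret = CurrCycle;
  Interval Cand{Ret + AcquireAt, Ret + ReleaseAt};
  for (const Interval &R : Reserved) { // assumed sorted and disjoint
    if (!intersects(Cand, R))
      continue;
    Ret += R.second - Cand.first; // shift past the blocking reservation
    Cand = {Ret + AcquireAt, Ret + ReleaseAt};
  }
  return Ret;
}

int main() {
  std::vector<Interval> Reserved{{2, 4}, {5, 7}};
  std::printf("%u\n", firstAvailableAt(1, 0, 2, Reserved)); // prints 7
}
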
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
index 8da97dc7e742..e7e8f6026834 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
@@ -41,6 +41,7 @@
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
@@ -56,7 +57,6 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
-#include <map>
#include <utility>
#include <vector>
@@ -115,6 +115,7 @@ STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");
namespace {
class MachineSinking : public MachineFunctionPass {
+ const TargetSubtargetInfo *STI = nullptr;
const TargetInstrInfo *TII = nullptr;
const TargetRegisterInfo *TRI = nullptr;
MachineRegisterInfo *MRI = nullptr; // Machine register information
@@ -137,7 +138,7 @@ namespace {
DenseSet<Register> RegsToClearKillFlags;
using AllSuccsCache =
- std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;
+ DenseMap<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;
/// DBG_VALUE pointer and flag. The flag is true if this DBG_VALUE is
/// post-dominated by another DBG_VALUE of the same variable location.
@@ -158,14 +159,18 @@ namespace {
/// current block.
DenseSet<DebugVariable> SeenDbgVars;
- std::map<std::pair<MachineBasicBlock *, MachineBasicBlock *>, bool>
+ DenseMap<std::pair<MachineBasicBlock *, MachineBasicBlock *>, bool>
HasStoreCache;
- std::map<std::pair<MachineBasicBlock *, MachineBasicBlock *>,
- std::vector<MachineInstr *>>
+
+ DenseMap<std::pair<MachineBasicBlock *, MachineBasicBlock *>,
+ SmallVector<MachineInstr *>>
StoreInstrCache;
/// Cached BB's register pressure.
- std::map<MachineBasicBlock *, std::vector<unsigned>> CachedRegisterPressure;
+ DenseMap<const MachineBasicBlock *, std::vector<unsigned>>
+ CachedRegisterPressure;
+
+ bool EnableSinkAndFold;
public:
static char ID; // Pass identification
@@ -187,6 +192,7 @@ namespace {
AU.addPreserved<MachineLoopInfo>();
if (UseBlockFreqInfo)
AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<TargetPassConfig>();
}
void releaseMemory() override {
@@ -246,11 +252,17 @@ namespace {
bool PerformTrivialForwardCoalescing(MachineInstr &MI,
MachineBasicBlock *MBB);
+ bool PerformSinkAndFold(MachineInstr &MI, MachineBasicBlock *MBB);
+
SmallVector<MachineBasicBlock *, 4> &
GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
AllSuccsCache &AllSuccessors) const;
- std::vector<unsigned> &getBBRegisterPressure(MachineBasicBlock &MBB);
+ std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB);
+
+ bool registerPressureSetExceedsLimit(unsigned NRegs,
+ const TargetRegisterClass *RC,
+ const MachineBasicBlock &MBB);
};
} // end anonymous namespace
@@ -288,7 +300,8 @@ static bool blockPrologueInterferes(const MachineBasicBlock *BB,
if (!Reg)
continue;
if (MO.isUse()) {
- if (Reg.isPhysical() && MRI && MRI->isConstantPhysReg(Reg))
+ if (Reg.isPhysical() &&
+ (TII->isIgnorableUse(MO) || (MRI && MRI->isConstantPhysReg(Reg))))
continue;
if (PI->modifiesRegister(Reg, TRI))
return true;
@@ -338,6 +351,236 @@ bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
return true;
}
+bool MachineSinking::PerformSinkAndFold(MachineInstr &MI,
+ MachineBasicBlock *MBB) {
+ if (MI.isCopy() || MI.mayLoadOrStore() ||
+ MI.getOpcode() == TargetOpcode::REG_SEQUENCE)
+ return false;
+
+ // Don't sink instructions that the target prefers not to sink.
+ if (!TII->shouldSink(MI))
+ return false;
+
+ // Check if it's safe to move the instruction.
+ bool SawStore = true;
+ if (!MI.isSafeToMove(AA, SawStore))
+ return false;
+
+ // Convergent operations may not be made control-dependent on additional
+ // values.
+ if (MI.isConvergent())
+ return false;
+
+ // Don't sink defs/uses of hard registers or if the instruction defines more
+ // than one register.
+ // Don't sink more than two register uses - this covers most of the cases and
+ // greatly simplifies the register pressure checks.
+ Register DefReg;
+ Register UsedRegA, UsedRegB;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isImm() || MO.isRegMask() || MO.isRegLiveOut() || MO.isMetadata() ||
+ MO.isMCSymbol() || MO.isDbgInstrRef() || MO.isCFIIndex() ||
+ MO.isIntrinsicID() || MO.isPredicate() || MO.isShuffleMask())
+ continue;
+ if (!MO.isReg())
+ return false;
+
+ Register Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ if (Reg.isVirtual()) {
+ if (MO.isDef()) {
+ if (DefReg)
+ return false;
+ DefReg = Reg;
+ continue;
+ }
+
+ if (UsedRegA == 0)
+ UsedRegA = Reg;
+ else if (UsedRegB == 0)
+ UsedRegB = Reg;
+ else
+ return false;
+ continue;
+ }
+
+ if (Reg.isPhysical() &&
+ (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO)))
+ continue;
+
+ return false;
+ }
+
+ // Scan uses of the destination register. Every use, except the last, must be
+ // a copy, with a chain of copies terminating with either a copy into a hard
+ // register, or a load/store instruction where the use is part of the
+ // address (*not* the stored value).
+ using SinkInfo = std::pair<MachineInstr *, ExtAddrMode>;
+ SmallVector<SinkInfo> SinkInto;
+ SmallVector<Register> Worklist;
+
+ const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
+ const TargetRegisterClass *RCA =
+ UsedRegA == 0 ? nullptr : MRI->getRegClass(UsedRegA);
+ const TargetRegisterClass *RCB =
+ UsedRegB == 0 ? nullptr : MRI->getRegClass(UsedRegB);
+
+ Worklist.push_back(DefReg);
+ while (!Worklist.empty()) {
+ Register Reg = Worklist.pop_back_val();
+
+ for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+ ExtAddrMode MaybeAM;
+ MachineInstr &UseInst = *MO.getParent();
+ if (UseInst.isCopy()) {
+ Register DstReg;
+ if (const MachineOperand &O = UseInst.getOperand(0); O.isReg())
+ DstReg = O.getReg();
+ if (DstReg == 0)
+ return false;
+ if (DstReg.isVirtual()) {
+ Worklist.push_back(DstReg);
+ continue;
+ }
+ // If we are going to replace a copy, the original instruction must be
+ // as cheap as a copy.
+ if (!TII->isAsCheapAsAMove(MI))
+ return false;
+ // The hard register must be in the register class of the original
+ // instruction's destination register.
+ if (!RC->contains(DstReg))
+ return false;
+ } else if (UseInst.mayLoadOrStore()) {
+ ExtAddrMode AM;
+ if (!TII->canFoldIntoAddrMode(UseInst, Reg, MI, AM))
+ return false;
+ MaybeAM = AM;
+ } else {
+ return false;
+ }
+
+ if (UseInst.getParent() != MI.getParent()) {
+ // If the register class of the register we are replacing is a superset
+ // of any of the register classes of the operands of the materialized
+ // instruction, don't consider that live range extended.
+ const TargetRegisterClass *RCS = MRI->getRegClass(Reg);
+ if (RCA && RCA->hasSuperClassEq(RCS))
+ RCA = nullptr;
+ else if (RCB && RCB->hasSuperClassEq(RCS))
+ RCB = nullptr;
+ if (RCA || RCB) {
+ if (RCA == nullptr) {
+ RCA = RCB;
+ RCB = nullptr;
+ }
+
+ unsigned NRegs = !!RCA + !!RCB;
+ if (RCA == RCB)
+ RCB = nullptr;
+
+ // Check we don't exceed register pressure at the destination.
+ const MachineBasicBlock &MBB = *UseInst.getParent();
+ if (RCB == nullptr) {
+ if (registerPressureSetExceedsLimit(NRegs, RCA, MBB))
+ return false;
+ } else if (registerPressureSetExceedsLimit(1, RCA, MBB) ||
+ registerPressureSetExceedsLimit(1, RCB, MBB)) {
+ return false;
+ }
+ }
+ }
+
+ SinkInto.emplace_back(&UseInst, MaybeAM);
+ }
+ }
+
+ if (SinkInto.empty())
+ return false;
+
+ // Now we know we can fold the instruction in all its users.
+ for (auto &[SinkDst, MaybeAM] : SinkInto) {
+ MachineInstr *New = nullptr;
+ LLVM_DEBUG(dbgs() << "Sinking copy of"; MI.dump(); dbgs() << "into";
+ SinkDst->dump());
+ if (SinkDst->isCopy()) {
+ // TODO: After performing the sink-and-fold, the original instruction is
+ // deleted. Its value is still available (in a hard register), so if there
+ // are debug instructions which refer to the (now deleted) virtual
+ // register they could be updated to refer to the hard register, in
+ // principle. However, it's not clear how to do that, moreover in some
+ // cases the debug instructions may need to be replicated proportionally
+ // to the number of the COPY instructions replaced and in some extreme
+ // cases we can end up with a quadratic increase in the number of debug
+ // instructions.
+
+ // Sink a copy of the instruction, replacing a COPY instruction.
+ MachineBasicBlock::iterator InsertPt = SinkDst->getIterator();
+ Register DstReg = SinkDst->getOperand(0).getReg();
+ TII->reMaterialize(*SinkDst->getParent(), InsertPt, DstReg, 0, MI, *TRI);
+ New = &*std::prev(InsertPt);
+ if (!New->getDebugLoc())
+ New->setDebugLoc(SinkDst->getDebugLoc());
+
+ // The operand registers of the "sunk" instruction have their live range
+ // extended and their kill flags may no longer be correct. Conservatively
+ // clear the kill flags.
+ if (UsedRegA)
+ MRI->clearKillFlags(UsedRegA);
+ if (UsedRegB)
+ MRI->clearKillFlags(UsedRegB);
+ } else {
+ // Fold instruction into the addressing mode of a memory instruction.
+ New = TII->emitLdStWithAddr(*SinkDst, MaybeAM);
+
+ // The registers of the addressing mode may have their live range extended
+ // and their kill flags may no longer be correct. Conservatively clear the
+ // kill flags.
+ if (Register R = MaybeAM.BaseReg; R.isValid() && R.isVirtual())
+ MRI->clearKillFlags(R);
+ if (Register R = MaybeAM.ScaledReg; R.isValid() && R.isVirtual())
+ MRI->clearKillFlags(R);
+ }
+ LLVM_DEBUG(dbgs() << "yielding"; New->dump());
+ // Clear the StoreInstrCache, since we may invalidate it by erasing.
+ if (SinkDst->mayStore() && !SinkDst->hasOrderedMemoryRef())
+ StoreInstrCache.clear();
+ SinkDst->eraseFromParent();
+ }
+
+ // Collect operands that need to be cleaned up because the registers no longer
+ // exist (in COPYs and debug instructions). We cannot delete instructions or
+ // clear operands while traversing register uses.
+ SmallVector<MachineOperand *> Cleanup;
+ Worklist.push_back(DefReg);
+ while (!Worklist.empty()) {
+ Register Reg = Worklist.pop_back_val();
+ for (MachineOperand &MO : MRI->use_operands(Reg)) {
+ MachineInstr *U = MO.getParent();
+ assert((U->isCopy() || U->isDebugInstr()) &&
+ "Only debug uses and copies must remain");
+ if (U->isCopy())
+ Worklist.push_back(U->getOperand(0).getReg());
+ Cleanup.push_back(&MO);
+ }
+ }
+
+ // Delete the dead COPYs and clear operands in debug instructions
+ for (MachineOperand *MO : Cleanup) {
+ MachineInstr *I = MO->getParent();
+ if (I->isCopy()) {
+ I->eraseFromParent();
+ } else {
+ MO->setReg(0);
+ MO->setSubReg(0);
+ }
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
+
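
The shape of the use-chain scan in PerformSinkAndFold, reduced to standard C++ (the Use record and UseMap are invented stand-ins for MachineRegisterInfo queries; the real code also checks cost and register-class constraints): follow virtual-register copies through a worklist, and accept only physreg copies or foldable memory uses as chain terminators.

#include <unordered_map>
#include <vector>

struct Use { bool IsCopy; bool DstIsVirtual; int DstReg; bool Foldable; };
using UseMap = std::unordered_map<int, std::vector<Use>>;

bool allUsesSinkable(int DefReg, const UseMap &Uses) {
  std::vector<int> Worklist{DefReg};
  while (!Worklist.empty()) {
    int Reg = Worklist.back();
    Worklist.pop_back();
    auto It = Uses.find(Reg);
    if (It == Uses.end())
      continue;
    for (const Use &U : It->second) {
      if (U.IsCopy && U.DstIsVirtual)
        Worklist.push_back(U.DstReg); // follow the virtual copy chain
      else if (!U.IsCopy && !U.Foldable)
        return false; // a use we can neither copy-forward nor fold
      // Copies into physical registers and foldable loads/stores
      // terminate the chain; they are rewritten in a second pass.
    }
  }
  return true;
}

int main() {
  UseMap Uses;
  Uses[1] = {{true, true, 2, false}};  // %1 is copied into %2
  Uses[2] = {{false, false, 0, true}}; // %2 folds into a load address
  return allUsesSinkable(1, Uses) ? 0 : 1; // exits 0
}
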
/// AllUsesDominatedByBlock - Return true if all uses of the specified register
/// occur in blocks dominated by the specified block. If any use is in the
/// definition block, then return false since it is never legal to move def
@@ -461,8 +704,9 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n");
- TII = MF.getSubtarget().getInstrInfo();
- TRI = MF.getSubtarget().getRegisterInfo();
+ STI = &MF.getSubtarget();
+ TII = STI->getInstrInfo();
+ TRI = STI->getRegisterInfo();
MRI = &MF.getRegInfo();
DT = &getAnalysis<MachineDominatorTree>();
PDT = &getAnalysis<MachinePostDominatorTree>();
@@ -471,6 +715,8 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
RegClassInfo.runOnMachineFunction(MF);
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+ EnableSinkAndFold = PassConfig->getEnableSinkAndFold();
bool EverMadeChange = false;
@@ -496,6 +742,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
MadeChange = true;
++NumSplit;
+ CI->splitCriticalEdge(Pair.first, Pair.second, NewSucc);
} else
LLVM_DEBUG(dbgs() << " *** Not legal to break critical edge\n");
}
@@ -547,8 +794,8 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
}
bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
- // Can't sink anything out of a block that has less than two successors.
- if (MBB.succ_size() <= 1 || MBB.empty()) return false;
+ if ((!EnableSinkAndFold && MBB.succ_size() <= 1) || MBB.empty())
+ return false;
// Don't bother sinking code out of unreachable blocks. In addition to being
// unprofitable, it can also lead to infinite looping, because in an
@@ -579,8 +826,16 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
continue;
}
- bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
- if (Joined) {
+ if (EnableSinkAndFold && PerformSinkAndFold(MI, &MBB)) {
+ MadeChange = true;
+ continue;
+ }
+
+ // Can't sink anything out of a block that has less than two successors.
+ if (MBB.succ_size() <= 1)
+ continue;
+
+ if (PerformTrivialForwardCoalescing(MI, &MBB)) {
MadeChange = true;
continue;
}
@@ -597,7 +852,6 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
SeenDbgVars.clear();
// recalculate the bb register pressure after sinking one BB.
CachedRegisterPressure.clear();
-
return MadeChange;
}
@@ -737,7 +991,7 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
}
std::vector<unsigned> &
-MachineSinking::getBBRegisterPressure(MachineBasicBlock &MBB) {
+MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
// Currently, to save compile time, an MBB's register pressure will not change
// within one ProcessBlock iteration because of CachedRegisterPressure, but the
// MBB's register pressure does change after sinking any instructions into it.
@@ -753,10 +1007,10 @@ MachineSinking::getBBRegisterPressure(MachineBasicBlock &MBB) {
RPTracker.init(MBB.getParent(), &RegClassInfo, nullptr, &MBB, MBB.end(),
/*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);
- for (MachineBasicBlock::iterator MII = MBB.instr_end(),
- MIE = MBB.instr_begin();
+ for (MachineBasicBlock::const_iterator MII = MBB.instr_end(),
+ MIE = MBB.instr_begin();
MII != MIE; --MII) {
- MachineInstr &MI = *std::prev(MII);
+ const MachineInstr &MI = *std::prev(MII);
if (MI.isDebugInstr() || MI.isPseudoProbe())
continue;
RegisterOperands RegOpers;
@@ -772,6 +1026,19 @@ MachineSinking::getBBRegisterPressure(MachineBasicBlock &MBB) {
return It.first->second;
}
+bool MachineSinking::registerPressureSetExceedsLimit(
+ unsigned NRegs, const TargetRegisterClass *RC,
+ const MachineBasicBlock &MBB) {
+ unsigned Weight = NRegs * TRI->getRegClassWeight(RC).RegWeight;
+ const int *PS = TRI->getRegClassPressureSets(RC);
+ std::vector<unsigned> BBRegisterPressure = getBBRegisterPressure(MBB);
+ for (; *PS != -1; PS++)
+ if (Weight + BBRegisterPressure[*PS] >=
+ TRI->getRegPressureSetLimit(*MBB.getParent(), *PS))
+ return true;
+ return false;
+}
+
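
A compact plain-C++ restatement of the new helper (the pressure sets and limits are invented example data): the class weight is scaled by the number of registers being sunk, and the sink is rejected if any affected pressure set would reach its limit.

#include <cstdio>
#include <vector>

struct PressureSet { unsigned Current, Limit; };

bool exceedsLimit(unsigned NRegs, unsigned RegWeight,
                  const std::vector<unsigned> &SetsOfClass,
                  const std::vector<PressureSet> &Sets) {
  unsigned Weight = NRegs * RegWeight;
  for (unsigned PS : SetsOfClass)
    if (Weight + Sets[PS].Current >= Sets[PS].Limit)
      return true;
  return false;
}

int main() {
  std::vector<PressureSet> Sets{{10, 12}, {3, 32}};
  // Sinking two weight-1 registers of a class in set 0: 2 + 10 >= 12.
  std::printf("%d\n", exceedsLimit(2, 1, {0, 1}, Sets)); // prints 1
}
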
/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
MachineBasicBlock *MBB,
@@ -816,21 +1083,6 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
if (!MCycle)
return false;
- auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) {
- unsigned Weight = TRI->getRegClassWeight(RC).RegWeight;
- const int *PS = TRI->getRegClassPressureSets(RC);
- // Get register pressure for block SuccToSinkTo.
- std::vector<unsigned> BBRegisterPressure =
- getBBRegisterPressure(*SuccToSinkTo);
- for (; *PS != -1; PS++)
- // check if any register pressure set exceeds limit in block SuccToSinkTo
- // after sinking.
- if (Weight + BBRegisterPressure[*PS] >=
- TRI->getRegPressureSetLimit(*MBB->getParent(), *PS))
- return true;
- return false;
- };
-
// If this instruction is inside a cycle and sinking it can shorten the live
// ranges of more registers, it is still profitable.
for (const MachineOperand &MO : MI.operands()) {
@@ -870,7 +1122,8 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
// The DefMI is defined inside the cycle.
// If sinking this operand makes some register pressure set exceed limit,
// it is not profitable.
- if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) {
+ if (registerPressureSetExceedsLimit(1, MRI->getRegClass(Reg),
+ *SuccToSinkTo)) {
LLVM_DEBUG(dbgs() << "register pressure exceed limit, not profitable.");
return false;
}
@@ -915,7 +1168,7 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
- bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
+ bool HasBlockFreq = LHSFreq != 0 || RHSFreq != 0;
return HasBlockFreq ? LHSFreq < RHSFreq
: CI->getCycleDepth(L) < CI->getCycleDepth(R);
});
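
The one-character change above is easy to miss; a standalone snippet showing its effect (frequencies are made-up): with &&, a zero-frequency block forced the fallback to cycle depth even when the other block had a meaningful frequency, while with || the frequency ordering is used whenever either side has one.

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t LHSFreq = 0, RHSFreq = 100;
  bool OldHasFreq = LHSFreq != 0 && RHSFreq != 0; // false: depth fallback
  bool NewHasFreq = LHSFreq != 0 || RHSFreq != 0; // true: compare frequencies
  std::printf("%d %d\n", OldHasFreq, NewHasFreq); // prints 0 1
}
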
@@ -1006,24 +1259,19 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
if (MBB == SuccToSinkTo)
return nullptr;
- if (!SuccToSinkTo)
- return nullptr;
-
// It's not safe to sink instructions to EH landing pad. Control flow into
// landing pad is implicitly defined.
- if (SuccToSinkTo->isEHPad())
+ if (SuccToSinkTo && SuccToSinkTo->isEHPad())
return nullptr;
// It ought to be okay to sink instructions into an INLINEASM_BR target, but
// only if we make sure that MI occurs _before_ an INLINEASM_BR instruction in
// the source block (which this code does not yet do). So for now, forbid
// doing so.
- if (SuccToSinkTo->isInlineAsmBrIndirectTarget())
+ if (SuccToSinkTo && SuccToSinkTo->isInlineAsmBrIndirectTarget())
return nullptr;
- MachineBasicBlock::const_iterator InsertPos =
- SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin());
- if (blockPrologueInterferes(SuccToSinkTo, InsertPos, MI, TRI, TII, MRI))
+ if (SuccToSinkTo && !TII->isSafeToSink(MI, SuccToSinkTo, CI))
return nullptr;
return SuccToSinkTo;
@@ -1186,11 +1434,11 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
// Has this pair of blocks been queried before, with a definite cached
// result?
- if (HasStoreCache.find(BlockPair) != HasStoreCache.end())
- return HasStoreCache[BlockPair];
+ if (auto It = HasStoreCache.find(BlockPair); It != HasStoreCache.end())
+ return It->second;
- if (StoreInstrCache.find(BlockPair) != StoreInstrCache.end())
- return llvm::any_of(StoreInstrCache[BlockPair], [&](MachineInstr *I) {
+ if (auto It = StoreInstrCache.find(BlockPair); It != StoreInstrCache.end())
+ return llvm::any_of(It->second, [&](MachineInstr *I) {
return I->mayAlias(AA, MI, false);
});
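
The rewritten lookups use the C++17 init-statement form; shown standalone with std::map (the same shape applies to llvm::DenseMap), it performs one lookup where find() followed by operator[] performed two.

#include <cstdio>
#include <map>

int main() {
  std::map<int, int> Cache{{1, 42}};
  if (auto It = Cache.find(1); It != Cache.end())
    std::printf("%d\n", It->second); // prints 42
}
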
@@ -1385,7 +1633,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
// If the instruction to move defines a dead physical register which is live
// when leaving the basic block, don't move it because it could turn into a
- // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
+ // "zombie" define of that preg. E.g., EFLAGS.
for (const MachineOperand &MO : MI.all_defs()) {
Register Reg = MO.getReg();
if (Reg == 0 || !Reg.isPhysical())
@@ -1704,10 +1952,9 @@ static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
for (auto U : UsedOpsInCopy) {
Register SrcReg = MI->getOperand(U).getReg();
LaneBitmask Mask;
- for (MCRegUnitMaskIterator S(SrcReg, TRI); S.isValid(); ++S) {
+ for (MCRegUnitMaskIterator S(SrcReg, TRI); S.isValid(); ++S)
Mask |= (*S).second;
- }
- SuccBB->addLiveIn(SrcReg, Mask.any() ? Mask : LaneBitmask::getAll());
+ SuccBB->addLiveIn(SrcReg, Mask);
}
SuccBB->sortUniqueLiveIns();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp
index 9628e4c5aeb5..1cd90474898e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -14,14 +14,12 @@
#include "llvm/CodeGen/MachineStableHash.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StableHashing.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/ilist_iterator.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -30,7 +28,6 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
-#include "llvm/CodeGen/StableHashing.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/MC/MCSymbol.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index 4f66f2e672d1..3e6f36fe936f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -71,7 +71,7 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
Loops = &getAnalysis<MachineLoopInfo>();
SchedModel.init(&ST);
BlockInfo.resize(MF->getNumBlockIDs());
- ProcResourceCycles.resize(MF->getNumBlockIDs() *
+ ProcReleaseAtCycles.resize(MF->getNumBlockIDs() *
SchedModel.getNumProcResourceKinds());
return false;
}
@@ -126,7 +126,7 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
PI = SchedModel.getWriteProcResBegin(SC),
PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
- PRCycles[PI->ProcResourceIdx] += PI->Cycles;
+ PRCycles[PI->ProcResourceIdx] += PI->ReleaseAtCycle;
}
}
FBI->InstrCount = InstrCount;
@@ -134,19 +134,19 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
// Scale the resource cycles so they are comparable.
unsigned PROffset = MBB->getNumber() * PRKinds;
for (unsigned K = 0; K != PRKinds; ++K)
- ProcResourceCycles[PROffset + K] =
+ ProcReleaseAtCycles[PROffset + K] =
PRCycles[K] * SchedModel.getResourceFactor(K);
return FBI;
}
ArrayRef<unsigned>
-MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
+MachineTraceMetrics::getProcReleaseAtCycles(unsigned MBBNum) const {
assert(BlockInfo[MBBNum].hasResources() &&
- "getResources() must be called before getProcResourceCycles()");
+ "getResources() must be called before getProcReleaseAtCycles()");
unsigned PRKinds = SchedModel.getNumProcResourceKinds();
- assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
- return ArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds);
+ assert((MBBNum+1) * PRKinds <= ProcReleaseAtCycles.size());
+ return ArrayRef(ProcReleaseAtCycles.data() + MBBNum * PRKinds, PRKinds);
}
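
The scaling step above normalizes per-kind cycle counts so that resources of different widths compare; a toy computation with invented numbers:

#include <cstdio>
#include <vector>

int main() {
  std::vector<unsigned> ReleaseAtCycles{4, 2}; // raw cycles per kind
  std::vector<unsigned> Factor{1, 3};          // e.g. kind 1 has 3 units
  for (unsigned K = 0; K != ReleaseAtCycles.size(); ++K)
    std::printf("kind %u -> %u\n", K, ReleaseAtCycles[K] * Factor[K]);
  // prints: kind 0 -> 4, kind 1 -> 6
}
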
//===----------------------------------------------------------------------===//
@@ -197,7 +197,7 @@ computeDepthResources(const MachineBasicBlock *MBB) {
// Compute per-resource depths.
ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
- ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
+ ArrayRef<unsigned> PredPRCycles = MTM.getProcReleaseAtCycles(PredNum);
for (unsigned K = 0; K != PRKinds; ++K)
ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
}
@@ -212,7 +212,7 @@ computeHeightResources(const MachineBasicBlock *MBB) {
// Compute resources for the current block.
TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
- ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());
+ ArrayRef<unsigned> PRCycles = MTM.getProcReleaseAtCycles(MBB->getNumber());
// The trace tail is done.
if (!TBI->Succ) {
@@ -1204,7 +1204,7 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
unsigned PRMax = 0;
ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
if (Bottom) {
- ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
+ ArrayRef<unsigned> PRCycles = TE.MTM.getProcReleaseAtCycles(getBlockNum());
for (unsigned K = 0; K != PRDepths.size(); ++K)
PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
} else {
@@ -1248,8 +1248,8 @@ unsigned MachineTraceMetrics::Trace::getResourceLength(
PI != PE; ++PI) {
if (PI->ProcResourceIdx != ResourceIdx)
continue;
- Cycles +=
- (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
+ Cycles += (PI->ReleaseAtCycle *
+ TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
}
}
return Cycles;
@@ -1258,7 +1258,7 @@ unsigned MachineTraceMetrics::Trace::getResourceLength(
for (unsigned K = 0; K != PRDepths.size(); ++K) {
unsigned PRCycles = PRDepths[K] + PRHeights[K];
for (const MachineBasicBlock *MBB : Extrablocks)
- PRCycles += TE.MTM.getProcResourceCycles(MBB->getNumber())[K];
+ PRCycles += TE.MTM.getProcReleaseAtCycles(MBB->getNumber())[K];
PRCycles += extraCycles(ExtraInstrs, K);
PRCycles -= extraCycles(RemoveInstrs, K);
PRMax = std::max(PRMax, PRCycles);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
index 0e02c50284c6..3e0fe2b1ba08 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
@@ -157,7 +157,7 @@ MachineUniformityInfo llvm::computeMachineUniformityInfo(
MachineFunction &F, const MachineCycleInfo &cycleInfo,
const MachineDomTree &domTree, bool HasBranchDivergence) {
assert(F.getRegInfo().isSSA() && "Expected to be run on SSA form!");
- MachineUniformityInfo UI(F, domTree, cycleInfo);
+ MachineUniformityInfo UI(domTree, cycleInfo);
if (HasBranchDivergence)
UI.compute();
return UI;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
index 7acd3c4039e8..a015d9bbd2d3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -32,6 +32,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeCalc.h"
@@ -91,9 +92,15 @@ namespace {
struct MachineVerifier {
MachineVerifier(Pass *pass, const char *b) : PASS(pass), Banner(b) {}
+ MachineVerifier(const char *b, LiveVariables *LiveVars,
+ LiveIntervals *LiveInts, LiveStacks *LiveStks,
+ SlotIndexes *Indexes)
+ : Banner(b), LiveVars(LiveVars), LiveInts(LiveInts), LiveStks(LiveStks),
+ Indexes(Indexes) {}
+
unsigned verify(const MachineFunction &MF);
- Pass *const PASS;
+ Pass *const PASS = nullptr;
const char *Banner;
const MachineFunction *MF = nullptr;
const TargetMachine *TM = nullptr;
@@ -223,7 +230,11 @@ namespace {
bool verifyAllRegOpsScalar(const MachineInstr &MI,
const MachineRegisterInfo &MRI);
bool verifyVectorElementMatch(LLT Ty0, LLT Ty1, const MachineInstr *MI);
+
+ bool verifyGIntrinsicSideEffects(const MachineInstr *MI);
+ bool verifyGIntrinsicConvergence(const MachineInstr *MI);
void verifyPreISelGenericInstruction(const MachineInstr *MI);
+
void visitMachineInstrBefore(const MachineInstr *MI);
void visitMachineOperand(const MachineOperand *MO, unsigned MONum);
void visitMachineBundleAfter(const MachineInstr *MI);
@@ -350,6 +361,16 @@ bool MachineFunction::verify(Pass *p, const char *Banner, bool AbortOnErrors)
return FoundErrors == 0;
}
+bool MachineFunction::verify(LiveIntervals *LiveInts, SlotIndexes *Indexes,
+ const char *Banner, bool AbortOnErrors) const {
+ MachineFunction &MF = const_cast<MachineFunction &>(*this);
+ unsigned FoundErrors =
+ MachineVerifier(Banner, nullptr, LiveInts, nullptr, Indexes).verify(MF);
+ if (AbortOnErrors && FoundErrors)
+ report_fatal_error("Found " + Twine(FoundErrors) + " machine code errors.");
+ return FoundErrors == 0;
+}
+
void MachineVerifier::verifySlotIndexes() const {
if (Indexes == nullptr)
return;
@@ -399,10 +420,6 @@ unsigned MachineVerifier::verify(const MachineFunction &MF) {
isFunctionTracksDebugUserValues = MF.getProperties().hasProperty(
MachineFunctionProperties::Property::TracksDebugUserValues);
- LiveVars = nullptr;
- LiveInts = nullptr;
- LiveStks = nullptr;
- Indexes = nullptr;
if (PASS) {
LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>();
// We don't want to verify LiveVariables if LiveIntervals is available.
@@ -871,7 +888,8 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {
// There may be implicit ops after the fixed operands.
if (!MO.isImm())
break;
- NumOps = 1 + InlineAsm::getNumOperandRegisters(MO.getImm());
+ const InlineAsm::Flag F(MO.getImm());
+ NumOps = 1 + F.getNumOperandRegisters();
}
if (OpNo > MI->getNumOperands())
@@ -947,7 +965,7 @@ bool MachineVerifier::verifyVectorElementMatch(LLT Ty0, LLT Ty1,
return false;
}
- if (Ty0.isVector() && Ty0.getNumElements() != Ty1.getNumElements()) {
+ if (Ty0.isVector() && Ty0.getElementCount() != Ty1.getElementCount()) {
report("operand types must preserve number of vector elements", MI);
return false;
}
@@ -955,6 +973,55 @@ bool MachineVerifier::verifyVectorElementMatch(LLT Ty0, LLT Ty1,
return true;
}
+bool MachineVerifier::verifyGIntrinsicSideEffects(const MachineInstr *MI) {
+ auto Opcode = MI->getOpcode();
+ bool NoSideEffects = Opcode == TargetOpcode::G_INTRINSIC ||
+ Opcode == TargetOpcode::G_INTRINSIC_CONVERGENT;
+ unsigned IntrID = cast<GIntrinsic>(MI)->getIntrinsicID();
+ if (IntrID != 0 && IntrID < Intrinsic::num_intrinsics) {
+ AttributeList Attrs = Intrinsic::getAttributes(
+ MF->getFunction().getContext(), static_cast<Intrinsic::ID>(IntrID));
+ bool DeclHasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory();
+ if (NoSideEffects && DeclHasSideEffects) {
+ report(Twine(TII->getName(Opcode),
+ " used with intrinsic that accesses memory"),
+ MI);
+ return false;
+ }
+ if (!NoSideEffects && !DeclHasSideEffects) {
+ report(Twine(TII->getName(Opcode), " used with readnone intrinsic"), MI);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool MachineVerifier::verifyGIntrinsicConvergence(const MachineInstr *MI) {
+ auto Opcode = MI->getOpcode();
+ bool NotConvergent = Opcode == TargetOpcode::G_INTRINSIC ||
+ Opcode == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS;
+ unsigned IntrID = cast<GIntrinsic>(MI)->getIntrinsicID();
+ if (IntrID != 0 && IntrID < Intrinsic::num_intrinsics) {
+ AttributeList Attrs = Intrinsic::getAttributes(
+ MF->getFunction().getContext(), static_cast<Intrinsic::ID>(IntrID));
+ bool DeclIsConvergent = Attrs.hasFnAttr(Attribute::Convergent);
+ if (NotConvergent && DeclIsConvergent) {
+ report(Twine(TII->getName(Opcode), " used with a convergent intrinsic"),
+ MI);
+ return false;
+ }
+ if (!NotConvergent && !DeclIsConvergent) {
+ report(
+ Twine(TII->getName(Opcode), " used with a non-convergent intrinsic"),
+ MI);
+ return false;
+ }
+ }
+
+ return true;
+}
+
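
The two new helpers check complementary halves of a 2x2 opcode matrix (G_INTRINSIC, _W_SIDE_EFFECTS, _CONVERGENT, _CONVERGENT_W_SIDE_EFFECTS). A plain-C++ condensation of the rule, not the verifier's control flow: the opcode's claimed properties must match the intrinsic declaration's attributes in both dimensions.

#include <cstdio>

enum class Op { Plain, WSideEffects, Convergent, ConvergentWSideEffects };

bool opcodeMatchesDecl(Op O, bool DeclHasSideEffects, bool DeclIsConvergent) {
  bool OpSideEffects =
      O == Op::WSideEffects || O == Op::ConvergentWSideEffects;
  bool OpConvergent =
      O == Op::Convergent || O == Op::ConvergentWSideEffects;
  return OpSideEffects == DeclHasSideEffects &&
         OpConvergent == DeclIsConvergent;
}

int main() {
  std::printf("%d\n", opcodeMatchesDecl(Op::Plain, false, false));      // 1
  std::printf("%d\n", opcodeMatchesDecl(Op::Convergent, false, false)); // 0
}
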
void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (isFunctionSelected)
report("Unexpected generic instruction in a Selected function", MI);
@@ -1493,7 +1560,9 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
break;
}
case TargetOpcode::G_INTRINSIC:
- case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
+ case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
// TODO: Should verify number of def and use operands, but the current
// interface requires passing in IR types for mangling.
const MachineOperand &IntrIDOp = MI->getOperand(MI->getNumExplicitDefs());
@@ -1502,21 +1571,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
break;
}
- bool NoSideEffects = MI->getOpcode() == TargetOpcode::G_INTRINSIC;
- unsigned IntrID = IntrIDOp.getIntrinsicID();
- if (IntrID != 0 && IntrID < Intrinsic::num_intrinsics) {
- AttributeList Attrs = Intrinsic::getAttributes(
- MF->getFunction().getContext(), static_cast<Intrinsic::ID>(IntrID));
- bool DeclHasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory();
- if (NoSideEffects && DeclHasSideEffects) {
- report("G_INTRINSIC used with intrinsic that accesses memory", MI);
- break;
- }
- if (!NoSideEffects && !DeclHasSideEffects) {
- report("G_INTRINSIC_W_SIDE_EFFECTS used with readnone intrinsic", MI);
- break;
- }
- }
+ if (!verifyGIntrinsicSideEffects(MI))
+ break;
+ if (!verifyGIntrinsicConvergence(MI))
+ break;
break;
}
@@ -1534,6 +1592,12 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
report("G_SEXT_INREG size must be less than source bit width", MI);
break;
}
+ case TargetOpcode::G_BSWAP: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ if (DstTy.getScalarSizeInBits() % 16 != 0)
+ report("G_BSWAP size must be a multiple of 16 bits", MI);
+ break;
+ }
case TargetOpcode::G_SHUFFLE_VECTOR: {
const MachineOperand &MaskOp = MI->getOperand(3);
if (!MaskOp.isShuffleMask()) {
@@ -1675,6 +1739,8 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
case TargetOpcode::G_VECREDUCE_FMUL:
case TargetOpcode::G_VECREDUCE_FMAX:
case TargetOpcode::G_VECREDUCE_FMIN:
+ case TargetOpcode::G_VECREDUCE_FMAXIMUM:
+ case TargetOpcode::G_VECREDUCE_FMINIMUM:
case TargetOpcode::G_VECREDUCE_ADD:
case TargetOpcode::G_VECREDUCE_MUL:
case TargetOpcode::G_VECREDUCE_AND:
@@ -1746,6 +1812,29 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
}
break;
}
+ case TargetOpcode::G_PREFETCH: {
+ const MachineOperand &AddrOp = MI->getOperand(0);
+ if (!AddrOp.isReg() || !MRI->getType(AddrOp.getReg()).isPointer()) {
+ report("addr operand must be a pointer", &AddrOp, 0);
+ break;
+ }
+ const MachineOperand &RWOp = MI->getOperand(1);
+ if (!RWOp.isImm() || (uint64_t)RWOp.getImm() >= 2) {
+ report("rw operand must be an immediate 0-1", &RWOp, 1);
+ break;
+ }
+ const MachineOperand &LocalityOp = MI->getOperand(2);
+ if (!LocalityOp.isImm() || (uint64_t)LocalityOp.getImm() >= 4) {
+ report("locality operand must be an immediate 0-3", &LocalityOp, 2);
+ break;
+ }
+ const MachineOperand &CacheTypeOp = MI->getOperand(3);
+ if (!CacheTypeOp.isImm() || (uint64_t)CacheTypeOp.getImm() >= 2) {
+ report("cache type operand must be an immediate 0-1", &CacheTypeOp, 3);
+ break;
+ }
+ break;
+ }
case TargetOpcode::G_ASSERT_ALIGN: {
if (MI->getOperand(2).getImm() < 1)
report("alignment immediate must be >= 1", MI);
@@ -1771,6 +1860,9 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
<< MI->getNumOperands() << " given.\n";
}
+ if (MI->getFlag(MachineInstr::NoConvergent) && !MCID.isConvergent())
+ report("NoConvergent flag expected only on convergent instructions.", MI);
+
if (MI->isPHI()) {
if (MF->getProperties().hasProperty(
MachineFunctionProperties::Property::NoPHIs))
@@ -1868,8 +1960,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
// If we have only one valid type, this is likely a copy between a virtual
// and physical register.
- unsigned SrcSize = 0;
- unsigned DstSize = 0;
+ TypeSize SrcSize = TRI->getRegSizeInBits(SrcReg, *MRI);
+ TypeSize DstSize = TRI->getRegSizeInBits(DstReg, *MRI);
if (SrcReg.isPhysical() && DstTy.isValid()) {
const TargetRegisterClass *SrcRC =
TRI->getMinimalPhysRegClassLLT(SrcReg, DstTy);
@@ -1877,9 +1969,6 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
SrcSize = TRI->getRegSizeInBits(*SrcRC);
}
- if (SrcSize == 0)
- SrcSize = TRI->getRegSizeInBits(SrcReg, *MRI);
-
if (DstReg.isPhysical() && SrcTy.isValid()) {
const TargetRegisterClass *DstRC =
TRI->getMinimalPhysRegClassLLT(DstReg, SrcTy);
@@ -1887,10 +1976,21 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
DstSize = TRI->getRegSizeInBits(*DstRC);
}
- if (DstSize == 0)
- DstSize = TRI->getRegSizeInBits(DstReg, *MRI);
+ // The next two checks allow COPY between physical and virtual registers,
+ // when the virtual register has a scalable size and the physical register
+ // has a fixed size. These checks allow COPY between *potentially* mismatched
+ // sizes. However, once RegisterBankSelection occurs, MachineVerifier should
+ // be able to resolve a fixed size for the scalable vector, and at that
+ // point this function will know for sure whether the sizes are mismatched
+ // and correctly report a size mismatch.
+ if (SrcReg.isPhysical() && DstReg.isVirtual() && DstSize.isScalable() &&
+ !SrcSize.isScalable())
+ break;
+ if (SrcReg.isVirtual() && DstReg.isPhysical() && SrcSize.isScalable() &&
+ !DstSize.isScalable())
+ break;
- if (SrcSize != 0 && DstSize != 0 && SrcSize != DstSize) {
+ if (SrcSize.isNonZero() && DstSize.isNonZero() && SrcSize != DstSize) {
if (!DstOp.getSubReg() && !SrcOp.getSubReg()) {
report("Copy Instruction is illegal with mismatching sizes", MI);
errs() << "Def Size = " << DstSize << ", Src Size = " << SrcSize
@@ -2049,9 +2149,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
} else if (MO->isReg() && MO->isTied())
report("Explicit operand should not be tied", MO, MONum);
- } else {
+ } else if (!MI->isVariadic()) {
// ARM adds %reg0 operands to indicate predicates. We'll allow that.
- if (MO->isReg() && !MO->isImplicit() && !MI->isVariadic() && MO->getReg())
+ if (!MO->isValidExcessOperand())
report("Extra explicit operand on non-variadic instruction", MO, MONum);
}
@@ -2100,9 +2200,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
// Verify two-address constraints after the twoaddressinstruction pass.
// Both twoaddressinstruction pass and phi-node-elimination pass call
- // MRI->leaveSSA() to set MF as NoSSA, we should do the verification after
- // twoaddressinstruction pass not after phi-node-elimination pass. So we
- // shouldn't use the NoSSA as the condition, we should based on
+ // MRI->leaveSSA() to set MF as not IsSSA, we should do the verification
+ // after twoaddressinstruction pass not after phi-node-elimination pass. So
+ // we shouldn't use the IsSSA as the condition, we should based on
// TiedOpsRewritten property to verify two-address constraints, this
// property will be set in twoaddressinstruction pass.
unsigned DefIdx;
@@ -2185,7 +2285,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
// Make sure the register fits into its register bank if any.
- if (RegBank && Ty.isValid() &&
+ if (RegBank && Ty.isValid() && !Ty.isScalableVector() &&
RBI->getMaximumSize(RegBank->getID()) < Ty.getSizeInBits()) {
report("Register bank is too small for virtual register", MO,
MONum);
@@ -2328,10 +2428,12 @@ void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO,
const LiveRange &LR,
Register VRegOrUnit,
LaneBitmask LaneMask) {
+ const MachineInstr *MI = MO->getParent();
LiveQueryResult LRQ = LR.Query(UseIdx);
+ bool HasValue = LRQ.valueIn() || (MI->isPHI() && LRQ.valueOut());
// Check if we have a segment at the use; note, however, that we only need
// one live subregister range, as the others may be dead.
- if (!LRQ.valueIn() && LaneMask.none()) {
+ if (!HasValue && LaneMask.none()) {
report("No live segment at use", MO, MONum);
report_context_liverange(LR);
report_context_vreg_regunit(VRegOrUnit);
@@ -2437,7 +2539,14 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
// Check LiveInts liveness and kill.
if (LiveInts && !LiveInts->isNotInMIMap(*MI)) {
- SlotIndex UseIdx = LiveInts->getInstructionIndex(*MI);
+ SlotIndex UseIdx;
+ if (MI->isPHI()) {
+ // PHI use occurs on the edge, so check for live out here instead.
+ UseIdx = LiveInts->getMBBEndIdx(
+ MI->getOperand(MONum + 1).getMBB()).getPrevSlot();
+ } else {
+ UseIdx = LiveInts->getInstructionIndex(*MI);
+ }
// Check the cached regunit intervals.
if (Reg.isPhysical() && !isReserved(Reg)) {
for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) {
@@ -2462,7 +2571,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
continue;
checkLivenessAtUse(MO, MONum, UseIdx, SR, Reg, SR.LaneMask);
LiveQueryResult LRQ = SR.Query(UseIdx);
- if (LRQ.valueIn())
+ if (LRQ.valueIn() || (MI->isPHI() && LRQ.valueOut()))
LiveInMask |= SR.LaneMask;
}
// At least parts of the register has to be live at the use.
@@ -2471,6 +2580,12 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
report_context(*LI);
report_context(UseIdx);
}
+ // For PHIs all lanes should be live
+ if (MI->isPHI() && LiveInMask != MOMask) {
+ report("Not all lanes of PHI source live at use", MO, MONum);
+ report_context(*LI);
+ report_context(UseIdx);
+ }
}
}
}
@@ -3306,26 +3421,28 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
assert(Reg.isVirtual());
verifyLiveRange(LI, Reg);
- LaneBitmask Mask;
- LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
- for (const LiveInterval::SubRange &SR : LI.subranges()) {
- if ((Mask & SR.LaneMask).any()) {
- report("Lane masks of sub ranges overlap in live interval", MF);
- report_context(LI);
- }
- if ((SR.LaneMask & ~MaxMask).any()) {
- report("Subrange lanemask is invalid", MF);
- report_context(LI);
- }
- if (SR.empty()) {
- report("Subrange must not be empty", MF);
- report_context(SR, LI.reg(), SR.LaneMask);
- }
- Mask |= SR.LaneMask;
- verifyLiveRange(SR, LI.reg(), SR.LaneMask);
- if (!LI.covers(SR)) {
- report("A Subrange is not covered by the main range", MF);
- report_context(LI);
+ if (LI.hasSubRanges()) {
+ LaneBitmask Mask;
+ LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if ((Mask & SR.LaneMask).any()) {
+ report("Lane masks of sub ranges overlap in live interval", MF);
+ report_context(LI);
+ }
+ if ((SR.LaneMask & ~MaxMask).any()) {
+ report("Subrange lanemask is invalid", MF);
+ report_context(LI);
+ }
+ if (SR.empty()) {
+ report("Subrange must not be empty", MF);
+ report_context(SR, LI.reg(), SR.LaneMask);
+ }
+ Mask |= SR.LaneMask;
+ verifyLiveRange(SR, LI.reg(), SR.LaneMask);
+ if (!LI.covers(SR)) {
+ report("A Subrange is not covered by the main range", MF);
+ report_context(LI);
+ }
}
}
@@ -3398,6 +3515,15 @@ void MachineVerifier::verifyStackFrame() {
BBState.ExitIsSetup = BBState.EntryIsSetup;
}
+ if ((int)MBB->getCallFrameSize() != -BBState.EntryValue) {
+ report("Call frame size on entry does not match value computed from "
+ "predecessor",
+ MBB);
+ errs() << "Call frame size on entry " << MBB->getCallFrameSize()
+ << " does not match value computed from predecessor "
+ << -BBState.EntryValue << '\n';
+ }
+
// Update stack state by checking contents of MBB.
for (const auto &I : *MBB) {
if (I.getOpcode() == FrameSetupOpcode) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp
index fa5df68b8abc..aff4d95781f4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp
@@ -137,19 +137,34 @@ namespace {
/// Post-process the DAG to create cluster edges between instrs that may
/// be fused by the processor into a single operation.
class MacroFusion : public ScheduleDAGMutation {
- ShouldSchedulePredTy shouldScheduleAdjacent;
+ std::vector<MacroFusionPredTy> Predicates;
bool FuseBlock;
bool scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU);
public:
- MacroFusion(ShouldSchedulePredTy shouldScheduleAdjacent, bool FuseBlock)
- : shouldScheduleAdjacent(shouldScheduleAdjacent), FuseBlock(FuseBlock) {}
+ MacroFusion(ArrayRef<MacroFusionPredTy> Predicates, bool FuseBlock)
+ : Predicates(Predicates.begin(), Predicates.end()), FuseBlock(FuseBlock) {
+ }
void apply(ScheduleDAGInstrs *DAGInstrs) override;
+
+ bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+ const TargetSubtargetInfo &STI,
+ const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI);
};
} // end anonymous namespace
+bool MacroFusion::shouldScheduleAdjacent(const TargetInstrInfo &TII,
+ const TargetSubtargetInfo &STI,
+ const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ return llvm::any_of(Predicates, [&](MacroFusionPredTy Predicate) {
+ return Predicate(TII, STI, FirstMI, SecondMI);
+ });
+}
+
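
The refactor above replaces a single callback with a predicate list; a minimal standalone model of the new pattern (the types are simplified stand-ins, not the LLVM signatures):

#include <algorithm>
#include <cstdio>
#include <vector>

struct Instr { int Opcode; };
using FusionPred = bool (*)(const Instr *First, const Instr &Second);

// Example predicate: fuse a compare (opcode 1) with a branch (opcode 2).
static bool fuseCmpBr(const Instr *First, const Instr &Second) {
  return First && First->Opcode == 1 && Second.Opcode == 2;
}

bool shouldFuse(const std::vector<FusionPred> &Preds, const Instr *First,
                const Instr &Second) {
  // Mirrors the any_of above: the pair fuses if any predicate accepts it.
  return std::any_of(Preds.begin(), Preds.end(),
                     [&](FusionPred P) { return P(First, Second); });
}

int main() {
  Instr Cmp{1}, Br{2};
  std::printf("%d\n", shouldFuse({fuseCmpBr}, &Cmp, Br)); // prints 1
}
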
void MacroFusion::apply(ScheduleDAGInstrs *DAG) {
if (FuseBlock)
// For each of the SUnits in the scheduling block, try to fuse the instr in
@@ -197,17 +212,15 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU)
}
std::unique_ptr<ScheduleDAGMutation>
-llvm::createMacroFusionDAGMutation(
- ShouldSchedulePredTy shouldScheduleAdjacent) {
- if(EnableMacroFusion)
- return std::make_unique<MacroFusion>(shouldScheduleAdjacent, true);
+llvm::createMacroFusionDAGMutation(ArrayRef<MacroFusionPredTy> Predicates) {
+ if (EnableMacroFusion)
+ return std::make_unique<MacroFusion>(Predicates, true);
return nullptr;
}
-std::unique_ptr<ScheduleDAGMutation>
-llvm::createBranchMacroFusionDAGMutation(
- ShouldSchedulePredTy shouldScheduleAdjacent) {
- if(EnableMacroFusion)
- return std::make_unique<MacroFusion>(shouldScheduleAdjacent, false);
+std::unique_ptr<ScheduleDAGMutation> llvm::createBranchMacroFusionDAGMutation(
+ ArrayRef<MacroFusionPredTy> Predicates) {
+ if (EnableMacroFusion)
+ return std::make_unique<MacroFusion>(Predicates, false);
return nullptr;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
index dbb9a9ffdf60..18f8c001bd78 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
@@ -330,9 +330,6 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
if (IncomingReg) {
LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);
- // Increment use count of the newly created virtual register.
- LV->setPHIJoin(IncomingReg);
-
MachineInstr *OldKill = nullptr;
bool IsPHICopyAfterOldKill = false;
@@ -392,7 +389,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
if (IncomingReg) {
// Add the region from the beginning of MBB to the copy instruction to
// IncomingReg's live interval.
- LiveInterval &IncomingLI = LIS->createEmptyInterval(IncomingReg);
+ LiveInterval &IncomingLI = LIS->getOrCreateEmptyInterval(IncomingReg);
VNInfo *IncomingVNI = IncomingLI.getVNInfoAt(MBBStartIndex);
if (!IncomingVNI)
IncomingVNI = IncomingLI.getNextValue(MBBStartIndex,
@@ -403,24 +400,47 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
}
LiveInterval &DestLI = LIS->getInterval(DestReg);
- assert(!DestLI.empty() && "PHIs should have nonempty LiveIntervals.");
- if (DestLI.endIndex().isDead()) {
- // A dead PHI's live range begins and ends at the start of the MBB, but
- // the lowered copy, which will still be dead, needs to begin and end at
- // the copy instruction.
- VNInfo *OrigDestVNI = DestLI.getVNInfoAt(MBBStartIndex);
- assert(OrigDestVNI && "PHI destination should be live at block entry.");
- DestLI.removeSegment(MBBStartIndex, MBBStartIndex.getDeadSlot());
- DestLI.createDeadDef(DestCopyIndex.getRegSlot(),
- LIS->getVNInfoAllocator());
- DestLI.removeValNo(OrigDestVNI);
- } else {
- // Otherwise, remove the region from the beginning of MBB to the copy
- // instruction from DestReg's live interval.
- DestLI.removeSegment(MBBStartIndex, DestCopyIndex.getRegSlot());
- VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot());
+ assert(!DestLI.empty() && "PHIs should have non-empty LiveIntervals.");
+
+ SlotIndex NewStart = DestCopyIndex.getRegSlot();
+
+ SmallVector<LiveRange *> ToUpdate({&DestLI});
+ for (auto &SR : DestLI.subranges())
+ ToUpdate.push_back(&SR);
+
+ for (auto LR : ToUpdate) {
+ auto DestSegment = LR->find(MBBStartIndex);
+ assert(DestSegment != LR->end() &&
+ "PHI destination must be live in block");
+
+ if (LR->endIndex().isDead()) {
+ // A dead PHI's live range begins and ends at the start of the MBB, but
+ // the lowered copy, which will still be dead, needs to begin and end at
+ // the copy instruction.
+ VNInfo *OrigDestVNI = LR->getVNInfoAt(DestSegment->start);
+ assert(OrigDestVNI && "PHI destination should be live at block entry.");
+ LR->removeSegment(DestSegment->start, DestSegment->start.getDeadSlot());
+ LR->createDeadDef(NewStart, LIS->getVNInfoAllocator());
+ LR->removeValNo(OrigDestVNI);
+ continue;
+ }
+
+ // Destination copies are not inserted in the same order as the PHI nodes
+ // they replace. Hence the start of the live range may need to be adjusted
+ // to match the actual slot index of the copy.
+ if (DestSegment->start > NewStart) {
+ VNInfo *VNI = LR->getVNInfoAt(DestSegment->start);
+ assert(VNI && "value should be defined for known segment");
+ LR->addSegment(
+ LiveInterval::Segment(NewStart, DestSegment->start, VNI));
+ } else if (DestSegment->start < NewStart) {
+ assert(DestSegment->start >= MBBStartIndex);
+ assert(DestSegment->end >= DestCopyIndex.getRegSlot());
+ LR->removeSegment(DestSegment->start, NewStart);
+ }
+ VNInfo *DestVNI = LR->getVNInfoAt(NewStart);
assert(DestVNI && "PHI destination should be live at its definition.");
- DestVNI->def = DestCopyIndex.getRegSlot();
+ DestVNI->def = NewStart;
}
}
@@ -615,6 +635,10 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
SlotIndex LastUseIndex = LIS->getInstructionIndex(*KillInst);
SrcLI.removeSegment(LastUseIndex.getRegSlot(),
LIS->getMBBEndIdx(&opBlock));
+ for (auto &SR : SrcLI.subranges()) {
+ SR.removeSegment(LastUseIndex.getRegSlot(),
+ LIS->getMBBEndIdx(&opBlock));
+ }
}
}
}
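An aside on the PHIElimination change above: the old code trimmed only the register's main live range, while the new loop applies the identical trimming to the main range and every subregister range. A minimal sketch of that walk, assuming the in-tree LiveInterval API; forEachLiveRange is a hypothetical helper, not part of the patch:

#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/CodeGen/LiveInterval.h"

// Apply one update uniformly to the main live range and every subrange.
static void forEachLiveRange(llvm::LiveInterval &LI,
                             llvm::function_ref<void(llvm::LiveRange &)> Fn) {
  Fn(LI); // A LiveInterval is itself a LiveRange covering all lanes.
  for (llvm::LiveInterval::SubRange &SR : LI.subranges())
    Fn(SR); // Each subrange tracks one lane mask and needs the same update.
}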
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index a08cc78f11b1..76b3b16af16b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -149,7 +149,8 @@ namespace {
class ValueTrackerResult;
class RecurrenceInstr;
- class PeepholeOptimizer : public MachineFunctionPass {
+ class PeepholeOptimizer : public MachineFunctionPass,
+ private MachineFunction::Delegate {
const TargetInstrInfo *TII = nullptr;
const TargetRegisterInfo *TRI = nullptr;
MachineRegisterInfo *MRI = nullptr;
@@ -202,7 +203,8 @@ namespace {
bool isMoveImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
DenseMap<Register, MachineInstr *> &ImmDefMIs);
bool foldImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
- DenseMap<Register, MachineInstr *> &ImmDefMIs);
+ DenseMap<Register, MachineInstr *> &ImmDefMIs,
+ bool &Deleted);
/// Finds recurrence cycles, but only ones that are formulated around
/// a def operand and a use operand that are tied. If there is a use
@@ -214,11 +216,10 @@ namespace {
/// If copy instruction \p MI is a virtual register copy or a copy of a
/// constant physical register to a virtual register, track it in the
- /// set \p CopyMIs. If this virtual register was previously seen as a
+ /// set CopySrcMIs. If this virtual register was previously seen as a
/// copy, replace the uses of this copy with the previously seen copy's
/// destination register.
- bool foldRedundantCopy(MachineInstr &MI,
- DenseMap<RegSubRegPair, MachineInstr *> &CopyMIs);
+ bool foldRedundantCopy(MachineInstr &MI);
/// Is the register \p Reg a non-allocatable physical register?
bool isNAPhysCopy(Register Reg);
@@ -255,6 +256,49 @@ namespace {
MachineInstr &rewriteSource(MachineInstr &CopyLike,
RegSubRegPair Def, RewriteMapTy &RewriteMap);
+
+ // Set of copies to virtual registers keyed by source register. Never
+ // holds any physreg which requires def tracking.
+ DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs;
+
+ // MachineFunction::Delegate implementation. Used to maintain CopySrcMIs.
+ void MF_HandleInsertion(MachineInstr &MI) override {
+ return;
+ }
+
+ bool getCopySrc(MachineInstr &MI, RegSubRegPair &SrcPair) {
+ if (!MI.isCopy())
+ return false;
+
+ Register SrcReg = MI.getOperand(1).getReg();
+ unsigned SrcSubReg = MI.getOperand(1).getSubReg();
+ if (!SrcReg.isVirtual() && !MRI->isConstantPhysReg(SrcReg))
+ return false;
+
+ SrcPair = RegSubRegPair(SrcReg, SrcSubReg);
+ return true;
+ }
+
+ // If a COPY instruction is to be deleted or changed, we should also remove
+ // it from CopySrcMIs.
+ void deleteChangedCopy(MachineInstr &MI) {
+ RegSubRegPair SrcPair;
+ if (!getCopySrc(MI, SrcPair))
+ return;
+
+ auto It = CopySrcMIs.find(SrcPair);
+ if (It != CopySrcMIs.end() && It->second == &MI)
+ CopySrcMIs.erase(It);
+ }
+
+ void MF_HandleRemoval(MachineInstr &MI) override {
+ deleteChangedCopy(MI);
+ }
+
+ void MF_HandleChangeDesc(MachineInstr &MI, const MCInstrDesc &TID) override
+ {
+ deleteChangedCopy(MI);
+ }
};
/// Helper class to hold instructions that are inside recurrence cycles.
@@ -1351,18 +1395,19 @@ bool PeepholeOptimizer::isMoveImmediate(
MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
DenseMap<Register, MachineInstr *> &ImmDefMIs) {
const MCInstrDesc &MCID = MI.getDesc();
- if (!MI.isMoveImmediate())
- return false;
- if (MCID.getNumDefs() != 1)
+ if (MCID.getNumDefs() != 1 || !MI.getOperand(0).isReg())
return false;
Register Reg = MI.getOperand(0).getReg();
- if (Reg.isVirtual()) {
- ImmDefMIs.insert(std::make_pair(Reg, &MI));
- ImmDefRegs.insert(Reg);
- return true;
- }
+ if (!Reg.isVirtual())
+ return false;
- return false;
+ int64_t ImmVal;
+ if (!MI.isMoveImmediate() && !TII->getConstValDefinedInReg(MI, Reg, ImmVal))
+ return false;
+
+ ImmDefMIs.insert(std::make_pair(Reg, &MI));
+ ImmDefRegs.insert(Reg);
+ return true;
}
/// Try folding register operands that are defined by move immediate
@@ -1370,7 +1415,8 @@ bool PeepholeOptimizer::isMoveImmediate(
/// and only if the def and use are in the same BB.
bool PeepholeOptimizer::foldImmediate(
MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
- DenseMap<Register, MachineInstr *> &ImmDefMIs) {
+ DenseMap<Register, MachineInstr *> &ImmDefMIs, bool &Deleted) {
+ Deleted = false;
for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || MO.isDef())
@@ -1384,6 +1430,19 @@ bool PeepholeOptimizer::foldImmediate(
assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
if (TII->FoldImmediate(MI, *II->second, Reg, MRI)) {
++NumImmFold;
+ // FoldImmediate can delete ImmDefMI if MI was its only user. If ImmDefMI
+ // is not deleted and MI happens to be identical to it, we can delete MI
+ // and replace its users.
+ if (MRI->getVRegDef(Reg) &&
+ MI.isIdenticalTo(*II->second, MachineInstr::IgnoreVRegDefs)) {
+ Register DstReg = MI.getOperand(0).getReg();
+ if (DstReg.isVirtual() &&
+ MRI->getRegClass(DstReg) == MRI->getRegClass(Reg)) {
+ MRI->replaceRegWith(DstReg, Reg);
+ MI.eraseFromParent();
+ Deleted = true;
+ }
+ }
return true;
}
}
@@ -1404,29 +1463,25 @@ bool PeepholeOptimizer::foldImmediate(
// %2 = COPY %0:sub1
//
// Should replace %2 uses with %1:sub1
-bool PeepholeOptimizer::foldRedundantCopy(
- MachineInstr &MI, DenseMap<RegSubRegPair, MachineInstr *> &CopyMIs) {
+bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI) {
assert(MI.isCopy() && "expected a COPY machine instruction");
- Register SrcReg = MI.getOperand(1).getReg();
- unsigned SrcSubReg = MI.getOperand(1).getSubReg();
- if (!SrcReg.isVirtual() && !MRI->isConstantPhysReg(SrcReg))
+ RegSubRegPair SrcPair;
+ if (!getCopySrc(MI, SrcPair))
return false;
Register DstReg = MI.getOperand(0).getReg();
if (!DstReg.isVirtual())
return false;
- RegSubRegPair SrcPair(SrcReg, SrcSubReg);
-
- if (CopyMIs.insert(std::make_pair(SrcPair, &MI)).second) {
+ if (CopySrcMIs.insert(std::make_pair(SrcPair, &MI)).second) {
// First copy of this reg seen.
return false;
}
- MachineInstr *PrevCopy = CopyMIs.find(SrcPair)->second;
+ MachineInstr *PrevCopy = CopySrcMIs.find(SrcPair)->second;
- assert(SrcSubReg == PrevCopy->getOperand(1).getSubReg() &&
+ assert(SrcPair.SubReg == PrevCopy->getOperand(1).getSubReg() &&
"Unexpected mismatching subreg!");
Register PrevDstReg = PrevCopy->getOperand(0).getReg();
@@ -1617,6 +1672,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;
MLI = &getAnalysis<MachineLoopInfo>();
+ MF.setDelegate(this);
bool Changed = false;
@@ -1641,9 +1697,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
// without any intervening re-definition of $physreg.
DenseMap<Register, MachineInstr *> NAPhysToVirtMIs;
- // Set of copies to virtual registers keyed by source register. Never
- // holds any physreg which requires def tracking.
- DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs;
+ CopySrcMIs.clear();
bool IsLoopHeader = MLI->isLoopHeader(&MBB);
@@ -1732,7 +1786,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- if (MI->isCopy() && (foldRedundantCopy(*MI, CopySrcMIs) ||
+ if (MI->isCopy() && (foldRedundantCopy(*MI) ||
foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) {
LocalMIs.erase(MI);
LLVM_DEBUG(dbgs() << "Deleting redundant copy: " << *MI << "\n");
@@ -1750,8 +1804,14 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
// next iteration sees the new instructions.
MII = MI;
++MII;
- if (SeenMoveImm)
- Changed |= foldImmediate(*MI, ImmDefRegs, ImmDefMIs);
+ if (SeenMoveImm) {
+ bool Deleted;
+ Changed |= foldImmediate(*MI, ImmDefRegs, ImmDefMIs, Deleted);
+ if (Deleted) {
+ LocalMIs.erase(MI);
+ continue;
+ }
+ }
}
// Check whether MI is a load candidate for folding into a later
@@ -1815,6 +1875,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
}
}
+ MF.resetDelegate(this);
return Changed;
}
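For context on the PeepholeOptimizer change: MachineFunction::Delegate is an observer interface that MachineFunction invokes on every instruction insertion and removal, which is what lets the now function-scoped CopySrcMIs cache stay valid while other folds erase instructions. A hedged sketch of the wiring, with a hypothetical pointer set standing in for CopySrcMIs:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"

namespace {
// Hypothetical listener: keeps a set of "interesting" instructions in sync
// with the function as a transformation inserts and erases instructions.
struct CacheKeeper : llvm::MachineFunction::Delegate {
  llvm::SmallPtrSet<llvm::MachineInstr *, 16> Cache;

  void MF_HandleInsertion(llvm::MachineInstr &MI) override {
    // Nothing is cached for freshly inserted instructions.
  }
  void MF_HandleRemoval(llvm::MachineInstr &MI) override {
    Cache.erase(&MI); // Drop the entry before the instruction dies.
  }
};
} // namespace

void runGuarded(llvm::MachineFunction &MF, CacheKeeper &CK) {
  MF.setDelegate(&CK);   // Every insert/erase now reaches CK.
  // ... run the transformation over MF ...
  MF.resetDelegate(&CK); // Must be paired, as the pass does above.
}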
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp
index 170008ab67cb..ffd70a29f171 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -101,7 +101,7 @@ namespace {
private:
bool enablePostRAScheduler(
- const TargetSubtargetInfo &ST, CodeGenOpt::Level OptLevel,
+ const TargetSubtargetInfo &ST, CodeGenOptLevel OptLevel,
TargetSubtargetInfo::AntiDepBreakMode &Mode,
TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const;
};
@@ -260,8 +260,7 @@ LLVM_DUMP_METHOD void SchedulePostRATDList::dumpSchedule() const {
#endif
bool PostRAScheduler::enablePostRAScheduler(
- const TargetSubtargetInfo &ST,
- CodeGenOpt::Level OptLevel,
+ const TargetSubtargetInfo &ST, CodeGenOptLevel OptLevel,
TargetSubtargetInfo::AntiDepBreakMode &Mode,
TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const {
Mode = ST.getAntiDepBreakMode();
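The signature change above follows the LLVM 18 rename of the nested CodeGenOpt::Level enum to the scoped enum class CodeGenOptLevel; the levels themselves are unchanged. A two-line illustration, assuming only llvm/Support/CodeGen.h:

#include "llvm/Support/CodeGen.h"

bool optimizationEnabled(llvm::CodeGenOptLevel OL) {
  // Scoped enumerators replace CodeGenOpt::None, CodeGenOpt::Default, ...
  return OL != llvm::CodeGenOptLevel::None;
}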
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 5b822b5d7b95..931830677970 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -72,7 +72,6 @@ static bool lowerLoadRelative(Function &F) {
bool Changed = false;
Type *Int32Ty = Type::getInt32Ty(F.getContext());
- Type *Int32PtrTy = Int32Ty->getPointerTo();
Type *Int8Ty = Type::getInt8Ty(F.getContext());
for (Use &U : llvm::make_early_inc_range(F.uses())) {
@@ -83,8 +82,7 @@ static bool lowerLoadRelative(Function &F) {
IRBuilder<> B(CI);
Value *OffsetPtr =
B.CreateGEP(Int8Ty, CI->getArgOperand(0), CI->getArgOperand(1));
- Value *OffsetPtrI32 = B.CreateBitCast(OffsetPtr, Int32PtrTy);
- Value *OffsetI32 = B.CreateAlignedLoad(Int32Ty, OffsetPtrI32, Align(4));
+ Value *OffsetI32 = B.CreateAlignedLoad(Int32Ty, OffsetPtr, Align(4));
Value *ResultPtr = B.CreateGEP(Int8Ty, CI->getArgOperand(0), OffsetI32);
@@ -164,6 +162,16 @@ static bool lowerObjCCall(Function &F, const char *NewFn,
CallInst::TailCallKind TCK = CI->getTailCallKind();
NewCI->setTailCallKind(std::max(TCK, OverridingTCK));
+ // Transfer the 'returned' attribute from the intrinsic to the call site.
+ // By applying this only to intrinsic call sites, we avoid applying it to
+ // non-ARC explicit calls to things like objc_retain which have not been
+ // auto-upgraded to use the intrinsics.
+ unsigned Index;
+ if (F.getAttributes().hasAttrSomewhere(Attribute::Returned, &Index) &&
+ Index)
+ NewCI->addParamAttr(Index - AttributeList::FirstArgIndex,
+ Attribute::Returned);
+
if (!CI->use_empty())
CI->replaceAllUsesWith(NewCI);
CI->eraseFromParent();
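A condensed sketch of the attribute transfer added to lowerObjCCall above: hasAttrSomewhere reports the attribute-list index at which 'returned' appears on the intrinsic declaration, and the attribute is re-applied to the matching parameter of the lowered call. The helper below is illustrative, not part of the patch:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"

// Copy a 'returned' parameter attribute from a declaration to a call site.
void transferReturnedAttr(const llvm::Function &F, llvm::CallInst *NewCI) {
  unsigned Index; // index into the attribute list, not a parameter number
  if (F.getAttributes().hasAttrSomewhere(llvm::Attribute::Returned, &Index) &&
      Index >= llvm::AttributeList::FirstArgIndex)
    NewCI->addParamAttr(Index - llvm::AttributeList::FirstArgIndex,
                        llvm::Attribute::Returned);
}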
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 49047719fdaa..8af17e63e25c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -17,7 +17,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -134,8 +133,8 @@ private:
bool replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
unsigned OpIdx, int SPAdj = 0);
// Does the same as replaceFrameIndices but using the backward MIR walk and
- // backward register scavenger walk. Does not yet support call sequence
- // processing.
+ // backward register scavenger walk.
+ void replaceFrameIndicesBackward(MachineFunction &MF);
void replaceFrameIndicesBackward(MachineBasicBlock *BB, MachineFunction &MF,
int &SPAdj);
@@ -272,8 +271,17 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
// Replace all MO_FrameIndex operands with physical register references
// and actual offsets.
- //
- replaceFrameIndices(MF);
+ if (TFI->needsFrameIndexResolution(MF)) {
+ // Allow the target to determine this after knowing the frame size.
+ FrameIndexEliminationScavenging =
+ (RS && !FrameIndexVirtualScavenging) ||
+ TRI->requiresFrameIndexReplacementScavenging(MF);
+
+ if (TRI->eliminateFrameIndicesBackwards())
+ replaceFrameIndicesBackward(MF);
+ else
+ replaceFrameIndices(MF);
+ }
// If register scavenging is needed, as we've enabled doing it as a
// post-pass, scavenge the virtual registers that frame index elimination
@@ -285,7 +293,7 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
MachineFrameInfo &MFI = MF.getFrameInfo();
uint64_t StackSize = MFI.getStackSize();
- unsigned Threshold = UINT_MAX;
+ uint64_t Threshold = TFI->getStackThreshold();
if (MF.getFunction().hasFnAttribute("warn-stack-size")) {
bool Failed = MF.getFunction()
.getFnAttribute("warn-stack-size")
@@ -329,7 +337,9 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "StackSize",
MF.getFunction().getSubprogram(),
&MF.front())
- << ore::NV("NumStackBytes", StackSize) << " stack bytes in function";
+ << ore::NV("NumStackBytes", StackSize)
+ << " stack bytes in function '"
+ << ore::NV("Function", MF.getFunction().getName()) << "'";
});
delete RS;
@@ -381,13 +391,18 @@ void PEI::calculateCallFrameInfo(MachineFunction &MF) {
MFI.setAdjustsStack(AdjustsStack);
MFI.setMaxCallFrameSize(MaxCallFrameSize);
- for (MachineBasicBlock::iterator I : FrameSDOps) {
+ if (TFI->canSimplifyCallFramePseudos(MF)) {
// If call frames are not being included as part of the stack frame, and
// the target doesn't indicate otherwise, remove the call frame pseudos
// here. The sub/add sp instruction pairs are still inserted, but we don't
// need to track the SP adjustment for frame index elimination.
- if (TFI->canSimplifyCallFramePseudos(MF))
+ for (MachineBasicBlock::iterator I : FrameSDOps)
TFI->eliminateCallFramePseudoInstr(MF, *I->getParent(), I);
+
+ // We can't track the call frame size after call frame pseudos have been
+ // eliminated. Set it to zero everywhere to keep MachineVerifier happy.
+ for (MachineBasicBlock &MBB : MF)
+ MBB.setCallFrameSize(0);
}
}
@@ -1070,7 +1085,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
MaxAlign);
// Give the targets a chance to order the objects the way they like it.
- if (MF.getTarget().getOptLevel() != CodeGenOpt::None &&
+ if (MF.getTarget().getOptLevel() != CodeGenOptLevel::None &&
MF.getTarget().Options.StackSymbolOrdering)
TFI.orderFrameObjects(MF, ObjectsToAllocate);
@@ -1080,7 +1095,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// optimizing.
BitVector StackBytesFree;
if (!ObjectsToAllocate.empty() &&
- MF.getTarget().getOptLevel() != CodeGenOpt::None &&
+ MF.getTarget().getOptLevel() != CodeGenOptLevel::None &&
MFI.getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(MF))
computeFreeStackSlots(MFI, StackGrowsDown, MinCSFrameIndex, MaxCSFrameIndex,
FixedCSEnd, StackBytesFree);
@@ -1332,48 +1347,49 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
TFI.emitZeroCallUsedRegs(RegsToZero, MBB);
}
+/// Replace all FrameIndex operands with physical register references and actual
+/// offsets.
+void PEI::replaceFrameIndicesBackward(MachineFunction &MF) {
+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
+
+ for (auto &MBB : MF) {
+ int SPAdj = 0;
+ if (!MBB.succ_empty()) {
+ // Get the SP adjustment for the end of MBB from the start of any of its
+ // successors. They should all be the same.
+ assert(all_of(MBB.successors(), [&MBB](const MachineBasicBlock *Succ) {
+ return Succ->getCallFrameSize() ==
+ (*MBB.succ_begin())->getCallFrameSize();
+ }));
+ const MachineBasicBlock &FirstSucc = **MBB.succ_begin();
+ SPAdj = TFI.alignSPAdjust(FirstSucc.getCallFrameSize());
+ if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
+ SPAdj = -SPAdj;
+ }
+
+ replaceFrameIndicesBackward(&MBB, MF, SPAdj);
+
+ // We can't track the call frame size after call frame pseudos have been
+ // eliminated. Set it to zero everywhere to keep MachineVerifier happy.
+ MBB.setCallFrameSize(0);
+ }
+}
+
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
/// register references and actual offsets.
void PEI::replaceFrameIndices(MachineFunction &MF) {
- const auto &ST = MF.getSubtarget();
- const TargetFrameLowering &TFI = *ST.getFrameLowering();
- if (!TFI.needsFrameIndexResolution(MF))
- return;
-
- const TargetRegisterInfo *TRI = ST.getRegisterInfo();
+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
- // Allow the target to determine this after knowing the frame size.
- FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) ||
- TRI->requiresFrameIndexReplacementScavenging(MF);
+ for (auto &MBB : MF) {
+ int SPAdj = TFI.alignSPAdjust(MBB.getCallFrameSize());
+ if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
+ SPAdj = -SPAdj;
- // Store SPAdj at exit of a basic block.
- SmallVector<int, 8> SPState;
- SPState.resize(MF.getNumBlockIDs());
- df_iterator_default_set<MachineBasicBlock*> Reachable;
+ replaceFrameIndices(&MBB, MF, SPAdj);
- // Iterate over the reachable blocks in DFS order.
- for (auto DFI = df_ext_begin(&MF, Reachable), DFE = df_ext_end(&MF, Reachable);
- DFI != DFE; ++DFI) {
- int SPAdj = 0;
- // Check the exit state of the DFS stack predecessor.
- if (DFI.getPathLength() >= 2) {
- MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2);
- assert(Reachable.count(StackPred) &&
- "DFS stack predecessor is already visited.\n");
- SPAdj = SPState[StackPred->getNumber()];
- }
- MachineBasicBlock *BB = *DFI;
- replaceFrameIndices(BB, MF, SPAdj);
- SPState[BB->getNumber()] = SPAdj;
- }
-
- // Handle the unreachable blocks.
- for (auto &BB : MF) {
- if (Reachable.count(&BB))
- // Already handled in DFS traversal.
- continue;
- int SPAdj = 0;
- replaceFrameIndices(&BB, MF, SPAdj);
+ // We can't track the call frame size after call frame pseudos have been
+ // eliminated. Set it to zero everywhere to keep MachineVerifier happy.
+ MBB.setCallFrameSize(0);
}
}
@@ -1472,37 +1488,35 @@ void PEI::replaceFrameIndicesBackward(MachineBasicBlock *BB,
if (LocalRS)
LocalRS->enterBasicBlockEnd(*BB);
- for (MachineInstr &MI : make_early_inc_range(reverse(*BB))) {
+ for (MachineBasicBlock::iterator I = BB->end(); I != BB->begin();) {
+ MachineInstr &MI = *std::prev(I);
+
if (TII.isFrameInstr(MI)) {
+ SPAdj -= TII.getSPAdjust(MI);
TFI.eliminateCallFramePseudoInstr(MF, *BB, &MI);
continue;
}
// Step backwards to get the liveness state at (immediately after) MI.
if (LocalRS)
- LocalRS->backward(MI);
+ LocalRS->backward(I);
- for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
- if (!MI.getOperand(i).isFI())
+ bool RemovedMI = false;
+ for (const auto &[Idx, Op] : enumerate(MI.operands())) {
+ if (!Op.isFI())
continue;
- if (replaceFrameIndexDebugInstr(MF, MI, i, SPAdj))
+ if (replaceFrameIndexDebugInstr(MF, MI, Idx, SPAdj))
continue;
// Eliminate this FrameIndex operand.
- //
- // Save and restore the scavenger's position around the call to
- // eliminateFrameIndex in case it erases MI and invalidates the iterator.
- MachineBasicBlock::iterator Save;
- if (LocalRS)
- Save = std::next(LocalRS->getCurrentPosition());
- bool Removed = TRI.eliminateFrameIndex(MI, SPAdj, i, RS);
- if (LocalRS)
- LocalRS->skipTo(std::prev(Save));
-
- if (Removed)
+ RemovedMI = TRI.eliminateFrameIndex(MI, SPAdj, Idx, LocalRS);
+ if (RemovedMI)
break;
}
+
+ if (!RemovedMI)
+ --I;
}
}
@@ -1514,12 +1528,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
- if (TRI.supportsBackwardScavenger())
- return replaceFrameIndicesBackward(BB, MF, SPAdj);
-
- if (RS && FrameIndexEliminationScavenging)
- RS->enterBasicBlock(*BB);
-
bool InsideCallSequence = false;
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
@@ -1553,8 +1561,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
// If this instruction has a FrameIndex operand, we need to
// use that target machine register info object to eliminate
// it.
- TRI.eliminateFrameIndex(MI, SPAdj, i,
- FrameIndexEliminationScavenging ? RS : nullptr);
+ TRI.eliminateFrameIndex(MI, SPAdj, i);
// Reset the iterator if we were at the beginning of the BB.
if (AtBeginning) {
@@ -1576,10 +1583,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
if (DidFinishLoop && InsideCallSequence)
SPAdj += TII.getSPAdjust(MI);
- if (DoIncr && I != BB->end()) ++I;
-
- // Update register states.
- if (RS && FrameIndexEliminationScavenging && DidFinishLoop)
- RS->forward(MI);
+ if (DoIncr && I != BB->end())
+ ++I;
}
}
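The net effect of the PrologEpilogInserter hunks is that the SP adjustment entering each block is now read from the call frame size recorded on the MachineBasicBlock itself, rather than propagated along a DFS of the CFG. A minimal sketch of the new per-block computation, mirroring the names in the diff:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/TargetFrameLowering.h"

// SP adjustment on entry to MBB, derived from its recorded call frame size.
int entrySPAdjust(const llvm::MachineBasicBlock &MBB,
                  const llvm::TargetFrameLowering &TFI) {
  int SPAdj = TFI.alignSPAdjust(MBB.getCallFrameSize());
  if (TFI.getStackGrowthDirection() ==
      llvm::TargetFrameLowering::StackGrowsUp)
    SPAdj = -SPAdj; // An upward-growing stack flips the sign.
  return SPAdj;
}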
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp
index 40c52b9d9707..0e1a2c921c5c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp
@@ -12,6 +12,7 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/PseudoSourceValueManager.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFRegisters.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFRegisters.cpp
index 90520c4c3c71..7ce00a66b3ae 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RDFRegisters.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFRegisters.cpp
@@ -61,14 +61,7 @@ PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri,
std::pair<uint32_t, LaneBitmask> P = *I;
UnitInfo &UI = UnitInfos[P.first];
UI.Reg = F;
- if (P.second.any()) {
- UI.Mask = P.second;
- } else {
- if (const TargetRegisterClass *RC = RegInfos[F].RegClass)
- UI.Mask = RC->LaneMask;
- else
- UI.Mask = LaneBitmask::getAll();
- }
+ UI.Mask = P.second;
}
}
}
@@ -141,7 +134,7 @@ std::set<RegisterId> PhysicalRegisterInfo::getUnits(RegisterRef RR) const {
return Units; // Empty
for (MCRegUnitMaskIterator UM(RR.idx(), &TRI); UM.isValid(); ++UM) {
auto [U, M] = *UM;
- if (M.none() || (M & RR.Mask).any())
+ if ((M & RR.Mask).any())
Units.insert(U);
}
return Units;
@@ -200,13 +193,6 @@ bool PhysicalRegisterInfo::equal_to(RegisterRef A, RegisterRef B) const {
auto [AReg, AMask] = *AI;
auto [BReg, BMask] = *BI;
- // Lane masks are "none" for units that don't correspond to subregs
- // e.g. a single unit in a leaf register, or aliased unit.
- if (AMask.none())
- AMask = LaneBitmask::getAll();
- if (BMask.none())
- BMask = LaneBitmask::getAll();
-
// If both iterators point to a unit contained in both A and B, then
// compare the units.
if ((AMask & A.Mask).any() && (BMask & B.Mask).any()) {
@@ -245,13 +231,6 @@ bool PhysicalRegisterInfo::less(RegisterRef A, RegisterRef B) const {
auto [AReg, AMask] = *AI;
auto [BReg, BMask] = *BI;
- // Lane masks are "none" for units that don't correspond to subregs
- // e.g. a single unit in a leaf register, or aliased unit.
- if (AMask.none())
- AMask = LaneBitmask::getAll();
- if (BMask.none())
- BMask = LaneBitmask::getAll();
-
// If both iterators point to a unit contained in both A and B, then
// compare the units.
if ((AMask & A.Mask).any() && (BMask & B.Mask).any()) {
@@ -303,7 +282,7 @@ bool RegisterAggr::hasAliasOf(RegisterRef RR) const {
for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
std::pair<uint32_t, LaneBitmask> P = *U;
- if (P.second.none() || (P.second & RR.Mask).any())
+ if ((P.second & RR.Mask).any())
if (Units.test(P.first))
return true;
}
@@ -318,7 +297,7 @@ bool RegisterAggr::hasCoverOf(RegisterRef RR) const {
for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
std::pair<uint32_t, LaneBitmask> P = *U;
- if (P.second.none() || (P.second & RR.Mask).any())
+ if ((P.second & RR.Mask).any())
if (!Units.test(P.first))
return false;
}
@@ -333,7 +312,7 @@ RegisterAggr &RegisterAggr::insert(RegisterRef RR) {
for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
std::pair<uint32_t, LaneBitmask> P = *U;
- if (P.second.none() || (P.second & RR.Mask).any())
+ if ((P.second & RR.Mask).any())
Units.set(P.first);
}
return *this;
@@ -407,7 +386,7 @@ RegisterRef RegisterAggr::makeRegRef() const {
for (MCRegUnitMaskIterator I(F, &PRI.getTRI()); I.isValid(); ++I) {
std::pair<uint32_t, LaneBitmask> P = *I;
if (Units.test(P.first))
- M |= P.second.none() ? LaneBitmask::getAll() : P.second;
+ M |= P.second;
}
return RegisterRef(F, M);
}
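The RDFRegisters deletions all rest on one invariant change: MCRegUnitMaskIterator is now expected to report a precise, non-empty lane mask for every unit, so the old "none() means all lanes" fallback disappears. A minimal sketch of the resulting membership test, assuming that invariant:

#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"

// Does any register unit of Reg intersect the queried lane mask?
bool anyUnitOverlaps(llvm::MCRegister Reg, llvm::LaneBitmask Mask,
                     const llvm::TargetRegisterInfo &TRI) {
  for (llvm::MCRegUnitMaskIterator U(Reg, &TRI); U.isValid(); ++U) {
    auto [Unit, UnitMask] = *U;
    (void)Unit;
    if ((UnitMask & Mask).any()) // plain intersection, no special cases
      return true;
  }
  return false;
}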
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index 75fbc8ba35b1..61a668907be7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -181,7 +181,7 @@ void ReachingDefAnalysis::reprocessBasicBlock(MachineBasicBlock *MBB) {
MBBReachingDefs[MBBNumber][Unit].insert(Start, Def);
}
- // Update reaching def at end of of BB. Keep in mind that these are
+ // Update reaching def at end of BB. Keep in mind that these are
// adjusted relative to the end of the basic block.
if (MBBOutRegsInfos[MBBNumber][Unit] < Def - NumInsts)
MBBOutRegsInfos[MBBNumber][Unit] = Def - NumInsts;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
index 81f3d2c8099f..47ad9c168b92 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
@@ -83,7 +83,7 @@ private:
bool doInitialization(Module &M) override {
if (NotAsRequested)
M.getContext().emitError("Requested regalloc eviction advisor analysis "
- "could be created. Using default");
+ "could not be created. Using default");
return RegAllocEvictionAdvisorAnalysis::doInitialization(M);
}
const bool NotAsRequested;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
index 864beb8720f4..40c42cabf776 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -50,251 +50,248 @@ using namespace llvm;
#define DEBUG_TYPE "regalloc"
STATISTIC(NumStores, "Number of stores added");
-STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumLoads, "Number of loads added");
STATISTIC(NumCoalesced, "Number of copies coalesced");
// FIXME: Remove this switch when all testcases are fixed!
static cl::opt<bool> IgnoreMissingDefs("rafast-ignore-missing-defs",
cl::Hidden);
-static RegisterRegAlloc
- fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator);
+static RegisterRegAlloc fastRegAlloc("fast", "fast register allocator",
+ createFastRegisterAllocator);
namespace {
- class RegAllocFast : public MachineFunctionPass {
- public:
- static char ID;
+class RegAllocFast : public MachineFunctionPass {
+public:
+ static char ID;
- RegAllocFast(const RegClassFilterFunc F = allocateAllRegClasses,
- bool ClearVirtRegs_ = true) :
- MachineFunctionPass(ID),
- ShouldAllocateClass(F),
- StackSlotForVirtReg(-1),
- ClearVirtRegs(ClearVirtRegs_) {
- }
+ RegAllocFast(const RegClassFilterFunc F = allocateAllRegClasses,
+ bool ClearVirtRegs_ = true)
+ : MachineFunctionPass(ID), ShouldAllocateClass(F),
+ StackSlotForVirtReg(-1), ClearVirtRegs(ClearVirtRegs_) {}
- private:
- MachineFrameInfo *MFI = nullptr;
- MachineRegisterInfo *MRI = nullptr;
- const TargetRegisterInfo *TRI = nullptr;
- const TargetInstrInfo *TII = nullptr;
- RegisterClassInfo RegClassInfo;
- const RegClassFilterFunc ShouldAllocateClass;
+private:
+ MachineFrameInfo *MFI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ RegisterClassInfo RegClassInfo;
+ const RegClassFilterFunc ShouldAllocateClass;
- /// Basic block currently being allocated.
- MachineBasicBlock *MBB = nullptr;
+ /// Basic block currently being allocated.
+ MachineBasicBlock *MBB = nullptr;
- /// Maps virtual regs to the frame index where these values are spilled.
- IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
+ /// Maps virtual regs to the frame index where these values are spilled.
+ IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
- bool ClearVirtRegs;
+ bool ClearVirtRegs;
- /// Everything we know about a live virtual register.
- struct LiveReg {
- MachineInstr *LastUse = nullptr; ///< Last instr to use reg.
- Register VirtReg; ///< Virtual register number.
- MCPhysReg PhysReg = 0; ///< Currently held here.
- bool LiveOut = false; ///< Register is possibly live out.
- bool Reloaded = false; ///< Register was reloaded.
- bool Error = false; ///< Could not allocate.
+ /// Everything we know about a live virtual register.
+ struct LiveReg {
+ MachineInstr *LastUse = nullptr; ///< Last instr to use reg.
+ Register VirtReg; ///< Virtual register number.
+ MCPhysReg PhysReg = 0; ///< Currently held here.
+ bool LiveOut = false; ///< Register is possibly live out.
+ bool Reloaded = false; ///< Register was reloaded.
+ bool Error = false; ///< Could not allocate.
- explicit LiveReg(Register VirtReg) : VirtReg(VirtReg) {}
+ explicit LiveReg(Register VirtReg) : VirtReg(VirtReg) {}
- unsigned getSparseSetIndex() const {
- return Register::virtReg2Index(VirtReg);
- }
- };
-
- using LiveRegMap = SparseSet<LiveReg, identity<unsigned>, uint16_t>;
- /// This map contains entries for each virtual register that is currently
- /// available in a physical register.
- LiveRegMap LiveVirtRegs;
-
- /// Stores assigned virtual registers present in the bundle MI.
- DenseMap<Register, MCPhysReg> BundleVirtRegsMap;
-
- DenseMap<unsigned, SmallVector<MachineOperand *, 2>> LiveDbgValueMap;
- /// List of DBG_VALUE that we encountered without the vreg being assigned
- /// because they were placed after the last use of the vreg.
- DenseMap<unsigned, SmallVector<MachineInstr *, 1>> DanglingDbgValues;
-
- /// Has a bit set for every virtual register for which it was determined
- /// that it is alive across blocks.
- BitVector MayLiveAcrossBlocks;
-
- /// State of a register unit.
- enum RegUnitState {
- /// A free register is not currently in use and can be allocated
- /// immediately without checking aliases.
- regFree,
-
- /// A pre-assigned register has been assigned before register allocation
- /// (e.g., setting up a call parameter).
- regPreAssigned,
-
- /// Used temporarily in reloadAtBegin() to mark register units that are
- /// live-in to the basic block.
- regLiveIn,
-
- /// A register state may also be a virtual register number, indicating
- /// that the physical register is currently allocated to a virtual
- /// register. In that case, LiveVirtRegs contains the inverse mapping.
- };
-
- /// Maps each physical register to a RegUnitState enum or virtual register.
- std::vector<unsigned> RegUnitStates;
-
- SmallVector<MachineInstr *, 32> Coalesced;
-
- using RegUnitSet = SparseSet<uint16_t, identity<uint16_t>>;
- /// Set of register units that are used in the current instruction, and so
- /// cannot be allocated.
- RegUnitSet UsedInInstr;
- RegUnitSet PhysRegUses;
- SmallVector<uint16_t, 8> DefOperandIndexes;
- // Register masks attached to the current instruction.
- SmallVector<const uint32_t *> RegMasks;
-
- void setPhysRegState(MCPhysReg PhysReg, unsigned NewState);
- bool isPhysRegFree(MCPhysReg PhysReg) const;
-
- /// Mark a physreg as used in this instruction.
- void markRegUsedInInstr(MCPhysReg PhysReg) {
- for (MCRegUnit Unit : TRI->regunits(PhysReg))
- UsedInInstr.insert(Unit);
+ unsigned getSparseSetIndex() const {
+ return Register::virtReg2Index(VirtReg);
}
+ };
- // Check if physreg is clobbered by instruction's regmask(s).
- bool isClobberedByRegMasks(MCPhysReg PhysReg) const {
- return llvm::any_of(RegMasks, [PhysReg](const uint32_t *Mask) {
- return MachineOperand::clobbersPhysReg(Mask, PhysReg);
- });
- }
+ using LiveRegMap = SparseSet<LiveReg, identity<unsigned>, uint16_t>;
+ /// This map contains entries for each virtual register that is currently
+ /// available in a physical register.
+ LiveRegMap LiveVirtRegs;
+
+ /// Stores assigned virtual registers present in the bundle MI.
+ DenseMap<Register, MCPhysReg> BundleVirtRegsMap;
+
+ DenseMap<unsigned, SmallVector<MachineOperand *, 2>> LiveDbgValueMap;
+ /// List of DBG_VALUE that we encountered without the vreg being assigned
+ /// because they were placed after the last use of the vreg.
+ DenseMap<unsigned, SmallVector<MachineInstr *, 1>> DanglingDbgValues;
- /// Check if a physreg or any of its aliases are used in this instruction.
- bool isRegUsedInInstr(MCPhysReg PhysReg, bool LookAtPhysRegUses) const {
- if (LookAtPhysRegUses && isClobberedByRegMasks(PhysReg))
+ /// Has a bit set for every virtual register for which it was determined
+ /// that it is alive across blocks.
+ BitVector MayLiveAcrossBlocks;
+
+ /// State of a register unit.
+ enum RegUnitState {
+ /// A free register is not currently in use and can be allocated
+ /// immediately without checking aliases.
+ regFree,
+
+ /// A pre-assigned register has been assigned before register allocation
+ /// (e.g., setting up a call parameter).
+ regPreAssigned,
+
+ /// Used temporarily in reloadAtBegin() to mark register units that are
+ /// live-in to the basic block.
+ regLiveIn,
+
+ /// A register state may also be a virtual register number, indicating
+ /// that the physical register is currently allocated to a virtual
+ /// register. In that case, LiveVirtRegs contains the inverse mapping.
+ };
+
+ /// Maps each physical register to a RegUnitState enum or virtual register.
+ std::vector<unsigned> RegUnitStates;
+
+ SmallVector<MachineInstr *, 32> Coalesced;
+
+ using RegUnitSet = SparseSet<uint16_t, identity<uint16_t>>;
+ /// Set of register units that are used in the current instruction, and so
+ /// cannot be allocated.
+ RegUnitSet UsedInInstr;
+ RegUnitSet PhysRegUses;
+ SmallVector<uint16_t, 8> DefOperandIndexes;
+ // Register masks attached to the current instruction.
+ SmallVector<const uint32_t *> RegMasks;
+
+ void setPhysRegState(MCPhysReg PhysReg, unsigned NewState);
+ bool isPhysRegFree(MCPhysReg PhysReg) const;
+
+ /// Mark a physreg as used in this instruction.
+ void markRegUsedInInstr(MCPhysReg PhysReg) {
+ for (MCRegUnit Unit : TRI->regunits(PhysReg))
+ UsedInInstr.insert(Unit);
+ }
+
+ // Check if physreg is clobbered by instruction's regmask(s).
+ bool isClobberedByRegMasks(MCPhysReg PhysReg) const {
+ return llvm::any_of(RegMasks, [PhysReg](const uint32_t *Mask) {
+ return MachineOperand::clobbersPhysReg(Mask, PhysReg);
+ });
+ }
+
+ /// Check if a physreg or any of its aliases are used in this instruction.
+ bool isRegUsedInInstr(MCPhysReg PhysReg, bool LookAtPhysRegUses) const {
+ if (LookAtPhysRegUses && isClobberedByRegMasks(PhysReg))
+ return true;
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ if (UsedInInstr.count(Unit))
+ return true;
+ if (LookAtPhysRegUses && PhysRegUses.count(Unit))
return true;
- for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
- if (UsedInInstr.count(Unit))
- return true;
- if (LookAtPhysRegUses && PhysRegUses.count(Unit))
- return true;
- }
- return false;
}
+ return false;
+ }
- /// Mark physical register as being used in a register use operand.
- /// This is only used by the special livethrough handling code.
- void markPhysRegUsedInInstr(MCPhysReg PhysReg) {
- for (MCRegUnit Unit : TRI->regunits(PhysReg))
- PhysRegUses.insert(Unit);
- }
+ /// Mark physical register as being used in a register use operand.
+ /// This is only used by the special livethrough handling code.
+ void markPhysRegUsedInInstr(MCPhysReg PhysReg) {
+ for (MCRegUnit Unit : TRI->regunits(PhysReg))
+ PhysRegUses.insert(Unit);
+ }
- /// Remove mark of physical register being used in the instruction.
- void unmarkRegUsedInInstr(MCPhysReg PhysReg) {
- for (MCRegUnit Unit : TRI->regunits(PhysReg))
- UsedInInstr.erase(Unit);
- }
+ /// Remove mark of physical register being used in the instruction.
+ void unmarkRegUsedInInstr(MCPhysReg PhysReg) {
+ for (MCRegUnit Unit : TRI->regunits(PhysReg))
+ UsedInInstr.erase(Unit);
+ }
- enum : unsigned {
- spillClean = 50,
- spillDirty = 100,
- spillPrefBonus = 20,
- spillImpossible = ~0u
- };
+ enum : unsigned {
+ spillClean = 50,
+ spillDirty = 100,
+ spillPrefBonus = 20,
+ spillImpossible = ~0u
+ };
- public:
- StringRef getPassName() const override { return "Fast Register Allocator"; }
+public:
+ StringRef getPassName() const override { return "Fast Register Allocator"; }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoPHIs);
- }
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
+ }
- MachineFunctionProperties getSetProperties() const override {
- if (ClearVirtRegs) {
- return MachineFunctionProperties().set(
+ MachineFunctionProperties getSetProperties() const override {
+ if (ClearVirtRegs) {
+ return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
- }
-
- return MachineFunctionProperties();
}
- MachineFunctionProperties getClearedProperties() const override {
- return MachineFunctionProperties().set(
+ return MachineFunctionProperties();
+ }
+
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties().set(
MachineFunctionProperties::Property::IsSSA);
- }
+ }
- private:
- bool runOnMachineFunction(MachineFunction &MF) override;
+private:
+ bool runOnMachineFunction(MachineFunction &MF) override;
- void allocateBasicBlock(MachineBasicBlock &MBB);
+ void allocateBasicBlock(MachineBasicBlock &MBB);
- void addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts,
- Register Reg) const;
+ void addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts,
+ Register Reg) const;
- void findAndSortDefOperandIndexes(const MachineInstr &MI);
+ void findAndSortDefOperandIndexes(const MachineInstr &MI);
- void allocateInstruction(MachineInstr &MI);
- void handleDebugValue(MachineInstr &MI);
- void handleBundle(MachineInstr &MI);
+ void allocateInstruction(MachineInstr &MI);
+ void handleDebugValue(MachineInstr &MI);
+ void handleBundle(MachineInstr &MI);
- bool usePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
- bool definePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
- bool displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
- void freePhysReg(MCPhysReg PhysReg);
+ bool usePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
+ bool definePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
+ bool displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
+ void freePhysReg(MCPhysReg PhysReg);
- unsigned calcSpillCost(MCPhysReg PhysReg) const;
+ unsigned calcSpillCost(MCPhysReg PhysReg) const;
- LiveRegMap::iterator findLiveVirtReg(Register VirtReg) {
- return LiveVirtRegs.find(Register::virtReg2Index(VirtReg));
- }
+ LiveRegMap::iterator findLiveVirtReg(Register VirtReg) {
+ return LiveVirtRegs.find(Register::virtReg2Index(VirtReg));
+ }
- LiveRegMap::const_iterator findLiveVirtReg(Register VirtReg) const {
- return LiveVirtRegs.find(Register::virtReg2Index(VirtReg));
- }
+ LiveRegMap::const_iterator findLiveVirtReg(Register VirtReg) const {
+ return LiveVirtRegs.find(Register::virtReg2Index(VirtReg));
+ }
- void assignVirtToPhysReg(MachineInstr &MI, LiveReg &, MCPhysReg PhysReg);
- void allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint,
- bool LookAtPhysRegUses = false);
- void allocVirtRegUndef(MachineOperand &MO);
- void assignDanglingDebugValues(MachineInstr &Def, Register VirtReg,
- MCPhysReg Reg);
- bool defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
- Register VirtReg);
- bool defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
- bool LookAtPhysRegUses = false);
- bool useVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg);
-
- MachineBasicBlock::iterator
- getMBBBeginInsertionPoint(MachineBasicBlock &MBB,
- SmallSet<Register, 2> &PrologLiveIns) const;
-
- void reloadAtBegin(MachineBasicBlock &MBB);
- bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
-
- Register traceCopies(Register VirtReg) const;
- Register traceCopyChain(Register Reg) const;
-
- bool shouldAllocateRegister(const Register Reg) const;
- int getStackSpaceFor(Register VirtReg);
- void spill(MachineBasicBlock::iterator Before, Register VirtReg,
- MCPhysReg AssignedReg, bool Kill, bool LiveOut);
- void reload(MachineBasicBlock::iterator Before, Register VirtReg,
- MCPhysReg PhysReg);
-
- bool mayLiveOut(Register VirtReg);
- bool mayLiveIn(Register VirtReg);
-
- void dumpState() const;
- };
+ void assignVirtToPhysReg(MachineInstr &MI, LiveReg &, MCPhysReg PhysReg);
+ void allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint,
+ bool LookAtPhysRegUses = false);
+ void allocVirtRegUndef(MachineOperand &MO);
+ void assignDanglingDebugValues(MachineInstr &Def, Register VirtReg,
+ MCPhysReg Reg);
+ bool defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
+ Register VirtReg);
+ bool defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
+ bool LookAtPhysRegUses = false);
+ bool useVirtReg(MachineInstr &MI, MachineOperand &MO, Register VirtReg);
+
+ MachineBasicBlock::iterator
+ getMBBBeginInsertionPoint(MachineBasicBlock &MBB,
+ SmallSet<Register, 2> &PrologLiveIns) const;
+
+ void reloadAtBegin(MachineBasicBlock &MBB);
+ bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
+
+ Register traceCopies(Register VirtReg) const;
+ Register traceCopyChain(Register Reg) const;
+
+ bool shouldAllocateRegister(const Register Reg) const;
+ int getStackSpaceFor(Register VirtReg);
+ void spill(MachineBasicBlock::iterator Before, Register VirtReg,
+ MCPhysReg AssignedReg, bool Kill, bool LiveOut);
+ void reload(MachineBasicBlock::iterator Before, Register VirtReg,
+ MCPhysReg PhysReg);
+
+ bool mayLiveOut(Register VirtReg);
+ bool mayLiveIn(Register VirtReg);
+
+ void dumpState() const;
+};
} // end anonymous namespace
@@ -431,8 +428,8 @@ bool RegAllocFast::mayLiveIn(Register VirtReg) {
/// DBG_VALUEs with \p VirtReg operands with the stack slot.
void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg,
MCPhysReg AssignedReg, bool Kill, bool LiveOut) {
- LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI)
- << " in " << printReg(AssignedReg, TRI));
+ LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " in "
+ << printReg(AssignedReg, TRI));
int FI = getStackSpaceFor(VirtReg);
LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');
@@ -503,9 +500,8 @@ void RegAllocFast::reload(MachineBasicBlock::iterator Before, Register VirtReg,
/// This is not just MBB.begin() because surprisingly we have EH_LABEL
/// instructions marking the beginning of a basic block. This means we must insert
/// new instructions after such labels...
-MachineBasicBlock::iterator
-RegAllocFast::getMBBBeginInsertionPoint(
- MachineBasicBlock &MBB, SmallSet<Register, 2> &PrologLiveIns) const {
+MachineBasicBlock::iterator RegAllocFast::getMBBBeginInsertionPoint(
+ MachineBasicBlock &MBB, SmallSet<Register, 2> &PrologLiveIns) const {
MachineBasicBlock::iterator I = MBB.begin();
while (I != MBB.end()) {
if (I->isLabel()) {
@@ -542,13 +538,12 @@ void RegAllocFast::reloadAtBegin(MachineBasicBlock &MBB) {
setPhysRegState(Reg, regLiveIn);
}
-
SmallSet<Register, 2> PrologLiveIns;
// The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
// of spilling here is deterministic, if arbitrary.
- MachineBasicBlock::iterator InsertBefore
- = getMBBBeginInsertionPoint(MBB, PrologLiveIns);
+ MachineBasicBlock::iterator InsertBefore =
+ getMBBBeginInsertionPoint(MBB, PrologLiveIns);
for (const LiveReg &LR : LiveVirtRegs) {
MCPhysReg PhysReg = LR.PhysReg;
if (PhysReg == 0)
@@ -634,12 +629,12 @@ void RegAllocFast::freePhysReg(MCPhysReg PhysReg) {
setPhysRegState(PhysReg, regFree);
return;
default: {
- LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
- assert(LRI != LiveVirtRegs.end());
- LLVM_DEBUG(dbgs() << ' ' << printReg(LRI->VirtReg, TRI) << '\n');
- setPhysRegState(LRI->PhysReg, regFree);
- LRI->PhysReg = 0;
- }
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end());
+ LLVM_DEBUG(dbgs() << ' ' << printReg(LRI->VirtReg, TRI) << '\n');
+ setPhysRegState(LRI->PhysReg, regFree);
+ LRI->PhysReg = 0;
+ }
return;
}
}
@@ -673,7 +668,7 @@ void RegAllocFast::assignDanglingDebugValues(MachineInstr &Definition,
if (UDBGValIter == DanglingDbgValues.end())
return;
- SmallVectorImpl<MachineInstr*> &Dangling = UDBGValIter->second;
+ SmallVectorImpl<MachineInstr *> &Dangling = UDBGValIter->second;
for (MachineInstr *DbgValue : Dangling) {
assert(DbgValue->isDebugValue());
if (!DbgValue->hasDebugOperandForReg(VirtReg))
@@ -683,10 +678,11 @@ void RegAllocFast::assignDanglingDebugValues(MachineInstr &Definition,
MCPhysReg SetToReg = Reg;
unsigned Limit = 20;
for (MachineBasicBlock::iterator I = std::next(Definition.getIterator()),
- E = DbgValue->getIterator(); I != E; ++I) {
+ E = DbgValue->getIterator();
+ I != E; ++I) {
if (I->modifiesRegister(Reg, TRI) || --Limit == 0) {
LLVM_DEBUG(dbgs() << "Register did not survive for " << *DbgValue
- << '\n');
+ << '\n');
SetToReg = 0;
break;
}
@@ -716,9 +712,7 @@ void RegAllocFast::assignVirtToPhysReg(MachineInstr &AtMI, LiveReg &LR,
assignDanglingDebugValues(AtMI, VirtReg, PhysReg);
}
-static bool isCoalescable(const MachineInstr &MI) {
- return MI.isFullCopy();
-}
+static bool isCoalescable(const MachineInstr &MI) { return MI.isFullCopy(); }
Register RegAllocFast::traceCopyChain(Register Reg) const {
static const unsigned ChainLengthLimit = 3;
@@ -757,8 +751,8 @@ Register RegAllocFast::traceCopies(Register VirtReg) const {
}
/// Allocates a physical register for VirtReg.
-void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR,
- Register Hint0, bool LookAtPhysRegUses) {
+void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0,
+ bool LookAtPhysRegUses) {
const Register VirtReg = LR.VirtReg;
assert(LR.PhysReg == 0);
@@ -784,7 +778,6 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR,
Hint0 = Register();
}
-
// Try other hint.
Register Hint1 = traceCopies(VirtReg);
if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) && RC.contains(Hint1) &&
@@ -792,12 +785,12 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR,
// Take hint if the register is currently free.
if (isPhysRegFree(Hint1)) {
LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI)
- << '\n');
+ << '\n');
assignVirtToPhysReg(MI, LR, Hint1);
return;
} else {
LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint1, TRI)
- << " occupied\n");
+ << " occupied\n");
}
} else {
Hint1 = Register();
@@ -891,12 +884,12 @@ bool RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
LRI->PhysReg = 0;
allocVirtReg(MI, *LRI, 0, true);
MachineBasicBlock::iterator InsertBefore =
- std::next((MachineBasicBlock::iterator)MI.getIterator());
+ std::next((MachineBasicBlock::iterator)MI.getIterator());
LLVM_DEBUG(dbgs() << "Copy " << printReg(LRI->PhysReg, TRI) << " to "
<< printReg(PrevReg, TRI) << '\n');
BuildMI(*MBB, InsertBefore, MI.getDebugLoc(),
TII->get(TargetOpcode::COPY), PrevReg)
- .addReg(LRI->PhysReg, llvm::RegState::Kill);
+ .addReg(LRI->PhysReg, llvm::RegState::Kill);
}
MachineOperand &MO = MI.getOperand(OpNum);
if (MO.getSubReg() && !MO.isUndef()) {
@@ -932,9 +925,18 @@ bool RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
}
}
}
- if (LRI->PhysReg == 0)
+ if (LRI->PhysReg == 0) {
allocVirtReg(MI, *LRI, 0, LookAtPhysRegUses);
- else {
+ // If no physical register is available for LRI, we pick an arbitrary one
+ // from the allocation order and bail out of this function immediately.
+ if (LRI->Error) {
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
+ if (AllocationOrder.empty())
+ return setPhysReg(MI, MO, MCRegister::NoRegister);
+ return setPhysReg(MI, MO, *AllocationOrder.begin());
+ }
+ } else {
assert(!isRegUsedInInstr(LRI->PhysReg, LookAtPhysRegUses) &&
"TODO: preassign mismatch");
LLVM_DEBUG(dbgs() << "In def of " << printReg(VirtReg, TRI)
@@ -943,13 +945,12 @@ bool RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
}
MCPhysReg PhysReg = LRI->PhysReg;
- assert(PhysReg != 0 && "Register not assigned");
if (LRI->Reloaded || LRI->LiveOut) {
if (!MI.isImplicitDef()) {
MachineBasicBlock::iterator SpillBefore =
std::next((MachineBasicBlock::iterator)MI.getIterator());
- LLVM_DEBUG(dbgs() << "Spill Reason: LO: " << LRI->LiveOut << " RL: "
- << LRI->Reloaded << '\n');
+ LLVM_DEBUG(dbgs() << "Spill Reason: LO: " << LRI->LiveOut
+ << " RL: " << LRI->Reloaded << '\n');
bool Kill = LRI->LastUse == nullptr;
spill(SpillBefore, VirtReg, PhysReg, Kill, LRI->LiveOut);
@@ -961,8 +962,8 @@ bool RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
for (MachineOperand &MO : MI.operands()) {
if (MO.isMBB()) {
MachineBasicBlock *Succ = MO.getMBB();
- TII->storeRegToStackSlot(*Succ, Succ->begin(), PhysReg, Kill,
- FI, &RC, TRI, VirtReg);
+ TII->storeRegToStackSlot(*Succ, Succ->begin(), PhysReg, Kill, FI,
+ &RC, TRI, VirtReg);
++NumStores;
Succ->addLiveIn(PhysReg);
}
@@ -983,17 +984,15 @@ bool RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
/// Allocates a register for a VirtReg use.
/// \return true if MI's MachineOperands were re-arranged/invalidated.
-bool RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum,
+bool RegAllocFast::useVirtReg(MachineInstr &MI, MachineOperand &MO,
Register VirtReg) {
assert(VirtReg.isVirtual() && "Not a virtual register");
if (!shouldAllocateRegister(VirtReg))
return false;
- MachineOperand &MO = MI.getOperand(OpNum);
LiveRegMap::iterator LRI;
bool New;
std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
if (New) {
- MachineOperand &MO = MI.getOperand(OpNum);
if (!MO.isKill()) {
if (mayLiveOut(VirtReg)) {
LRI->LiveOut = true;
@@ -1024,6 +1023,8 @@ bool RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum,
if (LRI->Error) {
const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
+ if (AllocationOrder.empty())
+ return setPhysReg(MI, MO, MCRegister::NoRegister);
return setPhysReg(MI, MO, *AllocationOrder.begin());
}
}
@@ -1096,8 +1097,10 @@ void RegAllocFast::dumpState() const {
assert(I != LiveVirtRegs.end() && "have LiveVirtRegs entry");
if (I->LiveOut || I->Reloaded) {
dbgs() << '[';
- if (I->LiveOut) dbgs() << 'O';
- if (I->Reloaded) dbgs() << 'R';
+ if (I->LiveOut)
+ dbgs() << 'O';
+ if (I->Reloaded)
+ dbgs() << 'R';
dbgs() << ']';
}
assert(TRI->hasRegUnit(I->PhysReg, Unit) && "inverse mapping present");
@@ -1112,8 +1115,7 @@ void RegAllocFast::dumpState() const {
assert(VirtReg.isVirtual() && "Bad map key");
MCPhysReg PhysReg = LR.PhysReg;
if (PhysReg != 0) {
- assert(Register::isPhysicalRegister(PhysReg) &&
- "mapped to physreg");
+ assert(Register::isPhysicalRegister(PhysReg) && "mapped to physreg");
for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
assert(RegUnitStates[Unit] == VirtReg && "inverse map valid");
}
@@ -1123,8 +1125,8 @@ void RegAllocFast::dumpState() const {
#endif
/// Count number of defs consumed from each register class by \p Reg
-void RegAllocFast::addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts,
- Register Reg) const {
+void RegAllocFast::addRegClassDefCounts(
+ std::vector<unsigned> &RegClassDefCounts, Register Reg) const {
assert(RegClassDefCounts.size() == TRI->getNumRegClasses());
if (Reg.isVirtual()) {
@@ -1220,6 +1222,17 @@ void RegAllocFast::findAndSortDefOperandIndexes(const MachineInstr &MI) {
});
}
+// Returns true if MO is tied and the operand it's tied to is not Undef (note
+// that being non-Undef is not the same thing as being a Def).
+static bool isTiedToNotUndef(const MachineOperand &MO) {
+ if (!MO.isTied())
+ return false;
+ const MachineInstr &MI = *MO.getParent();
+ unsigned TiedIdx = MI.findTiedOperandIdx(MI.getOperandNo(&MO));
+ const MachineOperand &TiedMO = MI.getOperand(TiedIdx);
+ return !TiedMO.isUndef();
+}
+
void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// The basic algorithm here is:
// 1. Mark registers of def operands as free
@@ -1237,12 +1250,6 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
RegMasks.clear();
BundleVirtRegsMap.clear();
- auto TiedOpIsUndef = [&](const MachineOperand &MO, unsigned Idx) {
- assert(MO.isTied());
- unsigned TiedIdx = MI.findTiedOperandIdx(Idx);
- const MachineOperand &TiedMO = MI.getOperand(TiedIdx);
- return TiedMO.isUndef();
- };
// Scan for special cases; Apply pre-assigned register defs to state.
bool HasPhysRegUse = false;
bool HasRegMask = false;
@@ -1250,8 +1257,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
bool HasDef = false;
bool HasEarlyClobber = false;
bool NeedToAssignLiveThroughs = false;
- for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
- MachineOperand &MO = MI.getOperand(I);
+ for (MachineOperand &MO : MI.operands()) {
if (MO.isReg()) {
Register Reg = MO.getReg();
if (Reg.isVirtual()) {
@@ -1264,8 +1270,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
HasEarlyClobber = true;
NeedToAssignLiveThroughs = true;
}
- if ((MO.isTied() && !TiedOpIsUndef(MO, I)) ||
- (MO.getSubReg() != 0 && !MO.isUndef()))
+ if (isTiedToNotUndef(MO) || (MO.getSubReg() != 0 && !MO.isUndef()))
NeedToAssignLiveThroughs = true;
}
} else if (Reg.isPhysical()) {
@@ -1310,35 +1315,32 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
for (uint16_t OpIdx : DefOperandIndexes) {
MachineOperand &MO = MI.getOperand(OpIdx);
LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n');
- unsigned Reg = MO.getReg();
- if (MO.isEarlyClobber() ||
- (MO.isTied() && !TiedOpIsUndef(MO, OpIdx)) ||
+ Register Reg = MO.getReg();
+ if (MO.isEarlyClobber() || isTiedToNotUndef(MO) ||
(MO.getSubReg() && !MO.isUndef())) {
ReArrangedImplicitOps = defineLiveThroughVirtReg(MI, OpIdx, Reg);
} else {
ReArrangedImplicitOps = defineVirtReg(MI, OpIdx, Reg);
}
- if (ReArrangedImplicitOps) {
- // Implicit operands of MI were re-arranged,
- // re-compute DefOperandIndexes.
+ // Implicit operands of MI were re-arranged,
+ // re-compute DefOperandIndexes.
+ if (ReArrangedImplicitOps)
break;
- }
}
}
} else {
// Assign virtual register defs.
while (ReArrangedImplicitOps) {
ReArrangedImplicitOps = false;
- for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
- MachineOperand &MO = MI.getOperand(I);
+ for (MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
Register Reg = MO.getReg();
if (Reg.isVirtual()) {
- ReArrangedImplicitOps = defineVirtReg(MI, I, Reg);
- if (ReArrangedImplicitOps) {
+ ReArrangedImplicitOps =
+ defineVirtReg(MI, MI.getOperandNo(&MO), Reg);
+ if (ReArrangedImplicitOps)
break;
- }
}
}
}
@@ -1348,8 +1350,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// Free registers occupied by defs.
// Iterate operands in reverse order, so we see the implicit super register
// defs first (we added them earlier in case of <def,read-undef>).
- for (signed I = MI.getNumOperands() - 1; I >= 0; --I) {
- MachineOperand &MO = MI.getOperand(I);
+ for (MachineOperand &MO : reverse(MI.operands())) {
if (!MO.isReg() || !MO.isDef())
continue;
@@ -1366,7 +1367,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
"tied def assigned to clobbered register");
// Do not free tied operands and early clobbers.
- if ((MO.isTied() && !TiedOpIsUndef(MO, I)) || MO.isEarlyClobber())
+ if (isTiedToNotUndef(MO) || MO.isEarlyClobber())
continue;
if (!Reg)
continue;
@@ -1407,8 +1408,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
continue;
if (MRI->isReserved(Reg))
continue;
- bool displacedAny = usePhysReg(MI, Reg);
- if (!displacedAny)
+ if (!usePhysReg(MI, Reg))
MO.setIsKill(true);
}
}
@@ -1420,8 +1420,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
bool ReArrangedImplicitMOs = true;
while (ReArrangedImplicitMOs) {
ReArrangedImplicitMOs = false;
- for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
- MachineOperand &MO = MI.getOperand(I);
+ for (MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isUse())
continue;
Register Reg = MO.getReg();
@@ -1439,7 +1438,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
assert(!MO.isInternalRead() && "Bundles not supported");
assert(MO.readsReg() && "reading use");
- ReArrangedImplicitMOs = useVirtReg(MI, I, Reg);
+ ReArrangedImplicitMOs = useVirtReg(MI, MO, Reg);
if (ReArrangedImplicitMOs)
break;
}
@@ -1461,7 +1460,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// Free early clobbers.
if (HasEarlyClobber) {
- for (MachineOperand &MO : llvm::reverse(MI.all_defs())) {
+ for (MachineOperand &MO : reverse(MI.all_defs())) {
if (!MO.isEarlyClobber())
continue;
assert(!MO.getSubReg() && "should be already handled in def processing");
@@ -1499,7 +1498,11 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
void RegAllocFast::handleDebugValue(MachineInstr &MI) {
// Ignore DBG_VALUEs that aren't based on virtual registers. These are
// mostly constants and frame indices.
- for (Register Reg : MI.getUsedDebugRegs()) {
+ assert(MI.isDebugValue() && "not a DBG_VALUE*");
+ for (const auto &MO : MI.debug_operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
if (!Reg.isVirtual())
continue;
if (!shouldAllocateRegister(Reg))
@@ -1572,10 +1575,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
// Traverse block in reverse order allocating instructions one by one.
for (MachineInstr &MI : reverse(MBB)) {
- LLVM_DEBUG(
- dbgs() << "\n>> " << MI << "Regs:";
- dumpState()
- );
+ LLVM_DEBUG(dbgs() << "\n>> " << MI << "Regs:"; dumpState());
// Special handling for debug values. Note that they are not allowed to
// affect codegen of the other instructions in any way.
@@ -1593,10 +1593,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
}
}
- LLVM_DEBUG(
- dbgs() << "Begin Regs:";
- dumpState()
- );
+ LLVM_DEBUG(dbgs() << "Begin Regs:"; dumpState());
// Spill all physical registers holding virtual registers now.
LLVM_DEBUG(dbgs() << "Loading live registers at begin of block.\n");
@@ -1615,7 +1612,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
if (!DbgValue->hasDebugOperandForReg(UDBGPair.first))
continue;
LLVM_DEBUG(dbgs() << "Register did not survive for " << *DbgValue
- << '\n');
+ << '\n');
DbgValue->setDebugValueUndef();
}
}
@@ -1663,9 +1660,7 @@ bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) {
return true;
}
-FunctionPass *llvm::createFastRegisterAllocator() {
- return new RegAllocFast();
-}
+FunctionPass *llvm::createFastRegisterAllocator() { return new RegAllocFast(); }
FunctionPass *llvm::createFastRegisterAllocator(RegClassFilterFunc Ftor,
bool ClearVirtRegs) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 48187e575494..a208bf89fadf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -23,7 +23,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -141,6 +140,12 @@ static cl::opt<bool> GreedyReverseLocalAssignment(
"shorter local live ranges will tend to be allocated first"),
cl::Hidden);
+static cl::opt<unsigned> SplitThresholdForRegWithHint(
+ "split-threshold-for-reg-with-hint",
+ cl::desc("The threshold for splitting a virtual register with a hint, in "
+ "percentate"),
+ cl::init(75), cl::Hidden);
+
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
createGreedyRegisterAllocator);
@@ -422,6 +427,11 @@ MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
evictInterference(VirtReg, PhysHint, NewVRegs);
return PhysHint;
}
+
+ // We can also split the virtual register in cold blocks.
+ if (trySplitAroundHintReg(PhysHint, VirtReg, NewVRegs, Order))
+ return 0;
+
// Record the missed hint, we may be able to recover
// at the end if the surrounding allocation changed.
SetOfBrokenHints.insert(&VirtReg);
@@ -596,7 +606,7 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
// Reset interference dependent info.
SplitConstraints.resize(UseBlocks.size());
- BlockFrequency StaticCost = 0;
+ BlockFrequency StaticCost = BlockFrequency(0);
for (unsigned I = 0; I != UseBlocks.size(); ++I) {
const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
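Several hunks in this file adapt to an upstream BlockFrequency API tightening: the integer constructor is now explicit, and debug printing goes through the free function printBlockFreq instead of MBFI->printBlockFreq. A hedged before/after sketch, assuming only the header named below:

    #include "llvm/Support/BlockFrequency.h"
    #include <cstdint>
    using llvm::BlockFrequency;

    void example() {
      // Previously: BlockFrequency Cost = 0;  // implicit conversion
      BlockFrequency Cost = BlockFrequency(0); // now spelled explicitly
      Cost += BlockFrequency(42);
      uint64_t Raw = Cost.getFrequency();      // raw scaled frequency
      (void)Raw;
    }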
@@ -757,10 +767,28 @@ bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
if (Cand.PhysReg) {
if (!addThroughConstraints(Cand.Intf, NewBlocks))
return false;
- } else
- // Provide a strong negative bias on through blocks to prevent unwanted
- // liveness on loop backedges.
- SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
+ } else {
+ // Unless the variable being spilled looks like a loop induction
+ // variable, which is expensive to spill around and better pushed into a
+ // condition inside the loop if possible, provide a strong negative bias
+ // on through blocks to prevent unwanted liveness on loop backedges.
+ bool PrefSpill = true;
+ if (SA->looksLikeLoopIV() && NewBlocks.size() >= 2) {
+ // Check that the current bundle is adding a Header + start+end of
+ // loop-internal blocks. If the block is indeed a header, don't mark
+ // the NewBlocks as PrefSpill, to allow the variable to stay live across
+ // Header<->Latch.
+ MachineLoop *L = Loops->getLoopFor(MF->getBlockNumbered(NewBlocks[0]));
+ if (L && L->getHeader()->getNumber() == (int)NewBlocks[0] &&
+ all_of(NewBlocks.drop_front(), [&](unsigned Block) {
+ return L == Loops->getLoopFor(MF->getBlockNumbered(Block));
+ }))
+ PrefSpill = false;
+ }
+ if (PrefSpill)
+ SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
+ }
AddedTo = ActiveBlocks.size();
// Perhaps iterating can enable more bundles?
@@ -821,7 +849,7 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
/// calcSpillCost - Compute how expensive it would be to split the live range in
/// SA around all use blocks instead of forming bundle regions.
BlockFrequency RAGreedy::calcSpillCost() {
- BlockFrequency Cost = 0;
+ BlockFrequency Cost = BlockFrequency(0);
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
unsigned Number = BI.MBB->getNumber();
@@ -841,7 +869,7 @@ BlockFrequency RAGreedy::calcSpillCost() {
///
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
const AllocationOrder &Order) {
- BlockFrequency GlobalCost = 0;
+ BlockFrequency GlobalCost = BlockFrequency(0);
const BitVector &LiveBundles = Cand.LiveBundles;
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
for (unsigned I = 0; I != UseBlocks.size(); ++I) {
@@ -1045,13 +1073,13 @@ MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
if (HasCompact) {
// Yes, keep GlobalCand[0] as the compact region candidate.
NumCands = 1;
- BestCost = BlockFrequency::getMaxFrequency();
+ BestCost = BlockFrequency::max();
} else {
// No benefit from the compact region, our fallback will be per-block
// splitting. Make sure we find a solution that is cheaper than spilling.
BestCost = SpillCost;
- LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = ";
- MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
+ LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = "
+ << printBlockFreq(*MBFI, BestCost) << '\n');
}
unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
@@ -1064,86 +1092,97 @@ MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
}
-unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
+unsigned
+RAGreedy::calculateRegionSplitCostAroundReg(MCPhysReg PhysReg,
AllocationOrder &Order,
BlockFrequency &BestCost,
unsigned &NumCands,
- bool IgnoreCSR) {
- unsigned BestCand = NoCand;
- for (MCPhysReg PhysReg : Order) {
- assert(PhysReg);
- if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
- continue;
-
- // Discard bad candidates before we run out of interference cache cursors.
- // This will only affect register classes with a lot of registers (>32).
- if (NumCands == IntfCache.getMaxCursors()) {
- unsigned WorstCount = ~0u;
- unsigned Worst = 0;
- for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
- if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
- continue;
- unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
- if (Count < WorstCount) {
- Worst = CandIndex;
- WorstCount = Count;
- }
+ unsigned &BestCand) {
+ // Discard bad candidates before we run out of interference cache cursors.
+ // This will only affect register classes with a lot of registers (>32).
+ if (NumCands == IntfCache.getMaxCursors()) {
+ unsigned WorstCount = ~0u;
+ unsigned Worst = 0;
+ for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
+ if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
+ continue;
+ unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
+ if (Count < WorstCount) {
+ Worst = CandIndex;
+ WorstCount = Count;
}
- --NumCands;
- GlobalCand[Worst] = GlobalCand[NumCands];
- if (BestCand == NumCands)
- BestCand = Worst;
}
+ --NumCands;
+ GlobalCand[Worst] = GlobalCand[NumCands];
+ if (BestCand == NumCands)
+ BestCand = Worst;
+ }
- if (GlobalCand.size() <= NumCands)
- GlobalCand.resize(NumCands+1);
- GlobalSplitCandidate &Cand = GlobalCand[NumCands];
- Cand.reset(IntfCache, PhysReg);
+ if (GlobalCand.size() <= NumCands)
+ GlobalCand.resize(NumCands+1);
+ GlobalSplitCandidate &Cand = GlobalCand[NumCands];
+ Cand.reset(IntfCache, PhysReg);
- SpillPlacer->prepare(Cand.LiveBundles);
- BlockFrequency Cost;
- if (!addSplitConstraints(Cand.Intf, Cost)) {
- LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
- continue;
- }
- LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = ";
- MBFI->printBlockFreq(dbgs(), Cost));
- if (Cost >= BestCost) {
- LLVM_DEBUG({
- if (BestCand == NoCand)
- dbgs() << " worse than no bundles\n";
- else
- dbgs() << " worse than "
- << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
- });
- continue;
- }
- if (!growRegion(Cand)) {
- LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
- continue;
- }
+ SpillPlacer->prepare(Cand.LiveBundles);
+ BlockFrequency Cost;
+ if (!addSplitConstraints(Cand.Intf, Cost)) {
+ LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
+ return BestCand;
+ }
+ LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI)
+ << "\tstatic = " << printBlockFreq(*MBFI, Cost));
+ if (Cost >= BestCost) {
+ LLVM_DEBUG({
+ if (BestCand == NoCand)
+ dbgs() << " worse than no bundles\n";
+ else
+ dbgs() << " worse than "
+ << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
+ });
+ return BestCand;
+ }
+ if (!growRegion(Cand)) {
+ LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
+ return BestCand;
+ }
+
+ SpillPlacer->finish();
+
+ // No live bundles, defer to splitSingleBlocks().
+ if (!Cand.LiveBundles.any()) {
+ LLVM_DEBUG(dbgs() << " no bundles.\n");
+ return BestCand;
+ }
- SpillPlacer->finish();
+ Cost += calcGlobalSplitCost(Cand, Order);
+ LLVM_DEBUG({
+ dbgs() << ", total = " << printBlockFreq(*MBFI, Cost) << " with bundles";
+ for (int I : Cand.LiveBundles.set_bits())
+ dbgs() << " EB#" << I;
+ dbgs() << ".\n";
+ });
+ if (Cost < BestCost) {
+ BestCand = NumCands;
+ BestCost = Cost;
+ }
+ ++NumCands;
- // No live bundles, defer to splitSingleBlocks().
- if (!Cand.LiveBundles.any()) {
- LLVM_DEBUG(dbgs() << " no bundles.\n");
+ return BestCand;
+}
+
+unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ BlockFrequency &BestCost,
+ unsigned &NumCands,
+ bool IgnoreCSR) {
+ unsigned BestCand = NoCand;
+ for (MCPhysReg PhysReg : Order) {
+ assert(PhysReg);
+ if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
continue;
- }
- Cost += calcGlobalSplitCost(Cand, Order);
- LLVM_DEBUG({
- dbgs() << ", total = ";
- MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";
- for (int I : Cand.LiveBundles.set_bits())
- dbgs() << " EB#" << I;
- dbgs() << ".\n";
- });
- if (Cost < BestCost) {
- BestCand = NumCands;
- BestCost = Cost;
- }
- ++NumCands;
+ calculateRegionSplitCostAroundReg(PhysReg, Order, BestCost, NumCands,
+ BestCand);
}
return BestCand;
@@ -1189,6 +1228,63 @@ unsigned RAGreedy::doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
return 0;
}
+// When VirtReg has a physical register Hint, this function tries to split
+// VirtReg around Hint if we can place new COPY instructions in cold blocks.
+bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint,
+ const LiveInterval &VirtReg,
+ SmallVectorImpl<Register> &NewVRegs,
+ AllocationOrder &Order) {
+ // Splitting VirtReg may generate COPY instructions in multiple cold basic
+ // blocks and increase code size, so we avoid it when the function is
+ // optimized for size.
+ if (MF->getFunction().hasOptSize())
+ return false;
+
+ // Don't allow repeated splitting, as a safeguard against looping.
+ if (ExtraInfo->getStage(VirtReg) >= RS_Split2)
+ return false;
+
+ BlockFrequency Cost = BlockFrequency(0);
+ Register Reg = VirtReg.reg();
+
+ // Compute the cost of assigning a physical register other than Hint to
+ // VirtReg. We define it as the total frequency of the COPY instructions
+ // to/from the Hint register that the assignment would break; after the
+ // split, those copies can be deleted.
+ for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
+ if (!TII->isFullCopyInstr(Instr))
+ continue;
+ Register OtherReg = Instr.getOperand(1).getReg();
+ if (OtherReg == Reg) {
+ OtherReg = Instr.getOperand(0).getReg();
+ if (OtherReg == Reg)
+ continue;
+ // Check if VirtReg interferes with OtherReg after this COPY instruction.
+ if (VirtReg.liveAt(LIS->getInstructionIndex(Instr).getRegSlot()))
+ continue;
+ }
+ MCRegister OtherPhysReg =
+ OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
+ if (OtherPhysReg == Hint)
+ Cost += MBFI->getBlockFreq(Instr.getParent());
+ }
+
+ // Decrease the cost so it will be split in colder blocks.
+ BranchProbability Threshold(SplitThresholdForRegWithHint, 100);
+ Cost *= Threshold;
+ if (Cost == BlockFrequency(0))
+ return false;
+
+ unsigned NumCands = 0;
+ unsigned BestCand = NoCand;
+ SA->analyze(&VirtReg);
+ calculateRegionSplitCostAroundReg(Hint, Order, Cost, NumCands, BestCand);
+ if (BestCand == NoCand)
+ return false;
+
+ doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// Per-Block Splitting
//===----------------------------------------------------------------------===//
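How the new split-threshold-for-reg-with-hint knob gates the path added above: the total frequency of the copies that the hint would save is scaled down to 75% and used as the cost ceiling that calculateRegionSplitCostAroundReg must beat. A small self-contained model of that gate (a simplification, not the LLVM implementation, which uses BlockFrequency/BranchProbability saturating arithmetic):

    #include <cstdint>

    // Approximates BlockFrequency *= BranchProbability(Percent, 100).
    static uint64_t scaleByPercent(uint64_t Freq, unsigned Percent) {
      return Freq / 100 * Percent + Freq % 100 * Percent / 100;
    }

    // Splitting around the hint is attempted only when some candidate
    // split costs less than Percent% of the broken-copy frequency.
    bool hintSplitProfitable(uint64_t BrokenCopyFreq, uint64_t BestSplitCost,
                             unsigned Percent = 75) {
      uint64_t Budget = scaleByPercent(BrokenCopyFreq, Percent);
      if (Budget == 0)
        return false; // mirrors the Cost == BlockFrequency(0) early-out
      return BestSplitCost < Budget;
    }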
@@ -1255,16 +1351,20 @@ static unsigned getNumAllocatableRegsForConstraints(
static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
- const MachineInstr &MI, Register Reg) {
+ const MachineInstr &FirstMI,
+ Register Reg) {
LaneBitmask Mask;
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || MO.getReg() != Reg)
- continue;
+ SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops;
+ (void)AnalyzeVirtRegInBundle(const_cast<MachineInstr &>(FirstMI), Reg, &Ops);
+ for (auto [MI, OpIdx] : Ops) {
+ const MachineOperand &MO = MI->getOperand(OpIdx);
+ assert(MO.isReg() && MO.getReg() == Reg);
unsigned SubReg = MO.getSubReg();
if (SubReg == 0 && MO.isUse()) {
- Mask |= MRI.getMaxLaneMaskForVReg(Reg);
- continue;
+ if (MO.isUndef())
+ continue;
+ return MRI.getMaxLaneMaskForVReg(Reg);
}
LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
@@ -1282,10 +1382,14 @@ static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI,
/// VirtReg.
static bool readsLaneSubset(const MachineRegisterInfo &MRI,
const MachineInstr *MI, const LiveInterval &VirtReg,
- const TargetRegisterInfo *TRI, SlotIndex Use) {
- // Early check the common case.
- if (MI->isCopy() &&
- MI->getOperand(0).getSubReg() == MI->getOperand(1).getSubReg())
+ const TargetRegisterInfo *TRI, SlotIndex Use,
+ const TargetInstrInfo *TII) {
+ // Early check the common case. Beware of the semi-formed bundles SplitKit
+ // creates by setting the bundle flag on copies without a matching BUNDLE.
+
+ auto DestSrc = TII->isCopyInstr(*MI);
+ if (DestSrc && !MI->isBundled() &&
+ DestSrc->Destination->getSubReg() == DestSrc->Source->getSubReg())
return false;
// FIXME: We're only considering uses, but should we consider defs too?
@@ -1344,14 +1448,14 @@ unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
// the allocation.
for (const SlotIndex Use : Uses) {
if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) {
- if (MI->isFullCopy() ||
+ if (TII->isFullCopyInstr(*MI) ||
(SplitSubClass &&
SuperRCNumAllocatableRegs ==
getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
TII, TRI, RegClassInfo)) ||
// TODO: Handle split for subranges with subclass constraints?
(!SplitSubClass && VirtReg.hasSubRanges() &&
- !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use))) {
+ !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use, TII))) {
LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI);
continue;
}
@@ -1548,8 +1652,8 @@ unsigned RAGreedy::tryLocalSplit(const LiveInterval &VirtReg,
float BestDiff = 0;
const float blockFreq =
- SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
- (1.0f / MBFI->getEntryFreq());
+ SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
+ (1.0f / MBFI->getEntryFreq().getFrequency());
SmallVector<float, 8> GapWeight;
for (MCPhysReg PhysReg : Order) {
@@ -2117,9 +2221,9 @@ void RAGreedy::initializeCSRCost() {
return;
// Raw cost is relative to Entry == 2^14; scale it appropriately.
- uint64_t ActualEntry = MBFI->getEntryFreq();
+ uint64_t ActualEntry = MBFI->getEntryFreq().getFrequency();
if (!ActualEntry) {
- CSRCost = 0;
+ CSRCost = BlockFrequency(0);
return;
}
uint64_t FixedEntry = 1 << 14;
@@ -2130,7 +2234,8 @@ void RAGreedy::initializeCSRCost() {
CSRCost /= BranchProbability(FixedEntry, ActualEntry);
else
// Can't use BranchProbability in general, since it takes 32-bit numbers.
- CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
+ CSRCost =
+ BlockFrequency(CSRCost.getFrequency() * (ActualEntry / FixedEntry));
}
/// Collect the hint info for \p Reg.
@@ -2138,7 +2243,7 @@ void RAGreedy::initializeCSRCost() {
/// \p Out is not cleared before being populated.
void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
- if (!Instr.isFullCopy())
+ if (!TII->isFullCopyInstr(Instr))
continue;
// Look for the other end of the copy.
Register OtherReg = Instr.getOperand(0).getReg();
@@ -2161,7 +2266,7 @@ void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
/// \return The cost of \p List for \p PhysReg.
BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
MCRegister PhysReg) {
- BlockFrequency Cost = 0;
+ BlockFrequency Cost = BlockFrequency(0);
for (const HintInfo &Info : List) {
if (Info.PhysReg != PhysReg)
Cost += Info.Freq;
@@ -2230,9 +2335,9 @@ void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
LLVM_DEBUG(dbgs() << "Checking profitability:\n");
BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
- LLVM_DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
- << "\nNew Cost: " << NewCopiesCost.getFrequency()
- << '\n');
+ LLVM_DEBUG(dbgs() << "Old Cost: " << printBlockFreq(*MBFI, OldCopiesCost)
+ << "\nNew Cost: "
+ << printBlockFreq(*MBFI, NewCopiesCost) << '\n');
if (OldCopiesCost < NewCopiesCost) {
LLVM_DEBUG(dbgs() << "=> Not profitable.\n");
continue;
@@ -2327,6 +2432,9 @@ MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
} else
return PhysReg;
}
+ // Non-empty NewVRegs means VirtReg has been split.
+ if (!NewVRegs.empty())
+ return 0;
LiveRangeStage Stage = ExtraInfo->getStage(VirtReg);
LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
@@ -2453,9 +2561,10 @@ RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
MI.getOpcode() == TargetOpcode::STATEPOINT;
};
for (MachineInstr &MI : MBB) {
- if (MI.isCopy()) {
- const MachineOperand &Dest = MI.getOperand(0);
- const MachineOperand &Src = MI.getOperand(1);
+ auto DestSrc = TII->isCopyInstr(MI);
+ if (DestSrc) {
+ const MachineOperand &Dest = *DestSrc->Destination;
+ const MachineOperand &Src = *DestSrc->Source;
Register SrcReg = Src.getReg();
Register DestReg = Dest.getReg();
// Only count `COPY`s with a virtual register as source or destination.
@@ -2616,6 +2725,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
return false;
Indexes = &getAnalysis<SlotIndexes>();
+ // Renumber to get accurate and consistent results from
+ // SlotIndexes::getApproxInstrDistance.
+ Indexes->packIndexes();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
DomTree = &getAnalysis<MachineDominatorTree>();
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h
index 0f8f9a7d5811..1941643bba9e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h
@@ -20,10 +20,8 @@
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
@@ -348,6 +346,12 @@ private:
const SmallVirtRegSet &);
MCRegister tryRegionSplit(const LiveInterval &, AllocationOrder &,
SmallVectorImpl<Register> &);
+ /// Calculate cost of region splitting around the specified register.
+ unsigned calculateRegionSplitCostAroundReg(MCPhysReg PhysReg,
+ AllocationOrder &Order,
+ BlockFrequency &BestCost,
+ unsigned &NumCands,
+ unsigned &BestCand);
/// Calculate cost of region splitting.
unsigned calculateRegionSplitCost(const LiveInterval &VirtReg,
AllocationOrder &Order,
@@ -356,6 +360,10 @@ private:
/// Perform region splitting.
unsigned doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
bool HasCompact, SmallVectorImpl<Register> &NewVRegs);
+ /// Try to split VirtReg around physical Hint register.
+ bool trySplitAroundHintReg(MCPhysReg Hint, const LiveInterval &VirtReg,
+ SmallVectorImpl<Register> &NewVRegs,
+ AllocationOrder &Order);
/// Check other options before using a callee-saved register for the first
/// time.
MCRegister tryAssignCSRFirstTime(const LiveInterval &VirtReg,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
index 925a0f085c4b..b8ee5dc0f849 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -192,7 +192,7 @@ public:
void apply(PBQPRAGraph &G) override {
LiveIntervals &LIS = G.getMetadata().LIS;
- // A minimum spill costs, so that register constraints can can be set
+ // A minimum spill cost, so that register constraints can be set
// without normalization in the [0.0:MinSpillCost( interval.
const PBQP::PBQPNum MinSpillCost = 10.0;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp
index e420283dfcfa..dc946b398457 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "RegAllocScore.h"
-#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/ilist_iterator.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterBank.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBank.cpp
index 8e0a0b0dc282..bdc6df78fd3d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterBank.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBank.cpp
@@ -20,19 +20,8 @@
using namespace llvm;
-const unsigned RegisterBank::InvalidID = UINT_MAX;
-
-RegisterBank::RegisterBank(unsigned ID, const char *Name,
- const uint32_t *CoveredClasses,
- unsigned NumRegClasses)
- : ID(ID), Name(Name) {
- ContainedRegClasses.resize(NumRegClasses);
- ContainedRegClasses.setBitsInMask(CoveredClasses);
-}
-
bool RegisterBank::verify(const RegisterBankInfo &RBI,
const TargetRegisterInfo &TRI) const {
- assert(isValid() && "Invalid register bank");
for (unsigned RCId = 0, End = TRI.getNumRegClasses(); RCId != End; ++RCId) {
const TargetRegisterClass &RC = *TRI.getRegClass(RCId);
@@ -61,14 +50,7 @@ bool RegisterBank::verify(const RegisterBankInfo &RBI,
}
bool RegisterBank::covers(const TargetRegisterClass &RC) const {
- assert(isValid() && "RB hasn't been initialized yet");
- return ContainedRegClasses.test(RC.getID());
-}
-
-bool RegisterBank::isValid() const {
- return ID != InvalidID && Name != nullptr &&
- // A register bank that does not cover anything is useless.
- !ContainedRegClasses.empty();
+ return (CoveredClasses[RC.getID() / 32] & (1U << RC.getID() % 32)) != 0;
}
bool RegisterBank::operator==(const RegisterBank &OtherRB) const {
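The rewritten covers() replaces the old BitVector with a raw array of 32-bit words (CoveredClasses and NumRegClasses are members not shown in this diff); the test picks word ID / 32 and bit ID % 32. A standalone illustration of the same packed-bitset arithmetic:

    #include <bit>
    #include <cstdint>

    // Test bit ID in a packed array of 32-bit words, as covers() does.
    bool testBit(const uint32_t *Words, unsigned ID) {
      return (Words[ID / 32] & (1U << (ID % 32))) != 0;
    }

    // Count set bits across NumBits words' worth of IDs, as the print()
    // hunk above does with llvm::popcount.
    unsigned countBits(const uint32_t *Words, unsigned NumBits) {
      unsigned Count = 0;
      for (unsigned W = 0; W != (NumBits + 31) / 32; ++W)
        Count += std::popcount(Words[W]);
      return Count;
    }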
@@ -91,15 +73,18 @@ void RegisterBank::print(raw_ostream &OS, bool IsForDebug,
OS << getName();
if (!IsForDebug)
return;
+
+ unsigned Count = 0;
+ for (int i = 0, e = ((NumRegClasses + 31) / 32); i != e; ++i)
+ Count += llvm::popcount(CoveredClasses[i]);
+
OS << "(ID:" << getID() << ")\n"
- << "isValid:" << isValid() << '\n'
- << "Number of Covered register classes: " << ContainedRegClasses.count()
- << '\n';
+ << "Number of Covered register classes: " << Count << '\n';
// Print all the subclasses if we can.
// These register classes may not be properly initialized yet.
- if (!TRI || ContainedRegClasses.empty())
+ if (!TRI || NumRegClasses == 0)
return;
- assert(ContainedRegClasses.size() == TRI->getNumRegClasses() &&
+ assert(NumRegClasses == TRI->getNumRegClasses() &&
"TRI does not match the initialization process?");
OS << "Covered register classes:\n";
ListSeparator LS;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp
index 658a09fd8700..5548430d1b0a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp
@@ -61,7 +61,8 @@ RegisterBankInfo::RegisterBankInfo(const RegisterBank **RegBanks,
#ifndef NDEBUG
for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
assert(RegBanks[Idx] != nullptr && "Invalid RegisterBank");
- assert(RegBanks[Idx]->isValid() && "RegisterBank should be valid");
+ assert(RegBanks[Idx]->getID() == Idx &&
+ "RegisterBank ID should match index");
}
#endif // NDEBUG
}
@@ -494,7 +495,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
}
}
-unsigned RegisterBankInfo::getSizeInBits(Register Reg,
+TypeSize RegisterBankInfo::getSizeInBits(Register Reg,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
if (Reg.isPhysical()) {
@@ -552,7 +553,7 @@ bool RegisterBankInfo::ValueMapping::partsAllUniform() const {
}
bool RegisterBankInfo::ValueMapping::verify(const RegisterBankInfo &RBI,
- unsigned MeaningfulBitWidth) const {
+ TypeSize MeaningfulBitWidth) const {
assert(NumBreakDowns && "Value mapped nowhere?!");
unsigned OrigValueBitWidth = 0;
for (const RegisterBankInfo::PartialMapping &PartMap : *this) {
@@ -564,7 +565,8 @@ bool RegisterBankInfo::ValueMapping::verify(const RegisterBankInfo &RBI,
OrigValueBitWidth =
std::max(OrigValueBitWidth, PartMap.getHighBitIdx() + 1);
}
- assert(OrigValueBitWidth >= MeaningfulBitWidth &&
+ assert((MeaningfulBitWidth.isScalable() ||
+ OrigValueBitWidth >= MeaningfulBitWidth) &&
"Meaningful bits not covered by the mapping");
APInt ValueMask(OrigValueBitWidth, 0);
for (const RegisterBankInfo::PartialMapping &PartMap : *this) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
index e49885b6ad96..c1af37c8510f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -383,6 +383,11 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override;
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+
void releaseMemory() override;
/// This is the pass entry point.
@@ -1196,6 +1201,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
<< printMBBReference(MBB) << '\t' << CopyMI);
}
+ const bool IsUndefCopy = CopyMI.getOperand(1).isUndef();
+
// Remove CopyMI.
// Note: This is fine to remove the copy before updating the live-ranges.
// While updating the live-ranges, we only look at slot indices and
@@ -1209,6 +1216,19 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(),
&EndPoints);
BValNo->markUnused();
+
+ if (IsUndefCopy) {
+ // We're introducing an undef phi def, and need to set undef on any users of
+ // the previously local def to avoid artificially extending the lifetime
+ // through the block.
+ for (MachineOperand &MO : MRI->use_nodbg_operands(IntB.reg())) {
+ const MachineInstr &MI = *MO.getParent();
+ SlotIndex UseIdx = LIS->getInstructionIndex(MI);
+ if (!IntB.liveAt(UseIdx))
+ MO.setIsUndef(true);
+ }
+ }
+
// Extend IntB to the EndPoints of its original live interval.
LIS->extendToIndices(IntB, EndPoints);
@@ -1317,6 +1337,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
if (SrcIdx && DstIdx)
return false;
+ [[maybe_unused]] const unsigned DefSubIdx = DefMI->getOperand(0).getSubReg();
const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
if (!DefMI->isImplicitDef()) {
if (DstReg.isPhysical()) {
@@ -1396,9 +1417,10 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
MachineOperand &MO = CopyMI->getOperand(I);
if (MO.isReg()) {
assert(MO.isImplicit() && "No explicit operands after implicit operands.");
- // Discard VReg implicit defs.
- if (MO.getReg().isPhysical())
- ImplicitOps.push_back(MO);
+ assert((MO.getReg().isPhysical() ||
+ (MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) &&
+ "unexpected implicit virtual register def");
+ ImplicitOps.push_back(MO);
}
}
@@ -1408,14 +1430,48 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
// We need to remember these so we can add intervals once we insert
// NewMI into SlotIndexes.
+ //
+ // We also expect to have tied implicit-defs of super registers originating
+ // from SUBREG_TO_REG, such as:
+ // $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
+ // undef %0.sub_32bit = MOV32r0 implicit-def dead $eflags, implicit-def %0
+ //
+ // The implicit-def of the super register may have been reduced to
+ // subregisters depending on the uses.
+
+ bool NewMIDefinesFullReg = false;
+
SmallVector<MCRegister, 4> NewMIImplDefs;
for (unsigned i = NewMI.getDesc().getNumOperands(),
e = NewMI.getNumOperands();
i != e; ++i) {
MachineOperand &MO = NewMI.getOperand(i);
if (MO.isReg() && MO.isDef()) {
- assert(MO.isImplicit() && MO.isDead() && MO.getReg().isPhysical());
- NewMIImplDefs.push_back(MO.getReg().asMCReg());
+ assert(MO.isImplicit());
+ if (MO.getReg().isPhysical()) {
+ if (MO.getReg() == DstReg)
+ NewMIDefinesFullReg = true;
+
+ assert(MO.isImplicit() && MO.getReg().isPhysical() &&
+ (MO.isDead() ||
+ (DefSubIdx &&
+ ((TRI->getSubReg(MO.getReg(), DefSubIdx) ==
+ MCRegister((unsigned)NewMI.getOperand(0).getReg())) ||
+ TRI->isSubRegisterEq(NewMI.getOperand(0).getReg(),
+ MO.getReg())))));
+ NewMIImplDefs.push_back(MO.getReg().asMCReg());
+ } else {
+ assert(MO.getReg() == NewMI.getOperand(0).getReg());
+
+ // We're only expecting another def of the main output, so the range
+ // should get updated with the regular output range.
+ //
+ // FIXME: The range updating below probably needs updating to look at
+ // the super register if subranges are tracked.
+ assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
+ "subrange update for implicit-def of super register may not be "
+ "properly handled");
+ }
}
}
@@ -1527,8 +1583,12 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
assert(DstReg.isPhysical() &&
"Only expect virtual or physical registers in remat");
NewMI.getOperand(0).setIsDead(true);
- NewMI.addOperand(MachineOperand::CreateReg(
- CopyDstReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/));
+
+ if (!NewMIDefinesFullReg) {
+ NewMI.addOperand(MachineOperand::CreateReg(
+ CopyDstReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/));
+ }
+
// Record small dead def live-ranges for all the subregisters
// of the destination register.
// Otherwise, variables that live through may miss some
@@ -1551,8 +1611,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
}
- if (NewMI.getOperand(0).getSubReg())
- NewMI.getOperand(0).setIsUndef();
+ NewMI.setRegisterDefReadUndef(NewMI.getOperand(0).getReg());
// Transfer over implicit operands to the rematerialized instruction.
for (MachineOperand &MO : ImplicitOps)
@@ -1649,12 +1708,19 @@ MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
// The source interval may also have been on an undef use, in which case the
// copy introduced a live value.
if (((V && V->isPHIDef()) || (!V && !DstLI.liveAt(Idx)))) {
- CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) {
MachineOperand &MO = CopyMI->getOperand(i-1);
- if (MO.isReg() && MO.isUse())
+ if (MO.isReg()) {
+ if (MO.isUse())
+ CopyMI->removeOperand(i - 1);
+ } else {
+ assert(MO.isImm() &&
+ CopyMI->getOpcode() == TargetOpcode::SUBREG_TO_REG);
CopyMI->removeOperand(i-1);
+ }
}
+
+ CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an "
"implicit def\n");
return CopyMI;
@@ -2446,6 +2512,15 @@ class JoinVals {
Val() = default;
bool isAnalyzed() const { return WriteLanes.any(); }
+
+ /// Mark this value as an IMPLICIT_DEF which must be kept as if it were an
+ /// ordinary value.
+ void mustKeepImplicitDef(const TargetRegisterInfo &TRI,
+ const MachineInstr &ImpDef) {
+ assert(ImpDef.isImplicitDef());
+ ErasableImplicitDef = false;
+ ValidLanes = TRI.getSubRegIndexLaneMask(ImpDef.getOperand(0).getSubReg());
+ }
};
/// One entry per value number in LI.
@@ -2787,13 +2862,20 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
//
// When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
// to erase the IMPLICIT_DEF instruction.
- MachineBasicBlock *OtherMBB = Indexes->getMBBFromIndex(V.OtherVNI->def);
- if (DefMI && DefMI->getParent() != OtherMBB) {
+ //
+ // Additionally we must keep an IMPLICIT_DEF if we're redefining an incoming
+ // value.
+
+ MachineInstr *OtherImpDef =
+ Indexes->getInstructionFromIndex(V.OtherVNI->def);
+ MachineBasicBlock *OtherMBB = OtherImpDef->getParent();
+ if (DefMI &&
+ (DefMI->getParent() != OtherMBB || LIS->isLiveInToMBB(LR, OtherMBB))) {
LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
<< " extends into "
<< printMBBReference(*DefMI->getParent())
<< ", keeping it.\n");
- OtherV.ErasableImplicitDef = false;
+ OtherV.mustKeepImplicitDef(*TRI, *OtherImpDef);
} else if (OtherMBB->hasEHPadSuccessor()) {
// If OtherV is defined in a basic block that has EH pad successors then
// we get the same problem not just if OtherV is live beyond its basic
@@ -2802,7 +2884,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
LLVM_DEBUG(
dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
<< " may be live into EH pad successors, keeping it.\n");
- OtherV.ErasableImplicitDef = false;
+ OtherV.mustKeepImplicitDef(*TRI, *OtherImpDef);
} else {
// We deferred clearing these lanes in case we needed to save them
OtherV.ValidLanes &= ~OtherV.WriteLanes;
@@ -2958,20 +3040,6 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
// The other value is going to be pruned if this join is successful.
assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
Val &OtherV = Other.Vals[V.OtherVNI->id];
- // We cannot erase an IMPLICIT_DEF if we don't have valid values for all
- // its lanes.
- if (OtherV.ErasableImplicitDef &&
- TrackSubRegLiveness &&
- (OtherV.ValidLanes & ~V.ValidLanes).any()) {
- LLVM_DEBUG(dbgs() << "Cannot erase implicit_def with missing values\n");
-
- OtherV.ErasableImplicitDef = false;
- // The valid lanes written by the implicit_def were speculatively cleared
- // before, so make this more conservative. It may be better to track this,
- // I haven't found a testcase where it matters.
- OtherV.ValidLanes = LaneBitmask::getAll();
- }
-
OtherV.Pruned = true;
[[fallthrough]];
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
index c00d3fde6426..0ac348954a63 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -59,175 +59,28 @@ void RegScavenger::init(MachineBasicBlock &MBB) {
MRI = &MF.getRegInfo();
LiveUnits.init(*TRI);
- assert((NumRegUnits == 0 || NumRegUnits == TRI->getNumRegUnits()) &&
- "Target changed?");
-
- // Self-initialize.
- if (!this->MBB) {
- NumRegUnits = TRI->getNumRegUnits();
- KillRegUnits.resize(NumRegUnits);
- DefRegUnits.resize(NumRegUnits);
- TmpRegUnits.resize(NumRegUnits);
- }
this->MBB = &MBB;
for (ScavengedInfo &SI : Scavenged) {
SI.Reg = 0;
SI.Restore = nullptr;
}
-
- Tracking = false;
}
void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) {
init(MBB);
LiveUnits.addLiveIns(MBB);
+ MBBI = MBB.begin();
}
void RegScavenger::enterBasicBlockEnd(MachineBasicBlock &MBB) {
init(MBB);
LiveUnits.addLiveOuts(MBB);
-
- // Move internal iterator at the last instruction of the block.
- if (!MBB.empty()) {
- MBBI = std::prev(MBB.end());
- Tracking = true;
- }
-}
-
-void RegScavenger::addRegUnits(BitVector &BV, MCRegister Reg) {
- for (MCRegUnit Unit : TRI->regunits(Reg))
- BV.set(Unit);
-}
-
-void RegScavenger::removeRegUnits(BitVector &BV, MCRegister Reg) {
- for (MCRegUnit Unit : TRI->regunits(Reg))
- BV.reset(Unit);
-}
-
-void RegScavenger::determineKillsAndDefs() {
- assert(Tracking && "Must be tracking to determine kills and defs");
-
- MachineInstr &MI = *MBBI;
- assert(!MI.isDebugInstr() && "Debug values have no kills or defs");
-
- // Find out which registers are early clobbered, killed, defined, and marked
- // def-dead in this instruction.
- KillRegUnits.reset();
- DefRegUnits.reset();
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isRegMask()) {
- TmpRegUnits.reset();
- for (unsigned RU = 0, RUEnd = TRI->getNumRegUnits(); RU != RUEnd; ++RU) {
- for (MCRegUnitRootIterator RURI(RU, TRI); RURI.isValid(); ++RURI) {
- if (MO.clobbersPhysReg(*RURI)) {
- TmpRegUnits.set(RU);
- break;
- }
- }
- }
-
- // Apply the mask.
- KillRegUnits |= TmpRegUnits;
- }
- if (!MO.isReg())
- continue;
- if (!MO.getReg().isPhysical() || isReserved(MO.getReg()))
- continue;
- MCRegister Reg = MO.getReg().asMCReg();
-
- if (MO.isUse()) {
- // Ignore undef uses.
- if (MO.isUndef())
- continue;
- if (MO.isKill())
- addRegUnits(KillRegUnits, Reg);
- } else {
- assert(MO.isDef());
- if (MO.isDead())
- addRegUnits(KillRegUnits, Reg);
- else
- addRegUnits(DefRegUnits, Reg);
- }
- }
-}
-
-void RegScavenger::forward() {
- // Move ptr forward.
- if (!Tracking) {
- MBBI = MBB->begin();
- Tracking = true;
- } else {
- assert(MBBI != MBB->end() && "Already past the end of the basic block!");
- MBBI = std::next(MBBI);
- }
- assert(MBBI != MBB->end() && "Already at the end of the basic block!");
-
- MachineInstr &MI = *MBBI;
-
- for (ScavengedInfo &I : Scavenged) {
- if (I.Restore != &MI)
- continue;
-
- I.Reg = 0;
- I.Restore = nullptr;
- }
-
- if (MI.isDebugOrPseudoInstr())
- return;
-
- determineKillsAndDefs();
-
- // Verify uses and defs.
-#ifndef NDEBUG
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg())
- continue;
- Register Reg = MO.getReg();
- if (!Reg.isPhysical() || isReserved(Reg))
- continue;
- if (MO.isUse()) {
- if (MO.isUndef())
- continue;
- if (!isRegUsed(Reg)) {
- // Check if it's partially live: e.g.
- // D0 = insert_subreg undef D0, S0
- // ... D0
- // The problem is the insert_subreg could be eliminated. The use of
- // D0 is using a partially undef value. This is not *incorrect* since
- // S1 can be freely clobbered.
- // Ideally we would like a way to model this, but leaving the
- // insert_subreg around causes both correctness and performance issues.
- if (none_of(TRI->subregs(Reg),
- [&](MCPhysReg SR) { return isRegUsed(SR); }) &&
- none_of(TRI->superregs(Reg),
- [&](MCPhysReg SR) { return isRegUsed(SR); })) {
- MBB->getParent()->verify(nullptr, "In Register Scavenger");
- llvm_unreachable("Using an undefined register!");
- }
- }
- } else {
- assert(MO.isDef());
-#if 0
- // FIXME: Enable this once we've figured out how to correctly transfer
- // implicit kills during codegen passes like the coalescer.
- assert((KillRegs.test(Reg) || isUnused(Reg) ||
- isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) &&
- "Re-defining a live register!");
-#endif
- }
- }
-#endif // NDEBUG
-
- // Commit the changes.
- setUnused(KillRegUnits);
- setUsed(DefRegUnits);
+ MBBI = MBB.end();
}
void RegScavenger::backward() {
- assert(Tracking && "Must be tracking to determine kills and defs");
-
- const MachineInstr &MI = *MBBI;
+ const MachineInstr &MI = *--MBBI;
LiveUnits.stepBackward(MI);
// Expire scavenge spill frameindex uses.
@@ -237,12 +90,6 @@ void RegScavenger::backward() {
I.Restore = nullptr;
}
}
-
- if (MBBI == MBB->begin()) {
- MBBI = MachineBasicBlock::iterator(nullptr);
- Tracking = false;
- } else
- --MBBI;
}
bool RegScavenger::isRegUsed(Register Reg, bool includeReserved) const {
@@ -456,9 +303,8 @@ Register RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
// Find the register whose use is furthest away.
MachineBasicBlock::iterator UseMI;
ArrayRef<MCPhysReg> AllocationOrder = RC.getRawAllocationOrder(MF);
- std::pair<MCPhysReg, MachineBasicBlock::iterator> P =
- findSurvivorBackwards(*MRI, MBBI, To, LiveUnits, AllocationOrder,
- RestoreAfter);
+ std::pair<MCPhysReg, MachineBasicBlock::iterator> P = findSurvivorBackwards(
+ *MRI, std::prev(MBBI), To, LiveUnits, AllocationOrder, RestoreAfter);
MCPhysReg Reg = P.first;
MachineBasicBlock::iterator SpillBefore = P.second;
// Found an available register?
@@ -473,9 +319,8 @@ Register RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
assert(Reg != 0 && "No register left to scavenge!");
- MachineBasicBlock::iterator ReloadAfter =
- RestoreAfter ? std::next(MBBI) : MBBI;
- MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter);
+ MachineBasicBlock::iterator ReloadBefore =
+ RestoreAfter ? std::next(MBBI) : MBBI;
if (ReloadBefore != MBB.end())
LLVM_DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n');
ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore);
@@ -553,9 +398,9 @@ static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI,
unsigned InitialNumVirtRegs = MRI.getNumVirtRegs();
bool NextInstructionReadsVReg = false;
for (MachineBasicBlock::iterator I = MBB.end(); I != MBB.begin(); ) {
- --I;
- // Move RegScavenger to the position between *I and *std::next(I).
+ // Move RegScavenger to the position between *std::prev(I) and *I.
RS.backward(I);
+ --I;
// Look for unassigned vregs in the uses of *std::next(I).
if (NextInstructionReadsVReg) {
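With forward() and the Tracking flag removed, the scavenger's position convention changes: backward(I) now decrements the internal iterator itself (processing *std::prev(I)), so callers step their own iterator after the call rather than before, exactly as the loop above shows. A hedged sketch of the resulting reverse-scan idiom, assuming the usual MachineBasicBlock iterator types:

    // RS sits between std::prev(I) and I before each step; backward(I)
    // consumes *std::prev(I) internally via *--MBBI.
    for (MachineBasicBlock::iterator I = MBB.end(); I != MBB.begin();) {
      RS.backward(I); // steps the scavenger over std::prev(I)
      --I;            // caller catches up to the scavenger's position
    }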
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp
index feb31e59f5fd..ba8dd49ba929 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp
@@ -105,7 +105,7 @@ static bool reduceDbgValsForwardScan(MachineBasicBlock &MBB) {
MachineOperand &Loc = MI.getDebugOperand(0);
if (!Loc.isReg()) {
- // If it it's not a register, just stop tracking such variable.
+ // If it's not a register, just stop tracking such a variable.
if (VMI != VariableMap.end())
VariableMap.erase(VMI);
continue;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 57cd1fcffb61..36c91b7fa97e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -155,8 +155,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
// Try to find the mapping for the scalar version of this intrinsic
// and the exact vector width of the call operands in the
// TargetLibraryInfo.
- const std::string TLIName =
- std::string(TLI.getVectorizedFunction(ScalarName, VF));
+ StringRef TLIName = TLI.getVectorizedFunction(ScalarName, VF);
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `"
<< ScalarName << "` and vector width " << VF << ".\n");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
index bcad7a3f24da..88db57ad46b9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
@@ -14,6 +14,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/SafeStack.h"
#include "SafeStackLayout.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
@@ -192,7 +193,7 @@ public:
SafeStack(Function &F, const TargetLoweringBase &TL, const DataLayout &DL,
DomTreeUpdater *DTU, ScalarEvolution &SE)
: F(F), TL(TL), DL(DL), DTU(DTU), SE(SE),
- StackPtrTy(Type::getInt8PtrTy(F.getContext())),
+ StackPtrTy(PointerType::getUnqual(F.getContext())),
IntPtrTy(DL.getIntPtrType(F.getContext())),
Int32Ty(Type::getInt32Ty(F.getContext())),
Int8Ty(Type::getInt8Ty(F.getContext())) {}
@@ -793,7 +794,7 @@ bool SafeStack::run() {
DILocation::get(SP->getContext(), SP->getScopeLine(), 0, SP));
if (SafeStackUsePointerAddress) {
FunctionCallee Fn = F.getParent()->getOrInsertFunction(
- "__safestack_pointer_address", StackPtrTy->getPointerTo(0));
+ "__safestack_pointer_address", IRB.getPtrTy(0));
UnsafeStackPtr = IRB.CreateCall(Fn);
} else {
UnsafeStackPtr = TL.getSafeStackPointerLocation(IRB);
@@ -927,6 +928,42 @@ public:
} // end anonymous namespace
+PreservedAnalyses SafeStackPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ LLVM_DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n");
+
+ if (!F.hasFnAttribute(Attribute::SafeStack)) {
+ LLVM_DEBUG(dbgs() << "[SafeStack] safestack is not requested"
+ " for this function\n");
+ return PreservedAnalyses::all();
+ }
+
+ if (F.isDeclaration()) {
+ LLVM_DEBUG(dbgs() << "[SafeStack] function definition"
+ " is not available\n");
+ return PreservedAnalyses::all();
+ }
+
+ auto *TL = TM->getSubtargetImpl(F)->getTargetLowering();
+ if (!TL)
+ report_fatal_error("TargetLowering instance is required");
+
+ auto &DL = F.getParent()->getDataLayout();
+
+ // preserve DominatorTree
+ auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+ auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+
+ bool Changed = SafeStack(F, *TL, DL, &DTU, SE).run();
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+
char SafeStackLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(SafeStackLegacyPass, DEBUG_TYPE,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
index cc29bdce1210..9002a7076840 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
@@ -52,7 +52,7 @@ bool MachineSanitizerBinaryMetadata::runOnMachineFunction(MachineFunction &MF) {
if (!MD)
return false;
const auto &Section = *cast<MDString>(MD->getOperand(0));
- if (!Section.getString().startswith(kSanitizerBinaryMetadataCoveredSection))
+ if (!Section.getString().starts_with(kSanitizerBinaryMetadataCoveredSection))
return false;
auto &AuxMDs = *cast<MDTuple>(MD->getOperand(1));
// Assume it currently only has features.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp
index 14ec41920e3e..de8e6f63794d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -201,7 +200,7 @@ void SUnit::removePred(const SDep &D) {
}
if (!isScheduled) {
if (D.isWeak()) {
- assert(WeakSuccsLeft > 0 && "WeakSuccsLeft will underflow!");
+ assert(N->WeakSuccsLeft > 0 && "WeakSuccsLeft will underflow!");
--N->WeakSuccsLeft;
} else {
assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 239b44857c28..0190fa345eb3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -211,7 +211,8 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
for (const MachineOperand &MO : ExitMI->all_uses()) {
Register Reg = MO.getReg();
if (Reg.isPhysical()) {
- Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
+ for (MCRegUnit Unit : TRI->regunits(Reg))
+ Uses.insert(PhysRegSUOper(&ExitSU, -1, Unit));
} else if (Reg.isVirtual() && MO.readsReg()) {
addVRegUseDeps(&ExitSU, MO.getOperandNo());
}
@@ -222,8 +223,11 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
// uses all the registers that are livein to the successor blocks.
for (const MachineBasicBlock *Succ : BB->successors()) {
for (const auto &LI : Succ->liveins()) {
- if (!Uses.contains(LI.PhysReg))
- Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg));
+ for (MCRegUnitMaskIterator U(LI.PhysReg, TRI); U.isValid(); ++U) {
+ auto [Unit, Mask] = *U;
+ if ((Mask & LI.LaneMask).any() && !Uses.contains(Unit))
+ Uses.insert(PhysRegSUOper(&ExitSU, -1, Unit));
+ }
}
}
}
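The live-in handling now expands each PhysReg into its register units and keeps only the units whose lane mask intersects the live-in's LaneMask, so a partially live-in register no longer pins its unrelated lanes at the block exit. A small standalone model of that filter (the types here are placeholders, not LLVM's):

    #include <cstdint>
    #include <vector>

    struct UnitMask {
      unsigned Unit;     // register-unit id
      uint64_t LaneMask; // lanes of the register that this unit covers
    };

    // Keep a unit only when its lanes intersect the live-in lanes,
    // mirroring the MCRegUnitMaskIterator loop above.
    std::vector<unsigned> liveUnits(const std::vector<UnitMask> &Units,
                                    uint64_t LiveLanes) {
      std::vector<unsigned> Out;
      for (const UnitMask &UM : Units)
        if (UM.LaneMask & LiveLanes)
          Out.push_back(UM.Unit);
      return Out;
    }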
@@ -234,48 +238,51 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
assert(MO.isDef() && "expect physreg def");
+ Register Reg = MO.getReg();
// Ask the target if address-backscheduling is desirable, and if so how much.
const TargetSubtargetInfo &ST = MF.getSubtarget();
// Only use any non-zero latency for real defs/uses, in contrast to
// "fake" operands added by regalloc.
- const MCInstrDesc *DefMIDesc = &SU->getInstr()->getDesc();
- bool ImplicitPseudoDef = (OperIdx >= DefMIDesc->getNumOperands() &&
- !DefMIDesc->hasImplicitDefOfPhysReg(MO.getReg()));
- for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
- Alias.isValid(); ++Alias) {
- for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
+ const MCInstrDesc &DefMIDesc = SU->getInstr()->getDesc();
+ bool ImplicitPseudoDef = (OperIdx >= DefMIDesc.getNumOperands() &&
+ !DefMIDesc.hasImplicitDefOfPhysReg(Reg));
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ for (RegUnit2SUnitsMap::iterator I = Uses.find(Unit); I != Uses.end();
+ ++I) {
SUnit *UseSU = I->SU;
if (UseSU == SU)
continue;
// Adjust the dependence latency using operand def/use information,
// then allow the target to perform its own adjustments.
- int UseOp = I->OpIdx;
- MachineInstr *RegUse = nullptr;
+ MachineInstr *UseInstr = nullptr;
+ int UseOpIdx = I->OpIdx;
+ bool ImplicitPseudoUse = false;
SDep Dep;
- if (UseOp < 0)
+ if (UseOpIdx < 0) {
Dep = SDep(SU, SDep::Artificial);
- else {
+ } else {
// Set the hasPhysRegDefs only for physreg defs that have a use within
// the scheduling region.
SU->hasPhysRegDefs = true;
- Dep = SDep(SU, SDep::Data, *Alias);
- RegUse = UseSU->getInstr();
+
+ UseInstr = UseSU->getInstr();
+ Register UseReg = UseInstr->getOperand(UseOpIdx).getReg();
+ const MCInstrDesc &UseMIDesc = UseInstr->getDesc();
+ ImplicitPseudoUse = UseOpIdx >= ((int)UseMIDesc.getNumOperands()) &&
+ !UseMIDesc.hasImplicitUseOfPhysReg(UseReg);
+
+ Dep = SDep(SU, SDep::Data, UseReg);
}
- const MCInstrDesc *UseMIDesc =
- (RegUse ? &UseSU->getInstr()->getDesc() : nullptr);
- bool ImplicitPseudoUse =
- (UseMIDesc && UseOp >= ((int)UseMIDesc->getNumOperands()) &&
- !UseMIDesc->hasImplicitUseOfPhysReg(*Alias));
if (!ImplicitPseudoDef && !ImplicitPseudoUse) {
Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
- RegUse, UseOp));
+ UseInstr, UseOpIdx));
} else {
Dep.setLatency(0);
}
- ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep);
+ ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOpIdx, Dep);
UseSU->addPred(Dep);
}
}
@@ -301,63 +308,68 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
// TODO: Using a latency of 1 here for output dependencies assumes
// there's no cost for reusing registers.
SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
- for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) {
- if (!Defs.contains(*Alias))
- continue;
- for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ for (RegUnit2SUnitsMap::iterator I = Defs.find(Unit); I != Defs.end();
+ ++I) {
SUnit *DefSU = I->SU;
if (DefSU == &ExitSU)
continue;
+ MachineInstr *DefInstr = DefSU->getInstr();
+ MachineOperand &DefMO = DefInstr->getOperand(I->OpIdx);
if (DefSU != SU &&
- (Kind != SDep::Output || !MO.isDead() ||
- !DefSU->getInstr()->registerDefIsDead(*Alias))) {
- SDep Dep(SU, Kind, /*Reg=*/*Alias);
- if (Kind != SDep::Anti)
+ (Kind != SDep::Output || !MO.isDead() || !DefMO.isDead())) {
+ SDep Dep(SU, Kind, DefMO.getReg());
+ if (Kind != SDep::Anti) {
Dep.setLatency(
- SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
+ SchedModel.computeOutputLatency(MI, OperIdx, DefInstr));
+ }
ST.adjustSchedDependency(SU, OperIdx, DefSU, I->OpIdx, Dep);
DefSU->addPred(Dep);
}
}
}
- if (!MO.isDef()) {
+ if (MO.isUse()) {
SU->hasPhysRegUses = true;
// Either insert a new Reg2SUnits entry with an empty SUnits list, or
// retrieve the existing SUnits list for this register's uses.
// Push this SUnit on the use list.
- Uses.insert(PhysRegSUOper(SU, OperIdx, Reg));
+ for (MCRegUnit Unit : TRI->regunits(Reg))
+ Uses.insert(PhysRegSUOper(SU, OperIdx, Unit));
if (RemoveKillFlags)
MO.setIsKill(false);
} else {
addPhysRegDataDeps(SU, OperIdx);
- // Clear previous uses and defs of this register and its subergisters.
- for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg)) {
- if (Uses.contains(SubReg))
- Uses.eraseAll(SubReg);
+ // Clear previous uses and defs of this register and its subregisters.
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ Uses.eraseAll(Unit);
if (!MO.isDead())
- Defs.eraseAll(SubReg);
+ Defs.eraseAll(Unit);
}
+
if (MO.isDead() && SU->isCall) {
// Calls will not be reordered because of chain dependencies (see
// below). Since call operands are dead, calls may continue to be added
// to the DefList making dependence checking quadratic in the size of
// the block. Instead, we leave only one call at the back of the
// DefList.
- Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
- Reg2SUnitsMap::iterator B = P.first;
- Reg2SUnitsMap::iterator I = P.second;
- for (bool isBegin = I == B; !isBegin; /* empty */) {
- isBegin = (--I) == B;
- if (!I->SU->isCall)
- break;
- I = Defs.erase(I);
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ RegUnit2SUnitsMap::RangePair P = Defs.equal_range(Unit);
+ RegUnit2SUnitsMap::iterator B = P.first;
+ RegUnit2SUnitsMap::iterator I = P.second;
+ for (bool isBegin = I == B; !isBegin; /* empty */) {
+ isBegin = (--I) == B;
+ if (!I->SU->isCall)
+ break;
+ I = Defs.erase(I);
+ }
}
}
// Defs are pushed in the order they are visited and never reordered.
- Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
+ for (MCRegUnit Unit : TRI->regunits(Reg))
+ Defs.insert(PhysRegSUOper(SU, OperIdx, Unit));
}
}
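
The hunk above switches the Defs/Uses maps from whole-register keys plus MCRegAliasIterator scans to register-unit keys: two physical registers alias exactly when they share at least one register unit, so iterating TRI->regunits(Reg) visits every aliasing entry directly. A standalone sketch of that invariant (plain C++ over a hypothetical toy register file, not LLVM's data structures):

#include <cassert>
#include <set>

// Toy model: a physical register is the set of atomic "units" it covers,
// e.g. AX covers {unit(AL), unit(AH)} while AL covers only {unit(AL)}.
using RegUnits = std::set<int>;

static bool alias(const RegUnits &A, const RegUnits &B) {
  // Two registers alias exactly when their unit sets intersect.
  for (int U : A)
    if (B.count(U))
      return true;
  return false;
}

int main() {
  RegUnits AX = {0, 1}, AL = {0}, AH = {1}, BL = {2};
  assert(alias(AX, AL) && alias(AX, AH));   // sub-register overlap
  assert(!alias(AL, AH) && !alias(AX, BL)); // disjoint units
  // Keying Defs/Uses by unit means "erase everything aliasing AX" is just
  // eraseAll(0) and eraseAll(1); no alias iterator is needed.
  return 0;
}
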
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
index 30d959704745..1316919e65da 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/SelectOptimize.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
@@ -39,7 +40,6 @@
#include <memory>
#include <queue>
#include <stack>
-#include <string>
using namespace llvm;
@@ -97,36 +97,22 @@ static cl::opt<bool>
namespace {
-class SelectOptimize : public FunctionPass {
+class SelectOptimizeImpl {
const TargetMachine *TM = nullptr;
const TargetSubtargetInfo *TSI = nullptr;
const TargetLowering *TLI = nullptr;
const TargetTransformInfo *TTI = nullptr;
const LoopInfo *LI = nullptr;
- DominatorTree *DT = nullptr;
- std::unique_ptr<BlockFrequencyInfo> BFI;
- std::unique_ptr<BranchProbabilityInfo> BPI;
+ BlockFrequencyInfo *BFI;
ProfileSummaryInfo *PSI = nullptr;
OptimizationRemarkEmitter *ORE = nullptr;
TargetSchedModel TSchedModel;
public:
- static char ID;
-
- SelectOptimize() : FunctionPass(ID) {
- initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<ProfileSummaryInfoWrapperPass>();
- AU.addRequired<TargetPassConfig>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
- }
+ SelectOptimizeImpl() = default;
+ SelectOptimizeImpl(const TargetMachine *TM) : TM(TM){};
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+ bool runOnFunction(Function &F, Pass &P);
private:
// Select groups consist of consecutive select instructions with the same
@@ -212,29 +198,94 @@ private:
// Returns true if the target architecture supports lowering a given select.
bool isSelectKindSupported(SelectInst *SI);
};
+
+class SelectOptimize : public FunctionPass {
+ SelectOptimizeImpl Impl;
+
+public:
+ static char ID;
+
+ SelectOptimize() : FunctionPass(ID) {
+ initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ return Impl.runOnFunction(F, *this);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ }
+};
+
} // namespace
+PreservedAnalyses SelectOptimizePass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ SelectOptimizeImpl Impl(TM);
+ return Impl.run(F, FAM);
+}
+
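
This hunk follows the usual LLVM recipe for supporting both pass managers: the pass logic moves into an Impl class with one entry point per manager, and thin wrappers adapt it to each. A minimal sketch of the shape (standalone C++ with hypothetical stand-in types, not the actual LLVM interfaces):

#include <cstdio>

struct Function {};                  // stand-ins for the LLVM types
struct FunctionAnalysisManager {};
struct LegacyPass {};

class ExampleImpl {                  // the pass logic lives here once
public:
  bool runOnFunction(Function &F) { return transform(F); }
private:
  bool transform(Function &) { return false; }
};

// New pass manager: analyses come from the FunctionAnalysisManager.
struct ExamplePass {
  bool run(Function &F, FunctionAnalysisManager &) {
    ExampleImpl Impl;
    return Impl.runOnFunction(F);
  }
};

// Legacy pass manager: analyses come from getAnalysis<> on the pass.
struct ExampleLegacy : LegacyPass {
  ExampleImpl Impl;
  bool runOnFunction(Function &F) { return Impl.runOnFunction(F); }
};

int main() {
  Function F;
  FunctionAnalysisManager FAM;
  std::printf("%d\n", ExamplePass().run(F, FAM));
  return 0;
}
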
char SelectOptimize::ID = 0;
INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
false)
FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
-bool SelectOptimize::runOnFunction(Function &F) {
- TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+PreservedAnalyses SelectOptimizeImpl::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ TSI = TM->getSubtargetImpl(F);
+ TLI = TSI->getTargetLowering();
+
+ // If none of the select types are supported then skip this pass.
+ // This is an optimization pass. Legality issues will be handled by
+ // instruction selection.
+ if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
+ !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
+ !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
+ return PreservedAnalyses::all();
+
+ TTI = &FAM.getResult<TargetIRAnalysis>(F);
+ if (!TTI->enableSelectOptimize())
+ return PreservedAnalyses::all();
+
+ PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F)
+ .getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ assert(PSI && "This pass requires module analysis pass `profile-summary`!");
+ BFI = &FAM.getResult<BlockFrequencyAnalysis>(F);
+
+ // When optimizing for size, selects are preferable over branches.
+ if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI))
+ return PreservedAnalyses::all();
+
+ LI = &FAM.getResult<LoopAnalysis>(F);
+ ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ TSchedModel.init(TSI);
+
+ bool Changed = optimizeSelects(F);
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+bool SelectOptimizeImpl::runOnFunction(Function &F, Pass &P) {
+ TM = &P.getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
TSI = TM->getSubtargetImpl(F);
TLI = TSI->getTargetLowering();
- // If none of the select types is supported then skip this pass.
+ // If none of the select types are supported then skip this pass.
// This is an optimization pass. Legality issues will be handled by
// instruction selection.
if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
@@ -242,27 +293,25 @@ bool SelectOptimize::runOnFunction(Function &F) {
!TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
return false;
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ TTI = &P.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
if (!TTI->enableSelectOptimize())
return false;
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- BPI.reset(new BranchProbabilityInfo(F, *LI));
- BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
- PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ LI = &P.getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ BFI = &P.getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
+ PSI = &P.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ ORE = &P.getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
TSchedModel.init(TSI);
// When optimizing for size, selects are preferable over branches.
- if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get()))
+ if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI))
return false;
return optimizeSelects(F);
}
-bool SelectOptimize::optimizeSelects(Function &F) {
+bool SelectOptimizeImpl::optimizeSelects(Function &F) {
  // Determine for which select groups it is profitable to convert to branches.
SelectGroups ProfSIGroups;
// Base heuristics apply only to non-loops and outer loops.
@@ -278,8 +327,8 @@ bool SelectOptimize::optimizeSelects(Function &F) {
return !ProfSIGroups.empty();
}
-void SelectOptimize::optimizeSelectsBase(Function &F,
- SelectGroups &ProfSIGroups) {
+void SelectOptimizeImpl::optimizeSelectsBase(Function &F,
+ SelectGroups &ProfSIGroups) {
// Collect all the select groups.
SelectGroups SIGroups;
for (BasicBlock &BB : F) {
@@ -294,8 +343,8 @@ void SelectOptimize::optimizeSelectsBase(Function &F,
findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
}
-void SelectOptimize::optimizeSelectsInnerLoops(Function &F,
- SelectGroups &ProfSIGroups) {
+void SelectOptimizeImpl::optimizeSelectsInnerLoops(Function &F,
+ SelectGroups &ProfSIGroups) {
SmallVector<Loop *, 4> Loops(LI->begin(), LI->end());
// Need to check size on each iteration as we accumulate child loops.
for (unsigned long i = 0; i < Loops.size(); ++i)
@@ -332,7 +381,7 @@ getTrueOrFalseValue(SelectInst *SI, bool isTrue,
return V;
}
-void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
+void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
for (SelectGroup &ASI : ProfSIGroups) {
// The code transformation here is a modified version of the sinking
// transformation in CodeGenPrepare::optimizeSelectInst with a more
@@ -425,7 +474,7 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
BasicBlock *StartBlock = SI->getParent();
BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
- BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
+ BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
// Delete the unconditional branch that was just created by the split.
StartBlock->getTerminator()->eraseFromParent();
@@ -439,7 +488,7 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
DIt++;
}
for (auto *DI : DebugPseudoINS) {
- DI->moveBefore(&*EndBlock->getFirstInsertionPt());
+ DI->moveBeforePreserving(&*EndBlock->getFirstInsertionPt());
}
// These are the new basic blocks for the conditional branch.
@@ -505,7 +554,8 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
SelectInst *SI = *It;
// The select itself is replaced with a PHI Node.
- PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
+ PHINode *PN = PHINode::Create(SI->getType(), 2, "");
+ PN->insertBefore(EndBlock->begin());
PN->takeName(SI);
PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
@@ -531,8 +581,8 @@ static bool isSpecialSelect(SelectInst *SI) {
return false;
}
-void SelectOptimize::collectSelectGroups(BasicBlock &BB,
- SelectGroups &SIGroups) {
+void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB,
+ SelectGroups &SIGroups) {
BasicBlock::iterator BBIt = BB.begin();
while (BBIt != BB.end()) {
Instruction *I = &*BBIt++;
@@ -565,8 +615,8 @@ void SelectOptimize::collectSelectGroups(BasicBlock &BB,
}
}
-void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups,
- SelectGroups &ProfSIGroups) {
+void SelectOptimizeImpl::findProfitableSIGroupsBase(
+ SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
for (SelectGroup &ASI : SIGroups) {
++NumSelectOptAnalyzed;
if (isConvertToBranchProfitableBase(ASI))
@@ -580,14 +630,14 @@ static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE,
ORE->emit(Rem);
}
-void SelectOptimize::findProfitableSIGroupsInnerLoops(
+void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops(
const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
NumSelectOptAnalyzed += SIGroups.size();
// For each select group in an inner-most loop,
  // a branch is preferable to a select/conditional-move if:
// i) conversion to branches for all the select groups of the loop satisfies
// loop-level heuristics including reducing the loop's critical path by
- // some threshold (see SelectOptimize::checkLoopHeuristics); and
+ // some threshold (see SelectOptimizeImpl::checkLoopHeuristics); and
// ii) the total cost of the select group is cheaper with a branch compared
// to its predicated version. The cost is in terms of latency and the cost
// of a select group is the cost of its most expensive select instruction
@@ -627,7 +677,7 @@ void SelectOptimize::findProfitableSIGroupsInnerLoops(
}
}
-bool SelectOptimize::isConvertToBranchProfitableBase(
+bool SelectOptimizeImpl::isConvertToBranchProfitableBase(
const SmallVector<SelectInst *, 2> &ASI) {
SelectInst *SI = ASI.front();
LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI << "\n");
@@ -635,7 +685,7 @@ bool SelectOptimize::isConvertToBranchProfitableBase(
OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI);
// Skip cold basic blocks. Better to optimize for size for cold blocks.
- if (PSI->isColdBlock(SI->getParent(), BFI.get())) {
+ if (PSI->isColdBlock(SI->getParent(), BFI)) {
++NumSelectColdBB;
ORmiss << "Not converted to branch because of cold basic block. ";
EmitAndPrintRemark(ORE, ORmiss);
@@ -678,7 +728,7 @@ static InstructionCost divideNearest(InstructionCost Numerator,
return (Numerator + (Denominator / 2)) / Denominator;
}
-bool SelectOptimize::hasExpensiveColdOperand(
+bool SelectOptimizeImpl::hasExpensiveColdOperand(
const SmallVector<SelectInst *, 2> &ASI) {
bool ColdOperand = false;
uint64_t TrueWeight, FalseWeight, TotalWeight;
@@ -752,9 +802,10 @@ static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) {
// (sufficiently-accurate in practice), we populate this set with the
// instructions of the backwards dependence slice that only have one use and
// form a one-use chain that leads to the source instruction.
-void SelectOptimize::getExclBackwardsSlice(Instruction *I,
- std::stack<Instruction *> &Slice,
- Instruction *SI, bool ForSinking) {
+void SelectOptimizeImpl::getExclBackwardsSlice(Instruction *I,
+ std::stack<Instruction *> &Slice,
+ Instruction *SI,
+ bool ForSinking) {
SmallPtrSet<Instruction *, 2> Visited;
std::queue<Instruction *> Worklist;
Worklist.push(I);
@@ -798,7 +849,7 @@ void SelectOptimize::getExclBackwardsSlice(Instruction *I,
}
}
-bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {
+bool SelectOptimizeImpl::isSelectHighlyPredictable(const SelectInst *SI) {
uint64_t TrueWeight, FalseWeight;
if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
uint64_t Max = std::max(TrueWeight, FalseWeight);
@@ -812,8 +863,8 @@ bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {
return false;
}
-bool SelectOptimize::checkLoopHeuristics(const Loop *L,
- const CostInfo LoopCost[2]) {
+bool SelectOptimizeImpl::checkLoopHeuristics(const Loop *L,
+ const CostInfo LoopCost[2]) {
// Loop-level checks to determine if a non-predicated version (with branches)
// of the loop is more profitable than its predicated version.
@@ -881,7 +932,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L,
// and non-predicated version of the given loop.
// Returns false if unable to compute these costs due to invalid cost of loop
// instruction(s).
-bool SelectOptimize::computeLoopCosts(
+bool SelectOptimizeImpl::computeLoopCosts(
const Loop *L, const SelectGroups &SIGroups,
DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop "
@@ -969,7 +1020,7 @@ bool SelectOptimize::computeLoopCosts(
}
SmallPtrSet<const Instruction *, 2>
-SelectOptimize::getSIset(const SelectGroups &SIGroups) {
+SelectOptimizeImpl::getSIset(const SelectGroups &SIGroups) {
SmallPtrSet<const Instruction *, 2> SIset;
for (const SelectGroup &ASI : SIGroups)
for (const SelectInst *SI : ASI)
@@ -977,7 +1028,8 @@ SelectOptimize::getSIset(const SelectGroups &SIGroups) {
return SIset;
}
-std::optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
+std::optional<uint64_t>
+SelectOptimizeImpl::computeInstCost(const Instruction *I) {
InstructionCost ICost =
TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
if (auto OC = ICost.getValue())
@@ -986,8 +1038,8 @@ std::optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
}
ScaledNumber<uint64_t>
-SelectOptimize::getMispredictionCost(const SelectInst *SI,
- const Scaled64 CondCost) {
+SelectOptimizeImpl::getMispredictionCost(const SelectInst *SI,
+ const Scaled64 CondCost) {
uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
// Account for the default misprediction rate when using a branch
@@ -1012,8 +1064,8 @@ SelectOptimize::getMispredictionCost(const SelectInst *SI,
// Returns the cost of a branch when the prediction is correct.
// TrueCost * TrueProbability + FalseCost * FalseProbability.
ScaledNumber<uint64_t>
-SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
- const SelectInst *SI) {
+SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
+ const SelectInst *SI) {
Scaled64 PredPathCost;
uint64_t TrueWeight, FalseWeight;
if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
@@ -1033,7 +1085,7 @@ SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
return PredPathCost;
}
-bool SelectOptimize::isSelectKindSupported(SelectInst *SI) {
+bool SelectOptimizeImpl::isSelectKindSupported(SelectInst *SI) {
bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
if (VectorCond)
return false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 235f0da86b90..c782ad117ce6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -149,7 +149,7 @@ namespace {
const TargetLowering &TLI;
const SelectionDAGTargetInfo *STI;
CombineLevel Level = BeforeLegalizeTypes;
- CodeGenOpt::Level OptLevel;
+ CodeGenOptLevel OptLevel;
bool LegalDAG = false;
bool LegalOperations = false;
bool LegalTypes = false;
@@ -242,7 +242,7 @@ namespace {
SDValue visit(SDNode *N);
public:
- DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
+ DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOptLevel OL)
: DAG(D), TLI(D.getTargetLoweringInfo()),
STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
ForCodeSize = DAG.shouldOptForSize();
@@ -430,6 +430,8 @@ namespace {
SDValue visitSADDO_CARRY(SDNode *N);
SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
SDNode *N);
+ SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
+ SDNode *N);
SDValue visitSUBE(SDNode *N);
SDValue visitUSUBO_CARRY(SDNode *N);
SDValue visitSSUBO_CARRY(SDNode *N);
@@ -493,6 +495,7 @@ namespace {
SDValue visitFSUB(SDNode *N);
SDValue visitFMUL(SDNode *N);
template <class MatchContextClass> SDValue visitFMA(SDNode *N);
+ SDValue visitFMAD(SDNode *N);
SDValue visitFDIV(SDNode *N);
SDValue visitFREM(SDNode *N);
SDValue visitFSQRT(SDNode *N);
@@ -502,6 +505,7 @@ namespace {
SDValue visitUINT_TO_FP(SDNode *N);
SDValue visitFP_TO_SINT(SDNode *N);
SDValue visitFP_TO_UINT(SDNode *N);
+ SDValue visitXRINT(SDNode *N);
SDValue visitFP_ROUND(SDNode *N);
SDValue visitFP_EXTEND(SDNode *N);
SDValue visitFNEG(SDNode *N);
@@ -537,6 +541,8 @@ namespace {
SDValue visitMSCATTER(SDNode *N);
SDValue visitVPGATHER(SDNode *N);
SDValue visitVPSCATTER(SDNode *N);
+ SDValue visitVP_STRIDED_LOAD(SDNode *N);
+ SDValue visitVP_STRIDED_STORE(SDNode *N);
SDValue visitFP_TO_FP16(SDNode *N);
SDValue visitFP16_TO_FP(SDNode *N);
SDValue visitFP_TO_BF16(SDNode *N);
@@ -561,7 +567,7 @@ namespace {
SDValue N1, SDNodeFlags Flags);
SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1, SDNodeFlags Flags);
- SDValue reassociateReduction(unsigned ResOpc, unsigned Opc, const SDLoc &DL,
+ SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
EVT VT, SDValue N0, SDValue N1,
SDNodeFlags Flags = SDNodeFlags());
@@ -607,6 +613,7 @@ namespace {
SDValue CombineExtLoad(SDNode *N);
SDValue CombineZExtLogicopShiftLoad(SDNode *N);
SDValue combineRepeatedFPDivisors(SDNode *N);
+ SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
@@ -616,7 +623,10 @@ namespace {
SDValue BuildUDIV(SDNode *N);
SDValue BuildSREMPow2(SDNode *N);
SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
- SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
+ SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
+ bool KnownNeverZero = false,
+ bool InexpensiveOnly = false,
+ std::optional<EVT> OutVT = std::nullopt);
SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
@@ -698,6 +708,11 @@ namespace {
case ISD::Constant:
case ISD::ConstantFP:
return StoreSource::Constant;
+ case ISD::BUILD_VECTOR:
+ if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
+ return StoreSource::Constant;
+ return StoreSource::Unknown;
case ISD::EXTRACT_VECTOR_ELT:
case ISD::EXTRACT_SUBVECTOR:
return StoreSource::Extract;
@@ -1329,6 +1344,30 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
}
}
+
+ // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
+ // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
+ // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
+ // comparisons with the same predicate. This enables optimizations such as
+ // the following:
+ // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
+ // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
+ if (Opc == ISD::AND || Opc == ISD::OR) {
+ if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
+ N01->getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
+ ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
+ ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
+ if (CC1 == CC00 && CC1 != CC01) {
+ SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
+ return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
+ }
+ if (CC1 == CC01 && CC1 != CC00) {
+ SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
+ return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
+ }
+ }
+ }
}
return SDValue();
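
A standalone check of the two facts this hunk relies on: OR/AND may be regrouped across operands, and grouping same-predicate compares enables the min/max rewrite named in the comment (plain C++, not LLVM code):

#include <algorithm>
#include <cassert>

int main() {
  // OR/AND are commutative and associative, so ((x op y) op z) may be
  // regrouped as ((x op z) op y); that is all the reassociation needs.
  for (int x = 0; x < 2; ++x)
    for (int y = 0; y < 2; ++y)
      for (int z = 0; z < 2; ++z) {
        assert(((x | y) | z) == ((x | z) | y));
        assert(((x & y) & z) == ((x & z) & y));
      }
  // The payoff once same-predicate compares sit together:
  // CMP(A,C) || CMP(B,C)  ==>  CMP(MIN(A,B), C)   for '<'
  for (int A = -2; A <= 2; ++A)
    for (int B = -2; B <= 2; ++B)
      for (int C = -2; C <= 2; ++C)
        assert(((A < C) || (B < C)) == (std::min(A, B) < C));
  return 0;
}
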
@@ -1873,6 +1912,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
}
SDValue DAGCombiner::visit(SDNode *N) {
+ // clang-format off
switch (N->getOpcode()) {
default: break;
case ISD::TokenFactor: return visitTokenFactor(N);
@@ -1963,6 +2003,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FSUB: return visitFSUB(N);
case ISD::FMUL: return visitFMUL(N);
case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
+ case ISD::FMAD: return visitFMAD(N);
case ISD::FDIV: return visitFDIV(N);
case ISD::FREM: return visitFREM(N);
case ISD::FSQRT: return visitFSQRT(N);
@@ -1972,6 +2013,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
+ case ISD::LRINT:
+ case ISD::LLRINT: return visitXRINT(N);
case ISD::FP_ROUND: return visitFP_ROUND(N);
case ISD::FP_EXTEND: return visitFP_EXTEND(N);
case ISD::FNEG: return visitFNEG(N);
@@ -2026,6 +2069,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
#include "llvm/IR/VPIntrinsics.def"
return visitVPOp(N);
}
+ // clang-format on
return SDValue();
}
@@ -2124,7 +2168,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
}
// Don't simplify token factors if optnone.
- if (OptLevel == CodeGenOpt::None)
+ if (OptLevel == CodeGenOptLevel::None)
return SDValue();
// Don't simplify the token factor if the node itself has too many operands.
@@ -2649,15 +2693,6 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-static bool isADDLike(SDValue V, const SelectionDAG &DAG) {
- unsigned Opcode = V.getOpcode();
- if (Opcode == ISD::OR)
- return DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1));
- if (Opcode == ISD::XOR)
- return isMinSignedConstant(V.getOperand(1));
- return false;
-}
-
static bool
areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
@@ -2739,7 +2774,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
// iff (or x, c0) is equivalent to (add x, c0).
// Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
// iff (xor x, c0) is equivalent to (add x, c0).
- if (isADDLike(N0, DAG)) {
+ if (DAG.isADDLike(N0)) {
SDValue N01 = N0.getOperand(1);
if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
@@ -2760,7 +2795,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
// Do this optimization only when adding c does not introduce instructions
// for adding carries.
auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
- if (isADDLike(N0, DAG) && N0.hasOneUse() &&
+ if (DAG.isADDLike(N0) && N0.hasOneUse() &&
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
// If N0's type does not split or is a sign mask, it does not introduce
// add carry.
@@ -3011,7 +3046,7 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) {
return N0;
// If it cannot overflow, transform into an add.
- if (DAG.computeOverflowForAdd(IsSigned, N0, N1) == SelectionDAG::OFK_Never)
+ if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
return SDValue();
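
The willNotOverflowAdd rewrite rests on a simple identity: a saturating add equals a plain add whenever the plain add cannot overflow. An exhaustive check over int8_t (a sketch of the node semantics in plain C++, assuming the GCC/Clang overflow builtin):

#include <cassert>
#include <cstdint>

// Reference saturating add for int8_t (the semantics of ISD::SADDSAT).
static int8_t saddsat(int8_t a, int8_t b) {
  int r = int(a) + int(b);
  if (r > 127) return 127;
  if (r < -128) return -128;
  return int8_t(r);
}

int main() {
  for (int a = -128; a <= 127; ++a)
    for (int b = -128; b <= 127; ++b) {
      int8_t out;
      bool ov = __builtin_add_overflow(int8_t(a), int8_t(b), &out);
      // Whenever the add provably cannot overflow, SADDSAT == ADD,
      // which is exactly the rewrite performed above.
      if (!ov)
        assert(saddsat(int8_t(a), int8_t(b)) == out);
    }
  return 0;
}
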
@@ -3281,11 +3316,16 @@ SDValue DAGCombiner::visitADDO(SDNode *N) {
return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
// If it cannot overflow, transform into an add.
- if (DAG.computeOverflowForAdd(IsSigned, N0, N1) == SelectionDAG::OFK_Never)
+ if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
DAG.getConstant(0, DL, CarryVT));
- if (!IsSigned) {
+ if (IsSigned) {
+ // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
+ if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
+ return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
+ DAG.getConstant(0, DL, VT), N0.getOperand(0));
+ } else {
// fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
@@ -3617,6 +3657,18 @@ SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
return SDValue();
}
+SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
+ SDValue CarryIn, SDNode *N) {
+ // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
+ if (isBitwiseNot(N0)) {
+ if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
+ return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
+ N0.getOperand(0), NotC);
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -3636,6 +3688,12 @@ SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
}
+ if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
+ return Combined;
+
+ if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
+ return Combined;
+
return SDValue();
}
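
An exhaustive check of the new saddo_carry fold on int8_t. Since ~a = -a - 1, we get ~a + b + c = b - a - (1 - c), so the add-with-carry of a complemented operand is a subtract-with-borrow of the flipped carry (plain C++; the two helpers below sketch the node semantics, they are not LLVM code):

#include <cassert>
#include <cstdint>

struct R { int8_t Val; bool Ov; };

static R saddo_carry(int8_t A, int8_t B, bool C) {
  int W = int(A) + int(B) + int(C);
  return {int8_t(W), W < -128 || W > 127};
}
static R ssubo_carry(int8_t A, int8_t B, bool Borrow) {
  int W = int(A) - int(B) - int(Borrow);
  return {int8_t(W), W < -128 || W > 127};
}

int main() {
  // (saddo_carry (xor a, -1), b, c) == (ssubo_carry b, a, !c)
  for (int a = -128; a <= 127; ++a)
    for (int b = -128; b <= 127; ++b)
      for (int c = 0; c < 2; ++c) {
        R L = saddo_carry(int8_t(~a), int8_t(b), c != 0);
        R S = ssubo_carry(int8_t(b), int8_t(a), c == 0);
        assert(L.Val == S.Val && L.Ov == S.Ov);
      }
  return 0;
}
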
@@ -4141,7 +4199,7 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
return N0;
// If it cannot overflow, transform into an sub.
- if (DAG.computeOverflowForSub(IsSigned, N0, N1) == SelectionDAG::OFK_Never)
+ if (DAG.willNotOverflowSub(IsSigned, N0, N1))
return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
return SDValue();
@@ -4207,7 +4265,7 @@ SDValue DAGCombiner::visitSUBO(SDNode *N) {
return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
// If it cannot overflow, transform into an sub.
- if (DAG.computeOverflowForSub(IsSigned, N0, N1) == SelectionDAG::OFK_Never)
+ if (DAG.willNotOverflowSub(IsSigned, N0, N1))
return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
DAG.getConstant(0, DL, CarryVT));
@@ -4342,12 +4400,12 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold (mul x, (1 << c)) -> x << c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
- DAG.isKnownToBeAPowerOfTwo(N1) &&
(!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
- SDValue LogBase2 = BuildLogBase2(N1, DL);
- EVT ShiftVT = getShiftAmountTy(N0.getValueType());
- SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
- return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
+ if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
+ return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
+ }
}
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
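
The folds in these hunks are the classic power-of-two strength reductions that BuildLogBase2 now guards; a quick standalone spot-check of each identity (plain C++):

#include <cassert>
#include <cstdint>

int main() {
  for (uint64_t i = 0; i < (1u << 20); ++i) {
    uint32_t x = uint32_t(i * 0x9E3779B9u); // spread the test values around
    assert(x * 8u == (x << 3));             // (mul x, 1<<3)  -> x << 3
    assert(x / 8u == (x >> 3));             // (udiv x, 1<<3) -> x >>u 3
    // (mulhu x, 1<<3) -> x >> (32 - 3): the high half of x * 2^3.
    assert(uint32_t((uint64_t(x) * 8) >> 32) == (x >> 29));
  }
  return 0;
}
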
@@ -4869,31 +4927,31 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N->getValueType(0);
// fold (udiv x, (1 << c)) -> x >>u c
- if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
- DAG.isKnownToBeAPowerOfTwo(N1)) {
- SDValue LogBase2 = BuildLogBase2(N1, DL);
- AddToWorklist(LogBase2.getNode());
+ if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
+ if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
+ AddToWorklist(LogBase2.getNode());
- EVT ShiftVT = getShiftAmountTy(N0.getValueType());
- SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
- AddToWorklist(Trunc.getNode());
- return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
+ AddToWorklist(Trunc.getNode());
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
+ }
}
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (N1.getOpcode() == ISD::SHL) {
SDValue N10 = N1.getOperand(0);
- if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
- DAG.isKnownToBeAPowerOfTwo(N10)) {
- SDValue LogBase2 = BuildLogBase2(N10, DL);
- AddToWorklist(LogBase2.getNode());
-
- EVT ADDVT = N1.getOperand(1).getValueType();
- SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
- AddToWorklist(Trunc.getNode());
- SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
- AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
+ if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
+ if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
+ AddToWorklist(LogBase2.getNode());
+
+ EVT ADDVT = N1.getOperand(1).getValueType();
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
+ AddToWorklist(Trunc.getNode());
+ SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
+ }
}
}
@@ -5111,14 +5169,15 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
// fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
- DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
- unsigned NumEltBits = VT.getScalarSizeInBits();
- SDValue LogBase2 = BuildLogBase2(N1, DL);
- SDValue SRLAmt = DAG.getNode(
- ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
- EVT ShiftVT = getShiftAmountTy(N0.getValueType());
- SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
- return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
+ hasOperation(ISD::SRL, VT)) {
+ if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
+ unsigned NumEltBits = VT.getScalarSizeInBits();
+ SDValue SRLAmt = DAG.getNode(
+ ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+ SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
+ }
}
// If the type twice as wide is legal, transform the mulhu to a wider multiply
@@ -5292,6 +5351,10 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
+ // Constant fold.
+ if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
+ return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
+
// canonicalize constant to RHS (vector doesn't have to splat)
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
@@ -5330,6 +5393,10 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
+ // Constant fold.
+ if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
+ return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
+
// canonicalize constant to RHS (vector doesn't have to splat)
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
@@ -5412,34 +5479,18 @@ SDValue DAGCombiner::visitMULO(SDNode *N) {
return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
N->getVTList(), N0, N0);
- if (IsSigned) {
- // A 1 bit SMULO overflows if both inputs are 1.
- if (VT.getScalarSizeInBits() == 1) {
- SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
- return CombineTo(N, And,
- DAG.getSetCC(DL, CarryVT, And,
- DAG.getConstant(0, DL, VT), ISD::SETNE));
- }
-
- // Multiplying n * m significant bits yields a result of n + m significant
- // bits. If the total number of significant bits does not exceed the
- // result bit width (minus 1), there is no overflow.
- unsigned SignBits = DAG.ComputeNumSignBits(N0);
- if (SignBits > 1)
- SignBits += DAG.ComputeNumSignBits(N1);
- if (SignBits > VT.getScalarSizeInBits() + 1)
- return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
- DAG.getConstant(0, DL, CarryVT));
- } else {
- KnownBits N1Known = DAG.computeKnownBits(N1);
- KnownBits N0Known = DAG.computeKnownBits(N0);
- bool Overflow;
- (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
- if (!Overflow)
- return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
- DAG.getConstant(0, DL, CarryVT));
+ // A 1 bit SMULO overflows if both inputs are 1.
+ if (IsSigned && VT.getScalarSizeInBits() == 1) {
+ SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
+ SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
+ DAG.getConstant(0, DL, VT), ISD::SETNE);
+ return CombineTo(N, And, Cmp);
}
+ // If it cannot overflow, transform into a mul.
+ if (DAG.willNotOverflowMul(IsSigned, N0, N1))
+ return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
+ DAG.getConstant(0, DL, CarryVT));
return SDValue();
}
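
A standalone check of the two MULO facts used above: the overflow flag is exactly the out-of-range test, so a provably in-range multiply becomes a plain MUL with a constant-0 carry, and the 1-bit signed case overflows only when both inputs are set (plain C++, assuming the GCC/Clang overflow builtin):

#include <cassert>
#include <cstdint>

int main() {
  for (int a = -128; a <= 127; ++a)
    for (int b = -128; b <= 127; ++b) {
      int8_t out;
      bool ov = __builtin_mul_overflow(int8_t(a), int8_t(b), &out);
      assert(ov == (a * b < -128 || a * b > 127));
    }
  // 1-bit signed values are {0, -1}; -1 * -1 == 1 is not representable, so
  // 1-bit SMULO overflows exactly when both inputs are -1, i.e. (and a, b).
  for (int a = 0; a < 2; ++a)      // bit set <=> value -1
    for (int b = 0; b < 2; ++b)
      assert(((a & b) != 0) == (a == 1 && b == 1));
  return 0;
}
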
@@ -5459,12 +5510,12 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
return 0;
// The constants need to be the same or a truncated version of each other.
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
- ConstantSDNode *N3C = isConstOrConstSplat(N3);
+ ConstantSDNode *N1C = isConstOrConstSplat(peekThroughTruncates(N1));
+ ConstantSDNode *N3C = isConstOrConstSplat(peekThroughTruncates(N3));
if (!N1C || !N3C)
return 0;
- const APInt &C1 = N1C->getAPIntValue();
- const APInt &C2 = N3C->getAPIntValue();
+ const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
+ const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
return 0;
return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
@@ -5579,7 +5630,7 @@ static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
SelectionDAG &DAG) {
// We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
// select/vselect/select_cc. The two operands pairs for the select (N2/N3) may
- // be truncated versions of the the setcc (N0/N1).
+ // be truncated versions of the setcc (N0/N1).
if ((N0 != N2 &&
(N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
@@ -6013,6 +6064,72 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
return SDValue();
}
+static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
+ SelectionDAG &DAG) {
+ return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
+}
+
+static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
+ SelectionDAG &DAG) {
+ return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
+}
+
+static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
+ ISD::CondCode CC, unsigned OrAndOpcode,
+ SelectionDAG &DAG,
+ bool isFMAXNUMFMINNUM_IEEE,
+ bool isFMAXNUMFMINNUM) {
+ // The optimization cannot be applied for all the predicates because
+ // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
+ // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
+ // applied at all if one of the operands is a signaling NaN.
+
+ // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
+ // are non NaN values.
+ if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
+ ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND)))
+ return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
+ isFMAXNUMFMINNUM_IEEE
+ ? ISD::FMINNUM_IEEE
+ : ISD::DELETED_NODE;
+ else if (((CC == ISD::SETGT || CC == ISD::SETGE) &&
+ (OrAndOpcode == ISD::OR)) ||
+ ((CC == ISD::SETLT || CC == ISD::SETLE) &&
+ (OrAndOpcode == ISD::AND)))
+ return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
+ isFMAXNUMFMINNUM_IEEE
+ ? ISD::FMAXNUM_IEEE
+ : ISD::DELETED_NODE;
+ // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
+ // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
+ // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
+ // that there are not any sNaNs, then the optimization is not valid
+ // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
+ // the optimization using FMINNUM/FMAXNUM for the following cases. If
+ // we can prove that we do not have any sNaNs, then we can do the
+ // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
+ // cases.
+ else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) &&
+ (OrAndOpcode == ISD::OR)) ||
+ ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
+ (OrAndOpcode == ISD::AND)))
+ return isFMAXNUMFMINNUM ? ISD::FMINNUM
+ : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
+ isFMAXNUMFMINNUM_IEEE
+ ? ISD::FMINNUM_IEEE
+ : ISD::DELETED_NODE;
+ else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) &&
+ (OrAndOpcode == ISD::OR)) ||
+ ((CC == ISD::SETULT || CC == ISD::SETULE) &&
+ (OrAndOpcode == ISD::AND)))
+ return isFMAXNUMFMINNUM ? ISD::FMAXNUM
+ : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
+ isFMAXNUMFMINNUM_IEEE
+ ? ISD::FMAXNUM_IEEE
+ : ISD::DELETED_NODE;
+ return ISD::DELETED_NODE;
+}
+
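
One concrete row of the table encoded above: an ordered less-than joined by OR becomes FMINNUM. std::fmin models FMINNUM's quiet-NaN behavior (return the non-NaN operand), which is why quiet NaNs do not break the fold; the helper's extra checks exist because signaling NaNs and FMINNUM_IEEE do not share that guarantee. A small check (plain C++, not LLVM code):

#include <cassert>
#include <cmath>

int main() {
  // (x < c) || (y < c)  ==>  fminnum(x, y) < c   (ordered compares)
  double vals[] = {-1.5, 0.0, 2.25, std::nan("")};
  for (double x : vals)
    for (double y : vals)
      for (double c : {-1.0, 1.0}) {
        bool lhs = (x < c) || (y < c); // NaN compares are false
        bool rhs = std::fmin(x, y) < c;
        assert(lhs == rhs);
      }
  return 0;
}
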
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
assert(
@@ -6022,7 +6139,8 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
// TODO: Search past casts/truncates.
SDValue LHS = LogicOp->getOperand(0);
SDValue RHS = LogicOp->getOperand(1);
- if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC)
+ if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
+ !LHS->hasOneUse() || !RHS->hasOneUse())
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -6050,59 +6168,77 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
// (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
// and and-cmp-cmp will be replaced with max-cmp sequence:
// (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
- if (OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
- TLI.isOperationLegal(ISD::SMAX, OpVT) &&
- TLI.isOperationLegal(ISD::UMIN, OpVT) &&
- TLI.isOperationLegal(ISD::SMIN, OpVT)) {
- if (LHS->getOpcode() == ISD::SETCC && RHS->getOpcode() == ISD::SETCC &&
- LHS->hasOneUse() && RHS->hasOneUse() &&
- // The two comparisons should have either the same predicate or the
- // predicate of one of the comparisons is the opposite of the other one.
- (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR)) &&
- // The optimization does not work for `==` or `!=` .
- !ISD::isIntEqualitySetCC(CCL) && !ISD::isIntEqualitySetCC(CCR)) {
- SDValue CommonValue, Operand1, Operand2;
- ISD::CondCode CC = ISD::SETCC_INVALID;
- if (CCL == CCR) {
- if (LHS0 == RHS0) {
- CommonValue = LHS0;
- Operand1 = LHS1;
- Operand2 = RHS1;
- CC = ISD::getSetCCSwappedOperands(CCL);
- } else if (LHS1 == RHS1) {
- CommonValue = LHS1;
- Operand1 = LHS0;
- Operand2 = RHS0;
- CC = CCL;
- }
- } else {
- assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
- if (LHS0 == RHS1) {
- CommonValue = LHS0;
- Operand1 = LHS1;
- Operand2 = RHS0;
- CC = ISD::getSetCCSwappedOperands(CCL);
- } else if (RHS0 == LHS1) {
- CommonValue = LHS1;
- Operand1 = LHS0;
- Operand2 = RHS1;
- CC = CCL;
- }
+ // The optimization does not work for `==` or `!=`.
+ // The two comparisons should have either the same predicate or the
+ // predicate of one of the comparisons is the opposite of the other one.
+ bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
+ TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
+ bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
+ TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
+ if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
+ TLI.isOperationLegal(ISD::SMAX, OpVT) &&
+ TLI.isOperationLegal(ISD::UMIN, OpVT) &&
+ TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
+ (OpVT.isFloatingPoint() &&
+ (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
+ !ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) &&
+ CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
+ CCL != ISD::SETTRUE &&
+ (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
+
+ SDValue CommonValue, Operand1, Operand2;
+ ISD::CondCode CC = ISD::SETCC_INVALID;
+ if (CCL == CCR) {
+ if (LHS0 == RHS0) {
+ CommonValue = LHS0;
+ Operand1 = LHS1;
+ Operand2 = RHS1;
+ CC = ISD::getSetCCSwappedOperands(CCL);
+ } else if (LHS1 == RHS1) {
+ CommonValue = LHS1;
+ Operand1 = LHS0;
+ Operand2 = RHS0;
+ CC = CCL;
}
+ } else {
+ assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
+ if (LHS0 == RHS1) {
+ CommonValue = LHS0;
+ Operand1 = LHS1;
+ Operand2 = RHS0;
+ CC = CCR;
+ } else if (RHS0 == LHS1) {
+ CommonValue = LHS1;
+ Operand1 = LHS0;
+ Operand2 = RHS1;
+ CC = CCL;
+ }
+ }
+
+ // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
+ // handle it using OR/AND.
+ if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
+ CC = ISD::SETCC_INVALID;
+ else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
+ CC = ISD::SETCC_INVALID;
- if (CC != ISD::SETCC_INVALID) {
- unsigned NewOpcode;
- bool IsSigned = isSignedIntSetCC(CC);
- if (((CC == ISD::SETLE || CC == ISD::SETULE || CC == ISD::SETLT ||
- CC == ISD::SETULT) &&
- (LogicOp->getOpcode() == ISD::OR)) ||
- ((CC == ISD::SETGE || CC == ISD::SETUGE || CC == ISD::SETGT ||
- CC == ISD::SETUGT) &&
- (LogicOp->getOpcode() == ISD::AND)))
+ if (CC != ISD::SETCC_INVALID) {
+ unsigned NewOpcode = ISD::DELETED_NODE;
+ bool IsSigned = isSignedIntSetCC(CC);
+ if (OpVT.isInteger()) {
+ bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
+ CC == ISD::SETLT || CC == ISD::SETULT);
+ bool IsOr = (LogicOp->getOpcode() == ISD::OR);
+ if (IsLess == IsOr)
NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
else
NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
+ } else if (OpVT.isFloatingPoint())
+ NewOpcode =
+ getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
+ DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
+ if (NewOpcode != ISD::DELETED_NODE) {
SDValue MinMaxValue =
DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
@@ -6115,8 +6251,7 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
if (CCL == CCR &&
CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
- LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger() && LHS.hasOneUse() &&
- RHS.hasOneUse()) {
+ LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
const APInt &APLhs = LHS1C->getAPIntValue();
const APInt &APRhs = RHS1C->getAPIntValue();
@@ -6179,6 +6314,33 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
return SDValue();
}
+// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
+// We canonicalize to the `select` form in the middle end, but the `and` form
+// gets better codegen on all tested targets (arm, x86, riscv).
+static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
+ const SDLoc &DL, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!isNullConstant(F))
+ return SDValue();
+
+ EVT CondVT = Cond.getValueType();
+ if (TLI.getBooleanContents(CondVT) !=
+ TargetLoweringBase::ZeroOrOneBooleanContent)
+ return SDValue();
+
+ if (T.getOpcode() != ISD::AND)
+ return SDValue();
+
+ if (!isOneConstant(T.getOperand(1)))
+ return SDValue();
+
+ EVT OpVT = T.getValueType();
+
+ SDValue CondMask =
+ OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
+ return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
+}
+
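
The identity behind combineSelectAsExtAnd, checked exhaustively over the low 16 bits: zero-extending an i1 gives 0 or 1, so ANDing it with X reproduces the select (plain C++):

#include <cassert>
#include <cstdint>

int main() {
  // (select c, (x & 1), 0) == (and (zext c), x)
  for (int c = 0; c < 2; ++c)
    for (uint32_t x = 0; x < (1u << 16); ++x) {
      uint32_t sel = c ? (x & 1u) : 0u;
      uint32_t ext_and = uint32_t(c) & x; // zext(c) & x
      assert(sel == ext_and);
    }
  return 0;
}
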
/// This contains all DAGCombine rules which reduce two values combined by
/// an And operation to a single value. This makes them reusable in the context
/// of visitSELECT(). Rules involving constants are not included as
@@ -6464,7 +6626,7 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
SmallPtrSet<SDNode*, 2> NodesWithConsts;
SDNode *FixupNode = nullptr;
if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
- if (Loads.size() == 0)
+ if (Loads.empty())
return false;
LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
@@ -6488,12 +6650,17 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
SDValue Op1 = LogicN->getOperand(1);
if (isa<ConstantSDNode>(Op0))
- std::swap(Op0, Op1);
+ Op0 =
+ DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
- SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
- Op1, MaskOp);
+ if (isa<ConstantSDNode>(Op1))
+ Op1 =
+ DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
- DAG.UpdateNodeOperands(LogicN, Op0, And);
+ if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
+ std::swap(Op0, Op1);
+
+ DAG.UpdateNodeOperands(LogicN, Op0, Op1);
}
// Create narrow loads.
@@ -6924,12 +7091,23 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) {
Constant = C->getAPIntValue();
} else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
+ unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
- bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
- SplatBitSize, HasAnyUndefs);
- if (IsSplat) {
+ // Endianness should not matter here. Code below makes sure that we only
+ // use the result if the SplatBitSize is a multiple of the vector element
+ // size. And after that we AND all element sized parts of the splat
+ // together. So the end result should be the same regardless of the order
+ // in which we do those operations.
+ const bool IsBigEndian = false;
+ bool IsSplat =
+ Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+ HasAnyUndefs, EltBitWidth, IsBigEndian);
+
+ // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
+ // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
+ if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
// Undef bits can contribute to a possible optimisation if set, so
// set them.
SplatValue |= SplatUndef;
@@ -6938,23 +7116,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// the first vector value and FF for the rest, repeating. We need a mask
// that will apply equally to all members of the vector, so AND all the
// lanes of the constant together.
- unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
-
- // If the splat value has been compressed to a bitlength lower
- // than the size of the vector lane, we need to re-expand it to
- // the lane size.
- if (EltBitWidth > SplatBitSize)
- for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
- SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
- SplatValue |= SplatValue.shl(SplatBitSize);
-
- // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
- // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
- if ((SplatBitSize % EltBitWidth) == 0) {
- Constant = APInt::getAllOnes(EltBitWidth);
- for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
- Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
- }
+ Constant = APInt::getAllOnes(EltBitWidth);
+ for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
+ Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
}
}
@@ -7467,12 +7631,12 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
getShiftAmountTy(VT)))
- return BSwap;
+ return BSwap;
// Try again with commuted operands.
if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
getShiftAmountTy(VT)))
- return BSwap;
+ return BSwap;
// Look for either
@@ -8493,7 +8657,7 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
/// *ExtractVectorElement
using SDByteProvider = ByteProvider<SDNode *>;
-static const std::optional<SDByteProvider>
+static std::optional<SDByteProvider>
calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
std::optional<uint64_t> VectorIndex,
unsigned StartingIndex = 0) {
@@ -8701,7 +8865,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
// TODO: If there is evidence that running this later would help, this
// limitation could be removed. Legality checks may need to be added
// for the created store and optional bswap/rotate.
- if (LegalOperations || OptLevel == CodeGenOpt::None)
+ if (LegalOperations || OptLevel == CodeGenOptLevel::None)
return SDValue();
// We only handle merging simple stores of 1-4 bytes.
@@ -9710,9 +9874,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
}
- if (SimplifyDemandedBits(SDValue(N, 0)))
- return SDValue(N, 0);
-
// fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
if (N0.getOpcode() == ISD::SHL) {
auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
@@ -9886,15 +10047,35 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// Variant of version done on multiply, except mul by a power of 2 is turned
// into a shift.
if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
- N0->hasOneUse() &&
- isConstantOrConstantVector(N1, /* No Opaques */ true) &&
- isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
+ N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
+ SDValue N01 = N0.getOperand(1);
+ if (SDValue Shl1 =
+ DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
+ SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
+ AddToWorklist(Shl0.getNode());
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
+ }
+ }
+
+ // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
+ // TODO: Add zext/add_nuw variant with suitable test coverage
+ // TODO: Should we limit this with isLegalAddImmediate?
+ if (N0.getOpcode() == ISD::SIGN_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::ADD &&
+ N0.getOperand(0)->getFlags().hasNoSignedWrap() && N0->hasOneUse() &&
+ N0.getOperand(0)->hasOneUse() &&
TLI.isDesirableToCommuteWithShift(N, Level)) {
- SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
- SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
- AddToWorklist(Shl0.getNode());
- AddToWorklist(Shl1.getNode());
- return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
+ SDValue Add = N0.getOperand(0);
+ SDLoc DL(N0);
+ if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
+ {Add.getOperand(1)})) {
+ if (SDValue ShlC =
+ DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
+ SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
+ SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
+ return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
+ }
+ }
}
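
The new fold distributes the shift over a no-signed-wrap add after sign extension; a standalone check of the arithmetic under the nsw assumption (plain C++; c1 and c2 are arbitrary illustrative constants):

#include <cassert>
#include <cstdint>

int main() {
  // (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
  const int32_t c1 = 40;
  const unsigned c2 = 5;
  for (int32_t x = -1000; x <= 1000; ++x) {
    // add_nsw holds here: x + c1 stays within int32_t range, so the sign
    // extension distributes over the add; shl distributes unconditionally.
    int64_t lhs = int64_t(int32_t(x + c1)) << c2;
    int64_t rhs = (int64_t(x) << c2) + (int64_t(c1) << c2);
    assert(lhs == rhs);
  }
  return 0;
}
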
// fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
@@ -9910,6 +10091,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (SDValue NewSHL = visitShiftByConstant(N))
return NewSHL;
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
// Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
if (N0.getOpcode() == ISD::VSCALE && N1C) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
@@ -10110,25 +10294,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
- // sext_inreg.
ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
- unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
- EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
- if (VT.isVector())
- ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
- VT.getVectorElementCount());
- if (!LegalOperations ||
- TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
- TargetLowering::Legal)
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
- N0.getOperand(0), DAG.getValueType(ExtVT));
- // Even if we can't convert to sext_inreg, we might be able to remove
- // this shift pair if the input is already sign extended.
- if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
- return N0.getOperand(0);
- }
// fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
// clamp (add c1, c2) to max shift.
@@ -10169,7 +10335,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// If truncate is free for the target sext(shl) is likely to result in better
// code.
if (N0.getOpcode() == ISD::SHL && N1C) {
- // Get the two constanst of the shifts, CN0 = m, CN = n.
+ // Get the two constants of the shifts, CN0 = m, CN = n.
const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
if (N01C) {
LLVMContext &Ctx = *DAG.getContext();
@@ -10640,7 +10806,7 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
RHS->getMemOperand()->getFlags(), &Fast) &&
Fast) {
SDValue NewPtr = DAG.getMemBasePlusOffset(
- RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
+ RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
AddToWorklist(NewPtr.getNode());
SDValue Load = DAG.getLoad(
VT, DL, RHS->getChain(), NewPtr,
@@ -10739,9 +10905,12 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N) {
Op1 = AbsOp1.getOperand(1);
unsigned Opc0 = Op0.getOpcode();
+
// Check if the operands of the sub are (zero|sign)-extended.
+ // TODO: Should we use ValueTracking instead?
if (Opc0 != Op1.getOpcode() ||
- (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) {
+ (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
+ Opc0 != ISD::SIGN_EXTEND_INREG)) {
// fold (abs (sub nsw x, y)) -> abds(x, y)
if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
TLI.preferABDSToABSWithNSW(VT)) {
@@ -10751,17 +10920,24 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N) {
return SDValue();
}
- EVT VT1 = Op0.getOperand(0).getValueType();
- EVT VT2 = Op1.getOperand(0).getValueType();
- unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
+ EVT VT0, VT1;
+ if (Opc0 == ISD::SIGN_EXTEND_INREG) {
+ VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
+ VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
+ } else {
+ VT0 = Op0.getOperand(0).getValueType();
+ VT1 = Op1.getOperand(0).getValueType();
+ }
+ unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
// fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
// fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
- // NOTE: Extensions must be equivalent.
- if (VT1 == VT2 && hasOperation(ABDOpcode, VT1)) {
- Op0 = Op0.getOperand(0);
- Op1 = Op1.getOperand(0);
- SDValue ABD = DAG.getNode(ABDOpcode, DL, VT1, Op0, Op1);
+ EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
+ if ((VT0 == MaxVT || Op0->hasOneUse()) &&
+ (VT1 == MaxVT || Op1->hasOneUse()) && hasOperation(ABDOpcode, MaxVT)) {
+ SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
+ DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
+ DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
}
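
A standalone check of the ABDS identity this code targets, exhaustive over int8_t (plain C++; abds() below is a sketch of the node semantics, not LLVM code):

#include <cassert>
#include <cstdint>
#include <cstdlib>

// Reference ISD::ABDS on int8_t: the absolute difference, which always
// fits in the unsigned form of the same width.
static uint8_t abds(int8_t a, int8_t b) {
  return a > b ? uint8_t(int(a) - int(b)) : uint8_t(int(b) - int(a));
}

int main() {
  // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
  for (int x = -128; x <= 127; ++x)
    for (int y = -128; y <= 127; ++y) {
      int32_t wide = std::abs(int32_t(x) - int32_t(y)); // abs of sext sub
      assert(wide == int32_t(abds(int8_t(x), int8_t(y))));
    }
  return 0;
}
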
@@ -11487,6 +11663,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (SDValue BinOp = foldSelectOfBinops(N))
return BinOp;
+ if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
+ return R;
+
return SDValue();
}
@@ -11547,8 +11726,6 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
SelectionDAG &DAG, const SDLoc &DL) {
- if (Index.getOpcode() != ISD::ADD)
- return false;
// Only perform the transformation when existing operands can be reused.
if (IndexIsScaled)
@@ -11558,21 +11735,27 @@ bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
return false;
EVT VT = BasePtr.getValueType();
+
+ if (SDValue SplatVal = DAG.getSplatValue(Index);
+ SplatVal && !isNullConstant(SplatVal) &&
+ SplatVal.getValueType() == VT) {
+ BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
+ Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
+ return true;
+ }
+
+ if (Index.getOpcode() != ISD::ADD)
+ return false;
+
if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
SplatVal && SplatVal.getValueType() == VT) {
- if (isNullConstant(BasePtr))
- BasePtr = SplatVal;
- else
- BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
+ BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
Index = Index.getOperand(1);
return true;
}
if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
SplatVal && SplatVal.getValueType() == VT) {
- if (isNullConstant(BasePtr))
- BasePtr = SplatVal;
- else
- BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
+ BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
Index = Index.getOperand(0);
return true;
}
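
As a soundness sketch for the unscaled case (scalar model, illustrative only): each lane's address is Base + Index[i], so a uniform splat term can migrate from the index into the base without changing any address.

  #include <cstdint>

  // Before the fold: Addr[i] = Base + (Splat + Idx[i]).
  uint64_t addrBefore(uint64_t Base, uint64_t Splat, uint64_t Idx) {
    return Base + (Splat + Idx);
  }
  // After the fold: Addr[i] = (Base + Splat) + Idx[i].
  uint64_t addrAfter(uint64_t Base, uint64_t Splat, uint64_t Idx) {
    return (Base + Splat) + Idx;
  }
  // Unsigned addition is associative (wrap-around included), so both
  // compute the same address for every lane.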
@@ -11586,10 +11769,9 @@ bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
// It's always safe to look through zero extends.
if (Index.getOpcode() == ISD::ZERO_EXTEND) {
- SDValue Op = Index.getOperand(0);
- if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) {
+ if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
IndexType = ISD::UNSIGNED_SCALED;
- Index = Op;
+ Index = Index.getOperand(0);
return true;
}
if (ISD::isIndexTypeSigned(IndexType)) {
@@ -11600,12 +11782,10 @@ bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
// It's only safe to look through sign extends when Index is signed.
if (Index.getOpcode() == ISD::SIGN_EXTEND &&
- ISD::isIndexTypeSigned(IndexType)) {
- SDValue Op = Index.getOperand(0);
- if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) {
- Index = Op;
- return true;
- }
+ ISD::isIndexTypeSigned(IndexType) &&
+ TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
+ Index = Index.getOperand(0);
+ return true;
}
return false;
@@ -11756,6 +11936,21 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
+ auto *SST = cast<VPStridedStoreSDNode>(N);
+ EVT EltVT = SST->getValue().getValueType().getVectorElementType();
+ // Combine strided stores with unit-stride to a regular VP store.
+ if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
+ CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
+ return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
+ SST->getBasePtr(), SST->getOffset(), SST->getMask(),
+ SST->getVectorLength(), SST->getMemoryVT(),
+ SST->getMemOperand(), SST->getAddressingMode(),
+ SST->isTruncatingStore(), SST->isCompressingStore());
+ }
+ return SDValue();
+}
+
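
A rough scalar model of the unit-stride case this combine (and the matching strided-load combine below) keys on, a sketch rather than the DAG implementation: when the byte stride equals the element store size, the strided access touches exactly the bytes a contiguous one would.

  #include <cstddef>
  #include <cstring>

  // Strided store: element I lands at Base + I * StrideBytes.
  void stridedStore(char *Base, const float *Vals, unsigned N,
                    std::ptrdiff_t StrideBytes) {
    for (unsigned I = 0; I != N; ++I)
      std::memcpy(Base + I * StrideBytes, &Vals[I], sizeof(float));
  }
  // With StrideBytes == sizeof(float) this is byte-for-byte equivalent
  // to std::memcpy(Base, Vals, N * sizeof(float)), i.e. a regular store.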
SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
SDValue Mask = MGT->getMask();
@@ -11843,6 +12038,22 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
+ auto *SLD = cast<VPStridedLoadSDNode>(N);
+ EVT EltVT = SLD->getValueType(0).getVectorElementType();
+ // Combine strided loads with unit-stride to a regular VP load.
+ if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
+ CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
+ SDValue NewLd = DAG.getLoadVP(
+ SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
+ SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
+ SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
+ SLD->getMemOperand(), SLD->isExpandingLoad());
+ return CombineTo(N, NewLd, NewLd.getValue(1));
+ }
+ return SDValue();
+}
+
/// A vector select of 2 constant vectors can be simplified to math/logic to
/// avoid a variable select instruction and possibly avoid constant loads.
SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
@@ -12255,27 +12466,132 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
- SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
- SDLoc(N), !PreferSetCC);
-
- if (!Combined)
- return SDValue();
+ SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, SDLoc(N), !PreferSetCC);
- // If we prefer to have a setcc, and we don't, we'll try our best to
- // recreate one using rebuildSetCC.
- if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
- SDValue NewSetCC = rebuildSetCC(Combined);
+ if (Combined) {
+ // If we prefer to have a setcc, and we don't, we'll try our best to
+ // recreate one using rebuildSetCC.
+ if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
+ SDValue NewSetCC = rebuildSetCC(Combined);
- // We don't have anything interesting to combine to.
- if (NewSetCC.getNode() == N)
- return SDValue();
+ // We don't have anything interesting to combine to.
+ if (NewSetCC.getNode() == N)
+ return SDValue();
- if (NewSetCC)
- return NewSetCC;
+ if (NewSetCC)
+ return NewSetCC;
+ }
+ return Combined;
}
- return Combined;
+ // Optimize
+ // 1) (icmp eq/ne (and X, C0), (shift X, C1))
+ // or
+ // 2) (icmp eq/ne X, (rotate X, C1))
+ // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
+  // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
+ // Then:
+  //     If C1 is a power of 2, then the rotate and shift+and versions are
+  //     equivalent, so we can interchange them depending on target preference.
+  //     Otherwise, if we have the shift+and version we can interchange srl/shl,
+  //     which in turn affects the constant C0. We can use this to get better
+  //     constants, again determined by target preference.
+ if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
+ auto IsAndWithShift = [](SDValue A, SDValue B) {
+ return A.getOpcode() == ISD::AND &&
+ (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
+ A.getOperand(0) == B.getOperand(0);
+ };
+ auto IsRotateWithOp = [](SDValue A, SDValue B) {
+ return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
+ B.getOperand(0) == A;
+ };
+ SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
+ bool IsRotate = false;
+
+ // Find either shift+and or rotate pattern.
+ if (IsAndWithShift(N0, N1)) {
+ AndOrOp = N0;
+ ShiftOrRotate = N1;
+ } else if (IsAndWithShift(N1, N0)) {
+ AndOrOp = N1;
+ ShiftOrRotate = N0;
+ } else if (IsRotateWithOp(N0, N1)) {
+ IsRotate = true;
+ AndOrOp = N0;
+ ShiftOrRotate = N1;
+ } else if (IsRotateWithOp(N1, N0)) {
+ IsRotate = true;
+ AndOrOp = N1;
+ ShiftOrRotate = N0;
+ }
+
+ if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
+ (IsRotate || AndOrOp.hasOneUse())) {
+ EVT OpVT = N0.getValueType();
+  // Get constant shift/rotate amount and possibly mask (if it's the
+  // shift+and variant).
+ auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
+ ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
+ /*AllowTrunc*/ false);
+ if (CNode == nullptr)
+ return std::nullopt;
+ return CNode->getAPIntValue();
+ };
+ std::optional<APInt> AndCMask =
+ IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
+ std::optional<APInt> ShiftCAmt =
+ GetAPIntValue(ShiftOrRotate.getOperand(1));
+ unsigned NumBits = OpVT.getScalarSizeInBits();
+
+ // We found constants.
+ if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
+ unsigned ShiftOpc = ShiftOrRotate.getOpcode();
+ // Check that the constants meet the constraints.
+ bool CanTransform = IsRotate;
+ if (!CanTransform) {
+  // Check that the mask and shift complement each other
+ CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
+ // Check that we are comparing all bits
+ CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
+ // Check that the and mask is correct for the shift
+ CanTransform &=
+ ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
+ }
+
+ // See if target prefers another shift/rotate opcode.
+ unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
+ OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
+ // Transform is valid and we have a new preference.
+ if (CanTransform && NewShiftOpc != ShiftOpc) {
+ SDLoc DL(N);
+ SDValue NewShiftOrRotate =
+ DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
+ ShiftOrRotate.getOperand(1));
+ SDValue NewAndOrOp = SDValue();
+
+ if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
+ APInt NewMask =
+ NewShiftOpc == ISD::SHL
+ ? APInt::getHighBitsSet(NumBits,
+ NumBits - ShiftCAmt->getZExtValue())
+ : APInt::getLowBitsSet(NumBits,
+ NumBits - ShiftCAmt->getZExtValue());
+ NewAndOrOp =
+ DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
+ DAG.getConstant(NewMask, DL, OpVT));
+ } else {
+ NewAndOrOp = ShiftOrRotate.getOperand(0);
+ }
+
+ return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
+ }
+ }
+ }
+ }
+ return SDValue();
}
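
A standalone check of the 64-bit example from the comment above, assuming C++20's std::rotr (names illustrative): both forms test that the two 32-bit halves of X are equal, so the combine may pick whichever the target lowers more cheaply.

  #include <bit>
  #include <cstdint>

  // Form 1: (and X, C0) == (srl X, C1) with C0 = UINT32_MAX, C1 = 32.
  bool viaShiftAnd(uint64_t X) {
    return (X & 0xffffffffULL) == (X >> 32);
  }
  // Form 2: X == rotr(X, 32).
  bool viaRotate(uint64_t X) {
    return X == std::rotr(X, 32);
  }
  // viaShiftAnd(X) == viaRotate(X) for all X; since the shift amount 32
  // is a power of 2, the two forms are interchangeable.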
SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
@@ -12510,7 +12826,7 @@ static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
if (BothLiveOut)
// Both unextended and extended values are live out. There had better be
// a good reason for the transformation.
- return ExtendNodes.size();
+ return !ExtendNodes.empty();
}
return true;
}
@@ -12612,7 +12928,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
- BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
+ BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
Loads.push_back(SplitLoad.getValue(0));
Chains.push_back(SplitLoad.getValue(1));
@@ -12832,11 +13148,10 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
-static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
- const TargetLowering &TLI, EVT VT,
- SDNode *N, SDValue N0,
- ISD::LoadExtType ExtLoadType,
- ISD::NodeType ExtOpc) {
+static SDValue
+tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
+ bool LegalOperations, SDNode *N, SDValue N0,
+ ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
if (!N0.hasOneUse())
return SDValue();
@@ -12844,7 +13159,8 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
return SDValue();
- if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
+ if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
+ !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
return SDValue();
if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
@@ -13117,8 +13433,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
return foldedExt;
if (SDValue foldedExt =
- tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
- ISD::SIGN_EXTEND))
+ tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
+ ISD::SEXTLOAD, ISD::SIGN_EXTEND))
return foldedExt;
// fold (sext (load x)) to multiple smaller sextloads.
@@ -13181,9 +13497,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
return V;
// fold (sext x) -> (zext x) if the sign bit is known zero.
- if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
- DAG.SignBitIsZero(N0))
- return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
+ if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
+ DAG.SignBitIsZero(N0)) {
+ SDNodeFlags Flags;
+ Flags.setNonNeg(true);
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, Flags);
+ }
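
A tiny sketch of the identity behind this fold (illustrative scalars): when the sign bit of the source is known clear, sign- and zero-extension agree, so the nneg-tagged zext is a safe replacement whenever sext is not the cheaper option.

  #include <cstdint>

  int64_t viaSext(int16_t X) { return int64_t(X); }            // sext
  int64_t viaZext(int16_t X) { return int64_t(uint16_t(X)); }  // zext
  // For any X >= 0 (sign bit clear): viaSext(X) == viaZext(X).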
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
@@ -13327,8 +13647,12 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
N0.getScalarValueSizeInBits(),
std::min(Op.getScalarValueSizeInBits(),
VT.getScalarSizeInBits()));
- if (TruncatedBits.isSubsetOf(Known.Zero))
- return DAG.getZExtOrTrunc(Op, DL, VT);
+ if (TruncatedBits.isSubsetOf(Known.Zero)) {
+ SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
+ DAG.salvageDebugInfo(*N0.getNode());
+
+ return ZExtOrTrunc;
+ }
}
// fold (zext (truncate x)) -> (and x, mask)
@@ -13396,8 +13720,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
return foldedExt;
if (SDValue foldedExt =
- tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
- ISD::ZERO_EXTEND))
+ tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
+ ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
return foldedExt;
// fold (zext (load x)) to multiple smaller zextloads.
@@ -13408,8 +13732,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (and/or/xor (load x), cst)) ->
// (and/or/xor (zextload x), (zext cst))
// Unless (and (load x) cst) will match as a zextload already and has
- // additional users.
- if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
+ // additional users, or the zext is already free.
+ if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
@@ -14005,8 +14329,8 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
// The original load itself didn't wrap, so an offset within it doesn't.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
- SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
- TypeSize::Fixed(PtrOff), DL, Flags);
+ SDValue NewPtr = DAG.getMemBasePlusOffset(
+ LN0->getBasePtr(), TypeSize::getFixed(PtrOff), DL, Flags);
AddToWorklist(NewPtr.getNode());
SDValue Load;
@@ -14316,9 +14640,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
EVT SrcVT = N0.getValueType();
bool isLE = DAG.getDataLayout().isLittleEndian();
- // noop truncate
- if (SrcVT == VT)
- return N0;
+ // trunc(undef) = undef
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
// fold (truncate (truncate x)) -> (truncate x)
if (N0.getOpcode() == ISD::TRUNCATE)
@@ -14350,7 +14674,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue X = N0.getOperand(0);
SDValue ExtVal = N0.getOperand(1);
EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
- if (ExtVT.bitsLT(VT)) {
+ if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
SDValue TrX = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, TrX, ExtVal);
}
@@ -14448,6 +14772,16 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return DAG.getBuildVector(VT, DL, TruncOps);
}
+ // trunc (splat_vector x) -> splat_vector (trunc x)
+ if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
+ (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
+ SDLoc DL(N);
+ EVT SVT = VT.getScalarType();
+ return DAG.getSplatVector(
+ VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
+ }
+
// Fold a series of buildvector, bitcast, and truncate if possible.
// For example fold
// (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
@@ -14487,12 +14821,11 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// Handle the case where the load remains an extending load even
// after truncation.
if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ auto *LN0 = cast<LoadSDNode>(N0);
if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
- SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
- VT, LN0->getChain(), LN0->getBasePtr(),
- LN0->getMemoryVT(),
- LN0->getMemOperand());
+ SDValue NewLoad = DAG.getExtLoad(
+ LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
+ LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
return NewLoad;
}
@@ -15301,7 +15634,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
// Replacing the inner FMul could cause the outer FMA to be simplified
// away.
- return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue() : FMA;
+ return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
}
TmpFMA = TmpFMA->getOperand(2);
@@ -15859,7 +16192,8 @@ SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
// FADD -> FMA combines:
if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
- AddToWorklist(Fused.getNode());
+ if (Fused.getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(Fused.getNode());
return Fused;
}
return SDValue();
@@ -16051,7 +16385,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// FADD -> FMA combines:
if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
- AddToWorklist(Fused.getNode());
+ if (Fused.getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(Fused.getNode());
return Fused;
}
return SDValue();
@@ -16168,6 +16503,112 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
return SDValue();
}
+// Transform IEEE Floats:
+// (fmul C, (uitofp Pow2))
+// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
+// (fdiv C, (uitofp Pow2))
+// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
+//
+// The rationale is that fmul/fdiv by a power of 2 just changes the exponent,
+// so there is no need for more than an add/sub.
+//
+// This is valid under the following circumstances:
+// 1) We are dealing with IEEE floats
+// 2) C is normal
+// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
+// TODO: Much of this could also be used for generating `ldexp` on targets
+// that prefer it.
+SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue ConstOp, Pow2Op;
+
+ std::optional<int> Mantissa;
+ auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
+ if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
+ return false;
+
+ ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
+ Pow2Op = N->getOperand(1 - ConstOpIdx);
+ if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
+ (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
+ !DAG.computeKnownBits(Pow2Op).isNonNegative()))
+ return false;
+
+ Pow2Op = Pow2Op.getOperand(0);
+
+ // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
+ // TODO: We could use knownbits to make this bound more precise.
+ int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
+
+ auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
+ if (CFP == nullptr)
+ return false;
+
+ const APFloat &APF = CFP->getValueAPF();
+
+    // Make sure we have a normal/IEEE constant.
+ if (!APF.isNormal() || !APF.isIEEE())
+ return false;
+
+    // Make sure the float's exponent is within the bounds for which this
+    // transform produces a bitwise-equal value.
+ int CurExp = ilogb(APF);
+ // FMul by pow2 will only increase exponent.
+ int MinExp =
+ N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
+ // FDiv by pow2 will only decrease exponent.
+ int MaxExp =
+ N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
+ if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
+ MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
+ return false;
+
+ // Finally make sure we actually know the mantissa for the float type.
+ int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
+ if (!Mantissa)
+ Mantissa = ThisMantissa;
+
+ return *Mantissa == ThisMantissa && ThisMantissa > 0;
+ };
+
+ // TODO: We may be able to include undefs.
+ return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
+ };
+
+ if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
+ return SDValue();
+
+ if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
+ return SDValue();
+
+ // Get log2 after all other checks have taken place. This is because
+ // BuildLogBase2 may create a new node.
+ SDLoc DL(N);
+ // Get Log2 type with same bitwidth as the float type (VT).
+ EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
+ if (VT.isVector())
+ NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
+ VT.getVectorElementCount());
+
+ SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
+ /*InexpensiveOnly*/ true, NewIntVT);
+ if (!Log2)
+ return SDValue();
+
+ // Perform actual transform.
+ SDValue MantissaShiftCnt =
+ DAG.getConstant(*Mantissa, DL, getShiftAmountTy(NewIntVT));
+ // TODO: Sometimes Log2 is of form `(X + C)`. `(X + C) << C1` should fold to
+ // `(X << C1) + (C << C1)`, but that isn't always the case because of the
+  // cast. We could implement that here by handling the casts.
+ SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
+ SDValue ResAsInt =
+ DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
+ NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
+ SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
+ return ResAsFP;
+}
+
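
For intuition, a scalar model of the transform for f64, assuming a normal input and no exponent over/underflow (exactly what the checks above guarantee): multiplying by 2^K adds K to the biased exponent field, i.e. adds K << 52 to the bit pattern.

  #include <bit>
  #include <cstdint>

  double mulByPow2ViaBits(double C, unsigned K) {
    uint64_t Bits = std::bit_cast<uint64_t>(C);
    Bits += uint64_t(K) << 52; // 52 = f64 mantissa bits; fdiv subtracts
    return std::bit_cast<double>(Bits);
  }
  // E.g. mulByPow2ViaBits(3.5, 4) == 3.5 * 16.0, matching the
  // (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa)) form.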
SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -16308,6 +16749,11 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
return Fused;
}
+ // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
+ // able to run.
+ if (SDValue R = combineFMulOrFDivWithIntPow2(N))
+ return R;
+
return SDValue();
}
@@ -16438,6 +16884,21 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFMAD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // Constant fold FMAD.
+ if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) &&
+ isa<ConstantFPSDNode>(N2))
+ return DAG.getNode(ISD::FMAD, DL, VT, N0, N1, N2);
+
+ return SDValue();
+}
+
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
@@ -16659,6 +17120,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
}
+ if (SDValue R = combineFMulOrFDivWithIntPow2(N))
+ return R;
+
return SDValue();
}
@@ -17046,6 +17510,21 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
return FoldIntToFPToInt(N, DAG);
}
+SDValue DAGCombiner::visitXRINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (lrint|llrint undef) -> undef
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
+ // fold (lrint|llrint c1fp) -> c1
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -17197,6 +17676,7 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
case ISD::FRINT:
case ISD::FTRUNC:
case ISD::FNEARBYINT:
+ case ISD::FROUNDEVEN:
case ISD::FFLOOR:
case ISD::FCEIL:
return N0;
@@ -17671,6 +18151,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// can be folded with this one. We should do this to avoid having to keep
// a copy of the original base pointer.
SmallVector<SDNode *, 16> OtherUses;
+ constexpr unsigned int MaxSteps = 8192;
if (isa<ConstantSDNode>(Offset))
for (SDNode::use_iterator UI = BasePtr->use_begin(),
UE = BasePtr->use_end();
@@ -17681,7 +18162,8 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
continue;
- if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
+ if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
+ MaxSteps))
continue;
if (Use.getUser()->getOpcode() != ISD::ADD &&
@@ -17714,7 +18196,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
for (SDNode *Use : Ptr->uses()) {
if (Use == N)
continue;
- if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
+ if (SDNode::hasPredecessorHelper(Use, Visited, Worklist, MaxSteps))
return false;
// If Ptr may be folded in addressing mode of other use, then it's
@@ -17888,12 +18370,13 @@ static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
// Check for #2.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 8> Worklist;
+ constexpr unsigned int MaxSteps = 8192;
// Ptr is predecessor to both N and Op.
Visited.insert(Ptr.getNode());
Worklist.push_back(N);
Worklist.push_back(Op);
- if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
- !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
+ if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
+ !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
return Op;
}
return nullptr;
@@ -18070,7 +18553,7 @@ StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
}
SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
- if (OptLevel == CodeGenOpt::None || !LD->isSimple())
+ if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
return SDValue();
SDValue Chain = LD->getOperand(0);
int64_t Offset;
@@ -18270,7 +18753,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
return V;
// Try to infer better alignment information than the load already has.
- if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
+ if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
+ !LD->isAtomic()) {
if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
if (*Alignment > LD->getAlign() &&
isAligned(*Alignment, LD->getSrcValueOffset())) {
@@ -19006,7 +19490,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
SDValue Ptr = St->getBasePtr();
if (StOffset) {
SDLoc DL(IVal);
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
}
++OpsNarrowed;
@@ -19132,7 +19616,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
return SDValue();
SDValue NewPtr =
- DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
SDValue NewLD =
DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
@@ -19305,7 +19789,7 @@ SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
Chains.push_back(StoreNodes[i].MemNode->getChain());
}
- assert(Chains.size() > 0 && "Chain should have generated a chain");
+ assert(!Chains.empty() && "Chain should have generated a chain");
return DAG.getTokenFactor(StoreDL, Chains);
}
@@ -19381,23 +19865,24 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
for (unsigned I = 0; I != NumStores; ++I) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
SDValue Val = St->getValue();
- // If constant is of the wrong type, convert it now.
+    // If the constant is of the wrong type, convert it now. This comes up
+ // when one of our stores was truncating.
if (MemVT != Val.getValueType()) {
Val = peekThroughBitcasts(Val);
// Deal with constants of wrong size.
if (ElementSizeBits != Val.getValueSizeInBits()) {
- EVT IntMemVT =
- EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
- if (isa<ConstantFPSDNode>(Val)) {
+ auto *C = dyn_cast<ConstantSDNode>(Val);
+ if (!C)
// Not clear how to truncate FP values.
+ // TODO: Handle truncation of build_vector constants
return false;
- }
- if (auto *C = dyn_cast<ConstantSDNode>(Val))
- Val = DAG.getConstant(C->getAPIntValue()
- .zextOrTrunc(Val.getValueSizeInBits())
- .zextOrTrunc(ElementSizeBits),
- SDLoc(C), IntMemVT);
+ EVT IntMemVT =
+ EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
+ Val = DAG.getConstant(C->getAPIntValue()
+ .zextOrTrunc(Val.getValueSizeInBits())
+ .zextOrTrunc(ElementSizeBits),
+ SDLoc(C), IntMemVT);
}
// Make sure the correctly sized value is bitcast to the memory type.
Val = DAG.getBitcast(MemVT, Val);
@@ -19473,6 +19958,10 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
// If fp truncation is necessary give up for now.
if (MemVT.getSizeInBits() != ElementSizeBits)
return false;
+ } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
+ // Not yet handled
+ return false;
} else {
llvm_unreachable("Invalid constant element type");
}
@@ -19603,7 +20092,7 @@ void DAGCombiner::getStoreMergeCandidates(
case StoreSource::Constant:
if (NoTypeMatch)
return false;
- if (!isIntOrFPConstant(OtherBC))
+ if (getStoreSource(OtherBC) != StoreSource::Constant)
return false;
break;
case StoreSource::Extract:
@@ -19825,6 +20314,8 @@ bool DAGCombiner::tryStoreMergeOfConstants(
IsElementZero = C->isZero();
else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
IsElementZero = C->getConstantFPValue()->isNullValue();
+ else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
+ IsElementZero = true;
if (IsElementZero) {
if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
FirstZeroAfterNonZero = i;
@@ -20286,7 +20777,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
}
bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
- if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
+ if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
return false;
// TODO: Extend this function to merge stores of scalable vectors.
@@ -20448,8 +20939,8 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
Ptr, ST->getMemOperand());
}
- if (ST->isSimple() &&
- TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
+ !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
// Many FP stores are not made apparent until after legalize, e.g. for
// argument passing. Since this is so common, custom legalize the
// 64-bit integer store into two 32-bit stores.
@@ -20464,7 +20955,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
ST->getOriginalAlign(), MMOFlags, AAInfo);
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
ST->getOriginalAlign(), MMOFlags, AAInfo);
@@ -20514,20 +21005,18 @@ SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
&IsFast) ||
!IsFast)
return SDValue();
- EVT PtrVT = Ptr.getValueType();
- SDValue Offset =
- DAG.getNode(ISD::MUL, DL, PtrVT, Idx,
- DAG.getConstant(EltVT.getSizeInBits() / 8, DL, PtrVT));
- SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, Offset);
MachinePointerInfo PointerInfo(ST->getAddressSpace());
// If the offset is a known constant then try to recover the pointer
// info
+ SDValue NewPtr;
if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
- NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(COffset), DL);
+ NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
+ } else {
+ NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
}
return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
@@ -20565,7 +21054,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return Chain;
// Try to infer better alignment information than the store already has.
- if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
+ if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
+ !ST->isAtomic()) {
if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
if (*Alignment > ST->getAlign() &&
isAligned(*Alignment, ST->getSrcValueOffset())) {
@@ -20681,7 +21171,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
if (ST->isUnindexed() && ST->isSimple() &&
ST1->isUnindexed() && ST1->isSimple()) {
- if (OptLevel != CodeGenOpt::None && ST1->getBasePtr() == Ptr &&
+ if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
ST->getAddressSpace() == ST1->getAddressSpace()) {
// If this is a store followed by a store with the same value to the
@@ -20689,7 +21179,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return Chain;
}
- if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
+ if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
!ST1->getBasePtr().isUndef() &&
ST->getAddressSpace() == ST1->getAddressSpace()) {
// If we consider two stores and one smaller in size is a scalable
@@ -20702,7 +21192,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
ST->getMemoryVT().getStoreSize())) {
CombineTo(ST1, ST1->getChain());
- return SDValue();
+ return SDValue(N, 0);
}
} else {
const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
@@ -20715,7 +21205,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
ChainBase,
ST1->getMemoryVT().getFixedSizeInBits())) {
CombineTo(ST1, ST1->getChain());
- return SDValue();
+ return SDValue(N, 0);
}
}
}
@@ -20852,7 +21342,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
/// }
///
SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
- if (OptLevel == CodeGenOpt::None)
+ if (OptLevel == CodeGenOptLevel::None)
return SDValue();
// Can't change the number of memory accesses for a volatile store or break
@@ -20922,7 +21412,8 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
// Lower value store.
SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
ST->getOriginalAlign(), MMOFlags, AAInfo);
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
+ Ptr =
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
// Higher value store.
SDValue St1 = DAG.getStore(
St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
@@ -21689,14 +22180,15 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (DAG.isKnownNeverZero(Index))
return DAG.getUNDEF(ScalarVT);
- // Check if the result type doesn't match the inserted element type. A
- // SCALAR_TO_VECTOR may truncate the inserted element and the
- // EXTRACT_VECTOR_ELT may widen the extracted vector.
+ // Check if the result type doesn't match the inserted element type.
+ // The inserted element and extracted element may have mismatched bitwidth.
+  // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted value.
SDValue InOp = VecOp.getOperand(0);
if (InOp.getValueType() != ScalarVT) {
- assert(InOp.getValueType().isInteger() && ScalarVT.isInteger() &&
- InOp.getValueType().bitsGT(ScalarVT));
- return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
+ assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
+ if (InOp.getValueType().bitsGT(ScalarVT))
+ return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
+ return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
}
return InOp;
}
@@ -21748,6 +22240,19 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
unsigned NumElts = VecVT.getVectorNumElements();
unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
+  // See if the extracted element is constant, in which case fold it if it's
+ // a legal fp immediate.
+ if (IndexC && ScalarVT.isFloatingPoint()) {
+ APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
+ KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
+ if (KnownElt.isConstant()) {
+ APFloat CstFP =
+ APFloat(DAG.EVTToAPFloatSemantics(ScalarVT), KnownElt.getConstant());
+ if (TLI.isFPImmLegal(CstFP, ScalarVT))
+ return DAG.getConstantFP(CstFP, DL, ScalarVT);
+ }
+ }
+
// TODO: These transforms should not require the 'hasOneUse' restriction, but
// there are regressions on multiple targets without it. We can end up with a
// mess of scalar and vector code if we reduce only part of the DAG to scalar.
@@ -22110,12 +22615,18 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
+ EVT VT = N->getValueType(0);
+
+ // Don't run this before LegalizeTypes if VT is legal.
+ // Targets may have other preferences.
+ if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
+ return SDValue();
+
// Only for little endian
if (!DAG.getDataLayout().isLittleEndian())
return SDValue();
SDLoc DL(N);
- EVT VT = N->getValueType(0);
EVT OutScalarTy = VT.getScalarType();
uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
@@ -23576,7 +24087,8 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
// Bail out if the target does not support a narrower version of the binop.
EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
WideNumElts / NarrowingRatio);
- if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
+ if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
+ LegalOperations))
return SDValue();
// If extraction is cheap, we don't need to look at the binop operands
@@ -23666,6 +24178,10 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
unsigned Index = Extract->getConstantOperandVal(1);
unsigned NumElts = VT.getVectorMinNumElements();
+ // A fixed length vector being extracted from a scalable vector
+ // may not be any *smaller* than the scalable one.
+ if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
+ return SDValue();
// The definition of EXTRACT_SUBVECTOR states that the index must be a
// multiple of the minimum number of elements in the result type.
@@ -23821,7 +24337,7 @@ static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
// Profitability check: only deal with extractions from the first subvector
// unless the mask becomes an identity mask.
- if (!ShuffleVectorInst::isIdentityMask(NewMask) ||
+ if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
any_of(NewMask, [](int M) { return M < 0; }))
for (auto &DemandedSubvector : DemandedSubvectors)
if (DemandedSubvector.second != 0)
@@ -25583,15 +26099,31 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
return N0;
// If this is an insert of an extracted vector into an undef vector, we can
- // just use the input to the extract.
+ // just use the input to the extract if the types match, and can simplify
+ // in some cases even if they don't.
if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
- return N1.getOperand(0);
+ N1.getOperand(1) == N2) {
+ EVT SrcVT = N1.getOperand(0).getValueType();
+ if (SrcVT == VT)
+ return N1.getOperand(0);
+    // TODO: To remove the zero check, we would need to adjust the offset
+    // to a multiple of the new src type.
+ if (isNullConstant(N2) &&
+ VT.isScalableVector() == SrcVT.isScalableVector()) {
+ if (VT.getVectorMinNumElements() >= SrcVT.getVectorMinNumElements())
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
+ VT, N0, N1.getOperand(0), N2);
+ else
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
+ VT, N1.getOperand(0), N2);
+ }
+ }
// Simplify scalar inserts into an undef vector:
// insert_subvector undef, (splat X), N2 -> splat X
if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
- return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
+ if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
+ return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
// If we are inserting a bitcast value into an undef, with the same
// number of elements, just use the bitcast input of the extract.
@@ -25635,10 +26167,11 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
N1, N2);
// Eliminate an intermediate insert into an undef vector:
- // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
- // insert_subvector undef, X, N2
+ // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
+ // insert_subvector undef, X, 0
if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
- N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
+ N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
+ isNullConstant(N2))
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
N1.getOperand(1), N2);
@@ -25814,6 +26347,14 @@ SDValue DAGCombiner::visitVPOp(SDNode *N) {
if (SDValue SD = visitVPSCATTER(N))
return SD;
+ if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
+ if (SDValue SD = visitVP_STRIDED_LOAD(N))
+ return SD;
+
+ if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
+ if (SDValue SD = visitVP_STRIDED_STORE(N))
+ return SD;
+
// VP operations in which all vector elements are disabled - either by
// determining that the mask is all false or that the EVL is 0 - can be
// eliminated.
@@ -26535,11 +27076,11 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
- EVT VT = N->getValueType(0);
SDLoc DL(N);
unsigned BinOpc = N1.getOpcode();
- if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
+ if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
+ (N1.getResNo() != N2.getResNo()))
return SDValue();
// The use checks are intentionally on SDNode because we may be dealing
@@ -26556,26 +27097,29 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
// Fold select(cond, binop(x, y), binop(z, y))
// --> binop(select(cond, x, z), y)
if (N1.getOperand(1) == N2.getOperand(1)) {
- SDValue NewSel =
- DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
+ SDValue N10 = N1.getOperand(0);
+ SDValue N20 = N2.getOperand(0);
+ SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
NewBinOp->setFlags(N1->getFlags());
NewBinOp->intersectFlagsWith(N2->getFlags());
- return NewBinOp;
+ return SDValue(NewBinOp.getNode(), N1.getResNo());
}
// Fold select(cond, binop(x, y), binop(x, z))
// --> binop(x, select(cond, y, z))
- // Second op VT might be different (e.g. shift amount type)
- if (N1.getOperand(0) == N2.getOperand(0) &&
- VT == N1.getOperand(1).getValueType() &&
- VT == N2.getOperand(1).getValueType()) {
- SDValue NewSel =
- DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
- SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
- NewBinOp->setFlags(N1->getFlags());
- NewBinOp->intersectFlagsWith(N2->getFlags());
- return NewBinOp;
+ if (N1.getOperand(0) == N2.getOperand(0)) {
+ SDValue N11 = N1.getOperand(1);
+ SDValue N21 = N2.getOperand(1);
+ // Second op VT might be different (e.g. shift amount type)
+ if (N11.getValueType() == N21.getValueType()) {
+ SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
+ SDValue NewBinOp =
+ DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
+ NewBinOp->setFlags(N1->getFlags());
+ NewBinOp->intersectFlagsWith(N2->getFlags());
+ return SDValue(NewBinOp.getNode(), N1.getResNo());
+ }
}
// TODO: Handle isCommutativeBinOp patterns as well?
@@ -26724,8 +27268,8 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
// Shift the tested bit over the sign bit.
const APInt &AndMask = ConstAndRHS->getAPIntValue();
- unsigned ShCt = AndMask.getBitWidth() - 1;
- if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
+ if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
+ unsigned ShCt = AndMask.getBitWidth() - 1;
SDValue ShlAmt =
DAG.getConstant(AndMask.countl_zero(), SDLoc(AndLHS),
getShiftAmountTy(AndLHS.getValueType()));
@@ -26766,10 +27310,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
// zext (setcc n0, n1)
if (LegalTypes) {
SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
- if (VT.bitsLT(SCC.getValueType()))
- Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
- else
- Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
+ Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
} else {
SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
@@ -26939,10 +27480,129 @@ SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
return SDValue();
}
+// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
+//
+// Returns the node that represents `Log2(Op)`. This may create a new node. If
+// we are unable to compute `Log2(Op)`, it returns `SDValue()`.
+//
+// All nodes will be created at `DL` and the output will be of type `VT`.
+//
+// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
+// `AssumeNonZero` if this function should simply assume (rather than prove)
+// that `Op` is non-zero.
+static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
+ SDValue Op, unsigned Depth,
+ bool AssumeNonZero) {
+ assert(VT.isInteger() && "Only integer types are supported!");
+
+ auto PeekThroughCastsAndTrunc = [](SDValue V) {
+ while (true) {
+ switch (V.getOpcode()) {
+ case ISD::TRUNCATE:
+ case ISD::ZERO_EXTEND:
+ V = V.getOperand(0);
+ break;
+ default:
+ return V;
+ }
+ }
+ };
+
+ if (VT.isScalableVector())
+ return SDValue();
+
+ Op = PeekThroughCastsAndTrunc(Op);
+
+  // Helper for determining whether a value is a power-of-2 constant scalar or
+  // a
+ // vector of such elements.
+ SmallVector<APInt> Pow2Constants;
+ auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
+ if (C->isZero() || C->isOpaque())
+ return false;
+ // TODO: We may also be able to support negative powers of 2 here.
+ if (C->getAPIntValue().isPowerOf2()) {
+ Pow2Constants.emplace_back(C->getAPIntValue());
+ return true;
+ }
+ return false;
+ };
+
+ if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
+ if (!VT.isVector())
+ return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
+ // We need to create a build vector
+ SmallVector<SDValue> Log2Ops;
+ for (const APInt &Pow2 : Pow2Constants)
+ Log2Ops.emplace_back(
+ DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
+ return DAG.getBuildVector(VT, DL, Log2Ops);
+ }
+
+ if (Depth >= DAG.MaxRecursionDepth)
+ return SDValue();
+
+ auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
+ ToCast = PeekThroughCastsAndTrunc(ToCast);
+ EVT CurVT = ToCast.getValueType();
+ if (NewVT == CurVT)
+ return ToCast;
+
+ if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
+ return DAG.getBitcast(NewVT, ToCast);
+
+ return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
+ };
+
+ // log2(X << Y) -> log2(X) + Y
+ if (Op.getOpcode() == ISD::SHL) {
+ // 1 << Y and X nuw/nsw << Y are all non-zero.
+ if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
+ Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
+ if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
+ Depth + 1, AssumeNonZero))
+ return DAG.getNode(ISD::ADD, DL, VT, LogX,
+ CastToVT(VT, Op.getOperand(1)));
+ }
+
+ // c ? X : Y -> c ? Log2(X) : Log2(Y)
+ if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
+ Op.hasOneUse()) {
+ if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
+ Depth + 1, AssumeNonZero))
+ if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
+ Depth + 1, AssumeNonZero))
+ return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
+ }
+
+ // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
+ // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
+ if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
+ Op.hasOneUse()) {
+    // Use AssumeNonZero as false here. Otherwise we can hit a case where
+    // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
+ if (SDValue LogX =
+ takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
+ /*AssumeNonZero*/ false))
+ if (SDValue LogY =
+ takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
+ /*AssumeNonZero*/ false))
+ return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
+ }
+
+ return SDValue();
+}
+
/// Determines the LogBase2 value for a non-null input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
-SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
- EVT VT = V.getValueType();
+SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
+ bool KnownNonZero, bool InexpensiveOnly,
+ std::optional<EVT> OutVT) {
+ EVT VT = OutVT ? *OutVT : V.getValueType();
+ SDValue InexpensiveLogBase2 =
+ takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
+ if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
+ return InexpensiveLogBase2;
+
SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
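
A compact scalar check of the log2 machinery above, using C++20's std::countl_zero (helper names illustrative): the ctlz formula for a known power of two, plus the shift identity takeInexpensiveLog2 recurses on.

  #include <bit>
  #include <cstdint>

  // LogBase2(V) = (EltBits - 1) - ctlz(V); e.g. V = 16 gives 31 - 27 = 4.
  uint32_t logBase2(uint32_t V) { return 31u - std::countl_zero(V); }

  // log2(X << Y) == log2(X) + Y as long as the shift does not overflow,
  // which is the recursion used for the SHL case.
  uint32_t log2OfShl(uint32_t X, uint32_t Y) { return logBase2(X) + Y; }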
@@ -27330,7 +27990,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
}
case ISD::CopyFromReg:
- // Always forward past past CopyFromReg.
+ // Always forward past CopyFromReg.
C = C.getOperand(0);
return true;
@@ -27402,7 +28062,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
/// (aliasing node.)
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
- if (OptLevel == CodeGenOpt::None)
+ if (OptLevel == CodeGenOptLevel::None)
return OldChain;
// Ops for replacing token factor.
@@ -27412,7 +28072,7 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
GatherAllAliases(N, OldChain, Aliases);
// If no operands then chain to entry token.
- if (Aliases.size() == 0)
+ if (Aliases.empty())
return DAG.getEntryNode();
// If a single operand then chain to it. We don't need to revisit it.
@@ -27508,7 +28168,7 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
}
// If we didn't find a chained store, exit.
- if (ChainedStores.size() == 0)
+ if (ChainedStores.empty())
return false;
// Improve all chained stores (St and ChainedStores members) starting from
@@ -27559,7 +28219,7 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
}
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
- if (OptLevel == CodeGenOpt::None)
+ if (OptLevel == CodeGenOptLevel::None)
return false;
const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
@@ -27587,7 +28247,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
/// This is the entry point for the file.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
/// This is the main entry point to this class.
DAGCombiner(*this, AA, OptLevel).Run(Level);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index f0affce7b6b8..a83129586339 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1327,6 +1327,14 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
<< *DI << "\n");
return true;
}
+ if (auto SI = FuncInfo.StaticAllocaMap.find(dyn_cast<AllocaInst>(V));
+ SI != FuncInfo.StaticAllocaMap.end()) {
+ MachineOperand FrameIndexOp = MachineOperand::CreateFI(SI->second);
+ bool IsIndirect = false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II, IsIndirect,
+ FrameIndexOp, Var, Expr);
+ return true;
+ }
if (Register Reg = lookUpRegForValue(V)) {
// FIXME: This does not handle register-indirect values at offset 0.
if (!FuncInfo.MF->useDebugInstrRef()) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 1d0a03ccfcdc..03cba892a167 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -64,11 +64,18 @@ static ISD::NodeType getPreferredExtendForValue(const Instruction *I) {
// can be exposed.
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
unsigned NumOfSigned = 0, NumOfUnsigned = 0;
- for (const User *U : I->users()) {
- if (const auto *CI = dyn_cast<CmpInst>(U)) {
+ for (const Use &U : I->uses()) {
+ if (const auto *CI = dyn_cast<CmpInst>(U.getUser())) {
NumOfSigned += CI->isSigned();
NumOfUnsigned += CI->isUnsigned();
}
+ if (const auto *CallI = dyn_cast<CallBase>(U.getUser())) {
+ if (!CallI->isArgOperand(&U))
+ continue;
+ unsigned ArgNo = CallI->getArgOperandNo(&U);
+ NumOfUnsigned += CallI->paramHasAttr(ArgNo, Attribute::ZExt);
+ NumOfSigned += CallI->paramHasAttr(ArgNo, Attribute::SExt);
+ }
}
if (NumOfSigned > NumOfUnsigned)
ExtendKind = ISD::SIGN_EXTEND;
@@ -350,6 +357,7 @@ void FunctionLoweringInfo::clear() {
StatepointRelocationMaps.clear();
PreferredExtendType.clear();
PreprocessedDbgDeclares.clear();
+ PreprocessedDPVDeclares.clear();
}
/// CreateReg - Allocate a single virtual register for the given type.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 4e7895c0b3cf..a27febe15db8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -1311,15 +1311,15 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags =
cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
- const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ const InlineAsm::Flag F(Flags);
+ const unsigned NumVals = F.getNumOperandRegisters();
GroupIdx.push_back(MIB->getNumOperands());
MIB.addImm(Flags);
++i; // Skip the ID value.
- switch (InlineAsm::getKind(Flags)) {
- default: llvm_unreachable("Bad flags!");
- case InlineAsm::Kind_RegDef:
+ switch (F.getKind()) {
+ case InlineAsm::Kind::RegDef:
for (unsigned j = 0; j != NumVals; ++j, ++i) {
Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
// FIXME: Add dead flags for physical and virtual registers defined.
@@ -1328,8 +1328,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
MIB.addReg(Reg, RegState::Define | getImplRegState(Reg.isPhysical()));
}
break;
- case InlineAsm::Kind_RegDefEarlyClobber:
- case InlineAsm::Kind_Clobber:
+ case InlineAsm::Kind::RegDefEarlyClobber:
+ case InlineAsm::Kind::Clobber:
for (unsigned j = 0; j != NumVals; ++j, ++i) {
Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber |
@@ -1337,9 +1337,9 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
ECRegs.push_back(Reg);
}
break;
- case InlineAsm::Kind_RegUse: // Use of register.
- case InlineAsm::Kind_Imm: // Immediate.
- case InlineAsm::Kind_Mem: // Non-function addressing mode.
+ case InlineAsm::Kind::RegUse: // Use of register.
+ case InlineAsm::Kind::Imm: // Immediate.
+ case InlineAsm::Kind::Mem: // Non-function addressing mode.
// The addressing mode has been selected, just add all of the
// operands to the machine instruction.
for (unsigned j = 0; j != NumVals; ++j, ++i)
@@ -1347,9 +1347,9 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
/*IsDebug=*/false, IsClone, IsCloned);
// Manually set isTied bits.
- if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) {
- unsigned DefGroup = 0;
- if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) {
+ if (F.isRegUseKind()) {
+ unsigned DefGroup;
+ if (F.isUseOperandTiedToDef(DefGroup)) {
unsigned DefIdx = GroupIdx[DefGroup] + 1;
unsigned UseIdx = GroupIdx.back() + 1;
for (unsigned j = 0; j != NumVals; ++j)
@@ -1357,7 +1357,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
}
}
break;
- case InlineAsm::Kind_Func: // Function addressing mode.
+ case InlineAsm::Kind::Func: // Function addressing mode.
for (unsigned j = 0; j != NumVals; ++j, ++i) {
SDValue Op = Node->getOperand(i);
AddOperand(MIB, Op, 0, nullptr, VRBaseMap,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 61fc31715d71..5e1f9fbcdde0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -324,7 +325,8 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) &&
TLI.ShouldShrinkFPConstant(OrigVT)) {
Type *SType = SVT.getTypeForEVT(*DAG.getContext());
- LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
+ LLVMC = cast<ConstantFP>(ConstantFoldCastOperand(
+ Instruction::FPTrunc, LLVMC, SType, DAG.getDataLayout()));
VT = SVT;
Extend = true;
}
@@ -459,7 +461,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
ST->getOriginalAlign(), MMOFlags, AAInfo);
}
- if (CFP->getValueType(0) == MVT::f64) {
+ if (CFP->getValueType(0) == MVT::f64 &&
+ !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
// If this target supports 64-bit registers, do a single 64-bit store.
if (TLI.isTypeLegal(MVT::i64)) {
SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
@@ -480,7 +483,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(),
ST->getOriginalAlign(), MMOFlags, AAInfo);
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), dl);
Hi = DAG.getStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
ST->getOriginalAlign(), MMOFlags, AAInfo);
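[Note: the new isFPImmLegal guard above keeps f64 constants the target can materialize directly out of the integer-store rewrite; when the guard passes, the store goes through the constant's bit pattern instead. A standalone sketch of that bit-pattern store (illustrative, not DAG API):

    #include <cstdint>
    #include <cstring>
    // Store a double via its raw bits, as the f64 path above does with
    // bitcastToAPInt(): one i64 store, or two i32 halves at offsets 0 and 4.
    void storeF64Bits(double V, unsigned char *P) {
      uint64_t Bits;
      std::memcpy(&Bits, &V, sizeof Bits);
      std::memcpy(P, &Bits, sizeof Bits);
    }
]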
@@ -589,7 +592,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
+ Ptr =
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(IncrementSize), dl);
Hi = DAG.getNode(
ISD::SRL, dl, Value.getValueType(), Value,
DAG.getConstant(RoundWidth, dl,
@@ -802,7 +806,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
+ Ptr =
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(IncrementSize), dl);
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
@@ -830,7 +835,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
+ Ptr =
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(IncrementSize), dl);
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
@@ -1007,6 +1013,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
break;
case ISD::SET_FPENV:
+ case ISD::SET_FPMODE:
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(1).getValueType());
break;
@@ -1042,7 +1049,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
case ISD::ATOMIC_STORE:
Action = TLI.getOperationAction(Node->getOpcode(),
- Node->getOperand(2).getValueType());
+ Node->getOperand(1).getValueType());
break;
case ISD::SELECT_CC:
case ISD::STRICT_FSETCC:
@@ -1518,7 +1525,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
unsigned Offset = TypeByteSize*i;
- SDValue Idx = DAG.getMemBasePlusOffset(FIPtr, TypeSize::Fixed(Offset), dl);
+ SDValue Idx =
+ DAG.getMemBasePlusOffset(FIPtr, TypeSize::getFixed(Offset), dl);
if (Truncate)
Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
@@ -1580,7 +1588,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
// Advance the pointer so that the loaded byte will contain the sign bit.
unsigned ByteOffset = (NumBits / 8) - 1;
IntPtr =
- DAG.getMemBasePlusOffset(StackPtr, TypeSize::Fixed(ByteOffset), DL);
+ DAG.getMemBasePlusOffset(StackPtr, TypeSize::getFixed(ByteOffset), DL);
State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI,
ByteOffset);
}
@@ -2250,7 +2258,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
// Also pass the return address of the remainder.
SDValue FIPtr = DAG.CreateStackTemporary(RetVT);
Entry.Node = FIPtr;
- Entry.Ty = RetTy->getPointerTo();
+ Entry.Ty = PointerType::getUnqual(RetTy->getContext());
Entry.IsSExt = isSigned;
Entry.IsZExt = !isSigned;
Args.push_back(Entry);
@@ -2341,7 +2349,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
// Pass the return address of sin.
SDValue SinPtr = DAG.CreateStackTemporary(RetVT);
Entry.Node = SinPtr;
- Entry.Ty = RetTy->getPointerTo();
+ Entry.Ty = PointerType::getUnqual(RetTy->getContext());
Entry.IsSExt = false;
Entry.IsZExt = false;
Args.push_back(Entry);
@@ -2349,7 +2357,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
// Also pass the return address of the cos.
SDValue CosPtr = DAG.CreateStackTemporary(RetVT);
Entry.Node = CosPtr;
- Entry.Ty = RetTy->getPointerTo();
+ Entry.Ty = PointerType::getUnqual(RetTy->getContext());
Entry.IsSExt = false;
Entry.IsZExt = false;
Args.push_back(Entry);
@@ -2649,7 +2657,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
SDValue Store1 = DAG.getStore(MemChain, dl, Lo, StackSlot,
MachinePointerInfo());
// Store the hi of the constructed double.
- SDValue HiPtr = DAG.getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), dl);
+ SDValue HiPtr =
+ DAG.getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), dl);
SDValue Store2 =
DAG.getStore(MemChain, dl, Hi, HiPtr, MachinePointerInfo());
MemChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
@@ -3079,11 +3088,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
case ISD::ATOMIC_STORE: {
// There is no libcall for atomic store; fake it with ATOMIC_SWAP.
- SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
- cast<AtomicSDNode>(Node)->getMemoryVT(),
- Node->getOperand(0),
- Node->getOperand(1), Node->getOperand(2),
- cast<AtomicSDNode>(Node)->getMemOperand());
+ SDValue Swap = DAG.getAtomic(
+ ISD::ATOMIC_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0), Node->getOperand(2), Node->getOperand(1),
+ cast<AtomicSDNode>(Node)->getMemOperand());
Results.push_back(Swap.getValue(1));
break;
}
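[Note: besides the formatting change, the swapped getOperand(2)/getOperand(1) above reflects ATOMIC_STORE now carrying (chain, value, pointer) like a regular StoreSDNode, while ATOMIC_SWAP keeps (chain, pointer, value). The fallback itself has a simple standalone equivalent (a sketch using std::atomic):

    #include <atomic>
    // An atomic store with no libcall is emitted as an exchange whose
    // result is discarded; only the ordering side effect is kept.
    void atomicStoreViaSwap(std::atomic<int> &Loc, int V) {
      (void)Loc.exchange(V);
    }
]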
@@ -3133,6 +3141,23 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Res.getValue(1));
break;
}
+ case ISD::ATOMIC_LOAD_SUB: {
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue RHS = Node->getOperand(2);
+ AtomicSDNode *AN = cast<AtomicSDNode>(Node);
+ if (RHS->getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(RHS->getOperand(1))->getVT() == AN->getMemoryVT())
+ RHS = RHS->getOperand(0);
+ SDValue NewRHS =
+ DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
+ SDValue Res = DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, AN->getMemoryVT(),
+ Node->getOperand(0), Node->getOperand(1),
+ NewRHS, AN->getMemOperand());
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ break;
+ }
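[Note: the new expansion above rewrites an atomic subtract as an atomic add of the negated operand. A standalone model of the identity (unsigned arithmetic keeps the negation well defined):

    #include <atomic>
    // fetch_sub(X) and fetch_add(0 - X) return the same old value and
    // leave the same new value, which is what the rewrite relies on.
    unsigned fetchSubViaAdd(std::atomic<unsigned> &Loc, unsigned X) {
      return Loc.fetch_add(0u - X);
    }
]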
case ISD::DYNAMIC_STACKALLOC:
ExpandDYNAMIC_STACKALLOC(Node, Results);
break;
@@ -3333,7 +3358,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(DAG.expandVACopy(Node));
break;
case ISD::EXTRACT_VECTOR_ELT:
- if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
+ if (Node->getOperand(0).getValueType().getVectorElementCount().isScalar())
// This must be an access of the only element. Return it.
Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0),
Node->getOperand(0));
@@ -3904,6 +3929,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue Chain = Node->getOperand(0);
SDValue Table = Node->getOperand(1);
SDValue Index = Node->getOperand(2);
+ int JTI = cast<JumpTableSDNode>(Table.getNode())->getIndex();
const DataLayout &TD = DAG.getDataLayout();
EVT PTy = TLI.getPointerTy(TD);
@@ -3938,7 +3964,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TLI.getPICJumpTableRelocBase(Table, DAG));
}
- Tmp1 = TLI.expandIndirectJTBranch(dl, LD.getValue(1), Addr, DAG);
+ Tmp1 = TLI.expandIndirectJTBranch(dl, LD.getValue(1), Addr, JTI, DAG);
Results.push_back(Tmp1);
break;
}
@@ -4418,6 +4444,10 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, RTLIB::EXP2_F80,
RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128, Results);
break;
+ case ISD::FEXP10:
+ ExpandFPLibCall(Node, RTLIB::EXP10_F32, RTLIB::EXP10_F64, RTLIB::EXP10_F80,
+ RTLIB::EXP10_F128, RTLIB::EXP10_PPCF128, Results);
+ break;
case ISD::FTRUNC:
case ISD::STRICT_FTRUNC:
ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
@@ -4820,6 +4850,46 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
DAG.makeStateFunctionCall(RTLIB::FESETENV, EnvPtr, Chain, dl));
break;
}
+ case ISD::GET_FPMODE: {
+ // Call fegetmode, which saves control modes into a stack slot. Then load
+ // the value to return from the stack.
+ EVT ModeVT = Node->getValueType(0);
+ SDValue StackPtr = DAG.CreateStackTemporary(ModeVT);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ SDValue Chain = DAG.makeStateFunctionCall(RTLIB::FEGETMODE, StackPtr,
+ Node->getOperand(0), dl);
+ SDValue LdInst = DAG.getLoad(
+ ModeVT, dl, Chain, StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));
+ Results.push_back(LdInst);
+ Results.push_back(LdInst.getValue(1));
+ break;
+ }
+ case ISD::SET_FPMODE: {
+ // Move control modes to stack slot and then call fesetmode with the pointer
+ // to the slot as argument.
+ SDValue Mode = Node->getOperand(1);
+ EVT ModeVT = Mode.getValueType();
+ SDValue StackPtr = DAG.CreateStackTemporary(ModeVT);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ SDValue StInst = DAG.getStore(
+ Node->getOperand(0), dl, Mode, StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));
+ Results.push_back(
+ DAG.makeStateFunctionCall(RTLIB::FESETMODE, StackPtr, StInst, dl));
+ break;
+ }
+ case ISD::RESET_FPMODE: {
+ // It is legalized to a call to 'fesetmode(FE_DFL_MODE)'. On most targets
+ // FE_DFL_MODE is glibc's '((const femode_t *) -1)'; targets where it
+ // differs must provide custom lowering.
+ const DataLayout &DL = DAG.getDataLayout();
+ EVT PtrTy = TLI.getPointerTy(DL);
+ SDValue Mode = DAG.getConstant(-1LL, dl, PtrTy);
+ Results.push_back(DAG.makeStateFunctionCall(RTLIB::FESETMODE, Mode,
+ Node->getOperand(0), dl));
+ break;
+ }
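[Note: at the C level the three expansions above amount to the TS 18661-1 fegetmode/fesetmode interfaces. A sketch — femode_t and FE_DFL_MODE are glibc's names, and availability is target-dependent:

    #include <fenv.h>
    void fpModeRoundTrip(void) {
      femode_t M;
      fegetmode(&M);          // GET_FPMODE: save modes through a stack slot
      fesetmode(&M);          // SET_FPMODE: install modes from a stack slot
      fesetmode(FE_DFL_MODE); // RESET_FPMODE: the ((const femode_t *)-1) tag
    }
]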
}
// Replace the original node with the legalized result.
@@ -4961,6 +5031,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::SREM:
case ISD::UDIV:
case ISD::UREM:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
case ISD::AND:
case ISD::OR:
case ISD::XOR: {
@@ -4977,12 +5051,21 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
break;
case ISD::SDIV:
case ISD::SREM:
+ case ISD::SMIN:
+ case ISD::SMAX:
ExtOp = ISD::SIGN_EXTEND;
break;
case ISD::UDIV:
case ISD::UREM:
ExtOp = ISD::ZERO_EXTEND;
break;
+ case ISD::UMIN:
+ case ISD::UMAX:
+ if (TLI.isSExtCheaperThanZExt(OVT, NVT))
+ ExtOp = ISD::SIGN_EXTEND;
+ else
+ ExtOp = ISD::ZERO_EXTEND;
+ break;
}
TruncOp = ISD::TRUNCATE;
}
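[Note: the isSExtCheaperThanZExt choice above is sound because either extension, applied to both operands, preserves unsigned order — the map x -> sext(x) is strictly increasing on unsigned inputs. A quick standalone check for the sext case:

    #include <cstdint>
    // umin computed on sign-extended 8-bit operands: the unsigned compare
    // still picks the right element, and its low 8 bits are umin(A, B).
    uint32_t umin8ViaSext(uint8_t A, uint8_t B) {
      uint32_t WA = uint32_t(int32_t(int8_t(A)));
      uint32_t WB = uint32_t(int32_t(int8_t(B)));
      return WA < WB ? WA : WB;
    }
]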
@@ -5104,7 +5187,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
unsigned ExtOp = ISD::FP_EXTEND;
if (NVT.isInteger()) {
ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
- ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ if (isSignedIntSetCC(CCCode) ||
+ TLI.isSExtCheaperThanZExt(Node->getOperand(0).getValueType(), NVT))
+ ExtOp = ISD::SIGN_EXTEND;
+ else
+ ExtOp = ISD::ZERO_EXTEND;
}
if (Node->isStrictFPOpcode()) {
SDValue InChain = Node->getOperand(0);
@@ -5261,6 +5348,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FABS:
case ISD::FEXP:
case ISD::FEXP2:
+ case ISD::FEXP10:
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
Results.push_back(
@@ -5459,6 +5547,23 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(NewAtomic.getValue(1));
break;
}
+ case ISD::SPLAT_VECTOR: {
+ SDValue Scalar = Node->getOperand(0);
+ MVT ScalarType = Scalar.getSimpleValueType();
+ MVT NewScalarType = NVT.getVectorElementType();
+ if (ScalarType.isInteger()) {
+ Tmp1 = DAG.getNode(ISD::ANY_EXTEND, dl, NewScalarType, Scalar);
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp2));
+ break;
+ }
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NewScalarType, Scalar);
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ Results.push_back(
+ DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
+ break;
+ }
}
// Replace the original node with the legalized result.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 7e035d21ef71..c4605a6b9598 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -49,8 +49,7 @@ static RTLIB::Libcall GetFPLibCall(EVT VT,
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
- LLVM_DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG));
SDValue R = SDValue();
switch (N->getOpcode()) {
@@ -88,6 +87,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break;
case ISD::STRICT_FEXP2:
case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break;
+ case ISD::FEXP10: R = SoftenFloatRes_FEXP10(N); break;
case ISD::STRICT_FFLOOR:
case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break;
case ISD::STRICT_FLOG:
@@ -414,6 +414,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
RTLIB::EXP2_PPCF128));
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP10(SDNode *N) {
+ return SoftenFloatRes_Unary(
+ N,
+ GetFPLibCall(N->getValueType(0), RTLIB::EXP10_F32, RTLIB::EXP10_F64,
+ RTLIB::EXP10_F80, RTLIB::EXP10_F128, RTLIB::EXP10_PPCF128));
+}
+
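[Note: SoftenFloatRes_FEXP10 above dispatches to one libcall per value type; the default names follow the usual f/l suffix scheme. A sketch of the mapping — targets may rename or omit entries:

    // Illustrative per-type mapping behind GetFPLibCall for FEXP10:
    const char *exp10LibcallName(unsigned Bits) {
      switch (Bits) {
      case 32:  return "exp10f";
      case 64:  return "exp10";
      case 80:
      case 128: return "exp10l"; // ppcf128 gets its own PowerPC variant
      default:  return nullptr;
      }
    }
]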
SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
RTLIB::FLOOR_F32,
@@ -890,8 +897,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VECREDUCE_SEQ(SDNode *N) {
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
- LLVM_DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG));
SDValue Res = SDValue();
switch (N->getOpcode()) {
@@ -1257,7 +1263,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) {
/// have invalid operands or may have other results that need promotion, we just
/// know that (at least) one result needs expansion.
void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
- LLVM_DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG));
SDValue Lo, Hi;
Lo = Hi = SDValue();
@@ -1305,6 +1311,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break;
case ISD::STRICT_FEXP2:
case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break;
+ case ISD::FEXP10: ExpandFloatRes_FEXP10(N, Lo, Hi); break;
case ISD::STRICT_FFLOOR:
case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break;
case ISD::STRICT_FLOG:
@@ -1500,6 +1507,15 @@ void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N,
RTLIB::EXP2_PPCF128), Lo, Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_FEXP10(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Unary(N,
+ GetFPLibCall(N->getValueType(0), RTLIB::EXP10_F32,
+ RTLIB::EXP10_F64, RTLIB::EXP10_F80,
+ RTLIB::EXP10_F128, RTLIB::EXP10_PPCF128),
+ Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N,
SDValue &Lo, SDValue &Hi) {
ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
@@ -1852,7 +1868,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
/// types of the node are known to be legal, but other operands of the node may
/// need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
- LLVM_DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG));
SDValue Res = SDValue();
// See if the target wants to custom expand this node.
@@ -2166,8 +2182,7 @@ static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) {
}
bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
- LLVM_DEBUG(dbgs() << "Promote float operand " << OpNo << ": "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Promote float operand " << OpNo << ": "; N->dump(&DAG));
SDValue R = SDValue();
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
@@ -2180,6 +2195,7 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
// to use the promoted float operand. Nodes that produce at least one
// promotion-requiring floating point result have their operands legalized as
// a part of PromoteFloatResult.
+ // clang-format off
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
@@ -2191,7 +2207,9 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::BITCAST: R = PromoteFloatOp_BITCAST(N, OpNo); break;
case ISD::FCOPYSIGN: R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break;
case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT: R = PromoteFloatOp_FP_TO_XINT(N, OpNo); break;
+ case ISD::FP_TO_UINT:
+ case ISD::LRINT:
+ case ISD::LLRINT: R = PromoteFloatOp_UnaryOp(N, OpNo); break;
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
R = PromoteFloatOp_FP_TO_XINT_SAT(N, OpNo); break;
@@ -2200,6 +2218,7 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break;
case ISD::STORE: R = PromoteFloatOp_STORE(N, OpNo); break;
}
+ // clang-format on
if (R.getNode())
ReplaceValueWith(SDValue(N, 0), R);
@@ -2233,7 +2252,7 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo) {
}
// Convert the promoted float value to the desired integer type
-SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo) {
+SDValue DAGTypeLegalizer::PromoteFloatOp_UnaryOp(SDNode *N, unsigned OpNo) {
SDValue Op = GetPromotedFloat(N->getOperand(0));
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op);
}
@@ -2305,8 +2324,7 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_STORE(SDNode *N, unsigned OpNo) {
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
- LLVM_DEBUG(dbgs() << "Promote float result " << ResNo << ": "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Promote float result " << ResNo << ": "; N->dump(&DAG));
SDValue R = SDValue();
// See if the target wants to custom expand this node.
@@ -2340,6 +2358,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FCOS:
case ISD::FEXP:
case ISD::FEXP2:
+ case ISD::FEXP10:
case ISD::FFLOOR:
case ISD::FLOG:
case ISD::FLOG2:
@@ -2688,7 +2707,7 @@ SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) {
void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
LLVM_DEBUG(dbgs() << "Soft promote half result " << ResNo << ": ";
- N->dump(&DAG); dbgs() << "\n");
+ N->dump(&DAG));
SDValue R = SDValue();
// See if the target wants to custom expand this node.
@@ -2721,6 +2740,7 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::FCOS:
case ISD::FEXP:
case ISD::FEXP2:
+ case ISD::FEXP10:
case ISD::FFLOOR:
case ISD::FLOG:
case ISD::FLOG2:
@@ -2754,6 +2774,8 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::FPOWI:
case ISD::FLDEXP: R = SoftPromoteHalfRes_ExpOp(N); break;
+ case ISD::FFREXP: R = SoftPromoteHalfRes_FFREXP(N); break;
+
case ISD::LOAD: R = SoftPromoteHalfRes_LOAD(N); break;
case ISD::SELECT: R = SoftPromoteHalfRes_SELECT(N); break;
case ISD::SELECT_CC: R = SoftPromoteHalfRes_SELECT_CC(N); break;
@@ -2882,6 +2904,24 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_ExpOp(SDNode *N) {
return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
}
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FFREXP(SDNode *N) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+ SDValue Op = GetSoftPromotedHalf(N->getOperand(0));
+ SDLoc dl(N);
+
+ // Promote to the larger FP type.
+ Op = DAG.getNode(GetPromotionOpcode(OVT, NVT), dl, NVT, Op);
+
+ SDValue Res = DAG.getNode(N->getOpcode(), dl,
+ DAG.getVTList(NVT, N->getValueType(1)), Op);
+
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+
+ // Convert back to FP16 as an integer.
+ return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
+}
+
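[Note: SoftPromoteHalfRes_FFREXP above widens only the fraction path — the integer exponent result is produced by the wide node and forwarded unchanged, while the fraction is narrowed back. A scalar model of the shape (std::frexp plays the wide FFREXP; the final cast is the narrowing):

    #include <cmath>
    float frexpPromotedModel(float Wide, int &Exp) {
      double Frac = std::frexp((double)Wide, &Exp); // exponent used as-is
      return (float)Frac;                           // fraction rounded back
    }
]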
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) {
EVT RVT = N->getValueType(0);
EVT SVT = N->getOperand(0).getValueType();
@@ -2996,7 +3036,7 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_VECREDUCE_SEQ(SDNode *N) {
bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
LLVM_DEBUG(dbgs() << "Soft promote half operand " << OpNo << ": ";
- N->dump(&DAG); dbgs() << "\n");
+ N->dump(&DAG));
SDValue Res = SDValue();
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index df5878fcdf2e..362fa92dd44b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -39,8 +39,7 @@ using namespace llvm;
/// may also have invalid operands or may have other results that need
/// expansion, we just know that (at least) one result needs promotion.
void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
- LLVM_DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG));
SDValue Res = SDValue();
// See if the target wants to custom expand this node.
@@ -60,14 +59,21 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break;
+ case ISD::VP_BITREVERSE:
case ISD::BITREVERSE: Res = PromoteIntRes_BITREVERSE(N); break;
+ case ISD::VP_BSWAP:
case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
+ case ISD::VP_CTLZ_ZERO_UNDEF:
+ case ISD::VP_CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
case ISD::PARITY:
+ case ISD::VP_CTPOP:
case ISD::CTPOP: Res = PromoteIntRes_CTPOP_PARITY(N); break;
+ case ISD::VP_CTTZ_ZERO_UNDEF:
+ case ISD::VP_CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
case ISD::EXTRACT_VECTOR_ELT:
@@ -283,12 +289,22 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
Res = PromoteIntRes_FunnelShift(N);
break;
+ case ISD::VP_FSHL:
+ case ISD::VP_FSHR:
+ Res = PromoteIntRes_VPFunnelShift(N);
+ break;
+
case ISD::IS_FPCLASS:
Res = PromoteIntRes_IS_FPCLASS(N);
break;
case ISD::FFREXP:
Res = PromoteIntRes_FFREXP(N);
break;
+
+ case ISD::LRINT:
+ case ISD::LLRINT:
+ Res = PromoteIntRes_XRINT(N);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -359,7 +375,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N,
N->getMemOperand());
ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
ReplaceValueWith(SDValue(N, 2), Res.getValue(2));
- return Res.getValue(1);
+ return DAG.getSExtOrTrunc(Res.getValue(1), SDLoc(N), NVT);
}
// Op2 is used for the comparison and thus must be extended according to the
@@ -516,8 +532,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
}
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
- DAG.getShiftAmountConstant(DiffBits, NVT, dl));
+ SDValue ShAmt = DAG.getShiftAmountConstant(DiffBits, NVT, dl);
+ if (N->getOpcode() == ISD::BSWAP)
+ return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
+ ShAmt);
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+ return DAG.getNode(ISD::VP_LSHR, dl, NVT,
+ DAG.getNode(ISD::VP_BSWAP, dl, NVT, Op, Mask, EVL), ShAmt,
+ Mask, EVL);
}
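[Note: the VP branch above reuses the scalar identity that BSWAP promotion relies on — byte-swapping a zero-extended value parks the payload in the high bytes, so a logical shift right by DiffBits recovers the narrow result. Standalone check (__builtin_bswap32 is a GCC/Clang builtin):

    #include <cstdint>
    uint16_t bswap16Via32(uint16_t X) {
      return uint16_t(__builtin_bswap32(X) >> 16); // DiffBits == 32 - 16
    }
]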
SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
@@ -537,9 +560,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
}
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- return DAG.getNode(ISD::SRL, dl, NVT,
- DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
- DAG.getShiftAmountConstant(DiffBits, NVT, dl));
+ SDValue ShAmt = DAG.getShiftAmountConstant(DiffBits, NVT, dl);
+ if (N->getOpcode() == ISD::BITREVERSE)
+ return DAG.getNode(ISD::SRL, dl, NVT,
+ DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), ShAmt);
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+ return DAG.getNode(ISD::VP_LSHR, dl, NVT,
+ DAG.getNode(ISD::VP_BITREVERSE, dl, NVT, Op, Mask, EVL),
+ ShAmt, Mask, EVL);
}
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
@@ -584,12 +613,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
// Zero extend to the promoted type and do the count there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
- Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);
+
// Subtract off the extra leading bits in the bigger type.
- return DAG.getNode(
- ISD::SUB, dl, NVT, Op,
- DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl,
- NVT));
+ SDValue ExtractLeadingBits = DAG.getConstant(
+ NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl, NVT);
+ if (!N->isVPOpcode())
+ return DAG.getNode(ISD::SUB, dl, NVT,
+ DAG.getNode(N->getOpcode(), dl, NVT, Op),
+ ExtractLeadingBits);
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+ return DAG.getNode(ISD::VP_SUB, dl, NVT,
+ DAG.getNode(N->getOpcode(), dl, NVT, Op, Mask, EVL),
+ ExtractLeadingBits, Mask, EVL);
}
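[Note: the subtraction above corrects for the leading zeros that zero-extension introduces before counting. Scalar form (sketch; __builtin_clz is a GCC/Clang builtin and is undefined for zero, so X != 0 is assumed):

    #include <cstdint>
    unsigned ctlz8Via32(uint8_t X) {
      return __builtin_clz(uint32_t(X)) - (32 - 8); // drop the extra zeros
    }
]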
SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) {
@@ -611,7 +647,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) {
// Zero extend to the promoted type and do the count or parity there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
- return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op);
+ if (!N->isVPOpcode())
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op,
+ N->getOperand(1), N->getOperand(2));
}
SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
@@ -635,15 +674,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
}
}
- if (N->getOpcode() == ISD::CTTZ) {
+ if (N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::VP_CTTZ) {
// The count is the same in the promoted type except if the original
// value was zero. This can be handled by setting the bit just off
// the top of the original type.
auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(),
OVT.getScalarSizeInBits());
- Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT));
+ if (N->getOpcode() == ISD::CTTZ)
+ Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT));
+ else
+ Op =
+ DAG.getNode(ISD::VP_OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT),
+ N->getOperand(1), N->getOperand(2));
}
- return DAG.getNode(N->getOpcode(), dl, NVT, Op);
+ if (!N->isVPOpcode())
+ return DAG.getNode(N->getOpcode(), dl, NVT, Op);
+ return DAG.getNode(N->getOpcode(), dl, NVT, Op, N->getOperand(1),
+ N->getOperand(2));
}
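[Note: the TopBit trick above keeps CTTZ's zero case honest after promotion — OR-ing in a bit just past the original width makes cttz(0) report the old width rather than the promoted one. Scalar check (GCC/Clang builtin):

    #include <cstdint>
    unsigned cttz8Via32(uint8_t X) {
      return __builtin_ctz(uint32_t(X) | (1u << 8)); // == 8 when X == 0
    }
]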
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
@@ -740,6 +787,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16_BF16(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
}
+SDValue DAGTypeLegalizer::PromoteIntRes_XRINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_GET_ROUNDING(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
@@ -1366,6 +1419,60 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amt);
}
+// A vp version of PromoteIntRes_FunnelShift.
+SDValue DAGTypeLegalizer::PromoteIntRes_VPFunnelShift(SDNode *N) {
+ SDValue Hi = GetPromotedInteger(N->getOperand(0));
+ SDValue Lo = GetPromotedInteger(N->getOperand(1));
+ SDValue Amt = N->getOperand(2);
+ SDValue Mask = N->getOperand(3);
+ SDValue EVL = N->getOperand(4);
+ if (getTypeAction(Amt.getValueType()) == TargetLowering::TypePromoteInteger)
+ Amt = ZExtPromotedInteger(Amt);
+ EVT AmtVT = Amt.getValueType();
+
+ SDLoc DL(N);
+ EVT OldVT = N->getOperand(0).getValueType();
+ EVT VT = Lo.getValueType();
+ unsigned Opcode = N->getOpcode();
+ bool IsFSHR = Opcode == ISD::VP_FSHR;
+ unsigned OldBits = OldVT.getScalarSizeInBits();
+ unsigned NewBits = VT.getScalarSizeInBits();
+
+ // Amount has to be interpreted modulo the old bit width.
+ Amt = DAG.getNode(ISD::VP_UREM, DL, AmtVT, Amt,
+ DAG.getConstant(OldBits, DL, AmtVT), Mask, EVL);
+
+ // If the promoted type is twice the size (or more), then we use the
+ // traditional funnel 'double' shift codegen. This isn't necessary if the
+ // shift amount is constant.
+ // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z % bw)) >> bw.
+ // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z % bw)).
+ if (NewBits >= (2 * OldBits) && !isa<ConstantSDNode>(Amt) &&
+ !TLI.isOperationLegalOrCustom(Opcode, VT)) {
+ SDValue HiShift = DAG.getConstant(OldBits, DL, VT);
+ Hi = DAG.getNode(ISD::VP_SHL, DL, VT, Hi, HiShift, Mask, EVL);
+ // FIXME: Replace it with VP operations.
+ Lo = DAG.getZeroExtendInReg(Lo, DL, OldVT);
+ SDValue Res = DAG.getNode(ISD::VP_OR, DL, VT, Hi, Lo, Mask, EVL);
+ Res = DAG.getNode(IsFSHR ? ISD::VP_LSHR : ISD::VP_SHL, DL, VT, Res, Amt,
+ Mask, EVL);
+ if (!IsFSHR)
+ Res = DAG.getNode(ISD::VP_LSHR, DL, VT, Res, HiShift, Mask, EVL);
+ return Res;
+ }
+
+ // Shift Lo up to occupy the upper bits of the promoted type.
+ SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, AmtVT);
+ Lo = DAG.getNode(ISD::VP_SHL, DL, VT, Lo, ShiftOffset, Mask, EVL);
+
+ // Increase Amount to shift the result into the lower bits of the promoted
+ // type.
+ if (IsFSHR)
+ Amt = DAG.getNode(ISD::VP_ADD, DL, AmtVT, Amt, ShiftOffset, Mask, EVL);
+
+ return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amt, Mask, EVL);
+}
+
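[Note: the 'double shift' branch above concatenates the promoted halves and shifts once. A standalone 8-bit model of the fshl formula quoted in the comment:

    #include <cstdint>
    uint8_t fshl8(uint8_t X, uint8_t Y, unsigned Z) {
      unsigned Amt = Z % 8;                  // amount modulo the old width
      uint32_t Cat = (uint32_t(X) << 8) | Y; // (aext(x) << bw) | zext(y)
      return uint8_t((Cat << Amt) >> 8);     // shift, keep the upper byte
    }
]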
SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Res;
@@ -1638,8 +1745,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
/// result types of the node are known to be legal, but other operands of the
/// node may need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
- LLVM_DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG));
SDValue Res = SDValue();
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n");
@@ -1721,8 +1827,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::FRAMEADDR:
case ISD::RETURNADDR: Res = PromoteIntOp_FRAMERETURNADDR(N); break;
- case ISD::PREFETCH: Res = PromoteIntOp_PREFETCH(N, OpNo); break;
-
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
@@ -1859,9 +1963,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) {
- SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Op1 = GetPromotedInteger(N->getOperand(1));
return DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(),
- N->getChain(), N->getBasePtr(), Op2, N->getMemOperand());
+ N->getChain(), Op1, N->getBasePtr(), N->getMemOperand());
}
SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
@@ -2236,18 +2340,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_FRAMERETURNADDR(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N, Op), 0);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) {
- assert(OpNo > 1 && "Don't know how to promote this operand!");
- // Promote the rw, locality, and cache type arguments to a supported integer
- // width.
- SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
- SDValue Op3 = ZExtPromotedInteger(N->getOperand(3));
- SDValue Op4 = ZExtPromotedInteger(N->getOperand(4));
- return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
- Op2, Op3, Op4),
- 0);
-}
-
SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) {
bool IsStrict = N->isStrictFPOpcode();
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
@@ -2466,8 +2558,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) {
/// have invalid operands or may have other results that need promotion, we just
/// know that (at least) one result needs expansion.
void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
- LLVM_DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG));
SDValue Lo, Hi;
Lo = Hi = SDValue();
@@ -2512,9 +2603,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
case ISD::GET_ROUNDING:ExpandIntRes_GET_ROUNDING(N, Lo, Hi); break;
case ISD::STRICT_FP_TO_SINT:
- case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
+ case ISD::FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
- case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
+ case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_XINT(N, Lo, Hi); break;
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT: ExpandIntRes_FP_TO_XINT_SAT(N, Lo, Hi); break;
case ISD::STRICT_LROUND:
@@ -3591,43 +3682,24 @@ void DAGTypeLegalizer::ExpandIntRes_GET_ROUNDING(SDNode *N, SDValue &Lo,
ReplaceValueWith(SDValue(N, 1), Chain);
}
-void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
-
- bool IsStrict = N->isStrictFPOpcode();
- SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
- SDValue Op = N->getOperand(IsStrict ? 1 : 0);
- if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
- Op = GetPromotedFloat(Op);
-
- if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftPromoteHalf) {
- EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
- Op = GetSoftPromotedHalf(Op);
- Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
- Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
- SplitInteger(Op, Lo, Hi);
- return;
+// Helper for producing an FP_EXTEND/STRICT_FP_EXTEND of Op.
+static SDValue fpExtendHelper(SDValue Op, SDValue &Chain, bool IsStrict, EVT VT,
+ SDLoc DL, SelectionDAG &DAG) {
+ if (IsStrict) {
+ Op = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other}, {Chain, Op});
+ Chain = Op.getValue(1);
+ return Op;
}
-
- RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
- TargetLowering::MakeLibCallOptions CallOptions;
- CallOptions.setSExt(true);
- std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op,
- CallOptions, dl, Chain);
- SplitInteger(Tmp.first, Lo, Hi);
-
- if (IsStrict)
- ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return DAG.getNode(ISD::FP_EXTEND, DL, VT, Op);
}
-void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_XINT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
+ bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
+ N->getOpcode() == ISD::STRICT_FP_TO_SINT;
bool IsStrict = N->isStrictFPOpcode();
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
@@ -3635,17 +3707,26 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
Op = GetPromotedFloat(Op);
if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftPromoteHalf) {
- EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
+ EVT OFPVT = Op.getValueType();
+ EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), OFPVT);
Op = GetSoftPromotedHalf(Op);
- Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
- Op = DAG.getNode(ISD::FP_TO_UINT, dl, VT, Op);
+ Op = DAG.getNode(OFPVT == MVT::f16 ? ISD::FP16_TO_FP : ISD::BF16_TO_FP, dl,
+ NFPVT, Op);
+ Op = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, VT, Op);
SplitInteger(Op, Lo, Hi);
return;
}
- RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
+ if (Op.getValueType() == MVT::bf16) {
+ // Extend to f32 as there is no bf16 libcall.
+ Op = fpExtendHelper(Op, Chain, IsStrict, MVT::f32, dl, DAG);
+ }
+
+ RTLIB::Libcall LC = IsSigned ? RTLIB::getFPTOSINT(Op.getValueType(), VT)
+ : RTLIB::getFPTOUINT(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-xint conversion!");
TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op,
CallOptions, dl, Chain);
SplitInteger(Tmp.first, Lo, Hi);
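[Note: the bf16 detour above is lossless — bf16 is an f32 with a truncated significand, so every bf16 value converts to f32 exactly and the f32 libcall sees the same value. Minimal illustration (assumes the value is in range for the target integer, as fp-to-int requires):

    #include <cstdint>
    int64_t fpToSint64ViaF32(float WidenedBF16) {
      return (int64_t)WidenedBF16; // stands in for the f32 -> i64 libcall
    }
]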
@@ -3673,14 +3754,9 @@ void DAGTypeLegalizer::ExpandIntRes_XROUND_XRINT(SDNode *N, SDValue &Lo,
EVT VT = Op.getValueType();
if (VT == MVT::f16) {
- VT = MVT::f32;
// Extend to f32.
- if (IsStrict) {
- Op = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, { VT, MVT::Other }, {Chain, Op});
- Chain = Op.getValue(1);
- } else {
- Op = DAG.getNode(ISD::FP_EXTEND, dl, VT, Op);
- }
+ VT = MVT::f32;
+ Op = fpExtendHelper(Op, Chain, IsStrict, VT, dl, DAG);
}
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
@@ -3754,20 +3830,7 @@ void DAGTypeLegalizer::ExpandIntRes_XROUND_XRINT(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
SDValue &Lo, SDValue &Hi) {
- if (N->isAtomic()) {
- // It's typical to have larger CAS than atomic load instructions.
- SDLoc dl(N);
- EVT VT = N->getMemoryVT();
- SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
- SDValue Zero = DAG.getConstant(0, dl, VT);
- SDValue Swap = DAG.getAtomicCmpSwap(
- ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl,
- VT, VTs, N->getOperand(0),
- N->getOperand(1), Zero, Zero, N->getMemOperand());
- ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
- ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
- return;
- }
+ assert(!N->isAtomic() && "Should have been an ATOMIC_LOAD?");
if (ISD::isNormalLoad(N)) {
ExpandRes_NormalLoad(N, Lo, Hi);
@@ -3822,7 +3885,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(IncrementSize), dl);
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
N->getOriginalAlign(), MMOFlags, AAInfo);
@@ -3846,7 +3909,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
N->getOriginalAlign(), MMOFlags, AAInfo);
// Increment the pointer to the other half.
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(IncrementSize), dl);
// Load the rest of the low bits.
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
@@ -4760,7 +4823,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
// Also pass the address of the overflow check.
Entry.Node = Temp;
- Entry.Ty = PtrTy->getPointerTo();
+ Entry.Ty = PointerType::getUnqual(PtrTy->getContext());
Entry.IsSExt = true;
Entry.IsZExt = false;
Args.push_back(Entry);
@@ -4988,8 +5051,7 @@ void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo,
/// result types of the node are known to be legal, but other operands of the
/// node may need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
- LLVM_DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG));
SDValue Res = SDValue();
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
@@ -5014,11 +5076,11 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break;
case ISD::STRICT_SINT_TO_FP:
- case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
+ case ISD::SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::UINT_TO_FP: Res = ExpandIntOp_XINT_TO_FP(N); break;
case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break;
- case ISD::STRICT_UINT_TO_FP:
- case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break;
case ISD::SHL:
case ISD::SRA:
@@ -5067,16 +5129,11 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
GetExpandedInteger(NewRHS, RHSLo, RHSHi);
if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
- if (RHSLo == RHSHi) {
- if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) {
- if (RHSCST->isAllOnes()) {
- // Equality comparison to -1.
- NewLHS = DAG.getNode(ISD::AND, dl,
- LHSLo.getValueType(), LHSLo, LHSHi);
- NewRHS = RHSLo;
- return;
- }
- }
+ if (RHSLo == RHSHi && isAllOnesConstant(RHSLo)) {
+ // Equality comparison to -1.
+ NewLHS = DAG.getNode(ISD::AND, dl, LHSLo.getValueType(), LHSLo, LHSHi);
+ NewRHS = RHSLo;
+ return;
}
NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo);
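[Note: the refactored fast path above rests on a two-word identity — both halves equal -1 exactly when their AND is all-ones, saving the usual XOR/OR chain. Standalone form:

    #include <cstdint>
    bool isAllOnesPair(uint32_t Lo, uint32_t Hi) {
      return (Lo & Hi) == 0xFFFFFFFFu; // == (Lo == ~0u && Hi == ~0u)
    }
]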
@@ -5303,14 +5360,17 @@ SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N, Lo), 0);
}
-SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
+SDValue DAGTypeLegalizer::ExpandIntOp_XINT_TO_FP(SDNode *N) {
bool IsStrict = N->isStrictFPOpcode();
+ bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP ||
+ N->getOpcode() == ISD::STRICT_SINT_TO_FP;
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
EVT DstVT = N->getValueType(0);
- RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
+ RTLIB::Libcall LC = IsSigned ? RTLIB::getSINTTOFP(Op.getValueType(), DstVT)
+ : RTLIB::getUINTTOFP(Op.getValueType(), DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
- "Don't know how to expand this SINT_TO_FP!");
+ "Don't know how to expand this XINT_TO_FP!");
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
std::pair<SDValue, SDValue> Tmp =
@@ -5325,16 +5385,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
}
SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
- if (N->isAtomic()) {
- // It's typical to have larger CAS than atomic store instructions.
- SDLoc dl(N);
- SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
- N->getMemoryVT(),
- N->getOperand(0), N->getOperand(2),
- N->getOperand(1),
- N->getMemOperand());
- return Swap.getValue(1);
- }
+ assert(!N->isAtomic() && "Should have been an ATOMIC_STORE?");
+
if (ISD::isNormalStore(N))
return ExpandOp_NormalStore(N, OpNo);
@@ -5372,7 +5424,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
NEVT, N->getOriginalAlign(), MMOFlags, AAInfo);
@@ -5407,7 +5459,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
N->getOriginalAlign(), MMOFlags, AAInfo);
// Increment the pointer to the other half.
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
// Store the lowest ExcessBits bits in the second half.
Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
@@ -5423,34 +5475,12 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), InL);
}
-SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
- bool IsStrict = N->isStrictFPOpcode();
- SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
- SDValue Op = N->getOperand(IsStrict ? 1 : 0);
- EVT DstVT = N->getValueType(0);
- RTLIB::Libcall LC = RTLIB::getUINTTOFP(Op.getValueType(), DstVT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL &&
- "Don't know how to expand this UINT_TO_FP!");
- TargetLowering::MakeLibCallOptions CallOptions;
- CallOptions.setSExt(true);
- std::pair<SDValue, SDValue> Tmp =
- TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N), Chain);
-
- if (!IsStrict)
- return Tmp.first;
-
- ReplaceValueWith(SDValue(N, 1), Tmp.second);
- ReplaceValueWith(SDValue(N, 0), Tmp.first);
- return SDValue();
-}
-
SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
SDLoc dl(N);
- SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
- cast<AtomicSDNode>(N)->getMemoryVT(),
- N->getOperand(0),
- N->getOperand(1), N->getOperand(2),
- cast<AtomicSDNode>(N)->getMemOperand());
+ SDValue Swap =
+ DAG.getAtomic(ISD::ATOMIC_SWAP, dl, cast<AtomicSDNode>(N)->getMemoryVT(),
+ N->getOperand(0), N->getOperand(2), N->getOperand(1),
+ cast<AtomicSDNode>(N)->getMemOperand());
return Swap.getValue(1);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 328939e44dcb..8a93433c5e04 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -235,7 +235,7 @@ bool DAGTypeLegalizer::run() {
assert(N->getNodeId() == ReadyToProcess &&
"Node should be ready if on worklist!");
- LLVM_DEBUG(dbgs() << "Legalizing node: "; N->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nLegalizing node: "; N->dump(&DAG));
if (IgnoreNodeResults(N)) {
LLVM_DEBUG(dbgs() << "Ignoring node results\n");
goto ScanOperands;
@@ -390,8 +390,7 @@ ScanOperands:
}
if (i == NumOperands) {
- LLVM_DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG));
}
}
NodeDone:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index ad70655de349..9d5931b44ac6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -326,6 +326,7 @@ private:
SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
SDValue PromoteIntRes_FP_TO_XINT_SAT(SDNode *N);
SDValue PromoteIntRes_FP_TO_FP16_BF16(SDNode *N);
+ SDValue PromoteIntRes_XRINT(SDNode *N);
SDValue PromoteIntRes_FREEZE(SDNode *N);
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
@@ -362,6 +363,7 @@ private:
SDValue PromoteIntRes_ABS(SDNode *N);
SDValue PromoteIntRes_Rotate(SDNode *N);
SDValue PromoteIntRes_FunnelShift(SDNode *N);
+ SDValue PromoteIntRes_VPFunnelShift(SDNode *N);
SDValue PromoteIntRes_IS_FPCLASS(SDNode *N);
// Integer Operand Promotion.
@@ -400,7 +402,6 @@ private:
SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_ADDSUBO_CARRY(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N);
- SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_FIX(SDNode *N);
SDValue PromoteIntOp_ExpOp(SDNode *N);
SDValue PromoteIntOp_VECREDUCE(SDNode *N);
@@ -442,8 +443,7 @@ private:
void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_GET_ROUNDING (SDNode *N, SDValue &Lo, SDValue &Hi);
- void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
- void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_XINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_XINT_SAT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_XROUND_XRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -494,10 +494,9 @@ private:
SDValue ExpandIntOp_SETCC(SDNode *N);
SDValue ExpandIntOp_SETCCCARRY(SDNode *N);
SDValue ExpandIntOp_Shift(SDNode *N);
- SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue ExpandIntOp_TRUNCATE(SDNode *N);
- SDValue ExpandIntOp_UINT_TO_FP(SDNode *N);
+ SDValue ExpandIntOp_XINT_TO_FP(SDNode *N);
SDValue ExpandIntOp_RETURNADDR(SDNode *N);
SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N);
SDValue ExpandIntOp_SPLAT_VECTOR(SDNode *N);
@@ -552,6 +551,7 @@ private:
SDValue SoftenFloatRes_FDIV(SDNode *N);
SDValue SoftenFloatRes_FEXP(SDNode *N);
SDValue SoftenFloatRes_FEXP2(SDNode *N);
+ SDValue SoftenFloatRes_FEXP10(SDNode *N);
SDValue SoftenFloatRes_FFLOOR(SDNode *N);
SDValue SoftenFloatRes_FLOG(SDNode *N);
SDValue SoftenFloatRes_FLOG2(SDNode *N);
@@ -633,6 +633,7 @@ private:
void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FEXP10 (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -711,7 +712,7 @@ private:
SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo);
- SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_UnaryOp(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo);
@@ -737,6 +738,7 @@ private:
SDValue SoftPromoteHalfRes_FCOPYSIGN(SDNode *N);
SDValue SoftPromoteHalfRes_FMAD(SDNode *N);
SDValue SoftPromoteHalfRes_ExpOp(SDNode *N);
+ SDValue SoftPromoteHalfRes_FFREXP(SDNode *N);
SDValue SoftPromoteHalfRes_FP_ROUND(SDNode *N);
SDValue SoftPromoteHalfRes_LOAD(SDNode *N);
SDValue SoftPromoteHalfRes_SELECT(SDNode *N);
@@ -888,6 +890,7 @@ private:
void SplitVecRes_VECTOR_INTERLEAVE(SDNode *N);
void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VP_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi);
// Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
bool SplitVectorOperand(SDNode *N, unsigned OpNo);
@@ -986,6 +989,7 @@ private:
SDValue WidenVecRes_Convert(SDNode *N);
SDValue WidenVecRes_Convert_StrictFP(SDNode *N);
SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N);
+ SDValue WidenVecRes_XRINT(SDNode *N);
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
SDValue WidenVecRes_IS_FPCLASS(SDNode *N);
SDValue WidenVecRes_ExpOp(SDNode *N);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 296242c00401..a55364ea2c4e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -176,7 +176,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Increment the pointer to the other half.
unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
StackPtr =
- DAG.getMemBasePlusOffset(StackPtr, TypeSize::Fixed(IncrementSize), dl);
+ DAG.getMemBasePlusOffset(StackPtr, TypeSize::getFixed(IncrementSize), dl);
// Load the second half from the stack slot.
Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
@@ -265,7 +265,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(IncrementSize), dl);
Hi = DAG.getLoad(
NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize),
LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), AAInfo);
@@ -479,7 +479,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
St->getOriginalAlign(), St->getMemOperand()->getFlags(),
AAInfo);
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
Hi = DAG.getStore(
Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize),
St->getOriginalAlign(), St->getMemOperand()->getFlags(), AAInfo);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 3862fd241897..1fbd6322f9ed 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -166,6 +166,21 @@ class VectorLegalizer {
/// truncated back to the original type.
void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ /// Implements vector reduce operation promotion.
+ ///
+ /// All vector operands are promoted to a vector type with larger element
+ /// type, and the start value is promoted to a larger scalar type. Then the
+ /// result is truncated back to the original scalar type.
+ void PromoteReduction(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+ /// Implements vector setcc operation promotion.
+ ///
+ /// All vector operands are promoted to a vector type with larger element
+ /// type.
+ void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+ void PromoteSTRICT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
public:
VectorLegalizer(SelectionDAG& dag) :
DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
@@ -385,9 +400,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FLOG10:
case ISD::FEXP:
case ISD::FEXP2:
+ case ISD::FEXP10:
case ISD::FCEIL:
case ISD::FTRUNC:
case ISD::FRINT:
+ case ISD::LRINT:
+ case ISD::LLRINT:
case ISD::FNEARBYINT:
case ISD::FROUND:
case ISD::FROUNDEVEN:
@@ -551,6 +569,116 @@ bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
return true;
}
+void VectorLegalizer::PromoteReduction(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ MVT VecVT = Node->getOperand(1).getSimpleValueType();
+ MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
+ MVT ScalarVT = Node->getSimpleValueType(0);
+ MVT NewScalarVT = NewVecVT.getVectorElementType();
+
+ SDLoc DL(Node);
+ SmallVector<SDValue, 4> Operands(Node->getNumOperands());
+
+  // Promote the initial value.
+ if (Node->getOperand(0).getValueType().isFloatingPoint())
+ Operands[0] =
+ DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(0));
+ else
+ Operands[0] =
+ DAG.getNode(ISD::ANY_EXTEND, DL, NewScalarVT, Node->getOperand(0));
+
+ for (unsigned j = 1; j != Node->getNumOperands(); ++j)
+ if (Node->getOperand(j).getValueType().isVector() &&
+ !(ISD::isVPOpcode(Node->getOpcode()) &&
+ ISD::getVPMaskIdx(Node->getOpcode()) == j)) // Skip mask operand.
+      // Promote the vector operand.
+ if (Node->getOperand(j).getValueType().isFloatingPoint())
+ Operands[j] =
+ DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j));
+ else
+ Operands[j] =
+ DAG.getNode(ISD::ANY_EXTEND, DL, NewVecVT, Node->getOperand(j));
+ else
+      Operands[j] = Node->getOperand(j); // Keep the mask and VL as-is.
+
+ SDValue Res = DAG.getNode(Node->getOpcode(), DL, NewScalarVT, Operands,
+ Node->getFlags());
+
+ if (ScalarVT.isFloatingPoint())
+ Res = DAG.getNode(ISD::FP_ROUND, DL, ScalarVT, Res,
+ DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
+ else
+ Res = DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, Res);
+
+ Results.push_back(Res);
+}
+
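PromoteReduction's shape is easiest to see on a scalar model. A sketch,
assuming a target whose only legal integer arithmetic is i32: any-extend the
start value and the elements, accumulate in the wide type, and truncate once
at the end; for a wrap-around add the truncated wide sum equals the narrow
sum.

#include <cstdint>
#include <vector>

uint8_t reduceAddPromoted(uint8_t Start, const std::vector<uint8_t> &V) {
  uint32_t Acc = Start;              // ANY_EXTEND of the start value
  for (uint8_t E : V)
    Acc += E;                        // the reduction in the promoted type
  return static_cast<uint8_t>(Acc);  // TRUNCATE back to the original type
}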
+void VectorLegalizer::PromoteSETCC(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ MVT VecVT = Node->getOperand(0).getSimpleValueType();
+ MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
+
+ unsigned ExtOp = VecVT.isFloatingPoint() ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
+
+ SDLoc DL(Node);
+ SmallVector<SDValue, 5> Operands(Node->getNumOperands());
+
+ Operands[0] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(0));
+ Operands[1] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(1));
+ Operands[2] = Node->getOperand(2);
+
+ if (Node->getOpcode() == ISD::VP_SETCC) {
+ Operands[3] = Node->getOperand(3); // mask
+ Operands[4] = Node->getOperand(4); // evl
+ }
+
+ SDValue Res = DAG.getNode(Node->getOpcode(), DL, Node->getSimpleValueType(0),
+ Operands, Node->getFlags());
+
+ Results.push_back(Res);
+}
+
+void VectorLegalizer::PromoteSTRICT(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ MVT VecVT = Node->getOperand(1).getSimpleValueType();
+ MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
+
+ assert(VecVT.isFloatingPoint());
+
+ SDLoc DL(Node);
+ SmallVector<SDValue, 5> Operands(Node->getNumOperands());
+ SmallVector<SDValue, 2> Chains;
+
+ for (unsigned j = 1; j != Node->getNumOperands(); ++j)
+ if (Node->getOperand(j).getValueType().isVector() &&
+ !(ISD::isVPOpcode(Node->getOpcode()) &&
+ ISD::getVPMaskIdx(Node->getOpcode()) == j)) // Skip mask operand.
+ {
+      // Promote the vector operand.
+ SDValue Ext =
+ DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {NewVecVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(j)});
+ Operands[j] = Ext.getValue(0);
+ Chains.push_back(Ext.getValue(1));
+ } else
+      Operands[j] = Node->getOperand(j); // Keep the mask and non-vector operands as-is.
+
+ SDVTList VTs = DAG.getVTList(NewVecVT, Node->getValueType(1));
+
+ Operands[0] = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+
+ SDValue Res =
+ DAG.getNode(Node->getOpcode(), DL, VTs, Operands, Node->getFlags());
+
+ SDValue Round =
+ DAG.getNode(ISD::STRICT_FP_ROUND, DL, {VecVT, MVT::Other},
+ {Res.getValue(1), Res.getValue(0),
+ DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
+
+ Results.push_back(Round.getValue(0));
+ Results.push_back(Round.getValue(1));
+}
+
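PromoteSTRICT keeps the exception chain ordered through the extends, the wide
operation, and the final round. Its value semantics can be modelled on
scalars; a sketch, assuming a target that must do its f32 arithmetic in f64
(for a single IEEE add the extra rounding is harmless, since f64 carries more
than 2*24+2 significand bits):

float strictFAddPromoted(float A, float B) {
  // STRICT_FP_EXTEND both operands, operate in the wide type...
  double Wide = static_cast<double>(A) + static_cast<double>(B);
  // ...then STRICT_FP_ROUND back to the original type.
  return static_cast<float>(Wide);
}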
void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
// For a few operations there is a specific concept for promotion based on
// the operand's type.
@@ -569,6 +697,36 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
// Promote the operation by extending the operand.
PromoteFP_TO_INT(Node, Results);
return;
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ case ISD::VP_REDUCE_FADD:
+ case ISD::VP_REDUCE_FMUL:
+ case ISD::VP_REDUCE_FMAX:
+ case ISD::VP_REDUCE_FMIN:
+ case ISD::VP_REDUCE_SEQ_FADD:
+ // Promote the operation by extending the operand.
+ PromoteReduction(Node, Results);
+ return;
+ case ISD::VP_SETCC:
+ case ISD::SETCC:
+ // Promote the operation by extending the operand.
+ PromoteSETCC(Node, Results);
+ return;
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
+ PromoteSTRICT(Node, Results);
+ return;
case ISD::FP_ROUND:
case ISD::FP_EXTEND:
// These operations are used to do promotion so they can't be promoted
@@ -589,7 +747,10 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
SmallVector<SDValue, 4> Operands(Node->getNumOperands());
for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
- if (Node->getOperand(j).getValueType().isVector())
+ // Do not promote the mask operand of a VP OP.
+ bool SkipPromote = ISD::isVPOpcode(Node->getOpcode()) &&
+ ISD::getVPMaskIdx(Node->getOpcode()) == j;
+ if (Node->getOperand(j).getValueType().isVector() && !SkipPromote)
if (Node->getOperand(j)
.getValueType()
.getVectorElementType()
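The recurring test in these hunks is "promote every vector operand except a
VP mask". A sketch of that predicate, using the ISD helpers assumed from
llvm/CodeGen/ISDOpcodes.h:

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

bool shouldPromoteOperand(unsigned Opcode, unsigned OpIdx, EVT OpVT) {
  if (!OpVT.isVector())
    return false; // scalars (e.g. the VL) are left alone
  // A VP mask is an i1 vector; widening its element type would change the
  // meaning of the operation, so it is skipped as well.
  return !(ISD::isVPOpcode(Opcode) && ISD::getVPMaskIdx(Opcode) == OpIdx);
}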
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 9c1839f2576e..66461b26468f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -38,8 +38,8 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
- LLVM_DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Scalarize node result " << ResNo << ": ";
+ N->dump(&DAG));
SDValue R = SDValue();
switch (N->getOpcode()) {
@@ -88,6 +88,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FCOS:
case ISD::FEXP:
case ISD::FEXP2:
+ case ISD::FEXP10:
case ISD::FFLOOR:
case ISD::FLOG:
case ISD::FLOG10:
@@ -100,6 +101,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FRINT:
+ case ISD::LRINT:
+ case ISD::LLRINT:
case ISD::FROUND:
case ISD::FROUNDEVEN:
case ISD::FSIN:
@@ -656,8 +659,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_IS_FPCLASS(SDNode *N) {
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
- LLVM_DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": ";
+ N->dump(&DAG));
SDValue Res = SDValue();
switch (N->getOpcode()) {
@@ -680,6 +683,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
+ case ISD::LRINT:
+ case ISD::LLRINT:
Res = ScalarizeVecOp_UnaryOp(N);
break;
case ISD::STRICT_SINT_TO_FP:
@@ -965,7 +970,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE_SEQ(SDNode *N) {
/// invalid operands or may have other results that need legalization, we just
/// know that (at least) one result needs vector splitting.
void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
- LLVM_DEBUG(dbgs() << "Split node result: "; N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Split node result: "; N->dump(&DAG));
SDValue Lo, Hi;
// See if the target wants to custom expand this node.
@@ -1075,6 +1080,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FCOS:
case ISD::FEXP:
case ISD::FEXP2:
+ case ISD::FEXP10:
case ISD::FFLOOR:
case ISD::VP_FFLOOR:
case ISD::FLOG:
@@ -1095,6 +1101,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_FP_TO_UINT:
case ISD::FRINT:
case ISD::VP_FRINT:
+ case ISD::LRINT:
+ case ISD::LLRINT:
case ISD::FROUND:
case ISD::VP_FROUND:
case ISD::FROUNDEVEN:
@@ -1201,6 +1209,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UDIVFIXSAT:
SplitVecRes_FIX(N, Lo, Hi);
break;
+ case ISD::EXPERIMENTAL_VP_REVERSE:
+ SplitVecRes_VP_REVERSE(N, Lo, Hi);
+ break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1228,7 +1239,7 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
} else {
MPI = N->getPointerInfo().getWithOffset(IncrementSize);
// Increment the pointer to the other half.
- Ptr = DAG.getObjectPtrOffset(DL, Ptr, TypeSize::Fixed(IncrementSize));
+ Ptr = DAG.getObjectPtrOffset(DL, Ptr, TypeSize::getFixed(IncrementSize));
}
}
@@ -2849,6 +2860,56 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo,
DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
}
+void DAGTypeLegalizer::SplitVecRes_VP_REVERSE(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDValue Val = N->getOperand(0);
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+ SDLoc DL(N);
+
+  // Fall back to a VP_STRIDED_STORE to the stack followed by a VP_LOAD.
+ Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
+
+ EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ VT.getVectorElementCount());
+ SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
+ EVT PtrVT = StackPtr.getValueType();
+ auto &MF = DAG.getMachineFunction();
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+
+ MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOStore, MemoryLocation::UnknownSize,
+ Alignment);
+ MachineMemOperand *LoadMMO = DAG.getMachineFunction().getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize,
+ Alignment);
+
+ unsigned EltWidth = VT.getScalarSizeInBits() / 8;
+ SDValue NumElemMinus1 =
+ DAG.getNode(ISD::SUB, DL, PtrVT, DAG.getZExtOrTrunc(EVL, DL, PtrVT),
+ DAG.getConstant(1, DL, PtrVT));
+ SDValue StartOffset = DAG.getNode(ISD::MUL, DL, PtrVT, NumElemMinus1,
+ DAG.getConstant(EltWidth, DL, PtrVT));
+ SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, StartOffset);
+ SDValue Stride = DAG.getConstant(-(int64_t)EltWidth, DL, PtrVT);
+
+ SDValue TrueMask = DAG.getBoolConstant(true, DL, Mask.getValueType(), VT);
+ SDValue Store = DAG.getStridedStoreVP(DAG.getEntryNode(), DL, Val, StorePtr,
+ DAG.getUNDEF(PtrVT), Stride, TrueMask,
+ EVL, MemVT, StoreMMO, ISD::UNINDEXED);
+
+ SDValue Load = DAG.getLoadVP(VT, DL, Store, StackPtr, Mask, EVL, LoadMMO);
+
+ auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Load,
+ DAG.getVectorIdxConstant(0, DL));
+ Hi =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Load,
+ DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
+}
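The lowering above writes the live elements with a negative stride and then
reloads them contiguously. A plain-memory model of the same trick (EVL is
assumed to be at least 1 and at most the vector length):

#include <cassert>
#include <cstddef>
#include <vector>

std::vector<int> vpReverse(const std::vector<int> &Val, size_t EVL) {
  assert(EVL >= 1 && EVL <= Val.size());
  std::vector<int> Stack(Val.size());
  int *StorePtr = Stack.data() + (EVL - 1); // &stack[(evl - 1) * eltsize]
  for (size_t I = 0; I < EVL; ++I)
    *(StorePtr - I) = Val[I];               // strided store, stride -1 elt
  return Stack;                             // contiguous reload is reversed
}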
+
void DAGTypeLegalizer::SplitVecRes_VECTOR_DEINTERLEAVE(SDNode *N) {
SDValue Op0Lo, Op0Hi, Op1Lo, Op1Hi;
@@ -2889,7 +2950,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_INTERLEAVE(SDNode *N) {
/// the node are known to be legal, but other operands of the node may need
/// legalization as well as the specified one.
bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
- LLVM_DEBUG(dbgs() << "Split node operand: "; N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Split node operand: "; N->dump(&DAG));
SDValue Res = SDValue();
// See if the target wants to custom split this node.
@@ -2972,6 +3033,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::FTRUNC:
+ case ISD::LRINT:
+ case ISD::LLRINT:
Res = SplitVecOp_UnaryOp(N);
break;
case ISD::FLDEXP:
@@ -3973,8 +4036,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_TO_XINT_SAT(SDNode *N) {
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
- LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG));
// See if the target wants to custom widen this node.
if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
@@ -4195,11 +4257,17 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_FP_TO_XINT_SAT(N);
break;
+ case ISD::LRINT:
+ case ISD::LLRINT:
+ Res = WidenVecRes_XRINT(N);
+ break;
+
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
case ISD::FEXP:
case ISD::FEXP2:
+ case ISD::FEXP10:
case ISD::FFLOOR:
case ISD::FLOG:
case ISD::FLOG10:
@@ -4779,6 +4847,27 @@ SDValue DAGTypeLegalizer::WidenVecRes_FP_TO_XINT_SAT(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, WidenVT, Src, N->getOperand(1));
}
+SDValue DAGTypeLegalizer::WidenVecRes_XRINT(SDNode *N) {
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ ElementCount WidenNumElts = WidenVT.getVectorElementCount();
+
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // Also widen the input.
+ if (getTypeAction(SrcVT) == TargetLowering::TypeWidenVector) {
+ Src = GetWidenedVector(Src);
+ SrcVT = Src.getValueType();
+ }
+
+ // Input and output not widened to the same size, give up.
+ if (WidenNumElts != SrcVT.getVectorElementCount())
+ return DAG.UnrollVectorOp(N, WidenNumElts.getKnownMinValue());
+
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, Src);
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
SDValue InOp = N->getOperand(1);
SDLoc DL(N);
@@ -5919,8 +6008,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) {
// Widen Vector Operand
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
- LLVM_DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG));
SDValue Res = SDValue();
// See if the target wants to custom widen this node.
@@ -5960,7 +6048,11 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break;
case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break;
case ISD::FLDEXP:
- case ISD::FCOPYSIGN: Res = WidenVecOp_UnrollVectorOp(N); break;
+ case ISD::FCOPYSIGN:
+ case ISD::LRINT:
+ case ISD::LLRINT:
+ Res = WidenVecOp_UnrollVectorOp(N);
+ break;
case ISD::IS_FPCLASS: Res = WidenVecOp_IS_FPCLASS(N); break;
case ISD::ANY_EXTEND:
@@ -6322,8 +6414,30 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector)
SubVec = GetWidenedVector(SubVec);
- if (SubVec.getValueType().knownBitsLE(VT) && InVec.isUndef() &&
- N->getConstantOperandVal(2) == 0)
+ EVT SubVT = SubVec.getValueType();
+
+ // Whether or not all the elements of the widened SubVec will be inserted into
+ // valid indices of VT.
+ bool IndicesValid = false;
+ // If we statically know that VT can fit SubVT, the indices are valid.
+ if (VT.knownBitsGE(SubVT))
+ IndicesValid = true;
+ else if (VT.isScalableVector() && SubVT.isFixedLengthVector()) {
+ // Otherwise, if we're inserting a fixed vector into a scalable vector and
+ // we know the minimum vscale we can work out if it's valid ourselves.
+ Attribute Attr = DAG.getMachineFunction().getFunction().getFnAttribute(
+ Attribute::VScaleRange);
+ if (Attr.isValid()) {
+ unsigned VScaleMin = Attr.getVScaleRangeMin();
+ if (VT.getSizeInBits().getKnownMinValue() * VScaleMin >=
+ SubVT.getFixedSizeInBits())
+ IndicesValid = true;
+ }
+ }
+
+ // We need to make sure that the indices are still valid, otherwise we might
+ // widen what was previously well-defined to something undefined.
+ if (IndicesValid && InVec.isUndef() && N->getConstantOperandVal(2) == 0)
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, InVec, SubVec,
N->getOperand(2));
@@ -6468,7 +6582,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VP_STRIDED_STORE(SDNode *N,
}
SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
- assert((OpNo == 1 || OpNo == 3) &&
+ assert((OpNo == 1 || OpNo == 4) &&
"Can widen only data or mask operand of mstore");
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
@@ -7093,7 +7207,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
unsigned i = 0, Offset = Increment;
for (i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr =
- DAG.getObjectPtrOffset(dl, BasePtr, TypeSize::Fixed(Offset));
+ DAG.getObjectPtrOffset(dl, BasePtr, TypeSize::getFixed(Offset));
Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
LD->getOriginalAlign(), MMOFlags, AAInfo);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 5b01743d23e0..ab4c33c9e976 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -498,12 +498,12 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags =
cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
- unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ const InlineAsm::Flag F(Flags);
+ unsigned NumVals = F.getNumOperandRegisters();
++i; // Skip the ID value.
- if (InlineAsm::isRegDefKind(Flags) ||
- InlineAsm::isRegDefEarlyClobberKind(Flags) ||
- InlineAsm::isClobberKind(Flags)) {
+ if (F.isRegDefKind() || F.isRegDefEarlyClobberKind() ||
+ F.isClobberKind()) {
// Check for def of register or earlyclobber register.
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
@@ -808,12 +808,12 @@ ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
// Public Constructor Functions
//===----------------------------------------------------------------------===//
-llvm::ScheduleDAGSDNodes *
-llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::ScheduleDAGSDNodes *llvm::createFastDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOptLevel) {
return new ScheduleDAGFast(*IS->MF);
}
-llvm::ScheduleDAGSDNodes *
-llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::ScheduleDAGSDNodes *llvm::createDAGLinearizer(SelectionDAGISel *IS,
+ CodeGenOptLevel) {
return new ScheduleDAGLinearize(*IS->MF);
}
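The raw inline-asm flag word is now decoded through the InlineAsm::Flag
wrapper (llvm/IR/InlineAsm.h) rather than free functions. A sketch of the
member-based spelling used above:

#include "llvm/IR/InlineAsm.h"
using namespace llvm;

unsigned numDefLikeRegs(unsigned RawFlags) {
  const InlineAsm::Flag F(RawFlags);
  if (F.isRegDefKind() || F.isRegDefEarlyClobberKind() || F.isClobberKind())
    return F.getNumOperandRegisters();
  return 0;
}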
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 458f50c54824..47c137d2bcad 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -188,10 +188,9 @@ private:
public:
ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
SchedulingPriorityQueue *availqueue,
- CodeGenOpt::Level OptLevel)
- : ScheduleDAGSDNodes(mf),
- NeedLatency(needlatency), AvailableQueue(availqueue),
- Topo(SUnits, nullptr) {
+ CodeGenOptLevel OptLevel)
+ : ScheduleDAGSDNodes(mf), NeedLatency(needlatency),
+ AvailableQueue(availqueue), Topo(SUnits, nullptr) {
const TargetSubtargetInfo &STI = mf.getSubtarget();
if (DisableSchedCycles || !NeedLatency)
HazardRec = new ScheduleHazardRecognizer();
@@ -987,11 +986,6 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
return nullptr;
- // unfolding an x86 DEC64m operation results in store, dec, load which
- // can't be handled here so quit
- if (NewNodes.size() == 3)
- return nullptr;
-
assert(NewNodes.size() == 2 && "Expected a load folding node!");
N = NewNodes[1];
@@ -1377,12 +1371,12 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags =
cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
- unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ const InlineAsm::Flag F(Flags);
+ unsigned NumVals = F.getNumOperandRegisters();
++i; // Skip the ID value.
- if (InlineAsm::isRegDefKind(Flags) ||
- InlineAsm::isRegDefEarlyClobberKind(Flags) ||
- InlineAsm::isClobberKind(Flags)) {
+ if (F.isRegDefKind() || F.isRegDefEarlyClobberKind() ||
+ F.isClobberKind()) {
// Check for def of register or earlyclobber register.
for (; NumVals; --NumVals, ++i) {
Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
@@ -3150,9 +3144,8 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
// Public Constructor Functions
//===----------------------------------------------------------------------===//
-ScheduleDAGSDNodes *
-llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
- CodeGenOpt::Level OptLevel) {
+ScheduleDAGSDNodes *llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOptLevel OptLevel) {
const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
const TargetInstrInfo *TII = STI.getInstrInfo();
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
@@ -3166,7 +3159,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
ScheduleDAGSDNodes *
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
const TargetInstrInfo *TII = STI.getInstrInfo();
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
@@ -3180,7 +3173,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
ScheduleDAGSDNodes *
llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
const TargetInstrInfo *TII = STI.getInstrInfo();
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
@@ -3194,9 +3187,8 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
return SD;
}
-ScheduleDAGSDNodes *
-llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
- CodeGenOpt::Level OptLevel) {
+ScheduleDAGSDNodes *llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOptLevel OptLevel) {
const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
const TargetInstrInfo *TII = STI.getInstrInfo();
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
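CodeGenOpt::Level was replaced by the scoped enum llvm::CodeGenOptLevel from
llvm/Support/CodeGen.h; the scheduler constructors above only change the
parameter spelling. A minimal sketch:

#include "llvm/Support/CodeGen.h"

bool isOptimized(llvm::CodeGenOptLevel OptLevel) {
  return OptLevel != llvm::CodeGenOptLevel::None;
}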
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 0579c1664d5c..c9e2745f00c9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -659,18 +659,19 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
if (Use->isMachineOpcode())
// Adjust the use operand index by num of defs.
OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
- int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
- if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
+ std::optional<unsigned> Latency =
+ TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
+ if (Latency > 1U && Use->getOpcode() == ISD::CopyToReg &&
!BB->succ_empty()) {
unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
if (Register::isVirtualRegister(Reg))
// This copy is a liveout value. It is likely coalesced, so reduce the
// latency so not to penalize the def.
// FIXME: need target specific adjustment here?
- Latency = Latency - 1;
+ Latency = *Latency - 1;
}
- if (Latency >= 0)
- dep.setLatency(Latency);
+ if (Latency)
+ dep.setLatency(*Latency);
}
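getOperandLatency now returns std::optional<unsigned>, and an empty optional
compares false against any unsigned value, which is what makes the
`Latency > 1U` guard above safe. A runnable model:

#include <cassert>
#include <optional>

int main() {
  std::optional<unsigned> Latency;   // unknown latency
  assert(!(Latency > 1U));           // empty optional: comparison is false
  Latency = 3;
  if (Latency > 1U)
    Latency = *Latency - 1;          // the CopyToReg adjustment above
  assert(Latency && *Latency == 2);  // applied only when known
}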
void ScheduleDAGSDNodes::dumpNode(const SUnit &SU) const {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 1ba1fd65b8c9..ae42a870ea2f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -265,7 +265,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() {
//===----------------------------------------------------------------------===//
/// createVLIWDAGScheduler - This creates a top-down list scheduler.
-ScheduleDAGSDNodes *
-llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ScheduleDAGSDNodes *llvm::createVLIWDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOptLevel) {
return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS));
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 30d202494320..5be1892a44f6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -161,8 +162,13 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
unsigned SplatBitSize;
bool HasUndefs;
unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
+ // Endianness does not matter here. We are checking for a splat given the
+ // element size of the vector, and if we find such a splat for little endian
+ // layout, then that should be valid also for big endian (as the full vector
+ // size is known to be a multiple of the element size).
+ const bool IsBigEndian = false;
return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs,
- EltSize) &&
+ EltSize, IsBigEndian) &&
EltSize == SplatBitSize;
}
@@ -344,12 +350,13 @@ bool ISD::isFreezeUndef(const SDNode *N) {
return N->getOpcode() == ISD::FREEZE && N->getOperand(0).isUndef();
}
-bool ISD::matchUnaryPredicate(SDValue Op,
- std::function<bool(ConstantSDNode *)> Match,
- bool AllowUndefs) {
+template <typename ConstNodeType>
+bool ISD::matchUnaryPredicateImpl(SDValue Op,
+ std::function<bool(ConstNodeType *)> Match,
+ bool AllowUndefs) {
// FIXME: Add support for scalar UNDEF cases?
- if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
- return Match(Cst);
+ if (auto *C = dyn_cast<ConstNodeType>(Op))
+ return Match(C);
// FIXME: Add support for vector UNDEF cases?
if (ISD::BUILD_VECTOR != Op.getOpcode() &&
@@ -364,12 +371,17 @@ bool ISD::matchUnaryPredicate(SDValue Op,
continue;
}
- auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
+ auto *Cst = dyn_cast<ConstNodeType>(Op.getOperand(i));
if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
return false;
}
return true;
}
+// Build used template types.
+template bool ISD::matchUnaryPredicateImpl<ConstantSDNode>(
+ SDValue, std::function<bool(ConstantSDNode *)>, bool);
+template bool ISD::matchUnaryPredicateImpl<ConstantFPSDNode>(
+ SDValue, std::function<bool(ConstantFPSDNode *)>, bool);
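The float instantiation is reached through a thin wrapper; a usage sketch,
assuming the wrapper is named matchUnaryFpPredicate in
llvm/CodeGen/SelectionDAGNodes.h, mirroring the integer matchUnaryPredicate:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool allElementsNonNaN(SDValue Op) {
  // True for a ConstantFP scalar or an all-constant build_vector whose
  // elements are all non-NaN.
  return ISD::matchUnaryFpPredicate(
      Op, [](ConstantFPSDNode *C) { return !C->getValueAPF().isNaN(); });
}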
bool ISD::matchBinaryPredicate(
SDValue LHS, SDValue RHS,
@@ -951,7 +963,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
/// doNotCSE - Return true if CSE should not be performed for this node.
static bool doNotCSE(SDNode *N) {
if (N->getValueType(0) == MVT::Glue)
- return true; // Never CSE anything that produces a flag.
+ return true; // Never CSE anything that produces a glue result.
switch (N->getOpcode()) {
default: break;
@@ -963,7 +975,7 @@ static bool doNotCSE(SDNode *N) {
// Check that remaining values produced are not flags.
for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
if (N->getValueType(i) == MVT::Glue)
- return true; // Never CSE anything that produces a flag.
+ return true; // Never CSE anything that produces a glue result.
return false;
}
@@ -1197,7 +1209,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
}
#ifndef NDEBUG
// Verify that the node was actually in one of the CSE maps, unless it has a
- // flag result (which cannot be CSE'd) or is one of the special cases that are
+ // glue result (which cannot be CSE'd) or is one of the special cases that are
// not subject to CSE.
if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue &&
!N->isMachineOpcode() && !doNotCSE(N)) {
@@ -1296,17 +1308,16 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
}
Align SelectionDAG::getEVTAlign(EVT VT) const {
- Type *Ty = VT == MVT::iPTR ?
- PointerType::get(Type::getInt8Ty(*getContext()), 0) :
- VT.getTypeForEVT(*getContext());
+ Type *Ty = VT == MVT::iPTR ? PointerType::get(*getContext(), 0)
+ : VT.getTypeForEVT(*getContext());
return getDataLayout().getABITypeAlign(Ty);
}
// EntryNode could meaningfully have debug info if we can find it...
-SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
- : TM(tm), OptLevel(OL),
- EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other, MVT::Glue)),
+SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOptLevel OL)
+ : TM(tm), OptLevel(OL), EntryNode(ISD::EntryToken, 0, DebugLoc(),
+ getVTList(MVT::Other, MVT::Glue)),
Root(getEntryNode()) {
InsertNode(&EntryNode);
DbgInfo = new SDDbgInfo();
@@ -1454,6 +1465,51 @@ SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
getNode(ISD::TRUNCATE, DL, VT, Op);
}
+SDValue SelectionDAG::getBitcastedAnyExtOrTrunc(SDValue Op, const SDLoc &DL,
+ EVT VT) {
+ assert(!VT.isVector());
+ auto Type = Op.getValueType();
+ SDValue DestOp;
+ if (Type == VT)
+ return Op;
+ auto Size = Op.getValueSizeInBits();
+ DestOp = getBitcast(MVT::getIntegerVT(Size), Op);
+ if (DestOp.getValueType() == VT)
+ return DestOp;
+
+ return getAnyExtOrTrunc(DestOp, DL, VT);
+}
+
+SDValue SelectionDAG::getBitcastedSExtOrTrunc(SDValue Op, const SDLoc &DL,
+ EVT VT) {
+ assert(!VT.isVector());
+ auto Type = Op.getValueType();
+ SDValue DestOp;
+ if (Type == VT)
+ return Op;
+ auto Size = Op.getValueSizeInBits();
+ DestOp = getBitcast(MVT::getIntegerVT(Size), Op);
+ if (DestOp.getValueType() == VT)
+ return DestOp;
+
+ return getSExtOrTrunc(DestOp, DL, VT);
+}
+
+SDValue SelectionDAG::getBitcastedZExtOrTrunc(SDValue Op, const SDLoc &DL,
+ EVT VT) {
+ assert(!VT.isVector());
+ auto Type = Op.getValueType();
+ SDValue DestOp;
+ if (Type == VT)
+ return Op;
+ auto Size = Op.getValueSizeInBits();
+ DestOp = getBitcast(MVT::getIntegerVT(Size), Op);
+ if (DestOp.getValueType() == VT)
+ return DestOp;
+
+ return getZExtOrTrunc(DestOp, DL, VT);
+}
+
SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT,
EVT OpVT) {
if (VT.bitsLE(Op.getValueType()))
@@ -1570,7 +1626,11 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) ==
TargetLowering::TypePromoteInteger) {
EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
- APInt NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
+ APInt NewVal;
+ if (TLI->isSExtCheaperThanZExt(VT.getScalarType(), EltVT))
+ NewVal = Elt->getValue().sextOrTrunc(EltVT.getSizeInBits());
+ else
+ NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
Elt = ConstantInt::get(*getContext(), NewVal);
}
// In other cases the element type is illegal and needs to be expanded, for
@@ -1587,7 +1647,8 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits();
// For scalable vectors, try to use a SPLAT_VECTOR_PARTS node.
- if (VT.isScalableVector()) {
+ if (VT.isScalableVector() ||
+ TLI->isOperationLegal(ISD::SPLAT_VECTOR, VT)) {
assert(EltVT.getSizeInBits() % ViaEltSizeInBits == 0 &&
"Can only handle an even split!");
unsigned Parts = EltVT.getSizeInBits() / ViaEltSizeInBits;
@@ -1801,6 +1862,13 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
return SDValue(N, 0);
}
+SDValue SelectionDAG::getJumpTableDebugInfo(int JTI, SDValue Chain,
+ const SDLoc &DL) {
+ EVT PTy = getTargetLoweringInfo().getPointerTy(getDataLayout());
+ return getNode(ISD::JUMP_TABLE_DEBUG_INFO, DL, MVT::Glue, Chain,
+ getTargetConstant(static_cast<uint64_t>(JTI), DL, PTy, true));
+}
+
SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
MaybeAlign Alignment, int Offset,
bool isTarget, unsigned TargetFlags) {
@@ -1855,23 +1923,6 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
return SDValue(N, 0);
}
-SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
- unsigned TargetFlags) {
- FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), std::nullopt);
- ID.AddInteger(Index);
- ID.AddInteger(Offset);
- ID.AddInteger(TargetFlags);
- void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, IP))
- return SDValue(E, 0);
-
- auto *N = newSDNode<TargetIndexSDNode>(Index, VT, Offset, TargetFlags);
- CSEMap.InsertNode(N, IP);
- InsertNode(N);
- return SDValue(N, 0);
-}
-
SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), std::nullopt);
@@ -1950,13 +2001,10 @@ SDValue SelectionDAG::getVScale(const SDLoc &DL, EVT VT, APInt MulImm,
if (ConstantFold) {
const MachineFunction &MF = getMachineFunction();
- auto Attr = MF.getFunction().getFnAttribute(Attribute::VScaleRange);
- if (Attr.isValid()) {
- unsigned VScaleMin = Attr.getVScaleRangeMin();
- if (std::optional<unsigned> VScaleMax = Attr.getVScaleRangeMax())
- if (*VScaleMax == VScaleMin)
- return getConstant(MulImm * VScaleMin, DL, VT);
- }
+ const Function &F = MF.getFunction();
+ ConstantRange CR = getVScaleRange(&F, 64);
+ if (const APInt *C = CR.getSingleElement())
+ return getConstant(MulImm * C->getZExtValue(), DL, VT);
}
return getNode(ISD::VSCALE, DL, VT, getConstant(MulImm, DL, VT));
@@ -2121,11 +2169,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
if (Splat && UndefElements.none()) {
// Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the
// number of elements match or the value splatted is a zero constant.
- if (SameNumElts)
+ if (SameNumElts || isNullConstant(Splat))
return N1;
- if (auto *C = dyn_cast<ConstantSDNode>(Splat))
- if (C->isZero())
- return N1;
}
// If the shuffle itself creates a splat, build the vector directly.
@@ -2490,7 +2535,7 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
// icmp X, X -> true/false
// icmp X, undef -> true/false because undef could be X.
- if (N1 == N2)
+ if (N1.isUndef() || N2.isUndef() || N1 == N2)
return getBoolConstant(ISD::isTrueWhenEqual(Cond), dl, VT, OpVT);
}
@@ -2836,6 +2881,12 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
}
}
+ // Fallback - this is a splat if all demanded elts are the same constant.
+ if (computeKnownBits(V, DemandedElts, Depth).isConstant()) {
+ UndefElts = ~DemandedElts;
+ return true;
+ }
+
return false;
}
@@ -3057,6 +3108,15 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = computeKnownBits(SrcOp, Depth + 1).trunc(BitWidth);
break;
}
+ case ISD::SPLAT_VECTOR_PARTS: {
+ unsigned ScalarSize = Op.getOperand(0).getScalarValueSizeInBits();
+ assert(ScalarSize * Op.getNumOperands() == BitWidth &&
+ "Expected SPLAT_VECTOR_PARTS scalars to cover element width");
+ for (auto [I, SrcOp] : enumerate(Op->ops())) {
+ Known.insertBits(computeKnownBits(SrcOp, Depth + 1), ScalarSize * I);
+ }
+ break;
+ }
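A plain-integer model of the known-bits rule above: each scalar part lands at
offset ScalarSize * I of the element, lowest part first.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Parts[2] = {0x89ABCDEFu, 0x01234567u}; // operand 0 is the low part
  uint64_t Elt = (uint64_t)Parts[1] << 32 | Parts[0]; // insertBits at 32 * I
  assert(Elt == 0x0123456789ABCDEFull);
}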
case ISD::BUILD_VECTOR:
assert(!Op.getValueType().isScalableVector());
// Collect the known bits that are shared by every demanded vector element.
@@ -3688,14 +3748,19 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
assert(Op.getResNo() == 0 &&
"We only compute knownbits for the difference here.");
- // TODO: Compute influence of the carry operand.
- if (Opcode == ISD::USUBO_CARRY || Opcode == ISD::SSUBO_CARRY)
- break;
+ // With USUBO_CARRY and SSUBO_CARRY a borrow bit may be added in.
+ KnownBits Borrow(1);
+ if (Opcode == ISD::USUBO_CARRY || Opcode == ISD::SSUBO_CARRY) {
+ Borrow = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+ // Borrow has bit width 1
+ Borrow = Borrow.trunc(1);
+ } else {
+ Borrow.setAllZero();
+ }
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
- Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false,
- Known, Known2);
+ Known = KnownBits::computeForSubBorrow(Known, Known2, Borrow);
break;
}
case ISD::UADDO:
@@ -3720,15 +3785,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (Opcode == ISD::ADDE)
// Can't track carry from glue, set carry to unknown.
Carry.resetAll();
- else if (Opcode == ISD::UADDO_CARRY || Opcode == ISD::SADDO_CARRY)
- // TODO: Compute known bits for the carry operand. Not sure if it is worth
- // the trouble (how often will we find a known carry bit). And I haven't
- // tested this very much yet, but something like this might work:
- // Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
- // Carry = Carry.zextOrTrunc(1, false);
- Carry.resetAll();
- else
+ else if (Opcode == ISD::UADDO_CARRY || Opcode == ISD::SADDO_CARRY) {
+ Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+ // Carry has bit width 1
+ Carry = Carry.trunc(1);
+ } else {
Carry.setAllZero();
+ }
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -4047,8 +4110,11 @@ SelectionDAG::computeOverflowForSignedSub(SDValue N0, SDValue N1) const {
if (ComputeNumSignBits(N0) > 1 && ComputeNumSignBits(N1) > 1)
return OFK_Never;
- // TODO: Add ConstantRange::signedSubMayOverflow handling.
- return OFK_Sometime;
+ KnownBits N0Known = computeKnownBits(N0);
+ KnownBits N1Known = computeKnownBits(N1);
+ ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, true);
+ ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, true);
+ return mapOverflowResult(N0Range.signedSubMayOverflow(N1Range));
}
SelectionDAG::OverflowKind
@@ -4057,7 +4123,53 @@ SelectionDAG::computeOverflowForUnsignedSub(SDValue N0, SDValue N1) const {
if (isNullConstant(N1))
return OFK_Never;
- // TODO: Add ConstantRange::unsignedSubMayOverflow handling.
+ KnownBits N0Known = computeKnownBits(N0);
+ KnownBits N1Known = computeKnownBits(N1);
+ ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false);
+ ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false);
+ return mapOverflowResult(N0Range.unsignedSubMayOverflow(N1Range));
+}
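Both subtraction overflow queries reduce to interval reasoning. A scalar
model of the unsigned case, assuming plain non-wrapped [Lo, Hi] ranges:

enum class OverflowKind { Never, Sometime, Always };

OverflowKind unsignedSubMayOverflow(unsigned ALo, unsigned AHi,
                                    unsigned BLo, unsigned BHi) {
  // a - b wraps exactly when b exceeds a.
  if (BHi <= ALo)
    return OverflowKind::Never;    // max(b) <= min(a): never wraps
  if (BLo > AHi)
    return OverflowKind::Always;   // min(b) >  max(a): always wraps
  return OverflowKind::Sometime;
}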
+
+SelectionDAG::OverflowKind
+SelectionDAG::computeOverflowForUnsignedMul(SDValue N0, SDValue N1) const {
+ // X * 0 and X * 1 never overflow.
+ if (isNullConstant(N1) || isOneConstant(N1))
+ return OFK_Never;
+
+ KnownBits N0Known = computeKnownBits(N0);
+ KnownBits N1Known = computeKnownBits(N1);
+ ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false);
+ ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false);
+ return mapOverflowResult(N0Range.unsignedMulMayOverflow(N1Range));
+}
+
+SelectionDAG::OverflowKind
+SelectionDAG::computeOverflowForSignedMul(SDValue N0, SDValue N1) const {
+ // X * 0 and X * 1 never overflow.
+ if (isNullConstant(N1) || isOneConstant(N1))
+ return OFK_Never;
+
+ // Get the size of the result.
+ unsigned BitWidth = N0.getScalarValueSizeInBits();
+
+ // Sum of the sign bits.
+ unsigned SignBits = ComputeNumSignBits(N0) + ComputeNumSignBits(N1);
+
+ // If we have enough sign bits, then there's no overflow.
+ if (SignBits > BitWidth + 1)
+ return OFK_Never;
+
+ if (SignBits == BitWidth + 1) {
+    // The overflow occurs when the true multiplication of the operands
+    // is the minimum negative number.
+ KnownBits N0Known = computeKnownBits(N0);
+ KnownBits N1Known = computeKnownBits(N1);
+ // If one of the operands is non-negative, then there's no
+ // overflow.
+ if (N0Known.isNonNegative() || N1Known.isNonNegative())
+ return OFK_Never;
+ }
+
return OFK_Sometime;
}
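A runnable spot-check of the boundary case: with SignBits == BitWidth + 1,
the product only overflows when both operands are negative and the true
product is exactly one past the signed maximum.

#include <cassert>
#include <cstdint>

int main() {
  // i8: SignBits(-8) = 5, SignBits(-16) = 4, so SignBits == BitWidth + 1.
  // Both operands negative: the true product is +128, one past INT8_MAX,
  // which is the lone overflow case the check above is looking for.
  assert(int(int8_t(-8)) * int(int8_t(-16)) == 128);
  // Same sign-bit budget, one operand non-negative: the product fits.
  assert(int(int8_t(-8)) * int(int8_t(15)) == -120);
}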
@@ -4069,8 +4181,10 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
unsigned BitWidth = OpVT.getScalarSizeInBits();
// Is the constant a known power of 2?
- if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val))
- return Const->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
+ if (ISD::matchUnaryPredicate(Val, [BitWidth](ConstantSDNode *C) {
+ return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
+ }))
+ return true;
// A left-shift of a constant one will have exactly one bit set because
// shifting the bit off the end is undefined.
@@ -4078,6 +4192,8 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
auto *C = isConstOrConstSplat(Val.getOperand(0));
if (C && C->getAPIntValue() == 1)
return true;
+ return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1) &&
+ isKnownNeverZero(Val, Depth);
}
// Similarly, a logical right-shift of a constant sign-bit will have exactly
@@ -4086,8 +4202,13 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
auto *C = isConstOrConstSplat(Val.getOperand(0));
if (C && C->getAPIntValue().isSignMask())
return true;
+ return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1) &&
+ isKnownNeverZero(Val, Depth);
}
+ if (Val.getOpcode() == ISD::ROTL || Val.getOpcode() == ISD::ROTR)
+ return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1);
+
// Are all operands of a build vector constant powers of two?
if (Val.getOpcode() == ISD::BUILD_VECTOR)
if (llvm::all_of(Val->ops(), [BitWidth](SDValue E) {
@@ -4109,6 +4230,34 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1))
return true;
+ if (Val.getOpcode() == ISD::SMIN || Val.getOpcode() == ISD::SMAX ||
+ Val.getOpcode() == ISD::UMIN || Val.getOpcode() == ISD::UMAX)
+ return isKnownToBeAPowerOfTwo(Val.getOperand(1), Depth + 1) &&
+ isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1);
+
+ if (Val.getOpcode() == ISD::SELECT || Val.getOpcode() == ISD::VSELECT)
+ return isKnownToBeAPowerOfTwo(Val.getOperand(2), Depth + 1) &&
+ isKnownToBeAPowerOfTwo(Val.getOperand(1), Depth + 1);
+
+ if (Val.getOpcode() == ISD::AND) {
+ // Looking for `x & -x` pattern:
+ // If x == 0:
+ // x & -x -> 0
+ // If x != 0:
+ // x & -x -> non-zero pow2
+ // so if we find the pattern return whether we know `x` is non-zero.
+ for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) {
+ SDValue NegOp = Val.getOperand(OpIdx);
+ if (NegOp.getOpcode() == ISD::SUB &&
+ NegOp.getOperand(1) == Val.getOperand(1 - OpIdx) &&
+ isNullOrNullSplat(NegOp.getOperand(0)))
+ return isKnownNeverZero(Val.getOperand(1 - OpIdx), Depth);
+ }
+ }
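The x & -x identity is easy to check on small values; a runnable
demonstration:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {1u, 3u, 12u, 0x80000000u}) {
    uint32_t P = X & (0u - X);              // x & -x
    assert(P != 0 && (P & (P - 1)) == 0);   // non-zero, exactly one bit set
  }
  uint32_t Z = 0;
  assert((Z & (0u - Z)) == 0);              // x == 0 gives 0, not a pow2
}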
+
+ if (Val.getOpcode() == ISD::ZERO_EXTEND)
+ return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1);
+
// More could be done here, though the above checks are enough
// to handle some common cases.
return false;
@@ -4869,8 +5018,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
- case ISD::AssertSext:
- case ISD::AssertZext:
case ISD::FREEZE:
case ISD::CONCAT_VECTORS:
case ISD::INSERT_SUBVECTOR:
@@ -4886,7 +5033,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::BITREVERSE:
case ISD::PARITY:
case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
case ISD::TRUNCATE:
case ISD::SIGN_EXTEND_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
@@ -4896,6 +5042,10 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::BUILD_PAIR:
return false;
+ // Matches hasPoisonGeneratingFlags().
+ case ISD::ZERO_EXTEND:
+ return ConsiderFlags && Op->getFlags().hasNonNeg();
+
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
@@ -4932,6 +5082,15 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
return true;
}
+bool SelectionDAG::isADDLike(SDValue Op) const {
+ unsigned Opcode = Op.getOpcode();
+ if (Opcode == ISD::OR)
+ return haveNoCommonBitsSet(Op.getOperand(0), Op.getOperand(1));
+ if (Opcode == ISD::XOR)
+ return isMinSignedConstant(Op.getOperand(1));
+ return false;
+}
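Both isADDLike cases are additions in disguise: an OR of operands with no
common bits set sums them, and an XOR with the minimum signed constant adds
it, because the top bit can never receive a carry. A runnable check on i8:

#include <cassert>
#include <cstdint>

int main() {
  uint8_t A = 0xF0, B = 0x0F;             // no common bits: or == add
  assert((A | B) == static_cast<uint8_t>(A + B));
  uint8_t X = 0x3C;                       // xor with 0x80 == add 0x80 (mod 256)
  assert((X ^ 0x80) == static_cast<uint8_t>(X + 0x80));
}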
+
bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
!isa<ConstantSDNode>(Op.getOperand(1)))
@@ -4977,12 +5136,15 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FCANONICALIZE:
case ISD::FEXP:
case ISD::FEXP2:
+ case ISD::FEXP10:
case ISD::FTRUNC:
case ISD::FFLOOR:
case ISD::FCEIL:
case ISD::FROUND:
case ISD::FROUNDEVEN:
case ISD::FRINT:
+ case ISD::LRINT:
+ case ISD::LLRINT:
case ISD::FNEARBYINT:
case ISD::FLDEXP: {
if (SNaN)
@@ -5112,21 +5274,29 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
isKnownNeverZero(Op.getOperand(2), Depth + 1);
- case ISD::SHL:
+ case ISD::SHL: {
if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap())
return isKnownNeverZero(Op.getOperand(0), Depth + 1);
-
- // 1 << X is never zero. TODO: This can be expanded if we can bound X.
- // The expression is really !Known.One[BitWidth-MaxLog2(Known):0].isZero()
- if (computeKnownBits(Op.getOperand(0), Depth + 1).One[0])
+ KnownBits ValKnown = computeKnownBits(Op.getOperand(0), Depth + 1);
+ // 1 << X is never zero.
+ if (ValKnown.One[0])
+ return true;
+    // If the known one bits survive even the maximum shift amount, the
+    // result is non-zero.
+ APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue();
+ if (MaxCnt.ult(ValKnown.getBitWidth()) &&
+ !ValKnown.One.shl(MaxCnt).isZero())
return true;
break;
-
+ }
case ISD::UADDSAT:
case ISD::UMAX:
return isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ // TODO for smin/smax: If either operand is known negative/positive
+ // respectively we don't need the other to be known at all.
+ case ISD::SMAX:
+ case ISD::SMIN:
case ISD::UMIN:
return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
isKnownNeverZero(Op.getOperand(0), Depth + 1);
@@ -5140,16 +5310,19 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
return isKnownNeverZero(Op.getOperand(0), Depth + 1);
case ISD::SRA:
- case ISD::SRL:
+ case ISD::SRL: {
if (Op->getFlags().hasExact())
return isKnownNeverZero(Op.getOperand(0), Depth + 1);
- // Signed >> X is never zero. TODO: This can be expanded if we can bound X.
- // The expression is really
- // !Known.One[SignBit:SignBit-(BitWidth-MaxLog2(Known))].isZero()
- if (computeKnownBits(Op.getOperand(0), Depth + 1).isNegative())
+ KnownBits ValKnown = computeKnownBits(Op.getOperand(0), Depth + 1);
+ if (ValKnown.isNegative())
+ return true;
+    // If the known one bits survive even the maximum shift amount, the
+    // result is non-zero.
+ APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue();
+ if (MaxCnt.ult(ValKnown.getBitWidth()) &&
+ !ValKnown.One.lshr(MaxCnt).isZero())
return true;
break;
-
+ }
case ISD::UDIV:
case ISD::SDIV:
// div exact can only produce a zero if the dividend is zero.
@@ -5425,161 +5598,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, const SDNodeFlags Flags) {
assert(N1.getOpcode() != ISD::DELETED_NODE && "Operand is DELETED_NODE!");
- // Constant fold unary operations with an integer constant operand. Even
- // opaque constant will be folded, because the folding of unary operations
- // doesn't create new constants with different values. Nevertheless, the
- // opaque flag is preserved during folding to prevent future folding with
- // other constants.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
- const APInt &Val = C->getAPIntValue();
- switch (Opcode) {
- default: break;
- case ISD::SIGN_EXTEND:
- return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
- C->isTargetOpcode(), C->isOpaque());
- case ISD::TRUNCATE:
- if (C->isOpaque())
- break;
- [[fallthrough]];
- case ISD::ZERO_EXTEND:
- return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
- C->isTargetOpcode(), C->isOpaque());
- case ISD::ANY_EXTEND:
- // Some targets like RISCV prefer to sign extend some types.
- if (TLI->isSExtCheaperThanZExt(N1.getValueType(), VT))
- return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
- C->isTargetOpcode(), C->isOpaque());
- return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
- C->isTargetOpcode(), C->isOpaque());
- case ISD::UINT_TO_FP:
- case ISD::SINT_TO_FP: {
- APFloat apf(EVTToAPFloatSemantics(VT),
- APInt::getZero(VT.getSizeInBits()));
- (void)apf.convertFromAPInt(Val,
- Opcode==ISD::SINT_TO_FP,
- APFloat::rmNearestTiesToEven);
- return getConstantFP(apf, DL, VT);
- }
- case ISD::BITCAST:
- if (VT == MVT::f16 && C->getValueType(0) == MVT::i16)
- return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT);
- if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
- return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT);
- if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
- return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT);
- if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
- return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT);
- break;
- case ISD::ABS:
- return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(),
- C->isOpaque());
- case ISD::BITREVERSE:
- return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(),
- C->isOpaque());
- case ISD::BSWAP:
- return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
- C->isOpaque());
- case ISD::CTPOP:
- return getConstant(Val.popcount(), DL, VT, C->isTargetOpcode(),
- C->isOpaque());
- case ISD::CTLZ:
- case ISD::CTLZ_ZERO_UNDEF:
- return getConstant(Val.countl_zero(), DL, VT, C->isTargetOpcode(),
- C->isOpaque());
- case ISD::CTTZ:
- case ISD::CTTZ_ZERO_UNDEF:
- return getConstant(Val.countr_zero(), DL, VT, C->isTargetOpcode(),
- C->isOpaque());
- case ISD::FP16_TO_FP:
- case ISD::BF16_TO_FP: {
- bool Ignored;
- APFloat FPV(Opcode == ISD::FP16_TO_FP ? APFloat::IEEEhalf()
- : APFloat::BFloat(),
- (Val.getBitWidth() == 16) ? Val : Val.trunc(16));
-
- // This can return overflow, underflow, or inexact; we don't care.
- // FIXME need to be more flexible about rounding mode.
- (void)FPV.convert(EVTToAPFloatSemantics(VT),
- APFloat::rmNearestTiesToEven, &Ignored);
- return getConstantFP(FPV, DL, VT);
- }
- case ISD::STEP_VECTOR: {
- if (SDValue V = FoldSTEP_VECTOR(DL, VT, N1, *this))
- return V;
- break;
- }
- }
- }
-
- // Constant fold unary operations with a floating point constant operand.
- if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N1)) {
- APFloat V = C->getValueAPF(); // make copy
- switch (Opcode) {
- case ISD::FNEG:
- V.changeSign();
- return getConstantFP(V, DL, VT);
- case ISD::FABS:
- V.clearSign();
- return getConstantFP(V, DL, VT);
- case ISD::FCEIL: {
- APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
- if (fs == APFloat::opOK || fs == APFloat::opInexact)
- return getConstantFP(V, DL, VT);
- break;
- }
- case ISD::FTRUNC: {
- APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
- if (fs == APFloat::opOK || fs == APFloat::opInexact)
- return getConstantFP(V, DL, VT);
- break;
- }
- case ISD::FFLOOR: {
- APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
- if (fs == APFloat::opOK || fs == APFloat::opInexact)
- return getConstantFP(V, DL, VT);
- break;
- }
- case ISD::FP_EXTEND: {
- bool ignored;
- // This can return overflow, underflow, or inexact; we don't care.
- // FIXME need to be more flexible about rounding mode.
- (void)V.convert(EVTToAPFloatSemantics(VT),
- APFloat::rmNearestTiesToEven, &ignored);
- return getConstantFP(V, DL, VT);
- }
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT: {
- bool ignored;
- APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT);
- // FIXME need to be more flexible about rounding mode.
- APFloat::opStatus s =
- V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored);
- if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual
- break;
- return getConstant(IntVal, DL, VT);
- }
- case ISD::BITCAST:
- if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
- return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
- if (VT == MVT::i16 && C->getValueType(0) == MVT::bf16)
- return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
- if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
- return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
- if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
- return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
- break;
- case ISD::FP_TO_FP16:
- case ISD::FP_TO_BF16: {
- bool Ignored;
- // This can return overflow, underflow, or inexact; we don't care.
- // FIXME need to be more flexible about rounding mode.
- (void)V.convert(Opcode == ISD::FP_TO_FP16 ? APFloat::IEEEhalf()
- : APFloat::BFloat(),
- APFloat::rmNearestTiesToEven, &Ignored);
- return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
- }
- }
- }
// Constant fold unary operations with a vector integer or float operand.
switch (Opcode) {
@@ -5595,12 +5613,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::FP_TO_FP16:
+ case ISD::FP_TO_BF16:
case ISD::TRUNCATE:
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP:
+ case ISD::FP16_TO_FP:
+ case ISD::BF16_TO_FP:
+ case ISD::BITCAST:
case ISD::ABS:
case ISD::BITREVERSE:
case ISD::BSWAP:
@@ -5608,7 +5631,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- case ISD::CTPOP: {
+ case ISD::CTPOP:
+ case ISD::STEP_VECTOR: {
SDValue Ops = {N1};
if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops))
return Fold;
@@ -5697,6 +5721,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (OpOpcode == ISD::UNDEF)
// zext(undef) = 0, because the top bits will be zero.
return getConstant(0, DL, VT);
+
+ // Skip unnecessary zext_inreg pattern:
+ // (zext (trunc x)) -> x iff the upper bits are known zero.
+ // TODO: Remove (zext (trunc (and x, c))) exception which some targets
+ // use to recognise zext_inreg patterns.
+ if (OpOpcode == ISD::TRUNCATE) {
+ SDValue OpOp = N1.getOperand(0);
+ if (OpOp.getValueType() == VT) {
+ if (OpOp.getOpcode() != ISD::AND) {
+ APInt HiBits = APInt::getBitsSetFrom(VT.getScalarSizeInBits(),
+ N1.getScalarValueSizeInBits());
+ if (MaskedValueIsZero(OpOp, HiBits)) {
+ transferDbgValues(N1, OpOp);
+ return OpOp;
+ }
+ }
+ }
+ }
break;
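A runnable model of the new fold: when the bits above the truncation width
are already zero, zext(trunc x) gives back x.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0x000000AB;  // the high 24 bits are known zero
  uint32_t R = static_cast<uint32_t>(static_cast<uint8_t>(X)); // zext(trunc x)
  assert(R == X);
}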
case ISD::ANY_EXTEND:
assert(VT.isInteger() && N1.getValueType().isInteger() &&
@@ -5853,7 +5895,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDNode *N;
SDVTList VTs = getVTList(VT);
SDValue Ops[] = {N1};
- if (VT != MVT::Glue) { // Don't CSE flag producing nodes
+ if (VT != MVT::Glue) { // Don't CSE glue producing nodes
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
@@ -6040,9 +6082,174 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (isUndef(Opcode, Ops))
return getUNDEF(VT);
+ // Handle unary special cases.
+ if (NumOps == 1) {
+ SDValue N1 = Ops[0];
+
+ // Constant fold unary operations with an integer constant operand. Even
+ // opaque constant will be folded, because the folding of unary operations
+ // doesn't create new constants with different values. Nevertheless, the
+ // opaque flag is preserved during folding to prevent future folding with
+ // other constants.
+ if (auto *C = dyn_cast<ConstantSDNode>(N1)) {
+ const APInt &Val = C->getAPIntValue();
+ switch (Opcode) {
+ case ISD::SIGN_EXTEND:
+ return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
+ C->isTargetOpcode(), C->isOpaque());
+ case ISD::TRUNCATE:
+ if (C->isOpaque())
+ break;
+ [[fallthrough]];
+ case ISD::ZERO_EXTEND:
+ return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
+ C->isTargetOpcode(), C->isOpaque());
+ case ISD::ANY_EXTEND:
+ // Some targets like RISCV prefer to sign extend some types.
+ if (TLI->isSExtCheaperThanZExt(N1.getValueType(), VT))
+ return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
+ C->isTargetOpcode(), C->isOpaque());
+ return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
+ C->isTargetOpcode(), C->isOpaque());
+ case ISD::ABS:
+ return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::BITREVERSE:
+ return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::BSWAP:
+ return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::CTPOP:
+ return getConstant(Val.popcount(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ return getConstant(Val.countl_zero(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ return getConstant(Val.countr_zero(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: {
+ APFloat apf(EVTToAPFloatSemantics(VT),
+ APInt::getZero(VT.getSizeInBits()));
+ (void)apf.convertFromAPInt(Val, Opcode == ISD::SINT_TO_FP,
+ APFloat::rmNearestTiesToEven);
+ return getConstantFP(apf, DL, VT);
+ }
+ case ISD::FP16_TO_FP:
+ case ISD::BF16_TO_FP: {
+ bool Ignored;
+ APFloat FPV(Opcode == ISD::FP16_TO_FP ? APFloat::IEEEhalf()
+ : APFloat::BFloat(),
+ (Val.getBitWidth() == 16) ? Val : Val.trunc(16));
+
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)FPV.convert(EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &Ignored);
+ return getConstantFP(FPV, DL, VT);
+ }
+ case ISD::STEP_VECTOR:
+ if (SDValue V = FoldSTEP_VECTOR(DL, VT, N1, *this))
+ return V;
+ break;
+ case ISD::BITCAST:
+ if (VT == MVT::f16 && C->getValueType(0) == MVT::i16)
+ return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT);
+ if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
+ return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT);
+ if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+ return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT);
+ if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
+ return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT);
+ break;
+ }
+ }
+
+ // Constant fold unary operations with a floating point constant operand.
+ if (auto *C = dyn_cast<ConstantFPSDNode>(N1)) {
+ APFloat V = C->getValueAPF(); // make copy
+ switch (Opcode) {
+ case ISD::FNEG:
+ V.changeSign();
+ return getConstantFP(V, DL, VT);
+ case ISD::FABS:
+ V.clearSign();
+ return getConstantFP(V, DL, VT);
+ case ISD::FCEIL: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, DL, VT);
+ return SDValue();
+ }
+ case ISD::FTRUNC: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, DL, VT);
+ return SDValue();
+ }
+ case ISD::FFLOOR: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, DL, VT);
+ return SDValue();
+ }
+ case ISD::FP_EXTEND: {
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
+ &ignored);
+ return getConstantFP(V, DL, VT);
+ }
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: {
+ bool ignored;
+ APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT);
+ // FIXME need to be more flexible about rounding mode.
+ APFloat::opStatus s =
+ V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored);
+ if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual
+ break;
+ return getConstant(IntVal, DL, VT);
+ }
+ case ISD::FP_TO_FP16:
+ case ISD::FP_TO_BF16: {
+ bool Ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(Opcode == ISD::FP_TO_FP16 ? APFloat::IEEEhalf()
+ : APFloat::BFloat(),
+ APFloat::rmNearestTiesToEven, &Ignored);
+ return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
+ }
+ case ISD::BITCAST:
+ if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
+ return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL,
+ VT);
+ if (VT == MVT::i16 && C->getValueType(0) == MVT::bf16)
+ return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL,
+ VT);
+ if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+ return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL,
+ VT);
+ if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+ return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
+ break;
+ }
+ }
+
+ // Early-out if we failed to constant fold a bitcast.
+ if (Opcode == ISD::BITCAST)
+ return SDValue();
+ }
+
  // Handle binop special cases.
if (NumOps == 2) {
- if (SDValue CFP = foldConstantFPMath(Opcode, DL, VT, Ops[0], Ops[1]))
+ if (SDValue CFP = foldConstantFPMath(Opcode, DL, VT, Ops))
return CFP;
if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) {
@@ -6235,11 +6442,17 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
}
SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
- EVT VT, SDValue N1, SDValue N2) {
+ EVT VT, ArrayRef<SDValue> Ops) {
+ // TODO: Add support for unary/ternary fp opcodes.
+ if (Ops.size() != 2)
+ return SDValue();
+
// TODO: We don't do any constant folding for strict FP opcodes here, but we
// should. That will require dealing with a potentially non-default
// rounding mode, checking the "opStatus" return value from the APFloat
// math calculations, and possibly other variations.
+ SDValue N1 = Ops[0];
+ SDValue N2 = Ops[1];
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, /*AllowUndefs*/ false);
ConstantFPSDNode *N2CFP = isConstOrConstSplatFP(N2, /*AllowUndefs*/ false);
if (N1CFP && N2CFP) {
@@ -6600,6 +6813,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
return getBuildVector(VT, DL, Ops);
}
+
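+    // A splat of a constant can be folded by sign-extending the splatted
+    // scalar in-place and re-splatting the result.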
+ if (N1.getOpcode() == ISD::SPLAT_VECTOR &&
+ isa<ConstantSDNode>(N1.getOperand(0)))
+ return getNode(
+ ISD::SPLAT_VECTOR, DL, VT,
+ SignExtendInReg(N1.getConstantOperandAPInt(0),
+ N1.getOperand(0).getValueType()));
break;
}
case ISD::FP_TO_SINT_SAT:
@@ -6868,7 +7088,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"Operand is DELETED_NODE!");
// Perform various simplifications.
switch (Opcode) {
- case ISD::FMA: {
+ case ISD::FMA:
+ case ISD::FMAD: {
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
assert(N1.getValueType() == VT && N2.getValueType() == VT &&
N3.getValueType() == VT && "FMA types must match!");
@@ -6879,7 +7100,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
APFloat V1 = N1CFP->getValueAPF();
const APFloat &V2 = N2CFP->getValueAPF();
const APFloat &V3 = N3CFP->getValueAPF();
- V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
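+        // Unlike FMA, FMAD rounds after the multiply and again after the
+        // add, so fold each opcode with its own rounding behavior.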
+ if (Opcode == ISD::FMAD) {
+ V1.multiply(V2, APFloat::rmNearestTiesToEven);
+ V1.add(V3, APFloat::rmNearestTiesToEven);
+ } else
+ V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
return getConstantFP(V1, DL, VT);
}
break;
@@ -7001,7 +7226,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
}
- // Memoize node if it doesn't produce a flag.
+ // Memoize node if it doesn't produce a glue result.
SDNode *N;
SDVTList VTs = getVTList(VT);
SDValue Ops[] = {N1, N2, N3};
@@ -7342,7 +7567,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (Value.getNode()) {
Store = DAG.getStore(
Chain, dl, Value,
- DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo);
OutChains.push_back(Store);
}
@@ -7367,14 +7592,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
Value = DAG.getExtLoad(
ISD::EXTLOAD, dl, NVT, Chain,
- DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl),
+ DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl),
SrcPtrInfo.getWithOffset(SrcOff), VT,
commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags, NewAAInfo);
OutLoadChains.push_back(Value.getValue(1));
Store = DAG.getTruncStore(
Chain, dl, Value,
- DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags, NewAAInfo);
OutStoreChains.push_back(Store);
}
@@ -7511,7 +7736,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
Value = DAG.getLoad(
VT, dl, Chain,
- DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl),
+ DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl),
SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags, NewAAInfo);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
@@ -7526,7 +7751,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
Store = DAG.getStore(
Chain, dl, LoadValues[i],
- DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo);
OutChains.push_back(Store);
DstOff += VTSize;
@@ -7631,19 +7856,34 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
}
// If this store is smaller than the largest store see whether we can get
- // the smaller value for free with a truncate.
+ // the smaller value for free with a truncate or extract vector element and
+ // then store.
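+  // For example, a memset emitted mainly as v16i8 stores can handle a
+  // 4-byte tail by bitcasting the splat to v4i32 and extracting one lane.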
SDValue Value = MemSetValue;
if (VT.bitsLT(LargestVT)) {
+ unsigned Index;
+ unsigned NElts = LargestVT.getSizeInBits() / VT.getSizeInBits();
+ EVT SVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), NElts);
if (!LargestVT.isVector() && !VT.isVector() &&
TLI.isTruncateFree(LargestVT, VT))
Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
- else
+ else if (LargestVT.isVector() && !VT.isVector() &&
+ TLI.shallExtractConstSplatVectorElementToStore(
+ LargestVT.getTypeForEVT(*DAG.getContext()),
+ VT.getSizeInBits(), Index) &&
+ TLI.isTypeLegal(SVT) &&
+ LargestVT.getSizeInBits() == SVT.getSizeInBits()) {
+        // Targets that can combine store(extractelement VectorTy, Idx) can
+        // get the smaller value for free.
+ SDValue TailValue = DAG.getNode(ISD::BITCAST, dl, SVT, MemSetValue);
+ Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, TailValue,
+ DAG.getVectorIdxConstant(Index, dl));
+ } else
Value = getMemsetValue(Src, VT, DAG, dl);
}
assert(Value.getValueType() == VT && "Value with wrong type.");
SDValue Store = DAG.getStore(
Chain, dl, Value,
- DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
DstPtrInfo.getWithOffset(DstOff), Alignment,
isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone,
NewAAInfo);
@@ -7717,7 +7957,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = Type::getInt8PtrTy(*getContext());
+ Entry.Ty = PointerType::getUnqual(*getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
@@ -7819,7 +8059,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = Type::getInt8PtrTy(*getContext());
+ Entry.Ty = PointerType::getUnqual(*getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
@@ -7933,8 +8173,6 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
// FIXME: pass in SDLoc
CLI.setDebugLoc(dl).setChain(Chain);
- ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src);
- const bool SrcIsZero = ConstantSrc && ConstantSrc->isZero();
const char *BzeroName = getTargetLoweringInfo().getLibcallName(RTLIB::BZERO);
// Helper function to create an Entry from Node and Type.
@@ -7946,16 +8184,16 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
};
// If zeroing out and bzero is present, use it.
- if (SrcIsZero && BzeroName) {
+ if (isNullConstant(Src) && BzeroName) {
TargetLowering::ArgListTy Args;
- Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx)));
+ Args.push_back(CreateEntry(Dst, PointerType::getUnqual(Ctx)));
Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx)));
CLI.setLibCallee(
TLI->getLibcallCallingConv(RTLIB::BZERO), Type::getVoidTy(Ctx),
getExternalSymbol(BzeroName, TLI->getPointerTy(DL)), std::move(Args));
} else {
TargetLowering::ArgListTy Args;
- Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx)));
+ Args.push_back(CreateEntry(Dst, PointerType::getUnqual(Ctx)));
Args.push_back(CreateEntry(Src, Src.getValueType().getTypeForEVT(Ctx)));
Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx)));
CLI.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
@@ -8127,7 +8365,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
(int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
"Opcode is not a memory-accessing opcode!");
- // Memoize the node unless it returns a flag.
+ // Memoize the node unless it returns a glue result.
MemIntrinsicSDNode *N;
if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
@@ -9645,6 +9883,27 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
SDValue ZeroOverFlow = getConstant(0, DL, VTList.VTs[1]);
return getNode(ISD::MERGE_VALUES, DL, VTList, {N1, ZeroOverFlow}, Flags);
}
+
+ if (VTList.VTs[0].isVector() &&
+ VTList.VTs[0].getVectorElementType() == MVT::i1 &&
+ VTList.VTs[1].getVectorElementType() == MVT::i1) {
+ SDValue F1 = getFreeze(N1);
+ SDValue F2 = getFreeze(N2);
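+      // For i1 values the sum is x ^ y, and both the unsigned and signed
+      // overflow bit is x & y (1 + 1 wraps to 0); for subtraction the
+      // borrow/overflow bit is ~x & y.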
+      // {vXi1,vXi1} (u/s)addo(vXi1 x, vXi1 y) -> {xor(x,y),and(x,y)}
+ if (Opcode == ISD::UADDO || Opcode == ISD::SADDO)
+ return getNode(ISD::MERGE_VALUES, DL, VTList,
+ {getNode(ISD::XOR, DL, VTList.VTs[0], F1, F2),
+ getNode(ISD::AND, DL, VTList.VTs[1], F1, F2)},
+ Flags);
+      // {vXi1,vXi1} (u/s)subo(vXi1 x, vXi1 y) -> {xor(x,y),and(~x,y)}
+ if (Opcode == ISD::USUBO || Opcode == ISD::SSUBO) {
+ SDValue NotF1 = getNOT(DL, F1, VTList.VTs[0]);
+ return getNode(ISD::MERGE_VALUES, DL, VTList,
+ {getNode(ISD::XOR, DL, VTList.VTs[0], F1, F2),
+ getNode(ISD::AND, DL, VTList.VTs[1], NotF1, F2)},
+ Flags);
+ }
+ }
break;
}
case ISD::SMUL_LOHI:
@@ -9654,6 +9913,28 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
VTList.VTs[0] == Ops[0].getValueType() &&
VTList.VTs[0] == Ops[1].getValueType() &&
"Binary operator types must match!");
+ // Constant fold.
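+      // e.g. for i8 smul_lohi(100, 2), 200 is computed in i16 and split
+      // into Lo = 0xC8 and Hi = 0x00.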
+ ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(Ops[0]);
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ops[1]);
+ if (LHS && RHS) {
+ unsigned Width = VTList.VTs[0].getScalarSizeInBits();
+ unsigned OutWidth = Width * 2;
+ APInt Val = LHS->getAPIntValue();
+ APInt Mul = RHS->getAPIntValue();
+ if (Opcode == ISD::SMUL_LOHI) {
+ Val = Val.sext(OutWidth);
+ Mul = Mul.sext(OutWidth);
+ } else {
+ Val = Val.zext(OutWidth);
+ Mul = Mul.zext(OutWidth);
+ }
+ Val *= Mul;
+
+ SDValue Hi =
+ getConstant(Val.extractBits(Width, Width), DL, VTList.VTs[0]);
+ SDValue Lo = getConstant(Val.trunc(Width), DL, VTList.VTs[0]);
+ return getNode(ISD::MERGE_VALUES, DL, VTList, {Lo, Hi}, Flags);
+ }
break;
}
case ISD::FFREXP: {
@@ -9727,7 +10008,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
#endif
}
- // Memoize the node unless it returns a flag.
+ // Memoize the node unless it returns a glue result.
SDNode *N;
if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
@@ -10100,7 +10381,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
/// For IROrder, we keep the smaller of the two
SDNode *SelectionDAG::UpdateSDLocOnMergeSDNode(SDNode *N, const SDLoc &OLoc) {
DebugLoc NLoc = N->getDebugLoc();
- if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) {
+ if (NLoc && OptLevel == CodeGenOptLevel::None && OLoc.getDebugLoc() != NLoc) {
N->setDebugLoc(DebugLoc());
}
unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder());
@@ -10569,11 +10850,18 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
switch (N.getOpcode()) {
default:
break;
- case ISD::ADD:
+ case ISD::ADD: {
SDValue N0 = N.getOperand(0);
SDValue N1 = N.getOperand(1);
- if (!isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1)) {
- uint64_t Offset = N.getConstantOperandVal(1);
+ if (!isa<ConstantSDNode>(N0)) {
+ bool RHSConstant = isa<ConstantSDNode>(N1);
+ uint64_t Offset;
+ if (RHSConstant)
+ Offset = N.getConstantOperandVal(1);
+ // We are not allowed to turn indirect debug values variadic, so
+ // don't salvage those.
+ if (!RHSConstant && DV->isIndirect())
+ continue;
// Rewrite an ADD constant node into a DIExpression. Since we are
// performing arithmetic to compute the variable's *value* in the
@@ -10582,7 +10870,8 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
auto *DIExpr = DV->getExpression();
auto NewLocOps = DV->copyLocationOps();
bool Changed = false;
- for (size_t i = 0; i < NewLocOps.size(); ++i) {
+ size_t OrigLocOpsSize = NewLocOps.size();
+ for (size_t i = 0; i < OrigLocOpsSize; ++i) {
// We're not given a ResNo to compare against because the whole
// node is going away. We know that any ISD::ADD only has one
// result, so we can assume any node match is using the result.
@@ -10590,19 +10879,37 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
NewLocOps[i].getSDNode() != &N)
continue;
NewLocOps[i] = SDDbgOperand::fromNode(N0.getNode(), N0.getResNo());
- SmallVector<uint64_t, 3> ExprOps;
- DIExpression::appendOffset(ExprOps, Offset);
- DIExpr = DIExpression::appendOpsToArg(DIExpr, ExprOps, i, true);
+ if (RHSConstant) {
+ SmallVector<uint64_t, 3> ExprOps;
+ DIExpression::appendOffset(ExprOps, Offset);
+ DIExpr = DIExpression::appendOpsToArg(DIExpr, ExprOps, i, true);
+ } else {
+ // Convert to a variadic expression (if not already).
+ // convertToVariadicExpression() returns a const pointer, so we use
+ // a temporary const variable here.
+ const auto *TmpDIExpr =
+ DIExpression::convertToVariadicExpression(DIExpr);
+ SmallVector<uint64_t, 3> ExprOps;
+ ExprOps.push_back(dwarf::DW_OP_LLVM_arg);
+ ExprOps.push_back(NewLocOps.size());
+ ExprOps.push_back(dwarf::DW_OP_plus);
+ SDDbgOperand RHS =
+ SDDbgOperand::fromNode(N1.getNode(), N1.getResNo());
+ NewLocOps.push_back(RHS);
+ DIExpr = DIExpression::appendOpsToArg(TmpDIExpr, ExprOps, i, true);
+ }
Changed = true;
}
(void)Changed;
assert(Changed && "Salvage target doesn't use N");
+ bool IsVariadic =
+ DV->isVariadic() || OrigLocOpsSize != NewLocOps.size();
+
auto AdditionalDependencies = DV->getAdditionalDependencies();
- SDDbgValue *Clone = getDbgValueList(DV->getVariable(), DIExpr,
- NewLocOps, AdditionalDependencies,
- DV->isIndirect(), DV->getDebugLoc(),
- DV->getOrder(), DV->isVariadic());
+ SDDbgValue *Clone = getDbgValueList(
+ DV->getVariable(), DIExpr, NewLocOps, AdditionalDependencies,
+ DV->isIndirect(), DV->getDebugLoc(), DV->getOrder(), IsVariadic);
ClonedDVs.push_back(Clone);
DV->setIsInvalidated();
DV->setIsEmitted();
@@ -10610,6 +10917,41 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
N0.getNode()->dumprFull(this);
dbgs() << " into " << *DIExpr << '\n');
}
+ break;
+ }
+ case ISD::TRUNCATE: {
+ SDValue N0 = N.getOperand(0);
+ TypeSize FromSize = N0.getValueSizeInBits();
+ TypeSize ToSize = N.getValueSizeInBits(0);
+
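+    // The truncated value can be described from the wider source: point the
+    // location at N0 and append conversion ops that narrow it back to the
+    // result width.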
+ DIExpression *DbgExpression = DV->getExpression();
+ auto ExtOps = DIExpression::getExtOps(FromSize, ToSize, false);
+ auto NewLocOps = DV->copyLocationOps();
+ bool Changed = false;
+ for (size_t i = 0; i < NewLocOps.size(); ++i) {
+ if (NewLocOps[i].getKind() != SDDbgOperand::SDNODE ||
+ NewLocOps[i].getSDNode() != &N)
+ continue;
+
+ NewLocOps[i] = SDDbgOperand::fromNode(N0.getNode(), N0.getResNo());
+ DbgExpression = DIExpression::appendOpsToArg(DbgExpression, ExtOps, i);
+ Changed = true;
+ }
+ assert(Changed && "Salvage target doesn't use N");
+ (void)Changed;
+
+ SDDbgValue *Clone =
+ getDbgValueList(DV->getVariable(), DbgExpression, NewLocOps,
+ DV->getAdditionalDependencies(), DV->isIndirect(),
+ DV->getDebugLoc(), DV->getOrder(), DV->isVariadic());
+
+ ClonedDVs.push_back(Clone);
+ DV->setIsInvalidated();
+ DV->setIsEmitted();
+ LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this);
+ dbgs() << " into " << *DbgExpression << '\n');
+ break;
+ }
}
}
@@ -12113,6 +12455,10 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
// FIXME: This does not work for vectors with elements less than 8 bits.
while (VecWidth > 8) {
+ // If we can't split in half, stop here.
+ if (VecWidth & 1)
+ break;
+
unsigned HalfSize = VecWidth / 2;
APInt HighValue = SplatValue.extractBits(HalfSize, HalfSize);
APInt LowValue = SplatValue.extractBits(HalfSize, 0);
@@ -12130,6 +12476,12 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
VecWidth = HalfSize;
}
+  // FIXME: The loop above only tries to split in halves. If the input
+  // vector is, for example, <3 x i16>, it cannot detect a SplatBitSize of
+  // 16. It is unclear whether that is a design flaw currently limiting
+  // optimizations; presumably vectors were normally power-of-2 sized when
+  // this helper was created.
+
SplatBitSize = VecWidth;
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index a432d8e92bca..39a1e09e83c5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -130,7 +130,7 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
     // If the bases are the same frame index but we couldn't find a
     // constant offset (the indices are different), be conservative.
- if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
+    if (A->getIndex() != B->getIndex() &&
+        (!MFI.isFixedObjectIndex(A->getIndex()) ||
!MFI.isFixedObjectIndex(B->getIndex()))) {
IsAlias = false;
return true;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 20c37eb4cb11..12ed4a82ee91 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -76,6 +76,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
@@ -989,15 +990,15 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}
-void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
+void RegsForValue::AddInlineAsmOperands(InlineAsm::Kind Code, bool HasMatching,
unsigned MatchingIdx, const SDLoc &dl,
SelectionDAG &DAG,
std::vector<SDValue> &Ops) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
+ InlineAsm::Flag Flag(Code, Regs.size());
if (HasMatching)
- Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
+ Flag.setMatchingOp(MatchingIdx);
else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) {
// Put the register class of the virtual registers in the flag word. That
// way, later passes can recompute register class constraints for inline
@@ -1006,13 +1007,13 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
// from the def.
const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
- Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
+ Flag.setRegClass(RC->getID());
}
SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
Ops.push_back(Res);
- if (Code == InlineAsm::Kind_Clobber) {
+ if (Code == InlineAsm::Kind::Clobber) {
// Clobbers should always have a 1:1 mapping with registers, and may
// reference registers that have illegal (e.g. vector) types. Hence, we
// shouldn't try to apply any sort of splitting logic to them.
@@ -1147,12 +1148,61 @@ SDValue SelectionDAGBuilder::getControlRoot() {
return updateRoot(PendingExports);
}
-void SelectionDAGBuilder::visit(const Instruction &I) {
- // Set up outgoing PHI node register values before emitting the terminator.
- if (I.isTerminator()) {
- HandlePHINodesInSuccessorBlocks(I.getParent());
+void SelectionDAGBuilder::handleDebugDeclare(Value *Address,
+ DILocalVariable *Variable,
+ DIExpression *Expression,
+ DebugLoc DL) {
+ assert(Variable && "Missing variable");
+
+  // Check if the address has an undef value.
+ if (!Address || isa<UndefValue>(Address) ||
+ (Address->use_empty() && !isa<Argument>(Address))) {
+ LLVM_DEBUG(
+ dbgs()
+ << "dbg_declare: Dropping debug info (bad/undef/unused-arg address)\n");
+ return;
}
+ bool IsParameter = Variable->isParameter() || isa<Argument>(Address);
+
+ SDValue &N = NodeMap[Address];
+ if (!N.getNode() && isa<Argument>(Address))
+ // Check unused arguments map.
+ N = UnusedArgNodeMap[Address];
+ SDDbgValue *SDV;
+ if (N.getNode()) {
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ // Parameters are handled specially.
+ auto *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
+ if (IsParameter && FINode) {
+ // Byval parameter. We have a frame index at this point.
+ SDV = DAG.getFrameIndexDbgValue(Variable, Expression, FINode->getIndex(),
+ /*IsIndirect*/ true, DL, SDNodeOrder);
+ } else if (isa<Argument>(Address)) {
+ // Address is an argument, so try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
+ EmitFuncArgumentDbgValue(Address, Variable, Expression, DL,
+ FuncArgumentDbgValueKind::Declare, N);
+ return;
+ } else {
+ SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
+ true, DL, SDNodeOrder);
+ }
+ DAG.AddDbgValue(SDV, IsParameter);
+ } else {
+ // If Address is an argument then try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
+ if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, DL,
+ FuncArgumentDbgValueKind::Declare, N)) {
+ LLVM_DEBUG(dbgs() << "dbg_declare: Dropping debug info"
+ << " (could not emit func-arg dbg_value)\n");
+ }
+ }
+ return;
+}
+
+void SelectionDAGBuilder::visitDbgInfo(const Instruction &I) {
// Add SDDbgValue nodes for any var locs here. Do so before updating
// SDNodeOrder, as this mapping is {Inst -> Locs BEFORE Inst}.
if (FunctionVarLocs const *FnVarLocs = DAG.getFunctionVarLocs()) {
@@ -1168,9 +1218,65 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
}
SmallVector<Value *> Values(It->Values.location_ops());
if (!handleDebugValue(Values, Var, It->Expr, It->DL, SDNodeOrder,
- It->Values.hasArgList()))
- addDanglingDebugInfo(It, SDNodeOrder);
+ It->Values.hasArgList())) {
+ SmallVector<Value *, 4> Vals;
+ for (Value *V : It->Values.location_ops())
+ Vals.push_back(V);
+ addDanglingDebugInfo(Vals,
+ FnVarLocs->getDILocalVariable(It->VariableID),
+ It->Expr, Vals.size() > 1, It->DL, SDNodeOrder);
+ }
+ }
+ }
+
+  // Handle any debug-info attached to this instruction, in the form of
+  // DPValue non-instruction debug-info records.
+ for (DPValue &DPV : I.getDbgValueRange()) {
+ DILocalVariable *Variable = DPV.getVariable();
+ DIExpression *Expression = DPV.getExpression();
+ dropDanglingDebugInfo(Variable, Expression);
+
+ if (DPV.getType() == DPValue::LocationType::Declare) {
+ if (FuncInfo.PreprocessedDPVDeclares.contains(&DPV))
+ continue;
+ LLVM_DEBUG(dbgs() << "SelectionDAG visiting dbg_declare: " << DPV
+ << "\n");
+ handleDebugDeclare(DPV.getVariableLocationOp(0), Variable, Expression,
+ DPV.getDebugLoc());
+ continue;
}
+
+ // A DPValue with no locations is a kill location.
+ SmallVector<Value *, 4> Values(DPV.location_ops());
+ if (Values.empty()) {
+ handleKillDebugValue(Variable, Expression, DPV.getDebugLoc(),
+ SDNodeOrder);
+ continue;
+ }
+
+ // A DPValue with an undef or absent location is also a kill location.
+ if (llvm::any_of(Values,
+ [](Value *V) { return !V || isa<UndefValue>(V); })) {
+ handleKillDebugValue(Variable, Expression, DPV.getDebugLoc(),
+ SDNodeOrder);
+ continue;
+ }
+
+ bool IsVariadic = DPV.hasArgList();
+ if (!handleDebugValue(Values, Variable, Expression, DPV.getDebugLoc(),
+ SDNodeOrder, IsVariadic)) {
+ addDanglingDebugInfo(Values, Variable, Expression, IsVariadic,
+ DPV.getDebugLoc(), SDNodeOrder);
+ }
+ }
+}
+
+void SelectionDAGBuilder::visit(const Instruction &I) {
+ visitDbgInfo(I);
+
+ // Set up outgoing PHI node register values before emitting the terminator.
+ if (I.isTerminator()) {
+ HandlePHINodesInSuccessorBlocks(I.getParent());
}
// Increase the SDNodeOrder if dealing with a non-debug instruction.
@@ -1231,14 +1337,12 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
static bool handleDanglingVariadicDebugInfo(SelectionDAG &DAG,
DILocalVariable *Variable,
DebugLoc DL, unsigned Order,
- RawLocationWrapper Values,
+ SmallVectorImpl<Value *> &Values,
DIExpression *Expression) {
- if (!Values.hasArgList())
- return false;
// For variadic dbg_values we will now insert an undef.
// FIXME: We can potentially recover these!
SmallVector<SDDbgOperand, 2> Locs;
- for (const Value *V : Values.location_ops()) {
+ for (const Value *V : Values) {
auto *Undef = UndefValue::get(V->getType());
Locs.push_back(SDDbgOperand::fromConst(Undef));
}
@@ -1249,44 +1353,31 @@ static bool handleDanglingVariadicDebugInfo(SelectionDAG &DAG,
return true;
}
-void SelectionDAGBuilder::addDanglingDebugInfo(const VarLocInfo *VarLoc,
- unsigned Order) {
- if (!handleDanglingVariadicDebugInfo(
- DAG,
- const_cast<DILocalVariable *>(DAG.getFunctionVarLocs()
- ->getVariable(VarLoc->VariableID)
- .getVariable()),
- VarLoc->DL, Order, VarLoc->Values, VarLoc->Expr)) {
- DanglingDebugInfoMap[VarLoc->Values.getVariableLocationOp(0)].emplace_back(
- VarLoc, Order);
- }
-}
-
-void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI,
+void SelectionDAGBuilder::addDanglingDebugInfo(SmallVectorImpl<Value *> &Values,
+ DILocalVariable *Var,
+ DIExpression *Expr,
+ bool IsVariadic, DebugLoc DL,
unsigned Order) {
- // We treat variadic dbg_values differently at this stage.
- if (!handleDanglingVariadicDebugInfo(
- DAG, DI->getVariable(), DI->getDebugLoc(), Order,
- DI->getWrappedLocation(), DI->getExpression())) {
- // TODO: Dangling debug info will eventually either be resolved or produce
- // an Undef DBG_VALUE. However in the resolution case, a gap may appear
- // between the original dbg.value location and its resolved DBG_VALUE,
- // which we should ideally fill with an extra Undef DBG_VALUE.
- assert(DI->getNumVariableLocationOps() == 1 &&
- "DbgValueInst without an ArgList should have a single location "
- "operand.");
- DanglingDebugInfoMap[DI->getValue(0)].emplace_back(DI, Order);
+ if (IsVariadic) {
+ handleDanglingVariadicDebugInfo(DAG, Var, DL, Order, Values, Expr);
+ return;
}
+ // TODO: Dangling debug info will eventually either be resolved or produce
+ // an Undef DBG_VALUE. However in the resolution case, a gap may appear
+ // between the original dbg.value location and its resolved DBG_VALUE,
+ // which we should ideally fill with an extra Undef DBG_VALUE.
+ assert(Values.size() == 1);
+ DanglingDebugInfoMap[Values[0]].emplace_back(Var, Expr, DL, Order);
}
void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
const DIExpression *Expr) {
auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) {
- DIVariable *DanglingVariable = DDI.getVariable(DAG.getFunctionVarLocs());
+ DIVariable *DanglingVariable = DDI.getVariable();
DIExpression *DanglingExpr = DDI.getExpression();
if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) {
- LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << printDDI(DDI)
- << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping dangling debug info for "
+ << printDDI(nullptr, DDI) << "\n");
return true;
}
return false;
@@ -1299,7 +1390,7 @@ void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
// whether it can be salvaged.
for (auto &DDI : DDIV)
if (isMatchingDbgValue(DDI))
- salvageUnresolvedDbgValue(DDI);
+ salvageUnresolvedDbgValue(DDIMI.first, DDI);
erase_if(DDIV, isMatchingDbgValue);
}
@@ -1318,7 +1409,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
DebugLoc DL = DDI.getDebugLoc();
unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
- DILocalVariable *Variable = DDI.getVariable(DAG.getFunctionVarLocs());
+ DILocalVariable *Variable = DDI.getVariable();
DIExpression *Expr = DDI.getExpression();
assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
@@ -1332,8 +1423,8 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
// calling EmitFuncArgumentDbgValue here.
if (!EmitFuncArgumentDbgValue(V, Variable, Expr, DL,
FuncArgumentDbgValueKind::Value, Val)) {
- LLVM_DEBUG(dbgs() << "Resolve dangling debug info for " << printDDI(DDI)
- << "\n");
+ LLVM_DEBUG(dbgs() << "Resolve dangling debug info for "
+ << printDDI(V, DDI) << "\n");
LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump());
// Increase the SDNodeOrder for the DbgValue here to make sure it is
// inserted after the definition of Val when emitting the instructions
@@ -1347,9 +1438,11 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
DAG.AddDbgValue(SDV, false);
} else
LLVM_DEBUG(dbgs() << "Resolved dangling debug info for "
- << printDDI(DDI) << " in EmitFuncArgumentDbgValue\n");
+ << printDDI(V, DDI)
+ << " in EmitFuncArgumentDbgValue\n");
} else {
- LLVM_DEBUG(dbgs() << "Dropping debug info for " << printDDI(DDI) << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << printDDI(V, DDI)
+ << "\n");
auto Undef = UndefValue::get(V->getType());
auto SDV =
DAG.getConstantDbgValue(Variable, Expr, Undef, DL, DbgSDNodeOrder);
@@ -1359,14 +1452,14 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
DDIV.clear();
}
-void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
+void SelectionDAGBuilder::salvageUnresolvedDbgValue(const Value *V,
+ DanglingDebugInfo &DDI) {
// TODO: For the variadic implementation, instead of only checking the fail
// state of `handleDebugValue`, we need know specifically which values were
// invalid, so that we attempt to salvage only those values when processing
// a DIArgList.
- Value *V = DDI.getVariableLocationOp(0);
- Value *OrigV = V;
- DILocalVariable *Var = DDI.getVariable(DAG.getFunctionVarLocs());
+ const Value *OrigV = V;
+ DILocalVariable *Var = DDI.getVariable();
DIExpression *Expr = DDI.getExpression();
DebugLoc DL = DDI.getDebugLoc();
unsigned SDOrder = DDI.getSDNodeOrder();
@@ -1383,11 +1476,12 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
// a non-instruction is seen, such as a constant expression or global
// variable. FIXME: Further work could recover those too.
while (isa<Instruction>(V)) {
- Instruction &VAsInst = *cast<Instruction>(V);
+ const Instruction &VAsInst = *cast<const Instruction>(V);
// Temporary "0", awaiting real implementation.
SmallVector<uint64_t, 16> Ops;
SmallVector<Value *, 4> AdditionalValues;
- V = salvageDebugInfoImpl(VAsInst, Expr->getNumLocationOperands(), Ops,
+ V = salvageDebugInfoImpl(const_cast<Instruction &>(VAsInst),
+ Expr->getNumLocationOperands(), Ops,
AdditionalValues);
// If we cannot salvage any further, and haven't yet found a suitable debug
// expression, bail out.
@@ -1420,8 +1514,8 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
auto *Undef = UndefValue::get(OrigV->getType());
auto *SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
DAG.AddDbgValue(SDV, false);
- LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << printDDI(DDI)
- << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n "
+ << printDDI(OrigV, DDI) << "\n");
}
void SelectionDAGBuilder::handleKillDebugValue(DILocalVariable *Var,
@@ -1571,7 +1665,7 @@ void SelectionDAGBuilder::resolveOrClearDbgInfo() {
// Try to fixup any remaining dangling debug info -- and drop it if we can't.
for (auto &Pair : DanglingDebugInfoMap)
for (auto &DDI : Pair.second)
- salvageUnresolvedDbgValue(DDI);
+ salvageUnresolvedDbgValue(const_cast<Value *>(Pair.first), DDI);
clearDanglingDebugInfo();
}
@@ -1738,6 +1832,12 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (const auto *NC = dyn_cast<NoCFIValue>(C))
return getValue(NC->getGlobalValue());
+ if (VT == MVT::aarch64svcount) {
+ assert(C->isNullValue() && "Can only zero this target type!");
+ return DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT,
+ DAG.getConstant(0, getCurSDLoc(), MVT::nxv16i1));
+ }
+
VectorType *VecTy = cast<VectorType>(V->getType());
// Now that we know the number and type of the elements, get that number of
@@ -1822,7 +1922,7 @@ void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
// If this is not a fall-through branch or optimizations are switched off,
// emit the branch.
if (TargetMBB != NextBlock(FuncInfo.MBB) ||
- TM.getOptLevel() == CodeGenOpt::None)
+ TM.getOptLevel() == CodeGenOptLevel::None)
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
getControlRoot(), DAG.getBasicBlock(TargetMBB)));
return;
@@ -2049,7 +2149,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
// An aggregate return value cannot wrap around the address space, so
// offsets to its parts don't wrap either.
SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr,
- TypeSize::Fixed(Offsets[i]));
+ TypeSize::getFixed(Offsets[i]));
SDValue Val = RetOp.getValue(RetOp.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
@@ -2478,7 +2578,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// If this is not a fall-through branch or optimizations are switched off,
// emit the branch.
- if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None) {
+ if (Succ0MBB != NextBlock(BrMBB) ||
+ TM.getOptLevel() == CodeGenOptLevel::None) {
auto Br = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
getControlRoot(), DAG.getBasicBlock(Succ0MBB));
setValue(&I, Br);
@@ -2662,14 +2763,13 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
/// visitJumpTable - Emit JumpTable node in the current MBB
void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
// Emit the code for the jump table
+ assert(JT.SL && "Should set SDLoc for SelectionDAG!");
assert(JT.Reg != -1U && "Should lower JT Header first!");
EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
- SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
- JT.Reg, PTy);
+ SDValue Index = DAG.getCopyFromReg(getControlRoot(), *JT.SL, JT.Reg, PTy);
SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
- SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
- MVT::Other, Index.getValue(1),
- Table, Index);
+ SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, *JT.SL, MVT::Other,
+ Index.getValue(1), Table, Index);
DAG.setRoot(BrJumpTable);
}
@@ -2678,7 +2778,8 @@ void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
JumpTableHeader &JTH,
MachineBasicBlock *SwitchBB) {
- SDLoc dl = getCurSDLoc();
+ assert(JT.SL && "Should set SDLoc for SelectionDAG!");
+ const SDLoc &dl = *JT.SL;
// Subtract the lowest switch case value from the value being switched on.
SDValue SwitchOp = getValue(JTH.SValue);
@@ -2775,7 +2876,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
const Module &M = *ParentBB->getParent()->getFunction().getParent();
Align Align =
- DAG.getDataLayout().getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));
+ DAG.getDataLayout().getPrefTypeAlign(PointerType::get(M.getContext(), 0));
// Generate code to load the content of the guard slot.
SDValue GuardVal = DAG.getLoad(
@@ -3225,14 +3326,9 @@ void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
// We may be able to ignore unreachable behind a noreturn call.
if (DAG.getTarget().Options.NoTrapAfterNoreturn) {
- const BasicBlock &BB = *I.getParent();
- if (&I != &BB.front()) {
- BasicBlock::const_iterator PredI =
- std::prev(BasicBlock::const_iterator(&I));
- if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
- if (Call->doesNotReturn())
- return;
- }
+ if (const CallInst *Call = dyn_cast_or_null<CallInst>(I.getPrevNode())) {
+ if (Call->doesNotReturn())
+ return;
}
}
@@ -3466,7 +3562,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
}
if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
- (TLI.isOperationLegalOrCustom(Opc, VT) ||
+ (TLI.isOperationLegalOrCustomOrPromote(Opc, VT) ||
(UseScalarMinMax &&
TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
// If the underlying comparison instruction is used by any other
@@ -3522,9 +3618,23 @@ void SelectionDAGBuilder::visitZExt(const User &I) {
// ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
// ZExt also can't be a cast to bool for same reason. So, nothing much to do
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
- I.getType());
- setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
+ auto &TLI = DAG.getTargetLoweringInfo();
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ SDNodeFlags Flags;
+ if (auto *PNI = dyn_cast<PossiblyNonNegInst>(&I))
+ Flags.setNonNeg(PNI->hasNonNeg());
+
+ // Eagerly use nonneg information to canonicalize towards sign_extend if
+ // that is the target's preference.
+ // TODO: Let the target do this later.
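+  // For instance, on RV64 sign-extending i32 to i64 is free (a single
+  // sext.w), so isSExtCheaperThanZExt holds there and zext nneg becomes
+  // sign_extend.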
+ if (Flags.hasNonNeg() &&
+ TLI.isSExtCheaperThanZExt(N.getValueType(), DestVT)) {
+ setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
+ return;
+ }
+
+ setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N, Flags));
}
void SelectionDAGBuilder::visitSExt(const User &I) {
@@ -4111,7 +4221,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
SDValue AllocSize = getValue(I.getArraySize());
- EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), I.getAddressSpace());
+ EVT IntPtr = TLI.getPointerTy(DL, I.getAddressSpace());
if (AllocSize.getValueType() != IntPtr)
AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
@@ -4120,10 +4230,12 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
DAG.getVScale(dl, IntPtr,
APInt(IntPtr.getScalarSizeInBits(),
TySize.getKnownMinValue())));
- else
- AllocSize =
- DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize,
- DAG.getConstant(TySize.getFixedValue(), dl, IntPtr));
+ else {
+ SDValue TySizeValue =
+ DAG.getConstant(TySize.getFixedValue(), dl, MVT::getIntegerVT(64));
+ AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize,
+ DAG.getZExtOrTrunc(TySizeValue, dl, IntPtr));
+ }
// Handle alignment. If the requested alignment is less than or equal to
// the stack alignment, ignore it. If the size is greater than or equal to
@@ -4809,23 +4921,6 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
SDValue Ptr = getValue(I.getPointerOperand());
-
- if (TLI.lowerAtomicLoadAsLoadSDNode(I)) {
- // TODO: Once this is better exercised by tests, it should be merged with
- // the normal path for loads to prevent future divergence.
- SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO);
- if (MemVT != VT)
- L = DAG.getPtrExtOrTrunc(L, dl, VT);
-
- setValue(&I, L);
- SDValue OutChain = L.getValue(1);
- if (!I.isUnordered())
- DAG.setRoot(OutChain);
- else
- PendingLoads.push_back(OutChain);
- return;
- }
-
SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
Ptr, MMO);
@@ -4865,16 +4960,8 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
SDValue Ptr = getValue(I.getPointerOperand());
- if (TLI.lowerAtomicStoreAsStoreSDNode(I)) {
- // TODO: Once this is better exercised by tests, it should be merged with
- // the normal path for stores to prevent future divergence.
- SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO);
- setValue(&I, S);
- DAG.setRoot(S);
- return;
- }
- SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
- Ptr, Val, MMO);
+ SDValue OutChain =
+ DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain, Val, Ptr, MMO);
setValue(&I, OutChain);
DAG.setRoot(OutChain);
@@ -5829,26 +5916,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (!Op)
return false;
- // If the expression refers to the entry value of an Argument, use the
- // corresponding livein physical register. As per the Verifier, this is only
- // allowed for swiftasync Arguments.
- if (Op->isReg() && Expr->isEntryValue()) {
- assert(Arg->hasAttribute(Attribute::AttrKind::SwiftAsync));
- auto OpReg = Op->getReg();
- for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins())
- if (OpReg == VirtReg || OpReg == PhysReg) {
- SDDbgValue *SDV = DAG.getVRegDbgValue(
- Variable, Expr, PhysReg,
- Kind != FuncArgumentDbgValueKind::Value /*is indirect*/, DL,
- SDNodeOrder);
- DAG.AddDbgValue(SDV, false /*treat as dbg.declare byval parameter*/);
- return true;
- }
- LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but "
- "couldn't find a physical register\n");
- return true;
- }
-
assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
MachineInstr *NewMI = nullptr;
@@ -5937,6 +6004,41 @@ static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) {
llvm_unreachable("expected corresponding call to preallocated setup/arg");
}
+/// If DI is a debug value with an EntryValue expression, lower it using the
+/// corresponding physical register of the associated Argument value
+/// (guaranteed to exist by the verifier).
+bool SelectionDAGBuilder::visitEntryValueDbgValue(const DbgValueInst &DI) {
+ DILocalVariable *Variable = DI.getVariable();
+ DIExpression *Expr = DI.getExpression();
+ if (!Expr->isEntryValue() || !hasSingleElement(DI.getValues()))
+ return false;
+
+ // These properties are guaranteed by the verifier.
+ Argument *Arg = cast<Argument>(DI.getValue(0));
+ assert(Arg->hasAttribute(Attribute::AttrKind::SwiftAsync));
+
+ auto ArgIt = FuncInfo.ValueMap.find(Arg);
+ if (ArgIt == FuncInfo.ValueMap.end()) {
+ LLVM_DEBUG(
+ dbgs() << "Dropping dbg.value: expression is entry_value but "
+ "couldn't find an associated register for the Argument\n");
+ return true;
+ }
+ Register ArgVReg = ArgIt->getSecond();
+
+ for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins())
+ if (ArgVReg == VirtReg || ArgVReg == PhysReg) {
+ SDDbgValue *SDV =
+          DAG.getVRegDbgValue(Variable, Expr, PhysReg, false /*IsIndirect*/,
+ DI.getDebugLoc(), SDNodeOrder);
+ DAG.AddDbgValue(SDV, false /*treat as dbg.declare byval parameter*/);
+ return true;
+ }
+ LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but "
+ "couldn't find a physical register\n");
+ return true;
+}
+
/// Lower the call to the specified intrinsic function.
void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned Intrinsic) {
@@ -6180,61 +6282,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
if (AssignmentTrackingEnabled ||
FuncInfo.PreprocessedDbgDeclares.count(&DI))
return;
- // Assume dbg.declare can not currently use DIArgList, i.e.
- // it is non-variadic.
- assert(!DI.hasArgList() && "Only dbg.value should currently use DIArgList");
+ LLVM_DEBUG(dbgs() << "SelectionDAG visiting dbg_declare: " << DI << "\n");
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
dropDanglingDebugInfo(Variable, Expression);
- assert(Variable && "Missing variable");
- LLVM_DEBUG(dbgs() << "SelectionDAG visiting debug intrinsic: " << DI
- << "\n");
- // Check if address has undef value.
- const Value *Address = DI.getVariableLocationOp(0);
- if (!Address || isa<UndefValue>(Address) ||
- (Address->use_empty() && !isa<Argument>(Address))) {
- LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
- << " (bad/undef/unused-arg address)\n");
- return;
- }
-
- bool isParameter = Variable->isParameter() || isa<Argument>(Address);
-
- SDValue &N = NodeMap[Address];
- if (!N.getNode() && isa<Argument>(Address))
- // Check unused arguments map.
- N = UnusedArgNodeMap[Address];
- SDDbgValue *SDV;
- if (N.getNode()) {
- if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
- Address = BCI->getOperand(0);
- // Parameters are handled specially.
- auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
- if (isParameter && FINode) {
- // Byval parameter. We have a frame index at this point.
- SDV =
- DAG.getFrameIndexDbgValue(Variable, Expression, FINode->getIndex(),
- /*IsIndirect*/ true, dl, SDNodeOrder);
- } else if (isa<Argument>(Address)) {
- // Address is an argument, so try to emit its dbg value using
- // virtual register info from the FuncInfo.ValueMap.
- EmitFuncArgumentDbgValue(Address, Variable, Expression, dl,
- FuncArgumentDbgValueKind::Declare, N);
- return;
- } else {
- SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
- true, dl, SDNodeOrder);
- }
- DAG.AddDbgValue(SDV, isParameter);
- } else {
- // If Address is an argument then try to emit its dbg value using
- // virtual register info from the FuncInfo.ValueMap.
- if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl,
- FuncArgumentDbgValueKind::Declare, N)) {
- LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
- << " (could not emit func-arg dbg_value)\n");
- }
- }
+ // Assume dbg.declare can not currently use DIArgList, i.e.
+ // it is non-variadic.
+ assert(!DI.hasArgList() && "Only dbg.value should currently use DIArgList");
+ handleDebugDeclare(DI.getVariableLocationOp(0), Variable, Expression,
+ DI.getDebugLoc());
return;
}
case Intrinsic::dbg_label: {
@@ -6266,6 +6322,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DIExpression *Expression = DI.getExpression();
dropDanglingDebugInfo(Variable, Expression);
+ if (visitEntryValueDbgValue(DI))
+ return;
+
if (DI.isKillLocation()) {
handleKillDebugValue(Variable, Expression, DI.getDebugLoc(), SDNodeOrder);
return;
@@ -6278,7 +6337,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool IsVariadic = DI.hasArgList();
if (!handleDebugValue(Values, Variable, Expression, DI.getDebugLoc(),
SDNodeOrder, IsVariadic))
- addDanglingDebugInfo(&DI, SDNodeOrder);
+ addDanglingDebugInfo(Values, Variable, Expression, IsVariadic,
+ DI.getDebugLoc(), SDNodeOrder);
return;
}
@@ -6391,6 +6451,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::fabs:
case Intrinsic::sin:
case Intrinsic::cos:
+ case Intrinsic::exp10:
case Intrinsic::floor:
case Intrinsic::ceil:
case Intrinsic::trunc:
@@ -6406,6 +6467,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::fabs: Opcode = ISD::FABS; break;
case Intrinsic::sin: Opcode = ISD::FSIN; break;
case Intrinsic::cos: Opcode = ISD::FCOS; break;
+ case Intrinsic::exp10: Opcode = ISD::FEXP10; break;
case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
@@ -6665,6 +6727,25 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::reset_fpenv:
DAG.setRoot(DAG.getNode(ISD::RESET_FPENV, sdl, MVT::Other, getRoot()));
return;
+ case Intrinsic::get_fpmode:
+ Res = DAG.getNode(
+ ISD::GET_FPMODE, sdl,
+ DAG.getVTList(TLI.getValueType(DAG.getDataLayout(), I.getType()),
+ MVT::Other),
+ DAG.getRoot());
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return;
+ case Intrinsic::set_fpmode:
+ Res = DAG.getNode(ISD::SET_FPMODE, sdl, MVT::Other, {DAG.getRoot()},
+ getValue(I.getArgOperand(0)));
+ DAG.setRoot(Res);
+ return;
+ case Intrinsic::reset_fpmode: {
+ Res = DAG.getNode(ISD::RESET_FPMODE, sdl, MVT::Other, getRoot());
+ DAG.setRoot(Res);
+ return;
+ }
case Intrinsic::pcmarker: {
SDValue Tmp = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
@@ -7049,15 +7130,18 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
  auto Flags = rw == 0 ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore;
Ops[0] = DAG.getRoot();
Ops[1] = getValue(I.getArgOperand(0));
- Ops[2] = getValue(I.getArgOperand(1));
- Ops[3] = getValue(I.getArgOperand(2));
- Ops[4] = getValue(I.getArgOperand(3));
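+    // rw, locality and cache-type are immarg operands of llvm.prefetch, so
+    // encode them directly as target constants.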
+ Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)), sdl,
+ MVT::i32);
+ Ops[3] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(2)), sdl,
+ MVT::i32);
+ Ops[4] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(3)), sdl,
+ MVT::i32);
SDValue Result = DAG.getMemIntrinsicNode(
ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops,
EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)),
/* align */ std::nullopt, Flags);
- // Chain the prefetch in parallell with any pending loads, to stay out of
+ // Chain the prefetch in parallel with any pending loads, to stay out of
// the way of later optimizations.
PendingLoads.push_back(Result);
Result = getRoot();
@@ -7068,7 +7152,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::lifetime_end: {
bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
// Stack coloring is not enabled in O0, discard region information.
- if (TM.getOptLevel() == CodeGenOpt::None)
+ if (TM.getOptLevel() == CodeGenOptLevel::None)
return;
const int64_t ObjectSize =
@@ -7153,6 +7237,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
llvm_unreachable("instrprof failed to lower a timestamp");
case Intrinsic::instrprof_value_profile:
llvm_unreachable("instrprof failed to lower a value profiling call");
+ case Intrinsic::instrprof_mcdc_parameters:
+ llvm_unreachable("instrprof failed to lower mcdc parameters");
+ case Intrinsic::instrprof_mcdc_tvbitmap_update:
+ llvm_unreachable("instrprof failed to lower an mcdc tvbitmap update");
+ case Intrinsic::instrprof_mcdc_condbitmap_update:
+ llvm_unreachable("instrprof failed to lower an mcdc condbitmap update");
case Intrinsic::localescape: {
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
@@ -7380,13 +7470,62 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, Val);
return;
}
+ case Intrinsic::amdgcn_cs_chain: {
+ assert(I.arg_size() == 5 && "Additional args not supported yet");
+ assert(cast<ConstantInt>(I.getOperand(4))->isZero() &&
+ "Non-zero flags not supported yet");
+
+ // At this point we don't care if it's amdgpu_cs_chain or
+ // amdgpu_cs_chain_preserve.
+ CallingConv::ID CC = CallingConv::AMDGPU_CS_Chain;
+
+ Type *RetTy = I.getType();
+ assert(RetTy->isVoidTy() && "Should not return");
+
+ SDValue Callee = getValue(I.getOperand(0));
+
+ // We only have 2 actual args: one for the SGPRs and one for the VGPRs.
+ // We'll also tack the value of the EXEC mask at the end.
+ TargetLowering::ArgListTy Args;
+ Args.reserve(3);
+
+ for (unsigned Idx : {2, 3, 1}) {
+ TargetLowering::ArgListEntry Arg;
+ Arg.Node = getValue(I.getOperand(Idx));
+ Arg.Ty = I.getOperand(Idx)->getType();
+ Arg.setAttributes(&I, Idx);
+ Args.push_back(Arg);
+ }
+
+ assert(Args[0].IsInReg && "SGPR args should be marked inreg");
+ assert(!Args[1].IsInReg && "VGPR args should not be marked inreg");
+ Args[2].IsInReg = true; // EXEC should be inreg
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(getCurSDLoc())
+ .setChain(getRoot())
+ .setCallee(CC, RetTy, Callee, std::move(Args))
+ .setNoReturn(true)
+ .setTailCall(true)
+ .setConvergent(I.isConvergent());
+ CLI.CB = &I;
+ std::pair<SDValue, SDValue> Result =
+ lowerInvokable(CLI, /*EHPadBB*/ nullptr);
+ (void)Result;
+ assert(!Result.first.getNode() && !Result.second.getNode() &&
+ "Should've lowered as tail call");
+
+ HasTailCall = true;
+ return;
+ }
case Intrinsic::ptrmask: {
SDValue Ptr = getValue(I.getOperand(0));
- SDValue Const = getValue(I.getOperand(1));
+ SDValue Mask = getValue(I.getOperand(1));
EVT PtrVT = Ptr.getValueType();
- setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr,
- DAG.getZExtOrTrunc(Const, sdl, PtrVT)));
+ assert(PtrVT == Mask.getValueType() &&
+ "Pointers with different index type are not supported by SDAG");
+ setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr, Mask));
return;
}
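With the zext/trunc gone, llvm.ptrmask lowering is a bare AND, and the new assert shifts responsibility to the caller: the mask must already have the pointer's index type. A hedged model on plain integers (helper name ours):

    #include <cstdint>

    // Mask already matches the pointer's index width, so no widening or
    // truncation happens on the way to the AND.
    void *ptrmaskModel(void *Ptr, std::uintptr_t Mask) {
      return reinterpret_cast<void *>(reinterpret_cast<std::uintptr_t>(Ptr) &
                                      Mask);
    }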
case Intrinsic::threadlocal_address: {
@@ -7451,6 +7590,62 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, Trunc);
return;
}
+ case Intrinsic::experimental_cttz_elts: {
+ auto DL = getCurSDLoc();
+ SDValue Op = getValue(I.getOperand(0));
+ EVT OpVT = Op.getValueType();
+
+ if (!TLI.shouldExpandCttzElements(OpVT)) {
+ visitTargetIntrinsic(I, Intrinsic);
+ return;
+ }
+
+ if (OpVT.getScalarType() != MVT::i1) {
+ // Compare the input vector elements to zero & use the result to count trailing zeros
+ SDValue AllZero = DAG.getConstant(0, DL, OpVT);
+ OpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ OpVT.getVectorElementCount());
+ Op = DAG.getSetCC(DL, OpVT, Op, AllZero, ISD::SETNE);
+ }
+
+ // Find the smallest "sensible" element type to use for the expansion.
+ ConstantRange CR(
+ APInt(64, OpVT.getVectorElementCount().getKnownMinValue()));
+ if (OpVT.isScalableVT())
+ CR = CR.umul_sat(getVScaleRange(I.getCaller(), 64));
+
+ // If the zero-is-poison flag is set, we can assume the upper limit
+ // of the result is VF-1.
+ if (!cast<ConstantSDNode>(getValue(I.getOperand(1)))->isZero())
+ CR = CR.subtract(APInt(64, 1));
+
+ unsigned EltWidth = I.getType()->getScalarSizeInBits();
+ EltWidth = std::min(EltWidth, (unsigned)CR.getActiveBits());
+ EltWidth = std::max(llvm::bit_ceil(EltWidth), (unsigned)8);
+
+ MVT NewEltTy = MVT::getIntegerVT(EltWidth);
+
+ // Create the new vector type & get the vector length
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltTy,
+ OpVT.getVectorElementCount());
+
+ SDValue VL =
+ DAG.getElementCount(DL, NewEltTy, OpVT.getVectorElementCount());
+
+ SDValue StepVec = DAG.getStepVector(DL, NewVT);
+ SDValue SplatVL = DAG.getSplat(NewVT, DL, VL);
+ SDValue StepVL = DAG.getNode(ISD::SUB, DL, NewVT, SplatVL, StepVec);
+ SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, Op);
+ SDValue And = DAG.getNode(ISD::AND, DL, NewVT, StepVL, Ext);
+ SDValue Max = DAG.getNode(ISD::VECREDUCE_UMAX, DL, NewEltTy, And);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, NewEltTy, VL, Max);
+
+ EVT RetTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ SDValue Ret = DAG.getZExtOrTrunc(Sub, DL, RetTy);
+
+ setValue(&I, Ret);
+ return;
+ }
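The expansion above finds the first set i1 lane with vector arithmetic alone: after the splat-minus-step, lane i holds VL - i; masking with the sign-extended input zeroes the clear lanes; the unsigned-max reduction therefore produces VL minus the first set index, and the final subtraction recovers that index, with an all-clear input naturally yielding VL. A scalar model of the same computation, a sketch rather than the SDAG code:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    std::uint64_t cttzEltsModel(const std::vector<bool> &Op) {
      std::uint64_t VL = Op.size();
      std::uint64_t Max = 0;
      for (std::uint64_t I = 0; I < VL; ++I) {
        std::uint64_t StepVL = VL - I;                      // SplatVL - StepVec
        std::uint64_t Ext = Op[I] ? ~std::uint64_t(0) : 0;  // sign-extended i1
        Max = std::max(Max, StepVL & Ext);                  // VECREDUCE_UMAX
      }
      return VL - Max; // VL when no lane is set
    }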
case Intrinsic::vector_insert: {
SDValue Vec = getValue(I.getOperand(0));
SDValue SubVec = getValue(I.getOperand(1));
@@ -7938,6 +8133,16 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
}
break;
}
+ case ISD::VP_IS_FPCLASS: {
+ const DataLayout DLayout = DAG.getDataLayout();
+ EVT DestVT = TLI.getValueType(DLayout, VPIntrin.getType());
+ auto Constant = cast<ConstantSDNode>(OpValues[1])->getZExtValue();
+ SDValue Check = DAG.getTargetConstant(Constant, DL, MVT::i32);
+ SDValue V = DAG.getNode(ISD::VP_IS_FPCLASS, DL, DestVT,
+ {OpValues[0], Check, OpValues[2], OpValues[3]});
+ setValue(&VPIntrin, V);
+ return;
+ }
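The class-test operand is a bitmask forwarded as an i32 target constant. Below is a hedged scalar model of how such a mask classifies a value; the bit layout is assumed to follow LLVM's FPClassTest order (sNaN and qNaN in the low two bits, then the negative classes down to zero, then the positive ones back up to +inf), and the two NaN bits are merged because ISO C++ cannot portably tell signaling from quiet NaNs:

    #include <cmath>

    bool isFPClassModel(double X, unsigned Check) {
      bool Neg = std::signbit(X);
      switch (std::fpclassify(X)) {
      case FP_NAN:       return Check & 0x003;                 // sNaN | qNaN
      case FP_INFINITE:  return Check & (Neg ? 0x004 : 0x200);
      case FP_NORMAL:    return Check & (Neg ? 0x008 : 0x100);
      case FP_SUBNORMAL: return Check & (Neg ? 0x010 : 0x080);
      default:           return Check & (Neg ? 0x020 : 0x040); // +/- zero
      }
    }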
case ISD::VP_INTTOPTR: {
SDValue N = OpValues[0];
EVT DestVT = TLI.getValueType(DAG.getDataLayout(), VPIntrin.getType());
@@ -8669,6 +8874,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (visitUnaryFloatCall(I, ISD::FEXP2))
return;
break;
+ case LibFunc_exp10:
+ case LibFunc_exp10f:
+ case LibFunc_exp10l:
+ if (visitUnaryFloatCall(I, ISD::FEXP10))
+ return;
+ break;
case LibFunc_ldexp:
case LibFunc_ldexpf:
case LibFunc_ldexpl:
@@ -8964,11 +9175,11 @@ findMatchingInlineAsmOperand(unsigned OperandNo,
// Advance to the next operand.
unsigned OpFlag =
cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
- assert((InlineAsm::isRegDefKind(OpFlag) ||
- InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
- InlineAsm::isMemKind(OpFlag)) &&
- "Skipped past definitions?");
- CurOp += InlineAsm::getNumOperandRegisters(OpFlag) + 1;
+ const InlineAsm::Flag F(OpFlag);
+ assert(
+ (F.isRegDefKind() || F.isRegDefEarlyClobberKind() || F.isMemKind()) &&
+ "Skipped past definitions?");
+ CurOp += F.getNumOperandRegisters() + 1;
}
return CurOp;
}
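The inline-asm hunks in this file all follow one pattern: free functions over a raw operand flag word (getFlagWord, getNumOperandRegisters, getFlagWordForMem, and friends) become methods on the InlineAsm::Flag wrapper. A hedged model of the word those helpers manipulated, with field positions inferred from the removed code and the '(OpFlag&0xffff)>>3' comment deleted above, not quoted from InlineAsm.h:

    #include <cstdint>

    struct AsmFlagModel {
      std::uint32_t Word;

      AsmFlagModel(std::uint32_t Kind, std::uint32_t NumRegs)
          : Word(Kind | (NumRegs << 3)) {} // kind: bits 0-2, count: bits 3-15

      std::uint32_t kind() const { return Word & 0x7; }
      std::uint32_t numOperandRegisters() const { return (Word & 0xffff) >> 3; }

      // The upper half carries either a memory-constraint code or, with the
      // top bit set, the index of the tied (matched) operand.
      void setMemConstraint(std::uint32_t C) { Word |= C << 16; }
      void setMatchingOp(std::uint32_t Op) { Word |= 0x80000000u | (Op << 16); }
    };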
@@ -9226,14 +9437,14 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
- unsigned ConstraintID =
+ const InlineAsm::ConstraintCode ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
- assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ assert(ConstraintID != InlineAsm::ConstraintCode::Unknown &&
"Failed to convert memory constraint code to constraint id.");
// Add information to the INLINEASM node to know about this output.
- unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
- OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
+ InlineAsm::Flag OpFlags(InlineAsm::Kind::Mem, 1);
+ OpFlags.setMemConstraint(ConstraintID);
AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(),
MVT::i32));
AsmNodeOperands.push_back(OpInfo.CallOperand);
@@ -9254,8 +9465,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
// Add information to the INLINEASM node to know that this register is
// set.
OpInfo.AssignedRegs.AddInlineAsmOperands(
- OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber
- : InlineAsm::Kind_RegDef,
+ OpInfo.isEarlyClobber ? InlineAsm::Kind::RegDefEarlyClobber
+ : InlineAsm::Kind::RegDef,
false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
}
break;
@@ -9269,11 +9480,9 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
// just use its register.
auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(),
AsmNodeOperands);
- unsigned OpFlag =
- cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
- if (InlineAsm::isRegDefKind(OpFlag) ||
- InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
- // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
+ InlineAsm::Flag Flag(
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue());
+ if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
if (OpInfo.isIndirect) {
// This happens on gcc/testsuite/gcc.dg/pr8788-1.c
emitInlineAsmError(Call, "inline asm not supported yet: "
@@ -9293,8 +9502,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
TiedReg.isVirtual() ? MRI.getRegClass(TiedReg)
: RegVT != MVT::Untyped ? TLI.getRegClassFor(RegVT)
: TRI.getMinimalPhysRegClass(TiedReg);
- unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);
- for (unsigned i = 0; i != NumRegs; ++i)
+ for (unsigned i = 0, e = Flag.getNumOperandRegisters(); i != e; ++i)
Regs.push_back(MRI.createVirtualRegister(RC));
RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType());
@@ -9302,22 +9510,21 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
SDLoc dl = getCurSDLoc();
// Use the produced MatchedRegs object to copy the operand value into the tied registers.
MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Glue, &Call);
- MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
- true, OpInfo.getMatchedOperand(), dl,
- DAG, AsmNodeOperands);
+ MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind::RegUse, true,
+ OpInfo.getMatchedOperand(), dl, DAG,
+ AsmNodeOperands);
break;
}
- assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
- assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
+ assert(Flag.isMemKind() && "Unknown matching constraint!");
+ assert(Flag.getNumOperandRegisters() == 1 &&
"Unexpected number of operands");
// Add information to the INLINEASM node to know about this input.
// See InlineAsm.h isUseOperandTiedToDef.
- OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag);
- OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
- OpInfo.getMatchedOperand());
+ Flag.clearMemConstraint();
+ Flag.setMatchingOp(OpInfo.getMatchedOperand());
AsmNodeOperands.push_back(DAG.getTargetConstant(
- OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
+ Flag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
break;
}
@@ -9347,8 +9554,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
}
// Add information to the INLINEASM node to know about this input.
- unsigned ResOpType =
- InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
+ InlineAsm::Flag ResOpType(InlineAsm::Kind::Imm, Ops.size());
AsmNodeOperands.push_back(DAG.getTargetConstant(
ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
llvm::append_range(AsmNodeOperands, Ops);
@@ -9363,14 +9569,14 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
TLI.getPointerTy(DAG.getDataLayout()) &&
"Memory operands expect pointer values");
- unsigned ConstraintID =
+ const InlineAsm::ConstraintCode ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
- assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ assert(ConstraintID != InlineAsm::ConstraintCode::Unknown &&
"Failed to convert memory constraint code to constraint id.");
// Add information to the INLINEASM node to know about this input.
- unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
- ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
+ InlineAsm::Flag ResOpType(InlineAsm::Kind::Mem, 1);
+ ResOpType.setMemConstraint(ConstraintID);
AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
getCurSDLoc(),
MVT::i32));
@@ -9379,24 +9585,24 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
}
if (OpInfo.ConstraintType == TargetLowering::C_Address) {
- unsigned ConstraintID =
+ const InlineAsm::ConstraintCode ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
- assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ assert(ConstraintID != InlineAsm::ConstraintCode::Unknown &&
"Failed to convert memory constraint code to constraint id.");
- unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ InlineAsm::Flag ResOpType(InlineAsm::Kind::Mem, 1);
SDValue AsmOp = InOperandVal;
if (isFunction(InOperandVal)) {
auto *GA = cast<GlobalAddressSDNode>(InOperandVal);
- ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Func, 1);
+ ResOpType = InlineAsm::Flag(InlineAsm::Kind::Func, 1);
AsmOp = DAG.getTargetGlobalAddress(GA->getGlobal(), getCurSDLoc(),
InOperandVal.getValueType(),
GA->getOffset());
}
// Add information to the INLINEASM node to know about this input.
- ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
+ ResOpType.setMemConstraint(ConstraintID);
AsmNodeOperands.push_back(
DAG.getTargetConstant(ResOpType, getCurSDLoc(), MVT::i32));
@@ -9434,15 +9640,15 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Glue,
&Call);
- OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
- dl, DAG, AsmNodeOperands);
+ OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind::RegUse, false,
+ 0, dl, DAG, AsmNodeOperands);
break;
}
case InlineAsm::isClobber:
// Add the clobbered value to the operand list, so that the register
// allocator is aware that the physreg got clobbered.
if (!OpInfo.AssignedRegs.Regs.empty())
- OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
+ OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind::Clobber,
false, 0, getCurSDLoc(), DAG,
AsmNodeOperands);
break;
@@ -9679,7 +9885,7 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
void SelectionDAGBuilder::populateCallLoweringInfo(
TargetLowering::CallLoweringInfo &CLI, const CallBase *Call,
unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy,
- bool IsPatchPoint) {
+ AttributeSet RetAttrs, bool IsPatchPoint) {
TargetLowering::ArgListTy Args;
Args.reserve(NumArgs);
@@ -9700,7 +9906,8 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
- .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
+ .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args),
+ RetAttrs)
.setDiscardResult(Call->use_empty())
.setIsPatchPoint(IsPatchPoint)
.setIsPreallocated(
@@ -9849,7 +10056,7 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB,
TargetLowering::CallLoweringInfo CLI(DAG);
populateCallLoweringInfo(CLI, &CB, NumMetaOpers, NumCallArgs, Callee,
- ReturnTy, true);
+ ReturnTy, CB.getAttributes().getRetAttrs(), true);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
SDNode *CallEnd = Result.second.getNode();
@@ -11243,7 +11450,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
}
}
- if (TM.getOptLevel() != CodeGenOpt::None) {
+ if (TM.getOptLevel() != CodeGenOptLevel::None) {
// Here, we order cases by probability so the most likely case will be
// checked first. However, two clusters can have the same probability in
// which case their relative ordering is non-deterministic. So we use Low
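The tiebreaker is the cluster's Low bound; a scalar sketch of that deterministic ordering, sorting by descending probability and breaking ties on the low end of the case range (the struct stands in for CaseCluster; names are ours):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct ClusterModel {
      double Prob;
      std::int64_t Low; // low end of the case range, the tiebreaker
    };

    void orderClusters(std::vector<ClusterModel> &Cs) {
      std::sort(Cs.begin(), Cs.end(),
                [](const ClusterModel &A, const ClusterModel &B) {
                  return A.Prob != B.Prob ? A.Prob > B.Prob : A.Low < B.Low;
                });
    }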
@@ -11601,7 +11808,7 @@ MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
// Don't perform peeling if there is only one cluster or when optimizing for size.
if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
- TM.getOptLevel() == CodeGenOpt::None ||
+ TM.getOptLevel() == CodeGenOptLevel::None ||
SwitchMBB->getParent()->getFunction().hasMinSize())
return SwitchMBB;
@@ -11685,7 +11892,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
return;
}
- SL->findJumpTables(Clusters, &SI, DefaultMBB, DAG.getPSI(), DAG.getBFI());
+ SL->findJumpTables(Clusters, &SI, getCurSDLoc(), DefaultMBB, DAG.getPSI(),
+ DAG.getBFI());
SL->findBitTestClusters(Clusters, &SI);
LLVM_DEBUG({
@@ -11723,7 +11931,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
SwitchWorkListItem W = WorkList.pop_back_val();
unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
- if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
+ if (NumClusters > 3 && TM.getOptLevel() != CodeGenOptLevel::None &&
!DefaultMBB->getParent()->getFunction().hasMinSize()) {
// For optimized builds, lower large range as a balanced binary tree.
splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index f2496f24973a..2e102c002c09 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -106,54 +106,39 @@ class SelectionDAGBuilder {
/// Helper type for DanglingDebugInfoMap.
class DanglingDebugInfo {
- using DbgValTy = const DbgValueInst *;
- using VarLocTy = const VarLocInfo *;
- PointerUnion<DbgValTy, VarLocTy> Info;
unsigned SDNodeOrder = 0;
public:
+ DILocalVariable *Variable;
+ DIExpression *Expression;
+ DebugLoc dl;
DanglingDebugInfo() = default;
- DanglingDebugInfo(const DbgValueInst *DI, unsigned SDNO)
- : Info(DI), SDNodeOrder(SDNO) {}
- DanglingDebugInfo(const VarLocInfo *VarLoc, unsigned SDNO)
- : Info(VarLoc), SDNodeOrder(SDNO) {}
-
- DILocalVariable *getVariable(const FunctionVarLocs *Locs) const {
- if (isa<VarLocTy>(Info))
- return Locs->getDILocalVariable(cast<VarLocTy>(Info)->VariableID);
- return cast<DbgValTy>(Info)->getVariable();
- }
- DIExpression *getExpression() const {
- if (isa<VarLocTy>(Info))
- return cast<VarLocTy>(Info)->Expr;
- return cast<DbgValTy>(Info)->getExpression();
- }
- Value *getVariableLocationOp(unsigned Idx) const {
- assert(Idx == 0 && "Dangling variadic debug values not supported yet");
- if (isa<VarLocTy>(Info))
- return cast<VarLocTy>(Info)->Values.getVariableLocationOp(Idx);
- return cast<DbgValTy>(Info)->getVariableLocationOp(Idx);
- }
- DebugLoc getDebugLoc() const {
- if (isa<VarLocTy>(Info))
- return cast<VarLocTy>(Info)->DL;
- return cast<DbgValTy>(Info)->getDebugLoc();
- }
+ DanglingDebugInfo(DILocalVariable *Var, DIExpression *Expr, DebugLoc DL,
+ unsigned SDNO)
+ : SDNodeOrder(SDNO), Variable(Var), Expression(Expr),
+ dl(std::move(DL)) {}
+
+ DILocalVariable *getVariable() const { return Variable; }
+ DIExpression *getExpression() const { return Expression; }
+ DebugLoc getDebugLoc() const { return dl; }
unsigned getSDNodeOrder() const { return SDNodeOrder; }
/// Helper for printing DanglingDebugInfo. This hoop-jumping is to
- /// accommodate the fact that an argument is required for getVariable.
+ /// store a Value pointer, so that we can print a whole DDI as one object.
/// Call SelectionDAGBuilder::printDDI instead of using directly.
struct Print {
- Print(const DanglingDebugInfo &DDI, const FunctionVarLocs *VarLocs)
- : DDI(DDI), VarLocs(VarLocs) {}
+ Print(const Value *V, const DanglingDebugInfo &DDI) : V(V), DDI(DDI) {}
+ const Value *V;
const DanglingDebugInfo &DDI;
- const FunctionVarLocs *VarLocs;
friend raw_ostream &operator<<(raw_ostream &OS,
const DanglingDebugInfo::Print &P) {
- OS << "DDI(var=" << *P.DDI.getVariable(P.VarLocs)
- << ", val= " << *P.DDI.getVariableLocationOp(0)
- << ", expr=" << *P.DDI.getExpression()
+ OS << "DDI(var=" << *P.DDI.getVariable();
+ if (P.V)
+ OS << ", val=" << *P.V;
+ else
+ OS << ", val=nullptr";
+
+ OS << ", expr=" << *P.DDI.getExpression()
<< ", order=" << P.DDI.getSDNodeOrder()
<< ", loc=" << P.DDI.getDebugLoc() << ")";
return OS;
@@ -164,8 +149,9 @@ class SelectionDAGBuilder {
/// Returns an object that defines `raw_ostream &operator<<` for printing.
/// Usage example:
//// errs() << printDDI(MyValue, MyDanglingInfo) << " is dangling\n";
- DanglingDebugInfo::Print printDDI(const DanglingDebugInfo &DDI) {
- return DanglingDebugInfo::Print(DDI, DAG.getFunctionVarLocs());
+ DanglingDebugInfo::Print printDDI(const Value *V,
+ const DanglingDebugInfo &DDI) {
+ return DanglingDebugInfo::Print(V, DDI);
}
/// Helper type for DanglingDebugInfoMap.
@@ -295,10 +281,10 @@ public:
LLVMContext *Context = nullptr;
SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
- SwiftErrorValueTracking &swifterror, CodeGenOpt::Level ol)
+ SwiftErrorValueTracking &swifterror, CodeGenOptLevel ol)
: SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag),
- SL(std::make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo),
- SwiftError(swifterror) {}
+ SL(std::make_unique<SDAGSwitchLowering>(this, funcinfo)),
+ FuncInfo(funcinfo), SwiftError(swifterror) {}
void init(GCFunctionInfo *gfi, AAResults *AA, AssumptionCache *AC,
const TargetLibraryInfo *li);
@@ -344,6 +330,7 @@ public:
ISD::NodeType ExtendType = ISD::ANY_EXTEND);
void visit(const Instruction &I);
+ void visitDbgInfo(const Instruction &I);
void visit(unsigned Opcode, const User &I);
@@ -352,8 +339,9 @@ public:
SDValue getCopyFromRegs(const Value *V, Type *Ty);
/// Register a dbg_value which relies on a Value which we have not yet seen.
- void addDanglingDebugInfo(const DbgValueInst *DI, unsigned Order);
- void addDanglingDebugInfo(const VarLocInfo *VarLoc, unsigned Order);
+ void addDanglingDebugInfo(SmallVectorImpl<Value *> &Values,
+ DILocalVariable *Var, DIExpression *Expr,
+ bool IsVariadic, DebugLoc DL, unsigned Order);
/// If we have dangling debug info that describes \p Variable, or an
/// overlapping part of variable considering the \p Expr, then this method
@@ -368,7 +356,7 @@ public:
/// For the given dangling debuginfo record, perform last-ditch efforts to
/// resolve the debuginfo to something that is represented in this DAG. If
/// this cannot be done, produce an Undef debug value record.
- void salvageUnresolvedDbgValue(DanglingDebugInfo &DDI);
+ void salvageUnresolvedDbgValue(const Value *V, DanglingDebugInfo &DDI);
/// For a given list of Values, attempt to create and record a SDDbgValue in
/// the SelectionDAG.
@@ -380,6 +368,9 @@ public:
void handleKillDebugValue(DILocalVariable *Var, DIExpression *Expr,
DebugLoc DbgLoc, unsigned Order);
+ void handleDebugDeclare(Value *Address, DILocalVariable *Variable,
+ DIExpression *Expression, DebugLoc DL);
+
/// Evict any dangling debug information, attempting to salvage it first.
void resolveOrClearDbgInfo();
@@ -426,7 +417,8 @@ public:
void populateCallLoweringInfo(TargetLowering::CallLoweringInfo &CLI,
const CallBase *Call, unsigned ArgIdx,
unsigned NumArgs, SDValue Callee,
- Type *ReturnTy, bool IsPatchPoint);
+ Type *ReturnTy, AttributeSet RetAttrs,
+ bool IsPatchPoint);
std::pair<SDValue, SDValue>
lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
@@ -625,6 +617,8 @@ private:
void visitInlineAsm(const CallBase &Call,
const BasicBlock *EHPadBB = nullptr);
+
+ bool visitEntryValueDbgValue(const DbgValueInst &I);
void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
@@ -785,7 +779,7 @@ struct RegsForValue {
/// Add this value to the specified inlineasm node operand list. This adds the
/// code marker, matching input operand index (if applicable), and includes
/// the number of values added into it.
- void AddInlineAsmOperands(unsigned Code, bool HasMatching,
+ void AddInlineAsmOperands(InlineAsm::Kind Code, bool HasMatching,
unsigned MatchingIdx, const SDLoc &dl,
SelectionDAG &DAG, std::vector<SDValue> &Ops) const;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 03a1ead5bbb4..78cc60084068 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -125,6 +125,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
case ISD::FrameIndex: return "FrameIndex";
case ISD::JumpTable: return "JumpTable";
+ case ISD::JUMP_TABLE_DEBUG_INFO:
+ return "JUMP_TABLE_DEBUG_INFO";
case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
case ISD::RETURNADDR: return "RETURNADDR";
case ISD::ADDROFRETURNADDR: return "ADDROFRETURNADDR";
@@ -222,6 +224,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STRICT_FEXP: return "strict_fexp";
case ISD::FEXP2: return "fexp2";
case ISD::STRICT_FEXP2: return "strict_fexp2";
+ case ISD::FEXP10: return "fexp10";
case ISD::FLOG: return "flog";
case ISD::STRICT_FLOG: return "strict_flog";
case ISD::FLOG2: return "flog2";
@@ -439,6 +442,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::RESET_FPENV: return "reset_fpenv";
case ISD::GET_FPENV_MEM: return "get_fpenv_mem";
case ISD::SET_FPENV_MEM: return "set_fpenv_mem";
+ case ISD::GET_FPMODE: return "get_fpmode";
+ case ISD::SET_FPMODE: return "set_fpmode";
+ case ISD::RESET_FPMODE: return "reset_fpmode";
// Bit manipulation
case ISD::ABS: return "abs";
@@ -591,6 +597,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (getFlags().hasExact())
OS << " exact";
+ if (getFlags().hasNonNeg())
+ OS << " nneg";
+
if (getFlags().hasNoNaNs())
OS << " nnan";
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 35abd990f968..a1cf4cbbee1b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -78,6 +78,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PrintPasses.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
@@ -113,6 +114,7 @@
using namespace llvm;
#define DEBUG_TYPE "isel"
+#define ISEL_DUMP_DEBUG_TYPE DEBUG_TYPE "-dump"
STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected");
@@ -180,6 +182,19 @@ static const bool ViewDAGCombine1 = false, ViewLegalizeTypesDAGs = false,
ViewSchedDAGs = false, ViewSUnitDAGs = false;
#endif
+#ifndef NDEBUG
+#define ISEL_DUMP(X) \
+ do { \
+ if (llvm::DebugFlag && \
+ (isCurrentDebugType(DEBUG_TYPE) || \
+ (isCurrentDebugType(ISEL_DUMP_DEBUG_TYPE) && MatchFilterFuncName))) { \
+ X; \
+ } \
+ } while (false)
+#else
+#define ISEL_DUMP(X) do { } while (false)
+#endif
+
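With this macro the per-stage DAG dumps sit behind their own debug type, additionally gated by the print-function filter. In a debug build an invocation along these lines should print the DAG snapshots for a single function (my_func and test.ll are placeholders; both flags are existing llc options):

    llc -debug-only=isel-dump -filter-print-funcs=my_func test.ll

Plain -debug-only=isel keeps the previous, all-inclusive output.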
//===---------------------------------------------------------------------===//
///
/// RegisterScheduler class - Track the registration of instruction schedulers.
@@ -204,6 +219,16 @@ static RegisterScheduler
defaultListDAGScheduler("default", "Best scheduler for the target",
createDefaultScheduler);
+static bool dontUseFastISelFor(const Function &Fn) {
+ // Don't enable FastISel for functions with swiftasync Arguments.
+ // Debug info on those is reliant on good Argument lowering, and FastISel is
+ // not capable of lowering the entire function. Mixing the two selectors tends
+ // to result in poor lowering of Arguments.
+ return any_of(Fn.args(), [](const Argument &Arg) {
+ return Arg.hasAttribute(Attribute::AttrKind::SwiftAsync);
+ });
+}
+
namespace llvm {
//===--------------------------------------------------------------------===//
@@ -211,29 +236,31 @@ namespace llvm {
/// the optimization level on a per-function basis.
class OptLevelChanger {
SelectionDAGISel &IS;
- CodeGenOpt::Level SavedOptLevel;
+ CodeGenOptLevel SavedOptLevel;
bool SavedFastISel;
public:
- OptLevelChanger(SelectionDAGISel &ISel,
- CodeGenOpt::Level NewOptLevel) : IS(ISel) {
+ OptLevelChanger(SelectionDAGISel &ISel, CodeGenOptLevel NewOptLevel)
+ : IS(ISel) {
SavedOptLevel = IS.OptLevel;
SavedFastISel = IS.TM.Options.EnableFastISel;
- if (NewOptLevel == SavedOptLevel)
- return;
- IS.OptLevel = NewOptLevel;
- IS.TM.setOptLevel(NewOptLevel);
- LLVM_DEBUG(dbgs() << "\nChanging optimization level for Function "
- << IS.MF->getFunction().getName() << "\n");
- LLVM_DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O"
- << NewOptLevel << "\n");
- if (NewOptLevel == CodeGenOpt::None) {
- IS.TM.setFastISel(IS.TM.getO0WantsFastISel());
- LLVM_DEBUG(
- dbgs() << "\tFastISel is "
- << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled")
- << "\n");
+ if (NewOptLevel != SavedOptLevel) {
+ IS.OptLevel = NewOptLevel;
+ IS.TM.setOptLevel(NewOptLevel);
+ LLVM_DEBUG(dbgs() << "\nChanging optimization level for Function "
+ << IS.MF->getFunction().getName() << "\n");
+ LLVM_DEBUG(dbgs() << "\tBefore: -O" << static_cast<int>(SavedOptLevel)
+ << " ; After: -O" << static_cast<int>(NewOptLevel)
+ << "\n");
+ if (NewOptLevel == CodeGenOptLevel::None)
+ IS.TM.setFastISel(IS.TM.getO0WantsFastISel());
}
+ if (dontUseFastISelFor(IS.MF->getFunction()))
+ IS.TM.setFastISel(false);
+ LLVM_DEBUG(
+ dbgs() << "\tFastISel is "
+ << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled")
+ << "\n");
}
~OptLevelChanger() {
@@ -241,8 +268,8 @@ namespace llvm {
return;
LLVM_DEBUG(dbgs() << "\nRestoring optimization level for Function "
<< IS.MF->getFunction().getName() << "\n");
- LLVM_DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel << " ; After: -O"
- << SavedOptLevel << "\n");
+ LLVM_DEBUG(dbgs() << "\tBefore: -O" << static_cast<int>(IS.OptLevel)
+ << " ; After: -O" << static_cast<int>(SavedOptLevel) << "\n");
IS.OptLevel = SavedOptLevel;
IS.TM.setOptLevel(SavedOptLevel);
IS.TM.setFastISel(SavedFastISel);
@@ -252,8 +279,8 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// createDefaultScheduler - This creates an instruction scheduler appropriate
/// for the target.
- ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS,
- CodeGenOpt::Level OptLevel) {
+ ScheduleDAGSDNodes *createDefaultScheduler(SelectionDAGISel *IS,
+ CodeGenOptLevel OptLevel) {
const TargetLowering *TLI = IS->TLI;
const TargetSubtargetInfo &ST = IS->MF->getSubtarget();
@@ -262,7 +289,7 @@ namespace llvm {
return SchedulerCtor(IS, OptLevel);
}
- if (OptLevel == CodeGenOpt::None ||
+ if (OptLevel == CodeGenOptLevel::None ||
(ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) ||
TLI->getSchedulingPreference() == Sched::Source)
return createSourceListDAGScheduler(IS, OptLevel);
@@ -315,7 +342,7 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
//===----------------------------------------------------------------------===//
SelectionDAGISel::SelectionDAGISel(char &ID, TargetMachine &tm,
- CodeGenOpt::Level OL)
+ CodeGenOptLevel OL)
: MachineFunctionPass(ID), TM(tm), FuncInfo(new FunctionLoweringInfo()),
SwiftError(new SwiftErrorValueTracking()),
CurDAG(new SelectionDAG(tm, OL)),
@@ -335,23 +362,23 @@ SelectionDAGISel::~SelectionDAGISel() {
}
void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
- if (OptLevel != CodeGenOpt::None)
- AU.addRequired<AAResultsWrapperPass>();
+ if (OptLevel != CodeGenOptLevel::None)
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<GCModuleInfo>();
AU.addRequired<StackProtector>();
AU.addPreserved<GCModuleInfo>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
- if (UseMBPI && OptLevel != CodeGenOpt::None)
- AU.addRequired<BranchProbabilityInfoWrapperPass>();
+ if (UseMBPI && OptLevel != CodeGenOptLevel::None)
+ AU.addRequired<BranchProbabilityInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
// AssignmentTrackingAnalysis only runs if assignment tracking is enabled for
// the module.
AU.addRequired<AssignmentTrackingAnalysis>();
AU.addPreserved<AssignmentTrackingAnalysis>();
- if (OptLevel != CodeGenOpt::None)
- LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
+ if (OptLevel != CodeGenOptLevel::None)
+ LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -391,6 +418,13 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
const Function &Fn = mf.getFunction();
MF = &mf;
+#ifndef NDEBUG
+ StringRef FuncName = Fn.getName();
+ MatchFilterFuncName = isFunctionInPrintList(FuncName);
+#else
+ (void)MatchFilterFuncName;
+#endif
+
// Decide what flavour of variable location debug-info will be used, before
// we change the optimisation level.
bool InstrRef = mf.shouldUseDebugInstrRef();
@@ -403,9 +437,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// it wants to look at it.
TM.resetTargetOptions(Fn);
// Reset OptLevel to None for optnone functions.
- CodeGenOpt::Level NewOptLevel = OptLevel;
- if (OptLevel != CodeGenOpt::None && skipFunction(Fn))
- NewOptLevel = CodeGenOpt::None;
+ CodeGenOptLevel NewOptLevel = OptLevel;
+ if (OptLevel != CodeGenOptLevel::None && skipFunction(Fn))
+ NewOptLevel = CodeGenOptLevel::None;
OptLevelChanger OLC(*this, NewOptLevel);
TII = MF->getSubtarget().getInstrInfo();
@@ -417,14 +451,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(mf.getFunction());
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
BlockFrequencyInfo *BFI = nullptr;
- if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOpt::None)
+ if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOptLevel::None)
BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
FunctionVarLocs const *FnVarLocs = nullptr;
if (isAssignmentTrackingEnabled(*Fn.getParent()))
FnVarLocs = getAnalysis<AssignmentTrackingAnalysis>().getResults();
- LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
+ ISEL_DUMP(dbgs() << "\n\n\n=== " << FuncName << "\n");
UniformityInfo *UA = nullptr;
if (auto *UAPass = getAnalysisIfAvailable<UniformityInfoWrapperPass>())
@@ -438,12 +472,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// into account). That's unfortunate but OK because it just means we won't
// ask for passes that have been required anyway.
- if (UseMBPI && OptLevel != CodeGenOpt::None)
+ if (UseMBPI && OptLevel != CodeGenOptLevel::None)
FuncInfo->BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
else
FuncInfo->BPI = nullptr;
- if (OptLevel != CodeGenOpt::None)
+ if (OptLevel != CodeGenOptLevel::None)
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
else
AA = nullptr;
@@ -456,7 +490,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// We split CSR if the target supports it for the given function
// and the function has only return exits.
- if (OptLevel != CodeGenOpt::None && TLI->supportSplitCSR(MF)) {
+ if (OptLevel != CodeGenOptLevel::None && TLI->supportSplitCSR(MF)) {
FuncInfo->SplitCSR = true;
// Collect all the return blocks.
@@ -656,8 +690,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// at this point.
FuncInfo->clear();
- LLVM_DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n");
- LLVM_DEBUG(MF->print(dbgs()));
+ ISEL_DUMP(dbgs() << "*** MachineFunction at end of ISel ***\n");
+ ISEL_DUMP(MF->print(dbgs()));
return true;
}
@@ -685,10 +719,13 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
CurDAG->NewNodesMustHaveLegalTypes = false;
// Lower the instructions. If a call is emitted as a tail call, cease emitting
- // nodes for this block.
+ // nodes for this block. If an instruction is elided, don't emit it, but do
+ // handle any debug-info attached to it.
for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
if (!ElidedArgCopyInstrs.count(&*I))
SDB->visit(*I);
+ else
+ SDB->visitDbgInfo(*I);
}
// Make sure the root of the DAG is up-to-date.
@@ -765,10 +802,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
BlockName =
(MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str();
}
- LLVM_DEBUG(dbgs() << "Initial selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ ISEL_DUMP(dbgs() << "\nInitial selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
@@ -785,10 +822,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
}
- LLVM_DEBUG(dbgs() << "Optimized lowered selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ ISEL_DUMP(dbgs() << "\nOptimized lowered selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
@@ -807,10 +844,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
Changed = CurDAG->LegalizeTypes();
}
- LLVM_DEBUG(dbgs() << "Type-legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ ISEL_DUMP(dbgs() << "\nType-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
@@ -831,10 +868,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
}
- LLVM_DEBUG(dbgs() << "Optimized type-legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ ISEL_DUMP(dbgs() << "\nOptimized type-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
@@ -849,10 +886,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
}
if (Changed) {
- LLVM_DEBUG(dbgs() << "Vector-legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ ISEL_DUMP(dbgs() << "\nVector-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
@@ -865,10 +902,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->LegalizeTypes();
}
- LLVM_DEBUG(dbgs() << "Vector/type-legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ ISEL_DUMP(dbgs() << "\nVector/type-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
@@ -885,10 +922,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel);
}
- LLVM_DEBUG(dbgs() << "Optimized vector-legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ ISEL_DUMP(dbgs() << "\nOptimized vector-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
@@ -905,10 +942,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Legalize();
}
- LLVM_DEBUG(dbgs() << "Legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ ISEL_DUMP(dbgs() << "\nLegalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
@@ -925,17 +962,17 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
}
- LLVM_DEBUG(dbgs() << "Optimized legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ ISEL_DUMP(dbgs() << "\nOptimized legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
CurDAG->VerifyDAGDivergence();
#endif
- if (OptLevel != CodeGenOpt::None)
+ if (OptLevel != CodeGenOptLevel::None)
ComputeLiveOutVRegInfo();
if (ViewISelDAGs && MatchFilterBB)
@@ -949,10 +986,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
DoInstructionSelection();
}
- LLVM_DEBUG(dbgs() << "Selected selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ ISEL_DUMP(dbgs() << "\nSelected selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
if (ViewSchedDAGs && MatchFilterBB)
CurDAG->viewGraph("scheduler input for " + BlockName);
@@ -1357,6 +1394,8 @@ static bool processIfEntryValueDbgDeclare(FunctionLoweringInfo &FuncInfo,
// Find the corresponding livein physical register to this argument.
for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins())
if (VirtReg == ArgVReg) {
+ // Append an op deref to account for the fact that this is a dbg_declare.
+ Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
FuncInfo.MF->setVariableDbgInfo(Var, Expr, PhysReg, DbgLoc);
LLVM_DEBUG(dbgs() << "processDbgDeclare: setVariableDbgInfo Var=" << *Var
<< ", Expr=" << *Expr << ", MCRegister=" << PhysReg
@@ -1422,6 +1461,14 @@ static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) {
if (DI && processDbgDeclare(FuncInfo, DI->getAddress(), DI->getExpression(),
DI->getVariable(), DI->getDebugLoc()))
FuncInfo.PreprocessedDbgDeclares.insert(DI);
+
+ for (const DPValue &DPV : I.getDbgValueRange()) {
+ if (DPV.getType() == DPValue::LocationType::Declare &&
+ processDbgDeclare(FuncInfo, DPV.getVariableLocationOp(0),
+ DPV.getExpression(), DPV.getVariable(),
+ DPV.getDebugLoc()))
+ FuncInfo.PreprocessedDPVDeclares.insert(&DPV);
+ }
}
}
@@ -1510,7 +1557,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Iterate over all basic blocks in the function.
StackProtector &SP = getAnalysis<StackProtector>();
for (const BasicBlock *LLVMBB : RPOT) {
- if (OptLevel != CodeGenOpt::None) {
+ if (OptLevel != CodeGenOptLevel::None) {
bool AllPredsVisited = true;
for (const BasicBlock *Pred : predecessors(LLVMBB)) {
if (!FuncInfo->VisitedBBs.count(Pred)) {
@@ -2074,41 +2121,43 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops,
--e; // Don't process a glue operand if it is here.
while (i != e) {
- unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
- if (!InlineAsm::isMemKind(Flags) && !InlineAsm::isFuncKind(Flags)) {
+ InlineAsm::Flag Flags(cast<ConstantSDNode>(InOps[i])->getZExtValue());
+ if (!Flags.isMemKind() && !Flags.isFuncKind()) {
// Just skip over this operand, copying the operands verbatim.
- Ops.insert(Ops.end(), InOps.begin()+i,
- InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1);
- i += InlineAsm::getNumOperandRegisters(Flags) + 1;
+ Ops.insert(Ops.end(), InOps.begin() + i,
+ InOps.begin() + i + Flags.getNumOperandRegisters() + 1);
+ i += Flags.getNumOperandRegisters() + 1;
} else {
- assert(InlineAsm::getNumOperandRegisters(Flags) == 1 &&
+ assert(Flags.getNumOperandRegisters() == 1 &&
"Memory operand with multiple values?");
unsigned TiedToOperand;
- if (InlineAsm::isUseOperandTiedToDef(Flags, TiedToOperand)) {
+ if (Flags.isUseOperandTiedToDef(TiedToOperand)) {
// We need the constraint ID from the operand this is tied to.
unsigned CurOp = InlineAsm::Op_FirstOperand;
- Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue();
+ Flags =
+ InlineAsm::Flag(cast<ConstantSDNode>(InOps[CurOp])->getZExtValue());
for (; TiedToOperand; --TiedToOperand) {
- CurOp += InlineAsm::getNumOperandRegisters(Flags)+1;
- Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue();
+ CurOp += Flags.getNumOperandRegisters() + 1;
+ Flags = InlineAsm::Flag(
+ cast<ConstantSDNode>(InOps[CurOp])->getZExtValue());
}
}
// Otherwise, this is a memory operand. Ask the target to select it.
std::vector<SDValue> SelOps;
- unsigned ConstraintID = InlineAsm::getMemoryConstraintID(Flags);
+ const InlineAsm::ConstraintCode ConstraintID =
+ Flags.getMemoryConstraintID();
if (SelectInlineAsmMemoryOperand(InOps[i+1], ConstraintID, SelOps))
report_fatal_error("Could not match memory address. Inline asm"
" failure!");
// Add this to the output node.
- unsigned NewFlags =
- InlineAsm::isMemKind(Flags)
- ? InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size())
- : InlineAsm::getFlagWord(InlineAsm::Kind_Func, SelOps.size());
- NewFlags = InlineAsm::getFlagWordForMem(NewFlags, ConstraintID);
- Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32));
+ Flags = InlineAsm::Flag(Flags.isMemKind() ? InlineAsm::Kind::Mem
+ : InlineAsm::Kind::Func,
+ SelOps.size());
+ Flags.setMemConstraint(ConstraintID);
+ Ops.push_back(CurDAG->getTargetConstant(Flags, DL, MVT::i32));
llvm::append_range(Ops, SelOps);
i += 2;
}
@@ -2176,18 +2225,20 @@ static bool findNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
/// operand node N of U during instruction selection that starts at Root.
bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
SDNode *Root) const {
- if (OptLevel == CodeGenOpt::None) return false;
+ if (OptLevel == CodeGenOptLevel::None)
+ return false;
return N.hasOneUse();
}
/// IsLegalToFold - Returns true if the specific operand node N of
/// U can be folded during instruction selection that starts at Root.
bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
- CodeGenOpt::Level OptLevel,
+ CodeGenOptLevel OptLevel,
bool IgnoreChains) {
- if (OptLevel == CodeGenOpt::None) return false;
+ if (OptLevel == CodeGenOptLevel::None)
+ return false;
- // If Root use can somehow reach N through a path that that doesn't contain
+ // If Root use can somehow reach N through a path that doesn't contain
// U then folding N would create a cycle. e.g. In the following
// diagram, Root can reach N through X. If N is folded into Root, then
// X is both a predecessor and a successor of U.
@@ -2435,6 +2486,13 @@ GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
return Val;
}
+void SelectionDAGISel::Select_JUMP_TABLE_DEBUG_INFO(SDNode *N) {
+ SDLoc dl(N);
+ CurDAG->SelectNodeTo(N, TargetOpcode::JUMP_TABLE_DEBUG_INFO, MVT::Glue,
+ CurDAG->getTargetConstant(N->getConstantOperandVal(1),
+ dl, MVT::i64, true));
+}
+
/// When a match is complete, this method updates uses of interior chain results
/// to use the new results.
void SelectionDAGISel::UpdateChains(
@@ -2591,7 +2649,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
unsigned ResNumResults = Res->getNumValues();
// Move the glue if needed.
if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 &&
- (unsigned)OldGlueResultNo != ResNumResults-1)
+ static_cast<unsigned>(OldGlueResultNo) != ResNumResults - 1)
ReplaceUses(SDValue(Node, OldGlueResultNo),
SDValue(Res, ResNumResults - 1));
@@ -2600,7 +2658,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
// Move the chain reference if needed.
if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
- (unsigned)OldChainResultNo != ResNumResults-1)
+ static_cast<unsigned>(OldChainResultNo) != ResNumResults - 1)
ReplaceUses(SDValue(Node, OldChainResultNo),
SDValue(Res, ResNumResults - 1));
@@ -2639,8 +2697,11 @@ LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildSame(
/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- const SelectionDAGISel &SDISel) {
- return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
+ const SelectionDAGISel &SDISel, bool TwoBytePredNo) {
+ unsigned PredNo = MatcherTable[MatcherIndex++];
+ if (TwoBytePredNo)
+ PredNo |= MatcherTable[MatcherIndex++] << 8;
+ return SDISel.CheckPatternPredicate(PredNo);
}
/// CheckNodePredicate - Implements OP_CheckNodePredicate.
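OPC_CheckPatternPredicate2 exists because some targets outgrew the 256 pattern predicates a single table byte can index; the second byte is or'd into the high bits. A standalone model of the decode (helper name ours):

    // Reads an 8- or 16-bit predicate number from a byte-oriented matcher
    // table, low byte first, advancing Idx past what it consumed.
    unsigned readPredNo(const unsigned char *Table, unsigned &Idx,
                        bool TwoByte) {
      unsigned PredNo = Table[Idx++];
      if (TwoByte)
        PredNo |= Table[Idx++] << 8;
      return PredNo;
    }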
@@ -2654,35 +2715,34 @@ LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDNode *N) {
uint16_t Opc = MatcherTable[MatcherIndex++];
- Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+ Opc |= static_cast<uint16_t>(MatcherTable[MatcherIndex++]) << 8;
return N->getOpcode() == Opc;
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
-CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
- const TargetLowering *TLI, const DataLayout &DL) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
- if (N.getValueType() == VT) return true;
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckType(MVT::SimpleValueType VT,
+ SDValue N,
+ const TargetLowering *TLI,
+ const DataLayout &DL) {
+ if (N.getValueType() == VT)
+ return true;
// Handle the case when VT is iPTR.
return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL);
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
-CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SDValue N, const TargetLowering *TLI, const DataLayout &DL,
- unsigned ChildNo) {
+CheckChildType(MVT::SimpleValueType VT, SDValue N, const TargetLowering *TLI,
+ const DataLayout &DL, unsigned ChildNo) {
if (ChildNo >= N.getNumOperands())
- return false; // Match fails if out of range child #.
- return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI,
- DL);
+ return false; // Match fails if out of range child #.
+ return ::CheckType(VT, N.getOperand(ChildNo), TLI, DL);
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
return cast<CondCodeSDNode>(N)->get() ==
- (ISD::CondCode)MatcherTable[MatcherIndex++];
+ static_cast<ISD::CondCode>(MatcherTable[MatcherIndex++]);
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
@@ -2696,7 +2756,8 @@ CheckChild2CondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering *TLI, const DataLayout &DL) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ MVT::SimpleValueType VT =
+ static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
if (cast<VTSDNode>(N)->getVT() == VT)
return true;
@@ -2773,7 +2834,8 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
bool &Result,
const SelectionDAGISel &SDISel,
SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes) {
- switch (Table[Index++]) {
+ unsigned Opcode = Table[Index++];
+ switch (Opcode) {
default:
Result = false;
return Index-1; // Could not evaluate this predicate.
@@ -2788,7 +2850,10 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Same);
return Index;
case SelectionDAGISel::OPC_CheckPatternPredicate:
- Result = !::CheckPatternPredicate(Table, Index, SDISel);
+ case SelectionDAGISel::OPC_CheckPatternPredicate2:
+ Result = !::CheckPatternPredicate(
+ Table, Index, SDISel,
+ Table[Index - 1] == SelectionDAGISel::OPC_CheckPatternPredicate2);
return Index;
case SelectionDAGISel::OPC_CheckPredicate:
Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode());
@@ -2797,12 +2862,27 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
Result = !::CheckOpcode(Table, Index, N.getNode());
return Index;
case SelectionDAGISel::OPC_CheckType:
- Result = !::CheckType(Table, Index, N, SDISel.TLI,
- SDISel.CurDAG->getDataLayout());
+ case SelectionDAGISel::OPC_CheckTypeI32:
+ case SelectionDAGISel::OPC_CheckTypeI64: {
+ MVT::SimpleValueType VT;
+ switch (Opcode) {
+ case SelectionDAGISel::OPC_CheckTypeI32:
+ VT = MVT::i32;
+ break;
+ case SelectionDAGISel::OPC_CheckTypeI64:
+ VT = MVT::i64;
+ break;
+ default:
+ VT = static_cast<MVT::SimpleValueType>(Table[Index++]);
+ break;
+ }
+ Result = !::CheckType(VT, N, SDISel.TLI, SDISel.CurDAG->getDataLayout());
return Index;
+ }
case SelectionDAGISel::OPC_CheckTypeRes: {
unsigned Res = Table[Index++];
- Result = !::CheckType(Table, Index, N.getValue(Res), SDISel.TLI,
+ Result = !::CheckType(static_cast<MVT::SimpleValueType>(Table[Index++]),
+ N.getValue(Res), SDISel.TLI,
SDISel.CurDAG->getDataLayout());
return Index;
}
@@ -2814,10 +2894,40 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
case SelectionDAGISel::OPC_CheckChild5Type:
case SelectionDAGISel::OPC_CheckChild6Type:
case SelectionDAGISel::OPC_CheckChild7Type:
- Result = !::CheckChildType(
- Table, Index, N, SDISel.TLI, SDISel.CurDAG->getDataLayout(),
- Table[Index - 1] - SelectionDAGISel::OPC_CheckChild0Type);
+ case SelectionDAGISel::OPC_CheckChild0TypeI32:
+ case SelectionDAGISel::OPC_CheckChild1TypeI32:
+ case SelectionDAGISel::OPC_CheckChild2TypeI32:
+ case SelectionDAGISel::OPC_CheckChild3TypeI32:
+ case SelectionDAGISel::OPC_CheckChild4TypeI32:
+ case SelectionDAGISel::OPC_CheckChild5TypeI32:
+ case SelectionDAGISel::OPC_CheckChild6TypeI32:
+ case SelectionDAGISel::OPC_CheckChild7TypeI32:
+ case SelectionDAGISel::OPC_CheckChild0TypeI64:
+ case SelectionDAGISel::OPC_CheckChild1TypeI64:
+ case SelectionDAGISel::OPC_CheckChild2TypeI64:
+ case SelectionDAGISel::OPC_CheckChild3TypeI64:
+ case SelectionDAGISel::OPC_CheckChild4TypeI64:
+ case SelectionDAGISel::OPC_CheckChild5TypeI64:
+ case SelectionDAGISel::OPC_CheckChild6TypeI64:
+ case SelectionDAGISel::OPC_CheckChild7TypeI64: {
+ MVT::SimpleValueType VT;
+ unsigned ChildNo;
+ if (Opcode >= SelectionDAGISel::OPC_CheckChild0TypeI32 &&
+ Opcode <= SelectionDAGISel::OPC_CheckChild7TypeI32) {
+ VT = MVT::i32;
+ ChildNo = Opcode - SelectionDAGISel::OPC_CheckChild0TypeI32;
+ } else if (Opcode >= SelectionDAGISel::OPC_CheckChild0TypeI64 &&
+ Opcode <= SelectionDAGISel::OPC_CheckChild7TypeI64) {
+ VT = MVT::i64;
+ ChildNo = Opcode - SelectionDAGISel::OPC_CheckChild0TypeI64;
+ } else {
+ VT = static_cast<MVT::SimpleValueType>(Table[Index++]);
+ ChildNo = Opcode - SelectionDAGISel::OPC_CheckChild0Type;
+ }
+ Result = !::CheckChildType(VT, N, SDISel.TLI,
+ SDISel.CurDAG->getDataLayout(), ChildNo);
return Index;
+ }
case SelectionDAGISel::OPC_CheckCondCode:
Result = !::CheckCondCode(Table, Index, N);
return Index;
@@ -2981,6 +3091,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::PATCHPOINT:
Select_PATCHPOINT(NodeToMatch);
return;
+ case ISD::JUMP_TABLE_DEBUG_INFO:
+ Select_JUMP_TABLE_DEBUG_INFO(NodeToMatch);
+ return;
}
assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
@@ -3042,7 +3155,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// Get the opcode, add the index to the table.
uint16_t Opc = MatcherTable[Idx++];
- Opc |= (unsigned short)MatcherTable[Idx++] << 8;
+ Opc |= static_cast<uint16_t>(MatcherTable[Idx++]) << 8;
if (Opc >= OpcodeOffset.size())
OpcodeOffset.resize((Opc+1)*2);
OpcodeOffset[Opc] = Idx;
@@ -3059,7 +3172,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
#ifndef NDEBUG
unsigned CurrentOpcodeIndex = MatcherIndex;
#endif
- BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++];
+ BuiltinOpcodes Opcode =
+ static_cast<BuiltinOpcodes>(MatcherTable[MatcherIndex++]);
switch (Opcode) {
case OPC_Scope: {
// Okay, the semantics of this operation are that we should push a scope
@@ -3179,6 +3293,29 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
continue;
}
+ case OPC_MoveSibling:
+ case OPC_MoveSibling0:
+ case OPC_MoveSibling1:
+ case OPC_MoveSibling2:
+ case OPC_MoveSibling3:
+ case OPC_MoveSibling4:
+ case OPC_MoveSibling5:
+ case OPC_MoveSibling6:
+ case OPC_MoveSibling7: {
+ // Pop the current node off the NodeStack.
+ NodeStack.pop_back();
+ assert(!NodeStack.empty() && "Node stack imbalance!");
+ N = NodeStack.back();
+
+ unsigned SiblingNo = Opcode == OPC_MoveSibling
+ ? MatcherTable[MatcherIndex++]
+ : Opcode - OPC_MoveSibling0;
+ if (SiblingNo >= N.getNumOperands())
+ break; // Match fails if out of range sibling #.
+ N = N.getOperand(SiblingNo);
+ NodeStack.push_back(N);
+ continue;
+ }
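The MoveSibling opcodes fuse a MoveParent with an immediate descent into another operand, saving a table byte and a stack round-trip per transition. A sketch of the stack manipulation on a generic tree node (template and names ours):

    #include <vector>

    // Node is any type exposing getNumOperands()/getOperand(unsigned).
    template <typename Node>
    bool moveSibling(std::vector<Node> &Stack, unsigned SiblingNo) {
      Stack.pop_back();             // drop the current node
      Node Parent = Stack.back();   // its parent is now on top
      if (SiblingNo >= Parent.getNumOperands())
        return false;               // match fails: sibling # out of range
      Stack.push_back(Parent.getOperand(SiblingNo));
      return true;
    }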
case OPC_MoveParent:
// Pop the current node off the NodeStack.
NodeStack.pop_back();
@@ -3198,7 +3335,10 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
continue;
case OPC_CheckPatternPredicate:
- if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break;
+ case OPC_CheckPatternPredicate2:
+ if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this,
+ Opcode == OPC_CheckPatternPredicate2))
+ break;
continue;
case OPC_CheckPredicate:
if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this,
@@ -3240,15 +3380,29 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
continue;
case OPC_CheckType:
- if (!::CheckType(MatcherTable, MatcherIndex, N, TLI,
- CurDAG->getDataLayout()))
+ case OPC_CheckTypeI32:
+ case OPC_CheckTypeI64:
+ MVT::SimpleValueType VT;
+ switch (Opcode) {
+ case OPC_CheckTypeI32:
+ VT = MVT::i32;
+ break;
+ case OPC_CheckTypeI64:
+ VT = MVT::i64;
+ break;
+ default:
+ VT = static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
+ break;
+ }
+ if (!::CheckType(VT, N, TLI, CurDAG->getDataLayout()))
break;
continue;
case OPC_CheckTypeRes: {
unsigned Res = MatcherTable[MatcherIndex++];
- if (!::CheckType(MatcherTable, MatcherIndex, N.getValue(Res), TLI,
- CurDAG->getDataLayout()))
+ if (!::CheckType(
+ static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]),
+ N.getValue(Res), TLI, CurDAG->getDataLayout()))
break;
continue;
}
@@ -3265,7 +3419,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
if (CaseSize == 0) break;
uint16_t Opc = MatcherTable[MatcherIndex++];
- Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+ Opc |= static_cast<uint16_t>(MatcherTable[MatcherIndex++]) << 8;
// If the opcode matches, then we will execute this case.
if (CurNodeOpcode == Opc)
@@ -3295,7 +3449,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
if (CaseSize == 0) break;
- MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ MVT CaseVT =
+ static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
if (CaseVT == MVT::iPTR)
CaseVT = TLI->getPointerTy(CurDAG->getDataLayout());
@@ -3316,15 +3471,48 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
<< '\n');
continue;
}
- case OPC_CheckChild0Type: case OPC_CheckChild1Type:
- case OPC_CheckChild2Type: case OPC_CheckChild3Type:
- case OPC_CheckChild4Type: case OPC_CheckChild5Type:
- case OPC_CheckChild6Type: case OPC_CheckChild7Type:
- if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI,
- CurDAG->getDataLayout(),
- Opcode - OPC_CheckChild0Type))
+ case OPC_CheckChild0Type:
+ case OPC_CheckChild1Type:
+ case OPC_CheckChild2Type:
+ case OPC_CheckChild3Type:
+ case OPC_CheckChild4Type:
+ case OPC_CheckChild5Type:
+ case OPC_CheckChild6Type:
+ case OPC_CheckChild7Type:
+ case OPC_CheckChild0TypeI32:
+ case OPC_CheckChild1TypeI32:
+ case OPC_CheckChild2TypeI32:
+ case OPC_CheckChild3TypeI32:
+ case OPC_CheckChild4TypeI32:
+ case OPC_CheckChild5TypeI32:
+ case OPC_CheckChild6TypeI32:
+ case OPC_CheckChild7TypeI32:
+ case OPC_CheckChild0TypeI64:
+ case OPC_CheckChild1TypeI64:
+ case OPC_CheckChild2TypeI64:
+ case OPC_CheckChild3TypeI64:
+ case OPC_CheckChild4TypeI64:
+ case OPC_CheckChild5TypeI64:
+ case OPC_CheckChild6TypeI64:
+ case OPC_CheckChild7TypeI64: {
+ MVT::SimpleValueType VT;
+ unsigned ChildNo;
+ if (Opcode >= SelectionDAGISel::OPC_CheckChild0TypeI32 &&
+ Opcode <= SelectionDAGISel::OPC_CheckChild7TypeI32) {
+ VT = MVT::i32;
+ ChildNo = Opcode - SelectionDAGISel::OPC_CheckChild0TypeI32;
+ } else if (Opcode >= SelectionDAGISel::OPC_CheckChild0TypeI64 &&
+ Opcode <= SelectionDAGISel::OPC_CheckChild7TypeI64) {
+ VT = MVT::i64;
+ ChildNo = Opcode - SelectionDAGISel::OPC_CheckChild0TypeI64;
+ } else {
+ VT = static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
+ ChildNo = Opcode - SelectionDAGISel::OPC_CheckChild0Type;
+ }
+ if (!::CheckChildType(VT, N, TLI, CurDAG->getDataLayout(), ChildNo))
break;
continue;
+ }
case OPC_CheckCondCode:
if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break;
continue;
@@ -3390,22 +3578,43 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
continue;
}
case OPC_EmitInteger:
- case OPC_EmitStringInteger: {
- MVT::SimpleValueType VT =
- (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ case OPC_EmitInteger8:
+ case OPC_EmitInteger16:
+ case OPC_EmitInteger32:
+ case OPC_EmitInteger64:
+ case OPC_EmitStringInteger:
+ case OPC_EmitStringInteger32: {
+ MVT::SimpleValueType VT;
+ switch (Opcode) {
+ case OPC_EmitInteger8:
+ VT = MVT::i8;
+ break;
+ case OPC_EmitInteger16:
+ VT = MVT::i16;
+ break;
+ case OPC_EmitInteger32:
+ case OPC_EmitStringInteger32:
+ VT = MVT::i32;
+ break;
+ case OPC_EmitInteger64:
+ VT = MVT::i64;
+ break;
+ default:
+ VT = static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
+ break;
+ }
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
- if (Opcode == OPC_EmitInteger)
+ if (Opcode >= OPC_EmitInteger && Opcode <= OPC_EmitInteger64)
Val = decodeSignRotatedValue(Val);
- RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
- CurDAG->getTargetConstant(Val, SDLoc(NodeToMatch),
- VT), nullptr));
+ RecordedNodes.push_back(std::pair<SDValue, SDNode *>(
+ CurDAG->getTargetConstant(Val, SDLoc(NodeToMatch), VT), nullptr));
continue;
}
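Two table encodings meet in the integer case above: any value whose first byte has bit 7 set continues as a variable-width (VBR) run of 7-bit groups, and the plain OPC_EmitInteger forms additionally sign-rotate the payload so small negative numbers stay short. A standalone sketch of both decoders, assuming the usual continuation-bit and zigzag-style schemes (the exact edge-case handling in GetVBR and decodeSignRotatedValue may differ):

#include <cstdint>

// Each byte contributes 7 payload bits; a set high bit means more follow.
static uint64_t decodeVBR(const unsigned char *Table, unsigned &Idx) {
  uint64_t Val = 0;
  unsigned Shift = 0;
  unsigned char Byte;
  do {
    Byte = Table[Idx++];
    Val |= uint64_t(Byte & 127) << Shift;
    Shift += 7;
  } while (Byte & 128);
  return Val;
}

// Zigzag-style decode: the sign lives in bit 0, the magnitude above it.
static int64_t decodeSignRotated(uint64_t V) {
  return (V & 1) ? -int64_t(V >> 1) : int64_t(V >> 1);
}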
case OPC_EmitRegister: {
MVT::SimpleValueType VT =
- (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
unsigned RegNo = MatcherTable[MatcherIndex++];
RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
CurDAG->getRegister(RegNo, VT), nullptr));
@@ -3416,7 +3625,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// values are stored in two bytes in the matcher table (just like
// opcodes).
MVT::SimpleValueType VT =
- (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
unsigned RegNo = MatcherTable[MatcherIndex++];
RegNo |= MatcherTable[MatcherIndex++] << 8;
RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
@@ -3424,9 +3633,19 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
continue;
}
- case OPC_EmitConvertToTarget: {
+ case OPC_EmitConvertToTarget:
+ case OPC_EmitConvertToTarget0:
+ case OPC_EmitConvertToTarget1:
+ case OPC_EmitConvertToTarget2:
+ case OPC_EmitConvertToTarget3:
+ case OPC_EmitConvertToTarget4:
+ case OPC_EmitConvertToTarget5:
+ case OPC_EmitConvertToTarget6:
+ case OPC_EmitConvertToTarget7: {
// Convert from IMM/FPIMM to target version.
- unsigned RecNo = MatcherTable[MatcherIndex++];
+ unsigned RecNo = Opcode == OPC_EmitConvertToTarget
+ ? MatcherTable[MatcherIndex++]
+ : Opcode - OPC_EmitConvertToTarget0;
assert(RecNo < RecordedNodes.size() && "Invalid EmitConvertToTarget");
SDValue Imm = RecordedNodes[RecNo].first;
@@ -3522,11 +3741,22 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
}
case OPC_EmitCopyToReg:
- case OPC_EmitCopyToReg2: {
- unsigned RecNo = MatcherTable[MatcherIndex++];
+ case OPC_EmitCopyToReg0:
+ case OPC_EmitCopyToReg1:
+ case OPC_EmitCopyToReg2:
+ case OPC_EmitCopyToReg3:
+ case OPC_EmitCopyToReg4:
+ case OPC_EmitCopyToReg5:
+ case OPC_EmitCopyToReg6:
+ case OPC_EmitCopyToReg7:
+ case OPC_EmitCopyToRegTwoByte: {
+ unsigned RecNo =
+ Opcode >= OPC_EmitCopyToReg0 && Opcode <= OPC_EmitCopyToReg7
+ ? Opcode - OPC_EmitCopyToReg0
+ : MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg");
unsigned DestPhysReg = MatcherTable[MatcherIndex++];
- if (Opcode == OPC_EmitCopyToReg2)
+ if (Opcode == OPC_EmitCopyToRegTwoByte)
DestPhysReg |= MatcherTable[MatcherIndex++] << 8;
if (!InputChain.getNode())
@@ -3558,26 +3788,83 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
continue;
}
- case OPC_EmitNode: case OPC_MorphNodeTo:
- case OPC_EmitNode0: case OPC_EmitNode1: case OPC_EmitNode2:
- case OPC_MorphNodeTo0: case OPC_MorphNodeTo1: case OPC_MorphNodeTo2: {
+ case OPC_EmitNode:
+ case OPC_EmitNode0:
+ case OPC_EmitNode1:
+ case OPC_EmitNode2:
+ case OPC_EmitNode0None:
+ case OPC_EmitNode1None:
+ case OPC_EmitNode2None:
+ case OPC_EmitNode0Chain:
+ case OPC_EmitNode1Chain:
+ case OPC_EmitNode2Chain:
+ case OPC_MorphNodeTo:
+ case OPC_MorphNodeTo0:
+ case OPC_MorphNodeTo1:
+ case OPC_MorphNodeTo2:
+ case OPC_MorphNodeTo0None:
+ case OPC_MorphNodeTo1None:
+ case OPC_MorphNodeTo2None:
+ case OPC_MorphNodeTo0Chain:
+ case OPC_MorphNodeTo1Chain:
+ case OPC_MorphNodeTo2Chain:
+ case OPC_MorphNodeTo0GlueInput:
+ case OPC_MorphNodeTo1GlueInput:
+ case OPC_MorphNodeTo2GlueInput:
+ case OPC_MorphNodeTo0GlueOutput:
+ case OPC_MorphNodeTo1GlueOutput:
+ case OPC_MorphNodeTo2GlueOutput: {
uint16_t TargetOpc = MatcherTable[MatcherIndex++];
- TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
- unsigned EmitNodeInfo = MatcherTable[MatcherIndex++];
+ TargetOpc |= static_cast<uint16_t>(MatcherTable[MatcherIndex++]) << 8;
+ unsigned EmitNodeInfo;
+ if (Opcode >= OPC_EmitNode0None && Opcode <= OPC_EmitNode2Chain) {
+ if (Opcode >= OPC_EmitNode0Chain && Opcode <= OPC_EmitNode2Chain)
+ EmitNodeInfo = OPFL_Chain;
+ else
+ EmitNodeInfo = OPFL_None;
+ } else if (Opcode >= OPC_MorphNodeTo0None &&
+ Opcode <= OPC_MorphNodeTo2GlueOutput) {
+ if (Opcode >= OPC_MorphNodeTo0Chain && Opcode <= OPC_MorphNodeTo2Chain)
+ EmitNodeInfo = OPFL_Chain;
+ else if (Opcode >= OPC_MorphNodeTo0GlueInput &&
+ Opcode <= OPC_MorphNodeTo2GlueInput)
+ EmitNodeInfo = OPFL_GlueInput;
+ else if (Opcode >= OPC_MorphNodeTo0GlueOutput &&
+ Opcode <= OPC_MorphNodeTo2GlueOutput)
+ EmitNodeInfo = OPFL_GlueOutput;
+ else
+ EmitNodeInfo = OPFL_None;
+ } else
+ EmitNodeInfo = MatcherTable[MatcherIndex++];
// Get the result VT list.
unsigned NumVTs;
// If this is one of the compressed forms, get the number of VTs based
// on the Opcode. Otherwise read the next byte from the table.
if (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2)
NumVTs = Opcode - OPC_MorphNodeTo0;
+ else if (Opcode >= OPC_MorphNodeTo0None && Opcode <= OPC_MorphNodeTo2None)
+ NumVTs = Opcode - OPC_MorphNodeTo0None;
+ else if (Opcode >= OPC_MorphNodeTo0Chain &&
+ Opcode <= OPC_MorphNodeTo2Chain)
+ NumVTs = Opcode - OPC_MorphNodeTo0Chain;
+ else if (Opcode >= OPC_MorphNodeTo0GlueInput &&
+ Opcode <= OPC_MorphNodeTo2GlueInput)
+ NumVTs = Opcode - OPC_MorphNodeTo0GlueInput;
+ else if (Opcode >= OPC_MorphNodeTo0GlueOutput &&
+ Opcode <= OPC_MorphNodeTo2GlueOutput)
+ NumVTs = Opcode - OPC_MorphNodeTo0GlueOutput;
else if (Opcode >= OPC_EmitNode0 && Opcode <= OPC_EmitNode2)
NumVTs = Opcode - OPC_EmitNode0;
+ else if (Opcode >= OPC_EmitNode0None && Opcode <= OPC_EmitNode2None)
+ NumVTs = Opcode - OPC_EmitNode0None;
+ else if (Opcode >= OPC_EmitNode0Chain && Opcode <= OPC_EmitNode2Chain)
+ NumVTs = Opcode - OPC_EmitNode0Chain;
else
NumVTs = MatcherTable[MatcherIndex++];
SmallVector<EVT, 4> VTs;
for (unsigned i = 0; i != NumVTs; ++i) {
MVT::SimpleValueType VT =
- (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
if (VT == MVT::iPTR)
VT = TLI->getPointerTy(CurDAG->getDataLayout()).SimpleTy;
VTs.push_back(VT);
@@ -3644,8 +3931,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// Create the node.
MachineSDNode *Res = nullptr;
- bool IsMorphNodeTo = Opcode == OPC_MorphNodeTo ||
- (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2);
+ bool IsMorphNodeTo =
+ Opcode == OPC_MorphNodeTo ||
+ (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2GlueOutput);
if (!IsMorphNodeTo) {
// If this is a normal EmitNode command, just create the new node and
// add the results to the RecordedNodes list.
@@ -3667,7 +3955,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
auto &Chain = ChainNodesMatched;
assert((!E || !is_contained(Chain, N)) &&
"Chain node replaced during MorphNode");
- llvm::erase_value(Chain, N);
+ llvm::erase(Chain, N);
});
Res = cast<MachineSDNode>(MorphNode(NodeToMatch, TargetOpc, VTList,
Ops, EmitNodeInfo));
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 5afd05648772..cf32350036d4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -62,15 +62,15 @@ STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered");
STATISTIC(StatepointMaxSlotsRequired,
"Maximum number of stack slots required for a singe statepoint");
-cl::opt<bool> UseRegistersForDeoptValues(
+static cl::opt<bool> UseRegistersForDeoptValues(
"use-registers-for-deopt-values", cl::Hidden, cl::init(false),
cl::desc("Allow using registers for non pointer deopt args"));
-cl::opt<bool> UseRegistersForGCPointersInLandingPad(
+static cl::opt<bool> UseRegistersForGCPointersInLandingPad(
"use-registers-for-gc-values-in-landing-pad", cl::Hidden, cl::init(false),
cl::desc("Allow using registers for gc pointer in landing pad"));
-cl::opt<unsigned> MaxRegistersForGCPointers(
+static cl::opt<unsigned> MaxRegistersForGCPointers(
"max-registers-for-gc-values", cl::Hidden, cl::init(0),
cl::desc("Max number of VRegs allowed to pass GC pointer meta args in"));
@@ -525,7 +525,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// deopt argument length, deopt arguments.., gc arguments...
// Figure out what lowering strategy we're going to use for each part
- // Note: Is is conservatively correct to lower both "live-in" and "live-out"
+ // Note: It is conservatively correct to lower both "live-in" and "live-out"
// as "live-through". A "live-through" variable is one which is "live-in",
// "live-out", and live throughout the lifetime of the call (i.e. we can find
// it from any PC within the transitive callee of the statepoint). In
@@ -715,7 +715,8 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
assert((GFI || SI.Bases.empty()) &&
"No gc specified, so cannot relocate pointers!");
- LLVM_DEBUG(dbgs() << "Lowering statepoint " << *SI.StatepointInstr << "\n");
+ LLVM_DEBUG(if (SI.StatepointInstr) dbgs()
+ << "Lowering statepoint " << *SI.StatepointInstr << "\n");
#ifndef NDEBUG
for (const auto *Reloc : SI.GCRelocates)
if (Reloc->getParent() == SI.StatepointInstr->getParent())
@@ -1032,10 +1033,16 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I,
ActualCallee = Callee;
}
+ const auto GCResultLocality = getGCResultLocality(I);
+ AttributeSet retAttrs;
+ if (GCResultLocality.first)
+ retAttrs = GCResultLocality.first->getAttributes().getRetAttrs();
+
StatepointLoweringInfo SI(DAG);
populateCallLoweringInfo(SI.CLI, &I, GCStatepointInst::CallArgsBeginPos,
I.getNumCallArgs(), ActualCallee,
- I.getActualReturnType(), false /* IsPatchPoint */);
+ I.getActualReturnType(), retAttrs,
+ /*IsPatchPoint=*/false);
// There may be duplication in the gc.relocate list; such as two copies of
// each relocation on normal and exceptional path for an invoke. We only
@@ -1091,8 +1098,6 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I,
SDValue ReturnValue = LowerAsSTATEPOINT(SI);
// Export the result value if needed
- const auto GCResultLocality = getGCResultLocality(I);
-
if (!GCResultLocality.first && !GCResultLocality.second) {
// The return value is not needed, just generate a poison value.
// Note: This covers the void return case.
@@ -1137,7 +1142,7 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
populateCallLoweringInfo(
SI.CLI, Call, ArgBeginIndex, Call->arg_size(), Callee,
ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : Call->getType(),
- false);
+ Call->getAttributes().getRetAttrs(), /*IsPatchPoint=*/false);
if (!VarArgDisallowed)
SI.CLI.IsVarArg = Call->getFunctionType()->isVarArg();
@@ -1156,6 +1161,7 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
// NB! The GC arguments are deliberately left empty.
+ LLVM_DEBUG(dbgs() << "Lowering call with deopt bundle " << *Call << "\n");
if (SDValue ReturnVal = LowerAsSTATEPOINT(SI)) {
ReturnVal = lowerRangeToAssertZExt(DAG, *Call, ReturnVal);
setValue(Call, ReturnVal);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a84d35a6ea4e..c5977546828f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -472,6 +473,17 @@ TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}
+SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
+ SDValue Addr, int JTI,
+ SelectionDAG &DAG) const {
+ SDValue Chain = Value;
+ // Jump table debug info is only needed if CodeView is enabled.
+ if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
+ Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
+ }
+ return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
+}
+
bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
const TargetMachine &TM = getTargetMachine();
@@ -554,8 +566,9 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
}
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
-/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
-/// generalized for targets with other types of implicit widening casts.
+/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
+/// but it could be generalized for targets with other types of implicit
+/// widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
const APInt &DemandedBits,
TargetLoweringOpt &TLO) const {
@@ -1040,13 +1053,10 @@ static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
// larger type size to do the transform.
if (!TLI.isOperationLegalOrCustom(AVGOpc, VT))
return SDValue();
-
- if (DAG.computeOverflowForAdd(IsSigned, Add.getOperand(0),
- Add.getOperand(1)) ==
- SelectionDAG::OFK_Never &&
- (!Add2 || DAG.computeOverflowForAdd(IsSigned, Add2.getOperand(0),
- Add2.getOperand(1)) ==
- SelectionDAG::OFK_Never))
+ if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
+ Add.getOperand(1)) &&
+ (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
+ Add2.getOperand(1))))
NVT = VT;
else
return SDValue();
@@ -1155,6 +1165,18 @@ bool TargetLowering::SimplifyDemandedBits(
// TODO: Call SimplifyDemandedBits for non-constant demanded elements.
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false; // Don't fall through, will infinitely loop.
+ case ISD::SPLAT_VECTOR: {
+ SDValue Scl = Op.getOperand(0);
+ APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
+ KnownBits KnownScl;
+ if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
+ return true;
+
+ // Implicitly truncate the bits to match the official semantics of
+ // SPLAT_VECTOR.
+ Known = KnownScl.trunc(BitWidth);
+ break;
+ }
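The new SPLAT_VECTOR case reduces a vector query to a scalar one: only the low element-width bits of the scalar ever reach the vector, so the demanded mask is resized to the scalar's width and the known bits come back truncated. A scalar model of why that is sound, illustrative rather than DAG code:

#include <array>
#include <cstdint>

// Splatting an i64 scalar into <4 x i32> implicitly truncates it, so no
// bit of Scl above bit 31 can influence the vector's value.
static std::array<uint32_t, 4> splatI64ToV4I32(uint64_t Scl) {
  uint32_t Elt = static_cast<uint32_t>(Scl);
  return {Elt, Elt, Elt, Elt};
}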
case ISD::LOAD: {
auto *LD = cast<LoadSDNode>(Op);
if (getTargetConstantFromLoad(LD)) {
@@ -1765,8 +1787,17 @@ bool TargetLowering::SimplifyDemandedBits(
APInt InDemandedMask = DemandedBits.lshr(ShAmt);
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
- Depth + 1))
+ Depth + 1)) {
+ SDNodeFlags Flags = Op.getNode()->getFlags();
+ if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
+ // Disable the nsw and nuw flags. We can no longer guarantee that we
+ // won't wrap after simplification.
+ Flags.setNoSignedWrap(false);
+ Flags.setNoUnsignedWrap(false);
+ Op->setFlags(Flags);
+ }
return true;
+ }
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero <<= ShAmt;
Known.One <<= ShAmt;
@@ -1788,6 +1819,37 @@ bool TargetLowering::SimplifyDemandedBits(
if ((ShAmt < DemandedBits.getActiveBits()) &&
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
+
+ // Narrow shift to lower half - similar to ShrinkDemandedOp.
+ // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
+ // Only do this if we demand the upper half so the knownbits are correct.
+ unsigned HalfWidth = BitWidth / 2;
+ if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
+ DemandedBits.countLeadingOnes() >= HalfWidth) {
+ EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
+ if (isNarrowingProfitable(VT, HalfVT) &&
+ isTypeDesirableForOp(ISD::SHL, HalfVT) &&
+ isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
+ (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
+ // If we're demanding the upper bits at all, we must ensure
+ // that the upper bits of the shift result are known to be zero,
+ // which is equivalent to the narrow shift being NUW.
+ if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
+ bool IsNSW = Known.countMinSignBits() > HalfWidth;
+ SDNodeFlags Flags;
+ Flags.setNoSignedWrap(IsNSW);
+ Flags.setNoUnsignedWrap(IsNUW);
+ SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
+ SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
+ ShAmt, HalfVT, dl, TLO.LegalTypes());
+ SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
+ NewShiftAmt, Flags);
+ SDValue NewExt =
+ TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
+ return TLO.CombineTo(Op, NewExt);
+ }
+ }
+ }
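A concrete instance of the narrowing above, for BitWidth = 64 and HalfWidth = 32: the rewrite is only sound when the i32 shift cannot carry into the zeroed upper half, i.e. when it is NUW, which is exactly what the Known.countMinLeadingZeros() test establishes. A scalar sketch under that assumption:

#include <cassert>
#include <cstdint>

// (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K)),
// valid when x << K is known to fit in 32 bits (the narrow shl is NUW).
static uint64_t narrowedShl(uint64_t X, unsigned K) {
  assert(K < 32 && (X >> (32 - K)) == 0 && "narrow shift must be NUW");
  uint32_t Narrow = static_cast<uint32_t>(X) << K; // i32 shl
  return static_cast<uint64_t>(Narrow);            // zero_extend to i64
}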
} else {
// This is a variable shift, so we can't shift the demand mask by a known
// amount. But if we are not demanding high bits, then we are not
@@ -1870,15 +1932,15 @@ bool TargetLowering::SimplifyDemandedBits(
// Narrow shift to lower half - similar to ShrinkDemandedOp.
// (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
- if ((BitWidth % 2) == 0 && !VT.isVector() &&
- ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
- TLO.DAG.MaskedValueIsZero(
- Op0, APInt::getHighBitsSet(BitWidth, BitWidth / 2)))) {
+ if ((BitWidth % 2) == 0 && !VT.isVector()) {
+ APInt HiBits = APInt::getHighBitsSet(BitWidth, BitWidth / 2);
EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
if (isNarrowingProfitable(VT, HalfVT) &&
isTypeDesirableForOp(ISD::SRL, HalfVT) &&
isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
- (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT))) {
+ (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
+ ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
+ TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
ShAmt, HalfVT, dl, TLO.LegalTypes());
@@ -1945,6 +2007,35 @@ bool TargetLowering::SimplifyDemandedBits(
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
+ // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
+ // supports sext_inreg.
+ if (Op0.getOpcode() == ISD::SHL) {
+ if (const APInt *InnerSA =
+ TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
+ unsigned LowBits = BitWidth - ShAmt;
+ EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
+ if (VT.isVector())
+ ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
+ VT.getVectorElementCount());
+
+ if (*InnerSA == ShAmt) {
+ if (!TLO.LegalOperations() ||
+ getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) == Legal)
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
+ Op0.getOperand(0),
+ TLO.DAG.getValueType(ExtVT)));
+
+ // Even if we can't convert to sext_inreg, we might be able to
+ // remove this shift pair if the input is already sign extended.
+ unsigned NumSignBits =
+ TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
+ if (NumSignBits > ShAmt)
+ return TLO.CombineTo(Op, Op0.getOperand(0));
+ }
+ }
+ }
+
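The shift pair in this fold is the classic in-register sign-extension idiom: shifting left by c1 and arithmetically right by the same amount sign-extends the low (BitWidth - c1) bits. A scalar check for i32 with c1 = 24, illustrative only:

#include <cstdint>

static int32_t viaShiftPair(int32_t X) {
  return static_cast<int32_t>(static_cast<uint32_t>(X) << 24) >> 24;
}

static int32_t viaSextInreg(int32_t X) {
  return static_cast<int8_t>(X); // sign_extend_inreg with ExtVT = i8
}

// viaShiftPair(X) == viaSextInreg(X) for every X, which is also why the
// pair can be dropped entirely when X already has more than c1 sign bits.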
APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
@@ -2106,30 +2197,57 @@ bool TargetLowering::SimplifyDemandedBits(
}
break;
}
- case ISD::UMIN: {
- // Check if one arg is always less than (or equal) to the other arg.
- SDValue Op0 = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
- KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
- Known = KnownBits::umin(Known0, Known1);
- if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
- return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
- if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
- return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
- break;
- }
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
case ISD::UMAX: {
- // Check if one arg is always greater than (or equal) to the other arg.
+ unsigned Opc = Op.getOpcode();
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
+
+ // If we're only demanding signbits, then we can simplify to OR/AND node.
+ unsigned BitOp =
+ (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
+ unsigned NumSignBits =
+ std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
+ TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
+ unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
+ if (NumSignBits >= NumDemandedUpperBits)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
+
+ // Check if one arg is always less/greater than (or equal) to the other arg.
KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
- Known = KnownBits::umax(Known0, Known1);
- if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
- return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
- if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
- return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
+ switch (Opc) {
+ case ISD::SMIN:
+ if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
+ return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
+ if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
+ return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
+ Known = KnownBits::smin(Known0, Known1);
+ break;
+ case ISD::SMAX:
+ if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
+ return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
+ if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
+ return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
+ Known = KnownBits::smax(Known0, Known1);
+ break;
+ case ISD::UMIN:
+ if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
+ return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
+ if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
+ return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
+ Known = KnownBits::umin(Known0, Known1);
+ break;
+ case ISD::UMAX:
+ if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
+ return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
+ if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
+ return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
+ Known = KnownBits::umax(Known0, Known1);
+ break;
+ }
break;
}
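The sign-bit shortcut at the top of this case rests on a small observation: if every demanded bit of both operands is a copy of the sign bit, each operand contributes either all-zeros or all-ones, and on that two-value domain smin and umax select all-ones whenever either input has it (OR) while smax and umin require both (AND). A scalar illustration over {0, -1}:

#include <algorithm>
#include <cassert>
#include <cstdint>

static void checkSignbitDomain(int8_t A, int8_t B) {
  assert((A == 0 || A == -1) && (B == 0 || B == -1));
  // smin picks -1 when either input is -1: bitwise OR.
  assert(std::min(A, B) == static_cast<int8_t>(A | B));
  // smax picks -1 only when both inputs are -1: bitwise AND.
  assert(std::max(A, B) == static_cast<int8_t>(A & B));
}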
case ISD::BITREVERSE: {
@@ -2285,11 +2403,17 @@ bool TargetLowering::SimplifyDemandedBits(
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
}
+ SDNodeFlags Flags = Op->getFlags();
APInt InDemandedBits = DemandedBits.trunc(InBits);
APInt InDemandedElts = DemandedElts.zext(InElts);
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
- Depth + 1))
+ Depth + 1)) {
+ if (Flags.hasNonNeg()) {
+ Flags.setNonNeg(false);
+ Op->setFlags(Flags);
+ }
return true;
+ }
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
Known = Known.zext(BitWidth);
@@ -2653,7 +2777,7 @@ bool TargetLowering::SimplifyDemandedBits(
// neg x with only low bit demanded is simply x.
if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
- isa<ConstantSDNode>(Op0) && cast<ConstantSDNode>(Op0)->isZero())
+ isNullConstant(Op0))
return TLO.CombineTo(Op, Op1);
// Attempt to avoid multi-use ops if we don't need anything from them.
@@ -2913,8 +3037,9 @@ bool TargetLowering::SimplifyDemandedVectorElts(
SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
TLO.DAG, Depth + 1);
if (NewOp0 || NewOp1) {
- SDValue NewOp = TLO.DAG.getNode(
- Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
+ SDValue NewOp =
+ TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
+ NewOp1 ? NewOp1 : Op1, Op->getFlags());
return TLO.CombineTo(Op, NewOp);
}
return false;
@@ -3823,8 +3948,12 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
return SDValue();
}
+ // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
+ // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
+ // it's liable to create an infinite loop.
SDValue Zero = DAG.getConstant(0, DL, OpVT);
- if (DAG.isKnownToBeAPowerOfTwo(Y)) {
+ if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
+ DAG.isKnownToBeAPowerOfTwo(Y)) {
// Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
// Note that where Y is variable and is known to have at most one bit set
// (for example, if it is Z & 1) we cannot do this; the expressions are not
@@ -3843,8 +3972,7 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
// Bail out if the compare operand that we want to turn into a zero is
// already a zero (otherwise, infinite loop).
- auto *YConst = dyn_cast<ConstantSDNode>(Y);
- if (YConst && YConst->isZero())
+ if (isNullConstant(Y))
return SDValue();
// Transform this into: ~X & Y == 0.
@@ -4088,8 +4216,8 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
// (ctpop x) u< 2 -> (x & x-1) == 0
// (ctpop x) u> 1 -> (x & x-1) != 0
if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
- // Keep the CTPOP if it is a legal vector op.
- if (CTVT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
+ // Keep the CTPOP if it is a cheap vector op.
+ if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
return SDValue();
unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
@@ -4110,28 +4238,32 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
}
- // Expand a power-of-2 comparison based on ctpop:
- // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
- // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
+ // Expand a power-of-2 comparison based on ctpop
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
- // Keep the CTPOP if it is legal.
- if (TLI.isOperationLegal(ISD::CTPOP, CTVT))
+ // Keep the CTPOP if it is cheap.
+ if (TLI.isCtpopFast(CTVT))
return SDValue();
SDValue Zero = DAG.getConstant(0, dl, CTVT);
SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
assert(CTVT.isInteger());
- ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
- SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
- SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
+
// It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
- // check before the emit a potentially unnecessary op.
- if (DAG.isKnownNeverZero(CTOp))
+ // check before emitting a potentially unnecessary op.
+ if (DAG.isKnownNeverZero(CTOp)) {
+ // (ctpop x) == 1 --> (x & x-1) == 0
+ // (ctpop x) != 1 --> (x & x-1) != 0
+ SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
+ SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
return RHS;
- SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
- unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
- return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
+ }
+
+ // (ctpop x) == 1 --> (x ^ x-1) > x-1
+ // (ctpop x) != 1 --> (x ^ x-1) <= x-1
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
+ ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
+ return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
}
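The branch-free replacement works because x ^ (x - 1) sets every bit up to and including the lowest set bit of x; that value exceeds x - 1 exactly when x has a single set bit, and x == 0 falls out for free since the compare degenerates to ~0 > ~0. That is why the earlier (x == 0) setcc and the AND/OR combining it are no longer needed. A standalone check, illustrative only:

#include <cstdint>

static bool popcountIsOne(uint32_t X) {
  return (X ^ (X - 1)) > (X - 1); // unsigned compare, as in SETUGT
}

// Agrees with (popcount(X) == 1) for every X, including X == 0, where
// both sides of the compare are ~0u and the test fails.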
return SDValue();
@@ -4477,8 +4609,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
SDValue Ptr = Lod->getBasePtr();
if (bestOffset != 0)
- Ptr =
- DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(bestOffset),
+ dl);
SDValue NewLoad =
DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
Lod->getPointerInfo().getWithOffset(bestOffset),
@@ -4983,6 +5115,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
+ // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
+ if (isOperationLegalOrCustom(ISD::IS_FPCLASS, N0.getValueType()) &&
+ !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
+ bool IsFabs = N0.getOpcode() == ISD::FABS;
+ SDValue Op = IsFabs ? N0.getOperand(0) : N0;
+ if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
+ FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
+ : (IsFabs ? fcInf : fcPosInf);
+ if (Cond == ISD::SETUEQ)
+ Flag |= fcNan;
+ return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
+ DAG.getTargetConstant(Flag, dl, MVT::i32));
+ }
+ }
+
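The FPClassTest mask enumerates the only ways the compare can hold: through fabs a compare against +Inf accepts both infinities (fcInf) and one against -Inf can never be true (fcNone), a bare compare accepts just the matching infinity, and the unordered form additionally accepts NaN. A scalar model, illustrative only:

#include <cmath>

// setoeq x, +Inf       -> is_fpclass x, fcPosInf
static bool oeqPosInf(double X) { return std::isinf(X) && X > 0; }
// setoeq (fabs x), Inf -> is_fpclass x, fcInf (either infinity)
static bool oeqFabsInf(double X) { return std::isinf(X); }
// setueq x, +Inf       -> is_fpclass x, fcPosInf | fcNan
static bool ueqPosInf(double X) {
  return (std::isinf(X) && X > 0) || std::isnan(X);
}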
// If the condition is not legal, see if we can find an equivalent one
// which is legal.
if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
@@ -5037,7 +5184,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (isBitwiseNot(N1))
return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
- if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(0))) {
SDValue Not = DAG.getNOT(dl, N1, OpVT);
return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
}
@@ -5297,11 +5445,12 @@ SDValue TargetLowering::LowerAsmOutputForConstraint(
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
+ StringRef Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
- if (Constraint.length() > 1) return;
+ if (Constraint.size() > 1)
+ return;
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
@@ -5620,20 +5769,27 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
return ConstraintOperands;
}
-/// Return an integer indicating how general CT is.
-static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+/// Return a number indicating our preference for choosing a type of constraint
+/// over another, for the purpose of sorting them. Immediates are almost always
+/// preferable (when they can be emitted). A higher return value means a
+/// stronger preference for one constraint type relative to another.
+/// FIXME: We should prefer registers over memory but doing so may lead to
+/// unrecoverable register exhaustion later.
+/// https://github.com/llvm/llvm-project/issues/20571
+static unsigned getConstraintPriority(TargetLowering::ConstraintType CT) {
switch (CT) {
case TargetLowering::C_Immediate:
case TargetLowering::C_Other:
- case TargetLowering::C_Unknown:
- return 0;
- case TargetLowering::C_Register:
- return 1;
- case TargetLowering::C_RegisterClass:
- return 2;
+ return 4;
case TargetLowering::C_Memory:
case TargetLowering::C_Address:
return 3;
+ case TargetLowering::C_RegisterClass:
+ return 2;
+ case TargetLowering::C_Register:
+ return 1;
+ case TargetLowering::C_Unknown:
+ return 0;
}
llvm_unreachable("Invalid constraint type");
}
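These priorities feed the stable sort in getConstraintPreferences below, so a multi-option constraint is tried immediates first, then memory, then register class, then a specific register. A toy model of the ordering for a string such as "rmi", with illustrative letters rather than the full constraint parser:

#include <algorithm>
#include <string>

static int toyPriority(char C) {
  switch (C) {
  case 'i': return 4; // C_Immediate
  case 'm': return 3; // C_Memory
  case 'r': return 2; // C_RegisterClass
  default:  return 0; // C_Unknown
  }
}

static std::string toyOrder(std::string Codes) {
  std::stable_sort(Codes.begin(), Codes.end(), [](char A, char B) {
    return toyPriority(A) > toyPriority(B);
  });
  return Codes; // "rmi" -> "imr": the immediate is tried first
}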
@@ -5713,11 +5869,15 @@ TargetLowering::ConstraintWeight
/// If there are multiple different constraints that we could pick for this
/// operand (e.g. "imr") try to pick the 'best' one.
-/// This is somewhat tricky: constraints fall into four classes:
-/// Other -> immediates and magic values
+/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
+/// into seven classes:
/// Register -> one specific register
/// RegisterClass -> a group of regs
/// Memory -> memory
+/// Address -> a symbolic memory reference
+/// Immediate -> immediate values
+/// Other -> magic values (such as "Flag Output Operands")
+/// Unknown -> something we don't recognize yet and can't handle
/// Ideally, we would pick the most specific constraint possible: if we have
/// something that fits into a register, we would pick it. The problem here
/// is that if we have something that could either be in a register or in
@@ -5731,18 +5891,13 @@ TargetLowering::ConstraintWeight
/// 2) Otherwise, pick the most general constraint present. This prefers
/// 'm' over 'r', for example.
///
-static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
- const TargetLowering &TLI,
- SDValue Op, SelectionDAG *DAG) {
- assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
- unsigned BestIdx = 0;
- TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
- int BestGenerality = -1;
+TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
+ TargetLowering::AsmOperandInfo &OpInfo) const {
+ ConstraintGroup Ret;
- // Loop over the options, keeping track of the most general one.
- for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
- TargetLowering::ConstraintType CType =
- TLI.getConstraintType(OpInfo.Codes[i]);
+ Ret.reserve(OpInfo.Codes.size());
+ for (StringRef Code : OpInfo.Codes) {
+ TargetLowering::ConstraintType CType = getConstraintType(Code);
// Indirect 'other' or 'immediate' constraints are not allowed.
if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
@@ -5750,40 +5905,38 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
CType == TargetLowering::C_RegisterClass))
continue;
- // If this is an 'other' or 'immediate' constraint, see if the operand is
- // valid for it. For example, on X86 we might have an 'rI' constraint. If
- // the operand is an integer in the range [0..31] we want to use I (saving a
- // load of a register), otherwise we must use 'r'.
- if ((CType == TargetLowering::C_Other ||
- CType == TargetLowering::C_Immediate) && Op.getNode()) {
- assert(OpInfo.Codes[i].size() == 1 &&
- "Unhandled multi-letter 'other' constraint");
- std::vector<SDValue> ResultOps;
- TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
- ResultOps, *DAG);
- if (!ResultOps.empty()) {
- BestType = CType;
- BestIdx = i;
- break;
- }
- }
-
// Things with matching constraints can only be registers, per gcc
// documentation. This mainly affects "g" constraints.
if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
continue;
- // This constraint letter is more general than the previous one, use it.
- int Generality = getConstraintGenerality(CType);
- if (Generality > BestGenerality) {
- BestType = CType;
- BestIdx = i;
- BestGenerality = Generality;
- }
+ Ret.emplace_back(Code, CType);
}
- OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
- OpInfo.ConstraintType = BestType;
+ std::stable_sort(
+ Ret.begin(), Ret.end(), [](ConstraintPair a, ConstraintPair b) {
+ return getConstraintPriority(a.second) > getConstraintPriority(b.second);
+ });
+
+ return Ret;
+}
+
+/// If we have an immediate, see if we can lower it. Return true if we can,
+/// false otherwise.
+static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
+ SDValue Op, SelectionDAG *DAG,
+ const TargetLowering &TLI) {
+
+ assert((P.second == TargetLowering::C_Other ||
+ P.second == TargetLowering::C_Immediate) &&
+ "need immediate or other");
+
+ if (!Op.getNode())
+ return false;
+
+ std::vector<SDValue> ResultOps;
+ TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
+ return !ResultOps.empty();
}
/// Determines the constraint code and constraint type to use for the specific
@@ -5798,7 +5951,26 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
OpInfo.ConstraintCode = OpInfo.Codes[0];
OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
} else {
- ChooseConstraint(OpInfo, *this, Op, DAG);
+ ConstraintGroup G = getConstraintPreferences(OpInfo);
+ if (G.empty())
+ return;
+
+ unsigned BestIdx = 0;
+ for (const unsigned E = G.size();
+ BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
+ G[BestIdx].second == TargetLowering::C_Immediate);
+ ++BestIdx) {
+ if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
+ break;
+ // If we're out of constraints, just pick the first one.
+ if (BestIdx + 1 == E) {
+ BestIdx = 0;
+ break;
+ }
+ }
+
+ OpInfo.ConstraintCode = G[BestIdx].first;
+ OpInfo.ConstraintType = G[BestIdx].second;
}
// 'X' matches anything.
@@ -5914,6 +6086,49 @@ TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
return SDValue();
}
+/// Build sdiv by power-of-2 with conditional move instructions
+/// Ref: "Hacker's Delight" by Henry Warren 10-1
+/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
+/// bgez x, label
+/// add x, x, 2**k-1
+/// label:
+/// sra res, x, k
+/// neg res, res (when the divisor is negative)
+SDValue TargetLowering::buildSDIVPow2WithCMov(
+ SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
+ unsigned Lg2 = Divisor.countr_zero();
+ EVT VT = N->getValueType(0);
+
+ SDLoc DL(N);
+ SDValue N0 = N->getOperand(0);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
+ SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
+
+ // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+ SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+
+ Created.push_back(Cmp.getNode());
+ Created.push_back(Add.getNode());
+ Created.push_back(CMov.getNode());
+
+ // Divide by pow2.
+ SDValue SRA =
+ DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));
+
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (Divisor.isNonNegative())
+ return SRA;
+
+ Created.push_back(SRA.getNode());
+ return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
+}
+
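Arithmetically, sra alone rounds toward negative infinity while sdiv must round toward zero, so negative dividends are biased by 2^k - 1 before the shift; a negative divisor only adds the final negation. A scalar model of the positive-divisor path, with the select standing in for the conditional move:

#include <cstdint>

// sdiv N, 2^K with round-toward-zero semantics.
static int32_t sdivPow2(int32_t N, unsigned K) {
  int32_t Bias = static_cast<int32_t>((1u << K) - 1); // Pow2MinusOne
  int32_t Adj = N < 0 ? N + Bias : N;                 // SELECT(Cmp, Add, N0)
  return Adj >> K;                                    // SRA by K
}

// sdivPow2(-7, 1) == -3 == -7 / 2, whereas (-7 >> 1) == -4.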
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
@@ -6016,7 +6231,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
// Multiply the numerator (operand 0) by the magic value.
// FIXME: We should support doing a MUL in a wider type.
auto GetMULHS = [&](SDValue X, SDValue Y) {
- // If the type isn't legal, use a wider mul of the the type calculated
+ // If the type isn't legal, use a wider mul of the type calculated
// earlier.
if (!isTypeLegal(VT)) {
X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
@@ -6203,7 +6418,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
// FIXME: We should support doing a MUL in a wider type.
auto GetMULHU = [&](SDValue X, SDValue Y) {
- // If the type isn't legal, use a wider mul of the the type calculated
+ // If the type isn't legal, use a wider mul of the type calculated
// earlier.
if (!isTypeLegal(VT)) {
X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
@@ -9131,7 +9346,7 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
SrcEltVT, LD->getOriginalAlign(),
LD->getMemOperand()->getFlags(), LD->getAAInfo());
- BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::Fixed(Stride));
+ BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
Vals.push_back(ScalarLoad.getValue(0));
LoadChains.push_back(ScalarLoad.getValue(1));
@@ -9206,7 +9421,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
DAG.getVectorIdxConstant(Idx, SL));
SDValue Ptr =
- DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Idx * Stride));
+ DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
// This scalar TruncStore may be illegal, but we legalize it later.
SDValue Store = DAG.getTruncStore(
@@ -9342,7 +9557,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
LD->getAAInfo());
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
@@ -9352,7 +9567,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
LD->getAAInfo());
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
@@ -9477,6 +9692,14 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
SDValue ShiftAmount = DAG.getConstant(
NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
SDValue Lo = Val;
+ // If Val is a constant, replace the upper bits with 0. The SRL will constant
+ // fold and not use the upper bits. A smaller constant may be easier to
+ // materialize.
+ if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
+ Lo = DAG.getNode(
+ ISD::AND, dl, VT, Lo,
+ DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
+ VT));
SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
// Store the two parts
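The masking pays off because the SRL of a non-opaque constant folds away, leaving the low store with a constant whose upper bits are zero and which many targets can materialize more cheaply. A scalar picture of the split, illustrative and assuming a little-endian layout:

#include <cstdint>
#include <cstring>

// Splitting an unaligned 32-bit store of 0x12345678 into two 16-bit
// stores: the low copy is masked to 0x5678, and the high half folds
// to 0x1234, so neither store needs the full-width constant.
static void splitStore(unsigned char *P, uint32_t Val) {
  uint16_t Lo = static_cast<uint16_t>(Val & 0xFFFFu);
  uint16_t Hi = static_cast<uint16_t>(Val >> 16);
  std::memcpy(P, &Lo, 2); // low half first on little-endian targets
  std::memcpy(P + 2, &Hi, 2);
}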
@@ -9486,7 +9709,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
ST->getMemOperand()->getFlags());
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
Store2 = DAG.getTruncStore(
Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
@@ -9618,7 +9841,7 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
// Access to address of TLS variable xyz is lowered to a function call:
// __emutls_get_address( address of global variable named "__emutls_v.xyz" )
EVT PtrVT = getPointerTy(DAG.getDataLayout());
- PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
+ PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
SDLoc dl(GA);
ArgListTy Args;
@@ -9657,20 +9880,18 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
return SDValue();
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDLoc dl(Op);
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- if (C->isZero() && CC == ISD::SETEQ) {
- EVT VT = Op.getOperand(0).getValueType();
- SDValue Zext = Op.getOperand(0);
- if (VT.bitsLT(MVT::i32)) {
- VT = MVT::i32;
- Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
- }
- unsigned Log2b = Log2_32(VT.getSizeInBits());
- SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
- SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
- DAG.getConstant(Log2b, dl, MVT::i32));
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
- }
+ if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
+ EVT VT = Op.getOperand(0).getValueType();
+ SDValue Zext = Op.getOperand(0);
+ if (VT.bitsLT(MVT::i32)) {
+ VT = MVT::i32;
+ Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
+ }
+ unsigned Log2b = Log2_32(VT.getSizeInBits());
+ SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
+ SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
+ DAG.getConstant(Log2b, dl, MVT::i32));
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
}
return SDValue();
}
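The isNullConstant cleanup does not change the trick itself: for a 32-bit value, ctlz(x) is 32 exactly when x == 0, so shifting the count right by Log2_32(32) = 5 yields the boolean (x == 0) without a compare. A standalone version, illustrative only:

#include <bit>
#include <cstdint>

static uint32_t cmpEqZero(uint32_t X) {
  // std::countl_zero(0u) == 32 and 32 >> 5 == 1; any nonzero X has a
  // leading-zero count below 32, so the shift yields 0.
  return static_cast<uint32_t>(std::countl_zero(X)) >> 5;
}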
@@ -10489,9 +10710,9 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
}
- // We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as
+ // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
// libcall emission cannot handle this. Large result types will fail.
- if (SrcVT == MVT::f16) {
+ if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
SrcVT = Src.getValueType();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
index 153fe77b8b4a..38f658084294 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -106,7 +106,7 @@ ShadowStackGCLowering::ShadowStackGCLowering() : FunctionPass(ID) {
Constant *ShadowStackGCLowering::GetFrameMap(Function &F) {
// doInitialization creates the abstract type of this value.
- Type *VoidPtr = Type::getInt8PtrTy(F.getContext());
+ Type *VoidPtr = PointerType::getUnqual(F.getContext());
// Truncate the ShadowStackDescriptor if some metadata is null.
unsigned NumMeta = 0;
@@ -326,7 +326,7 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
// Initialize the map pointer and load the current head of the shadow stack.
Instruction *CurrentHead =
- AtEntry.CreateLoad(StackEntryTy->getPointerTo(), Head, "gc_currhead");
+ AtEntry.CreateLoad(AtEntry.getPtrTy(), Head, "gc_currhead");
Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
StackEntry, 0, 1, "gc_frame.map");
AtEntry.CreateStore(FrameMap, EntryMapPtr);
@@ -368,8 +368,8 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
Instruction *EntryNextPtr2 =
CreateGEP(Context, *AtExit, ConcreteStackEntryTy, StackEntry, 0, 0,
"gc_frame.next");
- Value *SavedHead = AtExit->CreateLoad(StackEntryTy->getPointerTo(),
- EntryNextPtr2, "gc_savedhead");
+ Value *SavedHead =
+ AtExit->CreateLoad(AtExit->getPtrTy(), EntryNextPtr2, "gc_savedhead");
AtExit->CreateStore(SavedHead, Head);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
index 4b1d3637a746..ab57d08e527e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -139,7 +139,7 @@ class ShrinkWrap : public MachineFunctionPass {
MachineOptimizationRemarkEmitter *ORE = nullptr;
/// Frequency of the Entry block.
- uint64_t EntryFreq = 0;
+ BlockFrequency EntryFreq;
/// Current opcode for frame setup.
unsigned FrameSetupOpcode = ~0u;
@@ -640,7 +640,7 @@ bool ShrinkWrap::postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
FindIDom<>(**DirtyPreds.begin(), DirtyPreds, *MDT, false);
while (NewSave && (hasDirtyPred(ReachableByDirty, *NewSave) ||
- EntryFreq < MBFI->getBlockFreq(NewSave).getFrequency() ||
+ EntryFreq < MBFI->getBlockFreq(NewSave) ||
/*Entry freq has been observed to be higher than
that of a loop block in some cases*/
MLI->getLoopFor(NewSave)))
@@ -675,8 +675,8 @@ bool ShrinkWrap::postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
"Incorrect save or restore point due to dominance relations");
assert((!MLI->getLoopFor(Save) && !MLI->getLoopFor(Restore)) &&
"Unexpected save or restore point in a loop");
- assert((EntryFreq >= MBFI->getBlockFreq(Save).getFrequency() &&
- EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency()) &&
+ assert((EntryFreq >= MBFI->getBlockFreq(Save) &&
+ EntryFreq >= MBFI->getBlockFreq(Restore)) &&
"Incorrect save or restore point based on block frequency");
return true;
}
@@ -878,21 +878,21 @@ bool ShrinkWrap::performShrinkWrapping(
return false;
}
- LLVM_DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq
- << '\n');
+ LLVM_DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: "
+ << EntryFreq.getFrequency() << '\n');
const TargetFrameLowering *TFI =
MachineFunc->getSubtarget().getFrameLowering();
do {
LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: "
<< printMBBReference(*Save) << ' '
- << MBFI->getBlockFreq(Save).getFrequency()
+ << printBlockFreq(*MBFI, *Save)
<< "\nRestore: " << printMBBReference(*Restore) << ' '
- << MBFI->getBlockFreq(Restore).getFrequency() << '\n');
+ << printBlockFreq(*MBFI, *Restore) << '\n');
bool IsSaveCheap, TargetCanUseSaveAsPrologue = false;
- if (((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(Save).getFrequency()) &&
- EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency()) &&
+ if (((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(Save)) &&
+ EntryFreq >= MBFI->getBlockFreq(Restore)) &&
((TargetCanUseSaveAsPrologue = TFI->canUseAsPrologue(*Save)) &&
TFI->canUseAsEpilogue(*Restore)))
break;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index d09953e76a80..515b5764a094 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/SjLjEHPrepare.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -31,13 +32,13 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
-#define DEBUG_TYPE "sjljehprepare"
+#define DEBUG_TYPE "sjlj-eh-prepare"
STATISTIC(NumInvokes, "Number of invokes replaced");
STATISTIC(NumSpilled, "Number of registers live across unwind edges");
namespace {
-class SjLjEHPrepare : public FunctionPass {
+class SjLjEHPrepareImpl {
IntegerType *DataTy = nullptr;
Type *doubleUnderDataTy = nullptr;
Type *doubleUnderJBufTy = nullptr;
@@ -55,16 +56,9 @@ class SjLjEHPrepare : public FunctionPass {
const TargetMachine *TM = nullptr;
public:
- static char ID; // Pass identification, replacement for typeid
- explicit SjLjEHPrepare(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), TM(TM) {}
- bool doInitialization(Module &M) override;
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {}
- StringRef getPassName() const override {
- return "SJLJ Exception Handling preparation";
- }
+ explicit SjLjEHPrepareImpl(const TargetMachine *TM = nullptr) : TM(TM) {}
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
private:
bool setupEntryBlockAndCallSites(Function &F);
@@ -74,8 +68,32 @@ private:
void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst *> Invokes);
void insertCallSiteStore(Instruction *I, int Number);
};
+
+class SjLjEHPrepare : public FunctionPass {
+ SjLjEHPrepareImpl Impl;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit SjLjEHPrepare(const TargetMachine *TM = nullptr)
+ : FunctionPass(ID), Impl(TM) {}
+ bool doInitialization(Module &M) override { return Impl.doInitialization(M); }
+ bool runOnFunction(Function &F) override { return Impl.runOnFunction(F); }
+
+ StringRef getPassName() const override {
+ return "SJLJ Exception Handling preparation";
+ }
+};
+
} // end anonymous namespace
+PreservedAnalyses SjLjEHPreparePass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ SjLjEHPrepareImpl Impl(TM);
+ Impl.doInitialization(*F.getParent());
+ bool Changed = Impl.runOnFunction(F);
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
char SjLjEHPrepare::ID = 0;
INITIALIZE_PASS(SjLjEHPrepare, DEBUG_TYPE, "Prepare SjLj exceptions",
false, false)
@@ -87,10 +105,10 @@ FunctionPass *llvm::createSjLjEHPreparePass(const TargetMachine *TM) {
// doInitialization - Set up declarations and types needed to process
// exceptions.
-bool SjLjEHPrepare::doInitialization(Module &M) {
+bool SjLjEHPrepareImpl::doInitialization(Module &M) {
// Build the function context structure.
// builtin_setjmp uses a five word jbuf
- Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
+ Type *VoidPtrTy = PointerType::getUnqual(M.getContext());
unsigned DataBits =
TM ? TM->getSjLjDataSize() : TargetMachine::DefaultSjLjDataSize;
DataTy = Type::getIntNTy(M.getContext(), DataBits);
@@ -104,12 +122,12 @@ bool SjLjEHPrepare::doInitialization(Module &M) {
doubleUnderJBufTy // __jbuf
);
- return true;
+ return false;
}
/// insertCallSiteStore - Insert a store of the call-site value to the
/// function context
-void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) {
+void SjLjEHPrepareImpl::insertCallSiteStore(Instruction *I, int Number) {
IRBuilder<> Builder(I);
// Get a reference to the call_site field.
@@ -140,8 +158,8 @@ static void MarkBlocksLiveIn(BasicBlock *BB,
/// substituteLPadValues - Substitute the values returned by the landingpad
/// instruction with those returned by the personality function.
-void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
- Value *SelVal) {
+void SjLjEHPrepareImpl::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
+ Value *SelVal) {
SmallVector<Value *, 8> UseWorkList(LPI->users());
while (!UseWorkList.empty()) {
Value *Val = UseWorkList.pop_back_val();
@@ -175,8 +193,9 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
/// setupFunctionContext - Allocate the function context on the stack and fill
/// it with all of the data that we know at this point.
-Value *SjLjEHPrepare::setupFunctionContext(Function &F,
- ArrayRef<LandingPadInst *> LPads) {
+Value *
+SjLjEHPrepareImpl::setupFunctionContext(Function &F,
+ ArrayRef<LandingPadInst *> LPads) {
BasicBlock *EntryBB = &F.front();
// Create an alloca for the incoming jump buffer ptr and the new jump buffer
@@ -200,7 +219,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
Value *ExceptionAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData,
0, 0, "exception_gep");
Value *ExnVal = Builder.CreateLoad(DataTy, ExceptionAddr, true, "exn_val");
- ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy());
+ ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getPtrTy());
Value *SelectorAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData,
0, 1, "exn_selector_gep");
@@ -218,9 +237,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
Value *PersonalityFn = F.getPersonalityFn();
Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(
FunctionContextTy, FuncCtx, 0, 3, "pers_fn_gep");
- Builder.CreateStore(
- Builder.CreateBitCast(PersonalityFn, Builder.getInt8PtrTy()),
- PersonalityFieldPtr, /*isVolatile=*/true);
+ Builder.CreateStore(PersonalityFn, PersonalityFieldPtr, /*isVolatile=*/true);
// LSDA address
Value *LSDA = Builder.CreateCall(LSDAAddrFn, {}, "lsda_addr");
@@ -235,7 +252,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
/// specially, we lower each arg to a copy instruction in the entry block. This
/// ensures that the argument value itself cannot be live out of the entry
/// block.
-void SjLjEHPrepare::lowerIncomingArguments(Function &F) {
+void SjLjEHPrepareImpl::lowerIncomingArguments(Function &F) {
BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin();
while (isa<AllocaInst>(AfterAllocaInsPt) &&
cast<AllocaInst>(AfterAllocaInsPt)->isStaticAlloca())
@@ -266,8 +283,8 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) {
/// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind
/// edge and spill them.
-void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
- ArrayRef<InvokeInst *> Invokes) {
+void SjLjEHPrepareImpl::lowerAcrossUnwindEdges(Function &F,
+ ArrayRef<InvokeInst *> Invokes) {
// Finally, scan the code looking for instructions with bad live ranges.
for (BasicBlock &BB : F) {
for (Instruction &Inst : BB) {
@@ -360,7 +377,7 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
/// setupEntryBlockAndCallSites - Setup the entry block by creating and filling
/// the function context and marking the call sites with the appropriate
/// values. These values are used by the DWARF EH emitter.
-bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
+bool SjLjEHPrepareImpl::setupEntryBlockAndCallSites(Function &F) {
SmallVector<ReturnInst *, 16> Returns;
SmallVector<InvokeInst *, 16> Invokes;
SmallSetVector<LandingPadInst *, 16> LPads;
@@ -418,8 +435,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
// Store a pointer to the function context so that the back-end will know
// where to look for it.
- Value *FuncCtxArg = Builder.CreateBitCast(FuncCtx, Builder.getInt8PtrTy());
- Builder.CreateCall(FuncCtxFn, FuncCtxArg);
+ Builder.CreateCall(FuncCtxFn, FuncCtx);
// At this point, we are all set up, update the invoke instructions to mark
// their call_site values.
@@ -482,7 +498,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
return true;
}
-bool SjLjEHPrepare::runOnFunction(Function &F) {
+bool SjLjEHPrepareImpl::runOnFunction(Function &F) {
Module &M = *F.getParent();
RegisterFn = M.getOrInsertFunction(
"_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()),
@@ -490,12 +506,15 @@ bool SjLjEHPrepare::runOnFunction(Function &F) {
UnregisterFn = M.getOrInsertFunction(
"_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()),
PointerType::getUnqual(FunctionContextTy));
- FrameAddrFn = Intrinsic::getDeclaration(
- &M, Intrinsic::frameaddress,
- {Type::getInt8PtrTy(M.getContext(),
- M.getDataLayout().getAllocaAddrSpace())});
- StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
- StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
+
+ PointerType *AllocaPtrTy = M.getDataLayout().getAllocaPtrType(M.getContext());
+
+ FrameAddrFn =
+ Intrinsic::getDeclaration(&M, Intrinsic::frameaddress, {AllocaPtrTy});
+ StackAddrFn =
+ Intrinsic::getDeclaration(&M, Intrinsic::stacksave, {AllocaPtrTy});
+ StackRestoreFn =
+ Intrinsic::getDeclaration(&M, Intrinsic::stackrestore, {AllocaPtrTy});
BuiltinSetupDispatchFn =
Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setup_dispatch);
LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
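
The SjLjEHPrepare hunks above are part of the opaque-pointer migration: typed helpers such as Type::getInt8PtrTy disappear in favor of one pointer type per address space. A minimal standalone sketch of the replacement API (the function and variable names here are illustrative, not from the patch):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// With opaque pointers the pointee type no longer appears in the request;
// only the address space distinguishes pointer types.
void makePointerTypes(LLVMContext &Ctx) {
  // Old style (removed): Type *VoidPtrTy = Type::getInt8PtrTy(Ctx);
  PointerType *PtrTy = PointerType::getUnqual(Ctx);               // 'ptr'
  PointerType *AS5Ty = PointerType::get(Ctx, /*AddressSpace=*/5); // 'ptr addrspace(5)'
  (void)PtrTy;
  (void)AS5Ty;
}
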
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp
index 47ee36971d0e..65726f06dedb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -237,6 +237,11 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
}
}
+void SlotIndexes::packIndexes() {
+ for (auto [Index, Entry] : enumerate(indexList))
+ Entry.setIndex(Index * SlotIndex::InstrDist);
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SlotIndexes::dump() const {
for (const IndexListEntry &ILE : indexList) {
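
The new SlotIndexes::packIndexes walks the index list with llvm::enumerate and structured bindings, renumbering every entry at a uniform InstrDist stride. A self-contained sketch of the same idiom over a plain vector (the Entry struct is made up for illustration):

#include "llvm/ADT/STLExtras.h"
#include <vector>

struct Entry {
  unsigned Index = 0;
  void setIndex(unsigned I) { Index = I; }
};

// Renumber entries at a fixed stride; element references obtained through
// llvm::enumerate are mutable, so setIndex updates the vector in place.
void packAtStride(std::vector<Entry> &List, unsigned Stride) {
  for (auto [I, E] : llvm::enumerate(List))
    E.setIndex(static_cast<unsigned>(I) * Stride);
}
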
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp
index 91da5e49713c..cdb8099e354b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -32,7 +32,6 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -52,7 +51,6 @@ char &llvm::SpillPlacementID = SpillPlacement::ID;
INITIALIZE_PASS_BEGIN(SpillPlacement, DEBUG_TYPE,
"Spill Code Placement Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(SpillPlacement, DEBUG_TYPE,
"Spill Code Placement Analysis", true, true)
@@ -60,7 +58,6 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequiredTransitive<EdgeBundles>();
- AU.addRequiredTransitive<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -109,8 +106,10 @@ struct SpillPlacement::Node {
/// clear - Reset per-query data, but preserve frequencies that only depend on
/// the CFG.
- void clear(const BlockFrequency &Threshold) {
- BiasN = BiasP = Value = 0;
+ void clear(BlockFrequency Threshold) {
+ BiasN = BlockFrequency(0);
+ BiasP = BlockFrequency(0);
+ Value = 0;
SumLinkWeights = Threshold;
Links.clear();
}
@@ -142,14 +141,14 @@ struct SpillPlacement::Node {
BiasN += freq;
break;
case MustSpill:
- BiasN = BlockFrequency::getMaxFrequency();
+ BiasN = BlockFrequency::max();
break;
}
}
/// update - Recompute Value from Bias and Links. Return true when node
/// preference changes.
- bool update(const Node nodes[], const BlockFrequency &Threshold) {
+ bool update(const Node nodes[], BlockFrequency Threshold) {
// Compute the weighted sum of inputs.
BlockFrequency SumN = BiasN;
BlockFrequency SumP = BiasP;
@@ -193,7 +192,6 @@ struct SpillPlacement::Node {
bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
bundles = &getAnalysis<EdgeBundles>();
- loops = &getAnalysis<MachineLoopInfo>();
assert(!nodes && "Leaking node array");
nodes = new Node[bundles->getNumBundles()];
@@ -237,8 +235,10 @@ void SpillPlacement::activate(unsigned n) {
// limiting the number of blocks visited and the number of links in the
// Hopfield network.
if (bundles->getBlocks(n).size() > 100) {
- nodes[n].BiasP = 0;
- nodes[n].BiasN = (MBFI->getEntryFreq() / 16);
+ nodes[n].BiasP = BlockFrequency(0);
+ BlockFrequency BiasN = MBFI->getEntryFreq();
+ BiasN >>= 4;
+ nodes[n].BiasN = BiasN;
}
}
@@ -247,12 +247,12 @@ void SpillPlacement::activate(unsigned n) {
/// Set the threshold relative to \c Entry. Since the threshold is used as a
/// bound on the open interval (-Threshold;Threshold), 1 is the minimum
/// threshold.
-void SpillPlacement::setThreshold(const BlockFrequency &Entry) {
+void SpillPlacement::setThreshold(BlockFrequency Entry) {
// Apparently 2 is a good threshold when Entry==2^14, but we need to scale
// it. Divide by 2^13, rounding as appropriate.
uint64_t Freq = Entry.getFrequency();
uint64_t Scaled = (Freq >> 13) + bool(Freq & (1 << 12));
- Threshold = std::max(UINT64_C(1), Scaled);
+ Threshold = BlockFrequency(std::max(UINT64_C(1), Scaled));
}
/// addConstraints - Compute node biases and weights from a set of constraints.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.h b/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.h
index bd37d85c6c0d..5fd9b085259d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.h
@@ -38,13 +38,11 @@ class BitVector;
class EdgeBundles;
class MachineBlockFrequencyInfo;
class MachineFunction;
-class MachineLoopInfo;
class SpillPlacement : public MachineFunctionPass {
struct Node;
const MachineFunction *MF = nullptr;
const EdgeBundles *bundles = nullptr;
- const MachineLoopInfo *loops = nullptr;
const MachineBlockFrequencyInfo *MBFI = nullptr;
Node *nodes = nullptr;
@@ -162,7 +160,7 @@ private:
void releaseMemory() override;
void activate(unsigned n);
- void setThreshold(const BlockFrequency &Entry);
+ void setThreshold(BlockFrequency Entry);
bool update(unsigned n);
};
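
The SpillPlacement hunks reflect BlockFrequency becoming a small value type: it is now passed by value, constructed explicitly from raw integers, and saturated with BlockFrequency::max(). A hedged sketch mirroring the setThreshold math shown above:

#include "llvm/Support/BlockFrequency.h"
#include <algorithm>
#include <cstdint>

// Scale an entry frequency down by 2^13 with rounding, and never return a
// threshold below 1 (mirrors SpillPlacement::setThreshold above).
llvm::BlockFrequency scaledThreshold(llvm::BlockFrequency Entry) {
  uint64_t Freq = Entry.getFrequency();
  uint64_t Scaled = (Freq >> 13) + bool(Freq & (1 << 12));
  return llvm::BlockFrequency(std::max(UINT64_C(1), Scaled));
}
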
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
index eee54f09fbad..d6c0a782465e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
@@ -45,6 +45,11 @@ using namespace llvm;
#define DEBUG_TYPE "regalloc"
+static cl::opt<bool>
+ EnableLoopIVHeuristic("enable-split-loopiv-heuristic",
+ cl::desc("Enable loop iv regalloc heuristic"),
+ cl::init(true));
+
STATISTIC(NumFinished, "Number of splits finished");
STATISTIC(NumSimple, "Number of splits that were simple");
STATISTIC(NumCopies, "Number of copies inserted for splitting");
@@ -126,7 +131,6 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI,
// If the value leaving MBB was defined after the call in MBB, it can't
// really be live-in to the landing pad. This can happen if the landing pad
// has a PHI, and this register is undef on the exceptional edge.
- // <rdar://problem/10664933>
if (!SlotIndex::isEarlierInstr(VNI->def, LIP.second) && VNI->def < MBBEnd)
return LIP.first;
@@ -294,6 +298,13 @@ void SplitAnalysis::calcLiveBlockInfo() {
MFI = LIS.getMBBFromIndex(LVI->start)->getIterator();
}
+ LooksLikeLoopIV = EnableLoopIVHeuristic && UseBlocks.size() == 2 &&
+ any_of(UseBlocks, [this](BlockInfo &BI) {
+ MachineLoop *L = Loops.getLoopFor(BI.MBB);
+ return BI.LiveIn && BI.LiveOut && BI.FirstDef && L &&
+ L->isLoopLatch(BI.MBB);
+ });
+
assert(getNumLiveBlocks() == countLiveBlocks(CurLI) && "Bad block count");
}
@@ -514,10 +525,10 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo &ParentVNI) {
VFP = ValueForcePair(nullptr, true);
}
-SlotIndex SplitEditor::buildSingleSubRegCopy(Register FromReg, Register ToReg,
- MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
- unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def) {
- const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+SlotIndex SplitEditor::buildSingleSubRegCopy(
+ Register FromReg, Register ToReg, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, unsigned SubIdx,
+ LiveInterval &DestLI, bool Late, SlotIndex Def, const MCInstrDesc &Desc) {
bool FirstCopy = !Def.isValid();
MachineInstr *CopyMI = BuildMI(MBB, InsertBefore, DebugLoc(), Desc)
.addReg(ToReg, RegState::Define | getUndefRegState(FirstCopy)
@@ -536,7 +547,8 @@ SlotIndex SplitEditor::buildSingleSubRegCopy(Register FromReg, Register ToReg,
SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
LaneBitmask LaneMask, MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx) {
- const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+ const MCInstrDesc &Desc =
+ TII.get(TII.getLiveRangeSplitOpcode(FromReg, *MBB.getParent()));
SlotIndexes &Indexes = *LIS.getSlotIndexes();
if (LaneMask.all() || LaneMask == MRI.getMaxLaneMaskForVReg(FromReg)) {
// The full vreg is copied.
@@ -564,7 +576,7 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
SlotIndex Def;
for (unsigned BestIdx : SubIndexes) {
Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, BestIdx,
- DestLI, Late, Def);
+ DestLI, Late, Def, Desc);
}
BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
@@ -795,8 +807,10 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
return Start;
}
- VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB,
- MBB.SkipPHIsLabelsAndDebug(MBB.begin()));
+ unsigned RegIdx = 0;
+ Register Reg = LIS.getInterval(Edit->get(RegIdx)).reg();
+ VNInfo *VNI = defFromParent(RegIdx, ParentVNI, Start, MBB,
+ MBB.SkipPHIsLabelsAndDebug(MBB.begin(), Reg));
RegAssign.insert(Start, VNI->def, OpenIdx);
LLVM_DEBUG(dump());
return VNI->def;
@@ -1584,7 +1598,9 @@ bool SplitAnalysis::shouldSplitSingleBlock(const BlockInfo &BI,
if (BI.LiveIn && BI.LiveOut)
return true;
// No point in isolating a copy. It has no register class constraints.
- if (LIS.getInstructionFromIndex(BI.FirstInstr)->isCopyLike())
+ MachineInstr *MI = LIS.getInstructionFromIndex(BI.FirstInstr);
+ bool copyLike = TII.isCopyInstr(*MI) || MI->isSubregToReg();
+ if (copyLike)
return false;
// Finally, don't isolate an end point that was created by earlier splits.
return isOriginalEndpoint(BI.FirstInstr);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
index f764ffd4750c..cc277ecc0e88 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
@@ -159,6 +159,10 @@ private:
/// NumThroughBlocks - Number of live-through blocks.
unsigned NumThroughBlocks = 0u;
+  /// LooksLikeLoopIV - The live range looks like a loop induction variable:
+  /// it is live in and out of the loop latch, where it also has a def.
+ bool LooksLikeLoopIV = false;
+
  // Summarize statistics by counting instructions using CurLI.
void analyzeUses();
@@ -209,6 +213,8 @@ public:
return getUseBlocks().size() - NumGapBlocks + getNumThroughBlocks();
}
+ bool looksLikeLoopIV() const { return LooksLikeLoopIV; }
+
/// countLiveBlocks - Return the number of blocks where li is live. This is
/// guaranteed to return the same number as getNumLiveBlocks() after calling
/// analyze(li).
@@ -428,8 +434,11 @@ private:
bool Late, unsigned RegIdx);
SlotIndex buildSingleSubRegCopy(Register FromReg, Register ToReg,
- MachineBasicBlock &MB, MachineBasicBlock::iterator InsertBefore,
- unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def);
+ MachineBasicBlock &MB,
+ MachineBasicBlock::iterator InsertBefore,
+ unsigned SubIdx, LiveInterval &DestLI,
+ bool Late, SlotIndex Def,
+ const MCInstrDesc &Desc);
public:
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
index 3453b718b453..37f7aa929005 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
// This pass implements the stack-coloring optimization that looks for
-// lifetime markers machine instructions (LIFESTART_BEGIN and LIFESTART_END),
+// lifetime marker machine instructions (LIFETIME_START and LIFETIME_END),
// which represent the possible lifetime of stack slots. It attempts to
// merge disjoint stack slots and reduce the used stack space.
// NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
@@ -36,6 +36,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/PseudoSourceValueManager.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
@@ -1338,8 +1339,10 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
// Scan the entire function and update all machine operands that use frame
// indices to use the remapped frame index.
- expungeSlotMap(SlotRemap, NumSlots);
- remapInstructions(SlotRemap);
+ if (!SlotRemap.empty()) {
+ expungeSlotMap(SlotRemap, NumSlots);
+ remapInstructions(SlotRemap);
+ }
return removeAllMarkers();
}
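
StackColoring consumes the LIFETIME_START/LIFETIME_END markers named in the header comment; front ends typically create them through IRBuilder. A minimal sketch, assuming a caller that already holds the alloca and its size as a ConstantInt:

#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Bracket the live range of a stack slot so that StackColoring may later
// overlap it with other, disjoint slots.
void bracketLifetime(IRBuilder<> &B, AllocaInst *Slot, ConstantInt *Size) {
  B.CreateLifetimeStart(Slot, Size); // slot becomes live here
  // ... emit code that uses Slot ...
  B.CreateLifetimeEnd(Slot, Size);   // slot is dead past this point
}
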
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
index 387b653f8815..48dc7cb232e3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
@@ -178,8 +178,7 @@ static bool HasAddressTaken(const Instruction *AI, TypeSize AllocSize,
// the bounds of the allocated object.
std::optional<MemoryLocation> MemLoc = MemoryLocation::getOrNone(I);
if (MemLoc && MemLoc->Size.hasValue() &&
- !TypeSize::isKnownGE(AllocSize,
- TypeSize::getFixed(MemLoc->Size.getValue())))
+ !TypeSize::isKnownGE(AllocSize, MemLoc->Size.getValue()))
return true;
switch (I->getOpcode()) {
case Instruction::Store:
@@ -216,14 +215,14 @@ static bool HasAddressTaken(const Instruction *AI, TypeSize AllocSize,
APInt Offset(IndexSize, 0);
if (!GEP->accumulateConstantOffset(DL, Offset))
return true;
- TypeSize OffsetSize = TypeSize::Fixed(Offset.getLimitedValue());
+ TypeSize OffsetSize = TypeSize::getFixed(Offset.getLimitedValue());
if (!TypeSize::isKnownGT(AllocSize, OffsetSize))
return true;
// Adjust AllocSize to be the space remaining after this offset.
// We can't subtract a fixed size from a scalable one, so in that case
// assume the scalable value is of minimum size.
TypeSize NewAllocSize =
- TypeSize::Fixed(AllocSize.getKnownMinValue()) - OffsetSize;
+ TypeSize::getFixed(AllocSize.getKnownMinValue()) - OffsetSize;
if (HasAddressTaken(I, NewAllocSize, M, VisitedPHIs))
return true;
break;
@@ -419,7 +418,7 @@ static Value *getStackGuard(const TargetLoweringBase *TLI, Module *M,
Value *Guard = TLI->getIRStackGuard(B);
StringRef GuardMode = M->getStackProtectorGuard();
if ((GuardMode == "tls" || GuardMode.empty()) && Guard)
- return B.CreateLoad(B.getInt8PtrTy(), Guard, true, "StackGuard");
+ return B.CreateLoad(B.getPtrTy(), Guard, true, "StackGuard");
// Use SelectionDAG SSP handling, since there isn't an IR guard.
//
@@ -452,7 +451,7 @@ static bool CreatePrologue(Function *F, Module *M, Instruction *CheckLoc,
const TargetLoweringBase *TLI, AllocaInst *&AI) {
bool SupportsSelectionDAGSP = false;
IRBuilder<> B(&F->getEntryBlock().front());
- PointerType *PtrTy = Type::getInt8PtrTy(CheckLoc->getContext());
+ PointerType *PtrTy = PointerType::getUnqual(CheckLoc->getContext());
AI = B.CreateAlloca(PtrTy, nullptr, "StackGuardSlot");
Value *GuardSlot = getStackGuard(TLI, M, B, &SupportsSelectionDAGSP);
@@ -540,7 +539,7 @@ bool StackProtector::InsertStackProtectors() {
// Generate the function-based epilogue instrumentation.
// The target provides a guard check function, generate a call to it.
IRBuilder<> B(CheckLoc);
- LoadInst *Guard = B.CreateLoad(B.getInt8PtrTy(), AI, true, "Guard");
+ LoadInst *Guard = B.CreateLoad(B.getPtrTy(), AI, true, "Guard");
CallInst *Call = B.CreateCall(GuardCheck, {Guard});
Call->setAttributes(GuardCheck->getAttributes());
Call->setCallingConv(GuardCheck->getCallingConv());
@@ -579,7 +578,7 @@ bool StackProtector::InsertStackProtectors() {
IRBuilder<> B(CheckLoc);
Value *Guard = getStackGuard(TLI, M, B);
- LoadInst *LI2 = B.CreateLoad(B.getInt8PtrTy(), AI, true);
+ LoadInst *LI2 = B.CreateLoad(B.getPtrTy(), AI, true);
auto *Cmp = cast<ICmpInst>(B.CreateICmpNE(Guard, LI2));
auto SuccessProb =
BranchProbabilityInfo::getBranchProbStackProtector(true);
@@ -623,7 +622,7 @@ BasicBlock *StackProtector::CreateFailBB() {
if (Trip.isOSOpenBSD()) {
StackChkFail = M->getOrInsertFunction("__stack_smash_handler",
Type::getVoidTy(Context),
- Type::getInt8PtrTy(Context));
+ PointerType::getUnqual(Context));
Args.push_back(B.CreateGlobalStringPtr(F->getName(), "SSH"));
} else {
StackChkFail =
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
index 6d933ab12041..c180f4d8f036 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/PseudoSourceValueManager.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -523,8 +524,7 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
// If there are calls to setjmp or sigsetjmp, don't perform stack slot
// coloring. The stack could be modified before the longjmp is executed,
- // resulting in the wrong value being used afterwards. (See
- // <rdar://problem/8007500>.)
+ // resulting in the wrong value being used afterwards.
if (MF.exposesReturnsTwice())
return false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
index 83a7063de112..74a94d6110f4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
@@ -253,6 +253,25 @@ void SwiftErrorValueTracking::propagateVRegs() {
setCurrentVReg(MBB, SwiftErrorVal, PHIVReg);
}
}
+
+ // Create implicit defs for upward uses from unreachable blocks
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ for (const auto &Use : VRegUpwardsUse) {
+ const MachineBasicBlock *UseBB = Use.first.first;
+ Register VReg = Use.second;
+ if (!MRI.def_begin(VReg).atEnd())
+ continue;
+
+#ifdef EXPENSIVE_CHECKS
+ assert(std::find(RPOT.begin(), RPOT.end(), UseBB) == RPOT.end() &&
+ "Reachable block has VReg upward use without definition.");
+#endif
+
+ MachineBasicBlock *UseBBMut = MF->getBlockNumbered(UseBB->getNumber());
+
+ BuildMI(*UseBBMut, UseBBMut->getFirstNonPHI(), DebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
+ }
}
void SwiftErrorValueTracking::preassignVRegs(
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
index 36a02d5beb4b..7982d80353bd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -45,6 +45,7 @@ SwitchCG::getJumpTableNumCases(const SmallVectorImpl<unsigned> &TotalCases,
void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters,
const SwitchInst *SI,
+ std::optional<SDLoc> SL,
MachineBasicBlock *DefaultMBB,
ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) {
@@ -87,7 +88,7 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters,
// Cheap case: the whole range may be suitable for jump table.
if (TLI->isSuitableForJumpTable(SI, NumCases, Range, PSI, BFI)) {
CaseCluster JTCluster;
- if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
+ if (buildJumpTable(Clusters, 0, N - 1, SI, SL, DefaultMBB, JTCluster)) {
Clusters[0] = JTCluster;
Clusters.resize(1);
return;
@@ -95,7 +96,7 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters,
}
// The algorithm below is not suitable for -O0.
- if (TM->getOptLevel() == CodeGenOpt::None)
+ if (TM->getOptLevel() == CodeGenOptLevel::None)
return;
// Split Clusters into minimum number of dense partitions. The algorithm uses
@@ -177,7 +178,7 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters,
CaseCluster JTCluster;
if (NumClusters >= MinJumpTableEntries &&
- buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) {
+ buildJumpTable(Clusters, First, Last, SI, SL, DefaultMBB, JTCluster)) {
Clusters[DstIndex++] = JTCluster;
} else {
for (unsigned I = First; I <= Last; ++I)
@@ -190,6 +191,7 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters,
bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
unsigned First, unsigned Last,
const SwitchInst *SI,
+ const std::optional<SDLoc> &SL,
MachineBasicBlock *DefaultMBB,
CaseCluster &JTCluster) {
assert(First <= Last);
@@ -251,7 +253,7 @@ bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
->createJumpTableIndex(Table);
// Set up the jump table info.
- JumpTable JT(-1U, JTI, JumpTableMBB, nullptr);
+ JumpTable JT(-1U, JTI, JumpTableMBB, nullptr, SL);
JumpTableHeader JTH(Clusters[First].Low->getValue(),
Clusters[Last].High->getValue(), SI->getCondition(),
nullptr, false);
@@ -278,7 +280,7 @@ void SwitchCG::SwitchLowering::findBitTestClusters(CaseClusterVector &Clusters,
#endif
// The algorithm below is not suitable for -O0.
- if (TM->getOptLevel() == CodeGenOpt::None)
+ if (TM->getOptLevel() == CodeGenOptLevel::None)
return;
// If target does not have legal shift left, do not emit bit tests at all.
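
The repeated CodeGenOpt::None → CodeGenOptLevel::None substitutions in this file (and in TargetPassConfig.cpp below) come from the optimization level turning into a scoped enum. A tiny sketch of the new spelling (the helper name is invented):

#include "llvm/Support/CodeGen.h"

// The old CodeGenOpt::Level namespace enum is now the scoped enum
// llvm::CodeGenOptLevel, so every comparison names the enum class.
bool shouldRunExpensivePasses(llvm::CodeGenOptLevel OL) {
  return OL != llvm::CodeGenOptLevel::None;
}
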
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
index b29404b42519..4783742a14ad 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -34,6 +34,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -430,18 +431,27 @@ bool TargetInstrInfo::produceSameValue(const MachineInstr &MI0,
return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}
-MachineInstr &TargetInstrInfo::duplicate(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const {
- assert(!Orig.isNotDuplicable() && "Instruction cannot be duplicated");
+MachineInstr &
+TargetInstrInfo::duplicate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ const MachineInstr &Orig) const {
MachineFunction &MF = *MBB.getParent();
+ // CFI instructions are marked as non-duplicable, because Darwin compact
+ // unwind info emission can't handle multiple prologue setups.
+ assert((!Orig.isNotDuplicable() ||
+ (!MF.getTarget().getTargetTriple().isOSDarwin() &&
+ Orig.isCFIInstruction())) &&
+ "Instruction cannot be duplicated");
+
return MF.cloneMachineInstrBundle(MBB, InsertBefore, Orig);
}
// If the COPY instruction in MI can be folded to a stack operation, return
// the register class to use.
static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI,
+ const TargetInstrInfo &TII,
unsigned FoldIdx) {
- assert(MI.isCopy() && "MI must be a COPY instruction");
+ assert(TII.isCopyInstr(MI) && "MI must be a COPY instruction");
if (MI.getNumOperands() != 2)
return nullptr;
  assert(FoldIdx < 2 && "FoldIdx refers to a nonexistent operand");
@@ -555,6 +565,72 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
return NewMI;
}
+static void foldInlineAsmMemOperand(MachineInstr *MI, unsigned OpNo, int FI,
+ const TargetInstrInfo &TII) {
+ // If the machine operand is tied, untie it first.
+ if (MI->getOperand(OpNo).isTied()) {
+ unsigned TiedTo = MI->findTiedOperandIdx(OpNo);
+ MI->untieRegOperand(OpNo);
+ // Intentional recursion!
+ foldInlineAsmMemOperand(MI, TiedTo, FI, TII);
+ }
+
+ SmallVector<MachineOperand, 5> NewOps;
+ TII.getFrameIndexOperands(NewOps, FI);
+ assert(!NewOps.empty() && "getFrameIndexOperands didn't create any operands");
+ MI->removeOperand(OpNo);
+ MI->insert(MI->operands_begin() + OpNo, NewOps);
+
+  // Change the previous operand (MD) to a MemKind InlineAsm::Flag. The flag's
+  // second parameter is the target-specific number of operands that make up
+  // the memory reference, i.e. the NewOps inserted above.
+ InlineAsm::Flag F(InlineAsm::Kind::Mem, NewOps.size());
+ F.setMemConstraint(InlineAsm::ConstraintCode::m);
+ MachineOperand &MD = MI->getOperand(OpNo - 1);
+ MD.setImm(F);
+}
+
+// Returns nullptr if not possible to fold.
+static MachineInstr *foldInlineAsmMemOperand(MachineInstr &MI,
+ ArrayRef<unsigned> Ops, int FI,
+ const TargetInstrInfo &TII) {
+ assert(MI.isInlineAsm() && "wrong opcode");
+ if (Ops.size() > 1)
+ return nullptr;
+ unsigned Op = Ops[0];
+ assert(Op && "should never be first operand");
+ assert(MI.getOperand(Op).isReg() && "shouldn't be folding non-reg operands");
+
+ if (!MI.mayFoldInlineAsmRegOp(Op))
+ return nullptr;
+
+ MachineInstr &NewMI = TII.duplicate(*MI.getParent(), MI.getIterator(), MI);
+
+ foldInlineAsmMemOperand(&NewMI, Op, FI, TII);
+
+ // Update mayload/maystore metadata, and memoperands.
+ const VirtRegInfo &RI =
+ AnalyzeVirtRegInBundle(MI, MI.getOperand(Op).getReg());
+ MachineOperand &ExtraMO = NewMI.getOperand(InlineAsm::MIOp_ExtraInfo);
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
+ if (RI.Reads) {
+ ExtraMO.setImm(ExtraMO.getImm() | InlineAsm::Extra_MayLoad);
+ Flags |= MachineMemOperand::MOLoad;
+ }
+ if (RI.Writes) {
+ ExtraMO.setImm(ExtraMO.getImm() | InlineAsm::Extra_MayStore);
+ Flags |= MachineMemOperand::MOStore;
+ }
+ MachineFunction *MF = NewMI.getMF();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FI), Flags, MFI.getObjectSize(FI),
+ MFI.getObjectAlign(FI));
+ NewMI.addMemOperand(*MF, MMO);
+
+ return &NewMI;
+}
+
MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
ArrayRef<unsigned> Ops, int FI,
LiveIntervals *LIS,
@@ -602,6 +678,8 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
NewMI = foldPatchpoint(MF, MI, Ops, FI, *this);
if (NewMI)
MBB->insert(MI, NewMI);
+ } else if (MI.isInlineAsm()) {
+ return foldInlineAsmMemOperand(MI, Ops, FI, *this);
} else {
// Ask the target to do the actual folding.
NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS, VRM);
@@ -630,10 +708,10 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
}
// Straight COPY may fold as load/store.
- if (!MI.isCopy() || Ops.size() != 1)
+ if (!isCopyInstr(MI) || Ops.size() != 1)
return nullptr;
- const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]);
+ const TargetRegisterClass *RC = canFoldCopy(MI, *this, Ops[0]);
if (!RC)
return nullptr;
@@ -673,6 +751,8 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this);
if (NewMI)
NewMI = &*MBB.insert(MI, NewMI);
+ } else if (MI.isInlineAsm() && isLoadFromStackSlot(LoadMI, FrameIndex)) {
+ return foldInlineAsmMemOperand(MI, Ops, FrameIndex, *this);
} else {
// Ask the target to do the actual folding.
NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI, LIS);
@@ -748,7 +828,6 @@ void TargetInstrInfo::lowerCopy(MachineInstr *MI,
if (MI->getNumOperands() > 2)
transferImplicitOperands(MI, TRI);
MI->eraseFromParent();
- return;
}
bool TargetInstrInfo::hasReassociableOperands(
@@ -1051,8 +1130,7 @@ void TargetInstrInfo::reassociateOps(
MachineInstrBuilder MIB1 =
BuildMI(*MF, MIMetadata(Prev), TII->get(NewPrevOpc), NewVR)
.addReg(RegX, getKillRegState(KillX))
- .addReg(RegY, getKillRegState(KillY))
- .setMIFlags(Prev.getFlags());
+ .addReg(RegY, getKillRegState(KillY));
if (SwapRootOperands) {
std::swap(RegA, NewVR);
@@ -1062,8 +1140,21 @@ void TargetInstrInfo::reassociateOps(
MachineInstrBuilder MIB2 =
BuildMI(*MF, MIMetadata(Root), TII->get(NewRootOpc), RegC)
.addReg(RegA, getKillRegState(KillA))
- .addReg(NewVR, getKillRegState(KillNewVR))
- .setMIFlags(Root.getFlags());
+ .addReg(NewVR, getKillRegState(KillNewVR));
+
+ // Propagate FP flags from the original instructions.
+ // But clear poison-generating flags because those may not be valid now.
+ // TODO: There should be a helper function for copying only fast-math-flags.
+ uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags();
+ MIB1->setFlags(IntersectedFlags);
+ MIB1->clearFlag(MachineInstr::MIFlag::NoSWrap);
+ MIB1->clearFlag(MachineInstr::MIFlag::NoUWrap);
+ MIB1->clearFlag(MachineInstr::MIFlag::IsExact);
+
+ MIB2->setFlags(IntersectedFlags);
+ MIB2->clearFlag(MachineInstr::MIFlag::NoSWrap);
+ MIB2->clearFlag(MachineInstr::MIFlag::NoUWrap);
+ MIB2->clearFlag(MachineInstr::MIFlag::IsExact);
setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2);
@@ -1118,7 +1209,7 @@ MachineTraceStrategy TargetInstrInfo::getMachineCombinerTraceStrategy() const {
return MachineTraceStrategy::TS_MinInstrCount;
}
-bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
+bool TargetInstrInfo::isReallyTriviallyReMaterializable(
const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getMF();
const MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -1287,15 +1378,15 @@ bool TargetInstrInfo::getMemOperandWithOffset(
// SelectionDAG latency interface.
//===----------------------------------------------------------------------===//
-int
+std::optional<unsigned>
TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const {
if (!ItinData || ItinData->isEmpty())
- return -1;
+ return std::nullopt;
if (!DefNode->isMachineOpcode())
- return -1;
+ return std::nullopt;
unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
if (!UseNode->isMachineOpcode())
@@ -1304,8 +1395,8 @@ TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
}
-int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
- SDNode *N) const {
+unsigned TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *N) const {
if (!ItinData || ItinData->isEmpty())
return 1;
@@ -1369,8 +1460,29 @@ bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
return false;
unsigned DefClass = DefMI.getDesc().getSchedClass();
- int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
- return (DefCycle != -1 && DefCycle <= 1);
+ std::optional<unsigned> DefCycle =
+ ItinData->getOperandCycle(DefClass, DefIdx);
+ return DefCycle && DefCycle <= 1U;
+}
+
+bool TargetInstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
+ // TODO: We don't split functions where a section attribute has been set
+ // since the split part may not be placed in a contiguous region. It may also
+ // be more beneficial to augment the linker to ensure contiguous layout of
+ // split functions within the same section as specified by the attribute.
+ if (MF.getFunction().hasSection() ||
+ MF.getFunction().hasFnAttribute("implicit-section-name"))
+ return false;
+
+ // We don't want to proceed further for cold functions
+ // or functions of unknown hotness. Lukewarm functions have no prefix.
+ std::optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
+ if (SectionPrefix &&
+ (*SectionPrefix == "unlikely" || *SectionPrefix == "unknown")) {
+ return false;
+ }
+
+ return true;
}
std::optional<ParamLoadedValue>
@@ -1450,13 +1562,27 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI,
return std::nullopt;
}
+// Get the call frame size just before MI.
+unsigned TargetInstrInfo::getCallFrameSizeAt(MachineInstr &MI) const {
+ // Search backwards from MI for the most recent call frame instruction.
+ MachineBasicBlock *MBB = MI.getParent();
+ for (auto &AdjI : reverse(make_range(MBB->instr_begin(), MI.getIterator()))) {
+ if (AdjI.getOpcode() == getCallFrameSetupOpcode())
+ return getFrameTotalSize(AdjI);
+ if (AdjI.getOpcode() == getCallFrameDestroyOpcode())
+ return 0;
+ }
+
+ // If none was found, use the call frame size from the start of the basic
+ // block.
+ return MBB->getCallFrameSize();
+}
+
/// Both DefMI and UseMI must be valid. By default, call directly to the
/// itinerary. This may be overridden by the target.
-int TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr &DefMI,
- unsigned DefIdx,
- const MachineInstr &UseMI,
- unsigned UseIdx) const {
+std::optional<unsigned> TargetInstrInfo::getOperandLatency(
+ const InstrItineraryData *ItinData, const MachineInstr &DefMI,
+ unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
unsigned DefClass = DefMI.getDesc().getSchedClass();
unsigned UseClass = UseMI.getDesc().getSchedClass();
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
@@ -1574,27 +1700,30 @@ std::string TargetInstrInfo::createMIROperandComment(
assert(Op.isImm() && "Expected flag operand to be an immediate");
// Pretty print the inline asm operand descriptor.
unsigned Flag = Op.getImm();
- unsigned Kind = InlineAsm::getKind(Flag);
- OS << InlineAsm::getKindName(Kind);
+ const InlineAsm::Flag F(Flag);
+ OS << F.getKindName();
- unsigned RCID = 0;
- if (!InlineAsm::isImmKind(Flag) && !InlineAsm::isMemKind(Flag) &&
- InlineAsm::hasRegClassConstraint(Flag, RCID)) {
+ unsigned RCID;
+ if (!F.isImmKind() && !F.isMemKind() && F.hasRegClassConstraint(RCID)) {
if (TRI) {
OS << ':' << TRI->getRegClassName(TRI->getRegClass(RCID));
} else
OS << ":RC" << RCID;
}
- if (InlineAsm::isMemKind(Flag)) {
- unsigned MCID = InlineAsm::getMemoryConstraintID(Flag);
+ if (F.isMemKind()) {
+ InlineAsm::ConstraintCode MCID = F.getMemoryConstraintID();
OS << ":" << InlineAsm::getMemConstraintName(MCID);
}
- unsigned TiedTo = 0;
- if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo))
+ unsigned TiedTo;
+ if (F.isUseOperandTiedToDef(TiedTo))
OS << " tiedto:$" << TiedTo;
+ if ((F.isRegDefKind() || F.isRegDefEarlyClobberKind() || F.isRegUseKind()) &&
+ F.getRegMayBeFolded())
+ OS << " foldable";
+
return OS.str();
}
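
getOperandLatency switching from int to std::optional<unsigned> retires the -1 sentinel that callers previously had to remember. A hedged call-site sketch (latencyOrDefault is a hypothetical helper, not part of the patch):

#include <optional>

// Before: int L = getOperandLatency(...); if (L == -1) L = Default;
// After: a missing itinerary entry is simply an empty optional.
unsigned latencyOrDefault(std::optional<unsigned> Latency, unsigned Default) {
  return Latency.value_or(Default);
}
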
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 10c54560da5a..626bdf683441 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -867,19 +867,18 @@ void TargetLoweringBase::initActions() {
setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand);
// These library functions default to expand.
- setOperationAction(
- {ISD::FROUND, ISD::FROUNDEVEN, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP},
- VT, Expand);
+ setOperationAction({ISD::FROUND, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP}, VT,
+ Expand);
// These operations default to expand for vector types.
if (VT.isVector())
- setOperationAction({ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG,
- ISD::ANY_EXTEND_VECTOR_INREG,
- ISD::SIGN_EXTEND_VECTOR_INREG,
- ISD::ZERO_EXTEND_VECTOR_INREG, ISD::SPLAT_VECTOR},
- VT, Expand);
+ setOperationAction(
+ {ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG,
+ ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG,
+ ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT},
+ VT, Expand);
- // Constrained floating-point operations default to expand.
+ // Constrained floating-point operations default to expand.
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
setOperationAction(ISD::STRICT_##DAGN, VT, Expand);
#include "llvm/IR/ConstrainedOps.def"
@@ -926,9 +925,9 @@ void TargetLoweringBase::initActions() {
// These library functions default to expand.
setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP,
- ISD::FEXP2, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL,
- ISD::FRINT, ISD::FTRUNC, ISD::LROUND, ISD::LLROUND,
- ISD::LRINT, ISD::LLRINT},
+ ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, ISD::FNEARBYINT,
+ ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, ISD::LROUND,
+ ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN},
{MVT::f32, MVT::f64, MVT::f128}, Expand);
// Default ISD::TRAP to expand (which turns it into abort).
@@ -942,6 +941,12 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::GET_FPENV_MEM, MVT::Other, Expand);
setOperationAction(ISD::SET_FPENV_MEM, MVT::Other, Expand);
+
+ for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
+ setOperationAction(ISD::GET_FPMODE, VT, Expand);
+ setOperationAction(ISD::SET_FPMODE, VT, Expand);
+ }
+ setOperationAction(ISD::RESET_FPMODE, MVT::Other, Expand);
}
MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL,
@@ -1905,7 +1910,7 @@ TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilderBase &IRB,
auto UnsafeStackPtr =
dyn_cast_or_null<GlobalVariable>(M->getNamedValue(UnsafeStackPtrVar));
- Type *StackPtrTy = Type::getInt8PtrTy(M->getContext());
+ Type *StackPtrTy = PointerType::getUnqual(M->getContext());
if (!UnsafeStackPtr) {
auto TLSModel = UseTLS ?
@@ -1936,9 +1941,9 @@ TargetLoweringBase::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
// Android provides a libc function to retrieve the address of the current
// thread's unsafe stack pointer.
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
- Type *StackPtrTy = Type::getInt8PtrTy(M->getContext());
- FunctionCallee Fn = M->getOrInsertFunction("__safestack_pointer_address",
- StackPtrTy->getPointerTo(0));
+ auto *PtrTy = PointerType::getUnqual(M->getContext());
+ FunctionCallee Fn =
+ M->getOrInsertFunction("__safestack_pointer_address", PtrTy);
return IRB.CreateCall(Fn);
}
@@ -1992,7 +1997,7 @@ bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL,
Value *TargetLoweringBase::getIRStackGuard(IRBuilderBase &IRB) const {
if (getTargetMachine().getTargetTriple().isOSOpenBSD()) {
Module &M = *IRB.GetInsertBlock()->getParent()->getParent();
- PointerType *PtrTy = Type::getInt8PtrTy(M.getContext());
+ PointerType *PtrTy = PointerType::getUnqual(M.getContext());
Constant *C = M.getOrInsertGlobal("__guard_local", PtrTy);
if (GlobalVariable *G = dyn_cast_or_null<GlobalVariable>(C))
G->setVisibility(GlobalValue::HiddenVisibility);
@@ -2005,15 +2010,16 @@ Value *TargetLoweringBase::getIRStackGuard(IRBuilderBase &IRB) const {
// TODO: add LOAD_STACK_GUARD support.
void TargetLoweringBase::insertSSPDeclarations(Module &M) const {
if (!M.getNamedValue("__stack_chk_guard")) {
- auto *GV = new GlobalVariable(M, Type::getInt8PtrTy(M.getContext()), false,
- GlobalVariable::ExternalLinkage, nullptr,
- "__stack_chk_guard");
+ auto *GV = new GlobalVariable(M, PointerType::getUnqual(M.getContext()),
+ false, GlobalVariable::ExternalLinkage,
+ nullptr, "__stack_chk_guard");
// FreeBSD has "__stack_chk_guard" defined externally on libc.so
if (M.getDirectAccessExternalData() &&
!TM.getTargetTriple().isWindowsGNUEnvironment() &&
!(TM.getTargetTriple().isPPC64() && TM.getTargetTriple().isOSFreeBSD()) &&
- !TM.getTargetTriple().isOSDarwin())
+ (!TM.getTargetTriple().isOSDarwin() ||
+ TM.getRelocationModel() == Reloc::Static))
GV->setDSOLocal(true);
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 4ffffd85ee53..9a0dd92bb58e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -472,32 +472,31 @@ static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) {
/*AddSegmentInfo=*/false) ||
Name == getInstrProfSectionName(IPSK_covfun, Triple::ELF,
/*AddSegmentInfo=*/false) ||
+ Name == getInstrProfSectionName(IPSK_covdata, Triple::ELF,
+ /*AddSegmentInfo=*/false) ||
+ Name == getInstrProfSectionName(IPSK_covname, Triple::ELF,
+ /*AddSegmentInfo=*/false) ||
Name == ".llvmbc" || Name == ".llvmcmd")
return SectionKind::getMetadata();
if (Name.empty() || Name[0] != '.') return K;
// Default implementation based on some magic section names.
- if (Name == ".bss" ||
- Name.startswith(".bss.") ||
- Name.startswith(".gnu.linkonce.b.") ||
- Name.startswith(".llvm.linkonce.b.") ||
- Name == ".sbss" ||
- Name.startswith(".sbss.") ||
- Name.startswith(".gnu.linkonce.sb.") ||
- Name.startswith(".llvm.linkonce.sb."))
+ if (Name == ".bss" || Name.starts_with(".bss.") ||
+ Name.starts_with(".gnu.linkonce.b.") ||
+ Name.starts_with(".llvm.linkonce.b.") || Name == ".sbss" ||
+ Name.starts_with(".sbss.") || Name.starts_with(".gnu.linkonce.sb.") ||
+ Name.starts_with(".llvm.linkonce.sb."))
return SectionKind::getBSS();
- if (Name == ".tdata" ||
- Name.startswith(".tdata.") ||
- Name.startswith(".gnu.linkonce.td.") ||
- Name.startswith(".llvm.linkonce.td."))
+ if (Name == ".tdata" || Name.starts_with(".tdata.") ||
+ Name.starts_with(".gnu.linkonce.td.") ||
+ Name.starts_with(".llvm.linkonce.td."))
return SectionKind::getThreadData();
- if (Name == ".tbss" ||
- Name.startswith(".tbss.") ||
- Name.startswith(".gnu.linkonce.tb.") ||
- Name.startswith(".llvm.linkonce.tb."))
+ if (Name == ".tbss" || Name.starts_with(".tbss.") ||
+ Name.starts_with(".gnu.linkonce.tb.") ||
+ Name.starts_with(".llvm.linkonce.tb."))
return SectionKind::getThreadBSS();
return K;
@@ -512,7 +511,7 @@ static unsigned getELFSectionType(StringRef Name, SectionKind K) {
  // Use SHT_NOTE for sections whose names start with ".note" to allow
  // emitting ELF notes from C variable declarations.
// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77609
- if (Name.startswith(".note"))
+ if (Name.starts_with(".note"))
return ELF::SHT_NOTE;
if (hasPrefix(Name, ".init_array"))
@@ -616,7 +615,7 @@ static unsigned getEntrySizeForKind(SectionKind Kind) {
/// DataSections.
static StringRef getSectionPrefixForGlobal(SectionKind Kind, bool IsLarge) {
if (Kind.isText())
- return ".text";
+ return IsLarge ? ".ltext" : ".text";
if (Kind.isReadOnly())
return IsLarge ? ".lrodata" : ".rodata";
if (Kind.isBSS())
@@ -650,10 +649,7 @@ getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind,
Name = ".rodata.cst";
Name += utostr(EntrySize);
} else {
- bool IsLarge = false;
- if (isa<GlobalVariable>(GO))
- IsLarge = TM.isLargeData();
- Name = getSectionPrefixForGlobal(Kind, IsLarge);
+ Name = getSectionPrefixForGlobal(Kind, TM.isLargeGlobalValue(GO));
}
bool HasPrefix = false;
@@ -755,7 +751,7 @@ calcUniqueIDUpdateFlagsAndSize(const GlobalObject *GO, StringRef SectionName,
getELFSectionNameForGlobal(GO, Kind, Mang, TM, EntrySize, false);
if (SymbolMergeable &&
Ctx.isELFImplicitMergeableSectionNamePrefix(SectionName) &&
- SectionName.startswith(ImplicitSectionNameStem))
+ SectionName.starts_with(ImplicitSectionNameStem))
return MCContext::GenericSectionID;
// We have seen this section name before, but with different flags or entity
@@ -763,6 +759,21 @@ calcUniqueIDUpdateFlagsAndSize(const GlobalObject *GO, StringRef SectionName,
return NextUniqueID++;
}
+static std::tuple<StringRef, bool, unsigned>
+getGlobalObjectInfo(const GlobalObject *GO, const TargetMachine &TM) {
+ StringRef Group = "";
+ bool IsComdat = false;
+ unsigned Flags = 0;
+ if (const Comdat *C = getELFComdat(GO)) {
+ Flags |= ELF::SHF_GROUP;
+ Group = C->getName();
+ IsComdat = C->getSelectionKind() == Comdat::Any;
+ }
+ if (TM.isLargeGlobalValue(GO))
+ Flags |= ELF::SHF_X86_64_LARGE;
+ return {Group, IsComdat, Flags};
+}
+
static MCSection *selectExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM,
MCContext &Ctx, Mangler &Mang, unsigned &NextUniqueID,
@@ -793,14 +804,9 @@ static MCSection *selectExplicitSectionGlobal(
// Infer section flags from the section name if we can.
Kind = getELFKindForNamedSection(SectionName, Kind);
- StringRef Group = "";
- bool IsComdat = false;
unsigned Flags = getELFSectionFlags(Kind);
- if (const Comdat *C = getELFComdat(GO)) {
- Group = C->getName();
- IsComdat = C->getSelectionKind() == Comdat::Any;
- Flags |= ELF::SHF_GROUP;
- }
+ auto [Group, IsComdat, ExtraFlags] = getGlobalObjectInfo(GO, TM);
+ Flags |= ExtraFlags;
unsigned EntrySize = getEntrySizeForKind(Kind);
const unsigned UniqueID = calcUniqueIDUpdateFlagsAndSize(
@@ -848,19 +854,8 @@ static MCSectionELF *selectELFSectionForGlobal(
const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags,
unsigned *NextUniqueID, const MCSymbolELF *AssociatedSymbol) {
- StringRef Group = "";
- bool IsComdat = false;
- if (const Comdat *C = getELFComdat(GO)) {
- Flags |= ELF::SHF_GROUP;
- Group = C->getName();
- IsComdat = C->getSelectionKind() == Comdat::Any;
- }
- if (isa<GlobalVariable>(GO) && !cast<GlobalVariable>(GO)->isThreadLocal()) {
- if (TM.isLargeData()) {
- assert(TM.getTargetTriple().getArch() == Triple::x86_64);
- Flags |= ELF::SHF_X86_64_LARGE;
- }
- }
+ auto [Group, IsComdat, ExtraFlags] = getGlobalObjectInfo(GO, TM);
+ Flags |= ExtraFlags;
// Get the section entry size based on the kind.
unsigned EntrySize = getEntrySizeForKind(Kind);
@@ -1038,21 +1033,32 @@ MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock(
// under the .text.eh prefix. For regular sections, we either use a unique
// name, or a unique ID for the section.
SmallString<128> Name;
- if (MBB.getSectionID() == MBBSectionID::ColdSectionID) {
- Name += BBSectionsColdTextPrefix;
- Name += MBB.getParent()->getName();
- } else if (MBB.getSectionID() == MBBSectionID::ExceptionSectionID) {
- Name += ".text.eh.";
- Name += MBB.getParent()->getName();
- } else {
- Name += MBB.getParent()->getSection()->getName();
- if (TM.getUniqueBasicBlockSectionNames()) {
- if (!Name.endswith("."))
- Name += ".";
- Name += MBB.getSymbol()->getName();
+ StringRef FunctionSectionName = MBB.getParent()->getSection()->getName();
+ if (FunctionSectionName.equals(".text") ||
+ FunctionSectionName.starts_with(".text.")) {
+ // Function is in a regular .text section.
+ StringRef FunctionName = MBB.getParent()->getName();
+ if (MBB.getSectionID() == MBBSectionID::ColdSectionID) {
+ Name += BBSectionsColdTextPrefix;
+ Name += FunctionName;
+ } else if (MBB.getSectionID() == MBBSectionID::ExceptionSectionID) {
+ Name += ".text.eh.";
+ Name += FunctionName;
} else {
- UniqueID = NextUniqueID++;
+ Name += FunctionSectionName;
+ if (TM.getUniqueBasicBlockSectionNames()) {
+ if (!Name.ends_with("."))
+ Name += ".";
+ Name += MBB.getSymbol()->getName();
+ } else {
+ UniqueID = NextUniqueID++;
+ }
}
+ } else {
+ // If the original function has a custom non-dot-text section, then emit
+ // all basic block sections into that section too, each with a unique id.
+ Name = FunctionSectionName;
+ UniqueID = NextUniqueID++;
}
unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_EXECINSTR;
@@ -2303,8 +2309,10 @@ bool TargetLoweringObjectFileXCOFF::ShouldSetSSPCanaryBitInTB(
MCSymbol *
TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(const MachineFunction *MF) {
- return MF->getMMI().getContext().getOrCreateSymbol(
+ MCSymbol *EHInfoSym = MF->getMMI().getContext().getOrCreateSymbol(
"__ehinfo." + Twine(MF->getFunctionNumber()));
+ cast<MCSymbolXCOFF>(EHInfoSym)->setEHInfo();
+ return EHInfoSym;
}
MCSymbol *
@@ -2637,12 +2645,16 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForFunctionDescriptor(
MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
const MCSymbol *Sym, const TargetMachine &TM) const {
// Use TE storage-mapping class when large code model is enabled so that
- // the chance of needing -bbigtoc is decreased.
+ // the chance of needing -bbigtoc is decreased. Also, the toc-entry for
+ // EH info is never referenced directly using instructions so it can be
+ // allocated with TE storage-mapping class.
return getContext().getXCOFFSection(
cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), SectionKind::getData(),
- XCOFF::CsectProperties(
- TM.getCodeModel() == CodeModel::Large ? XCOFF::XMC_TE : XCOFF::XMC_TC,
- XCOFF::XTY_SD));
+ XCOFF::CsectProperties((TM.getCodeModel() == CodeModel::Large ||
+ cast<MCSymbolXCOFF>(Sym)->isEHInfo())
+ ? XCOFF::XMC_TE
+ : XCOFF::XMC_TC,
+ XCOFF::XTY_SD));
}
MCSection *TargetLoweringObjectFileXCOFF::getSectionForLSDA(
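
The startswith → starts_with churn in this file tracks StringRef adopting the std::string_view member names. A short sketch of the renamed predicates (isTextSection is illustrative):

#include "llvm/ADT/StringRef.h"

// StringRef now mirrors std::string_view: starts_with/ends_with replace the
// old startswith/endswith spellings.
bool isTextSection(llvm::StringRef Name) {
  return Name == ".text" || Name.starts_with(".text.");
}
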
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
index 98ea2f21b3c8..faa5466b69e8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -113,10 +113,9 @@ static cl::opt<bool> DisableMergeICmps("disable-mergeicmps",
cl::init(false), cl::Hidden);
static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
cl::desc("Print LLVM IR produced by the loop-reduce pass"));
-static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
- cl::desc("Print LLVM IR input to isel pass"));
-static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
- cl::desc("Dump garbage collector data"));
+static cl::opt<bool>
+ PrintISelInput("print-isel-input", cl::Hidden,
+ cl::desc("Print LLVM IR input to isel pass"));
static cl::opt<cl::boolOrDefault>
VerifyMachineCode("verify-machineinstrs", cl::Hidden,
cl::desc("Verify generated machine code"));
@@ -250,6 +249,11 @@ static cl::opt<bool> DisableSelectOptimize(
"disable-select-optimize", cl::init(true), cl::Hidden,
cl::desc("Disable the select-optimization pass from running"));
+/// Enable garbage-collecting empty basic blocks.
+static cl::opt<bool>
+ GCEmptyBlocks("gc-empty-basic-blocks", cl::init(false), cl::Hidden,
+ cl::desc("Enable garbage-collecting empty basic blocks"));
+
/// Allow standard passes to be disabled by command line options. This supports
/// simple binary flags that either suppress the pass or do nothing.
/// i.e. -disable-mypass=false has no effect.
@@ -470,6 +474,11 @@ CGPassBuilderOption llvm::getCGPassBuilderOption() {
SET_OPTION(EnableIPRA)
SET_OPTION(OptimizeRegAlloc)
SET_OPTION(VerifyMachineCode)
+ SET_OPTION(DisableAtExitBasedGlobalDtorLowering)
+ SET_OPTION(DisableExpandReductions)
+ SET_OPTION(PrintAfterISel)
+ SET_OPTION(FSProfileFile)
+ SET_OPTION(GCEmptyBlocks)
#define SET_BOOLEAN_OPTION(Option) Opt.Option = Option;
@@ -486,7 +495,11 @@ CGPassBuilderOption llvm::getCGPassBuilderOption() {
SET_BOOLEAN_OPTION(DisableSelectOptimize)
SET_BOOLEAN_OPTION(PrintLSR)
SET_BOOLEAN_OPTION(PrintISelInput)
- SET_BOOLEAN_OPTION(PrintGCInfo)
+ SET_BOOLEAN_OPTION(DebugifyAndStripAll)
+ SET_BOOLEAN_OPTION(DebugifyCheckAndStripAll)
+ SET_BOOLEAN_OPTION(DisableRAFSProfileLoader)
+ SET_BOOLEAN_OPTION(DisableCFIFixup)
+ SET_BOOLEAN_OPTION(EnableMachineFunctionSplitter)
return Opt;
}
@@ -626,7 +639,7 @@ TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
setStartStopPasses();
}
-CodeGenOpt::Level TargetPassConfig::getOptLevel() const {
+CodeGenOptLevel TargetPassConfig::getOptLevel() const {
return TM->getOptLevel();
}
@@ -841,7 +854,7 @@ void TargetPassConfig::addIRPasses() {
if (!DisableVerify)
addPass(createVerifierPass());
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOptLevel::None) {
// Basic AliasAnalysis support.
// Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
// BasicAliasAnalysis wins if they disagree. This is intended to help
@@ -865,7 +878,7 @@ void TargetPassConfig::addIRPasses() {
// target lowering hook.
if (!DisableMergeICmps)
addPass(createMergeICmpsLegacyPass());
- addPass(createExpandMemCmpPass());
+ addPass(createExpandMemCmpLegacyPass());
}
// Run GC lowering passes for builtin collectors
@@ -884,13 +897,13 @@ void TargetPassConfig::addIRPasses() {
addPass(createUnreachableBlockEliminationPass());
// Prepare expensive constants for SelectionDAG.
- if (getOptLevel() != CodeGenOpt::None && !DisableConstantHoisting)
+ if (getOptLevel() != CodeGenOptLevel::None && !DisableConstantHoisting)
addPass(createConstantHoistingPass());
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
addPass(createReplaceWithVeclibLegacyPass());
- if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining)
+ if (getOptLevel() != CodeGenOptLevel::None && !DisablePartialLibcallInlining)
addPass(createPartiallyInlineLibCallsPass());
// Expand vector predication intrinsics into standard IR instructions.
@@ -908,11 +921,11 @@ void TargetPassConfig::addIRPasses() {
if (!DisableExpandReductions)
addPass(createExpandReductionsPass());
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
addPass(createTLSVariableHoistPass());
// Convert conditional moves to conditional jumps when profitable.
- if (getOptLevel() != CodeGenOpt::None && !DisableSelectOptimize)
+ if (getOptLevel() != CodeGenOptLevel::None && !DisableSelectOptimize)
addPass(createSelectOptimizePass());
}
@@ -963,7 +976,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
/// Add pass to prepare the LLVM IR for code generation. This should be done
/// before exception handling preparation passes.
void TargetPassConfig::addCodeGenPrepare() {
- if (getOptLevel() != CodeGenOpt::None && !DisableCGP)
+ if (getOptLevel() != CodeGenOptLevel::None && !DisableCGP)
addPass(createCodeGenPreparePass());
}
@@ -1007,7 +1020,8 @@ bool TargetPassConfig::addCoreISelPasses() {
(TM->Options.EnableGlobalISel &&
EnableGlobalISelOption != cl::BOU_FALSE))
Selector = SelectorType::GlobalISel;
- else if (TM->getOptLevel() == CodeGenOpt::None && TM->getO0WantsFastISel())
+ else if (TM->getOptLevel() == CodeGenOptLevel::None &&
+ TM->getO0WantsFastISel())
Selector = SelectorType::FastISel;
else
Selector = SelectorType::SelectionDAG;
@@ -1124,7 +1138,7 @@ void TargetPassConfig::addMachinePasses() {
AddingMachinePasses = true;
// Add passes that optimize machine instructions in SSA form.
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOptLevel::None) {
addMachineSSAOptimization();
} else {
// If the target requests it, assign local variables to stack slots relative
@@ -1170,7 +1184,7 @@ void TargetPassConfig::addMachinePasses() {
addPass(&FixupStatepointCallerSavedID);
// Insert prolog/epilog code. Eliminate abstract frame index references...
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOptLevel::None) {
addPass(&PostRAMachineSinkingID);
addPass(&ShrinkWrapID);
}
@@ -1181,8 +1195,8 @@ void TargetPassConfig::addMachinePasses() {
addPass(createPrologEpilogInserterPass());
/// Add passes that optimize machine instructions after register allocation.
- if (getOptLevel() != CodeGenOpt::None)
- addMachineLateOptimization();
+ if (getOptLevel() != CodeGenOptLevel::None)
+ addMachineLateOptimization();
// Expand pseudo instructions before second scheduling pass.
addPass(&ExpandPostRAPseudosID);
@@ -1196,7 +1210,7 @@ void TargetPassConfig::addMachinePasses() {
// Second pass scheduler.
// Let Target optionally insert this pass by itself at some other
// point.
- if (getOptLevel() != CodeGenOpt::None &&
+ if (getOptLevel() != CodeGenOptLevel::None &&
!TM->targetSchedulesPostRAScheduling()) {
if (MISchedPostRA)
addPass(&PostMachineSchedulerID);
@@ -1205,13 +1219,10 @@ void TargetPassConfig::addMachinePasses() {
}
// GC
- if (addGCPasses()) {
- if (PrintGCInfo)
- addPass(createGCInfoPrinter(dbgs()));
- }
+ addGCPasses();
// Basic block placement.
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
addBlockPlacement();
// Insert before XRay Instrumentation.
@@ -1235,7 +1246,8 @@ void TargetPassConfig::addMachinePasses() {
addPass(&LiveDebugValuesID);
addPass(&MachineSanitizerBinaryMetadataID);
- if (TM->Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None &&
+ if (TM->Options.EnableMachineOutliner &&
+ getOptLevel() != CodeGenOptLevel::None &&
EnableMachineOutliner != RunOutliner::NeverOutline) {
bool RunOnAllFunctions =
(EnableMachineOutliner == RunOutliner::AlwaysOutline);
@@ -1245,6 +1257,9 @@ void TargetPassConfig::addMachinePasses() {
addPass(createMachineOutlinerPass(RunOnAllFunctions));
}
+ if (GCEmptyBlocks)
+ addPass(llvm::createGCEmptyBasicBlocksPass());
+
if (EnableFSDiscriminator)
addPass(createMIRAddFSDiscriminatorsPass(
sampleprof::FSDiscriminatorPass::PassLast));
@@ -1257,6 +1272,7 @@ void TargetPassConfig::addMachinePasses() {
if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) {
addPass(llvm::createBasicBlockSectionsProfileReaderPass(
TM->getBBSectionsFuncListBuf()));
+ addPass(llvm::createBasicBlockPathCloningPass());
}
addPass(llvm::createBasicBlockSectionsPass());
} else if (TM->Options.EnableMachineFunctionSplitter ||
@@ -1336,7 +1352,8 @@ void TargetPassConfig::addMachineSSAOptimization() {
bool TargetPassConfig::getOptimizeRegAlloc() const {
switch (OptimizeRegAlloc) {
- case cl::BOU_UNSET: return getOptLevel() != CodeGenOpt::None;
+ case cl::BOU_UNSET:
+ return getOptLevel() != CodeGenOptLevel::None;
case cl::BOU_TRUE: return true;
case cl::BOU_FALSE: return false;
}
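All of the CodeGenOpt::None → CodeGenOptLevel::None edits above are one mechanical rename from an unscoped enum to a scoped one. A minimal sketch of the difference, assuming illustrative names rather than the real LLVM declarations:

#include <iostream>

// Old style: unscoped enum in a namespace; enumerators convert silently.
namespace CodeGenOpt {
enum Level { None, Less, Default, Aggressive };
}

// New style: scoped enum; comparisons must name the type explicitly.
enum class CodeGenOptLevel { None, Less, Default, Aggressive };

int main() {
  int Leaks = CodeGenOpt::None;           // compiles: silent int conversion
  // int Bad = CodeGenOptLevel::None;     // error: no implicit conversion
  CodeGenOptLevel OL = CodeGenOptLevel::Default;
  if (OL != CodeGenOptLevel::None)
    std::cout << "optimizing (old value leaked as " << Leaks << ")\n";
  return 0;
}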
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index 77d2dfcf2323..c50b1cf94227 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -56,12 +56,13 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
const LaneBitmask *SRILaneMasks,
LaneBitmask SRICoveringLanes,
const RegClassInfo *const RCIs,
+ const MVT::SimpleValueType *const RCVTLists,
unsigned Mode)
: InfoDesc(ID), SubRegIndexNames(SRINames),
SubRegIndexLaneMasks(SRILaneMasks),
RegClassBegin(RCB), RegClassEnd(RCE),
CoveringLanes(SRICoveringLanes),
- RCInfos(RCIs), HwMode(Mode) {
+ RCInfos(RCIs), RCVTLists(RCVTLists), HwMode(Mode) {
}
TargetRegisterInfo::~TargetRegisterInfo() = default;
@@ -498,7 +499,7 @@ bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,
return true;
}
-unsigned
+TypeSize
TargetRegisterInfo::getRegSizeInBits(Register Reg,
const MachineRegisterInfo &MRI) const {
const TargetRegisterClass *RC{};
@@ -507,16 +508,15 @@ TargetRegisterInfo::getRegSizeInBits(Register Reg,
// Instead, we need to access a register class that contains Reg and
// get the size of that register class.
RC = getMinimalPhysRegClass(Reg);
- } else {
- LLT Ty = MRI.getType(Reg);
- unsigned RegSize = Ty.isValid() ? Ty.getSizeInBits() : 0;
- // If Reg is not a generic register, query the register class to
- // get its size.
- if (RegSize)
- return RegSize;
- // Since Reg is not a generic register, it must have a register class.
- RC = MRI.getRegClass(Reg);
+ assert(RC && "Unable to deduce the register class");
+ return getRegSizeInBits(*RC);
}
+ LLT Ty = MRI.getType(Reg);
+ if (Ty.isValid())
+ return Ty.getSizeInBits();
+
+ // Since Reg is not a generic register, it may have a register class.
+ RC = MRI.getRegClass(Reg);
assert(RC && "Unable to deduce the register class");
return getRegSizeInBits(*RC);
}
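Returning TypeSize instead of unsigned lets getRegSizeInBits describe scalable registers (for example SVE vectors) without collapsing them to a plain integer. A toy model of the TypeSize idea, under the assumption that only the fixed/scalable distinction matters here:

#include <cassert>
#include <cstdint>
#include <iostream>

// Minimal model of llvm::TypeSize: either a fixed bit count, or "KnownMin
// bits times an unknown hardware scale" for scalable registers.
struct TypeSize {
  uint64_t KnownMin;
  bool Scalable;
  static TypeSize getFixed(uint64_t N) { return {N, false}; }
  static TypeSize getScalable(uint64_t N) { return {N, true}; }
  friend bool operator==(TypeSize A, TypeSize B) {
    return A.KnownMin == B.KnownMin && A.Scalable == B.Scalable;
  }
};

int main() {
  TypeSize GPR = TypeSize::getFixed(64);      // e.g. a 64-bit GPR
  TypeSize ZReg = TypeSize::getScalable(128); // e.g. an SVE Z register
  assert(GPR == TypeSize::getFixed(64));
  assert(!(ZReg == TypeSize::getFixed(128))); // scalable != fixed
  std::cout << "fixed=" << GPR.KnownMin << " scalableMin=" << ZReg.KnownMin
            << '\n';
}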
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp
index dba84950f49d..ce59b096992d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -36,6 +36,10 @@ static cl::opt<bool> EnableSchedModel("schedmodel", cl::Hidden, cl::init(true),
static cl::opt<bool> EnableSchedItins("scheditins", cl::Hidden, cl::init(true),
cl::desc("Use InstrItineraryData for latency lookup"));
+static cl::opt<bool> ForceEnableIntervals(
+ "sched-model-force-enable-intervals", cl::Hidden, cl::init(false),
+ cl::desc("Force the use of resource intervals in the schedule model"));
+
bool TargetSchedModel::hasInstrSchedModel() const {
return EnableSchedModel && SchedModel.hasInstrSchedModel();
}
@@ -164,16 +168,20 @@ static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) {
return UseIdx;
}
-// Top-level API for clients that know the operand indices.
+// Top-level API for clients that know the operand indices. This doesn't need to
+// return std::optional<unsigned>, as it always returns a valid latency.
unsigned TargetSchedModel::computeOperandLatency(
const MachineInstr *DefMI, unsigned DefOperIdx,
const MachineInstr *UseMI, unsigned UseOperIdx) const {
+ const unsigned InstrLatency = computeInstrLatency(DefMI);
+ const unsigned DefaultDefLatency = TII->defaultDefLatency(SchedModel, *DefMI);
+
if (!hasInstrSchedModel() && !hasInstrItineraries())
- return TII->defaultDefLatency(SchedModel, *DefMI);
+ return DefaultDefLatency;
if (hasInstrItineraries()) {
- int OperLatency = 0;
+ std::optional<unsigned> OperLatency;
if (UseMI) {
OperLatency = TII->getOperandLatency(&InstrItins, *DefMI, DefOperIdx,
*UseMI, UseOperIdx);
@@ -182,21 +190,13 @@ unsigned TargetSchedModel::computeOperandLatency(
unsigned DefClass = DefMI->getDesc().getSchedClass();
OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx);
}
- if (OperLatency >= 0)
- return OperLatency;
-
- // No operand latency was found.
- unsigned InstrLatency = TII->getInstrLatency(&InstrItins, *DefMI);
-
- // Expected latency is the max of the stage latency and itinerary props.
- // Rather than directly querying InstrItins stage latency, we call a TII
- // hook to allow subtargets to specialize latency. This hook is only
- // applicable to the InstrItins model. InstrSchedModel should model all
- // special cases without TII hooks.
- InstrLatency =
- std::max(InstrLatency, TII->defaultDefLatency(SchedModel, *DefMI));
- return InstrLatency;
+
+ // If no operand latency was found, the expected latency is the max of
+ // InstrLatency and DefaultDefLatency.

+ return OperLatency ? *OperLatency
+ : std::max(InstrLatency, DefaultDefLatency);
}
+
// hasInstrSchedModel()
const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
unsigned DefIdx = findDefIdx(DefMI, DefOperIdx);
@@ -233,7 +233,7 @@ unsigned TargetSchedModel::computeOperandLatency(
// FIXME: Automatically giving all implicit defs defaultDefLatency is
// undesirable. We should only do it for defs that are known to the MC
// desc like flags. Truly implicit defs should get 1 cycle latency.
- return DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, *DefMI);
+ return DefMI->isTransient() ? 0 : DefaultDefLatency;
}
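The rewrite above replaces the old `OperLatency >= 0` sentinel with std::optional<unsigned>, hoisting the two fallback values to the top of the function. A self-contained sketch of just that fallback decision:

#include <algorithm>
#include <iostream>
#include <optional>

// The itinerary lookup now returns std::optional<unsigned> instead of
// using -1 as a "not found" sentinel.
unsigned computeLatency(std::optional<unsigned> OperLatency,
                        unsigned InstrLatency, unsigned DefaultDefLatency) {
  // Prefer the per-operand latency; otherwise fall back to the max of the
  // whole-instruction latency and the target's default def latency.
  return OperLatency ? *OperLatency
                     : std::max(InstrLatency, DefaultDefLatency);
}

int main() {
  std::cout << computeLatency(std::nullopt, 3, 1) << '\n'; // 3 (fallback)
  std::cout << computeLatency(2, 3, 1) << '\n';            // 2 (operand hit)
}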
unsigned
@@ -341,3 +341,9 @@ TargetSchedModel::computeReciprocalThroughput(const MCInst &MI) const {
return computeReciprocalThroughput(MI.getOpcode());
}
+bool TargetSchedModel::enableIntervals() const {
+ if (ForceEnableIntervals)
+ return true;
+
+ return SchedModel.EnableIntervals;
+}
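enableIntervals() follows the common LLVM pattern of letting a hidden command-line flag override what the scheduling model says. A standalone sketch of that override, with a plain bool standing in for the cl::opt:

#include <iostream>

// ForceEnableIntervals stands in for -sched-model-force-enable-intervals.
static bool ForceEnableIntervals = false;

struct SchedModel { bool EnableIntervals = false; };

bool enableIntervals(const SchedModel &M) {
  if (ForceEnableIntervals)
    return true;              // command line wins
  return M.EnableIntervals;   // otherwise defer to the target's model
}

int main() {
  SchedModel M;
  std::cout << enableIntervals(M) << '\n'; // 0
  ForceEnableIntervals = true;
  std::cout << enableIntervals(M) << '\n'; // 1
}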
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
index ba2c8dda7de5..6c97bc0568bd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -41,7 +41,7 @@ bool TargetSubtargetInfo::enableJoinGlobalCopies() const {
}
bool TargetSubtargetInfo::enableRALocalReassignment(
- CodeGenOpt::Level OptLevel) const {
+ CodeGenOptLevel OptLevel) const {
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index c3ea76bf8cea..bf689dbd308f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -95,7 +95,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
LiveVariables *LV = nullptr;
LiveIntervals *LIS = nullptr;
AliasAnalysis *AA = nullptr;
- CodeGenOpt::Level OptLevel = CodeGenOpt::None;
+ CodeGenOptLevel OptLevel = CodeGenOptLevel::None;
// The current basic block being processed.
MachineBasicBlock *MBB = nullptr;
@@ -116,12 +116,34 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
// registers. e.g. r1 = move v1024.
DenseMap<Register, Register> DstRegMap;
- void removeClobberedSrcRegMap(MachineInstr *MI);
+ MachineInstr *getSingleDef(Register Reg, MachineBasicBlock *BB) const;
bool isRevCopyChain(Register FromReg, Register ToReg, int Maxlen);
bool noUseAfterLastDef(Register Reg, unsigned Dist, unsigned &LastDef);
+ bool isCopyToReg(MachineInstr &MI, Register &SrcReg, Register &DstReg,
+ bool &IsSrcPhys, bool &IsDstPhys) const;
+
+ bool isPlainlyKilled(const MachineInstr *MI, LiveRange &LR) const;
+ bool isPlainlyKilled(const MachineInstr *MI, Register Reg) const;
+ bool isPlainlyKilled(const MachineOperand &MO) const;
+
+ bool isKilled(MachineInstr &MI, Register Reg, bool allowFalsePositives) const;
+
+ MachineInstr *findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
+ bool &IsCopy, Register &DstReg,
+ bool &IsDstPhys) const;
+
+ bool regsAreCompatible(Register RegA, Register RegB) const;
+
+ void removeMapRegEntry(const MachineOperand &MO,
+ DenseMap<Register, Register> &RegMap) const;
+
+ void removeClobberedSrcRegMap(MachineInstr *MI);
+
+ bool regOverlapsSet(const SmallVectorImpl<Register> &Set, Register Reg) const;
+
bool isProfitableToCommute(Register RegA, Register RegB, Register RegC,
MachineInstr *MI, unsigned Dist);
@@ -199,8 +221,9 @@ INITIALIZE_PASS_END(TwoAddressInstructionPass, DEBUG_TYPE,
"Two-Address instruction pass", false, false)
/// Return the MachineInstr* if it is the single def of the Reg in current BB.
-static MachineInstr *getSingleDef(Register Reg, MachineBasicBlock *BB,
- const MachineRegisterInfo *MRI) {
+MachineInstr *
+TwoAddressInstructionPass::getSingleDef(Register Reg,
+ MachineBasicBlock *BB) const {
MachineInstr *Ret = nullptr;
for (MachineInstr &DefMI : MRI->def_instructions(Reg)) {
if (DefMI.getParent() != BB || DefMI.isDebugValue())
@@ -224,7 +247,7 @@ bool TwoAddressInstructionPass::isRevCopyChain(Register FromReg, Register ToReg,
int Maxlen) {
Register TmpReg = FromReg;
for (int i = 0; i < Maxlen; i++) {
- MachineInstr *Def = getSingleDef(TmpReg, MBB, MRI);
+ MachineInstr *Def = getSingleDef(TmpReg, MBB);
if (!Def || !Def->isCopy())
return false;
@@ -263,9 +286,9 @@ bool TwoAddressInstructionPass::noUseAfterLastDef(Register Reg, unsigned Dist,
/// Return true if the specified MI is a copy instruction or an extract_subreg
/// instruction. It also returns the source and destination registers and
/// whether they are physical registers by reference.
-static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
- Register &SrcReg, Register &DstReg, bool &IsSrcPhys,
- bool &IsDstPhys) {
+bool TwoAddressInstructionPass::isCopyToReg(MachineInstr &MI, Register &SrcReg,
+ Register &DstReg, bool &IsSrcPhys,
+ bool &IsDstPhys) const {
SrcReg = 0;
DstReg = 0;
if (MI.isCopy()) {
@@ -283,27 +306,37 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
return true;
}
+bool TwoAddressInstructionPass::isPlainlyKilled(const MachineInstr *MI,
+ LiveRange &LR) const {
+ // This is to match the kill flag version where undefs don't have kill flags.
+ if (!LR.hasAtLeastOneValue())
+ return false;
+
+ SlotIndex useIdx = LIS->getInstructionIndex(*MI);
+ LiveInterval::const_iterator I = LR.find(useIdx);
+ assert(I != LR.end() && "Reg must be live-in to use.");
+ return !I->end.isBlock() && SlotIndex::isSameInstr(I->end, useIdx);
+}
+
/// Test if the given register value, which is used by the
/// given instruction, is killed by the given instruction.
-static bool isPlainlyKilled(const MachineInstr *MI, Register Reg,
- LiveIntervals *LIS) {
- if (LIS && Reg.isVirtual() && !LIS->isNotInMIMap(*MI)) {
- // FIXME: Sometimes tryInstructionTransform() will add instructions and
- // test whether they can be folded before keeping them. In this case it
- // sets a kill before recursively calling tryInstructionTransform() again.
- // If there is no interval available, we assume that this instruction is
- // one of those. A kill flag is manually inserted on the operand so the
- // check below will handle it.
- LiveInterval &LI = LIS->getInterval(Reg);
- // This is to match the kill flag version where undefs don't have kill
- // flags.
- if (!LI.hasAtLeastOneValue())
+bool TwoAddressInstructionPass::isPlainlyKilled(const MachineInstr *MI,
+ Register Reg) const {
+ // FIXME: Sometimes tryInstructionTransform() will add instructions and
+ // test whether they can be folded before keeping them. In this case it
+ // sets a kill before recursively calling tryInstructionTransform() again.
+ // If there is no interval available, we assume that this instruction is
+ // one of those. A kill flag is manually inserted on the operand so the
+ // check below will handle it.
+ if (LIS && !LIS->isNotInMIMap(*MI)) {
+ if (Reg.isVirtual())
+ return isPlainlyKilled(MI, LIS->getInterval(Reg));
+ // Reserved registers are considered always live.
+ if (MRI->isReserved(Reg))
return false;
-
- SlotIndex useIdx = LIS->getInstructionIndex(*MI);
- LiveInterval::const_iterator I = LI.find(useIdx);
- assert(I != LI.end() && "Reg must be live-in to use.");
- return !I->end.isBlock() && SlotIndex::isSameInstr(I->end, useIdx);
+ return all_of(TRI->regunits(Reg), [&](MCRegUnit U) {
+ return isPlainlyKilled(MI, LIS->getRegUnit(U));
+ });
}
return MI->killsRegister(Reg);
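The LiveRange overload above tests whether a value's live segment ends exactly at the using instruction. A toy model (integer indices instead of SlotIndex, block-boundary subtleties ignored) of that kill test:

#include <iostream>
#include <vector>

// A register is "plainly killed" at instruction index UseIdx if one of its
// live segments ends right there.
struct Segment { unsigned Start, End; }; // half-open [Start, End)

bool isPlainlyKilled(const std::vector<Segment> &LiveRange, unsigned UseIdx) {
  for (const Segment &S : LiveRange)
    if (S.Start <= UseIdx && UseIdx < S.End)
      return UseIdx + 1 == S.End; // last use inside this segment
  return false; // not live here at all
}

int main() {
  std::vector<Segment> LR = {{0, 4}, {8, 10}};
  std::cout << isPlainlyKilled(LR, 3) << '\n'; // 1: segment ends right after
  std::cout << isPlainlyKilled(LR, 1) << '\n'; // 0: still live afterwards
}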
@@ -311,8 +344,9 @@ static bool isPlainlyKilled(const MachineInstr *MI, Register Reg,
/// Test if the register used by the given operand is killed by the operand's
/// instruction.
-static bool isPlainlyKilled(const MachineOperand &MO, LiveIntervals *LIS) {
- return MO.isKill() || isPlainlyKilled(MO.getParent(), MO.getReg(), LIS);
+bool TwoAddressInstructionPass::isPlainlyKilled(
+ const MachineOperand &MO) const {
+ return MO.isKill() || isPlainlyKilled(MO.getParent(), MO.getReg());
}
/// Test if the given register value, which is used by the given
@@ -332,15 +366,14 @@ static bool isPlainlyKilled(const MachineOperand &MO, LiveIntervals *LIS) {
///
/// If allowFalsePositives is true then likely kills are treated as kills even
/// if it can't be proven that they are kills.
-static bool isKilled(MachineInstr &MI, Register Reg,
- const MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
- LiveIntervals *LIS, bool allowFalsePositives) {
+bool TwoAddressInstructionPass::isKilled(MachineInstr &MI, Register Reg,
+ bool allowFalsePositives) const {
MachineInstr *DefMI = &MI;
while (true) {
// All uses of physical registers are likely to be kills.
if (Reg.isPhysical() && (allowFalsePositives || MRI->hasOneUse(Reg)))
return true;
- if (!isPlainlyKilled(DefMI, Reg, LIS))
+ if (!isPlainlyKilled(DefMI, Reg))
return false;
if (Reg.isPhysical())
return true;
@@ -354,7 +387,7 @@ static bool isKilled(MachineInstr &MI, Register Reg,
Register SrcReg, DstReg;
// If the def is something other than a copy, then it isn't going to
// be coalesced, so follow the kill flag.
- if (!isCopyToReg(*DefMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+ if (!isCopyToReg(*DefMI, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
return true;
Reg = SrcReg;
}
@@ -378,17 +411,15 @@ static bool isTwoAddrUse(MachineInstr &MI, Register Reg, Register &DstReg) {
/// Given a register, if all its uses are in the same basic block, return the
/// last use instruction if it's a copy or a two-address use.
-static MachineInstr *
-findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
- MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
- bool &IsCopy, Register &DstReg, bool &IsDstPhys,
- LiveIntervals *LIS) {
+MachineInstr *TwoAddressInstructionPass::findOnlyInterestingUse(
+ Register Reg, MachineBasicBlock *MBB, bool &IsCopy, Register &DstReg,
+ bool &IsDstPhys) const {
MachineOperand *UseOp = nullptr;
for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
MachineInstr *MI = MO.getParent();
if (MI->getParent() != MBB)
return nullptr;
- if (isPlainlyKilled(MI, Reg, LIS))
+ if (isPlainlyKilled(MI, Reg))
UseOp = &MO;
}
if (!UseOp)
@@ -397,7 +428,7 @@ findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
Register SrcReg;
bool IsSrcPhys;
- if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) {
+ if (isCopyToReg(UseMI, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) {
IsCopy = true;
return &UseMI;
}
@@ -437,8 +468,8 @@ static MCRegister getMappedReg(Register Reg,
}
/// Return true if the two registers are equal or aliased.
-static bool regsAreCompatible(Register RegA, Register RegB,
- const TargetRegisterInfo *TRI) {
+bool TwoAddressInstructionPass::regsAreCompatible(Register RegA,
+ Register RegB) const {
if (RegA == RegB)
return true;
if (!RegA || !RegB)
@@ -447,9 +478,8 @@ static bool regsAreCompatible(Register RegA, Register RegB,
}
/// From RegMap remove entries mapped to a physical register which overlaps MO.
-static void removeMapRegEntry(const MachineOperand &MO,
- DenseMap<Register, Register> &RegMap,
- const TargetRegisterInfo *TRI) {
+void TwoAddressInstructionPass::removeMapRegEntry(
+ const MachineOperand &MO, DenseMap<Register, Register> &RegMap) const {
assert(
(MO.isReg() || MO.isRegMask()) &&
"removeMapRegEntry must be called with a register or regmask operand.");
@@ -497,13 +527,13 @@ void TwoAddressInstructionPass::removeClobberedSrcRegMap(MachineInstr *MI) {
return;
Register Src = MI->getOperand(1).getReg();
- if (regsAreCompatible(Dst, getMappedReg(Src, SrcRegMap), TRI))
+ if (regsAreCompatible(Dst, getMappedReg(Src, SrcRegMap)))
return;
}
for (const MachineOperand &MO : MI->operands()) {
if (MO.isRegMask()) {
- removeMapRegEntry(MO, SrcRegMap, TRI);
+ removeMapRegEntry(MO, SrcRegMap);
continue;
}
if (!MO.isReg() || !MO.isDef())
@@ -511,13 +541,13 @@ void TwoAddressInstructionPass::removeClobberedSrcRegMap(MachineInstr *MI) {
Register Reg = MO.getReg();
if (!Reg || Reg.isVirtual())
continue;
- removeMapRegEntry(MO, SrcRegMap, TRI);
+ removeMapRegEntry(MO, SrcRegMap);
}
}
// Returns true if Reg is equal or aliased to at least one register in Set.
-static bool regOverlapsSet(const SmallVectorImpl<Register> &Set, Register Reg,
- const TargetRegisterInfo *TRI) {
+bool TwoAddressInstructionPass::regOverlapsSet(
+ const SmallVectorImpl<Register> &Set, Register Reg) const {
for (unsigned R : Set)
if (TRI->regsOverlap(R, Reg))
return true;
@@ -532,7 +562,7 @@ bool TwoAddressInstructionPass::isProfitableToCommute(Register RegA,
Register RegC,
MachineInstr *MI,
unsigned Dist) {
- if (OptLevel == CodeGenOpt::None)
+ if (OptLevel == CodeGenOptLevel::None)
return false;
// Determine if it's profitable to commute this two address instruction. In
@@ -553,7 +583,7 @@ bool TwoAddressInstructionPass::isProfitableToCommute(Register RegA,
// insert => %reg1030 = COPY %reg1029
// %reg1030 = ADD8rr killed %reg1029, killed %reg1028, implicit dead %eflags
- if (!isPlainlyKilled(MI, RegC, LIS))
+ if (!isPlainlyKilled(MI, RegC))
return false;
// Ok, we have something like:
@@ -570,8 +600,8 @@ bool TwoAddressInstructionPass::isProfitableToCommute(Register RegA,
if (ToRegA) {
MCRegister FromRegB = getMappedReg(RegB, SrcRegMap);
MCRegister FromRegC = getMappedReg(RegC, SrcRegMap);
- bool CompB = FromRegB && regsAreCompatible(FromRegB, ToRegA, TRI);
- bool CompC = FromRegC && regsAreCompatible(FromRegC, ToRegA, TRI);
+ bool CompB = FromRegB && regsAreCompatible(FromRegB, ToRegA);
+ bool CompC = FromRegC && regsAreCompatible(FromRegC, ToRegA);
// Compute if any of the following are true:
// -RegB is not tied to a register and RegC is compatible with RegA.
@@ -675,7 +705,7 @@ bool TwoAddressInstructionPass::isProfitableToConv3Addr(Register RegA,
if (!FromRegB)
return false;
MCRegister ToRegA = getMappedReg(RegA, DstRegMap);
- return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI));
+ return (ToRegA && !regsAreCompatible(FromRegB, ToRegA));
}
/// Convert the specified two-address instruction into a three address one.
@@ -728,8 +758,8 @@ void TwoAddressInstructionPass::scanUses(Register DstReg) {
bool IsCopy = false;
Register NewReg;
Register Reg = DstReg;
- while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy,
- NewReg, IsDstPhys, LIS)) {
+ while (MachineInstr *UseMI =
+ findOnlyInterestingUse(Reg, MBB, IsCopy, NewReg, IsDstPhys)) {
if (IsCopy && !Processed.insert(UseMI).second)
break;
@@ -781,7 +811,7 @@ void TwoAddressInstructionPass::processCopy(MachineInstr *MI) {
bool IsSrcPhys, IsDstPhys;
Register SrcReg, DstReg;
- if (!isCopyToReg(*MI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+ if (!isCopyToReg(*MI, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
return;
if (IsDstPhys && !IsSrcPhys) {
@@ -865,7 +895,7 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill(
Defs.push_back(MOReg);
else {
Uses.push_back(MOReg);
- if (MOReg != Reg && isPlainlyKilled(MO, LIS))
+ if (MOReg != Reg && isPlainlyKilled(MO))
Kills.push_back(MOReg);
}
}
@@ -876,7 +906,7 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill(
MachineBasicBlock::iterator End = AfterMI;
while (End != MBB->end()) {
End = skipDebugInstructionsForward(End, MBB->end());
- if (End->isCopy() && regOverlapsSet(Defs, End->getOperand(1).getReg(), TRI))
+ if (End->isCopy() && regOverlapsSet(Defs, End->getOperand(1).getReg()))
Defs.push_back(End->getOperand(0).getReg());
else
break;
@@ -905,20 +935,20 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill(
if (!MOReg)
continue;
if (MO.isDef()) {
- if (regOverlapsSet(Uses, MOReg, TRI))
+ if (regOverlapsSet(Uses, MOReg))
// Physical register use would be clobbered.
return false;
- if (!MO.isDead() && regOverlapsSet(Defs, MOReg, TRI))
+ if (!MO.isDead() && regOverlapsSet(Defs, MOReg))
// May clobber a physical register def.
// FIXME: This may be too conservative. It's ok if the instruction
// is sunk completely below the use.
return false;
} else {
- if (regOverlapsSet(Defs, MOReg, TRI))
+ if (regOverlapsSet(Defs, MOReg))
return false;
- bool isKill = isPlainlyKilled(MO, LIS);
- if (MOReg != Reg && ((isKill && regOverlapsSet(Uses, MOReg, TRI)) ||
- regOverlapsSet(Kills, MOReg, TRI)))
+ bool isKill = isPlainlyKilled(MO);
+ if (MOReg != Reg && ((isKill && regOverlapsSet(Uses, MOReg)) ||
+ regOverlapsSet(Kills, MOReg)))
// Don't want to extend other live ranges and update kills.
return false;
if (MOReg == Reg && !isKill)
@@ -1044,7 +1074,7 @@ bool TwoAddressInstructionPass::rescheduleKillAboveMI(
continue;
if (isDefTooClose(MOReg, DI->second, MI))
return false;
- bool isKill = isPlainlyKilled(MO, LIS);
+ bool isKill = isPlainlyKilled(MO);
if (MOReg == Reg && !isKill)
return false;
Uses.push_back(MOReg);
@@ -1079,14 +1109,14 @@ bool TwoAddressInstructionPass::rescheduleKillAboveMI(
if (!MOReg)
continue;
if (MO.isUse()) {
- if (regOverlapsSet(Defs, MOReg, TRI))
+ if (regOverlapsSet(Defs, MOReg))
// Moving KillMI can clobber the physical register if the def has
// not been seen.
return false;
- if (regOverlapsSet(Kills, MOReg, TRI))
+ if (regOverlapsSet(Kills, MOReg))
// Don't want to extend other live ranges and update kills.
return false;
- if (&OtherMI != MI && MOReg == Reg && !isPlainlyKilled(MO, LIS))
+ if (&OtherMI != MI && MOReg == Reg && !isPlainlyKilled(MO))
// We can't schedule across a use of the register in question.
return false;
} else {
@@ -1096,12 +1126,12 @@ bool TwoAddressInstructionPass::rescheduleKillAboveMI(
for (unsigned i = 0, e = OtherDefs.size(); i != e; ++i) {
Register MOReg = OtherDefs[i];
- if (regOverlapsSet(Uses, MOReg, TRI))
+ if (regOverlapsSet(Uses, MOReg))
return false;
- if (MOReg.isPhysical() && regOverlapsSet(LiveDefs, MOReg, TRI))
+ if (MOReg.isPhysical() && regOverlapsSet(LiveDefs, MOReg))
return false;
// Physical register def is seen.
- llvm::erase_value(Defs, MOReg);
+ llvm::erase(Defs, MOReg);
}
}
@@ -1169,7 +1199,7 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
// If OtherOp dies but BaseOp does not, swap the OtherOp and BaseOp
// operands. This makes the live ranges of DstOp and OtherOp joinable.
- bool OtherOpKilled = isKilled(*MI, OtherOpReg, MRI, TII, LIS, false);
+ bool OtherOpKilled = isKilled(*MI, OtherOpReg, false);
bool DoCommute = !BaseOpKilled && OtherOpKilled;
if (!DoCommute &&
@@ -1212,7 +1242,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned SrcIdx, unsigned DstIdx,
unsigned &Dist, bool shouldOnlyCommute) {
- if (OptLevel == CodeGenOpt::None)
+ if (OptLevel == CodeGenOptLevel::None)
return false;
MachineInstr &MI = *mi;
@@ -1220,7 +1250,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
Register regB = MI.getOperand(SrcIdx).getReg();
assert(regB.isVirtual() && "cannot make instruction into two-address form");
- bool regBKilled = isKilled(MI, regB, MRI, TII, LIS, true);
+ bool regBKilled = isKilled(MI, regB, true);
if (regA.isVirtual())
scanUses(regA);
@@ -1252,7 +1282,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
// confusing the three address conversion below.
if (Commuted) {
regB = MI.getOperand(SrcIdx).getReg();
- regBKilled = isKilled(MI, regB, MRI, TII, LIS, true);
+ regBKilled = isKilled(MI, regB, true);
}
if (MI.isConvertibleTo3Addr()) {
@@ -1547,7 +1577,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
MachineOperand &MO = MI->getOperand(SrcIdx);
assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() &&
"inconsistent operand info for 2-reg pass");
- if (MO.isKill()) {
+ if (isPlainlyKilled(MO)) {
MO.setIsKill(false);
RemovedKillFlag = true;
}
@@ -1568,7 +1598,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
for (MachineOperand &MO : MI->all_uses()) {
if (MO.getReg() == RegB) {
if (MO.getSubReg() == SubRegB && !IsEarlyClobber) {
- if (MO.isKill()) {
+ if (isPlainlyKilled(MO)) {
MO.setIsKill(false);
RemovedKillFlag = true;
}
@@ -1738,7 +1768,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
// Disable optimizations if requested. We cannot skip the whole pass as some
// fixups are necessary for correctness.
if (skipFunction(Func.getFunction()))
- OptLevel = CodeGenOpt::None;
+ OptLevel = CodeGenOptLevel::None;
bool MadeChange = false;
@@ -1849,12 +1879,16 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
// %reg.subidx.
LaneBitmask LaneMask =
TRI->getSubRegIndexLaneMask(mi->getOperand(0).getSubReg());
- SlotIndex Idx = LIS->getInstructionIndex(*mi);
+ SlotIndex Idx = LIS->getInstructionIndex(*mi).getRegSlot();
for (auto &S : LI.subranges()) {
if ((S.LaneMask & LaneMask).none()) {
- LiveRange::iterator UseSeg = S.FindSegmentContaining(Idx);
- LiveRange::iterator DefSeg = std::next(UseSeg);
- S.MergeValueNumberInto(DefSeg->valno, UseSeg->valno);
+ LiveRange::iterator DefSeg = S.FindSegmentContaining(Idx);
+ if (mi->getOperand(0).isUndef()) {
+ S.removeValNo(DefSeg->valno);
+ } else {
+ LiveRange::iterator UseSeg = std::prev(DefSeg);
+ S.MergeValueNumberInto(DefSeg->valno, UseSeg->valno);
+ }
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
index 426292345a14..053caf518bd1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
@@ -492,11 +492,13 @@ void IRPromoter::PromoteTree() {
// SafeWrap because SafeWrap.size() is used elsewhere.
// For cmp, we need to sign extend a constant appearing in either
// operand. For add, we should only sign extend the RHS.
- Constant *NewConst = (SafeWrap.contains(I) &&
+ Constant *NewConst =
+ ConstantInt::get(Const->getContext(),
+ (SafeWrap.contains(I) &&
(I->getOpcode() == Instruction::ICmp || i == 1) &&
I->getOpcode() != Instruction::Sub)
- ? ConstantExpr::getSExt(Const, ExtTy)
- : ConstantExpr::getZExt(Const, ExtTy);
+ ? Const->getValue().sext(PromotedWidth)
+ : Const->getValue().zext(PromotedWidth));
I->setOperand(i, NewConst);
} else if (isa<UndefValue>(Op))
I->setOperand(i, ConstantInt::get(ExtTy, 0));
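The TypePromotion change computes the widened constant with APInt::sext/zext instead of the removed ConstantExpr::getSExt/getZExt. The underlying choice is ordinary sign versus zero extension, shown here with plain integers:

#include <cstdint>
#include <iostream>

// Widening the 8-bit constant 0xFF to 32 bits:
int main() {
  uint8_t C = 0xFF;
  int32_t SExt = static_cast<int8_t>(C); // sign extend -> 0xFFFFFFFF (-1)
  uint32_t ZExt = C;                     // zero extend -> 0x000000FF (255)
  std::cout << SExt << ' ' << ZExt << '\n';
  // For an icmp either operand may need sign extension; for add only the
  // RHS does; sub constants are always zero extended (per the hunk above).
}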
@@ -1014,11 +1016,8 @@ bool TypePromotionLegacy::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- if (!TPC)
- return false;
-
- auto *TM = &TPC->getTM<TargetMachine>();
+ auto &TPC = getAnalysis<TargetPassConfig>();
+ auto *TM = &TPC.getTM<TargetMachine>();
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
index d514e1642e29..ba3b9e00e34e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
@@ -79,35 +79,43 @@ bool EVT::isExtendedVector() const {
}
bool EVT::isExtended16BitVector() const {
- return isExtendedVector() && getExtendedSizeInBits() == 16;
+ return isExtendedVector() &&
+ getExtendedSizeInBits() == TypeSize::getFixed(16);
}
bool EVT::isExtended32BitVector() const {
- return isExtendedVector() && getExtendedSizeInBits() == 32;
+ return isExtendedVector() &&
+ getExtendedSizeInBits() == TypeSize::getFixed(32);
}
bool EVT::isExtended64BitVector() const {
- return isExtendedVector() && getExtendedSizeInBits() == 64;
+ return isExtendedVector() &&
+ getExtendedSizeInBits() == TypeSize::getFixed(64);
}
bool EVT::isExtended128BitVector() const {
- return isExtendedVector() && getExtendedSizeInBits() == 128;
+ return isExtendedVector() &&
+ getExtendedSizeInBits() == TypeSize::getFixed(128);
}
bool EVT::isExtended256BitVector() const {
- return isExtendedVector() && getExtendedSizeInBits() == 256;
+ return isExtendedVector() &&
+ getExtendedSizeInBits() == TypeSize::getFixed(256);
}
bool EVT::isExtended512BitVector() const {
- return isExtendedVector() && getExtendedSizeInBits() == 512;
+ return isExtendedVector() &&
+ getExtendedSizeInBits() == TypeSize::getFixed(512);
}
bool EVT::isExtended1024BitVector() const {
- return isExtendedVector() && getExtendedSizeInBits() == 1024;
+ return isExtendedVector() &&
+ getExtendedSizeInBits() == TypeSize::getFixed(1024);
}
bool EVT::isExtended2048BitVector() const {
- return isExtendedVector() && getExtendedSizeInBits() == 2048;
+ return isExtendedVector() &&
+ getExtendedSizeInBits() == TypeSize::getFixed(2048);
}
bool EVT::isExtendedFixedLengthVector() const {
@@ -143,7 +151,7 @@ ElementCount EVT::getExtendedVectorElementCount() const {
TypeSize EVT::getExtendedSizeInBits() const {
assert(isExtended() && "Type is not extended!");
if (IntegerType *ITy = dyn_cast<IntegerType>(LLVMTy))
- return TypeSize::Fixed(ITy->getBitWidth());
+ return TypeSize::getFixed(ITy->getBitWidth());
if (VectorType *VTy = dyn_cast<VectorType>(LLVMTy))
return VTy->getPrimitiveSizeInBits();
llvm_unreachable("Unrecognized extended type!");
@@ -637,6 +645,9 @@ void MVT::dump() const {
#endif
void MVT::print(raw_ostream &OS) const {
- OS << EVT(*this).getEVTString();
+ if (SimpleTy == INVALID_SIMPLE_VALUE_TYPE)
+ OS << "invalid";
+ else
+ OS << EVT(*this).getEVTString();
}
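MVT::print now guards the invalid enumerator instead of asserting inside getEVTString(). The pattern, sketched with a hypothetical enum:

#include <iostream>

// Map the invalid enumerator to a readable string instead of asserting
// downstream.
enum class SimpleVT { Invalid = -1, i32 = 0, f64 = 1 };

void print(std::ostream &OS, SimpleVT Ty) {
  switch (Ty) {
  case SimpleVT::Invalid: OS << "invalid"; break;
  case SimpleVT::i32:     OS << "i32";     break;
  case SimpleVT::f64:     OS << "f64";     break;
  }
}

int main() { print(std::cout, SimpleVT::Invalid); std::cout << '\n'; }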
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
index a816bd5b52de..48f4ee29fbe9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -261,7 +261,7 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
Indexes = &getAnalysis<SlotIndexes>();
LIS = &getAnalysis<LiveIntervals>();
VRM = &getAnalysis<VirtRegMap>();
- DebugVars = getAnalysisIfAvailable<LiveDebugVariables>();
+ DebugVars = &getAnalysis<LiveDebugVariables>();
LLVM_DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
<< "********** Function: " << MF->getName() << '\n');
LLVM_DEBUG(VRM->dump());
@@ -275,7 +275,7 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
// Rewrite virtual registers.
rewrite();
- if (DebugVars && ClearVirtRegs) {
+ if (ClearVirtRegs) {
// Write out new DBG_VALUE instructions.
// We only do this if ClearVirtRegs is specified since this should be the
@@ -311,8 +311,8 @@ void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI,
}
// Check all mbb start positions between First and Last while
- // simulatenously advancing an iterator for each subrange.
- for (SlotIndexes::MBBIndexIterator MBBI = Indexes->findMBBIndex(First);
+ // simultaneously advancing an iterator for each subrange.
+ for (SlotIndexes::MBBIndexIterator MBBI = Indexes->getMBBLowerBound(First);
MBBI != Indexes->MBBIndexEnd() && MBBI->first <= Last; ++MBBI) {
SlotIndex MBBBegin = MBBI->first;
// Advance all subrange iterators so that their end position is just
@@ -363,7 +363,7 @@ void VirtRegRewriter::addMBBLiveIns() {
// sorted by slot indexes.
SlotIndexes::MBBIndexIterator I = Indexes->MBBIndexBegin();
for (const auto &Seg : LI) {
- I = Indexes->advanceMBBIndex(I, Seg.start);
+ I = Indexes->getMBBLowerBound(I, Seg.start);
for (; I != Indexes->MBBIndexEnd() && I->first < Seg.end; ++I) {
MachineBasicBlock *MBB = I->second;
MBB->addLiveIn(PhysReg);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
index cc04807e8455..1a9e1ba869c3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -77,6 +77,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/WasmEHPrepare.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/WasmEHFuncInfo.h"
@@ -88,10 +89,12 @@
using namespace llvm;
-#define DEBUG_TYPE "wasmehprepare"
+#define DEBUG_TYPE "wasm-eh-prepare"
namespace {
-class WasmEHPrepare : public FunctionPass {
+class WasmEHPrepareImpl {
+ friend class WasmEHPrepare;
+
Type *LPadContextTy = nullptr; // type of 'struct _Unwind_LandingPadContext'
GlobalVariable *LPadContextGV = nullptr; // __wasm_lpad_context
@@ -114,18 +117,40 @@ class WasmEHPrepare : public FunctionPass {
void prepareEHPad(BasicBlock *BB, bool NeedPersonality, unsigned Index = 0);
public:
+ WasmEHPrepareImpl() = default;
+ WasmEHPrepareImpl(Type *LPadContextTy_) : LPadContextTy(LPadContextTy_) {}
+ bool runOnFunction(Function &F);
+};
+
+class WasmEHPrepare : public FunctionPass {
+ WasmEHPrepareImpl P;
+
+public:
static char ID; // Pass identification, replacement for typeid
WasmEHPrepare() : FunctionPass(ID) {}
bool doInitialization(Module &M) override;
- bool runOnFunction(Function &F) override;
+ bool runOnFunction(Function &F) override { return P.runOnFunction(F); }
StringRef getPassName() const override {
return "WebAssembly Exception handling preparation";
}
};
+
} // end anonymous namespace
+PreservedAnalyses WasmEHPreparePass::run(Function &F,
+ FunctionAnalysisManager &) {
+ auto &Context = F.getContext();
+ auto *I32Ty = Type::getInt32Ty(Context);
+ auto *PtrTy = PointerType::get(Context, 0);
+ auto *LPadContextTy =
+ StructType::get(I32Ty /*lpad_index*/, PtrTy /*lsda*/, I32Ty /*selector*/);
+ WasmEHPrepareImpl P(LPadContextTy);
+ bool Changed = P.runOnFunction(F);
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
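This restructuring is the usual recipe for supporting both pass managers: move the logic into an Impl class and keep both pass types as thin adapters. A standalone sketch of that shape, with invented names:

#include <iostream>

struct Function { const char *Name; };

// The real work lives in one Impl class.
class PrepareImpl {
public:
  bool runOnFunction(Function &F) {
    std::cout << "preparing " << F.Name << '\n';
    return true; // pretend we changed something
  }
};

// Legacy-PM style adapter: owns an Impl for the pass's lifetime.
class LegacyPass {
  PrepareImpl P;
public:
  bool runOnFunction(Function &F) { return P.runOnFunction(F); }
};

// New-PM style adapter: constructs an Impl per run.
bool newPMRun(Function &F) { return PrepareImpl().runOnFunction(F); }

int main() {
  Function F{"foo"};
  LegacyPass LP;
  LP.runOnFunction(F);
  newPMRun(F);
}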
char WasmEHPrepare::ID = 0;
INITIALIZE_PASS_BEGIN(WasmEHPrepare, DEBUG_TYPE,
"Prepare WebAssembly exceptions", false, false)
@@ -136,9 +161,9 @@ FunctionPass *llvm::createWasmEHPass() { return new WasmEHPrepare(); }
bool WasmEHPrepare::doInitialization(Module &M) {
IRBuilder<> IRB(M.getContext());
- LPadContextTy = StructType::get(IRB.getInt32Ty(), // lpad_index
- IRB.getInt8PtrTy(), // lsda
- IRB.getInt32Ty() // selector
+ P.LPadContextTy = StructType::get(IRB.getInt32Ty(), // lpad_index
+ IRB.getPtrTy(), // lsda
+ IRB.getInt32Ty() // selector
);
return false;
}
@@ -157,14 +182,14 @@ static void eraseDeadBBsAndChildren(const Container &BBs) {
}
}
-bool WasmEHPrepare::runOnFunction(Function &F) {
+bool WasmEHPrepareImpl::runOnFunction(Function &F) {
bool Changed = false;
Changed |= prepareThrows(F);
Changed |= prepareEHPads(F);
return Changed;
}
-bool WasmEHPrepare::prepareThrows(Function &F) {
+bool WasmEHPrepareImpl::prepareThrows(Function &F) {
Module &M = *F.getParent();
IRBuilder<> IRB(F.getContext());
bool Changed = false;
@@ -192,7 +217,7 @@ bool WasmEHPrepare::prepareThrows(Function &F) {
return Changed;
}
-bool WasmEHPrepare::prepareEHPads(Function &F) {
+bool WasmEHPrepareImpl::prepareEHPads(Function &F) {
Module &M = *F.getParent();
IRBuilder<> IRB(F.getContext());
@@ -249,8 +274,8 @@ bool WasmEHPrepare::prepareEHPads(Function &F) {
CatchF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_catch);
// _Unwind_CallPersonality() wrapper function, which calls the personality
- CallPersonalityF = M.getOrInsertFunction(
- "_Unwind_CallPersonality", IRB.getInt32Ty(), IRB.getInt8PtrTy());
+ CallPersonalityF = M.getOrInsertFunction("_Unwind_CallPersonality",
+ IRB.getInt32Ty(), IRB.getPtrTy());
if (Function *F = dyn_cast<Function>(CallPersonalityF.getCallee()))
F->setDoesNotThrow();
@@ -275,11 +300,11 @@ bool WasmEHPrepare::prepareEHPads(Function &F) {
// Prepare an EH pad for Wasm EH handling. If NeedPersonality is false, Index is
// ignored.
-void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality,
- unsigned Index) {
+void WasmEHPrepareImpl::prepareEHPad(BasicBlock *BB, bool NeedPersonality,
+ unsigned Index) {
assert(BB->isEHPad() && "BB is not an EHPad!");
IRBuilder<> IRB(BB->getContext());
- IRB.SetInsertPoint(&*BB->getFirstInsertionPt());
+ IRB.SetInsertPoint(BB, BB->getFirstInsertionPt());
auto *FPI = cast<FuncletPadInst>(BB->getFirstNonPHI());
Instruction *GetExnCI = nullptr, *GetSelectorCI = nullptr;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
index 11597b119893..95976c218c2f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -15,6 +15,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/WinEHPrepare.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
@@ -38,7 +39,7 @@
using namespace llvm;
-#define DEBUG_TYPE "winehprepare"
+#define DEBUG_TYPE "win-eh-prepare"
static cl::opt<bool> DisableDemotion(
"disable-demotion", cl::Hidden,
@@ -51,27 +52,19 @@ static cl::opt<bool> DisableCleanups(
cl::desc("Do not remove implausible terminators or other similar cleanups"),
cl::init(false));
+// TODO: Remove this option when we fully migrate to the new pass manager
static cl::opt<bool> DemoteCatchSwitchPHIOnlyOpt(
"demote-catchswitch-only", cl::Hidden,
cl::desc("Demote catchswitch BBs only (for wasm EH)"), cl::init(false));
namespace {
-class WinEHPrepare : public FunctionPass {
+class WinEHPrepareImpl {
public:
- static char ID; // Pass identification, replacement for typeid.
- WinEHPrepare(bool DemoteCatchSwitchPHIOnly = false)
- : FunctionPass(ID), DemoteCatchSwitchPHIOnly(DemoteCatchSwitchPHIOnly) {}
+ WinEHPrepareImpl(bool DemoteCatchSwitchPHIOnly)
+ : DemoteCatchSwitchPHIOnly(DemoteCatchSwitchPHIOnly) {}
- bool runOnFunction(Function &Fn) override;
-
- bool doFinalization(Module &M) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
- StringRef getPassName() const override {
- return "Windows exception handling preparation";
- }
+ bool runOnFunction(Function &Fn);
private:
void insertPHIStores(PHINode *OriginalPHI, AllocaInst *SpillSlot);
@@ -100,17 +93,41 @@ private:
MapVector<BasicBlock *, std::vector<BasicBlock *>> FuncletBlocks;
};
+class WinEHPrepare : public FunctionPass {
+ bool DemoteCatchSwitchPHIOnly;
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ WinEHPrepare(bool DemoteCatchSwitchPHIOnly = false)
+ : FunctionPass(ID), DemoteCatchSwitchPHIOnly(DemoteCatchSwitchPHIOnly) {}
+
+ StringRef getPassName() const override {
+ return "Windows exception handling preparation";
+ }
+
+ bool runOnFunction(Function &Fn) override {
+ return WinEHPrepareImpl(DemoteCatchSwitchPHIOnly).runOnFunction(Fn);
+ }
+};
+
} // end anonymous namespace
+PreservedAnalyses WinEHPreparePass::run(Function &F,
+ FunctionAnalysisManager &) {
+ bool Changed = WinEHPrepareImpl(DemoteCatchSwitchPHIOnly).runOnFunction(F);
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
char WinEHPrepare::ID = 0;
-INITIALIZE_PASS(WinEHPrepare, DEBUG_TYPE, "Prepare Windows exceptions",
- false, false)
+INITIALIZE_PASS(WinEHPrepare, DEBUG_TYPE, "Prepare Windows exceptions", false,
+ false)
FunctionPass *llvm::createWinEHPass(bool DemoteCatchSwitchPHIOnly) {
return new WinEHPrepare(DemoteCatchSwitchPHIOnly);
}
-bool WinEHPrepare::runOnFunction(Function &Fn) {
+bool WinEHPrepareImpl::runOnFunction(Function &Fn) {
if (!Fn.hasPersonalityFn())
return false;
@@ -125,10 +142,6 @@ bool WinEHPrepare::runOnFunction(Function &Fn) {
return prepareExplicitEH(Fn);
}
-bool WinEHPrepare::doFinalization(Module &M) { return false; }
-
-void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {}
-
static int addUnwindMapEntry(WinEHFuncInfo &FuncInfo, int ToState,
const BasicBlock *BB) {
CxxUnwindMapEntry UME;
@@ -311,7 +324,7 @@ void llvm::calculateSEHStateForAsynchEH(const BasicBlock *BB, int State,
const Constant *FilterOrNull = cast<Constant>(
cast<CatchPadInst>(I)->getArgOperand(0)->stripPointerCasts());
const Function *Filter = dyn_cast<Function>(FilterOrNull);
- if (!Filter || !Filter->getName().startswith("__IsLocalUnwind"))
+ if (!Filter || !Filter->getName().starts_with("__IsLocalUnwind"))
State = EHInfo.SEHUnwindMap[State].ToState; // Retrieve next state
} else if ((isa<CleanupReturnInst>(TI) || isa<CatchReturnInst>(TI)) &&
State > 0) {
@@ -831,7 +844,7 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn,
calculateStateNumbersForInvokes(Fn, FuncInfo);
}
-void WinEHPrepare::colorFunclets(Function &F) {
+void WinEHPrepareImpl::colorFunclets(Function &F) {
BlockColors = colorEHFunclets(F);
// Invert the map from BB to colors to color to BBs.
@@ -842,8 +855,8 @@ void WinEHPrepare::colorFunclets(Function &F) {
}
}
-void WinEHPrepare::demotePHIsOnFunclets(Function &F,
- bool DemoteCatchSwitchPHIOnly) {
+void WinEHPrepareImpl::demotePHIsOnFunclets(Function &F,
+ bool DemoteCatchSwitchPHIOnly) {
// Strip PHI nodes off of EH pads.
SmallVector<PHINode *, 16> PHINodes;
for (BasicBlock &BB : make_early_inc_range(F)) {
@@ -873,7 +886,7 @@ void WinEHPrepare::demotePHIsOnFunclets(Function &F,
}
}
-void WinEHPrepare::cloneCommonBlocks(Function &F) {
+void WinEHPrepareImpl::cloneCommonBlocks(Function &F) {
// We need to clone all blocks which belong to multiple funclets. Values are
// remapped throughout the funclet to propagate both the new instructions
// *and* the new basic blocks themselves.
@@ -895,10 +908,10 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
if (NumColorsForBB == 1)
continue;
- DEBUG_WITH_TYPE("winehprepare-coloring",
+ DEBUG_WITH_TYPE("win-eh-prepare-coloring",
dbgs() << " Cloning block \'" << BB->getName()
- << "\' for funclet \'" << FuncletPadBB->getName()
- << "\'.\n");
+ << "\' for funclet \'" << FuncletPadBB->getName()
+ << "\'.\n");
// Create a new basic block and copy instructions into it!
BasicBlock *CBB =
@@ -929,19 +942,19 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
assert(NewColors.empty() && "A new block should only have one color!");
NewColors.push_back(FuncletPadBB);
- DEBUG_WITH_TYPE("winehprepare-coloring",
+ DEBUG_WITH_TYPE("win-eh-prepare-coloring",
dbgs() << " Assigned color \'" << FuncletPadBB->getName()
- << "\' to block \'" << NewBlock->getName()
- << "\'.\n");
+ << "\' to block \'" << NewBlock->getName()
+ << "\'.\n");
- llvm::erase_value(BlocksInFunclet, OldBlock);
+ llvm::erase(BlocksInFunclet, OldBlock);
ColorVector &OldColors = BlockColors[OldBlock];
- llvm::erase_value(OldColors, FuncletPadBB);
+ llvm::erase(OldColors, FuncletPadBB);
- DEBUG_WITH_TYPE("winehprepare-coloring",
+ DEBUG_WITH_TYPE("win-eh-prepare-coloring",
dbgs() << " Removed color \'" << FuncletPadBB->getName()
- << "\' from block \'" << OldBlock->getName()
- << "\'.\n");
+ << "\' from block \'" << OldBlock->getName()
+ << "\'.\n");
}
// Loop over all of the instructions in this funclet, fixing up operand
@@ -1075,7 +1088,7 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
}
}
-void WinEHPrepare::removeImplausibleInstructions(Function &F) {
+void WinEHPrepareImpl::removeImplausibleInstructions(Function &F) {
// Remove implausible terminators and replace them with UnreachableInst.
for (auto &Funclet : FuncletBlocks) {
BasicBlock *FuncletPadBB = Funclet.first;
@@ -1149,7 +1162,7 @@ void WinEHPrepare::removeImplausibleInstructions(Function &F) {
}
}
-void WinEHPrepare::cleanupPreparedFunclets(Function &F) {
+void WinEHPrepareImpl::cleanupPreparedFunclets(Function &F) {
// Clean up some of the mess we made by removing useless PHI nodes, trivial
// branches, etc.
for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
@@ -1164,7 +1177,7 @@ void WinEHPrepare::cleanupPreparedFunclets(Function &F) {
}
#ifndef NDEBUG
-void WinEHPrepare::verifyPreparedFunclets(Function &F) {
+void WinEHPrepareImpl::verifyPreparedFunclets(Function &F) {
for (BasicBlock &BB : F) {
size_t NumColors = BlockColors[&BB].size();
assert(NumColors == 1 && "Expected monochromatic BB!");
@@ -1178,7 +1191,7 @@ void WinEHPrepare::verifyPreparedFunclets(Function &F) {
}
#endif
-bool WinEHPrepare::prepareExplicitEH(Function &F) {
+bool WinEHPrepareImpl::prepareExplicitEH(Function &F) {
// Remove unreachable blocks. It is not valuable to assign them a color and
// their existence can trick us into thinking values are alive when they are
// not.
@@ -1206,15 +1219,12 @@ bool WinEHPrepare::prepareExplicitEH(Function &F) {
LLVM_DEBUG(colorFunclets(F));
LLVM_DEBUG(verifyPreparedFunclets(F));
- BlockColors.clear();
- FuncletBlocks.clear();
-
return true;
}
// TODO: Share loads when one use dominates another, or when a catchpad exit
// dominates uses (needs dominators).
-AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) {
+AllocaInst *WinEHPrepareImpl::insertPHILoads(PHINode *PN, Function &F) {
BasicBlock *PHIBlock = PN->getParent();
AllocaInst *SpillSlot = nullptr;
Instruction *EHPad = PHIBlock->getFirstNonPHI();
@@ -1251,8 +1261,8 @@ AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) {
// to be careful not to introduce interfering stores (needs liveness analysis).
// TODO: identify related phi nodes that can share spill slots, and share them
// (also needs liveness).
-void WinEHPrepare::insertPHIStores(PHINode *OriginalPHI,
- AllocaInst *SpillSlot) {
+void WinEHPrepareImpl::insertPHIStores(PHINode *OriginalPHI,
+ AllocaInst *SpillSlot) {
// Use a worklist of (Block, Value) pairs -- the given Value needs to be
// stored to the spill slot by the end of the given Block.
SmallVector<std::pair<BasicBlock *, Value *>, 4> Worklist;
@@ -1288,7 +1298,7 @@ void WinEHPrepare::insertPHIStores(PHINode *OriginalPHI,
}
}
-void WinEHPrepare::insertPHIStore(
+void WinEHPrepareImpl::insertPHIStore(
BasicBlock *PredBlock, Value *PredVal, AllocaInst *SpillSlot,
SmallVectorImpl<std::pair<BasicBlock *, Value *>> &Worklist) {
@@ -1302,9 +1312,9 @@ void WinEHPrepare::insertPHIStore(
new StoreInst(PredVal, SpillSlot, PredBlock->getTerminator());
}
-void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot,
- DenseMap<BasicBlock *, Value *> &Loads,
- Function &F) {
+void WinEHPrepareImpl::replaceUseWithLoad(
+ Value *V, Use &U, AllocaInst *&SpillSlot,
+ DenseMap<BasicBlock *, Value *> &Loads, Function &F) {
// Lazily create the spill slot.
if (!SpillSlot)
SpillSlot = new AllocaInst(V->getType(), DL->getAllocaAddrSpace(), nullptr,
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinker.cpp b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinker.cpp
index e6eccb20114a..10967123a562 100644
--- a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinker.cpp
+++ b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinker.cpp
@@ -15,6 +15,7 @@
#include "llvm/DWARFLinker/DWARFLinkerDeclContext.h"
#include "llvm/DWARFLinker/DWARFStreamer.h"
#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
+#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
@@ -140,37 +141,6 @@ AddressesMap::~AddressesMap() = default;
DwarfEmitter::~DwarfEmitter() = default;
-static std::optional<StringRef> StripTemplateParameters(StringRef Name) {
- // We are looking for template parameters to strip from Name. e.g.
- //
- // operator<<B>
- //
- // We look for > at the end but if it does not contain any < then we
- // have something like operator>>. We check for the operator<=> case.
- if (!Name.endswith(">") || Name.count("<") == 0 || Name.endswith("<=>"))
- return {};
-
- // How many < until we have the start of the template parameters.
- size_t NumLeftAnglesToSkip = 1;
-
- // If we have operator<=> then we need to skip its < as well.
- NumLeftAnglesToSkip += Name.count("<=>");
-
- size_t RightAngleCount = Name.count('>');
- size_t LeftAngleCount = Name.count('<');
-
- // If we have more < than > we have operator< or operator<<;
- // we need to account for their < as well.
- if (LeftAngleCount > RightAngleCount)
- NumLeftAnglesToSkip += LeftAngleCount - RightAngleCount;
-
- size_t StartOfTemplate = 0;
- while (NumLeftAnglesToSkip--)
- StartOfTemplate = Name.find('<', StartOfTemplate) + 1;
-
- return Name.substr(0, StartOfTemplate - 1);
-}
-
bool DWARFLinker::DIECloner::getDIENames(const DWARFDie &Die,
AttributesInfo &Info,
OffsetsStringPool &StringPool,
@@ -207,6 +177,20 @@ static void resolveRelativeObjectPath(SmallVectorImpl<char> &Buf, DWARFDie CU) {
sys::path::append(Buf, dwarf::toString(CU.find(dwarf::DW_AT_comp_dir), ""));
}
+/// Make a best effort to guess the
+/// Xcode.app/Contents/Developer/Toolchains/ path from an SDK path.
+static SmallString<128> guessToolchainBaseDir(StringRef SysRoot) {
+ SmallString<128> Result;
+ // Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
+ StringRef Base = sys::path::parent_path(SysRoot);
+ if (sys::path::filename(Base) != "SDKs")
+ return Result;
+ Base = sys::path::parent_path(Base);
+ Result = Base;
+ Result += "/Toolchains";
+ return Result;
+}
+
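A runnable sketch of the same heuristic using plain strings (the Xcode path below is only an example): strip the SDK name, require the parent directory to be named SDKs, then append /Toolchains to the grandparent.

#include <iostream>
#include <string>

std::string guessToolchain(std::string SysRoot) {
  auto Parent = [](const std::string &P) {
    auto Pos = P.find_last_of('/');
    return Pos == std::string::npos ? std::string() : P.substr(0, Pos);
  };
  std::string Base = Parent(SysRoot);                 // .../Developer/SDKs
  if (Base.substr(Base.find_last_of('/') + 1) != "SDKs")
    return "";                                        // not an SDK layout
  return Parent(Base) + "/Toolchains";                // .../Toolchains
}

int main() {
  std::cout << guessToolchain(
      "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/"
      "Developer/SDKs/MacOSX.sdk") << '\n';
}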
/// Collect references to parseable Swift interfaces in imported
/// DW_TAG_module blocks.
static void analyzeImportedModule(
@@ -220,13 +204,18 @@ static void analyzeImportedModule(
return;
StringRef Path = dwarf::toStringRef(DIE.find(dwarf::DW_AT_LLVM_include_path));
- if (!Path.endswith(".swiftinterface"))
+ if (!Path.ends_with(".swiftinterface"))
return;
// Don't track interfaces that are part of the SDK.
StringRef SysRoot = dwarf::toStringRef(DIE.find(dwarf::DW_AT_LLVM_sysroot));
if (SysRoot.empty())
SysRoot = CU.getSysRoot();
- if (!SysRoot.empty() && Path.startswith(SysRoot))
+ if (!SysRoot.empty() && Path.starts_with(SysRoot))
+ return;
+ // Don't track interfaces that are part of the toolchain.
+ // For example: Swift, _Concurrency, ...
+ SmallString<128> Toolchain = guessToolchainBaseDir(SysRoot);
+ if (!Toolchain.empty() && Path.starts_with(Toolchain))
return;
std::optional<const char *> Name =
dwarf::toString(DIE.find(dwarf::DW_AT_name));
@@ -474,8 +463,10 @@ DWARFLinker::getVariableRelocAdjustment(AddressesMap &RelocMgr,
const DWARFExpression::Operation &Op = *It;
switch (Op.getCode()) {
+ case dwarf::DW_OP_const2u:
case dwarf::DW_OP_const4u:
case dwarf::DW_OP_const8u:
+ case dwarf::DW_OP_const2s:
case dwarf::DW_OP_const4s:
case dwarf::DW_OP_const8s:
if (NextIt == Expression.end() || !isTlsAddressCode(NextIt->getCode()))
@@ -1044,32 +1035,45 @@ void DWARFLinker::assignAbbrev(DIEAbbrev &Abbrev) {
unsigned DWARFLinker::DIECloner::cloneStringAttribute(DIE &Die,
AttributeSpec AttrSpec,
const DWARFFormValue &Val,
- const DWARFUnit &,
+ const DWARFUnit &U,
AttributesInfo &Info) {
std::optional<const char *> String = dwarf::toString(Val);
if (!String)
return 0;
-
DwarfStringPoolEntryRef StringEntry;
if (AttrSpec.Form == dwarf::DW_FORM_line_strp) {
StringEntry = DebugLineStrPool.getEntry(*String);
} else {
StringEntry = DebugStrPool.getEntry(*String);
+ if (AttrSpec.Attr == dwarf::DW_AT_APPLE_origin) {
+ Info.HasAppleOrigin = true;
+ if (std::optional<StringRef> FileName =
+ ObjFile.Addresses->getLibraryInstallName()) {
+ StringEntry = DebugStrPool.getEntry(*FileName);
+ }
+ }
+
// Update attributes info.
if (AttrSpec.Attr == dwarf::DW_AT_name)
Info.Name = StringEntry;
else if (AttrSpec.Attr == dwarf::DW_AT_MIPS_linkage_name ||
AttrSpec.Attr == dwarf::DW_AT_linkage_name)
Info.MangledName = StringEntry;
-
+ if (U.getVersion() >= 5) {
+ // Switch everything to DW_FORM_strx strings.
+ auto StringOffsetIndex =
+ StringOffsetPool.getValueIndex(StringEntry.getOffset());
+ return Die
+ .addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr),
+ dwarf::DW_FORM_strx, DIEInteger(StringOffsetIndex))
+ ->sizeOf(U.getFormParams());
+ }
// Switch everything to out of line strings.
AttrSpec.Form = dwarf::DW_FORM_strp;
}
-
Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr), AttrSpec.Form,
DIEInteger(StringEntry.getOffset()));
-
return 4;
}
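For DWARF v5 units the cloner now emits DW_FORM_strx, an index into the shared .debug_str_offsets table, instead of a direct DW_FORM_strp offset. A toy model of the two encodings (all offsets invented):

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// strp stores a byte offset into .debug_str directly; strx stores an index
// into .debug_str_offsets, which in turn holds the offset.
int main() {
  std::string DebugStr("main\0foo.c\0int\0", 15); // concatenated strings
  std::vector<uint32_t> StrOffsets = {0, 5, 11};  // .debug_str_offsets
  uint32_t Strp = 5; // DW_FORM_strp payload: direct offset
  uint32_t Strx = 1; // DW_FORM_strx payload: index into the table
  std::cout << "strp -> " << DebugStr.c_str() + Strp << '\n';             // foo.c
  std::cout << "strx -> " << DebugStr.c_str() + StrOffsets[Strx] << '\n'; // foo.c
  return 0;
}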
@@ -1389,7 +1393,7 @@ unsigned DWARFLinker::DIECloner::cloneAddressAttribute(
return Unit.getOrigUnit().getAddressByteSize();
}
- auto AddrIndex = AddrPool.getAddrIndex(*Addr);
+ auto AddrIndex = AddrPool.getValueIndex(*Addr);
return Die
.addValue(DIEAlloc, static_cast<dwarf::Attribute>(AttrSpec.Attr),
@@ -1421,6 +1425,17 @@ unsigned DWARFLinker::DIECloner::cloneScalarAttribute(
}
}
+ if (AttrSpec.Attr == dwarf::DW_AT_str_offsets_base) {
+ // DWARFLinker generates common .debug_str_offsets table used for all
+ // compile units. The offset to the common .debug_str_offsets table is 8 on
+ // DWARF32.
+ Info.AttrStrOffsetBaseSeen = true;
+ return Die
+ .addValue(DIEAlloc, dwarf::DW_AT_str_offsets_base,
+ dwarf::DW_FORM_sec_offset, DIEInteger(8))
+ ->sizeOf(Unit.getOrigUnit().getFormParams());
+ }
+
if (LLVM_UNLIKELY(Linker.Options.Update)) {
if (auto OptionalValue = Val.getAsUnsignedConstant())
Value = *OptionalValue;
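The constant 8 above is the size of the 32-bit DWARF5 .debug_str_offsets contribution header, so the base attribute points just past it:

#include <iostream>

// unit_length (4 bytes) + version (2) + padding (2) = 8 on DWARF32.
int main() {
  unsigned UnitLength = 4, Version = 2, Padding = 2;
  std::cout << UnitLength + Version + Padding << '\n'; // 8
}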
@@ -1600,51 +1615,25 @@ unsigned DWARFLinker::DIECloner::cloneAttribute(
return 0;
}
-static bool isObjCSelector(StringRef Name) {
- return Name.size() > 2 && (Name[0] == '-' || Name[0] == '+') &&
- (Name[1] == '[');
-}
-
void DWARFLinker::DIECloner::addObjCAccelerator(CompileUnit &Unit,
const DIE *Die,
DwarfStringPoolEntryRef Name,
OffsetsStringPool &StringPool,
bool SkipPubSection) {
- assert(isObjCSelector(Name.getString()) && "not an objc selector");
- // Objective C method or class function.
- // "- [Class(Category) selector :withArg ...]"
- StringRef ClassNameStart(Name.getString().drop_front(2));
- size_t FirstSpace = ClassNameStart.find(' ');
- if (FirstSpace == StringRef::npos)
+ std::optional<ObjCSelectorNames> Names =
+ getObjCNamesIfSelector(Name.getString());
+ if (!Names)
return;
-
- StringRef SelectorStart(ClassNameStart.data() + FirstSpace + 1);
- if (!SelectorStart.size())
- return;
-
- StringRef Selector(SelectorStart.data(), SelectorStart.size() - 1);
- Unit.addNameAccelerator(Die, StringPool.getEntry(Selector), SkipPubSection);
-
- // Add an entry for the class name that points to this
- // method/class function.
- StringRef ClassName(ClassNameStart.data(), FirstSpace);
- Unit.addObjCAccelerator(Die, StringPool.getEntry(ClassName), SkipPubSection);
-
- if (ClassName[ClassName.size() - 1] == ')') {
- size_t OpenParens = ClassName.find('(');
- if (OpenParens != StringRef::npos) {
- StringRef ClassNameNoCategory(ClassName.data(), OpenParens);
- Unit.addObjCAccelerator(Die, StringPool.getEntry(ClassNameNoCategory),
- SkipPubSection);
-
- std::string MethodNameNoCategory(Name.getString().data(), OpenParens + 2);
- // FIXME: The missing space here may be a bug, but
- // dsymutil-classic also does it this way.
- MethodNameNoCategory.append(std::string(SelectorStart));
- Unit.addNameAccelerator(Die, StringPool.getEntry(MethodNameNoCategory),
- SkipPubSection);
- }
- }
+ Unit.addNameAccelerator(Die, StringPool.getEntry(Names->Selector),
+ SkipPubSection);
+ Unit.addObjCAccelerator(Die, StringPool.getEntry(Names->ClassName),
+ SkipPubSection);
+ if (Names->ClassNameNoCategory)
+ Unit.addObjCAccelerator(
+ Die, StringPool.getEntry(*Names->ClassNameNoCategory), SkipPubSection);
+ if (Names->MethodNameNoCategory)
+ Unit.addNameAccelerator(
+ Die, StringPool.getEntry(*Names->MethodNameNoCategory), SkipPubSection);
}
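
For illustration, given Name = "-[NSString(Category) stringByAppendingString:]", the removed parser, and presumably getObjCNamesIfSelector, produce roughly:

    Selector             = "stringByAppendingString:"
    ClassName            = "NSString(Category)"
    ClassNameNoCategory  = "NSString"
    MethodNameNoCategory = "-[NSStringstringByAppendingString:]"
                           // missing space kept for dsymutil-classic
                           // compatibility, per the removed FIXME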
static bool
@@ -1664,9 +1653,6 @@ shouldSkipAttribute(bool Update,
// Since DW_AT_rnglists_base is used for only DW_FORM_rnglistx the
// DW_AT_rnglists_base is removed.
return !Update;
- case dwarf::DW_AT_str_offsets_base:
- // FIXME: Use the string offset table with Dwarf 5.
- return true;
case dwarf::DW_AT_loclists_base:
// In case !Update the .debug_addr table is not generated/preserved.
// Thus instead of DW_FORM_loclistx the DW_FORM_sec_offset is used.
@@ -1679,6 +1665,12 @@ shouldSkipAttribute(bool Update,
}
}
+struct AttributeLinkedOffsetFixup {
+ int64_t LinkedOffsetFixupVal;
+ uint64_t InputAttrStartOffset;
+ uint64_t InputAttrEndOffset;
+};
+
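
The fixup records how far an attribute moves between input and output; a sketch of the arithmetic, using the fields above and the code that follows:

    // For a relocation at input offset R inside
    // [InputAttrStartOffset, InputAttrEndOffset), the linked offset is
    //   R + LinkedOffsetFixupVal
    // where LinkedOffsetFixupVal = Unit.getStartOffset() + OutOffset
    //                              - InputAttrStartOffset,
    // later bumped by the ULEB128 size of the DIE's abbreviation number.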
DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
const DWARFFile &File, CompileUnit &Unit,
int64_t PCOffset, uint32_t OutOffset,
@@ -1762,6 +1754,9 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
Flags |= TF_SkipPC;
}
+ std::optional<StringRef> LibraryInstallName =
+ ObjFile.Addresses->getLibraryInstallName();
+ SmallVector<AttributeLinkedOffsetFixup> AttributesFixups;
for (const auto &AttrSpec : Abbrev->attributes()) {
if (shouldSkipAttribute(Update, AttrSpec, Flags & TF_SkipPC)) {
DWARFFormValue::skipValue(AttrSpec.Form, Data, &Offset,
@@ -1769,17 +1764,41 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
continue;
}
+ AttributeLinkedOffsetFixup CurAttrFixup;
+ CurAttrFixup.InputAttrStartOffset = InputDIE.getOffset() + Offset;
+ CurAttrFixup.LinkedOffsetFixupVal =
+ Unit.getStartOffset() + OutOffset - CurAttrFixup.InputAttrStartOffset;
+
DWARFFormValue Val = AttrSpec.getFormValue();
uint64_t AttrSize = Offset;
Val.extractValue(Data, &Offset, U.getFormParams(), &U);
+ CurAttrFixup.InputAttrEndOffset = InputDIE.getOffset() + Offset;
AttrSize = Offset - AttrSize;
- OutOffset += cloneAttribute(*Die, InputDIE, File, Unit, Val, AttrSpec,
- AttrSize, AttrInfo, IsLittleEndian);
+ uint64_t FinalAttrSize =
+ cloneAttribute(*Die, InputDIE, File, Unit, Val, AttrSpec, AttrSize,
+ AttrInfo, IsLittleEndian);
+ if (FinalAttrSize != 0 && ObjFile.Addresses->needToSaveValidRelocs())
+ AttributesFixups.push_back(CurAttrFixup);
+
+ OutOffset += FinalAttrSize;
}
- // Look for accelerator entries.
uint16_t Tag = InputDIE.getTag();
+  // Add the DW_AT_APPLE_origin attribute to the compile unit DIE if we have
+  // an install name and the DWARF doesn't have the attribute yet.
+ const bool NeedsAppleOrigin = (Tag == dwarf::DW_TAG_compile_unit) &&
+ LibraryInstallName.has_value() &&
+ !AttrInfo.HasAppleOrigin;
+ if (NeedsAppleOrigin) {
+ auto StringEntry = DebugStrPool.getEntry(LibraryInstallName.value());
+ Die->addValue(DIEAlloc, dwarf::Attribute(dwarf::DW_AT_APPLE_origin),
+ dwarf::DW_FORM_strp, DIEInteger(StringEntry.getOffset()));
+ AttrInfo.Name = StringEntry;
+ OutOffset += 4;
+ }
+
+ // Look for accelerator entries.
// FIXME: This is slightly wrong. An inline_subroutine without a
// low_pc, but with AT_ranges might be interesting to get into the
// accelerator tables too. For now stick with dsymutil's behavior.
@@ -1797,7 +1816,7 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
Unit.addNameAccelerator(Die, AttrInfo.Name,
Tag == dwarf::DW_TAG_inlined_subroutine);
}
- if (AttrInfo.Name && isObjCSelector(AttrInfo.Name.getString()))
+ if (AttrInfo.Name)
addObjCAccelerator(Unit, Die, AttrInfo.Name, DebugStrPool,
/* SkipPubSection =*/true);
@@ -1833,6 +1852,14 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
}
}
+ if (Unit.getOrigUnit().getVersion() >= 5 && !AttrInfo.AttrStrOffsetBaseSeen &&
+ Die->getTag() == dwarf::DW_TAG_compile_unit) {
+ // No DW_AT_str_offsets_base seen, add it to the DIE.
+ Die->addValue(DIEAlloc, dwarf::DW_AT_str_offsets_base,
+ dwarf::DW_FORM_sec_offset, DIEInteger(8));
+ OutOffset += 4;
+ }
+
DIEAbbrev NewAbbrev = Die->generateAbbrev();
if (HasChildren)
NewAbbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
@@ -1840,8 +1867,19 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
Linker.assignAbbrev(NewAbbrev);
Die->setAbbrevNumber(NewAbbrev.getNumber());
+ uint64_t AbbrevNumberSize = getULEB128Size(Die->getAbbrevNumber());
+
// Add the size of the abbreviation number to the output offset.
- OutOffset += getULEB128Size(Die->getAbbrevNumber());
+ OutOffset += AbbrevNumberSize;
+
+  // Update the fixups with the size of the abbreviation number.
+ for (AttributeLinkedOffsetFixup &F : AttributesFixups)
+ F.LinkedOffsetFixupVal += AbbrevNumberSize;
+
+ for (AttributeLinkedOffsetFixup &F : AttributesFixups)
+ ObjFile.Addresses->updateAndSaveValidRelocs(
+ Unit.getOrigUnit().getVersion() >= 5, Unit.getOrigUnit().getOffset(),
+ F.LinkedOffsetFixupVal, F.InputAttrStartOffset, F.InputAttrEndOffset);
if (!HasChildren) {
// Update our size.
@@ -1868,8 +1906,8 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
/// Patch the input object file relevant debug_ranges or debug_rnglists
/// entries and emit them in the output file. Update the relevant attributes
/// to point at the new entries.
-void DWARFLinker::generateUnitRanges(CompileUnit &Unit,
- const DWARFFile &File) const {
+void DWARFLinker::generateUnitRanges(CompileUnit &Unit, const DWARFFile &File,
+ DebugDieValuePool &AddrPool) const {
if (LLVM_UNLIKELY(Options.Update))
return;
@@ -1922,14 +1960,14 @@ void DWARFLinker::generateUnitRanges(CompileUnit &Unit,
}
// Emit linked ranges.
- TheDwarfEmitter->emitDwarfDebugRangeListFragment(Unit, LinkedRanges,
- AttributePatch);
+ TheDwarfEmitter->emitDwarfDebugRangeListFragment(
+ Unit, LinkedRanges, AttributePatch, AddrPool);
}
// Emit ranges for Unit AT_ranges attribute.
if (UnitRngListAttribute.has_value())
TheDwarfEmitter->emitDwarfDebugRangeListFragment(
- Unit, LinkedFunctionRanges, *UnitRngListAttribute);
+ Unit, LinkedFunctionRanges, *UnitRngListAttribute, AddrPool);
// Emit ranges footer.
TheDwarfEmitter->emitDwarfDebugRangeListFooter(Unit, EndLabel);
@@ -2011,13 +2049,14 @@ void DWARFLinker::DIECloner::emitDebugAddrSection(
if (DwarfVersion < 5)
return;
- if (AddrPool.Addrs.empty())
+ if (AddrPool.DieValues.empty())
return;
MCSymbol *EndLabel = Emitter->emitDwarfDebugAddrsHeader(Unit);
patchAddrBase(*Unit.getOutputUnitDIE(),
DIEInteger(Emitter->getDebugAddrSectionSize()));
- Emitter->emitDwarfDebugAddrs(AddrPool.Addrs, Unit.getOrigUnit().getAddressByteSize());
+ Emitter->emitDwarfDebugAddrs(AddrPool.DieValues,
+ Unit.getOrigUnit().getAddressByteSize());
Emitter->emitDwarfDebugAddrsFooter(Unit, EndLabel);
}
@@ -2571,7 +2610,7 @@ uint64_t DWARFLinker::DIECloner::cloneAllCompileUnits(
if (LLVM_UNLIKELY(Linker.Options.Update))
continue;
- Linker.generateUnitRanges(*CurrentUnit, File);
+ Linker.generateUnitRanges(*CurrentUnit, File, AddrPool);
auto ProcessExpr = [&](SmallVectorImpl<uint8_t> &SrcBytes,
SmallVectorImpl<uint8_t> &OutBytes,
@@ -2617,69 +2656,6 @@ uint64_t DWARFLinker::DIECloner::cloneAllCompileUnits(
return OutputDebugInfoSize - StartOutputDebugInfoSize;
}
-bool DWARFLinker::emitPaperTrailWarnings(const DWARFFile &File,
- OffsetsStringPool &StringPool) {
-
- if (File.Warnings.empty())
- return false;
-
- DIE *CUDie = DIE::get(DIEAlloc, dwarf::DW_TAG_compile_unit);
- CUDie->setOffset(11);
- StringRef Producer;
- StringRef WarningHeader;
-
- switch (DwarfLinkerClientID) {
- case DwarfLinkerClient::Dsymutil:
- Producer = StringPool.internString("dsymutil");
- WarningHeader = "dsymutil_warning";
- break;
-
- default:
- Producer = StringPool.internString("dwarfopt");
- WarningHeader = "dwarfopt_warning";
- break;
- }
-
- StringRef FileName = StringPool.internString(File.FileName);
- CUDie->addValue(DIEAlloc, dwarf::DW_AT_producer, dwarf::DW_FORM_strp,
- DIEInteger(StringPool.getStringOffset(Producer)));
- DIEBlock *String = new (DIEAlloc) DIEBlock();
- DIEBlocks.push_back(String);
- for (auto &C : FileName)
- String->addValue(DIEAlloc, dwarf::Attribute(0), dwarf::DW_FORM_data1,
- DIEInteger(C));
- String->addValue(DIEAlloc, dwarf::Attribute(0), dwarf::DW_FORM_data1,
- DIEInteger(0));
-
- CUDie->addValue(DIEAlloc, dwarf::DW_AT_name, dwarf::DW_FORM_string, String);
- for (const auto &Warning : File.Warnings) {
- DIE &ConstDie = CUDie->addChild(DIE::get(DIEAlloc, dwarf::DW_TAG_constant));
- ConstDie.addValue(DIEAlloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp,
- DIEInteger(StringPool.getStringOffset(WarningHeader)));
- ConstDie.addValue(DIEAlloc, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag,
- DIEInteger(1));
- ConstDie.addValue(DIEAlloc, dwarf::DW_AT_const_value, dwarf::DW_FORM_strp,
- DIEInteger(StringPool.getStringOffset(Warning)));
- }
- unsigned Size = 4 /* FORM_strp */ + FileName.size() + 1 +
- File.Warnings.size() * (4 + 1 + 4) + 1 /* End of children */;
- DIEAbbrev Abbrev = CUDie->generateAbbrev();
- assignAbbrev(Abbrev);
- CUDie->setAbbrevNumber(Abbrev.getNumber());
- Size += getULEB128Size(Abbrev.getNumber());
- // Abbreviation ordering needed for classic compatibility.
- for (auto &Child : CUDie->children()) {
- Abbrev = Child.generateAbbrev();
- assignAbbrev(Abbrev);
- Child.setAbbrevNumber(Abbrev.getNumber());
- Size += getULEB128Size(Abbrev.getNumber());
- }
- CUDie->setSize(Size);
- TheDwarfEmitter->emitPaperTrailWarningsDie(*CUDie);
-
- return true;
-}
-
void DWARFLinker::copyInvariantDebugSection(DWARFContext &Dwarf) {
TheDwarfEmitter->emitSectionContents(Dwarf.getDWARFObj().getLocSection().Data,
"debug_loc");
@@ -2731,6 +2707,7 @@ Error DWARFLinker::link() {
// reproducibility.
OffsetsStringPool DebugStrPool(StringsTranslator, true);
OffsetsStringPool DebugLineStrPool(StringsTranslator, false);
+ DebugDieValuePool StringOffsetPool;
// ODR Contexts for the optimize.
DeclContextTree ODRContexts;
@@ -2743,9 +2720,6 @@ Error DWARFLinker::link() {
outs() << "OBJECT FILE: " << OptContext.File.FileName << "\n";
}
- if (emitPaperTrailWarnings(OptContext.File, DebugStrPool))
- continue;
-
if (!OptContext.File.Dwarf)
continue;
@@ -2780,12 +2754,12 @@ Error DWARFLinker::link() {
continue;
}
- // In a first phase, just read in the debug info and load all clang modules.
+    // Clone all the clang modules, which requires extracting the unit DIEs; we
+    // don't need the full debug info until the Analyze phase.
OptContext.CompileUnits.reserve(
OptContext.File.Dwarf->getNumCompileUnits());
-
for (const auto &CU : OptContext.File.Dwarf->compile_units()) {
- auto CUDie = CU->getUnitDIE(false);
+ auto CUDie = CU->getUnitDIE(/*ExtractUnitDIEOnly=*/true);
if (Options.Verbose) {
outs() << "Input compilation unit:";
DIDumpOptions DumpOpts;
@@ -2797,7 +2771,7 @@ Error DWARFLinker::link() {
for (auto &CU : OptContext.ModuleUnits) {
if (Error Err = cloneModuleUnit(OptContext, CU, ODRContexts, DebugStrPool,
- DebugLineStrPool))
+ DebugLineStrPool, StringOffsetPool))
reportWarning(toString(std::move(Err)), CU.File);
}
}
@@ -2826,9 +2800,9 @@ Error DWARFLinker::link() {
return;
for (const auto &CU : Context.File.Dwarf->compile_units()) {
- // The !isClangModuleRef condition effectively skips over fully resolved
- // skeleton units.
- auto CUDie = CU->getUnitDIE();
+ // Previously we only extracted the unit DIEs. We need the full debug info
+ // now.
+ auto CUDie = CU->getUnitDIE(/*ExtractUnitDIEOnly=*/false);
std::string PCMFile = getPCMFile(CUDie, Options.ObjectPrefixMap);
if (!CUDie || LLVM_UNLIKELY(Options.Update) ||
@@ -2894,7 +2868,7 @@ Error DWARFLinker::link() {
SizeByObject[OptContext.File.FileName].Output =
DIECloner(*this, TheDwarfEmitter.get(), OptContext.File, DIEAlloc,
OptContext.CompileUnits, Options.Update, DebugStrPool,
- DebugLineStrPool)
+ DebugLineStrPool, StringOffsetPool)
.cloneAllCompileUnits(*OptContext.File.Dwarf, OptContext.File,
OptContext.File.Dwarf->isLittleEndian());
}
@@ -2911,6 +2885,8 @@ Error DWARFLinker::link() {
if (TheDwarfEmitter != nullptr) {
TheDwarfEmitter->emitAbbrevs(Abbreviations, Options.TargetDWARFVersion);
TheDwarfEmitter->emitStrings(DebugStrPool);
+ TheDwarfEmitter->emitStringOffsets(StringOffsetPool.DieValues,
+ Options.TargetDWARFVersion);
TheDwarfEmitter->emitLineStrings(DebugLineStrPool);
for (AccelTableKind TableKind : Options.AccelTables) {
switch (TableKind) {
@@ -3027,6 +3003,7 @@ Error DWARFLinker::cloneModuleUnit(LinkContext &Context, RefModuleUnit &Unit,
DeclContextTree &ODRContexts,
OffsetsStringPool &DebugStrPool,
OffsetsStringPool &DebugLineStrPool,
+ DebugDieValuePool &StringOffsetPool,
unsigned Indent) {
assert(Unit.Unit.get() != nullptr);
@@ -3053,7 +3030,7 @@ Error DWARFLinker::cloneModuleUnit(LinkContext &Context, RefModuleUnit &Unit,
CompileUnits.emplace_back(std::move(Unit.Unit));
assert(TheDwarfEmitter);
DIECloner(*this, TheDwarfEmitter.get(), Unit.File, DIEAlloc, CompileUnits,
- Options.Update, DebugStrPool, DebugLineStrPool)
+ Options.Update, DebugStrPool, DebugLineStrPool, StringOffsetPool)
.cloneAllCompileUnits(*Unit.File.Dwarf, Unit.File,
Unit.File.Dwarf->isLittleEndian());
return Error::success();
@@ -3062,11 +3039,13 @@ Error DWARFLinker::cloneModuleUnit(LinkContext &Context, RefModuleUnit &Unit,
void DWARFLinker::verifyInput(const DWARFFile &File) {
assert(File.Dwarf);
- raw_ostream &os = Options.Verbose ? errs() : nulls();
+
+ std::string Buffer;
+ raw_string_ostream OS(Buffer);
DIDumpOptions DumpOpts;
- if (!File.Dwarf->verify(os, DumpOpts.noImplicitRecursion())) {
+ if (!File.Dwarf->verify(OS, DumpOpts.noImplicitRecursion())) {
if (Options.InputVerificationHandler)
- Options.InputVerificationHandler(File);
+ Options.InputVerificationHandler(File, OS.str());
}
}
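
With the verifier output now captured into a string instead of streamed to errs(), the handler receives that output as a second argument. A minimal sketch of a matching handler, assuming the callback type follows the call site above:

    Options.InputVerificationHandler = [](const DWARFFile &File,
                                          StringRef Output) {
      errs() << "input verification failed for " << File.FileName << "\n"
             << Output;
    };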
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp
index add0d94da73f..06559bc38c86 100644
--- a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp
@@ -97,8 +97,10 @@ void CompileUnit::markEverythingAsKept() {
++NextIt;
switch (It->getCode()) {
+ case dwarf::DW_OP_const2u:
case dwarf::DW_OP_const4u:
case dwarf::DW_OP_const8u:
+ case dwarf::DW_OP_const2s:
case dwarf::DW_OP_const4s:
case dwarf::DW_OP_const8s:
if (NextIt == Expression.end() ||
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFStreamer.cpp
index fbd89dcf1ca1..cd649c328ed9 100644
--- a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFStreamer.cpp
@@ -234,18 +234,6 @@ void DwarfStreamer::emitSectionContents(StringRef SecData, StringRef SecName) {
}
}
-/// Emit DIE containing warnings.
-void DwarfStreamer::emitPaperTrailWarningsDie(DIE &Die) {
- switchToDebugInfoSection(/* Version */ 2);
- auto &Asm = getAsmPrinter();
- Asm.emitInt32(11 + Die.getSize() - 4);
- Asm.emitInt16(2);
- Asm.emitInt32(0);
- Asm.emitInt8(MC->getTargetTriple().isArch64Bit() ? 8 : 4);
- DebugInfoSectionSize += 11;
- emitDIE(Die);
-}
-
/// Emit the debug_str section stored in \p Pool.
void DwarfStreamer::emitStrings(const NonRelocatableStringpool &Pool) {
Asm->OutStreamer->switchSection(MOFI->getDwarfStrSection());
@@ -258,6 +246,39 @@ void DwarfStreamer::emitStrings(const NonRelocatableStringpool &Pool) {
}
}
+/// Emit the debug string offset table described by \p StringOffsets into the
+/// .debug_str_offsets section.
+void DwarfStreamer::emitStringOffsets(
+ const SmallVector<uint64_t> &StringOffsets, uint16_t TargetDWARFVersion) {
+
+ if (TargetDWARFVersion < 5 || StringOffsets.empty())
+ return;
+
+ Asm->OutStreamer->switchSection(MOFI->getDwarfStrOffSection());
+
+ MCSymbol *BeginLabel = Asm->createTempSymbol("Bdebugstroff");
+ MCSymbol *EndLabel = Asm->createTempSymbol("Edebugstroff");
+
+ // Length.
+ Asm->emitLabelDifference(EndLabel, BeginLabel, sizeof(uint32_t));
+ Asm->OutStreamer->emitLabel(BeginLabel);
+ StrOffsetSectionSize += sizeof(uint32_t);
+
+ // Version.
+ MS->emitInt16(5);
+ StrOffsetSectionSize += sizeof(uint16_t);
+
+ // Padding.
+ MS->emitInt16(0);
+ StrOffsetSectionSize += sizeof(uint16_t);
+
+ for (auto Off : StringOffsets) {
+ Asm->OutStreamer->emitInt32(Off);
+ StrOffsetSectionSize += sizeof(uint32_t);
+ }
+ Asm->OutStreamer->emitLabel(EndLabel);
+}
+
/// Emit the debug_line_str section stored in \p Pool.
void DwarfStreamer::emitLineStrings(const NonRelocatableStringpool &Pool) {
Asm->OutStreamer->switchSection(MOFI->getDwarfLineStrSection());
@@ -270,14 +291,13 @@ void DwarfStreamer::emitLineStrings(const NonRelocatableStringpool &Pool) {
}
}
-void DwarfStreamer::emitDebugNames(
- AccelTable<DWARF5AccelTableStaticData> &Table) {
+void DwarfStreamer::emitDebugNames(DWARF5AccelTable &Table) {
if (EmittedUnits.empty())
return;
// Build up data structures needed to emit this section.
- std::vector<MCSymbol *> CompUnits;
- DenseMap<unsigned, size_t> UniqueIdToCuMap;
+ std::vector<std::variant<MCSymbol *, uint64_t>> CompUnits;
+ DenseMap<unsigned, unsigned> UniqueIdToCuMap;
unsigned Id = 0;
for (auto &CU : EmittedUnits) {
CompUnits.push_back(CU.LabelBegin);
@@ -286,10 +306,19 @@ void DwarfStreamer::emitDebugNames(
}
Asm->OutStreamer->switchSection(MOFI->getDwarfDebugNamesSection());
+ dwarf::Form Form = DIEInteger::BestForm(/*IsSigned*/ false,
+ (uint64_t)UniqueIdToCuMap.size() - 1);
+  // llvm-dwarfutil doesn't support type units + .debug_names right now.
+  // FIXME: add support for type units + .debug_names. For now that
+  // combination is unsupported.
emitDWARF5AccelTable(
Asm.get(), Table, CompUnits,
- [&UniqueIdToCuMap](const DWARF5AccelTableStaticData &Entry) {
- return UniqueIdToCuMap[Entry.getCUIndex()];
+ [&](const DWARF5AccelTableData &Entry)
+ -> std::optional<DWARF5AccelTable::UnitIndexAndEncoding> {
+ if (UniqueIdToCuMap.size() > 1)
+ return {{UniqueIdToCuMap[Entry.getUnitID()],
+ {dwarf::DW_IDX_compile_unit, Form}}};
+ return std::nullopt;
});
}
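
DIEInteger::BestForm picks the narrowest form that can hold the largest compile-unit index. For example, with three emitted units the largest index is 2, so each DW_IDX_compile_unit entry shrinks to a single byte:

    // Illustration only; BestForm(false, 2) selects DW_FORM_data1.
    dwarf::Form Form = DIEInteger::BestForm(/*IsSigned=*/false, /*Int=*/2);
    assert(Form == dwarf::DW_FORM_data1);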
@@ -455,13 +484,13 @@ DwarfStreamer::emitDwarfDebugRangeListHeader(const CompileUnit &Unit) {
void DwarfStreamer::emitDwarfDebugRangeListFragment(
const CompileUnit &Unit, const AddressRanges &LinkedRanges,
- PatchLocation Patch) {
+ PatchLocation Patch, DebugDieValuePool &AddrPool) {
if (Unit.getOrigUnit().getVersion() < 5) {
emitDwarfDebugRangesTableFragment(Unit, LinkedRanges, Patch);
return;
}
- emitDwarfDebugRngListsTableFragment(Unit, LinkedRanges, Patch);
+ emitDwarfDebugRngListsTableFragment(Unit, LinkedRanges, Patch, AddrPool);
}
void DwarfStreamer::emitDwarfDebugRangeListFooter(const CompileUnit &Unit,
@@ -478,25 +507,35 @@ void DwarfStreamer::emitDwarfDebugRangeListFooter(const CompileUnit &Unit,
void DwarfStreamer::emitDwarfDebugRngListsTableFragment(
const CompileUnit &Unit, const AddressRanges &LinkedRanges,
- PatchLocation Patch) {
+ PatchLocation Patch, DebugDieValuePool &AddrPool) {
Patch.set(RngListsSectionSize);
// Make .debug_rnglists to be current section.
MS->switchSection(MC->getObjectFileInfo()->getDwarfRnglistsSection());
-
- unsigned AddressSize = Unit.getOrigUnit().getAddressByteSize();
+ std::optional<uint64_t> BaseAddress;
for (const AddressRange &Range : LinkedRanges) {
+
+ if (!BaseAddress) {
+ BaseAddress = Range.start();
+
+ // Emit base address.
+ MS->emitInt8(dwarf::DW_RLE_base_addressx);
+ RngListsSectionSize += 1;
+ RngListsSectionSize +=
+ MS->emitULEB128IntValue(AddrPool.getValueIndex(*BaseAddress));
+ }
+
// Emit type of entry.
- MS->emitInt8(dwarf::DW_RLE_start_length);
+ MS->emitInt8(dwarf::DW_RLE_offset_pair);
RngListsSectionSize += 1;
- // Emit start address.
- MS->emitIntValue(Range.start(), AddressSize);
- RngListsSectionSize += AddressSize;
+ // Emit start offset relative to base address.
+ RngListsSectionSize +=
+ MS->emitULEB128IntValue(Range.start() - *BaseAddress);
- // Emit length of the range.
- RngListsSectionSize += MS->emitULEB128IntValue(Range.end() - Range.start());
+ // Emit end offset relative to base address.
+ RngListsSectionSize += MS->emitULEB128IntValue(Range.end() - *BaseAddress);
}
// Emit the terminator entry.
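
As an illustration (addresses invented), two ranges [0x1000, 0x1010) and [0x1020, 0x1030) now encode relative to a pooled base address:

    DW_RLE_base_addressx  ULEB(AddrPool.getValueIndex(0x1000))
    DW_RLE_offset_pair    ULEB(0x00), ULEB(0x10)   // 0x1000 .. 0x1010
    DW_RLE_offset_pair    ULEB(0x20), ULEB(0x30)   // 0x1020 .. 0x1030
    DW_RLE_end_of_list

whereas the old DW_RLE_start_length entries each carried a full-width start address.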
@@ -544,7 +583,7 @@ MCSymbol *DwarfStreamer::emitDwarfDebugLocListHeader(const CompileUnit &Unit) {
void DwarfStreamer::emitDwarfDebugLocListFragment(
const CompileUnit &Unit,
const DWARFLocationExpressionsVector &LinkedLocationExpression,
- PatchLocation Patch, DebugAddrPool &AddrPool) {
+ PatchLocation Patch, DebugDieValuePool &AddrPool) {
if (Unit.getOrigUnit().getVersion() < 5) {
emitDwarfDebugLocTableFragment(Unit, LinkedLocationExpression, Patch);
return;
@@ -662,7 +701,7 @@ void DwarfStreamer::emitDwarfDebugAddrsFooter(const CompileUnit &Unit,
void DwarfStreamer::emitDwarfDebugLocListsTableFragment(
const CompileUnit &Unit,
const DWARFLocationExpressionsVector &LinkedLocationExpression,
- PatchLocation Patch, DebugAddrPool &AddrPool) {
+ PatchLocation Patch, DebugDieValuePool &AddrPool) {
Patch.set(LocListsSectionSize);
// Make .debug_loclists the current section.
@@ -681,7 +720,7 @@ void DwarfStreamer::emitDwarfDebugLocListsTableFragment(
MS->emitInt8(dwarf::DW_LLE_base_addressx);
LocListsSectionSize += 1;
LocListsSectionSize +=
- MS->emitULEB128IntValue(AddrPool.getAddrIndex(*BaseAddress));
+ MS->emitULEB128IntValue(AddrPool.getValueIndex(*BaseAddress));
}
// Emit type of entry.
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/AcceleratorRecordsSaver.cpp b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/AcceleratorRecordsSaver.cpp
new file mode 100644
index 000000000000..5ec25cfe5fd2
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/AcceleratorRecordsSaver.cpp
@@ -0,0 +1,295 @@
+//=== AcceleratorRecordsSaver.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AcceleratorRecordsSaver.h"
+#include "Utils.h"
+#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
+#include "llvm/Support/DJB.h"
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+static uint32_t hashFullyQualifiedName(CompileUnit &InputCU, DWARFDie &InputDIE,
+ int ChildRecurseDepth = 0) {
+ const char *Name = nullptr;
+ CompileUnit *CU = &InputCU;
+ std::optional<DWARFFormValue> RefVal;
+
+ if (Error Err = finiteLoop([&]() -> Expected<bool> {
+ if (const char *CurrentName = InputDIE.getName(DINameKind::ShortName))
+ Name = CurrentName;
+
+ if (!(RefVal = InputDIE.find(dwarf::DW_AT_specification)) &&
+ !(RefVal = InputDIE.find(dwarf::DW_AT_abstract_origin)))
+ return false;
+
+ if (!RefVal->isFormClass(DWARFFormValue::FC_Reference))
+ return false;
+
+ std::optional<UnitEntryPairTy> RefDie = CU->resolveDIEReference(
+ *RefVal, ResolveInterCUReferencesMode::Resolve);
+ if (!RefDie)
+ return false;
+
+ if (!RefDie->DieEntry)
+ return false;
+
+ CU = RefDie->CU;
+ InputDIE = RefDie->CU->getDIE(RefDie->DieEntry);
+ return true;
+ })) {
+ consumeError(std::move(Err));
+ }
+
+ if (!Name && InputDIE.getTag() == dwarf::DW_TAG_namespace)
+ Name = "(anonymous namespace)";
+
+ DWARFDie ParentDie = InputDIE.getParent();
+ if (!ParentDie.isValid() || ParentDie.getTag() == dwarf::DW_TAG_compile_unit)
+ return djbHash(Name ? Name : "", djbHash(ChildRecurseDepth ? "" : "::"));
+
+ return djbHash(
+ (Name ? Name : ""),
+ djbHash((Name ? "::" : ""),
+ hashFullyQualifiedName(*CU, ParentDie, ++ChildRecurseDepth)));
+}
+
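
By my reading of the recursion above, a type C in a namespace ns sitting directly under the compile unit hashes as:

    // Leaf-to-root folding of the qualified name "ns::C"; the innermost call
    // (the root-most name) contributes an empty separator.
    uint32_t H = djbHash("C", djbHash("::", djbHash("ns", djbHash(""))));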
+void AcceleratorRecordsSaver::save(const DWARFDebugInfoEntry *InputDieEntry,
+ DIE *OutDIE, AttributesInfo &AttrInfo,
+ TypeEntry *TypeEntry) {
+ if (GlobalData.getOptions().AccelTables.empty())
+ return;
+
+ DWARFDie InputDIE = InUnit.getDIE(InputDieEntry);
+
+ // Look for short name recursively if short name is not known yet.
+ if (AttrInfo.Name == nullptr)
+ if (const char *ShortName = InputDIE.getShortName())
+ AttrInfo.Name = GlobalData.getStringPool().insert(ShortName).first;
+
+ switch (InputDieEntry->getTag()) {
+ case dwarf::DW_TAG_array_type:
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_enumeration_type:
+ case dwarf::DW_TAG_pointer_type:
+ case dwarf::DW_TAG_reference_type:
+ case dwarf::DW_TAG_string_type:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_subroutine_type:
+ case dwarf::DW_TAG_typedef:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_ptr_to_member_type:
+ case dwarf::DW_TAG_set_type:
+ case dwarf::DW_TAG_subrange_type:
+ case dwarf::DW_TAG_base_type:
+ case dwarf::DW_TAG_const_type:
+ case dwarf::DW_TAG_constant:
+ case dwarf::DW_TAG_file_type:
+ case dwarf::DW_TAG_namelist:
+ case dwarf::DW_TAG_packed_type:
+ case dwarf::DW_TAG_volatile_type:
+ case dwarf::DW_TAG_restrict_type:
+ case dwarf::DW_TAG_atomic_type:
+ case dwarf::DW_TAG_interface_type:
+ case dwarf::DW_TAG_unspecified_type:
+ case dwarf::DW_TAG_shared_type:
+ case dwarf::DW_TAG_immutable_type:
+ case dwarf::DW_TAG_rvalue_reference_type: {
+ if (!AttrInfo.IsDeclaration && AttrInfo.Name != nullptr &&
+ !AttrInfo.Name->getKey().empty()) {
+ uint32_t Hash = hashFullyQualifiedName(InUnit, InputDIE);
+
+ uint64_t RuntimeLang =
+ dwarf::toUnsigned(InputDIE.find(dwarf::DW_AT_APPLE_runtime_class))
+ .value_or(0);
+
+ bool ObjCClassIsImplementation =
+ (RuntimeLang == dwarf::DW_LANG_ObjC ||
+ RuntimeLang == dwarf::DW_LANG_ObjC_plus_plus) &&
+ dwarf::toUnsigned(
+ InputDIE.find(dwarf::DW_AT_APPLE_objc_complete_type))
+ .value_or(0);
+
+ saveTypeRecord(AttrInfo.Name, OutDIE, InputDieEntry->getTag(), Hash,
+ ObjCClassIsImplementation, TypeEntry);
+ }
+ } break;
+ case dwarf::DW_TAG_namespace: {
+ if (AttrInfo.Name == nullptr)
+ AttrInfo.Name =
+ GlobalData.getStringPool().insert("(anonymous namespace)").first;
+
+ saveNamespaceRecord(AttrInfo.Name, OutDIE, InputDieEntry->getTag(),
+ TypeEntry);
+ } break;
+ case dwarf::DW_TAG_imported_declaration: {
+ if (AttrInfo.Name != nullptr)
+ saveNamespaceRecord(AttrInfo.Name, OutDIE, InputDieEntry->getTag(),
+ TypeEntry);
+ } break;
+ case dwarf::DW_TAG_compile_unit:
+ case dwarf::DW_TAG_lexical_block: {
+ // Nothing to do.
+ } break;
+ default:
+ if (TypeEntry)
+      // Do not store these kinds of accelerator entries for type entries.
+ return;
+
+ if (AttrInfo.HasLiveAddress || AttrInfo.HasRanges) {
+ if (AttrInfo.Name)
+ saveNameRecord(AttrInfo.Name, OutDIE, InputDieEntry->getTag(),
+ InputDieEntry->getTag() ==
+ dwarf::DW_TAG_inlined_subroutine);
+
+ // Look for mangled name recursively if mangled name is not known yet.
+ if (!AttrInfo.MangledName)
+ if (const char *LinkageName = InputDIE.getLinkageName())
+ AttrInfo.MangledName =
+ GlobalData.getStringPool().insert(LinkageName).first;
+
+ if (AttrInfo.MangledName && AttrInfo.MangledName != AttrInfo.Name)
+ saveNameRecord(AttrInfo.MangledName, OutDIE, InputDieEntry->getTag(),
+ InputDieEntry->getTag() ==
+ dwarf::DW_TAG_inlined_subroutine);
+
+ // Strip template parameters from the short name.
+ if (AttrInfo.Name && AttrInfo.MangledName != AttrInfo.Name &&
+ (InputDieEntry->getTag() != dwarf::DW_TAG_inlined_subroutine)) {
+ if (std::optional<StringRef> Name =
+ StripTemplateParameters(AttrInfo.Name->getKey())) {
+ StringEntry *NameWithoutTemplateParams =
+ GlobalData.getStringPool().insert(*Name).first;
+
+ saveNameRecord(NameWithoutTemplateParams, OutDIE,
+ InputDieEntry->getTag(), true);
+ }
+ }
+
+ if (AttrInfo.Name)
+ saveObjC(InputDieEntry, OutDIE, AttrInfo);
+ }
+ break;
+ }
+}
+
+void AcceleratorRecordsSaver::saveObjC(const DWARFDebugInfoEntry *InputDieEntry,
+ DIE *OutDIE, AttributesInfo &AttrInfo) {
+ std::optional<ObjCSelectorNames> Names =
+ getObjCNamesIfSelector(AttrInfo.Name->getKey());
+ if (!Names)
+ return;
+
+ StringEntry *Selector =
+ GlobalData.getStringPool().insert(Names->Selector).first;
+ saveNameRecord(Selector, OutDIE, InputDieEntry->getTag(), true);
+ StringEntry *ClassName =
+ GlobalData.getStringPool().insert(Names->ClassName).first;
+ saveObjCNameRecord(ClassName, OutDIE, InputDieEntry->getTag());
+ if (Names->ClassNameNoCategory) {
+ StringEntry *ClassNameNoCategory =
+ GlobalData.getStringPool().insert(*Names->ClassNameNoCategory).first;
+ saveObjCNameRecord(ClassNameNoCategory, OutDIE, InputDieEntry->getTag());
+ }
+ if (Names->MethodNameNoCategory) {
+ StringEntry *MethodNameNoCategory =
+ GlobalData.getStringPool().insert(*Names->MethodNameNoCategory).first;
+ saveNameRecord(MethodNameNoCategory, OutDIE, InputDieEntry->getTag(), true);
+ }
+}
+
+void AcceleratorRecordsSaver::saveNameRecord(StringEntry *Name, DIE *OutDIE,
+ dwarf::Tag Tag,
+ bool AvoidForPubSections) {
+ DwarfUnit::AccelInfo Info;
+
+ Info.Type = DwarfUnit::AccelType::Name;
+ Info.String = Name;
+ Info.OutOffset = OutDIE->getOffset();
+ Info.Tag = Tag;
+ Info.AvoidForPubSections = AvoidForPubSections;
+
+ OutUnit.getAsCompileUnit()->saveAcceleratorInfo(Info);
+}
+void AcceleratorRecordsSaver::saveNamespaceRecord(StringEntry *Name,
+ DIE *OutDIE, dwarf::Tag Tag,
+ TypeEntry *TypeEntry) {
+ if (OutUnit.isCompileUnit()) {
+ assert(TypeEntry == nullptr);
+ DwarfUnit::AccelInfo Info;
+
+ Info.Type = DwarfUnit::AccelType::Namespace;
+ Info.String = Name;
+ Info.OutOffset = OutDIE->getOffset();
+ Info.Tag = Tag;
+
+ OutUnit.getAsCompileUnit()->saveAcceleratorInfo(Info);
+ return;
+ }
+
+ assert(TypeEntry != nullptr);
+ TypeUnit::TypeUnitAccelInfo Info;
+ Info.Type = DwarfUnit::AccelType::Namespace;
+ Info.String = Name;
+ Info.OutOffset = 0xbaddef;
+ Info.Tag = Tag;
+ Info.OutDIE = OutDIE;
+ Info.TypeEntryBodyPtr = TypeEntry->getValue().load();
+
+ OutUnit.getAsTypeUnit()->saveAcceleratorInfo(Info);
+}
+
+void AcceleratorRecordsSaver::saveObjCNameRecord(StringEntry *Name, DIE *OutDIE,
+ dwarf::Tag Tag) {
+ DwarfUnit::AccelInfo Info;
+
+ Info.Type = DwarfUnit::AccelType::ObjC;
+ Info.String = Name;
+ Info.OutOffset = OutDIE->getOffset();
+ Info.Tag = Tag;
+ Info.AvoidForPubSections = true;
+
+ OutUnit.getAsCompileUnit()->saveAcceleratorInfo(Info);
+}
+
+void AcceleratorRecordsSaver::saveTypeRecord(StringEntry *Name, DIE *OutDIE,
+ dwarf::Tag Tag,
+ uint32_t QualifiedNameHash,
+ bool ObjcClassImplementation,
+ TypeEntry *TypeEntry) {
+ if (OutUnit.isCompileUnit()) {
+ assert(TypeEntry == nullptr);
+ DwarfUnit::AccelInfo Info;
+
+ Info.Type = DwarfUnit::AccelType::Type;
+ Info.String = Name;
+ Info.OutOffset = OutDIE->getOffset();
+ Info.Tag = Tag;
+ Info.QualifiedNameHash = QualifiedNameHash;
+ Info.ObjcClassImplementation = ObjcClassImplementation;
+
+ OutUnit.getAsCompileUnit()->saveAcceleratorInfo(Info);
+ return;
+ }
+
+ assert(TypeEntry != nullptr);
+ TypeUnit::TypeUnitAccelInfo Info;
+
+ Info.Type = DwarfUnit::AccelType::Type;
+ Info.String = Name;
+ Info.OutOffset = 0xbaddef;
+ Info.Tag = Tag;
+ Info.QualifiedNameHash = QualifiedNameHash;
+ Info.ObjcClassImplementation = ObjcClassImplementation;
+ Info.OutDIE = OutDIE;
+ Info.TypeEntryBodyPtr = TypeEntry->getValue().load();
+ OutUnit.getAsTypeUnit()->saveAcceleratorInfo(Info);
+}
+
+} // end of namespace dwarflinker_parallel
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/AcceleratorRecordsSaver.h b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/AcceleratorRecordsSaver.h
new file mode 100644
index 000000000000..5e7f4d0c3166
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/AcceleratorRecordsSaver.h
@@ -0,0 +1,70 @@
+//===- AcceleratorRecordsSaver.h --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_DWARFLINKERPARALLEL_ACCELERATORRECORDSSAVER_H
+#define LLVM_LIB_DWARFLINKERPARALLEL_ACCELERATORRECORDSSAVER_H
+
+#include "DIEAttributeCloner.h"
+#include "DWARFLinkerCompileUnit.h"
+#include "DWARFLinkerGlobalData.h"
+#include "DWARFLinkerTypeUnit.h"
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+/// This class helps to store information for accelerator entries.
+/// It prepares accelerator info for a given DIE and stores it inside
+/// OutUnit.
+class AcceleratorRecordsSaver {
+public:
+ AcceleratorRecordsSaver(LinkingGlobalData &GlobalData, CompileUnit &InUnit,
+ CompileUnit *OutUnit)
+ : AcceleratorRecordsSaver(GlobalData, InUnit,
+ CompileUnit::OutputUnitVariantPtr(OutUnit)) {}
+
+ AcceleratorRecordsSaver(LinkingGlobalData &GlobalData, CompileUnit &InUnit,
+ TypeUnit *OutUnit)
+ : AcceleratorRecordsSaver(GlobalData, InUnit,
+ CompileUnit::OutputUnitVariantPtr(OutUnit)) {}
+
+ /// Save accelerator info for the specified \p OutDIE inside OutUnit.
+ /// Side effects: set attributes in \p AttrInfo.
+ void save(const DWARFDebugInfoEntry *InputDieEntry, DIE *OutDIE,
+ AttributesInfo &AttrInfo, TypeEntry *TypeEntry);
+
+protected:
+ AcceleratorRecordsSaver(LinkingGlobalData &GlobalData, CompileUnit &InUnit,
+ CompileUnit::OutputUnitVariantPtr OutUnit)
+ : GlobalData(GlobalData), InUnit(InUnit), OutUnit(OutUnit) {}
+
+ void saveObjC(const DWARFDebugInfoEntry *InputDieEntry, DIE *OutDIE,
+ AttributesInfo &AttrInfo);
+
+ void saveNameRecord(StringEntry *Name, DIE *OutDIE, dwarf::Tag Tag,
+ bool AvoidForPubSections);
+ void saveNamespaceRecord(StringEntry *Name, DIE *OutDIE, dwarf::Tag Tag,
+ TypeEntry *TypeEntry);
+ void saveObjCNameRecord(StringEntry *Name, DIE *OutDIE, dwarf::Tag Tag);
+ void saveTypeRecord(StringEntry *Name, DIE *OutDIE, dwarf::Tag Tag,
+ uint32_t QualifiedNameHash, bool ObjcClassImplementation,
+ TypeEntry *TypeEntry);
+
+ /// Global linking data.
+ LinkingGlobalData &GlobalData;
+
+  /// Compile unit corresponding to the input DWARF.
+ CompileUnit &InUnit;
+
+ /// Compile unit or Artificial type unit corresponding to the output DWARF.
+ CompileUnit::OutputUnitVariantPtr OutUnit;
+};
+
+} // end of namespace dwarflinker_parallel
+} // end namespace llvm
+
+#endif // LLVM_LIB_DWARFLINKERPARALLEL_ACCELERATORRECORDSSAVER_H
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/ArrayList.h b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/ArrayList.h
new file mode 100644
index 000000000000..def83f91bc6f
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/ArrayList.h
@@ -0,0 +1,165 @@
+//===- ArrayList.h ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_DWARFLINKERPARALLEL_ARRAYLIST_H
+#define LLVM_LIB_DWARFLINKERPARALLEL_ARRAYLIST_H
+
+#include "llvm/Support/PerThreadBumpPtrAllocator.h"
+#include <atomic>
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+/// This class is a simple list of T structures. It stores elements in
+/// pre-allocated groups, which avoids keeping a separate next pointer for
+/// every element. Internal data is allocated using the specified per-thread
+/// BumpPtrAllocator. The add() method may be called concurrently.
+template <typename T, size_t ItemsGroupSize = 512> class ArrayList {
+public:
+ ArrayList(parallel::PerThreadBumpPtrAllocator *Allocator)
+ : Allocator(Allocator) {}
+
+ /// Add specified \p Item to the list.
+ T &add(const T &Item) {
+ assert(Allocator);
+
+ // Allocate head group if it is not allocated yet.
+ while (!LastGroup) {
+ if (allocateNewGroup(GroupsHead))
+ LastGroup = GroupsHead.load();
+ }
+
+ ItemsGroup *CurGroup;
+ size_t CurItemsCount;
+ do {
+ CurGroup = LastGroup;
+ CurItemsCount = CurGroup->ItemsCount.fetch_add(1);
+
+ // Check whether current group is full.
+ if (CurItemsCount < ItemsGroupSize)
+ break;
+
+ // Allocate next group if necessary.
+ if (!CurGroup->Next)
+ allocateNewGroup(CurGroup->Next);
+
+ LastGroup.compare_exchange_weak(CurGroup, CurGroup->Next);
+ } while (true);
+
+ // Store item into the current group.
+ CurGroup->Items[CurItemsCount] = Item;
+ return CurGroup->Items[CurItemsCount];
+ }
+
+ using ItemHandlerTy = function_ref<void(T &)>;
+
+ /// Enumerate all items and apply specified \p Handler to each.
+ void forEach(ItemHandlerTy Handler) {
+ for (ItemsGroup *CurGroup = GroupsHead; CurGroup;
+ CurGroup = CurGroup->Next) {
+ for (T &Item : *CurGroup)
+ Handler(Item);
+ }
+ }
+
+ /// Check whether list is empty.
+ bool empty() { return !GroupsHead; }
+
+ /// Erase list.
+ void erase() {
+ GroupsHead = nullptr;
+ LastGroup = nullptr;
+ }
+
+ void sort(function_ref<bool(const T &LHS, const T &RHS)> Comparator) {
+ SmallVector<T> SortedItems;
+ forEach([&](T &Item) { SortedItems.push_back(Item); });
+
+ if (SortedItems.size()) {
+ std::sort(SortedItems.begin(), SortedItems.end(), Comparator);
+
+ size_t SortedItemIdx = 0;
+ forEach([&](T &Item) { Item = SortedItems[SortedItemIdx++]; });
+ assert(SortedItemIdx == SortedItems.size());
+ }
+ }
+
+ size_t size() {
+ size_t Result = 0;
+
+ for (ItemsGroup *CurGroup = GroupsHead; CurGroup != nullptr;
+ CurGroup = CurGroup->Next)
+ Result += CurGroup->getItemsCount();
+
+ return Result;
+ }
+
+protected:
+ struct ItemsGroup {
+ using ArrayTy = std::array<T, ItemsGroupSize>;
+
+ // Array of items kept by this group.
+ ArrayTy Items;
+
+ // Pointer to the next items group.
+ std::atomic<ItemsGroup *> Next = nullptr;
+
+ // Number of items in this group.
+    // NOTE: ItemsCount may exceed ItemsGroupSize, since several threads can
+    // increment it concurrently. Use the getItemsCount() method to get the
+    // real number of items inside the ItemsGroup.
+ std::atomic<size_t> ItemsCount = 0;
+
+ size_t getItemsCount() const {
+ return std::min(ItemsCount.load(), ItemsGroupSize);
+ }
+
+ typename ArrayTy::iterator begin() { return Items.begin(); }
+ typename ArrayTy::iterator end() { return Items.begin() + getItemsCount(); }
+ };
+
+  // Allocate a new group. Put the allocated group into \p AtomicGroup if it
+  // is empty. If \p AtomicGroup has been filled by another thread, append the
+  // allocated group to the end of the groups list instead.
+  // \returns true if the allocated group was put into \p AtomicGroup.
+ bool allocateNewGroup(std::atomic<ItemsGroup *> &AtomicGroup) {
+ ItemsGroup *CurGroup = nullptr;
+
+ // Allocate new group.
+ ItemsGroup *NewGroup = Allocator->Allocate<ItemsGroup>();
+ NewGroup->ItemsCount = 0;
+ NewGroup->Next = nullptr;
+
+ // Try to replace current group with allocated one.
+ if (AtomicGroup.compare_exchange_weak(CurGroup, NewGroup))
+ return true;
+
+ // Put allocated group as last group.
+ while (CurGroup) {
+ ItemsGroup *NextGroup = CurGroup->Next;
+
+ if (!NextGroup) {
+ if (CurGroup->Next.compare_exchange_weak(NextGroup, NewGroup))
+ break;
+ }
+
+ CurGroup = NextGroup;
+ }
+
+ return false;
+ }
+
+ std::atomic<ItemsGroup *> GroupsHead = nullptr;
+ std::atomic<ItemsGroup *> LastGroup = nullptr;
+ parallel::PerThreadBumpPtrAllocator *Allocator = nullptr;
+};
+
+} // end of namespace dwarflinker_parallel
+} // end namespace llvm
+
+#endif // LLVM_LIB_DWARFLINKERPARALLEL_ARRAYLIST_H
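
A minimal usage sketch (the per-thread allocator setup is assumed; ArrayList only keeps a pointer to it):

    parallel::PerThreadBumpPtrAllocator Allocator;
    ArrayList<uint64_t> Offsets(&Allocator);
    Offsets.add(0x20);   // add() may be called from several threads at once
    Offsets.add(0x10);
    Offsets.sort([](const uint64_t &L, const uint64_t &R) { return L < R; });
    Offsets.forEach([](uint64_t &V) { llvm::outs() << V << "\n"; });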
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DIEAttributeCloner.cpp b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DIEAttributeCloner.cpp
new file mode 100644
index 000000000000..81fc57f7cabb
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DIEAttributeCloner.cpp
@@ -0,0 +1,655 @@
+//=== DIEAttributeCloner.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DIEAttributeCloner.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h"
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+void DIEAttributeCloner::clone() {
+ // Extract and clone every attribute.
+ DWARFDataExtractor Data = InUnit.getOrigUnit().getDebugInfoExtractor();
+
+ uint64_t Offset = InputDieEntry->getOffset();
+ // Point to the next DIE (generally there is always at least a NULL
+ // entry after the current one). If this is a lone
+ // DW_TAG_compile_unit without any children, point to the next unit.
+ uint64_t NextOffset = (InputDIEIdx + 1 < InUnit.getOrigUnit().getNumDIEs())
+ ? InUnit.getDIEAtIndex(InputDIEIdx + 1).getOffset()
+ : InUnit.getOrigUnit().getNextUnitOffset();
+
+ // We could copy the data only if we need to apply a relocation to it. After
+ // testing, it seems there is no performance downside to doing the copy
+ // unconditionally, and it makes the code simpler.
+ SmallString<40> DIECopy(Data.getData().substr(Offset, NextOffset - Offset));
+ Data =
+ DWARFDataExtractor(DIECopy, Data.isLittleEndian(), Data.getAddressSize());
+
+ // Modify the copy with relocated addresses.
+ InUnit.getContaingFile().Addresses->applyValidRelocs(DIECopy, Offset,
+ Data.isLittleEndian());
+
+ // Reset the Offset to 0 as we will be working on the local copy of
+ // the data.
+ Offset = 0;
+
+ const auto *Abbrev = InputDieEntry->getAbbreviationDeclarationPtr();
+ Offset += getULEB128Size(Abbrev->getCode());
+
+ // Set current output offset.
+ AttrOutOffset = OutUnit.isCompileUnit() ? OutDIE->getOffset() : 0;
+ for (const auto &AttrSpec : Abbrev->attributes()) {
+ // Check whether current attribute should be skipped.
+ if (shouldSkipAttribute(AttrSpec)) {
+ DWARFFormValue::skipValue(AttrSpec.Form, Data, &Offset,
+ InUnit.getFormParams());
+ continue;
+ }
+
+ DWARFFormValue Val = AttrSpec.getFormValue();
+ Val.extractValue(Data, &Offset, InUnit.getFormParams(),
+ &InUnit.getOrigUnit());
+
+ // Clone current attribute.
+ switch (AttrSpec.Form) {
+ case dwarf::DW_FORM_strp:
+ case dwarf::DW_FORM_line_strp:
+ case dwarf::DW_FORM_string:
+ case dwarf::DW_FORM_strx:
+ case dwarf::DW_FORM_strx1:
+ case dwarf::DW_FORM_strx2:
+ case dwarf::DW_FORM_strx3:
+ case dwarf::DW_FORM_strx4:
+ AttrOutOffset += cloneStringAttr(Val, AttrSpec);
+ break;
+ case dwarf::DW_FORM_ref_addr:
+ case dwarf::DW_FORM_ref1:
+ case dwarf::DW_FORM_ref2:
+ case dwarf::DW_FORM_ref4:
+ case dwarf::DW_FORM_ref8:
+ case dwarf::DW_FORM_ref_udata:
+ AttrOutOffset += cloneDieRefAttr(Val, AttrSpec);
+ break;
+ case dwarf::DW_FORM_data1:
+ case dwarf::DW_FORM_data2:
+ case dwarf::DW_FORM_data4:
+ case dwarf::DW_FORM_data8:
+ case dwarf::DW_FORM_udata:
+ case dwarf::DW_FORM_sdata:
+ case dwarf::DW_FORM_sec_offset:
+ case dwarf::DW_FORM_flag:
+ case dwarf::DW_FORM_flag_present:
+ case dwarf::DW_FORM_rnglistx:
+ case dwarf::DW_FORM_loclistx:
+ case dwarf::DW_FORM_implicit_const:
+ AttrOutOffset += cloneScalarAttr(Val, AttrSpec);
+ break;
+ case dwarf::DW_FORM_block:
+ case dwarf::DW_FORM_block1:
+ case dwarf::DW_FORM_block2:
+ case dwarf::DW_FORM_block4:
+ case dwarf::DW_FORM_exprloc:
+ AttrOutOffset += cloneBlockAttr(Val, AttrSpec);
+ break;
+ case dwarf::DW_FORM_addr:
+ case dwarf::DW_FORM_addrx:
+ case dwarf::DW_FORM_addrx1:
+ case dwarf::DW_FORM_addrx2:
+ case dwarf::DW_FORM_addrx3:
+ case dwarf::DW_FORM_addrx4:
+ AttrOutOffset += cloneAddressAttr(Val, AttrSpec);
+ break;
+ default:
+      InUnit.warn("unsupported attribute form " +
+                      dwarf::FormEncodingString(AttrSpec.Form) +
+                      " in DIEAttributeCloner::clone(). Dropping.",
+                  InputDieEntry);
+ }
+ }
+
+ // We convert source strings into the indexed form for DWARFv5.
+  // Check whether the original compile unit already has a
+  // DW_AT_str_offsets_base attribute.
+ if (InputDieEntry->getTag() == dwarf::DW_TAG_compile_unit &&
+ InUnit.getVersion() >= 5 && !AttrInfo.HasStringOffsetBaseAttr) {
+ DebugInfoOutputSection.notePatchWithOffsetUpdate(
+ DebugOffsetPatch{AttrOutOffset,
+ &OutUnit->getOrCreateSectionDescriptor(
+ DebugSectionKind::DebugStrOffsets),
+ true},
+ PatchesOffsets);
+
+ AttrOutOffset +=
+ Generator
+ .addScalarAttribute(dwarf::DW_AT_str_offsets_base,
+ dwarf::DW_FORM_sec_offset,
+ OutUnit->getDebugStrOffsetsHeaderSize())
+ .second;
+ }
+}
+
+bool DIEAttributeCloner::shouldSkipAttribute(
+ DWARFAbbreviationDeclaration::AttributeSpec AttrSpec) {
+ switch (AttrSpec.Attr) {
+ default:
+ return false;
+ case dwarf::DW_AT_low_pc:
+ case dwarf::DW_AT_high_pc:
+ case dwarf::DW_AT_ranges:
+ if (InUnit.getGlobalData().getOptions().UpdateIndexTablesOnly)
+ return false;
+
+    // Skip the address attribute if we are in function scope and the function
+    // does not reference a live address.
+ return InUnit.getDIEInfo(InputDIEIdx).getIsInFunctionScope() &&
+ !FuncAddressAdjustment.has_value();
+ case dwarf::DW_AT_rnglists_base:
+ // In case !Update the .debug_addr table is not generated/preserved.
+ // Thus instead of DW_FORM_rnglistx the DW_FORM_sec_offset is used.
+ // Since DW_AT_rnglists_base is used for only DW_FORM_rnglistx the
+ // DW_AT_rnglists_base is removed.
+ return !InUnit.getGlobalData().getOptions().UpdateIndexTablesOnly;
+ case dwarf::DW_AT_loclists_base:
+ // In case !Update the .debug_addr table is not generated/preserved.
+ // Thus instead of DW_FORM_loclistx the DW_FORM_sec_offset is used.
+ // Since DW_AT_loclists_base is used for only DW_FORM_loclistx the
+ // DW_AT_loclists_base is removed.
+ return !InUnit.getGlobalData().getOptions().UpdateIndexTablesOnly;
+ case dwarf::DW_AT_location:
+ case dwarf::DW_AT_frame_base:
+ if (InUnit.getGlobalData().getOptions().UpdateIndexTablesOnly)
+ return false;
+
+    // When the location expression contains an address, skip this attribute
+    // if it does not reference a live address.
+ if (HasLocationExpressionAddress)
+ return !VarAddressAdjustment.has_value();
+
+    // Skip the location attribute if we are in function scope and the
+    // function does not reference a live address.
+ return InUnit.getDIEInfo(InputDIEIdx).getIsInFunctionScope() &&
+ !FuncAddressAdjustment.has_value();
+ }
+}
+
+size_t DIEAttributeCloner::cloneStringAttr(
+ const DWARFFormValue &Val,
+ const DWARFAbbreviationDeclaration::AttributeSpec &AttrSpec) {
+ std::optional<const char *> String = dwarf::toString(Val);
+ if (!String) {
+    InUnit.warn("can't read string attribute.");
+ return 0;
+ }
+
+ StringEntry *StringInPool =
+ InUnit.getGlobalData().getStringPool().insert(*String).first;
+
+ // Update attributes info.
+ if (AttrSpec.Attr == dwarf::DW_AT_name)
+ AttrInfo.Name = StringInPool;
+ else if (AttrSpec.Attr == dwarf::DW_AT_MIPS_linkage_name ||
+ AttrSpec.Attr == dwarf::DW_AT_linkage_name)
+ AttrInfo.MangledName = StringInPool;
+
+ if (AttrSpec.Form == dwarf::DW_FORM_line_strp) {
+ if (OutUnit.isTypeUnit()) {
+ DebugInfoOutputSection.notePatch(DebugTypeLineStrPatch{
+ AttrOutOffset, OutDIE, InUnit.getDieTypeEntry(InputDIEIdx),
+ StringInPool});
+ } else {
+ DebugInfoOutputSection.notePatchWithOffsetUpdate(
+ DebugLineStrPatch{{AttrOutOffset}, StringInPool}, PatchesOffsets);
+ }
+ return Generator
+ .addStringPlaceholderAttribute(AttrSpec.Attr, dwarf::DW_FORM_line_strp)
+ .second;
+ }
+
+ if (Use_DW_FORM_strp) {
+ if (OutUnit.isTypeUnit()) {
+ DebugInfoOutputSection.notePatch(
+ DebugTypeStrPatch{AttrOutOffset, OutDIE,
+ InUnit.getDieTypeEntry(InputDIEIdx), StringInPool});
+ } else {
+ DebugInfoOutputSection.notePatchWithOffsetUpdate(
+ DebugStrPatch{{AttrOutOffset}, StringInPool}, PatchesOffsets);
+ }
+
+ return Generator
+ .addStringPlaceholderAttribute(AttrSpec.Attr, dwarf::DW_FORM_strp)
+ .second;
+ }
+
+ return Generator
+ .addIndexedStringAttribute(AttrSpec.Attr, dwarf::DW_FORM_strx,
+ OutUnit->getDebugStrIndex(StringInPool))
+ .second;
+}
+
+size_t DIEAttributeCloner::cloneDieRefAttr(
+ const DWARFFormValue &Val,
+ const DWARFAbbreviationDeclaration::AttributeSpec &AttrSpec) {
+ if (AttrSpec.Attr == dwarf::DW_AT_sibling)
+ return 0;
+
+ std::optional<UnitEntryPairTy> RefDiePair =
+ InUnit.resolveDIEReference(Val, ResolveInterCUReferencesMode::Resolve);
+ if (!RefDiePair || !RefDiePair->DieEntry) {
+ // If the referenced DIE is not found, drop the attribute.
+    InUnit.warn("can't find referenced DIE.", InputDieEntry);
+ return 0;
+ }
+
+ TypeEntry *RefTypeName = nullptr;
+ const CompileUnit::DIEInfo &RefDIEInfo =
+ RefDiePair->CU->getDIEInfo(RefDiePair->DieEntry);
+ if (RefDIEInfo.needToPlaceInTypeTable())
+ RefTypeName = RefDiePair->CU->getDieTypeEntry(RefDiePair->DieEntry);
+
+ if (OutUnit.isTypeUnit()) {
+ assert(RefTypeName && "Type name for referenced DIE is not set");
+ assert(InUnit.getDieTypeEntry(InputDIEIdx) &&
+ "Type name for DIE is not set");
+
+ DebugInfoOutputSection.notePatch(DebugType2TypeDieRefPatch{
+ AttrOutOffset, OutDIE, InUnit.getDieTypeEntry(InputDIEIdx),
+ RefTypeName});
+
+ return Generator
+ .addScalarAttribute(AttrSpec.Attr, dwarf::DW_FORM_ref4, 0xBADDEF)
+ .second;
+ }
+
+ if (RefTypeName) {
+ DebugInfoOutputSection.notePatchWithOffsetUpdate(
+ DebugDieTypeRefPatch{AttrOutOffset, RefTypeName}, PatchesOffsets);
+
+ return Generator
+ .addScalarAttribute(AttrSpec.Attr, dwarf::DW_FORM_ref_addr, 0xBADDEF)
+ .second;
+ }
+
+ // Get output offset for referenced DIE.
+ uint64_t OutDieOffset = RefDiePair->CU->getDieOutOffset(RefDiePair->DieEntry);
+
+  // Check whether the referenced DIE is in the current compile unit.
+ bool IsLocal = OutUnit->getUniqueID() == RefDiePair->CU->getUniqueID();
+
+  // Set the attribute form based on the kind of referenced DIE (local or not).
+ dwarf::Form NewForm = IsLocal ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr;
+
+  // Check whether the current attribute references an already-cloned DIE
+  // inside the same compile unit. If so, write the already-known offset value.
+ if (IsLocal && (OutDieOffset != 0))
+ return Generator.addScalarAttribute(AttrSpec.Attr, NewForm, OutDieOffset)
+ .second;
+
+  // If the offset value is not known at this point, create a patch for the
+  // reference value and write a dummy value into the attribute.
+ DebugInfoOutputSection.notePatchWithOffsetUpdate(
+ DebugDieRefPatch{AttrOutOffset, OutUnit.getAsCompileUnit(),
+ RefDiePair->CU,
+ RefDiePair->CU->getDIEIndex(RefDiePair->DieEntry)},
+ PatchesOffsets);
+ return Generator.addScalarAttribute(AttrSpec.Attr, NewForm, 0xBADDEF).second;
+}
+
+size_t DIEAttributeCloner::cloneScalarAttr(
+ const DWARFFormValue &Val,
+ const DWARFAbbreviationDeclaration::AttributeSpec &AttrSpec) {
+
+  // Create patches for attributes referencing other, non-invariant sections.
+  // Invariant sections need no update here, since neither such a section nor
+  // references into it change value in the --update case.
+ switch (AttrSpec.Attr) {
+ case dwarf::DW_AT_macro_info: {
+ if (std::optional<uint64_t> Offset = Val.getAsSectionOffset()) {
+ const DWARFDebugMacro *Macro =
+ InUnit.getContaingFile().Dwarf->getDebugMacinfo();
+ if (Macro == nullptr || !Macro->hasEntryForOffset(*Offset))
+ return 0;
+
+ DebugInfoOutputSection.notePatchWithOffsetUpdate(
+ DebugOffsetPatch{AttrOutOffset,
+ &OutUnit->getOrCreateSectionDescriptor(
+ DebugSectionKind::DebugMacinfo)},
+ PatchesOffsets);
+ }
+ } break;
+ case dwarf::DW_AT_macros: {
+ if (std::optional<uint64_t> Offset = Val.getAsSectionOffset()) {
+ const DWARFDebugMacro *Macro =
+ InUnit.getContaingFile().Dwarf->getDebugMacro();
+ if (Macro == nullptr || !Macro->hasEntryForOffset(*Offset))
+ return 0;
+
+ DebugInfoOutputSection.notePatchWithOffsetUpdate(
+ DebugOffsetPatch{AttrOutOffset,
+ &OutUnit->getOrCreateSectionDescriptor(
+ DebugSectionKind::DebugMacro)},
+ PatchesOffsets);
+ }
+ } break;
+ case dwarf::DW_AT_stmt_list: {
+ DebugInfoOutputSection.notePatchWithOffsetUpdate(
+ DebugOffsetPatch{AttrOutOffset, &OutUnit->getOrCreateSectionDescriptor(
+ DebugSectionKind::DebugLine)},
+ PatchesOffsets);
+ } break;
+ case dwarf::DW_AT_str_offsets_base: {
+ DebugInfoOutputSection.notePatchWithOffsetUpdate(
+ DebugOffsetPatch{AttrOutOffset,
+ &OutUnit->getOrCreateSectionDescriptor(
+ DebugSectionKind::DebugStrOffsets),
+ true},
+ PatchesOffsets);
+
+    // Use the size of the .debug_str_offsets header as the attribute value.
+    // The offset into .debug_str_offsets will be added later while patching.
+ AttrInfo.HasStringOffsetBaseAttr = true;
+ return Generator
+ .addScalarAttribute(AttrSpec.Attr, AttrSpec.Form,
+ OutUnit->getDebugStrOffsetsHeaderSize())
+ .second;
+ } break;
+ case dwarf::DW_AT_decl_file: {
+      // The value of DW_AT_decl_file may not fit the original form. A longer
+      // form would shift the offsets of all following attributes. To avoid
+      // updating those offsets, we always remove the original DW_AT_decl_file
+      // and attach it at the last position later.
+ if (OutUnit.isTypeUnit()) {
+ if (std::optional<std::pair<StringRef, StringRef>> DirAndFilename =
+ InUnit.getDirAndFilenameFromLineTable(Val))
+ DebugInfoOutputSection.notePatch(DebugTypeDeclFilePatch{
+ OutDIE,
+ InUnit.getDieTypeEntry(InputDIEIdx),
+ OutUnit->getGlobalData()
+ .getStringPool()
+ .insert(DirAndFilename->first)
+ .first,
+ OutUnit->getGlobalData()
+ .getStringPool()
+ .insert(DirAndFilename->second)
+ .first,
+ });
+ return 0;
+ }
+ } break;
+ default: {
+ } break;
+ };
+
+ uint64_t Value;
+ if (AttrSpec.Attr == dwarf::DW_AT_const_value &&
+ (InputDieEntry->getTag() == dwarf::DW_TAG_variable ||
+ InputDieEntry->getTag() == dwarf::DW_TAG_constant))
+ AttrInfo.HasLiveAddress = true;
+
+ if (InUnit.getGlobalData().getOptions().UpdateIndexTablesOnly) {
+ if (auto OptionalValue = Val.getAsUnsignedConstant())
+ Value = *OptionalValue;
+ else if (auto OptionalValue = Val.getAsSignedConstant())
+ Value = *OptionalValue;
+ else if (auto OptionalValue = Val.getAsSectionOffset())
+ Value = *OptionalValue;
+ else {
+ InUnit.warn("unsupported scalar attribute form. Dropping attribute.",
+ InputDieEntry);
+ return 0;
+ }
+
+ if (AttrSpec.Attr == dwarf::DW_AT_declaration && Value)
+ AttrInfo.IsDeclaration = true;
+
+ if (AttrSpec.Form == dwarf::DW_FORM_loclistx)
+ return Generator.addLocListAttribute(AttrSpec.Attr, AttrSpec.Form, Value)
+ .second;
+
+ return Generator.addScalarAttribute(AttrSpec.Attr, AttrSpec.Form, Value)
+ .second;
+ }
+
+ dwarf::Form ResultingForm = AttrSpec.Form;
+ if (AttrSpec.Form == dwarf::DW_FORM_rnglistx) {
+    // DWARFLinker does not generate a .debug_addr table, so all index-based
+    // forms must be converted to their direct counterparts. Change
+    // DW_FORM_rnglistx to DW_FORM_sec_offset here.
+ std::optional<uint64_t> Index = Val.getAsSectionOffset();
+ if (!Index) {
+      InUnit.warn("can't read the attribute. Dropping.", InputDieEntry);
+ return 0;
+ }
+ std::optional<uint64_t> Offset =
+ InUnit.getOrigUnit().getRnglistOffset(*Index);
+ if (!Offset) {
+      InUnit.warn("can't read the attribute. Dropping.", InputDieEntry);
+ return 0;
+ }
+
+ Value = *Offset;
+ ResultingForm = dwarf::DW_FORM_sec_offset;
+ } else if (AttrSpec.Form == dwarf::DW_FORM_loclistx) {
+    // DWARFLinker does not generate a .debug_addr table, so all index-based
+    // forms must be converted to their direct counterparts. Change
+    // DW_FORM_loclistx to DW_FORM_sec_offset here.
+ std::optional<uint64_t> Index = Val.getAsSectionOffset();
+ if (!Index) {
+      InUnit.warn("can't read the attribute. Dropping.", InputDieEntry);
+ return 0;
+ }
+ std::optional<uint64_t> Offset =
+ InUnit.getOrigUnit().getLoclistOffset(*Index);
+ if (!Offset) {
+      InUnit.warn("can't read the attribute. Dropping.", InputDieEntry);
+ return 0;
+ }
+
+ Value = *Offset;
+ ResultingForm = dwarf::DW_FORM_sec_offset;
+ } else if (AttrSpec.Attr == dwarf::DW_AT_high_pc &&
+ InputDieEntry->getTag() == dwarf::DW_TAG_compile_unit) {
+ if (!OutUnit.isCompileUnit())
+ return 0;
+
+ std::optional<uint64_t> LowPC = OutUnit.getAsCompileUnit()->getLowPc();
+ if (!LowPC)
+ return 0;
+    // For DWARF >= 4, high_pc is a size, not an address.
+ Value = OutUnit.getAsCompileUnit()->getHighPc() - *LowPC;
+ } else if (AttrSpec.Form == dwarf::DW_FORM_sec_offset)
+ Value = *Val.getAsSectionOffset();
+ else if (AttrSpec.Form == dwarf::DW_FORM_sdata)
+ Value = *Val.getAsSignedConstant();
+ else if (auto OptionalValue = Val.getAsUnsignedConstant())
+ Value = *OptionalValue;
+ else {
+ InUnit.warn("unsupported scalar attribute form. Dropping attribute.",
+ InputDieEntry);
+ return 0;
+ }
+
+ if (AttrSpec.Attr == dwarf::DW_AT_ranges ||
+ AttrSpec.Attr == dwarf::DW_AT_start_scope) {
+ // Create patch for the range offset value.
+ DebugInfoOutputSection.notePatchWithOffsetUpdate(
+ DebugRangePatch{{AttrOutOffset},
+ InputDieEntry->getTag() == dwarf::DW_TAG_compile_unit},
+ PatchesOffsets);
+ AttrInfo.HasRanges = true;
+ } else if (DWARFAttribute::mayHaveLocationList(AttrSpec.Attr) &&
+ dwarf::doesFormBelongToClass(AttrSpec.Form,
+ DWARFFormValue::FC_SectionOffset,
+ InUnit.getOrigUnit().getVersion())) {
+ int64_t AddrAdjustmentValue = 0;
+ if (VarAddressAdjustment)
+ AddrAdjustmentValue = *VarAddressAdjustment;
+ else if (FuncAddressAdjustment)
+ AddrAdjustmentValue = *FuncAddressAdjustment;
+
+ // Create patch for the location offset value.
+ DebugInfoOutputSection.notePatchWithOffsetUpdate(
+ DebugLocPatch{{AttrOutOffset}, AddrAdjustmentValue}, PatchesOffsets);
+ } else if (AttrSpec.Attr == dwarf::DW_AT_addr_base) {
+ DebugInfoOutputSection.notePatchWithOffsetUpdate(
+ DebugOffsetPatch{
+ AttrOutOffset,
+ &OutUnit->getOrCreateSectionDescriptor(DebugSectionKind::DebugAddr),
+ true},
+ PatchesOffsets);
+
+    // Use the size of the .debug_addr header as the attribute value. The
+    // offset into .debug_addr will be added later while patching.
+ return Generator
+ .addScalarAttribute(AttrSpec.Attr, AttrSpec.Form,
+ OutUnit->getDebugAddrHeaderSize())
+ .second;
+ } else if (AttrSpec.Attr == dwarf::DW_AT_declaration && Value)
+ AttrInfo.IsDeclaration = true;
+
+ return Generator.addScalarAttribute(AttrSpec.Attr, ResultingForm, Value)
+ .second;
+}
+
+size_t DIEAttributeCloner::cloneBlockAttr(
+ const DWARFFormValue &Val,
+ const DWARFAbbreviationDeclaration::AttributeSpec &AttrSpec) {
+
+ if (OutUnit.isTypeUnit())
+ return 0;
+
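+  // Remember how many patches already exist so that only the patches created
+  // while cloning this block get their offsets adjusted below.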
+ size_t NumberOfPatchesAtStart = PatchesOffsets.size();
+
+ // If the block is a DWARF Expression, clone it into the temporary
+ // buffer using cloneExpression(), otherwise copy the data directly.
+ SmallVector<uint8_t, 32> Buffer;
+ ArrayRef<uint8_t> Bytes = *Val.getAsBlock();
+ if (DWARFAttribute::mayHaveLocationExpr(AttrSpec.Attr) &&
+ (Val.isFormClass(DWARFFormValue::FC_Block) ||
+ Val.isFormClass(DWARFFormValue::FC_Exprloc))) {
+ DataExtractor Data(StringRef((const char *)Bytes.data(), Bytes.size()),
+ InUnit.getOrigUnit().isLittleEndian(),
+ InUnit.getOrigUnit().getAddressByteSize());
+ DWARFExpression Expr(Data, InUnit.getOrigUnit().getAddressByteSize(),
+ InUnit.getFormParams().Format);
+
+ InUnit.cloneDieAttrExpression(Expr, Buffer, DebugInfoOutputSection,
+ VarAddressAdjustment, PatchesOffsets);
+ Bytes = Buffer;
+ }
+
+ // The expression location data might be updated and exceed the original size.
+ // Check whether the new data fits into the original form.
+ dwarf::Form ResultForm = AttrSpec.Form;
+ if ((ResultForm == dwarf::DW_FORM_block1 && Bytes.size() > UINT8_MAX) ||
+ (ResultForm == dwarf::DW_FORM_block2 && Bytes.size() > UINT16_MAX) ||
+ (ResultForm == dwarf::DW_FORM_block4 && Bytes.size() > UINT32_MAX))
+ ResultForm = dwarf::DW_FORM_block;
+
+ size_t FinalAttributeSize;
+ if (AttrSpec.Form == dwarf::DW_FORM_exprloc)
+ FinalAttributeSize =
+ Generator.addLocationAttribute(AttrSpec.Attr, ResultForm, Bytes).second;
+ else
+ FinalAttributeSize =
+ Generator.addBlockAttribute(AttrSpec.Attr, ResultForm, Bytes).second;
+
+ // Update patches offsets with the size of length field for Bytes.
+ for (size_t Idx = NumberOfPatchesAtStart; Idx < PatchesOffsets.size();
+ Idx++) {
+ assert(FinalAttributeSize > Bytes.size());
+ *PatchesOffsets[Idx] +=
+ (AttrOutOffset + (FinalAttributeSize - Bytes.size()));
+ }
+
+ if (HasLocationExpressionAddress)
+ AttrInfo.HasLiveAddress =
+ VarAddressAdjustment.has_value() ||
+ InUnit.getGlobalData().getOptions().UpdateIndexTablesOnly;
+
+ return FinalAttributeSize;
+}
+
+size_t DIEAttributeCloner::cloneAddressAttr(
+ const DWARFFormValue &Val,
+ const DWARFAbbreviationDeclaration::AttributeSpec &AttrSpec) {
+ if (AttrSpec.Attr == dwarf::DW_AT_low_pc)
+ AttrInfo.HasLiveAddress = true;
+
+ if (InUnit.getGlobalData().getOptions().UpdateIndexTablesOnly)
+ return Generator
+ .addScalarAttribute(AttrSpec.Attr, AttrSpec.Form, Val.getRawUValue())
+ .second;
+
+ if (OutUnit.isTypeUnit())
+ return 0;
+
+  // The cloned DIE may have address attributes relocated to a
+  // totally unrelated value. This can happen:
+  //   - If high_pc is an address (DWARF version == 2), then it might have been
+  //     relocated to a totally unrelated value (because the end address in the
+  //     object file might be the start address of another function which got
+  //     moved independently by the linker).
+  //   - If an address inside an inlined subprogram is relocated, and that
+  //     subprogram happens to start at the beginning of its inlining function.
+  //   To avoid the above cases, and to avoid applying the relocation twice (in
+  //   applyValidRelocs and here), read the address attribute from InputDIE and
+  //   apply Info.PCOffset here.
+
+ std::optional<DWARFFormValue> AddrAttribute =
+ InUnit.find(InputDieEntry, AttrSpec.Attr);
+ if (!AddrAttribute)
+    llvm_unreachable("Can't find attribute");
+
+ std::optional<uint64_t> Addr = AddrAttribute->getAsAddress();
+ if (!Addr) {
+    InUnit.warn("can't read address attribute value.");
+ return 0;
+ }
+
+ if (InputDieEntry->getTag() == dwarf::DW_TAG_compile_unit &&
+ AttrSpec.Attr == dwarf::DW_AT_low_pc) {
+ if (std::optional<uint64_t> LowPC = OutUnit.getAsCompileUnit()->getLowPc())
+ Addr = *LowPC;
+ else
+ return 0;
+ } else if (InputDieEntry->getTag() == dwarf::DW_TAG_compile_unit &&
+ AttrSpec.Attr == dwarf::DW_AT_high_pc) {
+ if (uint64_t HighPc = OutUnit.getAsCompileUnit()->getHighPc())
+ Addr = HighPc;
+ else
+ return 0;
+ } else {
+ if (VarAddressAdjustment)
+ *Addr += *VarAddressAdjustment;
+ else if (FuncAddressAdjustment)
+ *Addr += *FuncAddressAdjustment;
+ }
+
+ if (AttrSpec.Form == dwarf::DW_FORM_addr) {
+ return Generator.addScalarAttribute(AttrSpec.Attr, AttrSpec.Form, *Addr)
+ .second;
+ }
+
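+  // Convert the remaining address forms (the addrx family) to DW_FORM_addrx
+  // with an index into the generated .debug_addr table.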
+ return Generator
+ .addScalarAttribute(AttrSpec.Attr, dwarf::Form::DW_FORM_addrx,
+ OutUnit.getAsCompileUnit()->getDebugAddrIndex(*Addr))
+ .second;
+}
+
+unsigned DIEAttributeCloner::finalizeAbbreviations(bool HasChildrenToClone) {
+ // Add the size of the abbreviation number to the output offset.
+ AttrOutOffset +=
+ Generator.finalizeAbbreviations(HasChildrenToClone, &PatchesOffsets);
+
+ return AttrOutOffset;
+}
+
+} // end of namespace dwarflinker_parallel
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DIEAttributeCloner.h b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DIEAttributeCloner.h
new file mode 100644
index 000000000000..e18c0a15cefc
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DIEAttributeCloner.h
@@ -0,0 +1,184 @@
+//===- DIEAttributeCloner.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_DWARFLINKERPARALLEL_DIEATTRIBUTECLONER_H
+#define LLVM_LIB_DWARFLINKERPARALLEL_DIEATTRIBUTECLONER_H
+
+#include "ArrayList.h"
+#include "DIEGenerator.h"
+#include "DWARFLinkerCompileUnit.h"
+#include "DWARFLinkerGlobalData.h"
+#include "DWARFLinkerTypeUnit.h"
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+/// Information gathered and exchanged between the various
+/// clone*Attr helpers about the attributes of a particular DIE.
+struct AttributesInfo {
+ /// Short Name.
+ StringEntry *Name = nullptr;
+
+ /// Mangled Name.
+ StringEntry *MangledName = nullptr;
+
+ /// Does the DIE have an address pointing to live code section?
+ bool HasLiveAddress = false;
+
+ /// Is this DIE only a declaration?
+ bool IsDeclaration = false;
+
+ /// Does the DIE have a ranges attribute?
+ bool HasRanges = false;
+
+ /// Does the DIE have a string offset attribute?
+ bool HasStringOffsetBaseAttr = false;
+};
+
+/// This class creates clones of input DIE attributes.
+/// It enumerates the attributes of the input DIE, creates a clone of each
+/// attribute, and adds the cloned attribute to the output DIE.
+class DIEAttributeCloner {
+public:
+ DIEAttributeCloner(DIE *OutDIE, CompileUnit &InUnit, CompileUnit *OutUnit,
+ const DWARFDebugInfoEntry *InputDieEntry,
+ DIEGenerator &Generator,
+ std::optional<int64_t> FuncAddressAdjustment,
+ std::optional<int64_t> VarAddressAdjustment,
+ bool HasLocationExpressionAddress)
+ : DIEAttributeCloner(OutDIE, InUnit,
+ CompileUnit::OutputUnitVariantPtr(OutUnit),
+ InputDieEntry, Generator, FuncAddressAdjustment,
+ VarAddressAdjustment, HasLocationExpressionAddress) {
+ }
+
+ DIEAttributeCloner(DIE *OutDIE, CompileUnit &InUnit, TypeUnit *OutUnit,
+ const DWARFDebugInfoEntry *InputDieEntry,
+ DIEGenerator &Generator,
+ std::optional<int64_t> FuncAddressAdjustment,
+ std::optional<int64_t> VarAddressAdjustment,
+ bool HasLocationExpressionAddress)
+ : DIEAttributeCloner(OutDIE, InUnit,
+ CompileUnit::OutputUnitVariantPtr(OutUnit),
+ InputDieEntry, Generator, FuncAddressAdjustment,
+ VarAddressAdjustment, HasLocationExpressionAddress) {
+ }
+
+ /// Clone attributes of input DIE.
+ void clone();
+
+ /// Create abbreviations for the output DIE after all attributes are cloned.
+ unsigned finalizeAbbreviations(bool HasChildrenToClone);
+
+ /// Cannot be used concurrently.
+ AttributesInfo AttrInfo;
+
+ unsigned getOutOffset() { return AttrOutOffset; }
+
+protected:
+ DIEAttributeCloner(DIE *OutDIE, CompileUnit &InUnit,
+ CompileUnit::OutputUnitVariantPtr OutUnit,
+ const DWARFDebugInfoEntry *InputDieEntry,
+ DIEGenerator &Generator,
+ std::optional<int64_t> FuncAddressAdjustment,
+ std::optional<int64_t> VarAddressAdjustment,
+ bool HasLocationExpressionAddress)
+ : OutDIE(OutDIE), InUnit(InUnit), OutUnit(OutUnit),
+ DebugInfoOutputSection(
+ OutUnit->getSectionDescriptor(DebugSectionKind::DebugInfo)),
+ InputDieEntry(InputDieEntry), Generator(Generator),
+ FuncAddressAdjustment(FuncAddressAdjustment),
+ VarAddressAdjustment(VarAddressAdjustment),
+ HasLocationExpressionAddress(HasLocationExpressionAddress) {
+ InputDIEIdx = InUnit.getDIEIndex(InputDieEntry);
+
+    // Use the DW_FORM_strp form for string attributes if the DWARF version is
+    // less than 5, or if the output unit is a type unit and we need to produce
+    // a deterministic result. (We cannot generate deterministic results for
+    // the debug_str_offsets section when attributes are cloned in parallel.)
+ Use_DW_FORM_strp =
+ (InUnit.getVersion() < 5) ||
+ (OutUnit.isTypeUnit() &&
+ ((InUnit.getGlobalData().getOptions().Threads != 1) &&
+ !InUnit.getGlobalData().getOptions().AllowNonDeterministicOutput));
+ }
+
+ /// Clone string attribute.
+ size_t
+ cloneStringAttr(const DWARFFormValue &Val,
+ const DWARFAbbreviationDeclaration::AttributeSpec &AttrSpec);
+
+ /// Clone attribute referencing another DIE.
+ size_t
+ cloneDieRefAttr(const DWARFFormValue &Val,
+ const DWARFAbbreviationDeclaration::AttributeSpec &AttrSpec);
+
+ /// Clone scalar attribute.
+ size_t
+ cloneScalarAttr(const DWARFFormValue &Val,
+ const DWARFAbbreviationDeclaration::AttributeSpec &AttrSpec);
+
+ /// Clone block or exprloc attribute.
+ size_t
+ cloneBlockAttr(const DWARFFormValue &Val,
+ const DWARFAbbreviationDeclaration::AttributeSpec &AttrSpec);
+
+ /// Clone address attribute.
+ size_t
+ cloneAddressAttr(const DWARFFormValue &Val,
+ const DWARFAbbreviationDeclaration::AttributeSpec &AttrSpec);
+
+ /// Returns true if attribute should be skipped.
+ bool
+ shouldSkipAttribute(DWARFAbbreviationDeclaration::AttributeSpec AttrSpec);
+
+ /// Output DIE.
+ DIE *OutDIE = nullptr;
+
+ /// Input compilation unit.
+ CompileUnit &InUnit;
+
+  /// Output unit (either a "plain" compile unit or an artificial type unit).
+ CompileUnit::OutputUnitVariantPtr OutUnit;
+
+ /// .debug_info section descriptor.
+ SectionDescriptor &DebugInfoOutputSection;
+
+ /// Input DIE entry.
+ const DWARFDebugInfoEntry *InputDieEntry = nullptr;
+
+ /// Input DIE index.
+ uint32_t InputDIEIdx = 0;
+
+ /// Output DIE generator.
+ DIEGenerator &Generator;
+
+ /// Relocation adjustment for the function address ranges.
+ std::optional<int64_t> FuncAddressAdjustment;
+
+ /// Relocation adjustment for the variable locations.
+ std::optional<int64_t> VarAddressAdjustment;
+
+  /// Indicates whether InputDieEntry has a location attribute
+  /// containing an address expression.
+ bool HasLocationExpressionAddress = false;
+
+ /// Output offset after all attributes.
+ unsigned AttrOutOffset = 0;
+
+ /// Patches for the cloned attributes.
+ OffsetsPtrVector PatchesOffsets;
+
+ /// This flag forces using DW_FORM_strp for string attributes.
+ bool Use_DW_FORM_strp = false;
+};
+
+} // end of namespace dwarflinker_parallel
+} // end namespace llvm
+
+#endif // LLVM_LIB_DWARFLINKERPARALLEL_DIEATTRIBUTECLONER_H
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DIEGenerator.h b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DIEGenerator.h
new file mode 100644
index 000000000000..42bf00f55ff1
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DIEGenerator.h
@@ -0,0 +1,180 @@
+//===- DIEGenerator.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_DWARFLINKERPARALLEL_DIEGENERATOR_H
+#define LLVM_LIB_DWARFLINKERPARALLEL_DIEGENERATOR_H
+
+#include "DWARFLinkerGlobalData.h"
+#include "DWARFLinkerUnit.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/Support/LEB128.h"
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+/// This class is a helper to create output DIE tree.
+class DIEGenerator {
+public:
+ DIEGenerator(BumpPtrAllocator &Allocator, DwarfUnit &CU)
+ : Allocator(Allocator), CU(CU) {}
+
+ DIEGenerator(DIE *OutputDIE, BumpPtrAllocator &Allocator, DwarfUnit &CU)
+ : Allocator(Allocator), CU(CU), OutputDIE(OutputDIE) {}
+
+ /// Creates a DIE of specified tag \p DieTag and \p OutOffset.
+ DIE *createDIE(dwarf::Tag DieTag, uint32_t OutOffset) {
+ OutputDIE = DIE::get(Allocator, DieTag);
+
+ OutputDIE->setOffset(OutOffset);
+
+ return OutputDIE;
+ }
+
+ DIE *getDIE() { return OutputDIE; }
+
+ /// Adds a specified \p Child to the current DIE.
+ void addChild(DIE *Child) {
+ assert(Child != nullptr);
+ assert(OutputDIE != nullptr);
+
+ OutputDIE->addChild(Child);
+ }
+
+ /// Adds specified scalar attribute to the current DIE.
+ std::pair<DIEValue &, size_t> addScalarAttribute(dwarf::Attribute Attr,
+ dwarf::Form AttrForm,
+ uint64_t Value) {
+ return addAttribute(Attr, AttrForm, DIEInteger(Value));
+ }
+
+ /// Adds specified location attribute to the current DIE.
+ std::pair<DIEValue &, size_t> addLocationAttribute(dwarf::Attribute Attr,
+ dwarf::Form AttrForm,
+ ArrayRef<uint8_t> Bytes) {
+ DIELoc *Loc = new (Allocator) DIELoc;
+ for (auto Byte : Bytes)
+ static_cast<DIEValueList *>(Loc)->addValue(
+ Allocator, static_cast<dwarf::Attribute>(0), dwarf::DW_FORM_data1,
+ DIEInteger(Byte));
+ Loc->setSize(Bytes.size());
+
+ return addAttribute(Attr, AttrForm, Loc);
+ }
+
+ /// Adds specified block or exprloc attribute to the current DIE.
+ std::pair<DIEValue &, size_t> addBlockAttribute(dwarf::Attribute Attr,
+ dwarf::Form AttrForm,
+ ArrayRef<uint8_t> Bytes) {
+ // The expression location data might be updated and exceed the original
+ // size. Check whether the new data fits into the original form.
+ assert((AttrForm == dwarf::DW_FORM_block) ||
+ (AttrForm == dwarf::DW_FORM_exprloc) ||
+ (AttrForm == dwarf::DW_FORM_block1 && Bytes.size() <= UINT8_MAX) ||
+ (AttrForm == dwarf::DW_FORM_block2 && Bytes.size() <= UINT16_MAX) ||
+ (AttrForm == dwarf::DW_FORM_block4 && Bytes.size() <= UINT32_MAX));
+
+ DIEBlock *Block = new (Allocator) DIEBlock;
+ for (auto Byte : Bytes)
+ static_cast<DIEValueList *>(Block)->addValue(
+ Allocator, static_cast<dwarf::Attribute>(0), dwarf::DW_FORM_data1,
+ DIEInteger(Byte));
+ Block->setSize(Bytes.size());
+
+ return addAttribute(Attr, AttrForm, Block);
+ }
+
+ /// Adds specified location list attribute to the current DIE.
+ std::pair<DIEValue &, size_t> addLocListAttribute(dwarf::Attribute Attr,
+ dwarf::Form AttrForm,
+ uint64_t Value) {
+ return addAttribute(Attr, AttrForm, DIELocList(Value));
+ }
+
+ /// Adds indexed string attribute.
+ std::pair<DIEValue &, size_t> addIndexedStringAttribute(dwarf::Attribute Attr,
+ dwarf::Form AttrForm,
+ uint64_t Idx) {
+ assert(AttrForm == dwarf::DW_FORM_strx);
+ return addAttribute(Attr, AttrForm, DIEInteger(Idx));
+ }
+
+ /// Adds string attribute with dummy offset to the current DIE.
+ std::pair<DIEValue &, size_t>
+ addStringPlaceholderAttribute(dwarf::Attribute Attr, dwarf::Form AttrForm) {
+ assert(AttrForm == dwarf::DW_FORM_strp ||
+ AttrForm == dwarf::DW_FORM_line_strp);
+ return addAttribute(Attr, AttrForm, DIEInteger(0xBADDEF));
+ }
+
+ /// Adds inplace string attribute to the current DIE.
+ std::pair<DIEValue &, size_t> addInplaceString(dwarf::Attribute Attr,
+ StringRef String) {
+ DIEBlock *Block = new (Allocator) DIEBlock;
+ for (auto Byte : String.bytes())
+ static_cast<DIEValueList *>(Block)->addValue(
+ Allocator, static_cast<dwarf::Attribute>(0), dwarf::DW_FORM_data1,
+ DIEInteger(Byte));
+
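+    // Append the terminating NUL byte required by DW_FORM_string.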
+ static_cast<DIEValueList *>(Block)->addValue(
+ Allocator, static_cast<dwarf::Attribute>(0), dwarf::DW_FORM_data1,
+ DIEInteger(0));
+ Block->setSize(String.size() + 1);
+
+ DIEValue &ValueRef =
+ *OutputDIE->addValue(Allocator, Attr, dwarf::DW_FORM_string, Block);
+ return std::pair<DIEValue &, size_t>(ValueRef, String.size() + 1);
+ }
+
+  /// Creates abbreviations for the current DIE. Returns the byte size of the
+  /// abbreviation number and adds that size to each offset in
+  /// \p OffsetsList.
+ size_t finalizeAbbreviations(bool CHILDREN_yes,
+ OffsetsPtrVector *OffsetsList) {
+ // Create abbreviations for output DIE.
+ DIEAbbrev NewAbbrev = OutputDIE->generateAbbrev();
+ if (CHILDREN_yes)
+ NewAbbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
+
+ CU.assignAbbrev(NewAbbrev);
+ OutputDIE->setAbbrevNumber(NewAbbrev.getNumber());
+
+ size_t AbbrevNumberSize = getULEB128Size(OutputDIE->getAbbrevNumber());
+
+ // Add size of abbreviation number to the offsets.
+ if (OffsetsList != nullptr) {
+ for (uint64_t *OffsetPtr : *OffsetsList)
+ *OffsetPtr += AbbrevNumberSize;
+ }
+
+ return AbbrevNumberSize;
+ }
+
+protected:
+ template <typename T>
+ std::pair<DIEValue &, size_t> addAttribute(dwarf::Attribute Attr,
+ dwarf::Form AttrForm, T &&Value) {
+ DIEValue &ValueRef =
+ *OutputDIE->addValue(Allocator, Attr, AttrForm, std::forward<T>(Value));
+ unsigned ValueSize = ValueRef.sizeOf(CU.getFormParams());
+ return std::pair<DIEValue &, size_t>(ValueRef, ValueSize);
+ }
+
+ // Allocator for output DIEs and values.
+ BumpPtrAllocator &Allocator;
+
+ // Unit for the output DIE.
+ DwarfUnit &CU;
+
+ // OutputDIE.
+ DIE *OutputDIE = nullptr;
+};
+
+} // end of namespace dwarflinker_parallel
+} // end namespace llvm
+
+#endif // LLVM_LIB_DWARFLINKERPARALLEL_DIEGENERATOR_H
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.cpp b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.cpp
index 50909c0ba669..355cfae3a646 100644
--- a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.cpp
@@ -114,18 +114,168 @@ Error DwarfEmitterImpl::init(Triple TheTriple,
TripleName.c_str());
Asm->setDwarfUsesRelocationsAcrossSections(false);
- RangesSectionSize = 0;
- RngListsSectionSize = 0;
- LocSectionSize = 0;
- LocListsSectionSize = 0;
- LineSectionSize = 0;
- FrameSectionSize = 0;
DebugInfoSectionSize = 0;
- MacInfoSectionSize = 0;
- MacroSectionSize = 0;
return Error::success();
}
+void DwarfEmitterImpl::emitSwiftAST(StringRef Buffer) {
+ MCSection *SwiftASTSection = MOFI->getDwarfSwiftASTSection();
+ SwiftASTSection->setAlignment(Align(32));
+ MS->switchSection(SwiftASTSection);
+ MS->emitBytes(Buffer);
+}
+
+/// Emit the swift reflection section stored in \p Buffer.
+void DwarfEmitterImpl::emitSwiftReflectionSection(
+ llvm::binaryformat::Swift5ReflectionSectionKind ReflSectionKind,
+ StringRef Buffer, uint32_t Alignment, uint32_t) {
+ MCSection *ReflectionSection =
+ MOFI->getSwift5ReflectionSection(ReflSectionKind);
+ if (ReflectionSection == nullptr)
+ return;
+ ReflectionSection->setAlignment(Align(Alignment));
+ MS->switchSection(ReflectionSection);
+ MS->emitBytes(Buffer);
+}
+
+void DwarfEmitterImpl::emitSectionContents(StringRef SecData,
+ StringRef SecName) {
+ if (SecData.empty())
+ return;
+
+ if (MCSection *Section = switchSection(SecName)) {
+ MS->switchSection(Section);
+
+ MS->emitBytes(SecData);
+ }
+}
+
+MCSection *DwarfEmitterImpl::switchSection(StringRef SecName) {
+ return StringSwitch<MCSection *>(SecName)
+ .Case("debug_info", MC->getObjectFileInfo()->getDwarfInfoSection())
+ .Case("debug_abbrev", MC->getObjectFileInfo()->getDwarfAbbrevSection())
+ .Case("debug_line", MC->getObjectFileInfo()->getDwarfLineSection())
+ .Case("debug_loc", MC->getObjectFileInfo()->getDwarfLocSection())
+ .Case("debug_ranges", MC->getObjectFileInfo()->getDwarfRangesSection())
+ .Case("debug_frame", MC->getObjectFileInfo()->getDwarfFrameSection())
+ .Case("debug_aranges", MC->getObjectFileInfo()->getDwarfARangesSection())
+ .Case("debug_rnglists",
+ MC->getObjectFileInfo()->getDwarfRnglistsSection())
+ .Case("debug_loclists",
+ MC->getObjectFileInfo()->getDwarfLoclistsSection())
+ .Case("debug_macro", MC->getObjectFileInfo()->getDwarfMacroSection())
+ .Case("debug_macinfo", MC->getObjectFileInfo()->getDwarfMacinfoSection())
+ .Case("debug_addr", MC->getObjectFileInfo()->getDwarfAddrSection())
+ .Case("debug_str", MC->getObjectFileInfo()->getDwarfStrSection())
+ .Case("debug_line_str", MC->getObjectFileInfo()->getDwarfLineStrSection())
+ .Case("debug_str_offsets",
+ MC->getObjectFileInfo()->getDwarfStrOffSection())
+ .Case("debug_pubnames",
+ MC->getObjectFileInfo()->getDwarfPubNamesSection())
+ .Case("debug_pubtypes",
+ MC->getObjectFileInfo()->getDwarfPubTypesSection())
+ .Case("debug_names", MC->getObjectFileInfo()->getDwarfDebugNamesSection())
+ .Case("apple_names", MC->getObjectFileInfo()->getDwarfAccelNamesSection())
+ .Case("apple_namespac",
+ MC->getObjectFileInfo()->getDwarfAccelNamespaceSection())
+ .Case("apple_objc", MC->getObjectFileInfo()->getDwarfAccelObjCSection())
+ .Case("apple_types", MC->getObjectFileInfo()->getDwarfAccelTypesSection())
+
+ .Default(nullptr);
+}
+
+void DwarfEmitterImpl::emitAbbrevs(
+ const SmallVector<std::unique_ptr<DIEAbbrev>> &Abbrevs,
+ unsigned DwarfVersion) {
+ MS->switchSection(MOFI->getDwarfAbbrevSection());
+ MC->setDwarfVersion(DwarfVersion);
+ Asm->emitDwarfAbbrevs(Abbrevs);
+}
+
+void DwarfEmitterImpl::emitCompileUnitHeader(DwarfUnit &Unit) {
+ MS->switchSection(MOFI->getDwarfInfoSection());
+ MC->setDwarfVersion(Unit.getVersion());
+
+  // Emit the size of the contents, not including the length field itself. The
+  // size has already been computed in CompileUnit::computeOffsets(). Subtract
+  // 4 from that size to account for the length field.
+ Asm->emitInt32(Unit.getUnitSize() - 4);
+ Asm->emitInt16(Unit.getVersion());
+
+ if (Unit.getVersion() >= 5) {
+ Asm->emitInt8(dwarf::DW_UT_compile);
+ Asm->emitInt8(Unit.getFormParams().AddrSize);
+ // Proper offset to the abbreviations table will be set later.
+ Asm->emitInt32(0);
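+    // 4 (length) + 2 (version) + 1 (unit type) + 1 (address size) +
+    // 4 (abbrev table offset) = 12 bytes.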
+ DebugInfoSectionSize += 12;
+ } else {
+ // Proper offset to the abbreviations table will be set later.
+ Asm->emitInt32(0);
+ Asm->emitInt8(Unit.getFormParams().AddrSize);
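+    // 4 (length) + 2 (version) + 4 (abbrev table offset) +
+    // 1 (address size) = 11 bytes.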
+ DebugInfoSectionSize += 11;
+ }
+}
+
+void DwarfEmitterImpl::emitDIE(DIE &Die) {
+ MS->switchSection(MOFI->getDwarfInfoSection());
+ Asm->emitDwarfDIE(Die);
+ DebugInfoSectionSize += Die.getSize();
+}
+
+void DwarfEmitterImpl::emitDebugNames(DWARF5AccelTable &Table,
+ DebugNamesUnitsOffsets &CUOffsets,
+ CompUnitIDToIdx &CUidToIdx) {
+ if (CUOffsets.empty())
+ return;
+
+ Asm->OutStreamer->switchSection(MOFI->getDwarfDebugNamesSection());
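+  // Pick the smallest unsigned form able to encode the largest CU index.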
+ dwarf::Form Form =
+ DIEInteger::BestForm(/*IsSigned*/ false, (uint64_t)CUidToIdx.size() - 1);
+  // FIXME: add support for type units + .debug_names. For now this is
+  // unsupported.
+ emitDWARF5AccelTable(
+ Asm.get(), Table, CUOffsets,
+ [&](const DWARF5AccelTableData &Entry)
+ -> std::optional<DWARF5AccelTable::UnitIndexAndEncoding> {
+ if (CUidToIdx.size() > 1)
+ return {{CUidToIdx[Entry.getUnitID()],
+ {dwarf::DW_IDX_compile_unit, Form}}};
+ return std::nullopt;
+ });
+}
+
+void DwarfEmitterImpl::emitAppleNamespaces(
+ AccelTable<AppleAccelTableStaticOffsetData> &Table) {
+ Asm->OutStreamer->switchSection(MOFI->getDwarfAccelNamespaceSection());
+ auto *SectionBegin = Asm->createTempSymbol("namespac_begin");
+ Asm->OutStreamer->emitLabel(SectionBegin);
+ emitAppleAccelTable(Asm.get(), Table, "namespac", SectionBegin);
+}
+
+void DwarfEmitterImpl::emitAppleNames(
+ AccelTable<AppleAccelTableStaticOffsetData> &Table) {
+ Asm->OutStreamer->switchSection(MOFI->getDwarfAccelNamesSection());
+ auto *SectionBegin = Asm->createTempSymbol("names_begin");
+ Asm->OutStreamer->emitLabel(SectionBegin);
+ emitAppleAccelTable(Asm.get(), Table, "names", SectionBegin);
+}
+
+void DwarfEmitterImpl::emitAppleObjc(
+ AccelTable<AppleAccelTableStaticOffsetData> &Table) {
+ Asm->OutStreamer->switchSection(MOFI->getDwarfAccelObjCSection());
+ auto *SectionBegin = Asm->createTempSymbol("objc_begin");
+ Asm->OutStreamer->emitLabel(SectionBegin);
+ emitAppleAccelTable(Asm.get(), Table, "objc", SectionBegin);
+}
+
+void DwarfEmitterImpl::emitAppleTypes(
+ AccelTable<AppleAccelTableStaticTypeData> &Table) {
+ Asm->OutStreamer->switchSection(MOFI->getDwarfAccelTypesSection());
+ auto *SectionBegin = Asm->createTempSymbol("types_begin");
+ Asm->OutStreamer->emitLabel(SectionBegin);
+ emitAppleAccelTable(Asm.get(), Table, "types", SectionBegin);
+}
+
} // end of namespace dwarflinker_parallel
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.h b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.h
index d07397a30419..d03336c1c11a 100644
--- a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.h
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.h
@@ -14,7 +14,6 @@
#include "llvm/CodeGen/AccelTable.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/DWARFLinkerParallel/DWARFLinker.h"
-#include "llvm/DWARFLinkerParallel/StringTable.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -36,190 +35,86 @@ namespace llvm {
template <typename DataT> class AccelTable;
class MCCodeEmitter;
-class DWARFDebugMacro;
namespace dwarflinker_parallel {
-struct UnitStartSymbol {
- unsigned UnitID = 0;
- MCSymbol *Symbol = 0;
-};
-using UnitStartSymbolsTy = SmallVector<UnitStartSymbol>;
-using Offset2UnitMapTy = DenseMap<uint64_t, CompileUnit *>;
-
-struct RangeAttrPatch;
-struct LocAttrPatch;
+using DebugNamesUnitsOffsets = std::vector<std::variant<MCSymbol *, uint64_t>>;
+using CompUnitIDToIdx = DenseMap<unsigned, unsigned>;
-/// The Dwarf emission logic.
-///
-/// All interactions with the MC layer that is used to build the debug
-/// information binary representation are handled in this class.
+/// This class emits DWARF data to the output stream. It emits section data
+/// that has already been generated, plus data that cannot be generated by
+/// CompileUnit.
class DwarfEmitterImpl : public ExtraDwarfEmitter {
public:
DwarfEmitterImpl(DWARFLinker::OutputFileType OutFileType,
- raw_pwrite_stream &OutFile,
- std::function<StringRef(StringRef Input)> Translator,
- DWARFLinker::MessageHandlerTy Warning)
- : OutFile(OutFile), OutFileType(OutFileType), Translator(Translator),
- WarningHandler(Warning) {}
+ raw_pwrite_stream &OutFile)
+ : OutFile(OutFile), OutFileType(OutFileType) {}
+ /// Initialize AsmPrinter data.
Error init(Triple TheTriple, StringRef Swift5ReflectionSegmentName);
+ /// Returns triple of output stream.
+ const Triple &getTargetTriple() { return MC->getTargetTriple(); }
+
/// Dump the file to the disk.
void finish() override { MS->finish(); }
+ /// Returns AsmPrinter.
AsmPrinter &getAsmPrinter() const override { return *Asm; }
- /// Set the current output section to debug_info and change
- /// the MC Dwarf version to \p DwarfVersion.
- void switchToDebugInfoSection(unsigned DwarfVersion) {}
-
/// Emit the swift_ast section stored in \p Buffer.
- void emitSwiftAST(StringRef Buffer) override {}
+ void emitSwiftAST(StringRef Buffer) override;
/// Emit the swift reflection section stored in \p Buffer.
void emitSwiftReflectionSection(
llvm::binaryformat::Swift5ReflectionSectionKind ReflSectionKind,
- StringRef Buffer, uint32_t Alignment, uint32_t Size) override {}
-
- void emitPaperTrailWarningsDie(DIE &Die) {}
+ StringRef Buffer, uint32_t Alignment, uint32_t) override;
- void emitSectionContents(StringRef SecData, StringRef SecName) override {}
-
- MCSymbol *emitTempSym(StringRef SecName, StringRef SymName) override {
- return nullptr;
- }
+ /// Emit specified section data.
+ void emitSectionContents(StringRef SecData, StringRef SecName) override;
+ /// Emit abbreviations.
void emitAbbrevs(const SmallVector<std::unique_ptr<DIEAbbrev>> &Abbrevs,
- unsigned DwarfVersion) {}
-
- void emitStrings(const StringTable &Strings) {}
-
- void emitLineStrings(const StringTable &Strings) {}
-
- void emitDebugNames(AccelTable<DWARF5AccelTableStaticData> &,
- UnitStartSymbolsTy &UnitOffsets) {}
-
- void emitAppleNamespaces(AccelTable<AppleAccelTableStaticOffsetData> &) {}
-
- void emitAppleNames(AccelTable<AppleAccelTableStaticOffsetData> &) {}
-
- void emitAppleObjc(AccelTable<AppleAccelTableStaticOffsetData> &) {}
-
- void emitAppleTypes(AccelTable<AppleAccelTableStaticTypeData> &) {}
-
- MCSymbol *emitDwarfDebugRangeListHeader(const CompileUnit &Unit) {
- return nullptr;
- }
-
- void emitDwarfDebugRangeListFragment(const CompileUnit &Unit,
- const AddressRanges &LinkedRanges,
- RangeAttrPatch &Patch) {}
-
- void emitDwarfDebugRangeListFooter(const CompileUnit &Unit,
- MCSymbol *EndLabel) {}
-
- MCSymbol *emitDwarfDebugLocListHeader(const CompileUnit &Unit) {
- return nullptr;
- }
-
- void emitDwarfDebugLocListFragment(
- const CompileUnit &Unit,
- const DWARFLocationExpressionsVector &LinkedLocationExpression,
- LocAttrPatch &Patch) {}
-
- void emitDwarfDebugLocListFooter(const CompileUnit &Unit,
- MCSymbol *EndLabel) {}
-
- void emitDwarfDebugArangesTable(const CompileUnit &Unit,
- const AddressRanges &LinkedRanges) {}
+ unsigned DwarfVersion);
- void translateLineTable(DataExtractor LineData, uint64_t Offset) {}
+ /// Emit compile unit header.
+ void emitCompileUnitHeader(DwarfUnit &Unit);
- void emitLineTableForUnit(MCDwarfLineTableParams Params,
- StringRef PrologueBytes, unsigned MinInstLength,
- std::vector<DWARFDebugLine::Row> &Rows,
- unsigned AdddressSize) {}
-
- void emitLineTableForUnit(const DWARFDebugLine::LineTable &LineTable,
- const CompileUnit &Unit, const StringTable &Strings,
- const StringTable &LineTableStrings) {}
-
- void emitPubNamesForUnit(const CompileUnit &Unit) {}
-
- void emitPubTypesForUnit(const CompileUnit &Unit) {}
-
- void emitCIE(StringRef CIEBytes) {}
-
- void emitFDE(uint32_t CIEOffset, uint32_t AddreSize, uint64_t Address,
- StringRef Bytes) {}
-
- void emitCompileUnitHeader(CompileUnit &Unit, unsigned DwarfVersion) {}
-
- void emitDIE(DIE &Die) {}
-
- void emitMacroTables(DWARFContext *Context,
- const Offset2UnitMapTy &UnitMacroMap,
- StringTable &Strings) {}
-
- /// Returns size of generated .debug_line section.
- uint64_t getDebugLineSectionSize() const { return LineSectionSize; }
-
- /// Returns size of generated .debug_frame section.
- uint64_t getDebugFrameSectionSize() const { return FrameSectionSize; }
-
- /// Returns size of generated .debug_ranges section.
- uint64_t getDebugRangesSectionSize() const { return RangesSectionSize; }
-
- /// Returns size of generated .debug_rnglists section.
- uint64_t getDebugRngListsSectionSize() const { return RngListsSectionSize; }
+ /// Emit DIE recursively.
+ void emitDIE(DIE &Die);
/// Returns size of generated .debug_info section.
uint64_t getDebugInfoSectionSize() const { return DebugInfoSectionSize; }
- /// Returns size of generated .debug_macinfo section.
- uint64_t getDebugMacInfoSectionSize() const { return MacInfoSectionSize; }
-
- /// Returns size of generated .debug_macro section.
- uint64_t getDebugMacroSectionSize() const { return MacroSectionSize; }
-
- /// Returns size of generated .debug_loc section.
- uint64_t getDebugLocSectionSize() const { return LocSectionSize; }
-
- /// Returns size of generated .debug_loclists section.
- uint64_t getDebugLocListsSectionSize() const { return LocListsSectionSize; }
-
-private:
- inline void warn(const Twine &Warning, StringRef Context = "") {
- if (WarningHandler)
- WarningHandler(Warning, Context, nullptr);
- }
+ /// Emits .debug_names section according to the specified \p Table.
+ void emitDebugNames(DWARF5AccelTable &Table,
+ DebugNamesUnitsOffsets &CUOffsets,
+ CompUnitIDToIdx &UnitIDToIdxMap);
- void emitMacroTableImpl(const DWARFDebugMacro *MacroTable,
- const Offset2UnitMapTy &UnitMacroMap,
- StringPool &StringPool, uint64_t &OutOffset) {}
+ /// Emits .apple_names section according to the specified \p Table.
+ void emitAppleNames(AccelTable<AppleAccelTableStaticOffsetData> &Table);
- /// Emit piece of .debug_ranges for \p LinkedRanges.
- void emitDwarfDebugRangesTableFragment(const CompileUnit &Unit,
- const AddressRanges &LinkedRanges,
- RangeAttrPatch &Patch) {}
+ /// Emits .apple_namespaces section according to the specified \p Table.
+ void emitAppleNamespaces(AccelTable<AppleAccelTableStaticOffsetData> &Table);
- /// Emit piece of .debug_rnglists for \p LinkedRanges.
- void emitDwarfDebugRngListsTableFragment(const CompileUnit &Unit,
- const AddressRanges &LinkedRanges,
- RangeAttrPatch &Patch) {}
+ /// Emits .apple_objc section according to the specified \p Table.
+ void emitAppleObjc(AccelTable<AppleAccelTableStaticOffsetData> &Table);
- /// Emit piece of .debug_loc for \p LinkedRanges.
- void emitDwarfDebugLocTableFragment(
- const CompileUnit &Unit,
- const DWARFLocationExpressionsVector &LinkedLocationExpression,
- LocAttrPatch &Patch) {}
+ /// Emits .apple_types section according to the specified \p Table.
+ void emitAppleTypes(AccelTable<AppleAccelTableStaticTypeData> &Table);
- /// Emit piece of .debug_loclists for \p LinkedRanges.
- void emitDwarfDebugLocListsTableFragment(
- const CompileUnit &Unit,
- const DWARFLocationExpressionsVector &LinkedLocationExpression,
- LocAttrPatch &Patch) {}
+private:
+  // Enumerate all string patches and write them into the destination section.
+  // Patches keep the order they had in the original input file. To avoid
+  // emitting the same string twice we accumulate the NextOffset value: if the
+  // string offset is smaller than NextOffset, the patch is skipped (that
+  // string was emitted earlier).
+ template <typename PatchTy>
+ void emitStringsImpl(ArrayList<PatchTy> &StringPatches,
+ const StringEntryToDwarfStringPoolEntryMap &Strings,
+ uint64_t &NextOffset, MCSection *OutSection);
+
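+  // Maps a DWARF section name (e.g. "debug_info") to the corresponding MC
+  // output section, or returns nullptr for unknown names.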
+ MCSection *switchSection(StringRef SecName);
/// \defgroup MCObjects MC layer objects constructed by the streamer
/// @{
@@ -240,32 +135,8 @@ private:
/// The output file we stream the linked Dwarf to.
raw_pwrite_stream &OutFile;
DWARFLinker::OutputFileType OutFileType = DWARFLinker::OutputFileType::Object;
- std::function<StringRef(StringRef Input)> Translator;
- uint64_t RangesSectionSize = 0;
- uint64_t RngListsSectionSize = 0;
- uint64_t LocSectionSize = 0;
- uint64_t LocListsSectionSize = 0;
- uint64_t LineSectionSize = 0;
- uint64_t FrameSectionSize = 0;
uint64_t DebugInfoSectionSize = 0;
- uint64_t MacInfoSectionSize = 0;
- uint64_t MacroSectionSize = 0;
-
- /// Keep track of emitted CUs and their Unique ID.
- struct EmittedUnit {
- unsigned ID;
- MCSymbol *LabelBegin;
- };
- std::vector<EmittedUnit> EmittedUnitsTy;
-
- /// Emit the pubnames or pubtypes section contribution for \p
- /// Unit into \p Sec. The data is provided in \p Names.
- void emitPubSectionForUnit(MCSection *Sec, StringRef Name,
- const CompileUnit &Unit,
- const std::vector<CompileUnit::AccelInfo> &Names);
-
- DWARFLinker::MessageHandlerTy WarningHandler = nullptr;
};
} // end namespace dwarflinker_parallel
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFFile.cpp b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFFile.cpp
new file mode 100644
index 000000000000..5a3486e6398d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFFile.cpp
@@ -0,0 +1,17 @@
+//=== DWARFFile.cpp -------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DWARFLinkerParallel/DWARFFile.h"
+#include "DWARFLinkerGlobalData.h"
+
+llvm::dwarflinker_parallel::DWARFFile::DWARFFile(
+ StringRef Name, std::unique_ptr<DWARFContext> Dwarf,
+ std::unique_ptr<AddressesMap> Addresses,
+ DWARFFile::UnloadCallbackTy UnloadFunc)
+ : FileName(Name), Dwarf(std::move(Dwarf)), Addresses(std::move(Addresses)),
+ UnloadFunc(UnloadFunc) {}
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinker.cpp b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinker.cpp
index f082fd603610..269f24b1a13b 100644
--- a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinker.cpp
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinker.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "DWARFLinkerImpl.h"
+#include "DependencyTracker.h"
std::unique_ptr<llvm::dwarflinker_parallel::DWARFLinker>
llvm::dwarflinker_parallel::DWARFLinker::createLinker(
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerCompileUnit.cpp b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerCompileUnit.cpp
new file mode 100644
index 000000000000..3f0e75690272
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerCompileUnit.cpp
@@ -0,0 +1,1879 @@
+//=== DWARFLinkerCompileUnit.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFLinkerCompileUnit.h"
+#include "AcceleratorRecordsSaver.h"
+#include "DIEAttributeCloner.h"
+#include "DIEGenerator.h"
+#include "DependencyTracker.h"
+#include "SyntheticTypeNameBuilder.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h"
+#include "llvm/Support/DJB.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/Path.h"
+#include <utility>
+
+using namespace llvm;
+using namespace llvm::dwarflinker_parallel;
+
+CompileUnit::CompileUnit(LinkingGlobalData &GlobalData, unsigned ID,
+ StringRef ClangModuleName, DWARFFile &File,
+ OffsetToUnitTy UnitFromOffset,
+ dwarf::FormParams Format, llvm::endianness Endianess)
+ : DwarfUnit(GlobalData, ID, ClangModuleName), File(File),
+ getUnitFromOffset(UnitFromOffset), Stage(Stage::CreatedNotLoaded),
+ AcceleratorRecords(&GlobalData.getAllocator()) {
+ UnitName = File.FileName;
+ setOutputFormat(Format, Endianess);
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugInfo);
+}
+
+CompileUnit::CompileUnit(LinkingGlobalData &GlobalData, DWARFUnit &OrigUnit,
+ unsigned ID, StringRef ClangModuleName,
+ DWARFFile &File, OffsetToUnitTy UnitFromOffset,
+ dwarf::FormParams Format, llvm::endianness Endianess)
+ : DwarfUnit(GlobalData, ID, ClangModuleName), File(File),
+ OrigUnit(&OrigUnit), getUnitFromOffset(UnitFromOffset),
+ Stage(Stage::CreatedNotLoaded),
+ AcceleratorRecords(&GlobalData.getAllocator()) {
+ setOutputFormat(Format, Endianess);
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugInfo);
+
+ DWARFDie CUDie = OrigUnit.getUnitDIE();
+ if (!CUDie)
+ return;
+
+ if (std::optional<DWARFFormValue> Val = CUDie.find(dwarf::DW_AT_language)) {
+ uint16_t LangVal = dwarf::toUnsigned(Val, 0);
+ if (isODRLanguage(LangVal))
+ Language = LangVal;
+ }
+
+ if (!GlobalData.getOptions().NoODR && Language.has_value())
+ NoODR = false;
+
+ if (const char *CUName = CUDie.getName(DINameKind::ShortName))
+ UnitName = CUName;
+ else
+ UnitName = File.FileName;
+ SysRoot = dwarf::toStringRef(CUDie.find(dwarf::DW_AT_LLVM_sysroot)).str();
+}
+
+void CompileUnit::loadLineTable() {
+ LineTablePtr = File.Dwarf->getLineTableForUnit(&getOrigUnit());
+}
+
+void CompileUnit::maybeResetToLoadedStage() {
+ // Nothing to reset if stage is less than "Loaded".
+ if (getStage() < Stage::Loaded)
+ return;
+
+  // Note: we need to erase data for the "Loaded" stage because if live
+  // analysis failed we will be left in the "Loaded" stage with markings
+  // from the "LivenessAnalysisDone" stage partially applied. Those
+  // markings should be cleared.
+
+ for (DIEInfo &Info : DieInfoArray)
+ Info.unsetFlagsWhichSetDuringLiveAnalysis();
+
+ LowPc = std::nullopt;
+ HighPc = 0;
+ Labels.clear();
+ Ranges.clear();
+ Dependencies.reset(nullptr);
+
+ if (getStage() < Stage::Cloned) {
+ setStage(Stage::Loaded);
+ return;
+ }
+
+ AcceleratorRecords.erase();
+ AbbreviationsSet.clear();
+ Abbreviations.clear();
+ OutUnitDIE = nullptr;
+ DebugAddrIndexMap.clear();
+
+ for (uint64_t &Offset : OutDieOffsetArray)
+ Offset = 0;
+ for (TypeEntry *&Name : TypeEntries)
+ Name = nullptr;
+ eraseSections();
+
+ setStage(Stage::CreatedNotLoaded);
+}
+
+bool CompileUnit::loadInputDIEs() {
+ DWARFDie InputUnitDIE = getUnitDIE(false);
+ if (!InputUnitDIE)
+ return false;
+
+  // Load input DIEs and resize the DIE info arrays.
+ DieInfoArray.resize(getOrigUnit().getNumDIEs());
+ OutDieOffsetArray.resize(getOrigUnit().getNumDIEs(), 0);
+ if (!NoODR)
+ TypeEntries.resize(getOrigUnit().getNumDIEs());
+ return true;
+}
+
+void CompileUnit::analyzeDWARFStructureRec(const DWARFDebugInfoEntry *DieEntry,
+ bool IsODRUnavailableFunctionScope) {
+ CompileUnit::DIEInfo &DieInfo = getDIEInfo(DieEntry);
+
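+  // Propagate the module/function/anonymous-namespace scope flags from the
+  // parent DIE to each child, then refine them based on the child's tag.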
+ for (const DWARFDebugInfoEntry *CurChild = getFirstChildEntry(DieEntry);
+ CurChild && CurChild->getAbbreviationDeclarationPtr();
+ CurChild = getSiblingEntry(CurChild)) {
+ CompileUnit::DIEInfo &ChildInfo = getDIEInfo(CurChild);
+ bool ChildIsODRUnavailableFunctionScope = IsODRUnavailableFunctionScope;
+
+ if (DieInfo.getIsInMouduleScope())
+ ChildInfo.setIsInMouduleScope();
+
+ if (DieInfo.getIsInFunctionScope())
+ ChildInfo.setIsInFunctionScope();
+
+ if (DieInfo.getIsInAnonNamespaceScope())
+ ChildInfo.setIsInAnonNamespaceScope();
+
+ switch (CurChild->getTag()) {
+ case dwarf::DW_TAG_module:
+ ChildInfo.setIsInMouduleScope();
+ if (DieEntry->getTag() == dwarf::DW_TAG_compile_unit &&
+ dwarf::toString(find(CurChild, dwarf::DW_AT_name), "") !=
+ getClangModuleName())
+ analyzeImportedModule(CurChild);
+ break;
+ case dwarf::DW_TAG_subprogram:
+ ChildInfo.setIsInFunctionScope();
+ if (!ChildIsODRUnavailableFunctionScope &&
+ !ChildInfo.getIsInMouduleScope()) {
+ if (find(CurChild,
+ {dwarf::DW_AT_abstract_origin, dwarf::DW_AT_specification}))
+ ChildIsODRUnavailableFunctionScope = true;
+ }
+ break;
+ case dwarf::DW_TAG_namespace: {
+ UnitEntryPairTy NamespaceEntry = {this, CurChild};
+
+ if (find(CurChild, dwarf::DW_AT_extension))
+ NamespaceEntry = NamespaceEntry.getNamespaceOrigin();
+
+ if (!NamespaceEntry.CU->find(NamespaceEntry.DieEntry, dwarf::DW_AT_name))
+ ChildInfo.setIsInAnonNamespaceScope();
+ } break;
+ default:
+ break;
+ }
+
+ if (!isClangModule() && !getGlobalData().getOptions().UpdateIndexTablesOnly)
+ ChildInfo.setTrackLiveness();
+
+ if ((!ChildInfo.getIsInAnonNamespaceScope() &&
+ !ChildIsODRUnavailableFunctionScope && !NoODR))
+ ChildInfo.setODRAvailable();
+
+ if (CurChild->hasChildren())
+ analyzeDWARFStructureRec(CurChild, ChildIsODRUnavailableFunctionScope);
+ }
+}
+
+StringEntry *CompileUnit::getFileName(unsigned FileIdx,
+ StringPool &GlobalStrings) {
+ if (LineTablePtr) {
+ if (LineTablePtr->hasFileAtIndex(FileIdx)) {
+ // Cache the resolved paths based on the index in the line table,
+ // because calling realpath is expensive.
+ ResolvedPathsMap::const_iterator It = ResolvedFullPaths.find(FileIdx);
+ if (It == ResolvedFullPaths.end()) {
+ std::string OrigFileName;
+ bool FoundFileName = LineTablePtr->getFileNameByIndex(
+ FileIdx, getOrigUnit().getCompilationDir(),
+ DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
+ OrigFileName);
+ (void)FoundFileName;
+ assert(FoundFileName && "Must get file name from line table");
+
+ // Second level of caching, this time based on the file's parent
+ // path.
+ StringRef FileName = sys::path::filename(OrigFileName);
+ StringRef ParentPath = sys::path::parent_path(OrigFileName);
+
+ // If the ParentPath has not yet been resolved, resolve and cache it for
+ // future look-ups.
+ StringMap<StringEntry *>::iterator ParentIt =
+ ResolvedParentPaths.find(ParentPath);
+ if (ParentIt == ResolvedParentPaths.end()) {
+ SmallString<256> RealPath;
+ sys::fs::real_path(ParentPath, RealPath);
+ ParentIt =
+ ResolvedParentPaths
+ .insert({ParentPath, GlobalStrings.insert(RealPath).first})
+ .first;
+ }
+
+ // Join the file name again with the resolved path.
+ SmallString<256> ResolvedPath(ParentIt->second->first());
+ sys::path::append(ResolvedPath, FileName);
+
+ It = ResolvedFullPaths
+ .insert(std::make_pair(
+ FileIdx, GlobalStrings.insert(ResolvedPath).first))
+ .first;
+ }
+
+ return It->second;
+ }
+ }
+
+ return nullptr;
+}
+
+void CompileUnit::cleanupDataAfterClonning() {
+ AbbreviationsSet.clear();
+ ResolvedFullPaths.shrink_and_clear();
+ ResolvedParentPaths.clear();
+ FileNames.shrink_and_clear();
+ DieInfoArray = SmallVector<DIEInfo>();
+ OutDieOffsetArray = SmallVector<uint64_t>();
+ TypeEntries = SmallVector<TypeEntry *>();
+ Dependencies.reset(nullptr);
+ getOrigUnit().clear();
+}
+
+/// Make a best effort to guess the
+/// Xcode.app/Contents/Developer/Toolchains/ path from an SDK path.
+static SmallString<128> guessToolchainBaseDir(StringRef SysRoot) {
+ SmallString<128> Result;
+ // Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
+ StringRef Base = sys::path::parent_path(SysRoot);
+ if (sys::path::filename(Base) != "SDKs")
+ return Result;
+ Base = sys::path::parent_path(Base);
+ Result = Base;
+ Result += "/Toolchains";
+ return Result;
+}
+
+/// Collect references to parseable Swift interfaces in imported
+/// DW_TAG_module blocks.
+void CompileUnit::analyzeImportedModule(const DWARFDebugInfoEntry *DieEntry) {
+ if (!Language || Language != dwarf::DW_LANG_Swift)
+ return;
+
+ if (!GlobalData.getOptions().ParseableSwiftInterfaces)
+ return;
+
+ StringRef Path =
+ dwarf::toStringRef(find(DieEntry, dwarf::DW_AT_LLVM_include_path));
+ if (!Path.ends_with(".swiftinterface"))
+ return;
+ // Don't track interfaces that are part of the SDK.
+ StringRef SysRoot =
+ dwarf::toStringRef(find(DieEntry, dwarf::DW_AT_LLVM_sysroot));
+ if (SysRoot.empty())
+ SysRoot = getSysRoot();
+ if (!SysRoot.empty() && Path.starts_with(SysRoot))
+ return;
+ // Don't track interfaces that are part of the toolchain.
+ // For example: Swift, _Concurrency, ...
+ SmallString<128> Toolchain = guessToolchainBaseDir(SysRoot);
+ if (!Toolchain.empty() && Path.starts_with(Toolchain))
+ return;
+ if (std::optional<DWARFFormValue> Val = find(DieEntry, dwarf::DW_AT_name)) {
+ Expected<const char *> Name = Val->getAsCString();
+ if (!Name) {
+ warn(Name.takeError());
+ return;
+ }
+
+ auto &Entry = (*GlobalData.getOptions().ParseableSwiftInterfaces)[*Name];
+ // The prepend path is applied later when copying.
+ SmallString<128> ResolvedPath;
+ if (sys::path::is_relative(Path))
+ sys::path::append(
+ ResolvedPath,
+ dwarf::toString(getUnitDIE().find(dwarf::DW_AT_comp_dir), ""));
+ sys::path::append(ResolvedPath, Path);
+ if (!Entry.empty() && Entry != ResolvedPath) {
+ DWARFDie Die = getDIE(DieEntry);
+ warn(Twine("conflicting parseable interfaces for Swift Module ") + *Name +
+ ": " + Entry + " and " + Path + ".",
+ &Die);
+ }
+ Entry = std::string(ResolvedPath.str());
+ }
+}
+
+Error CompileUnit::assignTypeNames(TypePool &TypePoolRef) {
+ if (!getUnitDIE().isValid())
+ return Error::success();
+
+ SyntheticTypeNameBuilder NameBuilder(TypePoolRef);
+ return assignTypeNamesRec(getDebugInfoEntry(0), NameBuilder);
+}
+
+Error CompileUnit::assignTypeNamesRec(const DWARFDebugInfoEntry *DieEntry,
+ SyntheticTypeNameBuilder &NameBuilder) {
+ OrderedChildrenIndexAssigner ChildrenIndexAssigner(*this, DieEntry);
+ for (const DWARFDebugInfoEntry *CurChild = getFirstChildEntry(DieEntry);
+ CurChild && CurChild->getAbbreviationDeclarationPtr();
+ CurChild = getSiblingEntry(CurChild)) {
+ CompileUnit::DIEInfo &ChildInfo = getDIEInfo(CurChild);
+ if (!ChildInfo.needToPlaceInTypeTable())
+ continue;
+
+ assert(ChildInfo.getODRAvailable());
+ if (Error Err = NameBuilder.assignName(
+ {this, CurChild},
+ ChildrenIndexAssigner.getChildIndex(*this, CurChild)))
+ return Err;
+
+ if (Error Err = assignTypeNamesRec(CurChild, NameBuilder))
+ return Err;
+ }
+
+ return Error::success();
+}
+
+void CompileUnit::updateDieRefPatchesWithClonedOffsets() {
+ if (std::optional<SectionDescriptor *> DebugInfoSection =
+ tryGetSectionDescriptor(DebugSectionKind::DebugInfo)) {
+
+ (*DebugInfoSection)
+ ->ListDebugDieRefPatch.forEach([&](DebugDieRefPatch &Patch) {
+ /// Replace stored DIE indexes with DIE output offsets.
+ Patch.RefDieIdxOrClonedOffset =
+ Patch.RefCU.getPointer()->getDieOutOffset(
+ Patch.RefDieIdxOrClonedOffset);
+ });
+
+ (*DebugInfoSection)
+ ->ListDebugULEB128DieRefPatch.forEach(
+ [&](DebugULEB128DieRefPatch &Patch) {
+ /// Replace stored DIE indexes with DIE output offsets.
+ Patch.RefDieIdxOrClonedOffset =
+ Patch.RefCU.getPointer()->getDieOutOffset(
+ Patch.RefDieIdxOrClonedOffset);
+ });
+ }
+
+ if (std::optional<SectionDescriptor *> DebugLocSection =
+ tryGetSectionDescriptor(DebugSectionKind::DebugLoc)) {
+ (*DebugLocSection)
+ ->ListDebugULEB128DieRefPatch.forEach(
+ [](DebugULEB128DieRefPatch &Patch) {
+ /// Replace stored DIE indexes with DIE output offsets.
+ Patch.RefDieIdxOrClonedOffset =
+ Patch.RefCU.getPointer()->getDieOutOffset(
+ Patch.RefDieIdxOrClonedOffset);
+ });
+ }
+
+ if (std::optional<SectionDescriptor *> DebugLocListsSection =
+ tryGetSectionDescriptor(DebugSectionKind::DebugLocLists)) {
+ (*DebugLocListsSection)
+ ->ListDebugULEB128DieRefPatch.forEach(
+ [](DebugULEB128DieRefPatch &Patch) {
+ /// Replace stored DIE indexes with DIE output offsets.
+ Patch.RefDieIdxOrClonedOffset =
+ Patch.RefCU.getPointer()->getDieOutOffset(
+ Patch.RefDieIdxOrClonedOffset);
+ });
+ }
+}
+
+std::optional<UnitEntryPairTy> CompileUnit::resolveDIEReference(
+ const DWARFFormValue &RefValue,
+ ResolveInterCUReferencesMode CanResolveInterCUReferences) {
+ if (std::optional<DWARFFormValue::UnitOffset> Ref =
+ *RefValue.getAsRelativeReference()) {
+ if (Ref->Unit == OrigUnit) {
+ // Referenced DIE is in current compile unit.
+ if (std::optional<uint32_t> RefDieIdx =
+ getDIEIndexForOffset(OrigUnit->getOffset() + Ref->Offset))
+ return UnitEntryPairTy{this, OrigUnit->getDebugInfoEntry(*RefDieIdx)};
+ }
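+    // Compute the absolute .debug_info offset of the referenced DIE.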
+ uint64_t RefDIEOffset =
+ Ref->Unit ? Ref->Unit->getOffset() + Ref->Offset : Ref->Offset;
+ if (CompileUnit *RefCU = getUnitFromOffset(RefDIEOffset)) {
+ if (RefCU == this) {
+ // Referenced DIE is in current compile unit.
+ if (std::optional<uint32_t> RefDieIdx =
+ getDIEIndexForOffset(RefDIEOffset))
+ return UnitEntryPairTy{this, getDebugInfoEntry(*RefDieIdx)};
+ } else if (CanResolveInterCUReferences) {
+ // Referenced DIE is in other compile unit.
+
+ // Check whether DIEs are loaded for that compile unit.
+ enum Stage ReferredCUStage = RefCU->getStage();
+ if (ReferredCUStage < Stage::Loaded || ReferredCUStage > Stage::Cloned)
+ return UnitEntryPairTy{RefCU, nullptr};
+
+ if (std::optional<uint32_t> RefDieIdx =
+ RefCU->getDIEIndexForOffset(RefDIEOffset))
+ return UnitEntryPairTy{RefCU, RefCU->getDebugInfoEntry(*RefDieIdx)};
+ } else
+ return UnitEntryPairTy{RefCU, nullptr};
+ }
+ }
+
+ return std::nullopt;
+}
+
+std::optional<UnitEntryPairTy> CompileUnit::resolveDIEReference(
+ const DWARFDebugInfoEntry *DieEntry, dwarf::Attribute Attr,
+ ResolveInterCUReferencesMode CanResolveInterCUReferences) {
+ if (std::optional<DWARFFormValue> AttrVal = find(DieEntry, Attr))
+ return resolveDIEReference(*AttrVal, CanResolveInterCUReferences);
+
+ return std::nullopt;
+}
+
+void CompileUnit::addFunctionRange(uint64_t FuncLowPc, uint64_t FuncHighPc,
+ int64_t PcOffset) {
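+  // Ranges may be added concurrently from several threads, hence the lock.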
+ std::lock_guard<std::mutex> Guard(RangesMutex);
+
+ Ranges.insert({FuncLowPc, FuncHighPc}, PcOffset);
+ if (LowPc)
+ LowPc = std::min(*LowPc, FuncLowPc + PcOffset);
+ else
+ LowPc = FuncLowPc + PcOffset;
+ this->HighPc = std::max(HighPc, FuncHighPc + PcOffset);
+}
+
+void CompileUnit::addLabelLowPc(uint64_t LabelLowPc, int64_t PcOffset) {
+ std::lock_guard<std::mutex> Guard(LabelsMutex);
+ Labels.insert({LabelLowPc, PcOffset});
+}
+
+Error CompileUnit::cloneAndEmitDebugLocations() {
+ if (getGlobalData().getOptions().UpdateIndexTablesOnly)
+ return Error::success();
+
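+  // Pre-DWARF-v5 units use .debug_loc; v5 and later use .debug_loclists.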
+ if (getOrigUnit().getVersion() < 5) {
+ emitLocations(DebugSectionKind::DebugLoc);
+ return Error::success();
+ }
+
+ emitLocations(DebugSectionKind::DebugLocLists);
+ return Error::success();
+}
+
+void CompileUnit::emitLocations(DebugSectionKind LocationSectionKind) {
+ SectionDescriptor &DebugInfoSection =
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugInfo);
+
+ if (!DebugInfoSection.ListDebugLocPatch.empty()) {
+ SectionDescriptor &OutLocationSection =
+ getOrCreateSectionDescriptor(LocationSectionKind);
+ DWARFUnit &OrigUnit = getOrigUnit();
+
+ uint64_t OffsetAfterUnitLength = emitLocListHeader(OutLocationSection);
+
+ DebugInfoSection.ListDebugLocPatch.forEach([&](DebugLocPatch &Patch) {
+ // Get location expressions vector corresponding to the current
+ // attribute from the source DWARF.
+ uint64_t InputDebugLocSectionOffset = DebugInfoSection.getIntVal(
+ Patch.PatchOffset,
+ DebugInfoSection.getFormParams().getDwarfOffsetByteSize());
+ Expected<DWARFLocationExpressionsVector> OriginalLocations =
+ OrigUnit.findLoclistFromOffset(InputDebugLocSectionOffset);
+
+ if (!OriginalLocations) {
+ warn(OriginalLocations.takeError());
+ return;
+ }
+
+ LinkedLocationExpressionsVector LinkedLocationExpressions;
+ for (DWARFLocationExpression &CurExpression : *OriginalLocations) {
+ LinkedLocationExpressionsWithOffsetPatches LinkedExpression;
+
+ if (CurExpression.Range) {
+ // Relocate address range.
+ LinkedExpression.Expression.Range = {
+ CurExpression.Range->LowPC + Patch.AddrAdjustmentValue,
+ CurExpression.Range->HighPC + Patch.AddrAdjustmentValue};
+ }
+
+ DataExtractor Data(CurExpression.Expr, OrigUnit.isLittleEndian(),
+ OrigUnit.getAddressByteSize());
+
+ DWARFExpression InputExpression(Data, OrigUnit.getAddressByteSize(),
+ OrigUnit.getFormParams().Format);
+ cloneDieAttrExpression(InputExpression,
+ LinkedExpression.Expression.Expr,
+ OutLocationSection, Patch.AddrAdjustmentValue,
+ LinkedExpression.Patches);
+
+ LinkedLocationExpressions.push_back({LinkedExpression});
+ }
+
+      // Emit the location list table fragment corresponding to this attribute.
+ DebugInfoSection.apply(Patch.PatchOffset, dwarf::DW_FORM_sec_offset,
+ OutLocationSection.OS.tell());
+ emitLocListFragment(LinkedLocationExpressions, OutLocationSection);
+ });
+
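+    // Patch the unit_length placeholder emitted by emitLocListHeader().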
+ if (OffsetAfterUnitLength > 0) {
+ assert(OffsetAfterUnitLength -
+ OutLocationSection.getFormParams().getDwarfOffsetByteSize() <
+ OffsetAfterUnitLength);
+ OutLocationSection.apply(
+ OffsetAfterUnitLength -
+ OutLocationSection.getFormParams().getDwarfOffsetByteSize(),
+ dwarf::DW_FORM_sec_offset,
+ OutLocationSection.OS.tell() - OffsetAfterUnitLength);
+ }
+ }
+}
+
+/// Emit the debug locations (.debug_loc, .debug_loclists) header.
+uint64_t CompileUnit::emitLocListHeader(SectionDescriptor &OutLocationSection) {
+ if (getOrigUnit().getVersion() < 5)
+ return 0;
+
+ // unit_length.
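+  // 0xBADDEF is a placeholder; the real length is patched in emitLocations().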
+ OutLocationSection.emitUnitLength(0xBADDEF);
+ uint64_t OffsetAfterUnitLength = OutLocationSection.OS.tell();
+
+ // Version.
+ OutLocationSection.emitIntVal(5, 2);
+
+ // Address size.
+ OutLocationSection.emitIntVal(OutLocationSection.getFormParams().AddrSize, 1);
+
+ // Seg_size
+ OutLocationSection.emitIntVal(0, 1);
+
+ // Offset entry count
+ OutLocationSection.emitIntVal(0, 4);
+
+ return OffsetAfterUnitLength;
+}
+
+/// Emit debug locations (.debug_loc, .debug_loclists) fragment.
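+/// For DWARF v5 an emitted fragment typically looks like (a sketch):
+///   DW_LLE_base_addressx <ULEB .debug_addr index>   (once, for first range)
+///   DW_LLE_offset_pair <ULEB start> <ULEB end>      (per expression range)
+///   <ULEB expression size> <expression bytes>
+///   DW_LLE_end_of_list
+/// Pre-v5 fragments are plain address pairs relative to the unit low_pc.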
+uint64_t CompileUnit::emitLocListFragment(
+ const LinkedLocationExpressionsVector &LinkedLocationExpression,
+ SectionDescriptor &OutLocationSection) {
+ uint64_t OffsetBeforeLocationExpression = 0;
+
+ if (getOrigUnit().getVersion() < 5) {
+ uint64_t BaseAddress = 0;
+ if (std::optional<uint64_t> LowPC = getLowPc())
+ BaseAddress = *LowPC;
+
+ for (const LinkedLocationExpressionsWithOffsetPatches &LocExpression :
+ LinkedLocationExpression) {
+ if (LocExpression.Expression.Range) {
+ OutLocationSection.emitIntVal(
+ LocExpression.Expression.Range->LowPC - BaseAddress,
+ OutLocationSection.getFormParams().AddrSize);
+ OutLocationSection.emitIntVal(
+ LocExpression.Expression.Range->HighPC - BaseAddress,
+ OutLocationSection.getFormParams().AddrSize);
+ }
+
+ OutLocationSection.emitIntVal(LocExpression.Expression.Expr.size(), 2);
+ OffsetBeforeLocationExpression = OutLocationSection.OS.tell();
+ for (uint64_t *OffsetPtr : LocExpression.Patches)
+ *OffsetPtr += OffsetBeforeLocationExpression;
+
+ OutLocationSection.OS
+ << StringRef((const char *)LocExpression.Expression.Expr.data(),
+ LocExpression.Expression.Expr.size());
+ }
+
+ // Emit the terminator entry.
+ OutLocationSection.emitIntVal(0,
+ OutLocationSection.getFormParams().AddrSize);
+ OutLocationSection.emitIntVal(0,
+ OutLocationSection.getFormParams().AddrSize);
+ return OffsetBeforeLocationExpression;
+ }
+
+ std::optional<uint64_t> BaseAddress;
+ for (const LinkedLocationExpressionsWithOffsetPatches &LocExpression :
+ LinkedLocationExpression) {
+ if (LocExpression.Expression.Range) {
+      // Check whether the base address is set. If it is not set yet,
+      // set the current base address and emit a base address selection entry.
+ if (!BaseAddress) {
+ BaseAddress = LocExpression.Expression.Range->LowPC;
+
+ // Emit base address.
+ OutLocationSection.emitIntVal(dwarf::DW_LLE_base_addressx, 1);
+ encodeULEB128(DebugAddrIndexMap.getValueIndex(*BaseAddress),
+ OutLocationSection.OS);
+ }
+
+ // Emit type of entry.
+ OutLocationSection.emitIntVal(dwarf::DW_LLE_offset_pair, 1);
+
+ // Emit start offset relative to base address.
+ encodeULEB128(LocExpression.Expression.Range->LowPC - *BaseAddress,
+ OutLocationSection.OS);
+
+ // Emit end offset relative to base address.
+ encodeULEB128(LocExpression.Expression.Range->HighPC - *BaseAddress,
+ OutLocationSection.OS);
+ } else
+ // Emit type of entry.
+ OutLocationSection.emitIntVal(dwarf::DW_LLE_default_location, 1);
+
+ encodeULEB128(LocExpression.Expression.Expr.size(), OutLocationSection.OS);
+ OffsetBeforeLocationExpression = OutLocationSection.OS.tell();
+ for (uint64_t *OffsetPtr : LocExpression.Patches)
+ *OffsetPtr += OffsetBeforeLocationExpression;
+
+ OutLocationSection.OS << StringRef(
+ (const char *)LocExpression.Expression.Expr.data(),
+ LocExpression.Expression.Expr.size());
+ }
+
+ // Emit the terminator entry.
+ OutLocationSection.emitIntVal(dwarf::DW_LLE_end_of_list, 1);
+ return OffsetBeforeLocationExpression;
+}
+
+Error CompileUnit::emitDebugAddrSection() {
+ if (GlobalData.getOptions().UpdateIndexTablesOnly)
+ return Error::success();
+
+ if (getVersion() < 5)
+ return Error::success();
+
+ if (DebugAddrIndexMap.empty())
+ return Error::success();
+
+ SectionDescriptor &OutAddrSection =
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugAddr);
+
+ // Emit section header.
+
+ // Emit length.
+ OutAddrSection.emitUnitLength(0xBADDEF);
+ uint64_t OffsetAfterSectionLength = OutAddrSection.OS.tell();
+
+ // Emit version.
+ OutAddrSection.emitIntVal(5, 2);
+
+ // Emit address size.
+ OutAddrSection.emitIntVal(getFormParams().AddrSize, 1);
+
+ // Emit segment size.
+ OutAddrSection.emitIntVal(0, 1);
+
+ // Emit addresses.
+ for (uint64_t AddrValue : DebugAddrIndexMap.getValues())
+ OutAddrSection.emitIntVal(AddrValue, getFormParams().AddrSize);
+
+ // Patch section length.
+ OutAddrSection.apply(
+ OffsetAfterSectionLength -
+ OutAddrSection.getFormParams().getDwarfOffsetByteSize(),
+ dwarf::DW_FORM_sec_offset,
+ OutAddrSection.OS.tell() - OffsetAfterSectionLength);
+
+ return Error::success();
+}
+
+Error CompileUnit::cloneAndEmitRanges() {
+ if (getGlobalData().getOptions().UpdateIndexTablesOnly)
+ return Error::success();
+
+ // Build set of linked address ranges for unit function ranges.
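+  // Each AddressRangeValuePair pairs an input range with the relocation
+  // adjustment recorded by addFunctionRange(); adding Value to both bounds
+  // yields the linked (output) addresses.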
+ AddressRanges LinkedFunctionRanges;
+ for (const AddressRangeValuePair &Range : getFunctionRanges())
+ LinkedFunctionRanges.insert(
+ {Range.Range.start() + Range.Value, Range.Range.end() + Range.Value});
+
+ emitAranges(LinkedFunctionRanges);
+
+ if (getOrigUnit().getVersion() < 5) {
+ cloneAndEmitRangeList(DebugSectionKind::DebugRange, LinkedFunctionRanges);
+ return Error::success();
+ }
+
+ cloneAndEmitRangeList(DebugSectionKind::DebugRngLists, LinkedFunctionRanges);
+ return Error::success();
+}
+
+void CompileUnit::cloneAndEmitRangeList(DebugSectionKind RngSectionKind,
+ AddressRanges &LinkedFunctionRanges) {
+ SectionDescriptor &DebugInfoSection =
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugInfo);
+ SectionDescriptor &OutRangeSection =
+ getOrCreateSectionDescriptor(RngSectionKind);
+
+ if (!DebugInfoSection.ListDebugRangePatch.empty()) {
+ std::optional<AddressRangeValuePair> CachedRange;
+ uint64_t OffsetAfterUnitLength = emitRangeListHeader(OutRangeSection);
+
+ DebugRangePatch *CompileUnitRangePtr = nullptr;
+ DebugInfoSection.ListDebugRangePatch.forEach([&](DebugRangePatch &Patch) {
+ if (Patch.IsCompileUnitRanges) {
+ CompileUnitRangePtr = &Patch;
+ } else {
+ // Get ranges from the source DWARF corresponding to the current
+ // attribute.
+ AddressRanges LinkedRanges;
+ uint64_t InputDebugRangesSectionOffset = DebugInfoSection.getIntVal(
+ Patch.PatchOffset,
+ DebugInfoSection.getFormParams().getDwarfOffsetByteSize());
+ if (Expected<DWARFAddressRangesVector> InputRanges =
+ getOrigUnit().findRnglistFromOffset(
+ InputDebugRangesSectionOffset)) {
+ // Apply relocation adjustment.
+ for (const auto &Range : *InputRanges) {
+ if (!CachedRange || !CachedRange->Range.contains(Range.LowPC))
+ CachedRange =
+ getFunctionRanges().getRangeThatContains(Range.LowPC);
+
+ // All range entries should lie in the function range.
+ if (!CachedRange) {
+ warn("inconsistent range data.");
+ continue;
+ }
+
+            // Store range for emitting.
+ LinkedRanges.insert({Range.LowPC + CachedRange->Value,
+ Range.HighPC + CachedRange->Value});
+ }
+ } else {
+ llvm::consumeError(InputRanges.takeError());
+ warn("invalid range list ignored.");
+ }
+
+ // Emit linked ranges.
+ DebugInfoSection.apply(Patch.PatchOffset, dwarf::DW_FORM_sec_offset,
+ OutRangeSection.OS.tell());
+ emitRangeListFragment(LinkedRanges, OutRangeSection);
+ }
+ });
+
+ if (CompileUnitRangePtr != nullptr) {
+ // Emit compile unit ranges last to be binary compatible with classic
+ // dsymutil.
+ DebugInfoSection.apply(CompileUnitRangePtr->PatchOffset,
+ dwarf::DW_FORM_sec_offset,
+ OutRangeSection.OS.tell());
+ emitRangeListFragment(LinkedFunctionRanges, OutRangeSection);
+ }
+
+ if (OffsetAfterUnitLength > 0) {
+ assert(OffsetAfterUnitLength -
+ OutRangeSection.getFormParams().getDwarfOffsetByteSize() <
+ OffsetAfterUnitLength);
+ OutRangeSection.apply(
+ OffsetAfterUnitLength -
+ OutRangeSection.getFormParams().getDwarfOffsetByteSize(),
+ dwarf::DW_FORM_sec_offset,
+ OutRangeSection.OS.tell() - OffsetAfterUnitLength);
+ }
+ }
+}
+
+uint64_t CompileUnit::emitRangeListHeader(SectionDescriptor &OutRangeSection) {
+ if (OutRangeSection.getFormParams().Version < 5)
+ return 0;
+
+ // unit_length.
+ OutRangeSection.emitUnitLength(0xBADDEF);
+ uint64_t OffsetAfterUnitLength = OutRangeSection.OS.tell();
+
+ // Version.
+ OutRangeSection.emitIntVal(5, 2);
+
+ // Address size.
+ OutRangeSection.emitIntVal(OutRangeSection.getFormParams().AddrSize, 1);
+
+ // Seg_size
+ OutRangeSection.emitIntVal(0, 1);
+
+ // Offset entry count
+ OutRangeSection.emitIntVal(0, 4);
+
+ return OffsetAfterUnitLength;
+}
+
+void CompileUnit::emitRangeListFragment(const AddressRanges &LinkedRanges,
+ SectionDescriptor &OutRangeSection) {
+ if (OutRangeSection.getFormParams().Version < 5) {
+ // Emit ranges.
+ uint64_t BaseAddress = 0;
+ if (std::optional<uint64_t> LowPC = getLowPc())
+ BaseAddress = *LowPC;
+
+ for (const AddressRange &Range : LinkedRanges) {
+ OutRangeSection.emitIntVal(Range.start() - BaseAddress,
+ OutRangeSection.getFormParams().AddrSize);
+ OutRangeSection.emitIntVal(Range.end() - BaseAddress,
+ OutRangeSection.getFormParams().AddrSize);
+ }
+
+ // Add the terminator entry.
+ OutRangeSection.emitIntVal(0, OutRangeSection.getFormParams().AddrSize);
+ OutRangeSection.emitIntVal(0, OutRangeSection.getFormParams().AddrSize);
+ return;
+ }
+
+ std::optional<uint64_t> BaseAddress;
+ for (const AddressRange &Range : LinkedRanges) {
+ if (!BaseAddress) {
+ BaseAddress = Range.start();
+
+ // Emit base address.
+ OutRangeSection.emitIntVal(dwarf::DW_RLE_base_addressx, 1);
+ encodeULEB128(getDebugAddrIndex(*BaseAddress), OutRangeSection.OS);
+ }
+
+ // Emit type of entry.
+ OutRangeSection.emitIntVal(dwarf::DW_RLE_offset_pair, 1);
+
+ // Emit start offset relative to base address.
+ encodeULEB128(Range.start() - *BaseAddress, OutRangeSection.OS);
+
+ // Emit end offset relative to base address.
+ encodeULEB128(Range.end() - *BaseAddress, OutRangeSection.OS);
+ }
+
+ // Emit the terminator entry.
+ OutRangeSection.emitIntVal(dwarf::DW_RLE_end_of_list, 1);
+}
+
+void CompileUnit::emitAranges(AddressRanges &LinkedFunctionRanges) {
+ if (LinkedFunctionRanges.empty())
+ return;
+
+ SectionDescriptor &DebugInfoSection =
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugInfo);
+ SectionDescriptor &OutArangesSection =
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugARanges);
+
+ // Emit Header.
+ unsigned HeaderSize =
+ sizeof(int32_t) + // Size of contents (w/o this field
+ sizeof(int16_t) + // DWARF ARange version number
+ sizeof(int32_t) + // Offset of CU in the .debug_info section
+ sizeof(int8_t) + // Pointer Size (in bytes)
+ sizeof(int8_t); // Segment Size (in bytes)
+
+ unsigned TupleSize = OutArangesSection.getFormParams().AddrSize * 2;
+ unsigned Padding = offsetToAlignment(HeaderSize, Align(TupleSize));
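+  // For example, with an 8-byte address size TupleSize is 16, so the 12-byte
+  // header is followed by 4 bytes of padding to align the first tuple.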
+
+ OutArangesSection.emitOffset(0xBADDEF); // Aranges length
+ uint64_t OffsetAfterArangesLengthField = OutArangesSection.OS.tell();
+
+ OutArangesSection.emitIntVal(dwarf::DW_ARANGES_VERSION, 2); // Version number
+ OutArangesSection.notePatch(
+ DebugOffsetPatch{OutArangesSection.OS.tell(), &DebugInfoSection});
+ OutArangesSection.emitOffset(0xBADDEF); // Corresponding unit's offset
+ OutArangesSection.emitIntVal(OutArangesSection.getFormParams().AddrSize,
+ 1); // Address size
+ OutArangesSection.emitIntVal(0, 1); // Segment size
+
+ for (size_t Idx = 0; Idx < Padding; Idx++)
+ OutArangesSection.emitIntVal(0, 1); // Padding
+
+ // Emit linked ranges.
+ for (const AddressRange &Range : LinkedFunctionRanges) {
+ OutArangesSection.emitIntVal(Range.start(),
+ OutArangesSection.getFormParams().AddrSize);
+ OutArangesSection.emitIntVal(Range.end() - Range.start(),
+ OutArangesSection.getFormParams().AddrSize);
+ }
+
+ // Emit terminator.
+ OutArangesSection.emitIntVal(0, OutArangesSection.getFormParams().AddrSize);
+ OutArangesSection.emitIntVal(0, OutArangesSection.getFormParams().AddrSize);
+
+ uint64_t OffsetAfterArangesEnd = OutArangesSection.OS.tell();
+
+  // Update Aranges length.
+ OutArangesSection.apply(
+ OffsetAfterArangesLengthField -
+ OutArangesSection.getFormParams().getDwarfOffsetByteSize(),
+ dwarf::DW_FORM_sec_offset,
+ OffsetAfterArangesEnd - OffsetAfterArangesLengthField);
+}
+
+Error CompileUnit::cloneAndEmitDebugMacro() {
+ if (getOutUnitDIE() == nullptr)
+ return Error::success();
+
+ DWARFUnit &OrigUnit = getOrigUnit();
+ DWARFDie OrigUnitDie = OrigUnit.getUnitDIE();
+
+ // Check for .debug_macro table.
+ if (std::optional<uint64_t> MacroAttr =
+ dwarf::toSectionOffset(OrigUnitDie.find(dwarf::DW_AT_macros))) {
+ if (const DWARFDebugMacro *Table =
+ getContaingFile().Dwarf->getDebugMacro()) {
+ emitMacroTableImpl(Table, *MacroAttr, true);
+ }
+ }
+
+ // Check for .debug_macinfo table.
+ if (std::optional<uint64_t> MacroAttr =
+ dwarf::toSectionOffset(OrigUnitDie.find(dwarf::DW_AT_macro_info))) {
+ if (const DWARFDebugMacro *Table =
+ getContaingFile().Dwarf->getDebugMacinfo()) {
+ emitMacroTableImpl(Table, *MacroAttr, false);
+ }
+ }
+
+ return Error::success();
+}
+
+void CompileUnit::emitMacroTableImpl(const DWARFDebugMacro *MacroTable,
+ uint64_t OffsetToMacroTable,
+ bool hasDWARFv5Header) {
+ SectionDescriptor &OutSection =
+ hasDWARFv5Header
+ ? getOrCreateSectionDescriptor(DebugSectionKind::DebugMacro)
+ : getOrCreateSectionDescriptor(DebugSectionKind::DebugMacinfo);
+
+ bool DefAttributeIsReported = false;
+ bool UndefAttributeIsReported = false;
+ bool ImportAttributeIsReported = false;
+
+ for (const DWARFDebugMacro::MacroList &List : MacroTable->MacroLists) {
+ if (OffsetToMacroTable == List.Offset) {
+ // Write DWARFv5 header.
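+      // Its layout is: version (2 bytes), flags (1 byte), and, when the
+      // DEBUG_LINE_OFFSET flag is set, an offset into .debug_line.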
+ if (hasDWARFv5Header) {
+ // Write header version.
+ OutSection.emitIntVal(List.Header.Version, sizeof(List.Header.Version));
+
+ uint8_t Flags = List.Header.Flags;
+
+ // Check for OPCODE_OPERANDS_TABLE.
+ if (Flags &
+ DWARFDebugMacro::HeaderFlagMask::MACRO_OPCODE_OPERANDS_TABLE) {
+ Flags &=
+ ~DWARFDebugMacro::HeaderFlagMask::MACRO_OPCODE_OPERANDS_TABLE;
+ warn("opcode_operands_table is not supported yet.");
+ }
+
+ // Check for DEBUG_LINE_OFFSET.
+ std::optional<uint64_t> StmtListOffset;
+ if (Flags & DWARFDebugMacro::HeaderFlagMask::MACRO_DEBUG_LINE_OFFSET) {
+ // Get offset to the line table from the cloned compile unit.
+ for (auto &V : getOutUnitDIE()->values()) {
+ if (V.getAttribute() == dwarf::DW_AT_stmt_list) {
+ StmtListOffset = V.getDIEInteger().getValue();
+ break;
+ }
+ }
+
+ if (!StmtListOffset) {
+ Flags &= ~DWARFDebugMacro::HeaderFlagMask::MACRO_DEBUG_LINE_OFFSET;
+ warn("couldn`t find line table for macro table.");
+ }
+ }
+
+ // Write flags.
+ OutSection.emitIntVal(Flags, sizeof(Flags));
+
+ // Write offset to line table.
+ if (StmtListOffset) {
+ OutSection.notePatch(DebugOffsetPatch{
+ OutSection.OS.tell(),
+ &getOrCreateSectionDescriptor(DebugSectionKind::DebugLine)});
+ // TODO: check that List.Header.getOffsetByteSize() and
+ // DebugOffsetPatch agree on size.
+ OutSection.emitIntVal(0xBADDEF, List.Header.getOffsetByteSize());
+ }
+ }
+
+ // Write macro entries.
+ for (const DWARFDebugMacro::Entry &MacroEntry : List.Macros) {
+ if (MacroEntry.Type == 0) {
+ encodeULEB128(MacroEntry.Type, OutSection.OS);
+ continue;
+ }
+
+ uint8_t MacroType = MacroEntry.Type;
+ switch (MacroType) {
+ default: {
+ bool HasVendorSpecificExtension =
+ (!hasDWARFv5Header &&
+ MacroType == dwarf::DW_MACINFO_vendor_ext) ||
+ (hasDWARFv5Header && (MacroType >= dwarf::DW_MACRO_lo_user &&
+ MacroType <= dwarf::DW_MACRO_hi_user));
+
+ if (HasVendorSpecificExtension) {
+ // Write macinfo type.
+ OutSection.emitIntVal(MacroType, 1);
+
+ // Write vendor extension constant.
+ encodeULEB128(MacroEntry.ExtConstant, OutSection.OS);
+
+ // Write vendor extension string.
+ OutSection.emitString(dwarf::DW_FORM_string, MacroEntry.ExtStr);
+ } else
+ warn("unknown macro type. skip.");
+ } break;
+ // debug_macro and debug_macinfo share some common encodings.
+ // DW_MACRO_define == DW_MACINFO_define
+ // DW_MACRO_undef == DW_MACINFO_undef
+ // DW_MACRO_start_file == DW_MACINFO_start_file
+ // DW_MACRO_end_file == DW_MACINFO_end_file
+        // For readability/uniformity we are using DW_MACRO_*.
+ case dwarf::DW_MACRO_define:
+ case dwarf::DW_MACRO_undef: {
+ // Write macinfo type.
+ OutSection.emitIntVal(MacroType, 1);
+
+ // Write source line.
+ encodeULEB128(MacroEntry.Line, OutSection.OS);
+
+ // Write macro string.
+ OutSection.emitString(dwarf::DW_FORM_string, MacroEntry.MacroStr);
+ } break;
+ case dwarf::DW_MACRO_define_strp:
+ case dwarf::DW_MACRO_undef_strp:
+ case dwarf::DW_MACRO_define_strx:
+ case dwarf::DW_MACRO_undef_strx: {
+ // DW_MACRO_*_strx forms are not supported currently.
+ // Convert to *_strp.
+ switch (MacroType) {
+ case dwarf::DW_MACRO_define_strx: {
+ MacroType = dwarf::DW_MACRO_define_strp;
+ if (!DefAttributeIsReported) {
+ warn("DW_MACRO_define_strx unsupported yet. Convert to "
+ "DW_MACRO_define_strp.");
+ DefAttributeIsReported = true;
+ }
+ } break;
+ case dwarf::DW_MACRO_undef_strx: {
+ MacroType = dwarf::DW_MACRO_undef_strp;
+ if (!UndefAttributeIsReported) {
+ warn("DW_MACRO_undef_strx unsupported yet. Convert to "
+ "DW_MACRO_undef_strp.");
+ UndefAttributeIsReported = true;
+ }
+ } break;
+ default:
+ // Nothing to do.
+ break;
+ }
+
+ // Write macinfo type.
+ OutSection.emitIntVal(MacroType, 1);
+
+ // Write source line.
+ encodeULEB128(MacroEntry.Line, OutSection.OS);
+
+ // Write macro string.
+ OutSection.emitString(dwarf::DW_FORM_strp, MacroEntry.MacroStr);
+ break;
+ }
+ case dwarf::DW_MACRO_start_file: {
+ // Write macinfo type.
+ OutSection.emitIntVal(MacroType, 1);
+ // Write source line.
+ encodeULEB128(MacroEntry.Line, OutSection.OS);
+ // Write source file id.
+ encodeULEB128(MacroEntry.File, OutSection.OS);
+ } break;
+ case dwarf::DW_MACRO_end_file: {
+ // Write macinfo type.
+ OutSection.emitIntVal(MacroType, 1);
+ } break;
+ case dwarf::DW_MACRO_import:
+ case dwarf::DW_MACRO_import_sup: {
+ if (!ImportAttributeIsReported) {
+ warn("DW_MACRO_import and DW_MACRO_import_sup are unsupported "
+ "yet. remove.");
+ ImportAttributeIsReported = true;
+ }
+ } break;
+ }
+ }
+
+ return;
+ }
+ }
+}
+
+void CompileUnit::cloneDieAttrExpression(
+ const DWARFExpression &InputExpression,
+ SmallVectorImpl<uint8_t> &OutputExpression, SectionDescriptor &Section,
+ std::optional<int64_t> VarAddressAdjustment,
+ OffsetsPtrVector &PatchesOffsets) {
+ using Encoding = DWARFExpression::Operation::Encoding;
+
+ DWARFUnit &OrigUnit = getOrigUnit();
+ uint8_t OrigAddressByteSize = OrigUnit.getAddressByteSize();
+
+ uint64_t OpOffset = 0;
+ for (auto &Op : InputExpression) {
+ auto Desc = Op.getDescription();
+ // DW_OP_const_type is variable-length and has 3
+ // operands. Thus far we only support 2.
+ if ((Desc.Op.size() == 2 && Desc.Op[0] == Encoding::BaseTypeRef) ||
+ (Desc.Op.size() == 2 && Desc.Op[1] == Encoding::BaseTypeRef &&
+ Desc.Op[0] != Encoding::Size1))
+ warn("unsupported DW_OP encoding.");
+
+ if ((Desc.Op.size() == 1 && Desc.Op[0] == Encoding::BaseTypeRef) ||
+ (Desc.Op.size() == 2 && Desc.Op[1] == Encoding::BaseTypeRef &&
+ Desc.Op[0] == Encoding::Size1)) {
+ // This code assumes that the other non-typeref operand fits into 1 byte.
+ assert(OpOffset < Op.getEndOffset());
+ uint32_t ULEBsize = Op.getEndOffset() - OpOffset - 1;
+ assert(ULEBsize <= 16);
+
+ // Copy over the operation.
+ assert(!Op.getSubCode() && "SubOps not yet supported");
+ OutputExpression.push_back(Op.getCode());
+ uint64_t RefOffset;
+ if (Desc.Op.size() == 1) {
+ RefOffset = Op.getRawOperand(0);
+ } else {
+ OutputExpression.push_back(Op.getRawOperand(0));
+ RefOffset = Op.getRawOperand(1);
+ }
+ uint8_t ULEB[16];
+ uint32_t Offset = 0;
+ unsigned RealSize = 0;
+ // Look up the base type. For DW_OP_convert, the operand may be 0 to
+ // instead indicate the generic type. The same holds for
+ // DW_OP_reinterpret, which is currently not supported.
+ if (RefOffset > 0 || Op.getCode() != dwarf::DW_OP_convert) {
+ RefOffset += OrigUnit.getOffset();
+ uint32_t RefDieIdx = 0;
+ if (std::optional<uint32_t> Idx =
+ OrigUnit.getDIEIndexForOffset(RefOffset))
+ RefDieIdx = *Idx;
+
+        // Use a fixed size for the ULEB128 data, since we need to patch the
+        // value later with the proper offset: 5 bytes for DWARF32, 9 for
+        // DWARF64.
+ ULEBsize = getFormParams().getDwarfOffsetByteSize() + 1;
+
+ RealSize = encodeULEB128(0xBADDEF, ULEB, ULEBsize);
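+        // encodeULEB128's PadTo argument pads the placeholder encoding with
+        // 0x80 continuation bytes up to ULEBsize, so the real offset can be
+        // patched in place later without resizing the expression.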
+
+ Section.notePatchWithOffsetUpdate(
+ DebugULEB128DieRefPatch(OutputExpression.size(), this, this,
+ RefDieIdx),
+ PatchesOffsets);
+ } else
+ RealSize = encodeULEB128(Offset, ULEB, ULEBsize);
+
+ if (RealSize > ULEBsize) {
+ // Emit the generic type as a fallback.
+ RealSize = encodeULEB128(0, ULEB, ULEBsize);
+ warn("base type ref doesn't fit.");
+ }
+ assert(RealSize == ULEBsize && "padding failed");
+ ArrayRef<uint8_t> ULEBbytes(ULEB, ULEBsize);
+ OutputExpression.append(ULEBbytes.begin(), ULEBbytes.end());
+ } else if (!getGlobalData().getOptions().UpdateIndexTablesOnly &&
+ Op.getCode() == dwarf::DW_OP_addrx) {
+ if (std::optional<object::SectionedAddress> SA =
+ OrigUnit.getAddrOffsetSectionItem(Op.getRawOperand(0))) {
+ // DWARFLinker does not use addrx forms since it generates relocated
+ // addresses. Replace DW_OP_addrx with DW_OP_addr here.
+ // Argument of DW_OP_addrx should be relocated here as it is not
+ // processed by applyValidRelocs.
+ OutputExpression.push_back(dwarf::DW_OP_addr);
+ uint64_t LinkedAddress =
+ SA->Address + (VarAddressAdjustment ? *VarAddressAdjustment : 0);
+ if (getEndianness() != llvm::endianness::native)
+ sys::swapByteOrder(LinkedAddress);
+ ArrayRef<uint8_t> AddressBytes(
+ reinterpret_cast<const uint8_t *>(&LinkedAddress),
+ OrigAddressByteSize);
+ OutputExpression.append(AddressBytes.begin(), AddressBytes.end());
+ } else
+ warn("cann't read DW_OP_addrx operand.");
+ } else if (!getGlobalData().getOptions().UpdateIndexTablesOnly &&
+ Op.getCode() == dwarf::DW_OP_constx) {
+ if (std::optional<object::SectionedAddress> SA =
+ OrigUnit.getAddrOffsetSectionItem(Op.getRawOperand(0))) {
+ // DWARFLinker does not use constx forms since it generates relocated
+ // addresses. Replace DW_OP_constx with DW_OP_const[*]u here.
+ // Argument of DW_OP_constx should be relocated here as it is not
+ // processed by applyValidRelocs.
+ std::optional<uint8_t> OutOperandKind;
+ switch (OrigAddressByteSize) {
+ case 2:
+ OutOperandKind = dwarf::DW_OP_const2u;
+ break;
+ case 4:
+ OutOperandKind = dwarf::DW_OP_const4u;
+ break;
+ case 8:
+ OutOperandKind = dwarf::DW_OP_const8u;
+ break;
+ default:
+ warn(
+ formatv(("unsupported address size: {0}."), OrigAddressByteSize));
+ break;
+ }
+
+ if (OutOperandKind) {
+ OutputExpression.push_back(*OutOperandKind);
+ uint64_t LinkedAddress =
+ SA->Address + (VarAddressAdjustment ? *VarAddressAdjustment : 0);
+ if (getEndianness() != llvm::endianness::native)
+ sys::swapByteOrder(LinkedAddress);
+ ArrayRef<uint8_t> AddressBytes(
+ reinterpret_cast<const uint8_t *>(&LinkedAddress),
+ OrigAddressByteSize);
+ OutputExpression.append(AddressBytes.begin(), AddressBytes.end());
+ }
+ } else
+ warn("cann't read DW_OP_constx operand.");
+ } else {
+ // Copy over everything else unmodified.
+ StringRef Bytes =
+ InputExpression.getData().slice(OpOffset, Op.getEndOffset());
+ OutputExpression.append(Bytes.begin(), Bytes.end());
+ }
+ OpOffset = Op.getEndOffset();
+ }
+}
+
+Error CompileUnit::cloneAndEmit(std::optional<Triple> TargetTriple,
+ TypeUnit *ArtificialTypeUnit) {
+ BumpPtrAllocator Allocator;
+
+ DWARFDie OrigUnitDIE = getOrigUnit().getUnitDIE();
+ if (!OrigUnitDIE.isValid())
+ return Error::success();
+
+ TypeEntry *RootEntry = nullptr;
+ if (ArtificialTypeUnit)
+ RootEntry = ArtificialTypeUnit->getTypePool().getRoot();
+
+ // Clone input DIE entry recursively.
+ std::pair<DIE *, TypeEntry *> OutCUDie = cloneDIE(
+ OrigUnitDIE.getDebugInfoEntry(), RootEntry, getDebugInfoHeaderSize(),
+ std::nullopt, std::nullopt, Allocator, ArtificialTypeUnit);
+ setOutUnitDIE(OutCUDie.first);
+
+ if (getGlobalData().getOptions().NoOutput || (OutCUDie.first == nullptr))
+ return Error::success();
+
+ assert(TargetTriple.has_value());
+ if (Error Err = cloneAndEmitLineTable(*TargetTriple))
+ return Err;
+
+ if (Error Err = cloneAndEmitDebugMacro())
+ return Err;
+
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugInfo);
+ if (Error Err = emitDebugInfo(*TargetTriple))
+ return Err;
+
+ // ASSUMPTION: .debug_info section should already be emitted at this point.
+ // cloneAndEmitRanges & cloneAndEmitDebugLocations use .debug_info section
+ // data.
+
+ if (Error Err = cloneAndEmitRanges())
+ return Err;
+
+ if (Error Err = cloneAndEmitDebugLocations())
+ return Err;
+
+ if (Error Err = emitDebugAddrSection())
+ return Err;
+
+ // Generate Pub accelerator tables.
+ if (llvm::is_contained(GlobalData.getOptions().AccelTables,
+ DWARFLinker::AccelTableKind::Pub))
+ emitPubAccelerators();
+
+ if (Error Err = emitDebugStringOffsetSection())
+ return Err;
+
+ return emitAbbreviations();
+}
+
+std::pair<DIE *, TypeEntry *> CompileUnit::cloneDIE(
+ const DWARFDebugInfoEntry *InputDieEntry, TypeEntry *ClonedParentTypeDIE,
+ uint64_t OutOffset, std::optional<int64_t> FuncAddressAdjustment,
+ std::optional<int64_t> VarAddressAdjustment, BumpPtrAllocator &Allocator,
+ TypeUnit *ArtificialTypeUnit) {
+ uint32_t InputDieIdx = getDIEIndex(InputDieEntry);
+ CompileUnit::DIEInfo &Info = getDIEInfo(InputDieIdx);
+
+ bool NeedToClonePlainDIE = Info.needToKeepInPlainDwarf();
+ bool NeedToCloneTypeDIE =
+ (InputDieEntry->getTag() != dwarf::DW_TAG_compile_unit) &&
+ Info.needToPlaceInTypeTable();
+ std::pair<DIE *, TypeEntry *> ClonedDIE;
+
+ DIEGenerator PlainDIEGenerator(Allocator, *this);
+
+ if (NeedToClonePlainDIE)
+ // Create a cloned DIE which would be placed into the cloned version
+ // of input compile unit.
+ ClonedDIE.first = createPlainDIEandCloneAttributes(
+ InputDieEntry, PlainDIEGenerator, OutOffset, FuncAddressAdjustment,
+ VarAddressAdjustment);
+ if (NeedToCloneTypeDIE) {
+ // Create a cloned DIE which would be placed into the artificial type
+ // unit.
+ assert(ArtificialTypeUnit != nullptr);
+ DIEGenerator TypeDIEGenerator(
+ ArtificialTypeUnit->getTypePool().getThreadLocalAllocator(), *this);
+
+ ClonedDIE.second = createTypeDIEandCloneAttributes(
+ InputDieEntry, TypeDIEGenerator, ClonedParentTypeDIE,
+ ArtificialTypeUnit);
+ }
+ TypeEntry *TypeParentForChild =
+ ClonedDIE.second ? ClonedDIE.second : ClonedParentTypeDIE;
+
+ bool HasPlainChildrenToClone =
+ (ClonedDIE.first && Info.getKeepPlainChildren());
+
+ bool HasTypeChildrenToClone =
+ ((ClonedDIE.second ||
+ InputDieEntry->getTag() == dwarf::DW_TAG_compile_unit) &&
+ Info.getKeepTypeChildren());
+
+ // Recursively clone children.
+ if (HasPlainChildrenToClone || HasTypeChildrenToClone) {
+ for (const DWARFDebugInfoEntry *CurChild =
+ getFirstChildEntry(InputDieEntry);
+ CurChild && CurChild->getAbbreviationDeclarationPtr();
+ CurChild = getSiblingEntry(CurChild)) {
+ std::pair<DIE *, TypeEntry *> ClonedChild = cloneDIE(
+ CurChild, TypeParentForChild, OutOffset, FuncAddressAdjustment,
+ VarAddressAdjustment, Allocator, ArtificialTypeUnit);
+
+ if (ClonedChild.first) {
+ OutOffset =
+ ClonedChild.first->getOffset() + ClonedChild.first->getSize();
+ PlainDIEGenerator.addChild(ClonedChild.first);
+ }
+ }
+ assert(ClonedDIE.first == nullptr ||
+ HasPlainChildrenToClone == ClonedDIE.first->hasChildren());
+
+ // Account for the end of children marker.
+ if (HasPlainChildrenToClone)
+ OutOffset += sizeof(int8_t);
+ }
+
+ // Update our size.
+ if (ClonedDIE.first != nullptr)
+ ClonedDIE.first->setSize(OutOffset - ClonedDIE.first->getOffset());
+
+ return ClonedDIE;
+}
+
+DIE *CompileUnit::createPlainDIEandCloneAttributes(
+ const DWARFDebugInfoEntry *InputDieEntry, DIEGenerator &PlainDIEGenerator,
+ uint64_t &OutOffset, std::optional<int64_t> &FuncAddressAdjustment,
+ std::optional<int64_t> &VarAddressAdjustment) {
+ uint32_t InputDieIdx = getDIEIndex(InputDieEntry);
+ CompileUnit::DIEInfo &Info = getDIEInfo(InputDieIdx);
+ DIE *ClonedDIE = nullptr;
+ bool HasLocationExpressionAddress = false;
+ if (InputDieEntry->getTag() == dwarf::DW_TAG_subprogram) {
+ // Get relocation adjustment value for the current function.
+ FuncAddressAdjustment =
+ getContaingFile().Addresses->getSubprogramRelocAdjustment(
+ getDIE(InputDieEntry));
+ } else if (InputDieEntry->getTag() == dwarf::DW_TAG_label) {
+ // Get relocation adjustment value for the current label.
+ std::optional<uint64_t> lowPC =
+ dwarf::toAddress(find(InputDieEntry, dwarf::DW_AT_low_pc));
+ if (lowPC) {
+ LabelMapTy::iterator It = Labels.find(*lowPC);
+ if (It != Labels.end())
+ FuncAddressAdjustment = It->second;
+ }
+ } else if (InputDieEntry->getTag() == dwarf::DW_TAG_variable) {
+ // Get relocation adjustment value for the current variable.
+ std::pair<bool, std::optional<int64_t>> LocExprAddrAndRelocAdjustment =
+ getContaingFile().Addresses->getVariableRelocAdjustment(
+ getDIE(InputDieEntry));
+
+ HasLocationExpressionAddress = LocExprAddrAndRelocAdjustment.first;
+ if (LocExprAddrAndRelocAdjustment.first &&
+ LocExprAddrAndRelocAdjustment.second)
+ VarAddressAdjustment = *LocExprAddrAndRelocAdjustment.second;
+ }
+
+ ClonedDIE = PlainDIEGenerator.createDIE(InputDieEntry->getTag(), OutOffset);
+
+  // The offset to the DIE will be used after the output DIE tree is deleted,
+  // so we need to remember the DIE offset separately.
+ rememberDieOutOffset(InputDieIdx, OutOffset);
+
+ // Clone Attributes.
+ DIEAttributeCloner AttributesCloner(ClonedDIE, *this, this, InputDieEntry,
+ PlainDIEGenerator, FuncAddressAdjustment,
+ VarAddressAdjustment,
+ HasLocationExpressionAddress);
+ AttributesCloner.clone();
+
+ // Remember accelerator info.
+ AcceleratorRecordsSaver AccelRecordsSaver(getGlobalData(), *this, this);
+ AccelRecordsSaver.save(InputDieEntry, ClonedDIE, AttributesCloner.AttrInfo,
+ nullptr);
+
+ OutOffset =
+ AttributesCloner.finalizeAbbreviations(Info.getKeepPlainChildren());
+
+ return ClonedDIE;
+}
+
+/// Allocates output DIE for the specified \p TypeDescriptor.
+DIE *CompileUnit::allocateTypeDie(TypeEntryBody *TypeDescriptor,
+ DIEGenerator &TypeDIEGenerator,
+ dwarf::Tag DieTag, bool IsDeclaration,
+ bool IsParentDeclaration) {
+ DIE *DefinitionDie = TypeDescriptor->Die;
+  // Do not allocate a new DIE if the definition DIE already exists.
+ if (DefinitionDie)
+ return nullptr;
+
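+  // Note: this may be called concurrently for the same type entry from
+  // several worker threads; the compare_exchange_weak calls below ensure
+  // that exactly one thread allocates each DIE while the others get nullptr.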
+ DIE *DeclarationDie = TypeDescriptor->DeclarationDie;
+ bool OldParentIsDeclaration = TypeDescriptor->ParentIsDeclaration;
+
+ if (IsDeclaration && !DeclarationDie) {
+    // Allocate declaration DIE.
+ DIE *NewDie = TypeDIEGenerator.createDIE(DieTag, 0);
+ if (TypeDescriptor->DeclarationDie.compare_exchange_weak(DeclarationDie,
+ NewDie))
+ return NewDie;
+ } else if (IsDeclaration && !IsParentDeclaration && OldParentIsDeclaration) {
+    // Overwrite the existing declaration DIE if its parent is also a
+    // declaration while the parent of the current declaration DIE is a
+    // definition.
+ if (TypeDescriptor->ParentIsDeclaration.compare_exchange_weak(
+ OldParentIsDeclaration, false)) {
+ DIE *NewDie = TypeDIEGenerator.createDIE(DieTag, 0);
+ TypeDescriptor->DeclarationDie = NewDie;
+ return NewDie;
+ }
+ } else if (!IsDeclaration && IsParentDeclaration && !DeclarationDie) {
+    // Allocate a declaration DIE since the parent of the current DIE is
+    // marked as a declaration.
+ DIE *NewDie = TypeDIEGenerator.createDIE(DieTag, 0);
+ if (TypeDescriptor->DeclarationDie.compare_exchange_weak(DeclarationDie,
+ NewDie))
+ return NewDie;
+ } else if (!IsDeclaration && !IsParentDeclaration) {
+ // Allocate definition DIE.
+ DIE *NewDie = TypeDIEGenerator.createDIE(DieTag, 0);
+ if (TypeDescriptor->Die.compare_exchange_weak(DefinitionDie, NewDie)) {
+ TypeDescriptor->ParentIsDeclaration = false;
+ return NewDie;
+ }
+ }
+
+ return nullptr;
+}
+
+TypeEntry *CompileUnit::createTypeDIEandCloneAttributes(
+ const DWARFDebugInfoEntry *InputDieEntry, DIEGenerator &TypeDIEGenerator,
+ TypeEntry *ClonedParentTypeDIE, TypeUnit *ArtificialTypeUnit) {
+ assert(ArtificialTypeUnit != nullptr);
+ uint32_t InputDieIdx = getDIEIndex(InputDieEntry);
+
+ TypeEntry *Entry = getDieTypeEntry(InputDieIdx);
+ assert(Entry != nullptr);
+ assert(ClonedParentTypeDIE != nullptr);
+ TypeEntryBody *EntryBody =
+ ArtificialTypeUnit->getTypePool().getOrCreateTypeEntryBody(
+ Entry, ClonedParentTypeDIE);
+ assert(EntryBody);
+
+ bool IsDeclaration =
+ dwarf::toUnsigned(find(InputDieEntry, dwarf::DW_AT_declaration), 0);
+
+ bool ParentIsDeclaration = false;
+ if (std::optional<uint32_t> ParentIdx = InputDieEntry->getParentIdx())
+ ParentIsDeclaration =
+ dwarf::toUnsigned(find(*ParentIdx, dwarf::DW_AT_declaration), 0);
+
+ DIE *OutDIE =
+ allocateTypeDie(EntryBody, TypeDIEGenerator, InputDieEntry->getTag(),
+ IsDeclaration, ParentIsDeclaration);
+
+ if (OutDIE != nullptr) {
+ assert(ArtificialTypeUnit != nullptr);
+ ArtificialTypeUnit->getSectionDescriptor(DebugSectionKind::DebugInfo);
+
+ DIEAttributeCloner AttributesCloner(OutDIE, *this, ArtificialTypeUnit,
+ InputDieEntry, TypeDIEGenerator,
+ std::nullopt, std::nullopt, false);
+ AttributesCloner.clone();
+
+ // Remember accelerator info.
+ AcceleratorRecordsSaver AccelRecordsSaver(getGlobalData(), *this,
+ ArtificialTypeUnit);
+ AccelRecordsSaver.save(InputDieEntry, OutDIE, AttributesCloner.AttrInfo,
+ Entry);
+
+    // If AttributesCloner.getOutOffset() == 0 then we need to add
+    // 1 to avoid the assertion for zero size. We will subtract it back later.
+ OutDIE->setSize(AttributesCloner.getOutOffset() + 1);
+ }
+
+ return Entry;
+}
+
+Error CompileUnit::cloneAndEmitLineTable(Triple &TargetTriple) {
+ const DWARFDebugLine::LineTable *InputLineTable =
+ getContaingFile().Dwarf->getLineTableForUnit(&getOrigUnit());
+ if (InputLineTable == nullptr) {
+ if (getOrigUnit().getUnitDIE().find(dwarf::DW_AT_stmt_list))
+ warn("cann't load line table.");
+ return Error::success();
+ }
+
+ DWARFDebugLine::LineTable OutLineTable;
+
+ // Set Line Table header.
+ OutLineTable.Prologue = InputLineTable->Prologue;
+ OutLineTable.Prologue.FormParams.AddrSize = getFormParams().AddrSize;
+
+ // Set Line Table Rows.
+ if (getGlobalData().getOptions().UpdateIndexTablesOnly) {
+ OutLineTable.Rows = InputLineTable->Rows;
+    // If all the line table contains is a DW_LNE_end_sequence, clear the line
+    // table rows; the end_sequence will be inserted again by the
+    // DWARFStreamer.
+ if (OutLineTable.Rows.size() == 1 && OutLineTable.Rows[0].EndSequence)
+ OutLineTable.Rows.clear();
+
+ OutLineTable.Sequences = InputLineTable->Sequences;
+ } else {
+ // This vector is the output line table.
+ std::vector<DWARFDebugLine::Row> NewRows;
+ NewRows.reserve(InputLineTable->Rows.size());
+
+ // Current sequence of rows being extracted, before being inserted
+ // in NewRows.
+ std::vector<DWARFDebugLine::Row> Seq;
+
+ const auto &FunctionRanges = getFunctionRanges();
+ std::optional<AddressRangeValuePair> CurrRange;
+
+    // FIXME: This logic is meant to generate exactly the same output as
+    // Darwin's classic dsymutil. There is a nicer way to implement this
+    // by putting all the relocated line info in NewRows and simply
+    // sorting NewRows before passing it to emitLineTableForUnit. This
+    // should be correct as sequences for a function should stay
+    // together in the sorted output. There are a few corner cases that
+    // look suspicious though, and that required implementing the logic
+    // this way. Revisit that once initial validation is finished.
+
+ // Iterate over the object file line info and extract the sequences
+ // that correspond to linked functions.
+ for (DWARFDebugLine::Row Row : InputLineTable->Rows) {
+      // Check whether we stepped out of the range. The range is
+      // half-open, but consider accepting the end address of the range
+      // if it is marked as end_sequence in the input (because in that
+      // case, the relocation offset is accurate and that entry won't
+      // serve as the start of another function).
+ if (!CurrRange || !CurrRange->Range.contains(Row.Address.Address)) {
+        // We just stepped out of a known range. Insert an end_sequence
+        // corresponding to the end of the range.
+ uint64_t StopAddress =
+ CurrRange ? CurrRange->Range.end() + CurrRange->Value : -1ULL;
+ CurrRange = FunctionRanges.getRangeThatContains(Row.Address.Address);
+ if (StopAddress != -1ULL && !Seq.empty()) {
+ // Insert end sequence row with the computed end address, but
+ // the same line as the previous one.
+ auto NextLine = Seq.back();
+ NextLine.Address.Address = StopAddress;
+ NextLine.EndSequence = 1;
+ NextLine.PrologueEnd = 0;
+ NextLine.BasicBlock = 0;
+ NextLine.EpilogueBegin = 0;
+ Seq.push_back(NextLine);
+ insertLineSequence(Seq, NewRows);
+ }
+
+ if (!CurrRange)
+ continue;
+ }
+
+ // Ignore empty sequences.
+ if (Row.EndSequence && Seq.empty())
+ continue;
+
+ // Relocate row address and add it to the current sequence.
+ Row.Address.Address += CurrRange->Value;
+ Seq.emplace_back(Row);
+
+ if (Row.EndSequence)
+ insertLineSequence(Seq, NewRows);
+ }
+
+ OutLineTable.Rows = std::move(NewRows);
+ }
+
+ return emitDebugLine(TargetTriple, OutLineTable);
+}
+
+void CompileUnit::insertLineSequence(std::vector<DWARFDebugLine::Row> &Seq,
+ std::vector<DWARFDebugLine::Row> &Rows) {
+ if (Seq.empty())
+ return;
+
+ if (!Rows.empty() && Rows.back().Address < Seq.front().Address) {
+ llvm::append_range(Rows, Seq);
+ Seq.clear();
+ return;
+ }
+
+ object::SectionedAddress Front = Seq.front().Address;
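+  // partition_point() returns the first row whose address is not less than
+  // Front, i.e. the sorted insertion point for the new sequence.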
+ auto InsertPoint = partition_point(
+ Rows, [=](const DWARFDebugLine::Row &O) { return O.Address < Front; });
+
+  // FIXME: this only removes the unneeded end_sequence if the
+  // sequences have been inserted in order. Using a global sort as
+  // described in cloneAndEmitLineTable() and delaying the end_sequence
+  // elimination to DebugLineEmitter::emit(), we could get rid of all of them.
+ if (InsertPoint != Rows.end() && InsertPoint->Address == Front &&
+ InsertPoint->EndSequence) {
+ *InsertPoint = Seq.front();
+ Rows.insert(InsertPoint + 1, Seq.begin() + 1, Seq.end());
+ } else {
+ Rows.insert(InsertPoint, Seq.begin(), Seq.end());
+ }
+
+ Seq.clear();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void CompileUnit::DIEInfo::dump() {
+ llvm::errs() << "{";
+ llvm::errs() << " Placement: ";
+ switch (getPlacement()) {
+ case NotSet:
+ llvm::errs() << "NotSet";
+ break;
+ case TypeTable:
+ llvm::errs() << "TypeTable";
+ break;
+ case PlainDwarf:
+ llvm::errs() << "PlainDwarf";
+ break;
+ case Both:
+ llvm::errs() << "Both";
+ break;
+ }
+
+ llvm::errs() << " Keep: " << getKeep();
+ llvm::errs() << " KeepPlainChildren: " << getKeepPlainChildren();
+ llvm::errs() << " KeepTypeChildren: " << getKeepTypeChildren();
+ llvm::errs() << " IsInMouduleScope: " << getIsInMouduleScope();
+ llvm::errs() << " IsInFunctionScope: " << getIsInFunctionScope();
+ llvm::errs() << " IsInAnonNamespaceScope: " << getIsInAnonNamespaceScope();
+ llvm::errs() << " ODRAvailable: " << getODRAvailable();
+ llvm::errs() << " TrackLiveness: " << getTrackLiveness();
+ llvm::errs() << "}\n";
+}
+#endif // if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+
+std::optional<std::pair<StringRef, StringRef>>
+CompileUnit::getDirAndFilenameFromLineTable(
+ const DWARFFormValue &FileIdxValue) {
+ uint64_t FileIdx;
+ if (std::optional<uint64_t> Val = FileIdxValue.getAsUnsignedConstant())
+ FileIdx = *Val;
+ else if (std::optional<int64_t> Val = FileIdxValue.getAsSignedConstant())
+ FileIdx = *Val;
+ else if (std::optional<uint64_t> Val = FileIdxValue.getAsSectionOffset())
+ FileIdx = *Val;
+ else
+ return std::nullopt;
+
+ return getDirAndFilenameFromLineTable(FileIdx);
+}
+
+static bool isPathAbsoluteOnWindowsOrPosix(const Twine &Path) {
+  // Debug info can contain paths from any OS, not necessarily
+  // the OS we're currently running on. Moreover, different compilation units
+  // can be compiled on different operating systems and linked together later.
+ return sys::path::is_absolute(Path, sys::path::Style::posix) ||
+ sys::path::is_absolute(Path, sys::path::Style::windows);
+}
+
+std::optional<std::pair<StringRef, StringRef>>
+CompileUnit::getDirAndFilenameFromLineTable(uint64_t FileIdx) {
+ FileNamesCache::iterator FileData = FileNames.find(FileIdx);
+ if (FileData != FileNames.end())
+ return std::make_pair(StringRef(FileData->second.first),
+ StringRef(FileData->second.second));
+
+ if (const DWARFDebugLine::LineTable *LineTable =
+ getOrigUnit().getContext().getLineTableForUnit(&getOrigUnit())) {
+ if (LineTable->hasFileAtIndex(FileIdx)) {
+
+ const llvm::DWARFDebugLine::FileNameEntry &Entry =
+ LineTable->Prologue.getFileNameEntry(FileIdx);
+
+ Expected<const char *> Name = Entry.Name.getAsCString();
+ if (!Name) {
+ warn(Name.takeError());
+ return std::nullopt;
+ }
+
+ std::string FileName = *Name;
+ if (isPathAbsoluteOnWindowsOrPosix(FileName)) {
+ FileNamesCache::iterator FileData =
+ FileNames
+ .insert(std::make_pair(
+ FileIdx,
+ std::make_pair(std::string(""), std::move(FileName))))
+ .first;
+ return std::make_pair(StringRef(FileData->second.first),
+ StringRef(FileData->second.second));
+ }
+
+ SmallString<256> FilePath;
+ StringRef IncludeDir;
+ // Be defensive about the contents of Entry.
+ if (getVersion() >= 5) {
+ // DirIdx 0 is the compilation directory, so don't include it for
+ // relative names.
+ if ((Entry.DirIdx != 0) &&
+ Entry.DirIdx < LineTable->Prologue.IncludeDirectories.size()) {
+ Expected<const char *> DirName =
+ LineTable->Prologue.IncludeDirectories[Entry.DirIdx]
+ .getAsCString();
+ if (DirName)
+ IncludeDir = *DirName;
+ else {
+ warn(DirName.takeError());
+ return std::nullopt;
+ }
+ }
+ } else {
+ if (0 < Entry.DirIdx &&
+ Entry.DirIdx <= LineTable->Prologue.IncludeDirectories.size()) {
+ Expected<const char *> DirName =
+ LineTable->Prologue.IncludeDirectories[Entry.DirIdx - 1]
+ .getAsCString();
+ if (DirName)
+ IncludeDir = *DirName;
+ else {
+ warn(DirName.takeError());
+ return std::nullopt;
+ }
+ }
+ }
+
+ StringRef CompDir = getOrigUnit().getCompilationDir();
+
+ if (!CompDir.empty() && !isPathAbsoluteOnWindowsOrPosix(IncludeDir)) {
+ sys::path::append(FilePath, sys::path::Style::native, CompDir);
+ }
+
+ sys::path::append(FilePath, sys::path::Style::native, IncludeDir);
+
+ FileNamesCache::iterator FileData =
+ FileNames
+ .insert(
+ std::make_pair(FileIdx, std::make_pair(std::string(FilePath),
+ std::move(FileName))))
+ .first;
+ return std::make_pair(StringRef(FileData->second.first),
+ StringRef(FileData->second.second));
+ }
+ }
+
+ return std::nullopt;
+}
+
+#define MAX_REFERENCIES_DEPTH 1000
+UnitEntryPairTy UnitEntryPairTy::getNamespaceOrigin() {
+ UnitEntryPairTy CUDiePair(*this);
+ std::optional<UnitEntryPairTy> RefDiePair;
+ int refDepth = 0;
+ do {
+ RefDiePair = CUDiePair.CU->resolveDIEReference(
+ CUDiePair.DieEntry, dwarf::DW_AT_extension,
+ ResolveInterCUReferencesMode::Resolve);
+ if (!RefDiePair || !RefDiePair->DieEntry)
+ return CUDiePair;
+
+ CUDiePair = *RefDiePair;
+ } while (refDepth++ < MAX_REFERENCIES_DEPTH);
+
+ return CUDiePair;
+}
+
+std::optional<UnitEntryPairTy> UnitEntryPairTy::getParent() {
+ if (std::optional<uint32_t> ParentIdx = DieEntry->getParentIdx())
+ return UnitEntryPairTy{CU, CU->getDebugInfoEntry(*ParentIdx)};
+
+ return std::nullopt;
+}
+
+CompileUnit::OutputUnitVariantPtr::OutputUnitVariantPtr(CompileUnit *U)
+ : Ptr(U) {
+ assert(U != nullptr);
+}
+
+CompileUnit::OutputUnitVariantPtr::OutputUnitVariantPtr(TypeUnit *U) : Ptr(U) {
+ assert(U != nullptr);
+}
+
+DwarfUnit *CompileUnit::OutputUnitVariantPtr::operator->() {
+ if (isCompileUnit())
+ return getAsCompileUnit();
+ else
+ return getAsTypeUnit();
+}
+
+bool CompileUnit::OutputUnitVariantPtr::isCompileUnit() {
+ return Ptr.is<CompileUnit *>();
+}
+
+bool CompileUnit::OutputUnitVariantPtr::isTypeUnit() {
+ return Ptr.is<TypeUnit *>();
+}
+
+CompileUnit *CompileUnit::OutputUnitVariantPtr::getAsCompileUnit() {
+ return Ptr.get<CompileUnit *>();
+}
+
+TypeUnit *CompileUnit::OutputUnitVariantPtr::getAsTypeUnit() {
+ return Ptr.get<TypeUnit *>();
+}
+
+bool CompileUnit::resolveDependenciesAndMarkLiveness(
+ bool InterCUProcessingStarted, std::atomic<bool> &HasNewInterconnectedCUs) {
+ if (!Dependencies.get())
+ Dependencies.reset(new DependencyTracker(*this));
+
+ return Dependencies->resolveDependenciesAndMarkLiveness(
+ InterCUProcessingStarted, HasNewInterconnectedCUs);
+}
+
+bool CompileUnit::updateDependenciesCompleteness() {
+ assert(Dependencies.get());
+
+ return Dependencies.get()->updateDependenciesCompleteness();
+}
+
+void CompileUnit::verifyDependencies() {
+ assert(Dependencies.get());
+
+ Dependencies.get()->verifyKeepChain();
+}
+
+ArrayRef<dwarf::Attribute> llvm::dwarflinker_parallel::getODRAttributes() {
+ static dwarf::Attribute ODRAttributes[] = {
+ dwarf::DW_AT_type, dwarf::DW_AT_specification,
+ dwarf::DW_AT_abstract_origin, dwarf::DW_AT_import};
+
+ return ODRAttributes;
+}
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerCompileUnit.h b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerCompileUnit.h
index 1617a848512d..28fcc34d867d 100644
--- a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerCompileUnit.h
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerCompileUnit.h
@@ -11,53 +11,438 @@
#include "DWARFLinkerUnit.h"
#include "llvm/DWARFLinkerParallel/DWARFFile.h"
-#include "llvm/DWARFLinkerParallel/DWARFLinker.h"
#include <optional>
namespace llvm {
namespace dwarflinker_parallel {
-struct LinkContext;
-class DWARFFile;
+using OffsetToUnitTy = function_ref<CompileUnit *(uint64_t Offset)>;
+
+struct AttributesInfo;
+class SyntheticTypeNameBuilder;
+class DIEGenerator;
+class TypeUnit;
+class DependencyTracker;
+
+class CompileUnit;
+
+/// This is a helper structure which keeps a debug info entry
+/// with its containing compilation unit.
+struct UnitEntryPairTy {
+ UnitEntryPairTy() = default;
+ UnitEntryPairTy(CompileUnit *CU, const DWARFDebugInfoEntry *DieEntry)
+ : CU(CU), DieEntry(DieEntry) {}
+
+ CompileUnit *CU = nullptr;
+ const DWARFDebugInfoEntry *DieEntry = nullptr;
+
+ UnitEntryPairTy getNamespaceOrigin();
+ std::optional<UnitEntryPairTy> getParent();
+};
+
+enum ResolveInterCUReferencesMode : bool {
+ Resolve = true,
+ AvoidResolving = false,
+};
/// Stores all information related to a compile unit, be it in its original
/// instance of the object file or its brand new cloned and generated DIE tree.
-class CompileUnit : public DwarfUnit {
+/// NOTE: we need alignment of at least 8 bytes as we use
+/// PointerIntPair<CompileUnit *, 3> in DependencyTracker.h.
+class alignas(8) CompileUnit : public DwarfUnit {
public:
- CompileUnit(LinkContext &, unsigned ID, StringRef ClangModuleName,
- DWARFFile &File,
- DWARFLinker::SwiftInterfacesMapTy *,
- UnitMessageHandlerTy WarningHandler)
- : DwarfUnit(ID, ClangModuleName, WarningHandler), ContaingFile(File) {
- FormParams.Version = 4;
- FormParams.Format = dwarf::DWARF32;
- FormParams.AddrSize = 4;
- UnitName = ContaingFile.FileName;
- }
-
- CompileUnit(LinkContext &, DWARFUnit &OrigUnit, unsigned ID,
+ /// The stages of new compile unit processing.
+ enum class Stage : uint8_t {
+ /// Created, linked with input DWARF file.
+ CreatedNotLoaded = 0,
+
+ /// Input DWARF is loaded.
+ Loaded,
+
+    /// Input DWARF is analysed (DIEs pointing to the real code section are
+    /// discovered, type names are assigned if ODR is requested).
+ LivenessAnalysisDone,
+
+ /// Check if dependencies have incompatible placement.
+ /// If that is the case modify placement to be compatible.
+ UpdateDependenciesCompleteness,
+
+ /// Type names assigned to DIEs.
+ TypeNamesAssigned,
+
+ /// Output DWARF is generated.
+ Cloned,
+
+ /// Offsets inside patch records are updated.
+ PatchesUpdated,
+
+ /// Resources(Input DWARF, Output DWARF tree) are released.
+ Cleaned,
+
+    /// Compile unit should be skipped.
+ Skipped
+ };
+
+ CompileUnit(LinkingGlobalData &GlobalData, unsigned ID,
StringRef ClangModuleName, DWARFFile &File,
- UnitMessageHandlerTy WarningHandler)
- : DwarfUnit(ID, ClangModuleName, WarningHandler),
- ContaingFile(File), OrigUnit(&OrigUnit) {
- DWARFDie CUDie = OrigUnit.getUnitDIE();
- if (!CUDie)
- return;
+ OffsetToUnitTy UnitFromOffset, dwarf::FormParams Format,
+ llvm::endianness Endianess);
+
+ CompileUnit(LinkingGlobalData &GlobalData, DWARFUnit &OrigUnit, unsigned ID,
+ StringRef ClangModuleName, DWARFFile &File,
+ OffsetToUnitTy UnitFromOffset, dwarf::FormParams Format,
+ llvm::endianness Endianess);
+
+ /// Returns stage of overall processing.
+ Stage getStage() const { return Stage; }
+
+ /// Set stage of overall processing.
+ void setStage(Stage Stage) { this->Stage = Stage; }
+
+ /// Loads unit line table.
+ void loadLineTable();
- if (File.Dwarf)
- Endianess = File.Dwarf->isLittleEndian() ? support::endianness::little
- : support::endianness::big;
+  /// Returns the name of the file for the \p FileIdx
+  /// from the unit's line table.
+ StringEntry *getFileName(unsigned FileIdx, StringPool &GlobalStrings);
- FormParams.Version = OrigUnit.getVersion();
- FormParams.Format = dwarf::DWARF32;
- FormParams.AddrSize = OrigUnit.getAddressByteSize();
+ /// Returns DWARFFile containing this compile unit.
+ const DWARFFile &getContaingFile() const { return File; }
- Language = dwarf::toUnsigned(CUDie.find(dwarf::DW_AT_language), 0);
+ /// Load DIEs of input compilation unit. \returns true if input DIEs
+ /// successfully loaded.
+ bool loadInputDIEs();
- UnitName = ContaingFile.FileName;
- SysRoot = dwarf::toStringRef(CUDie.find(dwarf::DW_AT_LLVM_sysroot)).str();
+  /// Reset the compile unit's data (results of liveness analysis, cloning)
+  /// if the current stage is greater than Stage::Loaded. We need to reset
+  /// the data as we are going to repeat stages.
+ void maybeResetToLoadedStage();
+
+ /// Collect references to parseable Swift interfaces in imported
+ /// DW_TAG_module blocks.
+ void analyzeImportedModule(const DWARFDebugInfoEntry *DieEntry);
+
+ /// Navigate DWARF tree and set die properties.
+ void analyzeDWARFStructure() {
+ analyzeDWARFStructureRec(getUnitDIE().getDebugInfoEntry(), false);
}
+ /// Cleanup unneeded resources after compile unit is cloned.
+ void cleanupDataAfterClonning();
+
+  /// After the cloning stage the output DIE offsets are deallocated.
+  /// This method copies output offsets for referenced DIEs into DIE patches.
+ void updateDieRefPatchesWithClonedOffsets();
+
+  /// Search for subprograms and variables referencing live code and discover
+  /// dependent DIEs. Mark live DIEs, set placement for DIEs.
+ bool resolveDependenciesAndMarkLiveness(
+ bool InterCUProcessingStarted,
+ std::atomic<bool> &HasNewInterconnectedCUs);
+
+  /// Check dependent DIEs for incompatible placement
+  /// and make placement consistent.
+ bool updateDependenciesCompleteness();
+
+  /// Check DIEs to have consistent marking (keep marking, placement marking).
+ void verifyDependencies();
+
+ /// Search for type entries and assign names.
+ Error assignTypeNames(TypePool &TypePoolRef);
+
+ /// Kinds of placement for the output die.
+ enum DieOutputPlacement : uint8_t {
+ NotSet = 0,
+
+ /// Corresponding DIE goes to the type table only.
+ TypeTable = 1,
+
+ /// Corresponding DIE goes to the plain dwarf only.
+ PlainDwarf = 2,
+
+ /// Corresponding DIE goes to type table and to plain dwarf.
+ Both = 3,
+ };
+
+ /// Information gathered about source DIEs.
+ struct DIEInfo {
+ DIEInfo() = default;
+ DIEInfo(const DIEInfo &Other) { Flags = Other.Flags.load(); }
+ DIEInfo &operator=(const DIEInfo &Other) {
+ Flags = Other.Flags.load();
+ return *this;
+ }
+
+ /// Data member keeping various flags.
+ std::atomic<uint16_t> Flags = {0};
+
+ /// \returns Placement kind for the corresponding die.
+ DieOutputPlacement getPlacement() const {
+ return DieOutputPlacement(Flags & 0x7);
+ }
+
+ /// Sets Placement kind for the corresponding die.
+ void setPlacement(DieOutputPlacement Placement) {
+ auto InputData = Flags.load();
+ while (!Flags.compare_exchange_weak(InputData,
+ ((InputData & ~0x7) | Placement))) {
+ }
+ }
+
+ /// Unsets Placement kind for the corresponding die.
+ void unsetPlacement() {
+ auto InputData = Flags.load();
+ while (!Flags.compare_exchange_weak(InputData, (InputData & ~0x7))) {
+ }
+ }
+
+ /// Sets Placement kind for the corresponding die.
+ bool setPlacementIfUnset(DieOutputPlacement Placement) {
+ auto InputData = Flags.load();
+ if ((InputData & 0x7) == NotSet)
+ if (Flags.compare_exchange_weak(InputData, (InputData | Placement)))
+ return true;
+
+ return false;
+ }
+
+#define SINGLE_FLAG_METHODS_SET(Name, Value) \
+ bool get##Name() const { return Flags & Value; } \
+ void set##Name() { \
+ auto InputData = Flags.load(); \
+ while (!Flags.compare_exchange_weak(InputData, InputData | Value)) { \
+ } \
+ } \
+ void unset##Name() { \
+ auto InputData = Flags.load(); \
+ while (!Flags.compare_exchange_weak(InputData, InputData & ~Value)) { \
+ } \
+ }
+
+ /// DIE is a part of the linked output.
+ SINGLE_FLAG_METHODS_SET(Keep, 0x08)
+
+ /// DIE has children which are part of the linked output.
+ SINGLE_FLAG_METHODS_SET(KeepPlainChildren, 0x10)
+
+ /// DIE has children which are part of the type table.
+ SINGLE_FLAG_METHODS_SET(KeepTypeChildren, 0x20)
+
+ /// DIE is in module scope.
+ SINGLE_FLAG_METHODS_SET(IsInMouduleScope, 0x40)
+
+ /// DIE is in function scope.
+ SINGLE_FLAG_METHODS_SET(IsInFunctionScope, 0x80)
+
+ /// DIE is in anonymous namespace scope.
+ SINGLE_FLAG_METHODS_SET(IsInAnonNamespaceScope, 0x100)
+
+ /// DIE is available for ODR type deduplication.
+ SINGLE_FLAG_METHODS_SET(ODRAvailable, 0x200)
+
+ /// Track liveness for the DIE.
+ SINGLE_FLAG_METHODS_SET(TrackLiveness, 0x400)
+
+    /// DIE has an associated address.
+ SINGLE_FLAG_METHODS_SET(HasAnAddress, 0x800)
+
+ void unsetFlagsWhichSetDuringLiveAnalysis() {
+ auto InputData = Flags.load();
+ while (!Flags.compare_exchange_weak(
+ InputData, InputData & ~(0x7 | 0x8 | 0x10 | 0x20))) {
+ }
+ }
+
+ /// Erase all flags.
+ void eraseData() { Flags = 0; }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump();
+#endif
+
+ bool needToPlaceInTypeTable() const {
+ return (getKeep() && (getPlacement() == CompileUnit::TypeTable ||
+ getPlacement() == CompileUnit::Both)) ||
+ getKeepTypeChildren();
+ }
+
+ bool needToKeepInPlainDwarf() const {
+ return (getKeep() && (getPlacement() == CompileUnit::PlainDwarf ||
+ getPlacement() == CompileUnit::Both)) ||
+ getKeepPlainChildren();
+ }
+ };
+
+ /// \defgroup Group of functions returning DIE info.
+ ///
+ /// @{
+
+ /// \p Idx index of the DIE.
+ /// \returns DieInfo descriptor.
+ DIEInfo &getDIEInfo(unsigned Idx) { return DieInfoArray[Idx]; }
+
+ /// \p Idx index of the DIE.
+ /// \returns DieInfo descriptor.
+ const DIEInfo &getDIEInfo(unsigned Idx) const { return DieInfoArray[Idx]; }
+
+  /// \p Entry debug info entry.
+  /// \returns DieInfo descriptor.
+ DIEInfo &getDIEInfo(const DWARFDebugInfoEntry *Entry) {
+ return DieInfoArray[getOrigUnit().getDIEIndex(Entry)];
+ }
+
+  /// \p Entry debug info entry.
+  /// \returns DieInfo descriptor.
+ const DIEInfo &getDIEInfo(const DWARFDebugInfoEntry *Entry) const {
+ return DieInfoArray[getOrigUnit().getDIEIndex(Entry)];
+ }
+
+  /// \p Die
+  /// \returns DieInfo descriptor.
+ DIEInfo &getDIEInfo(const DWARFDie &Die) {
+ return DieInfoArray[getOrigUnit().getDIEIndex(Die)];
+ }
+
+  /// \p Die
+  /// \returns DieInfo descriptor.
+ const DIEInfo &getDIEInfo(const DWARFDie &Die) const {
+ return DieInfoArray[getOrigUnit().getDIEIndex(Die)];
+ }
+
+  /// \p Idx index of the DIE.
+  /// \returns offset of the output DIE.
+ uint64_t getDieOutOffset(uint32_t Idx) {
+ return reinterpret_cast<std::atomic<uint64_t> *>(&OutDieOffsetArray[Idx])
+ ->load();
+ }
+
+ /// \p Idx index of the DIE.
+ /// \returns type entry.
+ TypeEntry *getDieTypeEntry(uint32_t Idx) {
+ return reinterpret_cast<std::atomic<TypeEntry *> *>(&TypeEntries[Idx])
+ ->load();
+ }
+
+  /// \p InputDieEntry debug info entry.
+  /// \returns offset of the output DIE.
+ uint64_t getDieOutOffset(const DWARFDebugInfoEntry *InputDieEntry) {
+ return reinterpret_cast<std::atomic<uint64_t> *>(
+ &OutDieOffsetArray[getOrigUnit().getDIEIndex(InputDieEntry)])
+ ->load();
+ }
+
+ /// \p InputDieEntry debug info entry.
+ /// \returns type entry.
+ TypeEntry *getDieTypeEntry(const DWARFDebugInfoEntry *InputDieEntry) {
+ return reinterpret_cast<std::atomic<TypeEntry *> *>(
+ &TypeEntries[getOrigUnit().getDIEIndex(InputDieEntry)])
+ ->load();
+ }
+
+  /// \p Idx index of the DIE.
+  /// \p Offset offset to remember for the output DIE.
+ void rememberDieOutOffset(uint32_t Idx, uint64_t Offset) {
+ reinterpret_cast<std::atomic<uint64_t> *>(&OutDieOffsetArray[Idx])
+ ->store(Offset);
+ }
+
+  /// \p Idx index of the DIE.
+  /// \p Entry type entry.
+ void setDieTypeEntry(uint32_t Idx, TypeEntry *Entry) {
+ reinterpret_cast<std::atomic<TypeEntry *> *>(&TypeEntries[Idx])
+ ->store(Entry);
+ }
+
+  /// \p InputDieEntry debug info entry.
+  /// \p Entry type entry.
+ void setDieTypeEntry(const DWARFDebugInfoEntry *InputDieEntry,
+ TypeEntry *Entry) {
+ reinterpret_cast<std::atomic<TypeEntry *> *>(
+ &TypeEntries[getOrigUnit().getDIEIndex(InputDieEntry)])
+ ->store(Entry);
+ }
+
+ /// @}
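+
+ // Note: the getters/setters above reinterpret plain array slots as
+ // std::atomic so that concurrent readers and writers exchange whole
+ // values without a mutex. Reduced to a minimal sketch (illustrative only):
+ //
+ //   uint64_t Slot = 0;
+ //   reinterpret_cast<std::atomic<uint64_t> *>(&Slot)->store(42);
+ //   uint64_t V = reinterpret_cast<std::atomic<uint64_t> *>(&Slot)->load();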
+
+ /// Returns value of DW_AT_low_pc attribute.
+ std::optional<uint64_t> getLowPc() const { return LowPc; }
+
+ /// Returns value of DW_AT_high_pc attribute.
+ uint64_t getHighPc() const { return HighPc; }
+
+ /// Returns true if there is a label corresponding to the specified \p Addr.
+ bool hasLabelAt(uint64_t Addr) const { return Labels.count(Addr); }
+
+ /// Add the low_pc of a label that is relocated by applying
+ /// offset \p PCOffset.
+ void addLabelLowPc(uint64_t LabelLowPc, int64_t PcOffset);
+
+ /// Resolve the DIE attribute reference that has been extracted in \p
+ /// RefValue. The resulting DIE might be in another CompileUnit.
+ /// \returns the referenced DIE and the corresponding compilation unit.
+ /// The compilation unit is null if the reference could not be resolved.
+ std::optional<UnitEntryPairTy>
+ resolveDIEReference(const DWARFFormValue &RefValue,
+ ResolveInterCUReferencesMode CanResolveInterCUReferences);
+
+ std::optional<UnitEntryPairTy>
+ resolveDIEReference(const DWARFDebugInfoEntry *DieEntry,
+ dwarf::Attribute Attr,
+ ResolveInterCUReferencesMode CanResolveInterCUReferences);
+
+ /// @}
+
+ /// Add a function range [\p LowPC, \p HighPC) that is relocated by applying
+ /// offset \p PCOffset.
+ void addFunctionRange(uint64_t LowPC, uint64_t HighPC, int64_t PCOffset);
+
+ /// Returns function ranges of this unit.
+ const RangesTy &getFunctionRanges() const { return Ranges; }
+
+ /// Clone and emit this compilation unit.
+ Error cloneAndEmit(std::optional<Triple> TargetTriple,
+ TypeUnit *ArtificialTypeUnit);
+
+ /// Clone and emit debug locations (.debug_loc/.debug_loclists).
+ Error cloneAndEmitDebugLocations();
+
+ /// Clone and emit ranges.
+ Error cloneAndEmitRanges();
+
+ /// Clone and emit debug macros (.debug_macinfo/.debug_macro).
+ Error cloneAndEmitDebugMacro();
+
+ /// Clone input DIE entry.
+ std::pair<DIE *, TypeEntry *>
+ cloneDIE(const DWARFDebugInfoEntry *InputDieEntry,
+ TypeEntry *ClonedParentTypeDIE, uint64_t OutOffset,
+ std::optional<int64_t> FuncAddressAdjustment,
+ std::optional<int64_t> VarAddressAdjustment,
+ BumpPtrAllocator &Allocator, TypeUnit *ArtificialTypeUnit);
+
+ /// Clone and emit line table.
+ Error cloneAndEmitLineTable(Triple &TargetTriple);
+
+ /// Clone attribute location expression.
+ void cloneDieAttrExpression(const DWARFExpression &InputExpression,
+ SmallVectorImpl<uint8_t> &OutputExpression,
+ SectionDescriptor &Section,
+ std::optional<int64_t> VarAddressAdjustment,
+ OffsetsPtrVector &PatchesOffsets);
+
+ /// Returns index (inside .debug_addr) of an address.
+ uint64_t getDebugAddrIndex(uint64_t Addr) {
+ return DebugAddrIndexMap.getValueIndex(Addr);
+ }
+
+ /// Returns directory and file from the line table by index.
+ std::optional<std::pair<StringRef, StringRef>>
+ getDirAndFilenameFromLineTable(const DWARFFormValue &FileIdxValue);
+
+ /// Returns directory and file from the line table by index.
+ std::optional<std::pair<StringRef, StringRef>>
+ getDirAndFilenameFromLineTable(uint64_t FileIdx);
+
/// \defgroup Helper methods to access OrigUnit.
///
/// @{
@@ -142,14 +527,209 @@ public:
/// @}
+ /// \defgroup Methods used for reporting warnings and errors:
+ ///
+ /// @{
+
+ void warn(const Twine &Warning, const DWARFDie *DIE = nullptr) {
+ GlobalData.warn(Warning, getUnitName(), DIE);
+ }
+
+ void warn(Error Warning, const DWARFDie *DIE = nullptr) {
+ handleAllErrors(std::move(Warning), [&](ErrorInfoBase &Info) {
+ GlobalData.warn(Info.message(), getUnitName(), DIE);
+ });
+ }
+
+ void warn(const Twine &Warning, const DWARFDebugInfoEntry *DieEntry) {
+ if (DieEntry != nullptr) {
+ DWARFDie DIE(&getOrigUnit(), DieEntry);
+ GlobalData.warn(Warning, getUnitName(), &DIE);
+ return;
+ }
+
+ GlobalData.warn(Warning, getUnitName());
+ }
+
+ void error(const Twine &Err, const DWARFDie *DIE = nullptr) {
+ GlobalData.error(Err, getUnitName(), DIE);
+ }
+
+ void error(Error Err, const DWARFDie *DIE = nullptr) {
+ handleAllErrors(std::move(Err), [&](ErrorInfoBase &Info) {
+ GlobalData.error(Info.message(), getUnitName(), DIE);
+ });
+ }
+
+ /// @}
+
+ /// Save specified accelerator info \p Info.
+ void saveAcceleratorInfo(const DwarfUnit::AccelInfo &Info) {
+ AcceleratorRecords.add(Info);
+ }
+
+ /// Enumerates all accelerator records of this unit.
+ void
+ forEachAcceleratorRecord(function_ref<void(AccelInfo &)> Handler) override {
+ AcceleratorRecords.forEach(Handler);
+ }
+
+ /// Output unit selector.
+ class OutputUnitVariantPtr {
+ public:
+ OutputUnitVariantPtr(CompileUnit *U);
+ OutputUnitVariantPtr(TypeUnit *U);
+
+ /// Accessor for common functionality.
+ DwarfUnit *operator->();
+
+ bool isCompileUnit();
+
+ bool isTypeUnit();
+
+ /// Returns CompileUnit if applicable.
+ CompileUnit *getAsCompileUnit();
+
+ /// Returns TypeUnit if applicable.
+ TypeUnit *getAsTypeUnit();
+
+ protected:
+ PointerUnion<CompileUnit *, TypeUnit *> Ptr;
+ };
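+
+ // A minimal usage sketch (saveRecord() is a hypothetical caller, not part
+ // of this interface):
+ //
+ //   void saveRecord(OutputUnitVariantPtr U, const DwarfUnit::AccelInfo &AI) {
+ //     if (U.isCompileUnit())
+ //       U.getAsCompileUnit()->saveAcceleratorInfo(AI);
+ //   }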
+
private:
+ /// Navigate DWARF tree recursively and set die properties.
+ void analyzeDWARFStructureRec(const DWARFDebugInfoEntry *DieEntry,
+ bool IsODRUnavailableFunctionScope);
+
+ struct LinkedLocationExpressionsWithOffsetPatches {
+ DWARFLocationExpression Expression;
+ OffsetsPtrVector Patches;
+ };
+ using LinkedLocationExpressionsVector =
+ SmallVector<LinkedLocationExpressionsWithOffsetPatches>;
+
+ /// Emit debug locations.
+ void emitLocations(DebugSectionKind LocationSectionKind);
+
+ /// Emit location list header.
+ uint64_t emitLocListHeader(SectionDescriptor &OutLocationSection);
+
+ /// Emit location list fragment.
+ uint64_t emitLocListFragment(
+ const LinkedLocationExpressionsVector &LinkedLocationExpression,
+ SectionDescriptor &OutLocationSection);
+
+ /// Emit the .debug_addr section fragment for current unit.
+ Error emitDebugAddrSection();
+
+ /// Emit .debug_aranges.
+ void emitAranges(AddressRanges &LinkedFunctionRanges);
+
+ /// Clone and emit .debug_ranges/.debug_rnglists.
+ void cloneAndEmitRangeList(DebugSectionKind RngSectionKind,
+ AddressRanges &LinkedFunctionRanges);
+
+ /// Emit range list header.
+ uint64_t emitRangeListHeader(SectionDescriptor &OutRangeSection);
+
+ /// Emit range list fragment.
+ void emitRangeListFragment(const AddressRanges &LinkedRanges,
+ SectionDescriptor &OutRangeSection);
+
+ /// Insert the new line info sequence \p Seq into the current
+ /// set of already linked line info \p Rows.
+ void insertLineSequence(std::vector<DWARFDebugLine::Row> &Seq,
+ std::vector<DWARFDebugLine::Row> &Rows);
+
+ /// Emits body for both macro sections.
+ void emitMacroTableImpl(const DWARFDebugMacro *MacroTable,
+ uint64_t OffsetToMacroTable, bool hasDWARFv5Header);
+
+ /// Creates a DIE that will be placed into the "Plain" compile unit.
+ DIE *createPlainDIEandCloneAttributes(
+ const DWARFDebugInfoEntry *InputDieEntry, DIEGenerator &PlainDIEGenerator,
+ uint64_t &OutOffset, std::optional<int64_t> &FuncAddressAdjustment,
+ std::optional<int64_t> &VarAddressAdjustment);
+
+ /// Creates a DIE that will be placed into the "Type" compile unit.
+ TypeEntry *createTypeDIEandCloneAttributes(
+ const DWARFDebugInfoEntry *InputDieEntry, DIEGenerator &TypeDIEGenerator,
+ TypeEntry *ClonedParentTypeDIE, TypeUnit *ArtificialTypeUnit);
+
+ /// Create output DIE inside specified \p TypeDescriptor.
+ DIE *allocateTypeDie(TypeEntryBody *TypeDescriptor,
+ DIEGenerator &TypeDIEGenerator, dwarf::Tag DieTag,
+ bool IsDeclaration, bool IsParentDeclaration);
+
+ /// Enumerate \p DieEntry children and assign names for them.
+ Error assignTypeNamesRec(const DWARFDebugInfoEntry *DieEntry,
+ SyntheticTypeNameBuilder &NameBuilder);
+
/// DWARFFile containing this compile unit.
- DWARFFile &ContaingFile;
+ DWARFFile &File;
/// Pointer to the paired compile unit from the input DWARF.
DWARFUnit *OrigUnit = nullptr;
+
+ /// The DW_AT_language of this unit.
+ std::optional<uint16_t> Language;
+
+ /// Line table for this unit.
+ const DWARFDebugLine::LineTable *LineTablePtr = nullptr;
+
+ /// Cached resolved paths from the line table.
+ /// The key is <UniqueUnitID, FileIdx>.
+ using ResolvedPathsMap = DenseMap<unsigned, StringEntry *>;
+ ResolvedPathsMap ResolvedFullPaths;
+ StringMap<StringEntry *> ResolvedParentPaths;
+
+ /// Maps an address into the index inside .debug_addr section.
+ IndexedValuesMap<uint64_t> DebugAddrIndexMap;
+
+ std::unique_ptr<DependencyTracker> Dependencies;
+
+ /// \defgroup Data members accessed asynchronously.
+ ///
+ /// @{
+ OffsetToUnitTy getUnitFromOffset;
+
+ std::optional<uint64_t> LowPc;
+ uint64_t HighPc = 0;
+
+ /// Flag indicating whether type de-duplication is forbidden.
+ bool NoODR = true;
+
+ /// The ranges in this map are the PC ranges for functions in this unit,
+ /// associated with the PC offset to apply to the addresses to get
+ /// the linked address.
+ RangesTy Ranges;
+ std::mutex RangesMutex;
+
+ /// The DW_AT_low_pc of each DW_TAG_label.
+ using LabelMapTy = SmallDenseMap<uint64_t, uint64_t, 1>;
+ LabelMapTy Labels;
+ std::mutex LabelsMutex;
+
+ /// This field keeps current stage of overall compile unit processing.
+ std::atomic<Stage> Stage;
+
+ /// DIE info indexed by DIE index.
+ SmallVector<DIEInfo> DieInfoArray;
+ SmallVector<uint64_t> OutDieOffsetArray;
+ SmallVector<TypeEntry *> TypeEntries;
+
+ /// The list of accelerator records for this unit.
+ ArrayList<AccelInfo> AcceleratorRecords;
+ /// @}
};
+/// \returns list of attributes referencing type DIEs which might be
+/// deduplicated.
+/// Note: it does not include DW_AT_containing_type attribute to avoid
+/// infinite recursion.
+ArrayRef<dwarf::Attribute> getODRAttributes();
+
} // end of namespace dwarflinker_parallel
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerGlobalData.h b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerGlobalData.h
new file mode 100644
index 000000000000..31724770093d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerGlobalData.h
@@ -0,0 +1,159 @@
+//===- DWARFLinkerGlobalData.h ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERGLOBALDATA_H
+#define LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERGLOBALDATA_H
+
+#include "TypePool.h"
+#include "llvm/DWARFLinkerParallel/DWARFLinker.h"
+#include "llvm/DWARFLinkerParallel/StringPool.h"
+#include "llvm/Support/PerThreadBumpPtrAllocator.h"
+
+namespace llvm {
+
+class DWARFDie;
+
+namespace dwarflinker_parallel {
+
+using TranslatorFuncTy = std::function<StringRef(StringRef)>;
+using MessageHandlerTy = std::function<void(
+ const Twine &Warning, StringRef Context, const DWARFDie *DIE)>;
+
+/// Linking options.
+struct DWARFLinkerOptions {
+ /// DWARF version for the output.
+ uint16_t TargetDWARFVersion = 0;
+
+ /// Generate processing log to the standard output.
+ bool Verbose = false;
+
+ /// Print statistics.
+ bool Statistics = false;
+
+ /// Verify the input DWARF.
+ bool VerifyInputDWARF = false;
+
+ /// Do not emit output.
+ bool NoOutput = false;
+
+ /// Do not unique types according to ODR.
+ bool NoODR = false;
+
+ /// Update index tables.
+ bool UpdateIndexTablesOnly = false;
+
+ /// Whether we want a static variable to force us to keep its enclosing
+ /// function.
+ bool KeepFunctionForStatic = false;
+
+ /// Allow generation of valid, but non-deterministic output.
+ bool AllowNonDeterministicOutput = false;
+
+ /// Number of threads.
+ unsigned Threads = 1;
+
+ /// The accelerator table kinds.
+ SmallVector<DWARFLinker::AccelTableKind, 1> AccelTables;
+
+ /// Prepend path for the clang modules.
+ std::string PrependPath;
+
+ /// Input verification handler (it might be called asynchronously).
+ DWARFLinker::InputVerificationHandlerTy InputVerificationHandler = nullptr;
+
+ /// A list of all .swiftinterface files referenced by the debug
+ /// info, mapping Module name to path on disk. The entries need to
+ /// be uniqued and sorted, and there are only a few entries expected
+ /// per compile unit, which is why this is a std::map.
+ /// This is a dsymutil-specific flag.
+ ///
+ /// (it might be called asynchronously).
+ DWARFLinker::SwiftInterfacesMapTy *ParseableSwiftInterfaces = nullptr;
+
+ /// A list of remappings to apply to file paths.
+ ///
+ /// (it might be called asynchronously).
+ DWARFLinker::ObjectPrefixMapTy *ObjectPrefixMap = nullptr;
+};
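+
+// A minimal usage sketch (hypothetical values): callers are expected to
+// populate the options before linking, e.g.
+//
+//   DWARFLinkerOptions Opts;
+//   Opts.TargetDWARFVersion = 5; // Required: linking fails when left at 0.
+//   Opts.Threads = 0;            // 0 selects optimal concurrency.
+//   Opts.NoODR = false;          // Keep ODR type deduplication enabled.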
+
+class DWARFLinkerImpl;
+
+/// This class keeps data and services common for the whole linking process.
+class LinkingGlobalData {
+ friend DWARFLinkerImpl;
+
+public:
+ /// Returns global per-thread allocator.
+ parallel::PerThreadBumpPtrAllocator &getAllocator() { return Allocator; }
+
+ /// Returns global string pool.
+ StringPool &getStringPool() { return Strings; }
+
+ /// Set translation function.
+ void setTranslator(TranslatorFuncTy Translator) {
+ this->Translator = Translator;
+ }
+
+ /// Translate specified string.
+ StringRef translateString(StringRef String) {
+ if (Translator)
+ return Translator(String);
+
+ return String;
+ }
+
+ /// Returns linking options.
+ const DWARFLinkerOptions &getOptions() const { return Options; }
+
+ /// Set warning handler.
+ void setWarningHandler(MessageHandlerTy Handler) { WarningHandler = Handler; }
+
+ /// Set error handler.
+ void setErrorHandler(MessageHandlerTy Handler) { ErrorHandler = Handler; }
+
+ /// Report warning.
+ void warn(const Twine &Warning, StringRef Context,
+ const DWARFDie *DIE = nullptr) {
+ if (WarningHandler)
+ (WarningHandler)(Warning, Context, DIE);
+ }
+
+ /// Report warning.
+ void warn(Error Warning, StringRef Context, const DWARFDie *DIE = nullptr) {
+ handleAllErrors(std::move(Warning), [&](ErrorInfoBase &Info) {
+ warn(Info.message(), Context, DIE);
+ });
+ }
+
+ /// Report error.
+ void error(const Twine &Err, StringRef Context,
+ const DWARFDie *DIE = nullptr) {
+ if (ErrorHandler)
+ (ErrorHandler)(Err, Context, DIE);
+ }
+
+ /// Report error.
+ void error(Error Err, StringRef Context, const DWARFDie *DIE = nullptr) {
+ handleAllErrors(std::move(Err), [&](ErrorInfoBase &Info) {
+ error(Info.message(), Context, DIE);
+ });
+ }
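+
+ // A minimal usage sketch (doSomething() is hypothetical): an llvm::Error
+ // is consumed through the handlers above, e.g.
+ //
+ //   if (Error Err = doSomething())
+ //     GlobalData.error(std::move(Err), "file.o");
+ //
+ // handleAllErrors() consumes the Error even when no handler is installed.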
+
+protected:
+ parallel::PerThreadBumpPtrAllocator Allocator;
+ StringPool Strings;
+ TranslatorFuncTy Translator;
+ DWARFLinkerOptions Options;
+ MessageHandlerTy WarningHandler;
+ MessageHandlerTy ErrorHandler;
+};
+
+} // end of namespace dwarflinker_parallel
+} // end namespace llvm
+
+#endif // LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERGLOBALDATA_H
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerImpl.cpp b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerImpl.cpp
index dfd77af92f27..c49b9ef0cdf9 100644
--- a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerImpl.cpp
@@ -7,33 +7,66 @@
//===----------------------------------------------------------------------===//
#include "DWARFLinkerImpl.h"
+#include "DIEGenerator.h"
+#include "DependencyTracker.h"
+#include "Utils.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/Parallel.h"
+#include "llvm/Support/ThreadPool.h"
namespace llvm {
namespace dwarflinker_parallel {
-/// Similar to DWARFUnitSection::getUnitForOffset(), but returning our
-/// CompileUnit object instead.
-CompileUnit *
-DWARFLinkerImpl::LinkContext::getUnitForOffset(CompileUnit &CurrentCU,
- uint64_t Offset) const {
- if (CurrentCU.isClangModule())
- return &CurrentCU;
+DWARFLinkerImpl::DWARFLinkerImpl(MessageHandlerTy ErrorHandler,
+ MessageHandlerTy WarningHandler,
+ TranslatorFuncTy StringsTranslator)
+ : UniqueUnitID(0), DebugStrStrings(GlobalData),
+ DebugLineStrStrings(GlobalData), CommonSections(GlobalData) {
+ GlobalData.setTranslator(StringsTranslator);
+ GlobalData.setErrorHandler(ErrorHandler);
+ GlobalData.setWarningHandler(WarningHandler);
+}
- auto CU = llvm::upper_bound(
- CompileUnits, Offset,
- [](uint64_t LHS, const std::unique_ptr<CompileUnit> &RHS) {
- return LHS < RHS->getOrigUnit().getNextUnitOffset();
- });
+DWARFLinkerImpl::LinkContext::LinkContext(LinkingGlobalData &GlobalData,
+ DWARFFile &File,
+ StringMap<uint64_t> &ClangModules,
+ std::atomic<size_t> &UniqueUnitID,
+ std::optional<Triple> TargetTriple)
+ : OutputSections(GlobalData), InputDWARFFile(File),
+ ClangModules(ClangModules), TargetTriple(TargetTriple),
+ UniqueUnitID(UniqueUnitID) {
+
+ if (File.Dwarf) {
+ if (!File.Dwarf->compile_units().empty())
+ CompileUnits.reserve(File.Dwarf->getNumCompileUnits());
- return CU != CompileUnits.end() ? CU->get() : nullptr;
+ // Set context format and endianness based on the input file.
+ Format.Version = File.Dwarf->getMaxVersion();
+ Format.AddrSize = File.Dwarf->getCUAddrSize();
+ Endianness = File.Dwarf->isLittleEndian() ? llvm::endianness::little
+ : llvm::endianness::big;
+ }
+}
+
+DWARFLinkerImpl::LinkContext::RefModuleUnit::RefModuleUnit(
+ DWARFFile &File, std::unique_ptr<CompileUnit> Unit)
+ : File(File), Unit(std::move(Unit)) {}
+
+DWARFLinkerImpl::LinkContext::RefModuleUnit::RefModuleUnit(
+ LinkContext::RefModuleUnit &&Other)
+ : File(Other.File), Unit(std::move(Other.Unit)) {}
+
+void DWARFLinkerImpl::LinkContext::addModulesCompileUnit(
+ LinkContext::RefModuleUnit &&Unit) {
+ ModulesCompileUnits.emplace_back(std::move(Unit));
}
Error DWARFLinkerImpl::createEmitter(const Triple &TheTriple,
OutputFileType FileType,
raw_pwrite_stream &OutFile) {
- TheDwarfEmitter = std::make_unique<DwarfEmitterImpl>(
- FileType, OutFile, OutputStrings.getTranslator(), WarningHandler);
+ TheDwarfEmitter = std::make_unique<DwarfEmitterImpl>(FileType, OutFile);
return TheDwarfEmitter->init(TheTriple, "__DWARF");
}
@@ -42,5 +75,1382 @@ ExtraDwarfEmitter *DWARFLinkerImpl::getEmitter() {
return TheDwarfEmitter.get();
}
+void DWARFLinkerImpl::addObjectFile(DWARFFile &File, ObjFileLoaderTy Loader,
+ CompileUnitHandlerTy OnCUDieLoaded) {
+ ObjectContexts.emplace_back(std::make_unique<LinkContext>(
+ GlobalData, File, ClangModules, UniqueUnitID,
+ (TheDwarfEmitter.get() == nullptr ? std::optional<Triple>(std::nullopt)
+ : TheDwarfEmitter->getTargetTriple())));
+
+ if (ObjectContexts.back()->InputDWARFFile.Dwarf) {
+ for (const std::unique_ptr<DWARFUnit> &CU :
+ ObjectContexts.back()->InputDWARFFile.Dwarf->compile_units()) {
+ DWARFDie CUDie = CU->getUnitDIE();
+ OverallNumberOfCU++;
+
+ if (!CUDie)
+ continue;
+
+ OnCUDieLoaded(*CU);
+
+ // Register module reference.
+ if (!GlobalData.getOptions().UpdateIndexTablesOnly)
+ ObjectContexts.back()->registerModuleReference(CUDie, Loader,
+ OnCUDieLoaded);
+ }
+ }
+}
+
+void DWARFLinkerImpl::setEstimatedObjfilesAmount(unsigned ObjFilesNum) {
+ ObjectContexts.reserve(ObjFilesNum);
+}
+
+Error DWARFLinkerImpl::link() {
+ // Reset compile unit unique ID counter.
+ UniqueUnitID = 0;
+
+ if (Error Err = validateAndUpdateOptions())
+ return Err;
+
+ dwarf::FormParams GlobalFormat = {GlobalData.getOptions().TargetDWARFVersion,
+ 0, dwarf::DwarfFormat::DWARF32};
+ llvm::endianness GlobalEndianness = llvm::endianness::native;
+
+ if (TheDwarfEmitter) {
+ GlobalEndianness = TheDwarfEmitter->getTargetTriple().isLittleEndian()
+ ? llvm::endianness::little
+ : llvm::endianness::big;
+ }
+ std::optional<uint16_t> Language;
+
+ for (std::unique_ptr<LinkContext> &Context : ObjectContexts) {
+ if (Context->InputDWARFFile.Dwarf.get() == nullptr) {
+ Context->setOutputFormat(Context->getFormParams(), GlobalEndianness);
+ continue;
+ }
+
+ if (GlobalData.getOptions().Verbose) {
+ outs() << "OBJECT: " << Context->InputDWARFFile.FileName << "\n";
+
+ for (const std::unique_ptr<DWARFUnit> &OrigCU :
+ Context->InputDWARFFile.Dwarf->compile_units()) {
+ outs() << "Input compilation unit:";
+ DIDumpOptions DumpOpts;
+ DumpOpts.ChildRecurseDepth = 0;
+ DumpOpts.Verbose = GlobalData.getOptions().Verbose;
+ OrigCU->getUnitDIE().dump(outs(), 0, DumpOpts);
+ }
+ }
+
+ // Verify input DWARF if requested.
+ if (GlobalData.getOptions().VerifyInputDWARF)
+ verifyInput(Context->InputDWARFFile);
+
+ if (!TheDwarfEmitter)
+ GlobalEndianness = Context->getEndianness();
+ GlobalFormat.AddrSize =
+ std::max(GlobalFormat.AddrSize, Context->getFormParams().AddrSize);
+
+ Context->setOutputFormat(Context->getFormParams(), GlobalEndianness);
+
+ // FIXME: move creation of CompileUnits into addObjectFile().
+ // This would allow us to avoid scanning for the context Language and
+ // Modules state twice, and the following handling could then be removed.
+ for (const std::unique_ptr<DWARFUnit> &OrigCU :
+ Context->InputDWARFFile.Dwarf->compile_units()) {
+ DWARFDie UnitDie = OrigCU.get()->getUnitDIE();
+
+ if (!Language) {
+ if (std::optional<DWARFFormValue> Val =
+ UnitDie.find(dwarf::DW_AT_language)) {
+ uint16_t LangVal = dwarf::toUnsigned(Val, 0);
+ if (isODRLanguage(LangVal))
+ Language = LangVal;
+ }
+ }
+ }
+ }
+
+ if (GlobalFormat.AddrSize == 0) {
+ if (TheDwarfEmitter)
+ GlobalFormat.AddrSize =
+ TheDwarfEmitter->getTargetTriple().isArch32Bit() ? 4 : 8;
+ else
+ GlobalFormat.AddrSize = 8;
+ }
+
+ CommonSections.setOutputFormat(GlobalFormat, GlobalEndianness);
+
+ if (!GlobalData.Options.NoODR && Language.has_value()) {
+ parallel::TaskGroup TGroup;
+ TGroup.spawn([&]() {
+ ArtificialTypeUnit = std::make_unique<TypeUnit>(
+ GlobalData, UniqueUnitID++, Language, GlobalFormat, GlobalEndianness);
+ });
+ }
+
+ // Set parallel options.
+ if (GlobalData.getOptions().Threads == 0)
+ parallel::strategy = optimal_concurrency(OverallNumberOfCU);
+ else
+ parallel::strategy = hardware_concurrency(GlobalData.getOptions().Threads);
+
+ // Link object files.
+ if (GlobalData.getOptions().Threads == 1) {
+ for (std::unique_ptr<LinkContext> &Context : ObjectContexts) {
+ // Link object file.
+ if (Error Err = Context->link(ArtificialTypeUnit.get()))
+ GlobalData.error(std::move(Err), Context->InputDWARFFile.FileName);
+
+ Context->InputDWARFFile.unload();
+ }
+ } else {
+ ThreadPool Pool(parallel::strategy);
+ for (std::unique_ptr<LinkContext> &Context : ObjectContexts)
+ Pool.async([&]() {
+ // Link object file.
+ if (Error Err = Context->link(ArtificialTypeUnit.get()))
+ GlobalData.error(std::move(Err), Context->InputDWARFFile.FileName);
+
+ Context->InputDWARFFile.unload();
+ });
+
+ Pool.wait();
+ }
+
+ if (ArtificialTypeUnit.get() != nullptr && !ArtificialTypeUnit->getTypePool()
+ .getRoot()
+ ->getValue()
+ .load()
+ ->Children.empty()) {
+ std::optional<Triple> OutTriple = TheDwarfEmitter.get() == nullptr
+ ? std::optional<Triple>(std::nullopt)
+ : TheDwarfEmitter->getTargetTriple();
+
+ if (Error Err = ArtificialTypeUnit.get()->finishCloningAndEmit(OutTriple))
+ return Err;
+ }
+
+ // At this stage, each compile unit is cloned to its own set of debug
+ // sections. Now update patches, assign offsets, and assemble the final
+ // file, gluing debug tables from each compile unit together.
+ glueCompileUnitsAndWriteToTheOutput();
+
+ return Error::success();
+}
+
+void DWARFLinkerImpl::verifyInput(const DWARFFile &File) {
+ assert(File.Dwarf);
+
+ std::string Buffer;
+ raw_string_ostream OS(Buffer);
+ DIDumpOptions DumpOpts;
+ if (!File.Dwarf->verify(OS, DumpOpts.noImplicitRecursion())) {
+ if (GlobalData.getOptions().InputVerificationHandler)
+ GlobalData.getOptions().InputVerificationHandler(File, OS.str());
+ }
+}
+
+Error DWARFLinkerImpl::validateAndUpdateOptions() {
+ if (GlobalData.getOptions().TargetDWARFVersion == 0)
+ return createStringError(std::errc::invalid_argument,
+ "target DWARF version is not set");
+
+ GlobalData.Options.NoOutput = TheDwarfEmitter.get() == nullptr;
+
+ if (GlobalData.getOptions().Verbose && GlobalData.getOptions().Threads != 1) {
+ GlobalData.Options.Threads = 1;
+ GlobalData.warn(
+ "set number of threads to 1 to make --verbose to work properly.", "");
+ }
+
+ // Do not deduplicate types in case of --update.
+ if (GlobalData.getOptions().UpdateIndexTablesOnly &&
+ !GlobalData.Options.NoODR)
+ GlobalData.Options.NoODR = true;
+
+ return Error::success();
+}
+
+/// Resolve the relative path to a build artifact referenced by DWARF by
+/// applying DW_AT_comp_dir.
+static void resolveRelativeObjectPath(SmallVectorImpl<char> &Buf, DWARFDie CU) {
+ sys::path::append(Buf, dwarf::toString(CU.find(dwarf::DW_AT_comp_dir), ""));
+}
+
+static uint64_t getDwoId(const DWARFDie &CUDie) {
+ auto DwoId = dwarf::toUnsigned(
+ CUDie.find({dwarf::DW_AT_dwo_id, dwarf::DW_AT_GNU_dwo_id}));
+ if (DwoId)
+ return *DwoId;
+ return 0;
+}
+
+static std::string
+remapPath(StringRef Path,
+ const DWARFLinker::ObjectPrefixMapTy &ObjectPrefixMap) {
+ if (ObjectPrefixMap.empty())
+ return Path.str();
+
+ SmallString<256> p = Path;
+ for (const auto &Entry : ObjectPrefixMap)
+ if (llvm::sys::path::replace_path_prefix(p, Entry.first, Entry.second))
+ break;
+ return p.str().str();
+}
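+
+// A worked example (hypothetical mapping): with ObjectPrefixMap containing
+// {"/b/build", "/src"}, remapPath("/b/build/mod.pcm", Map) yields
+// "/src/mod.pcm"; only the first matching prefix is applied because the
+// loop breaks after one replacement.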
+
+static std::string getPCMFile(const DWARFDie &CUDie,
+ DWARFLinker::ObjectPrefixMapTy *ObjectPrefixMap) {
+ std::string PCMFile = dwarf::toString(
+ CUDie.find({dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), "");
+
+ if (PCMFile.empty())
+ return PCMFile;
+
+ if (ObjectPrefixMap)
+ PCMFile = remapPath(PCMFile, *ObjectPrefixMap);
+
+ return PCMFile;
+}
+
+std::pair<bool, bool> DWARFLinkerImpl::LinkContext::isClangModuleRef(
+ const DWARFDie &CUDie, std::string &PCMFile, unsigned Indent, bool Quiet) {
+ if (PCMFile.empty())
+ return std::make_pair(false, false);
+
+ // Clang module DWARF skeleton CUs abuse this for the path to the module.
+ uint64_t DwoId = getDwoId(CUDie);
+
+ std::string Name = dwarf::toString(CUDie.find(dwarf::DW_AT_name), "");
+ if (Name.empty()) {
+ if (!Quiet)
+ GlobalData.warn("anonymous module skeleton CU for " + PCMFile + ".",
+ InputDWARFFile.FileName);
+ return std::make_pair(true, true);
+ }
+
+ if (!Quiet && GlobalData.getOptions().Verbose) {
+ outs().indent(Indent);
+ outs() << "Found clang module reference " << PCMFile;
+ }
+
+ auto Cached = ClangModules.find(PCMFile);
+ if (Cached != ClangModules.end()) {
+ // FIXME: Until PR27449 (https://llvm.org/bugs/show_bug.cgi?id=27449) is
+ // fixed in clang, only warn about DWO_id mismatches in verbose mode.
+ // ASTFileSignatures will change randomly when a module is rebuilt.
+ if (!Quiet && GlobalData.getOptions().Verbose && (Cached->second != DwoId))
+ GlobalData.warn(
+ Twine("hash mismatch: this object file was built against a "
+ "different version of the module ") +
+ PCMFile + ".",
+ InputDWARFFile.FileName);
+ if (!Quiet && GlobalData.getOptions().Verbose)
+ outs() << " [cached].\n";
+ return std::make_pair(true, true);
+ }
+
+ return std::make_pair(true, false);
+}
+
+/// If this compile unit is really a skeleton CU that points to a
+/// clang module, register it in ClangModules and return true.
+///
+/// A skeleton CU is a CU without children, a DW_AT_gnu_dwo_name
+/// pointing to the module, and a DW_AT_gnu_dwo_id with the module
+/// hash.
+bool DWARFLinkerImpl::LinkContext::registerModuleReference(
+ const DWARFDie &CUDie, ObjFileLoaderTy Loader,
+ CompileUnitHandlerTy OnCUDieLoaded, unsigned Indent) {
+ std::string PCMFile =
+ getPCMFile(CUDie, GlobalData.getOptions().ObjectPrefixMap);
+ std::pair<bool, bool> IsClangModuleRef =
+ isClangModuleRef(CUDie, PCMFile, Indent, false);
+
+ if (!IsClangModuleRef.first)
+ return false;
+
+ if (IsClangModuleRef.second)
+ return true;
+
+ if (GlobalData.getOptions().Verbose)
+ outs() << " ...\n";
+
+ // Cyclic dependencies are disallowed by Clang, but we still
+ // shouldn't run into an infinite loop, so mark it as processed now.
+ ClangModules.insert({PCMFile, getDwoId(CUDie)});
+
+ if (Error E =
+ loadClangModule(Loader, CUDie, PCMFile, OnCUDieLoaded, Indent + 2)) {
+ consumeError(std::move(E));
+ return false;
+ }
+ return true;
+}
+
+Error DWARFLinkerImpl::LinkContext::loadClangModule(
+ ObjFileLoaderTy Loader, const DWARFDie &CUDie, const std::string &PCMFile,
+ CompileUnitHandlerTy OnCUDieLoaded, unsigned Indent) {
+
+ uint64_t DwoId = getDwoId(CUDie);
+ std::string ModuleName = dwarf::toString(CUDie.find(dwarf::DW_AT_name), "");
+
+ // Use a SmallString<0> because loadClangModule() is recursive.
+ SmallString<0> Path(GlobalData.getOptions().PrependPath);
+ if (sys::path::is_relative(PCMFile))
+ resolveRelativeObjectPath(Path, CUDie);
+ sys::path::append(Path, PCMFile);
+ // Don't use the cached binary holder because we have no thread-safety
+ // guarantee and the lifetime is limited.
+
+ if (Loader == nullptr) {
+ GlobalData.error("cann't load clang module: loader is not specified.",
+ InputDWARFFile.FileName);
+ return Error::success();
+ }
+
+ auto ErrOrObj = Loader(InputDWARFFile.FileName, Path);
+ if (!ErrOrObj)
+ return Error::success();
+
+ std::unique_ptr<CompileUnit> Unit;
+ for (const auto &CU : ErrOrObj->Dwarf->compile_units()) {
+ OnCUDieLoaded(*CU);
+ // Recursively get all modules imported by this one.
+ auto ChildCUDie = CU->getUnitDIE();
+ if (!ChildCUDie)
+ continue;
+ if (!registerModuleReference(ChildCUDie, Loader, OnCUDieLoaded, Indent)) {
+ if (Unit) {
+ std::string Err =
+ (PCMFile +
+ ": Clang modules are expected to have exactly 1 compile unit.\n");
+ GlobalData.error(Err, InputDWARFFile.FileName);
+ return make_error<StringError>(Err, inconvertibleErrorCode());
+ }
+ // FIXME: Until PR27449 (https://llvm.org/bugs/show_bug.cgi?id=27449) is
+ // fixed in clang, only warn about DWO_id mismatches in verbose mode.
+ // ASTFileSignatures will change randomly when a module is rebuilt.
+ uint64_t PCMDwoId = getDwoId(ChildCUDie);
+ if (PCMDwoId != DwoId) {
+ if (GlobalData.getOptions().Verbose)
+ GlobalData.warn(
+ Twine("hash mismatch: this object file was built against a "
+ "different version of the module ") +
+ PCMFile + ".",
+ InputDWARFFile.FileName);
+ // Update the cache entry with the DwoId of the module loaded from disk.
+ ClangModules[PCMFile] = PCMDwoId;
+ }
+
+ // Empty module units should not be cloned.
+ if (!ChildCUDie.hasChildren())
+ continue;
+
+ // Add this module.
+ Unit = std::make_unique<CompileUnit>(
+ GlobalData, *CU, UniqueUnitID.fetch_add(1), ModuleName, *ErrOrObj,
+ getUnitForOffset, CU->getFormParams(), getEndianness());
+ }
+ }
+
+ if (Unit) {
+ ModulesCompileUnits.emplace_back(RefModuleUnit{*ErrOrObj, std::move(Unit)});
+ // Preload line table, as it can't be loaded asynchronously.
+ ModulesCompileUnits.back().Unit->loadLineTable();
+ }
+
+ return Error::success();
+}
+
+Error DWARFLinkerImpl::LinkContext::link(TypeUnit *ArtificialTypeUnit) {
+ InterCUProcessingStarted = false;
+ if (!InputDWARFFile.Dwarf)
+ return Error::success();
+
+ // Preload macro tables, as they can't be loaded asynchronously.
+ InputDWARFFile.Dwarf->getDebugMacinfo();
+ InputDWARFFile.Dwarf->getDebugMacro();
+
+ // Link modules compile units first.
+ parallelForEach(ModulesCompileUnits, [&](RefModuleUnit &RefModule) {
+ linkSingleCompileUnit(*RefModule.Unit, ArtificialTypeUnit);
+ });
+
+ // Check for live relocations. If there are no live relocations then we
+ // can skip the entire object file.
+ if (!GlobalData.getOptions().UpdateIndexTablesOnly &&
+ !InputDWARFFile.Addresses->hasValidRelocs()) {
+ if (GlobalData.getOptions().Verbose)
+ outs() << "No valid relocations found. Skipping.\n";
+ return Error::success();
+ }
+
+ OriginalDebugInfoSize = getInputDebugInfoSize();
+
+ // Create CompileUnit structures to keep information about the source
+ // DWARFUnits and load line tables.
+ for (const auto &OrigCU : InputDWARFFile.Dwarf->compile_units()) {
+ // Load only unit DIE at this stage.
+ auto CUDie = OrigCU->getUnitDIE();
+ std::string PCMFile =
+ getPCMFile(CUDie, GlobalData.getOptions().ObjectPrefixMap);
+
+ // The !isClangModuleRef condition effectively skips over fully resolved
+ // skeleton units.
+ if (!CUDie || GlobalData.getOptions().UpdateIndexTablesOnly ||
+ !isClangModuleRef(CUDie, PCMFile, 0, true).first) {
+ CompileUnits.emplace_back(std::make_unique<CompileUnit>(
+ GlobalData, *OrigCU, UniqueUnitID.fetch_add(1), "", InputDWARFFile,
+ getUnitForOffset, OrigCU->getFormParams(), getEndianness()));
+
+ // Preload line table, as it can't be loaded asynchronously.
+ CompileUnits.back()->loadLineTable();
+ }
+ }
+
+ HasNewInterconnectedCUs = false;
+
+ // Link self-sufficient compile units and discover inter-connected compile
+ // units.
+ parallelForEach(CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) {
+ linkSingleCompileUnit(*CU, ArtificialTypeUnit);
+ });
+
+ // Link all inter-connected units.
+ if (HasNewInterconnectedCUs) {
+ InterCUProcessingStarted = true;
+
+ if (Error Err = finiteLoop([&]() -> Expected<bool> {
+ HasNewInterconnectedCUs = false;
+
+ // Load inter-connected units.
+ parallelForEach(
+ CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) {
+ if (CU->isInterconnectedCU()) {
+ CU->maybeResetToLoadedStage();
+ linkSingleCompileUnit(*CU, ArtificialTypeUnit,
+ CompileUnit::Stage::Loaded);
+ }
+ });
+
+ // Do liveness analysis for inter-connected units.
+ parallelForEach(CompileUnits,
+ [&](std::unique_ptr<CompileUnit> &CU) {
+ linkSingleCompileUnit(
+ *CU, ArtificialTypeUnit,
+ CompileUnit::Stage::LivenessAnalysisDone);
+ });
+
+ return HasNewInterconnectedCUs.load();
+ }))
+ return Err;
+
+ // Update dependencies.
+ if (Error Err = finiteLoop([&]() -> Expected<bool> {
+ HasNewGlobalDependency = false;
+ parallelForEach(
+ CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) {
+ linkSingleCompileUnit(
+ *CU, ArtificialTypeUnit,
+ CompileUnit::Stage::UpdateDependenciesCompleteness);
+ });
+ return HasNewGlobalDependency.load();
+ }))
+ return Err;
+ parallelForEach(CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) {
+ if (CU->isInterconnectedCU() &&
+ CU->getStage() == CompileUnit::Stage::LivenessAnalysisDone)
+ CU->setStage(CompileUnit::Stage::UpdateDependenciesCompleteness);
+ });
+
+ // Assign type names.
+ parallelForEach(CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) {
+ linkSingleCompileUnit(*CU, ArtificialTypeUnit,
+ CompileUnit::Stage::TypeNamesAssigned);
+ });
+
+ // Clone inter-connected units.
+ parallelForEach(CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) {
+ linkSingleCompileUnit(*CU, ArtificialTypeUnit,
+ CompileUnit::Stage::Cloned);
+ });
+
+ // Update patches for inter-connected units.
+ parallelForEach(CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) {
+ linkSingleCompileUnit(*CU, ArtificialTypeUnit,
+ CompileUnit::Stage::PatchesUpdated);
+ });
+
+ // Release data.
+ parallelForEach(CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) {
+ linkSingleCompileUnit(*CU, ArtificialTypeUnit,
+ CompileUnit::Stage::Cleaned);
+ });
+ }
+
+ if (GlobalData.getOptions().UpdateIndexTablesOnly) {
+ // Emit Invariant sections.
+
+ if (Error Err = emitInvariantSections())
+ return Err;
+ } else if (!CompileUnits.empty()) {
+ // Emit .debug_frame section.
+
+ Error ResultErr = Error::success();
+ parallel::TaskGroup TGroup;
+ // We use task group here as PerThreadBumpPtrAllocator should be called from
+ // the threads created by ThreadPoolExecutor.
+ TGroup.spawn([&]() {
+ if (Error Err = cloneAndEmitDebugFrame())
+ ResultErr = std::move(Err);
+ });
+ return ResultErr;
+ }
+
+ return Error::success();
+}
+
+void DWARFLinkerImpl::LinkContext::linkSingleCompileUnit(
+ CompileUnit &CU, TypeUnit *ArtificialTypeUnit,
+ enum CompileUnit::Stage DoUntilStage) {
+ if (InterCUProcessingStarted != CU.isInterconnectedCU())
+ return;
+
+ if (Error Err = finiteLoop([&]() -> Expected<bool> {
+ if (CU.getStage() >= DoUntilStage)
+ return false;
+
+ switch (CU.getStage()) {
+ case CompileUnit::Stage::CreatedNotLoaded: {
+ // Load input compilation unit DIEs.
+ // Analyze properties of DIEs.
+ if (!CU.loadInputDIEs()) {
+ // We do not need to do liveness analysis for invalid compilation
+ // unit.
+ CU.setStage(CompileUnit::Stage::Skipped);
+ } else {
+ CU.analyzeDWARFStructure();
+
+ // The registerModuleReference() condition effectively skips
+ // over fully resolved skeleton units. This second pass of
+ // registerModuleReferences doesn't do any new work, but it
+ // will collect top-level errors, which are suppressed. Module
+ // warnings were already displayed in the first iteration.
+ if (registerModuleReference(
+ CU.getOrigUnit().getUnitDIE(), nullptr,
+ [](const DWARFUnit &) {}, 0))
+ CU.setStage(CompileUnit::Stage::PatchesUpdated);
+ else
+ CU.setStage(CompileUnit::Stage::Loaded);
+ }
+ } break;
+
+ case CompileUnit::Stage::Loaded: {
+ // Mark all the DIEs that need to be present in the generated output.
+ // If ODR requested, build type names.
+ if (!CU.resolveDependenciesAndMarkLiveness(InterCUProcessingStarted,
+ HasNewInterconnectedCUs)) {
+ assert(HasNewInterconnectedCUs &&
+ "Flag indicating new inter-connections is not set");
+ return false;
+ }
+
+ CU.setStage(CompileUnit::Stage::LivenessAnalysisDone);
+ } break;
+
+ case CompileUnit::Stage::LivenessAnalysisDone: {
+ if (InterCUProcessingStarted) {
+ if (CU.updateDependenciesCompleteness())
+ HasNewGlobalDependency = true;
+ return false;
+ } else {
+ if (Error Err = finiteLoop([&]() -> Expected<bool> {
+ return CU.updateDependenciesCompleteness();
+ }))
+ return std::move(Err);
+
+ CU.setStage(CompileUnit::Stage::UpdateDependenciesCompleteness);
+ }
+ } break;
+
+ case CompileUnit::Stage::UpdateDependenciesCompleteness:
+#ifndef NDEBUG
+ CU.verifyDependencies();
+#endif
+
+ if (ArtificialTypeUnit) {
+ if (Error Err =
+ CU.assignTypeNames(ArtificialTypeUnit->getTypePool()))
+ return std::move(Err);
+ }
+ CU.setStage(CompileUnit::Stage::TypeNamesAssigned);
+ break;
+
+ case CompileUnit::Stage::TypeNamesAssigned:
+ // Clone input compile unit.
+ if (CU.isClangModule() ||
+ GlobalData.getOptions().UpdateIndexTablesOnly ||
+ CU.getContaingFile().Addresses->hasValidRelocs()) {
+ if (Error Err = CU.cloneAndEmit(TargetTriple, ArtificialTypeUnit))
+ return std::move(Err);
+ }
+
+ CU.setStage(CompileUnit::Stage::Cloned);
+ break;
+
+ case CompileUnit::Stage::Cloned:
+ // Update DIE references.
+ CU.updateDieRefPatchesWithClonedOffsets();
+ CU.setStage(CompileUnit::Stage::PatchesUpdated);
+ break;
+
+ case CompileUnit::Stage::PatchesUpdated:
+ // Cleanup resources.
+ CU.cleanupDataAfterClonning();
+ CU.setStage(CompileUnit::Stage::Cleaned);
+ break;
+
+ case CompileUnit::Stage::Cleaned:
+ assert(false);
+ break;
+
+ case CompileUnit::Stage::Skipped:
+ // Nothing to do.
+ break;
+ }
+
+ return true;
+ })) {
+ CU.error(std::move(Err));
+ CU.cleanupDataAfterClonning();
+ CU.setStage(CompileUnit::Stage::Skipped);
+ }
+}
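+
+// Summary of the stage machine above: a unit normally advances through
+//   CreatedNotLoaded -> Loaded -> LivenessAnalysisDone
+//   -> UpdateDependenciesCompleteness -> TypeNamesAssigned -> Cloned
+//   -> PatchesUpdated -> Cleaned,
+// jumps straight to PatchesUpdated for fully resolved skeleton units, and
+// falls back to Skipped when loading fails or an error is reported.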
+
+Error DWARFLinkerImpl::LinkContext::emitInvariantSections() {
+ if (GlobalData.getOptions().NoOutput)
+ return Error::success();
+
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugLoc).OS
+ << InputDWARFFile.Dwarf->getDWARFObj().getLocSection().Data;
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugLocLists).OS
+ << InputDWARFFile.Dwarf->getDWARFObj().getLoclistsSection().Data;
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugRange).OS
+ << InputDWARFFile.Dwarf->getDWARFObj().getRangesSection().Data;
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugRngLists).OS
+ << InputDWARFFile.Dwarf->getDWARFObj().getRnglistsSection().Data;
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugARanges).OS
+ << InputDWARFFile.Dwarf->getDWARFObj().getArangesSection();
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugFrame).OS
+ << InputDWARFFile.Dwarf->getDWARFObj().getFrameSection().Data;
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugAddr).OS
+ << InputDWARFFile.Dwarf->getDWARFObj().getAddrSection().Data;
+
+ return Error::success();
+}
+
+Error DWARFLinkerImpl::LinkContext::cloneAndEmitDebugFrame() {
+ if (GlobalData.getOptions().NoOutput)
+ return Error::success();
+
+ if (InputDWARFFile.Dwarf.get() == nullptr)
+ return Error::success();
+
+ const DWARFObject &InputDWARFObj = InputDWARFFile.Dwarf->getDWARFObj();
+
+ StringRef OrigFrameData = InputDWARFObj.getFrameSection().Data;
+ if (OrigFrameData.empty())
+ return Error::success();
+
+ RangesTy AllUnitsRanges;
+ for (std::unique_ptr<CompileUnit> &Unit : CompileUnits) {
+ for (auto CurRange : Unit->getFunctionRanges())
+ AllUnitsRanges.insert(CurRange.Range, CurRange.Value);
+ }
+
+ unsigned SrcAddrSize = InputDWARFObj.getAddressSize();
+
+ SectionDescriptor &OutSection =
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugFrame);
+
+ DataExtractor Data(OrigFrameData, InputDWARFObj.isLittleEndian(), 0);
+ uint64_t InputOffset = 0;
+
+ // Store the data of the CIEs defined in this object, keyed by their
+ // offsets.
+ DenseMap<uint64_t, StringRef> LocalCIES;
+
+ /// The CIEs that have been emitted in the output section. The actual CIE
+ /// data serves as the key to this StringMap.
+ StringMap<uint32_t> EmittedCIEs;
+
+ while (Data.isValidOffset(InputOffset)) {
+ uint64_t EntryOffset = InputOffset;
+ uint32_t InitialLength = Data.getU32(&InputOffset);
+ if (InitialLength == 0xFFFFFFFF)
+ return createFileError(InputDWARFObj.getFileName(),
+ createStringError(std::errc::invalid_argument,
+ "Dwarf64 bits no supported"));
+
+ uint32_t CIEId = Data.getU32(&InputOffset);
+ if (CIEId == 0xFFFFFFFF) {
+ // This is a CIE, store it.
+ StringRef CIEData = OrigFrameData.substr(EntryOffset, InitialLength + 4);
+ LocalCIES[EntryOffset] = CIEData;
+ // The -4 is to account for the CIEId we just read.
+ InputOffset += InitialLength - 4;
+ continue;
+ }
+
+ uint64_t Loc = Data.getUnsigned(&InputOffset, SrcAddrSize);
+
+ // Some compilers seem to emit frame info that doesn't start at
+ // the function entry point, so we can't just look up the address
+ // in the debug map. Use the AddressInfo's range map to see if the FDE
+ // describes something that we can relocate.
+ std::optional<AddressRangeValuePair> Range =
+ AllUnitsRanges.getRangeThatContains(Loc);
+ if (!Range) {
+ // The +4 is to account for the size of the InitialLength field itself.
+ InputOffset = EntryOffset + InitialLength + 4;
+ continue;
+ }
+
+ // This is an FDE, and we have a mapping.
+ // Have we already emitted a corresponding CIE?
+ StringRef CIEData = LocalCIES[CIEId];
+ if (CIEData.empty())
+ return createFileError(
+ InputDWARFObj.getFileName(),
+ createStringError(std::errc::invalid_argument,
+ "Inconsistent debug_frame content. Dropping."));
+
+ uint64_t OffsetToCIERecord = OutSection.OS.tell();
+
+ // Look if we already emitted a CIE that corresponds to the
+ // referenced one (the CIE data is the key of that lookup).
+ auto IteratorInserted =
+ EmittedCIEs.insert(std::make_pair(CIEData, OffsetToCIERecord));
+ OffsetToCIERecord = IteratorInserted.first->getValue();
+
+ // Emit CIE for this ID if it is not emitted yet.
+ if (IteratorInserted.second)
+ OutSection.OS << CIEData;
+
+ // Remember the offset to the FDE record, so that we can update the
+ // field referencing the CIE record (containing OffsetToCIERecord)
+ // when final offsets are known. OffsetToCIERecord (which is written
+ // later) is local to the current .debug_frame section; it should be
+ // updated with the final offset of the .debug_frame section.
+ OutSection.notePatch(
+ DebugOffsetPatch{OutSection.OS.tell() + 4, &OutSection, true});
+
+ // Emit the FDE with updated address and CIE pointer.
+ // (4 + AddrSize) is the size of the CIEId + initial_location
+ // fields that will get reconstructed by emitFDE().
+ unsigned FDERemainingBytes = InitialLength - (4 + SrcAddrSize);
+ emitFDE(OffsetToCIERecord, SrcAddrSize, Loc + Range->Value,
+ OrigFrameData.substr(InputOffset, FDERemainingBytes), OutSection);
+ InputOffset += FDERemainingBytes;
+ }
+
+ return Error::success();
+}
+
+/// Emit a FDE into the debug_frame section. \p FDEBytes
+/// contains the FDE data without the length, CIE offset and address
+/// which will be replaced with the parameter values.
+void DWARFLinkerImpl::LinkContext::emitFDE(uint32_t CIEOffset,
+ uint32_t AddrSize, uint64_t Address,
+ StringRef FDEBytes,
+ SectionDescriptor &Section) {
+ Section.emitIntVal(FDEBytes.size() + 4 + AddrSize, 4);
+ Section.emitIntVal(CIEOffset, 4);
+ Section.emitIntVal(Address, AddrSize);
+ Section.OS.write(FDEBytes.data(), FDEBytes.size());
+}
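+
+// Sketch of the record emitFDE() writes, in the field order emitted above:
+//
+//   [length      : 4 bytes       ] = FDEBytes.size() + 4 + AddrSize
+//   [CIE offset  : 4 bytes       ]
+//   [initial loc : AddrSize bytes] = relocated Address
+//   [FDE body    : FDEBytes      ] copied verbatim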
+
+void DWARFLinkerImpl::glueCompileUnitsAndWriteToTheOutput() {
+ if (GlobalData.getOptions().NoOutput)
+ return;
+
+ // Go through all object files, all compile units and assign
+ // offsets to them.
+ assignOffsets();
+
+ // Patch size/offsets fields according to the assigned CU offsets.
+ patchOffsetsAndSizes();
+
+ // Emit common sections and write debug tables from all object files/compile
+ // units into the resulting file.
+ emitCommonSectionsAndWriteCompileUnitsToTheOutput();
+
+ if (ArtificialTypeUnit.get() != nullptr)
+ ArtificialTypeUnit.reset();
+
+ // Write common debug sections into the resulting file.
+ writeCommonSectionsToTheOutput();
+
+ // Cleanup data.
+ cleanupDataAfterDWARFOutputIsWritten();
+
+ if (GlobalData.getOptions().Statistics)
+ printStatistic();
+}
+
+void DWARFLinkerImpl::printStatistic() {
+
+ // For each object file map how many bytes were emitted.
+ StringMap<DebugInfoSize> SizeByObject;
+
+ for (const std::unique_ptr<LinkContext> &Context : ObjectContexts) {
+ uint64_t AllDebugInfoSectionsSize = 0;
+
+ for (std::unique_ptr<CompileUnit> &CU : Context->CompileUnits)
+ if (std::optional<SectionDescriptor *> DebugInfo =
+ CU->tryGetSectionDescriptor(DebugSectionKind::DebugInfo))
+ AllDebugInfoSectionsSize += (*DebugInfo)->getContents().size();
+
+ SizeByObject[Context->InputDWARFFile.FileName].Input =
+ Context->OriginalDebugInfoSize;
+ SizeByObject[Context->InputDWARFFile.FileName].Output =
+ AllDebugInfoSectionsSize;
+ }
+
+ // Create a vector sorted in descending order by output size.
+ std::vector<std::pair<StringRef, DebugInfoSize>> Sorted;
+ for (auto &E : SizeByObject)
+ Sorted.emplace_back(E.first(), E.second);
+ llvm::sort(Sorted, [](auto &LHS, auto &RHS) {
+ return LHS.second.Output > RHS.second.Output;
+ });
+
+ auto ComputePercentage = [](int64_t Input, int64_t Output) -> float {
+ const float Difference = Output - Input;
+ const float Sum = Input + Output;
+ if (Sum == 0)
+ return 0;
+ return (Difference / (Sum / 2));
+ };
+
+ int64_t InputTotal = 0;
+ int64_t OutputTotal = 0;
+ const char *FormatStr = "{0,-45} {1,10}b {2,10}b {3,8:P}\n";
+
+ // Print header.
+ outs() << ".debug_info section size (in bytes)\n";
+ outs() << "----------------------------------------------------------------"
+ "---------------\n";
+ outs() << "Filename Object "
+ " dSYM Change\n";
+ outs() << "----------------------------------------------------------------"
+ "---------------\n";
+
+ // Print body.
+ for (auto &E : Sorted) {
+ InputTotal += E.second.Input;
+ OutputTotal += E.second.Output;
+ llvm::outs() << formatv(
+ FormatStr, sys::path::filename(E.first).take_back(45), E.second.Input,
+ E.second.Output, ComputePercentage(E.second.Input, E.second.Output));
+ }
+ // Print total and footer.
+ outs() << "----------------------------------------------------------------"
+ "---------------\n";
+ llvm::outs() << formatv(FormatStr, "Total", InputTotal, OutputTotal,
+ ComputePercentage(InputTotal, OutputTotal));
+ outs() << "----------------------------------------------------------------"
+ "---------------\n\n";
+}
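+
+// A worked example for the percentage above: it is the difference relative
+// to the mean of input and output sizes, (Output - Input) / ((Input +
+// Output) / 2). For Input = 100 and Output = 80 this yields -20 / 90 =
+// -22.2%, which formatv prints via "{3,8:P}".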
+
+void DWARFLinkerImpl::assignOffsets() {
+ parallel::TaskGroup TGroup;
+ TGroup.spawn([&]() { assignOffsetsToStrings(); });
+ TGroup.spawn([&]() { assignOffsetsToSections(); });
+}
+
+void DWARFLinkerImpl::assignOffsetsToStrings() {
+ size_t CurDebugStrIndex = 1; // Start from 1 to account for the zero entry.
+ uint64_t CurDebugStrOffset =
+ 1; // Start from 1 to account for the zero entry.
+ size_t CurDebugLineStrIndex = 0;
+ uint64_t CurDebugLineStrOffset = 0;
+
+ // Enumerate all strings, add them into the DwarfStringPoolEntry map,
+ // and assign an offset and index to each string not indexed yet.
+ forEachOutputString([&](StringDestinationKind Kind,
+ const StringEntry *String) {
+ switch (Kind) {
+ case StringDestinationKind::DebugStr: {
+ DwarfStringPoolEntryWithExtString *Entry = DebugStrStrings.add(String);
+ assert(Entry != nullptr);
+
+ if (!Entry->isIndexed()) {
+ Entry->Offset = CurDebugStrOffset;
+ CurDebugStrOffset += Entry->String.size() + 1;
+ Entry->Index = CurDebugStrIndex++;
+ }
+ } break;
+ case StringDestinationKind::DebugLineStr: {
+ DwarfStringPoolEntryWithExtString *Entry =
+ DebugLineStrStrings.add(String);
+ assert(Entry != nullptr);
+
+ if (!Entry->isIndexed()) {
+ Entry->Offset = CurDebugLineStrOffset;
+ CurDebugLineStrOffset += Entry->String.size() + 1;
+ Entry->Index = CurDebugLineStrIndex++;
+ }
+ } break;
+ }
+ });
+}
+
+void DWARFLinkerImpl::assignOffsetsToSections() {
+ std::array<uint64_t, SectionKindsNum> SectionSizesAccumulator = {0};
+
+ forEachObjectSectionsSet([&](OutputSections &UnitSections) {
+ UnitSections.assignSectionsOffsetAndAccumulateSize(SectionSizesAccumulator);
+ });
+}
+
+void DWARFLinkerImpl::forEachOutputString(
+ function_ref<void(StringDestinationKind Kind, const StringEntry *String)>
+ StringHandler) {
+ // To save space we do not create any separate string table.
+ // We use already allocated string patches and accelerator entries:
+ // enumerate them in natural order and assign offsets.
+ // ASSUMPTION: strings should be stored into .debug_str/.debug_line_str
+ // sections in the same order as they were assigned offsets.
+ forEachCompileUnit([&](CompileUnit *CU) {
+ CU->forEach([&](SectionDescriptor &OutSection) {
+ OutSection.ListDebugStrPatch.forEach([&](DebugStrPatch &Patch) {
+ StringHandler(StringDestinationKind::DebugStr, Patch.String);
+ });
+
+ OutSection.ListDebugLineStrPatch.forEach([&](DebugLineStrPatch &Patch) {
+ StringHandler(StringDestinationKind::DebugLineStr, Patch.String);
+ });
+ });
+
+ CU->forEachAcceleratorRecord([&](DwarfUnit::AccelInfo &Info) {
+ StringHandler(StringDestinationKind::DebugStr, Info.String);
+ });
+ });
+
+ if (ArtificialTypeUnit.get() != nullptr) {
+ ArtificialTypeUnit->forEach([&](SectionDescriptor &OutSection) {
+ OutSection.ListDebugStrPatch.forEach([&](DebugStrPatch &Patch) {
+ StringHandler(StringDestinationKind::DebugStr, Patch.String);
+ });
+
+ OutSection.ListDebugLineStrPatch.forEach([&](DebugLineStrPatch &Patch) {
+ StringHandler(StringDestinationKind::DebugLineStr, Patch.String);
+ });
+
+ OutSection.ListDebugTypeStrPatch.forEach([&](DebugTypeStrPatch &Patch) {
+ if (Patch.Die == nullptr)
+ return;
+
+ StringHandler(StringDestinationKind::DebugStr, Patch.String);
+ });
+
+ OutSection.ListDebugTypeLineStrPatch.forEach(
+ [&](DebugTypeLineStrPatch &Patch) {
+ if (Patch.Die == nullptr)
+ return;
+
+ StringHandler(StringDestinationKind::DebugStr, Patch.String);
+ });
+ });
+ }
+}
+
+void DWARFLinkerImpl::forEachObjectSectionsSet(
+ function_ref<void(OutputSections &)> SectionsSetHandler) {
+ // Handle artificial type unit first.
+ if (ArtificialTypeUnit.get() != nullptr)
+ SectionsSetHandler(*ArtificialTypeUnit);
+
+ // Then all modules (before regular compilation units).
+ for (const std::unique_ptr<LinkContext> &Context : ObjectContexts)
+ for (LinkContext::RefModuleUnit &ModuleUnit : Context->ModulesCompileUnits)
+ if (ModuleUnit.Unit->getStage() != CompileUnit::Stage::Skipped)
+ SectionsSetHandler(*ModuleUnit.Unit);
+
+ // Finally all compilation units.
+ for (const std::unique_ptr<LinkContext> &Context : ObjectContexts) {
+ // Handle object file common sections.
+ SectionsSetHandler(*Context);
+
+ // Handle compilation units.
+ for (std::unique_ptr<CompileUnit> &CU : Context->CompileUnits)
+ if (CU->getStage() != CompileUnit::Stage::Skipped)
+ SectionsSetHandler(*CU);
+ }
+}
+
+void DWARFLinkerImpl::forEachCompileAndTypeUnit(
+ function_ref<void(DwarfUnit *CU)> UnitHandler) {
+ if (ArtificialTypeUnit.get() != nullptr)
+ UnitHandler(ArtificialTypeUnit.get());
+
+ // Enumerate module units.
+ for (const std::unique_ptr<LinkContext> &Context : ObjectContexts)
+ for (LinkContext::RefModuleUnit &ModuleUnit : Context->ModulesCompileUnits)
+ if (ModuleUnit.Unit->getStage() != CompileUnit::Stage::Skipped)
+ UnitHandler(ModuleUnit.Unit.get());
+
+ // Enumerate compile units.
+ for (const std::unique_ptr<LinkContext> &Context : ObjectContexts)
+ for (std::unique_ptr<CompileUnit> &CU : Context->CompileUnits)
+ if (CU->getStage() != CompileUnit::Stage::Skipped)
+ UnitHandler(CU.get());
+}
+
+void DWARFLinkerImpl::forEachCompileUnit(
+ function_ref<void(CompileUnit *CU)> UnitHandler) {
+ // Enumerate module units.
+ for (const std::unique_ptr<LinkContext> &Context : ObjectContexts)
+ for (LinkContext::RefModuleUnit &ModuleUnit : Context->ModulesCompileUnits)
+ if (ModuleUnit.Unit->getStage() != CompileUnit::Stage::Skipped)
+ UnitHandler(ModuleUnit.Unit.get());
+
+ // Enumerate compile units.
+ for (const std::unique_ptr<LinkContext> &Context : ObjectContexts)
+ for (std::unique_ptr<CompileUnit> &CU : Context->CompileUnits)
+ if (CU->getStage() != CompileUnit::Stage::Skipped)
+ UnitHandler(CU.get());
+}
+
+void DWARFLinkerImpl::patchOffsetsAndSizes() {
+ forEachObjectSectionsSet([&](OutputSections &SectionsSet) {
+ SectionsSet.forEach([&](SectionDescriptor &OutSection) {
+ SectionsSet.applyPatches(OutSection, DebugStrStrings, DebugLineStrStrings,
+ ArtificialTypeUnit.get());
+ });
+ });
+}
+
+void DWARFLinkerImpl::emitCommonSectionsAndWriteCompileUnitsToTheOutput() {
+ parallel::TaskGroup TG;
+
+ // Create section descriptors ahead of time if they do not exist yet.
+ // The SectionDescriptors container is not thread safe, so we must ensure
+ // that descriptors are not created by the following parallel tasks.
+
+ CommonSections.getOrCreateSectionDescriptor(DebugSectionKind::DebugStr);
+ CommonSections.getOrCreateSectionDescriptor(DebugSectionKind::DebugLineStr);
+
+ if (llvm::is_contained(GlobalData.Options.AccelTables,
+ AccelTableKind::Apple)) {
+ CommonSections.getOrCreateSectionDescriptor(DebugSectionKind::AppleNames);
+ CommonSections.getOrCreateSectionDescriptor(
+ DebugSectionKind::AppleNamespaces);
+ CommonSections.getOrCreateSectionDescriptor(DebugSectionKind::AppleObjC);
+ CommonSections.getOrCreateSectionDescriptor(DebugSectionKind::AppleTypes);
+ }
+
+ if (llvm::is_contained(GlobalData.Options.AccelTables,
+ AccelTableKind::DebugNames))
+ CommonSections.getOrCreateSectionDescriptor(DebugSectionKind::DebugNames);
+
+ const Triple &TargetTriple = TheDwarfEmitter->getTargetTriple();
+
+ // Emit .debug_str and .debug_line_str sections.
+ TG.spawn([&]() { emitStringSections(); });
+
+ if (llvm::is_contained(GlobalData.Options.AccelTables,
+ AccelTableKind::Apple)) {
+ // Emit apple accelerator sections.
+ TG.spawn([&]() { emitAppleAcceleratorSections(TargetTriple); });
+ }
+
+ if (llvm::is_contained(GlobalData.Options.AccelTables,
+ AccelTableKind::DebugNames)) {
+ // Emit .debug_names section.
+ TG.spawn([&]() { emitDWARFv5DebugNamesSection(TargetTriple); });
+ }
+
+ // Write compile units to the output file.
+ TG.spawn([&]() { writeCompileUnitsToTheOutput(); });
+}
+
+void DWARFLinkerImpl::emitStringSections() {
+ uint64_t DebugStrNextOffset = 0;
+ uint64_t DebugLineStrNextOffset = 0;
+
+ // Emit a zero-length string. Accelerator tables do not work correctly
+ // if the first string is not a zero-length string.
+ CommonSections.getSectionDescriptor(DebugSectionKind::DebugStr)
+ .emitInplaceString("");
+ DebugStrNextOffset++;
+
+ forEachOutputString(
+ [&](StringDestinationKind Kind, const StringEntry *String) {
+ switch (Kind) {
+ case StringDestinationKind::DebugStr: {
+ DwarfStringPoolEntryWithExtString *StringToEmit =
+ DebugStrStrings.getExistingEntry(String);
+ assert(StringToEmit->isIndexed());
+
+ // Strings may be repeated. Use the accumulated DebugStrNextOffset
+ // to understand whether the corresponding string was already emitted.
+ // Skip the string if its offset is less than the accumulated offset.
+ if (StringToEmit->Offset >= DebugStrNextOffset) {
+ DebugStrNextOffset =
+ StringToEmit->Offset + StringToEmit->String.size() + 1;
+ // Emit the string itself.
+ CommonSections.getSectionDescriptor(DebugSectionKind::DebugStr)
+ .emitInplaceString(StringToEmit->String);
+ }
+ } break;
+ case StringDestinationKind::DebugLineStr: {
+ DwarfStringPoolEntryWithExtString *StringToEmit =
+ DebugLineStrStrings.getExistingEntry(String);
+ assert(StringToEmit->isIndexed());
+
+ // Strings may be repeated. Use the accumulated DebugLineStrNextOffset
+ // to understand whether the corresponding string was already emitted.
+ // Skip the string if its offset is less than the accumulated offset.
+ if (StringToEmit->Offset >= DebugLineStrNextOffset) {
+ DebugLineStrNextOffset =
+ StringToEmit->Offset + StringToEmit->String.size() + 1;
+ // Emit the string itself.
+ CommonSections.getSectionDescriptor(DebugSectionKind::DebugLineStr)
+ .emitInplaceString(StringToEmit->String);
+ }
+ } break;
+ }
+ });
+}
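
The skip-by-offset check above works because every string received its offset during indexing, so any entry whose offset is behind the running cursor must be a duplicate of an already emitted string. A standalone sketch of that invariant (simplified types, not the LLVM classes):

    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <vector>

    struct IndexedString {
      uint64_t Offset;   // offset assigned when first indexed; duplicates
      std::string Value; // carry the offset of the first occurrence
    };

    int main() {
      std::vector<IndexedString> Pool = {
          {0, ""}, {1, "foo"}, {1, "foo"}, {5, "bar"}};

      uint64_t NextOffset = 0;
      for (const IndexedString &S : Pool) {
        // An offset behind the running cursor means "already emitted".
        if (S.Offset < NextOffset)
          continue;
        std::cout << "emit @" << S.Offset << ": \"" << S.Value << "\"\n";
        NextOffset = S.Offset + S.Value.size() + 1; // +1 for the NUL byte
      }
    }
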
+
+void DWARFLinkerImpl::emitAppleAcceleratorSections(const Triple &TargetTriple) {
+ AccelTable<AppleAccelTableStaticOffsetData> AppleNamespaces;
+ AccelTable<AppleAccelTableStaticOffsetData> AppleNames;
+ AccelTable<AppleAccelTableStaticOffsetData> AppleObjC;
+ AccelTable<AppleAccelTableStaticTypeData> AppleTypes;
+
+ forEachCompileAndTypeUnit([&](DwarfUnit *CU) {
+ CU->forEachAcceleratorRecord([&](const DwarfUnit::AccelInfo &Info) {
+ uint64_t OutOffset = Info.OutOffset;
+ switch (Info.Type) {
+ case DwarfUnit::AccelType::None: {
+ llvm_unreachable("Unknown accelerator record");
+ } break;
+ case DwarfUnit::AccelType::Namespace: {
+ AppleNamespaces.addName(
+ *DebugStrStrings.getExistingEntry(Info.String),
+ CU->getSectionDescriptor(DebugSectionKind::DebugInfo).StartOffset +
+ OutOffset);
+ } break;
+ case DwarfUnit::AccelType::Name: {
+ AppleNames.addName(
+ *DebugStrStrings.getExistingEntry(Info.String),
+ CU->getSectionDescriptor(DebugSectionKind::DebugInfo).StartOffset +
+ OutOffset);
+ } break;
+ case DwarfUnit::AccelType::ObjC: {
+ AppleObjC.addName(
+ *DebugStrStrings.getExistingEntry(Info.String),
+ CU->getSectionDescriptor(DebugSectionKind::DebugInfo).StartOffset +
+ OutOffset);
+ } break;
+ case DwarfUnit::AccelType::Type: {
+ AppleTypes.addName(
+ *DebugStrStrings.getExistingEntry(Info.String),
+ CU->getSectionDescriptor(DebugSectionKind::DebugInfo).StartOffset +
+ OutOffset,
+ Info.Tag,
+ Info.ObjcClassImplementation ? dwarf::DW_FLAG_type_implementation
+ : 0,
+ Info.QualifiedNameHash);
+ } break;
+ }
+ });
+ });
+
+ {
+ // FIXME: we use AsmPrinter to emit accelerator sections.
+ // It might be beneficial to directly emit accelerator data
+ // to the raw_svector_ostream.
+ SectionDescriptor &OutSection =
+ CommonSections.getSectionDescriptor(DebugSectionKind::AppleNamespaces);
+ DwarfEmitterImpl Emitter(DWARFLinker::OutputFileType::Object,
+ OutSection.OS);
+ if (Error Err = Emitter.init(TargetTriple, "__DWARF")) {
+ consumeError(std::move(Err));
+ return;
+ }
+
+ // Emit table.
+ Emitter.emitAppleNamespaces(AppleNamespaces);
+ Emitter.finish();
+
+ // Set start offset and size for output section.
+ OutSection.setSizesForSectionCreatedByAsmPrinter();
+ }
+
+ {
+ // FIXME: we use AsmPrinter to emit accelerator sections.
+ // It might be beneficial to directly emit accelerator data
+ // to the raw_svector_ostream.
+ SectionDescriptor &OutSection =
+ CommonSections.getSectionDescriptor(DebugSectionKind::AppleNames);
+ DwarfEmitterImpl Emitter(DWARFLinker::OutputFileType::Object,
+ OutSection.OS);
+ if (Error Err = Emitter.init(TargetTriple, "__DWARF")) {
+ consumeError(std::move(Err));
+ return;
+ }
+
+ // Emit table.
+ Emitter.emitAppleNames(AppleNames);
+ Emitter.finish();
+
+ // Set start offset and size for output section.
+ OutSection.setSizesForSectionCreatedByAsmPrinter();
+ }
+
+ {
+ // FIXME: we use AsmPrinter to emit accelerator sections.
+ // It might be beneficial to directly emit accelerator data
+ // to the raw_svector_ostream.
+ SectionDescriptor &OutSection =
+ CommonSections.getSectionDescriptor(DebugSectionKind::AppleObjC);
+ DwarfEmitterImpl Emitter(DWARFLinker::OutputFileType::Object,
+ OutSection.OS);
+ if (Error Err = Emitter.init(TargetTriple, "__DWARF")) {
+ consumeError(std::move(Err));
+ return;
+ }
+
+ // Emit table.
+ Emitter.emitAppleObjc(AppleObjC);
+ Emitter.finish();
+
+ // Set start offset and size for output section.
+ OutSection.setSizesForSectionCreatedByAsmPrinter();
+ }
+
+ {
+ // FIXME: we use AsmPrinter to emit accelerator sections.
+ // It might be beneficial to directly emit accelerator data
+ // to the raw_svector_ostream.
+ SectionDescriptor &OutSection =
+ CommonSections.getSectionDescriptor(DebugSectionKind::AppleTypes);
+ DwarfEmitterImpl Emitter(DWARFLinker::OutputFileType::Object,
+ OutSection.OS);
+ if (Error Err = Emitter.init(TargetTriple, "__DWARF")) {
+ consumeError(std::move(Err));
+ return;
+ }
+
+ // Emit table.
+ Emitter.emitAppleTypes(AppleTypes);
+ Emitter.finish();
+
+ // Set start offset and size for output section.
+ OutSection.setSizesForSectionCreatedByAsmPrinter();
+ }
+}
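
The four blocks above repeat the same init/emit/finish/set-sizes sequence and differ only in the target section and the emit call. A hypothetical refactor sketch with placeholder types (not the actual DwarfEmitterImpl or SectionDescriptor API):

    #include <functional>
    #include <iostream>
    #include <string>

    struct Section {
      std::string Name;
      unsigned Size = 0;
    };

    // Placeholder emitter standing in for DwarfEmitterImpl.
    struct Emitter {
      Section &Out;
      bool init() { return true; }     // may fail in the real code
      void finish() { Out.Size = 42; } // pretend sizes get finalized here
    };

    bool emitAccelSection(Section &Out,
                          const std::function<void(Emitter &)> &EmitTable) {
      Emitter E{Out};
      if (!E.init())
        return false; // mirror the early return on init failure above
      EmitTable(E);   // the only part that differs between the four blocks
      E.finish();
      return true;
    }

    int main() {
      Section AppleNames{"apple_names"};
      emitAccelSection(AppleNames, [](Emitter &) { /* emit table records */ });
      std::cout << AppleNames.Name << " size=" << AppleNames.Size << "\n";
    }
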
+
+void DWARFLinkerImpl::emitDWARFv5DebugNamesSection(const Triple &TargetTriple) {
+ std::unique_ptr<DWARF5AccelTable> DebugNames;
+
+ DebugNamesUnitsOffsets CompUnits;
+ CompUnitIDToIdx CUidToIdx;
+
+ unsigned Id = 0;
+
+ forEachCompileAndTypeUnit([&](DwarfUnit *CU) {
+ bool HasRecords = false;
+ CU->forEachAcceleratorRecord([&](const DwarfUnit::AccelInfo &Info) {
+ if (DebugNames.get() == nullptr)
+ DebugNames = std::make_unique<DWARF5AccelTable>();
+
+ HasRecords = true;
+ switch (Info.Type) {
+ case DwarfUnit::AccelType::Name:
+ case DwarfUnit::AccelType::Namespace:
+ case DwarfUnit::AccelType::Type: {
+ DebugNames->addName(*DebugStrStrings.getExistingEntry(Info.String),
+ Info.OutOffset, Info.Tag, CU->getUniqueID());
+ } break;
+
+ default:
+ break; // Nothing to do.
+ };
+ });
+
+ if (HasRecords) {
+ CompUnits.push_back(
+ CU->getOrCreateSectionDescriptor(DebugSectionKind::DebugInfo)
+ .StartOffset);
+ CUidToIdx[CU->getUniqueID()] = Id++;
+ }
+ });
+
+ if (DebugNames.get() != nullptr) {
+ // FIXME: we use AsmPrinter to emit accelerator sections.
+ // It might be beneficial to directly emit accelerator data
+ // to the raw_svector_ostream.
+ SectionDescriptor &OutSection =
+ CommonSections.getSectionDescriptor(DebugSectionKind::DebugNames);
+ DwarfEmitterImpl Emitter(DWARFLinker::OutputFileType::Object,
+ OutSection.OS);
+ if (Error Err = Emitter.init(TargetTriple, "__DWARF")) {
+ consumeError(std::move(Err));
+ return;
+ }
+
+ // Emit table.
+ Emitter.emitDebugNames(*DebugNames, CompUnits, CUidToIdx);
+ Emitter.finish();
+
+ // Set start offset and size for output section.
+ OutSection.setSizesForSectionCreatedByAsmPrinter();
+ }
+}
+
+void DWARFLinkerImpl::cleanupDataAfterDWARFOutputIsWritten() {
+ GlobalData.getStringPool().clear();
+ DebugStrStrings.clear();
+ DebugLineStrStrings.clear();
+}
+
+void DWARFLinkerImpl::writeCompileUnitsToTheOutput() {
+ bool HasAbbreviations = false;
+
+ // Enumerate all sections and store them into the final emitter.
+ forEachObjectSectionsSet([&](OutputSections &Sections) {
+ Sections.forEach([&](SectionDescriptor &OutSection) {
+ if (!HasAbbreviations && !OutSection.getContents().empty() &&
+ OutSection.getKind() == DebugSectionKind::DebugAbbrev)
+ HasAbbreviations = true;
+
+ // Emit section content.
+ TheDwarfEmitter->emitSectionContents(OutSection.getContents(),
+ OutSection.getName());
+ OutSection.clearSectionContent();
+ });
+ });
+
+ if (!HasAbbreviations) {
+ const SmallVector<std::unique_ptr<DIEAbbrev>> Abbreviations;
+ TheDwarfEmitter->emitAbbrevs(Abbreviations, 3);
+ }
+}
+
+void DWARFLinkerImpl::writeCommonSectionsToTheOutput() {
+ CommonSections.forEach([&](SectionDescriptor &OutSection) {
+ // Emit section content.
+ TheDwarfEmitter->emitSectionContents(OutSection.getContents(),
+ OutSection.getName());
+ OutSection.clearSectionContent();
+ });
+}
+
} // end of namespace dwarflinker_parallel
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerImpl.h b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerImpl.h
index a8fa9b4b46d8..60018eea121f 100644
--- a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerImpl.h
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerImpl.h
@@ -11,29 +11,24 @@
#include "DWARFEmitterImpl.h"
#include "DWARFLinkerCompileUnit.h"
+#include "DWARFLinkerTypeUnit.h"
+#include "StringEntryToDwarfStringPoolEntryMap.h"
#include "llvm/ADT/AddressRanges.h"
#include "llvm/CodeGen/AccelTable.h"
#include "llvm/DWARFLinkerParallel/DWARFLinker.h"
#include "llvm/DWARFLinkerParallel/StringPool.h"
-#include "llvm/DWARFLinkerParallel/StringTable.h"
namespace llvm {
namespace dwarflinker_parallel {
-using Offset2UnitMapTy = DenseMap<uint64_t, CompileUnit *>;
-
-struct RangeAttrPatch;
-struct LocAttrPatch;
-
+/// This class links debug info.
class DWARFLinkerImpl : public DWARFLinker {
public:
DWARFLinkerImpl(MessageHandlerTy ErrorHandler,
MessageHandlerTy WarningHandler,
- TranslatorFuncTy StringsTranslator)
- : UniqueUnitID(0), ErrorHandler(ErrorHandler),
- WarningHandler(WarningHandler),
- OutputStrings(Strings, StringsTranslator) {}
+ TranslatorFuncTy StringsTranslator);
+ /// Create debug info emitter.
Error createEmitter(const Triple &TheTriple, OutputFileType FileType,
raw_pwrite_stream &OutFile) override;
@@ -47,13 +42,11 @@ public:
/// \pre The NoODR and Update options should be set before calling addObjectFile.
void addObjectFile(
DWARFFile &File, ObjFileLoaderTy Loader = nullptr,
- CompileUnitHandlerTy OnCUDieLoaded = [](const DWARFUnit &) {}) override {}
+ CompileUnitHandlerTy OnCUDieLoaded = [](const DWARFUnit &) {}) override;
/// Link debug info for added files.
- Error link() override {
- reportWarning("LLVM parallel dwarflinker is not implemented yet.", "");
- return Error::success();
- }
+ Error link() override;
/// \defgroup Methods setting various linking options:
///
@@ -61,73 +54,74 @@ public:
///
/// Allows to generate log of linking process to the standard output.
- void setVerbosity(bool Verbose) override { Options.Verbose = Verbose; }
+ void setVerbosity(bool Verbose) override {
+ GlobalData.Options.Verbose = Verbose;
+ }
/// Print statistics to standard output.
void setStatistics(bool Statistics) override {
- Options.Statistics = Statistics;
+ GlobalData.Options.Statistics = Statistics;
}
/// Verify the input DWARF.
void setVerifyInputDWARF(bool Verify) override {
- Options.VerifyInputDWARF = Verify;
+ GlobalData.Options.VerifyInputDWARF = Verify;
}
/// Do not unique types according to ODR.
- void setNoODR(bool NoODR) override { Options.NoODR = NoODR; }
+ void setNoODR(bool NoODR) override { GlobalData.Options.NoODR = NoODR; }
/// Update index tables only(do not modify rest of DWARF).
void setUpdateIndexTablesOnly(bool UpdateIndexTablesOnly) override {
- Options.UpdateIndexTablesOnly = UpdateIndexTablesOnly;
+ GlobalData.Options.UpdateIndexTablesOnly = UpdateIndexTablesOnly;
}
/// Allow generating valid, but non-deterministic output.
void
setAllowNonDeterministicOutput(bool AllowNonDeterministicOutput) override {
- Options.AllowNonDeterministicOutput = AllowNonDeterministicOutput;
+ GlobalData.Options.AllowNonDeterministicOutput =
+ AllowNonDeterministicOutput;
}
/// Set to keep the enclosing function for a static variable.
void setKeepFunctionForStatic(bool KeepFunctionForStatic) override {
- Options.KeepFunctionForStatic = KeepFunctionForStatic;
+ GlobalData.Options.KeepFunctionForStatic = KeepFunctionForStatic;
}
/// Use specified number of threads for parallel files linking.
void setNumThreads(unsigned NumThreads) override {
- Options.Threads = NumThreads;
+ GlobalData.Options.Threads = NumThreads;
}
/// Add kind of accelerator tables to be generated.
void addAccelTableKind(AccelTableKind Kind) override {
- assert(!llvm::is_contained(Options.AccelTables, Kind));
- Options.AccelTables.emplace_back(Kind);
+ assert(!llvm::is_contained(GlobalData.getOptions().AccelTables, Kind));
+ GlobalData.Options.AccelTables.emplace_back(Kind);
}
/// Set prepend path for clang modules.
void setPrependPath(const std::string &Ppath) override {
- Options.PrependPath = Ppath;
+ GlobalData.Options.PrependPath = Ppath;
}
/// Set estimated objects files amount, for preliminary data allocation.
- void setEstimatedObjfilesAmount(unsigned ObjFilesNum) override {
- ObjectContexts.reserve(ObjFilesNum);
- }
+ void setEstimatedObjfilesAmount(unsigned ObjFilesNum) override;
/// Set verification handler which would be used to report verification
/// errors.
void
setInputVerificationHandler(InputVerificationHandlerTy Handler) override {
- Options.InputVerificationHandler = Handler;
+ GlobalData.Options.InputVerificationHandler = Handler;
}
/// Set map for Swift interfaces.
void setSwiftInterfacesMap(SwiftInterfacesMapTy *Map) override {
- Options.ParseableSwiftInterfaces = Map;
+ GlobalData.Options.ParseableSwiftInterfaces = Map;
}
/// Set prefix map for objects.
void setObjectPrefixMap(ObjectPrefixMapTy *Map) override {
- Options.ObjectPrefixMap = Map;
+ GlobalData.Options.ObjectPrefixMap = Map;
}
/// Set target DWARF version.
@@ -137,36 +131,28 @@ public:
"unsupported DWARF version: %d",
TargetDWARFVersion);
- Options.TargetDWARFVersion = TargetDWARFVersion;
+ GlobalData.Options.TargetDWARFVersion = TargetDWARFVersion;
return Error::success();
}
/// @}
protected:
- /// Reports Warning.
- void reportWarning(const Twine &Warning, const DWARFFile &File,
- const DWARFDie *DIE = nullptr) const {
- if (WarningHandler != nullptr)
- WarningHandler(Warning, File.FileName, DIE);
- }
+ /// Verify input DWARF file.
+ void verifyInput(const DWARFFile &File);
- /// Reports Warning.
- void reportWarning(const Twine &Warning, StringRef FileName,
- const DWARFDie *DIE = nullptr) const {
- if (WarningHandler != nullptr)
- WarningHandler(Warning, FileName, DIE);
- }
+ /// Validate specified options.
+ Error validateAndUpdateOptions();
- /// Reports Error.
- void reportError(const Twine &Warning, StringRef FileName,
- const DWARFDie *DIE = nullptr) const {
- if (ErrorHandler != nullptr)
- ErrorHandler(Warning, FileName, DIE);
- }
+ /// Take already linked compile units and glue them into a single file.
+ void glueCompileUnitsAndWriteToTheOutput();
- /// Returns next available unique Compile Unit ID.
- unsigned getNextUniqueUnitID() { return UniqueUnitID.fetch_add(1); }
+ /// Hold the input and output of the debug info size in bytes.
+ struct DebugInfoSize {
+ uint64_t Input;
+ uint64_t Output;
+ };
+ friend class DependencyTracker;
/// Keeps track of data associated with one object during linking.
/// i.e. the source file descriptor, compilation units, and output data
/// for the compilation units' common tables.
@@ -176,10 +162,8 @@ protected:
/// Keep information for referenced clang module: already loaded DWARF info
/// of the clang module and a CompileUnit of the module.
struct RefModuleUnit {
- RefModuleUnit(DWARFFile &File, std::unique_ptr<CompileUnit> Unit)
- : File(File), Unit(std::move(Unit)) {}
- RefModuleUnit(RefModuleUnit &&Other)
- : File(Other.File), Unit(std::move(Other.Unit)) {}
+ RefModuleUnit(DWARFFile &File, std::unique_ptr<CompileUnit> Unit);
+ RefModuleUnit(RefModuleUnit &&Other);
RefModuleUnit(const RefModuleUnit &) = delete;
DWARFFile &File;
@@ -188,7 +172,7 @@ protected:
using ModuleUnitListTy = SmallVector<RefModuleUnit>;
/// Object file descriptor.
- DWARFFile &File;
+ DWARFFile &InputDWARFFile;
/// Set of Compilation Units (may be accessed asynchronously for reading).
UnitListTy CompileUnits;
@@ -199,117 +183,194 @@ protected:
/// Size of Debug info before optimizing.
uint64_t OriginalDebugInfoSize = 0;
- /// Output sections, common for all compilation units.
- OutTablesFileTy OutDebugInfoBytes;
+ /// Flag indicating that all inter-connected units are loaded
+ /// and the DWARF linking process for these units has started.
+ bool InterCUProcessingStarted = false;
- /// Endianness for the final file.
- support::endianness Endianess = support::endianness::little;
+ StringMap<uint64_t> &ClangModules;
- LinkContext(DWARFFile &File) : File(File) {
- if (File.Dwarf) {
- if (!File.Dwarf->compile_units().empty())
- CompileUnits.reserve(File.Dwarf->getNumCompileUnits());
+ std::optional<Triple> TargetTriple;
- Endianess = File.Dwarf->isLittleEndian() ? support::endianness::little
- : support::endianness::big;
- }
- }
+ /// Flag indicating that new inter-connected compilation units were
+ /// discovered. It is used to restart unit processing
+ /// when new inter-connected units are found.
+ std::atomic<bool> HasNewInterconnectedCUs = {false};
+
+ std::atomic<bool> HasNewGlobalDependency = {false};
+
+ /// Counter for compile units ID.
+ std::atomic<size_t> &UniqueUnitID;
+
+ LinkContext(LinkingGlobalData &GlobalData, DWARFFile &File,
+ StringMap<uint64_t> &ClangModules,
+ std::atomic<size_t> &UniqueUnitID,
+ std::optional<Triple> TargetTriple);
+
+ /// Check whether the specified \p CUDie is a Clang module reference.
+ /// If \p Quiet is false then error messages are displayed.
+ /// \return first == true if CUDie is a Clang module reference.
+ /// second == true if module is already loaded.
+ std::pair<bool, bool> isClangModuleRef(const DWARFDie &CUDie,
+ std::string &PCMFile,
+ unsigned Indent, bool Quiet);
+
+ /// If this compile unit is really a skeleton CU that points to a
+ /// clang module, register it in ClangModules and return true.
+ ///
+ /// A skeleton CU is a CU without children, a DW_AT_gnu_dwo_name
+ /// pointing to the module, and a DW_AT_gnu_dwo_id with the module
+ /// hash.
+ bool registerModuleReference(const DWARFDie &CUDie, ObjFileLoaderTy Loader,
+ CompileUnitHandlerTy OnCUDieLoaded,
+ unsigned Indent = 0);
+
+ /// Recursively add the debug info in this clang module .pcm
+ /// file (and all the modules imported by it in a bottom-up fashion)
+ /// to ModuleUnits.
+ Error loadClangModule(ObjFileLoaderTy Loader, const DWARFDie &CUDie,
+ const std::string &PCMFile,
+ CompileUnitHandlerTy OnCUDieLoaded,
+ unsigned Indent = 0);
/// Add Compile Unit corresponding to the module.
- void addModulesCompileUnit(RefModuleUnit &&Unit) {
- ModulesCompileUnits.emplace_back(std::move(Unit));
+ void addModulesCompileUnit(RefModuleUnit &&Unit);
+
+ /// Computes the total size of the debug info.
+ uint64_t getInputDebugInfoSize() const {
+ uint64_t Size = 0;
+
+ if (InputDWARFFile.Dwarf == nullptr)
+ return Size;
+
+ for (auto &Unit : InputDWARFFile.Dwarf->compile_units())
+ Size += Unit->getLength();
+
+ return Size;
}
- /// Return Endiannes of the source DWARF information.
- support::endianness getEndianness() { return Endianess; }
+ /// Link compile units for this context.
+ Error link(TypeUnit *ArtificialTypeUnit);
+
+ /// Link specified compile unit until specified stage.
+ void linkSingleCompileUnit(
+ CompileUnit &CU, TypeUnit *ArtificialTypeUnit,
+ enum CompileUnit::Stage DoUntilStage = CompileUnit::Stage::Cleaned);
+
+ /// Emit invariant sections.
+ Error emitInvariantSections();
- /// \returns pointer to compilation unit which corresponds \p Offset.
- CompileUnit *getUnitForOffset(CompileUnit &CU, uint64_t Offset) const;
+ /// Clone and emit .debug_frame.
+ Error cloneAndEmitDebugFrame();
+
+ /// Emit FDE record.
+ void emitFDE(uint32_t CIEOffset, uint32_t AddrSize, uint64_t Address,
+ StringRef FDEBytes, SectionDescriptor &Section);
+
+ std::function<CompileUnit *(uint64_t)> getUnitForOffset =
+ [&](uint64_t Offset) -> CompileUnit * {
+ auto CU = llvm::upper_bound(
+ CompileUnits, Offset,
+ [](uint64_t LHS, const std::unique_ptr<CompileUnit> &RHS) {
+ return LHS < RHS->getOrigUnit().getNextUnitOffset();
+ });
+
+ return CU != CompileUnits.end() ? CU->get() : nullptr;
+ };
};
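
The getUnitForOffset lambda above maps a DIE offset to its owning compile unit by binary search: units are ordered by their end offsets, and upper_bound returns the first unit whose next-unit offset exceeds the query. A self-contained model of the same lookup (simplified Unit type, not LLVM's CompileUnit):

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    struct Unit {
      uint64_t NextUnitOffset; // one past the end of this unit's data
    };

    // The first unit whose end lies beyond Offset is the unit containing
    // it, assuming units are contiguous and sorted.
    const Unit *unitForOffset(const std::vector<Unit> &Units, uint64_t Offset) {
      auto It = std::upper_bound(Units.begin(), Units.end(), Offset,
                                 [](uint64_t LHS, const Unit &RHS) {
                                   return LHS < RHS.NextUnitOffset;
                                 });
      return It != Units.end() ? &*It : nullptr;
    }

    int main() {
      std::vector<Unit> Units = {{0x40}, {0x90}, {0x100}};
      // 0x50 lies in [0x40, 0x90), i.e. the second unit.
      std::cout << (unitForOffset(Units, 0x50) == &Units[1]) << '\n'; // 1
    }
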
- /// linking options
- struct DWARFLinkerOptions {
- /// DWARF version for the output.
- uint16_t TargetDWARFVersion = 0;
+ /// Enumerate all compile units and assign offsets to their sections and
+ /// strings.
+ void assignOffsets();
- /// Generate processing log to the standard output.
- bool Verbose = false;
+ /// Enumerate all compile units and assign offsets to their sections.
+ void assignOffsetsToSections();
- /// Print statistics.
- bool Statistics = false;
+ /// Enumerate all compile units and assign offsets to their strings.
+ void assignOffsetsToStrings();
- /// Verify the input DWARF.
- bool VerifyInputDWARF = false;
+ /// Print statistics for the processed debug info.
+ void printStatistic();
- /// Do not unique types according to ODR
- bool NoODR = false;
+ enum StringDestinationKind : uint8_t { DebugStr, DebugLineStr };
- /// Update index tables.
- bool UpdateIndexTablesOnly = false;
+ /// Enumerates all strings.
+ void forEachOutputString(
+ function_ref<void(StringDestinationKind, const StringEntry *)>
+ StringHandler);
- /// Whether we want a static variable to force us to keep its enclosing
- /// function.
- bool KeepFunctionForStatic = false;
+ /// Enumerates the section sets for modules, invariant object-file
+ /// sections, and compile units.
+ void forEachObjectSectionsSet(
+ function_ref<void(OutputSections &SectionsSet)> SectionsSetHandler);
- /// Allow to generate valid, but non deterministic output.
- bool AllowNonDeterministicOutput = false;
+ /// Enumerates all compile and type units.
+ void forEachCompileAndTypeUnit(function_ref<void(DwarfUnit *CU)> UnitHandler);
- /// Number of threads.
- unsigned Threads = 1;
+ /// Enumerates all compile units.
+ void forEachCompileUnit(function_ref<void(CompileUnit *CU)> UnitHandler);
- /// The accelerator table kinds
- SmallVector<AccelTableKind, 1> AccelTables;
+ /// Enumerates all patches and updates them with the correct values.
+ void patchOffsetsAndSizes();
- /// Prepend path for the clang modules.
- std::string PrependPath;
+ /// Emit debug sections common for all input files.
+ void emitCommonSectionsAndWriteCompileUnitsToTheOutput();
- /// input verification handler(it might be called asynchronously).
- InputVerificationHandlerTy InputVerificationHandler = nullptr;
+ /// Emit apple accelerator sections.
+ void emitAppleAcceleratorSections(const Triple &TargetTriple);
- /// A list of all .swiftinterface files referenced by the debug
- /// info, mapping Module name to path on disk. The entries need to
- /// be uniqued and sorted and there are only few entries expected
- /// per compile unit, which is why this is a std::map.
- /// this is dsymutil specific fag.
- ///
- /// (it might be called asynchronously).
- SwiftInterfacesMapTy *ParseableSwiftInterfaces = nullptr;
+ /// Emit .debug_names section.
+ void emitDWARFv5DebugNamesSection(const Triple &TargetTriple);
- /// A list of remappings to apply to file paths.
- ///
- /// (it might be called asynchronously).
- ObjectPrefixMapTy *ObjectPrefixMap = nullptr;
- } Options;
+ /// Emit string sections.
+ void emitStringSections();
+
+ /// Clean up data (string pools) after output sections are generated.
+ void cleanupDataAfterDWARFOutputIsWritten();
+
+ /// Enumerate all compile units and put their data into the output stream.
+ void writeCompileUnitsToTheOutput();
+
+ /// Enumerate common sections and put their data into the output stream.
+ void writeCommonSectionsToTheOutput();
/// \defgroup Data members accessed asynchronously.
///
/// @{
/// Unique ID for compile unit.
- std::atomic<unsigned> UniqueUnitID;
+ std::atomic<size_t> UniqueUnitID;
- /// Strings pool. Keeps all strings.
- StringPool Strings;
+ /// Mapping the PCM filename to the DwoId.
+ StringMap<uint64_t> ClangModules;
+ std::mutex ClangModulesMutex;
- /// error handler(it might be called asynchronously).
- MessageHandlerTy ErrorHandler = nullptr;
-
- /// warning handler(it might be called asynchronously).
- MessageHandlerTy WarningHandler = nullptr;
+ /// Type unit.
+ std::unique_ptr<TypeUnit> ArtificialTypeUnit;
/// @}
/// \defgroup Data members accessed sequentially.
///
/// @{
+ /// DwarfStringPoolEntries for .debug_str section.
+ StringEntryToDwarfStringPoolEntryMap DebugStrStrings;
- /// Set of strings which should be emitted.
- StringTable OutputStrings;
+ /// DwarfStringPoolEntries for .debug_line_str section.
+ StringEntryToDwarfStringPoolEntryMap DebugLineStrStrings;
/// Keeps all linking contexts.
SmallVector<std::unique_ptr<LinkContext>> ObjectContexts;
+ /// Common sections.
+ OutputSections CommonSections;
+
/// The emitter of final dwarf file.
std::unique_ptr<DwarfEmitterImpl> TheDwarfEmitter;
+
+ /// Overall compile units number.
+ uint64_t OverallNumberOfCU = 0;
+
+ /// Data global for the whole linking process.
+ LinkingGlobalData GlobalData;
/// @}
};
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerTypeUnit.cpp b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerTypeUnit.cpp
new file mode 100644
index 000000000000..9d5c213085c2
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerTypeUnit.cpp
@@ -0,0 +1,391 @@
+//===- DWARFLinkerTypeUnit.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFLinkerTypeUnit.h"
+#include "DIEGenerator.h"
+#include "DWARFEmitterImpl.h"
+#include "llvm/Support/LEB128.h"
+
+using namespace llvm;
+using namespace llvm::dwarflinker_parallel;
+
+TypeUnit::TypeUnit(LinkingGlobalData &GlobalData, unsigned ID,
+ std::optional<uint16_t> Language, dwarf::FormParams Format,
+ endianness Endianess)
+ : DwarfUnit(GlobalData, ID, ""), Language(Language),
+ AcceleratorRecords(&GlobalData.getAllocator()) {
+
+ UnitName = "__artificial_type_unit";
+
+ setOutputFormat(Format, Endianess);
+
+ // Create line table prologue.
+ LineTable.Prologue.FormParams = getFormParams();
+ LineTable.Prologue.MinInstLength = 1;
+ LineTable.Prologue.MaxOpsPerInst = 1;
+ LineTable.Prologue.DefaultIsStmt = 1;
+ LineTable.Prologue.LineBase = -5;
+ LineTable.Prologue.LineRange = 14;
+ LineTable.Prologue.OpcodeBase = 13;
+ LineTable.Prologue.StandardOpcodeLengths = {0, 1, 1, 1, 1, 0,
+ 0, 0, 1, 0, 0, 1};
+
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugInfo);
+}
+
+void TypeUnit::createDIETree(BumpPtrAllocator &Allocator) {
+ prepareDataForTreeCreation();
+
+ // TaskGroup is created here as internal code has calls to
+ // PerThreadBumpPtrAllocator which should be called from the task group task.
+ parallel::TaskGroup TG;
+ TG.spawn([&]() {
+ SectionDescriptor &DebugInfoSection =
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugInfo);
+ SectionDescriptor &DebugLineSection =
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugLine);
+
+ DIEGenerator DIETreeGenerator(Allocator, *this);
+ OffsetsPtrVector PatchesOffsets;
+
+ // Create a Die for artificial compilation unit for types.
+ DIE *UnitDIE = DIETreeGenerator.createDIE(dwarf::DW_TAG_compile_unit, 0);
+ uint64_t OutOffset = getDebugInfoHeaderSize();
+ UnitDIE->setOffset(OutOffset);
+
+ SmallString<200> ProducerString;
+ ProducerString += "llvm DWARFLinkerParallel library version ";
+ DebugInfoSection.notePatchWithOffsetUpdate(
+ DebugStrPatch{
+ {OutOffset},
+ GlobalData.getStringPool().insert(ProducerString.str()).first},
+ PatchesOffsets);
+ OutOffset += DIETreeGenerator
+ .addStringPlaceholderAttribute(dwarf::DW_AT_producer,
+ dwarf::DW_FORM_strp)
+ .second;
+
+ if (Language) {
+ OutOffset += DIETreeGenerator
+ .addScalarAttribute(dwarf::DW_AT_language,
+ dwarf::DW_FORM_data2, *Language)
+ .second;
+ }
+
+ DebugInfoSection.notePatchWithOffsetUpdate(
+ DebugStrPatch{{OutOffset},
+ GlobalData.getStringPool().insert(getUnitName()).first},
+ PatchesOffsets);
+ OutOffset += DIETreeGenerator
+ .addStringPlaceholderAttribute(dwarf::DW_AT_name,
+ dwarf::DW_FORM_strp)
+ .second;
+
+ if (!LineTable.Prologue.FileNames.empty()) {
+ DebugInfoSection.notePatchWithOffsetUpdate(
+ DebugOffsetPatch{OutOffset, &DebugLineSection}, PatchesOffsets);
+
+ OutOffset += DIETreeGenerator
+ .addScalarAttribute(dwarf::DW_AT_stmt_list,
+ dwarf::DW_FORM_sec_offset, 0xbaddef)
+ .second;
+ }
+
+ DebugInfoSection.notePatchWithOffsetUpdate(
+ DebugStrPatch{{OutOffset}, GlobalData.getStringPool().insert("").first},
+ PatchesOffsets);
+ OutOffset += DIETreeGenerator
+ .addStringPlaceholderAttribute(dwarf::DW_AT_comp_dir,
+ dwarf::DW_FORM_strp)
+ .second;
+
+ if (!DebugStringIndexMap.empty()) {
+ // The type unit is assumed to be emitted first, so we can use a direct
+ // value for the DW_AT_str_offsets_base attribute (no need to fix it up
+ // with the unit offset value).
+ OutOffset += DIETreeGenerator
+ .addScalarAttribute(dwarf::DW_AT_str_offsets_base,
+ dwarf::DW_FORM_sec_offset,
+ getDebugStrOffsetsHeaderSize())
+ .second;
+ }
+
+ UnitDIE->setSize(OutOffset - UnitDIE->getOffset() + 1);
+ OutOffset =
+ finalizeTypeEntryRec(UnitDIE->getOffset(), UnitDIE, Types.getRoot());
+
+ // Update patch offsets.
+ for (uint64_t *OffsetPtr : PatchesOffsets)
+ *OffsetPtr += getULEB128Size(UnitDIE->getAbbrevNumber());
+
+ setOutUnitDIE(UnitDIE);
+ });
+}
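
The final loop above shifts every recorded patch offset by the size of the unit DIE's ULEB128-encoded abbreviation code, because attribute payloads only get their final positions once the abbreviation number is known. A small sketch of that fix-up (plain C++, invented values):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Size in bytes of a ULEB128-encoded value (7 payload bits per byte).
    unsigned uleb128Size(uint64_t Value) {
      unsigned Size = 0;
      do {
        ++Size;
        Value >>= 7;
      } while (Value != 0);
      return Size;
    }

    int main() {
      // Offsets recorded while the abbrev code size was still unknown.
      std::vector<uint64_t> PatchOffsets = {4, 12, 20};
      uint64_t AbbrevNumber = 200; // encodes into 2 ULEB128 bytes

      // Shift every patch by the size of the DIE's leading abbrev code.
      for (uint64_t &Off : PatchOffsets)
        Off += uleb128Size(AbbrevNumber);

      for (uint64_t Off : PatchOffsets)
        std::cout << Off << ' '; // prints: 6 14 22
      std::cout << '\n';
    }
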
+
+void TypeUnit::prepareDataForTreeCreation() {
+ SectionDescriptor &DebugInfoSection =
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugInfo);
+
+ // Type unit data is created in parallel, so its order is not
+ // deterministic. Sort the data here if deterministic output is required.
+
+ parallel::TaskGroup TG;
+
+ if (!GlobalData.getOptions().AllowNonDeterministicOutput) {
+ TG.spawn([&]() {
+ // Sort types to have a deterministic output.
+ Types.sortTypes();
+ });
+ }
+
+ TG.spawn([&]() {
+ if (!GlobalData.getOptions().AllowNonDeterministicOutput) {
+ // Sort decl type patches to have a deterministic output.
+ std::function<bool(const DebugTypeDeclFilePatch &LHS,
+ const DebugTypeDeclFilePatch &RHS)>
+ PatchesComparator = [&](const DebugTypeDeclFilePatch &LHS,
+ const DebugTypeDeclFilePatch &RHS) {
+ return LHS.Directory->first() < RHS.Directory->first() ||
+ (!(RHS.Directory->first() < LHS.Directory->first()) &&
+ LHS.FilePath->first() < RHS.FilePath->first());
+ };
+ // Sort patches to have a deterministic output.
+ DebugInfoSection.ListDebugTypeDeclFilePatch.sort(PatchesComparator);
+ }
+
+ // Update DW_AT_decl_file attribute
+ dwarf::Form DeclFileForm =
+ getScalarFormForValue(
+ DebugInfoSection.ListDebugTypeDeclFilePatch.size())
+ .first;
+
+ DebugInfoSection.ListDebugTypeDeclFilePatch.forEach(
+ [&](DebugTypeDeclFilePatch &Patch) {
+ TypeEntryBody *TypeEntry = Patch.TypeName->getValue().load();
+ assert(TypeEntry &&
+ formatv("No data for type {0}", Patch.TypeName->getKey())
+ .str()
+ .c_str());
+ if (&TypeEntry->getFinalDie() != Patch.Die)
+ return;
+
+ uint32_t FileIdx =
+ addFileNameIntoLinetable(Patch.Directory, Patch.FilePath);
+
+ unsigned DIESize = Patch.Die->getSize();
+ DIEGenerator DIEGen(Patch.Die, Types.getThreadLocalAllocator(),
+ *this);
+
+ DIESize += DIEGen
+ .addScalarAttribute(dwarf::DW_AT_decl_file,
+ DeclFileForm, FileIdx)
+ .second;
+ Patch.Die->setSize(DIESize);
+ });
+ });
+
+ if (!GlobalData.getOptions().AllowNonDeterministicOutput) {
+ // Sort patches to have a deterministic output.
+ TG.spawn([&]() {
+ forEach([&](SectionDescriptor &OutSection) {
+ std::function<bool(const DebugStrPatch &LHS, const DebugStrPatch &RHS)>
+ StrPatchesComparator =
+ [&](const DebugStrPatch &LHS, const DebugStrPatch &RHS) {
+ return LHS.String->getKey() < RHS.String->getKey();
+ };
+ OutSection.ListDebugStrPatch.sort(StrPatchesComparator);
+
+ std::function<bool(const DebugTypeStrPatch &LHS,
+ const DebugTypeStrPatch &RHS)>
+ TypeStrPatchesComparator = [&](const DebugTypeStrPatch &LHS,
+ const DebugTypeStrPatch &RHS) {
+ return LHS.String->getKey() < RHS.String->getKey();
+ };
+ OutSection.ListDebugTypeStrPatch.sort(TypeStrPatchesComparator);
+ });
+ });
+ }
+
+ if (!GlobalData.getOptions().AllowNonDeterministicOutput) {
+ // Sort patches to have a deterministic output.
+ TG.spawn([&]() {
+ forEach([&](SectionDescriptor &OutSection) {
+ std::function<bool(const DebugLineStrPatch &LHS,
+ const DebugLineStrPatch &RHS)>
+ LineStrPatchesComparator = [&](const DebugLineStrPatch &LHS,
+ const DebugLineStrPatch &RHS) {
+ return LHS.String->getKey() < RHS.String->getKey();
+ };
+ OutSection.ListDebugLineStrPatch.sort(LineStrPatchesComparator);
+
+ std::function<bool(const DebugTypeLineStrPatch &LHS,
+ const DebugTypeLineStrPatch &RHS)>
+ TypeLineStrPatchesComparator =
+ [&](const DebugTypeLineStrPatch &LHS,
+ const DebugTypeLineStrPatch &RHS) {
+ return LHS.String->getKey() < RHS.String->getKey();
+ };
+ OutSection.ListDebugTypeLineStrPatch.sort(TypeLineStrPatchesComparator);
+ });
+ });
+ }
+}
+
+uint64_t TypeUnit::finalizeTypeEntryRec(uint64_t OutOffset, DIE *OutDIE,
+ TypeEntry *Entry) {
+ bool HasChildren = !Entry->getValue().load()->Children.empty();
+ DIEGenerator DIEGen(OutDIE, Types.getThreadLocalAllocator(), *this);
+ OutOffset += DIEGen.finalizeAbbreviations(HasChildren, nullptr);
+ OutOffset += OutDIE->getSize() - 1;
+
+ if (HasChildren) {
+ Entry->getValue().load()->Children.forEach([&](TypeEntry *ChildEntry) {
+ DIE *ChildDIE = &ChildEntry->getValue().load()->getFinalDie();
+ DIEGen.addChild(ChildDIE);
+
+ ChildDIE->setOffset(OutOffset);
+
+ OutOffset = finalizeTypeEntryRec(OutOffset, ChildDIE, ChildEntry);
+ });
+
+ // End of children marker.
+ OutOffset += sizeof(int8_t);
+ }
+
+ OutDIE->setSize(OutOffset - OutDIE->getOffset());
+ return OutOffset;
+}
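
The recursion above assigns offsets depth-first and grows each parent's size to cover its children plus a one-byte end-of-children marker. A simplified standalone model (plain structs instead of DIE/TypeEntry, and without the off-by-one bookkeeping of the real code):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    struct Node {
      uint64_t Offset = 0;
      uint64_t Size = 0;
      uint64_t OwnDataSize = 0; // abbrev code plus attribute payload
      std::vector<Node> Children;
    };

    uint64_t finalizeRec(uint64_t Offset, Node &N) {
      N.Offset = Offset;
      Offset += N.OwnDataSize;
      for (Node &Child : N.Children)
        Offset = finalizeRec(Offset, Child);
      if (!N.Children.empty())
        Offset += 1; // end-of-children marker: a single zero byte
      N.Size = Offset - N.Offset;
      return Offset;
    }

    int main() {
      Node Root{0, 0, 4, {{0, 0, 3, {}}, {0, 0, 5, {}}}};
      finalizeRec(0, Root);
      std::cout << Root.Size << '\n'; // 4 + 3 + 5 + 1 == 13
    }
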
+
+uint32_t TypeUnit::addFileNameIntoLinetable(StringEntry *Dir,
+ StringEntry *FileName) {
+ uint32_t DirIdx = 0;
+
+ if (Dir->first() == "") {
+ DirIdx = 0;
+ } else {
+ DirectoriesMapTy::iterator DirEntry = DirectoriesMap.find(Dir);
+ if (DirEntry == DirectoriesMap.end()) {
+ // We currently do not support more than UINT32_MAX directories.
+ assert(LineTable.Prologue.IncludeDirectories.size() < UINT32_MAX);
+ DirIdx = LineTable.Prologue.IncludeDirectories.size();
+ DirectoriesMap.insert({Dir, DirIdx});
+ LineTable.Prologue.IncludeDirectories.push_back(
+ DWARFFormValue::createFromPValue(dwarf::DW_FORM_string,
+ Dir->getKeyData()));
+ } else {
+ DirIdx = DirEntry->second;
+ }
+
+ if (getVersion() < 5)
+ DirIdx++;
+ }
+
+ uint32_t FileIdx = 0;
+ FilenamesMapTy::iterator FileEntry = FileNamesMap.find({FileName, DirIdx});
+ if (FileEntry == FileNamesMap.end()) {
+ // We currently do not support more than UINT32_MAX files.
+ assert(LineTable.Prologue.FileNames.size() < UINT32_MAX);
+ FileIdx = LineTable.Prologue.FileNames.size();
+ FileNamesMap.insert({{FileName, DirIdx}, FileIdx});
+ LineTable.Prologue.FileNames.push_back(DWARFDebugLine::FileNameEntry());
+ LineTable.Prologue.FileNames.back().Name = DWARFFormValue::createFromPValue(
+ dwarf::DW_FORM_string, FileName->getKeyData());
+ LineTable.Prologue.FileNames.back().DirIdx = DirIdx;
+ } else {
+ FileIdx = FileEntry->second;
+ }
+
+ return getVersion() < 5 ? FileIdx + 1 : FileIdx;
+}
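
The +1 adjustments above exist because DWARF v4 numbers line-table files and directories from 1 (index 0 is reserved), while DWARF v5 numbers them from 0. A minimal sketch of the version-dependent mapping:

    #include <cstdint>
    #include <iostream>

    // Index as referenced from attributes like DW_AT_decl_file: v4 line
    // tables are 1-based (0 is reserved), v5 tables are 0-based.
    uint32_t externalFileIndex(uint32_t TableIdx, uint16_t DwarfVersion) {
      return DwarfVersion < 5 ? TableIdx + 1 : TableIdx;
    }

    int main() {
      std::cout << externalFileIndex(0, 4) << ' ' << externalFileIndex(0, 5)
                << '\n'; // prints: 1 0
    }
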
+
+std::pair<dwarf::Form, uint8_t>
+TypeUnit::getScalarFormForValue(uint64_t Value) const {
+ if (Value > 0xFFFFFFFF)
+ return std::make_pair(dwarf::DW_FORM_data8, 8);
+
+ if (Value > 0xFFFF)
+ return std::make_pair(dwarf::DW_FORM_data4, 4);
+
+ if (Value > 0xFF)
+ return std::make_pair(dwarf::DW_FORM_data2, 2);
+
+ return std::make_pair(dwarf::DW_FORM_data1, 1);
+}
+
+uint8_t TypeUnit::getSizeByAttrForm(dwarf::Form Form) const {
+ if (Form == dwarf::DW_FORM_data1)
+ return 1;
+
+ if (Form == dwarf::DW_FORM_data2)
+ return 2;
+
+ if (Form == dwarf::DW_FORM_data4)
+ return 4;
+
+ if (Form == dwarf::DW_FORM_data8)
+ return 8;
+
+ if (Form == dwarf::DW_FORM_data16)
+ return 16;
+
+ llvm_unreachable("Unsupported Attr Form");
+}
+
+Error TypeUnit::finishCloningAndEmit(std::optional<Triple> TargetTriple) {
+ BumpPtrAllocator Allocator;
+ createDIETree(Allocator);
+
+ if (getGlobalData().getOptions().NoOutput || (getOutUnitDIE() == nullptr))
+ return Error::success();
+
+ // Create sections ahead of time so that they are not created
+ // asynchronously later.
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugInfo);
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugLine);
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugStrOffsets);
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugAbbrev);
+ if (llvm::is_contained(GlobalData.getOptions().AccelTables,
+ DWARFLinker::AccelTableKind::Pub)) {
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugPubNames);
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugPubTypes);
+ }
+
+ SmallVector<std::function<Error(void)>> Tasks;
+
+ // Add task for emitting .debug_line section.
+ if (!LineTable.Prologue.FileNames.empty()) {
+ Tasks.push_back([&]() -> Error {
+ assert(TargetTriple.has_value());
+ return emitDebugLine(*TargetTriple, LineTable);
+ });
+ }
+
+ // Add task for emitting .debug_info section.
+ Tasks.push_back([&]() -> Error { return emitDebugInfo(*TargetTriple); });
+
+ // Add task for emitting Pub accelerator sections.
+ if (llvm::is_contained(GlobalData.getOptions().AccelTables,
+ DWARFLinker::AccelTableKind::Pub)) {
+ Tasks.push_back([&]() -> Error {
+ emitPubAccelerators();
+ return Error::success();
+ });
+ }
+
+ // Add task for emitting .debug_str_offsets section.
+ Tasks.push_back([&]() -> Error { return emitDebugStringOffsetSection(); });
+
+ // Add task for emitting .debug_abbrev section.
+ Tasks.push_back([&]() -> Error { return emitAbbreviations(); });
+
+ if (auto Err = parallelForEachError(
+ Tasks, [&](std::function<Error(void)> F) { return F(); }))
+ return Err;
+
+ return Error::success();
+}
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerTypeUnit.h b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerTypeUnit.h
new file mode 100644
index 000000000000..97e620eee0c4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerTypeUnit.h
@@ -0,0 +1,138 @@
+//===- DWARFLinkerTypeUnit.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DWARFLINKERPARALLEL_DWARFLINKERTYPEUNIT_H
+#define LLVM_DWARFLINKERPARALLEL_DWARFLINKERTYPEUNIT_H
+
+#include "DWARFLinkerUnit.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+/// Type Unit is used to represent an artificial compilation unit
+/// which keeps all type information. This type information is referenced
+/// from other compilation units.
+class TypeUnit : public DwarfUnit {
+public:
+ TypeUnit(LinkingGlobalData &GlobalData, unsigned ID,
+ std::optional<uint16_t> Language, dwarf::FormParams Format,
+ llvm::endianness Endianess);
+
+ /// Generates DIE tree based on information from TypesMap.
+ void createDIETree(BumpPtrAllocator &Allocator);
+
+ /// Emits resulting dwarf based on information from DIE tree.
+ Error finishCloningAndEmit(std::optional<Triple> TargetTriple);
+
+ /// Returns global type pool.
+ TypePool &getTypePool() { return Types; }
+
+ /// TypeUnitAccelInfo extends the AccelInfo structure with type-specific
+ /// fields. We need these additional fields to decide whether OutDIE should
+ /// have an accelerator record or not. The TypeEntryBodyPtr can refer to the
+ /// declaration DIE and the definition DIE corresponding to the type entry.
+ /// Only one of them is used in the final output, so if TypeUnitAccelInfo
+ /// refers to an OutDIE that does not match TypeEntryBodyPtr->getFinalDie(),
+ /// such a record should be skipped.
+ struct TypeUnitAccelInfo : public AccelInfo {
+ /// Pointer to the output DIE which owns this accelerator record.
+ DIE *OutDIE = nullptr;
+
+ /// Pointer to the type entry body.
+ TypeEntryBody *TypeEntryBodyPtr = nullptr;
+ };
+
+ /// Enumerates all accelerator records and call \p Handler for each.
+ void
+ forEachAcceleratorRecord(function_ref<void(AccelInfo &)> Handler) override {
+ AcceleratorRecords.forEach([&](TypeUnitAccelInfo &Info) {
+ // Check whether current record is for the final DIE.
+ assert(Info.TypeEntryBodyPtr != nullptr);
+
+ if (&Info.TypeEntryBodyPtr->getFinalDie() != Info.OutDIE)
+ return;
+
+ Info.OutOffset = Info.OutDIE->getOffset();
+ Handler(Info);
+ });
+ }
+
+ /// Returns index for the specified \p String inside .debug_str_offsets.
+ uint64_t getDebugStrIndex(const StringEntry *String) override {
+ std::unique_lock<std::mutex> LockGuard(DebugStringIndexMapMutex);
+ return DebugStringIndexMap.getValueIndex(String);
+ }
+
+ /// Adds \p Info to the unit's accelerator records.
+ void saveAcceleratorInfo(const TypeUnitAccelInfo &Info) {
+ AcceleratorRecords.add(Info);
+ }
+
+private:
+ /// Type DIEs are partially created at the cloning stage. They are organized
+ /// as a tree using type entries. This function links the DIEs (corresponding
+ /// to the type entries) into the tree structure.
+ uint64_t finalizeTypeEntryRec(uint64_t OutOffset, DIE *OutDIE,
+ TypeEntry *Entry);
+
+ /// Prepares DIEs to be linked into the tree.
+ void prepareDataForTreeCreation();
+
+ /// Add specified \p Dir and \p Filename into the line table
+ /// of this type unit.
+ uint32_t addFileNameIntoLinetable(StringEntry *Dir, StringEntry *FileName);
+
+ std::pair<dwarf::Form, uint8_t> getScalarFormForValue(uint64_t Value) const;
+
+ uint8_t getSizeByAttrForm(dwarf::Form Form) const;
+
+ struct CmpStringEntryRef {
+ bool operator()(const StringEntry *LHS, const StringEntry *RHS) const {
+ return LHS->first() < RHS->first();
+ }
+ };
+ struct CmpDirIDStringEntryRef {
+ bool operator()(const std::pair<StringEntry *, uint64_t> &LHS,
+ const std::pair<StringEntry *, uint64_t> &RHS) const {
+ return LHS.second < RHS.second ||
+ (!(RHS.second < LHS.second) &&
+ LHS.first->first() < RHS.first->first());
+ }
+ };
+
+ /// The DW_AT_language of this unit.
+ std::optional<uint16_t> Language;
+
+ /// This unit's line table.
+ DWARFDebugLine::LineTable LineTable;
+
+ /// Data members keeping file names for line table.
+ using DirectoriesMapTy = std::map<StringEntry *, size_t, CmpStringEntryRef>;
+ using FilenamesMapTy = std::map<std::pair<StringEntry *, uint64_t>, size_t,
+ CmpDirIDStringEntryRef>;
+
+ DirectoriesMapTy DirectoriesMap;
+ FilenamesMapTy FileNamesMap;
+
+ /// Type DIEs tree.
+ TypePool Types;
+
+ /// List of accelerator entries for this unit.
+ ArrayList<TypeUnitAccelInfo> AcceleratorRecords;
+
+ /// Guard for DebugStringIndexMap.
+ std::mutex DebugStringIndexMapMutex;
+};
+
+} // end of namespace dwarflinker_parallel
+} // end namespace llvm
+
+#endif // LLVM_DWARFLINKERPARALLEL_DWARFLINKERTYPEUNIT_H
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerUnit.cpp b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerUnit.cpp
new file mode 100644
index 000000000000..b1da1900d65e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerUnit.cpp
@@ -0,0 +1,250 @@
+//===- DWARFLinkerUnit.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFLinkerUnit.h"
+#include "DWARFEmitterImpl.h"
+#include "DebugLineSectionEmitter.h"
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+void DwarfUnit::assignAbbrev(DIEAbbrev &Abbrev) {
+ // Check the set for priors.
+ FoldingSetNodeID ID;
+ Abbrev.Profile(ID);
+ void *InsertToken;
+
+ DIEAbbrev *InSet = AbbreviationsSet.FindNodeOrInsertPos(ID, InsertToken);
+ // If a matching abbreviation already exists in the set.
+ if (InSet) {
+ // Assign existing abbreviation number.
+ Abbrev.setNumber(InSet->getNumber());
+ } else {
+ // Add to abbreviation list.
+ Abbreviations.push_back(
+ std::make_unique<DIEAbbrev>(Abbrev.getTag(), Abbrev.hasChildren()));
+ for (const auto &Attr : Abbrev.getData())
+ Abbreviations.back()->AddAttribute(Attr);
+ AbbreviationsSet.InsertNode(Abbreviations.back().get(), InsertToken);
+ // Assign the unique abbreviation number.
+ Abbrev.setNumber(Abbreviations.size());
+ Abbreviations.back()->setNumber(Abbreviations.size());
+ }
+}
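
assignAbbrev deduplicates abbreviations: structurally identical descriptions share one 1-based number, and new ones are appended to the table. A simplified model using std::map as the uniquing set in place of LLVM's FoldingSet (the "profile" strings are invented stand-ins for DIEAbbrev profiles):

    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
      std::map<std::string, unsigned> Set; // profile -> abbrev number
      std::vector<std::string> Table;      // unique abbrevs, index = number - 1

      auto assign = [&](const std::string &Profile) -> unsigned {
        auto [It, Inserted] = Set.try_emplace(Profile, Table.size() + 1);
        if (Inserted)
          Table.push_back(Profile); // new abbreviation, next 1-based number
        return It->second;
      };

      std::cout << assign("subprogram: name, low_pc") << '\n'; // 1
      std::cout << assign("variable: name, type") << '\n';     // 2
      std::cout << assign("subprogram: name, low_pc") << '\n'; // 1 again
    }
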
+
+Error DwarfUnit::emitAbbreviations() {
+ const std::vector<std::unique_ptr<DIEAbbrev>> &Abbrevs = getAbbreviations();
+ if (Abbrevs.empty())
+ return Error::success();
+
+ SectionDescriptor &AbbrevSection =
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugAbbrev);
+
+ // For each abbreviation.
+ for (const auto &Abbrev : Abbrevs)
+ emitDwarfAbbrevEntry(*Abbrev, AbbrevSection);
+
+ // Mark end of abbreviations.
+ encodeULEB128(0, AbbrevSection.OS);
+
+ return Error::success();
+}
+
+void DwarfUnit::emitDwarfAbbrevEntry(const DIEAbbrev &Abbrev,
+ SectionDescriptor &AbbrevSection) {
+ // Emit the abbreviation's code (base-1 index).
+ encodeULEB128(Abbrev.getNumber(), AbbrevSection.OS);
+
+ // Emit the abbreviations data.
+ // Emit its Dwarf tag type.
+ encodeULEB128(Abbrev.getTag(), AbbrevSection.OS);
+
+ // Emit whether it has children DIEs.
+ encodeULEB128((unsigned)Abbrev.hasChildren(), AbbrevSection.OS);
+
+ // For each attribute description.
+ const SmallVectorImpl<DIEAbbrevData> &Data = Abbrev.getData();
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ const DIEAbbrevData &AttrData = Data[i];
+
+ // Emit attribute type.
+ encodeULEB128(AttrData.getAttribute(), AbbrevSection.OS);
+
+ // Emit form type.
+ encodeULEB128(AttrData.getForm(), AbbrevSection.OS);
+
+ // Emit value for DW_FORM_implicit_const.
+ if (AttrData.getForm() == dwarf::DW_FORM_implicit_const)
+ encodeSLEB128(AttrData.getValue(), AbbrevSection.OS);
+ }
+
+ // Mark end of abbreviation.
+ encodeULEB128(0, AbbrevSection.OS);
+ encodeULEB128(0, AbbrevSection.OS);
+}
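
Every field of the abbreviation entry above is ULEB128-encoded: 7 payload bits per byte, with the high bit marking continuation. A standalone encoder matching that format (624485 is the DWARF specification's worked example):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    std::vector<uint8_t> encodeULEB128(uint64_t Value) {
      std::vector<uint8_t> Out;
      do {
        uint8_t Byte = Value & 0x7f; // low 7 bits
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // continuation bit: more bytes follow
        Out.push_back(Byte);
      } while (Value != 0);
      return Out;
    }

    int main() {
      for (uint8_t B : encodeULEB128(624485)) // spec example: e5 8e 26
        std::printf("%02x ", unsigned(B));
      std::printf("\n");
    }
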
+
+Error DwarfUnit::emitDebugInfo(const Triple &TargetTriple) {
+ DIE *OutUnitDIE = getOutUnitDIE();
+ if (OutUnitDIE == nullptr)
+ return Error::success();
+
+ // FIXME: Remove dependence on DwarfEmitterImpl/AsmPrinter and emit DIEs
+ // directly.
+
+ SectionDescriptor &OutSection =
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugInfo);
+ DwarfEmitterImpl Emitter(DWARFLinker::OutputFileType::Object, OutSection.OS);
+ if (Error Err = Emitter.init(TargetTriple, "__DWARF"))
+ return Err;
+
+ // Emit compile unit header.
+ Emitter.emitCompileUnitHeader(*this);
+ size_t OffsetToAbbreviationTableOffset =
+ (getFormParams().Version >= 5) ? 8 : 6;
+ OutSection.notePatch(DebugOffsetPatch{
+ OffsetToAbbreviationTableOffset,
+ &getOrCreateSectionDescriptor(DebugSectionKind::DebugAbbrev)});
+
+ // Emit DIEs.
+ Emitter.emitDIE(*OutUnitDIE);
+ Emitter.finish();
+
+ // Set start offset and size for .debug_info section.
+ OutSection.setSizesForSectionCreatedByAsmPrinter();
+ return Error::success();
+}
+
+Error DwarfUnit::emitDebugLine(const Triple &TargetTriple,
+ const DWARFDebugLine::LineTable &OutLineTable) {
+ DebugLineSectionEmitter DebugLineEmitter(TargetTriple, *this);
+
+ return DebugLineEmitter.emit(OutLineTable);
+}
+
+Error DwarfUnit::emitDebugStringOffsetSection() {
+ if (getVersion() < 5)
+ return Error::success();
+
+ if (DebugStringIndexMap.empty())
+ return Error::success();
+
+ SectionDescriptor &OutDebugStrOffsetsSection =
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugStrOffsets);
+
+ // Emit section header.
+
+ // Emit length.
+ OutDebugStrOffsetsSection.emitUnitLength(0xBADDEF);
+ uint64_t OffsetAfterSectionLength = OutDebugStrOffsetsSection.OS.tell();
+
+ // Emit version.
+ OutDebugStrOffsetsSection.emitIntVal(5, 2);
+
+ // Emit padding.
+ OutDebugStrOffsetsSection.emitIntVal(0, 2);
+
+ // Emit index to offset map.
+ for (const StringEntry *String : DebugStringIndexMap.getValues()) {
+ // Note patch for string offset value.
+ OutDebugStrOffsetsSection.notePatch(
+ DebugStrPatch{{OutDebugStrOffsetsSection.OS.tell()}, String});
+
+ // Emit placeholder for offset value.
+ OutDebugStrOffsetsSection.emitOffset(0xBADDEF);
+ }
+
+ // Patch section length.
+ OutDebugStrOffsetsSection.apply(
+ OffsetAfterSectionLength -
+ OutDebugStrOffsetsSection.getFormParams().getDwarfOffsetByteSize(),
+ dwarf::DW_FORM_sec_offset,
+ OutDebugStrOffsetsSection.OS.tell() - OffsetAfterSectionLength);
+
+ return Error::success();
+}
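
The section above is emitted with a placeholder length (0xBADDEF) and patched once the true size is known, since the length field precedes content whose size is not known up front. A sketch of the same placeholder-and-patch pattern over a plain byte buffer (illustrative only; the real emitter honors the target's endianness and DWARF offset sizes):

    #include <cstdint>
    #include <cstring>
    #include <iostream>
    #include <vector>

    int main() {
      std::vector<uint8_t> Section;

      // Emit a 4-byte placeholder for the length; remember its position.
      size_t LengthPos = Section.size();
      Section.resize(Section.size() + 4, 0xEF); // dummy bytes
      size_t AfterLength = Section.size();

      // Emit the payload: here, three fake 4-byte string offsets.
      Section.resize(Section.size() + 12, 0);

      // Patch the real length over the placeholder (host byte order here).
      uint32_t Length = uint32_t(Section.size() - AfterLength);
      std::memcpy(&Section[LengthPos], &Length, sizeof(Length));

      std::cout << "patched length = " << Length << '\n'; // 12
    }
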
+
+/// Emit a pubnames or pubtypes section entry into \p OutSection.
+/// The data is provided in \p Info.
+std::optional<uint64_t>
+DwarfUnit::emitPubAcceleratorEntry(SectionDescriptor &OutSection,
+ const DwarfUnit::AccelInfo &Info,
+ std::optional<uint64_t> LengthOffset) {
+ if (!LengthOffset) {
+ // Emit the header.
+ OutSection.emitIntVal(0xBADDEF,
+ getFormParams().getDwarfOffsetByteSize()); // Length
+ LengthOffset = OutSection.OS.tell();
+
+ OutSection.emitIntVal(dwarf::DW_PUBNAMES_VERSION, 2); // Version
+
+ OutSection.notePatch(DebugOffsetPatch{
+ OutSection.OS.tell(),
+ &getOrCreateSectionDescriptor(DebugSectionKind::DebugInfo)});
+ OutSection.emitOffset(0xBADDEF); // Unit offset
+
+ OutSection.emitIntVal(getUnitSize(), 4); // Size
+ }
+ OutSection.emitOffset(Info.OutOffset);
+
+ // Emit the string itself.
+ OutSection.emitInplaceString(Info.String->first());
+
+ return LengthOffset;
+}
+
+/// Emit .debug_pubnames and .debug_pubtypes for this unit.
+void DwarfUnit::emitPubAccelerators() {
+ std::optional<uint64_t> NamesLengthOffset;
+ std::optional<uint64_t> TypesLengthOffset;
+
+ forEachAcceleratorRecord([&](const DwarfUnit::AccelInfo &Info) {
+ if (Info.AvoidForPubSections)
+ return;
+
+ switch (Info.Type) {
+ case DwarfUnit::AccelType::Name: {
+ NamesLengthOffset = emitPubAcceleratorEntry(
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugPubNames), Info,
+ NamesLengthOffset);
+ } break;
+ case DwarfUnit::AccelType::Type: {
+ TypesLengthOffset = emitPubAcceleratorEntry(
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugPubTypes), Info,
+ TypesLengthOffset);
+ } break;
+ default: {
+ // Nothing to do.
+ } break;
+ }
+ });
+
+ if (NamesLengthOffset) {
+ SectionDescriptor &OutSection =
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugPubNames);
+ OutSection.emitIntVal(0, 4); // End marker.
+
+ OutSection.apply(*NamesLengthOffset -
+ OutSection.getFormParams().getDwarfOffsetByteSize(),
+ dwarf::DW_FORM_sec_offset,
+ OutSection.OS.tell() - *NamesLengthOffset);
+ }
+
+ if (TypesLengthOffset) {
+ SectionDescriptor &OutSection =
+ getOrCreateSectionDescriptor(DebugSectionKind::DebugPubTypes);
+ OutSection.emitIntVal(0, 4); // End marker.
+
+ OutSection.apply(*TypesLengthOffset -
+ OutSection.getFormParams().getDwarfOffsetByteSize(),
+ dwarf::DW_FORM_sec_offset,
+ OutSection.OS.tell() - *TypesLengthOffset);
+ }
+}
+
+} // end of namespace dwarflinker_parallel
+} // end of namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerUnit.h b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerUnit.h
index 78e8d82ea061..9640a8ee711e 100644
--- a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerUnit.h
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DWARFLinkerUnit.h
@@ -9,9 +9,11 @@
#ifndef LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERUNIT_H
#define LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERUNIT_H
+#include "DWARFLinkerGlobalData.h"
+#include "IndexedValuesMap.h"
#include "OutputSections.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/DIE.h"
+#include "llvm/DWARFLinkerParallel/DWARFLinker.h"
#include "llvm/DWARFLinkerParallel/StringPool.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Support/LEB128.h"
@@ -19,54 +21,21 @@
namespace llvm {
namespace dwarflinker_parallel {
-using UnitMessageHandlerTy = function_ref<void(
- const Twine &Error, StringRef Context, const DWARFDie *DIE)>;
-
-/// Each unit keeps output data as a file with debug tables
-/// corresponding to the concrete unit.
-using OutTablesFileTy = SmallString<0>;
+class DwarfUnit;
+using MacroOffset2UnitMapTy = DenseMap<uint64_t, DwarfUnit *>;
/// Base class for all Dwarf units(Compile unit/Type table unit).
class DwarfUnit : public OutputSections {
public:
virtual ~DwarfUnit() {}
- DwarfUnit(unsigned ID, StringRef ClangModuleName,
- UnitMessageHandlerTy WarningHandler)
- : ID(ID), ClangModuleName(ClangModuleName),
- WarningHandler(WarningHandler) {
- FormParams.Version = 4;
- FormParams.Format = dwarf::DWARF32;
- FormParams.AddrSize = 4;
- }
-
- /// Endiannes for the compile unit.
- support::endianness getEndianness() const { return Endianess; }
-
- /// Return DWARF version.
- uint16_t getVersion() const { return FormParams.Version; }
-
- /// Return size of header of debug_info table.
- uint16_t getHeaderSize() const { return FormParams.Version >= 5 ? 12 : 11; }
-
- /// Return size of address.
- uint8_t getAddressByteSize() const { return FormParams.AddrSize; }
-
- /// Return size of reference.
- uint8_t getRefAddrByteSize() const { return FormParams.getRefAddrByteSize(); }
-
- /// Return format of the Dwarf(DWARF32 or DWARF64).
- /// TODO: DWARF64 is not currently supported.
- dwarf::DwarfFormat getDwarfFormat() const { return FormParams.Format; }
+ DwarfUnit(LinkingGlobalData &GlobalData, unsigned ID,
+ StringRef ClangModuleName)
+ : OutputSections(GlobalData), ID(ID), ClangModuleName(ClangModuleName),
+ OutUnitDIE(nullptr) {}
/// Unique id of the unit.
unsigned getUniqueID() const { return ID; }
- /// Return language of this unit.
- uint16_t getLanguage() const { return Language; }
-
- /// Set size of this(newly generated) compile unit.
- void setUnitSize(uint64_t UnitSize) { this->UnitSize = UnitSize; }
-
/// Returns size of this(newly generated) compile unit.
uint64_t getUnitSize() const { return UnitSize; }
@@ -76,84 +45,128 @@ public:
/// Return the DW_AT_LLVM_sysroot of the compile unit or an empty StringRef.
StringRef getSysRoot() { return SysRoot; }
- /// Create a Die for this unit.
- void setOutputDIE(DIE *UnitDie) { NewUnit = UnitDie; }
-
- /// Return Die for this compile unit.
- DIE *getOutputUnitDIE() const { return NewUnit; }
-
/// Return true if this compile unit is from Clang module.
bool isClangModule() const { return !ClangModuleName.empty(); }
/// Return Clang module name;
const std::string &getClangModuleName() const { return ClangModuleName; }
- /// Returns generated file keeping debug tables for this compile unit.
- OutTablesFileTy &getOutDwarfBits() { return OutDebugInfoBits; }
+ /// Return global data.
+ LinkingGlobalData &getGlobalData() { return GlobalData; }
- /// Erases generated file keeping debug tables for this compile unit.
- void eraseDwarfBits() { OutDebugInfoBits = OutTablesFileTy(); }
+ /// Returns true if the unit is inter-connected (it references or is
+ /// referenced by another unit).
+ bool isInterconnectedCU() const { return IsInterconnectedCU; }
- MCSymbol *getLabelBegin() { return LabelBegin; }
- void setLabelBegin(MCSymbol *S) { LabelBegin = S; }
+ /// Mark this unit as inter-connected (it references or is referenced by
+ /// another unit).
- /// Error reporting methods.
- /// @{
+ /// Adds \p Abbrev into the unit's abbreviation table.
+ void assignAbbrev(DIEAbbrev &Abbrev);
- void reportWarning(const Twine &Warning,
- const DWARFDie *Die = nullptr) const {
- if (WarningHandler)
- WarningHandler(Warning, getUnitName(), Die);
+ /// Returns abbreviations for this compile unit.
+ const std::vector<std::unique_ptr<DIEAbbrev>> &getAbbreviations() const {
+ return Abbreviations;
}
- void reportWarning(Error Warning) const {
- handleAllErrors(std::move(Warning), [&](ErrorInfoBase &Info) {
- if (WarningHandler)
- WarningHandler(Info.message(), getUnitName(), nullptr);
- });
+
+ /// Returns output unit DIE.
+ DIE *getOutUnitDIE() { return OutUnitDIE; }
+
+ /// Set output unit DIE.
+ void setOutUnitDIE(DIE *UnitDie) {
+ OutUnitDIE = UnitDie;
+
+ if (OutUnitDIE != nullptr)
+ UnitSize = getDebugInfoHeaderSize() + OutUnitDIE->getSize();
}
+
+ /// \defgroup Methods used to emit unit's debug info:
+ ///
+ /// @{
+ /// Emit unit's abbreviations.
+ Error emitAbbreviations();
+
+ /// Emit .debug_info section for unit DIEs.
+ Error emitDebugInfo(const Triple &TargetTriple);
+
+ /// Emit .debug_line section.
+ Error emitDebugLine(const Triple &TargetTriple,
+ const DWARFDebugLine::LineTable &OutLineTable);
+
+ /// Emit the .debug_str_offsets section for current unit.
+ Error emitDebugStringOffsetSection();
/// @}
+ /// \defgroup Methods used for reporting warnings and errors:
+ ///
+ /// @{
+ void warn(const Twine &Warning) { GlobalData.warn(Warning, getUnitName()); }
+
+ void error(const Twine &Err) { GlobalData.error(Err, getUnitName()); }
+ /// @}
+
+ /// \defgroup Methods and data members used for building accelerator tables:
+ ///
+ /// @{
+
+ enum class AccelType : uint8_t { None, Name, Namespace, ObjC, Type };
+
/// This structure keeps fields which would be used for creating accelerator
/// table.
struct AccelInfo {
- AccelInfo(StringEntry *Name, const DIE *Die, bool SkipPubSection = false);
- AccelInfo(StringEntry *Name, const DIE *Die, uint32_t QualifiedNameHash,
- bool ObjCClassIsImplementation);
+ AccelInfo() {
+ AvoidForPubSections = false;
+ ObjcClassImplementation = false;
+ }
/// Name of the entry.
- StringEntry *Name = nullptr;
-
- /// Tag of the DIE this entry describes.
- dwarf::Tag Tag = dwarf::DW_TAG_null;
+ StringEntry *String = nullptr;
/// Output offset of the DIE this entry describes.
- uint64_t OutOffset = 0;
+ uint64_t OutOffset;
/// Hash of the fully qualified name.
uint32_t QualifiedNameHash = 0;
- /// Emit this entry only in the apple_* sections.
- bool SkipPubSection = false;
+ /// Tag of the DIE this entry describes.
+ dwarf::Tag Tag = dwarf::DW_TAG_null;
- /// Is this an ObjC class implementation?
- bool ObjcClassImplementation = false;
+ /// Type of this accelerator record.
+ AccelType Type = AccelType::None;
+
+ /// Avoid emitting this entry for pub sections.
+ bool AvoidForPubSections : 1;
- /// Cloned Die containing acceleration info.
- const DIE *Die = nullptr;
+ /// Is this an ObjC class implementation?
+ bool ObjcClassImplementation : 1;
};
-protected:
- /// Unique ID for the unit.
- unsigned ID = 0;
+ /// Emit .debug_pubnames and .debug_pubtypes for \p Unit.
+ void emitPubAccelerators();
- /// Properties of the unit.
- dwarf::FormParams FormParams;
+ /// Enumerates accelerator data.
+ virtual void
+ forEachAcceleratorRecord(function_ref<void(AccelInfo &)> Handler) = 0;
- /// DIE for newly generated compile unit.
- DIE *NewUnit = nullptr;
+ /// @}
- /// The DW_AT_language of this unit.
- uint16_t Language = 0;
+ /// Returns the index (inside .debug_str_offsets) of the specified string.
+ virtual uint64_t getDebugStrIndex(const StringEntry *String) {
+ return DebugStringIndexMap.getValueIndex(String);
+ }
+
+protected:
+ /// Emit single abbreviation entry.
+ void emitDwarfAbbrevEntry(const DIEAbbrev &Abbrev,
+ SectionDescriptor &AbbrevSection);
+
+ /// Emit single pubnames/pubtypes accelerator entry.
+ std::optional<uint64_t>
+ emitPubAcceleratorEntry(SectionDescriptor &OutSection, const AccelInfo &Info,
+ std::optional<uint64_t> LengthOffset);
+
+ /// Unique ID for the unit.
+ unsigned ID = 0;
/// The name of this unit.
std::string UnitName;
@@ -166,20 +179,42 @@ protected:
uint64_t UnitSize = 0;
- /// Elf file containg generated debug tables for this compile unit.
- OutTablesFileTy OutDebugInfoBits;
+ /// True if the current unit references or is referenced by another unit.
+ std::atomic<bool> IsInterconnectedCU = {false};
- /// Endiannes for this compile unit.
- support::endianness Endianess = support::endianness::little;
+ /// FoldingSet that uniques the abbreviations.
+ FoldingSet<DIEAbbrev> AbbreviationsSet;
- MCSymbol *LabelBegin = nullptr;
+ /// Storage for the unique Abbreviations.
+ std::vector<std::unique_ptr<DIEAbbrev>> Abbreviations;
- /// true if current unit references_to/is_referenced by other unit.
- std::atomic<bool> IsInterconnectedCU = {false};
+ /// Output unit DIE.
+ DIE *OutUnitDIE = nullptr;
+
+ /// Cache for file names for this unit.
+ using FileNamesCache =
+ DenseMap<uint64_t, std::pair<std::string, std::string>>;
+ FileNamesCache FileNames;
- UnitMessageHandlerTy WarningHandler;
+ /// Maps a string into the index inside .debug_str_offsets section.
+ IndexedValuesMap<const StringEntry *> DebugStringIndexMap;
};
+inline bool isODRLanguage(uint16_t Language) {
+ switch (Language) {
+ case dwarf::DW_LANG_C_plus_plus:
+ case dwarf::DW_LANG_C_plus_plus_03:
+ case dwarf::DW_LANG_C_plus_plus_11:
+ case dwarf::DW_LANG_C_plus_plus_14:
+ case dwarf::DW_LANG_ObjC_plus_plus:
+ return true;
+ default:
+ return false;
+ }
+}
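
Only C++-family units participate in ODR type deduplication, which is what the switch above encodes. A standalone sketch of the same check, with the DW_LANG_* constants inlined as their numeric DWARF codes since no LLVM headers are assumed here (isODRLanguageSketch is an illustrative name):

#include <cstdint>
#include <cstdio>

// Standalone mirror of the ODR-language check; the numeric values are the
// DWARF DW_LANG_* codes for the C++-family languages handled above.
static bool isODRLanguageSketch(uint16_t Lang) {
  switch (Lang) {
  case 0x0004: // DW_LANG_C_plus_plus
  case 0x0019: // DW_LANG_C_plus_plus_03
  case 0x001a: // DW_LANG_C_plus_plus_11
  case 0x0021: // DW_LANG_C_plus_plus_14
  case 0x0011: // DW_LANG_ObjC_plus_plus
    return true;
  default:
    return false;
  }
}

int main() {
  // Prints "1 0": C++ qualifies for ODR deduplication, C99 does not.
  std::printf("%d %d\n", isODRLanguageSketch(0x0004),
              isODRLanguageSketch(0x000c));
}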
+
} // end of namespace dwarflinker_parallel
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DebugLineSectionEmitter.h b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DebugLineSectionEmitter.h
new file mode 100644
index 000000000000..fc7f8cbc4a8e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DebugLineSectionEmitter.h
@@ -0,0 +1,384 @@
+//===- DebugLineSectionEmitter.h --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_DWARFLINKERPARALLEL_DEBUGLINESECTIONEMITTER_H
+#define LLVM_LIB_DWARFLINKERPARALLEL_DEBUGLINESECTIONEMITTER_H
+
+#include "DWARFEmitterImpl.h"
+#include "llvm/DWARFLinkerParallel/AddressesMap.h"
+#include "llvm/DWARFLinkerParallel/DWARFLinker.h"
+#include "llvm/DebugInfo/DWARF/DWARFObject.h"
+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
+#include "llvm/MC/TargetRegistry.h"
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+/// This class emits specified line table into the .debug_line section.
+class DebugLineSectionEmitter {
+public:
+ DebugLineSectionEmitter(const Triple &TheTriple, DwarfUnit &U)
+ : TheTriple(TheTriple), U(U) {}
+
+ Error emit(const DWARFDebugLine::LineTable &LineTable) {
+ // FIXME: remove dependence on MCDwarfLineAddr::encode.
+ // As we reuse MCDwarfLineAddr::encode, we need to create/initialize
+ // some MC* classes.
+ if (Error Err = init(TheTriple))
+ return Err;
+
+ // Get descriptor for output .debug_line section.
+ SectionDescriptor &OutSection =
+ U.getOrCreateSectionDescriptor(DebugSectionKind::DebugLine);
+
+ // unit_length.
+ OutSection.emitUnitLength(0xBADDEF);
+ uint64_t OffsetAfterUnitLength = OutSection.OS.tell();
+
+ // Emit prologue.
+ emitLineTablePrologue(LineTable.Prologue, OutSection);
+
+ // Emit rows.
+ emitLineTableRows(LineTable, OutSection);
+ uint64_t OffsetAfterEnd = OutSection.OS.tell();
+
+ // Update unit length field with actual length value.
+ assert(OffsetAfterUnitLength -
+ OutSection.getFormParams().getDwarfOffsetByteSize() <
+ OffsetAfterUnitLength);
+ OutSection.apply(OffsetAfterUnitLength -
+ OutSection.getFormParams().getDwarfOffsetByteSize(),
+ dwarf::DW_FORM_sec_offset,
+ OffsetAfterEnd - OffsetAfterUnitLength);
+
+ return Error::success();
+ }
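
The unit_length handling above is the usual emit-then-patch pattern: the length field is written as a placeholder because the real byte count is only known after the payload has been emitted. A self-contained sketch of the pattern, with a plain std::vector standing in for the section stream and little-endian output assumed:

#include <cstddef>
#include <cstdint>
#include <vector>

int main() {
  std::vector<uint8_t> Section;
  auto emitU32 = [&](uint32_t V) { // little-endian 32-bit write
    for (int I = 0; I < 4; ++I)
      Section.push_back(uint8_t(V >> (8 * I)));
  };

  emitU32(0xBADDEF);                        // placeholder length field
  size_t AfterLength = Section.size();
  Section.insert(Section.end(), {1, 2, 3}); // pretend payload

  uint32_t Real = uint32_t(Section.size() - AfterLength);
  for (int I = 0; I < 4; ++I) // patch the placeholder with the real length
    Section[AfterLength - 4 + I] = uint8_t(Real >> (8 * I));
}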
+
+private:
+ Error init(Triple TheTriple) {
+ std::string ErrorStr;
+ std::string TripleName;
+
+ // Get the target.
+ const Target *TheTarget =
+ TargetRegistry::lookupTarget(TripleName, TheTriple, ErrorStr);
+ if (!TheTarget)
+ return createStringError(std::errc::invalid_argument, ErrorStr.c_str());
+ TripleName = TheTriple.getTriple();
+
+ // Create all the MC Objects.
+ MRI.reset(TheTarget->createMCRegInfo(TripleName));
+ if (!MRI)
+ return createStringError(std::errc::invalid_argument,
+ "no register info for target %s",
+ TripleName.c_str());
+
+ MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags();
+ MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
+ if (!MAI)
+ return createStringError(std::errc::invalid_argument,
+ "no asm info for target %s", TripleName.c_str());
+
+ MSTI.reset(TheTarget->createMCSubtargetInfo(TripleName, "", ""));
+ if (!MSTI)
+ return createStringError(std::errc::invalid_argument,
+ "no subtarget info for target %s",
+ TripleName.c_str());
+
+ MC.reset(new MCContext(TheTriple, MAI.get(), MRI.get(), MSTI.get(), nullptr,
+ nullptr, true, "__DWARF"));
+
+ return Error::success();
+ }
+
+ void emitLineTablePrologue(const DWARFDebugLine::Prologue &P,
+ SectionDescriptor &Section) {
+ // version (uhalf).
+ Section.emitIntVal(P.getVersion(), 2);
+ if (P.getVersion() == 5) {
+ // address_size (ubyte).
+ Section.emitIntVal(P.getAddressSize(), 1);
+
+ // segment_selector_size (ubyte).
+ Section.emitIntVal(P.SegSelectorSize, 1);
+ }
+
+ // header_length.
+ Section.emitOffset(0xBADDEF);
+
+ uint64_t OffsetAfterPrologueLength = Section.OS.tell();
+ emitLineTableProloguePayload(P, Section);
+ uint64_t OffsetAfterPrologueEnd = Section.OS.tell();
+
+ // Update prologue length field with actual length value.
+ Section.apply(OffsetAfterPrologueLength -
+ Section.getFormParams().getDwarfOffsetByteSize(),
+ dwarf::DW_FORM_sec_offset,
+ OffsetAfterPrologueEnd - OffsetAfterPrologueLength);
+ }
+
+ void
+ emitLineTablePrologueV2IncludeAndFileTable(const DWARFDebugLine::Prologue &P,
+ SectionDescriptor &Section) {
+ // include_directories (sequence of path names).
+ for (const DWARFFormValue &Include : P.IncludeDirectories) {
+ std::optional<const char *> IncludeStr = dwarf::toString(Include);
+ if (!IncludeStr) {
+ U.warn("cann't read string from line table.");
+ return;
+ }
+
+ Section.emitString(Include.getForm(), *IncludeStr);
+ }
+ // The last entry is followed by a single null byte.
+ Section.emitIntVal(0, 1);
+
+ // file_names (sequence of file entries).
+ for (const DWARFDebugLine::FileNameEntry &File : P.FileNames) {
+ std::optional<const char *> FileNameStr = dwarf::toString(File.Name);
+ if (!FileNameStr) {
+ U.warn("cann't read string from line table.");
+ return;
+ }
+
+ // A null-terminated string containing the full or relative path name of a
+ // source file.
+ Section.emitString(File.Name.getForm(), *FileNameStr);
+
+ // An unsigned LEB128 number representing the directory index of a
+ // directory in the include_directories section.
+ encodeULEB128(File.DirIdx, Section.OS);
+ // An unsigned LEB128 number representing the (implementation-defined)
+ // time of last modification for the file, or 0 if not available.
+ encodeULEB128(File.ModTime, Section.OS);
+ // An unsigned LEB128 number representing the length in bytes of the file,
+ // or 0 if not available.
+ encodeULEB128(File.Length, Section.OS);
+ }
+ // The last entry is followed by a single null byte.
+ Section.emitIntVal(0, 1);
+ }
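
The directory and file tables above are mostly sequences of ULEB128 values. For reference, a sketch of the unsigned LEB128 encoding that encodeULEB128 produces: 7 data bits per byte, with the high bit set on every byte except the last (encodeULEB128Sketch is an illustrative name):

#include <cstdint>
#include <cstdio>
#include <vector>

static void encodeULEB128Sketch(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f; // low 7 bits
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
}

int main() {
  std::vector<uint8_t> Out;
  encodeULEB128Sketch(624485, Out);
  for (uint8_t B : Out)
    std::printf("%02x ", B); // prints "e5 8e 26", the DWARF spec example
}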
+
+ void
+ emitLineTablePrologueV5IncludeAndFileTable(const DWARFDebugLine::Prologue &P,
+ SectionDescriptor &Section) {
+ if (P.IncludeDirectories.empty()) {
+ // directory_entry_format_count(ubyte).
+ Section.emitIntVal(0, 1);
+ } else {
+ // directory_entry_format_count(ubyte).
+ Section.emitIntVal(1, 1);
+
+ // directory_entry_format (sequence of ULEB128 pairs).
+ encodeULEB128(dwarf::DW_LNCT_path, Section.OS);
+ encodeULEB128(P.IncludeDirectories[0].getForm(), Section.OS);
+ }
+
+ // directories_count (ULEB128).
+ encodeULEB128(P.IncludeDirectories.size(), Section.OS);
+ // directories (sequence of directory names).
+ for (auto Include : P.IncludeDirectories) {
+ std::optional<const char *> IncludeStr = dwarf::toString(Include);
+ if (!IncludeStr) {
+ U.warn("cann't read string from line table.");
+ return;
+ }
+
+ Section.emitString(Include.getForm(), *IncludeStr);
+ }
+
+ if (P.FileNames.empty()) {
+ // file_name_entry_format_count (ubyte).
+ Section.emitIntVal(0, 1);
+ } else {
+ // file_name_entry_format_count (ubyte).
+ Section.emitIntVal(2, 1);
+
+ // file_name_entry_format (sequence of ULEB128 pairs).
+ encodeULEB128(dwarf::DW_LNCT_path, Section.OS);
+ encodeULEB128(P.FileNames[0].Name.getForm(), Section.OS);
+
+ encodeULEB128(dwarf::DW_LNCT_directory_index, Section.OS);
+ encodeULEB128(dwarf::DW_FORM_data1, Section.OS);
+ }
+
+ // file_names_count (ULEB128).
+ encodeULEB128(P.FileNames.size(), Section.OS);
+
+ // file_names (sequence of file name entries).
+ for (auto File : P.FileNames) {
+ std::optional<const char *> FileNameStr = dwarf::toString(File.Name);
+ if (!FileNameStr) {
+ U.warn("cann't read string from line table.");
+ return;
+ }
+
+ // A null-terminated string containing the full or relative path name of a
+ // source file.
+ Section.emitString(File.Name.getForm(), *FileNameStr);
+ Section.emitIntVal(File.DirIdx, 1);
+ }
+ }
+
+ void emitLineTableProloguePayload(const DWARFDebugLine::Prologue &P,
+ SectionDescriptor &Section) {
+ // minimum_instruction_length (ubyte).
+ Section.emitIntVal(P.MinInstLength, 1);
+ if (P.FormParams.Version >= 4) {
+ // maximum_operations_per_instruction (ubyte).
+ Section.emitIntVal(P.MaxOpsPerInst, 1);
+ }
+ // default_is_stmt (ubyte).
+ Section.emitIntVal(P.DefaultIsStmt, 1);
+ // line_base (sbyte).
+ Section.emitIntVal(P.LineBase, 1);
+ // line_range (ubyte).
+ Section.emitIntVal(P.LineRange, 1);
+ // opcode_base (ubyte).
+ Section.emitIntVal(P.OpcodeBase, 1);
+
+ // standard_opcode_lengths (array of ubyte).
+ for (auto Length : P.StandardOpcodeLengths)
+ Section.emitIntVal(Length, 1);
+
+ if (P.FormParams.Version < 5)
+ emitLineTablePrologueV2IncludeAndFileTable(P, Section);
+ else
+ emitLineTablePrologueV5IncludeAndFileTable(P, Section);
+ }
+
+ void emitLineTableRows(const DWARFDebugLine::LineTable &LineTable,
+ SectionDescriptor &Section) {
+
+ MCDwarfLineTableParams Params;
+ Params.DWARF2LineOpcodeBase = LineTable.Prologue.OpcodeBase;
+ Params.DWARF2LineBase = LineTable.Prologue.LineBase;
+ Params.DWARF2LineRange = LineTable.Prologue.LineRange;
+
+ SmallString<128> EncodingBuffer;
+
+ if (LineTable.Rows.empty()) {
+ // We only have the dummy entry; dsymutil emits an entry with a 0
+ // address in that case.
+ MCDwarfLineAddr::encode(*MC, Params, std::numeric_limits<int64_t>::max(),
+ 0, EncodingBuffer);
+ Section.OS.write(EncodingBuffer.c_str(), EncodingBuffer.size());
+ return;
+ }
+
+ // Line table state machine fields
+ unsigned FileNum = 1;
+ unsigned LastLine = 1;
+ unsigned Column = 0;
+ unsigned IsStatement = 1;
+ unsigned Isa = 0;
+ uint64_t Address = -1ULL;
+
+ unsigned RowsSinceLastSequence = 0;
+
+ for (const DWARFDebugLine::Row &Row : LineTable.Rows) {
+ int64_t AddressDelta;
+ if (Address == -1ULL) {
+ Section.emitIntVal(dwarf::DW_LNS_extended_op, 1);
+ encodeULEB128(Section.getFormParams().AddrSize + 1, Section.OS);
+ Section.emitIntVal(dwarf::DW_LNE_set_address, 1);
+ Section.emitIntVal(Row.Address.Address,
+ Section.getFormParams().AddrSize);
+ AddressDelta = 0;
+ } else {
+ AddressDelta =
+ (Row.Address.Address - Address) / LineTable.Prologue.MinInstLength;
+ }
+
+ // FIXME: code copied and transformed from
+ // MCDwarf.cpp::EmitDwarfLineTable. We should find a way to share this
+ // code, but the current compatibility requirement with classic dsymutil
+ // makes it hard. Revisit that once this requirement is dropped.
+
+ if (FileNum != Row.File) {
+ FileNum = Row.File;
+ Section.emitIntVal(dwarf::DW_LNS_set_file, 1);
+ encodeULEB128(FileNum, Section.OS);
+ }
+ if (Column != Row.Column) {
+ Column = Row.Column;
+ Section.emitIntVal(dwarf::DW_LNS_set_column, 1);
+ encodeULEB128(Column, Section.OS);
+ }
+
+ // FIXME: We should handle the discriminator here, but dsymutil doesn't
+ // consider it, thus ignore it for now.
+
+ if (Isa != Row.Isa) {
+ Isa = Row.Isa;
+ Section.emitIntVal(dwarf::DW_LNS_set_isa, 1);
+ encodeULEB128(Isa, Section.OS);
+ }
+ if (IsStatement != Row.IsStmt) {
+ IsStatement = Row.IsStmt;
+ Section.emitIntVal(dwarf::DW_LNS_negate_stmt, 1);
+ }
+ if (Row.BasicBlock)
+ Section.emitIntVal(dwarf::DW_LNS_set_basic_block, 1);
+
+ if (Row.PrologueEnd)
+ Section.emitIntVal(dwarf::DW_LNS_set_prologue_end, 1);
+
+ if (Row.EpilogueBegin)
+ Section.emitIntVal(dwarf::DW_LNS_set_epilogue_begin, 1);
+
+ int64_t LineDelta = int64_t(Row.Line) - LastLine;
+ if (!Row.EndSequence) {
+ MCDwarfLineAddr::encode(*MC, Params, LineDelta, AddressDelta,
+ EncodingBuffer);
+ Section.OS.write(EncodingBuffer.c_str(), EncodingBuffer.size());
+ EncodingBuffer.resize(0);
+ Address = Row.Address.Address;
+ LastLine = Row.Line;
+ RowsSinceLastSequence++;
+ } else {
+ if (LineDelta) {
+ Section.emitIntVal(dwarf::DW_LNS_advance_line, 1);
+ encodeSLEB128(LineDelta, Section.OS);
+ }
+ if (AddressDelta) {
+ Section.emitIntVal(dwarf::DW_LNS_advance_pc, 1);
+ encodeULEB128(AddressDelta, Section.OS);
+ }
+ MCDwarfLineAddr::encode(*MC, Params,
+ std::numeric_limits<int64_t>::max(), 0,
+ EncodingBuffer);
+ Section.OS.write(EncodingBuffer.c_str(), EncodingBuffer.size());
+ EncodingBuffer.resize(0);
+ Address = -1ULL;
+ LastLine = FileNum = IsStatement = 1;
+ RowsSinceLastSequence = Column = Isa = 0;
+ }
+ }
+
+ if (RowsSinceLastSequence) {
+ MCDwarfLineAddr::encode(*MC, Params, std::numeric_limits<int64_t>::max(),
+ 0, EncodingBuffer);
+ Section.OS.write(EncodingBuffer.c_str(), EncodingBuffer.size());
+ EncodingBuffer.resize(0);
+ }
+ }
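
MCDwarfLineAddr::encode, reused above, folds a (LineDelta, AddressDelta) pair into one special opcode whenever both deltas fit. A sketch of that computation, using the common DWARF default prologue parameters as defaults (specialOpcode is a hypothetical helper, not the MC API):

#include <cstdint>
#include <cstdio>
#include <optional>

static std::optional<uint8_t> specialOpcode(int64_t LineDelta,
                                            uint64_t AddressDelta,
                                            int8_t LineBase = -5,
                                            uint8_t LineRange = 14,
                                            uint8_t OpcodeBase = 13) {
  int64_t Adjusted = LineDelta - LineBase;
  if (Adjusted < 0 || Adjusted >= LineRange)
    return std::nullopt; // needs DW_LNS_advance_line first
  uint64_t Opcode = OpcodeBase + Adjusted + LineRange * AddressDelta;
  if (Opcode > 255)
    return std::nullopt; // needs DW_LNS_advance_pc / DW_LNE_const_add_pc
  return uint8_t(Opcode);
}

int main() {
  // 13 + (2 - (-5)) + 14 * 3 = 62.
  std::printf("%d\n", int(specialOpcode(2, 3).value_or(0)));
}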
+
+ Triple TheTriple;
+ DwarfUnit &U;
+
+ std::unique_ptr<MCRegisterInfo> MRI;
+ std::unique_ptr<MCAsmInfo> MAI;
+ std::unique_ptr<MCContext> MC;
+ std::unique_ptr<MCSubtargetInfo> MSTI;
+};
+
+} // end of namespace dwarflinker_parallel
+} // end namespace llvm
+
+#endif // LLVM_LIB_DWARFLINKERPARALLEL_DEBUGLINESECTIONEMITTER_H
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DependencyTracker.cpp b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DependencyTracker.cpp
new file mode 100644
index 000000000000..052eb6cf57d4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DependencyTracker.cpp
@@ -0,0 +1,839 @@
+//=== DependencyTracker.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DependencyTracker.h"
+#include "llvm/Support/FormatVariadic.h"
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+/// A broken link in the keep chain. By recording both the parent and the child
+/// we can show only broken links for DIEs with multiple children.
+struct BrokenLink {
+ BrokenLink(DWARFDie Parent, DWARFDie Child, const char *Message)
+ : Parent(Parent), Child(Child), Message(Message) {}
+ DWARFDie Parent;
+ DWARFDie Child;
+ std::string Message;
+};
+
+/// Verify the keep chain by looking for DIEs that are kept but whose parent
+/// isn't.
+void DependencyTracker::verifyKeepChain() {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ SmallVector<DWARFDie> Worklist;
+ Worklist.push_back(CU.getOrigUnit().getUnitDIE());
+
+ // List of broken links.
+ SmallVector<BrokenLink> BrokenLinks;
+
+ while (!Worklist.empty()) {
+ const DWARFDie Current = Worklist.back();
+ Worklist.pop_back();
+
+ if (!Current.isValid())
+ continue;
+
+ CompileUnit::DIEInfo &CurrentInfo =
+ CU.getDIEInfo(Current.getDebugInfoEntry());
+ const bool ParentPlainDieIsKept = CurrentInfo.needToKeepInPlainDwarf();
+ const bool ParentTypeDieIsKept = CurrentInfo.needToPlaceInTypeTable();
+
+ for (DWARFDie Child : reverse(Current.children())) {
+ Worklist.push_back(Child);
+
+ CompileUnit::DIEInfo &ChildInfo =
+ CU.getDIEInfo(Child.getDebugInfoEntry());
+ const bool ChildPlainDieIsKept = ChildInfo.needToKeepInPlainDwarf();
+ const bool ChildTypeDieIsKept = ChildInfo.needToPlaceInTypeTable();
+
+ if (!ParentPlainDieIsKept && ChildPlainDieIsKept)
+ BrokenLinks.emplace_back(Current, Child,
+ "Found invalid link in keep chain");
+
+ if (Child.getTag() == dwarf::DW_TAG_subprogram) {
+ if (!ChildInfo.getKeep() && isLiveSubprogramEntry(UnitEntryPairTy(
+ &CU, Child.getDebugInfoEntry()))) {
+ BrokenLinks.emplace_back(Current, Child,
+ "Live subprogram is not marked as kept");
+ }
+ }
+
+ if (!ChildInfo.getODRAvailable()) {
+ assert(!ChildTypeDieIsKept);
+ continue;
+ }
+
+ if (!ParentTypeDieIsKept && ChildTypeDieIsKept)
+ BrokenLinks.emplace_back(Current, Child,
+ "Found invalid link in keep chain");
+
+ if (CurrentInfo.getIsInAnonNamespaceScope() &&
+ ChildInfo.needToPlaceInTypeTable()) {
+ BrokenLinks.emplace_back(Current, Child,
+ "Found invalid placement marking for member "
+ "of anonymous namespace");
+ }
+ }
+ }
+
+ if (!BrokenLinks.empty()) {
+ for (BrokenLink Link : BrokenLinks) {
+ errs() << "\n=================================\n";
+ WithColor::error() << formatv("{0} between {1:x} and {2:x}", Link.Message,
+ Link.Parent.getOffset(),
+ Link.Child.getOffset());
+
+ errs() << "\nParent:";
+ Link.Parent.dump(errs(), 0, {});
+ errs() << "\n";
+ CU.getDIEInfo(Link.Parent).dump();
+
+ errs() << "\nChild:";
+ Link.Child.dump(errs(), 2, {});
+ errs() << "\n";
+ CU.getDIEInfo(Link.Child).dump();
+ }
+ report_fatal_error("invalid keep chain");
+ }
+#endif
+}
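
The traversal above is a standard explicit-worklist DFS over the DIE tree; children are pushed in reverse so they are popped in source order. Reduced to its skeleton, with a plain Node type instead of DWARFDie:

#include <vector>

struct Node {
  std::vector<Node *> Children;
};

// Iterative DFS with an explicit worklist, mirroring verifyKeepChain().
static void dfs(Node *Root) {
  std::vector<Node *> Worklist{Root};
  while (!Worklist.empty()) {
    Node *Current = Worklist.back();
    Worklist.pop_back();
    // Visit Current here, then push children in reverse order.
    for (auto It = Current->Children.rbegin(); It != Current->Children.rend();
         ++It)
      Worklist.push_back(*It);
  }
}

int main() {
  Node Leaf;
  Node Root{{&Leaf}};
  dfs(&Root);
}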
+
+bool DependencyTracker::resolveDependenciesAndMarkLiveness(
+ bool InterCUProcessingStarted, std::atomic<bool> &HasNewInterconnectedCUs) {
+ RootEntriesWorkList.clear();
+
+ // Search for live root DIEs.
+ CompileUnit::DIEInfo &CUInfo = CU.getDIEInfo(CU.getDebugInfoEntry(0));
+ CUInfo.setPlacement(CompileUnit::PlainDwarf);
+ collectRootsToKeep(UnitEntryPairTy{&CU, CU.getDebugInfoEntry(0)},
+ std::nullopt, false);
+
+ // Mark live DIEs as kept.
+ return markCollectedLiveRootsAsKept(InterCUProcessingStarted,
+ HasNewInterconnectedCUs);
+}
+
+void DependencyTracker::addActionToRootEntriesWorkList(
+ LiveRootWorklistActionTy Action, const UnitEntryPairTy &Entry,
+ std::optional<UnitEntryPairTy> ReferencedBy) {
+ if (ReferencedBy) {
+ RootEntriesWorkList.emplace_back(Action, Entry, *ReferencedBy);
+ return;
+ }
+
+ RootEntriesWorkList.emplace_back(Action, Entry);
+}
+
+void DependencyTracker::collectRootsToKeep(
+ const UnitEntryPairTy &Entry, std::optional<UnitEntryPairTy> ReferencedBy,
+ bool IsLiveParent) {
+ for (const DWARFDebugInfoEntry *CurChild =
+ Entry.CU->getFirstChildEntry(Entry.DieEntry);
+ CurChild && CurChild->getAbbreviationDeclarationPtr();
+ CurChild = Entry.CU->getSiblingEntry(CurChild)) {
+ UnitEntryPairTy ChildEntry(Entry.CU, CurChild);
+ CompileUnit::DIEInfo &ChildInfo = Entry.CU->getDIEInfo(CurChild);
+
+ bool IsLiveChild = false;
+
+ switch (CurChild->getTag()) {
+ case dwarf::DW_TAG_label: {
+ IsLiveChild = isLiveSubprogramEntry(ChildEntry);
+
+ // Keep label referencing live address.
+ // Keep label which is child of live parent entry.
+ if (IsLiveChild || (IsLiveParent && ChildInfo.getHasAnAddress())) {
+ addActionToRootEntriesWorkList(
+ LiveRootWorklistActionTy::MarkLiveEntryRec, ChildEntry,
+ ReferencedBy);
+ }
+ } break;
+ case dwarf::DW_TAG_subprogram: {
+ IsLiveChild = isLiveSubprogramEntry(ChildEntry);
+
+ // Keep subprogram referencing live address.
+ if (IsLiveChild) {
+ // If the subprogram is in module scope and this module allows ODR
+ // deduplication, set "TypeTable" placement; otherwise set "PlainDwarf"
+ // placement.
+ LiveRootWorklistActionTy Action =
+ (ChildInfo.getIsInMouduleScope() && ChildInfo.getODRAvailable())
+ ? LiveRootWorklistActionTy::MarkTypeEntryRec
+ : LiveRootWorklistActionTy::MarkLiveEntryRec;
+
+ addActionToRootEntriesWorkList(Action, ChildEntry, ReferencedBy);
+ }
+ } break;
+ case dwarf::DW_TAG_constant:
+ case dwarf::DW_TAG_variable: {
+ IsLiveChild = isLiveVariableEntry(ChildEntry, IsLiveParent);
+
+ // Keep variable referencing live address.
+ if (IsLiveChild) {
+ // If the variable is in module scope and this module allows ODR
+ // deduplication, set "TypeTable" placement; otherwise set "PlainDwarf"
+ // placement.
+
+ LiveRootWorklistActionTy Action =
+ (ChildInfo.getIsInMouduleScope() && ChildInfo.getODRAvailable())
+ ? LiveRootWorklistActionTy::MarkTypeEntryRec
+ : LiveRootWorklistActionTy::MarkLiveEntryRec;
+
+ addActionToRootEntriesWorkList(Action, ChildEntry, ReferencedBy);
+ }
+ } break;
+ case dwarf::DW_TAG_base_type: {
+ // Always keep base types.
+ addActionToRootEntriesWorkList(
+ LiveRootWorklistActionTy::MarkSingleLiveEntry, ChildEntry,
+ ReferencedBy);
+ } break;
+ case dwarf::DW_TAG_imported_module:
+ case dwarf::DW_TAG_imported_declaration:
+ case dwarf::DW_TAG_imported_unit: {
+ // Always keep DIEs having DW_AT_import attribute.
+ if (Entry.DieEntry->getTag() == dwarf::DW_TAG_compile_unit) {
+ addActionToRootEntriesWorkList(
+ LiveRootWorklistActionTy::MarkSingleLiveEntry, ChildEntry,
+ ReferencedBy);
+ break;
+ }
+
+ addActionToRootEntriesWorkList(
+ LiveRootWorklistActionTy::MarkSingleTypeEntry, ChildEntry,
+ ReferencedBy);
+ } break;
+ case dwarf::DW_TAG_type_unit:
+ case dwarf::DW_TAG_partial_unit:
+ case dwarf::DW_TAG_compile_unit: {
+ llvm_unreachable("Called for incorrect DIE");
+ } break;
+ default:
+ // Nothing to do.
+ break;
+ }
+
+ collectRootsToKeep(ChildEntry, ReferencedBy, IsLiveChild || IsLiveParent);
+ }
+}
+
+bool DependencyTracker::markCollectedLiveRootsAsKept(
+ bool InterCUProcessingStarted, std::atomic<bool> &HasNewInterconnectedCUs) {
+ bool Res = true;
+
+ // Mark roots as kept.
+ while (!RootEntriesWorkList.empty()) {
+ LiveRootWorklistItemTy Root = RootEntriesWorkList.pop_back_val();
+
+ if (markDIEEntryAsKeptRec(Root.getAction(), Root.getRootEntry(),
+ Root.getRootEntry(), InterCUProcessingStarted,
+ HasNewInterconnectedCUs)) {
+ if (Root.hasReferencedByOtherEntry())
+ Dependencies.push_back(Root);
+ } else
+ Res = false;
+ }
+
+ return Res;
+}
+
+bool DependencyTracker::updateDependenciesCompleteness() {
+ bool HasNewDependency = false;
+ for (LiveRootWorklistItemTy &Root : Dependencies) {
+ assert(Root.hasReferencedByOtherEntry() &&
+ "Root entry without dependency inside the dependencies list");
+
+ UnitEntryPairTy RootEntry = Root.getRootEntry();
+ CompileUnit::DIEInfo &RootInfo =
+ RootEntry.CU->getDIEInfo(RootEntry.DieEntry);
+
+ UnitEntryPairTy ReferencedByEntry = Root.getReferencedByEntry();
+ CompileUnit::DIEInfo &ReferencedByInfo =
+ ReferencedByEntry.CU->getDIEInfo(ReferencedByEntry.DieEntry);
+
+ if (!RootInfo.needToPlaceInTypeTable() &&
+ ReferencedByInfo.needToPlaceInTypeTable()) {
+ HasNewDependency = true;
+ setPlainDwarfPlacementRec(ReferencedByEntry);
+
+ // FIXME: we probably need to update getKeepTypeChildren status for
+ // parents of *Root.ReferencedBy.
+ }
+ }
+
+ return HasNewDependency;
+}
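
Since demoting one entry out of the type table can invalidate the placement of an entry that references it, callers are expected to re-run this method until it reports no change. A toy sketch of that fixed-point loop (the Entry struct is illustrative, not the real DIEInfo):

#include <vector>

struct Entry {
  bool InTypeTable;
  Entry *ReferencedBy; // entry that references this one, if any
};

// One pass: demote any referencing entry whose placement is incompatible.
static bool updateOnce(std::vector<Entry> &Entries) {
  bool Changed = false;
  for (Entry &E : Entries)
    if (E.ReferencedBy && !E.InTypeTable && E.ReferencedBy->InTypeTable) {
      E.ReferencedBy->InTypeTable = false;
      Changed = true;
    }
  return Changed;
}

int main() {
  Entry A{true, nullptr};
  std::vector<Entry> Entries{{false, &A}};
  while (updateOnce(Entries)) { // iterate to a fixed point
  }
}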
+
+void DependencyTracker::setPlainDwarfPlacementRec(
+ const UnitEntryPairTy &Entry) {
+ CompileUnit::DIEInfo &Info = Entry.CU->getDIEInfo(Entry.DieEntry);
+ if (Info.getPlacement() == CompileUnit::PlainDwarf &&
+ !Info.getKeepTypeChildren())
+ return;
+
+ Info.setPlacement(CompileUnit::PlainDwarf);
+ Info.unsetKeepTypeChildren();
+ markParentsAsKeepingChildren(Entry);
+
+ for (const DWARFDebugInfoEntry *CurChild =
+ Entry.CU->getFirstChildEntry(Entry.DieEntry);
+ CurChild && CurChild->getAbbreviationDeclarationPtr();
+ CurChild = Entry.CU->getSiblingEntry(CurChild))
+ setPlainDwarfPlacementRec(UnitEntryPairTy{Entry.CU, CurChild});
+}
+
+static bool isNamespaceLikeEntry(const DWARFDebugInfoEntry *Entry) {
+ switch (Entry->getTag()) {
+ case dwarf::DW_TAG_compile_unit:
+ case dwarf::DW_TAG_module:
+ case dwarf::DW_TAG_namespace:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+bool isAlreadyMarked(const CompileUnit::DIEInfo &Info,
+ CompileUnit::DieOutputPlacement NewPlacement) {
+ if (!Info.getKeep())
+ return false;
+
+ switch (NewPlacement) {
+ case CompileUnit::TypeTable:
+ return Info.needToPlaceInTypeTable();
+
+ case CompileUnit::PlainDwarf:
+ return Info.needToKeepInPlainDwarf();
+
+ case CompileUnit::Both:
+ return Info.needToPlaceInTypeTable() && Info.needToKeepInPlainDwarf();
+
+ case CompileUnit::NotSet:
+ llvm_unreachable("Unset placement type is specified.");
+ }
+
+ llvm_unreachable("Unknown CompileUnit::DieOutputPlacement enum");
+}
+
+bool isAlreadyMarked(const UnitEntryPairTy &Entry,
+ CompileUnit::DieOutputPlacement NewPlacement) {
+ return isAlreadyMarked(Entry.CU->getDIEInfo(Entry.DieEntry), NewPlacement);
+}
+
+void DependencyTracker::markParentsAsKeepingChildren(
+ const UnitEntryPairTy &Entry) {
+ if (Entry.DieEntry->getAbbreviationDeclarationPtr() == nullptr)
+ return;
+
+ CompileUnit::DIEInfo &Info = Entry.CU->getDIEInfo(Entry.DieEntry);
+ bool NeedKeepTypeChildren = Info.needToPlaceInTypeTable();
+ bool NeedKeepPlainChildren = Info.needToKeepInPlainDwarf();
+
+ bool AreTypeParentsDone = !NeedKeepTypeChildren;
+ bool ArePlainParentsDone = !NeedKeepPlainChildren;
+
+ // Mark parents as 'Keep*Children'.
+ std::optional<uint32_t> ParentIdx = Entry.DieEntry->getParentIdx();
+ while (ParentIdx) {
+ const DWARFDebugInfoEntry *ParentEntry =
+ Entry.CU->getDebugInfoEntry(*ParentIdx);
+ CompileUnit::DIEInfo &ParentInfo = Entry.CU->getDIEInfo(*ParentIdx);
+
+ if (!AreTypeParentsDone && NeedKeepTypeChildren) {
+ if (ParentInfo.getKeepTypeChildren())
+ AreTypeParentsDone = true;
+ else {
+ bool AddToWorklist = !isAlreadyMarked(
+ ParentInfo, CompileUnit::DieOutputPlacement::TypeTable);
+ ParentInfo.setKeepTypeChildren();
+ if (AddToWorklist && !isNamespaceLikeEntry(ParentEntry)) {
+ addActionToRootEntriesWorkList(
+ LiveRootWorklistActionTy::MarkTypeChildrenRec,
+ UnitEntryPairTy{Entry.CU, ParentEntry}, std::nullopt);
+ }
+ }
+ }
+
+ if (!ArePlainParentsDone && NeedKeepPlainChildren) {
+ if (ParentInfo.getKeepPlainChildren())
+ ArePlainParentsDone = true;
+ else {
+ bool AddToWorklist = !isAlreadyMarked(
+ ParentInfo, CompileUnit::DieOutputPlacement::PlainDwarf);
+ ParentInfo.setKeepPlainChildren();
+ if (AddToWorklist && !isNamespaceLikeEntry(ParentEntry)) {
+ addActionToRootEntriesWorkList(
+ LiveRootWorklistActionTy::MarkLiveChildrenRec,
+ UnitEntryPairTy{Entry.CU, ParentEntry}, std::nullopt);
+ }
+ }
+ }
+
+ if (AreTypeParentsDone && ArePlainParentsDone)
+ break;
+
+ ParentIdx = ParentEntry->getParentIdx();
+ }
+}
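
DIEs are stored flat and linked through parent indices, so the loop above climbs via getParentIdx() and stops early once it reaches an already-marked ancestor, since everything above that ancestor was marked on an earlier visit. The skeleton of the walk (plain structs, illustrative names):

#include <cstddef>
#include <optional>
#include <vector>

struct Die {
  std::optional<size_t> ParentIdx;
  bool KeepChildren = false;
};

static void markParents(std::vector<Die> &Dies, size_t Idx) {
  std::optional<size_t> Parent = Dies[Idx].ParentIdx;
  while (Parent) {
    Die &P = Dies[*Parent];
    if (P.KeepChildren)
      break; // ancestors are already marked
    P.KeepChildren = true;
    Parent = P.ParentIdx;
  }
}

int main() {
  std::vector<Die> Dies{{std::nullopt}, {size_t(0)}, {size_t(1)}};
  markParents(Dies, 2);
}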
+
+// This function tries to set the specified \p Placement for the \p Entry.
+// Depending on the concrete entry, the placement could be:
+// a) changed to another one.
+// b) joined with the current entry placement.
+// c) set as requested.
+static CompileUnit::DieOutputPlacement
+getFinalPlacementForEntry(const UnitEntryPairTy &Entry,
+ CompileUnit::DieOutputPlacement Placement) {
+ assert((Placement != CompileUnit::NotSet) && "Placement is not set");
+ CompileUnit::DIEInfo &EntryInfo = Entry.CU->getDIEInfo(Entry.DieEntry);
+
+ if (!EntryInfo.getODRAvailable())
+ return CompileUnit::PlainDwarf;
+
+ if (Entry.DieEntry->getTag() == dwarf::DW_TAG_variable) {
+ // Do not put variable into the "TypeTable" and "PlainDwarf" at the same
+ // time.
+ if (EntryInfo.getPlacement() == CompileUnit::PlainDwarf ||
+ EntryInfo.getPlacement() == CompileUnit::Both)
+ return CompileUnit::PlainDwarf;
+
+ if (Placement == CompileUnit::PlainDwarf || Placement == CompileUnit::Both)
+ return CompileUnit::PlainDwarf;
+ }
+
+ switch (EntryInfo.getPlacement()) {
+ case CompileUnit::NotSet:
+ return Placement;
+
+ case CompileUnit::TypeTable:
+ return Placement == CompileUnit::PlainDwarf ? CompileUnit::Both : Placement;
+
+ case CompileUnit::PlainDwarf:
+ return Placement == CompileUnit::TypeTable ? CompileUnit::Both : Placement;
+
+ case CompileUnit::Both:
+ return CompileUnit::Both;
+ }
+
+ llvm_unreachable("Unknown placement type.");
+ return Placement;
+}
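
Setting aside the ODR and variable special cases, the switch above is a join on a small lattice: TypeTable joined with PlainDwarf gives Both, and Both absorbs everything. A compact sketch of the same join:

enum class Placement { NotSet, TypeTable, PlainDwarf, Both };

// Join the current placement with a newly requested one; New is assumed to
// never be NotSet, matching the assertion in the function above.
static Placement join(Placement Old, Placement New) {
  if (Old == Placement::NotSet)
    return New;
  if (Old == Placement::Both || New == Placement::Both || Old != New)
    return Placement::Both;
  return New; // Old == New
}

int main() {
  return join(Placement::TypeTable, Placement::PlainDwarf) == Placement::Both
             ? 0
             : 1;
}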
+
+bool DependencyTracker::markDIEEntryAsKeptRec(
+ LiveRootWorklistActionTy Action, const UnitEntryPairTy &RootEntry,
+ const UnitEntryPairTy &Entry, bool InterCUProcessingStarted,
+ std::atomic<bool> &HasNewInterconnectedCUs) {
+ if (Entry.DieEntry->getAbbreviationDeclarationPtr() == nullptr)
+ return true;
+
+ CompileUnit::DIEInfo &Info = Entry.CU->getDIEInfo(Entry.DieEntry);
+
+ // Calculate the final placement.
+ CompileUnit::DieOutputPlacement Placement = getFinalPlacementForEntry(
+ Entry,
+ isLiveAction(Action) ? CompileUnit::PlainDwarf : CompileUnit::TypeTable);
+ assert((Info.getODRAvailable() || isLiveAction(Action) ||
+ Placement == CompileUnit::PlainDwarf) &&
+ "Wrong kind of placement for ODR unavailable entry");
+
+ if (!isChildrenAction(Action))
+ if (isAlreadyMarked(Entry, Placement))
+ return true;
+
+ // Mark current DIE as kept.
+ Info.setKeep();
+ Info.setPlacement(Placement);
+
+ // Set keep children property for parents.
+ markParentsAsKeepingChildren(Entry);
+
+ UnitEntryPairTy FinalRootEntry =
+ Entry.DieEntry->getTag() == dwarf::DW_TAG_subprogram ? Entry : RootEntry;
+
+ // Analyse referenced DIEs.
+ bool Res = true;
+ if (!maybeAddReferencedRoots(Action, FinalRootEntry, Entry,
+ InterCUProcessingStarted,
+ HasNewInterconnectedCUs))
+ Res = false;
+
+ // Return if we do not need to process children.
+ if (isSingleAction(Action))
+ return Res;
+
+ // Process children.
+ // Check for subprograms special case.
+ if (Entry.DieEntry->getTag() == dwarf::DW_TAG_subprogram &&
+ Info.getODRAvailable()) {
+ // Subprograms are a special case, as they can be roots for type DIEs
+ // and may themselves be moved into the artificial type unit.
+ // a) Non-removable children (like DW_TAG_formal_parameter) should always
+ // be cloned. They are placed into both the "PlainDwarf" and the
+ // "TypeTable".
+ // b) Children which are ODR deduplication candidates (type DIEs) should
+ // not be put into the "PlainDwarf".
+ // c) Children keeping addresses and locations (like DW_TAG_call_site)
+ // should not be put into the "TypeTable".
+ for (const DWARFDebugInfoEntry *CurChild =
+ Entry.CU->getFirstChildEntry(Entry.DieEntry);
+ CurChild && CurChild->getAbbreviationDeclarationPtr();
+ CurChild = Entry.CU->getSiblingEntry(CurChild)) {
+ CompileUnit::DIEInfo ChildInfo = Entry.CU->getDIEInfo(CurChild);
+
+ switch (CurChild->getTag()) {
+ case dwarf::DW_TAG_variable:
+ case dwarf::DW_TAG_constant:
+ case dwarf::DW_TAG_subprogram:
+ case dwarf::DW_TAG_label: {
+ if (ChildInfo.getHasAnAddress())
+ continue;
+ } break;
+
+ // Entries with the following tags cannot be removed from the subprogram.
+ case dwarf::DW_TAG_lexical_block:
+ case dwarf::DW_TAG_friend:
+ case dwarf::DW_TAG_inheritance:
+ case dwarf::DW_TAG_formal_parameter:
+ case dwarf::DW_TAG_unspecified_parameters:
+ case dwarf::DW_TAG_template_type_parameter:
+ case dwarf::DW_TAG_template_value_parameter:
+ case dwarf::DW_TAG_GNU_template_parameter_pack:
+ case dwarf::DW_TAG_GNU_formal_parameter_pack:
+ case dwarf::DW_TAG_GNU_template_template_param:
+ case dwarf::DW_TAG_thrown_type: {
+ // Go to the default child handling.
+ } break;
+
+ default: {
+ bool ChildIsTypeTableCandidate = isTypeTableCandidate(CurChild);
+
+ // Skip child marked to be copied into the artificial type unit.
+ if (isLiveAction(Action) && ChildIsTypeTableCandidate)
+ continue;
+
+ // Skip child marked to be copied into the plain unit.
+ if (isTypeAction(Action) && !ChildIsTypeTableCandidate)
+ continue;
+
+ // Go to the default child handling.
+ } break;
+ }
+
+ if (!markDIEEntryAsKeptRec(
+ Action, FinalRootEntry, UnitEntryPairTy{Entry.CU, CurChild},
+ InterCUProcessingStarted, HasNewInterconnectedCUs))
+ Res = false;
+ }
+
+ return Res;
+ }
+
+ // Recursively process children.
+ for (const DWARFDebugInfoEntry *CurChild =
+ Entry.CU->getFirstChildEntry(Entry.DieEntry);
+ CurChild && CurChild->getAbbreviationDeclarationPtr();
+ CurChild = Entry.CU->getSiblingEntry(CurChild)) {
+ CompileUnit::DIEInfo ChildInfo = Entry.CU->getDIEInfo(CurChild);
+ switch (CurChild->getTag()) {
+ case dwarf::DW_TAG_variable:
+ case dwarf::DW_TAG_constant:
+ case dwarf::DW_TAG_subprogram:
+ case dwarf::DW_TAG_label: {
+ if (ChildInfo.getHasAnAddress())
+ continue;
+ } break;
+ default:
+ break; // Nothing to do.
+ }
+
+ if (!markDIEEntryAsKeptRec(
+ Action, FinalRootEntry, UnitEntryPairTy{Entry.CU, CurChild},
+ InterCUProcessingStarted, HasNewInterconnectedCUs))
+ Res = false;
+ }
+
+ return Res;
+}
+
+bool DependencyTracker::isTypeTableCandidate(
+ const DWARFDebugInfoEntry *DIEEntry) {
+ switch (DIEEntry->getTag()) {
+ default:
+ return false;
+
+ case dwarf::DW_TAG_imported_module:
+ case dwarf::DW_TAG_imported_declaration:
+ case dwarf::DW_TAG_imported_unit:
+ case dwarf::DW_TAG_array_type:
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_enumeration_type:
+ case dwarf::DW_TAG_pointer_type:
+ case dwarf::DW_TAG_reference_type:
+ case dwarf::DW_TAG_string_type:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_subroutine_type:
+ case dwarf::DW_TAG_typedef:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_variant:
+ case dwarf::DW_TAG_module:
+ case dwarf::DW_TAG_ptr_to_member_type:
+ case dwarf::DW_TAG_set_type:
+ case dwarf::DW_TAG_subrange_type:
+ case dwarf::DW_TAG_base_type:
+ case dwarf::DW_TAG_const_type:
+ case dwarf::DW_TAG_enumerator:
+ case dwarf::DW_TAG_file_type:
+ case dwarf::DW_TAG_packed_type:
+ case dwarf::DW_TAG_thrown_type:
+ case dwarf::DW_TAG_volatile_type:
+ case dwarf::DW_TAG_dwarf_procedure:
+ case dwarf::DW_TAG_restrict_type:
+ case dwarf::DW_TAG_interface_type:
+ case dwarf::DW_TAG_namespace:
+ case dwarf::DW_TAG_unspecified_type:
+ case dwarf::DW_TAG_shared_type:
+ case dwarf::DW_TAG_rvalue_reference_type:
+ case dwarf::DW_TAG_coarray_type:
+ case dwarf::DW_TAG_dynamic_type:
+ case dwarf::DW_TAG_atomic_type:
+ case dwarf::DW_TAG_immutable_type:
+ case dwarf::DW_TAG_function_template:
+ case dwarf::DW_TAG_class_template:
+ return true;
+ }
+}
+
+bool DependencyTracker::maybeAddReferencedRoots(
+ LiveRootWorklistActionTy Action, const UnitEntryPairTy &RootEntry,
+ const UnitEntryPairTy &Entry, bool InterCUProcessingStarted,
+ std::atomic<bool> &HasNewInterconnectedCUs) {
+ const auto *Abbrev = Entry.DieEntry->getAbbreviationDeclarationPtr();
+ if (Abbrev == nullptr)
+ return true;
+
+ DWARFUnit &Unit = Entry.CU->getOrigUnit();
+ DWARFDataExtractor Data = Unit.getDebugInfoExtractor();
+ uint64_t Offset =
+ Entry.DieEntry->getOffset() + getULEB128Size(Abbrev->getCode());
+
+ // For each DIE attribute...
+ for (const auto &AttrSpec : Abbrev->attributes()) {
+ DWARFFormValue Val(AttrSpec.Form);
+ if (!Val.isFormClass(DWARFFormValue::FC_Reference) ||
+ AttrSpec.Attr == dwarf::DW_AT_sibling) {
+ DWARFFormValue::skipValue(AttrSpec.Form, Data, &Offset,
+ Unit.getFormParams());
+ continue;
+ }
+ Val.extractValue(Data, &Offset, Unit.getFormParams(), &Unit);
+
+ // Resolve reference.
+ std::optional<UnitEntryPairTy> RefDie = Entry.CU->resolveDIEReference(
+ Val, InterCUProcessingStarted
+ ? ResolveInterCUReferencesMode::Resolve
+ : ResolveInterCUReferencesMode::AvoidResolving);
+ if (!RefDie) {
+ Entry.CU->warn("cann't find referenced DIE", Entry.DieEntry);
+ continue;
+ }
+
+ if (!RefDie->DieEntry) {
+ // Delay resolving reference.
+ RefDie->CU->setInterconnectedCU();
+ Entry.CU->setInterconnectedCU();
+ HasNewInterconnectedCUs = true;
+ return false;
+ }
+
+ assert((Entry.CU->getUniqueID() == RefDie->CU->getUniqueID() ||
+ InterCUProcessingStarted) &&
+ "Inter-CU reference while inter-CU processing is not started");
+
+ CompileUnit::DIEInfo &RefInfo = RefDie->CU->getDIEInfo(RefDie->DieEntry);
+ if (!RefInfo.getODRAvailable())
+ Action = LiveRootWorklistActionTy::MarkLiveEntryRec;
+ else if (RefInfo.getODRAvailable() &&
+ llvm::is_contained(getODRAttributes(), AttrSpec.Attr))
+ // Note: getODRAttributes does not include DW_AT_containing_type.
+ // That should be OK since we call getRootForSpecifiedEntry(), so any
+ // containing type would be found as the root for the entry.
+ Action = LiveRootWorklistActionTy::MarkTypeEntryRec;
+ else if (isLiveAction(Action))
+ Action = LiveRootWorklistActionTy::MarkLiveEntryRec;
+ else
+ Action = LiveRootWorklistActionTy::MarkTypeEntryRec;
+
+ if (AttrSpec.Attr == dwarf::DW_AT_import) {
+ if (isNamespaceLikeEntry(RefDie->DieEntry)) {
+ addActionToRootEntriesWorkList(
+ isTypeAction(Action)
+ ? LiveRootWorklistActionTy::MarkSingleTypeEntry
+ : LiveRootWorklistActionTy::MarkSingleLiveEntry,
+ *RefDie, RootEntry);
+ continue;
+ }
+
+ addActionToRootEntriesWorkList(Action, *RefDie, RootEntry);
+ continue;
+ }
+
+ UnitEntryPairTy RootForReferencedDie = getRootForSpecifiedEntry(*RefDie);
+ addActionToRootEntriesWorkList(Action, RootForReferencedDie, RootEntry);
+ }
+
+ return true;
+}
+
+UnitEntryPairTy
+DependencyTracker::getRootForSpecifiedEntry(UnitEntryPairTy Entry) {
+ UnitEntryPairTy Result = Entry;
+
+ do {
+ switch (Entry.DieEntry->getTag()) {
+ case dwarf::DW_TAG_subprogram:
+ case dwarf::DW_TAG_label:
+ case dwarf::DW_TAG_variable:
+ case dwarf::DW_TAG_constant: {
+ return Result;
+ } break;
+
+ default: {
+ // Nothing to do.
+ }
+ }
+
+ std::optional<uint32_t> ParentIdx = Result.DieEntry->getParentIdx();
+ if (!ParentIdx)
+ return Result;
+
+ const DWARFDebugInfoEntry *ParentEntry =
+ Result.CU->getDebugInfoEntry(*ParentIdx);
+ if (isNamespaceLikeEntry(ParentEntry))
+ break;
+ Result.DieEntry = ParentEntry;
+ } while (true);
+
+ return Result;
+}
+
+bool DependencyTracker::isLiveVariableEntry(const UnitEntryPairTy &Entry,
+ bool IsLiveParent) {
+ DWARFDie DIE = Entry.CU->getDIE(Entry.DieEntry);
+ CompileUnit::DIEInfo &Info = Entry.CU->getDIEInfo(DIE);
+
+ if (Info.getTrackLiveness()) {
+ const auto *Abbrev = DIE.getAbbreviationDeclarationPtr();
+
+ if (!Info.getIsInFunctionScope() &&
+ Abbrev->findAttributeIndex(dwarf::DW_AT_const_value)) {
+ // Global variables with constant value can always be kept.
+ } else {
+ // See if there is a relocation to a valid debug map entry inside this
+ // variable's location. The order is important here. We want to always
+ // check if the variable has a location expression address. However, we
+ // don't want a static variable in a function to force us to keep the
+ // enclosing function, unless requested explicitly.
+ std::pair<bool, std::optional<int64_t>> LocExprAddrAndRelocAdjustment =
+ Entry.CU->getContaingFile().Addresses->getVariableRelocAdjustment(
+ DIE);
+
+ if (LocExprAddrAndRelocAdjustment.first)
+ Info.setHasAnAddress();
+
+ if (!LocExprAddrAndRelocAdjustment.second)
+ return false;
+
+ if (!IsLiveParent && Info.getIsInFunctionScope() &&
+ !Entry.CU->getGlobalData().getOptions().KeepFunctionForStatic)
+ return false;
+ }
+ }
+ Info.setHasAnAddress();
+
+ if (Entry.CU->getGlobalData().getOptions().Verbose) {
+ outs() << "Keeping variable DIE:";
+ DIDumpOptions DumpOpts;
+ DumpOpts.ChildRecurseDepth = 0;
+ DumpOpts.Verbose = Entry.CU->getGlobalData().getOptions().Verbose;
+ DIE.dump(outs(), 8 /* Indent */, DumpOpts);
+ }
+
+ return true;
+}
+
+bool DependencyTracker::isLiveSubprogramEntry(const UnitEntryPairTy &Entry) {
+ DWARFDie DIE = Entry.CU->getDIE(Entry.DieEntry);
+ CompileUnit::DIEInfo &Info = Entry.CU->getDIEInfo(Entry.DieEntry);
+ std::optional<DWARFFormValue> LowPCVal = DIE.find(dwarf::DW_AT_low_pc);
+
+ std::optional<uint64_t> LowPc;
+ std::optional<uint64_t> HighPc;
+ std::optional<int64_t> RelocAdjustment;
+ if (Info.getTrackLiveness()) {
+ LowPc = dwarf::toAddress(LowPCVal);
+ if (!LowPc)
+ return false;
+
+ Info.setHasAnAddress();
+
+ RelocAdjustment =
+ Entry.CU->getContaingFile().Addresses->getSubprogramRelocAdjustment(
+ DIE);
+ if (!RelocAdjustment)
+ return false;
+
+ if (DIE.getTag() == dwarf::DW_TAG_subprogram) {
+ // Validate subprogram address range.
+
+ HighPc = DIE.getHighPC(*LowPc);
+ if (!HighPc) {
+ Entry.CU->warn("function without high_pc. Range will be discarded.",
+ &DIE);
+ return false;
+ }
+
+ if (*LowPc > *HighPc) {
+ Entry.CU->warn("low_pc greater than high_pc. Range will be discarded.",
+ &DIE);
+ return false;
+ }
+ } else if (DIE.getTag() == dwarf::DW_TAG_label) {
+ if (Entry.CU->hasLabelAt(*LowPc))
+ return false;
+
+ // FIXME: dsymutil-classic compat. dsymutil-classic doesn't consider
+ // labels that don't fall into the CU's aranges. This is wrong IMO. Debug
+ // info generation bugs aside, this is really wrong in the case of labels,
+ // where a label marking the end of a function will have a PC == CU's
+ // high_pc.
+ if (dwarf::toAddress(Entry.CU->find(Entry.DieEntry, dwarf::DW_AT_high_pc))
+ .value_or(UINT64_MAX) <= LowPc)
+ return false;
+
+ Entry.CU->addLabelLowPc(*LowPc, *RelocAdjustment);
+ }
+ } else
+ Info.setHasAnAddress();
+
+ if (Entry.CU->getGlobalData().getOptions().Verbose) {
+ outs() << "Keeping subprogram DIE:";
+ DIDumpOptions DumpOpts;
+ DumpOpts.ChildRecurseDepth = 0;
+ DumpOpts.Verbose = Entry.CU->getGlobalData().getOptions().Verbose;
+ DIE.dump(outs(), 8 /* Indent */, DumpOpts);
+ }
+
+ if (!Info.getTrackLiveness() || DIE.getTag() == dwarf::DW_TAG_label)
+ return true;
+
+ Entry.CU->addFunctionRange(*LowPc, *HighPc, *RelocAdjustment);
+ return true;
+}
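
Stripped of the liveness-tracking and label details, the subprogram test above reduces to range validation: both bounds must exist and must not be inverted. Distilled into a standalone check (the Range type is illustrative):

#include <cstdint>
#include <optional>

struct Range {
  std::optional<uint64_t> LowPc, HighPc;
};

// Keep the range only when both bounds exist and low_pc <= high_pc;
// otherwise the caller warns and the range is discarded, as above.
static bool hasValidRange(const Range &R) {
  return R.LowPc && R.HighPc && *R.LowPc <= *R.HighPc;
}

int main() {
  return hasValidRange({0x1000, 0x2000}) && !hasValidRange({0x2000, 0x1000})
             ? 0
             : 1;
}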
+
+} // end of namespace dwarflinker_parallel
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DependencyTracker.h b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DependencyTracker.h
new file mode 100644
index 000000000000..b0b6ad3a1e8c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/DependencyTracker.h
@@ -0,0 +1,272 @@
+//===- "DependencyTracker.h" ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_DWARFLINKERPARALLEL_DEPENDENCYTRACKER_H
+#define LLVM_LIB_DWARFLINKERPARALLEL_DEPENDENCYTRACKER_H
+
+#include "DWARFLinkerCompileUnit.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+class DWARFDebugInfoEntry;
+class DWARFDie;
+
+namespace dwarflinker_parallel {
+
+/// This class discovers DIE dependencies: it marks "live" DIEs and marks DIE
+/// locations (whether a DIE should be cloned as a regular DIE or placed into
+/// the artificial type unit).
+class DependencyTracker {
+public:
+ DependencyTracker(CompileUnit &CU) : CU(CU) {}
+
+ /// Recursively walk the \p DIE tree and look for DIEs to keep. Store that
+ /// information in \p CU's DIEInfo.
+ ///
+ /// This function is the entry point of the DIE selection algorithm. It is
+ /// expected to walk the DIE tree and (through the mediation of
+ /// Context.File.Addresses) ask for the relocation adjustment value on each
+ /// DIE that might be a 'root DIE' (e.g. subprograms, variables).
+ ///
+ /// Returns true if all dependencies are correctly discovered. Inter-CU
+ /// dependencies cannot be discovered if the referenced CU has not been
+ /// analyzed yet. In that case this method returns false.
+ bool resolveDependenciesAndMarkLiveness(
+ bool InterCUProcessingStarted,
+ std::atomic<bool> &HasNewInterconnectedCUs);
+
+ /// Check if dependencies have incompatible placement.
+ /// If that is the case modify placement to be compatible.
+ /// \returns true if any placement was updated, otherwise returns false.
+ /// This method should be called as a followup processing after
+ /// resolveDependenciesAndMarkLiveness().
+ bool updateDependenciesCompleteness();
+
+ /// Recursively walk the \p DIE tree and check "keepness" and "placement"
+ /// information. It is an error if a parent node does not have the "keep"
+ /// flag while its child has one, or if a parent node has "TypeTable"
+ /// placement while its child has "PlainDwarf" placement. This function dumps
+ /// errors to stderr in such cases.
+ void verifyKeepChain();
+
+protected:
+ enum class LiveRootWorklistActionTy : uint8_t {
+ /// Mark current item as live entry.
+ MarkSingleLiveEntry = 0,
+
+ /// Mark current item as type entry.
+ MarkSingleTypeEntry,
+
+ /// Mark current item and all its children as live entry.
+ MarkLiveEntryRec,
+
+ /// Mark current item and all its children as type entry.
+ MarkTypeEntryRec,
+
+ /// Mark all children of current item as live entry.
+ MarkLiveChildrenRec,
+
+ /// Mark all children of current item as type entry.
+ MarkTypeChildrenRec,
+ };
+
+ /// \returns true if the specified action is for the "PlainDwarf".
+ bool isLiveAction(LiveRootWorklistActionTy Action) {
+ switch (Action) {
+ default:
+ return false;
+
+ case LiveRootWorklistActionTy::MarkSingleLiveEntry:
+ case LiveRootWorklistActionTy::MarkLiveEntryRec:
+ case LiveRootWorklistActionTy::MarkLiveChildrenRec:
+ return true;
+ }
+ }
+
+ /// \returns true if the specified action is for the "TypeTable".
+ bool isTypeAction(LiveRootWorklistActionTy Action) {
+ switch (Action) {
+ default:
+ return false;
+
+ case LiveRootWorklistActionTy::MarkSingleTypeEntry:
+ case LiveRootWorklistActionTy::MarkTypeEntryRec:
+ case LiveRootWorklistActionTy::MarkTypeChildrenRec:
+ return true;
+ }
+ }
+
+ /// \returns true if the specified action affects only the Root entry
+ /// itself and does not affect its children.
+ bool isSingleAction(LiveRootWorklistActionTy Action) {
+ switch (Action) {
+ default:
+ return false;
+
+ case LiveRootWorklistActionTy::MarkSingleLiveEntry:
+ case LiveRootWorklistActionTy::MarkSingleTypeEntry:
+ return true;
+ }
+ }
+
+ /// \returns true if the specified action affects only the children of the
+ /// Root entry and does not affect the entry itself.
+ bool isChildrenAction(LiveRootWorklistActionTy Action) {
+ switch (Action) {
+ default:
+ return false;
+
+ case LiveRootWorklistActionTy::MarkLiveChildrenRec:
+ case LiveRootWorklistActionTy::MarkTypeChildrenRec:
+ return true;
+ }
+ }
+
+ /// Class keeping live worklist item data.
+ class LiveRootWorklistItemTy {
+ public:
+ LiveRootWorklistItemTy() = default;
+ LiveRootWorklistItemTy(const LiveRootWorklistItemTy &) = default;
+ LiveRootWorklistItemTy(LiveRootWorklistActionTy Action,
+ UnitEntryPairTy RootEntry) {
+ RootCU.setInt(Action);
+ RootCU.setPointer(RootEntry.CU);
+
+ RootDieEntry = RootEntry.DieEntry;
+ }
+ LiveRootWorklistItemTy(LiveRootWorklistActionTy Action,
+ UnitEntryPairTy RootEntry,
+ UnitEntryPairTy ReferencedBy) {
+ RootCU.setPointer(RootEntry.CU);
+ RootCU.setInt(Action);
+ RootDieEntry = RootEntry.DieEntry;
+
+ ReferencedByCU = ReferencedBy.CU;
+ ReferencedByDieEntry = ReferencedBy.DieEntry;
+ }
+
+ UnitEntryPairTy getRootEntry() const {
+ return UnitEntryPairTy{RootCU.getPointer(), RootDieEntry};
+ }
+
+ CompileUnit::DieOutputPlacement getPlacement() const {
+ return static_cast<CompileUnit::DieOutputPlacement>(RootCU.getInt());
+ }
+
+ bool hasReferencedByOtherEntry() const { return ReferencedByCU != nullptr; }
+
+ UnitEntryPairTy getReferencedByEntry() const {
+ assert(ReferencedByCU);
+ assert(ReferencedByDieEntry);
+ return UnitEntryPairTy{ReferencedByCU, ReferencedByDieEntry};
+ }
+
+ LiveRootWorklistActionTy getAction() const {
+ return static_cast<LiveRootWorklistActionTy>(RootCU.getInt());
+ }
+
+ protected:
+ /// Root entry.
+ /// ASSUMPTION: 3 bits are used to store LiveRootWorklistActionTy value.
+ /// Thus LiveRootWorklistActionTy should have no more than eight elements.
+
+ /// Pointer traits for CompileUnit.
+ struct CompileUnitPointerTraits {
+ static inline void *getAsVoidPointer(CompileUnit *P) { return P; }
+ static inline CompileUnit *getFromVoidPointer(void *P) {
+ return (CompileUnit *)P;
+ }
+ static constexpr int NumLowBitsAvailable = 3;
+ static_assert(
+ alignof(CompileUnit) >= (1 << NumLowBitsAvailable),
+ "CompileUnit insufficiently aligned to have enough low bits.");
+ };
+
+ PointerIntPair<CompileUnit *, 3, LiveRootWorklistActionTy,
+ CompileUnitPointerTraits>
+ RootCU;
+ const DWARFDebugInfoEntry *RootDieEntry = nullptr;
+
+ /// Another root entry which references this RootDieEntry.
+ /// ReferencedByDieEntry is kept to update placement:
+ /// if RootDieEntry has a placement incompatible with the placement
+ /// of ReferencedByDieEntry, then it should be updated.
+ CompileUnit *ReferencedByCU = nullptr;
+ const DWARFDebugInfoEntry *ReferencedByDieEntry = nullptr;
+ };
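
RootCU above stores the worklist action in the low bits of the CompileUnit pointer: with at least 8-byte alignment the bottom 3 bits of any such pointer are zero, which is exactly what the static_assert in the pointer traits verifies. A freestanding sketch of the trick (illustrative types, not llvm::PointerIntPair):

#include <cassert>
#include <cstdint>

enum class Action : uint8_t { A, B, C }; // must fit into 3 bits

struct alignas(8) Unit {
  int Id;
};

struct PackedPtr {
  uintptr_t Bits = 0;
  void set(Unit *P, Action Act) {
    Bits = reinterpret_cast<uintptr_t>(P) | uintptr_t(Act);
  }
  Unit *pointer() const {
    return reinterpret_cast<Unit *>(Bits & ~uintptr_t(7)); // clear low bits
  }
  Action action() const { return Action(Bits & 7); } // low 3 bits
};

int main() {
  Unit U{42};
  PackedPtr P;
  P.set(&U, Action::C);
  assert(P.pointer()->Id == 42 && P.action() == Action::C);
}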
+
+ using RootEntriesListTy = SmallVector<LiveRootWorklistItemTy>;
+
+ /// This function navigates the DIE tree starting from the specified
+ /// \p Entry. It puts each found 'root DIE' into the worklist, collecting
+ /// either live roots (like subprograms having a live DW_AT_low_pc) or
+ /// roots which are not live but still must be kept (e.g. because they are
+ /// imported via DW_TAG_imported_module).
+ void collectRootsToKeep(const UnitEntryPairTy &Entry,
+ std::optional<UnitEntryPairTy> ReferencedBy,
+ bool IsLiveParent);
+
+ /// Returns true if specified variable references live code section.
+ static bool isLiveVariableEntry(const UnitEntryPairTy &Entry,
+ bool IsLiveParent);
+
+ /// Returns true if specified subprogram references live code section.
+ static bool isLiveSubprogramEntry(const UnitEntryPairTy &Entry);
+
+ /// Examine worklist and mark all 'root DIE's as kept and set "Placement"
+ /// property.
+ bool markCollectedLiveRootsAsKept(bool InterCUProcessingStarted,
+ std::atomic<bool> &HasNewInterconnectedCUs);
+
+ /// Mark whole DIE tree as kept recursively.
+ bool markDIEEntryAsKeptRec(LiveRootWorklistActionTy Action,
+ const UnitEntryPairTy &RootEntry,
+ const UnitEntryPairTy &Entry,
+ bool InterCUProcessingStarted,
+ std::atomic<bool> &HasNewInterconnectedCUs);
+
+ /// Mark parents as keeping children.
+ void markParentsAsKeepingChildren(const UnitEntryPairTy &Entry);
+
+ /// Mark whole DIE tree as placed in "PlainDwarf".
+ void setPlainDwarfPlacementRec(const UnitEntryPairTy &Entry);
+
+ /// Check referenced DIEs and add them into the worklist.
+ bool maybeAddReferencedRoots(LiveRootWorklistActionTy Action,
+ const UnitEntryPairTy &RootEntry,
+ const UnitEntryPairTy &Entry,
+ bool InterCUProcessingStarted,
+ std::atomic<bool> &HasNewInterconnectedCUs);
+
+ /// \returns true if \p DIEEntry can possibly be put into the artificial type
+ /// unit.
+ bool isTypeTableCandidate(const DWARFDebugInfoEntry *DIEEntry);
+
+ /// \returns root for the specified \p Entry.
+ UnitEntryPairTy getRootForSpecifiedEntry(UnitEntryPairTy Entry);
+
+ /// Add action item to the work list.
+ void
+ addActionToRootEntriesWorkList(LiveRootWorklistActionTy Action,
+ const UnitEntryPairTy &Entry,
+ std::optional<UnitEntryPairTy> ReferencedBy);
+
+ CompileUnit &CU;
+
+ /// List of entries which are 'root DIE's.
+ RootEntriesListTy RootEntriesWorkList;
+
+ /// List of entries dependencies.
+ RootEntriesListTy Dependencies;
+};
+
+} // end namespace dwarflinker_parallel
+} // end namespace llvm
+
+#endif // LLVM_LIB_DWARFLINKERPARALLEL_DEPENDENCYTRACKER_H
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/IndexedValuesMap.h b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/IndexedValuesMap.h
new file mode 100644
index 000000000000..0dc8de860a42
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/IndexedValuesMap.h
@@ -0,0 +1,49 @@
+//===- IndexedValuesMap.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_DWARFLINKERPARALLEL_INDEXEDVALUESMAP_H
+#define LLVM_LIB_DWARFLINKERPARALLEL_INDEXEDVALUESMAP_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include <cstdint>
+#include <utility>
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+template <typename T> class IndexedValuesMap {
+public:
+ uint64_t getValueIndex(T Value) {
+ typename ValueToIndexMapTy::iterator It = ValueToIndexMap.find(Value);
+ if (It == ValueToIndexMap.end()) {
+ It = ValueToIndexMap.insert(std::make_pair(Value, Values.size())).first;
+ Values.push_back(Value);
+ }
+ return It->second;
+ }
+
+ const SmallVector<T> &getValues() { return Values; }
+
+ void clear() {
+ ValueToIndexMap.clear();
+ Values.clear();
+ }
+
+ bool empty() { return Values.empty(); }
+
+protected:
+ using ValueToIndexMapTy = DenseMap<T, uint64_t>;
+ ValueToIndexMapTy ValueToIndexMap;
+ SmallVector<T> Values;
+};
+
+} // end of namespace dwarflinker_parallel
+} // end namespace llvm
+
+#endif // LLVM_LIB_DWARFLINKERPARALLEL_INDEXEDVALUESMAP_H
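
IndexedValuesMap implements the usual interning pattern: the first lookup of a value appends it and hands out a fresh dense index; later lookups return the same index. A usage sketch with std::unordered_map standing in for llvm::DenseMap:

#include <cstdint>
#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

template <typename T> class IndexedValues {
  std::unordered_map<T, uint64_t> Index;
  std::vector<T> Values;

public:
  uint64_t getValueIndex(const T &V) {
    auto [It, Inserted] = Index.try_emplace(V, Values.size());
    if (Inserted)
      Values.push_back(V); // first time seen: append and index
    return It->second;
  }
};

int main() {
  IndexedValues<std::string> Strings;
  std::printf("%llu %llu %llu\n", // prints "0 1 0"
              (unsigned long long)Strings.getValueIndex("a"),
              (unsigned long long)Strings.getValueIndex("b"),
              (unsigned long long)Strings.getValueIndex("a"));
}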
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/OutputSections.cpp b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/OutputSections.cpp
index 69c5bfaa7bdf..9c3e3ebd220a 100644
--- a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/OutputSections.cpp
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/OutputSections.cpp
@@ -7,30 +7,526 @@
//===----------------------------------------------------------------------===//
#include "OutputSections.h"
+#include "DWARFLinkerCompileUnit.h"
+#include "DWARFLinkerTypeUnit.h"
#include "llvm/ADT/StringSwitch.h"
namespace llvm {
namespace dwarflinker_parallel {
-std::optional<OutputSections::DebugSectionKind>
-OutputSections::parseDebugSectionName(llvm::StringRef SecName) {
- return llvm::StringSwitch<std::optional<OutputSections::DebugSectionKind>>(
- SecName)
- .Case("debug_info", DebugSectionKind::DebugInfo)
- .Case("debug_line", DebugSectionKind::DebugLine)
- .Case("debug_frame", DebugSectionKind::DebugFrame)
- .Case("debug_ranges", DebugSectionKind::DebugRange)
- .Case("debug_rnglists", DebugSectionKind::DebugRngLists)
- .Case("debug_loc", DebugSectionKind::DebugLoc)
- .Case("debug_loclists", DebugSectionKind::DebugLocLists)
- .Case("debug_aranges", DebugSectionKind::DebugARanges)
- .Case("debug_abbrev", DebugSectionKind::DebugAbbrev)
- .Case("debug_macinfo", DebugSectionKind::DebugMacinfo)
- .Case("debug_macro", DebugSectionKind::DebugMacro)
+static constexpr StringLiteral SectionNames[SectionKindsNum] = {
+ "debug_info", "debug_line", "debug_frame", "debug_ranges",
+ "debug_rnglists", "debug_loc", "debug_loclists", "debug_aranges",
+ "debug_abbrev", "debug_macinfo", "debug_macro", "debug_addr",
+ "debug_str", "debug_line_str", "debug_str_offsets", "debug_pubnames",
+ "debug_pubtypes", "debug_names", "apple_names", "apple_namespac",
+ "apple_objc", "apple_types"};
+
+const StringLiteral &getSectionName(DebugSectionKind SectionKind) {
+ return SectionNames[static_cast<uint8_t>(SectionKind)];
+}
+
+std::optional<DebugSectionKind> parseDebugTableName(llvm::StringRef SecName) {
+ return llvm::StringSwitch<std::optional<DebugSectionKind>>(
+ SecName.substr(SecName.find_first_not_of("._")))
+ .Case(getSectionName(DebugSectionKind::DebugInfo),
+ DebugSectionKind::DebugInfo)
+ .Case(getSectionName(DebugSectionKind::DebugLine),
+ DebugSectionKind::DebugLine)
+ .Case(getSectionName(DebugSectionKind::DebugFrame),
+ DebugSectionKind::DebugFrame)
+ .Case(getSectionName(DebugSectionKind::DebugRange),
+ DebugSectionKind::DebugRange)
+ .Case(getSectionName(DebugSectionKind::DebugRngLists),
+ DebugSectionKind::DebugRngLists)
+ .Case(getSectionName(DebugSectionKind::DebugLoc),
+ DebugSectionKind::DebugLoc)
+ .Case(getSectionName(DebugSectionKind::DebugLocLists),
+ DebugSectionKind::DebugLocLists)
+ .Case(getSectionName(DebugSectionKind::DebugARanges),
+ DebugSectionKind::DebugARanges)
+ .Case(getSectionName(DebugSectionKind::DebugAbbrev),
+ DebugSectionKind::DebugAbbrev)
+ .Case(getSectionName(DebugSectionKind::DebugMacinfo),
+ DebugSectionKind::DebugMacinfo)
+ .Case(getSectionName(DebugSectionKind::DebugMacro),
+ DebugSectionKind::DebugMacro)
+ .Case(getSectionName(DebugSectionKind::DebugAddr),
+ DebugSectionKind::DebugAddr)
+ .Case(getSectionName(DebugSectionKind::DebugStr),
+ DebugSectionKind::DebugStr)
+ .Case(getSectionName(DebugSectionKind::DebugLineStr),
+ DebugSectionKind::DebugLineStr)
+ .Case(getSectionName(DebugSectionKind::DebugStrOffsets),
+ DebugSectionKind::DebugStrOffsets)
+ .Case(getSectionName(DebugSectionKind::DebugPubNames),
+ DebugSectionKind::DebugPubNames)
+ .Case(getSectionName(DebugSectionKind::DebugPubTypes),
+ DebugSectionKind::DebugPubTypes)
+ .Case(getSectionName(DebugSectionKind::DebugNames),
+ DebugSectionKind::DebugNames)
+ .Case(getSectionName(DebugSectionKind::AppleNames),
+ DebugSectionKind::AppleNames)
+ .Case(getSectionName(DebugSectionKind::AppleNamespaces),
+ DebugSectionKind::AppleNamespaces)
+ .Case(getSectionName(DebugSectionKind::AppleObjC),
+ DebugSectionKind::AppleObjC)
+ .Case(getSectionName(DebugSectionKind::AppleTypes),
+ DebugSectionKind::AppleTypes)
.Default(std::nullopt);
return std::nullopt;
}
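+
+// For illustration: the matcher above strips any leading '.' and '_'
+// characters before comparison, so ".debug_info", "__debug_info" and
+// "debug_info" all resolve to DebugSectionKind::DebugInfo.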
+DebugDieRefPatch::DebugDieRefPatch(uint64_t PatchOffset, CompileUnit *SrcCU,
+ CompileUnit *RefCU, uint32_t RefIdx)
+ : SectionPatch({PatchOffset}),
+ RefCU(RefCU, (SrcCU != nullptr) &&
+ (SrcCU->getUniqueID() == RefCU->getUniqueID())),
+ RefDieIdxOrClonedOffset(RefIdx) {}
+
+DebugULEB128DieRefPatch::DebugULEB128DieRefPatch(uint64_t PatchOffset,
+ CompileUnit *SrcCU,
+ CompileUnit *RefCU,
+ uint32_t RefIdx)
+ : SectionPatch({PatchOffset}),
+ RefCU(RefCU, SrcCU->getUniqueID() == RefCU->getUniqueID()),
+ RefDieIdxOrClonedOffset(RefIdx) {}
+
+DebugDieTypeRefPatch::DebugDieTypeRefPatch(uint64_t PatchOffset,
+ TypeEntry *RefTypeName)
+ : SectionPatch({PatchOffset}), RefTypeName(RefTypeName) {}
+
+DebugType2TypeDieRefPatch::DebugType2TypeDieRefPatch(uint64_t PatchOffset,
+ DIE *Die,
+ TypeEntry *TypeName,
+ TypeEntry *RefTypeName)
+ : SectionPatch({PatchOffset}), Die(Die), TypeName(TypeName),
+ RefTypeName(RefTypeName) {}
+
+DebugTypeStrPatch::DebugTypeStrPatch(uint64_t PatchOffset, DIE *Die,
+ TypeEntry *TypeName, StringEntry *String)
+ : SectionPatch({PatchOffset}), Die(Die), TypeName(TypeName),
+ String(String) {}
+
+DebugTypeLineStrPatch::DebugTypeLineStrPatch(uint64_t PatchOffset, DIE *Die,
+ TypeEntry *TypeName,
+ StringEntry *String)
+ : SectionPatch({PatchOffset}), Die(Die), TypeName(TypeName),
+ String(String) {}
+
+DebugTypeDeclFilePatch::DebugTypeDeclFilePatch(DIE *Die, TypeEntry *TypeName,
+ StringEntry *Directory,
+ StringEntry *FilePath)
+ : Die(Die), TypeName(TypeName), Directory(Directory), FilePath(FilePath) {}
+
+void SectionDescriptor::clearAllSectionData() {
+ StartOffset = 0;
+ clearSectionContent();
+ ListDebugStrPatch.erase();
+ ListDebugLineStrPatch.erase();
+ ListDebugRangePatch.erase();
+ ListDebugLocPatch.erase();
+ ListDebugDieRefPatch.erase();
+ ListDebugULEB128DieRefPatch.erase();
+ ListDebugOffsetPatch.erase();
+ ListDebugType2TypeDieRefPatch.erase();
+ ListDebugTypeDeclFilePatch.erase();
+ ListDebugTypeLineStrPatch.erase();
+ ListDebugTypeStrPatch.erase();
+}
+
+void SectionDescriptor::clearSectionContent() { Contents = OutSectionDataTy(); }
+
+void SectionDescriptor::setSizesForSectionCreatedByAsmPrinter() {
+ if (Contents.empty())
+ return;
+
+ MemoryBufferRef Mem(Contents, "obj");
+ Expected<std::unique_ptr<object::ObjectFile>> Obj =
+ object::ObjectFile::createObjectFile(Mem);
+ if (!Obj) {
+ consumeError(Obj.takeError());
+ Contents.clear();
+ return;
+ }
+
+ for (const object::SectionRef &Sect : (*Obj).get()->sections()) {
+ Expected<StringRef> SectNameOrErr = Sect.getName();
+ if (!SectNameOrErr) {
+ consumeError(SectNameOrErr.takeError());
+ continue;
+ }
+ if (std::optional<DebugSectionKind> SectKind =
+ parseDebugTableName(*SectNameOrErr)) {
+ if (*SectKind == SectionKind) {
+ Expected<StringRef> Data = Sect.getContents();
+ if (!Data) {
+        consumeError(Data.takeError());
+ Contents.clear();
+ return;
+ }
+
+ SectionOffsetInsideAsmPrinterOutputStart =
+ Data->data() - Contents.data();
+ SectionOffsetInsideAsmPrinterOutputEnd =
+ SectionOffsetInsideAsmPrinterOutputStart + Data->size();
+ }
+ }
+ }
+}
+
+void SectionDescriptor::emitString(dwarf::Form StringForm,
+ const char *StringVal) {
+ assert(StringVal != nullptr);
+
+ switch (StringForm) {
+ case dwarf::DW_FORM_string: {
+ emitInplaceString(StringVal);
+ } break;
+ case dwarf::DW_FORM_strp: {
+ notePatch(DebugStrPatch{
+ {OS.tell()}, GlobalData.getStringPool().insert(StringVal).first});
+ emitStringPlaceholder();
+ } break;
+ case dwarf::DW_FORM_line_strp: {
+ notePatch(DebugLineStrPatch{
+ {OS.tell()}, GlobalData.getStringPool().insert(StringVal).first});
+ emitStringPlaceholder();
+ } break;
+ default:
+ llvm_unreachable("Unsupported string form");
+ break;
+  }
+}
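+
+// Illustrative flow for the DW_FORM_strp case above (offset values are
+// hypothetical): the string is interned in the string pool, a DebugStrPatch
+// records the current stream offset, and a placeholder offset is emitted;
+// the real .debug_str offset is written later when patches are applied:
+//
+//   Section.emitString(dwarf::DW_FORM_strp, "name"); // emits 0xBADDEF
+//   // ... once final string offsets are known ...
+//   Section.apply(Patch.PatchOffset, dwarf::DW_FORM_strp, Entry->Offset);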
+
+void SectionDescriptor::emitIntVal(uint64_t Val, unsigned Size) {
+ switch (Size) {
+ case 1: {
+ OS.write(static_cast<uint8_t>(Val));
+ } break;
+ case 2: {
+ uint16_t ShortVal = static_cast<uint16_t>(Val);
+ if (Endianess != llvm::endianness::native)
+ sys::swapByteOrder(ShortVal);
+ OS.write(reinterpret_cast<const char *>(&ShortVal), Size);
+ } break;
+ case 4: {
+ uint32_t ShortVal = static_cast<uint32_t>(Val);
+ if (Endianess != llvm::endianness::native)
+ sys::swapByteOrder(ShortVal);
+ OS.write(reinterpret_cast<const char *>(&ShortVal), Size);
+ } break;
+ case 8: {
+ if (Endianess != llvm::endianness::native)
+ sys::swapByteOrder(Val);
+ OS.write(reinterpret_cast<const char *>(&Val), Size);
+ } break;
+ default:
+ llvm_unreachable("Unsupported integer type size");
+ }
+}
+
+void SectionDescriptor::apply(uint64_t PatchOffset, dwarf::Form AttrForm,
+ uint64_t Val) {
+ switch (AttrForm) {
+ case dwarf::DW_FORM_strp:
+ case dwarf::DW_FORM_line_strp: {
+ applyIntVal(PatchOffset, Val, Format.getDwarfOffsetByteSize());
+ } break;
+
+ case dwarf::DW_FORM_ref_addr: {
+ applyIntVal(PatchOffset, Val, Format.getRefAddrByteSize());
+ } break;
+ case dwarf::DW_FORM_ref1: {
+ applyIntVal(PatchOffset, Val, 1);
+ } break;
+ case dwarf::DW_FORM_ref2: {
+ applyIntVal(PatchOffset, Val, 2);
+ } break;
+ case dwarf::DW_FORM_ref4: {
+ applyIntVal(PatchOffset, Val, 4);
+ } break;
+ case dwarf::DW_FORM_ref8: {
+ applyIntVal(PatchOffset, Val, 8);
+ } break;
+
+ case dwarf::DW_FORM_data1: {
+ applyIntVal(PatchOffset, Val, 1);
+ } break;
+ case dwarf::DW_FORM_data2: {
+ applyIntVal(PatchOffset, Val, 2);
+ } break;
+ case dwarf::DW_FORM_data4: {
+ applyIntVal(PatchOffset, Val, 4);
+ } break;
+ case dwarf::DW_FORM_data8: {
+ applyIntVal(PatchOffset, Val, 8);
+ } break;
+ case dwarf::DW_FORM_udata: {
+ applyULEB128(PatchOffset, Val);
+ } break;
+ case dwarf::DW_FORM_sdata: {
+ applySLEB128(PatchOffset, Val);
+ } break;
+ case dwarf::DW_FORM_sec_offset: {
+ applyIntVal(PatchOffset, Val, Format.getDwarfOffsetByteSize());
+ } break;
+ case dwarf::DW_FORM_flag: {
+ applyIntVal(PatchOffset, Val, 1);
+ } break;
+
+ default:
+ llvm_unreachable("Unsupported attribute form");
+ break;
+ }
+}
+
+uint64_t SectionDescriptor::getIntVal(uint64_t PatchOffset, unsigned Size) {
+ assert(PatchOffset < getContents().size());
+ switch (Size) {
+ case 1: {
+ return *reinterpret_cast<const uint8_t *>(
+ (getContents().data() + PatchOffset));
+ }
+ case 2: {
+ return support::endian::read16(getContents().data() + PatchOffset,
+ Endianess);
+ }
+ case 4: {
+ return support::endian::read32(getContents().data() + PatchOffset,
+ Endianess);
+ }
+ case 8: {
+ return support::endian::read64(getContents().data() + PatchOffset,
+ Endianess);
+ }
+ }
+ llvm_unreachable("Unsupported integer type size");
+ return 0;
+}
+
+void SectionDescriptor::applyIntVal(uint64_t PatchOffset, uint64_t Val,
+ unsigned Size) {
+ assert(PatchOffset < getContents().size());
+
+ switch (Size) {
+ case 1: {
+ support::endian::write(
+ const_cast<char *>(getContents().data() + PatchOffset),
+ static_cast<uint8_t>(Val), Endianess);
+ } break;
+ case 2: {
+ support::endian::write(
+ const_cast<char *>(getContents().data() + PatchOffset),
+ static_cast<uint16_t>(Val), Endianess);
+ } break;
+ case 4: {
+ support::endian::write(
+ const_cast<char *>(getContents().data() + PatchOffset),
+ static_cast<uint32_t>(Val), Endianess);
+ } break;
+ case 8: {
+ support::endian::write(
+ const_cast<char *>(getContents().data() + PatchOffset),
+ static_cast<uint64_t>(Val), Endianess);
+ } break;
+ default:
+ llvm_unreachable("Unsupported integer type size");
+ }
+}
+
+void SectionDescriptor::applyULEB128(uint64_t PatchOffset, uint64_t Val) {
+ assert(PatchOffset < getContents().size());
+
+ uint8_t ULEB[16];
+ uint8_t DestSize = Format.getDwarfOffsetByteSize() + 1;
+ uint8_t RealSize = encodeULEB128(Val, ULEB, DestSize);
+
+ memcpy(const_cast<char *>(getContents().data() + PatchOffset), ULEB,
+ RealSize);
+}
+
+/// Writes the integer value \p Val in SLEB128 format at the specified
+/// \p PatchOffset.
+void SectionDescriptor::applySLEB128(uint64_t PatchOffset, uint64_t Val) {
+ assert(PatchOffset < getContents().size());
+
+ uint8_t SLEB[16];
+ uint8_t DestSize = Format.getDwarfOffsetByteSize() + 1;
+ uint8_t RealSize = encodeSLEB128(Val, SLEB, DestSize);
+
+ memcpy(const_cast<char *>(getContents().data() + PatchOffset), SLEB,
+ RealSize);
+}
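+
+// Note (illustrative): both LEB128 helpers above pad the encoding to a fixed
+// width of getDwarfOffsetByteSize() + 1 bytes, so a patched value always
+// occupies exactly the bytes reserved for it. For example, with 4-byte
+// DWARF32 offsets the value 5 is written as the 5-byte ULEB128 sequence
+// 0x85 0x80 0x80 0x80 0x00.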
+
+void OutputSections::applyPatches(
+ SectionDescriptor &Section,
+ StringEntryToDwarfStringPoolEntryMap &DebugStrStrings,
+ StringEntryToDwarfStringPoolEntryMap &DebugLineStrStrings,
+ TypeUnit *TypeUnitPtr) {
+ Section.ListDebugStrPatch.forEach([&](DebugStrPatch &Patch) {
+ DwarfStringPoolEntryWithExtString *Entry =
+ DebugStrStrings.getExistingEntry(Patch.String);
+ assert(Entry != nullptr);
+
+ Section.apply(Patch.PatchOffset, dwarf::DW_FORM_strp, Entry->Offset);
+ });
+ Section.ListDebugTypeStrPatch.forEach([&](DebugTypeStrPatch &Patch) {
+ assert(TypeUnitPtr != nullptr);
+ TypeEntryBody *TypeEntry = Patch.TypeName->getValue().load();
+ assert(TypeEntry &&
+ formatv("No data for type {0}", Patch.TypeName->getKey())
+ .str()
+ .c_str());
+
+ if (&TypeEntry->getFinalDie() != Patch.Die)
+ return;
+
+ DwarfStringPoolEntryWithExtString *Entry =
+ DebugStrStrings.getExistingEntry(Patch.String);
+ assert(Entry != nullptr);
+
+ Patch.PatchOffset +=
+ Patch.Die->getOffset() + getULEB128Size(Patch.Die->getAbbrevNumber());
+
+ Section.apply(Patch.PatchOffset, dwarf::DW_FORM_strp, Entry->Offset);
+ });
+
+ Section.ListDebugLineStrPatch.forEach([&](DebugLineStrPatch &Patch) {
+ DwarfStringPoolEntryWithExtString *Entry =
+ DebugLineStrStrings.getExistingEntry(Patch.String);
+ assert(Entry != nullptr);
+
+ Section.apply(Patch.PatchOffset, dwarf::DW_FORM_line_strp, Entry->Offset);
+ });
+ Section.ListDebugTypeLineStrPatch.forEach([&](DebugTypeLineStrPatch &Patch) {
+ assert(TypeUnitPtr != nullptr);
+ TypeEntryBody *TypeEntry = Patch.TypeName->getValue().load();
+ assert(TypeEntry &&
+ formatv("No data for type {0}", Patch.TypeName->getKey())
+ .str()
+ .c_str());
+
+ if (&TypeEntry->getFinalDie() != Patch.Die)
+ return;
+
+ DwarfStringPoolEntryWithExtString *Entry =
+ DebugLineStrStrings.getExistingEntry(Patch.String);
+ assert(Entry != nullptr);
+
+ Patch.PatchOffset +=
+ Patch.Die->getOffset() + getULEB128Size(Patch.Die->getAbbrevNumber());
+
+ Section.apply(Patch.PatchOffset, dwarf::DW_FORM_line_strp, Entry->Offset);
+ });
+
+ std::optional<SectionDescriptor *> RangeSection;
+ if (Format.Version >= 5)
+ RangeSection = tryGetSectionDescriptor(DebugSectionKind::DebugRngLists);
+ else
+ RangeSection = tryGetSectionDescriptor(DebugSectionKind::DebugRange);
+
+ if (RangeSection) {
+ Section.ListDebugRangePatch.forEach([&](DebugRangePatch &Patch) {
+ uint64_t FinalValue =
+ Section.getIntVal(Patch.PatchOffset, Format.getDwarfOffsetByteSize());
+ FinalValue += (*RangeSection)->StartOffset;
+
+ Section.apply(Patch.PatchOffset, dwarf::DW_FORM_sec_offset, FinalValue);
+ });
+ }
+
+ std::optional<SectionDescriptor *> LocationSection;
+ if (Format.Version >= 5)
+ LocationSection = tryGetSectionDescriptor(DebugSectionKind::DebugLocLists);
+ else
+ LocationSection = tryGetSectionDescriptor(DebugSectionKind::DebugLoc);
+
+ if (LocationSection) {
+ Section.ListDebugLocPatch.forEach([&](DebugLocPatch &Patch) {
+ uint64_t FinalValue =
+ Section.getIntVal(Patch.PatchOffset, Format.getDwarfOffsetByteSize());
+ FinalValue += (*LocationSection)->StartOffset;
+
+ Section.apply(Patch.PatchOffset, dwarf::DW_FORM_sec_offset, FinalValue);
+ });
+ }
+
+ Section.ListDebugDieRefPatch.forEach([&](DebugDieRefPatch &Patch) {
+ uint64_t FinalOffset = Patch.RefDieIdxOrClonedOffset;
+ dwarf::Form FinalForm = dwarf::DW_FORM_ref4;
+
+    // Check whether it is a local or an inter-CU reference.
+ if (!Patch.RefCU.getInt()) {
+ SectionDescriptor &ReferencedSectionDescriptor =
+ Patch.RefCU.getPointer()->getSectionDescriptor(
+ DebugSectionKind::DebugInfo);
+
+ FinalForm = dwarf::DW_FORM_ref_addr;
+ FinalOffset += ReferencedSectionDescriptor.StartOffset;
+ }
+
+ Section.apply(Patch.PatchOffset, FinalForm, FinalOffset);
+ });
+
+ Section.ListDebugULEB128DieRefPatch.forEach(
+ [&](DebugULEB128DieRefPatch &Patch) {
+ assert(Patch.RefCU.getInt());
+ Section.apply(Patch.PatchOffset, dwarf::DW_FORM_udata,
+ Patch.RefDieIdxOrClonedOffset);
+ });
+
+ Section.ListDebugDieTypeRefPatch.forEach([&](DebugDieTypeRefPatch &Patch) {
+ assert(TypeUnitPtr != nullptr);
+ assert(Patch.RefTypeName != nullptr);
+
+ TypeEntryBody *TypeEntry = Patch.RefTypeName->getValue().load();
+ assert(TypeEntry &&
+ formatv("No data for type {0}", Patch.RefTypeName->getKey())
+ .str()
+ .c_str());
+
+ Section.apply(Patch.PatchOffset, dwarf::DW_FORM_ref_addr,
+ TypeEntry->getFinalDie().getOffset());
+ });
+
+ Section.ListDebugType2TypeDieRefPatch.forEach(
+ [&](DebugType2TypeDieRefPatch &Patch) {
+ assert(TypeUnitPtr != nullptr);
+ TypeEntryBody *TypeEntry = Patch.TypeName->getValue().load();
+ assert(TypeEntry &&
+ formatv("No data for type {0}", Patch.TypeName->getKey())
+ .str()
+ .c_str());
+
+ if (&TypeEntry->getFinalDie() != Patch.Die)
+ return;
+
+ Patch.PatchOffset += Patch.Die->getOffset() +
+ getULEB128Size(Patch.Die->getAbbrevNumber());
+
+ assert(Patch.RefTypeName != nullptr);
+ TypeEntryBody *RefTypeEntry = Patch.RefTypeName->getValue().load();
+          assert(RefTypeEntry &&
+ formatv("No data for type {0}", Patch.RefTypeName->getKey())
+ .str()
+ .c_str());
+
+ Section.apply(Patch.PatchOffset, dwarf::DW_FORM_ref4,
+ RefTypeEntry->getFinalDie().getOffset());
+ });
+
+ Section.ListDebugOffsetPatch.forEach([&](DebugOffsetPatch &Patch) {
+ uint64_t FinalValue = Patch.SectionPtr.getPointer()->StartOffset;
+
+    // Check whether we need to read the value from the original location.
+ if (Patch.SectionPtr.getInt())
+ FinalValue +=
+ Section.getIntVal(Patch.PatchOffset, Format.getDwarfOffsetByteSize());
+
+ Section.apply(Patch.PatchOffset, dwarf::DW_FORM_sec_offset, FinalValue);
+ });
+}
+
} // end of namespace dwarflinker_parallel
} // end of namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/OutputSections.h b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/OutputSections.h
index 15ab4cc1167a..f23b2efb869d 100644
--- a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/OutputSections.h
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/OutputSections.h
@@ -9,56 +9,491 @@
#ifndef LLVM_LIB_DWARFLINKERPARALLEL_OUTPUTSECTIONS_H
#define LLVM_LIB_DWARFLINKERPARALLEL_OUTPUTSECTIONS_H
+#include "ArrayList.h"
+#include "StringEntryToDwarfStringPoolEntryMap.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/DwarfStringPoolEntry.h"
+#include "llvm/DWARFLinkerParallel/StringPool.h"
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARF/DWARFObject.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Support/raw_ostream.h"
#include <array>
#include <cstdint>
namespace llvm {
namespace dwarflinker_parallel {
-/// This class keeps offsets to the debug sections. Any object which is
-/// supposed to be emitted into the debug section should use this class to
-/// track debug sections offsets.
-class OutputSections {
-public:
- /// List of tracked debug sections.
- enum class DebugSectionKind : uint8_t {
- DebugInfo = 0,
- DebugLine,
- DebugFrame,
- DebugRange,
- DebugRngLists,
- DebugLoc,
- DebugLocLists,
- DebugARanges,
- DebugAbbrev,
- DebugMacinfo,
- DebugMacro,
- };
- constexpr static size_t SectionKindsNum = 11;
-
- /// Recognise the section name and match it with the DebugSectionKind.
- static std::optional<DebugSectionKind> parseDebugSectionName(StringRef Name);
+class TypeUnit;
+
+/// List of tracked debug tables.
+enum class DebugSectionKind : uint8_t {
+ DebugInfo = 0,
+ DebugLine,
+ DebugFrame,
+ DebugRange,
+ DebugRngLists,
+ DebugLoc,
+ DebugLocLists,
+ DebugARanges,
+ DebugAbbrev,
+ DebugMacinfo,
+ DebugMacro,
+ DebugAddr,
+ DebugStr,
+ DebugLineStr,
+ DebugStrOffsets,
+ DebugPubNames,
+ DebugPubTypes,
+ DebugNames,
+ AppleNames,
+ AppleNamespaces,
+ AppleObjC,
+ AppleTypes,
+ NumberOfEnumEntries // must be last
+};
+constexpr static size_t SectionKindsNum =
+ static_cast<size_t>(DebugSectionKind::NumberOfEnumEntries);
+
+/// Recognise the table name and match it with the DebugSectionKind.
+std::optional<DebugSectionKind> parseDebugTableName(StringRef Name);
+
+/// Return the name of the section.
+const StringLiteral &getSectionName(DebugSectionKind SectionKind);
+
+/// There are fields (sizes, offsets) which should be updated after the
+/// sections are generated. Descendants of the SectionPatch structure
+/// should be used to remember the offsets and related data.
+
+struct SectionPatch {
+ uint64_t PatchOffset = 0;
+};
+
+/// This structure is used to update string offsets into .debug_str.
+struct DebugStrPatch : SectionPatch {
+ const StringEntry *String = nullptr;
+};
+
+/// This structure is used to update string offsets into .debug_line_str.
+struct DebugLineStrPatch : SectionPatch {
+ const StringEntry *String = nullptr;
+};
+
+/// This structure is used to update range list offset into
+/// .debug_ranges/.debug_rnglists.
+struct DebugRangePatch : SectionPatch {
+  /// Indicates a patch which points to the immediate compile unit's
+  /// attribute.
+ bool IsCompileUnitRanges = false;
+};
+
+/// This structure is used to update location list offset into
+/// .debug_loc/.debug_loclists.
+struct DebugLocPatch : SectionPatch {
+ int64_t AddrAdjustmentValue = 0;
+};
+
+/// This structure is used to update an offset with the start of another
+/// section.
+struct SectionDescriptor;
+struct DebugOffsetPatch : SectionPatch {
+ DebugOffsetPatch(uint64_t PatchOffset, SectionDescriptor *SectionPtr,
+ bool AddLocalValue = false)
+ : SectionPatch({PatchOffset}), SectionPtr(SectionPtr, AddLocalValue) {}
+
+ PointerIntPair<SectionDescriptor *, 1> SectionPtr;
+};
+
+/// This structure is used to update a reference to a DIE.
+struct DebugDieRefPatch : SectionPatch {
+ DebugDieRefPatch(uint64_t PatchOffset, CompileUnit *SrcCU, CompileUnit *RefCU,
+ uint32_t RefIdx);
+
+ PointerIntPair<CompileUnit *, 1> RefCU;
+ uint64_t RefDieIdxOrClonedOffset = 0;
+};
+
+/// This structure is used to update a reference to a DIE of ULEB128 form.
+struct DebugULEB128DieRefPatch : SectionPatch {
+ DebugULEB128DieRefPatch(uint64_t PatchOffset, CompileUnit *SrcCU,
+ CompileUnit *RefCU, uint32_t RefIdx);
+
+ PointerIntPair<CompileUnit *, 1> RefCU;
+ uint64_t RefDieIdxOrClonedOffset = 0;
+};
+
+/// This structure is used to update a reference to a type DIE.
+struct DebugDieTypeRefPatch : SectionPatch {
+ DebugDieTypeRefPatch(uint64_t PatchOffset, TypeEntry *RefTypeName);
+
+ TypeEntry *RefTypeName = nullptr;
+};
+
+/// This structure is used to update a reference to a type DIE.
+struct DebugType2TypeDieRefPatch : SectionPatch {
+ DebugType2TypeDieRefPatch(uint64_t PatchOffset, DIE *Die, TypeEntry *TypeName,
+ TypeEntry *RefTypeName);
+
+ DIE *Die = nullptr;
+ TypeEntry *TypeName = nullptr;
+ TypeEntry *RefTypeName = nullptr;
+};
+
+struct DebugTypeStrPatch : SectionPatch {
+ DebugTypeStrPatch(uint64_t PatchOffset, DIE *Die, TypeEntry *TypeName,
+ StringEntry *String);
+
+ DIE *Die = nullptr;
+ TypeEntry *TypeName = nullptr;
+ StringEntry *String = nullptr;
+};
+
+struct DebugTypeLineStrPatch : SectionPatch {
+ DebugTypeLineStrPatch(uint64_t PatchOffset, DIE *Die, TypeEntry *TypeName,
+ StringEntry *String);
+
+ DIE *Die = nullptr;
+ TypeEntry *TypeName = nullptr;
+ StringEntry *String = nullptr;
+};
+
+struct DebugTypeDeclFilePatch {
+ DebugTypeDeclFilePatch(DIE *Die, TypeEntry *TypeName, StringEntry *Directory,
+ StringEntry *FilePath);
+
+ DIE *Die = nullptr;
+ TypeEntry *TypeName = nullptr;
+ StringEntry *Directory = nullptr;
+ StringEntry *FilePath = nullptr;
+ uint32_t FileID = 0;
+};
+
+/// Type for section data.
+using OutSectionDataTy = SmallString<0>;
+
+/// Type for a list of pointers to patch offsets.
+using OffsetsPtrVector = SmallVector<uint64_t *>;
+
+class OutputSections;
+
+/// This structure is used to keep the data of a concrete section,
+/// such as the data bits, the list of patches, and the format.
+struct SectionDescriptor {
+ friend OutputSections;
+
+ SectionDescriptor(DebugSectionKind SectionKind, LinkingGlobalData &GlobalData,
+ dwarf::FormParams Format, llvm::endianness Endianess)
+ : OS(Contents), ListDebugStrPatch(&GlobalData.getAllocator()),
+ ListDebugLineStrPatch(&GlobalData.getAllocator()),
+ ListDebugRangePatch(&GlobalData.getAllocator()),
+ ListDebugLocPatch(&GlobalData.getAllocator()),
+ ListDebugDieRefPatch(&GlobalData.getAllocator()),
+ ListDebugULEB128DieRefPatch(&GlobalData.getAllocator()),
+ ListDebugOffsetPatch(&GlobalData.getAllocator()),
+ ListDebugDieTypeRefPatch(&GlobalData.getAllocator()),
+ ListDebugType2TypeDieRefPatch(&GlobalData.getAllocator()),
+ ListDebugTypeStrPatch(&GlobalData.getAllocator()),
+ ListDebugTypeLineStrPatch(&GlobalData.getAllocator()),
+ ListDebugTypeDeclFilePatch(&GlobalData.getAllocator()),
+ GlobalData(GlobalData), SectionKind(SectionKind), Format(Format),
+ Endianess(Endianess) {}
+
+  /// Erase the whole section contents (data bits, list of patches).
+ void clearAllSectionData();
+
+ /// Erase only section output data bits.
+ void clearSectionContent();
   /// When objects (e.g. compile units) are glued into a single file,
/// the debug sections corresponding to the concrete object are assigned
- /// with offsets inside the whole file. This method returns offset
- /// to the \p SectionKind debug section, corresponding to this object.
- uint64_t getStartOffset(DebugSectionKind SectionKind) const {
- return Offsets[static_cast<
- typename std::underlying_type<DebugSectionKind>::type>(SectionKind)];
+  /// offsets inside the whole file. This field keeps the offset of the
+  /// debug section corresponding to this object.
+ uint64_t StartOffset = 0;
+
+  /// Stream which writes data into Contents.
+ raw_svector_ostream OS;
+
+ /// Section patches.
+#define ADD_PATCHES_LIST(T) \
+ T &notePatch(const T &Patch) { return List##T.add(Patch); } \
+ ArrayList<T> List##T;
+
+ ADD_PATCHES_LIST(DebugStrPatch)
+ ADD_PATCHES_LIST(DebugLineStrPatch)
+ ADD_PATCHES_LIST(DebugRangePatch)
+ ADD_PATCHES_LIST(DebugLocPatch)
+ ADD_PATCHES_LIST(DebugDieRefPatch)
+ ADD_PATCHES_LIST(DebugULEB128DieRefPatch)
+ ADD_PATCHES_LIST(DebugOffsetPatch)
+ ADD_PATCHES_LIST(DebugDieTypeRefPatch)
+ ADD_PATCHES_LIST(DebugType2TypeDieRefPatch)
+ ADD_PATCHES_LIST(DebugTypeStrPatch)
+ ADD_PATCHES_LIST(DebugTypeLineStrPatch)
+ ADD_PATCHES_LIST(DebugTypeDeclFilePatch)
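+
+  // For illustration, ADD_PATCHES_LIST(DebugStrPatch) above expands to:
+  //
+  //   DebugStrPatch &notePatch(const DebugStrPatch &Patch) {
+  //     return ListDebugStrPatch.add(Patch);
+  //   }
+  //   ArrayList<DebugStrPatch> ListDebugStrPatch;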
+
+  /// While creating patches, offsets to attributes may be partially
+  /// unknown (because the size of the abbreviation number is unknown). In
+  /// such a case we remember the patch itself and a pointer to the patch
+  /// application offset, so that the size of the abbreviation number can
+  /// be added later.
+ template <typename T>
+ void notePatchWithOffsetUpdate(const T &Patch,
+ OffsetsPtrVector &PatchesOffsetsList) {
+ PatchesOffsetsList.emplace_back(&notePatch(Patch).PatchOffset);
+ }
+
+  /// Some sections are emitted using AsmPrinter. In that case the
+  /// "Contents" member of SectionDescriptor contains an ELF file. This
+  /// method searches for the section data inside the ELF file and
+  /// remembers the offset to it.
+ void setSizesForSectionCreatedByAsmPrinter();
+
+ /// Returns section content.
+ StringRef getContents() {
+ if (SectionOffsetInsideAsmPrinterOutputStart == 0)
+ return StringRef(Contents.data(), Contents.size());
+
+ return Contents.slice(SectionOffsetInsideAsmPrinterOutputStart,
+ SectionOffsetInsideAsmPrinterOutputEnd);
+ }
+
+ /// Emit unit length into the current section contents.
+ void emitUnitLength(uint64_t Length) {
+ maybeEmitDwarf64Mark();
+ emitIntVal(Length, getFormParams().getDwarfOffsetByteSize());
+ }
+
+ /// Emit DWARF64 mark into the current section contents.
+ void maybeEmitDwarf64Mark() {
+ if (getFormParams().Format != dwarf::DWARF64)
+ return;
+ emitIntVal(dwarf::DW_LENGTH_DWARF64, 4);
+ }
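+
+  // For illustration: in DWARF32, emitUnitLength(Len) writes Len as a
+  // 4-byte value; in DWARF64 it first writes the 4-byte escape value
+  // 0xffffffff (DW_LENGTH_DWARF64) and then Len as an 8-byte value.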
+
+ /// Emit specified offset value into the current section contents.
+ void emitOffset(uint64_t Val) {
+ emitIntVal(Val, getFormParams().getDwarfOffsetByteSize());
+ }
+
+ /// Emit specified integer value into the current section contents.
+ void emitIntVal(uint64_t Val, unsigned Size);
+
+ void emitString(dwarf::Form StringForm, const char *StringVal);
+
+  /// Emit the specified in-place string value into the current section
+  /// contents.
+ void emitInplaceString(StringRef String) {
+ OS << GlobalData.translateString(String);
+ emitIntVal(0, 1);
}
- /// Set offset to the start of specified \p SectionKind debug section,
- /// corresponding to this object.
- void setStartOffset(DebugSectionKind SectionKind, uint64_t Offset) {
- Offsets[static_cast<typename std::underlying_type<DebugSectionKind>::type>(
- SectionKind)] = Offset;
+ /// Emit string placeholder into the current section contents.
+ void emitStringPlaceholder() {
+    // Emit a dummy offset which will be updated later.
+ emitOffset(0xBADDEF);
}
+  /// Write the specified \p Val of form \p AttrForm at the \p PatchOffset.
+ void apply(uint64_t PatchOffset, dwarf::Form AttrForm, uint64_t Val);
+
+ /// Returns section kind.
+ DebugSectionKind getKind() { return SectionKind; }
+
+ /// Returns section name.
+ const StringLiteral &getName() const { return getSectionName(SectionKind); }
+
+  /// Returns the endianness used by the section.
+ llvm::endianness getEndianess() const { return Endianess; }
+
+ /// Returns FormParams used by section.
+ dwarf::FormParams getFormParams() const { return Format; }
+
+  /// Returns the integer value of size \p Size located at the specified
+  /// \p PatchOffset.
+ uint64_t getIntVal(uint64_t PatchOffset, unsigned Size);
+
protected:
- /// Offsets to the debug sections composing this object.
- std::array<uint64_t, SectionKindsNum> Offsets = {0};
+  /// Writes the integer value \p Val of size \p Size at the specified
+  /// \p PatchOffset.
+ void applyIntVal(uint64_t PatchOffset, uint64_t Val, unsigned Size);
+
+  /// Writes the integer value \p Val in ULEB128 format at the specified
+  /// \p PatchOffset.
+ void applyULEB128(uint64_t PatchOffset, uint64_t Val);
+
+  /// Writes the integer value \p Val in SLEB128 format at the specified
+  /// \p PatchOffset.
+ void applySLEB128(uint64_t PatchOffset, uint64_t Val);
+
+ /// Sets output format.
+ void setOutputFormat(dwarf::FormParams Format, llvm::endianness Endianess) {
+ this->Format = Format;
+ this->Endianess = Endianess;
+ }
+
+ LinkingGlobalData &GlobalData;
+
+ /// The section kind.
+ DebugSectionKind SectionKind = DebugSectionKind::NumberOfEnumEntries;
+
+ /// Section data bits.
+ OutSectionDataTy Contents;
+
+  /// Some sections are generated using AsmPrinter. In that case the real
+  /// section data is located inside the ELF file. The following fields
+  /// point to the real section content inside the ELF file.
+ size_t SectionOffsetInsideAsmPrinterOutputStart = 0;
+ size_t SectionOffsetInsideAsmPrinterOutputEnd = 0;
+
+ /// Output format.
+ dwarf::FormParams Format = {4, 4, dwarf::DWARF32};
+ llvm::endianness Endianess = llvm::endianness::little;
+};
+
+/// This class keeps the contents of and offsets to the debug sections. Any
+/// object which is supposed to be emitted into the debug sections should
+/// use this class to track debug section offsets and to keep section data.
+class OutputSections {
+public:
+ OutputSections(LinkingGlobalData &GlobalData) : GlobalData(GlobalData) {}
+
+  /// Sets the output format for all kept sections.
+ void setOutputFormat(dwarf::FormParams Format, llvm::endianness Endianness) {
+ this->Format = Format;
+ this->Endianness = Endianness;
+ }
+
+  /// Returns the descriptor for the section of the specified \p SectionKind.
+  /// The descriptor must already have been created; llvm_unreachable is
+  /// raised if it has not.
+ const SectionDescriptor &
+ getSectionDescriptor(DebugSectionKind SectionKind) const {
+ SectionsSetTy::const_iterator It = SectionDescriptors.find(SectionKind);
+
+ if (It == SectionDescriptors.end())
+ llvm_unreachable(
+ formatv("Section {0} does not exist", getSectionName(SectionKind))
+ .str()
+ .c_str());
+
+ return It->second;
+ }
+
+  /// Returns the descriptor for the section of the specified \p SectionKind.
+  /// The descriptor must already have been created; llvm_unreachable is
+  /// raised if it has not.
+ SectionDescriptor &getSectionDescriptor(DebugSectionKind SectionKind) {
+ SectionsSetTy::iterator It = SectionDescriptors.find(SectionKind);
+
+ if (It == SectionDescriptors.end())
+ llvm_unreachable(
+ formatv("Section {0} does not exist", getSectionName(SectionKind))
+ .str()
+ .c_str());
+
+ return It->second;
+ }
+
+  /// Returns the descriptor for the section of the specified \p SectionKind.
+  /// Returns std::nullopt if the section descriptor has not been created yet.
+ std::optional<const SectionDescriptor *>
+ tryGetSectionDescriptor(DebugSectionKind SectionKind) const {
+ SectionsSetTy::const_iterator It = SectionDescriptors.find(SectionKind);
+
+ if (It == SectionDescriptors.end())
+ return std::nullopt;
+
+ return &It->second;
+ }
+
+  /// Returns the descriptor for the section of the specified \p SectionKind.
+  /// Returns std::nullopt if the section descriptor has not been created yet.
+ std::optional<SectionDescriptor *>
+ tryGetSectionDescriptor(DebugSectionKind SectionKind) {
+ SectionsSetTy::iterator It = SectionDescriptors.find(SectionKind);
+
+ if (It == SectionDescriptors.end())
+ return std::nullopt;
+
+ return &It->second;
+ }
+
+  /// Returns the descriptor for the section of the specified \p SectionKind.
+  /// Creates the descriptor if it does not exist yet.
+ SectionDescriptor &
+ getOrCreateSectionDescriptor(DebugSectionKind SectionKind) {
+ return SectionDescriptors
+ .try_emplace(SectionKind, SectionKind, GlobalData, Format, Endianness)
+ .first->second;
+ }
+
+ /// Erases data of all sections.
+ void eraseSections() {
+ for (auto &Section : SectionDescriptors)
+ Section.second.clearAllSectionData();
+ }
+
+ /// Enumerate all sections and call \p Handler for each.
+ void forEach(function_ref<void(SectionDescriptor &)> Handler) {
+ for (auto &Section : SectionDescriptors)
+ Handler(Section.second);
+ }
+
+  /// Enumerate all sections; for each section set its start offset to the
+  /// current value kept by \p SectionSizesAccumulator, then advance that
+  /// value by the section length.
+ void assignSectionsOffsetAndAccumulateSize(
+ std::array<uint64_t, SectionKindsNum> &SectionSizesAccumulator) {
+ for (auto &Section : SectionDescriptors) {
+ Section.second.StartOffset = SectionSizesAccumulator[static_cast<uint8_t>(
+ Section.second.getKind())];
+ SectionSizesAccumulator[static_cast<uint8_t>(Section.second.getKind())] +=
+ Section.second.getContents().size();
+ }
+ }
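+
+  // For illustration (hypothetical sizes): if the accumulator holds 0x100
+  // for DebugInfo and this object's debug_info contents occupy 0x40 bytes,
+  // this object's StartOffset becomes 0x100 and the accumulator advances to
+  // 0x140 for the next object's debug_info section.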
+
+  /// Applies all recorded patches to the specified \p Section.
+ void applyPatches(SectionDescriptor &Section,
+ StringEntryToDwarfStringPoolEntryMap &DebugStrStrings,
+ StringEntryToDwarfStringPoolEntryMap &DebugLineStrStrings,
+ TypeUnit *TypeUnitPtr);
+
+  /// Returns the endianness for the sections.
+ llvm::endianness getEndianness() const { return Endianness; }
+
+ /// Return DWARF version.
+ uint16_t getVersion() const { return Format.Version; }
+
+  /// Return the size of the debug_info table header.
+ uint16_t getDebugInfoHeaderSize() const {
+ return Format.Version >= 5 ? 12 : 11;
+ }
+
+  /// Return the size of the debug_addr table header.
+ uint16_t getDebugAddrHeaderSize() const {
+ assert(Format.Version >= 5);
+ return Format.Format == dwarf::DwarfFormat::DWARF32 ? 8 : 16;
+ }
+
+  /// Return the size of the debug_str_offsets table header.
+ uint16_t getDebugStrOffsetsHeaderSize() const {
+ assert(Format.Version >= 5);
+ return Format.Format == dwarf::DwarfFormat::DWARF32 ? 8 : 16;
+ }
+
+  /// Return the DWARF form parameters.
+ const dwarf::FormParams &getFormParams() const { return Format; }
+
+protected:
+ LinkingGlobalData &GlobalData;
+
+ /// Format for sections.
+ dwarf::FormParams Format = {4, 4, dwarf::DWARF32};
+
+  /// Endianness for sections.
+ llvm::endianness Endianness = llvm::endianness::native;
+
+  /// All kept sections.
+ using SectionsSetTy = std::map<DebugSectionKind, SectionDescriptor>;
+ SectionsSetTy SectionDescriptors;
};
} // end of namespace dwarflinker_parallel
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/StringEntryToDwarfStringPoolEntryMap.h b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/StringEntryToDwarfStringPoolEntryMap.h
new file mode 100644
index 000000000000..b4c74d0adba9
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/StringEntryToDwarfStringPoolEntryMap.h
@@ -0,0 +1,72 @@
+//===- StringEntryToDwarfStringPoolEntryMap.h -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_DWARFLINKERPARALLEL_STRINGENTRYTODWARFSTRINGPOOLENTRYMAP_H
+#define LLVM_LIB_DWARFLINKERPARALLEL_STRINGENTRYTODWARFSTRINGPOOLENTRYMAP_H
+
+#include "DWARFLinkerGlobalData.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/DWARFLinkerParallel/StringPool.h"
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+/// This class creates a DwarfStringPoolEntry for the corresponding StringEntry.
+class StringEntryToDwarfStringPoolEntryMap {
+public:
+ StringEntryToDwarfStringPoolEntryMap(LinkingGlobalData &GlobalData)
+ : GlobalData(GlobalData) {}
+ ~StringEntryToDwarfStringPoolEntryMap() {}
+
+  /// Create a DwarfStringPoolEntry for the specified StringEntry if
+  /// necessary, and initialize it with default values.
+ DwarfStringPoolEntryWithExtString *add(const StringEntry *String) {
+ DwarfStringPoolEntriesTy::iterator it = DwarfStringPoolEntries.find(String);
+
+ if (it == DwarfStringPoolEntries.end()) {
+ DwarfStringPoolEntryWithExtString *DataPtr =
+ GlobalData.getAllocator()
+ .Allocate<DwarfStringPoolEntryWithExtString>();
+ DataPtr->String = GlobalData.translateString(String->getKey());
+ DataPtr->Index = DwarfStringPoolEntry::NotIndexed;
+ DataPtr->Offset = 0;
+ DataPtr->Symbol = nullptr;
+ it = DwarfStringPoolEntries.insert(std::make_pair(String, DataPtr)).first;
+ }
+
+ assert(it->second != nullptr);
+ return it->second;
+ }
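+
+  // For illustration: add() may be called repeatedly with the same
+  // StringEntry and always returns the same pooled entry, while
+  // getExistingEntry() asserts that add() was called for that string first.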
+
+  /// Returns the already existing DwarfStringPoolEntry for the specified
+  /// StringEntry.
+ DwarfStringPoolEntryWithExtString *
+ getExistingEntry(const StringEntry *String) const {
+ DwarfStringPoolEntriesTy::const_iterator it =
+ DwarfStringPoolEntries.find(String);
+
+ assert(it != DwarfStringPoolEntries.end());
+ assert(it->second != nullptr);
+ return it->second;
+ }
+
+  /// Erase the contents of DwarfStringPoolEntries.
+ void clear() { DwarfStringPoolEntries.clear(); }
+
+protected:
+ using DwarfStringPoolEntriesTy =
+ DenseMap<const StringEntry *, DwarfStringPoolEntryWithExtString *>;
+ DwarfStringPoolEntriesTy DwarfStringPoolEntries;
+
+ LinkingGlobalData &GlobalData;
+};
+
+} // end of namespace dwarflinker_parallel
+} // end namespace llvm
+
+#endif // LLVM_LIB_DWARFLINKERPARALLEL_STRINGENTRYTODWARFSTRINGPOOLENTRYMAP_H
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/SyntheticTypeNameBuilder.cpp b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/SyntheticTypeNameBuilder.cpp
new file mode 100644
index 000000000000..a9b4478e33c4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/SyntheticTypeNameBuilder.cpp
@@ -0,0 +1,767 @@
+//===- SyntheticTypeNameBuilder.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SyntheticTypeNameBuilder.h"
+#include "DWARFLinkerCompileUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
+#include "llvm/Support/ScopedPrinter.h"
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+Error SyntheticTypeNameBuilder::assignName(
+ UnitEntryPairTy InputUnitEntryPair,
+ std::optional<std::pair<size_t, size_t>> ChildIndex) {
+ [[maybe_unused]] const CompileUnit::DIEInfo &Info =
+ InputUnitEntryPair.CU->getDIEInfo(InputUnitEntryPair.DieEntry);
+  assert(Info.needToPlaceInTypeTable() &&
+         "Can't assign name for non-type DIE");
+
+ if (InputUnitEntryPair.CU->getDieTypeEntry(InputUnitEntryPair.DieEntry) !=
+ nullptr)
+ return Error::success();
+
+ SyntheticName.resize(0);
+ RecursionDepth = 0;
+ return addDIETypeName(InputUnitEntryPair, ChildIndex, true);
+}
+
+void SyntheticTypeNameBuilder::addArrayDimension(
+ UnitEntryPairTy InputUnitEntryPair) {
+ for (const DWARFDebugInfoEntry *CurChild =
+ InputUnitEntryPair.CU->getFirstChildEntry(
+ InputUnitEntryPair.DieEntry);
+ CurChild && CurChild->getAbbreviationDeclarationPtr();
+ CurChild = InputUnitEntryPair.CU->getSiblingEntry(CurChild)) {
+ if (CurChild->getTag() == dwarf::DW_TAG_subrange_type ||
+ CurChild->getTag() == dwarf::DW_TAG_generic_subrange) {
+ SyntheticName += "[";
+ if (std::optional<DWARFFormValue> Val =
+ InputUnitEntryPair.CU->find(CurChild, dwarf::DW_AT_count)) {
+ if (std::optional<uint64_t> ConstVal = Val->getAsUnsignedConstant()) {
+ SyntheticName += std::to_string(*ConstVal);
+ } else if (std::optional<int64_t> ConstVal =
+ Val->getAsSignedConstant()) {
+ SyntheticName += std::to_string(*ConstVal);
+ }
+ }
+
+ SyntheticName += "]";
+ }
+ }
+}
+
+static dwarf::Attribute TypeAttr[] = {dwarf::DW_AT_type};
+Error SyntheticTypeNameBuilder::addSignature(UnitEntryPairTy InputUnitEntryPair,
+ bool addTemplateParameters) {
+ // Add entry type.
+ if (Error Err = addReferencedODRDies(InputUnitEntryPair, false, TypeAttr))
+ return Err;
+ SyntheticName += ':';
+
+ SmallVector<const DWARFDebugInfoEntry *, 10> TemplateParameters;
+ SmallVector<const DWARFDebugInfoEntry *, 20> FunctionParameters;
+ for (const DWARFDebugInfoEntry *CurChild =
+ InputUnitEntryPair.CU->getFirstChildEntry(
+ InputUnitEntryPair.DieEntry);
+ CurChild && CurChild->getAbbreviationDeclarationPtr();
+ CurChild = InputUnitEntryPair.CU->getSiblingEntry(CurChild)) {
+ dwarf::Tag ChildTag = CurChild->getTag();
+ if (addTemplateParameters &&
+ (ChildTag == dwarf::DW_TAG_template_type_parameter ||
+ ChildTag == dwarf::DW_TAG_template_value_parameter))
+ TemplateParameters.push_back(CurChild);
+ else if (ChildTag == dwarf::DW_TAG_formal_parameter ||
+ ChildTag == dwarf::DW_TAG_unspecified_parameters)
+ FunctionParameters.push_back(CurChild);
+ else if (addTemplateParameters &&
+ ChildTag == dwarf::DW_TAG_GNU_template_parameter_pack) {
+ for (const DWARFDebugInfoEntry *CurGNUChild =
+ InputUnitEntryPair.CU->getFirstChildEntry(CurChild);
+ CurGNUChild && CurGNUChild->getAbbreviationDeclarationPtr();
+ CurGNUChild = InputUnitEntryPair.CU->getSiblingEntry(CurGNUChild))
+ TemplateParameters.push_back(CurGNUChild);
+ } else if (ChildTag == dwarf::DW_TAG_GNU_formal_parameter_pack) {
+ for (const DWARFDebugInfoEntry *CurGNUChild =
+ InputUnitEntryPair.CU->getFirstChildEntry(CurChild);
+ CurGNUChild && CurGNUChild->getAbbreviationDeclarationPtr();
+ CurGNUChild = InputUnitEntryPair.CU->getSiblingEntry(CurGNUChild))
+ FunctionParameters.push_back(CurGNUChild);
+ }
+ }
+
+ // Add parameters.
+ if (Error Err = addParamNames(*InputUnitEntryPair.CU, FunctionParameters))
+ return Err;
+
+ // Add template parameters.
+ if (Error Err =
+ addTemplateParamNames(*InputUnitEntryPair.CU, TemplateParameters))
+ return Err;
+
+ return Error::success();
+}
+
+Error SyntheticTypeNameBuilder::addParamNames(
+ CompileUnit &CU,
+ SmallVector<const DWARFDebugInfoEntry *, 20> &FunctionParameters) {
+ SyntheticName += '(';
+ for (const DWARFDebugInfoEntry *FunctionParameter : FunctionParameters) {
+ if (SyntheticName.back() != '(')
+ SyntheticName += ", ";
+ if (dwarf::toUnsigned(CU.find(FunctionParameter, dwarf::DW_AT_artificial),
+ 0))
+ SyntheticName += "^";
+ if (Error Err = addReferencedODRDies(
+ UnitEntryPairTy{&CU, FunctionParameter}, false, TypeAttr))
+ return Err;
+ }
+ SyntheticName += ')';
+ return Error::success();
+}
+
+Error SyntheticTypeNameBuilder::addTemplateParamNames(
+ CompileUnit &CU,
+ SmallVector<const DWARFDebugInfoEntry *, 10> &TemplateParameters) {
+ if (!TemplateParameters.empty()) {
+ SyntheticName += '<';
+ for (const DWARFDebugInfoEntry *Parameter : TemplateParameters) {
+ if (SyntheticName.back() != '<')
+ SyntheticName += ", ";
+
+ if (Parameter->getTag() == dwarf::DW_TAG_template_value_parameter) {
+ if (std::optional<DWARFFormValue> Val =
+ CU.find(Parameter, dwarf::DW_AT_const_value)) {
+ if (std::optional<uint64_t> ConstVal = Val->getAsUnsignedConstant())
+ SyntheticName += std::to_string(*ConstVal);
+ else if (std::optional<int64_t> ConstVal = Val->getAsSignedConstant())
+ SyntheticName += std::to_string(*ConstVal);
+ }
+ }
+
+ if (Error Err = addReferencedODRDies(UnitEntryPairTy{&CU, Parameter},
+ false, TypeAttr))
+ return Err;
+ }
+ SyntheticName += '>';
+ }
+ return Error::success();
+}
+
+void SyntheticTypeNameBuilder::addOrderedName(
+ std::pair<size_t, size_t> ChildIdx) {
+ std::string Name;
+ llvm::raw_string_ostream stream(Name);
+ stream << format_hex_no_prefix(ChildIdx.first, ChildIdx.second);
+ SyntheticName += Name;
+}
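+
+// For illustration: the pair is (index, field width in hex digits), so a
+// child with index 26 in a 2-digit field is appended as "1a" via
+// format_hex_no_prefix.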
+
+// Examine the DIE and return a type deduplication candidate: some DIEs
+// cannot be deduplicated, and a namespace may refer to another namespace.
+static std::optional<UnitEntryPairTy>
+getTypeDeduplicationCandidate(UnitEntryPairTy UnitEntryPair) {
+ switch (UnitEntryPair.DieEntry->getTag()) {
+ case dwarf::DW_TAG_null:
+ case dwarf::DW_TAG_compile_unit:
+ case dwarf::DW_TAG_partial_unit:
+ case dwarf::DW_TAG_type_unit:
+ case dwarf::DW_TAG_skeleton_unit: {
+ return std::nullopt;
+ }
+ case dwarf::DW_TAG_namespace: {
+    // Check if the current namespace refers to another one.
+ if (UnitEntryPair.CU->find(UnitEntryPair.DieEntry, dwarf::DW_AT_extension))
+ UnitEntryPair = UnitEntryPair.getNamespaceOrigin();
+
+    // Content of anonymous namespaces should not be deduplicated.
+    if (!UnitEntryPair.CU->find(UnitEntryPair.DieEntry, dwarf::DW_AT_name))
+      llvm_unreachable("Can't deduplicate anonymous namespace");
+
+ return UnitEntryPair;
+ }
+ default:
+ return UnitEntryPair;
+ }
+}
+
+Error SyntheticTypeNameBuilder::addParentName(
+ UnitEntryPairTy &InputUnitEntryPair) {
+ std::optional<UnitEntryPairTy> UnitEntryPair = InputUnitEntryPair.getParent();
+ if (!UnitEntryPair)
+ return Error::success();
+
+ UnitEntryPair = getTypeDeduplicationCandidate(*UnitEntryPair);
+ if (!UnitEntryPair)
+ return Error::success();
+
+ if (TypeEntry *ImmediateParentName =
+ UnitEntryPair->CU->getDieTypeEntry(UnitEntryPair->DieEntry)) {
+ SyntheticName += ImmediateParentName->getKey();
+ SyntheticName += ".";
+ return Error::success();
+ }
+
+ // Collect parent entries.
+ SmallVector<UnitEntryPairTy, 10> Parents;
+ do {
+ Parents.push_back(*UnitEntryPair);
+
+ UnitEntryPair = UnitEntryPair->getParent();
+ if (!UnitEntryPair)
+ break;
+
+ UnitEntryPair = getTypeDeduplicationCandidate(*UnitEntryPair);
+ if (!UnitEntryPair)
+ break;
+
+ } while (!UnitEntryPair->CU->getDieTypeEntry(UnitEntryPair->DieEntry));
+
+ // Assign name for each parent entry.
+ size_t NameStart = SyntheticName.size();
+ for (UnitEntryPairTy Parent : reverse(Parents)) {
+ SyntheticName.resize(NameStart);
+ if (Error Err = addDIETypeName(Parent, std::nullopt, true))
+ return Err;
+ }
+
+ // Add parents delimiter.
+ SyntheticName += ".";
+ return Error::success();
+}
+
+void SyntheticTypeNameBuilder::addDieNameFromDeclFileAndDeclLine(
+ UnitEntryPairTy &InputUnitEntryPair, bool &HasDeclFileName) {
+ if (std::optional<DWARFFormValue> DeclFileVal = InputUnitEntryPair.CU->find(
+ InputUnitEntryPair.DieEntry, dwarf::DW_AT_decl_file)) {
+ if (std::optional<DWARFFormValue> DeclLineVal = InputUnitEntryPair.CU->find(
+ InputUnitEntryPair.DieEntry, dwarf::DW_AT_decl_line)) {
+ if (std::optional<std::pair<StringRef, StringRef>> DirAndFilename =
+ InputUnitEntryPair.CU->getDirAndFilenameFromLineTable(
+ *DeclFileVal)) {
+ SyntheticName += DirAndFilename->first;
+ SyntheticName += DirAndFilename->second;
+
+ if (std::optional<uint64_t> DeclLineIntVal =
+ dwarf::toUnsigned(*DeclLineVal)) {
+ SyntheticName += " ";
+ SyntheticName += utohexstr(*DeclLineIntVal);
+ }
+
+ HasDeclFileName = true;
+ }
+ }
+ }
+}
+
+void SyntheticTypeNameBuilder::addValueName(UnitEntryPairTy InputUnitEntryPair,
+ dwarf::Attribute Attr) {
+ if (std::optional<DWARFFormValue> Val =
+ InputUnitEntryPair.CU->find(InputUnitEntryPair.DieEntry, Attr)) {
+ if (std::optional<uint64_t> ConstVal = Val->getAsUnsignedConstant()) {
+ SyntheticName += " ";
+ SyntheticName += std::to_string(*ConstVal);
+ } else if (std::optional<int64_t> ConstVal = Val->getAsSignedConstant()) {
+ SyntheticName += " ";
+ SyntheticName += std::to_string(*ConstVal);
+ }
+ }
+}
+
+Error SyntheticTypeNameBuilder::addReferencedODRDies(
+ UnitEntryPairTy InputUnitEntryPair, bool AssignNameToTypeDescriptor,
+ ArrayRef<dwarf::Attribute> ODRAttrs) {
+ bool FirstIteration = true;
+ for (dwarf::Attribute Attr : ODRAttrs) {
+ if (std::optional<DWARFFormValue> AttrValue =
+ InputUnitEntryPair.CU->find(InputUnitEntryPair.DieEntry, Attr)) {
+ std::optional<UnitEntryPairTy> RefDie =
+ InputUnitEntryPair.CU->resolveDIEReference(
+ *AttrValue, ResolveInterCUReferencesMode::Resolve);
+
+ if (!RefDie)
+ continue;
+
+ if (!RefDie->DieEntry)
+        return createStringError(std::errc::invalid_argument,
+                                  "Can't resolve DIE reference");
+
+ if (!FirstIteration)
+ SyntheticName += ",";
+
+ RecursionDepth++;
+ if (RecursionDepth > 1000)
+ return createStringError(
+ std::errc::invalid_argument,
+ "Cann't parse input DWARF. Recursive dependence.");
+
+ if (Error Err =
+ addDIETypeName(*RefDie, std::nullopt, AssignNameToTypeDescriptor))
+ return Err;
+ RecursionDepth--;
+ FirstIteration = false;
+ }
+ }
+
+ return Error::success();
+}
+
+Error SyntheticTypeNameBuilder::addTypeName(UnitEntryPairTy InputUnitEntryPair,
+ bool AddParentNames) {
+ bool HasLinkageName = false;
+ bool HasShortName = false;
+ bool HasTemplatesInShortName = false;
+ bool HasDeclFileName = false;
+
+ // Try to get name from the DIE.
+ if (std::optional<DWARFFormValue> Val = InputUnitEntryPair.CU->find(
+ InputUnitEntryPair.DieEntry,
+ {dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_AT_linkage_name})) {
+    // First, check for the linkage name.
+ SyntheticName += dwarf::toStringRef(Val);
+ HasLinkageName = true;
+ } else if (std::optional<DWARFFormValue> Val = InputUnitEntryPair.CU->find(
+ InputUnitEntryPair.DieEntry, dwarf::DW_AT_name)) {
+ // Then check for short name.
+ StringRef Name = dwarf::toStringRef(Val);
+ SyntheticName += Name;
+
+ HasShortName = true;
+ HasTemplatesInShortName =
+ Name.ends_with(">") && Name.count("<") != 0 && !Name.ends_with("<=>");
+ } else {
+ // Finally check for declaration attributes.
+ addDieNameFromDeclFileAndDeclLine(InputUnitEntryPair, HasDeclFileName);
+ }
+
+ // Add additional name parts for some DIEs.
+ switch (InputUnitEntryPair.DieEntry->getTag()) {
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_interface_type:
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_subroutine_type:
+ case dwarf::DW_TAG_subprogram: {
+ if (InputUnitEntryPair.CU->find(InputUnitEntryPair.DieEntry,
+ dwarf::DW_AT_artificial))
+ SyntheticName += "^";
+
+    // No need to add signature information for the linkage name; likewise,
+    // no need to add template parameter names if the short name already
+    // includes them.
+ if (!HasLinkageName)
+ if (Error Err =
+ addSignature(InputUnitEntryPair, !HasTemplatesInShortName))
+ return Err;
+ } break;
+ case dwarf::DW_TAG_coarray_type:
+ case dwarf::DW_TAG_array_type: {
+ addArrayDimension(InputUnitEntryPair);
+ } break;
+ case dwarf::DW_TAG_subrange_type: {
+ addValueName(InputUnitEntryPair, dwarf::DW_AT_count);
+ } break;
+ case dwarf::DW_TAG_template_value_parameter: {
+ if (!HasTemplatesInShortName) {
+      // TODO: add support for DW_AT_location.
+ addValueName(InputUnitEntryPair, dwarf::DW_AT_const_value);
+ }
+ } break;
+ default: {
+ // Nothing to do.
+ } break;
+ }
+
+  // If the name for the DIE is not determined yet, add referenced types to
+  // the name.
+ if (!HasLinkageName && !HasShortName && !HasDeclFileName) {
+ if (InputUnitEntryPair.CU->find(InputUnitEntryPair.DieEntry,
+ getODRAttributes()))
+ if (Error Err = addReferencedODRDies(InputUnitEntryPair, AddParentNames,
+ getODRAttributes()))
+ return Err;
+ }
+
+ return Error::success();
+}
+
+Error SyntheticTypeNameBuilder::addDIETypeName(
+ UnitEntryPairTy InputUnitEntryPair,
+ std::optional<std::pair<size_t, size_t>> ChildIndex,
+ bool AssignNameToTypeDescriptor) {
+ std::optional<UnitEntryPairTy> UnitEntryPair =
+ getTypeDeduplicationCandidate(InputUnitEntryPair);
+ if (!UnitEntryPair)
+ return Error::success();
+
+ TypeEntry *TypeEntryPtr =
+ InputUnitEntryPair.CU->getDieTypeEntry(InputUnitEntryPair.DieEntry);
+ // Check if DIE already has a name.
+ if (!TypeEntryPtr) {
+ size_t NameStart = SyntheticName.size();
+ if (AssignNameToTypeDescriptor) {
+ if (Error Err = addParentName(*UnitEntryPair))
+ return Err;
+ }
+ addTypePrefix(UnitEntryPair->DieEntry);
+
+ if (ChildIndex) {
+ addOrderedName(*ChildIndex);
+ } else {
+ if (Error Err = addTypeName(*UnitEntryPair, AssignNameToTypeDescriptor))
+ return Err;
+ }
+
+ if (AssignNameToTypeDescriptor) {
+ // Add built name to the DIE.
+ TypeEntryPtr = TypePoolRef.insert(SyntheticName.substr(NameStart));
+ InputUnitEntryPair.CU->setDieTypeEntry(InputUnitEntryPair.DieEntry,
+ TypeEntryPtr);
+ }
+ } else
+ SyntheticName += TypeEntryPtr->getKey();
+
+ return Error::success();
+}
+
+void SyntheticTypeNameBuilder::addTypePrefix(
+ const DWARFDebugInfoEntry *DieEntry) {
+ switch (DieEntry->getTag()) {
+ case dwarf::DW_TAG_base_type: {
+ SyntheticName += "{0}";
+ } break;
+ case dwarf::DW_TAG_namespace: {
+ SyntheticName += "{1}";
+ } break;
+ case dwarf::DW_TAG_formal_parameter: {
+ SyntheticName += "{2}";
+ } break;
+  // dwarf::DW_TAG_unspecified_parameters has the same prefix as above.
+ case dwarf::DW_TAG_unspecified_parameters: {
+ SyntheticName += "{2}";
+ } break;
+ case dwarf::DW_TAG_template_type_parameter: {
+ SyntheticName += "{3}";
+ } break;
+  // dwarf::DW_TAG_template_value_parameter has the same prefix as above.
+ case dwarf::DW_TAG_template_value_parameter: {
+ SyntheticName += "{3}";
+ } break;
+ case dwarf::DW_TAG_GNU_formal_parameter_pack: {
+ SyntheticName += "{4}";
+ } break;
+ case dwarf::DW_TAG_GNU_template_parameter_pack: {
+ SyntheticName += "{5}";
+ } break;
+ case dwarf::DW_TAG_inheritance: {
+ SyntheticName += "{6}";
+ } break;
+ case dwarf::DW_TAG_array_type: {
+ SyntheticName += "{7}";
+ } break;
+ case dwarf::DW_TAG_class_type: {
+ SyntheticName += "{8}";
+ } break;
+ case dwarf::DW_TAG_enumeration_type: {
+ SyntheticName += "{9}";
+ } break;
+ case dwarf::DW_TAG_imported_declaration: {
+ SyntheticName += "{A}";
+ } break;
+ case dwarf::DW_TAG_member: {
+ SyntheticName += "{B}";
+ } break;
+ case dwarf::DW_TAG_pointer_type: {
+ SyntheticName += "{C}";
+ } break;
+ case dwarf::DW_TAG_reference_type: {
+ SyntheticName += "{D}";
+ } break;
+ case dwarf::DW_TAG_string_type: {
+ SyntheticName += "{E}";
+ } break;
+ case dwarf::DW_TAG_structure_type: {
+ SyntheticName += "{F}";
+ } break;
+ case dwarf::DW_TAG_subroutine_type: {
+ SyntheticName += "{G}";
+ } break;
+ case dwarf::DW_TAG_typedef: {
+ SyntheticName += "{H}";
+ } break;
+ case dwarf::DW_TAG_union_type: {
+ SyntheticName += "{I}";
+ } break;
+ case dwarf::DW_TAG_variant: {
+ SyntheticName += "{J}";
+ } break;
+ case dwarf::DW_TAG_inlined_subroutine: {
+ SyntheticName += "{K}";
+ } break;
+ case dwarf::DW_TAG_module: {
+ SyntheticName += "{L}";
+ } break;
+ case dwarf::DW_TAG_ptr_to_member_type: {
+ SyntheticName += "{M}";
+ } break;
+ case dwarf::DW_TAG_set_type: {
+ SyntheticName += "{N}";
+ } break;
+ case dwarf::DW_TAG_subrange_type: {
+ SyntheticName += "{O}";
+ } break;
+ case dwarf::DW_TAG_with_stmt: {
+ SyntheticName += "{P}";
+ } break;
+ case dwarf::DW_TAG_access_declaration: {
+ SyntheticName += "{Q}";
+ } break;
+ case dwarf::DW_TAG_catch_block: {
+ SyntheticName += "{R}";
+ } break;
+ case dwarf::DW_TAG_const_type: {
+ SyntheticName += "{S}";
+ } break;
+ case dwarf::DW_TAG_constant: {
+ SyntheticName += "{T}";
+ } break;
+ case dwarf::DW_TAG_enumerator: {
+ SyntheticName += "{U}";
+ } break;
+ case dwarf::DW_TAG_file_type: {
+ SyntheticName += "{V}";
+ } break;
+ case dwarf::DW_TAG_friend: {
+ SyntheticName += "{W}";
+ } break;
+ case dwarf::DW_TAG_namelist: {
+ SyntheticName += "{X}";
+ } break;
+ case dwarf::DW_TAG_namelist_item: {
+ SyntheticName += "{Y}";
+ } break;
+ case dwarf::DW_TAG_packed_type: {
+ SyntheticName += "{Z}";
+ } break;
+ case dwarf::DW_TAG_subprogram: {
+ SyntheticName += "{a}";
+ } break;
+ case dwarf::DW_TAG_thrown_type: {
+ SyntheticName += "{b}";
+ } break;
+ case dwarf::DW_TAG_variant_part: {
+ SyntheticName += "{c}";
+ } break;
+ case dwarf::DW_TAG_variable: {
+ SyntheticName += "{d}";
+ } break;
+ case dwarf::DW_TAG_volatile_type: {
+ SyntheticName += "{e}";
+ } break;
+ case dwarf::DW_TAG_dwarf_procedure: {
+ SyntheticName += "{f}";
+ } break;
+ case dwarf::DW_TAG_restrict_type: {
+ SyntheticName += "{g}";
+ } break;
+ case dwarf::DW_TAG_interface_type: {
+ SyntheticName += "{h}";
+ } break;
+ case dwarf::DW_TAG_imported_module: {
+ SyntheticName += "{i}";
+ } break;
+ case dwarf::DW_TAG_unspecified_type: {
+ SyntheticName += "{j}";
+ } break;
+ case dwarf::DW_TAG_imported_unit: {
+ SyntheticName += "{k}";
+ } break;
+ case dwarf::DW_TAG_condition: {
+ SyntheticName += "{l}";
+ } break;
+ case dwarf::DW_TAG_shared_type: {
+ SyntheticName += "{m}";
+ } break;
+ case dwarf::DW_TAG_rvalue_reference_type: {
+ SyntheticName += "{n}";
+ } break;
+ case dwarf::DW_TAG_template_alias: {
+ SyntheticName += "{o}";
+ } break;
+ case dwarf::DW_TAG_coarray_type: {
+ SyntheticName += "{p}";
+ } break;
+ case dwarf::DW_TAG_generic_subrange: {
+ SyntheticName += "{q}";
+ } break;
+ case dwarf::DW_TAG_dynamic_type: {
+ SyntheticName += "{r}";
+ } break;
+ case dwarf::DW_TAG_atomic_type: {
+ SyntheticName += "{s}";
+ } break;
+ case dwarf::DW_TAG_call_site: {
+ SyntheticName += "{t}";
+ } break;
+ case dwarf::DW_TAG_call_site_parameter: {
+ SyntheticName += "{u}";
+ } break;
+ case dwarf::DW_TAG_immutable_type: {
+ SyntheticName += "{v}";
+ } break;
+ case dwarf::DW_TAG_entry_point: {
+ SyntheticName += "{w}";
+ } break;
+ case dwarf::DW_TAG_label: {
+ SyntheticName += "{x}";
+ } break;
+ case dwarf::DW_TAG_lexical_block: {
+ SyntheticName += "{y}";
+ } break;
+ case dwarf::DW_TAG_common_block: {
+ SyntheticName += "{z}";
+ } break;
+ case dwarf::DW_TAG_common_inclusion: {
+ SyntheticName += "{|}";
+ } break;
+ case dwarf::DW_TAG_try_block: {
+ SyntheticName += "{~}";
+ } break;
+
+ case dwarf::DW_TAG_null: {
+ llvm_unreachable("No type prefix for DW_TAG_null");
+ } break;
+ case dwarf::DW_TAG_compile_unit: {
+ llvm_unreachable("No type prefix for DW_TAG_compile_unit");
+ } break;
+ case dwarf::DW_TAG_partial_unit: {
+ llvm_unreachable("No type prefix for DW_TAG_partial_unit");
+ } break;
+ case dwarf::DW_TAG_type_unit: {
+ llvm_unreachable("No type prefix for DW_TAG_type_unit");
+ } break;
+ case dwarf::DW_TAG_skeleton_unit: {
+ llvm_unreachable("No type prefix for DW_TAG_skeleton_unit");
+ } break;
+
+ default: {
+ SyntheticName += "{~~";
+ SyntheticName += utohexstr(DieEntry->getTag());
+ SyntheticName += "}";
+ } break;
+ }
+}
+
+OrderedChildrenIndexAssigner::OrderedChildrenIndexAssigner(
+ CompileUnit &CU, const DWARFDebugInfoEntry *DieEntry) {
+ switch (DieEntry->getTag()) {
+ case dwarf::DW_TAG_array_type:
+ case dwarf::DW_TAG_coarray_type:
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_common_block:
+ case dwarf::DW_TAG_lexical_block:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_subprogram:
+ case dwarf::DW_TAG_subroutine_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_GNU_template_template_param:
+ case dwarf::DW_TAG_GNU_formal_parameter_pack: {
+ NeedCountChildren = true;
+ } break;
+ case dwarf::DW_TAG_enumeration_type: {
+ // TODO: do we need to add a condition here?
+ NeedCountChildren = true;
+ } break;
+ default: {
+ // Nothing to do.
+ }
+ }
+
+ // Count the children of each ordered kind to get the maximal index values.
+ if (NeedCountChildren) {
+ for (const DWARFDebugInfoEntry *CurChild = CU.getFirstChildEntry(DieEntry);
+ CurChild && CurChild->getAbbreviationDeclarationPtr();
+ CurChild = CU.getSiblingEntry(CurChild)) {
+ std::optional<size_t> ArrayIndex = tagToArrayIndex(CU, CurChild);
+ if (!ArrayIndex)
+ continue;
+
+ assert((*ArrayIndex < ChildIndexesWidth.size()) &&
+ "Wrong index for ChildIndexesWidth");
+ ChildIndexesWidth[*ArrayIndex]++;
+ }
+
+ // Calculate the index field width (the number of digits in the
+ // hexadecimal representation).
+ for (size_t &Width : ChildIndexesWidth) {
+ size_t DigitsCounter = 1;
+ size_t NumToCompare = 15;
+
+ while (NumToCompare < Width) {
+ NumToCompare <<= 4;
+ DigitsCounter++;
+ }
+
+ Width = DigitsCounter;
+ }
+ }
+}
+
+std::optional<size_t> OrderedChildrenIndexAssigner::tagToArrayIndex(
+ CompileUnit &CU, const DWARFDebugInfoEntry *DieEntry) {
+ if (!NeedCountChildren)
+ return std::nullopt;
+
+ switch (DieEntry->getTag()) {
+ case dwarf::DW_TAG_unspecified_parameters:
+ case dwarf::DW_TAG_formal_parameter:
+ return 0;
+ case dwarf::DW_TAG_template_value_parameter:
+ case dwarf::DW_TAG_template_type_parameter:
+ return 1;
+ case dwarf::DW_TAG_enumeration_type:
+ if (std::optional<uint32_t> ParentIdx = DieEntry->getParentIdx()) {
+ if (*ParentIdx && CU.getDebugInfoEntry(*ParentIdx)->getTag() ==
+ dwarf::DW_TAG_array_type)
+ return 2;
+ }
+ return std::nullopt;
+ case dwarf::DW_TAG_subrange_type:
+ return 3;
+ case dwarf::DW_TAG_generic_subrange:
+ return 4;
+ case dwarf::DW_TAG_enumerator:
+ return 5;
+ case dwarf::DW_TAG_namelist_item:
+ return 6;
+ case dwarf::DW_TAG_member:
+ return 7;
+ default:
+ return std::nullopt;
+ };
+}
+
+std::optional<std::pair<size_t, size_t>>
+OrderedChildrenIndexAssigner::getChildIndex(
+ CompileUnit &CU, const DWARFDebugInfoEntry *ChildDieEntry) {
+ std::optional<size_t> ArrayIndex = tagToArrayIndex(CU, ChildDieEntry);
+ if (!ArrayIndex)
+ return std::nullopt;
+
+ assert((*ArrayIndex < OrderedChildIdxs.size()) &&
+ "Wrong index for OrderedChildIdxs");
+ assert(ChildIndexesWidth[*ArrayIndex] < 16 &&
+ "Index width exceeds 16 digits.");
+
+ std::pair<size_t, size_t> Result = std::make_pair(
+ OrderedChildIdxs[*ArrayIndex], ChildIndexesWidth[*ArrayIndex]);
+ OrderedChildIdxs[*ArrayIndex]++;
+ return Result;
+}
+
+} // end of namespace dwarflinker_parallel
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/SyntheticTypeNameBuilder.h b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/SyntheticTypeNameBuilder.h
new file mode 100644
index 000000000000..c9dce4e94fb0
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/SyntheticTypeNameBuilder.h
@@ -0,0 +1,155 @@
+//===- SyntheticTypeNameBuilder.h -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_DWARFLINKERNEXT_SYNTHETICTYPENAMEBUILDER_H
+#define LLVM_LIB_DWARFLINKERNEXT_SYNTHETICTYPENAMEBUILDER_H
+
+#include "DWARFLinkerCompileUnit.h"
+#include "DWARFLinkerGlobalData.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+class DWARFDebugInfoEntry;
+
+namespace dwarflinker_parallel {
+struct LinkContext;
+class TypeTableUnit;
+class CompileUnit;
+
+/// The helper class to build a type name based on DIE properties.
+/// It builds a synthetic name based on explicit attributes (DW_AT_name,
+/// DW_AT_linkage_name) or on implicit attributes (DW_AT_decl*).
+/// Names for specific DIEs (like subprograms, template classes, ...) include
+/// additional attributes: subprogram parameters, template parameters,
+/// array ranges. Examples of built names:
+///
+/// class A { } : {8}A
+///
+/// namespace llvm { class A { } } : {1}llvm{8}A
+///
+/// template <int> struct B { } : {F}B<{0}int>
+///
+/// void foo ( int p1, int p2 ) : {a}void foo({0}int, {0}int)
+///
+/// int *ptr; : {c}ptr {0}int
+///
+/// int var; : {d}var
+///
+/// These names are used to refer to DIEs describing types.
+class SyntheticTypeNameBuilder {
+public:
+ SyntheticTypeNameBuilder(TypePool &TypePoolRef) : TypePoolRef(TypePoolRef) {}
+
+ /// Create a synthetic name for the specified DIE \p InputUnitEntryPair
+ /// and assign the created name to the DIE type info. \p ChildIndex is used
+ /// to create names for ordered DIEs (e.g. function arguments).
+ Error assignName(UnitEntryPairTy InputUnitEntryPair,
+ std::optional<std::pair<size_t, size_t>> ChildIndex);
+
+protected:
+ /// Add array type dimension.
+ void addArrayDimension(UnitEntryPairTy InputUnitEntryPair);
+
+ /// Add a signature (entry type, plus types of parameters, plus types of
+ /// template parameters if \p addTemplateParameters is true).
+ Error addSignature(UnitEntryPairTy InputUnitEntryPair,
+ bool addTemplateParameters);
+
+ /// Add specified \p FunctionParameters to the built name.
+ Error addParamNames(
+ CompileUnit &CU,
+ SmallVector<const DWARFDebugInfoEntry *, 20> &FunctionParameters);
+
+ /// Add specified \p TemplateParameters to the built name.
+ Error addTemplateParamNames(
+ CompileUnit &CU,
+ SmallVector<const DWARFDebugInfoEntry *, 10> &TemplateParameters);
+
+ /// Add ordered name to the built name.
+ void addOrderedName(CompileUnit &CU, const DWARFDebugInfoEntry *DieEntry);
+
+ /// Analyze \p InputUnitEntryPair's ODR attributes and add the names
+ /// of the referenced type DIEs to the built name.
+ Error addReferencedODRDies(UnitEntryPairTy InputUnitEntryPair,
+ bool AssignNameToTypeDescriptor,
+ ArrayRef<dwarf::Attribute> ODRAttrs);
+
+ /// Add the names of parent DIEs to the built name.
+ Error addParentName(UnitEntryPairTy &InputUnitEntryPair);
+
+ /// Build a name for the specified \p DieEntry from the
+ /// dwarf::DW_AT_decl_file and dwarf::DW_AT_decl_line attributes;
+ /// \p HasDeclFileName is set to true if such a name was added.
+ void addDieNameFromDeclFileAndDeclLine(UnitEntryPairTy &InputUnitEntryPair,
+ bool &HasDeclFileName);
+
+ /// Add type prefix to the built name.
+ void addTypePrefix(const DWARFDebugInfoEntry *DieEntry);
+
+ /// Add type name to the built name.
+ Error addTypeName(UnitEntryPairTy InputUnitEntryPair, bool AddParentNames);
+
+ /// Analyze \p InputUnitEntryPair for the type name and possibly assign
+ /// the built type name to the DIE's type info.
+ /// NOTE: while analyzing types we may create different kinds of names
+ /// for the same type depending on whether the type is part of another type.
+ /// E.g. a DW_TAG_formal_parameter would receive the name "{02}01" when
+ /// examined alone, or the name "{0}int" when it is part of a function name:
+ /// {a}void foo({0}int). The \p AssignNameToTypeDescriptor flag tells whether
+ /// the built name should be assigned to the DIE type descriptor; it is false
+ /// when the name is only a part of another type name.
+ Error addDIETypeName(UnitEntryPairTy InputUnitEntryPair,
+ std::optional<std::pair<size_t, size_t>> ChildIndex,
+ bool AssignNameToTypeDescriptor);
+
+ /// Add ordered name to the built name.
+ void addOrderedName(std::pair<size_t, size_t> ChildIdx);
+
+ /// Add value name to the built name.
+ void addValueName(UnitEntryPairTy InputUnitEntryPair, dwarf::Attribute Attr);
+
+ /// Buffer keeping the built name.
+ SmallString<1000> SyntheticName;
+
+ /// Recursion depth counter.
+ size_t RecursionDepth = 0;
+
+ /// Type pool
+ TypePool &TypePoolRef;
+};
+
+/// This class helps to assign indexes to DIE children.
+/// The indexes are used to create type names for children which
+/// should be kept in their original order (function parameters,
+/// array dimensions, enumeration members, class/structure members).
+class OrderedChildrenIndexAssigner {
+public:
+ OrderedChildrenIndexAssigner(CompileUnit &CU,
+ const DWARFDebugInfoEntry *DieEntry);
+
+ /// Returns the index of the specified child and the width of its
+ /// hexadecimal representation.
+ std::optional<std::pair<size_t, size_t>>
+ getChildIndex(CompileUnit &CU, const DWARFDebugInfoEntry *ChildDieEntry);
+
+protected:
+ using OrderedChildrenIndexesArrayTy = std::array<size_t, 8>;
+
+ std::optional<size_t> tagToArrayIndex(CompileUnit &CU,
+ const DWARFDebugInfoEntry *DieEntry);
+
+ bool NeedCountChildren = false;
+ OrderedChildrenIndexesArrayTy OrderedChildIdxs = {0};
+ OrderedChildrenIndexesArrayTy ChildIndexesWidth = {0};
+};
+
+} // end namespace dwarflinker_parallel
+} // end namespace llvm
+
+#endif // LLVM_LIB_DWARFLINKERNEXT_SYNTHETICTYPENAMEBUILDER_H
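
For orientation, a minimal sketch (not part of the patch) of how the per-tag
prefixes assigned by addTypePrefix() compose into a synthetic name. The {1}
and {8} values follow the "{1}llvm{8}A" example in the header comment above;
the helper itself and its name are hypothetical:

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"

// Append "{<prefix>}<name>" for one DIE on the path from the compile unit
// to the type, mirroring the switch in addTypePrefix().
static void appendPrefixedName(llvm::SmallString<128> &Out,
                               llvm::dwarf::Tag Tag, llvm::StringRef Name) {
  switch (Tag) {
  case llvm::dwarf::DW_TAG_namespace:
    Out += "{1}";
    break;
  case llvm::dwarf::DW_TAG_class_type:
    Out += "{8}";
    break;
  default:
    Out += "{~~?}"; // unknown tags get an escaped hex prefix in the real table
    break;
  }
  Out += Name;
}

// appendPrefixedName(S, llvm::dwarf::DW_TAG_namespace, "llvm");
// appendPrefixedName(S, llvm::dwarf::DW_TAG_class_type, "A");
// S now holds "{1}llvm{8}A", the key used to match this type across units.
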
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/TypePool.h b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/TypePool.h
new file mode 100644
index 000000000000..bbb3261027ce
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/TypePool.h
@@ -0,0 +1,177 @@
+//===- TypePool.h -----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DWARFLINKERPARALLEL_TYPEPOOL_H
+#define LLVM_DWARFLINKERPARALLEL_TYPEPOOL_H
+
+#include "ArrayList.h"
+#include "llvm/ADT/ConcurrentHashtable.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/Support/Allocator.h"
+#include <atomic>
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+class TypePool;
+class CompileUnit;
+class TypeEntryBody;
+
+using TypeEntry = StringMapEntry<std::atomic<TypeEntryBody *>>;
+
+/// Keeps cloned data for the type DIE.
+class TypeEntryBody {
+public:
+ /// Returns the copy of the type DIE that should be emitted into the resulting file.
+ DIE &getFinalDie() const {
+ if (Die)
+ return *Die;
+
+ assert(DeclarationDie);
+ return *DeclarationDie;
+ }
+
+ /// Returns true if the type DIE entry has only a declaration DIE.
+ bool hasOnlyDeclaration() const { return Die == nullptr; }
+
+ /// Creates a new TypeEntryBody using the specified \p Allocator.
+ static TypeEntryBody *create(parallel::PerThreadBumpPtrAllocator &Allocator) {
+ TypeEntryBody *Result = Allocator.Allocate<TypeEntryBody>();
+ new (Result) TypeEntryBody(Allocator);
+ return Result;
+ }
+
+ /// TypeEntryBody keeps partially cloned DIEs corresponding to this type.
+ /// Two kinds of DIE can be kept: declaration and definition.
+ /// If a definition DIE was met while parsing the input DWARF then that DIE
+ /// is used as the final DIE for this type. If no definition DIE is met then
+ /// the declaration DIE is used as the final DIE.
+
+ // Keeps the definition DIE.
+ std::atomic<DIE *> Die = {nullptr};
+
+ // Keeps the declaration DIE.
+ std::atomic<DIE *> DeclarationDie = {nullptr};
+
+ // True if the parent type DIE is a declaration.
+ std::atomic<bool> ParentIsDeclaration = {true};
+
+ /// Children for current type.
+ ArrayList<TypeEntry *, 5> Children;
+
+protected:
+ TypeEntryBody() = delete;
+ TypeEntryBody(const TypeEntryBody &RHS) = delete;
+ TypeEntryBody(TypeEntryBody &&RHS) = delete;
+ TypeEntryBody &operator=(const TypeEntryBody &RHS) = delete;
+ TypeEntryBody &operator=(const TypeEntryBody &&RHS) = delete;
+
+ TypeEntryBody(parallel::PerThreadBumpPtrAllocator &Allocator)
+ : Children(&Allocator) {}
+};
+
+class TypeEntryInfo {
+public:
+ /// \returns Hash value for the specified \p Key.
+ static inline uint64_t getHashValue(const StringRef &Key) {
+ return xxh3_64bits(Key);
+ }
+
+ /// \returns true if both \p LHS and \p RHS are equal.
+ static inline bool isEqual(const StringRef &LHS, const StringRef &RHS) {
+ return LHS == RHS;
+ }
+
+ /// \returns key for the specified \p KeyData.
+ static inline StringRef getKey(const TypeEntry &KeyData) {
+ return KeyData.getKey();
+ }
+
+ /// \returns newly created object of KeyDataTy type.
+ static inline TypeEntry *
+ create(const StringRef &Key, parallel::PerThreadBumpPtrAllocator &Allocator) {
+ return TypeEntry::create(Key, Allocator);
+ }
+};
+
+/// TypePool keeps type descriptors which contain a partially cloned DIE
+/// corresponding to each type. Types are identified by names.
+class TypePool : ConcurrentHashTableByPtr<StringRef, TypeEntry,
+ parallel::PerThreadBumpPtrAllocator,
+ TypeEntryInfo> {
+public:
+ TypePool()
+ : ConcurrentHashTableByPtr<StringRef, TypeEntry,
+ parallel::PerThreadBumpPtrAllocator,
+ TypeEntryInfo>(Allocator) {
+ Root = TypeEntry::create("", Allocator);
+ Root->getValue().store(TypeEntryBody::create(Allocator));
+ }
+
+ TypeEntry *insert(StringRef Name) {
+ return ConcurrentHashTableByPtr<StringRef, TypeEntry,
+ parallel::PerThreadBumpPtrAllocator,
+ TypeEntryInfo>::insert(Name)
+ .first;
+ }
+
+ /// Create or return the existing type entry body for the specified \p Entry.
+ /// Link that entry as a child of the specified \p ParentEntry.
+ /// \returns The existing or created type entry body.
+ TypeEntryBody *getOrCreateTypeEntryBody(TypeEntry *Entry,
+ TypeEntry *ParentEntry) {
+ TypeEntryBody *DIE = Entry->getValue().load();
+ if (DIE)
+ return DIE;
+
+ TypeEntryBody *NewDIE = TypeEntryBody::create(Allocator);
+ if (Entry->getValue().compare_exchange_weak(DIE, NewDIE)) {
+ ParentEntry->getValue().load()->Children.add(Entry);
+ return NewDIE;
+ }
+
+ return DIE;
+ }
+
+ /// Sort children for each kept type entry.
+ void sortTypes() {
+ std::function<void(TypeEntry * Entry)> SortChildrenRec =
+ [&](TypeEntry *Entry) {
+ Entry->getValue().load()->Children.sort(TypesComparator);
+ Entry->getValue().load()->Children.forEach(SortChildrenRec);
+ };
+
+ SortChildrenRec(getRoot());
+ }
+
+ /// Return root for all type entries.
+ TypeEntry *getRoot() const { return Root; }
+
+ /// Return the thread-local allocator used by the pool.
+ BumpPtrAllocator &getThreadLocalAllocator() {
+ return Allocator.getThreadLocalAllocator();
+ }
+
+protected:
+ std::function<bool(const TypeEntry *LHS, const TypeEntry *RHS)>
+ TypesComparator = [](const TypeEntry *LHS, const TypeEntry *RHS) -> bool {
+ return LHS->getKey() < RHS->getKey();
+ };
+
+ // Root of all type entries.
+ TypeEntry *Root = nullptr;
+
+private:
+ parallel::PerThreadBumpPtrAllocator Allocator;
+};
+
+} // end of namespace dwarflinker_parallel
+} // end namespace llvm
+
+#endif // LLVM_DWARFLINKERPARALLEL_TYPEPOOL_H
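
A minimal usage sketch of the pool API above; the call sequence is
illustrative (real callers live elsewhere in the linker), but every method
shown is declared in this header:

#include "TypePool.h"

using namespace llvm::dwarflinker_parallel;

void exampleUse(TypePool &Pool) {
  // Intern a synthetic type name; concurrent inserts of the same name
  // return the same entry.
  TypeEntry *IntEntry = Pool.insert("{0}int");

  // Lazily create the entry body and link it under the root entry.
  TypeEntryBody *Body =
      Pool.getOrCreateTypeEntryBody(IntEntry, Pool.getRoot());
  (void)Body; // Body->Die / Body->DeclarationDie are filled during cloning.

  // Make the child order deterministic before emitting the merged types.
  Pool.sortTypes();
}
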
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/Utils.h b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/Utils.h
new file mode 100644
index 000000000000..91f9dca46a82
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinkerParallel/Utils.h
@@ -0,0 +1,40 @@
+//===- Utils.h --------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_DWARFLINKERPARALLEL_UTILS_H
+#define LLVM_LIB_DWARFLINKERPARALLEL_UTILS_H
+
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+/// This function calls \p Iteration() until it returns false.
+/// If the number of iterations exceeds \p MaxCounter then an Error is
+/// returned. This function should be used for loops which are assumed to
+/// have a number of iterations significantly smaller than \p MaxCounter,
+/// to avoid infinite looping in error cases.
+inline Error finiteLoop(function_ref<Expected<bool>()> Iteration,
+ size_t MaxCounter = 100000) {
+ size_t IterationsCounter = 0;
+ while (IterationsCounter++ < MaxCounter) {
+ Expected<bool> IterationResultOrError = Iteration();
+ if (!IterationResultOrError)
+ return IterationResultOrError.takeError();
+
+ if (!IterationResultOrError.get())
+ return Error::success();
+ }
+
+ return createStringError(std::errc::invalid_argument, "Infinite recursion");
+}
+
+} // end of namespace dwarflinker_parallel
+} // end namespace llvm
+
+#endif // LLVM_LIB_DWARFLINKERPARALLEL_UTILS_H
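
A usage sketch for finiteLoop(): the lambda returns true to continue, false
to finish successfully, or an Error to abort. The chain-walking scenario is
hypothetical:

#include "Utils.h"

llvm::Error walkShortChain() {
  int Remaining = 3; // stand-in for following a chain of DIE references
  return llvm::dwarflinker_parallel::finiteLoop(
      [&]() -> llvm::Expected<bool> {
        if (Remaining == 0)
          return false; // done; finiteLoop() returns Error::success()
        --Remaining;
        return true; // keep iterating
      });
}
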
diff --git a/contrib/llvm-project/llvm/lib/DWP/DWP.cpp b/contrib/llvm-project/llvm/lib/DWP/DWP.cpp
index 89101ca7e573..77bd22d1f071 100644
--- a/contrib/llvm-project/llvm/lib/DWP/DWP.cpp
+++ b/contrib/llvm-project/llvm/lib/DWP/DWP.cpp
@@ -183,14 +183,19 @@ static StringRef getSubsection(StringRef Section,
static Error sectionOverflowErrorOrWarning(uint32_t PrevOffset,
uint32_t OverflowedOffset,
StringRef SectionName,
- bool ContinueOnCuIndexOverflow) {
+ OnCuIndexOverflow OverflowOptValue,
+ bool &AnySectionOverflow) {
std::string Msg =
(SectionName +
Twine(" Section Contribution Offset overflow 4G. Previous Offset ") +
Twine(PrevOffset) + Twine(", After overflow offset ") +
Twine(OverflowedOffset) + Twine("."))
.str();
- if (ContinueOnCuIndexOverflow) {
+ if (OverflowOptValue == OnCuIndexOverflow::Continue) {
+ WithColor::defaultWarningHandler(make_error<DWPError>(Msg));
+ return Error::success();
+ } else if (OverflowOptValue == OnCuIndexOverflow::SoftStop) {
+ AnySectionOverflow = true;
WithColor::defaultWarningHandler(make_error<DWPError>(Msg));
return Error::success();
}
@@ -201,7 +206,8 @@ static Error addAllTypesFromDWP(
MCStreamer &Out, MapVector<uint64_t, UnitIndexEntry> &TypeIndexEntries,
const DWARFUnitIndex &TUIndex, MCSection *OutputTypes, StringRef Types,
const UnitIndexEntry &TUEntry, uint32_t &TypesOffset,
- unsigned TypesContributionIndex, bool ContinueOnCuIndexOverflow) {
+ unsigned TypesContributionIndex, OnCuIndexOverflow OverflowOptValue,
+ bool &AnySectionOverflow) {
Out.switchSection(OutputTypes);
for (const DWARFUnitIndex::Entry &E : TUIndex.getRows()) {
auto *I = E.getContributions();
@@ -232,9 +238,14 @@ static Error addAllTypesFromDWP(
static_assert(sizeof(OldOffset) == sizeof(TypesOffset));
TypesOffset += C.getLength();
if (OldOffset > TypesOffset) {
- if (Error Err = sectionOverflowErrorOrWarning(
- OldOffset, TypesOffset, "Types", ContinueOnCuIndexOverflow))
+ if (Error Err = sectionOverflowErrorOrWarning(OldOffset, TypesOffset,
+ "Types", OverflowOptValue,
+ AnySectionOverflow))
return Err;
+ if (AnySectionOverflow) {
+ TypesOffset = OldOffset;
+ return Error::success();
+ }
}
}
return Error::success();
@@ -244,7 +255,7 @@ static Error addAllTypesFromTypesSection(
MCStreamer &Out, MapVector<uint64_t, UnitIndexEntry> &TypeIndexEntries,
MCSection *OutputTypes, const std::vector<StringRef> &TypesSections,
const UnitIndexEntry &CUEntry, uint32_t &TypesOffset,
- bool ContinueOnCuIndexOverflow) {
+ OnCuIndexOverflow OverflowOptValue, bool &AnySectionOverflow) {
for (StringRef Types : TypesSections) {
Out.switchSection(OutputTypes);
uint64_t Offset = 0;
@@ -273,9 +284,14 @@ static Error addAllTypesFromTypesSection(
uint32_t OldOffset = TypesOffset;
TypesOffset += C.getLength32();
if (OldOffset > TypesOffset) {
- if (Error Err = sectionOverflowErrorOrWarning(
- OldOffset, TypesOffset, "types", ContinueOnCuIndexOverflow))
+ if (Error Err = sectionOverflowErrorOrWarning(OldOffset, TypesOffset,
+ "Types", OverflowOptValue,
+ AnySectionOverflow))
return Err;
+ if (AnySectionOverflow) {
+ TypesOffset = OldOffset;
+ return Error::success();
+ }
}
}
}
@@ -583,7 +599,7 @@ Error handleSection(
}
Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
- bool ContinueOnCuIndexOverflow) {
+ OnCuIndexOverflow OverflowOptValue) {
const auto &MCOFI = *Out.getContext().getObjectFileInfo();
MCSection *const StrSection = MCOFI.getDwarfStrDWOSection();
MCSection *const StrOffsetSection = MCOFI.getDwarfStrOffDWOSection();
@@ -613,6 +629,7 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
uint32_t ContributionOffsets[8] = {};
uint16_t Version = 0;
uint32_t IndexVersion = 0;
+ bool AnySectionOverflow = false;
DWPStringPool Strings(Out, StrSection);
@@ -687,12 +704,15 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
uint32_t SectionIndex = 0;
for (auto &Section : Obj.sections()) {
if (SectionIndex == Index) {
- return sectionOverflowErrorOrWarning(
- OldOffset, ContributionOffsets[Index], *Section.getName(),
- ContinueOnCuIndexOverflow);
+ if (Error Err = sectionOverflowErrorOrWarning(
+ OldOffset, ContributionOffsets[Index], *Section.getName(),
+ OverflowOptValue, AnySectionOverflow))
+ return Err;
}
++SectionIndex;
}
+ if (AnySectionOverflow)
+ break;
}
}
@@ -720,8 +740,14 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
C.getLength32()) {
if (Error Err = sectionOverflowErrorOrWarning(
InfoSectionOffset, InfoSectionOffset + C.getLength32(),
- "debug_info", ContinueOnCuIndexOverflow))
+ "debug_info", OverflowOptValue, AnySectionOverflow))
return Err;
+ if (AnySectionOverflow) {
+ if (Header.Version < 5 ||
+ Header.UnitType == dwarf::DW_UT_split_compile)
+ FoundCUUnit = true;
+ break;
+ }
}
UnitOffset += C.getLength32();
@@ -752,6 +778,8 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
Info.substr(UnitOffset - C.getLength32(), C.getLength32()));
InfoSectionOffset += C.getLength32();
}
+ if (AnySectionOverflow)
+ break;
}
if (!FoundCUUnit)
@@ -762,9 +790,11 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
if (Error Err = addAllTypesFromTypesSection(
Out, TypeIndexEntries, TypesSection, CurTypesSection, CurEntry,
ContributionOffsets[getContributionIndex(DW_SECT_EXT_TYPES, 2)],
- ContinueOnCuIndexOverflow))
+ OverflowOptValue, AnySectionOverflow))
return Err;
}
+ if (AnySectionOverflow)
+ break;
continue;
}
@@ -860,9 +890,11 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
if (Error Err = addAllTypesFromDWP(
Out, TypeIndexEntries, TUIndex, OutSection, TypeInputSection,
CurEntry, ContributionOffsets[TypesContributionIndex],
- TypesContributionIndex, ContinueOnCuIndexOverflow))
+ TypesContributionIndex, OverflowOptValue, AnySectionOverflow))
return Err;
}
+ if (AnySectionOverflow)
+ break;
}
if (Version < 5) {
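
The overflow handling that this hunk threads through llvm-dwp reduces to a
three-way policy. A sketch of that policy follows; the hard-stop enumerator
spelling is assumed, since only Continue and SoftStop appear in the hunks
above:

#include "llvm/Support/Error.h"
#include <string>
#include <system_error>

enum class OnCuIndexOverflow { HardStop, SoftStop, Continue }; // assumed

llvm::Error onOverflow(OnCuIndexOverflow Opt, bool &AnySectionOverflow,
                       const std::string &Msg) {
  if (Opt == OnCuIndexOverflow::Continue)
    return llvm::Error::success(); // warn (elided here) and keep writing
  if (Opt == OnCuIndexOverflow::SoftStop) {
    AnySectionOverflow = true; // callers drain their loops and stop cleanly
    return llvm::Error::success();
  }
  // Hard stop: fail the whole llvm-dwp run.
  return llvm::createStringError(std::errc::value_too_large, "%s",
                                 Msg.c_str());
}
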
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/BTF/BTFContext.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/BTF/BTFContext.cpp
index 24898739b824..2e651cb378db 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/BTF/BTFContext.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/BTF/BTFContext.cpp
@@ -63,7 +63,9 @@ std::unique_ptr<BTFContext>
BTFContext::create(const ObjectFile &Obj,
std::function<void(Error)> ErrorHandler) {
auto Ctx = std::make_unique<BTFContext>();
- if (Error E = Ctx->BTF.parse(Obj))
+ BTFParser::ParseOptions Opts;
+ Opts.LoadLines = true;
+ if (Error E = Ctx->BTF.parse(Obj, Opts))
ErrorHandler(std::move(E));
return Ctx;
}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/BTF/BTFParser.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/BTF/BTFParser.cpp
index 6151e1b15cbb..4fc31a445603 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/BTF/BTFParser.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/BTF/BTFParser.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/BTF/BTFParser.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/Errc.h"
#define DEBUG_TYPE "debug-info-btf-parser"
@@ -74,11 +76,13 @@ public:
// Used by BTFParser::parse* auxiliary functions.
struct BTFParser::ParseContext {
const ObjectFile &Obj;
+ const ParseOptions &Opts;
// Map from ELF section name to SectionRef
DenseMap<StringRef, SectionRef> Sections;
public:
- ParseContext(const ObjectFile &Obj) : Obj(Obj) {}
+ ParseContext(const ObjectFile &Obj, const ParseOptions &Opts)
+ : Obj(Obj), Opts(Opts) {}
Expected<DataExtractor> makeExtractor(SectionRef Sec) {
Expected<StringRef> Contents = Sec.getContents();
@@ -119,19 +123,126 @@ Error BTFParser::parseBTF(ParseContext &Ctx, SectionRef BTF) {
return Err(".BTF", C);
if (HdrLen < 8)
return Err("unexpected .BTF header length: ") << HdrLen;
- (void)Extractor.getU32(C); // type_off
- (void)Extractor.getU32(C); // type_len
+ uint32_t TypeOff = Extractor.getU32(C);
+ uint32_t TypeLen = Extractor.getU32(C);
uint32_t StrOff = Extractor.getU32(C);
uint32_t StrLen = Extractor.getU32(C);
uint32_t StrStart = HdrLen + StrOff;
uint32_t StrEnd = StrStart + StrLen;
+ uint32_t TypesInfoStart = HdrLen + TypeOff;
+ uint32_t TypesInfoEnd = TypesInfoStart + TypeLen;
+ uint32_t BytesExpected = std::max(StrEnd, TypesInfoEnd);
if (!C)
return Err(".BTF", C);
- if (Extractor.getData().size() < StrEnd)
+ if (Extractor.getData().size() < BytesExpected)
return Err("invalid .BTF section size, expecting at-least ")
- << StrEnd << " bytes";
+ << BytesExpected << " bytes";
+
+ StringsTable = Extractor.getData().slice(StrStart, StrEnd);
+
+ if (TypeLen > 0 && Ctx.Opts.LoadTypes) {
+ StringRef RawData = Extractor.getData().slice(TypesInfoStart, TypesInfoEnd);
+ if (Error E = parseTypesInfo(Ctx, TypesInfoStart, RawData))
+ return E;
+ }
+
+ return Error::success();
+}
+
+// Compute record size for each BTF::CommonType sub-type
+// (including entries in the tail position).
+static size_t byteSize(BTF::CommonType *Type) {
+ size_t Size = sizeof(BTF::CommonType);
+ switch (Type->getKind()) {
+ case BTF::BTF_KIND_INT:
+ Size += sizeof(uint32_t);
+ break;
+ case BTF::BTF_KIND_ARRAY:
+ Size += sizeof(BTF::BTFArray);
+ break;
+ case BTF::BTF_KIND_VAR:
+ Size += sizeof(uint32_t);
+ break;
+ case BTF::BTF_KIND_DECL_TAG:
+ Size += sizeof(uint32_t);
+ break;
+ case BTF::BTF_KIND_STRUCT:
+ case BTF::BTF_KIND_UNION:
+ Size += sizeof(BTF::BTFMember) * Type->getVlen();
+ break;
+ case BTF::BTF_KIND_ENUM:
+ Size += sizeof(BTF::BTFEnum) * Type->getVlen();
+ break;
+ case BTF::BTF_KIND_ENUM64:
+ Size += sizeof(BTF::BTFEnum64) * Type->getVlen();
+ break;
+ case BTF::BTF_KIND_FUNC_PROTO:
+ Size += sizeof(BTF::BTFParam) * Type->getVlen();
+ break;
+ case BTF::BTF_KIND_DATASEC:
+ Size += sizeof(BTF::BTFDataSec) * Type->getVlen();
+ break;
+ }
+ return Size;
+}
+
+// Guard value for void types; simplifies the code a bit, but NameOff is not
+// actually valid.
+const BTF::CommonType VoidTypeInst = {0, BTF::BTF_KIND_UNKN << 24, {0}};
+
+// Type information "parsing" is very primitive:
+// - The `RawData` is copied to a buffer owned by `BTFParser` instance.
+// - The buffer is treated as an array of `uint32_t` values, each value
+// is swapped to use native endianness. This is possible, because
+// according to BTF spec all buffer elements are structures comprised
+// of `uint32_t` fields.
+// - `BTFParser::Types` vector is filled with pointers to buffer
+// elements, using `byteSize()` function to slice the buffer at type
+// record boundaries.
+// - If at some point a type definition with incorrect size (logical size
+// exceeding buffer boundaries) is reached it is not added to the
+// `BTFParser::Types` vector and the process stops.
+Error BTFParser::parseTypesInfo(ParseContext &Ctx, uint64_t TypesInfoStart,
+ StringRef RawData) {
+ using support::endian::byte_swap;
+
+ TypesBuffer = OwningArrayRef<uint8_t>(arrayRefFromStringRef(RawData));
+ // Switch endianness if necessary.
+ endianness Endianness = Ctx.Obj.isLittleEndian() ? llvm::endianness::little
+ : llvm::endianness::big;
+ uint32_t *TypesBuffer32 = (uint32_t *)TypesBuffer.data();
+ for (uint64_t I = 0; I < TypesBuffer.size() / 4; ++I)
+ TypesBuffer32[I] = byte_swap(TypesBuffer32[I], Endianness);
+
+ // The type id 0 is reserved for void type.
+ Types.push_back(&VoidTypeInst);
+
+ uint64_t Pos = 0;
+ while (Pos < RawData.size()) {
+ uint64_t BytesLeft = RawData.size() - Pos;
+ uint64_t Offset = TypesInfoStart + Pos;
+ BTF::CommonType *Type = (BTF::CommonType *)&TypesBuffer[Pos];
+ if (BytesLeft < sizeof(*Type))
+ return Err("incomplete type definition in .BTF section:")
+ << " offset " << Offset << ", index " << Types.size();
+
+ uint64_t Size = byteSize(Type);
+ if (BytesLeft < Size)
+ return Err("incomplete type definition in .BTF section:")
+ << " offset=" << Offset << ", index=" << Types.size()
+ << ", vlen=" << Type->getVlen();
+
+ LLVM_DEBUG({
+ llvm::dbgs() << "Adding BTF type:\n"
+ << " Id = " << Types.size() << "\n"
+ << " Kind = " << Type->getKind() << "\n"
+ << " Name = " << findString(Type->NameOff) << "\n"
+ << " Record Size = " << Size << "\n";
+ });
+ Types.push_back(Type);
+ Pos += Size;
+ }
- StringsTable = Extractor.getData().substr(StrStart, StrLen);
return Error::success();
}
@@ -162,12 +273,24 @@ Error BTFParser::parseBTFExt(ParseContext &Ctx, SectionRef BTFExt) {
(void)Extractor.getU32(C); // func_info_len
uint32_t LineInfoOff = Extractor.getU32(C);
uint32_t LineInfoLen = Extractor.getU32(C);
+ uint32_t RelocInfoOff = Extractor.getU32(C);
+ uint32_t RelocInfoLen = Extractor.getU32(C);
if (!C)
return Err(".BTF.ext", C);
- uint32_t LineInfoStart = HdrLen + LineInfoOff;
- uint32_t LineInfoEnd = LineInfoStart + LineInfoLen;
- if (Error E = parseLineInfo(Ctx, Extractor, LineInfoStart, LineInfoEnd))
- return E;
+
+ if (LineInfoLen > 0 && Ctx.Opts.LoadLines) {
+ uint32_t LineInfoStart = HdrLen + LineInfoOff;
+ uint32_t LineInfoEnd = LineInfoStart + LineInfoLen;
+ if (Error E = parseLineInfo(Ctx, Extractor, LineInfoStart, LineInfoEnd))
+ return E;
+ }
+
+ if (RelocInfoLen > 0 && Ctx.Opts.LoadRelocs) {
+ uint32_t RelocInfoStart = HdrLen + RelocInfoOff;
+ uint32_t RelocInfoEnd = RelocInfoStart + RelocInfoLen;
+ if (Error E = parseRelocInfo(Ctx, Extractor, RelocInfoStart, RelocInfoEnd))
+ return E;
+ }
return Error::success();
}
@@ -214,11 +337,52 @@ Error BTFParser::parseLineInfo(ParseContext &Ctx, DataExtractor &Extractor,
return Error::success();
}
-Error BTFParser::parse(const ObjectFile &Obj) {
+Error BTFParser::parseRelocInfo(ParseContext &Ctx, DataExtractor &Extractor,
+ uint64_t RelocInfoStart,
+ uint64_t RelocInfoEnd) {
+ DataExtractor::Cursor C = DataExtractor::Cursor(RelocInfoStart);
+ uint32_t RecSize = Extractor.getU32(C);
+ if (!C)
+ return Err(".BTF.ext", C);
+ if (RecSize < 16)
+ return Err("unexpected .BTF.ext field reloc info record length: ")
+ << RecSize;
+ while (C && C.tell() < RelocInfoEnd) {
+ uint32_t SecNameOff = Extractor.getU32(C);
+ uint32_t NumInfo = Extractor.getU32(C);
+ StringRef SecName = findString(SecNameOff);
+ std::optional<SectionRef> Sec = Ctx.findSection(SecName);
+ BTFRelocVector &Relocs = SectionRelocs[Sec->getIndex()];
+ for (uint32_t I = 0; C && I < NumInfo; ++I) {
+ uint64_t RecStart = C.tell();
+ uint32_t InsnOff = Extractor.getU32(C);
+ uint32_t TypeID = Extractor.getU32(C);
+ uint32_t OffsetNameOff = Extractor.getU32(C);
+ uint32_t RelocKind = Extractor.getU32(C);
+ if (!C)
+ return Err(".BTF.ext", C);
+ Relocs.push_back({InsnOff, TypeID, OffsetNameOff, RelocKind});
+ C.seek(RecStart + RecSize);
+ }
+ llvm::stable_sort(
+ Relocs, [](const BTF::BPFFieldReloc &L, const BTF::BPFFieldReloc &R) {
+ return L.InsnOffset < R.InsnOffset;
+ });
+ }
+ if (!C)
+ return Err(".BTF.ext", C);
+
+ return Error::success();
+}
+
+Error BTFParser::parse(const ObjectFile &Obj, const ParseOptions &Opts) {
StringsTable = StringRef();
SectionLines.clear();
+ SectionRelocs.clear();
+ Types.clear();
+ TypesBuffer = OwningArrayRef<uint8_t>();
- ParseContext Ctx(Obj);
+ ParseContext Ctx(Obj, Opts);
std::optional<SectionRef> BTF;
std::optional<SectionRef> BTFExt;
for (SectionRef Sec : Obj.sections()) {
@@ -264,20 +428,430 @@ StringRef BTFParser::findString(uint32_t Offset) const {
return StringsTable.slice(Offset, StringsTable.find(0, Offset));
}
-const BTF::BPFLineInfo *
-BTFParser::findLineInfo(SectionedAddress Address) const {
- auto MaybeSecInfo = SectionLines.find(Address.SectionIndex);
- if (MaybeSecInfo == SectionLines.end())
+template <typename T>
+static const T *findInfo(const DenseMap<uint64_t, SmallVector<T, 0>> &SecMap,
+ SectionedAddress Address) {
+ auto MaybeSecInfo = SecMap.find(Address.SectionIndex);
+ if (MaybeSecInfo == SecMap.end())
return nullptr;
- const BTFLinesVector &SecInfo = MaybeSecInfo->second;
+ const SmallVector<T, 0> &SecInfo = MaybeSecInfo->second;
const uint64_t TargetOffset = Address.Address;
- BTFLinesVector::const_iterator LineInfo =
- llvm::partition_point(SecInfo, [=](const BTF::BPFLineInfo &Line) {
- return Line.InsnOffset < TargetOffset;
- });
- if (LineInfo == SecInfo.end() || LineInfo->InsnOffset != Address.Address)
+ typename SmallVector<T, 0>::const_iterator MaybeInfo = llvm::partition_point(
+ SecInfo, [=](const T &Entry) { return Entry.InsnOffset < TargetOffset; });
+ if (MaybeInfo == SecInfo.end() || MaybeInfo->InsnOffset != Address.Address)
return nullptr;
- return LineInfo;
+ return &*MaybeInfo;
+}
+
+const BTF::BPFLineInfo *
+BTFParser::findLineInfo(SectionedAddress Address) const {
+ return findInfo(SectionLines, Address);
+}
+
+const BTF::BPFFieldReloc *
+BTFParser::findFieldReloc(SectionedAddress Address) const {
+ return findInfo(SectionRelocs, Address);
+}
+
+const BTF::CommonType *BTFParser::findType(uint32_t Id) const {
+ if (Id < Types.size())
+ return Types[Id];
+ return nullptr;
+}
+
+enum RelocKindGroup {
+ RKG_FIELD,
+ RKG_TYPE,
+ RKG_ENUMVAL,
+ RKG_UNKNOWN,
+};
+
+static RelocKindGroup relocKindGroup(const BTF::BPFFieldReloc *Reloc) {
+ switch (Reloc->RelocKind) {
+ case BTF::FIELD_BYTE_OFFSET:
+ case BTF::FIELD_BYTE_SIZE:
+ case BTF::FIELD_EXISTENCE:
+ case BTF::FIELD_SIGNEDNESS:
+ case BTF::FIELD_LSHIFT_U64:
+ case BTF::FIELD_RSHIFT_U64:
+ return RKG_FIELD;
+ case BTF::BTF_TYPE_ID_LOCAL:
+ case BTF::BTF_TYPE_ID_REMOTE:
+ case BTF::TYPE_EXISTENCE:
+ case BTF::TYPE_MATCH:
+ case BTF::TYPE_SIZE:
+ return RKG_TYPE;
+ case BTF::ENUM_VALUE_EXISTENCE:
+ case BTF::ENUM_VALUE:
+ return RKG_ENUMVAL;
+ default:
+ return RKG_UNKNOWN;
+ }
+}
+
+static bool isMod(const BTF::CommonType *Type) {
+ switch (Type->getKind()) {
+ case BTF::BTF_KIND_VOLATILE:
+ case BTF::BTF_KIND_CONST:
+ case BTF::BTF_KIND_RESTRICT:
+ case BTF::BTF_KIND_TYPE_TAG:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool printMod(const BTFParser &BTF, const BTF::CommonType *Type,
+ raw_ostream &Stream) {
+ switch (Type->getKind()) {
+ case BTF::BTF_KIND_CONST:
+ Stream << " const";
+ break;
+ case BTF::BTF_KIND_VOLATILE:
+ Stream << " volatile";
+ break;
+ case BTF::BTF_KIND_RESTRICT:
+ Stream << " restrict";
+ break;
+ case BTF::BTF_KIND_TYPE_TAG:
+ Stream << " type_tag(\"" << BTF.findString(Type->NameOff) << "\")";
+ break;
+ default:
+ return false;
+ }
+ return true;
+}
+
+static const BTF::CommonType *skipModsAndTypedefs(const BTFParser &BTF,
+ const BTF::CommonType *Type) {
+ while (isMod(Type) || Type->getKind() == BTF::BTF_KIND_TYPEDEF) {
+ auto *Base = BTF.findType(Type->Type);
+ if (!Base)
+ break;
+ Type = Base;
+ }
+ return Type;
+}
+
+namespace {
+struct StrOrAnon {
+ const BTFParser &BTF;
+ uint32_t Offset;
+ uint32_t Idx;
+};
+
+static raw_ostream &operator<<(raw_ostream &Stream, const StrOrAnon &S) {
+ StringRef Str = S.BTF.findString(S.Offset);
+ if (Str.empty())
+ Stream << "<anon " << S.Idx << ">";
+ else
+ Stream << Str;
+ return Stream;
+}
+} // anonymous namespace
+
+static void relocKindName(uint32_t X, raw_ostream &Out) {
+ Out << "<";
+ switch (X) {
+ default:
+ Out << "reloc kind #" << X;
+ break;
+ case BTF::FIELD_BYTE_OFFSET:
+ Out << "byte_off";
+ break;
+ case BTF::FIELD_BYTE_SIZE:
+ Out << "byte_sz";
+ break;
+ case BTF::FIELD_EXISTENCE:
+ Out << "field_exists";
+ break;
+ case BTF::FIELD_SIGNEDNESS:
+ Out << "signed";
+ break;
+ case BTF::FIELD_LSHIFT_U64:
+ Out << "lshift_u64";
+ break;
+ case BTF::FIELD_RSHIFT_U64:
+ Out << "rshift_u64";
+ break;
+ case BTF::BTF_TYPE_ID_LOCAL:
+ Out << "local_type_id";
+ break;
+ case BTF::BTF_TYPE_ID_REMOTE:
+ Out << "target_type_id";
+ break;
+ case BTF::TYPE_EXISTENCE:
+ Out << "type_exists";
+ break;
+ case BTF::TYPE_MATCH:
+ Out << "type_matches";
+ break;
+ case BTF::TYPE_SIZE:
+ Out << "type_size";
+ break;
+ case BTF::ENUM_VALUE_EXISTENCE:
+ Out << "enumval_exists";
+ break;
+ case BTF::ENUM_VALUE:
+ Out << "enumval_value";
+ break;
+ }
+ Out << ">";
+}
+
+// Produces a human readable description of a CO-RE relocation.
+// Such relocations are generated by BPF backend, and processed
+// by libbpf's BPF program loader [1].
+//
+// Each relocation record has the following information:
+// - Relocation kind;
+// - BTF type ID;
+// - Access string offset in string table.
+//
+// There are different kinds of relocations, these kinds could be split
+// in three groups:
+// - load-time information about types (size, existence),
+// `BTFParser::symbolize()` output for such relocations uses the template:
+//
+// <relocation-kind> [<id>] <type-name>
+//
+// For example:
+// - "<type_exists> [7] struct foo"
+// - "<type_size> [7] struct foo"
+//
+// - load-time information about enums (literal existence, literal value),
+// `BTFParser::symbolize()` output for such relocations uses the template:
+//
+// <relocation-kind> [<id>] <type-name>::<literal-name> = <original-value>
+//
+// For example:
+// - "<enumval_exists> [5] enum foo::U = 1"
+// - "<enumval_value> [5] enum foo::V = 2"
+//
+// - load-time information about fields (e.g. field offset),
+// `BTFParser::symbolize()` output for such relocations uses the template:
+//
+// <relocation-kind> [<id>] \
+// <type-name>::[N].<field-1-name>...<field-M-name> \
+// (<access string>)
+//
+// For example:
+// - "<byte_off> [8] struct bar::[7].v (7:1)"
+// - "<field_exists> [8] struct bar::v (0:1)"
+//
+// If the relocation description is not valid, the output follows this pattern:
+//
+// <relocation-kind> <type-id>::<unprocessed-access-string> <<error-msg>>
+//
+// For example:
+//
+// - "<type_sz> [42] '' <unknown type id: 42>"
+// - "<byte_off> [4] '0:' <field spec too short>"
+//
+// Additional examples could be found in unit tests, see
+// llvm/unittests/DebugInfo/BTF/BTFParserTest.cpp.
+//
+// [1] https://www.kernel.org/doc/html/latest/bpf/libbpf/index.html
+void BTFParser::symbolize(const BTF::BPFFieldReloc *Reloc,
+ SmallVectorImpl<char> &Result) const {
+ raw_svector_ostream Stream(Result);
+ StringRef FullSpecStr = findString(Reloc->OffsetNameOff);
+ SmallVector<uint32_t, 8> RawSpec;
+
+ auto Fail = [&](auto Msg) {
+ Result.resize(0);
+ relocKindName(Reloc->RelocKind, Stream);
+ Stream << " [" << Reloc->TypeID << "] '" << FullSpecStr << "'"
+ << " <" << Msg << ">";
+ };
+
+ // Relocation access string follows pattern [0-9]+(:[0-9]+)*,
+ // e.g.: 12:22:3. Code below splits `SpecStr` by ':', parses
+ // numbers, and pushes them to `RawSpec`.
+ StringRef SpecStr = FullSpecStr;
+ while (SpecStr.size()) {
+ unsigned long long Val;
+ if (consumeUnsignedInteger(SpecStr, 10, Val))
+ return Fail("spec string is not a number");
+ RawSpec.push_back(Val);
+ if (SpecStr.empty())
+ break;
+ if (SpecStr[0] != ':')
+ return Fail(format("unexpected spec string delimiter: '%c'", SpecStr[0]));
+ SpecStr = SpecStr.substr(1);
+ }
+
+ // Print relocation kind to `Stream`.
+ relocKindName(Reloc->RelocKind, Stream);
+
+ uint32_t CurId = Reloc->TypeID;
+ const BTF::CommonType *Type = findType(CurId);
+ if (!Type)
+ return Fail(format("unknown type id: %d", CurId));
+
+ Stream << " [" << CurId << "]";
+
+ // `Type` might have modifiers, e.g. for type 'const int' the `Type`
+ // would refer to BTF type of kind BTF_KIND_CONST.
+ // Print all these modifiers to `Stream`.
+ for (uint32_t ChainLen = 0; printMod(*this, Type, Stream); ++ChainLen) {
+ if (ChainLen >= 32)
+ return Fail("modifiers chain is too long");
+
+ CurId = Type->Type;
+ const BTF::CommonType *NextType = findType(CurId);
+ if (!NextType)
+ return Fail(format("unknown type id: %d in modifiers chain", CurId));
+ Type = NextType;
+ }
+ // Print the type name to `Stream`.
+ if (CurId == 0) {
+ Stream << " void";
+ } else {
+ switch (Type->getKind()) {
+ case BTF::BTF_KIND_TYPEDEF:
+ Stream << " typedef";
+ break;
+ case BTF::BTF_KIND_STRUCT:
+ Stream << " struct";
+ break;
+ case BTF::BTF_KIND_UNION:
+ Stream << " union";
+ break;
+ case BTF::BTF_KIND_ENUM:
+ Stream << " enum";
+ break;
+ case BTF::BTF_KIND_ENUM64:
+ Stream << " enum";
+ break;
+ case BTF::BTF_KIND_FWD:
+ if (Type->Info & BTF::FWD_UNION_FLAG)
+ Stream << " fwd union";
+ else
+ Stream << " fwd struct";
+ break;
+ default:
+ break;
+ }
+ Stream << " " << StrOrAnon({*this, Type->NameOff, CurId});
+ }
+
+ RelocKindGroup Group = relocKindGroup(Reloc);
+ // Type-based relocations don't use the access string, but the clang backend
+ // generates '0' and libbpf checks its value; do the same here.
+ if (Group == RKG_TYPE) {
+ if (RawSpec.size() != 1 || RawSpec[0] != 0)
+ return Fail("unexpected type-based relocation spec: should be '0'");
+ return;
+ }
+
+ Stream << "::";
+
+ // For enum-based relocations the access string is a single number
+ // corresponding to the sequential number of the enum literal.
+ // E.g. for `enum E { U, V }`, relocation requesting value of `V`
+ // would look as follows:
+ // - kind: BTF::ENUM_VALUE
+ // - BTF id: id for `E`
+ // - access string: "1"
+ if (Group == RKG_ENUMVAL) {
+ Type = skipModsAndTypedefs(*this, Type);
+
+ if (RawSpec.size() != 1)
+ return Fail("unexpected enumval relocation spec size");
+
+ uint32_t NameOff;
+ uint64_t Val;
+ uint32_t Idx = RawSpec[0];
+ if (auto *T = dyn_cast<BTF::EnumType>(Type)) {
+ if (T->values().size() <= Idx)
+ return Fail(format("bad value index: %d", Idx));
+ const BTF::BTFEnum &E = T->values()[Idx];
+ NameOff = E.NameOff;
+ Val = E.Val;
+ } else if (auto *T = dyn_cast<BTF::Enum64Type>(Type)) {
+ if (T->values().size() <= Idx)
+ return Fail(format("bad value index: %d", Idx));
+ const BTF::BTFEnum64 &E = T->values()[Idx];
+ NameOff = E.NameOff;
+ Val = (uint64_t)E.Val_Hi32 << 32u | E.Val_Lo32;
+ } else {
+ return Fail(format("unexpected type kind for enum relocation: %d",
+ Type->getKind()));
+ }
+
+ Stream << StrOrAnon({*this, NameOff, Idx});
+ if (Type->Info & BTF::ENUM_SIGNED_FLAG)
+ Stream << " = " << (int64_t)Val;
+ else
+ Stream << " = " << (uint64_t)Val;
+ return;
+ }
+
+ // For field-based relocations the access string is an array of numbers,
+ // which resemble index parameters for the `getelementptr` LLVM IR instruction.
+ // E.g. for the following types:
+ //
+ // struct foo {
+ // int a;
+ // int b;
+ // };
+ // struct bar {
+ // int u;
+ // struct foo v[7];
+ // };
+ //
+ // Relocation requesting `offsetof(struct bar, v[2].b)` will have
+ // the following access string: 0:1:2:1
+ // ^ ^ ^ ^
+ // | | | |
+ // initial index | | field 'b' is a field #1
+ // | | (counting from 0)
+ // | array index #2
+ // field 'v' is a field #1
+ // (counting from 0)
+ if (Group == RKG_FIELD) {
+ if (RawSpec.size() < 1)
+ return Fail("field spec too short");
+
+ if (RawSpec[0] != 0)
+ Stream << "[" << RawSpec[0] << "]";
+ for (uint32_t I = 1; I < RawSpec.size(); ++I) {
+ Type = skipModsAndTypedefs(*this, Type);
+ uint32_t Idx = RawSpec[I];
+
+ if (auto *T = dyn_cast<BTF::StructType>(Type)) {
+ if (T->getVlen() <= Idx)
+ return Fail(
+ format("member index %d for spec sub-string %d is out of range",
+ Idx, I));
+
+ const BTF::BTFMember &Member = T->members()[Idx];
+ if (I != 1 || RawSpec[0] != 0)
+ Stream << ".";
+ Stream << StrOrAnon({*this, Member.NameOff, Idx});
+ Type = findType(Member.Type);
+ if (!Type)
+ return Fail(format("unknown member type id %d for spec sub-string %d",
+ Member.Type, I));
+ } else if (auto *T = dyn_cast<BTF::ArrayType>(Type)) {
+ Stream << "[" << Idx << "]";
+ Type = findType(T->getArray().ElemType);
+ if (!Type)
+ return Fail(
+ format("unknown element type id %d for spec sub-string %d",
+ T->getArray().ElemType, I));
+ } else {
+ return Fail(format("unexpected type kind %d for spec sub-string %d",
+ Type->getKind(), I));
+ }
+ }
+
+ Stream << " (" << FullSpecStr << ")";
+ return;
+ }
+
+ return Fail(format("unknown relocation kind: %d", Reloc->RelocKind));
}
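
A sketch of consuming the extended BTFParser API; the option fields and
methods are taken from the hunks above, while the surrounding driver code is
hypothetical:

#include "llvm/ADT/SmallString.h"
#include "llvm/DebugInfo/BTF/BTFParser.h"
#include "llvm/Support/raw_ostream.h"

llvm::Error dumpFieldReloc(const llvm::object::ObjectFile &Obj,
                           llvm::object::SectionedAddress Addr) {
  llvm::BTFParser BTF;
  llvm::BTFParser::ParseOptions Opts;
  Opts.LoadTypes = true;  // required by symbolize() to resolve type ids
  Opts.LoadRelocs = true; // fills the per-section relocation tables
  if (llvm::Error E = BTF.parse(Obj, Opts))
    return E;
  if (const llvm::BTF::BPFFieldReloc *R = BTF.findFieldReloc(Addr)) {
    llvm::SmallString<64> Desc;
    BTF.symbolize(R, Desc); // e.g. "<byte_off> [8] struct bar::[7].v (7:1)"
    llvm::outs() << Desc << "\n";
  }
  return llvm::Error::success();
}
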
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
index 689c643a7006..3cafa3a93a0d 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
@@ -185,7 +185,7 @@ Error CVTypeVisitor::visitFieldListMemberStream(BinaryStreamReader &Reader) {
struct FieldListVisitHelper {
FieldListVisitHelper(TypeVisitorCallbacks &Callbacks, ArrayRef<uint8_t> Data,
VisitorDataSource Source)
- : Stream(Data, llvm::support::little), Reader(Stream),
+ : Stream(Data, llvm::endianness::little), Reader(Stream),
Deserializer(Reader),
Visitor((Source == VDS_BytesPresent) ? Pipeline : Callbacks) {
if (Source == VDS_BytesPresent) {
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/EnumTables.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/EnumTables.cpp
index b2f0099bd01c..7e3087373bfa 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/EnumTables.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/EnumTables.cpp
@@ -434,6 +434,20 @@ static const EnumEntry<uint16_t> LabelTypeEnum[] = {
CV_ENUM_CLASS_ENT(LabelType, Far),
};
+static const EnumEntry<uint16_t> JumpTableEntrySizeNames[] = {
+ CV_ENUM_CLASS_ENT(JumpTableEntrySize, Int8),
+ CV_ENUM_CLASS_ENT(JumpTableEntrySize, UInt8),
+ CV_ENUM_CLASS_ENT(JumpTableEntrySize, Int16),
+ CV_ENUM_CLASS_ENT(JumpTableEntrySize, UInt16),
+ CV_ENUM_CLASS_ENT(JumpTableEntrySize, Int32),
+ CV_ENUM_CLASS_ENT(JumpTableEntrySize, UInt32),
+ CV_ENUM_CLASS_ENT(JumpTableEntrySize, Pointer),
+ CV_ENUM_CLASS_ENT(JumpTableEntrySize, UInt8ShiftLeft),
+ CV_ENUM_CLASS_ENT(JumpTableEntrySize, UInt16ShiftLeft),
+ CV_ENUM_CLASS_ENT(JumpTableEntrySize, Int8ShiftLeft),
+ CV_ENUM_CLASS_ENT(JumpTableEntrySize, Int16ShiftLeft),
+};
+
namespace llvm {
namespace codeview {
@@ -559,5 +573,9 @@ ArrayRef<EnumEntry<uint16_t>> getLabelTypeEnum() {
return ArrayRef(LabelTypeEnum);
}
+ArrayRef<EnumEntry<uint16_t>> getJumpTableEntrySizeNames() {
+ return ArrayRef(JumpTableEntrySizeNames);
+}
+
} // end namespace codeview
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
index 460f95d96a29..e59a0197d650 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
@@ -15,7 +15,6 @@
#include "llvm/DebugInfo/CodeView/RecordName.h"
#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
#include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <algorithm>
#include <cassert>
@@ -69,13 +68,13 @@ void LazyRandomTypeCollection::reset(BinaryStreamReader &Reader,
}
void LazyRandomTypeCollection::reset(StringRef Data, uint32_t RecordCountHint) {
- BinaryStreamReader Reader(Data, support::little);
+ BinaryStreamReader Reader(Data, llvm::endianness::little);
reset(Reader, RecordCountHint);
}
void LazyRandomTypeCollection::reset(ArrayRef<uint8_t> Data,
uint32_t RecordCountHint) {
- BinaryStreamReader Reader(Data, support::little);
+ BinaryStreamReader Reader(Data, llvm::endianness::little);
reset(Reader, RecordCountHint);
}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/RecordName.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/RecordName.cpp
index 5fbbc4a5d497..e06b036ede63 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/RecordName.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/RecordName.cpp
@@ -324,7 +324,7 @@ StringRef llvm::codeview::getSymbolName(CVSymbol Sym) {
if (Sym.kind() == SymbolKind::S_CONSTANT) {
// S_CONSTANT is preceded by an APSInt, which has a variable length. So we
// have to do a full deserialization.
- BinaryStreamReader Reader(Sym.content(), llvm::support::little);
+ BinaryStreamReader Reader(Sym.content(), llvm::endianness::little);
// The container doesn't matter for single records.
SymbolRecordMapping Mapping(Reader, CodeViewContainer::ObjectFile);
ConstantSym Const(SymbolKind::S_CONSTANT);
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp
index d76905df8681..032704478ffe 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp
@@ -103,7 +103,7 @@ Error llvm::codeview::consume(BinaryStreamReader &Reader, APSInt &Num) {
Error llvm::codeview::consume(StringRef &Data, APSInt &Num) {
ArrayRef<uint8_t> Bytes(Data.bytes_begin(), Data.bytes_end());
- BinaryByteStream S(Bytes, llvm::support::little);
+ BinaryByteStream S(Bytes, llvm::endianness::little);
BinaryStreamReader SR(S);
auto EC = consume(SR, Num);
Data = Data.take_back(SR.bytesRemaining());
@@ -129,7 +129,7 @@ Error llvm::codeview::consume(BinaryStreamReader &Reader, uint32_t &Item) {
Error llvm::codeview::consume(StringRef &Data, uint32_t &Item) {
ArrayRef<uint8_t> Bytes(Data.bytes_begin(), Data.bytes_end());
- BinaryByteStream S(Bytes, llvm::support::little);
+ BinaryByteStream S(Bytes, llvm::endianness::little);
BinaryStreamReader SR(S);
auto EC = consume(SR, Item);
Data = Data.take_back(SR.bytesRemaining());
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp
index cf0c877fdbf8..25725853fb39 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp
@@ -34,7 +34,7 @@ SimpleTypeSerializer::~SimpleTypeSerializer() = default;
template <typename T>
ArrayRef<uint8_t> SimpleTypeSerializer::serialize(T &Record) {
- BinaryStreamWriter Writer(ScratchBuffer, support::little);
+ BinaryStreamWriter Writer(ScratchBuffer, llvm::endianness::little);
TypeRecordMapping Mapping(Writer);
// Write the record prefix first with a dummy length but real kind.
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp
index cfb12dbae845..f56739db7c75 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp
@@ -589,7 +589,22 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
}
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, CallerSym &Caller) {
- ListScope S(W, CVR.kind() == S_CALLEES ? "Callees" : "Callers");
+ llvm::StringRef ScopeName;
+ switch (CVR.kind()) {
+ case S_CALLEES:
+ ScopeName = "Callees";
+ break;
+ case S_CALLERS:
+ ScopeName = "Callers";
+ break;
+ case S_INLINEES:
+ ScopeName = "Inlinees";
+ break;
+ default:
+ return llvm::make_error<CodeViewError>(
+ "Unknown CV Record type for a CallerSym object!");
+ }
+ ListScope S(W, ScopeName);
for (auto FuncID : Caller.Indices)
printTypeIndex("FuncID", FuncID);
return Error::success();
@@ -643,6 +658,20 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
return Error::success();
}
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
+ JumpTableSym &JumpTable) {
+ W.printHex("BaseOffset", JumpTable.BaseOffset);
+ W.printNumber("BaseSegment", JumpTable.BaseSegment);
+ W.printEnum("SwitchType", static_cast<uint16_t>(JumpTable.SwitchType),
+ getJumpTableEntrySizeNames());
+ W.printHex("BranchOffset", JumpTable.BranchOffset);
+ W.printHex("TableOffset", JumpTable.TableOffset);
+ W.printNumber("BranchSegment", JumpTable.BranchSegment);
+ W.printNumber("TableSegment", JumpTable.TableSegment);
+ W.printNumber("EntriesCount", JumpTable.EntriesCount);
+ return Error::success();
+}
+
Error CVSymbolDumperImpl::visitUnknownSymbol(CVSymbol &CVR) {
W.printNumber("Length", CVR.length());
return Error::success();
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
index 3b627930e271..b5e366b965a9 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
@@ -483,6 +483,19 @@ Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
return Error::success();
}
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ JumpTableSym &JumpTable) {
+ error(IO.mapInteger(JumpTable.BaseOffset));
+ error(IO.mapInteger(JumpTable.BaseSegment));
+ error(IO.mapEnum(JumpTable.SwitchType));
+ error(IO.mapInteger(JumpTable.BranchOffset));
+ error(IO.mapInteger(JumpTable.TableOffset));
+ error(IO.mapInteger(JumpTable.BranchSegment));
+ error(IO.mapInteger(JumpTable.TableSegment));
+ error(IO.mapInteger(JumpTable.EntriesCount));
+ return Error::success();
+}
+
RegisterId codeview::decodeFramePtrReg(EncodedFramePtrReg EncodedReg,
CPUType CPU) {
assert(unsigned(EncodedReg) < 4);
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp
index 5fb8d497b957..eadc50f2da80 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp
@@ -8,7 +8,6 @@
#include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
@@ -20,8 +19,8 @@ using namespace llvm::codeview;
SymbolSerializer::SymbolSerializer(BumpPtrAllocator &Allocator,
CodeViewContainer Container)
- : Storage(Allocator), Stream(RecordBuffer, support::little), Writer(Stream),
- Mapping(Writer, Container) {}
+ : Storage(Allocator), Stream(RecordBuffer, llvm::endianness::little),
+ Writer(Stream), Mapping(Writer, Container) {}
Error SymbolSerializer::visitSymbolBegin(CVSymbol &Record) {
assert(!CurrentSymbol && "Already in a symbol mapping!");
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
index 682747a2b81f..59e2a85c4d4c 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
@@ -442,6 +442,7 @@ static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind,
case SymbolKind::S_THUNK32:
case SymbolKind::S_FRAMECOOKIE:
case SymbolKind::S_UNAMESPACE:
+ case SymbolKind::S_ARMSWITCHTABLE:
break;
// Scope ending symbols.
case SymbolKind::S_END:
@@ -469,7 +470,7 @@ static void resolveTypeIndexReferences(ArrayRef<uint8_t> RecordData,
RecordData = RecordData.drop_front(sizeof(RecordPrefix));
- BinaryStreamReader Reader(RecordData, support::little);
+ BinaryStreamReader Reader(RecordData, llvm::endianness::little);
for (const auto &Ref : Refs) {
Reader.setOffset(Ref.Offset);
FixedStreamArray<TypeIndex> Run;
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index 14962cd36c23..0f9c8ef485d4 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -621,7 +621,10 @@ std::optional<uint64_t> DWARFDebugNames::Entry::getCUIndex() const {
if (std::optional<DWARFFormValue> Off = lookup(dwarf::DW_IDX_compile_unit))
return Off->getAsUnsignedConstant();
// In a per-CU index, the entries without a DW_IDX_compile_unit attribute
- // implicitly refer to the single CU.
+ // implicitly refer to the single CU, but only if we don't have a
+ // DW_IDX_type_unit.
+ if (lookup(dwarf::DW_IDX_type_unit).has_value())
+ return std::nullopt;
if (NameIdx->getCUCount() == 1)
return 0;
return std::nullopt;
@@ -634,8 +637,21 @@ std::optional<uint64_t> DWARFDebugNames::Entry::getCUOffset() const {
return NameIdx->getCUOffset(*Index);
}
+std::optional<uint64_t> DWARFDebugNames::Entry::getLocalTUOffset() const {
+ std::optional<uint64_t> Index = getLocalTUIndex();
+ if (!Index || *Index >= NameIdx->getLocalTUCount())
+ return std::nullopt;
+ return NameIdx->getLocalTUOffset(*Index);
+}
+
+std::optional<uint64_t> DWARFDebugNames::Entry::getLocalTUIndex() const {
+ if (std::optional<DWARFFormValue> Off = lookup(dwarf::DW_IDX_type_unit))
+ return Off->getAsUnsignedConstant();
+ return std::nullopt;
+}
+
void DWARFDebugNames::Entry::dump(ScopedPrinter &W) const {
- W.printHex("Abbrev", Abbr->Code);
+ W.startLine() << formatv("Abbrev: {0:x}\n", Abbr->Code);
W.startLine() << formatv("Tag: {0}\n", Abbr->Tag);
assert(Abbr->Attributes.size() == Values.size());
for (auto Tuple : zip_first(Abbr->Attributes, Values)) {
@@ -969,3 +985,71 @@ DWARFDebugNames::getCUNameIndex(uint64_t CUOffset) {
}
return CUToNameIndex.lookup(CUOffset);
}
+
+static bool isObjCSelector(StringRef Name) {
+ return Name.size() > 2 && (Name[0] == '-' || Name[0] == '+') &&
+ (Name[1] == '[');
+}
+
+std::optional<ObjCSelectorNames> llvm::getObjCNamesIfSelector(StringRef Name) {
+ if (!isObjCSelector(Name))
+ return std::nullopt;
+ // "-[Atom setMass:]"
+ StringRef ClassNameStart(Name.drop_front(2));
+ size_t FirstSpace = ClassNameStart.find(' ');
+ if (FirstSpace == StringRef::npos)
+ return std::nullopt;
+
+ StringRef SelectorStart = ClassNameStart.drop_front(FirstSpace + 1);
+ if (!SelectorStart.size())
+ return std::nullopt;
+
+ ObjCSelectorNames Ans;
+ Ans.ClassName = ClassNameStart.take_front(FirstSpace);
+  Ans.Selector = SelectorStart.drop_back(); // drop ']'
+
+ // "-[Class(Category) selector :withArg ...]"
+ if (Ans.ClassName.back() == ')') {
+ size_t OpenParens = Ans.ClassName.find('(');
+ if (OpenParens != StringRef::npos) {
+ Ans.ClassNameNoCategory = Ans.ClassName.take_front(OpenParens);
+
+ Ans.MethodNameNoCategory = Name.take_front(OpenParens + 2);
+ // FIXME: The missing space here may be a bug, but dsymutil-classic also
+ // does it this way.
+ append_range(*Ans.MethodNameNoCategory, SelectorStart);
+ }
+ }
+ return Ans;
+}
+
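For reference, a hedged sketch of what getObjCNamesIfSelector yields on typical demangled names (the inputs below are illustrative, not taken from this patch; assumes the usual LLVM headers plus <cassert>):

// "+[Class(Category) doThing]" -> ClassName "Class(Category)",
//                                 ClassNameNoCategory "Class"
// "printf"                     -> std::nullopt (not a selector)
if (std::optional<ObjCSelectorNames> Names =
        llvm::getObjCNamesIfSelector("-[Atom setMass:]"))
  assert(Names->ClassName == "Atom" && Names->Selector == "setMass:");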
+std::optional<StringRef> llvm::StripTemplateParameters(StringRef Name) {
+ // We are looking for template parameters to strip from Name. e.g.
+ //
+ // operator<<B>
+ //
+  // We look for a trailing > but, if the name does not contain any <, we
+  // have something like operator>>. We also special-case operator<=>.
+ if (!Name.ends_with(">") || Name.count("<") == 0 || Name.ends_with("<=>"))
+ return {};
+
+ // How many < until we have the start of the template parameters.
+ size_t NumLeftAnglesToSkip = 1;
+
+ // If we have operator<=> then we need to skip its < as well.
+ NumLeftAnglesToSkip += Name.count("<=>");
+
+ size_t RightAngleCount = Name.count('>');
+ size_t LeftAngleCount = Name.count('<');
+
+  // If we have more < than >, we have operator< or operator<<, so we
+  // need to account for their < as well.
+ if (LeftAngleCount > RightAngleCount)
+ NumLeftAnglesToSkip += LeftAngleCount - RightAngleCount;
+
+ size_t StartOfTemplate = 0;
+ while (NumLeftAnglesToSkip--)
+ StartOfTemplate = Name.find('<', StartOfTemplate) + 1;
+
+ return Name.substr(0, StartOfTemplate - 1);
+}
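A corresponding sketch of the intended behavior of StripTemplateParameters (illustrative inputs; assumes <cassert>):

assert(*llvm::StripTemplateParameters("foo<int>") == "foo");
assert(*llvm::StripTemplateParameters("operator<<B>") == "operator<");
assert(!llvm::StripTemplateParameters("operator<=>")); // ends with "<=>"
assert(!llvm::StripTemplateParameters("operator>>"));  // contains no '<'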
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index 33168abbdc38..c671aedbc9e5 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -70,13 +70,692 @@ using DWARFLineTable = DWARFDebugLine::LineTable;
using FileLineInfoKind = DILineInfoSpecifier::FileLineInfoKind;
using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind;
+
+void fixupIndexV4(DWARFContext &C, DWARFUnitIndex &Index) {
+ using EntryType = DWARFUnitIndex::Entry::SectionContribution;
+ using EntryMap = DenseMap<uint32_t, EntryType>;
+ EntryMap Map;
+ const auto &DObj = C.getDWARFObj();
+ if (DObj.getCUIndexSection().empty())
+ return;
+
+ uint64_t Offset = 0;
+ uint32_t TruncOffset = 0;
+ DObj.forEachInfoDWOSections([&](const DWARFSection &S) {
+ if (!(C.getParseCUTUIndexManually() ||
+ S.Data.size() >= std::numeric_limits<uint32_t>::max()))
+ return;
+
+ DWARFDataExtractor Data(DObj, S, C.isLittleEndian(), 0);
+ while (Data.isValidOffset(Offset)) {
+ DWARFUnitHeader Header;
+ if (Error ExtractionErr = Header.extract(
+ C, Data, &Offset, DWARFSectionKind::DW_SECT_INFO)) {
+ C.getWarningHandler()(
+ createError("Failed to parse CU header in DWP file: " +
+ toString(std::move(ExtractionErr))));
+ Map.clear();
+ break;
+ }
+
+ auto Iter = Map.insert({TruncOffset,
+ {Header.getOffset(), Header.getNextUnitOffset() -
+ Header.getOffset()}});
+ if (!Iter.second) {
+ logAllUnhandledErrors(
+            createError("Collision occurred for truncated offset 0x" +
+ Twine::utohexstr(TruncOffset)),
+ errs());
+ Map.clear();
+ return;
+ }
+
+ Offset = Header.getNextUnitOffset();
+ TruncOffset = Offset;
+ }
+ });
+
+ if (Map.empty())
+ return;
+
+ for (DWARFUnitIndex::Entry &E : Index.getMutableRows()) {
+ if (!E.isValid())
+ continue;
+ DWARFUnitIndex::Entry::SectionContribution &CUOff = E.getContribution();
+ auto Iter = Map.find(CUOff.getOffset());
+ if (Iter == Map.end()) {
+ logAllUnhandledErrors(createError("Could not find CU offset 0x" +
+ Twine::utohexstr(CUOff.getOffset()) +
+ " in the Map"),
+ errs());
+ break;
+ }
+ CUOff.setOffset(Iter->second.getOffset());
+    if (CUOff.getLength() != Iter->second.getLength())
+ logAllUnhandledErrors(createError("Length of CU in CU index doesn't "
+ "match calculated length at offset 0x" +
+ Twine::utohexstr(CUOff.getOffset())),
+ errs());
+ }
+}
+
+void fixupIndexV5(DWARFContext &C, DWARFUnitIndex &Index) {
+ DenseMap<uint64_t, uint64_t> Map;
+
+ const auto &DObj = C.getDWARFObj();
+ DObj.forEachInfoDWOSections([&](const DWARFSection &S) {
+ if (!(C.getParseCUTUIndexManually() ||
+ S.Data.size() >= std::numeric_limits<uint32_t>::max()))
+ return;
+ DWARFDataExtractor Data(DObj, S, C.isLittleEndian(), 0);
+ uint64_t Offset = 0;
+ while (Data.isValidOffset(Offset)) {
+ DWARFUnitHeader Header;
+ if (Error ExtractionErr = Header.extract(
+ C, Data, &Offset, DWARFSectionKind::DW_SECT_INFO)) {
+ C.getWarningHandler()(
+ createError("Failed to parse CU header in DWP file: " +
+ toString(std::move(ExtractionErr))));
+ break;
+ }
+ bool CU = Header.getUnitType() == DW_UT_split_compile;
+ uint64_t Sig = CU ? *Header.getDWOId() : Header.getTypeHash();
+ Map[Sig] = Header.getOffset();
+ Offset = Header.getNextUnitOffset();
+ }
+ });
+ if (Map.empty())
+ return;
+ for (DWARFUnitIndex::Entry &E : Index.getMutableRows()) {
+ if (!E.isValid())
+ continue;
+ DWARFUnitIndex::Entry::SectionContribution &CUOff = E.getContribution();
+ auto Iter = Map.find(E.getSignature());
+ if (Iter == Map.end()) {
+ logAllUnhandledErrors(
+ createError("Could not find unit with signature 0x" +
+ Twine::utohexstr(E.getSignature()) + " in the Map"),
+ errs());
+ break;
+ }
+ CUOff.setOffset(Iter->second);
+ }
+}
+
+void fixupIndex(DWARFContext &C, DWARFUnitIndex &Index) {
+ if (Index.getVersion() < 5)
+ fixupIndexV4(C, Index);
+ else
+ fixupIndexV5(C, Index);
+}
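Why the fixups above exist, as a hedged sketch with invented numbers: a pre-v5 DWP unit index records contribution offsets in 32 bits, so for packages larger than 4 GiB the stored key is only the truncated offset, and the real one must be recovered by re-walking the unit headers:

uint64_t RealOffset = 0x100000010ULL;     // true offset in .debug_info.dwo
uint32_t IndexKey = uint32_t(RealOffset); // 0x10, the value the index holds
// fixupIndexV4: Map[IndexKey] = {RealOffset, Length}, then each index
// entry's contribution is rewritten from IndexKey back to RealOffset.
// fixupIndexV5 keys on the DWO id (split CUs) or type hash (TUs) instead.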
+
+template <typename T>
+static T &getAccelTable(std::unique_ptr<T> &Cache, const DWARFObject &Obj,
+ const DWARFSection &Section, StringRef StringSection,
+ bool IsLittleEndian) {
+ if (Cache)
+ return *Cache;
+ DWARFDataExtractor AccelSection(Obj, Section, IsLittleEndian, 0);
+ DataExtractor StrData(StringSection, IsLittleEndian, 0);
+ Cache = std::make_unique<T>(AccelSection, StrData);
+ if (Error E = Cache->extract())
+ llvm::consumeError(std::move(E));
+ return *Cache;
+}
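getAccelTable is a lazy-initialization cache: the table is built from its section on the first request and reused afterwards. The underlying shape, as a simplified sketch:

template <typename T, typename... ArgTs>
static T &lazyGet(std::unique_ptr<T> &Cache, ArgTs &&...Args) {
  if (!Cache) // first call: construct and remember
    Cache = std::make_unique<T>(std::forward<ArgTs>(Args)...);
  return *Cache; // later calls: hand back the cached object
}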
+
+
+std::unique_ptr<DWARFDebugMacro>
+DWARFContext::DWARFContextState::parseMacroOrMacinfo(MacroSecType SectionType) {
+ auto Macro = std::make_unique<DWARFDebugMacro>();
+ auto ParseAndDump = [&](DWARFDataExtractor &Data, bool IsMacro) {
+ if (Error Err = IsMacro ? Macro->parseMacro(SectionType == MacroSection
+ ? D.compile_units()
+ : D.dwo_compile_units(),
+ SectionType == MacroSection
+ ? D.getStringExtractor()
+ : D.getStringDWOExtractor(),
+ Data)
+ : Macro->parseMacinfo(Data)) {
+ D.getRecoverableErrorHandler()(std::move(Err));
+ Macro = nullptr;
+ }
+ };
+ const DWARFObject &DObj = D.getDWARFObj();
+ switch (SectionType) {
+ case MacinfoSection: {
+ DWARFDataExtractor Data(DObj.getMacinfoSection(), D.isLittleEndian(), 0);
+ ParseAndDump(Data, /*IsMacro=*/false);
+ break;
+ }
+ case MacinfoDwoSection: {
+ DWARFDataExtractor Data(DObj.getMacinfoDWOSection(), D.isLittleEndian(), 0);
+ ParseAndDump(Data, /*IsMacro=*/false);
+ break;
+ }
+ case MacroSection: {
+ DWARFDataExtractor Data(DObj, DObj.getMacroSection(), D.isLittleEndian(),
+ 0);
+ ParseAndDump(Data, /*IsMacro=*/true);
+ break;
+ }
+ case MacroDwoSection: {
+ DWARFDataExtractor Data(DObj.getMacroDWOSection(), D.isLittleEndian(), 0);
+ ParseAndDump(Data, /*IsMacro=*/true);
+ break;
+ }
+ }
+ return Macro;
+}
+
+class ThreadUnsafeDWARFContextState : public DWARFContext::DWARFContextState {
+
+ DWARFUnitVector NormalUnits;
+ std::optional<DenseMap<uint64_t, DWARFTypeUnit *>> NormalTypeUnits;
+ std::unique_ptr<DWARFUnitIndex> CUIndex;
+ std::unique_ptr<DWARFGdbIndex> GdbIndex;
+ std::unique_ptr<DWARFUnitIndex> TUIndex;
+ std::unique_ptr<DWARFDebugAbbrev> Abbrev;
+ std::unique_ptr<DWARFDebugLoc> Loc;
+ std::unique_ptr<DWARFDebugAranges> Aranges;
+ std::unique_ptr<DWARFDebugLine> Line;
+ std::unique_ptr<DWARFDebugFrame> DebugFrame;
+ std::unique_ptr<DWARFDebugFrame> EHFrame;
+ std::unique_ptr<DWARFDebugMacro> Macro;
+ std::unique_ptr<DWARFDebugMacro> Macinfo;
+ std::unique_ptr<DWARFDebugNames> Names;
+ std::unique_ptr<AppleAcceleratorTable> AppleNames;
+ std::unique_ptr<AppleAcceleratorTable> AppleTypes;
+ std::unique_ptr<AppleAcceleratorTable> AppleNamespaces;
+ std::unique_ptr<AppleAcceleratorTable> AppleObjC;
+ DWARFUnitVector DWOUnits;
+ std::optional<DenseMap<uint64_t, DWARFTypeUnit *>> DWOTypeUnits;
+ std::unique_ptr<DWARFDebugAbbrev> AbbrevDWO;
+ std::unique_ptr<DWARFDebugMacro> MacinfoDWO;
+ std::unique_ptr<DWARFDebugMacro> MacroDWO;
+ struct DWOFile {
+ object::OwningBinary<object::ObjectFile> File;
+ std::unique_ptr<DWARFContext> Context;
+ };
+ StringMap<std::weak_ptr<DWOFile>> DWOFiles;
+ std::weak_ptr<DWOFile> DWP;
+ bool CheckedForDWP = false;
+ std::string DWPName;
+
+public:
+ ThreadUnsafeDWARFContextState(DWARFContext &DC, std::string &DWP) :
+ DWARFContext::DWARFContextState(DC),
+ DWPName(std::move(DWP)) {}
+
+ DWARFUnitVector &getNormalUnits() override {
+ if (NormalUnits.empty()) {
+ const DWARFObject &DObj = D.getDWARFObj();
+ DObj.forEachInfoSections([&](const DWARFSection &S) {
+ NormalUnits.addUnitsForSection(D, S, DW_SECT_INFO);
+ });
+ NormalUnits.finishedInfoUnits();
+ DObj.forEachTypesSections([&](const DWARFSection &S) {
+ NormalUnits.addUnitsForSection(D, S, DW_SECT_EXT_TYPES);
+ });
+ }
+ return NormalUnits;
+ }
+
+ DWARFUnitVector &getDWOUnits(bool Lazy) override {
+ if (DWOUnits.empty()) {
+ const DWARFObject &DObj = D.getDWARFObj();
+
+ DObj.forEachInfoDWOSections([&](const DWARFSection &S) {
+ DWOUnits.addUnitsForDWOSection(D, S, DW_SECT_INFO, Lazy);
+ });
+ DWOUnits.finishedInfoUnits();
+ DObj.forEachTypesDWOSections([&](const DWARFSection &S) {
+ DWOUnits.addUnitsForDWOSection(D, S, DW_SECT_EXT_TYPES, Lazy);
+ });
+ }
+ return DWOUnits;
+ }
+
+ const DWARFDebugAbbrev *getDebugAbbrevDWO() override {
+ if (AbbrevDWO)
+ return AbbrevDWO.get();
+ const DWARFObject &DObj = D.getDWARFObj();
+ DataExtractor abbrData(DObj.getAbbrevDWOSection(), D.isLittleEndian(), 0);
+ AbbrevDWO = std::make_unique<DWARFDebugAbbrev>(abbrData);
+ return AbbrevDWO.get();
+ }
+
+ const DWARFUnitIndex &getCUIndex() override {
+ if (CUIndex)
+ return *CUIndex;
+
+ DataExtractor Data(D.getDWARFObj().getCUIndexSection(),
+ D.isLittleEndian(), 0);
+ CUIndex = std::make_unique<DWARFUnitIndex>(DW_SECT_INFO);
+ if (CUIndex->parse(Data))
+ fixupIndex(D, *CUIndex);
+ return *CUIndex;
+ }
+ const DWARFUnitIndex &getTUIndex() override {
+ if (TUIndex)
+ return *TUIndex;
+
+ DataExtractor Data(D.getDWARFObj().getTUIndexSection(),
+ D.isLittleEndian(), 0);
+ TUIndex = std::make_unique<DWARFUnitIndex>(DW_SECT_EXT_TYPES);
+    bool IsParseSuccessful = TUIndex->parse(Data);
+    // If we are parsing a TU index for the .debug_types section, we don't
+    // need to fix anything up.
+    if (IsParseSuccessful && TUIndex->getVersion() != 2)
+ fixupIndex(D, *TUIndex);
+ return *TUIndex;
+ }
+
+ DWARFGdbIndex &getGdbIndex() override {
+ if (GdbIndex)
+ return *GdbIndex;
+
+ DataExtractor Data(D.getDWARFObj().getGdbIndexSection(), true /*LE*/, 0);
+ GdbIndex = std::make_unique<DWARFGdbIndex>();
+ GdbIndex->parse(Data);
+ return *GdbIndex;
+ }
+
+ const DWARFDebugAbbrev *getDebugAbbrev() override {
+ if (Abbrev)
+ return Abbrev.get();
+
+ DataExtractor Data(D.getDWARFObj().getAbbrevSection(),
+ D.isLittleEndian(), 0);
+ Abbrev = std::make_unique<DWARFDebugAbbrev>(Data);
+ return Abbrev.get();
+ }
+
+ const DWARFDebugLoc *getDebugLoc() override {
+ if (Loc)
+ return Loc.get();
+
+ const DWARFObject &DObj = D.getDWARFObj();
+ // Assume all units have the same address byte size.
+ auto Data =
+ D.getNumCompileUnits()
+ ? DWARFDataExtractor(DObj, DObj.getLocSection(), D.isLittleEndian(),
+ D.getUnitAtIndex(0)->getAddressByteSize())
+ : DWARFDataExtractor("", D.isLittleEndian(), 0);
+ Loc = std::make_unique<DWARFDebugLoc>(std::move(Data));
+ return Loc.get();
+ }
+
+ const DWARFDebugAranges *getDebugAranges() override {
+ if (Aranges)
+ return Aranges.get();
+
+ Aranges = std::make_unique<DWARFDebugAranges>();
+ Aranges->generate(&D);
+ return Aranges.get();
+ }
+
+ Expected<const DWARFDebugLine::LineTable *>
+ getLineTableForUnit(DWARFUnit *U, function_ref<void(Error)> RecoverableErrorHandler) override {
+ if (!Line)
+ Line = std::make_unique<DWARFDebugLine>();
+
+ auto UnitDIE = U->getUnitDIE();
+ if (!UnitDIE)
+ return nullptr;
+
+ auto Offset = toSectionOffset(UnitDIE.find(DW_AT_stmt_list));
+ if (!Offset)
+ return nullptr; // No line table for this compile unit.
+
+ uint64_t stmtOffset = *Offset + U->getLineTableOffset();
+ // See if the line table is cached.
+ if (const DWARFLineTable *lt = Line->getLineTable(stmtOffset))
+ return lt;
+
+ // Make sure the offset is good before we try to parse.
+ if (stmtOffset >= U->getLineSection().Data.size())
+ return nullptr;
+
+ // We have to parse it first.
+ DWARFDataExtractor Data(U->getContext().getDWARFObj(), U->getLineSection(),
+ U->isLittleEndian(), U->getAddressByteSize());
+ return Line->getOrParseLineTable(Data, stmtOffset, U->getContext(), U,
+ RecoverableErrorHandler);
+
+ }
+
+ void clearLineTableForUnit(DWARFUnit *U) override {
+ if (!Line)
+ return;
+
+ auto UnitDIE = U->getUnitDIE();
+ if (!UnitDIE)
+ return;
+
+ auto Offset = toSectionOffset(UnitDIE.find(DW_AT_stmt_list));
+ if (!Offset)
+ return;
+
+ uint64_t stmtOffset = *Offset + U->getLineTableOffset();
+ Line->clearLineTable(stmtOffset);
+ }
+
+ Expected<const DWARFDebugFrame *> getDebugFrame() override {
+ if (DebugFrame)
+ return DebugFrame.get();
+ const DWARFObject &DObj = D.getDWARFObj();
+ const DWARFSection &DS = DObj.getFrameSection();
+
+ // There's a "bug" in the DWARFv3 standard with respect to the target address
+ // size within debug frame sections. While DWARF is supposed to be independent
+ // of its container, FDEs have fields with size being "target address size",
+ // which isn't specified in DWARF in general. It's only specified for CUs, but
+ // .eh_frame can appear without a .debug_info section. Follow the example of
+ // other tools (libdwarf) and extract this from the container (ObjectFile
+    // provides this information). This problem is fixed in DWARFv4.
+ // See this dwarf-discuss discussion for more details:
+ // http://lists.dwarfstd.org/htdig.cgi/dwarf-discuss-dwarfstd.org/2011-December/001173.html
+ DWARFDataExtractor Data(DObj, DS, D.isLittleEndian(),
+ DObj.getAddressSize());
+ auto DF =
+ std::make_unique<DWARFDebugFrame>(D.getArch(), /*IsEH=*/false,
+ DS.Address);
+ if (Error E = DF->parse(Data))
+ return std::move(E);
+
+ DebugFrame.swap(DF);
+ return DebugFrame.get();
+ }
+
+ Expected<const DWARFDebugFrame *> getEHFrame() override {
+ if (EHFrame)
+ return EHFrame.get();
+ const DWARFObject &DObj = D.getDWARFObj();
+
+ const DWARFSection &DS = DObj.getEHFrameSection();
+ DWARFDataExtractor Data(DObj, DS, D.isLittleEndian(),
+ DObj.getAddressSize());
+ auto DF =
+ std::make_unique<DWARFDebugFrame>(D.getArch(), /*IsEH=*/true,
+ DS.Address);
+ if (Error E = DF->parse(Data))
+ return std::move(E);
+ EHFrame.swap(DF);
+ return EHFrame.get();
+ }
+
+ const DWARFDebugMacro *getDebugMacinfo() override {
+ if (!Macinfo)
+ Macinfo = parseMacroOrMacinfo(MacinfoSection);
+ return Macinfo.get();
+ }
+ const DWARFDebugMacro *getDebugMacinfoDWO() override {
+ if (!MacinfoDWO)
+ MacinfoDWO = parseMacroOrMacinfo(MacinfoDwoSection);
+ return MacinfoDWO.get();
+ }
+ const DWARFDebugMacro *getDebugMacro() override {
+ if (!Macro)
+ Macro = parseMacroOrMacinfo(MacroSection);
+ return Macro.get();
+ }
+ const DWARFDebugMacro *getDebugMacroDWO() override {
+ if (!MacroDWO)
+ MacroDWO = parseMacroOrMacinfo(MacroDwoSection);
+ return MacroDWO.get();
+ }
+ const DWARFDebugNames &getDebugNames() override {
+ const DWARFObject &DObj = D.getDWARFObj();
+ return getAccelTable(Names, DObj, DObj.getNamesSection(),
+ DObj.getStrSection(), D.isLittleEndian());
+ }
+ const AppleAcceleratorTable &getAppleNames() override {
+ const DWARFObject &DObj = D.getDWARFObj();
+ return getAccelTable(AppleNames, DObj, DObj.getAppleNamesSection(),
+ DObj.getStrSection(), D.isLittleEndian());
+
+ }
+ const AppleAcceleratorTable &getAppleTypes() override {
+ const DWARFObject &DObj = D.getDWARFObj();
+ return getAccelTable(AppleTypes, DObj, DObj.getAppleTypesSection(),
+ DObj.getStrSection(), D.isLittleEndian());
+
+ }
+ const AppleAcceleratorTable &getAppleNamespaces() override {
+ const DWARFObject &DObj = D.getDWARFObj();
+ return getAccelTable(AppleNamespaces, DObj,
+ DObj.getAppleNamespacesSection(),
+ DObj.getStrSection(), D.isLittleEndian());
+
+ }
+ const AppleAcceleratorTable &getAppleObjC() override {
+ const DWARFObject &DObj = D.getDWARFObj();
+ return getAccelTable(AppleObjC, DObj, DObj.getAppleObjCSection(),
+ DObj.getStrSection(), D.isLittleEndian());
+ }
+
+ std::shared_ptr<DWARFContext>
+ getDWOContext(StringRef AbsolutePath) override {
+ if (auto S = DWP.lock()) {
+ DWARFContext *Ctxt = S->Context.get();
+ return std::shared_ptr<DWARFContext>(std::move(S), Ctxt);
+ }
+
+ std::weak_ptr<DWOFile> *Entry = &DWOFiles[AbsolutePath];
+
+ if (auto S = Entry->lock()) {
+ DWARFContext *Ctxt = S->Context.get();
+ return std::shared_ptr<DWARFContext>(std::move(S), Ctxt);
+ }
+
+ const DWARFObject &DObj = D.getDWARFObj();
+
+ Expected<OwningBinary<ObjectFile>> Obj = [&] {
+ if (!CheckedForDWP) {
+ SmallString<128> DWPName;
+ auto Obj = object::ObjectFile::createObjectFile(
+ this->DWPName.empty()
+ ? (DObj.getFileName() + ".dwp").toStringRef(DWPName)
+ : StringRef(this->DWPName));
+ if (Obj) {
+ Entry = &DWP;
+ return Obj;
+ } else {
+ CheckedForDWP = true;
+ // TODO: Should this error be handled (maybe in a high verbosity mode)
+ // before falling back to .dwo files?
+ consumeError(Obj.takeError());
+ }
+ }
+
+ return object::ObjectFile::createObjectFile(AbsolutePath);
+ }();
+
+ if (!Obj) {
+ // TODO: Actually report errors helpfully.
+ consumeError(Obj.takeError());
+ return nullptr;
+ }
+
+ auto S = std::make_shared<DWOFile>();
+ S->File = std::move(Obj.get());
+    // Allow multi-threaded access if there is a .dwp file, since the CU
+    // index and TU index might be accessed from multiple threads.
+ bool ThreadSafe = isThreadSafe();
+ S->Context = DWARFContext::create(
+ *S->File.getBinary(), DWARFContext::ProcessDebugRelocations::Ignore,
+ nullptr, "", WithColor::defaultErrorHandler,
+ WithColor::defaultWarningHandler, ThreadSafe);
+ *Entry = S;
+ auto *Ctxt = S->Context.get();
+ return std::shared_ptr<DWARFContext>(std::move(S), Ctxt);
+ }
+
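The returns above use std::shared_ptr's aliasing constructor: the caller gets a pointer to the embedded DWARFContext while the whole DWOFile (object file plus context) stays alive. A self-contained sketch of the idiom:

#include <memory>
struct Owner { int Inner = 0; }; // stands in for DWOFile
std::shared_ptr<int> getInner(std::shared_ptr<Owner> S) {
  int *P = &S->Inner;
  // Aliasing constructor: shares Owner's reference count, but get()
  // returns P, so the member cannot outlive its owner.
  return std::shared_ptr<int>(std::move(S), P);
}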
+ bool isThreadSafe() const override { return false; }
+
+ const DenseMap<uint64_t, DWARFTypeUnit *> &getNormalTypeUnitMap() {
+ if (!NormalTypeUnits) {
+ NormalTypeUnits.emplace();
+      for (const auto &U : D.normal_units()) {
+ if (DWARFTypeUnit *TU = dyn_cast<DWARFTypeUnit>(U.get()))
+ (*NormalTypeUnits)[TU->getTypeHash()] = TU;
+ }
+ }
+ return *NormalTypeUnits;
+ }
+
+ const DenseMap<uint64_t, DWARFTypeUnit *> &getDWOTypeUnitMap() {
+ if (!DWOTypeUnits) {
+ DWOTypeUnits.emplace();
+      for (const auto &U : D.dwo_units()) {
+ if (DWARFTypeUnit *TU = dyn_cast<DWARFTypeUnit>(U.get()))
+ (*DWOTypeUnits)[TU->getTypeHash()] = TU;
+ }
+ }
+ return *DWOTypeUnits;
+ }
+
+ const DenseMap<uint64_t, DWARFTypeUnit *> &
+ getTypeUnitMap(bool IsDWO) override {
+ if (IsDWO)
+ return getDWOTypeUnitMap();
+ else
+ return getNormalTypeUnitMap();
+ }
+
+
+};
+
+class ThreadSafeState : public ThreadUnsafeDWARFContextState {
+ std::recursive_mutex Mutex;
+
+public:
+ ThreadSafeState(DWARFContext &DC, std::string &DWP) :
+ ThreadUnsafeDWARFContextState(DC, DWP) {}
+
+ DWARFUnitVector &getNormalUnits() override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::getNormalUnits();
+ }
+ DWARFUnitVector &getDWOUnits(bool Lazy) override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+    // Don't use lazy parsing when thread safety is required: in lazy mode,
+    // DWARFUnitVector keeps adding units to itself on demand, which is not
+    // safe in a multi-threaded environment.
+ return ThreadUnsafeDWARFContextState::getDWOUnits(false);
+ }
+ const DWARFUnitIndex &getCUIndex() override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::getCUIndex();
+ }
+ const DWARFDebugAbbrev *getDebugAbbrevDWO() override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::getDebugAbbrevDWO();
+ }
+
+ const DWARFUnitIndex &getTUIndex() override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::getTUIndex();
+ }
+ DWARFGdbIndex &getGdbIndex() override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::getGdbIndex();
+ }
+ const DWARFDebugAbbrev *getDebugAbbrev() override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::getDebugAbbrev();
+ }
+ const DWARFDebugLoc *getDebugLoc() override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::getDebugLoc();
+ }
+ const DWARFDebugAranges *getDebugAranges() override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::getDebugAranges();
+ }
+ Expected<const DWARFDebugLine::LineTable *>
+ getLineTableForUnit(DWARFUnit *U, function_ref<void(Error)> RecoverableErrorHandler) override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::getLineTableForUnit(U, RecoverableErrorHandler);
+ }
+ void clearLineTableForUnit(DWARFUnit *U) override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::clearLineTableForUnit(U);
+ }
+ Expected<const DWARFDebugFrame *> getDebugFrame() override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::getDebugFrame();
+ }
+ Expected<const DWARFDebugFrame *> getEHFrame() override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::getEHFrame();
+ }
+ const DWARFDebugMacro *getDebugMacinfo() override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::getDebugMacinfo();
+ }
+ const DWARFDebugMacro *getDebugMacinfoDWO() override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::getDebugMacinfoDWO();
+ }
+ const DWARFDebugMacro *getDebugMacro() override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::getDebugMacro();
+ }
+ const DWARFDebugMacro *getDebugMacroDWO() override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::getDebugMacroDWO();
+ }
+ const DWARFDebugNames &getDebugNames() override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::getDebugNames();
+ }
+ const AppleAcceleratorTable &getAppleNames() override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::getAppleNames();
+ }
+ const AppleAcceleratorTable &getAppleTypes() override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::getAppleTypes();
+ }
+ const AppleAcceleratorTable &getAppleNamespaces() override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::getAppleNamespaces();
+ }
+ const AppleAcceleratorTable &getAppleObjC() override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::getAppleObjC();
+ }
+ std::shared_ptr<DWARFContext>
+ getDWOContext(StringRef AbsolutePath) override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::getDWOContext(AbsolutePath);
+ }
+
+ bool isThreadSafe() const override { return true; }
+
+ const DenseMap<uint64_t, DWARFTypeUnit *> &
+ getTypeUnitMap(bool IsDWO) override {
+ std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
+ return ThreadUnsafeDWARFContextState::getTypeUnitMap(IsDWO);
+ }
+};
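ThreadSafeState is a monitor-style wrapper: every override takes the mutex and delegates to the thread-unsafe base, with a recursive mutex presumably because one locked accessor can end up invoking another on the same state. A minimal self-contained sketch of the shape:

#include <mutex>
struct Unsafe {
  virtual ~Unsafe() = default;
  virtual int get() { return 42; } // the real work, not thread-safe
};
struct Safe : Unsafe {
  std::recursive_mutex M;
  int get() override {
    std::unique_lock<std::recursive_mutex> Lock(M); // same idiom as above
    return Unsafe::get();
  }
};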
+
+
+
DWARFContext::DWARFContext(std::unique_ptr<const DWARFObject> DObj,
std::string DWPName,
std::function<void(Error)> RecoverableErrorHandler,
- std::function<void(Error)> WarningHandler)
- : DIContext(CK_DWARF), DWPName(std::move(DWPName)),
+ std::function<void(Error)> WarningHandler,
+ bool ThreadSafe)
+ : DIContext(CK_DWARF),
RecoverableErrorHandler(RecoverableErrorHandler),
- WarningHandler(WarningHandler), DObj(std::move(DObj)) {}
+ WarningHandler(WarningHandler), DObj(std::move(DObj)) {
+ if (ThreadSafe)
+ State = std::make_unique<ThreadSafeState>(*this, DWPName);
+ else
+ State = std::make_unique<ThreadUnsafeDWARFContextState>(*this, DWPName);
+ }
DWARFContext::~DWARFContext() = default;
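Callers select the state implementation when constructing the context; a hypothetical call (ObjFile stands in for an object::ObjectFile the caller already has), mirroring the DWP path above:

auto Ctx = DWARFContext::create(
    *ObjFile, DWARFContext::ProcessDebugRelocations::Ignore,
    /*L=*/nullptr, /*DWPName=*/"", WithColor::defaultErrorHandler,
    WithColor::defaultWarningHandler, /*ThreadSafe=*/true);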
@@ -266,47 +945,6 @@ static void dumpRnglistsSection(
}
}
-std::unique_ptr<DWARFDebugMacro>
-DWARFContext::parseMacroOrMacinfo(MacroSecType SectionType) {
- auto Macro = std::make_unique<DWARFDebugMacro>();
- auto ParseAndDump = [&](DWARFDataExtractor &Data, bool IsMacro) {
- if (Error Err = IsMacro ? Macro->parseMacro(SectionType == MacroSection
- ? compile_units()
- : dwo_compile_units(),
- SectionType == MacroSection
- ? getStringExtractor()
- : getStringDWOExtractor(),
- Data)
- : Macro->parseMacinfo(Data)) {
- RecoverableErrorHandler(std::move(Err));
- Macro = nullptr;
- }
- };
- switch (SectionType) {
- case MacinfoSection: {
- DWARFDataExtractor Data(DObj->getMacinfoSection(), isLittleEndian(), 0);
- ParseAndDump(Data, /*IsMacro=*/false);
- break;
- }
- case MacinfoDwoSection: {
- DWARFDataExtractor Data(DObj->getMacinfoDWOSection(), isLittleEndian(), 0);
- ParseAndDump(Data, /*IsMacro=*/false);
- break;
- }
- case MacroSection: {
- DWARFDataExtractor Data(*DObj, DObj->getMacroSection(), isLittleEndian(),
- 0);
- ParseAndDump(Data, /*IsMacro=*/true);
- break;
- }
- case MacroDwoSection: {
- DWARFDataExtractor Data(DObj->getMacroDWOSection(), isLittleEndian(), 0);
- ParseAndDump(Data, /*IsMacro=*/true);
- break;
- }
- }
- return Macro;
-}
static void dumpLoclistsSection(raw_ostream &OS, DIDumpOptions DumpOpts,
DWARFDataExtractor Data, const DWARFObject &Obj,
@@ -700,34 +1338,22 @@ void DWARFContext::dump(
DWARFTypeUnit *DWARFContext::getTypeUnitForHash(uint16_t Version, uint64_t Hash,
bool IsDWO) {
- parseDWOUnits(LazyParse);
-
+ DWARFUnitVector &DWOUnits = State->getDWOUnits();
if (const auto &TUI = getTUIndex()) {
if (const auto *R = TUI.getFromHash(Hash))
return dyn_cast_or_null<DWARFTypeUnit>(
DWOUnits.getUnitForIndexEntry(*R));
return nullptr;
}
-
- struct UnitContainers {
- const DWARFUnitVector &Units;
- std::optional<DenseMap<uint64_t, DWARFTypeUnit *>> &Map;
- };
- UnitContainers Units = IsDWO ? UnitContainers{DWOUnits, DWOTypeUnits}
- : UnitContainers{NormalUnits, NormalTypeUnits};
- if (!Units.Map) {
- Units.Map.emplace();
- for (const auto &U : IsDWO ? dwo_units() : normal_units()) {
- if (DWARFTypeUnit *TU = dyn_cast<DWARFTypeUnit>(U.get()))
- (*Units.Map)[TU->getTypeHash()] = TU;
- }
- }
-
- return (*Units.Map)[Hash];
+ const DenseMap<uint64_t, DWARFTypeUnit *> &Map = State->getTypeUnitMap(IsDWO);
+ auto Iter = Map.find(Hash);
+ if (Iter != Map.end())
+ return Iter->second;
+ return nullptr;
}
DWARFCompileUnit *DWARFContext::getDWOCompileUnitForHash(uint64_t Hash) {
- parseDWOUnits(LazyParse);
+ DWARFUnitVector &DWOUnits = State->getDWOUnits(LazyParse);
if (const auto &CUI = getCUIndex()) {
if (const auto *R = CUI.getFromHash(Hash))
@@ -757,8 +1383,7 @@ DWARFCompileUnit *DWARFContext::getDWOCompileUnitForHash(uint64_t Hash) {
}
DWARFDie DWARFContext::getDIEForOffset(uint64_t Offset) {
- parseNormalUnits();
- if (auto *CU = NormalUnits.getUnitForOffset(Offset))
+ if (auto *CU = State->getNormalUnits().getUnitForOffset(Offset))
return CU->getDIEForOffset(Offset);
return DWARFDie();
}
@@ -782,302 +1407,77 @@ bool DWARFContext::verify(raw_ostream &OS, DIDumpOptions DumpOpts) {
return Success;
}
-void fixupIndexV4(const DWARFObject &DObj, DWARFContext &C,
- DWARFUnitIndex &Index) {
- using EntryType = DWARFUnitIndex::Entry::SectionContribution;
- using EntryMap = DenseMap<uint32_t, EntryType>;
- EntryMap Map;
- if (DObj.getCUIndexSection().empty())
- return;
-
- uint64_t Offset = 0;
- uint32_t TruncOffset = 0;
- DObj.forEachInfoDWOSections([&](const DWARFSection &S) {
- if (!(C.getParseCUTUIndexManually() ||
- S.Data.size() >= std::numeric_limits<uint32_t>::max()))
- return;
-
- DWARFDataExtractor Data(DObj, S, C.isLittleEndian(), 0);
- while (Data.isValidOffset(Offset)) {
- DWARFUnitHeader Header;
- if (!Header.extract(C, Data, &Offset, DWARFSectionKind::DW_SECT_INFO)) {
- logAllUnhandledErrors(
- createError("Failed to parse CU header in DWP file"), errs());
- Map.clear();
- break;
- }
-
- auto Iter = Map.insert({TruncOffset,
- {Header.getOffset(), Header.getNextUnitOffset() -
- Header.getOffset()}});
- if (!Iter.second) {
- logAllUnhandledErrors(
- createError("Collision occured between for truncated offset 0x" +
- Twine::utohexstr(TruncOffset)),
- errs());
- Map.clear();
- return;
- }
-
- Offset = Header.getNextUnitOffset();
- TruncOffset = Offset;
- }
- });
-
- if (Map.empty())
- return;
-
- for (DWARFUnitIndex::Entry &E : Index.getMutableRows()) {
- if (!E.isValid())
- continue;
- DWARFUnitIndex::Entry::SectionContribution &CUOff = E.getContribution();
- auto Iter = Map.find(CUOff.getOffset());
- if (Iter == Map.end()) {
- logAllUnhandledErrors(createError("Could not find CU offset 0x" +
- Twine::utohexstr(CUOff.getOffset()) +
- " in the Map"),
- errs());
- break;
- }
- CUOff.setOffset(Iter->second.getOffset());
- if (CUOff.getOffset() != Iter->second.getOffset())
- logAllUnhandledErrors(createError("Length of CU in CU index doesn't "
- "match calculated length at offset 0x" +
- Twine::utohexstr(CUOff.getOffset())),
- errs());
- }
-}
-
-void fixupIndexV5(const DWARFObject &DObj, DWARFContext &C,
- DWARFUnitIndex &Index) {
- DenseMap<uint64_t, uint64_t> Map;
-
- DObj.forEachInfoDWOSections([&](const DWARFSection &S) {
- if (!(C.getParseCUTUIndexManually() ||
- S.Data.size() >= std::numeric_limits<uint32_t>::max()))
- return;
- DWARFDataExtractor Data(DObj, S, C.isLittleEndian(), 0);
- uint64_t Offset = 0;
- while (Data.isValidOffset(Offset)) {
- DWARFUnitHeader Header;
- if (!Header.extract(C, Data, &Offset, DWARFSectionKind::DW_SECT_INFO)) {
- logAllUnhandledErrors(
- createError("Failed to parse unit header in DWP file"), errs());
- break;
- }
- bool CU = Header.getUnitType() == DW_UT_split_compile;
- uint64_t Sig = CU ? *Header.getDWOId() : Header.getTypeHash();
- Map[Sig] = Header.getOffset();
- Offset = Header.getNextUnitOffset();
- }
- });
- if (Map.empty())
- return;
- for (DWARFUnitIndex::Entry &E : Index.getMutableRows()) {
- if (!E.isValid())
- continue;
- DWARFUnitIndex::Entry::SectionContribution &CUOff = E.getContribution();
- auto Iter = Map.find(E.getSignature());
- if (Iter == Map.end()) {
- logAllUnhandledErrors(
- createError("Could not find unit with signature 0x" +
- Twine::utohexstr(E.getSignature()) + " in the Map"),
- errs());
- break;
- }
- CUOff.setOffset(Iter->second);
- }
-}
-
-void fixupIndex(const DWARFObject &DObj, DWARFContext &C,
- DWARFUnitIndex &Index) {
- if (Index.getVersion() < 5)
- fixupIndexV4(DObj, C, Index);
- else
- fixupIndexV5(DObj, C, Index);
-}
-
const DWARFUnitIndex &DWARFContext::getCUIndex() {
- if (CUIndex)
- return *CUIndex;
-
- DataExtractor CUIndexData(DObj->getCUIndexSection(), isLittleEndian(), 0);
- CUIndex = std::make_unique<DWARFUnitIndex>(DW_SECT_INFO);
- bool IsParseSuccessful = CUIndex->parse(CUIndexData);
- if (IsParseSuccessful)
- fixupIndex(*DObj, *this, *CUIndex);
- return *CUIndex;
+ return State->getCUIndex();
}
const DWARFUnitIndex &DWARFContext::getTUIndex() {
- if (TUIndex)
- return *TUIndex;
-
- DataExtractor TUIndexData(DObj->getTUIndexSection(), isLittleEndian(), 0);
- TUIndex = std::make_unique<DWARFUnitIndex>(DW_SECT_EXT_TYPES);
- bool isParseSuccessful = TUIndex->parse(TUIndexData);
- // If we are parsing TU-index and for .debug_types section we don't need
- // to do anything.
- if (isParseSuccessful && TUIndex->getVersion() != 2)
- fixupIndex(*DObj, *this, *TUIndex);
- return *TUIndex;
+ return State->getTUIndex();
}
DWARFGdbIndex &DWARFContext::getGdbIndex() {
- if (GdbIndex)
- return *GdbIndex;
-
- DataExtractor GdbIndexData(DObj->getGdbIndexSection(), true /*LE*/, 0);
- GdbIndex = std::make_unique<DWARFGdbIndex>();
- GdbIndex->parse(GdbIndexData);
- return *GdbIndex;
+ return State->getGdbIndex();
}
const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() {
- if (Abbrev)
- return Abbrev.get();
-
- DataExtractor abbrData(DObj->getAbbrevSection(), isLittleEndian(), 0);
- Abbrev = std::make_unique<DWARFDebugAbbrev>(abbrData);
- return Abbrev.get();
+ return State->getDebugAbbrev();
}
const DWARFDebugAbbrev *DWARFContext::getDebugAbbrevDWO() {
- if (AbbrevDWO)
- return AbbrevDWO.get();
-
- DataExtractor abbrData(DObj->getAbbrevDWOSection(), isLittleEndian(), 0);
- AbbrevDWO = std::make_unique<DWARFDebugAbbrev>(abbrData);
- return AbbrevDWO.get();
+ return State->getDebugAbbrevDWO();
}
const DWARFDebugLoc *DWARFContext::getDebugLoc() {
- if (Loc)
- return Loc.get();
-
- // Assume all units have the same address byte size.
- auto LocData =
- getNumCompileUnits()
- ? DWARFDataExtractor(*DObj, DObj->getLocSection(), isLittleEndian(),
- getUnitAtIndex(0)->getAddressByteSize())
- : DWARFDataExtractor("", isLittleEndian(), 0);
- Loc.reset(new DWARFDebugLoc(std::move(LocData)));
- return Loc.get();
+ return State->getDebugLoc();
}
const DWARFDebugAranges *DWARFContext::getDebugAranges() {
- if (Aranges)
- return Aranges.get();
-
- Aranges.reset(new DWARFDebugAranges());
- Aranges->generate(this);
- return Aranges.get();
+ return State->getDebugAranges();
}
Expected<const DWARFDebugFrame *> DWARFContext::getDebugFrame() {
- if (DebugFrame)
- return DebugFrame.get();
-
- const DWARFSection &DS = DObj->getFrameSection();
-
- // There's a "bug" in the DWARFv3 standard with respect to the target address
- // size within debug frame sections. While DWARF is supposed to be independent
- // of its container, FDEs have fields with size being "target address size",
- // which isn't specified in DWARF in general. It's only specified for CUs, but
- // .eh_frame can appear without a .debug_info section. Follow the example of
- // other tools (libdwarf) and extract this from the container (ObjectFile
- // provides this information). This problem is fixed in DWARFv4
- // See this dwarf-discuss discussion for more details:
- // http://lists.dwarfstd.org/htdig.cgi/dwarf-discuss-dwarfstd.org/2011-December/001173.html
- DWARFDataExtractor DebugFrameData(*DObj, DS, isLittleEndian(),
- DObj->getAddressSize());
- auto DF =
- std::make_unique<DWARFDebugFrame>(getArch(), /*IsEH=*/false, DS.Address);
- if (Error E = DF->parse(DebugFrameData))
- return std::move(E);
-
- DebugFrame.swap(DF);
- return DebugFrame.get();
+ return State->getDebugFrame();
}
Expected<const DWARFDebugFrame *> DWARFContext::getEHFrame() {
- if (EHFrame)
- return EHFrame.get();
-
- const DWARFSection &DS = DObj->getEHFrameSection();
- DWARFDataExtractor DebugFrameData(*DObj, DS, isLittleEndian(),
- DObj->getAddressSize());
-
- auto DF =
- std::make_unique<DWARFDebugFrame>(getArch(), /*IsEH=*/true, DS.Address);
- if (Error E = DF->parse(DebugFrameData))
- return std::move(E);
- DebugFrame.swap(DF);
- return DebugFrame.get();
+ return State->getEHFrame();
}
const DWARFDebugMacro *DWARFContext::getDebugMacro() {
- if (!Macro)
- Macro = parseMacroOrMacinfo(MacroSection);
- return Macro.get();
+ return State->getDebugMacro();
}
const DWARFDebugMacro *DWARFContext::getDebugMacroDWO() {
- if (!MacroDWO)
- MacroDWO = parseMacroOrMacinfo(MacroDwoSection);
- return MacroDWO.get();
+ return State->getDebugMacroDWO();
}
const DWARFDebugMacro *DWARFContext::getDebugMacinfo() {
- if (!Macinfo)
- Macinfo = parseMacroOrMacinfo(MacinfoSection);
- return Macinfo.get();
+ return State->getDebugMacinfo();
}
const DWARFDebugMacro *DWARFContext::getDebugMacinfoDWO() {
- if (!MacinfoDWO)
- MacinfoDWO = parseMacroOrMacinfo(MacinfoDwoSection);
- return MacinfoDWO.get();
+ return State->getDebugMacinfoDWO();
}
-template <typename T>
-static T &getAccelTable(std::unique_ptr<T> &Cache, const DWARFObject &Obj,
- const DWARFSection &Section, StringRef StringSection,
- bool IsLittleEndian) {
- if (Cache)
- return *Cache;
- DWARFDataExtractor AccelSection(Obj, Section, IsLittleEndian, 0);
- DataExtractor StrData(StringSection, IsLittleEndian, 0);
- Cache.reset(new T(AccelSection, StrData));
- if (Error E = Cache->extract())
- llvm::consumeError(std::move(E));
- return *Cache;
-}
const DWARFDebugNames &DWARFContext::getDebugNames() {
- return getAccelTable(Names, *DObj, DObj->getNamesSection(),
- DObj->getStrSection(), isLittleEndian());
+ return State->getDebugNames();
}
const AppleAcceleratorTable &DWARFContext::getAppleNames() {
- return getAccelTable(AppleNames, *DObj, DObj->getAppleNamesSection(),
- DObj->getStrSection(), isLittleEndian());
+ return State->getAppleNames();
}
const AppleAcceleratorTable &DWARFContext::getAppleTypes() {
- return getAccelTable(AppleTypes, *DObj, DObj->getAppleTypesSection(),
- DObj->getStrSection(), isLittleEndian());
+ return State->getAppleTypes();
}
const AppleAcceleratorTable &DWARFContext::getAppleNamespaces() {
- return getAccelTable(AppleNamespaces, *DObj,
- DObj->getAppleNamespacesSection(),
- DObj->getStrSection(), isLittleEndian());
+ return State->getAppleNamespaces();
}
const AppleAcceleratorTable &DWARFContext::getAppleObjC() {
- return getAccelTable(AppleObjC, *DObj, DObj->getAppleObjCSection(),
- DObj->getStrSection(), isLittleEndian());
+ return State->getAppleObjC();
}
const DWARFDebugLine::LineTable *
@@ -1093,77 +1493,20 @@ DWARFContext::getLineTableForUnit(DWARFUnit *U) {
Expected<const DWARFDebugLine::LineTable *> DWARFContext::getLineTableForUnit(
DWARFUnit *U, function_ref<void(Error)> RecoverableErrorHandler) {
- if (!Line)
- Line.reset(new DWARFDebugLine);
-
- auto UnitDIE = U->getUnitDIE();
- if (!UnitDIE)
- return nullptr;
-
- auto Offset = toSectionOffset(UnitDIE.find(DW_AT_stmt_list));
- if (!Offset)
- return nullptr; // No line table for this compile unit.
-
- uint64_t stmtOffset = *Offset + U->getLineTableOffset();
- // See if the line table is cached.
- if (const DWARFLineTable *lt = Line->getLineTable(stmtOffset))
- return lt;
-
- // Make sure the offset is good before we try to parse.
- if (stmtOffset >= U->getLineSection().Data.size())
- return nullptr;
-
- // We have to parse it first.
- DWARFDataExtractor lineData(*DObj, U->getLineSection(), isLittleEndian(),
- U->getAddressByteSize());
- return Line->getOrParseLineTable(lineData, stmtOffset, *this, U,
- RecoverableErrorHandler);
+ return State->getLineTableForUnit(U, RecoverableErrorHandler);
}
void DWARFContext::clearLineTableForUnit(DWARFUnit *U) {
- if (!Line)
- return;
-
- auto UnitDIE = U->getUnitDIE();
- if (!UnitDIE)
- return;
-
- auto Offset = toSectionOffset(UnitDIE.find(DW_AT_stmt_list));
- if (!Offset)
- return;
-
- uint64_t stmtOffset = *Offset + U->getLineTableOffset();
- Line->clearLineTable(stmtOffset);
+ return State->clearLineTableForUnit(U);
}
-void DWARFContext::parseNormalUnits() {
- if (!NormalUnits.empty())
- return;
- DObj->forEachInfoSections([&](const DWARFSection &S) {
- NormalUnits.addUnitsForSection(*this, S, DW_SECT_INFO);
- });
- NormalUnits.finishedInfoUnits();
- DObj->forEachTypesSections([&](const DWARFSection &S) {
- NormalUnits.addUnitsForSection(*this, S, DW_SECT_EXT_TYPES);
- });
-}
-
-void DWARFContext::parseDWOUnits(bool Lazy) {
- if (!DWOUnits.empty())
- return;
- DObj->forEachInfoDWOSections([&](const DWARFSection &S) {
- DWOUnits.addUnitsForDWOSection(*this, S, DW_SECT_INFO, Lazy);
- });
- DWOUnits.finishedInfoUnits();
- DObj->forEachTypesDWOSections([&](const DWARFSection &S) {
- DWOUnits.addUnitsForDWOSection(*this, S, DW_SECT_EXT_TYPES, Lazy);
- });
+DWARFUnitVector &DWARFContext::getDWOUnits(bool Lazy) {
+ return State->getDWOUnits(Lazy);
}
DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint64_t Offset) {
- parseNormalUnits();
return dyn_cast_or_null<DWARFCompileUnit>(
- NormalUnits.getUnitForOffset(Offset));
+ State->getNormalUnits().getUnitForOffset(Offset));
}
DWARFCompileUnit *DWARFContext::getCompileUnitForCodeAddress(uint64_t Address) {
@@ -1187,7 +1530,7 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForDataAddress(uint64_t Address) {
// So, we walk the CU's and their child DI's manually, looking for the
// specific global variable.
for (std::unique_ptr<DWARFUnit> &CU : compile_units()) {
- if (DWARFDie Die = CU->getVariableForAddress(Address)) {
+ if (CU->getVariableForAddress(Address)) {
return static_cast<DWARFCompileUnit *>(CU.get());
}
}
@@ -1519,52 +1862,7 @@ DWARFContext::getInliningInfoForAddress(object::SectionedAddress Address,
std::shared_ptr<DWARFContext>
DWARFContext::getDWOContext(StringRef AbsolutePath) {
- if (auto S = DWP.lock()) {
- DWARFContext *Ctxt = S->Context.get();
- return std::shared_ptr<DWARFContext>(std::move(S), Ctxt);
- }
-
- std::weak_ptr<DWOFile> *Entry = &DWOFiles[AbsolutePath];
-
- if (auto S = Entry->lock()) {
- DWARFContext *Ctxt = S->Context.get();
- return std::shared_ptr<DWARFContext>(std::move(S), Ctxt);
- }
-
- Expected<OwningBinary<ObjectFile>> Obj = [&] {
- if (!CheckedForDWP) {
- SmallString<128> DWPName;
- auto Obj = object::ObjectFile::createObjectFile(
- this->DWPName.empty()
- ? (DObj->getFileName() + ".dwp").toStringRef(DWPName)
- : StringRef(this->DWPName));
- if (Obj) {
- Entry = &DWP;
- return Obj;
- } else {
- CheckedForDWP = true;
- // TODO: Should this error be handled (maybe in a high verbosity mode)
- // before falling back to .dwo files?
- consumeError(Obj.takeError());
- }
- }
-
- return object::ObjectFile::createObjectFile(AbsolutePath);
- }();
-
- if (!Obj) {
- // TODO: Actually report errors helpfully.
- consumeError(Obj.takeError());
- return nullptr;
- }
-
- auto S = std::make_shared<DWOFile>();
- S->File = std::move(Obj.get());
- S->Context = DWARFContext::create(*S->File.getBinary(),
- ProcessDebugRelocations::Ignore);
- *Entry = S;
- auto *Ctxt = S->Context.get();
- return std::shared_ptr<DWARFContext>(std::move(S), Ctxt);
+ return State->getDWOContext(AbsolutePath);
}
static Error createError(const Twine &Reason, llvm::Error E) {
@@ -1909,7 +2207,7 @@ public:
continue;
if (!Section.relocations().empty() && Name.ends_with(".dwo") &&
- RelSecName.startswith(".debug")) {
+ RelSecName.starts_with(".debug")) {
HandleWarning(createError("unexpected relocations for dwo section '" +
RelSecName + "'"));
}
@@ -2115,23 +2413,27 @@ DWARFContext::create(const object::ObjectFile &Obj,
ProcessDebugRelocations RelocAction,
const LoadedObjectInfo *L, std::string DWPName,
std::function<void(Error)> RecoverableErrorHandler,
- std::function<void(Error)> WarningHandler) {
+ std::function<void(Error)> WarningHandler,
+ bool ThreadSafe) {
auto DObj = std::make_unique<DWARFObjInMemory>(
Obj, L, RecoverableErrorHandler, WarningHandler, RelocAction);
- return std::make_unique<DWARFContext>(std::move(DObj), std::move(DWPName),
+ return std::make_unique<DWARFContext>(std::move(DObj),
+ std::move(DWPName),
RecoverableErrorHandler,
- WarningHandler);
+ WarningHandler,
+ ThreadSafe);
}
std::unique_ptr<DWARFContext>
DWARFContext::create(const StringMap<std::unique_ptr<MemoryBuffer>> &Sections,
uint8_t AddrSize, bool isLittleEndian,
std::function<void(Error)> RecoverableErrorHandler,
- std::function<void(Error)> WarningHandler) {
+ std::function<void(Error)> WarningHandler,
+ bool ThreadSafe) {
auto DObj =
std::make_unique<DWARFObjInMemory>(Sections, AddrSize, isLittleEndian);
return std::make_unique<DWARFContext>(
- std::move(DObj), "", RecoverableErrorHandler, WarningHandler);
+ std::move(DObj), "", RecoverableErrorHandler, WarningHandler, ThreadSafe);
}
uint8_t DWARFContext::getCUAddrSize() {
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
index 3014e61f566a..85959ecc5e17 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
@@ -105,9 +105,9 @@ std::string DWARFAbbreviationDeclarationSet::getCodeRange() const {
DWARFDebugAbbrev::DWARFDebugAbbrev(DataExtractor Data)
: AbbrDeclSets(), PrevAbbrOffsetPos(AbbrDeclSets.end()), Data(Data) {}
-void DWARFDebugAbbrev::parse() const {
+Error DWARFDebugAbbrev::parse() const {
if (!Data)
- return;
+ return Error::success();
uint64_t Offset = 0;
auto I = AbbrDeclSets.begin();
while (Data->isValidOffset(Offset)) {
@@ -116,17 +116,19 @@ void DWARFDebugAbbrev::parse() const {
uint64_t CUAbbrOffset = Offset;
DWARFAbbreviationDeclarationSet AbbrDecls;
if (Error Err = AbbrDecls.extract(*Data, &Offset)) {
- // FIXME: We should propagate the error upwards.
- consumeError(std::move(Err));
- break;
+ Data = std::nullopt;
+ return Err;
}
AbbrDeclSets.insert(I, std::make_pair(CUAbbrOffset, std::move(AbbrDecls)));
}
Data = std::nullopt;
+ return Error::success();
}
void DWARFDebugAbbrev::dump(raw_ostream &OS) const {
- parse();
+ if (Error Err = parse())
+ // FIXME: We should propagate this error or otherwise display it.
+ llvm::consumeError(std::move(Err));
if (AbbrDeclSets.empty()) {
OS << "< EMPTY >\n";
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 6f2afe5d50e9..78792cf83891 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -170,9 +170,14 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS,
if (ContentTypes.HasLength)
OS << format(" length: 0x%8.8" PRIx64 "\n", FileEntry.Length);
if (ContentTypes.HasSource) {
- OS << " source: ";
- FileEntry.Source.dump(OS, DumpOptions);
- OS << '\n';
+ auto Source = FileEntry.Source.getAsCString();
+ if (!Source)
+ consumeError(Source.takeError());
+ else if ((*Source)[0]) {
+ OS << " source: ";
+ FileEntry.Source.dump(OS, DumpOptions);
+ OS << '\n';
+ }
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
index 7af7ed8be7b4..66492f7bf804 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
@@ -147,7 +148,8 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
if (!Name.empty())
WithColor(OS, Color) << Name;
- else if (Attr == DW_AT_decl_line || Attr == DW_AT_call_line) {
+ else if (Attr == DW_AT_decl_line || Attr == DW_AT_decl_column ||
+ Attr == DW_AT_call_line || Attr == DW_AT_call_column) {
if (std::optional<uint64_t> Val = FormValue.getAsUnsignedConstant())
OS << *Val;
else
@@ -189,7 +191,8 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
// We have dumped the attribute raw value. For some attributes
// having both the raw value and the pretty-printed value is
// interesting. These attributes are handled below.
- if (Attr == DW_AT_specification || Attr == DW_AT_abstract_origin) {
+ if (Attr == DW_AT_specification || Attr == DW_AT_abstract_origin ||
+ Attr == DW_AT_call_origin) {
if (const char *Name =
Die.getAttributeValueAsReferencedDie(FormValue).getName(
DINameKind::LinkageName))
@@ -487,18 +490,23 @@ void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine,
CallDiscriminator = toUnsigned(find(DW_AT_GNU_discriminator), 0);
}
-std::optional<uint64_t> DWARFDie::getTypeSize(uint64_t PointerSize) {
- if (auto SizeAttr = find(DW_AT_byte_size))
+static std::optional<uint64_t>
+getTypeSizeImpl(DWARFDie Die, uint64_t PointerSize,
+ SmallPtrSetImpl<const DWARFDebugInfoEntry *> &Visited) {
+ // Cycle detected?
+ if (!Visited.insert(Die.getDebugInfoEntry()).second)
+ return {};
+ if (auto SizeAttr = Die.find(DW_AT_byte_size))
if (std::optional<uint64_t> Size = SizeAttr->getAsUnsignedConstant())
return Size;
- switch (getTag()) {
+ switch (Die.getTag()) {
case DW_TAG_pointer_type:
case DW_TAG_reference_type:
case DW_TAG_rvalue_reference_type:
return PointerSize;
case DW_TAG_ptr_to_member_type: {
- if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type))
+ if (DWARFDie BaseType = Die.getAttributeValueAsReferencedDie(DW_AT_type))
if (BaseType.getTag() == DW_TAG_subroutine_type)
return 2 * PointerSize;
return PointerSize;
@@ -508,19 +516,20 @@ std::optional<uint64_t> DWARFDie::getTypeSize(uint64_t PointerSize) {
case DW_TAG_volatile_type:
case DW_TAG_restrict_type:
case DW_TAG_typedef: {
- if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type))
- return BaseType.getTypeSize(PointerSize);
+ if (DWARFDie BaseType = Die.getAttributeValueAsReferencedDie(DW_AT_type))
+ return getTypeSizeImpl(BaseType, PointerSize, Visited);
break;
}
case DW_TAG_array_type: {
- DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type);
+ DWARFDie BaseType = Die.getAttributeValueAsReferencedDie(DW_AT_type);
if (!BaseType)
return std::nullopt;
- std::optional<uint64_t> BaseSize = BaseType.getTypeSize(PointerSize);
+ std::optional<uint64_t> BaseSize =
+ getTypeSizeImpl(BaseType, PointerSize, Visited);
if (!BaseSize)
return std::nullopt;
uint64_t Size = *BaseSize;
- for (DWARFDie Child : *this) {
+ for (DWARFDie Child : Die) {
if (Child.getTag() != DW_TAG_subrange_type)
continue;
@@ -540,13 +549,18 @@ std::optional<uint64_t> DWARFDie::getTypeSize(uint64_t PointerSize) {
return Size;
}
default:
- if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type))
- return BaseType.getTypeSize(PointerSize);
+ if (DWARFDie BaseType = Die.getAttributeValueAsReferencedDie(DW_AT_type))
+ return getTypeSizeImpl(BaseType, PointerSize, Visited);
break;
}
return std::nullopt;
}
+std::optional<uint64_t> DWARFDie::getTypeSize(uint64_t PointerSize) {
+ SmallPtrSet<const DWARFDebugInfoEntry *, 4> Visited;
+ return getTypeSizeImpl(*this, PointerSize, Visited);
+}
+
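The Visited set threaded through getTypeSizeImpl is a standard cycle guard: DW_AT_type chains in malformed debug info can loop, and the previous directly recursive getTypeSize would not terminate on them. The recursion shape, as a self-contained sketch:

#include <optional>
#include <set>
struct TypeNode { int ByteSize; TypeNode *Base; }; // hypothetical type chain
std::optional<int> sizeOf(TypeNode *N, std::set<TypeNode *> &Seen) {
  if (!N || !Seen.insert(N).second) // stop on null or on a revisited node
    return std::nullopt;
  if (N->ByteSize)
    return N->ByteSize;
  return sizeOf(N->Base, Seen); // follow the DW_AT_type-like edge
}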
/// Helper to dump a DIE with all of its parents, but no siblings.
static unsigned dumpParentChain(DWARFDie Die, raw_ostream &OS, unsigned Indent,
DIDumpOptions DumpOpts, unsigned Depth = 0) {
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp
index c474de607626..20242d958b6b 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp
@@ -8,7 +8,7 @@ void DWARFTypePrinter::appendTypeTagName(dwarf::Tag T) {
StringRef TagStr = TagString(T);
static constexpr StringRef Prefix = "DW_TAG_";
static constexpr StringRef Suffix = "_type";
- if (!TagStr.startswith(Prefix) || !TagStr.endswith(Suffix))
+ if (!TagStr.starts_with(Prefix) || !TagStr.ends_with(Suffix))
return;
OS << TagStr.substr(Prefix.size(),
TagStr.size() - (Prefix.size() + Suffix.size()))
@@ -181,7 +181,7 @@ DWARFTypePrinter::appendUnqualifiedNameBefore(DWARFDie D,
Word = true;
StringRef Name = NamePtr;
static constexpr StringRef MangledPrefix = "_STN|";
- if (Name.startswith(MangledPrefix)) {
+ if (Name.starts_with(MangledPrefix)) {
Name = Name.drop_front(MangledPrefix.size());
auto Separator = Name.find('|');
assert(Separator != StringRef::npos);
@@ -191,12 +191,12 @@ DWARFTypePrinter::appendUnqualifiedNameBefore(DWARFDie D,
*OriginalFullName = (BaseName + TemplateArgs).str();
Name = BaseName;
} else
- EndedWithTemplate = Name.endswith(">");
+ EndedWithTemplate = Name.ends_with(">");
OS << Name;
// This check would be insufficient for operator overloads like
// "operator>>" - but for now Clang doesn't try to simplify them, so this
// is OK. Add more nuanced operator overload handling here if/when needed.
- if (Name.endswith(">"))
+ if (Name.ends_with(">"))
break;
if (!appendTemplateParameters(D))
break;
@@ -620,6 +620,9 @@ void DWARFTypePrinter::appendSubroutineNameAfter(
case CallingConvention::DW_CC_LLVM_X86RegCall:
OS << " __attribute__((regcall))";
break;
+ case CallingConvention::DW_CC_LLVM_M68kRTD:
+ OS << " __attribute__((m68k_rtd))";
+ break;
}
}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index 19678f121982..9f455fa7e96a 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -81,8 +81,11 @@ void DWARFUnitVector::addUnitsImpl(
if (!Data.isValidOffset(Offset))
return nullptr;
DWARFUnitHeader Header;
- if (!Header.extract(Context, Data, &Offset, SectionKind))
+ if (Error ExtractErr =
+ Header.extract(Context, Data, &Offset, SectionKind)) {
+ Context.getWarningHandler()(std::move(ExtractErr));
return nullptr;
+ }
if (!IndexEntry && IsDWO) {
const DWARFUnitIndex &Index = getDWARFUnitIndex(
Context, Header.isTypeUnit() ? DW_SECT_EXT_TYPES : DW_SECT_INFO);
@@ -244,10 +247,10 @@ Expected<uint64_t> DWARFUnit::getStringOffsetSectionItem(uint32_t Index) const {
return DA.getRelocatedValue(ItemSize, &Offset);
}
-bool DWARFUnitHeader::extract(DWARFContext &Context,
- const DWARFDataExtractor &debug_info,
- uint64_t *offset_ptr,
- DWARFSectionKind SectionKind) {
+Error DWARFUnitHeader::extract(DWARFContext &Context,
+ const DWARFDataExtractor &debug_info,
+ uint64_t *offset_ptr,
+ DWARFSectionKind SectionKind) {
Offset = *offset_ptr;
Error Err = Error::success();
IndexEntry = nullptr;
@@ -277,72 +280,58 @@ bool DWARFUnitHeader::extract(DWARFContext &Context,
} else if (UnitType == DW_UT_split_compile || UnitType == DW_UT_skeleton)
DWOId = debug_info.getU64(offset_ptr, &Err);
- if (Err) {
- Context.getWarningHandler()(joinErrors(
+ if (Err)
+ return joinErrors(
createStringError(
errc::invalid_argument,
"DWARF unit at 0x%8.8" PRIx64 " cannot be parsed:", Offset),
- std::move(Err)));
- return false;
- }
+ std::move(Err));
// Header fields all parsed, capture the size of this unit header.
assert(*offset_ptr - Offset <= 255 && "unexpected header size");
Size = uint8_t(*offset_ptr - Offset);
uint64_t NextCUOffset = Offset + getUnitLengthFieldByteSize() + getLength();
- if (!debug_info.isValidOffset(getNextUnitOffset() - 1)) {
- Context.getWarningHandler()(
- createStringError(errc::invalid_argument,
- "DWARF unit from offset 0x%8.8" PRIx64 " incl. "
- "to offset 0x%8.8" PRIx64 " excl. "
- "extends past section size 0x%8.8zx",
- Offset, NextCUOffset, debug_info.size()));
- return false;
- }
+ if (!debug_info.isValidOffset(getNextUnitOffset() - 1))
+ return createStringError(errc::invalid_argument,
+ "DWARF unit from offset 0x%8.8" PRIx64 " incl. "
+ "to offset 0x%8.8" PRIx64 " excl. "
+ "extends past section size 0x%8.8zx",
+ Offset, NextCUOffset, debug_info.size());
- if (!DWARFContext::isSupportedVersion(getVersion())) {
- Context.getWarningHandler()(createStringError(
+ if (!DWARFContext::isSupportedVersion(getVersion()))
+ return createStringError(
errc::invalid_argument,
"DWARF unit at offset 0x%8.8" PRIx64 " "
"has unsupported version %" PRIu16 ", supported are 2-%u",
- Offset, getVersion(), DWARFContext::getMaxSupportedVersion()));
- return false;
- }
+ Offset, getVersion(), DWARFContext::getMaxSupportedVersion());
// Type offset is unit-relative; should be after the header and before
// the end of the current unit.
- if (isTypeUnit() && TypeOffset < Size) {
- Context.getWarningHandler()(
- createStringError(errc::invalid_argument,
- "DWARF type unit at offset "
- "0x%8.8" PRIx64 " "
- "has its relocated type_offset 0x%8.8" PRIx64 " "
- "pointing inside the header",
- Offset, Offset + TypeOffset));
- return false;
- }
- if (isTypeUnit() &&
- TypeOffset >= getUnitLengthFieldByteSize() + getLength()) {
- Context.getWarningHandler()(createStringError(
+ if (isTypeUnit() && TypeOffset < Size)
+ return createStringError(errc::invalid_argument,
+ "DWARF type unit at offset "
+ "0x%8.8" PRIx64 " "
+ "has its relocated type_offset 0x%8.8" PRIx64 " "
+ "pointing inside the header",
+ Offset, Offset + TypeOffset);
+
+ if (isTypeUnit() && TypeOffset >= getUnitLengthFieldByteSize() + getLength())
+ return createStringError(
errc::invalid_argument,
"DWARF type unit from offset 0x%8.8" PRIx64 " incl. "
"to offset 0x%8.8" PRIx64 " excl. has its "
"relocated type_offset 0x%8.8" PRIx64 " pointing past the unit end",
- Offset, NextCUOffset, Offset + TypeOffset));
- return false;
- }
+ Offset, NextCUOffset, Offset + TypeOffset);
if (Error SizeErr = DWARFContext::checkAddressSizeSupported(
getAddressByteSize(), errc::invalid_argument,
- "DWARF unit at offset 0x%8.8" PRIx64, Offset)) {
- Context.getWarningHandler()(std::move(SizeErr));
- return false;
- }
+ "DWARF unit at offset 0x%8.8" PRIx64, Offset))
+ return SizeErr;
// Keep track of the highest DWARF version we encounter across all units.
Context.setMaxVersionIfGreater(getVersion());
- return true;
+ return Error::success();
}
bool DWARFUnitHeader::applyIndexEntry(const DWARFUnitIndex::Entry *Entry) {
@@ -784,7 +773,7 @@ void DWARFUnit::updateVariableDieMap(DWARFDie Die) {
for (const DWARFLocationExpression &Location : *Locations) {
uint8_t AddressSize = getAddressByteSize();
- DataExtractor Data(Location.Expr, /*IsLittleEndian=*/true, AddressSize);
+ DataExtractor Data(Location.Expr, isLittleEndian(), AddressSize);
DWARFExpression Expr(Data, AddressSize);
auto It = Expr.begin();
if (It == Expr.end())
@@ -828,7 +817,7 @@ void DWARFUnit::updateVariableDieMap(DWARFDie Die) {
// no type), then we use a size of one to still allow symbolization of the
// exact address.
uint64_t GVSize = 1;
- if (DWARFDie BaseType = Die.getAttributeValueAsReferencedDie(DW_AT_type))
+ if (Die.getAttributeValueAsReferencedDie(DW_AT_type))
if (std::optional<uint64_t> Size = Die.getTypeSize(getAddressByteSize()))
GVSize = *Size;
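
Context for the extract() hunks above: the header parser now returns llvm::Error instead of bool, so reporting moves to whichever caller owns a warning handler. A toy, self-contained analog of that bool-to-Error migration (Error here is a stand-in type, not llvm::Error):

#include <cstdio>
#include <optional>
#include <string>

// Stand-in for llvm::Error: empty on success, a message on failure.
using Error = std::optional<std::string>;

static Error extractHeader(bool Valid) {
  if (!Valid)
    return std::string("DWARF unit at 0x0 cannot be parsed");
  return std::nullopt; // like Error::success()
}

int main() {
  // The caller decides what to do with the failure, mirroring how
  // DWARFUnitVector::addUnitsImpl now feeds the Error to the context's
  // warning handler and skips the unit.
  if (Error E = extractHeader(false))
    std::fprintf(stderr, "warning: %s\n", E->c_str());
}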
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index 58900e1e80cb..43ed60d7f977 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -1351,12 +1351,34 @@ DWARFVerifier::verifyNameIndexAbbrevs(const DWARFDebugNames::NameIndex &NI) {
return NumErrors;
}
-static SmallVector<StringRef, 2> getNames(const DWARFDie &DIE,
- bool IncludeLinkageName = true) {
- SmallVector<StringRef, 2> Result;
- if (const char *Str = DIE.getShortName())
- Result.emplace_back(Str);
- else if (DIE.getTag() == dwarf::DW_TAG_namespace)
+static SmallVector<std::string, 3> getNames(const DWARFDie &DIE,
+ bool IncludeStrippedTemplateNames,
+ bool IncludeObjCNames = true,
+ bool IncludeLinkageName = true) {
+ SmallVector<std::string, 3> Result;
+ if (const char *Str = DIE.getShortName()) {
+ StringRef Name(Str);
+ Result.emplace_back(Name);
+ if (IncludeStrippedTemplateNames) {
+ if (std::optional<StringRef> StrippedName =
+ StripTemplateParameters(Result.back()))
+ // Convert to std::string and push; emplacing the StringRef may trigger
+ // a vector resize which may destroy the StringRef memory.
+ Result.push_back(StrippedName->str());
+ }
+
+ if (IncludeObjCNames) {
+ if (std::optional<ObjCSelectorNames> ObjCNames =
+ getObjCNamesIfSelector(Name)) {
+ Result.emplace_back(ObjCNames->ClassName);
+ Result.emplace_back(ObjCNames->Selector);
+ if (ObjCNames->ClassNameNoCategory)
+ Result.emplace_back(*ObjCNames->ClassNameNoCategory);
+ if (ObjCNames->MethodNameNoCategory)
+ Result.push_back(std::move(*ObjCNames->MethodNameNoCategory));
+ }
+ }
+ } else if (DIE.getTag() == dwarf::DW_TAG_namespace)
Result.emplace_back("(anonymous namespace)");
if (IncludeLinkageName) {
@@ -1423,7 +1445,12 @@ unsigned DWARFVerifier::verifyNameIndexEntries(
++NumErrors;
}
- auto EntryNames = getNames(DIE);
+ // We allow an extra name for functions: their name without any template
+ // parameters.
+ auto IncludeStrippedTemplateNames =
+ DIE.getTag() == DW_TAG_subprogram ||
+ DIE.getTag() == DW_TAG_inlined_subroutine;
+ auto EntryNames = getNames(DIE, IncludeStrippedTemplateNames);
if (!is_contained(EntryNames, Str)) {
error() << formatv("Name Index @ {0:x}: Entry @ {1:x}: mismatched Name "
"of DIE @ {2:x}: index - {3}; debug_info - {4}.\n",
@@ -1496,7 +1523,12 @@ unsigned DWARFVerifier::verifyNameIndexCompleteness(
// the linkage name."
auto IncludeLinkageName = Die.getTag() == DW_TAG_subprogram ||
Die.getTag() == DW_TAG_inlined_subroutine;
- auto EntryNames = getNames(Die, IncludeLinkageName);
+ // We *allow* stripped template names / ObjectiveC names as extra entries into
+ // the table, but we don't *require* them to pass the completeness test.
+ auto IncludeStrippedTemplateNames = false;
+ auto IncludeObjCNames = false;
+ auto EntryNames = getNames(Die, IncludeStrippedTemplateNames,
+ IncludeObjCNames, IncludeLinkageName);
if (EntryNames.empty())
return 0;
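
The std::string switch in getNames() above guards against a subtle lifetime bug the in-code comment mentions: emplacing a StringRef that points into the vector's own element can dangle when the vector reallocates. A minimal standalone illustration (names here are hypothetical, not LLVM API):

#include <string>
#include <string_view>
#include <vector>

int main() {
  std::vector<std::string> Names;
  Names.emplace_back("foo<int>");

  // A view into an element of Names dangles once Names reallocates.
  std::string_view Stripped(Names.back().data(), Names.back().find('<'));

  // UB: emplace_back may reallocate first, then construct the new element
  // from a now-dead view.
  // Names.emplace_back(Stripped);

  // Safe, as getNames() now does: materialize an owning string before the
  // vector can grow, so the copy reads the view while it is still valid.
  Names.push_back(std::string(Stripped)); // Names == {"foo<int>", "foo"}
}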
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index d266960ae302..0b225376349e 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -65,10 +65,10 @@ struct llvm::gsym::CUInfo {
/// the first client that asks for a compile unit file index will end up
/// doing the conversion, and subsequent clients will get the cached GSYM
/// index.
- uint32_t DWARFToGSYMFileIndex(GsymCreator &Gsym, uint32_t DwarfFileIdx) {
- if (!LineTable)
- return 0;
- assert(DwarfFileIdx < FileCache.size());
+ std::optional<uint32_t> DWARFToGSYMFileIndex(GsymCreator &Gsym,
+ uint32_t DwarfFileIdx) {
+ if (!LineTable || DwarfFileIdx >= FileCache.size())
+ return std::nullopt;
uint32_t &GsymFileIdx = FileCache[DwarfFileIdx];
if (GsymFileIdx != UINT32_MAX)
return GsymFileIdx;
@@ -132,11 +132,11 @@ static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) {
static std::optional<uint32_t>
getQualifiedNameIndex(DWARFDie &Die, uint64_t Language, GsymCreator &Gsym) {
// If the dwarf has mangled name, use mangled name
- if (auto LinkageName =
- dwarf::toString(Die.findRecursively({dwarf::DW_AT_MIPS_linkage_name,
- dwarf::DW_AT_linkage_name}),
- nullptr))
- return Gsym.insertString(LinkageName, /* Copy */ false);
+ if (auto LinkageName = Die.getLinkageName()) {
+    // We have seen cases where the linkage name is actually empty.
+ if (strlen(LinkageName) > 0)
+ return Gsym.insertString(LinkageName, /* Copy */ false);
+ }
StringRef ShortName(Die.getName(DINameKind::ShortName));
if (ShortName.empty())
@@ -156,7 +156,7 @@ getQualifiedNameIndex(DWARFDie &Die, uint64_t Language, GsymCreator &Gsym) {
// Some GCC optimizations create functions with names ending with .isra.<num>
// or .part.<num> and those names are just DW_AT_name, not DW_AT_linkage_name
// If it looks like it could be the case, don't add any prefix
- if (ShortName.startswith("_Z") &&
+ if (ShortName.starts_with("_Z") &&
(ShortName.contains(".isra.") || ShortName.contains(".part.")))
return Gsym.insertString(ShortName, /* Copy */ false);
@@ -205,9 +205,21 @@ static bool hasInlineInfo(DWARFDie Die, uint32_t Depth) {
return false;
}
-static void parseInlineInfo(GsymCreator &Gsym, CUInfo &CUI, DWARFDie Die,
- uint32_t Depth, FunctionInfo &FI,
- InlineInfo &parent) {
+static AddressRanges
+ConvertDWARFRanges(const DWARFAddressRangesVector &DwarfRanges) {
+ AddressRanges Ranges;
+ for (const DWARFAddressRange &DwarfRange : DwarfRanges) {
+ if (DwarfRange.LowPC < DwarfRange.HighPC)
+ Ranges.insert({DwarfRange.LowPC, DwarfRange.HighPC});
+ }
+ return Ranges;
+}
+
+static void parseInlineInfo(GsymCreator &Gsym, raw_ostream *Log, CUInfo &CUI,
+ DWARFDie Die, uint32_t Depth, FunctionInfo &FI,
+ InlineInfo &Parent,
+ const AddressRanges &AllParentRanges,
+ bool &WarnIfEmpty) {
if (!hasInlineInfo(Die, Depth))
return;
@@ -215,39 +227,80 @@ static void parseInlineInfo(GsymCreator &Gsym, CUInfo &CUI, DWARFDie Die,
if (Tag == dwarf::DW_TAG_inlined_subroutine) {
// create new InlineInfo and append to parent.children
InlineInfo II;
- DWARFAddressRange FuncRange =
- DWARFAddressRange(FI.startAddress(), FI.endAddress());
+ AddressRanges AllInlineRanges;
Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges();
if (RangesOrError) {
- for (const DWARFAddressRange &Range : RangesOrError.get()) {
- // Check that the inlined function is within the range of the function
- // info, it might not be in case of split functions
- if (FuncRange.LowPC <= Range.LowPC && Range.HighPC <= FuncRange.HighPC)
- II.Ranges.insert(AddressRange(Range.LowPC, Range.HighPC));
+ AllInlineRanges = ConvertDWARFRanges(RangesOrError.get());
+ uint32_t EmptyCount = 0;
+ for (const AddressRange &InlineRange : AllInlineRanges) {
+        // Check for an empty inline range in case the inline function was
+        // outlined or has no code.
+ if (InlineRange.empty()) {
+ ++EmptyCount;
+ } else {
+ if (Parent.Ranges.contains(InlineRange)) {
+ II.Ranges.insert(InlineRange);
+ } else {
+            // Only warn if the current inline range is not contained in any
+            // of the parent's ranges. When a DW_TAG_subprogram has multiple
+            // ranges we emit a FunctionInfo for each range, each containing
+            // only the information within that range, so a range that merely
+            // falls outside the range currently being parsed only indicates
+            // broken DWARF if it is also outside all of the parent ranges.
+ if (AllParentRanges.contains(InlineRange)) {
+ WarnIfEmpty = false;
+ } else if (Log) {
+ *Log << "error: inlined function DIE at "
+ << HEX32(Die.getOffset()) << " has a range ["
+ << HEX64(InlineRange.start()) << " - "
+ << HEX64(InlineRange.end()) << ") that isn't contained in "
+ << "any parent address ranges, this inline range will be "
+ "removed.\n";
+ }
+ }
+ }
}
+ // If we have all empty ranges for the inlines, then don't warn if we
+ // have an empty InlineInfo at the top level as all inline functions
+ // were elided.
+ if (EmptyCount == AllInlineRanges.size())
+ WarnIfEmpty = false;
}
if (II.Ranges.empty())
return;
if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym))
II.Name = *NameIndex;
- II.CallFile = CUI.DWARFToGSYMFileIndex(
- Gsym, dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_file), 0));
- II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0);
- // parse all children and append to parent
- for (DWARFDie ChildDie : Die.children())
- parseInlineInfo(Gsym, CUI, ChildDie, Depth + 1, FI, II);
- parent.Children.emplace_back(std::move(II));
+ const uint64_t DwarfFileIdx = dwarf::toUnsigned(
+ Die.findRecursively(dwarf::DW_AT_call_file), UINT32_MAX);
+ std::optional<uint32_t> OptGSymFileIdx =
+ CUI.DWARFToGSYMFileIndex(Gsym, DwarfFileIdx);
+ if (OptGSymFileIdx) {
+ II.CallFile = OptGSymFileIdx.value();
+ II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0);
+ // parse all children and append to parent
+ for (DWARFDie ChildDie : Die.children())
+ parseInlineInfo(Gsym, Log, CUI, ChildDie, Depth + 1, FI, II,
+ AllInlineRanges, WarnIfEmpty);
+ Parent.Children.emplace_back(std::move(II));
+ } else if (Log) {
+ *Log << "error: inlined function DIE at " << HEX32(Die.getOffset())
+ << " has an invalid file index " << DwarfFileIdx
+ << " in its DW_AT_call_file attribute, this inline entry and all "
+ << "children will be removed.\n";
+ }
return;
}
if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) {
// skip this Die and just recurse down
for (DWARFDie ChildDie : Die.children())
- parseInlineInfo(Gsym, CUI, ChildDie, Depth + 1, FI, parent);
+ parseInlineInfo(Gsym, Log, CUI, ChildDie, Depth + 1, FI, Parent,
+ AllParentRanges, WarnIfEmpty);
}
}
-static void convertFunctionLineTable(raw_ostream &Log, CUInfo &CUI,
+static void convertFunctionLineTable(raw_ostream *Log, CUInfo &CUI,
DWARFDie Die, GsymCreator &Gsym,
FunctionInfo &FI) {
std::vector<uint32_t> RowVector;
@@ -263,8 +316,20 @@ static void convertFunctionLineTable(raw_ostream &Log, CUInfo &CUI,
  // the DW_AT_decl_file and DW_AT_decl_line if we have both attributes.
std::string FilePath = Die.getDeclFile(
DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
- if (FilePath.empty())
+ if (FilePath.empty()) {
+      // If we had a DW_AT_decl_file but got no file, then we need to emit
+      // a warning.
+ if (Log) {
+ const uint64_t DwarfFileIdx = dwarf::toUnsigned(
+ Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX);
+ *Log << "error: function DIE at " << HEX32(Die.getOffset())
+ << " has an invalid file index " << DwarfFileIdx
+ << " in its DW_AT_decl_file attribute, unable to create a single "
+ << "line entry from the DW_AT_decl_file/DW_AT_decl_line "
+ << "attributes.\n";
+ }
return;
+ }
if (auto Line =
dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) {
LineEntry LE(StartAddress, Gsym.insertFile(FilePath), *Line);
@@ -279,7 +344,20 @@ static void convertFunctionLineTable(raw_ostream &Log, CUInfo &CUI,
for (uint32_t RowIndex : RowVector) {
// Take file number and line/column from the row.
const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex];
- const uint32_t FileIdx = CUI.DWARFToGSYMFileIndex(Gsym, Row.File);
+ std::optional<uint32_t> OptFileIdx =
+ CUI.DWARFToGSYMFileIndex(Gsym, Row.File);
+ if (!OptFileIdx) {
+ if (Log) {
+ *Log << "error: function DIE at " << HEX32(Die.getOffset()) << " has "
+ << "a line entry with invalid DWARF file index, this entry will "
+ << "be removed:\n";
+ Row.dumpTableHeader(*Log, /*Indent=*/0);
+ Row.dump(*Log);
+ *Log << "\n";
+ }
+ continue;
+ }
+ const uint32_t FileIdx = OptFileIdx.value();
uint64_t RowAddress = Row.Address.Address;
// Watch out for a RowAddress that is in the middle of a line table entry
// in the DWARF. If we pass an address in between two line table entries
@@ -289,10 +367,12 @@ static void convertFunctionLineTable(raw_ostream &Log, CUInfo &CUI,
// an error, but not worth stopping the creation of the GSYM.
if (!FI.Range.contains(RowAddress)) {
if (RowAddress < FI.Range.start()) {
- Log << "error: DIE has a start address whose LowPC is between the "
- "line table Row[" << RowIndex << "] with address "
- << HEX64(RowAddress) << " and the next one.\n";
- Die.dump(Log, 0, DIDumpOptions::getForSingleDIE());
+ if (Log) {
+ *Log << "error: DIE has a start address whose LowPC is between the "
+ "line table Row[" << RowIndex << "] with address "
+ << HEX64(RowAddress) << " and the next one.\n";
+ Die.dump(*Log, 0, DIDumpOptions::getForSingleDIE());
+ }
RowAddress = FI.Range.start();
} else {
continue;
@@ -302,25 +382,25 @@ static void convertFunctionLineTable(raw_ostream &Log, CUInfo &CUI,
LineEntry LE(RowAddress, FileIdx, Row.Line);
if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) {
// We have seen full duplicate line tables for functions in some
- // DWARF files. Watch for those here by checking the the last
+ // DWARF files. Watch for those here by checking the last
// row was the function's end address (HighPC) and that the
// current line table entry's address is the same as the first
// line entry we already have in our "function_info.Lines". If
// so break out after printing a warning.
auto FirstLE = FI.OptLineTable->first();
if (FirstLE && *FirstLE == LE) {
- if (!Gsym.isQuiet()) {
- Log << "warning: duplicate line table detected for DIE:\n";
- Die.dump(Log, 0, DIDumpOptions::getForSingleDIE());
+ if (Log && !Gsym.isQuiet()) {
+ *Log << "warning: duplicate line table detected for DIE:\n";
+ Die.dump(*Log, 0, DIDumpOptions::getForSingleDIE());
}
} else {
- // Print out (ignore if os == nulls as this is expensive)
- Log << "error: line table has addresses that do not "
- << "monotonically increase:\n";
- for (uint32_t RowIndex2 : RowVector) {
- CUI.LineTable->Rows[RowIndex2].dump(Log);
+ if (Log) {
+ *Log << "error: line table has addresses that do not "
+ << "monotonically increase:\n";
+ for (uint32_t RowIndex2 : RowVector)
+ CUI.LineTable->Rows[RowIndex2].dump(*Log);
+ Die.dump(*Log, 0, DIDumpOptions::getForSingleDIE());
}
- Die.dump(Log, 0, DIDumpOptions::getForSingleDIE());
}
break;
}
@@ -349,7 +429,7 @@ static void convertFunctionLineTable(raw_ostream &Log, CUInfo &CUI,
FI.OptLineTable = std::nullopt;
}
-void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) {
+void DwarfTransformer::handleDie(raw_ostream *OS, CUInfo &CUI, DWARFDie Die) {
switch (Die.getTag()) {
case dwarf::DW_TAG_subprogram: {
Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges();
@@ -362,11 +442,20 @@ void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) {
break;
auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym);
if (!NameIndex) {
- OS << "error: function at " << HEX64(Die.getOffset())
- << " has no name\n ";
- Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
+ if (OS) {
+ *OS << "error: function at " << HEX64(Die.getOffset())
+ << " has no name\n ";
+ Die.dump(*OS, 0, DIDumpOptions::getForSingleDIE());
+ }
break;
}
+      // All ranges for the subprogram DIE, in case it has more than one. We
+      // need to pass this down into parseInlineInfo so we don't warn about
+      // inline ranges that are not in the current subrange of a function
+      // when they actually are in another subrange. We do this because when
+      // a function has discontiguous ranges, we create multiple function
+      // entries, each containing only the info for its own range.
+ AddressRanges AllSubprogramRanges = ConvertDWARFRanges(Ranges);
// Create a function_info for each range
for (const DWARFAddressRange &Range : Ranges) {
@@ -393,11 +482,13 @@ void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) {
if (Range.LowPC != 0) {
if (!Gsym.isQuiet()) {
// Unexpected invalid address, emit a warning
- OS << "warning: DIE has an address range whose start address is "
- "not in any executable sections ("
- << *Gsym.GetValidTextRanges()
- << ") and will not be processed:\n";
- Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
+ if (OS) {
+ *OS << "warning: DIE has an address range whose start address "
+ "is not in any executable sections ("
+ << *Gsym.GetValidTextRanges()
+ << ") and will not be processed:\n";
+ Die.dump(*OS, 0, DIDumpOptions::getForSingleDIE());
+ }
}
}
break;
@@ -406,14 +497,33 @@ void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) {
FunctionInfo FI;
FI.Range = {Range.LowPC, Range.HighPC};
FI.Name = *NameIndex;
- if (CUI.LineTable) {
+ if (CUI.LineTable)
convertFunctionLineTable(OS, CUI, Die, Gsym, FI);
- }
+
if (hasInlineInfo(Die, 0)) {
FI.Inline = InlineInfo();
FI.Inline->Name = *NameIndex;
FI.Inline->Ranges.insert(FI.Range);
- parseInlineInfo(Gsym, CUI, Die, 0, FI, *FI.Inline);
+ bool WarnIfEmpty = true;
+ parseInlineInfo(Gsym, OS, CUI, Die, 0, FI, *FI.Inline,
+ AllSubprogramRanges, WarnIfEmpty);
+        // Make sure we got some valid inline info other than just the top
+        // level function. If we didn't, remove the inline info from the
+        // function info. We have seen cases where LTO modifies the DWARF
+        // for functions and corrupts the address ranges of the inline
+        // functions, leaving them invalid.
+ //
+ // By checking if there are any valid children on the top level inline
+ // information object, we will know if we got anything valid from the
+ // debug info.
+ if (FI.Inline->Children.empty()) {
+ if (WarnIfEmpty && OS && !Gsym.isQuiet()) {
+ *OS << "warning: DIE contains inline function information that has "
+ "no valid ranges, removing inline information:\n";
+ Die.dump(*OS, 0, DIDumpOptions::getForSingleDIE());
+ }
+ FI.Inline = std::nullopt;
+ }
}
Gsym.addFunctionInfo(std::move(FI));
}
@@ -425,18 +535,18 @@ void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) {
handleDie(OS, CUI, ChildDie);
}
-Error DwarfTransformer::convert(uint32_t NumThreads) {
+Error DwarfTransformer::convert(uint32_t NumThreads, raw_ostream *OS) {
size_t NumBefore = Gsym.getNumFunctionInfos();
auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie {
DWARFDie ReturnDie = DwarfUnit.getUnitDIE(false);
- if (std::optional<uint64_t> DWOId = DwarfUnit.getDWOId()) {
+ if (DwarfUnit.getDWOId()) {
DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(false).getDwarfUnit();
- if (!DWOCU->isDWOUnit()) {
+ if (OS && !DWOCU->isDWOUnit()) {
std::string DWOName = dwarf::toString(
DwarfUnit.getUnitDIE().find(
{dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
"");
- Log << "warning: Unable to retrieve DWO .debug_info section for "
+ *OS << "warning: Unable to retrieve DWO .debug_info section for "
<< DWOName << "\n";
} else {
ReturnDie = DWOCU->getUnitDIE(false);
@@ -450,7 +560,7 @@ Error DwarfTransformer::convert(uint32_t NumThreads) {
for (const auto &CU : DICtx.compile_units()) {
DWARFDie Die = getDie(*CU);
CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
- handleDie(Log, CUI, Die);
+ handleDie(OS, CUI, Die);
}
} else {
// LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up
@@ -476,15 +586,15 @@ Error DwarfTransformer::convert(uint32_t NumThreads) {
DWARFDie Die = getDie(*CU);
if (Die) {
CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
- pool.async([this, CUI, &LogMutex, Die]() mutable {
+ pool.async([this, CUI, &LogMutex, OS, Die]() mutable {
std::string ThreadLogStorage;
raw_string_ostream ThreadOS(ThreadLogStorage);
- handleDie(ThreadOS, CUI, Die);
+          handleDie(OS ? &ThreadOS : nullptr, CUI, Die);
ThreadOS.flush();
- if (!ThreadLogStorage.empty()) {
+ if (OS && !ThreadLogStorage.empty()) {
// Print ThreadLogStorage lines into an actual stream under a lock
std::lock_guard<std::mutex> guard(LogMutex);
- Log << ThreadLogStorage;
+ *OS << ThreadLogStorage;
}
});
}
@@ -492,11 +602,12 @@ Error DwarfTransformer::convert(uint32_t NumThreads) {
pool.wait();
}
size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore;
- Log << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n";
+ if (OS)
+ *OS << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n";
return Error::success();
}
-llvm::Error DwarfTransformer::verify(StringRef GsymPath) {
+llvm::Error DwarfTransformer::verify(StringRef GsymPath, raw_ostream &Log) {
Log << "Verifying GSYM file \"" << GsymPath << "\":\n";
auto Gsym = GsymReader::openFile(GsymPath);
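
A recurring shape in this file: the log stream changed from raw_ostream& to raw_ostream*, and every emission site is now guarded, so quiet callers pass nullptr and skip the formatting work entirely. The pattern in miniature, with std::ostream standing in for raw_ostream and report() purely illustrative:

#include <iostream>
#include <ostream>

// Hypothetical reporting helper showing the nullable-log convention.
static void report(std::ostream *Log, unsigned Offset) {
  if (Log) // every site checks before formatting
    *Log << "error: function at 0x" << std::hex << Offset
         << " has no name\n";
}

int main() {
  report(&std::cerr, 0x1234); // verbose mode
  report(nullptr, 0x1234);    // quiet mode: no work done
}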
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
index 145a43d3b381..07303d551af5 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -101,7 +101,7 @@ uint64_t FunctionInfo::cacheEncoding() {
if (!isValid())
return 0;
raw_svector_ostream OutStrm(EncodingCache);
- FileWriter FW(OutStrm, support::endian::system_endianness());
+ FileWriter FW(OutStrm, llvm::endianness::native);
llvm::Expected<uint64_t> Result = encode(FW);
if (!Result) {
EncodingCache.clear();
@@ -123,7 +123,7 @@ llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out) const {
// precompute exactly how big FunctionInfo objects encode into so we can
// accurately make segments of a specific size.
if (!EncodingCache.empty() &&
- support::endian::system_endianness() == Out.getByteOrder()) {
+ llvm::endianness::native == Out.getByteOrder()) {
// We already encoded this object, just write out the bytes.
Out.writeData(llvm::ArrayRef<uint8_t>((const uint8_t *)EncodingCache.data(),
EncodingCache.size()));
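
This hunk is one instance of a mechanical migration that recurs below (GsymCreator, GsymReader, the LogicalView CodeView readers, MSFBuilder): the support::endian::system_endianness() call becomes the llvm::endianness::native enumerator. The same idea in portable C++20, with std::endian standing in for llvm::endianness:

#include <bit>
#include <cstdio>

int main() {
  // `native` is a constant alias for the host byte order, so the old
  // runtime query becomes a compile-time comparison.
  constexpr bool HostIsLittle = std::endian::native == std::endian::little;
  std::printf("host is %s-endian\n", HostIsLittle ? "little" : "big");
}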
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
index 60b6dbc6a12d..ee7b0efba5ea 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
@@ -61,9 +61,7 @@ uint32_t GsymCreator::copyFile(const GsymCreator &SrcGC, uint32_t FileIdx) {
return insertFileEntry(DstFE);
}
-
-llvm::Error GsymCreator::save(StringRef Path,
- llvm::support::endianness ByteOrder,
+llvm::Error GsymCreator::save(StringRef Path, llvm::endianness ByteOrder,
std::optional<uint64_t> SegmentSize) const {
if (SegmentSize)
return saveSegments(Path, ByteOrder, *SegmentSize);
@@ -187,35 +185,12 @@ llvm::Error GsymCreator::encode(FileWriter &O) const {
return ErrorSuccess();
}
-// Similar to std::remove_if, but the predicate is binary and it is passed both
-// the previous and the current element.
-template <class ForwardIt, class BinaryPredicate>
-static ForwardIt removeIfBinary(ForwardIt FirstIt, ForwardIt LastIt,
- BinaryPredicate Pred) {
- if (FirstIt != LastIt) {
- auto PrevIt = FirstIt++;
- FirstIt = std::find_if(FirstIt, LastIt, [&](const auto &Curr) {
- return Pred(*PrevIt++, Curr);
- });
- if (FirstIt != LastIt)
- for (ForwardIt CurrIt = FirstIt; ++CurrIt != LastIt;)
- if (!Pred(*PrevIt, *CurrIt)) {
- PrevIt = FirstIt;
- *FirstIt++ = std::move(*CurrIt);
- }
- }
- return FirstIt;
-}
-
llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
std::lock_guard<std::mutex> Guard(Mutex);
if (Finalized)
return createStringError(std::errc::invalid_argument, "already finalized");
Finalized = true;
- // Sort function infos so we can emit sorted functions.
- llvm::sort(Funcs);
-
// Don't let the string table indexes change by finalizing in order.
StrTab.finalizeInOrder();
@@ -239,83 +214,85 @@ llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
// Note that in case of (b), we cannot include Y in the result because then
// we wouldn't find any function for range (end of Y, end of X)
// with binary search
- auto NumBefore = Funcs.size();
- Funcs.erase(
- removeIfBinary(Funcs.begin(), Funcs.end(),
- [&](const auto &Prev, const auto &Curr) {
- // Empty ranges won't intersect, but we still need to
- // catch the case where we have multiple symbols at the
- // same address and coalesce them.
- const bool ranges_equal = Prev.Range == Curr.Range;
- if (ranges_equal || Prev.Range.intersects(Curr.Range)) {
- // Overlapping ranges or empty identical ranges.
- if (ranges_equal) {
- // Same address range. Check if one is from debug
- // info and the other is from a symbol table. If
- // so, then keep the one with debug info. Our
- // sorting guarantees that entries with matching
- // address ranges that have debug info are last in
- // the sort.
- if (Prev == Curr) {
- // FunctionInfo entries match exactly (range,
- // lines, inlines)
-
- // We used to output a warning here, but this was
- // so frequent on some binaries, in particular
- // when those were built with GCC, that it slowed
- // down processing extremely.
- return true;
- } else {
- if (!Prev.hasRichInfo() && Curr.hasRichInfo()) {
- // Same address range, one with no debug info
- // (symbol) and the next with debug info. Keep
- // the latter.
- return true;
- } else {
- if (!Quiet) {
- OS << "warning: same address range contains "
- "different debug "
- << "info. Removing:\n"
- << Prev << "\nIn favor of this one:\n"
- << Curr << "\n";
- }
- return true;
- }
- }
- } else {
- if (!Quiet) { // print warnings about overlaps
- OS << "warning: function ranges overlap:\n"
- << Prev << "\n"
- << Curr << "\n";
- }
- }
- } else if (Prev.Range.size() == 0 &&
- Curr.Range.contains(Prev.Range.start())) {
- if (!Quiet) {
- OS << "warning: removing symbol:\n"
- << Prev << "\nKeeping:\n"
- << Curr << "\n";
- }
- return true;
- }
-
- return false;
- }),
- Funcs.end());
-
- // If our last function info entry doesn't have a size and if we have valid
- // text ranges, we should set the size of the last entry since any search for
- // a high address might match our last entry. By fixing up this size, we can
- // help ensure we don't cause lookups to always return the last symbol that
- // has no size when doing lookups.
- if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
- if (auto Range =
- ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) {
- Funcs.back().Range = {Funcs.back().Range.start(), Range->end()};
+
+ const auto NumBefore = Funcs.size();
+  // Only sort and unique if this isn't a segment. If this is a segment, the
+  // main GsymCreator was already finalized with all of the function infos,
+  // and the sorted and uniqued function infos were then added to this
+  // object.
+ if (!IsSegment) {
+ if (NumBefore > 1) {
+ // Sort function infos so we can emit sorted functions.
+ llvm::sort(Funcs);
+ std::vector<FunctionInfo> FinalizedFuncs;
+ FinalizedFuncs.reserve(Funcs.size());
+ FinalizedFuncs.emplace_back(std::move(Funcs.front()));
+      for (size_t Idx = 1; Idx < NumBefore; ++Idx) {
+ FunctionInfo &Prev = FinalizedFuncs.back();
+ FunctionInfo &Curr = Funcs[Idx];
+ // Empty ranges won't intersect, but we still need to
+ // catch the case where we have multiple symbols at the
+ // same address and coalesce them.
+ const bool ranges_equal = Prev.Range == Curr.Range;
+ if (ranges_equal || Prev.Range.intersects(Curr.Range)) {
+ // Overlapping ranges or empty identical ranges.
+ if (ranges_equal) {
+ // Same address range. Check if one is from debug
+ // info and the other is from a symbol table. If
+ // so, then keep the one with debug info. Our
+ // sorting guarantees that entries with matching
+ // address ranges that have debug info are last in
+ // the sort.
+ if (!(Prev == Curr)) {
+ if (Prev.hasRichInfo() && Curr.hasRichInfo()) {
+ if (!Quiet) {
+ OS << "warning: same address range contains "
+ "different debug "
+ << "info. Removing:\n"
+ << Prev << "\nIn favor of this one:\n"
+ << Curr << "\n";
+ }
+ }
+ // We want to swap the current entry with the previous since
+ // later entries with the same range always have more debug info
+ // or different debug info.
+ std::swap(Prev, Curr);
+ }
+ } else {
+ if (!Quiet) { // print warnings about overlaps
+ OS << "warning: function ranges overlap:\n"
+ << Prev << "\n"
+ << Curr << "\n";
+ }
+ FinalizedFuncs.emplace_back(std::move(Curr));
+ }
+ } else {
+ if (Prev.Range.size() == 0 && Curr.Range.contains(Prev.Range.start())) {
+ // Symbols on macOS don't have address ranges, so if the range
+ // doesn't match and the size is zero, then we replace the empty
+ // symbol function info with the current one.
+ std::swap(Prev, Curr);
+ } else {
+ FinalizedFuncs.emplace_back(std::move(Curr));
+ }
+ }
+ }
+ std::swap(Funcs, FinalizedFuncs);
+ }
+ // If our last function info entry doesn't have a size and if we have valid
+ // text ranges, we should set the size of the last entry since any search for
+ // a high address might match our last entry. By fixing up this size, we can
+ // help ensure we don't cause lookups to always return the last symbol that
+ // has no size when doing lookups.
+ if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
+ if (auto Range =
+ ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) {
+ Funcs.back().Range = {Funcs.back().Range.start(), Range->end()};
+ }
}
+ OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
+ << Funcs.size() << " total\n";
}
- OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
- << Funcs.size() << " total\n";
return Error::success();
}
@@ -355,7 +332,6 @@ uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
std::lock_guard<std::mutex> Guard(Mutex);
- Ranges.insert(FI.Range);
Funcs.emplace_back(std::move(FI));
}
@@ -388,31 +364,24 @@ bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {
  return true; // No valid text ranges have been set, so accept all ranges.
}
-bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const {
- std::lock_guard<std::mutex> Guard(Mutex);
- return Ranges.contains(Addr);
-}
-
std::optional<uint64_t> GsymCreator::getFirstFunctionAddress() const {
- if (Finalized && !Funcs.empty())
+ // If we have finalized then Funcs are sorted. If we are a segment then
+ // Funcs will be sorted as well since function infos get added from an
+ // already finalized GsymCreator object where its functions were sorted and
+ // uniqued.
+ if ((Finalized || IsSegment) && !Funcs.empty())
return std::optional<uint64_t>(Funcs.front().startAddress());
- // This code gets used by the segmentation of GSYM files to help determine the
- // size of the GSYM header while continually adding new FunctionInfo objects
- // to this object, so we haven't finalized this object yet.
- if (Ranges.empty())
- return std::nullopt;
- return std::optional<uint64_t>(Ranges.begin()->start());
+ return std::nullopt;
}
std::optional<uint64_t> GsymCreator::getLastFunctionAddress() const {
- if (Finalized && !Funcs.empty())
+ // If we have finalized then Funcs are sorted. If we are a segment then
+ // Funcs will be sorted as well since function infos get added from an
+ // already finalized GsymCreator object where its functions were sorted and
+ // uniqued.
+ if ((Finalized || IsSegment) && !Funcs.empty())
return std::optional<uint64_t>(Funcs.back().startAddress());
- // This code gets used by the segmentation of GSYM files to help determine the
- // size of the GSYM header while continually adding new FunctionInfo objects
- // to this object, so we haven't finalized this object yet.
- if (Ranges.empty())
- return std::nullopt;
- return std::optional<uint64_t>((Ranges.end() - 1)->end());
+ return std::nullopt;
}
std::optional<uint64_t> GsymCreator::getBaseAddress() const {
@@ -477,7 +446,6 @@ uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncIdx)
// this GsymCreator and then copy the function info and update the string
// table offsets to match the new offsets.
const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx];
- Ranges.insert(SrcFI.Range);
FunctionInfo DstFI;
DstFI.Range = SrcFI.Range;
@@ -503,12 +471,12 @@ uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncIdx)
fixupInlineInfo(SrcGC, *DstFI.Inline);
}
std::lock_guard<std::mutex> Guard(Mutex);
- Funcs.push_back(DstFI);
+ Funcs.emplace_back(DstFI);
return Funcs.back().cacheEncoding();
}
llvm::Error GsymCreator::saveSegments(StringRef Path,
- llvm::support::endianness ByteOrder,
+ llvm::endianness ByteOrder,
uint64_t SegmentSize) const {
if (SegmentSize == 0)
return createStringError(std::errc::invalid_argument,
@@ -551,6 +519,10 @@ GsymCreator::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const {
return std::unique_ptr<GsymCreator>();
std::unique_ptr<GsymCreator> GC(new GsymCreator(/*Quiet=*/true));
+
+ // Tell the creator that this is a segment.
+ GC->setIsSegment();
+
// Set the base address if there is one.
if (BaseAddress)
GC->setBaseAddress(*BaseAddress);
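
The finalize() rewrite above replaces the removeIfBinary pass with one forward merge over the sorted entries: each entry either overwrites its predecessor (identical ranges keep the richer entry; a zero-sized symbol is absorbed by a range that contains its start) or is appended. A toy sketch of that control flow, with Fn standing in for FunctionInfo and the warnings elided:

#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

struct Fn { unsigned Start, End; }; // stand-in for FunctionInfo

static void finalizeFuncs(std::vector<Fn> &Funcs) {
  if (Funcs.size() < 2)
    return;
  std::sort(Funcs.begin(), Funcs.end(),
            [](const Fn &A, const Fn &B) { return A.Start < B.Start; });
  std::vector<Fn> Out;
  Out.reserve(Funcs.size());
  Out.push_back(Funcs.front());
  for (std::size_t I = 1; I < Funcs.size(); ++I) {
    Fn &Prev = Out.back();
    Fn &Curr = Funcs[I];
    if (Prev.Start == Curr.Start && Prev.End == Curr.End)
      std::swap(Prev, Curr); // equal ranges: later entries are richer
    else if (Prev.Start == Prev.End && Curr.Start <= Prev.Start &&
             Prev.Start < Curr.End)
      std::swap(Prev, Curr); // zero-sized symbol inside a real range
    else
      Out.push_back(Curr);   // keep both (overlaps merely warn)
  }
  Funcs.swap(Out);
}

int main() {
  std::vector<Fn> F = {{16, 16}, {16, 32}, {16, 32}, {48, 64}};
  finalizeFuncs(F); // -> {{16, 32}, {48, 64}}
}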
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
index 6afaeea8f598..4b1b35246617 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -23,11 +23,10 @@
using namespace llvm;
using namespace gsym;
-GsymReader::GsymReader(std::unique_ptr<MemoryBuffer> Buffer) :
- MemBuffer(std::move(Buffer)),
- Endian(support::endian::system_endianness()) {}
+GsymReader::GsymReader(std::unique_ptr<MemoryBuffer> Buffer)
+ : MemBuffer(std::move(Buffer)), Endian(llvm::endianness::native) {}
- GsymReader::GsymReader(GsymReader &&RHS) = default;
+GsymReader::GsymReader(GsymReader &&RHS) = default;
GsymReader::~GsymReader() = default;
@@ -60,8 +59,7 @@ GsymReader::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {
llvm::Error
GsymReader::parse() {
- BinaryStreamReader FileData(MemBuffer->getBuffer(),
- support::endian::system_endianness());
+ BinaryStreamReader FileData(MemBuffer->getBuffer(), llvm::endianness::native);
// Check for the magic bytes. This file format is designed to be mmap'ed
// into a process and accessed as read only. This is done for performance
// and efficiency for symbolicating and parsing GSYM data.
@@ -69,14 +67,15 @@ GsymReader::parse() {
return createStringError(std::errc::invalid_argument,
"not enough data for a GSYM header");
- const auto HostByteOrder = support::endian::system_endianness();
+ const auto HostByteOrder = llvm::endianness::native;
switch (Hdr->Magic) {
case GSYM_MAGIC:
Endian = HostByteOrder;
break;
case GSYM_CIGAM:
// This is a GSYM file, but not native endianness.
- Endian = sys::IsBigEndianHost ? support::little : support::big;
+ Endian = sys::IsBigEndianHost ? llvm::endianness::little
+ : llvm::endianness::big;
Swap.reset(new SwappedData);
break;
default:
@@ -84,7 +83,7 @@ GsymReader::parse() {
"not a GSYM file");
}
- bool DataIsLittleEndian = HostByteOrder != support::little;
+ bool DataIsLittleEndian = HostByteOrder != llvm::endianness::little;
// Read a correctly byte swapped header if we need to.
if (Swap) {
DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
@@ -254,41 +253,94 @@ GsymReader::getAddressIndex(const uint64_t Addr) const {
}
-llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const {
- Expected<uint64_t> AddressIndex = getAddressIndex(Addr);
- if (!AddressIndex)
- return AddressIndex.takeError();
- // Address info offsets size should have been checked in parse().
- assert(*AddressIndex < AddrInfoOffsets.size());
- auto AddrInfoOffset = AddrInfoOffsets[*AddressIndex];
- DataExtractor Data(MemBuffer->getBuffer().substr(AddrInfoOffset), Endian, 4);
- if (std::optional<uint64_t> OptAddr = getAddress(*AddressIndex)) {
- auto ExpectedFI = FunctionInfo::decode(Data, *OptAddr);
- if (ExpectedFI) {
- if (ExpectedFI->Range.contains(Addr) || ExpectedFI->Range.size() == 0)
- return ExpectedFI;
- return createStringError(std::errc::invalid_argument,
- "address 0x%" PRIx64 " is not in GSYM", Addr);
+llvm::Expected<DataExtractor>
+GsymReader::getFunctionInfoDataForAddress(uint64_t Addr,
+ uint64_t &FuncStartAddr) const {
+ Expected<uint64_t> ExpectedAddrIdx = getAddressIndex(Addr);
+ if (!ExpectedAddrIdx)
+ return ExpectedAddrIdx.takeError();
+ const uint64_t FirstAddrIdx = *ExpectedAddrIdx;
+ // The AddrIdx is the first index of the function info entries that match
+ // \a Addr. We need to iterate over all function info objects that start with
+ // the same address until we find a range that contains \a Addr.
+ std::optional<uint64_t> FirstFuncStartAddr;
+ const size_t NumAddresses = getNumAddresses();
+ for (uint64_t AddrIdx = FirstAddrIdx; AddrIdx < NumAddresses; ++AddrIdx) {
+    auto ExpectedData = getFunctionInfoDataAtIndex(AddrIdx, FuncStartAddr);
+    // If there was an error, return the error.
+    if (!ExpectedData)
+      return ExpectedData;
+
+    // Remember the first function start address if it hasn't been set yet.
+    // If it is already set, only continue while subsequent entries share
+    // that same start address.
+    if (FirstFuncStartAddr.has_value()) {
+      if (*FirstFuncStartAddr != FuncStartAddr)
+        break; // Done with consecutive function entries with the same address.
+ } else {
+ FirstFuncStartAddr = FuncStartAddr;
}
+    // Make sure the current function address range contains \a Addr.
+ // Some symbols on Darwin don't have valid sizes, so if we run into a
+ // symbol with zero size, then we have found a match for our address.
+
+    // The first field in the encoding of a FunctionInfo object is the
+    // function size.
+    uint64_t Offset = 0;
+    uint32_t FuncSize = ExpectedData->getU32(&Offset);
+ if (FuncSize == 0 ||
+ AddressRange(FuncStartAddr, FuncStartAddr + FuncSize).contains(Addr))
+      return ExpectedData;
}
return createStringError(std::errc::invalid_argument,
- "failed to extract address[%" PRIu64 "]",
- *AddressIndex);
+ "address 0x%" PRIx64 " is not in GSYM", Addr);
+}
+
+llvm::Expected<DataExtractor>
+GsymReader::getFunctionInfoDataAtIndex(uint64_t AddrIdx,
+ uint64_t &FuncStartAddr) const {
+ if (AddrIdx >= getNumAddresses())
+ return createStringError(std::errc::invalid_argument,
+ "invalid address index %" PRIu64, AddrIdx);
+ const uint32_t AddrInfoOffset = AddrInfoOffsets[AddrIdx];
+ assert((Endian == endianness::big || Endian == endianness::little) &&
+ "Endian must be either big or little");
+ StringRef Bytes = MemBuffer->getBuffer().substr(AddrInfoOffset);
+ if (Bytes.empty())
+ return createStringError(std::errc::invalid_argument,
+ "invalid address info offset 0x%" PRIx32,
+ AddrInfoOffset);
+ std::optional<uint64_t> OptFuncStartAddr = getAddress(AddrIdx);
+ if (!OptFuncStartAddr)
+ return createStringError(std::errc::invalid_argument,
+ "failed to extract address[%" PRIu64 "]", AddrIdx);
+ FuncStartAddr = *OptFuncStartAddr;
+ return DataExtractor(Bytes, Endian == llvm::endianness::little, 4);
+}
+
+llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const {
+ uint64_t FuncStartAddr = 0;
+ if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
+ return FunctionInfo::decode(*ExpectedData, FuncStartAddr);
+ else
+ return ExpectedData.takeError();
+}
+
+llvm::Expected<FunctionInfo>
+GsymReader::getFunctionInfoAtIndex(uint64_t Idx) const {
+ uint64_t FuncStartAddr = 0;
+ if (auto ExpectedData = getFunctionInfoDataAtIndex(Idx, FuncStartAddr))
+ return FunctionInfo::decode(*ExpectedData, FuncStartAddr);
+ else
+ return ExpectedData.takeError();
}
llvm::Expected<LookupResult> GsymReader::lookup(uint64_t Addr) const {
- Expected<uint64_t> AddressIndex = getAddressIndex(Addr);
- if (!AddressIndex)
- return AddressIndex.takeError();
- // Address info offsets size should have been checked in parse().
- assert(*AddressIndex < AddrInfoOffsets.size());
- auto AddrInfoOffset = AddrInfoOffsets[*AddressIndex];
- DataExtractor Data(MemBuffer->getBuffer().substr(AddrInfoOffset), Endian, 4);
- if (std::optional<uint64_t> OptAddr = getAddress(*AddressIndex))
- return FunctionInfo::lookup(Data, *this, *OptAddr, Addr);
- return createStringError(std::errc::invalid_argument,
- "failed to extract address[%" PRIu64 "]",
- *AddressIndex);
+ uint64_t FuncStartAddr = 0;
+ if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
+ return FunctionInfo::lookup(*ExpectedData, *this, FuncStartAddr, Addr);
+ else
+ return ExpectedData.takeError();
}
void GsymReader::dump(raw_ostream &OS) {
@@ -339,7 +391,7 @@ void GsymReader::dump(raw_ostream &OS) {
for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
OS << "FunctionInfo @ " << HEX32(AddrInfoOffsets[I]) << ": ";
- if (auto FI = getFunctionInfo(*getAddress(I)))
+ if (auto FI = getFunctionInfoAtIndex(I))
dump(OS, *FI);
else
logAllUnhandledErrors(FI.takeError(), OS, "FunctionInfo:");
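
getFunctionInfoDataForAddress() above changes address lookup from decoding the single entry at the matched index to scanning the whole run of entries that share that start address, accepting the first whose range contains the address (zero-sized entries always match). The loop in toy form, with Entry standing in for a GSYM address table row:

#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

struct Entry { uint64_t Start; uint32_t Size; }; // stand-in table row

static std::optional<Entry> lookup(const std::vector<Entry> &Table,
                                   std::size_t FirstIdx, uint64_t Addr) {
  for (std::size_t I = FirstIdx; I < Table.size(); ++I) {
    if (Table[I].Start != Table[FirstIdx].Start)
      break; // the run of entries sharing the start address is over
    // Zero-sized entries (symbols without sizes, e.g. on Darwin) always
    // match; otherwise require Addr inside [Start, Start + Size).
    if (Table[I].Size == 0 ||
        (Addr >= Table[I].Start && Addr - Table[I].Start < Table[I].Size))
      return Table[I];
  }
  return std::nullopt; // "address is not in GSYM"
}

int main() {
  std::vector<Entry> T = {{0x1000, 4}, {0x1000, 0x40}};
  return lookup(T, 0, 0x1010) ? 0 : 1; // matches the second entry
}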
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
index f775ab8fb65c..ecfb21501eda 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
@@ -264,3 +264,14 @@ llvm::Error InlineInfo::encode(FileWriter &O, uint64_t BaseAddr) const {
}
return Error::success();
}
+
+static uint64_t GetTotalNumChildren(const InlineInfo &II) {
+ uint64_t NumChildren = II.Children.size();
+ for (const auto &Child : II.Children)
+ NumChildren += GetTotalNumChildren(Child);
+ return NumChildren;
+}
+
+bool InlineInfo::operator<(const InlineInfo &RHS) const {
+ return GetTotalNumChildren(*this) < GetTotalNumChildren(RHS);
+}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/LineTable.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/LineTable.cpp
index a49a3ba9bf2a..666d9f15f1b4 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/LineTable.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/LineTable.cpp
@@ -270,11 +270,6 @@ Expected<LineEntry> LineTable::lookup(DataExtractor &Data, uint64_t BaseAddr, ui
if (Addr < Row.Addr)
return false; // Stop parsing, result contains the line table row!
Result = Row;
- if (Addr == Row.Addr) {
- // Stop parsing, this is the row we are looking for since the address
- // matches.
- return false;
- }
return true; // Keep parsing till we find the right row.
});
if (Err)
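
Deleting the exact-match early return above means LineTable::lookup() keeps consuming rows until the first row past the target, so when a table carries several rows at one address the last one now wins instead of the first. In miniature:

#include <cstdint>
#include <vector>

struct Row { uint64_t Addr; uint32_t Line; }; // toy line-table row

static Row lookup(const std::vector<Row> &Rows, uint64_t Addr) {
  Row Result{0, 0};
  for (const Row &R : Rows) {
    if (Addr < R.Addr)
      break;    // rows are address-sorted; we went past the target
    Result = R; // no early return on Addr == R.Addr anymore
  }
  return Result;
}

int main() {
  std::vector<Row> Rows = {{0x10, 5}, {0x10, 6}, {0x20, 9}};
  return lookup(Rows, 0x10).Line == 6 ? 0 : 1;
}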
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp
index ad35aefe7774..a60b2d386076 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp
@@ -68,7 +68,7 @@ static std::vector<uint8_t> getUUID(const object::ObjectFile &Obj) {
}
llvm::Error ObjectFileTransformer::convert(const object::ObjectFile &Obj,
- raw_ostream &Log,
+ raw_ostream *Log,
GsymCreator &Gsym) {
using namespace llvm::object;
@@ -92,15 +92,18 @@ llvm::Error ObjectFileTransformer::convert(const object::ObjectFile &Obj,
return AddrOrErr.takeError();
if (SymType.get() != SymbolRef::Type::ST_Function ||
- !Gsym.IsValidTextAddress(*AddrOrErr) ||
- Gsym.hasFunctionInfoForAddress(*AddrOrErr))
+ !Gsym.IsValidTextAddress(*AddrOrErr))
continue;
// Function size for MachO files will be 0
constexpr bool NoCopy = false;
const uint64_t size = IsELF ? ELFSymbolRef(Sym).getSize() : 0;
Expected<StringRef> Name = Sym.getName();
if (!Name) {
- logAllUnhandledErrors(Name.takeError(), Log, "ObjectFileTransformer: ");
+ if (Log)
+ logAllUnhandledErrors(Name.takeError(), *Log,
+ "ObjectFileTransformer: ");
+ else
+ consumeError(Name.takeError());
continue;
}
// Remove the leading '_' character in any symbol names if there is one
@@ -111,6 +114,8 @@ llvm::Error ObjectFileTransformer::convert(const object::ObjectFile &Obj,
FunctionInfo(*AddrOrErr, size, Gsym.insertString(*Name, NoCopy)));
}
size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore;
- Log << "Loaded " << FunctionsAddedCount << " functions from symbol table.\n";
+ if (Log)
+ *Log << "Loaded " << FunctionsAddedCount
+ << " functions from symbol table.\n";
return Error::success();
}
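
The new else branch above is load-bearing: an Expected's failure state must be consumed exactly once even when there is no log sink, or llvm::Error's checked-error assertion aborts in debug builds. A sketch using the same Error utilities seen in this hunk (demo() itself is illustrative):

#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

static void demo(llvm::raw_ostream *Log) {
  llvm::Expected<int> V =
      llvm::createStringError(llvm::inconvertibleErrorCode(), "boom");
  if (!V) {
    if (Log)
      llvm::logAllUnhandledErrors(V.takeError(), *Log, "demo: ");
    else
      llvm::consumeError(V.takeError()); // silent, but marked as handled
  }
}

int main() {
  demo(&llvm::errs()); // verbose path
  demo(nullptr);       // quiet path still consumes the error
}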
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Core/LVCompare.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Core/LVCompare.cpp
index 65baf52ffb44..3ed0de14f93f 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Core/LVCompare.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Core/LVCompare.cpp
@@ -206,11 +206,10 @@ Error LVCompare::execute(LVReader *ReferenceReader, LVReader *TargetReader) {
updateExpected(Reference);
Reference->setIsInCompare();
LVElement *CurrentTarget = nullptr;
- if (std::any_of(Targets.begin(), Targets.end(),
- [&](auto Target) -> bool {
- CurrentTarget = Target;
- return Reference->equals(Target);
- })) {
+ if (llvm::any_of(Targets, [&](auto Target) -> bool {
+ CurrentTarget = Target;
+ return Reference->equals(Target);
+ })) {
if (Pass == LVComparePass::Missing && Reference->getIsScope()) {
// If the elements being compared are scopes and are a match,
// they are recorded, to be used when creating the augmented
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Core/LVElement.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Core/LVElement.cpp
index cfe304eead51..30ce937cda44 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Core/LVElement.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Core/LVElement.cpp
@@ -252,8 +252,7 @@ void LVElement::generateName(std::string &Prefix) const {
Prefix.append(isLined() ? lineNumberAsString(/*ShowZero=*/true) : "?");
// Remove any whitespaces.
- Prefix.erase(std::remove_if(Prefix.begin(), Prefix.end(), ::isspace),
- Prefix.end());
+ llvm::erase_if(Prefix, ::isspace);
}
// Generate a name for unnamed elements.
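
This hunk, like the LVScope and LVCodeViewVisitor hunks below, folds the erase(remove_if(...), end()) idiom into llvm::erase_if / llvm::erase. C++20's std::erase_if is the standard-library twin of the same one-liner:

#include <cctype>
#include <string>

int main() {
  std::string Prefix = "CU @ 0x 12";
  // One call replaces the erase(remove_if(begin, end, pred), end) dance;
  // the unsigned-char cast keeps isspace well-defined for all byte values.
  std::erase_if(Prefix, [](unsigned char C) { return std::isspace(C); });
  return Prefix == "CU@0x12" ? 0 : 1;
}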
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp
index 2f26025d01ec..8bbaf93db0ca 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp
@@ -299,20 +299,12 @@ void LVScope::addMissingElements(LVScope *Reference) {
LVSymbols References;
References.append(ReferenceSymbols->begin(), ReferenceSymbols->end());
- auto RemoveSymbol = [&](LVSymbols &Symbols, LVSymbol *Symbol) {
- LVSymbols::iterator Iter = std::remove_if(
- Symbols.begin(), Symbols.end(),
- [Symbol](LVSymbol *Item) -> bool { return Item == Symbol; });
- if (Iter != Symbols.end())
- Symbols.erase(Iter, Symbols.end());
- };
-
// Erase abstract symbols already in this scope from the collection of
// symbols in the referenced scope.
if (getSymbols())
for (const LVSymbol *Symbol : *getSymbols())
if (Symbol->getHasReferenceAbstract())
- RemoveSymbol(References, Symbol->getReference());
+ llvm::erase(References, Symbol->getReference());
// If we have elements left in 'References', those are the elements that
// need to be inserted in the current scope.
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp
index d72fe2683f92..1f6724988ae9 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp
@@ -221,7 +221,7 @@ bool LVCodeViewReader::isSystemEntry(LVElement *Element, StringRef Name) const {
return StringRef::npos != Name.find(String);
};
auto Starts = [=](const char *Pattern) -> bool {
- return Name.startswith(Pattern);
+ return Name.starts_with(Pattern);
};
auto CheckExclude = [&]() -> bool {
if (Starts("__") || Starts("_PMD") || Starts("_PMFN"))
@@ -276,7 +276,7 @@ Error LVCodeViewReader::collectInlineeInfo(
}
Error LVCodeViewReader::traverseInlineeLines(StringRef Subsection) {
- BinaryStreamReader SR(Subsection, llvm::support::little);
+ BinaryStreamReader SR(Subsection, llvm::endianness::little);
DebugInlineeLinesSubsectionRef Lines;
if (Error E = Lines.initialize(SR))
return createStringError(errorToErrorCode(std::move(E)), getFileName());
@@ -349,7 +349,7 @@ Error LVCodeViewReader::initializeFileAndStringTables(
if (Error E = Reader.readFixedString(Contents, SubSectionSize))
return createStringError(errorToErrorCode(std::move(E)), getFileName());
- BinaryStreamRef ST(Contents, support::little);
+ BinaryStreamRef ST(Contents, llvm::endianness::little);
switch (DebugSubsectionKind(SubType)) {
case DebugSubsectionKind::FileChecksums:
if (Error E = CVFileChecksumTable.initialize(ST))
@@ -478,8 +478,8 @@ Error LVCodeViewReader::loadPrecompiledObject(PrecompRecord &Precomp,
if (Magic != COFF::DEBUG_SECTION_MAGIC)
return errorCodeToError(object_error::parse_failed);
- ReaderPrecomp =
- std::make_unique<BinaryStreamReader>(*DataOrErr, support::little);
+ ReaderPrecomp = std::make_unique<BinaryStreamReader>(
+ *DataOrErr, llvm::endianness::little);
cantFail(
ReaderPrecomp->readArray(CVTypesPrecomp, ReaderPrecomp->getLength()));
@@ -514,7 +514,7 @@ Error LVCodeViewReader::loadPrecompiledObject(PrecompRecord &Precomp,
[&](TypeIndex TI, const CVType &Type) { TypeArray.push_back(Type); });
ItemStream =
- std::make_unique<BinaryItemStream<CVType>>(llvm::support::little);
+ std::make_unique<BinaryItemStream<CVType>>(llvm::endianness::little);
ItemStream->setItems(TypeArray);
TypeStream.setUnderlyingStream(*ItemStream);
@@ -550,7 +550,7 @@ Error LVCodeViewReader::traverseTypeSection(StringRef SectionName,
// Get the first type record. It will indicate if this object uses a type
// server (/Zi) or a PCH file (/Yu).
CVTypeArray CVTypes;
- BinaryStreamReader Reader(*DataOrErr, support::little);
+ BinaryStreamReader Reader(*DataOrErr, llvm::endianness::little);
cantFail(Reader.readArray(CVTypes, Reader.getLength()));
CVTypeArray::Iterator FirstType = CVTypes.begin();
@@ -621,7 +621,7 @@ Error LVCodeViewReader::traverseSymbolsSubsection(StringRef Subsection,
LVSymbolVisitorDelegate VisitorDelegate(this, Section, &getObj(),
SectionContents);
CVSymbolArray Symbols;
- BinaryStreamReader Reader(BinaryData, llvm::support::little);
+ BinaryStreamReader Reader(BinaryData, llvm::endianness::little);
if (Error E = Reader.readArray(Symbols, Reader.getLength()))
return createStringError(errorToErrorCode(std::move(E)), getFileName());
@@ -664,7 +664,7 @@ Error LVCodeViewReader::traverseSymbolSection(StringRef SectionName,
if (Magic != COFF::DEBUG_SECTION_MAGIC)
return createStringError(object_error::parse_failed, getFileName());
- BinaryStreamReader FSReader(Data, support::little);
+ BinaryStreamReader FSReader(Data, llvm::endianness::little);
if (Error Err = initializeFileAndStringTables(FSReader))
return Err;
@@ -752,7 +752,8 @@ Error LVCodeViewReader::traverseSymbolSection(StringRef SectionName,
W.printString("Symbol Name", SymbolName);
});
- BinaryStreamReader Reader(FunctionLineTables[SymbolName], support::little);
+ BinaryStreamReader Reader(FunctionLineTables[SymbolName],
+ llvm::endianness::little);
DebugLinesSubsectionRef Lines;
if (Error E = Lines.initialize(Reader))
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp
index e4f5f533262b..1d0178532882 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp
@@ -465,13 +465,10 @@ LVScope *LVNamespaceDeduction::get(LVStringRefs Components) {
LVScope *LVNamespaceDeduction::get(StringRef ScopedName, bool CheckScope) {
LVStringRefs Components = getAllLexicalComponents(ScopedName);
if (CheckScope)
- Components.erase(std::remove_if(Components.begin(), Components.end(),
- [&](StringRef Component) {
- LookupSet::iterator Iter =
- IdentifiedNamespaces.find(Component);
- return Iter == IdentifiedNamespaces.end();
- }),
- Components.end());
+ llvm::erase_if(Components, [&](StringRef Component) {
+ LookupSet::iterator Iter = IdentifiedNamespaces.find(Component);
+ return Iter == IdentifiedNamespaces.end();
+ });
LLVM_DEBUG(
{ dbgs() << formatv("ScopedName: '{0}'\n", ScopedName.str().c_str()); });
@@ -1688,6 +1685,48 @@ Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
return Error::success();
}
+// S_ARMSWITCHTABLE
+Error LVSymbolVisitor::visitKnownRecord(CVSymbol &CVR,
+ JumpTableSym &JumpTable) {
+ LLVM_DEBUG({
+ W.printHex("BaseOffset", JumpTable.BaseOffset);
+ W.printNumber("BaseSegment", JumpTable.BaseSegment);
+ W.printFlags("SwitchType", static_cast<uint16_t>(JumpTable.SwitchType),
+ getJumpTableEntrySizeNames());
+ W.printHex("BranchOffset", JumpTable.BranchOffset);
+ W.printHex("TableOffset", JumpTable.TableOffset);
+ W.printNumber("BranchSegment", JumpTable.BranchSegment);
+ W.printNumber("TableSegment", JumpTable.TableSegment);
+ W.printNumber("EntriesCount", JumpTable.EntriesCount);
+ });
+ return Error::success();
+}
+
+// S_CALLERS, S_CALLEES, S_INLINEES
+Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record, CallerSym &Caller) {
+ LLVM_DEBUG({
+ llvm::StringRef FieldName;
+ switch (Caller.getKind()) {
+ case SymbolRecordKind::CallerSym:
+ FieldName = "Callee";
+ break;
+ case SymbolRecordKind::CalleeSym:
+ FieldName = "Caller";
+ break;
+ case SymbolRecordKind::InlineesSym:
+ FieldName = "Inlinee";
+ break;
+ default:
+ return llvm::make_error<CodeViewError>(
+ "Unknown CV Record type for a CallerSym object!");
+ }
+ for (auto FuncID : Caller.Indices) {
+ printTypeIndex(FieldName, FuncID);
+ }
+ });
+ return Error::success();
+}
+
#undef DEBUG_TYPE
#define DEBUG_TYPE "CodeViewLogicalVisitor"
@@ -2897,7 +2936,7 @@ Error LVLogicalVisitor::finishVisitation(CVType &Record, TypeIndex TI,
// Customized version of 'FieldListVisitHelper'.
Error LVLogicalVisitor::visitFieldListMemberStream(
TypeIndex TI, LVElement *Element, ArrayRef<uint8_t> FieldList) {
- BinaryByteStream Stream(FieldList, llvm::support::little);
+ BinaryByteStream Stream(FieldList, llvm::endianness::little);
BinaryStreamReader Reader(Stream);
FieldListDeserializer Deserializer(Reader);
TypeVisitorCallbackPipeline Pipeline;
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Readers/LVELFReader.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Readers/LVELFReader.cpp
index ab458341a0bd..4469092099da 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Readers/LVELFReader.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/LogicalView/Readers/LVELFReader.cpp
@@ -1058,7 +1058,7 @@ void LVELFReader::processLocationMember(dwarf::Attribute Attr,
CurrentSymbol->addLocationConstant(Attr, *FormValue.getAsUnsignedConstant(),
OffsetOnEntry);
else
- // This is a a location description, or a reference to one.
+ // This is a location description, or a reference to one.
processLocationList(Attr, FormValue, Die, OffsetOnEntry);
}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp
index c26caa647ed9..ed2d14dd79e4 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp
@@ -16,6 +16,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/TimeProfiler.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -248,6 +249,8 @@ uint32_t MSFBuilder::computeDirectoryByteSize() const {
}
Expected<MSFLayout> MSFBuilder::generateLayout() {
+ llvm::TimeTraceScope timeScope("MSF: Generate layout");
+
SuperBlock *SB = Allocator.Allocate<SuperBlock>();
MSFLayout L;
L.SB = SB;
@@ -336,6 +339,8 @@ static void commitFpm(WritableBinaryStream &MsfBuffer, const MSFLayout &Layout,
Expected<FileBufferByteStream> MSFBuilder::commit(StringRef Path,
MSFLayout &Layout) {
+ llvm::TimeTraceScope timeScope("Commit MSF");
+
Expected<MSFLayout> L = generateLayout();
if (!L)
return L.takeError();
@@ -381,7 +386,7 @@ Expected<FileBufferByteStream> MSFBuilder::commit(StringRef Path,
return std::move(EC);
FileBufferByteStream Buffer(std::move(*OutFileOrError),
- llvm::support::little);
+ llvm::endianness::little);
BinaryStreamWriter Writer(Buffer);
if (auto EC = Writer.writeObject(*Layout.SB))
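The TimeTraceScope objects added to this and the following PDB builders are RAII markers for LLVM's time-trace profiler; they record nothing unless a session is active. A sketch of driving a session around a commit, where the granularity, process name, and file names are illustrative:

#include "llvm/Support/Error.h"
#include "llvm/Support/TimeProfiler.h"

void profiledCommit() {
  // Events shorter than 500 microseconds are dropped.
  llvm::timeTraceProfilerInitialize(/*TimeTraceGranularity=*/500,
                                    /*ProcName=*/"pdb-writer");
  {
    llvm::TimeTraceScope Scope("Commit MSF"); // one event, closed at scope end
    // ... generate the layout and write the blocks ...
  }
  if (llvm::Error Err = llvm::timeTraceProfilerWrite("trace.json", "out.pdb"))
    llvm::consumeError(std::move(Err));
  llvm::timeTraceProfilerCleanup();
}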
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp
index 94935d63452e..5ebb76994b31 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp
@@ -10,7 +10,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/MSF/MSFCommon.h"
#include "llvm/Support/BinaryStreamWriter.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp
index 9755f2ca3bdc..f27a645c7704 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp
@@ -9,7 +9,6 @@
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
index 37c1b0407268..ad3d09ae50e9 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
@@ -18,6 +18,7 @@
#include "llvm/Object/COFF.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Parallel.h"
+#include "llvm/Support/TimeProfiler.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -187,7 +188,7 @@ Error DbiStreamBuilder::generateFileInfoSubstream() {
uint32_t NamesOffset = calculateNamesOffset();
FileInfoBuffer = MutableBinaryByteStream(MutableArrayRef<uint8_t>(Data, Size),
- llvm::support::little);
+ llvm::endianness::little);
WritableBinaryStreamRef MetadataBuffer =
WritableBinaryStreamRef(FileInfoBuffer).keep_front(NamesOffset);
@@ -381,6 +382,7 @@ void DbiStreamBuilder::createSectionMap(
Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout,
WritableBinaryStreamRef MsfBuffer) {
+ llvm::TimeTraceScope timeScope("Commit DBI stream");
if (auto EC = finalize())
return EC;
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/FormatUtil.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/FormatUtil.cpp
index 9c05d585831a..c5999bffc021 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/FormatUtil.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/FormatUtil.cpp
@@ -9,6 +9,7 @@
#include "llvm/DebugInfo/PDB/Native/FormatUtil.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLForwardCompat.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
@@ -119,9 +120,7 @@ std::string llvm::pdb::formatTypeLeafKind(TypeLeafKind K) {
return #EnumName;
#include "llvm/DebugInfo/CodeView/CodeViewTypes.def"
default:
- return formatv("UNKNOWN RECORD ({0:X})",
- static_cast<std::underlying_type_t<TypeLeafKind>>(K))
- .str();
+ return formatv("UNKNOWN RECORD ({0:X})", llvm::to_underlying(K)).str();
}
}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
index b17fbd63e9fd..c195754c0c67 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
@@ -26,6 +26,7 @@
#include "llvm/Support/BinaryItemStream.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Parallel.h"
+#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/xxhash.h"
#include <algorithm>
#include <vector>
@@ -393,7 +394,7 @@ static Error writePublics(BinaryStreamWriter &Writer,
static Error writeRecords(BinaryStreamWriter &Writer,
ArrayRef<CVSymbol> Records) {
- BinaryItemStream<CVSymbol> ItemStream(support::endianness::little);
+ BinaryItemStream<CVSymbol> ItemStream(llvm::endianness::little);
ItemStream.setItems(Records);
BinaryStreamRef RecordsRef(ItemStream);
return Writer.writeStreamRef(RecordsRef);
@@ -478,6 +479,7 @@ Error GSIStreamBuilder::commitGlobalsHashStream(
Error GSIStreamBuilder::commit(const msf::MSFLayout &Layout,
WritableBinaryStreamRef Buffer) {
+ llvm::TimeTraceScope timeScope("Commit GSI stream");
auto GS = WritableMappedBlockStream::createIndexedStream(
Layout, Buffer, getGlobalsStreamIndex(), Msf.getAllocator());
auto PS = WritableMappedBlockStream::createIndexedStream(
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
index e8f5a451b08e..95107125701d 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
@@ -14,6 +14,7 @@
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/TimeProfiler.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -55,6 +56,7 @@ Error InfoStreamBuilder::finalizeMsfLayout() {
Error InfoStreamBuilder::commit(const msf::MSFLayout &Layout,
WritableBinaryStreamRef Buffer) const {
+ llvm::TimeTraceScope timeScope("Commit info stream");
auto InfoS = WritableMappedBlockStream::createIndexedStream(
Layout, Buffer, StreamPDB, Msf.getAllocator());
BinaryStreamWriter Writer(*InfoS);
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp
index f1e8adeb1b21..841068c77d48 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp
@@ -14,7 +14,6 @@
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/Endian.h"
using namespace llvm;
using namespace llvm::msf;
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp
index 85c22483fa90..328d0f5ab060 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp
@@ -107,7 +107,7 @@ static inline bool isCodeViewDebugSubsection(object::SectionRef Section,
return false;
}
- Reader = BinaryStreamReader(*ContentsOrErr, support::little);
+ Reader = BinaryStreamReader(*ContentsOrErr, llvm::endianness::little);
uint32_t Magic;
if (Reader.bytesRemaining() < sizeof(uint32_t))
return false;
@@ -561,7 +561,7 @@ static bool isMyCode(const SymbolGroup &Group) {
return true;
StringRef Name = Group.name();
- if (Name.startswith("Import:"))
+ if (Name.starts_with("Import:"))
return false;
if (Name.ends_with_insensitive(".dll"))
return false;
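StringRef::startswith and endswith were deprecated in favor of the std::string_view-compatible starts_with/ends_with adopted here; the semantics are unchanged. A minimal sketch:

#include "llvm/ADT/StringRef.h"

bool looksLikeImportGroup(llvm::StringRef Name) {
  // Previously spelled Name.startswith("Import:").
  return Name.starts_with("Import:");
}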
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
index 500923e57fbb..bdf8e6ec1acd 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
-#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/PDB/Native/Hash.h"
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
index 91b428afaddb..d5cac33d1519 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
@@ -74,7 +74,7 @@ Error NativeSession::createFromPdb(std::unique_ptr<MemoryBuffer> Buffer,
std::unique_ptr<IPDBSession> &Session) {
StringRef Path = Buffer->getBufferIdentifier();
auto Stream = std::make_unique<MemoryBufferByteStream>(
- std::move(Buffer), llvm::support::little);
+ std::move(Buffer), llvm::endianness::little);
auto Allocator = std::make_unique<BumpPtrAllocator>();
auto File = std::make_unique<PDBFile>(Path, std::move(Stream), *Allocator);
@@ -104,8 +104,8 @@ loadPdbFile(StringRef PdbPath, std::unique_ptr<BumpPtrAllocator> &Allocator) {
if (EC || Magic != file_magic::pdb)
return make_error<RawError>(EC);
- auto Stream = std::make_unique<MemoryBufferByteStream>(std::move(Buffer),
- llvm::support::little);
+ auto Stream = std::make_unique<MemoryBufferByteStream>(
+ std::move(Buffer), llvm::endianness::little);
auto File = std::make_unique<PDBFile>(PdbPath, std::move(Stream), *Allocator);
if (auto EC = File->parseFileHeaders())
@@ -176,7 +176,7 @@ NativeSession::searchForPdb(const PdbSearchOptions &Opts) {
if (!PathOrErr)
return PathOrErr.takeError();
StringRef PathFromExe = PathOrErr.get();
- sys::path::Style Style = PathFromExe.startswith("/")
+ sys::path::Style Style = PathFromExe.starts_with("/")
? sys::path::Style::posix
: sys::path::Style::windows;
StringRef PdbName = sys::path::filename(PathFromExe, Style);
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
index cd30b56be7cd..06e379c3f6d2 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
@@ -25,6 +25,7 @@
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/CRC.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/xxhash.h"
#include <ctime>
@@ -129,6 +130,7 @@ void PDBFileBuilder::addInjectedSource(StringRef Name,
}
Error PDBFileBuilder::finalizeMsfLayout() {
+ llvm::TimeTraceScope timeScope("MSF layout");
if (Ipi && Ipi->getRecordCount() > 0) {
// In theory newer PDBs always have an ID stream, but by saying that we're
@@ -254,6 +256,7 @@ void PDBFileBuilder::commitInjectedSources(WritableBinaryStream &MsfBuffer,
if (InjectedSourceTable.empty())
return;
+ llvm::TimeTraceScope timeScope("Commit injected sources");
commitSrcHeaderBlock(MsfBuffer, Layout);
for (const auto &IS : InjectedSources) {
@@ -290,15 +293,18 @@ Error PDBFileBuilder::commit(StringRef Filename, codeview::GUID *Guid) {
if (auto EC = Strings.commit(NSWriter))
return EC;
- for (const auto &NSE : NamedStreamData) {
- if (NSE.second.empty())
- continue;
-
- auto NS = WritableMappedBlockStream::createIndexedStream(
- Layout, Buffer, NSE.first, Allocator);
- BinaryStreamWriter NSW(*NS);
- if (auto EC = NSW.writeBytes(arrayRefFromStringRef(NSE.second)))
- return EC;
+ {
+ llvm::TimeTraceScope timeScope("Named stream data");
+ for (const auto &NSE : NamedStreamData) {
+ if (NSE.second.empty())
+ continue;
+
+ auto NS = WritableMappedBlockStream::createIndexedStream(
+ Layout, Buffer, NSE.first, Allocator);
+ BinaryStreamWriter NSW(*NS);
+ if (auto EC = NSW.writeBytes(arrayRefFromStringRef(NSE.second)))
+ return EC;
+ }
}
if (Info) {
@@ -338,6 +344,8 @@ Error PDBFileBuilder::commit(StringRef Filename, codeview::GUID *Guid) {
// Set the build id at the very end, after every other byte of the PDB
// has been written.
if (Info->hashPDBContentsToGUID()) {
+ llvm::TimeTraceScope timeScope("Compute build ID");
+
// Compute a hash of all sections of the output file.
uint64_t Digest =
xxh3_64bits({Buffer.getBufferStart(), Buffer.getBufferEnd()});
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
index c0245dc17cf1..91b3dd5c32b9 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
@@ -13,6 +13,7 @@
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/TimeProfiler.h"
#include <map>
@@ -207,6 +208,7 @@ Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const {
}
Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const {
+ llvm::TimeTraceScope timeScope("Commit strings table");
BinaryStreamWriter SectionWriter;
std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader));
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
index c7b9f443da5e..c350e0e0b3e1 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
@@ -26,7 +26,6 @@
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <cstdint>
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/SymbolStream.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/SymbolStream.cpp
index 5802d1c77527..5dd636f326b7 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/SymbolStream.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/SymbolStream.cpp
@@ -9,7 +9,6 @@
#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
-#include "llvm/Support/Endian.h"
using namespace llvm;
using namespace llvm::msf;
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp
index b71b2b158144..941ce78027a2 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp
@@ -19,7 +19,7 @@ using namespace llvm::pdb;
// Corresponds to `fUDTAnon`.
static bool isAnonymous(StringRef Name) {
return Name == "<unnamed-tag>" || Name == "__unnamed" ||
- Name.endswith("::<unnamed-tag>") || Name.endswith("::__unnamed");
+ Name.ends_with("::<unnamed-tag>") || Name.ends_with("::__unnamed");
}
// Computes the hash for a user-defined type record. This could be a struct,
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
index aad5847651a0..22663f009637 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/TimeProfiler.h"
#include <algorithm>
#include <cstdint>
#include <numeric>
@@ -164,13 +165,14 @@ Error TpiStreamBuilder::finalizeMsfLayout() {
reinterpret_cast<const uint8_t *>(HashBuffer.data()),
calculateHashBufferSize());
HashValueStream =
- std::make_unique<BinaryByteStream>(Bytes, llvm::support::little);
+ std::make_unique<BinaryByteStream>(Bytes, llvm::endianness::little);
}
return Error::success();
}
Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout,
WritableBinaryStreamRef Buffer) {
+ llvm::TimeTraceScope timeScope("Commit TPI stream");
if (auto EC = finalize())
return EC;
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
index f9669b554b47..716312f26e0b 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
@@ -90,7 +90,7 @@ public:
size_t PosEnd = PrunedSource->find('\n', Pos);
StringRef String = PrunedSource->substr(
Pos, (PosEnd == StringRef::npos) ? StringRef::npos : (PosEnd - Pos));
- if (String.endswith("\r"))
+ if (String.ends_with("\r"))
String = String.drop_back(1);
OS << format_decimal(L, MaxLineNumberWidth);
if (L == Line)
@@ -105,10 +105,10 @@ public:
}
};
-void PlainPrinterBase::printHeader(uint64_t Address) {
- if (Config.PrintAddress) {
+void PlainPrinterBase::printHeader(std::optional<uint64_t> Address) {
+ if (Address.has_value() && Config.PrintAddress) {
OS << "0x";
- OS.write_hex(Address);
+ OS.write_hex(*Address);
StringRef Delimiter = Config.Pretty ? ": " : "\n";
OS << Delimiter;
}
@@ -182,7 +182,7 @@ void PlainPrinterBase::print(const DILineInfo &Info, bool Inlined) {
}
void PlainPrinterBase::print(const Request &Request, const DILineInfo &Info) {
- printHeader(*Request.Address);
+ printHeader(Request.Address);
print(Info, false);
printFooter();
}
@@ -260,9 +260,15 @@ void PlainPrinterBase::print(const Request &Request,
printFooter();
}
-void PlainPrinterBase::printInvalidCommand(const Request &Request,
- StringRef Command) {
- OS << Command << '\n';
+void PlainPrinterBase::print(const Request &Request,
+ const std::vector<DILineInfo> &Locations) {
+ if (Locations.empty()) {
+ print(Request, DILineInfo());
+ } else {
+ for (const DILineInfo &L : Locations)
+ print(L, false);
+ printFooter();
+ }
}
bool PlainPrinterBase::printError(const Request &Request,
@@ -278,6 +284,8 @@ static std::string toHex(uint64_t V) {
static json::Object toJSON(const Request &Request, StringRef ErrorMsg = "") {
json::Object Json({{"ModuleName", Request.ModuleName.str()}});
+ if (!Request.Symbol.empty())
+ Json["SymName"] = Request.Symbol.str();
if (Request.Address)
Json["Address"] = toHex(*Request.Address);
if (!ErrorMsg.empty())
@@ -367,11 +375,17 @@ void JSONPrinter::print(const Request &Request,
printJSON(std::move(Json));
}
-void JSONPrinter::printInvalidCommand(const Request &Request,
- StringRef Command) {
- printError(Request,
- StringError("unable to parse arguments: " + Command,
- std::make_error_code(std::errc::invalid_argument)));
+void JSONPrinter::print(const Request &Request,
+ const std::vector<DILineInfo> &Locations) {
+ json::Array Definitions;
+ for (const DILineInfo &L : Locations)
+ Definitions.push_back(toJSON(L));
+ json::Object Json = toJSON(Request);
+ Json["Loc"] = std::move(Definitions);
+ if (ObjectList)
+ ObjectList->push_back(std::move(Json));
+ else
+ printJSON(std::move(Json));
}
bool JSONPrinter::printError(const Request &Request,
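The new JSONPrinter::print overload assembles its output with llvm::json. A standalone sketch producing the same document shape, with placeholder field values:

#include "llvm/Support/JSON.h"
#include "llvm/Support/raw_ostream.h"

void emitLocations() {
  llvm::json::Array Definitions;
  Definitions.push_back(
      llvm::json::Object{{"FileName", "a.cpp"}, {"Line", 42}});
  llvm::json::Object Json{{"ModuleName", "a.out"}};
  Json["Loc"] = std::move(Definitions);
  llvm::outs() << llvm::json::Value(std::move(Json)) << "\n";
}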
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
index a2bc2577b70a..01d49709f9b2 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
@@ -41,8 +41,8 @@ MarkupFilter::MarkupFilter(raw_ostream &OS, LLVMSymbolizer &Symbolizer,
ColorsEnabled(
ColorsEnabled.value_or(WithColor::defaultAutoDetectFunction()(OS))) {}
-void MarkupFilter::filter(StringRef Line) {
- this->Line = Line;
+void MarkupFilter::filter(std::string &&InputLine) {
+ Line = std::move(InputLine);
resetColor();
Parser.parseLine(Line);
@@ -552,7 +552,7 @@ std::optional<uint64_t> MarkupFilter::parseAddr(StringRef Str) const {
}
if (all_of(Str, [](char C) { return C == '0'; }))
return 0;
- if (!Str.startswith("0x")) {
+ if (!Str.starts_with("0x")) {
reportTypeError(Str, "address");
return std::nullopt;
}
@@ -695,7 +695,9 @@ void MarkupFilter::reportTypeError(StringRef Str, StringRef TypeName) const {
// passed to beginLine().
void MarkupFilter::reportLocation(StringRef::iterator Loc) const {
errs() << Line;
- WithColor(errs().indent(Loc - Line.begin()), HighlightColor::String) << '^';
+ WithColor(errs().indent(Loc - StringRef(Line).begin()),
+ HighlightColor::String)
+ << '^';
errs() << '\n';
}
@@ -741,7 +743,7 @@ uint64_t MarkupFilter::adjustAddr(uint64_t Addr, PCType Type) const {
}
StringRef MarkupFilter::lineEnding() const {
- return Line.endswith("\r\n") ? "\r\n" : "\n";
+ return StringRef(Line).ends_with("\r\n") ? "\r\n" : "\n";
}
bool MarkupFilter::MMap::contains(uint64_t Addr) const {
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
index 6b8068a531c0..0c404327c693 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
@@ -351,6 +351,21 @@ std::vector<DILocal> SymbolizableObjectFile::symbolizeFrame(
return DebugInfoContext->getLocalsForAddress(ModuleOffset);
}
+std::vector<object::SectionedAddress>
+SymbolizableObjectFile::findSymbol(StringRef Symbol, uint64_t Offset) const {
+ std::vector<object::SectionedAddress> Result;
+ for (const SymbolDesc &Sym : Symbols) {
+ if (Sym.Name.equals(Symbol)) {
+ uint64_t Addr = Sym.Addr;
+ if (Offset < Sym.Size)
+ Addr += Offset;
+ object::SectionedAddress A{Addr, getModuleSectionIndexForAddress(Addr)};
+ Result.push_back(A);
+ }
+ }
+ return Result;
+}
+
/// Search for the first occurrence of the specified Address in the ObjectFile.
uint64_t SymbolizableObjectFile::getModuleSectionIndexForAddress(
uint64_t Address) const {
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index 1c6dca344085..0f1618d26d12 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -231,6 +231,54 @@ LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID,
return symbolizeFrameCommon(BuildID, ModuleOffset);
}
+template <typename T>
+Expected<std::vector<DILineInfo>>
+LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol,
+ uint64_t Offset) {
+ auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
+ if (!InfoOrErr)
+ return InfoOrErr.takeError();
+
+ SymbolizableModule *Info = *InfoOrErr;
+ std::vector<DILineInfo> Result;
+
+ // A null module means an error has already been reported. Return an empty
+ // result.
+ if (!Info)
+ return Result;
+
+ for (object::SectionedAddress A : Info->findSymbol(Symbol, Offset)) {
+ DILineInfo LineInfo = Info->symbolizeCode(
+ A, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions),
+ Opts.UseSymbolTable);
+ if (LineInfo.FileName != DILineInfo::BadString) {
+ if (Opts.Demangle)
+ LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info);
+ Result.push_back(LineInfo);
+ }
+ }
+
+ return Result;
+}
+
+Expected<std::vector<DILineInfo>>
+LLVMSymbolizer::findSymbol(const ObjectFile &Obj, StringRef Symbol,
+ uint64_t Offset) {
+ return findSymbolCommon(Obj, Symbol, Offset);
+}
+
+Expected<std::vector<DILineInfo>>
+LLVMSymbolizer::findSymbol(const std::string &ModuleName, StringRef Symbol,
+ uint64_t Offset) {
+ return findSymbolCommon(ModuleName, Symbol, Offset);
+}
+
+Expected<std::vector<DILineInfo>>
+LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol,
+ uint64_t Offset) {
+ return findSymbolCommon(BuildID, Symbol, Offset);
+}
+
void LLVMSymbolizer::flush() {
ObjectForUBPathAndArch.clear();
LRUBinaries.clear();
@@ -679,7 +727,7 @@ StringRef demanglePE32ExternCFunc(StringRef SymbolName) {
// Remove any ending '@' for vectorcall.
bool IsVectorCall = false;
- if (HasAtNumSuffix && SymbolName.endswith("@")) {
+ if (HasAtNumSuffix && SymbolName.ends_with("@")) {
SymbolName = SymbolName.drop_back();
IsVectorCall = true;
}
diff --git a/contrib/llvm-project/llvm/lib/Debuginfod/Debuginfod.cpp b/contrib/llvm-project/llvm/lib/Debuginfod/Debuginfod.cpp
index 394f2b29aee6..9df30ab55cba 100644
--- a/contrib/llvm-project/llvm/lib/Debuginfod/Debuginfod.cpp
+++ b/contrib/llvm-project/llvm/lib/Debuginfod/Debuginfod.cpp
@@ -41,13 +41,22 @@
#include "llvm/Support/xxhash.h"
#include <atomic>
+#include <optional>
#include <thread>
namespace llvm {
using llvm::object::BuildIDRef;
-static std::string uniqueKey(llvm::StringRef S) { return utostr(xxHash64(S)); }
+namespace {
+std::optional<SmallVector<StringRef>> DebuginfodUrls;
+// Many Readers/Single Writer lock protecting the global debuginfod URL list.
+llvm::sys::RWMutex UrlsMutex;
+} // namespace
+
+static std::string uniqueKey(llvm::StringRef S) {
+ return utostr(xxh3_64bits(S));
+}
// Returns a binary BuildID as a normalized hex string.
// Uses lowercase for compatibility with common debuginfod servers.
@@ -60,13 +69,27 @@ bool canUseDebuginfod() {
}
SmallVector<StringRef> getDefaultDebuginfodUrls() {
- const char *DebuginfodUrlsEnv = std::getenv("DEBUGINFOD_URLS");
- if (DebuginfodUrlsEnv == nullptr)
- return SmallVector<StringRef>();
+ std::shared_lock<llvm::sys::RWMutex> ReadGuard(UrlsMutex);
+ if (!DebuginfodUrls) {
+ // Only read from the environment variable if the user hasn't already
+ // set the value
+ ReadGuard.unlock();
+ std::unique_lock<llvm::sys::RWMutex> WriteGuard(UrlsMutex);
+ DebuginfodUrls = SmallVector<StringRef>();
+ if (const char *DebuginfodUrlsEnv = std::getenv("DEBUGINFOD_URLS")) {
+ StringRef(DebuginfodUrlsEnv)
+ .split(DebuginfodUrls.value(), " ", -1, false);
+ }
+ WriteGuard.unlock();
+ ReadGuard.lock();
+ }
+ return DebuginfodUrls.value();
+}
- SmallVector<StringRef> DebuginfodUrls;
- StringRef(DebuginfodUrlsEnv).split(DebuginfodUrls, " ");
- return DebuginfodUrls;
+// Set the default debuginfod URL list, overriding the environment variable.
+void setDefaultDebuginfodUrls(const SmallVector<StringRef> &URLs) {
+ std::unique_lock<llvm::sys::RWMutex> WriteGuard(UrlsMutex);
+ DebuginfodUrls = URLs;
}
/// Finds a default local file caching directory for the debuginfod client,
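The URL list above is lazily initialized under a readers/writer lock: callers take a shared lock, and the first one upgrades to an exclusive lock to populate the cache. A generic sketch of the pattern, with a re-check after the upgrade since a racing writer can win the gap between the two locks:

#include "llvm/Support/RWMutex.h"
#include <mutex>
#include <optional>
#include <shared_mutex>

static llvm::sys::RWMutex CacheMutex;
static std::optional<int> Cache;

int getCachedValue() {
  std::shared_lock<llvm::sys::RWMutex> Read(CacheMutex);
  if (!Cache) {
    Read.unlock(); // no in-place upgrade; drop the shared lock first
    {
      std::unique_lock<llvm::sys::RWMutex> Write(CacheMutex);
      if (!Cache) // another writer may have filled it in the meantime
        Cache = 42;
    }
    Read.lock();
  }
  return *Cache;
}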
diff --git a/contrib/llvm-project/llvm/lib/Debuginfod/HTTPClient.cpp b/contrib/llvm-project/llvm/lib/Debuginfod/HTTPClient.cpp
index f9201e4f9626..4cca250746a5 100644
--- a/contrib/llvm-project/llvm/lib/Debuginfod/HTTPClient.cpp
+++ b/contrib/llvm-project/llvm/lib/Debuginfod/HTTPClient.cpp
@@ -97,6 +97,8 @@ HTTPClient::HTTPClient() {
assert(Curl && "Curl could not be initialized");
// Set the callback hooks.
curl_easy_setopt(Curl, CURLOPT_WRITEFUNCTION, curlWriteFunction);
+ // Detect supported compressed encodings and accept all.
+ curl_easy_setopt(Curl, CURLOPT_ACCEPT_ENCODING, "");
}
HTTPClient::~HTTPClient() { curl_easy_cleanup(Curl); }
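Passing the empty string to CURLOPT_ACCEPT_ENCODING asks libcurl to advertise every content encoding it was built with (gzip, deflate, brotli, ...) and to decode responses transparently. A minimal standalone sketch:

#include <curl/curl.h>

CURL *makeHttpClient() {
  CURL *Curl = curl_easy_init();
  if (!Curl)
    return nullptr;
  // "" = accept all encodings libcurl supports; decoding is automatic.
  curl_easy_setopt(Curl, CURLOPT_ACCEPT_ENCODING, "");
  return Curl;
}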
diff --git a/contrib/llvm-project/llvm/lib/Debuginfod/HTTPServer.cpp b/contrib/llvm-project/llvm/lib/Debuginfod/HTTPServer.cpp
index a5e992254ead..1264353ce4b3 100644
--- a/contrib/llvm-project/llvm/lib/Debuginfod/HTTPServer.cpp
+++ b/contrib/llvm-project/llvm/lib/Debuginfod/HTTPServer.cpp
@@ -51,7 +51,7 @@ bool llvm::streamFile(HTTPServerRequest &Request, StringRef FilePath) {
Request.setResponse({404u, "text/plain", "Could not memory-map file.\n"});
return false;
}
- // Lambdas are copied on conversion to to std::function, preventing use of
+ // Lambdas are copied on conversion to std::function, preventing use of
// smart pointers.
MemoryBuffer *MB = MBOrErr->release();
Request.setResponse({200u, "application/octet-stream", MB->getBufferSize(),
diff --git a/contrib/llvm-project/llvm/lib/Demangle/Demangle.cpp b/contrib/llvm-project/llvm/lib/Demangle/Demangle.cpp
index f2aa571d685f..83f3cdc88c01 100644
--- a/contrib/llvm-project/llvm/lib/Demangle/Demangle.cpp
+++ b/contrib/llvm-project/llvm/lib/Demangle/Demangle.cpp
@@ -24,7 +24,8 @@ std::string llvm::demangle(std::string_view MangledName) {
return Result;
if (starts_with(MangledName, '_') &&
- nonMicrosoftDemangle(MangledName.substr(1), Result))
+ nonMicrosoftDemangle(MangledName.substr(1), Result,
+ /*CanHaveLeadingDot=*/false))
return Result;
if (char *Demangled = microsoftDemangle(MangledName, nullptr, nullptr)) {
@@ -46,8 +47,15 @@ static bool isRustEncoding(std::string_view S) { return starts_with(S, "_R"); }
static bool isDLangEncoding(std::string_view S) { return starts_with(S, "_D"); }
bool llvm::nonMicrosoftDemangle(std::string_view MangledName,
- std::string &Result) {
+ std::string &Result, bool CanHaveLeadingDot) {
char *Demangled = nullptr;
+
+ // Do not consider the dot prefix as part of the demangled symbol name.
+ if (CanHaveLeadingDot && MangledName.size() > 0 && MangledName[0] == '.') {
+ MangledName.remove_prefix(1);
+ Result = ".";
+ }
+
if (isItaniumEncoding(MangledName))
Demangled = itaniumDemangle(MangledName);
else if (isRustEncoding(MangledName))
@@ -58,7 +66,7 @@ bool llvm::nonMicrosoftDemangle(std::string_view MangledName,
if (!Demangled)
return false;
- Result = Demangled;
+ Result += Demangled;
std::free(Demangled);
return true;
}
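With CanHaveLeadingDot, a dot-prefixed symbol keeps its dot while the mangled remainder is demangled; note the switch to Result += so the prefix survives. A usage sketch against the signature shown above:

#include "llvm/Demangle/Demangle.h"
#include <string>
#include <string_view>

std::string demangleMaybeDotted(std::string_view Mangled) {
  std::string Result;
  // "._ZN3foo3barEv" -> ".foo::bar()": the dot is preserved verbatim.
  if (llvm::nonMicrosoftDemangle(Mangled, Result,
                                 /*CanHaveLeadingDot=*/true))
    return Result;
  return std::string(Mangled); // not a recognized mangling
}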
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
index 768d84501337..2559ed6a31a6 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -340,7 +340,7 @@ void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE,
Array = std::make_unique<char[]>((InputArgv.size()+1)*PtrSize);
LLVM_DEBUG(dbgs() << "JIT: ARGV = " << (void *)Array.get() << "\n");
- Type *SBytePtr = Type::getInt8PtrTy(C);
+ Type *SBytePtr = PointerType::getUnqual(C);
for (unsigned i = 0; i != InputArgv.size(); ++i) {
unsigned Size = InputArgv[i].size()+1;
@@ -430,7 +430,7 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn,
// Check main() type
unsigned NumArgs = Fn->getFunctionType()->getNumParams();
FunctionType *FTy = Fn->getFunctionType();
- Type* PPInt8Ty = Type::getInt8PtrTy(Fn->getContext())->getPointerTo();
+ Type *PPInt8Ty = PointerType::get(Fn->getContext(), 0);
// Check the argument types.
if (NumArgs > 3)
@@ -471,7 +471,7 @@ EngineBuilder::EngineBuilder() : EngineBuilder(nullptr) {}
EngineBuilder::EngineBuilder(std::unique_ptr<Module> M)
: M(std::move(M)), WhichEngine(EngineKind::Either), ErrorStr(nullptr),
- OptLevel(CodeGenOpt::Default), MemMgr(nullptr), Resolver(nullptr) {
+ OptLevel(CodeGenOptLevel::Default), MemMgr(nullptr), Resolver(nullptr) {
// IR module verification is enabled by default in debug builds, and disabled
// by default in release builds.
#ifndef NDEBUG
@@ -618,7 +618,18 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
case Type::ScalableVectorTyID:
report_fatal_error(
"Scalable vector support not yet implemented in ExecutionEngine");
- case Type::FixedVectorTyID:
+ case Type::ArrayTyID: {
+ auto *ArrTy = cast<ArrayType>(C->getType());
+ Type *ElemTy = ArrTy->getElementType();
+ unsigned int elemNum = ArrTy->getNumElements();
+ Result.AggregateVal.resize(elemNum);
+ if (ElemTy->isIntegerTy())
+ for (unsigned int i = 0; i < elemNum; ++i)
+ Result.AggregateVal[i].IntVal =
+ APInt(ElemTy->getPrimitiveSizeInBits(), 0);
+ break;
+ }
+ case Type::FixedVectorTyID: {
// if the whole vector is 'undef' just reserve memory for the value.
auto *VTy = cast<FixedVectorType>(C->getType());
Type *ElemTy = VTy->getElementType();
@@ -629,6 +640,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
Result.AggregateVal[i].IntVal =
APInt(ElemTy->getPrimitiveSizeInBits(), 0);
break;
+ }
}
return Result;
}
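Type::getInt8PtrTy and getPointerTo fall away with the opaque-pointer transition: every pointer in a given address space is now the single ptr type, obtained straight from the context. A sketch:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

llvm::PointerType *opaquePointer(llvm::LLVMContext &C) {
  // Under opaque pointers, i8*, i8**, char*, ... are all this one type.
  return llvm::PointerType::get(C, /*AddressSpace=*/0);
}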
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index dc9a07e3f212..772a3fa93c51 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -138,8 +138,8 @@ LLVMBool LLVMCreateJITCompilerForModule(LLVMExecutionEngineRef *OutJIT,
std::string Error;
EngineBuilder builder(std::unique_ptr<Module>(unwrap(M)));
builder.setEngineKind(EngineKind::JIT)
- .setErrorStr(&Error)
- .setOptLevel((CodeGenOpt::Level)OptLevel);
+ .setErrorStr(&Error)
+ .setOptLevel((CodeGenOptLevel)OptLevel);
if (ExecutionEngine *JIT = builder.create()) {
*OutJIT = wrap(JIT);
return 0;
@@ -196,9 +196,9 @@ LLVMBool LLVMCreateMCJITCompilerForModule(
std::string Error;
EngineBuilder builder(std::move(Mod));
builder.setEngineKind(EngineKind::JIT)
- .setErrorStr(&Error)
- .setOptLevel((CodeGenOpt::Level)options.OptLevel)
- .setTargetOptions(targetOptions);
+ .setErrorStr(&Error)
+ .setOptLevel((CodeGenOptLevel)options.OptLevel)
+ .setTargetOptions(targetOptions);
bool JIT;
if (std::optional<CodeModel::Model> CM = unwrap(options.CodeModel, JIT))
builder.setCodeModel(*CM);
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFF.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFF.cpp
index fddc9b813fb2..f4701bc830d6 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFF.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFF.cpp
@@ -15,7 +15,6 @@
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/ExecutionEngine/JITLink/COFF_x86_64.h"
#include "llvm/Object/COFF.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstring>
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.cpp
index 30c1579a1ba0..f23f3ed9406b 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.cpp
@@ -36,20 +36,10 @@ static constexpr const ArrayRef<StringLiteral>
PrefixTable(PrefixTable_init, std::size(PrefixTable_init) - 1);
// Create table mapping all options defined in COFFOptions.td
+using namespace llvm::opt;
static constexpr opt::OptTable::Info infoTable[] = {
-#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X7, X8, X9, X10, X11, X12) \
- {X1, \
- X2, \
- X10, \
- X11, \
- COFF_OPT_##ID, \
- opt::Option::KIND##Class, \
- X9, \
- X8, \
- COFF_OPT_##GROUP, \
- COFF_OPT_##ALIAS, \
- X7, \
- X12},
+#define OPTION(...) \
+ LLVM_CONSTRUCT_OPT_INFO_WITH_ID_PREFIX(COFF_OPT_, __VA_ARGS__),
#include "COFFOptions.inc"
#undef OPTION
};
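The hand-rolled twelve-argument OPTION expansion gives way to the variadic helpers from llvm/Option/OptTable.h, which absorb tablegen's field layout. A sketch of the usual pairing, assuming a hypothetical tablegen-generated Opts.inc:

#include "llvm/Option/OptTable.h"

// IDs first...
enum MyOptID {
  MY_OPT_INVALID = 0,
#define OPTION(...) LLVM_MAKE_OPT_ID_WITH_ID_PREFIX(MY_OPT_, __VA_ARGS__),
#include "Opts.inc" // hypothetical tablegen output
#undef OPTION
};

// ...then the matching info table.
static constexpr llvm::opt::OptTable::Info InfoTable[] = {
#define OPTION(...)                                                            \
  LLVM_CONSTRUCT_OPT_INFO_WITH_ID_PREFIX(MY_OPT_, __VA_ARGS__),
#include "Opts.inc"
#undef OPTION
};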
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.h
index 5c953da7581f..21808f0afcb5 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.h
@@ -26,7 +26,7 @@ namespace jitlink {
enum {
COFF_OPT_INVALID = 0,
-#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) COFF_OPT_##ID,
+#define OPTION(...) LLVM_MAKE_OPT_ID_WITH_ID_PREFIX(COFF_OPT_, __VA_ARGS__),
#include "COFFOptions.inc"
#undef OPTION
};
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp
index 6668854e1a6a..1fd2a33d3f11 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// Generic COFF LinkGraph buliding code.
+// Generic COFF LinkGraph building code.
//
//===----------------------------------------------------------------------===//
#include "COFFLinkGraphBuilder.h"
@@ -43,9 +43,10 @@ COFFLinkGraphBuilder::getPointerSize(const object::COFFObjectFile &Obj) {
return Obj.getBytesInAddress();
}
-support::endianness
+llvm::endianness
COFFLinkGraphBuilder::getEndianness(const object::COFFObjectFile &Obj) {
- return Obj.isLittleEndian() ? support::little : support::big;
+ return Obj.isLittleEndian() ? llvm::endianness::little
+ : llvm::endianness::big;
}
uint64_t COFFLinkGraphBuilder::getSectionSize(const object::COFFObjectFile &Obj,
@@ -161,7 +162,7 @@ Error COFFLinkGraphBuilder::graphifySections() {
if (!GraphSec) {
GraphSec = &G->createSection(SectionName, Prot);
if ((*Sec)->Characteristics & COFF::IMAGE_SCN_LNK_REMOVE)
- GraphSec->setMemLifetimePolicy(orc::MemLifetimePolicy::NoAlloc);
+ GraphSec->setMemLifetime(orc::MemLifetime::NoAlloc);
}
if (GraphSec->getMemProt() != Prot)
return make_error<JITLinkError>("MemProt should match");
@@ -606,7 +607,7 @@ COFFLinkGraphBuilder::exportCOMDATSymbol(COFFSymbolIndex SymIndex,
object::COFFSymbolRef Symbol) {
Block *B = getGraphBlock(Symbol.getSectionNumber());
auto &PendingComdatExport = PendingComdatExports[Symbol.getSectionNumber()];
- // NOTE: ComdatDef->Legnth is the size of "section" not size of symbol.
+ // NOTE: ComdatDef->Length is the size of "section" not size of symbol.
// We use zero symbol size to not reach out of bound of block when symbol
// offset is non-zero.
auto GSym = &G->addDefinedSymbol(
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h
index e64823759540..e5f3ce8c53f5 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h
@@ -14,7 +14,6 @@
#define LIB_EXECUTIONENGINE_JITLINK_COFFLINKGRAPHBUILDER_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/Object/COFF.h"
@@ -162,7 +161,7 @@ private:
const object::coff_section *Section);
static bool isComdatSection(const object::coff_section *Section);
static unsigned getPointerSize(const object::COFFObjectFile &Obj);
- static support::endianness getEndianness(const object::COFFObjectFile &Obj);
+ static llvm::endianness getEndianness(const object::COFFObjectFile &Obj);
static StringRef getDLLImportStubPrefix() { return "__imp_"; }
static StringRef getDirectiveSectionName() { return ".drectve"; }
StringRef getCOFFSectionName(COFFSectionIndex SectionIndex,
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
index 86249591a9be..c11577b03fd7 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
@@ -126,83 +126,71 @@ Error EHFrameEdgeFixer::processBlock(ParseContext &PC, Block &B) {
}
// Find the offsets of any existing edges from this block.
- BlockEdgeMap BlockEdges;
+ BlockEdgesInfo BlockEdges;
for (auto &E : B.edges())
if (E.isRelocation()) {
- if (BlockEdges.count(E.getOffset()))
- return make_error<JITLinkError>(
- "Multiple relocations at offset " +
- formatv("{0:x16}", E.getOffset()) + " in " + EHFrameSectionName +
- " block at address " + formatv("{0:x16}", B.getAddress()));
-
- BlockEdges[E.getOffset()] = EdgeTarget(E);
+ // Check if we already saw more than one relocation at this offset.
+ if (BlockEdges.Multiple.contains(E.getOffset()))
+ continue;
+
+ // Otherwise check if we previously had exactly one relocation at this
+ // offset. If so, we now have a second one and move it from the TargetMap
+ // into the Multiple set.
+ auto It = BlockEdges.TargetMap.find(E.getOffset());
+ if (It != BlockEdges.TargetMap.end()) {
+ BlockEdges.TargetMap.erase(It);
+ BlockEdges.Multiple.insert(E.getOffset());
+ } else {
+ BlockEdges.TargetMap[E.getOffset()] = EdgeTarget(E);
+ }
}
- CIEInfosMap CIEInfos;
BinaryStreamReader BlockReader(
StringRef(B.getContent().data(), B.getContent().size()),
PC.G.getEndianness());
- while (!BlockReader.empty()) {
- size_t RecordStartOffset = BlockReader.getOffset();
-
- LLVM_DEBUG({
- dbgs() << " Processing CFI record at "
- << (B.getAddress() + RecordStartOffset) << "\n";
- });
- // Get the record length.
- Expected<size_t> RecordRemaining = readCFIRecordLength(B, BlockReader);
- if (!RecordRemaining)
- return RecordRemaining.takeError();
-
- if (BlockReader.bytesRemaining() < *RecordRemaining)
- return make_error<JITLinkError>(
- "Incomplete CFI record at " +
- formatv("{0:x16}", B.getAddress() + RecordStartOffset));
+ // Get the record length.
+ Expected<size_t> RecordRemaining = readCFIRecordLength(B, BlockReader);
+ if (!RecordRemaining)
+ return RecordRemaining.takeError();
+
+ // We expect DWARFRecordSectionSplitter to split each CFI record into its own
+ // block.
+ if (BlockReader.bytesRemaining() != *RecordRemaining)
+ return make_error<JITLinkError>("Incomplete CFI record at " +
+ formatv("{0:x16}", B.getAddress()));
+
+ // Read the CIE delta for this record.
+ uint64_t CIEDeltaFieldOffset = BlockReader.getOffset();
+ uint32_t CIEDelta;
+ if (auto Err = BlockReader.readInteger(CIEDelta))
+ return Err;
- // Read the CIE delta for this record.
- uint64_t CIEDeltaFieldOffset = BlockReader.getOffset() - RecordStartOffset;
- uint32_t CIEDelta;
- if (auto Err = BlockReader.readInteger(CIEDelta))
+ if (CIEDelta == 0) {
+ if (auto Err = processCIE(PC, B, CIEDeltaFieldOffset, BlockEdges))
+ return Err;
+ } else {
+ if (auto Err = processFDE(PC, B, CIEDeltaFieldOffset, CIEDelta, BlockEdges))
return Err;
-
- if (CIEDelta == 0) {
- if (auto Err = processCIE(PC, B, RecordStartOffset,
- CIEDeltaFieldOffset + *RecordRemaining,
- CIEDeltaFieldOffset, BlockEdges))
- return Err;
- } else {
- if (auto Err = processFDE(PC, B, RecordStartOffset,
- CIEDeltaFieldOffset + *RecordRemaining,
- CIEDeltaFieldOffset, CIEDelta, BlockEdges))
- return Err;
- }
-
- // Move to the next record.
- BlockReader.setOffset(RecordStartOffset + CIEDeltaFieldOffset +
- *RecordRemaining);
}
return Error::success();
}
Error EHFrameEdgeFixer::processCIE(ParseContext &PC, Block &B,
- size_t RecordOffset, size_t RecordLength,
size_t CIEDeltaFieldOffset,
- const BlockEdgeMap &BlockEdges) {
+ const BlockEdgesInfo &BlockEdges) {
- LLVM_DEBUG(dbgs() << " Record is CIE\n");
+ LLVM_DEBUG(dbgs() << " Record is CIE\n");
- auto RecordContent = B.getContent().slice(RecordOffset, RecordLength);
BinaryStreamReader RecordReader(
- StringRef(RecordContent.data(), RecordContent.size()),
+ StringRef(B.getContent().data(), B.getContent().size()),
PC.G.getEndianness());
// Skip past the CIE delta field: we've already processed this far.
RecordReader.setOffset(CIEDeltaFieldOffset + 4);
- auto &CIESymbol =
- PC.G.addAnonymousSymbol(B, RecordOffset, RecordLength, false, false);
+ auto &CIESymbol = PC.G.addAnonymousSymbol(B, 0, B.getSize(), false, false);
CIEInformation CIEInfo(CIESymbol);
uint8_t Version = 0;
@@ -268,7 +256,7 @@ Error EHFrameEdgeFixer::processCIE(ParseContext &PC, Block &B,
if (auto Err =
getOrCreateEncodedPointerEdge(
PC, BlockEdges, *PersonalityPointerEncoding, RecordReader,
- B, RecordOffset + RecordReader.getOffset(), "personality")
+ B, RecordReader.getOffset(), "personality")
.takeError())
return Err;
break;
@@ -279,7 +267,7 @@ Error EHFrameEdgeFixer::processCIE(ParseContext &PC, Block &B,
if (CIEInfo.AddressEncoding == dwarf::DW_EH_PE_omit)
return make_error<JITLinkError>(
"Invalid address encoding DW_EH_PE_omit in CIE at " +
- formatv("{0:x}", (B.getAddress() + RecordOffset).getValue()));
+ formatv("{0:x}", B.getAddress().getValue()));
} else
return PE.takeError();
break;
@@ -302,35 +290,37 @@ Error EHFrameEdgeFixer::processCIE(ParseContext &PC, Block &B,
}
Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B,
- size_t RecordOffset, size_t RecordLength,
size_t CIEDeltaFieldOffset,
uint32_t CIEDelta,
- const BlockEdgeMap &BlockEdges) {
- LLVM_DEBUG(dbgs() << " Record is FDE\n");
+ const BlockEdgesInfo &BlockEdges) {
+ LLVM_DEBUG(dbgs() << " Record is FDE\n");
- orc::ExecutorAddr RecordAddress = B.getAddress() + RecordOffset;
+ orc::ExecutorAddr RecordAddress = B.getAddress();
- auto RecordContent = B.getContent().slice(RecordOffset, RecordLength);
BinaryStreamReader RecordReader(
- StringRef(RecordContent.data(), RecordContent.size()),
+ StringRef(B.getContent().data(), B.getContent().size()),
PC.G.getEndianness());
// Skip past the CIE delta field: we've already read this far.
RecordReader.setOffset(CIEDeltaFieldOffset + 4);
- auto &FDESymbol =
- PC.G.addAnonymousSymbol(B, RecordOffset, RecordLength, false, false);
+ auto &FDESymbol = PC.G.addAnonymousSymbol(B, 0, B.getSize(), false, false);
CIEInformation *CIEInfo = nullptr;
{
// Process the CIE pointer field.
- auto CIEEdgeItr = BlockEdges.find(RecordOffset + CIEDeltaFieldOffset);
+ if (BlockEdges.Multiple.contains(CIEDeltaFieldOffset))
+ return make_error<JITLinkError>(
+ "CIE pointer field already has multiple edges at " +
+ formatv("{0:x16}", RecordAddress + CIEDeltaFieldOffset));
+
+ auto CIEEdgeItr = BlockEdges.TargetMap.find(CIEDeltaFieldOffset);
+
orc::ExecutorAddr CIEAddress =
RecordAddress + orc::ExecutorAddrDiff(CIEDeltaFieldOffset) -
orc::ExecutorAddrDiff(CIEDelta);
- if (CIEEdgeItr == BlockEdges.end()) {
-
+ if (CIEEdgeItr == BlockEdges.TargetMap.end()) {
LLVM_DEBUG({
dbgs() << " Adding edge at "
<< (RecordAddress + CIEDeltaFieldOffset)
@@ -341,8 +331,7 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B,
else
return CIEInfoOrErr.takeError();
assert(CIEInfo->CIESymbol && "CIEInfo has no CIE symbol set");
- B.addEdge(NegDelta32, RecordOffset + CIEDeltaFieldOffset,
- *CIEInfo->CIESymbol, 0);
+ B.addEdge(NegDelta32, CIEDeltaFieldOffset, *CIEInfo->CIESymbol, 0);
} else {
LLVM_DEBUG({
dbgs() << " Already has edge at "
@@ -364,7 +353,7 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B,
// Process the PC-Begin field.
LLVM_DEBUG({
- dbgs() << " Processing PC-begin at "
+ dbgs() << " Processing PC-begin at "
<< (RecordAddress + RecordReader.getOffset()) << "\n";
});
if (auto PCBegin = getOrCreateEncodedPointerEdge(
@@ -375,14 +364,14 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B,
// Add a keep-alive edge from the FDE target to the FDE to ensure that the
// FDE is kept alive if its target is.
LLVM_DEBUG({
- dbgs() << " Adding keep-alive edge from target at "
+ dbgs() << " Adding keep-alive edge from target at "
<< (*PCBegin)->getBlock().getAddress() << " to FDE at "
<< RecordAddress << "\n";
});
(*PCBegin)->getBlock().addEdge(Edge::KeepAlive, 0, FDESymbol, 0);
} else {
LLVM_DEBUG({
- dbgs() << " WARNING: Not adding keep-alive edge to FDE at "
+ dbgs() << " WARNING: Not adding keep-alive edge to FDE at "
<< RecordAddress << ", which points to "
<< ((*PCBegin)->isExternal() ? "external" : "absolute")
<< " symbol \"" << (*PCBegin)->getName()
@@ -409,7 +398,7 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B,
.takeError())
return Err;
} else {
- LLVM_DEBUG(dbgs() << " Record does not have LSDA field.\n");
+ LLVM_DEBUG(dbgs() << " Record does not have LSDA field.\n");
}
return Error::success();
@@ -520,7 +509,7 @@ Error EHFrameEdgeFixer::skipEncodedPointer(uint8_t PointerEncoding,
}
Expected<Symbol *> EHFrameEdgeFixer::getOrCreateEncodedPointerEdge(
- ParseContext &PC, const BlockEdgeMap &BlockEdges, uint8_t PointerEncoding,
+ ParseContext &PC, const BlockEdgesInfo &BlockEdges, uint8_t PointerEncoding,
BinaryStreamReader &RecordReader, Block &BlockToFix,
size_t PointerFieldOffset, const char *FieldName) {
using namespace dwarf;
@@ -531,10 +520,10 @@ Expected<Symbol *> EHFrameEdgeFixer::getOrCreateEncodedPointerEdge(
// If there's already an edge here then just skip the encoded pointer and
// return the edge's target.
{
- auto EdgeI = BlockEdges.find(PointerFieldOffset);
- if (EdgeI != BlockEdges.end()) {
+ auto EdgeI = BlockEdges.TargetMap.find(PointerFieldOffset);
+ if (EdgeI != BlockEdges.TargetMap.end()) {
LLVM_DEBUG({
- dbgs() << " Existing edge at "
+ dbgs() << " Existing edge at "
<< (BlockToFix.getAddress() + PointerFieldOffset) << " to "
<< FieldName << " at " << EdgeI->second.Target->getAddress();
if (EdgeI->second.Target->hasName())
@@ -545,6 +534,10 @@ Expected<Symbol *> EHFrameEdgeFixer::getOrCreateEncodedPointerEdge(
return std::move(Err);
return EdgeI->second.Target;
}
+
+ if (BlockEdges.Multiple.contains(PointerFieldOffset))
+ return make_error<JITLinkError>("Multiple relocations at offset " +
+ formatv("{0:x16}", PointerFieldOffset));
}
// Switch absptr to corresponding udata encoding.
@@ -596,7 +589,7 @@ Expected<Symbol *> EHFrameEdgeFixer::getOrCreateEncodedPointerEdge(
BlockToFix.addEdge(PtrEdgeKind, PointerFieldOffset, *TargetSym, 0);
LLVM_DEBUG({
- dbgs() << " Adding edge at "
+ dbgs() << " Adding edge at "
<< (BlockToFix.getAddress() + PointerFieldOffset) << " to "
<< FieldName << " at " << TargetSym->getAddress();
if (TargetSym->hasName())
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h
index 55cf7fc63ee7..49fbf650e7a7 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h
@@ -60,7 +60,11 @@ private:
Edge::AddendT Addend = 0;
};
- using BlockEdgeMap = DenseMap<Edge::OffsetT, EdgeTarget>;
+ struct BlockEdgesInfo {
+ DenseMap<Edge::OffsetT, EdgeTarget> TargetMap;
+ DenseSet<Edge::OffsetT> Multiple;
+ };
+
using CIEInfosMap = DenseMap<orc::ExecutorAddr, CIEInformation>;
struct ParseContext {
@@ -81,12 +85,10 @@ private:
};
Error processBlock(ParseContext &PC, Block &B);
- Error processCIE(ParseContext &PC, Block &B, size_t RecordOffset,
- size_t RecordLength, size_t CIEDeltaFieldOffset,
- const BlockEdgeMap &BlockEdges);
- Error processFDE(ParseContext &PC, Block &B, size_t RecordOffset,
- size_t RecordLength, size_t CIEDeltaFieldOffset,
- uint32_t CIEDelta, const BlockEdgeMap &BlockEdges);
+ Error processCIE(ParseContext &PC, Block &B, size_t CIEDeltaFieldOffset,
+ const BlockEdgesInfo &BlockEdges);
+ Error processFDE(ParseContext &PC, Block &B, size_t CIEDeltaFieldOffset,
+ uint32_t CIEDelta, const BlockEdgesInfo &BlockEdges);
Expected<AugmentationInfo>
parseAugmentationString(BinaryStreamReader &RecordReader);
@@ -96,9 +98,9 @@ private:
Error skipEncodedPointer(uint8_t PointerEncoding,
BinaryStreamReader &RecordReader);
Expected<Symbol *> getOrCreateEncodedPointerEdge(
- ParseContext &PC, const BlockEdgeMap &BlockEdges, uint8_t PointerEncoding,
- BinaryStreamReader &RecordReader, Block &BlockToFix,
- size_t PointerFieldOffset, const char *FieldName);
+ ParseContext &PC, const BlockEdgesInfo &BlockEdges,
+ uint8_t PointerEncoding, BinaryStreamReader &RecordReader,
+ Block &BlockToFix, size_t PointerFieldOffset, const char *FieldName);
Expected<Symbol &> getOrCreateSymbol(ParseContext &PC,
orc::ExecutorAddr Addr);
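BlockEdgesInfo splits the old flat map in two: an offset with exactly one relocation lives in TargetMap, and an offset seen a second time is demoted into the Multiple set rather than failing the whole parse up front. A generic sketch of that demotion, with placeholder offset and target types:

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include <cstdint>

struct OffsetEdges {
  llvm::DenseMap<uint64_t, unsigned> TargetMap; // offset -> lone target
  llvm::DenseSet<uint64_t> Multiple;            // offsets seen 2+ times

  void record(uint64_t Offset, unsigned Target) {
    if (Multiple.contains(Offset))
      return; // already known to be ambiguous
    auto It = TargetMap.find(Offset);
    if (It != TargetMap.end()) {
      TargetMap.erase(It); // second sighting: demote
      Multiple.insert(Offset);
    } else {
      TargetMap[Offset] = Target;
    }
  }
};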
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF.cpp
index dd08a23306ff..fdcce20cd2d1 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF.cpp
@@ -21,7 +21,6 @@
#include "llvm/ExecutionEngine/JITLink/ELF_riscv.h"
#include "llvm/ExecutionEngine/JITLink/ELF_x86_64.h"
#include "llvm/Object/ELF.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstring>
@@ -52,6 +51,22 @@ Expected<uint16_t> readTargetMachineArch(StringRef Buffer) {
}
}
+ if (Data[ELF::EI_DATA] == ELF::ELFDATA2MSB) {
+ if (Data[ELF::EI_CLASS] == ELF::ELFCLASS64) {
+ if (auto File = llvm::object::ELF64BEFile::create(Buffer)) {
+ return File->getHeader().e_machine;
+ } else {
+ return File.takeError();
+ }
+ } else if (Data[ELF::EI_CLASS] == ELF::ELFCLASS32) {
+ if (auto File = llvm::object::ELF32BEFile::create(Buffer)) {
+ return File->getHeader().e_machine;
+ } else {
+ return File.takeError();
+ }
+ }
+ }
+
return ELF::EM_NONE;
}
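The added branch mirrors the little-endian path for ELFDATA2MSB objects, dispatching on the EI_CLASS and EI_DATA bytes of e_ident. A sketch of one leg, reading e_machine from a 64-bit big-endian buffer:

#include "llvm/Object/ELF.h"

llvm::Expected<uint16_t> machineOfELF64BE(llvm::StringRef Buffer) {
  auto File = llvm::object::ELF64BEFile::create(Buffer);
  if (!File)
    return File.takeError();
  return File->getHeader().e_machine;
}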
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.cpp
index 5a983c219627..e081f47ca42f 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// Generic ELF LinkGraph buliding code.
+// Generic ELF LinkGraph building code.
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
index e72645798349..56d1efa4bdef 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
@@ -51,7 +51,7 @@ private:
Section *CommonSection = nullptr;
};
-/// Ling-graph building code that's specific to the given ELFT, but common
+/// LinkGraph building code that's specific to the given ELFT, but common
/// across all architectures.
template <typename ELFT>
class ELFLinkGraphBuilder : public ELFLinkGraphBuilderBase {
@@ -193,7 +193,7 @@ ELFLinkGraphBuilder<ELFT>::ELFLinkGraphBuilder(
StringRef FileName, LinkGraph::GetEdgeKindNameFunction GetEdgeKindName)
: ELFLinkGraphBuilderBase(std::make_unique<LinkGraph>(
FileName.str(), Triple(std::move(TT)), std::move(Features),
- ELFT::Is64Bits ? 8 : 4, support::endianness(ELFT::TargetEndianness),
+ ELFT::Is64Bits ? 8 : 4, llvm::endianness(ELFT::TargetEndianness),
std::move(GetEdgeKindName))),
Obj(Obj) {
LLVM_DEBUG(
@@ -366,7 +366,7 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySections() {
GraphSec = &G->createSection(*Name, Prot);
// Non-SHF_ALLOC sections get NoAlloc memory lifetimes.
if (!(Sec.sh_flags & ELF::SHF_ALLOC)) {
- GraphSec->setMemLifetimePolicy(orc::MemLifetimePolicy::NoAlloc);
+ GraphSec->setMemLifetime(orc::MemLifetime::NoAlloc);
LLVM_DEBUG({
dbgs() << " " << SecIndex << ": \"" << *Name
<< "\" is not a SHF_ALLOC section. Using NoAlloc lifetime.\n";
@@ -374,7 +374,14 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySections() {
}
}
- assert(GraphSec->getMemProt() == Prot && "MemProt should match");
+ if (GraphSec->getMemProt() != Prot) {
+ std::string ErrMsg;
+ raw_string_ostream(ErrMsg)
+ << "In " << G->getName() << ", section " << *Name
+ << " is present more than once with different permissions: "
+ << GraphSec->getMemProt() << " vs " << Prot;
+ return make_error<JITLinkError>(std::move(ErrMsg));
+ }
Block *B = nullptr;
if (Sec.sh_type != ELF::SHT_NOBITS) {
@@ -499,6 +506,22 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySymbols() {
TargetFlagsType Flags = makeTargetFlags(Sym);
orc::ExecutorAddrDiff Offset = getRawOffset(Sym, Flags);
+ if (Offset + Sym.st_size > B->getSize()) {
+ std::string ErrMsg;
+ raw_string_ostream ErrStream(ErrMsg);
+ ErrStream << "In " << G->getName() << ", symbol ";
+ if (!Name->empty())
+ ErrStream << *Name;
+ else
+ ErrStream << "<anon>";
+ ErrStream << " (" << (B->getAddress() + Offset) << " -- "
+ << (B->getAddress() + Offset + Sym.st_size) << ") extends "
+ << formatv("{0:x}", Offset + Sym.st_size - B->getSize())
+ << " bytes past the end of its containing block ("
+ << B->getRange() << ")";
+ return make_error<JITLinkError>(std::move(ErrMsg));
+ }
+
// In RISCV, temporary symbols (Used to generate dwarf, eh_frame
// sections...) will appear in object code's symbol table, and LLVM does
// not use names on these temporary symbols (RISCV gnu toolchain uses
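The bounds check added above rejects symbols whose extent runs past their containing block. The invariant in isolation, as a hedged sketch (helper name and overflow guard are illustrative, not from the patch):

    // A symbol at Offset with size Size must lie within a BlockSize-byte block.
    bool symbolFitsInBlock(uint64_t Offset, uint64_t Size, uint64_t BlockSize) {
      // Same condition as the patch's (Offset + Sym.st_size > B->getSize()),
      // rearranged so pathological inputs cannot wrap around.
      return Offset <= BlockSize && Size <= BlockSize - Offset;
    }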
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp
index a1bc4c853323..132989fcbce0 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp
@@ -17,7 +17,6 @@
#include "llvm/ExecutionEngine/JITLink/aarch32.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFObjectFile.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TargetParser/ARMTargetParser.h"
@@ -40,6 +39,12 @@ Expected<aarch32::EdgeKind_aarch32> getJITLinkEdgeKind(uint32_t ELFType) {
return aarch32::Data_Delta32;
case ELF::R_ARM_CALL:
return aarch32::Arm_Call;
+ case ELF::R_ARM_JUMP24:
+ return aarch32::Arm_Jump24;
+ case ELF::R_ARM_MOVW_ABS_NC:
+ return aarch32::Arm_MovwAbsNC;
+ case ELF::R_ARM_MOVT_ABS:
+ return aarch32::Arm_MovtAbs;
case ELF::R_ARM_THM_CALL:
return aarch32::Thumb_Call;
case ELF::R_ARM_THM_JUMP24:
@@ -48,6 +53,10 @@ Expected<aarch32::EdgeKind_aarch32> getJITLinkEdgeKind(uint32_t ELFType) {
return aarch32::Thumb_MovwAbsNC;
case ELF::R_ARM_THM_MOVT_ABS:
return aarch32::Thumb_MovtAbs;
+ case ELF::R_ARM_THM_MOVW_PREL_NC:
+ return aarch32::Thumb_MovwPrelNC;
+ case ELF::R_ARM_THM_MOVT_PREL:
+ return aarch32::Thumb_MovtPrel;
}
return make_error<JITLinkError>(
@@ -64,6 +73,12 @@ Expected<uint32_t> getELFRelocationType(Edge::Kind Kind) {
return ELF::R_ARM_ABS32;
case aarch32::Arm_Call:
return ELF::R_ARM_CALL;
+ case aarch32::Arm_Jump24:
+ return ELF::R_ARM_JUMP24;
+ case aarch32::Arm_MovwAbsNC:
+ return ELF::R_ARM_MOVW_ABS_NC;
+ case aarch32::Arm_MovtAbs:
+ return ELF::R_ARM_MOVT_ABS;
case aarch32::Thumb_Call:
return ELF::R_ARM_THM_CALL;
case aarch32::Thumb_Jump24:
@@ -72,6 +87,10 @@ Expected<uint32_t> getELFRelocationType(Edge::Kind Kind) {
return ELF::R_ARM_THM_MOVW_ABS_NC;
case aarch32::Thumb_MovtAbs:
return ELF::R_ARM_THM_MOVT_ABS;
+ case aarch32::Thumb_MovwPrelNC:
+ return ELF::R_ARM_THM_MOVW_PREL_NC;
+ case aarch32::Thumb_MovtPrel:
+ return ELF::R_ARM_THM_MOVT_PREL;
}
return make_error<JITLinkError>(formatv("Invalid aarch32 edge {0:d}: ",
@@ -102,7 +121,7 @@ private:
}
};
-template <support::endianness DataEndianness>
+template <llvm::endianness DataEndianness>
class ELFLinkGraphBuilder_aarch32
: public ELFLinkGraphBuilder<ELFType<DataEndianness, false>> {
private:
@@ -154,14 +173,13 @@ private:
auto FixupAddress = orc::ExecutorAddr(FixupSect.sh_addr) + Rel.r_offset;
Edge::OffsetT Offset = FixupAddress - BlockToFix.getAddress();
- Edge E(*Kind, Offset, *GraphSymbol, 0);
Expected<int64_t> Addend =
- aarch32::readAddend(*Base::G, BlockToFix, E, ArmCfg);
+ aarch32::readAddend(*Base::G, BlockToFix, Offset, *Kind, ArmCfg);
if (!Addend)
return Addend.takeError();
- E.setAddend(*Addend);
+ Edge E(*Kind, Offset, *GraphSymbol, *Addend);
LLVM_DEBUG({
dbgs() << " ";
printEdge(dbgs(), BlockToFix, E, getELFAArch32EdgeKindName(*Kind));
@@ -253,7 +271,7 @@ createLinkGraphFromELFObject_aarch32(MemoryBufferRef ObjectBuffer) {
case Triple::arm:
case Triple::thumb: {
auto &ELFFile = cast<ELFObjectFile<ELF32LE>>(**ELFObj).getELFFile();
- return ELFLinkGraphBuilder_aarch32<support::little>(
+ return ELFLinkGraphBuilder_aarch32<llvm::endianness::little>(
(*ELFObj)->getFileName(), ELFFile, TT, std::move(*Features),
ArmCfg)
.buildGraph();
@@ -261,7 +279,7 @@ createLinkGraphFromELFObject_aarch32(MemoryBufferRef ObjectBuffer) {
case Triple::armeb:
case Triple::thumbeb: {
auto &ELFFile = cast<ELFObjectFile<ELF32BE>>(**ELFObj).getELFFile();
- return ELFLinkGraphBuilder_aarch32<support::big>(
+ return ELFLinkGraphBuilder_aarch32<llvm::endianness::big>(
(*ELFObj)->getFileName(), ELFFile, TT, std::move(*Features),
ArmCfg)
.buildGraph();
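The two switch tables extended above, getJITLinkEdgeKind and getELFRelocationType, are meant to stay inverses of each other. A hedged round-trip check over the newly added kinds (illustrative test code, not part of the patch):

    for (uint32_t R : {ELF::R_ARM_JUMP24, ELF::R_ARM_MOVW_ABS_NC,
                       ELF::R_ARM_MOVT_ABS, ELF::R_ARM_THM_MOVW_PREL_NC,
                       ELF::R_ARM_THM_MOVT_PREL}) {
      auto Kind = cantFail(getJITLinkEdgeKind(R)); // ELF type -> edge kind
      assert(cantFail(getELFRelocationType(Kind)) == R && "tables out of sync");
    }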
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp
index 652eb931190e..f17b2c626ac2 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp
@@ -598,7 +598,7 @@ void link_ELF_aarch64(std::unique_ptr<LinkGraph> G,
PassConfiguration Config;
const Triple &TT = G->getTargetTriple();
if (Ctx->shouldAddDefaultTargetPasses(TT)) {
- // Add eh-frame passses.
+ // Add eh-frame passes.
Config.PrePrunePasses.push_back(DWARFRecordSectionSplitter(".eh_frame"));
Config.PrePrunePasses.push_back(EHFrameEdgeFixer(
".eh_frame", 8, aarch64::Pointer32, aarch64::Pointer64,
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_loongarch.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_loongarch.cpp
index 7f76b45aecbb..aa9385fcb183 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_loongarch.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_loongarch.cpp
@@ -186,7 +186,7 @@ void link_ELF_loongarch(std::unique_ptr<LinkGraph> G,
PassConfiguration Config;
const Triple &TT = G->getTargetTriple();
if (Ctx->shouldAddDefaultTargetPasses(TT)) {
- // Add eh-frame passses.
+ // Add eh-frame passes.
Config.PrePrunePasses.push_back(DWARFRecordSectionSplitter(".eh_frame"));
Config.PrePrunePasses.push_back(
EHFrameEdgeFixer(".eh_frame", G->getPointerSize(), Pointer32, Pointer64,
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp
index a30b9ce51c84..3b86250b60a4 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp
@@ -15,7 +15,6 @@
#include "llvm/ExecutionEngine/JITLink/TableManager.h"
#include "llvm/ExecutionEngine/JITLink/ppc64.h"
#include "llvm/Object/ELFObjectFile.h"
-#include "llvm/Support/Endian.h"
#include "EHFrameSupportImpl.h"
#include "ELFLinkGraphBuilder.h"
@@ -31,8 +30,77 @@ using namespace llvm::jitlink;
constexpr StringRef ELFTOCSymbolName = ".TOC.";
constexpr StringRef TOCSymbolAliasIdent = "__TOC__";
constexpr uint64_t ELFTOCBaseOffset = 0x8000;
+constexpr StringRef ELFTLSInfoSectionName = "$__TLSINFO";
-template <support::endianness Endianness>
+template <llvm::endianness Endianness>
+class TLSInfoTableManager_ELF_ppc64
+ : public TableManager<TLSInfoTableManager_ELF_ppc64<Endianness>> {
+public:
+ static const uint8_t TLSInfoEntryContent[16];
+
+ static StringRef getSectionName() { return ELFTLSInfoSectionName; }
+
+ bool visitEdge(LinkGraph &G, Block *B, Edge &E) {
+ Edge::Kind K = E.getKind();
+ switch (K) {
+ case ppc64::RequestTLSDescInGOTAndTransformToTOCDelta16HA:
+ E.setKind(ppc64::TOCDelta16HA);
+ E.setTarget(this->getEntryForTarget(G, E.getTarget()));
+ return true;
+ case ppc64::RequestTLSDescInGOTAndTransformToTOCDelta16LO:
+ E.setKind(ppc64::TOCDelta16LO);
+ E.setTarget(this->getEntryForTarget(G, E.getTarget()));
+ return true;
+ case ppc64::RequestTLSDescInGOTAndTransformToDelta34:
+ E.setKind(ppc64::Delta34);
+ E.setTarget(this->getEntryForTarget(G, E.getTarget()));
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ Symbol &createEntry(LinkGraph &G, Symbol &Target) {
+ // The TLS Info entry's key value will be written by
+ // `fixTLVSectionsAndEdges`, so create mutable content.
+ auto &TLSInfoEntry = G.createMutableContentBlock(
+ getTLSInfoSection(G), G.allocateContent(getTLSInfoEntryContent()),
+ orc::ExecutorAddr(), 8, 0);
+ TLSInfoEntry.addEdge(ppc64::Pointer64, 8, Target, 0);
+ return G.addAnonymousSymbol(TLSInfoEntry, 0, 16, false, false);
+ }
+
+private:
+ Section &getTLSInfoSection(LinkGraph &G) {
+ if (!TLSInfoTable)
+ TLSInfoTable =
+ &G.createSection(ELFTLSInfoSectionName, orc::MemProt::Read);
+ return *TLSInfoTable;
+ }
+
+ ArrayRef<char> getTLSInfoEntryContent() const {
+ return {reinterpret_cast<const char *>(TLSInfoEntryContent),
+ sizeof(TLSInfoEntryContent)};
+ }
+
+ Section *TLSInfoTable = nullptr;
+};
+
+template <>
+const uint8_t TLSInfoTableManager_ELF_ppc64<
+ llvm::endianness::little>::TLSInfoEntryContent[16] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /*pthread key */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 /*data address*/
+};
+
+template <>
+const uint8_t TLSInfoTableManager_ELF_ppc64<
+ llvm::endianness::big>::TLSInfoEntryContent[16] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /*pthread key */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 /*data address*/
+};
+
+template <llvm::endianness Endianness>
Symbol &createELFGOTHeader(LinkGraph &G,
ppc64::TOCTableManager<Endianness> &TOC) {
Symbol *TOCSymbol = nullptr;
@@ -58,7 +126,7 @@ Symbol &createELFGOTHeader(LinkGraph &G,
}
// Register preexisting GOT entries with TOC table manager.
-template <support::endianness Endianness>
+template <llvm::endianness Endianness>
inline void
registerExistingGOTEntries(LinkGraph &G,
ppc64::TOCTableManager<Endianness> &TOC) {
@@ -76,7 +144,7 @@ registerExistingGOTEntries(LinkGraph &G,
}
}
-template <support::endianness Endianness>
+template <llvm::endianness Endianness>
Error buildTables_ELF_ppc64(LinkGraph &G) {
LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n");
ppc64::TOCTableManager<Endianness> TOC;
@@ -91,8 +159,8 @@ Error buildTables_ELF_ppc64(LinkGraph &G) {
registerExistingGOTEntries(G, TOC);
ppc64::PLTTableManager<Endianness> PLT(TOC);
- visitExistingEdges(G, TOC, PLT);
- // TODO: Add TLS support.
+ TLSInfoTableManager_ELF_ppc64<Endianness> TLSInfo;
+ visitExistingEdges(G, TOC, PLT, TLSInfo);
// After visiting edges in LinkGraph, we have GOT entries built in the
// synthesized section.
@@ -125,7 +193,7 @@ Error buildTables_ELF_ppc64(LinkGraph &G) {
namespace llvm::jitlink {
-template <support::endianness Endianness>
+template <llvm::endianness Endianness>
class ELFLinkGraphBuilder_ppc64
: public ELFLinkGraphBuilder<object::ELFType<Endianness, true>> {
private:
@@ -164,6 +232,21 @@ private:
if (LLVM_UNLIKELY(ELFReloc == ELF::R_PPC64_NONE))
return Error::success();
+ // TLS model markers. Only the global-dynamic model is supported for now.
+ if (ELFReloc == ELF::R_PPC64_TLSGD)
+ return Error::success();
+ if (ELFReloc == ELF::R_PPC64_TLSLD)
+ return make_error<StringError>("Local-dynamic TLS model is not supported",
+ inconvertibleErrorCode());
+
+ if (ELFReloc == ELF::R_PPC64_PCREL_OPT)
+ // TODO: Support the PCREL optimization; ignore it for now.
+ return Error::success();
+
+ if (ELFReloc == ELF::R_PPC64_TPREL34)
+ return make_error<StringError>("Local-exec TLS model is not supported",
+ inconvertibleErrorCode());
+
auto ObjSymbol = Base::Obj.getRelocationSymbol(Rel, Base::SymTabSec);
if (!ObjSymbol)
return ObjSymbol.takeError();
@@ -192,9 +275,60 @@ private:
case ELF::R_PPC64_ADDR64:
Kind = ppc64::Pointer64;
break;
+ case ELF::R_PPC64_ADDR32:
+ Kind = ppc64::Pointer32;
+ break;
+ case ELF::R_PPC64_ADDR16:
+ Kind = ppc64::Pointer16;
+ break;
+ case ELF::R_PPC64_ADDR16_DS:
+ Kind = ppc64::Pointer16DS;
+ break;
+ case ELF::R_PPC64_ADDR16_HA:
+ Kind = ppc64::Pointer16HA;
+ break;
+ case ELF::R_PPC64_ADDR16_HI:
+ Kind = ppc64::Pointer16HI;
+ break;
+ case ELF::R_PPC64_ADDR16_HIGH:
+ Kind = ppc64::Pointer16HIGH;
+ break;
+ case ELF::R_PPC64_ADDR16_HIGHA:
+ Kind = ppc64::Pointer16HIGHA;
+ break;
+ case ELF::R_PPC64_ADDR16_HIGHER:
+ Kind = ppc64::Pointer16HIGHER;
+ break;
+ case ELF::R_PPC64_ADDR16_HIGHERA:
+ Kind = ppc64::Pointer16HIGHERA;
+ break;
+ case ELF::R_PPC64_ADDR16_HIGHEST:
+ Kind = ppc64::Pointer16HIGHEST;
+ break;
+ case ELF::R_PPC64_ADDR16_HIGHESTA:
+ Kind = ppc64::Pointer16HIGHESTA;
+ break;
+ case ELF::R_PPC64_ADDR16_LO:
+ Kind = ppc64::Pointer16LO;
+ break;
+ case ELF::R_PPC64_ADDR16_LO_DS:
+ Kind = ppc64::Pointer16LODS;
+ break;
+ case ELF::R_PPC64_ADDR14:
+ Kind = ppc64::Pointer14;
+ break;
+ case ELF::R_PPC64_TOC:
+ Kind = ppc64::TOC;
+ break;
+ case ELF::R_PPC64_TOC16:
+ Kind = ppc64::TOCDelta16;
+ break;
case ELF::R_PPC64_TOC16_HA:
Kind = ppc64::TOCDelta16HA;
break;
+ case ELF::R_PPC64_TOC16_HI:
+ Kind = ppc64::TOCDelta16HI;
+ break;
case ELF::R_PPC64_TOC16_DS:
Kind = ppc64::TOCDelta16DS;
break;
@@ -210,6 +344,9 @@ private:
case ELF::R_PPC64_REL16_HA:
Kind = ppc64::Delta16HA;
break;
+ case ELF::R_PPC64_REL16_HI:
+ Kind = ppc64::Delta16HI;
+ break;
case ELF::R_PPC64_REL16_LO:
Kind = ppc64::Delta16LO;
break;
@@ -217,26 +354,36 @@ private:
Kind = ppc64::Delta32;
break;
case ELF::R_PPC64_REL24_NOTOC:
- case ELF::R_PPC64_REL24: {
- bool isLocal = !GraphSymbol->isExternal();
- if (isLocal) {
- // TODO: There are cases a local function call need a call stub.
- // 1. Caller uses TOC, the callee doesn't, need a r2 save stub.
- // 2. Caller doesn't use TOC, the callee does, need a r12 setup stub.
- // FIXME: For a local call, we might need a thunk if branch target is
- // out of range.
- Kind = ppc64::CallBranchDelta;
- // Branch to local entry.
- Addend += ELF::decodePPC64LocalEntryOffset((*ObjSymbol)->st_other);
- } else {
- Kind = ELFReloc == ELF::R_PPC64_REL24 ? ppc64::RequestPLTCallStubSaveTOC
- : ppc64::RequestPLTCallStubNoTOC;
- }
+ Kind = ppc64::RequestCallNoTOC;
+ break;
+ case ELF::R_PPC64_REL24:
+ Kind = ppc64::RequestCall;
+ // Whether the target is external cannot be determined here; that decision
+ // is deferred to a post-prune pass. Assume a branch to the local entry by
+ // default, since at this point there is no context from which to compute
+ // LocalEntryOffset. If the call finally turns out to be external, a stub is
+ // created for the external target; this edge is then retargeted to the stub
+ // and its addend is set to 0.
+ Addend += ELF::decodePPC64LocalEntryOffset((*ObjSymbol)->st_other);
break;
- }
case ELF::R_PPC64_REL64:
Kind = ppc64::Delta64;
break;
+ case ELF::R_PPC64_PCREL34:
+ Kind = ppc64::Delta34;
+ break;
+ case ELF::R_PPC64_GOT_PCREL34:
+ Kind = ppc64::RequestGOTAndTransformToDelta34;
+ break;
+ case ELF::R_PPC64_GOT_TLSGD16_HA:
+ Kind = ppc64::RequestTLSDescInGOTAndTransformToTOCDelta16HA;
+ break;
+ case ELF::R_PPC64_GOT_TLSGD16_LO:
+ Kind = ppc64::RequestTLSDescInGOTAndTransformToTOCDelta16LO;
+ break;
+ case ELF::R_PPC64_GOT_TLSGD_PCREL34:
+ Kind = ppc64::RequestTLSDescInGOTAndTransformToDelta34;
+ break;
}
Edge GE(Kind, Offset, *GraphSymbol, Addend);
@@ -252,7 +399,7 @@ public:
FileName, ppc64::getEdgeKindName) {}
};
-template <support::endianness Endianness>
+template <llvm::endianness Endianness>
class ELFJITLinker_ppc64 : public JITLinker<ELFJITLinker_ppc64<Endianness>> {
using JITLinkerBase = JITLinker<ELFJITLinker_ppc64<Endianness>>;
friend JITLinkerBase;
@@ -314,7 +461,7 @@ private:
}
};
-template <support::endianness Endianness>
+template <llvm::endianness Endianness>
Expected<std::unique_ptr<LinkGraph>>
createLinkGraphFromELFObject_ppc64(MemoryBufferRef ObjectBuffer) {
LLVM_DEBUG({
@@ -338,7 +485,7 @@ createLinkGraphFromELFObject_ppc64(MemoryBufferRef ObjectBuffer) {
.buildGraph();
}
-template <support::endianness Endianness>
+template <llvm::endianness Endianness>
void link_ELF_ppc64(std::unique_ptr<LinkGraph> G,
std::unique_ptr<JITLinkContext> Ctx) {
PassConfiguration Config;
@@ -346,7 +493,7 @@ void link_ELF_ppc64(std::unique_ptr<LinkGraph> G,
if (Ctx->shouldAddDefaultTargetPasses(G->getTargetTriple())) {
// Construct a JITLinker and run the link function.
- // Add eh-frame passses.
+ // Add eh-frame passes.
Config.PrePrunePasses.push_back(DWARFRecordSectionSplitter(".eh_frame"));
Config.PrePrunePasses.push_back(EHFrameEdgeFixer(
".eh_frame", G->getPointerSize(), ppc64::Pointer32, ppc64::Pointer64,
@@ -371,26 +518,26 @@ void link_ELF_ppc64(std::unique_ptr<LinkGraph> G,
Expected<std::unique_ptr<LinkGraph>>
createLinkGraphFromELFObject_ppc64(MemoryBufferRef ObjectBuffer) {
- return createLinkGraphFromELFObject_ppc64<support::big>(
+ return createLinkGraphFromELFObject_ppc64<llvm::endianness::big>(
std::move(ObjectBuffer));
}
Expected<std::unique_ptr<LinkGraph>>
createLinkGraphFromELFObject_ppc64le(MemoryBufferRef ObjectBuffer) {
- return createLinkGraphFromELFObject_ppc64<support::little>(
+ return createLinkGraphFromELFObject_ppc64<llvm::endianness::little>(
std::move(ObjectBuffer));
}
/// jit-link the given object buffer, which must be a ELF ppc64 object file.
void link_ELF_ppc64(std::unique_ptr<LinkGraph> G,
std::unique_ptr<JITLinkContext> Ctx) {
- return link_ELF_ppc64<support::big>(std::move(G), std::move(Ctx));
+ return link_ELF_ppc64<llvm::endianness::big>(std::move(G), std::move(Ctx));
}
/// jit-link the given object buffer, which must be a ELF ppc64le object file.
void link_ELF_ppc64le(std::unique_ptr<LinkGraph> G,
std::unique_ptr<JITLinkContext> Ctx) {
- return link_ELF_ppc64<support::little>(std::move(G), std::move(Ctx));
+ return link_ELF_ppc64<llvm::endianness::little>(std::move(G), std::move(Ctx));
}
} // end namespace llvm::jitlink
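The new TLSInfoTableManager_ELF_ppc64 synthesizes one 16-byte entry per TLS target in the $__TLSINFO section, per the TLSInfoEntryContent layout above. A hedged sketch of that layout as a struct (illustrative; the real code works on raw bytes):

    struct TLSInfoEntry {    // 16 bytes, matching TLSInfoEntryContent
      uint64_t PthreadKey;   // written later by fixTLVSectionsAndEdges
      uint64_t DataAddress;  // bound via the ppc64::Pointer64 edge at offset 8
    };
    static_assert(sizeof(TLSInfoEntry) == 16, "entry size per the patch");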
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
index 410dd7fedad1..d0701ba08bd9 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
@@ -11,10 +11,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/JITLink/ELF_riscv.h"
+#include "EHFrameSupportImpl.h"
#include "ELFLinkGraphBuilder.h"
#include "JITLinkGeneric.h"
#include "PerGraphGOTAndPLTStubsBuilder.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h"
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/ExecutionEngine/JITLink/riscv.h"
#include "llvm/Object/ELF.h"
@@ -456,6 +458,13 @@ private:
case AlignRelaxable:
// Ignore when the relaxation pass did not run
break;
+ case NegDelta32: {
+ int64_t Value = FixupAddress - E.getTarget().getAddress() + E.getAddend();
+ if (LLVM_UNLIKELY(!isInRangeForImm(Value, 32)))
+ return makeTargetOutOfRangeError(G, B, E);
+ *(little32_t *)FixupPtr = static_cast<uint32_t>(Value);
+ break;
+ }
}
return Error::success();
}
@@ -516,8 +525,7 @@ static RelaxAux initRelaxAux(LinkGraph &G) {
RelaxAux Aux;
Aux.Config.IsRV32 = G.getTargetTriple().isRISCV32();
const auto &Features = G.getFeatures().getFeatures();
- Aux.Config.HasRVC =
- std::find(Features.begin(), Features.end(), "+c") != Features.end();
+ Aux.Config.HasRVC = llvm::is_contained(Features, "+c");
for (auto &S : G.sections()) {
if (!shouldRelax(S))
@@ -959,6 +967,13 @@ void link_ELF_riscv(std::unique_ptr<LinkGraph> G,
PassConfiguration Config;
const Triple &TT = G->getTargetTriple();
if (Ctx->shouldAddDefaultTargetPasses(TT)) {
+
+ Config.PrePrunePasses.push_back(DWARFRecordSectionSplitter(".eh_frame"));
+ Config.PrePrunePasses.push_back(EHFrameEdgeFixer(
+ ".eh_frame", G->getPointerSize(), Edge::Invalid, Edge::Invalid,
+ Edge::Invalid, Edge::Invalid, NegDelta32));
+ Config.PrePrunePasses.push_back(EHFrameNullTerminator(".eh_frame"));
+
if (auto MarkLive = Ctx->getMarkLivePass(TT))
Config.PrePrunePasses.push_back(std::move(MarkLive));
else
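ELF_riscv now runs the same .eh_frame pre-prune passes as the other targets, and the new NegDelta32 fixup stores the fixup-to-target distance with the opposite sign of Delta32. A hedged sketch of the arithmetic, with illustrative addresses:

    // Mirrors the fixup added above: FixupAddress - TargetAddress + Addend,
    // range-checked to 32 bits before the little-endian store.
    int64_t negDelta32(uint64_t FixupAddr, uint64_t TargetAddr, int64_t Addend) {
      return static_cast<int64_t>(FixupAddr - TargetAddr) + Addend;
    }
    // e.g. negDelta32(0x1010, 0x1000, 0) == 0x10; Delta32 would store -0x10.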
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp
index 1bdddd4c722b..a1fe9c5fcd73 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp
@@ -16,7 +16,6 @@
#include "llvm/ExecutionEngine/JITLink/TableManager.h"
#include "llvm/ExecutionEngine/JITLink/x86_64.h"
#include "llvm/Object/ELFObjectFile.h"
-#include "llvm/Support/Endian.h"
#include "DefineExternalSectionStartAndEndSymbols.h"
#include "EHFrameSupportImpl.h"
@@ -242,8 +241,10 @@ public:
std::unique_ptr<LinkGraph> G,
PassConfiguration PassConfig)
: JITLinker(std::move(Ctx), std::move(G), std::move(PassConfig)) {
- getPassConfig().PostAllocationPasses.push_back(
- [this](LinkGraph &G) { return getOrCreateGOTSymbol(G); });
+
+ if (shouldAddDefaultTargetPasses(getGraph().getTargetTriple()))
+ getPassConfig().PostAllocationPasses.push_back(
+ [this](LinkGraph &G) { return getOrCreateGOTSymbol(G); });
}
private:
@@ -348,11 +349,11 @@ identifyELFSectionStartAndEndSymbols(LinkGraph &G, Symbol &Sym) {
constexpr StringRef EndSymbolPrefix = "__end";
auto SymName = Sym.getName();
- if (SymName.startswith(StartSymbolPrefix)) {
+ if (SymName.starts_with(StartSymbolPrefix)) {
if (auto *Sec =
G.findSectionByName(SymName.drop_front(StartSymbolPrefix.size())))
return {*Sec, true};
- } else if (SymName.startswith(EndSymbolPrefix)) {
+ } else if (SymName.starts_with(EndSymbolPrefix)) {
if (auto *Sec =
G.findSectionByName(SymName.drop_front(EndSymbolPrefix.size())))
return {*Sec, false};
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
index 4a2755d3696b..d86ceb99ded0 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
@@ -13,6 +13,10 @@
#include "llvm/ExecutionEngine/JITLink/COFF.h"
#include "llvm/ExecutionEngine/JITLink/ELF.h"
#include "llvm/ExecutionEngine/JITLink/MachO.h"
+#include "llvm/ExecutionEngine/JITLink/aarch64.h"
+#include "llvm/ExecutionEngine/JITLink/i386.h"
+#include "llvm/ExecutionEngine/JITLink/loongarch.h"
+#include "llvm/ExecutionEngine/JITLink/x86_64.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
@@ -417,6 +421,38 @@ Error makeAlignmentError(llvm::orc::ExecutorAddr Loc, uint64_t Value, int N,
" is not aligned to " + Twine(N) + " bytes");
}
+AnonymousPointerCreator getAnonymousPointerCreator(const Triple &TT) {
+ switch (TT.getArch()) {
+ case Triple::aarch64:
+ return aarch64::createAnonymousPointer;
+ case Triple::x86_64:
+ return x86_64::createAnonymousPointer;
+ case Triple::x86:
+ return i386::createAnonymousPointer;
+ case Triple::loongarch32:
+ case Triple::loongarch64:
+ return loongarch::createAnonymousPointer;
+ default:
+ return nullptr;
+ }
+}
+
+PointerJumpStubCreator getPointerJumpStubCreator(const Triple &TT) {
+ switch (TT.getArch()) {
+ case Triple::aarch64:
+ return aarch64::createAnonymousPointerJumpStub;
+ case Triple::x86_64:
+ return x86_64::createAnonymousPointerJumpStub;
+ case Triple::x86:
+ return i386::createAnonymousPointerJumpStub;
+ case Triple::loongarch32:
+ case Triple::loongarch64:
+ return loongarch::createAnonymousPointerJumpStub;
+ default:
+ return nullptr;
+ }
+}
+
Expected<std::unique_ptr<LinkGraph>>
createLinkGraphFromObject(MemoryBufferRef ObjectBuffer) {
auto Magic = identify_magic(ObjectBuffer.getBuffer());
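Both new getters fall through to nullptr for architectures outside their switch, so callers must check before invoking the creators. A hedged sketch of the expected handling (error text illustrative):

    auto CreatePtr = getAnonymousPointerCreator(G.getTargetTriple());
    auto CreateStub = getPointerJumpStubCreator(G.getTargetTriple());
    if (!CreatePtr || !CreateStub)
      return make_error<JITLinkError>("pointer/stub synthesis unsupported on " +
                                      G.getTargetTriple().str());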
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
index feaa0fb6a58c..5361272ae79e 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
@@ -65,7 +65,7 @@ void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self,
if (AR)
Alloc = std::move(*AR);
else
- return abandonAllocAndBailOut(std::move(Self), AR.takeError());
+ return Ctx->notifyFailed(AR.takeError());
LLVM_DEBUG({
dbgs() << "Link graph \"" << G->getName()
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
index e69eddd6e119..e5d05e6b1b7b 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
@@ -13,7 +13,6 @@
#ifndef LIB_EXECUTIONENGINE_JITLINK_JITLINKGENERIC_H
#define LIB_EXECUTIONENGINE_JITLINK_JITLINKGENERIC_H
-#include "llvm/ADT/DenseSet.h"
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#define DEBUG_TYPE "jitlink"
@@ -43,6 +42,16 @@ protected:
using AllocResult = Expected<std::unique_ptr<InFlightAlloc>>;
using FinalizeResult = Expected<JITLinkMemoryManager::FinalizedAlloc>;
+ // Returns a reference to the graph being linked.
+ LinkGraph &getGraph() { return *G; }
+
+ // Returns true if the context says that the linker should add default
+ // passes. This can be used by JITLinkerBase implementations when deciding
+ // whether they should add default passes.
+ bool shouldAddDefaultTargetPasses(const Triple &TT) {
+ return Ctx->shouldAddDefaultTargetPasses(TT);
+ }
+
// Returns the PassConfiguration for this instance. This can be used by
// JITLinkerBase implementations to add late passes that reference their
// own data structures (e.g. for ELF implementations to locate / construct
@@ -124,8 +133,7 @@ private:
LLVM_DEBUG(dbgs() << "Fixing up blocks:\n");
for (auto &Sec : G.sections()) {
- bool NoAllocSection =
- Sec.getMemLifetimePolicy() == orc::MemLifetimePolicy::NoAlloc;
+ bool NoAllocSection = Sec.getMemLifetime() == orc::MemLifetime::NoAlloc;
for (auto *B : Sec.blocks()) {
LLVM_DEBUG(dbgs() << " " << *B << ":\n");
@@ -153,12 +161,11 @@ private:
// If B is a block in a Standard or Finalize section then make sure
// that no edges point to symbols in NoAlloc sections.
- assert(
- (NoAllocSection || !E.getTarget().isDefined() ||
- E.getTarget().getBlock().getSection().getMemLifetimePolicy() !=
- orc::MemLifetimePolicy::NoAlloc) &&
- "Block in allocated section has edge pointing to no-alloc "
- "section");
+ assert((NoAllocSection || !E.getTarget().isDefined() ||
+ E.getTarget().getBlock().getSection().getMemLifetime() !=
+ orc::MemLifetime::NoAlloc) &&
+ "Block in allocated section has edge pointing to no-alloc "
+ "section");
// Dispatch to LinkerImpl for fixup.
if (auto Err = impl().applyFixup(G, *B, E))
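The getGraph and shouldAddDefaultTargetPasses helpers added above give JITLinkerBase subclasses the context they need during construction; the ELF_x86_64 constructor change earlier in this patch is their first user. A minimal sketch of the pattern (hypothetical subclass and placeholder pass):

    class MyLinker : public JITLinker<MyLinker> { // illustrative CRTP subclass
      MyLinker(std::unique_ptr<JITLinkContext> Ctx, std::unique_ptr<LinkGraph> G,
               PassConfiguration Cfg)
          : JITLinker(std::move(Ctx), std::move(G), std::move(Cfg)) {
        if (shouldAddDefaultTargetPasses(getGraph().getTargetTriple()))
          getPassConfig().PostAllocationPasses.push_back(
              [](LinkGraph &LG) { return Error::success(); }); // placeholder
      }
    };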
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
index f481504135a5..474a0b5160bc 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
@@ -26,10 +26,10 @@ BasicLayout::BasicLayout(LinkGraph &G) : G(G) {
for (auto &Sec : G.sections()) {
// Skip empty sections, and sections with NoAlloc lifetime policies.
if (Sec.blocks().empty() ||
- Sec.getMemLifetimePolicy() == orc::MemLifetimePolicy::NoAlloc)
+ Sec.getMemLifetime() == orc::MemLifetime::NoAlloc)
continue;
- auto &Seg = Segments[{Sec.getMemProt(), Sec.getMemLifetimePolicy()}];
+ auto &Seg = Segments[{Sec.getMemProt(), Sec.getMemLifetime()}];
for (auto *B : Sec.blocks())
if (LLVM_LIKELY(!B->isZeroFill()))
Seg.ContentBlocks.push_back(B);
@@ -90,7 +90,7 @@ BasicLayout::getContiguousPageBasedLayoutSizes(uint64_t PageSize) {
inconvertibleErrorCode());
uint64_t SegSize = alignTo(Seg.ContentSize + Seg.ZeroFillSize, PageSize);
- if (AG.getMemLifetimePolicy() == orc::MemLifetimePolicy::Standard)
+ if (AG.getMemLifetime() == orc::MemLifetime::Standard)
SegsSizes.StandardSegs += SegSize;
else
SegsSizes.FinalizeSegs += SegSize;
@@ -155,8 +155,8 @@ void SimpleSegmentAlloc::Create(JITLinkMemoryManager &MemMgr,
"__---.finalize", "__R--.finalize", "__-W-.finalize", "__RW-.finalize",
"__--X.finalize", "__R-X.finalize", "__-WX.finalize", "__RWX.finalize"};
- auto G =
- std::make_unique<LinkGraph>("", Triple(), 0, support::native, nullptr);
+ auto G = std::make_unique<LinkGraph>("", Triple(), 0,
+ llvm::endianness::native, nullptr);
orc::AllocGroupSmallMap<Block *> ContentBlocks;
orc::ExecutorAddr NextAddr(0x100000);
@@ -164,15 +164,15 @@ void SimpleSegmentAlloc::Create(JITLinkMemoryManager &MemMgr,
auto &AG = KV.first;
auto &Seg = KV.second;
- assert(AG.getMemLifetimePolicy() != orc::MemLifetimePolicy::NoAlloc &&
+ assert(AG.getMemLifetime() != orc::MemLifetime::NoAlloc &&
"NoAlloc segments are not supported by SimpleSegmentAlloc");
auto AGSectionName =
AGSectionNames[static_cast<unsigned>(AG.getMemProt()) |
- static_cast<bool>(AG.getMemLifetimePolicy()) << 3];
+ static_cast<bool>(AG.getMemLifetime()) << 3];
auto &Sec = G->createSection(AGSectionName, AG.getMemProt());
- Sec.setMemLifetimePolicy(AG.getMemLifetimePolicy());
+ Sec.setMemLifetime(AG.getMemLifetime());
if (Seg.ContentSize != 0) {
NextAddr =
@@ -419,10 +419,9 @@ void InProcessMemoryManager::allocate(const JITLinkDylib *JD, LinkGraph &G,
auto &AG = KV.first;
auto &Seg = KV.second;
- auto &SegAddr =
- (AG.getMemLifetimePolicy() == orc::MemLifetimePolicy::Standard)
- ? NextStandardSegAddr
- : NextFinalizeSegAddr;
+ auto &SegAddr = (AG.getMemLifetime() == orc::MemLifetime::Standard)
+ ? NextStandardSegAddr
+ : NextFinalizeSegAddr;
Seg.WorkingMem = SegAddr.toPtr<char *>();
Seg.Addr = SegAddr;
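The AGSectionNames lookup above packs an AllocGroup into a four-bit index: the RWX MemProt mask occupies bits 0-2 and a Finalize lifetime contributes bit 3. A worked example, assuming the usual Read=1/Write=2/Exec=4 encoding of orc::MemProt:

    constexpr unsigned Prot = 1 /*Read*/ | 4 /*Exec*/; // 0b101 == 5
    constexpr unsigned Finalize = 1;                   // MemLifetime::Finalize
    constexpr unsigned Index = Prot | (Finalize << 3); // == 13
    // AGSectionNames[13] == "__R-X.finalize", matching the table above.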
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
index c40e0f9ffc8d..bb21f633d982 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// Generic MachO LinkGraph buliding code.
+// Generic MachO LinkGraph building code.
//
//===----------------------------------------------------------------------===//
@@ -73,7 +73,7 @@ Linkage MachOLinkGraphBuilder::getLinkage(uint16_t Desc) {
Scope MachOLinkGraphBuilder::getScope(StringRef Name, uint8_t Type) {
if (Type & MachO::N_EXT) {
- if ((Type & MachO::N_PEXT) || Name.startswith("l"))
+ if ((Type & MachO::N_PEXT) || Name.starts_with("l"))
return Scope::Hidden;
else
return Scope::Default;
@@ -106,9 +106,10 @@ MachOLinkGraphBuilder::getPointerSize(const object::MachOObjectFile &Obj) {
return Obj.is64Bit() ? 8 : 4;
}
-support::endianness
+llvm::endianness
MachOLinkGraphBuilder::getEndianness(const object::MachOObjectFile &Obj) {
- return Obj.isLittleEndian() ? support::little : support::big;
+ return Obj.isLittleEndian() ? llvm::endianness::little
+ : llvm::endianness::big;
}
Section &MachOLinkGraphBuilder::getCommonSection() {
@@ -192,7 +193,7 @@ Error MachOLinkGraphBuilder::createNormalizedSections() {
// TODO: Are there any other criteria for NoAlloc lifetime?
if (NSec.Flags & MachO::S_ATTR_DEBUG)
- NSec.GraphSection->setMemLifetimePolicy(orc::MemLifetimePolicy::NoAlloc);
+ NSec.GraphSection->setMemLifetime(orc::MemLifetime::NoAlloc);
IndexToSection.insert(std::make_pair(SecIndex, std::move(NSec)));
}
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
index 2805c2960b9b..a4ae0ac1ecfc 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
@@ -181,7 +181,7 @@ protected:
private:
static unsigned getPointerSize(const object::MachOObjectFile &Obj);
- static support::endianness getEndianness(const object::MachOObjectFile &Obj);
+ static llvm::endianness getEndianness(const object::MachOObjectFile &Obj);
void setCanonicalSymbol(NormalizedSection &NSec, Symbol &Sym) {
auto *&CanonicalSymEntry = NSec.CanonicalSymbols[Sym.getAddress()];
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
index dd0b5d37d1b7..409bec7a874b 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
@@ -567,7 +567,7 @@ void link_MachO_arm64(std::unique_ptr<LinkGraph> G,
Config.PrePrunePasses.push_back(
CompactUnwindSplitter("__LD,__compact_unwind"));
- // Add eh-frame passses.
+ // Add eh-frame passes.
// FIXME: Prune eh-frames for which compact-unwind is available once
// we support compact-unwind registration with libunwind.
Config.PrePrunePasses.push_back(createEHFrameSplitterPass_MachO_arm64());
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
index 4dba27bc61cb..49f619357f08 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
@@ -482,7 +482,7 @@ void link_MachO_x86_64(std::unique_ptr<LinkGraph> G,
PassConfiguration Config;
if (Ctx->shouldAddDefaultTargetPasses(G->getTargetTriple())) {
- // Add eh-frame passses.
+ // Add eh-frame passes.
Config.PrePrunePasses.push_back(createEHFrameSplitterPass_MachO_x86_64());
Config.PrePrunePasses.push_back(createEHFrameEdgeFixerPass_MachO_x86_64());
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp
index 83829dde0508..671ee1a81252 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp
@@ -17,6 +17,7 @@
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#define DEBUG_TYPE "jitlink"
@@ -25,6 +26,11 @@ namespace llvm {
namespace jitlink {
namespace aarch32 {
+/// Check whether the given target flags are set for this Symbol.
+bool hasTargetFlags(Symbol &Sym, TargetFlagsType Flags) {
+ return static_cast<TargetFlagsType>(Sym.getTargetFlags()) & Flags;
+}
+
/// Encode 22-bit immediate value for branch instructions without J1J2 range
/// extension (formats B T4, BL T1 and BLX T2).
///
@@ -78,6 +84,24 @@ int64_t decodeImmBT4BlT1BlxT2_J1J2(uint32_t Hi, uint32_t Lo) {
return SignExtend64<25>(S << 14 | I1 | I2 | Imm10 << 12 | Imm11 << 1);
}
+/// Encode 26-bit immediate value for branch instructions
+/// (formats B A1, BL A1 and BLX A2).
+///
+/// Imm24:00 -> 00000000:Imm24
+///
+uint32_t encodeImmBA1BlA1BlxA2(int64_t Value) {
+ return (Value >> 2) & 0x00ffffff;
+}
+
+/// Decode 26-bit immediate value for branch instructions
+/// (formats B A1, BL A1 and BLX A2).
+///
+/// 00000000:Imm24 -> Imm24:00
+///
+int64_t decodeImmBA1BlA1BlxA2(int64_t Value) {
+ return SignExtend64<26>((Value & 0x00ffffff) << 2);
+}
+
/// Encode 16-bit immediate value for move instruction formats MOVT T1 and
/// MOVW T3.
///
@@ -124,6 +148,50 @@ int64_t decodeRegMovtT1MovwT3(uint32_t Hi, uint32_t Lo) {
return Rd4;
}
+/// Encode 16-bit immediate value for move instruction formats MOVT A1 and
+/// MOVW A2.
+///
+/// Imm4:Imm12 -> 000000000000:Imm4:0000:Imm12
+///
+uint32_t encodeImmMovtA1MovwA2(uint16_t Value) {
+ uint32_t Imm4 = (Value >> 12) & 0x0f;
+ uint32_t Imm12 = Value & 0x0fff;
+ return (Imm4 << 16) | Imm12;
+}
+
+/// Decode 16-bit immediate value for move instruction formats MOVT A1 and
+/// MOVW A2.
+///
+/// 000000000000:Imm4:0000:Imm12 -> Imm4:Imm12
+///
+uint16_t decodeImmMovtA1MovwA2(uint64_t Value) {
+ uint32_t Imm4 = (Value >> 16) & 0x0f;
+ uint32_t Imm12 = Value & 0x0fff;
+ return (Imm4 << 12) | Imm12;
+}
+
+/// Encode register ID for instruction formats MOVT A1 and
+/// MOVW A2.
+///
+/// Rd4 -> 0000000000000000:Rd4:000000000000
+///
+uint32_t encodeRegMovtA1MovwA2(int64_t Value) {
+ uint32_t Rd4 = (Value & 0x00000f) << 12;
+ return Rd4;
+}
+
+/// Decode register ID for instruction formats MOVT A1 and
+/// MOVW A2.
+///
+/// 0000000000000000:Rd4:000000000000 -> Rd4
+///
+int64_t decodeRegMovtA1MovwA2(uint64_t Value) {
+ uint32_t Rd4 = (Value >> 12) & 0x00000f;
+ return Rd4;
+}
+
+namespace {
+
/// 32-bit Thumb instructions are stored as two little-endian halfwords.
/// An instruction at address A encodes bytes A+1, A in the first halfword (Hi),
/// followed by bytes A+3, A+2 in the second halfword (Lo).
@@ -151,18 +219,126 @@ struct ThumbRelocation {
const support::ulittle16_t &Lo; // Second halfword
};
+struct WritableArmRelocation {
+ WritableArmRelocation(char *FixupPtr)
+ : Wd{*reinterpret_cast<support::ulittle32_t *>(FixupPtr)} {}
+
+ support::ulittle32_t &Wd;
+};
+
+struct ArmRelocation {
+ ArmRelocation(const char *FixupPtr)
+ : Wd{*reinterpret_cast<const support::ulittle32_t *>(FixupPtr)} {}
+
+ ArmRelocation(WritableArmRelocation &Writable) : Wd{Writable.Wd} {}
+
+ const support::ulittle32_t &Wd;
+};
+
Error makeUnexpectedOpcodeError(const LinkGraph &G, const ThumbRelocation &R,
Edge::Kind Kind) {
return make_error<JITLinkError>(
- formatv("Invalid opcode [ 0x{0:x4}, 0x{1:x4} ] for relocation: {2}",
+ formatv("Invalid opcode [ {0:x4}, {1:x4} ] for relocation: {2}",
static_cast<uint16_t>(R.Hi), static_cast<uint16_t>(R.Lo),
G.getEdgeKindName(Kind)));
}
-template <EdgeKind_aarch32 Kind> bool checkOpcode(const ThumbRelocation &R) {
- uint16_t Hi = R.Hi & FixupInfo<Kind>::OpcodeMask.Hi;
- uint16_t Lo = R.Lo & FixupInfo<Kind>::OpcodeMask.Lo;
- return Hi == FixupInfo<Kind>::Opcode.Hi && Lo == FixupInfo<Kind>::Opcode.Lo;
+Error makeUnexpectedOpcodeError(const LinkGraph &G, const ArmRelocation &R,
+ Edge::Kind Kind) {
+ return make_error<JITLinkError>(
+ formatv("Invalid opcode {0:x8} for relocation: {1}",
+ static_cast<uint32_t>(R.Wd), G.getEdgeKindName(Kind)));
+}
+
+template <EdgeKind_aarch32 K> constexpr bool isArm() {
+ return FirstArmRelocation <= K && K <= LastArmRelocation;
+}
+template <EdgeKind_aarch32 K> constexpr bool isThumb() {
+ return FirstThumbRelocation <= K && K <= LastThumbRelocation;
+}
+
+template <EdgeKind_aarch32 K> static bool checkOpcodeArm(uint32_t Wd) {
+ return (Wd & FixupInfo<K>::OpcodeMask) == FixupInfo<K>::Opcode;
+}
+
+template <EdgeKind_aarch32 K>
+static bool checkOpcodeThumb(uint16_t Hi, uint16_t Lo) {
+ return (Hi & FixupInfo<K>::OpcodeMask.Hi) == FixupInfo<K>::Opcode.Hi &&
+ (Lo & FixupInfo<K>::OpcodeMask.Lo) == FixupInfo<K>::Opcode.Lo;
+}
+
+class FixupInfoTable {
+ static constexpr size_t Items = LastRelocation + 1;
+
+public:
+ FixupInfoTable() {
+ populateEntries<FirstArmRelocation, LastArmRelocation>();
+ populateEntries<FirstThumbRelocation, LastThumbRelocation>();
+ }
+
+ const FixupInfoBase *getEntry(Edge::Kind K) {
+ assert(K < Data.size() && "Index out of bounds");
+ return Data.at(K).get();
+ }
+
+private:
+ template <EdgeKind_aarch32 K, EdgeKind_aarch32 LastK> void populateEntries() {
+ assert(K < Data.size() && "Index out of range");
+ assert(Data.at(K) == nullptr && "Initialized entries are immutable");
+ Data[K] = initEntry<K>();
+ if constexpr (K < LastK) {
+ constexpr auto Next = static_cast<EdgeKind_aarch32>(K + 1);
+ populateEntries<Next, LastK>();
+ }
+ }
+
+ template <EdgeKind_aarch32 K>
+ static std::unique_ptr<FixupInfoBase> initEntry() {
+ auto Entry = std::make_unique<FixupInfo<K>>();
+ static_assert(isArm<K>() != isThumb<K>(), "Classes are mutually exclusive");
+ if constexpr (isArm<K>())
+ Entry->checkOpcode = checkOpcodeArm<K>;
+ if constexpr (isThumb<K>())
+ Entry->checkOpcode = checkOpcodeThumb<K>;
+ return Entry;
+ }
+
+private:
+ std::array<std::unique_ptr<FixupInfoBase>, Items> Data;
+};
+
+ManagedStatic<FixupInfoTable> DynFixupInfos;
+
+} // namespace
+
+static Error checkOpcode(LinkGraph &G, const ArmRelocation &R,
+ Edge::Kind Kind) {
+ assert(Kind >= FirstArmRelocation && Kind <= LastArmRelocation &&
+ "Edge kind must be Arm relocation");
+ const FixupInfoBase *Entry = DynFixupInfos->getEntry(Kind);
+ const FixupInfoArm &Info = *static_cast<const FixupInfoArm *>(Entry);
+ assert(Info.checkOpcode && "Opcode check is mandatory for Arm edges");
+ if (!Info.checkOpcode(R.Wd))
+ return makeUnexpectedOpcodeError(G, R, Kind);
+
+ return Error::success();
+}
+
+static Error checkOpcode(LinkGraph &G, const ThumbRelocation &R,
+ Edge::Kind Kind) {
+ assert(Kind >= FirstThumbRelocation && Kind <= LastThumbRelocation &&
+ "Edge kind must be Thumb relocation");
+ const FixupInfoBase *Entry = DynFixupInfos->getEntry(Kind);
+ const FixupInfoThumb &Info = *static_cast<const FixupInfoThumb *>(Entry);
+ assert(Info.checkOpcode && "Opcode check is mandatory for Thumb edges");
+ if (!Info.checkOpcode(R.Hi, R.Lo))
+ return makeUnexpectedOpcodeError(G, R, Kind);
+
+ return Error::success();
+}
+
+const FixupInfoBase *FixupInfoBase::getDynFixupInfo(Edge::Kind K) {
+ return DynFixupInfos->getEntry(K);
}
template <EdgeKind_aarch32 Kind>
@@ -173,30 +349,48 @@ bool checkRegister(const ThumbRelocation &R, HalfWords Reg) {
}
template <EdgeKind_aarch32 Kind>
+bool checkRegister(const ArmRelocation &R, uint32_t Reg) {
+ uint32_t Wd = R.Wd & FixupInfo<Kind>::RegMask;
+ return Wd == Reg;
+}
+
+template <EdgeKind_aarch32 Kind>
void writeRegister(WritableThumbRelocation &R, HalfWords Reg) {
static constexpr HalfWords Mask = FixupInfo<Kind>::RegMask;
- assert((Mask.Hi & Reg.Hi) == Reg.Hi && (Mask.Hi & Reg.Hi) == Reg.Hi &&
+ assert((Mask.Hi & Reg.Hi) == Reg.Hi && (Mask.Lo & Reg.Lo) == Reg.Lo &&
"Value bits exceed bit range of given mask");
R.Hi = (R.Hi & ~Mask.Hi) | Reg.Hi;
R.Lo = (R.Lo & ~Mask.Lo) | Reg.Lo;
}
template <EdgeKind_aarch32 Kind>
+void writeRegister(WritableArmRelocation &R, uint32_t Reg) {
+ static constexpr uint32_t Mask = FixupInfo<Kind>::RegMask;
+ assert((Mask & Reg) == Reg && "Value bits exceed bit range of given mask");
+ R.Wd = (R.Wd & ~Mask) | Reg;
+}
+
+template <EdgeKind_aarch32 Kind>
void writeImmediate(WritableThumbRelocation &R, HalfWords Imm) {
static constexpr HalfWords Mask = FixupInfo<Kind>::ImmMask;
- assert((Mask.Hi & Imm.Hi) == Imm.Hi && (Mask.Hi & Imm.Hi) == Imm.Hi &&
+ assert((Mask.Hi & Imm.Hi) == Imm.Hi && (Mask.Lo & Imm.Lo) == Imm.Lo &&
"Value bits exceed bit range of given mask");
R.Hi = (R.Hi & ~Mask.Hi) | Imm.Hi;
R.Lo = (R.Lo & ~Mask.Lo) | Imm.Lo;
}
-Expected<int64_t> readAddendData(LinkGraph &G, Block &B, const Edge &E) {
- support::endianness Endian = G.getEndianness();
- assert(Endian != support::native && "Declare as little or big explicitly");
+template <EdgeKind_aarch32 Kind>
+void writeImmediate(WritableArmRelocation &R, uint32_t Imm) {
+ static constexpr uint32_t Mask = FixupInfo<Kind>::ImmMask;
+ assert((Mask & Imm) == Imm && "Value bits exceed bit range of given mask");
+ R.Wd = (R.Wd & ~Mask) | Imm;
+}
- Edge::Kind Kind = E.getKind();
+Expected<int64_t> readAddendData(LinkGraph &G, Block &B, Edge::OffsetT Offset,
+ Edge::Kind Kind) {
+ endianness Endian = G.getEndianness();
const char *BlockWorkingMem = B.getContent().data();
- const char *FixupPtr = BlockWorkingMem + E.getOffset();
+ const char *FixupPtr = BlockWorkingMem + Offset;
switch (Kind) {
case Data_Delta32:
@@ -206,59 +400,53 @@ Expected<int64_t> readAddendData(LinkGraph &G, Block &B, const Edge &E) {
return make_error<JITLinkError>(
"In graph " + G.getName() + ", section " + B.getSection().getName() +
" can not read implicit addend for aarch32 edge kind " +
- G.getEdgeKindName(E.getKind()));
+ G.getEdgeKindName(Kind));
}
}
-Expected<int64_t> readAddendArm(LinkGraph &G, Block &B, const Edge &E) {
- Edge::Kind Kind = E.getKind();
+Expected<int64_t> readAddendArm(LinkGraph &G, Block &B, Edge::OffsetT Offset,
+ Edge::Kind Kind) {
+ ArmRelocation R(B.getContent().data() + Offset);
+ if (Error Err = checkOpcode(G, R, Kind))
+ return std::move(Err);
switch (Kind) {
case Arm_Call:
- return make_error<JITLinkError>(
- "Addend extraction for relocation type not yet implemented: " +
- StringRef(G.getEdgeKindName(Kind)));
+ case Arm_Jump24:
+ return decodeImmBA1BlA1BlxA2(R.Wd);
+
+ case Arm_MovtAbs:
+ case Arm_MovwAbsNC:
+ return decodeImmMovtA1MovwA2(R.Wd);
+
default:
return make_error<JITLinkError>(
"In graph " + G.getName() + ", section " + B.getSection().getName() +
" can not read implicit addend for aarch32 edge kind " +
- G.getEdgeKindName(E.getKind()));
+ G.getEdgeKindName(Kind));
}
}
-Expected<int64_t> readAddendThumb(LinkGraph &G, Block &B, const Edge &E,
- const ArmConfig &ArmCfg) {
- ThumbRelocation R(B.getContent().data() + E.getOffset());
- Edge::Kind Kind = E.getKind();
+Expected<int64_t> readAddendThumb(LinkGraph &G, Block &B, Edge::OffsetT Offset,
+ Edge::Kind Kind, const ArmConfig &ArmCfg) {
+ ThumbRelocation R(B.getContent().data() + Offset);
+ if (Error Err = checkOpcode(G, R, Kind))
+ return std::move(Err);
switch (Kind) {
case Thumb_Call:
- if (!checkOpcode<Thumb_Call>(R))
- return makeUnexpectedOpcodeError(G, R, Kind);
+ case Thumb_Jump24:
return LLVM_LIKELY(ArmCfg.J1J2BranchEncoding)
? decodeImmBT4BlT1BlxT2_J1J2(R.Hi, R.Lo)
: decodeImmBT4BlT1BlxT2(R.Hi, R.Lo);
- case Thumb_Jump24:
- if (!checkOpcode<Thumb_Jump24>(R))
- return makeUnexpectedOpcodeError(G, R, Kind);
- if (R.Lo & FixupInfo<Thumb_Jump24>::LoBitConditional)
- return make_error<JITLinkError>("Relocation expects an unconditional "
- "B.W branch instruction: " +
- StringRef(G.getEdgeKindName(Kind)));
- return LLVM_LIKELY(ArmCfg.J1J2BranchEncoding)
- ? decodeImmBT4BlT1BlxT2_J1J2(R.Hi, R.Lo)
- : decodeImmBT4BlT1BlxT2(R.Hi, R.Lo);
-
case Thumb_MovwAbsNC:
- if (!checkOpcode<Thumb_MovwAbsNC>(R))
- return makeUnexpectedOpcodeError(G, R, Kind);
+ case Thumb_MovwPrelNC:
// Initial addend is interpreted as a signed value
return SignExtend64<16>(decodeImmMovtT1MovwT3(R.Hi, R.Lo));
case Thumb_MovtAbs:
- if (!checkOpcode<Thumb_MovtAbs>(R))
- return makeUnexpectedOpcodeError(G, R, Kind);
+ case Thumb_MovtPrel:
// Initial addend is interpreted as a signed value
return SignExtend64<16>(decodeImmMovtT1MovwT3(R.Hi, R.Lo));
@@ -266,7 +454,7 @@ Expected<int64_t> readAddendThumb(LinkGraph &G, Block &B, const Edge &E,
return make_error<JITLinkError>(
"In graph " + G.getName() + ", section " + B.getSection().getName() +
" can not read implicit addend for aarch32 edge kind " +
- G.getEdgeKindName(E.getKind()));
+ G.getEdgeKindName(Kind));
}
}
@@ -277,13 +465,12 @@ Error applyFixupData(LinkGraph &G, Block &B, const Edge &E) {
char *FixupPtr = BlockWorkingMem + E.getOffset();
auto Write32 = [FixupPtr, Endian = G.getEndianness()](int64_t Value) {
- assert(Endian != native && "Must be explicit: little or big");
assert(isInt<32>(Value) && "Must be in signed 32-bit range");
uint32_t Imm = static_cast<int32_t>(Value);
- if (LLVM_LIKELY(Endian == little))
- endian::write32<little>(FixupPtr, Imm);
+ if (LLVM_LIKELY(Endian == endianness::little))
+ endian::write32<endianness::little>(FixupPtr, Imm);
else
- endian::write32<big>(FixupPtr, Imm);
+ endian::write32<endianness::big>(FixupPtr, Imm);
};
Edge::Kind Kind = E.getKind();
@@ -291,7 +478,6 @@ Error applyFixupData(LinkGraph &G, Block &B, const Edge &E) {
int64_t Addend = E.getAddend();
Symbol &TargetSymbol = E.getTarget();
uint64_t TargetAddress = TargetSymbol.getAddress().getValue();
- assert(!TargetSymbol.hasTargetFlags(ThumbSymbol));
// Regular data relocations have size 4, alignment 1 and write the full 32-bit
// result to the place; no need for overflow checking. There are three
@@ -320,13 +506,71 @@ Error applyFixupData(LinkGraph &G, Block &B, const Edge &E) {
}
Error applyFixupArm(LinkGraph &G, Block &B, const Edge &E) {
+ WritableArmRelocation R(B.getAlreadyMutableContent().data() + E.getOffset());
Edge::Kind Kind = E.getKind();
+ if (Error Err = checkOpcode(G, R, Kind))
+ return Err;
+
+ uint64_t FixupAddress = (B.getAddress() + E.getOffset()).getValue();
+ int64_t Addend = E.getAddend();
+ Symbol &TargetSymbol = E.getTarget();
+ uint64_t TargetAddress = TargetSymbol.getAddress().getValue();
switch (Kind) {
- case Arm_Call:
- return make_error<JITLinkError>(
- "Fix-up for relocation type not yet implemented: " +
- StringRef(G.getEdgeKindName(Kind)));
+ case Arm_Jump24: {
+ if (hasTargetFlags(TargetSymbol, ThumbSymbol))
+ return make_error<JITLinkError>("Branch relocation needs interworking "
+ "stub when bridging to Thumb: " +
+ StringRef(G.getEdgeKindName(Kind)));
+
+ int64_t Value = TargetAddress - FixupAddress + Addend;
+
+ if (!isInt<26>(Value))
+ return makeTargetOutOfRangeError(G, B, E);
+ writeImmediate<Arm_Jump24>(R, encodeImmBA1BlA1BlxA2(Value));
+
+ return Error::success();
+ }
+ case Arm_Call: {
+ if ((R.Wd & FixupInfo<Arm_Call>::CondMask) !=
+ FixupInfo<Arm_Call>::Unconditional)
+ return make_error<JITLinkError>("Relocation expects an unconditional "
+ "BL/BLX branch instruction: " +
+ StringRef(G.getEdgeKindName(Kind)));
+
+ int64_t Value = TargetAddress - FixupAddress + Addend;
+
+ // The call instruction itself is Arm. The call destination can either be
+ // Thumb or Arm. We use BL to stay in Arm and BLX to change to Thumb.
+ bool TargetIsThumb = hasTargetFlags(TargetSymbol, ThumbSymbol);
+ bool InstrIsBlx = (~R.Wd & FixupInfo<Arm_Call>::BitBlx) == 0;
+ if (TargetIsThumb != InstrIsBlx) {
+ if (LLVM_LIKELY(TargetIsThumb)) {
+ // Change opcode BL -> BLX
+ R.Wd = R.Wd | FixupInfo<Arm_Call>::BitBlx;
+ R.Wd = R.Wd & ~FixupInfo<Arm_Call>::BitH;
+ } else {
+ // Change opcode BLX -> BL
+ R.Wd = R.Wd & ~FixupInfo<Arm_Call>::BitBlx;
+ }
+ }
+
+ if (!isInt<26>(Value))
+ return makeTargetOutOfRangeError(G, B, E);
+ writeImmediate<Arm_Call>(R, encodeImmBA1BlA1BlxA2(Value));
+
+ return Error::success();
+ }
+ case Arm_MovwAbsNC: {
+ uint16_t Value = (TargetAddress + Addend) & 0xffff;
+ writeImmediate<Arm_MovwAbsNC>(R, encodeImmMovtA1MovwA2(Value));
+ return Error::success();
+ }
+ case Arm_MovtAbs: {
+ uint16_t Value = ((TargetAddress + Addend) >> 16) & 0xffff;
+ writeImmediate<Arm_MovtAbs>(R, encodeImmMovtA1MovwA2(Value));
+ return Error::success();
+ }
default:
return make_error<JITLinkError>(
"In graph " + G.getName() + ", section " + B.getSection().getName() +
@@ -339,24 +583,18 @@ Error applyFixupThumb(LinkGraph &G, Block &B, const Edge &E,
const ArmConfig &ArmCfg) {
WritableThumbRelocation R(B.getAlreadyMutableContent().data() +
E.getOffset());
-
Edge::Kind Kind = E.getKind();
+ if (Error Err = checkOpcode(G, R, Kind))
+ return Err;
+
uint64_t FixupAddress = (B.getAddress() + E.getOffset()).getValue();
int64_t Addend = E.getAddend();
Symbol &TargetSymbol = E.getTarget();
uint64_t TargetAddress = TargetSymbol.getAddress().getValue();
- if (TargetSymbol.hasTargetFlags(ThumbSymbol))
- TargetAddress |= 0x01;
switch (Kind) {
case Thumb_Jump24: {
- if (!checkOpcode<Thumb_Jump24>(R))
- return makeUnexpectedOpcodeError(G, R, Kind);
- if (R.Lo & FixupInfo<Thumb_Jump24>::LoBitConditional)
- return make_error<JITLinkError>("Relocation expects an unconditional "
- "B.W branch instruction: " +
- StringRef(G.getEdgeKindName(Kind)));
- if (!(TargetSymbol.hasTargetFlags(ThumbSymbol)))
+ if (!hasTargetFlags(TargetSymbol, ThumbSymbol))
return make_error<JITLinkError>("Branch relocation needs interworking "
"stub when bridging to ARM: " +
StringRef(G.getEdgeKindName(Kind)));
@@ -376,27 +614,22 @@ Error applyFixupThumb(LinkGraph &G, Block &B, const Edge &E,
}
case Thumb_Call: {
- if (!checkOpcode<Thumb_Call>(R))
- return makeUnexpectedOpcodeError(G, R, Kind);
-
int64_t Value = TargetAddress - FixupAddress + Addend;
// The call instruction itself is Thumb. The call destination can either be
// Thumb or Arm. We use BL to stay in Thumb and BLX to change to Arm.
- bool TargetIsArm = !TargetSymbol.hasTargetFlags(ThumbSymbol);
+ bool TargetIsArm = !hasTargetFlags(TargetSymbol, ThumbSymbol);
bool InstrIsBlx = (R.Lo & FixupInfo<Thumb_Call>::LoBitNoBlx) == 0;
if (TargetIsArm != InstrIsBlx) {
if (LLVM_LIKELY(TargetIsArm)) {
- // Change opcode BL -> BLX and fix range value (account for 4-byte
+ // Change opcode BL -> BLX and fix range value: account for 4-byte
// aligned destination while instruction may only be 2-byte aligned
- // and clear Thumb bit).
R.Lo = R.Lo & ~FixupInfo<Thumb_Call>::LoBitNoBlx;
R.Lo = R.Lo & ~FixupInfo<Thumb_Call>::LoBitH;
Value = alignTo(Value, 4);
} else {
- // Change opcode BLX -> BL and set Thumb bit
+ // Change opcode BLX -> BL
R.Lo = R.Lo & ~FixupInfo<Thumb_Call>::LoBitNoBlx;
- Value |= 0x01;
}
}
@@ -417,20 +650,25 @@ Error applyFixupThumb(LinkGraph &G, Block &B, const Edge &E,
}
case Thumb_MovwAbsNC: {
- if (!checkOpcode<Thumb_MovwAbsNC>(R))
- return makeUnexpectedOpcodeError(G, R, Kind);
uint16_t Value = (TargetAddress + Addend) & 0xffff;
writeImmediate<Thumb_MovwAbsNC>(R, encodeImmMovtT1MovwT3(Value));
return Error::success();
}
-
case Thumb_MovtAbs: {
- if (!checkOpcode<Thumb_MovtAbs>(R))
- return makeUnexpectedOpcodeError(G, R, Kind);
uint16_t Value = ((TargetAddress + Addend) >> 16) & 0xffff;
writeImmediate<Thumb_MovtAbs>(R, encodeImmMovtT1MovwT3(Value));
return Error::success();
}
+ case Thumb_MovwPrelNC: {
+ uint16_t Value = ((TargetAddress + Addend - FixupAddress) & 0xffff);
+ writeImmediate<Thumb_MovwPrelNC>(R, encodeImmMovtT1MovwT3(Value));
+ return Error::success();
+ }
+ case Thumb_MovtPrel: {
+ uint16_t Value = (((TargetAddress + Addend - FixupAddress) >> 16) & 0xffff);
+ writeImmediate<Thumb_MovtPrel>(R, encodeImmMovtT1MovwT3(Value));
+ return Error::success();
+ }
default:
return make_error<JITLinkError>(
@@ -471,11 +709,17 @@ const char *getEdgeKindName(Edge::Kind K) {
switch (K) {
KIND_NAME_CASE(Data_Delta32)
+ KIND_NAME_CASE(Data_Pointer32)
KIND_NAME_CASE(Arm_Call)
+ KIND_NAME_CASE(Arm_Jump24)
+ KIND_NAME_CASE(Arm_MovwAbsNC)
+ KIND_NAME_CASE(Arm_MovtAbs)
KIND_NAME_CASE(Thumb_Call)
KIND_NAME_CASE(Thumb_Jump24)
KIND_NAME_CASE(Thumb_MovwAbsNC)
KIND_NAME_CASE(Thumb_MovtAbs)
+ KIND_NAME_CASE(Thumb_MovwPrelNC)
+ KIND_NAME_CASE(Thumb_MovtPrel)
default:
return getGenericEdgeKindName(K);
}
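A worked round-trip of the MOVW/MOVT A1/A2 immediate split defined by encodeImmMovtA1MovwA2 and decodeImmMovtA1MovwA2 above, with an illustrative value:

    // 0xABCD splits into Imm4 = 0xA (bits 15..12) and Imm12 = 0xBCD (bits 11..0),
    // placed at instruction bits 19..16 and 11..0 respectively.
    assert(encodeImmMovtA1MovwA2(0xABCD) == 0x000A0BCD);
    assert(decodeImmMovtA1MovwA2(0x000A0BCD) == 0xABCD); // round-trip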
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ppc64.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ppc64.cpp
index 4e21eace21d0..27484aaf2059 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ppc64.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ppc64.cpp
@@ -64,8 +64,36 @@ const char *getEdgeKindName(Edge::Kind K) {
return "Pointer64";
case Pointer32:
return "Pointer32";
+ case Pointer16:
+ return "Pointer16";
+ case Pointer16DS:
+ return "Pointer16DS";
+ case Pointer16HA:
+ return "Pointer16HA";
+ case Pointer16HI:
+ return "Pointer16HI";
+ case Pointer16HIGH:
+ return "Pointer16HIGH";
+ case Pointer16HIGHA:
+ return "Pointer16HIGHA";
+ case Pointer16HIGHER:
+ return "Pointer16HIGHER";
+ case Pointer16HIGHERA:
+ return "Pointer16HIGHERA";
+ case Pointer16HIGHEST:
+ return "Pointer16HIGHEST";
+ case Pointer16HIGHESTA:
+ return "Pointer16HIGHESTA";
+ case Pointer16LO:
+ return "Pointer16LO";
+ case Pointer16LODS:
+ return "Pointer16LODS";
+ case Pointer14:
+ return "Pointer14";
case Delta64:
return "Delta64";
+ case Delta34:
+ return "Delta34";
case Delta32:
return "Delta32";
case NegDelta32:
@@ -74,26 +102,40 @@ const char *getEdgeKindName(Edge::Kind K) {
return "Delta16";
case Delta16HA:
return "Delta16HA";
+ case Delta16HI:
+ return "Delta16HI";
case Delta16LO:
return "Delta16LO";
+ case TOC:
+ return "TOC";
+ case TOCDelta16:
+ return "TOCDelta16";
+ case TOCDelta16DS:
+ return "TOCDelta16DS";
case TOCDelta16HA:
return "TOCDelta16HA";
+ case TOCDelta16HI:
+ return "TOCDelta16HI";
case TOCDelta16LO:
return "TOCDelta16LO";
- case TOCDelta16DS:
- return "TOCDelta16DS";
case TOCDelta16LODS:
return "TOCDelta16LODS";
+ case RequestGOTAndTransformToDelta34:
+ return "RequestGOTAndTransformToDelta34";
case CallBranchDelta:
return "CallBranchDelta";
case CallBranchDeltaRestoreTOC:
return "CallBranchDeltaRestoreTOC";
- case RequestPLTCallStub:
- return "RequestPLTCallStub";
- case RequestPLTCallStubSaveTOC:
- return "RequestPLTCallStubSaveTOC";
- case RequestPLTCallStubNoTOC:
- return "RequestPLTCallStubNoTOC";
+ case RequestCall:
+ return "RequestCall";
+ case RequestCallNoTOC:
+ return "RequestCallNoTOC";
+ case RequestTLSDescInGOTAndTransformToTOCDelta16HA:
+ return "RequestTLSDescInGOTAndTransformToTOCDelta16HA";
+ case RequestTLSDescInGOTAndTransformToTOCDelta16LO:
+ return "RequestTLSDescInGOTAndTransformToTOCDelta16LO";
+ case RequestTLSDescInGOTAndTransformToDelta34:
+ return "RequestTLSDescInGOTAndTransformToDelta34";
default:
return getGenericEdgeKindName(static_cast<Edge::Kind>(K));
}
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/riscv.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/riscv.cpp
index a78843b16147..a4e4daef97fb 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/riscv.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/riscv.cpp
@@ -82,6 +82,8 @@ const char *getEdgeKindName(Edge::Kind K) {
return "CallRelaxable";
case AlignRelaxable:
return "AlignRelaxable";
+ case NegDelta32:
+ return "NegDelta32";
}
return getGenericEdgeKindName(K);
}
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp
index 7c869bead0b0..c8f5a99099ea 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp
@@ -54,13 +54,13 @@ public:
void materialize(std::unique_ptr<MaterializationResponsibility> R) override {
unsigned PointerSize;
- support::endianness Endianness;
+ llvm::endianness Endianness;
const auto &TT = CP.getExecutionSession().getTargetTriple();
switch (TT.getArch()) {
case Triple::x86_64:
PointerSize = 8;
- Endianness = support::endianness::little;
+ Endianness = llvm::endianness::little;
break;
default:
llvm_unreachable("Unrecognized architecture");
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp
index 0c23f2b25219..56838e9bc86d 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp
@@ -31,6 +31,7 @@ char SymbolsCouldNotBeRemoved::ID = 0;
char MissingSymbolDefinitions::ID = 0;
char UnexpectedSymbolDefinitions::ID = 0;
char MaterializationTask::ID = 0;
+char LookupTask::ID = 0;
RegisterDependenciesFunction NoDependenciesToRegister =
RegisterDependenciesFunction();
@@ -348,7 +349,7 @@ void ReExportsMaterializationUnit::materialize(
}
}
- // The OnResolveInfo struct will hold the aliases and responsibilty for each
+ // The OnResolveInfo struct will hold the aliases and responsibility for each
// query in the list.
struct OnResolveInfo {
OnResolveInfo(std::unique_ptr<MaterializationResponsibility> R,
@@ -529,11 +530,16 @@ public:
SymbolLookupSet LookupSet;
SymbolState RequiredState;
- std::unique_lock<std::mutex> GeneratorLock;
size_t CurSearchOrderIndex = 0;
bool NewJITDylib = true;
SymbolLookupSet DefGeneratorCandidates;
SymbolLookupSet DefGeneratorNonCandidates;
+
+ enum {
+ NotInGenerator, // Not currently using a generator.
+ ResumedForGenerator, // Resumed after being auto-suspended before generator.
+ InGenerator // Currently using generator.
+ } GenState = NotInGenerator;
std::vector<std::weak_ptr<DefinitionGenerator>> CurDefGeneratorStack;
};
@@ -547,15 +553,11 @@ public:
OnComplete(std::move(OnComplete)) {}
void complete(std::unique_ptr<InProgressLookupState> IPLS) override {
- GeneratorLock = {}; // Unlock and release.
auto &ES = SearchOrder.front().first->getExecutionSession();
ES.OL_completeLookupFlags(std::move(IPLS), std::move(OnComplete));
}
- void fail(Error Err) override {
- GeneratorLock = {}; // Unlock and release.
- OnComplete(std::move(Err));
- }
+ void fail(Error Err) override { OnComplete(std::move(Err)); }
private:
unique_function<void(Expected<SymbolFlagsMap>)> OnComplete;
@@ -574,14 +576,12 @@ public:
}
void complete(std::unique_ptr<InProgressLookupState> IPLS) override {
- GeneratorLock = {}; // Unlock and release.
auto &ES = SearchOrder.front().first->getExecutionSession();
ES.OL_completeLookup(std::move(IPLS), std::move(Q),
std::move(RegisterDependencies));
}
void fail(Error Err) override {
- GeneratorLock = {};
Q->detach();
Q->handleFailed(std::move(Err));
}
@@ -638,7 +638,19 @@ void LookupState::continueLookup(Error Err) {
ES.OL_applyQueryPhase1(std::move(IPLS), std::move(Err));
}
-DefinitionGenerator::~DefinitionGenerator() = default;
+DefinitionGenerator::~DefinitionGenerator() {
+ std::deque<LookupState> LookupsToFail;
+ {
+ std::lock_guard<std::mutex> Lock(M);
+ std::swap(PendingLookups, LookupsToFail);
+ InUse = false;
+ }
+
+ for (auto &LS : LookupsToFail)
+ LS.continueLookup(make_error<StringError>(
+ "Query waiting on DefinitionGenerator that was destroyed",
+ inconvertibleErrorCode()));
+}
JITDylib::~JITDylib() {
LLVM_DEBUG(dbgs() << "Destroying JITDylib " << getName() << "\n");
@@ -677,6 +689,10 @@ ResourceTrackerSP JITDylib::createResourceTracker() {
}
void JITDylib::removeGenerator(DefinitionGenerator &G) {
+ // DefGenerator moved into TmpDG to ensure that it's destroyed outside the
+ // session lock (since it may have to send errors to pending queries).
+ std::shared_ptr<DefinitionGenerator> TmpDG;
+
ES.runSessionLocked([&] {
assert(State == Open && "JD is defunct");
auto I = llvm::find_if(DefGenerators,
@@ -684,6 +700,7 @@ void JITDylib::removeGenerator(DefinitionGenerator &G) {
return H.get() == &G;
});
assert(I != DefGenerators.end() && "Generator not found");
+ TmpDG = std::move(*I);
DefGenerators.erase(I);
});
}
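The TmpDG handoff above is an instance of a general idiom: detach the object while holding the lock, then let it be destroyed after the lock is released, because its destructor (here: failing pending lookups) may itself need to take locks. A minimal sketch of the idiom under that assumption, with illustrative names:

    #include <memory>
    #include <mutex>
    #include <vector>

    template <typename T>
    void removeAndDestroyOutsideLock(std::mutex &M,
                                     std::vector<std::shared_ptr<T>> &Held,
                                     T &Victim) {
      std::shared_ptr<T> Tmp;
      {
        std::lock_guard<std::mutex> Lock(M);
        for (auto I = Held.begin(); I != Held.end(); ++I)
          if (I->get() == &Victim) {
            Tmp = std::move(*I);
            Held.erase(I);
            break;
          }
      }
      // Lock released; if Tmp holds the last reference, ~T runs here and can
      // safely acquire its own locks without risking lock-order inversion.
    }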
@@ -1336,7 +1353,7 @@ void JITDylib::addToLinkOrder(const JITDylibSearchOrder &NewLinks) {
ES.runSessionLocked([&]() {
for (auto &KV : NewLinks) {
// Skip elements of NewLinks that are already in the link order.
- if (llvm::find(LinkOrder, KV) != LinkOrder.end())
+ if (llvm::is_contained(LinkOrder, KV))
continue;
LinkOrder.push_back(std::move(KV));
@@ -1903,6 +1920,10 @@ void MaterializationTask::printDescription(raw_ostream &OS) {
void MaterializationTask::run() { MU->materialize(std::move(MR)); }
+void LookupTask::printDescription(raw_ostream &OS) { OS << "Lookup task"; }
+
+void LookupTask::run() { LS.continueLookup(Error::success()); }
+
ExecutionSession::ExecutionSession(std::unique_ptr<ExecutorProcessControl> EPC)
: EPC(std::move(EPC)) {
// Associated EPC and this.
@@ -1918,16 +1939,14 @@ ExecutionSession::~ExecutionSession() {
Error ExecutionSession::endSession() {
LLVM_DEBUG(dbgs() << "Ending ExecutionSession " << this << "\n");
- std::vector<JITDylibSP> JITDylibsToClose = runSessionLocked([&] {
+ auto JDsToRemove = runSessionLocked([&] {
SessionOpen = false;
- return std::move(JDs);
+ return JDs;
});
- // TODO: notifiy platform? run static deinits?
+ std::reverse(JDsToRemove.begin(), JDsToRemove.end());
- Error Err = Error::success();
- for (auto &JD : reverse(JITDylibsToClose))
- Err = joinErrors(std::move(Err), JD->clear());
+ auto Err = removeJITDylibs(std::move(JDsToRemove));
Err = joinErrors(std::move(Err), EPC->disconnect());
@@ -1977,42 +1996,44 @@ Expected<JITDylib &> ExecutionSession::createJITDylib(std::string Name) {
return JD;
}
-Error ExecutionSession::removeJITDylib(JITDylib &JD) {
- // Keep JD alive throughout this routine, even if all other references
- // have been dropped.
- JITDylibSP JDKeepAlive = &JD;
+Error ExecutionSession::removeJITDylibs(std::vector<JITDylibSP> JDsToRemove) {
// Set JD to 'Closing' state and remove JD from the ExecutionSession.
runSessionLocked([&] {
- assert(JD.State == JITDylib::Open && "JD already closed");
- JD.State = JITDylib::Closing;
- auto I = llvm::find(JDs, &JD);
- assert(I != JDs.end() && "JD does not appear in session JDs");
- JDs.erase(I);
+ for (auto &JD : JDsToRemove) {
+ assert(JD->State == JITDylib::Open && "JD already closed");
+ JD->State = JITDylib::Closing;
+ auto I = llvm::find(JDs, JD);
+ assert(I != JDs.end() && "JD does not appear in session JDs");
+ JDs.erase(I);
+ }
});
- // Clear the JITDylib. Hold on to any error while we clean up the
- // JITDylib members below.
- auto Err = JD.clear();
-
- // Notify the platform of the teardown.
- if (P)
- Err = joinErrors(std::move(Err), P->teardownJITDylib(JD));
+ // Clear JITDylibs and notify the platform.
+ Error Err = Error::success();
+ for (auto JD : JDsToRemove) {
+ Err = joinErrors(std::move(Err), JD->clear());
+ if (P)
+ Err = joinErrors(std::move(Err), P->teardownJITDylib(*JD));
+ }
// Set JD to closed state. Clear remaining data structures.
runSessionLocked([&] {
- assert(JD.State == JITDylib::Closing && "JD should be closing");
- JD.State = JITDylib::Closed;
- assert(JD.Symbols.empty() && "JD.Symbols is not empty after clear");
- assert(JD.UnmaterializedInfos.empty() &&
- "JD.UnmaterializedInfos is not empty after clear");
- assert(JD.MaterializingInfos.empty() &&
- "JD.MaterializingInfos is not empty after clear");
- assert(JD.TrackerSymbols.empty() &&
- "TrackerSymbols is not empty after clear");
- JD.DefGenerators.clear();
- JD.LinkOrder.clear();
+ for (auto &JD : JDsToRemove) {
+ assert(JD->State == JITDylib::Closing && "JD should be closing");
+ JD->State = JITDylib::Closed;
+ assert(JD->Symbols.empty() && "JD.Symbols is not empty after clear");
+ assert(JD->UnmaterializedInfos.empty() &&
+ "JD.UnmaterializedInfos is not empty after clear");
+ assert(JD->MaterializingInfos.empty() &&
+ "JD.MaterializingInfos is not empty after clear");
+ assert(JD->TrackerSymbols.empty() &&
+ "TrackerSymbols is not empty after clear");
+ JD->DefGenerators.clear();
+ JD->LinkOrder.clear();
+ }
});
+
return Err;
}
@@ -2406,6 +2427,37 @@ Error ExecutionSession::IL_updateCandidatesFor(
});
}
+void ExecutionSession::OL_resumeLookupAfterGeneration(
+ InProgressLookupState &IPLS) {
+
+ assert(IPLS.GenState != InProgressLookupState::NotInGenerator &&
+ "Should not be called for not-in-generator lookups");
+ IPLS.GenState = InProgressLookupState::NotInGenerator;
+
+ LookupState LS;
+
+ if (auto DG = IPLS.CurDefGeneratorStack.back().lock()) {
+ IPLS.CurDefGeneratorStack.pop_back();
+ std::lock_guard<std::mutex> Lock(DG->M);
+
+ // If there are no pending lookups then mark the generator as free and
+ // return.
+ if (DG->PendingLookups.empty()) {
+ DG->InUse = false;
+ return;
+ }
+
+ // Otherwise resume the next lookup.
+ LS = std::move(DG->PendingLookups.front());
+ DG->PendingLookups.pop_front();
+ }
+
+ if (LS.IPLS) {
+ LS.IPLS->GenState = InProgressLookupState::ResumedForGenerator;
+ dispatchTask(std::make_unique<LookupTask>(std::move(LS)));
+ }
+}
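OL_resumeLookupAfterGeneration is one half of a small gate that serializes lookups through each DefinitionGenerator: at most one lookup is InGenerator, later arrivals queue on PendingLookups, and each completion resumes the queue in FIFO order. A reduced sketch of that gate, assuming only the standard library (names are illustrative):

    #include <deque>
    #include <functional>
    #include <mutex>

    struct GeneratorGate {
      std::mutex M;
      bool InUse = false;
      std::deque<std::function<void()>> Pending;

      // Returns true if the caller may run now; otherwise queues Resume.
      bool tryEnter(std::function<void()> Resume) {
        std::lock_guard<std::mutex> Lock(M);
        if (InUse) {
          Pending.push_back(std::move(Resume));
          return false;
        }
        InUse = true;
        return true;
      }

      void leave() {
        std::function<void()> Next;
        {
          std::lock_guard<std::mutex> Lock(M);
          if (Pending.empty()) {
            InUse = false;
            return;
          }
          Next = std::move(Pending.front());
          Pending.pop_front();
        }
        Next(); // The real code dispatches this as a LookupTask instead.
      }
    };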
+
void ExecutionSession::OL_applyQueryPhase1(
std::unique_ptr<InProgressLookupState> IPLS, Error Err) {
@@ -2422,6 +2474,12 @@ void ExecutionSession::OL_applyQueryPhase1(
<< IPLS->DefGeneratorNonCandidates << "\n";
});
+ if (IPLS->GenState == InProgressLookupState::InGenerator)
+ OL_resumeLookupAfterGeneration(*IPLS);
+
+ assert(IPLS->GenState != InProgressLookupState::InGenerator &&
+ "Lookup should not be in InGenerator state here");
+
// FIXME: We should attach the query as we go: This provides a result in a
// single pass in the common case where all symbols have already reached the
// required state. The query could be detached again in the 'fail' method on
@@ -2447,10 +2505,6 @@ void ExecutionSession::OL_applyQueryPhase1(
// If we've just reached a new JITDylib then perform some setup.
if (IPLS->NewJITDylib) {
-
- // Acquire the generator lock for this JITDylib.
- IPLS->GeneratorLock = std::unique_lock<std::mutex>(JD.GeneratorsMutex);
-
// Add any non-candidates from the last JITDylib (if any) back on to the
// list of definition candidates for this JITDylib, reset definition
// non-candidates to the empty set.
@@ -2488,6 +2542,13 @@ void ExecutionSession::OL_applyQueryPhase1(
dbgs() << " Remaining candidates = " << IPLS->DefGeneratorCandidates
<< "\n";
});
+
+ // If this lookup was resumed after auto-suspension but all candidates
+ // have already been generated (by some previous call to the generator)
+ // treat the lookup as if it had completed generation.
+ if (IPLS->GenState == InProgressLookupState::ResumedForGenerator &&
+ IPLS->DefGeneratorCandidates.empty())
+ OL_resumeLookupAfterGeneration(*IPLS);
});
// If we encountered an error while filtering generation candidates then
@@ -2509,13 +2570,32 @@ void ExecutionSession::OL_applyQueryPhase1(
while (!IPLS->CurDefGeneratorStack.empty() &&
!IPLS->DefGeneratorCandidates.empty()) {
auto DG = IPLS->CurDefGeneratorStack.back().lock();
- IPLS->CurDefGeneratorStack.pop_back();
if (!DG)
return IPLS->fail(make_error<StringError>(
"DefinitionGenerator removed while lookup in progress",
inconvertibleErrorCode()));
+ // At this point the lookup is in either the NotInGenerator state, or in
+ // the ResumedForGenerator state.
+ // If this lookup is in the NotInGenerator state then check whether the
+ // generator is in use. If the generator is not in use then move the
+ // lookup to the InGenerator state and continue. If the generator is
+ // already in use then just add this lookup to the pending lookups list
+ // and bail out.
+ // If this lookup is in the ResumedForGenerator state then just move it
+ // to InGenerator and continue.
+ if (IPLS->GenState == InProgressLookupState::NotInGenerator) {
+ std::lock_guard<std::mutex> Lock(DG->M);
+ if (DG->InUse) {
+ DG->PendingLookups.push_back(std::move(IPLS));
+ return;
+ }
+ DG->InUse = true;
+ }
+
+ IPLS->GenState = InProgressLookupState::InGenerator;
+
auto K = IPLS->K;
auto &LookupSet = IPLS->DefGeneratorCandidates;
@@ -2528,6 +2608,11 @@ void ExecutionSession::OL_applyQueryPhase1(
IPLS = std::move(LS.IPLS);
}
+ // If the lookup returned then pop the generator stack and unblock the
+ // next lookup on this generator (if any).
+ if (IPLS)
+ OL_resumeLookupAfterGeneration(*IPLS);
+
// If there was an error then fail the query.
if (Err) {
LLVM_DEBUG({
@@ -2677,7 +2762,7 @@ void ExecutionSession::OL_completeLookup(
// Otherwise this is a match.
- // If this symbol is already in the requried state then notify the
+ // If this symbol is already in the required state then notify the
// query, remove the symbol and continue.
if (SymI->second.getState() >= Q->getRequiredState()) {
LLVM_DEBUG(dbgs()
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp
deleted file mode 100644
index 830582bb3649..000000000000
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp
+++ /dev/null
@@ -1,472 +0,0 @@
-//===------- DebuggerSupportPlugin.cpp - Utils for debugger support -------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h"
-
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringSet.h"
-#include "llvm/BinaryFormat/MachO.h"
-
-#define DEBUG_TYPE "orc"
-
-using namespace llvm;
-using namespace llvm::jitlink;
-using namespace llvm::orc;
-
-static const char *SynthDebugSectionName = "__jitlink_synth_debug_object";
-
-namespace {
-
-struct MachO64LE {
- using UIntPtr = uint64_t;
-
- using Header = MachO::mach_header_64;
- using SegmentLC = MachO::segment_command_64;
- using Section = MachO::section_64;
- using NList = MachO::nlist_64;
-
- static constexpr support::endianness Endianness = support::little;
- static constexpr const uint32_t Magic = MachO::MH_MAGIC_64;
- static constexpr const uint32_t SegmentCmd = MachO::LC_SEGMENT_64;
-};
-
-class MachODebugObjectSynthesizerBase
- : public GDBJITDebugInfoRegistrationPlugin::DebugSectionSynthesizer {
-public:
- static bool isDebugSection(Section &Sec) {
- return Sec.getName().startswith("__DWARF,");
- }
-
- MachODebugObjectSynthesizerBase(LinkGraph &G, ExecutorAddr RegisterActionAddr)
- : G(G), RegisterActionAddr(RegisterActionAddr) {}
- virtual ~MachODebugObjectSynthesizerBase() = default;
-
- Error preserveDebugSections() {
- if (G.findSectionByName(SynthDebugSectionName)) {
- LLVM_DEBUG({
- dbgs() << "MachODebugObjectSynthesizer skipping graph " << G.getName()
- << " which contains an unexpected existing "
- << SynthDebugSectionName << " section.\n";
- });
- return Error::success();
- }
-
- LLVM_DEBUG({
- dbgs() << "MachODebugObjectSynthesizer visiting graph " << G.getName()
- << "\n";
- });
- for (auto &Sec : G.sections()) {
- if (!isDebugSection(Sec))
- continue;
- // Preserve blocks in this debug section by marking one existing symbol
- // live for each block, and introducing a new live, anonymous symbol for
- // each currently unreferenced block.
- LLVM_DEBUG({
- dbgs() << " Preserving debug section " << Sec.getName() << "\n";
- });
- SmallSet<Block *, 8> PreservedBlocks;
- for (auto *Sym : Sec.symbols()) {
- bool NewPreservedBlock =
- PreservedBlocks.insert(&Sym->getBlock()).second;
- if (NewPreservedBlock)
- Sym->setLive(true);
- }
- for (auto *B : Sec.blocks())
- if (!PreservedBlocks.count(B))
- G.addAnonymousSymbol(*B, 0, 0, false, true);
- }
- return Error::success();
- }
-
-protected:
- LinkGraph &G;
- ExecutorAddr RegisterActionAddr;
-};
-
-template <typename MachOTraits>
-class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase {
-private:
- class MachOStructWriter {
- public:
- MachOStructWriter(MutableArrayRef<char> Buffer) : Buffer(Buffer) {}
-
- size_t getOffset() const { return Offset; }
-
- template <typename MachOStruct> void write(MachOStruct S) {
- assert(Offset + sizeof(S) <= Buffer.size() &&
- "Container block overflow while constructing debug MachO");
- if (MachOTraits::Endianness != support::endian::system_endianness())
- MachO::swapStruct(S);
- memcpy(Buffer.data() + Offset, &S, sizeof(S));
- Offset += sizeof(S);
- }
-
- private:
- MutableArrayRef<char> Buffer;
- size_t Offset = 0;
- };
-
-public:
- using MachODebugObjectSynthesizerBase::MachODebugObjectSynthesizerBase;
-
- Error startSynthesis() override {
- LLVM_DEBUG({
- dbgs() << "Creating " << SynthDebugSectionName << " for " << G.getName()
- << "\n";
- });
- auto &SDOSec = G.createSection(SynthDebugSectionName, MemProt::Read);
-
- struct DebugSectionInfo {
- Section *Sec = nullptr;
- StringRef SegName;
- StringRef SecName;
- uint64_t Alignment = 0;
- orc::ExecutorAddr StartAddr;
- uint64_t Size = 0;
- };
-
- SmallVector<DebugSectionInfo, 12> DebugSecInfos;
- size_t NumSections = 0;
- for (auto &Sec : G.sections()) {
- if (Sec.blocks().empty())
- continue;
-
- ++NumSections;
- if (isDebugSection(Sec)) {
- size_t SepPos = Sec.getName().find(',');
- if (SepPos > 16 || (Sec.getName().size() - (SepPos + 1) > 16)) {
- LLVM_DEBUG({
- dbgs() << "Skipping debug object synthesis for graph "
- << G.getName()
- << ": encountered non-standard DWARF section name \""
- << Sec.getName() << "\"\n";
- });
- return Error::success();
- }
- DebugSecInfos.push_back({&Sec, Sec.getName().substr(0, SepPos),
- Sec.getName().substr(SepPos + 1), 0,
- orc::ExecutorAddr(), 0});
- } else {
- NonDebugSections.push_back(&Sec);
-
- // If the first block in the section has a non-zero alignment offset
- // then we need to add a padding block, since the section command in
- // the header doesn't allow for aligment offsets.
- SectionRange R(Sec);
- if (!R.empty()) {
- auto &FB = *R.getFirstBlock();
- if (FB.getAlignmentOffset() != 0) {
- auto Padding = G.allocateBuffer(FB.getAlignmentOffset());
- memset(Padding.data(), 0, Padding.size());
- G.createContentBlock(Sec, Padding,
- FB.getAddress() - FB.getAlignmentOffset(),
- FB.getAlignment(), 0);
- }
- }
- }
- }
-
- // Create container block.
- size_t SectionsCmdSize =
- sizeof(typename MachOTraits::Section) * NumSections;
- size_t SegmentLCSize =
- sizeof(typename MachOTraits::SegmentLC) + SectionsCmdSize;
- size_t ContainerBlockSize =
- sizeof(typename MachOTraits::Header) + SegmentLCSize;
- auto ContainerBlockContent = G.allocateBuffer(ContainerBlockSize);
- MachOContainerBlock = &G.createMutableContentBlock(
- SDOSec, ContainerBlockContent, orc::ExecutorAddr(), 8, 0);
-
- // Copy debug section blocks and symbols.
- orc::ExecutorAddr NextBlockAddr(MachOContainerBlock->getSize());
- for (auto &SI : DebugSecInfos) {
- assert(!SI.Sec->blocks().empty() && "Empty debug info section?");
-
- // Update addresses in debug section.
- LLVM_DEBUG({
- dbgs() << " Appending " << SI.Sec->getName() << " ("
- << SI.Sec->blocks_size() << " block(s)) at "
- << formatv("{0:x8}", NextBlockAddr) << "\n";
- });
- for (auto *B : SI.Sec->blocks()) {
- NextBlockAddr = alignToBlock(NextBlockAddr, *B);
- B->setAddress(NextBlockAddr);
- NextBlockAddr += B->getSize();
- }
-
- auto &FirstBlock = **SI.Sec->blocks().begin();
- if (FirstBlock.getAlignmentOffset() != 0)
- return make_error<StringError>(
- "First block in " + SI.Sec->getName() +
- " section has non-zero alignment offset",
- inconvertibleErrorCode());
- if (FirstBlock.getAlignment() > std::numeric_limits<uint32_t>::max())
- return make_error<StringError>("First block in " + SI.Sec->getName() +
- " has alignment >4Gb",
- inconvertibleErrorCode());
-
- SI.Alignment = FirstBlock.getAlignment();
- SI.StartAddr = FirstBlock.getAddress();
- SI.Size = NextBlockAddr - SI.StartAddr;
- G.mergeSections(SDOSec, *SI.Sec);
- SI.Sec = nullptr;
- }
- size_t DebugSectionsSize =
- NextBlockAddr - orc::ExecutorAddr(MachOContainerBlock->getSize());
-
- // Write MachO header and debug section load commands.
- MachOStructWriter Writer(MachOContainerBlock->getAlreadyMutableContent());
- typename MachOTraits::Header Hdr;
- memset(&Hdr, 0, sizeof(Hdr));
- Hdr.magic = MachOTraits::Magic;
- switch (G.getTargetTriple().getArch()) {
- case Triple::x86_64:
- Hdr.cputype = MachO::CPU_TYPE_X86_64;
- Hdr.cpusubtype = MachO::CPU_SUBTYPE_X86_64_ALL;
- break;
- case Triple::aarch64:
- Hdr.cputype = MachO::CPU_TYPE_ARM64;
- Hdr.cpusubtype = MachO::CPU_SUBTYPE_ARM64_ALL;
- break;
- default:
- llvm_unreachable("Unsupported architecture");
- }
- Hdr.filetype = MachO::MH_OBJECT;
- Hdr.ncmds = 1;
- Hdr.sizeofcmds = SegmentLCSize;
- Hdr.flags = 0;
- Writer.write(Hdr);
-
- typename MachOTraits::SegmentLC SegLC;
- memset(&SegLC, 0, sizeof(SegLC));
- SegLC.cmd = MachOTraits::SegmentCmd;
- SegLC.cmdsize = SegmentLCSize;
- SegLC.vmaddr = ContainerBlockSize;
- SegLC.vmsize = DebugSectionsSize;
- SegLC.fileoff = ContainerBlockSize;
- SegLC.filesize = DebugSectionsSize;
- SegLC.maxprot =
- MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE;
- SegLC.initprot =
- MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE;
- SegLC.nsects = NumSections;
- SegLC.flags = 0;
- Writer.write(SegLC);
-
- StringSet<> ExistingLongNames;
- for (auto &SI : DebugSecInfos) {
- typename MachOTraits::Section Sec;
- memset(&Sec, 0, sizeof(Sec));
- memcpy(Sec.sectname, SI.SecName.data(), SI.SecName.size());
- memcpy(Sec.segname, SI.SegName.data(), SI.SegName.size());
- Sec.addr = SI.StartAddr.getValue();
- Sec.size = SI.Size;
- Sec.offset = SI.StartAddr.getValue();
- Sec.align = SI.Alignment;
- Sec.reloff = 0;
- Sec.nreloc = 0;
- Sec.flags = MachO::S_ATTR_DEBUG;
- Writer.write(Sec);
- }
-
- // Set MachOContainerBlock to indicate success to
- // completeSynthesisAndRegister.
- NonDebugSectionsStart = Writer.getOffset();
- return Error::success();
- }
-
- Error completeSynthesisAndRegister() override {
- if (!MachOContainerBlock) {
- LLVM_DEBUG({
- dbgs() << "Not writing MachO debug object header for " << G.getName()
- << " since createDebugSection failed\n";
- });
- return Error::success();
- }
-
- LLVM_DEBUG({
- dbgs() << "Writing MachO debug object header for " << G.getName() << "\n";
- });
-
- MachOStructWriter Writer(
- MachOContainerBlock->getAlreadyMutableContent().drop_front(
- NonDebugSectionsStart));
-
- unsigned LongSectionNameIdx = 0;
- for (auto *Sec : NonDebugSections) {
- size_t SepPos = Sec->getName().find(',');
- StringRef SegName, SecName;
- std::string CustomSecName;
-
- if ((SepPos == StringRef::npos && Sec->getName().size() <= 16)) {
- // No embedded segment name, short section name.
- SegName = "__JITLINK_CUSTOM";
- SecName = Sec->getName();
- } else if (SepPos < 16 && (Sec->getName().size() - (SepPos + 1) <= 16)) {
- // Canonical embedded segment and section name.
- SegName = Sec->getName().substr(0, SepPos);
- SecName = Sec->getName().substr(SepPos + 1);
- } else {
- // Long section name that needs to be truncated.
- assert(Sec->getName().size() > 16 &&
- "Short section name should have been handled above");
- SegName = "__JITLINK_CUSTOM";
- auto IdxStr = std::to_string(++LongSectionNameIdx);
- CustomSecName = Sec->getName().substr(0, 15 - IdxStr.size()).str();
- CustomSecName += ".";
- CustomSecName += IdxStr;
- SecName = StringRef(CustomSecName.data(), 16);
- }
-
- SectionRange R(*Sec);
- if (R.getFirstBlock()->getAlignmentOffset() != 0)
- return make_error<StringError>(
- "While building MachO debug object for " + G.getName() +
- " first block has non-zero alignment offset",
- inconvertibleErrorCode());
-
- typename MachOTraits::Section SecCmd;
- memset(&SecCmd, 0, sizeof(SecCmd));
- memcpy(SecCmd.sectname, SecName.data(), SecName.size());
- memcpy(SecCmd.segname, SegName.data(), SegName.size());
- SecCmd.addr = R.getStart().getValue();
- SecCmd.size = R.getSize();
- SecCmd.offset = 0;
- SecCmd.align = R.getFirstBlock()->getAlignment();
- SecCmd.reloff = 0;
- SecCmd.nreloc = 0;
- SecCmd.flags = 0;
- Writer.write(SecCmd);
- }
-
- static constexpr bool AutoRegisterCode = true;
- SectionRange R(MachOContainerBlock->getSection());
- G.allocActions().push_back(
- {cantFail(shared::WrapperFunctionCall::Create<
- shared::SPSArgList<shared::SPSExecutorAddrRange, bool>>(
- RegisterActionAddr, R.getRange(), AutoRegisterCode)),
- {}});
- return Error::success();
- }
-
-private:
- Block *MachOContainerBlock = nullptr;
- SmallVector<Section *, 16> NonDebugSections;
- size_t NonDebugSectionsStart = 0;
-};
-
-} // end anonymous namespace
-
-namespace llvm {
-namespace orc {
-
-Expected<std::unique_ptr<GDBJITDebugInfoRegistrationPlugin>>
-GDBJITDebugInfoRegistrationPlugin::Create(ExecutionSession &ES,
- JITDylib &ProcessJD,
- const Triple &TT) {
- auto RegisterActionAddr =
- TT.isOSBinFormatMachO()
- ? ES.intern("_llvm_orc_registerJITLoaderGDBAllocAction")
- : ES.intern("llvm_orc_registerJITLoaderGDBAllocAction");
-
- if (auto RegisterSym = ES.lookup({&ProcessJD}, RegisterActionAddr))
- return std::make_unique<GDBJITDebugInfoRegistrationPlugin>(
- RegisterSym->getAddress());
- else
- return RegisterSym.takeError();
-}
-
-Error GDBJITDebugInfoRegistrationPlugin::notifyFailed(
- MaterializationResponsibility &MR) {
- return Error::success();
-}
-
-Error GDBJITDebugInfoRegistrationPlugin::notifyRemovingResources(
- JITDylib &JD, ResourceKey K) {
- return Error::success();
-}
-
-void GDBJITDebugInfoRegistrationPlugin::notifyTransferringResources(
- JITDylib &JD, ResourceKey DstKey, ResourceKey SrcKey) {}
-
-void GDBJITDebugInfoRegistrationPlugin::modifyPassConfig(
- MaterializationResponsibility &MR, LinkGraph &LG,
- PassConfiguration &PassConfig) {
-
- if (LG.getTargetTriple().getObjectFormat() == Triple::MachO)
- modifyPassConfigForMachO(MR, LG, PassConfig);
- else {
- LLVM_DEBUG({
- dbgs() << "GDBJITDebugInfoRegistrationPlugin skipping unspported graph "
- << LG.getName() << "(triple = " << LG.getTargetTriple().str()
- << "\n";
- });
- }
-}
-
-void GDBJITDebugInfoRegistrationPlugin::modifyPassConfigForMachO(
- MaterializationResponsibility &MR, jitlink::LinkGraph &LG,
- jitlink::PassConfiguration &PassConfig) {
-
- switch (LG.getTargetTriple().getArch()) {
- case Triple::x86_64:
- case Triple::aarch64:
- // Supported, continue.
- assert(LG.getPointerSize() == 8 && "Graph has incorrect pointer size");
- assert(LG.getEndianness() == support::little &&
- "Graph has incorrect endianness");
- break;
- default:
- // Unsupported.
- LLVM_DEBUG({
- dbgs() << "GDBJITDebugInfoRegistrationPlugin skipping unsupported "
- << "MachO graph " << LG.getName()
- << "(triple = " << LG.getTargetTriple().str()
- << ", pointer size = " << LG.getPointerSize() << ", endianness = "
- << (LG.getEndianness() == support::big ? "big" : "little")
- << ")\n";
- });
- return;
- }
-
- // Scan for debug sections. If we find one then install passes.
- bool HasDebugSections = false;
- for (auto &Sec : LG.sections())
- if (MachODebugObjectSynthesizerBase::isDebugSection(Sec)) {
- HasDebugSections = true;
- break;
- }
-
- if (HasDebugSections) {
- LLVM_DEBUG({
- dbgs() << "GDBJITDebugInfoRegistrationPlugin: Graph " << LG.getName()
- << " contains debug info. Installing debugger support passes.\n";
- });
-
- auto MDOS = std::make_shared<MachODebugObjectSynthesizer<MachO64LE>>(
- LG, RegisterActionAddr);
- PassConfig.PrePrunePasses.push_back(
- [=](LinkGraph &G) { return MDOS->preserveDebugSections(); });
- PassConfig.PostPrunePasses.push_back(
- [=](LinkGraph &G) { return MDOS->startSynthesis(); });
- PassConfig.PreFixupPasses.push_back(
- [=](LinkGraph &G) { return MDOS->completeSynthesisAndRegister(); });
- } else {
- LLVM_DEBUG({
- dbgs() << "GDBJITDebugInfoRegistrationPlugin: Graph " << LG.getName()
- << " contains no debug info. Skipping.\n";
- });
- }
-}
-
-} // namespace orc
-} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/DebugInfoSupport.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/DebugInfoSupport.cpp
new file mode 100644
index 000000000000..f65ec27ff875
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/DebugInfoSupport.cpp
@@ -0,0 +1,121 @@
+//===--- DebugInfoSupport.cpp -- Utils for debug info support ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utilities to preserve and parse debug info from LinkGraphs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/Debugging/DebugInfoSupport.h"
+
+#include "llvm/Support/SmallVectorMemoryBuffer.h"
+
+#define DEBUG_TYPE "orc"
+
+using namespace llvm;
+using namespace llvm::orc;
+using namespace llvm::jitlink;
+
+namespace {
+static DenseSet<StringRef> DWARFSectionNames = {
+#define HANDLE_DWARF_SECTION(ENUM_NAME, ELF_NAME, CMDLINE_NAME, OPTION) \
+ StringRef(ELF_NAME),
+#include "llvm/BinaryFormat/Dwarf.def"
+#undef HANDLE_DWARF_SECTION
+};
+
+// We might be able to drop relocations to symbols that do end up
+// being pruned by the linker, but for now we just preserve them all.
+static void preserveDWARFSection(LinkGraph &G, Section &Sec) {
+ DenseMap<Block *, Symbol *> Preserved;
+ for (auto Sym : Sec.symbols()) {
+ if (Sym->isLive())
+ Preserved[&Sym->getBlock()] = Sym;
+ else if (!Preserved.count(&Sym->getBlock()))
+ Preserved[&Sym->getBlock()] = Sym;
+ }
+ for (auto Block : Sec.blocks()) {
+ auto &PSym = Preserved[Block];
+ if (!PSym)
+ PSym = &G.addAnonymousSymbol(*Block, 0, 0, false, true);
+ else if (!PSym->isLive())
+ PSym->setLive(true);
+ }
+}
+
+static SmallVector<char, 0> getSectionData(Section &Sec) {
+ SmallVector<char, 0> SecData;
+ SmallVector<Block *, 8> SecBlocks(Sec.blocks().begin(), Sec.blocks().end());
+ std::sort(SecBlocks.begin(), SecBlocks.end(), [](Block *LHS, Block *RHS) {
+ return LHS->getAddress() < RHS->getAddress();
+ });
+ // Convert back to what the object file would have had: one contiguous
+ // blob of section content. Zero-fill blocks are materialized as zeros.
+ // TODO: handle alignment?
+ // TODO: handle alignment offset?
+ for (auto *Block : SecBlocks) {
+ if (Block->isZeroFill())
+ SecData.resize(SecData.size() + Block->getSize(), 0);
+ else
+ SecData.append(Block->getContent().begin(), Block->getContent().end());
+ }
+ return SecData;
+}
+
+static void dumpDWARFContext(DWARFContext &DC) {
+ auto options = llvm::DIDumpOptions();
+ options.DumpType &= ~DIDT_UUID;
+ options.DumpType &= ~(1 << DIDT_ID_DebugFrame);
+ LLVM_DEBUG(DC.dump(dbgs(), options));
+}
+
+} // namespace
+
+Error llvm::orc::preserveDebugSections(LinkGraph &G) {
+ if (!G.getTargetTriple().isOSBinFormatELF()) {
+ return make_error<StringError>(
+ "preserveDebugSections only supports ELF LinkGraphs!",
+ inconvertibleErrorCode());
+ }
+ for (auto &Sec : G.sections()) {
+ if (DWARFSectionNames.count(Sec.getName())) {
+ LLVM_DEBUG(dbgs() << "Preserving DWARF section " << Sec.getName()
+ << "\n");
+ preserveDWARFSection(G, Sec);
+ }
+ }
+ return Error::success();
+}
+
+Expected<std::pair<std::unique_ptr<DWARFContext>,
+ StringMap<std::unique_ptr<MemoryBuffer>>>>
+llvm::orc::createDWARFContext(LinkGraph &G) {
+ if (!G.getTargetTriple().isOSBinFormatELF()) {
+ return make_error<StringError>(
+ "createDWARFContext only supports ELF LinkGraphs!",
+ inconvertibleErrorCode());
+ }
+ StringMap<std::unique_ptr<MemoryBuffer>> DWARFSectionData;
+ for (auto &Sec : G.sections()) {
+ if (DWARFSectionNames.count(Sec.getName())) {
+ auto SecData = getSectionData(Sec);
+ auto Name = Sec.getName();
+ // DWARFContext expects the section name to not start with a dot
+ if (Name.starts_with("."))
+ Name = Name.drop_front();
+ LLVM_DEBUG(dbgs() << "Creating DWARFContext section " << Name
+ << " with size " << SecData.size() << "\n");
+ DWARFSectionData[Name] =
+ std::make_unique<SmallVectorMemoryBuffer>(std::move(SecData));
+ }
+ }
+ auto Ctx =
+ DWARFContext::create(DWARFSectionData, G.getPointerSize(),
+ G.getEndianness() == llvm::endianness::little);
+ dumpDWARFContext(*Ctx);
+ return std::make_pair(std::move(Ctx), std::move(DWARFSectionData));
+}
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/DebuggerSupport.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/DebuggerSupport.cpp
new file mode 100644
index 000000000000..1668473c0eb4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/DebuggerSupport.cpp
@@ -0,0 +1,61 @@
+//===------ DebuggerSupport.cpp - Utils for enabling debugger support -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/Debugging/DebuggerSupport.h"
+#include "llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h"
+#include "llvm/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.h"
+#include "llvm/ExecutionEngine/Orc/LLJIT.h"
+
+#define DEBUG_TYPE "orc"
+
+using namespace llvm;
+using namespace llvm::orc;
+
+namespace llvm::orc {
+
+Error enableDebuggerSupport(LLJIT &J) {
+ auto *ObjLinkingLayer = dyn_cast<ObjectLinkingLayer>(&J.getObjLinkingLayer());
+ if (!ObjLinkingLayer)
+ return make_error<StringError>("Cannot enable LLJIT debugger support: "
+ "Debugger support requires JITLink",
+ inconvertibleErrorCode());
+ auto ProcessSymsJD = J.getProcessSymbolsJITDylib();
+ if (!ProcessSymsJD)
+ return make_error<StringError>("Cannot enable LLJIT debugger support: "
+ "Process symbols are not available",
+ inconvertibleErrorCode());
+
+ auto &ES = J.getExecutionSession();
+ const auto &TT = J.getTargetTriple();
+
+ switch (TT.getObjectFormat()) {
+ case Triple::ELF: {
+ auto Registrar = createJITLoaderGDBRegistrar(ES);
+ if (!Registrar)
+ return Registrar.takeError();
+ ObjLinkingLayer->addPlugin(std::make_unique<DebugObjectManagerPlugin>(
+ ES, std::move(*Registrar), false, true));
+ return Error::success();
+ }
+ case Triple::MachO: {
+ auto DS = GDBJITDebugInfoRegistrationPlugin::Create(ES, *ProcessSymsJD, TT);
+ if (!DS)
+ return DS.takeError();
+ ObjLinkingLayer->addPlugin(std::move(*DS));
+ return Error::success();
+ }
+ default:
+ return make_error<StringError>(
+ "Cannot enable LLJIT debugger support: " +
+ Triple::getObjectFormatTypeName(TT.getObjectFormat()) +
+ " is not supported",
+ inconvertibleErrorCode());
+ }
+}
+
+} // namespace llvm::orc
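A minimal usage sketch (assumes an LLJIT instance that satisfies the preconditions checked above, i.e. a JITLink ObjectLinkingLayer and available process symbols):

    #include "llvm/ExecutionEngine/Orc/Debugging/DebuggerSupport.h"
    #include "llvm/ExecutionEngine/Orc/LLJIT.h"

    using namespace llvm;
    using namespace llvm::orc;

    Error setUpJITWithDebugging(std::unique_ptr<LLJIT> &Out) {
      auto J = LLJITBuilder().create();
      if (!J)
        return J.takeError();
      if (auto Err = enableDebuggerSupport(**J))
        return Err;
      // Objects added from here on have their debug info registered with an
      // attached debugger: GDB JIT interface on ELF, synthesized MachO debug
      // objects on MachO.
      Out = std::move(*J);
      return Error::success();
    }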
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.cpp
new file mode 100644
index 000000000000..e387b06ee934
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.cpp
@@ -0,0 +1,423 @@
+//===------- DebuggerSupportPlugin.cpp - Utils for debugger support -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.h"
+#include "llvm/ExecutionEngine/Orc/MachOBuilder.h"
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
+
+#include <chrono>
+
+#define DEBUG_TYPE "orc"
+
+using namespace llvm;
+using namespace llvm::jitlink;
+using namespace llvm::orc;
+
+static const char *SynthDebugSectionName = "__jitlink_synth_debug_object";
+
+namespace {
+
+class MachODebugObjectSynthesizerBase
+ : public GDBJITDebugInfoRegistrationPlugin::DebugSectionSynthesizer {
+public:
+ static bool isDebugSection(Section &Sec) {
+ return Sec.getName().starts_with("__DWARF,");
+ }
+
+ MachODebugObjectSynthesizerBase(LinkGraph &G, ExecutorAddr RegisterActionAddr)
+ : G(G), RegisterActionAddr(RegisterActionAddr) {}
+ virtual ~MachODebugObjectSynthesizerBase() = default;
+
+ Error preserveDebugSections() {
+ if (G.findSectionByName(SynthDebugSectionName)) {
+ LLVM_DEBUG({
+ dbgs() << "MachODebugObjectSynthesizer skipping graph " << G.getName()
+ << " which contains an unexpected existing "
+ << SynthDebugSectionName << " section.\n";
+ });
+ return Error::success();
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "MachODebugObjectSynthesizer visiting graph " << G.getName()
+ << "\n";
+ });
+ for (auto &Sec : G.sections()) {
+ if (!isDebugSection(Sec))
+ continue;
+ // Preserve blocks in this debug section by marking one existing symbol
+ // live for each block, and introducing a new live, anonymous symbol for
+ // each currently unreferenced block.
+ LLVM_DEBUG({
+ dbgs() << " Preserving debug section " << Sec.getName() << "\n";
+ });
+ SmallSet<Block *, 8> PreservedBlocks;
+ for (auto *Sym : Sec.symbols()) {
+ bool NewPreservedBlock =
+ PreservedBlocks.insert(&Sym->getBlock()).second;
+ if (NewPreservedBlock)
+ Sym->setLive(true);
+ }
+ for (auto *B : Sec.blocks())
+ if (!PreservedBlocks.count(B))
+ G.addAnonymousSymbol(*B, 0, 0, false, true);
+ }
+
+ return Error::success();
+ }
+
+protected:
+ LinkGraph &G;
+ ExecutorAddr RegisterActionAddr;
+};
+
+template <typename MachOTraits>
+class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase {
+public:
+ MachODebugObjectSynthesizer(ExecutionSession &ES, LinkGraph &G,
+ ExecutorAddr RegisterActionAddr)
+ : MachODebugObjectSynthesizerBase(G, RegisterActionAddr),
+ Builder(ES.getPageSize()) {}
+
+ using MachODebugObjectSynthesizerBase::MachODebugObjectSynthesizerBase;
+
+ Error startSynthesis() override {
+ LLVM_DEBUG({
+ dbgs() << "Creating " << SynthDebugSectionName << " for " << G.getName()
+ << "\n";
+ });
+
+ for (auto &Sec : G.sections()) {
+ if (Sec.blocks().empty())
+ continue;
+
+ // Skip sections whose names don't fit the MachO standard.
+ if (Sec.getName().empty() || Sec.getName().size() > 33 ||
+ Sec.getName().find(',') > 16)
+ continue;
+
+ if (isDebugSection(Sec))
+ DebugSections.push_back({&Sec, nullptr});
+ else if (Sec.getMemLifetime() != MemLifetime::NoAlloc)
+ NonDebugSections.push_back({&Sec, nullptr});
+ }
+
+ // Bail out early if no debug sections.
+ if (DebugSections.empty())
+ return Error::success();
+
+ // Write MachO header and debug section load commands.
+ Builder.Header.filetype = MachO::MH_OBJECT;
+ switch (G.getTargetTriple().getArch()) {
+ case Triple::x86_64:
+ Builder.Header.cputype = MachO::CPU_TYPE_X86_64;
+ Builder.Header.cpusubtype = MachO::CPU_SUBTYPE_X86_64_ALL;
+ break;
+ case Triple::aarch64:
+ Builder.Header.cputype = MachO::CPU_TYPE_ARM64;
+ Builder.Header.cpusubtype = MachO::CPU_SUBTYPE_ARM64_ALL;
+ break;
+ default:
+ llvm_unreachable("Unsupported architecture");
+ }
+
+ Seg = &Builder.addSegment("");
+
+ StringMap<std::unique_ptr<MemoryBuffer>> DebugSectionMap;
+ StringRef DebugLineSectionData;
+ for (auto &DSec : DebugSections) {
+ auto [SegName, SecName] = DSec.GraphSec->getName().split(',');
+ DSec.BuilderSec = &Seg->addSection(SecName, SegName);
+
+ SectionRange SR(*DSec.GraphSec);
+ DSec.BuilderSec->Content.Size = SR.getSize();
+ if (!SR.empty()) {
+ DSec.BuilderSec->align = Log2_64(SR.getFirstBlock()->getAlignment());
+ StringRef SectionData(SR.getFirstBlock()->getContent().data(),
+ SR.getFirstBlock()->getSize());
+ DebugSectionMap[SecName] =
+ MemoryBuffer::getMemBuffer(SectionData, G.getName(), false);
+ if (SecName == "__debug_line")
+ DebugLineSectionData = SectionData;
+ }
+ }
+
+ std::optional<StringRef> FileName;
+ if (!DebugLineSectionData.empty()) {
+ assert((G.getEndianness() == llvm::endianness::big ||
+ G.getEndianness() == llvm::endianness::little) &&
+ "G.getEndianness() must be either big or little");
+ auto DWARFCtx =
+ DWARFContext::create(DebugSectionMap, G.getPointerSize(),
+ G.getEndianness() == llvm::endianness::little);
+ DWARFDataExtractor DebugLineData(
+ DebugLineSectionData, G.getEndianness() == llvm::endianness::little,
+ G.getPointerSize());
+ uint64_t Offset = 0;
+ DWARFDebugLine::LineTable LineTable;
+
+ // Try to parse line data. Consume error on failure.
+ if (auto Err = LineTable.parse(DebugLineData, &Offset, *DWARFCtx, nullptr,
+ consumeError)) {
+ handleAllErrors(std::move(Err), [&](ErrorInfoBase &EIB) {
+ LLVM_DEBUG({
+ dbgs() << "Cannot parse line table for \"" << G.getName() << "\": ";
+ EIB.log(dbgs());
+ dbgs() << "\n";
+ });
+ });
+ } else {
+ if (!LineTable.Prologue.FileNames.empty())
+ FileName = *dwarf::toString(LineTable.Prologue.FileNames[0].Name);
+ }
+ }
+
+ // If no line table (or unable to use) then use graph name.
+ // FIXME: There are probably other debug sections we should look in first.
+ if (!FileName)
+ FileName = StringRef(G.getName());
+
+ Builder.addSymbol("", MachO::N_SO, 0, 0, 0);
+ Builder.addSymbol(*FileName, MachO::N_SO, 0, 0, 0);
+ auto TimeStamp = std::chrono::duration_cast<std::chrono::seconds>(
+ std::chrono::system_clock::now().time_since_epoch())
+ .count();
+ Builder.addSymbol("", MachO::N_OSO, 3, 1, TimeStamp);
+
+ for (auto &NDSP : NonDebugSections) {
+ auto [SegName, SecName] = NDSP.GraphSec->getName().split(',');
+ NDSP.BuilderSec = &Seg->addSection(SecName, SegName);
+ SectionRange SR(*NDSP.GraphSec);
+ if (!SR.empty())
+ NDSP.BuilderSec->align = Log2_64(SR.getFirstBlock()->getAlignment());
+
+ // Add stabs.
+ for (auto *Sym : NDSP.GraphSec->symbols()) {
+ // Skip anonymous symbols.
+ if (!Sym->hasName())
+ continue;
+
+ uint8_t SymType = Sym->isCallable() ? MachO::N_FUN : MachO::N_GSYM;
+
+ Builder.addSymbol("", MachO::N_BNSYM, 1, 0, 0);
+ StabSymbols.push_back(
+ {*Sym, Builder.addSymbol(Sym->getName(), SymType, 1, 0, 0),
+ Builder.addSymbol(Sym->getName(), SymType, 0, 0, 0)});
+ Builder.addSymbol("", MachO::N_ENSYM, 1, 0, 0);
+ }
+ }
+
+ Builder.addSymbol("", MachO::N_SO, 1, 0, 0);
+
+ // Lay out the debug object, create a section and block for it.
+ size_t DebugObjectSize = Builder.layout();
+
+ auto &SDOSec = G.createSection(SynthDebugSectionName, MemProt::Read);
+ MachOContainerBlock = &G.createMutableContentBlock(
+ SDOSec, G.allocateBuffer(DebugObjectSize), orc::ExecutorAddr(), 8, 0);
+
+ return Error::success();
+ }
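For reference, the symbol-table stream assembled above has this shape (a paraphrase of the addSymbol calls, not an exhaustive MachO stabs reference):

    N_SO  ""            // begin compilation unit
    N_SO  <source file> // from __debug_line, or the graph name as fallback
    N_OSO <timestamp>   // link back to this synthesized debug object
    // then, per named symbol in the non-debug sections:
    //   N_BNSYM "", N_FUN-or-N_GSYM <sym> (twice), N_ENSYM ""
    N_SO  ""            // end compilation unit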
+
+ Error completeSynthesisAndRegister() override {
+ if (!MachOContainerBlock) {
+ LLVM_DEBUG({
+ dbgs() << "Not writing MachO debug object header for " << G.getName()
+ << " since createDebugSection failed\n";
+ });
+
+ return Error::success();
+ }
+ ExecutorAddr MaxAddr;
+ for (auto &NDSec : NonDebugSections) {
+ SectionRange SR(*NDSec.GraphSec);
+ NDSec.BuilderSec->addr = SR.getStart().getValue();
+ NDSec.BuilderSec->size = SR.getSize();
+ NDSec.BuilderSec->offset = SR.getStart().getValue();
+ if (SR.getEnd() > MaxAddr)
+ MaxAddr = SR.getEnd();
+ }
+
+ for (auto &DSec : DebugSections) {
+ if (DSec.GraphSec->blocks_size() != 1)
+ return make_error<StringError>(
+ "Unexpected number of blocks in debug info section",
+ inconvertibleErrorCode());
+
+ if (ExecutorAddr(DSec.BuilderSec->addr) + DSec.BuilderSec->size > MaxAddr)
+ MaxAddr = ExecutorAddr(DSec.BuilderSec->addr) + DSec.BuilderSec->size;
+
+ auto &B = **DSec.GraphSec->blocks().begin();
+ DSec.BuilderSec->Content.Data = B.getContent().data();
+ DSec.BuilderSec->Content.Size = B.getContent().size();
+ DSec.BuilderSec->flags |= MachO::S_ATTR_DEBUG;
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "Writing MachO debug object header for " << G.getName() << "\n";
+ });
+
+ // Update stab symbol addresses.
+ for (auto &SS : StabSymbols) {
+ SS.StartStab.nlist().n_value = SS.Sym.getAddress().getValue();
+ SS.EndStab.nlist().n_value = SS.Sym.getSize();
+ }
+
+ Builder.write(MachOContainerBlock->getAlreadyMutableContent());
+
+ static constexpr bool AutoRegisterCode = true;
+ SectionRange R(MachOContainerBlock->getSection());
+ G.allocActions().push_back(
+ {cantFail(shared::WrapperFunctionCall::Create<
+ shared::SPSArgList<shared::SPSExecutorAddrRange, bool>>(
+ RegisterActionAddr, R.getRange(), AutoRegisterCode)),
+ {}});
+
+ return Error::success();
+ }
+
+private:
+ struct SectionPair {
+ Section *GraphSec = nullptr;
+ typename MachOBuilder<MachOTraits>::Section *BuilderSec = nullptr;
+ };
+
+ struct StabSymbolsEntry {
+ using RelocTarget = typename MachOBuilder<MachOTraits>::RelocTarget;
+
+ StabSymbolsEntry(Symbol &Sym, RelocTarget StartStab, RelocTarget EndStab)
+ : Sym(Sym), StartStab(StartStab), EndStab(EndStab) {}
+
+ Symbol &Sym;
+ RelocTarget StartStab, EndStab;
+ };
+
+ using BuilderType = MachOBuilder<MachOTraits>;
+
+ Block *MachOContainerBlock = nullptr;
+ MachOBuilder<MachOTraits> Builder;
+ typename MachOBuilder<MachOTraits>::Segment *Seg = nullptr;
+ std::vector<StabSymbolsEntry> StabSymbols;
+ SmallVector<SectionPair, 16> DebugSections;
+ SmallVector<SectionPair, 16> NonDebugSections;
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+namespace orc {
+
+Expected<std::unique_ptr<GDBJITDebugInfoRegistrationPlugin>>
+GDBJITDebugInfoRegistrationPlugin::Create(ExecutionSession &ES,
+ JITDylib &ProcessJD,
+ const Triple &TT) {
+ auto RegisterActionAddr =
+ TT.isOSBinFormatMachO()
+ ? ES.intern("_llvm_orc_registerJITLoaderGDBAllocAction")
+ : ES.intern("llvm_orc_registerJITLoaderGDBAllocAction");
+
+ if (auto RegisterSym = ES.lookup({&ProcessJD}, RegisterActionAddr))
+ return std::make_unique<GDBJITDebugInfoRegistrationPlugin>(
+ RegisterSym->getAddress());
+ else
+ return RegisterSym.takeError();
+}
+
+Error GDBJITDebugInfoRegistrationPlugin::notifyFailed(
+ MaterializationResponsibility &MR) {
+ return Error::success();
+}
+
+Error GDBJITDebugInfoRegistrationPlugin::notifyRemovingResources(
+ JITDylib &JD, ResourceKey K) {
+ return Error::success();
+}
+
+void GDBJITDebugInfoRegistrationPlugin::notifyTransferringResources(
+ JITDylib &JD, ResourceKey DstKey, ResourceKey SrcKey) {}
+
+void GDBJITDebugInfoRegistrationPlugin::modifyPassConfig(
+ MaterializationResponsibility &MR, LinkGraph &LG,
+ PassConfiguration &PassConfig) {
+
+ if (LG.getTargetTriple().getObjectFormat() == Triple::MachO)
+ modifyPassConfigForMachO(MR, LG, PassConfig);
+ else {
+ LLVM_DEBUG({
+ dbgs() << "GDBJITDebugInfoRegistrationPlugin skipping unspported graph "
+ << LG.getName() << "(triple = " << LG.getTargetTriple().str()
+ << "\n";
+ });
+ }
+}
+
+void GDBJITDebugInfoRegistrationPlugin::modifyPassConfigForMachO(
+ MaterializationResponsibility &MR, jitlink::LinkGraph &LG,
+ jitlink::PassConfiguration &PassConfig) {
+
+ switch (LG.getTargetTriple().getArch()) {
+ case Triple::x86_64:
+ case Triple::aarch64:
+ // Supported, continue.
+ assert(LG.getPointerSize() == 8 && "Graph has incorrect pointer size");
+ assert(LG.getEndianness() == llvm::endianness::little &&
+ "Graph has incorrect endianness");
+ break;
+ default:
+ // Unsupported.
+ LLVM_DEBUG({
+ dbgs() << "GDBJITDebugInfoRegistrationPlugin skipping unsupported "
+ << "MachO graph " << LG.getName()
+ << "(triple = " << LG.getTargetTriple().str()
+ << ", pointer size = " << LG.getPointerSize() << ", endianness = "
+ << (LG.getEndianness() == llvm::endianness::big ? "big" : "little")
+ << ")\n";
+ });
+ return;
+ }
+
+ // Scan for debug sections. If we find one then install passes.
+ bool HasDebugSections = false;
+ for (auto &Sec : LG.sections())
+ if (MachODebugObjectSynthesizerBase::isDebugSection(Sec)) {
+ HasDebugSections = true;
+ break;
+ }
+
+ if (HasDebugSections) {
+ LLVM_DEBUG({
+ dbgs() << "GDBJITDebugInfoRegistrationPlugin: Graph " << LG.getName()
+ << " contains debug info. Installing debugger support passes.\n";
+ });
+
+ auto MDOS = std::make_shared<MachODebugObjectSynthesizer<MachO64LE>>(
+ MR.getTargetJITDylib().getExecutionSession(), LG, RegisterActionAddr);
+ PassConfig.PrePrunePasses.push_back(
+ [=](LinkGraph &G) { return MDOS->preserveDebugSections(); });
+ PassConfig.PostPrunePasses.push_back(
+ [=](LinkGraph &G) { return MDOS->startSynthesis(); });
+ PassConfig.PostFixupPasses.push_back(
+ [=](LinkGraph &G) { return MDOS->completeSynthesisAndRegister(); });
+ } else {
+ LLVM_DEBUG({
+ dbgs() << "GDBJITDebugInfoRegistrationPlugin: Graph " << LG.getName()
+ << " contains no debug info. Skipping.\n";
+ });
+ }
+}
+
+} // namespace orc
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/LLJITUtilsCBindings.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/LLJITUtilsCBindings.cpp
new file mode 100644
index 000000000000..2df5aef733fb
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/LLJITUtilsCBindings.cpp
@@ -0,0 +1,22 @@
+//===--------- LLJITUtilsCBindings.cpp - Advanced LLJIT features ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/LLJIT.h"
+#include "llvm-c/LLJITUtils.h"
+
+#include "llvm/ExecutionEngine/Orc/Debugging/DebuggerSupport.h"
+#include "llvm/ExecutionEngine/Orc/LLJIT.h"
+
+using namespace llvm;
+using namespace llvm::orc;
+
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LLJIT, LLVMOrcLLJITRef)
+
+LLVMErrorRef LLVMOrcLLJITEnableDebugSupport(LLVMOrcLLJITRef J) {
+ return wrap(llvm::orc::enableDebuggerSupport(*unwrap(J)));
+}
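The corresponding call sequence through the C API, sketched under the assumption that the LLJIT instance was created via the existing LLVMOrcCreateLLJIT entry point:

    #include "llvm-c/Error.h"
    #include "llvm-c/LLJIT.h"
    #include "llvm-c/LLJITUtils.h"
    #include <stdio.h>

    static void enableDebug(LLVMOrcLLJITRef J) {
      LLVMErrorRef Err = LLVMOrcLLJITEnableDebugSupport(J);
      if (Err) {
        char *Msg = LLVMGetErrorMessage(Err); // consumes Err
        fprintf(stderr, "debug support: %s\n", Msg);
        LLVMDisposeErrorMessage(Msg);
      }
    }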
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/PerfSupportPlugin.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/PerfSupportPlugin.cpp
new file mode 100644
index 000000000000..fffecfc97814
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/PerfSupportPlugin.cpp
@@ -0,0 +1,303 @@
+//===----- PerfSupportPlugin.cpp --- Utils for perf support -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Handles support for registering code with perf
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/Debugging/PerfSupportPlugin.h"
+
+#include "llvm/ExecutionEngine/JITLink/x86_64.h"
+#include "llvm/ExecutionEngine/Orc/Debugging/DebugInfoSupport.h"
+#include "llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h"
+#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
+
+#define DEBUG_TYPE "orc"
+
+using namespace llvm;
+using namespace llvm::orc;
+using namespace llvm::jitlink;
+
+namespace {
+
+// Creates an EH frame header prepared for a 32-bit relative relocation
+// to the start of the .eh_frame section. When `absolute` is set, a 64-bit
+// absolute address is written 4 bytes from the start instead of the
+// 32-bit relative field.
+Expected<std::string> createX64EHFrameHeader(Section &EHFrame,
+ llvm::endianness endianness,
+ bool absolute) {
+ uint8_t Version = 1;
+ uint8_t EhFramePtrEnc = 0;
+ if (absolute) {
+ EhFramePtrEnc |= dwarf::DW_EH_PE_sdata8 | dwarf::DW_EH_PE_absptr;
+ } else {
+ EhFramePtrEnc |= dwarf::DW_EH_PE_sdata4 | dwarf::DW_EH_PE_datarel;
+ }
+ uint8_t FDECountEnc = dwarf::DW_EH_PE_omit;
+ uint8_t TableEnc = dwarf::DW_EH_PE_omit;
+ // X86_64_64 relocation to the start of the .eh_frame section
+ uint32_t EHFrameRelocation = 0;
+ // uint32_t FDECount = 0;
+ // Skip the FDE binary search table: we'd have to reprocess the CIEs to
+ // get this information, which seems like more trouble than it's worth.
+ // TODO: consider implementing this.
+ // (The binary search table would go here.)
+
+ size_t HeaderSize =
+ (sizeof(Version) + sizeof(EhFramePtrEnc) + sizeof(FDECountEnc) +
+ sizeof(TableEnc) +
+ (absolute ? sizeof(uint64_t) : sizeof(EHFrameRelocation)));
+ std::string HeaderContent(HeaderSize, '\0');
+ BinaryStreamWriter Writer(
+ MutableArrayRef<uint8_t>(
+ reinterpret_cast<uint8_t *>(HeaderContent.data()), HeaderSize),
+ endianness);
+ if (auto Err = Writer.writeInteger(Version))
+ return std::move(Err);
+ if (auto Err = Writer.writeInteger(EhFramePtrEnc))
+ return std::move(Err);
+ if (auto Err = Writer.writeInteger(FDECountEnc))
+ return std::move(Err);
+ if (auto Err = Writer.writeInteger(TableEnc))
+ return std::move(Err);
+ if (absolute) {
+ uint64_t EHFrameAddr = SectionRange(EHFrame).getStart().getValue();
+ if (auto Err = Writer.writeInteger(EHFrameAddr))
+ return std::move(Err);
+ } else {
+ if (auto Err = Writer.writeInteger(EHFrameRelocation))
+ return std::move(Err);
+ }
+ return HeaderContent;
+}
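Under those encodings the emitted header lays out as follows (little-endian assumed; field names follow the usual .eh_frame_hdr description):

    offset 0  version          = 1
    offset 1  eh_frame_ptr_enc = sdata4|datarel (or sdata8|absptr)
    offset 2  fde_count_enc    = omit
    offset 3  table_enc        = omit
    offset 4  eh_frame_ptr     = 4 bytes, relocated later (relative flavor),
                                 or 8 bytes holding the .eh_frame address
                                 (absolute flavor)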
+
+constexpr StringRef RegisterPerfStartSymbolName =
+ "llvm_orc_registerJITLoaderPerfStart";
+constexpr StringRef RegisterPerfEndSymbolName =
+ "llvm_orc_registerJITLoaderPerfEnd";
+constexpr StringRef RegisterPerfImplSymbolName =
+ "llvm_orc_registerJITLoaderPerfImpl";
+
+static PerfJITCodeLoadRecord
+getCodeLoadRecord(const Symbol &Sym, std::atomic<uint64_t> &CodeIndex) {
+ PerfJITCodeLoadRecord Record;
+ auto Name = Sym.getName();
+ auto Addr = Sym.getAddress();
+ auto Size = Sym.getSize();
+ Record.Prefix.Id = PerfJITRecordType::JIT_CODE_LOAD;
+ // Runtime sets PID
+ Record.Pid = 0;
+ // Runtime sets TID
+ Record.Tid = 0;
+ Record.Vma = Addr.getValue();
+ Record.CodeAddr = Addr.getValue();
+ Record.CodeSize = Size;
+ Record.CodeIndex = CodeIndex++;
+ Record.Name = Name.str();
+ // Initialize last, once all the other fields are filled
+ Record.Prefix.TotalSize =
+ (2 * sizeof(uint32_t) // id, total_size
+ + sizeof(uint64_t) // timestamp
+ + 2 * sizeof(uint32_t) // pid, tid
+ + 4 * sizeof(uint64_t) // vma, code_addr, code_size, code_index
+ + Name.size() + 1 // symbol name
+ + Record.CodeSize // code
+ );
+ return Record;
+}
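As a worked example of that size computation: for a hypothetical symbol "foo" (3 characters plus NUL) with 16 bytes of code, the prefix comes to

    2*4 (id, total_size) + 8 (timestamp) + 2*4 (pid, tid)
      + 4*8 (vma, code_addr, code_size, code_index)
      + 4 (name) + 16 (code)
      = 76 bytes.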
+
+static std::optional<PerfJITDebugInfoRecord>
+getDebugInfoRecord(const Symbol &Sym, DWARFContext &DC) {
+ auto &Section = Sym.getBlock().getSection();
+ auto Addr = Sym.getAddress();
+ auto Size = Sym.getSize();
+ auto SAddr = object::SectionedAddress{Addr.getValue(), Section.getOrdinal()};
+ LLVM_DEBUG(dbgs() << "Getting debug info for symbol " << Sym.getName()
+ << " at address " << Addr.getValue() << " with size "
+ << Size << "\n"
+ << "Section ordinal: " << Section.getOrdinal() << "\n");
+ auto LInfo = DC.getLineInfoForAddressRange(
+ SAddr, Size, DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
+ if (LInfo.empty()) {
+ // No line info available
+ LLVM_DEBUG(dbgs() << "No line info available\n");
+ return std::nullopt;
+ }
+ PerfJITDebugInfoRecord Record;
+ Record.Prefix.Id = PerfJITRecordType::JIT_CODE_DEBUG_INFO;
+ Record.CodeAddr = Addr.getValue();
+ for (const auto &Entry : LInfo) {
+ auto Addr = Entry.first;
+ // The function re-created by perf is preceded by an ELF
+ // header; adjust for that, otherwise the results are wrong.
+ Addr += 0x40;
+ Record.Entries.push_back({Addr, Entry.second.Line,
+ Entry.second.Discriminator,
+ Entry.second.FileName});
+ }
+ size_t EntriesBytes = (2 // record header
+ + 2 // record fields
+ ) *
+ sizeof(uint64_t);
+ for (const auto &Entry : Record.Entries) {
+ EntriesBytes +=
+ sizeof(uint64_t) + 2 * sizeof(uint32_t); // Addr, Line/Discrim
+ EntriesBytes += Entry.Name.size() + 1; // Name
+ }
+ Record.Prefix.TotalSize = EntriesBytes;
+ LLVM_DEBUG(dbgs() << "Created debug info record\n"
+ << "Total size: " << Record.Prefix.TotalSize << "\n"
+ << "Nr entries: " << Record.Entries.size() << "\n");
+ return Record;
+}
+
+static Expected<PerfJITCodeUnwindingInfoRecord>
+getUnwindingRecord(LinkGraph &G) {
+ PerfJITCodeUnwindingInfoRecord Record;
+ Record.Prefix.Id = PerfJITRecordType::JIT_CODE_UNWINDING_INFO;
+ Record.Prefix.TotalSize = 0;
+ auto Eh_frame = G.findSectionByName(".eh_frame");
+ if (!Eh_frame) {
+ LLVM_DEBUG(dbgs() << "No .eh_frame section found\n");
+ return Record;
+ }
+ if (!G.getTargetTriple().isOSBinFormatELF()) {
+ LLVM_DEBUG(dbgs() << "Not an ELF file, will not emit unwinding info\n");
+ return Record;
+ }
+ auto SR = SectionRange(*Eh_frame);
+ auto EHFrameSize = SR.getSize();
+ auto Eh_frame_hdr = G.findSectionByName(".eh_frame_hdr");
+ if (!Eh_frame_hdr) {
+ if (G.getTargetTriple().getArch() == Triple::x86_64) {
+ auto Hdr = createX64EHFrameHeader(*Eh_frame, G.getEndianness(), true);
+ if (!Hdr)
+ return Hdr.takeError();
+ Record.EHFrameHdr = std::move(*Hdr);
+ } else {
+ LLVM_DEBUG(dbgs() << "No .eh_frame_hdr section found\n");
+ return Record;
+ }
+ Record.EHFrameHdrAddr = 0;
+ Record.EHFrameHdrSize = Record.EHFrameHdr.size();
+ Record.UnwindDataSize = EHFrameSize + Record.EHFrameHdrSize;
+ Record.MappedSize = 0; // Because the EHFrame header was not mapped
+ } else {
+ auto SR = SectionRange(*Eh_frame_hdr);
+ Record.EHFrameHdrAddr = SR.getStart().getValue();
+ Record.EHFrameHdrSize = SR.getSize();
+ Record.UnwindDataSize = EHFrameSize + Record.EHFrameHdrSize;
+ Record.MappedSize = Record.UnwindDataSize;
+ }
+ Record.EHFrameAddr = SR.getStart().getValue();
+  Record.Prefix.TotalSize =
+      (2 * sizeof(uint32_t)    // id, total_size
+       + sizeof(uint64_t)      // timestamp
+       + 3 * sizeof(uint64_t)  // unwind_data_size, eh_frame_hdr_size,
+                               // mapped_size
+       + Record.UnwindDataSize // eh_frame_hdr, eh_frame
+      );
+ LLVM_DEBUG(dbgs() << "Created unwind record\n"
+ << "Total size: " << Record.Prefix.TotalSize << "\n"
+ << "Unwind size: " << Record.UnwindDataSize << "\n"
+ << "EHFrame size: " << EHFrameSize << "\n"
+ << "EHFrameHdr size: " << Record.EHFrameHdrSize << "\n");
+ return Record;
+}
+
+static PerfJITRecordBatch getRecords(ExecutionSession &ES, LinkGraph &G,
+ std::atomic<uint64_t> &CodeIndex,
+ bool EmitDebugInfo, bool EmitUnwindInfo) {
+ std::unique_ptr<DWARFContext> DC;
+ StringMap<std::unique_ptr<MemoryBuffer>> DCBacking;
+ if (EmitDebugInfo) {
+ auto EDC = createDWARFContext(G);
+ if (!EDC) {
+ ES.reportError(EDC.takeError());
+ EmitDebugInfo = false;
+ } else {
+ DC = std::move(EDC->first);
+ DCBacking = std::move(EDC->second);
+ }
+ }
+ PerfJITRecordBatch Batch;
+ for (auto Sym : G.defined_symbols()) {
+ if (!Sym->hasName() || !Sym->isCallable())
+ continue;
+ if (EmitDebugInfo) {
+ auto DebugInfo = getDebugInfoRecord(*Sym, *DC);
+ if (DebugInfo)
+ Batch.DebugInfoRecords.push_back(std::move(*DebugInfo));
+ }
+ Batch.CodeLoadRecords.push_back(getCodeLoadRecord(*Sym, CodeIndex));
+ }
+ if (EmitUnwindInfo) {
+ auto UWR = getUnwindingRecord(G);
+ if (!UWR) {
+ ES.reportError(UWR.takeError());
+ } else {
+ Batch.UnwindingRecord = std::move(*UWR);
+ }
+ } else {
+ Batch.UnwindingRecord.Prefix.TotalSize = 0;
+ }
+ return Batch;
+}
+} // namespace
+
+PerfSupportPlugin::PerfSupportPlugin(ExecutorProcessControl &EPC,
+ ExecutorAddr RegisterPerfStartAddr,
+ ExecutorAddr RegisterPerfEndAddr,
+ ExecutorAddr RegisterPerfImplAddr,
+ bool EmitDebugInfo, bool EmitUnwindInfo)
+ : EPC(EPC), RegisterPerfStartAddr(RegisterPerfStartAddr),
+ RegisterPerfEndAddr(RegisterPerfEndAddr),
+ RegisterPerfImplAddr(RegisterPerfImplAddr), CodeIndex(0),
+ EmitDebugInfo(EmitDebugInfo), EmitUnwindInfo(EmitUnwindInfo) {
+ cantFail(EPC.callSPSWrapper<void()>(RegisterPerfStartAddr));
+}
+PerfSupportPlugin::~PerfSupportPlugin() {
+ cantFail(EPC.callSPSWrapper<void()>(RegisterPerfEndAddr));
+}
+
+void PerfSupportPlugin::modifyPassConfig(MaterializationResponsibility &MR,
+ LinkGraph &G,
+ PassConfiguration &Config) {
+ Config.PostFixupPasses.push_back([this](LinkGraph &G) {
+ auto Batch = getRecords(EPC.getExecutionSession(), G, CodeIndex,
+ EmitDebugInfo, EmitUnwindInfo);
+ G.allocActions().push_back(
+ {cantFail(shared::WrapperFunctionCall::Create<
+ shared::SPSArgList<shared::SPSPerfJITRecordBatch>>(
+ RegisterPerfImplAddr, Batch)),
+ {}});
+ return Error::success();
+ });
+}
+
+Expected<std::unique_ptr<PerfSupportPlugin>>
+PerfSupportPlugin::Create(ExecutorProcessControl &EPC, JITDylib &JD,
+ bool EmitDebugInfo, bool EmitUnwindInfo) {
+ if (!EPC.getTargetTriple().isOSBinFormatELF()) {
+ return make_error<StringError>(
+ "Perf support only available for ELF LinkGraphs!",
+ inconvertibleErrorCode());
+ }
+ auto &ES = EPC.getExecutionSession();
+ ExecutorAddr StartAddr, EndAddr, ImplAddr;
+ if (auto Err = lookupAndRecordAddrs(
+ ES, LookupKind::Static, makeJITDylibSearchOrder({&JD}),
+ {{ES.intern(RegisterPerfStartSymbolName), &StartAddr},
+ {ES.intern(RegisterPerfEndSymbolName), &EndAddr},
+ {ES.intern(RegisterPerfImplSymbolName), &ImplAddr}}))
+ return std::move(Err);
+ return std::make_unique<PerfSupportPlugin>(EPC, StartAddr, EndAddr, ImplAddr,
+ EmitDebugInfo, EmitUnwindInfo);
+}
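+
+// A minimal usage sketch (hypothetical client code; the names OLL and JD
+// are assumptions, not part of this file): given an ExecutorProcessControl
+// EPC, a JITDylib JD exposing the llvm_orc_registerJITLoaderPerf* symbols,
+// and an ObjectLinkingLayer OLL:
+//
+//   auto Perf = PerfSupportPlugin::Create(EPC, JD, /*EmitDebugInfo=*/true,
+//                                         /*EmitUnwindInfo=*/true);
+//   if (!Perf)
+//     return Perf.takeError();
+//   OLL.addPlugin(std::move(*Perf));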
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp
index 1bb4ecdff299..2b6c4b9e7f43 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp
@@ -11,6 +11,7 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/ExecutionEngine/JITLink/ELF_x86_64.h"
#include "llvm/ExecutionEngine/JITLink/aarch64.h"
+#include "llvm/ExecutionEngine/JITLink/ppc64.h"
#include "llvm/ExecutionEngine/JITLink/x86_64.h"
#include "llvm/ExecutionEngine/Orc/DebugUtils.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
@@ -39,21 +40,31 @@ public:
void materialize(std::unique_ptr<MaterializationResponsibility> R) override {
unsigned PointerSize;
- support::endianness Endianness;
+ llvm::endianness Endianness;
jitlink::Edge::Kind EdgeKind;
const auto &TT = ENP.getExecutionSession().getTargetTriple();
switch (TT.getArch()) {
case Triple::x86_64:
PointerSize = 8;
- Endianness = support::endianness::little;
+ Endianness = llvm::endianness::little;
EdgeKind = jitlink::x86_64::Pointer64;
break;
case Triple::aarch64:
PointerSize = 8;
- Endianness = support::endianness::little;
+ Endianness = llvm::endianness::little;
EdgeKind = jitlink::aarch64::Pointer64;
break;
+ case Triple::ppc64:
+ PointerSize = 8;
+ Endianness = llvm::endianness::big;
+ EdgeKind = jitlink::ppc64::Pointer64;
+ break;
+ case Triple::ppc64le:
+ PointerSize = 8;
+ Endianness = llvm::endianness::little;
+ EdgeKind = jitlink::ppc64::Pointer64;
+ break;
default:
llvm_unreachable("Unrecognized architecture");
}
@@ -238,6 +249,9 @@ bool ELFNixPlatform::supportedTarget(const Triple &TT) {
switch (TT.getArch()) {
case Triple::x86_64:
case Triple::aarch64:
+ // FIXME: jitlink for ppc64 hasn't been well tested, leave it unsupported
+ // right now.
+ case Triple::ppc64le:
return true;
default:
return false;
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp
index 56cd982cd5e1..f15315260ab0 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp
@@ -9,67 +9,40 @@
#include "llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h"
#include "llvm/ExecutionEngine/Orc/Core.h"
-#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
using namespace llvm::orc::shared;
namespace llvm {
namespace orc {
-Expected<std::unique_ptr<EPCEHFrameRegistrar>> EPCEHFrameRegistrar::Create(
- ExecutionSession &ES,
- std::optional<ExecutorAddr> RegistrationFunctionsDylib) {
- // FIXME: Proper mangling here -- we really need to decouple linker mangling
- // from DataLayout.
-
- // Find the addresses of the registration/deregistration functions in the
- // executor process.
- auto &EPC = ES.getExecutorProcessControl();
-
- if (!RegistrationFunctionsDylib) {
- if (auto D = EPC.loadDylib(nullptr))
- RegistrationFunctionsDylib = *D;
- else
- return D.takeError();
- }
-
- std::string RegisterWrapperName, DeregisterWrapperName;
- if (EPC.getTargetTriple().isOSBinFormatMachO()) {
- RegisterWrapperName += '_';
- DeregisterWrapperName += '_';
- }
- RegisterWrapperName += "llvm_orc_registerEHFrameSectionWrapper";
- DeregisterWrapperName += "llvm_orc_deregisterEHFrameSectionWrapper";
-
- SymbolLookupSet RegistrationSymbols;
- RegistrationSymbols.add(EPC.intern(RegisterWrapperName));
- RegistrationSymbols.add(EPC.intern(DeregisterWrapperName));
-
- auto Result =
- EPC.lookupSymbols({{*RegistrationFunctionsDylib, RegistrationSymbols}});
- if (!Result)
- return Result.takeError();
-
- assert(Result->size() == 1 && "Unexpected number of dylibs in result");
- assert((*Result)[0].size() == 2 &&
- "Unexpected number of addresses in result");
-
- auto RegisterEHFrameWrapperFnAddr = (*Result)[0][0];
- auto DeregisterEHFrameWrapperFnAddr = (*Result)[0][1];
-
- return std::make_unique<EPCEHFrameRegistrar>(ES, RegisterEHFrameWrapperFnAddr,
- DeregisterEHFrameWrapperFnAddr);
+Expected<std::unique_ptr<EPCEHFrameRegistrar>>
+EPCEHFrameRegistrar::Create(ExecutionSession &ES) {
+
+  // Look up the addresses of the registration/deregistration functions in
+  // the bootstrap map.
+ ExecutorAddr RegisterEHFrameSectionWrapper;
+ ExecutorAddr DeregisterEHFrameSectionWrapper;
+ if (auto Err = ES.getExecutorProcessControl().getBootstrapSymbols(
+ {{RegisterEHFrameSectionWrapper,
+ rt::RegisterEHFrameSectionWrapperName},
+ {DeregisterEHFrameSectionWrapper,
+ rt::DeregisterEHFrameSectionWrapperName}}))
+ return std::move(Err);
+
+ return std::make_unique<EPCEHFrameRegistrar>(
+ ES, RegisterEHFrameSectionWrapper, DeregisterEHFrameSectionWrapper);
}
Error EPCEHFrameRegistrar::registerEHFrames(ExecutorAddrRange EHFrameSection) {
return ES.callSPSWrapper<void(SPSExecutorAddrRange)>(
- RegisterEHFrameWrapperFnAddr, EHFrameSection);
+ RegisterEHFrameSectionWrapper, EHFrameSection);
}
Error EPCEHFrameRegistrar::deregisterEHFrames(
ExecutorAddrRange EHFrameSection) {
return ES.callSPSWrapper<void(SPSExecutorAddrRange)>(
- DeregisterEHFrameWrapperFnAddr, EHFrameSection);
+ DeregisterEHFrameSectionWrapper, EHFrameSection);
}
} // end namespace orc
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
index fb685e6c3727..8d5608cc4d4c 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
@@ -103,8 +103,8 @@ bool StaticInitGVIterator::isStaticInitGlobal(GlobalValue &GV) {
// FIXME: These section checks are too strict: We should match first and
// second word split by comma.
if (GV.hasSection() &&
- (GV.getSection().startswith("__DATA,__objc_classlist") ||
- GV.getSection().startswith("__DATA,__objc_selrefs")))
+ (GV.getSection().starts_with("__DATA,__objc_classlist") ||
+ GV.getSection().starts_with("__DATA,__objc_selrefs")))
return true;
}
@@ -284,7 +284,7 @@ StaticLibraryDefinitionGenerator::Load(
// If this is a universal binary then search for a slice matching the given
// Triple.
- if (auto *UB = cast<object::MachOUniversalBinary>(B->getBinary())) {
+ if (auto *UB = dyn_cast<object::MachOUniversalBinary>(B->getBinary())) {
const auto &TT = L.getExecutionSession().getTargetTriple();
@@ -347,7 +347,7 @@ StaticLibraryDefinitionGenerator::Create(
// If this is a universal binary then search for a slice matching the given
// Triple.
- if (auto *UB = cast<object::MachOUniversalBinary>(B->get())) {
+ if (auto *UB = dyn_cast<object::MachOUniversalBinary>(B->get())) {
const auto &TT = L.getExecutionSession().getTargetTriple();
@@ -503,7 +503,7 @@ Error DLLImportDefinitionGenerator::tryToGenerate(
DenseMap<StringRef, SymbolLookupFlags> ToLookUpSymbols;
for (auto &KV : Symbols) {
StringRef Deinterned = *KV.first;
- if (Deinterned.startswith(getImpPrefix()))
+ if (Deinterned.starts_with(getImpPrefix()))
Deinterned = Deinterned.drop_front(StringRef(getImpPrefix()).size());
// Don't degrade the required state
if (ToLookUpSymbols.count(Deinterned) &&
@@ -538,11 +538,11 @@ DLLImportDefinitionGenerator::getTargetPointerSize(const Triple &TT) {
}
}
-Expected<support::endianness>
+Expected<llvm::endianness>
DLLImportDefinitionGenerator::getTargetEndianness(const Triple &TT) {
switch (TT.getArch()) {
case Triple::x86_64:
- return support::endianness::little;
+ return llvm::endianness::little;
default:
return make_error<StringError>(
"architecture unsupported by DLLImportDefinitionGenerator",
@@ -553,7 +553,7 @@ DLLImportDefinitionGenerator::getTargetEndianness(const Triple &TT) {
Expected<std::unique_ptr<jitlink::LinkGraph>>
DLLImportDefinitionGenerator::createStubsGraph(const SymbolMap &Resolved) {
Triple TT = ES.getTargetTriple();
- auto PointerSize = getTargetEndianness(TT);
+ auto PointerSize = getTargetPointerSize(TT);
if (!PointerSize)
return PointerSize.takeError();
auto Endianness = getTargetEndianness(TT);
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp
index b8b013f8a7a9..ad27deff38d9 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp
@@ -9,6 +9,8 @@
#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h"
#include "llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Process.h"
@@ -27,7 +29,8 @@ SelfExecutorProcessControl::SelfExecutorProcessControl(
std::shared_ptr<SymbolStringPool> SSP, std::unique_ptr<TaskDispatcher> D,
Triple TargetTriple, unsigned PageSize,
std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr)
- : ExecutorProcessControl(std::move(SSP), std::move(D)) {
+ : ExecutorProcessControl(std::move(SSP), std::move(D)),
+ InProcessMemoryAccess(TargetTriple.isArch64Bit()) {
OwnedMemMgr = std::move(MemMgr);
if (!OwnedMemMgr)
@@ -42,6 +45,11 @@ SelfExecutorProcessControl::SelfExecutorProcessControl(
ExecutorAddr::fromPtr(this)};
if (this->TargetTriple.isOSBinFormatMachO())
GlobalManglingPrefix = '_';
+
+ this->BootstrapSymbols[rt::RegisterEHFrameSectionWrapperName] =
+ ExecutorAddr::fromPtr(&llvm_orc_registerEHFrameSectionWrapper);
+ this->BootstrapSymbols[rt::DeregisterEHFrameSectionWrapperName] =
+ ExecutorAddr::fromPtr(&llvm_orc_deregisterEHFrameSectionWrapper);
}
Expected<std::unique_ptr<SelfExecutorProcessControl>>
@@ -139,41 +147,54 @@ Error SelfExecutorProcessControl::disconnect() {
return Error::success();
}
-void SelfExecutorProcessControl::writeUInt8sAsync(
- ArrayRef<tpctypes::UInt8Write> Ws, WriteResultFn OnWriteComplete) {
+void InProcessMemoryAccess::writeUInt8sAsync(ArrayRef<tpctypes::UInt8Write> Ws,
+ WriteResultFn OnWriteComplete) {
for (auto &W : Ws)
*W.Addr.toPtr<uint8_t *>() = W.Value;
OnWriteComplete(Error::success());
}
-void SelfExecutorProcessControl::writeUInt16sAsync(
+void InProcessMemoryAccess::writeUInt16sAsync(
ArrayRef<tpctypes::UInt16Write> Ws, WriteResultFn OnWriteComplete) {
for (auto &W : Ws)
*W.Addr.toPtr<uint16_t *>() = W.Value;
OnWriteComplete(Error::success());
}
-void SelfExecutorProcessControl::writeUInt32sAsync(
+void InProcessMemoryAccess::writeUInt32sAsync(
ArrayRef<tpctypes::UInt32Write> Ws, WriteResultFn OnWriteComplete) {
for (auto &W : Ws)
*W.Addr.toPtr<uint32_t *>() = W.Value;
OnWriteComplete(Error::success());
}
-void SelfExecutorProcessControl::writeUInt64sAsync(
+void InProcessMemoryAccess::writeUInt64sAsync(
ArrayRef<tpctypes::UInt64Write> Ws, WriteResultFn OnWriteComplete) {
for (auto &W : Ws)
*W.Addr.toPtr<uint64_t *>() = W.Value;
OnWriteComplete(Error::success());
}
-void SelfExecutorProcessControl::writeBuffersAsync(
+void InProcessMemoryAccess::writeBuffersAsync(
ArrayRef<tpctypes::BufferWrite> Ws, WriteResultFn OnWriteComplete) {
for (auto &W : Ws)
memcpy(W.Addr.toPtr<char *>(), W.Buffer.data(), W.Buffer.size());
OnWriteComplete(Error::success());
}
+void InProcessMemoryAccess::writePointersAsync(
+ ArrayRef<tpctypes::PointerWrite> Ws, WriteResultFn OnWriteComplete) {
+ if (IsArch64Bit) {
+ for (auto &W : Ws)
+ *W.Addr.toPtr<uint64_t *>() = W.Value.getValue();
+ } else {
+ for (auto &W : Ws)
+ *W.Addr.toPtr<uint32_t *>() = static_cast<uint32_t>(W.Value.getValue());
+ }
+
+ OnWriteComplete(Error::success());
+}
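+
+// For illustration: on a 32-bit target a PointerWrite carrying
+// 0x0000000012345678 stores the truncated value 0x12345678, matching the
+// static_cast above; 64-bit targets store the full value.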
+
shared::CWrapperFunctionResult
SelfExecutorProcessControl::jitDispatchViaWrapperFunctionManager(
void *Ctx, const void *FnTag, const char *Data, size_t Size) {
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index a0d81cdf2086..f9efff148df9 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -244,8 +244,7 @@ Constant* createIRTypedAddress(FunctionType &FT, ExecutorAddr Addr) {
Constant *AddrIntVal =
ConstantInt::get(Type::getInt64Ty(FT.getContext()), Addr.getValue());
Constant *AddrPtrVal =
- ConstantExpr::getCast(Instruction::IntToPtr, AddrIntVal,
- PointerType::get(&FT, 0));
+ ConstantExpr::getIntToPtr(AddrIntVal, PointerType::get(&FT, 0));
return AddrPtrVal;
}
@@ -286,7 +285,7 @@ std::vector<GlobalValue *> SymbolLinkagePromoter::operator()(Module &M) {
// Rename if necessary.
if (!GV.hasName())
GV.setName("__orc_anon." + Twine(NextId++));
- else if (GV.getName().startswith("\01L"))
+ else if (GV.getName().starts_with("\01L"))
GV.setName("__" + GV.getName().substr(1) + "." + Twine(NextId++));
else if (GV.hasLocalLinkage())
GV.setName("__orc_lcl." + GV.getName() + "." + Twine(NextId++));
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
index b66f52f1ec5d..17a96dee1000 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
@@ -126,16 +126,16 @@ void JITTargetMachineBuilderPrinter::print(raw_ostream &OS) const {
OS << "\n"
<< Indent << " Optimization Level = ";
switch (JTMB.OptLevel) {
- case CodeGenOpt::None:
+ case CodeGenOptLevel::None:
OS << "None";
break;
- case CodeGenOpt::Less:
+ case CodeGenOptLevel::Less:
OS << "Less";
break;
- case CodeGenOpt::Default:
+ case CodeGenOptLevel::Default:
OS << "Default";
break;
- case CodeGenOpt::Aggressive:
+ case CodeGenOptLevel::Aggressive:
OS << "Aggressive";
break;
}
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
index 7c7c2f000368..a19e17029810 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -10,8 +10,6 @@
#include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"
#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
#include "llvm/ExecutionEngine/Orc/COFFPlatform.h"
-#include "llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h"
-#include "llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h"
#include "llvm/ExecutionEngine/Orc/ELFNixPlatform.h"
#include "llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h"
#include "llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h"
@@ -29,8 +27,6 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/DynamicLibrary.h"
-#include <map>
-
#define DEBUG_TYPE "orc"
using namespace llvm;
@@ -88,65 +84,6 @@ Function *addHelperAndWrapper(Module &M, StringRef WrapperName,
return WrapperFn;
}
-class ORCPlatformSupport : public LLJIT::PlatformSupport {
-public:
- ORCPlatformSupport(orc::LLJIT &J) : J(J) {}
-
- Error initialize(orc::JITDylib &JD) override {
- using llvm::orc::shared::SPSExecutorAddr;
- using llvm::orc::shared::SPSString;
- using SPSDLOpenSig = SPSExecutorAddr(SPSString, int32_t);
- enum dlopen_mode : int32_t {
- ORC_RT_RTLD_LAZY = 0x1,
- ORC_RT_RTLD_NOW = 0x2,
- ORC_RT_RTLD_LOCAL = 0x4,
- ORC_RT_RTLD_GLOBAL = 0x8
- };
-
- auto &ES = J.getExecutionSession();
- auto MainSearchOrder = J.getMainJITDylib().withLinkOrderDo(
- [](const JITDylibSearchOrder &SO) { return SO; });
-
- if (auto WrapperAddr =
- ES.lookup(MainSearchOrder,
- J.mangleAndIntern("__orc_rt_jit_dlopen_wrapper"))) {
- return ES.callSPSWrapper<SPSDLOpenSig>(WrapperAddr->getAddress(),
- DSOHandles[&JD], JD.getName(),
- int32_t(ORC_RT_RTLD_LAZY));
- } else
- return WrapperAddr.takeError();
- }
-
- Error deinitialize(orc::JITDylib &JD) override {
- using llvm::orc::shared::SPSExecutorAddr;
- using SPSDLCloseSig = int32_t(SPSExecutorAddr);
-
- auto &ES = J.getExecutionSession();
- auto MainSearchOrder = J.getMainJITDylib().withLinkOrderDo(
- [](const JITDylibSearchOrder &SO) { return SO; });
-
- if (auto WrapperAddr =
- ES.lookup(MainSearchOrder,
- J.mangleAndIntern("__orc_rt_jit_dlclose_wrapper"))) {
- int32_t result;
- auto E = J.getExecutionSession().callSPSWrapper<SPSDLCloseSig>(
- WrapperAddr->getAddress(), result, DSOHandles[&JD]);
- if (E)
- return E;
- else if (result)
- return make_error<StringError>("dlclose failed",
- inconvertibleErrorCode());
- DSOHandles.erase(&JD);
- } else
- return WrapperAddr.takeError();
- return Error::success();
- }
-
-private:
- orc::LLJIT &J;
- DenseMap<orc::JITDylib *, orc::ExecutorAddr> DSOHandles;
-};
-
class GenericLLVMIRPlatformSupport;
/// orc::Platform component of Generic LLVM IR Platform support.
@@ -276,11 +213,11 @@ public:
// will trigger a lookup to materialize the module) and the InitFunctions
// map (which holds the names of the symbols to execute).
for (auto &KV : MU.getSymbols())
- if ((*KV.first).startswith(InitFunctionPrefix)) {
+ if ((*KV.first).starts_with(InitFunctionPrefix)) {
InitSymbols[&JD].add(KV.first,
SymbolLookupFlags::WeaklyReferencedSymbol);
InitFunctions[&JD].add(KV.first);
- } else if ((*KV.first).startswith(DeInitFunctionPrefix)) {
+ } else if ((*KV.first).starts_with(DeInitFunctionPrefix)) {
DeInitFunctions[&JD].add(KV.first);
}
}
@@ -660,6 +597,54 @@ public:
namespace llvm {
namespace orc {
+Error ORCPlatformSupport::initialize(orc::JITDylib &JD) {
+ using llvm::orc::shared::SPSExecutorAddr;
+ using llvm::orc::shared::SPSString;
+ using SPSDLOpenSig = SPSExecutorAddr(SPSString, int32_t);
+ enum dlopen_mode : int32_t {
+ ORC_RT_RTLD_LAZY = 0x1,
+ ORC_RT_RTLD_NOW = 0x2,
+ ORC_RT_RTLD_LOCAL = 0x4,
+ ORC_RT_RTLD_GLOBAL = 0x8
+ };
+
+ auto &ES = J.getExecutionSession();
+ auto MainSearchOrder = J.getMainJITDylib().withLinkOrderDo(
+ [](const JITDylibSearchOrder &SO) { return SO; });
+
+ if (auto WrapperAddr = ES.lookup(
+ MainSearchOrder, J.mangleAndIntern("__orc_rt_jit_dlopen_wrapper"))) {
+ return ES.callSPSWrapper<SPSDLOpenSig>(WrapperAddr->getAddress(),
+ DSOHandles[&JD], JD.getName(),
+ int32_t(ORC_RT_RTLD_LAZY));
+ } else
+ return WrapperAddr.takeError();
+}
+
+Error ORCPlatformSupport::deinitialize(orc::JITDylib &JD) {
+ using llvm::orc::shared::SPSExecutorAddr;
+ using SPSDLCloseSig = int32_t(SPSExecutorAddr);
+
+ auto &ES = J.getExecutionSession();
+ auto MainSearchOrder = J.getMainJITDylib().withLinkOrderDo(
+ [](const JITDylibSearchOrder &SO) { return SO; });
+
+ if (auto WrapperAddr = ES.lookup(
+ MainSearchOrder, J.mangleAndIntern("__orc_rt_jit_dlclose_wrapper"))) {
+ int32_t result;
+ auto E = J.getExecutionSession().callSPSWrapper<SPSDLCloseSig>(
+ WrapperAddr->getAddress(), result, DSOHandles[&JD]);
+ if (E)
+ return E;
+ else if (result)
+ return make_error<StringError>("dlclose failed",
+ inconvertibleErrorCode());
+ DSOHandles.erase(&JD);
+ } else
+ return WrapperAddr.takeError();
+ return Error::success();
+}
+
void LLJIT::PlatformSupport::setInitTransform(
LLJIT &J, IRTransformLayer::TransformFunction T) {
J.InitHelperTransformLayer->setTransform(std::move(T));
@@ -752,6 +737,12 @@ Error LLJITBuilderState::prepareForConstruction() {
case Triple::x86_64:
UseJITLink = !TT.isOSBinFormatCOFF();
break;
+ case Triple::ppc64:
+ UseJITLink = TT.isPPC64ELFv2ABI();
+ break;
+ case Triple::ppc64le:
+ UseJITLink = TT.isOSBinFormatELF();
+ break;
default:
break;
}
@@ -775,25 +766,17 @@ Error LLJITBuilderState::prepareForConstruction() {
// If we need a process JITDylib but no setup function has been given then
// create a default one.
- if (!SetupProcessSymbolsJITDylib &&
- (LinkProcessSymbolsByDefault || EnableDebuggerSupport)) {
-
- LLVM_DEBUG({
- dbgs() << "Creating default Process JD setup function (neeeded for";
- if (LinkProcessSymbolsByDefault)
- dbgs() << " <link-process-syms-by-default>";
- if (EnableDebuggerSupport)
- dbgs() << " <debugger-support>";
- dbgs() << ")\n";
- });
-
- SetupProcessSymbolsJITDylib = [this](JITDylib &JD) -> Error {
+ if (!SetupProcessSymbolsJITDylib && LinkProcessSymbolsByDefault) {
+ LLVM_DEBUG(dbgs() << "Creating default Process JD setup function\n");
+ SetupProcessSymbolsJITDylib = [this](LLJIT &J) -> Expected<JITDylibSP> {
+ auto &JD =
+ J.getExecutionSession().createBareJITDylib("<Process Symbols>");
auto G = orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(
DL->getGlobalPrefix());
if (!G)
return G.takeError();
JD.addGenerator(std::move(*G));
- return Error::success();
+ return &JD;
};
}
@@ -998,50 +981,18 @@ LLJIT::LLJIT(LLJITBuilderState &S, Error &Err)
}
if (S.SetupProcessSymbolsJITDylib) {
- ProcessSymbols = &ES->createBareJITDylib("<Process Symbols>");
- if (auto Err2 = S.SetupProcessSymbolsJITDylib(*ProcessSymbols)) {
- Err = std::move(Err2);
+ if (auto ProcSymsJD = S.SetupProcessSymbolsJITDylib(*this)) {
+ ProcessSymbols = ProcSymsJD->get();
+ } else {
+ Err = ProcSymsJD.takeError();
return;
}
}
- if (S.EnableDebuggerSupport) {
- if (auto *OLL = dyn_cast<ObjectLinkingLayer>(ObjLinkingLayer.get())) {
- switch (TT.getObjectFormat()) {
- case Triple::ELF: {
- auto Registrar = createJITLoaderGDBRegistrar(*ES);
- if (!Registrar) {
- Err = Registrar.takeError();
- return;
- }
- OLL->addPlugin(std::make_unique<DebugObjectManagerPlugin>(
- *ES, std::move(*Registrar), true, true));
- break;
- }
- case Triple::MachO: {
- assert(ProcessSymbols && "ProcessSymbols JD should be available when "
- "EnableDebuggerSupport is set");
- auto DS =
- GDBJITDebugInfoRegistrationPlugin::Create(*ES, *ProcessSymbols, TT);
- if (!DS) {
- Err = DS.takeError();
- return;
- }
- OLL->addPlugin(std::move(*DS));
- break;
- }
- default:
- LLVM_DEBUG({
- dbgs() << "Cannot enable LLJIT debugger support: "
- << Triple::getObjectFormatTypeName(TT.getObjectFormat())
- << " not supported.\n";
- });
- }
- } else {
- LLVM_DEBUG({
- dbgs() << "Cannot enable LLJIT debugger support: "
- " debugger support is only available when using JITLink.\n";
- });
+ if (S.PrePlatformSetup) {
+ if (auto Err2 = S.PrePlatformSetup(*this)) {
+ Err = std::move(Err2);
+ return;
}
}
@@ -1131,7 +1082,7 @@ Expected<JITDylibSP> ExecutorNativePlatform::operator()(LLJIT &J) {
if (!ObjLinkingLayer)
return make_error<StringError>(
- "SetUpTargetPlatform requires ObjectLinkingLayer",
+ "ExecutorNativePlatform requires ObjectLinkingLayer",
inconvertibleErrorCode());
std::unique_ptr<MemoryBuffer> RuntimeArchiveBuffer;
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
index a3a766d602c1..9057300bf043 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
@@ -15,6 +15,7 @@
#include "llvm/ExecutionEngine/Orc/DebugUtils.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h"
+#include "llvm/ExecutionEngine/Orc/MachOBuilder.h"
#include "llvm/ExecutionEngine/Orc/Shared/ObjectFormats.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/Debug.h"
@@ -34,6 +35,8 @@ using SPSMachOJITDylibDepInfo = SPSTuple<bool, SPSSequence<SPSExecutorAddr>>;
using SPSMachOJITDylibDepInfoMap =
SPSSequence<SPSTuple<SPSExecutorAddr, SPSMachOJITDylibDepInfo>>;
+class SPSMachOExecutorSymbolFlags;
+
template <>
class SPSSerializationTraits<SPSMachOJITDylibDepInfo,
MachOPlatform::MachOJITDylibDepInfo> {
@@ -55,23 +58,54 @@ public:
}
};
+template <>
+class SPSSerializationTraits<SPSMachOExecutorSymbolFlags,
+ MachOPlatform::MachOExecutorSymbolFlags> {
+private:
+ using UT = std::underlying_type_t<MachOPlatform::MachOExecutorSymbolFlags>;
+
+public:
+ static size_t size(const MachOPlatform::MachOExecutorSymbolFlags &SF) {
+ return sizeof(UT);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const MachOPlatform::MachOExecutorSymbolFlags &SF) {
+ return SPSArgList<UT>::serialize(OB, static_cast<UT>(SF));
+ }
+
+ static bool deserialize(SPSInputBuffer &IB,
+ MachOPlatform::MachOExecutorSymbolFlags &SF) {
+ UT Tmp;
+ if (!SPSArgList<UT>::deserialize(IB, Tmp))
+ return false;
+ SF = static_cast<MachOPlatform::MachOExecutorSymbolFlags>(Tmp);
+ return true;
+ }
+};
+
} // namespace shared
} // namespace orc
} // namespace llvm
namespace {
+using SPSRegisterSymbolsArgs =
+ SPSArgList<SPSExecutorAddr,
+ SPSSequence<SPSTuple<SPSExecutorAddr, SPSExecutorAddr,
+ SPSMachOExecutorSymbolFlags>>>;
+
std::unique_ptr<jitlink::LinkGraph> createPlatformGraph(MachOPlatform &MOP,
std::string Name) {
unsigned PointerSize;
- support::endianness Endianness;
+ llvm::endianness Endianness;
const auto &TT = MOP.getExecutionSession().getTargetTriple();
switch (TT.getArch()) {
case Triple::aarch64:
case Triple::x86_64:
PointerSize = 8;
- Endianness = support::endianness::little;
+ Endianness = llvm::endianness::little;
break;
default:
llvm_unreachable("Unrecognized architecture");
@@ -82,119 +116,32 @@ std::unique_ptr<jitlink::LinkGraph> createPlatformGraph(MachOPlatform &MOP,
jitlink::getGenericEdgeKindName);
}
-// Generates a MachO header.
-class MachOHeaderMaterializationUnit : public MaterializationUnit {
-public:
- MachOHeaderMaterializationUnit(MachOPlatform &MOP,
- const SymbolStringPtr &HeaderStartSymbol)
- : MaterializationUnit(createHeaderInterface(MOP, HeaderStartSymbol)),
- MOP(MOP) {}
-
- StringRef getName() const override { return "MachOHeaderMU"; }
-
- void materialize(std::unique_ptr<MaterializationResponsibility> R) override {
- auto G = createPlatformGraph(MOP, "<MachOHeaderMU>");
- addMachOHeader(*G, MOP, R->getInitializerSymbol());
- MOP.getObjectLinkingLayer().emit(std::move(R), std::move(G));
- }
-
- void discard(const JITDylib &JD, const SymbolStringPtr &Sym) override {}
-
- static void addMachOHeader(jitlink::LinkGraph &G, MachOPlatform &MOP,
- const SymbolStringPtr &InitializerSymbol) {
- auto &HeaderSection = G.createSection("__header", MemProt::Read);
- auto &HeaderBlock = createHeaderBlock(G, HeaderSection);
-
- // Init symbol is header-start symbol.
- G.addDefinedSymbol(HeaderBlock, 0, *InitializerSymbol,
- HeaderBlock.getSize(), jitlink::Linkage::Strong,
- jitlink::Scope::Default, false, true);
- for (auto &HS : AdditionalHeaderSymbols)
- G.addDefinedSymbol(HeaderBlock, HS.Offset, HS.Name, HeaderBlock.getSize(),
- jitlink::Linkage::Strong, jitlink::Scope::Default,
- false, true);
- }
-
-private:
- struct HeaderSymbol {
- const char *Name;
- uint64_t Offset;
- };
-
- static constexpr HeaderSymbol AdditionalHeaderSymbols[] = {
- {"___mh_executable_header", 0}};
-
- static jitlink::Block &createHeaderBlock(jitlink::LinkGraph &G,
- jitlink::Section &HeaderSection) {
- MachO::mach_header_64 Hdr;
- Hdr.magic = MachO::MH_MAGIC_64;
- switch (G.getTargetTriple().getArch()) {
- case Triple::aarch64:
- Hdr.cputype = MachO::CPU_TYPE_ARM64;
- Hdr.cpusubtype = MachO::CPU_SUBTYPE_ARM64_ALL;
- break;
- case Triple::x86_64:
- Hdr.cputype = MachO::CPU_TYPE_X86_64;
- Hdr.cpusubtype = MachO::CPU_SUBTYPE_X86_64_ALL;
- break;
- default:
- llvm_unreachable("Unrecognized architecture");
- }
- Hdr.filetype = MachO::MH_DYLIB; // Custom file type?
- Hdr.ncmds = 0;
- Hdr.sizeofcmds = 0;
- Hdr.flags = 0;
- Hdr.reserved = 0;
-
- if (G.getEndianness() != support::endian::system_endianness())
- MachO::swapStruct(Hdr);
-
- auto HeaderContent = G.allocateContent(
- ArrayRef<char>(reinterpret_cast<const char *>(&Hdr), sizeof(Hdr)));
-
- return G.createContentBlock(HeaderSection, HeaderContent, ExecutorAddr(), 8,
- 0);
- }
-
- static MaterializationUnit::Interface
- createHeaderInterface(MachOPlatform &MOP,
- const SymbolStringPtr &HeaderStartSymbol) {
- SymbolFlagsMap HeaderSymbolFlags;
-
- HeaderSymbolFlags[HeaderStartSymbol] = JITSymbolFlags::Exported;
- for (auto &HS : AdditionalHeaderSymbols)
- HeaderSymbolFlags[MOP.getExecutionSession().intern(HS.Name)] =
- JITSymbolFlags::Exported;
-
- return MaterializationUnit::Interface(std::move(HeaderSymbolFlags),
- HeaderStartSymbol);
- }
-
- MachOPlatform &MOP;
-};
-
-constexpr MachOHeaderMaterializationUnit::HeaderSymbol
- MachOHeaderMaterializationUnit::AdditionalHeaderSymbols[];
-
// Creates a Bootstrap-Complete LinkGraph to run deferred actions.
class MachOPlatformCompleteBootstrapMaterializationUnit
: public MaterializationUnit {
public:
+ using SymbolTableVector =
+ SmallVector<std::tuple<ExecutorAddr, ExecutorAddr,
+ MachOPlatform::MachOExecutorSymbolFlags>>;
+
MachOPlatformCompleteBootstrapMaterializationUnit(
MachOPlatform &MOP, StringRef PlatformJDName,
- SymbolStringPtr CompleteBootstrapSymbol, shared::AllocActions DeferredAAs,
+ SymbolStringPtr CompleteBootstrapSymbol, SymbolTableVector SymTab,
+ shared::AllocActions DeferredAAs, ExecutorAddr MachOHeaderAddr,
ExecutorAddr PlatformBootstrap, ExecutorAddr PlatformShutdown,
ExecutorAddr RegisterJITDylib, ExecutorAddr DeregisterJITDylib,
- ExecutorAddr MachOHeaderAddr)
+ ExecutorAddr RegisterObjectSymbolTable,
+ ExecutorAddr DeregisterObjectSymbolTable)
: MaterializationUnit(
{{{CompleteBootstrapSymbol, JITSymbolFlags::None}}, nullptr}),
MOP(MOP), PlatformJDName(PlatformJDName),
CompleteBootstrapSymbol(std::move(CompleteBootstrapSymbol)),
- DeferredAAs(std::move(DeferredAAs)),
- PlatformBootstrap(PlatformBootstrap),
+ SymTab(std::move(SymTab)), DeferredAAs(std::move(DeferredAAs)),
+ MachOHeaderAddr(MachOHeaderAddr), PlatformBootstrap(PlatformBootstrap),
PlatformShutdown(PlatformShutdown), RegisterJITDylib(RegisterJITDylib),
DeregisterJITDylib(DeregisterJITDylib),
- MachOHeaderAddr(MachOHeaderAddr) {}
+ RegisterObjectSymbolTable(RegisterObjectSymbolTable),
+ DeregisterObjectSymbolTable(DeregisterObjectSymbolTable) {}
StringRef getName() const override {
return "MachOPlatformCompleteBootstrap";
@@ -211,7 +158,7 @@ public:
Linkage::Strong, Scope::Hidden, false, true);
-    // Reserve space for the stolen actions, plus two extras.
+    // Reserve space for the stolen actions, plus three extras.
- G->allocActions().reserve(DeferredAAs.size() + 2);
+ G->allocActions().reserve(DeferredAAs.size() + 3);
// 1. Bootstrap the platform support code.
G->allocActions().push_back(
@@ -227,7 +174,14 @@ public:
cantFail(WrapperFunctionCall::Create<SPSArgList<SPSExecutorAddr>>(
DeregisterJITDylib, MachOHeaderAddr))});
- // 3. Add the deferred actions to the graph.
+ // 3. Register deferred symbols.
+ G->allocActions().push_back(
+ {cantFail(WrapperFunctionCall::Create<SPSRegisterSymbolsArgs>(
+ RegisterObjectSymbolTable, MachOHeaderAddr, SymTab)),
+ cantFail(WrapperFunctionCall::Create<SPSRegisterSymbolsArgs>(
+ DeregisterObjectSymbolTable, MachOHeaderAddr, SymTab))});
+
+ // 4. Add the deferred actions to the graph.
std::move(DeferredAAs.begin(), DeferredAAs.end(),
std::back_inserter(G->allocActions()));
@@ -240,12 +194,15 @@ private:
MachOPlatform &MOP;
StringRef PlatformJDName;
SymbolStringPtr CompleteBootstrapSymbol;
+ SymbolTableVector SymTab;
shared::AllocActions DeferredAAs;
+ ExecutorAddr MachOHeaderAddr;
ExecutorAddr PlatformBootstrap;
ExecutorAddr PlatformShutdown;
ExecutorAddr RegisterJITDylib;
ExecutorAddr DeregisterJITDylib;
- ExecutorAddr MachOHeaderAddr;
+ ExecutorAddr RegisterObjectSymbolTable;
+ ExecutorAddr DeregisterObjectSymbolTable;
};
static StringRef ObjCRuntimeObjectSectionsData[] = {
@@ -266,6 +223,33 @@ static StringRef ObjCRuntimeObjectSectionName =
static StringRef ObjCImageInfoSymbolName =
"__llvm_jitlink_macho_objc_imageinfo";
+struct ObjCImageInfoFlags {
+ uint16_t SwiftABIVersion;
+ uint16_t SwiftVersion;
+ bool HasCategoryClassProperties;
+ bool HasSignedObjCClassROs;
+
+ static constexpr uint32_t SIGNED_CLASS_RO = (1 << 4);
+ static constexpr uint32_t HAS_CATEGORY_CLASS_PROPERTIES = (1 << 6);
+
+ explicit ObjCImageInfoFlags(uint32_t RawFlags) {
+ HasSignedObjCClassROs = RawFlags & SIGNED_CLASS_RO;
+ HasCategoryClassProperties = RawFlags & HAS_CATEGORY_CLASS_PROPERTIES;
+ SwiftABIVersion = (RawFlags >> 8) & 0xFF;
+ SwiftVersion = (RawFlags >> 16) & 0xFFFF;
+ }
+
+ uint32_t rawFlags() const {
+ uint32_t Result = 0;
+ if (HasCategoryClassProperties)
+ Result |= HAS_CATEGORY_CLASS_PROPERTIES;
+ if (HasSignedObjCClassROs)
+ Result |= SIGNED_CLASS_RO;
+ Result |= (SwiftABIVersion << 8);
+ Result |= (SwiftVersion << 16);
+ return Result;
+ }
+};
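+
+// A worked round-trip (a sketch with a hypothetical raw value): RawFlags ==
+// 0x00070740 decodes to SwiftVersion == 7 (bits 16-31), SwiftABIVersion == 7
+// (bits 8-15) and HasCategoryClassProperties == true (bit 6), and rawFlags()
+// re-packs those fields to the same 0x00070740.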
} // end anonymous namespace
namespace llvm {
@@ -275,6 +259,7 @@ Expected<std::unique_ptr<MachOPlatform>>
MachOPlatform::Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
JITDylib &PlatformJD,
std::unique_ptr<DefinitionGenerator> OrcRuntime,
+ MachOHeaderMUBuilder BuildMachOHeaderMU,
std::optional<SymbolAliasMap> RuntimeAliases) {
// If the target is not supported then bail out immediately.
@@ -305,8 +290,9 @@ MachOPlatform::Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
// Create the instance.
Error Err = Error::success();
- auto P = std::unique_ptr<MachOPlatform>(new MachOPlatform(
- ES, ObjLinkingLayer, PlatformJD, std::move(OrcRuntime), Err));
+ auto P = std::unique_ptr<MachOPlatform>(
+ new MachOPlatform(ES, ObjLinkingLayer, PlatformJD, std::move(OrcRuntime),
+ std::move(BuildMachOHeaderMU), Err));
if (Err)
return std::move(Err);
return std::move(P);
@@ -315,6 +301,7 @@ MachOPlatform::Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
Expected<std::unique_ptr<MachOPlatform>>
MachOPlatform::Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
JITDylib &PlatformJD, const char *OrcRuntimePath,
+ MachOHeaderMUBuilder BuildMachOHeaderMU,
std::optional<SymbolAliasMap> RuntimeAliases) {
// Create a generator for the ORC runtime archive.
@@ -325,12 +312,11 @@ MachOPlatform::Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
return Create(ES, ObjLinkingLayer, PlatformJD,
std::move(*OrcRuntimeArchiveGenerator),
- std::move(RuntimeAliases));
+ std::move(BuildMachOHeaderMU), std::move(RuntimeAliases));
}
Error MachOPlatform::setupJITDylib(JITDylib &JD) {
- if (auto Err = JD.define(std::make_unique<MachOHeaderMaterializationUnit>(
- *this, MachOHeaderStartSymbol)))
+ if (auto Err = JD.define(BuildMachOHeaderMU(*this)))
return Err;
return ES.lookup({&JD}, MachOHeaderStartSymbol).takeError();
@@ -419,11 +405,36 @@ bool MachOPlatform::supportedTarget(const Triple &TT) {
}
}
+jitlink::Edge::Kind MachOPlatform::getPointerEdgeKind(jitlink::LinkGraph &G) {
+ switch (G.getTargetTriple().getArch()) {
+ case Triple::aarch64:
+ return jitlink::aarch64::Pointer64;
+ case Triple::x86_64:
+ return jitlink::x86_64::Pointer64;
+ default:
+ llvm_unreachable("Unsupported architecture");
+ }
+}
+
+MachOPlatform::MachOExecutorSymbolFlags
+MachOPlatform::flagsForSymbol(jitlink::Symbol &Sym) {
+ MachOPlatform::MachOExecutorSymbolFlags Flags{};
+ if (Sym.getLinkage() == jitlink::Linkage::Weak)
+ Flags |= MachOExecutorSymbolFlags::Weak;
+
+ if (Sym.isCallable())
+ Flags |= MachOExecutorSymbolFlags::Callable;
+
+ return Flags;
+}
+
MachOPlatform::MachOPlatform(
ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
JITDylib &PlatformJD,
- std::unique_ptr<DefinitionGenerator> OrcRuntimeGenerator, Error &Err)
- : ES(ES), PlatformJD(PlatformJD), ObjLinkingLayer(ObjLinkingLayer) {
+ std::unique_ptr<DefinitionGenerator> OrcRuntimeGenerator,
+ MachOHeaderMUBuilder BuildMachOHeaderMU, Error &Err)
+ : ES(ES), PlatformJD(PlatformJD), ObjLinkingLayer(ObjLinkingLayer),
+ BuildMachOHeaderMU(std::move(BuildMachOHeaderMU)) {
ErrorAsOutParameter _(&Err);
ObjLinkingLayer.addPlugin(std::make_unique<MachOPlatformPlugin>(*this));
PlatformJD.addGenerator(std::move(OrcRuntimeGenerator));
@@ -442,11 +453,11 @@ MachOPlatform::MachOPlatform(
// itself (to build the allocation actions that will call the registration
// functions). Further complicating the situation (a) the graph containing
// the registration functions is allowed to depend on other graphs (e.g. the
- // graph containing the ORC runtime RTTI support) so we need to handle with
- // an unknown set of dependencies during bootstrap, and (b) these graphs may
+ // graph containing the ORC runtime RTTI support) so we need to handle an
+ // unknown set of dependencies during bootstrap, and (b) these graphs may
// be linked concurrently if the user has installed a concurrent dispatcher.
//
- // We satisfy these constraint by implementing a bootstrap phase during which
+ // We satisfy these constraints by implementing a bootstrap phase during which
// allocation actions generated by MachOPlatform are appended to a list of
// deferred allocation actions, rather than to the graphs themselves. At the
// end of the bootstrap process the deferred actions are attached to a final
@@ -486,8 +497,7 @@ MachOPlatform::MachOPlatform(
// the support methods callable. The bootstrap is now complete.
// Step (1) Add header materialization unit and request.
- if ((Err = PlatformJD.define(std::make_unique<MachOHeaderMaterializationUnit>(
- *this, MachOHeaderStartSymbol))))
+ if ((Err = PlatformJD.define(this->BuildMachOHeaderMU(*this))))
return;
if ((Err = ES.lookup(&PlatformJD, MachOHeaderStartSymbol).takeError()))
return;
@@ -498,6 +508,8 @@ MachOPlatform::MachOPlatform(
SymbolLookupSet(
{PlatformBootstrap.Name, PlatformShutdown.Name,
RegisterJITDylib.Name, DeregisterJITDylib.Name,
+ RegisterObjectSymbolTable.Name,
+ DeregisterObjectSymbolTable.Name,
RegisterObjectPlatformSections.Name,
DeregisterObjectPlatformSections.Name,
CreatePThreadKey.Name}))
@@ -516,9 +528,11 @@ MachOPlatform::MachOPlatform(
if ((Err = PlatformJD.define(
std::make_unique<MachOPlatformCompleteBootstrapMaterializationUnit>(
*this, PlatformJD.getName(), BootstrapCompleteSymbol,
- std::move(BI.DeferredAAs), PlatformBootstrap.Addr,
+ std::move(BI.SymTab), std::move(BI.DeferredAAs),
+ BI.MachOHeaderAddr, PlatformBootstrap.Addr,
PlatformShutdown.Addr, RegisterJITDylib.Addr,
- DeregisterJITDylib.Addr, BI.MachOHeaderAddr))))
+ DeregisterJITDylib.Addr, RegisterObjectSymbolTable.Addr,
+ DeregisterObjectSymbolTable.Addr))))
return;
if ((Err = ES.lookup(makeJITDylibSearchOrder(
&PlatformJD, JITDylibLookupFlags::MatchAllSymbols),
@@ -540,11 +554,11 @@ Error MachOPlatform::associateRuntimeSupportFunctions() {
ES.wrapAsyncWithSPS<PushInitializersSPSSig>(
this, &MachOPlatform::rt_pushInitializers);
- using LookupSymbolSPSSig =
- SPSExpected<SPSExecutorAddr>(SPSExecutorAddr, SPSString);
- WFs[ES.intern("___orc_rt_macho_symbol_lookup_tag")] =
- ES.wrapAsyncWithSPS<LookupSymbolSPSSig>(this,
- &MachOPlatform::rt_lookupSymbol);
+ using PushSymbolsSPSSig =
+ SPSError(SPSExecutorAddr, SPSSequence<SPSTuple<SPSString, bool>>);
+ WFs[ES.intern("___orc_rt_macho_push_symbols_tag")] =
+ ES.wrapAsyncWithSPS<PushSymbolsSPSSig>(this,
+ &MachOPlatform::rt_pushSymbols);
return ES.registerJITDispatchHandlers(PlatformJD, std::move(WFs));
}
@@ -665,11 +679,9 @@ void MachOPlatform::rt_pushInitializers(PushInitializersSendResultFn SendResult,
pushInitializersLoop(std::move(SendResult), JD);
}
-void MachOPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult,
- ExecutorAddr Handle, StringRef SymbolName) {
- LLVM_DEBUG({
- dbgs() << "MachOPlatform::rt_lookupSymbol(\"" << Handle << "\")\n";
- });
+void MachOPlatform::rt_pushSymbols(
+ PushSymbolsInSendResultFn SendResult, ExecutorAddr Handle,
+ const std::vector<std::pair<StringRef, bool>> &SymbolNames) {
JITDylib *JD = nullptr;
@@ -679,39 +691,37 @@ void MachOPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult,
if (I != HeaderAddrToJITDylib.end())
JD = I->second;
}
+ LLVM_DEBUG({
+ dbgs() << "MachOPlatform::rt_pushSymbols(";
+ if (JD)
+ dbgs() << "\"" << JD->getName() << "\", [ ";
+ else
+ dbgs() << "<invalid handle " << Handle << ">, [ ";
+ for (auto &Name : SymbolNames)
+ dbgs() << "\"" << Name.first << "\" ";
+ dbgs() << "])\n";
+ });
if (!JD) {
- LLVM_DEBUG(dbgs() << " No JITDylib for handle " << Handle << "\n");
SendResult(make_error<StringError>("No JITDylib associated with handle " +
formatv("{0:x}", Handle),
inconvertibleErrorCode()));
return;
}
- // Use functor class to work around XL build compiler issue on AIX.
- class RtLookupNotifyComplete {
- public:
- RtLookupNotifyComplete(SendSymbolAddressFn &&SendResult)
- : SendResult(std::move(SendResult)) {}
- void operator()(Expected<SymbolMap> Result) {
- if (Result) {
- assert(Result->size() == 1 && "Unexpected result map count");
- SendResult(Result->begin()->second.getAddress());
- } else {
- SendResult(Result.takeError());
- }
- }
-
- private:
- SendSymbolAddressFn SendResult;
- };
+ SymbolLookupSet LS;
+ for (auto &[Name, Required] : SymbolNames)
+ LS.add(ES.intern(Name), Required
+ ? SymbolLookupFlags::RequiredSymbol
+ : SymbolLookupFlags::WeaklyReferencedSymbol);
- // FIXME: Proper mangling.
- auto MangledName = ("_" + SymbolName).str();
ES.lookup(
LookupKind::DLSym, {{JD, JITDylibLookupFlags::MatchExportedSymbolsOnly}},
- SymbolLookupSet(ES.intern(MangledName)), SymbolState::Ready,
- RtLookupNotifyComplete(std::move(SendResult)), NoDependenciesToRegister);
+ std::move(LS), SymbolState::Ready,
+ [SendResult = std::move(SendResult)](Expected<SymbolMap> Result) mutable {
+ SendResult(Result.takeError());
+ },
+ NoDependenciesToRegister);
}
Expected<uint64_t> MachOPlatform::createPThreadKey() {
@@ -781,6 +791,18 @@ void MachOPlatform::MachOPlatformPlugin::modifyPassConfig(
return fixTLVSectionsAndEdges(G, JD);
});
+ // Add symbol table prepare and register passes: These will add strings for
+ // all symbols to the c-strings section, and build a symbol table registration
+ // call.
+ auto JITSymTabInfo = std::make_shared<JITSymTabVector>();
+ Config.PostPrunePasses.push_back([this, JITSymTabInfo](LinkGraph &G) {
+ return prepareSymbolTableRegistration(G, *JITSymTabInfo);
+ });
+ Config.PostFixupPasses.push_back([this, &MR, JITSymTabInfo,
+ InBootstrapPhase](LinkGraph &G) {
+ return addSymbolTableRegistration(G, MR, *JITSymTabInfo, InBootstrapPhase);
+ });
+
// Add a pass to register the final addresses of any special sections in the
// object with the runtime.
Config.PostAllocationPasses.push_back(
@@ -826,6 +848,9 @@ Error MachOPlatform::MachOPlatformPlugin::
{*MP.PlatformShutdown.Name, &MP.PlatformShutdown.Addr},
{*MP.RegisterJITDylib.Name, &MP.RegisterJITDylib.Addr},
{*MP.DeregisterJITDylib.Name, &MP.DeregisterJITDylib.Addr},
+ {*MP.RegisterObjectSymbolTable.Name, &MP.RegisterObjectSymbolTable.Addr},
+ {*MP.DeregisterObjectSymbolTable.Name,
+ &MP.DeregisterObjectSymbolTable.Addr},
{*MP.RegisterObjectPlatformSections.Name,
&MP.RegisterObjectPlatformSections.Addr},
{*MP.DeregisterObjectPlatformSections.Name,
@@ -1029,15 +1054,19 @@ Error MachOPlatform::MachOPlatformPlugin::processObjCImageInfo(
" does not match first registered version",
inconvertibleErrorCode());
if (ObjCImageInfoItr->second.Flags != Flags)
- return make_error<StringError>("ObjC flags in " + G.getName() +
- " do not match first registered flags",
- inconvertibleErrorCode());
+ if (Error E = mergeImageInfoFlags(G, MR, ObjCImageInfoItr->second, Flags))
+ return E;
// __objc_imageinfo is valid. Delete the block.
for (auto *S : ObjCImageInfo->symbols())
G.removeDefinedSymbol(*S);
G.removeBlock(ObjCImageInfoBlock);
} else {
+ LLVM_DEBUG({
+ dbgs() << "MachOPlatform: Registered __objc_imageinfo for "
+ << MR.getTargetJITDylib().getName() << " in " << G.getName()
+ << "; flags = " << formatv("{0:x4}", Flags) << "\n";
+ });
// We haven't registered an __objc_imageinfo section yet. Register and
// move on. The section should already be marked no-dead-strip.
G.addDefinedSymbol(ObjCImageInfoBlock, 0, ObjCImageInfoSymbolName,
@@ -1047,12 +1076,66 @@ Error MachOPlatform::MachOPlatformPlugin::processObjCImageInfo(
{{MR.getExecutionSession().intern(ObjCImageInfoSymbolName),
JITSymbolFlags()}}))
return Err;
- ObjCImageInfos[&MR.getTargetJITDylib()] = {Version, Flags};
+ ObjCImageInfos[&MR.getTargetJITDylib()] = {Version, Flags, false};
}
return Error::success();
}
+Error MachOPlatform::MachOPlatformPlugin::mergeImageInfoFlags(
+ jitlink::LinkGraph &G, MaterializationResponsibility &MR,
+ ObjCImageInfo &Info, uint32_t NewFlags) {
+ if (Info.Flags == NewFlags)
+ return Error::success();
+
+ ObjCImageInfoFlags Old(Info.Flags);
+ ObjCImageInfoFlags New(NewFlags);
+
+ // Check for incompatible flags.
+ if (Old.SwiftABIVersion && New.SwiftABIVersion &&
+ Old.SwiftABIVersion != New.SwiftABIVersion)
+ return make_error<StringError>("Swift ABI version in " + G.getName() +
+ " does not match first registered flags",
+ inconvertibleErrorCode());
+
+ if (Old.HasCategoryClassProperties != New.HasCategoryClassProperties)
+ return make_error<StringError>("ObjC category class property support in " +
+ G.getName() +
+ " does not match first registered flags",
+ inconvertibleErrorCode());
+ if (Old.HasSignedObjCClassROs != New.HasSignedObjCClassROs)
+ return make_error<StringError>("ObjC class_ro_t pointer signing in " +
+ G.getName() +
+ " does not match first registered flags",
+ inconvertibleErrorCode());
+
+  // If we cannot change the flags, ignore any remaining differences. Adding
+  // Swift or changing its version is unlikely to cause problems in practice.
+ if (Info.Finalized)
+ return Error::success();
+
+ // Use the minimum Swift version.
+ if (Old.SwiftVersion && New.SwiftVersion)
+ New.SwiftVersion = std::min(Old.SwiftVersion, New.SwiftVersion);
+ else if (Old.SwiftVersion)
+ New.SwiftVersion = Old.SwiftVersion;
+ // Add a Swift ABI version if it was pure objc before.
+ if (!New.SwiftABIVersion)
+ New.SwiftABIVersion = Old.SwiftABIVersion;
+
+ LLVM_DEBUG({
+ dbgs() << "MachOPlatform: Merging __objc_imageinfo flags for "
+ << MR.getTargetJITDylib().getName() << " (was "
+ << formatv("{0:x4}", Old.rawFlags()) << ")"
+ << " with " << G.getName() << " (" << formatv("{0:x4}", NewFlags)
+ << ")"
+ << " -> " << formatv("{0:x4}", New.rawFlags()) << "\n";
+ });
+
+ Info.Flags = New.rawFlags();
+ return Error::success();
+}
+
Error MachOPlatform::MachOPlatformPlugin::fixTLVSectionsAndEdges(
jitlink::LinkGraph &G, JITDylib &JD) {
@@ -1250,15 +1333,6 @@ Error MachOPlatform::MachOPlatformPlugin::registerObjectPlatformSections(
UI->CompactUnwindSection);
if (!MachOPlatformSecs.empty() || UnwindInfo) {
- ExecutorAddr HeaderAddr;
- {
- std::lock_guard<std::mutex> Lock(MP.PlatformMutex);
- auto I = MP.JITDylibToHeaderAddr.find(&JD);
- assert(I != MP.JITDylibToHeaderAddr.end() &&
- "Missing header for JITDylib");
- HeaderAddr = I->second;
- }
-
// Dump the scraped inits.
LLVM_DEBUG({
dbgs() << "MachOPlatform: Scraped " << G.getName() << " init sections:\n";
@@ -1276,6 +1350,15 @@ Error MachOPlatform::MachOPlatformPlugin::registerObjectPlatformSections(
? G.allocActions()
: MP.Bootstrap.load()->DeferredAAs;
+ ExecutorAddr HeaderAddr;
+ {
+ std::lock_guard<std::mutex> Lock(MP.PlatformMutex);
+ auto I = MP.JITDylibToHeaderAddr.find(&JD);
+ assert(I != MP.JITDylibToHeaderAddr.end() &&
+ "No header registered for JD");
+ assert(I->second && "Null header registered for JD");
+ HeaderAddr = I->second;
+ }
allocActions.push_back(
{cantFail(
WrapperFunctionCall::Create<SPSRegisterObjectPlatformSectionsArgs>(
@@ -1374,17 +1457,7 @@ Error MachOPlatform::MachOPlatformPlugin::populateObjCRuntimeObject(
strcpy(SD.Sec.segname, "__DATA");
SD.Sec.size = 8;
SD.AddFixups = [&](size_t RecordOffset) {
- jitlink::Edge::Kind PointerEdge = jitlink::Edge::Invalid;
- switch (G.getTargetTriple().getArch()) {
- case Triple::aarch64:
- PointerEdge = jitlink::aarch64::Pointer64;
- break;
- case Triple::x86_64:
- PointerEdge = jitlink::x86_64::Pointer64;
- break;
- default:
- llvm_unreachable("Unsupported architecture");
- }
+ auto PointerEdge = getPointerEdgeKind(G);
// Look for an existing __objc_imageinfo symbol.
jitlink::Symbol *ObjCImageInfoSym = nullptr;
@@ -1403,6 +1476,24 @@ Error MachOPlatform::MachOPlatformPlugin::populateObjCRuntimeObject(
for (auto *Sym : G.defined_symbols())
if (Sym->hasName() && Sym->getName() == ObjCImageInfoSymbolName) {
ObjCImageInfoSym = Sym;
+ std::optional<uint32_t> Flags;
+ {
+ std::lock_guard<std::mutex> Lock(PluginMutex);
+ auto It = ObjCImageInfos.find(&MR.getTargetJITDylib());
+ if (It != ObjCImageInfos.end()) {
+ It->second.Finalized = true;
+ Flags = It->second.Flags;
+ }
+ }
+
+ if (Flags) {
+            // We own the definition of __objc_imageinfo; write the final
+            // merged flags value.
+ auto Content = Sym->getBlock().getMutableContent(G);
+ assert(Content.size() == 8 &&
+ "__objc_image_info size should have been verified already");
+ support::endian::write32(&Content[4], *Flags, G.getEndianness());
+ }
break;
}
if (!ObjCImageInfoSym)
@@ -1460,7 +1551,7 @@ Error MachOPlatform::MachOPlatformPlugin::populateObjCRuntimeObject(
auto SecContent = SecBlock.getAlreadyMutableContent();
char *P = SecContent.data();
auto WriteMachOStruct = [&](auto S) {
- if (G.getEndianness() != support::endian::system_endianness())
+ if (G.getEndianness() != llvm::endianness::native)
MachO::swapStruct(S);
memcpy(P, &S, sizeof(S));
P += sizeof(S);
@@ -1492,5 +1583,179 @@ Error MachOPlatform::MachOPlatformPlugin::populateObjCRuntimeObject(
return Error::success();
}
+Error MachOPlatform::MachOPlatformPlugin::prepareSymbolTableRegistration(
+ jitlink::LinkGraph &G, JITSymTabVector &JITSymTabInfo) {
+
+ auto *CStringSec = G.findSectionByName(MachOCStringSectionName);
+ if (!CStringSec)
+ CStringSec = &G.createSection(MachOCStringSectionName,
+ MemProt::Read | MemProt::Exec);
+
+ // Make a map of existing strings so that we can re-use them:
+ DenseMap<StringRef, jitlink::Symbol *> ExistingStrings;
+ for (auto *Sym : CStringSec->symbols()) {
+
+  // The LinkGraph builder should have created single-string blocks (one
+  // string per block), and all plugins should have maintained this invariant.
+ auto Content = Sym->getBlock().getContent();
+ ExistingStrings.insert(
+ std::make_pair(StringRef(Content.data(), Content.size()), Sym));
+ }
+
+ // Add all symbol names to the string section, and record the symbols for
+ // those names.
+ {
+ SmallVector<jitlink::Symbol *> SymsToProcess;
+ for (auto *Sym : G.defined_symbols())
+ SymsToProcess.push_back(Sym);
+
+ for (auto *Sym : SymsToProcess) {
+ if (!Sym->hasName())
+ continue;
+
+ auto I = ExistingStrings.find(Sym->getName());
+ if (I == ExistingStrings.end()) {
+ auto &NameBlock = G.createMutableContentBlock(
+ *CStringSec, G.allocateCString(Sym->getName()), orc::ExecutorAddr(),
+ 1, 0);
+ auto &SymbolNameSym = G.addAnonymousSymbol(
+ NameBlock, 0, NameBlock.getSize(), false, true);
+ JITSymTabInfo.push_back({Sym, &SymbolNameSym});
+ } else
+ JITSymTabInfo.push_back({Sym, I->second});
+ }
+ }
+
+ return Error::success();
+}
+
+Error MachOPlatform::MachOPlatformPlugin::addSymbolTableRegistration(
+ jitlink::LinkGraph &G, MaterializationResponsibility &MR,
+ JITSymTabVector &JITSymTabInfo, bool InBootstrapPhase) {
+
+ ExecutorAddr HeaderAddr;
+ {
+ std::lock_guard<std::mutex> Lock(MP.PlatformMutex);
+ auto I = MP.JITDylibToHeaderAddr.find(&MR.getTargetJITDylib());
+ assert(I != MP.JITDylibToHeaderAddr.end() && "No header registered for JD");
+ assert(I->second && "Null header registered for JD");
+ HeaderAddr = I->second;
+ }
+
+ SymbolTableVector LocalSymTab;
+ auto &SymTab = LLVM_LIKELY(!InBootstrapPhase) ? LocalSymTab
+ : MP.Bootstrap.load()->SymTab;
+ for (auto &[OriginalSymbol, NameSym] : JITSymTabInfo)
+ SymTab.push_back({NameSym->getAddress(), OriginalSymbol->getAddress(),
+ flagsForSymbol(*OriginalSymbol)});
+
+  // Bail out if we're in the bootstrap phase -- registration of these
+  // symbols will be attached to the bootstrap graph.
+ if (LLVM_UNLIKELY(InBootstrapPhase))
+ return Error::success();
+
+  // The bootstrap case returned above, so these actions always attach to
+  // this graph's alloc-actions list.
+  shared::AllocActions &allocActions = G.allocActions();
+ allocActions.push_back(
+ {cantFail(WrapperFunctionCall::Create<SPSRegisterSymbolsArgs>(
+ MP.RegisterObjectSymbolTable.Addr, HeaderAddr, SymTab)),
+ cantFail(WrapperFunctionCall::Create<SPSRegisterSymbolsArgs>(
+ MP.DeregisterObjectSymbolTable.Addr, HeaderAddr, SymTab))});
+
+ return Error::success();
+}
+
+template <typename MachOTraits>
+jitlink::Block &createTrivialHeaderBlock(MachOPlatform &MOP,
+ jitlink::LinkGraph &G,
+ jitlink::Section &HeaderSection) {
+ auto HdrInfo =
+ getMachOHeaderInfoFromTriple(MOP.getExecutionSession().getTargetTriple());
+ MachOBuilder<MachOTraits> B(HdrInfo.PageSize);
+
+ B.Header.filetype = MachO::MH_DYLIB;
+ B.Header.cputype = HdrInfo.CPUType;
+ B.Header.cpusubtype = HdrInfo.CPUSubType;
+
+ auto HeaderContent = G.allocateBuffer(B.layout());
+ B.write(HeaderContent);
+
+ return G.createContentBlock(HeaderSection, HeaderContent, ExecutorAddr(), 8,
+ 0);
+}
+
+SimpleMachOHeaderMU::SimpleMachOHeaderMU(MachOPlatform &MOP,
+ SymbolStringPtr HeaderStartSymbol)
+ : MaterializationUnit(
+ createHeaderInterface(MOP, std::move(HeaderStartSymbol))),
+ MOP(MOP) {}
+
+void SimpleMachOHeaderMU::materialize(
+ std::unique_ptr<MaterializationResponsibility> R) {
+ auto G = createPlatformGraph(MOP, "<MachOHeaderMU>");
+ addMachOHeader(R->getTargetJITDylib(), *G, R->getInitializerSymbol());
+ MOP.getObjectLinkingLayer().emit(std::move(R), std::move(G));
+}
+
+void SimpleMachOHeaderMU::discard(const JITDylib &JD,
+ const SymbolStringPtr &Sym) {}
+
+void SimpleMachOHeaderMU::addMachOHeader(
+ JITDylib &JD, jitlink::LinkGraph &G,
+ const SymbolStringPtr &InitializerSymbol) {
+ auto &HeaderSection = G.createSection("__header", MemProt::Read);
+ auto &HeaderBlock = createHeaderBlock(JD, G, HeaderSection);
+
+ // Init symbol is header-start symbol.
+ G.addDefinedSymbol(HeaderBlock, 0, *InitializerSymbol, HeaderBlock.getSize(),
+ jitlink::Linkage::Strong, jitlink::Scope::Default, false,
+ true);
+ for (auto &HS : AdditionalHeaderSymbols)
+ G.addDefinedSymbol(HeaderBlock, HS.Offset, HS.Name, HeaderBlock.getSize(),
+ jitlink::Linkage::Strong, jitlink::Scope::Default, false,
+ true);
+}
+
+jitlink::Block &
+SimpleMachOHeaderMU::createHeaderBlock(JITDylib &JD, jitlink::LinkGraph &G,
+ jitlink::Section &HeaderSection) {
+ switch (MOP.getExecutionSession().getTargetTriple().getArch()) {
+ case Triple::aarch64:
+ case Triple::x86_64:
+ return createTrivialHeaderBlock<MachO64LE>(MOP, G, HeaderSection);
+ default:
+ llvm_unreachable("Unsupported architecture");
+ }
+}
+
+MaterializationUnit::Interface SimpleMachOHeaderMU::createHeaderInterface(
+ MachOPlatform &MOP, const SymbolStringPtr &HeaderStartSymbol) {
+ SymbolFlagsMap HeaderSymbolFlags;
+
+ HeaderSymbolFlags[HeaderStartSymbol] = JITSymbolFlags::Exported;
+ for (auto &HS : AdditionalHeaderSymbols)
+ HeaderSymbolFlags[MOP.getExecutionSession().intern(HS.Name)] =
+ JITSymbolFlags::Exported;
+
+ return MaterializationUnit::Interface(std::move(HeaderSymbolFlags),
+ HeaderStartSymbol);
+}
+
+MachOHeaderInfo getMachOHeaderInfoFromTriple(const Triple &TT) {
+ switch (TT.getArch()) {
+ case Triple::aarch64:
+ return {/* PageSize = */ 16 * 1024,
+ /* CPUType = */ MachO::CPU_TYPE_ARM64,
+ /* CPUSubType = */ MachO::CPU_SUBTYPE_ARM64_ALL};
+ case Triple::x86_64:
+ return {/* PageSize = */ 4 * 1024,
+ /* CPUType = */ MachO::CPU_TYPE_X86_64,
+ /* CPUSubType = */ MachO::CPU_SUBTYPE_X86_64_ALL};
+ default:
+ llvm_unreachable("Unrecognized architecture");
+ }
+}
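A small usage sketch of the helper above; the values follow directly from the switch, assuming MachOHeaderInfo's fields match the designated-initializer comments:

#include "llvm/TargetParser/Triple.h"

void headerInfoExample() {
  llvm::Triple TT("arm64-apple-darwin");
  auto Info = llvm::orc::getMachOHeaderInfoFromTriple(TT);
  // For aarch64: Info.PageSize == 16 * 1024,
  // Info.CPUType == MachO::CPU_TYPE_ARM64,
  // Info.CPUSubType == MachO::CPU_SUBTYPE_ARM64_ALL.
}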
+
} // End namespace orc.
} // End namespace llvm.
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
index ca4950077ffe..9cfe547c84c3 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
@@ -322,8 +322,8 @@ void SharedMemoryMapper::initialize(MemoryMapper::AllocInfo &AI,
std::memset(Base + Segment.ContentSize, 0, Segment.ZeroFillSize);
tpctypes::SharedMemorySegFinalizeRequest SegReq;
- SegReq.RAG = {Segment.AG.getMemProt(), Segment.AG.getMemLifetimePolicy() ==
- MemLifetimePolicy::Finalize};
+ SegReq.RAG = {Segment.AG.getMemProt(),
+ Segment.AG.getMemLifetime() == MemLifetime::Finalize};
SegReq.Addr = AI.MappingBase + Segment.Offset;
SegReq.Size = Segment.ContentSize + Segment.ZeroFillSize;
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp
index 7c8fa63477d0..0286b0c93197 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp
@@ -72,7 +72,7 @@ getMachOObjectFileSymbolInfo(ExecutionSession &ES,
return SymFlags.takeError();
// Strip the 'exported' flag from MachO linker-private symbols.
- if (Name->startswith("l"))
+ if (Name->starts_with("l"))
*SymFlags &= ~JITSymbolFlags::Exported;
I.SymbolFlags[ES.intern(*Name)] = std::move(*SymFlags);
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
index a29f3d1c3aec..3d77f82e6569 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
@@ -46,7 +46,7 @@ ExecutorAddr getJITSymbolPtrForSymbol(Symbol &Sym, const Triple &TT) {
case Triple::armeb:
case Triple::thumb:
case Triple::thumbeb:
- if (Sym.hasTargetFlags(aarch32::ThumbSymbol)) {
+ if (hasTargetFlags(Sym, aarch32::ThumbSymbol)) {
// Set LSB to indicate thumb target
assert(Sym.isCallable() && "Only callable symbols can have thumb flag");
assert((Sym.getAddress().getValue() & 0x01) == 0 && "LSB is clear");
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
index a73aec6d98c6..72314cceedf3 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
@@ -27,42 +27,6 @@ class InProgressLookupState;
class OrcV2CAPIHelper {
public:
- using PoolEntry = SymbolStringPtr::PoolEntry;
- using PoolEntryPtr = SymbolStringPtr::PoolEntryPtr;
-
- // Move from SymbolStringPtr to PoolEntryPtr (no change in ref count).
- static PoolEntryPtr moveFromSymbolStringPtr(SymbolStringPtr S) {
- PoolEntryPtr Result = nullptr;
- std::swap(Result, S.S);
- return Result;
- }
-
- // Move from a PoolEntryPtr to a SymbolStringPtr (no change in ref count).
- static SymbolStringPtr moveToSymbolStringPtr(PoolEntryPtr P) {
- SymbolStringPtr S;
- S.S = P;
- return S;
- }
-
- // Copy a pool entry to a SymbolStringPtr (increments ref count).
- static SymbolStringPtr copyToSymbolStringPtr(PoolEntryPtr P) {
- return SymbolStringPtr(P);
- }
-
- static PoolEntryPtr getRawPoolEntryPtr(const SymbolStringPtr &S) {
- return S.S;
- }
-
- static void retainPoolEntry(PoolEntryPtr P) {
- SymbolStringPtr S(P);
- S.S = nullptr;
- }
-
- static void releasePoolEntry(PoolEntryPtr P) {
- SymbolStringPtr S;
- S.S = P;
- }
-
static InProgressLookupState *extractLookupState(LookupState &LS) {
return LS.IPLS.release();
}
@@ -75,10 +39,16 @@ public:
} // namespace orc
} // namespace llvm
+inline LLVMOrcSymbolStringPoolEntryRef wrap(SymbolStringPoolEntryUnsafe E) {
+ return reinterpret_cast<LLVMOrcSymbolStringPoolEntryRef>(E.rawPtr());
+}
+
+inline SymbolStringPoolEntryUnsafe unwrap(LLVMOrcSymbolStringPoolEntryRef E) {
+ return reinterpret_cast<SymbolStringPoolEntryUnsafe::PoolEntry *>(E);
+}
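These two helpers replace the OrcV2CAPIHelper shims removed above: SymbolStringPoolEntryUnsafe already models an unmanaged pool-entry pointer, so the C handle is just a reinterpretation of it. A sketch of the ref-count discipline at the C boundary, using only calls that appear in this patch:

// Interning transfers one ref-count into the C handle via take():
//   LLVMOrcSymbolStringPoolEntryRef Ref =
//       wrap(SymbolStringPoolEntryUnsafe::take(ES.intern("foo")));
// Borrowing for a callback leaves the count untouched via from():
//   LLVMOrcSymbolStringPoolEntryRef Borrowed =
//       wrap(SymbolStringPoolEntryUnsafe::from(Name));
// Handing ownership back to C++ consumes the handle's ref-count:
//   SymbolStringPtr S = unwrap(Ref).moveToSymbolStringPtr();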
+
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ExecutionSession, LLVMOrcExecutionSessionRef)
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(SymbolStringPool, LLVMOrcSymbolStringPoolRef)
-DEFINE_SIMPLE_CONVERSION_FUNCTIONS(OrcV2CAPIHelper::PoolEntry,
- LLVMOrcSymbolStringPoolEntryRef)
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(MaterializationUnit,
LLVMOrcMaterializationUnitRef)
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(MaterializationResponsibility,
@@ -136,7 +106,7 @@ public:
private:
void discard(const JITDylib &JD, const SymbolStringPtr &Name) override {
- Discard(Ctx, wrap(&JD), wrap(OrcV2CAPIHelper::getRawPoolEntryPtr(Name)));
+ Discard(Ctx, wrap(&JD), wrap(SymbolStringPoolEntryUnsafe::from(Name)));
}
std::string Name;
@@ -184,7 +154,7 @@ static SymbolMap toSymbolMap(LLVMOrcCSymbolMapPairs Syms, size_t NumPairs) {
SymbolMap SM;
for (size_t I = 0; I != NumPairs; ++I) {
JITSymbolFlags Flags = toJITSymbolFlags(Syms[I].Sym.Flags);
- SM[OrcV2CAPIHelper::moveToSymbolStringPtr(unwrap(Syms[I].Name))] = {
+ SM[unwrap(Syms[I].Name).moveToSymbolStringPtr()] = {
ExecutorAddr(Syms[I].Sym.Address), Flags};
}
return SM;
@@ -199,7 +169,7 @@ toSymbolDependenceMap(LLVMOrcCDependenceMapPairs Pairs, size_t NumPairs) {
for (size_t J = 0; J != Pairs[I].Names.Length; ++J) {
auto Sym = Pairs[I].Names.Symbols[J];
- Names.insert(OrcV2CAPIHelper::moveToSymbolStringPtr(unwrap(Sym)));
+ Names.insert(unwrap(Sym).moveToSymbolStringPtr());
}
SDM[JD] = Names;
}
@@ -309,7 +279,7 @@ public:
CLookupSet.reserve(LookupSet.size());
for (auto &KV : LookupSet) {
LLVMOrcSymbolStringPoolEntryRef Name =
- ::wrap(OrcV2CAPIHelper::getRawPoolEntryPtr(KV.first));
+ ::wrap(SymbolStringPoolEntryUnsafe::from(KV.first));
LLVMOrcSymbolLookupFlags SLF = fromSymbolLookupFlags(KV.second);
CLookupSet.push_back({Name, SLF});
}
@@ -353,8 +323,7 @@ void LLVMOrcSymbolStringPoolClearDeadEntries(LLVMOrcSymbolStringPoolRef SSP) {
LLVMOrcSymbolStringPoolEntryRef
LLVMOrcExecutionSessionIntern(LLVMOrcExecutionSessionRef ES, const char *Name) {
- return wrap(
- OrcV2CAPIHelper::moveFromSymbolStringPtr(unwrap(ES)->intern(Name)));
+ return wrap(SymbolStringPoolEntryUnsafe::take(unwrap(ES)->intern(Name)));
}
void LLVMOrcExecutionSessionLookup(
@@ -374,7 +343,7 @@ void LLVMOrcExecutionSessionLookup(
SymbolLookupSet SLS;
for (size_t I = 0; I != SymbolsSize; ++I)
- SLS.add(OrcV2CAPIHelper::moveToSymbolStringPtr(unwrap(Symbols[I].Name)),
+ SLS.add(unwrap(Symbols[I].Name).moveToSymbolStringPtr(),
toSymbolLookupFlags(Symbols[I].LookupFlags));
unwrap(ES)->lookup(
@@ -384,7 +353,7 @@ void LLVMOrcExecutionSessionLookup(
SmallVector<LLVMOrcCSymbolMapPair> CResult;
for (auto &KV : *Result)
CResult.push_back(LLVMOrcCSymbolMapPair{
- wrap(OrcV2CAPIHelper::getRawPoolEntryPtr(KV.first)),
+ wrap(SymbolStringPoolEntryUnsafe::from(KV.first)),
fromExecutorSymbolDef(KV.second)});
HandleResult(LLVMErrorSuccess, CResult.data(), CResult.size(), Ctx);
} else
@@ -394,15 +363,15 @@ void LLVMOrcExecutionSessionLookup(
}
void LLVMOrcRetainSymbolStringPoolEntry(LLVMOrcSymbolStringPoolEntryRef S) {
- OrcV2CAPIHelper::retainPoolEntry(unwrap(S));
+ unwrap(S).retain();
}
void LLVMOrcReleaseSymbolStringPoolEntry(LLVMOrcSymbolStringPoolEntryRef S) {
- OrcV2CAPIHelper::releasePoolEntry(unwrap(S));
+ unwrap(S).release();
}
const char *LLVMOrcSymbolStringPoolEntryStr(LLVMOrcSymbolStringPoolEntryRef S) {
- return unwrap(S)->getKey().data();
+ return unwrap(S).rawPtr()->getKey().data();
}
LLVMOrcResourceTrackerRef
@@ -452,10 +421,10 @@ LLVMOrcMaterializationUnitRef LLVMOrcCreateCustomMaterializationUnit(
LLVMOrcMaterializationUnitDestroyFunction Destroy) {
SymbolFlagsMap SFM;
for (size_t I = 0; I != NumSyms; ++I)
- SFM[OrcV2CAPIHelper::moveToSymbolStringPtr(unwrap(Syms[I].Name))] =
+ SFM[unwrap(Syms[I].Name).moveToSymbolStringPtr()] =
toJITSymbolFlags(Syms[I].Flags);
- auto IS = OrcV2CAPIHelper::moveToSymbolStringPtr(unwrap(InitSym));
+ auto IS = unwrap(InitSym).moveToSymbolStringPtr();
return wrap(new OrcCAPIMaterializationUnit(
Name, std::move(SFM), std::move(IS), Ctx, Materialize, Discard, Destroy));
@@ -476,9 +445,8 @@ LLVMOrcMaterializationUnitRef LLVMOrcLazyReexports(
for (size_t I = 0; I != NumPairs; ++I) {
auto pair = CallableAliases[I];
JITSymbolFlags Flags = toJITSymbolFlags(pair.Entry.Flags);
- SymbolStringPtr Name =
- OrcV2CAPIHelper::moveToSymbolStringPtr(unwrap(pair.Entry.Name));
- SAM[OrcV2CAPIHelper::moveToSymbolStringPtr(unwrap(pair.Name))] =
+ SymbolStringPtr Name = unwrap(pair.Entry.Name).moveToSymbolStringPtr();
+ SAM[unwrap(pair.Name).moveToSymbolStringPtr()] =
SymbolAliasMapEntry(Name, Flags);
}
@@ -511,7 +479,7 @@ LLVMOrcCSymbolFlagsMapPairs LLVMOrcMaterializationResponsibilityGetSymbols(
safe_malloc(Symbols.size() * sizeof(LLVMOrcCSymbolFlagsMapPair)));
size_t I = 0;
for (auto const &pair : Symbols) {
- auto Name = wrap(OrcV2CAPIHelper::getRawPoolEntryPtr(pair.first));
+ auto Name = wrap(SymbolStringPoolEntryUnsafe::from(pair.first));
auto Flags = pair.second;
Result[I] = {Name, fromJITSymbolFlags(Flags)};
I++;
@@ -528,7 +496,7 @@ LLVMOrcSymbolStringPoolEntryRef
LLVMOrcMaterializationResponsibilityGetInitializerSymbol(
LLVMOrcMaterializationResponsibilityRef MR) {
auto Sym = unwrap(MR)->getInitializerSymbol();
- return wrap(OrcV2CAPIHelper::getRawPoolEntryPtr(Sym));
+ return wrap(SymbolStringPoolEntryUnsafe::from(Sym));
}
LLVMOrcSymbolStringPoolEntryRef *
@@ -541,7 +509,7 @@ LLVMOrcMaterializationResponsibilityGetRequestedSymbols(
Symbols.size() * sizeof(LLVMOrcSymbolStringPoolEntryRef)));
size_t I = 0;
for (auto &Name : Symbols) {
- Result[I] = wrap(OrcV2CAPIHelper::getRawPoolEntryPtr(Name));
+ Result[I] = wrap(SymbolStringPoolEntryUnsafe::from(Name));
I++;
}
*NumSymbols = Symbols.size();
@@ -569,7 +537,7 @@ LLVMErrorRef LLVMOrcMaterializationResponsibilityDefineMaterializing(
LLVMOrcCSymbolFlagsMapPairs Syms, size_t NumSyms) {
SymbolFlagsMap SFM;
for (size_t I = 0; I != NumSyms; ++I)
- SFM[OrcV2CAPIHelper::moveToSymbolStringPtr(unwrap(Syms[I].Name))] =
+ SFM[unwrap(Syms[I].Name).moveToSymbolStringPtr()] =
toJITSymbolFlags(Syms[I].Flags);
return wrap(unwrap(MR)->defineMaterializing(std::move(SFM)));
@@ -588,7 +556,7 @@ LLVMErrorRef LLVMOrcMaterializationResponsibilityDelegate(
LLVMOrcMaterializationResponsibilityRef *Result) {
SymbolNameSet Syms;
for (size_t I = 0; I != NumSymbols; I++) {
- Syms.insert(OrcV2CAPIHelper::moveToSymbolStringPtr(unwrap(Symbols[I])));
+ Syms.insert(unwrap(Symbols[I]).moveToSymbolStringPtr());
}
auto OtherMR = unwrap(MR)->delegate(Syms);
@@ -605,7 +573,7 @@ void LLVMOrcMaterializationResponsibilityAddDependencies(
LLVMOrcCDependenceMapPairs Dependencies, size_t NumPairs) {
SymbolDependenceMap SDM = toSymbolDependenceMap(Dependencies, NumPairs);
- auto Sym = OrcV2CAPIHelper::moveToSymbolStringPtr(unwrap(Name));
+ auto Sym = unwrap(Name).moveToSymbolStringPtr();
unwrap(MR)->addDependencies(Sym, SDM);
}
@@ -698,7 +666,7 @@ LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForProcess(
DynamicLibrarySearchGenerator::SymbolPredicate Pred;
if (Filter)
Pred = [=](const SymbolStringPtr &Name) -> bool {
- return Filter(FilterCtx, wrap(OrcV2CAPIHelper::getRawPoolEntryPtr(Name)));
+ return Filter(FilterCtx, wrap(SymbolStringPoolEntryUnsafe::from(Name)));
};
auto ProcessSymsGenerator =
@@ -724,7 +692,7 @@ LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForPath(
DynamicLibrarySearchGenerator::SymbolPredicate Pred;
if (Filter)
Pred = [=](const SymbolStringPtr &Name) -> bool {
- return Filter(FilterCtx, wrap(OrcV2CAPIHelper::getRawPoolEntryPtr(Name)));
+ return Filter(FilterCtx, wrap(SymbolStringPoolEntryUnsafe::from(Name)));
};
auto LibrarySymsGenerator =
@@ -992,7 +960,7 @@ char LLVMOrcLLJITGetGlobalPrefix(LLVMOrcLLJITRef J) {
LLVMOrcSymbolStringPoolEntryRef
LLVMOrcLLJITMangleAndIntern(LLVMOrcLLJITRef J, const char *UnmangledName) {
- return wrap(OrcV2CAPIHelper::moveFromSymbolStringPtr(
+ return wrap(SymbolStringPoolEntryUnsafe::take(
unwrap(J)->mangleAndIntern(UnmangledName)));
}
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
index 9ef333222028..f9630161b95e 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
@@ -233,7 +233,7 @@ Error RTDyldObjectLinkingLayer::onObjLoad(
if (auto *COFFObj = dyn_cast<object::COFFObjectFile>(&Obj)) {
auto &ES = getExecutionSession();
- // For all resolved symbols that are not already in the responsibilty set:
+ // For all resolved symbols that are not already in the responsibility set:
// check whether the symbol is in a comdat section and if so mark it as
// weak.
for (auto &Sym : COFFObj->symbols()) {
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/ObjectFormats.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/ObjectFormats.cpp
index ecf5e2915773..a407fcab6ae3 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/ObjectFormats.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/ObjectFormats.cpp
@@ -19,6 +19,7 @@ StringRef MachODataCommonSectionName = "__DATA,__common";
StringRef MachODataDataSectionName = "__DATA,__data";
StringRef MachOEHFrameSectionName = "__TEXT,__eh_frame";
StringRef MachOCompactUnwindInfoSectionName = "__TEXT,__unwind_info";
+StringRef MachOCStringSectionName = "__TEXT,__cstring";
StringRef MachOModInitFuncSectionName = "__DATA,__mod_init_func";
StringRef MachOObjCCatListSectionName = "__DATA,__objc_catlist";
StringRef MachOObjCCatList2SectionName = "__DATA,__objc_catlist2";
@@ -56,7 +57,19 @@ StringRef MachOInitSectionNames[19] = {
};
StringRef ELFEHFrameSectionName = ".eh_frame";
+
StringRef ELFInitArrayFuncSectionName = ".init_array";
+StringRef ELFInitFuncSectionName = ".init";
+StringRef ELFFiniArrayFuncSectionName = ".fini_array";
+StringRef ELFFiniFuncSectionName = ".fini";
+StringRef ELFCtorArrayFuncSectionName = ".ctors";
+StringRef ELFDtorArrayFuncSectionName = ".dtors";
+
+StringRef ELFInitSectionNames[3]{
+ ELFInitArrayFuncSectionName,
+ ELFInitFuncSectionName,
+ ELFCtorArrayFuncSectionName,
+};
StringRef ELFThreadBSSSectionName = ".tbss";
StringRef ELFThreadDataSectionName = ".tdata";
@@ -80,14 +93,16 @@ bool isMachOInitializerSection(StringRef QualifiedName) {
}
bool isELFInitializerSection(StringRef SecName) {
- if (SecName.consume_front(ELFInitArrayFuncSectionName) &&
- (SecName.empty() || SecName[0] == '.'))
- return true;
+ for (StringRef InitSection : ELFInitSectionNames) {
+ StringRef Name = SecName;
+ if (Name.consume_front(InitSection) && (Name.empty() || Name[0] == '.'))
+ return true;
+ }
return false;
}
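The consume_front check accepts either the exact section name or the name followed by a '.'-separated suffix, as emitted for init_array priorities. Illustrative results of the loop above:

// isELFInitializerSection(".init_array")    -> true  (exact match)
// isELFInitializerSection(".init_array.42") -> true  (priority suffix)
// isELFInitializerSection(".init_arrayfoo") -> false (no '.' after prefix)
// isELFInitializerSection(".ctors.65535")   -> true
// isELFInitializerSection(".fini_array")    -> false (finalizer, not initializer)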
bool isCOFFInitializerSection(StringRef SecName) {
- return SecName.startswith(".CRT");
+ return SecName.starts_with(".CRT");
}
} // namespace orc
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
index 86e31c52100e..ae39b1d1bfaa 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
@@ -51,9 +51,9 @@ const char *MemoryWriteBuffersWrapperName =
"__llvm_orc_bootstrap_mem_write_buffers_wrapper";
const char *RegisterEHFrameSectionWrapperName =
- "__llvm_orc_bootstrap_register_ehframe_section_wrapper";
+ "llvm_orc_registerEHFrameSectionWrapper";
const char *DeregisterEHFrameSectionWrapperName =
- "__llvm_orc_bootstrap_deregister_ehframe_section_wrapper";
+ "llvm_orc_deregisterEHFrameSectionWrapper";
const char *RunAsMainWrapperName = "__llvm_orc_bootstrap_run_as_main_wrapper";
const char *RunAsVoidFunctionWrapperName =
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp
index 0388725dfb63..8f42de91b5bb 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp
@@ -10,7 +10,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
@@ -227,7 +226,7 @@ void SequenceBBQuery::traverseToExitBlock(const BasicBlock *AtBB,
VisitedBlocks);
}
-// Get Block frequencies for blocks and take most frquently executed block,
+// Get Block frequencies for blocks and take most frequently executed block,
// walk towards the entry block from those blocks and discover the basic blocks
// with call.
SequenceBBQuery::BlockListTy
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Speculation.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Speculation.cpp
index d4cbd1970d8f..70b536d2feda 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Speculation.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Speculation.cpp
@@ -67,7 +67,7 @@ void IRSpeculationLayer::emit(std::unique_ptr<MaterializationResponsibility> R,
auto SpeculatorVTy = StructType::create(MContext, "Class.Speculator");
auto RuntimeCallTy = FunctionType::get(
Type::getVoidTy(MContext),
- {SpeculatorVTy->getPointerTo(), Type::getInt64Ty(MContext)}, false);
+ {PointerType::getUnqual(MContext), Type::getInt64Ty(MContext)}, false);
auto RuntimeCall =
Function::Create(RuntimeCallTy, Function::LinkageTypes::ExternalLinkage,
"__orc_speculate_for", &M);
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp
index 3f70dbf60437..e8b0e240ac1f 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp
@@ -194,9 +194,7 @@ Error ExecutorSharedMemoryMapperService::deinitialize(
// Remove the allocation from the allocation list of its reservation
for (auto &Reservation : Reservations) {
- auto AllocationIt =
- std::find(Reservation.second.Allocations.begin(),
- Reservation.second.Allocations.end(), Base);
+ auto AllocationIt = llvm::find(Reservation.second.Allocations, Base);
if (AllocationIt != Reservation.second.Allocations.end()) {
Reservation.second.Allocations.erase(AllocationIt);
break;
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp
new file mode 100644
index 000000000000..5e0623102d33
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp
@@ -0,0 +1,457 @@
+//===------- JITLoaderPerf.cpp - Register profiler objects ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Register objects for access by profilers via the perf JIT interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h"
+
+#include "llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h"
+
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Threading.h"
+
+#include <mutex>
+#include <optional>
+
+#ifdef __linux__
+
+#include <sys/mman.h> // mmap()
+#include <time.h> // clock_gettime(), time(), localtime_r()
+#include <unistd.h> // for read(), close()
+
+#define DEBUG_TYPE "orc"
+
+// language identifier (XXX: should we generate something better from debug
+// info?)
+#define JIT_LANG "llvm-IR"
+#define LLVM_PERF_JIT_MAGIC \
+ ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \
+ (uint32_t)'D')
+#define LLVM_PERF_JIT_VERSION 1
+
+using namespace llvm;
+using namespace llvm::orc;
+
+struct PerfState {
+  // process ID, cached to avoid repeated lookups
+ uint32_t Pid;
+
+ // base directory for output data
+ std::string JitPath;
+
+ // output data stream, closed via Dumpstream
+ int DumpFd = -1;
+
+ // output data stream
+ std::unique_ptr<raw_fd_ostream> Dumpstream;
+
+ // perf mmap marker
+  void *MarkerAddr = nullptr;
+};
+
+// prevent concurrent dumps from messing up the output file
+static std::mutex Mutex;
+static std::optional<PerfState> State;
+
+struct RecHeader {
+ uint32_t Id;
+ uint32_t TotalSize;
+ uint64_t Timestamp;
+};
+
+struct DIR {
+ RecHeader Prefix;
+ uint64_t CodeAddr;
+ uint64_t NrEntry;
+};
+
+struct DIE {
+ uint64_t CodeAddr;
+ uint32_t Line;
+ uint32_t Discrim;
+};
+
+struct CLR {
+ RecHeader Prefix;
+ uint32_t Pid;
+ uint32_t Tid;
+ uint64_t Vma;
+ uint64_t CodeAddr;
+ uint64_t CodeSize;
+ uint64_t CodeIndex;
+};
+
+struct UWR {
+ RecHeader Prefix;
+ uint64_t UnwindDataSize;
+ uint64_t EhFrameHeaderSize;
+ uint64_t MappedSize;
+};
+
+static inline uint64_t timespec_to_ns(const struct timespec *TS) {
+ const uint64_t NanoSecPerSec = 1000000000;
+ return ((uint64_t)TS->tv_sec * NanoSecPerSec) + TS->tv_nsec;
+}
+
+static inline uint64_t perf_get_timestamp() {
+ timespec TS;
+ if (clock_gettime(CLOCK_MONOTONIC, &TS))
+ return 0;
+
+ return timespec_to_ns(&TS);
+}
+
+static void writeDebugRecord(const PerfJITDebugInfoRecord &DebugRecord) {
+ assert(State && "PerfState not initialized");
+ LLVM_DEBUG(dbgs() << "Writing debug record with "
+ << DebugRecord.Entries.size() << " entries\n");
+ [[maybe_unused]] size_t Written = 0;
+ DIR Dir{RecHeader{static_cast<uint32_t>(DebugRecord.Prefix.Id),
+ DebugRecord.Prefix.TotalSize, perf_get_timestamp()},
+ DebugRecord.CodeAddr, DebugRecord.Entries.size()};
+ State->Dumpstream->write(reinterpret_cast<const char *>(&Dir), sizeof(Dir));
+ Written += sizeof(Dir);
+ for (auto &Die : DebugRecord.Entries) {
+ DIE d{Die.Addr, Die.Lineno, Die.Discrim};
+ State->Dumpstream->write(reinterpret_cast<const char *>(&d), sizeof(d));
+ State->Dumpstream->write(Die.Name.data(), Die.Name.size() + 1);
+ Written += sizeof(d) + Die.Name.size() + 1;
+ }
+ LLVM_DEBUG(dbgs() << "wrote " << Written << " bytes of debug info\n");
+}
+
+static void writeCodeRecord(const PerfJITCodeLoadRecord &CodeRecord) {
+ assert(State && "PerfState not initialized");
+ uint32_t Tid = get_threadid();
+ LLVM_DEBUG(dbgs() << "Writing code record with code size "
+ << CodeRecord.CodeSize << " and code index "
+ << CodeRecord.CodeIndex << "\n");
+ CLR Clr{RecHeader{static_cast<uint32_t>(CodeRecord.Prefix.Id),
+ CodeRecord.Prefix.TotalSize, perf_get_timestamp()},
+ State->Pid,
+ Tid,
+ CodeRecord.Vma,
+ CodeRecord.CodeAddr,
+ CodeRecord.CodeSize,
+ CodeRecord.CodeIndex};
+ LLVM_DEBUG(dbgs() << "wrote " << sizeof(Clr) << " bytes of CLR, "
+ << CodeRecord.Name.size() + 1 << " bytes of name, "
+ << CodeRecord.CodeSize << " bytes of code\n");
+ State->Dumpstream->write(reinterpret_cast<const char *>(&Clr), sizeof(Clr));
+ State->Dumpstream->write(CodeRecord.Name.data(), CodeRecord.Name.size() + 1);
+ State->Dumpstream->write((const char *)CodeRecord.CodeAddr,
+ CodeRecord.CodeSize);
+}
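Each code-load record written above is the fixed CLR header followed immediately by the NUL-terminated function name and a copy of the code bytes; perf later matches sampled addresses against the copied code. Sketch of the on-disk layout:

// [ CLR fixed header  ]  sizeof(CLR) bytes (RecHeader + pid/tid/addresses)
// [ name bytes + '\0' ]  CodeRecord.Name.size() + 1 bytes
// [ machine code      ]  CodeRecord.CodeSize bytes copied from CodeAddr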
+
+static void
+writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord &UnwindRecord) {
+ assert(State && "PerfState not initialized");
+ dbgs() << "Writing unwind record with unwind data size "
+ << UnwindRecord.UnwindDataSize << " and EH frame header size "
+ << UnwindRecord.EHFrameHdrSize << " and mapped size "
+ << UnwindRecord.MappedSize << "\n";
+ UWR Uwr{RecHeader{static_cast<uint32_t>(UnwindRecord.Prefix.Id),
+ UnwindRecord.Prefix.TotalSize, perf_get_timestamp()},
+ UnwindRecord.UnwindDataSize, UnwindRecord.EHFrameHdrSize,
+ UnwindRecord.MappedSize};
+ LLVM_DEBUG(dbgs() << "wrote " << sizeof(Uwr) << " bytes of UWR, "
+ << UnwindRecord.EHFrameHdrSize
+ << " bytes of EH frame header, "
+ << UnwindRecord.UnwindDataSize - UnwindRecord.EHFrameHdrSize
+ << " bytes of EH frame\n");
+ State->Dumpstream->write(reinterpret_cast<const char *>(&Uwr), sizeof(Uwr));
+ if (UnwindRecord.EHFrameHdrAddr)
+ State->Dumpstream->write((const char *)UnwindRecord.EHFrameHdrAddr,
+ UnwindRecord.EHFrameHdrSize);
+ else
+ State->Dumpstream->write(UnwindRecord.EHFrameHdr.data(),
+ UnwindRecord.EHFrameHdrSize);
+ State->Dumpstream->write((const char *)UnwindRecord.EHFrameAddr,
+ UnwindRecord.UnwindDataSize -
+ UnwindRecord.EHFrameHdrSize);
+}
+
+static Error registerJITLoaderPerfImpl(const PerfJITRecordBatch &Batch) {
+ if (!State)
+ return make_error<StringError>("PerfState not initialized",
+ inconvertibleErrorCode());
+
+ // Serialize the batch
+ std::lock_guard<std::mutex> Lock(Mutex);
+ if (Batch.UnwindingRecord.Prefix.TotalSize > 0)
+ writeUnwindRecord(Batch.UnwindingRecord);
+
+ for (const auto &DebugInfo : Batch.DebugInfoRecords)
+ writeDebugRecord(DebugInfo);
+
+ for (const auto &CodeLoad : Batch.CodeLoadRecords)
+ writeCodeRecord(CodeLoad);
+
+ State->Dumpstream->flush();
+
+ return Error::success();
+}
+
+struct Header {
+ uint32_t Magic; // characters "JiTD"
+ uint32_t Version; // header version
+ uint32_t TotalSize; // total size of header
+ uint32_t ElfMach; // elf mach target
+ uint32_t Pad1; // reserved
+ uint32_t Pid;
+ uint64_t Timestamp; // timestamp
+ uint64_t Flags; // flags
+};
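This header opens the jitdump file that perf's JIT support consumes. A typical workflow, assuming standard perf tooling (flag spellings per the perf documentation):

// Record with a monotonic clock so jitdump timestamps line up:
//   perf record -k mono -- ./my-jitted-program
// Fold the jitdump records into the profile:
//   perf inject --jit -i perf.data -o perf.jit.data
// Then report as usual:
//   perf report -i perf.jit.data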
+
+static Error OpenMarker(PerfState &State) {
+ // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap
+ // is captured either live (perf record running when we mmap) or in deferred
+ // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump
+ // file for more meta data info about the jitted code. Perf report/annotate
+ // detect this special filename and process the jitdump file.
+ //
+ // Mapping must be PROT_EXEC to ensure it is captured by perf record
+ // even when not using -d option.
+ State.MarkerAddr =
+ ::mmap(NULL, sys::Process::getPageSizeEstimate(), PROT_READ | PROT_EXEC,
+ MAP_PRIVATE, State.DumpFd, 0);
+
+ if (State.MarkerAddr == MAP_FAILED)
+ return make_error<llvm::StringError>("could not mmap JIT marker",
+ inconvertibleErrorCode());
+
+ return Error::success();
+}
+
+void CloseMarker(PerfState &State) {
+ if (!State.MarkerAddr)
+ return;
+
+ munmap(State.MarkerAddr, sys::Process::getPageSizeEstimate());
+ State.MarkerAddr = nullptr;
+}
+
+static Expected<Header> FillMachine(PerfState &State) {
+ Header Hdr;
+ Hdr.Magic = LLVM_PERF_JIT_MAGIC;
+ Hdr.Version = LLVM_PERF_JIT_VERSION;
+ Hdr.TotalSize = sizeof(Hdr);
+ Hdr.Pid = State.Pid;
+ Hdr.Timestamp = perf_get_timestamp();
+
+ char Id[16];
+ struct {
+ uint16_t e_type;
+ uint16_t e_machine;
+ } Info;
+
+ size_t RequiredMemory = sizeof(Id) + sizeof(Info);
+
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
+ MemoryBuffer::getFileSlice("/proc/self/exe", RequiredMemory, 0);
+
+  // This does not guarantee that enough data was actually read from the
+  // underlying file. Instead the trailing part of the buffer would be
+  // zeroed. Given the ELF signature check below that seems ok though:
+  // it's unlikely that the file ends right after the header fields, and
+  // the consequence would just be that perf wouldn't recognize the
+  // signature.
+ if (!MB)
+ return make_error<llvm::StringError>("could not open /proc/self/exe",
+ MB.getError());
+
+ memcpy(&Id, (*MB)->getBufferStart(), sizeof(Id));
+ memcpy(&Info, (*MB)->getBufferStart() + sizeof(Id), sizeof(Info));
+
+ // check ELF signature
+ if (Id[0] != 0x7f || Id[1] != 'E' || Id[2] != 'L' || Id[3] != 'F')
+ return make_error<llvm::StringError>("invalid ELF signature",
+ inconvertibleErrorCode());
+
+ Hdr.ElfMach = Info.e_machine;
+
+ return Hdr;
+}
+
+static Error InitDebuggingDir(PerfState &State) {
+ time_t Time;
+ struct tm LocalTime;
+ char TimeBuffer[sizeof("YYYYMMDD")];
+ SmallString<64> Path;
+
+ // search for location to dump data to
+ if (const char *BaseDir = getenv("JITDUMPDIR"))
+ Path.append(BaseDir);
+ else if (!sys::path::home_directory(Path))
+ Path = ".";
+
+ // create debug directory
+ Path += "/.debug/jit/";
+ if (auto EC = sys::fs::create_directories(Path)) {
+ std::string ErrStr;
+ raw_string_ostream ErrStream(ErrStr);
+ ErrStream << "could not create jit cache directory " << Path << ": "
+ << EC.message() << "\n";
+ return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode());
+ }
+
+ // create unique directory for dump data related to this process
+ time(&Time);
+ localtime_r(&Time, &LocalTime);
+ strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime);
+ Path += JIT_LANG "-jit-";
+ Path += TimeBuffer;
+
+ SmallString<128> UniqueDebugDir;
+
+ using sys::fs::createUniqueDirectory;
+ if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) {
+ std::string ErrStr;
+ raw_string_ostream ErrStream(ErrStr);
+ ErrStream << "could not create unique jit cache directory "
+ << UniqueDebugDir << ": " << EC.message() << "\n";
+ return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode());
+ }
+
+ State.JitPath = std::string(UniqueDebugDir.str());
+
+ return Error::success();
+}
+
+static Error registerJITLoaderPerfStartImpl() {
+ PerfState Tentative;
+ Tentative.Pid = sys::Process::getProcessId();
+ // check if clock-source is supported
+ if (!perf_get_timestamp())
+ return make_error<StringError>("kernel does not support CLOCK_MONOTONIC",
+ inconvertibleErrorCode());
+
+ if (auto Err = InitDebuggingDir(Tentative))
+ return Err;
+
+ std::string Filename;
+ raw_string_ostream FilenameBuf(Filename);
+ FilenameBuf << Tentative.JitPath << "/jit-" << Tentative.Pid << ".dump";
+
+ // Need to open ourselves, because we need to hand the FD to OpenMarker() and
+ // raw_fd_ostream doesn't expose the FD.
+  using sys::fs::openFileForReadWrite;
+ if (auto EC = openFileForReadWrite(FilenameBuf.str(), Tentative.DumpFd,
+ sys::fs::CD_CreateNew, sys::fs::OF_None)) {
+ std::string ErrStr;
+ raw_string_ostream ErrStream(ErrStr);
+ ErrStream << "could not open JIT dump file " << FilenameBuf.str() << ": "
+ << EC.message() << "\n";
+ return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode());
+ }
+
+ Tentative.Dumpstream =
+ std::make_unique<raw_fd_ostream>(Tentative.DumpFd, true);
+
+ auto Header = FillMachine(Tentative);
+ if (!Header)
+ return Header.takeError();
+
+ // signal this process emits JIT information
+ if (auto Err = OpenMarker(Tentative))
+ return Err;
+
+ Tentative.Dumpstream->write(reinterpret_cast<const char *>(&Header.get()),
+ sizeof(*Header));
+
+ // Everything initialized, can do profiling now.
+ if (Tentative.Dumpstream->has_error())
+ return make_error<StringError>("could not write JIT dump header",
+ inconvertibleErrorCode());
+
+ State = std::move(Tentative);
+ return Error::success();
+}
+
+static Error registerJITLoaderPerfEndImpl() {
+ if (!State)
+ return make_error<StringError>("PerfState not initialized",
+ inconvertibleErrorCode());
+
+ RecHeader Close;
+ Close.Id = static_cast<uint32_t>(PerfJITRecordType::JIT_CODE_CLOSE);
+ Close.TotalSize = sizeof(Close);
+ Close.Timestamp = perf_get_timestamp();
+ State->Dumpstream->write(reinterpret_cast<const char *>(&Close),
+ sizeof(Close));
+ if (State->MarkerAddr)
+ CloseMarker(*State);
+
+ State.reset();
+ return Error::success();
+}
+
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) {
+ using namespace orc::shared;
+ return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle(
+ Data, Size, registerJITLoaderPerfImpl)
+ .release();
+}
+
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) {
+ using namespace orc::shared;
+ return WrapperFunction<SPSError()>::handle(Data, Size,
+ registerJITLoaderPerfStartImpl)
+ .release();
+}
+
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) {
+ using namespace orc::shared;
+ return WrapperFunction<SPSError()>::handle(Data, Size,
+ registerJITLoaderPerfEndImpl)
+ .release();
+}
+
+#else
+
+using namespace llvm;
+using namespace llvm::orc;
+
+static Error badOS() {
+  return make_error<StringError>(
+      "unsupported OS (perf support is only available on Linux)",
+      inconvertibleErrorCode());
+}
+
+static Error badOSBatch(PerfJITRecordBatch &Batch) { return badOS(); }
+
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) {
+ using namespace shared;
+ return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle(Data, Size,
+ badOSBatch)
+ .release();
+}
+
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) {
+ using namespace shared;
+ return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release();
+}
+
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) {
+ using namespace shared;
+ return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release();
+}
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp
index 67bc379f9821..a585767bf474 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp
@@ -8,7 +8,9 @@
#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Process.h"
#include "llvm/TargetParser/Host.h"
@@ -206,6 +208,10 @@ Error SimpleRemoteEPCServer::sendSetupMessage(
"Dispatch function name should not be set");
EI.BootstrapSymbols[ExecutorSessionObjectName] = ExecutorAddr::fromPtr(this);
EI.BootstrapSymbols[DispatchFnName] = ExecutorAddr::fromPtr(jitDispatchEntry);
+ EI.BootstrapSymbols[rt::RegisterEHFrameSectionWrapperName] =
+ ExecutorAddr::fromPtr(&llvm_orc_registerEHFrameSectionWrapper);
+ EI.BootstrapSymbols[rt::DeregisterEHFrameSectionWrapperName] =
+ ExecutorAddr::fromPtr(&llvm_orc_deregisterEHFrameSectionWrapper);
using SPSSerialize =
shared::SPSArgList<shared::SPSSimpleRemoteEPCExecutorInfo>;
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
index 62cab22a1c45..e2b5ce49ba2e 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
@@ -275,7 +275,7 @@ void PerfJITEventListener::notifyObjectLoaded(
SectionIndex = SectOrErr.get()->getIndex();
// According to spec debugging info has to come before loading the
- // corresonding code load.
+ // corresponding code load.
DILineInfoTable Lines = Context->getLineInfoForAddressRange(
{*AddrOrErr, SectionIndex}, Size, FileLineInfoKind::AbsoluteFilePath);
@@ -447,7 +447,7 @@ void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr,
rec.CodeAddr = CodeAddr;
rec.NrEntry = Lines.size();
- // compute total size size of record (variable due to filenames)
+ // compute total size of record (variable due to filenames)
DILineInfoTable::iterator Begin = Lines.begin();
DILineInfoTable::iterator End = Lines.end();
for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
index 210fbf6e43e3..c153b4464568 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
@@ -42,7 +42,7 @@ JITSymbolFlags llvm::JITSymbolFlags::fromGlobalValue(const GlobalValue &GV) {
const auto &DL = M->getDataLayout();
StringRef LPGP = DL.getLinkerPrivateGlobalPrefix();
if (!LPGP.empty() && GV.getName().front() == '\01' &&
- GV.getName().substr(1).startswith(LPGP))
+ GV.getName().substr(1).starts_with(LPGP))
Flags &= ~JITSymbolFlags::Exported;
}
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
index bc42eebf3fec..fd11450b635b 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
@@ -269,7 +269,7 @@ RTDyldMemoryManager::getSymbolAddressInProcess(const std::string &Name) {
const char *NameStr = Name.c_str();
- // DynamicLibrary::SearchForAddresOfSymbol expects an unmangled 'C' symbol
+ // DynamicLibrary::SearchForAddressOfSymbol expects an unmangled 'C' symbol
// name so if we're on Darwin, strip the leading '_' off.
#ifdef __APPLE__
if (NameStr[0] == '_')
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
index 9255311f992d..25a2d8780fb5 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
@@ -83,7 +83,8 @@ uint64_t RuntimeDyldCOFF::getDLLImportOffset(unsigned SectionID, StubMap &Stubs,
StringRef Name,
bool SetSectionIDMinus1) {
LLVM_DEBUG(dbgs() << "Getting DLLImport entry for " << Name << "... ");
- assert(Name.startswith(getImportSymbolPrefix()) && "Not a DLLImport symbol?");
+ assert(Name.starts_with(getImportSymbolPrefix()) &&
+ "Not a DLLImport symbol?");
RelocationValueRef Reloc;
Reloc.SymbolName = Name.data();
auto I = Stubs.find(Reloc);
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
index ae1bb5a1da4b..7fadbdd6a1ff 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
@@ -10,9 +10,16 @@
#include "RuntimeDyldCheckerImpl.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/MSVCErrorWorkarounds.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -25,6 +32,19 @@
using namespace llvm;
+namespace {
+struct TargetInfo {
+ const Target *TheTarget;
+ std::unique_ptr<MCSubtargetInfo> STI;
+ std::unique_ptr<MCRegisterInfo> MRI;
+ std::unique_ptr<MCAsmInfo> MAI;
+ std::unique_ptr<MCContext> Ctx;
+ std::unique_ptr<MCDisassembler> Disassembler;
+ std::unique_ptr<MCInstrInfo> MII;
+ std::unique_ptr<MCInstPrinter> InstPrinter;
+};
+} // anonymous namespace
+
namespace llvm {
// Helper class that implements the language evaluated by RuntimeDyldChecker.
@@ -122,7 +142,7 @@ private:
std::tie(Token, Remaining) = parseNumberString(Expr);
else {
unsigned TokLen = 1;
- if (Expr.startswith("<<") || Expr.startswith(">>"))
+ if (Expr.starts_with("<<") || Expr.starts_with(">>"))
TokLen = 2;
Token = Expr.substr(0, TokLen);
}
@@ -157,9 +177,9 @@ private:
return std::make_pair(BinOpToken::Invalid, "");
// Handle the two 2-character tokens.
- if (Expr.startswith("<<"))
+ if (Expr.starts_with("<<"))
return std::make_pair(BinOpToken::ShiftLeft, Expr.substr(2).ltrim());
- if (Expr.startswith(">>"))
+ if (Expr.starts_with(">>"))
return std::make_pair(BinOpToken::ShiftRight, Expr.substr(2).ltrim());
// Handle one-character tokens.
@@ -222,7 +242,7 @@ private:
// On success, returns a pair containing the value of the operand, plus
// the expression remaining to be evaluated.
std::pair<EvalResult, StringRef> evalDecodeOperand(StringRef Expr) const {
- if (!Expr.startswith("("))
+ if (!Expr.starts_with("("))
return std::make_pair(unexpectedToken(Expr, Expr, "expected '('"), "");
StringRef RemainingExpr = Expr.substr(1).ltrim();
StringRef Symbol;
@@ -253,7 +273,7 @@ private:
"");
}
- if (!RemainingExpr.startswith(","))
+ if (!RemainingExpr.starts_with(","))
return std::make_pair(
unexpectedToken(RemainingExpr, RemainingExpr, "expected ','"), "");
RemainingExpr = RemainingExpr.substr(1).ltrim();
@@ -263,7 +283,7 @@ private:
if (OpIdxExpr.hasError())
return std::make_pair(OpIdxExpr, "");
- if (!RemainingExpr.startswith(")"))
+ if (!RemainingExpr.starts_with(")"))
return std::make_pair(
unexpectedToken(RemainingExpr, RemainingExpr, "expected ')'"), "");
RemainingExpr = RemainingExpr.substr(1).ltrim();
@@ -276,6 +296,20 @@ private:
"");
unsigned OpIdx = OpIdxExpr.getValue();
+
+ auto printInst = [this](StringRef Symbol, MCInst Inst,
+ raw_string_ostream &ErrMsgStream) {
+ auto TT = Checker.getTripleForSymbol(Checker.getTargetFlag(Symbol));
+ auto TI = getTargetInfo(TT, Checker.getCPU(), Checker.getFeatures());
+ if (auto E = TI.takeError()) {
+ errs() << "Error obtaining instruction printer: "
+ << toString(std::move(E)) << "\n";
+ return std::make_pair(EvalResult(ErrMsgStream.str()), "");
+ }
+ Inst.dump_pretty(ErrMsgStream, TI->InstPrinter.get());
+ return std::make_pair(EvalResult(ErrMsgStream.str()), "");
+ };
+
if (OpIdx >= Inst.getNumOperands()) {
std::string ErrMsg;
raw_string_ostream ErrMsgStream(ErrMsg);
@@ -284,8 +318,8 @@ private:
<< "'. Instruction has only "
<< format("%i", Inst.getNumOperands())
<< " operands.\nInstruction is:\n ";
- Inst.dump_pretty(ErrMsgStream, Checker.InstPrinter);
- return std::make_pair(EvalResult(ErrMsgStream.str()), "");
+
+ return printInst(Symbol, Inst, ErrMsgStream);
}
const MCOperand &Op = Inst.getOperand(OpIdx);
@@ -294,9 +328,8 @@ private:
raw_string_ostream ErrMsgStream(ErrMsg);
ErrMsgStream << "Operand '" << format("%i", OpIdx) << "' of instruction '"
<< Symbol << "' is not an immediate.\nInstruction is:\n ";
- Inst.dump_pretty(ErrMsgStream, Checker.InstPrinter);
- return std::make_pair(EvalResult(ErrMsgStream.str()), "");
+ return printInst(Symbol, Inst, ErrMsgStream);
}
return std::make_pair(EvalResult(Op.getImm()), RemainingExpr);
@@ -310,7 +343,7 @@ private:
// expression remaining to be evaluated.
std::pair<EvalResult, StringRef> evalNextPC(StringRef Expr,
ParseContext PCtx) const {
- if (!Expr.startswith("("))
+ if (!Expr.starts_with("("))
return std::make_pair(unexpectedToken(Expr, Expr, "expected '('"), "");
StringRef RemainingExpr = Expr.substr(1).ltrim();
StringRef Symbol;
@@ -321,7 +354,7 @@ private:
EvalResult(("Cannot decode unknown symbol '" + Symbol + "'").str()),
"");
- if (!RemainingExpr.startswith(")"))
+ if (!RemainingExpr.starts_with(")"))
return std::make_pair(
unexpectedToken(RemainingExpr, RemainingExpr, "expected ')'"), "");
RemainingExpr = RemainingExpr.substr(1).ltrim();
@@ -348,7 +381,7 @@ private:
// remaining to be evaluated.
std::pair<EvalResult, StringRef>
evalStubOrGOTAddr(StringRef Expr, ParseContext PCtx, bool IsStubAddr) const {
- if (!Expr.startswith("("))
+ if (!Expr.starts_with("("))
return std::make_pair(unexpectedToken(Expr, Expr, "expected '('"), "");
StringRef RemainingExpr = Expr.substr(1).ltrim();
@@ -359,7 +392,7 @@ private:
StubContainerName = RemainingExpr.substr(0, ComaIdx).rtrim();
RemainingExpr = RemainingExpr.substr(ComaIdx).ltrim();
- if (!RemainingExpr.startswith(","))
+ if (!RemainingExpr.starts_with(","))
return std::make_pair(
unexpectedToken(RemainingExpr, Expr, "expected ','"), "");
RemainingExpr = RemainingExpr.substr(1).ltrim();
@@ -367,7 +400,7 @@ private:
StringRef Symbol;
std::tie(Symbol, RemainingExpr) = parseSymbol(RemainingExpr);
- if (!RemainingExpr.startswith(")"))
+ if (!RemainingExpr.starts_with(")"))
return std::make_pair(
unexpectedToken(RemainingExpr, Expr, "expected ')'"), "");
RemainingExpr = RemainingExpr.substr(1).ltrim();
@@ -385,7 +418,7 @@ private:
std::pair<EvalResult, StringRef> evalSectionAddr(StringRef Expr,
ParseContext PCtx) const {
- if (!Expr.startswith("("))
+ if (!Expr.starts_with("("))
return std::make_pair(unexpectedToken(Expr, Expr, "expected '('"), "");
StringRef RemainingExpr = Expr.substr(1).ltrim();
@@ -396,7 +429,7 @@ private:
FileName = RemainingExpr.substr(0, ComaIdx).rtrim();
RemainingExpr = RemainingExpr.substr(ComaIdx).ltrim();
- if (!RemainingExpr.startswith(","))
+ if (!RemainingExpr.starts_with(","))
return std::make_pair(
unexpectedToken(RemainingExpr, Expr, "expected ','"), "");
RemainingExpr = RemainingExpr.substr(1).ltrim();
@@ -406,7 +439,7 @@ private:
SectionName = RemainingExpr.substr(0, CloseParensIdx).rtrim();
RemainingExpr = RemainingExpr.substr(CloseParensIdx).ltrim();
- if (!RemainingExpr.startswith(")"))
+ if (!RemainingExpr.starts_with(")"))
return std::make_pair(
unexpectedToken(RemainingExpr, Expr, "expected ')'"), "");
RemainingExpr = RemainingExpr.substr(1).ltrim();
@@ -422,7 +455,7 @@ private:
return std::make_pair(EvalResult(StubAddr), RemainingExpr);
}
- // Evaluate an identiefer expr, which may be a symbol, or a call to
+ // Evaluate an identifier expr, which may be a symbol, or a call to
// one of the builtin functions: get_insn_opcode or get_insn_length.
// Return the result, plus the expression remaining to be parsed.
std::pair<EvalResult, StringRef> evalIdentifierExpr(StringRef Expr,
@@ -447,7 +480,7 @@ private:
std::string ErrMsg("No known address for symbol '");
ErrMsg += Symbol;
ErrMsg += "'";
- if (Symbol.startswith("L"))
+ if (Symbol.starts_with("L"))
ErrMsg += " (this appears to be an assembler local label - "
" perhaps drop the 'L'?)";
@@ -468,7 +501,7 @@ private:
// pair representing the number and the expression remaining to be parsed.
std::pair<StringRef, StringRef> parseNumberString(StringRef Expr) const {
size_t FirstNonDigit = StringRef::npos;
- if (Expr.startswith("0x")) {
+ if (Expr.starts_with("0x")) {
FirstNonDigit = Expr.find_first_not_of("0123456789abcdefABCDEF", 2);
if (FirstNonDigit == StringRef::npos)
FirstNonDigit = Expr.size();
@@ -502,14 +535,14 @@ private:
// remaining to be parsed.
std::pair<EvalResult, StringRef> evalParensExpr(StringRef Expr,
ParseContext PCtx) const {
- assert(Expr.startswith("(") && "Not a parenthesized expression");
+ assert(Expr.starts_with("(") && "Not a parenthesized expression");
EvalResult SubExprResult;
StringRef RemainingExpr;
std::tie(SubExprResult, RemainingExpr) =
evalComplexExpr(evalSimpleExpr(Expr.substr(1).ltrim(), PCtx), PCtx);
if (SubExprResult.hasError())
return std::make_pair(SubExprResult, "");
- if (!RemainingExpr.startswith(")"))
+ if (!RemainingExpr.starts_with(")"))
return std::make_pair(
unexpectedToken(RemainingExpr, Expr, "expected ')'"), "");
RemainingExpr = RemainingExpr.substr(1).ltrim();
@@ -521,11 +554,11 @@ private:
// Return a pair containing the result, plus the expression remaining to be
// parsed.
std::pair<EvalResult, StringRef> evalLoadExpr(StringRef Expr) const {
- assert(Expr.startswith("*") && "Not a load expression");
+ assert(Expr.starts_with("*") && "Not a load expression");
StringRef RemainingExpr = Expr.substr(1).ltrim();
// Parse read size.
- if (!RemainingExpr.startswith("{"))
+ if (!RemainingExpr.starts_with("{"))
return std::make_pair(EvalResult("Expected '{' following '*'."), "");
RemainingExpr = RemainingExpr.substr(1).ltrim();
EvalResult ReadSizeExpr;
@@ -535,7 +568,7 @@ private:
uint64_t ReadSize = ReadSizeExpr.getValue();
if (ReadSize < 1 || ReadSize > 8)
return std::make_pair(EvalResult("Invalid size for dereference."), "");
- if (!RemainingExpr.startswith("}"))
+ if (!RemainingExpr.starts_with("}"))
return std::make_pair(EvalResult("Missing '}' for dereference."), "");
RemainingExpr = RemainingExpr.substr(1).ltrim();
@@ -592,7 +625,7 @@ private:
return std::make_pair(SubExprResult, RemainingExpr);
// Evaluate bit-slice if present.
- if (RemainingExpr.startswith("["))
+ if (RemainingExpr.starts_with("["))
std::tie(SubExprResult, RemainingExpr) =
evalSliceExpr(std::make_pair(SubExprResult, RemainingExpr));
@@ -612,7 +645,7 @@ private:
StringRef RemainingExpr;
std::tie(SubExprResult, RemainingExpr) = Ctx;
- assert(RemainingExpr.startswith("[") && "Not a slice expr.");
+ assert(RemainingExpr.starts_with("[") && "Not a slice expr.");
RemainingExpr = RemainingExpr.substr(1).ltrim();
EvalResult HighBitExpr;
@@ -621,7 +654,7 @@ private:
if (HighBitExpr.hasError())
return std::make_pair(HighBitExpr, RemainingExpr);
- if (!RemainingExpr.startswith(":"))
+ if (!RemainingExpr.starts_with(":"))
return std::make_pair(
unexpectedToken(RemainingExpr, RemainingExpr, "expected ':'"), "");
RemainingExpr = RemainingExpr.substr(1).ltrim();
@@ -632,7 +665,7 @@ private:
if (LowBitExpr.hasError())
return std::make_pair(LowBitExpr, RemainingExpr);
- if (!RemainingExpr.startswith("]"))
+ if (!RemainingExpr.starts_with("]"))
return std::make_pair(
unexpectedToken(RemainingExpr, RemainingExpr, "expected ']'"), "");
RemainingExpr = RemainingExpr.substr(1).ltrim();
@@ -662,7 +695,7 @@ private:
if (LHSResult.hasError() || RemainingExpr == "")
return std::make_pair(LHSResult, RemainingExpr);
- // Otherwise check if this is a binary expressioan.
+ // Otherwise check if this is a binary expression.
BinOpToken BinOp;
std::tie(BinOp, RemainingExpr) = parseBinOpToken(RemainingExpr);
@@ -687,31 +720,100 @@ private:
bool decodeInst(StringRef Symbol, MCInst &Inst, uint64_t &Size,
int64_t Offset) const {
- MCDisassembler *Dis = Checker.Disassembler;
+ auto TT = Checker.getTripleForSymbol(Checker.getTargetFlag(Symbol));
+ auto TI = getTargetInfo(TT, Checker.getCPU(), Checker.getFeatures());
+
+ if (auto E = TI.takeError()) {
+ errs() << "Error obtaining disassembler: " << toString(std::move(E))
+ << "\n";
+ return false;
+ }
+
StringRef SymbolMem = Checker.getSymbolContent(Symbol);
ArrayRef<uint8_t> SymbolBytes(SymbolMem.bytes_begin() + Offset,
SymbolMem.size() - Offset);
MCDisassembler::DecodeStatus S =
- Dis->getInstruction(Inst, Size, SymbolBytes, 0, nulls());
+ TI->Disassembler->getInstruction(Inst, Size, SymbolBytes, 0, nulls());
return (S == MCDisassembler::Success);
}
+
+ Expected<TargetInfo> getTargetInfo(const Triple &TT, const StringRef &CPU,
+ const SubtargetFeatures &TF) const {
+
+ auto TripleName = TT.str();
+ std::string ErrorStr;
+ const Target *TheTarget =
+ TargetRegistry::lookupTarget(TripleName, ErrorStr);
+ if (!TheTarget)
+ return make_error<StringError>("Error accessing target '" + TripleName +
+ "': " + ErrorStr,
+ inconvertibleErrorCode());
+
+ std::unique_ptr<MCSubtargetInfo> STI(
+ TheTarget->createMCSubtargetInfo(TripleName, CPU, TF.getString()));
+ if (!STI)
+ return make_error<StringError>("Unable to create subtarget for " +
+ TripleName,
+ inconvertibleErrorCode());
+
+ std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
+ if (!MRI)
+ return make_error<StringError>("Unable to create target register info "
+ "for " +
+ TripleName,
+ inconvertibleErrorCode());
+
+ MCTargetOptions MCOptions;
+ std::unique_ptr<MCAsmInfo> MAI(
+ TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
+ if (!MAI)
+ return make_error<StringError>("Unable to create target asm info " +
+ TripleName,
+ inconvertibleErrorCode());
+
+ auto Ctx = std::make_unique<MCContext>(Triple(TripleName), MAI.get(),
+ MRI.get(), STI.get());
+
+ std::unique_ptr<MCDisassembler> Disassembler(
+ TheTarget->createMCDisassembler(*STI, *Ctx));
+ if (!Disassembler)
+ return make_error<StringError>("Unable to create disassembler for " +
+ TripleName,
+ inconvertibleErrorCode());
+
+ std::unique_ptr<MCInstrInfo> MII(TheTarget->createMCInstrInfo());
+ if (!MII)
+ return make_error<StringError>("Unable to create instruction info for" +
+ TripleName,
+ inconvertibleErrorCode());
+
+ std::unique_ptr<MCInstPrinter> InstPrinter(TheTarget->createMCInstPrinter(
+ Triple(TripleName), 0, *MAI, *MII, *MRI));
+ if (!InstPrinter)
+ return make_error<StringError>(
+ "Unable to create instruction printer for" + TripleName,
+ inconvertibleErrorCode());
+
+ return TargetInfo({TheTarget, std::move(STI), std::move(MRI),
+ std::move(MAI), std::move(Ctx), std::move(Disassembler),
+ std::move(MII), std::move(InstPrinter)});
+ }
};
} // namespace llvm
RuntimeDyldCheckerImpl::RuntimeDyldCheckerImpl(
IsSymbolValidFunction IsSymbolValid, GetSymbolInfoFunction GetSymbolInfo,
GetSectionInfoFunction GetSectionInfo, GetStubInfoFunction GetStubInfo,
- GetGOTInfoFunction GetGOTInfo, support::endianness Endianness,
- MCDisassembler *Disassembler, MCInstPrinter *InstPrinter,
- raw_ostream &ErrStream)
+ GetGOTInfoFunction GetGOTInfo, llvm::endianness Endianness, Triple TT,
+ StringRef CPU, SubtargetFeatures TF, raw_ostream &ErrStream)
: IsSymbolValid(std::move(IsSymbolValid)),
GetSymbolInfo(std::move(GetSymbolInfo)),
GetSectionInfo(std::move(GetSectionInfo)),
GetStubInfo(std::move(GetStubInfo)), GetGOTInfo(std::move(GetGOTInfo)),
- Endianness(Endianness), Disassembler(Disassembler),
- InstPrinter(InstPrinter), ErrStream(ErrStream) {}
+ Endianness(Endianness), TT(std::move(TT)), CPU(std::move(CPU)),
+ TF(std::move(TF)), ErrStream(ErrStream) {}
bool RuntimeDyldCheckerImpl::check(StringRef CheckExpr) const {
CheckExpr = CheckExpr.trim();
@@ -744,7 +846,7 @@ bool RuntimeDyldCheckerImpl::checkAllRulesInBuffer(StringRef RulePrefix,
++LineEnd;
StringRef Line(LineStart, LineEnd - LineStart);
- if (Line.startswith(RulePrefix))
+ if (Line.starts_with(RulePrefix))
CheckExpr += Line.substr(RulePrefix.size()).str();
// If there's a check expr string...
@@ -822,6 +924,36 @@ StringRef RuntimeDyldCheckerImpl::getSymbolContent(StringRef Symbol) const {
return {SymInfo->getContent().data(), SymInfo->getContent().size()};
}
+TargetFlagsType RuntimeDyldCheckerImpl::getTargetFlag(StringRef Symbol) const {
+ auto SymInfo = GetSymbolInfo(Symbol);
+ if (!SymInfo) {
+ logAllUnhandledErrors(SymInfo.takeError(), errs(), "RTDyldChecker: ");
+ return TargetFlagsType{};
+ }
+ return SymInfo->getTargetFlags();
+}
+
+Triple
+RuntimeDyldCheckerImpl::getTripleForSymbol(TargetFlagsType Flag) const {
+ Triple TheTriple = TT;
+
+ switch (TT.getArch()) {
+ case Triple::ArchType::arm:
+ if (~Flag & 0x1)
+ return TT;
+ TheTriple.setArchName((Twine("thumb") + TT.getArchName().substr(3)).str());
+ return TheTriple;
+ case Triple::ArchType::thumb:
+ if (Flag & 0x1)
+ return TT;
+ TheTriple.setArchName((Twine("arm") + TT.getArchName().substr(5)).str());
+ return TheTriple;
+
+ default:
+ return TT;
+ }
+}
+
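
getTripleForSymbol above treats bit 0 of the per-symbol target flag as "Thumb". A minimal sketch of the arch-name rewrite it performs, with an illustrative triple (only the arch prefix changes; sub-arch, vendor, OS, and environment are preserved):

    #include "llvm/ADT/Twine.h"
    #include "llvm/TargetParser/Triple.h"

    llvm::Triple ArmTT("armv7-none-linux-gnueabi"); // illustrative triple
    llvm::Triple ThumbTT = ArmTT;
    // Same substr(3) renaming as above: "armv7..." -> "thumbv7...".
    ThumbTT.setArchName(
        (llvm::Twine("thumb") + ArmTT.getArchName().substr(3)).str());
    // ThumbTT.str() == "thumbv7-none-linux-gnueabi"
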
std::pair<uint64_t, std::string> RuntimeDyldCheckerImpl::getSectionAddr(
StringRef FileName, StringRef SectionName, bool IsInsideLoad) const {
@@ -884,14 +1016,13 @@ std::pair<uint64_t, std::string> RuntimeDyldCheckerImpl::getStubOrGOTAddrFor(
RuntimeDyldChecker::RuntimeDyldChecker(
IsSymbolValidFunction IsSymbolValid, GetSymbolInfoFunction GetSymbolInfo,
GetSectionInfoFunction GetSectionInfo, GetStubInfoFunction GetStubInfo,
- GetGOTInfoFunction GetGOTInfo, support::endianness Endianness,
- MCDisassembler *Disassembler, MCInstPrinter *InstPrinter,
- raw_ostream &ErrStream)
+ GetGOTInfoFunction GetGOTInfo, llvm::endianness Endianness, Triple TT,
+ StringRef CPU, SubtargetFeatures TF, raw_ostream &ErrStream)
: Impl(::std::make_unique<RuntimeDyldCheckerImpl>(
std::move(IsSymbolValid), std::move(GetSymbolInfo),
std::move(GetSectionInfo), std::move(GetStubInfo),
- std::move(GetGOTInfo), Endianness, Disassembler, InstPrinter,
- ErrStream)) {}
+ std::move(GetGOTInfo), Endianness, std::move(TT), std::move(CPU),
+ std::move(TF), ErrStream)) {}
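
For callers, this constructor change means passing a target description instead of prebuilt MC objects. A hedged migration sketch (the five callbacks are assumed to be defined elsewhere with the corresponding RuntimeDyldChecker::*Function signatures; triple and endianness are illustrative):

    llvm::Triple TT("x86_64-unknown-linux-gnu"); // illustrative target
    llvm::SubtargetFeatures Features;            // empty feature set
    llvm::RuntimeDyldChecker Checker(
        IsSymbolValid, GetSymbolInfo, GetSectionInfo, GetStubInfo,
        GetGOTInfo, llvm::endianness::little, TT, /*CPU=*/"", Features,
        llvm::errs());
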
RuntimeDyldChecker::~RuntimeDyldChecker() = default;
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
index f564b0035bff..9f44a9389f47 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
@@ -13,6 +13,9 @@
namespace llvm {
+/// Holds target-specific properties for a symbol.
+using TargetFlagsType = uint8_t;
+
class RuntimeDyldCheckerImpl {
friend class RuntimeDyldChecker;
friend class RuntimeDyldCheckerExprEval;
@@ -25,12 +28,13 @@ class RuntimeDyldCheckerImpl {
using GetGOTInfoFunction = RuntimeDyldChecker::GetGOTInfoFunction;
public:
- RuntimeDyldCheckerImpl(
- IsSymbolValidFunction IsSymbolValid, GetSymbolInfoFunction GetSymbolInfo,
- GetSectionInfoFunction GetSectionInfo, GetStubInfoFunction GetStubInfo,
- GetGOTInfoFunction GetGOTInfo, support::endianness Endianness,
- MCDisassembler *Disassembler, MCInstPrinter *InstPrinter,
- llvm::raw_ostream &ErrStream);
+ RuntimeDyldCheckerImpl(IsSymbolValidFunction IsSymbolValid,
+ GetSymbolInfoFunction GetSymbolInfo,
+ GetSectionInfoFunction GetSectionInfo,
+ GetStubInfoFunction GetStubInfo,
+ GetGOTInfoFunction GetGOTInfo,
+ llvm::endianness Endianness, Triple TT, StringRef CPU,
+ SubtargetFeatures TF, llvm::raw_ostream &ErrStream);
bool check(StringRef CheckExpr) const;
bool checkAllRulesInBuffer(StringRef RulePrefix, MemoryBuffer *MemBuf) const;
@@ -49,6 +53,11 @@ private:
StringRef getSymbolContent(StringRef Symbol) const;
+ TargetFlagsType getTargetFlag(StringRef Symbol) const;
+ Triple getTripleForSymbol(TargetFlagsType Flag) const;
+ StringRef getCPU() const { return CPU; }
+ SubtargetFeatures getFeatures() const { return TF; }
+
std::pair<uint64_t, std::string> getSectionAddr(StringRef FileName,
StringRef SectionName,
bool IsInsideLoad) const;
@@ -64,9 +73,10 @@ private:
GetSectionInfoFunction GetSectionInfo;
GetStubInfoFunction GetStubInfo;
GetGOTInfoFunction GetGOTInfo;
- support::endianness Endianness;
- MCDisassembler *Disassembler;
- MCInstPrinter *InstPrinter;
+ llvm::endianness Endianness;
+ Triple TT;
+ std::string CPU;
+ SubtargetFeatures TF;
llvm::raw_ostream &ErrStream;
};
}
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index d439b1b4ebfb..9fdabf310d6e 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -35,7 +35,8 @@ static void or32AArch64Imm(void *L, uint64_t Imm) {
}
template <class T> static void write(bool isBE, void *P, T V) {
- isBE ? write<T, support::big>(P, V) : write<T, support::little>(P, V);
+ isBE ? write<T, llvm::endianness::big>(P, V)
+ : write<T, llvm::endianness::little>(P, V);
}
static void write32AArch64Addr(void *L, uint64_t Imm) {
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index dfdd98cb3a34..b73d2af8c0c4 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -108,7 +108,7 @@ private:
uint64_t findOrAllocGOTEntry(const RelocationValueRef &Value,
unsigned GOTRelType);
- // Resolve the relvative address of GOTOffset in Section ID and place
+ // Resolve the relative address of GOTOffset in Section ID and place
// it at the given Offset
void resolveGOTOffsetRelocation(unsigned SectionID, uint64_t Offset,
uint64_t GOTOffset, uint32_t Type);
@@ -121,8 +121,8 @@ private:
// Compute the address in memory where we can find the placeholder
void *computePlaceholderAddress(unsigned SectionID, uint64_t Offset) const;
- // Split out common case for createing the RelocationEntry for when the relocation requires
- // no particular advanced processing.
+ // Split out common case for creating the RelocationEntry for when the
+ // relocation requires no particular advanced processing.
void processSimpleRelocation(unsigned SectionID, uint64_t Offset, unsigned RelType, RelocationValueRef Value);
// Return matching *LO16 relocation (Mips specific)
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index 501417db421a..73e2b365f109 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -301,7 +301,7 @@ protected:
// won't be interleaved between modules. It is also used in mapSectionAddress
// and resolveRelocations to protect write access to internal data structures.
//
- // loadObject may be called on the same thread during the handling of of
+ // loadObject may be called on the same thread during the handling of
// processRelocations, and that's OK. The handling of the relocation lists
// is written in such a way as to work correctly if new elements are added to
// the end of the list while the list is being processed.
@@ -318,18 +318,24 @@ protected:
std::string ErrorStr;
void writeInt16BE(uint8_t *Addr, uint16_t Value) {
- llvm::support::endian::write<uint16_t, llvm::support::unaligned>(
- Addr, Value, IsTargetLittleEndian ? support::little : support::big);
+ llvm::support::endian::write<uint16_t>(Addr, Value,
+ IsTargetLittleEndian
+ ? llvm::endianness::little
+ : llvm::endianness::big);
}
void writeInt32BE(uint8_t *Addr, uint32_t Value) {
- llvm::support::endian::write<uint32_t, llvm::support::unaligned>(
- Addr, Value, IsTargetLittleEndian ? support::little : support::big);
+ llvm::support::endian::write<uint32_t>(Addr, Value,
+ IsTargetLittleEndian
+ ? llvm::endianness::little
+ : llvm::endianness::big);
}
void writeInt64BE(uint8_t *Addr, uint64_t Value) {
- llvm::support::endian::write<uint64_t, llvm::support::unaligned>(
- Addr, Value, IsTargetLittleEndian ? support::little : support::big);
+ llvm::support::endian::write<uint64_t>(Addr, Value,
+ IsTargetLittleEndian
+ ? llvm::endianness::little
+ : llvm::endianness::big);
}
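
The rewritten helpers rely on the llvm::support::endian::write overload that takes the byte order as a runtime argument (and, like the removed support::unaligned form, tolerates unaligned addresses). A small standalone sketch:

    #include "llvm/Support/Endian.h"

    uint8_t Buf[4];
    llvm::support::endian::write<uint32_t>(Buf, 0x11223344u,
                                           llvm::endianness::big);
    // Buf now holds {0x11, 0x22, 0x33, 0x44}.
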
virtual void setMipsABI(const ObjectFile &Obj) {
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFAArch64.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFAArch64.h
index da381986e9de..66c9753a72fd 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFAArch64.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFAArch64.h
@@ -27,7 +27,7 @@ using namespace llvm::support::endian;
namespace llvm {
// This relocation type is used for handling long branch instruction
-// throught the Stub.
+// through the Stub.
enum InternalRelocationType : unsigned {
INTERNAL_REL_ARM64_LONG_BRANCH26 = 0x111,
};
@@ -174,7 +174,7 @@ public:
unsigned TargetSectionID = -1;
uint64_t TargetOffset = -1;
- if (TargetName.startswith(getImportSymbolPrefix())) {
+ if (TargetName.starts_with(getImportSymbolPrefix())) {
TargetSectionID = SectionID;
TargetOffset = getDLLImportOffset(SectionID, Stubs, TargetName);
TargetName = StringRef();
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h
index 2a54728fd0bf..0d5afc289b8c 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h
@@ -60,7 +60,7 @@ public:
unsigned TargetSectionID = -1;
uint64_t TargetOffset = -1;
- if (TargetName.startswith(getImportSymbolPrefix())) {
+ if (TargetName.starts_with(getImportSymbolPrefix())) {
TargetSectionID = SectionID;
TargetOffset = getDLLImportOffset(SectionID, Stubs, TargetName, true);
TargetName = StringRef();
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
index 22f1cf33158c..c079d8896c1d 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
@@ -54,6 +54,28 @@ public:
return 16; // 8-byte load instructions, 4-byte jump, 4-byte padding
}
+ Expected<JITSymbolFlags> getJITSymbolFlags(const SymbolRef &SR) override {
+
+ auto Flags = RuntimeDyldImpl::getJITSymbolFlags(SR);
+
+ if (!Flags) {
+ return Flags.takeError();
+ }
+ auto SectionIterOrErr = SR.getSection();
+ if (!SectionIterOrErr) {
+ return SectionIterOrErr.takeError();
+ }
+ SectionRef Sec = *SectionIterOrErr.get();
+ const object::COFFObjectFile *COFFObjPtr =
+ cast<object::COFFObjectFile>(Sec.getObject());
+ const coff_section *CoffSec = COFFObjPtr->getCOFFSection(Sec);
+ bool isThumb = CoffSec->Characteristics & COFF::IMAGE_SCN_MEM_16BIT;
+
+ Flags->getTargetFlags() = isThumb;
+
+ return Flags;
+ }
+
Align getStubAlignment() override { return Align(1); }
Expected<object::relocation_iterator>
@@ -107,7 +129,7 @@ public:
unsigned TargetSectionID = -1;
uint64_t TargetOffset = -1;
- if (TargetName.startswith(getImportSymbolPrefix())) {
+ if (TargetName.starts_with(getImportSymbolPrefix())) {
TargetSectionID = SectionID;
TargetOffset = getDLLImportOffset(SectionID, Stubs, TargetName, true);
TargetName = StringRef();
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
index 89156b992d87..984a8d765c84 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
@@ -134,6 +134,13 @@ public:
break;
}
+ case COFF::IMAGE_REL_AMD64_SECTION: {
+      assert(static_cast<int32_t>(RE.SectionID) <= INT16_MAX &&
+             "Relocation overflow");
+      assert(static_cast<int32_t>(RE.SectionID) >= INT16_MIN &&
+             "Relocation underflow");
+ writeBytesUnaligned(RE.SectionID, Target, 2);
+ break;
+ }
+
default:
llvm_unreachable("Relocation type not implemented yet!");
break;
@@ -219,7 +226,7 @@ public:
unsigned TargetSectionID = 0;
uint64_t TargetOffset = 0;
- if (TargetName.startswith(getImportSymbolPrefix())) {
+ if (TargetName.starts_with(getImportSymbolPrefix())) {
assert(IsExtern && "DLLImport not marked extern?");
TargetSectionID = SectionID;
TargetOffset = getDLLImportOffset(SectionID, Stubs, TargetName);
diff --git a/contrib/llvm-project/llvm/lib/FileCheck/FileCheck.cpp b/contrib/llvm-project/llvm/lib/FileCheck/FileCheck.cpp
index 3e4514f2545b..b728c14d288a 100644
--- a/contrib/llvm-project/llvm/lib/FileCheck/FileCheck.cpp
+++ b/contrib/llvm-project/llvm/lib/FileCheck/FileCheck.cpp
@@ -78,18 +78,9 @@ Expected<std::string> ExpressionFormat::getWildcardRegex() const {
}
Expected<std::string>
-ExpressionFormat::getMatchingString(ExpressionValue IntegerValue) const {
- APInt IntValue = IntegerValue.getAPIntValue();
- // Error out for values that cannot be represented by the appropriate 64-bit
- // integer (e.g. int64_t for a signed format) to keep the getter of
- // ExpressionValue as an APInt an NFC.
- if (Value == Kind::Signed) {
- if (!IntValue.isSignedIntN(64))
- return make_error<OverflowError>();
- } else {
- if (!IntValue.isIntN(64))
- return make_error<OverflowError>();
- }
+ExpressionFormat::getMatchingString(APInt IntValue) const {
+ if (Value != Kind::Signed && IntValue.isNegative())
+ return make_error<OverflowError>();
unsigned Radix;
bool UpperCase = false;
@@ -129,140 +120,122 @@ ExpressionFormat::getMatchingString(ExpressionValue IntegerValue) const {
.str();
}
-Expected<ExpressionValue>
-ExpressionFormat::valueFromStringRepr(StringRef StrVal,
- const SourceMgr &SM) const {
- bool ValueIsSigned = Value == Kind::Signed;
- // Both the FileCheck utility and library only call this method with a valid
- // value in StrVal. This is guaranteed by the regex returned by
- // getWildcardRegex() above. Only underflow and overflow errors can thus
- // occur. However new uses of this method could be added in the future so
- // the error message does not make assumptions about StrVal.
- StringRef IntegerParseErrorStr = "unable to represent numeric value";
- if (ValueIsSigned) {
- int64_t SignedValue;
-
- if (StrVal.getAsInteger(10, SignedValue))
- return ErrorDiagnostic::get(SM, StrVal, IntegerParseErrorStr);
+static unsigned nextAPIntBitWidth(unsigned BitWidth) {
+ return (BitWidth < APInt::APINT_BITS_PER_WORD) ? APInt::APINT_BITS_PER_WORD
+ : BitWidth * 2;
+}
- return ExpressionValue(SignedValue);
- }
+static APInt toSigned(APInt AbsVal, bool Negative) {
+ if (AbsVal.isSignBitSet())
+ AbsVal = AbsVal.zext(nextAPIntBitWidth(AbsVal.getBitWidth()));
+ APInt Result = AbsVal;
+ if (Negative)
+ Result.negate();
+ return Result;
+}
+APInt ExpressionFormat::valueFromStringRepr(StringRef StrVal,
+ const SourceMgr &SM) const {
+ bool ValueIsSigned = Value == Kind::Signed;
+ bool Negative = StrVal.consume_front("-");
bool Hex = Value == Kind::HexUpper || Value == Kind::HexLower;
- uint64_t UnsignedValue;
- bool MissingFormPrefix = AlternateForm && !StrVal.consume_front("0x");
+ bool MissingFormPrefix =
+ !ValueIsSigned && AlternateForm && !StrVal.consume_front("0x");
(void)MissingFormPrefix;
assert(!MissingFormPrefix && "missing alternate form prefix");
- if (StrVal.getAsInteger(Hex ? 16 : 10, UnsignedValue))
- return ErrorDiagnostic::get(SM, StrVal, IntegerParseErrorStr);
-
- return ExpressionValue(UnsignedValue);
+ APInt ResultValue;
+ [[maybe_unused]] bool ParseFailure =
+ StrVal.getAsInteger(Hex ? 16 : 10, ResultValue);
+ // Both the FileCheck utility and library only call this method with a valid
+ // value in StrVal. This is guaranteed by the regex returned by
+ // getWildcardRegex() above.
+ assert(!ParseFailure && "unable to represent numeric value");
+ return toSigned(ResultValue, Negative);
}
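
The file-local toSigned helper widens before negating because StrVal.getAsInteger produces an APInt just wide enough for the magnitude; without the zero-extension, a magnitude with its top bit set would change value under negation. A worked example:

    // "-128" parses to the 8-bit magnitude 0x80 (sign bit set).
    llvm::APInt Abs(8, 0x80);
    // Zero-extended to the next width (64 bits here), then negated.
    llvm::APInt V = toSigned(Abs, /*Negative=*/true);
    // V is a 64-bit APInt holding -128.
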
-Expected<ExpressionValue> llvm::operator+(const ExpressionValue &LeftOperand,
- const ExpressionValue &RightOperand) {
- bool Overflow;
- APInt Result = LeftOperand.getAPIntValue().sadd_ov(
- RightOperand.getAPIntValue(), Overflow);
- if (Overflow ||
- (Result.isNegative() && !Result.isSignedIntN(Result.getBitWidth() - 1)))
- return make_error<OverflowError>();
-
- if (Result.isNegative())
- return ExpressionValue(Result.getSExtValue());
- else
- return ExpressionValue(Result.getZExtValue());
+Expected<APInt> llvm::exprAdd(const APInt &LeftOperand,
+ const APInt &RightOperand, bool &Overflow) {
+ return LeftOperand.sadd_ov(RightOperand, Overflow);
}
-Expected<ExpressionValue> llvm::operator-(const ExpressionValue &LeftOperand,
- const ExpressionValue &RightOperand) {
- bool Overflow;
- APInt Result = LeftOperand.getAPIntValue().ssub_ov(
- RightOperand.getAPIntValue(), Overflow);
- if (Overflow ||
- (Result.isNegative() && !Result.isSignedIntN(Result.getBitWidth() - 1)))
- return make_error<OverflowError>();
-
- if (Result.isNegative())
- return ExpressionValue(Result.getSExtValue());
- else
- return ExpressionValue(Result.getZExtValue());
+Expected<APInt> llvm::exprSub(const APInt &LeftOperand,
+ const APInt &RightOperand, bool &Overflow) {
+ return LeftOperand.ssub_ov(RightOperand, Overflow);
}
-Expected<ExpressionValue> llvm::operator*(const ExpressionValue &LeftOperand,
- const ExpressionValue &RightOperand) {
- bool Overflow;
- APInt Result = LeftOperand.getAPIntValue().smul_ov(
- RightOperand.getAPIntValue(), Overflow);
- if (Overflow ||
- (Result.isNegative() && !Result.isSignedIntN(Result.getBitWidth() - 1)))
- return make_error<OverflowError>();
-
- if (Result.isNegative())
- return ExpressionValue(Result.getSExtValue());
- else
- return ExpressionValue(Result.getZExtValue());
+Expected<APInt> llvm::exprMul(const APInt &LeftOperand,
+ const APInt &RightOperand, bool &Overflow) {
+ return LeftOperand.smul_ov(RightOperand, Overflow);
}
-Expected<ExpressionValue> llvm::operator/(const ExpressionValue &LeftOperand,
- const ExpressionValue &RightOperand) {
+Expected<APInt> llvm::exprDiv(const APInt &LeftOperand,
+ const APInt &RightOperand, bool &Overflow) {
// Check for division by zero.
- if (RightOperand.getAPIntValue().isZero())
+ if (RightOperand.isZero())
return make_error<OverflowError>();
- bool Overflow;
- APInt Result = LeftOperand.getAPIntValue().sdiv_ov(
- RightOperand.getAPIntValue(), Overflow);
- if (Overflow ||
- (Result.isNegative() && !Result.isSignedIntN(Result.getBitWidth() - 1)))
- return make_error<OverflowError>();
-
- if (Result.isNegative())
- return ExpressionValue(Result.getSExtValue());
- else
- return ExpressionValue(Result.getZExtValue());
+ return LeftOperand.sdiv_ov(RightOperand, Overflow);
}
-Expected<ExpressionValue> llvm::max(const ExpressionValue &LeftOperand,
- const ExpressionValue &RightOperand) {
- return LeftOperand.getAPIntValue().slt(RightOperand.getAPIntValue())
- ? RightOperand
- : LeftOperand;
+Expected<APInt> llvm::exprMax(const APInt &LeftOperand,
+ const APInt &RightOperand, bool &Overflow) {
+ Overflow = false;
+ return LeftOperand.slt(RightOperand) ? RightOperand : LeftOperand;
}
-Expected<ExpressionValue> llvm::min(const ExpressionValue &LeftOperand,
- const ExpressionValue &RightOperand) {
- if (cantFail(max(LeftOperand, RightOperand)).getAPIntValue() ==
- LeftOperand.getAPIntValue())
+Expected<APInt> llvm::exprMin(const APInt &LeftOperand,
+ const APInt &RightOperand, bool &Overflow) {
+ Overflow = false;
+ if (cantFail(exprMax(LeftOperand, RightOperand, Overflow)) == LeftOperand)
return RightOperand;
return LeftOperand;
}
-Expected<ExpressionValue> NumericVariableUse::eval() const {
- std::optional<ExpressionValue> Value = Variable->getValue();
+Expected<APInt> NumericVariableUse::eval() const {
+ std::optional<APInt> Value = Variable->getValue();
if (Value)
return *Value;
return make_error<UndefVarError>(getExpressionStr());
}
-Expected<ExpressionValue> BinaryOperation::eval() const {
- Expected<ExpressionValue> LeftOp = LeftOperand->eval();
- Expected<ExpressionValue> RightOp = RightOperand->eval();
+Expected<APInt> BinaryOperation::eval() const {
+ Expected<APInt> MaybeLeftOp = LeftOperand->eval();
+ Expected<APInt> MaybeRightOp = RightOperand->eval();
// Bubble up any error (e.g. undefined variables) in the recursive
// evaluation.
- if (!LeftOp || !RightOp) {
+ if (!MaybeLeftOp || !MaybeRightOp) {
Error Err = Error::success();
- if (!LeftOp)
- Err = joinErrors(std::move(Err), LeftOp.takeError());
- if (!RightOp)
- Err = joinErrors(std::move(Err), RightOp.takeError());
+ if (!MaybeLeftOp)
+ Err = joinErrors(std::move(Err), MaybeLeftOp.takeError());
+ if (!MaybeRightOp)
+ Err = joinErrors(std::move(Err), MaybeRightOp.takeError());
return std::move(Err);
}
- return EvalBinop(*LeftOp, *RightOp);
+ APInt LeftOp = *MaybeLeftOp;
+ APInt RightOp = *MaybeRightOp;
+ bool Overflow;
+ // Ensure both operands have the same bitwidth.
+ unsigned LeftBitWidth = LeftOp.getBitWidth();
+ unsigned RightBitWidth = RightOp.getBitWidth();
+ unsigned NewBitWidth = std::max(LeftBitWidth, RightBitWidth);
+ LeftOp = LeftOp.sext(NewBitWidth);
+ RightOp = RightOp.sext(NewBitWidth);
+ do {
+ Expected<APInt> MaybeResult = EvalBinop(LeftOp, RightOp, Overflow);
+ if (!MaybeResult)
+ return MaybeResult.takeError();
+
+ if (!Overflow)
+ return MaybeResult;
+
+ NewBitWidth = nextAPIntBitWidth(NewBitWidth);
+ LeftOp = LeftOp.sext(NewBitWidth);
+ RightOp = RightOp.sext(NewBitWidth);
+ } while (true);
}
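
The retry loop means arithmetic overflow never surfaces as an error from eval(): operands are sign-extended to the next width until the signed operation succeeds, so only genuine failures (such as exprDiv's division by zero) propagate. A sketch of what one widening step buys for exprAdd (as declared in FileCheckImpl.h):

    bool Overflow;
    llvm::APInt L(64, INT64_MAX, /*isSigned=*/true);
    llvm::APInt R(64, 1);
    (void)llvm::cantFail(exprAdd(L, R, Overflow));  // Overflow == true
    llvm::APInt Sum =
        llvm::cantFail(exprAdd(L.sext(128), R.sext(128), Overflow));
    // Overflow == false; Sum holds 2^63 exactly.
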
Expected<ExpressionFormat>
@@ -295,8 +268,7 @@ BinaryOperation::getImplicitFormat(const SourceMgr &SM) const {
Expected<std::string> NumericSubstitution::getResult() const {
assert(ExpressionPointer->getAST() != nullptr &&
"Substituting empty expression");
- Expected<ExpressionValue> EvaluatedValue =
- ExpressionPointer->getAST()->eval();
+ Expected<APInt> EvaluatedValue = ExpressionPointer->getAST()->eval();
if (!EvaluatedValue)
return EvaluatedValue.takeError();
ExpressionFormat Format = ExpressionPointer->getFormat();
@@ -432,7 +404,7 @@ Expected<std::unique_ptr<ExpressionAST>> Pattern::parseNumericOperand(
StringRef &Expr, AllowedOperand AO, bool MaybeInvalidConstraint,
std::optional<size_t> LineNumber, FileCheckPatternContext *Context,
const SourceMgr &SM) {
- if (Expr.startswith("(")) {
+ if (Expr.starts_with("(")) {
if (AO != AllowedOperand::Any)
return ErrorDiagnostic::get(
SM, Expr, "parenthesized expression not permitted here");
@@ -445,7 +417,7 @@ Expected<std::unique_ptr<ExpressionAST>> Pattern::parseNumericOperand(
parseVariable(Expr, SM);
if (ParseVarResult) {
// Try to parse a function call.
- if (Expr.ltrim(SpaceChars).startswith("(")) {
+ if (Expr.ltrim(SpaceChars).starts_with("(")) {
if (AO != AllowedOperand::Any)
return ErrorDiagnostic::get(SM, ParseVarResult->Name,
"unexpected function call");
@@ -466,21 +438,17 @@ Expected<std::unique_ptr<ExpressionAST>> Pattern::parseNumericOperand(
}
// Otherwise, parse it as a literal.
- int64_t SignedLiteralValue;
- uint64_t UnsignedLiteralValue;
+ APInt LiteralValue;
StringRef SaveExpr = Expr;
- // Accept both signed and unsigned literal, default to signed literal.
+ bool Negative = Expr.consume_front("-");
if (!Expr.consumeInteger((AO == AllowedOperand::LegacyLiteral) ? 10 : 0,
- UnsignedLiteralValue))
+ LiteralValue)) {
+ LiteralValue = toSigned(LiteralValue, Negative);
return std::make_unique<ExpressionLiteral>(SaveExpr.drop_back(Expr.size()),
- UnsignedLiteralValue);
- Expr = SaveExpr;
- if (AO == AllowedOperand::Any && !Expr.consumeInteger(0, SignedLiteralValue))
- return std::make_unique<ExpressionLiteral>(SaveExpr.drop_back(Expr.size()),
- SignedLiteralValue);
-
+ LiteralValue);
+ }
return ErrorDiagnostic::get(
- SM, Expr,
+ SM, SaveExpr,
Twine("invalid ") +
(MaybeInvalidConstraint ? "matching constraint or " : "") +
"operand format");
@@ -490,7 +458,7 @@ Expected<std::unique_ptr<ExpressionAST>>
Pattern::parseParenExpr(StringRef &Expr, std::optional<size_t> LineNumber,
FileCheckPatternContext *Context, const SourceMgr &SM) {
Expr = Expr.ltrim(SpaceChars);
- assert(Expr.startswith("("));
+ assert(Expr.starts_with("("));
// Parse right operand.
Expr.consume_front("(");
@@ -503,7 +471,7 @@ Pattern::parseParenExpr(StringRef &Expr, std::optional<size_t> LineNumber,
Expr, AllowedOperand::Any, /*MaybeInvalidConstraint=*/false, LineNumber,
Context, SM);
Expr = Expr.ltrim(SpaceChars);
- while (SubExprResult && !Expr.empty() && !Expr.startswith(")")) {
+ while (SubExprResult && !Expr.empty() && !Expr.starts_with(")")) {
StringRef OrigExpr = Expr;
SubExprResult = parseBinop(OrigExpr, Expr, std::move(*SubExprResult), false,
LineNumber, Context, SM);
@@ -535,10 +503,10 @@ Pattern::parseBinop(StringRef Expr, StringRef &RemainingExpr,
binop_eval_t EvalBinop;
switch (Operator) {
case '+':
- EvalBinop = operator+;
+ EvalBinop = exprAdd;
break;
case '-':
- EvalBinop = operator-;
+ EvalBinop = exprSub;
break;
default:
return ErrorDiagnostic::get(
@@ -569,15 +537,15 @@ Pattern::parseCallExpr(StringRef &Expr, StringRef FuncName,
std::optional<size_t> LineNumber,
FileCheckPatternContext *Context, const SourceMgr &SM) {
Expr = Expr.ltrim(SpaceChars);
- assert(Expr.startswith("("));
+ assert(Expr.starts_with("("));
auto OptFunc = StringSwitch<binop_eval_t>(FuncName)
- .Case("add", operator+)
- .Case("div", operator/)
- .Case("max", max)
- .Case("min", min)
- .Case("mul", operator*)
- .Case("sub", operator-)
+ .Case("add", exprAdd)
+ .Case("div", exprDiv)
+ .Case("max", exprMax)
+ .Case("min", exprMin)
+ .Case("mul", exprMul)
+ .Case("sub", exprSub)
.Default(nullptr);
if (!OptFunc)
@@ -589,8 +557,8 @@ Pattern::parseCallExpr(StringRef &Expr, StringRef FuncName,
// Parse call arguments, which are comma separated.
SmallVector<std::unique_ptr<ExpressionAST>, 4> Args;
- while (!Expr.empty() && !Expr.startswith(")")) {
- if (Expr.startswith(","))
+ while (!Expr.empty() && !Expr.starts_with(")")) {
+ if (Expr.starts_with(","))
return ErrorDiagnostic::get(SM, Expr, "missing argument");
// Parse the argument, which is an arbitrary expression.
@@ -601,7 +569,7 @@ Pattern::parseCallExpr(StringRef &Expr, StringRef FuncName,
while (Arg && !Expr.empty()) {
Expr = Expr.ltrim(SpaceChars);
// Have we reached an argument terminator?
- if (Expr.startswith(",") || Expr.startswith(")"))
+ if (Expr.starts_with(",") || Expr.starts_with(")"))
break;
// Arg = Arg <op> <expr>
@@ -620,7 +588,7 @@ Pattern::parseCallExpr(StringRef &Expr, StringRef FuncName,
break;
Expr = Expr.ltrim(SpaceChars);
- if (Expr.startswith(")"))
+ if (Expr.starts_with(")"))
return ErrorDiagnostic::get(SM, Expr, "missing argument");
}
@@ -850,7 +818,7 @@ bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
// by escaping scary characters in fixed strings, building up one big regex.
while (!PatternStr.empty()) {
// RegEx matches.
- if (PatternStr.startswith("{{")) {
+ if (PatternStr.starts_with("{{")) {
// This is the start of a regex match. Scan for the }}.
size_t End = PatternStr.find("}}");
if (End == StringRef::npos) {
@@ -864,12 +832,16 @@ bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
// capturing the result for any purpose. This is required in case the
// expression contains an alternation like: CHECK: abc{{x|z}}def. We
// want this to turn into: "abc(x|z)def" not "abcx|zdef".
- RegExStr += '(';
- ++CurParen;
+ bool HasAlternation = PatternStr.contains('|');
+ if (HasAlternation) {
+ RegExStr += '(';
+ ++CurParen;
+ }
if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
return true;
- RegExStr += ')';
+ if (HasAlternation)
+ RegExStr += ')';
PatternStr = PatternStr.substr(End + 2);
continue;
@@ -885,7 +857,7 @@ bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
// names must satisfy the regular expression "[a-zA-Z_][0-9a-zA-Z_]*" to be
// valid, as this helps catch some common errors. If there are extra '['s
// before the "[[", treat them literally.
- if (PatternStr.startswith("[[") && !PatternStr.startswith("[[[")) {
+ if (PatternStr.starts_with("[[") && !PatternStr.starts_with("[[[")) {
StringRef UnparsedPatternStr = PatternStr.substr(2);
// Find the closing bracket pair ending the match. End is going to be an
// offset relative to the beginning of the match string.
@@ -1124,7 +1096,8 @@ Pattern::MatchResult Pattern::match(StringRef Buffer,
if (!Substitutions.empty()) {
TmpStr = RegExStr;
if (LineNumber)
- Context->LineVariable->setValue(ExpressionValue(*LineNumber));
+ Context->LineVariable->setValue(
+ APInt(sizeof(*LineNumber) * 8, *LineNumber));
size_t InsertOffset = 0;
// Substitute all string variables and expressions whose values are only
@@ -1203,11 +1176,8 @@ Pattern::MatchResult Pattern::match(StringRef Buffer,
StringRef MatchedValue = MatchInfo[CaptureParenGroup];
ExpressionFormat Format = DefinedNumericVariable->getImplicitFormat();
- Expected<ExpressionValue> Value =
- Format.valueFromStringRepr(MatchedValue, SM);
- if (!Value)
- return MatchResult(TheMatch, Value.takeError());
- DefinedNumericVariable->setValue(*Value, MatchedValue);
+ APInt Value = Format.valueFromStringRepr(MatchedValue, SM);
+ DefinedNumericVariable->setValue(Value, MatchedValue);
}
return MatchResult(TheMatch, Error::success());
@@ -1422,7 +1392,7 @@ size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
size_t BracketDepth = 0;
while (!Str.empty()) {
- if (Str.startswith("]]") && BracketDepth == 0)
+ if (Str.starts_with("]]") && BracketDepth == 0)
return Offset;
if (Str[0] == '\\') {
// Backslash escapes the next char within regexes, so skip them both.
@@ -1621,10 +1591,10 @@ FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix,
}
// You can't combine -NOT with another suffix.
- if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
- Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
- Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") ||
- Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:"))
+ if (Rest.starts_with("DAG-NOT:") || Rest.starts_with("NOT-DAG:") ||
+ Rest.starts_with("NEXT-NOT:") || Rest.starts_with("NOT-NEXT:") ||
+ Rest.starts_with("SAME-NOT:") || Rest.starts_with("NOT-SAME:") ||
+ Rest.starts_with("EMPTY-NOT:") || Rest.starts_with("NOT-EMPTY:"))
return {Check::CheckBadNot, Rest};
if (Rest.consume_front("NEXT"))
@@ -1664,6 +1634,60 @@ static size_t SkipWord(StringRef Str, size_t Loc) {
return Loc;
}
+static const char *DefaultCheckPrefixes[] = {"CHECK"};
+static const char *DefaultCommentPrefixes[] = {"COM", "RUN"};
+
+static void addDefaultPrefixes(FileCheckRequest &Req) {
+ if (Req.CheckPrefixes.empty()) {
+ for (const char *Prefix : DefaultCheckPrefixes)
+ Req.CheckPrefixes.push_back(Prefix);
+ Req.IsDefaultCheckPrefix = true;
+ }
+ if (Req.CommentPrefixes.empty())
+ for (const char *Prefix : DefaultCommentPrefixes)
+ Req.CommentPrefixes.push_back(Prefix);
+}
+
+struct PrefixMatcher {
+ /// Prefixes and their first occurrence past the current position.
+ SmallVector<std::pair<StringRef, size_t>> Prefixes;
+ StringRef Input;
+
+ PrefixMatcher(ArrayRef<StringRef> CheckPrefixes,
+ ArrayRef<StringRef> CommentPrefixes, StringRef Input)
+ : Input(Input) {
+ for (StringRef Prefix : CheckPrefixes)
+ Prefixes.push_back({Prefix, Input.find(Prefix)});
+ for (StringRef Prefix : CommentPrefixes)
+ Prefixes.push_back({Prefix, Input.find(Prefix)});
+
+ // Sort by descending length.
+ llvm::sort(Prefixes,
+ [](auto A, auto B) { return A.first.size() > B.first.size(); });
+ }
+
+ /// Find the next match of a prefix in Buffer.
+ /// Returns empty StringRef if not found.
+ StringRef match(StringRef Buffer) {
+ assert(Buffer.data() >= Input.data() &&
+ Buffer.data() + Buffer.size() == Input.data() + Input.size() &&
+ "Buffer must be suffix of Input");
+
+ size_t From = Buffer.data() - Input.data();
+ StringRef Match;
+ for (auto &[Prefix, Pos] : Prefixes) {
+ // If the last occurrence was before From, find the next one after From.
+ if (Pos < From)
+ Pos = Input.find(Prefix, From);
+ // Find the first prefix with the lowest position.
+ if (Pos != StringRef::npos &&
+ (Match.empty() || size_t(Match.data() - Input.data()) > Pos))
+ Match = StringRef(Input.substr(Pos, Prefix.size()));
+ }
+ return Match;
+ }
+};
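
Illustrative use of the file-local matcher (input string invented for the example). Longer prefixes win at the same position, and the earliest occurrence across all prefixes is returned:

    llvm::StringRef Input = "COM: note\nCHECK: value";
    PrefixMatcher M({"CHECK"}, {"COM", "RUN"}, Input);
    llvm::StringRef First = M.match(Input);            // "COM" at offset 0
    llvm::StringRef Next = M.match(Input.substr(10));  // "CHECK" at offset 10
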
+
/// Searches the buffer for the first prefix in the prefix regular expression.
///
/// This searches the buffer using the provided regular expression, however it
@@ -1688,20 +1712,16 @@ static size_t SkipWord(StringRef Str, size_t Loc) {
/// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
/// is unspecified.
static std::pair<StringRef, StringRef>
-FindFirstMatchingPrefix(const FileCheckRequest &Req, Regex &PrefixRE,
+FindFirstMatchingPrefix(const FileCheckRequest &Req, PrefixMatcher &Matcher,
StringRef &Buffer, unsigned &LineNumber,
Check::FileCheckType &CheckTy) {
- SmallVector<StringRef, 2> Matches;
-
while (!Buffer.empty()) {
- // Find the first (longest) match using the RE.
- if (!PrefixRE.match(Buffer, &Matches))
+ // Find the first (longest) prefix match.
+ StringRef Prefix = Matcher.match(Buffer);
+ if (Prefix.empty())
// No match at all, bail.
return {StringRef(), StringRef()};
- StringRef Prefix = Matches[0];
- Matches.clear();
-
assert(Prefix.data() >= Buffer.data() &&
Prefix.data() < Buffer.data() + Buffer.size() &&
"Prefix doesn't start inside of buffer!");
@@ -1750,7 +1770,7 @@ FileCheck::FileCheck(FileCheckRequest Req)
FileCheck::~FileCheck() = default;
bool FileCheck::readCheckFile(
- SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
+ SourceMgr &SM, StringRef Buffer,
std::pair<unsigned, unsigned> *ImpPatBufferIDRange) {
if (ImpPatBufferIDRange)
ImpPatBufferIDRange->first = ImpPatBufferIDRange->second = 0;
@@ -1799,6 +1819,8 @@ bool FileCheck::readCheckFile(
// found.
unsigned LineNumber = 1;
+ addDefaultPrefixes(Req);
+ PrefixMatcher Matcher(Req.CheckPrefixes, Req.CommentPrefixes, Buffer);
std::set<StringRef> PrefixesNotFound(Req.CheckPrefixes.begin(),
Req.CheckPrefixes.end());
const size_t DistinctPrefixes = PrefixesNotFound.size();
@@ -1809,7 +1831,7 @@ bool FileCheck::readCheckFile(
StringRef UsedPrefix;
StringRef AfterSuffix;
std::tie(UsedPrefix, AfterSuffix) =
- FindFirstMatchingPrefix(Req, PrefixRE, Buffer, LineNumber, CheckTy);
+ FindFirstMatchingPrefix(Req, Matcher, Buffer, LineNumber, CheckTy);
if (UsedPrefix.empty())
break;
if (CheckTy != Check::CheckComment)
@@ -2461,9 +2483,6 @@ static bool ValidatePrefixes(StringRef Kind, StringSet<> &UniquePrefixes,
return true;
}
-static const char *DefaultCheckPrefixes[] = {"CHECK"};
-static const char *DefaultCommentPrefixes[] = {"COM", "RUN"};
-
bool FileCheck::ValidateCheckPrefixes() {
StringSet<> UniquePrefixes;
// Add default prefixes to catch user-supplied duplicates of them below.
@@ -2484,33 +2503,6 @@ bool FileCheck::ValidateCheckPrefixes() {
return true;
}
-Regex FileCheck::buildCheckPrefixRegex() {
- if (Req.CheckPrefixes.empty()) {
- for (const char *Prefix : DefaultCheckPrefixes)
- Req.CheckPrefixes.push_back(Prefix);
- Req.IsDefaultCheckPrefix = true;
- }
- if (Req.CommentPrefixes.empty()) {
- for (const char *Prefix : DefaultCommentPrefixes)
- Req.CommentPrefixes.push_back(Prefix);
- }
-
- // We already validated the contents of CheckPrefixes and CommentPrefixes so
- // just concatenate them as alternatives.
- SmallString<32> PrefixRegexStr;
- for (size_t I = 0, E = Req.CheckPrefixes.size(); I != E; ++I) {
- if (I != 0)
- PrefixRegexStr.push_back('|');
- PrefixRegexStr.append(Req.CheckPrefixes[I]);
- }
- for (StringRef Prefix : Req.CommentPrefixes) {
- PrefixRegexStr.push_back('|');
- PrefixRegexStr.append(Prefix);
- }
-
- return Regex(PrefixRegexStr);
-}
-
Error FileCheckPatternContext::defineCmdlineVariables(
ArrayRef<StringRef> CmdlineDefines, SourceMgr &SM) {
assert(GlobalVariableTable.empty() && GlobalNumericVariableTable.empty() &&
@@ -2590,7 +2582,7 @@ Error FileCheckPatternContext::defineCmdlineVariables(
// to, since the expression of a command-line variable definition should
// only use variables defined earlier on the command-line. If not, this
// is an error and we report it.
- Expected<ExpressionValue> Value = Expression->getAST()->eval();
+ Expected<APInt> Value = Expression->getAST()->eval();
if (!Value) {
Errs = joinErrors(std::move(Errs), Value.takeError());
continue;
diff --git a/contrib/llvm-project/llvm/lib/FileCheck/FileCheckImpl.h b/contrib/llvm-project/llvm/lib/FileCheck/FileCheckImpl.h
index 10fe8d46ffac..c15461684ea3 100644
--- a/contrib/llvm-project/llvm/lib/FileCheck/FileCheckImpl.h
+++ b/contrib/llvm-project/llvm/lib/FileCheck/FileCheckImpl.h
@@ -32,8 +32,6 @@ namespace llvm {
// Numeric substitution handling code.
//===----------------------------------------------------------------------===//
-class ExpressionValue;
-
/// Type representing the format an expression value should be textualized into
/// for matching. Used to represent both explicit format specifiers as well as
/// implicit format from using numeric variables.
@@ -95,14 +93,11 @@ public:
/// \returns the string representation of \p Value in the format represented
/// by this instance, or an error if conversion to this format failed or the
/// format is NoFormat.
- Expected<std::string> getMatchingString(ExpressionValue Value) const;
+ Expected<std::string> getMatchingString(APInt Value) const;
/// \returns the value corresponding to string representation \p StrVal
- /// according to the matching format represented by this instance or an error
- /// with diagnostic against \p SM if \p StrVal does not correspond to a valid
- /// and representable value.
- Expected<ExpressionValue> valueFromStringRepr(StringRef StrVal,
- const SourceMgr &SM) const;
+ /// according to the matching format represented by this instance.
+ APInt valueFromStringRepr(StringRef StrVal, const SourceMgr &SM) const;
};
/// Class to represent an overflow error that might result when manipulating a
@@ -118,33 +113,14 @@ public:
void log(raw_ostream &OS) const override { OS << "overflow error"; }
};
-/// Class representing a numeric value.
-class ExpressionValue {
-private:
- APInt Value;
-
-public:
- // Store signed and unsigned 64-bit integers in a signed 65-bit APInt.
- template <class T>
- explicit ExpressionValue(T Val) : Value(65, Val, /*isSigned=*/Val < 0) {}
-
- APInt getAPIntValue() const { return Value; }
-};
-
/// Performs operation and \returns its result or an error in case of failure,
/// such as if an overflow occurs.
-Expected<ExpressionValue> operator+(const ExpressionValue &Lhs,
- const ExpressionValue &Rhs);
-Expected<ExpressionValue> operator-(const ExpressionValue &Lhs,
- const ExpressionValue &Rhs);
-Expected<ExpressionValue> operator*(const ExpressionValue &Lhs,
- const ExpressionValue &Rhs);
-Expected<ExpressionValue> operator/(const ExpressionValue &Lhs,
- const ExpressionValue &Rhs);
-Expected<ExpressionValue> max(const ExpressionValue &Lhs,
- const ExpressionValue &Rhs);
-Expected<ExpressionValue> min(const ExpressionValue &Lhs,
- const ExpressionValue &Rhs);
+Expected<APInt> exprAdd(const APInt &Lhs, const APInt &Rhs, bool &Overflow);
+Expected<APInt> exprSub(const APInt &Lhs, const APInt &Rhs, bool &Overflow);
+Expected<APInt> exprMul(const APInt &Lhs, const APInt &Rhs, bool &Overflow);
+Expected<APInt> exprDiv(const APInt &Lhs, const APInt &Rhs, bool &Overflow);
+Expected<APInt> exprMax(const APInt &Lhs, const APInt &Rhs, bool &Overflow);
+Expected<APInt> exprMin(const APInt &Lhs, const APInt &Rhs, bool &Overflow);
/// Base class representing the AST of a given expression.
class ExpressionAST {
@@ -160,7 +136,7 @@ public:
/// Evaluates and \returns the value of the expression represented by this
/// AST or an error if evaluation fails.
- virtual Expected<ExpressionValue> eval() const = 0;
+ virtual Expected<APInt> eval() const = 0;
/// \returns either the implicit format of this AST, a diagnostic against
/// \p SM if implicit formats of the AST's components conflict, or NoFormat
@@ -176,15 +152,14 @@ public:
class ExpressionLiteral : public ExpressionAST {
private:
/// Actual value of the literal.
- ExpressionValue Value;
+ APInt Value;
public:
- template <class T>
- explicit ExpressionLiteral(StringRef ExpressionStr, T Val)
+ explicit ExpressionLiteral(StringRef ExpressionStr, APInt Val)
: ExpressionAST(ExpressionStr), Value(Val) {}
/// \returns the literal's value.
- Expected<ExpressionValue> eval() const override { return Value; }
+ Expected<APInt> eval() const override { return Value; }
};
/// Class to represent an undefined variable error, which quotes that
@@ -243,7 +218,7 @@ private:
ExpressionFormat ImplicitFormat;
/// Value of numeric variable, if defined, or std::nullopt otherwise.
- std::optional<ExpressionValue> Value;
+ std::optional<APInt> Value;
/// The input buffer's string from which Value was parsed, or std::nullopt.
/// See comments on getStringValue for a discussion of the std::nullopt case.
@@ -270,7 +245,7 @@ public:
ExpressionFormat getImplicitFormat() const { return ImplicitFormat; }
/// \returns this variable's value.
- std::optional<ExpressionValue> getValue() const { return Value; }
+ std::optional<APInt> getValue() const { return Value; }
/// \returns the input buffer's string from which this variable's value was
/// parsed, or std::nullopt if the value is not yet defined or was not parsed
@@ -282,7 +257,7 @@ public:
/// Sets value of this numeric variable to \p NewValue, and sets the input
/// buffer string from which it was parsed to \p NewStrValue. See comments on
/// getStringValue for a discussion of when the latter can be std::nullopt.
- void setValue(ExpressionValue NewValue,
+ void setValue(APInt NewValue,
std::optional<StringRef> NewStrValue = std::nullopt) {
Value = NewValue;
StrValue = NewStrValue;
@@ -311,7 +286,7 @@ public:
NumericVariableUse(StringRef Name, NumericVariable *Variable)
: ExpressionAST(Name), Variable(Variable) {}
/// \returns the value of the variable referenced by this instance.
- Expected<ExpressionValue> eval() const override;
+ Expected<APInt> eval() const override;
/// \returns implicit format of this numeric variable.
Expected<ExpressionFormat>
@@ -321,8 +296,7 @@ public:
};
/// Type of functions evaluating a given binary operation.
-using binop_eval_t = Expected<ExpressionValue> (*)(const ExpressionValue &,
- const ExpressionValue &);
+using binop_eval_t = Expected<APInt> (*)(const APInt &, const APInt &, bool &);
/// Class representing a single binary operation in the AST of an expression.
class BinaryOperation : public ExpressionAST {
@@ -349,7 +323,7 @@ public:
/// using EvalBinop on the result of recursively evaluating the operands.
/// \returns the expression value or an error if an undefined numeric
/// variable is used in one of the operands.
- Expected<ExpressionValue> eval() const override;
+ Expected<APInt> eval() const override;
/// \returns the implicit format of this AST, if any, a diagnostic against
/// \p SM if the implicit formats of the AST's components conflict, or no
diff --git a/contrib/llvm-project/llvm/lib/Frontend/Driver/CodeGenOptions.cpp b/contrib/llvm-project/llvm/lib/Frontend/Driver/CodeGenOptions.cpp
new file mode 100644
index 000000000000..96c5b19a4a59
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Frontend/Driver/CodeGenOptions.cpp
@@ -0,0 +1,55 @@
+//===--- CodeGenOptions.cpp - Shared codegen option handling --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Frontend/Driver/CodeGenOptions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/TargetParser/Triple.h"
+
+namespace llvm::driver {
+
+TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple,
+ driver::VectorLibrary Veclib) {
+ TargetLibraryInfoImpl *TLII = new TargetLibraryInfoImpl(TargetTriple);
+
+ using VectorLibrary = llvm::driver::VectorLibrary;
+ switch (Veclib) {
+ case VectorLibrary::Accelerate:
+ TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::Accelerate,
+ TargetTriple);
+ break;
+ case VectorLibrary::LIBMVEC:
+ TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::LIBMVEC_X86,
+ TargetTriple);
+ break;
+ case VectorLibrary::MASSV:
+ TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV,
+ TargetTriple);
+ break;
+ case VectorLibrary::SVML:
+ TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML,
+ TargetTriple);
+ break;
+ case VectorLibrary::SLEEF:
+ TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SLEEFGNUABI,
+ TargetTriple);
+ break;
+ case VectorLibrary::Darwin_libsystem_m:
+ TLII->addVectorizableFunctionsFromVecLib(
+ TargetLibraryInfoImpl::DarwinLibSystemM, TargetTriple);
+ break;
+ case VectorLibrary::ArmPL:
+ TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::ArmPL,
+ TargetTriple);
+ break;
+ default:
+ break;
+ }
+ return TLII;
+}
+
+} // namespace llvm::driver
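
A hedged usage sketch for the new helper (triple and library choice are illustrative; the caller owns the returned pointer):

    #include "llvm/Analysis/TargetLibraryInfo.h"
    #include "llvm/Frontend/Driver/CodeGenOptions.h"
    #include "llvm/TargetParser/Triple.h"
    #include <memory>

    llvm::Triple TT("aarch64-unknown-linux-gnu");
    std::unique_ptr<llvm::TargetLibraryInfoImpl> TLII(
        llvm::driver::createTLII(TT, llvm::driver::VectorLibrary::SLEEF));
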
diff --git a/contrib/llvm-project/llvm/lib/Frontend/HLSL/HLSLResource.cpp b/contrib/llvm-project/llvm/lib/Frontend/HLSL/HLSLResource.cpp
index 59f730d8a495..709fe3212623 100644
--- a/contrib/llvm-project/llvm/lib/Frontend/HLSL/HLSLResource.cpp
+++ b/contrib/llvm-project/llvm/lib/Frontend/HLSL/HLSLResource.cpp
@@ -27,30 +27,37 @@ StringRef FrontendResource::getSourceType() {
return cast<MDString>(Entry->getOperand(1))->getString();
}
-uint32_t FrontendResource::FrontendResource::getResourceKind() {
+ResourceKind FrontendResource::getResourceKind() {
+ return static_cast<ResourceKind>(
+ cast<ConstantInt>(
+ cast<ConstantAsMetadata>(Entry->getOperand(2))->getValue())
+ ->getLimitedValue());
+}
+bool FrontendResource::getIsROV() {
return cast<ConstantInt>(
- cast<ConstantAsMetadata>(Entry->getOperand(2))->getValue())
+ cast<ConstantAsMetadata>(Entry->getOperand(3))->getValue())
->getLimitedValue();
}
uint32_t FrontendResource::getResourceIndex() {
return cast<ConstantInt>(
- cast<ConstantAsMetadata>(Entry->getOperand(3))->getValue())
+ cast<ConstantAsMetadata>(Entry->getOperand(4))->getValue())
->getLimitedValue();
}
uint32_t FrontendResource::getSpace() {
return cast<ConstantInt>(
- cast<ConstantAsMetadata>(Entry->getOperand(4))->getValue())
+ cast<ConstantAsMetadata>(Entry->getOperand(5))->getValue())
->getLimitedValue();
}
FrontendResource::FrontendResource(GlobalVariable *GV, StringRef TypeStr,
- ResourceKind RK, uint32_t ResIndex,
- uint32_t Space) {
+ ResourceKind RK, bool IsROV,
+ uint32_t ResIndex, uint32_t Space) {
auto &Ctx = GV->getContext();
IRBuilder<> B(Ctx);
Entry = MDNode::get(
Ctx, {ValueAsMetadata::get(GV), MDString::get(Ctx, TypeStr),
ConstantAsMetadata::get(B.getInt32(static_cast<int>(RK))),
+ ConstantAsMetadata::get(B.getInt1(IsROV)),
ConstantAsMetadata::get(B.getInt32(ResIndex)),
ConstantAsMetadata::get(B.getInt32(Space))});
}
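
With the IsROV operand inserted, the metadata tuple is now {GV, SourceType, Kind, IsROV, ResourceIndex, Space}, which is why the getters above shifted from operands 3/4 to 4/5. A construction sketch (namespace, resource kind, and type string assumed from HLSLResource.h; GV is a placeholder GlobalVariable*):

    // Operand layout: {GV, "RWBuffer<float>", TypedBuffer, false, 0, 0}
    hlsl::FrontendResource Res(GV, "RWBuffer<float>",
                               hlsl::ResourceKind::TypedBuffer,
                               /*IsROV=*/false, /*ResIndex=*/0, /*Space=*/0);
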
diff --git a/contrib/llvm-project/llvm/lib/Frontend/Offloading/Utility.cpp b/contrib/llvm-project/llvm/lib/Frontend/Offloading/Utility.cpp
new file mode 100644
index 000000000000..25f609517ebe
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Frontend/Offloading/Utility.cpp
@@ -0,0 +1,110 @@
+//===- Utility.cpp ------ Collection of generic offloading utilities ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Frontend/Offloading/Utility.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Value.h"
+
+using namespace llvm;
+using namespace llvm::offloading;
+
+StructType *offloading::getEntryTy(Module &M) {
+ LLVMContext &C = M.getContext();
+ StructType *EntryTy =
+ StructType::getTypeByName(C, "struct.__tgt_offload_entry");
+ if (!EntryTy)
+ EntryTy = StructType::create(
+ "struct.__tgt_offload_entry", PointerType::getUnqual(C),
+ PointerType::getUnqual(C), M.getDataLayout().getIntPtrType(C),
+ Type::getInt32Ty(C), Type::getInt32Ty(C));
+ return EntryTy;
+}
+
+// TODO: Rework this interface to be more generic.
+void offloading::emitOffloadingEntry(Module &M, Constant *Addr, StringRef Name,
+ uint64_t Size, int32_t Flags, int32_t Data,
+ StringRef SectionName) {
+ llvm::Triple Triple(M.getTargetTriple());
+
+ Type *Int8PtrTy = PointerType::getUnqual(M.getContext());
+ Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ Type *SizeTy = M.getDataLayout().getIntPtrType(M.getContext());
+
+ Constant *AddrName = ConstantDataArray::getString(M.getContext(), Name);
+
+ // Create the constant string used to look up the symbol in the device.
+ auto *Str = new GlobalVariable(M, AddrName->getType(), /*isConstant=*/true,
+ GlobalValue::InternalLinkage, AddrName,
+ ".omp_offloading.entry_name");
+ Str->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+
+ // Construct the offloading entry.
+ Constant *EntryData[] = {
+ ConstantExpr::getPointerBitCastOrAddrSpaceCast(Addr, Int8PtrTy),
+ ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, Int8PtrTy),
+ ConstantInt::get(SizeTy, Size),
+ ConstantInt::get(Int32Ty, Flags),
+ ConstantInt::get(Int32Ty, Data),
+ };
+ Constant *EntryInitializer = ConstantStruct::get(getEntryTy(M), EntryData);
+
+ auto *Entry = new GlobalVariable(
+ M, getEntryTy(M),
+ /*isConstant=*/true, GlobalValue::WeakAnyLinkage, EntryInitializer,
+ ".omp_offloading.entry." + Name, nullptr, GlobalValue::NotThreadLocal,
+ M.getDataLayout().getDefaultGlobalsAddressSpace());
+
+ // The entry has to be created in the section the linker expects it to be.
+ if (Triple.isOSBinFormatCOFF())
+ Entry->setSection((SectionName + "$OE").str());
+ else
+ Entry->setSection(SectionName);
+ Entry->setAlignment(Align(1));
+}
+
+std::pair<GlobalVariable *, GlobalVariable *>
+offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
+ llvm::Triple Triple(M.getTargetTriple());
+
+  auto *ZeroInitializer =
+      ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u));
+  auto *EntryInit = Triple.isOSBinFormatCOFF() ? ZeroInitializer : nullptr;
+ auto *EntryType = ArrayType::get(getEntryTy(M), 0);
+
+ auto *EntriesB = new GlobalVariable(M, EntryType, /*isConstant=*/true,
+ GlobalValue::ExternalLinkage, EntryInit,
+ "__start_" + SectionName);
+ EntriesB->setVisibility(GlobalValue::HiddenVisibility);
+ auto *EntriesE = new GlobalVariable(M, EntryType, /*isConstant=*/true,
+ GlobalValue::ExternalLinkage, EntryInit,
+ "__stop_" + SectionName);
+ EntriesE->setVisibility(GlobalValue::HiddenVisibility);
+
+ if (Triple.isOSBinFormatELF()) {
+ // We assume that external begin/end symbols that we have created above will
+ // be defined by the linker. This is done whenever a section name with a
+ // valid C-identifier is present. We define a dummy variable here to force
+ // the linker to always provide these symbols.
+ auto *DummyEntry = new GlobalVariable(
+        M, ZeroInitializer->getType(), true, GlobalVariable::ExternalLinkage,
+        ZeroInitializer, "__dummy." + SectionName);
+ DummyEntry->setSection(SectionName);
+ DummyEntry->setVisibility(GlobalValue::HiddenVisibility);
+ } else {
+ // The COFF linker will merge sections containing a '$' together into a
+ // single section. The order of entries in this section will be sorted
+ // alphabetically by the characters following the '$' in the name. Set the
+ // sections here to ensure that the beginning and end symbols are sorted.
+ EntriesB->setSection((SectionName + "$OA").str());
+ EntriesE->setSection((SectionName + "$OZ").str());
+ }
+
+ return std::make_pair(EntriesB, EntriesE);
+}
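
A hedged sketch of the pair in use (module, kernel address, and section name are placeholders): one entry is emitted into the section, and the begin/end globals bound the array the runtime walks.

    // KernelAddr is an assumed Constant* naming the device entry point.
    offloading::emitOffloadingEntry(M, KernelAddr, "kernel0", /*Size=*/0,
                                    /*Flags=*/0, /*Data=*/0,
                                    "omp_offloading_entries");
    auto [EntriesBegin, EntriesEnd] =
        offloading::getOffloadEntryArray(M, "omp_offloading_entries");
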
diff --git a/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 4c3696f9c342..ce428f78dc84 100644
--- a/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -14,6 +14,7 @@
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
@@ -22,18 +23,27 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Frontend/Offloading/Utility.h"
+#include "llvm/Frontend/OpenMP/OMPGridValues.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -135,6 +145,19 @@ static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType) {
}
#endif
+static const omp::GV &getGridValue(const Triple &T, Function *Kernel) {
+ if (T.isAMDGPU()) {
+ StringRef Features =
+ Kernel->getFnAttribute("target-features").getValueAsString();
+ if (Features.count("+wavefrontsize64"))
+ return omp::getAMDGPUGridValues<64>();
+ return omp::getAMDGPUGridValues<32>();
+ }
+ if (T.isNVPTX())
+ return omp::NVPTXGridValues;
+ llvm_unreachable("No grid value available for this architecture!");
+}
+
/// Determine which scheduling algorithm to use, determined from schedule clause
/// arguments.
static OMPScheduleType
@@ -331,6 +354,140 @@ BasicBlock *llvm::splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
return splitBB(Builder, CreateBranch, Old->getName() + Suffix);
}
+// This function creates a fake integer value and a fake use for the integer
+// value. It returns the fake value created. This is useful in modeling the
+// extra arguments to the outlined functions.
+Value *createFakeIntVal(IRBuilder<> &Builder,
+ OpenMPIRBuilder::InsertPointTy OuterAllocaIP,
+ std::stack<Instruction *> &ToBeDeleted,
+ OpenMPIRBuilder::InsertPointTy InnerAllocaIP,
+ const Twine &Name = "", bool AsPtr = true) {
+ Builder.restoreIP(OuterAllocaIP);
+ Instruction *FakeVal;
+ AllocaInst *FakeValAddr =
+ Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, Name + ".addr");
+ ToBeDeleted.push(FakeValAddr);
+
+ if (AsPtr) {
+ FakeVal = FakeValAddr;
+ } else {
+ FakeVal =
+ Builder.CreateLoad(Builder.getInt32Ty(), FakeValAddr, Name + ".val");
+ ToBeDeleted.push(FakeVal);
+ }
+
+ // Generate a fake use of this value
+ Builder.restoreIP(InnerAllocaIP);
+ Instruction *UseFakeVal;
+ if (AsPtr) {
+ UseFakeVal =
+ Builder.CreateLoad(Builder.getInt32Ty(), FakeVal, Name + ".use");
+ } else {
+ UseFakeVal =
+ cast<BinaryOperator>(Builder.CreateAdd(FakeVal, Builder.getInt32(10)));
+ }
+ ToBeDeleted.push(UseFakeVal);
+ return FakeVal;
+}
+
+//===----------------------------------------------------------------------===//
+// OpenMPIRBuilderConfig
+//===----------------------------------------------------------------------===//
+
+namespace {
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
+/// Values for bit flags for marking which requires clauses have been used.
+enum OpenMPOffloadingRequiresDirFlags {
+ /// flag undefined.
+ OMP_REQ_UNDEFINED = 0x000,
+ /// no requires directive present.
+ OMP_REQ_NONE = 0x001,
+ /// reverse_offload clause.
+ OMP_REQ_REVERSE_OFFLOAD = 0x002,
+ /// unified_address clause.
+ OMP_REQ_UNIFIED_ADDRESS = 0x004,
+ /// unified_shared_memory clause.
+ OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
+ /// dynamic_allocators clause.
+ OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
+};
+
+} // anonymous namespace
+
+OpenMPIRBuilderConfig::OpenMPIRBuilderConfig()
+ : RequiresFlags(OMP_REQ_UNDEFINED) {}
+
+OpenMPIRBuilderConfig::OpenMPIRBuilderConfig(
+ bool IsTargetDevice, bool IsGPU, bool OpenMPOffloadMandatory,
+ bool HasRequiresReverseOffload, bool HasRequiresUnifiedAddress,
+ bool HasRequiresUnifiedSharedMemory, bool HasRequiresDynamicAllocators)
+ : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
+ OpenMPOffloadMandatory(OpenMPOffloadMandatory),
+ RequiresFlags(OMP_REQ_UNDEFINED) {
+ if (HasRequiresReverseOffload)
+ RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
+ if (HasRequiresUnifiedAddress)
+ RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
+ if (HasRequiresUnifiedSharedMemory)
+ RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
+ if (HasRequiresDynamicAllocators)
+ RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
+}
+
+bool OpenMPIRBuilderConfig::hasRequiresReverseOffload() const {
+ return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
+}
+
+bool OpenMPIRBuilderConfig::hasRequiresUnifiedAddress() const {
+ return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
+}
+
+bool OpenMPIRBuilderConfig::hasRequiresUnifiedSharedMemory() const {
+ return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
+}
+
+bool OpenMPIRBuilderConfig::hasRequiresDynamicAllocators() const {
+ return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
+}
+
+int64_t OpenMPIRBuilderConfig::getRequiresFlags() const {
+ return hasRequiresFlags() ? RequiresFlags
+ : static_cast<int64_t>(OMP_REQ_NONE);
+}
+
+void OpenMPIRBuilderConfig::setHasRequiresReverseOffload(bool Value) {
+ if (Value)
+ RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
+ else
+ RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
+}
+
+void OpenMPIRBuilderConfig::setHasRequiresUnifiedAddress(bool Value) {
+ if (Value)
+ RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
+ else
+ RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
+}
+
+void OpenMPIRBuilderConfig::setHasRequiresUnifiedSharedMemory(bool Value) {
+ if (Value)
+ RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
+ else
+ RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
+}
+
+void OpenMPIRBuilderConfig::setHasRequiresDynamicAllocators(bool Value) {
+ if (Value)
+ RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
+ else
+ RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
+}
+
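+// A minimal usage sketch (hypothetical host configuration for a translation
+// unit containing a `requires unified_shared_memory` directive):
+//   OpenMPIRBuilderConfig Config(
+//       /*IsTargetDevice=*/false, /*IsGPU=*/false,
+//       /*OpenMPOffloadMandatory=*/false,
+//       /*HasRequiresReverseOffload=*/false,
+//       /*HasRequiresUnifiedAddress=*/false,
+//       /*HasRequiresUnifiedSharedMemory=*/true,
+//       /*HasRequiresDynamicAllocators=*/false);
+//   assert(Config.hasRequiresUnifiedSharedMemory());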
+//===----------------------------------------------------------------------===//
+// OpenMPIRBuilder
+//===----------------------------------------------------------------------===//
+
void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs,
IRBuilderBase &Builder,
SmallVector<Value *> &ArgsVector) {
@@ -362,7 +519,6 @@ void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs,
void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
LLVMContext &Ctx = Fn.getContext();
- Triple T(M.getTargetTriple());
// Get the function's current attributes.
auto Attrs = Fn.getAttributes();
@@ -383,9 +539,9 @@ void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
if (Param) {
if (auto AK = TargetLibraryInfo::getExtAttrForI32Param(T, HasSignExt))
FnAS = FnAS.addAttribute(Ctx, AK);
- } else
- if (auto AK = TargetLibraryInfo::getExtAttrForI32Return(T, HasSignExt))
- FnAS = FnAS.addAttribute(Ctx, AK);
+ } else if (auto AK =
+ TargetLibraryInfo::getExtAttrForI32Return(T, HasSignExt))
+ FnAS = FnAS.addAttribute(Ctx, AK);
} else {
FnAS = FnAS.addAttributes(Ctx, AS);
}
@@ -399,7 +555,7 @@ void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
case Enum: \
FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
- addAttrSet(RetAttrs, RetAttrSet, /*Param*/false); \
+ addAttrSet(RetAttrs, RetAttrSet, /*Param*/ false); \
for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
@@ -475,31 +631,7 @@ Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) {
return Fn;
}
-void OpenMPIRBuilder::initialize(StringRef HostFilePath) {
- initializeTypes(M);
-
- if (HostFilePath.empty())
- return;
-
- auto Buf = MemoryBuffer::getFile(HostFilePath);
- if (std::error_code Err = Buf.getError()) {
- report_fatal_error(("error opening host file from host file path inside of "
- "OpenMPIRBuilder: " +
- Err.message())
- .c_str());
- }
-
- LLVMContext Ctx;
- auto M = expectedToErrorOrAndEmitErrors(
- Ctx, parseBitcodeFile(Buf.get()->getMemBufferRef(), Ctx));
- if (std::error_code Err = M.getError()) {
- report_fatal_error(
- ("error parsing host file inside of OpenMPIRBuilder: " + Err.message())
- .c_str());
- }
-
- loadOffloadInfoMetadata(*M.get());
-}
+void OpenMPIRBuilder::initialize() { initializeTypes(M); }
void OpenMPIRBuilder::finalize(Function *Fn) {
SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
@@ -519,6 +651,13 @@ void OpenMPIRBuilder::finalize(Function *Fn) {
Function *OuterFn = OI.getFunction();
CodeExtractorAnalysisCache CEAC(*OuterFn);
+ // If we generate code for the target device, we need to allocate the
+ // struct for aggregate params in the device default alloca address space.
+ // The OpenMP runtime requires that the params of the extracted functions
+ // are passed as zero address space pointers. This flag ensures that
+ // CodeExtractor generates correct code for extracted functions
+ // which are used by the OpenMP runtime.
+ bool ArgsInZeroAddressSpace = Config.isTargetDevice();
CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
/* AggregateArgs */ true,
/* BlockFrequencyInfo */ nullptr,
@@ -527,7 +666,7 @@ void OpenMPIRBuilder::finalize(Function *Fn) {
/* AllowVarArgs */ true,
/* AllowAlloca */ true,
/* AllocaBlock*/ OI.OuterAllocaBB,
- /* Suffix */ ".omp_par");
+ /* Suffix */ ".omp_par", ArgsInZeroAddressSpace);
LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName()
@@ -572,7 +711,7 @@ void OpenMPIRBuilder::finalize(Function *Fn) {
if (I.isTerminator())
continue;
- I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
+ I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
}
OI.EntryBB->moveBefore(&ArtificialEntry);
@@ -839,44 +978,6 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc,
return Builder.saveIP();
}
-void OpenMPIRBuilder::emitOffloadingEntry(Constant *Addr, StringRef Name,
- uint64_t Size, int32_t Flags,
- StringRef SectionName) {
- Type *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
- Type *Int32Ty = Type::getInt32Ty(M.getContext());
- Type *SizeTy = M.getDataLayout().getIntPtrType(M.getContext());
-
- Constant *AddrName = ConstantDataArray::getString(M.getContext(), Name);
-
- // Create the constant string used to look up the symbol in the device.
- auto *Str =
- new llvm::GlobalVariable(M, AddrName->getType(), /*isConstant=*/true,
- llvm::GlobalValue::InternalLinkage, AddrName,
- ".omp_offloading.entry_name");
- Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
-
- // Construct the offloading entry.
- Constant *EntryData[] = {
- ConstantExpr::getPointerBitCastOrAddrSpaceCast(Addr, Int8PtrTy),
- ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, Int8PtrTy),
- ConstantInt::get(SizeTy, Size),
- ConstantInt::get(Int32Ty, Flags),
- ConstantInt::get(Int32Ty, 0),
- };
- Constant *EntryInitializer =
- ConstantStruct::get(OpenMPIRBuilder::OffloadEntry, EntryData);
-
- auto *Entry = new GlobalVariable(
- M, OpenMPIRBuilder::OffloadEntry,
- /* isConstant = */ true, GlobalValue::WeakAnyLinkage, EntryInitializer,
- ".omp_offloading.entry." + Name, nullptr, GlobalValue::NotThreadLocal,
- M.getDataLayout().getDefaultGlobalsAddressSpace());
-
- // The entry has to be created in the section the linker expects it to be.
- Entry->setSection(SectionName);
- Entry->setAlignment(Align(1));
-}
-
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel(
const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return,
Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads,
@@ -930,7 +1031,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitKernelLaunch(
(void)OutlinedFnID;
// Return value of the runtime offloading call.
- Value *Return;
+ Value *Return = nullptr;
// Arguments for the target kernel.
SmallVector<Value *> ArgsVector;
@@ -1007,6 +1108,182 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag,
Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
}
+// Callback used to create OpenMP runtime calls to support
+// the omp parallel clause for the device.
+// We use this callback to replace the call to the OutlinedFn in OuterFn
+// with a call to the OpenMP DeviceRTL runtime function (__kmpc_parallel_51).
+static void targetParallelCallback(
+ OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn,
+ BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition,
+ Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr,
+ Value *ThreadID, const SmallVector<Instruction *, 4> &ToBeDeleted) {
+ // Add some known attributes.
+ IRBuilder<> &Builder = OMPIRBuilder->Builder;
+ OutlinedFn.addParamAttr(0, Attribute::NoAlias);
+ OutlinedFn.addParamAttr(1, Attribute::NoAlias);
+ OutlinedFn.addParamAttr(0, Attribute::NoUndef);
+ OutlinedFn.addParamAttr(1, Attribute::NoUndef);
+ OutlinedFn.addFnAttr(Attribute::NoUnwind);
+
+ assert(OutlinedFn.arg_size() >= 2 &&
+ "Expected at least tid and bounded tid as arguments");
+ unsigned NumCapturedVars = OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
+
+ CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
+ assert(CI && "Expected call instruction to outlined function");
+ CI->getParent()->setName("omp_parallel");
+
+ Builder.SetInsertPoint(CI);
+ Type *PtrTy = OMPIRBuilder->VoidPtr;
+ Value *NullPtrValue = Constant::getNullValue(PtrTy);
+
+ // Add alloca for kernel args
+ OpenMPIRBuilder::InsertPointTy CurrentIP = Builder.saveIP();
+ Builder.SetInsertPoint(OuterAllocaBB, OuterAllocaBB->getFirstInsertionPt());
+ AllocaInst *ArgsAlloca =
+ Builder.CreateAlloca(ArrayType::get(PtrTy, NumCapturedVars));
+ Value *Args = ArgsAlloca;
+ // Add an address space cast if the array for storing arguments is not
+ // allocated in address space 0.
+ if (ArgsAlloca->getAddressSpace())
+ Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
+ Builder.restoreIP(CurrentIP);
+
+ // Store captured vars which are used by kmpc_parallel_51
+ for (unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
+ Value *V = *(CI->arg_begin() + 2 + Idx);
+ Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
+ ArrayType::get(PtrTy, NumCapturedVars), Args, 0, Idx);
+ Builder.CreateStore(V, StoreAddress);
+ }
+
+ Value *Cond =
+ IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
+ : Builder.getInt32(1);
+
+ // Build kmpc_parallel_51 call
+ Value *Parallel51CallArgs[] = {
+ /* identifier */ Ident,
+ /* global thread num*/ ThreadID,
+ /* if expression */ Cond,
+ /* number of threads */ NumThreads ? NumThreads : Builder.getInt32(-1),
+ /* Proc bind */ Builder.getInt32(-1),
+ /* outlined function */
+ Builder.CreateBitCast(&OutlinedFn, OMPIRBuilder->ParallelTaskPtr),
+ /* wrapper function */ NullPtrValue,
+ /* arguments of the outlined function */ Args,
+ /* number of arguments */ Builder.getInt64(NumCapturedVars)};
+
+ FunctionCallee RTLFn =
+ OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_parallel_51);
+
+ Builder.CreateCall(RTLFn, Parallel51CallArgs);
+
+ LLVM_DEBUG(dbgs() << "With kmpc_parallel_51 placed: "
+ << *Builder.GetInsertBlock()->getParent() << "\n");
+
+ // Initialize the local TID stack location with the argument value.
+ Builder.SetInsertPoint(PrivTID);
+ Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
+ Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
+ PrivTIDAddr);
+
+ // Remove redundant call to the outlined function.
+ CI->eraseFromParent();
+
+ for (Instruction *I : ToBeDeleted) {
+ I->eraseFromParent();
+ }
+}
+
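+// Schematically, the callback above leaves a single runtime call where the
+// outlined-function call used to be (a sketch of the emitted IR):
+//   call void @__kmpc_parallel_51(ptr %ident, i32 %tid, i32 %if_cond,
+//                                 i32 %num_threads, i32 -1 /*proc_bind*/,
+//                                 ptr @outlined_fn, ptr null /*wrapper*/,
+//                                 ptr %args, i64 %num_args)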
+// Callback used to create OpenMP runtime calls to support
+// the omp parallel clause for the host.
+// We use this callback to replace the call to the OutlinedFn in OuterFn
+// with a call to the OpenMP host runtime function (__kmpc_fork_call[_if]).
+static void
+hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn,
+ Function *OuterFn, Value *Ident, Value *IfCondition,
+ Instruction *PrivTID, AllocaInst *PrivTIDAddr,
+ const SmallVector<Instruction *, 4> &ToBeDeleted) {
+ IRBuilder<> &Builder = OMPIRBuilder->Builder;
+ FunctionCallee RTLFn;
+ if (IfCondition) {
+ RTLFn =
+ OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call_if);
+ } else {
+ RTLFn =
+ OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
+ }
+ if (auto *F = dyn_cast<Function>(RTLFn.getCallee())) {
+ if (!F->hasMetadata(LLVMContext::MD_callback)) {
+ LLVMContext &Ctx = F->getContext();
+ MDBuilder MDB(Ctx);
+ // Annotate the callback behavior of the __kmpc_fork_call:
+ // - The callback callee is argument number 2 (microtask).
+ // - The first two arguments of the callback callee are unknown (-1).
+ // - All variadic arguments to the __kmpc_fork_call are passed to the
+ // callback callee.
+ F->addMetadata(LLVMContext::MD_callback,
+ *MDNode::get(Ctx, {MDB.createCallbackEncoding(
+ 2, {-1, -1},
+ /* VarArgsArePassed */ true)}));
+ }
+ }
+ // Add some known attributes.
+ OutlinedFn.addParamAttr(0, Attribute::NoAlias);
+ OutlinedFn.addParamAttr(1, Attribute::NoAlias);
+ OutlinedFn.addFnAttr(Attribute::NoUnwind);
+
+ assert(OutlinedFn.arg_size() >= 2 &&
+ "Expected at least tid and bounded tid as arguments");
+ unsigned NumCapturedVars = OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
+
+ CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
+ CI->getParent()->setName("omp_parallel");
+ Builder.SetInsertPoint(CI);
+
+ // Build call __kmpc_fork_call[_if](Ident, n, microtask, var1, .., varn);
+ Value *ForkCallArgs[] = {
+ Ident, Builder.getInt32(NumCapturedVars),
+ Builder.CreateBitCast(&OutlinedFn, OMPIRBuilder->ParallelTaskPtr)};
+
+ SmallVector<Value *, 16> RealArgs;
+ RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
+ if (IfCondition) {
+ Value *Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
+ RealArgs.push_back(Cond);
+ }
+ RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
+
+ // __kmpc_fork_call_if always expects a void ptr as the last argument
+ // If there are no arguments, pass a null pointer.
+ auto PtrTy = OMPIRBuilder->VoidPtr;
+ if (IfCondition && NumCapturedVars == 0) {
+ Value *NullPtrValue = Constant::getNullValue(PtrTy);
+ RealArgs.push_back(NullPtrValue);
+ }
+ if (IfCondition && RealArgs.back()->getType() != PtrTy)
+ RealArgs.back() = Builder.CreateBitCast(RealArgs.back(), PtrTy);
+
+ Builder.CreateCall(RTLFn, RealArgs);
+
+ LLVM_DEBUG(dbgs() << "With fork_call placed: "
+ << *Builder.GetInsertBlock()->getParent() << "\n");
+
+ // Initialize the local TID stack location with the argument value.
+ Builder.SetInsertPoint(PrivTID);
+ Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
+ Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
+ PrivTIDAddr);
+
+ // Remove redundant call to the outlined function.
+ CI->eraseFromParent();
+
+ for (Instruction *I : ToBeDeleted) {
+ I->eraseFromParent();
+ }
+}
+
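+// Schematically, the host callback replaces the outlined-function call with
+// (a sketch of the emitted IR, for the case without an if clause):
+//   call void @__kmpc_fork_call(ptr %ident, i32 %num_captured,
+//                               ptr @outlined_fn, <captured vars...>)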
IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
@@ -1021,9 +1298,16 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
Value *ThreadID = getOrCreateThreadID(Ident);
-
- if (NumThreads) {
- // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
+ // If we generate code for the target device, we need to allocate the
+ // struct for aggregate params in the device default alloca address space.
+ // The OpenMP runtime requires that the params of the extracted functions
+ // are passed as zero address space pointers. This flag ensures that
+ // extracted function arguments are declared in the zero address space.
+ bool ArgsInZeroAddressSpace = Config.isTargetDevice();
+
+ // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
+ // only if we compile for host side.
+ if (NumThreads && !Config.isTargetDevice()) {
Value *Args[] = {
Ident, ThreadID,
Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
@@ -1054,13 +1338,28 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
// Change the location to the outer alloca insertion point to create and
// initialize the allocas we pass into the parallel region.
Builder.restoreIP(OuterAllocaIP);
- AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
- AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");
+ AllocaInst *TIDAddrAlloca = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
+ AllocaInst *ZeroAddrAlloca =
+ Builder.CreateAlloca(Int32, nullptr, "zero.addr");
+ Instruction *TIDAddr = TIDAddrAlloca;
+ Instruction *ZeroAddr = ZeroAddrAlloca;
+ if (ArgsInZeroAddressSpace && M.getDataLayout().getAllocaAddrSpace() != 0) {
+ // Add additional casts to enforce pointers in the zero address space.
+ TIDAddr = new AddrSpaceCastInst(
+ TIDAddrAlloca, PointerType::get(M.getContext(), 0), "tid.addr.ascast");
+ TIDAddr->insertAfter(TIDAddrAlloca);
+ ToBeDeleted.push_back(TIDAddr);
+ ZeroAddr = new AddrSpaceCastInst(ZeroAddrAlloca,
+ PointerType::get(M.getContext(), 0),
+ "zero.addr.ascast");
+ ZeroAddr->insertAfter(ZeroAddrAlloca);
+ ToBeDeleted.push_back(ZeroAddr);
+ }
// We only need TIDAddr and ZeroAddr for modeling purposes to get the
// associated arguments in the outlined function, so we delete them later.
- ToBeDeleted.push_back(TIDAddr);
- ToBeDeleted.push_back(ZeroAddr);
+ ToBeDeleted.push_back(TIDAddrAlloca);
+ ToBeDeleted.push_back(ZeroAddrAlloca);
// Create an artificial insertion point that will also ensure the blocks we
// are about to split are not degenerated.
@@ -1128,87 +1427,24 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
BodyGenCB(InnerAllocaIP, CodeGenIP);
LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");
- FunctionCallee RTLFn;
- if (IfCondition)
- RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call_if);
- else
- RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
-
- if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
- if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
- llvm::LLVMContext &Ctx = F->getContext();
- MDBuilder MDB(Ctx);
- // Annotate the callback behavior of the __kmpc_fork_call:
- // - The callback callee is argument number 2 (microtask).
- // - The first two arguments of the callback callee are unknown (-1).
- // - All variadic arguments to the __kmpc_fork_call are passed to the
- // callback callee.
- F->addMetadata(
- llvm::LLVMContext::MD_callback,
- *llvm::MDNode::get(
- Ctx, {MDB.createCallbackEncoding(2, {-1, -1},
- /* VarArgsArePassed */ true)}));
- }
- }
OutlineInfo OI;
- OI.PostOutlineCB = [=](Function &OutlinedFn) {
- // Add some known attributes.
- OutlinedFn.addParamAttr(0, Attribute::NoAlias);
- OutlinedFn.addParamAttr(1, Attribute::NoAlias);
- OutlinedFn.addFnAttr(Attribute::NoUnwind);
- OutlinedFn.addFnAttr(Attribute::NoRecurse);
-
- assert(OutlinedFn.arg_size() >= 2 &&
- "Expected at least tid and bounded tid as arguments");
- unsigned NumCapturedVars =
- OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
-
- CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
- CI->getParent()->setName("omp_parallel");
- Builder.SetInsertPoint(CI);
-
- // Build call __kmpc_fork_call[_if](Ident, n, microtask, var1, .., varn);
- Value *ForkCallArgs[] = {
- Ident, Builder.getInt32(NumCapturedVars),
- Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};
-
- SmallVector<Value *, 16> RealArgs;
- RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
- if (IfCondition) {
- Value *Cond = Builder.CreateSExtOrTrunc(IfCondition,
- Type::getInt32Ty(M.getContext()));
- RealArgs.push_back(Cond);
- }
- RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
-
- // __kmpc_fork_call_if always expects a void ptr as the last argument
- // If there are no arguments, pass a null pointer.
- auto PtrTy = Type::getInt8PtrTy(M.getContext());
- if (IfCondition && NumCapturedVars == 0) {
- llvm::Value *Void = ConstantPointerNull::get(PtrTy);
- RealArgs.push_back(Void);
- }
- if (IfCondition && RealArgs.back()->getType() != PtrTy)
- RealArgs.back() = Builder.CreateBitCast(RealArgs.back(), PtrTy);
-
- Builder.CreateCall(RTLFn, RealArgs);
-
- LLVM_DEBUG(dbgs() << "With fork_call placed: "
- << *Builder.GetInsertBlock()->getParent() << "\n");
-
- InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
-
- // Initialize the local TID stack location with the argument value.
- Builder.SetInsertPoint(PrivTID);
- Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
- Builder.CreateStore(Builder.CreateLoad(Int32, OutlinedAI), PrivTIDAddr);
-
- CI->eraseFromParent();
-
- for (Instruction *I : ToBeDeleted)
- I->eraseFromParent();
- };
+ if (Config.isTargetDevice()) {
+ // Generate OpenMP target specific runtime call
+ OI.PostOutlineCB = [=, ToBeDeletedVec =
+ std::move(ToBeDeleted)](Function &OutlinedFn) {
+ targetParallelCallback(this, OutlinedFn, OuterFn, OuterAllocaBlock, Ident,
+ IfCondition, NumThreads, PrivTID, PrivTIDAddr,
+ ThreadID, ToBeDeletedVec);
+ };
+ } else {
+ // Generate OpenMP host runtime call
+ OI.PostOutlineCB = [=, ToBeDeletedVec =
+ std::move(ToBeDeleted)](Function &OutlinedFn) {
+ hostParallelCallback(this, OutlinedFn, OuterFn, Ident, IfCondition,
+ PrivTID, PrivTIDAddr, ToBeDeletedVec);
+ };
+ }
// Adjust the finalization stack, verify the adjustment, and call the
// finalize function a last time to finalize values between the pre-fini
@@ -1248,7 +1484,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
/* AllowVarArgs */ true,
/* AllowAlloca */ true,
/* AllocationBlock */ OuterAllocaBlock,
- /* Suffix */ ".omp_par");
+ /* Suffix */ ".omp_par", ArgsInZeroAddressSpace);
// Find inputs to, outputs from the code region.
BasicBlock *CommonExit = nullptr;
@@ -1413,6 +1649,7 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
bool Tied, Value *Final, Value *IfCondition,
SmallVector<DependData> Dependencies) {
+
if (!updateToLocation(Loc))
return InsertPointTy();
@@ -1440,41 +1677,31 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
BasicBlock *TaskAllocaBB =
splitBB(Builder, /*CreateBranch=*/true, "task.alloca");
+ InsertPointTy TaskAllocaIP =
+ InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin());
+ InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin());
+ BodyGenCB(TaskAllocaIP, TaskBodyIP);
+
OutlineInfo OI;
OI.EntryBB = TaskAllocaBB;
OI.OuterAllocaBB = AllocaIP.getBlock();
OI.ExitBB = TaskExitBB;
- OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition,
- Dependencies](Function &OutlinedFn) {
- // The input IR here looks like the following-
- // ```
- // func @current_fn() {
- // outlined_fn(%args)
- // }
- // func @outlined_fn(%args) { ... }
- // ```
- //
- // This is changed to the following-
- //
- // ```
- // func @current_fn() {
- // runtime_call(..., wrapper_fn, ...)
- // }
- // func @wrapper_fn(..., %args) {
- // outlined_fn(%args)
- // }
- // func @outlined_fn(%args) { ... }
- // ```
- // The stale call instruction will be replaced with a new call instruction
- // for runtime call with a wrapper function.
+ // Add the thread ID argument.
+ std::stack<Instruction *> ToBeDeleted;
+ OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal(
+ Builder, AllocaIP, ToBeDeleted, TaskAllocaIP, "global.tid", false));
+
+ OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies,
+ TaskAllocaBB, ToBeDeleted](Function &OutlinedFn) mutable {
+ // Replace the stale CI with the appropriate RTL function call.
assert(OutlinedFn.getNumUses() == 1 &&
"there must be a single user for the outlined function");
CallInst *StaleCI = cast<CallInst>(OutlinedFn.user_back());
- // HasTaskData is true if any variables are captured in the outlined region,
+ // HasShareds is true if any variables are captured in the outlined region,
// false otherwise.
- bool HasTaskData = StaleCI->arg_size() > 0;
+ bool HasShareds = StaleCI->arg_size() > 1;
Builder.SetInsertPoint(StaleCI);
// Gather the arguments for emitting the runtime call for
@@ -1502,10 +1729,17 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
// Argument - `sizeof_kmp_task_t` (TaskSize)
// Tasksize refers to the size in bytes of kmp_task_t data structure
// including private vars accessed in task.
- Value *TaskSize = Builder.getInt64(0);
- if (HasTaskData) {
+ // TODO: add kmp_task_t_with_privates (privates)
+ Value *TaskSize = Builder.getInt64(
+ divideCeil(M.getDataLayout().getTypeSizeInBits(Task), 8));
+
+ // Argument - `sizeof_shareds` (SharedsSize)
+ // SharedsSize refers to the shareds array size in the kmp_task_t data
+ // structure.
+ Value *SharedsSize = Builder.getInt64(0);
+ if (HasShareds) {
AllocaInst *ArgStructAlloca =
- dyn_cast<AllocaInst>(StaleCI->getArgOperand(0));
+ dyn_cast<AllocaInst>(StaleCI->getArgOperand(1));
assert(ArgStructAlloca &&
"Unable to find the alloca instruction corresponding to arguments "
"for extracted function");
@@ -1513,51 +1747,34 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
dyn_cast<StructType>(ArgStructAlloca->getAllocatedType());
assert(ArgStructType && "Unable to find struct type corresponding to "
"arguments for extracted function");
- TaskSize =
+ SharedsSize =
Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
}
-
- // TODO: Argument - sizeof_shareds
-
- // Argument - task_entry (the wrapper function)
- // If the outlined function has some captured variables (i.e. HasTaskData is
- // true), then the wrapper function will have an additional argument (the
- // struct containing captured variables). Otherwise, no such argument will
- // be present.
- SmallVector<Type *> WrapperArgTys{Builder.getInt32Ty()};
- if (HasTaskData)
- WrapperArgTys.push_back(OutlinedFn.getArg(0)->getType());
- FunctionCallee WrapperFuncVal = M.getOrInsertFunction(
- (Twine(OutlinedFn.getName()) + ".wrapper").str(),
- FunctionType::get(Builder.getInt32Ty(), WrapperArgTys, false));
- Function *WrapperFunc = dyn_cast<Function>(WrapperFuncVal.getCallee());
-
// Emit the @__kmpc_omp_task_alloc runtime call
// The runtime call returns a pointer to an area where the task captured
- // variables must be copied before the task is run (NewTaskData)
- CallInst *NewTaskData = Builder.CreateCall(
- TaskAllocFn,
- {/*loc_ref=*/Ident, /*gtid=*/ThreadID, /*flags=*/Flags,
- /*sizeof_task=*/TaskSize, /*sizeof_shared=*/Builder.getInt64(0),
- /*task_func=*/WrapperFunc});
+ // variables must be copied before the task is run (TaskData)
+ CallInst *TaskData = Builder.CreateCall(
+ TaskAllocFn, {/*loc_ref=*/Ident, /*gtid=*/ThreadID, /*flags=*/Flags,
+ /*sizeof_task=*/TaskSize, /*sizeof_shared=*/SharedsSize,
+ /*task_func=*/&OutlinedFn});
// Copy the arguments for outlined function
- if (HasTaskData) {
- Value *TaskData = StaleCI->getArgOperand(0);
+ if (HasShareds) {
+ Value *Shareds = StaleCI->getArgOperand(1);
Align Alignment = TaskData->getPointerAlignment(M.getDataLayout());
- Builder.CreateMemCpy(NewTaskData, Alignment, TaskData, Alignment,
- TaskSize);
+ Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
+ Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
+ SharedsSize);
}
- Value *DepArrayPtr = nullptr;
+ Value *DepArray = nullptr;
if (Dependencies.size()) {
InsertPointTy OldIP = Builder.saveIP();
Builder.SetInsertPoint(
&OldIP.getBlock()->getParent()->getEntryBlock().back());
Type *DepArrayTy = ArrayType::get(DependInfo, Dependencies.size());
- Value *DepArray =
- Builder.CreateAlloca(DepArrayTy, nullptr, ".dep.arr.addr");
+ DepArray = Builder.CreateAlloca(DepArrayTy, nullptr, ".dep.arr.addr");
unsigned P = 0;
for (const DependData &Dep : Dependencies) {
@@ -1588,7 +1805,6 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
++P;
}
- DepArrayPtr = Builder.CreateBitCast(DepArray, Builder.getInt8PtrTy());
Builder.restoreIP(OldIP);
}
@@ -1601,7 +1817,7 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
// br label %exit
// else:
// call @__kmpc_omp_task_begin_if0(...)
- // call @wrapper_fn(...)
+ // call @outlined_fn(...)
// call @__kmpc_omp_task_complete_if0(...)
// br label %exit
// exit:
@@ -1609,10 +1825,9 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
if (IfCondition) {
// `SplitBlockAndInsertIfThenElse` requires the block to have a
// terminator.
- BasicBlock *NewBasicBlock =
- splitBB(Builder, /*CreateBranch=*/true, "if.end");
+ splitBB(Builder, /*CreateBranch=*/true, "if.end");
Instruction *IfTerminator =
- NewBasicBlock->getSinglePredecessor()->getTerminator();
+ Builder.GetInsertPoint()->getParent()->getTerminator();
Instruction *ThenTI = IfTerminator, *ElseTI = nullptr;
Builder.SetInsertPoint(IfTerminator);
SplitBlockAndInsertIfThenElse(IfCondition, IfTerminator, &ThenTI,
@@ -1622,12 +1837,14 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
Function *TaskCompleteFn =
getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
- Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, NewTaskData});
- if (HasTaskData)
- Builder.CreateCall(WrapperFunc, {ThreadID, NewTaskData});
+ Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, TaskData});
+ CallInst *CI = nullptr;
+ if (HasShareds)
+ CI = Builder.CreateCall(&OutlinedFn, {ThreadID, TaskData});
else
- Builder.CreateCall(WrapperFunc, {ThreadID});
- Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, NewTaskData});
+ CI = Builder.CreateCall(&OutlinedFn, {ThreadID});
+ CI->setDebugLoc(StaleCI->getDebugLoc());
+ Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
Builder.SetInsertPoint(ThenTI);
}
@@ -1636,35 +1853,32 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
Builder.CreateCall(
TaskFn,
- {Ident, ThreadID, NewTaskData, Builder.getInt32(Dependencies.size()),
- DepArrayPtr, ConstantInt::get(Builder.getInt32Ty(), 0),
- ConstantPointerNull::get(Type::getInt8PtrTy(M.getContext()))});
+ {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
+ DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
+ ConstantPointerNull::get(PointerType::getUnqual(M.getContext()))});
} else {
// Emit the @__kmpc_omp_task runtime call to spawn the task
Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
- Builder.CreateCall(TaskFn, {Ident, ThreadID, NewTaskData});
+ Builder.CreateCall(TaskFn, {Ident, ThreadID, TaskData});
}
StaleCI->eraseFromParent();
- // Emit the body for wrapper function
- BasicBlock *WrapperEntryBB =
- BasicBlock::Create(M.getContext(), "", WrapperFunc);
- Builder.SetInsertPoint(WrapperEntryBB);
- if (HasTaskData)
- Builder.CreateCall(&OutlinedFn, {WrapperFunc->getArg(1)});
- else
- Builder.CreateCall(&OutlinedFn);
- Builder.CreateRet(Builder.getInt32(0));
+ Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin());
+ if (HasShareds) {
+ LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
+ OutlinedFn.getArg(1)->replaceUsesWithIf(
+ Shareds, [Shareds](Use &U) { return U.getUser() != Shareds; });
+ }
+
+ while (!ToBeDeleted.empty()) {
+ ToBeDeleted.top()->eraseFromParent();
+ ToBeDeleted.pop();
+ }
};
addOutlineInfo(std::move(OI));
-
- InsertPointTy TaskAllocaIP =
- InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin());
- InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin());
- BodyGenCB(TaskAllocaIP, TaskBodyIP);
Builder.SetInsertPoint(TaskExitBB, TaskExitBB->begin());
return Builder.saveIP();
@@ -1832,7 +2046,7 @@ OpenMPIRBuilder::createSection(const LocationDescription &Loc,
/// the given module and return it.
Function *getFreshReductionFunc(Module &M) {
Type *VoidTy = Type::getVoidTy(M.getContext());
- Type *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
+ Type *Int8PtrTy = PointerType::getUnqual(M.getContext());
auto *FuncTy =
FunctionType::get(VoidTy, {Int8PtrTy, Int8PtrTy}, /* IsVarArg */ false);
return Function::Create(FuncTy, GlobalVariable::InternalLinkage,
@@ -1866,7 +2080,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
// Create and populate array of type-erased pointers to private reduction
// values.
unsigned NumReductions = ReductionInfos.size();
- Type *RedArrayTy = ArrayType::get(Builder.getInt8PtrTy(), NumReductions);
+ Type *RedArrayTy = ArrayType::get(Builder.getPtrTy(), NumReductions);
Builder.restoreIP(AllocaIP);
Value *RedArray = Builder.CreateAlloca(RedArrayTy, nullptr, "red.array");
@@ -1877,18 +2091,13 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
const ReductionInfo &RI = En.value();
Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
RedArrayTy, RedArray, 0, Index, "red.array.elem." + Twine(Index));
- Value *Casted =
- Builder.CreateBitCast(RI.PrivateVariable, Builder.getInt8PtrTy(),
- "private.red.var." + Twine(Index) + ".casted");
- Builder.CreateStore(Casted, RedArrayElemPtr);
+ Builder.CreateStore(RI.PrivateVariable, RedArrayElemPtr);
}
// Emit a call to the runtime function that orchestrates the reduction.
// Declare the reduction function in the process.
Function *Func = Builder.GetInsertBlock()->getParent();
Module *Module = Func->getParent();
- Value *RedArrayPtr =
- Builder.CreateBitCast(RedArray, Builder.getInt8PtrTy(), "red.array.ptr");
uint32_t SrcLocStrSize;
Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
bool CanGenerateAtomic =
@@ -1911,8 +2120,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
: RuntimeFunction::OMPRTL___kmpc_reduce);
CallInst *ReduceCall =
Builder.CreateCall(ReduceFunc,
- {Ident, ThreadId, NumVariables, RedArraySize,
- RedArrayPtr, ReductionFunc, Lock},
+ {Ident, ThreadId, NumVariables, RedArraySize, RedArray,
+ ReductionFunc, Lock},
"reduce");
// Create final reduction entry blocks for the atomic and non-atomic case.
@@ -1981,12 +2190,12 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
const ReductionInfo &RI = En.value();
Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
RedArrayTy, LHSArrayPtr, 0, En.index());
- Value *LHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), LHSI8PtrPtr);
+ Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
Value *LHSPtr = Builder.CreateBitCast(LHSI8Ptr, RI.Variable->getType());
Value *LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
RedArrayTy, RHSArrayPtr, 0, En.index());
- Value *RHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), RHSI8PtrPtr);
+ Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
Value *RHSPtr =
Builder.CreateBitCast(RHSI8Ptr, RI.PrivateVariable->getType());
Value *RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
@@ -2465,11 +2674,242 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
return {DispatchAfter, DispatchAfter->getFirstInsertionPt()};
}
+// Returns an LLVM function to call for executing an OpenMP static worksharing
+// for loop depending on `LoopType`. Only i32 and i64 are supported by the
+// runtime. Always interpret integers as unsigned similarly to
+// CanonicalLoopInfo.
+static FunctionCallee
+getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder,
+ WorksharingLoopType LoopType) {
+ unsigned Bitwidth = Ty->getIntegerBitWidth();
+ Module &M = OMPBuilder->M;
+ switch (LoopType) {
+ case WorksharingLoopType::ForStaticLoop:
+ if (Bitwidth == 32)
+ return OMPBuilder->getOrCreateRuntimeFunction(
+ M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
+ if (Bitwidth == 64)
+ return OMPBuilder->getOrCreateRuntimeFunction(
+ M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
+ break;
+ case WorksharingLoopType::DistributeStaticLoop:
+ if (Bitwidth == 32)
+ return OMPBuilder->getOrCreateRuntimeFunction(
+ M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
+ if (Bitwidth == 64)
+ return OMPBuilder->getOrCreateRuntimeFunction(
+ M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
+ break;
+ case WorksharingLoopType::DistributeForStaticLoop:
+ if (Bitwidth == 32)
+ return OMPBuilder->getOrCreateRuntimeFunction(
+ M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
+ if (Bitwidth == 64)
+ return OMPBuilder->getOrCreateRuntimeFunction(
+ M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
+ break;
+ }
+ if (Bitwidth != 32 && Bitwidth != 64) {
+ llvm_unreachable("Unknown OpenMP loop iterator bitwidth");
+ }
+ llvm_unreachable("Unknown type of OpenMP worksharing loop");
+}
+
+// Inserts a call to the proper OpenMP Device RTL function which handles
+// loop worksharing.
+static void createTargetLoopWorkshareCall(
+ OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType,
+ BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg,
+ Type *ParallelTaskPtr, Value *TripCount, Function &LoopBodyFn) {
+ Type *TripCountTy = TripCount->getType();
+ Module &M = OMPBuilder->M;
+ IRBuilder<> &Builder = OMPBuilder->Builder;
+ FunctionCallee RTLFn =
+ getKmpcForStaticLoopForType(TripCountTy, OMPBuilder, LoopType);
+ SmallVector<Value *, 8> RealArgs;
+ RealArgs.push_back(Ident);
+ RealArgs.push_back(Builder.CreateBitCast(&LoopBodyFn, ParallelTaskPtr));
+ RealArgs.push_back(LoopBodyArg);
+ RealArgs.push_back(TripCount);
+ if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
+ RealArgs.push_back(ConstantInt::get(TripCountTy, 0));
+ Builder.CreateCall(RTLFn, RealArgs);
+ return;
+ }
+ FunctionCallee RTLNumThreads = OMPBuilder->getOrCreateRuntimeFunction(
+ M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
+ Builder.restoreIP({InsertBlock, std::prev(InsertBlock->end())});
+ Value *NumThreads = Builder.CreateCall(RTLNumThreads, {});
+
+ RealArgs.push_back(
+ Builder.CreateZExtOrTrunc(NumThreads, TripCountTy, "num.threads.cast"));
+ RealArgs.push_back(ConstantInt::get(TripCountTy, 0));
+ if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
+ RealArgs.push_back(ConstantInt::get(TripCountTy, 0));
+ }
+
+ Builder.CreateCall(RTLFn, RealArgs);
+}
+
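+// For a ForStaticLoop over an i32 trip count, the sequence built above has
+// the shape (a sketch):
+//   %nt = call i32 @omp_get_num_threads()
+//   call void @__kmpc_for_static_loop_4u(ptr %ident, ptr @loop_body,
+//                                        ptr %loop_body_arg, i32 %tripcount,
+//                                        i32 %nt, i32 0)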
+static void
+workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder,
+ CanonicalLoopInfo *CLI, Value *Ident,
+ Function &OutlinedFn, Type *ParallelTaskPtr,
+ const SmallVector<Instruction *, 4> &ToBeDeleted,
+ WorksharingLoopType LoopType) {
+ IRBuilder<> &Builder = OMPIRBuilder->Builder;
+ BasicBlock *Preheader = CLI->getPreheader();
+ Value *TripCount = CLI->getTripCount();
+
+ // After loop body outlining, the loop body contains only the setup of the
+ // loop body argument structure and the call to the outlined loop body
+ // function. First, we need to move the setup of the loop body args into
+ // the loop preheader.
+ Preheader->splice(std::prev(Preheader->end()), CLI->getBody(),
+ CLI->getBody()->begin(), std::prev(CLI->getBody()->end()));
+
+ // The next step is to remove the whole loop. We do not need it anymore.
+ // That's why we make an unconditional branch from the loop preheader to
+ // the loop exit block.
+ Builder.restoreIP({Preheader, Preheader->end()});
+ Preheader->getTerminator()->eraseFromParent();
+ Builder.CreateBr(CLI->getExit());
+
+ // Delete dead loop blocks
+ OpenMPIRBuilder::OutlineInfo CleanUpInfo;
+ SmallPtrSet<BasicBlock *, 32> RegionBlockSet;
+ SmallVector<BasicBlock *, 32> BlocksToBeRemoved;
+ CleanUpInfo.EntryBB = CLI->getHeader();
+ CleanUpInfo.ExitBB = CLI->getExit();
+ CleanUpInfo.collectBlocks(RegionBlockSet, BlocksToBeRemoved);
+ DeleteDeadBlocks(BlocksToBeRemoved);
+
+ // Find the instruction which corresponds to the loop body argument
+ // structure and remove the call instruction to the loop body function.
+ Value *LoopBodyArg;
+ User *OutlinedFnUser = OutlinedFn.getUniqueUndroppableUser();
+ assert(OutlinedFnUser &&
+ "Expected unique undroppable user of outlined function");
+ CallInst *OutlinedFnCallInstruction = dyn_cast<CallInst>(OutlinedFnUser);
+ assert(OutlinedFnCallInstruction && "Expected outlined function call");
+ assert((OutlinedFnCallInstruction->getParent() == Preheader) &&
+ "Expected outlined function call to be located in loop preheader");
+ // Check in case no argument structure has been passed.
+ if (OutlinedFnCallInstruction->arg_size() > 1)
+ LoopBodyArg = OutlinedFnCallInstruction->getArgOperand(1);
+ else
+ LoopBodyArg = Constant::getNullValue(Builder.getPtrTy());
+ OutlinedFnCallInstruction->eraseFromParent();
+
+ createTargetLoopWorkshareCall(OMPIRBuilder, LoopType, Preheader, Ident,
+ LoopBodyArg, ParallelTaskPtr, TripCount,
+ OutlinedFn);
+
+ for (auto &ToBeDeletedItem : ToBeDeleted)
+ ToBeDeletedItem->eraseFromParent();
+ CLI->invalidate();
+}
+
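+// Net effect on the CFG (a sketch): after this callback the preheader sets
+// up the loop-body argument structure, calls the device RTL worksharing
+// entry point, and branches straight to the loop exit; the original header,
+// body, and latch blocks are deleted as dead.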
+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::applyWorkshareLoopTarget(DebugLoc DL, CanonicalLoopInfo *CLI,
+ InsertPointTy AllocaIP,
+ WorksharingLoopType LoopType) {
+ uint32_t SrcLocStrSize;
+ Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
+ Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+
+ OutlineInfo OI;
+ OI.OuterAllocaBB = CLI->getPreheader();
+ Function *OuterFn = CLI->getPreheader()->getParent();
+
+ // Instructions which need to be deleted at the end of code generation
+ SmallVector<Instruction *, 4> ToBeDeleted;
+
+ OI.OuterAllocaBB = AllocaIP.getBlock();
+
+ // Mark the body loop as region which needs to be extracted
+ OI.EntryBB = CLI->getBody();
+ OI.ExitBB = CLI->getLatch()->splitBasicBlock(CLI->getLatch()->begin(),
+ "omp.prelatch", true);
+
+ // Prepare loop body for extraction
+ Builder.restoreIP({CLI->getPreheader(), CLI->getPreheader()->begin()});
+
+ // Insert a new loop counter variable which will be used only in the loop
+ // body.
+ AllocaInst *NewLoopCnt = Builder.CreateAlloca(CLI->getIndVarType(), 0, "");
+ Instruction *NewLoopCntLoad =
+ Builder.CreateLoad(CLI->getIndVarType(), NewLoopCnt);
+ // New loop counter instructions are redundant in the loop preheader when
+ // code generation for the workshare loop is finished. That's why we mark
+ // them as ready for deletion.
+ ToBeDeleted.push_back(NewLoopCntLoad);
+ ToBeDeleted.push_back(NewLoopCnt);
+
+ // Analyse the loop body region. Find all input variables which are used
+ // inside the loop body region.
+ SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
+ SmallVector<BasicBlock *, 32> Blocks;
+ OI.collectBlocks(ParallelRegionBlockSet, Blocks);
+ SmallVector<BasicBlock *, 32> BlocksT(ParallelRegionBlockSet.begin(),
+ ParallelRegionBlockSet.end());
+
+ CodeExtractorAnalysisCache CEAC(*OuterFn);
+ CodeExtractor Extractor(Blocks,
+ /* DominatorTree */ nullptr,
+ /* AggregateArgs */ true,
+ /* BlockFrequencyInfo */ nullptr,
+ /* BranchProbabilityInfo */ nullptr,
+ /* AssumptionCache */ nullptr,
+ /* AllowVarArgs */ true,
+ /* AllowAlloca */ true,
+ /* AllocationBlock */ CLI->getPreheader(),
+ /* Suffix */ ".omp_wsloop",
+ /* AggrArgsIn0AddrSpace */ true);
+
+ BasicBlock *CommonExit = nullptr;
+ SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
+
+ // Find allocas outside the loop body region which are used inside the
+ // loop body.
+ Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
+
+ // We need to model the loop body region as the function f(cnt, loop_arg).
+ // That's why we replace the loop induction variable with the new counter,
+ // which will be one of the loop body function arguments.
+ for (auto Use = CLI->getIndVar()->user_begin();
+ Use != CLI->getIndVar()->user_end(); ++Use) {
+ if (Instruction *Inst = dyn_cast<Instruction>(*Use)) {
+ if (ParallelRegionBlockSet.count(Inst->getParent())) {
+ Inst->replaceUsesOfWith(CLI->getIndVar(), NewLoopCntLoad);
+ }
+ }
+ }
+ // Make sure that the loop counter variable is not merged into the loop
+ // body function argument structure and that it is passed as a separate
+ // variable.
+ OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
+
+ // The PostOutline CB is invoked when the loop body function is outlined
+ // and the loop body is replaced by a call to the outlined function. We
+ // need to add a call to the OpenMP device RTL inside the loop preheader.
+ // The OpenMP device RTL function will handle the loop control logic.
+ OI.PostOutlineCB = [=, ToBeDeletedVec =
+ std::move(ToBeDeleted)](Function &OutlinedFn) {
+ workshareLoopTargetCallback(this, CLI, Ident, OutlinedFn, ParallelTaskPtr,
+ ToBeDeletedVec, LoopType);
+ };
+ addOutlineInfo(std::move(OI));
+ return CLI->getAfterIP();
+}
+
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoop(
DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
- bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind,
- llvm::Value *ChunkSize, bool HasSimdModifier, bool HasMonotonicModifier,
- bool HasNonmonotonicModifier, bool HasOrderedClause) {
+ bool NeedsBarrier, omp::ScheduleKind SchedKind, Value *ChunkSize,
+ bool HasSimdModifier, bool HasMonotonicModifier,
+ bool HasNonmonotonicModifier, bool HasOrderedClause,
+ WorksharingLoopType LoopType) {
+ if (Config.isTargetDevice())
+ return applyWorkshareLoopTarget(DL, CLI, AllocaIP, LoopType);
OMPScheduleType EffectiveScheduleType = computeOpenMPScheduleType(
SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
HasNonmonotonicModifier, HasOrderedClause);
@@ -3311,7 +3751,7 @@ void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
/// "target-features" that determine the TargetMachine are per-function and can
/// be overridden using __attribute__((target("OPTIONS"))).
static std::unique_ptr<TargetMachine>
-createTargetMachine(Function *F, CodeGenOpt::Level OptLevel) {
+createTargetMachine(Function *F, CodeGenOptLevel OptLevel) {
Module *M = F->getParent();
StringRef CPU = F->getFnAttribute("target-cpu").getValueAsString();
@@ -3337,7 +3777,7 @@ static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) {
// Assume the user requests the most aggressive unrolling, even if the rest of
// the code is optimized using a lower setting.
- CodeGenOpt::Level OptLevel = CodeGenOpt::Aggressive;
+ CodeGenOptLevel OptLevel = CodeGenOptLevel::Aggressive;
std::unique_ptr<TargetMachine> TM = createTargetMachine(F, OptLevel);
FunctionAnalysisManager FAM;
@@ -3370,7 +3810,7 @@ static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) {
TargetTransformInfo::UnrollingPreferences UP =
gatherUnrollingPreferences(L, SE, TTI,
/*BlockFrequencyInfo=*/nullptr,
- /*ProfileSummaryInfo=*/nullptr, ORE, OptLevel,
+ /*ProfileSummaryInfo=*/nullptr, ORE, static_cast<int>(OptLevel),
/*UserThreshold=*/std::nullopt,
/*UserCount=*/std::nullopt,
/*UserAllowPartial=*/true,
@@ -3429,20 +3869,16 @@ static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) {
}
}
- unsigned NumInlineCandidates;
- bool NotDuplicatable;
- bool Convergent;
- InstructionCost LoopSizeIC =
- ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
- TTI, EphValues, UP.BEInsns);
- LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSizeIC << "\n");
+ UnrollCostEstimator UCE(L, TTI, EphValues, UP.BEInsns);
// Loop is not unrollable if the loop contains certain instructions.
- if (NotDuplicatable || Convergent || !LoopSizeIC.isValid()) {
+ if (!UCE.canUnroll() || UCE.Convergent) {
LLVM_DEBUG(dbgs() << "Loop not considered unrollable\n");
return 1;
}
- unsigned LoopSize = *LoopSizeIC.getValue();
+
+ LLVM_DEBUG(dbgs() << "Estimated loop size is " << UCE.getRolledLoopSize()
+ << "\n");
// TODO: Determine trip count of \p CLI if constant, computeUnrollCount might
// be able to use it.
@@ -3453,7 +3889,7 @@ static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) {
bool UseUpperBound = false;
computeUnrollCount(L, TTI, DT, &LI, &AC, SE, EphValues, &ORE, TripCount,
- MaxTripCount, MaxOrZero, TripMultiple, LoopSize, UP, PP,
+ MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
UseUpperBound);
unsigned Factor = UP.Count;
LLVM_DEBUG(dbgs() << "Suggesting unroll factor of " << Factor << "\n");
@@ -3917,7 +4353,7 @@ CallInst *OpenMPIRBuilder::createOMPInteropInit(
Constant *InteropTypeVal = ConstantInt::get(Int32, (int)InteropType);
if (NumDependences == nullptr) {
NumDependences = ConstantInt::get(Int32, 0);
- PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext());
+ PointerType *PointerTypeVar = PointerType::getUnqual(M.getContext());
DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
}
Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
@@ -3944,7 +4380,7 @@ CallInst *OpenMPIRBuilder::createOMPInteropDestroy(
Device = ConstantInt::get(Int32, -1);
if (NumDependences == nullptr) {
NumDependences = ConstantInt::get(Int32, 0);
- PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext());
+ PointerType *PointerTypeVar = PointerType::getUnqual(M.getContext());
DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
}
Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
@@ -3972,7 +4408,7 @@ CallInst *OpenMPIRBuilder::createOMPInteropUse(const LocationDescription &Loc,
Device = ConstantInt::get(Int32, -1);
if (NumDependences == nullptr) {
NumDependences = ConstantInt::get(Int32, 0);
- PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext());
+ PointerType *PointerTypeVar = PointerType::getUnqual(M.getContext());
DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
}
Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
@@ -4006,24 +4442,103 @@ CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
}
OpenMPIRBuilder::InsertPointTy
-OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD) {
+OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD,
+ int32_t MinThreadsVal, int32_t MaxThreadsVal,
+ int32_t MinTeamsVal, int32_t MaxTeamsVal) {
if (!updateToLocation(Loc))
return Loc.IP;
uint32_t SrcLocStrSize;
Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
- ConstantInt *IsSPMDVal = ConstantInt::getSigned(
- IntegerType::getInt8Ty(Int8->getContext()),
- IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
- ConstantInt *UseGenericStateMachine =
- ConstantInt::getBool(Int32->getContext(), !IsSPMD);
+ Constant *IsSPMDVal = ConstantInt::getSigned(
+ Int8, IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
+ Constant *UseGenericStateMachineVal = ConstantInt::getSigned(Int8, !IsSPMD);
+ Constant *MayUseNestedParallelismVal = ConstantInt::getSigned(Int8, true);
+ Constant *DebugIndentionLevelVal = ConstantInt::getSigned(Int16, 0);
+
+ Function *Kernel = Builder.GetInsertBlock()->getParent();
+
+ // Manifest the launch configuration in the metadata matching the kernel
+ // environment.
+ if (MinTeamsVal > 1 || MaxTeamsVal > 0)
+ writeTeamsForKernel(T, *Kernel, MinTeamsVal, MaxTeamsVal);
+
+ // For max values, < 0 means unset, == 0 means set but unknown.
+ if (MaxThreadsVal < 0)
+ MaxThreadsVal = std::max(
+ int32_t(getGridValue(T, Kernel).GV_Default_WG_Size), MinThreadsVal);
+
+ if (MaxThreadsVal > 0)
+ writeThreadBoundsForKernel(T, *Kernel, MinThreadsVal, MaxThreadsVal);
+
+ Constant *MinThreads = ConstantInt::getSigned(Int32, MinThreadsVal);
+ Constant *MaxThreads = ConstantInt::getSigned(Int32, MaxThreadsVal);
+ Constant *MinTeams = ConstantInt::getSigned(Int32, MinTeamsVal);
+ Constant *MaxTeams = ConstantInt::getSigned(Int32, MaxTeamsVal);
+ Constant *ReductionDataSize = ConstantInt::getSigned(Int32, 0);
+ Constant *ReductionBufferLength = ConstantInt::getSigned(Int32, 0);
+
+ // We need to strip the debug prefix to get the correct kernel name.
+ StringRef KernelName = Kernel->getName();
+ const std::string DebugPrefix = "_debug__";
+ if (KernelName.ends_with(DebugPrefix))
+ KernelName = KernelName.drop_back(DebugPrefix.length());
Function *Fn = getOrCreateRuntimeFunctionPtr(
omp::RuntimeFunction::OMPRTL___kmpc_target_init);
-
- CallInst *ThreadKind = Builder.CreateCall(
- Fn, {Ident, IsSPMDVal, UseGenericStateMachine});
+ const DataLayout &DL = Fn->getParent()->getDataLayout();
+
+ Twine DynamicEnvironmentName = KernelName + "_dynamic_environment";
+ Constant *DynamicEnvironmentInitializer =
+ ConstantStruct::get(DynamicEnvironment, {DebugIndentionLevelVal});
+ GlobalVariable *DynamicEnvironmentGV = new GlobalVariable(
+ M, DynamicEnvironment, /*IsConstant=*/false, GlobalValue::WeakODRLinkage,
+ DynamicEnvironmentInitializer, DynamicEnvironmentName,
+ /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal,
+ DL.getDefaultGlobalsAddressSpace());
+ DynamicEnvironmentGV->setVisibility(GlobalValue::ProtectedVisibility);
+
+ Constant *DynamicEnvironment =
+ DynamicEnvironmentGV->getType() == DynamicEnvironmentPtr
+ ? DynamicEnvironmentGV
+ : ConstantExpr::getAddrSpaceCast(DynamicEnvironmentGV,
+ DynamicEnvironmentPtr);
+
+ Constant *ConfigurationEnvironmentInitializer = ConstantStruct::get(
+ ConfigurationEnvironment, {
+ UseGenericStateMachineVal,
+ MayUseNestedParallelismVal,
+ IsSPMDVal,
+ MinThreads,
+ MaxThreads,
+ MinTeams,
+ MaxTeams,
+ ReductionDataSize,
+ ReductionBufferLength,
+ });
+ Constant *KernelEnvironmentInitializer = ConstantStruct::get(
+ KernelEnvironment, {
+ ConfigurationEnvironmentInitializer,
+ Ident,
+ DynamicEnvironment,
+ });
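+ // The resulting constant, consumed by __kmpc_target_init, has the shape
+ // sketched below (field order exactly as constructed above):
+ //   { { i8 UseGenericStateMachine, i8 MayUseNestedParallelism, i8 ExecMode,
+ //       i32 MinThreads, i32 MaxThreads, i32 MinTeams, i32 MaxTeams,
+ //       i32 ReductionDataSize, i32 ReductionBufferLength },
+ //     ptr Ident, ptr DynamicEnvironment }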
+ Twine KernelEnvironmentName = KernelName + "_kernel_environment";
+ GlobalVariable *KernelEnvironmentGV = new GlobalVariable(
+ M, KernelEnvironment, /*IsConstant=*/true, GlobalValue::WeakODRLinkage,
+ KernelEnvironmentInitializer, KernelEnvironmentName,
+ /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal,
+ DL.getDefaultGlobalsAddressSpace());
+ KernelEnvironmentGV->setVisibility(GlobalValue::ProtectedVisibility);
+
+ Constant *KernelEnvironment =
+ KernelEnvironmentGV->getType() == KernelEnvironmentPtr
+ ? KernelEnvironmentGV
+ : ConstantExpr::getAddrSpaceCast(KernelEnvironmentGV,
+ KernelEnvironmentPtr);
+ Value *KernelLaunchEnvironment = Kernel->getArg(0);
+ CallInst *ThreadKind =
+ Builder.CreateCall(Fn, {KernelEnvironment, KernelLaunchEnvironment});
Value *ExecUserCode = Builder.CreateICmpEQ(
ThreadKind, ConstantInt::get(ThreadKind->getType(), -1),
@@ -4057,46 +4572,153 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD) {
}
void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
- bool IsSPMD) {
+ int32_t TeamsReductionDataSize,
+ int32_t TeamsReductionBufferLength) {
if (!updateToLocation(Loc))
return;
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
- Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
- ConstantInt *IsSPMDVal = ConstantInt::getSigned(
- IntegerType::getInt8Ty(Int8->getContext()),
- IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
-
Function *Fn = getOrCreateRuntimeFunctionPtr(
omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
- Builder.CreateCall(Fn, {Ident, IsSPMDVal});
+ Builder.CreateCall(Fn, {});
+
+ if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
+ return;
+
+ Function *Kernel = Builder.GetInsertBlock()->getParent();
+ // We need to strip the debug prefix to get the correct kernel name.
+ StringRef KernelName = Kernel->getName();
+ const std::string DebugPrefix = "_debug__";
+ if (KernelName.ends_with(DebugPrefix))
+ KernelName = KernelName.drop_back(DebugPrefix.length());
+ auto *KernelEnvironmentGV =
+ M.getNamedGlobal((KernelName + "_kernel_environment").str());
+ assert(KernelEnvironmentGV && "Expected kernel environment global\n");
+ auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
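+ // Indices {0, 7} and {0, 8} address ReductionDataSize and
+ // ReductionBufferLength within the ConfigurationEnvironment struct, which
+ // is field 0 of the kernel environment built in createTargetInit above.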
+ auto *NewInitializer = ConstantFoldInsertValueInstruction(
+ KernelEnvironmentInitializer,
+ ConstantInt::get(Int32, TeamsReductionDataSize), {0, 7});
+ NewInitializer = ConstantFoldInsertValueInstruction(
+ NewInitializer, ConstantInt::get(Int32, TeamsReductionBufferLength),
+ {0, 8});
+ KernelEnvironmentGV->setInitializer(NewInitializer);
+}
+
+static MDNode *getNVPTXMDNode(Function &Kernel, StringRef Name) {
+ Module &M = *Kernel.getParent();
+ NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
+ for (auto *Op : MD->operands()) {
+ if (Op->getNumOperands() != 3)
+ continue;
+ auto *KernelOp = dyn_cast<ConstantAsMetadata>(Op->getOperand(0));
+ if (!KernelOp || KernelOp->getValue() != &Kernel)
+ continue;
+ auto *Prop = dyn_cast<MDString>(Op->getOperand(1));
+ if (!Prop || Prop->getString() != Name)
+ continue;
+ return Op;
+ }
+ return nullptr;
+}
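+
+// Illustrative sketch (not part of this change): the "nvvm.annotations"
+// entries matched by getNVPTXMDNode have the shape
+//   !nvvm.annotations = !{!0}
+//   !0 = !{ptr @my_kernel, !"maxntidx", i32 128}
+// where @my_kernel and the value 128 are assumed placeholders.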
+
+static void updateNVPTXMetadata(Function &Kernel, StringRef Name, int32_t Value,
+ bool Min) {
+ // Update the "maxntidx" metadata for NVIDIA, or add it.
+ MDNode *ExistingOp = getNVPTXMDNode(Kernel, Name);
+ if (ExistingOp) {
+ auto *OldVal = cast<ConstantAsMetadata>(ExistingOp->getOperand(2));
+ int32_t OldLimit = cast<ConstantInt>(OldVal->getValue())->getZExtValue();
+ ExistingOp->replaceOperandWith(
+ 2, ConstantAsMetadata::get(ConstantInt::get(
+ OldVal->getValue()->getType(),
+ Min ? std::min(OldLimit, Value) : std::max(OldLimit, Value))));
+ } else {
+ LLVMContext &Ctx = Kernel.getContext();
+ Metadata *MDVals[] = {ConstantAsMetadata::get(&Kernel),
+ MDString::get(Ctx, Name),
+ ConstantAsMetadata::get(
+ ConstantInt::get(Type::getInt32Ty(Ctx), Value))};
+ // Append metadata to nvvm.annotations
+ Module &M = *Kernel.getParent();
+ NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
+ MD->addOperand(MDNode::get(Ctx, MDVals));
+ }
+}
+
+std::pair<int32_t, int32_t>
+OpenMPIRBuilder::readThreadBoundsForKernel(const Triple &T, Function &Kernel) {
+ int32_t ThreadLimit =
+ Kernel.getFnAttributeAsParsedInteger("omp_target_thread_limit");
+
+ if (T.isAMDGPU()) {
+ const auto &Attr = Kernel.getFnAttribute("amdgpu-flat-work-group-size");
+ if (!Attr.isValid() || !Attr.isStringAttribute())
+ return {0, ThreadLimit};
+ auto [LBStr, UBStr] = Attr.getValueAsString().split(',');
+ int32_t LB, UB;
+ if (!llvm::to_integer(UBStr, UB, 10))
+ return {0, ThreadLimit};
+ UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
+ if (!llvm::to_integer(LBStr, LB, 10))
+ return {0, UB};
+ return {LB, UB};
+ }
+
+ if (MDNode *ExistingOp = getNVPTXMDNode(Kernel, "maxntidx")) {
+ auto *OldVal = cast<ConstantAsMetadata>(ExistingOp->getOperand(2));
+ int32_t UB = cast<ConstantInt>(OldVal->getValue())->getZExtValue();
+ return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
+ }
+ return {0, ThreadLimit};
+}
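+
+// Worked example (assumed attribute values, illustrative only): with
+//   attributes #0 = { "amdgpu-flat-work-group-size"="1,256"
+//                     "omp_target_thread_limit"="128" }
+// readThreadBoundsForKernel returns {1, 128}: the parsed upper bound 256
+// is clamped to the omp_target_thread_limit of 128, and the lower bound
+// comes from the attribute string.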
+
+void OpenMPIRBuilder::writeThreadBoundsForKernel(const Triple &T,
+ Function &Kernel, int32_t LB,
+ int32_t UB) {
+ Kernel.addFnAttr("omp_target_thread_limit", std::to_string(UB));
+
+ if (T.isAMDGPU()) {
+ Kernel.addFnAttr("amdgpu-flat-work-group-size",
+ llvm::utostr(LB) + "," + llvm::utostr(UB));
+ return;
+ }
+
+ updateNVPTXMetadata(Kernel, "maxntidx", UB, true);
+}
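+
+// E.g. writeThreadBoundsForKernel(T, K, /*LB=*/1, /*UB=*/256) records
+// "omp_target_thread_limit"="256" and, on AMDGPU,
+// "amdgpu-flat-work-group-size"="1,256"; on NVPTX it instead clamps any
+// existing "maxntidx" annotation to at most 256.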
+
+std::pair<int32_t, int32_t>
+OpenMPIRBuilder::readTeamBoundsForKernel(const Triple &, Function &Kernel) {
+ // TODO: Read from backend annotations if available.
+ return {0, Kernel.getFnAttributeAsParsedInteger("omp_target_num_teams")};
+}
+
+void OpenMPIRBuilder::writeTeamsForKernel(const Triple &T, Function &Kernel,
+ int32_t LB, int32_t UB) {
+ if (T.isNVPTX()) {
+ if (UB > 0)
+ updateNVPTXMetadata(Kernel, "maxclusterrank", UB, true);
+ updateNVPTXMetadata(Kernel, "minctasm", LB, false);
+ }
+ Kernel.addFnAttr("omp_target_num_teams", std::to_string(LB));
}
void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
- Function *OutlinedFn, int32_t NumTeams, int32_t NumThreads) {
+ Function *OutlinedFn) {
if (Config.isTargetDevice()) {
OutlinedFn->setLinkage(GlobalValue::WeakODRLinkage);
// TODO: Determine if DSO local can be set to true.
OutlinedFn->setDSOLocal(false);
OutlinedFn->setVisibility(GlobalValue::ProtectedVisibility);
- if (Triple(M.getTargetTriple()).isAMDGCN())
+ if (T.isAMDGCN())
OutlinedFn->setCallingConv(CallingConv::AMDGPU_KERNEL);
}
-
- if (NumTeams > 0)
- OutlinedFn->addFnAttr("omp_target_num_teams", std::to_string(NumTeams));
- if (NumThreads > 0)
- OutlinedFn->addFnAttr("omp_target_thread_limit",
- std::to_string(NumThreads));
}
Constant *OpenMPIRBuilder::createOutlinedFunctionID(Function *OutlinedFn,
StringRef EntryFnIDName) {
if (Config.isTargetDevice()) {
assert(OutlinedFn && "The outlined function must exist if embedded");
- return ConstantExpr::getBitCast(OutlinedFn, Builder.getInt8PtrTy());
+ return OutlinedFn;
}
return new GlobalVariable(
@@ -4118,9 +4740,8 @@ Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn,
void OpenMPIRBuilder::emitTargetRegionFunction(
TargetRegionEntryInfo &EntryInfo,
- FunctionGenCallback &GenerateFunctionCallback, int32_t NumTeams,
- int32_t NumThreads, bool IsOffloadEntry, Function *&OutlinedFn,
- Constant *&OutlinedFnID) {
+ FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry,
+ Function *&OutlinedFn, Constant *&OutlinedFnID) {
SmallString<64> EntryFnName;
OffloadInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo);
@@ -4140,16 +4761,15 @@ void OpenMPIRBuilder::emitTargetRegionFunction(
? std::string(EntryFnName)
: createPlatformSpecificName({EntryFnName, "region_id"});
- OutlinedFnID = registerTargetRegionFunction(
- EntryInfo, OutlinedFn, EntryFnName, EntryFnIDName, NumTeams, NumThreads);
+ OutlinedFnID = registerTargetRegionFunction(EntryInfo, OutlinedFn,
+ EntryFnName, EntryFnIDName);
}
Constant *OpenMPIRBuilder::registerTargetRegionFunction(
TargetRegionEntryInfo &EntryInfo, Function *OutlinedFn,
- StringRef EntryFnName, StringRef EntryFnIDName, int32_t NumTeams,
- int32_t NumThreads) {
+ StringRef EntryFnName, StringRef EntryFnIDName) {
if (OutlinedFn)
- setOutlinedTargetRegionFunctionAttributes(OutlinedFn, NumTeams, NumThreads);
+ setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
OffloadInfoManager.registerTargetRegionEntryInfo(
@@ -4161,8 +4781,7 @@ Constant *OpenMPIRBuilder::registerTargetRegionFunction(
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
const LocationDescription &Loc, InsertPointTy AllocaIP,
InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond,
- TargetDataInfo &Info,
- function_ref<MapInfosTy &(InsertPointTy CodeGenIP)> GenMapInfoCB,
+ TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
omp::RuntimeFunction *MapperFunc,
function_ref<InsertPointTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)>
BodyGenCB,
@@ -4171,6 +4790,10 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
if (!updateToLocation(Loc))
return InsertPointTy();
+ // Disable TargetData CodeGen on Device pass.
+ if (Config.IsTargetDevice.value_or(false))
+ return Builder.saveIP();
+
Builder.restoreIP(CodeGenIP);
bool IsStandAlone = !BodyGenCB;
MapInfosTy *MapInfo;
@@ -4293,13 +4916,104 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
return Builder.saveIP();
}
-static Function *
-createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
- StringRef FuncName, SmallVectorImpl<Value *> &Inputs,
- OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc) {
+FunctionCallee
+OpenMPIRBuilder::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
+ bool IsGPUDistribute) {
+ assert((IVSize == 32 || IVSize == 64) &&
+ "IV size is not compatible with the omp runtime");
+ RuntimeFunction Name;
+ if (IsGPUDistribute)
+ Name = IVSize == 32
+ ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
+ : omp::OMPRTL___kmpc_distribute_static_init_4u)
+ : (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_8
+ : omp::OMPRTL___kmpc_distribute_static_init_8u);
+ else
+ Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
+ : omp::OMPRTL___kmpc_for_static_init_4u)
+ : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
+ : omp::OMPRTL___kmpc_for_static_init_8u);
+
+ return getOrCreateRuntimeFunction(M, Name);
+}
+
+FunctionCallee OpenMPIRBuilder::createDispatchInitFunction(unsigned IVSize,
+ bool IVSigned) {
+ assert((IVSize == 32 || IVSize == 64) &&
+ "IV size is not compatible with the omp runtime");
+ RuntimeFunction Name = IVSize == 32
+ ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
+ : omp::OMPRTL___kmpc_dispatch_init_4u)
+ : (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_8
+ : omp::OMPRTL___kmpc_dispatch_init_8u);
+
+ return getOrCreateRuntimeFunction(M, Name);
+}
+
+FunctionCallee OpenMPIRBuilder::createDispatchNextFunction(unsigned IVSize,
+ bool IVSigned) {
+ assert((IVSize == 32 || IVSize == 64) &&
+ "IV size is not compatible with the omp runtime");
+ RuntimeFunction Name = IVSize == 32
+ ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
+ : omp::OMPRTL___kmpc_dispatch_next_4u)
+ : (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_8
+ : omp::OMPRTL___kmpc_dispatch_next_8u);
+
+ return getOrCreateRuntimeFunction(M, Name);
+}
+
+FunctionCallee OpenMPIRBuilder::createDispatchFiniFunction(unsigned IVSize,
+ bool IVSigned) {
+ assert((IVSize == 32 || IVSize == 64) &&
+ "IV size is not compatible with the omp runtime");
+ RuntimeFunction Name = IVSize == 32
+ ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
+ : omp::OMPRTL___kmpc_dispatch_fini_4u)
+ : (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_8
+ : omp::OMPRTL___kmpc_dispatch_fini_8u);
+
+ return getOrCreateRuntimeFunction(M, Name);
+}
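+
+// For instance, IVSize == 32 with a signed IV selects
+// __kmpc_dispatch_fini_4, while IVSize == 64 with an unsigned IV selects
+// __kmpc_dispatch_fini_8u; the init and next variants follow the same
+// naming scheme.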
+
+static void replaceConstantExprUsesInFuncWithInstr(ConstantExpr *ConstExpr,
+ Function *Func) {
+ for (User *User : make_early_inc_range(ConstExpr->users()))
+ if (auto *Instr = dyn_cast<Instruction>(User))
+ if (Instr->getFunction() == Func)
+ Instr->replaceUsesOfWith(ConstExpr, ConstExpr->getAsInstruction(Instr));
+}
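+
+// Sketch of the effect (assumed IR, illustrative only): a use inside Func
+// such as
+//   store i32 0, ptr getelementptr inbounds ([4 x i32], ptr @g, i64 0, i64 1)
+// is rewritten to
+//   %0 = getelementptr inbounds [4 x i32], ptr @g, i64 0, i64 1
+//   store i32 0, ptr %0
+// so the materialized instruction can later be updated via
+// replaceUsesOfWith.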
+
+static void replaceConstantValueUsesInFuncWithInstr(llvm::Value *Input,
+ Function *Func) {
+ for (User *User : make_early_inc_range(Input->users()))
+ if (auto *Const = dyn_cast<Constant>(User))
+ if (auto *ConstExpr = dyn_cast<ConstantExpr>(Const))
+        replaceConstantExprUsesInFuncWithInstr(ConstExpr, Func);
+}
+
+static Function *createOutlinedFunction(
+ OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, StringRef FuncName,
+ SmallVectorImpl<Value *> &Inputs,
+ OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
+ OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
SmallVector<Type *> ParameterTypes;
- for (auto &Arg : Inputs)
- ParameterTypes.push_back(Arg->getType());
+ if (OMPBuilder.Config.isTargetDevice()) {
+ // Add the "implicit" runtime argument we use to provide launch specific
+ // information for target devices.
+ auto *Int8PtrTy = PointerType::getUnqual(Builder.getContext());
+ ParameterTypes.push_back(Int8PtrTy);
+
+ // All parameters to target devices are passed as pointers
+ // or i64. This assumes 64-bit address spaces/pointers.
+ for (auto &Arg : Inputs)
+ ParameterTypes.push_back(Arg->getType()->isPointerTy()
+ ? Arg->getType()
+ : Type::getInt64Ty(Builder.getContext()));
+ } else {
+ for (auto &Arg : Inputs)
+ ParameterTypes.push_back(Arg->getType());
+ }
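+  // E.g. (illustrative): host inputs {ptr %p, i32 %n} yield the device
+  // parameter list {ptr %dyn_ptr, ptr %p, i64 %n}: the implicit
+  // launch-info pointer is prepended and the i32 is widened to i64.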
auto FuncType = FunctionType::get(Builder.getVoidTy(), ParameterTypes,
/*isVarArg*/ false);
@@ -4317,25 +5031,56 @@ createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
if (OMPBuilder.Config.isTargetDevice())
Builder.restoreIP(OMPBuilder.createTargetInit(Builder, /*IsSPMD*/ false));
- Builder.restoreIP(CBFunc(Builder.saveIP(), Builder.saveIP()));
+ BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
// Insert target deinit call in the device compilation pass.
+ Builder.restoreIP(CBFunc(Builder.saveIP(), Builder.saveIP()));
if (OMPBuilder.Config.isTargetDevice())
- OMPBuilder.createTargetDeinit(Builder, /*IsSPMD*/ false);
+ OMPBuilder.createTargetDeinit(Builder);
// Insert return instruction.
Builder.CreateRetVoid();
+ // New Alloca IP at entry point of created device function.
+ Builder.SetInsertPoint(EntryBB->getFirstNonPHI());
+ auto AllocaIP = Builder.saveIP();
+
+ Builder.SetInsertPoint(UserCodeEntryBB->getFirstNonPHIOrDbg());
+
+ // Skip the artificial dyn_ptr on the device.
+ const auto &ArgRange =
+ OMPBuilder.Config.isTargetDevice()
+ ? make_range(Func->arg_begin() + 1, Func->arg_end())
+ : Func->args();
+
  // Rewrite uses of input values to parameters.
- for (auto InArg : zip(Inputs, Func->args())) {
+ for (auto InArg : zip(Inputs, ArgRange)) {
Value *Input = std::get<0>(InArg);
Argument &Arg = std::get<1>(InArg);
+ Value *InputCopy = nullptr;
+
+ Builder.restoreIP(
+ ArgAccessorFuncCB(Arg, Input, InputCopy, AllocaIP, Builder.saveIP()));
+
+    // Things like GEPs can come in the form of Constants. Constants and
+    // ConstantExprs do not know which function contains them, so we must
+    // walk their users to find an instruction and decide whether they are
+    // used inside the function we are outlining. We also replace each such
+    // constant expression with an equivalent instruction: the new
+    // instruction is owned by our target function, so replaceUsesOfWith
+    // can be invoked on it in the loop below (that is not possible with
+    // constants). Materializing a fresh instruction is also the cautious
+    // choice, since the old expression may still have uses outside the
+    // function (unlikely for a Constant, but possible).
+ replaceConstantValueUsesInFuncWithInstr(Input, Func);
    // Rewrite all instructions in Func that use Input to use InputCopy.
for (User *User : make_early_inc_range(Input->users()))
- if (auto Instr = dyn_cast<Instruction>(User))
+ if (auto *Instr = dyn_cast<Instruction>(User))
if (Instr->getFunction() == Func)
- Instr->replaceUsesOfWith(Input, &Arg);
+ Instr->replaceUsesOfWith(Input, InputCopy);
}
// Restore insert point.
@@ -4344,45 +5089,96 @@ createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
return Func;
}
-static void
-emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
- TargetRegionEntryInfo &EntryInfo,
- Function *&OutlinedFn, int32_t NumTeams,
- int32_t NumThreads, SmallVectorImpl<Value *> &Inputs,
- OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc) {
+static void emitTargetOutlinedFunction(
+ OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
+ TargetRegionEntryInfo &EntryInfo, Function *&OutlinedFn,
+ Constant *&OutlinedFnID, SmallVectorImpl<Value *> &Inputs,
+ OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
+ OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
- [&OMPBuilder, &Builder, &Inputs, &CBFunc](StringRef EntryFnName) {
+ [&OMPBuilder, &Builder, &Inputs, &CBFunc,
+ &ArgAccessorFuncCB](StringRef EntryFnName) {
return createOutlinedFunction(OMPBuilder, Builder, EntryFnName, Inputs,
- CBFunc);
+ CBFunc, ArgAccessorFuncCB);
};
- Constant *OutlinedFnID;
- OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
- NumTeams, NumThreads, true, OutlinedFn,
- OutlinedFnID);
+ OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction, true,
+ OutlinedFn, OutlinedFnID);
}
-static void emitTargetCall(IRBuilderBase &Builder, Function *OutlinedFn,
- SmallVectorImpl<Value *> &Args) {
- // TODO: Add kernel launch call
- Builder.CreateCall(OutlinedFn, Args);
+static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
+ OpenMPIRBuilder::InsertPointTy AllocaIP,
+ Function *OutlinedFn, Constant *OutlinedFnID,
+ int32_t NumTeams, int32_t NumThreads,
+ SmallVectorImpl<Value *> &Args,
+ OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB) {
+
+ OpenMPIRBuilder::TargetDataInfo Info(
+ /*RequiresDevicePointerInfo=*/false,
+ /*SeparateBeginEndCalls=*/true);
+
+ OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
+ OMPBuilder.emitOffloadingArrays(AllocaIP, Builder.saveIP(), MapInfo, Info,
+ /*IsNonContiguous=*/true);
+
+ OpenMPIRBuilder::TargetDataRTArgs RTArgs;
+ OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info,
+ !MapInfo.Names.empty());
+
+  // Fallback for emitKernelLaunch: if offloading is unavailable or fails,
+  // call the host version of the outlined function directly.
+ auto &&EmitTargetCallFallbackCB =
+ [&](OpenMPIRBuilder::InsertPointTy IP) -> OpenMPIRBuilder::InsertPointTy {
+ Builder.restoreIP(IP);
+ Builder.CreateCall(OutlinedFn, Args);
+ return Builder.saveIP();
+ };
+
+ unsigned NumTargetItems = MapInfo.BasePointers.size();
+ // TODO: Use correct device ID
+ Value *DeviceID = Builder.getInt64(OMP_DEVICEID_UNDEF);
+ Value *NumTeamsVal = Builder.getInt32(NumTeams);
+ Value *NumThreadsVal = Builder.getInt32(NumThreads);
+ uint32_t SrcLocStrSize;
+ Constant *SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
+ Value *RTLoc = OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize,
+ llvm::omp::IdentFlag(0), 0);
+ // TODO: Use correct NumIterations
+ Value *NumIterations = Builder.getInt64(0);
+ // TODO: Use correct DynCGGroupMem
+ Value *DynCGGroupMem = Builder.getInt32(0);
+
+ bool HasNoWait = false;
+
+ OpenMPIRBuilder::TargetKernelArgs KArgs(NumTargetItems, RTArgs, NumIterations,
+ NumTeamsVal, NumThreadsVal,
+ DynCGGroupMem, HasNoWait);
+
+ Builder.restoreIP(OMPBuilder.emitKernelLaunch(
+ Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, KArgs,
+ DeviceID, RTLoc, AllocaIP));
}
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTarget(
- const LocationDescription &Loc, OpenMPIRBuilder::InsertPointTy CodeGenIP,
- TargetRegionEntryInfo &EntryInfo, int32_t NumTeams, int32_t NumThreads,
- SmallVectorImpl<Value *> &Args, TargetBodyGenCallbackTy CBFunc) {
+ const LocationDescription &Loc, InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, int32_t NumTeams,
+ int32_t NumThreads, SmallVectorImpl<Value *> &Args,
+ GenMapInfoCallbackTy GenMapInfoCB,
+ OpenMPIRBuilder::TargetBodyGenCallbackTy CBFunc,
+ OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB) {
if (!updateToLocation(Loc))
return InsertPointTy();
Builder.restoreIP(CodeGenIP);
Function *OutlinedFn;
- emitTargetOutlinedFunction(*this, Builder, EntryInfo, OutlinedFn, NumTeams,
- NumThreads, Args, CBFunc);
+ Constant *OutlinedFnID;
+ emitTargetOutlinedFunction(*this, Builder, EntryInfo, OutlinedFn,
+ OutlinedFnID, Args, CBFunc, ArgAccessorFuncCB);
if (!Config.isTargetDevice())
- emitTargetCall(Builder, OutlinedFn, Args);
+ emitTargetCall(*this, Builder, AllocaIP, OutlinedFn, OutlinedFnID, NumTeams,
+ NumThreads, Args, GenMapInfoCB);
+
return Builder.saveIP();
}
@@ -4417,11 +5213,17 @@ OpenMPIRBuilder::getOrCreateInternalVariable(Type *Ty, const StringRef &Name,
// variable for possibly changing that to internal or private, or maybe
// create different versions of the function for different OMP internal
// variables.
- auto *GV = new GlobalVariable(
- M, Ty, /*IsConstant=*/false, GlobalValue::CommonLinkage,
- Constant::getNullValue(Ty), Elem.first(),
- /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal, AddressSpace);
- GV->setAlignment(M.getDataLayout().getABITypeAlign(Ty));
+ auto Linkage = this->M.getTargetTriple().rfind("wasm32") == 0
+ ? GlobalValue::ExternalLinkage
+ : GlobalValue::CommonLinkage;
+ auto *GV = new GlobalVariable(M, Ty, /*IsConstant=*/false, Linkage,
+ Constant::getNullValue(Ty), Elem.first(),
+ /*InsertBefore=*/nullptr,
+ GlobalValue::NotThreadLocal, AddressSpace);
+ const DataLayout &DL = M.getDataLayout();
+ const llvm::Align TypeAlign = DL.getABITypeAlign(Ty);
+ const llvm::Align PtrAlign = DL.getPointerABIAlignment(AddressSpace);
+ GV->setAlignment(std::max(TypeAlign, PtrAlign));
Elem.second = GV;
}
@@ -4513,10 +5315,11 @@ void OpenMPIRBuilder::emitOffloadingArraysArgument(IRBuilderBase &Builder,
bool ForEndCall) {
assert((!ForEndCall || Info.separateBeginEndCalls()) &&
"expected region end call to runtime only when end call is separate");
- auto VoidPtrTy = Type::getInt8PtrTy(M.getContext());
- auto VoidPtrPtrTy = VoidPtrTy->getPointerTo(0);
+ auto UnqualPtrTy = PointerType::getUnqual(M.getContext());
+ auto VoidPtrTy = UnqualPtrTy;
+ auto VoidPtrPtrTy = UnqualPtrTy;
auto Int64Ty = Type::getInt64Ty(M.getContext());
- auto Int64PtrTy = Type::getInt64PtrTy(M.getContext());
+ auto Int64PtrTy = UnqualPtrTy;
if (!Info.NumberOfPtrs) {
RTArgs.BasePointersArray = ConstantPointerNull::get(VoidPtrPtrTy);
@@ -4622,12 +5425,12 @@ void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
// args[I] = &dims
Builder.restoreIP(CodeGenIP);
Value *DAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
- DimsAddr, Builder.getInt8PtrTy());
+ DimsAddr, Builder.getPtrTy());
Value *P = Builder.CreateConstInBoundsGEP2_32(
- ArrayType::get(Builder.getInt8PtrTy(), Info.NumberOfPtrs),
+ ArrayType::get(Builder.getPtrTy(), Info.NumberOfPtrs),
Info.RTArgs.PointersArray, 0, I);
Builder.CreateAlignedStore(
- DAddr, P, M.getDataLayout().getPrefTypeAlign(Builder.getInt8PtrTy()));
+ DAddr, P, M.getDataLayout().getPrefTypeAlign(Builder.getPtrTy()));
++L;
}
}
@@ -4649,7 +5452,7 @@ void OpenMPIRBuilder::emitOffloadingArrays(
// Detect if we have any capture size requiring runtime evaluation of the
// size so that a constant array could be eventually used.
ArrayType *PointerArrayType =
- ArrayType::get(Builder.getInt8PtrTy(), Info.NumberOfPtrs);
+ ArrayType::get(Builder.getPtrTy(), Info.NumberOfPtrs);
Info.RTArgs.BasePointersArray = Builder.CreateAlloca(
PointerArrayType, /* ArraySize = */ nullptr, ".offload_baseptrs");
@@ -4665,7 +5468,7 @@ void OpenMPIRBuilder::emitOffloadingArrays(
// need to fill up the arrays as we do for the pointers.
Type *Int64Ty = Builder.getInt64Ty();
SmallVector<Constant *> ConstSizes(CombinedInfo.Sizes.size(),
- ConstantInt::get(Builder.getInt64Ty(), 0));
+ ConstantInt::get(Int64Ty, 0));
SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
if (auto *CI = dyn_cast<Constant>(CombinedInfo.Sizes[I])) {
@@ -4674,8 +5477,8 @@ void OpenMPIRBuilder::emitOffloadingArrays(
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
CombinedInfo.Types[I] &
OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
- ConstSizes[I] = ConstantInt::get(Builder.getInt64Ty(),
- CombinedInfo.NonContigInfo.Dims[I]);
+ ConstSizes[I] =
+ ConstantInt::get(Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
else
ConstSizes[I] = CI;
continue;
@@ -4708,11 +5511,9 @@ void OpenMPIRBuilder::emitOffloadingArrays(
SizeArrayType, /* ArraySize = */ nullptr, ".offload_sizes");
Buffer->setAlignment(OffloadSizeAlign);
Builder.restoreIP(CodeGenIP);
- Value *GblConstPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
- SizesArrayGbl, Int64Ty->getPointerTo());
Builder.CreateMemCpy(
Buffer, M.getDataLayout().getPrefTypeAlign(Buffer->getType()),
- GblConstPtr, OffloadSizeAlign,
+ SizesArrayGbl, OffloadSizeAlign,
Builder.getIntN(
IndexSize,
Buffer->getAllocationSize(M.getDataLayout())->getFixedValue()));
@@ -4740,8 +5541,8 @@ void OpenMPIRBuilder::emitOffloadingArrays(
createOffloadMapnames(CombinedInfo.Names, MapnamesName);
Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
} else {
- Info.RTArgs.MapNamesArray = Constant::getNullValue(
- Type::getInt8Ty(Builder.getContext())->getPointerTo());
+ Info.RTArgs.MapNamesArray =
+ Constant::getNullValue(PointerType::getUnqual(Builder.getContext()));
}
// If there's a present map type modifier, it must not be applied to the end
@@ -4762,60 +5563,54 @@ void OpenMPIRBuilder::emitOffloadingArrays(
}
}
+ PointerType *PtrTy = Builder.getPtrTy();
for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
Value *BPVal = CombinedInfo.BasePointers[I];
Value *BP = Builder.CreateConstInBoundsGEP2_32(
- ArrayType::get(Builder.getInt8PtrTy(), Info.NumberOfPtrs),
- Info.RTArgs.BasePointersArray, 0, I);
- BP = Builder.CreatePointerBitCastOrAddrSpaceCast(
- BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
- Builder.CreateAlignedStore(
- BPVal, BP, M.getDataLayout().getPrefTypeAlign(Builder.getInt8PtrTy()));
+ ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.BasePointersArray,
+ 0, I);
+ Builder.CreateAlignedStore(BPVal, BP,
+ M.getDataLayout().getPrefTypeAlign(PtrTy));
if (Info.requiresDevicePointerInfo()) {
if (CombinedInfo.DevicePointers[I] == DeviceInfoTy::Pointer) {
CodeGenIP = Builder.saveIP();
Builder.restoreIP(AllocaIP);
- Info.DevicePtrInfoMap[BPVal] = {
- BP, Builder.CreateAlloca(Builder.getPtrTy())};
+ Info.DevicePtrInfoMap[BPVal] = {BP, Builder.CreateAlloca(PtrTy)};
Builder.restoreIP(CodeGenIP);
- assert(DeviceAddrCB &&
- "DeviceAddrCB missing for DevicePtr code generation");
- DeviceAddrCB(I, Info.DevicePtrInfoMap[BPVal].second);
+ if (DeviceAddrCB)
+ DeviceAddrCB(I, Info.DevicePtrInfoMap[BPVal].second);
} else if (CombinedInfo.DevicePointers[I] == DeviceInfoTy::Address) {
Info.DevicePtrInfoMap[BPVal] = {BP, BP};
- assert(DeviceAddrCB &&
- "DeviceAddrCB missing for DevicePtr code generation");
- DeviceAddrCB(I, BP);
+ if (DeviceAddrCB)
+ DeviceAddrCB(I, BP);
}
}
Value *PVal = CombinedInfo.Pointers[I];
Value *P = Builder.CreateConstInBoundsGEP2_32(
- ArrayType::get(Builder.getInt8PtrTy(), Info.NumberOfPtrs),
- Info.RTArgs.PointersArray, 0, I);
- P = Builder.CreatePointerBitCastOrAddrSpaceCast(
- P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
+ ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray, 0,
+ I);
    // TODO: Check that the alignment is correct.
- Builder.CreateAlignedStore(
- PVal, P, M.getDataLayout().getPrefTypeAlign(Builder.getInt8PtrTy()));
+ Builder.CreateAlignedStore(PVal, P,
+ M.getDataLayout().getPrefTypeAlign(PtrTy));
if (RuntimeSizes.test(I)) {
Value *S = Builder.CreateConstInBoundsGEP2_32(
ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
/*Idx0=*/0,
/*Idx1=*/I);
- Builder.CreateAlignedStore(
- Builder.CreateIntCast(CombinedInfo.Sizes[I], Int64Ty,
- /*isSigned=*/true),
- S, M.getDataLayout().getPrefTypeAlign(Builder.getInt8PtrTy()));
+ Builder.CreateAlignedStore(Builder.CreateIntCast(CombinedInfo.Sizes[I],
+ Int64Ty,
+ /*isSigned=*/true),
+ S, M.getDataLayout().getPrefTypeAlign(PtrTy));
}
// Fill up the mapper array.
unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
- Value *MFunc = ConstantPointerNull::get(Builder.getInt8PtrTy());
+ Value *MFunc = ConstantPointerNull::get(PtrTy);
if (CustomMapperCB)
if (Value *CustomMFunc = CustomMapperCB(I))
- MFunc = Builder.CreatePointerCast(CustomMFunc, Builder.getInt8PtrTy());
+ MFunc = Builder.CreatePointerCast(CustomMFunc, PtrTy);
Value *MAddr = Builder.CreateInBoundsGEP(
MappersArray->getAllocatedType(), MappersArray,
{Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
@@ -5007,8 +5802,8 @@ OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
if (!updateToLocation(Loc))
return Loc.IP;
- Type *XTy = X.Var->getType();
- assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
+ assert(X.Var->getType()->isPointerTy() &&
+ "OMP Atomic expects a pointer to target memory");
Type *XElemTy = X.ElemTy;
assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
XElemTy->isPointerTy()) &&
@@ -5019,14 +5814,11 @@ OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
XSt->setAtomic(AO);
} else {
// We need to bitcast and perform atomic op as integers
- unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
IntegerType *IntCastTy =
IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
- Value *XBCast = Builder.CreateBitCast(
- X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.dst.int.cast");
Value *ExprCast =
Builder.CreateBitCast(Expr, IntCastTy, "atomic.src.int.cast");
- StoreInst *XSt = Builder.CreateStore(ExprCast, XBCast, X.IsVolatile);
+ StoreInst *XSt = Builder.CreateStore(ExprCast, X.Var, X.IsVolatile);
XSt->setAtomic(AO);
}
@@ -5406,12 +6198,152 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
return Builder.saveIP();
}
+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::createTeams(const LocationDescription &Loc,
+ BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower,
+ Value *NumTeamsUpper, Value *ThreadLimit,
+ Value *IfExpr) {
+ if (!updateToLocation(Loc))
+ return InsertPointTy();
+
+ uint32_t SrcLocStrSize;
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+ Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Function *CurrentFunction = Builder.GetInsertBlock()->getParent();
+
+  // The outer allocation basic block is the entry block of the current
+  // function.
+ BasicBlock &OuterAllocaBB = CurrentFunction->getEntryBlock();
+ if (&OuterAllocaBB == Builder.GetInsertBlock()) {
+ BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "teams.entry");
+ Builder.SetInsertPoint(BodyBB, BodyBB->begin());
+ }
+
+ // The current basic block is split into four basic blocks. After outlining,
+ // they will be mapped as follows:
+ // ```
+ // def current_fn() {
+ // current_basic_block:
+ // br label %teams.exit
+ // teams.exit:
+ // ; instructions after teams
+ // }
+ //
+ // def outlined_fn() {
+ // teams.alloca:
+ // br label %teams.body
+ // teams.body:
+ // ; instructions within teams body
+ // }
+ // ```
+ BasicBlock *ExitBB = splitBB(Builder, /*CreateBranch=*/true, "teams.exit");
+ BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "teams.body");
+ BasicBlock *AllocaBB =
+ splitBB(Builder, /*CreateBranch=*/true, "teams.alloca");
+
+ // Push num_teams
+ if (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr) {
+ assert((NumTeamsLower == nullptr || NumTeamsUpper != nullptr) &&
+ "if lowerbound is non-null, then upperbound must also be non-null "
+ "for bounds on num_teams");
+
+ if (NumTeamsUpper == nullptr)
+ NumTeamsUpper = Builder.getInt32(0);
+
+ if (NumTeamsLower == nullptr)
+ NumTeamsLower = NumTeamsUpper;
+
+ if (IfExpr) {
+ assert(IfExpr->getType()->isIntegerTy() &&
+ "argument to if clause must be an integer value");
+
+ // upper = ifexpr ? upper : 1
+ if (IfExpr->getType() != Int1)
+ IfExpr = Builder.CreateICmpNE(IfExpr,
+ ConstantInt::get(IfExpr->getType(), 0));
+ NumTeamsUpper = Builder.CreateSelect(
+ IfExpr, NumTeamsUpper, Builder.getInt32(1), "numTeamsUpper");
+
+ // lower = ifexpr ? lower : 1
+ NumTeamsLower = Builder.CreateSelect(
+ IfExpr, NumTeamsLower, Builder.getInt32(1), "numTeamsLower");
+ }
+
+ if (ThreadLimit == nullptr)
+ ThreadLimit = Builder.getInt32(0);
+
+ Value *ThreadNum = getOrCreateThreadID(Ident);
+ Builder.CreateCall(
+ getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_teams_51),
+ {Ident, ThreadNum, NumTeamsLower, NumTeamsUpper, ThreadLimit});
+ }
+ // Generate the body of teams.
+ InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin());
+ InsertPointTy CodeGenIP(BodyBB, BodyBB->begin());
+ BodyGenCB(AllocaIP, CodeGenIP);
+
+ OutlineInfo OI;
+ OI.EntryBB = AllocaBB;
+ OI.ExitBB = ExitBB;
+ OI.OuterAllocaBB = &OuterAllocaBB;
+
+ // Insert fake values for global tid and bound tid.
+ std::stack<Instruction *> ToBeDeleted;
+ InsertPointTy OuterAllocaIP(&OuterAllocaBB, OuterAllocaBB.begin());
+ OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal(
+ Builder, OuterAllocaIP, ToBeDeleted, AllocaIP, "gid", true));
+ OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal(
+ Builder, OuterAllocaIP, ToBeDeleted, AllocaIP, "tid", true));
+
+ OI.PostOutlineCB = [this, Ident, ToBeDeleted](Function &OutlinedFn) mutable {
+    // The stale call instruction will be replaced with a new call
+    // instruction that invokes the teams runtime entry with the outlined
+    // function.
+
+ assert(OutlinedFn.getNumUses() == 1 &&
+ "there must be a single user for the outlined function");
+ CallInst *StaleCI = cast<CallInst>(OutlinedFn.user_back());
+ ToBeDeleted.push(StaleCI);
+
+ assert((OutlinedFn.arg_size() == 2 || OutlinedFn.arg_size() == 3) &&
+ "Outlined function must have two or three arguments only");
+
+ bool HasShared = OutlinedFn.arg_size() == 3;
+
+ OutlinedFn.getArg(0)->setName("global.tid.ptr");
+ OutlinedFn.getArg(1)->setName("bound.tid.ptr");
+ if (HasShared)
+ OutlinedFn.getArg(2)->setName("data");
+
+ // Call to the runtime function for teams in the current function.
+ assert(StaleCI && "Error while outlining - no CallInst user found for the "
+ "outlined function.");
+ Builder.SetInsertPoint(StaleCI);
+ SmallVector<Value *> Args = {
+ Ident, Builder.getInt32(StaleCI->arg_size() - 2), &OutlinedFn};
+ if (HasShared)
+ Args.push_back(StaleCI->getArgOperand(2));
+ Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
+ omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
+ Args);
+
+ while (!ToBeDeleted.empty()) {
+ ToBeDeleted.top()->eraseFromParent();
+ ToBeDeleted.pop();
+ }
+ };
+
+ addOutlineInfo(std::move(OI));
+
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+
+ return Builder.saveIP();
+}
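+
+// Illustrative result (runtime-call shape only, names assumed): for a
+// construct such as
+//   #pragma omp teams num_teams(4) thread_limit(64)
+// the builder emits roughly
+//   call void @__kmpc_push_num_teams_51(ptr @ident, i32 %tid, i32 4, i32 4, i32 64)
+//   call void @__kmpc_fork_teams(ptr @ident, i32 1, ptr @outlined, ptr %data)
+// with the teams body outlined into @outlined.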
+
GlobalVariable *
OpenMPIRBuilder::createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
std::string VarName) {
llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
- llvm::ArrayType::get(
- llvm::Type::getInt8Ty(M.getContext())->getPointerTo(), Names.size()),
+ llvm::ArrayType::get(llvm::PointerType::getUnqual(M.getContext()),
+ Names.size()),
Names);
auto *MapNamesArrayGlobal = new llvm::GlobalVariable(
M, MapNamesArrayInit->getType(),
@@ -5460,9 +6392,12 @@ void OpenMPIRBuilder::OutlineInfo::collectBlocks(
void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr,
uint64_t Size, int32_t Flags,
- GlobalValue::LinkageTypes) {
+ GlobalValue::LinkageTypes,
+ StringRef Name) {
if (!Config.isGPU()) {
- emitOffloadingEntry(ID, Addr->getName(), Size, Flags);
+ llvm::offloading::emitOffloadingEntry(
+ M, ID, Name.empty() ? Addr->getName() : Name, Size, Flags, /*Data=*/0,
+ "omp_offloading_entries");
return;
}
// TODO: Add support for global variables on the device after declare target
@@ -5485,7 +6420,7 @@ void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr,
// Add a function attribute for the kernel.
Fn->addFnAttr(Attribute::get(Ctx, "kernel"));
- if (Triple(M.getTargetTriple()).isAMDGCN())
+ if (T.isAMDGCN())
Fn->addFnAttr("uniform-work-group-size", "true");
Fn->addFnAttr(Attribute::MustProgress);
}
@@ -5622,13 +6557,20 @@ void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
// Hidden or internal symbols on the device are not externally visible.
// We should not attempt to register them by creating an offloading
- // entry.
+ // entry. Indirect variables are handled separately on the device.
if (auto *GV = dyn_cast<GlobalValue>(CE->getAddress()))
- if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ if ((GV->hasLocalLinkage() || GV->hasHiddenVisibility()) &&
+ Flags != OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
continue;
- createOffloadEntry(CE->getAddress(), CE->getAddress(), CE->getVarSize(),
- Flags, CE->getLinkage());
+ // Indirect globals need to use a special name that doesn't match the name
+ // of the associated host global.
+ if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
+ createOffloadEntry(CE->getAddress(), CE->getAddress(), CE->getVarSize(),
+ Flags, CE->getLinkage(), CE->getVarName());
+ else
+ createOffloadEntry(CE->getAddress(), CE->getAddress(), CE->getVarSize(),
+ Flags, CE->getLinkage());
} else {
llvm_unreachable("Unsupported entry kind.");
@@ -5670,6 +6612,42 @@ OpenMPIRBuilder::getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack,
std::get<1>(FileIDInfo));
}
+unsigned OpenMPIRBuilder::getFlagMemberOffset() {
+ unsigned Offset = 0;
+ for (uint64_t Remain =
+ static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>>(
+ omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
+ !(Remain & 1); Remain = Remain >> 1)
+ Offset++;
+ return Offset;
+}
+
+omp::OpenMPOffloadMappingFlags
+OpenMPIRBuilder::getMemberOfFlag(unsigned Position) {
+  // Shift the one-based member position into the MEMBER_OF bit-field.
+ return static_cast<omp::OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
+ << getFlagMemberOffset());
+}
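+
+// E.g. (illustrative): if OMP_MAP_MEMBER_OF occupies the high 16 bits
+// (0xFFFF000000000000), getFlagMemberOffset() returns 48 and
+// getMemberOfFlag(0) yields 1 << 48, encoding member index 1 in that
+// bit-field.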
+
+void OpenMPIRBuilder::setCorrectMemberOfFlag(
+ omp::OpenMPOffloadMappingFlags &Flags,
+ omp::OpenMPOffloadMappingFlags MemberOfFlag) {
+ // If the entry is PTR_AND_OBJ but has not been marked with the special
+ // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
+ // marked as MEMBER_OF.
+ if (static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>>(
+ Flags & omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ) &&
+ static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>>(
+ (Flags & omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
+ omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF))
+ return;
+
+ // Reset the placeholder value to prepare the flag for the assignment of the
+ // proper MEMBER_OF value.
+ Flags &= ~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
+ Flags |= MemberOfFlag;
+}
+
Constant *OpenMPIRBuilder::getAddrOfDeclareTargetVar(
OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
@@ -5853,6 +6831,63 @@ void OpenMPIRBuilder::loadOffloadInfoMetadata(Module &M) {
}
}
+void OpenMPIRBuilder::loadOffloadInfoMetadata(StringRef HostFilePath) {
+ if (HostFilePath.empty())
+ return;
+
+ auto Buf = MemoryBuffer::getFile(HostFilePath);
+ if (std::error_code Err = Buf.getError()) {
+ report_fatal_error(("error opening host file from host file path inside of "
+ "OpenMPIRBuilder: " +
+ Err.message())
+ .c_str());
+ }
+
+ LLVMContext Ctx;
+ auto M = expectedToErrorOrAndEmitErrors(
+ Ctx, parseBitcodeFile(Buf.get()->getMemBufferRef(), Ctx));
+ if (std::error_code Err = M.getError()) {
+ report_fatal_error(
+ ("error parsing host file inside of OpenMPIRBuilder: " + Err.message())
+ .c_str());
+ }
+
+ loadOffloadInfoMetadata(*M.get());
+}
+
+Function *OpenMPIRBuilder::createRegisterRequires(StringRef Name) {
+ // Skip the creation of the registration function if this is device codegen
+ if (Config.isTargetDevice())
+ return nullptr;
+
+ Builder.ClearInsertionPoint();
+
+ // Create registration function prototype
+ auto *RegFnTy = FunctionType::get(Builder.getVoidTy(), {});
+ auto *RegFn = Function::Create(
+ RegFnTy, GlobalVariable::LinkageTypes::InternalLinkage, Name, M);
+ RegFn->setSection(".text.startup");
+ RegFn->addFnAttr(Attribute::NoInline);
+ RegFn->addFnAttr(Attribute::NoUnwind);
+
+ // Create registration function body
+ auto *BB = BasicBlock::Create(M.getContext(), "entry", RegFn);
+ ConstantInt *FlagsVal =
+ ConstantInt::getSigned(Builder.getInt64Ty(), Config.getRequiresFlags());
+ Function *RTLRegFn = getOrCreateRuntimeFunctionPtr(
+ omp::RuntimeFunction::OMPRTL___tgt_register_requires);
+
+ Builder.SetInsertPoint(BB);
+ Builder.CreateCall(RTLRegFn, {FlagsVal});
+ Builder.CreateRetVoid();
+
+ return RegFn;
+}
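+
+// The emitted registration function looks roughly like this (illustrative,
+// with an assumed name and flags value):
+//   define internal void @.omp.requires_reg() section ".text.startup" {
+//   entry:
+//     call void @__tgt_register_requires(i64 1)
+//     ret void
+//   }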
+
+//===----------------------------------------------------------------------===//
+// OffloadEntriesInfoManager
+//===----------------------------------------------------------------------===//
+
bool OffloadEntriesInfoManager::empty() const {
return OffloadEntriesTargetRegion.empty() &&
OffloadEntriesDeviceGlobalVar.empty();
@@ -5973,8 +7008,13 @@ void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo(
}
return;
}
- OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
- Addr, VarSize, Flags, Linkage);
+ if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
+ OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
+ Addr, VarSize, Flags, Linkage,
+ VarName.str());
+ else
+ OffloadEntriesDeviceGlobalVar.try_emplace(
+ VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage, "");
++OffloadingEntriesNum;
}
}
@@ -5986,6 +7026,10 @@ void OffloadEntriesInfoManager::actOnDeviceGlobalVarEntriesInfo(
Action(E.getKey(), E.getValue());
}
+//===----------------------------------------------------------------------===//
+// CanonicalLoopInfo
+//===----------------------------------------------------------------------===//
+
void CanonicalLoopInfo::collectControlBlocks(
SmallVectorImpl<BasicBlock *> &BBs) {
  // We only count those BBs as control blocks for which we do not need to
diff --git a/contrib/llvm-project/llvm/lib/FuzzMutate/FuzzerCLI.cpp b/contrib/llvm-project/llvm/lib/FuzzMutate/FuzzerCLI.cpp
index 0e47e3cc3af2..c64e9c04e199 100644
--- a/contrib/llvm-project/llvm/lib/FuzzMutate/FuzzerCLI.cpp
+++ b/contrib/llvm-project/llvm/lib/FuzzMutate/FuzzerCLI.cpp
@@ -43,7 +43,7 @@ void llvm::handleExecNameEncodedBEOpts(StringRef ExecName) {
Args.push_back("-global-isel");
// For now we default GlobalISel to -O0
Args.push_back("-O0");
- } else if (Opt.startswith("O")) {
+ } else if (Opt.starts_with("O")) {
Args.push_back("-" + Opt.str());
} else if (Triple(Opt).getArch()) {
Args.push_back("-mtriple=" + Opt.str());
@@ -140,7 +140,7 @@ int llvm::runFuzzerOnInputs(int ArgC, char *ArgV[], FuzzerTestFun TestOne,
for (int I = 1; I < ArgC; ++I) {
StringRef Arg(ArgV[I]);
- if (Arg.startswith("-")) {
+ if (Arg.starts_with("-")) {
if (Arg.equals("-ignore_remaining_args=1"))
break;
continue;
diff --git a/contrib/llvm-project/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm-project/llvm/lib/IR/AsmWriter.cpp
index be4a3ed79d88..95cdec722062 100644
--- a/contrib/llvm-project/llvm/lib/IR/AsmWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/AsmWriter.cpp
@@ -39,6 +39,7 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugProgramInstruction.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
@@ -285,18 +286,28 @@ static const Module *getModuleFromVal(const Value *V) {
return nullptr;
}
+static const Module *getModuleFromDPI(const DPMarker *Marker) {
+  const Function *F =
+      Marker->getParent() ? Marker->getParent()->getParent() : nullptr;
+  return F ? F->getParent() : nullptr;
+}
+
+static const Module *getModuleFromDPI(const DPValue *DPV) {
+ return getModuleFromDPI(DPV->getMarker());
+}
+
static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
switch (cc) {
default: Out << "cc" << cc; break;
case CallingConv::Fast: Out << "fastcc"; break;
case CallingConv::Cold: Out << "coldcc"; break;
- case CallingConv::WebKit_JS: Out << "webkit_jscc"; break;
case CallingConv::AnyReg: Out << "anyregcc"; break;
case CallingConv::PreserveMost: Out << "preserve_mostcc"; break;
case CallingConv::PreserveAll: Out << "preserve_allcc"; break;
case CallingConv::CXX_FAST_TLS: Out << "cxx_fast_tlscc"; break;
case CallingConv::GHC: Out << "ghccc"; break;
case CallingConv::Tail: Out << "tailcc"; break;
+ case CallingConv::GRAAL: Out << "graalcc"; break;
case CallingConv::CFGuard_Check: Out << "cfguard_checkcc"; break;
case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break;
case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break;
@@ -350,6 +361,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
break;
case CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break;
case CallingConv::AMDGPU_Gfx: Out << "amdgpu_gfx"; break;
+ case CallingConv::M68k_RTD: Out << "m68k_rtdcc"; break;
}
}
@@ -724,6 +736,11 @@ private:
StringMap<unsigned> TypeIdMap;
unsigned TypeIdNext = 0;
+ /// TypeIdCompatibleVtableMap - The slot map for type compatible vtable ids
+ /// used in the summary index.
+ StringMap<unsigned> TypeIdCompatibleVtableMap;
+ unsigned TypeIdCompatibleVtableNext = 0;
+
public:
/// Construct from a module.
///
@@ -767,6 +784,7 @@ public:
int getModulePathSlot(StringRef Path);
int getGUIDSlot(GlobalValue::GUID GUID);
int getTypeIdSlot(StringRef Id);
+ int getTypeIdCompatibleVtableSlot(StringRef Id);
/// If you'd like to deal with a function instead of just a module, use
/// this method to get its data into the SlotTracker.
@@ -822,6 +840,7 @@ private:
inline void CreateModulePathSlot(StringRef Path);
void CreateGUIDSlot(GlobalValue::GUID GUID);
void CreateTypeIdSlot(StringRef Id);
+ void CreateTypeIdCompatibleVtableSlot(StringRef Id);
/// Add all of the module level global variables (and their initializers)
/// and function declarations, but not the contents of those functions.
@@ -1069,12 +1088,13 @@ int SlotTracker::processIndex() {
// The first block of slots are just the module ids, which start at 0 and are
// assigned consecutively. Since the StringMap iteration order isn't
- // guaranteed, use a std::map to order by module ID before assigning slots.
- std::map<uint64_t, StringRef> ModuleIdToPathMap;
- for (auto &[ModPath, ModId] : TheIndex->modulePaths())
- ModuleIdToPathMap[ModId.first] = ModPath;
- for (auto &ModPair : ModuleIdToPathMap)
- CreateModulePathSlot(ModPair.second);
+ // guaranteed, order by path string before assigning slots.
+ std::vector<StringRef> ModulePaths;
+ for (auto &[ModPath, _] : TheIndex->modulePaths())
+ ModulePaths.push_back(ModPath);
+ llvm::sort(ModulePaths.begin(), ModulePaths.end());
+ for (auto &ModPath : ModulePaths)
+ CreateModulePathSlot(ModPath);
// Start numbering the GUIDs after the module ids.
GUIDNext = ModulePathNext;
@@ -1082,11 +1102,13 @@ int SlotTracker::processIndex() {
for (auto &GlobalList : *TheIndex)
CreateGUIDSlot(GlobalList.first);
+ // Start numbering the TypeIdCompatibleVtables after the GUIDs.
+ TypeIdCompatibleVtableNext = GUIDNext;
for (auto &TId : TheIndex->typeIdCompatibleVtableMap())
- CreateGUIDSlot(GlobalValue::getGUID(TId.first));
+ CreateTypeIdCompatibleVtableSlot(TId.first);
- // Start numbering the TypeIds after the GUIDs.
- TypeIdNext = GUIDNext;
+ // Start numbering the TypeIds after the TypeIdCompatibleVtables.
+ TypeIdNext = TypeIdCompatibleVtableNext;
for (const auto &TID : TheIndex->typeIds())
CreateTypeIdSlot(TID.second.first);
@@ -1219,6 +1241,15 @@ int SlotTracker::getTypeIdSlot(StringRef Id) {
return I == TypeIdMap.end() ? -1 : (int)I->second;
}
+int SlotTracker::getTypeIdCompatibleVtableSlot(StringRef Id) {
+ // Check for uninitialized state and do lazy initialization.
+ initializeIndexIfNeeded();
+
+ // Find the TypeIdCompatibleVtable string in the map
+ auto I = TypeIdCompatibleVtableMap.find(Id);
+ return I == TypeIdCompatibleVtableMap.end() ? -1 : (int)I->second;
+}
+
/// CreateModuleSlot - Insert the specified GlobalValue* into the slot table.
void SlotTracker::CreateModuleSlot(const GlobalValue *V) {
assert(V && "Can't insert a null Value into SlotTracker!");
@@ -1253,9 +1284,8 @@ void SlotTracker::CreateFunctionSlot(const Value *V) {
void SlotTracker::CreateMetadataSlot(const MDNode *N) {
assert(N && "Can't insert a null Value into SlotTracker!");
- // Don't make slots for DIExpressions or DIArgLists. We just print them inline
- // everywhere.
- if (isa<DIExpression>(N) || isa<DIArgList>(N))
+ // Don't make slots for DIExpressions. We just print them inline everywhere.
+ if (isa<DIExpression>(N))
return;
unsigned DestSlot = mdnNext;
@@ -1295,6 +1325,11 @@ void SlotTracker::CreateTypeIdSlot(StringRef Id) {
TypeIdMap[Id] = TypeIdNext++;
}
+/// Create a new slot for the specified Id
+void SlotTracker::CreateTypeIdCompatibleVtableSlot(StringRef Id) {
+ TypeIdCompatibleVtableMap[Id] = TypeIdCompatibleVtableNext++;
+}
+
namespace {
/// Common instances used by most of the printer functions.
struct AsmWriterContext {
@@ -1343,9 +1378,16 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
dyn_cast<PossiblyExactOperator>(U)) {
if (Div->isExact())
Out << " exact";
+ } else if (const PossiblyDisjointInst *PDI =
+ dyn_cast<PossiblyDisjointInst>(U)) {
+ if (PDI->isDisjoint())
+ Out << " disjoint";
} else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
if (GEP->isInBounds())
Out << " inbounds";
+ } else if (const auto *NNI = dyn_cast<PossiblyNonNegInst>(U)) {
+ if (NNI->hasNonNeg())
+ Out << " nneg";
}
}
@@ -2608,6 +2650,8 @@ public:
void printBasicBlock(const BasicBlock *BB);
void printInstructionLine(const Instruction &I);
void printInstruction(const Instruction &I);
+ void printDPMarker(const DPMarker &DPI);
+ void printDPValue(const DPValue &DPI);
void printUseListOrder(const Value *V, const std::vector<unsigned> &Shuffle);
void printUseLists(const Function *F);
@@ -2890,12 +2934,11 @@ void AssemblyWriter::printModuleSummaryIndex() {
std::string RegularLTOModuleName =
ModuleSummaryIndex::getRegularLTOModuleName();
moduleVec.resize(TheIndex->modulePaths().size());
- for (auto &[ModPath, ModId] : TheIndex->modulePaths())
+ for (auto &[ModPath, ModHash] : TheIndex->modulePaths())
moduleVec[Machine.getModulePathSlot(ModPath)] = std::make_pair(
- // A module id of -1 is a special entry for a regular LTO module created
- // during the thin link.
- ModId.first == -1u ? RegularLTOModuleName : std::string(ModPath),
- ModId.second);
+ // An empty module path is a special entry for a regular LTO module
+ // created during the thin link.
+ ModPath.empty() ? RegularLTOModuleName : std::string(ModPath), ModHash);
unsigned i = 0;
for (auto &ModPair : moduleVec) {
@@ -2935,7 +2978,7 @@ void AssemblyWriter::printModuleSummaryIndex() {
// Print the TypeIdCompatibleVtableMap entries.
for (auto &TId : TheIndex->typeIdCompatibleVtableMap()) {
auto GUID = GlobalValue::getGUID(TId.first);
- Out << "^" << Machine.getGUIDSlot(GUID)
+ Out << "^" << Machine.getTypeIdCompatibleVtableSlot(TId.first)
<< " = typeidCompatibleVTable: (name: \"" << TId.first << "\"";
printTypeIdCompatibleVtableSummary(TId.second);
Out << ") ; guid = " << GUID << "\n";
@@ -3200,6 +3243,10 @@ void AssemblyWriter::printFunctionSummary(const FunctionSummary *FS) {
Out << ", hotness: " << getHotnessName(Call.second.getHotness());
else if (Call.second.RelBlockFreq)
Out << ", relbf: " << Call.second.RelBlockFreq;
+ // Follow the convention of emitting flags as a boolean value, but only
+ // emit if true to avoid unnecessary verbosity and test churn.
+ if (Call.second.HasTailCall)
+ Out << ", tail: 1";
Out << ")";
}
Out << ")";
@@ -3473,15 +3520,15 @@ static void printMetadataIdentifier(StringRef Name,
if (Name.empty()) {
Out << "<empty name> ";
} else {
- if (isalpha(static_cast<unsigned char>(Name[0])) || Name[0] == '-' ||
- Name[0] == '$' || Name[0] == '.' || Name[0] == '_')
- Out << Name[0];
+ unsigned char FirstC = static_cast<unsigned char>(Name[0]);
+ if (isalpha(FirstC) || FirstC == '-' || FirstC == '$' || FirstC == '.' ||
+ FirstC == '_')
+ Out << FirstC;
else
- Out << '\\' << hexdigit(Name[0] >> 4) << hexdigit(Name[0] & 0x0F);
+ Out << '\\' << hexdigit(FirstC >> 4) << hexdigit(FirstC & 0x0F);
for (unsigned i = 1, e = Name.size(); i != e; ++i) {
unsigned char C = Name[i];
- if (isalnum(static_cast<unsigned char>(C)) || C == '-' || C == '$' ||
- C == '.' || C == '_')
+ if (isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_')
Out << C;
else
Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);
@@ -3500,8 +3547,6 @@ void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) {
// Write DIExpressions inline.
// FIXME: Ban DIExpressions in NamedMDNodes, they will serve no purpose.
MDNode *Op = NMD->getOperand(i);
- assert(!isa<DIArgList>(Op) &&
- "DIArgLists should not appear in NamedMDNodes");
if (auto *Expr = dyn_cast<DIExpression>(Op)) {
writeDIExpression(Out, Expr, AsmWriterContext::getEmpty());
continue;
@@ -3631,6 +3676,27 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
printEscapedString(GV->getPartition(), Out);
Out << '"';
}
+ if (auto CM = GV->getCodeModel()) {
+ Out << ", code_model \"";
+ switch (*CM) {
+ case CodeModel::Tiny:
+ Out << "tiny";
+ break;
+ case CodeModel::Small:
+ Out << "small";
+ break;
+ case CodeModel::Kernel:
+ Out << "kernel";
+ break;
+ case CodeModel::Medium:
+ Out << "medium";
+ break;
+ case CodeModel::Large:
+ Out << "large";
+ break;
+ }
+ Out << '"';
+ }
using SanitizerMetadata = llvm::GlobalValue::SanitizerMetadata;
if (GV->hasSanitizerMetadata()) {
@@ -3768,6 +3834,9 @@ void AssemblyWriter::printTypeIdentities() {
/// printFunction - Print all aspects of a function.
void AssemblyWriter::printFunction(const Function *F) {
+ bool ConvertBack = F->IsNewDbgInfoFormat;
+ if (ConvertBack)
+ const_cast<Function *>(F)->convertFromNewDbgValues();
if (AnnotationWriter) AnnotationWriter->emitFunctionAnnot(F, Out);
if (F->isMaterializable())
@@ -3910,6 +3979,8 @@ void AssemblyWriter::printFunction(const Function *F) {
Out << "}\n";
}
+ if (ConvertBack)
+ const_cast<Function *>(F)->convertToNewDbgValues();
Machine.purgeFunction();
}
@@ -4004,8 +4075,15 @@ void AssemblyWriter::printInfoComment(const Value &V) {
if (const auto *Relocate = dyn_cast<GCRelocateInst>(&V))
printGCRelocateComment(*Relocate);
- if (AnnotationWriter)
+ if (AnnotationWriter) {
AnnotationWriter->printInfoComment(V, Out);
+ } else if (const Instruction *I = dyn_cast<Instruction>(&V)) {
+ if (I->DbgMarker) {
+ // In the new, experimental DPValue representation of debug-info, print
+ // out which instructions have DPMarkers and where they are.
+ Out << "; dbgmarker @ " << I->DbgMarker;
+ }
+ }
}
static void maybePrintCallAddrSpace(const Value *Operand, const Instruction *I,
@@ -4465,6 +4543,36 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
printInfoComment(I);
}
+void AssemblyWriter::printDPMarker(const DPMarker &Marker) {
+ // There's no formal representation of a DPMarker -- print purely as a
+ // debugging aid.
+ for (const DPValue &DPI2 : Marker.StoredDPValues) {
+ printDPValue(DPI2);
+ Out << "\n";
+ }
+
+ Out << " DPMarker -> { ";
+ printInstruction(*Marker.MarkedInstr);
+ Out << " }";
+ return;
+}
+
+void AssemblyWriter::printDPValue(const DPValue &Value) {
+ // There's no formal representation of a DPValue -- print purely as a
+ // debugging aid.
+ Out << " DPValue { ";
+ auto WriterCtx = getContext();
+ WriteAsOperandInternal(Out, Value.getRawLocation(), WriterCtx, true);
+ Out << ", ";
+ WriteAsOperandInternal(Out, Value.getVariable(), WriterCtx, true);
+ Out << ", ";
+ WriteAsOperandInternal(Out, Value.getExpression(), WriterCtx, true);
+ Out << ", ";
+ WriteAsOperandInternal(Out, Value.getDebugLoc().get(), WriterCtx, true);
+ Out << " marker @" << Value.getMarker();
+ Out << " }";
+}
+
void AssemblyWriter::printMetadataAttachments(
const SmallVectorImpl<std::pair<unsigned, MDNode *>> &MDs,
StringRef Separator) {
@@ -4610,11 +4718,19 @@ void BasicBlock::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW,
void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW,
bool ShouldPreserveUseListOrder, bool IsForDebug) const {
+ // RemoveDIs: always print with debug-info in intrinsic format.
+ bool ConvertAfter = IsNewDbgInfoFormat;
+ if (IsNewDbgInfoFormat)
+ const_cast<Module *>(this)->convertFromNewDbgValues();
+
SlotTracker SlotTable(this);
formatted_raw_ostream OS(ROS);
AssemblyWriter W(OS, SlotTable, this, AAW, IsForDebug,
ShouldPreserveUseListOrder);
W.printModule(this);
+
+ if (ConvertAfter)
+ const_cast<Module *>(this)->convertToNewDbgValues();
}
void NamedMDNode::print(raw_ostream &ROS, bool IsForDebug) const {
@@ -4691,6 +4807,53 @@ static bool isReferencingMDNode(const Instruction &I) {
return false;
}
+void DPMarker::print(raw_ostream &ROS, bool IsForDebug) const {
+
+ ModuleSlotTracker MST(getModuleFromDPI(this), true);
+ print(ROS, MST, IsForDebug);
+}
+
+void DPValue::print(raw_ostream &ROS, bool IsForDebug) const {
+
+ ModuleSlotTracker MST(getModuleFromDPI(this), true);
+ print(ROS, MST, IsForDebug);
+}
+
+void DPMarker::print(raw_ostream &ROS, ModuleSlotTracker &MST,
+ bool IsForDebug) const {
+ // There's no formal representation of a DPMarker -- print purely as a
+ // debugging aid.
+ formatted_raw_ostream OS(ROS);
+ SlotTracker EmptySlotTable(static_cast<const Module *>(nullptr));
+ SlotTracker &SlotTable =
+ MST.getMachine() ? *MST.getMachine() : EmptySlotTable;
+ auto incorporateFunction = [&](const Function *F) {
+ if (F)
+ MST.incorporateFunction(*F);
+ };
+ incorporateFunction(getParent() ? getParent()->getParent() : nullptr);
+ AssemblyWriter W(OS, SlotTable, getModuleFromDPI(this), nullptr, IsForDebug);
+ W.printDPMarker(*this);
+}
+
+void DPValue::print(raw_ostream &ROS, ModuleSlotTracker &MST,
+ bool IsForDebug) const {
+ // There's no formal representation of a DPValue -- print purely as a
+ // debugging aid.
+ formatted_raw_ostream OS(ROS);
+ SlotTracker EmptySlotTable(static_cast<const Module *>(nullptr));
+ SlotTracker &SlotTable =
+ MST.getMachine() ? *MST.getMachine() : EmptySlotTable;
+ auto incorporateFunction = [&](const Function *F) {
+ if (F)
+ MST.incorporateFunction(*F);
+ };
+ incorporateFunction(Marker->getParent() ? Marker->getParent()->getParent()
+ : nullptr);
+ AssemblyWriter W(OS, SlotTable, getModuleFromDPI(this), nullptr, IsForDebug);
+ W.printDPValue(*this);
+}
+
void Value::print(raw_ostream &ROS, bool IsForDebug) const {
bool ShouldInitializeAllMetadata = false;
if (auto *I = dyn_cast<Instruction>(this))
@@ -4805,7 +4968,7 @@ static void printMetadataImplRec(raw_ostream &ROS, const Metadata &MD,
WriteAsOperandInternal(OS, &MD, WriterCtx, /* FromValue */ true);
auto *N = dyn_cast<MDNode>(&MD);
- if (!N || isa<DIExpression>(MD) || isa<DIArgList>(MD))
+ if (!N || isa<DIExpression>(MD))
return;
OS << " = ";
@@ -4873,7 +5036,7 @@ static void printMetadataImpl(raw_ostream &ROS, const Metadata &MD,
WriteAsOperandInternal(OS, &MD, *WriterCtx, /* FromValue */ true);
auto *N = dyn_cast<MDNode>(&MD);
- if (OnlyAsOperand || !N || isa<DIExpression>(MD) || isa<DIArgList>(MD))
+ if (OnlyAsOperand || !N || isa<DIExpression>(MD))
return;
OS << " = ";
@@ -4936,6 +5099,14 @@ void ModuleSlotTracker::collectMDNodes(MachineMDNodeListType &L, unsigned LB,
LLVM_DUMP_METHOD
void Value::dump() const { print(dbgs(), /*IsForDebug=*/true); dbgs() << '\n'; }
+// DPMarker::dump - allow easy printing of DPMarkers from the debugger.
+LLVM_DUMP_METHOD
+void DPMarker::dump() const { print(dbgs(), /*IsForDebug=*/true); dbgs() << '\n'; }
+
+// DPValue::dump - allow easy printing of DPValues from the debugger.
+LLVM_DUMP_METHOD
+void DPValue::dump() const { print(dbgs(), /*IsForDebug=*/true); dbgs() << '\n'; }
+
// Type::dump - allow easy printing of Types from the debugger.
LLVM_DUMP_METHOD
void Type::dump() const { print(dbgs(), /*IsForDebug=*/true); dbgs() << '\n'; }
diff --git a/contrib/llvm-project/llvm/lib/IR/Attributes.cpp b/contrib/llvm-project/llvm/lib/IR/Attributes.cpp
index 3d89d18e5822..fd5160209506 100644
--- a/contrib/llvm-project/llvm/lib/IR/Attributes.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Attributes.cpp
@@ -1961,7 +1961,9 @@ AttributeMask AttributeFuncs::typeIncompatible(Type *Ty,
.addAttribute(Attribute::ReadNone)
.addAttribute(Attribute::ReadOnly)
.addAttribute(Attribute::Dereferenceable)
- .addAttribute(Attribute::DereferenceableOrNull);
+ .addAttribute(Attribute::DereferenceableOrNull)
+ .addAttribute(Attribute::Writable)
+ .addAttribute(Attribute::DeadOnUnwind);
if (ASK & ASK_UNSAFE_TO_DROP)
Incompatible.addAttribute(Attribute::Nest)
.addAttribute(Attribute::SwiftError)
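Writable and DeadOnUnwind (both new in LLVM 18) join the set of attributes that are only meaningful on pointer-typed values and are therefore safe to drop elsewhere. A standalone sketch of the idea with an illustrative bitmask (not LLVM's AttributeMask; the real attribute definitions live in Attributes.td):

    #include <cstdint>

    // Illustrative attribute bits, not LLVM's encoding.
    enum Attr : uint32_t {
      ReadNone = 1u << 0,
      ReadOnly = 1u << 1,
      Dereferenceable = 1u << 2,
      DereferenceableOrNull = 1u << 3,
      Writable = 1u << 4,     // added by this change
      DeadOnUnwind = 1u << 5, // added by this change
    };

    // Pointer-only attributes: safe to strip from non-pointer values.
    constexpr uint32_t PointerOnlySafeToDrop =
        ReadNone | ReadOnly | Dereferenceable | DereferenceableOrNull |
        Writable | DeadOnUnwind;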
diff --git a/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp b/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp
index 71b5722925a1..6b54047020a0 100644
--- a/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp
@@ -122,342 +122,372 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
// like to use this information to remove upgrade code for some older
// intrinsics. It is currently undecided how we will determine that future
// point.
- if (Name == "addcarryx.u32" || // Added in 8.0
- Name == "addcarryx.u64" || // Added in 8.0
- Name == "addcarry.u32" || // Added in 8.0
- Name == "addcarry.u64" || // Added in 8.0
- Name == "subborrow.u32" || // Added in 8.0
- Name == "subborrow.u64" || // Added in 8.0
- Name.startswith("sse2.padds.") || // Added in 8.0
- Name.startswith("sse2.psubs.") || // Added in 8.0
- Name.startswith("sse2.paddus.") || // Added in 8.0
- Name.startswith("sse2.psubus.") || // Added in 8.0
- Name.startswith("avx2.padds.") || // Added in 8.0
- Name.startswith("avx2.psubs.") || // Added in 8.0
- Name.startswith("avx2.paddus.") || // Added in 8.0
- Name.startswith("avx2.psubus.") || // Added in 8.0
- Name.startswith("avx512.padds.") || // Added in 8.0
- Name.startswith("avx512.psubs.") || // Added in 8.0
- Name.startswith("avx512.mask.padds.") || // Added in 8.0
- Name.startswith("avx512.mask.psubs.") || // Added in 8.0
- Name.startswith("avx512.mask.paddus.") || // Added in 8.0
- Name.startswith("avx512.mask.psubus.") || // Added in 8.0
- Name=="ssse3.pabs.b.128" || // Added in 6.0
- Name=="ssse3.pabs.w.128" || // Added in 6.0
- Name=="ssse3.pabs.d.128" || // Added in 6.0
- Name.startswith("fma4.vfmadd.s") || // Added in 7.0
- Name.startswith("fma.vfmadd.") || // Added in 7.0
- Name.startswith("fma.vfmsub.") || // Added in 7.0
- Name.startswith("fma.vfmsubadd.") || // Added in 7.0
- Name.startswith("fma.vfnmadd.") || // Added in 7.0
- Name.startswith("fma.vfnmsub.") || // Added in 7.0
- Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
- Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
- Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
- Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
- Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
- Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
- Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
- Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
- Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
- Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
- Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
- Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
- Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
- Name.startswith("avx512.kunpck") || //added in 6.0
- Name.startswith("avx2.pabs.") || // Added in 6.0
- Name.startswith("avx512.mask.pabs.") || // Added in 6.0
- Name.startswith("avx512.broadcastm") || // Added in 6.0
- Name == "sse.sqrt.ss" || // Added in 7.0
- Name == "sse2.sqrt.sd" || // Added in 7.0
- Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
- Name.startswith("avx.sqrt.p") || // Added in 7.0
- Name.startswith("sse2.sqrt.p") || // Added in 7.0
- Name.startswith("sse.sqrt.p") || // Added in 7.0
- Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
- Name.startswith("sse2.pcmpeq.") || // Added in 3.1
- Name.startswith("sse2.pcmpgt.") || // Added in 3.1
- Name.startswith("avx2.pcmpeq.") || // Added in 3.1
- Name.startswith("avx2.pcmpgt.") || // Added in 3.1
- Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
- Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
- Name.startswith("avx.vperm2f128.") || // Added in 6.0
- Name == "avx2.vperm2i128" || // Added in 6.0
- Name == "sse.add.ss" || // Added in 4.0
- Name == "sse2.add.sd" || // Added in 4.0
- Name == "sse.sub.ss" || // Added in 4.0
- Name == "sse2.sub.sd" || // Added in 4.0
- Name == "sse.mul.ss" || // Added in 4.0
- Name == "sse2.mul.sd" || // Added in 4.0
- Name == "sse.div.ss" || // Added in 4.0
- Name == "sse2.div.sd" || // Added in 4.0
- Name == "sse41.pmaxsb" || // Added in 3.9
- Name == "sse2.pmaxs.w" || // Added in 3.9
- Name == "sse41.pmaxsd" || // Added in 3.9
- Name == "sse2.pmaxu.b" || // Added in 3.9
- Name == "sse41.pmaxuw" || // Added in 3.9
- Name == "sse41.pmaxud" || // Added in 3.9
- Name == "sse41.pminsb" || // Added in 3.9
- Name == "sse2.pmins.w" || // Added in 3.9
- Name == "sse41.pminsd" || // Added in 3.9
- Name == "sse2.pminu.b" || // Added in 3.9
- Name == "sse41.pminuw" || // Added in 3.9
- Name == "sse41.pminud" || // Added in 3.9
- Name == "avx512.kand.w" || // Added in 7.0
- Name == "avx512.kandn.w" || // Added in 7.0
- Name == "avx512.knot.w" || // Added in 7.0
- Name == "avx512.kor.w" || // Added in 7.0
- Name == "avx512.kxor.w" || // Added in 7.0
- Name == "avx512.kxnor.w" || // Added in 7.0
- Name == "avx512.kortestc.w" || // Added in 7.0
- Name == "avx512.kortestz.w" || // Added in 7.0
- Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
- Name.startswith("avx2.pmax") || // Added in 3.9
- Name.startswith("avx2.pmin") || // Added in 3.9
- Name.startswith("avx512.mask.pmax") || // Added in 4.0
- Name.startswith("avx512.mask.pmin") || // Added in 4.0
- Name.startswith("avx2.vbroadcast") || // Added in 3.8
- Name.startswith("avx2.pbroadcast") || // Added in 3.8
- Name.startswith("avx.vpermil.") || // Added in 3.1
- Name.startswith("sse2.pshuf") || // Added in 3.9
- Name.startswith("avx512.pbroadcast") || // Added in 3.9
- Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
- Name.startswith("avx512.mask.movddup") || // Added in 3.9
- Name.startswith("avx512.mask.movshdup") || // Added in 3.9
- Name.startswith("avx512.mask.movsldup") || // Added in 3.9
- Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
- Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
- Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
- Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
- Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
- Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
- Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
- Name.startswith("avx512.mask.punpckl") || // Added in 3.9
- Name.startswith("avx512.mask.punpckh") || // Added in 3.9
- Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
- Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
- Name.startswith("avx512.mask.pand.") || // Added in 3.9
- Name.startswith("avx512.mask.pandn.") || // Added in 3.9
- Name.startswith("avx512.mask.por.") || // Added in 3.9
- Name.startswith("avx512.mask.pxor.") || // Added in 3.9
- Name.startswith("avx512.mask.and.") || // Added in 3.9
- Name.startswith("avx512.mask.andn.") || // Added in 3.9
- Name.startswith("avx512.mask.or.") || // Added in 3.9
- Name.startswith("avx512.mask.xor.") || // Added in 3.9
- Name.startswith("avx512.mask.padd.") || // Added in 4.0
- Name.startswith("avx512.mask.psub.") || // Added in 4.0
- Name.startswith("avx512.mask.pmull.") || // Added in 4.0
- Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
- Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
- Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
- Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
- Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
- Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
- Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
- Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
- Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
- Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
- Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
- Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
- Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
- Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
- Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
- Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
- Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
- Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
- Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
- Name == "avx512.cvtusi2sd" || // Added in 7.0
- Name.startswith("avx512.mask.permvar.") || // Added in 7.0
- Name == "sse2.pmulu.dq" || // Added in 7.0
- Name == "sse41.pmuldq" || // Added in 7.0
- Name == "avx2.pmulu.dq" || // Added in 7.0
- Name == "avx2.pmul.dq" || // Added in 7.0
- Name == "avx512.pmulu.dq.512" || // Added in 7.0
- Name == "avx512.pmul.dq.512" || // Added in 7.0
- Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
- Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
- Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
- Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
- Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
- Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
- Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
- Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
- Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
- Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
- Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
- Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
- Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
- Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
- Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
- Name.startswith("avx512.cmp.p") || // Added in 12.0
- Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
- Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
- Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
- Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
- Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
- Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
- Name.startswith("avx512.mask.psll.d") || // Added in 4.0
- Name.startswith("avx512.mask.psll.q") || // Added in 4.0
- Name.startswith("avx512.mask.psll.w") || // Added in 4.0
- Name.startswith("avx512.mask.psra.d") || // Added in 4.0
- Name.startswith("avx512.mask.psra.q") || // Added in 4.0
- Name.startswith("avx512.mask.psra.w") || // Added in 4.0
- Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
- Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
- Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
- Name.startswith("avx512.mask.pslli") || // Added in 4.0
- Name.startswith("avx512.mask.psrai") || // Added in 4.0
- Name.startswith("avx512.mask.psrli") || // Added in 4.0
- Name.startswith("avx512.mask.psllv") || // Added in 4.0
- Name.startswith("avx512.mask.psrav") || // Added in 4.0
- Name.startswith("avx512.mask.psrlv") || // Added in 4.0
- Name.startswith("sse41.pmovsx") || // Added in 3.8
- Name.startswith("sse41.pmovzx") || // Added in 3.9
- Name.startswith("avx2.pmovsx") || // Added in 3.9
- Name.startswith("avx2.pmovzx") || // Added in 3.9
- Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
- Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
- Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
- Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
- Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
- Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
- Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
- Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
- Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
- Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
- Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
- Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
- Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
- Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
- Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
- Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
- Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
- Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
- Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
- Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
- Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
- Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
- Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
- Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
- Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
- Name.startswith("avx512.vpshld.") || // Added in 8.0
- Name.startswith("avx512.vpshrd.") || // Added in 8.0
- Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
- Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
- Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
- Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
- Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
- Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
- Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
- Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
- Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
- Name.startswith("avx512.mask.conflict.") || // Added in 9.0
- Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
- Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
- Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
- Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
- Name == "sse.cvtsi2ss" || // Added in 7.0
- Name == "sse.cvtsi642ss" || // Added in 7.0
- Name == "sse2.cvtsi2sd" || // Added in 7.0
- Name == "sse2.cvtsi642sd" || // Added in 7.0
- Name == "sse2.cvtss2sd" || // Added in 7.0
- Name == "sse2.cvtdq2pd" || // Added in 3.9
- Name == "sse2.cvtdq2ps" || // Added in 7.0
- Name == "sse2.cvtps2pd" || // Added in 3.9
- Name == "avx.cvtdq2.pd.256" || // Added in 3.9
- Name == "avx.cvtdq2.ps.256" || // Added in 7.0
- Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
- Name.startswith("vcvtph2ps.") || // Added in 11.0
- Name.startswith("avx.vinsertf128.") || // Added in 3.7
- Name == "avx2.vinserti128" || // Added in 3.7
- Name.startswith("avx512.mask.insert") || // Added in 4.0
- Name.startswith("avx.vextractf128.") || // Added in 3.7
- Name == "avx2.vextracti128" || // Added in 3.7
- Name.startswith("avx512.mask.vextract") || // Added in 4.0
- Name.startswith("sse4a.movnt.") || // Added in 3.9
- Name.startswith("avx.movnt.") || // Added in 3.2
- Name.startswith("avx512.storent.") || // Added in 3.9
- Name == "sse41.movntdqa" || // Added in 5.0
- Name == "avx2.movntdqa" || // Added in 5.0
- Name == "avx512.movntdqa" || // Added in 5.0
- Name == "sse2.storel.dq" || // Added in 3.9
- Name.startswith("sse.storeu.") || // Added in 3.9
- Name.startswith("sse2.storeu.") || // Added in 3.9
- Name.startswith("avx.storeu.") || // Added in 3.9
- Name.startswith("avx512.mask.storeu.") || // Added in 3.9
- Name.startswith("avx512.mask.store.p") || // Added in 3.9
- Name.startswith("avx512.mask.store.b.") || // Added in 3.9
- Name.startswith("avx512.mask.store.w.") || // Added in 3.9
- Name.startswith("avx512.mask.store.d.") || // Added in 3.9
- Name.startswith("avx512.mask.store.q.") || // Added in 3.9
- Name == "avx512.mask.store.ss" || // Added in 7.0
- Name.startswith("avx512.mask.loadu.") || // Added in 3.9
- Name.startswith("avx512.mask.load.") || // Added in 3.9
- Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
- Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
- Name.startswith("avx512.mask.expand.b") || // Added in 9.0
- Name.startswith("avx512.mask.expand.w") || // Added in 9.0
- Name.startswith("avx512.mask.expand.d") || // Added in 9.0
- Name.startswith("avx512.mask.expand.q") || // Added in 9.0
- Name.startswith("avx512.mask.expand.p") || // Added in 9.0
- Name.startswith("avx512.mask.compress.b") || // Added in 9.0
- Name.startswith("avx512.mask.compress.w") || // Added in 9.0
- Name.startswith("avx512.mask.compress.d") || // Added in 9.0
- Name.startswith("avx512.mask.compress.q") || // Added in 9.0
- Name.startswith("avx512.mask.compress.p") || // Added in 9.0
- Name == "sse42.crc32.64.8" || // Added in 3.4
- Name.startswith("avx.vbroadcast.s") || // Added in 3.5
- Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
- Name.startswith("avx512.mask.palignr.") || // Added in 3.9
- Name.startswith("avx512.mask.valign.") || // Added in 4.0
- Name.startswith("sse2.psll.dq") || // Added in 3.7
- Name.startswith("sse2.psrl.dq") || // Added in 3.7
- Name.startswith("avx2.psll.dq") || // Added in 3.7
- Name.startswith("avx2.psrl.dq") || // Added in 3.7
- Name.startswith("avx512.psll.dq") || // Added in 3.9
- Name.startswith("avx512.psrl.dq") || // Added in 3.9
- Name == "sse41.pblendw" || // Added in 3.7
- Name.startswith("sse41.blendp") || // Added in 3.7
- Name.startswith("avx.blend.p") || // Added in 3.7
- Name == "avx2.pblendw" || // Added in 3.7
- Name.startswith("avx2.pblendd.") || // Added in 3.7
- Name.startswith("avx.vbroadcastf128") || // Added in 4.0
- Name == "avx2.vbroadcasti128" || // Added in 3.7
- Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
- Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
- Name.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0
- Name.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0
- Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
- Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
- Name.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0
- Name.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0
- Name == "xop.vpcmov" || // Added in 3.8
- Name == "xop.vpcmov.256" || // Added in 5.0
- Name.startswith("avx512.mask.move.s") || // Added in 4.0
- Name.startswith("avx512.cvtmask2") || // Added in 5.0
- Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
- Name.startswith("xop.vprot") || // Added in 8.0
- Name.startswith("avx512.prol") || // Added in 8.0
- Name.startswith("avx512.pror") || // Added in 8.0
- Name.startswith("avx512.mask.prorv.") || // Added in 8.0
- Name.startswith("avx512.mask.pror.") || // Added in 8.0
- Name.startswith("avx512.mask.prolv.") || // Added in 8.0
- Name.startswith("avx512.mask.prol.") || // Added in 8.0
- Name.startswith("avx512.ptestm") || //Added in 6.0
- Name.startswith("avx512.ptestnm") || //Added in 6.0
- Name.startswith("avx512.mask.pavg")) // Added in 6.0
- return true;
+ if (Name.consume_front("avx."))
+ return (Name.starts_with("blend.p") || // Added in 3.7
+ Name == "cvt.ps2.pd.256" || // Added in 3.9
+ Name == "cvtdq2.pd.256" || // Added in 3.9
+ Name == "cvtdq2.ps.256" || // Added in 7.0
+ Name.starts_with("movnt.") || // Added in 3.2
+ Name.starts_with("sqrt.p") || // Added in 7.0
+ Name.starts_with("storeu.") || // Added in 3.9
+ Name.starts_with("vbroadcast.s") || // Added in 3.5
+ Name.starts_with("vbroadcastf128") || // Added in 4.0
+ Name.starts_with("vextractf128.") || // Added in 3.7
+ Name.starts_with("vinsertf128.") || // Added in 3.7
+ Name.starts_with("vperm2f128.") || // Added in 6.0
+ Name.starts_with("vpermil.")); // Added in 3.1
+
+ if (Name.consume_front("avx2."))
+ return (Name == "movntdqa" || // Added in 5.0
+ Name.starts_with("pabs.") || // Added in 6.0
+ Name.starts_with("padds.") || // Added in 8.0
+ Name.starts_with("paddus.") || // Added in 8.0
+ Name.starts_with("pblendd.") || // Added in 3.7
+ Name == "pblendw" || // Added in 3.7
+ Name.starts_with("pbroadcast") || // Added in 3.8
+ Name.starts_with("pcmpeq.") || // Added in 3.1
+ Name.starts_with("pcmpgt.") || // Added in 3.1
+ Name.starts_with("pmax") || // Added in 3.9
+ Name.starts_with("pmin") || // Added in 3.9
+ Name.starts_with("pmovsx") || // Added in 3.9
+ Name.starts_with("pmovzx") || // Added in 3.9
+ Name == "pmul.dq" || // Added in 7.0
+ Name == "pmulu.dq" || // Added in 7.0
+ Name.starts_with("psll.dq") || // Added in 3.7
+ Name.starts_with("psrl.dq") || // Added in 3.7
+ Name.starts_with("psubs.") || // Added in 8.0
+ Name.starts_with("psubus.") || // Added in 8.0
+ Name.starts_with("vbroadcast") || // Added in 3.8
+ Name == "vbroadcasti128" || // Added in 3.7
+ Name == "vextracti128" || // Added in 3.7
+ Name == "vinserti128" || // Added in 3.7
+ Name == "vperm2i128"); // Added in 6.0
+
+ if (Name.consume_front("avx512.")) {
+ if (Name.consume_front("mask."))
+ // 'avx512.mask.*'
+ return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
+ Name.starts_with("and.") || // Added in 3.9
+ Name.starts_with("andn.") || // Added in 3.9
+ Name.starts_with("broadcast.s") || // Added in 3.9
+ Name.starts_with("broadcastf32x4.") || // Added in 6.0
+ Name.starts_with("broadcastf32x8.") || // Added in 6.0
+ Name.starts_with("broadcastf64x2.") || // Added in 6.0
+ Name.starts_with("broadcastf64x4.") || // Added in 6.0
+ Name.starts_with("broadcasti32x4.") || // Added in 6.0
+ Name.starts_with("broadcasti32x8.") || // Added in 6.0
+ Name.starts_with("broadcasti64x2.") || // Added in 6.0
+ Name.starts_with("broadcasti64x4.") || // Added in 6.0
+ Name.starts_with("cmp.b") || // Added in 5.0
+ Name.starts_with("cmp.d") || // Added in 5.0
+ Name.starts_with("cmp.q") || // Added in 5.0
+ Name.starts_with("cmp.w") || // Added in 5.0
+ Name.starts_with("compress.b") || // Added in 9.0
+ Name.starts_with("compress.d") || // Added in 9.0
+ Name.starts_with("compress.p") || // Added in 9.0
+ Name.starts_with("compress.q") || // Added in 9.0
+ Name.starts_with("compress.store.") || // Added in 7.0
+ Name.starts_with("compress.w") || // Added in 9.0
+ Name.starts_with("conflict.") || // Added in 9.0
+ Name.starts_with("cvtdq2pd.") || // Added in 4.0
+ Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
+ Name == "cvtpd2dq.256" || // Added in 7.0
+ Name == "cvtpd2ps.256" || // Added in 7.0
+ Name == "cvtps2pd.128" || // Added in 7.0
+ Name == "cvtps2pd.256" || // Added in 7.0
+ Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
+ Name == "cvtqq2ps.256" || // Added in 9.0
+ Name == "cvtqq2ps.512" || // Added in 9.0
+ Name == "cvttpd2dq.256" || // Added in 7.0
+ Name == "cvttps2dq.128" || // Added in 7.0
+ Name == "cvttps2dq.256" || // Added in 7.0
+ Name.starts_with("cvtudq2pd.") || // Added in 4.0
+ Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
+ Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
+ Name == "cvtuqq2ps.256" || // Added in 9.0
+ Name == "cvtuqq2ps.512" || // Added in 9.0
+ Name.starts_with("dbpsadbw.") || // Added in 7.0
+ Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
+ Name.starts_with("expand.b") || // Added in 9.0
+ Name.starts_with("expand.d") || // Added in 9.0
+ Name.starts_with("expand.load.") || // Added in 7.0
+ Name.starts_with("expand.p") || // Added in 9.0
+ Name.starts_with("expand.q") || // Added in 9.0
+ Name.starts_with("expand.w") || // Added in 9.0
+ Name.starts_with("fpclass.p") || // Added in 7.0
+ Name.starts_with("insert") || // Added in 4.0
+ Name.starts_with("load.") || // Added in 3.9
+ Name.starts_with("loadu.") || // Added in 3.9
+ Name.starts_with("lzcnt.") || // Added in 5.0
+ Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
+ Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
+ Name.starts_with("movddup") || // Added in 3.9
+ Name.starts_with("move.s") || // Added in 4.0
+ Name.starts_with("movshdup") || // Added in 3.9
+ Name.starts_with("movsldup") || // Added in 3.9
+ Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
+ Name.starts_with("or.") || // Added in 3.9
+ Name.starts_with("pabs.") || // Added in 6.0
+ Name.starts_with("packssdw.") || // Added in 5.0
+ Name.starts_with("packsswb.") || // Added in 5.0
+ Name.starts_with("packusdw.") || // Added in 5.0
+ Name.starts_with("packuswb.") || // Added in 5.0
+ Name.starts_with("padd.") || // Added in 4.0
+ Name.starts_with("padds.") || // Added in 8.0
+ Name.starts_with("paddus.") || // Added in 8.0
+ Name.starts_with("palignr.") || // Added in 3.9
+ Name.starts_with("pand.") || // Added in 3.9
+ Name.starts_with("pandn.") || // Added in 3.9
+ Name.starts_with("pavg") || // Added in 6.0
+ Name.starts_with("pbroadcast") || // Added in 6.0
+ Name.starts_with("pcmpeq.") || // Added in 3.9
+ Name.starts_with("pcmpgt.") || // Added in 3.9
+ Name.starts_with("perm.df.") || // Added in 3.9
+ Name.starts_with("perm.di.") || // Added in 3.9
+ Name.starts_with("permvar.") || // Added in 7.0
+ Name.starts_with("pmaddubs.w.") || // Added in 7.0
+ Name.starts_with("pmaddw.d.") || // Added in 7.0
+ Name.starts_with("pmax") || // Added in 4.0
+ Name.starts_with("pmin") || // Added in 4.0
+ Name == "pmov.qd.256" || // Added in 9.0
+ Name == "pmov.qd.512" || // Added in 9.0
+ Name == "pmov.wb.256" || // Added in 9.0
+ Name == "pmov.wb.512" || // Added in 9.0
+ Name.starts_with("pmovsx") || // Added in 4.0
+ Name.starts_with("pmovzx") || // Added in 4.0
+ Name.starts_with("pmul.dq.") || // Added in 4.0
+ Name.starts_with("pmul.hr.sw.") || // Added in 7.0
+ Name.starts_with("pmulh.w.") || // Added in 7.0
+ Name.starts_with("pmulhu.w.") || // Added in 7.0
+ Name.starts_with("pmull.") || // Added in 4.0
+ Name.starts_with("pmultishift.qb.") || // Added in 8.0
+ Name.starts_with("pmulu.dq.") || // Added in 4.0
+ Name.starts_with("por.") || // Added in 3.9
+ Name.starts_with("prol.") || // Added in 8.0
+ Name.starts_with("prolv.") || // Added in 8.0
+ Name.starts_with("pror.") || // Added in 8.0
+ Name.starts_with("prorv.") || // Added in 8.0
+ Name.starts_with("pshuf.b.") || // Added in 4.0
+ Name.starts_with("pshuf.d.") || // Added in 3.9
+ Name.starts_with("pshufh.w.") || // Added in 3.9
+ Name.starts_with("pshufl.w.") || // Added in 3.9
+ Name.starts_with("psll.d") || // Added in 4.0
+ Name.starts_with("psll.q") || // Added in 4.0
+ Name.starts_with("psll.w") || // Added in 4.0
+ Name.starts_with("pslli") || // Added in 4.0
+ Name.starts_with("psllv") || // Added in 4.0
+ Name.starts_with("psra.d") || // Added in 4.0
+ Name.starts_with("psra.q") || // Added in 4.0
+ Name.starts_with("psra.w") || // Added in 4.0
+ Name.starts_with("psrai") || // Added in 4.0
+ Name.starts_with("psrav") || // Added in 4.0
+ Name.starts_with("psrl.d") || // Added in 4.0
+ Name.starts_with("psrl.q") || // Added in 4.0
+ Name.starts_with("psrl.w") || // Added in 4.0
+ Name.starts_with("psrli") || // Added in 4.0
+ Name.starts_with("psrlv") || // Added in 4.0
+ Name.starts_with("psub.") || // Added in 4.0
+ Name.starts_with("psubs.") || // Added in 8.0
+ Name.starts_with("psubus.") || // Added in 8.0
+ Name.starts_with("pternlog.") || // Added in 7.0
+ Name.starts_with("punpckh") || // Added in 3.9
+ Name.starts_with("punpckl") || // Added in 3.9
+ Name.starts_with("pxor.") || // Added in 3.9
+ Name.starts_with("shuf.f") || // Added in 6.0
+ Name.starts_with("shuf.i") || // Added in 6.0
+ Name.starts_with("shuf.p") || // Added in 4.0
+ Name.starts_with("sqrt.p") || // Added in 7.0
+ Name.starts_with("store.b.") || // Added in 3.9
+ Name.starts_with("store.d.") || // Added in 3.9
+ Name.starts_with("store.p") || // Added in 3.9
+ Name.starts_with("store.q.") || // Added in 3.9
+ Name.starts_with("store.w.") || // Added in 3.9
+ Name == "store.ss" || // Added in 7.0
+ Name.starts_with("storeu.") || // Added in 3.9
+ Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
+ Name.starts_with("ucmp.") || // Added in 5.0
+ Name.starts_with("unpckh.") || // Added in 3.9
+ Name.starts_with("unpckl.") || // Added in 3.9
+ Name.starts_with("valign.") || // Added in 4.0
+ Name == "vcvtph2ps.128" || // Added in 11.0
+ Name == "vcvtph2ps.256" || // Added in 11.0
+ Name.starts_with("vextract") || // Added in 4.0
+ Name.starts_with("vfmadd.") || // Added in 7.0
+ Name.starts_with("vfmaddsub.") || // Added in 7.0
+ Name.starts_with("vfnmadd.") || // Added in 7.0
+ Name.starts_with("vfnmsub.") || // Added in 7.0
+ Name.starts_with("vpdpbusd.") || // Added in 7.0
+ Name.starts_with("vpdpbusds.") || // Added in 7.0
+ Name.starts_with("vpdpwssd.") || // Added in 7.0
+ Name.starts_with("vpdpwssds.") || // Added in 7.0
+ Name.starts_with("vpermi2var.") || // Added in 7.0
+ Name.starts_with("vpermil.p") || // Added in 3.9
+ Name.starts_with("vpermilvar.") || // Added in 4.0
+ Name.starts_with("vpermt2var.") || // Added in 7.0
+ Name.starts_with("vpmadd52") || // Added in 7.0
+ Name.starts_with("vpshld.") || // Added in 7.0
+ Name.starts_with("vpshldv.") || // Added in 8.0
+ Name.starts_with("vpshrd.") || // Added in 7.0
+ Name.starts_with("vpshrdv.") || // Added in 8.0
+ Name.starts_with("vpshufbitqmb.") || // Added in 8.0
+ Name.starts_with("xor.")); // Added in 3.9
+
+ if (Name.consume_front("mask3."))
+ // 'avx512.mask3.*'
+ return (Name.starts_with("vfmadd.") || // Added in 7.0
+ Name.starts_with("vfmaddsub.") || // Added in 7.0
+ Name.starts_with("vfmsub.") || // Added in 7.0
+ Name.starts_with("vfmsubadd.") || // Added in 7.0
+ Name.starts_with("vfnmsub.")); // Added in 7.0
+
+ if (Name.consume_front("maskz."))
+ // 'avx512.maskz.*'
+ return (Name.starts_with("pternlog.") || // Added in 7.0
+ Name.starts_with("vfmadd.") || // Added in 7.0
+ Name.starts_with("vfmaddsub.") || // Added in 7.0
+ Name.starts_with("vpdpbusd.") || // Added in 7.0
+ Name.starts_with("vpdpbusds.") || // Added in 7.0
+ Name.starts_with("vpdpwssd.") || // Added in 7.0
+ Name.starts_with("vpdpwssds.") || // Added in 7.0
+ Name.starts_with("vpermt2var.") || // Added in 7.0
+ Name.starts_with("vpmadd52") || // Added in 7.0
+ Name.starts_with("vpshldv.") || // Added in 8.0
+ Name.starts_with("vpshrdv.")); // Added in 8.0
+
+ // 'avx512.*'
+ return (Name == "movntdqa" || // Added in 5.0
+ Name == "pmul.dq.512" || // Added in 7.0
+ Name == "pmulu.dq.512" || // Added in 7.0
+ Name.starts_with("broadcastm") || // Added in 6.0
+ Name.starts_with("cmp.p") || // Added in 12.0
+ Name.starts_with("cvtb2mask.") || // Added in 7.0
+ Name.starts_with("cvtd2mask.") || // Added in 7.0
+ Name.starts_with("cvtmask2") || // Added in 5.0
+ Name.starts_with("cvtq2mask.") || // Added in 7.0
+ Name == "cvtusi2sd" || // Added in 7.0
+ Name.starts_with("cvtw2mask.") || // Added in 7.0
+ Name == "kand.w" || // Added in 7.0
+ Name == "kandn.w" || // Added in 7.0
+ Name == "knot.w" || // Added in 7.0
+ Name == "kor.w" || // Added in 7.0
+ Name == "kortestc.w" || // Added in 7.0
+ Name == "kortestz.w" || // Added in 7.0
+            Name.starts_with("kunpck") ||          // Added in 6.0
+ Name == "kxnor.w" || // Added in 7.0
+ Name == "kxor.w" || // Added in 7.0
+ Name.starts_with("padds.") || // Added in 8.0
+ Name.starts_with("pbroadcast") || // Added in 3.9
+ Name.starts_with("prol") || // Added in 8.0
+ Name.starts_with("pror") || // Added in 8.0
+ Name.starts_with("psll.dq") || // Added in 3.9
+ Name.starts_with("psrl.dq") || // Added in 3.9
+ Name.starts_with("psubs.") || // Added in 8.0
+ Name.starts_with("ptestm") || // Added in 6.0
+ Name.starts_with("ptestnm") || // Added in 6.0
+ Name.starts_with("storent.") || // Added in 3.9
+ Name.starts_with("vbroadcast.s") || // Added in 7.0
+ Name.starts_with("vpshld.") || // Added in 8.0
+ Name.starts_with("vpshrd.")); // Added in 8.0
+ }
- return false;
+ if (Name.consume_front("fma."))
+ return (Name.starts_with("vfmadd.") || // Added in 7.0
+ Name.starts_with("vfmsub.") || // Added in 7.0
+ Name.starts_with("vfmsubadd.") || // Added in 7.0
+ Name.starts_with("vfnmadd.") || // Added in 7.0
+ Name.starts_with("vfnmsub.")); // Added in 7.0
+
+ if (Name.consume_front("fma4."))
+ return Name.starts_with("vfmadd.s"); // Added in 7.0
+
+ if (Name.consume_front("sse."))
+ return (Name == "add.ss" || // Added in 4.0
+ Name == "cvtsi2ss" || // Added in 7.0
+ Name == "cvtsi642ss" || // Added in 7.0
+ Name == "div.ss" || // Added in 4.0
+ Name == "mul.ss" || // Added in 4.0
+ Name.starts_with("sqrt.p") || // Added in 7.0
+ Name == "sqrt.ss" || // Added in 7.0
+ Name.starts_with("storeu.") || // Added in 3.9
+ Name == "sub.ss"); // Added in 4.0
+
+ if (Name.consume_front("sse2."))
+ return (Name == "add.sd" || // Added in 4.0
+ Name == "cvtdq2pd" || // Added in 3.9
+ Name == "cvtdq2ps" || // Added in 7.0
+ Name == "cvtps2pd" || // Added in 3.9
+ Name == "cvtsi2sd" || // Added in 7.0
+ Name == "cvtsi642sd" || // Added in 7.0
+ Name == "cvtss2sd" || // Added in 7.0
+ Name == "div.sd" || // Added in 4.0
+ Name == "mul.sd" || // Added in 4.0
+ Name.starts_with("padds.") || // Added in 8.0
+ Name.starts_with("paddus.") || // Added in 8.0
+ Name.starts_with("pcmpeq.") || // Added in 3.1
+ Name.starts_with("pcmpgt.") || // Added in 3.1
+ Name == "pmaxs.w" || // Added in 3.9
+ Name == "pmaxu.b" || // Added in 3.9
+ Name == "pmins.w" || // Added in 3.9
+ Name == "pminu.b" || // Added in 3.9
+ Name == "pmulu.dq" || // Added in 7.0
+ Name.starts_with("pshuf") || // Added in 3.9
+ Name.starts_with("psll.dq") || // Added in 3.7
+ Name.starts_with("psrl.dq") || // Added in 3.7
+ Name.starts_with("psubs.") || // Added in 8.0
+ Name.starts_with("psubus.") || // Added in 8.0
+ Name.starts_with("sqrt.p") || // Added in 7.0
+ Name == "sqrt.sd" || // Added in 7.0
+ Name == "storel.dq" || // Added in 3.9
+ Name.starts_with("storeu.") || // Added in 3.9
+ Name == "sub.sd"); // Added in 4.0
+
+ if (Name.consume_front("sse41."))
+ return (Name.starts_with("blendp") || // Added in 3.7
+ Name == "movntdqa" || // Added in 5.0
+ Name == "pblendw" || // Added in 3.7
+ Name == "pmaxsb" || // Added in 3.9
+ Name == "pmaxsd" || // Added in 3.9
+ Name == "pmaxud" || // Added in 3.9
+ Name == "pmaxuw" || // Added in 3.9
+ Name == "pminsb" || // Added in 3.9
+ Name == "pminsd" || // Added in 3.9
+ Name == "pminud" || // Added in 3.9
+ Name == "pminuw" || // Added in 3.9
+ Name.starts_with("pmovsx") || // Added in 3.8
+ Name.starts_with("pmovzx") || // Added in 3.9
+ Name == "pmuldq"); // Added in 7.0
+
+ if (Name.consume_front("sse42."))
+ return Name == "crc32.64.8"; // Added in 3.4
+
+ if (Name.consume_front("sse4a."))
+ return Name.starts_with("movnt."); // Added in 3.9
+
+ if (Name.consume_front("ssse3."))
+ return (Name == "pabs.b.128" || // Added in 6.0
+ Name == "pabs.d.128" || // Added in 6.0
+ Name == "pabs.w.128"); // Added in 6.0
+
+ if (Name.consume_front("xop."))
+ return (Name == "vpcmov" || // Added in 3.8
+ Name == "vpcmov.256" || // Added in 5.0
+ Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
+ Name.starts_with("vprot")); // Added in 8.0
+
+ return (Name == "addcarry.u32" || // Added in 8.0
+ Name == "addcarry.u64" || // Added in 8.0
+ Name == "addcarryx.u32" || // Added in 8.0
+ Name == "addcarryx.u64" || // Added in 8.0
+ Name == "subborrow.u32" || // Added in 8.0
+ Name == "subborrow.u64" || // Added in 8.0
+ Name.starts_with("vcvtph2ps.")); // Added in 11.0
}
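The rewrite replaces one flat chain of several hundred string compares with per-family groups: StringRef::consume_front() tests and strips a prefix in a single step, so each name is then matched only against short suffixes within its family. A standalone sketch of the idiom, using C++20 std::string_view in place of llvm::StringRef:

    #include <string_view>

    // Strip Prefix from S when it matches; report whether it did.
    static bool consumeFront(std::string_view &S, std::string_view Prefix) {
      if (!S.starts_with(Prefix))
        return false;
      S.remove_prefix(Prefix.size());
      return true;
    }

    // Dispatch on the family first, then compare the shortened suffix.
    static bool shouldUpgrade(std::string_view Name) {
      if (consumeFront(Name, "sse42."))
        return Name == "crc32.64.8";
      if (consumeFront(Name, "sse4a."))
        return Name.starts_with("movnt.");
      return false; // no other families in this sketch
    }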
static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
Function *&NewFn) {
// Only handle intrinsics that start with "x86.".
- if (!Name.startswith("x86."))
+ if (!Name.consume_front("x86."))
return false;
- // Remove "x86." prefix.
- Name = Name.substr(4);
if (ShouldUpgradeX86Intrinsic(F, Name)) {
NewFn = nullptr;
@@ -475,113 +505,112 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
return true;
}
+ Intrinsic::ID ID;
+
// SSE4.1 ptest functions may have an old signature.
- if (Name.startswith("sse41.ptest")) { // Added in 3.2
- if (Name.substr(11) == "c")
- return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
- if (Name.substr(11) == "z")
- return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
- if (Name.substr(11) == "nzc")
- return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
+ if (Name.consume_front("sse41.ptest")) { // Added in 3.2
+ ID = StringSwitch<Intrinsic::ID>(Name)
+ .Case("c", Intrinsic::x86_sse41_ptestc)
+ .Case("z", Intrinsic::x86_sse41_ptestz)
+ .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
+ .Default(Intrinsic::not_intrinsic);
+ if (ID != Intrinsic::not_intrinsic)
+ return UpgradePTESTIntrinsic(F, ID, NewFn);
+
+ return false;
}
+
// Several blend and other instructions with masks used the wrong number of
// bits.
- if (Name == "sse41.insertps") // Added in 3.6
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
- NewFn);
- if (Name == "sse41.dppd") // Added in 3.6
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
- NewFn);
- if (Name == "sse41.dpps") // Added in 3.6
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
- NewFn);
- if (Name == "sse41.mpsadbw") // Added in 3.6
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
- NewFn);
- if (Name == "avx.dp.ps.256") // Added in 3.6
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
- NewFn);
- if (Name == "avx2.mpsadbw") // Added in 3.6
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
- NewFn);
- if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
- return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
- NewFn);
- if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
- return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
- NewFn);
- if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
- return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
- NewFn);
- if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
- return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
- NewFn);
- if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
- return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
- NewFn);
- if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
- return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
- NewFn);
- if (Name == "avx512bf16.cvtne2ps2bf16.128") // Added in 9.0
- return UpgradeX86BF16Intrinsic(
- F, Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128, NewFn);
- if (Name == "avx512bf16.cvtne2ps2bf16.256") // Added in 9.0
- return UpgradeX86BF16Intrinsic(
- F, Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256, NewFn);
- if (Name == "avx512bf16.cvtne2ps2bf16.512") // Added in 9.0
- return UpgradeX86BF16Intrinsic(
- F, Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512, NewFn);
- if (Name == "avx512bf16.mask.cvtneps2bf16.128") // Added in 9.0
- return UpgradeX86BF16Intrinsic(
- F, Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128, NewFn);
- if (Name == "avx512bf16.cvtneps2bf16.256") // Added in 9.0
- return UpgradeX86BF16Intrinsic(
- F, Intrinsic::x86_avx512bf16_cvtneps2bf16_256, NewFn);
- if (Name == "avx512bf16.cvtneps2bf16.512") // Added in 9.0
- return UpgradeX86BF16Intrinsic(
- F, Intrinsic::x86_avx512bf16_cvtneps2bf16_512, NewFn);
- if (Name == "avx512bf16.dpbf16ps.128") // Added in 9.0
- return UpgradeX86BF16DPIntrinsic(
- F, Intrinsic::x86_avx512bf16_dpbf16ps_128, NewFn);
- if (Name == "avx512bf16.dpbf16ps.256") // Added in 9.0
- return UpgradeX86BF16DPIntrinsic(
- F, Intrinsic::x86_avx512bf16_dpbf16ps_256, NewFn);
- if (Name == "avx512bf16.dpbf16ps.512") // Added in 9.0
- return UpgradeX86BF16DPIntrinsic(
- F, Intrinsic::x86_avx512bf16_dpbf16ps_512, NewFn);
-
- // frcz.ss/sd may need to have an argument dropped. Added in 3.2
- if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::x86_xop_vfrcz_ss);
- return true;
+
+ // Added in 3.6
+ ID = StringSwitch<Intrinsic::ID>(Name)
+ .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
+ .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
+ .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
+ .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
+ .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
+ .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
+ .Default(Intrinsic::not_intrinsic);
+ if (ID != Intrinsic::not_intrinsic)
+ return UpgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
+
+ if (Name.consume_front("avx512.mask.cmp.")) {
+ // Added in 7.0
+ ID = StringSwitch<Intrinsic::ID>(Name)
+ .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
+ .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
+ .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
+ .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
+ .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
+ .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
+ .Default(Intrinsic::not_intrinsic);
+ if (ID != Intrinsic::not_intrinsic)
+ return UpgradeX86MaskedFPCompare(F, ID, NewFn);
+      return false; // No other 'x86.avx512.mask.cmp.*'.
}
- if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::x86_xop_vfrcz_sd);
- return true;
+
+ if (Name.consume_front("avx512bf16.")) {
+ // Added in 9.0
+ ID = StringSwitch<Intrinsic::ID>(Name)
+ .Case("cvtne2ps2bf16.128",
+ Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
+ .Case("cvtne2ps2bf16.256",
+ Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
+ .Case("cvtne2ps2bf16.512",
+ Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
+ .Case("mask.cvtneps2bf16.128",
+ Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
+ .Case("cvtneps2bf16.256",
+ Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
+ .Case("cvtneps2bf16.512",
+ Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
+ .Default(Intrinsic::not_intrinsic);
+ if (ID != Intrinsic::not_intrinsic)
+ return UpgradeX86BF16Intrinsic(F, ID, NewFn);
+
+ // Added in 9.0
+ ID = StringSwitch<Intrinsic::ID>(Name)
+ .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
+ .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
+ .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
+ .Default(Intrinsic::not_intrinsic);
+ if (ID != Intrinsic::not_intrinsic)
+ return UpgradeX86BF16DPIntrinsic(F, ID, NewFn);
+ return false; // No other 'x86.avx512bf16.*'.
}
- // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
- if (Name.startswith("xop.vpermil2")) { // Added in 3.9
- auto Idx = F->getFunctionType()->getParamType(2);
- if (Idx->isFPOrFPVectorTy()) {
+
+ if (Name.consume_front("xop.")) {
+ Intrinsic::ID ID = Intrinsic::not_intrinsic;
+ if (Name.starts_with("vpermil2")) { // Added in 3.9
+ // Upgrade any XOP PERMIL2 index operand still using a float/double
+ // vector.
+ auto Idx = F->getFunctionType()->getParamType(2);
+ if (Idx->isFPOrFPVectorTy()) {
+ unsigned IdxSize = Idx->getPrimitiveSizeInBits();
+ unsigned EltSize = Idx->getScalarSizeInBits();
+ if (EltSize == 64 && IdxSize == 128)
+ ID = Intrinsic::x86_xop_vpermil2pd;
+ else if (EltSize == 32 && IdxSize == 128)
+ ID = Intrinsic::x86_xop_vpermil2ps;
+ else if (EltSize == 64 && IdxSize == 256)
+ ID = Intrinsic::x86_xop_vpermil2pd_256;
+ else
+ ID = Intrinsic::x86_xop_vpermil2ps_256;
+ }
+ } else if (F->arg_size() == 2)
+ // frcz.ss/sd may need to have an argument dropped. Added in 3.2
+ ID = StringSwitch<Intrinsic::ID>(Name)
+ .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
+ .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
+ .Default(Intrinsic::not_intrinsic);
+
+ if (ID != Intrinsic::not_intrinsic) {
rename(F);
- unsigned IdxSize = Idx->getPrimitiveSizeInBits();
- unsigned EltSize = Idx->getScalarSizeInBits();
- Intrinsic::ID Permil2ID;
- if (EltSize == 64 && IdxSize == 128)
- Permil2ID = Intrinsic::x86_xop_vpermil2pd;
- else if (EltSize == 32 && IdxSize == 128)
- Permil2ID = Intrinsic::x86_xop_vpermil2ps;
- else if (EltSize == 64 && IdxSize == 256)
- Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
- else
- Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
- NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
return true;
}
+ return false; // No other 'x86.xop.*'
}
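The name-to-ID mappings above funnel through llvm::StringSwitch, whose contract is small enough to restate standalone: each Case() latches the first match, and Default() supplies the fallback. A minimal reimplementation for illustration (not the LLVM class):

    #include <optional>
    #include <string_view>

    template <typename R> class MiniSwitch {
      std::string_view Str;
      std::optional<R> Result;
    public:
      explicit MiniSwitch(std::string_view S) : Str(S) {}
      MiniSwitch &Case(std::string_view Key, R Value) {
        if (!Result && Str == Key)
          Result = Value; // first match wins; later Cases are ignored
        return *this;
      }
      R Default(R Value) const { return Result.value_or(Value); }
    };

    // MiniSwitch<int>("nzc").Case("c", 1).Case("z", 2).Case("nzc", 3).Default(0)
    // yields 3, mirroring the sse41.ptest dispatch above.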
if (Name == "seh.recoverfp") {
@@ -593,93 +622,108 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
}
static Intrinsic::ID ShouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
- return StringSwitch<Intrinsic::ID>(Name)
- .Case("abs.bf16", Intrinsic::nvvm_abs_bf16)
- .Case("abs.bf16x2", Intrinsic::nvvm_abs_bf16x2)
- .Case("fma.rn.bf16", Intrinsic::nvvm_fma_rn_bf16)
- .Case("fma.rn.bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
- .Case("fma.rn.ftz_bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
- .Case("fma.rn.ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
- .Case("fma.rn.ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
- .Case("fma.rn.ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
- .Case("fma.rn.ftz_sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
- .Case("fma.rn.ftz_sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
- .Case("fma.rn.relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
- .Case("fma.rn.relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
- .Case("fma.rn.sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
- .Case("fma.rn.sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
- .Case("fmax.bf16", Intrinsic::nvvm_fmax_bf16)
- .Case("fmax.bf16x2", Intrinsic::nvvm_fmax_bf16x2)
- .Case("fmax.ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
- .Case("fmax.ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
- .Case("fmax.ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
- .Case("fmax.ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
- .Case("fmax.ftz.nan.xorsign.abs.bf16",
- Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
- .Case("fmax.ftz.nan.xorsign.abs.bf16x2",
- Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
- .Case("fmax.ftz.xorsign.abs.bf16",
- Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
- .Case("fmax.ftz.xorsign.abs.bf16x2",
- Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
- .Case("fmax.nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
- .Case("fmax.nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
- .Case("fmax.nan.xorsign.abs.bf16",
- Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
- .Case("fmax.nan.xorsign.abs.bf16x2",
- Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
- .Case("fmax.xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
- .Case("fmax.xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
- .Case("fmin.bf16", Intrinsic::nvvm_fmin_bf16)
- .Case("fmin.bf16x2", Intrinsic::nvvm_fmin_bf16x2)
- .Case("fmin.ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
- .Case("fmin.ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
- .Case("fmin.ftz.nan_bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
- .Case("fmin.ftz.nan_bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
- .Case("fmin.ftz.nan.xorsign.abs.bf16",
- Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
- .Case("fmin.ftz.nan.xorsign.abs.bf16x2",
- Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
- .Case("fmin.ftz.xorsign.abs.bf16",
- Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
- .Case("fmin.ftz.xorsign.abs.bf16x2",
- Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
- .Case("fmin.nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
- .Case("fmin.nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
- .Case("fmin.nan.xorsign.abs.bf16",
- Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
- .Case("fmin.nan.xorsign.abs.bf16x2",
- Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
- .Case("fmin.xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
- .Case("fmin.xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
- .Case("neg.bf16", Intrinsic::nvvm_neg_bf16)
- .Case("neg.bf16x2", Intrinsic::nvvm_neg_bf16x2)
- .Default(Intrinsic::not_intrinsic);
+ if (Name.consume_front("abs."))
+ return StringSwitch<Intrinsic::ID>(Name)
+ .Case("bf16", Intrinsic::nvvm_abs_bf16)
+ .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
+ .Default(Intrinsic::not_intrinsic);
+
+ if (Name.consume_front("fma.rn."))
+ return StringSwitch<Intrinsic::ID>(Name)
+ .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
+ .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
+ .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
+ .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
+ .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
+ .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
+ .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
+ .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
+ .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
+ .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
+ .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
+ .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
+ .Default(Intrinsic::not_intrinsic);
+
+ if (Name.consume_front("fmax."))
+ return StringSwitch<Intrinsic::ID>(Name)
+ .Case("bf16", Intrinsic::nvvm_fmax_bf16)
+ .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
+ .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
+ .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
+ .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
+ .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
+ .Case("ftz.nan.xorsign.abs.bf16",
+ Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
+ .Case("ftz.nan.xorsign.abs.bf16x2",
+ Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
+ .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
+ .Case("ftz.xorsign.abs.bf16x2",
+ Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
+ .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
+ .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
+ .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
+ .Case("nan.xorsign.abs.bf16x2",
+ Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
+ .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
+ .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
+ .Default(Intrinsic::not_intrinsic);
+
+ if (Name.consume_front("fmin."))
+ return StringSwitch<Intrinsic::ID>(Name)
+ .Case("bf16", Intrinsic::nvvm_fmin_bf16)
+ .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
+ .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
+ .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
+ .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
+ .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
+ .Case("ftz.nan.xorsign.abs.bf16",
+ Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
+ .Case("ftz.nan.xorsign.abs.bf16x2",
+ Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
+ .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
+ .Case("ftz.xorsign.abs.bf16x2",
+ Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
+ .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
+ .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
+ .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
+ .Case("nan.xorsign.abs.bf16x2",
+ Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
+ .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
+ .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
+ .Default(Intrinsic::not_intrinsic);
+
+ if (Name.consume_front("neg."))
+ return StringSwitch<Intrinsic::ID>(Name)
+ .Case("bf16", Intrinsic::nvvm_neg_bf16)
+ .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
+ .Default(Intrinsic::not_intrinsic);
+
+ return Intrinsic::not_intrinsic;
}
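The NVPTX bf16 rewrite combines the two idioms sketched earlier: consume a family prefix, then switch on the remaining suffix. Reusing the illustrative consumeFront and MiniSwitch helpers from the sketches above (hypothetical harness, not LLVM code):

    // Returns a nonzero tag for the handled fmin bf16 variants only.
    static int classifyNVVM(std::string_view Name) {
      if (consumeFront(Name, "fmin."))
        return MiniSwitch<int>(Name)
            .Case("bf16", 1)
            .Case("bf16x2", 2)
            .Default(0);
      return 0; // other families fall through, as in the real code
    }
    // classifyNVVM("fmin.bf16x2") == 2; classifyNVVM("fmax.bf16") == 0 here.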
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
assert(F && "Illegal to upgrade a non-existent Function.");
- // Quickly eliminate it, if it's not a candidate.
StringRef Name = F->getName();
- if (Name.size() <= 7 || !Name.startswith("llvm."))
+
+ // Quickly eliminate it, if it's not a candidate.
+ if (!Name.consume_front("llvm.") || Name.empty())
return false;
- Name = Name.substr(5); // Strip off "llvm."
switch (Name[0]) {
default: break;
case 'a': {
- if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
+ if (Name.starts_with("arm.rbit") || Name.starts_with("aarch64.rbit")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
return true;
}
- if (Name.startswith("aarch64.neon.frintn")) {
+ if (Name.starts_with("aarch64.neon.frintn")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven,
F->arg_begin()->getType());
return true;
}
- if (Name.startswith("aarch64.neon.rbit")) {
+ if (Name.starts_with("aarch64.neon.rbit")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
return true;
@@ -715,13 +759,13 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Ty);
return true;
}
- if (Name.startswith("aarch64.sve.tuple.get")) {
+ if (Name.starts_with("aarch64.sve.tuple.get")) {
Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::vector_extract, Tys);
return true;
}
- if (Name.startswith("aarch64.sve.tuple.set")) {
+ if (Name.starts_with("aarch64.sve.tuple.set")) {
auto Args = F->getFunctionType()->params();
Type *Tys[] = {Args[0], Args[2], Args[1]};
NewFn = Intrinsic::getDeclaration(F->getParent(),
@@ -737,20 +781,12 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Intrinsic::vector_insert, Tys);
return true;
}
- if (Name.startswith("arm.neon.vclz")) {
- Type* args[2] = {
- F->arg_begin()->getType(),
- Type::getInt1Ty(F->getContext())
- };
- // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
- // the end of the name. Change name from llvm.arm.neon.vclz.* to
- // llvm.ctlz.*
- FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
- NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
- "llvm.ctlz." + Name.substr(14), F->getParent());
+ if (Name.starts_with("arm.neon.vclz")) {
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
+ F->arg_begin()->getType());
return true;
}
- if (Name.startswith("arm.neon.vcnt")) {
+ if (Name.starts_with("arm.neon.vcnt")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
F->arg_begin()->getType());
return true;
@@ -781,27 +817,27 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
return true;
}
- if (Name.startswith("arm.neon.vqadds.")) {
+ if (Name.starts_with("arm.neon.vqadds.")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
F->arg_begin()->getType());
return true;
}
- if (Name.startswith("arm.neon.vqaddu.")) {
+ if (Name.starts_with("arm.neon.vqaddu.")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
F->arg_begin()->getType());
return true;
}
- if (Name.startswith("arm.neon.vqsubs.")) {
+ if (Name.starts_with("arm.neon.vqsubs.")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
F->arg_begin()->getType());
return true;
}
- if (Name.startswith("arm.neon.vqsubu.")) {
+ if (Name.starts_with("arm.neon.vqsubu.")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
F->arg_begin()->getType());
return true;
}
- if (Name.startswith("aarch64.neon.addp")) {
+ if (Name.starts_with("aarch64.neon.addp")) {
if (F->arg_size() != 2)
break; // Invalid IR.
VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
@@ -814,9 +850,9 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
// Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and v16i8
// respectively
- if ((Name.startswith("arm.neon.bfdot.") ||
- Name.startswith("aarch64.neon.bfdot.")) &&
- Name.endswith("i8")) {
+ if ((Name.starts_with("arm.neon.bfdot.") ||
+ Name.starts_with("aarch64.neon.bfdot.")) &&
+ Name.ends_with("i8")) {
Intrinsic::ID IID =
StringSwitch<Intrinsic::ID>(Name)
.Cases("arm.neon.bfdot.v2f32.v8i8",
@@ -843,9 +879,9 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
// Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic anymore
// and accept v8bf16 instead of v16i8
- if ((Name.startswith("arm.neon.bfm") ||
- Name.startswith("aarch64.neon.bfm")) &&
- Name.endswith(".v4f32.v16i8")) {
+ if ((Name.starts_with("arm.neon.bfm") ||
+ Name.starts_with("aarch64.neon.bfm")) &&
+ Name.ends_with(".v4f32.v16i8")) {
Intrinsic::ID IID =
StringSwitch<Intrinsic::ID>(Name)
.Case("arm.neon.bfmmla.v4f32.v16i8",
@@ -897,150 +933,139 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "arm.cde.vcx3qa.predicated.v2i64.v4i1")
return true;
- if (Name.startswith("amdgcn."))
- Name = Name.substr(7); // Strip off "amdgcn."
+ if (Name.consume_front("amdgcn.")) {
+ if (Name == "alignbit") {
+ // Target specific intrinsic became redundant
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
+ {F->getReturnType()});
+ return true;
+ }
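+ // e.g. (illustrative) a call to @llvm.amdgcn.alignbit(i32 %a, i32 %b,
+ // i32 %c) has the same semantics as @llvm.fshr.i32(%a, %b, %c), so only
+ // the declaration needs to change; the overload follows the return type.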
- if (Name == "alignbit") {
- // Target specific intrinsic became redundant
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
- {F->getReturnType()});
- return true;
- }
+ if (Name.consume_front("atomic.")) {
+ if (Name.starts_with("inc") || Name.starts_with("dec")) {
+ // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
+ // there's no new declaration.
+ NewFn = nullptr;
+ return true;
+ }
+ break; // No other 'amdgcn.atomic.*'
+ }
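+ // Returning true with NewFn == nullptr defers to UpgradeIntrinsicCall,
+ // which expands the old call in place; e.g. (illustrative) a call to
+ // llvm.amdgcn.atomic.inc becomes an "atomicrmw uinc_wrap" instruction on
+ // the same pointer and value operands.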
- if (Name.startswith("atomic.inc") || Name.startswith("atomic.dec")) {
- // This was replaced with atomicrmw uinc_wrap and udec_wrap, so there's no
- // new declaration.
- NewFn = nullptr;
- return true;
+ if (Name.starts_with("ldexp.")) {
+ // Target specific intrinsic became redundant
+ NewFn = Intrinsic::getDeclaration(
+ F->getParent(), Intrinsic::ldexp,
+ {F->getReturnType(), F->getArg(1)->getType()});
+ return true;
+ }
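+ // e.g. (illustrative) @llvm.amdgcn.ldexp.f32(float, i32) maps onto the
+ // generic @llvm.ldexp.f32.i32; both the return type and the exponent
+ // type participate in the overload, hence the two-element type list.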
+ break; // No other 'amdgcn.*'
}
break;
}
case 'c': {
- if (Name.startswith("ctlz.") && F->arg_size() == 1) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
- F->arg_begin()->getType());
- return true;
- }
- if (Name.startswith("cttz.") && F->arg_size() == 1) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
- F->arg_begin()->getType());
- return true;
- }
- break;
- }
- case 'd': {
- if (Name == "dbg.addr") {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
- return true;
- }
- if (Name == "dbg.value" && F->arg_size() == 4) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
- return true;
- }
- break;
- }
- case 'e': {
- if (Name.startswith("experimental.vector.extract.")) {
- rename(F);
- Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
- NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::vector_extract, Tys);
- return true;
+ if (F->arg_size() == 1) {
+ Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
+ .StartsWith("ctlz.", Intrinsic::ctlz)
+ .StartsWith("cttz.", Intrinsic::cttz)
+ .Default(Intrinsic::not_intrinsic);
+ if (ID != Intrinsic::not_intrinsic) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
+ F->arg_begin()->getType());
+ return true;
+ }
}
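// The one-argument forms predate the is_zero_poison flag; e.g.
// (illustrative) i32 @llvm.ctlz.i32(i32) is re-pointed at the two-argument
// i32 @llvm.ctlz.i32(i32, i1) and the call upgrade supplies the flag.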
- if (Name.startswith("experimental.vector.insert.")) {
+ if (F->arg_size() == 2 && Name.equals("coro.end")) {
rename(F);
- auto Args = F->getFunctionType()->params();
- Type *Tys[] = {Args[0], Args[1]};
- NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::vector_insert, Tys);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::coro_end);
return true;
}
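// llvm.coro.end grew a token operand, so (illustrative) old two-operand
// calls are completed with a "token none" argument by the call upgrade.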
- SmallVector<StringRef, 2> Groups;
- static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+");
- if (R.match(Name, &Groups)) {
- Intrinsic::ID ID;
- ID = StringSwitch<Intrinsic::ID>(Groups[1])
- .Case("add", Intrinsic::vector_reduce_add)
- .Case("mul", Intrinsic::vector_reduce_mul)
- .Case("and", Intrinsic::vector_reduce_and)
- .Case("or", Intrinsic::vector_reduce_or)
- .Case("xor", Intrinsic::vector_reduce_xor)
- .Case("smax", Intrinsic::vector_reduce_smax)
- .Case("smin", Intrinsic::vector_reduce_smin)
- .Case("umax", Intrinsic::vector_reduce_umax)
- .Case("umin", Intrinsic::vector_reduce_umin)
- .Case("fmax", Intrinsic::vector_reduce_fmax)
- .Case("fmin", Intrinsic::vector_reduce_fmin)
- .Default(Intrinsic::not_intrinsic);
- if (ID != Intrinsic::not_intrinsic) {
+ break;
+ }
+ case 'd':
+ if (Name.consume_front("dbg.")) {
+ if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
rename(F);
- auto Args = F->getFunctionType()->params();
- NewFn = Intrinsic::getDeclaration(F->getParent(), ID, {Args[0]});
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
return true;
}
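// llvm.dbg.addr is retired in favor of llvm.dbg.value (the call upgrade
// adjusts the DIExpression accordingly), and old four-operand dbg.value
// calls drop their obsolete offset operand.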
+ break; // No other 'dbg.*'.
}
- static const Regex R2(
- "^experimental.vector.reduce.v2.([a-z]+)\\.[fi][0-9]+");
- Groups.clear();
- if (R2.match(Name, &Groups)) {
- Intrinsic::ID ID = Intrinsic::not_intrinsic;
- if (Groups[1] == "fadd")
- ID = Intrinsic::vector_reduce_fadd;
- if (Groups[1] == "fmul")
- ID = Intrinsic::vector_reduce_fmul;
+ break;
+ case 'e':
+ if (Name.consume_front("experimental.vector.")) {
+ Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
+ .StartsWith("extract.", Intrinsic::vector_extract)
+ .StartsWith("insert.", Intrinsic::vector_insert)
+ .Default(Intrinsic::not_intrinsic);
if (ID != Intrinsic::not_intrinsic) {
+ const auto *FT = F->getFunctionType();
+ SmallVector<Type *, 2> Tys;
+ if (ID == Intrinsic::vector_extract)
+ // Extracting overloads the return type.
+ Tys.push_back(FT->getReturnType());
+ Tys.push_back(FT->getParamType(0));
+ if (ID == Intrinsic::vector_insert)
+ // Inserting overloads the inserted type.
+ Tys.push_back(FT->getParamType(1));
rename(F);
- auto Args = F->getFunctionType()->params();
- Type *Tys[] = {Args[1]};
NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
return true;
}
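// e.g. (illustrative) llvm.experimental.vector.extract.v4i32.nxv4i32 is
// re-declared as llvm.vector.extract.v4i32.nxv4i32; extracts mangle
// <result, source> while inserts mangle <vector, subvector>, matching Tys.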
+
+ if (Name.consume_front("reduce.")) {
+ SmallVector<StringRef, 2> Groups;
+ static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
+ if (R.match(Name, &Groups))
+ ID = StringSwitch<Intrinsic::ID>(Groups[1])
+ .Case("add", Intrinsic::vector_reduce_add)
+ .Case("mul", Intrinsic::vector_reduce_mul)
+ .Case("and", Intrinsic::vector_reduce_and)
+ .Case("or", Intrinsic::vector_reduce_or)
+ .Case("xor", Intrinsic::vector_reduce_xor)
+ .Case("smax", Intrinsic::vector_reduce_smax)
+ .Case("smin", Intrinsic::vector_reduce_smin)
+ .Case("umax", Intrinsic::vector_reduce_umax)
+ .Case("umin", Intrinsic::vector_reduce_umin)
+ .Case("fmax", Intrinsic::vector_reduce_fmax)
+ .Case("fmin", Intrinsic::vector_reduce_fmin)
+ .Default(Intrinsic::not_intrinsic);
+
+ bool V2 = false;
+ if (ID == Intrinsic::not_intrinsic) {
+ static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
+ Groups.clear();
+ V2 = true;
+ if (R2.match(Name, &Groups))
+ ID = StringSwitch<Intrinsic::ID>(Groups[1])
+ .Case("fadd", Intrinsic::vector_reduce_fadd)
+ .Case("fmul", Intrinsic::vector_reduce_fmul)
+ .Default(Intrinsic::not_intrinsic);
+ }
+ if (ID != Intrinsic::not_intrinsic) {
+ rename(F);
+ auto Args = F->getFunctionType()->params();
+ NewFn =
+ Intrinsic::getDeclaration(F->getParent(), ID, {Args[V2 ? 1 : 0]});
+ return true;
+ }
+ break; // No other 'experimental.vector.reduce.*'.
+ }
+ break; // No other 'experimental.vector.*'.
}
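// e.g. (illustrative) llvm.experimental.vector.reduce.add.v4i32 becomes
// llvm.vector.reduce.add.v4i32, while the ".v2" FP forms drop the scalar
// accumulator from the mangling, hence the Args[V2 ? 1 : 0] overload pick.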
- break;
- }
+ break; // No other 'e*'.
case 'f':
- if (Name.startswith("flt.rounds")) {
+ if (Name.starts_with("flt.rounds")) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::get_rounding);
return true;
}
break;
case 'i':
- case 'l': {
- bool IsLifetimeStart = Name.startswith("lifetime.start");
- if (IsLifetimeStart || Name.startswith("invariant.start")) {
- Intrinsic::ID ID = IsLifetimeStart ?
- Intrinsic::lifetime_start : Intrinsic::invariant_start;
- auto Args = F->getFunctionType()->params();
- Type* ObjectPtr[1] = {Args[1]};
- if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
- return true;
- }
- }
-
- bool IsLifetimeEnd = Name.startswith("lifetime.end");
- if (IsLifetimeEnd || Name.startswith("invariant.end")) {
- Intrinsic::ID ID = IsLifetimeEnd ?
- Intrinsic::lifetime_end : Intrinsic::invariant_end;
-
- auto Args = F->getFunctionType()->params();
- Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
- if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
- return true;
- }
- }
- if (Name.startswith("invariant.group.barrier")) {
+ if (Name.starts_with("invariant.group.barrier")) {
// Rename invariant.group.barrier to launder.invariant.group
auto Args = F->getFunctionType()->params();
Type* ObjectPtr[1] = {Args[0]};
@@ -1048,78 +1073,26 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::launder_invariant_group, ObjectPtr);
return true;
-
}
-
break;
- }
case 'm': {
- if (Name.startswith("masked.load.")) {
- Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
- if (F->getName() !=
- Intrinsic::getName(Intrinsic::masked_load, Tys, F->getParent())) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::masked_load,
- Tys);
- return true;
- }
- }
- if (Name.startswith("masked.store.")) {
- auto Args = F->getFunctionType()->params();
- Type *Tys[] = { Args[0], Args[1] };
- if (F->getName() !=
- Intrinsic::getName(Intrinsic::masked_store, Tys, F->getParent())) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::masked_store,
- Tys);
- return true;
- }
- }
- // Renaming gather/scatter intrinsics with no address space overloading
- // to the new overload which includes an address space
- if (Name.startswith("masked.gather.")) {
- Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
- if (F->getName() !=
- Intrinsic::getName(Intrinsic::masked_gather, Tys, F->getParent())) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::masked_gather, Tys);
- return true;
- }
- }
- if (Name.startswith("masked.scatter.")) {
- auto Args = F->getFunctionType()->params();
- Type *Tys[] = {Args[0], Args[1]};
- if (F->getName() !=
- Intrinsic::getName(Intrinsic::masked_scatter, Tys, F->getParent())) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::masked_scatter, Tys);
- return true;
- }
- }
// Updating the memory intrinsics (memcpy/memmove/memset) that have an
// alignment parameter to embedding the alignment as an attribute of
// the pointer args.
- if (Name.startswith("memcpy.") && F->arg_size() == 5) {
- rename(F);
- // Get the types of dest, src, and len
- ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
- ParamTypes);
- return true;
- }
- if (Name.startswith("memmove.") && F->arg_size() == 5) {
- rename(F);
- // Get the types of dest, src, and len
- ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
- ParamTypes);
- return true;
+ if (unsigned ID = StringSwitch<unsigned>(Name)
+ .StartsWith("memcpy.", Intrinsic::memcpy)
+ .StartsWith("memmove.", Intrinsic::memmove)
+ .Default(0)) {
+ if (F->arg_size() == 5) {
+ rename(F);
+ // Get the types of dest, src, and len
+ ArrayRef<Type *> ParamTypes =
+ F->getFunctionType()->params().slice(0, 3);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ParamTypes);
+ return true;
+ }
}
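// e.g. (illustrative) the five-argument form
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n, i32 4, i1 false)
// becomes the four-argument form with the alignment carried as an "align 4"
// parameter attribute on each pointer argument.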
- if (Name.startswith("memset.") && F->arg_size() == 5) {
+ if (Name.starts_with("memset.") && F->arg_size() == 5) {
rename(F);
// Get the types of dest and len
const auto *FT = F->getFunctionType();
@@ -1134,49 +1107,62 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
break;
}
case 'n': {
- if (Name.startswith("nvvm.")) {
- Name = Name.substr(5);
-
- // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
- Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
- .Cases("brev32", "brev64", Intrinsic::bitreverse)
- .Case("clz.i", Intrinsic::ctlz)
- .Case("popc.i", Intrinsic::ctpop)
- .Default(Intrinsic::not_intrinsic);
- if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
- NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
- {F->getReturnType()});
- return true;
+ if (Name.consume_front("nvvm.")) {
+ // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
+ if (F->arg_size() == 1) {
+ Intrinsic::ID IID =
+ StringSwitch<Intrinsic::ID>(Name)
+ .Cases("brev32", "brev64", Intrinsic::bitreverse)
+ .Case("clz.i", Intrinsic::ctlz)
+ .Case("popc.i", Intrinsic::ctpop)
+ .Default(Intrinsic::not_intrinsic);
+ if (IID != Intrinsic::not_intrinsic) {
+ NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
+ {F->getReturnType()});
+ return true;
+ }
}
- IID = ShouldUpgradeNVPTXBF16Intrinsic(Name);
- if (IID != Intrinsic::not_intrinsic &&
- !F->getReturnType()->getScalarType()->isBFloatTy()) {
- NewFn = nullptr;
- return true;
+
+ // Check for nvvm intrinsics that need a return type adjustment.
+ if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
+ Intrinsic::ID IID = ShouldUpgradeNVPTXBF16Intrinsic(Name);
+ if (IID != Intrinsic::not_intrinsic) {
+ NewFn = nullptr;
+ return true;
+ }
}
+
// The following nvvm intrinsics correspond exactly to an LLVM idiom, but
// not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
//
// TODO: We could add lohi.i2d.
- bool Expand = StringSwitch<bool>(Name)
- .Cases("abs.i", "abs.ll", true)
- .Cases("clz.ll", "popc.ll", "h2f", true)
- .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
- .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
- .StartsWith("atomic.load.add.f32.p", true)
- .StartsWith("atomic.load.add.f64.p", true)
- .Default(false);
+ bool Expand = false;
+ if (Name.consume_front("abs."))
+ // nvvm.abs.{i,ll}
+ Expand = Name == "i" || Name == "ll";
+ else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
+ Expand = true;
+ else if (Name.consume_front("max.") || Name.consume_front("min."))
+ // nvvm.{min,max}.{i,ll,ui,ull}
+ Expand = Name == "i" || Name == "ll" || Name == "ui" || Name == "ull";
+ else if (Name.consume_front("atomic.load.add."))
+ // nvvm.atomic.load.add.{f32.p,f64.p}
+ Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
+ else
+ Expand = false;
+
if (Expand) {
NewFn = nullptr;
return true;
}
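// Each of these maps to plain IR in UpgradeIntrinsicCall; e.g.
// (illustrative) nvvm.max.i lowers to an "icmp sgt" feeding a "select"
// rather than to any intrinsic call.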
+ break; // No other 'nvvm.*'.
}
break;
}
case 'o':
// We only need to change the name to match the mangling including the
// address space.
- if (Name.startswith("objectsize.")) {
+ if (Name.starts_with("objectsize.")) {
Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
if (F->arg_size() == 2 || F->arg_size() == 3 ||
F->getName() !=
@@ -1190,17 +1176,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
break;
case 'p':
- if (Name == "prefetch") {
- // Handle address space overloading.
- Type *Tys[] = {F->arg_begin()->getType()};
- if (F->getName() !=
- Intrinsic::getName(Intrinsic::prefetch, Tys, F->getParent())) {
- rename(F);
- NewFn =
- Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
- return true;
- }
- } else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
+ if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
rename(F);
NewFn = Intrinsic::getDeclaration(
F->getParent(), Intrinsic::ptr_annotation,
@@ -1209,86 +1185,57 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
break;
- case 'r':
- if (Name == "riscv.aes32dsi" &&
- !F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_aes32dsi);
- return true;
- }
- if (Name == "riscv.aes32dsmi" &&
- !F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_aes32dsmi);
- return true;
- }
- if (Name == "riscv.aes32esi" &&
- !F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_aes32esi);
- return true;
- }
- if (Name == "riscv.aes32esmi" &&
- !F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_aes32esmi);
- return true;
- }
- if (Name.startswith("riscv.sm4ks") &&
- (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
- F->getFunctionType()->getReturnType()->isIntegerTy(64))) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_sm4ks);
- return true;
- }
- if (Name.startswith("riscv.sm4ed") &&
- (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
- F->getFunctionType()->getReturnType()->isIntegerTy(64))) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_sm4ed);
- return true;
- }
- if (Name.startswith("riscv.sha256sig0") &&
- F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::riscv_sha256sig0);
- return true;
- }
- if (Name.startswith("riscv.sha256sig1") &&
- F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::riscv_sha256sig1);
- return true;
- }
- if (Name.startswith("riscv.sha256sum0") &&
- F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::riscv_sha256sum0);
- return true;
- }
- if (Name.startswith("riscv.sha256sum1") &&
- F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::riscv_sha256sum1);
- return true;
- }
- if (Name.startswith("riscv.sm3p0") &&
- F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_sm3p0);
- return true;
- }
- if (Name.startswith("riscv.sm3p1") &&
- F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_sm3p1);
- return true;
+ case 'r': {
+ if (Name.consume_front("riscv.")) {
+ Intrinsic::ID ID;
+ ID = StringSwitch<Intrinsic::ID>(Name)
+ .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
+ .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
+ .Case("aes32esi", Intrinsic::riscv_aes32esi)
+ .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
+ .Default(Intrinsic::not_intrinsic);
+ if (ID != Intrinsic::not_intrinsic) {
+ if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
+ return true;
+ }
+ break; // No other applicable upgrades.
+ }
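+ // These scalar-crypto intrinsics originally used XLen-sized operands;
+ // e.g. (illustrative) an RV64 declaration of llvm.riscv.aes32dsi taking
+ // an i64 byte-select is re-mangled to the fixed i32 form.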
+
+ ID = StringSwitch<Intrinsic::ID>(Name)
+ .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
+ .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
+ .Default(Intrinsic::not_intrinsic);
+ if (ID != Intrinsic::not_intrinsic) {
+ if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
+ F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
+ return true;
+ }
+ break; // No other applicable upgrades.
+ }
+
+ ID = StringSwitch<Intrinsic::ID>(Name)
+ .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
+ .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
+ .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
+ .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
+ .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
+ .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
+ .Default(Intrinsic::not_intrinsic);
+ if (ID != Intrinsic::not_intrinsic) {
+ if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
+ return true;
+ }
+ break; // No other applicable upgrades.
+ }
+ break; // No other 'riscv.*' intrinsics
}
- break;
+ } break;
case 's':
if (Name == "stackprotectorcheck") {
@@ -1309,36 +1256,34 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
case 'w':
- if (Name.startswith("wasm.fma.")) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(
- F->getParent(), Intrinsic::wasm_relaxed_madd, F->getReturnType());
- return true;
- }
- if (Name.startswith("wasm.fms.")) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(
- F->getParent(), Intrinsic::wasm_relaxed_nmadd, F->getReturnType());
- return true;
- }
- if (Name.startswith("wasm.laneselect.")) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(
- F->getParent(), Intrinsic::wasm_relaxed_laneselect,
- F->getReturnType());
- return true;
- }
- if (Name == "wasm.dot.i8x16.i7x16.signed") {
- rename(F);
- NewFn = Intrinsic::getDeclaration(
- F->getParent(), Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
- return true;
- }
- if (Name == "wasm.dot.i8x16.i7x16.add.signed") {
- rename(F);
- NewFn = Intrinsic::getDeclaration(
- F->getParent(), Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
- return true;
+ if (Name.consume_front("wasm.")) {
+ Intrinsic::ID ID =
+ StringSwitch<Intrinsic::ID>(Name)
+ .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
+ .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
+ .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
+ .Default(Intrinsic::not_intrinsic);
+ if (ID != Intrinsic::not_intrinsic) {
+ rename(F);
+ NewFn =
+ Intrinsic::getDeclaration(F->getParent(), ID, F->getReturnType());
+ return true;
+ }
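+ // e.g. (illustrative) llvm.wasm.fma.v4f32 is re-declared as
+ // llvm.wasm.relaxed.madd.v4f32; only the name changes, the signature
+ // stays the same.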
+
+ if (Name.consume_front("dot.i8x16.i7x16.")) {
+ ID = StringSwitch<Intrinsic::ID>(Name)
+ .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
+ .Case("add.signed",
+ Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
+ .Default(Intrinsic::not_intrinsic);
+ if (ID != Intrinsic::not_intrinsic) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
+ return true;
+ }
+ break; // No other 'wasm.dot.i8x16.i7x16.*'.
+ }
+ break; // No other 'wasm.*'.
}
break;
@@ -1348,7 +1293,8 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
auto *ST = dyn_cast<StructType>(F->getReturnType());
- if (ST && (!ST->isLiteral() || ST->isPacked())) {
+ if (ST && (!ST->isLiteral() || ST->isPacked()) &&
+ F->getIntrinsicID() != Intrinsic::not_intrinsic) {
// Replace return type with literal non-packed struct. Only do this for
// intrinsics declared to return a struct, not for intrinsics with
// overloaded return type, in which case the exact struct type will be
@@ -1413,15 +1359,15 @@ GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
LLVMContext &C = GV->getContext();
IRBuilder<> IRB(C);
auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
- IRB.getInt8PtrTy());
+ IRB.getPtrTy());
Constant *Init = GV->getInitializer();
unsigned N = Init->getNumOperands();
std::vector<Constant *> NewCtors(N);
for (unsigned i = 0; i != N; ++i) {
auto Ctor = cast<Constant>(Init->getOperand(i));
- NewCtors[i] = ConstantStruct::get(
- EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
- Constant::getNullValue(IRB.getInt8PtrTy()));
+ NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
+ Ctor->getAggregateElement(1),
+ Constant::getNullValue(IRB.getPtrTy()));
}
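// With opaque pointers both pointer fields are plain "ptr", so an upgraded
// llvm.global_ctors entry has type { i32, ptr, ptr } (illustrative).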
Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
@@ -1947,7 +1893,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
unsigned EltWidth = CI.getType()->getScalarSizeInBits();
Intrinsic::ID IID;
- if (Name.startswith("max.p")) {
+ if (Name.starts_with("max.p")) {
if (VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_sse_max_ps;
else if (VecWidth == 128 && EltWidth == 64)
@@ -1958,7 +1904,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_avx_max_pd_256;
else
llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("min.p")) {
+ } else if (Name.starts_with("min.p")) {
if (VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_sse_min_ps;
else if (VecWidth == 128 && EltWidth == 64)
@@ -1969,7 +1915,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_avx_min_pd_256;
else
llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("pshuf.b.")) {
+ } else if (Name.starts_with("pshuf.b.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_ssse3_pshuf_b_128;
else if (VecWidth == 256)
@@ -1978,7 +1924,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_avx512_pshuf_b_512;
else
llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("pmul.hr.sw.")) {
+ } else if (Name.starts_with("pmul.hr.sw.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
else if (VecWidth == 256)
@@ -1987,7 +1933,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
else
llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("pmulh.w.")) {
+ } else if (Name.starts_with("pmulh.w.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_pmulh_w;
else if (VecWidth == 256)
@@ -1996,7 +1942,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_avx512_pmulh_w_512;
else
llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("pmulhu.w.")) {
+ } else if (Name.starts_with("pmulhu.w.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_pmulhu_w;
else if (VecWidth == 256)
@@ -2005,7 +1951,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_avx512_pmulhu_w_512;
else
llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("pmaddw.d.")) {
+ } else if (Name.starts_with("pmaddw.d.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_pmadd_wd;
else if (VecWidth == 256)
@@ -2014,7 +1960,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_avx512_pmaddw_d_512;
else
llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("pmaddubs.w.")) {
+ } else if (Name.starts_with("pmaddubs.w.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
else if (VecWidth == 256)
@@ -2023,7 +1969,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_avx512_pmaddubs_w_512;
else
llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("packsswb.")) {
+ } else if (Name.starts_with("packsswb.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_packsswb_128;
else if (VecWidth == 256)
@@ -2032,7 +1978,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_avx512_packsswb_512;
else
llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("packssdw.")) {
+ } else if (Name.starts_with("packssdw.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_packssdw_128;
else if (VecWidth == 256)
@@ -2041,7 +1987,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_avx512_packssdw_512;
else
llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("packuswb.")) {
+ } else if (Name.starts_with("packuswb.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_packuswb_128;
else if (VecWidth == 256)
@@ -2050,7 +1996,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_avx512_packuswb_512;
else
llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("packusdw.")) {
+ } else if (Name.starts_with("packusdw.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse41_packusdw;
else if (VecWidth == 256)
@@ -2059,7 +2005,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_avx512_packusdw_512;
else
llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("vpermilvar.")) {
+ } else if (Name.starts_with("vpermilvar.")) {
if (VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_avx_vpermilvar_ps;
else if (VecWidth == 128 && EltWidth == 64)
@@ -2084,7 +2030,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_sse2_cvttps2dq;
} else if (Name == "cvttps2dq.256") {
IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
- } else if (Name.startswith("permvar.")) {
+ } else if (Name.starts_with("permvar.")) {
bool IsFloat = CI.getType()->isFPOrFPVectorTy();
if (VecWidth == 256 && EltWidth == 32 && IsFloat)
IID = Intrinsic::x86_avx2_permps;
@@ -2116,7 +2062,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_avx512_permvar_qi_512;
else
llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("dbpsadbw.")) {
+ } else if (Name.starts_with("dbpsadbw.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_avx512_dbpsadbw_128;
else if (VecWidth == 256)
@@ -2125,7 +2071,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_avx512_dbpsadbw_512;
else
llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("pmultishift.qb.")) {
+ } else if (Name.starts_with("pmultishift.qb.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_avx512_pmultishift_qb_128;
else if (VecWidth == 256)
@@ -2134,7 +2080,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_avx512_pmultishift_qb_512;
else
llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("conflict.")) {
+ } else if (Name.starts_with("conflict.")) {
if (Name[9] == 'd' && VecWidth == 128)
IID = Intrinsic::x86_avx512_conflict_d_128;
else if (Name[9] == 'd' && VecWidth == 256)
@@ -2149,7 +2095,7 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_avx512_conflict_q_512;
else
llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("pavg.")) {
+ } else if (Name.starts_with("pavg.")) {
if (Name[5] == 'b' && VecWidth == 128)
IID = Intrinsic::x86_sse2_pavg_b;
else if (Name[5] == 'b' && VecWidth == 256)
@@ -2285,8 +2231,8 @@ static Value *UpgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
static Value *UpgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
Function *F, IRBuilder<> &Builder) {
- const bool IsInc = Name.startswith("atomic.inc.");
- if (IsInc || Name.startswith("atomic.dec.")) {
+ const bool IsInc = Name.starts_with("atomic.inc.");
+ if (IsInc || Name.starts_with("atomic.dec.")) {
if (CI->getNumOperands() != 6) // Malformed bitcode.
return nullptr;
@@ -2305,7 +2251,11 @@ static Value *UpgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
Order == AtomicOrdering::Unordered)
Order = AtomicOrdering::SequentiallyConsistent;
- AtomicRMWInst *RMW = Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order);
+ // The scope argument never really worked correctly. Use agent as the most
+ // conservative option which should still always produce the instruction.
+ SyncScope::ID SSID = F->getContext().getOrInsertSyncScopeID("agent");
+ AtomicRMWInst *RMW =
+ Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
if (!VolatileArg || !VolatileArg->isZero())
RMW->setVolatile(true);
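// e.g. (illustrative) the rewritten instruction comes out as
//   %v = atomicrmw volatile uinc_wrap ptr addrspace(1) %p, i32 %x syncscope("agent") seq_cst
// when the original call carried a weaker ordering and a nonzero volatile
// operand.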
@@ -2333,23 +2283,23 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
// Get the Function's name.
StringRef Name = F->getName();
- assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
+ assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
Name = Name.substr(5);
- bool IsX86 = Name.startswith("x86.");
+ bool IsX86 = Name.starts_with("x86.");
if (IsX86)
Name = Name.substr(4);
- bool IsNVVM = Name.startswith("nvvm.");
+ bool IsNVVM = Name.starts_with("nvvm.");
if (IsNVVM)
Name = Name.substr(5);
- bool IsARM = Name.startswith("arm.");
+ bool IsARM = Name.starts_with("arm.");
if (IsARM)
Name = Name.substr(4);
- bool IsAMDGCN = Name.startswith("amdgcn.");
+ bool IsAMDGCN = Name.starts_with("amdgcn.");
if (IsAMDGCN)
Name = Name.substr(7);
- if (IsX86 && Name.startswith("sse4a.movnt.")) {
+ if (IsX86 && Name.starts_with("sse4a.movnt.")) {
SmallVector<Metadata *, 1> Elts;
Elts.push_back(
ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
@@ -2374,8 +2324,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
return;
}
- if (IsX86 && (Name.startswith("avx.movnt.") ||
- Name.startswith("avx512.storent."))) {
+ if (IsX86 && (Name.starts_with("avx.movnt.") ||
+ Name.starts_with("avx512.storent."))) {
SmallVector<Metadata *, 1> Elts;
Elts.push_back(
ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
@@ -2415,9 +2365,9 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
return;
}
- if (IsX86 && (Name.startswith("sse.storeu.") ||
- Name.startswith("sse2.storeu.") ||
- Name.startswith("avx.storeu."))) {
+ if (IsX86 && (Name.starts_with("sse.storeu.") ||
+ Name.starts_with("sse2.storeu.") ||
+ Name.starts_with("avx.storeu."))) {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
@@ -2441,7 +2391,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
return;
}
- if (IsX86 && (Name.startswith("avx512.mask.store"))) {
+ if (IsX86 && (Name.starts_with("avx512.mask.store"))) {
// "avx512.mask.storeu." or "avx512.mask.store."
bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
@@ -2454,14 +2404,14 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Value *Rep;
// Upgrade packed integer vector compare intrinsics to compare instructions.
- if (IsX86 && (Name.startswith("sse2.pcmp") ||
- Name.startswith("avx2.pcmp"))) {
+ if (IsX86 && (Name.starts_with("sse2.pcmp") ||
+ Name.starts_with("avx2.pcmp"))) {
// "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
bool CmpEq = Name[9] == 'e';
Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
CI->getArgOperand(0), CI->getArgOperand(1));
Rep = Builder.CreateSExt(Rep, CI->getType(), "");
- } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
+ } else if (IsX86 && (Name.starts_with("avx512.broadcastm"))) {
Type *ExtTy = Type::getInt32Ty(C);
if (CI->getOperand(0)->getType()->isIntegerTy(8))
ExtTy = Type::getInt64Ty(C);
@@ -2477,14 +2427,14 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Intrinsic::sqrt, Elt0->getType());
Elt0 = Builder.CreateCall(Intr, Elt0);
Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
- } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
- Name.startswith("sse2.sqrt.p") ||
- Name.startswith("sse.sqrt.p"))) {
+ } else if (IsX86 && (Name.starts_with("avx.sqrt.p") ||
+ Name.starts_with("sse2.sqrt.p") ||
+ Name.starts_with("sse.sqrt.p"))) {
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
Intrinsic::sqrt,
CI->getType()),
{CI->getArgOperand(0)});
- } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
+ } else if (IsX86 && (Name.starts_with("avx512.mask.sqrt.p"))) {
if (CI->arg_size() == 4 &&
(!isa<ConstantInt>(CI->getArgOperand(3)) ||
cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
@@ -2502,8 +2452,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
}
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
- } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
- Name.startswith("avx512.ptestnm"))) {
+ } else if (IsX86 && (Name.starts_with("avx512.ptestm") ||
+ Name.starts_with("avx512.ptestnm"))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
Value *Mask = CI->getArgOperand(2);
@@ -2511,16 +2461,16 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
llvm::Type *Ty = Op0->getType();
Value *Zero = llvm::Constant::getNullValue(Ty);
ICmpInst::Predicate Pred =
- Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
+ Name.starts_with("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
Rep = Builder.CreateICmp(Pred, Rep, Zero);
Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
- } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
+ } else if (IsX86 && (Name.starts_with("avx512.mask.pbroadcast"))){
unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
->getNumElements();
Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
- } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
+ } else if (IsX86 && (Name.starts_with("avx512.kunpck"))) {
unsigned NumElts = CI->getType()->getScalarSizeInBits();
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
@@ -2602,11 +2552,11 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
EltOp = Builder.CreateFDiv(Elt0, Elt1);
Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
ConstantInt::get(I32Ty, 0));
- } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.pcmp")) {
// "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
bool CmpEq = Name[16] == 'e';
Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
- } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.vpshufbitqmb.")) {
Type *OpTy = CI->getArgOperand(0)->getType();
unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
Intrinsic::ID IID;
@@ -2620,7 +2570,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getOperand(0), CI->getArgOperand(1) });
Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.fpclass.p")) {
Type *OpTy = CI->getArgOperand(0)->getType();
unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
unsigned EltWidth = OpTy->getScalarSizeInBits();
@@ -2643,7 +2593,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getOperand(0), CI->getArgOperand(1) });
Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.cmp.p")) {
+ } else if (IsX86 && Name.starts_with("avx512.cmp.p")) {
SmallVector<Value *, 4> Args(CI->args());
Type *OpTy = Args[0]->getType();
unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
@@ -2671,17 +2621,17 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
Args);
- } else if (IsX86 && Name.startswith("avx512.mask.cmp.")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.cmp.")) {
// Integer compare intrinsics.
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
- } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.ucmp.")) {
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
- } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
- Name.startswith("avx512.cvtw2mask.") ||
- Name.startswith("avx512.cvtd2mask.") ||
- Name.startswith("avx512.cvtq2mask."))) {
+ } else if (IsX86 && (Name.starts_with("avx512.cvtb2mask.") ||
+ Name.starts_with("avx512.cvtw2mask.") ||
+ Name.starts_with("avx512.cvtd2mask.") ||
+ Name.starts_with("avx512.cvtq2mask."))) {
Value *Op = CI->getArgOperand(0);
Value *Zero = llvm::Constant::getNullValue(Op->getType());
Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
@@ -2689,42 +2639,42 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
} else if (IsX86 && (Name == "ssse3.pabs.b.128" ||
Name == "ssse3.pabs.w.128" ||
Name == "ssse3.pabs.d.128" ||
- Name.startswith("avx2.pabs") ||
- Name.startswith("avx512.mask.pabs"))) {
+ Name.starts_with("avx2.pabs") ||
+ Name.starts_with("avx512.mask.pabs"))) {
Rep = upgradeAbs(Builder, *CI);
} else if (IsX86 && (Name == "sse41.pmaxsb" ||
Name == "sse2.pmaxs.w" ||
Name == "sse41.pmaxsd" ||
- Name.startswith("avx2.pmaxs") ||
- Name.startswith("avx512.mask.pmaxs"))) {
+ Name.starts_with("avx2.pmaxs") ||
+ Name.starts_with("avx512.mask.pmaxs"))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
} else if (IsX86 && (Name == "sse2.pmaxu.b" ||
Name == "sse41.pmaxuw" ||
Name == "sse41.pmaxud" ||
- Name.startswith("avx2.pmaxu") ||
- Name.startswith("avx512.mask.pmaxu"))) {
+ Name.starts_with("avx2.pmaxu") ||
+ Name.starts_with("avx512.mask.pmaxu"))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
} else if (IsX86 && (Name == "sse41.pminsb" ||
Name == "sse2.pmins.w" ||
Name == "sse41.pminsd" ||
- Name.startswith("avx2.pmins") ||
- Name.startswith("avx512.mask.pmins"))) {
+ Name.starts_with("avx2.pmins") ||
+ Name.starts_with("avx512.mask.pmins"))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
} else if (IsX86 && (Name == "sse2.pminu.b" ||
Name == "sse41.pminuw" ||
Name == "sse41.pminud" ||
- Name.startswith("avx2.pminu") ||
- Name.startswith("avx512.mask.pminu"))) {
+ Name.starts_with("avx2.pminu") ||
+ Name.starts_with("avx512.mask.pminu"))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
} else if (IsX86 && (Name == "sse2.pmulu.dq" ||
Name == "avx2.pmulu.dq" ||
Name == "avx512.pmulu.dq.512" ||
- Name.startswith("avx512.mask.pmulu.dq."))) {
+ Name.starts_with("avx512.mask.pmulu.dq."))) {
Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
} else if (IsX86 && (Name == "sse41.pmuldq" ||
Name == "avx2.pmul.dq" ||
Name == "avx512.pmul.dq.512" ||
- Name.startswith("avx512.mask.pmul.dq."))) {
+ Name.starts_with("avx512.mask.pmul.dq."))) {
Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
} else if (IsX86 && (Name == "sse.cvtsi2ss" ||
Name == "sse2.cvtsi2sd" ||
@@ -2748,12 +2698,12 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Name == "sse2.cvtdq2ps" ||
Name == "avx.cvtdq2.pd.256" ||
Name == "avx.cvtdq2.ps.256" ||
- Name.startswith("avx512.mask.cvtdq2pd.") ||
- Name.startswith("avx512.mask.cvtudq2pd.") ||
- Name.startswith("avx512.mask.cvtdq2ps.") ||
- Name.startswith("avx512.mask.cvtudq2ps.") ||
- Name.startswith("avx512.mask.cvtqq2pd.") ||
- Name.startswith("avx512.mask.cvtuqq2pd.") ||
+ Name.starts_with("avx512.mask.cvtdq2pd.") ||
+ Name.starts_with("avx512.mask.cvtudq2pd.") ||
+ Name.starts_with("avx512.mask.cvtdq2ps.") ||
+ Name.starts_with("avx512.mask.cvtudq2ps.") ||
+ Name.starts_with("avx512.mask.cvtqq2pd.") ||
+ Name.starts_with("avx512.mask.cvtuqq2pd.") ||
Name == "avx512.mask.cvtqq2ps.256" ||
Name == "avx512.mask.cvtqq2ps.512" ||
Name == "avx512.mask.cvtuqq2ps.256" ||
@@ -2792,8 +2742,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
if (CI->arg_size() >= 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
- } else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
- Name.startswith("vcvtph2ps."))) {
+ } else if (IsX86 && (Name.starts_with("avx512.mask.vcvtph2ps.") ||
+ Name.starts_with("vcvtph2ps."))) {
auto *DstTy = cast<FixedVectorType>(CI->getType());
Rep = CI->getArgOperand(0);
auto *SrcTy = cast<FixedVectorType>(Rep->getType());
@@ -2808,13 +2758,13 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
if (CI->arg_size() >= 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
- } else if (IsX86 && Name.startswith("avx512.mask.load")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.load")) {
// "avx512.mask.loadu." or "avx512.mask.load."
bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
Rep =
UpgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), Aligned);
- } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.expand.load.")) {
auto *ResultTy = cast<FixedVectorType>(CI->getType());
Type *PtrTy = ResultTy->getElementType();
@@ -2829,7 +2779,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Intrinsic::masked_expandload,
ResultTy);
Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
- } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.compress.store.")) {
auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
Type *PtrTy = ResultTy->getElementType();
@@ -2845,8 +2795,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Intrinsic::masked_compressstore,
ResultTy);
Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
- } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
- Name.startswith("avx512.mask.expand."))) {
+ } else if (IsX86 && (Name.starts_with("avx512.mask.compress.") ||
+ Name.starts_with("avx512.mask.expand."))) {
auto *ResultTy = cast<FixedVectorType>(CI->getType());
Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
@@ -2858,13 +2808,13 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
MaskVec });
- } else if (IsX86 && Name.startswith("xop.vpcom")) {
+ } else if (IsX86 && Name.starts_with("xop.vpcom")) {
bool IsSigned;
- if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
- Name.endswith("uq"))
+ if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
+ Name.ends_with("uq"))
IsSigned = false;
- else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
- Name.endswith("q"))
+ else if (Name.ends_with("b") || Name.ends_with("w") || Name.ends_with("d") ||
+ Name.ends_with("q"))
IsSigned = true;
else
llvm_unreachable("Unknown suffix");
@@ -2874,48 +2824,48 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
} else {
Name = Name.substr(9); // strip off "xop.vpcom"
- if (Name.startswith("lt"))
+ if (Name.starts_with("lt"))
Imm = 0;
- else if (Name.startswith("le"))
+ else if (Name.starts_with("le"))
Imm = 1;
- else if (Name.startswith("gt"))
+ else if (Name.starts_with("gt"))
Imm = 2;
- else if (Name.startswith("ge"))
+ else if (Name.starts_with("ge"))
Imm = 3;
- else if (Name.startswith("eq"))
+ else if (Name.starts_with("eq"))
Imm = 4;
- else if (Name.startswith("ne"))
+ else if (Name.starts_with("ne"))
Imm = 5;
- else if (Name.startswith("false"))
+ else if (Name.starts_with("false"))
Imm = 6;
- else if (Name.startswith("true"))
+ else if (Name.starts_with("true"))
Imm = 7;
else
llvm_unreachable("Unknown condition");
}
Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
- } else if (IsX86 && Name.startswith("xop.vpcmov")) {
+ } else if (IsX86 && Name.starts_with("xop.vpcmov")) {
Value *Sel = CI->getArgOperand(2);
Value *NotSel = Builder.CreateNot(Sel);
Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
Rep = Builder.CreateOr(Sel0, Sel1);
- } else if (IsX86 && (Name.startswith("xop.vprot") ||
- Name.startswith("avx512.prol") ||
- Name.startswith("avx512.mask.prol"))) {
+ } else if (IsX86 && (Name.starts_with("xop.vprot") ||
+ Name.starts_with("avx512.prol") ||
+ Name.starts_with("avx512.mask.prol"))) {
Rep = upgradeX86Rotate(Builder, *CI, false);
- } else if (IsX86 && (Name.startswith("avx512.pror") ||
- Name.startswith("avx512.mask.pror"))) {
+ } else if (IsX86 && (Name.starts_with("avx512.pror") ||
+ Name.starts_with("avx512.mask.pror"))) {
Rep = upgradeX86Rotate(Builder, *CI, true);
- } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
- Name.startswith("avx512.mask.vpshld") ||
- Name.startswith("avx512.maskz.vpshld"))) {
+ } else if (IsX86 && (Name.starts_with("avx512.vpshld.") ||
+ Name.starts_with("avx512.mask.vpshld") ||
+ Name.starts_with("avx512.maskz.vpshld"))) {
bool ZeroMask = Name[11] == 'z';
Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
- } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
- Name.startswith("avx512.mask.vpshrd") ||
- Name.startswith("avx512.maskz.vpshrd"))) {
+ } else if (IsX86 && (Name.starts_with("avx512.vpshrd.") ||
+ Name.starts_with("avx512.mask.vpshrd") ||
+ Name.starts_with("avx512.maskz.vpshrd"))) {
bool ZeroMask = Name[11] == 'z';
Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
} else if (IsX86 && Name == "sse42.crc32.64.8") {
@@ -2924,26 +2874,24 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
Rep = Builder.CreateZExt(Rep, CI->getType(), "");
- } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
- Name.startswith("avx512.vbroadcast.s"))) {
+ } else if (IsX86 && (Name.starts_with("avx.vbroadcast.s") ||
+ Name.starts_with("avx512.vbroadcast.s"))) {
// Replace broadcasts with a series of insertelements.
auto *VecTy = cast<FixedVectorType>(CI->getType());
Type *EltTy = VecTy->getElementType();
unsigned EltNum = VecTy->getNumElements();
- Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
- EltTy->getPointerTo());
- Value *Load = Builder.CreateLoad(EltTy, Cast);
+ Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
Type *I32Ty = Type::getInt32Ty(C);
Rep = PoisonValue::get(VecTy);
for (unsigned I = 0; I < EltNum; ++I)
Rep = Builder.CreateInsertElement(Rep, Load,
ConstantInt::get(I32Ty, I));
- } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
- Name.startswith("sse41.pmovzx") ||
- Name.startswith("avx2.pmovsx") ||
- Name.startswith("avx2.pmovzx") ||
- Name.startswith("avx512.mask.pmovsx") ||
- Name.startswith("avx512.mask.pmovzx"))) {
+ } else if (IsX86 && (Name.starts_with("sse41.pmovsx") ||
+ Name.starts_with("sse41.pmovzx") ||
+ Name.starts_with("avx2.pmovsx") ||
+ Name.starts_with("avx2.pmovzx") ||
+ Name.starts_with("avx512.mask.pmovsx") ||
+ Name.starts_with("avx512.mask.pmovzx"))) {
auto *DstTy = cast<FixedVectorType>(CI->getType());
unsigned NumDstElts = DstTy->getNumElements();
@@ -2970,7 +2918,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
- } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
+ } else if (IsX86 && (Name.starts_with("avx.vbroadcastf128") ||
Name == "avx2.vbroadcasti128")) {
// Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
@@ -2984,8 +2932,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
else
Rep = Builder.CreateShuffleVector(
Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
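// e.g. (illustrative) an <8 x float> vbroadcastf128 loads <4 x float> and
// duplicates the 128-bit lane with the <0,1,2,3,0,1,2,3> shuffle above.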
- } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
- Name.startswith("avx512.mask.shuf.f"))) {
+ } else if (IsX86 && (Name.starts_with("avx512.mask.shuf.i") ||
+ Name.starts_with("avx512.mask.shuf.f"))) {
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Type *VT = CI->getType();
unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
@@ -3006,8 +2954,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
CI->getArgOperand(1), ShuffleMask);
Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
CI->getArgOperand(3));
- }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
- Name.startswith("avx512.mask.broadcasti"))) {
+ } else if (IsX86 && (Name.starts_with("avx512.mask.broadcastf") ||
+ Name.starts_with("avx512.mask.broadcasti"))) {
unsigned NumSrcElts =
cast<FixedVectorType>(CI->getArgOperand(0)->getType())
->getNumElements();
@@ -3023,10 +2971,10 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
ShuffleMask);
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
- } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
- Name.startswith("avx2.vbroadcast") ||
- Name.startswith("avx512.pbroadcast") ||
- Name.startswith("avx512.mask.broadcast.s"))) {
+ } else if (IsX86 && (Name.starts_with("avx2.pbroadcast") ||
+ Name.starts_with("avx2.vbroadcast") ||
+ Name.starts_with("avx512.pbroadcast") ||
+ Name.starts_with("avx512.mask.broadcast.s"))) {
// Replace vp?broadcasts with a vector shuffle.
Value *Op = CI->getArgOperand(0);
ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
@@ -3038,32 +2986,32 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
if (CI->arg_size() == 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
- } else if (IsX86 && (Name.startswith("sse2.padds.") ||
- Name.startswith("avx2.padds.") ||
- Name.startswith("avx512.padds.") ||
- Name.startswith("avx512.mask.padds."))) {
+ } else if (IsX86 && (Name.starts_with("sse2.padds.") ||
+ Name.starts_with("avx2.padds.") ||
+ Name.starts_with("avx512.padds.") ||
+ Name.starts_with("avx512.mask.padds."))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
- } else if (IsX86 && (Name.startswith("sse2.psubs.") ||
- Name.startswith("avx2.psubs.") ||
- Name.startswith("avx512.psubs.") ||
- Name.startswith("avx512.mask.psubs."))) {
+ } else if (IsX86 && (Name.starts_with("sse2.psubs.") ||
+ Name.starts_with("avx2.psubs.") ||
+ Name.starts_with("avx512.psubs.") ||
+ Name.starts_with("avx512.mask.psubs."))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
- } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
- Name.startswith("avx2.paddus.") ||
- Name.startswith("avx512.mask.paddus."))) {
+ } else if (IsX86 && (Name.starts_with("sse2.paddus.") ||
+ Name.starts_with("avx2.paddus.") ||
+ Name.starts_with("avx512.mask.paddus."))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
- } else if (IsX86 && (Name.startswith("sse2.psubus.") ||
- Name.startswith("avx2.psubus.") ||
- Name.startswith("avx512.mask.psubus."))) {
+ } else if (IsX86 && (Name.starts_with("sse2.psubus.") ||
+ Name.starts_with("avx2.psubus.") ||
+ Name.starts_with("avx512.mask.psubus."))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
- } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.palignr.")) {
Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
CI->getArgOperand(1),
CI->getArgOperand(2),
CI->getArgOperand(3),
CI->getArgOperand(4),
false);
- } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.valign.")) {
Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
CI->getArgOperand(1),
CI->getArgOperand(2),
@@ -3095,10 +3043,10 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
} else if (IsX86 && (Name == "sse41.pblendw" ||
- Name.startswith("sse41.blendp") ||
- Name.startswith("avx.blend.p") ||
+ Name.starts_with("sse41.blendp") ||
+ Name.starts_with("avx.blend.p") ||
Name == "avx2.pblendw" ||
- Name.startswith("avx2.pblendd."))) {
+ Name.starts_with("avx2.pblendd."))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
@@ -3110,9 +3058,9 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
- } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
+ } else if (IsX86 && (Name.starts_with("avx.vinsertf128.") ||
Name == "avx2.vinserti128" ||
- Name.startswith("avx512.mask.insert"))) {
+ Name.starts_with("avx512.mask.insert"))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
@@ -3156,9 +3104,9 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
if (CI->arg_size() == 5)
Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
CI->getArgOperand(3));
- } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
+ } else if (IsX86 && (Name.starts_with("avx.vextractf128.") ||
Name == "avx2.vextracti128" ||
- Name.startswith("avx512.mask.vextract"))) {
+ Name.starts_with("avx512.mask.vextract"))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
unsigned DstNumElts =
@@ -3183,8 +3131,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
CI->getArgOperand(2));
} else if (!IsX86 && Name == "stackprotectorcheck") {
Rep = nullptr;
- } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
- Name.startswith("avx512.mask.perm.di."))) {
+ } else if (IsX86 && (Name.starts_with("avx512.mask.perm.df.") ||
+ Name.starts_with("avx512.mask.perm.di."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
auto *VecTy = cast<FixedVectorType>(CI->getType());
@@ -3199,7 +3147,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
if (CI->arg_size() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
+ } else if (IsX86 && (Name.starts_with("avx.vperm2f128.") ||
Name == "avx2.vperm2i128")) {
// The immediate permute control byte looks like this:
// [1:0] - select 128 bits from sources for low half of destination
@@ -3235,10 +3183,10 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
- } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
+ } else if (IsX86 && (Name.starts_with("avx.vpermil.") ||
Name == "sse2.pshuf.d" ||
- Name.startswith("avx512.mask.vpermil.p") ||
- Name.startswith("avx512.mask.pshuf.d."))) {
+ Name.starts_with("avx512.mask.vpermil.p") ||
+ Name.starts_with("avx512.mask.pshuf.d."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
auto *VecTy = cast<FixedVectorType>(CI->getType());
@@ -3260,7 +3208,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name == "sse2.pshufl.w" ||
- Name.startswith("avx512.mask.pshufl.w."))) {
+ Name.starts_with("avx512.mask.pshufl.w."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
@@ -3279,7 +3227,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name == "sse2.pshufh.w" ||
- Name.startswith("avx512.mask.pshufh.w."))) {
+ Name.starts_with("avx512.mask.pshufh.w."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
@@ -3297,7 +3245,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
if (CI->arg_size() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.shuf.p")) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
@@ -3322,15 +3270,15 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
CI->getArgOperand(3));
- } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
- Name.startswith("avx512.mask.movshdup") ||
- Name.startswith("avx512.mask.movsldup"))) {
+ } else if (IsX86 && (Name.starts_with("avx512.mask.movddup") ||
+ Name.starts_with("avx512.mask.movshdup") ||
+ Name.starts_with("avx512.mask.movsldup"))) {
Value *Op0 = CI->getArgOperand(0);
unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
unsigned Offset = 0;
- if (Name.startswith("avx512.mask.movshdup."))
+ if (Name.starts_with("avx512.mask.movshdup."))
Offset = 1;
SmallVector<int, 16> Idxs(NumElts);
@@ -3344,8 +3292,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
- } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
- Name.startswith("avx512.mask.unpckl."))) {
+ } else if (IsX86 && (Name.starts_with("avx512.mask.punpckl") ||
+ Name.starts_with("avx512.mask.unpckl."))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
@@ -3360,8 +3308,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
- Name.startswith("avx512.mask.unpckh."))) {
+ } else if (IsX86 && (Name.starts_with("avx512.mask.punpckh") ||
+ Name.starts_with("avx512.mask.unpckh."))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
@@ -3376,8 +3324,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
- Name.startswith("avx512.mask.pand."))) {
+ } else if (IsX86 && (Name.starts_with("avx512.mask.and.") ||
+ Name.starts_with("avx512.mask.pand."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
@@ -3385,8 +3333,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
- Name.startswith("avx512.mask.pandn."))) {
+ } else if (IsX86 && (Name.starts_with("avx512.mask.andn.") ||
+ Name.starts_with("avx512.mask.pandn."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
@@ -3395,8 +3343,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
- Name.startswith("avx512.mask.por."))) {
+ } else if (IsX86 && (Name.starts_with("avx512.mask.or.") ||
+ Name.starts_with("avx512.mask.por."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
@@ -3404,8 +3352,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
- Name.startswith("avx512.mask.pxor."))) {
+ } else if (IsX86 && (Name.starts_with("avx512.mask.xor.") ||
+ Name.starts_with("avx512.mask.pxor."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
@@ -3413,20 +3361,20 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.padd.")) {
Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.psub.")) {
Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.pmull.")) {
Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
- if (Name.endswith(".512")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.add.p")) {
+ if (Name.ends_with(".512")) {
Intrinsic::ID IID;
if (Name[17] == 's')
IID = Intrinsic::x86_avx512_add_ps_512;
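[Editorial note: the add/sub/mul/div.p branches pick the rounding-capable 512-bit intrinsics by probing fixed character positions of the already-stripped name: index 17 of "avx512.mask.add.pX" is the element-type letter. A small hedged sketch of that probe; the strings are illustrative.]

// Positions assume the "llvm.x86." prefix was already stripped, as above.
#include "llvm/ADT/StringRef.h"
#include <cassert>

int main() {
  llvm::StringRef PS = "avx512.mask.add.ps.512";
  llvm::StringRef PD = "avx512.mask.add.pd.512";
  assert(PS[17] == 's' && PS.ends_with(".512")); // float flavour
  assert(PD[17] == 'd');                         // double flavour
  return 0;
}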
@@ -3441,8 +3389,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
}
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
- if (Name.endswith(".512")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.div.p")) {
+ if (Name.ends_with(".512")) {
Intrinsic::ID IID;
if (Name[17] == 's')
IID = Intrinsic::x86_avx512_div_ps_512;
@@ -3457,8 +3405,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
}
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
- if (Name.endswith(".512")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.mul.p")) {
+ if (Name.ends_with(".512")) {
Intrinsic::ID IID;
if (Name[17] == 's')
IID = Intrinsic::x86_avx512_mul_ps_512;
@@ -3473,8 +3421,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
}
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
- if (Name.endswith(".512")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.sub.p")) {
+ if (Name.ends_with(".512")) {
Intrinsic::ID IID;
if (Name[17] == 's')
IID = Intrinsic::x86_avx512_sub_ps_512;
@@ -3489,8 +3437,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
}
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
- Name.startswith("avx512.mask.min.p")) &&
+ } else if (IsX86 && (Name.starts_with("avx512.mask.max.p") ||
+ Name.starts_with("avx512.mask.min.p")) &&
Name.drop_front(18) == ".512") {
bool IsDouble = Name[17] == 'd';
bool IsMin = Name[13] == 'i';
@@ -3505,14 +3453,14 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
CI->getArgOperand(4) });
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.lzcnt.")) {
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
Intrinsic::ctlz,
CI->getType()),
{ CI->getArgOperand(0), Builder.getInt1(false) });
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
- } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.psll")) {
bool IsImmediate = Name[16] == 'i' ||
(Name.size() > 18 && Name[18] == 'i');
bool IsVariable = Name[16] == 'v';
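[Editorial note: the shift upgrades likewise decode the operation form from fixed name offsets: character 16 of "avx512.mask.psll..." (or 18 for two-letter element codes) distinguishes immediate, variable, and plain register shifts. A standalone sketch of the immediate test; the inputs are illustrative.]

// Mirrors the IsImmediate test above; Name has "llvm.x86." already stripped.
#include "llvm/ADT/StringRef.h"
#include <cassert>

static bool isImmediateShift(llvm::StringRef Name) {
  return Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
}

int main() {
  assert(isImmediateShift("avx512.mask.pslli.d"));     // 'i' at index 16
  assert(isImmediateShift("avx512.mask.psll.di.128")); // 'i' at index 18
  assert(!isImmediateShift("avx512.mask.psll.d.128")); // register form
  return 0;
}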
@@ -3539,7 +3487,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
IID = Intrinsic::x86_avx512_psllv_w_512;
else
llvm_unreachable("Unexpected size");
- } else if (Name.endswith(".128")) {
+ } else if (Name.ends_with(".128")) {
if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
: Intrinsic::x86_sse2_psll_d;
@@ -3551,7 +3499,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
: Intrinsic::x86_sse2_psll_w;
else
llvm_unreachable("Unexpected size");
- } else if (Name.endswith(".256")) {
+ } else if (Name.ends_with(".256")) {
if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
: Intrinsic::x86_avx2_psll_d;
@@ -3580,7 +3528,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
}
Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
- } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.psrl")) {
bool IsImmediate = Name[16] == 'i' ||
(Name.size() > 18 && Name[18] == 'i');
bool IsVariable = Name[16] == 'v';
@@ -3607,7 +3555,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
IID = Intrinsic::x86_avx512_psrlv_w_512;
else
llvm_unreachable("Unexpected size");
- } else if (Name.endswith(".128")) {
+ } else if (Name.ends_with(".128")) {
if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
: Intrinsic::x86_sse2_psrl_d;
@@ -3619,7 +3567,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
: Intrinsic::x86_sse2_psrl_w;
else
llvm_unreachable("Unexpected size");
- } else if (Name.endswith(".256")) {
+ } else if (Name.ends_with(".256")) {
if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
: Intrinsic::x86_avx2_psrl_d;
@@ -3648,7 +3596,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
}
Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
- } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.psra")) {
bool IsImmediate = Name[16] == 'i' ||
(Name.size() > 18 && Name[18] == 'i');
bool IsVariable = Name[16] == 'v';
@@ -3671,7 +3619,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
IID = Intrinsic::x86_avx512_psrav_w_512;
else
llvm_unreachable("Unexpected size");
- } else if (Name.endswith(".128")) {
+ } else if (Name.ends_with(".128")) {
if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
: Intrinsic::x86_sse2_psra_d;
@@ -3684,7 +3632,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
: Intrinsic::x86_sse2_psra_w;
else
llvm_unreachable("Unexpected size");
- } else if (Name.endswith(".256")) {
+ } else if (Name.ends_with(".256")) {
if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
: Intrinsic::x86_avx2_psra_d;
@@ -3714,11 +3662,11 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
}
Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
- } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
+ } else if (IsX86 && Name.starts_with("avx512.mask.move.s")) {
Rep = upgradeMaskedMove(Builder, *CI);
- } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
+ } else if (IsX86 && Name.starts_with("avx512.cvtmask2")) {
Rep = UpgradeMaskToInt(Builder, *CI);
- } else if (IsX86 && Name.endswith(".movntdqa")) {
+ } else if (IsX86 && Name.ends_with(".movntdqa")) {
MDNode *Node = MDNode::get(
C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
@@ -3732,10 +3680,10 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
LI->setMetadata(LLVMContext::MD_nontemporal, Node);
Rep = LI;
- } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
- Name.startswith("fma.vfmsub.") ||
- Name.startswith("fma.vfnmadd.") ||
- Name.startswith("fma.vfnmsub."))) {
+ } else if (IsX86 && (Name.starts_with("fma.vfmadd.") ||
+ Name.starts_with("fma.vfmsub.") ||
+ Name.starts_with("fma.vfnmadd.") ||
+ Name.starts_with("fma.vfnmsub."))) {
bool NegMul = Name[6] == 'n';
bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
@@ -3764,7 +3712,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
if (IsScalar)
Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
(uint64_t)0);
- } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
+ } else if (IsX86 && Name.starts_with("fma4.vfmadd.s")) {
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2) };
@@ -3779,11 +3727,11 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
Rep, (uint64_t)0);
- } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
- Name.startswith("avx512.maskz.vfmadd.s") ||
- Name.startswith("avx512.mask3.vfmadd.s") ||
- Name.startswith("avx512.mask3.vfmsub.s") ||
- Name.startswith("avx512.mask3.vfnmsub.s"))) {
+ } else if (IsX86 && (Name.starts_with("avx512.mask.vfmadd.s") ||
+ Name.starts_with("avx512.maskz.vfmadd.s") ||
+ Name.starts_with("avx512.mask3.vfmadd.s") ||
+ Name.starts_with("avx512.mask3.vfmsub.s") ||
+ Name.starts_with("avx512.mask3.vfnmsub.s"))) {
bool IsMask3 = Name[11] == '3';
bool IsMaskZ = Name[11] == 'z';
// Drop the "avx512.mask." to make it easier.
@@ -3837,13 +3785,13 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Rep, PassThru);
Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
Rep, (uint64_t)0);
- } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
- Name.startswith("avx512.mask.vfnmadd.p") ||
- Name.startswith("avx512.mask.vfnmsub.p") ||
- Name.startswith("avx512.mask3.vfmadd.p") ||
- Name.startswith("avx512.mask3.vfmsub.p") ||
- Name.startswith("avx512.mask3.vfnmsub.p") ||
- Name.startswith("avx512.maskz.vfmadd.p"))) {
+ } else if (IsX86 && (Name.starts_with("avx512.mask.vfmadd.p") ||
+ Name.starts_with("avx512.mask.vfnmadd.p") ||
+ Name.starts_with("avx512.mask.vfnmsub.p") ||
+ Name.starts_with("avx512.mask3.vfmadd.p") ||
+ Name.starts_with("avx512.mask3.vfmsub.p") ||
+ Name.starts_with("avx512.mask3.vfnmsub.p") ||
+ Name.starts_with("avx512.maskz.vfmadd.p"))) {
bool IsMask3 = Name[11] == '3';
bool IsMaskZ = Name[11] == 'z';
// Drop the "avx512.mask." to make it easier.
@@ -3886,7 +3834,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
CI->getArgOperand(0);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
- } else if (IsX86 && Name.startswith("fma.vfmsubadd.p")) {
+ } else if (IsX86 && Name.starts_with("fma.vfmsubadd.p")) {
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
unsigned EltWidth = CI->getType()->getScalarSizeInBits();
Intrinsic::ID IID;
@@ -3906,10 +3854,10 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Ops[2] = Builder.CreateFNeg(Ops[2]);
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
Ops);
- } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
- Name.startswith("avx512.mask3.vfmaddsub.p") ||
- Name.startswith("avx512.maskz.vfmaddsub.p") ||
- Name.startswith("avx512.mask3.vfmsubadd.p"))) {
+ } else if (IsX86 && (Name.starts_with("avx512.mask.vfmaddsub.p") ||
+ Name.starts_with("avx512.mask3.vfmaddsub.p") ||
+ Name.starts_with("avx512.maskz.vfmaddsub.p") ||
+ Name.starts_with("avx512.mask3.vfmsubadd.p"))) {
bool IsMask3 = Name[11] == '3';
bool IsMaskZ = Name[11] == 'z';
// Drop the "avx512.mask." to make it easier.
@@ -3957,8 +3905,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
CI->getArgOperand(0);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
- } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
- Name.startswith("avx512.maskz.pternlog."))) {
+ } else if (IsX86 && (Name.starts_with("avx512.mask.pternlog.") ||
+ Name.starts_with("avx512.maskz.pternlog."))) {
bool ZeroMask = Name[11] == 'z';
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
unsigned EltWidth = CI->getType()->getScalarSizeInBits();
@@ -3985,8 +3933,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
: CI->getArgOperand(0);
Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
- } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
- Name.startswith("avx512.maskz.vpmadd52"))) {
+ } else if (IsX86 && (Name.starts_with("avx512.mask.vpmadd52") ||
+ Name.starts_with("avx512.maskz.vpmadd52"))) {
bool ZeroMask = Name[11] == 'z';
bool High = Name[20] == 'h' || Name[21] == 'h';
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
@@ -4013,16 +3961,16 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
: CI->getArgOperand(0);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
- } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
- Name.startswith("avx512.mask.vpermt2var.") ||
- Name.startswith("avx512.maskz.vpermt2var."))) {
+ } else if (IsX86 && (Name.starts_with("avx512.mask.vpermi2var.") ||
+ Name.starts_with("avx512.mask.vpermt2var.") ||
+ Name.starts_with("avx512.maskz.vpermt2var."))) {
bool ZeroMask = Name[11] == 'z';
bool IndexForm = Name[17] == 'i';
Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
- } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
- Name.startswith("avx512.maskz.vpdpbusd.") ||
- Name.startswith("avx512.mask.vpdpbusds.") ||
- Name.startswith("avx512.maskz.vpdpbusds."))) {
+ } else if (IsX86 && (Name.starts_with("avx512.mask.vpdpbusd.") ||
+ Name.starts_with("avx512.maskz.vpdpbusd.") ||
+ Name.starts_with("avx512.mask.vpdpbusds.") ||
+ Name.starts_with("avx512.maskz.vpdpbusds."))) {
bool ZeroMask = Name[11] == 'z';
bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
@@ -4049,10 +3997,10 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
: CI->getArgOperand(0);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
- } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
- Name.startswith("avx512.maskz.vpdpwssd.") ||
- Name.startswith("avx512.mask.vpdpwssds.") ||
- Name.startswith("avx512.maskz.vpdpwssds."))) {
+ } else if (IsX86 && (Name.starts_with("avx512.mask.vpdpwssd.") ||
+ Name.starts_with("avx512.maskz.vpdpwssd.") ||
+ Name.starts_with("avx512.mask.vpdpwssds.") ||
+ Name.starts_with("avx512.maskz.vpdpwssds."))) {
bool ZeroMask = Name[11] == 'z';
bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
@@ -4112,7 +4060,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
CI->replaceAllUsesWith(CF);
Rep = nullptr;
- } else if (IsX86 && Name.startswith("avx512.mask.") &&
+ } else if (IsX86 && Name.starts_with("avx512.mask.") &&
upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
// Rep will be updated by the call in the condition.
} else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
@@ -4121,8 +4069,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Value *Cmp = Builder.CreateICmpSGE(
Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
- } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
- Name.startswith("atomic.load.add.f64.p"))) {
+ } else if (IsNVVM && (Name.starts_with("atomic.load.add.f32.p") ||
+ Name.starts_with("atomic.load.add.f64.p"))) {
Value *Ptr = CI->getArgOperand(0);
Value *Val = CI->getArgOperand(1);
Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
@@ -4131,7 +4079,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Name == "max.ui" || Name == "max.ull")) {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
- Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
+ Value *Cmp = Name.ends_with(".ui") || Name.ends_with(".ull")
? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
: Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
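[Editorial note: the legacy llvm.nvvm.max/min intrinsics lower to a compare plus select, with the predicate chosen by the .ui/.ull suffix. Below is a hedged, standalone IRBuilder harness reproducing just the signed-max expansion; the module, function, and value names are illustrative, not from the patch.]

#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  Type *I32 = Type::getInt32Ty(Ctx);
  Function *F = Function::Create(FunctionType::get(I32, {I32, I32}, false),
                                 Function::ExternalLinkage, "smax", M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> Builder(BB);
  Value *A = F->getArg(0), *B = F->getArg(1);
  // Signed flavour, as in the "max.i"/"max.ll" case; the ".ui"/".ull"
  // variants use CreateICmpUGE instead.
  Value *Cmp = Builder.CreateICmpSGE(A, B, "max.cond");
  Builder.CreateRet(Builder.CreateSelect(Cmp, A, B, "max"));
  M.print(outs(), nullptr);
  return 0;
}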
@@ -4139,7 +4087,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Name == "min.ui" || Name == "min.ull")) {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
- Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
+ Value *Cmp = Name.ends_with(".ui") || Name.ends_with(".ull")
? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
: Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
@@ -4290,10 +4238,17 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
break;
}
+ case Intrinsic::coro_end: {
+ SmallVector<Value *, 3> Args(CI->args());
+ Args.push_back(ConstantTokenNone::get(CI->getContext()));
+ NewCall = Builder.CreateCall(NewFn, Args);
+ break;
+ }
+
case Intrinsic::vector_extract: {
StringRef Name = F->getName();
Name = Name.substr(5); // Strip llvm
- if (!Name.startswith("aarch64.sve.tuple.get")) {
+ if (!Name.starts_with("aarch64.sve.tuple.get")) {
DefaultCase();
return;
}
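[Editorial note: the new Intrinsic::coro_end case above pads the old argument list with a "none" token. A minimal sketch of constructing that extra operand, standalone and without any coroutine machinery.]

#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"

int main() {
  llvm::LLVMContext Ctx;
  // The constant appended to the call's arguments by the upgrade above.
  llvm::ConstantTokenNone *NoneTok = llvm::ConstantTokenNone::get(Ctx);
  (void)NoneTok;
  return 0;
}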
@@ -4309,11 +4264,11 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
case Intrinsic::vector_insert: {
StringRef Name = F->getName();
Name = Name.substr(5);
- if (!Name.startswith("aarch64.sve.tuple")) {
+ if (!Name.starts_with("aarch64.sve.tuple")) {
DefaultCase();
return;
}
- if (Name.startswith("aarch64.sve.tuple.set")) {
+ if (Name.starts_with("aarch64.sve.tuple.set")) {
unsigned I = dyn_cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
ScalableVectorType *Ty =
dyn_cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
@@ -4323,7 +4278,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
break;
}
- if (Name.startswith("aarch64.sve.tuple.create")) {
+ if (Name.starts_with("aarch64.sve.tuple.create")) {
unsigned N = StringSwitch<unsigned>(Name)
.StartsWith("aarch64.sve.tuple.create2", 2)
.StartsWith("aarch64.sve.tuple.create3", 3)
@@ -4402,7 +4357,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
StringRef Name = F->getName();
Name = Name.substr(5); // Strip llvm.
// Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
- if (Name.startswith("dbg.addr")) {
+ if (Name.starts_with("dbg.addr")) {
DIExpression *Expr = cast<DIExpression>(
cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
@@ -4434,10 +4389,10 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
}
// Create a new call with an added null annotation attribute argument.
- NewCall = Builder.CreateCall(
- NewFn,
- {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
- CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
+ NewCall =
+ Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), CI->getArgOperand(3),
+ Constant::getNullValue(Builder.getPtrTy())});
NewCall->takeName(CI);
CI->replaceAllUsesWith(NewCall);
CI->eraseFromParent();
@@ -4450,10 +4405,10 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
return;
}
// Create a new call with an added null annotation attribute argument.
- NewCall = Builder.CreateCall(
- NewFn,
- {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
- CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
+ NewCall =
+ Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), CI->getArgOperand(3),
+ Constant::getNullValue(Builder.getPtrTy())});
NewCall->takeName(CI);
CI->replaceAllUsesWith(NewCall);
CI->eraseFromParent();
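[Editorial note: both annotation upgrades swap Builder.getInt8PtrTy() for Builder.getPtrTy(); with opaque pointers there is a single pointer type per address space, so no pointee type is named. A minimal hedged sketch of the replacement call; the names here are illustrative.]

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"

int main() {
  llvm::LLVMContext Ctx;
  llvm::IRBuilder<> Builder(Ctx);
  llvm::PointerType *PtrTy = Builder.getPtrTy(/*AddrSpace=*/0);
  // Null pointer of the kind used as the added annotation argument above.
  llvm::Constant *NullArg = llvm::Constant::getNullValue(PtrTy);
  (void)NullArg;
  return 0;
}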
@@ -4653,22 +4608,6 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
break;
}
- case Intrinsic::invariant_start:
- case Intrinsic::invariant_end: {
- SmallVector<Value *, 4> Args(CI->args());
- NewCall = Builder.CreateCall(NewFn, Args);
- break;
- }
- case Intrinsic::masked_load:
- case Intrinsic::masked_store:
- case Intrinsic::masked_gather:
- case Intrinsic::masked_scatter: {
- SmallVector<Value *, 4> Args(CI->args());
- NewCall = Builder.CreateCall(NewFn, Args);
- NewCall->copyMetadata(*CI);
- break;
- }
-
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset: {
@@ -5024,7 +4963,7 @@ bool llvm::UpgradeModuleFlags(Module &M) {
// Upgrade branch protection and return address signing module flags. The
// module flag behavior for these fields was Error; now it is Min.
if (ID->getString() == "branch-target-enforcement" ||
- ID->getString().startswith("sign-return-address")) {
+ ID->getString().starts_with("sign-return-address")) {
if (auto *Behavior =
mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
if (Behavior->getLimitedValue() == Module::Error) {
@@ -5127,7 +5066,7 @@ void llvm::UpgradeSectionAttributes(Module &M) {
StringRef Section = GV.getSection();
- if (!Section.startswith("__DATA, __objc_catlist"))
+ if (!Section.starts_with("__DATA, __objc_catlist"))
continue;
// __DATA, __objc_catlist, regular, no_dead_strip
@@ -5187,12 +5126,12 @@ static bool isOldLoopArgument(Metadata *MD) {
auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
if (!S)
return false;
- return S->getString().startswith("llvm.vectorizer.");
+ return S->getString().starts_with("llvm.vectorizer.");
}
static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
StringRef OldPrefix = "llvm.vectorizer.";
- assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
+ assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
if (OldTag == "llvm.vectorizer.unroll")
return MDString::get(C, "llvm.loop.interleave.count");
@@ -5211,7 +5150,7 @@ static Metadata *upgradeLoopArgument(Metadata *MD) {
auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
if (!OldTag)
return MD;
- if (!OldTag->getString().startswith("llvm.vectorizer."))
+ if (!OldTag->getString().starts_with("llvm.vectorizer."))
return MD;
// This has an old tag. Upgrade it.
@@ -5245,7 +5184,7 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
// The only data layout upgrades needed for pre-GCN are setting the address
// space of globals to 1.
if (T.isAMDGPU() && !T.isAMDGCN() && !DL.contains("-G") &&
- !DL.startswith("G")) {
+ !DL.starts_with("G")) {
return DL.empty() ? std::string("G1") : (DL + "-G1").str();
}
@@ -5267,18 +5206,22 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
// Add missing non-integral declarations.
// This goes before adding new address spaces to prevent incoherent string
// values.
- if (!DL.contains("-ni") && !DL.startswith("ni"))
- Res.append("-ni:7:8");
- // Update ni:7 to ni:7:8.
+ if (!DL.contains("-ni") && !DL.starts_with("ni"))
+ Res.append("-ni:7:8:9");
+ // Update ni:7 to ni:7:8:9.
if (DL.ends_with("ni:7"))
- Res.append(":8");
+ Res.append(":8:9");
+ if (DL.ends_with("ni:7:8"))
+ Res.append(":9");
// Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
// resources) An empty data layout has already been upgraded to G1 by now.
- if (!DL.contains("-p7") && !DL.startswith("p7"))
+ if (!DL.contains("-p7") && !DL.starts_with("p7"))
Res.append("-p7:160:256:256:32");
- if (!DL.contains("-p8") && !DL.startswith("p8"))
+ if (!DL.contains("-p8") && !DL.starts_with("p8"))
Res.append("-p8:128:128");
+ if (!DL.contains("-p9") && !DL.starts_with("p9"))
+ Res.append("-p9:192:256:256:32");
return Res;
}
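[Editorial note: a hedged usage sketch of the AMDGPU side of this upgrade. An older layout that already names ni:7:8 should gain the :9 entry plus the new p9 sizing; the input string below is illustrative, not from the patch.]

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/AutoUpgrade.h"
#include <cassert>

int main() {
  std::string Res = llvm::UpgradeDataLayoutString("e-p:64:64-ni:7:8",
                                                  "amdgcn-amd-amdhsa");
  llvm::StringRef Ref(Res);
  assert(Ref.contains("ni:7:8:9"));           // ":9" appended to the ni list
  assert(Ref.contains("-p9:192:256:256:32")); // address space 9 sizing added
  return 0;
}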
@@ -5289,13 +5232,29 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
// If the datalayout matches the expected format, add pointer size address
// spaces to the datalayout.
std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
- if (!DL.contains(AddrSpaces)) {
+ if (StringRef Ref = Res; !Ref.contains(AddrSpaces)) {
SmallVector<StringRef, 4> Groups;
Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
- if (R.match(DL, &Groups))
+ if (R.match(Res, &Groups))
Res = (Groups[1] + AddrSpaces + Groups[3]).str();
}
+ // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
+ // for i128 operations prior to this being reflected in the data layout, and
+ // clang mostly produced LLVM IR that already aligned i128 to 16 byte
+ // boundaries, so although this is a breaking change, the upgrade is expected
+ // to fix more IR than it breaks.
+ // Intel MCU is an exception and uses 4-byte-alignment.
+ if (!T.isOSIAMCU()) {
+ std::string I128 = "-i128:128";
+ if (StringRef Ref = Res; !Ref.contains(I128)) {
+ SmallVector<StringRef, 4> Groups;
+ Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
+ if (R.match(Res, &Groups))
+ Res = (Groups[1] + I128 + Groups[3]).str();
+ }
+ }
+
// For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
// Raising the alignment is safe because Clang did not produce f80 values in
// the MSVC environment before this upgrade was added.
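[Editorial note: a matching hedged sketch for the x86 i128 rule added above. The regex splices "-i128:128" in after the m/p/i groups of a typical pre-upgrade layout; the input string is illustrative.]

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/AutoUpgrade.h"
#include <cassert>

int main() {
  std::string Res = llvm::UpgradeDataLayoutString(
      "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
      "x86_64-unknown-linux-gnu");
  // i128 now carries 16-byte alignment on non-IAMCU targets.
  assert(llvm::StringRef(Res).contains("-i64:64-i128:128-f80:128"));
  return 0;
}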
diff --git a/contrib/llvm-project/llvm/lib/IR/BasicBlock.cpp b/contrib/llvm-project/llvm/lib/IR/BasicBlock.cpp
index 14e1787c2b14..03b74b0480f0 100644
--- a/contrib/llvm-project/llvm/lib/IR/BasicBlock.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/BasicBlock.cpp
@@ -16,16 +16,191 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugProgramInstruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
+#include "llvm/Support/CommandLine.h"
+
+#include "LLVMContextImpl.h"
using namespace llvm;
#define DEBUG_TYPE "ir"
STATISTIC(NumInstrRenumberings, "Number of renumberings across all blocks");
+cl::opt<bool>
+ UseNewDbgInfoFormat("experimental-debuginfo-iterators",
+ cl::desc("Enable communicating debuginfo positions "
+ "through iterators, eliminating intrinsics"),
+ cl::init(false));
+
+DPMarker *BasicBlock::createMarker(Instruction *I) {
+ assert(IsNewDbgInfoFormat &&
+ "Tried to create a marker in a non new debug-info block!");
+ if (I->DbgMarker)
+ return I->DbgMarker;
+ DPMarker *Marker = new DPMarker();
+ Marker->MarkedInstr = I;
+ I->DbgMarker = Marker;
+ return Marker;
+}
+
+DPMarker *BasicBlock::createMarker(InstListType::iterator It) {
+ assert(IsNewDbgInfoFormat &&
+ "Tried to create a marker in a non new debug-info block!");
+ if (It != end())
+ return createMarker(&*It);
+ DPMarker *DPM = getTrailingDPValues();
+ if (DPM)
+ return DPM;
+ DPM = new DPMarker();
+ setTrailingDPValues(DPM);
+ return DPM;
+}
+
+void BasicBlock::convertToNewDbgValues() {
+ // Is the command line option set?
+ if (!UseNewDbgInfoFormat)
+ return;
+
+ IsNewDbgInfoFormat = true;
+
+ // Iterate over all instructions in the instruction list, collecting dbg.value
+ // instructions and converting them to DPValues. Once we find a "real"
+ // instruction, attach all those DPValues to a DPMarker in that instruction.
+ SmallVector<DPValue *, 4> DPVals;
+ for (Instruction &I : make_early_inc_range(InstList)) {
+ assert(!I.DbgMarker && "DbgMarker already set on old-format instrs?");
+ if (DbgVariableIntrinsic *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) {
+ if (isa<DbgAssignIntrinsic>(DVI))
+ continue;
+
+ // Convert this dbg.value to a DPValue.
+ DPValue *Value = new DPValue(DVI);
+ DPVals.push_back(Value);
+ DVI->eraseFromParent();
+ continue;
+ }
+
+ // Create a marker to store DPValues in. Technically we don't need to store
+ // one marker per instruction, but that's a future optimisation.
+ createMarker(&I);
+ DPMarker *Marker = I.DbgMarker;
+
+ for (DPValue *DPV : DPVals)
+ Marker->insertDPValue(DPV, false);
+
+ DPVals.clear();
+ }
+}
+
+void BasicBlock::convertFromNewDbgValues() {
+ invalidateOrders();
+ IsNewDbgInfoFormat = false;
+
+ // Iterate over the block, finding instructions annotated with DPMarkers.
+ // Convert any attached DPValues to dbg.values and insert ahead of the
+ // instruction.
+ for (auto &Inst : *this) {
+ if (!Inst.DbgMarker)
+ continue;
+
+ DPMarker &Marker = *Inst.DbgMarker;
+ for (DPValue &DPV : Marker.getDbgValueRange())
+ InstList.insert(Inst.getIterator(),
+ DPV.createDebugIntrinsic(getModule(), nullptr));
+
+ Marker.eraseFromParent();
+ };
+
+ // Assume no trailing DPValues: we could technically create them at the end
+ // of the block, after a terminator, but this would be non-canonical and
+ // indicates that something else is broken somewhere.
+ assert(!getTrailingDPValues());
+}
+
+bool BasicBlock::validateDbgValues(bool Assert, bool Msg, raw_ostream *OS) {
+ bool RetVal = false;
+ if (!OS)
+ OS = &errs();
+
+ // Helper lambda for reporting failures: via assertion, printing, and return
+ // value.
+ auto TestFailure = [Assert, Msg, &RetVal, OS](bool Val, const char *Text) {
+ // If the check passed, there's nothing to report.
+ if (Val)
+ return;
+
+ // If we're asserting, then fire off an assertion.
+ if (Assert)
+ llvm_unreachable(Text);
+
+ if (Msg)
+ *OS << Text << "\n";
+ RetVal = true;
+ };
+
+ // We should have the same debug-format as the parent function.
+ TestFailure(getParent()->IsNewDbgInfoFormat == IsNewDbgInfoFormat,
+ "Parent function doesn't have the same debug-info format");
+
+ // Only validate if we are using the new format.
+ if (!IsNewDbgInfoFormat)
+ return RetVal;
+
+ // Match every DPMarker to every Instruction and vice versa, and
+ // verify that there are no invalid DPValues.
+ for (auto It = begin(); It != end(); ++It) {
+ if (!It->DbgMarker)
+ continue;
+
+ // Validate DebugProgramMarkers.
+ DPMarker *CurrentDebugMarker = It->DbgMarker;
+
+ // If this is a marker, it should match the instruction and vice versa.
+ TestFailure(CurrentDebugMarker->MarkedInstr == &*It,
+ "Debug Marker points to incorrect instruction?");
+
+ // Now validate any DPValues in the marker.
+ for (DPValue &DPV : CurrentDebugMarker->getDbgValueRange()) {
+ // Validate DebugProgramValues.
+ TestFailure(DPV.getMarker() == CurrentDebugMarker,
+ "Not pointing at correct next marker!");
+
+ // Verify that no DbgValues appear prior to PHIs.
+ TestFailure(
+ !isa<PHINode>(It),
+ "DebugProgramValues must not appear before PHI nodes in a block!");
+ }
+ }
+
+ // Except transiently when removing + re-inserting the block terminator, there
+ // should be no trailing DPValues.
+ TestFailure(!getTrailingDPValues(), "Trailing DPValues in block");
+ return RetVal;
+}
+
+#ifndef NDEBUG
+void BasicBlock::dumpDbgValues() const {
+ for (auto &Inst : *this) {
+ if (!Inst.DbgMarker)
+ continue;
+
+ dbgs() << "@ " << Inst.DbgMarker << " ";
+ Inst.DbgMarker->dump();
+ };
+}
+#endif
+
+void BasicBlock::setIsNewDbgInfoFormat(bool NewFlag) {
+ if (NewFlag && !IsNewDbgInfoFormat)
+ convertToNewDbgValues();
+ else if (!NewFlag && IsNewDbgInfoFormat)
+ convertFromNewDbgValues();
+}
+
ValueSymbolTable *BasicBlock::getValueSymbolTable() {
if (Function *F = getParent())
return F->getValueSymbolTable();
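[Editorial note: a hedged round-trip harness for the conversion entry points added above. With the experimental-debuginfo-iterators flag left at its default of false, convertToNewDbgValues() is a no-op, so this only exercises the plumbing; the module contents are illustrative.]

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  Function *F =
      Function::Create(FunctionType::get(Type::getVoidTy(Ctx), false),
                       Function::ExternalLinkage, "f", M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> Builder(BB);
  Builder.CreateRetVoid();

  BB->convertToNewDbgValues(); // gated on the cl::opt defined above
  BB->validateDbgValues(/*Assert=*/false, /*Msg=*/true, &errs());
  BB->convertFromNewDbgValues(); // back to dbg.value intrinsics
  return 0;
}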
@@ -42,11 +217,13 @@ template <> void llvm::invalidateParentIListOrdering(BasicBlock *BB) {
// Explicit instantiation of SymbolTableListTraits since some of the methods
// are not in the public header file...
-template class llvm::SymbolTableListTraits<Instruction>;
+template class llvm::SymbolTableListTraits<Instruction,
+ ilist_iterator_bits<true>>;
BasicBlock::BasicBlock(LLVMContext &C, const Twine &Name, Function *NewParent,
BasicBlock *InsertBefore)
- : Value(Type::getLabelTy(C), Value::BasicBlockVal), Parent(nullptr) {
+ : Value(Type::getLabelTy(C), Value::BasicBlockVal),
+ IsNewDbgInfoFormat(false), Parent(nullptr) {
if (NewParent)
insertInto(NewParent, InsertBefore);
@@ -55,12 +232,16 @@ BasicBlock::BasicBlock(LLVMContext &C, const Twine &Name, Function *NewParent,
"Cannot insert block before another block with no function!");
setName(Name);
+ if (NewParent)
+ setIsNewDbgInfoFormat(NewParent->IsNewDbgInfoFormat);
}
void BasicBlock::insertInto(Function *NewParent, BasicBlock *InsertBefore) {
assert(NewParent && "Expected a parent");
assert(!Parent && "Already has a parent");
+ setIsNewDbgInfoFormat(NewParent->IsNewDbgInfoFormat);
+
if (InsertBefore)
NewParent->insert(InsertBefore->getIterator(), this);
else
@@ -90,6 +271,11 @@ BasicBlock::~BasicBlock() {
assert(getParent() == nullptr && "BasicBlock still linked into the program!");
dropAllReferences();
+ for (auto &Inst : *this) {
+ if (!Inst.DbgMarker)
+ continue;
+ Inst.DbgMarker->eraseFromParent();
+ }
InstList.clear();
}
@@ -220,6 +406,16 @@ const Instruction* BasicBlock::getFirstNonPHI() const {
return nullptr;
}
+BasicBlock::const_iterator BasicBlock::getFirstNonPHIIt() const {
+ const Instruction *I = getFirstNonPHI();
+ BasicBlock::const_iterator It = I->getIterator();
+ // Set the head-inclusive bit to indicate that this iterator includes
+ // any debug-info at the start of the block. This is a no-op unless the
+ // appropriate CMake flag is set.
+ It.setHeadBit(true);
+ return It;
+}
+
const Instruction *BasicBlock::getFirstNonPHIOrDbg(bool SkipPseudoOp) const {
for (const Instruction &I : *this) {
if (isa<PHINode>(I) || isa<DbgInfoIntrinsic>(I))
@@ -257,6 +453,10 @@ BasicBlock::const_iterator BasicBlock::getFirstInsertionPt() const {
const_iterator InsertPt = FirstNonPHI->getIterator();
if (InsertPt->isEHPad()) ++InsertPt;
+ // Set the head-inclusive bit to indicate that this iterator includes
+ // any debug-info at the start of the block. This is a no-op unless the
+ // appropriate CMake flag is set.
+ InsertPt.setHeadBit(true);
return InsertPt;
}
@@ -396,8 +596,9 @@ bool BasicBlock::isLegalToHoistInto() const {
// If the block has no successors, there can be no instructions to hoist.
assert(Term->getNumSuccessors() > 0);
- // Instructions should not be hoisted across exception handling boundaries.
- return !Term->isExceptionalTerminator();
+ // Instructions should not be hoisted across special terminators, which may
+ // have side effects or return values.
+ return !Term->isSpecialTerminator();
}
bool BasicBlock::isEntryBlock() const {
@@ -419,7 +620,7 @@ BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName,
this->getNextNode());
// Save DebugLoc of split point before invalidating iterator.
- DebugLoc Loc = I->getDebugLoc();
+ DebugLoc Loc = I->getStableDebugLoc();
// Move all of the specified instructions from the original basic block into
// the new basic block.
New->splice(New->end(), this, I, end());
@@ -475,18 +676,6 @@ BasicBlock *BasicBlock::splitBasicBlockBefore(iterator I, const Twine &BBName) {
return New;
}
-void BasicBlock::splice(BasicBlock::iterator ToIt, BasicBlock *FromBB,
- BasicBlock::iterator FromBeginIt,
- BasicBlock::iterator FromEndIt) {
-#ifdef EXPENSIVE_CHECKS
- // Check that FromBeginIt is befor FromEndIt.
- auto FromBBEnd = FromBB->end();
- for (auto It = FromBeginIt; It != FromEndIt; ++It)
- assert(It != FromBBEnd && "FromBeginIt not before FromEndIt!");
-#endif // EXPENSIVE_CHECKS
- getInstList().splice(ToIt, FromBB->getInstList(), FromBeginIt, FromEndIt);
-}
-
BasicBlock::iterator BasicBlock::erase(BasicBlock::iterator FromIt,
BasicBlock::iterator ToIt) {
return InstList.erase(FromIt, ToIt);
@@ -558,6 +747,420 @@ void BasicBlock::renumberInstructions() {
NumInstrRenumberings++;
}
+void BasicBlock::flushTerminatorDbgValues() {
+ // If we erase the terminator in a block, any DPValues will sink and "fall
+ // off the end", existing after any terminator that gets inserted. With
+ // dbg.value intrinsics we would just insert the terminator at end() and
+ // the dbg.values would come before the terminator. With DPValues, we must
+ // do this manually.
+ // To get out of this unfortunate form, whenever we insert a terminator,
+ // check whether there's anything trailing at the end and move those DPValues
+ // in front of the terminator.
+
+ // Do nothing if we're not in new debug-info format.
+ if (!IsNewDbgInfoFormat)
+ return;
+
+ // If there's no terminator, there's nothing to do.
+ Instruction *Term = getTerminator();
+ if (!Term)
+ return;
+
+ // Are there any dangling DPValues?
+ DPMarker *TrailingDPValues = getTrailingDPValues();
+ if (!TrailingDPValues)
+ return;
+
+ // Transfer DPValues from the trailing position onto the terminator.
+ Term->DbgMarker->absorbDebugValues(*TrailingDPValues, false);
+ TrailingDPValues->eraseFromParent();
+ deleteTrailingDPValues();
+}
+
+void BasicBlock::spliceDebugInfoEmptyBlock(BasicBlock::iterator Dest,
+ BasicBlock *Src,
+ BasicBlock::iterator First,
+ BasicBlock::iterator Last) {
+ // Imagine the following:
+ //
+ // bb1:
+ // dbg.value(...
+ // ret i32 0
+ //
+ // If an optimisation pass attempts to splice the contents of the block from
+ // BB1->begin() to BB1->getTerminator(), then the dbg.value will be
+ // transferred to the destination.
+ // However, in the "new" DPValue format for debug-info, that range is empty:
+ // begin() returns an iterator to the terminator, as there will only be a
+ // single instruction in the block. We must piece together from the bits set
+ // in the iterators whether there was the intention to transfer any debug
+ // info.
+
+ // If we're not in "new" debug-info format, do nothing.
+ if (!IsNewDbgInfoFormat)
+ return;
+
+ assert(First == Last);
+ bool InsertAtHead = Dest.getHeadBit();
+ bool ReadFromHead = First.getHeadBit();
+
+ // If the source block is completely empty, including no terminator, then
+ // transfer any trailing DPValues that are still hanging around. This can
+ // occur when a block is optimised away and the terminator has been moved
+ // somewhere else.
+ if (Src->empty()) {
+ assert(Dest != end() &&
+ "Transferring trailing DPValues to another trailing position");
+ DPMarker *SrcTrailingDPValues = Src->getTrailingDPValues();
+ if (!SrcTrailingDPValues)
+ return;
+
+ DPMarker *M = Dest->DbgMarker;
+ M->absorbDebugValues(*SrcTrailingDPValues, InsertAtHead);
+ SrcTrailingDPValues->eraseFromParent();
+ Src->deleteTrailingDPValues();
+ return;
+ }
+
+ // There are instructions in this block; if the First iterator was
+ // with begin() / getFirstInsertionPt() then the caller intended debug-info
+ // at the start of the block to be transferred.
+ if (!Src->empty() && First == Src->begin() && ReadFromHead)
+ Dest->DbgMarker->absorbDebugValues(*First->DbgMarker, InsertAtHead);
+
+ return;
+}
+
+void BasicBlock::spliceDebugInfo(BasicBlock::iterator Dest, BasicBlock *Src,
+ BasicBlock::iterator First,
+ BasicBlock::iterator Last) {
+ /* Do a quick normalisation before calling the real splice implementation. We
+ might be operating on a degenerate basic block that has no instructions
+ in it, a legitimate transient state. In that case, Dest will be end() and
+ any DPValues are temporarily stored in the TrailingDPValues map in
+ LLVMContext.
+ We might illustrate it thus:
+
+ Dest
+ |
+ this-block: ~~~~~~~~
+ Src-block: ++++B---B---B---B:::C
+ | |
+ First Last
+
+ However: does the caller expect the "~" DPValues to end up before or after
+ the spliced segment? This is communicated in the "Head" bit of Dest, which
+ signals whether the caller called begin() or end() on this block.
+
+ If the head bit is set, then all is well, we leave DPValues trailing just
+ like how dbg.value instructions would trail after instructions spliced to
+ the beginning of this block.
+
+ If the head bit isn't set, then try to jam the "~" DPValues onto the front
+ of the First instruction, then splice like normal, which joins the "~"
+ DPValues with the "+" DPValues. However if the "+" DPValues are supposed to
+ be left behind in Src, then:
+ * detach the "+" DPValues,
+ * move the "~" DPValues onto First,
+ * splice like normal,
+ * replace the "+" DPValues onto the Last position.
+ Complicated, but gets the job done. */
+
+ // If we're inserting at end(), and not in front of dangling DPValues, then
+ // move the DPValues onto "First". They'll then be moved naturally in the
+ // splice process.
+ DPMarker *MoreDanglingDPValues = nullptr;
+ DPMarker *OurTrailingDPValues = getTrailingDPValues();
+ if (Dest == end() && !Dest.getHeadBit() && OurTrailingDPValues) {
+ // Are the "+" DPValues not supposed to move? If so, detach them
+ // temporarily.
+ if (!First.getHeadBit() && First->hasDbgValues()) {
+ MoreDanglingDPValues = Src->getMarker(First);
+ MoreDanglingDPValues->removeFromParent();
+ }
+
+ if (First->hasDbgValues()) {
+ DPMarker *CurMarker = Src->getMarker(First);
+ // Place them at the front, it would look like this:
+ // Dest
+ // |
+ // this-block:
+ // Src-block: ~~~~~~~~++++B---B---B---B:::C
+ // | |
+ // First Last
+ CurMarker->absorbDebugValues(*OurTrailingDPValues, true);
+ OurTrailingDPValues->eraseFromParent();
+ } else {
+ // No current marker, create one and absorb in. (FIXME: we can avoid an
+ // allocation in the future).
+ DPMarker *CurMarker = Src->createMarker(&*First);
+ CurMarker->absorbDebugValues(*OurTrailingDPValues, false);
+ OurTrailingDPValues->eraseFromParent();
+ }
+ deleteTrailingDPValues();
+ First.setHeadBit(true);
+ }
+
+ // Call the main debug-info-splicing implementation.
+ spliceDebugInfoImpl(Dest, Src, First, Last);
+
+ // Do we have some "+" DPValues hanging around that weren't supposed to move,
+ // and we detached to make things easier?
+ if (!MoreDanglingDPValues)
+ return;
+
+ // FIXME: we could avoid an allocation here sometimes.
+ DPMarker *LastMarker = Src->createMarker(Last);
+ LastMarker->absorbDebugValues(*MoreDanglingDPValues, true);
+ MoreDanglingDPValues->eraseFromParent();
+}
+
+void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
+ BasicBlock::iterator First,
+ BasicBlock::iterator Last) {
+ // Find out where to _place_ these dbg.values; if InsertAtHead is specified,
+ // this will be at the start of Dest's debug value range, otherwise this is
+ // just Dest's marker.
+ bool InsertAtHead = Dest.getHeadBit();
+ bool ReadFromHead = First.getHeadBit();
+ // Use this flag to signal the abnormal case, where we don't want to copy the
+ // DPValues ahead of the "Last" position.
+ bool ReadFromTail = !Last.getTailBit();
+ bool LastIsEnd = (Last == Src->end());
+
+ /*
+ Here's an illustration of what we're about to do. We have two blocks, this
+ and Src, and two segments of the instruction list. Each instruction is
+ marked by a capital letter, while potential DPValue debug-info is marked out
+ by "-" characters and a few other special characters (+:=) where I want to
+ highlight what's going on.
+
+ Dest
+ |
+ this-block: A----A----A ====A----A----A----A---A---A
+ Src-block ++++B---B---B---B:::C
+ | |
+ First Last
+
+ The splice method is going to take all the instructions from First up to
+ (but not including) Last and insert them in _front_ of Dest, forming one
+ long list. All the DPValues attached to instructions _between_ First and
+ Last need no maintenance. However, we have to do special things with the
+ DPValues marked with the +:= characters. We only have three positions:
+ should the "+" DPValues be transferred, and if so to where? Do we move the
+ ":" DPValues? Would they go in front of the "=" DPValues, or should the "="
+ DPValues go before "+" DPValues?
+
+ We're told which way it should be by the bits carried in the iterators. The
+ "Head" bit indicates whether the specified position is supposed to be at the
+ front of the attached DPValues (true) or not (false). The Tail bit is true
+ on the other end of a range: is the range intended to include DPValues up to
+ the end (false) or not (true).
+
+ FIXME: the tail bit doesn't need to be distinct from the head bit, we could
+ combine them.
+
+ Here are some examples of different configurations:
+
+ Dest.Head = true, First.Head = true, Last.Tail = false
+
+ this-block: A----A----A++++B---B---B---B:::====A----A----A----A---A---A
+ | |
+ First Dest
+
+ Whereas if we didn't want to read from the Src list,
+
+ Dest.Head = true, First.Head = false, Last.Tail = false
+
+ this-block: A----A----AB---B---B---B:::====A----A----A----A---A---A
+ | |
+ First Dest
+
+ Or if we didn't want to insert at the head of Dest:
+
+ Dest.Head = false, First.Head = false, Last.Tail = false
+
+ this-block: A----A----A====B---B---B---B:::A----A----A----A---A---A
+ | |
+ First Dest
+
+ Tests for these various configurations can be found in the unit test file
+ BasicBlockDbgInfoTest.cpp.
+
+ */
+
+ // Detach the marker at Dest -- this lets us move the "====" DPValues around.
+ DPMarker *DestMarker = nullptr;
+ if (Dest != end()) {
+ DestMarker = getMarker(Dest);
+ DestMarker->removeFromParent();
+ createMarker(&*Dest);
+ }
+
+ // If we're moving the tail range of DPValues (":::"), absorb them into the
+ // front of the DPValues at Dest.
+ if (ReadFromTail && Src->getMarker(Last)) {
+ DPMarker *OntoDest = getMarker(Dest);
+ DPMarker *FromLast = Src->getMarker(Last);
+ OntoDest->absorbDebugValues(*FromLast, true);
+ if (LastIsEnd) {
+ FromLast->eraseFromParent();
+ Src->deleteTrailingDPValues();
+ }
+ }
+
+ // If we're _not_ reading from the head of First, the "++++" DPValues are
+ // not supposed to move; shuffle them onto Last's marker so that they stay
+ // behind in the Src block.
+ if (!ReadFromHead && First->hasDbgValues()) {
+ DPMarker *OntoLast = Src->createMarker(Last);
+ DPMarker *FromFirst = Src->createMarker(First);
+ OntoLast->absorbDebugValues(*FromFirst,
+ true); // Always insert at head of it.
+ }
+
+ // Finally, do something with the "====" DPValues we detached.
+ if (DestMarker) {
+ if (InsertAtHead) {
+ // Insert them at the end of the DPValues at Dest. The ":::" DPValues
+ // might be in front of them.
+ DPMarker *NewDestMarker = getMarker(Dest);
+ NewDestMarker->absorbDebugValues(*DestMarker, false);
+ } else {
+ // Insert them right at the start of the range we moved, ahead of First
+ // and the "++++" DPValues.
+ DPMarker *FirstMarker = getMarker(First);
+ FirstMarker->absorbDebugValues(*DestMarker, true);
+ }
+ DestMarker->eraseFromParent();
+ } else if (Dest == end() && !InsertAtHead) {
+ // In the rare circumstance where we insert at end(), and we did not
+ // generate the iterator with begin() / getFirstInsertionPt(), it means
+ // any trailing debug-info at the end of the block would "normally" have
+ // been pushed in front of "First". Move it there now.
+ DPMarker *FirstMarker = getMarker(First);
+ DPMarker *TrailingDPValues = getTrailingDPValues();
+ if (TrailingDPValues) {
+ FirstMarker->absorbDebugValues(*TrailingDPValues, true);
+ TrailingDPValues->eraseFromParent();
+ deleteTrailingDPValues();
+ }
+ }
+}
+
+void BasicBlock::splice(iterator Dest, BasicBlock *Src, iterator First,
+ iterator Last) {
+ assert(Src->IsNewDbgInfoFormat == IsNewDbgInfoFormat);
+
+#ifdef EXPENSIVE_CHECKS
+ // Check that First is before Last.
+ auto FromBBEnd = Src->end();
+ for (auto It = First; It != Last; ++It)
+ assert(It != FromBBEnd && "FromBeginIt not before FromEndIt!");
+#endif // EXPENSIVE_CHECKS
+
+ // Lots of horrible special casing for empty transfers: the dbg.values between
+ // two positions could be spliced in dbg.value mode.
+ if (First == Last) {
+ spliceDebugInfoEmptyBlock(Dest, Src, First, Last);
+ return;
+ }
+
+ // Handle non-instr debug-info specific juggling.
+ if (IsNewDbgInfoFormat)
+ spliceDebugInfo(Dest, Src, First, Last);
+
+ // And move the instructions.
+ getInstList().splice(Dest, Src->getInstList(), First, Last);
+
+ flushTerminatorDbgValues();
+}
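[Editorial note: a hedged usage sketch for the rewritten splice() above, moving one block's instructions, terminator included, onto the end of another. In the default intrinsic mode the debug-info juggling is skipped entirely; the names are illustrative.]

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  Function *F =
      Function::Create(FunctionType::get(Type::getVoidTy(Ctx), false),
                       Function::ExternalLinkage, "f", M);
  BasicBlock *A = BasicBlock::Create(Ctx, "a", F);
  BasicBlock *B = BasicBlock::Create(Ctx, "b", F);
  IRBuilder<> Builder(B);
  Builder.CreateRetVoid();

  // Empty-range and debug-info handling happen before the InstList splice.
  A->splice(A->end(), B, B->begin(), B->end());
  return 0;
}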
+
+void BasicBlock::insertDPValueAfter(DPValue *DPV, Instruction *I) {
+ assert(IsNewDbgInfoFormat);
+ assert(I->getParent() == this);
+
+ iterator NextIt = std::next(I->getIterator());
+ DPMarker *NextMarker = getMarker(NextIt);
+ if (!NextMarker)
+ NextMarker = createMarker(NextIt);
+ NextMarker->insertDPValue(DPV, true);
+}
+
+void BasicBlock::insertDPValueBefore(DPValue *DPV,
+ InstListType::iterator Where) {
+ // We should never directly insert at the end of the block, new DPValues
+ // shouldn't be generated at times when there's no terminator.
+ assert(Where != end());
+ assert(Where->getParent() == this);
+ if (!Where->DbgMarker)
+ createMarker(Where);
+ bool InsertAtHead = Where.getHeadBit();
+ Where->DbgMarker->insertDPValue(DPV, InsertAtHead);
+}
+
+DPMarker *BasicBlock::getNextMarker(Instruction *I) {
+ return getMarker(std::next(I->getIterator()));
+}
+
+DPMarker *BasicBlock::getMarker(InstListType::iterator It) {
+ if (It == end()) {
+ DPMarker *DPM = getTrailingDPValues();
+ return DPM;
+ }
+ return It->DbgMarker;
+}
+
+void BasicBlock::reinsertInstInDPValues(
+ Instruction *I, std::optional<DPValue::self_iterator> Pos) {
+ // "I" was originally removed from a position where it was
+ // immediately in front of Pos. Any DPValues on that position then "fell down"
+ // onto Pos. "I" has been re-inserted at the front of that wedge of DPValues,
+ // shuffle them around to represent the original positioning. To illustrate:
+ //
+ // Instructions: I1---I---I0
+ // DPValues: DDD DDD
+ //
+ // Instruction "I" removed,
+ //
+ // Instructions: I1------I0
+ // DPValues: DDDDDD
+ // ^Pos
+ //
+ // Instruction "I" re-inserted (now):
+ //
+ // Instructions: I1---I------I0
+ // DPValues: DDDDDD
+ // ^Pos
+ //
+ // After this method completes:
+ //
+ // Instructions: I1---I---I0
+ // DPValues: DDD DDD
+
+ // This happens if there were no DPValues on I0. Are there DPValues there now?
+ if (!Pos) {
+ DPMarker *NextMarker = getNextMarker(I);
+ if (!NextMarker)
+ return;
+ if (NextMarker->StoredDPValues.empty())
+ return;
+ // There are DPValues there now -- they fell down from "I".
+ DPMarker *ThisMarker = createMarker(I);
+ ThisMarker->absorbDebugValues(*NextMarker, false);
+ return;
+ }
+
+ // Is there even a range of DPValues to move?
+ DPMarker *DPM = (*Pos)->getMarker();
+ auto Range = make_range(DPM->StoredDPValues.begin(), (*Pos));
+ if (Range.begin() == Range.end())
+ return;
+
+ // Otherwise: splice.
+ DPMarker *ThisMarker = createMarker(I);
+ assert(ThisMarker->StoredDPValues.empty());
+ ThisMarker->absorbDebugValues(Range, *DPM, true);
+}
+
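The diagram above is easiest to see with a standalone model. The sketch below uses std::list as a stand-in for a DPMarker's storage (an illustration of the splitting logic only, not LLVM code): the re-inserted instruction reclaims the prefix of the wedge up to the remembered position, which is what absorbDebugValues() does with the range ending at Pos.

#include <iostream>
#include <list>
#include <string>

int main() {
  // After removing "I", its debug values D1 D2 fell onto the next marker,
  // in front of the D3 D4 that were always attached to I0.
  std::list<std::string> NextMarker = {"D1", "D2", "D3", "D4"};
  auto Pos = std::next(NextMarker.begin(), 2); // first value owned by I0

  // Re-inserting "I": give it back everything in front of Pos.
  std::list<std::string> ThisMarker;
  ThisMarker.splice(ThisMarker.end(), NextMarker, NextMarker.begin(), Pos);

  for (const std::string &D : ThisMarker)
    std::cout << D << ' ';                    // D1 D2
  std::cout << "| ";
  for (const std::string &D : NextMarker)
    std::cout << D << ' ';                    // D3 D4
  std::cout << '\n';
}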
#ifndef NDEBUG
/// In asserts builds, this checks the numbering. In non-asserts builds, it
/// is defined as a no-op inline function in BasicBlock.h.
@@ -572,3 +1175,16 @@ void BasicBlock::validateInstrOrdering() const {
}
}
#endif
+
+void BasicBlock::setTrailingDPValues(DPMarker *foo) {
+ getContext().pImpl->setTrailingDPValues(this, foo);
+}
+
+DPMarker *BasicBlock::getTrailingDPValues() {
+ return getContext().pImpl->getTrailingDPValues(this);
+}
+
+void BasicBlock::deleteTrailingDPValues() {
+ getContext().pImpl->deleteTrailingDPValues(this);
+}
+
diff --git a/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp b/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp
index 4c3325063c09..d499d74f7ba0 100644
--- a/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp
@@ -37,45 +37,6 @@ using namespace llvm::PatternMatch;
// ConstantFold*Instruction Implementations
//===----------------------------------------------------------------------===//
-/// Convert the specified vector Constant node to the specified vector type.
-/// At this point, we know that the elements of the input vector constant are
-/// all simple integer or FP values.
-static Constant *BitCastConstantVector(Constant *CV, VectorType *DstTy) {
-
- if (CV->isAllOnesValue()) return Constant::getAllOnesValue(DstTy);
- if (CV->isNullValue()) return Constant::getNullValue(DstTy);
-
- // Do not iterate on scalable vector. The num of elements is unknown at
- // compile-time.
- if (isa<ScalableVectorType>(DstTy))
- return nullptr;
-
- // If this cast changes element count then we can't handle it here:
- // doing so requires endianness information. This should be handled by
- // Analysis/ConstantFolding.cpp
- unsigned NumElts = cast<FixedVectorType>(DstTy)->getNumElements();
- if (NumElts != cast<FixedVectorType>(CV->getType())->getNumElements())
- return nullptr;
-
- Type *DstEltTy = DstTy->getElementType();
- // Fast path for splatted constants.
- if (Constant *Splat = CV->getSplatValue()) {
- return ConstantVector::getSplat(DstTy->getElementCount(),
- ConstantExpr::getBitCast(Splat, DstEltTy));
- }
-
- SmallVector<Constant*, 16> Result;
- Type *Ty = IntegerType::get(CV->getContext(), 32);
- for (unsigned i = 0; i != NumElts; ++i) {
- Constant *C =
- ConstantExpr::getExtractElement(CV, ConstantInt::get(Ty, i));
- C = ConstantExpr::getBitCast(C, DstEltTy);
- Result.push_back(C);
- }
-
- return ConstantVector::get(Result);
-}
-
/// This function determines which opcode to use to fold two constant cast
/// expressions together. It uses CastInst::isEliminableCastPair to determine
/// the opcode. Consequently it's just a wrapper around that function.
@@ -114,38 +75,19 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) {
// Handle casts from one vector constant to another. We know that the src
// and dest type have the same size (otherwise it's an illegal cast).
if (VectorType *DestPTy = dyn_cast<VectorType>(DestTy)) {
- if (VectorType *SrcTy = dyn_cast<VectorType>(V->getType())) {
- assert(DestPTy->getPrimitiveSizeInBits() ==
- SrcTy->getPrimitiveSizeInBits() &&
- "Not cast between same sized vectors!");
- SrcTy = nullptr;
- // First, check for null. Undef is already handled.
- if (isa<ConstantAggregateZero>(V))
- return Constant::getNullValue(DestTy);
-
- // Handle ConstantVector and ConstantAggregateVector.
- return BitCastConstantVector(V, DestPTy);
- }
+ if (V->isAllOnesValue())
+ return Constant::getAllOnesValue(DestTy);
// Canonicalize scalar-to-vector bitcasts into vector-to-vector bitcasts
// This allows for other simplifications (although some of them
// can only be handled by Analysis/ConstantFolding.cpp).
if (isa<ConstantInt>(V) || isa<ConstantFP>(V))
return ConstantExpr::getBitCast(ConstantVector::get(V), DestPTy);
+ return nullptr;
}
- // Finally, implement bitcast folding now. The code below doesn't handle
- // bitcast right.
- if (isa<ConstantPointerNull>(V)) // ptr->ptr cast.
- return ConstantPointerNull::get(cast<PointerType>(DestTy));
-
// Handle integral constant input.
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- if (DestTy->isIntegerTy())
- // Integral -> Integral. This is a no-op because the bit widths must
- // be the same. Consequently, we just fold to V.
- return V;
-
// See note below regarding the PPC_FP128 restriction.
if (DestTy->isFloatingPointTy() && !DestTy->isPPC_FP128Ty())
return ConstantFP::get(DestTy->getContext(),
@@ -192,7 +134,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
assert(C->getType()->isIntegerTy() &&
(cast<IntegerType>(C->getType())->getBitWidth() & 7) == 0 &&
"Non-byte sized integer input");
- unsigned CSize = cast<IntegerType>(C->getType())->getBitWidth()/8;
+ [[maybe_unused]] unsigned CSize =
+ cast<IntegerType>(C->getType())->getBitWidth() / 8;
assert(ByteSize && "Must be accessing some piece");
assert(ByteStart+ByteSize <= CSize && "Extracting invalid piece from input");
assert(ByteSize != CSize && "Should not extract everything");
@@ -213,58 +155,6 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
switch (CE->getOpcode()) {
default: return nullptr;
- case Instruction::Or: {
- Constant *RHS = ExtractConstantBytes(CE->getOperand(1), ByteStart,ByteSize);
- if (!RHS)
- return nullptr;
-
- // X | -1 -> -1.
- if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS))
- if (RHSC->isMinusOne())
- return RHSC;
-
- Constant *LHS = ExtractConstantBytes(CE->getOperand(0), ByteStart,ByteSize);
- if (!LHS)
- return nullptr;
- return ConstantExpr::getOr(LHS, RHS);
- }
- case Instruction::And: {
- Constant *RHS = ExtractConstantBytes(CE->getOperand(1), ByteStart,ByteSize);
- if (!RHS)
- return nullptr;
-
- // X & 0 -> 0.
- if (RHS->isNullValue())
- return RHS;
-
- Constant *LHS = ExtractConstantBytes(CE->getOperand(0), ByteStart,ByteSize);
- if (!LHS)
- return nullptr;
- return ConstantExpr::getAnd(LHS, RHS);
- }
- case Instruction::LShr: {
- ConstantInt *Amt = dyn_cast<ConstantInt>(CE->getOperand(1));
- if (!Amt)
- return nullptr;
- APInt ShAmt = Amt->getValue();
- // Cannot analyze non-byte shifts.
- if ((ShAmt & 7) != 0)
- return nullptr;
- ShAmt.lshrInPlace(3);
-
- // If the extract is known to be all zeros, return zero.
- if (ShAmt.uge(CSize - ByteStart))
- return Constant::getNullValue(
- IntegerType::get(CE->getContext(), ByteSize * 8));
- // If the extract is known to be fully in the input, extract it.
- if (ShAmt.ule(CSize - (ByteStart + ByteSize)))
- return ExtractConstantBytes(CE->getOperand(0),
- ByteStart + ShAmt.getZExtValue(), ByteSize);
-
- // TODO: Handle the 'partially zero' case.
- return nullptr;
- }
-
case Instruction::Shl: {
ConstantInt *Amt = dyn_cast<ConstantInt>(CE->getOperand(1));
if (!Amt)
@@ -287,43 +177,16 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
// TODO: Handle the 'partially zero' case.
return nullptr;
}
-
- case Instruction::ZExt: {
- unsigned SrcBitSize =
- cast<IntegerType>(CE->getOperand(0)->getType())->getBitWidth();
-
- // If extracting something that is completely zero, return 0.
- if (ByteStart*8 >= SrcBitSize)
- return Constant::getNullValue(IntegerType::get(CE->getContext(),
- ByteSize*8));
-
- // If exactly extracting the input, return it.
- if (ByteStart == 0 && ByteSize*8 == SrcBitSize)
- return CE->getOperand(0);
-
- // If extracting something completely in the input, if the input is a
- // multiple of 8 bits, recurse.
- if ((SrcBitSize&7) == 0 && (ByteStart+ByteSize)*8 <= SrcBitSize)
- return ExtractConstantBytes(CE->getOperand(0), ByteStart, ByteSize);
-
- // Otherwise, if extracting a subset of the input, which is not multiple of
- // 8 bits, do a shift and trunc to get the bits.
- if ((ByteStart+ByteSize)*8 < SrcBitSize) {
- assert((SrcBitSize&7) && "Shouldn't get byte sized case here");
- Constant *Res = CE->getOperand(0);
- if (ByteStart)
- Res = ConstantExpr::getLShr(Res,
- ConstantInt::get(Res->getType(), ByteStart*8));
- return ConstantExpr::getTrunc(Res, IntegerType::get(C->getContext(),
- ByteSize*8));
- }
-
- // TODO: Handle the 'partially zero' case.
- return nullptr;
- }
}
}
+static Constant *foldMaybeUndesirableCast(unsigned opc, Constant *V,
+ Type *DestTy) {
+ return ConstantExpr::isDesirableCastOp(opc)
+ ? ConstantExpr::getCast(opc, V, DestTy)
+ : ConstantFoldCastInstruction(opc, V, DestTy);
+}
+
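The helper above encodes the new contract: only casts that ConstantExpr::isDesirableCastOp() (added later in this patch, in Constants.cpp) approves may survive as constant expressions; the rest must fold all the way to a plain constant or the caller gets nullptr. A hedged mirror of that decision, written as if in this same file, since ConstantFoldCastInstruction() is declared in the private ConstantFold.h header:

// Illustration only: the zext of a constant can no longer be kept as a
// ConstantExpr, so it either folds completely or the query fails.
static llvm::Constant *tryZExt(llvm::Constant *V, llvm::Type *DestTy) {
  unsigned Opc = llvm::Instruction::ZExt;
  if (llvm::ConstantExpr::isDesirableCastOp(Opc))           // false for ZExt
    return llvm::ConstantExpr::getCast(Opc, V, DestTy);
  return llvm::ConstantFoldCastInstruction(Opc, V, DestTy); // may be nullptr
}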
Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
Type *DestTy) {
if (isa<PoisonValue>(V))
@@ -349,29 +212,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
if (CE->isCast()) {
// Try hard to fold cast of cast because they are often eliminable.
if (unsigned newOpc = foldConstantCastPair(opc, CE, DestTy))
- return ConstantExpr::getCast(newOpc, CE->getOperand(0), DestTy);
- } else if (CE->getOpcode() == Instruction::GetElementPtr &&
- // Do not fold addrspacecast (gep 0, .., 0). It might make the
- // addrspacecast uncanonicalized.
- opc != Instruction::AddrSpaceCast &&
- // Do not fold bitcast (gep) with inrange index, as this loses
- // information.
- !cast<GEPOperator>(CE)->getInRangeIndex() &&
- // Do not fold if the gep type is a vector, as bitcasting
- // operand 0 of a vector gep will result in a bitcast between
- // different sizes.
- !CE->getType()->isVectorTy()) {
- // If all of the indexes in the GEP are null values, there is no pointer
- // adjustment going on. We might as well cast the source pointer.
- bool isAllNull = true;
- for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
- if (!CE->getOperand(i)->isNullValue()) {
- isAllNull = false;
- break;
- }
- if (isAllNull)
- // This is casting one pointer type to another, always BitCast
- return ConstantExpr::getPointerCast(CE->getOperand(0), DestTy);
+ return foldMaybeUndesirableCast(newOpc, CE->getOperand(0), DestTy);
}
}
@@ -386,18 +227,22 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
Type *DstEltTy = DestVecTy->getElementType();
// Fast path for splatted constants.
if (Constant *Splat = V->getSplatValue()) {
+ Constant *Res = foldMaybeUndesirableCast(opc, Splat, DstEltTy);
+ if (!Res)
+ return nullptr;
return ConstantVector::getSplat(
- cast<VectorType>(DestTy)->getElementCount(),
- ConstantExpr::getCast(opc, Splat, DstEltTy));
+ cast<VectorType>(DestTy)->getElementCount(), Res);
}
SmallVector<Constant *, 16> res;
Type *Ty = IntegerType::get(V->getContext(), 32);
for (unsigned i = 0,
e = cast<FixedVectorType>(V->getType())->getNumElements();
i != e; ++i) {
- Constant *C =
- ConstantExpr::getExtractElement(V, ConstantInt::get(Ty, i));
- res.push_back(ConstantExpr::getCast(opc, C, DstEltTy));
+ Constant *C = ConstantExpr::getExtractElement(V, ConstantInt::get(Ty, i));
+ Constant *Casted = foldMaybeUndesirableCast(opc, C, DstEltTy);
+ if (!Casted)
+ return nullptr;
+ res.push_back(Casted);
}
return ConstantVector::get(res);
}
@@ -433,16 +278,6 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
return ConstantInt::get(FPC->getContext(), IntVal);
}
return nullptr; // Can't fold.
- case Instruction::IntToPtr: //always treated as unsigned
- if (V->isNullValue()) // Is it an integral null value?
- return ConstantPointerNull::get(cast<PointerType>(DestTy));
- return nullptr; // Other pointer types cannot be casted
- case Instruction::PtrToInt: // always treated as unsigned
- // Is it a null pointer value?
- if (V->isNullValue())
- return ConstantInt::get(DestTy, 0);
- // Other pointer types cannot be casted
- return nullptr;
case Instruction::UIToFP:
case Instruction::SIToFP:
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
@@ -491,6 +326,8 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
case Instruction::BitCast:
return FoldBitCast(V, DestTy);
case Instruction::AddrSpaceCast:
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
return nullptr;
}
}
@@ -1004,16 +841,6 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
return C1; // X & -1 == X
if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
- // (zext i32 to i64) & 4294967295 -> (zext i32 to i64)
- if (CE1->getOpcode() == Instruction::ZExt) {
- unsigned DstWidth = CI2->getType()->getBitWidth();
- unsigned SrcWidth =
- CE1->getOperand(0)->getType()->getPrimitiveSizeInBits();
- APInt PossiblySetBits(APInt::getLowBitsSet(DstWidth, SrcWidth));
- if ((PossiblySetBits & CI2->getValue()) == PossiblySetBits)
- return C1;
- }
-
// If and'ing the address of a global with a constant, fold it.
if (CE1->getOpcode() == Instruction::PtrToInt &&
isa<GlobalValue>(CE1->getOperand(0))) {
@@ -1074,17 +901,13 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
}
}
break;
- case Instruction::AShr:
- // ashr (zext C to Ty), C2 -> lshr (zext C, CSA), C2
- if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1))
- if (CE1->getOpcode() == Instruction::ZExt) // Top bits known zero.
- return ConstantExpr::getLShr(C1, C2);
- break;
}
} else if (isa<ConstantInt>(C1)) {
// If C1 is a ConstantInt and C2 is not, swap the operands.
if (Instruction::isCommutative(Opcode))
- return ConstantExpr::get(Opcode, C2, C1);
+ return ConstantExpr::isDesirableBinOp(Opcode)
+ ? ConstantExpr::get(Opcode, C2, C1)
+ : ConstantFoldBinaryInstruction(Opcode, C2, C1);
}
if (ConstantInt *CI1 = dyn_cast<ConstantInt>(C1)) {
@@ -1241,8 +1064,6 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
case Instruction::Add:
case Instruction::Sub:
return ConstantExpr::getXor(C1, C2);
- case Instruction::Mul:
- return ConstantExpr::getAnd(C1, C2);
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
@@ -1268,70 +1089,6 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
return nullptr;
}
-/// This function determines if there is anything we can decide about the two
-/// constants provided. This doesn't need to handle simple things like
-/// ConstantFP comparisons, but should instead handle ConstantExprs.
-/// If we can determine that the two constants have a particular relation to
-/// each other, we should return the corresponding FCmpInst predicate,
-/// otherwise return FCmpInst::BAD_FCMP_PREDICATE. This is used below in
-/// ConstantFoldCompareInstruction.
-///
-/// To simplify this code we canonicalize the relation so that the first
-/// operand is always the most "complex" of the two. We consider ConstantFP
-/// to be the simplest, and ConstantExprs to be the most complex.
-static FCmpInst::Predicate evaluateFCmpRelation(Constant *V1, Constant *V2) {
- assert(V1->getType() == V2->getType() &&
- "Cannot compare values of different types!");
-
- // We do not know if a constant expression will evaluate to a number or NaN.
- // Therefore, we can only say that the relation is unordered or equal.
- if (V1 == V2) return FCmpInst::FCMP_UEQ;
-
- if (!isa<ConstantExpr>(V1)) {
- if (!isa<ConstantExpr>(V2)) {
- // Simple case, use the standard constant folder.
- ConstantInt *R = nullptr;
- R = dyn_cast<ConstantInt>(
- ConstantExpr::getFCmp(FCmpInst::FCMP_OEQ, V1, V2));
- if (R && !R->isZero())
- return FCmpInst::FCMP_OEQ;
- R = dyn_cast<ConstantInt>(
- ConstantExpr::getFCmp(FCmpInst::FCMP_OLT, V1, V2));
- if (R && !R->isZero())
- return FCmpInst::FCMP_OLT;
- R = dyn_cast<ConstantInt>(
- ConstantExpr::getFCmp(FCmpInst::FCMP_OGT, V1, V2));
- if (R && !R->isZero())
- return FCmpInst::FCMP_OGT;
-
- // Nothing more we can do
- return FCmpInst::BAD_FCMP_PREDICATE;
- }
-
- // If the first operand is simple and second is ConstantExpr, swap operands.
- FCmpInst::Predicate SwappedRelation = evaluateFCmpRelation(V2, V1);
- if (SwappedRelation != FCmpInst::BAD_FCMP_PREDICATE)
- return FCmpInst::getSwappedPredicate(SwappedRelation);
- } else {
- // Ok, the LHS is known to be a constantexpr. The RHS can be any of a
- // constantexpr or a simple constant.
- ConstantExpr *CE1 = cast<ConstantExpr>(V1);
- switch (CE1->getOpcode()) {
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- // We might be able to do something with these but we don't right now.
- break;
- default:
- break;
- }
- }
- // There are MANY other foldings that we could perform here. They will
- // probably be added on demand, as they seem needed.
- return FCmpInst::BAD_FCMP_PREDICATE;
-}
-
static ICmpInst::Predicate areGlobalsPotentiallyEqual(const GlobalValue *GV1,
const GlobalValue *GV2) {
auto isGlobalUnsafeForEquality = [](const GlobalValue *GV) {
@@ -1362,66 +1119,54 @@ static ICmpInst::Predicate areGlobalsPotentiallyEqual(const GlobalValue *GV1,
/// If we can determine that the two constants have a particular relation to
/// each other, we should return the corresponding ICmp predicate, otherwise
/// return ICmpInst::BAD_ICMP_PREDICATE.
-///
-/// To simplify this code we canonicalize the relation so that the first
-/// operand is always the most "complex" of the two. We consider simple
-/// constants (like ConstantInt) to be the simplest, followed by
-/// GlobalValues, followed by ConstantExpr's (the most complex).
-///
-static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
- bool isSigned) {
+static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2) {
assert(V1->getType() == V2->getType() &&
"Cannot compare different types of values!");
if (V1 == V2) return ICmpInst::ICMP_EQ;
- if (!isa<ConstantExpr>(V1) && !isa<GlobalValue>(V1) &&
- !isa<BlockAddress>(V1)) {
- if (!isa<GlobalValue>(V2) && !isa<ConstantExpr>(V2) &&
- !isa<BlockAddress>(V2)) {
- // We distilled this down to a simple case, use the standard constant
- // folder.
- ConstantInt *R = nullptr;
- ICmpInst::Predicate pred = ICmpInst::ICMP_EQ;
- R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, V1, V2));
- if (R && !R->isZero())
- return pred;
- pred = isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
- R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, V1, V2));
- if (R && !R->isZero())
- return pred;
- pred = isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
- R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, V1, V2));
- if (R && !R->isZero())
- return pred;
-
- // If we couldn't figure it out, bail.
- return ICmpInst::BAD_ICMP_PREDICATE;
- }
-
- // If the first operand is simple, swap operands.
- ICmpInst::Predicate SwappedRelation =
- evaluateICmpRelation(V2, V1, isSigned);
+ // The following folds only apply to pointers.
+ if (!V1->getType()->isPointerTy())
+ return ICmpInst::BAD_ICMP_PREDICATE;
+
+ // To simplify this code we canonicalize the relation so that the first
+ // operand is always the most "complex" of the two. We consider simple
+ // constants (like ConstantPointerNull) to be the simplest, followed by
+ // BlockAddress, GlobalValues, and ConstantExpr's (the most complex).
+ auto GetComplexity = [](Constant *V) {
+ if (isa<ConstantExpr>(V))
+ return 3;
+ if (isa<GlobalValue>(V))
+ return 2;
+ if (isa<BlockAddress>(V))
+ return 1;
+ return 0;
+ };
+ if (GetComplexity(V1) < GetComplexity(V2)) {
+ ICmpInst::Predicate SwappedRelation = evaluateICmpRelation(V2, V1);
if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE)
return ICmpInst::getSwappedPredicate(SwappedRelation);
+ return ICmpInst::BAD_ICMP_PREDICATE;
+ }
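A worked instance of the complexity ordering, as a sketch with assumed operands: evaluateICmpRelation(null, @g) sees complexity 0 versus 2, recurses on the swapped pair, gets ICMP_UGT from the global-versus-null case below, and hands ICMP_ULT back to the caller.

#include "llvm/IR/Instructions.h"

// Illustration only: mirror a predicate that was computed on swapped
// operands back to the original operand order.
static llvm::ICmpInst::Predicate nullVersusGlobal() {
  // The swapped query (global, null) answers "unsigned greater than" ...
  llvm::ICmpInst::Predicate OnSwapped = llvm::ICmpInst::ICMP_UGT;
  // ... which reads as "unsigned less than" for (null, global).
  return llvm::CmpInst::getSwappedPredicate(OnSwapped);
}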
- } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V1)) {
- if (isa<ConstantExpr>(V2)) { // Swap as necessary.
- ICmpInst::Predicate SwappedRelation =
- evaluateICmpRelation(V2, V1, isSigned);
- if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE)
- return ICmpInst::getSwappedPredicate(SwappedRelation);
- return ICmpInst::BAD_ICMP_PREDICATE;
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(V1)) {
+ // Now we know that the RHS is a BlockAddress or simple constant.
+ if (const BlockAddress *BA2 = dyn_cast<BlockAddress>(V2)) {
+ // Block address in another function can't equal this one, but block
+ // addresses in the current function might be the same if blocks are
+ // empty.
+ if (BA2->getFunction() != BA->getFunction())
+ return ICmpInst::ICMP_NE;
+ } else if (isa<ConstantPointerNull>(V2)) {
+ return ICmpInst::ICMP_NE;
}
-
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V1)) {
// Now we know that the RHS is a GlobalValue, BlockAddress or simple
- // constant (which, since the types must match, means that it's a
- // ConstantPointerNull).
+ // constant.
if (const GlobalValue *GV2 = dyn_cast<GlobalValue>(V2)) {
return areGlobalsPotentiallyEqual(GV, GV2);
} else if (isa<BlockAddress>(V2)) {
return ICmpInst::ICMP_NE; // Globals never equal labels.
- } else {
- assert(isa<ConstantPointerNull>(V2) && "Canonicalization guarantee!");
+ } else if (isa<ConstantPointerNull>(V2)) {
// GlobalVals can never be null unless they have external weak linkage.
// We don't try to evaluate aliases here.
// NOTE: We should not be doing this constant folding if null pointer
@@ -1432,30 +1177,6 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
GV->getType()->getAddressSpace()))
return ICmpInst::ICMP_UGT;
}
- } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(V1)) {
- if (isa<ConstantExpr>(V2)) { // Swap as necessary.
- ICmpInst::Predicate SwappedRelation =
- evaluateICmpRelation(V2, V1, isSigned);
- if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE)
- return ICmpInst::getSwappedPredicate(SwappedRelation);
- return ICmpInst::BAD_ICMP_PREDICATE;
- }
-
- // Now we know that the RHS is a GlobalValue, BlockAddress or simple
- // constant (which, since the types must match, means that it is a
- // ConstantPointerNull).
- if (const BlockAddress *BA2 = dyn_cast<BlockAddress>(V2)) {
- // Block address in another function can't equal this one, but block
- // addresses in the current function might be the same if blocks are
- // empty.
- if (BA2->getFunction() != BA->getFunction())
- return ICmpInst::ICMP_NE;
- } else {
- // Block addresses aren't null, don't equal the address of globals.
- assert((isa<ConstantPointerNull>(V2) || isa<GlobalValue>(V2)) &&
- "Canonicalization guarantee!");
- return ICmpInst::ICMP_NE;
- }
} else {
// Ok, the LHS is known to be a constantexpr. The RHS can be any of a
// constantexpr, a global, block address, or a simple constant.
@@ -1463,39 +1184,6 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
Constant *CE1Op0 = CE1->getOperand(0);
switch (CE1->getOpcode()) {
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- break; // We can't evaluate floating point casts or truncations.
-
- case Instruction::BitCast:
- // If this is a global value cast, check to see if the RHS is also a
- // GlobalValue.
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(CE1Op0))
- if (const GlobalValue *GV2 = dyn_cast<GlobalValue>(V2))
- return areGlobalsPotentiallyEqual(GV, GV2);
- [[fallthrough]];
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::ZExt:
- case Instruction::SExt:
- // We can't evaluate floating point casts or truncations.
- if (CE1Op0->getType()->isFPOrFPVectorTy())
- break;
-
- // If the cast is not actually changing bits, and the second operand is a
- // null pointer, do the comparison with the pre-casted value.
- if (V2->isNullValue() && CE1->getType()->isIntOrPtrTy()) {
- if (CE1->getOpcode() == Instruction::ZExt) isSigned = false;
- if (CE1->getOpcode() == Instruction::SExt) isSigned = true;
- return evaluateICmpRelation(CE1Op0,
- Constant::getNullValue(CE1Op0->getType()),
- isSigned);
- }
- break;
-
case Instruction::GetElementPtr: {
GEPOperator *CE1GEP = cast<GEPOperator>(CE1);
// Ok, since this is a getelementptr, we know that the constant has a
@@ -1541,25 +1229,6 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
return ICmpInst::BAD_ICMP_PREDICATE;
}
-static Constant *constantFoldCompareGlobalToNull(CmpInst::Predicate Predicate,
- Constant *C1, Constant *C2) {
- const GlobalValue *GV = dyn_cast<GlobalValue>(C2);
- if (!GV || !C1->isNullValue())
- return nullptr;
-
- // Don't try to evaluate aliases. External weak GV can be null.
- if (!isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage() &&
- !NullPointerIsDefined(nullptr /* F */,
- GV->getType()->getAddressSpace())) {
- if (Predicate == ICmpInst::ICMP_EQ)
- return ConstantInt::getFalse(C1->getContext());
- else if (Predicate == ICmpInst::ICMP_NE)
- return ConstantInt::getTrue(C1->getContext());
- }
-
- return nullptr;
-}
-
Constant *llvm::ConstantFoldCompareInstruction(CmpInst::Predicate Predicate,
Constant *C1, Constant *C2) {
Type *ResultTy;
@@ -1598,14 +1267,6 @@ Constant *llvm::ConstantFoldCompareInstruction(CmpInst::Predicate Predicate,
return ConstantInt::get(ResultTy, CmpInst::isUnordered(Predicate));
}
- // icmp eq/ne(null,GV) -> false/true
- if (Constant *Folded = constantFoldCompareGlobalToNull(Predicate, C1, C2))
- return Folded;
-
- // icmp eq/ne(GV,null) -> false/true
- if (Constant *Folded = constantFoldCompareGlobalToNull(Predicate, C2, C1))
- return Folded;
-
if (C2->isNullValue()) {
// The caller is expected to commute the operands if the constant expression
// is C2.
@@ -1671,83 +1332,18 @@ Constant *llvm::ConstantFoldCompareInstruction(CmpInst::Predicate Predicate,
return ConstantVector::get(ResElts);
}
- if (C1->getType()->isFloatingPointTy() &&
- // Only call evaluateFCmpRelation if we have a constant expr to avoid
- // infinite recursive loop
- (isa<ConstantExpr>(C1) || isa<ConstantExpr>(C2))) {
- int Result = -1; // -1 = unknown, 0 = known false, 1 = known true.
- switch (evaluateFCmpRelation(C1, C2)) {
- default: llvm_unreachable("Unknown relation!");
- case FCmpInst::FCMP_UNO:
- case FCmpInst::FCMP_ORD:
- case FCmpInst::FCMP_UNE:
- case FCmpInst::FCMP_ULT:
- case FCmpInst::FCMP_UGT:
- case FCmpInst::FCMP_ULE:
- case FCmpInst::FCMP_UGE:
- case FCmpInst::FCMP_TRUE:
- case FCmpInst::FCMP_FALSE:
- case FCmpInst::BAD_FCMP_PREDICATE:
- break; // Couldn't determine anything about these constants.
- case FCmpInst::FCMP_OEQ: // We know that C1 == C2
- Result =
- (Predicate == FCmpInst::FCMP_UEQ || Predicate == FCmpInst::FCMP_OEQ ||
- Predicate == FCmpInst::FCMP_ULE || Predicate == FCmpInst::FCMP_OLE ||
- Predicate == FCmpInst::FCMP_UGE || Predicate == FCmpInst::FCMP_OGE);
- break;
- case FCmpInst::FCMP_OLT: // We know that C1 < C2
- Result =
- (Predicate == FCmpInst::FCMP_UNE || Predicate == FCmpInst::FCMP_ONE ||
- Predicate == FCmpInst::FCMP_ULT || Predicate == FCmpInst::FCMP_OLT ||
- Predicate == FCmpInst::FCMP_ULE || Predicate == FCmpInst::FCMP_OLE);
- break;
- case FCmpInst::FCMP_OGT: // We know that C1 > C2
- Result =
- (Predicate == FCmpInst::FCMP_UNE || Predicate == FCmpInst::FCMP_ONE ||
- Predicate == FCmpInst::FCMP_UGT || Predicate == FCmpInst::FCMP_OGT ||
- Predicate == FCmpInst::FCMP_UGE || Predicate == FCmpInst::FCMP_OGE);
- break;
- case FCmpInst::FCMP_OLE: // We know that C1 <= C2
- // We can only partially decide this relation.
- if (Predicate == FCmpInst::FCMP_UGT || Predicate == FCmpInst::FCMP_OGT)
- Result = 0;
- else if (Predicate == FCmpInst::FCMP_ULT ||
- Predicate == FCmpInst::FCMP_OLT)
- Result = 1;
- break;
- case FCmpInst::FCMP_OGE: // We known that C1 >= C2
- // We can only partially decide this relation.
- if (Predicate == FCmpInst::FCMP_ULT || Predicate == FCmpInst::FCMP_OLT)
- Result = 0;
- else if (Predicate == FCmpInst::FCMP_UGT ||
- Predicate == FCmpInst::FCMP_OGT)
- Result = 1;
- break;
- case FCmpInst::FCMP_ONE: // We know that C1 != C2
- // We can only partially decide this relation.
- if (Predicate == FCmpInst::FCMP_OEQ || Predicate == FCmpInst::FCMP_UEQ)
- Result = 0;
- else if (Predicate == FCmpInst::FCMP_ONE ||
- Predicate == FCmpInst::FCMP_UNE)
- Result = 1;
- break;
- case FCmpInst::FCMP_UEQ: // We know that C1 == C2 || isUnordered(C1, C2).
- // We can only partially decide this relation.
+ if (C1->getType()->isFPOrFPVectorTy()) {
+ if (C1 == C2) {
+ // We know that C1 == C2 || isUnordered(C1, C2).
if (Predicate == FCmpInst::FCMP_ONE)
- Result = 0;
+ return ConstantInt::getFalse(ResultTy);
else if (Predicate == FCmpInst::FCMP_UEQ)
- Result = 1;
- break;
+ return ConstantInt::getTrue(ResultTy);
}
-
- // If we evaluated the result, return it now.
- if (Result != -1)
- return ConstantInt::get(ResultTy, Result);
-
} else {
// Evaluate the relation between the two constants, per the predicate.
int Result = -1; // -1 = unknown, 0 = known false, 1 = known true.
- switch (evaluateICmpRelation(C1, C2, CmpInst::isSigned(Predicate))) {
+ switch (evaluateICmpRelation(C1, C2)) {
default: llvm_unreachable("Unknown relational!");
case ICmpInst::BAD_ICMP_PREDICATE:
break; // Couldn't determine anything about these constants.
@@ -1832,38 +1428,6 @@ Constant *llvm::ConstantFoldCompareInstruction(CmpInst::Predicate Predicate,
if (Result != -1)
return ConstantInt::get(ResultTy, Result);
- // If the right hand side is a bitcast, try using its inverse to simplify
- // it by moving it to the left hand side. We can't do this if it would turn
- // a vector compare into a scalar compare or visa versa, or if it would turn
- // the operands into FP values.
- if (ConstantExpr *CE2 = dyn_cast<ConstantExpr>(C2)) {
- Constant *CE2Op0 = CE2->getOperand(0);
- if (CE2->getOpcode() == Instruction::BitCast &&
- CE2->getType()->isVectorTy() == CE2Op0->getType()->isVectorTy() &&
- !CE2Op0->getType()->isFPOrFPVectorTy()) {
- Constant *Inverse = ConstantExpr::getBitCast(C1, CE2Op0->getType());
- return ConstantExpr::getICmp(Predicate, Inverse, CE2Op0);
- }
- }
-
- // If the left hand side is an extension, try eliminating it.
- if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
- if ((CE1->getOpcode() == Instruction::SExt &&
- ICmpInst::isSigned(Predicate)) ||
- (CE1->getOpcode() == Instruction::ZExt &&
- !ICmpInst::isSigned(Predicate))) {
- Constant *CE1Op0 = CE1->getOperand(0);
- Constant *CE1Inverse = ConstantExpr::getTrunc(CE1, CE1Op0->getType());
- if (CE1Inverse == CE1Op0) {
- // Check whether we can safely truncate the right hand side.
- Constant *C2Inverse = ConstantExpr::getTrunc(C2, CE1Op0->getType());
- if (ConstantExpr::getCast(CE1->getOpcode(), C2Inverse,
- C2->getType()) == C2)
- return ConstantExpr::getICmp(Predicate, CE1Inverse, C2Inverse);
- }
- }
- }
-
if ((!isa<ConstantExpr>(C1) && isa<ConstantExpr>(C2)) ||
(C1->isNullValue() && !C2->isNullValue())) {
// If C2 is a constant expr and C1 isn't, flip them around and fold the
@@ -1974,8 +1538,13 @@ static Constant *foldGEPOfGEP(GEPOperator *GEP, Type *PointeeTy, bool InBounds,
Type *CommonTy =
Type::getIntNTy(LastIdxTy->getContext(), CommonExtendedWidth);
- Idx0 = ConstantExpr::getSExtOrBitCast(Idx0, CommonTy);
- LastIdx = ConstantExpr::getSExtOrBitCast(LastIdx, CommonTy);
+ if (Idx0->getType() != CommonTy)
+ Idx0 = ConstantFoldCastInstruction(Instruction::SExt, Idx0, CommonTy);
+ if (LastIdx->getType() != CommonTy)
+ LastIdx =
+ ConstantFoldCastInstruction(Instruction::SExt, LastIdx, CommonTy);
+ if (!Idx0 || !LastIdx)
+ return nullptr;
}
NewIndices.push_back(ConstantExpr::get(Instruction::Add, Idx0, LastIdx));
@@ -2025,39 +1594,6 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
cast<VectorType>(GEPTy)->getElementCount(), C)
: C;
- if (C->isNullValue()) {
- bool isNull = true;
- for (Value *Idx : Idxs)
- if (!isa<UndefValue>(Idx) && !cast<Constant>(Idx)->isNullValue()) {
- isNull = false;
- break;
- }
- if (isNull) {
- PointerType *PtrTy = cast<PointerType>(C->getType()->getScalarType());
- Type *Ty = GetElementPtrInst::getIndexedType(PointeeTy, Idxs);
-
- assert(Ty && "Invalid indices for GEP!");
- Type *OrigGEPTy = PointerType::get(Ty, PtrTy->getAddressSpace());
- Type *GEPTy = PointerType::get(Ty, PtrTy->getAddressSpace());
- if (VectorType *VT = dyn_cast<VectorType>(C->getType()))
- GEPTy = VectorType::get(OrigGEPTy, VT->getElementCount());
-
- // The GEP returns a vector of pointers when one of more of
- // its arguments is a vector.
- for (Value *Idx : Idxs) {
- if (auto *VT = dyn_cast<VectorType>(Idx->getType())) {
- assert((!isa<VectorType>(GEPTy) || isa<ScalableVectorType>(GEPTy) ==
- isa<ScalableVectorType>(VT)) &&
- "Mismatched GEPTy vector types");
- GEPTy = VectorType::get(OrigGEPTy, VT->getElementCount());
- break;
- }
- }
-
- return Constant::getNullValue(GEPTy);
- }
- }
-
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
if (auto *GEP = dyn_cast<GEPOperator>(CE))
if (Constant *C = foldGEPOfGEP(GEP, PointeeTy, InBounds, Idxs))
@@ -2193,11 +1729,13 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
: cast<FixedVectorType>(CurrIdx->getType())->getNumElements());
if (!PrevIdx->getType()->isIntOrIntVectorTy(CommonExtendedWidth))
- PrevIdx = ConstantExpr::getSExt(PrevIdx, ExtendedTy);
+ PrevIdx =
+ ConstantFoldCastInstruction(Instruction::SExt, PrevIdx, ExtendedTy);
if (!Div->getType()->isIntOrIntVectorTy(CommonExtendedWidth))
- Div = ConstantExpr::getSExt(Div, ExtendedTy);
+ Div = ConstantFoldCastInstruction(Instruction::SExt, Div, ExtendedTy);
+ assert(PrevIdx && Div && "Should have folded");
NewIdxs[i - 1] = ConstantExpr::getAdd(PrevIdx, Div);
}
diff --git a/contrib/llvm-project/llvm/lib/IR/ConstantRange.cpp b/contrib/llvm-project/llvm/lib/IR/ConstantRange.cpp
index e9344a8815c0..cbb64b299e64 100644
--- a/contrib/llvm-project/llvm/lib/IR/ConstantRange.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/ConstantRange.cpp
@@ -326,6 +326,10 @@ ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp,
if (Unsigned)
return makeExactMulNUWRegion(Other.getUnsignedMax());
+ // Avoid one makeExactMulNSWRegion() call for the common case of constants.
+ if (const APInt *C = Other.getSingleElement())
+ return makeExactMulNSWRegion(*C);
+
return makeExactMulNSWRegion(Other.getSignedMin())
.intersectWith(makeExactMulNSWRegion(Other.getSignedMax()));
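A worked example of the fast path above (assumed parameters: 8-bit width, multiplier 3): mul nsw x, 3 is wrap-free exactly for -128 <= 3*x <= 127, i.e. x in [-42, 42], which the single makeExactMulNSWRegion(*C) call produces directly. A small standalone check of those bounds, illustration only:

#include <algorithm>
#include <iostream>

int main() {
  int Lo = 127, Hi = -128;
  for (int X = -128; X <= 127; ++X)
    if (X * 3 >= -128 && X * 3 <= 127) { // fits i8: no signed wrap
      Lo = std::min(Lo, X);
      Hi = std::max(Hi, X);
    }
  std::cout << Lo << ' ' << Hi << '\n'; // -42 42
}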
@@ -945,6 +949,8 @@ bool ConstantRange::isIntrinsicSupported(Intrinsic::ID IntrinsicID) {
case Intrinsic::smax:
case Intrinsic::abs:
case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ case Intrinsic::ctpop:
return true;
default:
return false;
@@ -982,6 +988,14 @@ ConstantRange ConstantRange::intrinsic(Intrinsic::ID IntrinsicID,
assert(ZeroIsPoison->getBitWidth() == 1 && "Must be boolean");
return Ops[0].ctlz(ZeroIsPoison->getBoolValue());
}
+ case Intrinsic::cttz: {
+ const APInt *ZeroIsPoison = Ops[1].getSingleElement();
+ assert(ZeroIsPoison && "Must be known (immarg)");
+ assert(ZeroIsPoison->getBitWidth() == 1 && "Must be boolean");
+ return Ops[0].cttz(ZeroIsPoison->getBoolValue());
+ }
+ case Intrinsic::ctpop:
+ return Ops[0].ctpop();
default:
assert(!isIntrinsicSupported(IntrinsicID) && "Shouldn't be supported");
llvm_unreachable("Unsupported intrinsic");
@@ -1477,6 +1491,13 @@ ConstantRange::shl(const ConstantRange &Other) const {
}
APInt OtherMax = Other.getUnsignedMax();
+ if (isAllNegative() && OtherMax.ule(Min.countl_one())) {
+ // For negative numbers, if the shift does not overflow in a signed sense,
+ // a larger shift will make the number smaller.
+ Max <<= Other.getUnsignedMin();
+ Min <<= OtherMax;
+ return ConstantRange::getNonEmpty(std::move(Min), std::move(Max) + 1);
+ }
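A worked example of the all-negative case (assumed parameters: 8-bit, range [-32, -17], shift amounts [0, 2]): every shifted value stays negative, the new unsigned max comes from the old max with the smallest shift (-17 << 0), and the new unsigned min from the old min with the largest shift (-32 << 2 = -128). A brute-force check, illustration only; multiplication stands in for the shift so the loop stays well-defined on negatives:

#include <algorithm>
#include <iostream>

int main() {
  int Lo = 127, Hi = -128;
  for (int X = -32; X <= -17; ++X)
    for (int S = 0; S <= 2; ++S) {
      int V = X * (1 << S); // x << s; -32 * 4 = -128 still fits i8
      Lo = std::min(Lo, V);
      Hi = std::max(Hi, V);
    }
  std::cout << Lo << ' ' << Hi << '\n'; // -128 -17
}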
// There's overflow!
if (OtherMax.ugt(Max.countl_zero()))
@@ -1725,6 +1746,120 @@ ConstantRange ConstantRange::ctlz(bool ZeroIsPoison) const {
APInt(getBitWidth(), getUnsignedMin().countl_zero() + 1));
}
+static ConstantRange getUnsignedCountTrailingZerosRange(const APInt &Lower,
+ const APInt &Upper) {
+ assert(!ConstantRange(Lower, Upper).isWrappedSet() &&
+ "Unexpected wrapped set.");
+ assert(Lower != Upper && "Unexpected empty set.");
+ unsigned BitWidth = Lower.getBitWidth();
+ if (Lower + 1 == Upper)
+ return ConstantRange(APInt(BitWidth, Lower.countr_zero()));
+ if (Lower.isZero())
+ return ConstantRange(APInt::getZero(BitWidth),
+ APInt(BitWidth, BitWidth + 1));
+
+ // Calculate longest common prefix.
+ unsigned LCPLength = (Lower ^ (Upper - 1)).countl_zero();
+ // If Lower is {LCP, 000...}, the maximum is Lower.countr_zero().
+ // Otherwise, the maximum is BitWidth - LCPLength - 1 ({LCP, 100...}).
+ return ConstantRange(
+ APInt::getZero(BitWidth),
+ APInt(BitWidth,
+ std::max(BitWidth - LCPLength - 1, Lower.countr_zero()) + 1));
+}
+
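A worked example of the helper above (assumed parameters: 8 bits, range [12, 16)): Lower = 00001100 and Upper - 1 = 00001111 share a 6-bit prefix, so the upper bound is max(8 - 6 - 1, countr_zero(12)) + 1 = 3, i.e. the result [0, 2]; the actual cttz values over {12, 13, 14, 15} are {2, 0, 1, 0}. A brute-force check, illustration only:

#include <algorithm>
#include <bit>
#include <cstdint>
#include <iostream>

int main() {
  unsigned Lo = 8, Hi = 0;
  for (uint8_t X = 12; X < 16; ++X) {
    unsigned TZ = std::countr_zero(X); // C++20
    Lo = std::min(Lo, TZ);
    Hi = std::max(Hi, TZ);
  }
  std::cout << Lo << ' ' << Hi << '\n'; // 0 2
}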
+ConstantRange ConstantRange::cttz(bool ZeroIsPoison) const {
+ if (isEmptySet())
+ return getEmpty();
+
+ unsigned BitWidth = getBitWidth();
+ APInt Zero = APInt::getZero(BitWidth);
+ if (ZeroIsPoison && contains(Zero)) {
+ // ZeroIsPoison is set, and zero is contained. We discern three cases in
+ // which a zero can appear:
+ // 1) Lower is zero, handling cases of kind [0, 1), [0, 2), etc.
+ // 2) Upper is zero, wrapped set, handling cases of kind [3, 0], etc.
+ // 3) Zero contained in a wrapped set, e.g., [3, 2), [3, 1), etc.
+
+ if (Lower.isZero()) {
+ if (Upper == 1) {
+ // We have an input interval of kind [0, 1). In this case we cannot
+ // do anything but return the empty set.
+ return getEmpty();
+ }
+
+ // Compute the resulting range by excluding zero from Lower.
+ return getUnsignedCountTrailingZerosRange(APInt(BitWidth, 1), Upper);
+ } else if (Upper == 1) {
+ // Compute the resulting range by excluding zero from Upper.
+ return getUnsignedCountTrailingZerosRange(Lower, Zero);
+ } else {
+ ConstantRange CR1 = getUnsignedCountTrailingZerosRange(Lower, Zero);
+ ConstantRange CR2 =
+ getUnsignedCountTrailingZerosRange(APInt(BitWidth, 1), Upper);
+ return CR1.unionWith(CR2);
+ }
+ }
+
+ if (isFullSet())
+ return getNonEmpty(Zero, APInt(BitWidth, BitWidth + 1));
+ if (!isWrappedSet())
+ return getUnsignedCountTrailingZerosRange(Lower, Upper);
+ // The range is wrapped. We decompose it into two ranges, [0, Upper) and
+ // [Lower, 0).
+ // Handle [Lower, 0)
+ ConstantRange CR1 = getUnsignedCountTrailingZerosRange(Lower, Zero);
+ // Handle [0, Upper)
+ ConstantRange CR2 = getUnsignedCountTrailingZerosRange(Zero, Upper);
+ return CR1.unionWith(CR2);
+}
+
+static ConstantRange getUnsignedPopCountRange(const APInt &Lower,
+ const APInt &Upper) {
+ assert(!ConstantRange(Lower, Upper).isWrappedSet() &&
+ "Unexpected wrapped set.");
+ assert(Lower != Upper && "Unexpected empty set.");
+ unsigned BitWidth = Lower.getBitWidth();
+ if (Lower + 1 == Upper)
+ return ConstantRange(APInt(BitWidth, Lower.popcount()));
+
+ APInt Max = Upper - 1;
+ // Calculate longest common prefix.
+ unsigned LCPLength = (Lower ^ Max).countl_zero();
+ unsigned LCPPopCount = Lower.getHiBits(LCPLength).popcount();
+ // If Lower is {LCP, 000...}, the minimum is the popcount of LCP.
+ // Otherwise, the minimum is the popcount of LCP + 1.
+ unsigned MinBits =
+ LCPPopCount + (Lower.countr_zero() < BitWidth - LCPLength ? 1 : 0);
+ // If Max is {LCP, 111...}, the maximum is the popcount of LCP + (BitWidth -
+ // length of LCP).
+ // Otherwise, the maximum is the popcount of LCP + (BitWidth -
+ // length of LCP - 1).
+ unsigned MaxBits = LCPPopCount + (BitWidth - LCPLength) -
+ (Max.countr_one() < BitWidth - LCPLength ? 1 : 0);
+ return ConstantRange(APInt(BitWidth, MinBits), APInt(BitWidth, MaxBits + 1));
+}
+
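A worked example (assumed parameters: 8 bits, range [12, 16)): the 6-bit common prefix 000011 has popcount 2; Lower = 12 ends in two zeros, so MinBits stays 2, and Max = 15 ends in all ones below the prefix, so MaxBits = 2 + (8 - 6) = 4, giving [2, 4]. A brute-force check, illustration only:

#include <algorithm>
#include <bit>
#include <cstdint>
#include <iostream>

int main() {
  unsigned Lo = 8, Hi = 0;
  for (uint8_t X = 12; X < 16; ++X) {
    unsigned PC = std::popcount(X); // C++20
    Lo = std::min(Lo, PC);
    Hi = std::max(Hi, PC);
  }
  std::cout << Lo << ' ' << Hi << '\n'; // 2 4
}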
+ConstantRange ConstantRange::ctpop() const {
+ if (isEmptySet())
+ return getEmpty();
+
+ unsigned BitWidth = getBitWidth();
+ APInt Zero = APInt::getZero(BitWidth);
+ if (isFullSet())
+ return getNonEmpty(Zero, APInt(BitWidth, BitWidth + 1));
+ if (!isWrappedSet())
+ return getUnsignedPopCountRange(Lower, Upper);
+ // The range is wrapped. We decompose it into two ranges, [0, Upper) and
+ // [Lower, 0).
+ // Handle [Lower, 0) == [Lower, Max]
+ ConstantRange CR1 = ConstantRange(APInt(BitWidth, Lower.countl_one()),
+ APInt(BitWidth, BitWidth + 1));
+ // Handle [0, Upper)
+ ConstantRange CR2 = getUnsignedPopCountRange(Zero, Upper);
+ return CR1.unionWith(CR2);
+}
+
ConstantRange::OverflowResult ConstantRange::unsignedAddMayOverflow(
const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
diff --git a/contrib/llvm-project/llvm/lib/IR/Constants.cpp b/contrib/llvm-project/llvm/lib/IR/Constants.cpp
index c69c7c095f78..a38b912164b1 100644
--- a/contrib/llvm-project/llvm/lib/IR/Constants.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Constants.cpp
@@ -1770,7 +1770,7 @@ BlockAddress *BlockAddress::get(Function *F, BasicBlock *BB) {
}
BlockAddress::BlockAddress(Function *F, BasicBlock *BB)
- : Constant(Type::getInt8PtrTy(F->getContext(), F->getAddressSpace()),
+ : Constant(PointerType::get(F->getContext(), F->getAddressSpace()),
Value::BlockAddressVal, &Op<0>(), 2) {
setOperand(0, F);
setOperand(1, BB);
@@ -1958,6 +1958,8 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, Type *Ty,
bool OnlyIfReduced) {
Instruction::CastOps opc = Instruction::CastOps(oc);
assert(Instruction::isCast(opc) && "opcode out of range");
+ assert(isSupportedCastOp(opc) &&
+ "Cast opcode not supported as constant expression");
assert(C && Ty && "Null arguments to getCast");
assert(CastInst::castIsValid(opc, C, Ty) && "Invalid constantexpr cast!");
@@ -1966,22 +1968,6 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, Type *Ty,
llvm_unreachable("Invalid cast opcode");
case Instruction::Trunc:
return getTrunc(C, Ty, OnlyIfReduced);
- case Instruction::ZExt:
- return getZExt(C, Ty, OnlyIfReduced);
- case Instruction::SExt:
- return getSExt(C, Ty, OnlyIfReduced);
- case Instruction::FPTrunc:
- return getFPTrunc(C, Ty, OnlyIfReduced);
- case Instruction::FPExt:
- return getFPExtend(C, Ty, OnlyIfReduced);
- case Instruction::UIToFP:
- return getUIToFP(C, Ty, OnlyIfReduced);
- case Instruction::SIToFP:
- return getSIToFP(C, Ty, OnlyIfReduced);
- case Instruction::FPToUI:
- return getFPToUI(C, Ty, OnlyIfReduced);
- case Instruction::FPToSI:
- return getFPToSI(C, Ty, OnlyIfReduced);
case Instruction::PtrToInt:
return getPtrToInt(C, Ty, OnlyIfReduced);
case Instruction::IntToPtr:
@@ -1993,35 +1979,12 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, Type *Ty,
}
}
-Constant *ConstantExpr::getZExtOrBitCast(Constant *C, Type *Ty) {
- if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
- return getBitCast(C, Ty);
- return getZExt(C, Ty);
-}
-
-Constant *ConstantExpr::getSExtOrBitCast(Constant *C, Type *Ty) {
- if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
- return getBitCast(C, Ty);
- return getSExt(C, Ty);
-}
-
Constant *ConstantExpr::getTruncOrBitCast(Constant *C, Type *Ty) {
if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return getBitCast(C, Ty);
return getTrunc(C, Ty);
}
-Constant *ConstantExpr::getSExtOrTrunc(Constant *C, Type *Ty) {
- assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() &&
- "Can only sign extend/truncate integers!");
- Type *CTy = C->getType();
- if (CTy->getScalarSizeInBits() < Ty->getScalarSizeInBits())
- return getSExt(C, Ty);
- if (CTy->getScalarSizeInBits() > Ty->getScalarSizeInBits())
- return getTrunc(C, Ty);
- return C;
-}
-
Constant *ConstantExpr::getPointerCast(Constant *S, Type *Ty) {
assert(S->getType()->isPtrOrPtrVectorTy() && "Invalid cast");
assert((Ty->isIntOrIntVectorTy() || Ty->isPtrOrPtrVectorTy()) &&
@@ -2048,30 +2011,6 @@ Constant *ConstantExpr::getPointerBitCastOrAddrSpaceCast(Constant *S,
return getBitCast(S, Ty);
}
-Constant *ConstantExpr::getIntegerCast(Constant *C, Type *Ty, bool isSigned) {
- assert(C->getType()->isIntOrIntVectorTy() &&
- Ty->isIntOrIntVectorTy() && "Invalid cast");
- unsigned SrcBits = C->getType()->getScalarSizeInBits();
- unsigned DstBits = Ty->getScalarSizeInBits();
- Instruction::CastOps opcode =
- (SrcBits == DstBits ? Instruction::BitCast :
- (SrcBits > DstBits ? Instruction::Trunc :
- (isSigned ? Instruction::SExt : Instruction::ZExt)));
- return getCast(opcode, C, Ty);
-}
-
-Constant *ConstantExpr::getFPCast(Constant *C, Type *Ty) {
- assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
- "Invalid cast");
- unsigned SrcBits = C->getType()->getScalarSizeInBits();
- unsigned DstBits = Ty->getScalarSizeInBits();
- if (SrcBits == DstBits)
- return C; // Avoid a useless cast
- Instruction::CastOps opcode =
- (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt);
- return getCast(opcode, C, Ty);
-}
-
Constant *ConstantExpr::getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced) {
#ifndef NDEBUG
bool fromVec = isa<VectorType>(C->getType());
@@ -2086,102 +2025,6 @@ Constant *ConstantExpr::getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced) {
return getFoldedCast(Instruction::Trunc, C, Ty, OnlyIfReduced);
}
-Constant *ConstantExpr::getSExt(Constant *C, Type *Ty, bool OnlyIfReduced) {
-#ifndef NDEBUG
- bool fromVec = isa<VectorType>(C->getType());
- bool toVec = isa<VectorType>(Ty);
-#endif
- assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isIntOrIntVectorTy() && "SExt operand must be integral");
- assert(Ty->isIntOrIntVectorTy() && "SExt produces only integer");
- assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
- "SrcTy must be smaller than DestTy for SExt!");
-
- return getFoldedCast(Instruction::SExt, C, Ty, OnlyIfReduced);
-}
-
-Constant *ConstantExpr::getZExt(Constant *C, Type *Ty, bool OnlyIfReduced) {
-#ifndef NDEBUG
- bool fromVec = isa<VectorType>(C->getType());
- bool toVec = isa<VectorType>(Ty);
-#endif
- assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isIntOrIntVectorTy() && "ZEXt operand must be integral");
- assert(Ty->isIntOrIntVectorTy() && "ZExt produces only integer");
- assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
- "SrcTy must be smaller than DestTy for ZExt!");
-
- return getFoldedCast(Instruction::ZExt, C, Ty, OnlyIfReduced);
-}
-
-Constant *ConstantExpr::getFPTrunc(Constant *C, Type *Ty, bool OnlyIfReduced) {
-#ifndef NDEBUG
- bool fromVec = isa<VectorType>(C->getType());
- bool toVec = isa<VectorType>(Ty);
-#endif
- assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
- C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&&
- "This is an illegal floating point truncation!");
- return getFoldedCast(Instruction::FPTrunc, C, Ty, OnlyIfReduced);
-}
-
-Constant *ConstantExpr::getFPExtend(Constant *C, Type *Ty, bool OnlyIfReduced) {
-#ifndef NDEBUG
- bool fromVec = isa<VectorType>(C->getType());
- bool toVec = isa<VectorType>(Ty);
-#endif
- assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
- C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
- "This is an illegal floating point extension!");
- return getFoldedCast(Instruction::FPExt, C, Ty, OnlyIfReduced);
-}
-
-Constant *ConstantExpr::getUIToFP(Constant *C, Type *Ty, bool OnlyIfReduced) {
-#ifndef NDEBUG
- bool fromVec = isa<VectorType>(C->getType());
- bool toVec = isa<VectorType>(Ty);
-#endif
- assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() &&
- "This is an illegal uint to floating point cast!");
- return getFoldedCast(Instruction::UIToFP, C, Ty, OnlyIfReduced);
-}
-
-Constant *ConstantExpr::getSIToFP(Constant *C, Type *Ty, bool OnlyIfReduced) {
-#ifndef NDEBUG
- bool fromVec = isa<VectorType>(C->getType());
- bool toVec = isa<VectorType>(Ty);
-#endif
- assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() &&
- "This is an illegal sint to floating point cast!");
- return getFoldedCast(Instruction::SIToFP, C, Ty, OnlyIfReduced);
-}
-
-Constant *ConstantExpr::getFPToUI(Constant *C, Type *Ty, bool OnlyIfReduced) {
-#ifndef NDEBUG
- bool fromVec = isa<VectorType>(C->getType());
- bool toVec = isa<VectorType>(Ty);
-#endif
- assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() &&
- "This is an illegal floating point to uint cast!");
- return getFoldedCast(Instruction::FPToUI, C, Ty, OnlyIfReduced);
-}
-
-Constant *ConstantExpr::getFPToSI(Constant *C, Type *Ty, bool OnlyIfReduced) {
-#ifndef NDEBUG
- bool fromVec = isa<VectorType>(C->getType());
- bool toVec = isa<VectorType>(Ty);
-#endif
- assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() &&
- "This is an illegal floating point to sint cast!");
- return getFoldedCast(Instruction::FPToSI, C, Ty, OnlyIfReduced);
-}
-
Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy,
bool OnlyIfReduced) {
assert(C->getType()->isPtrOrPtrVectorTy() &&
@@ -2288,15 +2131,15 @@ bool ConstantExpr::isDesirableBinOp(unsigned Opcode) {
case Instruction::FMul:
case Instruction::FDiv:
case Instruction::FRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::LShr:
+ case Instruction::AShr:
return false;
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
case Instruction::Xor:
return true;
default:
@@ -2315,15 +2158,15 @@ bool ConstantExpr::isSupportedBinOp(unsigned Opcode) {
case Instruction::FMul:
case Instruction::FDiv:
case Instruction::FRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::LShr:
+ case Instruction::AShr:
return false;
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
case Instruction::Xor:
return true;
default:
@@ -2331,6 +2174,50 @@ bool ConstantExpr::isSupportedBinOp(unsigned Opcode) {
}
}
+bool ConstantExpr::isDesirableCastOp(unsigned Opcode) {
+ switch (Opcode) {
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ return false;
+ case Instruction::Trunc:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ return true;
+ default:
+ llvm_unreachable("Argument must be cast opcode");
+ }
+}
+
+bool ConstantExpr::isSupportedCastOp(unsigned Opcode) {
+ switch (Opcode) {
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ return false;
+ case Instruction::Trunc:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ return true;
+ default:
+ llvm_unreachable("Argument must be cast opcode");
+ }
+}
+
Constant *ConstantExpr::getSizeOf(Type* Ty) {
// sizeof is implemented as: (i64) gep (Ty*)null, 1
// Note that a non-inbounds gep is used, as null isn't within any object.
@@ -2345,7 +2232,7 @@ Constant *ConstantExpr::getAlignOf(Type* Ty) {
// alignof is implemented as: (i64) gep ({i1,Ty}*)null, 0, 1
// Note that a non-inbounds gep is used, as null isn't within any object.
Type *AligningTy = StructType::get(Type::getInt1Ty(Ty->getContext()), Ty);
- Constant *NullPtr = Constant::getNullValue(AligningTy->getPointerTo(0));
+ Constant *NullPtr = Constant::getNullValue(
+ PointerType::getUnqual(AligningTy->getContext()));
Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0);
Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
Constant *Indices[2] = { Zero, One };
@@ -2584,14 +2471,6 @@ Constant *ConstantExpr::getMul(Constant *C1, Constant *C2,
return get(Instruction::Mul, C1, C2, Flags);
}
-Constant *ConstantExpr::getAnd(Constant *C1, Constant *C2) {
- return get(Instruction::And, C1, C2);
-}
-
-Constant *ConstantExpr::getOr(Constant *C1, Constant *C2) {
- return get(Instruction::Or, C1, C2);
-}
-
Constant *ConstantExpr::getXor(Constant *C1, Constant *C2) {
return get(Instruction::Xor, C1, C2);
}
@@ -2603,16 +2482,6 @@ Constant *ConstantExpr::getShl(Constant *C1, Constant *C2,
return get(Instruction::Shl, C1, C2, Flags);
}
-Constant *ConstantExpr::getLShr(Constant *C1, Constant *C2, bool isExact) {
- return get(Instruction::LShr, C1, C2,
- isExact ? PossiblyExactOperator::IsExact : 0);
-}
-
-Constant *ConstantExpr::getAShr(Constant *C1, Constant *C2, bool isExact) {
- return get(Instruction::AShr, C1, C2,
- isExact ? PossiblyExactOperator::IsExact : 0);
-}
-
Constant *ConstantExpr::getExactLogBase2(Constant *C) {
Type *Ty = C->getType();
const APInt *IVal;
@@ -2687,6 +2556,32 @@ Constant *ConstantExpr::getBinOpIdentity(unsigned Opcode, Type *Ty,
}
}
+Constant *ConstantExpr::getIntrinsicIdentity(Intrinsic::ID ID, Type *Ty) {
+ switch (ID) {
+ case Intrinsic::umax:
+ return Constant::getNullValue(Ty);
+ case Intrinsic::umin:
+ return Constant::getAllOnesValue(Ty);
+ case Intrinsic::smax:
+ return Constant::getIntegerValue(
+ Ty, APInt::getSignedMinValue(Ty->getIntegerBitWidth()));
+ case Intrinsic::smin:
+ return Constant::getIntegerValue(
+ Ty, APInt::getSignedMaxValue(Ty->getIntegerBitWidth()));
+ default:
+ return nullptr;
+ }
+}
+
+Constant *ConstantExpr::getIdentity(Instruction *I, Type *Ty,
+ bool AllowRHSConstant, bool NSZ) {
+ if (I->isBinaryOp())
+ return getBinOpIdentity(I->getOpcode(), Ty, AllowRHSConstant, NSZ);
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ return getIntrinsicIdentity(II->getIntrinsicID(), Ty);
+ return nullptr;
+}
+
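For a concrete sense of these identities (assumed type i8): umax yields 0, umin 255 (all ones), smax -128 (signed min) and smin 127 (signed max), the unique values that leave the other operand unchanged. A hedged sketch of a hypothetical caller, e.g. seeding a reduction accumulator:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instruction.h"

// Hypothetical helper: neutral seed for a reduction over Op, or nullptr if
// the operation has no usable identity element.
static llvm::Constant *neutralSeed(llvm::Instruction *Op, llvm::Type *Ty) {
  return llvm::ConstantExpr::getIdentity(Op, Ty, /*AllowRHSConstant=*/false,
                                         /*NSZ=*/false);
}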
Constant *ConstantExpr::getBinOpAbsorber(unsigned Opcode, Type *Ty) {
switch (Opcode) {
default:
diff --git a/contrib/llvm-project/llvm/lib/IR/ConstantsContext.h b/contrib/llvm-project/llvm/lib/IR/ConstantsContext.h
index 6023216a5070..44a926b5dc58 100644
--- a/contrib/llvm-project/llvm/lib/IR/ConstantsContext.h
+++ b/contrib/llvm-project/llvm/lib/IR/ConstantsContext.h
@@ -182,7 +182,7 @@ public:
};
/// GetElementPtrConstantExpr - This class is private to Constants.cpp, and is
-/// used behind the scenes to implement getelementpr constant exprs.
+/// used behind the scenes to implement getelementptr constant exprs.
class GetElementPtrConstantExpr final : public ConstantExpr {
Type *SrcElementTy;
Type *ResElementTy;
diff --git a/contrib/llvm-project/llvm/lib/IR/ConvergenceVerifier.cpp b/contrib/llvm-project/llvm/lib/IR/ConvergenceVerifier.cpp
new file mode 100644
index 000000000000..336c202b6f94
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/IR/ConvergenceVerifier.cpp
@@ -0,0 +1,69 @@
+//===- ConvergenceVerifier.cpp - Verify convergence control -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/ConvergenceVerifier.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GenericConvergenceVerifierImpl.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/SSAContext.h"
+
+using namespace llvm;
+
+template <>
+const Instruction *
+GenericConvergenceVerifier<SSAContext>::findAndCheckConvergenceTokenUsed(
+ const Instruction &I) {
+ auto *CB = dyn_cast<CallBase>(&I);
+ if (!CB)
+ return nullptr;
+
+ unsigned Count =
+ CB->countOperandBundlesOfType(LLVMContext::OB_convergencectrl);
+ CheckOrNull(Count <= 1,
+ "The 'convergencectrl' bundle can occur at most once on a call",
+ {Context.print(CB)});
+ if (!Count)
+ return nullptr;
+
+ auto Bundle = CB->getOperandBundle(LLVMContext::OB_convergencectrl);
+ CheckOrNull(Bundle->Inputs.size() == 1 &&
+ Bundle->Inputs[0]->getType()->isTokenTy(),
+ "The 'convergencectrl' bundle requires exactly one token use.",
+ {Context.print(CB)});
+ auto *Token = Bundle->Inputs[0].get();
+ auto *Def = dyn_cast<Instruction>(Token);
+
+ CheckOrNull(
+ Def && isConvergenceControlIntrinsic(SSAContext::getIntrinsicID(*Def)),
+ "Convergence control tokens can only be produced by calls to the "
+ "convergence control intrinsics.",
+ {Context.print(Token), Context.print(&I)});
+
+ if (Def)
+ Tokens[&I] = Def;
+
+ return Def;
+}
+
+template <>
+bool GenericConvergenceVerifier<SSAContext>::isInsideConvergentFunction(
+ const InstructionT &I) {
+ auto *F = I.getFunction();
+ return F->isConvergent();
+}
+
+template <>
+bool GenericConvergenceVerifier<SSAContext>::isConvergent(
+ const InstructionT &I) {
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ return CB->isConvergent();
+ }
+ return false;
+}
+
+template class llvm::GenericConvergenceVerifier<SSAContext>;
diff --git a/contrib/llvm-project/llvm/lib/IR/Core.cpp b/contrib/llvm-project/llvm/lib/IR/Core.cpp
index f7b6d54013de..fb30fbce0ba2 100644
--- a/contrib/llvm-project/llvm/lib/IR/Core.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Core.cpp
@@ -43,6 +43,8 @@
using namespace llvm;
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(OperandBundleDef, LLVMOperandBundleRef)
+
#define DEBUG_TYPE "ir"
void llvm::initializeCore(PassRegistry &Registry) {
@@ -460,8 +462,8 @@ const char *LLVMGetModuleInlineAsm(LLVMModuleRef M, size_t *Len) {
return Str.c_str();
}
-LLVMValueRef LLVMGetInlineAsm(LLVMTypeRef Ty, char *AsmString,
- size_t AsmStringSize, char *Constraints,
+LLVMValueRef LLVMGetInlineAsm(LLVMTypeRef Ty, const char *AsmString,
+ size_t AsmStringSize, const char *Constraints,
size_t ConstraintsSize, LLVMBool HasSideEffects,
LLVMBool IsAlignStack,
LLVMInlineAsmDialect Dialect, LLVMBool CanThrow) {
@@ -480,6 +482,61 @@ LLVMValueRef LLVMGetInlineAsm(LLVMTypeRef Ty, char *AsmString,
HasSideEffects, IsAlignStack, AD, CanThrow));
}
+const char *LLVMGetInlineAsmAsmString(LLVMValueRef InlineAsmVal, size_t *Len) {
+
+ Value *Val = unwrap<Value>(InlineAsmVal);
+ const std::string &AsmString = cast<InlineAsm>(Val)->getAsmString();
+
+ *Len = AsmString.length();
+ return AsmString.c_str();
+}
+
+const char *LLVMGetInlineAsmConstraintString(LLVMValueRef InlineAsmVal,
+ size_t *Len) {
+ Value *Val = unwrap<Value>(InlineAsmVal);
+ const std::string &ConstraintString =
+ cast<InlineAsm>(Val)->getConstraintString();
+
+ *Len = ConstraintString.length();
+ return ConstraintString.c_str();
+}
+
+LLVMInlineAsmDialect LLVMGetInlineAsmDialect(LLVMValueRef InlineAsmVal) {
+
+ Value *Val = unwrap<Value>(InlineAsmVal);
+ InlineAsm::AsmDialect Dialect = cast<InlineAsm>(Val)->getDialect();
+
+ switch (Dialect) {
+ case InlineAsm::AD_ATT:
+ return LLVMInlineAsmDialectATT;
+ case InlineAsm::AD_Intel:
+ return LLVMInlineAsmDialectIntel;
+ }
+
+ llvm_unreachable("Unrecognized inline assembly dialect");
+ return LLVMInlineAsmDialectATT;
+}
+
+LLVMTypeRef LLVMGetInlineAsmFunctionType(LLVMValueRef InlineAsmVal) {
+ Value *Val = unwrap<Value>(InlineAsmVal);
+ return (LLVMTypeRef)cast<InlineAsm>(Val)->getFunctionType();
+}
+
+LLVMBool LLVMGetInlineAsmHasSideEffects(LLVMValueRef InlineAsmVal) {
+ Value *Val = unwrap<Value>(InlineAsmVal);
+ return cast<InlineAsm>(Val)->hasSideEffects();
+}
+
+LLVMBool LLVMGetInlineAsmNeedsAlignedStack(LLVMValueRef InlineAsmVal) {
+ Value *Val = unwrap<Value>(InlineAsmVal);
+ return cast<InlineAsm>(Val)->isAlignStack();
+}
+
+LLVMBool LLVMGetInlineAsmCanUnwind(LLVMValueRef InlineAsmVal) {
+ Value *Val = unwrap<Value>(InlineAsmVal);
+ return cast<InlineAsm>(Val)->canThrow();
+}
+
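Note: the getters above expose every field that LLVMGetInlineAsm accepts, so a binding can round-trip an inline-asm value without reaching into C++. A minimal C sketch using only the declarations in this hunk plus long-standing Core.h calls (error handling omitted):

    #include <llvm-c/Core.h>
    #include <stdio.h>

    int main(void) {
      LLVMContextRef Ctx = LLVMContextCreate();
      LLVMTypeRef FnTy = LLVMFunctionType(LLVMVoidTypeInContext(Ctx), NULL, 0, 0);
      LLVMValueRef IA =
          LLVMGetInlineAsm(FnTy, "nop", 3, "", 0, /*HasSideEffects=*/1,
                           /*IsAlignStack=*/0, LLVMInlineAsmDialectATT,
                           /*CanThrow=*/0);
      size_t Len;
      const char *Str = LLVMGetInlineAsmAsmString(IA, &Len);
      printf("asm=\"%.*s\" dialect=%d sideeffects=%d\n", (int)Len, Str,
             (int)LLVMGetInlineAsmDialect(IA),
             (int)LLVMGetInlineAsmHasSideEffects(IA));
      LLVMContextDispose(Ctx);
      return 0;
    }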
/*--.. Operations on module contexts ......................................--*/
LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M) {
return wrap(&unwrap(M)->getContext());
@@ -1633,16 +1690,6 @@ LLVMValueRef LLVMConstNUWMul(LLVMValueRef LHSConstant,
unwrap<Constant>(RHSConstant)));
}
-LLVMValueRef LLVMConstAnd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getAnd(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstOr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getOr(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
LLVMValueRef LLVMConstXor(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
return wrap(ConstantExpr::getXor(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
@@ -1667,16 +1714,6 @@ LLVMValueRef LLVMConstShl(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
unwrap<Constant>(RHSConstant)));
}
-LLVMValueRef LLVMConstLShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getLShr(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstAShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getAShr(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
LLVMValueRef LLVMConstGEP2(LLVMTypeRef Ty, LLVMValueRef ConstantVal,
LLVMValueRef *ConstantIndices, unsigned NumIndices) {
ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices),
@@ -1699,46 +1736,6 @@ LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
unwrap(ToType)));
}
-LLVMValueRef LLVMConstSExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getSExt(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstZExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getZExt(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstFPTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getFPTrunc(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstFPExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getFPExtend(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstUIToFP(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getUIToFP(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstSIToFP(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getSIToFP(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstFPToUI(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getFPToUI(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstFPToSI(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getFPToSI(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
LLVMValueRef LLVMConstPtrToInt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
return wrap(ConstantExpr::getPtrToInt(unwrap<Constant>(ConstantVal),
unwrap(ToType)));
@@ -1760,18 +1757,6 @@ LLVMValueRef LLVMConstAddrSpaceCast(LLVMValueRef ConstantVal,
unwrap(ToType)));
}
-LLVMValueRef LLVMConstZExtOrBitCast(LLVMValueRef ConstantVal,
- LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getZExtOrBitCast(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstSExtOrBitCast(LLVMValueRef ConstantVal,
- LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getSExtOrBitCast(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
LLVMValueRef LLVMConstTruncOrBitCast(LLVMValueRef ConstantVal,
LLVMTypeRef ToType) {
return wrap(ConstantExpr::getTruncOrBitCast(unwrap<Constant>(ConstantVal),
@@ -1784,17 +1769,6 @@ LLVMValueRef LLVMConstPointerCast(LLVMValueRef ConstantVal,
unwrap(ToType)));
}
-LLVMValueRef LLVMConstIntCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType,
- LLVMBool isSigned) {
- return wrap(ConstantExpr::getIntegerCast(unwrap<Constant>(ConstantVal),
- unwrap(ToType), isSigned));
-}
-
-LLVMValueRef LLVMConstFPCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getFPCast(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
LLVMValueRef LLVMConstExtractElement(LLVMValueRef VectorConstant,
LLVMValueRef IndexConstant) {
return wrap(ConstantExpr::getExtractElement(unwrap<Constant>(VectorConstant),
@@ -2475,7 +2449,7 @@ void LLVMAddTargetDependentFunctionAttr(LLVMValueRef Fn, const char *A,
unsigned LLVMCountParams(LLVMValueRef FnRef) {
// This function is strictly redundant to
- // LLVMCountParamTypes(LLVMGetElementType(LLVMTypeOf(FnRef)))
+ // LLVMCountParamTypes(LLVMGlobalGetValueType(FnRef))
return unwrap<Function>(FnRef)->arg_size();
}
@@ -2595,6 +2569,34 @@ void LLVMRemoveGlobalIFunc(LLVMValueRef IFunc) {
unwrap<GlobalIFunc>(IFunc)->removeFromParent();
}
+/*--.. Operations on operand bundles .......................................--*/
+
+LLVMOperandBundleRef LLVMCreateOperandBundle(const char *Tag, size_t TagLen,
+ LLVMValueRef *Args,
+ unsigned NumArgs) {
+ return wrap(new OperandBundleDef(std::string(Tag, TagLen),
+ ArrayRef(unwrap(Args), NumArgs)));
+}
+
+void LLVMDisposeOperandBundle(LLVMOperandBundleRef Bundle) {
+ delete unwrap(Bundle);
+}
+
+const char *LLVMGetOperandBundleTag(LLVMOperandBundleRef Bundle, size_t *Len) {
+ StringRef Str = unwrap(Bundle)->getTag();
+ *Len = Str.size();
+ return Str.data();
+}
+
+unsigned LLVMGetNumOperandBundleArgs(LLVMOperandBundleRef Bundle) {
+ return unwrap(Bundle)->inputs().size();
+}
+
+LLVMValueRef LLVMGetOperandBundleArgAtIndex(LLVMOperandBundleRef Bundle,
+ unsigned Index) {
+ return wrap(unwrap(Bundle)->inputs()[Index]);
+}
+
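Note: an LLVMOperandBundleRef owns its OperandBundleDef (see the DEFINE_SIMPLE_CONVERSION_FUNCTIONS mapping added at the top of this file), so every create must be paired with LLVMDisposeOperandBundle. A hedged C sketch; V stands for any value in the enclosing module, and the tag is returned with an explicit length rather than assumed NUL-terminated:

    #include <llvm-c/Core.h>
    #include <stdio.h>

    /* Build a one-argument "deopt" bundle and read it back. */
    static void inspect_bundle(LLVMValueRef V) {
      LLVMValueRef Args[] = {V};
      LLVMOperandBundleRef OB = LLVMCreateOperandBundle("deopt", 5, Args, 1);
      size_t TagLen;
      const char *Tag = LLVMGetOperandBundleTag(OB, &TagLen);
      printf("tag=%.*s nargs=%u\n", (int)TagLen, Tag,
             LLVMGetNumOperandBundleArgs(OB));
      LLVMValueRef Arg0 = LLVMGetOperandBundleArgAtIndex(OB, 0); /* == V */
      (void)Arg0;
      LLVMDisposeOperandBundle(OB);
    }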
/*--.. Operations on basic blocks ..........................................--*/
LLVMValueRef LLVMBasicBlockAsValue(LLVMBasicBlockRef BB) {
@@ -2886,6 +2888,16 @@ LLVMTypeRef LLVMGetCalledFunctionType(LLVMValueRef Instr) {
return wrap(unwrap<CallBase>(Instr)->getFunctionType());
}
+unsigned LLVMGetNumOperandBundles(LLVMValueRef C) {
+ return unwrap<CallBase>(C)->getNumOperandBundles();
+}
+
+LLVMOperandBundleRef LLVMGetOperandBundleAtIndex(LLVMValueRef C,
+ unsigned Index) {
+ return wrap(
+ new OperandBundleDef(unwrap<CallBase>(C)->getOperandBundleAt(Index)));
+}
+
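Note: LLVMGetOperandBundleAtIndex wraps a freshly allocated copy of the bundle on every call, so each returned ref is caller-owned. Iterating a call site's bundles therefore looks like this sketch (Call is an assumed LLVMValueRef naming a call or invoke):

    unsigned N = LLVMGetNumOperandBundles(Call);
    for (unsigned I = 0; I < N; ++I) {
      LLVMOperandBundleRef OB = LLVMGetOperandBundleAtIndex(Call, I);
      /* ... inspect OB as in the previous sketch ... */
      LLVMDisposeOperandBundle(OB); /* each result is an owned copy */
    }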
/*--.. Operations on call instructions (only) ..............................--*/
LLVMBool LLVMIsTailCall(LLVMValueRef Call) {
@@ -2896,6 +2908,14 @@ void LLVMSetTailCall(LLVMValueRef Call, LLVMBool isTailCall) {
unwrap<CallInst>(Call)->setTailCall(isTailCall);
}
+LLVMTailCallKind LLVMGetTailCallKind(LLVMValueRef Call) {
+ return (LLVMTailCallKind)unwrap<CallInst>(Call)->getTailCallKind();
+}
+
+void LLVMSetTailCallKind(LLVMValueRef Call, LLVMTailCallKind kind) {
+ unwrap<CallInst>(Call)->setTailCallKind((CallInst::TailCallKind)kind);
+}
+
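Note: the casts here map the C enum one-to-one onto CallInst::TailCallKind. Assuming the enumerators in llvm-c/Core.h mirror the C++ names (LLVMTailCallKindNone/Tail/MustTail/NoTail; the names are not shown in this hunk), forcing a musttail marking is a two-liner:

    #include <assert.h>

    /* Sketch: Call is an existing call instruction in tail position. */
    LLVMSetTailCallKind(Call, LLVMTailCallKindMustTail);
    assert(LLVMGetTailCallKind(Call) == LLVMTailCallKindMustTail);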
/*--.. Operations on invoke instructions (only) ............................--*/
LLVMBasicBlockRef LLVMGetNormalDest(LLVMValueRef Invoke) {
@@ -3160,6 +3180,20 @@ LLVMValueRef LLVMBuildInvoke2(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Fn,
ArrayRef(unwrap(Args), NumArgs), Name));
}
+LLVMValueRef LLVMBuildInvokeWithOperandBundles(
+ LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Fn, LLVMValueRef *Args,
+ unsigned NumArgs, LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
+ LLVMOperandBundleRef *Bundles, unsigned NumBundles, const char *Name) {
+ SmallVector<OperandBundleDef, 8> OBs;
+ for (auto *Bundle : ArrayRef(Bundles, NumBundles)) {
+ OperandBundleDef *OB = unwrap(Bundle);
+ OBs.push_back(*OB);
+ }
+ return wrap(unwrap(B)->CreateInvoke(
+ unwrap<FunctionType>(Ty), unwrap(Fn), unwrap(Then), unwrap(Catch),
+ ArrayRef(unwrap(Args), NumArgs), OBs, Name));
+}
+
LLVMValueRef LLVMBuildLandingPad(LLVMBuilderRef B, LLVMTypeRef Ty,
LLVMValueRef PersFn, unsigned NumClauses,
const char *Name) {
@@ -3285,6 +3319,39 @@ void LLVMSetArgOperand(LLVMValueRef Funclet, unsigned i, LLVMValueRef value) {
/*--.. Arithmetic ..........................................................--*/
+static FastMathFlags mapFromLLVMFastMathFlags(LLVMFastMathFlags FMF) {
+ FastMathFlags NewFMF;
+ NewFMF.setAllowReassoc((FMF & LLVMFastMathAllowReassoc) != 0);
+ NewFMF.setNoNaNs((FMF & LLVMFastMathNoNaNs) != 0);
+ NewFMF.setNoInfs((FMF & LLVMFastMathNoInfs) != 0);
+ NewFMF.setNoSignedZeros((FMF & LLVMFastMathNoSignedZeros) != 0);
+ NewFMF.setAllowReciprocal((FMF & LLVMFastMathAllowReciprocal) != 0);
+ NewFMF.setAllowContract((FMF & LLVMFastMathAllowContract) != 0);
+ NewFMF.setApproxFunc((FMF & LLVMFastMathApproxFunc) != 0);
+
+ return NewFMF;
+}
+
+static LLVMFastMathFlags mapToLLVMFastMathFlags(FastMathFlags FMF) {
+ LLVMFastMathFlags NewFMF = LLVMFastMathNone;
+ if (FMF.allowReassoc())
+ NewFMF |= LLVMFastMathAllowReassoc;
+ if (FMF.noNaNs())
+ NewFMF |= LLVMFastMathNoNaNs;
+ if (FMF.noInfs())
+ NewFMF |= LLVMFastMathNoInfs;
+ if (FMF.noSignedZeros())
+ NewFMF |= LLVMFastMathNoSignedZeros;
+ if (FMF.allowReciprocal())
+ NewFMF |= LLVMFastMathAllowReciprocal;
+ if (FMF.allowContract())
+ NewFMF |= LLVMFastMathAllowContract;
+ if (FMF.approxFunc())
+ NewFMF |= LLVMFastMathApproxFunc;
+
+ return NewFMF;
+}
+
LLVMValueRef LLVMBuildAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name) {
return wrap(unwrap(B)->CreateAdd(unwrap(LHS), unwrap(RHS), Name));
@@ -3474,6 +3541,42 @@ void LLVMSetExact(LLVMValueRef DivOrShrInst, LLVMBool IsExact) {
cast<Instruction>(P)->setIsExact(IsExact);
}
+LLVMBool LLVMGetNNeg(LLVMValueRef NonNegInst) {
+ Value *P = unwrap<Value>(NonNegInst);
+ return cast<Instruction>(P)->hasNonNeg();
+}
+
+void LLVMSetNNeg(LLVMValueRef NonNegInst, LLVMBool IsNonNeg) {
+ Value *P = unwrap<Value>(NonNegInst);
+ cast<Instruction>(P)->setNonNeg(IsNonNeg);
+}
+
+LLVMFastMathFlags LLVMGetFastMathFlags(LLVMValueRef FPMathInst) {
+ Value *P = unwrap<Value>(FPMathInst);
+ FastMathFlags FMF = cast<Instruction>(P)->getFastMathFlags();
+ return mapToLLVMFastMathFlags(FMF);
+}
+
+void LLVMSetFastMathFlags(LLVMValueRef FPMathInst, LLVMFastMathFlags FMF) {
+ Value *P = unwrap<Value>(FPMathInst);
+ cast<Instruction>(P)->setFastMathFlags(mapFromLLVMFastMathFlags(FMF));
+}
+
+LLVMBool LLVMCanValueUseFastMathFlags(LLVMValueRef V) {
+ Value *Val = unwrap<Value>(V);
+ return isa<FPMathOperator>(Val);
+}
+
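Note: because the two static mappers above translate every bit explicitly, flag words round-trip exactly through these accessors. Clients should still gate on LLVMCanValueUseFastMathFlags, since the getter and setter cast straight to Instruction. A sketch, with B, LHS and RHS standing for an assumed builder and float operands:

    LLVMValueRef Sum = LLVMBuildFAdd(B, LHS, RHS, "sum");
    if (LLVMCanValueUseFastMathFlags(Sum)) {
      LLVMFastMathFlags F = LLVMGetFastMathFlags(Sum);
      F |= LLVMFastMathNoNaNs | LLVMFastMathAllowContract;
      LLVMSetFastMathFlags(Sum, F);
    }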
+LLVMBool LLVMGetIsDisjoint(LLVMValueRef Inst) {
+ Value *P = unwrap<Value>(Inst);
+ return cast<PossiblyDisjointInst>(P)->isDisjoint();
+}
+
+void LLVMSetIsDisjoint(LLVMValueRef Inst, LLVMBool IsDisjoint) {
+ Value *P = unwrap<Value>(Inst);
+ cast<PossiblyDisjointInst>(P)->setIsDisjoint(IsDisjoint);
+}
+
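Note: the nneg and disjoint accessors follow the same pattern, and their setters cast unconditionally, so they should only be applied to instructions that can carry the flag (a zext for nneg, an or for disjoint). A sketch, with B, Ctx, X, A0 and A1 assumed:

    LLVMValueRef Z = LLVMBuildZExt(B, X, LLVMInt64TypeInContext(Ctx), "z");
    LLVMSetNNeg(Z, 1);        /* zext nneg */
    LLVMValueRef O = LLVMBuildOr(B, A0, A1, "o");
    LLVMSetIsDisjoint(O, 1);  /* or disjoint */
    assert(LLVMGetNNeg(Z) && LLVMGetIsDisjoint(O));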
/*--.. Memory ..............................................................--*/
LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
@@ -3481,10 +3584,8 @@ LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty));
AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
- Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
- ITy, unwrap(Ty), AllocSize,
- nullptr, nullptr, "");
- return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
+ return wrap(unwrap(B)->CreateMalloc(ITy, unwrap(Ty), AllocSize, nullptr,
+ nullptr, Name));
}
LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
@@ -3492,10 +3593,8 @@ LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty));
AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
- Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
- ITy, unwrap(Ty), AllocSize,
- unwrap(Val), nullptr, "");
- return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
+ return wrap(unwrap(B)->CreateMalloc(ITy, unwrap(Ty), AllocSize, unwrap(Val),
+ nullptr, Name));
}
LLVMValueRef LLVMBuildMemSet(LLVMBuilderRef B, LLVMValueRef Ptr,
@@ -3534,8 +3633,7 @@ LLVMValueRef LLVMBuildArrayAlloca(LLVMBuilderRef B, LLVMTypeRef Ty,
}
LLVMValueRef LLVMBuildFree(LLVMBuilderRef B, LLVMValueRef PointerVal) {
- return wrap(unwrap(B)->Insert(
- CallInst::CreateFree(unwrap(PointerVal), unwrap(B)->GetInsertBlock())));
+ return wrap(unwrap(B)->CreateFree(unwrap(PointerVal)));
}
LLVMValueRef LLVMBuildLoad2(LLVMBuilderRef B, LLVMTypeRef Ty,
@@ -3705,6 +3803,8 @@ LLVMAtomicOrdering LLVMGetOrdering(LLVMValueRef MemAccessInst) {
O = LI->getOrdering();
else if (StoreInst *SI = dyn_cast<StoreInst>(P))
O = SI->getOrdering();
+ else if (FenceInst *FI = dyn_cast<FenceInst>(P))
+ O = FI->getOrdering();
else
O = cast<AtomicRMWInst>(P)->getOrdering();
return mapToLLVMOrdering(O);
@@ -3716,6 +3816,10 @@ void LLVMSetOrdering(LLVMValueRef MemAccessInst, LLVMAtomicOrdering Ordering) {
if (LoadInst *LI = dyn_cast<LoadInst>(P))
return LI->setOrdering(O);
+ else if (FenceInst *FI = dyn_cast<FenceInst>(P))
+ return FI->setOrdering(O);
+ else if (AtomicRMWInst *ARWI = dyn_cast<AtomicRMWInst>(P))
+ return ARWI->setOrdering(O);
return cast<StoreInst>(P)->setOrdering(O);
}
@@ -3877,6 +3981,21 @@ LLVMValueRef LLVMBuildCall2(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Fn,
ArrayRef(unwrap(Args), NumArgs), Name));
}
+LLVMValueRef
+LLVMBuildCallWithOperandBundles(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Fn, LLVMValueRef *Args,
+ unsigned NumArgs, LLVMOperandBundleRef *Bundles,
+ unsigned NumBundles, const char *Name) {
+ FunctionType *FTy = unwrap<FunctionType>(Ty);
+ SmallVector<OperandBundleDef, 8> OBs;
+ for (auto *Bundle : ArrayRef(Bundles, NumBundles)) {
+ OperandBundleDef *OB = unwrap(Bundle);
+ OBs.push_back(*OB);
+ }
+ return wrap(unwrap(B)->CreateCall(
+ FTy, unwrap(Fn), ArrayRef(unwrap(Args), NumArgs), OBs, Name));
+}
+
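Note: this is the call twin of LLVMBuildInvokeWithOperandBundles above; in both, the bundles are copied into a local SmallVector, so the new instruction does not take ownership of the refs. Attaching a bundle at creation time, sketched (Builder, FnTy, Fn and State are assumed):

    LLVMValueRef BArgs[] = {State};
    LLVMOperandBundleRef OB = LLVMCreateOperandBundle("deopt", 5, BArgs, 1);
    LLVMValueRef CI = LLVMBuildCallWithOperandBundles(Builder, FnTy, Fn,
                                                      NULL, 0, &OB, 1, "");
    LLVMDisposeOperandBundle(OB); /* the instruction keeps its own copy */
    (void)CI;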
LLVMValueRef LLVMBuildSelect(LLVMBuilderRef B, LLVMValueRef If,
LLVMValueRef Then, LLVMValueRef Else,
const char *Name) {
@@ -3986,6 +4105,12 @@ LLVMBool LLVMIsAtomicSingleThread(LLVMValueRef AtomicInst) {
if (AtomicRMWInst *I = dyn_cast<AtomicRMWInst>(P))
return I->getSyncScopeID() == SyncScope::SingleThread;
+ else if (FenceInst *FI = dyn_cast<FenceInst>(P))
+ return FI->getSyncScopeID() == SyncScope::SingleThread;
+ else if (StoreInst *SI = dyn_cast<StoreInst>(P))
+ return SI->getSyncScopeID() == SyncScope::SingleThread;
+ else if (LoadInst *LI = dyn_cast<LoadInst>(P))
+ return LI->getSyncScopeID() == SyncScope::SingleThread;
return cast<AtomicCmpXchgInst>(P)->getSyncScopeID() ==
SyncScope::SingleThread;
}
@@ -3996,6 +4121,12 @@ void LLVMSetAtomicSingleThread(LLVMValueRef AtomicInst, LLVMBool NewValue) {
if (AtomicRMWInst *I = dyn_cast<AtomicRMWInst>(P))
return I->setSyncScopeID(SSID);
+ else if (FenceInst *FI = dyn_cast<FenceInst>(P))
+ return FI->setSyncScopeID(SSID);
+ else if (StoreInst *SI = dyn_cast<StoreInst>(P))
+ return SI->setSyncScopeID(SSID);
+ else if (LoadInst *LI = dyn_cast<LoadInst>(P))
+ return LI->setSyncScopeID(SSID);
return cast<AtomicCmpXchgInst>(P)->setSyncScopeID(SSID);
}
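Note: with fences, loads and stores now handled in both scope accessors (and FenceInst in LLVMGetOrdering/LLVMSetOrdering above), a singlethread fence is fully manageable from C. A sketch, with B an assumed builder:

    LLVMValueRef Fence =
        LLVMBuildFence(B, LLVMAtomicOrderingRelease, /*singleThread=*/0, "");
    LLVMSetAtomicSingleThread(Fence, 1);
    LLVMSetOrdering(Fence, LLVMAtomicOrderingAcquireRelease);
    assert(LLVMIsAtomicSingleThread(Fence));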
diff --git a/contrib/llvm-project/llvm/lib/IR/DIBuilder.cpp b/contrib/llvm-project/llvm/lib/IR/DIBuilder.cpp
index 1ce8c17f8a88..62efaba02534 100644
--- a/contrib/llvm-project/llvm/lib/IR/DIBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/DIBuilder.cpp
@@ -405,12 +405,11 @@ DIDerivedType *
DIBuilder::createStaticMemberType(DIScope *Scope, StringRef Name, DIFile *File,
unsigned LineNumber, DIType *Ty,
DINode::DIFlags Flags, llvm::Constant *Val,
- uint32_t AlignInBits) {
+ unsigned Tag, uint32_t AlignInBits) {
Flags |= DINode::FlagStaticMember;
- return DIDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File,
- LineNumber, getNonCompileUnitScope(Scope), Ty, 0,
- AlignInBits, 0, std::nullopt, Flags,
- getConstantOrNull(Val));
+ return DIDerivedType::get(VMContext, Tag, Name, File, LineNumber,
+ getNonCompileUnitScope(Scope), Ty, 0, AlignInBits,
+ 0, std::nullopt, Flags, getConstantOrNull(Val));
}
DIDerivedType *
@@ -477,14 +476,15 @@ DICompositeType *DIBuilder::createClassType(
DIScope *Context, StringRef Name, DIFile *File, unsigned LineNumber,
uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits,
DINode::DIFlags Flags, DIType *DerivedFrom, DINodeArray Elements,
- DIType *VTableHolder, MDNode *TemplateParams, StringRef UniqueIdentifier) {
+ unsigned RunTimeLang, DIType *VTableHolder, MDNode *TemplateParams,
+ StringRef UniqueIdentifier) {
assert((!Context || isa<DIScope>(Context)) &&
"createClassType should be called with a valid Context");
auto *R = DICompositeType::get(
VMContext, dwarf::DW_TAG_structure_type, Name, File, LineNumber,
getNonCompileUnitScope(Context), DerivedFrom, SizeInBits, AlignInBits,
- OffsetInBits, Flags, Elements, 0, VTableHolder,
+ OffsetInBits, Flags, Elements, RunTimeLang, VTableHolder,
cast_or_null<MDTuple>(TemplateParams), UniqueIdentifier);
trackIfUnresolved(R);
return R;
@@ -535,15 +535,17 @@ DISubroutineType *DIBuilder::createSubroutineType(DITypeRefArray ParameterTypes,
return DISubroutineType::get(VMContext, Flags, CC, ParameterTypes);
}
-DICompositeType *DIBuilder::createEnumerationType(
- DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
- uint64_t SizeInBits, uint32_t AlignInBits, DINodeArray Elements,
- DIType *UnderlyingType, StringRef UniqueIdentifier, bool IsScoped) {
+DICompositeType *
+DIBuilder::createEnumerationType(DIScope *Scope, StringRef Name, DIFile *File,
+ unsigned LineNumber, uint64_t SizeInBits,
+ uint32_t AlignInBits, DINodeArray Elements,
+ DIType *UnderlyingType, unsigned RunTimeLang,
+ StringRef UniqueIdentifier, bool IsScoped) {
auto *CTy = DICompositeType::get(
VMContext, dwarf::DW_TAG_enumeration_type, Name, File, LineNumber,
getNonCompileUnitScope(Scope), UnderlyingType, SizeInBits, AlignInBits, 0,
- IsScoped ? DINode::FlagEnumClass : DINode::FlagZero, Elements, 0, nullptr,
- nullptr, UniqueIdentifier);
+ IsScoped ? DINode::FlagEnumClass : DINode::FlagZero, Elements,
+ RunTimeLang, nullptr, nullptr, UniqueIdentifier);
AllEnumTypes.emplace_back(CTy);
trackIfUnresolved(CTy);
return CTy;
@@ -986,9 +988,11 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V,
DIExpression *Expr,
const DILocation *DL,
Instruction *InsertBefore) {
- return insertDbgValueIntrinsic(
+ Instruction *DVI = insertDbgValueIntrinsic(
V, VarInfo, Expr, DL, InsertBefore ? InsertBefore->getParent() : nullptr,
InsertBefore);
+ cast<CallInst>(DVI)->setTailCall();
+ return DVI;
}
Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V,
diff --git a/contrib/llvm-project/llvm/lib/IR/DataLayout.cpp b/contrib/llvm-project/llvm/lib/IR/DataLayout.cpp
index 53842b184ed6..e28f043cf9e0 100644
--- a/contrib/llvm-project/llvm/lib/IR/DataLayout.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/DataLayout.cpp
@@ -46,7 +46,7 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
StructLayout::StructLayout(StructType *ST, const DataLayout &DL)
- : StructSize(TypeSize::Fixed(0)) {
+ : StructSize(TypeSize::getFixed(0)) {
assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
IsPadded = false;
NumElements = ST->getNumElements();
@@ -55,7 +55,7 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL)
for (unsigned i = 0, e = NumElements; i != e; ++i) {
Type *Ty = ST->getElementType(i);
if (i == 0 && Ty->isScalableTy())
- StructSize = TypeSize::Scalable(0);
+ StructSize = TypeSize::getScalable(0);
const Align TyAlign = ST->isPacked() ? Align(1) : DL.getABITypeAlign(Ty);
@@ -68,7 +68,7 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL)
// contains both fixed size and scalable size data type members).
if (!StructSize.isScalable() && !isAligned(TyAlign, StructSize)) {
IsPadded = true;
- StructSize = TypeSize::Fixed(alignTo(StructSize, TyAlign));
+ StructSize = TypeSize::getFixed(alignTo(StructSize, TyAlign));
}
// Keep track of maximum alignment constraint.
@@ -83,7 +83,7 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL)
// and all array elements would be aligned correctly.
if (!StructSize.isScalable() && !isAligned(StructAlignment, StructSize)) {
IsPadded = true;
- StructSize = TypeSize::Fixed(alignTo(StructSize, StructAlignment));
+ StructSize = TypeSize::getFixed(alignTo(StructSize, StructAlignment));
}
}
@@ -93,7 +93,7 @@ unsigned StructLayout::getElementContainingOffset(uint64_t FixedOffset) const {
assert(!StructSize.isScalable() &&
"Cannot get element at offset for structure containing scalable "
"vector types");
- TypeSize Offset = TypeSize::Fixed(FixedOffset);
+ TypeSize Offset = TypeSize::getFixed(FixedOffset);
ArrayRef<TypeSize> MemberOffsets = getMemberOffsets();
const auto *SI =
@@ -171,7 +171,7 @@ const char *DataLayout::getManglingComponent(const Triple &T) {
return "-m:l";
if (T.isOSBinFormatMachO())
return "-m:o";
- if (T.isOSWindows() && T.isOSBinFormatCOFF())
+ if ((T.isOSWindows() || T.isUEFI()) && T.isOSBinFormatCOFF())
return T.getArch() == Triple::x86 ? "-m:x" : "-m:w";
if (T.isOSBinFormatXCOFF())
return "-m:a";
@@ -649,6 +649,8 @@ Error DataLayout::setPointerAlignmentInBits(uint32_t AddrSpace, Align ABIAlign,
if (PrefAlign < ABIAlign)
return reportError(
"Preferred alignment cannot be less than the ABI alignment");
+ if (IndexBitWidth > TypeBitWidth)
+ return reportError("Index width cannot be larger than pointer width");
auto I = lower_bound(Pointers, AddrSpace,
[](const PointerAlignElem &A, uint32_t AddressSpace) {
diff --git a/contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp b/contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp
index 48b5501c55ba..eab05eed428e 100644
--- a/contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DebugProgramInstruction.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GVMaterializer.h"
#include "llvm/IR/Instruction.h"
@@ -43,29 +44,10 @@ using namespace llvm;
using namespace llvm::at;
using namespace llvm::dwarf;
-TinyPtrVector<DbgDeclareInst *> llvm::FindDbgDeclareUses(Value *V) {
- // This function is hot. Check whether the value has any metadata to avoid a
- // DenseMap lookup.
- if (!V->isUsedByMetadata())
- return {};
- auto *L = LocalAsMetadata::getIfExists(V);
- if (!L)
- return {};
- auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L);
- if (!MDV)
- return {};
-
- TinyPtrVector<DbgDeclareInst *> Declares;
- for (User *U : MDV->users()) {
- if (auto *DDI = dyn_cast<DbgDeclareInst>(U))
- Declares.push_back(DDI);
- }
-
- return Declares;
-}
-
-template <typename IntrinsicT>
-static void findDbgIntrinsics(SmallVectorImpl<IntrinsicT *> &Result, Value *V) {
+template <typename IntrinsicT,
+ DPValue::LocationType Type = DPValue::LocationType::Any>
+static void findDbgIntrinsics(SmallVectorImpl<IntrinsicT *> &Result, Value *V,
+ SmallVectorImpl<DPValue *> *DPValues) {
// This function is hot. Check whether the value has any metadata to avoid a
// DenseMap lookup.
if (!V->isUsedByMetadata())
@@ -78,31 +60,59 @@ static void findDbgIntrinsics(SmallVectorImpl<IntrinsicT *> &Result, Value *V) {
// V will also appear twice in a dbg.assign if its used in the both the value
// and address components.
SmallPtrSet<IntrinsicT *, 4> EncounteredIntrinsics;
+ SmallPtrSet<DPValue *, 4> EncounteredDPValues;
/// Append IntrinsicT users of MetadataAsValue(MD).
- auto AppendUsers = [&Ctx, &EncounteredIntrinsics, &Result](Metadata *MD) {
+ auto AppendUsers = [&Ctx, &EncounteredIntrinsics, &Result,
+ DPValues](Metadata *MD) {
if (auto *MDV = MetadataAsValue::getIfExists(Ctx, MD)) {
for (User *U : MDV->users())
if (IntrinsicT *DVI = dyn_cast<IntrinsicT>(U))
if (EncounteredIntrinsics.insert(DVI).second)
Result.push_back(DVI);
}
+ if (!DPValues)
+ return;
+ // Get DPValues that use this as a single value.
+ if (LocalAsMetadata *L = dyn_cast<LocalAsMetadata>(MD)) {
+ for (DPValue *DPV : L->getAllDPValueUsers()) {
+ if (Type == DPValue::LocationType::Any || DPV->getType() == Type)
+ DPValues->push_back(DPV);
+ }
+ }
};
if (auto *L = LocalAsMetadata::getIfExists(V)) {
AppendUsers(L);
- for (Metadata *AL : L->getAllArgListUsers())
+ for (Metadata *AL : L->getAllArgListUsers()) {
AppendUsers(AL);
+ if (!DPValues)
+ continue;
+ DIArgList *DI = cast<DIArgList>(AL);
+ for (DPValue *DPV : DI->getAllDPValueUsers())
+ if (Type == DPValue::LocationType::Any || DPV->getType() == Type)
+ if (EncounteredDPValues.insert(DPV).second)
+ DPValues->push_back(DPV);
+ }
}
}
-void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) {
- findDbgIntrinsics<DbgValueInst>(DbgValues, V);
+void llvm::findDbgDeclares(SmallVectorImpl<DbgDeclareInst *> &DbgUsers,
+ Value *V, SmallVectorImpl<DPValue *> *DPValues) {
+ findDbgIntrinsics<DbgDeclareInst, DPValue::LocationType::Declare>(DbgUsers, V,
+ DPValues);
+}
+
+void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues,
+ Value *V, SmallVectorImpl<DPValue *> *DPValues) {
+ findDbgIntrinsics<DbgValueInst, DPValue::LocationType::Value>(DbgValues, V,
+ DPValues);
}
void llvm::findDbgUsers(SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers,
- Value *V) {
- findDbgIntrinsics<DbgVariableIntrinsic>(DbgUsers, V);
+ Value *V, SmallVectorImpl<DPValue *> *DPValues) {
+ findDbgIntrinsics<DbgVariableIntrinsic, DPValue::LocationType::Any>(
+ DbgUsers, V, DPValues);
}
DISubprogram *llvm::getDISubprogram(const MDNode *Scope) {
@@ -183,10 +193,13 @@ void DebugInfoFinder::processCompileUnit(DICompileUnit *CU) {
void DebugInfoFinder::processInstruction(const Module &M,
const Instruction &I) {
if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
- processVariable(M, *DVI);
+ processVariable(M, DVI->getVariable());
if (auto DbgLoc = I.getDebugLoc())
processLocation(M, DbgLoc.get());
+
+ for (const DPValue &DPV : I.getDbgValueRange())
+ processDPValue(M, DPV);
}
void DebugInfoFinder::processLocation(const Module &M, const DILocation *Loc) {
@@ -196,6 +209,11 @@ void DebugInfoFinder::processLocation(const Module &M, const DILocation *Loc) {
processLocation(M, Loc->getInlinedAt());
}
+void DebugInfoFinder::processDPValue(const Module &M, const DPValue &DPV) {
+ processVariable(M, DPV.getVariable());
+ processLocation(M, DPV.getDebugLoc().get());
+}
+
void DebugInfoFinder::processType(DIType *DT) {
if (!addType(DT))
return;
@@ -270,15 +288,7 @@ void DebugInfoFinder::processSubprogram(DISubprogram *SP) {
}
void DebugInfoFinder::processVariable(const Module &M,
- const DbgVariableIntrinsic &DVI) {
- auto *N = dyn_cast<MDNode>(DVI.getVariable());
- if (!N)
- return;
-
- auto *DV = dyn_cast<DILocalVariable>(N);
- if (!DV)
- return;
-
+ const DILocalVariable *DV) {
if (!NodesSeen.insert(DV).second)
return;
processScope(DV->getScope());
@@ -525,6 +535,7 @@ bool llvm::stripDebugInfo(Function &F) {
// DIAssignID are debug info metadata primitives.
I.setMetadata(LLVMContext::MD_DIAssignID, nullptr);
}
+ I.dropDbgValues();
}
}
return Changed;
@@ -536,7 +547,7 @@ bool llvm::StripDebugInfo(Module &M) {
for (NamedMDNode &NMD : llvm::make_early_inc_range(M.named_metadata())) {
// We're stripping debug info, and without them, coverage information
// doesn't quite make sense.
- if (NMD.getName().startswith("llvm.dbg.") ||
+ if (NMD.getName().starts_with("llvm.dbg.") ||
NMD.getName() == "llvm.gcov") {
NMD.eraseFromParent();
Changed = true;
@@ -1315,17 +1326,15 @@ LLVMDIBuilderCreateUnspecifiedType(LLVMDIBuilderRef Builder, const char *Name,
return wrap(unwrap(Builder)->createUnspecifiedType({Name, NameLen}));
}
-LLVMMetadataRef
-LLVMDIBuilderCreateStaticMemberType(
+LLVMMetadataRef LLVMDIBuilderCreateStaticMemberType(
LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name,
size_t NameLen, LLVMMetadataRef File, unsigned LineNumber,
LLVMMetadataRef Type, LLVMDIFlags Flags, LLVMValueRef ConstantVal,
uint32_t AlignInBits) {
return wrap(unwrap(Builder)->createStaticMemberType(
- unwrapDI<DIScope>(Scope), {Name, NameLen},
- unwrapDI<DIFile>(File), LineNumber, unwrapDI<DIType>(Type),
- map_from_llvmDIFlags(Flags), unwrap<Constant>(ConstantVal),
- AlignInBits));
+ unwrapDI<DIScope>(Scope), {Name, NameLen}, unwrapDI<DIFile>(File),
+ LineNumber, unwrapDI<DIType>(Type), map_from_llvmDIFlags(Flags),
+ unwrap<Constant>(ConstantVal), DW_TAG_member, AlignInBits));
}
LLVMMetadataRef
@@ -1467,13 +1476,12 @@ LLVMMetadataRef LLVMDIBuilderCreateClassType(LLVMDIBuilderRef Builder,
auto Elts = unwrap(Builder)->getOrCreateArray({unwrap(Elements),
NumElements});
return wrap(unwrap(Builder)->createClassType(
- unwrapDI<DIScope>(Scope), {Name, NameLen},
- unwrapDI<DIFile>(File), LineNumber,
- SizeInBits, AlignInBits, OffsetInBits,
- map_from_llvmDIFlags(Flags), unwrapDI<DIType>(DerivedFrom),
- Elts, unwrapDI<DIType>(VTableHolder),
- unwrapDI<MDNode>(TemplateParamsNode),
- {UniqueIdentifier, UniqueIdentifierLen}));
+ unwrapDI<DIScope>(Scope), {Name, NameLen}, unwrapDI<DIFile>(File),
+ LineNumber, SizeInBits, AlignInBits, OffsetInBits,
+ map_from_llvmDIFlags(Flags), unwrapDI<DIType>(DerivedFrom), Elts,
+ /*RunTimeLang=*/0, unwrapDI<DIType>(VTableHolder),
+ unwrapDI<MDNode>(TemplateParamsNode),
+ {UniqueIdentifier, UniqueIdentifierLen}));
}
LLVMMetadataRef
diff --git a/contrib/llvm-project/llvm/lib/IR/DebugInfoMetadata.cpp b/contrib/llvm-project/llvm/lib/IR/DebugInfoMetadata.cpp
index 4933b6032688..51950fc937f0 100644
--- a/contrib/llvm-project/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -14,9 +14,9 @@
#include "LLVMContextImpl.h"
#include "MetadataImpl.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/IR/DebugProgramInstruction.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Type.h"
@@ -42,6 +42,11 @@ DebugVariable::DebugVariable(const DbgVariableIntrinsic *DII)
Fragment(DII->getExpression()->getFragmentInfo()),
InlinedAt(DII->getDebugLoc().getInlinedAt()) {}
+DebugVariable::DebugVariable(const DPValue *DPV)
+ : Variable(DPV->getVariable()),
+ Fragment(DPV->getExpression()->getFragmentInfo()),
+ InlinedAt(DPV->getDebugLoc().getInlinedAt()) {}
+
DebugVariableAggregate::DebugVariableAggregate(const DbgVariableIntrinsic *DVI)
: DebugVariable(DVI->getVariable(), std::nullopt,
DVI->getDebugLoc()->getInlinedAt()) {}
@@ -712,7 +717,9 @@ Constant *DIDerivedType::getStorageOffsetInBits() const {
}
Constant *DIDerivedType::getConstant() const {
- assert(getTag() == dwarf::DW_TAG_member && isStaticMember());
+ assert((getTag() == dwarf::DW_TAG_member ||
+ getTag() == dwarf::DW_TAG_variable) &&
+ isStaticMember());
if (auto *C = cast_or_null<ConstantAsMetadata>(getExtraData()))
return C->getValue();
return nullptr;
@@ -914,11 +921,11 @@ DICompileUnit::DICompileUnit(LLVMContext &C, StorageType Storage,
bool DebugInfoForProfiling, unsigned NameTableKind,
bool RangesBaseAddress, ArrayRef<Metadata *> Ops)
: DIScope(C, DICompileUnitKind, Storage, dwarf::DW_TAG_compile_unit, Ops),
- SourceLanguage(SourceLanguage), IsOptimized(IsOptimized),
- RuntimeVersion(RuntimeVersion), EmissionKind(EmissionKind), DWOId(DWOId),
- SplitDebugInlining(SplitDebugInlining),
+ SourceLanguage(SourceLanguage), RuntimeVersion(RuntimeVersion),
+ DWOId(DWOId), EmissionKind(EmissionKind), NameTableKind(NameTableKind),
+ IsOptimized(IsOptimized), SplitDebugInlining(SplitDebugInlining),
DebugInfoForProfiling(DebugInfoForProfiling),
- NameTableKind(NameTableKind), RangesBaseAddress(RangesBaseAddress) {
+ RangesBaseAddress(RangesBaseAddress) {
assert(Storage != Uniqued);
}
@@ -1180,8 +1187,9 @@ DILexicalBlockFile *DILexicalBlockFile::getImpl(LLVMContext &Context,
DINamespace::DINamespace(LLVMContext &Context, StorageType Storage,
bool ExportSymbols, ArrayRef<Metadata *> Ops)
- : DIScope(Context, DINamespaceKind, Storage, dwarf::DW_TAG_namespace, Ops),
- ExportSymbols(ExportSymbols) {}
+ : DIScope(Context, DINamespaceKind, Storage, dwarf::DW_TAG_namespace, Ops) {
+ SubclassData1 = ExportSymbols;
+}
DINamespace *DINamespace::getImpl(LLVMContext &Context, Metadata *Scope,
MDString *Name, bool ExportSymbols,
StorageType Storage, bool ShouldCreate) {
@@ -1195,8 +1203,9 @@ DINamespace *DINamespace::getImpl(LLVMContext &Context, Metadata *Scope,
DICommonBlock::DICommonBlock(LLVMContext &Context, StorageType Storage,
unsigned LineNo, ArrayRef<Metadata *> Ops)
: DIScope(Context, DICommonBlockKind, Storage, dwarf::DW_TAG_common_block,
- Ops),
- LineNo(LineNo) {}
+ Ops) {
+ SubclassData32 = LineNo;
+}
DICommonBlock *DICommonBlock::getImpl(LLVMContext &Context, Metadata *Scope,
Metadata *Decl, MDString *Name,
Metadata *File, unsigned LineNo,
@@ -1210,8 +1219,10 @@ DICommonBlock *DICommonBlock::getImpl(LLVMContext &Context, Metadata *Scope,
DIModule::DIModule(LLVMContext &Context, StorageType Storage, unsigned LineNo,
bool IsDecl, ArrayRef<Metadata *> Ops)
- : DIScope(Context, DIModuleKind, Storage, dwarf::DW_TAG_module, Ops),
- LineNo(LineNo), IsDecl(IsDecl) {}
+ : DIScope(Context, DIModuleKind, Storage, dwarf::DW_TAG_module, Ops) {
+ SubclassData1 = IsDecl;
+ SubclassData32 = LineNo;
+}
DIModule *DIModule::getImpl(LLVMContext &Context, Metadata *File,
Metadata *Scope, MDString *Name,
MDString *ConfigurationMacros,
@@ -1300,8 +1311,9 @@ DILocalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
DIVariable::DIVariable(LLVMContext &C, unsigned ID, StorageType Storage,
signed Line, ArrayRef<Metadata *> Ops,
uint32_t AlignInBits)
- : DINode(C, ID, Storage, dwarf::DW_TAG_variable, Ops), Line(Line),
- AlignInBits(AlignInBits) {}
+ : DINode(C, ID, Storage, dwarf::DW_TAG_variable, Ops), Line(Line) {
+ SubclassData32 = AlignInBits;
+}
std::optional<uint64_t> DIVariable::getSizeInBits() const {
// This is used by the Verifier so be mindful of broken types.
const Metadata *RawType = getRawType();
@@ -1327,7 +1339,9 @@ std::optional<uint64_t> DIVariable::getSizeInBits() const {
DILabel::DILabel(LLVMContext &C, StorageType Storage, unsigned Line,
ArrayRef<Metadata *> Ops)
- : DINode(C, DILabelKind, Storage, dwarf::DW_TAG_label, Ops), Line(Line) {}
+ : DINode(C, DILabelKind, Storage, dwarf::DW_TAG_label, Ops) {
+ SubclassData32 = Line;
+}
DILabel *DILabel::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
Metadata *File, unsigned Line, StorageType Storage,
bool ShouldCreate) {
@@ -1345,13 +1359,23 @@ DIExpression *DIExpression::getImpl(LLVMContext &Context,
DEFINE_GETIMPL_STORE_NO_OPS(DIExpression, (Elements));
}
bool DIExpression::isEntryValue() const {
- return getNumElements() > 0 && getElement(0) == dwarf::DW_OP_LLVM_entry_value;
+ if (auto singleLocElts = getSingleLocationExpressionElements()) {
+ return singleLocElts->size() > 0 &&
+ (*singleLocElts)[0] == dwarf::DW_OP_LLVM_entry_value;
+ }
+ return false;
}
bool DIExpression::startsWithDeref() const {
- return getNumElements() > 0 && getElement(0) == dwarf::DW_OP_deref;
+ if (auto singleLocElts = getSingleLocationExpressionElements())
+ return singleLocElts->size() > 0 &&
+ (*singleLocElts)[0] == dwarf::DW_OP_deref;
+ return false;
}
bool DIExpression::isDeref() const {
- return getNumElements() == 1 && startsWithDeref();
+ if (auto singleLocElts = getSingleLocationExpressionElements())
+ return singleLocElts->size() == 1 &&
+ (*singleLocElts)[0] == dwarf::DW_OP_deref;
+ return false;
}
DIAssignID *DIAssignID::getImpl(LLVMContext &Context, StorageType Storage,
@@ -1528,14 +1552,34 @@ bool DIExpression::isSingleLocationExpression() const {
auto ExprOpBegin = expr_ops().begin();
auto ExprOpEnd = expr_ops().end();
- if (ExprOpBegin->getOp() == dwarf::DW_OP_LLVM_arg)
+ if (ExprOpBegin->getOp() == dwarf::DW_OP_LLVM_arg) {
+ if (ExprOpBegin->getArg(0) != 0)
+ return false;
++ExprOpBegin;
+ }
return !std::any_of(ExprOpBegin, ExprOpEnd, [](auto Op) {
return Op.getOp() == dwarf::DW_OP_LLVM_arg;
});
}
+std::optional<ArrayRef<uint64_t>>
+DIExpression::getSingleLocationExpressionElements() const {
+ // Check for `isValid` covered by `isSingleLocationExpression`.
+ if (!isSingleLocationExpression())
+ return std::nullopt;
+
+ // An empty expression is already non-variadic.
+ if (!getNumElements())
+ return ArrayRef<uint64_t>();
+
+ // If Expr does not have a leading DW_OP_LLVM_arg then we don't need to do
+ // anything.
+ if (getElements()[0] == dwarf::DW_OP_LLVM_arg)
+ return getElements().drop_front(2);
+ return getElements();
+}
+
const DIExpression *
DIExpression::convertToUndefExpression(const DIExpression *Expr) {
SmallVector<uint64_t, 3> UndefOps;
@@ -1561,23 +1605,13 @@ DIExpression::convertToVariadicExpression(const DIExpression *Expr) {
std::optional<const DIExpression *>
DIExpression::convertToNonVariadicExpression(const DIExpression *Expr) {
- // Check for `isValid` covered by `isSingleLocationExpression`.
- if (!Expr->isSingleLocationExpression())
+ if (!Expr)
return std::nullopt;
- // An empty expression is already non-variadic.
- if (!Expr->getNumElements())
- return Expr;
-
- auto ElementsBegin = Expr->elements_begin();
- // If Expr does not have a leading DW_OP_LLVM_arg then we don't need to do
- // anything.
- if (*ElementsBegin != dwarf::DW_OP_LLVM_arg)
- return Expr;
+ if (auto Elts = Expr->getSingleLocationExpressionElements())
+ return DIExpression::get(Expr->getContext(), *Elts);
- SmallVector<uint64_t> NonVariadicOps(
- make_range(ElementsBegin + 2, Expr->elements_end()));
- return DIExpression::get(Expr->getContext(), NonVariadicOps);
+ return std::nullopt;
}
void DIExpression::canonicalizeExpressionOps(SmallVectorImpl<uint64_t> &Ops,
@@ -1648,23 +1682,29 @@ void DIExpression::appendOffset(SmallVectorImpl<uint64_t> &Ops,
}
bool DIExpression::extractIfOffset(int64_t &Offset) const {
- if (getNumElements() == 0) {
+ auto SingleLocEltsOpt = getSingleLocationExpressionElements();
+ if (!SingleLocEltsOpt)
+ return false;
+ auto SingleLocElts = *SingleLocEltsOpt;
+
+ if (SingleLocElts.size() == 0) {
Offset = 0;
return true;
}
- if (getNumElements() == 2 && Elements[0] == dwarf::DW_OP_plus_uconst) {
- Offset = Elements[1];
+ if (SingleLocElts.size() == 2 &&
+ SingleLocElts[0] == dwarf::DW_OP_plus_uconst) {
+ Offset = SingleLocElts[1];
return true;
}
- if (getNumElements() == 3 && Elements[0] == dwarf::DW_OP_constu) {
- if (Elements[2] == dwarf::DW_OP_plus) {
- Offset = Elements[1];
+ if (SingleLocElts.size() == 3 && SingleLocElts[0] == dwarf::DW_OP_constu) {
+ if (SingleLocElts[2] == dwarf::DW_OP_plus) {
+ Offset = SingleLocElts[1];
return true;
}
- if (Elements[2] == dwarf::DW_OP_minus) {
- Offset = -Elements[1];
+ if (SingleLocElts[2] == dwarf::DW_OP_minus) {
+ Offset = -SingleLocElts[1];
return true;
}
}
@@ -1687,18 +1727,23 @@ const DIExpression *DIExpression::extractAddressClass(const DIExpression *Expr,
unsigned &AddrClass) {
// FIXME: This seems fragile. Nothing that verifies that these elements
// actually map to ops and not operands.
+ auto SingleLocEltsOpt = Expr->getSingleLocationExpressionElements();
+ if (!SingleLocEltsOpt)
+ return nullptr;
+ auto SingleLocElts = *SingleLocEltsOpt;
+
const unsigned PatternSize = 4;
- if (Expr->Elements.size() >= PatternSize &&
- Expr->Elements[PatternSize - 4] == dwarf::DW_OP_constu &&
- Expr->Elements[PatternSize - 2] == dwarf::DW_OP_swap &&
- Expr->Elements[PatternSize - 1] == dwarf::DW_OP_xderef) {
- AddrClass = Expr->Elements[PatternSize - 3];
+ if (SingleLocElts.size() >= PatternSize &&
+ SingleLocElts[PatternSize - 4] == dwarf::DW_OP_constu &&
+ SingleLocElts[PatternSize - 2] == dwarf::DW_OP_swap &&
+ SingleLocElts[PatternSize - 1] == dwarf::DW_OP_xderef) {
+ AddrClass = SingleLocElts[PatternSize - 3];
- if (Expr->Elements.size() == PatternSize)
+ if (SingleLocElts.size() == PatternSize)
return nullptr;
- return DIExpression::get(Expr->getContext(),
- ArrayRef(&*Expr->Elements.begin(),
- Expr->Elements.size() - PatternSize));
+ return DIExpression::get(
+ Expr->getContext(),
+ ArrayRef(&*SingleLocElts.begin(), SingleLocElts.size() - PatternSize));
}
return Expr;
}
@@ -2076,11 +2121,14 @@ DIMacroFile *DIMacroFile::getImpl(LLVMContext &Context, unsigned MIType,
DEFINE_GETIMPL_STORE(DIMacroFile, (MIType, Line), Ops);
}
-DIArgList *DIArgList::getImpl(LLVMContext &Context,
- ArrayRef<ValueAsMetadata *> Args,
- StorageType Storage, bool ShouldCreate) {
- DEFINE_GETIMPL_LOOKUP(DIArgList, (Args));
- DEFINE_GETIMPL_STORE_NO_OPS(DIArgList, (Args));
+DIArgList *DIArgList::get(LLVMContext &Context,
+ ArrayRef<ValueAsMetadata *> Args) {
+ auto ExistingIt = Context.pImpl->DIArgLists.find_as(DIArgListKeyInfo(Args));
+ if (ExistingIt != Context.pImpl->DIArgLists.end())
+ return *ExistingIt;
+ DIArgList *NewArgList = new DIArgList(Context, Args);
+ Context.pImpl->DIArgLists.insert(NewArgList);
+ return NewArgList;
}
void DIArgList::handleChangedOperand(void *Ref, Metadata *New) {
@@ -2088,12 +2136,9 @@ void DIArgList::handleChangedOperand(void *Ref, Metadata *New) {
assert((!New || isa<ValueAsMetadata>(New)) &&
"DIArgList must be passed a ValueAsMetadata");
untrack();
- bool Uniq = isUniqued();
- if (Uniq) {
- // We need to update the uniqueness once the Args are updated since they
- // form the key to the DIArgLists store.
- eraseFromStore();
- }
+ // We need to update the set storage once the Args are updated since they
+ // form the key to the DIArgLists store.
+ getContext().pImpl->DIArgLists.erase(this);
ValueAsMetadata *NewVM = cast_or_null<ValueAsMetadata>(New);
for (ValueAsMetadata *&VM : Args) {
if (&VM == OldVMPtr) {
@@ -2103,10 +2148,19 @@ void DIArgList::handleChangedOperand(void *Ref, Metadata *New) {
VM = ValueAsMetadata::get(PoisonValue::get(VM->getValue()->getType()));
}
}
- if (Uniq) {
- if (uniquify() != this)
- storeDistinctInContext();
+ // We've changed the contents of this DIArgList, and the set storage may
+ // already contain a DIArgList with our new set of args; if it does, then we
+ // must RAUW this with the existing DIArgList, otherwise we simply insert this
+ // back into the set storage.
+ DIArgList *ExistingArgList = getUniqued(getContext().pImpl->DIArgLists, this);
+ if (ExistingArgList) {
+ replaceAllUsesWith(ExistingArgList);
+ // Clear this here so we don't try to untrack in the destructor.
+ Args.clear();
+ delete this;
+ return;
}
+ getContext().pImpl->DIArgLists.insert(this);
track();
}
void DIArgList::track() {
@@ -2119,8 +2173,9 @@ void DIArgList::untrack() {
if (VAM)
MetadataTracking::untrack(&VAM, *VAM);
}
-void DIArgList::dropAllReferences() {
- untrack();
+void DIArgList::dropAllReferences(bool Untrack) {
+ if (Untrack)
+ untrack();
Args.clear();
- MDNode::dropAllReferences();
+ ReplaceableMetadataImpl::resolveAllUses(/* ResolveUsers */ false);
}
diff --git a/contrib/llvm-project/llvm/lib/IR/DebugProgramInstruction.cpp b/contrib/llvm-project/llvm/lib/IR/DebugProgramInstruction.cpp
new file mode 100644
index 000000000000..7b709a2de033
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/IR/DebugProgramInstruction.cpp
@@ -0,0 +1,388 @@
+//======-- DebugProgramInstruction.cpp - Implement DPValues/DPMarkers --======//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugProgramInstruction.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+
+namespace llvm {
+
+DPValue::DPValue(const DbgVariableIntrinsic *DVI)
+ : DebugValueUser(DVI->getRawLocation()), Variable(DVI->getVariable()),
+ Expression(DVI->getExpression()), DbgLoc(DVI->getDebugLoc()) {
+ switch (DVI->getIntrinsicID()) {
+ case Intrinsic::dbg_value:
+ Type = LocationType::Value;
+ break;
+ case Intrinsic::dbg_declare:
+ Type = LocationType::Declare;
+ break;
+ default:
+ llvm_unreachable(
+ "Trying to create a DPValue with an invalid intrinsic type!");
+ }
+}
+
+DPValue::DPValue(const DPValue &DPV)
+ : DebugValueUser(DPV.getRawLocation()),
+ Variable(DPV.getVariable()), Expression(DPV.getExpression()),
+ DbgLoc(DPV.getDebugLoc()), Type(DPV.getType()) {}
+
+DPValue::DPValue(Metadata *Location, DILocalVariable *DV, DIExpression *Expr,
+ const DILocation *DI, LocationType Type)
+ : DebugValueUser(Location), Variable(DV), Expression(Expr), DbgLoc(DI),
+ Type(Type) {}
+
+void DPValue::deleteInstr() { delete this; }
+
+iterator_range<DPValue::location_op_iterator> DPValue::location_ops() const {
+ auto *MD = getRawLocation();
+ // If a Value has been deleted, the "location" for this DPValue will be
+ // replaced by nullptr. Return an empty range.
+ if (!MD)
+ return {location_op_iterator(static_cast<ValueAsMetadata *>(nullptr)),
+ location_op_iterator(static_cast<ValueAsMetadata *>(nullptr))};
+
+ // If operand is ValueAsMetadata, return a range over just that operand.
+ if (auto *VAM = dyn_cast<ValueAsMetadata>(MD))
+ return {location_op_iterator(VAM), location_op_iterator(VAM + 1)};
+
+ // If operand is DIArgList, return a range over its args.
+ if (auto *AL = dyn_cast<DIArgList>(MD))
+ return {location_op_iterator(AL->args_begin()),
+ location_op_iterator(AL->args_end())};
+
+ // Operand is an empty metadata tuple, so return empty iterator.
+ assert(cast<MDNode>(MD)->getNumOperands() == 0);
+ return {location_op_iterator(static_cast<ValueAsMetadata *>(nullptr)),
+ location_op_iterator(static_cast<ValueAsMetadata *>(nullptr))};
+}
+
+unsigned DPValue::getNumVariableLocationOps() const {
+ if (hasArgList())
+ return cast<DIArgList>(getRawLocation())->getArgs().size();
+ return 1;
+}
+
+Value *DPValue::getVariableLocationOp(unsigned OpIdx) const {
+ auto *MD = getRawLocation();
+ if (!MD)
+ return nullptr;
+
+ if (auto *AL = dyn_cast<DIArgList>(MD))
+ return AL->getArgs()[OpIdx]->getValue();
+ if (isa<MDNode>(MD))
+ return nullptr;
+ assert(isa<ValueAsMetadata>(MD) &&
+ "Attempted to get location operand from DPValue with none.");
+ auto *V = cast<ValueAsMetadata>(MD);
+ assert(OpIdx == 0 && "Operand Index must be 0 for a debug intrinsic with a "
+ "single location operand.");
+ return V->getValue();
+}
+
+static ValueAsMetadata *getAsMetadata(Value *V) {
+ return isa<MetadataAsValue>(V) ? dyn_cast<ValueAsMetadata>(
+ cast<MetadataAsValue>(V)->getMetadata())
+ : ValueAsMetadata::get(V);
+}
+
+void DPValue::replaceVariableLocationOp(Value *OldValue, Value *NewValue,
+ bool AllowEmpty) {
+ assert(NewValue && "Values must be non-null");
+ auto Locations = location_ops();
+ auto OldIt = find(Locations, OldValue);
+ if (OldIt == Locations.end()) {
+ if (AllowEmpty)
+ return;
+ llvm_unreachable("OldValue must be a current location");
+ }
+
+ if (!hasArgList()) {
+ // Set our location to be the MAV wrapping the new Value.
+ setRawLocation(isa<MetadataAsValue>(NewValue)
+ ? cast<MetadataAsValue>(NewValue)->getMetadata()
+ : ValueAsMetadata::get(NewValue));
+ return;
+ }
+
+ // We must be referring to a DIArgList, produce a new operands vector with the
+ // old value replaced, generate a new DIArgList and set it as our location.
+ SmallVector<ValueAsMetadata *, 4> MDs;
+ ValueAsMetadata *NewOperand = getAsMetadata(NewValue);
+ for (auto *VMD : Locations)
+ MDs.push_back(VMD == *OldIt ? NewOperand : getAsMetadata(VMD));
+ setRawLocation(DIArgList::get(getVariableLocationOp(0)->getContext(), MDs));
+}
+
+void DPValue::replaceVariableLocationOp(unsigned OpIdx, Value *NewValue) {
+ assert(OpIdx < getNumVariableLocationOps() && "Invalid Operand Index");
+
+ if (!hasArgList()) {
+ setRawLocation(isa<MetadataAsValue>(NewValue)
+ ? cast<MetadataAsValue>(NewValue)->getMetadata()
+ : ValueAsMetadata::get(NewValue));
+ return;
+ }
+
+ SmallVector<ValueAsMetadata *, 4> MDs;
+ ValueAsMetadata *NewOperand = getAsMetadata(NewValue);
+ for (unsigned Idx = 0; Idx < getNumVariableLocationOps(); ++Idx)
+ MDs.push_back(Idx == OpIdx ? NewOperand
+ : getAsMetadata(getVariableLocationOp(Idx)));
+
+ setRawLocation(DIArgList::get(getVariableLocationOp(0)->getContext(), MDs));
+}
+
+void DPValue::addVariableLocationOps(ArrayRef<Value *> NewValues,
+ DIExpression *NewExpr) {
+ assert(NewExpr->hasAllLocationOps(getNumVariableLocationOps() +
+ NewValues.size()) &&
+ "NewExpr for debug variable intrinsic does not reference every "
+ "location operand.");
+ assert(!is_contained(NewValues, nullptr) && "New values must be non-null");
+ setExpression(NewExpr);
+ SmallVector<ValueAsMetadata *, 4> MDs;
+ for (auto *VMD : location_ops())
+ MDs.push_back(getAsMetadata(VMD));
+ for (auto *VMD : NewValues)
+ MDs.push_back(getAsMetadata(VMD));
+ setRawLocation(DIArgList::get(getVariableLocationOp(0)->getContext(), MDs));
+}
+
+void DPValue::setKillLocation() {
+ // TODO: When/if we remove duplicate values from DIArgLists, we don't need
+ // this set anymore.
+ SmallPtrSet<Value *, 4> RemovedValues;
+ for (Value *OldValue : location_ops()) {
+ if (!RemovedValues.insert(OldValue).second)
+ continue;
+ Value *Poison = PoisonValue::get(OldValue->getType());
+ replaceVariableLocationOp(OldValue, Poison);
+ }
+}
+
+bool DPValue::isKillLocation() const {
+ return (getNumVariableLocationOps() == 0 &&
+ !getExpression()->isComplex()) ||
+ any_of(location_ops(), [](Value *V) { return isa<UndefValue>(V); });
+}
+
+std::optional<uint64_t> DPValue::getFragmentSizeInBits() const {
+ if (auto Fragment = getExpression()->getFragmentInfo())
+ return Fragment->SizeInBits;
+ return getVariable()->getSizeInBits();
+}
+
+DPValue *DPValue::clone() const { return new DPValue(*this); }
+
+DbgVariableIntrinsic *
+DPValue::createDebugIntrinsic(Module *M, Instruction *InsertBefore) const {
+ [[maybe_unused]] DICompileUnit *Unit =
+ getDebugLoc().get()->getScope()->getSubprogram()->getUnit();
+ assert(M && Unit &&
+ "Cannot clone from BasicBlock that is not part of a Module or "
+ "DICompileUnit!");
+ LLVMContext &Context = getDebugLoc()->getContext();
+ Value *Args[] = {MetadataAsValue::get(Context, getRawLocation()),
+ MetadataAsValue::get(Context, getVariable()),
+ MetadataAsValue::get(Context, getExpression())};
+ Function *IntrinsicFn;
+
+ // Work out what sort of intrinsic we're going to produce.
+ switch (getType()) {
+ case DPValue::LocationType::Declare:
+ IntrinsicFn = Intrinsic::getDeclaration(M, Intrinsic::dbg_declare);
+ break;
+ case DPValue::LocationType::Value:
+ IntrinsicFn = Intrinsic::getDeclaration(M, Intrinsic::dbg_value);
+ break;
+ case DPValue::LocationType::End:
+ case DPValue::LocationType::Any:
+ llvm_unreachable("Invalid LocationType");
+ break;
+ }
+
+ // Create the intrinsic from this DPValue's information, optionally insert
+ // into the target location.
+ DbgVariableIntrinsic *DVI = cast<DbgVariableIntrinsic>(
+ CallInst::Create(IntrinsicFn->getFunctionType(), IntrinsicFn, Args));
+ DVI->setTailCall();
+ DVI->setDebugLoc(getDebugLoc());
+ if (InsertBefore)
+ DVI->insertBefore(InsertBefore);
+
+ return DVI;
+}
+
+void DPValue::handleChangedLocation(Metadata *NewLocation) {
+ resetDebugValue(NewLocation);
+}
+
+const BasicBlock *DPValue::getParent() const {
+ return Marker->MarkedInstr->getParent();
+}
+
+BasicBlock *DPValue::getParent() { return Marker->MarkedInstr->getParent(); }
+
+BasicBlock *DPValue::getBlock() { return Marker->getParent(); }
+
+const BasicBlock *DPValue::getBlock() const { return Marker->getParent(); }
+
+Function *DPValue::getFunction() { return getBlock()->getParent(); }
+
+const Function *DPValue::getFunction() const { return getBlock()->getParent(); }
+
+Module *DPValue::getModule() { return getFunction()->getParent(); }
+
+const Module *DPValue::getModule() const { return getFunction()->getParent(); }
+
+LLVMContext &DPValue::getContext() { return getBlock()->getContext(); }
+
+const LLVMContext &DPValue::getContext() const {
+ return getBlock()->getContext();
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// An empty, global DPMarker for the purpose of describing empty ranges of
+// DPValues.
+DPMarker DPMarker::EmptyDPMarker;
+
+void DPMarker::dropDPValues() {
+ while (!StoredDPValues.empty()) {
+ auto It = StoredDPValues.begin();
+ DPValue *DPV = &*It;
+ StoredDPValues.erase(It);
+ DPV->deleteInstr();
+ }
+}
+
+void DPMarker::dropOneDPValue(DPValue *DPV) {
+ assert(DPV->getMarker() == this);
+ StoredDPValues.erase(DPV->getIterator());
+ DPV->deleteInstr();
+}
+
+const BasicBlock *DPMarker::getParent() const {
+ return MarkedInstr->getParent();
+}
+
+BasicBlock *DPMarker::getParent() { return MarkedInstr->getParent(); }
+
+void DPMarker::removeMarker() {
+ // Are there any DPValues in this DPMarker? If not, nothing to preserve.
+ Instruction *Owner = MarkedInstr;
+ if (StoredDPValues.empty()) {
+ eraseFromParent();
+ Owner->DbgMarker = nullptr;
+ return;
+ }
+
+ // The attached DPValues need to be preserved; attach them to the next
+ // instruction. If there isn't a next instruction, put them on the
+ // "trailing" list.
+ DPMarker *NextMarker = Owner->getParent()->getNextMarker(Owner);
+ if (NextMarker == nullptr) {
+ NextMarker = new DPMarker();
+ Owner->getParent()->setTrailingDPValues(NextMarker);
+ }
+ NextMarker->absorbDebugValues(*this, true);
+
+ eraseFromParent();
+}
+
+void DPMarker::removeFromParent() {
+ MarkedInstr->DbgMarker = nullptr;
+ MarkedInstr = nullptr;
+}
+
+void DPMarker::eraseFromParent() {
+ if (MarkedInstr)
+ removeFromParent();
+ dropDPValues();
+ delete this;
+}
+
+iterator_range<DPValue::self_iterator> DPMarker::getDbgValueRange() {
+ return make_range(StoredDPValues.begin(), StoredDPValues.end());
+}
+
+void DPValue::removeFromParent() {
+ getMarker()->StoredDPValues.erase(getIterator());
+}
+
+void DPValue::eraseFromParent() {
+ removeFromParent();
+ deleteInstr();
+}
+
+void DPMarker::insertDPValue(DPValue *New, bool InsertAtHead) {
+ auto It = InsertAtHead ? StoredDPValues.begin() : StoredDPValues.end();
+ StoredDPValues.insert(It, *New);
+ New->setMarker(this);
+}
+
+void DPMarker::absorbDebugValues(DPMarker &Src, bool InsertAtHead) {
+ auto It = InsertAtHead ? StoredDPValues.begin() : StoredDPValues.end();
+ for (DPValue &DPV : Src.StoredDPValues)
+ DPV.setMarker(this);
+
+ StoredDPValues.splice(It, Src.StoredDPValues);
+}
+
+void DPMarker::absorbDebugValues(iterator_range<DPValue::self_iterator> Range,
+ DPMarker &Src, bool InsertAtHead) {
+ for (DPValue &DPV : Range)
+ DPV.setMarker(this);
+
+ auto InsertPos =
+ (InsertAtHead) ? StoredDPValues.begin() : StoredDPValues.end();
+
+ StoredDPValues.splice(InsertPos, Src.StoredDPValues, Range.begin(),
+ Range.end());
+}
+
+iterator_range<simple_ilist<DPValue>::iterator> DPMarker::cloneDebugInfoFrom(
+ DPMarker *From, std::optional<simple_ilist<DPValue>::iterator> from_here,
+ bool InsertAtHead) {
+ DPValue *First = nullptr;
+ // Work out what range of DPValues to clone: normally all the contents of the
+ // "From" marker; optionally we can start from the from_here position and run
+ // down to end().
+ auto Range =
+ make_range(From->StoredDPValues.begin(), From->StoredDPValues.end());
+ if (from_here.has_value())
+ Range = make_range(*from_here, From->StoredDPValues.end());
+
+ // Clone each DPValue and insert into StoredDPValues; optionally place them
+ // at the start or the end of the list.
+ auto Pos = (InsertAtHead) ? StoredDPValues.begin() : StoredDPValues.end();
+ for (DPValue &DPV : Range) {
+ DPValue *New = DPV.clone();
+ New->setMarker(this);
+ StoredDPValues.insert(Pos, *New);
+ if (!First)
+ First = New;
+ }
+
+ if (!First)
+ return {StoredDPValues.end(), StoredDPValues.end()};
+
+ if (InsertAtHead)
+ // If InsertAtHead is set, we cloned a range onto the front of the
+ // StoredDPValues collection; return that range.
+ return {StoredDPValues.begin(), Pos};
+ else
+ // We inserted a block at the end; return that range.
+ return {First->getIterator(), StoredDPValues.end()};
+}
+
+} // end namespace llvm
+
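A hypothetical helper built on the DPMarker primitives above, shown only to illustrate the intended usage pattern; make_early_inc_range keeps the iteration valid while erasing:

// Drop every DPValue in a marker that describes a given local variable.
static void dropValuesFor(DPMarker &Marker, DILocalVariable *Var) {
  for (DPValue &DPV : make_early_inc_range(Marker.getDbgValueRange()))
    if (DPV.getVariable() == Var)
      Marker.dropOneDPValue(&DPV);
}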
diff --git a/contrib/llvm-project/llvm/lib/IR/Dominators.cpp b/contrib/llvm-project/llvm/lib/IR/Dominators.cpp
index 24cc9f46ff79..0f4d112c69c1 100644
--- a/contrib/llvm-project/llvm/lib/IR/Dominators.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Dominators.cpp
@@ -25,6 +25,7 @@
#include "llvm/PassRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/GenericDomTreeConstruction.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
diff --git a/contrib/llvm-project/llvm/lib/IR/EHPersonalities.cpp b/contrib/llvm-project/llvm/lib/IR/EHPersonalities.cpp
index afbb2bb8275d..fb5e2d5c517e 100644
--- a/contrib/llvm-project/llvm/lib/IR/EHPersonalities.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/EHPersonalities.cpp
@@ -114,7 +114,7 @@ DenseMap<BasicBlock *, ColorVector> llvm::colorEHFunclets(Function &F) {
// Note: Despite not being a funclet in the truest sense, a catchswitch is
// considered to belong to its own funclet for the purposes of coloring.
- DEBUG_WITH_TYPE("winehprepare-coloring",
+ DEBUG_WITH_TYPE("win-eh-prepare-coloring",
dbgs() << "\nColoring funclets for " << F.getName() << "\n");
Worklist.push_back({EntryBlock, EntryBlock});
@@ -123,7 +123,7 @@ DenseMap<BasicBlock *, ColorVector> llvm::colorEHFunclets(Function &F) {
BasicBlock *Visiting;
BasicBlock *Color;
std::tie(Visiting, Color) = Worklist.pop_back_val();
- DEBUG_WITH_TYPE("winehprepare-coloring",
+ DEBUG_WITH_TYPE("win-eh-prepare-coloring",
dbgs() << "Visiting " << Visiting->getName() << ", "
<< Color->getName() << "\n");
Instruction *VisitingHead = Visiting->getFirstNonPHI();
@@ -138,7 +138,7 @@ DenseMap<BasicBlock *, ColorVector> llvm::colorEHFunclets(Function &F) {
else
continue;
- DEBUG_WITH_TYPE("winehprepare-coloring",
+ DEBUG_WITH_TYPE("win-eh-prepare-coloring",
dbgs() << " Assigned color \'" << Color->getName()
<< "\' to block \'" << Visiting->getName()
<< "\'.\n");
diff --git a/contrib/llvm-project/llvm/lib/IR/Function.cpp b/contrib/llvm-project/llvm/lib/IR/Function.cpp
index 27219e89dc5f..22e2455462bf 100644
--- a/contrib/llvm-project/llvm/lib/IR/Function.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Function.cpp
@@ -37,6 +37,7 @@
#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/IR/IntrinsicsDirectX.h"
#include "llvm/IR/IntrinsicsHexagon.h"
+#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/IR/IntrinsicsMips.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
@@ -80,6 +81,27 @@ static cl::opt<unsigned> NonGlobalValueMaxNameSize(
"non-global-value-max-name-size", cl::Hidden, cl::init(1024),
cl::desc("Maximum size for the name of non-global values."));
+void Function::convertToNewDbgValues() {
+ IsNewDbgInfoFormat = true;
+ for (auto &BB : *this) {
+ BB.convertToNewDbgValues();
+ }
+}
+
+void Function::convertFromNewDbgValues() {
+ IsNewDbgInfoFormat = false;
+ for (auto &BB : *this) {
+ BB.convertFromNewDbgValues();
+ }
+}
+
+void Function::setIsNewDbgInfoFormat(bool NewFlag) {
+ if (NewFlag && !IsNewDbgInfoFormat)
+ convertToNewDbgValues();
+ else if (!NewFlag && IsNewDbgInfoFormat)
+ convertFromNewDbgValues();
+}
+
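An illustrative round trip using the conversion hooks above; the wrapper itself is hypothetical:

// Flip a function into the DPValue format, run some work, then restore
// the intrinsic-based format.
static void withNewFormat(Function &F, function_ref<void(Function &)> Run) {
  F.setIsNewDbgInfoFormat(true);  // dbg.value intrinsics -> DPValues
  Run(F);
  F.setIsNewDbgInfoFormat(false); // DPValues -> dbg.value intrinsics
}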
//===----------------------------------------------------------------------===//
// Argument Implementation
//===----------------------------------------------------------------------===//
@@ -401,7 +423,7 @@ Function::Function(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace,
: GlobalObject(Ty, Value::FunctionVal,
OperandTraits<Function>::op_begin(this), 0, Linkage, name,
computeAddrSpace(AddrSpace, ParentModule)),
- NumArgs(Ty->getNumParams()) {
+ NumArgs(Ty->getNumParams()), IsNewDbgInfoFormat(false) {
assert(FunctionType::isValidReturnType(getReturnType()) &&
"invalid return type");
setGlobalObjectSubClassData(0);
@@ -417,7 +439,7 @@ Function::Function(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace,
if (ParentModule)
ParentModule->getFunctionList().push_back(this);
- HasLLVMReservedName = getName().startswith("llvm.");
+ HasLLVMReservedName = getName().starts_with("llvm.");
// Ensure intrinsics have the right parameter attributes.
// Note, the IntID field will have been set in Value::setName if this function
// name is a valid intrinsic ID.
@@ -517,15 +539,7 @@ void Function::stealArgumentListFrom(Function &Src) {
Src.setValueSubclassData(Src.getSubclassDataFromValue() | (1 << 0));
}
-// dropAllReferences() - This function causes all the subinstructions to "let
-// go" of all references that they are maintaining. This allows one to
-// 'delete' a whole class at a time, even though there may be circular
-// references... first all references are dropped, and all use counts go to
-// zero. Then everything is deleted for real. Note that no operations are
-// valid on an object that has "dropped all references", except operator
-// delete.
-//
-void Function::dropAllReferences() {
+void Function::deleteBodyImpl(bool ShouldDrop) {
setIsMaterializable(false);
for (BasicBlock &BB : *this)
@@ -536,10 +550,18 @@ void Function::dropAllReferences() {
while (!BasicBlocks.empty())
BasicBlocks.begin()->eraseFromParent();
- // Drop uses of any optional data (real or placeholder).
if (getNumOperands()) {
- User::dropAllReferences();
- setNumHungOffUseOperands(0);
+ if (ShouldDrop) {
+ // Drop uses of any optional data (real or placeholder).
+ User::dropAllReferences();
+ setNumHungOffUseOperands(0);
+ } else {
+ // The code needs to match Function::allocHungoffUselist().
+ auto *CPN = ConstantPointerNull::get(PointerType::get(getContext(), 0));
+ Op<0>().set(CPN);
+ Op<1>().set(CPN);
+ Op<2>().set(CPN);
+ }
setValueSubclassData(getSubclassDataFromValue() & ~0xe);
}
@@ -854,7 +876,7 @@ bool Function::isTargetIntrinsic() const {
///
/// Returns the relevant slice of \c IntrinsicNameTable
static ArrayRef<const char *> findTargetSubtable(StringRef Name) {
- assert(Name.startswith("llvm."));
+ assert(Name.starts_with("llvm."));
ArrayRef<IntrinsicTargetInfo> Targets(TargetInfos);
// Drop "llvm." and take the first dotted component. That will be the target
@@ -890,9 +912,10 @@ Intrinsic::ID Function::lookupIntrinsicID(StringRef Name) {
: Intrinsic::not_intrinsic;
}
-void Function::recalculateIntrinsicID() {
+void Function::updateAfterNameChange() {
+ LibFuncCache = UnknownLibFunc;
StringRef Name = getName();
- if (!Name.startswith("llvm.")) {
+ if (!Name.starts_with("llvm.")) {
HasLLVMReservedName = false;
IntID = Intrinsic::not_intrinsic;
return;
@@ -1751,7 +1774,8 @@ std::optional<Function *> Intrinsic::remangleIntrinsicFunction(Function *F) {
bool Function::hasAddressTaken(const User **PutOffender,
bool IgnoreCallbackUses,
bool IgnoreAssumeLikeCalls, bool IgnoreLLVMUsed,
- bool IgnoreARCAttachedCall) const {
+ bool IgnoreARCAttachedCall,
+ bool IgnoreCastedDirectCall) const {
for (const Use &U : uses()) {
const User *FU = U.getUser();
if (isa<BlockAddress>(FU))
@@ -1800,7 +1824,8 @@ bool Function::hasAddressTaken(const User **PutOffender,
continue;
}
- if (!Call->isCallee(&U) || Call->getFunctionType() != getFunctionType()) {
+ if (!Call->isCallee(&U) || (!IgnoreCastedDirectCall &&
+ Call->getFunctionType() != getFunctionType())) {
if (IgnoreARCAttachedCall &&
Call->isOperandBundleOfType(LLVMContext::OB_clang_arc_attachedcall,
U.getOperandNo()))
@@ -1878,7 +1903,7 @@ void Function::allocHungoffUselist() {
setNumHungOffUseOperands(3);
// Initialize the uselist with placeholder operands to allow traversal.
- auto *CPN = ConstantPointerNull::get(Type::getInt1PtrTy(getContext(), 0));
+ auto *CPN = ConstantPointerNull::get(PointerType::get(getContext(), 0));
Op<0>().set(CPN);
Op<1>().set(CPN);
Op<2>().set(CPN);
@@ -1890,8 +1915,7 @@ void Function::setHungoffOperand(Constant *C) {
allocHungoffUselist();
Op<Idx>().set(C);
} else if (getNumOperands()) {
- Op<Idx>().set(
- ConstantPointerNull::get(Type::getInt1PtrTy(getContext(), 0)));
+ Op<Idx>().set(ConstantPointerNull::get(PointerType::get(getContext(), 0)));
}
}
diff --git a/contrib/llvm-project/llvm/lib/IR/Globals.cpp b/contrib/llvm-project/llvm/lib/IR/Globals.cpp
index 7bd4503a689e..51bdbeb0abf2 100644
--- a/contrib/llvm-project/llvm/lib/IR/Globals.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Globals.cpp
@@ -482,6 +482,8 @@ void GlobalVariable::copyAttributesFrom(const GlobalVariable *Src) {
GlobalObject::copyAttributesFrom(Src);
setExternallyInitialized(Src->isExternallyInitialized());
setAttributes(Src->getAttributes());
+ if (auto CM = Src->getCodeModel())
+ setCodeModel(*CM);
}
void GlobalVariable::dropAllReferences() {
@@ -489,6 +491,15 @@ void GlobalVariable::dropAllReferences() {
clearMetadata();
}
+void GlobalVariable::setCodeModel(CodeModel::Model CM) {
+ unsigned CodeModelData = static_cast<unsigned>(CM) + 1;
+ unsigned OldData = getGlobalValueSubClassData();
+ unsigned NewData = (OldData & ~(CodeModelMask << CodeModelShift)) |
+ (CodeModelData << CodeModelShift);
+ setGlobalValueSubClassData(NewData);
+ assert(getCodeModel() == CM && "Code model representation error!");
+}
+
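For reference, a sketch of the matching decode, assuming the same CodeModelShift/CodeModelMask constants and the "stored value is CM + 1, zero means unset" encoding used by the setter above:

// Recover the optional code model from the packed subclass data.
static std::optional<CodeModel::Model> decodeCodeModel(unsigned SubClassData) {
  unsigned Data = (SubClassData >> CodeModelShift) & CodeModelMask;
  if (Data == 0)
    return std::nullopt; // no explicit code model was set
  return static_cast<CodeModel::Model>(Data - 1);
}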
//===----------------------------------------------------------------------===//
// GlobalAlias Implementation
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/IR/IRBuilder.cpp b/contrib/llvm-project/llvm/lib/IR/IRBuilder.cpp
index 094819dc39b5..b09b80f95871 100644
--- a/contrib/llvm-project/llvm/lib/IR/IRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/IRBuilder.cpp
@@ -220,6 +220,9 @@ CallInst *IRBuilderBase::CreateMemTransferInst(
Intrinsic::ID IntrID, Value *Dst, MaybeAlign DstAlign, Value *Src,
MaybeAlign SrcAlign, Value *Size, bool isVolatile, MDNode *TBAATag,
MDNode *TBAAStructTag, MDNode *ScopeTag, MDNode *NoAliasTag) {
+ assert((IntrID == Intrinsic::memcpy || IntrID == Intrinsic::memcpy_inline ||
+ IntrID == Intrinsic::memmove) &&
+ "Unexpected intrinsic ID");
Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
Module *M = BB->getParent()->getParent();
@@ -250,41 +253,6 @@ CallInst *IRBuilderBase::CreateMemTransferInst(
return CI;
}
-CallInst *IRBuilderBase::CreateMemCpyInline(
- Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign,
- Value *Size, bool IsVolatile, MDNode *TBAATag, MDNode *TBAAStructTag,
- MDNode *ScopeTag, MDNode *NoAliasTag) {
- Value *Ops[] = {Dst, Src, Size, getInt1(IsVolatile)};
- Type *Tys[] = {Dst->getType(), Src->getType(), Size->getType()};
- Function *F = BB->getParent();
- Module *M = F->getParent();
- Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy_inline, Tys);
-
- CallInst *CI = CreateCall(TheFn, Ops);
-
- auto *MCI = cast<MemCpyInlineInst>(CI);
- if (DstAlign)
- MCI->setDestAlignment(*DstAlign);
- if (SrcAlign)
- MCI->setSourceAlignment(*SrcAlign);
-
- // Set the TBAA info if present.
- if (TBAATag)
- MCI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
-
- // Set the TBAA Struct info if present.
- if (TBAAStructTag)
- MCI->setMetadata(LLVMContext::MD_tbaa_struct, TBAAStructTag);
-
- if (ScopeTag)
- MCI->setMetadata(LLVMContext::MD_alias_scope, ScopeTag);
-
- if (NoAliasTag)
- MCI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
-
- return CI;
-}
-
CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemCpy(
Value *Dst, Align DstAlign, Value *Src, Align SrcAlign, Value *Size,
uint32_t ElementSize, MDNode *TBAATag, MDNode *TBAAStructTag,
@@ -323,35 +291,82 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemCpy(
return CI;
}
-CallInst *IRBuilderBase::CreateMemMove(Value *Dst, MaybeAlign DstAlign,
- Value *Src, MaybeAlign SrcAlign,
- Value *Size, bool isVolatile,
- MDNode *TBAATag, MDNode *ScopeTag,
- MDNode *NoAliasTag) {
- Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
- Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
+/// isConstantOne - Return true only if Val is the constant int 1.
+static bool isConstantOne(const Value *Val) {
+ assert(Val && "isConstantOne does not work with nullptr Val");
+ const ConstantInt *CVal = dyn_cast<ConstantInt>(Val);
+ return CVal && CVal->isOne();
+}
+
+CallInst *IRBuilderBase::CreateMalloc(Type *IntPtrTy, Type *AllocTy,
+ Value *AllocSize, Value *ArraySize,
+ ArrayRef<OperandBundleDef> OpB,
+ Function *MallocF, const Twine &Name) {
+ // malloc(type) becomes:
+ // i8* malloc(typeSize)
+ // malloc(type, arraySize) becomes:
+ // i8* malloc(typeSize*arraySize)
+ if (!ArraySize)
+ ArraySize = ConstantInt::get(IntPtrTy, 1);
+ else if (ArraySize->getType() != IntPtrTy)
+ ArraySize = CreateIntCast(ArraySize, IntPtrTy, false);
+
+ if (!isConstantOne(ArraySize)) {
+ if (isConstantOne(AllocSize)) {
+ AllocSize = ArraySize; // Operand * 1 = Operand
+ } else {
+ // Multiply type size by the array size...
+ AllocSize = CreateMul(ArraySize, AllocSize, "mallocsize");
+ }
+ }
+
+ assert(AllocSize->getType() == IntPtrTy && "malloc arg is wrong size");
+ // Create the call to Malloc.
Module *M = BB->getParent()->getParent();
- Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys);
+ Type *BPTy = PointerType::getUnqual(Context);
+ FunctionCallee MallocFunc = MallocF;
+ if (!MallocFunc)
+ // prototype malloc as "void *malloc(size_t)"
+ MallocFunc = M->getOrInsertFunction("malloc", BPTy, IntPtrTy);
+ CallInst *MCall = CreateCall(MallocFunc, AllocSize, OpB, Name);
+
+ MCall->setTailCall();
+ if (Function *F = dyn_cast<Function>(MallocFunc.getCallee())) {
+ MCall->setCallingConv(F->getCallingConv());
+ F->setReturnDoesNotAlias();
+ }
- CallInst *CI = CreateCall(TheFn, Ops);
+ assert(!MCall->getType()->isVoidTy() && "Malloc has void return type");
- auto *MMI = cast<MemMoveInst>(CI);
- if (DstAlign)
- MMI->setDestAlignment(*DstAlign);
- if (SrcAlign)
- MMI->setSourceAlignment(*SrcAlign);
+ return MCall;
+}
- // Set the TBAA info if present.
- if (TBAATag)
- CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+CallInst *IRBuilderBase::CreateMalloc(Type *IntPtrTy, Type *AllocTy,
+ Value *AllocSize, Value *ArraySize,
+ Function *MallocF, const Twine &Name) {
- if (ScopeTag)
- CI->setMetadata(LLVMContext::MD_alias_scope, ScopeTag);
+ return CreateMalloc(IntPtrTy, AllocTy, AllocSize, ArraySize, std::nullopt,
+ MallocF, Name);
+}
- if (NoAliasTag)
- CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
+/// CreateFree - Generate the IR for a call to the builtin free function.
+CallInst *IRBuilderBase::CreateFree(Value *Source,
+ ArrayRef<OperandBundleDef> Bundles) {
+ assert(Source->getType()->isPointerTy() &&
+ "Can not free something of nonpointer type!");
- return CI;
+ Module *M = BB->getParent()->getParent();
+
+ Type *VoidTy = Type::getVoidTy(M->getContext());
+ Type *VoidPtrTy = PointerType::getUnqual(M->getContext());
+ // prototype free as "void free(void*)"
+ FunctionCallee FreeFunc = M->getOrInsertFunction("free", VoidTy, VoidPtrTy);
+ CallInst *Result = CreateCall(FreeFunc, Source, Bundles, "");
+ Result->setTailCall();
+ if (Function *F = dyn_cast<Function>(FreeFunc.getCallee()))
+ Result->setCallingConv(F->getCallingConv());
+
+ return Result;
}
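A usage sketch for the builder-based helpers above, which replace the old CallInst::CreateMalloc/CreateFree entry points; default arguments are assumed to match the declarations in IRBuilder.h:

// Emit malloc(sizeof(EltTy) * N); with opaque pointers, no bitcast of the
// result is needed. Pair with B.CreateFree(Ptr) to release it.
static Value *emitHeapArray(IRBuilderBase &B, Module &M, Type *EltTy,
                            Value *N) {
  const DataLayout &DL = M.getDataLayout();
  Type *IntPtrTy = DL.getIntPtrType(B.getContext());
  Value *EltSize =
      ConstantInt::get(IntPtrTy, DL.getTypeAllocSize(EltTy).getFixedValue());
  return B.CreateMalloc(IntPtrTy, EltTy, EltSize, N, /*MallocF=*/nullptr,
                        "arr");
}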
CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemMove(
@@ -521,19 +536,8 @@ static MaybeAlign getAlign(Value *Ptr) {
}
CallInst *IRBuilderBase::CreateThreadLocalAddress(Value *Ptr) {
-#ifndef NDEBUG
- // Handle specially for constexpr cast. This is possible when
- // opaque pointers not enabled since constant could be sinked
- // directly by the design of llvm. This could be eliminated
- // after we eliminate the abuse of constexpr.
- auto *V = Ptr;
- if (auto *CE = dyn_cast<ConstantExpr>(V))
- if (CE->isCast())
- V = CE->getOperand(0);
-
- assert(isa<GlobalValue>(V) && cast<GlobalValue>(V)->isThreadLocal() &&
+ assert(isa<GlobalValue>(Ptr) && cast<GlobalValue>(Ptr)->isThreadLocal() &&
"threadlocal_address only applies to thread local variables.");
-#endif
CallInst *CI = CreateIntrinsic(llvm::Intrinsic::threadlocal_address,
{Ptr->getType()}, {Ptr});
if (MaybeAlign A = getAlign(Ptr)) {
@@ -1225,29 +1229,6 @@ Value *IRBuilderBase::CreateVectorSplat(ElementCount EC, Value *V,
return CreateShuffleVector(V, Zeros, Name + ".splat");
}
-Value *IRBuilderBase::CreateExtractInteger(
- const DataLayout &DL, Value *From, IntegerType *ExtractedTy,
- uint64_t Offset, const Twine &Name) {
- auto *IntTy = cast<IntegerType>(From->getType());
- assert(DL.getTypeStoreSize(ExtractedTy) + Offset <=
- DL.getTypeStoreSize(IntTy) &&
- "Element extends past full value");
- uint64_t ShAmt = 8 * Offset;
- Value *V = From;
- if (DL.isBigEndian())
- ShAmt = 8 * (DL.getTypeStoreSize(IntTy) -
- DL.getTypeStoreSize(ExtractedTy) - Offset);
- if (ShAmt) {
- V = CreateLShr(V, ShAmt, Name + ".shift");
- }
- assert(ExtractedTy->getBitWidth() <= IntTy->getBitWidth() &&
- "Cannot extract to a larger integer!");
- if (ExtractedTy != IntTy) {
- V = CreateTrunc(V, ExtractedTy, Name + ".trunc");
- }
- return V;
-}
-
Value *IRBuilderBase::CreatePreserveArrayAccessIndex(
Type *ElTy, Value *Base, unsigned Dimension, unsigned LastIndex,
MDNode *DbgInfo) {
diff --git a/contrib/llvm-project/llvm/lib/IR/IRPrintingPasses.cpp b/contrib/llvm-project/llvm/lib/IR/IRPrintingPasses.cpp
index 78c2f192d2c0..b19210e776ed 100644
--- a/contrib/llvm-project/llvm/lib/IR/IRPrintingPasses.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/IRPrintingPasses.cpp
@@ -39,6 +39,12 @@ public:
ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {}
bool runOnModule(Module &M) override {
+ // RemoveDIs: there's no textual representation of the DPValue debug-info,
+ // so convert to dbg.values before writing out.
+ bool IsNewDbgInfoFormat = M.IsNewDbgInfoFormat;
+ if (IsNewDbgInfoFormat)
+ M.convertFromNewDbgValues();
+
if (llvm::isFunctionInPrintList("*")) {
if (!Banner.empty())
OS << Banner << "\n";
@@ -55,6 +61,10 @@ public:
}
}
}
+
+ if (IsNewDbgInfoFormat)
+ M.convertToNewDbgValues();
+
return false;
}
@@ -77,6 +87,12 @@ public:
// This pass just prints a banner followed by the function as it's processed.
bool runOnFunction(Function &F) override {
+ // RemoveDIs: there's no textual representation of the DPValue debug-info,
+ // so convert to dbg.values before writing out.
+ bool IsNewDbgInfoFormat = F.IsNewDbgInfoFormat;
+ if (IsNewDbgInfoFormat)
+ F.convertFromNewDbgValues();
+
if (isFunctionInPrintList(F.getName())) {
if (forcePrintModuleIR())
OS << Banner << " (function: " << F.getName() << ")\n"
@@ -84,6 +100,10 @@ public:
else
OS << Banner << '\n' << static_cast<Value &>(F);
}
+
+ if (IsNewDbgInfoFormat)
+ F.convertToNewDbgValues();
+
return false;
}
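Both printing passes above repeat the same convert-print-convert-back shape; a hypothetical RAII guard (not part of this change) makes the pattern explicit:

// Drop a module to the intrinsic-based format for the duration of a scope,
// restoring the DPValue format afterwards if it was in use.
struct ScopedIntrinsicDbgFormat {
  Module &M;
  bool WasNew;
  ScopedIntrinsicDbgFormat(Module &M) : M(M), WasNew(M.IsNewDbgInfoFormat) {
    if (WasNew)
      M.convertFromNewDbgValues();
  }
  ~ScopedIntrinsicDbgFormat() {
    if (WasNew)
      M.convertToNewDbgValues();
  }
};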
diff --git a/contrib/llvm-project/llvm/lib/IR/Instruction.cpp b/contrib/llvm-project/llvm/lib/IR/Instruction.cpp
index 0dcf0ac6a78a..717e33f1857b 100644
--- a/contrib/llvm-project/llvm/lib/IR/Instruction.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Instruction.cpp
@@ -77,23 +77,45 @@ const Function *Instruction::getFunction() const {
}
void Instruction::removeFromParent() {
+ // Perform any debug-info maintenance required.
+ handleMarkerRemoval();
+
getParent()->getInstList().remove(getIterator());
}
-iplist<Instruction>::iterator Instruction::eraseFromParent() {
+void Instruction::handleMarkerRemoval() {
+ if (!Parent->IsNewDbgInfoFormat || !DbgMarker)
+ return;
+
+ DbgMarker->removeMarker();
+}
+
+BasicBlock::iterator Instruction::eraseFromParent() {
+ handleMarkerRemoval();
return getParent()->getInstList().erase(getIterator());
}
+void Instruction::insertBefore(Instruction *InsertPos) {
+ insertBefore(InsertPos->getIterator());
+}
+
/// Insert an unlinked instruction into a basic block immediately before the
/// specified instruction.
-void Instruction::insertBefore(Instruction *InsertPos) {
- insertInto(InsertPos->getParent(), InsertPos->getIterator());
+void Instruction::insertBefore(BasicBlock::iterator InsertPos) {
+ insertBefore(*InsertPos->getParent(), InsertPos);
}
/// Insert an unlinked instruction into a basic block immediately after the
/// specified instruction.
void Instruction::insertAfter(Instruction *InsertPos) {
- insertInto(InsertPos->getParent(), std::next(InsertPos->getIterator()));
+ BasicBlock *DestParent = InsertPos->getParent();
+
+ DestParent->getInstList().insertAfter(InsertPos->getIterator(), this);
+
+ // No need to manually update DPValues: if we insert after an instruction
+ // position, then we can never have any DPValues on "this".
+ if (DestParent->IsNewDbgInfoFormat)
+ DestParent->createMarker(this);
}
BasicBlock::iterator Instruction::insertInto(BasicBlock *ParentBB,
@@ -101,23 +123,160 @@ BasicBlock::iterator Instruction::insertInto(BasicBlock *ParentBB,
assert(getParent() == nullptr && "Expected detached instruction");
assert((It == ParentBB->end() || It->getParent() == ParentBB) &&
"It not in ParentBB");
- return ParentBB->getInstList().insert(It, this);
+ insertBefore(*ParentBB, It);
+ return getIterator();
+}
+
+extern cl::opt<bool> UseNewDbgInfoFormat;
+
+void Instruction::insertBefore(BasicBlock &BB,
+ InstListType::iterator InsertPos) {
+ assert(!DbgMarker);
+
+ BB.getInstList().insert(InsertPos, this);
+
+ if (!BB.IsNewDbgInfoFormat)
+ return;
+
+ BB.createMarker(this);
+
+ // We've inserted "this": if InsertAtHead is set then it comes before any
+ // DPValues attached to InsertPos. But if it's not set, then any DPValues
+ // should now come before "this".
+ bool InsertAtHead = InsertPos.getHeadBit();
+ if (!InsertAtHead) {
+ DPMarker *SrcMarker = BB.getMarker(InsertPos);
+ if (!SrcMarker)
+ SrcMarker = BB.createMarker(InsertPos);
+ DbgMarker->absorbDebugValues(*SrcMarker, false);
+ }
+
+ // If we're inserting a terminator, check if we need to flush out
+ // TrailingDPValues.
+ if (isTerminator())
+ getParent()->flushTerminatorDbgValues();
}
/// Unlink this instruction from its current basic block and insert it into the
/// basic block that MovePos lives in, right before MovePos.
void Instruction::moveBefore(Instruction *MovePos) {
- moveBefore(*MovePos->getParent(), MovePos->getIterator());
+ moveBeforeImpl(*MovePos->getParent(), MovePos->getIterator(), false);
+}
+
+void Instruction::moveBeforePreserving(Instruction *MovePos) {
+ moveBeforeImpl(*MovePos->getParent(), MovePos->getIterator(), true);
}
void Instruction::moveAfter(Instruction *MovePos) {
- moveBefore(*MovePos->getParent(), ++MovePos->getIterator());
+ auto NextIt = std::next(MovePos->getIterator());
+ // We want this instruction to be moved to before NextIt in the instruction
+ // list, but before NextIt's debug value range.
+ NextIt.setHeadBit(true);
+ moveBeforeImpl(*MovePos->getParent(), NextIt, false);
}
-void Instruction::moveBefore(BasicBlock &BB,
- SymbolTableList<Instruction>::iterator I) {
+void Instruction::moveAfterPreserving(Instruction *MovePos) {
+ auto NextIt = std::next(MovePos->getIterator());
+ // We want this instruction and its debug range to be moved to before NextIt
+ // in the instruction list, but before NextIt's debug value range.
+ NextIt.setHeadBit(true);
+ moveBeforeImpl(*MovePos->getParent(), NextIt, true);
+}
+
+void Instruction::moveBefore(BasicBlock &BB, InstListType::iterator I) {
+ moveBeforeImpl(BB, I, false);
+}
+
+void Instruction::moveBeforePreserving(BasicBlock &BB,
+ InstListType::iterator I) {
+ moveBeforeImpl(BB, I, true);
+}
+
+void Instruction::moveBeforeImpl(BasicBlock &BB, InstListType::iterator I,
+ bool Preserve) {
assert(I == BB.end() || I->getParent() == &BB);
- BB.splice(I, getParent(), getIterator());
+ bool InsertAtHead = I.getHeadBit();
+
+ // If we've been given the "Preserve" flag, then just move the DPValues with
+ // the instruction; no further special handling is needed.
+ if (BB.IsNewDbgInfoFormat && DbgMarker && !Preserve) {
+ if (I != this->getIterator() || InsertAtHead) {
+ // "this" is definitely moving in the list, or it's moving ahead of its
+ // attached DPValues. Detach any existing DPValues.
+ handleMarkerRemoval();
+ }
+ }
+
+ // Move this single instruction. Use the list splice method directly, not
+ // the block splicer, which will do more debug-info things.
+ BB.getInstList().splice(I, getParent()->getInstList(), getIterator());
+
+ if (BB.IsNewDbgInfoFormat && !Preserve) {
+ if (!DbgMarker)
+ BB.createMarker(this);
+ DPMarker *NextMarker = getParent()->getNextMarker(this);
+
+ // If we're inserting at point I, and not in front of the DPValues attached
+ // there, then we should absorb the DPValues attached to I.
+ if (NextMarker && !InsertAtHead)
+ DbgMarker->absorbDebugValues(*NextMarker, false);
+ }
+
+ if (isTerminator())
+ getParent()->flushTerminatorDbgValues();
+}
+
+iterator_range<DPValue::self_iterator>
+Instruction::cloneDebugInfoFrom(const Instruction *From,
+ std::optional<DPValue::self_iterator> FromHere,
+ bool InsertAtHead) {
+ if (!From->DbgMarker)
+ return DPMarker::getEmptyDPValueRange();
+
+ assert(getParent()->IsNewDbgInfoFormat);
+ assert(getParent()->IsNewDbgInfoFormat ==
+ From->getParent()->IsNewDbgInfoFormat);
+
+ if (!DbgMarker)
+ getParent()->createMarker(this);
+
+ return DbgMarker->cloneDebugInfoFrom(From->DbgMarker, FromHere, InsertAtHead);
+}
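A sketch of the intended replacement idiom, assuming the default arguments declared for cloneDebugInfoFrom; dropping the originals matters because removeMarker() would otherwise re-home them onto the next instruction when Old is erased:

// Replace Old with New in-place, carrying the attached DPValues across.
static void replacePreservingDbgRecords(Instruction *Old, Instruction *New) {
  New->insertBefore(Old);
  New->cloneDebugInfoFrom(Old);
  Old->dropDbgValues();
  Old->replaceAllUsesWith(New);
  Old->eraseFromParent();
}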
+
+iterator_range<DPValue::self_iterator>
+Instruction::getDbgValueRange() const {
+ BasicBlock *Parent = const_cast<BasicBlock *>(getParent());
+ assert(Parent && "Instruction must be inserted to have DPValues");
+ (void)Parent;
+
+ if (!DbgMarker)
+ return DPMarker::getEmptyDPValueRange();
+
+ return DbgMarker->getDbgValueRange();
+}
+
+std::optional<DPValue::self_iterator> Instruction::getDbgReinsertionPosition() {
+ // Is there a marker on the next instruction?
+ DPMarker *NextMarker = getParent()->getNextMarker(this);
+ if (!NextMarker)
+ return std::nullopt;
+
+ // Are there any DPValues in the next marker?
+ if (NextMarker->StoredDPValues.empty())
+ return std::nullopt;
+
+ return NextMarker->StoredDPValues.begin();
+}
+
+bool Instruction::hasDbgValues() const { return !getDbgValueRange().empty(); }
+
+void Instruction::dropDbgValues() {
+ if (DbgMarker)
+ DbgMarker->dropDPValues();
+}
+
+void Instruction::dropOneDbgValue(DPValue *DPV) {
+ DbgMarker->dropOneDPValue(DPV);
}
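Taken together, the accessors above support simple queries over the records in a block; a minimal illustrative example:

// Count the variable-location records attached ahead of each instruction.
static unsigned countDPValues(BasicBlock &BB) {
  unsigned N = 0;
  for (Instruction &I : BB)
    for (DPValue &DPV : I.getDbgValueRange()) {
      (void)DPV;
      ++N;
    }
  return N;
}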
bool Instruction::comesBefore(const Instruction *Other) const {
@@ -129,7 +288,7 @@ bool Instruction::comesBefore(const Instruction *Other) const {
return Order < Other->Order;
}
-Instruction *Instruction::getInsertionPointAfterDef() {
+std::optional<BasicBlock::iterator> Instruction::getInsertionPointAfterDef() {
assert(!getType()->isVoidTy() && "Instruction must define result");
BasicBlock *InsertBB;
BasicBlock::iterator InsertPt;
@@ -142,18 +301,22 @@ Instruction *Instruction::getInsertionPointAfterDef() {
} else if (isa<CallBrInst>(this)) {
// Def is available in multiple successors, there's no single dominating
// insertion point.
- return nullptr;
+ return std::nullopt;
} else {
assert(!isTerminator() && "Only invoke/callbr terminators return value");
InsertBB = getParent();
InsertPt = std::next(getIterator());
+ // Any instruction inserted immediately after "this" will come before the
+ // point at which any debug-info records take effect -- thus, set the head
+ // bit to signal that to the debug-info-transfer code.
+ InsertPt.setHeadBit(true);
}
// catchswitch blocks don't have any legal insertion point (because they
// are both an exception pad and a terminator).
if (InsertPt == InsertBB->end())
- return nullptr;
- return &*InsertPt;
+ return std::nullopt;
+ return InsertPt;
}
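With the optional return, callers must now handle defs that have no legal insertion point; an illustrative caller:

// Place NewI immediately after Def's definition point, if one exists;
// callbr defs (and catchswitch pads) yield std::nullopt.
static bool placeAfterDef(Instruction *Def, Instruction *NewI) {
  std::optional<BasicBlock::iterator> It = Def->getInsertionPointAfterDef();
  if (!It)
    return false;
  NewI->insertBefore(*It);
  return true;
}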
bool Instruction::isOnlyUserOfAnyOperand() {
@@ -172,6 +335,12 @@ void Instruction::setIsExact(bool b) {
cast<PossiblyExactOperator>(this)->setIsExact(b);
}
+void Instruction::setNonNeg(bool b) {
+ assert(isa<PossiblyNonNegInst>(this) && "Must be zext");
+ SubclassOptionalData = (SubclassOptionalData & ~PossiblyNonNegInst::NonNeg) |
+ (b * PossiblyNonNegInst::NonNeg);
+}
+
bool Instruction::hasNoUnsignedWrap() const {
return cast<OverflowingBinaryOperator>(this)->hasNoUnsignedWrap();
}
@@ -180,6 +349,11 @@ bool Instruction::hasNoSignedWrap() const {
return cast<OverflowingBinaryOperator>(this)->hasNoSignedWrap();
}
+bool Instruction::hasNonNeg() const {
+ assert(isa<PossiblyNonNegInst>(this) && "Must be zext");
+ return (SubclassOptionalData & PossiblyNonNegInst::NonNeg) != 0;
+}
+
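A short sketch of tagging a zero-extension with the new flag; the accessors above assert that the instruction is a zext:

// Emit a zext and mark it non-negative, so zext == sext for this value.
static Value *zextKnownNonNeg(IRBuilderBase &B, Value *V, Type *Ty) {
  Value *Z = B.CreateZExt(V, Ty);
  if (auto *ZI = dyn_cast<ZExtInst>(Z)) // CreateZExt may fold to a constant
    ZI->setNonNeg(true);
  return Z;
}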
bool Instruction::hasPoisonGeneratingFlags() const {
return cast<Operator>(this)->hasPoisonGeneratingFlags();
}
@@ -201,10 +375,19 @@ void Instruction::dropPoisonGeneratingFlags() {
cast<PossiblyExactOperator>(this)->setIsExact(false);
break;
+ case Instruction::Or:
+ cast<PossiblyDisjointInst>(this)->setIsDisjoint(false);
+ break;
+
case Instruction::GetElementPtr:
cast<GetElementPtrInst>(this)->setIsInBounds(false);
break;
+
+ case Instruction::ZExt:
+ setNonNeg(false);
+ break;
}
+
if (isa<FPMathOperator>(this)) {
setHasNoNaNs(false);
setHasNoInfs(false);
@@ -371,6 +554,10 @@ void Instruction::copyIRFlags(const Value *V, bool IncludeWrapFlags) {
if (isa<PossiblyExactOperator>(this))
setIsExact(PE->isExact());
+ if (auto *SrcPD = dyn_cast<PossiblyDisjointInst>(V))
+ if (auto *DestPD = dyn_cast<PossiblyDisjointInst>(this))
+ DestPD->setIsDisjoint(SrcPD->isDisjoint());
+
// Copy the fast-math flags.
if (auto *FP = dyn_cast<FPMathOperator>(V))
if (isa<FPMathOperator>(this))
@@ -379,6 +566,10 @@ void Instruction::copyIRFlags(const Value *V, bool IncludeWrapFlags) {
if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(V))
if (auto *DestGEP = dyn_cast<GetElementPtrInst>(this))
DestGEP->setIsInBounds(SrcGEP->isInBounds() || DestGEP->isInBounds());
+
+ if (auto *NNI = dyn_cast<PossiblyNonNegInst>(V))
+ if (isa<PossiblyNonNegInst>(this))
+ setNonNeg(NNI->hasNonNeg());
}
void Instruction::andIRFlags(const Value *V) {
@@ -393,6 +584,10 @@ void Instruction::andIRFlags(const Value *V) {
if (isa<PossiblyExactOperator>(this))
setIsExact(isExact() && PE->isExact());
+ if (auto *SrcPD = dyn_cast<PossiblyDisjointInst>(V))
+ if (auto *DestPD = dyn_cast<PossiblyDisjointInst>(this))
+ DestPD->setIsDisjoint(DestPD->isDisjoint() && SrcPD->isDisjoint());
+
if (auto *FP = dyn_cast<FPMathOperator>(V)) {
if (isa<FPMathOperator>(this)) {
FastMathFlags FM = getFastMathFlags();
@@ -404,6 +599,10 @@ void Instruction::andIRFlags(const Value *V) {
if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(V))
if (auto *DestGEP = dyn_cast<GetElementPtrInst>(this))
DestGEP->setIsInBounds(SrcGEP->isInBounds() && DestGEP->isInBounds());
+
+ if (auto *NNI = dyn_cast<PossiblyNonNegInst>(V))
+ if (isa<PossiblyNonNegInst>(this))
+ setNonNeg(hasNonNeg() && NNI->hasNonNeg());
}
const char *Instruction::getOpcodeName(unsigned OpCode) {
@@ -885,7 +1084,16 @@ Instruction::getPrevNonDebugInstruction(bool SkipPseudoOp) const {
return nullptr;
}
+const DebugLoc &Instruction::getStableDebugLoc() const {
+ if (isa<DbgInfoIntrinsic>(this))
+ if (const Instruction *Next = getNextNonDebugInstruction())
+ return Next->getDebugLoc();
+ return getDebugLoc();
+}
+
bool Instruction::isAssociative() const {
+ if (auto *II = dyn_cast<IntrinsicInst>(this))
+ return II->isAssociative();
unsigned Opcode = getOpcode();
if (isAssociative(Opcode))
return true;
diff --git a/contrib/llvm-project/llvm/lib/IR/Instructions.cpp b/contrib/llvm-project/llvm/lib/IR/Instructions.cpp
index cb0ac0f8eae6..299b4e74677d 100644
--- a/contrib/llvm-project/llvm/lib/IR/Instructions.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Instructions.cpp
@@ -130,7 +130,7 @@ Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) {
// clients might not expect this to happen. The code as it is thrashes the
// use/def lists, which is kinda lame.
std::copy(op_begin() + Idx + 1, op_end(), op_begin() + Idx);
- copyIncomingBlocks(make_range(block_begin() + Idx + 1, block_end()), Idx);
+ copyIncomingBlocks(drop_begin(blocks(), Idx + 1), Idx);
// Nuke the last value.
Op<-1>().set(nullptr);
@@ -145,6 +145,39 @@ Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) {
return Removed;
}
+void PHINode::removeIncomingValueIf(function_ref<bool(unsigned)> Predicate,
+ bool DeletePHIIfEmpty) {
+ SmallDenseSet<unsigned> RemoveIndices;
+ for (unsigned Idx = 0; Idx < getNumIncomingValues(); ++Idx)
+ if (Predicate(Idx))
+ RemoveIndices.insert(Idx);
+
+ if (RemoveIndices.empty())
+ return;
+
+ // Remove operands.
+ auto NewOpEnd = remove_if(operands(), [&](Use &U) {
+ return RemoveIndices.contains(U.getOperandNo());
+ });
+ for (Use &U : make_range(NewOpEnd, op_end()))
+ U.set(nullptr);
+
+ // Remove incoming blocks.
+ (void)std::remove_if(const_cast<block_iterator>(block_begin()),
+ const_cast<block_iterator>(block_end()), [&](BasicBlock *&BB) {
+ return RemoveIndices.contains(&BB - block_begin());
+ });
+
+ setNumHungOffUseOperands(getNumOperands() - RemoveIndices.size());
+
+ // If the PHI node is dead, because it has zero entries, nuke it now.
+ if (getNumOperands() == 0 && DeletePHIIfEmpty) {
+ // If anyone is using this PHI, make them use a dummy value instead...
+ replaceAllUsesWith(PoisonValue::get(getType()));
+ eraseFromParent();
+ }
+}
+
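The predicate form saves callers from index juggling when several entries are removed at once; an illustrative use (DeadBlocks is hypothetical, and DeletePHIIfEmpty is assumed to default to true):

// Drop every incoming edge whose predecessor block is known dead.
static void pruneDeadEdges(PHINode *PN,
                           const SmallPtrSetImpl<BasicBlock *> &DeadBlocks) {
  PN->removeIncomingValueIf([&](unsigned Idx) {
    return DeadBlocks.contains(PN->getIncomingBlock(Idx));
  });
}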
/// growOperands - grow operands - This grows the operand list in response
/// to a push_back style of operation. This grows the number of ops by 1.5
/// times.
@@ -776,204 +809,6 @@ void CallInst::updateProfWeight(uint64_t S, uint64_t T) {
setMetadata(LLVMContext::MD_prof, MDNode::get(getContext(), Vals));
}
-/// IsConstantOne - Return true only if val is constant int 1
-static bool IsConstantOne(Value *val) {
- assert(val && "IsConstantOne does not work with nullptr val");
- const ConstantInt *CVal = dyn_cast<ConstantInt>(val);
- return CVal && CVal->isOne();
-}
-
-static Instruction *createMalloc(Instruction *InsertBefore,
- BasicBlock *InsertAtEnd, Type *IntPtrTy,
- Type *AllocTy, Value *AllocSize,
- Value *ArraySize,
- ArrayRef<OperandBundleDef> OpB,
- Function *MallocF, const Twine &Name) {
- assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) &&
- "createMalloc needs either InsertBefore or InsertAtEnd");
-
- // malloc(type) becomes:
- // bitcast (i8* malloc(typeSize)) to type*
- // malloc(type, arraySize) becomes:
- // bitcast (i8* malloc(typeSize*arraySize)) to type*
- if (!ArraySize)
- ArraySize = ConstantInt::get(IntPtrTy, 1);
- else if (ArraySize->getType() != IntPtrTy) {
- if (InsertBefore)
- ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false,
- "", InsertBefore);
- else
- ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false,
- "", InsertAtEnd);
- }
-
- if (!IsConstantOne(ArraySize)) {
- if (IsConstantOne(AllocSize)) {
- AllocSize = ArraySize; // Operand * 1 = Operand
- } else if (Constant *CO = dyn_cast<Constant>(ArraySize)) {
- Constant *Scale = ConstantExpr::getIntegerCast(CO, IntPtrTy,
- false /*ZExt*/);
- // Malloc arg is constant product of type size and array size
- AllocSize = ConstantExpr::getMul(Scale, cast<Constant>(AllocSize));
- } else {
- // Multiply type size by the array size...
- if (InsertBefore)
- AllocSize = BinaryOperator::CreateMul(ArraySize, AllocSize,
- "mallocsize", InsertBefore);
- else
- AllocSize = BinaryOperator::CreateMul(ArraySize, AllocSize,
- "mallocsize", InsertAtEnd);
- }
- }
-
- assert(AllocSize->getType() == IntPtrTy && "malloc arg is wrong size");
- // Create the call to Malloc.
- BasicBlock *BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
- Module *M = BB->getParent()->getParent();
- Type *BPTy = Type::getInt8PtrTy(BB->getContext());
- FunctionCallee MallocFunc = MallocF;
- if (!MallocFunc)
- // prototype malloc as "void *malloc(size_t)"
- MallocFunc = M->getOrInsertFunction("malloc", BPTy, IntPtrTy);
- PointerType *AllocPtrType = PointerType::getUnqual(AllocTy);
- CallInst *MCall = nullptr;
- Instruction *Result = nullptr;
- if (InsertBefore) {
- MCall = CallInst::Create(MallocFunc, AllocSize, OpB, "malloccall",
- InsertBefore);
- Result = MCall;
- if (Result->getType() != AllocPtrType)
- // Create a cast instruction to convert to the right type...
- Result = new BitCastInst(MCall, AllocPtrType, Name, InsertBefore);
- } else {
- MCall = CallInst::Create(MallocFunc, AllocSize, OpB, "malloccall");
- Result = MCall;
- if (Result->getType() != AllocPtrType) {
- MCall->insertInto(InsertAtEnd, InsertAtEnd->end());
- // Create a cast instruction to convert to the right type...
- Result = new BitCastInst(MCall, AllocPtrType, Name);
- }
- }
- MCall->setTailCall();
- if (Function *F = dyn_cast<Function>(MallocFunc.getCallee())) {
- MCall->setCallingConv(F->getCallingConv());
- if (!F->returnDoesNotAlias())
- F->setReturnDoesNotAlias();
- }
- assert(!MCall->getType()->isVoidTy() && "Malloc has void return type");
-
- return Result;
-}
-
-/// CreateMalloc - Generate the IR for a call to malloc:
-/// 1. Compute the malloc call's argument as the specified type's size,
-/// possibly multiplied by the array size if the array size is not
-/// constant 1.
-/// 2. Call malloc with that argument.
-/// 3. Bitcast the result of the malloc call to the specified type.
-Instruction *CallInst::CreateMalloc(Instruction *InsertBefore,
- Type *IntPtrTy, Type *AllocTy,
- Value *AllocSize, Value *ArraySize,
- Function *MallocF,
- const Twine &Name) {
- return createMalloc(InsertBefore, nullptr, IntPtrTy, AllocTy, AllocSize,
- ArraySize, std::nullopt, MallocF, Name);
-}
-Instruction *CallInst::CreateMalloc(Instruction *InsertBefore,
- Type *IntPtrTy, Type *AllocTy,
- Value *AllocSize, Value *ArraySize,
- ArrayRef<OperandBundleDef> OpB,
- Function *MallocF,
- const Twine &Name) {
- return createMalloc(InsertBefore, nullptr, IntPtrTy, AllocTy, AllocSize,
- ArraySize, OpB, MallocF, Name);
-}
-
-/// CreateMalloc - Generate the IR for a call to malloc:
-/// 1. Compute the malloc call's argument as the specified type's size,
-/// possibly multiplied by the array size if the array size is not
-/// constant 1.
-/// 2. Call malloc with that argument.
-/// 3. Bitcast the result of the malloc call to the specified type.
-/// Note: This function does not add the bitcast to the basic block, that is the
-/// responsibility of the caller.
-Instruction *CallInst::CreateMalloc(BasicBlock *InsertAtEnd,
- Type *IntPtrTy, Type *AllocTy,
- Value *AllocSize, Value *ArraySize,
- Function *MallocF, const Twine &Name) {
- return createMalloc(nullptr, InsertAtEnd, IntPtrTy, AllocTy, AllocSize,
- ArraySize, std::nullopt, MallocF, Name);
-}
-Instruction *CallInst::CreateMalloc(BasicBlock *InsertAtEnd,
- Type *IntPtrTy, Type *AllocTy,
- Value *AllocSize, Value *ArraySize,
- ArrayRef<OperandBundleDef> OpB,
- Function *MallocF, const Twine &Name) {
- return createMalloc(nullptr, InsertAtEnd, IntPtrTy, AllocTy, AllocSize,
- ArraySize, OpB, MallocF, Name);
-}
-
-static Instruction *createFree(Value *Source,
- ArrayRef<OperandBundleDef> Bundles,
- Instruction *InsertBefore,
- BasicBlock *InsertAtEnd) {
- assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) &&
- "createFree needs either InsertBefore or InsertAtEnd");
- assert(Source->getType()->isPointerTy() &&
- "Can not free something of nonpointer type!");
-
- BasicBlock *BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
- Module *M = BB->getParent()->getParent();
-
- Type *VoidTy = Type::getVoidTy(M->getContext());
- Type *IntPtrTy = Type::getInt8PtrTy(M->getContext());
- // prototype free as "void free(void*)"
- FunctionCallee FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy);
- CallInst *Result = nullptr;
- Value *PtrCast = Source;
- if (InsertBefore) {
- if (Source->getType() != IntPtrTy)
- PtrCast = new BitCastInst(Source, IntPtrTy, "", InsertBefore);
- Result = CallInst::Create(FreeFunc, PtrCast, Bundles, "", InsertBefore);
- } else {
- if (Source->getType() != IntPtrTy)
- PtrCast = new BitCastInst(Source, IntPtrTy, "", InsertAtEnd);
- Result = CallInst::Create(FreeFunc, PtrCast, Bundles, "");
- }
- Result->setTailCall();
- if (Function *F = dyn_cast<Function>(FreeFunc.getCallee()))
- Result->setCallingConv(F->getCallingConv());
-
- return Result;
-}
-
-/// CreateFree - Generate the IR for a call to the builtin free function.
-Instruction *CallInst::CreateFree(Value *Source, Instruction *InsertBefore) {
- return createFree(Source, std::nullopt, InsertBefore, nullptr);
-}
-Instruction *CallInst::CreateFree(Value *Source,
- ArrayRef<OperandBundleDef> Bundles,
- Instruction *InsertBefore) {
- return createFree(Source, Bundles, InsertBefore, nullptr);
-}
-
-/// CreateFree - Generate the IR for a call to the builtin free function.
-/// Note: This function does not add the call to the basic block, that is the
-/// responsibility of the caller.
-Instruction *CallInst::CreateFree(Value *Source, BasicBlock *InsertAtEnd) {
- Instruction *FreeCall =
- createFree(Source, std::nullopt, nullptr, InsertAtEnd);
- assert(FreeCall && "CreateFree did not create a CallInst");
- return FreeCall;
-}
-Instruction *CallInst::CreateFree(Value *Source,
- ArrayRef<OperandBundleDef> Bundles,
- BasicBlock *InsertAtEnd) {
- Instruction *FreeCall = createFree(Source, Bundles, nullptr, InsertAtEnd);
- assert(FreeCall && "CreateFree did not create a CallInst");
- return FreeCall;
-}
-
//===----------------------------------------------------------------------===//
// InvokeInst Implementation
//===----------------------------------------------------------------------===//
@@ -1627,6 +1462,9 @@ StoreInst::StoreInst(Value *val, Value *addr, Instruction *InsertBefore)
StoreInst::StoreInst(Value *val, Value *addr, BasicBlock *InsertAtEnd)
: StoreInst(val, addr, /*isVolatile=*/false, InsertAtEnd) {}
+StoreInst::StoreInst(Value *val, Value *addr, BasicBlock::iterator InsertBefore)
+ : StoreInst(val, addr, /*isVolatile=*/false, InsertBefore) {}
+
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
Instruction *InsertBefore)
: StoreInst(val, addr, isVolatile,
@@ -1639,6 +1477,12 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
computeLoadStoreDefaultAlign(val->getType(), InsertAtEnd),
InsertAtEnd) {}
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ BasicBlock::iterator InsertBefore)
+ : StoreInst(val, addr, isVolatile,
+ computeLoadStoreDefaultAlign(val->getType(), &*InsertBefore),
+ InsertBefore) {}
+
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, Align Align,
Instruction *InsertBefore)
: StoreInst(val, addr, isVolatile, Align, AtomicOrdering::NotAtomic,
@@ -1650,6 +1494,11 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, Align Align,
SyncScope::System, InsertAtEnd) {}
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, Align Align,
+ BasicBlock::iterator InsertBefore)
+ : StoreInst(val, addr, isVolatile, Align, AtomicOrdering::NotAtomic,
+ SyncScope::System, InsertBefore) {}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, Align Align,
AtomicOrdering Order, SyncScope::ID SSID,
Instruction *InsertBefore)
: Instruction(Type::getVoidTy(val->getContext()), Store,
@@ -1677,6 +1526,20 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, Align Align,
AssertOK();
}
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, Align Align,
+ AtomicOrdering Order, SyncScope::ID SSID,
+ BasicBlock::iterator InsertBefore)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this)) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ setAtomic(Order, SSID);
+ insertBefore(*InsertBefore->getParent(), InsertBefore);
+ AssertOK();
+}
//===----------------------------------------------------------------------===//
// AtomicCmpXchgInst Implementation
@@ -2301,10 +2164,10 @@ static bool isSingleSourceMaskImpl(ArrayRef<int> Mask, int NumOpElts) {
return UsesLHS || UsesRHS;
}
-bool ShuffleVectorInst::isSingleSourceMask(ArrayRef<int> Mask) {
+bool ShuffleVectorInst::isSingleSourceMask(ArrayRef<int> Mask, int NumSrcElts) {
// We don't have vector operand size information, so assume operands are the
// same size as the mask.
- return isSingleSourceMaskImpl(Mask, Mask.size());
+ return isSingleSourceMaskImpl(Mask, NumSrcElts);
}
static bool isIdentityMaskImpl(ArrayRef<int> Mask, int NumOpElts) {
@@ -2319,65 +2182,75 @@ static bool isIdentityMaskImpl(ArrayRef<int> Mask, int NumOpElts) {
return true;
}
-bool ShuffleVectorInst::isIdentityMask(ArrayRef<int> Mask) {
+bool ShuffleVectorInst::isIdentityMask(ArrayRef<int> Mask, int NumSrcElts) {
+ if (Mask.size() != static_cast<unsigned>(NumSrcElts))
+ return false;
// We don't have vector operand size information, so assume operands are the
// same size as the mask.
- return isIdentityMaskImpl(Mask, Mask.size());
+ return isIdentityMaskImpl(Mask, NumSrcElts);
}
-bool ShuffleVectorInst::isReverseMask(ArrayRef<int> Mask) {
- if (!isSingleSourceMask(Mask))
+bool ShuffleVectorInst::isReverseMask(ArrayRef<int> Mask, int NumSrcElts) {
+ if (Mask.size() != static_cast<unsigned>(NumSrcElts))
+ return false;
+ if (!isSingleSourceMask(Mask, NumSrcElts))
return false;
// The number of elements in the mask must be at least 2.
- int NumElts = Mask.size();
- if (NumElts < 2)
+ if (NumSrcElts < 2)
return false;
- for (int i = 0; i < NumElts; ++i) {
- if (Mask[i] == -1)
+ for (int I = 0, E = Mask.size(); I < E; ++I) {
+ if (Mask[I] == -1)
continue;
- if (Mask[i] != (NumElts - 1 - i) && Mask[i] != (NumElts + NumElts - 1 - i))
+ if (Mask[I] != (NumSrcElts - 1 - I) &&
+ Mask[I] != (NumSrcElts + NumSrcElts - 1 - I))
return false;
}
return true;
}
-bool ShuffleVectorInst::isZeroEltSplatMask(ArrayRef<int> Mask) {
- if (!isSingleSourceMask(Mask))
+bool ShuffleVectorInst::isZeroEltSplatMask(ArrayRef<int> Mask, int NumSrcElts) {
+ if (Mask.size() != static_cast<unsigned>(NumSrcElts))
return false;
- for (int i = 0, NumElts = Mask.size(); i < NumElts; ++i) {
- if (Mask[i] == -1)
+ if (!isSingleSourceMask(Mask, NumSrcElts))
+ return false;
+ for (int I = 0, E = Mask.size(); I < E; ++I) {
+ if (Mask[I] == -1)
continue;
- if (Mask[i] != 0 && Mask[i] != NumElts)
+ if (Mask[I] != 0 && Mask[I] != NumSrcElts)
return false;
}
return true;
}
-bool ShuffleVectorInst::isSelectMask(ArrayRef<int> Mask) {
+bool ShuffleVectorInst::isSelectMask(ArrayRef<int> Mask, int NumSrcElts) {
+ if (Mask.size() != static_cast<unsigned>(NumSrcElts))
+ return false;
// Select is differentiated from identity. It requires using both sources.
- if (isSingleSourceMask(Mask))
+ if (isSingleSourceMask(Mask, NumSrcElts))
return false;
- for (int i = 0, NumElts = Mask.size(); i < NumElts; ++i) {
- if (Mask[i] == -1)
+ for (int I = 0, E = Mask.size(); I < E; ++I) {
+ if (Mask[I] == -1)
continue;
- if (Mask[i] != i && Mask[i] != (NumElts + i))
+ if (Mask[I] != I && Mask[I] != (NumSrcElts + I))
return false;
}
return true;
}
-bool ShuffleVectorInst::isTransposeMask(ArrayRef<int> Mask) {
+bool ShuffleVectorInst::isTransposeMask(ArrayRef<int> Mask, int NumSrcElts) {
// Example masks that will return true:
// v1 = <a, b, c, d>
// v2 = <e, f, g, h>
// trn1 = shufflevector v1, v2 <0, 4, 2, 6> = <a, e, c, g>
// trn2 = shufflevector v1, v2 <1, 5, 3, 7> = <b, f, d, h>
+ if (Mask.size() != static_cast<unsigned>(NumSrcElts))
+ return false;
// 1. The number of elements in the mask must be a power-of-2 and at least 2.
- int NumElts = Mask.size();
- if (NumElts < 2 || !isPowerOf2_32(NumElts))
+ int Sz = Mask.size();
+ if (Sz < 2 || !isPowerOf2_32(Sz))
return false;
// 2. The first element of the mask must be either a 0 or a 1.
@@ -2386,23 +2259,26 @@ bool ShuffleVectorInst::isTransposeMask(ArrayRef<int> Mask) {
// 3. The difference between the first 2 elements must be equal to the
// number of elements in the mask.
- if ((Mask[1] - Mask[0]) != NumElts)
+ if ((Mask[1] - Mask[0]) != NumSrcElts)
return false;
// 4. The difference between consecutive even-numbered and odd-numbered
// elements must be equal to 2.
- for (int i = 2; i < NumElts; ++i) {
- int MaskEltVal = Mask[i];
+ for (int I = 2; I < Sz; ++I) {
+ int MaskEltVal = Mask[I];
if (MaskEltVal == -1)
return false;
- int MaskEltPrevVal = Mask[i - 2];
+ int MaskEltPrevVal = Mask[I - 2];
if (MaskEltVal - MaskEltPrevVal != 2)
return false;
}
return true;
}
-bool ShuffleVectorInst::isSpliceMask(ArrayRef<int> Mask, int &Index) {
+bool ShuffleVectorInst::isSpliceMask(ArrayRef<int> Mask, int NumSrcElts,
+ int &Index) {
+ if (Mask.size() != static_cast<unsigned>(NumSrcElts))
+ return false;
// Example: shufflevector <4 x n> A, <4 x n> B, <1,2,3,4>
int StartIndex = -1;
for (int I = 0, E = Mask.size(); I != E; ++I) {
@@ -2413,7 +2289,7 @@ bool ShuffleVectorInst::isSpliceMask(ArrayRef<int> Mask, int &Index) {
if (StartIndex == -1) {
// Don't support a StartIndex that begins in the second input, or if the
// first non-undef index would access below the StartIndex.
- if (MaskEltVal < I || E <= (MaskEltVal - I))
+ if (MaskEltVal < I || NumSrcElts <= (MaskEltVal - I))
return false;
StartIndex = MaskEltVal - I;
@@ -2536,9 +2412,6 @@ bool ShuffleVectorInst::isInsertSubvectorMask(ArrayRef<int> Mask,
}
bool ShuffleVectorInst::isIdentityWithPadding() const {
- if (isa<UndefValue>(Op<2>()))
- return false;
-
// FIXME: Not currently possible to express a shuffle mask for a scalable
// vector for this case.
if (isa<ScalableVectorType>(getType()))
@@ -2563,9 +2436,6 @@ bool ShuffleVectorInst::isIdentityWithPadding() const {
}
bool ShuffleVectorInst::isIdentityWithExtract() const {
- if (isa<UndefValue>(Op<2>()))
- return false;
-
// FIXME: Not currently possible to express a shuffle mask for a scalable
// vector for this case.
if (isa<ScalableVectorType>(getType()))
@@ -2581,8 +2451,7 @@ bool ShuffleVectorInst::isIdentityWithExtract() const {
bool ShuffleVectorInst::isConcat() const {
// Vector concatenation is differentiated from identity with padding.
- if (isa<UndefValue>(Op<0>()) || isa<UndefValue>(Op<1>()) ||
- isa<UndefValue>(Op<2>()))
+ if (isa<UndefValue>(Op<0>()) || isa<UndefValue>(Op<1>()))
return false;
// FIXME: Not currently possible to express a shuffle mask for a scalable
@@ -2607,7 +2476,7 @@ static bool isReplicationMaskWithParams(ArrayRef<int> Mask,
assert(Mask.size() == (unsigned)ReplicationFactor * VF &&
"Unexpected mask size.");
- for (int CurrElt : seq(0, VF)) {
+ for (int CurrElt : seq(VF)) {
ArrayRef<int> CurrSubMask = Mask.take_front(ReplicationFactor);
assert(CurrSubMask.size() == (unsigned)ReplicationFactor &&
"Run out of mask?");
@@ -2692,10 +2561,10 @@ bool ShuffleVectorInst::isOneUseSingleSourceMask(ArrayRef<int> Mask, int VF) {
if (all_of(SubMask, [](int Idx) { return Idx == PoisonMaskElem; }))
continue;
SmallBitVector Used(VF, false);
- for_each(SubMask, [&Used, VF](int Idx) {
+ for (int Idx : SubMask) {
if (Idx != PoisonMaskElem && Idx < VF)
Used.set(Idx);
- });
+ }
if (!Used.all())
return false;
}
@@ -2708,7 +2577,7 @@ bool ShuffleVectorInst::isOneUseSingleSourceMask(int VF) const {
// case.
if (isa<ScalableVectorType>(getType()))
return false;
- if (!isSingleSourceMask(ShuffleMask))
+ if (!isSingleSourceMask(ShuffleMask, VF))
return false;
return isOneUseSingleSourceMask(ShuffleMask, VF);
@@ -2806,6 +2675,45 @@ bool ShuffleVectorInst::isInterleaveMask(
return true;
}
+/// Try to lower a vector shuffle as a bit rotation.
+///
+/// Look for a repeated rotation pattern in each sub-group.
+/// Returns an element-wise left bit-rotation amount, or -1 on failure.
+static int matchShuffleAsBitRotate(ArrayRef<int> Mask, int NumSubElts) {
+ int NumElts = Mask.size();
+ assert((NumElts % NumSubElts) == 0 && "Illegal shuffle mask");
+
+ int RotateAmt = -1;
+ for (int i = 0; i != NumElts; i += NumSubElts) {
+ for (int j = 0; j != NumSubElts; ++j) {
+ int M = Mask[i + j];
+ if (M < 0)
+ continue;
+ if (M < i || M >= i + NumSubElts)
+ return -1;
+ int Offset = (NumSubElts - (M - (i + j))) % NumSubElts;
+ if (0 <= RotateAmt && Offset != RotateAmt)
+ return -1;
+ RotateAmt = Offset;
+ }
+ }
+ return RotateAmt;
+}
+
+bool ShuffleVectorInst::isBitRotateMask(
+ ArrayRef<int> Mask, unsigned EltSizeInBits, unsigned MinSubElts,
+ unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt) {
+ for (NumSubElts = MinSubElts; NumSubElts <= MaxSubElts; NumSubElts *= 2) {
+ int EltRotateAmt = matchShuffleAsBitRotate(Mask, NumSubElts);
+ if (EltRotateAmt < 0)
+ continue;
+ RotateAmt = EltRotateAmt * EltSizeInBits;
+ return true;
+ }
+
+ return false;
+}
+
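A worked example for the matcher above: on <8 x i8>, the mask <1,2,3,0,5,6,7,4> rotates each four-byte group, so the search settles on 32-bit sub-elements rotated left by 24 bits:

static void bitRotateExample() {
  SmallVector<int, 8> Mask = {1, 2, 3, 0, 5, 6, 7, 4};
  unsigned NumSubElts, RotateAmt;
  bool IsRotate = ShuffleVectorInst::isBitRotateMask(
      Mask, /*EltSizeInBits=*/8, /*MinSubElts=*/2, /*MaxSubElts=*/4,
      NumSubElts, RotateAmt);
  // NumSubElts == 2 fails (element 2 escapes its group); 4 succeeds.
  assert(IsRotate && NumSubElts == 4 && RotateAmt == 24);
  (void)IsRotate;
}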
//===----------------------------------------------------------------------===//
// InsertValueInst Class
//===----------------------------------------------------------------------===//
@@ -4719,7 +4627,7 @@ void SwitchInstProfUpdateWrapper::addCase(
"num of prof branch_weights must accord with num of successors");
}
-SymbolTableList<Instruction>::iterator
+Instruction::InstListType::iterator
SwitchInstProfUpdateWrapper::eraseFromParent() {
// Instruction is erased. Mark as unchanged to not touch it in the destructor.
Changed = false;
diff --git a/contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp b/contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp
index 36d56699c64e..7a3b708e7400 100644
--- a/contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp
@@ -235,7 +235,7 @@ void DbgAssignIntrinsic::setValue(Value *V) {
int llvm::Intrinsic::lookupLLVMIntrinsicByName(ArrayRef<const char *> NameTable,
StringRef Name) {
- assert(Name.startswith("llvm."));
+ assert(Name.starts_with("llvm.") && "Unexpected intrinsic prefix");
// Do successive binary searches of the dotted name components. For
// "llvm.gc.experimental.statepoint.p1i8.p1i32", we will find the range of
@@ -265,18 +265,18 @@ int llvm::Intrinsic::lookupLLVMIntrinsicByName(ArrayRef<const char *> NameTable,
return -1;
StringRef NameFound = *LastLow;
if (Name == NameFound ||
- (Name.startswith(NameFound) && Name[NameFound.size()] == '.'))
+ (Name.starts_with(NameFound) && Name[NameFound.size()] == '.'))
return LastLow - NameTable.begin();
return -1;
}
-ConstantInt *InstrProfInstBase::getNumCounters() const {
+ConstantInt *InstrProfCntrInstBase::getNumCounters() const {
if (InstrProfValueProfileInst::classof(this))
llvm_unreachable("InstrProfValueProfileInst does not have counters!");
return cast<ConstantInt>(const_cast<Value *>(getArgOperand(2)));
}
-ConstantInt *InstrProfInstBase::getIndex() const {
+ConstantInt *InstrProfCntrInstBase::getIndex() const {
if (InstrProfValueProfileInst::classof(this))
llvm_unreachable("Please use InstrProfValueProfileInst::getIndex()");
return cast<ConstantInt>(const_cast<Value *>(getArgOperand(3)));
@@ -503,7 +503,7 @@ std::optional<unsigned> VPIntrinsic::getMemoryDataParamPos(Intrinsic::ID VPID) {
return std::nullopt;
}
-bool VPIntrinsic::isVPIntrinsic(Intrinsic::ID ID) {
+constexpr bool isVPIntrinsic(Intrinsic::ID ID) {
switch (ID) {
default:
break;
@@ -515,22 +515,70 @@ bool VPIntrinsic::isVPIntrinsic(Intrinsic::ID ID) {
return false;
}
+bool VPIntrinsic::isVPIntrinsic(Intrinsic::ID ID) {
+ return ::isVPIntrinsic(ID);
+}
+
// Equivalent non-predicated opcode
+constexpr static std::optional<unsigned>
+getFunctionalOpcodeForVP(Intrinsic::ID ID) {
+ switch (ID) {
+ default:
+ break;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define VP_PROPERTY_FUNCTIONAL_OPC(OPC) return Instruction::OPC;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+ return std::nullopt;
+}
+
std::optional<unsigned>
VPIntrinsic::getFunctionalOpcodeForVP(Intrinsic::ID ID) {
+ return ::getFunctionalOpcodeForVP(ID);
+}
+
+// Equivalent non-predicated intrinsic ID
+constexpr static std::optional<Intrinsic::ID>
+getFunctionalIntrinsicIDForVP(Intrinsic::ID ID) {
switch (ID) {
default:
break;
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
-#define VP_PROPERTY_FUNCTIONAL_OPC(OPC) return Instruction::OPC;
+#define VP_PROPERTY_FUNCTIONAL_INTRINSIC(INTRIN) return Intrinsic::INTRIN;
#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
}
return std::nullopt;
}
+std::optional<Intrinsic::ID>
+VPIntrinsic::getFunctionalIntrinsicIDForVP(Intrinsic::ID ID) {
+ return ::getFunctionalIntrinsicIDForVP(ID);
+}
+
+constexpr static bool doesVPHaveNoFunctionalEquivalent(Intrinsic::ID ID) {
+ switch (ID) {
+ default:
+ break;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define VP_PROPERTY_NO_FUNCTIONAL return true;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+ return false;
+}
+
+// All VP intrinsics should have an equivalent non-VP opcode or intrinsic
+// defined, or be marked that they don't have one.
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) \
+ static_assert(doesVPHaveNoFunctionalEquivalent(Intrinsic::VPID) || \
+ getFunctionalOpcodeForVP(Intrinsic::VPID) || \
+ getFunctionalIntrinsicIDForVP(Intrinsic::VPID));
+#include "llvm/IR/VPIntrinsics.def"
+
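// A minimal usage sketch of the constexpr helpers above (assuming the
// upstream VPIntrinsics.def entries, e.g. vp.add carrying
// VP_PROPERTY_FUNCTIONAL_OPC(Add) and vp.smax carrying
// VP_PROPERTY_FUNCTIONAL_INTRINSIC(smax)):
static unsigned functionalEquivalentOrZero(Intrinsic::ID ID) {
  if (std::optional<unsigned> Opc = VPIntrinsic::getFunctionalOpcodeForVP(ID))
    return *Opc; // e.g. Intrinsic::vp_add -> Instruction::Add
  if (std::optional<Intrinsic::ID> IID =
          VPIntrinsic::getFunctionalIntrinsicIDForVP(ID))
    return *IID; // e.g. Intrinsic::vp_smax -> Intrinsic::smax
  return 0;      // marked VP_PROPERTY_NO_FUNCTIONAL, e.g. vp.merge
}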
// Equivalent non-predicated constrained intrinsic
-std::optional<unsigned>
+std::optional<Intrinsic::ID>
VPIntrinsic::getConstrainedIntrinsicIDForVP(Intrinsic::ID ID) {
switch (ID) {
default:
@@ -621,6 +669,9 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID,
VPFunc =
Intrinsic::getDeclaration(M, VPID, {ReturnType, Params[0]->getType()});
break;
+ case Intrinsic::vp_is_fpclass:
+ VPFunc = Intrinsic::getDeclaration(M, VPID, {Params[0]->getType()});
+ break;
case Intrinsic::vp_merge:
case Intrinsic::vp_select:
VPFunc = Intrinsic::getDeclaration(M, VPID, {Params[1]->getType()});
@@ -691,6 +742,18 @@ bool VPCmpIntrinsic::isVPCmp(Intrinsic::ID ID) {
return false;
}
+bool VPBinOpIntrinsic::isVPBinOp(Intrinsic::ID ID) {
+ switch (ID) {
+ default:
+ break;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define VP_PROPERTY_BINARYOP return true;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+ return false;
+}
+
static ICmpInst::Predicate getIntPredicateFromMD(const Value *Op) {
Metadata *MD = cast<MetadataAsValue>(Op)->getMetadata();
if (!MD || !isa<MDString>(MD))
@@ -807,6 +870,10 @@ const Value *GCProjectionInst::getStatepoint() const {
if (isa<UndefValue>(Token))
return Token;
+ // Treat a none token as if it were undef here.
+ if (isa<ConstantTokenNone>(Token))
+ return UndefValue::get(Token->getType());
+
// This takes care both of relocates for call statepoints and relocates
// on normal path of invoke statepoint.
if (!isa<LandingPadInst>(Token))
diff --git a/contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.cpp b/contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.cpp
index 2076eeed9417..15c90a4fe7b2 100644
--- a/contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.cpp
@@ -45,6 +45,14 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
Int16Ty(C, 16), Int32Ty(C, 32), Int64Ty(C, 64), Int128Ty(C, 128) {}
LLVMContextImpl::~LLVMContextImpl() {
+#ifndef NDEBUG
+ // Check that any variable location records that fell off the end of a block
+ // when its terminator was removed were eventually replaced. This assertion
+ // firing indicates that DPValues went missing during the lifetime of the
+ // LLVMContext.
+ assert(TrailingDPValues.empty() && "DPValue records in blocks not cleaned");
+#endif
+
// NOTE: We need to delete the contents of OwnedModules, but Module's dtor
// will call LLVMContextImpl::removeModule, thus invalidating iterators into
// the container. Avoid iterators during this operation:
@@ -60,15 +68,8 @@ LLVMContextImpl::~LLVMContextImpl() {
// Drop references for MDNodes. Do this before Values get deleted to avoid
// unnecessary RAUW when nodes are still unresolved.
- for (auto *I : DistinctMDNodes) {
- // We may have DIArgList that were uniqued, and as it has a custom
- // implementation of dropAllReferences, it needs to be explicitly invoked.
- if (auto *AL = dyn_cast<DIArgList>(I)) {
- AL->dropAllReferences();
- continue;
- }
+ for (auto *I : DistinctMDNodes)
I->dropAllReferences();
- }
#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \
for (auto *I : CLASS##s) \
I->dropAllReferences();
@@ -79,6 +80,13 @@ LLVMContextImpl::~LLVMContextImpl() {
Pair.second->dropUsers();
for (auto &Pair : MetadataAsValues)
Pair.second->dropUse();
+ // Do not untrack ValueAsMetadata references for DIArgLists, as they have
+ // already been more efficiently untracked above.
+ for (DIArgList *AL : DIArgLists) {
+ AL->dropAllReferences(/* Untrack */ false);
+ delete AL;
+ }
+ DIArgLists.clear();
// Destroy MDNodes.
for (MDNode *I : DistinctMDNodes)
diff --git a/contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.h b/contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.h
index 4cc3f8da6b75..6a2029134498 100644
--- a/contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.h
+++ b/contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.h
@@ -57,6 +57,7 @@ class AttributeListImpl;
class AttributeSetNode;
class BasicBlock;
struct DiagnosticHandler;
+class DPMarker;
class ElementCount;
class Function;
class GlobalObject;
@@ -72,9 +73,7 @@ class StringRef;
class TypedPointerType;
class ValueHandleBase;
-using DenseMapAPIntKeyInfo = DenseMapInfo<APInt>;
-
-struct DenseMapAPFloatKeyInfo {
+template <> struct DenseMapInfo<APFloat> {
static inline APFloat getEmptyKey() { return APFloat(APFloat::Bogus(), 1); }
static inline APFloat getTombstoneKey() {
return APFloat(APFloat::Bogus(), 2);
@@ -1307,11 +1306,13 @@ template <> struct MDNodeKeyImpl<DIMacroFile> {
}
};
-template <> struct MDNodeKeyImpl<DIArgList> {
+// DIArgLists are not MDNodes, but we still want to unique them in a DenseSet
+// based on a hash of their arguments.
+struct DIArgListKeyInfo {
ArrayRef<ValueAsMetadata *> Args;
- MDNodeKeyImpl(ArrayRef<ValueAsMetadata *> Args) : Args(Args) {}
- MDNodeKeyImpl(const DIArgList *N) : Args(N->getArgs()) {}
+ DIArgListKeyInfo(ArrayRef<ValueAsMetadata *> Args) : Args(Args) {}
+ DIArgListKeyInfo(const DIArgList *N) : Args(N->getArgs()) {}
bool isKeyOf(const DIArgList *RHS) const { return Args == RHS->getArgs(); }
@@ -1320,6 +1321,35 @@ template <> struct MDNodeKeyImpl<DIArgList> {
}
};
+/// DenseMapInfo for DIArgList.
+struct DIArgListInfo {
+ using KeyTy = DIArgListKeyInfo;
+
+ static inline DIArgList *getEmptyKey() {
+ return DenseMapInfo<DIArgList *>::getEmptyKey();
+ }
+
+ static inline DIArgList *getTombstoneKey() {
+ return DenseMapInfo<DIArgList *>::getTombstoneKey();
+ }
+
+ static unsigned getHashValue(const KeyTy &Key) { return Key.getHashValue(); }
+
+ static unsigned getHashValue(const DIArgList *N) {
+ return KeyTy(N).getHashValue();
+ }
+
+ static bool isEqual(const KeyTy &LHS, const DIArgList *RHS) {
+ if (RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return false;
+ return LHS.isKeyOf(RHS);
+ }
+
+ static bool isEqual(const DIArgList *LHS, const DIArgList *RHS) {
+ return LHS == RHS;
+ }
+};
+
/// DenseMapInfo for MDNode subclasses.
template <class NodeTy> struct MDNodeInfo {
using KeyTy = MDNodeKeyImpl<NodeTy>;
@@ -1457,11 +1487,9 @@ public:
DenseMap<unsigned, std::unique_ptr<ConstantInt>> IntZeroConstants;
DenseMap<unsigned, std::unique_ptr<ConstantInt>> IntOneConstants;
- DenseMap<APInt, std::unique_ptr<ConstantInt>, DenseMapAPIntKeyInfo>
- IntConstants;
+ DenseMap<APInt, std::unique_ptr<ConstantInt>> IntConstants;
- DenseMap<APFloat, std::unique_ptr<ConstantFP>, DenseMapAPFloatKeyInfo>
- FPConstants;
+ DenseMap<APFloat, std::unique_ptr<ConstantFP>> FPConstants;
FoldingSet<AttributeImpl> AttrsSet;
FoldingSet<AttributeListImpl> AttrsLists;
@@ -1470,6 +1498,7 @@ public:
StringMap<MDString, BumpPtrAllocator> MDStringCache;
DenseMap<Value *, ValueAsMetadata *> ValuesAsMetadata;
DenseMap<Metadata *, MetadataAsValue *> MetadataAsValues;
+ DenseSet<DIArgList *, DIArgListInfo> DIArgLists;
#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \
DenseSet<CLASS *, CLASS##Info> CLASS##s;
@@ -1633,6 +1662,36 @@ public:
/// The lifetime of the object must be guaranteed to extend as long as the
/// LLVMContext is used by compilation.
void setOptPassGate(OptPassGate &);
+
+ /// Mapping of blocks to collections of "trailing" DPValues. As part of the
+ /// "RemoveDIs" project, debug-info variable location records are going to
+ /// cease being instructions... which raises the problem of where should they
+ /// be recorded when we remove the terminator of a block, such as:
+ ///
+ /// %foo = add i32 0, 0
+ /// br label %bar
+ ///
+ /// If the branch is removed (a legitimate transient state while editing a
+ /// block), any debug records between those two instructions will not have a
+ /// location. Each block thus records any DPValue records that "trail" in
+ /// such a way. These are stored in LLVMContext because typically LLVM only
+ /// edits a small number of blocks at a time, so there's no need to bloat
+ /// BasicBlock with such a data structure.
+ SmallDenseMap<BasicBlock *, DPMarker *> TrailingDPValues;
+
+ // Set, get and delete operations for TrailingDPValues.
+ void setTrailingDPValues(BasicBlock *B, DPMarker *M) {
+ assert(!TrailingDPValues.count(B));
+ TrailingDPValues[B] = M;
+ }
+
+ DPMarker *getTrailingDPValues(BasicBlock *B) {
+ return TrailingDPValues.lookup(B);
+ }
+
+ void deleteTrailingDPValues(BasicBlock *B) {
+ TrailingDPValues.erase(B);
+ }
};
} // end namespace llvm
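// Sketch of the intended bookkeeping around TrailingDPValues (the transform
// driving it is hypothetical; only the three accessors above are real):
static void onTerminatorErased(llvm::LLVMContextImpl &Impl,
                               llvm::BasicBlock *BB, llvm::DPMarker *Dangling) {
  // Stash the marker whose records trailed the erased terminator.
  Impl.setTrailingDPValues(BB, Dangling);
}
static void onTerminatorInserted(llvm::LLVMContextImpl &Impl,
                                 llvm::BasicBlock *BB) {
  if (llvm::DPMarker *Trailing = Impl.getTrailingDPValues(BB)) {
    // ...reattach Trailing's records ahead of the new terminator...
    Impl.deleteTrailingDPValues(BB);
  }
}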
diff --git a/contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp b/contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp
index 6c223d4ec381..dac4fbce17e4 100644
--- a/contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp
@@ -31,6 +31,7 @@
using namespace llvm;
+extern cl::opt<bool> UseNewDbgInfoFormat;
// See PassManagers.h for Pass Manager infrastructure overview.
//===----------------------------------------------------------------------===//
@@ -527,6 +528,11 @@ bool PassManagerImpl::run(Module &M) {
dumpArguments();
dumpPasses();
+ // RemoveDIs: if a command line flag is given, convert to the DPValue
+ // representation of debug-info for the duration of these passes.
+ if (UseNewDbgInfoFormat)
+ M.convertToNewDbgValues();
+
for (ImmutablePass *ImPass : getImmutablePasses())
Changed |= ImPass->doInitialization(M);
@@ -539,6 +545,8 @@ bool PassManagerImpl::run(Module &M) {
for (ImmutablePass *ImPass : getImmutablePasses())
Changed |= ImPass->doFinalization(M);
+ M.convertFromNewDbgValues();
+
return Changed;
}
} // namespace legacy
diff --git a/contrib/llvm-project/llvm/lib/IR/Mangler.cpp b/contrib/llvm-project/llvm/lib/IR/Mangler.cpp
index 8d9880ecba58..3acac2c3e3db 100644
--- a/contrib/llvm-project/llvm/lib/IR/Mangler.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Mangler.cpp
@@ -149,8 +149,8 @@ void Mangler::getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV,
// Don't add byte count suffixes when '\01' or '?' are in the first
// character.
- if (Name.startswith("\01") ||
- (DL.doNotMangleLeadingQuestionMark() && Name.startswith("?")))
+ if (Name.starts_with("\01") ||
+ (DL.doNotMangleLeadingQuestionMark() && Name.starts_with("?")))
MSFunc = nullptr;
CallingConv::ID CC =
diff --git a/contrib/llvm-project/llvm/lib/IR/Metadata.cpp b/contrib/llvm-project/llvm/lib/IR/Metadata.cpp
index c153ffb71a73..7bc25e30b893 100644
--- a/contrib/llvm-project/llvm/lib/IR/Metadata.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Metadata.cpp
@@ -32,6 +32,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DebugProgramInstruction.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalVariable.h"
@@ -147,6 +148,32 @@ void MetadataAsValue::untrack() {
MetadataTracking::untrack(MD);
}
+DPValue *DebugValueUser::getUser() { return static_cast<DPValue *>(this); }
+const DPValue *DebugValueUser::getUser() const {
+ return static_cast<const DPValue *>(this);
+}
+void DebugValueUser::handleChangedValue(Metadata *NewMD) {
+ getUser()->handleChangedLocation(NewMD);
+}
+
+void DebugValueUser::trackDebugValue() {
+ if (DebugValue)
+ MetadataTracking::track(&DebugValue, *DebugValue, *this);
+}
+
+void DebugValueUser::untrackDebugValue() {
+ if (DebugValue)
+ MetadataTracking::untrack(DebugValue);
+}
+
+void DebugValueUser::retrackDebugValue(DebugValueUser &X) {
+ assert(DebugValue == X.DebugValue && "Expected values to match");
+ if (X.DebugValue) {
+ MetadataTracking::retrack(X.DebugValue, DebugValue);
+ X.DebugValue = nullptr;
+ }
+}
+
bool MetadataTracking::track(void *Ref, Metadata &MD, OwnerTy Owner) {
assert(Ref && "Expected live reference");
assert((Owner || *static_cast<Metadata **>(Ref) == &MD) &&
@@ -195,6 +222,8 @@ SmallVector<Metadata *> ReplaceableMetadataImpl::getAllArgListUsers() {
SmallVector<std::pair<OwnerTy, uint64_t> *> MDUsersWithID;
for (auto Pair : UseMap) {
OwnerTy Owner = Pair.second.first;
+ if (Owner.isNull())
+ continue;
if (!isa<Metadata *>(Owner))
continue;
Metadata *OwnerMD = cast<Metadata *>(Owner);
@@ -210,6 +239,30 @@ SmallVector<Metadata *> ReplaceableMetadataImpl::getAllArgListUsers() {
return MDUsers;
}
+SmallVector<DPValue *> ReplaceableMetadataImpl::getAllDPValueUsers() {
+ SmallVector<std::pair<OwnerTy, uint64_t> *> DPVUsersWithID;
+ for (auto Pair : UseMap) {
+ OwnerTy Owner = Pair.second.first;
+ if (Owner.isNull())
+ continue;
+ if (!Owner.is<DebugValueUser *>())
+ continue;
+ DPVUsersWithID.push_back(&UseMap[Pair.first]);
+ }
+ // Order DPValue users in reverse-creation order. Normal dbg.value users
+ // of MetadataAsValues are ordered by their UseList, i.e. reverse order of
+ // when they were added: we need to replicate that here. The structure of
+ // debug-info output depends on the ordering of intrinsics, thus we need
+ // to keep them consistent for comparison's sake.
+ llvm::sort(DPVUsersWithID, [](auto UserA, auto UserB) {
+ return UserA->second > UserB->second;
+ });
+ SmallVector<DPValue *> DPVUsers;
+ for (auto UserWithID : DPVUsersWithID)
+ DPVUsers.push_back(UserWithID->first.get<DebugValueUser *>()->getUser());
+ return DPVUsers;
+}
+
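// Ordering sketch for getAllDPValueUsers() (illustrative use-ids): users
// registered with ids {3, 7, 5} are returned as {7, 5, 3}, newest first,
// matching the reverse-insertion order of a MetadataAsValue use-list.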
void ReplaceableMetadataImpl::addRef(void *Ref, OwnerTy Owner) {
bool WasInserted =
UseMap.insert(std::make_pair(Ref, std::make_pair(Owner, NextIndex)))
@@ -308,6 +361,11 @@ void ReplaceableMetadataImpl::replaceAllUsesWith(Metadata *MD) {
continue;
}
+ if (Owner.is<DebugValueUser *>()) {
+ Owner.get<DebugValueUser *>()->getUser()->handleChangedLocation(MD);
+ continue;
+ }
+
// There's a Metadata owner -- dispatch.
Metadata *OwnerMD = cast<Metadata *>(Owner);
switch (OwnerMD->getMetadataID()) {
@@ -343,7 +401,7 @@ void ReplaceableMetadataImpl::resolveAllUses(bool ResolveUsers) {
auto Owner = Pair.second.first;
if (!Owner)
continue;
- if (isa<MetadataAsValue *>(Owner))
+ if (!Owner.is<Metadata *>())
continue;
// Resolve MDNodes that point at this.
@@ -356,22 +414,29 @@ void ReplaceableMetadataImpl::resolveAllUses(bool ResolveUsers) {
}
}
+// Special handling of DIArgList is required in the RemoveDIs project; see
+// commentary in DIArgList::handleChangedOperand for details. Hidden behind
+// conditional compilation to avoid a compile-time regression.
ReplaceableMetadataImpl *ReplaceableMetadataImpl::getOrCreate(Metadata &MD) {
if (auto *N = dyn_cast<MDNode>(&MD))
return N->isResolved() ? nullptr : N->Context.getOrCreateReplaceableUses();
+ if (auto ArgList = dyn_cast<DIArgList>(&MD))
+ return ArgList;
return dyn_cast<ValueAsMetadata>(&MD);
}
ReplaceableMetadataImpl *ReplaceableMetadataImpl::getIfExists(Metadata &MD) {
if (auto *N = dyn_cast<MDNode>(&MD))
return N->isResolved() ? nullptr : N->Context.getReplaceableUses();
+ if (auto ArgList = dyn_cast<DIArgList>(&MD))
+ return ArgList;
return dyn_cast<ValueAsMetadata>(&MD);
}
bool ReplaceableMetadataImpl::isReplaceable(const Metadata &MD) {
if (auto *N = dyn_cast<MDNode>(&MD))
return !N->isResolved();
- return isa<ValueAsMetadata>(&MD);
+ return isa<ValueAsMetadata>(&MD) || isa<DIArgList>(&MD);
}
static DISubprogram *getLocalFunctionMetadata(Value *V) {
@@ -1351,25 +1416,22 @@ bool MDAttachments::erase(unsigned ID) {
return OldSize != Attachments.size();
}
-MDNode *Value::getMetadata(unsigned KindID) const {
+MDNode *Value::getMetadata(StringRef Kind) const {
if (!hasMetadata())
return nullptr;
- const auto &Info = getContext().pImpl->ValueMetadata[this];
- assert(!Info.empty() && "bit out of sync with hash table");
- return Info.lookup(KindID);
+ unsigned KindID = getContext().getMDKindID(Kind);
+ return getMetadataImpl(KindID);
}
-MDNode *Value::getMetadata(StringRef Kind) const {
- if (!hasMetadata())
- return nullptr;
- const auto &Info = getContext().pImpl->ValueMetadata[this];
- assert(!Info.empty() && "bit out of sync with hash table");
- return Info.lookup(getContext().getMDKindID(Kind));
+MDNode *Value::getMetadataImpl(unsigned KindID) const {
+ const LLVMContext &Ctx = getContext();
+ const MDAttachments &Attachments = Ctx.pImpl->ValueMetadata.at(this);
+ return Attachments.lookup(KindID);
}
void Value::getMetadata(unsigned KindID, SmallVectorImpl<MDNode *> &MDs) const {
if (hasMetadata())
- getContext().pImpl->ValueMetadata[this].get(KindID, MDs);
+ getContext().pImpl->ValueMetadata.at(this).get(KindID, MDs);
}
void Value::getMetadata(StringRef Kind, SmallVectorImpl<MDNode *> &MDs) const {
@@ -1382,8 +1444,7 @@ void Value::getAllMetadata(
if (hasMetadata()) {
assert(getContext().pImpl->ValueMetadata.count(this) &&
"bit out of sync with hash table");
- const auto &Info = getContext().pImpl->ValueMetadata.find(this)->second;
- assert(!Info.empty() && "Shouldn't have called this");
+ const MDAttachments &Info = getContext().pImpl->ValueMetadata.at(this);
Info.getAll(MDs);
}
}
@@ -1393,7 +1454,7 @@ void Value::setMetadata(unsigned KindID, MDNode *Node) {
// Handle the case when we're adding/updating metadata on a value.
if (Node) {
- auto &Info = getContext().pImpl->ValueMetadata[this];
+ MDAttachments &Info = getContext().pImpl->ValueMetadata[this];
assert(!Info.empty() == HasMetadata && "bit out of sync with hash table");
if (Info.empty())
HasMetadata = true;
@@ -1406,7 +1467,7 @@ void Value::setMetadata(unsigned KindID, MDNode *Node) {
"bit out of sync with hash table");
if (!HasMetadata)
return; // Nothing to remove!
- auto &Info = getContext().pImpl->ValueMetadata[this];
+ MDAttachments &Info = getContext().pImpl->ValueMetadata.find(this)->second;
// Handle removal of an existing value.
Info.erase(KindID);
@@ -1438,7 +1499,7 @@ bool Value::eraseMetadata(unsigned KindID) {
if (!HasMetadata)
return false;
- auto &Store = getContext().pImpl->ValueMetadata[this];
+ MDAttachments &Store = getContext().pImpl->ValueMetadata.find(this)->second;
bool Changed = Store.erase(KindID);
if (Store.empty())
clearMetadata();
@@ -1461,7 +1522,11 @@ void Instruction::setMetadata(StringRef Kind, MDNode *Node) {
}
MDNode *Instruction::getMetadataImpl(StringRef Kind) const {
- return getMetadataImpl(getContext().getMDKindID(Kind));
+ const LLVMContext &Ctx = getContext();
+ unsigned KindID = Ctx.getMDKindID(Kind);
+ if (KindID == LLVMContext::MD_dbg)
+ return DbgLoc.getAsMDNode();
+ return Value::getMetadata(KindID);
}
void Instruction::dropUnknownNonDebugMetadata(ArrayRef<unsigned> KnownIDs) {
@@ -1475,7 +1540,7 @@ void Instruction::dropUnknownNonDebugMetadata(ArrayRef<unsigned> KnownIDs) {
KnownSet.insert(LLVMContext::MD_DIAssignID);
auto &MetadataStore = getContext().pImpl->ValueMetadata;
- auto &Info = MetadataStore[this];
+ MDAttachments &Info = MetadataStore.find(this)->second;
assert(!Info.empty() && "bit out of sync with hash table");
Info.remove_if([&KnownSet](const MDAttachments::Attachment &I) {
return !KnownSet.count(I.MDKind);
@@ -1542,13 +1607,10 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
}
void Instruction::addAnnotationMetadata(SmallVector<StringRef> Annotations) {
- SmallSetVector<StringRef, 2> AnnotationsSet(Annotations.begin(),
- Annotations.end());
- MDBuilder MDB(getContext());
-
- auto *Existing = getMetadata(LLVMContext::MD_annotation);
SmallVector<Metadata *, 4> Names;
- if (Existing) {
+ if (auto *Existing = getMetadata(LLVMContext::MD_annotation)) {
+ SmallSetVector<StringRef, 2> AnnotationsSet(Annotations.begin(),
+ Annotations.end());
auto *Tuple = cast<MDTuple>(Existing);
for (auto &N : Tuple->operands()) {
if (isa<MDString>(N.get())) {
@@ -1564,6 +1626,7 @@ void Instruction::addAnnotationMetadata(SmallVector<StringRef> Annotations) {
}
}
+ MDBuilder MDB(getContext());
SmallVector<Metadata *> MDAnnotationStrings;
for (StringRef Annotation : Annotations)
MDAnnotationStrings.push_back(MDB.createString(Annotation));
@@ -1574,11 +1637,8 @@ void Instruction::addAnnotationMetadata(SmallVector<StringRef> Annotations) {
}
void Instruction::addAnnotationMetadata(StringRef Name) {
- MDBuilder MDB(getContext());
-
- auto *Existing = getMetadata(LLVMContext::MD_annotation);
SmallVector<Metadata *, 4> Names;
- if (Existing) {
+ if (auto *Existing = getMetadata(LLVMContext::MD_annotation)) {
auto *Tuple = cast<MDTuple>(Existing);
for (auto &N : Tuple->operands()) {
if (isa<MDString>(N.get()) &&
@@ -1588,6 +1648,7 @@ void Instruction::addAnnotationMetadata(StringRef Name) {
}
}
+ MDBuilder MDB(getContext());
Names.push_back(MDB.createString(Name));
MDNode *MD = MDTuple::get(getContext(), Names);
setMetadata(LLVMContext::MD_annotation, MD);
@@ -1598,7 +1659,7 @@ AAMDNodes Instruction::getAAMetadata() const {
// Not using Instruction::hasMetadata() because we're not interested in
// DebugInfoMetadata.
if (Value::hasMetadata()) {
- const auto &Info = getContext().pImpl->ValueMetadata[this];
+ const MDAttachments &Info = getContext().pImpl->ValueMetadata.at(this);
Result.TBAA = Info.lookup(LLVMContext::MD_tbaa);
Result.TBAAStruct = Info.lookup(LLVMContext::MD_tbaa_struct);
Result.Scope = Info.lookup(LLVMContext::MD_alias_scope);
@@ -1619,13 +1680,6 @@ void Instruction::setNoSanitizeMetadata() {
llvm::MDNode::get(getContext(), std::nullopt));
}
-MDNode *Instruction::getMetadataImpl(unsigned KindID) const {
- // Handle 'dbg' as a special case since it is not stored in the hash table.
- if (KindID == LLVMContext::MD_dbg)
- return DbgLoc.getAsMDNode();
- return Value::getMetadata(KindID);
-}
-
void Instruction::getAllMetadataImpl(
SmallVectorImpl<std::pair<unsigned, MDNode *>> &Result) const {
Result.clear();
diff --git a/contrib/llvm-project/llvm/lib/IR/Module.cpp b/contrib/llvm-project/llvm/lib/IR/Module.cpp
index 73354a8f36d2..eeb90a6cb3c4 100644
--- a/contrib/llvm-project/llvm/lib/IR/Module.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Module.cpp
@@ -71,7 +71,8 @@ template class llvm::SymbolTableListTraits<GlobalIFunc>;
Module::Module(StringRef MID, LLVMContext &C)
: Context(C), ValSymTab(std::make_unique<ValueSymbolTable>(-1)),
- ModuleID(std::string(MID)), SourceFileName(std::string(MID)), DL("") {
+ ModuleID(std::string(MID)), SourceFileName(std::string(MID)), DL(""),
+ IsNewDbgInfoFormat(false) {
Context.addModule(this);
}
@@ -155,12 +156,6 @@ FunctionCallee Module::getOrInsertFunction(StringRef Name, FunctionType *Ty,
return {Ty, New}; // Return the new prototype.
}
- // If the function exists but has the wrong type, return a bitcast to the
- // right type.
- auto *PTy = PointerType::get(Ty, F->getAddressSpace());
- if (F->getType() != PTy)
- return {Ty, ConstantExpr::getBitCast(F, PTy)};
-
// Otherwise, we just found the existing function or a prototype.
return {Ty, F};
}
@@ -211,13 +206,6 @@ Constant *Module::getOrInsertGlobal(
GV = CreateGlobalCallback();
assert(GV && "The CreateGlobalCallback is expected to create a global");
- // If the variable exists but has the wrong type, return a bitcast to the
- // right type.
- Type *GVTy = GV->getType();
- PointerType *PTy = PointerType::get(Ty, GVTy->getPointerAddressSpace());
- if (GVTy != PTy)
- return ConstantExpr::getBitCast(GV, PTy);
-
// Otherwise, we just found the existing function or a prototype.
return GV;
}
@@ -395,8 +383,6 @@ void Module::setDataLayout(StringRef Desc) {
void Module::setDataLayout(const DataLayout &Other) { DL = Other; }
-const DataLayout &Module::getDataLayout() const { return DL; }
-
DICompileUnit *Module::debug_compile_units_iterator::operator*() const {
return cast<DICompileUnit>(CUs->getOperand(Idx));
}
@@ -633,6 +619,23 @@ void Module::setCodeModel(CodeModel::Model CL) {
addModuleFlag(ModFlagBehavior::Error, "Code Model", CL);
}
+std::optional<uint64_t> Module::getLargeDataThreshold() const {
+ auto *Val =
+ cast_or_null<ConstantAsMetadata>(getModuleFlag("Large Data Threshold"));
+
+ if (!Val)
+ return std::nullopt;
+
+ return cast<ConstantInt>(Val->getValue())->getZExtValue();
+}
+
+void Module::setLargeDataThreshold(uint64_t Threshold) {
+ // Since the large data threshold goes along with the code model, the merge
+ // behavior is the same.
+ addModuleFlag(ModFlagBehavior::Error, "Large Data Threshold",
+ ConstantInt::get(Type::getInt64Ty(Context), Threshold));
+}
+
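// A minimal round-trip sketch for the new module flag (the module handle and
// the 65536 threshold are arbitrary):
static void demoLargeDataThreshold(Module &M) {
  M.setLargeDataThreshold(65536);
  std::optional<uint64_t> T = M.getLargeDataThreshold();
  assert(T && *T == 65536 && "flag should round-trip");
  (void)T;
}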
void Module::setProfileSummary(Metadata *M, ProfileSummary::Kind Kind) {
if (Kind == ProfileSummary::PSK_CSInstr)
setModuleFlag(ModFlagBehavior::Error, "CSProfileSummary", M);
diff --git a/contrib/llvm-project/llvm/lib/IR/ModuleSummaryIndex.cpp b/contrib/llvm-project/llvm/lib/IR/ModuleSummaryIndex.cpp
index 15fe342969d6..198c730418c7 100644
--- a/contrib/llvm-project/llvm/lib/IR/ModuleSummaryIndex.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -554,6 +554,17 @@ void ModuleSummaryIndex::exportToDot(
std::map<StringRef, GVSOrderedMapTy> ModuleToDefinedGVS;
collectDefinedGVSummariesPerModule(ModuleToDefinedGVS);
+ // Assign an id to each module path for use in graph labels. Since the
+ // StringMap iteration order isn't guaranteed, order by path string before
+ // assigning ids.
+ std::vector<StringRef> ModulePaths;
+ for (auto &[ModPath, _] : modulePaths())
+ ModulePaths.push_back(ModPath);
+ llvm::sort(ModulePaths);
+ DenseMap<StringRef, uint64_t> ModuleIdMap;
+ for (auto &ModPath : ModulePaths)
+ ModuleIdMap.try_emplace(ModPath, ModuleIdMap.size());
+
// Get node identifier in form MXXX_<GUID>. The MXXX prefix is required,
// because we may have multiple linkonce functions summaries.
auto NodeId = [](uint64_t ModId, GlobalValue::GUID Id) {
@@ -589,7 +600,10 @@ void ModuleSummaryIndex::exportToDot(
OS << "digraph Summary {\n";
for (auto &ModIt : ModuleToDefinedGVS) {
- auto ModId = getModuleId(ModIt.first);
+ // Will be empty for a just-built per-module index, which doesn't set up a
+ // module paths table. In that case use 0 as the module id.
+ assert(ModuleIdMap.count(ModIt.first) || ModuleIdMap.empty());
+ auto ModId = ModuleIdMap.empty() ? 0 : ModuleIdMap[ModIt.first];
OS << " // Module: " << ModIt.first << "\n";
OS << " subgraph cluster_" << std::to_string(ModId) << " {\n";
OS << " style = filled;\n";
diff --git a/contrib/llvm-project/llvm/lib/IR/Operator.cpp b/contrib/llvm-project/llvm/lib/IR/Operator.cpp
index b57f3e3b2967..cd982c7da102 100644
--- a/contrib/llvm-project/llvm/lib/IR/Operator.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Operator.cpp
@@ -32,11 +32,17 @@ bool Operator::hasPoisonGeneratingFlags() const {
case Instruction::AShr:
case Instruction::LShr:
return cast<PossiblyExactOperator>(this)->isExact();
+ case Instruction::Or:
+ return cast<PossiblyDisjointInst>(this)->isDisjoint();
case Instruction::GetElementPtr: {
auto *GEP = cast<GEPOperator>(this);
// Note: inrange exists on constexpr only
return GEP->isInBounds() || GEP->getInRangeIndex() != std::nullopt;
}
+ case Instruction::ZExt:
+ if (auto *NNI = dyn_cast<PossiblyNonNegInst>(this))
+ return NNI->hasNonNeg();
+ return false;
default:
if (const auto *FP = dyn_cast<FPMathOperator>(this))
return FP->hasNoNaNs() || FP->hasNoInfs();
@@ -127,9 +133,7 @@ bool GEPOperator::accumulateConstantOffset(
auto end = generic_gep_type_iterator<decltype(Index.end())>::end(Index.end());
for (auto GTI = begin, GTE = end; GTI != GTE; ++GTI) {
// Scalable vectors are multiplied by a runtime constant.
- bool ScalableType = false;
- if (isa<ScalableVectorType>(GTI.getIndexedType()))
- ScalableType = true;
+ bool ScalableType = GTI.getIndexedType()->isScalableTy();
Value *V = GTI.getOperand();
StructType *STy = GTI.getStructTypeOrNull();
@@ -189,7 +193,7 @@ bool GEPOperator::collectOffset(
for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this);
GTI != GTE; ++GTI) {
// Scalable vectors are multiplied by a runtime constant.
- bool ScalableType = isa<ScalableVectorType>(GTI.getIndexedType());
+ bool ScalableType = GTI.getIndexedType()->isScalableTy();
Value *V = GTI.getOperand();
StructType *STy = GTI.getStructTypeOrNull();
@@ -225,8 +229,8 @@ bool GEPOperator::collectOffset(
// Insert an initial offset of 0 for V iff none exists already, then
// increment the offset by IndexedSize.
if (!IndexedSize.isZero()) {
- VariableOffsets.insert({V, APInt(BitWidth, 0)});
- VariableOffsets[V] += IndexedSize;
+ auto *It = VariableOffsets.insert({V, APInt(BitWidth, 0)}).first;
+ It->second += IndexedSize;
}
}
return true;
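// Illustrative IR for the two flag bearers newly covered by
// hasPoisonGeneratingFlags() above (LangRef syntax for the LLVM 18 flags):
//   %o = or disjoint i8 %a, %b   ; poison if %a and %b share a set bit
//   %z = zext nneg i8 %a to i32  ; poison if %a is negative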
diff --git a/contrib/llvm-project/llvm/lib/IR/Pass.cpp b/contrib/llvm-project/llvm/lib/IR/Pass.cpp
index 716d9d546f4f..d6096ebb3af7 100644
--- a/contrib/llvm-project/llvm/lib/IR/Pass.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Pass.cpp
@@ -139,9 +139,13 @@ LLVM_DUMP_METHOD void Pass::dump() const {
#endif
#ifdef EXPENSIVE_CHECKS
-uint64_t Pass::structuralHash(Module &M) const { return StructuralHash(M); }
+uint64_t Pass::structuralHash(Module &M) const {
+ return StructuralHash(M, true);
+}
-uint64_t Pass::structuralHash(Function &F) const { return StructuralHash(F); }
+uint64_t Pass::structuralHash(Function &F) const {
+ return StructuralHash(F, true);
+}
#endif
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/IR/PassInstrumentation.cpp b/contrib/llvm-project/llvm/lib/IR/PassInstrumentation.cpp
index d85cefbbe6f7..6d5f3acb7a4d 100644
--- a/contrib/llvm-project/llvm/lib/IR/PassInstrumentation.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/PassInstrumentation.cpp
@@ -35,7 +35,8 @@ bool isSpecialPass(StringRef PassID, const std::vector<StringRef> &Specials) {
StringRef Prefix = PassID;
if (Pos != StringRef::npos)
Prefix = PassID.substr(0, Pos);
- return any_of(Specials, [Prefix](StringRef S) { return Prefix.endswith(S); });
+ return any_of(Specials,
+ [Prefix](StringRef S) { return Prefix.ends_with(S); });
}
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/IR/PassManager.cpp b/contrib/llvm-project/llvm/lib/IR/PassManager.cpp
index 92b729c44d21..cbddf3dfb056 100644
--- a/contrib/llvm-project/llvm/lib/IR/PassManager.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/PassManager.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/PassManager.h"
-#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/IR/PassManagerImpl.h"
#include <optional>
diff --git a/contrib/llvm-project/llvm/lib/IR/PassTimingInfo.cpp b/contrib/llvm-project/llvm/lib/IR/PassTimingInfo.cpp
index cfd27bf78793..3816eff5c0f2 100644
--- a/contrib/llvm-project/llvm/lib/IR/PassTimingInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/PassTimingInfo.cpp
@@ -252,9 +252,14 @@ static bool shouldIgnorePass(StringRef PassID) {
void TimePassesHandler::startPassTimer(StringRef PassID) {
if (shouldIgnorePass(PassID))
return;
- assert(!ActivePassTimer && "should only have one pass timer at a time");
+ // Stop the previous pass timer to prevent double counting when a
+ // pass requests another pass.
+ if (!PassActiveTimerStack.empty()) {
+ assert(PassActiveTimerStack.back()->isRunning());
+ PassActiveTimerStack.back()->stopTimer();
+ }
Timer &MyTimer = getPassTimer(PassID, /*IsPass*/ true);
- ActivePassTimer = &MyTimer;
+ PassActiveTimerStack.push_back(&MyTimer);
assert(!MyTimer.isRunning());
MyTimer.startTimer();
}
@@ -262,10 +267,17 @@ void TimePassesHandler::startPassTimer(StringRef PassID) {
void TimePassesHandler::stopPassTimer(StringRef PassID) {
if (shouldIgnorePass(PassID))
return;
- assert(ActivePassTimer);
- assert(ActivePassTimer->isRunning());
- ActivePassTimer->stopTimer();
- ActivePassTimer = nullptr;
+ assert(!PassActiveTimerStack.empty() && "empty stack in popTimer");
+ Timer *MyTimer = PassActiveTimerStack.pop_back_val();
+ assert(MyTimer && "timer should be present");
+ assert(MyTimer->isRunning());
+ MyTimer->stopTimer();
+
+ // Restart the previously stopped timer.
+ if (!PassActiveTimerStack.empty()) {
+ assert(!PassActiveTimerStack.back()->isRunning());
+ PassActiveTimerStack.back()->startTimer();
+ }
}
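// Nesting sketch (illustrative pass names): when pass A triggers pass B, the
// stack now pauses A rather than double counting its time:
//   startPassTimer("A");  // A running
//   startPassTimer("B");  // A paused, B running
//   stopPassTimer("B");   // B stopped, A resumed
//   stopPassTimer("A");   // A stopped; A's total excludes B's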
void TimePassesHandler::startAnalysisTimer(StringRef PassID) {
diff --git a/contrib/llvm-project/llvm/lib/IR/PrintPasses.cpp b/contrib/llvm-project/llvm/lib/IR/PrintPasses.cpp
index d2a9827fd07c..e2ef20bb81ba 100644
--- a/contrib/llvm-project/llvm/lib/IR/PrintPasses.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/PrintPasses.cpp
@@ -212,7 +212,7 @@ std::string llvm::doSystemDiff(StringRef Before, StringRef After,
static SmallVector<int> FD{-1, -1, -1};
SmallVector<StringRef> SR{Before, After};
static SmallVector<std::string> FileName{"", "", ""};
- if (auto Err = prepareTempFiles(FD, SR, FileName))
+ if (prepareTempFiles(FD, SR, FileName))
return "Unable to create temporary file.";
static ErrorOr<std::string> DiffExe = sys::findProgramByName(DiffBinary);
@@ -238,7 +238,7 @@ std::string llvm::doSystemDiff(StringRef Before, StringRef After,
else
return "Unable to read result.";
- if (auto Err = cleanUpTempFiles(FileName))
+ if (cleanUpTempFiles(FileName))
return "Unable to remove temporary file.";
return Diff;
diff --git a/contrib/llvm-project/llvm/lib/IR/ProfDataUtils.cpp b/contrib/llvm-project/llvm/lib/IR/ProfDataUtils.cpp
index e534368b05e4..29536b0b090c 100644
--- a/contrib/llvm-project/llvm/lib/IR/ProfDataUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/ProfDataUtils.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
@@ -45,26 +46,6 @@ constexpr unsigned WeightsIdx = 1;
// the minimum number of operands for MD_prof nodes with branch weights
constexpr unsigned MinBWOps = 3;
-bool extractWeights(const MDNode *ProfileData,
- SmallVectorImpl<uint32_t> &Weights) {
- // Assume preconditions are already met (i.e. this is valid metadata)
- assert(ProfileData && "ProfileData was nullptr in extractWeights");
- unsigned NOps = ProfileData->getNumOperands();
-
- assert(WeightsIdx < NOps && "Weights Index must be less than NOps.");
- Weights.resize(NOps - WeightsIdx);
-
- for (unsigned Idx = WeightsIdx, E = NOps; Idx != E; ++Idx) {
- ConstantInt *Weight =
- mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(Idx));
- assert(Weight && "Malformed branch_weight in MD_prof node");
- assert(Weight->getValue().getActiveBits() <= 32 &&
- "Too many bits for uint32_t");
- Weights[Idx - WeightsIdx] = Weight->getZExtValue();
- }
- return true;
-}
-
// We may want to add support for other MD_prof types, so provide an abstraction
// for checking the metadata type.
bool isTargetMD(const MDNode *ProfData, const char *Name, unsigned MinOps) {
@@ -119,11 +100,30 @@ MDNode *getValidBranchWeightMDNode(const Instruction &I) {
return nullptr;
}
+void extractFromBranchWeightMD(const MDNode *ProfileData,
+ SmallVectorImpl<uint32_t> &Weights) {
+ assert(isBranchWeightMD(ProfileData) && "wrong metadata");
+
+ unsigned NOps = ProfileData->getNumOperands();
+ assert(WeightsIdx < NOps && "Weights Index must be less than NOps.");
+ Weights.resize(NOps - WeightsIdx);
+
+ for (unsigned Idx = WeightsIdx, E = NOps; Idx != E; ++Idx) {
+ ConstantInt *Weight =
+ mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(Idx));
+ assert(Weight && "Malformed branch_weight in MD_prof node");
+ assert(Weight->getValue().getActiveBits() <= 32 &&
+ "Too many bits for uint32_t");
+ Weights[Idx - WeightsIdx] = Weight->getZExtValue();
+ }
+}
+
bool extractBranchWeights(const MDNode *ProfileData,
SmallVectorImpl<uint32_t> &Weights) {
if (!isBranchWeightMD(ProfileData))
return false;
- return extractWeights(ProfileData, Weights);
+ extractFromBranchWeightMD(ProfileData, Weights);
+ return true;
}
bool extractBranchWeights(const Instruction &I,
@@ -184,4 +184,10 @@ bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalVal) {
return extractProfTotalWeight(I.getMetadata(LLVMContext::MD_prof), TotalVal);
}
+void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights) {
+ MDBuilder MDB(I.getContext());
+ MDNode *BranchWeights = MDB.createBranchWeights(Weights);
+ I.setMetadata(LLVMContext::MD_prof, BranchWeights);
+}
+
} // namespace llvm
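// A minimal usage sketch of the new helper (the branch instruction and the
// 3:1 weights are hypothetical):
static void demoBranchWeights(llvm::BranchInst &Br) {
  uint32_t W[] = {3, 1};
  llvm::setBranchWeights(Br, W); // attaches !{!"branch_weights", i32 3, i32 1}
  llvm::SmallVector<uint32_t, 2> Out;
  bool Ok = llvm::extractBranchWeights(Br, Out); // Ok == true, Out == {3, 1}
  (void)Ok;
}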
diff --git a/contrib/llvm-project/llvm/lib/IR/ReplaceConstant.cpp b/contrib/llvm-project/llvm/lib/IR/ReplaceConstant.cpp
index 58aa040eb032..42dec7c72328 100644
--- a/contrib/llvm-project/llvm/lib/IR/ReplaceConstant.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/ReplaceConstant.cpp
@@ -22,24 +22,29 @@ static bool isExpandableUser(User *U) {
return isa<ConstantExpr>(U) || isa<ConstantAggregate>(U);
}
-static Instruction *expandUser(Instruction *InsertPt, Constant *C) {
+static SmallVector<Instruction *, 4> expandUser(Instruction *InsertPt,
+ Constant *C) {
+ SmallVector<Instruction *, 4> NewInsts;
if (auto *CE = dyn_cast<ConstantExpr>(C)) {
- return CE->getAsInstruction(InsertPt);
+ NewInsts.push_back(CE->getAsInstruction(InsertPt));
} else if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
Value *V = PoisonValue::get(C->getType());
- for (auto [Idx, Op] : enumerate(C->operands()))
+ for (auto [Idx, Op] : enumerate(C->operands())) {
V = InsertValueInst::Create(V, Op, Idx, "", InsertPt);
- return cast<Instruction>(V);
+ NewInsts.push_back(cast<Instruction>(V));
+ }
} else if (isa<ConstantVector>(C)) {
Type *IdxTy = Type::getInt32Ty(C->getContext());
Value *V = PoisonValue::get(C->getType());
- for (auto [Idx, Op] : enumerate(C->operands()))
+ for (auto [Idx, Op] : enumerate(C->operands())) {
V = InsertElementInst::Create(V, Op, ConstantInt::get(IdxTy, Idx), "",
InsertPt);
- return cast<Instruction>(V);
+ NewInsts.push_back(cast<Instruction>(V));
+ }
} else {
llvm_unreachable("Not an expandable user");
}
+ return NewInsts;
}
bool convertUsersOfConstantsToInstructions(ArrayRef<Constant *> Consts) {
@@ -73,6 +78,7 @@ bool convertUsersOfConstantsToInstructions(ArrayRef<Constant *> Consts) {
bool Changed = false;
while (!InstructionWorklist.empty()) {
Instruction *I = InstructionWorklist.pop_back_val();
+ DebugLoc Loc = I->getDebugLoc();
for (Use &U : I->operands()) {
auto *BI = I;
if (auto *Phi = dyn_cast<PHINode>(I)) {
@@ -85,9 +91,11 @@ bool convertUsersOfConstantsToInstructions(ArrayRef<Constant *> Consts) {
if (auto *C = dyn_cast<Constant>(U.get())) {
if (ExpandableUsers.contains(C)) {
Changed = true;
- Instruction *NI = expandUser(BI, C);
- InstructionWorklist.insert(NI);
- U.set(NI);
+ auto NewInsts = expandUser(BI, C);
+ for (auto *NI : NewInsts)
+ NI->setDebugLoc(Loc);
+ InstructionWorklist.insert(NewInsts.begin(), NewInsts.end());
+ U.set(NewInsts.back());
}
}
}
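// Illustrative before/after IR for the expansion above: a constant-struct
// operand now yields a chain in which every new insertvalue receives the
// consumer's debug location, not only the last one:
//   store { i32, i32 } { i32 1, i32 2 }, ptr %p, !dbg !7
// becomes
//   %0 = insertvalue { i32, i32 } poison, i32 1, 0, !dbg !7
//   %1 = insertvalue { i32, i32 } %0, i32 2, 1, !dbg !7
//   store { i32, i32 } %1, ptr %p, !dbg !7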
diff --git a/contrib/llvm-project/llvm/lib/IR/SSAContext.cpp b/contrib/llvm-project/llvm/lib/IR/SSAContext.cpp
index 4790d19b74b5..220abe3083eb 100644
--- a/contrib/llvm-project/llvm/lib/IR/SSAContext.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/SSAContext.cpp
@@ -16,34 +16,24 @@
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-void SSAContext::setFunction(Function &Fn) { F = &Fn; }
-
-BasicBlock *SSAContext::getEntryBlock(Function &F) {
- return &F.getEntryBlock();
-}
-
-const BasicBlock *SSAContext::getEntryBlock(const Function &F) {
- return &F.getEntryBlock();
-}
-
+template <>
void SSAContext::appendBlockDefs(SmallVectorImpl<Value *> &defs,
BasicBlock &block) {
- for (auto &instr : block.instructionsWithoutDebug(/*SkipPseudoOp=*/true)) {
+ for (auto &instr : block) {
if (instr.isTerminator())
break;
- if (instr.getType()->isVoidTy())
- continue;
- auto *def = &instr;
- defs.push_back(def);
+ defs.push_back(&instr);
}
}
+template <>
void SSAContext::appendBlockDefs(SmallVectorImpl<const Value *> &defs,
const BasicBlock &block) {
for (auto &instr : block) {
@@ -53,41 +43,47 @@ void SSAContext::appendBlockDefs(SmallVectorImpl<const Value *> &defs,
}
}
+template <>
void SSAContext::appendBlockTerms(SmallVectorImpl<Instruction *> &terms,
BasicBlock &block) {
terms.push_back(block.getTerminator());
}
+template <>
void SSAContext::appendBlockTerms(SmallVectorImpl<const Instruction *> &terms,
const BasicBlock &block) {
terms.push_back(block.getTerminator());
}
+template <>
const BasicBlock *SSAContext::getDefBlock(const Value *value) const {
if (const auto *instruction = dyn_cast<Instruction>(value))
return instruction->getParent();
return nullptr;
}
-bool SSAContext::comesBefore(const Instruction *lhs, const Instruction *rhs) {
- return lhs->comesBefore(rhs);
-}
-
+template <>
bool SSAContext::isConstantOrUndefValuePhi(const Instruction &Instr) {
if (auto *Phi = dyn_cast<PHINode>(&Instr))
return Phi->hasConstantOrUndefValue();
return false;
}
-Printable SSAContext::print(const Value *V) const {
+template <> Intrinsic::ID SSAContext::getIntrinsicID(const Instruction &I) {
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ return CB->getIntrinsicID();
+ return Intrinsic::not_intrinsic;
+}
+
+template <> Printable SSAContext::print(const Value *V) const {
return Printable([V](raw_ostream &Out) { V->print(Out); });
}
-Printable SSAContext::print(const Instruction *Inst) const {
+template <> Printable SSAContext::print(const Instruction *Inst) const {
return print(cast<Value>(Inst));
}
-Printable SSAContext::print(const BasicBlock *BB) const {
+template <> Printable SSAContext::print(const BasicBlock *BB) const {
if (!BB)
return Printable([](raw_ostream &Out) { Out << "<nullptr>"; });
if (BB->hasName())
@@ -99,3 +95,7 @@ Printable SSAContext::print(const BasicBlock *BB) const {
Out << MST.getLocalSlot(BB);
});
}
+
+template <> Printable SSAContext::printAsOperand(const BasicBlock *BB) const {
+ return Printable([BB](raw_ostream &Out) { BB->printAsOperand(Out); });
+}
diff --git a/contrib/llvm-project/llvm/lib/IR/StructuralHash.cpp b/contrib/llvm-project/llvm/lib/IR/StructuralHash.cpp
index 6ea108d831a1..ce2b5a38b2f3 100644
--- a/contrib/llvm-project/llvm/lib/IR/StructuralHash.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/StructuralHash.cpp
@@ -7,8 +7,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/StructuralHash.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
using namespace llvm;
@@ -16,23 +20,89 @@ using namespace llvm;
namespace {
// Basic hashing mechanism to detect structural change to the IR, used to verify
-// pass return status consistency with actual change. Loosely copied from
-// llvm/lib/Transforms/Utils/FunctionComparator.cpp
+// pass return status consistency with actual change. It is also used by the
+// MergeFunctions pass.
class StructuralHashImpl {
- hash_code Hash;
+ uint64_t Hash;
- template <typename T> void hash(const T &V) { Hash = hash_combine(Hash, V); }
+ void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
+
+ // This will produce different values on 32-bit and 64-bit systems as
+ // hash_combine returns a size_t. However, this is only used for
+ // detailed hashing which, in-tree, only needs to distinguish between
+ // differences in functions.
+ template <typename T> void hashArbitaryType(const T &V) {
+ hash(hash_combine(V));
+ }
+
+ void hashType(Type *ValueType) {
+ hash(ValueType->getTypeID());
+ if (ValueType->isIntegerTy())
+ hash(ValueType->getIntegerBitWidth());
+ }
public:
StructuralHashImpl() : Hash(4) {}
- void update(const Function &F) {
+ void updateOperand(Value *Operand) {
+ hashType(Operand->getType());
+
+ // The cases enumerated below are not exhaustive; they aim only to give
+ // decent coverage over the function.
+ if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
+ hashArbitaryType(ConstInt->getValue());
+ } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
+ hashArbitaryType(ConstFP->getValue());
+ } else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
+ hash(Arg->getArgNo());
+ } else if (Function *Func = dyn_cast<Function>(Operand)) {
+ // Hashing the name will be deterministic as LLVM's hashing infrastructure
+ // has explicit support for hashing strings and will not simply hash
+ // the pointer.
+ hashArbitaryType(Func->getName());
+ }
+ }
+
+ void updateInstruction(const Instruction &Inst, bool DetailedHash) {
+ hash(Inst.getOpcode());
+
+ if (!DetailedHash)
+ return;
+
+ hashType(Inst.getType());
+
+ // Handle additional properties of specific instructions that cause
+ // semantic differences in the IR.
+ if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
+ hash(ComparisonInstruction->getPredicate());
+
+ for (const auto &Op : Inst.operands())
+ updateOperand(Op);
+ }
+
+ // A function hash is calculated by considering only the number of arguments
+ // and whether a function is varargs, the order of basic blocks (given by the
+ // successors of each basic block in depth first order), and the order of
+ // opcodes of each instruction within each of these basic blocks. This mirrors
+ // the strategy FunctionComparator::compare() uses to compare functions by
+ // walking the BBs in depth first order and comparing each instruction in
+ // sequence. Because this hash currently does not look at the operands, it is
+ // insensitive to things such as the target of calls and the constants used in
+ // the function, which makes it useful when possibly merging functions which
+ // are the same modulo constants and call targets.
+ //
+ // Note that different users of StructuralHash will want different behavior
+ // out of it (e.g., MergeFunctions will want something different from PM
+ // expensive checks for pass modification status). When modifying this
+ // function, most changes should be gated behind an option and enabled
+ // selectively.
+ void update(const Function &F, bool DetailedHash) {
// Declarations don't affect analyses.
if (F.isDeclaration())
return;
- hash(12345); // Function header
+ hash(0x62642d6b6b2d6b72); // Function header
hash(F.isVarArg());
hash(F.arg_size());
@@ -40,13 +110,20 @@ public:
SmallVector<const BasicBlock *, 8> BBs;
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
+ // Walk the blocks in the same order as
+ // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
+ // function "structure" (basic-block and opcode sequence).
BBs.push_back(&F.getEntryBlock());
VisitedBBs.insert(BBs[0]);
while (!BBs.empty()) {
const BasicBlock *BB = BBs.pop_back_val();
- hash(45798); // Block header
+
+ // This random value acts as a block header, as otherwise the partition of
+ // opcodes into BBs wouldn't affect the hash, only the order of the
+ // opcodes.
+ hash(45798);
for (auto &Inst : *BB)
- hash(Inst.getOpcode());
+ updateInstruction(Inst, DetailedHash);
const Instruction *Term = BB->getTerminator();
for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
@@ -67,11 +144,11 @@ public:
hash(GV.getValueType()->getTypeID());
}
- void update(const Module &M) {
+ void update(const Module &M, bool DetailedHash) {
for (const GlobalVariable &GV : M.globals())
update(GV);
for (const Function &F : M)
- update(F);
+ update(F, DetailedHash);
}
uint64_t getHash() const { return Hash; }
@@ -79,14 +156,14 @@ public:
} // namespace
-uint64_t llvm::StructuralHash(const Function &F) {
+IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
StructuralHashImpl H;
- H.update(F);
+ H.update(F, DetailedHash);
return H.getHash();
}
-uint64_t llvm::StructuralHash(const Module &M) {
+IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
StructuralHashImpl H;
- H.update(M);
+ H.update(M, DetailedHash);
return H.getHash();
}
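// A minimal usage sketch (the function handle and saved hash are
// hypothetical; IRHash is the result type used above):
static bool didPassChangeIR(const llvm::Function &F, llvm::IRHash HashBefore) {
  // Detailed hashing folds in types, operands and compare predicates, so it
  // catches changes the opcode-only hash would miss.
  return llvm::StructuralHash(F, /*DetailedHash=*/true) != HashBefore;
}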
diff --git a/contrib/llvm-project/llvm/lib/IR/SymbolTableListTraitsImpl.h b/contrib/llvm-project/llvm/lib/IR/SymbolTableListTraitsImpl.h
index 4283744bd058..990552f9b65a 100644
--- a/contrib/llvm-project/llvm/lib/IR/SymbolTableListTraitsImpl.h
+++ b/contrib/llvm-project/llvm/lib/IR/SymbolTableListTraitsImpl.h
@@ -28,10 +28,10 @@ template <> void invalidateParentIListOrdering(BasicBlock *BB);
/// setSymTabObject - This is called when (f.e.) the parent of a basic block
/// changes. This requires us to remove all the instruction symtab entries from
/// the current function and reinsert them into the new function.
-template <typename ValueSubClass>
+template <typename ValueSubClass, typename... Args>
template <typename TPtr>
-void SymbolTableListTraits<ValueSubClass>::setSymTabObject(TPtr *Dest,
- TPtr Src) {
+void SymbolTableListTraits<ValueSubClass, Args...>::setSymTabObject(TPtr *Dest,
+ TPtr Src) {
// Get the old symtab and value list before doing the assignment.
ValueSymbolTable *OldST = getSymTab(getListOwner());
@@ -61,11 +61,11 @@ void SymbolTableListTraits<ValueSubClass>::setSymTabObject(TPtr *Dest,
if (I->hasName())
NewST->reinsertValue(&*I);
}
-
}
-template <typename ValueSubClass>
-void SymbolTableListTraits<ValueSubClass>::addNodeToList(ValueSubClass *V) {
+template <typename ValueSubClass, typename... Args>
+void SymbolTableListTraits<ValueSubClass, Args...>::addNodeToList(
+ ValueSubClass *V) {
assert(!V->getParent() && "Value already in a container!!");
ItemParentClass *Owner = getListOwner();
V->setParent(Owner);
@@ -75,8 +75,8 @@ void SymbolTableListTraits<ValueSubClass>::addNodeToList(ValueSubClass *V) {
ST->reinsertValue(V);
}
-template <typename ValueSubClass>
-void SymbolTableListTraits<ValueSubClass>::removeNodeFromList(
+template <typename ValueSubClass, typename... Args>
+void SymbolTableListTraits<ValueSubClass, Args...>::removeNodeFromList(
ValueSubClass *V) {
V->setParent(nullptr);
if (V->hasName())
@@ -84,8 +84,8 @@ void SymbolTableListTraits<ValueSubClass>::removeNodeFromList(
ST->removeValueName(V->getValueName());
}
-template <typename ValueSubClass>
-void SymbolTableListTraits<ValueSubClass>::transferNodesFromList(
+template <typename ValueSubClass, typename... Args>
+void SymbolTableListTraits<ValueSubClass, Args...>::transferNodesFromList(
SymbolTableListTraits &L2, iterator first, iterator last) {
// Transferring nodes, even within the same BB, invalidates the ordering. The
// list that we removed the nodes from still has a valid ordering.
diff --git a/contrib/llvm-project/llvm/lib/IR/Type.cpp b/contrib/llvm-project/llvm/lib/IR/Type.cpp
index ba4d0f5dc18d..85d779c98a9b 100644
--- a/contrib/llvm-project/llvm/lib/IR/Type.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Type.cpp
@@ -58,6 +58,8 @@ bool Type::isIntegerTy(unsigned Bitwidth) const {
}
bool Type::isScalableTy() const {
+ if (const auto *ATy = dyn_cast<ArrayType>(this))
+ return ATy->getElementType()->isScalableTy();
if (const auto *STy = dyn_cast<StructType>(this)) {
SmallPtrSet<Type *, 4> Visited;
return STy->containsScalableVectorType(&Visited);
@@ -139,16 +141,9 @@ bool Type::canLosslesslyBitCastTo(Type *Ty) const {
Ty->getPrimitiveSizeInBits().getFixedValue() == 8192)
return true;
- // At this point we have only various mismatches of the first class types
- // remaining and ptr->ptr. Just select the lossless conversions. Everything
- // else is not lossless. Conservatively assume we can't losslessly convert
- // between pointers with different address spaces.
- if (auto *PTy = dyn_cast<PointerType>(this)) {
- if (auto *OtherPTy = dyn_cast<PointerType>(Ty))
- return PTy->getAddressSpace() == OtherPTy->getAddressSpace();
- return false;
- }
- return false; // Other types have no identity values
+ // Conservatively assume we can't losslessly convert between pointers with
+ // different address spaces.
+ return false;
}
bool Type::isEmptyTy() const {
@@ -170,17 +165,26 @@ bool Type::isEmptyTy() const {
TypeSize Type::getPrimitiveSizeInBits() const {
switch (getTypeID()) {
- case Type::HalfTyID: return TypeSize::Fixed(16);
- case Type::BFloatTyID: return TypeSize::Fixed(16);
- case Type::FloatTyID: return TypeSize::Fixed(32);
- case Type::DoubleTyID: return TypeSize::Fixed(64);
- case Type::X86_FP80TyID: return TypeSize::Fixed(80);
- case Type::FP128TyID: return TypeSize::Fixed(128);
- case Type::PPC_FP128TyID: return TypeSize::Fixed(128);
- case Type::X86_MMXTyID: return TypeSize::Fixed(64);
- case Type::X86_AMXTyID: return TypeSize::Fixed(8192);
+ case Type::HalfTyID:
+ return TypeSize::getFixed(16);
+ case Type::BFloatTyID:
+ return TypeSize::getFixed(16);
+ case Type::FloatTyID:
+ return TypeSize::getFixed(32);
+ case Type::DoubleTyID:
+ return TypeSize::getFixed(64);
+ case Type::X86_FP80TyID:
+ return TypeSize::getFixed(80);
+ case Type::FP128TyID:
+ return TypeSize::getFixed(128);
+ case Type::PPC_FP128TyID:
+ return TypeSize::getFixed(128);
+ case Type::X86_MMXTyID:
+ return TypeSize::getFixed(64);
+ case Type::X86_AMXTyID:
+ return TypeSize::getFixed(8192);
case Type::IntegerTyID:
- return TypeSize::Fixed(cast<IntegerType>(this)->getBitWidth());
+ return TypeSize::getFixed(cast<IntegerType>(this)->getBitWidth());
case Type::FixedVectorTyID:
case Type::ScalableVectorTyID: {
const VectorType *VTy = cast<VectorType>(this);
@@ -189,7 +193,8 @@ TypeSize Type::getPrimitiveSizeInBits() const {
assert(!ETS.isScalable() && "Vector type should have fixed-width elements");
return {ETS.getFixedValue() * EC.getKnownMinValue(), EC.isScalable()};
}
- default: return TypeSize::Fixed(0);
+ default:
+ return TypeSize::getFixed(0);
}
}
@@ -254,66 +259,6 @@ IntegerType *Type::getIntNTy(LLVMContext &C, unsigned N) {
return IntegerType::get(C, N);
}
-PointerType *Type::getHalfPtrTy(LLVMContext &C, unsigned AS) {
- return getHalfTy(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getBFloatPtrTy(LLVMContext &C, unsigned AS) {
- return getBFloatTy(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getFloatPtrTy(LLVMContext &C, unsigned AS) {
- return getFloatTy(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getDoublePtrTy(LLVMContext &C, unsigned AS) {
- return getDoubleTy(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getX86_FP80PtrTy(LLVMContext &C, unsigned AS) {
- return getX86_FP80Ty(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getFP128PtrTy(LLVMContext &C, unsigned AS) {
- return getFP128Ty(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getPPC_FP128PtrTy(LLVMContext &C, unsigned AS) {
- return getPPC_FP128Ty(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getX86_MMXPtrTy(LLVMContext &C, unsigned AS) {
- return getX86_MMXTy(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getX86_AMXPtrTy(LLVMContext &C, unsigned AS) {
- return getX86_AMXTy(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS) {
- return getIntNTy(C, N)->getPointerTo(AS);
-}
-
-PointerType *Type::getInt1PtrTy(LLVMContext &C, unsigned AS) {
- return getInt1Ty(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getInt8PtrTy(LLVMContext &C, unsigned AS) {
- return getInt8Ty(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getInt16PtrTy(LLVMContext &C, unsigned AS) {
- return getInt16Ty(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getInt32PtrTy(LLVMContext &C, unsigned AS) {
- return getInt32Ty(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getInt64PtrTy(LLVMContext &C, unsigned AS) {
- return getInt64Ty(C)->getPointerTo(AS);
-}
-
Type *Type::getWasm_ExternrefTy(LLVMContext &C) {
// opaque pointer in addrspace(10)
static PointerType *Ty = PointerType::get(C, 10);
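[note] The block of typed-pointer convenience getters (getInt8PtrTy and friends) is deleted outright: with opaque pointers the element type is no longer part of a pointer type, so only the address space remains. A hedged migration sketch:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"

    // Before (removed here):  Type::getInt8PtrTy(Ctx, AS)
    // After: one opaque pointer type per address space.
    llvm::PointerType *anyPtr(llvm::LLVMContext &Ctx, unsigned AS) {
      return llvm::PointerType::get(Ctx, AS);
    }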
@@ -714,8 +659,7 @@ ArrayType *ArrayType::get(Type *ElementType, uint64_t NumElements) {
bool ArrayType::isValidElementType(Type *ElemTy) {
return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
!ElemTy->isMetadataTy() && !ElemTy->isFunctionTy() &&
- !ElemTy->isTokenTy() && !ElemTy->isX86_AMXTy() &&
- !isa<ScalableVectorType>(ElemTy);
+ !ElemTy->isTokenTy() && !ElemTy->isX86_AMXTy();
}
//===----------------------------------------------------------------------===//
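[note] Dropping the !isa<ScalableVectorType> clause makes arrays of scalable vectors structurally valid types; the Verifier hunk further down compensates by rejecting any scalable type in a global through Type::isScalableTy(). A sketch, assuming the relaxed rule:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    #include <cassert>

    llvm::ArrayType *sveArray(llvm::LLVMContext &Ctx) {
      auto *EltTy =
          llvm::ScalableVectorType::get(llvm::Type::getInt32Ty(Ctx), 4);
      assert(llvm::ArrayType::isValidElementType(EltTy));
      return llvm::ArrayType::get(EltTy, 8); // [8 x <vscale x 4 x i32>]
    }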
@@ -890,13 +834,14 @@ struct TargetTypeInfo {
static TargetTypeInfo getTargetTypeInfo(const TargetExtType *Ty) {
LLVMContext &C = Ty->getContext();
StringRef Name = Ty->getName();
- if (Name.startswith("spirv."))
- return TargetTypeInfo(Type::getInt8PtrTy(C, 0), TargetExtType::HasZeroInit,
+ if (Name.starts_with("spirv."))
+ return TargetTypeInfo(PointerType::get(C, 0), TargetExtType::HasZeroInit,
TargetExtType::CanBeGlobal);
// Opaque types in the AArch64 name space.
if (Name == "aarch64.svcount")
- return TargetTypeInfo(ScalableVectorType::get(Type::getInt1Ty(C), 16));
+ return TargetTypeInfo(ScalableVectorType::get(Type::getInt1Ty(C), 16),
+ TargetExtType::HasZeroInit);
return TargetTypeInfo(Type::getVoidTy(C));
}
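[note] Two target-extension fixes here: spirv.* types now lower to a plain opaque pointer instead of the removed getInt8PtrTy, and aarch64.svcount additionally reports HasZeroInit so a zeroinitializer of it is accepted. A hedged lookup sketch (assuming TargetExtType::get is the by-name factory):

    #include "llvm/IR/DerivedTypes.h"

    llvm::Type *svcountTy(llvm::LLVMContext &Ctx) {
      // Laid out as <vscale x 16 x i1> per getTargetTypeInfo above.
      return llvm::TargetExtType::get(Ctx, "aarch64.svcount");
    }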
diff --git a/contrib/llvm-project/llvm/lib/IR/TypeFinder.cpp b/contrib/llvm-project/llvm/lib/IR/TypeFinder.cpp
index 904af7e737cc..003155a4af48 100644
--- a/contrib/llvm-project/llvm/lib/IR/TypeFinder.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/TypeFinder.cpp
@@ -136,6 +136,11 @@ void TypeFinder::incorporateValue(const Value *V) {
return incorporateMDNode(N);
if (const auto *MDV = dyn_cast<ValueAsMetadata>(M->getMetadata()))
return incorporateValue(MDV->getValue());
+ if (const auto *AL = dyn_cast<DIArgList>(M->getMetadata())) {
+ for (auto *Arg : AL->getArgs())
+ incorporateValue(Arg->getValue());
+ return;
+ }
return;
}
@@ -168,14 +173,6 @@ void TypeFinder::incorporateMDNode(const MDNode *V) {
if (!VisitedMetadata.insert(V).second)
return;
- // The arguments in DIArgList are not exposed as operands, so handle such
- // nodes specifically here.
- if (const auto *AL = dyn_cast<DIArgList>(V)) {
- for (auto *Arg : AL->getArgs())
- incorporateValue(Arg->getValue());
- return;
- }
-
// Look in operands for types.
for (Metadata *Op : V->operands()) {
if (!Op)
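[note] DIArgList no longer behaves like an ordinary MDNode whose operands can be walked; its arguments are ValueAsMetadata. The special case therefore moves from incorporateMDNode into incorporateValue here, and the Verifier below grows a matching visitDIArgList. A self-contained sketch of the unwrapping:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/DebugInfoMetadata.h"

    // Sketch: collect the IR values bundled in a DIArgList; they are
    // ValueAsMetadata entries, not MDNode operands.
    llvm::SmallVector<llvm::Value *, 4>
    argListValues(const llvm::DIArgList &AL) {
      llvm::SmallVector<llvm::Value *, 4> Vals;
      for (llvm::ValueAsMetadata *Arg : AL.getArgs())
        Vals.push_back(Arg->getValue());
      return Vals;
    }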
diff --git a/contrib/llvm-project/llvm/lib/IR/Value.cpp b/contrib/llvm-project/llvm/lib/IR/Value.cpp
index 41260a98e3ce..b6e25c46b514 100644
--- a/contrib/llvm-project/llvm/lib/IR/Value.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Value.cpp
@@ -330,8 +330,7 @@ void Value::setNameImpl(const Twine &NewName) {
SmallString<256> NameData;
StringRef NameRef = NeedNewName ? NewName.toStringRef(NameData) : "";
- assert(NameRef.find_first_of(0) == StringRef::npos &&
- "Null bytes are not allowed in names");
+ assert(!NameRef.contains(0) && "Null bytes are not allowed in names");
// Name isn't changing?
if (getName() == NameRef)
@@ -378,7 +377,7 @@ void Value::setNameImpl(const Twine &NewName) {
void Value::setName(const Twine &NewName) {
setNameImpl(NewName);
if (Function *F = dyn_cast<Function>(this))
- F->recalculateIntrinsicID();
+ F->updateAfterNameChange();
}
void Value::takeName(Value *V) {
@@ -575,11 +574,17 @@ void Value::replaceUsesWithIf(Value *New,
/// with New.
static void replaceDbgUsesOutsideBlock(Value *V, Value *New, BasicBlock *BB) {
SmallVector<DbgVariableIntrinsic *> DbgUsers;
- findDbgUsers(DbgUsers, V);
+ SmallVector<DPValue *> DPUsers;
+ findDbgUsers(DbgUsers, V, &DPUsers);
for (auto *DVI : DbgUsers) {
if (DVI->getParent() != BB)
DVI->replaceVariableLocationOp(V, New);
}
+ for (auto *DPV : DPUsers) {
+ DPMarker *Marker = DPV->getMarker();
+ if (Marker->getParent() != BB)
+ DPV->replaceVariableLocationOp(V, New);
+ }
}
// Like replaceAllUsesWith except it does not handle constants or basic blocks.
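[note] This is part of the RemoveDIs work: variable locations can now live as DPValue records hung off instructions via DPMarkers instead of dbg.value intrinsic calls, and findDbgUsers grew an out-parameter for them. Callers that retarget a value must patch both representations; a hedged standalone sketch (header locations assumed):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/DebugInfo.h"
    #include "llvm/IR/DebugProgramInstruction.h"
    #include "llvm/IR/IntrinsicInst.h"

    // Sketch: retarget every debug user of V, in both debug-info formats.
    void retargetDbgUsers(llvm::Value *V, llvm::Value *New) {
      llvm::SmallVector<llvm::DbgVariableIntrinsic *> DbgUsers;
      llvm::SmallVector<llvm::DPValue *> DPUsers;
      llvm::findDbgUsers(DbgUsers, V, &DPUsers);
      for (auto *DVI : DbgUsers)
        DVI->replaceVariableLocationOp(V, New); // dbg.value intrinsic users
      for (auto *DPV : DPUsers)
        DPV->replaceVariableLocationOp(V, New); // DPValue record users
    }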
diff --git a/contrib/llvm-project/llvm/lib/IR/Verifier.cpp b/contrib/llvm-project/llvm/lib/IR/Verifier.cpp
index 1408ce293ca6..8aba28026306 100644
--- a/contrib/llvm-project/llvm/lib/IR/Verifier.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Verifier.cpp
@@ -73,7 +73,7 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/CycleInfo.h"
+#include "llvm/IR/ConvergenceVerifier.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -329,16 +329,6 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
/// The current source language.
dwarf::SourceLanguage CurrentSourceLang = dwarf::DW_LANG_lo_user;
- /// Whether the current function has convergencectrl operand bundles.
- enum {
- ControlledConvergence,
- UncontrolledConvergence,
- NoConvergence
- } ConvergenceKind = NoConvergence;
-
- /// Whether source was present on the first DIFile encountered in each CU.
- DenseMap<const DICompileUnit *, bool> HasSourceDebugInfo;
-
/// Stores the count of how many objects were passed to llvm.localescape for a
/// given function and the largest index passed to llvm.localrecover.
DenseMap<Function *, std::pair<unsigned, unsigned>> FrameEscapeInfo;
@@ -370,6 +360,7 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
SmallVector<const DILocalVariable *, 16> DebugFnArgs;
TBAAVerifier TBAAVerifyHelper;
+ ConvergenceVerifier ConvergenceVerifyHelper;
SmallVector<IntrinsicInst *, 4> NoAliasScopeDecls;
@@ -411,12 +402,19 @@ public:
return false;
}
+ auto FailureCB = [this](const Twine &Message) {
+ this->CheckFailed(Message);
+ };
+ ConvergenceVerifyHelper.initialize(OS, FailureCB, F);
+
Broken = false;
// FIXME: We strip const here because the inst visitor strips const.
visit(const_cast<Function &>(F));
verifySiblingFuncletUnwinds();
- if (ConvergenceKind == ControlledConvergence)
- verifyConvergenceControl(const_cast<Function &>(F));
+
+ if (ConvergenceVerifyHelper.sawTokens())
+ ConvergenceVerifyHelper.verify(DT);
+
InstsInThisBlock.clear();
DebugFnArgs.clear();
LandingPadResultTy = nullptr;
@@ -424,7 +422,6 @@ public:
SiblingFuncletInfo.clear();
verifyNoAliasScopeDecl();
NoAliasScopeDecls.clear();
- ConvergenceKind = NoConvergence;
return !Broken;
}
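[note] Convergence checking is no longer hand-rolled in this file: the hand-written verifyConvergenceControl further down is deleted wholesale, and its token-nesting and cycle rules move into the shared helper behind the new llvm/IR/ConvergenceVerifier.h include, so other verifiers can reuse them. The per-function flow, condensed from this hunk (member calls on the Verifier, not additional code):

    auto FailureCB = [this](const llvm::Twine &Message) {
      this->CheckFailed(Message);           // route into normal diagnostics
    };
    ConvergenceVerifyHelper.initialize(OS, FailureCB, F);
    visit(const_cast<llvm::Function &>(F)); // visitBasicBlock/visitCallBase feed it
    if (ConvergenceVerifyHelper.sawTokens())
      ConvergenceVerifyHelper.verify(DT);   // dominance and cycle rules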
@@ -483,6 +480,7 @@ private:
void visitMDNode(const MDNode &MD, AreDebugLocsAllowed AllowLocs);
void visitMetadataAsValue(const MetadataAsValue &MD, Function *F);
void visitValueAsMetadata(const ValueAsMetadata &MD, Function *F);
+ void visitDIArgList(const DIArgList &AL, Function *F);
void visitComdat(const Comdat &C);
void visitModuleIdents();
void visitModuleCommandLines();
@@ -600,7 +598,6 @@ private:
void verifyStatepoint(const CallBase &Call);
void verifyFrameRecoverIndices();
void verifySiblingFuncletUnwinds();
- void verifyConvergenceControl(Function &F);
void verifyFragmentExpression(const DbgVariableIntrinsic &I);
template <typename ValueOrMetadata>
@@ -620,9 +617,6 @@ private:
void verifyAttachedCallBundle(const CallBase &Call,
const OperandBundleUse &BU);
- /// Verify all-or-nothing property of DIFile source attribute within a CU.
- void verifySourceDebugInfo(const DICompileUnit &U, const DIFile &F);
-
/// Verify the llvm.experimental.noalias.scope.decl declarations
void verifyNoAliasScopeDecl();
};
@@ -797,8 +791,7 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
if (ArrayType *ATy = dyn_cast<ArrayType>(GV.getValueType())) {
StructType *STy = dyn_cast<StructType>(ATy->getElementType());
PointerType *FuncPtrTy =
- FunctionType::get(Type::getVoidTy(Context), false)->
- getPointerTo(DL.getProgramAddressSpace());
+ PointerType::get(Context, DL.getProgramAddressSpace());
Check(STy && (STy->getNumElements() == 2 || STy->getNumElements() == 3) &&
STy->getTypeAtIndex(0u)->isIntegerTy(32) &&
STy->getTypeAtIndex(1) == FuncPtrTy,
@@ -852,17 +845,9 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
}
// Scalable vectors cannot be global variables, since we don't know
- // the runtime size. If the global is an array containing scalable vectors,
- // that will be caught by the isValidElementType methods in StructType or
- // ArrayType instead.
- Check(!isa<ScalableVectorType>(GV.getValueType()),
- "Globals cannot contain scalable vectors", &GV);
-
- if (auto *STy = dyn_cast<StructType>(GV.getValueType())) {
- SmallPtrSet<Type *, 4> Visited;
- Check(!STy->containsScalableVectorType(&Visited),
- "Globals cannot contain scalable vectors", &GV);
- }
+ // the runtime size.
+ Check(!GV.getValueType()->isScalableTy(),
+ "Globals cannot contain scalable types", &GV);
// Check if it's a target extension type that disallows being used as a
// global.
@@ -973,7 +958,7 @@ void Verifier::visitGlobalIFunc(const GlobalIFunc &GI) {
void Verifier::visitNamedMDNode(const NamedMDNode &NMD) {
// There used to be various other llvm.dbg.* nodes, but we don't support
// upgrading them and we want to reserve the namespace for future uses.
- if (NMD.getName().startswith("llvm.dbg."))
+ if (NMD.getName().starts_with("llvm.dbg."))
CheckDI(NMD.getName() == "llvm.dbg.cu",
"unrecognized named metadata node in the llvm.dbg namespace", &NMD);
for (const MDNode *MD : NMD.operands()) {
@@ -1056,6 +1041,11 @@ void Verifier::visitValueAsMetadata(const ValueAsMetadata &MD, Function *F) {
Check(ActualF == F, "function-local metadata used in wrong function", L);
}
+void Verifier::visitDIArgList(const DIArgList &AL, Function *F) {
+ for (const ValueAsMetadata *VAM : AL.getArgs())
+ visitValueAsMetadata(*VAM, F);
+}
+
void Verifier::visitMetadataAsValue(const MetadataAsValue &MDV, Function *F) {
Metadata *MD = MDV.getMetadata();
if (auto *N = dyn_cast<MDNode>(MD)) {
@@ -1070,6 +1060,9 @@ void Verifier::visitMetadataAsValue(const MetadataAsValue &MDV, Function *F) {
if (auto *V = dyn_cast<ValueAsMetadata>(MD))
visitValueAsMetadata(*V, F);
+
+ if (auto *AL = dyn_cast<DIArgList>(MD))
+ visitDIArgList(*AL, F);
}
static bool isType(const Metadata *MD) { return !MD || isa<DIType>(MD); }
@@ -1182,6 +1175,7 @@ void Verifier::visitDIDerivedType(const DIDerivedType &N) {
N.getTag() == dwarf::DW_TAG_restrict_type ||
N.getTag() == dwarf::DW_TAG_atomic_type ||
N.getTag() == dwarf::DW_TAG_member ||
+ (N.getTag() == dwarf::DW_TAG_variable && N.isStaticMember()) ||
N.getTag() == dwarf::DW_TAG_inheritance ||
N.getTag() == dwarf::DW_TAG_friend ||
N.getTag() == dwarf::DW_TAG_set_type,
@@ -1297,6 +1291,10 @@ void Verifier::visitDICompositeType(const DICompositeType &N) {
CheckDI(N.getTag() == dwarf::DW_TAG_array_type,
"rank can only appear in array type");
}
+
+ if (N.getTag() == dwarf::DW_TAG_array_type) {
+ CheckDI(N.getRawBaseType(), "array types must have a base type", &N);
+ }
}
void Verifier::visitDISubroutineType(const DISubroutineType &N) {
@@ -1348,8 +1346,6 @@ void Verifier::visitDICompileUnit(const DICompileUnit &N) {
CurrentSourceLang = (dwarf::SourceLanguage)N.getSourceLanguage();
- verifySourceDebugInfo(N, *N.getFile());
-
CheckDI((N.getEmissionKind() <= DICompileUnit::LastEmissionKind),
"invalid emission kind", &N);
@@ -1429,8 +1425,15 @@ void Verifier::visitDISubprogram(const DISubprogram &N) {
CheckDI(N.isDistinct(), "subprogram definitions must be distinct", &N);
CheckDI(Unit, "subprogram definitions must have a compile unit", &N);
CheckDI(isa<DICompileUnit>(Unit), "invalid unit type", &N, Unit);
- if (N.getFile())
- verifySourceDebugInfo(*N.getUnit(), *N.getFile());
+ // There's no good way to cross the CU boundary to insert a nested
+ // DISubprogram definition in one CU into a type defined in another CU.
+ auto *CT = dyn_cast_or_null<DICompositeType>(N.getRawScope());
+ if (CT && CT->getRawIdentifier() &&
+ M.getContext().isODRUniquingDebugTypes())
+ CheckDI(N.getDeclaration(),
+ "definition subprograms cannot be nested within DICompositeType "
+ "when enabling ODR",
+ &N);
} else {
// Subprogram declarations (part of the type hierarchy).
CheckDI(!Unit, "subprogram declarations must not have a compile unit", &N);
@@ -1508,13 +1511,6 @@ void Verifier::visitDIMacroFile(const DIMacroFile &N) {
}
}
-void Verifier::visitDIArgList(const DIArgList &N) {
- CheckDI(!N.getNumOperands(),
- "DIArgList should have no operands other than a list of "
- "ValueAsMetadata",
- &N);
-}
-
void Verifier::visitDIModule(const DIModule &N) {
CheckDI(N.getTag() == dwarf::DW_TAG_module, "invalid tag", &N);
CheckDI(!N.getName().empty(), "anonymous module", &N);
@@ -1928,6 +1924,14 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
"'noinline and alwaysinline' are incompatible!",
V);
+ Check(!(Attrs.hasAttribute(Attribute::Writable) &&
+ Attrs.hasAttribute(Attribute::ReadNone)),
+ "Attributes writable and readnone are incompatible!", V);
+
+ Check(!(Attrs.hasAttribute(Attribute::Writable) &&
+ Attrs.hasAttribute(Attribute::ReadOnly)),
+ "Attributes writable and readonly are incompatible!", V);
+
AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(Ty);
for (Attribute Attr : Attrs) {
if (!Attr.isStringAttribute() &&
@@ -2023,6 +2027,17 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
"' does not apply to function return values",
V);
+ unsigned MaxParameterWidth = 0;
+ auto GetMaxParameterWidth = [&MaxParameterWidth](Type *Ty) {
+ if (Ty->isVectorTy()) {
+ if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
+ unsigned Size = VT->getPrimitiveSizeInBits().getFixedValue();
+ if (Size > MaxParameterWidth)
+ MaxParameterWidth = Size;
+ }
+ }
+ };
+ GetMaxParameterWidth(FT->getReturnType());
verifyParameterAttrs(RetAttrs, FT->getReturnType(), V);
// Verify parameter attributes.
@@ -2041,6 +2056,7 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
}
verifyParameterAttrs(ArgAttrs, Ty, V);
+ GetMaxParameterWidth(Ty);
if (ArgAttrs.hasAttribute(Attribute::Nest)) {
Check(!SawNest, "More than one parameter has attribute nest!", V);
@@ -2107,8 +2123,24 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
Check(!Attrs.hasFnAttr(Attribute::MinSize),
"Attributes 'minsize and optnone' are incompatible!", V);
+
+ Check(!Attrs.hasFnAttr(Attribute::OptimizeForDebugging),
+ "Attributes 'optdebug and optnone' are incompatible!", V);
+ }
+
+ if (Attrs.hasFnAttr(Attribute::OptimizeForDebugging)) {
+ Check(!Attrs.hasFnAttr(Attribute::OptimizeForSize),
+ "Attributes 'optsize and optdebug' are incompatible!", V);
+
+ Check(!Attrs.hasFnAttr(Attribute::MinSize),
+ "Attributes 'minsize and optdebug' are incompatible!", V);
}
+ Check(!Attrs.hasAttrSomewhere(Attribute::Writable) ||
+ isModSet(Attrs.getMemoryEffects().getModRef(IRMemLocation::ArgMem)),
+ "Attribute writable and memory without argmem: write are incompatible!",
+ V);
+
if (Attrs.hasFnAttr("aarch64_pstate_sm_enabled")) {
Check(!Attrs.hasFnAttr("aarch64_pstate_sm_compatible"),
"Attributes 'aarch64_pstate_sm_enabled and "
@@ -2196,9 +2228,37 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
CheckFailed("invalid value for 'frame-pointer' attribute: " + FP, V);
}
+ // Check EVEX512 feature.
+ if (MaxParameterWidth >= 512 && Attrs.hasFnAttr("target-features") &&
+ TT.isX86()) {
+ StringRef TF = Attrs.getFnAttr("target-features").getValueAsString();
+ Check(!TF.contains("+avx512f") || !TF.contains("-evex512"),
+ "512-bit vector arguments require 'evex512' for AVX512", V);
+ }
+
checkUnsignedBaseTenFuncAttr(Attrs, "patchable-function-prefix", V);
checkUnsignedBaseTenFuncAttr(Attrs, "patchable-function-entry", V);
checkUnsignedBaseTenFuncAttr(Attrs, "warn-stack-size", V);
+
+ if (auto A = Attrs.getFnAttr("sign-return-address"); A.isValid()) {
+ StringRef S = A.getValueAsString();
+ if (S != "none" && S != "all" && S != "non-leaf")
+ CheckFailed("invalid value for 'sign-return-address' attribute: " + S, V);
+ }
+
+ if (auto A = Attrs.getFnAttr("sign-return-address-key"); A.isValid()) {
+ StringRef S = A.getValueAsString();
+ if (S != "a_key" && S != "b_key")
+ CheckFailed("invalid value for 'sign-return-address-key' attribute: " + S,
+ V);
+ }
+
+ if (auto A = Attrs.getFnAttr("branch-target-enforcement"); A.isValid()) {
+ StringRef S = A.getValueAsString();
+ if (S != "true" && S != "false")
+ CheckFailed(
+ "invalid value for 'branch-target-enforcement' attribute: " + S, V);
+ }
}
void Verifier::verifyFunctionMetadata(
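[note] The attribute hunks above add several cross-checks: writable conflicts with readnone/readonly and with memory effects lacking an argmem write; optdebug conflicts with optnone, optsize and minsize; 512-bit vector parameters on x86 require evex512 alongside avx512f; and the sign-return-address, sign-return-address-key and branch-target-enforcement string attributes now have their values validated. A hedged sketch that trips the writable/memory rule:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IR/Verifier.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("m", Ctx);
      auto *FT = FunctionType::get(Type::getVoidTy(Ctx),
                                   {PointerType::get(Ctx, 0)}, false);
      Function *F = Function::Create(FT, Function::ExternalLinkage, "f", M);
      F->addParamAttr(0, Attribute::get(Ctx, Attribute::Writable));
      F->setMemoryEffects(MemoryEffects::none()); // no "argmem: write"
      IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));
      B.CreateRetVoid();
      // Expected to fail with the new writable/memory diagnostic.
      if (verifyFunction(*F, &errs()))
        errs() << "verifier rejected f, as the new check intends\n";
      return 0;
    }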
@@ -2526,118 +2586,6 @@ void Verifier::verifySiblingFuncletUnwinds() {
}
}
-void Verifier::verifyConvergenceControl(Function &F) {
- DenseMap<BasicBlock *, SmallVector<CallBase *, 8>> LiveTokenMap;
- DenseMap<const Cycle *, const CallBase *> CycleHearts;
-
- // Just like the DominatorTree, compute the CycleInfo locally so that we
- // can run the verifier outside of a pass manager and we don't rely on
- // potentially out-dated analysis results.
- CycleInfo CI;
- CI.compute(F);
-
- auto checkBundle = [&](OperandBundleUse &Bundle, CallBase *CB,
- SmallVectorImpl<CallBase *> &LiveTokens) {
- Check(Bundle.Inputs.size() == 1 && Bundle.Inputs[0]->getType()->isTokenTy(),
- "The 'convergencectrl' bundle requires exactly one token use.", CB);
-
- Value *Token = Bundle.Inputs[0].get();
- auto *Def = dyn_cast<CallBase>(Token);
- Check(Def != nullptr,
- "Convergence control tokens can only be produced by call "
- "instructions.",
- Token);
-
- Check(llvm::is_contained(LiveTokens, Token),
- "Convergence region is not well-nested.", Token, CB);
-
- while (LiveTokens.back() != Token)
- LiveTokens.pop_back();
-
- // Check static rules about cycles.
- auto *BB = CB->getParent();
- auto *BBCycle = CI.getCycle(BB);
- if (!BBCycle)
- return;
-
- BasicBlock *DefBB = Def->getParent();
- if (DefBB == BB || BBCycle->contains(DefBB)) {
- // degenerate occurrence of a loop intrinsic
- return;
- }
-
- auto *II = dyn_cast<IntrinsicInst>(CB);
- Check(II &&
- II->getIntrinsicID() == Intrinsic::experimental_convergence_loop,
- "Convergence token used by an instruction other than "
- "llvm.experimental.convergence.loop in a cycle that does "
- "not contain the token's definition.",
- CB, CI.print(BBCycle));
-
- while (true) {
- auto *Parent = BBCycle->getParentCycle();
- if (!Parent || Parent->contains(DefBB))
- break;
- BBCycle = Parent;
- };
-
- Check(BBCycle->isReducible() && BB == BBCycle->getHeader(),
- "Cycle heart must dominate all blocks in the cycle.", CB, BB,
- CI.print(BBCycle));
- Check(!CycleHearts.count(BBCycle),
- "Two static convergence token uses in a cycle that does "
- "not contain either token's definition.",
- CB, CycleHearts[BBCycle], CI.print(BBCycle));
- CycleHearts[BBCycle] = CB;
- };
-
- ReversePostOrderTraversal<Function *> RPOT(&F);
- SmallVector<CallBase *, 8> LiveTokens;
- for (BasicBlock *BB : RPOT) {
- LiveTokens.clear();
- auto LTIt = LiveTokenMap.find(BB);
- if (LTIt != LiveTokenMap.end()) {
- LiveTokens = std::move(LTIt->second);
- LiveTokenMap.erase(LTIt);
- }
-
- for (Instruction &I : *BB) {
- CallBase *CB = dyn_cast<CallBase>(&I);
- if (!CB)
- continue;
-
- auto Bundle = CB->getOperandBundle(LLVMContext::OB_convergencectrl);
- if (Bundle)
- checkBundle(*Bundle, CB, LiveTokens);
-
- if (CB->getType()->isTokenTy())
- LiveTokens.push_back(CB);
- }
-
- // Propagate token liveness
- for (BasicBlock *Succ : successors(BB)) {
- DomTreeNode *SuccNode = DT.getNode(Succ);
- LTIt = LiveTokenMap.find(Succ);
- if (LTIt == LiveTokenMap.end()) {
- // We're the first predecessor: all tokens which dominate the
- // successor are live for now.
- LTIt = LiveTokenMap.try_emplace(Succ).first;
- for (CallBase *LiveToken : LiveTokens) {
- if (!DT.dominates(DT.getNode(LiveToken->getParent()), SuccNode))
- break;
- LTIt->second.push_back(LiveToken);
- }
- } else {
- // Compute the intersection of live tokens.
- auto It = llvm::partition(LTIt->second, [&LiveTokens](CallBase *Token) {
- return llvm::is_contained(LiveTokens, Token);
- });
- LTIt->second.erase(It, LTIt->second.end());
- }
- }
- }
-}
-
// visitFunction - Verify that a function is ok.
//
void Verifier::visitFunction(const Function &F) {
@@ -2958,6 +2906,7 @@ void Verifier::visitFunction(const Function &F) {
//
void Verifier::visitBasicBlock(BasicBlock &BB) {
InstsInThisBlock.clear();
+ ConvergenceVerifyHelper.visit(BB);
// Ensure that basic blocks have terminators!
Check(BB.getTerminator(), "Basic Block does not have terminator!", &BB);
@@ -3007,6 +2956,14 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
{
Check(I.getParent() == &BB, "Instruction has bogus parent pointer!");
}
+
+ // Confirm that no issues arise from the debug program.
+ if (BB.IsNewDbgInfoFormat) {
+ // Configure the validate function to not fire assertions, instead print
+ // errors and return true if there's a problem.
+ bool RetVal = BB.validateDbgValues(false, true, OS);
+ Check(!RetVal, "Invalid configuration of new-debug-info data found");
+ }
}
void Verifier::visitTerminator(Instruction &I) {
@@ -3366,20 +3323,6 @@ void Verifier::visitPHINode(PHINode &PN) {
visitInstruction(PN);
}
-static bool isControlledConvergent(const CallBase &Call) {
- if (Call.getOperandBundle(LLVMContext::OB_convergencectrl))
- return true;
- if (const auto *F = dyn_cast<Function>(Call.getCalledOperand())) {
- switch (F->getIntrinsicID()) {
- case Intrinsic::experimental_convergence_anchor:
- case Intrinsic::experimental_convergence_entry:
- case Intrinsic::experimental_convergence_loop:
- return true;
- }
- }
- return false;
-}
-
void Verifier::visitCallBase(CallBase &Call) {
Check(Call.getCalledOperand()->getType()->isPointerTy(),
"Called function must be a pointer!", Call);
@@ -3673,22 +3616,7 @@ void Verifier::visitCallBase(CallBase &Call) {
if (Call.isInlineAsm())
verifyInlineAsmCall(Call);
- if (isControlledConvergent(Call)) {
- Check(Call.isConvergent(),
- "Expected convergent attribute on a controlled convergent call.",
- Call);
- Check(ConvergenceKind != UncontrolledConvergence,
- "Cannot mix controlled and uncontrolled convergence in the same "
- "function.",
- Call);
- ConvergenceKind = ControlledConvergence;
- } else if (Call.isConvergent()) {
- Check(ConvergenceKind != ControlledConvergence,
- "Cannot mix controlled and uncontrolled convergence in the same "
- "function.",
- Call);
- ConvergenceKind = UncontrolledConvergence;
- }
+ ConvergenceVerifyHelper.visit(Call);
visitInstruction(Call);
}
@@ -5444,7 +5372,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
Check(cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2,
"rw argument to llvm.prefetch must be 0-1", Call);
Check(cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 4,
- "locality argument to llvm.prefetch must be 0-4", Call);
+ "locality argument to llvm.prefetch must be 0-3", Call);
Check(cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue() < 2,
"cache type argument to llvm.prefetch must be 0-1", Call);
break;
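[note] A message-only fix: the check has always enforced a value below 4, so the legal locality range is 0-3 and the diagnostic now matches the code. A hedged builder sketch of a well-formed call:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"

    // Sketch: rw=0 (read), locality=3 (maximum), cache type=1 (data).
    void emitPrefetch(llvm::IRBuilder<> &B, llvm::Value *Ptr) {
      B.CreateIntrinsic(llvm::Intrinsic::prefetch, {Ptr->getType()},
                        {Ptr, B.getInt32(0), B.getInt32(3), B.getInt32(1)});
    }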
@@ -5777,12 +5705,30 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
}
break;
}
- case Intrinsic::lround:
- case Intrinsic::llround:
case Intrinsic::lrint:
case Intrinsic::llrint: {
Type *ValTy = Call.getArgOperand(0)->getType();
Type *ResultTy = Call.getType();
+ Check(
+ ValTy->isFPOrFPVectorTy() && ResultTy->isIntOrIntVectorTy(),
+ "llvm.lrint, llvm.llrint: argument must be floating-point or vector "
+ "of floating-points, and result must be integer or vector of integers",
+ &Call);
+ Check(ValTy->isVectorTy() == ResultTy->isVectorTy(),
+ "llvm.lrint, llvm.llrint: argument and result disagree on vector use",
+ &Call);
+ if (ValTy->isVectorTy()) {
+ Check(cast<VectorType>(ValTy)->getElementCount() ==
+ cast<VectorType>(ResultTy)->getElementCount(),
+ "llvm.lrint, llvm.llrint: argument must be same length as result",
+ &Call);
+ }
+ break;
+ }
+ case Intrinsic::lround:
+ case Intrinsic::llround: {
+ Type *ValTy = Call.getArgOperand(0)->getType();
+ Type *ResultTy = Call.getType();
Check(!ValTy->isVectorTy() && !ResultTy->isVectorTy(),
"Intrinsic does not support vectors", &Call);
break;
@@ -5964,7 +5910,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"vector_extract index must be a constant multiple of "
"the result type's known minimum vector length.");
- // If this extraction is not the 'mixed' case where a fixed vector is is
+ // If this extraction is not the 'mixed' case where a fixed vector is
// extracted from a scalable vector, ensure that the extraction does not
// overrun the parent vector.
if (VecEC.isScalable() == ResultEC.isScalable()) {
@@ -6053,27 +5999,66 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
&Call);
break;
}
+
+ Check(Call.paramHasAttr(2, Attribute::InReg),
+ "SGPR arguments must have the `inreg` attribute", &Call);
+ Check(!Call.paramHasAttr(3, Attribute::InReg),
+ "VGPR arguments must not have the `inreg` attribute", &Call);
+ break;
+ }
+ case Intrinsic::amdgcn_set_inactive_chain_arg: {
+ auto CallerCC = Call.getCaller()->getCallingConv();
+ switch (CallerCC) {
+ case CallingConv::AMDGPU_CS_Chain:
+ case CallingConv::AMDGPU_CS_ChainPreserve:
+ break;
+ default:
+ CheckFailed("Intrinsic can only be used from functions with the "
+ "amdgpu_cs_chain or amdgpu_cs_chain_preserve "
+ "calling conventions",
+ &Call);
+ break;
+ }
+
+ unsigned InactiveIdx = 1;
+ Check(!Call.paramHasAttr(InactiveIdx, Attribute::InReg),
+ "Value for inactive lanes must not have the `inreg` attribute",
+ &Call);
+ Check(isa<Argument>(Call.getArgOperand(InactiveIdx)),
+ "Value for inactive lanes must be a function argument", &Call);
+ Check(!cast<Argument>(Call.getArgOperand(InactiveIdx))->hasInRegAttr(),
+ "Value for inactive lanes must be a VGPR function argument", &Call);
break;
}
case Intrinsic::experimental_convergence_entry:
- Check(Call.getFunction()->isConvergent(),
- "Entry intrinsic can occur only in a convergent function.", &Call);
- Check(Call.getParent()->isEntryBlock(),
- "Entry intrinsic must occur in the entry block.", &Call);
- Check(Call.getParent()->getFirstNonPHI() == &Call,
- "Entry intrinsic must occur at the start of the basic block.", &Call);
LLVM_FALLTHROUGH;
case Intrinsic::experimental_convergence_anchor:
- Check(!Call.getOperandBundle(LLVMContext::OB_convergencectrl),
- "Entry or anchor intrinsic must not have a convergencectrl bundle.",
- &Call);
break;
case Intrinsic::experimental_convergence_loop:
- Check(Call.getOperandBundle(LLVMContext::OB_convergencectrl),
- "Loop intrinsic must have a convergencectrl bundle.", &Call);
- Check(Call.getParent()->getFirstNonPHI() == &Call,
- "Loop intrinsic must occur at the start of the basic block.", &Call);
break;
+ case Intrinsic::ptrmask: {
+ Type *Ty0 = Call.getArgOperand(0)->getType();
+ Type *Ty1 = Call.getArgOperand(1)->getType();
+ Check(Ty0->isPtrOrPtrVectorTy(),
+ "llvm.ptrmask intrinsic first argument must be pointer or vector "
+ "of pointers",
+ &Call);
+ Check(
+ Ty0->isVectorTy() == Ty1->isVectorTy(),
+ "llvm.ptrmask intrinsic arguments must be both scalars or both vectors",
+ &Call);
+ if (Ty0->isVectorTy())
+ Check(cast<VectorType>(Ty0)->getElementCount() ==
+ cast<VectorType>(Ty1)->getElementCount(),
+ "llvm.ptrmask intrinsic arguments must have the same number of "
+ "elements",
+ &Call);
+ Check(DL.getIndexTypeSizeInBits(Ty0) == Ty1->getScalarSizeInBits(),
+ "llvm.ptrmask intrinsic second argument bitwidth must match "
+ "pointer index type size of first argument",
+ &Call);
+ break;
+ }
};
// Verify that there aren't any unmediated control transfers between funclets.
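[note] The inline checks on the experimental convergence intrinsics are dropped above because the shared ConvergenceVerifier now owns them; llvm.ptrmask gains dedicated rules instead: a pointer (or vector-of-pointer) first argument, matching scalar/vector shapes and element counts, and a mask whose width equals the pointer's index type size from the DataLayout, not its storage size. A hedged sketch of a conforming call:

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"

    // Sketch: clear the low 4 bits of Ptr (align down to 16 bytes).
    llvm::Value *alignDown16(llvm::IRBuilder<> &B, const llvm::DataLayout &DL,
                             llvm::Value *Ptr) {
      llvm::Type *MaskTy =
          B.getIntNTy(DL.getIndexTypeSizeInBits(Ptr->getType()));
      llvm::Value *Mask = llvm::ConstantInt::getSigned(MaskTy, -16);
      return B.CreateIntrinsic(llvm::Intrinsic::ptrmask,
                               {Ptr->getType(), MaskTy}, {Ptr, Mask});
    }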
@@ -6220,6 +6205,11 @@ void Verifier::visitVPIntrinsic(VPIntrinsic &VPI) {
Check(CmpInst::isIntPredicate(Pred),
"invalid predicate for VP integer comparison intrinsic", &VPI);
}
+ if (VPI.getIntrinsicID() == Intrinsic::vp_is_fpclass) {
+ auto TestMask = cast<ConstantInt>(VPI.getOperand(1));
+ Check((TestMask->getZExtValue() & ~static_cast<unsigned>(fcAllFlags)) == 0,
+ "unsupported bits for llvm.vp.is.fpclass test mask");
+ }
}
void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
@@ -6532,12 +6522,16 @@ void Verifier::verifyNotEntryValue(const DbgVariableIntrinsic &I) {
if (!E || !E->isValid())
return;
- // We allow EntryValues for swift async arguments, as they have an
- // ABI-guarantee to be turned into a specific register.
- if (isa<ValueAsMetadata>(I.getRawLocation()))
- if (auto *ArgLoc = dyn_cast_or_null<Argument>(I.getVariableLocationOp(0));
+ if (isa<ValueAsMetadata>(I.getRawLocation())) {
+ Value *VarValue = I.getVariableLocationOp(0);
+ if (isa<UndefValue>(VarValue) || isa<PoisonValue>(VarValue))
+ return;
+ // We allow EntryValues for swift async arguments, as they have an
+ // ABI-guarantee to be turned into a specific register.
+ if (auto *ArgLoc = dyn_cast_or_null<Argument>(VarValue);
ArgLoc && ArgLoc->hasAttribute(Attribute::SwiftAsync))
return;
+ }
CheckDI(!E->isEntryValue(),
"Entry values are only allowed in MIR unless they target a "
@@ -6604,14 +6598,6 @@ void Verifier::verifyAttachedCallBundle(const CallBase &Call,
}
}
-void Verifier::verifySourceDebugInfo(const DICompileUnit &U, const DIFile &F) {
- bool HasSource = F.getSource().has_value();
- if (!HasSourceDebugInfo.count(&U))
- HasSourceDebugInfo[&U] = HasSource;
- CheckDI(HasSource == HasSourceDebugInfo[&U],
- "inconsistent use of embedded source");
-}
-
void Verifier::verifyNoAliasScopeDecl() {
if (NoAliasScopeDecls.empty())
return;
diff --git a/contrib/llvm-project/llvm/lib/IRPrinter/IRPrintingPasses.cpp b/contrib/llvm-project/llvm/lib/IRPrinter/IRPrintingPasses.cpp
index 9552ce3862c5..52b242b4dcd5 100644
--- a/contrib/llvm-project/llvm/lib/IRPrinter/IRPrintingPasses.cpp
+++ b/contrib/llvm-project/llvm/lib/IRPrinter/IRPrintingPasses.cpp
@@ -31,6 +31,12 @@ PrintModulePass::PrintModulePass(raw_ostream &OS, const std::string &Banner,
EmitSummaryIndex(EmitSummaryIndex) {}
PreservedAnalyses PrintModulePass::run(Module &M, ModuleAnalysisManager &AM) {
+ // RemoveDIs: there's no textual representation of the DPValue debug-info,
+ // convert to dbg.values before writing out.
+ bool ShouldConvert = M.IsNewDbgInfoFormat;
+ if (ShouldConvert)
+ M.convertFromNewDbgValues();
+
if (llvm::isFunctionInPrintList("*")) {
if (!Banner.empty())
OS << Banner << "\n";
@@ -53,10 +59,13 @@ PreservedAnalyses PrintModulePass::run(Module &M, ModuleAnalysisManager &AM) {
: nullptr;
if (Index) {
if (Index->modulePaths().empty())
- Index->addModule("", 0);
+ Index->addModule("");
Index->print(OS);
}
+ if (ShouldConvert)
+ M.convertToNewDbgValues();
+
return PreservedAnalyses::all();
}
@@ -66,11 +75,21 @@ PrintFunctionPass::PrintFunctionPass(raw_ostream &OS, const std::string &Banner)
PreservedAnalyses PrintFunctionPass::run(Function &F,
FunctionAnalysisManager &) {
+ // RemoveDIs: there's no textual representation of the DPValue debug-info,
+ // convert to dbg.values before writing out.
+ bool ShouldConvert = F.IsNewDbgInfoFormat;
+ if (ShouldConvert)
+ F.convertFromNewDbgValues();
+
if (isFunctionInPrintList(F.getName())) {
if (forcePrintModuleIR())
OS << Banner << " (function: " << F.getName() << ")\n" << *F.getParent();
else
OS << Banner << '\n' << static_cast<Value &>(F);
}
+
+ if (ShouldConvert)
+ F.convertToNewDbgValues();
+
return PreservedAnalyses::all();
}
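[note] Both printing passes now round-trip out of the experimental DPValue form, since textual IR has no syntax for it yet: convert to dbg.value intrinsics, print, convert back. The guard pattern as a standalone sketch:

    #include "llvm/IR/Module.h"
    #include "llvm/Support/raw_ostream.h"

    // Sketch: print M in the classic debug-info format, then restore its mode.
    void printModuleClassic(llvm::Module &M, llvm::raw_ostream &OS) {
      bool ShouldConvert = M.IsNewDbgInfoFormat;
      if (ShouldConvert)
        M.convertFromNewDbgValues(); // dbg.values have a textual form
      OS << M;
      if (ShouldConvert)
        M.convertToNewDbgValues();   // restore the in-memory DPValue form
    }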
diff --git a/contrib/llvm-project/llvm/lib/InterfaceStub/ELFObjHandler.cpp b/contrib/llvm-project/llvm/lib/InterfaceStub/ELFObjHandler.cpp
index 49ed27e265d4..c1256563d0d6 100644
--- a/contrib/llvm-project/llvm/lib/InterfaceStub/ELFObjHandler.cpp
+++ b/contrib/llvm-project/llvm/lib/InterfaceStub/ELFObjHandler.cpp
@@ -57,7 +57,7 @@ static void initELFHeader(typename ELFT::Ehdr &ElfHeader, uint16_t Machine) {
ElfHeader.e_ident[EI_MAG2] = ElfMagic[EI_MAG2];
ElfHeader.e_ident[EI_MAG3] = ElfMagic[EI_MAG3];
ElfHeader.e_ident[EI_CLASS] = ELFT::Is64Bits ? ELFCLASS64 : ELFCLASS32;
- bool IsLittleEndian = ELFT::TargetEndianness == support::little;
+ bool IsLittleEndian = ELFT::TargetEndianness == llvm::endianness::little;
ElfHeader.e_ident[EI_DATA] = IsLittleEndian ? ELFDATA2LSB : ELFDATA2MSB;
ElfHeader.e_ident[EI_VERSION] = EV_CURRENT;
ElfHeader.e_ident[EI_OSABI] = ELFOSABI_NONE;
diff --git a/contrib/llvm-project/llvm/lib/InterfaceStub/IFSHandler.cpp b/contrib/llvm-project/llvm/lib/InterfaceStub/IFSHandler.cpp
index aa5817dceed5..da46592bd381 100644
--- a/contrib/llvm-project/llvm/lib/InterfaceStub/IFSHandler.cpp
+++ b/contrib/llvm-project/llvm/lib/InterfaceStub/IFSHandler.cpp
@@ -167,7 +167,7 @@ template <> struct MappingTraits<IFSStubTriple> {
bool usesTriple(StringRef Buf) {
for (line_iterator I(MemoryBufferRef(Buf, "ELFStub")); !I.is_at_eof(); ++I) {
StringRef Line = (*I).trim();
- if (Line.startswith("Target:")) {
+ if (Line.starts_with("Target:")) {
if (Line == "Target:" || Line.contains("{")) {
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/LTO/LTO.cpp b/contrib/llvm-project/llvm/lib/LTO/LTO.cpp
index bc8abb751221..05836fd28f52 100644
--- a/contrib/llvm-project/llvm/lib/LTO/LTO.cpp
+++ b/contrib/llvm-project/llvm/lib/LTO/LTO.cpp
@@ -74,7 +74,6 @@ namespace llvm {
cl::opt<bool> EnableLTOInternalization(
"enable-lto-internalization", cl::init(true), cl::Hidden,
cl::desc("Enable global value internalization in LTO"));
-}
/// Indicate we are linking with an allocator that supports hot/cold operator
/// new interfaces.
@@ -82,6 +81,7 @@ extern cl::opt<bool> SupportsHotColdNew;
/// Enable MemProf context disambiguation for thin link.
extern cl::opt<bool> EnableMemProfContextDisambiguation;
+} // namespace llvm
// Computes a unique hash for the Module considering the current list of
// export/import and other global analysis results.
@@ -142,8 +142,8 @@ void llvm::computeLTOCacheKey(
AddUnsigned(-1);
for (const auto &S : Conf.MllvmArgs)
AddString(S);
- AddUnsigned(Conf.CGOptLevel);
- AddUnsigned(Conf.CGFileType);
+ AddUnsigned(static_cast<int>(Conf.CGOptLevel));
+ AddUnsigned(static_cast<int>(Conf.CGFileType));
AddUnsigned(Conf.OptLevel);
AddUnsigned(Conf.Freestanding);
AddString(Conf.OptPipeline);
@@ -178,12 +178,12 @@ void llvm::computeLTOCacheKey(
ImportMapIteratorTy ModIt;
const ModuleSummaryIndex::ModuleInfo *ModInfo;
- StringRef getIdentifier() const { return ModIt->getKey(); }
+ StringRef getIdentifier() const { return ModIt->getFirst(); }
const FunctionImporter::FunctionsToImportTy &getFunctions() const {
return ModIt->second;
}
- const ModuleHash &getHash() const { return ModInfo->second.second; }
+ const ModuleHash &getHash() const { return ModInfo->second; }
};
std::vector<ImportModule> ImportModulesVector;
@@ -191,7 +191,7 @@ void llvm::computeLTOCacheKey(
for (ImportMapIteratorTy It = ImportList.begin(); It != ImportList.end();
++It) {
- ImportModulesVector.push_back({It, Index.getModule(It->getKey())});
+ ImportModulesVector.push_back({It, Index.getModule(It->getFirst())});
}
// Order using module hash, to be both independent of module name and
// module order.
@@ -468,24 +468,13 @@ static void thinLTOInternalizeAndPromoteGUID(
if (!EnableLTOInternalization)
continue;
- // Ignore local and appending linkage values since the linker
- // doesn't resolve them (and there is no need to internalize if this is
- // already internal).
- if (GlobalValue::isLocalLinkage(S->linkage()) ||
- S->linkage() == GlobalValue::AppendingLinkage)
- continue;
-
- // We can't internalize available_externally globals because this
- // can break function pointer equality.
- if (S->linkage() == GlobalValue::AvailableExternallyLinkage)
- continue;
-
- bool IsPrevailing = isPrevailing(VI.getGUID(), S.get());
-
- if (GlobalValue::isInterposableLinkage(S->linkage()) && !IsPrevailing)
+ // Non-exported values with external linkage can be internalized.
+ if (GlobalValue::isExternalLinkage(S->linkage())) {
+ S->setLinkage(GlobalValue::InternalLinkage);
continue;
+ }
- // Non-exported functions and variables with linkonce_odr or weak_odr
+ // Non-exported function and variable definitions with a weak-for-linker
// linkage can be internalized in certain cases. The minimum legality
// requirements would be that they are not address taken to ensure that we
// don't break pointer equality checks, and that variables are either read-
@@ -494,7 +483,7 @@ static void thinLTOInternalizeAndPromoteGUID(
// (which is how this is guaranteed for variables, when analyzing whether
// they are read or write-only).
//
- // However, we only get to this code for weak/linkonce ODR values in one of
+ // However, we only get to this code for weak-for-linkage values in one of
// two cases:
// 1) The prevailing copy is not in IR (it is in native code).
// 2) The prevailing copy in IR is not exported from its module.
@@ -506,10 +495,10 @@ static void thinLTOInternalizeAndPromoteGUID(
// duplicate linkonce_odr copies as exported via the tool, so we need
// to handle that case below by checking the number of copies.
//
- // Generally, we only want to internalize a linkonce/weak ODR value in case
+ // Generally, we only want to internalize a weak-for-linker value in case
// 2, because in case 1 we cannot see how the value is used to know if it
// is read or write-only. We also don't want to bloat the binary with
- // multiple internalized copies of non-prevailing linkonce_odr functions.
+ // multiple internalized copies of non-prevailing linkonce/weak functions.
// Note if we don't internalize, we will convert non-prevailing copies to
// available_externally anyway, so that we drop them after inlining. The
// only reason to internalize such a function is if we indeed have a single
@@ -520,18 +509,16 @@ static void thinLTOInternalizeAndPromoteGUID(
// already perform this elsewhere in the ThinLTO backend handling for
// read or write-only variables (processGlobalForThinLTO).
//
- // Therefore, only internalize linkonce/weak ODR if there is a single copy,
- // that is prevailing in this IR module. We can do so aggressively, without
+ // Therefore, only internalize linkonce/weak if there is a single copy, that
+ // is prevailing in this IR module. We can do so aggressively, without
// requiring the address to be insignificant, or that a variable be read or
// write-only.
- if ((S->linkage() == GlobalValue::WeakODRLinkage ||
- S->linkage() == GlobalValue::LinkOnceODRLinkage) &&
- // We can have only one copy in ThinLTO that isn't prevailing, if the
- // prevailing copy is in a native object.
- (!IsPrevailing || ExternallyVisibleCopies > 1))
+ if (!GlobalValue::isWeakForLinker(S->linkage()) ||
+ GlobalValue::isExternalWeakLinkage(S->linkage()))
continue;
- S->setLinkage(GlobalValue::InternalLinkage);
+ if (isPrevailing(VI.getGUID(), S.get()) && ExternallyVisibleCopies == 1)
+ S->setLinkage(GlobalValue::InternalLinkage);
}
}
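[note] The internalization logic is restructured rather than weakened: non-exported plain-external definitions are internalized unconditionally, while weak-for-linker copies (linkonce/weak, ODR or not, but never extern_weak) are internalized only when this module's copy prevails and is the single externally visible one. A hedged condensation as a predicate:

    #include "llvm/IR/GlobalValue.h"

    // Locals, appending and available_externally fall out of the
    // weak-for-linker test below and are left alone.
    bool shouldInternalize(llvm::GlobalValue::LinkageTypes L, bool IsPrevailing,
                           unsigned ExternallyVisibleCopies) {
      if (llvm::GlobalValue::isExternalLinkage(L))
        return true; // non-exported strong definition
      if (!llvm::GlobalValue::isWeakForLinker(L) ||
          llvm::GlobalValue::isExternalWeakLinkage(L))
        return false;
      return IsPrevailing && ExternallyVisibleCopies == 1;
    }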
@@ -623,13 +610,7 @@ void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
assert(ResI != ResE);
SymbolResolution Res = *ResI++;
- StringRef Name = Sym.getName();
- // Strip the __imp_ prefix from COFF dllimport symbols (similar to the
- // way they are handled by lld), otherwise we can end up with two
- // global resolutions (one with and one for a copy of the symbol without).
- if (TT.isOSBinFormatCOFF() && Name.startswith("__imp_"))
- Name = Name.substr(strlen("__imp_"));
- auto &GlobalRes = GlobalResolutions[Name];
+ auto &GlobalRes = GlobalResolutions[Sym.getName()];
GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr();
if (Res.Prevailing) {
assert(!GlobalRes.Prevailing &&
@@ -778,7 +759,7 @@ Error LTO::addModule(InputFile &Input, unsigned ModI,
// Regular LTO module summaries are added to a dummy module that represents
// the combined regular LTO module.
- if (Error Err = BM.readSummary(ThinLTO.CombinedIndex, "", -1ull))
+ if (Error Err = BM.readSummary(ThinLTO.CombinedIndex, ""))
return Err;
RegularLTO.ModsWithSummaries.push_back(std::move(*ModOrErr));
return Error::success();
@@ -1026,16 +1007,14 @@ Error LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
}
}
- uint64_t ModuleId = ThinLTO.ModuleMap.size();
if (Error Err =
BM.readSummary(ThinLTO.CombinedIndex, BM.getModuleIdentifier(),
- ModuleId, [&](GlobalValue::GUID GUID) {
+ [&](GlobalValue::GUID GUID) {
return ThinLTO.PrevailingModuleForGUID[GUID] ==
BM.getModuleIdentifier();
}))
return Err;
- LLVM_DEBUG(dbgs() << "Module " << ModuleId << ": " << BM.getModuleIdentifier()
- << "\n");
+ LLVM_DEBUG(dbgs() << "Module " << BM.getModuleIdentifier() << "\n");
for (const InputFile::Symbol &Sym : Syms) {
assert(ResI != ResE);
@@ -1275,7 +1254,7 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
ConstantAggregateZero::get(Ty), "");
GV->setAlignment(I.second.Alignment);
if (OldGV) {
- OldGV->replaceAllUsesWith(ConstantExpr::getBitCast(GV, OldGV->getType()));
+ OldGV->replaceAllUsesWith(GV);
GV->takeName(OldGV);
OldGV->eraseFromParent();
} else {
@@ -1285,13 +1264,27 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
updateMemProfAttributes(*RegularLTO.CombinedModule, ThinLTO.CombinedIndex);
+ bool WholeProgramVisibilityEnabledInLTO =
+ Conf.HasWholeProgramVisibility &&
+ // If validation is enabled, upgrade visibility only when all vtables
+ // have typeinfos.
+ (!Conf.ValidateAllVtablesHaveTypeInfos || Conf.AllVtablesHaveTypeInfos);
+
+ // This returns true when the name is local or not defined. Locals are
+ // expected to be handled separately.
+ auto IsVisibleToRegularObj = [&](StringRef name) {
+ auto It = GlobalResolutions.find(name);
+ return (It == GlobalResolutions.end() || It->second.VisibleOutsideSummary);
+ };
+
// If allowed, upgrade public vcall visibility metadata to linkage unit
// visibility before whole program devirtualization in the optimizer.
- updateVCallVisibilityInModule(*RegularLTO.CombinedModule,
- Conf.HasWholeProgramVisibility,
- DynamicExportSymbols);
+ updateVCallVisibilityInModule(
+ *RegularLTO.CombinedModule, WholeProgramVisibilityEnabledInLTO,
+ DynamicExportSymbols, Conf.ValidateAllVtablesHaveTypeInfos,
+ IsVisibleToRegularObj);
updatePublicTypeTestCalls(*RegularLTO.CombinedModule,
- Conf.HasWholeProgramVisibility);
+ WholeProgramVisibilityEnabledInLTO);
if (Conf.PreOptModuleHook &&
!Conf.PreOptModuleHook(0, *RegularLTO.CombinedModule))
@@ -1362,14 +1355,15 @@ class lto::ThinBackendProc {
protected:
const Config &Conf;
ModuleSummaryIndex &CombinedIndex;
- const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries;
+ const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries;
lto::IndexWriteCallback OnWrite;
bool ShouldEmitImportsFiles;
public:
- ThinBackendProc(const Config &Conf, ModuleSummaryIndex &CombinedIndex,
- const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
- lto::IndexWriteCallback OnWrite, bool ShouldEmitImportsFiles)
+ ThinBackendProc(
+ const Config &Conf, ModuleSummaryIndex &CombinedIndex,
+ const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ lto::IndexWriteCallback OnWrite, bool ShouldEmitImportsFiles)
: Conf(Conf), CombinedIndex(CombinedIndex),
ModuleToDefinedGVSummaries(ModuleToDefinedGVSummaries),
OnWrite(OnWrite), ShouldEmitImportsFiles(ShouldEmitImportsFiles) {}
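[note] Throughout the thin-link, StringMap-keyed maps become DenseMap<StringRef, ...>: module identifiers are borrowed from storage the combined index already owns, so the maps stop copying their keys; the companion change drops the synthetic numeric module IDs from readSummary. The lifetime caveat, sketched:

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/StringMap.h"

    // Sketch: DenseMap<StringRef, V> does not own its keys; every key must
    // outlive the map. Here Paths stands in for the summary index.
    void buildPerModuleMap(const llvm::StringMap<int> &Paths) {
      llvm::DenseMap<llvm::StringRef, int> PerModule;
      for (const auto &Entry : Paths)
        PerModule[Entry.getKey()] = Entry.getValue(); // key storage stays put
    }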
@@ -1426,7 +1420,7 @@ public:
InProcessThinBackend(
const Config &Conf, ModuleSummaryIndex &CombinedIndex,
ThreadPoolStrategy ThinLTOParallelism,
- const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
AddStreamFn AddStream, FileCache Cache, lto::IndexWriteCallback OnWrite,
bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles)
: ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
@@ -1548,13 +1542,15 @@ ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism,
lto::IndexWriteCallback OnWrite,
bool ShouldEmitIndexFiles,
bool ShouldEmitImportsFiles) {
- return [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
- const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
- AddStreamFn AddStream, FileCache Cache) {
- return std::make_unique<InProcessThinBackend>(
- Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries, AddStream,
- Cache, OnWrite, ShouldEmitIndexFiles, ShouldEmitImportsFiles);
- };
+ return
+ [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
+ const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ AddStreamFn AddStream, FileCache Cache) {
+ return std::make_unique<InProcessThinBackend>(
+ Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries,
+ AddStream, Cache, OnWrite, ShouldEmitIndexFiles,
+ ShouldEmitImportsFiles);
+ };
}
// Given the original \p Path to an output file, replace any path
@@ -1584,7 +1580,7 @@ class WriteIndexesThinBackend : public ThinBackendProc {
public:
WriteIndexesThinBackend(
const Config &Conf, ModuleSummaryIndex &CombinedIndex,
- const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
std::string OldPrefix, std::string NewPrefix,
std::string NativeObjectPrefix, bool ShouldEmitImportsFiles,
raw_fd_ostream *LinkedObjectsFile, lto::IndexWriteCallback OnWrite)
@@ -1632,13 +1628,15 @@ ThinBackend lto::createWriteIndexesThinBackend(
std::string OldPrefix, std::string NewPrefix,
std::string NativeObjectPrefix, bool ShouldEmitImportsFiles,
raw_fd_ostream *LinkedObjectsFile, IndexWriteCallback OnWrite) {
- return [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
- const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
- AddStreamFn AddStream, FileCache Cache) {
- return std::make_unique<WriteIndexesThinBackend>(
- Conf, CombinedIndex, ModuleToDefinedGVSummaries, OldPrefix, NewPrefix,
- NativeObjectPrefix, ShouldEmitImportsFiles, LinkedObjectsFile, OnWrite);
- };
+ return
+ [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
+ const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ AddStreamFn AddStream, FileCache Cache) {
+ return std::make_unique<WriteIndexesThinBackend>(
+ Conf, CombinedIndex, ModuleToDefinedGVSummaries, OldPrefix,
+ NewPrefix, NativeObjectPrefix, ShouldEmitImportsFiles,
+ LinkedObjectsFile, OnWrite);
+ };
}
Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
@@ -1664,8 +1662,8 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
// Collect for each module the list of function it defines (GUID ->
// Summary).
- StringMap<GVSummaryMapTy>
- ModuleToDefinedGVSummaries(ThinLTO.ModuleMap.size());
+ DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries(
+ ThinLTO.ModuleMap.size());
ThinLTO.CombinedIndex.collectDefinedGVSummariesPerModule(
ModuleToDefinedGVSummaries);
// Create entries for any modules that didn't have any GV summaries
@@ -1682,9 +1680,9 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
// Synthesize entry counts for functions in the CombinedIndex.
computeSyntheticCounts(ThinLTO.CombinedIndex);
- StringMap<FunctionImporter::ImportMapTy> ImportLists(
+ DenseMap<StringRef, FunctionImporter::ImportMapTy> ImportLists(
ThinLTO.ModuleMap.size());
- StringMap<FunctionImporter::ExportSetTy> ExportLists(
+ DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(
ThinLTO.ModuleMap.size());
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
@@ -1693,13 +1691,38 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
std::set<GlobalValue::GUID> ExportedGUIDs;
- if (hasWholeProgramVisibility(Conf.HasWholeProgramVisibility))
+ bool WholeProgramVisibilityEnabledInLTO =
+ Conf.HasWholeProgramVisibility &&
+ // If validation is enabled, upgrade visibility only when all vtables
+ // have typeinfos.
+ (!Conf.ValidateAllVtablesHaveTypeInfos || Conf.AllVtablesHaveTypeInfos);
+ if (hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
ThinLTO.CombinedIndex.setWithWholeProgramVisibility();
+
+ // If we're validating, get the vtable symbols that should not be
+ // upgraded because they correspond to typeIDs outside of index-based
+ // WPD info.
+ DenseSet<GlobalValue::GUID> VisibleToRegularObjSymbols;
+ if (WholeProgramVisibilityEnabledInLTO &&
+ Conf.ValidateAllVtablesHaveTypeInfos) {
+ // This returns true when the name is local or not defined. Locals are
+ // expected to be handled separately.
+ auto IsVisibleToRegularObj = [&](StringRef name) {
+ auto It = GlobalResolutions.find(name);
+ return (It == GlobalResolutions.end() ||
+ It->second.VisibleOutsideSummary);
+ };
+
+ getVisibleToRegularObjVtableGUIDs(ThinLTO.CombinedIndex,
+ VisibleToRegularObjSymbols,
+ IsVisibleToRegularObj);
+ }
+
// If allowed, upgrade public vcall visibility to linkage unit visibility in
// the summaries before whole program devirtualization below.
- updateVCallVisibilityInIndex(ThinLTO.CombinedIndex,
- Conf.HasWholeProgramVisibility,
- DynamicExportSymbols);
+ updateVCallVisibilityInIndex(
+ ThinLTO.CombinedIndex, WholeProgramVisibilityEnabledInLTO,
+ DynamicExportSymbols, VisibleToRegularObjSymbols);
// Perform index-based WPD. This will return immediately if there are
// no index entries in the typeIdMetadata map (e.g. if we are instead
diff --git a/contrib/llvm-project/llvm/lib/LTO/LTOBackend.cpp b/contrib/llvm-project/llvm/lib/LTO/LTOBackend.cpp
index 29e288767608..ccc4276e36da 100644
--- a/contrib/llvm-project/llvm/lib/LTO/LTOBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/LTO/LTOBackend.cpp
@@ -225,7 +225,12 @@ createTargetMachine(const Config &Conf, const Target *TheTarget, Module &M) {
std::unique_ptr<TargetMachine> TM(TheTarget->createTargetMachine(
TheTriple, Conf.CPU, Features.getString(), Conf.Options, RelocModel,
CodeModel, Conf.CGOptLevel));
+
assert(TM && "Failed to create target machine");
+
+ if (std::optional<uint64_t> LargeDataThreshold = M.getLargeDataThreshold())
+ TM->setLargeDataThreshold(*LargeDataThreshold);
+
return TM;
}
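[note] New above: if the module carries a large-data threshold (the medium code model cutoff), it is propagated onto the freshly created TargetMachine so LTO code generation honors it. As a standalone sketch:

    #include "llvm/IR/Module.h"
    #include "llvm/Target/TargetMachine.h"

    // Sketch: forward a module-level threshold to the target machine, if set.
    void applyLargeDataThreshold(const llvm::Module &M,
                                 llvm::TargetMachine &TM) {
      if (std::optional<uint64_t> Threshold = M.getLargeDataThreshold())
        TM.setLargeDataThreshold(*Threshold);
    }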
diff --git a/contrib/llvm-project/llvm/lib/LTO/LTOCodeGenerator.cpp b/contrib/llvm-project/llvm/lib/LTO/LTOCodeGenerator.cpp
index 1402da7fbbd2..52d8fff14be9 100644
--- a/contrib/llvm-project/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/contrib/llvm-project/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -200,7 +200,7 @@ void LTOCodeGenerator::setOptLevel(unsigned Level) {
Config.OptLevel = Level;
Config.PTO.LoopVectorization = Config.OptLevel > 1;
Config.PTO.SLPVectorization = Config.OptLevel > 1;
- std::optional<CodeGenOpt::Level> CGOptLevelOrNone =
+ std::optional<CodeGenOptLevel> CGOptLevelOrNone =
CodeGenOpt::getLevel(Config.OptLevel);
assert(CGOptLevelOrNone && "Unknown optimization level!");
Config.CGOptLevel = *CGOptLevelOrNone;
@@ -306,7 +306,7 @@ bool LTOCodeGenerator::runAIXSystemAssembler(SmallString<128> &AssemblyFile) {
bool LTOCodeGenerator::compileOptimizedToFile(const char **Name) {
if (useAIXSystemAssembler())
- setFileType(CGFT_AssemblyFile);
+ setFileType(CodeGenFileType::AssemblyFile);
// make unique temp output file to put generated code
SmallString<128> Filename;
@@ -314,7 +314,8 @@ bool LTOCodeGenerator::compileOptimizedToFile(const char **Name) {
auto AddStream =
[&](size_t Task,
const Twine &ModuleName) -> std::unique_ptr<CachedFileStream> {
- StringRef Extension(Config.CGFileType == CGFT_AssemblyFile ? "s" : "o");
+ StringRef Extension(
+ Config.CGFileType == CodeGenFileType::AssemblyFile ? "s" : "o");
int FD;
std::error_code EC =
@@ -604,11 +605,14 @@ bool LTOCodeGenerator::optimize() {
// pipeline run below.
updatePublicTypeTestCalls(*MergedModule,
/* WholeProgramVisibilityEnabledInLTO */ false);
- updateVCallVisibilityInModule(*MergedModule,
- /* WholeProgramVisibilityEnabledInLTO */ false,
- // FIXME: This needs linker information via a
- // TBD new interface.
- /* DynamicExportSymbols */ {});
+ updateVCallVisibilityInModule(
+ *MergedModule,
+ /* WholeProgramVisibilityEnabledInLTO */ false,
+ // FIXME: These need linker information via a
+ // TBD new interface.
+ /*DynamicExportSymbols=*/{},
+ /*ValidateAllVtablesHaveTypeInfos=*/false,
+ /*IsVisibleToRegularObj=*/[](StringRef) { return true; });
// We always run the verifier once on the merged module, the `DisableVerify`
// parameter only applies to subsequent verify.
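[note] The CGFT_* constants and CodeGenOpt::Level became scoped enums (CodeGenFileType, CodeGenOptLevel), which is also why computeLTOCacheKey above now casts them before hashing. A hedged sketch of the renamed spellings on lto::Config:

    #include "llvm/LTO/Config.h"
    #include "llvm/Support/CodeGen.h"

    void configureCodegen(llvm::lto::Config &Conf) {
      Conf.CGFileType = llvm::CodeGenFileType::ObjectFile; // was CGFT_ObjectFile
      if (auto L = llvm::CodeGenOpt::getLevel(2))          // std::optional result
        Conf.CGOptLevel = *L;                              // llvm::CodeGenOptLevel
    }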
diff --git a/contrib/llvm-project/llvm/lib/LTO/LTOModule.cpp b/contrib/llvm-project/llvm/lib/LTO/LTOModule.cpp
index 868169e78225..f839fe944e18 100644
--- a/contrib/llvm-project/llvm/lib/LTO/LTOModule.cpp
+++ b/contrib/llvm-project/llvm/lib/LTO/LTOModule.cpp
@@ -91,7 +91,7 @@ bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer,
expectedToErrorOrAndEmitErrors(Context, getBitcodeTargetTriple(*BCOrErr));
if (!TripleOrErr)
return false;
- return StringRef(*TripleOrErr).startswith(TriplePrefix);
+ return StringRef(*TripleOrErr).starts_with(TriplePrefix);
}
std::string LTOModule::getProducerString(MemoryBuffer *Buffer) {
@@ -382,17 +382,17 @@ void LTOModule::addDefinedDataSymbol(StringRef Name, const GlobalValue *v) {
// special case if this data blob is an ObjC class definition
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(v)) {
StringRef Section = GV->getSection();
- if (Section.startswith("__OBJC,__class,")) {
+ if (Section.starts_with("__OBJC,__class,")) {
addObjCClass(GV);
}
// special case if this data blob is an ObjC category definition
- else if (Section.startswith("__OBJC,__category,")) {
+ else if (Section.starts_with("__OBJC,__category,")) {
addObjCCategory(GV);
}
// special case if this data blob is the list of referenced classes
- else if (Section.startswith("__OBJC,__cls_refs,")) {
+ else if (Section.starts_with("__OBJC,__cls_refs,")) {
addObjCClassRef(GV);
}
}
diff --git a/contrib/llvm-project/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/contrib/llvm-project/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index 24cd6e1a0b41..443439b71e75 100644
--- a/contrib/llvm-project/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/contrib/llvm-project/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -151,6 +151,7 @@ static StringMap<lto::InputFile *>
generateModuleMap(std::vector<std::unique_ptr<lto::InputFile>> &Modules) {
StringMap<lto::InputFile *> ModuleMap;
for (auto &M : Modules) {
+ LLVM_DEBUG(dbgs() << "Adding module " << M->getName() << " to ModuleMap\n");
assert(!ModuleMap.contains(M->getName()) &&
"Expect unique Buffer Identifier");
ModuleMap[M->getName()] = M.get();
@@ -338,7 +339,7 @@ std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule,
PM.add(createObjCARCContractPass());
// Setup the codegen now.
- if (TM.addPassesToEmitFile(PM, OS, nullptr, CGFT_ObjectFile,
+ if (TM.addPassesToEmitFile(PM, OS, nullptr, CodeGenFileType::ObjectFile,
/* DisableVerify */ true))
report_fatal_error("Failed to setup codegen");
@@ -617,11 +618,9 @@ std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const {
std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
std::unique_ptr<ModuleSummaryIndex> CombinedIndex =
std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
- uint64_t NextModuleId = 0;
for (auto &Mod : Modules) {
auto &M = Mod->getSingleBitcodeModule();
- if (Error Err =
- M.readSummary(*CombinedIndex, Mod->getName(), NextModuleId++)) {
+ if (Error Err = M.readSummary(*CombinedIndex, Mod->getName())) {
// FIXME diagnose
logAllUnhandledErrors(
std::move(Err), errs(),
@@ -634,11 +633,12 @@ std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
namespace {
struct IsExported {
- const StringMap<FunctionImporter::ExportSetTy> &ExportLists;
+ const DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists;
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols;
- IsExported(const StringMap<FunctionImporter::ExportSetTy> &ExportLists,
- const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols)
+ IsExported(
+ const DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists,
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols)
: ExportLists(ExportLists), GUIDPreservedSymbols(GUIDPreservedSymbols) {}
bool operator()(StringRef ModuleIdentifier, ValueInfo VI) const {
@@ -687,7 +687,7 @@ void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index,
auto ModuleIdentifier = TheModule.getModuleIdentifier();
// Collect for each module the list of function it defines (GUID -> Summary).
- StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries;
+ DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries;
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
// Convert the preserved symbols set from string to GUID
@@ -705,8 +705,8 @@ void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index,
computePrevailingCopies(Index, PrevailingCopy);
// Generate import/export list
- StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
- StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
+ DenseMap<StringRef, FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
+ DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries,
IsPrevailing(PrevailingCopy), ImportLists,
ExportLists);
@@ -740,7 +740,7 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
auto ModuleCount = Index.modulePaths().size();
// Collect for each module the list of function it defines (GUID -> Summary).
- StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
+ DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
// Convert the preserved symbols set from string to GUID
@@ -757,8 +757,8 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
computePrevailingCopies(Index, PrevailingCopy);
// Generate import/export list
- StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
- StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
+ DenseMap<StringRef, FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
+ DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries,
IsPrevailing(PrevailingCopy), ImportLists,
ExportLists);
@@ -780,7 +780,7 @@ void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
auto ModuleIdentifier = TheModule.getModuleIdentifier();
// Collect for each module the list of functions it defines (GUID -> Summary).
- StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
+ DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
// Convert the preserved symbols set from string to GUID
@@ -797,8 +797,8 @@ void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
computePrevailingCopies(Index, PrevailingCopy);
// Generate import/export list
- StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
- StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
+ DenseMap<StringRef, FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
+ DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries,
IsPrevailing(PrevailingCopy), ImportLists,
ExportLists);
@@ -818,7 +818,7 @@ void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName,
auto ModuleIdentifier = TheModule.getModuleIdentifier();
// Collect for each module the list of functions it defines (GUID -> Summary).
- StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
+ DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
// Convert the preserved symbols set from string to GUID
@@ -835,8 +835,8 @@ void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName,
computePrevailingCopies(Index, PrevailingCopy);
// Generate import/export list
- StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
- StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
+ DenseMap<StringRef, FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
+ DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries,
IsPrevailing(PrevailingCopy), ImportLists,
ExportLists);
@@ -871,7 +871,7 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule,
addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
// Collect for each module the list of functions it defines (GUID -> Summary).
- StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
+ DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
// Compute "dead" symbols, we don't want to import/export these!
@@ -882,8 +882,8 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule,
computePrevailingCopies(Index, PrevailingCopy);
// Generate import/export list
- StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
- StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
+ DenseMap<StringRef, FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
+ DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries,
IsPrevailing(PrevailingCopy), ImportLists,
ExportLists);
@@ -1033,7 +1033,7 @@ void ThinLTOCodeGenerator::run() {
auto ModuleCount = Modules.size();
// Collect for each module the list of functions it defines (GUID -> Summary).
- StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
+ DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
Index->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
// Convert the preserved symbols set from string to GUID; this is needed for
@@ -1058,11 +1058,14 @@ void ThinLTOCodeGenerator::run() {
// via the internal option. Must be done before WPD below.
if (hasWholeProgramVisibility(/* WholeProgramVisibilityEnabledInLTO */ false))
Index->setWithWholeProgramVisibility();
+
+ // FIXME: This needs linker information via a TBD new interface
updateVCallVisibilityInIndex(*Index,
- /* WholeProgramVisibilityEnabledInLTO */ false,
- // FIXME: This needs linker information via a
+ /*WholeProgramVisibilityEnabledInLTO=*/false,
+ // FIXME: These need linker information via a
// TBD new interface.
- /* DynamicExportSymbols */ {});
+ /*DynamicExportSymbols=*/{},
+ /*VisibleToRegularObjSymbols=*/{});
// Perform index-based WPD. This will return immediately if there are
// no index entries in the typeIdMetadata map (e.g. if we are instead
@@ -1079,8 +1082,8 @@ void ThinLTOCodeGenerator::run() {
// Collect the import/export lists for all modules from the call-graph in the
// combined index.
- StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
- StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
+ DenseMap<StringRef, FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
+ DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries,
IsPrevailing(PrevailingCopy), ImportLists,
ExportLists);
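The StringMap-to-DenseMap switch above changes key ownership: a StringMap copies the key bytes into the map, while a DenseMap keyed by StringRef stores only a view, so the keys must outlive the map (here they are backed by the module paths owned by the summary index). A minimal sketch of the difference, assuming only LLVM's ADT headers:

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include <string>

int main() {
  llvm::StringMap<int> Owning;
  {
    std::string Temp = "module_a.o";
    Owning[Temp] = 1; // safe: the key bytes are copied into the map
  }

  std::string Live = "module_b.o";
  llvm::DenseMap<llvm::StringRef, int> Views;
  Views[Live] = 2; // stores only a pointer/length view of Live
  // A StringRef key backed by a destroyed string would dangle; in the patch
  // the keys are module paths owned by the combined summary index.
  return (Owning.count("module_a.o") == 1 && Views.count(Live) == 1) ? 0 : 1;
}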
diff --git a/contrib/llvm-project/llvm/lib/Linker/IRMover.cpp b/contrib/llvm-project/llvm/lib/Linker/IRMover.cpp
index df090c5990e6..1bd562d1e8ae 100644
--- a/contrib/llvm-project/llvm/lib/Linker/IRMover.cpp
+++ b/contrib/llvm-project/llvm/lib/Linker/IRMover.cpp
@@ -937,7 +937,7 @@ IRLinker::linkAppendingVarProto(GlobalVariable *DstGV,
IsOldStructor = true;
}
- PointerType *VoidPtrTy = Type::getInt8Ty(SrcGV->getContext())->getPointerTo();
+ PointerType *VoidPtrTy = PointerType::get(SrcGV->getContext(), 0);
if (IsOldStructor) {
auto &ST = *cast<StructType>(EltTy);
Type *Tys[3] = {ST.getElementType(0), ST.getElementType(1), VoidPtrTy};
@@ -989,8 +989,7 @@ IRLinker::linkAppendingVarProto(GlobalVariable *DstGV,
// Replace any uses of the two global variables with uses of the new
// global.
if (DstGV) {
- RAUWWorklist.push_back(
- std::make_pair(DstGV, ConstantExpr::getBitCast(NG, DstGV->getType())));
+ RAUWWorklist.push_back(std::make_pair(DstGV, NG));
}
return Ret;
@@ -1135,6 +1134,7 @@ Error IRLinker::linkFunctionBody(Function &Dst, Function &Src) {
Dst.setPrologueData(Src.getPrologueData());
if (Src.hasPersonalityFn())
Dst.setPersonalityFn(Src.getPersonalityFn());
+ assert(Src.IsNewDbgInfoFormat == Dst.IsNewDbgInfoFormat);
// Copy over the metadata attachments without remapping.
Dst.copyMetadata(&Src, 0);
@@ -1545,6 +1545,8 @@ Error IRLinker::run() {
if (Error Err = SrcM->getMaterializer()->materializeMetadata())
return Err;
+ DstM.IsNewDbgInfoFormat = SrcM->IsNewDbgInfoFormat;
+
// Inherit the target data from the source module if the destination module
// doesn't have one already.
if (DstM.getDataLayout().isDefault())
@@ -1567,7 +1569,7 @@ Error IRLinker::run() {
std::string ModuleId = SrcM->getModuleIdentifier();
StringRef FileName = llvm::sys::path::filename(ModuleId);
bool SrcIsLibDevice =
- FileName.startswith("libdevice") && FileName.endswith(".10.bc");
+ FileName.starts_with("libdevice") && FileName.ends_with(".10.bc");
bool SrcHasLibDeviceDL =
(SrcM->getDataLayoutStr().empty() ||
SrcM->getDataLayoutStr() == "e-i64:64-v16:16-v32:32-n16:32:64");
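The two IRMover changes above follow from opaque pointers: PointerType::get(Context, AS) names the one pointer type per address space, and the ConstantExpr::getBitCast around the replacement global is dropped because any two pointers in the same address space already share a type. A small sketch against the LLVM C++ API (LLVM 17/18, where opaque pointers are the default):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

int main() {
  llvm::LLVMContext Ctx;
  // One opaque pointer type per address space; the old spelling
  // Type::getInt8Ty(Ctx)->getPointerTo() now names the same type, which is
  // why no bitcast is needed when one global replaces another.
  llvm::PointerType *A = llvm::PointerType::get(Ctx, /*AddressSpace=*/0);
  llvm::PointerType *B = llvm::Type::getInt8Ty(Ctx)->getPointerTo();
  return A == B ? 0 : 1; // identical under opaque pointers
}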
diff --git a/contrib/llvm-project/llvm/lib/Linker/LinkModules.cpp b/contrib/llvm-project/llvm/lib/Linker/LinkModules.cpp
index 2f5fac4951f2..4fe1f1a0f518 100644
--- a/contrib/llvm-project/llvm/lib/Linker/LinkModules.cpp
+++ b/contrib/llvm-project/llvm/lib/Linker/LinkModules.cpp
@@ -462,6 +462,7 @@ void ModuleLinker::dropReplacedComdat(
bool ModuleLinker::run() {
Module &DstM = Mover.getModule();
DenseSet<const Comdat *> ReplacedDstComdats;
+ DenseSet<const Comdat *> NonPrevailingComdats;
for (const auto &SMEC : SrcM->getComdatSymbolTable()) {
const Comdat &C = SMEC.getValue();
@@ -473,6 +474,9 @@ bool ModuleLinker::run() {
return true;
ComdatsChosen[&C] = std::make_pair(SK, From);
+ if (From == LinkFrom::Dst)
+ NonPrevailingComdats.insert(&C);
+
if (From != LinkFrom::Src)
continue;
@@ -497,6 +501,23 @@ bool ModuleLinker::run() {
for (Function &GV : llvm::make_early_inc_range(DstM))
dropReplacedComdat(GV, ReplacedDstComdats);
+ if (!NonPrevailingComdats.empty()) {
+ DenseSet<GlobalObject *> AliasedGlobals;
+ for (auto &GA : SrcM->aliases())
+ if (GlobalObject *GO = GA.getAliaseeObject(); GO && GO->getComdat())
+ AliasedGlobals.insert(GO);
+ for (const Comdat *C : NonPrevailingComdats) {
+ SmallVector<GlobalObject *> ToUpdate;
+ for (GlobalObject *GO : C->getUsers())
+ if (GO->hasPrivateLinkage() && !AliasedGlobals.contains(GO))
+ ToUpdate.push_back(GO);
+ for (GlobalObject *GO : ToUpdate) {
+ GO->setLinkage(GlobalValue::AvailableExternallyLinkage);
+ GO->setComdat(nullptr);
+ }
+ }
+ }
+
for (GlobalVariable &GV : SrcM->globals())
if (GV.hasLinkOnceLinkage())
if (const Comdat *SC = GV.getComdat())
diff --git a/contrib/llvm-project/llvm/lib/MC/DXContainerPSVInfo.cpp b/contrib/llvm-project/llvm/lib/MC/DXContainerPSVInfo.cpp
index 148e56c6b5bc..48182fcd31df 100644
--- a/contrib/llvm-project/llvm/lib/MC/DXContainerPSVInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/DXContainerPSVInfo.cpp
@@ -8,13 +8,67 @@
#include "llvm/MC/DXContainerPSVInfo.h"
#include "llvm/BinaryFormat/DXContainer.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::mcdxbc;
using namespace llvm::dxbc::PSV;
+static constexpr size_t npos = StringRef::npos;
+
+static size_t FindSequence(ArrayRef<uint32_t> Buffer,
+ ArrayRef<uint32_t> Sequence) {
+ if (Buffer.size() < Sequence.size())
+ return npos;
+ for (size_t Idx = 0; Idx <= Buffer.size() - Sequence.size(); ++Idx) {
+ if (0 == memcmp(static_cast<const void *>(&Buffer[Idx]),
+ static_cast<const void *>(Sequence.begin()),
+ Sequence.size() * sizeof(uint32_t)))
+ return Idx;
+ }
+ return npos;
+}
+
+static void
+ProcessElementList(StringTableBuilder &StrTabBuilder,
+ SmallVectorImpl<uint32_t> &IndexBuffer,
+ SmallVectorImpl<v0::SignatureElement> &FinalElements,
+ SmallVectorImpl<StringRef> &SemanticNames,
+ ArrayRef<PSVSignatureElement> Elements) {
+ for (const auto &El : Elements) {
+ // Put the name in the string table and the name list.
+ StrTabBuilder.add(El.Name);
+ SemanticNames.push_back(El.Name);
+
+ v0::SignatureElement FinalElement;
+ memset(&FinalElement, 0, sizeof(v0::SignatureElement));
+ FinalElement.Rows = static_cast<uint8_t>(El.Indices.size());
+ FinalElement.StartRow = El.StartRow;
+ FinalElement.Cols = El.Cols;
+ FinalElement.StartCol = El.StartCol;
+ FinalElement.Allocated = El.Allocated;
+ FinalElement.Kind = El.Kind;
+ FinalElement.Type = El.Type;
+ FinalElement.Mode = El.Mode;
+ FinalElement.DynamicMask = El.DynamicMask;
+ FinalElement.Stream = El.Stream;
+
+ size_t Idx = FindSequence(IndexBuffer, El.Indices);
+ if (Idx == npos) {
+ FinalElement.IndicesOffset = static_cast<uint32_t>(IndexBuffer.size());
+ IndexBuffer.insert(IndexBuffer.end(), El.Indices.begin(),
+ El.Indices.end());
+ } else
+ FinalElement.IndicesOffset = static_cast<uint32_t>(Idx);
+ FinalElements.push_back(FinalElement);
+ }
+}
+
void PSVRuntimeInfo::write(raw_ostream &OS, uint32_t Version) const {
+ assert(IsFinalized && "finalize must be called before write");
+
uint32_t InfoSize;
uint32_t BindingSize;
switch (Version) {
@@ -31,24 +85,127 @@ void PSVRuntimeInfo::write(raw_ostream &OS, uint32_t Version) const {
InfoSize = sizeof(dxbc::PSV::v2::RuntimeInfo);
BindingSize = sizeof(dxbc::PSV::v2::ResourceBindInfo);
}
- uint32_t InfoSizeSwapped = InfoSize;
- if (sys::IsBigEndianHost)
- sys::swapByteOrder(InfoSizeSwapped);
// Write the size of the info.
- OS.write(reinterpret_cast<const char *>(&InfoSizeSwapped), sizeof(uint32_t));
+
+ support::endian::write(OS, InfoSize, llvm::endianness::little);
// Write the info itself.
OS.write(reinterpret_cast<const char *>(&BaseData), InfoSize);
uint32_t ResourceCount = static_cast<uint32_t>(Resources.size());
- uint32_t BindingSizeSwapped = BindingSize;
- if (sys::IsBigEndianHost) {
- sys::swapByteOrder(ResourceCount);
- sys::swapByteOrder(BindingSizeSwapped);
- }
- OS.write(reinterpret_cast<const char *>(&ResourceCount), sizeof(uint32_t));
- OS.write(reinterpret_cast<const char *>(&BindingSizeSwapped), sizeof(uint32_t));
-
+ support::endian::write(OS, ResourceCount, llvm::endianness::little);
+ if (ResourceCount > 0)
+ support::endian::write(OS, BindingSize, llvm::endianness::little);
+
for (const auto &Res : Resources)
OS.write(reinterpret_cast<const char *>(&Res), BindingSize);
+
+ // PSV Version 0 stops after the resource list.
+ if (Version == 0)
+ return;
+
+ StringTableBuilder StrTabBuilder((StringTableBuilder::DXContainer));
+ SmallVector<uint32_t, 64> IndexBuffer;
+ SmallVector<v0::SignatureElement, 32> SignatureElements;
+ SmallVector<StringRef, 32> SemanticNames;
+
+ ProcessElementList(StrTabBuilder, IndexBuffer, SignatureElements,
+ SemanticNames, InputElements);
+ ProcessElementList(StrTabBuilder, IndexBuffer, SignatureElements,
+ SemanticNames, OutputElements);
+ ProcessElementList(StrTabBuilder, IndexBuffer, SignatureElements,
+ SemanticNames, PatchOrPrimElements);
+
+ StrTabBuilder.finalize();
+ for (auto ElAndName : zip(SignatureElements, SemanticNames)) {
+ v0::SignatureElement &El = std::get<0>(ElAndName);
+ StringRef Name = std::get<1>(ElAndName);
+ El.NameOffset = static_cast<uint32_t>(StrTabBuilder.getOffset(Name));
+ if (sys::IsBigEndianHost)
+ El.swapBytes();
+ }
+
+ support::endian::write(OS, static_cast<uint32_t>(StrTabBuilder.getSize()),
+ llvm::endianness::little);
+
+ // Write the string table.
+ StrTabBuilder.write(OS);
+
+ // Write the index table size, then table.
+ support::endian::write(OS, static_cast<uint32_t>(IndexBuffer.size()),
+ llvm::endianness::little);
+ for (auto I : IndexBuffer)
+ support::endian::write(OS, I, llvm::endianness::little);
+
+ if (SignatureElements.size() > 0) {
+ // write the size of the signature elements.
+ support::endian::write(OS,
+ static_cast<uint32_t>(sizeof(v0::SignatureElement)),
+ llvm::endianness::little);
+
+ // write the signature elements.
+ OS.write(reinterpret_cast<const char *>(&SignatureElements[0]),
+ SignatureElements.size() * sizeof(v0::SignatureElement));
+ }
+
+ for (const auto &MaskVector : OutputVectorMasks)
+ support::endian::write_array(OS, ArrayRef<uint32_t>(MaskVector),
+ llvm::endianness::little);
+ support::endian::write_array(OS, ArrayRef<uint32_t>(PatchOrPrimMasks),
+ llvm::endianness::little);
+ for (const auto &MaskVector : InputOutputMap)
+ support::endian::write_array(OS, ArrayRef<uint32_t>(MaskVector),
+ llvm::endianness::little);
+ support::endian::write_array(OS, ArrayRef<uint32_t>(InputPatchMap),
+ llvm::endianness::little);
+ support::endian::write_array(OS, ArrayRef<uint32_t>(PatchOutputMap),
+ llvm::endianness::little);
+}
+
+void Signature::write(raw_ostream &OS) {
+ SmallVector<dxbc::ProgramSignatureElement> SigParams;
+ SigParams.reserve(Params.size());
+ StringTableBuilder StrTabBuilder((StringTableBuilder::DWARF));
+
+ // Name offsets are from the start of the part. Pre-calculate the offset to
+ // the start of the string table so that it can be added to the table offset.
+ uint32_t TableStart = sizeof(dxbc::ProgramSignatureHeader) +
+ (sizeof(dxbc::ProgramSignatureElement) * Params.size());
+
+ for (const auto &P : Params) {
+ // zero out the data
+ dxbc::ProgramSignatureElement FinalElement;
+ memset(&FinalElement, 0, sizeof(dxbc::ProgramSignatureElement));
+ FinalElement.Stream = P.Stream;
+ FinalElement.NameOffset =
+ static_cast<uint32_t>(StrTabBuilder.add(P.Name)) + TableStart;
+ FinalElement.Index = P.Index;
+ FinalElement.SystemValue = P.SystemValue;
+ FinalElement.CompType = P.CompType;
+ FinalElement.Register = P.Register;
+ FinalElement.Mask = P.Mask;
+ FinalElement.ExclusiveMask = P.ExclusiveMask;
+ FinalElement.MinPrecision = P.MinPrecision;
+ SigParams.push_back(FinalElement);
+ }
+
+ StrTabBuilder.finalizeInOrder();
+  stable_sort(SigParams, [&](const dxbc::ProgramSignatureElement &L,
+                             const dxbc::ProgramSignatureElement &R) {
+ return std::tie(L.Stream, L.Register, L.NameOffset) <
+ std::tie(R.Stream, R.Register, R.NameOffset);
+ });
+ if (sys::IsBigEndianHost)
+ for (auto &El : SigParams)
+ El.swapBytes();
+
+ dxbc::ProgramSignatureHeader Header = {static_cast<uint32_t>(Params.size()),
+ sizeof(dxbc::ProgramSignatureHeader)};
+ if (sys::IsBigEndianHost)
+ Header.swapBytes();
+ OS.write(reinterpret_cast<const char *>(&Header),
+ sizeof(dxbc::ProgramSignatureHeader));
+ OS.write(reinterpret_cast<const char *>(SigParams.data()),
+ sizeof(dxbc::ProgramSignatureElement) * SigParams.size());
+ StrTabBuilder.write(OS);
}
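FindSequence lets ProcessElementList deduplicate storage: an element's index list is appended to IndexBuffer only when an identical run is not already present; otherwise the existing offset is reused. A standalone sketch of that sharing, with hypothetical index lists:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

static size_t findSequence(const std::vector<uint32_t> &Buffer,
                           const std::vector<uint32_t> &Sequence) {
  if (Sequence.empty() || Buffer.size() < Sequence.size())
    return SIZE_MAX;
  for (size_t Idx = 0; Idx <= Buffer.size() - Sequence.size(); ++Idx)
    if (std::memcmp(&Buffer[Idx], Sequence.data(),
                    Sequence.size() * sizeof(uint32_t)) == 0)
      return Idx;
  return SIZE_MAX;
}

int main() {
  std::vector<uint32_t> IndexBuffer;
  const std::vector<std::vector<uint32_t>> Lists = {{0, 1, 2}, {1, 2}};
  for (const auto &Indices : Lists) {
    size_t Offset = findSequence(IndexBuffer, Indices);
    if (Offset == SIZE_MAX) {
      Offset = IndexBuffer.size(); // not found: append at the end
      IndexBuffer.insert(IndexBuffer.end(), Indices.begin(), Indices.end());
    }
    (void)Offset; // a real caller would record Offset as IndicesOffset
    // {1, 2} is found at offset 1 inside the already-stored {0, 1, 2},
    // so nothing new is appended for it.
  }
  return IndexBuffer.size() == 3 ? 0 : 1;
}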
diff --git a/contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp
index 6a6befdd3054..cb8af1aa9955 100644
--- a/contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp
@@ -72,7 +72,7 @@ class ELFObjectWriter;
struct ELFWriter;
bool isDwoSection(const MCSectionELF &Sec) {
- return Sec.getName().endswith(".dwo");
+ return Sec.getName().ends_with(".dwo");
}
class SymbolTableWriter {
@@ -152,8 +152,9 @@ struct ELFWriter {
public:
ELFWriter(ELFObjectWriter &OWriter, raw_pwrite_stream &OS,
bool IsLittleEndian, DwoMode Mode)
- : OWriter(OWriter),
- W(OS, IsLittleEndian ? support::little : support::big), Mode(Mode) {}
+ : OWriter(OWriter), W(OS, IsLittleEndian ? llvm::endianness::little
+ : llvm::endianness::big),
+ Mode(Mode) {}
void WriteWord(uint64_t Word) {
if (is64Bit())
@@ -227,8 +228,7 @@ class ELFObjectWriter : public MCObjectWriter {
bool hasRelocationAddend() const;
- bool shouldRelocateWithSymbol(const MCAssembler &Asm,
- const MCSymbolRefExpr *RefA,
+ bool shouldRelocateWithSymbol(const MCAssembler &Asm, const MCValue &Val,
const MCSymbolELF *Sym, uint64_t C,
unsigned Type) const;
@@ -407,8 +407,8 @@ void ELFWriter::writeHeader(const MCAssembler &Asm) {
W.OS << char(is64Bit() ? ELF::ELFCLASS64 : ELF::ELFCLASS32); // e_ident[EI_CLASS]
// e_ident[EI_DATA]
- W.OS << char(W.Endian == support::little ? ELF::ELFDATA2LSB
- : ELF::ELFDATA2MSB);
+ W.OS << char(W.Endian == llvm::endianness::little ? ELF::ELFDATA2LSB
+ : ELF::ELFDATA2MSB);
W.OS << char(ELF::EV_CURRENT); // e_ident[EI_VERSION]
// e_ident[EI_OSABI]
@@ -843,7 +843,7 @@ bool ELFWriter::maybeWriteCompression(
uint32_t ChType, uint64_t Size,
SmallVectorImpl<uint8_t> &CompressedContents, Align Alignment) {
uint64_t HdrSize =
- is64Bit() ? sizeof(ELF::Elf32_Chdr) : sizeof(ELF::Elf64_Chdr);
+ is64Bit() ? sizeof(ELF::Elf64_Chdr) : sizeof(ELF::Elf32_Chdr);
if (Size <= HdrSize + CompressedContents.size())
return false;
// Platform specific header is followed by compressed data.
@@ -872,7 +872,7 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
const DebugCompressionType CompressionType = MAI->compressDebugSections();
if (CompressionType == DebugCompressionType::None ||
- !SectionName.startswith(".debug_")) {
+ !SectionName.starts_with(".debug_")) {
Asm.writeSectionData(W.OS, &Section, Layout);
return;
}
@@ -1250,7 +1250,7 @@ void ELFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
StringRef Prefix = AliasName.substr(0, Pos);
StringRef Rest = AliasName.substr(Pos);
StringRef Tail = Rest;
- if (Rest.startswith("@@@"))
+ if (Rest.starts_with("@@@"))
Tail = Rest.substr(Symbol.isUndefined() ? 2 : 1);
auto *Alias =
@@ -1268,8 +1268,8 @@ void ELFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
if (!Symbol.isUndefined() && S.KeepOriginalSym)
continue;
- if (Symbol.isUndefined() && Rest.startswith("@@") &&
- !Rest.startswith("@@@")) {
+ if (Symbol.isUndefined() && Rest.starts_with("@@") &&
+ !Rest.starts_with("@@@")) {
Asm.getContext().reportError(S.Loc, "default version symbol " +
AliasName + " must be defined");
continue;
@@ -1287,7 +1287,7 @@ void ELFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
for (const MCSymbol *&Sym : AddrsigSyms) {
if (const MCSymbol *R = Renames.lookup(cast<MCSymbolELF>(Sym)))
Sym = R;
- if (Sym->isInSection() && Sym->getName().startswith(".L"))
+ if (Sym->isInSection() && Sym->getName().starts_with(".L"))
Sym = Sym->getSection().getBeginSymbol();
Sym->setUsedInReloc();
}
@@ -1297,10 +1297,11 @@ void ELFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
// to use a relocation with a section if that is possible. Using the section
// allows us to omit some local symbols from the symbol table.
bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
- const MCSymbolRefExpr *RefA,
+ const MCValue &Val,
const MCSymbolELF *Sym,
uint64_t C,
unsigned Type) const {
+ const MCSymbolRefExpr *RefA = Val.getSymA();
// A PCRel relocation to an absolute value has no symbol (or section). We
// represent that with a relocation to a null section.
if (!RefA)
@@ -1419,7 +1420,7 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
if (Asm.isThumbFunc(Sym))
return true;
- if (TargetObjectWriter->needsRelocateWithSymbol(*Sym, Type))
+ if (TargetObjectWriter->needsRelocateWithSymbol(Val, *Sym, Type))
return true;
return false;
}
@@ -1484,7 +1485,7 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm,
const auto *Parent = cast<MCSectionELF>(Fragment->getParent());
// Emitting relocation with symbol for CG Profile to help with --cg-profile.
bool RelocateWithSymbol =
- shouldRelocateWithSymbol(Asm, RefA, SymA, C, Type) ||
+ shouldRelocateWithSymbol(Asm, Target, SymA, C, Type) ||
(Parent->getType() == ELF::SHT_LLVM_CALL_GRAPH_PROFILE);
uint64_t Addend = 0;
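The support::endianness to llvm::endianness renames in this file are mechanical; the underlying idiom is unchanged, and helpers such as support::endian::write still byte-swap on host/target mismatch, replacing manual sys::IsBigEndianHost checks. A small sketch, assuming llvm/Support/EndianStream.h:

#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>

int main() {
  // Writes 44 33 22 11 regardless of the host byte order; the helper swaps
  // as needed, which the removed sys::IsBigEndianHost code did by hand.
  uint32_t Value = 0x11223344;
  llvm::support::endian::write(llvm::outs(), Value, llvm::endianness::little);
  return 0;
}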
diff --git a/contrib/llvm-project/llvm/lib/MC/GOFFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/GOFFObjectWriter.cpp
new file mode 100644
index 000000000000..addeb6db9596
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/MC/GOFFObjectWriter.cpp
@@ -0,0 +1,297 @@
+//===- lib/MC/GOFFObjectWriter.cpp - GOFF File Writer ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the GOFF object file writer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/GOFF.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCGOFFObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "goff-writer"
+
+namespace {
+
+// The standard System/390 convention is to name the high-order (leftmost) bit
+// in a byte as bit zero. The Flags type helps set bits in a byte according
+// to this numbering convention.
+class Flags {
+ uint8_t Val;
+
+ constexpr static uint8_t bits(uint8_t BitIndex, uint8_t Length, uint8_t Value,
+ uint8_t OldValue) {
+ assert(BitIndex < 8 && "Bit index out of bounds!");
+ assert(Length + BitIndex <= 8 && "Bit length too long!");
+
+ uint8_t Mask = ((1 << Length) - 1) << (8 - BitIndex - Length);
+ Value = Value << (8 - BitIndex - Length);
+ assert((Value & Mask) == Value && "Bits set outside of range!");
+
+ return (OldValue & ~Mask) | Value;
+ }
+
+public:
+ constexpr Flags() : Val(0) {}
+ constexpr Flags(uint8_t BitIndex, uint8_t Length, uint8_t Value)
+ : Val(bits(BitIndex, Length, Value, 0)) {}
+
+ void set(uint8_t BitIndex, uint8_t Length, uint8_t Value) {
+ Val = bits(BitIndex, Length, Value, Val);
+ }
+
+ constexpr operator uint8_t() const { return Val; }
+};
+
+// Common flag values on records.
+
+// Flag: This record is continued.
+constexpr uint8_t RecContinued = Flags(7, 1, 1);
+
+// Flag: This record is a continuation.
+constexpr uint8_t RecContinuation = Flags(6, 1, 1);
+
+// The GOFFOstream is responsible for writing the data into the format's
+// fixed-size physical records. A user of this class announces the start of a
+// new logical record and the size of its content. While the content is being
+// written, the physical records are created for it. Any fill bytes needed at
+// the end of a physical record are written automatically. In principle, the
+// GOFFOstream is agnostic of the endianness of the content; however, it also
+// supports writing data in big-endian byte order.
+class GOFFOstream : public raw_ostream {
+ /// The underlying raw_pwrite_stream.
+ raw_pwrite_stream &OS;
+
+ /// The remaining size of this logical record, including fill bytes.
+ size_t RemainingSize;
+
+#ifndef NDEBUG
+ /// The number of bytes needed to fill up the last physical record.
+ size_t Gap = 0;
+#endif
+
+  /// The number of logical records emitted so far.
+ uint32_t LogicalRecords;
+
+ /// The type of the current (logical) record.
+ GOFF::RecordType CurrentType;
+
+  /// Signals the start of a new logical record.
+ bool NewLogicalRecord;
+
+  /// Statically allocated buffer for the stream, used by the raw_ostream
+  /// class. The buffer is sized to hold the content of a physical record.
+ char Buffer[GOFF::RecordContentLength];
+
+  // Return the number of bytes left to write until the next physical record.
+  // Please note that we maintain the total number of bytes left, not the
+  // written size.
+ size_t bytesToNextPhysicalRecord() {
+ size_t Bytes = RemainingSize % GOFF::RecordContentLength;
+ return Bytes ? Bytes : GOFF::RecordContentLength;
+ }
+
+ /// Write the record prefix of a physical record, using the given record type.
+ static void writeRecordPrefix(raw_ostream &OS, GOFF::RecordType Type,
+ size_t RemainingSize,
+ uint8_t Flags = RecContinuation);
+
+ /// Fill the last physical record of a logical record with zero bytes.
+ void fillRecord();
+
+ /// See raw_ostream::write_impl.
+ void write_impl(const char *Ptr, size_t Size) override;
+
+ /// Return the current position within the stream, not counting the bytes
+ /// currently in the buffer.
+ uint64_t current_pos() const override { return OS.tell(); }
+
+public:
+ explicit GOFFOstream(raw_pwrite_stream &OS)
+ : OS(OS), RemainingSize(0), LogicalRecords(0), NewLogicalRecord(false) {
+ SetBuffer(Buffer, sizeof(Buffer));
+ }
+
+ ~GOFFOstream() { finalize(); }
+
+ raw_pwrite_stream &getOS() { return OS; }
+
+ void newRecord(GOFF::RecordType Type, size_t Size);
+
+ void finalize() { fillRecord(); }
+
+ uint32_t logicalRecords() { return LogicalRecords; }
+
+ // Support for endian-specific data.
+ template <typename value_type> void writebe(value_type Value) {
+ Value =
+ support::endian::byte_swap<value_type>(Value, llvm::endianness::big);
+ write(reinterpret_cast<const char *>(&Value), sizeof(value_type));
+ }
+};
+
+void GOFFOstream::writeRecordPrefix(raw_ostream &OS, GOFF::RecordType Type,
+ size_t RemainingSize, uint8_t Flags) {
+ uint8_t TypeAndFlags = Flags | (Type << 4);
+ if (RemainingSize > GOFF::RecordLength)
+ TypeAndFlags |= RecContinued;
+ OS << static_cast<unsigned char>(GOFF::PTVPrefix) // Record Type
+ << static_cast<unsigned char>(TypeAndFlags) // Continuation
+ << static_cast<unsigned char>(0); // Version
+}
+
+void GOFFOstream::newRecord(GOFF::RecordType Type, size_t Size) {
+ fillRecord();
+ CurrentType = Type;
+ RemainingSize = Size;
+#ifdef NDEBUG
+ size_t Gap;
+#endif
+ Gap = (RemainingSize % GOFF::RecordContentLength);
+ if (Gap) {
+ Gap = GOFF::RecordContentLength - Gap;
+ RemainingSize += Gap;
+ }
+ NewLogicalRecord = true;
+ ++LogicalRecords;
+}
+
+void GOFFOstream::fillRecord() {
+ assert((GetNumBytesInBuffer() <= RemainingSize) &&
+ "More bytes in buffer than expected");
+ size_t Remains = RemainingSize - GetNumBytesInBuffer();
+ if (Remains) {
+ assert(Remains == Gap && "Wrong size of fill gap");
+ assert((Remains < GOFF::RecordLength) &&
+ "Attempt to fill more than one physical record");
+ raw_ostream::write_zeros(Remains);
+ }
+ flush();
+ assert(RemainingSize == 0 && "Not fully flushed");
+ assert(GetNumBytesInBuffer() == 0 && "Buffer not fully empty");
+}
+
+// This function is called from the raw_ostream implementation if:
+// - The internal buffer is full. Size is exactly the size of the buffer.
+// - Data larger than the internal buffer is written. Size is a multiple of the
+// buffer size.
+// - flush() has been called. Size is at most the buffer size.
+// The GOFFOstream implementation ensures that flush() is called before a new
+// logical record begins. Therefore it is sufficient to check for a new block
+// only once.
+void GOFFOstream::write_impl(const char *Ptr, size_t Size) {
+ assert((RemainingSize >= Size) && "Attempt to write too much data");
+ assert(RemainingSize && "Logical record overflow");
+ if (!(RemainingSize % GOFF::RecordContentLength)) {
+ writeRecordPrefix(OS, CurrentType, RemainingSize,
+ NewLogicalRecord ? 0 : RecContinuation);
+ NewLogicalRecord = false;
+ }
+ assert(!NewLogicalRecord &&
+ "New logical record not on physical record boundary");
+
+ size_t Idx = 0;
+ while (Size > 0) {
+ size_t BytesToWrite = bytesToNextPhysicalRecord();
+ if (BytesToWrite > Size)
+ BytesToWrite = Size;
+ OS.write(Ptr + Idx, BytesToWrite);
+ Idx += BytesToWrite;
+ Size -= BytesToWrite;
+ RemainingSize -= BytesToWrite;
+ if (Size)
+ writeRecordPrefix(OS, CurrentType, RemainingSize);
+ }
+}
+
+class GOFFObjectWriter : public MCObjectWriter {
+ // The target specific GOFF writer instance.
+ std::unique_ptr<MCGOFFObjectTargetWriter> TargetObjectWriter;
+
+ // The stream used to write the GOFF records.
+ GOFFOstream OS;
+
+public:
+ GOFFObjectWriter(std::unique_ptr<MCGOFFObjectTargetWriter> MOTW,
+ raw_pwrite_stream &OS)
+ : TargetObjectWriter(std::move(MOTW)), OS(OS) {}
+
+ ~GOFFObjectWriter() override {}
+
+ // Write GOFF records.
+ void writeHeader();
+ void writeEnd();
+
+ // Implementation of the MCObjectWriter interface.
+ void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue) override {}
+ void executePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) override {}
+ uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
+};
+} // end anonymous namespace
+
+void GOFFObjectWriter::writeHeader() {
+ OS.newRecord(GOFF::RT_HDR, /*Size=*/57);
+ OS.write_zeros(1); // Reserved
+ OS.writebe<uint32_t>(0); // Target Hardware Environment
+ OS.writebe<uint32_t>(0); // Target Operating System Environment
+ OS.write_zeros(2); // Reserved
+ OS.writebe<uint16_t>(0); // CCSID
+ OS.write_zeros(16); // Character Set name
+ OS.write_zeros(16); // Language Product Identifier
+ OS.writebe<uint32_t>(1); // Architecture Level
+ OS.writebe<uint16_t>(0); // Module Properties Length
+ OS.write_zeros(6); // Reserved
+}
+
+void GOFFObjectWriter::writeEnd() {
+ uint8_t F = GOFF::END_EPR_None;
+ uint8_t AMODE = 0;
+ uint32_t ESDID = 0;
+
+ // TODO Set Flags/AMODE/ESDID for entry point.
+
+ OS.newRecord(GOFF::RT_END, /*Size=*/13);
+ OS.writebe<uint8_t>(Flags(6, 2, F)); // Indicator flags
+ OS.writebe<uint8_t>(AMODE); // AMODE
+ OS.write_zeros(3); // Reserved
+ // The record count is the number of logical records. In principle, this value
+ // is available as OS.logicalRecords(). However, some tools rely on this field
+ // being zero.
+ OS.writebe<uint32_t>(0); // Record Count
+ OS.writebe<uint32_t>(ESDID); // ESDID (of entry point)
+ OS.finalize();
+}
+
+uint64_t GOFFObjectWriter::writeObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ uint64_t StartOffset = OS.tell();
+
+ writeHeader();
+ writeEnd();
+
+ LLVM_DEBUG(dbgs() << "Wrote " << OS.logicalRecords() << " logical records.");
+
+ return OS.tell() - StartOffset;
+}
+
+std::unique_ptr<MCObjectWriter>
+llvm::createGOFFObjectWriter(std::unique_ptr<MCGOFFObjectTargetWriter> MOTW,
+ raw_pwrite_stream &OS) {
+ return std::make_unique<GOFFObjectWriter>(std::move(MOTW), OS);
+}
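The Flags helper above maps the System/390 left-to-right bit numbering onto ordinary byte values: RecContinued = Flags(7, 1, 1) sets the least significant bit, and RecContinuation = Flags(6, 1, 1) the next one up. An illustrative re-implementation of the bit math (not the class from the patch):

#include <cassert>
#include <cstdint>

// System/390 numbers bits from the left: bit 0 is the most significant.
static uint8_t setBits(uint8_t Old, uint8_t BitIndex, uint8_t Length,
                       uint8_t Value) {
  uint8_t Mask = ((1u << Length) - 1) << (8 - BitIndex - Length);
  return (Old & ~Mask) | uint8_t(Value << (8 - BitIndex - Length));
}

int main() {
  // A 1-bit field at bit 7 is the least significant bit: Flags(7, 1, 1) == 1.
  assert(setBits(0, 7, 1, 1) == 0x01);
  // A 1-bit field at bit 6 is the next bit up: Flags(6, 1, 1) == 2.
  assert(setBits(0, 6, 1, 1) == 0x02);
  return 0;
}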
diff --git a/contrib/llvm-project/llvm/lib/MC/MCAsmBackend.cpp b/contrib/llvm-project/llvm/lib/MC/MCAsmBackend.cpp
index 64bbc63719c7..616576e945be 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCAsmBackend.cpp
@@ -10,6 +10,7 @@
#include "llvm/MC/MCDXContainerWriter.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCGOFFObjectWriter.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSPIRVObjectWriter.h"
@@ -22,7 +23,7 @@
using namespace llvm;
-MCAsmBackend::MCAsmBackend(support::endianness Endian, unsigned RelaxFixupKind)
+MCAsmBackend::MCAsmBackend(llvm::endianness Endian, unsigned RelaxFixupKind)
: Endian(Endian), RelaxFixupKind(RelaxFixupKind) {}
MCAsmBackend::~MCAsmBackend() = default;
@@ -32,11 +33,11 @@ MCAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
auto TW = createObjectTargetWriter();
switch (TW->getFormat()) {
case Triple::ELF:
- return createELFObjectWriter(cast<MCELFObjectTargetWriter>(std::move(TW)), OS,
- Endian == support::little);
+ return createELFObjectWriter(cast<MCELFObjectTargetWriter>(std::move(TW)),
+ OS, Endian == llvm::endianness::little);
case Triple::MachO:
return createMachObjectWriter(cast<MCMachObjectTargetWriter>(std::move(TW)),
- OS, Endian == support::little);
+ OS, Endian == llvm::endianness::little);
case Triple::COFF:
return createWinCOFFObjectWriter(
cast<MCWinCOFFObjectTargetWriter>(std::move(TW)), OS);
@@ -46,6 +47,9 @@ MCAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
case Triple::Wasm:
return createWasmObjectWriter(cast<MCWasmObjectTargetWriter>(std::move(TW)),
OS);
+ case Triple::GOFF:
+ return createGOFFObjectWriter(cast<MCGOFFObjectTargetWriter>(std::move(TW)),
+ OS);
case Triple::XCOFF:
return createXCOFFObjectWriter(
cast<MCXCOFFObjectTargetWriter>(std::move(TW)), OS);
@@ -68,7 +72,7 @@ MCAsmBackend::createDwoObjectWriter(raw_pwrite_stream &OS,
case Triple::ELF:
return createELFDwoObjectWriter(
cast<MCELFObjectTargetWriter>(std::move(TW)), OS, DwoOS,
- Endian == support::little);
+ Endian == llvm::endianness::little);
case Triple::Wasm:
return createWasmDwoObjectWriter(
cast<MCWasmObjectTargetWriter>(std::move(TW)), OS, DwoOS);
@@ -88,7 +92,7 @@ const MCFixupKindInfo &MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"FK_Data_2", 0, 16, 0},
{"FK_Data_4", 0, 32, 0},
{"FK_Data_8", 0, 64, 0},
- {"FK_Data_6b", 0, 6, 0},
+ {"FK_Data_leb128", 0, 0, 0},
{"FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel},
{"FK_PCRel_2", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
{"FK_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
diff --git a/contrib/llvm-project/llvm/lib/MC/MCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/MC/MCAsmInfo.cpp
index 71564ba9d5a1..290be4037166 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCAsmInfo.cpp
@@ -64,22 +64,10 @@ MCAsmInfo::MCAsmInfo() {
SupportsExtendedDwarfLocDirective = DwarfExtendedLoc == Enable;
if (UseLEB128Directives != cl::BOU_UNSET)
HasLEB128Directives = UseLEB128Directives == cl::BOU_TRUE;
-
- // FIXME: Clang's logic should be synced with the logic used to initialize
- // this member and the two implementations should be merged.
- // For reference:
- // - Solaris always enables the integrated assembler by default
- // - SparcELFMCAsmInfo and X86ELFMCAsmInfo are handling this case
- // - Windows always enables the integrated assembler by default
- // - MCAsmInfoCOFF is handling this case, should it be MCAsmInfoMicrosoft?
- // - MachO targets always enables the integrated assembler by default
- // - MCAsmInfoDarwin is handling this case
- // - Generic_GCC toolchains enable the integrated assembler on a per
- // architecture basis.
- // - The target subclasses for AArch64, ARM, and X86 handle these cases
UseIntegratedAssembler = true;
ParseInlineAsmUsingAsmParser = false;
PreserveAsmComments = true;
+ PPCUseFullRegisterNames = false;
}
MCAsmInfo::~MCAsmInfo() = default;
diff --git a/contrib/llvm-project/llvm/lib/MC/MCAsmInfoELF.cpp b/contrib/llvm-project/llvm/lib/MC/MCAsmInfoELF.cpp
index 9b8b8db794f0..7eb89ef6a02b 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCAsmInfoELF.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCAsmInfoELF.cpp
@@ -21,6 +21,10 @@ using namespace llvm;
void MCAsmInfoELF::anchor() {}
MCSection *MCAsmInfoELF::getNonexecutableStackSection(MCContext &Ctx) const {
+ // Solaris doesn't know/doesn't care about .note.GNU-stack sections, so
+ // don't emit them.
+ if (Ctx.getTargetTriple().isOSSolaris())
+ return nullptr;
return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0);
}
diff --git a/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp
index 06de70ad2f39..9e1d108ac14d 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp
@@ -190,7 +190,7 @@ public:
void emitXCOFFLocalCommonSymbol(MCSymbol *LabelSym, uint64_t Size,
MCSymbol *CsectSym, Align Alignment) override;
void emitXCOFFSymbolLinkageWithVisibility(MCSymbol *Symbol,
- MCSymbolAttr Linakge,
+ MCSymbolAttr Linkage,
MCSymbolAttr Visibility) override;
void emitXCOFFRenameDirective(const MCSymbol *Name,
StringRef Rename) override;
@@ -467,12 +467,12 @@ void MCAsmStreamer::addExplicitComment(const Twine &T) {
StringRef c = T.getSingleStringRef();
if (c.equals(StringRef(MAI->getSeparatorString())))
return;
- if (c.startswith(StringRef("//"))) {
+ if (c.starts_with(StringRef("//"))) {
ExplicitCommentToEmit.append("\t");
ExplicitCommentToEmit.append(MAI->getCommentString());
// drop //
ExplicitCommentToEmit.append(c.slice(2, c.size()).str());
- } else if (c.startswith(StringRef("/*"))) {
+ } else if (c.starts_with(StringRef("/*"))) {
size_t p = 2, len = c.size() - 2;
// Emit each line of the comment on a separate line.
do {
@@ -485,7 +485,7 @@ void MCAsmStreamer::addExplicitComment(const Twine &T) {
ExplicitCommentToEmit.append("\n");
p = newp + 1;
} while (p < len);
- } else if (c.startswith(StringRef(MAI->getCommentString()))) {
+ } else if (c.starts_with(StringRef(MAI->getCommentString()))) {
ExplicitCommentToEmit.append("\t");
ExplicitCommentToEmit.append(c.str());
} else if (c.front() == '#') {
@@ -629,18 +629,11 @@ void MCAsmStreamer::emitVersionMin(MCVersionMinType Type, unsigned Major,
static const char *getPlatformName(MachO::PlatformType Type) {
switch (Type) {
- case MachO::PLATFORM_UNKNOWN: /* silence warning*/
- break;
- case MachO::PLATFORM_MACOS: return "macos";
- case MachO::PLATFORM_IOS: return "ios";
- case MachO::PLATFORM_TVOS: return "tvos";
- case MachO::PLATFORM_WATCHOS: return "watchos";
- case MachO::PLATFORM_BRIDGEOS: return "bridgeos";
- case MachO::PLATFORM_MACCATALYST: return "macCatalyst";
- case MachO::PLATFORM_IOSSIMULATOR: return "iossimulator";
- case MachO::PLATFORM_TVOSSIMULATOR: return "tvossimulator";
- case MachO::PLATFORM_WATCHOSSIMULATOR: return "watchossimulator";
- case MachO::PLATFORM_DRIVERKIT: return "driverkit";
+#define PLATFORM(platform, id, name, build_name, target, tapi_target, \
+ marketing) \
+ case MachO::PLATFORM_##platform: \
+ return #build_name;
+#include "llvm/BinaryFormat/MachO.def"
}
llvm_unreachable("Invalid Mach-O platform type");
}
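getPlatformName now expands the PLATFORM rows of MachO.def instead of a hand-maintained switch, so a platform added to the .def file is picked up here automatically. A miniature of the X-macro pattern (the real MachO.def rows carry more columns than this sketch):

#include <cstdio>

// Each PLATFORM(...) row expands differently depending on how the macro is
// defined at the point of inclusion; inlined here instead of a .def file.
#define MINI_PLATFORMS                                                         \
  PLATFORM(MACOS, 1, macos)                                                    \
  PLATFORM(IOS, 2, ios)

enum Platform {
#define PLATFORM(platform, id, build_name) PLATFORM_##platform = id,
  MINI_PLATFORMS
#undef PLATFORM
};

static const char *getName(Platform P) {
  switch (P) {
#define PLATFORM(platform, id, build_name)                                     \
  case PLATFORM_##platform:                                                    \
    return #build_name;
    MINI_PLATFORMS
#undef PLATFORM
  }
  return "unknown";
}

int main() { return std::printf("%s\n", getName(PLATFORM_IOS)) > 0 ? 0 : 1; }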
diff --git a/contrib/llvm-project/llvm/lib/MC/MCAssembler.cpp b/contrib/llvm-project/llvm/lib/MC/MCAssembler.cpp
index 55ed1a285cd7..def13044dfcc 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCAssembler.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCAssembler.cpp
@@ -193,9 +193,9 @@ const MCSymbol *MCAssembler::getAtom(const MCSymbol &S) const {
return S.getFragment()->getAtom();
}
-bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
- const MCFixup &Fixup, const MCFragment *DF,
- MCValue &Target, uint64_t &Value,
+bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, const MCFixup &Fixup,
+ const MCFragment *DF, MCValue &Target,
+ const MCSubtargetInfo *STI, uint64_t &Value,
bool &WasForced) const {
++stats::evaluateFixup;
@@ -227,7 +227,7 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
if (IsTarget)
return getBackend().evaluateTargetFixup(*this, Layout, Fixup, DF, Target,
- Value, WasForced);
+ STI, Value, WasForced);
unsigned FixupFlags = getBackendPtr()->getFixupKindInfo(Fixup.getKind()).Flags;
bool IsPCRel = getBackendPtr()->getFixupKindInfo(Fixup.getKind()).Flags &
@@ -282,7 +282,8 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
}
// Let the backend force a relocation if needed.
- if (IsResolved && getBackend().shouldForceRelocation(*this, Fixup, Target)) {
+ if (IsResolved &&
+ getBackend().shouldForceRelocation(*this, Fixup, Target, STI)) {
IsResolved = false;
WasForced = true;
}
@@ -521,7 +522,7 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
// FIXME: Embed in fragments instead?
uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F);
- support::endianness Endian = Asm.getBackend().Endian;
+ llvm::endianness Endian = Asm.getBackend().Endian;
if (const MCEncodedFragment *EF = dyn_cast<MCEncodedFragment>(&F))
Asm.writeFragmentPadding(OS, *EF, FragmentSize);
@@ -606,7 +607,7 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
// Duplicate V into Data as byte vector to reduce number of
// writes done. As such, do endian conversion here.
for (unsigned I = 0; I != VSize; ++I) {
- unsigned index = Endian == support::little ? I : (VSize - I - 1);
+ unsigned index = Endian == llvm::endianness::little ? I : (VSize - I - 1);
Data[I] = uint8_t(V >> (index * 8));
}
for (unsigned I = VSize; I < MaxChunkSize; ++I)
@@ -796,13 +797,13 @@ void MCAssembler::writeSectionData(raw_ostream &OS, const MCSection *Sec,
std::tuple<MCValue, uint64_t, bool>
MCAssembler::handleFixup(const MCAsmLayout &Layout, MCFragment &F,
- const MCFixup &Fixup) {
+ const MCFixup &Fixup, const MCSubtargetInfo *STI) {
// Evaluate the fixup.
MCValue Target;
uint64_t FixedValue;
bool WasForced;
- bool IsResolved = evaluateFixup(Layout, Fixup, &F, Target, FixedValue,
- WasForced);
+ bool IsResolved =
+ evaluateFixup(Layout, Fixup, &F, Target, STI, FixedValue, WasForced);
if (!IsResolved) {
// The fixup was unresolved, we need a relocation. Inform the object
// writer of the relocation, and give it an opportunity to adjust the
@@ -918,6 +919,12 @@ void MCAssembler::layout(MCAsmLayout &Layout) {
Contents = DF.getContents();
break;
}
+ case MCFragment::FT_LEB: {
+ auto &LF = cast<MCLEBFragment>(Frag);
+ Fixups = LF.getFixups();
+ Contents = LF.getContents();
+ break;
+ }
case MCFragment::FT_PseudoProbe: {
MCPseudoProbeAddrFragment &PF = cast<MCPseudoProbeAddrFragment>(Frag);
Fixups = PF.getFixups();
@@ -930,7 +937,7 @@ void MCAssembler::layout(MCAsmLayout &Layout) {
bool IsResolved;
MCValue Target;
std::tie(Target, FixedValue, IsResolved) =
- handleFixup(Layout, Frag, Fixup);
+ handleFixup(Layout, Frag, Fixup, STI);
getBackend().applyFixup(*this, Fixup, Target, Contents, FixedValue,
IsResolved, STI);
}
@@ -954,7 +961,8 @@ bool MCAssembler::fixupNeedsRelaxation(const MCFixup &Fixup,
MCValue Target;
uint64_t Value;
bool WasForced;
- bool Resolved = evaluateFixup(Layout, Fixup, DF, Target, Value, WasForced);
+ bool Resolved = evaluateFixup(Layout, Fixup, DF, Target,
+ DF->getSubtargetInfo(), Value, WasForced);
if (Target.getSymA() &&
Target.getSymA()->getKind() == MCSymbolRefExpr::VK_X86_ABS8 &&
Fixup.getKind() == FK_Data_1)
@@ -1006,12 +1014,27 @@ bool MCAssembler::relaxInstruction(MCAsmLayout &Layout,
}
bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
- uint64_t OldSize = LF.getContents().size();
+ const unsigned OldSize = static_cast<unsigned>(LF.getContents().size());
+ unsigned PadTo = OldSize;
int64_t Value;
- bool Abs = LF.getValue().evaluateKnownAbsolute(Value, Layout);
- if (!Abs)
- report_fatal_error("sleb128 and uleb128 expressions must be absolute");
- SmallString<8> &Data = LF.getContents();
+ SmallVectorImpl<char> &Data = LF.getContents();
+ LF.getFixups().clear();
+ // Use evaluateKnownAbsolute for Mach-O as a hack: .subsections_via_symbols
+ // requires that .uleb128 A-B is foldable where A and B reside in different
+ // fragments. This is used by __gcc_except_table.
+ bool Abs = getSubsectionsViaSymbols()
+ ? LF.getValue().evaluateKnownAbsolute(Value, Layout)
+ : LF.getValue().evaluateAsAbsolute(Value, Layout);
+ if (!Abs) {
+ if (!getBackend().relaxLEB128(LF, Layout, Value)) {
+ getContext().reportError(LF.getValue().getLoc(),
+ Twine(LF.isSigned() ? ".s" : ".u") +
+ "leb128 expression is not absolute");
+ LF.setValue(MCConstantExpr::create(0, Context));
+ }
+ uint8_t Tmp[10]; // maximum size: ceil(64/7)
+ PadTo = std::max(PadTo, encodeULEB128(uint64_t(Value), Tmp));
+ }
Data.clear();
raw_svector_ostream OSE(Data);
// The compiler can generate EH table assembly that is impossible to assemble
@@ -1019,9 +1042,9 @@ bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
// to a later alignment fragment. To accommodate such tables, relaxation can
// only increase an LEB fragment size here, not decrease it. See PR35809.
if (LF.isSigned())
- encodeSLEB128(Value, OSE, OldSize);
+ encodeSLEB128(Value, OSE, PadTo);
else
- encodeULEB128(Value, OSE, OldSize);
+ encodeULEB128(Value, OSE, PadTo);
return OldSize != LF.getContents().size();
}
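relaxLEB now tracks a PadTo width so an LEB fragment can only grow during relaxation: encodeULEB128's padding emits continuation bytes (0x80) and a final 0x00, keeping the encoding valid at the fixed width. A small sketch of PadTo, assuming llvm/Support/LEB128.h:

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::SmallString<8> Data;
  llvm::raw_svector_ostream OS(Data);
  // PadTo keeps the encoding three bytes wide: 0x81 0x80 0x00 still decodes
  // to 1, so a later pass can grow the value without moving what follows.
  llvm::encodeULEB128(1, OS, /*PadTo=*/3);
  return Data.size() == 3 ? 0 : 1;
}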
diff --git a/contrib/llvm-project/llvm/lib/MC/MCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/MC/MCCodeEmitter.cpp
index afbe31e0070c..0d114f12d58c 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCCodeEmitter.cpp
@@ -7,18 +7,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
MCCodeEmitter::MCCodeEmitter() = default;
MCCodeEmitter::~MCCodeEmitter() = default;
-
-void MCCodeEmitter::encodeInstruction(const MCInst &Inst,
- SmallVectorImpl<char> &CB,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- raw_svector_ostream OS(CB);
- encodeInstruction(Inst, OS, Fixups, STI);
-}
diff --git a/contrib/llvm-project/llvm/lib/MC/MCCodeView.cpp b/contrib/llvm-project/llvm/lib/MC/MCCodeView.cpp
index a27ef64bec0f..d234ce110918 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCCodeView.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCCodeView.cpp
@@ -275,32 +275,35 @@ void CodeViewContext::addLineEntry(const MCCVLoc &LineEntry) {
std::vector<MCCVLoc>
CodeViewContext::getFunctionLineEntries(unsigned FuncId) {
std::vector<MCCVLoc> FilteredLines;
- auto I = MCCVLineStartStop.find(FuncId);
- if (I != MCCVLineStartStop.end()) {
- MCCVFunctionInfo *SiteInfo = getCVFunctionInfo(FuncId);
- for (size_t Idx = I->second.first, End = I->second.second; Idx != End;
- ++Idx) {
- unsigned LocationFuncId = MCCVLines[Idx].getFunctionId();
- if (LocationFuncId == FuncId) {
- // This was a .cv_loc directly for FuncId, so record it.
- FilteredLines.push_back(MCCVLines[Idx]);
- } else {
- // Check if the current location is inlined in this function. If it is,
- // synthesize a statement .cv_loc at the original inlined call site.
- auto I = SiteInfo->InlinedAtMap.find(LocationFuncId);
- if (I != SiteInfo->InlinedAtMap.end()) {
- MCCVFunctionInfo::LineInfo &IA = I->second;
- // Only add the location if it differs from the previous location.
- // Large inlined calls will have many .cv_loc entries and we only need
- // one line table entry in the parent function.
- if (FilteredLines.empty() ||
- FilteredLines.back().getFileNum() != IA.File ||
- FilteredLines.back().getLine() != IA.Line ||
- FilteredLines.back().getColumn() != IA.Col) {
- FilteredLines.push_back(MCCVLoc(
- MCCVLines[Idx].getLabel(),
- FuncId, IA.File, IA.Line, IA.Col, false, false));
- }
+ size_t LocBegin;
+ size_t LocEnd;
+ std::tie(LocBegin, LocEnd) = getLineExtentIncludingInlinees(FuncId);
+ if (LocBegin >= LocEnd) {
+ return FilteredLines;
+ }
+
+ MCCVFunctionInfo *SiteInfo = getCVFunctionInfo(FuncId);
+ for (size_t Idx = LocBegin; Idx != LocEnd; ++Idx) {
+ unsigned LocationFuncId = MCCVLines[Idx].getFunctionId();
+ if (LocationFuncId == FuncId) {
+ // This was a .cv_loc directly for FuncId, so record it.
+ FilteredLines.push_back(MCCVLines[Idx]);
+ } else {
+ // Check if the current location is inlined in this function. If it is,
+ // synthesize a statement .cv_loc at the original inlined call site.
+ auto I = SiteInfo->InlinedAtMap.find(LocationFuncId);
+ if (I != SiteInfo->InlinedAtMap.end()) {
+ MCCVFunctionInfo::LineInfo &IA = I->second;
+ // Only add the location if it differs from the previous location.
+ // Large inlined calls will have many .cv_loc entries and we only need
+ // one line table entry in the parent function.
+ if (FilteredLines.empty() ||
+ FilteredLines.back().getFileNum() != IA.File ||
+ FilteredLines.back().getLine() != IA.Line ||
+ FilteredLines.back().getColumn() != IA.Col) {
+ FilteredLines.push_back(MCCVLoc(MCCVLines[Idx].getLabel(), FuncId,
+ IA.File, IA.Line, IA.Col, false,
+ false));
}
}
}
@@ -316,6 +319,26 @@ std::pair<size_t, size_t> CodeViewContext::getLineExtent(unsigned FuncId) {
return I->second;
}
+std::pair<size_t, size_t>
+CodeViewContext::getLineExtentIncludingInlinees(unsigned FuncId) {
+ size_t LocBegin;
+ size_t LocEnd;
+ std::tie(LocBegin, LocEnd) = getLineExtent(FuncId);
+
+ // Include all child inline call sites in our extent.
+ MCCVFunctionInfo *SiteInfo = getCVFunctionInfo(FuncId);
+ if (SiteInfo) {
+ for (auto &KV : SiteInfo->InlinedAtMap) {
+ unsigned ChildId = KV.first;
+ auto Extent = getLineExtent(ChildId);
+ LocBegin = std::min(LocBegin, Extent.first);
+ LocEnd = std::max(LocEnd, Extent.second);
+ }
+ }
+
+ return {LocBegin, LocEnd};
+}
+
ArrayRef<MCCVLoc> CodeViewContext::getLinesForExtent(size_t L, size_t R) {
if (R <= L)
return std::nullopt;
@@ -463,16 +486,7 @@ void CodeViewContext::encodeInlineLineTable(MCAsmLayout &Layout,
MCCVInlineLineTableFragment &Frag) {
size_t LocBegin;
size_t LocEnd;
- std::tie(LocBegin, LocEnd) = getLineExtent(Frag.SiteFuncId);
-
- // Include all child inline call sites in our .cv_loc extent.
- MCCVFunctionInfo *SiteInfo = getCVFunctionInfo(Frag.SiteFuncId);
- for (auto &KV : SiteInfo->InlinedAtMap) {
- unsigned ChildId = KV.first;
- auto Extent = getLineExtent(ChildId);
- LocBegin = std::min(LocBegin, Extent.first);
- LocEnd = std::max(LocEnd, Extent.second);
- }
+ std::tie(LocBegin, LocEnd) = getLineExtentIncludingInlinees(Frag.SiteFuncId);
if (LocBegin >= LocEnd)
return;
@@ -507,6 +521,8 @@ void CodeViewContext::encodeInlineLineTable(MCAsmLayout &Layout,
LastSourceLoc.File = Frag.StartFileId;
LastSourceLoc.Line = Frag.StartLineNum;
+ MCCVFunctionInfo *SiteInfo = getCVFunctionInfo(Frag.SiteFuncId);
+
SmallVectorImpl<char> &Buffer = Frag.getContents();
Buffer.clear(); // Clear old contents if we went through relaxation.
for (const MCCVLoc &Loc : Locs) {
@@ -638,7 +654,7 @@ void CodeViewContext::encodeDefRange(MCAsmLayout &Layout,
}
unsigned NumGaps = J - I - 1;
- support::endian::Writer LEWriter(OS, support::little);
+ support::endian::Writer LEWriter(OS, llvm::endianness::little);
unsigned Bias = 0;
// We must split the range into chunks of MaxDefRange, this is a fundamental
diff --git a/contrib/llvm-project/llvm/lib/MC/MCContext.cpp b/contrib/llvm-project/llvm/lib/MC/MCContext.cpp
index c443f46e0242..6e72b5062a1d 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCContext.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCContext.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCContext.h"
-#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
@@ -272,7 +271,7 @@ MCSymbol *MCContext::createSymbol(StringRef Name, bool AlwaysAddSuffix,
// label, if used.
bool IsTemporary = CanBeUnnamed;
if (AllowTemporaryLabels && !IsTemporary)
- IsTemporary = Name.startswith(MAI->getPrivateGlobalPrefix());
+ IsTemporary = Name.starts_with(MAI->getPrivateGlobalPrefix());
SmallString<128> NewName = Name;
bool AddSuffix = AlwaysAddSuffix;
@@ -383,8 +382,8 @@ MCContext::createXCOFFSymbolImpl(const StringMapEntry<bool> *Name,
return new (nullptr, *this) MCSymbolXCOFF(nullptr, IsTemporary);
StringRef OriginalName = Name->first();
- if (OriginalName.startswith("._Renamed..") ||
- OriginalName.startswith("_Renamed.."))
+ if (OriginalName.starts_with("._Renamed..") ||
+ OriginalName.starts_with("_Renamed.."))
reportError(SMLoc(), "invalid symbol name from source");
if (MAI->isValidUnquotedName(OriginalName))
@@ -592,7 +591,7 @@ MCSectionELF *MCContext::getELFSection(const Twine &Section, unsigned Type,
.StartsWith(".gnu.linkonce.td.", SectionKind::getThreadData())
.StartsWith(".llvm.linkonce.td.", SectionKind::getThreadData())
.StartsWith(".debug_", SectionKind::getMetadata())
- .Default(SectionKind::getText());
+ .Default(SectionKind::getReadOnly());
MCSectionELF *Result =
createELFSectionImpl(CachedName, Type, Flags, Kind, EntrySize, GroupSym,
@@ -629,8 +628,8 @@ void MCContext::recordELFMergeableSectionInfo(StringRef SectionName,
}
bool MCContext::isELFImplicitMergeableSectionNamePrefix(StringRef SectionName) {
- return SectionName.startswith(".rodata.str") ||
- SectionName.startswith(".rodata.cst");
+ return SectionName.starts_with(".rodata.str") ||
+ SectionName.starts_with(".rodata.cst");
}
bool MCContext::isELFGenericMergeableSection(StringRef SectionName) {
diff --git a/contrib/llvm-project/llvm/lib/MC/MCDXContainerWriter.cpp b/contrib/llvm-project/llvm/lib/MC/MCDXContainerWriter.cpp
index 028bfe6e79a1..0580dc7e4282 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCDXContainerWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCDXContainerWriter.cpp
@@ -30,7 +30,7 @@ class DXContainerObjectWriter : public MCObjectWriter {
public:
DXContainerObjectWriter(std::unique_ptr<MCDXContainerTargetWriter> MOTW,
raw_pwrite_stream &OS)
- : W(OS, support::little), TargetObjectWriter(std::move(MOTW)) {}
+ : W(OS, llvm::endianness::little), TargetObjectWriter(std::move(MOTW)) {}
~DXContainerObjectWriter() override {}
diff --git a/contrib/llvm-project/llvm/lib/MC/MCDisassembler/Disassembler.cpp b/contrib/llvm-project/llvm/lib/MC/MCDisassembler/Disassembler.cpp
index 067b951fbfcc..5e5a163c2902 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCDisassembler/Disassembler.cpp
@@ -180,12 +180,13 @@ static int getItineraryLatency(LLVMDisasmContext *DC, const MCInst &Inst) {
const MCInstrDesc& Desc = DC->getInstrInfo()->get(Inst.getOpcode());
unsigned SCClass = Desc.getSchedClass();
- int Latency = 0;
- for (unsigned OpIdx = 0, OpIdxEnd = Inst.getNumOperands(); OpIdx != OpIdxEnd;
- ++OpIdx)
- Latency = std::max(Latency, IID.getOperandCycle(SCClass, OpIdx));
+ unsigned Latency = 0;
- return Latency;
+ for (unsigned Idx = 0, IdxEnd = Inst.getNumOperands(); Idx != IdxEnd; ++Idx)
+ if (std::optional<unsigned> OperCycle = IID.getOperandCycle(SCClass, Idx))
+ Latency = std::max(Latency, *OperCycle);
+
+ return (int)Latency;
}
/// Gets latency information for \p Inst, based on \p DC information.
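The rewritten getItineraryLatency reflects getOperandCycle returning std::optional<unsigned> instead of an int that used -1 to mean "no information". A standalone sketch of the same accumulation, with a hypothetical stand-in for the itinerary query:

#include <algorithm>
#include <optional>

// Hypothetical stand-in for InstrItineraryData::getOperandCycle, which now
// reports "no cycle information" as std::nullopt rather than -1.
static std::optional<unsigned> operandCycle(unsigned Idx) {
  return Idx == 0 ? std::optional<unsigned>(3u) : std::nullopt;
}

int main() {
  unsigned Latency = 0;
  for (unsigned Idx = 0; Idx != 2; ++Idx)
    if (std::optional<unsigned> Cycle = operandCycle(Idx))
      Latency = std::max(Latency, *Cycle); // skip operands with no data
  return Latency == 3 ? 0 : 1;
}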
diff --git a/contrib/llvm-project/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm-project/llvm/lib/MC/MCDwarf.cpp
index 55632f2fe76a..d0face9140de 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCDwarf.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCDwarf.cpp
@@ -386,7 +386,7 @@ void MCDwarfLineTableHeader::emitV2FileDirTables(MCStreamer *MCOS) const {
}
static void emitOneV5FileEntry(MCStreamer *MCOS, const MCDwarfFile &DwarfFile,
- bool EmitMD5, bool HasSource,
+ bool EmitMD5, bool HasAnySource,
std::optional<MCDwarfLineStr> &LineStr) {
assert(!DwarfFile.Name.empty());
if (LineStr)
@@ -401,7 +401,7 @@ static void emitOneV5FileEntry(MCStreamer *MCOS, const MCDwarfFile &DwarfFile,
MCOS->emitBinaryData(
StringRef(reinterpret_cast<const char *>(Cksum.data()), Cksum.size()));
}
- if (HasSource) {
+ if (HasAnySource) {
if (LineStr)
LineStr->emitRef(MCOS, DwarfFile.Source.value_or(StringRef()));
else {
@@ -452,7 +452,7 @@ void MCDwarfLineTableHeader::emitV5FileDirTables(
uint64_t Entries = 2;
if (HasAllMD5)
Entries += 1;
- if (HasSource)
+ if (HasAnySource)
Entries += 1;
MCOS->emitInt8(Entries);
MCOS->emitULEB128IntValue(dwarf::DW_LNCT_path);
@@ -464,7 +464,7 @@ void MCDwarfLineTableHeader::emitV5FileDirTables(
MCOS->emitULEB128IntValue(dwarf::DW_LNCT_MD5);
MCOS->emitULEB128IntValue(dwarf::DW_FORM_data16);
}
- if (HasSource) {
+ if (HasAnySource) {
MCOS->emitULEB128IntValue(dwarf::DW_LNCT_LLVM_source);
MCOS->emitULEB128IntValue(LineStr ? dwarf::DW_FORM_line_strp
: dwarf::DW_FORM_string);
@@ -479,9 +479,9 @@ void MCDwarfLineTableHeader::emitV5FileDirTables(
assert((!RootFile.Name.empty() || MCDwarfFiles.size() >= 1) &&
"No root file and no .file directives");
emitOneV5FileEntry(MCOS, RootFile.Name.empty() ? MCDwarfFiles[1] : RootFile,
- HasAllMD5, HasSource, LineStr);
+ HasAllMD5, HasAnySource, LineStr);
for (unsigned i = 1; i < MCDwarfFiles.size(); ++i)
- emitOneV5FileEntry(MCOS, MCDwarfFiles[i], HasAllMD5, HasSource, LineStr);
+ emitOneV5FileEntry(MCOS, MCDwarfFiles[i], HasAllMD5, HasAnySource, LineStr);
}
std::pair<MCSymbol *, MCSymbol *>
@@ -598,7 +598,7 @@ MCDwarfLineTableHeader::tryGetFile(StringRef &Directory, StringRef &FileName,
// If any files have embedded source, they all must.
if (MCDwarfFiles.empty()) {
trackMD5Usage(Checksum.has_value());
- HasSource = (Source != std::nullopt);
+ HasAnySource |= Source.has_value();
}
if (DwarfVersion >= 5 && isRootFile(RootFile, Directory, FileName, Checksum))
return 0;
@@ -625,11 +625,6 @@ MCDwarfLineTableHeader::tryGetFile(StringRef &Directory, StringRef &FileName,
return make_error<StringError>("file number already allocated",
inconvertibleErrorCode());
- // If any files have embedded source, they all must.
- if (HasSource != (Source != std::nullopt))
- return make_error<StringError>("inconsistent use of embedded source",
- inconvertibleErrorCode());
-
if (Directory.empty()) {
// Separate the directory part from the basename of the FileName.
StringRef tFileName = sys::path::filename(FileName);
@@ -662,8 +657,8 @@ MCDwarfLineTableHeader::tryGetFile(StringRef &Directory, StringRef &FileName,
File.Checksum = Checksum;
trackMD5Usage(Checksum.has_value());
File.Source = Source;
- if (Source)
- HasSource = true;
+ if (Source.has_value())
+ HasAnySource = true;
// return the allocated FileNumber.
return FileNumber;
@@ -1216,7 +1211,7 @@ void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS,
// The dwarf label's name does not have the symbol name's leading
// underbar if any.
StringRef Name = Symbol->getName();
- if (Name.startswith("_"))
+ if (Name.starts_with("_"))
Name = Name.substr(1, Name.size()-1);
// Get the dwarf file number to be used for the dwarf label.
@@ -1940,8 +1935,9 @@ void MCDwarfFrameEmitter::encodeAdvanceLoc(MCContext &Context,
if (AddrDelta == 0)
return;
- support::endianness E =
- Context.getAsmInfo()->isLittleEndian() ? support::little : support::big;
+ llvm::endianness E = Context.getAsmInfo()->isLittleEndian()
+ ? llvm::endianness::little
+ : llvm::endianness::big;
if (isUIntN(6, AddrDelta)) {
uint8_t Opcode = dwarf::DW_CFA_advance_loc | AddrDelta;
diff --git a/contrib/llvm-project/llvm/lib/MC/MCELFObjectTargetWriter.cpp b/contrib/llvm-project/llvm/lib/MC/MCELFObjectTargetWriter.cpp
index a81eab9ca296..c35e1f26dc1e 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCELFObjectTargetWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCELFObjectTargetWriter.cpp
@@ -17,7 +17,8 @@ MCELFObjectTargetWriter::MCELFObjectTargetWriter(bool Is64Bit_, uint8_t OSABI_,
: OSABI(OSABI_), ABIVersion(ABIVersion_), EMachine(EMachine_),
HasRelocationAddend(HasRelocationAddend_), Is64Bit(Is64Bit_) {}
-bool MCELFObjectTargetWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
+bool MCELFObjectTargetWriter::needsRelocateWithSymbol(const MCValue &,
+ const MCSymbol &,
unsigned Type) const {
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/MC/MCELFStreamer.cpp b/contrib/llvm-project/llvm/lib/MC/MCELFStreamer.cpp
index 653ff4e9435a..e541090769e9 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCELFStreamer.cpp
@@ -501,7 +501,6 @@ void MCELFStreamer::finalizeCGProfileEntry(const MCSymbolRefExpr *&SRE,
SRE->getLoc());
}
const MCConstantExpr *MCOffset = MCConstantExpr::create(Offset, getContext());
- MCObjectStreamer::visitUsedExpr(*SRE);
if (std::optional<std::pair<bool, std::string>> Err =
MCObjectStreamer::emitRelocDirective(
*MCOffset, "BFD_RELOC_NONE", SRE, SRE->getLoc(),
diff --git a/contrib/llvm-project/llvm/lib/MC/MCExpr.cpp b/contrib/llvm-project/llvm/lib/MC/MCExpr.cpp
index a7b980553af0..73e6569f96e4 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCExpr.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCExpr.cpp
@@ -327,6 +327,8 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
return "gd";
case VK_PPC_AIX_TLSGDM:
return "m";
+ case VK_PPC_AIX_TLSIE:
+ return "ie";
case VK_PPC_AIX_TLSLE:
return "le";
case VK_PPC_GOT_TLSLD: return "got@tlsld";
@@ -611,11 +613,6 @@ static void AttemptToFoldSymbolOffsetDifference(
if (Asm->isThumbFunc(&SA))
Addend |= 1;
- // If symbol is labeled as micromips, we set low-bit to ensure
- // correct offset in .gcc_except_table
- if (Asm->getBackend().isMicroMips(&SA))
- Addend |= 1;
-
// Clear the symbol expr pointers to indicate we have folded these
// operands.
A = B = nullptr;
@@ -743,17 +740,24 @@ static void AttemptToFoldSymbolOffsetDifference(
/// They might look redundant, but this function can be used before layout
/// is done (see the object streamer for example) and having the Asm argument
/// lets us avoid relaxations early.
-static bool
-EvaluateSymbolicAdd(const MCAssembler *Asm, const MCAsmLayout *Layout,
- const SectionAddrMap *Addrs, bool InSet, const MCValue &LHS,
- const MCSymbolRefExpr *RHS_A, const MCSymbolRefExpr *RHS_B,
- int64_t RHS_Cst, MCValue &Res) {
+static bool EvaluateSymbolicAdd(const MCAssembler *Asm,
+ const MCAsmLayout *Layout,
+ const SectionAddrMap *Addrs, bool InSet,
+ const MCValue &LHS, const MCValue &RHS,
+ MCValue &Res) {
// FIXME: This routine (and other evaluation parts) are *incredibly* sloppy
// about dealing with modifiers. This will ultimately bite us, one day.
const MCSymbolRefExpr *LHS_A = LHS.getSymA();
const MCSymbolRefExpr *LHS_B = LHS.getSymB();
int64_t LHS_Cst = LHS.getConstant();
+ const MCSymbolRefExpr *RHS_A = RHS.getSymA();
+ const MCSymbolRefExpr *RHS_B = RHS.getSymB();
+ int64_t RHS_Cst = RHS.getConstant();
+
+ if (LHS.getRefKind() != RHS.getRefKind())
+ return false;
+
// Fold the result constant immediately.
int64_t Result_Cst = LHS_Cst + RHS_Cst;
@@ -962,14 +966,19 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
case MCBinaryExpr::Sub:
// Negate RHS and add.
// The cast avoids undefined behavior if the constant is INT64_MIN.
- return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue,
- RHSValue.getSymB(), RHSValue.getSymA(),
- -(uint64_t)RHSValue.getConstant(), Res);
+ return EvaluateSymbolicAdd(
+ Asm, Layout, Addrs, InSet, LHSValue,
+ MCValue::get(RHSValue.getSymB(), RHSValue.getSymA(),
+ -(uint64_t)RHSValue.getConstant(),
+ RHSValue.getRefKind()),
+ Res);
case MCBinaryExpr::Add:
- return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue,
- RHSValue.getSymA(), RHSValue.getSymB(),
- RHSValue.getConstant(), Res);
+ return EvaluateSymbolicAdd(
+ Asm, Layout, Addrs, InSet, LHSValue,
+ MCValue::get(RHSValue.getSymA(), RHSValue.getSymB(),
+ RHSValue.getConstant(), RHSValue.getRefKind()),
+ Res);
}
}
diff --git a/contrib/llvm-project/llvm/lib/MC/MCGOFFStreamer.cpp b/contrib/llvm-project/llvm/lib/MC/MCGOFFStreamer.cpp
new file mode 100644
index 000000000000..58d13c9f3788
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/MC/MCGOFFStreamer.cpp
@@ -0,0 +1,34 @@
+//===- lib/MC/MCGOFFStreamer.cpp - GOFF Object Output ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits GOFF .o object files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCGOFFStreamer.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/TargetRegistry.h"
+
+using namespace llvm;
+
+MCGOFFStreamer::~MCGOFFStreamer() {}
+
+MCStreamer *llvm::createGOFFStreamer(MCContext &Context,
+ std::unique_ptr<MCAsmBackend> &&MAB,
+ std::unique_ptr<MCObjectWriter> &&OW,
+ std::unique_ptr<MCCodeEmitter> &&CE,
+ bool RelaxAll) {
+ MCGOFFStreamer *S =
+ new MCGOFFStreamer(Context, std::move(MAB), std::move(OW), std::move(CE));
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ return S;
+}
diff --git a/contrib/llvm-project/llvm/lib/MC/MCInstPrinter.cpp b/contrib/llvm-project/llvm/lib/MC/MCInstPrinter.cpp
index 27719c8135ec..e4faeba04a8f 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCInstPrinter.cpp
@@ -170,14 +170,6 @@ const char *MCInstPrinter::matchAliasPatterns(const MCInst *MI,
return M.AsmStrings.data() + AsmStrOffset;
}
-/// Utility functions to make adding mark ups simpler.
-StringRef MCInstPrinter::markup(StringRef s) const {
- if (getUseMarkup())
- return s;
- else
- return "";
-}
-
// For asm-style hex (e.g. 0ffh) the first digit always has to be a number.
static bool needsLeadingZero(uint64_t Value)
{
@@ -231,3 +223,53 @@ format_object<uint64_t> MCInstPrinter::formatHex(uint64_t Value) const {
}
llvm_unreachable("unsupported print style");
}
+
+MCInstPrinter::WithMarkup MCInstPrinter::markup(raw_ostream &OS,
+ Markup S) const {
+ return WithMarkup(OS, S, getUseMarkup(), getUseColor());
+}
+
+MCInstPrinter::WithMarkup::WithMarkup(raw_ostream &OS, Markup M,
+ bool EnableMarkup, bool EnableColor)
+ : OS(OS), EnableMarkup(EnableMarkup), EnableColor(EnableColor) {
+ if (EnableColor) {
+ switch (M) {
+ case Markup::Immediate:
+ OS.changeColor(raw_ostream::RED);
+ break;
+ case Markup::Register:
+ OS.changeColor(raw_ostream::CYAN);
+ break;
+ case Markup::Target:
+ OS.changeColor(raw_ostream::YELLOW);
+ break;
+ case Markup::Memory:
+ OS.changeColor(raw_ostream::GREEN);
+ break;
+ }
+ }
+
+ if (EnableMarkup) {
+ switch (M) {
+ case Markup::Immediate:
+ OS << "<imm:";
+ break;
+ case Markup::Register:
+ OS << "<reg:";
+ break;
+ case Markup::Target:
+ OS << "<target:";
+ break;
+ case Markup::Memory:
+ OS << "<mem:";
+ break;
+ }
+ }
+}
+
+MCInstPrinter::WithMarkup::~WithMarkup() {
+ if (EnableMarkup)
+ OS << '>';
+ if (EnableColor)
+ OS.resetColor();
+}
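
The string-returning markup() helper is gone; callers now get an RAII WithMarkup object that emits the opening tag and/or color on construction and the closing '>' plus a color reset on destruction. A reduced standalone analogue of that RAII shape (ScopedTag is hypothetical, not the LLVM class):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/raw_ostream.h"

    struct ScopedTag {
      llvm::raw_ostream &OS;
      ScopedTag(llvm::raw_ostream &OS, llvm::StringRef Open) : OS(OS) { OS << Open; }
      template <typename T> ScopedTag &operator<<(const T &V) {
        OS << V;
        return *this;
      }
      ~ScopedTag() { OS << '>'; } // closes the tag when the temporary dies
    };

    static void demo() {
      ScopedTag(llvm::outs(), "<imm:") << '#' << 42; // prints "<imm:#42>"
    }
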
diff --git a/contrib/llvm-project/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm-project/llvm/lib/MC/MCObjectFileInfo.cpp
index 0b5109e41e71..a79759557b2e 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -547,8 +547,13 @@ void MCObjectFileInfo::initGOFFMCObjectFileInfo(const Triple &T) {
PPA1Section =
Ctx->getGOFFSection(".ppa1", SectionKind::getMetadata(), TextSection,
MCConstantExpr::create(GOFF::SK_PPA1, *Ctx));
+ PPA2Section =
+ Ctx->getGOFFSection(".ppa2", SectionKind::getMetadata(), TextSection,
+ MCConstantExpr::create(GOFF::SK_PPA2, *Ctx));
ADASection =
Ctx->getGOFFSection(".ada", SectionKind::getData(), nullptr, nullptr);
+ IDRLSection =
+ Ctx->getGOFFSection("B_IDRL", SectionKind::getData(), nullptr, nullptr);
}
void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
@@ -928,10 +933,16 @@ void MCObjectFileInfo::initXCOFFMCObjectFileInfo(const Triple &T) {
// the ABI or object file format, but various tools rely on the section
// name being empty (considering named symbols to be "user symbol names").
TextSection = Ctx->getXCOFFSection(
- "", SectionKind::getText(),
+ "..text..", // Use a non-null name to work around an AIX assembler bug...
+ SectionKind::getText(),
XCOFF::CsectProperties(XCOFF::StorageMappingClass::XMC_PR, XCOFF::XTY_SD),
/* MultiSymbolsAllowed*/ true);
+ // ... but use a null name when generating the symbol table.
+ MCSectionXCOFF *TS = static_cast<MCSectionXCOFF *>(TextSection);
+ TS->getQualNameSymbol()->setSymbolTableName("");
+ TS->setSymbolTableName("");
+
DataSection = Ctx->getXCOFFSection(
".data", SectionKind::getData(),
XCOFF::CsectProperties(XCOFF::StorageMappingClass::XMC_RW, XCOFF::XTY_SD),
diff --git a/contrib/llvm-project/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm-project/llvm/lib/MC/MCObjectStreamer.cpp
index 3cf7b4359cab..d11ccfb5e269 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCObjectStreamer.cpp
@@ -797,8 +797,9 @@ MCObjectStreamer::emitRelocDirective(const MCExpr &Offset, StringRef Name,
return std::make_pair(true, std::string("unknown relocation name"));
MCFixupKind Kind = *MaybeKind;
-
- if (Expr == nullptr)
+ if (Expr)
+ visitUsedExpr(*Expr);
+ else
Expr =
MCSymbolRefExpr::create(getContext().createTempSymbol(), getContext());
diff --git a/contrib/llvm-project/llvm/lib/MC/MCParser/AsmLexer.cpp b/contrib/llvm-project/llvm/lib/MC/MCParser/AsmLexer.cpp
index f13549b24e2d..e08404ae0ad9 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -32,7 +32,7 @@
using namespace llvm;
AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
- AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
+ AllowAtInIdentifier = !StringRef(MAI.getCommentString()).starts_with("@");
LexMotorolaIntegers = MAI.shouldUseMotorolaIntegers();
}
@@ -605,7 +605,7 @@ AsmToken AsmLexer::LexSingleQuote() {
StringRef Res = StringRef(TokStart,CurPtr - TokStart);
long long Value;
- if (Res.startswith("\'\\")) {
+ if (Res.starts_with("\'\\")) {
char theChar = Res[2];
switch (theChar) {
default: Value = theChar; break;
diff --git a/contrib/llvm-project/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm-project/llvm/lib/MC/MCParser/AsmParser.cpp
index 04590ed57a9f..8e508dbdb1c6 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -125,6 +125,7 @@ private:
void *SavedDiagContext;
std::unique_ptr<MCAsmParserExtension> PlatformParser;
SMLoc StartTokLoc;
+ std::optional<SMLoc> CFIStartProcLoc;
/// This is the current buffer index we're lexing from as managed by the
/// SourceMgr object.
@@ -807,7 +808,7 @@ AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
PlatformParser.reset(createXCOFFAsmParser());
break;
case MCContext::IsDXContainer:
- llvm_unreachable("DXContainer is not supported yet");
+ report_fatal_error("DXContainer is not supported yet");
break;
}
@@ -1949,6 +1950,11 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
Lex();
}
+ if (MAI.hasSubsectionsViaSymbols() && CFIStartProcLoc && Sym->isExternal())
+ return Error(StartTokLoc, "non-private labels cannot appear between "
+ ".cfi_startproc / .cfi_endproc pairs") &&
+ Error(*CFIStartProcLoc, "previous .cfi_startproc was here");
+
if (discardLTOSymbol(IDVal))
return false;
@@ -1985,7 +1991,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
// Otherwise, we have a normal instruction or directive.
// Directives start with "."
- if (IDVal.startswith(".") && IDVal != ".") {
+ if (IDVal.starts_with(".") && IDVal != ".") {
// There are several entities interested in parsing directives:
//
// 1. The target-specific assembly parser. Some directives are target
@@ -3392,6 +3398,7 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
bool HasFillExpr = false;
int64_t FillExpr = 0;
int64_t MaxBytesToFill = 0;
+ SMLoc FillExprLoc;
auto parseAlign = [&]() -> bool {
if (parseAbsoluteExpression(Alignment))
@@ -3402,7 +3409,7 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
// .align 3,,4
if (getTok().isNot(AsmToken::Comma)) {
HasFillExpr = true;
- if (parseAbsoluteExpression(FillExpr))
+ if (parseTokenLoc(FillExprLoc) || parseAbsoluteExpression(FillExpr))
return true;
}
if (parseOptionalToken(AsmToken::Comma))
@@ -3451,6 +3458,17 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
}
}
+ if (HasFillExpr && FillExpr != 0) {
+ MCSection *Sec = getStreamer().getCurrentSectionOnly();
+ if (Sec && Sec->isVirtualSection()) {
+ ReturnVal |=
+ Warning(FillExprLoc, "ignoring non-zero fill value in " +
+ Sec->getVirtualSectionKind() + " section '" +
+ Sec->getName() + "'");
+ FillExpr = 0;
+ }
+ }
+
// Diagnose non-sensical max bytes to align.
if (MaxBytesLoc.isValid()) {
if (MaxBytesToFill < 1) {
@@ -4181,6 +4199,8 @@ bool AsmParser::parseDirectiveCFISections() {
/// parseDirectiveCFIStartProc
/// ::= .cfi_startproc [simple]
bool AsmParser::parseDirectiveCFIStartProc() {
+ CFIStartProcLoc = StartTokLoc;
+
StringRef Simple;
if (!parseOptionalToken(AsmToken::EndOfStatement)) {
if (check(parseIdentifier(Simple) || Simple != "simple",
@@ -4201,8 +4221,11 @@ bool AsmParser::parseDirectiveCFIStartProc() {
/// parseDirectiveCFIEndProc
/// ::= .cfi_endproc
bool AsmParser::parseDirectiveCFIEndProc() {
+ CFIStartProcLoc = std::nullopt;
+
if (parseEOL())
return true;
+
getStreamer().emitCFIEndProc();
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/MC/MCParser/COFFMasmParser.cpp b/contrib/llvm-project/llvm/lib/MC/MCParser/COFFMasmParser.cpp
index 34aa5bf2ae39..8adb0dcddb16 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCParser/COFFMasmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCParser/COFFMasmParser.cpp
@@ -250,7 +250,7 @@ bool COFFMasmParser::ParseDirectiveSegment(StringRef Directive, SMLoc Loc) {
SmallVector<char, 247> SectionNameVector;
StringRef Class;
- if (SegmentName == "_TEXT" || SegmentName.startswith("_TEXT$")) {
+ if (SegmentName == "_TEXT" || SegmentName.starts_with("_TEXT$")) {
if (SegmentName.size() == 5) {
SectionName = ".text";
} else {
diff --git a/contrib/llvm-project/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/contrib/llvm-project/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
index 7c390041b369..edea5a56bec3 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -1167,14 +1167,13 @@ bool DarwinAsmParser::parseBuildVersion(StringRef Directive, SMLoc Loc) {
return TokError("platform name expected");
unsigned Platform = StringSwitch<unsigned>(PlatformName)
- .Case("macos", MachO::PLATFORM_MACOS)
- .Case("ios", MachO::PLATFORM_IOS)
- .Case("tvos", MachO::PLATFORM_TVOS)
- .Case("watchos", MachO::PLATFORM_WATCHOS)
- .Case("macCatalyst", MachO::PLATFORM_MACCATALYST)
- .Case("driverkit", MachO::PLATFORM_DRIVERKIT)
- .Default(0);
- if (Platform == 0)
+#define PLATFORM(platform, id, name, build_name, target, tapi_target, \
+ marketing) \
+ .Case(#build_name, MachO::PLATFORM_##platform)
+#include "llvm/BinaryFormat/MachO.def"
+ .Default(MachO::PLATFORM_UNKNOWN);
+
+ if (Platform == MachO::PLATFORM_UNKNOWN)
return Error(PlatformLoc, "unknown platform name");
if (getLexer().isNot(AsmToken::Comma))
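
The hand-maintained platform StringSwitch is replaced by an X-macro expansion over llvm/BinaryFormat/MachO.def, so newly added platforms are recognized without touching this parser. A generic sketch of the X-macro technique itself, with a hypothetical inline list standing in for the .def file:

    #include "llvm/ADT/StringSwitch.h"

    #define MY_PLATFORMS \
      PLATFORM(MACOS, "macos") \
      PLATFORM(IOS, "ios")

    enum MyPlatform {
    #define PLATFORM(id, name) PLAT_##id,
      MY_PLATFORMS
    #undef PLATFORM
      PLAT_UNKNOWN
    };

    static MyPlatform parsePlatform(llvm::StringRef Name) {
      return llvm::StringSwitch<MyPlatform>(Name)
    #define PLATFORM(id, name) .Case(name, PLAT_##id)
          MY_PLATFORMS
    #undef PLATFORM
          .Default(PLAT_UNKNOWN);
    }
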
diff --git a/contrib/llvm-project/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm-project/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index dbfe0d83e1b2..93e1d2f44b8c 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -531,7 +531,7 @@ static bool allowSectionTypeMismatch(const Triple &TT, StringRef SectionName,
// MIPS .debug_* sections should have SHT_MIPS_DWARF section type to
// distinguish among sections contain DWARF and ECOFF debug formats,
// but in assembly files these sections have SHT_PROGBITS type.
- return SectionName.startswith(".debug_") && Type == ELF::SHT_PROGBITS;
+ return SectionName.starts_with(".debug_") && Type == ELF::SHT_PROGBITS;
}
return false;
}
@@ -634,7 +634,7 @@ EndStmt:
unsigned Type = ELF::SHT_PROGBITS;
if (TypeName.empty()) {
- if (SectionName.startswith(".note"))
+ if (SectionName.starts_with(".note"))
Type = ELF::SHT_NOTE;
else if (hasPrefix(SectionName, ".init_array"))
Type = ELF::SHT_INIT_ARRAY;
diff --git a/contrib/llvm-project/llvm/lib/MC/MCParser/MasmParser.cpp b/contrib/llvm-project/llvm/lib/MC/MCParser/MasmParser.cpp
index 307256ffaf45..51563ea86a6c 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -2117,7 +2117,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
// Treat ".<number>" as a valid identifier in this context.
IDVal = getTok().getString();
Lex(); // always eat a token
- if (!IDVal.startswith("."))
+ if (!IDVal.starts_with("."))
return Error(IDLoc, "unexpected token at start of statement");
} else if (parseIdentifier(IDVal, StartOfStatement)) {
if (!TheCondState.Ignore) {
@@ -6234,8 +6234,8 @@ bool MasmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
bool is_defined = false;
MCRegister Reg;
SMLoc StartLoc, EndLoc;
- is_defined = (getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc) ==
- MatchOperand_Success);
+ is_defined =
+ getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
if (!is_defined) {
StringRef Name;
if (check(parseIdentifier(Name), "expected identifier after 'ifdef'") ||
@@ -6354,8 +6354,8 @@ bool MasmParser::parseDirectiveElseIfdef(SMLoc DirectiveLoc,
bool is_defined = false;
MCRegister Reg;
SMLoc StartLoc, EndLoc;
- is_defined = (getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc) ==
- MatchOperand_Success);
+ is_defined =
+ getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
if (!is_defined) {
StringRef Name;
if (check(parseIdentifier(Name),
@@ -6526,8 +6526,8 @@ bool MasmParser::parseDirectiveErrorIfdef(SMLoc DirectiveLoc,
bool IsDefined = false;
MCRegister Reg;
SMLoc StartLoc, EndLoc;
- IsDefined = (getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc) ==
- MatchOperand_Success);
+ IsDefined =
+ getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
if (!IsDefined) {
StringRef Name;
if (check(parseIdentifier(Name), "expected identifier after '.errdef'"))
@@ -7190,7 +7190,7 @@ bool MasmParser::parseDirectiveRadix(SMLoc DirectiveLoc) {
bool MasmParser::parseDirectiveEcho(SMLoc DirectiveLoc) {
std::string Message = parseStringTo(AsmToken::EndOfStatement);
llvm::outs() << Message;
- if (!StringRef(Message).endswith("\n"))
+ if (!StringRef(Message).ends_with("\n"))
llvm::outs() << '\n';
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/MC/MCPseudoProbe.cpp b/contrib/llvm-project/llvm/lib/MC/MCPseudoProbe.cpp
index caec98e9ea6a..eb3894dbb3c2 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCPseudoProbe.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCPseudoProbe.cpp
@@ -10,6 +10,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFragment.h"
@@ -209,9 +210,18 @@ void MCPseudoProbeInlineTree::emit(MCObjectStreamer *MCOS,
void MCPseudoProbeSections::emit(MCObjectStreamer *MCOS) {
MCContext &Ctx = MCOS->getContext();
- for (auto &ProbeSec : MCProbeDivisions) {
- const auto *FuncSym = ProbeSec.first;
- const auto &Root = ProbeSec.second;
+ SmallVector<std::pair<MCSymbol *, MCPseudoProbeInlineTree *>> Vec;
+ Vec.reserve(MCProbeDivisions.size());
+ for (auto &ProbeSec : MCProbeDivisions)
+ Vec.emplace_back(ProbeSec.first, &ProbeSec.second);
+ for (auto I : llvm::enumerate(MCOS->getAssembler()))
+ I.value().setOrdinal(I.index());
+ llvm::sort(Vec, [](auto A, auto B) {
+ return A.first->getSection().getOrdinal() <
+ B.first->getSection().getOrdinal();
+ });
+ for (auto [FuncSym, RootPtr] : Vec) {
+ const auto &Root = *RootPtr;
if (auto *S = Ctx.getObjectFileInfo()->getPseudoProbeSection(
FuncSym->getSection())) {
// Switch to the .pseudoprobe section or a comdat group.
@@ -333,7 +343,7 @@ template <typename T> ErrorOr<T> MCPseudoProbeDecoder::readUnencodedNumber() {
if (Data + sizeof(T) > End) {
return std::error_code();
}
- T Val = endian::readNext<T, little, unaligned>(Data);
+ T Val = endian::readNext<T, llvm::endianness::little, unaligned>(Data);
return ErrorOr<T>(Val);
}
@@ -562,9 +572,8 @@ void MCPseudoProbeDecoder::printProbeForAddress(raw_ostream &OS,
}
void MCPseudoProbeDecoder::printProbesForAllAddresses(raw_ostream &OS) {
- std::vector<uint64_t> Addresses;
- for (auto Entry : Address2ProbesMap)
- Addresses.push_back(Entry.first);
+ auto Entries = make_first_range(Address2ProbesMap);
+ SmallVector<uint64_t, 0> Addresses(Entries.begin(), Entries.end());
llvm::sort(Addresses);
for (auto K : Addresses) {
OS << "Address:\t";
diff --git a/contrib/llvm-project/llvm/lib/MC/MCSchedule.cpp b/contrib/llvm-project/llvm/lib/MC/MCSchedule.cpp
index 5a893b803fd0..4f7125864c5a 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCSchedule.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCSchedule.cpp
@@ -20,8 +20,8 @@
using namespace llvm;
-static_assert(std::is_pod<MCSchedModel>::value,
- "We shouldn't have a static constructor here");
+static_assert(std::is_trivial_v<MCSchedModel>,
+ "MCSchedModel is required to be a trivial type");
const MCSchedModel MCSchedModel::Default = {DefaultIssueWidth,
DefaultMicroOpBufferSize,
DefaultLoopMicroOpBufferSize,
@@ -30,7 +30,7 @@ const MCSchedModel MCSchedModel::Default = {DefaultIssueWidth,
DefaultMispredictPenalty,
false,
true,
- false /*EnableIntervals*/,
+ /*EnableIntervals=*/false,
0,
nullptr,
nullptr,
@@ -94,10 +94,10 @@ MCSchedModel::getReciprocalThroughput(const MCSubtargetInfo &STI,
const MCWriteProcResEntry *I = STI.getWriteProcResBegin(&SCDesc);
const MCWriteProcResEntry *E = STI.getWriteProcResEnd(&SCDesc);
for (; I != E; ++I) {
- if (!I->Cycles)
+ if (!I->ReleaseAtCycle)
continue;
unsigned NumUnits = SM.getProcResource(I->ProcResourceIdx)->NumUnits;
- double Temp = NumUnits * 1.0 / I->Cycles;
+ double Temp = NumUnits * 1.0 / I->ReleaseAtCycle;
Throughput = Throughput ? std::min(*Throughput, Temp) : Temp;
}
if (Throughput)
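
Under the Cycles -> ReleaseAtCycle rename the computation is unchanged: each consumed resource bounds throughput at NumUnits / ReleaseAtCycle, and the instruction is limited by the smallest bound. A worked standalone sketch of that loop (WriteRes is a hypothetical stand-in for MCWriteProcResEntry):

    #include <algorithm>
    #include <optional>
    #include <vector>

    struct WriteRes { unsigned ReleaseAtCycle; unsigned NumUnits; };

    static std::optional<double>
    throughputBound(const std::vector<WriteRes> &Writes) {
      std::optional<double> Throughput;
      for (const WriteRes &W : Writes) {
        if (!W.ReleaseAtCycle)
          continue; // zero-cycle entries are skipped, as above
        double Temp = W.NumUnits * 1.0 / W.ReleaseAtCycle;
        Throughput = Throughput ? std::min(*Throughput, Temp) : Temp;
      }
      return Throughput; // e.g. {1,2} and {3,1} -> min(2/1, 1/3) = 1/3
    }
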
diff --git a/contrib/llvm-project/llvm/lib/MC/MCSectionELF.cpp b/contrib/llvm-project/llvm/lib/MC/MCSectionELF.cpp
index 666252ffcb74..95fdf3352207 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCSectionELF.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCSectionELF.cpp
@@ -123,6 +123,9 @@ void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
} else if (Arch == Triple::hexagon) {
if (Flags & ELF::SHF_HEX_GPREL)
OS << 's';
+ } else if (Arch == Triple::x86_64) {
+ if (Flags & ELF::SHF_X86_64_LARGE)
+ OS << 'l';
}
OS << '"';
diff --git a/contrib/llvm-project/llvm/lib/MC/MCSectionXCOFF.cpp b/contrib/llvm-project/llvm/lib/MC/MCSectionXCOFF.cpp
index e13f30ccbbdc..02b1b972f339 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCSectionXCOFF.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCSectionXCOFF.cpp
@@ -42,6 +42,16 @@ void MCSectionXCOFF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
return;
}
+ if (getKind().isReadOnlyWithRel()) {
+ if (getMappingClass() != XCOFF::XMC_RW &&
+ getMappingClass() != XCOFF::XMC_RO &&
+ getMappingClass() != XCOFF::XMC_TD)
+ report_fatal_error(
+ "Unexepected storage-mapping class for ReadOnlyWithRel kind");
+ printCsectDirective(OS);
+ return;
+ }
+
// Initialized TLS data.
if (getKind().isThreadData()) {
// We only expect XMC_TL here for initialized TLS data.
@@ -72,8 +82,7 @@ void MCSectionXCOFF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
}
if (isCsect() && getMappingClass() == XCOFF::XMC_TD) {
- assert((getKind().isBSSExtern() || getKind().isBSSLocal() ||
- getKind().isReadOnlyWithRel()) &&
+ assert((getKind().isBSSExtern() || getKind().isBSSLocal()) &&
"Unexepected section kind for toc-data");
printCsectDirective(OS);
return;
diff --git a/contrib/llvm-project/llvm/lib/MC/MCStreamer.cpp b/contrib/llvm-project/llvm/lib/MC/MCStreamer.cpp
index 7f9c0c3b0b8d..0062d0835314 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCStreamer.cpp
@@ -137,7 +137,7 @@ void MCStreamer::emitIntValue(uint64_t Value, unsigned Size) {
"Invalid size");
const bool IsLittleEndian = Context.getAsmInfo()->isLittleEndian();
uint64_t Swapped = support::endian::byte_swap(
- Value, IsLittleEndian ? support::little : support::big);
+ Value, IsLittleEndian ? llvm::endianness::little : llvm::endianness::big);
unsigned Index = IsLittleEndian ? 0 : 8 - Size;
emitBytes(StringRef(reinterpret_cast<char *>(&Swapped) + Index, Size));
}
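
emitIntValue swaps the full 64-bit value into the target byte order and then slices Size bytes from the appropriate end of the buffer: offset 0 for little-endian, 8 - Size for big-endian. A hedged sketch of that slicing trick on a little-endian host (the bswap builtin stands in for support::endian::byte_swap):

    #include <cstdint>
    #include <cstring>

    // Copy the low Size bytes of Value, big-endian, into Out.
    static void bigEndianBytes(uint64_t Value, unsigned Size, uint8_t *Out) {
      uint64_t Swapped = __builtin_bswap64(Value);
      unsigned Index = 8 - Size; // big-endian payload sits at the tail
      std::memcpy(Out, reinterpret_cast<const uint8_t *>(&Swapped) + Index, Size);
    }
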
@@ -1190,10 +1190,7 @@ void MCStreamer::emitXCOFFSymbolLinkageWithVisibility(MCSymbol *Symbol,
}
void MCStreamer::emitXCOFFRenameDirective(const MCSymbol *Name,
- StringRef Rename) {
- llvm_unreachable("emitXCOFFRenameDirective is only supported on "
- "XCOFF targets");
-}
+ StringRef Rename) {}
void MCStreamer::emitXCOFFRefDirective(const MCSymbol *Symbol) {
llvm_unreachable("emitXCOFFRefDirective is only supported on XCOFF targets");
diff --git a/contrib/llvm-project/llvm/lib/MC/MCSubtargetInfo.cpp b/contrib/llvm-project/llvm/lib/MC/MCSubtargetInfo.cpp
index 8ee823e0377b..cf3aba17fc3d 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCSubtargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCSubtargetInfo.cpp
@@ -214,7 +214,7 @@ void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef TuneCPU,
if (!TuneCPU.empty())
CPUSchedModel = &getSchedModelForCPU(TuneCPU);
else
- CPUSchedModel = &MCSchedModel::GetDefaultSchedModel();
+ CPUSchedModel = &MCSchedModel::Default;
}
void MCSubtargetInfo::setDefaultFeatures(StringRef CPU, StringRef TuneCPU,
@@ -319,7 +319,7 @@ const MCSchedModel &MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const {
errs() << "'" << CPU
<< "' is not a recognized processor for this target"
<< " (ignoring processor)\n";
- return MCSchedModel::GetDefaultSchedModel();
+ return MCSchedModel::Default;
}
assert(CPUEntry->SchedModel && "Missing processor SchedModel value");
return *CPUEntry->SchedModel;
diff --git a/contrib/llvm-project/llvm/lib/MC/MCTargetOptions.cpp b/contrib/llvm-project/llvm/lib/MC/MCTargetOptions.cpp
index 8fea8c7715bd..07c6e752cb61 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCTargetOptions.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCTargetOptions.cpp
@@ -19,7 +19,7 @@ MCTargetOptions::MCTargetOptions()
PreserveAsmComments(true), Dwarf64(false),
EmitDwarfUnwind(EmitDwarfUnwindType::Default),
MCUseDwarfDirectory(DefaultDwarfDirectory),
- EmitCompactUnwindNonCanonical(false) {}
+ EmitCompactUnwindNonCanonical(false), PPCUseFullRegisterNames(false) {}
StringRef MCTargetOptions::getABIName() const {
return ABIName;
diff --git a/contrib/llvm-project/llvm/lib/MC/MCWin64EH.cpp b/contrib/llvm-project/llvm/lib/MC/MCWin64EH.cpp
index bb3492bec8aa..bd5cf354659b 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCWin64EH.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCWin64EH.cpp
@@ -318,6 +318,7 @@ static void checkARM64Instructions(MCStreamer &Streamer,
case Win64EH::UOP_TrapFrame:
case Win64EH::UOP_PushMachFrame:
case Win64EH::UOP_Context:
+ case Win64EH::UOP_ECContext:
case Win64EH::UOP_ClearUnwoundToCall:
// Can't reason about these opcodes and how they map to actual
// instructions.
@@ -411,6 +412,9 @@ static uint32_t ARM64CountOfUnwindCodes(ArrayRef<WinEH::Instruction> Insns) {
case Win64EH::UOP_Context:
Count += 1;
break;
+ case Win64EH::UOP_ECContext:
+ Count += 1;
+ break;
case Win64EH::UOP_ClearUnwoundToCall:
Count += 1;
break;
@@ -593,6 +597,10 @@ static void ARM64EmitUnwindCode(MCStreamer &streamer,
b = 0xEA;
streamer.emitInt8(b);
break;
+ case Win64EH::UOP_ECContext:
+ b = 0xEB;
+ streamer.emitInt8(b);
+ break;
case Win64EH::UOP_ClearUnwoundToCall:
b = 0xEC;
streamer.emitInt8(b);
@@ -1010,6 +1018,7 @@ static bool tryARM64PackedUnwind(WinEH::FrameInfo *info, uint32_t FuncLength,
return false;
case Win64EH::UOP_TrapFrame:
case Win64EH::UOP_Context:
+ case Win64EH::UOP_ECContext:
case Win64EH::UOP_ClearUnwoundToCall:
case Win64EH::UOP_PushMachFrame:
// These are special opcodes that aren't normally generated.
diff --git a/contrib/llvm-project/llvm/lib/MC/MCXCOFFStreamer.cpp b/contrib/llvm-project/llvm/lib/MC/MCXCOFFStreamer.cpp
index 8585416cd081..458b4be61983 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCXCOFFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCXCOFFStreamer.cpp
@@ -96,6 +96,13 @@ void MCXCOFFStreamer::emitXCOFFRefDirective(const MCSymbol *Symbol) {
DF->getFixups().push_back(Fixup);
}
+void MCXCOFFStreamer::emitXCOFFRenameDirective(const MCSymbol *Name,
+ StringRef Rename) {
+ const MCSymbolXCOFF *Symbol = cast<const MCSymbolXCOFF>(Name);
+ if (!Symbol->hasRename())
+ report_fatal_error("Only explicit .rename is supported for XCOFF.");
+}
+
void MCXCOFFStreamer::emitXCOFFExceptDirective(const MCSymbol *Symbol,
const MCSymbol *Trap,
unsigned Lang, unsigned Reason,
diff --git a/contrib/llvm-project/llvm/lib/MC/MachObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/MachObjectWriter.cpp
index 6b263df92cbe..d17e6e125d87 100644
--- a/contrib/llvm-project/llvm/lib/MC/MachObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MachObjectWriter.cpp
@@ -630,7 +630,7 @@ void MachObjectWriter::computeSymbolTable(
// Set the Index and the IsExtern bit.
unsigned Index = Rel.Sym->getIndex();
assert(isInt<24>(Index));
- if (W.Endian == support::little)
+ if (W.Endian == llvm::endianness::little)
Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (~0U << 24)) | Index | (1 << 27);
else
Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4);
@@ -710,16 +710,6 @@ bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
return false;
return true;
}
- // For Darwin x86_64, there is one special case when the reference IsPCRel.
- // If the fragment with the reference does not have a base symbol but meets
- // the simple way of dealing with this, in that it is a temporary symbol in
- // the same atom then it is assumed to be fully resolved. This is needed so
- // a relocation entry is not created and so the static linker does not
- // mess up the reference later.
- else if(!FB.getAtom() &&
- SA.isTemporary() && SA.isInSection() && &SecA == &SecB){
- return true;
- }
}
// If they are not in the same section, we can't compute the diff.
diff --git a/contrib/llvm-project/llvm/lib/MC/SPIRVObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/SPIRVObjectWriter.cpp
index cb49f5eeca8d..39856e96e9be 100644
--- a/contrib/llvm-project/llvm/lib/MC/SPIRVObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/SPIRVObjectWriter.cpp
@@ -24,7 +24,7 @@ class SPIRVObjectWriter : public MCObjectWriter {
public:
SPIRVObjectWriter(std::unique_ptr<MCSPIRVObjectTargetWriter> MOTW,
raw_pwrite_stream &OS)
- : W(OS, support::little), TargetObjectWriter(std::move(MOTW)) {}
+ : W(OS, llvm::endianness::little), TargetObjectWriter(std::move(MOTW)) {}
~SPIRVObjectWriter() override {}
diff --git a/contrib/llvm-project/llvm/lib/MC/StringTableBuilder.cpp b/contrib/llvm-project/llvm/lib/MC/StringTableBuilder.cpp
index bb948fe3d13b..df316bae98ce 100644
--- a/contrib/llvm-project/llvm/lib/MC/StringTableBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/StringTableBuilder.cpp
@@ -41,6 +41,7 @@ void StringTableBuilder::initSize() {
case MachO:
case MachO64:
case ELF:
+ case DXContainer:
// Start the table with a NUL byte.
Size = 1;
break;
@@ -149,7 +150,7 @@ void StringTableBuilder::finalizeStringTable(bool Optimize) {
StringRef Previous;
for (StringPair *P : Strings) {
StringRef S = P->first.val();
- if (Previous.endswith(S)) {
+ if (Previous.ends_with(S)) {
size_t Pos = Size - S.size() - (K != RAW);
if (isAligned(Alignment, Pos)) {
P->second = Pos;
@@ -167,7 +168,7 @@ void StringTableBuilder::finalizeStringTable(bool Optimize) {
}
}
- if (K == MachO || K == MachOLinked)
+ if (K == MachO || K == MachOLinked || K == DXContainer)
Size = alignTo(Size, 4); // Pad to multiple of 4.
if (K == MachO64 || K == MachO64Linked)
Size = alignTo(Size, 8); // Pad to multiple of 8.
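
finalizeStringTable orders candidates so that a string which is a suffix of its predecessor (Previous.ends_with(S)) can reuse the predecessor's tail bytes instead of being stored again. A reduced sketch of that suffix sharing, under simplified assumptions (no alignment, no terminating NUL):

    #include <cstddef>
    #include <string>

    // Append S, or return the offset of an existing tail that already
    // spells S: placing "bc" after "abc" reuses the bytes at offset 1.
    static size_t placeWithTailMerge(std::string &Table, const std::string &S) {
      if (Table.size() >= S.size() &&
          Table.compare(Table.size() - S.size(), S.size(), S) == 0)
        return Table.size() - S.size();
      size_t Pos = Table.size();
      Table += S;
      return Pos;
    }
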
diff --git a/contrib/llvm-project/llvm/lib/MC/TargetRegistry.cpp b/contrib/llvm-project/llvm/lib/MC/TargetRegistry.cpp
index fa7aaccabcd6..0aa48916c7d2 100644
--- a/contrib/llvm-project/llvm/lib/MC/TargetRegistry.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/TargetRegistry.cpp
@@ -49,9 +49,8 @@ const Target *TargetRegistry::lookupTarget(StringRef ArchName,
std::string TempError;
TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), TempError);
if (!TheTarget) {
- Error = "unable to get target for '"
- + TheTriple.getTriple()
- + "', see --version and --triple.\n";
+ Error = "unable to get target for '" + TheTriple.getTriple() +
+ "', see --version and --triple.";
return nullptr;
}
}
diff --git a/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp
index 2b886449f052..fd48d5080ff6 100644
--- a/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp
@@ -193,7 +193,7 @@ static void patchI64(raw_pwrite_stream &Stream, uint64_t Value,
}
bool isDwoSection(const MCSection &Sec) {
- return Sec.getName().endswith(".dwo");
+ return Sec.getName().ends_with(".dwo");
}
class WasmObjectWriter : public MCObjectWriter {
@@ -529,7 +529,7 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
const auto *SymA = cast<MCSymbolWasm>(&RefA->getSymbol());
// The .init_array isn't translated as data, so don't do relocations in it.
- if (FixupSection.getName().startswith(".init_array")) {
+ if (FixupSection.getName().starts_with(".init_array")) {
SymA->setUsedInInitArray();
return;
}
@@ -550,7 +550,7 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
TargetObjectWriter->getRelocType(Target, Fixup, FixupSection, IsLocRel);
// Absolute offset within a section or a function.
- // Currently only supported for for metadata sections.
+ // Currently only supported for metadata sections.
// See: test/MC/WebAssembly/blockaddress.ll
if ((Type == wasm::R_WASM_FUNCTION_OFFSET_I32 ||
Type == wasm::R_WASM_FUNCTION_OFFSET_I64 ||
@@ -1438,12 +1438,12 @@ void WasmObjectWriter::prepareImports(
uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
- support::endian::Writer MainWriter(*OS, support::little);
+ support::endian::Writer MainWriter(*OS, llvm::endianness::little);
W = &MainWriter;
if (IsSplitDwarf) {
uint64_t TotalSize = writeOneObject(Asm, Layout, DwoMode::NonDwoOnly);
assert(DwoOS);
- support::endian::Writer DwoWriter(*DwoOS, support::little);
+ support::endian::Writer DwoWriter(*DwoOS, llvm::endianness::little);
W = &DwoWriter;
return TotalSize + writeOneObject(Asm, Layout, DwoMode::DwoOnly);
} else {
@@ -1491,7 +1491,7 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
<< Section.getGroup() << "\n";);
// .init_array sections are handled specially elsewhere.
- if (SectionName.startswith(".init_array"))
+ if (SectionName.starts_with(".init_array"))
continue;
// Code is handled separately
@@ -1526,7 +1526,7 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
StringRef Name = SectionName;
// For user-defined custom sections, strip the prefix
- if (Name.startswith(".custom_section."))
+ if (Name.starts_with(".custom_section."))
Name = Name.substr(strlen(".custom_section."));
MCSymbol *Begin = Sec.getBeginSymbol();
@@ -1851,9 +1851,9 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
// Translate .init_array section contents into start functions.
for (const MCSection &S : Asm) {
const auto &WS = static_cast<const MCSectionWasm &>(S);
- if (WS.getName().startswith(".fini_array"))
+ if (WS.getName().starts_with(".fini_array"))
report_fatal_error(".fini_array sections are unsupported");
- if (!WS.getName().startswith(".init_array"))
+ if (!WS.getName().starts_with(".init_array"))
continue;
if (WS.getFragmentList().empty())
continue;
diff --git a/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp
index c203280d2c10..f265fafa59e7 100644
--- a/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -241,7 +241,7 @@ public:
} // end anonymous namespace
static bool isDwoSection(const MCSection &Sec) {
- return Sec.getName().endswith(".dwo");
+ return Sec.getName().ends_with(".dwo");
}
//------------------------------------------------------------------------------
@@ -260,7 +260,7 @@ void COFFSymbol::set_name_offset(uint32_t Offset) {
WinCOFFWriter::WinCOFFWriter(WinCOFFObjectWriter &OWriter,
raw_pwrite_stream &OS, DwoMode Mode)
- : OWriter(OWriter), W(OS, support::little), Mode(Mode) {
+ : OWriter(OWriter), W(OS, llvm::endianness::little), Mode(Mode) {
Header.Machine = OWriter.TargetObjectWriter->getMachine();
// Some relocations on ARM64 (the 21 bit ADRP relocations) have a slightly
// limited range for the immediate offset (+/- 1 MB); create extra offset
@@ -847,7 +847,9 @@ void WinCOFFWriter::executePostLayoutBinding(MCAssembler &Asm,
if (Mode != DwoOnly)
for (const MCSymbol &Symbol : Asm.symbols())
- if (!Symbol.isTemporary())
+ // Define non-temporary or temporary static (private-linkage) symbols
+ if (!Symbol.isTemporary() ||
+ cast<MCSymbolCOFF>(Symbol).getClass() == COFF::IMAGE_SYM_CLASS_STATIC)
DefineSymbol(Symbol, Asm, Layout);
}
@@ -909,7 +911,7 @@ void WinCOFFWriter::recordRelocation(MCAssembler &Asm,
Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment);
// Turn relocations for temporary symbols into section relocations.
- if (A.isTemporary()) {
+ if (A.isTemporary() && !SymbolMap[&A]) {
MCSection *TargetSection = &A.getSection();
assert(
SectionMap.contains(TargetSection) &&
diff --git a/contrib/llvm-project/llvm/lib/MC/XCOFFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/XCOFFObjectWriter.cpp
index 036210d6b0ef..343e2fc877bc 100644
--- a/contrib/llvm-project/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -448,7 +448,7 @@ public:
XCOFFObjectWriter::XCOFFObjectWriter(
std::unique_ptr<MCXCOFFObjectTargetWriter> MOTW, raw_pwrite_stream &OS)
- : W(OS, support::big), TargetObjectWriter(std::move(MOTW)),
+ : W(OS, llvm::endianness::big), TargetObjectWriter(std::move(MOTW)),
Strings(StringTableBuilder::XCOFF),
Text(".text", XCOFF::STYP_TEXT, /* IsVirtual */ false,
CsectGroups{&ProgramCodeCsects, &ReadOnlyCsects}),
@@ -697,7 +697,8 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
const uint32_t Index = getIndex(SymA, SymASec);
if (Type == XCOFF::RelocationType::R_POS ||
Type == XCOFF::RelocationType::R_TLS ||
- Type == XCOFF::RelocationType::R_TLS_LE)
+ Type == XCOFF::RelocationType::R_TLS_LE ||
+ Type == XCOFF::RelocationType::R_TLS_IE)
// The FixedValue should be symbol's virtual address in this object file
// plus any constant value that we might get.
FixedValue = getVirtualAddress(SymA, SymASec) + Target.getConstant();
@@ -1562,7 +1563,7 @@ void XCOFFObjectWriter::writeSectionForControlSectionEntry(
}
// The size of the tail padding in a section is the end virtual address of
- // the current section minus the the end virtual address of the last csect
+ // the current section minus the end virtual address of the last csect
// in that section.
if (uint64_t PaddingSize =
CsectEntry.Address + CsectEntry.Size - CurrentAddressLocation) {
diff --git a/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
index 393548dd5bd3..8d99695f4c29 100644
--- a/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
+++ b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
@@ -346,7 +346,7 @@ uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const {
void ResourceManager::issueInstruction(
const InstrDesc &Desc,
- SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes) {
+ SmallVectorImpl<std::pair<ResourceRef, ReleaseAtCycles>> &Pipes) {
for (const std::pair<uint64_t, ResourceUsage> &R : Desc.Resources) {
const CycleSegment &CS = R.second.CS;
if (!CS.size()) {
@@ -359,8 +359,8 @@ void ResourceManager::issueInstruction(
ResourceRef Pipe = selectPipe(R.first);
use(Pipe);
BusyResources[Pipe] += CS.size();
- Pipes.emplace_back(std::pair<ResourceRef, ResourceCycles>(
- Pipe, ResourceCycles(CS.size())));
+ Pipes.emplace_back(std::pair<ResourceRef, ReleaseAtCycles>(
+ Pipe, ReleaseAtCycles(CS.size())));
} else {
assert((llvm::popcount(R.first) > 1) && "Expected a group!");
// Mark this group as reserved.
diff --git a/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/Scheduler.cpp b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
index 31ea751f1c44..a9bbf6979919 100644
--- a/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
@@ -69,7 +69,7 @@ Scheduler::Status Scheduler::isAvailable(const InstRef &IR) {
void Scheduler::issueInstructionImpl(
InstRef &IR,
- SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedResources) {
+ SmallVectorImpl<std::pair<ResourceRef, ReleaseAtCycles>> &UsedResources) {
Instruction *IS = IR.getInstruction();
const InstrDesc &D = IS->getDesc();
@@ -98,7 +98,7 @@ void Scheduler::issueInstructionImpl(
// Release the buffered resources and issue the instruction.
void Scheduler::issueInstruction(
InstRef &IR,
- SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedResources,
+ SmallVectorImpl<std::pair<ResourceRef, ReleaseAtCycles>> &UsedResources,
SmallVectorImpl<InstRef> &PendingInstructions,
SmallVectorImpl<InstRef> &ReadyInstructions) {
const Instruction &Inst = *IR.getInstruction();
diff --git a/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp b/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp
index bddd370ea448..1a82e45763a2 100644
--- a/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp
@@ -69,7 +69,7 @@ static void initializeUsedResources(InstrDesc &ID,
for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
- if (!PRE->Cycles) {
+ if (!PRE->ReleaseAtCycle) {
#ifndef NDEBUG
WithColor::warning()
<< "Ignoring invalid write of zero cycles on processor resource "
@@ -89,11 +89,11 @@ static void initializeUsedResources(InstrDesc &ID,
AllInOrderResources &= (PR.BufferSize <= 1);
}
- CycleSegment RCy(0, PRE->Cycles, false);
+ CycleSegment RCy(0, PRE->ReleaseAtCycle, false);
Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
if (PR.SuperIdx) {
uint64_t Super = ProcResourceMasks[PR.SuperIdx];
- SuperResources[Super] += PRE->Cycles;
+ SuperResources[Super] += PRE->ReleaseAtCycle;
}
}
@@ -156,7 +156,7 @@ static void initializeUsedResources(InstrDesc &ID,
// is reserved. For example (on target x86; cpu Haswell):
//
// SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
- // let ResourceCycles = [2, 2, 3];
+ // let ReleaseAtCycles = [2, 2, 3];
// }
//
// This means:
diff --git a/contrib/llvm-project/llvm/lib/MCA/Stages/EntryStage.cpp b/contrib/llvm-project/llvm/lib/MCA/Stages/EntryStage.cpp
index 5c82ce780478..3a47ffe80391 100644
--- a/contrib/llvm-project/llvm/lib/MCA/Stages/EntryStage.cpp
+++ b/contrib/llvm-project/llvm/lib/MCA/Stages/EntryStage.cpp
@@ -67,8 +67,7 @@ llvm::Error EntryStage::cycleResume() {
llvm::Error EntryStage::cycleEnd() {
// Find the first instruction which hasn't been retired.
- auto Range =
- make_range(Instructions.begin() + NumRetired, Instructions.end());
+ auto Range = drop_begin(Instructions, NumRetired);
auto It = find_if(Range, [](const std::unique_ptr<Instruction> &I) {
return !I->isRetired();
});
diff --git a/contrib/llvm-project/llvm/lib/MCA/Stages/ExecuteStage.cpp b/contrib/llvm-project/llvm/lib/MCA/Stages/ExecuteStage.cpp
index 6d36c4aa3533..7714d4ff8aed 100644
--- a/contrib/llvm-project/llvm/lib/MCA/Stages/ExecuteStage.cpp
+++ b/contrib/llvm-project/llvm/lib/MCA/Stages/ExecuteStage.cpp
@@ -196,7 +196,7 @@ Error ExecuteStage::execute(InstRef &IR) {
// Reserve a slot in each buffered resource. Also, mark units with
// BufferSize=0 as reserved. Resources with a buffer size of zero will only
- // be released after MCIS is issued, and all the ResourceCycles for those
+ // be released after MCIS is issued, and all the ReleaseAtCycles for those
// units have been consumed.
bool IsReadyInstruction = HWS.dispatch(IR);
const Instruction &Inst = *IR.getInstruction();
diff --git a/contrib/llvm-project/llvm/lib/MCA/Stages/InstructionTables.cpp b/contrib/llvm-project/llvm/lib/MCA/Stages/InstructionTables.cpp
index a842b52dcd39..937cc7da8de7 100644
--- a/contrib/llvm-project/llvm/lib/MCA/Stages/InstructionTables.cpp
+++ b/contrib/llvm-project/llvm/lib/MCA/Stages/InstructionTables.cpp
@@ -38,7 +38,7 @@ Error InstructionTables::execute(InstRef &IR) {
for (unsigned I = 0, E = NumUnits; I < E; ++I) {
ResourceRef ResourceUnit = std::make_pair(Index, 1U << I);
UsedResources.emplace_back(
- std::make_pair(ResourceUnit, ResourceCycles(Cycles, NumUnits)));
+ std::make_pair(ResourceUnit, ReleaseAtCycles(Cycles, NumUnits)));
}
continue;
}
@@ -53,7 +53,8 @@ Error InstructionTables::execute(InstRef &IR) {
for (unsigned I2 = 0, E2 = SubUnit.NumUnits; I2 < E2; ++I2) {
ResourceRef ResourceUnit = std::make_pair(SubUnitIdx, 1U << I2);
UsedResources.emplace_back(std::make_pair(
- ResourceUnit, ResourceCycles(Cycles, NumUnits * SubUnit.NumUnits)));
+ ResourceUnit,
+ ReleaseAtCycles(Cycles, NumUnits * SubUnit.NumUnits)));
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/MCA/Support.cpp b/contrib/llvm-project/llvm/lib/MCA/Support.cpp
index 517738c959fc..f8b8a2d129c1 100644
--- a/contrib/llvm-project/llvm/lib/MCA/Support.cpp
+++ b/contrib/llvm-project/llvm/lib/MCA/Support.cpp
@@ -21,7 +21,7 @@ namespace mca {
#define DEBUG_TYPE "llvm-mca"
-ResourceCycles &ResourceCycles::operator+=(const ResourceCycles &RHS) {
+ReleaseAtCycles &ReleaseAtCycles::operator+=(const ReleaseAtCycles &RHS) {
if (Denominator == RHS.Denominator)
Numerator += RHS.Numerator;
else {
@@ -92,18 +92,18 @@ double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth,
// The number of available resource units affects the resource pressure
// distribution, as well as how many blocks can be executed every cycle.
for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
- unsigned ResourceCycles = ProcResourceUsage[I];
- if (!ResourceCycles)
+ unsigned ReleaseAtCycles = ProcResourceUsage[I];
+ if (!ReleaseAtCycles)
continue;
const MCProcResourceDesc &MCDesc = *SM.getProcResource(I);
- double Throughput = static_cast<double>(ResourceCycles) / MCDesc.NumUnits;
+ double Throughput = static_cast<double>(ReleaseAtCycles) / MCDesc.NumUnits;
Max = std::max(Max, Throughput);
}
// The block reciprocal throughput is computed as the MAX of:
// - (NumMicroOps / DispatchWidth)
- // - (NumUnits / ResourceCycles) for every consumed processor resource.
+ // - (NumUnits / ReleaseAtCycles) for every consumed processor resource.
return Max;
}
diff --git a/contrib/llvm-project/llvm/lib/ObjCopy/Archive.cpp b/contrib/llvm-project/llvm/lib/ObjCopy/Archive.cpp
index 742ca0b890cf..b6da4dc1e239 100644
--- a/contrib/llvm-project/llvm/lib/ObjCopy/Archive.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjCopy/Archive.cpp
@@ -60,8 +60,9 @@ createNewArchiveMembers(const MultiFormatConfig &Config, const Archive &Ar) {
// For thin archives it writes the archive file itself as well as its members.
static Error deepWriteArchive(StringRef ArcName,
ArrayRef<NewArchiveMember> NewMembers,
- bool WriteSymtab, object::Archive::Kind Kind,
- bool Deterministic, bool Thin) {
+ SymtabWritingMode WriteSymtab,
+ object::Archive::Kind Kind, bool Deterministic,
+ bool Thin) {
if (Kind == object::Archive::K_BSD && !NewMembers.empty() &&
NewMembers.front().detectKindFromObject() == object::Archive::K_DARWIN)
Kind = object::Archive::K_DARWIN;
@@ -102,8 +103,10 @@ Error executeObjcopyOnArchive(const MultiFormatConfig &Config,
return NewArchiveMembersOrErr.takeError();
const CommonConfig &CommonConfig = Config.getCommonConfig();
return deepWriteArchive(CommonConfig.OutputFilename, *NewArchiveMembersOrErr,
- Ar.hasSymbolTable(), Ar.kind(),
- CommonConfig.DeterministicArchives, Ar.isThin());
+ Ar.hasSymbolTable() ? SymtabWritingMode::NormalSymtab
+ : SymtabWritingMode::NoSymtab,
+ Ar.kind(), CommonConfig.DeterministicArchives,
+ Ar.isThin());
}
} // end namespace objcopy
diff --git a/contrib/llvm-project/llvm/lib/ObjCopy/COFF/COFFObjcopy.cpp b/contrib/llvm-project/llvm/lib/ObjCopy/COFF/COFFObjcopy.cpp
index 622726be8ce5..782d5b2f70c3 100644
--- a/contrib/llvm-project/llvm/lib/ObjCopy/COFF/COFFObjcopy.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjCopy/COFF/COFFObjcopy.cpp
@@ -29,7 +29,7 @@ using namespace object;
using namespace COFF;
static bool isDebugSection(const Section &Sec) {
- return Sec.Name.startswith(".debug");
+ return Sec.Name.starts_with(".debug");
}
static uint64_t getNextRVA(const Object &Obj) {
diff --git a/contrib/llvm-project/llvm/lib/ObjCopy/CommonConfig.cpp b/contrib/llvm-project/llvm/lib/ObjCopy/CommonConfig.cpp
index e85715d0c44c..f44e70d996b2 100644
--- a/contrib/llvm-project/llvm/lib/ObjCopy/CommonConfig.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjCopy/CommonConfig.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjCopy/CommonConfig.h"
+#include "llvm/Support/Errc.h"
namespace llvm {
namespace objcopy {
@@ -38,6 +39,12 @@ NameOrPattern::create(StringRef Pattern, MatchStyle MS,
IsPositiveMatch);
}
case MatchStyle::Regex: {
+ Regex RegEx(Pattern);
+ std::string Err;
+ if (!RegEx.isValid(Err))
+ return createStringError(errc::invalid_argument,
+ "cannot compile regular expression \'" +
+ Pattern + "\': " + Err);
SmallVector<char, 32> Data;
return NameOrPattern(std::make_shared<Regex>(
("^" + Pattern.ltrim('^').rtrim('$') + "$").toStringRef(Data)));
diff --git a/contrib/llvm-project/llvm/lib/ObjCopy/ConfigManager.cpp b/contrib/llvm-project/llvm/lib/ObjCopy/ConfigManager.cpp
index 5b8e2f5dc200..10ece49028f2 100644
--- a/contrib/llvm-project/llvm/lib/ObjCopy/ConfigManager.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjCopy/ConfigManager.cpp
@@ -23,7 +23,8 @@ Expected<const COFFConfig &> ConfigManager::getCOFFConfig() const {
Common.ExtractDWO || Common.PreserveDates || Common.StripDWO ||
Common.StripNonAlloc || Common.StripSections || Common.Weaken ||
Common.DecompressDebugSections ||
- Common.DiscardMode == DiscardType::Locals || !Common.SymbolsToAdd.empty())
+ Common.DiscardMode == DiscardType::Locals ||
+ !Common.SymbolsToAdd.empty() || Common.GapFill != 0 || Common.PadTo != 0)
return createStringError(llvm::errc::invalid_argument,
"option is not supported for COFF");
@@ -34,15 +35,16 @@ Expected<const MachOConfig &> ConfigManager::getMachOConfig() const {
if (!Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() ||
!Common.AllocSectionsPrefix.empty() || !Common.KeepSection.empty() ||
!Common.SymbolsToGlobalize.empty() || !Common.SymbolsToKeep.empty() ||
- !Common.SymbolsToLocalize.empty() || !Common.SymbolsToWeaken.empty() ||
+ !Common.SymbolsToLocalize.empty() ||
!Common.SymbolsToKeepGlobal.empty() || !Common.SectionsToRename.empty() ||
!Common.UnneededSymbolsToRemove.empty() ||
!Common.SetSectionAlignment.empty() || !Common.SetSectionFlags.empty() ||
!Common.SetSectionType.empty() || Common.ExtractDWO ||
Common.PreserveDates || Common.StripAllGNU || Common.StripDWO ||
- Common.StripNonAlloc || Common.StripSections || Common.Weaken ||
+ Common.StripNonAlloc || Common.StripSections ||
Common.DecompressDebugSections || Common.StripUnneeded ||
- Common.DiscardMode == DiscardType::Locals || !Common.SymbolsToAdd.empty())
+ Common.DiscardMode == DiscardType::Locals ||
+ !Common.SymbolsToAdd.empty() || Common.GapFill != 0 || Common.PadTo != 0)
return createStringError(llvm::errc::invalid_argument,
"option is not supported for MachO");
@@ -60,7 +62,8 @@ Expected<const WasmConfig &> ConfigManager::getWasmConfig() const {
!Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() ||
!Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() ||
!Common.SetSectionFlags.empty() || !Common.SetSectionType.empty() ||
- !Common.SymbolsToRename.empty())
+ !Common.SymbolsToRename.empty() || Common.GapFill != 0 ||
+ Common.PadTo != 0)
return createStringError(llvm::errc::invalid_argument,
"only flags for section dumping, removal, and "
"addition are supported");
@@ -86,7 +89,8 @@ Expected<const XCOFFConfig &> ConfigManager::getXCOFFConfig() const {
Common.ExtractMainPartition || Common.OnlyKeepDebug ||
Common.PreserveDates || Common.StripAllGNU || Common.StripDWO ||
Common.StripDebug || Common.StripNonAlloc || Common.StripSections ||
- Common.Weaken || Common.StripUnneeded || Common.DecompressDebugSections) {
+ Common.Weaken || Common.StripUnneeded || Common.DecompressDebugSections ||
+ Common.GapFill != 0 || Common.PadTo != 0) {
return createStringError(
llvm::errc::invalid_argument,
"no flags are supported yet, only basic copying is allowed");
diff --git a/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp b/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
index dfe843e1d4b7..daf03810fd7b 100644
--- a/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
@@ -52,11 +52,11 @@ using namespace llvm::object;
using SectionPred = std::function<bool(const SectionBase &Sec)>;
static bool isDebugSection(const SectionBase &Sec) {
- return StringRef(Sec.Name).startswith(".debug") || Sec.Name == ".gdb_index";
+ return StringRef(Sec.Name).starts_with(".debug") || Sec.Name == ".gdb_index";
}
static bool isDWOSection(const SectionBase &Sec) {
- return StringRef(Sec.Name).endswith(".dwo");
+ return StringRef(Sec.Name).ends_with(".dwo");
}
static bool onlyKeepDWOPred(const Object &Obj, const SectionBase &Sec) {
@@ -68,7 +68,8 @@ static bool onlyKeepDWOPred(const Object &Obj, const SectionBase &Sec) {
return !isDWOSection(Sec);
}
-static uint64_t getNewShfFlags(SectionFlag AllFlags) {
+static Expected<uint64_t> getNewShfFlags(SectionFlag AllFlags,
+ uint16_t EMachine) {
uint64_t NewFlags = 0;
if (AllFlags & SectionFlag::SecAlloc)
NewFlags |= ELF::SHF_ALLOC;
@@ -82,18 +83,27 @@ static uint64_t getNewShfFlags(SectionFlag AllFlags) {
NewFlags |= ELF::SHF_STRINGS;
if (AllFlags & SectionFlag::SecExclude)
NewFlags |= ELF::SHF_EXCLUDE;
+ if (AllFlags & SectionFlag::SecLarge) {
+ if (EMachine != EM_X86_64)
+ return createStringError(errc::invalid_argument,
+ "section flag SHF_X86_64_LARGE can only be used "
+ "with x86_64 architecture");
+ NewFlags |= ELF::SHF_X86_64_LARGE;
+ }
return NewFlags;
}
static uint64_t getSectionFlagsPreserveMask(uint64_t OldFlags,
- uint64_t NewFlags) {
+ uint64_t NewFlags,
+ uint16_t EMachine) {
// Preserve some flags which should not be dropped when setting flags.
  // Also, preserve anything OS/processor dependent.
const uint64_t PreserveMask =
(ELF::SHF_COMPRESSED | ELF::SHF_GROUP | ELF::SHF_LINK_ORDER |
ELF::SHF_MASKOS | ELF::SHF_MASKPROC | ELF::SHF_TLS |
ELF::SHF_INFO_LINK) &
- ~ELF::SHF_EXCLUDE;
+ ~ELF::SHF_EXCLUDE &
+ ~(EMachine == EM_X86_64 ? (uint64_t)ELF::SHF_X86_64_LARGE : 0UL);
return (OldFlags & PreserveMask) | (NewFlags & ~PreserveMask);
}
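The merge in getSectionFlagsPreserveMask is plain bit arithmetic: bits in the
preserve mask survive from the old flags, everything else comes from the
requested flags. A self-contained sketch with made-up four-bit values:

    #include <cassert>
    #include <cstdint>

    uint64_t mergeFlags(uint64_t Old, uint64_t New, uint64_t PreserveMask) {
      return (Old & PreserveMask) | (New & ~PreserveMask);
    }

    void mergeFlagsExample() {
      // The two high bits survive from Old; the two low bits are replaced.
      assert(mergeFlags(/*Old=*/0b1010, /*New=*/0b0101,
                        /*PreserveMask=*/0b1100) == 0b1001);
    }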
@@ -105,8 +115,12 @@ static void setSectionType(SectionBase &Sec, uint64_t Type) {
Sec.Type = Type;
}
-static void setSectionFlagsAndType(SectionBase &Sec, SectionFlag Flags) {
- Sec.Flags = getSectionFlagsPreserveMask(Sec.Flags, getNewShfFlags(Flags));
+static Error setSectionFlagsAndType(SectionBase &Sec, SectionFlag Flags,
+ uint16_t EMachine) {
+ Expected<uint64_t> NewFlags = getNewShfFlags(Flags, EMachine);
+ if (!NewFlags)
+ return NewFlags.takeError();
+ Sec.Flags = getSectionFlagsPreserveMask(Sec.Flags, *NewFlags, EMachine);
// In GNU objcopy, certain flags promote SHT_NOBITS to SHT_PROGBITS. This rule
// may promote more non-ALLOC sections than GNU objcopy, but it is fine as
@@ -115,6 +129,8 @@ static void setSectionFlagsAndType(SectionBase &Sec, SectionFlag Flags) {
(!(Sec.Flags & ELF::SHF_ALLOC) ||
Flags & (SectionFlag::SecContents | SectionFlag::SecLoad)))
setSectionType(Sec, ELF::SHT_PROGBITS);
+
+ return Error::success();
}
static ElfType getOutputElfType(const Binary &Bin) {
@@ -164,7 +180,7 @@ static std::unique_ptr<Writer> createWriter(const CommonConfig &Config,
ElfType OutputElfType) {
switch (Config.OutputFormat) {
case FileFormat::Binary:
- return std::make_unique<BinaryWriter>(Obj, Out);
+ return std::make_unique<BinaryWriter>(Obj, Out, Config);
case FileFormat::IHex:
return std::make_unique<IHexWriter>(Obj, Out);
default:
@@ -198,7 +214,7 @@ static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
static bool isCompressable(const SectionBase &Sec) {
return !(Sec.Flags & ELF::SHF_COMPRESSED) &&
- StringRef(Sec.Name).startswith(".debug");
+ StringRef(Sec.Name).starts_with(".debug");
}
static Error replaceDebugSections(
@@ -232,7 +248,7 @@ static bool isAArch64MappingSymbol(const Symbol &Sym) {
StringRef Name = Sym.Name;
if (!Name.consume_front("$x") && !Name.consume_front("$d"))
return false;
- return Name.empty() || Name.startswith(".");
+ return Name.empty() || Name.starts_with(".");
}
static bool isArmMappingSymbol(const Symbol &Sym) {
@@ -243,7 +259,7 @@ static bool isArmMappingSymbol(const Symbol &Sym) {
if (!Name.consume_front("$a") && !Name.consume_front("$d") &&
!Name.consume_front("$t"))
return false;
- return Name.empty() || Name.startswith(".");
+ return Name.empty() || Name.starts_with(".");
}
// Check if the symbol should be preserved because it is required by ABI.
@@ -345,7 +361,7 @@ static Error updateAndRemoveSymbols(const CommonConfig &Config,
if ((Config.DiscardMode == DiscardType::All ||
(Config.DiscardMode == DiscardType::Locals &&
- StringRef(Sym.Name).startswith(".L"))) &&
+ StringRef(Sym.Name).starts_with(".L"))) &&
Sym.Binding == STB_LOCAL && Sym.getShndx() != SHN_UNDEF &&
Sym.Type != STT_FILE && Sym.Type != STT_SECTION)
return true;
@@ -432,7 +448,7 @@ static Error replaceAndRemoveSections(const CommonConfig &Config,
return true;
if (&Sec == Obj.SectionNames)
return false;
- if (StringRef(Sec.Name).startswith(".gnu.warning"))
+ if (StringRef(Sec.Name).starts_with(".gnu.warning"))
return false;
// We keep the .ARM.attribute section to maintain compatibility
// with Debian derived distributions. This is a bug in their
@@ -646,7 +662,7 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
auto AddSection = [&](StringRef Name, ArrayRef<uint8_t> Data) {
OwnedDataSection &NewSection =
Obj.addSection<OwnedDataSection>(Name, Data);
- if (Name.startswith(".note") && Name != ".note.GNU-stack")
+ if (Name.starts_with(".note") && Name != ".note.GNU-stack")
NewSection.Type = SHT_NOTE;
return Error::success();
};
@@ -681,7 +697,8 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
const auto Iter = Config.SetSectionFlags.find(Sec.Name);
if (Iter != Config.SetSectionFlags.end()) {
const SectionFlagsUpdate &SFU = Iter->second;
- setSectionFlagsAndType(Sec, SFU.NewFlags);
+ if (Error E = setSectionFlagsAndType(Sec, SFU.NewFlags, Obj.Machine))
+ return E;
}
auto It2 = Config.SetSectionType.find(Sec.Name);
if (It2 != Config.SetSectionType.end())
@@ -698,8 +715,10 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
if (Iter != Config.SectionsToRename.end()) {
const SectionRename &SR = Iter->second;
Sec.Name = std::string(SR.NewName);
- if (SR.NewFlags)
- setSectionFlagsAndType(Sec, *SR.NewFlags);
+ if (SR.NewFlags) {
+ if (Error E = setSectionFlagsAndType(Sec, *SR.NewFlags, Obj.Machine))
+ return E;
+ }
RenamedSections.insert(&Sec);
} else if (RelocSec && !(Sec.Flags & SHF_ALLOC))
// Postpone processing relocation sections which are not specified in
diff --git a/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.cpp
index 697afab2a617..5352736bdcb9 100644
--- a/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.cpp
@@ -1987,8 +1987,9 @@ template <class ELFT> void ELFWriter<ELFT>::writeEhdr() {
Ehdr.e_ident[EI_MAG2] = 'L';
Ehdr.e_ident[EI_MAG3] = 'F';
Ehdr.e_ident[EI_CLASS] = ELFT::Is64Bits ? ELFCLASS64 : ELFCLASS32;
- Ehdr.e_ident[EI_DATA] =
- ELFT::TargetEndianness == support::big ? ELFDATA2MSB : ELFDATA2LSB;
+ Ehdr.e_ident[EI_DATA] = ELFT::TargetEndianness == llvm::endianness::big
+ ? ELFDATA2MSB
+ : ELFDATA2LSB;
Ehdr.e_ident[EI_VERSION] = EV_CURRENT;
Ehdr.e_ident[EI_OSABI] = Obj.OSABI;
Ehdr.e_ident[EI_ABIVERSION] = Obj.ABIVersion;
@@ -2089,7 +2090,7 @@ template <class ELFT> void ELFWriter<ELFT>::writeSegmentData() {
Size);
}
- for (auto it : Obj.getUpdatedSections()) {
+ for (const auto &it : Obj.getUpdatedSections()) {
SectionBase *Sec = it.first;
ArrayRef<uint8_t> Data = it.second;
@@ -2635,9 +2636,36 @@ template <class ELFT> Error ELFWriter<ELFT>::finalize() {
}
Error BinaryWriter::write() {
- for (const SectionBase &Sec : Obj.allocSections())
+ SmallVector<const SectionBase *, 30> SectionsToWrite;
+ for (const SectionBase &Sec : Obj.allocSections()) {
+ if (Sec.Type != SHT_NOBITS)
+ SectionsToWrite.push_back(&Sec);
+ }
+
+ if (SectionsToWrite.empty())
+ return Error::success();
+
+ llvm::stable_sort(SectionsToWrite,
+ [](const SectionBase *LHS, const SectionBase *RHS) {
+ return LHS->Offset < RHS->Offset;
+ });
+
+ assert(SectionsToWrite.front()->Offset == 0);
+
+ for (size_t i = 0; i != SectionsToWrite.size(); ++i) {
+ const SectionBase &Sec = *SectionsToWrite[i];
if (Error Err = Sec.accept(*SecWriter))
return Err;
+ if (GapFill == 0)
+ continue;
+ uint64_t PadOffset = (i < SectionsToWrite.size() - 1)
+ ? SectionsToWrite[i + 1]->Offset
+ : Buf->getBufferSize();
+ assert(PadOffset <= Buf->getBufferSize());
+ assert(Sec.Offset + Sec.Size <= PadOffset);
+ std::fill(Buf->getBufferStart() + Sec.Offset + Sec.Size,
+ Buf->getBufferStart() + PadOffset, GapFill);
+ }
// TODO: Implement direct writing to the output stream (without intermediate
// memory buffer Buf).
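A worked sketch of the gap-fill pass above, with made-up offsets: two sections
occupying [0,4) and [12,16) of a 16-byte raw image leave a gap at [4,12) that
is filled with the configured GapFill byte instead of being left zeroed.

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <vector>

    void gapFillExample() {
      std::vector<uint8_t> Image(16, 0); // stand-in for the output buffer
      const uint8_t GapFill = 0xff;
      // Section A ends at 4, section B starts at 12: fill [4,12).
      std::fill(Image.begin() + 4, Image.begin() + 12, GapFill);
      assert(Image[4] == 0xff && Image[11] == 0xff && Image[12] == 0x00);
    }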
@@ -2663,7 +2691,7 @@ Error BinaryWriter::finalize() {
// file size. This might not be the same as the offset returned by
// layoutSections, because we want to truncate the last segment to the end of
// its last non-empty section, to match GNU objcopy's behaviour.
- TotalSize = 0;
+ TotalSize = PadTo > MinAddr ? PadTo - MinAddr : 0;
for (SectionBase &Sec : Obj.allocSections())
if (Sec.Type != SHT_NOBITS && Sec.Size > 0) {
Sec.Offset = Sec.Addr - MinAddr;
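The new TotalSize seed implements PadTo: with made-up numbers, if the lowest
loaded address is 0x1000 and PadTo is 0x1400, the raw image is at least 0x400
bytes even when the last section ends earlier; a PadTo at or below MinAddr is
a no-op.

    #include <cassert>
    #include <cstdint>

    // Mirrors: TotalSize = PadTo > MinAddr ? PadTo - MinAddr : 0;
    uint64_t initialTotalSize(uint64_t PadTo, uint64_t MinAddr) {
      return PadTo > MinAddr ? PadTo - MinAddr : 0;
    }

    void padToExample() {
      assert(initialTotalSize(/*PadTo=*/0x1400, /*MinAddr=*/0x1000) == 0x400);
      assert(initialTotalSize(/*PadTo=*/0x800, /*MinAddr=*/0x1000) == 0);
    }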
@@ -2695,11 +2723,11 @@ uint64_t IHexWriter::writeEntryPointRecord(uint8_t *Buf) {
if (Obj.Entry <= 0xFFFFFU) {
Data[0] = ((Obj.Entry & 0xF0000U) >> 12) & 0xFF;
support::endian::write(&Data[2], static_cast<uint16_t>(Obj.Entry),
- support::big);
+ llvm::endianness::big);
HexData = IHexRecord::getLine(IHexRecord::StartAddr80x86, 0, Data);
} else {
support::endian::write(Data, static_cast<uint32_t>(Obj.Entry),
- support::big);
+ llvm::endianness::big);
HexData = IHexRecord::getLine(IHexRecord::StartAddr, 0, Data);
}
memcpy(Buf, HexData.data(), HexData.size());
diff --git a/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.h b/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.h
index 89a03b3fe0ee..95bea0964eae 100644
--- a/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.h
+++ b/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.h
@@ -357,6 +357,8 @@ public:
class BinaryWriter : public Writer {
private:
+ const uint8_t GapFill;
+ const uint64_t PadTo;
std::unique_ptr<BinarySectionWriter> SecWriter;
uint64_t TotalSize = 0;
@@ -365,7 +367,8 @@ public:
~BinaryWriter() {}
Error finalize() override;
Error write() override;
- BinaryWriter(Object &Obj, raw_ostream &Out) : Writer(Obj, Out) {}
+ BinaryWriter(Object &Obj, raw_ostream &Out, const CommonConfig &Config)
+ : Writer(Obj, Out), GapFill(Config.GapFill), PadTo(Config.PadTo) {}
};
class IHexWriter : public Writer {
diff --git a/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp b/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp
index 067ef39d9052..a3d4ba3a94f7 100644
--- a/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp
@@ -10,6 +10,7 @@
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SystemZ/zOSSupport.h"
using namespace llvm;
using namespace llvm::objcopy::macho;
diff --git a/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp b/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp
index e26b363df21c..91500c2d9dd4 100644
--- a/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp
@@ -94,6 +94,14 @@ static void updateAndRemoveSymbols(const CommonConfig &Config,
const MachOConfig &MachOConfig,
Object &Obj) {
for (SymbolEntry &Sym : Obj.SymTable) {
+ // Weaken symbols first to match ELFObjcopy behavior.
+ bool IsExportedAndDefined =
+ (Sym.n_type & llvm::MachO::N_EXT) &&
+ (Sym.n_type & llvm::MachO::N_TYPE) != llvm::MachO::N_UNDF;
+ if (IsExportedAndDefined &&
+ (Config.Weaken || Config.SymbolsToWeaken.matches(Sym.Name)))
+ Sym.n_desc |= llvm::MachO::N_WEAK_DEF;
+
auto I = Config.SymbolsToRename.find(Sym.Name);
if (I != Config.SymbolsToRename.end())
Sym.Name = std::string(I->getValue());
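The weakening added above only touches symbols that are both exported (N_EXT
set) and defined (N_TYPE not N_UNDF). A small sketch of the same predicate on
raw nlist fields, outside the Object wrapper:

    #include "llvm/BinaryFormat/MachO.h"
    #include <cstdint>

    // Returns true if the symbol was eligible and N_WEAK_DEF was set,
    // matching the check in updateAndRemoveSymbols above.
    bool weakenIfEligible(uint8_t NType, uint16_t &NDesc) {
      bool ExportedAndDefined =
          (NType & llvm::MachO::N_EXT) &&
          (NType & llvm::MachO::N_TYPE) != llvm::MachO::N_UNDF;
      if (ExportedAndDefined)
        NDesc |= llvm::MachO::N_WEAK_DEF;
      return ExportedAndDefined;
    }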
@@ -490,10 +498,12 @@ Error objcopy::macho::executeObjcopyOnMachOUniversalBinary(
if (Kind == object::Archive::K_BSD)
Kind = object::Archive::K_DARWIN;
Expected<std::unique_ptr<MemoryBuffer>> OutputBufferOrErr =
- writeArchiveToBuffer(*NewArchiveMembersOrErr,
- (*ArOrErr)->hasSymbolTable(), Kind,
- Config.getCommonConfig().DeterministicArchives,
- (*ArOrErr)->isThin());
+ writeArchiveToBuffer(
+ *NewArchiveMembersOrErr,
+ (*ArOrErr)->hasSymbolTable() ? SymtabWritingMode::NormalSymtab
+ : SymtabWritingMode::NoSymtab,
+ Kind, Config.getCommonConfig().DeterministicArchives,
+ (*ArOrErr)->isThin());
if (!OutputBufferOrErr)
return OutputBufferOrErr.takeError();
Expected<std::unique_ptr<Binary>> BinaryOrErr =
diff --git a/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOObject.cpp b/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOObject.cpp
index 9a4abadc8710..d593d6788e11 100644
--- a/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOObject.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOObject.cpp
@@ -8,6 +8,7 @@
#include "MachOObject.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/SystemZ/zOSSupport.h"
#include <unordered_set>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOObject.h b/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOObject.h
index 1cbd2eb5f320..b3303fd291c8 100644
--- a/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOObject.h
+++ b/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOObject.h
@@ -119,8 +119,8 @@ struct SymbolEntry {
}
bool isSwiftSymbol() const {
- return StringRef(Name).startswith("_$s") ||
- StringRef(Name).startswith("_$S");
+ return StringRef(Name).starts_with("_$s") ||
+ StringRef(Name).starts_with("_$S");
}
std::optional<uint32_t> section() const {
diff --git a/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOReader.cpp b/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOReader.cpp
index 2cbffc12adbf..4549977c12c3 100644
--- a/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOReader.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOReader.cpp
@@ -11,6 +11,7 @@
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Object/MachO.h"
#include "llvm/Support/Errc.h"
+#include "llvm/Support/SystemZ/zOSSupport.h"
#include <memory>
using namespace llvm;
@@ -67,7 +68,8 @@ Expected<std::vector<std::unique_ptr<Section>>> static extractSections(
LoadCmd.C.cmdsize);
Curr < End; ++Curr) {
SectionType Sec;
- memcpy((void *)&Sec, Curr, sizeof(SectionType));
+ memcpy((void *)&Sec, reinterpret_cast<const char *>(Curr),
+ sizeof(SectionType));
if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
MachO::swapStruct(Sec);
diff --git a/contrib/llvm-project/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h b/contrib/llvm-project/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h
index 54c7b5f3ccbe..8620548ed599 100644
--- a/contrib/llvm-project/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h
+++ b/contrib/llvm-project/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h
@@ -13,7 +13,6 @@
#include "XCOFFObject.h"
#include <cstdint>
-#include <vector>
namespace llvm {
namespace objcopy {
diff --git a/contrib/llvm-project/llvm/lib/ObjCopy/wasm/WasmObjcopy.cpp b/contrib/llvm-project/llvm/lib/ObjCopy/wasm/WasmObjcopy.cpp
index e5af59f93280..5bba1dea9adf 100644
--- a/contrib/llvm-project/llvm/lib/ObjCopy/wasm/WasmObjcopy.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjCopy/wasm/WasmObjcopy.cpp
@@ -22,11 +22,11 @@ using namespace object;
using SectionPred = std::function<bool(const Section &Sec)>;
static bool isDebugSection(const Section &Sec) {
- return Sec.Name.startswith(".debug");
+ return Sec.Name.starts_with(".debug");
}
static bool isLinkerSection(const Section &Sec) {
- return Sec.Name.startswith("reloc.") || Sec.Name == "linking";
+ return Sec.Name.starts_with("reloc.") || Sec.Name == "linking";
}
static bool isNameSection(const Section &Sec) { return Sec.Name == "name"; }
diff --git a/contrib/llvm-project/llvm/lib/Object/Archive.cpp b/contrib/llvm-project/llvm/lib/Object/Archive.cpp
index 9920145a2f3c..4ac4d727afb6 100644
--- a/contrib/llvm-project/llvm/lib/Object/Archive.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/Archive.cpp
@@ -227,7 +227,7 @@ Expected<StringRef> BigArchiveMemberHeader::getRawName() const {
StringRef NameTerminator = "`\n";
StringRef NameStringWithNameTerminator =
StringRef(ArMemHdr->Name, NameLenWithPadding + NameTerminator.size());
- if (!NameStringWithNameTerminator.endswith(NameTerminator)) {
+ if (!NameStringWithNameTerminator.ends_with(NameTerminator)) {
uint64_t Offset =
reinterpret_cast<const char *>(ArMemHdr->Name + NameLenWithPadding) -
Parent->getData().data();
@@ -315,7 +315,7 @@ Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const {
return Parent->getStringTable().begin() + StringOffset;
}
- if (Name.startswith("#1/")) {
+ if (Name.starts_with("#1/")) {
uint64_t NameLength;
if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) {
std::string Buf;
@@ -524,7 +524,7 @@ Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err)
// The actual start of the file is after the name and any necessary
// even-alignment padding.
StartOfFile += ((Name.size() + 1) >> 1) << 1;
- } else if (Name.startswith("#1/")) {
+ } else if (Name.starts_with("#1/")) {
uint64_t NameSize;
StringRef RawNameSize = Name.substr(3).rtrim(' ');
if (RawNameSize.getAsInteger(10, NameSize)) {
@@ -671,7 +671,7 @@ Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
std::unique_ptr<Archive> Ret;
StringRef Buffer = Source.getBuffer();
- if (Buffer.startswith(BigArchiveMagic))
+ if (Buffer.starts_with(BigArchiveMagic))
Ret = std::make_unique<BigArchive>(Source, Err);
else
Ret = std::make_unique<Archive>(Source, Err);
@@ -711,11 +711,11 @@ Archive::Archive(MemoryBufferRef Source, Error &Err)
ErrorAsOutParameter ErrAsOutParam(&Err);
StringRef Buffer = Data.getBuffer();
// Check for sufficient magic.
- if (Buffer.startswith(ThinArchiveMagic)) {
+ if (Buffer.starts_with(ThinArchiveMagic)) {
IsThin = true;
- } else if (Buffer.startswith(ArchiveMagic)) {
+ } else if (Buffer.starts_with(ArchiveMagic)) {
IsThin = false;
- } else if (Buffer.startswith(BigArchiveMagic)) {
+ } else if (Buffer.starts_with(BigArchiveMagic)) {
Format = K_AIXBIG;
IsThin = false;
return;
@@ -800,7 +800,7 @@ Archive::Archive(MemoryBufferRef Source, Error &Err)
return;
}
- if (Name.startswith("#1/")) {
+ if (Name.starts_with("#1/")) {
Format = K_BSD;
// We know this is BSD, so getName will work since there is no string table.
Expected<StringRef> NameOrErr = C->getName();
@@ -1392,6 +1392,8 @@ BigArchive::BigArchive(MemoryBufferRef Source, Error &Err)
GlobSymtab32Loc, GlobSymtab32Size, "32-bit");
if (Err)
return;
+
+ Has32BitGlobalSymtab = true;
}
if (GlobSymtab64Offset) {
@@ -1400,6 +1402,8 @@ BigArchive::BigArchive(MemoryBufferRef Source, Error &Err)
GlobSymtab64Loc, GlobSymtab64Size, "64-bit");
if (Err)
return;
+
+ Has64BitGlobalSymtab = true;
}
SmallVector<GlobalSymtabInfo> SymtabInfos;
@@ -1417,7 +1421,7 @@ BigArchive::BigArchive(MemoryBufferRef Source, Error &Err)
// 64-bit global symbol tables, we need to merge them into a single table.
raw_string_ostream Out(MergedGlobalSymtabBuf);
uint64_t SymNum = SymtabInfos[0].SymNum + SymtabInfos[1].SymNum;
- write(Out, SymNum, support::big);
+ write(Out, SymNum, llvm::endianness::big);
// Merge symbol offset.
Out << SymtabInfos[0].SymbolOffsetTable;
Out << SymtabInfos[1].SymbolOffsetTable;
diff --git a/contrib/llvm-project/llvm/lib/Object/ArchiveWriter.cpp b/contrib/llvm-project/llvm/lib/Object/ArchiveWriter.cpp
index d79a5c6bef30..2f70c9edd13e 100644
--- a/contrib/llvm-project/llvm/lib/Object/ArchiveWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/ArchiveWriter.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
+#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/Error.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/MachO.h"
@@ -202,11 +203,12 @@ static bool isBSDLike(object::Archive::Kind Kind) {
template <class T>
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val) {
support::endian::write(Out, Val,
- isBSDLike(Kind) ? support::little : support::big);
+ isBSDLike(Kind) ? llvm::endianness::little
+ : llvm::endianness::big);
}
template <class T> static void printLE(raw_ostream &Out, T Val) {
- support::endian::write(Out, Val, support::little);
+ support::endian::write(Out, Val, llvm::endianness::little);
}
static void printRestOfMemberHeader(
@@ -331,6 +333,8 @@ struct MemberData {
std::string Header;
StringRef Data;
StringRef Padding;
+ uint64_t PreHeadPadSize = 0;
+ std::unique_ptr<SymbolicFile> SymFile = nullptr;
};
} // namespace
@@ -496,21 +500,66 @@ getSymbolicFile(MemoryBufferRef Buf, LLVMContext &Context) {
}
}
-static Expected<bool> is64BitSymbolicFile(const StringRef &ObjStringRef) {
- MemoryBufferRef ObjMbf(ObjStringRef, "");
- // In the scenario when LLVMContext is populated SymbolicFile will contain a
- // reference to it, thus SymbolicFile should be destroyed first.
- LLVMContext Context;
- Expected<std::unique_ptr<SymbolicFile>> ObjOrErr =
- getSymbolicFile(ObjMbf, Context);
- if (!ObjOrErr)
- return ObjOrErr.takeError();
+static bool is64BitSymbolicFile(const SymbolicFile *SymObj) {
+ return SymObj != nullptr ? SymObj->is64Bit() : false;
+}
- // Treat non-symbolic file types as not 64-bits.
- if (!*ObjOrErr)
- return false;
+// Log2 of PAGESIZE(4096) on an AIX system.
+static const uint32_t Log2OfAIXPageSize = 12;
+
+// In the AIX big archive format, since the data content follows the member file
+// name, if the name ends on an odd byte, an extra byte will be added for
+// padding. This ensures that the data within the member file starts at an even
+// byte.
+static const uint32_t MinBigArchiveMemDataAlign = 2;
+
+template <typename AuxiliaryHeader>
+uint16_t getAuxMaxAlignment(uint16_t AuxHeaderSize, AuxiliaryHeader *AuxHeader,
+ uint16_t Log2OfMaxAlign) {
+ // If the member doesn't have an auxiliary header, it isn't a loadable object
+ // and so it just needs aligning at the minimum value.
+ if (AuxHeader == nullptr)
+ return MinBigArchiveMemDataAlign;
+
+ // If the auxiliary header does not have both MaxAlignOfData and
+ // MaxAlignOfText fields, it is not a loadable shared object file, so align at
+ // the minimum value. The 'ModuleType' member is located right after
+ // 'MaxAlignOfData' in the AuxiliaryHeader.
+ if (AuxHeaderSize < offsetof(AuxiliaryHeader, ModuleType))
+ return MinBigArchiveMemDataAlign;
+
+ // If the XCOFF object file does not have a loader section, it is not
+ // loadable, so align at the minimum value.
+ if (AuxHeader->SecNumOfLoader == 0)
+ return MinBigArchiveMemDataAlign;
+
+ // The content of the loadable member file needs to be aligned at MAX(maximum
+ // alignment of .text, maximum alignment of .data) when both fields are present.
+ // If the desired alignment is > PAGESIZE, 32-bit members are aligned on a
+ // word boundary, while 64-bit members are aligned on a PAGESIZE(2^12=4096)
+ // boundary.
+ uint16_t Log2OfAlign =
+ std::max(AuxHeader->MaxAlignOfText, AuxHeader->MaxAlignOfData);
+ return 1 << (Log2OfAlign > Log2OfAIXPageSize ? Log2OfMaxAlign : Log2OfAlign);
+}
- return (*ObjOrErr)->is64Bit();
+// AIX big archives may contain shared object members. The AIX OS requires these
+// members to be aligned if they are 64-bit and recommends it for 32-bit
+// members. This ensures that when these members are loaded they are aligned in
+// memory.
+static uint32_t getMemberAlignment(SymbolicFile *SymObj) {
+ XCOFFObjectFile *XCOFFObj = dyn_cast_or_null<XCOFFObjectFile>(SymObj);
+ if (!XCOFFObj)
+ return MinBigArchiveMemDataAlign;
+
+ // If the desired alignment is > PAGESIZE, 32-bit members are aligned on a
+ // word boundary, while 64-bit members are aligned on a PAGESIZE boundary.
+ return XCOFFObj->is64Bit()
+ ? getAuxMaxAlignment(XCOFFObj->fileHeader64()->AuxHeaderSize,
+ XCOFFObj->auxiliaryHeader64(),
+ Log2OfAIXPageSize)
+ : getAuxMaxAlignment(XCOFFObj->fileHeader32()->AuxHeaderSize,
+ XCOFFObj->auxiliaryHeader32(), 2);
}
static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
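Worked numbers for the alignment rule above, using made-up auxiliary-header
values: a loadable member with MaxAlignOfText = 5 and MaxAlignOfData = 7 is
aligned to 2^7 = 128 bytes; when the larger of the two exceeds
Log2OfAIXPageSize, a 64-bit member is capped at the 2^12 page size while a
32-bit member falls back to a 2^2 word boundary.

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    uint32_t memberAlign(uint16_t Log2Text, uint16_t Log2Data,
                         uint16_t Log2OfMaxAlign) {
      const uint16_t Log2OfAIXPageSize = 12;
      uint16_t Log2OfAlign = std::max(Log2Text, Log2Data);
      return 1u << (Log2OfAlign > Log2OfAIXPageSize ? Log2OfMaxAlign
                                                    : Log2OfAlign);
    }

    void alignExample() {
      assert(memberAlign(5, 7, /*Log2OfMaxAlign=*/12) == 128);
      assert(memberAlign(13, 4, /*Log2OfMaxAlign=*/12) == 4096); // 64-bit cap
      assert(memberAlign(13, 4, /*Log2OfMaxAlign=*/2) == 4);     // 32-bit cap
    }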
@@ -539,13 +588,8 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
uint64_t Pos = MembersOffset;
for (const MemberData &M : Members) {
if (isAIXBigArchive(Kind)) {
- Expected<bool> Is64BitOrErr = is64BitSymbolicFile(M.Data);
- // If there is an error, the error will have been emitted when
- // 'computeMemberData' called the 'getSymbol' function, so don't need to
- // handle it here.
- if (!Is64BitOrErr)
- cantFail(Is64BitOrErr.takeError());
- if (*Is64BitOrErr != Is64Bit) {
+ Pos += M.PreHeadPadSize;
+ if (is64BitSymbolicFile(M.SymFile.get()) != Is64Bit) {
Pos += M.Header.size() + M.Data.size() + M.Padding.size();
continue;
}
@@ -617,6 +661,10 @@ static bool isECObject(object::SymbolicFile &Obj) {
return cast<llvm::object::COFFObjectFile>(&Obj)->getMachine() !=
COFF::IMAGE_FILE_MACHINE_ARM64;
+ if (Obj.isCOFFImportFile())
+ return cast<llvm::object::COFFImportFile>(&Obj)->getMachine() !=
+ COFF::IMAGE_FILE_MACHINE_ARM64;
+
if (Obj.isIR()) {
Expected<std::string> TripleStr =
getBitcodeTargetTriple(Obj.getMemoryBufferRef());
@@ -629,29 +677,19 @@ static bool isECObject(object::SymbolicFile &Obj) {
return false;
}
-static Expected<std::vector<unsigned>>
-getSymbols(MemoryBufferRef Buf, uint16_t Index, raw_ostream &SymNames,
- SymMap *SymMap, bool &HasObject) {
- // In the scenario when LLVMContext is populated SymbolicFile will contain a
- // reference to it, thus SymbolicFile should be destroyed first.
- LLVMContext Context;
-
+static Expected<std::vector<unsigned>> getSymbols(SymbolicFile *Obj,
+ uint16_t Index,
+ raw_ostream &SymNames,
+ SymMap *SymMap) {
std::vector<unsigned> Ret;
- Expected<std::unique_ptr<SymbolicFile>> ObjOrErr =
- getSymbolicFile(Buf, Context);
- if (!ObjOrErr)
- return ObjOrErr.takeError();
- // If the member is non-symbolic file, treat it as having no symbols.
- if (!*ObjOrErr)
+ if (Obj == nullptr)
return Ret;
- std::unique_ptr<object::SymbolicFile> Obj = std::move(*ObjOrErr);
-
std::map<std::string, uint16_t> *Map = nullptr;
if (SymMap)
Map = SymMap->UseECMap && isECObject(*Obj) ? &SymMap->ECMap : &SymMap->Map;
- HasObject = true;
+
for (const object::BasicSymbolRef &S : Obj->symbols()) {
if (!isArchiveSymbol(S))
continue;
@@ -680,10 +718,10 @@ getSymbols(MemoryBufferRef Buf, uint16_t Index, raw_ostream &SymNames,
static Expected<std::vector<MemberData>>
computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
object::Archive::Kind Kind, bool Thin, bool Deterministic,
- bool NeedSymbols, SymMap *SymMap,
- ArrayRef<NewArchiveMember> NewMembers) {
+ SymtabWritingMode NeedSymbols, SymMap *SymMap,
+ LLVMContext &Context, ArrayRef<NewArchiveMember> NewMembers) {
static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
-
+ uint64_t MemHeadPadSize = 0;
uint64_t Pos =
isAIXBigArchive(Kind) ? sizeof(object::BigArchive::FixLenHdr) : 0;
@@ -698,7 +736,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
// UniqueTimestamps is a special case to improve debugging on Darwin:
//
// The Darwin linker does not link debug info into the final
- // binary. Instead, it emits entries of type N_OSO in in the output
+ // binary. Instead, it emits entries of type N_OSO in the output
// binary's symbol table, containing references to the linked-in
// object files. Using that reference, the debugger can read the
// debug data directly from the object files. Alternatively, an
@@ -748,12 +786,16 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
// The big archive format needs to know the offset of the previous member
// header.
uint64_t PrevOffset = 0;
+ uint64_t NextMemHeadPadSize = 0;
+ std::unique_ptr<SymbolicFile> CurSymFile;
+ std::unique_ptr<SymbolicFile> NextSymFile;
uint16_t Index = 0;
- for (const NewArchiveMember &M : NewMembers) {
+
+ for (auto M = NewMembers.begin(); M < NewMembers.end(); ++M) {
std::string Header;
raw_string_ostream Out(Header);
- MemoryBufferRef Buf = M.Buf->getMemBufferRef();
+ MemoryBufferRef Buf = M->Buf->getMemBufferRef();
StringRef Data = Thin ? "" : Buf.getBuffer();
Index++;
@@ -771,48 +813,101 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
sys::TimePoint<std::chrono::seconds> ModTime;
if (UniqueTimestamps)
// Increment timestamp for each file of a given name.
- ModTime = sys::toTimePoint(FilenameCount[M.MemberName]++);
+ ModTime = sys::toTimePoint(FilenameCount[M->MemberName]++);
else
- ModTime = M.ModTime;
+ ModTime = M->ModTime;
uint64_t Size = Buf.getBufferSize() + MemberPadding;
if (Size > object::Archive::MaxMemberSize) {
std::string StringMsg =
- "File " + M.MemberName.str() + " exceeds size limit";
+ "File " + M->MemberName.str() + " exceeds size limit";
return make_error<object::GenericBinaryError>(
std::move(StringMsg), object::object_error::parse_failed);
}
+ if (NeedSymbols != SymtabWritingMode::NoSymtab || isAIXBigArchive(Kind)) {
+ auto SetNextSymFile = [&NextSymFile,
+ &Context](MemoryBufferRef Buf,
+ StringRef MemberName) -> Error {
+ Expected<std::unique_ptr<SymbolicFile>> SymFileOrErr =
+ getSymbolicFile(Buf, Context);
+ if (!SymFileOrErr)
+ return createFileError(MemberName, SymFileOrErr.takeError());
+ NextSymFile = std::move(*SymFileOrErr);
+ return Error::success();
+ };
+
+ if (M == NewMembers.begin())
+ if (Error Err = SetNextSymFile(Buf, M->MemberName))
+ return std::move(Err);
+
+ CurSymFile = std::move(NextSymFile);
+
+ if ((M + 1) != NewMembers.end())
+ if (Error Err = SetNextSymFile((M + 1)->Buf->getMemBufferRef(),
+ (M + 1)->MemberName))
+ return std::move(Err);
+ }
+
+ // In the big archive file format, we need to calculate and include the next
+ // member offset and previous member offset in the file member header.
if (isAIXBigArchive(Kind)) {
+ uint64_t OffsetToMemData = Pos + sizeof(object::BigArMemHdrType) +
+ alignTo(M->MemberName.size(), 2);
+
+ if (M == NewMembers.begin())
+ NextMemHeadPadSize =
+ alignToPowerOf2(OffsetToMemData,
+ getMemberAlignment(CurSymFile.get())) -
+ OffsetToMemData;
+
+ MemHeadPadSize = NextMemHeadPadSize;
+ Pos += MemHeadPadSize;
uint64_t NextOffset = Pos + sizeof(object::BigArMemHdrType) +
- alignTo(M.MemberName.size(), 2) + alignTo(Size, 2);
- printBigArchiveMemberHeader(Out, M.MemberName, ModTime, M.UID, M.GID,
- M.Perms, Size, PrevOffset, NextOffset);
+ alignTo(M->MemberName.size(), 2) + alignTo(Size, 2);
+
+ // If there is another member file after this, we need to calculate the
+ // padding before the header.
+ if ((M + 1) != NewMembers.end()) {
+ uint64_t OffsetToNextMemData = NextOffset +
+ sizeof(object::BigArMemHdrType) +
+ alignTo((M + 1)->MemberName.size(), 2);
+ NextMemHeadPadSize =
+ alignToPowerOf2(OffsetToNextMemData,
+ getMemberAlignment(NextSymFile.get())) -
+ OffsetToNextMemData;
+ NextOffset += NextMemHeadPadSize;
+ }
+ printBigArchiveMemberHeader(Out, M->MemberName, ModTime, M->UID, M->GID,
+ M->Perms, Size, PrevOffset, NextOffset);
PrevOffset = Pos;
} else {
- printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, M,
+ printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, *M,
ModTime, Size);
}
Out.flush();
std::vector<unsigned> Symbols;
- if (NeedSymbols) {
+ if (NeedSymbols != SymtabWritingMode::NoSymtab) {
Expected<std::vector<unsigned>> SymbolsOrErr =
- getSymbols(Buf, Index, SymNames, SymMap, HasObject);
+ getSymbols(CurSymFile.get(), Index, SymNames, SymMap);
if (!SymbolsOrErr)
- return createFileError(M.MemberName, SymbolsOrErr.takeError());
+ return createFileError(M->MemberName, SymbolsOrErr.takeError());
Symbols = std::move(*SymbolsOrErr);
+ if (CurSymFile)
+ HasObject = true;
}
Pos += Header.size() + Data.size() + Padding.size();
- Ret.push_back({std::move(Symbols), std::move(Header), Data, Padding});
+ Ret.push_back({std::move(Symbols), std::move(Header), Data, Padding,
+ MemHeadPadSize, std::move(CurSymFile)});
}
// If there are no symbols, emit an empty symbol table, to satisfy Solaris
// tools, older versions of which expect a symbol table in a non-empty
// archive, regardless of whether there are any symbols in it.
if (HasObject && SymNames.tell() == 0 && !isCOFFArchive(Kind))
SymNames << '\0' << '\0' << '\0';
- return Ret;
+ return std::move(Ret);
}
namespace llvm {
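The PreHeadPadSize computed above is the distance from the member's natural
data offset to the next alignment boundary. A worked sketch with made-up
numbers: data that would start at offset 0x9e2 under a 128-byte requirement
gets 30 bytes of pre-header padding so it lands on 0xa00.

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    void preHeadPadExample() {
      uint64_t OffsetToMemData = 0x9e2; // header end plus aligned member name
      uint32_t Align = 128;             // from getMemberAlignment()
      uint64_t Pad =
          llvm::alignToPowerOf2(OffsetToMemData, Align) - OffsetToMemData;
      assert(Pad == 30); // 0x9e2 + 30 == 0xa00, a 128-byte boundary
    }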
@@ -860,7 +955,8 @@ Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To) {
static Error writeArchiveToStream(raw_ostream &Out,
ArrayRef<NewArchiveMember> NewMembers,
- bool WriteSymtab, object::Archive::Kind Kind,
+ SymtabWritingMode WriteSymtab,
+ object::Archive::Kind Kind,
bool Deterministic, bool Thin, bool IsEC) {
assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode");
@@ -875,10 +971,14 @@ static Error writeArchiveToStream(raw_ostream &Out,
if (isCOFFArchive(Kind) && NewMembers.size() > 0xfffe)
Kind = object::Archive::K_GNU;
+ // In the scenario when LLVMContext is populated SymbolicFile will contain a
+ // reference to it, thus SymbolicFile should be destroyed first.
+ LLVMContext Context;
+
SymMap.UseECMap = IsEC;
Expected<std::vector<MemberData>> DataOrErr = computeMemberData(
StringTable, SymNames, Kind, Thin, Deterministic, WriteSymtab,
- isCOFFArchive(Kind) ? &SymMap : nullptr, NewMembers);
+ isCOFFArchive(Kind) ? &SymMap : nullptr, Context, NewMembers);
if (Error E = DataOrErr.takeError())
return E;
std::vector<MemberData> &Data = *DataOrErr;
@@ -897,9 +997,11 @@ static Error writeArchiveToStream(raw_ostream &Out,
uint64_t LastMemberHeaderOffset = 0;
uint64_t NumSyms = 0;
uint64_t NumSyms32 = 0; // Store symbol number of 32-bit member files.
+ bool ShouldWriteSymtab = WriteSymtab != SymtabWritingMode::NoSymtab;
for (const auto &M : Data) {
// Record the start of the member's offset
+ LastMemberEndOffset += M.PreHeadPadSize;
LastMemberHeaderOffset = LastMemberEndOffset;
// Account for the size of each part associated with the member.
LastMemberEndOffset += M.Header.size() + M.Data.size() + M.Padding.size();
@@ -910,21 +1012,17 @@ static Error writeArchiveToStream(raw_ostream &Out,
// symbols; the second global symbol table does the same for 64-bit file
// members. As a big archive can have both 32-bit and 64-bit file members,
// we need to know the number of symbols in each symbol table individually.
- if (isAIXBigArchive(Kind) && WriteSymtab) {
- Expected<bool> Is64BitOrErr = is64BitSymbolicFile(M.Data);
- if (Error E = Is64BitOrErr.takeError())
- return E;
-
- if (!*Is64BitOrErr)
- NumSyms32 += M.Symbols.size();
- }
+ if (isAIXBigArchive(Kind) && ShouldWriteSymtab) {
+ if (!is64BitSymbolicFile(M.SymFile.get()))
+ NumSyms32 += M.Symbols.size();
+ }
}
std::optional<uint64_t> HeadersSize;
// The symbol table is put at the end of the big archive file. The symbol
// table is at the start of the archive file for other archive formats.
- if (WriteSymtab && !is64BitKind(Kind)) {
+ if (ShouldWriteSymtab && !is64BitKind(Kind)) {
// We assume 32-bit offsets to see if 32-bit symbols are possible or not.
HeadersSize = computeHeadersSize(Kind, Data.size(), StringTableSize,
NumSyms, SymNamesBuf.size(),
@@ -962,7 +1060,7 @@ static Error writeArchiveToStream(raw_ostream &Out,
Out << "!<arch>\n";
if (!isAIXBigArchive(Kind)) {
- if (WriteSymtab) {
+ if (ShouldWriteSymtab) {
if (!HeadersSize)
HeadersSize = computeHeadersSize(
Kind, Data.size(), StringTableSize, NumSyms, SymNamesBuf.size(),
@@ -978,7 +1076,7 @@ static Error writeArchiveToStream(raw_ostream &Out,
Out << StringTableMember.Header << StringTableMember.Data
<< StringTableMember.Padding;
- if (WriteSymtab && SymMap.ECMap.size())
+ if (ShouldWriteSymtab && SymMap.ECMap.size())
writeECSymbols(Out, Kind, Deterministic, Data, SymMap);
for (const MemberData &M : Data)
@@ -998,13 +1096,14 @@ static Error writeArchiveToStream(raw_ostream &Out,
for (size_t I = 0, Size = NewMembers.size(); I != Size; ++I) {
const NewArchiveMember &Member = NewMembers[I];
MemberTableNameStrTblSize += Member.MemberName.size() + 1;
+ MemberEndOffset += Data[I].PreHeadPadSize;
MemberOffsets.push_back(MemberEndOffset);
MemberNames.push_back(Member.MemberName);
// File member name ended with "`\n". The length is included in
// BigArMemHdrType.
MemberEndOffset += sizeof(object::BigArMemHdrType) +
- alignTo(Data[I].Data.size(), 2) +
- alignTo(Member.MemberName.size(), 2);
+ alignTo(Data[I].Data.size(), 2) +
+ alignTo(Member.MemberName.size(), 2);
}
// AIX member table size.
@@ -1017,18 +1116,13 @@ static Error writeArchiveToStream(raw_ostream &Out,
raw_svector_ostream SymNames32(SymNamesBuf32);
raw_svector_ostream SymNames64(SymNamesBuf64);
- if (WriteSymtab && NumSyms)
+ if (ShouldWriteSymtab && NumSyms)
// Generate the symbol names for the members.
- for (const NewArchiveMember &M : NewMembers) {
- MemoryBufferRef Buf = M.Buf->getMemBufferRef();
- Expected<bool> Is64BitOrErr = is64BitSymbolicFile(Buf.getBuffer());
- if (!Is64BitOrErr)
- return Is64BitOrErr.takeError();
-
- bool HasObject;
- Expected<std::vector<unsigned>> SymbolsOrErr =
- getSymbols(Buf, 0, *Is64BitOrErr ? SymNames64 : SymNames32, nullptr,
- HasObject);
+ for (const auto &M : Data) {
+ Expected<std::vector<unsigned>> SymbolsOrErr = getSymbols(
+ M.SymFile.get(), 0,
+ is64BitSymbolicFile(M.SymFile.get()) ? SymNames64 : SymNames32,
+ nullptr);
if (!SymbolsOrErr)
return SymbolsOrErr.takeError();
}
@@ -1041,11 +1135,15 @@ static Error writeArchiveToStream(raw_ostream &Out,
// the offset to the 32-bit global symbol table, and the 'GlobSym64Offset'
// contains the offset to the 64-bit global symbol table.
uint64_t GlobalSymbolOffset =
- (WriteSymtab && NumSyms32 > 0) ? MemberTableEndOffset : 0;
+ (ShouldWriteSymtab &&
+ (WriteSymtab != SymtabWritingMode::BigArchive64) && NumSyms32 > 0)
+ ? MemberTableEndOffset
+ : 0;
uint64_t GlobalSymbolOffset64 = 0;
uint64_t NumSyms64 = NumSyms - NumSyms32;
- if (WriteSymtab && NumSyms64 > 0) {
+ if (ShouldWriteSymtab && (WriteSymtab != SymtabWritingMode::BigArchive32) &&
+ NumSyms64 > 0) {
if (GlobalSymbolOffset == 0)
GlobalSymbolOffset64 = MemberTableEndOffset;
else
@@ -1063,9 +1161,12 @@ static Error writeArchiveToStream(raw_ostream &Out,
// symbol table.
printWithSpacePadding(Out, GlobalSymbolOffset, 20);
printWithSpacePadding(Out, GlobalSymbolOffset64, 20);
- printWithSpacePadding(
- Out, NewMembers.size() ? sizeof(object::BigArchive::FixLenHdr) : 0,
- 20); // Offset to first archive member
+ printWithSpacePadding(Out,
+ NewMembers.size()
+ ? sizeof(object::BigArchive::FixLenHdr) +
+ Data[0].PreHeadPadSize
+ : 0,
+ 20); // Offset to first archive member
printWithSpacePadding(Out, NewMembers.size() ? LastMemberHeaderOffset : 0,
20); // Offset to last archive member
printWithSpacePadding(
@@ -1073,6 +1174,7 @@ static Error writeArchiveToStream(raw_ostream &Out,
20); // Offset to first member of free list - Not supported yet
for (const MemberData &M : Data) {
+ Out << std::string(M.PreHeadPadSize, '\0');
Out << M.Header << M.Data;
if (M.Data.size() % 2)
Out << '\0';
@@ -1095,7 +1197,7 @@ static Error writeArchiveToStream(raw_ostream &Out,
Out << '\0'; // Name table must be tail padded to an even number of
// bytes.
- if (WriteSymtab) {
+ if (ShouldWriteSymtab) {
// Write global symbol table for 32-bit file members.
if (GlobalSymbolOffset) {
writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf32,
@@ -1121,7 +1223,7 @@ static Error writeArchiveToStream(raw_ostream &Out,
}
Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
- bool WriteSymtab, object::Archive::Kind Kind,
+ SymtabWritingMode WriteSymtab, object::Archive::Kind Kind,
bool Deterministic, bool Thin,
std::unique_ptr<MemoryBuffer> OldArchiveBuf, bool IsEC) {
Expected<sys::fs::TempFile> Temp =
@@ -1153,9 +1255,9 @@ Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
}
Expected<std::unique_ptr<MemoryBuffer>>
-writeArchiveToBuffer(ArrayRef<NewArchiveMember> NewMembers, bool WriteSymtab,
- object::Archive::Kind Kind, bool Deterministic,
- bool Thin) {
+writeArchiveToBuffer(ArrayRef<NewArchiveMember> NewMembers,
+ SymtabWritingMode WriteSymtab, object::Archive::Kind Kind,
+ bool Deterministic, bool Thin) {
SmallVector<char, 0> ArchiveBufferVector;
raw_svector_ostream ArchiveStream(ArchiveBufferVector);
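Caller-side sketch of the updated writeArchiveToBuffer signature (Members is
an assumed, already-populated array): the old WriteSymtab boolean becomes the
SymtabWritingMode enum, matching the MachO universal-binary call site earlier
in this patch.

    #include "llvm/Object/ArchiveWriter.h"
    #include "llvm/Support/MemoryBuffer.h"

    using namespace llvm;

    Expected<std::unique_ptr<MemoryBuffer>>
    buildArchive(ArrayRef<NewArchiveMember> Members) {
      return writeArchiveToBuffer(Members, SymtabWritingMode::NormalSymtab,
                                  object::Archive::K_GNU,
                                  /*Deterministic=*/true, /*Thin=*/false);
    }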
diff --git a/contrib/llvm-project/llvm/lib/Object/Binary.cpp b/contrib/llvm-project/llvm/lib/Object/Binary.cpp
index d18aed8b3b8c..0b9d95485287 100644
--- a/contrib/llvm-project/llvm/lib/Object/Binary.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/Binary.cpp
@@ -87,6 +87,9 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
case file_magic::cuda_fatbinary:
case file_magic::coff_cl_gl_object:
case file_magic::dxcontainer_object:
+ case file_magic::offload_bundle:
+ case file_magic::offload_bundle_compressed:
+ case file_magic::spirv_object:
// Unrecognized object file format.
return errorCodeToError(object_error::invalid_file_type);
case file_magic::offload_binary:
diff --git a/contrib/llvm-project/llvm/lib/Object/COFFImportFile.cpp b/contrib/llvm-project/llvm/lib/Object/COFFImportFile.cpp
index 765c12cc076c..eeb13ffe9c11 100644
--- a/contrib/llvm-project/llvm/lib/Object/COFFImportFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/COFFImportFile.cpp
@@ -33,21 +33,6 @@ using namespace llvm;
namespace llvm {
namespace object {
-static bool is32bit(MachineTypes Machine) {
- switch (Machine) {
- default:
- llvm_unreachable("unsupported machine");
- case IMAGE_FILE_MACHINE_ARM64:
- case IMAGE_FILE_MACHINE_ARM64EC:
- case IMAGE_FILE_MACHINE_ARM64X:
- case IMAGE_FILE_MACHINE_AMD64:
- return false;
- case IMAGE_FILE_MACHINE_ARMNT:
- case IMAGE_FILE_MACHINE_I386:
- return true;
- }
-}
-
static uint16_t getImgRelRelocation(MachineTypes Machine) {
switch (Machine) {
default:
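The removed is32bit is replaced throughout this file by is64Bit checks with
the polarity inverted. For reference, an equivalent stand-in derived from the
deleted switch (named is64BitMachine here so as not to imply the exact
upstream signature):

    #include "llvm/BinaryFormat/COFF.h"

    static bool is64BitMachine(llvm::COFF::MachineTypes Machine) {
      switch (Machine) {
      case llvm::COFF::IMAGE_FILE_MACHINE_ARM64:
      case llvm::COFF::IMAGE_FILE_MACHINE_ARM64EC:
      case llvm::COFF::IMAGE_FILE_MACHINE_ARM64X:
      case llvm::COFF::IMAGE_FILE_MACHINE_AMD64:
        return true;
      case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT:
      case llvm::COFF::IMAGE_FILE_MACHINE_I386:
        return false;
      default:
        return false; // the deleted code hit llvm_unreachable here
      }
    }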
@@ -106,11 +91,11 @@ static ImportNameType getNameType(StringRef Sym, StringRef ExtName,
// stdcall function still omits the underscore (IMPORT_NAME_NOPREFIX).
// See the comment in isDecorated in COFFModuleDefinition.cpp for more
// details.
- if (ExtName.startswith("_") && ExtName.contains('@') && !MinGW)
+ if (ExtName.starts_with("_") && ExtName.contains('@') && !MinGW)
return IMPORT_NAME;
if (Sym != ExtName)
return IMPORT_NAME_UNDECORATE;
- if (Machine == IMAGE_FILE_MACHINE_I386 && Sym.startswith("_"))
+ if (Machine == IMAGE_FILE_MACHINE_I386 && Sym.starts_with("_"))
return IMPORT_NAME_NOPREFIX;
return IMPORT_NAME;
}
@@ -120,7 +105,7 @@ static Expected<std::string> replace(StringRef S, StringRef From,
size_t Pos = S.find(From);
// From and To may be mangled, but substrings in S may not.
- if (Pos == StringRef::npos && From.startswith("_") && To.startswith("_")) {
+ if (Pos == StringRef::npos && From.starts_with("_") && To.starts_with("_")) {
From = From.substr(1);
To = To.substr(1);
Pos = S.find(From);
@@ -155,7 +140,7 @@ class ObjectFactory {
public:
ObjectFactory(StringRef S, MachineTypes M)
- : Machine(M), ImportName(S), Library(S.drop_back(4)),
+ : Machine(M), ImportName(S), Library(llvm::sys::path::stem(S)),
ImportDescriptorSymbolName(("__IMPORT_DESCRIPTOR_" + Library).str()),
NullThunkSymbolName(("\x7f" + Library + "_NULL_THUNK_DATA").str()) {}
@@ -204,7 +189,7 @@ ObjectFactory::createImportDescriptor(std::vector<uint8_t> &Buffer) {
(ImportName.size() + 1)),
u32(NumberOfSymbols),
u16(0),
- u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : C_Invalid),
+ u16(is64Bit(Machine) ? C_Invalid : IMAGE_FILE_32BIT_MACHINE),
};
append(Buffer, Header);
@@ -340,7 +325,7 @@ ObjectFactory::createNullImportDescriptor(std::vector<uint8_t> &Buffer) {
sizeof(coff_import_directory_table_entry)),
u32(NumberOfSymbols),
u16(0),
- u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : C_Invalid),
+ u16(is64Bit(Machine) ? C_Invalid : IMAGE_FILE_32BIT_MACHINE),
};
append(Buffer, Header);
@@ -389,7 +374,7 @@ ObjectFactory::createNullImportDescriptor(std::vector<uint8_t> &Buffer) {
NewArchiveMember ObjectFactory::createNullThunk(std::vector<uint8_t> &Buffer) {
const uint32_t NumberOfSections = 2;
const uint32_t NumberOfSymbols = 1;
- uint32_t VASize = is32bit(Machine) ? 4 : 8;
+ uint32_t VASize = is64Bit(Machine) ? 8 : 4;
// COFF Header
coff_file_header Header{
@@ -403,7 +388,7 @@ NewArchiveMember ObjectFactory::createNullThunk(std::vector<uint8_t> &Buffer) {
VASize),
u32(NumberOfSymbols),
u16(0),
- u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : C_Invalid),
+ u16(is64Bit(Machine) ? C_Invalid : IMAGE_FILE_32BIT_MACHINE),
};
append(Buffer, Header);
@@ -418,8 +403,8 @@ NewArchiveMember ObjectFactory::createNullThunk(std::vector<uint8_t> &Buffer) {
u32(0),
u16(0),
u16(0),
- u32((is32bit(Machine) ? IMAGE_SCN_ALIGN_4BYTES
- : IMAGE_SCN_ALIGN_8BYTES) |
+ u32((is64Bit(Machine) ? IMAGE_SCN_ALIGN_8BYTES
+ : IMAGE_SCN_ALIGN_4BYTES) |
IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
IMAGE_SCN_MEM_WRITE)},
{{'.', 'i', 'd', 'a', 't', 'a', '$', '4'},
@@ -432,8 +417,8 @@ NewArchiveMember ObjectFactory::createNullThunk(std::vector<uint8_t> &Buffer) {
u32(0),
u16(0),
u16(0),
- u32((is32bit(Machine) ? IMAGE_SCN_ALIGN_4BYTES
- : IMAGE_SCN_ALIGN_8BYTES) |
+ u32((is64Bit(Machine) ? IMAGE_SCN_ALIGN_8BYTES
+ : IMAGE_SCN_ALIGN_4BYTES) |
IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
IMAGE_SCN_MEM_WRITE)},
};
@@ -441,12 +426,12 @@ NewArchiveMember ObjectFactory::createNullThunk(std::vector<uint8_t> &Buffer) {
// .idata$5, ILT
append(Buffer, u32(0));
- if (!is32bit(Machine))
+ if (is64Bit(Machine))
append(Buffer, u32(0));
// .idata$4, IAT
append(Buffer, u32(0));
- if (!is32bit(Machine))
+ if (is64Bit(Machine))
append(Buffer, u32(0));
// Symbol Table
@@ -593,7 +578,7 @@ Error writeImportLibrary(StringRef ImportName, StringRef Path,
std::vector<uint8_t> NullThunk;
Members.push_back(OF.createNullThunk(NullThunk));
- for (COFFShortExport E : Exports) {
+ for (const COFFShortExport &E : Exports) {
if (E.Private)
continue;
@@ -625,9 +610,10 @@ Error writeImportLibrary(StringRef ImportName, StringRef Path,
OF.createShortImport(*Name, E.Ordinal, ImportType, NameType));
}
- return writeArchive(Path, Members, /*WriteSymtab*/ true,
- object::Archive::K_GNU,
- /*Deterministic*/ true, /*Thin*/ false);
+ return writeArchive(Path, Members, SymtabWritingMode::NormalSymtab,
+ MinGW ? object::Archive::K_GNU : object::Archive::K_COFF,
+ /*Deterministic*/ true, /*Thin*/ false,
+ /*OldArchiveBuf*/ nullptr, isArm64EC(Machine));
}
} // namespace object
diff --git a/contrib/llvm-project/llvm/lib/Object/COFFModuleDefinition.cpp b/contrib/llvm-project/llvm/lib/Object/COFFModuleDefinition.cpp
index a33949733c8e..648f01f823d0 100644
--- a/contrib/llvm-project/llvm/lib/Object/COFFModuleDefinition.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/COFFModuleDefinition.cpp
@@ -74,7 +74,7 @@ static bool isDecorated(StringRef Sym, bool MingwDef) {
// We can't check for a leading underscore here, since function names
// themselves can start with an underscore, while a second one still needs
// to be added.
- return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") ||
+ return Sym.starts_with("@") || Sym.contains("@@") || Sym.starts_with("?") ||
(!MingwDef && Sym.contains('@'));
}
@@ -97,7 +97,7 @@ public:
}
case '=':
Buf = Buf.drop_front();
- if (Buf.startswith("=")) {
+ if (Buf.starts_with("=")) {
Buf = Buf.drop_front();
return Token(EqualEqual, "==");
}
diff --git a/contrib/llvm-project/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/COFFObjectFile.cpp
index 08eb0d034c53..8700912614db 100644
--- a/contrib/llvm-project/llvm/lib/Object/COFFObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/COFFObjectFile.cpp
@@ -336,7 +336,7 @@ bool COFFObjectFile::isDebugSection(DataRefImpl Ref) const {
return false;
}
StringRef SectionName = SectionNameOrErr.get();
- return SectionName.startswith(".debug");
+ return SectionName.starts_with(".debug");
}
unsigned COFFObjectFile::getSectionID(SectionRef Sec) const {
@@ -1203,9 +1203,9 @@ COFFObjectFile::getSectionName(const coff_section *Sec) const {
StringRef Name = StringRef(Sec->Name, COFF::NameSize).split('\0').first;
// Check for string table entry. First byte is '/'.
- if (Name.startswith("/")) {
+ if (Name.starts_with("/")) {
uint32_t Offset;
- if (Name.startswith("//")) {
+ if (Name.starts_with("//")) {
if (decodeBase64StringEntry(Name.substr(2), Offset))
return createStringError(object_error::parse_failed,
"invalid section name");
@@ -1907,7 +1907,7 @@ Error ResourceSectionRef::load(const COFFObjectFile *O, const SectionRef &S) {
Expected<StringRef> Contents = Section.getContents();
if (!Contents)
return Contents.takeError();
- BBS = BinaryByteStream(*Contents, support::little);
+ BBS = BinaryByteStream(*Contents, llvm::endianness::little);
const coff_section *COFFSect = Obj->getCOFFSection(Section);
ArrayRef<coff_relocation> OrigRelocs = Obj->getRelocations(COFFSect);
Relocs.reserve(OrigRelocs.size());
diff --git a/contrib/llvm-project/llvm/lib/Object/DXContainer.cpp b/contrib/llvm-project/llvm/lib/Object/DXContainer.cpp
index 48932afea84b..4aabe9cea3e5 100644
--- a/contrib/llvm-project/llvm/lib/Object/DXContainer.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/DXContainer.cpp
@@ -9,6 +9,7 @@
#include "llvm/Object/DXContainer.h"
#include "llvm/BinaryFormat/DXContainer.h"
#include "llvm/Object/Error.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/FormatVariadic.h"
using namespace llvm;
@@ -100,6 +101,31 @@ Error DXContainer::parsePSVInfo(StringRef Part) {
return Error::success();
}
+Error DirectX::Signature::initialize(StringRef Part) {
+ dxbc::ProgramSignatureHeader SigHeader;
+ if (Error Err = readStruct(Part, Part.begin(), SigHeader))
+ return Err;
+ size_t Size = sizeof(dxbc::ProgramSignatureElement) * SigHeader.ParamCount;
+
+ if (Part.size() < Size + SigHeader.FirstParamOffset)
+ return parseFailed("Signature parameters extend beyond the part boundary");
+
+ Parameters.Data = Part.substr(SigHeader.FirstParamOffset, Size);
+
+ StringTableOffset = SigHeader.FirstParamOffset + static_cast<uint32_t>(Size);
+ StringTable = Part.substr(SigHeader.FirstParamOffset + Size);
+
+ for (const auto &Param : Parameters) {
+ if (Param.NameOffset < StringTableOffset)
+ return parseFailed("Invalid parameter name offset: name starts before "
+ "the first name offset");
+ if (Param.NameOffset - StringTableOffset > StringTable.size())
+ return parseFailed("Invalid parameter name offset: name starts after the "
+ "end of the part data");
+ }
+ return Error::success();
+}
+
Error DXContainer::parsePartOffsets() {
uint32_t LastOffset =
sizeof(dxbc::Header) + (Header.PartCount * sizeof(uint32_t));
@@ -153,6 +179,18 @@ Error DXContainer::parsePartOffsets() {
if (Error Err = parsePSVInfo(PartData))
return Err;
break;
+ case dxbc::PartType::ISG1:
+ if (Error Err = InputSignature.initialize(PartData))
+ return Err;
+ break;
+ case dxbc::PartType::OSG1:
+ if (Error Err = OutputSignature.initialize(PartData))
+ return Err;
+ break;
+ case dxbc::PartType::PSG1:
+ if (Error Err = PatchConstantSignature.initialize(PartData))
+ return Err;
+ break;
case dxbc::PartType::Unknown:
break;
}
@@ -223,14 +261,17 @@ Error DirectX::PSVRuntimeInfo::parse(uint16_t ShaderKind) {
if (sys::IsBigEndianHost)
Info.swapBytes(ShaderStage);
BasicInfo = Info;
- } else {
+ } else if (PSVVersion == 0) {
v0::RuntimeInfo Info;
if (Error Err = readStruct(PSVInfoData, Current, Info))
return Err;
if (sys::IsBigEndianHost)
Info.swapBytes(ShaderStage);
BasicInfo = Info;
- }
+ } else
+ return parseFailed(
+ "Cannot read PSV Runtime Info, unsupported PSV version.");
+
Current += Size;
uint32_t ResourceCount = 0;
@@ -251,7 +292,157 @@ Error DirectX::PSVRuntimeInfo::parse(uint16_t ShaderKind) {
"Resource binding data extends beyond the bounds of the part");
Current += BindingDataSize;
+ } else
+ Resources.Stride = sizeof(v2::ResourceBindInfo);
+
+ // PSV version 0 ends after the resource bindings.
+ if (PSVVersion == 0)
+ return Error::success();
+
+ // The string table starts at a 4-byte-aligned offset.
+ Current = reinterpret_cast<const char *>(
+ alignTo<4>(reinterpret_cast<uintptr_t>(Current)));
+
+ uint32_t StringTableSize = 0;
+ if (Error Err = readInteger(Data, Current, StringTableSize))
+ return Err;
+ if (StringTableSize % 4 != 0)
+ return parseFailed("String table misaligned");
+ Current += sizeof(uint32_t);
+ StringTable = StringRef(Current, StringTableSize);
+
+ Current += StringTableSize;
+
+ uint32_t SemanticIndexTableSize = 0;
+ if (Error Err = readInteger(Data, Current, SemanticIndexTableSize))
+ return Err;
+ Current += sizeof(uint32_t);
+
+ SemanticIndexTable.reserve(SemanticIndexTableSize);
+ for (uint32_t I = 0; I < SemanticIndexTableSize; ++I) {
+ uint32_t Index = 0;
+ if (Error Err = readInteger(Data, Current, Index))
+ return Err;
+ Current += sizeof(uint32_t);
+ SemanticIndexTable.push_back(Index);
+ }
+
+ uint8_t InputCount = getSigInputCount();
+ uint8_t OutputCount = getSigOutputCount();
+ uint8_t PatchOrPrimCount = getSigPatchOrPrimCount();
+
+ uint32_t ElementCount = InputCount + OutputCount + PatchOrPrimCount;
+
+ if (ElementCount > 0) {
+ if (Error Err = readInteger(Data, Current, SigInputElements.Stride))
+ return Err;
+ Current += sizeof(uint32_t);
+ // Assign the stride to all the arrays.
+ SigOutputElements.Stride = SigPatchOrPrimElements.Stride =
+ SigInputElements.Stride;
+
+ if (Data.end() - Current < ElementCount * SigInputElements.Stride)
+ return parseFailed(
+ "Signature elements extend beyond the size of the part");
+
+ size_t InputSize = SigInputElements.Stride * InputCount;
+ SigInputElements.Data = Data.substr(Current - Data.begin(), InputSize);
+ Current += InputSize;
+
+ size_t OutputSize = SigOutputElements.Stride * OutputCount;
+ SigOutputElements.Data = Data.substr(Current - Data.begin(), OutputSize);
+ Current += OutputSize;
+
+ size_t PSize = SigPatchOrPrimElements.Stride * PatchOrPrimCount;
+ SigPatchOrPrimElements.Data = Data.substr(Current - Data.begin(), PSize);
+ Current += PSize;
+ }
+
+ ArrayRef<uint8_t> OutputVectorCounts = getOutputVectorCounts();
+ uint8_t PatchConstOrPrimVectorCount = getPatchConstOrPrimVectorCount();
+ uint8_t InputVectorCount = getInputVectorCount();
+
+ auto maskDwordSize = [](uint8_t Vector) {
+ return (static_cast<uint32_t>(Vector) + 7) >> 3;
+ };
+
+ auto mapTableSize = [maskDwordSize](uint8_t X, uint8_t Y) {
+ return maskDwordSize(Y) * X * 4;
+ };
+
+ if (usesViewID()) {
+ for (uint32_t I = 0; I < OutputVectorCounts.size(); ++I) {
+ // The vector mask is one bit per component and 4 components per vector.
+ // We can compute the number of dwords required by rounding up to the next
+ // multiple of 8.
+ uint32_t NumDwords =
+ maskDwordSize(static_cast<uint32_t>(OutputVectorCounts[I]));
+ size_t NumBytes = NumDwords * sizeof(uint32_t);
+ OutputVectorMasks[I].Data = Data.substr(Current - Data.begin(), NumBytes);
+ Current += NumBytes;
+ }
+
+ if (ShaderStage == Triple::Hull && PatchConstOrPrimVectorCount > 0) {
+ uint32_t NumDwords = maskDwordSize(PatchConstOrPrimVectorCount);
+ size_t NumBytes = NumDwords * sizeof(uint32_t);
+ PatchOrPrimMasks.Data = Data.substr(Current - Data.begin(), NumBytes);
+ Current += NumBytes;
+ }
+ }
+
+ // Input/Output mapping table
+ for (uint32_t I = 0; I < OutputVectorCounts.size(); ++I) {
+ if (InputVectorCount == 0 || OutputVectorCounts[I] == 0)
+ continue;
+ uint32_t NumDwords = mapTableSize(InputVectorCount, OutputVectorCounts[I]);
+ size_t NumBytes = NumDwords * sizeof(uint32_t);
+ InputOutputMap[I].Data = Data.substr(Current - Data.begin(), NumBytes);
+ Current += NumBytes;
+ }
+
+ // Hull shader: Input/Patch mapping table
+ if (ShaderStage == Triple::Hull && PatchConstOrPrimVectorCount > 0 &&
+ InputVectorCount > 0) {
+ uint32_t NumDwords =
+ mapTableSize(InputVectorCount, PatchConstOrPrimVectorCount);
+ size_t NumBytes = NumDwords * sizeof(uint32_t);
+ InputPatchMap.Data = Data.substr(Current - Data.begin(), NumBytes);
+ Current += NumBytes;
+ }
+
+ // Domain Shader: Patch/Output mapping table
+ if (ShaderStage == Triple::Domain && PatchConstOrPrimVectorCount > 0 &&
+ OutputVectorCounts[0] > 0) {
+ uint32_t NumDwords =
+ mapTableSize(PatchConstOrPrimVectorCount, OutputVectorCounts[0]);
+ size_t NumBytes = NumDwords * sizeof(uint32_t);
+ PatchOutputMap.Data = Data.substr(Current - Data.begin(), NumBytes);
+ Current += NumBytes;
}
return Error::success();
}
+
+uint8_t DirectX::PSVRuntimeInfo::getSigInputCount() const {
+ if (const auto *P = std::get_if<dxbc::PSV::v2::RuntimeInfo>(&BasicInfo))
+ return P->SigInputElements;
+ if (const auto *P = std::get_if<dxbc::PSV::v1::RuntimeInfo>(&BasicInfo))
+ return P->SigInputElements;
+ return 0;
+}
+
+uint8_t DirectX::PSVRuntimeInfo::getSigOutputCount() const {
+ if (const auto *P = std::get_if<dxbc::PSV::v2::RuntimeInfo>(&BasicInfo))
+ return P->SigOutputElements;
+ if (const auto *P = std::get_if<dxbc::PSV::v1::RuntimeInfo>(&BasicInfo))
+ return P->SigOutputElements;
+ return 0;
+}
+
+uint8_t DirectX::PSVRuntimeInfo::getSigPatchOrPrimCount() const {
+ if (const auto *P = std::get_if<dxbc::PSV::v2::RuntimeInfo>(&BasicInfo))
+ return P->SigPatchOrPrimElements;
+ if (const auto *P = std::get_if<dxbc::PSV::v1::RuntimeInfo>(&BasicInfo))
+ return P->SigPatchOrPrimElements;
+ return 0;
+}
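
The three getters above share one dispatch idiom: the runtime info lives in a
std::variant keyed by PSV version, and std::get_if probes the newest layout
first. A reduced, self-contained sketch of the same idiom, with hypothetical
v1/v2 structs standing in for the dxbc::PSV types:

#include <cstdint>
#include <variant>

namespace v1 { struct Info { uint8_t SigInputElements; }; }
namespace v2 { struct Info { uint8_t SigInputElements; uint8_t Extra; }; }

struct RuntimeInfoHolder {
  // v0 (monostate here) predates signature data, so the getter falls through
  // to a zero default.
  std::variant<std::monostate, v1::Info, v2::Info> BasicInfo;

  uint8_t getSigInputCount() const {
    // Probe the newest layout first, then older layouts that still carry
    // the field.
    if (const auto *P = std::get_if<v2::Info>(&BasicInfo))
      return P->SigInputElements;
    if (const auto *P = std::get_if<v1::Info>(&BasicInfo))
      return P->SigInputElements;
    return 0;
  }
};
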
diff --git a/contrib/llvm-project/llvm/lib/Object/ELF.cpp b/contrib/llvm-project/llvm/lib/Object/ELF.cpp
index 0d1862e57371..300639f2bfa0 100644
--- a/contrib/llvm-project/llvm/lib/Object/ELF.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/ELF.cpp
@@ -273,6 +273,7 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) {
break;
case ELF::EM_AARCH64:
switch (Type) {
+ STRINGIFY_ENUM_CASE(ELF, SHT_AARCH64_AUTH_RELR);
STRINGIFY_ENUM_CASE(ELF, SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC);
STRINGIFY_ENUM_CASE(ELF, SHT_AARCH64_MEMTAG_GLOBALS_STATIC);
}
@@ -645,11 +646,36 @@ ELFFile<ELFT>::toMappedAddr(uint64_t VAddr, WarningHandler WarnHandler) const {
return base() + Offset;
}
-template <class ELFT>
-Expected<std::vector<BBAddrMap>>
-ELFFile<ELFT>::decodeBBAddrMap(const Elf_Shdr &Sec,
- const Elf_Shdr *RelaSec) const {
- bool IsRelocatable = getHeader().e_type == ELF::ET_REL;
+// Helper to extract and decode the next ULEB128 value as an unsigned
+// integer of type IntTy.
+// Returns zero and sets ULEBSizeErr if the ULEB128 value exceeds the maximum
+// of IntTy, or if ULEBSizeErr is already in an error state.
+// ULEBSizeErr is an out parameter used to report errors.
+template <typename IntTy, std::enable_if_t<std::is_unsigned_v<IntTy>, int> = 0>
+static IntTy readULEB128As(DataExtractor &Data, DataExtractor::Cursor &Cur,
+ Error &ULEBSizeErr) {
+ // Bail out and do not extract data if ULEBSizeErr is already set.
+ if (ULEBSizeErr)
+ return 0;
+ uint64_t Offset = Cur.tell();
+ uint64_t Value = Data.getULEB128(Cur);
+ if (Value > std::numeric_limits<IntTy>::max()) {
+ ULEBSizeErr = createError("ULEB128 value at offset 0x" +
+ Twine::utohexstr(Offset) + " exceeds UINT" +
+ Twine(std::numeric_limits<IntTy>::digits) +
+ "_MAX (0x" + Twine::utohexstr(Value) + ")");
+ return 0;
+ }
+ return static_cast<IntTy>(Value);
+}
+
+template <typename ELFT>
+static Expected<std::vector<BBAddrMap>>
+decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
+ const typename ELFFile<ELFT>::Elf_Shdr &Sec,
+ const typename ELFFile<ELFT>::Elf_Shdr *RelaSec,
+ std::vector<PGOAnalysisMap> *PGOAnalyses) {
+ bool IsRelocatable = EF.getHeader().e_type == ELF::ET_REL;
// This DenseMap maps the offset of each function (the location of the
// reference to the function in the SHT_LLVM_BB_ADDR_MAP section) to the
@@ -659,44 +685,28 @@ ELFFile<ELFT>::decodeBBAddrMap(const Elf_Shdr &Sec,
assert(RelaSec &&
"Can't read a SHT_LLVM_BB_ADDR_MAP section in a relocatable "
"object file without providing a relocation section.");
- Expected<Elf_Rela_Range> Relas = this->relas(*RelaSec);
+ Expected<typename ELFFile<ELFT>::Elf_Rela_Range> Relas = EF.relas(*RelaSec);
if (!Relas)
return createError("unable to read relocations for section " +
- describe(*this, Sec) + ": " +
+ describe(EF, Sec) + ": " +
toString(Relas.takeError()));
- for (Elf_Rela Rela : *Relas)
+ for (typename ELFFile<ELFT>::Elf_Rela Rela : *Relas)
FunctionOffsetTranslations[Rela.r_offset] = Rela.r_addend;
}
- Expected<ArrayRef<uint8_t>> ContentsOrErr = getSectionContents(Sec);
+ Expected<ArrayRef<uint8_t>> ContentsOrErr = EF.getSectionContents(Sec);
if (!ContentsOrErr)
return ContentsOrErr.takeError();
ArrayRef<uint8_t> Content = *ContentsOrErr;
- DataExtractor Data(Content, isLE(), ELFT::Is64Bits ? 8 : 4);
+ DataExtractor Data(Content, EF.isLE(), ELFT::Is64Bits ? 8 : 4);
std::vector<BBAddrMap> FunctionEntries;
DataExtractor::Cursor Cur(0);
Error ULEBSizeErr = Error::success();
Error MetadataDecodeErr = Error::success();
- // Helper to extract and decode the next ULEB128 value as uint32_t.
- // Returns zero and sets ULEBSizeErr if the ULEB128 value exceeds the uint32_t
- // limit.
- // Also returns zero if ULEBSizeErr is already in an error state.
- auto ReadULEB128AsUInt32 = [&Data, &Cur, &ULEBSizeErr]() -> uint32_t {
- // Bail out and do not extract data if ULEBSizeErr is already set.
- if (ULEBSizeErr)
- return 0;
- uint64_t Offset = Cur.tell();
- uint64_t Value = Data.getULEB128(Cur);
- if (Value > UINT32_MAX) {
- ULEBSizeErr = createError(
- "ULEB128 value at offset 0x" + Twine::utohexstr(Offset) +
- " exceeds UINT32_MAX (0x" + Twine::utohexstr(Value) + ")");
- return 0;
- }
- return static_cast<uint32_t>(Value);
- };
uint8_t Version = 0;
+ uint8_t Feature = 0;
+ PGOAnalysisMap::Features FeatEnable{};
while (!ULEBSizeErr && !MetadataDecodeErr && Cur &&
Cur.tell() < Content.size()) {
if (Sec.sh_type == ELF::SHT_LLVM_BB_ADDR_MAP) {
@@ -706,10 +716,24 @@ ELFFile<ELFT>::decodeBBAddrMap(const Elf_Shdr &Sec,
if (Version > 2)
return createError("unsupported SHT_LLVM_BB_ADDR_MAP version: " +
Twine(static_cast<int>(Version)));
- Data.getU8(Cur); // Feature byte
+ Feature = Data.getU8(Cur); // Feature byte
+ if (!Cur)
+ break;
+ auto FeatEnableOrErr = PGOAnalysisMap::Features::decode(Feature);
+ if (!FeatEnableOrErr)
+ return FeatEnableOrErr.takeError();
+      FeatEnable = *FeatEnableOrErr;
+ if (Feature != 0 && Version < 2 && Cur)
+ return createError(
+ "version should be >= 2 for SHT_LLVM_BB_ADDR_MAP when "
+ "PGO features are enabled: version = " +
+ Twine(static_cast<int>(Version)) +
+ " feature = " + Twine(static_cast<int>(Feature)));
}
uint64_t SectionOffset = Cur.tell();
- uintX_t Address = static_cast<uintX_t>(Data.getAddress(Cur));
+ auto Address =
+ static_cast<typename ELFFile<ELFT>::uintX_t>(Data.getAddress(Cur));
if (!Cur)
return Cur.takeError();
if (IsRelocatable) {
@@ -718,20 +742,23 @@ ELFFile<ELFT>::decodeBBAddrMap(const Elf_Shdr &Sec,
if (FOTIterator == FunctionOffsetTranslations.end()) {
return createError("failed to get relocation data for offset: " +
Twine::utohexstr(SectionOffset) + " in section " +
- describe(*this, Sec));
+ describe(EF, Sec));
}
Address = FOTIterator->second;
}
- uint32_t NumBlocks = ReadULEB128AsUInt32();
+ uint32_t NumBlocks = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);
+
std::vector<BBAddrMap::BBEntry> BBEntries;
uint32_t PrevBBEndOffset = 0;
for (uint32_t BlockIndex = 0;
!MetadataDecodeErr && !ULEBSizeErr && Cur && (BlockIndex < NumBlocks);
++BlockIndex) {
- uint32_t ID = Version >= 2 ? ReadULEB128AsUInt32() : BlockIndex;
- uint32_t Offset = ReadULEB128AsUInt32();
- uint32_t Size = ReadULEB128AsUInt32();
- uint32_t MD = ReadULEB128AsUInt32();
+ uint32_t ID = Version >= 2
+ ? readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr)
+ : BlockIndex;
+ uint32_t Offset = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);
+ uint32_t Size = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);
+ uint32_t MD = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);
if (Version >= 1) {
// Offset is calculated relative to the end of the previous BB.
Offset += PrevBBEndOffset;
@@ -745,7 +772,45 @@ ELFFile<ELFT>::decodeBBAddrMap(const Elf_Shdr &Sec,
}
BBEntries.push_back({ID, Offset, Size, *MetadataOrErr});
}
- FunctionEntries.push_back({Address, std::move(BBEntries)});
+ FunctionEntries.emplace_back(Address, std::move(BBEntries));
+
+ if (FeatEnable.FuncEntryCount || FeatEnable.BBFreq || FeatEnable.BrProb) {
+ // Function entry count
+ uint64_t FuncEntryCount =
+ FeatEnable.FuncEntryCount
+ ? readULEB128As<uint64_t>(Data, Cur, ULEBSizeErr)
+ : 0;
+
+ std::vector<PGOAnalysisMap::PGOBBEntry> PGOBBEntries;
+ for (uint32_t BlockIndex = 0; !MetadataDecodeErr && !ULEBSizeErr && Cur &&
+ (BlockIndex < NumBlocks);
+ ++BlockIndex) {
+ // Block frequency
+ uint64_t BBF = FeatEnable.BBFreq
+ ? readULEB128As<uint64_t>(Data, Cur, ULEBSizeErr)
+ : 0;
+
+ // Branch probability
+ llvm::SmallVector<PGOAnalysisMap::PGOBBEntry::SuccessorEntry, 2>
+ Successors;
+ if (FeatEnable.BrProb) {
+ auto SuccCount = readULEB128As<uint64_t>(Data, Cur, ULEBSizeErr);
+ for (uint64_t I = 0; I < SuccCount; ++I) {
+ uint32_t BBID = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);
+ uint32_t BrProb = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);
+ if (PGOAnalyses)
+ Successors.push_back({BBID, BranchProbability::getRaw(BrProb)});
+ }
+ }
+
+ if (PGOAnalyses)
+ PGOBBEntries.push_back({BlockFrequency(BBF), std::move(Successors)});
+ }
+
+ if (PGOAnalyses)
+ PGOAnalyses->push_back(
+ {FuncEntryCount, std::move(PGOBBEntries), FeatEnable});
+ }
}
// Either Cur is in the error state, or we have an error in ULEBSizeErr or
// MetadataDecodeErr (but not both), but we join all errors here to be safe.
@@ -756,6 +821,18 @@ ELFFile<ELFT>::decodeBBAddrMap(const Elf_Shdr &Sec,
}
template <class ELFT>
+Expected<std::vector<BBAddrMap>>
+ELFFile<ELFT>::decodeBBAddrMap(const Elf_Shdr &Sec, const Elf_Shdr *RelaSec,
+ std::vector<PGOAnalysisMap> *PGOAnalyses) const {
+ size_t OriginalPGOSize = PGOAnalyses ? PGOAnalyses->size() : 0;
+ auto AddrMapsOrErr = decodeBBAddrMapImpl(*this, Sec, RelaSec, PGOAnalyses);
+  // Remove any analyses appended by this call if an error occurred.
+ if (!AddrMapsOrErr && PGOAnalyses)
+ PGOAnalyses->resize(OriginalPGOSize);
+ return std::move(AddrMapsOrErr);
+}
+
+template <class ELFT>
Expected<
MapVector<const typename ELFT::Shdr *, const typename ELFT::Shdr *>>
ELFFile<ELFT>::getSectionAndRelocations(
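
The readULEB128As<IntTy> helper added in this hunk pairs LLVM's DataExtractor
with a range check against the destination type. A minimal, dependency-free
sketch of the same bounds-checked decode (plain C++, not the DataExtractor
API):

#include <cstdint>
#include <limits>
#include <optional>
#include <vector>

static std::optional<uint32_t>
readULEB128AsU32(const std::vector<uint8_t> &Buf, size_t &Pos) {
  uint64_t Value = 0;
  for (unsigned Shift = 0; Pos < Buf.size() && Shift < 64; Shift += 7) {
    uint8_t Byte = Buf[Pos++];
    Value |= uint64_t(Byte & 0x7f) << Shift;
    if ((Byte & 0x80) == 0) {
      // Mirrors the ULEBSizeErr path: a well-formed ULEB128 that does not
      // fit the destination type is an error, reported here as nullopt.
      if (Value > std::numeric_limits<uint32_t>::max())
        return std::nullopt;
      return static_cast<uint32_t>(Value);
    }
  }
  return std::nullopt; // Truncated or over-long encoding.
}
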
diff --git a/contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp
index 143f9d37849d..3c86b0f25dda 100644
--- a/contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp
@@ -506,6 +506,12 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx1150";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151:
return "gfx1151";
+
+ // AMDGCN GFX12.
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200:
+ return "gfx1200";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201:
+ return "gfx1201";
default:
llvm_unreachable("Unknown EF_AMDGPU_MACH value");
}
@@ -710,10 +716,13 @@ std::vector<ELFPltEntry> ELFObjectFileBase::getPltEntries() const {
template <class ELFT>
Expected<std::vector<BBAddrMap>> static readBBAddrMapImpl(
- const ELFFile<ELFT> &EF, std::optional<unsigned> TextSectionIndex) {
+ const ELFFile<ELFT> &EF, std::optional<unsigned> TextSectionIndex,
+ std::vector<PGOAnalysisMap> *PGOAnalyses) {
using Elf_Shdr = typename ELFT::Shdr;
bool IsRelocatable = EF.getHeader().e_type == ELF::ET_REL;
std::vector<BBAddrMap> BBAddrMaps;
+ if (PGOAnalyses)
+ PGOAnalyses->clear();
const auto &Sections = cantFail(EF.sections());
auto IsMatch = [&](const Elf_Shdr &Sec) -> Expected<bool> {
@@ -742,10 +751,13 @@ Expected<std::vector<BBAddrMap>> static readBBAddrMapImpl(
return createError("unable to get relocation section for " +
describe(EF, *Sec));
Expected<std::vector<BBAddrMap>> BBAddrMapOrErr =
- EF.decodeBBAddrMap(*Sec, RelocSec);
- if (!BBAddrMapOrErr)
+ EF.decodeBBAddrMap(*Sec, RelocSec, PGOAnalyses);
+ if (!BBAddrMapOrErr) {
+ if (PGOAnalyses)
+ PGOAnalyses->clear();
return createError("unable to read " + describe(EF, *Sec) + ": " +
toString(BBAddrMapOrErr.takeError()));
+ }
std::move(BBAddrMapOrErr->begin(), BBAddrMapOrErr->end(),
std::back_inserter(BBAddrMaps));
}
@@ -822,13 +834,14 @@ ELFObjectFileBase::readDynsymVersions() const {
}
Expected<std::vector<BBAddrMap>> ELFObjectFileBase::readBBAddrMap(
- std::optional<unsigned> TextSectionIndex) const {
+ std::optional<unsigned> TextSectionIndex,
+ std::vector<PGOAnalysisMap> *PGOAnalyses) const {
if (const auto *Obj = dyn_cast<ELF32LEObjectFile>(this))
- return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex);
+ return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex, PGOAnalyses);
if (const auto *Obj = dyn_cast<ELF64LEObjectFile>(this))
- return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex);
+ return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex, PGOAnalyses);
if (const auto *Obj = dyn_cast<ELF32BEObjectFile>(this))
- return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex);
+ return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex, PGOAnalyses);
return readBBAddrMapImpl(cast<ELF64BEObjectFile>(this)->getELFFile(),
- TextSectionIndex);
+ TextSectionIndex, PGOAnalyses);
}
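
Caller-side usage of the widened readBBAddrMap signature, as a hedged sketch:
the PGO vector is an optional out parameter, passed as nullptr when only the
address map is wanted. Field names below (FuncEntryCount, FeatEnable) follow
the aggregate initialization visible in the ELF.cpp hunk; dumpEntryCounts is
a hypothetical helper.

#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
#include <vector>

using namespace llvm;
using namespace llvm::object;

static void dumpEntryCounts(const ELFObjectFileBase &Obj) {
  std::vector<PGOAnalysisMap> PGOAnalyses;
  Expected<std::vector<BBAddrMap>> MapsOrErr =
      Obj.readBBAddrMap(/*TextSectionIndex=*/std::nullopt, &PGOAnalyses);
  if (!MapsOrErr) {
    consumeError(MapsOrErr.takeError());
    return;
  }
  // On success the vectors are parallel: one PGOAnalysisMap per function.
  for (const PGOAnalysisMap &PAM : PGOAnalyses)
    if (PAM.FeatEnable.FuncEntryCount)
      outs() << "entry count: " << PAM.FuncEntryCount << "\n";
}
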
diff --git a/contrib/llvm-project/llvm/lib/Object/IRObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/IRObjectFile.cpp
index 091930988bd0..cd93a84fc6fa 100644
--- a/contrib/llvm-project/llvm/lib/Object/IRObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/IRObjectFile.cpp
@@ -12,7 +12,6 @@
#include "llvm/Object/IRObjectFile.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/PointerUnion.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Module.h"
diff --git a/contrib/llvm-project/llvm/lib/Object/IRSymtab.cpp b/contrib/llvm-project/llvm/lib/Object/IRSymtab.cpp
index 14db7a10f310..18fc2e4d4a37 100644
--- a/contrib/llvm-project/llvm/lib/Object/IRSymtab.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/IRSymtab.cpp
@@ -215,6 +215,11 @@ Expected<int> Builder::getComdatIndex(const Comdat *C, const Module *M) {
return P.first->second;
}
+static DenseSet<StringRef> buildPreservedSymbolsSet() {
+ return DenseSet<StringRef>(std::begin(PreservedSymbols),
+ std::end(PreservedSymbols));
+}
+
Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
const SmallPtrSet<GlobalValue *, 4> &Used,
ModuleSymbolTable::Symbol Msym) {
@@ -270,7 +275,9 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
setStr(Sym.IRName, GV->getName());
- bool IsPreservedSymbol = llvm::is_contained(PreservedSymbols, GV->getName());
+ static const DenseSet<StringRef> PreservedSymbolsSet =
+ buildPreservedSymbolsSet();
+ bool IsPreservedSymbol = PreservedSymbolsSet.contains(GV->getName());
if (Used.count(GV) || IsPreservedSymbol)
Sym.Flags |= 1 << storage::Symbol::FB_used;
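
The IRSymtab change replaces a linear llvm::is_contained scan over
PreservedSymbols with a function-local static DenseSet built once on first
use. The same idea in portable C++, with a hypothetical name table and
std::unordered_set standing in for DenseSet:

#include <string_view>
#include <unordered_set>

static constexpr std::string_view PreservedNames[] = {"__stack_chk_guard",
                                                      "__stack_chk_fail"};

static bool isPreserved(std::string_view Name) {
  // Built exactly once (thread-safe since C++11), then reused by every call.
  static const std::unordered_set<std::string_view> Set(
      std::begin(PreservedNames), std::end(PreservedNames));
  return Set.count(Name) != 0; // O(1) average instead of O(n) per query.
}
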
diff --git a/contrib/llvm-project/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/MachOObjectFile.cpp
index 6ca83a955d5a..1cfd0a069463 100644
--- a/contrib/llvm-project/llvm/lib/Object/MachOObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/MachOObjectFile.cpp
@@ -108,9 +108,11 @@ getSectionPtr(const MachOObjectFile &O, MachOObjectFile::LoadCommandInfo L,
return reinterpret_cast<const char*>(SectionAddr);
}
-static const char *getPtr(const MachOObjectFile &O, size_t Offset) {
- assert(Offset <= O.getData().size());
- return O.getData().data() + Offset;
+static const char *getPtr(const MachOObjectFile &O, size_t Offset,
+ size_t MachOFilesetEntryOffset = 0) {
+ assert(Offset <= O.getData().size() &&
+ MachOFilesetEntryOffset <= O.getData().size());
+ return O.getData().data() + Offset + MachOFilesetEntryOffset;
}
static MachO::nlist_base
@@ -208,7 +210,8 @@ getFirstLoadCommandInfo(const MachOObjectFile &Obj) {
if (sizeof(MachO::load_command) > Obj.getHeader().sizeofcmds)
return malformedError("load command 0 extends past the end all load "
"commands in the file");
- return getLoadCommandInfo(Obj, getPtr(Obj, HeaderSize), 0);
+ return getLoadCommandInfo(
+ Obj, getPtr(Obj, HeaderSize, Obj.getMachOFilesetEntryOffset()), 0);
}
static Expected<MachOObjectFile::LoadCommandInfo>
@@ -217,7 +220,8 @@ getNextLoadCommandInfo(const MachOObjectFile &Obj, uint32_t LoadCommandIndex,
unsigned HeaderSize = Obj.is64Bit() ? sizeof(MachO::mach_header_64)
: sizeof(MachO::mach_header);
if (L.Ptr + L.C.cmdsize + sizeof(MachO::load_command) >
- Obj.getData().data() + HeaderSize + Obj.getHeader().sizeofcmds)
+ Obj.getData().data() + Obj.getMachOFilesetEntryOffset() + HeaderSize +
+ Obj.getHeader().sizeofcmds)
return malformedError("load command " + Twine(LoadCommandIndex + 1) +
" extends past the end all load commands in the file");
return getLoadCommandInfo(Obj, L.Ptr + L.C.cmdsize, LoadCommandIndex + 1);
@@ -231,7 +235,8 @@ static void parseHeader(const MachOObjectFile &Obj, T &Header,
"file");
return;
}
- if (auto HeaderOrErr = getStructOrErr<T>(Obj, getPtr(Obj, 0)))
+ if (auto HeaderOrErr = getStructOrErr<T>(
+ Obj, getPtr(Obj, 0, Obj.getMachOFilesetEntryOffset())))
Header = *HeaderOrErr;
else
Err = HeaderOrErr.takeError();
@@ -1247,12 +1252,12 @@ static bool isLoadCommandObsolete(uint32_t cmd) {
Expected<std::unique_ptr<MachOObjectFile>>
MachOObjectFile::create(MemoryBufferRef Object, bool IsLittleEndian,
bool Is64Bits, uint32_t UniversalCputype,
- uint32_t UniversalIndex) {
+ uint32_t UniversalIndex,
+ size_t MachOFilesetEntryOffset) {
Error Err = Error::success();
- std::unique_ptr<MachOObjectFile> Obj(
- new MachOObjectFile(std::move(Object), IsLittleEndian,
- Is64Bits, Err, UniversalCputype,
- UniversalIndex));
+ std::unique_ptr<MachOObjectFile> Obj(new MachOObjectFile(
+ std::move(Object), IsLittleEndian, Is64Bits, Err, UniversalCputype,
+ UniversalIndex, MachOFilesetEntryOffset));
if (Err)
return std::move(Err);
return std::move(Obj);
@@ -1261,8 +1266,10 @@ MachOObjectFile::create(MemoryBufferRef Object, bool IsLittleEndian,
MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
bool Is64bits, Error &Err,
uint32_t UniversalCputype,
- uint32_t UniversalIndex)
- : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object) {
+ uint32_t UniversalIndex,
+ size_t MachOFilesetEntryOffset)
+ : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object),
+ MachOFilesetEntryOffset(MachOFilesetEntryOffset) {
ErrorAsOutParameter ErrAsOutParam(&Err);
uint64_t SizeOfHeaders;
uint32_t cputype;
@@ -2059,9 +2066,9 @@ bool MachOObjectFile::isDebugSection(DataRefImpl Sec) const {
return false;
}
StringRef SectionName = SectionNameOrErr.get();
- return SectionName.startswith("__debug") ||
- SectionName.startswith("__zdebug") ||
- SectionName.startswith("__apple") || SectionName == "__gdb_index" ||
+ return SectionName.starts_with("__debug") ||
+ SectionName.starts_with("__zdebug") ||
+ SectionName.starts_with("__apple") || SectionName == "__gdb_index" ||
SectionName == "__swift_ast";
}
@@ -2076,7 +2083,7 @@ ArrayRef<uint8_t> getSegmentContents(const MachOObjectFile &Obj,
return {};
}
auto &Segment = SegmentOrErr.get();
- if (StringRef(Segment.segname, 16).startswith(SegmentName))
+ if (StringRef(Segment.segname, 16).starts_with(SegmentName))
return arrayRefFromStringRef(Obj.getData().slice(
Segment.fileoff, Segment.fileoff + Segment.filesize));
return {};
@@ -2462,7 +2469,7 @@ StringRef MachOObjectFile::guessLibraryShortName(StringRef Name,
if (c == Name.npos || c == 0)
goto guess_library;
V = Name.slice(c+1, Name.npos);
- if (!V.startswith("Versions/"))
+ if (!V.starts_with("Versions/"))
goto guess_library;
d = Name.rfind('/', c);
if (d == Name.npos)
@@ -2989,7 +2996,7 @@ void ExportEntry::pushNode(uint64_t offset) {
ErrorAsOutParameter ErrAsOutParam(E);
const uint8_t *Ptr = Trie.begin() + offset;
NodeState State(Ptr);
- const char *error;
+ const char *error = nullptr;
uint64_t ExportInfoSize = readULEB128(State.Current, &error);
if (error) {
*E = malformedError("export info size " + Twine(error) +
@@ -3124,7 +3131,7 @@ void ExportEntry::pushNode(uint64_t offset) {
void ExportEntry::pushDownUntilBottom() {
ErrorAsOutParameter ErrAsOutParam(E);
- const char *error;
+ const char *error = nullptr;
while (Stack.back().NextChildIndex < Stack.back().ChildCount) {
NodeState &Top = Stack.back();
CumulativeString.resize(Top.ParentStringLength);
@@ -4761,6 +4768,11 @@ MachOObjectFile::getThreadCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::thread_command>(*this, L.Ptr);
}
+MachO::fileset_entry_command
+MachOObjectFile::getFilesetEntryLoadCommand(const LoadCommandInfo &L) const {
+ return getStruct<MachO::fileset_entry_command>(*this, L.Ptr);
+}
+
MachO::any_relocation_info
MachOObjectFile::getRelocation(DataRefImpl Rel) const {
uint32_t Offset;
@@ -5300,23 +5312,29 @@ bool MachOObjectFile::isRelocatableObject() const {
return getHeader().filetype == MachO::MH_OBJECT;
}
-Expected<std::unique_ptr<MachOObjectFile>>
-ObjectFile::createMachOObjectFile(MemoryBufferRef Buffer,
- uint32_t UniversalCputype,
- uint32_t UniversalIndex) {
+/// Create a MachOObjectFile instance from a given buffer.
+///
+/// \param Buffer Memory buffer containing the MachO binary data.
+/// \param UniversalCputype CPU type when the MachO is part of a universal
+/// binary.
+/// \param UniversalIndex Index of the MachO within a universal binary.
+/// \param MachOFilesetEntryOffset Offset of the MachO entry in a fileset MachO.
+/// \returns A std::unique_ptr to a MachOObjectFile instance on success.
+Expected<std::unique_ptr<MachOObjectFile>> ObjectFile::createMachOObjectFile(
+ MemoryBufferRef Buffer, uint32_t UniversalCputype, uint32_t UniversalIndex,
+ size_t MachOFilesetEntryOffset) {
StringRef Magic = Buffer.getBuffer().slice(0, 4);
if (Magic == "\xFE\xED\xFA\xCE")
- return MachOObjectFile::create(Buffer, false, false,
- UniversalCputype, UniversalIndex);
+ return MachOObjectFile::create(Buffer, false, false, UniversalCputype,
+ UniversalIndex, MachOFilesetEntryOffset);
if (Magic == "\xCE\xFA\xED\xFE")
- return MachOObjectFile::create(Buffer, true, false,
- UniversalCputype, UniversalIndex);
+ return MachOObjectFile::create(Buffer, true, false, UniversalCputype,
+ UniversalIndex, MachOFilesetEntryOffset);
if (Magic == "\xFE\xED\xFA\xCF")
- return MachOObjectFile::create(Buffer, false, true,
- UniversalCputype, UniversalIndex);
+ return MachOObjectFile::create(Buffer, false, true, UniversalCputype,
+ UniversalIndex, MachOFilesetEntryOffset);
if (Magic == "\xCF\xFA\xED\xFE")
- return MachOObjectFile::create(Buffer, true, true,
- UniversalCputype, UniversalIndex);
+ return MachOObjectFile::create(Buffer, true, true, UniversalCputype,
+ UniversalIndex, MachOFilesetEntryOffset);
return make_error<GenericBinaryError>("Unrecognized MachO magic number",
object_error::invalid_file_type);
}
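
The createMachOObjectFile dispatch above keys bitness and endianness off the
first four bytes before any header is parsed. A self-contained sketch of that
classification, with a hypothetical MachOFlavor result type:

#include <cstring>
#include <optional>

struct MachOFlavor {
  bool IsLittleEndian;
  bool Is64Bit;
};

static std::optional<MachOFlavor> classify(const char *Buf, size_t Size) {
  if (Size < 4)
    return std::nullopt;
  // Big-endian 32-bit, little-endian 32-bit, then the 64-bit variants.
  if (memcmp(Buf, "\xFE\xED\xFA\xCE", 4) == 0) return MachOFlavor{false, false};
  if (memcmp(Buf, "\xCE\xFA\xED\xFE", 4) == 0) return MachOFlavor{true, false};
  if (memcmp(Buf, "\xFE\xED\xFA\xCF", 4) == 0) return MachOFlavor{false, true};
  if (memcmp(Buf, "\xCF\xFA\xED\xFE", 4) == 0) return MachOFlavor{true, true};
  return std::nullopt;
}
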
diff --git a/contrib/llvm-project/llvm/lib/Object/MachOUniversalWriter.cpp b/contrib/llvm-project/llvm/lib/Object/MachOUniversalWriter.cpp
index 909a10b2c072..17940495cddd 100644
--- a/contrib/llvm-project/llvm/lib/Object/MachOUniversalWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/MachOUniversalWriter.cpp
@@ -100,7 +100,7 @@ Slice::Slice(const IRObjectFile &IRO, uint32_t CPUType, uint32_t CPUSubType,
Slice::Slice(const MachOObjectFile &O) : Slice(O, calculateAlignment(O)) {}
-using MachoCPUTy = std::pair<unsigned, unsigned>;
+using MachoCPUTy = std::pair<uint32_t, uint32_t>;
static Expected<MachoCPUTy> getMachoCPUFromTriple(Triple TT) {
auto CPU = std::make_pair(MachO::getCPUType(TT), MachO::getCPUSubType(TT));
@@ -117,10 +117,15 @@ static Expected<MachoCPUTy> getMachoCPUFromTriple(StringRef TT) {
return getMachoCPUFromTriple(Triple{TT});
}
+static MachoCPUTy getMachoCPUFromObjectFile(const MachOObjectFile &O) {
+ return std::make_pair(O.getHeader().cputype, O.getHeader().cpusubtype);
+}
+
Expected<Slice> Slice::create(const Archive &A, LLVMContext *LLVMCtx) {
Error Err = Error::success();
std::unique_ptr<MachOObjectFile> MFO = nullptr;
std::unique_ptr<IRObjectFile> IRFO = nullptr;
+ std::optional<MachoCPUTy> CPU = std::nullopt;
for (const Archive::Child &Child : A.children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr = Child.getAsBinary(LLVMCtx);
if (!ChildOrErr)
@@ -134,65 +139,56 @@ Expected<Slice> Slice::create(const Archive &A, LLVMContext *LLVMCtx) {
.c_str());
if (Bin->isMachO()) {
MachOObjectFile *O = cast<MachOObjectFile>(Bin);
- if (IRFO) {
- return createStringError(
- std::errc::invalid_argument,
- "archive member %s is a MachO, while previous archive member "
- "%s was an IR LLVM object",
- O->getFileName().str().c_str(), IRFO->getFileName().str().c_str());
- }
- if (MFO &&
- std::tie(MFO->getHeader().cputype, MFO->getHeader().cpusubtype) !=
- std::tie(O->getHeader().cputype, O->getHeader().cpusubtype)) {
+ MachoCPUTy ObjectCPU = getMachoCPUFromObjectFile(*O);
+
+ if (CPU && CPU != ObjectCPU) {
+        // If CPU is set, at least one of MFO and IRFO is non-null.
+ StringRef PreviousName = MFO ? MFO->getFileName() : IRFO->getFileName();
return createStringError(
std::errc::invalid_argument,
("archive member " + O->getFileName() + " cputype (" +
- Twine(O->getHeader().cputype) + ") and cpusubtype(" +
- Twine(O->getHeader().cpusubtype) +
+ Twine(ObjectCPU.first) + ") and cpusubtype(" +
+ Twine(ObjectCPU.second) +
") does not match previous archive members cputype (" +
- Twine(MFO->getHeader().cputype) + ") and cpusubtype(" +
- Twine(MFO->getHeader().cpusubtype) +
- ") (all members must match) " + MFO->getFileName())
+ Twine(CPU->first) + ") and cpusubtype(" + Twine(CPU->second) +
+ ") (all members must match) " + PreviousName)
.str()
.c_str());
}
if (!MFO) {
ChildOrErr.get().release();
MFO.reset(O);
+ if (!CPU)
+ CPU.emplace(ObjectCPU);
}
} else if (Bin->isIR()) {
IRObjectFile *O = cast<IRObjectFile>(Bin);
- if (MFO) {
- return createStringError(std::errc::invalid_argument,
- "archive member '%s' is an LLVM IR object, "
- "while previous archive member "
- "'%s' was a MachO",
- O->getFileName().str().c_str(),
- MFO->getFileName().str().c_str());
+ Expected<MachoCPUTy> ObjectCPU =
+ getMachoCPUFromTriple(O->getTargetTriple());
+ if (!ObjectCPU)
+ return ObjectCPU.takeError();
+
+ if (CPU && CPU != *ObjectCPU) {
+        // If CPU is set, at least one of MFO and IRFO is non-null.
+ StringRef PreviousName =
+ IRFO ? IRFO->getFileName() : MFO->getFileName();
+ return createStringError(
+ std::errc::invalid_argument,
+ ("archive member " + O->getFileName() + " cputype (" +
+ Twine(ObjectCPU->first) + ") and cpusubtype(" +
+ Twine(ObjectCPU->second) +
+ ") does not match previous archive members cputype (" +
+ Twine(CPU->first) + ") and cpusubtype(" + Twine(CPU->second) +
+ ") (all members must match) " + PreviousName)
+ .str()
+ .c_str());
}
- if (IRFO) {
- Expected<MachoCPUTy> CPUO = getMachoCPUFromTriple(O->getTargetTriple());
- Expected<MachoCPUTy> CPUFO =
- getMachoCPUFromTriple(IRFO->getTargetTriple());
- if (!CPUO)
- return CPUO.takeError();
- if (!CPUFO)
- return CPUFO.takeError();
- if (*CPUO != *CPUFO) {
- return createStringError(
- std::errc::invalid_argument,
- ("archive member " + O->getFileName() + " cputype (" +
- Twine(CPUO->first) + ") and cpusubtype(" + Twine(CPUO->second) +
- ") does not match previous archive members cputype (" +
- Twine(CPUFO->first) + ") and cpusubtype(" +
- Twine(CPUFO->second) + ") (all members must match) " +
- IRFO->getFileName())
- .str()
- .c_str());
- }
- } else {
+
+ if (!IRFO) {
ChildOrErr.get().release();
IRFO.reset(O);
+ if (!CPU)
+ CPU.emplace(*ObjectCPU);
}
} else
return createStringError(std::errc::invalid_argument,
@@ -240,25 +236,48 @@ Expected<Slice> Slice::create(const IRObjectFile &IRO, uint32_t Align) {
return Slice{IRO, CPUType, CPUSubType, std::move(ArchName), Align};
}
-static Expected<SmallVector<MachO::fat_arch, 2>>
+template <typename FatArchTy> struct FatArchTraits {
+ static const uint64_t OffsetLimit;
+ static const std::string StructName;
+ static const uint8_t BitCount;
+};
+
+template <> struct FatArchTraits<MachO::fat_arch> {
+ static const uint64_t OffsetLimit = UINT32_MAX;
+ static const std::string StructName;
+ static const uint8_t BitCount = 32;
+};
+const std::string FatArchTraits<MachO::fat_arch>::StructName = "fat_arch";
+
+template <> struct FatArchTraits<MachO::fat_arch_64> {
+ static const uint64_t OffsetLimit = UINT64_MAX;
+ static const std::string StructName;
+ static const uint8_t BitCount = 64;
+};
+const std::string FatArchTraits<MachO::fat_arch_64>::StructName = "fat_arch_64";
+
+template <typename FatArchTy>
+static Expected<SmallVector<FatArchTy, 2>>
buildFatArchList(ArrayRef<Slice> Slices) {
- SmallVector<MachO::fat_arch, 2> FatArchList;
+ SmallVector<FatArchTy, 2> FatArchList;
uint64_t Offset =
- sizeof(MachO::fat_header) + Slices.size() * sizeof(MachO::fat_arch);
+ sizeof(MachO::fat_header) + Slices.size() * sizeof(FatArchTy);
for (const auto &S : Slices) {
Offset = alignTo(Offset, 1ull << S.getP2Alignment());
- if (Offset > UINT32_MAX)
+ if (Offset > FatArchTraits<FatArchTy>::OffsetLimit)
return createStringError(
std::errc::invalid_argument,
- ("fat file too large to be created because the offset "
- "field in struct fat_arch is only 32-bits and the offset " +
+ ("fat file too large to be created because the offset field in the "
+ "struct " +
+ Twine(FatArchTraits<FatArchTy>::StructName) + " is only " +
+ Twine(FatArchTraits<FatArchTy>::BitCount) + "-bits and the offset " +
Twine(Offset) + " for " + S.getBinary()->getFileName() +
" for architecture " + S.getArchString() + "exceeds that.")
.str()
.c_str());
- MachO::fat_arch FatArch;
+ FatArchTy FatArch = {};
FatArch.cputype = S.getCPUType();
FatArch.cpusubtype = S.getCPUSubType();
FatArch.offset = Offset;
@@ -270,17 +289,15 @@ buildFatArchList(ArrayRef<Slice> Slices) {
return FatArchList;
}
-Error object::writeUniversalBinaryToStream(ArrayRef<Slice> Slices,
- raw_ostream &Out) {
- MachO::fat_header FatHeader;
- FatHeader.magic = MachO::FAT_MAGIC;
- FatHeader.nfat_arch = Slices.size();
-
- Expected<SmallVector<MachO::fat_arch, 2>> FatArchListOrErr =
- buildFatArchList(Slices);
+template <typename FatArchTy>
+static Error writeUniversalArchsToStream(MachO::fat_header FatHeader,
+ ArrayRef<Slice> Slices,
+ raw_ostream &Out) {
+ Expected<SmallVector<FatArchTy, 2>> FatArchListOrErr =
+ buildFatArchList<FatArchTy>(Slices);
if (!FatArchListOrErr)
return FatArchListOrErr.takeError();
- SmallVector<MachO::fat_arch, 2> FatArchList = *FatArchListOrErr;
+ SmallVector<FatArchTy, 2> FatArchList = *FatArchListOrErr;
if (sys::IsLittleEndianHost)
MachO::swapStruct(FatHeader);
@@ -288,17 +305,17 @@ Error object::writeUniversalBinaryToStream(ArrayRef<Slice> Slices,
sizeof(MachO::fat_header));
if (sys::IsLittleEndianHost)
- for (MachO::fat_arch &FA : FatArchList)
+ for (FatArchTy &FA : FatArchList)
MachO::swapStruct(FA);
Out.write(reinterpret_cast<const char *>(FatArchList.data()),
- sizeof(MachO::fat_arch) * FatArchList.size());
+ sizeof(FatArchTy) * FatArchList.size());
if (sys::IsLittleEndianHost)
- for (MachO::fat_arch &FA : FatArchList)
+ for (FatArchTy &FA : FatArchList)
MachO::swapStruct(FA);
size_t Offset =
- sizeof(MachO::fat_header) + sizeof(MachO::fat_arch) * FatArchList.size();
+ sizeof(MachO::fat_header) + sizeof(FatArchTy) * FatArchList.size();
for (size_t Index = 0, Size = Slices.size(); Index < Size; ++Index) {
MemoryBufferRef BufferRef = Slices[Index].getBinary()->getMemoryBufferRef();
assert((Offset <= FatArchList[Index].offset) && "Incorrect slice offset");
@@ -311,8 +328,30 @@ Error object::writeUniversalBinaryToStream(ArrayRef<Slice> Slices,
return Error::success();
}
+Error object::writeUniversalBinaryToStream(ArrayRef<Slice> Slices,
+ raw_ostream &Out,
+ FatHeaderType HeaderType) {
+ MachO::fat_header FatHeader;
+ FatHeader.nfat_arch = Slices.size();
+
+ switch (HeaderType) {
+ case FatHeaderType::Fat64Header:
+ FatHeader.magic = MachO::FAT_MAGIC_64;
+ return writeUniversalArchsToStream<MachO::fat_arch_64>(FatHeader, Slices,
+ Out);
+ break;
+ case FatHeaderType::FatHeader:
+ FatHeader.magic = MachO::FAT_MAGIC;
+ return writeUniversalArchsToStream<MachO::fat_arch>(FatHeader, Slices, Out);
+ break;
+ }
+
+ llvm_unreachable("Invalid fat header type");
+}
+
Error object::writeUniversalBinary(ArrayRef<Slice> Slices,
- StringRef OutputFileName) {
+ StringRef OutputFileName,
+ FatHeaderType HeaderType) {
const bool IsExecutable = any_of(Slices, [](Slice S) {
return sys::fs::can_execute(S.getBinary()->getFileName());
});
@@ -324,7 +363,7 @@ Error object::writeUniversalBinary(ArrayRef<Slice> Slices,
if (!Temp)
return Temp.takeError();
raw_fd_ostream Out(Temp->FD, false);
- if (Error E = writeUniversalBinaryToStream(Slices, Out)) {
+ if (Error E = writeUniversalBinaryToStream(Slices, Out, HeaderType)) {
if (Error DiscardError = Temp->discard())
return joinErrors(std::move(E), std::move(DiscardError));
return E;
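
FatArchTraits above is a standard traits template: a primary declaration plus
one specialization per on-disk record type supplying the values that differ.
A reduced sketch; note that constexpr members, as used below, would also
avoid the out-of-line std::string definitions the patch needs:

#include <cstdint>

struct Rec32 { uint32_t offset; };
struct Rec64 { uint64_t offset; };

template <typename RecTy> struct RecTraits; // Primary: never instantiated.

template <> struct RecTraits<Rec32> {
  static constexpr uint64_t OffsetLimit = UINT32_MAX;
  static constexpr const char *Name = "rec32";
};

template <> struct RecTraits<Rec64> {
  static constexpr uint64_t OffsetLimit = UINT64_MAX;
  static constexpr const char *Name = "rec64";
};

// Generic code reads the per-record limits through the traits.
template <typename RecTy> bool fitsInOffsetField(uint64_t Offset) {
  return Offset <= RecTraits<RecTy>::OffsetLimit;
}
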
diff --git a/contrib/llvm-project/llvm/lib/Object/ModuleSymbolTable.cpp b/contrib/llvm-project/llvm/lib/Object/ModuleSymbolTable.cpp
index 0290a819e5de..ab073e18cb46 100644
--- a/contrib/llvm-project/llvm/lib/Object/ModuleSymbolTable.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/ModuleSymbolTable.cpp
@@ -15,7 +15,6 @@
#include "llvm/Object/ModuleSymbolTable.h"
#include "RecordStreamer.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
@@ -216,7 +215,7 @@ uint32_t ModuleSymbolTable::getSymbolFlags(Symbol S) const {
GV->hasExternalWeakLinkage())
Res |= BasicSymbolRef::SF_Weak;
- if (GV->getName().startswith("llvm."))
+ if (GV->getName().starts_with("llvm."))
Res |= BasicSymbolRef::SF_FormatSpecific;
else if (auto *Var = dyn_cast<GlobalVariable>(GV)) {
if (Var->getSection() == "llvm.metadata")
diff --git a/contrib/llvm-project/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/ObjectFile.cpp
index 0820187f32e1..ca921836b7f6 100644
--- a/contrib/llvm-project/llvm/lib/Object/ObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/ObjectFile.cpp
@@ -79,7 +79,7 @@ uint32_t ObjectFile::getSymbolAlignment(DataRefImpl DRI) const { return 0; }
bool ObjectFile::isSectionBitcode(DataRefImpl Sec) const {
Expected<StringRef> NameOrErr = getSectionName(Sec);
if (NameOrErr)
- return *NameOrErr == ".llvmbc" || *NameOrErr == ".llvm.lto";
+ return *NameOrErr == ".llvm.lto";
consumeError(NameOrErr.takeError());
return false;
}
@@ -158,6 +158,9 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type,
case file_magic::cuda_fatbinary:
case file_magic::offload_binary:
case file_magic::dxcontainer_object:
+ case file_magic::offload_bundle:
+ case file_magic::offload_bundle_compressed:
+ case file_magic::spirv_object:
return errorCodeToError(object_error::invalid_file_type);
case file_magic::tapi_file:
return errorCodeToError(object_error::invalid_file_type);
diff --git a/contrib/llvm-project/llvm/lib/Object/OffloadBinary.cpp b/contrib/llvm-project/llvm/lib/Object/OffloadBinary.cpp
index 342327daf7e4..1de784c44da1 100644
--- a/contrib/llvm-project/llvm/lib/Object/OffloadBinary.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/OffloadBinary.cpp
@@ -204,8 +204,7 @@ OffloadBinary::create(MemoryBufferRef Buf) {
new OffloadBinary(Buf, TheHeader, TheEntry));
}
-std::unique_ptr<MemoryBuffer>
-OffloadBinary::write(const OffloadingImage &OffloadingData) {
+SmallString<0> OffloadBinary::write(const OffloadingImage &OffloadingData) {
// Create a null-terminated string table with all the used strings.
StringTableBuilder StrTab(StringTableBuilder::ELF);
for (auto &KeyAndValue : OffloadingData.StringData) {
@@ -243,7 +242,7 @@ OffloadBinary::write(const OffloadingImage &OffloadingData) {
TheEntry.ImageOffset = BinaryDataSize;
TheEntry.ImageSize = OffloadingData.Image->getBufferSize();
- SmallVector<char> Data;
+ SmallString<0> Data;
Data.reserve(TheHeader.Size);
raw_svector_ostream OS(Data);
OS << StringRef(reinterpret_cast<char *>(&TheHeader), sizeof(Header));
@@ -264,7 +263,7 @@ OffloadBinary::write(const OffloadingImage &OffloadingData) {
OS.write_zeros(TheHeader.Size - OS.tell());
assert(TheHeader.Size == OS.tell() && "Size mismatch");
- return MemoryBuffer::getMemBufferCopy(OS.str());
+ return Data;
}
Error object::extractOffloadBinaries(MemoryBufferRef Buffer,
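
The OffloadBinary::write change streams into a SmallString and returns it by
value rather than copying the bytes into a fresh MemoryBuffer at the end. A
hedged sketch of the pattern, with a hypothetical buildBlob helper and header
bytes:

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"

llvm::SmallString<0> buildBlob(llvm::StringRef Payload) {
  llvm::SmallString<0> Data;
  llvm::raw_svector_ostream OS(Data); // Writes directly into Data; unbuffered.
  OS << "HDR0" << Payload;            // Hypothetical 4-byte header + payload.
  return Data;                        // Moved out; no terminal copy.
}
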
diff --git a/contrib/llvm-project/llvm/lib/Object/RecordStreamer.cpp b/contrib/llvm-project/llvm/lib/Object/RecordStreamer.cpp
index 2548dd6c84d2..891016cf7475 100644
--- a/contrib/llvm-project/llvm/lib/Object/RecordStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/RecordStreamer.cpp
@@ -81,11 +81,6 @@ RecordStreamer::const_iterator RecordStreamer::begin() {
RecordStreamer::const_iterator RecordStreamer::end() { return Symbols.end(); }
-void RecordStreamer::emitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI) {
- MCStreamer::emitInstruction(Inst, STI);
-}
-
void RecordStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
MCStreamer::emitLabel(Symbol);
markDefined(*Symbol);
@@ -211,7 +206,7 @@ void RecordStreamer::flushSymverDirectives() {
for (auto AliasName : Symver.second) {
std::pair<StringRef, StringRef> Split = AliasName.split("@@@");
SmallString<128> NewName;
- if (!Split.second.empty() && !Split.second.startswith("@")) {
+ if (!Split.second.empty() && !Split.second.starts_with("@")) {
// Special processing for "@@@" according
// https://sourceware.org/binutils/docs/as/Symver.html
const char *Separator = IsDefined ? "@@" : "@";
diff --git a/contrib/llvm-project/llvm/lib/Object/RecordStreamer.h b/contrib/llvm-project/llvm/lib/Object/RecordStreamer.h
index a568739d3763..70b41f270720 100644
--- a/contrib/llvm-project/llvm/lib/Object/RecordStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Object/RecordStreamer.h
@@ -45,7 +45,6 @@ private:
public:
RecordStreamer(MCContext &Context, const Module &M);
- void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
void emitAssignment(MCSymbol *Symbol, const MCExpr *Value) override;
bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
diff --git a/contrib/llvm-project/llvm/lib/Object/RelocationResolver.cpp b/contrib/llvm-project/llvm/lib/Object/RelocationResolver.cpp
index 03ac59289528..ae97107f67fa 100644
--- a/contrib/llvm-project/llvm/lib/Object/RelocationResolver.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/RelocationResolver.cpp
@@ -25,7 +25,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TargetParser/Triple.h"
#include <cassert>
-#include <vector>
namespace llvm {
namespace object {
diff --git a/contrib/llvm-project/llvm/lib/Object/SymbolSize.cpp b/contrib/llvm-project/llvm/lib/Object/SymbolSize.cpp
index eee5505b8c14..cb20feffb710 100644
--- a/contrib/llvm-project/llvm/lib/Object/SymbolSize.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/SymbolSize.cpp
@@ -59,6 +59,12 @@ llvm::object::computeSymbolSizes(const ObjectFile &O) {
return Ret;
}
+ if (const auto *E = dyn_cast<XCOFFObjectFile>(&O)) {
+ for (XCOFFSymbolRef Sym : E->symbols())
+ Ret.push_back({Sym, Sym.getSize()});
+ return Ret;
+ }
+
// Collect sorted symbol addresses. Include dummy addresses for the end
// of each section.
std::vector<SymEntry> Addresses;
@@ -86,7 +92,7 @@ llvm::object::computeSymbolSizes(const ObjectFile &O) {
// Compute the size as the gap to the next symbol. If multiple symbols have
// the same address, give both the same size. Because Addresses is sorted,
- // using two pointers to keep track of the current symbol vs. the next symbol
+ // use two pointers to keep track of the current symbol vs. the next symbol
// that doesn't have the same address for size computation.
for (unsigned I = 0, NextI = 0, N = Addresses.size() - 1; I < N; ++I) {
auto &P = Addresses[I];
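
A portable sketch of the gap-based sizing described in the comment above:
with addresses sorted, NextI runs ahead of I past any symbols that share the
current address, so equal-address symbols all receive the same size. (The
real code also appends dummy end-of-section addresses so the last symbol in
each section gets a size.)

#include <algorithm>
#include <cstdint>
#include <vector>

struct Sym {
  uint64_t Addr;
  uint64_t Size = 0;
};

static void computeSizes(std::vector<Sym> &Syms) {
  std::sort(Syms.begin(), Syms.end(),
            [](const Sym &A, const Sym &B) { return A.Addr < B.Addr; });
  for (size_t I = 0, NextI = 0, N = Syms.size(); I + 1 < N; ++I) {
    if (NextI <= I) {
      NextI = I + 1;
      while (NextI < N && Syms[NextI].Addr == Syms[I].Addr)
        ++NextI; // Skip symbols at the same address.
    }
    if (NextI < N)
      Syms[I].Size = Syms[NextI].Addr - Syms[I].Addr;
  }
}
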
diff --git a/contrib/llvm-project/llvm/lib/Object/TapiFile.cpp b/contrib/llvm-project/llvm/lib/Object/TapiFile.cpp
index b5f4d277bbfe..fcf61541941e 100644
--- a/contrib/llvm-project/llvm/lib/Object/TapiFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/TapiFile.cpp
@@ -49,7 +49,8 @@ static SymbolRef::Type getType(const Symbol *Sym) {
TapiFile::TapiFile(MemoryBufferRef Source, const InterfaceFile &Interface,
Architecture Arch)
- : SymbolicFile(ID_TapiFile, Source), Arch(Arch) {
+ : SymbolicFile(ID_TapiFile, Source), Arch(Arch),
+ FileKind(Interface.getFileType()) {
for (const auto *Symbol : Interface.symbols()) {
if (!Symbol->getArchitectures().has(Arch))
continue;
diff --git a/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp
index 11b9b579a8d7..168fb57935d6 100644
--- a/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp
@@ -723,17 +723,21 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
Info.Name = readString(Ctx);
if (IsDefined) {
auto Index = readVaruint32(Ctx);
- if (Index >= DataSegments.size())
- return make_error<GenericBinaryError>("invalid data symbol index",
- object_error::parse_failed);
auto Offset = readVaruint64(Ctx);
auto Size = readVaruint64(Ctx);
- size_t SegmentSize = DataSegments[Index].Data.Content.size();
- if (Offset > SegmentSize)
- return make_error<GenericBinaryError>(
- "invalid data symbol offset: `" + Info.Name + "` (offset: " +
- Twine(Offset) + " segment size: " + Twine(SegmentSize) + ")",
- object_error::parse_failed);
+ if (!(Info.Flags & wasm::WASM_SYMBOL_ABSOLUTE)) {
+ if (static_cast<size_t>(Index) >= DataSegments.size())
+ return make_error<GenericBinaryError>(
+ "invalid data segment index: " + Twine(Index),
+ object_error::parse_failed);
+ size_t SegmentSize = DataSegments[Index].Data.Content.size();
+ if (Offset > SegmentSize)
+ return make_error<GenericBinaryError>(
+ "invalid data symbol offset: `" + Info.Name +
+ "` (offset: " + Twine(Offset) +
+ " segment size: " + Twine(SegmentSize) + ")",
+ object_error::parse_failed);
+ }
Info.DataRef = wasm::WasmDataReference{Index, Offset, Size};
}
break;
@@ -1088,7 +1092,7 @@ Error WasmObjectFile::parseCustomSection(WasmSection &Sec, ReadContext &Ctx) {
} else if (Sec.Name == "target_features") {
if (Error Err = parseTargetFeaturesSection(Ctx))
return Err;
- } else if (Sec.Name.startswith("reloc.")) {
+ } else if (Sec.Name.starts_with("reloc.")) {
if (Error Err = parseRelocSection(Sec.Name, Ctx))
return Err;
}
diff --git a/contrib/llvm-project/llvm/lib/Object/WindowsResource.cpp b/contrib/llvm-project/llvm/lib/Object/WindowsResource.cpp
index 0764dc8f7523..61ca49e290da 100644
--- a/contrib/llvm-project/llvm/lib/Object/WindowsResource.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/WindowsResource.cpp
@@ -50,7 +50,7 @@ WindowsResource::WindowsResource(MemoryBufferRef Source)
: Binary(Binary::ID_WinRes, Source) {
size_t LeadingSize = WIN_RES_MAGIC_SIZE + WIN_RES_NULL_ENTRY_SIZE;
BBS = BinaryByteStream(Data.getBuffer().drop_front(LeadingSize),
- support::little);
+ llvm::endianness::little);
}
// static
diff --git a/contrib/llvm-project/llvm/lib/Object/XCOFFObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/XCOFFObjectFile.cpp
index fa4917e354e9..3fbd51887831 100644
--- a/contrib/llvm-project/llvm/lib/Object/XCOFFObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/XCOFFObjectFile.cpp
@@ -299,7 +299,11 @@ Expected<SymbolRef::Type>
XCOFFObjectFile::getSymbolType(DataRefImpl Symb) const {
XCOFFSymbolRef XCOFFSym = toSymbolRef(Symb);
- if (XCOFFSym.isFunction())
+ Expected<bool> IsFunction = XCOFFSym.isFunction();
+ if (!IsFunction)
+ return IsFunction.takeError();
+
+ if (*IsFunction)
return SymbolRef::ST_Function;
if (XCOFF::C_FILE == XCOFFSym.getStorageClass())
@@ -689,6 +693,10 @@ basic_symbol_iterator XCOFFObjectFile::symbol_end() const {
return basic_symbol_iterator(SymbolRef(SymDRI, this));
}
+XCOFFObjectFile::xcoff_symbol_iterator_range XCOFFObjectFile::symbols() const {
+ return xcoff_symbol_iterator_range(symbol_begin(), symbol_end());
+}
+
section_iterator XCOFFObjectFile::section_begin() const {
DataRefImpl DRI;
DRI.p = getSectionHeaderTableAddress();
@@ -1221,7 +1229,7 @@ std::optional<StringRef> XCOFFObjectFile::tryGetCPUName() const {
return StringRef("future");
}
-bool XCOFFSymbolRef::isFunction() const {
+Expected<bool> XCOFFSymbolRef::isFunction() const {
if (!isCsectSymbol())
return false;
@@ -1229,34 +1237,62 @@ bool XCOFFSymbolRef::isFunction() const {
return true;
Expected<XCOFFCsectAuxRef> ExpCsectAuxEnt = getXCOFFCsectAuxRef();
- if (!ExpCsectAuxEnt) {
- // If we could not get the CSECT auxiliary entry, then treat this symbol as
- // if it isn't a function. Consume the error and return `false` to move on.
- consumeError(ExpCsectAuxEnt.takeError());
- return false;
- }
+ if (!ExpCsectAuxEnt)
+ return ExpCsectAuxEnt.takeError();
const XCOFFCsectAuxRef CsectAuxRef = ExpCsectAuxEnt.get();
- // A function definition should be a label definition.
- // FIXME: This is not necessarily the case when -ffunction-sections is
- // enabled.
- if (!CsectAuxRef.isLabel())
+ if (CsectAuxRef.getStorageMappingClass() != XCOFF::XMC_PR &&
+ CsectAuxRef.getStorageMappingClass() != XCOFF::XMC_GL)
return false;
- if (CsectAuxRef.getStorageMappingClass() != XCOFF::XMC_PR)
+ // A function definition should not be a common type symbol or an external
+ // symbol.
+ if (CsectAuxRef.getSymbolType() == XCOFF::XTY_CM ||
+ CsectAuxRef.getSymbolType() == XCOFF::XTY_ER)
return false;
- const int16_t SectNum = getSectionNumber();
- Expected<DataRefImpl> SI = OwningObjectPtr->getSectionByNum(SectNum);
- if (!SI) {
- // If we could not get the section, then this symbol should not be
- // a function. So consume the error and return `false` to move on.
- consumeError(SI.takeError());
- return false;
+  // If the next symbol is an XTY_LD type symbol with the same address, this
+  // XTY_SD symbol is not a function. Otherwise this is a function symbol
+  // emitted under -ffunction-sections.
+ if (CsectAuxRef.getSymbolType() == XCOFF::XTY_SD) {
+ // If this is a csect with size 0, it won't be a function definition.
+    // This works around the fact that LLVM always generates the following
+    // symbol under -ffunction-sections:
+ // m 0x00000000 .text 1 unamex **No Symbol**
+ // a4 0x00000000 0 0 SD PR 0 0
+ // FIXME: remove or replace this meaningless symbol.
+ if (getSize() == 0)
+ return false;
+
+ xcoff_symbol_iterator NextIt(this);
+ // If this is the last main symbol table entry, there won't be an XTY_LD
+ // type symbol below.
+ if (++NextIt == getObject()->symbol_end())
+ return true;
+
+ if (cantFail(getAddress()) != cantFail(NextIt->getAddress()))
+ return true;
+
+    // Check whether the next symbol is XTY_LD; if so, this symbol is not a
+    // function.
+ Expected<XCOFFCsectAuxRef> NextCsectAuxEnt = NextIt->getXCOFFCsectAuxRef();
+ if (!NextCsectAuxEnt)
+ return NextCsectAuxEnt.takeError();
+
+ if (NextCsectAuxEnt.get().getSymbolType() == XCOFF::XTY_LD)
+ return false;
+
+ return true;
}
- return (OwningObjectPtr->getSectionFlags(SI.get()) & XCOFF::STYP_TEXT);
+ if (CsectAuxRef.getSymbolType() == XCOFF::XTY_LD)
+ return true;
+
+ return createError(
+ "symbol csect aux entry with index " +
+ Twine(getObject()->getSymbolIndex(CsectAuxRef.getEntryAddress())) +
+ " has invalid symbol type " +
+ Twine::utohexstr(CsectAuxRef.getSymbolType()));
}
bool XCOFFSymbolRef::isCsectSymbol() const {
@@ -1275,13 +1311,13 @@ Expected<XCOFFCsectAuxRef> XCOFFSymbolRef::getXCOFFCsectAuxRef() const {
if (auto Err = NameOrErr.takeError())
return std::move(Err);
- uint32_t SymbolIdx = OwningObjectPtr->getSymbolIndex(getEntryAddress());
+ uint32_t SymbolIdx = getObject()->getSymbolIndex(getEntryAddress());
if (!NumberOfAuxEntries) {
return createError("csect symbol \"" + *NameOrErr + "\" with index " +
Twine(SymbolIdx) + " contains no auxiliary entry");
}
- if (!OwningObjectPtr->is64Bit()) {
+ if (!getObject()->is64Bit()) {
    // In XCOFF32, the csect auxiliary entry is always the last auxiliary
// entry for the symbol.
uintptr_t AuxAddr = XCOFFObjectFile::getAdvancedSymbolEntryAddress(
@@ -1294,10 +1330,10 @@ Expected<XCOFFCsectAuxRef> XCOFFSymbolRef::getXCOFFCsectAuxRef() const {
for (uint8_t Index = NumberOfAuxEntries; Index > 0; --Index) {
uintptr_t AuxAddr = XCOFFObjectFile::getAdvancedSymbolEntryAddress(
getEntryAddress(), Index);
- if (*OwningObjectPtr->getSymbolAuxType(AuxAddr) ==
+ if (*getObject()->getSymbolAuxType(AuxAddr) ==
XCOFF::SymbolAuxType::AUX_CSECT) {
#ifndef NDEBUG
- OwningObjectPtr->checkSymbolEntryPointer(AuxAddr);
+ getObject()->checkSymbolEntryPointer(AuxAddr);
#endif
return XCOFFCsectAuxRef(viewAs<XCOFFCsectAuxEnt64>(AuxAddr));
}
@@ -1314,14 +1350,15 @@ Expected<StringRef> XCOFFSymbolRef::getName() const {
if (getStorageClass() & 0x80)
return StringRef("Unimplemented Debug Name");
- if (Entry32) {
- if (Entry32->NameInStrTbl.Magic != XCOFFSymbolRef::NAME_IN_STR_TBL_MAGIC)
- return generateXCOFFFixedNameStringRef(Entry32->SymbolName);
+ if (!getObject()->is64Bit()) {
+ if (getSymbol32()->NameInStrTbl.Magic !=
+ XCOFFSymbolRef::NAME_IN_STR_TBL_MAGIC)
+ return generateXCOFFFixedNameStringRef(getSymbol32()->SymbolName);
- return OwningObjectPtr->getStringTableEntry(Entry32->NameInStrTbl.Offset);
+ return getObject()->getStringTableEntry(getSymbol32()->NameInStrTbl.Offset);
}
- return OwningObjectPtr->getStringTableEntry(Entry64->Offset);
+ return getObject()->getStringTableEntry(getSymbol64()->Offset);
}
// Explicitly instantiate template classes.
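
With isFunction() now returning Expected<bool>, the error policy moves to the
call sites instead of being swallowed inside the query. A hedged caller-side
sketch that reproduces the old lossy behavior where that is acceptable:

#include "llvm/Support/Error.h"

template <typename SymT> bool isFunctionOrFalse(const SymT &Sym) {
  llvm::Expected<bool> IsFunc = Sym.isFunction();
  if (!IsFunc) {
    // The old behavior, now explicit: treat failures as "not a function".
    llvm::consumeError(IsFunc.takeError());
    return false;
  }
  return *IsFunc;
}
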
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/COFFEmitter.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/COFFEmitter.cpp
index 2e72e4fa7f49..7088223b9b67 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/COFFEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/COFFEmitter.cpp
@@ -182,7 +182,7 @@ toDebugS(ArrayRef<CodeViewYAML::YAMLDebugSubsection> Subsections,
}
uint8_t *Buffer = Allocator.Allocate<uint8_t>(Size);
MutableArrayRef<uint8_t> Output(Buffer, Size);
- BinaryStreamWriter Writer(Output, support::little);
+ BinaryStreamWriter Writer(Output, llvm::endianness::little);
Err(Writer.writeInteger<uint32_t>(COFF::DEBUG_SECTION_MAGIC));
for (const auto &B : Builders) {
@@ -314,8 +314,8 @@ template <typename value_type>
raw_ostream &operator<<(raw_ostream &OS,
const binary_le_impl<value_type> &BLE) {
char Buffer[sizeof(BLE.Value)];
- support::endian::write<value_type, support::little, support::unaligned>(
- Buffer, BLE.Value);
+ support::endian::write<value_type, llvm::endianness::little>(Buffer,
+ BLE.Value);
OS.write(Buffer, sizeof(BLE.Value));
return OS;
}
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/COFFYAML.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/COFFYAML.cpp
index 3fe2ea5af08f..cd1db24f0d5d 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/COFFYAML.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/COFFYAML.cpp
@@ -689,11 +689,12 @@ void MappingTraits<COFFYAML::Section>::mapping(IO &IO, COFFYAML::Section &Sec) {
return;
}
- // Uninitialized sections, such as .bss, typically have no data, but the size
- // is carried in SizeOfRawData, even though PointerToRawData is zero.
- if (Sec.SectionData.binary_size() == 0 && Sec.StructuredData.empty() &&
- NC->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
- IO.mapOptional("SizeOfRawData", Sec.Header.SizeOfRawData);
+ IO.mapOptional("SizeOfRawData", Sec.Header.SizeOfRawData, 0U);
+
+ if (!Sec.StructuredData.empty() && Sec.Header.SizeOfRawData) {
+ IO.setError("StructuredData and SizeOfRawData can't be used together");
+ return;
+ }
IO.mapOptional("Relocations", Sec.Relocations);
}
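
The COFFYAML hunk maps SizeOfRawData unconditionally with a default and then
rejects the incompatible key combination via IO.setError. A minimal sketch of
that validate-in-mapping pattern, using a hypothetical SketchSection type:

#include "llvm/Support/YAMLTraits.h"
#include <cstdint>
#include <string>

struct SketchSection {
  uint32_t SizeOfRawData = 0;
  std::string StructuredData;
};

template <> struct llvm::yaml::MappingTraits<SketchSection> {
  static void mapping(llvm::yaml::IO &IO, SketchSection &Sec) {
    IO.mapOptional("SizeOfRawData", Sec.SizeOfRawData, 0U);
    IO.mapOptional("StructuredData", Sec.StructuredData, std::string());
    // Validate after both keys have been read; on input this aborts parsing.
    if (!Sec.StructuredData.empty() && Sec.SizeOfRawData)
      IO.setError("StructuredData and SizeOfRawData can't be used together");
  }
};
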
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
index 02f053bb0e0f..662eb63f835d 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
@@ -900,7 +900,7 @@ YAMLDebugSubsection::fromCodeViewSubection(const StringsAndChecksumsRef &SC,
std::vector<YAMLDebugSubsection>
llvm::CodeViewYAML::fromDebugS(ArrayRef<uint8_t> Data,
const StringsAndChecksumsRef &SC) {
- BinaryStreamReader Reader(Data, support::little);
+ BinaryStreamReader Reader(Data, llvm::endianness::little);
uint32_t Magic;
ExitOnError Err("Invalid .debug$S section!");
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
index 8d2028abfe9b..64e1a58aa71a 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
@@ -61,6 +61,7 @@ LLVM_YAML_DECLARE_ENUM_TRAITS(CPUType)
LLVM_YAML_DECLARE_ENUM_TRAITS(RegisterId)
LLVM_YAML_DECLARE_ENUM_TRAITS(TrampolineType)
LLVM_YAML_DECLARE_ENUM_TRAITS(ThunkOrdinal)
+LLVM_YAML_DECLARE_ENUM_TRAITS(JumpTableEntrySize)
LLVM_YAML_STRONG_TYPEDEF(StringRef, TypeName)
@@ -207,6 +208,15 @@ void ScalarEnumerationTraits<FrameCookieKind>::enumeration(
}
}
+void ScalarEnumerationTraits<JumpTableEntrySize>::enumeration(
+ IO &io, JumpTableEntrySize &FC) {
+ auto ThunkNames = getJumpTableEntrySizeNames();
+ for (const auto &E : ThunkNames) {
+ io.enumCase(FC, E.Name.str().c_str(),
+ static_cast<JumpTableEntrySize>(E.Value));
+ }
+}
+
namespace llvm {
namespace yaml {
template <> struct MappingTraits<LocalVariableAddrRange> {
@@ -586,6 +596,17 @@ template <> void SymbolRecordImpl<AnnotationSym>::map(IO &IO) {
IO.mapRequired("Strings", Symbol.Strings);
}
+template <> void SymbolRecordImpl<JumpTableSym>::map(IO &IO) {
+ IO.mapRequired("BaseOffset", Symbol.BaseOffset);
+ IO.mapRequired("BaseSegment", Symbol.BaseSegment);
+ IO.mapRequired("SwitchType", Symbol.SwitchType);
+ IO.mapRequired("BranchOffset", Symbol.BranchOffset);
+ IO.mapRequired("TableOffset", Symbol.TableOffset);
+ IO.mapRequired("BranchSegment", Symbol.BranchSegment);
+ IO.mapRequired("TableSegment", Symbol.TableSegment);
+ IO.mapRequired("EntriesCount", Symbol.EntriesCount);
+}
+
} // end namespace detail
} // end namespace CodeViewYAML
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLTypeHashing.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLTypeHashing.cpp
index e921ae1e7d8d..0ec8662f42ff 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLTypeHashing.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLTypeHashing.cpp
@@ -49,7 +49,7 @@ DebugHSection llvm::CodeViewYAML::fromDebugH(ArrayRef<uint8_t> DebugH) {
assert(DebugH.size() >= 8);
assert((DebugH.size() - 8) % 8 == 0);
- BinaryStreamReader Reader(DebugH, llvm::support::little);
+ BinaryStreamReader Reader(DebugH, llvm::endianness::little);
DebugHSection DHS;
cantFail(Reader.readInteger(DHS.Magic));
cantFail(Reader.readInteger(DHS.Version));
@@ -69,7 +69,7 @@ ArrayRef<uint8_t> llvm::CodeViewYAML::toDebugH(const DebugHSection &DebugH,
uint32_t Size = 8 + 8 * DebugH.Hashes.size();
uint8_t *Data = Alloc.Allocate<uint8_t>(Size);
MutableArrayRef<uint8_t> Buffer(Data, Size);
- BinaryStreamWriter Writer(Buffer, llvm::support::little);
+ BinaryStreamWriter Writer(Buffer, llvm::endianness::little);
cantFail(Writer.writeInteger(DebugH.Magic));
cantFail(Writer.writeInteger(DebugH.Version));
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp
index e4e2b2a6d21a..99689786a13c 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp
@@ -784,7 +784,7 @@ std::vector<LeafRecord>
llvm::CodeViewYAML::fromDebugT(ArrayRef<uint8_t> DebugTorP,
StringRef SectionName) {
ExitOnError Err("Invalid " + std::string(SectionName) + " section!");
- BinaryStreamReader Reader(DebugTorP, support::little);
+ BinaryStreamReader Reader(DebugTorP, llvm::endianness::little);
CVTypeArray Types;
uint32_t Magic;
@@ -813,7 +813,7 @@ ArrayRef<uint8_t> llvm::CodeViewYAML::toDebugT(ArrayRef<LeafRecord> Leafs,
}
uint8_t *ResultBuffer = Alloc.Allocate<uint8_t>(Size);
MutableArrayRef<uint8_t> Output(ResultBuffer, Size);
- BinaryStreamWriter Writer(Output, support::little);
+ BinaryStreamWriter Writer(Output, llvm::endianness::little);
ExitOnError Err("Error writing type record to " + std::string(SectionName) +
" section");
Err(Writer.writeInteger<uint32_t>(COFF::DEBUG_SECTION_MAGIC));
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/DXContainerEmitter.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/DXContainerEmitter.cpp
index 64b13fc0ccde..09a5e41c7123 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/DXContainerEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/DXContainerEmitter.cpp
@@ -201,12 +201,63 @@ void DXContainerWriter::writeParts(raw_ostream &OS) {
memcpy(&PSV.BaseData, &P.Info->Info, sizeof(dxbc::PSV::v2::RuntimeInfo));
PSV.Resources = P.Info->Resources;
- if (sys::IsBigEndianHost)
- PSV.swapBytes(static_cast<Triple::EnvironmentType>(
- Triple::Pixel + P.Info->Info.ShaderStage));
+ for (auto El : P.Info->SigInputElements)
+ PSV.InputElements.push_back(mcdxbc::PSVSignatureElement{
+ El.Name, El.Indices, El.StartRow, El.Cols, El.StartCol,
+ El.Allocated, El.Kind, El.Type, El.Mode, El.DynamicMask,
+ El.Stream});
+
+ for (auto El : P.Info->SigOutputElements)
+ PSV.OutputElements.push_back(mcdxbc::PSVSignatureElement{
+ El.Name, El.Indices, El.StartRow, El.Cols, El.StartCol,
+ El.Allocated, El.Kind, El.Type, El.Mode, El.DynamicMask,
+ El.Stream});
+
+ for (auto El : P.Info->SigPatchOrPrimElements)
+ PSV.PatchOrPrimElements.push_back(mcdxbc::PSVSignatureElement{
+ El.Name, El.Indices, El.StartRow, El.Cols, El.StartCol,
+ El.Allocated, El.Kind, El.Type, El.Mode, El.DynamicMask,
+ El.Stream});
+
+ static_assert(PSV.OutputVectorMasks.size() == PSV.InputOutputMap.size());
+ for (unsigned I = 0; I < PSV.OutputVectorMasks.size(); ++I) {
+ PSV.OutputVectorMasks[I].insert(PSV.OutputVectorMasks[I].begin(),
+ P.Info->OutputVectorMasks[I].begin(),
+ P.Info->OutputVectorMasks[I].end());
+ PSV.InputOutputMap[I].insert(PSV.InputOutputMap[I].begin(),
+ P.Info->InputOutputMap[I].begin(),
+ P.Info->InputOutputMap[I].end());
+ }
+
+ PSV.PatchOrPrimMasks.insert(PSV.PatchOrPrimMasks.begin(),
+ P.Info->PatchOrPrimMasks.begin(),
+ P.Info->PatchOrPrimMasks.end());
+ PSV.InputPatchMap.insert(PSV.InputPatchMap.begin(),
+ P.Info->InputPatchMap.begin(),
+ P.Info->InputPatchMap.end());
+ PSV.PatchOutputMap.insert(PSV.PatchOutputMap.begin(),
+ P.Info->PatchOutputMap.begin(),
+ P.Info->PatchOutputMap.end());
+
+ PSV.finalize(static_cast<Triple::EnvironmentType>(
+ Triple::Pixel + P.Info->Info.ShaderStage));
PSV.write(OS, P.Info->Version);
break;
}
+ case dxbc::PartType::ISG1:
+ case dxbc::PartType::OSG1:
+ case dxbc::PartType::PSG1: {
+ mcdxbc::Signature Sig;
+ if (P.Signature.has_value()) {
+ for (const auto &Param : P.Signature->Parameters) {
+ Sig.addParam(Param.Stream, Param.Name, Param.Index, Param.SystemValue,
+ Param.CompType, Param.Register, Param.Mask,
+ Param.ExclusiveMask, Param.MinPrecision);
+ }
+ }
+ Sig.write(OS);
+ break;
+ }
case dxbc::PartType::Unknown:
break; // Skip any handling for unrecognized parts.
}
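The static_assert added above works on an ordinary runtime object because std::array::size() is constexpr and reads no state. A stripped-down illustration of the same lockstep check, with a hypothetical Maps type standing in for the PSV info:

#include <array>

// Hypothetical parallel tables mirroring OutputVectorMasks/InputOutputMap.
struct Maps {
  std::array<unsigned, 4> OutMasks;
  std::array<unsigned, 4> IOMap;
};

static unsigned sumPairs() {
  Maps M{};
  // std::array::size() is constexpr and touches no storage, so the check
  // compiles even though M is an ordinary runtime local; this is the same
  // idiom as the static_assert over the PSV members above.
  static_assert(M.OutMasks.size() == M.IOMap.size(),
                "tables must stay in lockstep");
  unsigned Sum = 0;
  for (unsigned I = 0; I < M.OutMasks.size(); ++I)
    Sum += M.OutMasks[I] + M.IOMap[I];
  return Sum;
}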
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/DXContainerYAML.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/DXContainerYAML.cpp
index ed9f39954111..1f03f2c7d399 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/DXContainerYAML.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/DXContainerYAML.cpp
@@ -12,7 +12,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/DXContainerYAML.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/BinaryFormat/DXContainer.h"
+#include "llvm/Support/ScopedPrinter.h"
namespace llvm {
@@ -122,6 +124,9 @@ void MappingTraits<DXContainerYAML::PSVInfo>::mapping(
uint32_t Version = PSV.Version;
IO.setContext(&Version);
+ // Restore the YAML context on function exit.
+ auto RestoreContext = make_scope_exit([&]() { IO.setContext(OldContext); });
+
// Shader stage is only included in binaries for v1 and later, but we always
// include it since it simplifies parsing and file construction.
IO.mapRequired("ShaderStage", PSV.Info.ShaderStage);
@@ -129,9 +134,47 @@ void MappingTraits<DXContainerYAML::PSVInfo>::mapping(
IO.mapRequired("ResourceStride", PSV.ResourceStride);
IO.mapRequired("Resources", PSV.Resources);
+ if (PSV.Version == 0)
+ return;
+ IO.mapRequired("SigInputElements", PSV.SigInputElements);
+ IO.mapRequired("SigOutputElements", PSV.SigOutputElements);
+ IO.mapRequired("SigPatchOrPrimElements", PSV.SigPatchOrPrimElements);
+
+ Triple::EnvironmentType Stage = dxbc::getShaderStage(PSV.Info.ShaderStage);
+ if (PSV.Info.UsesViewID) {
+ MutableArrayRef<SmallVector<llvm::yaml::Hex32>> MutableOutMasks(
+ PSV.OutputVectorMasks);
+ IO.mapRequired("OutputVectorMasks", MutableOutMasks);
+ if (Stage == Triple::EnvironmentType::Hull)
+ IO.mapRequired("PatchOrPrimMasks", PSV.PatchOrPrimMasks);
+ }
+ MutableArrayRef<SmallVector<llvm::yaml::Hex32>> MutableIOMap(
+ PSV.InputOutputMap);
+ IO.mapRequired("InputOutputMap", MutableIOMap);
+
+ if (Stage == Triple::EnvironmentType::Hull)
+ IO.mapRequired("InputPatchMap", PSV.InputPatchMap);
+
+ if (Stage == Triple::EnvironmentType::Domain)
+ IO.mapRequired("PatchOutputMap", PSV.PatchOutputMap);
+}
- // Restore the YAML context.
- IO.setContext(OldContext);
+void MappingTraits<DXContainerYAML::SignatureParameter>::mapping(
+ IO &IO, DXContainerYAML::SignatureParameter &S) {
+ IO.mapRequired("Stream", S.Stream);
+ IO.mapRequired("Name", S.Name);
+ IO.mapRequired("Index", S.Index);
+ IO.mapRequired("SystemValue", S.SystemValue);
+ IO.mapRequired("CompType", S.CompType);
+ IO.mapRequired("Register", S.Register);
+ IO.mapRequired("Mask", S.Mask);
+ IO.mapRequired("ExclusiveMask", S.ExclusiveMask);
+ IO.mapRequired("MinPrecision", S.MinPrecision);
+}
+
+void MappingTraits<DXContainerYAML::Signature>::mapping(
+ IO &IO, DXContainerYAML::Signature &S) {
+ IO.mapRequired("Parameters", S.Parameters);
}
void MappingTraits<DXContainerYAML::Part>::mapping(IO &IO,
@@ -142,6 +185,7 @@ void MappingTraits<DXContainerYAML::Part>::mapping(IO &IO,
IO.mapOptional("Flags", P.Flags);
IO.mapOptional("Hash", P.Hash);
IO.mapOptional("PSVInfo", P.Info);
+ IO.mapOptional("Signature", P.Signature);
}
void MappingTraits<DXContainerYAML::Object>::mapping(
@@ -166,6 +210,57 @@ void MappingTraits<DXContainerYAML::ResourceBindInfo>::mapping(
IO.mapRequired("Flags", Res.Flags);
}
+void MappingTraits<DXContainerYAML::SignatureElement>::mapping(
+ IO &IO, DXContainerYAML::SignatureElement &El) {
+ IO.mapRequired("Name", El.Name);
+ IO.mapRequired("Indices", El.Indices);
+ IO.mapRequired("StartRow", El.StartRow);
+ IO.mapRequired("Cols", El.Cols);
+ IO.mapRequired("StartCol", El.StartCol);
+ IO.mapRequired("Allocated", El.Allocated);
+ IO.mapRequired("Kind", El.Kind);
+ IO.mapRequired("ComponentType", El.Type);
+ IO.mapRequired("Interpolation", El.Mode);
+ IO.mapRequired("DynamicMask", El.DynamicMask);
+ IO.mapRequired("Stream", El.Stream);
+}
+
+void ScalarEnumerationTraits<dxbc::PSV::SemanticKind>::enumeration(
+ IO &IO, dxbc::PSV::SemanticKind &Value) {
+ for (const auto &E : dxbc::PSV::getSemanticKinds())
+ IO.enumCase(Value, E.Name.str().c_str(), E.Value);
+}
+
+void ScalarEnumerationTraits<dxbc::PSV::ComponentType>::enumeration(
+ IO &IO, dxbc::PSV::ComponentType &Value) {
+ for (const auto &E : dxbc::PSV::getComponentTypes())
+ IO.enumCase(Value, E.Name.str().c_str(), E.Value);
+}
+
+void ScalarEnumerationTraits<dxbc::PSV::InterpolationMode>::enumeration(
+ IO &IO, dxbc::PSV::InterpolationMode &Value) {
+ for (const auto &E : dxbc::PSV::getInterpolationModes())
+ IO.enumCase(Value, E.Name.str().c_str(), E.Value);
+}
+
+void ScalarEnumerationTraits<dxbc::D3DSystemValue>::enumeration(
+ IO &IO, dxbc::D3DSystemValue &Value) {
+ for (const auto &E : dxbc::getD3DSystemValues())
+ IO.enumCase(Value, E.Name.str().c_str(), E.Value);
+}
+
+void ScalarEnumerationTraits<dxbc::SigMinPrecision>::enumeration(
+ IO &IO, dxbc::SigMinPrecision &Value) {
+ for (const auto &E : dxbc::getSigMinPrecisions())
+ IO.enumCase(Value, E.Name.str().c_str(), E.Value);
+}
+
+void ScalarEnumerationTraits<dxbc::SigComponentType>::enumeration(
+ IO &IO, dxbc::SigComponentType &Value) {
+ for (const auto &E : dxbc::getSigComponentTypes())
+ IO.enumCase(Value, E.Name.str().c_str(), E.Value);
+}
+
} // namespace yaml
void DXContainerYAML::PSVInfo::mapInfoForVersion(yaml::IO &IO) {
@@ -242,10 +337,6 @@ void DXContainerYAML::PSVInfo::mapInfoForVersion(yaml::IO &IO) {
break;
}
- IO.mapRequired("SigInputElements", Info.SigInputElements);
- IO.mapRequired("SigOutputElements", Info.SigOutputElements);
- IO.mapRequired("SigPatchConstOrPrimElements",
- Info.SigPatchConstOrPrimElements);
IO.mapRequired("SigInputVectors", Info.SigInputVectors);
MutableArrayRef<uint8_t> Vec(Info.SigOutputVectors);
IO.mapRequired("SigOutputVectors", Vec);
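The new guard replaces a manual IO.setContext(OldContext) at the end of the function, which the early return for version 0 would otherwise skip. A self-contained sketch of the pattern (CurrentContext and withTemporaryContext are illustrative):

#include "llvm/ADT/ScopeExit.h"

static void *CurrentContext = nullptr; // Illustrative stand-in for IO state.

static void withTemporaryContext(void *Temp, bool EarlyOut) {
  void *Old = CurrentContext;
  CurrentContext = Temp;
  // Restore on every exit path, including the early return below; this is
  // the same shape as the RestoreContext guard in the PSVInfo mapping.
  auto Restore = llvm::make_scope_exit([&] { CurrentContext = Old; });
  if (EarlyOut)
    return; // Restore still runs here.
  // ... more work under the temporary context ...
}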
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/ELFEmitter.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/ELFEmitter.cpp
index a64ab62ef22b..94b0529f7610 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/ELFEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/ELFEmitter.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
+#include <variant>
using namespace llvm;
@@ -122,7 +123,7 @@ public:
return encodeULEB128(Val, OS);
}
- template <typename T> void write(T Val, support::endianness E) {
+ template <typename T> void write(T Val, llvm::endianness E) {
if (checkLimit(sizeof(T)))
support::endian::write<T>(OS, Val, E);
}
@@ -666,7 +667,7 @@ bool ELFState<ELFT>::initImplicitHeader(ContiguousBlobAccumulator &CBA,
initSymtabSectionHeader(Header, SymtabType::Static, CBA, YAMLSec);
else if (SecName == ".dynsym")
initSymtabSectionHeader(Header, SymtabType::Dynamic, CBA, YAMLSec);
- else if (SecName.startswith(".debug_")) {
+ else if (SecName.starts_with(".debug_")) {
// If a ".debug_*" section's type is a preserved one, e.g., SHT_DYNAMIC, we
// will not treat it as a debug section.
if (YAMLSec && !isa<ELFYAML::RawContentSection>(YAMLSec))
@@ -1390,10 +1391,24 @@ template <class ELFT>
void ELFState<ELFT>::writeSectionContent(
Elf_Shdr &SHeader, const ELFYAML::BBAddrMapSection &Section,
ContiguousBlobAccumulator &CBA) {
- if (!Section.Entries)
+ if (!Section.Entries) {
+ if (Section.PGOAnalyses)
+ WithColor::warning()
+ << "PGOAnalyses should not exist in SHT_LLVM_BB_ADDR_MAP when "
+ "Entries does not exist";
return;
+ }
+
+ const std::vector<ELFYAML::PGOAnalysisMapEntry> *PGOAnalyses = nullptr;
+ if (Section.PGOAnalyses) {
+ if (Section.Entries->size() != Section.PGOAnalyses->size())
+ WithColor::warning() << "PGOAnalyses must be the same length as Entries "
+ "in SHT_LLVM_BB_ADDR_MAP";
+ else
+ PGOAnalyses = &Section.PGOAnalyses.value();
+ }
- for (const ELFYAML::BBAddrMapEntry &E : *Section.Entries) {
+ for (const auto &[Idx, E] : llvm::enumerate(*Section.Entries)) {
// Write version and feature values.
if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP) {
if (E.Version > 2)
@@ -1404,6 +1419,14 @@ void ELFState<ELFT>::writeSectionContent(
CBA.write(E.Feature);
SHeader.sh_size += 2;
}
+
+ if (Section.PGOAnalyses) {
+ if (E.Version < 2)
+ WithColor::warning()
+ << "unsupported SHT_LLVM_BB_ADDR_MAP version when using PGO: "
+ << static_cast<int>(E.Version) << "; must use version >= 2";
+ }
+
// Write the address of the function.
CBA.write<uintX_t>(E.Address, ELFT::TargetEndianness);
// Write number of BBEntries (number of basic blocks in the function). This
@@ -1412,14 +1435,43 @@ void ELFState<ELFT>::writeSectionContent(
E.NumBlocks.value_or(E.BBEntries ? E.BBEntries->size() : 0);
SHeader.sh_size += sizeof(uintX_t) + CBA.writeULEB128(NumBlocks);
// Write all BBEntries.
- if (!E.BBEntries)
+ if (E.BBEntries) {
+ for (const ELFYAML::BBAddrMapEntry::BBEntry &BBE : *E.BBEntries) {
+ if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP && E.Version > 1)
+ SHeader.sh_size += CBA.writeULEB128(BBE.ID);
+ SHeader.sh_size += CBA.writeULEB128(BBE.AddressOffset) +
+ CBA.writeULEB128(BBE.Size) +
+ CBA.writeULEB128(BBE.Metadata);
+ }
+ }
+
+ if (!PGOAnalyses)
continue;
- for (const ELFYAML::BBAddrMapEntry::BBEntry &BBE : *E.BBEntries) {
- if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP && E.Version > 1)
- SHeader.sh_size += CBA.writeULEB128(BBE.ID);
- SHeader.sh_size += CBA.writeULEB128(BBE.AddressOffset) +
- CBA.writeULEB128(BBE.Size) +
- CBA.writeULEB128(BBE.Metadata);
+ const ELFYAML::PGOAnalysisMapEntry &PGOEntry = PGOAnalyses->at(Idx);
+
+ if (PGOEntry.FuncEntryCount)
+ SHeader.sh_size += CBA.writeULEB128(*PGOEntry.FuncEntryCount);
+
+ if (!PGOEntry.PGOBBEntries)
+ continue;
+
+ const auto &PGOBBEntries = PGOEntry.PGOBBEntries.value();
+ if (!E.BBEntries || E.BBEntries->size() != PGOBBEntries.size()) {
+      WithColor::warning() << "PGOBBEntries must be the same length as "
+ "BBEntries in SHT_LLVM_BB_ADDR_MAP.\n"
+ << "Mismatch on function with address: "
+ << E.Address;
+ continue;
+ }
+
+ for (const auto &PGOBBE : PGOBBEntries) {
+ if (PGOBBE.BBFreq)
+ SHeader.sh_size += CBA.writeULEB128(*PGOBBE.BBFreq);
+ if (PGOBBE.Successors) {
+ SHeader.sh_size += CBA.writeULEB128(PGOBBE.Successors->size());
+ for (const auto &[ID, BrProb] : *PGOBBE.Successors)
+ SHeader.sh_size += CBA.writeULEB128(ID) + CBA.writeULEB128(BrProb);
+ }
}
}
}
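Switching the loop to llvm::enumerate is what makes the parallel PGOAnalyses vector indexable alongside Entries. A minimal sketch of pairing two same-length vectors that way (pairUp and the data are illustrative):

#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>

static void pairUp(const std::vector<int> &Entries,
                   const std::vector<int> &Analyses) {
  if (Entries.size() != Analyses.size())
    return; // Mirrors the length check before PGOAnalyses is used above.
  for (const auto &[Idx, E] : llvm::enumerate(Entries)) {
    // Idx walks 0..N-1 alongside E, so the sibling vector can be indexed.
    llvm::outs() << "entry " << E << " pairs with " << Analyses[Idx] << "\n";
  }
}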
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/ELFYAML.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/ELFYAML.cpp
index e92c61d81055..6ad4a067415a 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -131,6 +131,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_NT>::enumeration(
ECase(NT_ARM_HW_WATCH);
ECase(NT_ARM_SVE);
ECase(NT_ARM_PAC_MASK);
+ ECase(NT_ARM_TAGGED_ADDR_CTRL);
ECase(NT_ARM_SSVE);
ECase(NT_ARM_ZA);
ECase(NT_ARM_ZT);
@@ -609,6 +610,8 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1103, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1150, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1151, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1200, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1201, EF_AMDGPU_MACH);
switch (Object->Header.ABIVersion) {
default:
// ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags.
@@ -716,6 +719,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
ECase(SHT_MSP430_ATTRIBUTES);
break;
case ELF::EM_AARCH64:
+ ECase(SHT_AARCH64_AUTH_RELR);
ECase(SHT_AARCH64_MEMTAG_GLOBALS_STATIC);
ECase(SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC);
break;
@@ -1287,10 +1291,10 @@ StringRef ScalarTraits<ELFYAML::YAMLIntUInt>::input(StringRef Scalar, void *Ctx,
StringRef ErrMsg = "invalid number";
// We do not accept negative hex numbers because their meaning is ambiguous.
// For example, would -0xfffffffff mean 1 or INT32_MIN?
- if (Scalar.empty() || Scalar.startswith("-0x"))
+ if (Scalar.empty() || Scalar.starts_with("-0x"))
return ErrMsg;
- if (Scalar.startswith("-")) {
+ if (Scalar.starts_with("-")) {
const int64_t MinVal = Is64 ? INT64_MIN : INT32_MIN;
long long Int;
if (getAsSignedInteger(Scalar, /*Radix=*/0, Int) || (Int < MinVal))
@@ -1386,6 +1390,7 @@ static void sectionMapping(IO &IO, ELFYAML::BBAddrMapSection &Section) {
commonSectionMapping(IO, Section);
IO.mapOptional("Content", Section.Content);
IO.mapOptional("Entries", Section.Entries);
+ IO.mapOptional("PGOAnalyses", Section.PGOAnalyses);
}
static void sectionMapping(IO &IO, ELFYAML::StackSizesSection &Section) {
@@ -1555,7 +1560,7 @@ void MappingTraits<std::unique_ptr<ELFYAML::Chunk>>::mapping(
// When the Type string does not have a "SHT_" prefix, we know it is not a
// description of a regular ELF output section.
TypeStr = getStringValue(IO, "Type");
- if (TypeStr.startswith("SHT_") || isInteger(TypeStr))
+ if (TypeStr.starts_with("SHT_") || isInteger(TypeStr))
IO.mapRequired("Type", Type);
}
@@ -1821,6 +1826,28 @@ void MappingTraits<ELFYAML::BBAddrMapEntry::BBEntry>::mapping(
IO.mapRequired("Metadata", E.Metadata);
}
+void MappingTraits<ELFYAML::PGOAnalysisMapEntry>::mapping(
+ IO &IO, ELFYAML::PGOAnalysisMapEntry &E) {
+ assert(IO.getContext() && "The IO context is not initialized");
+ IO.mapOptional("FuncEntryCount", E.FuncEntryCount);
+ IO.mapOptional("PGOBBEntries", E.PGOBBEntries);
+}
+
+void MappingTraits<ELFYAML::PGOAnalysisMapEntry::PGOBBEntry>::mapping(
+ IO &IO, ELFYAML::PGOAnalysisMapEntry::PGOBBEntry &E) {
+ assert(IO.getContext() && "The IO context is not initialized");
+ IO.mapOptional("BBFreq", E.BBFreq);
+ IO.mapOptional("Successors", E.Successors);
+}
+
+void MappingTraits<ELFYAML::PGOAnalysisMapEntry::PGOBBEntry::SuccessorEntry>::
+ mapping(IO &IO,
+ ELFYAML::PGOAnalysisMapEntry::PGOBBEntry::SuccessorEntry &E) {
+ assert(IO.getContext() && "The IO context is not initialized");
+ IO.mapRequired("ID", E.ID);
+ IO.mapRequired("BrProb", E.BrProb);
+}
+
void MappingTraits<ELFYAML::GnuHashHeader>::mapping(IO &IO,
ELFYAML::GnuHashHeader &E) {
assert(IO.getContext() && "The IO context is not initialized");
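The added mapping traits follow the standard YAMLTraits recipe: specialize MappingTraits in llvm::yaml and map each key. A minimal self-contained version for a hypothetical one-field record (DemoEntry is not an LLVM type):

#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <optional>

// Hypothetical record, shaped like PGOAnalysisMapEntry above.
struct DemoEntry {
  std::optional<uint64_t> FuncEntryCount;
};

namespace llvm {
namespace yaml {
template <> struct MappingTraits<DemoEntry> {
  static void mapping(IO &IO, DemoEntry &E) {
    // Optional keys are simply omitted from the output when unset.
    IO.mapOptional("FuncEntryCount", E.FuncEntryCount);
  }
};
} // namespace yaml
} // namespace llvm

static void dump(DemoEntry &E) {
  llvm::yaml::Output YOut(llvm::outs());
  YOut << E; // Emits "FuncEntryCount: <n>" when the field is set.
}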
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/GOFFEmitter.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/GOFFEmitter.cpp
new file mode 100644
index 000000000000..345904407e1d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/GOFFEmitter.cpp
@@ -0,0 +1,282 @@
+//===- yaml2goff - Convert YAML to a GOFF object file ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// The GOFF component of yaml2obj.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ObjectYAML/ObjectYAML.h"
+#include "llvm/ObjectYAML/yaml2obj.h"
+#include "llvm/Support/ConvertEBCDIC.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+// Common flag values on records.
+enum {
+ // Flag: This record is continued.
+ Rec_Continued = 1,
+
+ // Flag: This record is a continuation.
+ Rec_Continuation = 1 << (8 - 6 - 1),
+};
+
+template <typename ValueType> struct BinaryBeImpl {
+ ValueType Value;
+ BinaryBeImpl(ValueType V) : Value(V) {}
+};
+
+template <typename ValueType>
+raw_ostream &operator<<(raw_ostream &OS, const BinaryBeImpl<ValueType> &BBE) {
+ char Buffer[sizeof(BBE.Value)];
+ support::endian::write<ValueType, llvm::endianness::big, support::unaligned>(
+ Buffer, BBE.Value);
+ OS.write(Buffer, sizeof(BBE.Value));
+ return OS;
+}
+
+template <typename ValueType> BinaryBeImpl<ValueType> binaryBe(ValueType V) {
+ return BinaryBeImpl<ValueType>(V);
+}
+
+struct ZerosImpl {
+ size_t NumBytes;
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const ZerosImpl &Z) {
+ OS.write_zeros(Z.NumBytes);
+ return OS;
+}
+
+ZerosImpl zeros(const size_t NumBytes) { return ZerosImpl{NumBytes}; }
+
+// The GOFFOstream is responsible for writing the data into the fixed-size
+// physical records of the format. A user of this class announces the start
+// of a new logical record and the size of its payload. While the payload is
+// being written, the physical records are created for it, and any fill bytes
+// needed at the end of a physical record are written automatically.
+class GOFFOstream : public raw_ostream {
+public:
+ explicit GOFFOstream(raw_ostream &OS)
+ : OS(OS), LogicalRecords(0), RemainingSize(0), NewLogicalRecord(false) {
+ SetBufferSize(GOFF::PayloadLength);
+ }
+
+ ~GOFFOstream() { finalize(); }
+
+ void makeNewRecord(GOFF::RecordType Type, size_t Size) {
+ fillRecord();
+ CurrentType = Type;
+ RemainingSize = Size;
+ if (size_t Gap = (RemainingSize % GOFF::PayloadLength))
+ RemainingSize += GOFF::PayloadLength - Gap;
+ NewLogicalRecord = true;
+ ++LogicalRecords;
+ }
+
+ void finalize() { fillRecord(); }
+
+ uint32_t logicalRecords() { return LogicalRecords; }
+
+private:
+ // The underlying raw_ostream.
+ raw_ostream &OS;
+
+ // The number of logical records emitted so far.
+ uint32_t LogicalRecords;
+
+ // The remaining size of this logical record, including fill bytes.
+ size_t RemainingSize;
+
+ // The type of the current (logical) record.
+ GOFF::RecordType CurrentType;
+
+ // Signals start of new record.
+ bool NewLogicalRecord;
+
+  // Return the number of bytes left to write until the next physical record.
+  // Note that we track the total number of bytes left, not the number
+  // already written.
+ size_t bytesToNextPhysicalRecord() {
+ size_t Bytes = RemainingSize % GOFF::PayloadLength;
+ return Bytes ? Bytes : GOFF::PayloadLength;
+ }
+
+ // Write the record prefix of a physical record, using the current record
+ // type.
+ static void writeRecordPrefix(raw_ostream &OS, GOFF::RecordType Type,
+ size_t RemainingSize,
+ uint8_t Flags = Rec_Continuation) {
+ uint8_t TypeAndFlags = Flags | (Type << 4);
+ if (RemainingSize > GOFF::RecordLength)
+ TypeAndFlags |= Rec_Continued;
+ OS << binaryBe(static_cast<unsigned char>(GOFF::PTVPrefix))
+ << binaryBe(static_cast<unsigned char>(TypeAndFlags))
+ << binaryBe(static_cast<unsigned char>(0));
+ }
+
+ // Fill the last physical record of a logical record with zero bytes.
+ void fillRecord() {
+ assert((GetNumBytesInBuffer() <= RemainingSize) &&
+ "More bytes in buffer than expected");
+ size_t Remains = RemainingSize - GetNumBytesInBuffer();
+ if (Remains) {
+ assert((Remains < GOFF::RecordLength) &&
+ "Attempting to fill more than one physical record");
+ raw_ostream::write_zeros(Remains);
+ }
+ flush();
+ assert(RemainingSize == 0 && "Not fully flushed");
+ assert(GetNumBytesInBuffer() == 0 && "Buffer not fully empty");
+ }
+
+ // See raw_ostream::write_impl.
+ void write_impl(const char *Ptr, size_t Size) override {
+ assert((RemainingSize >= Size) && "Attempt to write too much data");
+ assert(RemainingSize && "Logical record overflow");
+ if (!(RemainingSize % GOFF::PayloadLength)) {
+ writeRecordPrefix(OS, CurrentType, RemainingSize,
+ NewLogicalRecord ? 0 : Rec_Continuation);
+ NewLogicalRecord = false;
+ }
+ assert(!NewLogicalRecord &&
+ "New logical record not on physical record boundary");
+
+ size_t Idx = 0;
+ while (Size > 0) {
+ size_t BytesToWrite = bytesToNextPhysicalRecord();
+ if (BytesToWrite > Size)
+ BytesToWrite = Size;
+ OS.write(Ptr + Idx, BytesToWrite);
+ Idx += BytesToWrite;
+ Size -= BytesToWrite;
+ RemainingSize -= BytesToWrite;
+ if (Size) {
+ writeRecordPrefix(OS, CurrentType, RemainingSize);
+ }
+ }
+ }
+
+ // Return the current position within the stream, not counting the bytes
+ // currently in the buffer.
+ uint64_t current_pos() const override { return OS.tell(); }
+};
+
+class GOFFState {
+ void writeHeader(GOFFYAML::FileHeader &FileHdr);
+ void writeEnd();
+
+ void reportError(const Twine &Msg) {
+ ErrHandler(Msg);
+ HasError = true;
+ }
+
+ GOFFState(raw_ostream &OS, GOFFYAML::Object &Doc,
+ yaml::ErrorHandler ErrHandler)
+ : GW(OS), Doc(Doc), ErrHandler(ErrHandler), HasError(false) {}
+
+ ~GOFFState() { GW.finalize(); }
+
+ bool writeObject();
+
+public:
+ static bool writeGOFF(raw_ostream &OS, GOFFYAML::Object &Doc,
+ yaml::ErrorHandler ErrHandler);
+
+private:
+ GOFFOstream GW;
+ GOFFYAML::Object &Doc;
+ yaml::ErrorHandler ErrHandler;
+ bool HasError;
+};
+
+void GOFFState::writeHeader(GOFFYAML::FileHeader &FileHdr) {
+ SmallString<16> CCSIDName;
+ if (std::error_code EC =
+ ConverterEBCDIC::convertToEBCDIC(FileHdr.CharacterSetName, CCSIDName))
+ reportError("Conversion error on " + FileHdr.CharacterSetName);
+ if (CCSIDName.size() > 16) {
+ reportError("CharacterSetName too long");
+ CCSIDName.resize(16);
+ }
+ SmallString<16> LangProd;
+ if (std::error_code EC = ConverterEBCDIC::convertToEBCDIC(
+ FileHdr.LanguageProductIdentifier, LangProd))
+ reportError("Conversion error on " + FileHdr.LanguageProductIdentifier);
+ if (LangProd.size() > 16) {
+ reportError("LanguageProductIdentifier too long");
+ LangProd.resize(16);
+ }
+
+ GW.makeNewRecord(GOFF::RT_HDR, GOFF::PayloadLength);
+ GW << binaryBe(FileHdr.TargetEnvironment) // TargetEnvironment
+ << binaryBe(FileHdr.TargetOperatingSystem) // TargetOperatingSystem
+ << zeros(2) // Reserved
+ << binaryBe(FileHdr.CCSID) // CCSID
+ << CCSIDName // CharacterSetName
+ << zeros(16 - CCSIDName.size()) // Fill bytes
+ << LangProd // LanguageProductIdentifier
+ << zeros(16 - LangProd.size()) // Fill bytes
+ << binaryBe(FileHdr.ArchitectureLevel); // ArchitectureLevel
+  // The module properties are optional. Figure out if we need to write them.
+ uint16_t ModPropLen = 0;
+ if (FileHdr.TargetSoftwareEnvironment)
+ ModPropLen = 3;
+ else if (FileHdr.InternalCCSID)
+ ModPropLen = 2;
+ if (ModPropLen) {
+ GW << binaryBe(ModPropLen) << zeros(6);
+ if (ModPropLen >= 2)
+ GW << binaryBe(FileHdr.InternalCCSID ? *FileHdr.InternalCCSID : 0);
+ if (ModPropLen >= 3)
+ GW << binaryBe(FileHdr.TargetSoftwareEnvironment
+ ? *FileHdr.TargetSoftwareEnvironment
+ : 0);
+ }
+}
+
+void GOFFState::writeEnd() {
+ GW.makeNewRecord(GOFF::RT_END, GOFF::PayloadLength);
+ GW << binaryBe(uint8_t(0)) // No entry point
+ << binaryBe(uint8_t(0)) // No AMODE
+ << zeros(3) // Reserved
+ << binaryBe(GW.logicalRecords());
+  // No entry point yet; finalize() pads the remaining record space with zeros.
+ GW.finalize();
+}
+
+bool GOFFState::writeObject() {
+ writeHeader(Doc.Header);
+ if (HasError)
+ return false;
+ writeEnd();
+ return true;
+}
+
+bool GOFFState::writeGOFF(raw_ostream &OS, GOFFYAML::Object &Doc,
+ yaml::ErrorHandler ErrHandler) {
+ GOFFState State(OS, Doc, ErrHandler);
+ return State.writeObject();
+}
+} // namespace
+
+namespace llvm {
+namespace yaml {
+
+bool yaml2goff(llvm::GOFFYAML::Object &Doc, raw_ostream &Out,
+ ErrorHandler ErrHandler) {
+ return GOFFState::writeGOFF(Out, Doc, ErrHandler);
+}
+
+} // namespace yaml
+} // namespace llvm
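Every field above funnels through support::endian::write via the BinaryBeImpl inserter. A stand-alone sketch of one such big-endian field write (writeBe16 is illustrative):

#include "llvm/Support/Endian.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>

// Writes a 16-bit field big-endian, as GOFF record fields require; the same
// write<..., llvm::endianness::big, support::unaligned> call as BinaryBeImpl.
static void writeBe16(llvm::raw_ostream &OS, uint16_t V) {
  char Buf[sizeof(V)];
  llvm::support::endian::write<uint16_t, llvm::endianness::big,
                               llvm::support::unaligned>(Buf, V);
  OS.write(Buf, sizeof(Buf));
}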
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/GOFFYAML.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/GOFFYAML.cpp
new file mode 100644
index 000000000000..ae857980a521
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/GOFFYAML.cpp
@@ -0,0 +1,46 @@
+//===-- GOFFYAML.cpp - GOFF YAMLIO implementation ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines classes for handling the YAML representation of GOFF.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjectYAML/GOFFYAML.h"
+#include "llvm/BinaryFormat/GOFF.h"
+#include <string.h>
+
+namespace llvm {
+namespace GOFFYAML {
+
+Object::Object() {}
+
+} // namespace GOFFYAML
+
+namespace yaml {
+
+void MappingTraits<GOFFYAML::FileHeader>::mapping(
+ IO &IO, GOFFYAML::FileHeader &FileHdr) {
+ IO.mapOptional("TargetEnvironment", FileHdr.TargetEnvironment, 0);
+ IO.mapOptional("TargetOperatingSystem", FileHdr.TargetOperatingSystem, 0);
+ IO.mapOptional("CCSID", FileHdr.CCSID, 0);
+ IO.mapOptional("CharacterSetName", FileHdr.CharacterSetName, "");
+ IO.mapOptional("LanguageProductIdentifier", FileHdr.LanguageProductIdentifier,
+ "");
+ IO.mapOptional("ArchitectureLevel", FileHdr.ArchitectureLevel, 1);
+ IO.mapOptional("InternalCCSID", FileHdr.InternalCCSID);
+ IO.mapOptional("TargetSoftwareEnvironment",
+ FileHdr.TargetSoftwareEnvironment);
+}
+
+void MappingTraits<GOFFYAML::Object>::mapping(IO &IO, GOFFYAML::Object &Obj) {
+ IO.mapTag("!GOFF", true);
+ IO.mapRequired("FileHeader", Obj.Header);
+}
+
+} // namespace yaml
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/MachOEmitter.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/MachOEmitter.cpp
index 0de9112a4ac4..c08b389daea9 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/MachOEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/MachOEmitter.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/SystemZ/zOSSupport.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
@@ -426,7 +427,7 @@ void MachOWriter::writeRelocations(raw_ostream &OS) {
void MachOWriter::writeBindOpcodes(
raw_ostream &OS, std::vector<MachOYAML::BindOpcode> &BindOpcodes) {
- for (auto &Opcode : BindOpcodes) {
+ for (const auto &Opcode : BindOpcodes) {
uint8_t OpByte = Opcode.Opcode | Opcode.Imm;
OS.write(reinterpret_cast<char *>(&OpByte), 1);
for (auto Data : Opcode.ULEBExtraData) {
@@ -458,7 +459,7 @@ void MachOWriter::dumpExportEntry(raw_ostream &OS,
}
}
OS.write(static_cast<uint8_t>(Entry.Children.size()));
- for (auto EE : Entry.Children) {
+ for (const auto &EE : Entry.Children) {
OS << EE.Name;
OS.write('\0');
encodeULEB128(EE.NodeOffset, OS);
@@ -559,7 +560,7 @@ void MachOWriter::writeLinkEditData(raw_ostream &OS) {
void MachOWriter::writeRebaseOpcodes(raw_ostream &OS) {
MachOYAML::LinkEditData &LinkEdit = Obj.LinkEdit;
- for (auto Opcode : LinkEdit.RebaseOpcodes) {
+ for (const auto &Opcode : LinkEdit.RebaseOpcodes) {
uint8_t OpByte = Opcode.Opcode | Opcode.Imm;
OS.write(reinterpret_cast<char *>(&OpByte), 1);
for (auto Data : Opcode.ExtraData)
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/MachOYAML.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/MachOYAML.cpp
index 56120901be23..82b2eaecec9b 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/MachOYAML.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/MachOYAML.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/SystemZ/zOSSupport.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Host.h"
@@ -627,7 +628,8 @@ void MappingTraits<MachO::fileset_entry_command>::mapping(
IO &IO, MachO::fileset_entry_command &LoadCommand) {
IO.mapRequired("vmaddr", LoadCommand.vmaddr);
IO.mapRequired("fileoff", LoadCommand.fileoff);
- IO.mapRequired("id", LoadCommand.entry_id);
+ IO.mapRequired("id", LoadCommand.entry_id.offset);
+ IO.mapOptional("reserved", LoadCommand.reserved);
}
} // end namespace yaml
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/ObjectYAML.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/ObjectYAML.cpp
index d57e5583016b..1815eaff8e36 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/ObjectYAML.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/ObjectYAML.cpp
@@ -26,6 +26,8 @@ void MappingTraits<YamlObjectFile>::mapping(IO &IO,
MappingTraits<ELFYAML::Object>::mapping(IO, *ObjectFile.Elf);
if (ObjectFile.Coff)
MappingTraits<COFFYAML::Object>::mapping(IO, *ObjectFile.Coff);
+ if (ObjectFile.Goff)
+ MappingTraits<GOFFYAML::Object>::mapping(IO, *ObjectFile.Goff);
if (ObjectFile.MachO)
MappingTraits<MachOYAML::Object>::mapping(IO, *ObjectFile.MachO);
if (ObjectFile.FatMachO)
@@ -46,6 +48,9 @@ void MappingTraits<YamlObjectFile>::mapping(IO &IO,
} else if (IO.mapTag("!COFF")) {
ObjectFile.Coff.reset(new COFFYAML::Object());
MappingTraits<COFFYAML::Object>::mapping(IO, *ObjectFile.Coff);
+ } else if (IO.mapTag("!GOFF")) {
+ ObjectFile.Goff.reset(new GOFFYAML::Object());
+ MappingTraits<GOFFYAML::Object>::mapping(IO, *ObjectFile.Goff);
} else if (IO.mapTag("!mach-o")) {
ObjectFile.MachO.reset(new MachOYAML::Object());
MappingTraits<MachOYAML::Object>::mapping(IO, *ObjectFile.MachO);
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/OffloadEmitter.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/OffloadEmitter.cpp
index dfb572531660..8692ad8a5865 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/OffloadEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/OffloadEmitter.cpp
@@ -38,14 +38,10 @@ bool yaml2offload(Binary &Doc, raw_ostream &Out, ErrorHandler EH) {
Member.Content->writeAsBinary(OS);
Image.Image = MemoryBuffer::getMemBufferCopy(OS.str());
- std::unique_ptr<MemoryBuffer> Binary = object::OffloadBinary::write(Image);
-
// Copy the data to a new buffer so we can modify the bytes directly.
- SmallVector<char> NewBuffer;
- std::copy(Binary->getBufferStart(), Binary->getBufferEnd(),
- std::back_inserter(NewBuffer));
+ auto Buffer = object::OffloadBinary::write(Image);
auto *TheHeader =
- reinterpret_cast<object::OffloadBinary::Header *>(&NewBuffer[0]);
+ reinterpret_cast<object::OffloadBinary::Header *>(&Buffer[0]);
if (Doc.Version)
TheHeader->Version = *Doc.Version;
if (Doc.Size)
@@ -55,7 +51,7 @@ bool yaml2offload(Binary &Doc, raw_ostream &Out, ErrorHandler EH) {
if (Doc.EntrySize)
TheHeader->EntrySize = *Doc.EntrySize;
- Out.write(NewBuffer.begin(), NewBuffer.size());
+ Out.write(Buffer.begin(), Buffer.size());
}
return true;
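The rewrite drops the intermediate copy because write() already returns an owned, mutable buffer whose bytes can be patched before emission. A sketch of that patch-in-place shape (DemoHeader and patchVersion are illustrative; the reinterpret_cast mirrors the emitter rather than being a general recommendation):

#include "llvm/ADT/SmallString.h"
#include <cstdint>

struct DemoHeader {
  uint32_t Version;
};

// Overwrites the header bytes at the front of the owned buffer, the way the
// offload emitter now patches Version/Size/EntryOffset/EntrySize directly.
static void patchVersion(llvm::SmallString<0> &Buffer, uint32_t V) {
  auto *H = reinterpret_cast<DemoHeader *>(&Buffer[0]);
  H->Version = V;
}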
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/WasmYAML.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/WasmYAML.cpp
index ef47766a2394..9502fe5e4077 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/WasmYAML.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/WasmYAML.cpp
@@ -518,7 +518,9 @@ void MappingTraits<WasmYAML::SymbolInfo>::mapping(IO &IO,
IO.mapRequired("Tag", Info.ElementIndex);
} else if (Info.Kind == wasm::WASM_SYMBOL_TYPE_DATA) {
if ((Info.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0) {
- IO.mapRequired("Segment", Info.DataRef.Segment);
+ if ((Info.Flags & wasm::WASM_SYMBOL_ABSOLUTE) == 0) {
+ IO.mapRequired("Segment", Info.DataRef.Segment);
+ }
IO.mapOptional("Offset", Info.DataRef.Offset, 0u);
IO.mapRequired("Size", Info.DataRef.Size);
}
@@ -573,6 +575,7 @@ void ScalarBitSetTraits<WasmYAML::SymbolFlags>::bitset(
BCaseMask(EXPLICIT_NAME, EXPLICIT_NAME);
BCaseMask(NO_STRIP, NO_STRIP);
BCaseMask(TLS, TLS);
+ BCaseMask(ABSOLUTE, ABSOLUTE);
#undef BCaseMask
}
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/XCOFFEmitter.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/XCOFFEmitter.cpp
index 7ad878f04c88..ccf768c06aeb 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/XCOFFEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/XCOFFEmitter.cpp
@@ -33,7 +33,7 @@ constexpr uint32_t MaxRawDataSize = UINT32_MAX;
class XCOFFWriter {
public:
XCOFFWriter(XCOFFYAML::Object &Obj, raw_ostream &OS, yaml::ErrorHandler EH)
- : Obj(Obj), W(OS, support::big), ErrHandler(EH),
+ : Obj(Obj), W(OS, llvm::endianness::big), ErrHandler(EH),
StrTblBuilder(StringTableBuilder::XCOFF) {
Is64Bit = Obj.Header.Magic == (llvm::yaml::Hex16)XCOFF::XCOFF64;
}
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/XCOFFYAML.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/XCOFFYAML.cpp
index 44ef33501b65..398b09c72170 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/XCOFFYAML.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/XCOFFYAML.cpp
@@ -280,47 +280,60 @@ static void auxSymMapping(IO &IO, XCOFFYAML::SectAuxEntForStat &AuxSym) {
IO.mapOptional("NumberOfLineNum", AuxSym.NumberOfLineNum);
}
+template <typename AuxEntT>
+static void ResetAuxSym(IO &IO,
+ std::unique_ptr<XCOFFYAML::AuxSymbolEnt> &AuxSym) {
+ if (!IO.outputting())
+ AuxSym.reset(new AuxEntT);
+}
+
void MappingTraits<std::unique_ptr<XCOFFYAML::AuxSymbolEnt>>::mapping(
IO &IO, std::unique_ptr<XCOFFYAML::AuxSymbolEnt> &AuxSym) {
- assert(!IO.outputting() && "We don't dump aux symbols currently.");
const bool Is64 =
static_cast<XCOFFYAML::Object *>(IO.getContext())->Header.Magic ==
(llvm::yaml::Hex16)XCOFF::XCOFF64;
+
XCOFFYAML::AuxSymbolType AuxType;
+ if (IO.outputting())
+ AuxType = AuxSym.get()->Type;
IO.mapRequired("Type", AuxType);
switch (AuxType) {
case XCOFFYAML::AUX_EXCEPT:
- if (!Is64)
+ if (!Is64) {
IO.setError("an auxiliary symbol of type AUX_EXCEPT cannot be defined in "
"XCOFF32");
- AuxSym.reset(new XCOFFYAML::ExcpetionAuxEnt());
+ return;
+ }
+ ResetAuxSym<XCOFFYAML::ExcpetionAuxEnt>(IO, AuxSym);
auxSymMapping(IO, *cast<XCOFFYAML::ExcpetionAuxEnt>(AuxSym.get()));
break;
case XCOFFYAML::AUX_FCN:
- AuxSym.reset(new XCOFFYAML::FunctionAuxEnt());
+ ResetAuxSym<XCOFFYAML::FunctionAuxEnt>(IO, AuxSym);
auxSymMapping(IO, *cast<XCOFFYAML::FunctionAuxEnt>(AuxSym.get()), Is64);
break;
case XCOFFYAML::AUX_SYM:
- AuxSym.reset(new XCOFFYAML::BlockAuxEnt());
+ ResetAuxSym<XCOFFYAML::BlockAuxEnt>(IO, AuxSym);
auxSymMapping(IO, *cast<XCOFFYAML::BlockAuxEnt>(AuxSym.get()), Is64);
break;
case XCOFFYAML::AUX_FILE:
- AuxSym.reset(new XCOFFYAML::FileAuxEnt());
+ ResetAuxSym<XCOFFYAML::FileAuxEnt>(IO, AuxSym);
auxSymMapping(IO, *cast<XCOFFYAML::FileAuxEnt>(AuxSym.get()));
break;
case XCOFFYAML::AUX_CSECT:
- AuxSym.reset(new XCOFFYAML::CsectAuxEnt());
+ ResetAuxSym<XCOFFYAML::CsectAuxEnt>(IO, AuxSym);
auxSymMapping(IO, *cast<XCOFFYAML::CsectAuxEnt>(AuxSym.get()), Is64);
break;
case XCOFFYAML::AUX_SECT:
- AuxSym.reset(new XCOFFYAML::SectAuxEntForDWARF());
+ ResetAuxSym<XCOFFYAML::SectAuxEntForDWARF>(IO, AuxSym);
auxSymMapping(IO, *cast<XCOFFYAML::SectAuxEntForDWARF>(AuxSym.get()));
break;
case XCOFFYAML::AUX_STAT:
- if (Is64)
+ if (Is64) {
IO.setError(
"an auxiliary symbol of type AUX_STAT cannot be defined in XCOFF64");
- AuxSym.reset(new XCOFFYAML::SectAuxEntForStat());
+ return;
+ }
+ ResetAuxSym<XCOFFYAML::SectAuxEntForStat>(IO, AuxSym);
auxSymMapping(IO, *cast<XCOFFYAML::SectAuxEntForStat>(AuxSym.get()));
break;
}
@@ -334,8 +347,7 @@ void MappingTraits<XCOFFYAML::Symbol>::mapping(IO &IO, XCOFFYAML::Symbol &S) {
IO.mapOptional("Type", S.Type);
IO.mapOptional("StorageClass", S.StorageClass);
IO.mapOptional("NumberOfAuxEntries", S.NumberOfAuxEntries);
- if (!IO.outputting())
- IO.mapOptional("AuxEntries", S.AuxEntries);
+ IO.mapOptional("AuxEntries", S.AuxEntries);
}
void MappingTraits<XCOFFYAML::StringTable>::mapping(IO &IO, XCOFFYAML::StringTable &Str) {
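ResetAuxSym captures the asymmetry between the two YAMLTraits directions: when parsing, the concrete object must be created before fields can be read into it; when outputting, it already exists and allocating would discard it. A generic sketch of the pattern (the Base/Derived names and resetOnInput are illustrative):

#include "llvm/Support/YAMLTraits.h"
#include <memory>

struct Base { virtual ~Base() = default; };
struct Derived : Base { unsigned Value = 0; };

// Allocate only while reading YAML; when outputting, Ptr already points at
// the object being serialized, exactly as in ResetAuxSym above.
template <typename ConcreteT>
static void resetOnInput(llvm::yaml::IO &IO, std::unique_ptr<Base> &Ptr) {
  if (!IO.outputting())
    Ptr.reset(new ConcreteT());
}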
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/yaml2obj.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/yaml2obj.cpp
index 06050e246fbf..b9a9ad639709 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/yaml2obj.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/yaml2obj.cpp
@@ -38,6 +38,8 @@ bool convertYAML(yaml::Input &YIn, raw_ostream &Out, ErrorHandler ErrHandler,
return yaml2elf(*Doc.Elf, Out, ErrHandler, MaxSize);
if (Doc.Coff)
return yaml2coff(*Doc.Coff, Out, ErrHandler);
+ if (Doc.Goff)
+ return yaml2goff(*Doc.Goff, Out, ErrHandler);
if (Doc.MachO || Doc.FatMachO)
return yaml2macho(Doc, Out, ErrHandler);
if (Doc.Minidump)
diff --git a/contrib/llvm-project/llvm/lib/Option/Arg.cpp b/contrib/llvm-project/llvm/lib/Option/Arg.cpp
index 48d173accdac..b6384d0db65b 100644
--- a/contrib/llvm-project/llvm/lib/Option/Arg.cpp
+++ b/contrib/llvm-project/llvm/lib/Option/Arg.cpp
@@ -45,10 +45,8 @@ Arg::~Arg() {
}
void Arg::print(raw_ostream& O) const {
- O << "<";
-
- O << " Opt:";
- Opt.print(O);
+ O << "<Opt:";
+ Opt.print(O, /*AddNewLine=*/false);
O << " Index:" << Index;
diff --git a/contrib/llvm-project/llvm/lib/Option/ArgList.cpp b/contrib/llvm-project/llvm/lib/Option/ArgList.cpp
index 86f28e578e5d..72003e3a5259 100644
--- a/contrib/llvm-project/llvm/lib/Option/ArgList.cpp
+++ b/contrib/llvm-project/llvm/lib/Option/ArgList.cpp
@@ -132,17 +132,14 @@ void ArgList::AddAllArgsExcept(ArgStringList &Output,
}
/// This is a nicer interface when you don't have a list of Ids to exclude.
-void ArgList::AddAllArgs(ArgStringList &Output,
+void ArgList::addAllArgs(ArgStringList &Output,
ArrayRef<OptSpecifier> Ids) const {
ArrayRef<OptSpecifier> Exclude = std::nullopt;
AddAllArgsExcept(Output, Ids, Exclude);
}
-/// This 3-opt variant of AddAllArgs could be eliminated in favor of one
-/// that accepts a single specifier, given the above which accepts any number.
-void ArgList::AddAllArgs(ArgStringList &Output, OptSpecifier Id0,
- OptSpecifier Id1, OptSpecifier Id2) const {
- for (auto *Arg : filtered(Id0, Id1, Id2)) {
+void ArgList::AddAllArgs(ArgStringList &Output, OptSpecifier Id0) const {
+ for (auto *Arg : filtered(Id0)) {
Arg->claim();
Arg->render(*this, Output);
}
@@ -188,8 +185,8 @@ const char *ArgList::GetOrMakeJoinedArgString(unsigned Index,
StringRef LHS,
StringRef RHS) const {
StringRef Cur = getArgString(Index);
- if (Cur.size() == LHS.size() + RHS.size() &&
- Cur.startswith(LHS) && Cur.endswith(RHS))
+ if (Cur.size() == LHS.size() + RHS.size() && Cur.starts_with(LHS) &&
+ Cur.ends_with(RHS))
return Cur.data();
return MakeArgString(LHS + RHS);
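The startswith/endswith renames recur throughout this merge; starts_with/ends_with are the std::string_view-style spellings with identical behavior. A small sketch of the joined-argument check above, using the new names:

#include "llvm/ADT/StringRef.h"

// True when Cur is exactly LHS immediately followed by RHS, the condition
// GetOrMakeJoinedArgString uses to reuse an existing argument string.
static bool isJoined(llvm::StringRef Cur, llvm::StringRef LHS,
                     llvm::StringRef RHS) {
  return Cur.size() == LHS.size() + RHS.size() && Cur.starts_with(LHS) &&
         Cur.ends_with(RHS);
}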
diff --git a/contrib/llvm-project/llvm/lib/Option/OptTable.cpp b/contrib/llvm-project/llvm/lib/Option/OptTable.cpp
index 3f53ac119c69..cf69f6173b6d 100644
--- a/contrib/llvm-project/llvm/lib/Option/OptTable.cpp
+++ b/contrib/llvm-project/llvm/lib/Option/OptTable.cpp
@@ -59,7 +59,7 @@ static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) {
if (&A == &B)
return false;
- if (int N = StrCmpOptionName(A.Name, B.Name))
+ if (int N = StrCmpOptionName(A.getName(), B.getName()))
return N < 0;
for (size_t I = 0, K = std::min(A.Prefixes.size(), B.Prefixes.size()); I != K;
@@ -77,7 +77,7 @@ static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) {
// Support lower_bound between info and an option name.
static inline bool operator<(const OptTable::Info &I, StringRef Name) {
- return StrCmpOptionNameIgnoreCase(I.Name, Name) < 0;
+ return StrCmpOptionNameIgnoreCase(I.getName(), Name) < 0;
}
} // end namespace opt
@@ -152,7 +152,7 @@ static bool isInput(const ArrayRef<StringLiteral> &Prefixes, StringRef Arg) {
if (Arg == "-")
return true;
for (const StringRef &Prefix : Prefixes)
- if (Arg.startswith(Prefix))
+ if (Arg.starts_with(Prefix))
return false;
return true;
}
@@ -161,12 +161,12 @@ static bool isInput(const ArrayRef<StringLiteral> &Prefixes, StringRef Arg) {
static unsigned matchOption(const OptTable::Info *I, StringRef Str,
bool IgnoreCase) {
for (auto Prefix : I->Prefixes) {
- if (Str.startswith(Prefix)) {
+ if (Str.starts_with(Prefix)) {
StringRef Rest = Str.substr(Prefix.size());
- bool Matched = IgnoreCase ? Rest.starts_with_insensitive(I->Name)
- : Rest.startswith(I->Name);
+ bool Matched = IgnoreCase ? Rest.starts_with_insensitive(I->getName())
+ : Rest.starts_with(I->getName());
if (Matched)
- return Prefix.size() + StringRef(I->Name).size();
+ return Prefix.size() + StringRef(I->getName()).size();
}
}
return 0;
@@ -175,8 +175,8 @@ static unsigned matchOption(const OptTable::Info *I, StringRef Str,
// Returns true if one of the Prefixes + In.Names matches Option
static bool optionMatches(const OptTable::Info &In, StringRef Option) {
for (auto Prefix : In.Prefixes)
- if (Option.endswith(In.Name))
- if (Option.slice(0, Option.size() - In.Name.size()) == Prefix)
+ if (Option.ends_with(In.getName()))
+ if (Option.slice(0, Option.size() - In.getName().size()) == Prefix)
return true;
return false;
}
@@ -197,7 +197,7 @@ OptTable::suggestValueCompletions(StringRef Option, StringRef Arg) const {
std::vector<std::string> Result;
for (StringRef Val : Candidates)
- if (Val.startswith(Arg) && Arg.compare(Val))
+ if (Val.starts_with(Arg) && Arg.compare(Val))
Result.push_back(std::string(Val));
return Result;
}
@@ -205,20 +205,23 @@ OptTable::suggestValueCompletions(StringRef Option, StringRef Arg) const {
}
std::vector<std::string>
-OptTable::findByPrefix(StringRef Cur, unsigned int DisableFlags) const {
+OptTable::findByPrefix(StringRef Cur, Visibility VisibilityMask,
+ unsigned int DisableFlags) const {
std::vector<std::string> Ret;
for (size_t I = FirstSearchableIndex, E = OptionInfos.size(); I < E; I++) {
const Info &In = OptionInfos[I];
if (In.Prefixes.empty() || (!In.HelpText && !In.GroupID))
continue;
+ if (!(In.Visibility & VisibilityMask))
+ continue;
if (In.Flags & DisableFlags)
continue;
for (auto Prefix : In.Prefixes) {
- std::string S = (Prefix + In.Name + "\t").str();
+ std::string S = (Prefix + In.getName() + "\t").str();
if (In.HelpText)
S += In.HelpText;
- if (StringRef(S).startswith(Cur) && S != std::string(Cur) + "\t")
+ if (StringRef(S).starts_with(Cur) && S != std::string(Cur) + "\t")
Ret.push_back(S);
}
}
@@ -226,9 +229,35 @@ OptTable::findByPrefix(StringRef Cur, unsigned int DisableFlags) const {
}
unsigned OptTable::findNearest(StringRef Option, std::string &NearestString,
+ Visibility VisibilityMask,
+ unsigned MinimumLength,
+ unsigned MaximumDistance) const {
+ return internalFindNearest(
+ Option, NearestString, MinimumLength, MaximumDistance,
+ [VisibilityMask](const Info &CandidateInfo) {
+ return (CandidateInfo.Visibility & VisibilityMask) == 0;
+ });
+}
+
+unsigned OptTable::findNearest(StringRef Option, std::string &NearestString,
unsigned FlagsToInclude, unsigned FlagsToExclude,
unsigned MinimumLength,
unsigned MaximumDistance) const {
+ return internalFindNearest(
+ Option, NearestString, MinimumLength, MaximumDistance,
+ [FlagsToInclude, FlagsToExclude](const Info &CandidateInfo) {
+ if (FlagsToInclude && !(CandidateInfo.Flags & FlagsToInclude))
+ return true;
+ if (CandidateInfo.Flags & FlagsToExclude)
+ return true;
+ return false;
+ });
+}
+
+unsigned OptTable::internalFindNearest(
+ StringRef Option, std::string &NearestString, unsigned MinimumLength,
+ unsigned MaximumDistance,
+ std::function<bool(const Info &)> ExcludeOption) const {
assert(!Option.empty());
// Consider each [option prefix + option name] pair as a candidate, finding
@@ -240,7 +269,7 @@ unsigned OptTable::findNearest(StringRef Option, std::string &NearestString,
for (const Info &CandidateInfo :
ArrayRef<Info>(OptionInfos).drop_front(FirstSearchableIndex)) {
- StringRef CandidateName = CandidateInfo.Name;
+ StringRef CandidateName = CandidateInfo.getName();
// We can eliminate some option prefix/name pairs as candidates right away:
// * Ignore option candidates with empty names, such as "--", or names
@@ -248,12 +277,8 @@ unsigned OptTable::findNearest(StringRef Option, std::string &NearestString,
if (CandidateName.size() < MinimumLength)
continue;
- // * If FlagsToInclude were specified, ignore options that don't include
- // those flags.
- if (FlagsToInclude && !(CandidateInfo.Flags & FlagsToInclude))
- continue;
- // * Ignore options that contain the FlagsToExclude.
- if (CandidateInfo.Flags & FlagsToExclude)
+      // * Ignore options that the ExcludeOption predicate filters out.
+ if (ExcludeOption(CandidateInfo))
continue;
// * Ignore positional argument option candidates (which do not
@@ -315,8 +340,8 @@ unsigned OptTable::findNearest(StringRef Option, std::string &NearestString,
// Parse a single argument, return the new argument, and update Index. If
// GroupedShortOptions is true, -a matches "-abc" and the argument in Args will
-// be updated to "-bc". This overload does not support
-// FlagsToInclude/FlagsToExclude or case insensitive options.
+// be updated to "-bc". This overload does not support VisibilityMask or case
+// insensitive options.
std::unique_ptr<Arg> OptTable::parseOneArgGrouped(InputArgList &Args,
unsigned &Index) const {
// Anything that doesn't start with PrefixesUnion is an input, as is '-'
@@ -381,8 +406,28 @@ std::unique_ptr<Arg> OptTable::parseOneArgGrouped(InputArgList &Args,
}
std::unique_ptr<Arg> OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
+ Visibility VisibilityMask) const {
+ return internalParseOneArg(Args, Index, [VisibilityMask](const Option &Opt) {
+ return !Opt.hasVisibilityFlag(VisibilityMask);
+ });
+}
+
+std::unique_ptr<Arg> OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
unsigned FlagsToInclude,
unsigned FlagsToExclude) const {
+ return internalParseOneArg(
+ Args, Index, [FlagsToInclude, FlagsToExclude](const Option &Opt) {
+ if (FlagsToInclude && !Opt.hasFlag(FlagsToInclude))
+ return true;
+ if (Opt.hasFlag(FlagsToExclude))
+ return true;
+ return false;
+ });
+}
+
+std::unique_ptr<Arg> OptTable::internalParseOneArg(
+ const ArgList &Args, unsigned &Index,
+ std::function<bool(const Option &)> ExcludeOption) const {
unsigned Prev = Index;
StringRef Str = Args.getArgString(Index);
@@ -418,9 +463,7 @@ std::unique_ptr<Arg> OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
Option Opt(Start, this);
- if (FlagsToInclude && !Opt.hasFlag(FlagsToInclude))
- continue;
- if (Opt.hasFlag(FlagsToExclude))
+ if (ExcludeOption(Opt))
continue;
// See if this option matches.
@@ -444,11 +487,37 @@ std::unique_ptr<Arg> OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
Str.data());
}
-InputArgList OptTable::ParseArgs(ArrayRef<const char *> ArgArr,
+InputArgList OptTable::ParseArgs(ArrayRef<const char *> Args,
+ unsigned &MissingArgIndex,
+ unsigned &MissingArgCount,
+ Visibility VisibilityMask) const {
+ return internalParseArgs(
+ Args, MissingArgIndex, MissingArgCount,
+ [VisibilityMask](const Option &Opt) {
+ return !Opt.hasVisibilityFlag(VisibilityMask);
+ });
+}
+
+InputArgList OptTable::ParseArgs(ArrayRef<const char *> Args,
unsigned &MissingArgIndex,
unsigned &MissingArgCount,
unsigned FlagsToInclude,
unsigned FlagsToExclude) const {
+ return internalParseArgs(
+ Args, MissingArgIndex, MissingArgCount,
+ [FlagsToInclude, FlagsToExclude](const Option &Opt) {
+ if (FlagsToInclude && !Opt.hasFlag(FlagsToInclude))
+ return true;
+ if (Opt.hasFlag(FlagsToExclude))
+ return true;
+ return false;
+ });
+}
+
+InputArgList OptTable::internalParseArgs(
+ ArrayRef<const char *> ArgArr, unsigned &MissingArgIndex,
+ unsigned &MissingArgCount,
+ std::function<bool(const Option &)> ExcludeOption) const {
InputArgList Args(ArgArr.begin(), ArgArr.end());
// FIXME: Handle '@' args (or at least error on them).
@@ -481,7 +550,7 @@ InputArgList OptTable::ParseArgs(ArrayRef<const char *> ArgArr,
unsigned Prev = Index;
std::unique_ptr<Arg> A = GroupedShortOptions
? parseOneArgGrouped(Args, Index)
- : ParseOneArg(Args, Index, FlagsToInclude, FlagsToExclude);
+ : internalParseOneArg(Args, Index, ExcludeOption);
assert((Index > Prev || GroupedShortOptions) &&
"Parser failed to consume argument.");
@@ -502,7 +571,7 @@ InputArgList OptTable::ParseArgs(ArrayRef<const char *> ArgArr,
InputArgList OptTable::parseArgs(int Argc, char *const *Argv,
OptSpecifier Unknown, StringSaver &Saver,
- function_ref<void(StringRef)> ErrorFn) const {
+ std::function<void(StringRef)> ErrorFn) const {
SmallVector<const char *, 0> NewArgv;
// The environment variable specifies initial options which can be overridden
  // by command line options.
@@ -529,7 +598,7 @@ InputArgList OptTable::parseArgs(int Argc, char *const *Argv,
static std::string getOptionHelpName(const OptTable &Opts, OptSpecifier Id) {
const Option O = Opts.getOption(Id);
- std::string Name = O.getPrefixedName();
+ std::string Name = O.getPrefixedName().str();
// Add metavar, if used.
switch (O.getKind()) {
@@ -595,15 +664,24 @@ static void PrintHelpOptionList(raw_ostream &OS, StringRef Title,
const unsigned InitialPad = 2;
for (const OptionInfo &Opt : OptionHelp) {
const std::string &Option = Opt.Name;
- int Pad = OptionFieldWidth - int(Option.size());
+ int Pad = OptionFieldWidth + InitialPad;
+ int FirstLinePad = OptionFieldWidth - int(Option.size());
OS.indent(InitialPad) << Option;
// Break on long option names.
- if (Pad < 0) {
+ if (FirstLinePad < 0) {
OS << "\n";
- Pad = OptionFieldWidth + InitialPad;
+ FirstLinePad = OptionFieldWidth + InitialPad;
+ Pad = FirstLinePad;
}
- OS.indent(Pad + 1) << Opt.HelpText << '\n';
+
+ SmallVector<StringRef> Lines;
+ Opt.HelpText.split(Lines, '\n');
+ assert(Lines.size() && "Expected at least the first line in the help text");
+ auto *LinesIt = Lines.begin();
+ OS.indent(FirstLinePad + 1) << *LinesIt << '\n';
+ while (Lines.end() != ++LinesIt)
+ OS.indent(Pad + 1) << *LinesIt << '\n';
}
}
@@ -626,14 +704,35 @@ static const char *getOptionHelpGroup(const OptTable &Opts, OptSpecifier Id) {
}
void OptTable::printHelp(raw_ostream &OS, const char *Usage, const char *Title,
- bool ShowHidden, bool ShowAllAliases) const {
- printHelp(OS, Usage, Title, /*Include*/ 0, /*Exclude*/
- (ShowHidden ? 0 : HelpHidden), ShowAllAliases);
+ bool ShowHidden, bool ShowAllAliases,
+ Visibility VisibilityMask) const {
+ return internalPrintHelp(
+ OS, Usage, Title, ShowHidden, ShowAllAliases,
+ [VisibilityMask](const Info &CandidateInfo) -> bool {
+ return (CandidateInfo.Visibility & VisibilityMask) == 0;
+ });
}
void OptTable::printHelp(raw_ostream &OS, const char *Usage, const char *Title,
unsigned FlagsToInclude, unsigned FlagsToExclude,
bool ShowAllAliases) const {
+ bool ShowHidden = !(FlagsToExclude & HelpHidden);
+ FlagsToExclude &= ~HelpHidden;
+ return internalPrintHelp(
+ OS, Usage, Title, ShowHidden, ShowAllAliases,
+ [FlagsToInclude, FlagsToExclude](const Info &CandidateInfo) {
+ if (FlagsToInclude && !(CandidateInfo.Flags & FlagsToInclude))
+ return true;
+ if (CandidateInfo.Flags & FlagsToExclude)
+ return true;
+ return false;
+ });
+}
+
+void OptTable::internalPrintHelp(
+ raw_ostream &OS, const char *Usage, const char *Title, bool ShowHidden,
+ bool ShowAllAliases,
+ std::function<bool(const Info &)> ExcludeOption) const {
OS << "OVERVIEW: " << Title << "\n\n";
OS << "USAGE: " << Usage << "\n\n";
@@ -646,10 +745,11 @@ void OptTable::printHelp(raw_ostream &OS, const char *Usage, const char *Title,
if (getOptionKind(Id) == Option::GroupClass)
continue;
- unsigned Flags = getInfo(Id).Flags;
- if (FlagsToInclude && !(Flags & FlagsToInclude))
+ const Info &CandidateInfo = getInfo(Id);
+ if (!ShowHidden && (CandidateInfo.Flags & opt::HelpHidden))
continue;
- if (Flags & FlagsToExclude)
+
+ if (ExcludeOption(CandidateInfo))
continue;
// If an alias doesn't have a help text, show a help text for the aliased
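The OptTable changes repeat one refactoring: the Visibility-based and flags-based overloads each build an exclusion predicate and forward to a shared internal worker. A stand-alone sketch of that shape (Item, countIncluded, and the two entry points are illustrative):

#include <functional>

struct Item {
  unsigned Flags;
  unsigned Visibility;
};

static const Item Items[] = {{0x1, 0x2}, {0x4, 0x2}, {0x1, 0x8}};

// The single worker; both public entry points below reduce to this.
static int countIncluded(const std::function<bool(const Item &)> &Exclude) {
  int N = 0;
  for (const Item &I : Items)
    if (!Exclude(I))
      ++N;
  return N;
}

static int countByVisibility(unsigned Mask) {
  return countIncluded(
      [Mask](const Item &I) { return (I.Visibility & Mask) == 0; });
}

static int countByFlags(unsigned Include, unsigned Exclude) {
  return countIncluded([Include, Exclude](const Item &I) {
    if (Include && !(I.Flags & Include))
      return true;
    return (I.Flags & Exclude) != 0;
  });
}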
diff --git a/contrib/llvm-project/llvm/lib/Option/Option.cpp b/contrib/llvm-project/llvm/lib/Option/Option.cpp
index c570b02b08ce..500768588bc9 100644
--- a/contrib/llvm-project/llvm/lib/Option/Option.cpp
+++ b/contrib/llvm-project/llvm/lib/Option/Option.cpp
@@ -38,7 +38,7 @@ Option::Option(const OptTable::Info *info, const OptTable *owner)
}
}
-void Option::print(raw_ostream &O) const {
+void Option::print(raw_ostream &O, bool AddNewLine) const {
O << "<";
switch (getKind()) {
#define P(N) case N: O << #N; break
@@ -70,19 +70,21 @@ void Option::print(raw_ostream &O) const {
const Option Group = getGroup();
if (Group.isValid()) {
O << " Group:";
- Group.print(O);
+ Group.print(O, /*AddNewLine=*/false);
}
const Option Alias = getAlias();
if (Alias.isValid()) {
O << " Alias:";
- Alias.print(O);
+ Alias.print(O, /*AddNewLine=*/false);
}
if (getKind() == MultiArgClass)
O << " NumArgs:" << getNumArgs();
- O << ">\n";
+ O << ">";
+ if (AddNewLine)
+ O << "\n";
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp b/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp
index d0cbbcc0e310..95b9fb7ad735 100644
--- a/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp
@@ -67,12 +67,28 @@
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/StackLifetime.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
+#include "llvm/Analysis/StructuralHash.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/Analysis/UniformityAnalysis.h"
+#include "llvm/CodeGen/CallBrPrepare.h"
+#include "llvm/CodeGen/DwarfEHPrepare.h"
+#include "llvm/CodeGen/ExpandLargeDivRem.h"
+#include "llvm/CodeGen/ExpandLargeFpConvert.h"
+#include "llvm/CodeGen/ExpandMemCmp.h"
+#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/HardwareLoops.h"
+#include "llvm/CodeGen/IndirectBrExpand.h"
+#include "llvm/CodeGen/InterleavedAccess.h"
+#include "llvm/CodeGen/InterleavedLoadCombine.h"
+#include "llvm/CodeGen/JMCInstrumenter.h"
+#include "llvm/CodeGen/SafeStack.h"
+#include "llvm/CodeGen/SelectOptimize.h"
+#include "llvm/CodeGen/SjLjEHPrepare.h"
#include "llvm/CodeGen/TypePromotion.h"
+#include "llvm/CodeGen/WasmEHPrepare.h"
+#include "llvm/CodeGen/WinEHPrepare.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PassManager.h"
@@ -88,11 +104,13 @@
#include "llvm/Support/Regex.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
+#include "llvm/Transforms/CFGuard.h"
#include "llvm/Transforms/Coroutines/CoroCleanup.h"
#include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
#include "llvm/Transforms/Coroutines/CoroEarly.h"
#include "llvm/Transforms/Coroutines/CoroElide.h"
#include "llvm/Transforms/Coroutines/CoroSplit.h"
+#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/Annotation2Metadata.h"
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
@@ -170,6 +188,7 @@
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/Transforms/Scalar/InductiveRangeCheckElimination.h"
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
+#include "llvm/Transforms/Scalar/InferAlignment.h"
#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
#include "llvm/Transforms/Scalar/JumpThreading.h"
#include "llvm/Transforms/Scalar/LICM.h"
@@ -231,6 +250,7 @@
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
#include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h"
#include "llvm/Transforms/Utils/CountVisits.h"
+#include "llvm/Transforms/Utils/DXILUpgrade.h"
#include "llvm/Transforms/Utils/Debugify.h"
#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
#include "llvm/Transforms/Utils/FixIrreducible.h"
@@ -399,6 +419,32 @@ public:
static StringRef name() { return "TriggerCrashPass"; }
};
+// A pass for testing message reporting of -verify-each failures.
+// DO NOT USE THIS EXCEPT FOR TESTING!
+class TriggerVerifierErrorPass
+ : public PassInfoMixin<TriggerVerifierErrorPass> {
+public:
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &) {
+ // Intentionally break the Module by creating an alias without setting the
+ // aliasee.
+ auto *PtrTy = llvm::PointerType::getUnqual(M.getContext());
+ GlobalAlias::create(PtrTy, PtrTy->getAddressSpace(),
+ GlobalValue::LinkageTypes::InternalLinkage,
+ "__bad_alias", nullptr, &M);
+ return PreservedAnalyses::none();
+ }
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &) {
+ // Intentionally break the Function by inserting a terminator
+ // instruction in the middle of a basic block.
+ BasicBlock &BB = F.getEntryBlock();
+ new UnreachableInst(F.getContext(), BB.getTerminator());
+ return PreservedAnalyses::none();
+ }
+
+ static StringRef name() { return "TriggerVerifierErrorPass"; }
+};
+
} // namespace
PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO,
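The TriggerVerifierErrorPass added above follows the standard new-pass-manager shape: derive from PassInfoMixin, provide one run() overload per IR unit, and return a PreservedAnalyses summary. A minimal sketch of that shape, with an illustrative class name that is not part of this merge:

  #include "llvm/IR/Module.h"
  #include "llvm/IR/PassManager.h"

  namespace {
  // Hypothetical no-op module pass; PassInfoMixin derives a default name()
  // from the class type and supplies the glue the pass manager expects.
  class NoChangePass : public llvm::PassInfoMixin<NoChangePass> {
  public:
    llvm::PreservedAnalyses run(llvm::Module &M,
                                llvm::ModuleAnalysisManager &) {
      // Nothing is modified, so every cached analysis remains valid.
      return llvm::PreservedAnalyses::all();
    }
  };
  } // namespace

TriggerVerifierErrorPass instead returns PreservedAnalyses::none() after corrupting the IR, which is what forces the subsequent -verify-each run to fire.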
@@ -525,7 +571,7 @@ static bool checkParametrizedPassName(StringRef Name, StringRef PassName) {
// normal pass name w/o parameters == default parameters
if (Name.empty())
return true;
- return Name.startswith("<") && Name.endswith(">");
+ return Name.starts_with("<") && Name.ends_with(">");
}
static std::optional<OptimizationLevel> parseOptLevel(StringRef S) {
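The startswith()/endswith() replacements here (and throughout this merge) track StringRef's adoption of the C++20 std::string_view spellings; the old forms are deprecated upstream. A small self-contained sketch of the new spellings:

  #include "llvm/ADT/StringRef.h"

  // Mirrors the parameter check above using the new member names.
  static bool isAngleBracketed(llvm::StringRef Name) {
    return Name.starts_with("<") && Name.ends_with(">");
  }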
@@ -693,10 +739,30 @@ Expected<bool> parseCoroSplitPassOptions(StringRef Params) {
}
Expected<bool> parsePostOrderFunctionAttrsPassOptions(StringRef Params) {
- return parseSinglePassOption(Params, "skip-non-recursive",
+ return parseSinglePassOption(Params, "skip-non-recursive-function-attrs",
"PostOrderFunctionAttrs");
}
+Expected<CFGuardPass::Mechanism> parseCFGuardPassOptions(StringRef Params) {
+ if (Params.empty())
+ return CFGuardPass::Mechanism::Check;
+
+ auto [Param, RHS] = Params.split(';');
+ if (!RHS.empty())
+ return make_error<StringError>(
+ formatv("too many CFGuardPass parameters '{0}' ", Params).str(),
+ inconvertibleErrorCode());
+
+ if (Param == "check")
+ return CFGuardPass::Mechanism::Check;
+ if (Param == "dispatch")
+ return CFGuardPass::Mechanism::Dispatch;
+
+ return make_error<StringError>(
+ formatv("invalid CFGuardPass mechanism: '{0}' ", Param).str(),
+ inconvertibleErrorCode());
+}
+
Expected<bool> parseEarlyCSEPassOptions(StringRef Params) {
return parseSinglePassOption(Params, "memssa", "EarlyCSE");
}
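parseCFGuardPassOptions accepts at most one parameter, check (the default) or dispatch, corresponding to the two Windows Control Flow Guard instrumentation mechanisms. A hedged sketch of exercising it through the textual pipeline parser; the helper function and its name are illustrative:

  #include "llvm/Passes/PassBuilder.h"
  #include "llvm/Support/Error.h"

  void addCFGuardDispatch(llvm::PassBuilder &PB) {
    llvm::ModulePassManager MPM;
    // Bare "cfguard" means cfguard<check>; two or more parameters are an error.
    llvm::cantFail(PB.parsePassPipeline(MPM, "function(cfguard<dispatch>)"));
  }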
@@ -751,26 +817,6 @@ Expected<HWAddressSanitizerOptions> parseHWASanPassOptions(StringRef Params) {
return Result;
}
-Expected<EmbedBitcodeOptions> parseEmbedBitcodePassOptions(StringRef Params) {
- EmbedBitcodeOptions Result;
- while (!Params.empty()) {
- StringRef ParamName;
- std::tie(ParamName, Params) = Params.split(';');
-
- if (ParamName == "thinlto") {
- Result.IsThinLTO = true;
- } else if (ParamName == "emit-summary") {
- Result.EmitLTOSummary = true;
- } else {
- return make_error<StringError>(
- formatv("invalid EmbedBitcode pass parameter '{0}' ", ParamName)
- .str(),
- inconvertibleErrorCode());
- }
- }
- return Result;
-}
-
Expected<MemorySanitizerOptions> parseMSanPassOptions(StringRef Params) {
MemorySanitizerOptions Result;
while (!Params.empty()) {
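With parseEmbedBitcodePassOptions deleted, embed-bitcode no longer accepts thinlto or emit-summary parameters; the FatLTO rework later in this merge hard-wires those decisions, and the pass is re-registered below as a plain MODULE_PASS. A two-line sketch under the same illustrative setup as above:

  void addEmbedBitcode(llvm::PassBuilder &PB) {
    llvm::ModulePassManager MPM;
    // "embed-bitcode<thinlto>" would now fail to parse; only the bare name works.
    llvm::cantFail(PB.parsePassPipeline(MPM, "embed-bitcode"));
  }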
@@ -845,6 +891,9 @@ Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
Expected<InstCombineOptions> parseInstCombineOptions(StringRef Params) {
InstCombineOptions Result;
+ // When "instcombine" is specified in -passes, enable fixpoint verification
+ // by default, as this is what most tests should use.
+ Result.setVerifyFixpoint(true);
while (!Params.empty()) {
StringRef ParamName;
std::tie(ParamName, Params) = Params.split(';');
@@ -852,6 +901,8 @@ Expected<InstCombineOptions> parseInstCombineOptions(StringRef Params) {
bool Enable = !ParamName.consume_front("no-");
if (ParamName == "use-loop-info") {
Result.setUseLoopInfo(Enable);
+ } else if (ParamName == "verify-fixpoint") {
+ Result.setVerifyFixpoint(Enable);
} else if (Enable && ParamName.consume_front("max-iterations=")) {
APInt MaxIterations;
if (ParamName.getAsInteger(0, MaxIterations))
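Because parseInstCombineOptions now seeds VerifyFixpoint to true, every textual instcombine pipeline asserts that a fixpoint was reached unless a test opts out. A hedged sketch of the programmatic equivalent of instcombine<no-verify-fixpoint;max-iterations=2>; the wrapper function is illustrative:

  #include "llvm/IR/PassManager.h"
  #include "llvm/Transforms/InstCombine/InstCombine.h"

  void addBoundedInstCombine(llvm::FunctionPassManager &FPM) {
    llvm::InstCombineOptions Opts;
    Opts.setVerifyFixpoint(false); // tolerate stopping before convergence
    Opts.setMaxIterations(2);      // cap the number of iterations instead
    FPM.addPass(llvm::InstCombinePass(Opts));
  }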
@@ -1071,6 +1122,11 @@ Expected<bool> parseMemorySSAPrinterPassOptions(StringRef Params) {
"MemorySSAPrinterPass");
}
+Expected<bool> parseSpeculativeExecutionPassOptions(StringRef Params) {
+ return parseSinglePassOption(Params, "only-if-divergent-target",
+ "SpeculativeExecutionPass");
+}
+
Expected<std::string> parseMemProfUsePassOptions(StringRef Params) {
std::string Result;
while (!Params.empty()) {
@@ -1088,13 +1144,23 @@ Expected<std::string> parseMemProfUsePassOptions(StringRef Params) {
return Result;
}
+Expected<bool> parseStructuralHashPrinterPassOptions(StringRef Params) {
+ return parseSinglePassOption(Params, "detailed",
+ "StructuralHashPrinterPass");
+}
+
+Expected<bool> parseWinEHPrepareOptions(StringRef Params) {
+ return parseSinglePassOption(Params, "demote-catchswitch-only",
+ "WinEHPreparePass");
+}
+
} // namespace
/// Tests whether a pass name starts with a valid prefix for a default pipeline
/// alias.
static bool startsWithDefaultPipelineAliasPrefix(StringRef Name) {
- return Name.startswith("default") || Name.startswith("thinlto") ||
- Name.startswith("lto");
+ return Name.starts_with("default") || Name.starts_with("thinlto") ||
+ Name.starts_with("lto");
}
/// Tests whether registered callbacks will accept a given pass name.
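The prefixes tested here gate the default-pipeline aliases, which expand to entire pipelines rather than to a single pass. A usage sketch, assuming a configured PassBuilder as in the earlier snippets:

  void addDefaultO2(llvm::PassBuilder &PB) {
    llvm::ModulePassManager MPM;
    // Expands to the whole -O2 module pipeline built by PassBuilderPipelines.
    llvm::cantFail(PB.parsePassPipeline(MPM, "default<O2>"));
  }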
diff --git a/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp b/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp
index 660cb2e974d7..5c6c391049a7 100644
--- a/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -37,6 +37,7 @@
#include "llvm/Transforms/Coroutines/CoroEarly.h"
#include "llvm/Transforms/Coroutines/CoroElide.h"
#include "llvm/Transforms/Coroutines/CoroSplit.h"
+#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/Annotation2Metadata.h"
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
@@ -88,6 +89,7 @@
#include "llvm/Transforms/Scalar/Float2Int.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
+#include "llvm/Transforms/Scalar/InferAlignment.h"
#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
#include "llvm/Transforms/Scalar/JumpThreading.h"
#include "llvm/Transforms/Scalar/LICM.h"
@@ -104,6 +106,7 @@
#include "llvm/Transforms/Scalar/LoopSink.h"
#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
+#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
@@ -161,7 +164,7 @@ static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
cl::desc("Enable module inliner"));
static cl::opt<bool> PerformMandatoryInliningsFirst(
- "mandatory-inlining-first", cl::init(true), cl::Hidden,
+ "mandatory-inlining-first", cl::init(false), cl::Hidden,
cl::desc("Perform mandatory inlinings module-wide, before performing "
"inlining"));
@@ -270,10 +273,18 @@ static cl::opt<AttributorRunOption> AttributorRun(
clEnumValN(AttributorRunOption::NONE, "none",
"disable attributor runs")));
+static cl::opt<bool> UseLoopVersioningLICM(
+ "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
+ cl::desc("Enable the experimental Loop Versioning LICM pass"));
+
+namespace llvm {
cl::opt<bool> EnableMemProfContextDisambiguation(
"enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation"));
+extern cl::opt<bool> EnableInferAlignmentPass;
+} // namespace llvm
+
PipelineTuningOptions::PipelineTuningOptions() {
LoopInterleaving = true;
LoopVectorization = true;
@@ -291,7 +302,6 @@ PipelineTuningOptions::PipelineTuningOptions() {
namespace llvm {
extern cl::opt<unsigned> MaxDevirtIterations;
-extern cl::opt<bool> EnableKnowledgeRetention;
} // namespace llvm
void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
@@ -554,9 +564,6 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(InstCombinePass());
FPM.addPass(AggressiveInstCombinePass());
- if (EnableConstraintElimination)
- FPM.addPass(ConstraintEliminationPass());
-
if (!Level.isOptimizingForSize())
FPM.addPass(LibCallsShrinkWrapPass());
@@ -577,6 +584,9 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// minimal multiplication trees.
FPM.addPass(ReassociatePass());
+ if (EnableConstraintElimination)
+ FPM.addPass(ConstraintEliminationPass());
+
// Add the primary loop simplification pipeline.
// FIXME: Currently this is split into two loop pass pipelines because we run
// some function passes in between them. These can and should be removed
@@ -722,46 +732,52 @@ void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
MPM.addPass(NameAnonGlobalPass());
}
-void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
- OptimizationLevel Level, bool RunProfileGen,
- bool IsCS, std::string ProfileFile,
- std::string ProfileRemappingFile,
- ThinOrFullLTOPhase LTOPhase,
- IntrusiveRefCntPtr<vfs::FileSystem> FS) {
+void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
+ OptimizationLevel Level,
+ ThinOrFullLTOPhase LTOPhase) {
assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
- if (!IsCS && !DisablePreInliner) {
- InlineParams IP;
+ if (DisablePreInliner)
+ return;
+ InlineParams IP;
- IP.DefaultThreshold = PreInlineThreshold;
+ IP.DefaultThreshold = PreInlineThreshold;
- // FIXME: The hint threshold has the same value used by the regular inliner
- // when not optimzing for size. This should probably be lowered after
- // performance testing.
- // FIXME: this comment is cargo culted from the old pass manager, revisit).
- IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
- ModuleInlinerWrapperPass MIWP(
- IP, /* MandatoryFirst */ true,
- InlineContext{LTOPhase, InlinePass::EarlyInliner});
- CGSCCPassManager &CGPipeline = MIWP.getPM();
+ // FIXME: The hint threshold has the same value used by the regular inliner
+ // when not optimizing for size. This should probably be lowered after
+ // performance testing.
+ // FIXME: this comment is cargo-culted from the old pass manager; revisit.
+ IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
+ ModuleInlinerWrapperPass MIWP(
+ IP, /* MandatoryFirst */ true,
+ InlineContext{LTOPhase, InlinePass::EarlyInliner});
+ CGSCCPassManager &CGPipeline = MIWP.getPM();
- FunctionPassManager FPM;
- FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
- FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
- FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
- true))); // Merge & remove basic blocks.
- FPM.addPass(InstCombinePass()); // Combine silly sequences.
- invokePeepholeEPCallbacks(FPM, Level);
+ FunctionPassManager FPM;
+ FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
+ FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
+ FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
+ true))); // Merge & remove basic blocks.
+ FPM.addPass(InstCombinePass()); // Combine silly sequences.
+ invokePeepholeEPCallbacks(FPM, Level);
- CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
- std::move(FPM), PTO.EagerlyInvalidateAnalyses));
+ CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
+ std::move(FPM), PTO.EagerlyInvalidateAnalyses));
- MPM.addPass(std::move(MIWP));
+ MPM.addPass(std::move(MIWP));
- // Delete anything that is now dead to make sure that we don't instrument
- // dead code. Instrumentation can end up keeping dead code around and
- // dramatically increase code size.
- MPM.addPass(GlobalDCEPass());
- }
+ // Delete anything that is now dead to make sure that we don't instrument
+ // dead code. Instrumentation can end up keeping dead code around and
+ // dramatically increase code size.
+ MPM.addPass(GlobalDCEPass());
+}
+
+void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
+ OptimizationLevel Level, bool RunProfileGen,
+ bool IsCS, bool AtomicCounterUpdate,
+ std::string ProfileFile,
+ std::string ProfileRemappingFile,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS) {
+ assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
if (!RunProfileGen) {
assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
@@ -793,13 +809,14 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
// Do counter promotion at Level greater than O0.
Options.DoCounterPromotion = true;
Options.UseBFIInPromotion = IsCS;
- MPM.addPass(InstrProfiling(Options, IsCS));
+ Options.Atomic = AtomicCounterUpdate;
+ MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
}
void PassBuilder::addPGOInstrPassesForO0(
ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
- std::string ProfileFile, std::string ProfileRemappingFile,
- IntrusiveRefCntPtr<vfs::FileSystem> FS) {
+ bool AtomicCounterUpdate, std::string ProfileFile,
+ std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) {
if (!RunProfileGen) {
assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
MPM.addPass(
@@ -819,7 +836,8 @@ void PassBuilder::addPGOInstrPassesForO0(
// Do not do counter promotion at O0.
Options.DoCounterPromotion = false;
Options.UseBFIInPromotion = IsCS;
- MPM.addPass(InstrProfiling(Options, IsCS));
+ Options.Atomic = AtomicCounterUpdate;
+ MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
}
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {
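Both addPGOInstrPasses variants now thread AtomicCounterUpdate into the options of the renamed InstrProfilingLoweringPass. A hedged sketch of that lowering setup with illustrative option values (header paths as of LLVM 18):

  #include "llvm/IR/PassManager.h"
  #include "llvm/Transforms/Instrumentation.h"
  #include "llvm/Transforms/Instrumentation/InstrProfiling.h"

  void addProfileLowering(llvm::ModulePassManager &MPM) {
    llvm::InstrProfOptions Options;
    Options.DoCounterPromotion = true; // hoist counter updates out of loops
    Options.Atomic = true;             // what AtomicCounterUpdate toggles above
    MPM.addPass(llvm::InstrProfilingLoweringPass(Options, /*IsCS=*/false));
  }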
@@ -853,11 +871,13 @@ PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
// Require the GlobalsAA analysis for the module so we can query it within
// the CGSCC pipeline.
- MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
- // Invalidate AAManager so it can be recreated and pick up the newly available
- // GlobalsAA.
- MIWP.addModulePass(
- createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
+ if (EnableGlobalAnalyses) {
+ MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
+ // Invalidate AAManager so it can be recreated and pick up the newly
+ // available GlobalsAA.
+ MIWP.addModulePass(
+ createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
+ }
// Require the ProfileSummaryAnalysis for the module so we can query it within
// the inliner pass.
@@ -1088,14 +1108,21 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
PTO.EagerlyInvalidateAnalyses));
+ // Invoke the pre-inliner passes for instrumentation PGO or MemProf.
+ if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
+ (PGOOpt->Action == PGOOptions::IRInstr ||
+ PGOOpt->Action == PGOOptions::IRUse || !PGOOpt->MemoryProfile.empty()))
+ addPreInlinerPasses(MPM, Level, Phase);
+
// Add all the requested passes for instrumentation PGO, if requested.
if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
(PGOOpt->Action == PGOOptions::IRInstr ||
PGOOpt->Action == PGOOptions::IRUse)) {
addPGOInstrPasses(MPM, Level,
- /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
- /* IsCS */ false, PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile, Phase, PGOOpt->FS);
+ /*RunProfileGen=*/PGOOpt->Action == PGOOptions::IRInstr,
+ /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
+ PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
+ PGOOpt->FS);
MPM.addPass(PGOIndirectCallPromotion(false, false));
}
if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
@@ -1110,6 +1137,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
if (EnableSyntheticCounts && !PGOOpt)
MPM.addPass(SyntheticCountsPropagation());
+ MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
+
if (EnableModuleInliner)
MPM.addPass(buildModuleInlinerPipeline(Level, Phase));
else
@@ -1134,6 +1163,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
FPM.addPass(LoopVectorizePass(
LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
+ if (EnableInferAlignmentPass)
+ FPM.addPass(InferAlignmentPass());
if (IsFullLTO) {
// The vectorizer may have significantly shortened a loop body; unroll
// again. Unroll small loops to hide loop backedge latency and saturate any
@@ -1251,6 +1282,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
}
+ if (EnableInferAlignmentPass)
+ FPM.addPass(InferAlignmentPass());
FPM.addPass(InstCombinePass());
// This is needed for two reasons:
@@ -1306,13 +1339,15 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
// instrumentation is after all the inlines are done.
if (!LTOPreLink && PGOOpt) {
if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
- addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
- /* IsCS */ true, PGOOpt->CSProfileGenFile,
- PGOOpt->ProfileRemappingFile, LTOPhase, PGOOpt->FS);
+ addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
+ /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
+ PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
+ PGOOpt->FS);
else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
- addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
- /* IsCS */ true, PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile, LTOPhase, PGOOpt->FS);
+ addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
+ /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
+ PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
+ PGOOpt->FS);
}
// Re-compute GlobalsAA here prior to function passes. This is particularly
@@ -1322,11 +1357,27 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
// information for all local globals here, the late loop passes and notably
// the vectorizer will be able to use them to help recognize vectorizable
// memory operations.
- MPM.addPass(RecomputeGlobalsAAPass());
+ if (EnableGlobalAnalyses)
+ MPM.addPass(RecomputeGlobalsAAPass());
invokeOptimizerEarlyEPCallbacks(MPM, Level);
FunctionPassManager OptimizePM;
+ // Schedule LoopVersioningLICM once inlining is over, because we may then
+ // see more accurate aliasing. Running it this late also avoids versioning
+ // too early, which could block further inlining by growing the code.
+ // Optimizations that run later can benefit from the no-alias assumption in
+ // the cloned loop.
+ if (UseLoopVersioningLICM) {
+ OptimizePM.addPass(
+ createFunctionToLoopPassAdaptor(LoopVersioningLICMPass()));
+ // LoopVersioningLICM pass might increase new LICM opportunities.
+ OptimizePM.addPass(createFunctionToLoopPassAdaptor(
+ LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true),
+ /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
+ }
+
OptimizePM.addPass(Float2IntPass());
OptimizePM.addPass(LowerConstantIntrinsicsPass());
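The block above only runs behind the new -enable-loop-versioning-licm flag: the pass versions a loop under runtime no-alias checks, and the follow-up LICM run exploits the cloned, no-alias loop. A reduced sketch of the same scheduling, using default LICM options rather than the PTO-derived caps:

  #include "llvm/IR/PassManager.h"
  #include "llvm/Transforms/Scalar/LICM.h"
  #include "llvm/Transforms/Scalar/LoopPassManager.h"
  #include "llvm/Transforms/Scalar/LoopVersioningLICM.h"

  void addLoopVersioningLICM(llvm::FunctionPassManager &FPM) {
    FPM.addPass(
        llvm::createFunctionToLoopPassAdaptor(llvm::LoopVersioningLICMPass()));
    // Re-run LICM so the versioned clone gets its new hoisting opportunities.
    FPM.addPass(llvm::createFunctionToLoopPassAdaptor(
        llvm::LICMPass(llvm::LICMOptions()),
        /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
  }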
@@ -1479,14 +1530,22 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
}
ModulePassManager
-PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO,
- bool EmitSummary) {
+PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level) {
ModulePassManager MPM;
- MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary,
- ThinLTO
- ? buildThinLTOPreLinkDefaultPipeline(Level)
- : buildLTOPreLinkDefaultPipeline(Level)));
- MPM.addPass(buildPerModuleDefaultPipeline(Level));
+ // FatLTO always uses UnifiedLTO, so use the ThinLTOPreLink pipeline
+ MPM.addPass(buildThinLTOPreLinkDefaultPipeline(Level));
+ MPM.addPass(EmbedBitcodePass());
+
+ // Use the ThinLTO post-link pipeline with sample profiling;
+ if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
+ MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
+ else {
+ // otherwise, just use module optimization
+ MPM.addPass(
+ buildModuleOptimizationPipeline(Level, ThinOrFullLTOPhase::None));
+ // Emit annotation remarks.
+ addAnnotationRemarksPass(MPM);
+ }
return MPM;
}
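With the ThinLTO and EmitSummary parameters dropped, buildFatLTODefaultPipeline makes every choice internally: FatLTO always takes the UnifiedLTO (ThinLTO pre-link) route. A hedged caller-side sketch; the function name and the TargetMachine pointer are illustrative:

  #include "llvm/Passes/PassBuilder.h"

  llvm::ModulePassManager buildFatLTO(llvm::TargetMachine *TM) {
    llvm::PassBuilder PB(TM);
    // Old form took (Level, ThinLTO, EmitSummary); now only the level remains.
    return PB.buildFatLTODefaultPipeline(llvm::OptimizationLevel::O2);
  }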
@@ -1787,15 +1846,15 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// sensitive PGO pass.
if (PGOOpt) {
if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
- addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
- /* IsCS */ true, PGOOpt->CSProfileGenFile,
- PGOOpt->ProfileRemappingFile,
- ThinOrFullLTOPhase::FullLTOPostLink, PGOOpt->FS);
+ addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
+ /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
+ PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
+ PGOOpt->FS);
else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
- addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
- /* IsCS */ true, PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile,
- ThinOrFullLTOPhase::FullLTOPostLink, PGOOpt->FS);
+ addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
+ /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
+ PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
+ PGOOpt->FS);
}
// Break up allocas
@@ -1814,11 +1873,13 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// Require the GlobalsAA analysis for the module so we can query it within
// MainFPM.
- MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
- // Invalidate AAManager so it can be recreated and pick up the newly available
- // GlobalsAA.
- MPM.addPass(
- createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
+ if (EnableGlobalAnalyses) {
+ MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
+ // Invalidate AAManager so it can be recreated and pick up the newly
+ // available GlobalsAA.
+ MPM.addPass(
+ createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
+ }
FunctionPassManager MainFPM;
MainFPM.addPass(createFunctionToLoopPassAdaptor(
@@ -1939,9 +2000,9 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
PGOOpt->Action == PGOOptions::IRUse))
addPGOInstrPassesForO0(
MPM,
- /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
- /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
- PGOOpt->FS);
+ /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
+ /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
+ PGOOpt->ProfileRemappingFile, PGOOpt->FS);
invokePipelineStartEPCallbacks(MPM, Level);
diff --git a/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def b/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def
index e10dc995c493..d8fc7cd8a231 100644
--- a/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def
+++ b/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def
@@ -19,15 +19,16 @@
#define MODULE_ANALYSIS(NAME, CREATE_PASS)
#endif
MODULE_ANALYSIS("callgraph", CallGraphAnalysis())
+MODULE_ANALYSIS("collector-metadata", CollectorMetadataAnalysis())
+MODULE_ANALYSIS("inline-advisor", InlineAdvisorAnalysis())
+MODULE_ANALYSIS("ir-similarity", IRSimilarityAnalysis())
MODULE_ANALYSIS("lcg", LazyCallGraphAnalysis())
MODULE_ANALYSIS("module-summary", ModuleSummaryIndexAnalysis())
MODULE_ANALYSIS("no-op-module", NoOpModuleAnalysis())
+MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
MODULE_ANALYSIS("profile-summary", ProfileSummaryAnalysis())
MODULE_ANALYSIS("stack-safety", StackSafetyGlobalAnalysis())
MODULE_ANALYSIS("verify", VerifierAnalysis())
-MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
-MODULE_ANALYSIS("inline-advisor", InlineAdvisorAnalysis())
-MODULE_ANALYSIS("ir-similarity", IRSimilarityAnalysis())
#ifndef MODULE_ALIAS_ANALYSIS
#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
@@ -41,155 +42,146 @@ MODULE_ALIAS_ANALYSIS("globals-aa", GlobalsAA())
#define MODULE_PASS(NAME, CREATE_PASS)
#endif
MODULE_PASS("always-inline", AlwaysInlinerPass())
-MODULE_PASS("attributor", AttributorPass())
MODULE_PASS("annotation2metadata", Annotation2MetadataPass())
-MODULE_PASS("openmp-opt", OpenMPOptPass())
-MODULE_PASS("openmp-opt-postlink", OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink))
+MODULE_PASS("attributor", AttributorPass())
+MODULE_PASS("attributor-light", AttributorLightPass())
MODULE_PASS("called-value-propagation", CalledValuePropagationPass())
MODULE_PASS("canonicalize-aliases", CanonicalizeAliasesPass())
MODULE_PASS("cg-profile", CGProfilePass())
MODULE_PASS("check-debugify", NewPMCheckDebugifyPass())
MODULE_PASS("constmerge", ConstantMergePass())
-MODULE_PASS("coro-early", CoroEarlyPass())
MODULE_PASS("coro-cleanup", CoroCleanupPass())
+MODULE_PASS("coro-early", CoroEarlyPass())
MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass())
MODULE_PASS("deadargelim", DeadArgumentEliminationPass())
MODULE_PASS("debugify", NewPMDebugifyPass())
+MODULE_PASS("dfsan", DataFlowSanitizerPass())
MODULE_PASS("dot-callgraph", CallGraphDOTPrinterPass())
+MODULE_PASS("dxil-upgrade", DXILUpgradePass())
MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass())
+MODULE_PASS("embed-bitcode", EmbedBitcodePass())
MODULE_PASS("extract-blocks", BlockExtractorPass({}, false))
MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
MODULE_PASS("function-import", FunctionImportPass())
MODULE_PASS("globalopt", GlobalOptPass())
MODULE_PASS("globalsplit", GlobalSplitPass())
+MODULE_PASS("hipstdpar-interpose-alloc", HipStdParAllocationInterpositionPass())
+MODULE_PASS("hipstdpar-select-accelerator-code",
+ HipStdParAcceleratorCodeSelectionPass())
MODULE_PASS("hotcoldsplit", HotColdSplittingPass())
MODULE_PASS("inferattrs", InferFunctionAttrsPass())
+MODULE_PASS("inliner-ml-advisor-release",
+ ModuleInlinerWrapperPass(getInlineParams(), true, {},
+ InliningAdvisorMode::Release, 0))
MODULE_PASS("inliner-wrapper", ModuleInlinerWrapperPass())
-MODULE_PASS("inliner-ml-advisor-release", ModuleInlinerWrapperPass(getInlineParams(), true, {}, InliningAdvisorMode::Release, 0))
-MODULE_PASS("print<inline-advisor>", InlineAdvisorAnalysisPrinterPass(dbgs()))
-MODULE_PASS("inliner-wrapper-no-mandatory-first", ModuleInlinerWrapperPass(
- getInlineParams(),
- false))
+MODULE_PASS("inliner-wrapper-no-mandatory-first",
+ ModuleInlinerWrapperPass(getInlineParams(), false))
MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass())
MODULE_PASS("instrorderfile", InstrOrderFilePass())
-MODULE_PASS("instrprof", InstrProfiling())
+MODULE_PASS("instrprof", InstrProfilingLoweringPass())
MODULE_PASS("internalize", InternalizePass())
MODULE_PASS("invalidate<all>", InvalidateAllAnalysesPass())
MODULE_PASS("iroutliner", IROutlinerPass())
-MODULE_PASS("print-ir-similarity", IRSimilarityAnalysisPrinterPass(dbgs()))
+MODULE_PASS("jmc-instrumenter", JMCInstrumenterPass())
MODULE_PASS("lower-global-dtors", LowerGlobalDtorsPass())
MODULE_PASS("lower-ifunc", LowerIFuncPass())
MODULE_PASS("lowertypetests", LowerTypeTestsPass())
-MODULE_PASS("metarenamer", MetaRenamerPass())
+MODULE_PASS("memprof-context-disambiguation", MemProfContextDisambiguation())
+MODULE_PASS("memprof-module", ModuleMemProfilerPass())
MODULE_PASS("mergefunc", MergeFunctionsPass())
+MODULE_PASS("metarenamer", MetaRenamerPass())
+MODULE_PASS("module-inline", ModuleInlinerPass())
MODULE_PASS("name-anon-globals", NameAnonGlobalPass())
MODULE_PASS("no-op-module", NoOpModulePass())
MODULE_PASS("objc-arc-apelim", ObjCARCAPElimPass())
+MODULE_PASS("openmp-opt", OpenMPOptPass())
+MODULE_PASS("openmp-opt-postlink",
+ OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink))
MODULE_PASS("partial-inliner", PartialInlinerPass())
-MODULE_PASS("memprof-context-disambiguation", MemProfContextDisambiguation())
MODULE_PASS("pgo-icall-prom", PGOIndirectCallPromotion())
MODULE_PASS("pgo-instr-gen", PGOInstrumentationGen())
MODULE_PASS("pgo-instr-use", PGOInstrumentationUse())
-MODULE_PASS("print-profile-summary", ProfileSummaryPrinterPass(dbgs()))
+MODULE_PASS("poison-checking", PoisonCheckingPass())
+MODULE_PASS("print", PrintModulePass(dbgs()))
MODULE_PASS("print-callgraph", CallGraphPrinterPass(dbgs()))
MODULE_PASS("print-callgraph-sccs", CallGraphSCCsPrinterPass(dbgs()))
-MODULE_PASS("print", PrintModulePass(dbgs()))
+MODULE_PASS("print-ir-similarity", IRSimilarityAnalysisPrinterPass(dbgs()))
MODULE_PASS("print-lcg", LazyCallGraphPrinterPass(dbgs()))
MODULE_PASS("print-lcg-dot", LazyCallGraphDOTPrinterPass(dbgs()))
-MODULE_PASS("print-must-be-executed-contexts", MustBeExecutedContextPrinterPass(dbgs()))
+MODULE_PASS("print-must-be-executed-contexts",
+ MustBeExecutedContextPrinterPass(dbgs()))
+MODULE_PASS("print-profile-summary", ProfileSummaryPrinterPass(dbgs()))
MODULE_PASS("print-stack-safety", StackSafetyGlobalPrinterPass(dbgs()))
+MODULE_PASS("print<inline-advisor>", InlineAdvisorAnalysisPrinterPass(dbgs()))
MODULE_PASS("print<module-debuginfo>", ModuleDebugInfoPrinterPass(dbgs()))
+MODULE_PASS("pseudo-probe", SampleProfileProbePass(TM))
+MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass())
MODULE_PASS("recompute-globalsaa", RecomputeGlobalsAAPass())
MODULE_PASS("rel-lookup-table-converter", RelLookupTableConverterPass())
MODULE_PASS("rewrite-statepoints-for-gc", RewriteStatepointsForGC())
MODULE_PASS("rewrite-symbols", RewriteSymbolPass())
MODULE_PASS("rpo-function-attrs", ReversePostOrderFunctionAttrsPass())
MODULE_PASS("sample-profile", SampleProfileLoaderPass())
+MODULE_PASS("sancov-module", SanitizerCoveragePass())
+MODULE_PASS("sanmd-module", SanitizerBinaryMetadataPass())
MODULE_PASS("scc-oz-module-inliner",
- buildInlinerPipeline(OptimizationLevel::Oz, ThinOrFullLTOPhase::None))
+ buildInlinerPipeline(OptimizationLevel::Oz,
+ ThinOrFullLTOPhase::None))
MODULE_PASS("strip", StripSymbolsPass())
MODULE_PASS("strip-dead-debug-info", StripDeadDebugInfoPass())
-MODULE_PASS("pseudo-probe", SampleProfileProbePass(TM))
MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass())
MODULE_PASS("strip-debug-declare", StripDebugDeclarePass())
MODULE_PASS("strip-nondebug", StripNonDebugSymbolsPass())
MODULE_PASS("strip-nonlinetable-debuginfo", StripNonLineTableDebugInfoPass())
MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation())
MODULE_PASS("trigger-crash", TriggerCrashPass())
+MODULE_PASS("trigger-verifier-error", TriggerVerifierErrorPass())
+MODULE_PASS("tsan-module", ModuleThreadSanitizerPass())
MODULE_PASS("verify", VerifierPass())
MODULE_PASS("view-callgraph", CallGraphViewerPass())
MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass())
-MODULE_PASS("dfsan", DataFlowSanitizerPass())
-MODULE_PASS("module-inline", ModuleInlinerPass())
-MODULE_PASS("tsan-module", ModuleThreadSanitizerPass())
-MODULE_PASS("sancov-module", SanitizerCoveragePass())
-MODULE_PASS("sanmd-module", SanitizerBinaryMetadataPass())
-MODULE_PASS("memprof-module", ModuleMemProfilerPass())
-MODULE_PASS("poison-checking", PoisonCheckingPass())
-MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass())
#undef MODULE_PASS
#ifndef MODULE_PASS_WITH_PARAMS
#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
#endif
-MODULE_PASS_WITH_PARAMS("loop-extract",
- "LoopExtractorPass",
- [](bool Single) {
- if (Single)
- return LoopExtractorPass(1);
- return LoopExtractorPass();
- },
- parseLoopExtractorPassOptions,
- "single")
-MODULE_PASS_WITH_PARAMS("globaldce",
- "GlobalDCEPass",
- [](bool InLTOPostLink) {
- return GlobalDCEPass(InLTOPostLink);
- },
- parseGlobalDCEPassOptions,
- "in-lto-post-link")
-MODULE_PASS_WITH_PARAMS("hwasan",
- "HWAddressSanitizerPass",
- [](HWAddressSanitizerOptions Opts) {
- return HWAddressSanitizerPass(Opts);
- },
- parseHWASanPassOptions,
- "kernel;recover")
-MODULE_PASS_WITH_PARAMS("asan",
- "AddressSanitizerPass",
- [](AddressSanitizerOptions Opts) {
- return AddressSanitizerPass(Opts);
- },
- parseASanPassOptions,
- "kernel")
-MODULE_PASS_WITH_PARAMS("msan",
- "MemorySanitizerPass",
- [](MemorySanitizerOptions Opts) {
- return MemorySanitizerPass(Opts);
- },
- parseMSanPassOptions,
- "recover;kernel;eager-checks;track-origins=N")
-MODULE_PASS_WITH_PARAMS("ipsccp",
- "IPSCCPPass",
- [](IPSCCPOptions Opts) {
- return IPSCCPPass(Opts);
- },
- parseIPSCCPOptions,
- "no-func-spec;func-spec")
-MODULE_PASS_WITH_PARAMS("embed-bitcode",
- "EmbedBitcodePass",
- [](EmbedBitcodeOptions Opts) {
- return EmbedBitcodePass(Opts);
- },
- parseEmbedBitcodePassOptions,
- "thinlto;emit-summary")
-MODULE_PASS_WITH_PARAMS("memprof-use",
- "MemProfUsePass",
- [](std::string Opts) {
- return MemProfUsePass(Opts);
- },
- parseMemProfUsePassOptions,
- "profile-filename=S")
+MODULE_PASS_WITH_PARAMS(
+ "asan", "AddressSanitizerPass",
+ [](AddressSanitizerOptions Opts) { return AddressSanitizerPass(Opts); },
+ parseASanPassOptions, "kernel")
+MODULE_PASS_WITH_PARAMS(
+ "globaldce", "GlobalDCEPass",
+ [](bool InLTOPostLink) { return GlobalDCEPass(InLTOPostLink); },
+ parseGlobalDCEPassOptions, "in-lto-post-link")
+MODULE_PASS_WITH_PARAMS(
+ "hwasan", "HWAddressSanitizerPass",
+ [](HWAddressSanitizerOptions Opts) { return HWAddressSanitizerPass(Opts); },
+ parseHWASanPassOptions, "kernel;recover")
+MODULE_PASS_WITH_PARAMS(
+ "ipsccp", "IPSCCPPass", [](IPSCCPOptions Opts) { return IPSCCPPass(Opts); },
+ parseIPSCCPOptions, "no-func-spec;func-spec")
+MODULE_PASS_WITH_PARAMS(
+ "loop-extract", "LoopExtractorPass",
+ [](bool Single) {
+ if (Single)
+ return LoopExtractorPass(1);
+ return LoopExtractorPass();
+ },
+ parseLoopExtractorPassOptions, "single")
+MODULE_PASS_WITH_PARAMS(
+ "memprof-use", "MemProfUsePass",
+ [](std::string Opts) { return MemProfUsePass(Opts); },
+ parseMemProfUsePassOptions, "profile-filename=S")
+MODULE_PASS_WITH_PARAMS(
+ "msan", "MemorySanitizerPass",
+ [](MemorySanitizerOptions Opts) { return MemorySanitizerPass(Opts); },
+ parseMSanPassOptions, "recover;kernel;eager-checks;track-origins=N")
+MODULE_PASS_WITH_PARAMS(
+ "print<structural-hash>", "StructuralHashPrinterPass",
+ [](bool EnableDetailedStructuralHash) {
+ return StructuralHashPrinterPass(dbgs(), EnableDetailedStructuralHash);
+ },
+ parseStructuralHashPrinterPassOptions, "detailed")
#undef MODULE_PASS_WITH_PARAMS
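PassRegistry.def is an X-macro file: PassBuilder.cpp defines each *_PASS_WITH_PARAMS macro and then includes the file, so every entry expands into a parse-and-register attempt. A sketch close to how the module entries are consumed (simplified from parseModulePass; Name, MPM, and the helpers come from the including context):

  #define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)   \
    if (checkParametrizedPassName(Name, NAME)) {                              \
      auto Params = parsePassParameters(PARSER, Name, NAME);                  \
      if (!Params)                                                            \
        return Params.takeError();                                            \
      MPM.addPass(CREATE_PASS(Params.get()));                                 \
      return Error::success();                                                \
    }
  #include "PassRegistry.def"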
#ifndef CGSCC_ANALYSIS
@@ -204,72 +196,69 @@ CGSCC_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
#define CGSCC_PASS(NAME, CREATE_PASS)
#endif
CGSCC_PASS("argpromotion", ArgumentPromotionPass())
-CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass())
CGSCC_PASS("attributor-cgscc", AttributorCGSCCPass())
-CGSCC_PASS("openmp-opt-cgscc", OpenMPOptCGSCCPass())
+CGSCC_PASS("attributor-light-cgscc", AttributorLightCGSCCPass())
+CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass())
CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass())
+CGSCC_PASS("openmp-opt-cgscc", OpenMPOptCGSCCPass())
#undef CGSCC_PASS
#ifndef CGSCC_PASS_WITH_PARAMS
#define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
#endif
-CGSCC_PASS_WITH_PARAMS("inline",
- "InlinerPass",
- [](bool OnlyMandatory) {
- return InlinerPass(OnlyMandatory);
- },
- parseInlinerPassOptions,
- "only-mandatory")
-CGSCC_PASS_WITH_PARAMS("coro-split",
- "CoroSplitPass",
- [](bool OptimizeFrame) {
- return CoroSplitPass(OptimizeFrame);
- },
- parseCoroSplitPassOptions,
- "reuse-storage")
-CGSCC_PASS_WITH_PARAMS("function-attrs",
- "PostOrderFunctionAttrsPass",
- [](bool SkipNonRecursive) {
- return PostOrderFunctionAttrsPass(SkipNonRecursive);
- },
- parsePostOrderFunctionAttrsPassOptions,
- "skip-non-recursive")
+CGSCC_PASS_WITH_PARAMS(
+ "coro-split", "CoroSplitPass",
+ [](bool OptimizeFrame) { return CoroSplitPass(OptimizeFrame); },
+ parseCoroSplitPassOptions, "reuse-storage")
+CGSCC_PASS_WITH_PARAMS(
+ "function-attrs", "PostOrderFunctionAttrsPass",
+ [](bool SkipNonRecursive) {
+ return PostOrderFunctionAttrsPass(SkipNonRecursive);
+ },
+ parsePostOrderFunctionAttrsPassOptions, "skip-non-recursive-function-attrs")
+CGSCC_PASS_WITH_PARAMS(
+ "inline", "InlinerPass",
+ [](bool OnlyMandatory) { return InlinerPass(OnlyMandatory); },
+ parseInlinerPassOptions, "only-mandatory")
#undef CGSCC_PASS_WITH_PARAMS
#ifndef FUNCTION_ANALYSIS
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS)
#endif
FUNCTION_ANALYSIS("aa", AAManager())
+FUNCTION_ANALYSIS("access-info", LoopAccessAnalysis())
FUNCTION_ANALYSIS("assumptions", AssumptionAnalysis())
FUNCTION_ANALYSIS("block-freq", BlockFrequencyAnalysis())
FUNCTION_ANALYSIS("branch-prob", BranchProbabilityAnalysis())
FUNCTION_ANALYSIS("cycles", CycleAnalysis())
-FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis())
-FUNCTION_ANALYSIS("postdomtree", PostDominatorTreeAnalysis())
+FUNCTION_ANALYSIS("da", DependenceAnalysis())
FUNCTION_ANALYSIS("demanded-bits", DemandedBitsAnalysis())
FUNCTION_ANALYSIS("domfrontier", DominanceFrontierAnalysis())
+FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis())
FUNCTION_ANALYSIS("func-properties", FunctionPropertiesAnalysis())
-FUNCTION_ANALYSIS("loops", LoopAnalysis())
-FUNCTION_ANALYSIS("access-info", LoopAccessAnalysis())
-FUNCTION_ANALYSIS("lazy-value-info", LazyValueAnalysis())
-FUNCTION_ANALYSIS("da", DependenceAnalysis())
+FUNCTION_ANALYSIS("gc-function", GCFunctionAnalysis())
FUNCTION_ANALYSIS("inliner-size-estimator", InlineSizeEstimatorAnalysis())
+FUNCTION_ANALYSIS("lazy-value-info", LazyValueAnalysis())
+FUNCTION_ANALYSIS("loops", LoopAnalysis())
FUNCTION_ANALYSIS("memdep", MemoryDependenceAnalysis())
FUNCTION_ANALYSIS("memoryssa", MemorySSAAnalysis())
-FUNCTION_ANALYSIS("phi-values", PhiValuesAnalysis())
-FUNCTION_ANALYSIS("regions", RegionInfoAnalysis())
FUNCTION_ANALYSIS("no-op-function", NoOpFunctionAnalysis())
FUNCTION_ANALYSIS("opt-remark-emit", OptimizationRemarkEmitterAnalysis())
+FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
+FUNCTION_ANALYSIS("phi-values", PhiValuesAnalysis())
+FUNCTION_ANALYSIS("postdomtree", PostDominatorTreeAnalysis())
+FUNCTION_ANALYSIS("regions", RegionInfoAnalysis())
FUNCTION_ANALYSIS("scalar-evolution", ScalarEvolutionAnalysis())
-FUNCTION_ANALYSIS("should-not-run-function-passes", ShouldNotRunFunctionPassesAnalysis())
-FUNCTION_ANALYSIS("should-run-extra-vector-passes", ShouldRunExtraVectorPasses())
+FUNCTION_ANALYSIS("should-not-run-function-passes",
+ ShouldNotRunFunctionPassesAnalysis())
+FUNCTION_ANALYSIS("should-run-extra-vector-passes",
+ ShouldRunExtraVectorPasses())
FUNCTION_ANALYSIS("stack-safety-local", StackSafetyAnalysis())
-FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
FUNCTION_ANALYSIS("targetir",
TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis())
-FUNCTION_ANALYSIS("verify", VerifierAnalysis())
-FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
+FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
FUNCTION_ANALYSIS("uniformity", UniformityInfoAnalysis())
+FUNCTION_ANALYSIS("verify", VerifierAnalysis())
#ifndef FUNCTION_ALIAS_ANALYSIS
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
@@ -290,129 +279,143 @@ FUNCTION_PASS("aa-eval", AAEvaluator())
FUNCTION_PASS("adce", ADCEPass())
FUNCTION_PASS("add-discriminators", AddDiscriminatorsPass())
FUNCTION_PASS("aggressive-instcombine", AggressiveInstCombinePass())
-FUNCTION_PASS("assume-builder", AssumeBuilderPass())
-FUNCTION_PASS("assume-simplify", AssumeSimplifyPass())
FUNCTION_PASS("alignment-from-assumptions", AlignmentFromAssumptionsPass())
FUNCTION_PASS("annotation-remarks", AnnotationRemarksPass())
+FUNCTION_PASS("assume-builder", AssumeBuilderPass())
+FUNCTION_PASS("assume-simplify", AssumeSimplifyPass())
FUNCTION_PASS("bdce", BDCEPass())
FUNCTION_PASS("bounds-checking", BoundsCheckingPass())
FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass())
+FUNCTION_PASS("callbrprepare", CallBrPreparePass())
FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass())
+FUNCTION_PASS("chr", ControlHeightReductionPass())
FUNCTION_PASS("consthoist", ConstantHoistingPass())
-FUNCTION_PASS("count-visits", CountVisitsPass())
FUNCTION_PASS("constraint-elimination", ConstraintEliminationPass())
-FUNCTION_PASS("chr", ControlHeightReductionPass())
FUNCTION_PASS("coro-elide", CoroElidePass())
FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass())
+FUNCTION_PASS("count-visits", CountVisitsPass())
FUNCTION_PASS("dce", DCEPass())
+FUNCTION_PASS("declare-to-assign", llvm::AssignmentTrackingPass())
FUNCTION_PASS("dfa-jump-threading", DFAJumpThreadingPass())
FUNCTION_PASS("div-rem-pairs", DivRemPairsPass())
-FUNCTION_PASS("dse", DSEPass())
FUNCTION_PASS("dot-cfg", CFGPrinterPass())
FUNCTION_PASS("dot-cfg-only", CFGOnlyPrinterPass())
FUNCTION_PASS("dot-dom", DomPrinter())
FUNCTION_PASS("dot-dom-only", DomOnlyPrinter())
FUNCTION_PASS("dot-post-dom", PostDomPrinter())
FUNCTION_PASS("dot-post-dom-only", PostDomOnlyPrinter())
-FUNCTION_PASS("view-dom", DomViewer())
-FUNCTION_PASS("view-dom-only", DomOnlyViewer())
-FUNCTION_PASS("view-post-dom", PostDomViewer())
-FUNCTION_PASS("view-post-dom-only", PostDomOnlyViewer())
+FUNCTION_PASS("dse", DSEPass())
+FUNCTION_PASS("dwarf-eh-prepare", DwarfEHPreparePass(TM))
+FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass(TM))
+FUNCTION_PASS("expand-large-fp-convert", ExpandLargeFpConvertPass(TM))
+FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass(TM))
FUNCTION_PASS("fix-irreducible", FixIrreduciblePass())
FUNCTION_PASS("flattencfg", FlattenCFGPass())
-FUNCTION_PASS("make-guards-explicit", MakeGuardsExplicitPass())
+FUNCTION_PASS("float2int", Float2IntPass())
+FUNCTION_PASS("guard-widening", GuardWideningPass())
FUNCTION_PASS("gvn-hoist", GVNHoistPass())
FUNCTION_PASS("gvn-sink", GVNSinkPass())
FUNCTION_PASS("helloworld", HelloWorldPass())
+FUNCTION_PASS("indirectbr-expand", IndirectBrExpandPass(TM))
FUNCTION_PASS("infer-address-spaces", InferAddressSpacesPass())
-FUNCTION_PASS("instcombine", InstCombinePass())
+FUNCTION_PASS("infer-alignment", InferAlignmentPass())
+FUNCTION_PASS("inject-tli-mappings", InjectTLIMappings())
FUNCTION_PASS("instcount", InstCountPass())
+FUNCTION_PASS("instnamer", InstructionNamerPass())
FUNCTION_PASS("instsimplify", InstSimplifyPass())
+FUNCTION_PASS("interleaved-access", InterleavedAccessPass(TM))
+FUNCTION_PASS("interleaved-load-combine", InterleavedLoadCombinePass(TM))
FUNCTION_PASS("invalidate<all>", InvalidateAllAnalysesPass())
FUNCTION_PASS("irce", IRCEPass())
-FUNCTION_PASS("float2int", Float2IntPass())
-FUNCTION_PASS("no-op-function", NoOpFunctionPass())
+FUNCTION_PASS("jump-threading", JumpThreadingPass())
+FUNCTION_PASS("kcfi", KCFIPass())
+FUNCTION_PASS("lcssa", LCSSAPass())
FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass())
FUNCTION_PASS("lint", LintPass())
-FUNCTION_PASS("inject-tli-mappings", InjectTLIMappings())
-FUNCTION_PASS("instnamer", InstructionNamerPass())
-FUNCTION_PASS("loweratomic", LowerAtomicPass())
-FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
-FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass())
-FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass())
-FUNCTION_PASS("lower-widenable-condition", LowerWidenableConditionPass())
-FUNCTION_PASS("guard-widening", GuardWideningPass())
FUNCTION_PASS("load-store-vectorizer", LoadStoreVectorizerPass())
+FUNCTION_PASS("loop-data-prefetch", LoopDataPrefetchPass())
+FUNCTION_PASS("loop-distribute", LoopDistributePass())
+FUNCTION_PASS("loop-fusion", LoopFusePass())
+FUNCTION_PASS("loop-load-elim", LoopLoadEliminationPass())
FUNCTION_PASS("loop-simplify", LoopSimplifyPass())
FUNCTION_PASS("loop-sink", LoopSinkPass())
+FUNCTION_PASS("loop-versioning", LoopVersioningPass())
+FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass())
+FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
+FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass())
+FUNCTION_PASS("lower-widenable-condition", LowerWidenableConditionPass())
+FUNCTION_PASS("loweratomic", LowerAtomicPass())
FUNCTION_PASS("lowerinvoke", LowerInvokePass())
FUNCTION_PASS("lowerswitch", LowerSwitchPass())
+FUNCTION_PASS("make-guards-explicit", MakeGuardsExplicitPass())
FUNCTION_PASS("mem2reg", PromotePass())
FUNCTION_PASS("memcpyopt", MemCpyOptPass())
+FUNCTION_PASS("memprof", MemProfilerPass())
FUNCTION_PASS("mergeicmps", MergeICmpsPass())
FUNCTION_PASS("mergereturn", UnifyFunctionExitNodesPass())
FUNCTION_PASS("move-auto-init", MoveAutoInitPass())
FUNCTION_PASS("nary-reassociate", NaryReassociatePass())
FUNCTION_PASS("newgvn", NewGVNPass())
-FUNCTION_PASS("jump-threading", JumpThreadingPass())
-FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass())
-FUNCTION_PASS("kcfi", KCFIPass())
-FUNCTION_PASS("lcssa", LCSSAPass())
-FUNCTION_PASS("loop-data-prefetch", LoopDataPrefetchPass())
-FUNCTION_PASS("loop-load-elim", LoopLoadEliminationPass())
-FUNCTION_PASS("loop-fusion", LoopFusePass())
-FUNCTION_PASS("loop-distribute", LoopDistributePass())
-FUNCTION_PASS("loop-versioning", LoopVersioningPass())
+FUNCTION_PASS("no-op-function", NoOpFunctionPass())
FUNCTION_PASS("objc-arc", ObjCARCOptPass())
FUNCTION_PASS("objc-arc-contract", ObjCARCContractPass())
FUNCTION_PASS("objc-arc-expand", ObjCARCExpandPass())
FUNCTION_PASS("pa-eval", PAEvalPass())
+FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass())
FUNCTION_PASS("pgo-memop-opt", PGOMemOPSizeOpt())
FUNCTION_PASS("place-safepoints", PlaceSafepointsPass())
FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
+// TODO: rename to print<foo> after NPM switch
+FUNCTION_PASS("print-alias-sets", AliasSetsPrinterPass(dbgs()))
+FUNCTION_PASS("print-cfg-sccs", CFGSCCPrinterPass(dbgs()))
+FUNCTION_PASS("print-memderefs", MemDerefPrinterPass(dbgs()))
+FUNCTION_PASS("print-mustexecute", MustExecutePrinterPass(dbgs()))
+FUNCTION_PASS("print-predicateinfo", PredicateInfoPrinterPass(dbgs()))
+FUNCTION_PASS("print<access-info>", LoopAccessInfoPrinterPass(dbgs()))
FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
FUNCTION_PASS("print<block-freq>", BlockFrequencyPrinterPass(dbgs()))
FUNCTION_PASS("print<branch-prob>", BranchProbabilityPrinterPass(dbgs()))
FUNCTION_PASS("print<cost-model>", CostModelPrinterPass(dbgs()))
FUNCTION_PASS("print<cycles>", CycleInfoPrinterPass(dbgs()))
FUNCTION_PASS("print<da>", DependenceAnalysisPrinterPass(dbgs()))
-FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(dbgs()))
-FUNCTION_PASS("print<postdomtree>", PostDominatorTreePrinterPass(dbgs()))
FUNCTION_PASS("print<delinearization>", DelinearizationPrinterPass(dbgs()))
FUNCTION_PASS("print<demanded-bits>", DemandedBitsPrinterPass(dbgs()))
FUNCTION_PASS("print<domfrontier>", DominanceFrontierPrinterPass(dbgs()))
+FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(dbgs()))
FUNCTION_PASS("print<func-properties>", FunctionPropertiesPrinterPass(dbgs()))
FUNCTION_PASS("print<inline-cost>", InlineCostAnnotationPrinterPass(dbgs()))
FUNCTION_PASS("print<inliner-size-estimator>",
- InlineSizeEstimatorAnalysisPrinterPass(dbgs()))
+ InlineSizeEstimatorAnalysisPrinterPass(dbgs()))
+FUNCTION_PASS("print<lazy-value-info>", LazyValueInfoPrinterPass(dbgs()))
FUNCTION_PASS("print<loops>", LoopPrinterPass(dbgs()))
FUNCTION_PASS("print<memoryssa-walker>", MemorySSAWalkerPrinterPass(dbgs()))
FUNCTION_PASS("print<phi-values>", PhiValuesPrinterPass(dbgs()))
+FUNCTION_PASS("print<postdomtree>", PostDominatorTreePrinterPass(dbgs()))
FUNCTION_PASS("print<regions>", RegionInfoPrinterPass(dbgs()))
FUNCTION_PASS("print<scalar-evolution>", ScalarEvolutionPrinterPass(dbgs()))
FUNCTION_PASS("print<stack-safety-local>", StackSafetyPrinterPass(dbgs()))
-FUNCTION_PASS("print<access-info>", LoopAccessInfoPrinterPass(dbgs()))
-// TODO: rename to print<foo> after NPM switch
-FUNCTION_PASS("print-alias-sets", AliasSetsPrinterPass(dbgs()))
-FUNCTION_PASS("print-cfg-sccs", CFGSCCPrinterPass(dbgs()))
-FUNCTION_PASS("print-predicateinfo", PredicateInfoPrinterPass(dbgs()))
-FUNCTION_PASS("print-mustexecute", MustExecutePrinterPass(dbgs()))
-FUNCTION_PASS("print-memderefs", MemDerefPrinterPass(dbgs()))
FUNCTION_PASS("print<uniformity>", UniformityInfoPrinterPass(dbgs()))
FUNCTION_PASS("reassociate", ReassociatePass())
FUNCTION_PASS("redundant-dbg-inst-elim", RedundantDbgInstEliminationPass())
FUNCTION_PASS("reg2mem", RegToMemPass())
+FUNCTION_PASS("safe-stack", SafeStackPass(TM))
FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass())
FUNCTION_PASS("scalarizer", ScalarizerPass())
-FUNCTION_PASS("separate-const-offset-from-gep", SeparateConstOffsetFromGEPPass())
FUNCTION_PASS("sccp", SCCPPass())
+FUNCTION_PASS("select-optimize", SelectOptimizePass(TM))
+FUNCTION_PASS("separate-const-offset-from-gep",
+ SeparateConstOffsetFromGEPPass())
FUNCTION_PASS("sink", SinkingPass())
+FUNCTION_PASS("sjlj-eh-prepare", SjLjEHPreparePass(TM))
FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass())
FUNCTION_PASS("slsr", StraightLineStrengthReducePass())
-FUNCTION_PASS("speculative-execution", SpeculativeExecutionPass())
FUNCTION_PASS("strip-gc-relocates", StripGCRelocates())
FUNCTION_PASS("structurizecfg", StructurizeCFGPass())
FUNCTION_PASS("tailcallelim", TailCallElimPass())
+FUNCTION_PASS("tlshoist", TLSVariableHoistPass())
+FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
+FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass())
+FUNCTION_PASS("tsan", ThreadSanitizerPass())
FUNCTION_PASS("typepromotion", TypePromotionPass(TM))
FUNCTION_PASS("unify-loop-exits", UnifyLoopExitsPass())
FUNCTION_PASS("vector-combine", VectorCombinePass())
@@ -425,151 +428,121 @@ FUNCTION_PASS("verify<safepoint-ir>", SafepointIRVerifierPass())
FUNCTION_PASS("verify<scalar-evolution>", ScalarEvolutionVerifierPass())
FUNCTION_PASS("view-cfg", CFGViewerPass())
FUNCTION_PASS("view-cfg-only", CFGOnlyViewerPass())
-FUNCTION_PASS("tlshoist", TLSVariableHoistPass())
-FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
-FUNCTION_PASS("tsan", ThreadSanitizerPass())
-FUNCTION_PASS("memprof", MemProfilerPass())
-FUNCTION_PASS("declare-to-assign", llvm::AssignmentTrackingPass())
+FUNCTION_PASS("view-dom", DomViewer())
+FUNCTION_PASS("view-dom-only", DomOnlyViewer())
+FUNCTION_PASS("view-post-dom", PostDomViewer())
+FUNCTION_PASS("view-post-dom-only", PostDomOnlyViewer())
+FUNCTION_PASS("wasm-eh-prepare", WasmEHPreparePass())
#undef FUNCTION_PASS
#ifndef FUNCTION_PASS_WITH_PARAMS
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
#endif
-FUNCTION_PASS_WITH_PARAMS("early-cse",
- "EarlyCSEPass",
- [](bool UseMemorySSA) {
- return EarlyCSEPass(UseMemorySSA);
- },
- parseEarlyCSEPassOptions,
- "memssa")
-FUNCTION_PASS_WITH_PARAMS("ee-instrument",
- "EntryExitInstrumenterPass",
- [](bool PostInlining) {
- return EntryExitInstrumenterPass(PostInlining);
- },
- parseEntryExitInstrumenterPassOptions,
- "post-inline")
-FUNCTION_PASS_WITH_PARAMS("hardware-loops",
- "HardwareLoopsPass",
- [](HardwareLoopOptions Opts) {
- return HardwareLoopsPass(Opts);
- },
- parseHardwareLoopOptions,
- "force-hardware-loops;"
- "force-hardware-loop-phi;"
- "force-nested-hardware-loop;"
- "force-hardware-loop-guard;"
- "hardware-loop-decrement=N;"
- "hardware-loop-counter-bitwidth=N")
-FUNCTION_PASS_WITH_PARAMS("lower-matrix-intrinsics",
- "LowerMatrixIntrinsicsPass",
- [](bool Minimal) {
- return LowerMatrixIntrinsicsPass(Minimal);
- },
- parseLowerMatrixIntrinsicsPassOptions,
- "minimal")
-FUNCTION_PASS_WITH_PARAMS("loop-unroll",
- "LoopUnrollPass",
- [](LoopUnrollOptions Opts) {
- return LoopUnrollPass(Opts);
- },
- parseLoopUnrollOptions,
- "O0;O1;O2;O3;full-unroll-max=N;"
- "no-partial;partial;"
- "no-peeling;peeling;"
- "no-profile-peeling;profile-peeling;"
- "no-runtime;runtime;"
- "no-upperbound;upperbound")
-FUNCTION_PASS_WITH_PARAMS("simplifycfg",
- "SimplifyCFGPass",
- [](SimplifyCFGOptions Opts) {
- return SimplifyCFGPass(Opts);
- },
- parseSimplifyCFGOptions,
- "no-forward-switch-cond;forward-switch-cond;"
- "no-switch-range-to-icmp;switch-range-to-icmp;"
- "no-switch-to-lookup;switch-to-lookup;"
- "no-keep-loops;keep-loops;"
- "no-hoist-common-insts;hoist-common-insts;"
- "no-sink-common-insts;sink-common-insts;"
- "bonus-inst-threshold=N"
- )
-FUNCTION_PASS_WITH_PARAMS("loop-vectorize",
- "LoopVectorizePass",
- [](LoopVectorizeOptions Opts) {
- return LoopVectorizePass(Opts);
- },
- parseLoopVectorizeOptions,
- "no-interleave-forced-only;interleave-forced-only;"
- "no-vectorize-forced-only;vectorize-forced-only")
-FUNCTION_PASS_WITH_PARAMS("instcombine",
- "InstCombinePass",
- [](InstCombineOptions Opts) {
- return InstCombinePass(Opts);
- },
- parseInstCombineOptions,
- "no-use-loop-info;use-loop-info;"
- "max-iterations=N"
- )
-FUNCTION_PASS_WITH_PARAMS("mldst-motion",
- "MergedLoadStoreMotionPass",
- [](MergedLoadStoreMotionOptions Opts) {
- return MergedLoadStoreMotionPass(Opts);
- },
- parseMergedLoadStoreMotionOptions,
- "no-split-footer-bb;split-footer-bb")
-FUNCTION_PASS_WITH_PARAMS("gvn",
- "GVNPass",
- [](GVNOptions Opts) {
- return GVNPass(Opts);
- },
- parseGVNOptions,
- "no-pre;pre;"
- "no-load-pre;load-pre;"
- "no-split-backedge-load-pre;split-backedge-load-pre;"
- "no-memdep;memdep")
-FUNCTION_PASS_WITH_PARAMS("sroa",
- "SROAPass",
- [](SROAOptions PreserveCFG) {
- return SROAPass(PreserveCFG);
- },
- parseSROAOptions,
- "preserve-cfg;modify-cfg")
-FUNCTION_PASS_WITH_PARAMS("print<stack-lifetime>",
- "StackLifetimePrinterPass",
- [](StackLifetime::LivenessType Type) {
- return StackLifetimePrinterPass(dbgs(), Type);
- },
- parseStackLifetimeOptions,
- "may;must")
-FUNCTION_PASS_WITH_PARAMS("print<da>",
- "DependenceAnalysisPrinterPass",
- [](bool NormalizeResults) {
- return DependenceAnalysisPrinterPass(dbgs(), NormalizeResults);
- },
- parseDependenceAnalysisPrinterOptions,
- "normalized-results")
-FUNCTION_PASS_WITH_PARAMS("separate-const-offset-from-gep",
- "SeparateConstOffsetFromGEPPass",
- [](bool LowerGEP) {
- return SeparateConstOffsetFromGEPPass(LowerGEP);
- },
- parseSeparateConstOffsetFromGEPPassOptions,
- "lower-gep")
-FUNCTION_PASS_WITH_PARAMS("function-simplification",
- "",
- [this](OptimizationLevel OL) {
- return buildFunctionSimplificationPipeline(OL, ThinOrFullLTOPhase::None);
- },
- parseFunctionSimplificationPipelineOptions,
- "O1;O2;O3;Os;Oz")
-FUNCTION_PASS_WITH_PARAMS("print<memoryssa>",
- "MemorySSAPrinterPass",
- [](bool NoEnsureOptimizedUses) {
- return MemorySSAPrinterPass(dbgs(), !NoEnsureOptimizedUses);
- },
- parseMemorySSAPrinterPassOptions,
- "no-ensure-optimized-uses")
+FUNCTION_PASS_WITH_PARAMS(
+ "cfguard", "CFGuardPass",
+ [](CFGuardPass::Mechanism M) { return CFGuardPass(M); },
+ parseCFGuardPassOptions, "check;dispatch")
+FUNCTION_PASS_WITH_PARAMS(
+ "early-cse", "EarlyCSEPass",
+ [](bool UseMemorySSA) { return EarlyCSEPass(UseMemorySSA); },
+ parseEarlyCSEPassOptions, "memssa")
+FUNCTION_PASS_WITH_PARAMS(
+ "ee-instrument", "EntryExitInstrumenterPass",
+ [](bool PostInlining) { return EntryExitInstrumenterPass(PostInlining); },
+ parseEntryExitInstrumenterPassOptions, "post-inline")
+FUNCTION_PASS_WITH_PARAMS(
+ "function-simplification", "",
+ [this](OptimizationLevel OL) {
+ return buildFunctionSimplificationPipeline(OL, ThinOrFullLTOPhase::None);
+ },
+ parseFunctionSimplificationPipelineOptions, "O1;O2;O3;Os;Oz")
+FUNCTION_PASS_WITH_PARAMS(
+ "gvn", "GVNPass", [](GVNOptions Opts) { return GVNPass(Opts); },
+ parseGVNOptions,
+ "no-pre;pre;no-load-pre;load-pre;no-split-backedge-load-pre;"
+ "split-backedge-load-pre;no-memdep;memdep")
+FUNCTION_PASS_WITH_PARAMS(
+ "hardware-loops", "HardwareLoopsPass",
+ [](HardwareLoopOptions Opts) { return HardwareLoopsPass(Opts); },
+ parseHardwareLoopOptions,
+ "force-hardware-loops;force-hardware-loop-phi;force-nested-hardware-loop;"
+ "force-hardware-loop-guard;hardware-loop-decrement=N;"
+ "hardware-loop-counter-bitwidth=N")
+FUNCTION_PASS_WITH_PARAMS(
+ "instcombine", "InstCombinePass",
+ [](InstCombineOptions Opts) { return InstCombinePass(Opts); },
+ parseInstCombineOptions,
+ "no-use-loop-info;use-loop-info;no-verify-fixpoint;verify-fixpoint;"
+ "max-iterations=N")
+FUNCTION_PASS_WITH_PARAMS(
+ "loop-unroll", "LoopUnrollPass",
+ [](LoopUnrollOptions Opts) { return LoopUnrollPass(Opts); },
+ parseLoopUnrollOptions,
+ "O0;O1;O2;O3;full-unroll-max=N;no-partial;partial;no-peeling;peeling;"
+ "no-profile-peeling;profile-peeling;no-runtime;runtime;no-upperbound;"
+ "upperbound")
+FUNCTION_PASS_WITH_PARAMS(
+ "loop-vectorize", "LoopVectorizePass",
+ [](LoopVectorizeOptions Opts) { return LoopVectorizePass(Opts); },
+ parseLoopVectorizeOptions,
+ "no-interleave-forced-only;interleave-forced-only;no-vectorize-forced-only;"
+ "vectorize-forced-only")
+FUNCTION_PASS_WITH_PARAMS(
+ "lower-matrix-intrinsics", "LowerMatrixIntrinsicsPass",
+ [](bool Minimal) { return LowerMatrixIntrinsicsPass(Minimal); },
+ parseLowerMatrixIntrinsicsPassOptions, "minimal")
+FUNCTION_PASS_WITH_PARAMS(
+ "mldst-motion", "MergedLoadStoreMotionPass",
+ [](MergedLoadStoreMotionOptions Opts) {
+ return MergedLoadStoreMotionPass(Opts);
+ },
+ parseMergedLoadStoreMotionOptions, "no-split-footer-bb;split-footer-bb")
+FUNCTION_PASS_WITH_PARAMS(
+ "print<da>", "DependenceAnalysisPrinterPass",
+ [](bool NormalizeResults) {
+ return DependenceAnalysisPrinterPass(dbgs(), NormalizeResults);
+ },
+ parseDependenceAnalysisPrinterOptions, "normalized-results")
+FUNCTION_PASS_WITH_PARAMS(
+ "print<memoryssa>", "MemorySSAPrinterPass",
+ [](bool NoEnsureOptimizedUses) {
+ return MemorySSAPrinterPass(dbgs(), !NoEnsureOptimizedUses);
+ },
+ parseMemorySSAPrinterPassOptions, "no-ensure-optimized-uses")
+FUNCTION_PASS_WITH_PARAMS(
+ "print<stack-lifetime>", "StackLifetimePrinterPass",
+ [](StackLifetime::LivenessType Type) {
+ return StackLifetimePrinterPass(dbgs(), Type);
+ },
+ parseStackLifetimeOptions, "may;must")
+FUNCTION_PASS_WITH_PARAMS(
+ "separate-const-offset-from-gep", "SeparateConstOffsetFromGEPPass",
+ [](bool LowerGEP) { return SeparateConstOffsetFromGEPPass(LowerGEP); },
+ parseSeparateConstOffsetFromGEPPassOptions, "lower-gep")
+FUNCTION_PASS_WITH_PARAMS(
+ "simplifycfg", "SimplifyCFGPass",
+ [](SimplifyCFGOptions Opts) { return SimplifyCFGPass(Opts); },
+ parseSimplifyCFGOptions,
+ "no-forward-switch-cond;forward-switch-cond;no-switch-range-to-icmp;"
+ "switch-range-to-icmp;no-switch-to-lookup;switch-to-lookup;no-keep-loops;"
+ "keep-loops;no-hoist-common-insts;hoist-common-insts;no-sink-common-insts;"
+ "sink-common-insts;bonus-inst-threshold=N")
+FUNCTION_PASS_WITH_PARAMS(
+ "speculative-execution", "SpeculativeExecutionPass",
+ [](bool OnlyIfDivergentTarget) {
+ return SpeculativeExecutionPass(OnlyIfDivergentTarget);
+ },
+ parseSpeculativeExecutionPassOptions, "only-if-divergent-target")
+FUNCTION_PASS_WITH_PARAMS(
+ "sroa", "SROAPass",
+ [](SROAOptions PreserveCFG) { return SROAPass(PreserveCFG); },
+ parseSROAOptions, "preserve-cfg;modify-cfg")
+FUNCTION_PASS_WITH_PARAMS(
+ "win-eh-prepare", "WinEHPreparePass",
+ [](bool DemoteCatchSwitchPHIOnly) {
+ return WinEHPreparePass(DemoteCatchSwitchPHIOnly);
+ },
+ parseWinEHPrepareOptions, "demote-catchswitch-only")
#undef FUNCTION_PASS_WITH_PARAMS
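Parameters listed in the PARAMS column above map onto the textual pipeline syntax, where they appear in angle brackets after the pass name and are separated by semicolons. A usage sketch (pass and parameter choices arbitrary):

    opt -passes='simplifycfg<bonus-inst-threshold=2;no-keep-loops>' in.ll -S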
#ifndef LOOPNEST_PASS
@@ -584,9 +557,9 @@ LOOPNEST_PASS("no-op-loopnest", NoOpLoopNestPass())
#ifndef LOOP_ANALYSIS
#define LOOP_ANALYSIS(NAME, CREATE_PASS)
#endif
-LOOP_ANALYSIS("no-op-loop", NoOpLoopAnalysis())
LOOP_ANALYSIS("ddg", DDGAnalysis())
LOOP_ANALYSIS("iv-users", IVUsersAnalysis())
+LOOP_ANALYSIS("no-op-loop", NoOpLoopAnalysis())
LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
#undef LOOP_ANALYSIS
@@ -595,57 +568,48 @@ LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
#endif
LOOP_PASS("canon-freeze", CanonicalizeFreezeInLoopsPass())
LOOP_PASS("dot-ddg", DDGDotPrinterPass())
+LOOP_PASS("guard-widening", GuardWideningPass())
+LOOP_PASS("indvars", IndVarSimplifyPass())
LOOP_PASS("invalidate<all>", InvalidateAllAnalysesPass())
+LOOP_PASS("loop-bound-split", LoopBoundSplitPass())
+LOOP_PASS("loop-deletion", LoopDeletionPass())
LOOP_PASS("loop-idiom", LoopIdiomRecognizePass())
LOOP_PASS("loop-instsimplify", LoopInstSimplifyPass())
-LOOP_PASS("no-op-loop", NoOpLoopPass())
-LOOP_PASS("print", PrintLoopPass(dbgs()))
-LOOP_PASS("loop-deletion", LoopDeletionPass())
-LOOP_PASS("loop-simplifycfg", LoopSimplifyCFGPass())
+LOOP_PASS("loop-predication", LoopPredicationPass())
LOOP_PASS("loop-reduce", LoopStrengthReducePass())
-LOOP_PASS("indvars", IndVarSimplifyPass())
+LOOP_PASS("loop-reroll", LoopRerollPass())
+LOOP_PASS("loop-simplifycfg", LoopSimplifyCFGPass())
LOOP_PASS("loop-unroll-full", LoopFullUnrollPass())
+LOOP_PASS("loop-versioning-licm", LoopVersioningLICMPass())
+LOOP_PASS("no-op-loop", NoOpLoopPass())
+LOOP_PASS("print", PrintLoopPass(dbgs()))
LOOP_PASS("print<ddg>", DDGAnalysisPrinterPass(dbgs()))
LOOP_PASS("print<iv-users>", IVUsersPrinterPass(dbgs()))
-LOOP_PASS("print<loopnest>", LoopNestPrinterPass(dbgs()))
LOOP_PASS("print<loop-cache-cost>", LoopCachePrinterPass(dbgs()))
-LOOP_PASS("loop-predication", LoopPredicationPass())
-LOOP_PASS("guard-widening", GuardWideningPass())
-LOOP_PASS("loop-bound-split", LoopBoundSplitPass())
-LOOP_PASS("loop-reroll", LoopRerollPass())
-LOOP_PASS("loop-versioning-licm", LoopVersioningLICMPass())
+LOOP_PASS("print<loopnest>", LoopNestPrinterPass(dbgs()))
#undef LOOP_PASS
#ifndef LOOP_PASS_WITH_PARAMS
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
#endif
-LOOP_PASS_WITH_PARAMS("simple-loop-unswitch",
- "SimpleLoopUnswitchPass",
- [](std::pair<bool, bool> Params) {
- return SimpleLoopUnswitchPass(Params.first, Params.second);
- },
- parseLoopUnswitchOptions,
- "nontrivial;no-nontrivial;trivial;no-trivial")
-
-LOOP_PASS_WITH_PARAMS("licm", "LICMPass",
- [](LICMOptions Params) {
- return LICMPass(Params);
- },
- parseLICMOptions,
- "allowspeculation");
-
-LOOP_PASS_WITH_PARAMS("lnicm", "LNICMPass",
- [](LICMOptions Params) {
- return LNICMPass(Params);
- },
- parseLICMOptions,
- "allowspeculation");
-
-LOOP_PASS_WITH_PARAMS("loop-rotate",
- "LoopRotatePass",
- [](std::pair<bool, bool> Params) {
- return LoopRotatePass(Params.first, Params.second);
- },
- parseLoopRotateOptions,
- "no-header-duplication;header-duplication;no-prepare-for-lto;prepare-for-lto")
+LOOP_PASS_WITH_PARAMS(
+ "licm", "LICMPass", [](LICMOptions Params) { return LICMPass(Params); },
+ parseLICMOptions, "allowspeculation")
+LOOP_PASS_WITH_PARAMS(
+ "lnicm", "LNICMPass", [](LICMOptions Params) { return LNICMPass(Params); },
+ parseLICMOptions, "allowspeculation")
+LOOP_PASS_WITH_PARAMS(
+ "loop-rotate", "LoopRotatePass",
+ [](std::pair<bool, bool> Params) {
+ return LoopRotatePass(Params.first, Params.second);
+ },
+ parseLoopRotateOptions,
+ "no-header-duplication;header-duplication;"
+ "no-prepare-for-lto;prepare-for-lto")
+LOOP_PASS_WITH_PARAMS(
+ "simple-loop-unswitch", "SimpleLoopUnswitchPass",
+ [](std::pair<bool, bool> Params) {
+ return SimpleLoopUnswitchPass(Params.first, Params.second);
+ },
+ parseLoopUnswitchOptions, "nontrivial;no-nontrivial;trivial;no-trivial")
#undef LOOP_PASS_WITH_PARAMS
diff --git a/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp b/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp
index 7eef511928ec..fd1317e3eb25 100644
--- a/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -14,10 +14,12 @@
#include "llvm/Passes/StandardInstrumentations.h"
#include "llvm/ADT/Any.h"
+#include "llvm/ADT/StableHashing.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
@@ -33,6 +35,7 @@
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/Signals.h"
@@ -120,6 +123,18 @@ static cl::opt<unsigned>
cl::desc("Print IR at pass with this number as "
"reported by print-passes-names"));
+static cl::opt<std::string> IRDumpDirectory(
+ "ir-dump-directory",
+ cl::desc("If specified, IR printed using the "
+ "-print-[before|after]{-all} options will be dumped into "
+ "files in this directory rather than written to stderr"),
+ cl::Hidden, cl::value_desc("filename"));
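A usage sketch for this option (paths invented): combined with the existing print flags, each pass run lands in its own file rather than interleaving on stderr:

    opt -passes=sroa -print-after-all -ir-dump-directory=/tmp/ir in.ll -o out.bc
    writes e.g. /tmp/ir/1-<module-hash>-function-<function-hash>-SROAPass-after.ll,
    per fetchDumpFilename and getFileSuffix below.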
+
+template <typename IRUnitT> static const IRUnitT *unwrapIR(Any IR) {
+ const IRUnitT **IRPtr = llvm::any_cast<const IRUnitT *>(&IR);
+ return IRPtr ? *IRPtr : nullptr;
+}
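A minimal sketch of the helper's behavior (toy setup, not from the patch):

    const Function *F = /* some function known to the caller */;
    Any IR = F;                // Any now wraps a const Function *
    unwrapIR<Function>(IR);    // returns F
    unwrapIR<Loop>(IR);        // wrong wrapped type: returns nullptr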
+
namespace {
// An option for specifying an executable that will be called with the IR
@@ -137,18 +152,18 @@ static cl::opt<std::string>
/// Extract Module out of \p IR unit. May return nullptr if \p IR does not match
/// certain global filters. Will never return nullptr if \p Force is true.
const Module *unwrapModule(Any IR, bool Force = false) {
- if (const auto **M = any_cast<const Module *>(&IR))
- return *M;
+ if (const auto *M = unwrapIR<Module>(IR))
+ return M;
- if (const auto **F = any_cast<const Function *>(&IR)) {
- if (!Force && !isFunctionInPrintList((*F)->getName()))
+ if (const auto *F = unwrapIR<Function>(IR)) {
+ if (!Force && !isFunctionInPrintList(F->getName()))
return nullptr;
- return (*F)->getParent();
+ return F->getParent();
}
- if (const auto **C = any_cast<const LazyCallGraph::SCC *>(&IR)) {
- for (const LazyCallGraph::Node &N : **C) {
+ if (const auto *C = unwrapIR<LazyCallGraph::SCC>(IR)) {
+ for (const LazyCallGraph::Node &N : *C) {
const Function &F = N.getFunction();
if (Force || (!F.isDeclaration() && isFunctionInPrintList(F.getName()))) {
return F.getParent();
@@ -158,8 +173,8 @@ const Module *unwrapModule(Any IR, bool Force = false) {
return nullptr;
}
- if (const auto **L = any_cast<const Loop *>(&IR)) {
- const Function *F = (*L)->getHeader()->getParent();
+ if (const auto *L = unwrapIR<Loop>(IR)) {
+ const Function *F = L->getHeader()->getParent();
if (!Force && !isFunctionInPrintList(F->getName()))
return nullptr;
return F->getParent();
@@ -201,17 +216,20 @@ void printIR(raw_ostream &OS, const Loop *L) {
}
std::string getIRName(Any IR) {
- if (any_cast<const Module *>(&IR))
+ if (unwrapIR<Module>(IR))
return "[module]";
- if (const auto **F = any_cast<const Function *>(&IR))
- return (*F)->getName().str();
+ if (const auto *F = unwrapIR<Function>(IR))
+ return F->getName().str();
+
+ if (const auto *C = unwrapIR<LazyCallGraph::SCC>(IR))
+ return C->getName();
- if (const auto **C = any_cast<const LazyCallGraph::SCC *>(&IR))
- return (*C)->getName();
+ if (const auto *L = unwrapIR<Loop>(IR))
+ return L->getName().str();
- if (const auto **L = any_cast<const Loop *>(&IR))
- return (*L)->getName().str();
+ if (const auto *MF = unwrapIR<MachineFunction>(IR))
+ return MF->getName().str();
llvm_unreachable("Unknown wrapped IR type");
}
@@ -233,17 +251,17 @@ bool sccContainsFilterPrintFunc(const LazyCallGraph::SCC &C) {
}
bool shouldPrintIR(Any IR) {
- if (const auto **M = any_cast<const Module *>(&IR))
- return moduleContainsFilterPrintFunc(**M);
+ if (const auto *M = unwrapIR<Module>(IR))
+ return moduleContainsFilterPrintFunc(*M);
- if (const auto **F = any_cast<const Function *>(&IR))
- return isFunctionInPrintList((*F)->getName());
+ if (const auto *F = unwrapIR<Function>(IR))
+ return isFunctionInPrintList(F->getName());
- if (const auto **C = any_cast<const LazyCallGraph::SCC *>(&IR))
- return sccContainsFilterPrintFunc(**C);
+ if (const auto *C = unwrapIR<LazyCallGraph::SCC>(IR))
+ return sccContainsFilterPrintFunc(*C);
- if (const auto **L = any_cast<const Loop *>(&IR))
- return isFunctionInPrintList((*L)->getHeader()->getParent()->getName());
+ if (const auto *L = unwrapIR<Loop>(IR))
+ return isFunctionInPrintList(L->getHeader()->getParent()->getName());
llvm_unreachable("Unknown wrapped IR type");
}
@@ -260,23 +278,23 @@ void unwrapAndPrint(raw_ostream &OS, Any IR) {
return;
}
- if (const auto **M = any_cast<const Module *>(&IR)) {
- printIR(OS, *M);
+ if (const auto *M = unwrapIR<Module>(IR)) {
+ printIR(OS, M);
return;
}
- if (const auto **F = any_cast<const Function *>(&IR)) {
- printIR(OS, *F);
+ if (const auto *F = unwrapIR<Function>(IR)) {
+ printIR(OS, F);
return;
}
- if (const auto **C = any_cast<const LazyCallGraph::SCC *>(&IR)) {
- printIR(OS, *C);
+ if (const auto *C = unwrapIR<LazyCallGraph::SCC>(IR)) {
+ printIR(OS, C);
return;
}
- if (const auto **L = any_cast<const Loop *>(&IR)) {
- printIR(OS, *L);
+ if (const auto *L = unwrapIR<Loop>(IR)) {
+ printIR(OS, L);
return;
}
llvm_unreachable("Unknown wrapped IR type");
@@ -286,7 +304,8 @@ void unwrapAndPrint(raw_ostream &OS, Any IR) {
bool isIgnored(StringRef PassID) {
return isSpecialPass(PassID,
{"PassManager", "PassAdaptor", "AnalysisManagerProxy",
- "DevirtSCCRepeatedPass", "ModuleInlinerWrapperPass"});
+ "DevirtSCCRepeatedPass", "ModuleInlinerWrapperPass",
+ "VerifierPass", "PrintModulePass"});
}
std::string makeHTMLReady(StringRef SR) {
@@ -306,13 +325,10 @@ std::string makeHTMLReady(StringRef SR) {
// Return the module when that is the appropriate level of comparison for \p IR.
const Module *getModuleForComparison(Any IR) {
- if (const auto **M = any_cast<const Module *>(&IR))
- return *M;
- if (const auto **C = any_cast<const LazyCallGraph::SCC *>(&IR))
- return (*C)
- ->begin()
- ->getFunction()
- .getParent();
+ if (const auto *M = unwrapIR<Module>(IR))
+ return M;
+ if (const auto *C = unwrapIR<LazyCallGraph::SCC>(IR))
+ return C->begin()->getFunction().getParent();
return nullptr;
}
@@ -325,8 +341,8 @@ bool isInterestingFunction(const Function &F) {
bool isInteresting(Any IR, StringRef PassID, StringRef PassName) {
if (isIgnored(PassID) || !isPassInPrintList(PassName))
return false;
- if (const auto **F = any_cast<const Function *>(&IR))
- return isInterestingFunction(**F);
+ if (const auto *F = unwrapIR<Function>(IR))
+ return isInterestingFunction(*F);
return true;
}
@@ -501,7 +517,7 @@ void IRChangedTester::handleIR(const std::string &S, StringRef PassID) {
static SmallVector<int> FD{-1};
SmallVector<StringRef> SR{S};
static SmallVector<std::string> FileName{""};
- if (auto Err = prepareTempFiles(FD, SR, FileName)) {
+ if (prepareTempFiles(FD, SR, FileName)) {
dbgs() << "Unable to create temporary file.";
return;
}
@@ -518,7 +534,7 @@ void IRChangedTester::handleIR(const std::string &S, StringRef PassID) {
return;
}
- if (auto Err = cleanUpTempFiles(FileName))
+ if (cleanUpTempFiles(FileName))
dbgs() << "Unable to remove temporary file.";
}
@@ -648,12 +664,11 @@ template <typename T> void IRComparer<T>::analyzeIR(Any IR, IRDataT<T> &Data) {
return;
}
- const Function **FPtr = any_cast<const Function *>(&IR);
- const Function *F = FPtr ? *FPtr : nullptr;
+ const auto *F = unwrapIR<Function>(IR);
if (!F) {
- const Loop **L = any_cast<const Loop *>(&IR);
+ const auto *L = unwrapIR<Loop>(IR);
assert(L && "Unknown IR unit.");
- F = (*L)->getHeader()->getParent();
+ F = L->getHeader()->getParent();
}
assert(F && "Unknown IR unit.");
generateFunctionData(Data, *F);
@@ -681,33 +696,119 @@ bool IRComparer<T>::generateFunctionData(IRDataT<T> &Data, const Function &F) {
}
PrintIRInstrumentation::~PrintIRInstrumentation() {
- assert(ModuleDescStack.empty() && "ModuleDescStack is not empty at exit");
+ assert(PassRunDescriptorStack.empty() &&
+ "PassRunDescriptorStack is not empty at exit");
+}
+
+static SmallString<32> getIRFileDisplayName(Any IR) {
+ SmallString<32> Result;
+ raw_svector_ostream ResultStream(Result);
+ const Module *M = unwrapModule(IR);
+ stable_hash NameHash = stable_hash_combine_string(M->getName());
+ unsigned int MaxHashWidth = sizeof(stable_hash) * 8 / 4;
+ write_hex(ResultStream, NameHash, HexPrintStyle::Lower, MaxHashWidth);
+ if (unwrapIR<Module>(IR)) {
+ ResultStream << "-module";
+ } else if (const auto *F = unwrapIR<Function>(IR)) {
+ ResultStream << "-function-";
+ stable_hash FunctionNameHash = stable_hash_combine_string(F->getName());
+ write_hex(ResultStream, FunctionNameHash, HexPrintStyle::Lower,
+ MaxHashWidth);
+ } else if (const auto *C = unwrapIR<LazyCallGraph::SCC>(IR)) {
+ ResultStream << "-scc-";
+ stable_hash SCCNameHash = stable_hash_combine_string(C->getName());
+ write_hex(ResultStream, SCCNameHash, HexPrintStyle::Lower, MaxHashWidth);
+ } else if (const auto *L = unwrapIR<Loop>(IR)) {
+ ResultStream << "-loop-";
+ stable_hash LoopNameHash = stable_hash_combine_string(L->getName());
+ write_hex(ResultStream, LoopNameHash, HexPrintStyle::Lower, MaxHashWidth);
+ } else {
+ llvm_unreachable("Unknown wrapped IR type");
+ }
+ return Result;
+}
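Illustrative results of the function above (hash digits invented): the 16-digit module-path hash is always the prefix, and non-module units append a second hash of the unit name:

    3f9a12c87d5e44b1-module
    3f9a12c87d5e44b1-function-91c0de7a55b2f3e8
    3f9a12c87d5e44b1-loop-0c44aa17e2d9b06f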
+
+std::string PrintIRInstrumentation::fetchDumpFilename(StringRef PassName,
+ Any IR) {
+ const StringRef RootDirectory = IRDumpDirectory;
+ assert(!RootDirectory.empty() &&
+ "The flag -ir-dump-directory must be passed to dump IR to files");
+ SmallString<128> ResultPath;
+ ResultPath += RootDirectory;
+ SmallString<64> Filename;
+ raw_svector_ostream FilenameStream(Filename);
+ FilenameStream << CurrentPassNumber;
+ FilenameStream << "-";
+ FilenameStream << getIRFileDisplayName(IR);
+ FilenameStream << "-";
+ FilenameStream << PassName;
+ sys::path::append(ResultPath, Filename);
+ return std::string(ResultPath);
+}
+
+enum class IRDumpFileSuffixType {
+ Before,
+ After,
+ Invalidated,
+};
+
+static StringRef getFileSuffix(IRDumpFileSuffixType Type) {
+ static constexpr std::array FileSuffixes = {"-before.ll", "-after.ll",
+ "-invalidated.ll"};
+ return FileSuffixes[static_cast<size_t>(Type)];
}
-void PrintIRInstrumentation::pushModuleDesc(StringRef PassID, Any IR) {
+void PrintIRInstrumentation::pushPassRunDescriptor(
+ StringRef PassID, Any IR, std::string &DumpIRFilename) {
const Module *M = unwrapModule(IR);
- ModuleDescStack.emplace_back(M, getIRName(IR), PassID);
+ PassRunDescriptorStack.emplace_back(
+ PassRunDescriptor(M, DumpIRFilename, getIRName(IR), PassID));
+}
+
+PrintIRInstrumentation::PassRunDescriptor
+PrintIRInstrumentation::popPassRunDescriptor(StringRef PassID) {
+ assert(!PassRunDescriptorStack.empty() && "empty PassRunDescriptorStack");
+ PassRunDescriptor Descriptor = PassRunDescriptorStack.pop_back_val();
+ assert(Descriptor.PassID.equals(PassID) &&
+ "malformed PassRunDescriptorStack");
+ return Descriptor;
}
-PrintIRInstrumentation::PrintModuleDesc
-PrintIRInstrumentation::popModuleDesc(StringRef PassID) {
- assert(!ModuleDescStack.empty() && "empty ModuleDescStack");
- PrintModuleDesc ModuleDesc = ModuleDescStack.pop_back_val();
- assert(std::get<2>(ModuleDesc).equals(PassID) && "malformed ModuleDescStack");
- return ModuleDesc;
+// Callers are responsible for closing the returned file descriptor.
+static int prepareDumpIRFileDescriptor(const StringRef DumpIRFilename) {
+ std::error_code EC;
+ auto ParentPath = llvm::sys::path::parent_path(DumpIRFilename);
+ if (!ParentPath.empty()) {
+ std::error_code EC = llvm::sys::fs::create_directories(ParentPath);
+ if (EC)
+ report_fatal_error(Twine("Failed to create directory ") + ParentPath +
+ " to support -ir-dump-directory: " + EC.message());
+ }
+ int Result = 0;
+ EC = sys::fs::openFile(DumpIRFilename, Result, sys::fs::CD_OpenAlways,
+ sys::fs::FA_Write, sys::fs::OF_None);
+ if (EC)
+ report_fatal_error(Twine("Failed to open ") + DumpIRFilename +
+ " to support -ir-dump-directory: " + EC.message());
+ return Result;
}
void PrintIRInstrumentation::printBeforePass(StringRef PassID, Any IR) {
if (isIgnored(PassID))
return;
+ std::string DumpIRFilename;
+ if (!IRDumpDirectory.empty() &&
+ (shouldPrintBeforePass(PassID) || shouldPrintAfterPass(PassID)))
+ DumpIRFilename = fetchDumpFilename(PassID, IR);
+
// Saving Module for AfterPassInvalidated operations.
  // Note: here we rely on the fact that we do not change modules while
  // traversing the pipeline, so the latest captured module is good
  // for all print operations that have not happened yet.
if (shouldPrintPassNumbers() || shouldPrintAtPassNumber() ||
shouldPrintAfterPass(PassID))
- pushModuleDesc(PassID, IR);
+ pushPassRunDescriptor(PassID, IR, DumpIRFilename);
if (!shouldPrintIR(IR))
return;
@@ -715,14 +816,26 @@ void PrintIRInstrumentation::printBeforePass(StringRef PassID, Any IR) {
++CurrentPassNumber;
if (shouldPrintPassNumbers())
- dbgs() << " Running pass " << CurrentPassNumber << " " << PassID << "\n";
+ dbgs() << " Running pass " << CurrentPassNumber << " " << PassID
+ << " on " << getIRName(IR) << "\n";
if (!shouldPrintBeforePass(PassID))
return;
- dbgs() << "*** IR Dump Before " << PassID << " on " << getIRName(IR)
- << " ***\n";
- unwrapAndPrint(dbgs(), IR);
+ auto WriteIRToStream = [&](raw_ostream &Stream) {
+ Stream << "; *** IR Dump Before " << PassID << " on " << getIRName(IR)
+ << " ***\n";
+ unwrapAndPrint(Stream, IR);
+ };
+
+ if (!DumpIRFilename.empty()) {
+ DumpIRFilename += getFileSuffix(IRDumpFileSuffixType::Before);
+ llvm::raw_fd_ostream DumpIRFileStream{
+ prepareDumpIRFileDescriptor(DumpIRFilename), /* shouldClose */ true};
+ WriteIRToStream(DumpIRFileStream);
+ } else {
+ WriteIRToStream(dbgs());
+ }
}
void PrintIRInstrumentation::printAfterPass(StringRef PassID, Any IR) {
@@ -733,21 +846,33 @@ void PrintIRInstrumentation::printAfterPass(StringRef PassID, Any IR) {
!shouldPrintAtPassNumber())
return;
- const Module *M;
- std::string IRName;
- StringRef StoredPassID;
- std::tie(M, IRName, StoredPassID) = popModuleDesc(PassID);
+ auto [M, DumpIRFilename, IRName, StoredPassID] = popPassRunDescriptor(PassID);
assert(StoredPassID == PassID && "mismatched PassID");
if (!shouldPrintIR(IR) || !shouldPrintAfterPass(PassID))
return;
- dbgs() << "*** IR Dump "
- << (shouldPrintAtPassNumber()
- ? StringRef(formatv("At {0}-{1}", CurrentPassNumber, PassID))
- : StringRef(formatv("After {0}", PassID)))
- << " on " << IRName << " ***\n";
- unwrapAndPrint(dbgs(), IR);
+ auto WriteIRToStream = [&](raw_ostream &Stream, const StringRef IRName) {
+ Stream << "; *** IR Dump "
+ << (shouldPrintAtPassNumber()
+ ? StringRef(formatv("At {0}-{1}", CurrentPassNumber, PassID))
+ : StringRef(formatv("After {0}", PassID)))
+ << " on " << IRName << " ***\n";
+ unwrapAndPrint(Stream, IR);
+ };
+
+ if (!IRDumpDirectory.empty()) {
+ assert(!DumpIRFilename.empty() && "DumpIRFilename must not be empty and "
+ "should be set in printBeforePass");
+ const std::string DumpIRFilenameWithSuffix =
+ DumpIRFilename + getFileSuffix(IRDumpFileSuffixType::After).str();
+ llvm::raw_fd_ostream DumpIRFileStream{
+ prepareDumpIRFileDescriptor(DumpIRFilenameWithSuffix),
+ /* shouldClose */ true};
+ WriteIRToStream(DumpIRFileStream, IRName);
+ } else {
+ WriteIRToStream(dbgs(), IRName);
+ }
}
void PrintIRInstrumentation::printAfterPassInvalidated(StringRef PassID) {
@@ -758,25 +883,38 @@ void PrintIRInstrumentation::printAfterPassInvalidated(StringRef PassID) {
!shouldPrintAtPassNumber())
return;
- const Module *M;
- std::string IRName;
- StringRef StoredPassID;
- std::tie(M, IRName, StoredPassID) = popModuleDesc(PassID);
+ auto [M, DumpIRFilename, IRName, StoredPassID] = popPassRunDescriptor(PassID);
assert(StoredPassID == PassID && "mismatched PassID");
// Additional filtering (e.g. -filter-print-func) can lead to module
// printing being skipped.
if (!M || !shouldPrintAfterPass(PassID))
return;
- SmallString<20> Banner;
- if (shouldPrintAtPassNumber())
- Banner = formatv("*** IR Dump At {0}-{1} on {2} (invalidated) ***",
- CurrentPassNumber, PassID, IRName);
- else
- Banner = formatv("*** IR Dump After {0} on {1} (invalidated) ***",
- PassID, IRName);
- dbgs() << Banner << "\n";
- printIR(dbgs(), M);
+ auto WriteIRToStream = [&](raw_ostream &Stream, const Module *M,
+ const StringRef IRName) {
+ SmallString<20> Banner;
+ if (shouldPrintAtPassNumber())
+ Banner = formatv("; *** IR Dump At {0}-{1} on {2} (invalidated) ***",
+ CurrentPassNumber, PassID, IRName);
+ else
+ Banner = formatv("; *** IR Dump After {0} on {1} (invalidated) ***",
+ PassID, IRName);
+ Stream << Banner << "\n";
+ printIR(Stream, M);
+ };
+
+ if (!IRDumpDirectory.empty()) {
+ assert(!DumpIRFilename.empty() && "DumpIRFilename must not be empty and "
+ "should be set in printBeforePass");
+ const std::string DumpIRFilenameWithSuffix =
+ DumpIRFilename + getFileSuffix(IRDumpFileSuffixType::Invalidated).str();
+ llvm::raw_fd_ostream DumpIRFileStream{
+ prepareDumpIRFileDescriptor(DumpIRFilenameWithSuffix),
+ /* shouldClose */ true};
+ WriteIRToStream(DumpIRFileStream, M, IRName);
+ } else {
+ WriteIRToStream(dbgs(), M, IRName);
+ }
}
bool PrintIRInstrumentation::shouldPrintBeforePass(StringRef PassID) {
@@ -837,11 +975,10 @@ void OptNoneInstrumentation::registerCallbacks(
}
bool OptNoneInstrumentation::shouldRun(StringRef PassID, Any IR) {
- const Function **FPtr = any_cast<const Function *>(&IR);
- const Function *F = FPtr ? *FPtr : nullptr;
+ const auto *F = unwrapIR<Function>(IR);
if (!F) {
- if (const auto **L = any_cast<const Loop *>(&IR))
- F = (*L)->getHeader()->getParent();
+ if (const auto *L = unwrapIR<Loop>(IR))
+ F = L->getHeader()->getParent();
}
bool ShouldRun = !(F && F->hasOptNone());
if (!ShouldRun && DebugLogging) {
@@ -916,14 +1053,14 @@ void PrintPassInstrumentation::registerCallbacks(
auto &OS = print();
OS << "Running pass: " << PassID << " on " << getIRName(IR);
- if (const auto **F = any_cast<const Function *>(&IR)) {
- unsigned Count = (*F)->getInstructionCount();
+ if (const auto *F = unwrapIR<Function>(IR)) {
+ unsigned Count = F->getInstructionCount();
OS << " (" << Count << " instruction";
if (Count != 1)
OS << 's';
OS << ')';
- } else if (const auto **C = any_cast<const LazyCallGraph::SCC *>(&IR)) {
- int Count = (*C)->size();
+ } else if (const auto *C = unwrapIR<LazyCallGraph::SCC>(IR)) {
+ int Count = C->size();
OS << " (" << Count << " node";
if (Count != 1)
OS << 's';
@@ -1138,10 +1275,10 @@ bool PreservedCFGCheckerInstrumentation::CFG::invalidate(
static SmallVector<Function *, 1> GetFunctions(Any IR) {
SmallVector<Function *, 1> Functions;
- if (const auto **MaybeF = any_cast<const Function *>(&IR)) {
- Functions.push_back(*const_cast<Function **>(MaybeF));
- } else if (const auto **MaybeM = any_cast<const Module *>(&IR)) {
- for (Function &F : **const_cast<Module **>(MaybeM))
+ if (const auto *MaybeF = unwrapIR<Function>(IR)) {
+ Functions.push_back(const_cast<Function *>(MaybeF));
+ } else if (const auto *MaybeM = unwrapIR<Module>(IR)) {
+ for (Function &F : *const_cast<Module *>(MaybeM))
Functions.push_back(&F);
}
return Functions;
@@ -1176,8 +1313,8 @@ void PreservedCFGCheckerInstrumentation::registerCallbacks(
FAM.getResult<PreservedFunctionHashAnalysis>(*F);
}
- if (auto *MaybeM = any_cast<const Module *>(&IR)) {
- Module &M = **const_cast<Module **>(MaybeM);
+ if (const auto *MPtr = unwrapIR<Module>(IR)) {
+ auto &M = *const_cast<Module *>(MPtr);
MAM.getResult<PreservedModuleHashAnalysis>(M);
}
});
@@ -1235,8 +1372,8 @@ void PreservedCFGCheckerInstrumentation::registerCallbacks(
CheckCFG(P, F->getName(), *GraphBefore,
CFG(F, /* TrackBBLifetime */ false));
}
- if (auto *MaybeM = any_cast<const Module *>(&IR)) {
- Module &M = **const_cast<Module **>(MaybeM);
+ if (const auto *MPtr = unwrapIR<Module>(IR)) {
+ auto &M = *const_cast<Module *>(MPtr);
if (auto *HashBefore =
MAM.getCachedResult<PreservedModuleHashAnalysis>(M)) {
if (HashBefore->Hash != StructuralHash(M)) {
@@ -1254,11 +1391,10 @@ void VerifyInstrumentation::registerCallbacks(
[this](StringRef P, Any IR, const PreservedAnalyses &PassPA) {
if (isIgnored(P) || P == "VerifierPass")
return;
- const Function **FPtr = any_cast<const Function *>(&IR);
- const Function *F = FPtr ? *FPtr : nullptr;
+ const auto *F = unwrapIR<Function>(IR);
if (!F) {
- if (const auto **L = any_cast<const Loop *>(&IR))
- F = (*L)->getHeader()->getParent();
+ if (const auto *L = unwrapIR<Loop>(IR))
+ F = L->getHeader()->getParent();
}
if (F) {
@@ -1266,13 +1402,14 @@ void VerifyInstrumentation::registerCallbacks(
dbgs() << "Verifying function " << F->getName() << "\n";
if (verifyFunction(*F, &errs()))
- report_fatal_error("Broken function found, compilation aborted!");
+ report_fatal_error(formatv("Broken function found after pass "
+ "\"{0}\", compilation aborted!",
+ P));
} else {
- const Module **MPtr = any_cast<const Module *>(&IR);
- const Module *M = MPtr ? *MPtr : nullptr;
+ const auto *M = unwrapIR<Module>(IR);
if (!M) {
- if (const auto **C = any_cast<const LazyCallGraph::SCC *>(&IR))
- M = (*C)->begin()->getFunction().getParent();
+ if (const auto *C = unwrapIR<LazyCallGraph::SCC>(IR))
+ M = C->begin()->getFunction().getParent();
}
if (M) {
@@ -1280,7 +1417,9 @@ void VerifyInstrumentation::registerCallbacks(
dbgs() << "Verifying module " << M->getName() << "\n";
if (verifyModule(*M, &errs()))
- report_fatal_error("Broken module found, compilation aborted!");
+ report_fatal_error(formatv("Broken module found after pass "
+ "\"{0}\", compilation aborted!",
+ P));
}
}
});
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
index 849ee80bfaa3..eece6a2cc717 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
@@ -31,6 +30,7 @@
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
+#include <cmath>
#include <cstdint>
#include <iterator>
#include <map>
@@ -167,43 +167,373 @@ void CounterMappingContext::dump(const Counter &C, raw_ostream &OS) const {
}
Expected<int64_t> CounterMappingContext::evaluate(const Counter &C) const {
- switch (C.getKind()) {
- case Counter::Zero:
- return 0;
- case Counter::CounterValueReference:
- if (C.getCounterID() >= CounterValues.size())
- return errorCodeToError(errc::argument_out_of_domain);
- return CounterValues[C.getCounterID()];
- case Counter::Expression: {
- if (C.getExpressionID() >= Expressions.size())
- return errorCodeToError(errc::argument_out_of_domain);
- const auto &E = Expressions[C.getExpressionID()];
- Expected<int64_t> LHS = evaluate(E.LHS);
- if (!LHS)
- return LHS;
- Expected<int64_t> RHS = evaluate(E.RHS);
- if (!RHS)
- return RHS;
- return E.Kind == CounterExpression::Subtract ? *LHS - *RHS : *LHS + *RHS;
+ struct StackElem {
+ Counter ICounter;
+ int64_t LHS = 0;
+ enum {
+ KNeverVisited = 0,
+ KVisitedOnce = 1,
+ KVisitedTwice = 2,
+ } VisitCount = KNeverVisited;
+ };
+
+ std::stack<StackElem> CounterStack;
+ CounterStack.push({C});
+
+ int64_t LastPoppedValue;
+
+ while (!CounterStack.empty()) {
+ StackElem &Current = CounterStack.top();
+
+ switch (Current.ICounter.getKind()) {
+ case Counter::Zero:
+ LastPoppedValue = 0;
+ CounterStack.pop();
+ break;
+ case Counter::CounterValueReference:
+ if (Current.ICounter.getCounterID() >= CounterValues.size())
+ return errorCodeToError(errc::argument_out_of_domain);
+ LastPoppedValue = CounterValues[Current.ICounter.getCounterID()];
+ CounterStack.pop();
+ break;
+ case Counter::Expression: {
+ if (Current.ICounter.getExpressionID() >= Expressions.size())
+ return errorCodeToError(errc::argument_out_of_domain);
+ const auto &E = Expressions[Current.ICounter.getExpressionID()];
+ if (Current.VisitCount == StackElem::KNeverVisited) {
+ CounterStack.push(StackElem{E.LHS});
+ Current.VisitCount = StackElem::KVisitedOnce;
+ } else if (Current.VisitCount == StackElem::KVisitedOnce) {
+ Current.LHS = LastPoppedValue;
+ CounterStack.push(StackElem{E.RHS});
+ Current.VisitCount = StackElem::KVisitedTwice;
+ } else {
+ int64_t LHS = Current.LHS;
+ int64_t RHS = LastPoppedValue;
+ LastPoppedValue =
+ E.Kind == CounterExpression::Subtract ? LHS - RHS : LHS + RHS;
+ CounterStack.pop();
+ }
+ break;
+ }
+ }
}
+
+ return LastPoppedValue;
+}
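The loop above trades recursion for an explicit stack, so deeply nested counter expressions cannot overflow the call stack; each Expression node is visited twice (once per operand) before being combined and popped. A standalone sketch of the same scheme on a toy node pool (names and types invented, not LLVM's):

    #include <cstdint>
    #include <stack>
    #include <vector>

    struct Node { char Op; int L, R; int64_t Val; }; // Op: 'v' leaf, '+', '-'

    int64_t evalNoRecursion(const std::vector<Node> &Pool, int Root) {
      struct Frame { int Idx; int64_t LHS = 0; int Visits = 0; };
      std::stack<Frame> Stk;          // deque-backed: tops stay valid on push
      Stk.push({Root});
      int64_t Last = 0;
      while (!Stk.empty()) {
        Frame &Cur = Stk.top();
        const Node &N = Pool[Cur.Idx];
        if (N.Op == 'v') {            // leaf: its value is the popped result
          Last = N.Val;
          Stk.pop();
        } else if (Cur.Visits == 0) { // first visit: evaluate left child
          Cur.Visits = 1;
          Stk.push({N.L});
        } else if (Cur.Visits == 1) { // back from left: stash it, do right
          Cur.LHS = Last;
          Cur.Visits = 2;
          Stk.push({N.R});
        } else {                      // back from right: combine and pop
          Last = N.Op == '+' ? Cur.LHS + Last : Cur.LHS - Last;
          Stk.pop();
        }
      }
      return Last; // Pool {{'v',0,0,2},{'v',0,0,3},{'+',0,1,0}}, Root 2 -> 5
    }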
+
+Expected<BitVector> CounterMappingContext::evaluateBitmap(
+ const CounterMappingRegion *MCDCDecision) const {
+ unsigned ID = MCDCDecision->MCDCParams.BitmapIdx;
+ unsigned NC = MCDCDecision->MCDCParams.NumConditions;
+ unsigned SizeInBits = llvm::alignTo(uint64_t(1) << NC, CHAR_BIT);
+ unsigned SizeInBytes = SizeInBits / CHAR_BIT;
+
+ ArrayRef<uint8_t> Bytes(&BitmapBytes[ID], SizeInBytes);
+
+ // Mask each bitmap byte into the BitVector. Go in reverse so that the
+ // bitvector can just be shifted over by one byte on each iteration.
+ BitVector Result(SizeInBits, false);
+ for (auto Byte = std::rbegin(Bytes); Byte != std::rend(Bytes); ++Byte) {
+ uint32_t Data = *Byte;
+ Result <<= CHAR_BIT;
+ Result.setBitsInMask(&Data, 1);
}
- llvm_unreachable("Unhandled CounterKind");
+ return Result;
}
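A worked instance of the masking loop above (values invented): with NumConditions = 2, SizeInBits = alignTo(1 << 2, 8) = 8, so a single bitmap byte is read; a byte of 0b00000110 sets Result bits {1, 2}, meaning test vectors 1 and 2 (of indices 0..3) were executed.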
-unsigned CounterMappingContext::getMaxCounterID(const Counter &C) const {
- switch (C.getKind()) {
- case Counter::Zero:
- return 0;
- case Counter::CounterValueReference:
- return C.getCounterID();
- case Counter::Expression: {
- if (C.getExpressionID() >= Expressions.size())
- return 0;
- const auto &E = Expressions[C.getExpressionID()];
- return std::max(getMaxCounterID(E.LHS), getMaxCounterID(E.RHS));
+class MCDCRecordProcessor {
+ /// A bitmap representing the executed test vectors for a boolean expression.
+ /// Each index of the bitmap corresponds to a possible test vector. An index
+ /// with a bit value of '1' indicates that the corresponding Test Vector
+ /// identified by that index was executed.
+ BitVector &ExecutedTestVectorBitmap;
+
+ /// Decision Region to which the ExecutedTestVectorBitmap applies.
+ CounterMappingRegion &Region;
+
+  /// Array of branch regions corresponding to each condition in the boolean
+  /// expression.
+ ArrayRef<CounterMappingRegion> Branches;
+
+ /// Total number of conditions in the boolean expression.
+ unsigned NumConditions;
+
+ /// Mapping of a condition ID to its corresponding branch region.
+ llvm::DenseMap<unsigned, const CounterMappingRegion *> Map;
+
+ /// Vector used to track whether a condition is constant folded.
+ MCDCRecord::BoolVector Folded;
+
+ /// Mapping of calculated MC/DC Independence Pairs for each condition.
+ MCDCRecord::TVPairMap IndependencePairs;
+
+ /// Total number of possible Test Vectors for the boolean expression.
+ MCDCRecord::TestVectors TestVectors;
+
+ /// Actual executed Test Vectors for the boolean expression, based on
+ /// ExecutedTestVectorBitmap.
+ MCDCRecord::TestVectors ExecVectors;
+
+public:
+ MCDCRecordProcessor(BitVector &Bitmap, CounterMappingRegion &Region,
+ ArrayRef<CounterMappingRegion> Branches)
+ : ExecutedTestVectorBitmap(Bitmap), Region(Region), Branches(Branches),
+ NumConditions(Region.MCDCParams.NumConditions),
+ Folded(NumConditions, false), IndependencePairs(NumConditions),
+ TestVectors((size_t)1 << NumConditions) {}
+
+private:
+ void recordTestVector(MCDCRecord::TestVector &TV,
+ MCDCRecord::CondState Result) {
+ // Calculate an index that is used to identify the test vector in a vector
+ // of test vectors. This index also corresponds to the index values of an
+ // MCDC Region's bitmap (see findExecutedTestVectors()).
+ unsigned Index = 0;
+ for (auto Cond = std::rbegin(TV); Cond != std::rend(TV); ++Cond) {
+ Index <<= 1;
+ Index |= (*Cond == MCDCRecord::MCDC_True) ? 0x1 : 0x0;
+ }
+
+    // Copy the completed test vector to the vector of test vectors.
+ TestVectors[Index] = TV;
+
+ // The final value (T,F) is equal to the last non-dontcare state on the
+ // path (in a short-circuiting system).
+ TestVectors[Index].push_back(Result);
+ }
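A worked index computation (hypothetical vector): for TV = (C1=T, C2=F, C3=T), the reverse walk visits C3 first and yields

    0 -> (0<<1 | C3=1) = 1 -> (1<<1 | C2=0) = 2 -> (2<<1 | C1=1) = 5 = 0b101

so bit i-1 of the index holds condition i, matching the bitmap layout consumed by findExecutedTestVectors().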
+
+ void shouldCopyOffTestVectorForTruePath(MCDCRecord::TestVector &TV,
+ unsigned ID) {
+ // Branch regions are hashed based on an ID.
+ const CounterMappingRegion *Branch = Map[ID];
+
+ TV[ID - 1] = MCDCRecord::MCDC_True;
+ if (Branch->MCDCParams.TrueID > 0)
+ buildTestVector(TV, Branch->MCDCParams.TrueID);
+ else
+ recordTestVector(TV, MCDCRecord::MCDC_True);
+ }
+
+ void shouldCopyOffTestVectorForFalsePath(MCDCRecord::TestVector &TV,
+ unsigned ID) {
+ // Branch regions are hashed based on an ID.
+ const CounterMappingRegion *Branch = Map[ID];
+
+ TV[ID - 1] = MCDCRecord::MCDC_False;
+ if (Branch->MCDCParams.FalseID > 0)
+ buildTestVector(TV, Branch->MCDCParams.FalseID);
+ else
+ recordTestVector(TV, MCDCRecord::MCDC_False);
+ }
+
+ /// Starting with the base test vector, build a comprehensive list of
+ /// possible test vectors by recursively walking the branch condition IDs
+ /// provided. Once an end node is reached, record the test vector in a vector
+ /// of test vectors that can be matched against during MC/DC analysis, and
+ /// then reset the positions to 'DontCare'.
+ void buildTestVector(MCDCRecord::TestVector &TV, unsigned ID = 1) {
+ shouldCopyOffTestVectorForTruePath(TV, ID);
+ shouldCopyOffTestVectorForFalsePath(TV, ID);
+
+ // Reset back to DontCare.
+ TV[ID - 1] = MCDCRecord::MCDC_DontCare;
+ }
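A sketch of this walk for a two-condition decision `a && b` (IDs invented: condition a has TrueID = 2 and FalseID = 0, condition b has TrueID = 0 and FalseID = 0); three vectors get recorded:

    (a=T, b=T)        -> result T   // a true, fall through to b; b true
    (a=T, b=F)        -> result F   // b false terminates the decision
    (a=F, b=DontCare) -> result F   // a false short-circuits past b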
+
+ /// Walk the bits in the bitmap. A bit set to '1' indicates that the test
+ /// vector at the corresponding index was executed during a test run.
+ void findExecutedTestVectors(BitVector &ExecutedTestVectorBitmap) {
+ for (unsigned Idx = 0; Idx < ExecutedTestVectorBitmap.size(); ++Idx) {
+ if (ExecutedTestVectorBitmap[Idx] == 0)
+ continue;
+ assert(!TestVectors[Idx].empty() && "Test Vector doesn't exist.");
+ ExecVectors.push_back(TestVectors[Idx]);
+ }
+ }
+
+ /// For a given condition and two executed Test Vectors, A and B, see if the
+ /// two test vectors match forming an Independence Pair for the condition.
+ /// For two test vectors to match, the following must be satisfied:
+ /// - The condition's value in each test vector must be opposite.
+ /// - The result's value in each test vector must be opposite.
+ /// - All other conditions' values must be equal or marked as "don't care".
+ bool matchTestVectors(unsigned Aidx, unsigned Bidx, unsigned ConditionIdx) {
+ const MCDCRecord::TestVector &A = ExecVectors[Aidx];
+ const MCDCRecord::TestVector &B = ExecVectors[Bidx];
+
+ // If condition values in both A and B aren't opposites, no match.
+ // Because a value can be 0 (false), 1 (true), or -1 (DontCare), a check
+ // that "XOR != 1" will ensure that the values are opposites and that
+ // neither of them is a DontCare.
+ // 1 XOR 0 == 1 | 0 XOR 0 == 0 | -1 XOR 0 == -1
+ // 1 XOR 1 == 0 | 0 XOR 1 == 1 | -1 XOR 1 == -2
+ // 1 XOR -1 == -2 | 0 XOR -1 == -1 | -1 XOR -1 == 0
+ if ((A[ConditionIdx] ^ B[ConditionIdx]) != 1)
+ return false;
+
+ // If the results of both A and B aren't opposites, no match.
+ if ((A[NumConditions] ^ B[NumConditions]) != 1)
+ return false;
+
+ for (unsigned Idx = 0; Idx < NumConditions; ++Idx) {
+ // Look for other conditions that don't match. Skip over the given
+ // Condition as well as any conditions marked as "don't care".
+ const auto ARecordTyForCond = A[Idx];
+ const auto BRecordTyForCond = B[Idx];
+ if (Idx == ConditionIdx ||
+ ARecordTyForCond == MCDCRecord::MCDC_DontCare ||
+ BRecordTyForCond == MCDCRecord::MCDC_DontCare)
+ continue;
+
+ // If there is a condition mismatch with any of the other conditions,
+ // there is no match for the test vectors.
+ if (ARecordTyForCond != BRecordTyForCond)
+ return false;
+ }
+
+ // Otherwise, match.
+ return true;
+ }
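Continuing the `a && b` sketch above, if all three recorded vectors were executed, matching finds one independence pair per condition:

    a: (T,T;T) vs (F,-;F)   // a flips, result flips, b is DontCare
    b: (T,T;T) vs (T,F;F)   // b flips, result flips, a stays equal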
+
+ /// Find all possible Independence Pairs for a boolean expression given its
+ /// executed Test Vectors. This process involves looking at each condition
+ /// and attempting to find two Test Vectors that "match", giving us a pair.
+ void findIndependencePairs() {
+ unsigned NumTVs = ExecVectors.size();
+
+ // For each condition.
+ for (unsigned C = 0; C < NumConditions; ++C) {
+ bool PairFound = false;
+
+ // For each executed test vector.
+ for (unsigned I = 0; !PairFound && I < NumTVs; ++I) {
+ // Compared to every other executed test vector.
+ for (unsigned J = 0; !PairFound && J < NumTVs; ++J) {
+ if (I == J)
+ continue;
+
+ // If a matching pair of vectors is found, record them.
+ if ((PairFound = matchTestVectors(I, J, C)))
+ IndependencePairs[C] = std::make_pair(I + 1, J + 1);
+ }
+ }
+ }
+ }
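This scan is why the comment above flags it as the slow part: with up to 2^NumConditions executed vectors, the bound is

    comparisons <= NumConditions * NumTVs^2   (e.g. NC = 6, NumTVs = 64 -> 24576)

though the PairFound early exit usually stops far sooner.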
+
+public:
+ /// Process the MC/DC Record in order to produce a result for a boolean
+ /// expression. This process includes tracking the conditions that comprise
+ /// the decision region, calculating the list of all possible test vectors,
+ /// marking the executed test vectors, and then finding an Independence Pair
+ /// out of the executed test vectors for each condition in the boolean
+ /// expression. A condition is tracked to ensure that its ID can be mapped to
+ /// its ordinal position in the boolean expression. The condition's source
+ /// location is also tracked, as well as whether it is constant folded (in
+  /// which case it is excluded from the metric).
+ MCDCRecord processMCDCRecord() {
+ unsigned I = 0;
+ MCDCRecord::CondIDMap PosToID;
+ MCDCRecord::LineColPairMap CondLoc;
+
+ // Walk the Record's BranchRegions (representing Conditions) in order to:
+ // - Hash the condition based on its corresponding ID. This will be used to
+ // calculate the test vectors.
+ // - Keep a map of the condition's ordinal position (1, 2, 3, 4) to its
+ // actual ID. This will be used to visualize the conditions in the
+ // correct order.
+ // - Keep track of the condition source location. This will be used to
+ // visualize where the condition is.
+ // - Record whether the condition is constant folded so that we exclude it
+ // from being measured.
+ for (const auto &B : Branches) {
+ Map[B.MCDCParams.ID] = &B;
+ PosToID[I] = B.MCDCParams.ID - 1;
+ CondLoc[I] = B.startLoc();
+ Folded[I++] = (B.Count.isZero() && B.FalseCount.isZero());
+ }
+
+ // Initialize a base test vector as 'DontCare'.
+ MCDCRecord::TestVector TV(NumConditions, MCDCRecord::MCDC_DontCare);
+
+ // Use the base test vector to build the list of all possible test vectors.
+ buildTestVector(TV);
+
+ // Using Profile Bitmap from runtime, mark the executed test vectors.
+ findExecutedTestVectors(ExecutedTestVectorBitmap);
+
+    // Compare executed test vectors against each other to find independence
+    // pairs for each condition. This processing takes the most time.
+ findIndependencePairs();
+
+ // Record Test vectors, executed vectors, and independence pairs.
+ MCDCRecord Res(Region, ExecVectors, IndependencePairs, Folded, PosToID,
+ CondLoc);
+ return Res;
}
+};
+
+Expected<MCDCRecord> CounterMappingContext::evaluateMCDCRegion(
+ CounterMappingRegion Region, BitVector ExecutedTestVectorBitmap,
+ ArrayRef<CounterMappingRegion> Branches) {
+
+ MCDCRecordProcessor MCDCProcessor(ExecutedTestVectorBitmap, Region, Branches);
+ return MCDCProcessor.processMCDCRecord();
+}
+
+unsigned CounterMappingContext::getMaxCounterID(const Counter &C) const {
+ struct StackElem {
+ Counter ICounter;
+ int64_t LHS = 0;
+ enum {
+ KNeverVisited = 0,
+ KVisitedOnce = 1,
+ KVisitedTwice = 2,
+ } VisitCount = KNeverVisited;
+ };
+
+ std::stack<StackElem> CounterStack;
+ CounterStack.push({C});
+
+ int64_t LastPoppedValue;
+
+ while (!CounterStack.empty()) {
+ StackElem &Current = CounterStack.top();
+
+ switch (Current.ICounter.getKind()) {
+ case Counter::Zero:
+ LastPoppedValue = 0;
+ CounterStack.pop();
+ break;
+ case Counter::CounterValueReference:
+ LastPoppedValue = Current.ICounter.getCounterID();
+ CounterStack.pop();
+ break;
+ case Counter::Expression: {
+ if (Current.ICounter.getExpressionID() >= Expressions.size()) {
+ LastPoppedValue = 0;
+ CounterStack.pop();
+ } else {
+ const auto &E = Expressions[Current.ICounter.getExpressionID()];
+ if (Current.VisitCount == StackElem::KNeverVisited) {
+ CounterStack.push(StackElem{E.LHS});
+ Current.VisitCount = StackElem::KVisitedOnce;
+ } else if (Current.VisitCount == StackElem::KVisitedOnce) {
+ Current.LHS = LastPoppedValue;
+ CounterStack.push(StackElem{E.RHS});
+ Current.VisitCount = StackElem::KVisitedTwice;
+ } else {
+ int64_t LHS = Current.LHS;
+ int64_t RHS = LastPoppedValue;
+ LastPoppedValue = std::max(LHS, RHS);
+ CounterStack.pop();
+ }
+ }
+ break;
+ }
+ }
}
- llvm_unreachable("Unhandled CounterKind");
+
+ return LastPoppedValue;
}
void FunctionRecordIterator::skipOtherFiles() {
@@ -232,12 +562,31 @@ static unsigned getMaxCounterID(const CounterMappingContext &Ctx,
return MaxCounterID;
}
+static unsigned getMaxBitmapSize(const CounterMappingContext &Ctx,
+ const CoverageMappingRecord &Record) {
+ unsigned MaxBitmapID = 0;
+ unsigned NumConditions = 0;
+ // The last DecisionRegion has the highest bitmap byte index used in the
+  // function, which, when combined with its number of conditions, yields the
+ // full bitmap size.
+ for (const auto &Region : reverse(Record.MappingRegions)) {
+ if (Region.Kind == CounterMappingRegion::MCDCDecisionRegion) {
+ MaxBitmapID = Region.MCDCParams.BitmapIdx;
+ NumConditions = Region.MCDCParams.NumConditions;
+ break;
+ }
+ }
+ unsigned SizeInBits = llvm::alignTo(uint64_t(1) << NumConditions, CHAR_BIT);
+ return MaxBitmapID + (SizeInBits / CHAR_BIT);
+}
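A worked instance (values invented): if the last MCDCDecisionRegion has BitmapIdx = 4 and NumConditions = 3, then SizeInBits = alignTo(1 << 3, 8) = 8, so the function returns 4 + 8/8 = 5 bitmap bytes.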
+
Error CoverageMapping::loadFunctionRecord(
const CoverageMappingRecord &Record,
IndexedInstrProfReader &ProfileReader) {
StringRef OrigFuncName = Record.FunctionName;
if (OrigFuncName.empty())
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(coveragemap_error::malformed,
+ "record function name is empty");
if (Record.Filenames.empty())
OrigFuncName = getFuncNameWithoutPrefix(OrigFuncName);
@@ -254,12 +603,28 @@ Error CoverageMapping::loadFunctionRecord(
FuncHashMismatches.emplace_back(std::string(Record.FunctionName),
Record.FunctionHash);
return Error::success();
- } else if (IPE != instrprof_error::unknown_function)
+ }
+ if (IPE != instrprof_error::unknown_function)
return make_error<InstrProfError>(IPE);
Counts.assign(getMaxCounterID(Ctx, Record) + 1, 0);
}
Ctx.setCounts(Counts);
+ std::vector<uint8_t> BitmapBytes;
+ if (Error E = ProfileReader.getFunctionBitmapBytes(
+ Record.FunctionName, Record.FunctionHash, BitmapBytes)) {
+ instrprof_error IPE = std::get<0>(InstrProfError::take(std::move(E)));
+ if (IPE == instrprof_error::hash_mismatch) {
+ FuncHashMismatches.emplace_back(std::string(Record.FunctionName),
+ Record.FunctionHash);
+ return Error::success();
+ }
+ if (IPE != instrprof_error::unknown_function)
+ return make_error<InstrProfError>(IPE);
+ BitmapBytes.assign(getMaxBitmapSize(Ctx, Record) + 1, 0);
+ }
+ Ctx.setBitmapBytes(BitmapBytes);
+
assert(!Record.MappingRegions.empty() && "Function has no regions");
// This coverage record is a zero region for a function that's unused in
@@ -271,8 +636,20 @@ Error CoverageMapping::loadFunctionRecord(
Record.MappingRegions[0].Count.isZero() && Counts[0] > 0)
return Error::success();
+ unsigned NumConds = 0;
+  const CounterMappingRegion *MCDCDecision = nullptr;
+ std::vector<CounterMappingRegion> MCDCBranches;
+
FunctionRecord Function(OrigFuncName, Record.Filenames);
for (const auto &Region : Record.MappingRegions) {
+ // If an MCDCDecisionRegion is seen, track the BranchRegions that follow
+ // it according to Region.NumConditions.
+ if (Region.Kind == CounterMappingRegion::MCDCDecisionRegion) {
+ assert(NumConds == 0);
+ MCDCDecision = &Region;
+ NumConds = Region.MCDCParams.NumConditions;
+ continue;
+ }
Expected<int64_t> ExecutionCount = Ctx.evaluate(Region.Count);
if (auto E = ExecutionCount.takeError()) {
consumeError(std::move(E));
@@ -284,6 +661,44 @@ Error CoverageMapping::loadFunctionRecord(
return Error::success();
}
Function.pushRegion(Region, *ExecutionCount, *AltExecutionCount);
+
+ // If a MCDCDecisionRegion was seen, store the BranchRegions that
+ // correspond to it in a vector, according to the number of conditions
+ // recorded for the region (tracked by NumConds).
+ if (NumConds > 0 && Region.Kind == CounterMappingRegion::MCDCBranchRegion) {
+ MCDCBranches.push_back(Region);
+
+ // As we move through all of the MCDCBranchRegions that follow the
+ // MCDCDecisionRegion, decrement NumConds to make sure we account for
+ // them all before we calculate the bitmap of executed test vectors.
+ if (--NumConds == 0) {
+ // Evaluating the test vector bitmap for the decision region entails
+ // calculating precisely what bits are pertinent to this region alone.
+ // This is calculated based on the recorded offset into the global
+ // profile bitmap; the length is calculated based on the recorded
+ // number of conditions.
+ Expected<BitVector> ExecutedTestVectorBitmap =
+ Ctx.evaluateBitmap(MCDCDecision);
+ if (auto E = ExecutedTestVectorBitmap.takeError()) {
+ consumeError(std::move(E));
+ return Error::success();
+ }
+
+ // Since the bitmap identifies the executed test vectors for an MC/DC
+ // DecisionRegion, all of the information is now available to process.
+      // This is where the bulk of the MC/DC processing takes place.
+ Expected<MCDCRecord> Record = Ctx.evaluateMCDCRegion(
+ *MCDCDecision, *ExecutedTestVectorBitmap, MCDCBranches);
+ if (auto E = Record.takeError()) {
+ consumeError(std::move(E));
+ return Error::success();
+ }
+
+ // Save the MC/DC Record so that it can be visualized later.
+ Function.pushMCDCRecord(*Record);
+ MCDCBranches.clear();
+ }
+ }
}
// Don't create records for (filenames, function) pairs we've already seen.
@@ -342,7 +757,7 @@ static Error handleMaybeNoDataFoundError(Error E) {
std::move(E), [](const CoverageMapError &CME) {
if (CME.get() == coveragemap_error::no_data_found)
return static_cast<Error>(Error::success());
- return make_error<CoverageMapError>(CME.get());
+ return make_error<CoverageMapError>(CME.get(), CME.getMessage());
});
}
@@ -790,6 +1205,10 @@ CoverageData CoverageMapping::getCoverageForFile(StringRef Filename) const {
for (const auto &CR : Function.CountedBranchRegions)
if (FileIDs.test(CR.FileID) && (CR.FileID == CR.ExpandedFileID))
FileCoverage.BranchRegions.push_back(CR);
+ // Capture MCDC records specific to the function.
+ for (const auto &MR : Function.MCDCRecords)
+ if (FileIDs.test(MR.getDecisionRegion().FileID))
+ FileCoverage.MCDCRecords.push_back(MR);
}
LLVM_DEBUG(dbgs() << "Emitting segments for file: " << Filename << "\n");
@@ -842,6 +1261,11 @@ CoverageMapping::getCoverageForFunction(const FunctionRecord &Function) const {
if (CR.FileID == *MainFileID)
FunctionCoverage.BranchRegions.push_back(CR);
+ // Capture MCDC records specific to the function.
+ for (const auto &MR : Function.MCDCRecords)
+ if (MR.getDecisionRegion().FileID == *MainFileID)
+ FunctionCoverage.MCDCRecords.push_back(MR);
+
LLVM_DEBUG(dbgs() << "Emitting segments for function: " << Function.Name
<< "\n");
FunctionCoverage.Segments = SegmentBuilder::buildSegments(Regions);
@@ -925,26 +1349,43 @@ LineCoverageIterator &LineCoverageIterator::operator++() {
return *this;
}
-static std::string getCoverageMapErrString(coveragemap_error Err) {
+static std::string getCoverageMapErrString(coveragemap_error Err,
+ const std::string &ErrMsg = "") {
+ std::string Msg;
+ raw_string_ostream OS(Msg);
+
switch (Err) {
case coveragemap_error::success:
- return "Success";
+ OS << "success";
+ break;
case coveragemap_error::eof:
- return "End of File";
+ OS << "end of File";
+ break;
case coveragemap_error::no_data_found:
- return "No coverage data found";
+ OS << "no coverage data found";
+ break;
case coveragemap_error::unsupported_version:
- return "Unsupported coverage format version";
+ OS << "unsupported coverage format version";
+ break;
case coveragemap_error::truncated:
- return "Truncated coverage data";
+ OS << "truncated coverage data";
+ break;
case coveragemap_error::malformed:
- return "Malformed coverage data";
+ OS << "malformed coverage data";
+ break;
case coveragemap_error::decompression_failed:
- return "Failed to decompress coverage data (zlib)";
+ OS << "failed to decompress coverage data (zlib)";
+ break;
case coveragemap_error::invalid_or_missing_arch_specifier:
- return "`-arch` specifier is invalid or missing for universal binary";
+ OS << "`-arch` specifier is invalid or missing for universal binary";
+ break;
}
- llvm_unreachable("A value of coveragemap_error has no message.");
+
+ // If optional error message is not empty, append it to the message.
+ if (!ErrMsg.empty())
+ OS << ": " << ErrMsg;
+
+ return Msg;
}
namespace {
@@ -962,7 +1403,7 @@ class CoverageMappingErrorCategoryType : public std::error_category {
} // end anonymous namespace
std::string CoverageMapError::message() const {
- return getCoverageMapErrString(Err);
+ return getCoverageMapErrString(Err, Msg);
}
const std::error_category &llvm::coverage::coveragemap_category() {
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
index 05737323314a..ac8e6b56379f 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
@@ -68,7 +68,8 @@ Error RawCoverageReader::readULEB128(uint64_t &Result) {
unsigned N = 0;
Result = decodeULEB128(Data.bytes_begin(), &N);
if (N > Data.size())
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(coveragemap_error::malformed,
+ "the size of ULEB128 is too big");
Data = Data.substr(N);
return Error::success();
}
@@ -77,7 +78,9 @@ Error RawCoverageReader::readIntMax(uint64_t &Result, uint64_t MaxPlus1) {
if (auto Err = readULEB128(Result))
return Err;
if (Result >= MaxPlus1)
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(
+ coveragemap_error::malformed,
+ "the value of ULEB128 is greater than or equal to MaxPlus1");
return Error::success();
}
@@ -85,7 +88,8 @@ Error RawCoverageReader::readSize(uint64_t &Result) {
if (auto Err = readULEB128(Result))
return Err;
if (Result > Data.size())
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(coveragemap_error::malformed,
+ "the value of ULEB128 is too big");
return Error::success();
}
@@ -103,7 +107,8 @@ Error RawCoverageFilenamesReader::read(CovMapVersion Version) {
if (auto Err = readSize(NumFilenames))
return Err;
if (!NumFilenames)
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(coveragemap_error::malformed,
+ "number of filenames is zero");
if (Version < CovMapVersion::Version4)
return readUncompressed(Version, NumFilenames);
@@ -201,13 +206,15 @@ Error RawCoverageMappingReader::decodeCounter(unsigned Value, Counter &C) {
case CounterExpression::Add: {
auto ID = Value >> Counter::EncodingTagBits;
if (ID >= Expressions.size())
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(coveragemap_error::malformed,
+ "counter expression is invalid");
Expressions[ID].Kind = CounterExpression::ExprKind(Tag);
C = Counter::getExpression(ID);
break;
}
default:
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(coveragemap_error::malformed,
+ "counter expression kind is invalid");
}
return Error::success();
}
@@ -237,6 +244,7 @@ Error RawCoverageMappingReader::readMappingRegionsSubArray(
unsigned LineStart = 0;
for (size_t I = 0; I < NumRegions; ++I) {
Counter C, C2;
+ uint64_t BIDX = 0, NC = 0, ID = 0, TID = 0, FID = 0;
CounterMappingRegion::RegionKind Kind = CounterMappingRegion::CodeRegion;
// Read the combined counter + region kind.
@@ -268,7 +276,8 @@ Error RawCoverageMappingReader::readMappingRegionsSubArray(
ExpandedFileID = EncodedCounterAndRegion >>
Counter::EncodingCounterTagAndExpansionRegionTagBits;
if (ExpandedFileID >= NumFileIDs)
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(coveragemap_error::malformed,
+ "ExpandedFileID is invalid");
} else {
switch (EncodedCounterAndRegion >>
Counter::EncodingCounterTagAndExpansionRegionTagBits) {
@@ -286,8 +295,30 @@ Error RawCoverageMappingReader::readMappingRegionsSubArray(
if (auto Err = readCounter(C2))
return Err;
break;
+ case CounterMappingRegion::MCDCBranchRegion:
+      // For an MCDC Branch Region, read two successive counters and three IDs.
+ Kind = CounterMappingRegion::MCDCBranchRegion;
+ if (auto Err = readCounter(C))
+ return Err;
+ if (auto Err = readCounter(C2))
+ return Err;
+ if (auto Err = readIntMax(ID, std::numeric_limits<unsigned>::max()))
+ return Err;
+ if (auto Err = readIntMax(TID, std::numeric_limits<unsigned>::max()))
+ return Err;
+ if (auto Err = readIntMax(FID, std::numeric_limits<unsigned>::max()))
+ return Err;
+ break;
+ case CounterMappingRegion::MCDCDecisionRegion:
+ Kind = CounterMappingRegion::MCDCDecisionRegion;
+ if (auto Err = readIntMax(BIDX, std::numeric_limits<unsigned>::max()))
+ return Err;
+ if (auto Err = readIntMax(NC, std::numeric_limits<unsigned>::max()))
+ return Err;
+ break;
default:
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(coveragemap_error::malformed,
+ "region kind is incorrect");
}
}
}
@@ -300,7 +331,8 @@ Error RawCoverageMappingReader::readMappingRegionsSubArray(
if (auto Err = readULEB128(ColumnStart))
return Err;
if (ColumnStart > std::numeric_limits<unsigned>::max())
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(coveragemap_error::malformed,
+ "start column is too big");
if (auto Err = readIntMax(NumLines, std::numeric_limits<unsigned>::max()))
return Err;
if (auto Err = readIntMax(ColumnEnd, std::numeric_limits<unsigned>::max()))
@@ -337,11 +369,18 @@ Error RawCoverageMappingReader::readMappingRegionsSubArray(
dbgs() << "\n";
});
- auto CMR = CounterMappingRegion(C, C2, InferredFileID, ExpandedFileID,
- LineStart, ColumnStart,
- LineStart + NumLines, ColumnEnd, Kind);
+ auto CMR = CounterMappingRegion(
+ C, C2,
+ CounterMappingRegion::MCDCParameters{
+ static_cast<unsigned>(BIDX), static_cast<unsigned>(NC),
+ static_cast<unsigned>(ID), static_cast<unsigned>(TID),
+ static_cast<unsigned>(FID)},
+ InferredFileID, ExpandedFileID, LineStart, ColumnStart,
+ LineStart + NumLines, ColumnEnd, Kind);
if (CMR.startLoc() > CMR.endLoc())
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(
+ coveragemap_error::malformed,
+ "counter mapping region locations are incorrect");
MappingRegions.push_back(CMR);
}
return Error::success();
@@ -454,9 +493,13 @@ Error InstrProfSymtab::create(SectionRef &Section) {
// If this is a linked PE/COFF file, then we have to skip over the null byte
// that is allocated in the .lprfn$A section in the LLVM profiling runtime.
+ // If the name section is .lprfcovnames, it doesn't have the null byte at the
+ // beginning.
const ObjectFile *Obj = Section.getObject();
if (isa<COFFObjectFile>(Obj) && !Obj->isRelocatableObject())
- Data = Data.drop_front(1);
+ if (Expected<StringRef> NameOrErr = Section.getName())
+ if (*NameOrErr != getInstrProfSectionName(IPSK_covname, Triple::COFF))
+ Data = Data.drop_front(1);
return Error::success();
}
@@ -523,7 +566,7 @@ struct CovMapFuncRecordReader {
const char *OutOfLineMappingBuf,
const char *OutOfLineMappingBufEnd) = 0;
- template <class IntPtrT, support::endianness Endian>
+ template <class IntPtrT, llvm::endianness Endian>
static Expected<std::unique_ptr<CovMapFuncRecordReader>>
get(CovMapVersion Version, InstrProfSymtab &P,
std::vector<BinaryCoverageReader::ProfileMappingRecord> &R, StringRef D,
@@ -531,7 +574,7 @@ struct CovMapFuncRecordReader {
};
// A class for reading coverage mapping function records for a module.
-template <CovMapVersion Version, class IntPtrT, support::endianness Endian>
+template <CovMapVersion Version, class IntPtrT, llvm::endianness Endian>
class VersionedCovMapFuncRecordReader : public CovMapFuncRecordReader {
using FuncRecordType =
typename CovMapTraits<Version, IntPtrT>::CovMapFuncRecordType;
@@ -613,7 +656,9 @@ public:
using namespace support;
if (CovBuf + sizeof(CovMapHeader) > CovBufEnd)
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(
+ coveragemap_error::malformed,
+ "coverage mapping header section is larger than buffer size");
auto CovHeader = reinterpret_cast<const CovMapHeader *>(CovBuf);
uint32_t NRecords = CovHeader->getNRecords<Endian>();
uint32_t FilenamesSize = CovHeader->getFilenamesSize<Endian>();
@@ -634,7 +679,9 @@ public:
// Get the filenames.
if (CovBuf + FilenamesSize > CovBufEnd)
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(
+ coveragemap_error::malformed,
+ "filenames section is larger than buffer size");
size_t FilenamesBegin = Filenames.size();
StringRef FilenameRegion(CovBuf, FilenamesSize);
RawCoverageFilenamesReader Reader(FilenameRegion, Filenames,
@@ -673,12 +720,15 @@ public:
// coverage header).
const char *MappingBuf = CovBuf;
if (Version >= CovMapVersion::Version4 && CoverageSize != 0)
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(coveragemap_error::malformed,
+ "coverage mapping size is not zero");
CovBuf += CoverageSize;
const char *MappingEnd = CovBuf;
if (CovBuf > CovBufEnd)
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(
+ coveragemap_error::malformed,
+ "function records section is larger than buffer size");
if (Version < CovMapVersion::Version4) {
// Read each function record.
@@ -707,7 +757,9 @@ public:
CFR->template advanceByOne<Endian>(OutOfLineMappingBuf);
if (Version < CovMapVersion::Version4)
if (NextMappingBuf > OutOfLineMappingBufEnd)
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(
+ coveragemap_error::malformed,
+ "next mapping buffer is larger than buffer size");
// Look up the set of filenames associated with this function record.
std::optional<FilenameRange> FileRange;
@@ -717,7 +769,10 @@ public:
uint64_t FilenamesRef = CFR->template getFilenamesRef<Endian>();
auto It = FileRangeMap.find(FilenamesRef);
if (It == FileRangeMap.end())
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(
+ coveragemap_error::malformed,
+ "no filename found for function with hash=0x" +
+ Twine::utohexstr(FilenamesRef));
else
FileRange = It->getSecond();
}
@@ -728,7 +783,9 @@ public:
CFR->template getCoverageMapping<Endian>(OutOfLineMappingBuf);
if (Version >= CovMapVersion::Version4 &&
Mapping.data() + Mapping.size() > FuncRecBufEnd)
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(
+ coveragemap_error::malformed,
+ "coverage mapping data is larger than buffer size");
if (Error Err = insertFunctionRecordIfNeeded(CFR, Mapping, *FileRange))
return Err;
}
@@ -741,7 +798,7 @@ public:
} // end anonymous namespace
-template <class IntPtrT, support::endianness Endian>
+template <class IntPtrT, llvm::endianness Endian>
Expected<std::unique_ptr<CovMapFuncRecordReader>> CovMapFuncRecordReader::get(
CovMapVersion Version, InstrProfSymtab &P,
std::vector<BinaryCoverageReader::ProfileMappingRecord> &R, StringRef D,
@@ -757,6 +814,7 @@ Expected<std::unique_ptr<CovMapFuncRecordReader>> CovMapFuncRecordReader::get(
case CovMapVersion::Version4:
case CovMapVersion::Version5:
case CovMapVersion::Version6:
+ case CovMapVersion::Version7:
// Decompress the name data.
if (Error E = P.create(P.getNameData()))
return std::move(E);
@@ -775,11 +833,14 @@ Expected<std::unique_ptr<CovMapFuncRecordReader>> CovMapFuncRecordReader::get(
else if (Version == CovMapVersion::Version6)
return std::make_unique<VersionedCovMapFuncRecordReader<
CovMapVersion::Version6, IntPtrT, Endian>>(P, R, D, F);
+ else if (Version == CovMapVersion::Version7)
+ return std::make_unique<VersionedCovMapFuncRecordReader<
+ CovMapVersion::Version7, IntPtrT, Endian>>(P, R, D, F);
}
llvm_unreachable("Unsupported version");
}
-template <typename T, support::endianness Endian>
+template <typename T, llvm::endianness Endian>
static Error readCoverageMappingData(
InstrProfSymtab &ProfileNames, StringRef CovMap, StringRef FuncRecords,
std::vector<BinaryCoverageReader::ProfileMappingRecord> &Records,
@@ -822,114 +883,158 @@ static Error readCoverageMappingData(
return Error::success();
}
-static const char *TestingFormatMagic = "llvmcovmtestdata";
-
Expected<std::unique_ptr<BinaryCoverageReader>>
BinaryCoverageReader::createCoverageReaderFromBuffer(
StringRef Coverage, FuncRecordsStorage &&FuncRecords,
InstrProfSymtab &&ProfileNames, uint8_t BytesInAddress,
- support::endianness Endian, StringRef CompilationDir) {
+ llvm::endianness Endian, StringRef CompilationDir) {
std::unique_ptr<BinaryCoverageReader> Reader(
new BinaryCoverageReader(std::move(FuncRecords)));
Reader->ProfileNames = std::move(ProfileNames);
StringRef FuncRecordsRef = Reader->FuncRecords->getBuffer();
- if (BytesInAddress == 4 && Endian == support::endianness::little) {
- if (Error E =
- readCoverageMappingData<uint32_t, support::endianness::little>(
- Reader->ProfileNames, Coverage, FuncRecordsRef,
- Reader->MappingRecords, CompilationDir, Reader->Filenames))
+ if (BytesInAddress == 4 && Endian == llvm::endianness::little) {
+ if (Error E = readCoverageMappingData<uint32_t, llvm::endianness::little>(
+ Reader->ProfileNames, Coverage, FuncRecordsRef,
+ Reader->MappingRecords, CompilationDir, Reader->Filenames))
return std::move(E);
- } else if (BytesInAddress == 4 && Endian == support::endianness::big) {
- if (Error E = readCoverageMappingData<uint32_t, support::endianness::big>(
+ } else if (BytesInAddress == 4 && Endian == llvm::endianness::big) {
+ if (Error E = readCoverageMappingData<uint32_t, llvm::endianness::big>(
Reader->ProfileNames, Coverage, FuncRecordsRef,
Reader->MappingRecords, CompilationDir, Reader->Filenames))
return std::move(E);
- } else if (BytesInAddress == 8 && Endian == support::endianness::little) {
- if (Error E =
- readCoverageMappingData<uint64_t, support::endianness::little>(
- Reader->ProfileNames, Coverage, FuncRecordsRef,
- Reader->MappingRecords, CompilationDir, Reader->Filenames))
+ } else if (BytesInAddress == 8 && Endian == llvm::endianness::little) {
+ if (Error E = readCoverageMappingData<uint64_t, llvm::endianness::little>(
+ Reader->ProfileNames, Coverage, FuncRecordsRef,
+ Reader->MappingRecords, CompilationDir, Reader->Filenames))
return std::move(E);
- } else if (BytesInAddress == 8 && Endian == support::endianness::big) {
- if (Error E = readCoverageMappingData<uint64_t, support::endianness::big>(
+ } else if (BytesInAddress == 8 && Endian == llvm::endianness::big) {
+ if (Error E = readCoverageMappingData<uint64_t, llvm::endianness::big>(
Reader->ProfileNames, Coverage, FuncRecordsRef,
Reader->MappingRecords, CompilationDir, Reader->Filenames))
return std::move(E);
} else
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(
+ coveragemap_error::malformed,
+ "not supported endianness or bytes in address");
return std::move(Reader);
}
static Expected<std::unique_ptr<BinaryCoverageReader>>
loadTestingFormat(StringRef Data, StringRef CompilationDir) {
uint8_t BytesInAddress = 8;
- support::endianness Endian = support::endianness::little;
-
- Data = Data.substr(StringRef(TestingFormatMagic).size());
+ llvm::endianness Endian = llvm::endianness::little;
+
+ // Read the magic and version.
+ Data = Data.substr(sizeof(TestingFormatMagic));
+ if (Data.size() < sizeof(uint64_t))
+ return make_error<CoverageMapError>(coveragemap_error::malformed,
+ "the size of data is too small");
+ auto TestingVersion =
+ support::endian::byte_swap<uint64_t, llvm::endianness::little>(
+ *reinterpret_cast<const uint64_t *>(Data.data()));
+ Data = Data.substr(sizeof(uint64_t));
+
+ // Read the ProfileNames data.
if (Data.empty())
return make_error<CoverageMapError>(coveragemap_error::truncated);
unsigned N = 0;
uint64_t ProfileNamesSize = decodeULEB128(Data.bytes_begin(), &N);
if (N > Data.size())
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(
+ coveragemap_error::malformed,
+ "the size of TestingFormatMagic is too big");
Data = Data.substr(N);
if (Data.empty())
return make_error<CoverageMapError>(coveragemap_error::truncated);
N = 0;
uint64_t Address = decodeULEB128(Data.bytes_begin(), &N);
if (N > Data.size())
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(coveragemap_error::malformed,
+ "the size of ULEB128 is too big");
Data = Data.substr(N);
if (Data.size() < ProfileNamesSize)
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(coveragemap_error::malformed,
+ "the size of ProfileNames is too big");
InstrProfSymtab ProfileNames;
if (Error E = ProfileNames.create(Data.substr(0, ProfileNamesSize), Address))
return std::move(E);
Data = Data.substr(ProfileNamesSize);
+
+ // In Version2, the size of CoverageMapping is stored directly.
+ uint64_t CoverageMappingSize;
+ if (TestingVersion == uint64_t(TestingFormatVersion::Version2)) {
+ N = 0;
+ CoverageMappingSize = decodeULEB128(Data.bytes_begin(), &N);
+ if (N > Data.size())
+ return make_error<CoverageMapError>(coveragemap_error::malformed,
+ "the size of ULEB128 is too big");
+ Data = Data.substr(N);
+ if (CoverageMappingSize < sizeof(CovMapHeader))
+ return make_error<CoverageMapError>(
+ coveragemap_error::malformed,
+ "the size of CoverageMapping is teoo small");
+ } else if (TestingVersion != uint64_t(TestingFormatVersion::Version1)) {
+ return make_error<CoverageMapError>(coveragemap_error::unsupported_version);
+ }
+
// Skip the padding bytes because coverage map data has an alignment of 8.
- size_t Pad = offsetToAlignedAddr(Data.data(), Align(8));
+ auto Pad = offsetToAlignedAddr(Data.data(), Align(8));
if (Data.size() < Pad)
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(coveragemap_error::malformed,
+ "insufficient padding");
Data = Data.substr(Pad);
if (Data.size() < sizeof(CovMapHeader))
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(
+ coveragemap_error::malformed,
+ "coverage mapping header section is larger than data size");
auto const *CovHeader = reinterpret_cast<const CovMapHeader *>(
Data.substr(0, sizeof(CovMapHeader)).data());
- CovMapVersion Version =
- (CovMapVersion)CovHeader->getVersion<support::endianness::little>();
- StringRef CoverageMapping;
- BinaryCoverageReader::FuncRecordsStorage CoverageRecords;
+ auto Version =
+ CovMapVersion(CovHeader->getVersion<llvm::endianness::little>());
+
+ // In Version1, the size of CoverageMapping is calculated.
+ if (TestingVersion == uint64_t(TestingFormatVersion::Version1)) {
+ if (Version < CovMapVersion::Version4) {
+ CoverageMappingSize = Data.size();
+ } else {
+ auto FilenamesSize =
+ CovHeader->getFilenamesSize<llvm::endianness::little>();
+ CoverageMappingSize = sizeof(CovMapHeader) + FilenamesSize;
+ }
+ }
+
+ auto CoverageMapping = Data.substr(0, CoverageMappingSize);
+ Data = Data.substr(CoverageMappingSize);
+
+ // Read the CoverageRecords data.
if (Version < CovMapVersion::Version4) {
- CoverageMapping = Data;
- if (CoverageMapping.empty())
- return make_error<CoverageMapError>(coveragemap_error::truncated);
- CoverageRecords = MemoryBuffer::getMemBuffer("");
+ if (!Data.empty())
+ return make_error<CoverageMapError>(coveragemap_error::malformed,
+ "data is not empty");
} else {
- uint32_t FilenamesSize =
- CovHeader->getFilenamesSize<support::endianness::little>();
- uint32_t CoverageMappingSize = sizeof(CovMapHeader) + FilenamesSize;
- CoverageMapping = Data.substr(0, CoverageMappingSize);
- if (CoverageMapping.empty())
- return make_error<CoverageMapError>(coveragemap_error::truncated);
- Data = Data.substr(CoverageMappingSize);
// Skip the padding bytes because coverage records data has an alignment
// of 8.
Pad = offsetToAlignedAddr(Data.data(), Align(8));
if (Data.size() < Pad)
- return make_error<CoverageMapError>(coveragemap_error::malformed);
- CoverageRecords = MemoryBuffer::getMemBuffer(Data.substr(Pad));
- if (CoverageRecords->getBufferSize() == 0)
- return make_error<CoverageMapError>(coveragemap_error::truncated);
+ return make_error<CoverageMapError>(coveragemap_error::malformed,
+ "insufficient padding");
+ Data = Data.substr(Pad);
}
+ BinaryCoverageReader::FuncRecordsStorage CoverageRecords =
+ MemoryBuffer::getMemBuffer(Data);
+
return BinaryCoverageReader::createCoverageReaderFromBuffer(
CoverageMapping, std::move(CoverageRecords), std::move(ProfileNames),
BytesInAddress, Endian, CompilationDir);
}
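Taken together, the reader above implies this on-disk layout for the testing format: a 64-bit little-endian magic, a 64-bit version, a ULEB128 ProfileNames size and a ULEB128 address, the ProfileNames payload, a ULEB128 CoverageMappingSize (Version2 only), then the 8-byte-aligned coverage mapping followed by the 8-byte-aligned coverage records. A standalone sketch of the offset arithmetic, assuming the caller supplies the encoded widths of the ULEB128 fields (all names here are illustrative):

#include <cstddef>

// Round Off up to the next multiple of 8, as the offsetToAlignedAddr calls
// with Align(8) do for the mapping and records payloads.
static size_t alignTo8(size_t Off) { return (Off + 7) & ~size_t(7); }

struct TestingLayout {
  size_t NamesOff;   // start of the ProfileNames payload
  size_t MappingOff; // start of the 8-byte-aligned coverage mapping
};

static TestingLayout layout(size_t NamesSizeULEB, size_t AddrULEB,
                            size_t NamesSize, bool IsVersion2,
                            size_t MappingSizeULEB) {
  size_t Off = 8 /*magic*/ + 8 /*version*/ + NamesSizeULEB + AddrULEB;
  size_t NamesOff = Off;
  Off += NamesSize;
  if (IsVersion2) // Version2 stores the mapping size explicitly
    Off += MappingSizeULEB;
  return {NamesOff, alignTo8(Off)};
}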
-/// Find all sections that match \p Name. There may be more than one if comdats
-/// are in use, e.g. for the __llvm_covfun section on ELF.
-static Expected<std::vector<SectionRef>> lookupSections(ObjectFile &OF,
- StringRef Name) {
+/// Find all sections that match the name for \p IPSK. There may be more than one if
+/// comdats are in use, e.g. for the __llvm_covfun section on ELF.
+static Expected<std::vector<SectionRef>>
+lookupSections(ObjectFile &OF, InstrProfSectKind IPSK) {
+ auto ObjFormat = OF.getTripleObjectFormat();
+ auto Name =
+ getInstrProfSectionName(IPSK, ObjFormat, /*AddSegmentInfo=*/false);
// On COFF, the object file section name may end in "$M". This tells the
// linker to sort these sections between "$A" and "$Z". The linker removes the
// dollar and everything after it in the final binary. Do the same to match.
@@ -944,8 +1049,13 @@ static Expected<std::vector<SectionRef>> lookupSections(ObjectFile &OF,
Expected<StringRef> NameOrErr = Section.getName();
if (!NameOrErr)
return NameOrErr.takeError();
- if (stripSuffix(*NameOrErr) == Name)
+ if (stripSuffix(*NameOrErr) == Name) {
+ // The COFF profile name section contains two null bytes marking the start
+ // and end of the section; if its size is 2 bytes, the section is empty.
+ if (IsCOFF && IPSK == IPSK_name && Section.getSize() == 2)
+ continue;
Sections.push_back(Section);
+ }
}
if (Sections.empty())
return make_error<CoverageMapError>(coveragemap_error::no_data_found);
@@ -972,45 +1082,49 @@ loadBinaryFormat(std::unique_ptr<Binary> Bin, StringRef Arch,
return errorCodeToError(object_error::arch_not_found);
} else
// We can only handle object files.
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(coveragemap_error::malformed,
+ "binary is not an object file");
// The coverage uses native pointer sizes for the object it's written in.
uint8_t BytesInAddress = OF->getBytesInAddress();
- support::endianness Endian = OF->isLittleEndian()
- ? support::endianness::little
- : support::endianness::big;
+ llvm::endianness Endian =
+ OF->isLittleEndian() ? llvm::endianness::little : llvm::endianness::big;
// Look for the sections that we are interested in.
- auto ObjFormat = OF->getTripleObjectFormat();
- auto NamesSection =
- lookupSections(*OF, getInstrProfSectionName(IPSK_name, ObjFormat,
- /*AddSegmentInfo=*/false));
- if (auto E = NamesSection.takeError())
+ InstrProfSymtab ProfileNames;
+ std::vector<SectionRef> NamesSectionRefs;
+ // If IPSK_name is not found, fall back to searching for IPSK_covname, which
+ // is used when binary correlation is enabled.
+ auto NamesSection = lookupSections(*OF, IPSK_name);
+ if (auto E = NamesSection.takeError()) {
+ consumeError(std::move(E));
+ NamesSection = lookupSections(*OF, IPSK_covname);
+ if (auto E = NamesSection.takeError())
+ return std::move(E);
+ }
+ NamesSectionRefs = *NamesSection;
+
+ if (NamesSectionRefs.size() != 1)
+ return make_error<CoverageMapError>(
+ coveragemap_error::malformed,
+ "the size of coverage mapping section is not one");
+ if (Error E = ProfileNames.create(NamesSectionRefs.back()))
return std::move(E);
- auto CoverageSection =
- lookupSections(*OF, getInstrProfSectionName(IPSK_covmap, ObjFormat,
- /*AddSegmentInfo=*/false));
+
+ auto CoverageSection = lookupSections(*OF, IPSK_covmap);
if (auto E = CoverageSection.takeError())
return std::move(E);
std::vector<SectionRef> CoverageSectionRefs = *CoverageSection;
if (CoverageSectionRefs.size() != 1)
- return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return make_error<CoverageMapError>(coveragemap_error::malformed,
+ "the size of name section is not one");
auto CoverageMappingOrErr = CoverageSectionRefs.back().getContents();
if (!CoverageMappingOrErr)
return CoverageMappingOrErr.takeError();
StringRef CoverageMapping = CoverageMappingOrErr.get();
- InstrProfSymtab ProfileNames;
- std::vector<SectionRef> NamesSectionRefs = *NamesSection;
- if (NamesSectionRefs.size() != 1)
- return make_error<CoverageMapError>(coveragemap_error::malformed);
- if (Error E = ProfileNames.create(NamesSectionRefs.back()))
- return std::move(E);
-
// Look for the coverage records section (Version4 only).
- auto CoverageRecordsSections =
- lookupSections(*OF, getInstrProfSectionName(IPSK_covfun, ObjFormat,
- /*AddSegmentInfo=*/false));
+ auto CoverageRecordsSections = lookupSections(*OF, IPSK_covfun);
BinaryCoverageReader::FuncRecordsStorage FuncRecords;
if (auto E = CoverageRecordsSections.takeError()) {
@@ -1081,14 +1195,19 @@ BinaryCoverageReader::create(
StringRef CompilationDir, SmallVectorImpl<object::BuildIDRef> *BinaryIDs) {
std::vector<std::unique_ptr<BinaryCoverageReader>> Readers;
- if (ObjectBuffer.getBuffer().startswith(TestingFormatMagic)) {
- // This is a special format used for testing.
- auto ReaderOrErr =
- loadTestingFormat(ObjectBuffer.getBuffer(), CompilationDir);
- if (!ReaderOrErr)
- return ReaderOrErr.takeError();
- Readers.push_back(std::move(ReaderOrErr.get()));
- return std::move(Readers);
+ if (ObjectBuffer.getBuffer().size() > sizeof(TestingFormatMagic)) {
+ uint64_t Magic =
+ support::endian::byte_swap<uint64_t, llvm::endianness::little>(
+ *reinterpret_cast<const uint64_t *>(ObjectBuffer.getBufferStart()));
+ if (Magic == TestingFormatMagic) {
+ // This is a special format used for testing.
+ auto ReaderOrErr =
+ loadTestingFormat(ObjectBuffer.getBuffer(), CompilationDir);
+ if (!ReaderOrErr)
+ return ReaderOrErr.takeError();
+ Readers.push_back(std::move(ReaderOrErr.get()));
+ return std::move(Readers);
+ }
}
auto BinOrErr = createBinary(ObjectBuffer);
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
index df65032da517..1c7d8a8909c4 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
@@ -237,6 +237,23 @@ void CoverageMappingWriter::write(raw_ostream &OS) {
writeCounter(MinExpressions, Count, OS);
writeCounter(MinExpressions, FalseCount, OS);
break;
+ case CounterMappingRegion::MCDCBranchRegion:
+ encodeULEB128(unsigned(I->Kind)
+ << Counter::EncodingCounterTagAndExpansionRegionTagBits,
+ OS);
+ writeCounter(MinExpressions, Count, OS);
+ writeCounter(MinExpressions, FalseCount, OS);
+ encodeULEB128(unsigned(I->MCDCParams.ID), OS);
+ encodeULEB128(unsigned(I->MCDCParams.TrueID), OS);
+ encodeULEB128(unsigned(I->MCDCParams.FalseID), OS);
+ break;
+ case CounterMappingRegion::MCDCDecisionRegion:
+ encodeULEB128(unsigned(I->Kind)
+ << Counter::EncodingCounterTagAndExpansionRegionTagBits,
+ OS);
+ encodeULEB128(unsigned(I->MCDCParams.BitmapIdx), OS);
+ encodeULEB128(unsigned(I->MCDCParams.NumConditions), OS);
+ break;
}
assert(I->LineStart >= PrevLineStart);
encodeULEB128(I->LineStart - PrevLineStart, OS);
@@ -249,3 +266,37 @@ void CoverageMappingWriter::write(raw_ostream &OS) {
// Ensure that all file ids have at least one mapping region.
assert(CurrentFileID == (VirtualFileMapping.size() - 1));
}
+
+void TestingFormatWriter::write(raw_ostream &OS, TestingFormatVersion Version) {
+ auto ByteSwap = [](uint64_t N) {
+ return support::endian::byte_swap<uint64_t, llvm::endianness::little>(N);
+ };
+
+ // Output a 64bit magic number.
+ auto Magic = ByteSwap(TestingFormatMagic);
+ OS.write(reinterpret_cast<char *>(&Magic), sizeof(Magic));
+
+ // Output a 64bit version field.
+ auto VersionLittle = ByteSwap(uint64_t(Version));
+ OS.write(reinterpret_cast<char *>(&VersionLittle), sizeof(VersionLittle));
+
+ // Output the ProfileNames data.
+ encodeULEB128(ProfileNamesData.size(), OS);
+ encodeULEB128(ProfileNamesAddr, OS);
+ OS << ProfileNamesData;
+
+ // Version2 adds an extra field to indicate the size of the
+ // CoverageMappingData.
+ if (Version == TestingFormatVersion::Version2)
+ encodeULEB128(CoverageMappingData.size(), OS);
+
+ // Coverage mapping data is expected to have an alignment of 8.
+ for (unsigned Pad = offsetToAlignment(OS.tell(), Align(8)); Pad; --Pad)
+ OS.write(uint8_t(0));
+ OS << CoverageMappingData;
+
+ // Coverage records data is expected to have an alignment of 8.
+ for (unsigned Pad = offsetToAlignment(OS.tell(), Align(8)); Pad; --Pad)
+ OS.write(uint8_t(0));
+ OS << CoverageRecordsData;
+}
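The two padding loops above keep the mapping and records payloads 8-byte aligned in the output stream. A tiny standalone equivalent (illustrative): for a stream position Pos, offsetToAlignment(Pos, Align(8)) reduces to (-Pos) & 7.

#include <cstdint>
#include <ostream>

// Emit zero bytes until the stream position is a multiple of 8.
static void padTo8(std::ostream &OS, uint64_t Pos) {
  for (uint64_t Pad = (-Pos) & 7; Pad; --Pad)
    OS.put('\0');
}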
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/GCOV.cpp b/contrib/llvm-project/llvm/lib/ProfileData/GCOV.cpp
index 1e70431a1fae..f7bf42e5c4d2 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/GCOV.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/GCOV.cpp
@@ -237,23 +237,14 @@ bool GCOVFile::readGCDA(GCOVBuffer &buf) {
if (tag == GCOV_TAG_OBJECT_SUMMARY) {
buf.readInt(runCount);
buf.readInt(dummy);
- // clang<11 uses a fake 4.2 format which sets length to 9.
- if (length == 9)
- buf.readInt(runCount);
} else if (tag == GCOV_TAG_PROGRAM_SUMMARY) {
- // clang<11 uses a fake 4.2 format which sets length to 0.
- if (length > 0) {
- buf.readInt(dummy);
- buf.readInt(dummy);
- buf.readInt(runCount);
- }
+ buf.readInt(dummy);
+ buf.readInt(dummy);
+ buf.readInt(runCount);
++programCount;
} else if (tag == GCOV_TAG_FUNCTION) {
if (length == 0) // Placeholder
continue;
- // As of GCC 10, GCOV_TAG_FUNCTION_LENGTH has never been larger than 3.
- // However, clang<11 uses a fake 4.2 format which may set length larger
- // than 3.
if (length < 2 || !buf.readInt(ident))
return false;
auto It = identToFunction.find(ident);
@@ -346,7 +337,7 @@ StringRef GCOVFunction::getName(bool demangle) const {
return Name;
if (demangled.empty()) {
do {
- if (Name.startswith("_Z")) {
+ if (Name.starts_with("_Z")) {
// Name is guaranteed to be NUL-terminated.
if (char *res = itaniumDemangle(Name.data())) {
demangled = res;
@@ -374,25 +365,60 @@ GCOVBlock &GCOVFunction::getExitBlock() const {
// For each basic block, the sum of incoming edge counts equals the sum of
// outgoing edge counts by Kirchhoff's circuit law. If the unmeasured arcs form a
// spanning tree, the count for each unmeasured arc (GCOV_ARC_ON_TREE) can be
-// uniquely identified.
-uint64_t GCOVFunction::propagateCounts(const GCOVBlock &v, GCOVArc *pred) {
- // If GCOV_ARC_ON_TREE edges do form a tree, visited is not needed; otherwise
- // this prevents infinite recursion.
- if (!visited.insert(&v).second)
- return 0;
-
- uint64_t excess = 0;
- for (GCOVArc *e : v.srcs())
- if (e != pred)
- excess += e->onTree() ? propagateCounts(e->src, e) : e->count;
- for (GCOVArc *e : v.dsts())
- if (e != pred)
- excess -= e->onTree() ? propagateCounts(e->dst, e) : e->count;
- if (int64_t(excess) < 0)
- excess = -excess;
- if (pred)
- pred->count = excess;
- return excess;
+// uniquely identified. Use an iterative algorithm to decrease stack usage for
+// library users in threads. See the edge propagation algorithm in Optimally
+// Profiling and Tracing Programs, ACM Transactions on Programming Languages and
+// Systems, 1994.
+void GCOVFunction::propagateCounts(const GCOVBlock &v, GCOVArc *pred) {
+ struct Elem {
+ const GCOVBlock &v;
+ GCOVArc *pred;
+ bool inDst;
+ size_t i = 0;
+ uint64_t excess = 0;
+ };
+
+ SmallVector<Elem, 0> stack;
+ stack.push_back({v, pred, false});
+ for (;;) {
+ Elem &u = stack.back();
+ // If GCOV_ARC_ON_TREE edges do form a tree, visited is not needed;
+ // otherwise, this prevents infinite recursion for bad input.
+ if (u.i == 0 && !visited.insert(&u.v).second) {
+ stack.pop_back();
+ if (stack.empty())
+ break;
+ continue;
+ }
+ if (u.i < u.v.pred.size()) {
+ GCOVArc *e = u.v.pred[u.i++];
+ if (e != u.pred) {
+ if (e->onTree())
+ stack.push_back({e->src, e, /*inDst=*/false});
+ else
+ u.excess += e->count;
+ }
+ } else if (u.i < u.v.pred.size() + u.v.succ.size()) {
+ GCOVArc *e = u.v.succ[u.i++ - u.v.pred.size()];
+ if (e != u.pred) {
+ if (e->onTree())
+ stack.push_back({e->dst, e, /*inDst=*/true});
+ else
+ u.excess -= e->count;
+ }
+ } else {
+ uint64_t excess = u.excess;
+ if (static_cast<int64_t>(excess) < 0)
+ excess = -excess;
+ if (u.pred)
+ u.pred->count = excess;
+ bool inDst = u.inDst;
+ stack.pop_back();
+ if (stack.empty())
+ break;
+ stack.back().excess += inDst ? -excess : excess;
+ }
+ }
}
void GCOVFunction::print(raw_ostream &OS) const {
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp b/contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp
index 0f9c33de3f52..649d814cfd9d 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
@@ -112,11 +113,11 @@ static std::string getInstrProfErrString(instrprof_error Err,
case instrprof_error::malformed:
OS << "malformed instrumentation profile data";
break;
- case instrprof_error::missing_debug_info_for_correlation:
- OS << "debug info for correlation is required";
+ case instrprof_error::missing_correlation_info:
+ OS << "debug info/binary for correlation is required";
break;
- case instrprof_error::unexpected_debug_info_for_correlation:
- OS << "debug info for correlation is not necessary";
+ case instrprof_error::unexpected_correlation_info:
+ OS << "debug info/binary for correlation is not necessary";
break;
case instrprof_error::unable_to_correlate_profile:
OS << "unable to correlate profile";
@@ -135,6 +136,9 @@ static std::string getInstrProfErrString(instrprof_error Err,
case instrprof_error::count_mismatch:
OS << "function basic block count change detected (counter mismatch)";
break;
+ case instrprof_error::bitmap_mismatch:
+ OS << "function bitmap size change detected (bitmap size mismatch)";
+ break;
case instrprof_error::counter_overflow:
OS << "counter overflow";
break;
@@ -157,6 +161,9 @@ static std::string getInstrProfErrString(instrprof_error Err,
case instrprof_error::raw_profile_version_mismatch:
OS << "raw profile version mismatch";
break;
+ case instrprof_error::counter_value_too_large:
+ OS << "excessively large counter value suggests corrupted profile data";
+ break;
}
// If optional error message is not empty, append it to the message.
@@ -264,11 +271,56 @@ static StringRef stripDirPrefix(StringRef PathNameStr, uint32_t NumPrefix) {
return PathNameStr.substr(LastPos);
}
-// Return the PGOFuncName. This function has some special handling when called
-// in LTO optimization. The following only applies when calling in LTO passes
-// (when \c InLTO is true): LTO's internalization privatizes many global linkage
-// symbols. This happens after value profile annotation, but those internal
-// linkage functions should not have a source prefix.
+static StringRef getStrippedSourceFileName(const GlobalObject &GO) {
+ StringRef FileName(GO.getParent()->getSourceFileName());
+ uint32_t StripLevel = StaticFuncFullModulePrefix ? 0 : (uint32_t)-1;
+ if (StripLevel < StaticFuncStripDirNamePrefix)
+ StripLevel = StaticFuncStripDirNamePrefix;
+ if (StripLevel)
+ FileName = stripDirPrefix(FileName, StripLevel);
+ return FileName;
+}
+
+// The PGO name has the format [<filepath>;]<linkage-name> where <filepath>; is
+// provided if linkage is local and <linkage-name> is the mangled function
+// name. The filepath is used to discriminate possibly identical function names.
+// ; is used because it is unlikely to be found in either <filepath> or
+// <linkage-name>.
+//
+// Older compilers used getPGOFuncName() which has the format
+// [<filepath>:]<function-name>. <filepath> is used to discriminate between
+// possibly identical function names when linkage is local and <function-name>
+// simply comes from F.getName(). This caused trouble for Objective-C functions
+// which commonly have :'s in their names. Also, since <function-name> is not
+// mangled, they cannot be passed to Mach-O linkers via -order_file. We still
+// need to compute this name to lookup functions from profiles built by older
+// compilers.
+static std::string
+getIRPGONameForGlobalObject(const GlobalObject &GO,
+ GlobalValue::LinkageTypes Linkage,
+ StringRef FileName) {
+ SmallString<64> Name;
+ if (llvm::GlobalValue::isLocalLinkage(Linkage)) {
+ Name.append(FileName.empty() ? "<unknown>" : FileName);
+ Name.append(";");
+ }
+ Mangler().getNameWithPrefix(Name, &GO, /*CannotUsePrivateLabel=*/true);
+ return Name.str().str();
+}
+
+static std::optional<std::string> lookupPGONameFromMetadata(MDNode *MD) {
+ if (MD != nullptr) {
+ StringRef S = cast<MDString>(MD->getOperand(0))->getString();
+ return S.str();
+ }
+ return {};
+}
+
+// Returns the PGO object name. This function has some special handling
+// when called in LTO optimization. The following only applies when calling in
+// LTO passes (when \c InLTO is true): LTO's internalization privatizes many
+// global linkage symbols. This happens after value profile annotation, but
+// those internal linkage functions should not have a source prefix.
// Additionally, for ThinLTO mode, exported internal functions are promoted
// and renamed. We need to ensure that the original internal PGO name is
// used when computing the GUID that is compared against the profiled GUIDs.
@@ -277,22 +329,42 @@ static StringRef stripDirPrefix(StringRef PathNameStr, uint32_t NumPrefix) {
// symbols in the value profile annotation step
// (PGOUseFunc::annotateIndirectCallSites). If a symbol does not have the meta
// data, its original linkage must be non-internal.
+static std::string getIRPGOObjectName(const GlobalObject &GO, bool InLTO,
+ MDNode *PGONameMetadata) {
+ if (!InLTO) {
+ auto FileName = getStrippedSourceFileName(GO);
+ return getIRPGONameForGlobalObject(GO, GO.getLinkage(), FileName);
+ }
+
+ // In LTO mode (when InLTO is true), first check if there is metadata.
+ if (auto IRPGOFuncName = lookupPGONameFromMetadata(PGONameMetadata))
+ return *IRPGOFuncName;
+
+ // If there is no metadata, the function must be a global before the value
+ // profile annotation pass. Its current linkage may be internal if it is
+ // internalized in LTO mode.
+ return getIRPGONameForGlobalObject(GO, GlobalValue::ExternalLinkage, "");
+}
+
+// Returns the IRPGO function name and does special handling when called
+// in LTO optimization. See the comments of `getIRPGOObjectName` for details.
+std::string getIRPGOFuncName(const Function &F, bool InLTO) {
+ return getIRPGOObjectName(F, InLTO, getPGOFuncNameMetadata(F));
+}
+
+// This is similar to `getIRPGOFuncName` except that this function calls
+// 'getPGOFuncName' to get a name and `getIRPGOFuncName` calls
+// 'getIRPGONameForGlobalObject'. See the difference between the two callees in the
+// comments of `getIRPGONameForGlobalObject`.
std::string getPGOFuncName(const Function &F, bool InLTO, uint64_t Version) {
if (!InLTO) {
- StringRef FileName(F.getParent()->getSourceFileName());
- uint32_t StripLevel = StaticFuncFullModulePrefix ? 0 : (uint32_t)-1;
- if (StripLevel < StaticFuncStripDirNamePrefix)
- StripLevel = StaticFuncStripDirNamePrefix;
- if (StripLevel)
- FileName = stripDirPrefix(FileName, StripLevel);
+ auto FileName = getStrippedSourceFileName(F);
return getPGOFuncName(F.getName(), F.getLinkage(), FileName, Version);
}
// In LTO mode (when InLTO is true), first check if there is metadata.
- if (MDNode *MD = getPGOFuncNameMetadata(F)) {
- StringRef S = cast<MDString>(MD->getOperand(0))->getString();
- return S.str();
- }
+ if (auto PGOFuncName = lookupPGONameFromMetadata(getPGOFuncNameMetadata(F)))
+ return *PGOFuncName;
// If there is no meta data, the function must be a global before the value
// profile annotation pass. Its current linkage may be internal if it is
@@ -300,11 +372,20 @@ std::string getPGOFuncName(const Function &F, bool InLTO, uint64_t Version) {
return getPGOFuncName(F.getName(), GlobalValue::ExternalLinkage, "");
}
+// See getIRPGOFuncName() for a description of the format.
+std::pair<StringRef, StringRef>
+getParsedIRPGOFuncName(StringRef IRPGOFuncName) {
+ auto [FileName, FuncName] = IRPGOFuncName.split(';');
+ if (FuncName.empty())
+ return std::make_pair(StringRef(), IRPGOFuncName);
+ return std::make_pair(FileName, FuncName);
+}
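As the comment blocks above describe, the new name format is [<filepath>;]<linkage-name>, with the file prefix present only for local linkage; getParsedIRPGOFuncName splits on the first ';'. A standalone equivalent using std::string_view (illustrative; the sample names are made up):

#include <string_view>
#include <utility>

static std::pair<std::string_view, std::string_view>
parseIRPGOName(std::string_view Name) {
  size_t Semi = Name.find(';');
  if (Semi == std::string_view::npos)
    return {{}, Name}; // no file prefix: non-local linkage
  return {Name.substr(0, Semi), Name.substr(Semi + 1)};
}

// parseIRPGOName("main.cpp;_ZL6helperv") -> {"main.cpp", "_ZL6helperv"}
// parseIRPGOName("_Z3foov")              -> {"",         "_Z3foov"}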
+
StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName) {
if (FileName.empty())
return PGOFuncName;
// Drop the file name including ':'. See also getPGOFuncName.
- if (PGOFuncName.startswith(FileName))
+ if (PGOFuncName.starts_with(FileName))
PGOFuncName = PGOFuncName.drop_front(FileName.size() + 1);
return PGOFuncName;
}
@@ -320,7 +401,7 @@ std::string getPGOFuncNameVarName(StringRef FuncName,
return VarName;
// Now fix up illegal chars in local VarName that may upset the assembler.
- const char *InvalidChars = "-:<>/\"'";
+ const char InvalidChars[] = "-:;<>/\"'";
size_t found = VarName.find_first_of(InvalidChars);
while (found != std::string::npos) {
VarName[found] = '_';
@@ -366,41 +447,102 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
// Ignore in this case.
if (!F.hasName())
continue;
- const std::string &PGOFuncName = getPGOFuncName(F, InLTO);
- if (Error E = addFuncName(PGOFuncName))
+ if (Error E = addFuncWithName(F, getIRPGOFuncName(F, InLTO)))
+ return E;
+ // Also use getPGOFuncName() so that we can find records from older profiles.
+ if (Error E = addFuncWithName(F, getPGOFuncName(F, InLTO)))
return E;
- MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
- // In ThinLTO, local function may have been promoted to global and have
- // suffix ".llvm." added to the function name. We need to add the
- // stripped function name to the symbol table so that we can find a match
- // from profile.
- //
- // We may have other suffixes similar as ".llvm." which are needed to
- // be stripped before the matching, but ".__uniq." suffix which is used
- // to differentiate internal linkage functions in different modules
- // should be kept. Now this is the only suffix with the pattern ".xxx"
- // which is kept before matching.
- const std::string UniqSuffix = ".__uniq.";
- auto pos = PGOFuncName.find(UniqSuffix);
- // Search '.' after ".__uniq." if ".__uniq." exists, otherwise
- // search '.' from the beginning.
- if (pos != std::string::npos)
- pos += UniqSuffix.length();
- else
- pos = 0;
- pos = PGOFuncName.find('.', pos);
- if (pos != std::string::npos && pos != 0) {
- const std::string &OtherFuncName = PGOFuncName.substr(0, pos);
- if (Error E = addFuncName(OtherFuncName))
- return E;
- MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F);
- }
}
Sorted = false;
finalizeSymtab();
return Error::success();
}
+/// \c NameStrings is a string composed of one or more possibly encoded
+/// sub-strings. The substrings are separated by 0 or more zero bytes. This
+/// method decodes the string and calls `NameCallback` for each substring.
+static Error
+readAndDecodeStrings(StringRef NameStrings,
+ std::function<Error(StringRef)> NameCallback) {
+ const uint8_t *P = NameStrings.bytes_begin();
+ const uint8_t *EndP = NameStrings.bytes_end();
+ while (P < EndP) {
+ uint32_t N;
+ uint64_t UncompressedSize = decodeULEB128(P, &N);
+ P += N;
+ uint64_t CompressedSize = decodeULEB128(P, &N);
+ P += N;
+ bool isCompressed = (CompressedSize != 0);
+ SmallVector<uint8_t, 128> UncompressedNameStrings;
+ StringRef NameStrings;
+ if (isCompressed) {
+ if (!llvm::compression::zlib::isAvailable())
+ return make_error<InstrProfError>(instrprof_error::zlib_unavailable);
+
+ if (Error E = compression::zlib::decompress(ArrayRef(P, CompressedSize),
+ UncompressedNameStrings,
+ UncompressedSize)) {
+ consumeError(std::move(E));
+ return make_error<InstrProfError>(instrprof_error::uncompress_failed);
+ }
+ P += CompressedSize;
+ NameStrings = toStringRef(UncompressedNameStrings);
+ } else {
+ NameStrings =
+ StringRef(reinterpret_cast<const char *>(P), UncompressedSize);
+ P += UncompressedSize;
+ }
+ // Now parse the name strings.
+ SmallVector<StringRef, 0> Names;
+ NameStrings.split(Names, getInstrProfNameSeparator());
+ for (StringRef &Name : Names)
+ if (Error E = NameCallback(Name))
+ return E;
+
+ while (P < EndP && *P == 0)
+ P++;
+ }
+ return Error::success();
+}
+
+Error InstrProfSymtab::create(StringRef NameStrings) {
+ return readAndDecodeStrings(
+ NameStrings,
+ std::bind(&InstrProfSymtab::addFuncName, this, std::placeholders::_1));
+}
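readAndDecodeStrings above walks a sequence of chunks, each framed by two ULEB128 values: the uncompressed size, then the compressed size, where zero means the payload is stored raw. A standalone sketch of just that framing (illustrative: it skips compressed chunks instead of inflating them with zlib, and assumes the 0x01 byte separator used between names):

#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

static uint64_t readULEB128(const uint8_t *&P) {
  uint64_t V = 0;
  unsigned S = 0;
  uint8_t B;
  do {
    B = *P++;
    V |= uint64_t(B & 0x7f) << S;
    S += 7;
  } while (B & 0x80);
  return V;
}

static std::vector<std::string> decodeNames(const uint8_t *P,
                                            const uint8_t *End) {
  std::vector<std::string> Names;
  while (P < End) {
    uint64_t RawSize = readULEB128(P);
    uint64_t CompSize = readULEB128(P);
    if (CompSize != 0) { // zlib payload; a real decoder would inflate it
      P += CompSize;
      continue;
    }
    std::string Chunk(reinterpret_cast<const char *>(P), RawSize);
    P += RawSize;
    // Split the chunk on the separator byte.
    size_t Pos = 0, Sep;
    while ((Sep = Chunk.find('\x01', Pos)) != std::string::npos) {
      Names.push_back(Chunk.substr(Pos, Sep - Pos));
      Pos = Sep + 1;
    }
    Names.push_back(Chunk.substr(Pos));
    while (P < End && *P == 0) // zero bytes pad between chunks
      ++P;
  }
  return Names;
}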
+
+Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
+ if (Error E = addFuncName(PGOFuncName))
+ return E;
+ MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
+ // In ThinLTO, a local function may have been promoted to a global and had
+ // the suffix ".llvm." added to its name. We need to add the stripped
+ // function name to the symbol table so that we can find a match from the
+ // profile.
+ //
+ // We may have other suffixes similar to ".llvm." that need to be stripped
+ // before matching, but the ".__uniq." suffix, which is used to
+ // differentiate internal linkage functions in different modules, should be
+ // kept. Currently this is the only suffix with the pattern ".xxx" that is
+ // kept before matching.
+ const std::string UniqSuffix = ".__uniq.";
+ auto pos = PGOFuncName.find(UniqSuffix);
+ // Search '.' after ".__uniq." if ".__uniq." exists, otherwise
+ // search '.' from the beginning.
+ if (pos != std::string::npos)
+ pos += UniqSuffix.length();
+ else
+ pos = 0;
+ pos = PGOFuncName.find('.', pos);
+ if (pos != std::string::npos && pos != 0) {
+ StringRef OtherFuncName = PGOFuncName.substr(0, pos);
+ if (Error E = addFuncName(OtherFuncName))
+ return E;
+ MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F);
+ }
+ return Error::success();
+}
+
uint64_t InstrProfSymtab::getFunctionHashFromAddress(uint64_t Address) {
finalizeSymtab();
auto It = partition_point(AddrToMD5Map, [=](std::pair<uint64_t, uint64_t> A) {
@@ -422,11 +564,11 @@ void InstrProfSymtab::dumpNames(raw_ostream &OS) const {
OS << S << '\n';
}
-Error collectPGOFuncNameStrings(ArrayRef<std::string> NameStrs,
- bool doCompression, std::string &Result) {
+Error collectGlobalObjectNameStrings(ArrayRef<std::string> NameStrs,
+ bool doCompression, std::string &Result) {
assert(!NameStrs.empty() && "No name data to emit");
- uint8_t Header[16], *P = Header;
+ uint8_t Header[20], *P = Header;
std::string UncompressedNameStrings =
join(NameStrs.begin(), NameStrs.end(), getInstrProfNameSeparator());
@@ -473,52 +615,10 @@ Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars,
for (auto *NameVar : NameVars) {
NameStrs.push_back(std::string(getPGOFuncNameVarInitializer(NameVar)));
}
- return collectPGOFuncNameStrings(
+ return collectGlobalObjectNameStrings(
NameStrs, compression::zlib::isAvailable() && doCompression, Result);
}
-Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
- const uint8_t *P = NameStrings.bytes_begin();
- const uint8_t *EndP = NameStrings.bytes_end();
- while (P < EndP) {
- uint32_t N;
- uint64_t UncompressedSize = decodeULEB128(P, &N);
- P += N;
- uint64_t CompressedSize = decodeULEB128(P, &N);
- P += N;
- bool isCompressed = (CompressedSize != 0);
- SmallVector<uint8_t, 128> UncompressedNameStrings;
- StringRef NameStrings;
- if (isCompressed) {
- if (!llvm::compression::zlib::isAvailable())
- return make_error<InstrProfError>(instrprof_error::zlib_unavailable);
-
- if (Error E = compression::zlib::decompress(ArrayRef(P, CompressedSize),
- UncompressedNameStrings,
- UncompressedSize)) {
- consumeError(std::move(E));
- return make_error<InstrProfError>(instrprof_error::uncompress_failed);
- }
- P += CompressedSize;
- NameStrings = toStringRef(UncompressedNameStrings);
- } else {
- NameStrings =
- StringRef(reinterpret_cast<const char *>(P), UncompressedSize);
- P += UncompressedSize;
- }
- // Now parse the name strings.
- SmallVector<StringRef, 0> Names;
- NameStrings.split(Names, getInstrProfNameSeparator());
- for (StringRef &Name : Names)
- if (Error E = Symtab.addFuncName(Name))
- return E;
-
- while (P < EndP && *P == 0)
- P++;
- }
- return Error::success();
-}
-
void InstrProfRecord::accumulateCounts(CountSumOrPercent &Sum) const {
uint64_t FuncSum = 0;
Sum.NumEntries += Counts.size();
@@ -732,6 +832,18 @@ void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight,
Warn(instrprof_error::counter_overflow);
}
+ // If the number of bitmap bytes doesn't match, we either have bad data
+ // or a hash collision.
+ if (BitmapBytes.size() != Other.BitmapBytes.size()) {
+ Warn(instrprof_error::bitmap_mismatch);
+ return;
+ }
+
+ // Bitmap bytes are merged by simply ORing them together.
+ for (size_t I = 0, E = Other.BitmapBytes.size(); I < E; ++I) {
+ BitmapBytes[I] = Other.BitmapBytes[I] | BitmapBytes[I];
+ }
+
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
mergeValueProfData(Kind, Other, Weight, Warn);
}
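The bitmap merge above is a bytewise OR: each MC/DC bit records that a particular test vector was observed in some run, so the union of two runs is exactly the OR of their bitmaps, and a size mismatch signals bad data or a function hash collision. A standalone sketch (illustrative):

#include <cstddef>
#include <cstdint>
#include <vector>

static bool mergeBitmaps(std::vector<uint8_t> &Dst,
                         const std::vector<uint8_t> &Src) {
  if (Dst.size() != Src.size())
    return false; // bad data or a hash collision; caller should warn
  for (size_t I = 0, E = Src.size(); I < E; ++I)
    Dst[I] |= Src[I];
  return true;
}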
@@ -910,14 +1022,13 @@ void ValueProfRecord::deserializeTo(InstrProfRecord &Record,
// For writing/serializing, Old is the host endianness, and New is
// byte order intended on disk. For Reading/deserialization, Old
// is the on-disk source endianness, and New is the host endianness.
-void ValueProfRecord::swapBytes(support::endianness Old,
- support::endianness New) {
+void ValueProfRecord::swapBytes(llvm::endianness Old, llvm::endianness New) {
using namespace support;
if (Old == New)
return;
- if (getHostEndianness() != Old) {
+ if (llvm::endianness::native != Old) {
sys::swapByteOrder<uint32_t>(NumValueSites);
sys::swapByteOrder<uint32_t>(Kind);
}
@@ -929,7 +1040,7 @@ void ValueProfRecord::swapBytes(support::endianness Old,
sys::swapByteOrder<uint64_t>(VD[I].Value);
sys::swapByteOrder<uint64_t>(VD[I].Count);
}
- if (getHostEndianness() == Old) {
+ if (llvm::endianness::native == Old) {
sys::swapByteOrder<uint32_t>(NumValueSites);
sys::swapByteOrder<uint32_t>(Kind);
}
@@ -948,13 +1059,13 @@ void ValueProfData::deserializeTo(InstrProfRecord &Record,
}
template <class T>
-static T swapToHostOrder(const unsigned char *&D, support::endianness Orig) {
+static T swapToHostOrder(const unsigned char *&D, llvm::endianness Orig) {
using namespace support;
- if (Orig == little)
- return endian::readNext<T, little, unaligned>(D);
+ if (Orig == llvm::endianness::little)
+ return endian::readNext<T, llvm::endianness::little, unaligned>(D);
else
- return endian::readNext<T, big, unaligned>(D);
+ return endian::readNext<T, llvm::endianness::big, unaligned>(D);
}
static std::unique_ptr<ValueProfData> allocValueProfData(uint32_t TotalSize) {
@@ -988,7 +1099,7 @@ Error ValueProfData::checkIntegrity() {
Expected<std::unique_ptr<ValueProfData>>
ValueProfData::getValueProfData(const unsigned char *D,
const unsigned char *const BufferEnd,
- support::endianness Endianness) {
+ llvm::endianness Endianness) {
using namespace support;
if (D + sizeof(ValueProfData) > BufferEnd)
@@ -1011,10 +1122,10 @@ ValueProfData::getValueProfData(const unsigned char *D,
return std::move(VPD);
}
-void ValueProfData::swapBytesToHost(support::endianness Endianness) {
+void ValueProfData::swapBytesToHost(llvm::endianness Endianness) {
using namespace support;
- if (Endianness == getHostEndianness())
+ if (Endianness == llvm::endianness::native)
return;
sys::swapByteOrder<uint32_t>(TotalSize);
@@ -1022,21 +1133,21 @@ void ValueProfData::swapBytesToHost(support::endianness Endianness) {
ValueProfRecord *VR = getFirstValueProfRecord(this);
for (uint32_t K = 0; K < NumValueKinds; K++) {
- VR->swapBytes(Endianness, getHostEndianness());
+ VR->swapBytes(Endianness, llvm::endianness::native);
VR = getValueProfRecordNext(VR);
}
}
-void ValueProfData::swapBytesFromHost(support::endianness Endianness) {
+void ValueProfData::swapBytesFromHost(llvm::endianness Endianness) {
using namespace support;
- if (Endianness == getHostEndianness())
+ if (Endianness == llvm::endianness::native)
return;
ValueProfRecord *VR = getFirstValueProfRecord(this);
for (uint32_t K = 0; K < NumValueKinds; K++) {
ValueProfRecord *NVR = getValueProfRecordNext(VR);
- VR->swapBytes(getHostEndianness(), Endianness);
+ VR->swapBytes(llvm::endianness::native, Endianness);
VR = NVR;
}
sys::swapByteOrder<uint32_t>(TotalSize);
@@ -1378,7 +1489,7 @@ static inline uint64_t read(const unsigned char *Buffer, size_t Offset) {
uint64_t Header::formatVersion() const {
using namespace support;
- return endian::byte_swap<uint64_t, little>(Version);
+ return endian::byte_swap<uint64_t, llvm::endianness::little>(Version);
}
Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
@@ -1390,7 +1501,8 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
H.Magic = read(Buffer, offsetOf(&Header::Magic));
// Check the magic number.
- uint64_t Magic = endian::byte_swap<uint64_t, little>(H.Magic);
+ uint64_t Magic =
+ endian::byte_swap<uint64_t, llvm::endianness::little>(H.Magic);
if (Magic != IndexedInstrProf::Magic)
return make_error<InstrProfError>(instrprof_error::bad_magic);
@@ -1404,9 +1516,11 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
// When a new field is added in the header add a case statement here to
// populate it.
static_assert(
- IndexedInstrProf::ProfVersion::CurrentVersion == Version10,
+ IndexedInstrProf::ProfVersion::CurrentVersion == Version11,
"Please update the reading code below if a new field has been added, "
"if not add a case statement to fall through to the latest version.");
+ case 11ull:
+ [[fallthrough]];
case 10ull:
H.TemporalProfTracesOffset =
read(Buffer, offsetOf(&Header::TemporalProfTracesOffset));
@@ -1430,10 +1544,12 @@ size_t Header::size() const {
// When a new field is added to the header add a case statement here to
// compute the size as offset of the new field + size of the new field. This
// relies on the field being added to the end of the list.
- static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version10,
+ static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version11,
"Please update the size computation below if a new field has "
"been added to the header, if not add a case statement to "
"fall through to the latest version.");
+ case 11ull:
+ [[fallthrough]];
case 10ull:
return offsetOf(&Header::TemporalProfTracesOffset) +
sizeof(Header::TemporalProfTracesOffset);
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfCorrelator.cpp b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfCorrelator.cpp
index c822d81f8bef..cf80a58f43bd 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfCorrelator.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfCorrelator.cpp
@@ -16,21 +16,36 @@
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Object/MachO.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/WithColor.h"
#include <optional>
#define DEBUG_TYPE "correlator"
using namespace llvm;
-/// Get the __llvm_prf_cnts section.
-Expected<object::SectionRef> getCountersSection(const object::ObjectFile &Obj) {
- for (auto &Section : Obj.sections())
+/// Get profile section.
+Expected<object::SectionRef> getInstrProfSection(const object::ObjectFile &Obj,
+ InstrProfSectKind IPSK) {
+ // On COFF, getInstrProfSectionName may return a section name followed by
+ // "$M". The linker removes the dollar and everything after it in the final
+ // binary. Do the same here to match.
+ Triple::ObjectFormatType ObjFormat = Obj.getTripleObjectFormat();
+ auto StripSuffix = [ObjFormat](StringRef N) {
+ return ObjFormat == Triple::COFF ? N.split('$').first : N;
+ };
+ std::string ExpectedSectionName =
+ getInstrProfSectionName(IPSK, ObjFormat,
+ /*AddSegmentInfo=*/false);
+ ExpectedSectionName = StripSuffix(ExpectedSectionName);
+ for (auto &Section : Obj.sections()) {
if (auto SectionName = Section.getName())
- if (SectionName.get() == INSTR_PROF_CNTS_SECT_NAME)
+ if (*SectionName == ExpectedSectionName)
return Section;
+ }
return make_error<InstrProfError>(
instrprof_error::unable_to_correlate_profile,
- "could not find counter section (" INSTR_PROF_CNTS_SECT_NAME ")");
+ "could not find section (" + Twine(ExpectedSectionName) + ")");
}
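The StripSuffix lambda above mirrors the reader-side handling earlier in this patch: on COFF, a section name may carry a '$'-sort suffix that the linker strips, so matching uses only the part before the first '$'. A one-line standalone equivalent (illustrative):

#include <string_view>

// ".lprfn$M" -> ".lprfn"; names without '$' pass through unchanged.
static std::string_view stripCoffSortSuffix(std::string_view Name) {
  return Name.substr(0, Name.find('$'));
}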
const char *InstrProfCorrelator::FunctionNameAttributeName = "Function Name";
@@ -39,56 +54,95 @@ const char *InstrProfCorrelator::NumCountersAttributeName = "Num Counters";
llvm::Expected<std::unique_ptr<InstrProfCorrelator::Context>>
InstrProfCorrelator::Context::get(std::unique_ptr<MemoryBuffer> Buffer,
- const object::ObjectFile &Obj) {
- auto CountersSection = getCountersSection(Obj);
+ const object::ObjectFile &Obj,
+ ProfCorrelatorKind FileKind) {
+ auto C = std::make_unique<Context>();
+ auto CountersSection = getInstrProfSection(Obj, IPSK_cnts);
if (auto Err = CountersSection.takeError())
return std::move(Err);
- auto C = std::make_unique<Context>();
+ if (FileKind == InstrProfCorrelator::BINARY) {
+ auto DataSection = getInstrProfSection(Obj, IPSK_covdata);
+ if (auto Err = DataSection.takeError())
+ return std::move(Err);
+ auto DataOrErr = DataSection->getContents();
+ if (!DataOrErr)
+ return DataOrErr.takeError();
+ auto NameSection = getInstrProfSection(Obj, IPSK_covname);
+ if (auto Err = NameSection.takeError())
+ return std::move(Err);
+ auto NameOrErr = NameSection->getContents();
+ if (!NameOrErr)
+ return NameOrErr.takeError();
+ C->DataStart = DataOrErr->data();
+ C->DataEnd = DataOrErr->data() + DataOrErr->size();
+ C->NameStart = NameOrErr->data();
+ C->NameSize = NameOrErr->size();
+ }
C->Buffer = std::move(Buffer);
C->CountersSectionStart = CountersSection->getAddress();
C->CountersSectionEnd = C->CountersSectionStart + CountersSection->getSize();
+ // In a COFF object file, there's a null byte at the beginning of the counter
+ // section that doesn't exist in the raw profile.
+ if (Obj.getTripleObjectFormat() == Triple::COFF)
+ ++C->CountersSectionStart;
+
C->ShouldSwapBytes = Obj.isLittleEndian() != sys::IsLittleEndianHost;
return Expected<std::unique_ptr<Context>>(std::move(C));
}
llvm::Expected<std::unique_ptr<InstrProfCorrelator>>
-InstrProfCorrelator::get(StringRef DebugInfoFilename) {
- auto DsymObjectsOrErr =
- object::MachOObjectFile::findDsymObjectMembers(DebugInfoFilename);
- if (auto Err = DsymObjectsOrErr.takeError())
- return std::move(Err);
- if (!DsymObjectsOrErr->empty()) {
- // TODO: Enable profile correlation when there are multiple objects in a
- // dSYM bundle.
- if (DsymObjectsOrErr->size() > 1)
- return make_error<InstrProfError>(
- instrprof_error::unable_to_correlate_profile,
- "using multiple objects is not yet supported");
- DebugInfoFilename = *DsymObjectsOrErr->begin();
+InstrProfCorrelator::get(StringRef Filename, ProfCorrelatorKind FileKind) {
+ if (FileKind == DEBUG_INFO) {
+ auto DsymObjectsOrErr =
+ object::MachOObjectFile::findDsymObjectMembers(Filename);
+ if (auto Err = DsymObjectsOrErr.takeError())
+ return std::move(Err);
+ if (!DsymObjectsOrErr->empty()) {
+ // TODO: Enable profile correlation when there are multiple objects in a
+ // dSYM bundle.
+ if (DsymObjectsOrErr->size() > 1)
+ return make_error<InstrProfError>(
+ instrprof_error::unable_to_correlate_profile,
+ "using multiple objects is not yet supported");
+ Filename = *DsymObjectsOrErr->begin();
+ }
+ auto BufferOrErr = errorOrToExpected(MemoryBuffer::getFile(Filename));
+ if (auto Err = BufferOrErr.takeError())
+ return std::move(Err);
+
+ return get(std::move(*BufferOrErr), FileKind);
}
- auto BufferOrErr =
- errorOrToExpected(MemoryBuffer::getFile(DebugInfoFilename));
- if (auto Err = BufferOrErr.takeError())
- return std::move(Err);
+ if (FileKind == BINARY) {
+ auto BufferOrErr = errorOrToExpected(MemoryBuffer::getFile(Filename));
+ if (auto Err = BufferOrErr.takeError())
+ return std::move(Err);
- return get(std::move(*BufferOrErr));
+ return get(std::move(*BufferOrErr), FileKind);
+ }
+ return make_error<InstrProfError>(
+ instrprof_error::unable_to_correlate_profile,
+ "unsupported correlation kind (only DWARF debug info and Binary format "
+ "(ELF/COFF) are supported)");
}
llvm::Expected<std::unique_ptr<InstrProfCorrelator>>
-InstrProfCorrelator::get(std::unique_ptr<MemoryBuffer> Buffer) {
+InstrProfCorrelator::get(std::unique_ptr<MemoryBuffer> Buffer,
+ ProfCorrelatorKind FileKind) {
auto BinOrErr = object::createBinary(*Buffer);
if (auto Err = BinOrErr.takeError())
return std::move(Err);
if (auto *Obj = dyn_cast<object::ObjectFile>(BinOrErr->get())) {
- auto CtxOrErr = Context::get(std::move(Buffer), *Obj);
+ auto CtxOrErr = Context::get(std::move(Buffer), *Obj, FileKind);
if (auto Err = CtxOrErr.takeError())
return std::move(Err);
auto T = Obj->makeTriple();
if (T.isArch64Bit())
- return InstrProfCorrelatorImpl<uint64_t>::get(std::move(*CtxOrErr), *Obj);
+ return InstrProfCorrelatorImpl<uint64_t>::get(std::move(*CtxOrErr), *Obj,
+ FileKind);
if (T.isArch32Bit())
- return InstrProfCorrelatorImpl<uint32_t>::get(std::move(*CtxOrErr), *Obj);
+ return InstrProfCorrelatorImpl<uint32_t>::get(std::move(*CtxOrErr), *Obj,
+ FileKind);
}
return make_error<InstrProfError>(
instrprof_error::unable_to_correlate_profile, "not an object file");
@@ -130,29 +184,35 @@ template <class IntPtrT>
llvm::Expected<std::unique_ptr<InstrProfCorrelatorImpl<IntPtrT>>>
InstrProfCorrelatorImpl<IntPtrT>::get(
std::unique_ptr<InstrProfCorrelator::Context> Ctx,
- const object::ObjectFile &Obj) {
- if (Obj.isELF() || Obj.isMachO()) {
- auto DICtx = DWARFContext::create(Obj);
- return std::make_unique<DwarfInstrProfCorrelator<IntPtrT>>(std::move(DICtx),
- std::move(Ctx));
+ const object::ObjectFile &Obj, ProfCorrelatorKind FileKind) {
+ if (FileKind == DEBUG_INFO) {
+ if (Obj.isELF() || Obj.isMachO()) {
+ auto DICtx = DWARFContext::create(Obj);
+ return std::make_unique<DwarfInstrProfCorrelator<IntPtrT>>(
+ std::move(DICtx), std::move(Ctx));
+ }
+ return make_error<InstrProfError>(
+ instrprof_error::unable_to_correlate_profile,
+ "unsupported debug info format (only DWARF is supported)");
}
+ if (Obj.isELF() || Obj.isCOFF())
+ return std::make_unique<BinaryInstrProfCorrelator<IntPtrT>>(std::move(Ctx));
return make_error<InstrProfError>(
instrprof_error::unable_to_correlate_profile,
- "unsupported debug info format (only DWARF is supported)");
+ "unsupported binary format (only ELF and COFF are supported)");
}
template <class IntPtrT>
-Error InstrProfCorrelatorImpl<IntPtrT>::correlateProfileData() {
+Error InstrProfCorrelatorImpl<IntPtrT>::correlateProfileData(int MaxWarnings) {
assert(Data.empty() && Names.empty() && NamesVec.empty());
- correlateProfileDataImpl();
- if (Data.empty() || NamesVec.empty())
+ correlateProfileDataImpl(MaxWarnings);
+ if (this->Data.empty())
return make_error<InstrProfError>(
instrprof_error::unable_to_correlate_profile,
- "could not find any profile metadata in debug info");
- auto Result =
- collectPGOFuncNameStrings(NamesVec, /*doCompression=*/false, Names);
- CounterOffsets.clear();
- NamesVec.clear();
+ "could not find any profile data metadata in correlated file");
+ Error Result = correlateProfileNameImpl();
+ this->CounterOffsets.clear();
+ this->NamesVec.clear();
return Result;
}
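
The intended sequence after this change is: correlate the data records first, then the names, with MaxWarnings == 0 meaning unlimited warnings. A driver sketch under those assumptions:

  // Sketch only: correlate with diagnostics capped at five warnings;
  // name correlation runs inside correlateProfileData() itself.
  if (Error E = Correlator->correlateProfileData(/*MaxWarnings=*/5))
    return E;
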
@@ -180,40 +240,44 @@ template <> struct yaml::SequenceElementTraits<InstrProfCorrelator::Probe> {
};
template <class IntPtrT>
-Error InstrProfCorrelatorImpl<IntPtrT>::dumpYaml(raw_ostream &OS) {
+Error InstrProfCorrelatorImpl<IntPtrT>::dumpYaml(int MaxWarnings,
+ raw_ostream &OS) {
InstrProfCorrelator::CorrelationData Data;
- correlateProfileDataImpl(&Data);
+ correlateProfileDataImpl(MaxWarnings, &Data);
if (Data.Probes.empty())
return make_error<InstrProfError>(
instrprof_error::unable_to_correlate_profile,
- "could not find any profile metadata in debug info");
+ "could not find any profile data metadata in debug info");
yaml::Output YamlOS(OS);
YamlOS << Data;
return Error::success();
}
template <class IntPtrT>
-void InstrProfCorrelatorImpl<IntPtrT>::addProbe(StringRef FunctionName,
- uint64_t CFGHash,
- IntPtrT CounterOffset,
- IntPtrT FunctionPtr,
- uint32_t NumCounters) {
+void InstrProfCorrelatorImpl<IntPtrT>::addDataProbe(uint64_t NameRef,
+ uint64_t CFGHash,
+ IntPtrT CounterOffset,
+ IntPtrT FunctionPtr,
+ uint32_t NumCounters) {
// Check if a probe was already added for this counter offset.
if (!CounterOffsets.insert(CounterOffset).second)
return;
Data.push_back({
- maybeSwap<uint64_t>(IndexedInstrProf::ComputeHash(FunctionName)),
+ maybeSwap<uint64_t>(NameRef),
maybeSwap<uint64_t>(CFGHash),
// In this mode, CounterPtr actually stores the section relative address
// of the counter.
maybeSwap<IntPtrT>(CounterOffset),
+ // TODO: MC/DC is not yet supported.
+ /*BitmapOffset=*/maybeSwap<IntPtrT>(0),
maybeSwap<IntPtrT>(FunctionPtr),
// TODO: Value profiling is not yet supported.
/*ValuesPtr=*/maybeSwap<IntPtrT>(0),
maybeSwap<uint32_t>(NumCounters),
/*NumValueSites=*/{maybeSwap<uint16_t>(0), maybeSwap<uint16_t>(0)},
+ // TODO: MC/DC is not yet supported.
+ /*NumBitmapBytes=*/maybeSwap<uint32_t>(0),
});
- NamesVec.push_back(FunctionName.str());
}
template <class IntPtrT>
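
The initializer list in addDataProbe fills a RawInstrProf::ProfileData<IntPtrT> record field by field. The canonical definition is generated from InstrProfData.inc; the mirror below is only a reading aid inferred from the initializer order above:

  // Reading aid, not the authoritative type (see InstrProfData.inc).
  template <class IntPtrT> struct ProfileDataSketch {
    uint64_t NameRef;           // MD5 hash of the function name
    uint64_t FuncHash;          // CFG hash
    IntPtrT CounterPtr;         // here: section-relative counter offset
    IntPtrT BitmapOffset;       // 0 for now: MC/DC not yet supported
    IntPtrT FunctionPtr;
    IntPtrT ValuesPtr;          // 0: value profiling not yet supported
    uint32_t NumCounters;
    uint16_t NumValueSites[2];
    uint32_t NumBitmapBytes;    // 0 for now: MC/DC not yet supported
  };
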
@@ -254,13 +318,16 @@ bool DwarfInstrProfCorrelator<IntPtrT>::isDIEOfProbe(const DWARFDie &Die) {
if (!Die.hasChildren())
return false;
if (const char *Name = Die.getName(DINameKind::ShortName))
- return StringRef(Name).startswith(getInstrProfCountersVarPrefix());
+ return StringRef(Name).starts_with(getInstrProfCountersVarPrefix());
return false;
}
template <class IntPtrT>
void DwarfInstrProfCorrelator<IntPtrT>::correlateProfileDataImpl(
- InstrProfCorrelator::CorrelationData *Data) {
+ int MaxWarnings, InstrProfCorrelator::CorrelationData *Data) {
+ bool UnlimitedWarnings = (MaxWarnings == 0);
+ // Starting the count at -MaxWarnings lets us emit up to MaxWarnings
+ // warnings before suppression kicks in.
+ int NumSuppressedWarnings = -MaxWarnings;
auto maybeAddProbe = [&](DWARFDie Die) {
if (!isDIEOfProbe(Die))
return;
@@ -297,30 +364,34 @@ void DwarfInstrProfCorrelator<IntPtrT>::correlateProfileDataImpl(
}
}
if (!FunctionName || !CFGHash || !CounterPtr || !NumCounters) {
- LLVM_DEBUG(dbgs() << "Incomplete DIE for probe\n\tFunctionName: "
- << FunctionName << "\n\tCFGHash: " << CFGHash
- << "\n\tCounterPtr: " << CounterPtr
- << "\n\tNumCounters: " << NumCounters);
- LLVM_DEBUG(Die.dump(dbgs()));
+ if (UnlimitedWarnings || ++NumSuppressedWarnings < 1) {
+ WithColor::warning()
+ << "Incomplete DIE for function " << FunctionName
+ << ": CFGHash=" << CFGHash << " CounterPtr=" << CounterPtr
+ << " NumCounters=" << NumCounters << "\n";
+ LLVM_DEBUG(Die.dump(dbgs()));
+ }
return;
}
uint64_t CountersStart = this->Ctx->CountersSectionStart;
uint64_t CountersEnd = this->Ctx->CountersSectionEnd;
if (*CounterPtr < CountersStart || *CounterPtr >= CountersEnd) {
- LLVM_DEBUG(
- dbgs() << "CounterPtr out of range for probe\n\tFunction Name: "
- << FunctionName << "\n\tExpected: [0x"
- << Twine::utohexstr(CountersStart) << ", 0x"
- << Twine::utohexstr(CountersEnd) << ")\n\tActual: 0x"
- << Twine::utohexstr(*CounterPtr));
- LLVM_DEBUG(Die.dump(dbgs()));
+ if (UnlimitedWarnings || ++NumSuppressedWarnings < 1) {
+ WithColor::warning()
+ << format("CounterPtr out of range for function %s: Actual=0x%x "
+ "Expected=[0x%x, 0x%x)\n",
+ *FunctionName, *CounterPtr, CountersStart, CountersEnd);
+ LLVM_DEBUG(Die.dump(dbgs()));
+ }
return;
}
- if (!FunctionPtr) {
- LLVM_DEBUG(dbgs() << "Could not find address of " << *FunctionName
- << "\n");
+ if (!FunctionPtr && (UnlimitedWarnings || ++NumSuppressedWarnings < 1)) {
+ WithColor::warning() << format("Could not find address of function %s\n",
+ *FunctionName);
LLVM_DEBUG(Die.dump(dbgs()));
}
+ // In debug info correlation mode, CounterPtr holds the absolute address of
+ // the counter; consumers expect a section-relative offset when iterating Data.
IntPtrT CounterOffset = *CounterPtr - CountersStart;
if (Data) {
InstrProfCorrelator::Probe P;
@@ -338,8 +409,9 @@ void DwarfInstrProfCorrelator<IntPtrT>::correlateProfileDataImpl(
P.LineNumber = LineNumber;
Data->Probes.push_back(P);
} else {
- this->addProbe(*FunctionName, *CFGHash, CounterOffset,
- FunctionPtr.value_or(0), *NumCounters);
+ this->addDataProbe(IndexedInstrProf::ComputeHash(*FunctionName), *CFGHash,
+ CounterOffset, FunctionPtr.value_or(0), *NumCounters);
+ this->NamesVec.push_back(*FunctionName);
}
};
for (auto &CU : DICtx->normal_units())
@@ -348,4 +420,64 @@ void DwarfInstrProfCorrelator<IntPtrT>::correlateProfileDataImpl(
for (auto &CU : DICtx->dwo_units())
for (const auto &Entry : CU->dies())
maybeAddProbe(DWARFDie(CU.get(), &Entry));
+
+ if (!UnlimitedWarnings && NumSuppressedWarnings > 0)
+ WithColor::warning() << format("Suppressed %d additional warnings\n",
+ NumSuppressedWarnings);
+}
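
The cap works by starting the counter at -MaxWarnings: each candidate warning increments it, printing only while the count is still below one, and whatever remains positive at the end is the number of suppressed messages. A standalone sketch of the idiom with MaxWarnings == 2:

  int MaxWarnings = 2;                       // 0 would mean unlimited
  bool UnlimitedWarnings = (MaxWarnings == 0);
  int NumSuppressedWarnings = -MaxWarnings;  // starts at -2
  for (int I = 0; I < 5; ++I)
    if (UnlimitedWarnings || ++NumSuppressedWarnings < 1)
      /* warn */;                            // fires for I == 0 and I == 1
  // NumSuppressedWarnings == 3 here, so the trailing message reports
  // "Suppressed 3 additional warnings": exactly the overflow.
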
+
+template <class IntPtrT>
+Error DwarfInstrProfCorrelator<IntPtrT>::correlateProfileNameImpl() {
+ if (this->NamesVec.empty()) {
+ return make_error<InstrProfError>(
+ instrprof_error::unable_to_correlate_profile,
+ "could not find any profile name metadata in debug info");
+ }
+ auto Result =
+ collectGlobalObjectNameStrings(this->NamesVec,
+ /*doCompression=*/false, this->Names);
+ return Result;
+}
+
+template <class IntPtrT>
+void BinaryInstrProfCorrelator<IntPtrT>::correlateProfileDataImpl(
+ int MaxWarnings, InstrProfCorrelator::CorrelationData *CorrelateData) {
+ using RawProfData = RawInstrProf::ProfileData<IntPtrT>;
+ bool UnlimitedWarnings = (MaxWarnings == 0);
+ // Starting the count at -MaxWarnings lets us emit up to MaxWarnings
+ // warnings before suppression kicks in.
+ int NumSuppressedWarnings = -MaxWarnings;
+
+ const RawProfData *DataStart = (const RawProfData *)this->Ctx->DataStart;
+ const RawProfData *DataEnd = (const RawProfData *)this->Ctx->DataEnd;
+ // We need to use < here because the last data record may have no padding.
+ for (const RawProfData *I = DataStart; I < DataEnd; ++I) {
+ uint64_t CounterPtr = this->template maybeSwap<IntPtrT>(I->CounterPtr);
+ uint64_t CountersStart = this->Ctx->CountersSectionStart;
+ uint64_t CountersEnd = this->Ctx->CountersSectionEnd;
+ if (CounterPtr < CountersStart || CounterPtr >= CountersEnd) {
+ if (UnlimitedWarnings || ++NumSuppressedWarnings < 1) {
+ WithColor::warning()
+ << format("CounterPtr out of range for function: Actual=0x%x "
+ "Expected=[0x%x, 0x%x) at data offset=0x%x\n",
+ CounterPtr, CountersStart, CountersEnd,
+ (I - DataStart) * sizeof(RawProfData));
+ }
+ }
+ // In binary correlation mode, CounterPtr holds the absolute address of the
+ // counter; consumers expect a section-relative offset when iterating Data.
+ IntPtrT CounterOffset = CounterPtr - CountersStart;
+ this->addDataProbe(I->NameRef, I->FuncHash, CounterOffset,
+ I->FunctionPointer, I->NumCounters);
+ }
+}
+
+template <class IntPtrT>
+Error BinaryInstrProfCorrelator<IntPtrT>::correlateProfileNameImpl() {
+ if (this->Ctx->NameSize == 0) {
+ return make_error<InstrProfError>(
+ instrprof_error::unable_to_correlate_profile,
+ "could not find any profile data metadata in object file");
+ }
+ this->Names.append(this->Ctx->NameStart, this->Ctx->NameSize);
+ return Error::success();
}
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp
index 4160f7e6dfd5..068922d421f8 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -38,7 +38,7 @@
using namespace llvm;
-// Extracts the variant information from the top 8 bits in the version and
+// Extracts the variant information from the top 32 bits in the version and
// returns an enum specifying the variants present.
static InstrProfKind getProfileKindFromVersion(uint64_t Version) {
InstrProfKind ProfileKind = InstrProfKind::Unknown;
@@ -92,7 +92,7 @@ readBinaryIdsInternal(const MemoryBuffer &DataBuffer,
const uint64_t BinaryIdsSize,
const uint8_t *BinaryIdsStart,
std::vector<llvm::object::BuildID> &BinaryIds,
- const llvm::support::endianness Endian) {
+ const llvm::endianness Endian) {
using namespace support;
if (BinaryIdsSize == 0)
@@ -112,10 +112,11 @@ readBinaryIdsInternal(const MemoryBuffer &DataBuffer,
"not enough data to read binary id length");
uint64_t BILen = 0;
- if (Endian == little)
- BILen = endian::readNext<uint64_t, little, unaligned>(BI);
+ if (Endian == llvm::endianness::little)
+ BILen =
+ endian::readNext<uint64_t, llvm::endianness::little, unaligned>(BI);
else
- BILen = endian::readNext<uint64_t, big, unaligned>(BI);
+ BILen = endian::readNext<uint64_t, llvm::endianness::big, unaligned>(BI);
if (BILen == 0)
return make_error<InstrProfError>(instrprof_error::malformed,
@@ -141,42 +142,33 @@ readBinaryIdsInternal(const MemoryBuffer &DataBuffer,
return Error::success();
}
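
Throughout this patch, llvm::support::endianness gives way to the scoped llvm::endianness enum; the read pattern itself is unchanged. A self-contained sketch of the tagged read used above:

  #include "llvm/Support/Endian.h"
  using namespace llvm::support;
  uint8_t Buf[8] = {1, 0, 0, 0, 0, 0, 0, 0};
  const uint8_t *P = Buf;
  // Reads a little-endian uint64_t and advances P past it.
  uint64_t V =
      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(P);
  // V == 1 regardless of host endianness.
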
-static Error printBinaryIdsInternal(raw_ostream &OS,
- const MemoryBuffer &DataBuffer,
- uint64_t BinaryIdsSize,
- const uint8_t *BinaryIdsStart,
- llvm::support::endianness Endian) {
- if (BinaryIdsSize == 0)
- return Error::success();
-
- std::vector<llvm::object::BuildID> BinaryIds;
- if (Error E = readBinaryIdsInternal(DataBuffer, BinaryIdsSize, BinaryIdsStart,
- BinaryIds, Endian))
- return E;
-
+static void
+printBinaryIdsInternal(raw_ostream &OS,
+ std::vector<llvm::object::BuildID> &BinaryIds) {
OS << "Binary IDs: \n";
for (auto BI : BinaryIds) {
for (uint64_t I = 0; I < BI.size(); I++)
OS << format("%02x", BI[I]);
OS << "\n";
}
-
- return Error::success();
}
Expected<std::unique_ptr<InstrProfReader>>
InstrProfReader::create(const Twine &Path, vfs::FileSystem &FS,
- const InstrProfCorrelator *Correlator) {
+ const InstrProfCorrelator *Correlator,
+ std::function<void(Error)> Warn) {
// Set up the buffer to read.
auto BufferOrError = setupMemoryBuffer(Path, FS);
if (Error E = BufferOrError.takeError())
return std::move(E);
- return InstrProfReader::create(std::move(BufferOrError.get()), Correlator);
+ return InstrProfReader::create(std::move(BufferOrError.get()), Correlator,
+ Warn);
}
Expected<std::unique_ptr<InstrProfReader>>
InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
- const InstrProfCorrelator *Correlator) {
+ const InstrProfCorrelator *Correlator,
+ std::function<void(Error)> Warn) {
if (Buffer->getBufferSize() == 0)
return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
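
create() now threads a warning callback down to the raw readers (used later for counter_value_too_large). A hedged usage sketch; Path and FS are assumed to exist in the caller:

  auto Warn = [](Error E) {
    logAllUnhandledErrors(std::move(E), errs(), "warning: ");
  };
  auto ReaderOrErr =
      InstrProfReader::create(Path, FS, /*Correlator=*/nullptr, Warn);
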
@@ -185,9 +177,9 @@ InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
if (IndexedInstrProfReader::hasFormat(*Buffer))
Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
else if (RawInstrProfReader64::hasFormat(*Buffer))
- Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator));
+ Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator, Warn));
else if (RawInstrProfReader32::hasFormat(*Buffer))
- Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator));
+ Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator, Warn));
else if (TextInstrProfReader::hasFormat(*Buffer))
Result.reset(new TextInstrProfReader(std::move(Buffer)));
else
@@ -254,7 +246,7 @@ bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) {
Error TextInstrProfReader::readHeader() {
Symtab.reset(new InstrProfSymtab());
- while (Line->startswith(":")) {
+ while (Line->starts_with(":")) {
StringRef Str = Line->substr(1);
if (Str.equals_insensitive("ir"))
ProfileKind |= InstrProfKind::IRInstrumentation;
@@ -267,6 +259,8 @@ Error TextInstrProfReader::readHeader() {
ProfileKind |= InstrProfKind::FunctionEntryInstrumentation;
else if (Str.equals_insensitive("not_entry_first"))
ProfileKind &= ~InstrProfKind::FunctionEntryInstrumentation;
+ else if (Str.equals_insensitive("single_byte_coverage"))
+ ProfileKind |= InstrProfKind::SingleByteCoverage;
else if (Str.equals_insensitive("temporal_prof_traces")) {
ProfileKind |= InstrProfKind::TemporalProfile;
if (auto Err = readTemporalProfTraceData())
@@ -392,7 +386,7 @@ TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
// Skip empty lines and comments.
- while (!Line.is_at_end() && (Line->empty() || Line->startswith("#")))
+ while (!Line.is_at_end() && (Line->empty() || Line->starts_with("#")))
++Line;
// If we hit EOF while looking for a name, we're done.
if (Line.is_at_end()) {
@@ -433,6 +427,29 @@ Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
Record.Counts.push_back(Count);
}
+ // Bitmap byte information is indicated with special character.
+ if (Line->starts_with("$")) {
+ Record.BitmapBytes.clear();
+ // Read the number of bitmap bytes.
+ uint64_t NumBitmapBytes;
+ if ((Line++)->drop_front(1).trim().getAsInteger(0, NumBitmapBytes))
+ return error(instrprof_error::malformed,
+ "number of bitmap bytes is not a valid integer");
+ if (NumBitmapBytes != 0) {
+ // Read each bitmap and fill our internal storage with the values.
+ Record.BitmapBytes.reserve(NumBitmapBytes);
+ for (uint64_t I = 0; I < NumBitmapBytes; ++I) {
+ if (Line.is_at_end())
+ return error(instrprof_error::truncated);
+ uint8_t BitmapByte;
+ if ((Line++)->getAsInteger(0, BitmapByte))
+ return error(instrprof_error::malformed,
+ "bitmap byte is not a valid integer");
+ Record.BitmapBytes.push_back(BitmapByte);
+ }
+ }
+ }
+
// Check if value profile data exists and read it if so.
if (Error E = readValueProfileData(Record))
return error(std::move(E));
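
Concretely, the new '$' block extends the text format like this hand-written sample (comment lines are skipped by the reader; the writer side further down emits the same shape):

  main
  # Func Hash:
  1234
  # Num Counters:
  2
  # Counter Values:
  100
  0
  # Num Bitmap Bytes:
  $2
  # Bitmap Byte Values:
  0x1
  0x3
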
@@ -471,7 +488,7 @@ bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {
uint64_t Magic =
*reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());
return RawInstrProf::getMagic<IntPtrT>() == Magic ||
- sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic;
+ llvm::byteswap(RawInstrProf::getMagic<IntPtrT>()) == Magic;
}
template <class IntPtrT>
@@ -539,21 +556,31 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
"\nPLEASE update this tool to version in the raw profile, or "
"regenerate raw profile with expected version.")
.str());
- if (useDebugInfoCorrelate() && !Correlator)
- return error(instrprof_error::missing_debug_info_for_correlation);
- if (!useDebugInfoCorrelate() && Correlator)
- return error(instrprof_error::unexpected_debug_info_for_correlation);
- BinaryIdsSize = swap(Header.BinaryIdsSize);
- if (BinaryIdsSize % sizeof(uint64_t))
+ uint64_t BinaryIdSize = swap(Header.BinaryIdsSize);
+ // Binary ids start just after the header, if they exist.
+ const uint8_t *BinaryIdStart =
+ reinterpret_cast<const uint8_t *>(&Header) + sizeof(RawInstrProf::Header);
+ const uint8_t *BinaryIdEnd = BinaryIdStart + BinaryIdSize;
+ const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd();
+ if (BinaryIdSize % sizeof(uint64_t) || BinaryIdEnd > BufferEnd)
return error(instrprof_error::bad_header);
+ if (BinaryIdSize != 0) {
+ if (Error Err =
+ readBinaryIdsInternal(*DataBuffer, BinaryIdSize, BinaryIdStart,
+ BinaryIds, getDataEndianness()))
+ return Err;
+ }
CountersDelta = swap(Header.CountersDelta);
+ BitmapDelta = swap(Header.BitmapDelta);
NamesDelta = swap(Header.NamesDelta);
- auto NumData = swap(Header.DataSize);
+ auto NumData = swap(Header.NumData);
auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters);
- auto CountersSize = swap(Header.CountersSize) * getCounterTypeSize();
+ auto CountersSize = swap(Header.NumCounters) * getCounterTypeSize();
auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters);
+ auto NumBitmapBytes = swap(Header.NumBitmapBytes);
+ auto PaddingBytesAfterBitmapBytes = swap(Header.PaddingBytesAfterBitmapBytes);
auto NamesSize = swap(Header.NamesSize);
ValueKindLast = swap(Header.ValueKindLast);
@@ -561,10 +588,12 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
auto PaddingSize = getNumPaddingBytes(NamesSize);
// Profile data starts after the profile header and binary ids, if they exist.
- ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdsSize;
+ ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdSize;
ptrdiff_t CountersOffset = DataOffset + DataSize + PaddingBytesBeforeCounters;
- ptrdiff_t NamesOffset =
+ ptrdiff_t BitmapOffset =
CountersOffset + CountersSize + PaddingBytesAfterCounters;
+ ptrdiff_t NamesOffset =
+ BitmapOffset + NumBitmapBytes + PaddingBytesAfterBitmapBytes;
ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize;
auto *Start = reinterpret_cast<const char *>(&Header);
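
The offset arithmetic above implies the following on-disk section order for a raw profile with the new bitmap section (a schematic, not a normative spec):

  Header | BinaryIds | Data | pad | Counters | pad | BitmapBytes | pad
         | Names | pad | ValueData
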
@@ -574,8 +603,9 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
if (Correlator) {
// These sizes in the raw file are zero because we constructed them in the
// Correlator.
- assert(DataSize == 0 && NamesSize == 0);
- assert(CountersDelta == 0 && NamesDelta == 0);
+ if (!(DataSize == 0 && NamesSize == 0 && CountersDelta == 0 &&
+ NamesDelta == 0))
+ return error(instrprof_error::unexpected_correlation_info);
Data = Correlator->getDataPointer();
DataEnd = Data + Correlator->getDataSize();
NamesStart = Correlator->getNamesPointer();
@@ -588,17 +618,12 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
NamesEnd = NamesStart + NamesSize;
}
- // Binary ids start just after the header.
- BinaryIdsStart =
- reinterpret_cast<const uint8_t *>(&Header) + sizeof(RawInstrProf::Header);
CountersStart = Start + CountersOffset;
CountersEnd = CountersStart + CountersSize;
+ BitmapStart = Start + BitmapOffset;
+ BitmapEnd = BitmapStart + NumBitmapBytes;
ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset);
- const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd();
- if (BinaryIdsStart + BinaryIdsSize > BufferEnd)
- return error(instrprof_error::bad_header);
-
std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
if (Error E = createSymtab(*NewSymtab))
return E;
@@ -675,8 +700,12 @@ Error RawInstrProfReader<IntPtrT>::readRawCounts(
// A value of zero signifies the block is covered.
Record.Counts.push_back(*Ptr == 0 ? 1 : 0);
} else {
- const auto *CounterValue = reinterpret_cast<const uint64_t *>(Ptr);
- Record.Counts.push_back(swap(*CounterValue));
+ uint64_t CounterValue = swap(*reinterpret_cast<const uint64_t *>(Ptr));
+ if (CounterValue > MaxCounterValue && Warn)
+ Warn(make_error<InstrProfError>(
+ instrprof_error::counter_value_too_large, Twine(CounterValue)));
+
+ Record.Counts.push_back(CounterValue);
}
}
@@ -684,6 +713,49 @@ Error RawInstrProfReader<IntPtrT>::readRawCounts(
}
template <class IntPtrT>
+Error RawInstrProfReader<IntPtrT>::readRawBitmapBytes(InstrProfRecord &Record) {
+ uint32_t NumBitmapBytes = swap(Data->NumBitmapBytes);
+
+ Record.BitmapBytes.clear();
+ Record.BitmapBytes.reserve(NumBitmapBytes);
+
+ // It's possible MCDC is either not enabled or only used for some functions
+ // and not others. So if we record 0 bytes, just move on.
+ if (NumBitmapBytes == 0)
+ return success();
+
+ // BitmapDelta decreases as we advance to the next data record.
+ ptrdiff_t BitmapOffset = swap(Data->BitmapPtr) - BitmapDelta;
+ if (BitmapOffset < 0)
+ return error(
+ instrprof_error::malformed,
+ ("bitmap offset " + Twine(BitmapOffset) + " is negative").str());
+
+ if (BitmapOffset >= BitmapEnd - BitmapStart)
+ return error(instrprof_error::malformed,
+ ("bitmap offset " + Twine(BitmapOffset) +
+ " is greater than the maximum bitmap offset " +
+ Twine(BitmapEnd - BitmapStart - 1))
+ .str());
+
+ uint64_t MaxNumBitmapBytes =
+ (BitmapEnd - (BitmapStart + BitmapOffset)) / sizeof(uint8_t);
+ if (NumBitmapBytes > MaxNumBitmapBytes)
+ return error(instrprof_error::malformed,
+ ("number of bitmap bytes " + Twine(NumBitmapBytes) +
+ " is greater than the maximum number of bitmap bytes " +
+ Twine(MaxNumBitmapBytes))
+ .str());
+
+ for (uint32_t I = 0; I < NumBitmapBytes; I++) {
+ const char *Ptr = BitmapStart + BitmapOffset + I;
+ Record.BitmapBytes.push_back(swap(*Ptr));
+ }
+
+ return success();
+}
+
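
A worked example of the range checks above, with made-up numbers: if swap(Data->BitmapPtr) is 0x2010 and BitmapDelta is 0x2000, then BitmapOffset is 0x10; a negative offset, an offset at or past BitmapEnd - BitmapStart, or a NumBitmapBytes larger than the bytes remaining from BitmapStart + 0x10 would each be rejected as malformed before the copy loop runs.
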
+template <class IntPtrT>
Error RawInstrProfReader<IntPtrT>::readValueProfilingData(
InstrProfRecord &Record) {
Record.clearValueData();
@@ -733,6 +805,10 @@ Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record)
if (Error E = readRawCounts(Record))
return error(std::move(E));
+ // Read raw bitmap bytes and set Record.
+ if (Error E = readRawBitmapBytes(Record))
+ return error(std::move(E));
+
// Read value data and set Record.
if (Error E = readValueProfilingData(Record))
return error(std::move(E));
@@ -745,14 +821,16 @@ Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record)
template <class IntPtrT>
Error RawInstrProfReader<IntPtrT>::readBinaryIds(
std::vector<llvm::object::BuildID> &BinaryIds) {
- return readBinaryIdsInternal(*DataBuffer, BinaryIdsSize, BinaryIdsStart,
- BinaryIds, getDataEndianness());
+ BinaryIds.insert(BinaryIds.begin(), this->BinaryIds.begin(),
+ this->BinaryIds.end());
+ return Error::success();
}
template <class IntPtrT>
Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) {
- return printBinaryIdsInternal(OS, *DataBuffer, BinaryIdsSize, BinaryIdsStart,
- getDataEndianness());
+ if (!BinaryIds.empty())
+ printBinaryIdsInternal(OS, BinaryIds);
+ return Error::success();
}
namespace llvm {
@@ -794,13 +872,15 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
DataBuffer.clear();
std::vector<uint64_t> CounterBuffer;
+ std::vector<uint8_t> BitmapByteBuffer;
const unsigned char *End = D + N;
while (D < End) {
// Read hash.
if (D + sizeof(uint64_t) >= End)
return data_type();
- uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
+ uint64_t Hash =
+ endian::readNext<uint64_t, llvm::endianness::little, unaligned>(D);
// Initialize number of counters for GET_VERSION(FormatVersion) == 1.
uint64_t CountsSize = N / sizeof(uint64_t) - 1;
@@ -808,7 +888,8 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) {
if (D + sizeof(uint64_t) > End)
return data_type();
- CountsSize = endian::readNext<uint64_t, little, unaligned>(D);
+ CountsSize =
+ endian::readNext<uint64_t, llvm::endianness::little, unaligned>(D);
}
// Read counter values.
if (D + CountsSize * sizeof(uint64_t) > End)
@@ -817,9 +898,29 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
CounterBuffer.clear();
CounterBuffer.reserve(CountsSize);
for (uint64_t J = 0; J < CountsSize; ++J)
- CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
+ CounterBuffer.push_back(
+ endian::readNext<uint64_t, llvm::endianness::little, unaligned>(D));
- DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer));
+ // Read bitmap bytes for GET_VERSION(FormatVersion) > 10.
+ if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version10) {
+ uint64_t BitmapBytes = 0;
+ if (D + sizeof(uint64_t) > End)
+ return data_type();
+ BitmapBytes =
+ endian::readNext<uint64_t, llvm::endianness::little, unaligned>(D);
+ // Read bitmap byte values.
+ if (D + BitmapBytes * sizeof(uint8_t) > End)
+ return data_type();
+ BitmapByteBuffer.clear();
+ BitmapByteBuffer.reserve(BitmapBytes);
+ for (uint64_t J = 0; J < BitmapBytes; ++J)
+ BitmapByteBuffer.push_back(static_cast<uint8_t>(
+ endian::readNext<uint64_t, llvm::endianness::little, unaligned>(
+ D)));
+ }
+
+ DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer),
+ std::move(BitmapByteBuffer));
// Read value profiling data.
if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 &&
@@ -913,7 +1014,7 @@ public:
std::pair<StringRef, StringRef> Parts = {StringRef(), Name};
while (true) {
Parts = Parts.second.split(':');
- if (Parts.first.startswith("_Z"))
+ if (Parts.first.starts_with("_Z"))
return Parts.first;
if (Parts.second.empty())
return Name;
@@ -1001,8 +1102,8 @@ bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
if (DataBuffer.getBufferSize() < 8)
return false;
- uint64_t Magic =
- endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart());
+ uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>(
+ DataBuffer.getBufferStart());
// Verify that it's magical.
return Magic == IndexedInstrProf::Magic;
}
@@ -1016,10 +1117,10 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
if (Version >= IndexedInstrProf::Version4) {
const IndexedInstrProf::Summary *SummaryInLE =
reinterpret_cast<const IndexedInstrProf::Summary *>(Cur);
- uint64_t NFields =
- endian::byte_swap<uint64_t, little>(SummaryInLE->NumSummaryFields);
- uint64_t NEntries =
- endian::byte_swap<uint64_t, little>(SummaryInLE->NumCutoffEntries);
+ uint64_t NFields = endian::byte_swap<uint64_t, llvm::endianness::little>(
+ SummaryInLE->NumSummaryFields);
+ uint64_t NEntries = endian::byte_swap<uint64_t, llvm::endianness::little>(
+ SummaryInLE->NumCutoffEntries);
uint32_t SummarySize =
IndexedInstrProf::Summary::getSize(NFields, NEntries);
std::unique_ptr<IndexedInstrProf::Summary> SummaryData =
@@ -1028,7 +1129,7 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE);
uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get());
for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
- Dst[I] = endian::byte_swap<uint64_t, little>(Src[I]);
+ Dst[I] = endian::byte_swap<uint64_t, llvm::endianness::little>(Src[I]);
SummaryEntryVector DetailedSummary;
for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) {
@@ -1085,11 +1186,12 @@ Error IndexedInstrProfReader::readHeader() {
/* UseCS */ true);
// Read the hash type and start offset.
IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(
- endian::byte_swap<uint64_t, little>(Header->HashType));
+ endian::byte_swap<uint64_t, llvm::endianness::little>(Header->HashType));
if (HashType > IndexedInstrProf::HashT::Last)
return error(instrprof_error::unsupported_hash_type);
- uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset);
+ uint64_t HashOffset =
+ endian::byte_swap<uint64_t, llvm::endianness::little>(Header->HashOffset);
// The hash table with profile counts comes next.
auto IndexPtr = std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>(
@@ -1100,19 +1202,23 @@ Error IndexedInstrProfReader::readHeader() {
if (GET_VERSION(Header->formatVersion()) >= 8 &&
Header->formatVersion() & VARIANT_MASK_MEMPROF) {
uint64_t MemProfOffset =
- endian::byte_swap<uint64_t, little>(Header->MemProfOffset);
+ endian::byte_swap<uint64_t, llvm::endianness::little>(
+ Header->MemProfOffset);
const unsigned char *Ptr = Start + MemProfOffset;
// The value returned from RecordTableGenerator.Emit.
const uint64_t RecordTableOffset =
- support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+ support::endian::readNext<uint64_t, llvm::endianness::little,
+ unaligned>(Ptr);
// The offset in the stream right before invoking
// FrameTableGenerator.Emit.
const uint64_t FramePayloadOffset =
- support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+ support::endian::readNext<uint64_t, llvm::endianness::little,
+ unaligned>(Ptr);
// The value returned from FrameTableGenerator.Emit.
const uint64_t FrameTableOffset =
- support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+ support::endian::readNext<uint64_t, llvm::endianness::little,
+ unaligned>(Ptr);
// Read the schema.
auto SchemaOr = memprof::readMemProfSchema(Ptr);
@@ -1137,10 +1243,13 @@ Error IndexedInstrProfReader::readHeader() {
// is higher than 9 (when it was introduced).
if (GET_VERSION(Header->formatVersion()) >= 9) {
uint64_t BinaryIdOffset =
- endian::byte_swap<uint64_t, little>(Header->BinaryIdOffset);
+ endian::byte_swap<uint64_t, llvm::endianness::little>(
+ Header->BinaryIdOffset);
const unsigned char *Ptr = Start + BinaryIdOffset;
// Read binary ids size.
- BinaryIdsSize = support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+ BinaryIdsSize =
+ support::endian::readNext<uint64_t, llvm::endianness::little,
+ unaligned>(Ptr);
if (BinaryIdsSize % sizeof(uint64_t))
return error(instrprof_error::bad_header);
// Set the binary ids start.
@@ -1153,31 +1262,37 @@ Error IndexedInstrProfReader::readHeader() {
if (GET_VERSION(Header->formatVersion()) >= 10 &&
Header->formatVersion() & VARIANT_MASK_TEMPORAL_PROF) {
uint64_t TemporalProfTracesOffset =
- endian::byte_swap<uint64_t, little>(Header->TemporalProfTracesOffset);
+ endian::byte_swap<uint64_t, llvm::endianness::little>(
+ Header->TemporalProfTracesOffset);
const unsigned char *Ptr = Start + TemporalProfTracesOffset;
const auto *PtrEnd = (const unsigned char *)DataBuffer->getBufferEnd();
// Expect at least two 64 bit fields: NumTraces, and TraceStreamSize
if (Ptr + 2 * sizeof(uint64_t) > PtrEnd)
return error(instrprof_error::truncated);
const uint64_t NumTraces =
- support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+ support::endian::readNext<uint64_t, llvm::endianness::little,
+ unaligned>(Ptr);
TemporalProfTraceStreamSize =
- support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+ support::endian::readNext<uint64_t, llvm::endianness::little,
+ unaligned>(Ptr);
for (unsigned i = 0; i < NumTraces; i++) {
// Expect at least two 64 bit fields: Weight and NumFunctions
if (Ptr + 2 * sizeof(uint64_t) > PtrEnd)
return error(instrprof_error::truncated);
TemporalProfTraceTy Trace;
Trace.Weight =
- support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+ support::endian::readNext<uint64_t, llvm::endianness::little,
+ unaligned>(Ptr);
const uint64_t NumFunctions =
- support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+ support::endian::readNext<uint64_t, llvm::endianness::little,
+ unaligned>(Ptr);
// Expect at least NumFunctions 64 bit fields
if (Ptr + NumFunctions * sizeof(uint64_t) > PtrEnd)
return error(instrprof_error::truncated);
for (unsigned j = 0; j < NumFunctions; j++) {
const uint64_t NameRef =
- support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+ support::endian::readNext<uint64_t, llvm::endianness::little,
+ unaligned>(Ptr);
Trace.FunctionNameRefs.push_back(NameRef);
}
TemporalProfTraces.push_back(std::move(Trace));
@@ -1214,12 +1329,25 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
}
Expected<InstrProfRecord> IndexedInstrProfReader::getInstrProfRecord(
- StringRef FuncName, uint64_t FuncHash, uint64_t *MismatchedFuncSum) {
+ StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName,
+ uint64_t *MismatchedFuncSum) {
ArrayRef<NamedInstrProfRecord> Data;
uint64_t FuncSum = 0;
- Error Err = Remapper->getRecords(FuncName, Data);
- if (Err)
- return std::move(Err);
+ auto Err = Remapper->getRecords(FuncName, Data);
+ if (Err) {
+ // If we don't find FuncName, try DeprecatedFuncName to handle profiles
+ // built by older compilers.
+ auto Err2 =
+ handleErrors(std::move(Err), [&](const InstrProfError &IE) -> Error {
+ if (IE.get() != instrprof_error::unknown_function)
+ return make_error<InstrProfError>(IE);
+ if (auto Err = Remapper->getRecords(DeprecatedFuncName, Data))
+ return Err;
+ return Error::success();
+ });
+ if (Err2)
+ return std::move(Err2);
+ }
// Found it. Look for counters with the right hash.
// A flag to indicate if the records are from the same type
@@ -1306,6 +1434,16 @@ Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName,
return success();
}
+Error IndexedInstrProfReader::getFunctionBitmapBytes(
+ StringRef FuncName, uint64_t FuncHash, std::vector<uint8_t> &BitmapBytes) {
+ Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash);
+ if (Error E = Record.takeError())
+ return error(std::move(E));
+
+ BitmapBytes = Record.get().BitmapBytes;
+ return success();
+}
+
Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
ArrayRef<NamedInstrProfRecord> Data;
@@ -1324,12 +1462,15 @@ Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
Error IndexedInstrProfReader::readBinaryIds(
std::vector<llvm::object::BuildID> &BinaryIds) {
return readBinaryIdsInternal(*DataBuffer, BinaryIdsSize, BinaryIdsStart,
- BinaryIds, llvm::support::little);
+ BinaryIds, llvm::endianness::little);
}
Error IndexedInstrProfReader::printBinaryIds(raw_ostream &OS) {
- return printBinaryIdsInternal(OS, *DataBuffer, BinaryIdsSize, BinaryIdsStart,
- llvm::support::little);
+ std::vector<llvm::object::BuildID> BinaryIds;
+ if (Error E = readBinaryIds(BinaryIds))
+ return E;
+ printBinaryIdsInternal(OS, BinaryIds);
+ return Error::success();
}
void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) {
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfWriter.cpp b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfWriter.cpp
index b74d5c3862d8..d65f8fe50313 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -49,9 +49,9 @@ namespace llvm {
class ProfOStream {
public:
ProfOStream(raw_fd_ostream &FD)
- : IsFDOStream(true), OS(FD), LE(FD, support::little) {}
+ : IsFDOStream(true), OS(FD), LE(FD, llvm::endianness::little) {}
ProfOStream(raw_string_ostream &STR)
- : IsFDOStream(false), OS(STR), LE(STR, support::little) {}
+ : IsFDOStream(false), OS(STR), LE(STR, llvm::endianness::little) {}
uint64_t tell() { return OS.tell(); }
void write(uint64_t V) { LE.write<uint64_t>(V); }
@@ -80,7 +80,8 @@ public:
std::string &Data = SOStream.str(); // with flush
for (int K = 0; K < NItems; K++) {
for (int I = 0; I < P[K].N; I++) {
- uint64_t Bytes = endian::byte_swap<uint64_t, little>(P[K].D[I]);
+ uint64_t Bytes =
+ endian::byte_swap<uint64_t, llvm::endianness::little>(P[K].D[I]);
Data.replace(P[K].Pos + I * sizeof(uint64_t), sizeof(uint64_t),
(const char *)&Bytes, sizeof(uint64_t));
}
@@ -106,7 +107,7 @@ public:
using hash_value_type = uint64_t;
using offset_type = uint64_t;
- support::endianness ValueProfDataEndianness = support::little;
+ llvm::endianness ValueProfDataEndianness = llvm::endianness::little;
InstrProfSummaryBuilder *SummaryBuilder;
InstrProfSummaryBuilder *CSSummaryBuilder;
@@ -120,7 +121,7 @@ public:
EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
using namespace support;
- endian::Writer LE(Out, little);
+ endian::Writer LE(Out, llvm::endianness::little);
offset_type N = K.size();
LE.write<offset_type>(N);
@@ -131,6 +132,8 @@ public:
M += sizeof(uint64_t); // The function hash
M += sizeof(uint64_t); // The size of the Counts vector
M += ProfRecord.Counts.size() * sizeof(uint64_t);
+ M += sizeof(uint64_t); // The size of the Bitmap vector
+ M += ProfRecord.BitmapBytes.size() * sizeof(uint64_t);
// Value data
M += ValueProfData::getSize(ProfileData.second);
@@ -147,7 +150,7 @@ public:
void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, offset_type) {
using namespace support;
- endian::Writer LE(Out, little);
+ endian::Writer LE(Out, llvm::endianness::little);
for (const auto &ProfileData : *V) {
const InstrProfRecord &ProfRecord = ProfileData.second;
if (NamedInstrProfRecord::hasCSFlagInHash(ProfileData.first))
@@ -160,6 +163,10 @@ public:
for (uint64_t I : ProfRecord.Counts)
LE.write<uint64_t>(I);
+ LE.write<uint64_t>(ProfRecord.BitmapBytes.size());
+ for (uint64_t I : ProfRecord.BitmapBytes)
+ LE.write<uint64_t>(I);
+
// Write value data
std::unique_ptr<ValueProfData> VDataPtr =
ValueProfData::serializeFrom(ProfileData.second);
@@ -182,8 +189,7 @@ InstrProfWriter::InstrProfWriter(bool Sparse,
InstrProfWriter::~InstrProfWriter() { delete InfoObj; }
// Internal interface for testing purpose only.
-void InstrProfWriter::setValueProfDataEndianness(
- support::endianness Endianness) {
+void InstrProfWriter::setValueProfDataEndianness(llvm::endianness Endianness) {
InfoObj->ValueProfDataEndianness = Endianness;
}
@@ -380,6 +386,8 @@ bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) {
const InstrProfRecord &IPR = Func.second;
if (llvm::any_of(IPR.Counts, [](uint64_t Count) { return Count > 0; }))
return true;
+ if (llvm::any_of(IPR.BitmapBytes, [](uint8_t Byte) { return Byte > 0; }))
+ return true;
}
return false;
}
@@ -528,7 +536,12 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
// Insert the key (func hash) and value (memprof record).
RecordTableGenerator.insert(I.first, I.second);
}
+ // Release the memory of this MapVector as it is no longer needed.
+ MemProfRecordData.clear();
+ // The call to Emit invokes RecordWriterTrait::EmitData which destructs
+ // the memprof record copies owned by the RecordTableGenerator. This works
+ // because the RecordTableGenerator is not used after this point.
uint64_t RecordTableOffset =
RecordTableGenerator.Emit(OS.OS, *RecordWriter);
@@ -541,6 +554,8 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
// Insert the key (frame id) and value (frame contents).
FrameTableGenerator.insert(I.first, I.second);
}
+ // Release the memory of this MapVector as it is no longer needed.
+ MemProfFrameData.clear();
uint64_t FrameTableOffset = FrameTableGenerator.Emit(OS.OS, *FrameWriter);
@@ -703,6 +718,17 @@ void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash,
for (uint64_t Count : Func.Counts)
OS << Count << "\n";
+ if (Func.BitmapBytes.size() > 0) {
+ OS << "# Num Bitmap Bytes:\n$" << Func.BitmapBytes.size() << "\n";
+ OS << "# Bitmap Byte Values:\n";
+ for (uint8_t Byte : Func.BitmapBytes) {
+ OS << "0x";
+ OS.write_hex(Byte);
+ OS << "\n";
+ }
+ OS << "\n";
+ }
+
uint32_t NumValueKinds = Func.getNumValueKinds();
if (!NumValueKinds) {
OS << "\n";
@@ -722,7 +748,7 @@ void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash,
std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(VK, S);
for (uint32_t I = 0; I < ND; I++) {
if (VK == IPVK_IndirectCallTarget)
- OS << Symtab.getFuncNameOrExternalSymbol(VD[I].Value) << ":"
+ OS << Symtab.getFuncOrVarNameIfDefined(VD[I].Value) << ":"
<< VD[I].Count << "\n";
else
OS << VD[I].Value << ":" << VD[I].Count << "\n";
@@ -743,6 +769,8 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
if (static_cast<bool>(ProfileKind &
InstrProfKind::FunctionEntryInstrumentation))
OS << "# Always instrument the function entry block\n:entry_first\n";
+ if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))
+ OS << "# Instrument block coverage\n:single_byte_coverage\n";
InstrProfSymtab Symtab;
using FuncPair = detail::DenseMapPair<uint64_t, InstrProfRecord>;
@@ -790,7 +818,7 @@ void InstrProfWriter::writeTextTemporalProfTraceData(raw_fd_ostream &OS,
for (auto &Trace : TemporalProfTraces) {
OS << "# Weight:\n" << Trace.Weight << "\n";
for (auto &NameRef : Trace.FunctionNameRefs)
- OS << Symtab.getFuncName(NameRef) << ",";
+ OS << Symtab.getFuncOrVarName(NameRef) << ",";
OS << "\n";
}
OS << "\n";
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/ItaniumManglingCanonicalizer.cpp b/contrib/llvm-project/llvm/lib/ProfileData/ItaniumManglingCanonicalizer.cpp
index afbb09ed35fc..6271b1622693 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/ItaniumManglingCanonicalizer.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/ItaniumManglingCanonicalizer.cpp
@@ -149,7 +149,7 @@ class CanonicalizerAllocator : public FoldingNodeAllocator {
// Node is pre-existing; check if it's in our remapping table.
if (auto *N = Remappings.lookup(Result.first)) {
Result.first = N;
- assert(Remappings.find(Result.first) == Remappings.end() &&
+ assert(!Remappings.contains(Result.first) &&
"should never need multiple remap steps");
}
if (Result.first == TrackedNode)
@@ -225,7 +225,7 @@ ItaniumManglingCanonicalizer::addEquivalence(FragmentKind Kind, StringRef First,
// arguments. This mostly just falls out, as almost all template names
// are valid as <name>s, but we also want to parse <substitution>s as
// <name>s, even though they're not.
- else if (Str.startswith("S"))
+ else if (Str.starts_with("S"))
// Parse the substitution and optional following template arguments.
N = P->Demangler.parseType();
else
@@ -289,8 +289,8 @@ parseMaybeMangledName(CanonicalizingDemangler &Demangler, StringRef Mangling,
// encoding 6memcpy 7memmove
// consistent with how they are encoded as local-names inside a C++ mangling.
Node *N;
- if (Mangling.startswith("_Z") || Mangling.startswith("__Z") ||
- Mangling.startswith("___Z") || Mangling.startswith("____Z"))
+ if (Mangling.starts_with("_Z") || Mangling.starts_with("__Z") ||
+ Mangling.starts_with("___Z") || Mangling.starts_with("____Z"))
N = Demangler.parse();
else
N = Demangler.make<itanium_demangle::NameType>(
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/MemProf.cpp b/contrib/llvm-project/llvm/lib/ProfileData/MemProf.cpp
index 3d44cf0b4c37..0461f0e9f840 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/MemProf.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/MemProf.cpp
@@ -2,6 +2,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
@@ -12,7 +13,7 @@ void IndexedMemProfRecord::serialize(const MemProfSchema &Schema,
raw_ostream &OS) {
using namespace support;
- endian::Writer LE(OS, little);
+ endian::Writer LE(OS, llvm::endianness::little);
LE.write<uint64_t>(AllocSites.size());
for (const IndexedAllocationInfo &N : AllocSites) {
@@ -39,13 +40,15 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
IndexedMemProfRecord Record;
// Read the meminfo nodes.
- const uint64_t NumNodes = endian::readNext<uint64_t, little, unaligned>(Ptr);
+ const uint64_t NumNodes =
+ endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
for (uint64_t I = 0; I < NumNodes; I++) {
IndexedAllocationInfo Node;
const uint64_t NumFrames =
- endian::readNext<uint64_t, little, unaligned>(Ptr);
+ endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
for (uint64_t J = 0; J < NumFrames; J++) {
- const FrameId Id = endian::readNext<FrameId, little, unaligned>(Ptr);
+ const FrameId Id =
+ endian::readNext<FrameId, llvm::endianness::little, unaligned>(Ptr);
Node.CallStack.push_back(Id);
}
Node.Info.deserialize(Schema, Ptr);
@@ -54,14 +57,16 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
}
// Read the callsite information.
- const uint64_t NumCtxs = endian::readNext<uint64_t, little, unaligned>(Ptr);
+ const uint64_t NumCtxs =
+ endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
for (uint64_t J = 0; J < NumCtxs; J++) {
const uint64_t NumFrames =
- endian::readNext<uint64_t, little, unaligned>(Ptr);
+ endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
llvm::SmallVector<FrameId> Frames;
Frames.reserve(NumFrames);
for (uint64_t K = 0; K < NumFrames; K++) {
- const FrameId Id = endian::readNext<FrameId, little, unaligned>(Ptr);
+ const FrameId Id =
+ endian::readNext<FrameId, llvm::endianness::little, unaligned>(Ptr);
Frames.push_back(Id);
}
Record.CallSites.push_back(Frames);
@@ -71,14 +76,19 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
}
GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) {
- const auto Pos = FunctionName.find(".llvm.");
+ // Canonicalize the function name to drop suffixes such as ".llvm.". Note
+ // we do not drop any ".__uniq." suffixes, as getCanonicalFnName does not drop
+ // those by default. This is by design to differentiate internal linkage
+ // functions during matching. By dropping the other suffixes we can then match
+ // functions in the profile use phase prior to their addition. Note that this
+ // applies to both instrumented and sampled function names.
+ StringRef CanonicalName =
+ sampleprof::FunctionSamples::getCanonicalFnName(FunctionName);
// We use the function guid which we expect to be a uint64_t. At
- // this time, it is the lower 64 bits of the md5 of the function
- // name. Any suffix with .llvm. is trimmed since these are added by
- // thinLTO global promotion. At the time the profile is consumed,
- // these suffixes will not be present.
- return Function::getGUID(FunctionName.take_front(Pos));
+ // this time, it is the lower 64 bits of the md5 of the canonical
+ // function name.
+ return Function::getGUID(CanonicalName);
}
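
Under the new behavior, both spellings of a ThinLTO-promoted name hash to the same GUID, while ".__uniq." suffixes still distinguish internal-linkage copies. An illustrative check (function names are made up):

  using llvm::memprof::IndexedMemProfRecord;
  // ".llvm." suffixes are canonicalized away before hashing...
  assert(IndexedMemProfRecord::getGUID("foo.llvm.123") ==
         IndexedMemProfRecord::getGUID("foo"));
  // ...but ".__uniq." suffixes are kept, by design.
  assert(IndexedMemProfRecord::getGUID("bar.__uniq.987") !=
         IndexedMemProfRecord::getGUID("bar"));
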
Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
@@ -86,7 +96,7 @@ Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
const unsigned char *Ptr = Buffer;
const uint64_t NumSchemaIds =
- endian::readNext<uint64_t, little, unaligned>(Ptr);
+ endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
if (NumSchemaIds > static_cast<uint64_t>(Meta::Size)) {
return make_error<InstrProfError>(instrprof_error::malformed,
"memprof schema invalid");
@@ -94,7 +104,8 @@ Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
MemProfSchema Result;
for (size_t I = 0; I < NumSchemaIds; I++) {
- const uint64_t Tag = endian::readNext<uint64_t, little, unaligned>(Ptr);
+ const uint64_t Tag =
+ endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
if (Tag >= static_cast<uint64_t>(Meta::Size)) {
return make_error<InstrProfError>(instrprof_error::malformed,
"memprof schema invalid");
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/contrib/llvm-project/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
index 8e07478fb083..3a45113b0a2e 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
@@ -204,9 +204,7 @@ SampleProfileSummaryBuilder::computeSummaryForProfiles(
// profiles before computing profile summary.
if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS &&
!UseContextLessSummary.getNumOccurrences())) {
- for (const auto &I : Profiles) {
- ContextLessProfiles[I.second.getName()].merge(I.second);
- }
+ ProfileConverter::flattenProfile(Profiles, ContextLessProfiles, true);
ProfilesToUse = &ContextLessProfiles;
}
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/RawMemProfReader.cpp b/contrib/llvm-project/llvm/lib/ProfileData/RawMemProfReader.cpp
index bccb205fb243..af2db8d61179 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/RawMemProfReader.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -17,7 +17,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -34,6 +33,7 @@
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/ProfileData/RawMemProfReader.h"
+#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
@@ -86,7 +86,7 @@ llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
using namespace support;
const uint64_t NumItemsToRead =
- endian::readNext<uint64_t, little, unaligned>(Ptr);
+ endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
llvm::SmallVector<SegmentEntry> Items;
for (uint64_t I = 0; I < NumItemsToRead; I++) {
Items.push_back(*reinterpret_cast<const SegmentEntry *>(
@@ -100,10 +100,11 @@ readMemInfoBlocks(const char *Ptr) {
using namespace support;
const uint64_t NumItemsToRead =
- endian::readNext<uint64_t, little, unaligned>(Ptr);
+ endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
for (uint64_t I = 0; I < NumItemsToRead; I++) {
- const uint64_t Id = endian::readNext<uint64_t, little, unaligned>(Ptr);
+ const uint64_t Id =
+ endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
const MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
Items.push_back({Id, MIB});
// Only increment by size of MIB since readNext implicitly increments.
@@ -116,16 +117,19 @@ CallStackMap readStackInfo(const char *Ptr) {
using namespace support;
const uint64_t NumItemsToRead =
- endian::readNext<uint64_t, little, unaligned>(Ptr);
+ endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
CallStackMap Items;
for (uint64_t I = 0; I < NumItemsToRead; I++) {
- const uint64_t StackId = endian::readNext<uint64_t, little, unaligned>(Ptr);
- const uint64_t NumPCs = endian::readNext<uint64_t, little, unaligned>(Ptr);
+ const uint64_t StackId =
+ endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+ const uint64_t NumPCs =
+ endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
SmallVector<uint64_t> CallStack;
for (uint64_t J = 0; J < NumPCs; J++) {
- CallStack.push_back(endian::readNext<uint64_t, little, unaligned>(Ptr));
+ CallStack.push_back(
+ endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr));
}
Items[StackId] = CallStack;
@@ -507,12 +511,16 @@ Error RawMemProfReader::symbolizeAndFilterStackFrames() {
const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
// Only the last entry is not an inlined location.
I != NumFrames - 1);
- // Here we retain a mapping from the GUID to symbol name instead of
- // adding it to the frame object directly to reduce memory overhead.
- // This is because there can be many unique frames, particularly for
- // callsite frames.
- if (KeepSymbolName)
- GuidToSymbolName.insert({Guid, DIFrame.FunctionName});
+ // Here we retain a mapping from the GUID to canonical symbol name
+ // instead of adding it to the frame object directly to reduce memory
+ // overhead. This is because there can be many unique frames,
+ // particularly for callsite frames.
+ if (KeepSymbolName) {
+ StringRef CanonicalName =
+ sampleprof::FunctionSamples::getCanonicalFnName(
+ DIFrame.FunctionName);
+ GuidToSymbolName.insert({Guid, CanonicalName.str()});
+ }
const FrameId Hash = F.hash();
IdToFrame.insert({Hash, F});
@@ -640,13 +648,12 @@ RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
return object::SectionedAddress{VirtualAddress};
}
-Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) {
- if (FunctionProfileData.empty())
- return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
-
- if (Iter == FunctionProfileData.end())
- return make_error<InstrProfError>(instrprof_error::eof);
-
+Error RawMemProfReader::readNextRecord(
+ GuidMemProfRecordPair &GuidRecord,
+ std::function<const Frame(const FrameId)> Callback) {
+ // Create a new callback for the RawMemProfRecord iterator so that we can
+ // provide the symbol name if the reader was initialized with KeepSymbolName =
+ // true. This is useful for debugging and testing.
auto IdToFrameCallback = [this](const FrameId Id) {
Frame F = this->idToFrame(Id);
if (!this->KeepSymbolName)
@@ -656,11 +663,7 @@ Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) {
F.SymbolName = Iter->getSecond();
return F;
};
-
- const IndexedMemProfRecord &IndexedRecord = Iter->second;
- GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, IdToFrameCallback)};
- Iter++;
- return Error::success();
+ return MemProfReader::readNextRecord(GuidRecord, IdToFrameCallback);
}
} // namespace memprof
} // namespace llvm
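
After this refactor the iteration state lives in the MemProfReader base class; RawMemProfReader only layers the symbol-name callback on top. A consumption sketch, assuming the Callback parameter is defaulted in the header as upstream declares it:

  llvm::memprof::GuidMemProfRecordPair Pair;
  Error E = Reader.readNextRecord(Pair);
  while (!E) {
    // Use Pair.first (GUID) and Pair.second (MemProfRecord) here.
    E = Reader.readNextRecord(Pair);
  }
  // End of iteration is reported as instrprof_error::eof.
  consumeError(std::move(E));
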
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/SampleProf.cpp b/contrib/llvm-project/llvm/lib/ProfileData/SampleProf.cpp
index fdae8a011e71..59fa71899ed4 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/SampleProf.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/SampleProf.cpp
@@ -121,7 +121,7 @@ sampleprof_error SampleRecord::merge(const SampleRecord &Other,
sampleprof_error Result;
Result = addSamples(Other.getSamples(), Weight);
for (const auto &I : Other.getCallTargets()) {
- MergeResult(Result, addCalledTarget(I.first(), I.second, Weight));
+ MergeResult(Result, addCalledTarget(I.first, I.second, Weight));
}
return Result;
}
@@ -181,7 +181,8 @@ void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const {
for (const auto &CS : SortedCallsiteSamples.get()) {
for (const auto &FS : CS->second) {
OS.indent(Indent + 2);
- OS << CS->first << ": inlined callee: " << FS.second.getName() << ": ";
+ OS << CS->first << ": inlined callee: " << FS.second.getFunction()
+ << ": ";
FS.second.print(OS, Indent + 4);
}
}
@@ -202,13 +203,12 @@ void sampleprof::sortFuncProfiles(
const SampleProfileMap &ProfileMap,
std::vector<NameFunctionSamples> &SortedProfiles) {
for (const auto &I : ProfileMap) {
- assert(I.first == I.second.getContext() && "Inconsistent profile map");
- SortedProfiles.push_back(std::make_pair(I.second.getContext(), &I.second));
+ SortedProfiles.push_back(std::make_pair(I.first, &I.second));
}
llvm::stable_sort(SortedProfiles, [](const NameFunctionSamples &A,
const NameFunctionSamples &B) {
if (A.second->getTotalSamples() == B.second->getTotalSamples())
- return A.first < B.first;
+ return A.second->getContext() < B.second->getContext();
return A.second->getTotalSamples() > B.second->getTotalSamples();
});
}
@@ -235,14 +235,6 @@ LineLocation FunctionSamples::getCallSiteIdentifier(const DILocation *DIL,
}
}
-uint64_t FunctionSamples::getCallSiteHash(StringRef CalleeName,
- const LineLocation &Callsite) {
- uint64_t NameHash = std::hash<std::string>{}(CalleeName.str());
- uint64_t LocId =
- (((uint64_t)Callsite.LineOffset) << 32) | Callsite.Discriminator;
- return NameHash + (LocId << 5) + LocId;
-}
-
const FunctionSamples *FunctionSamples::findFunctionSamples(
const DILocation *DIL, SampleProfileReaderItaniumRemapper *Remapper) const {
assert(DIL);
@@ -269,11 +261,11 @@ const FunctionSamples *FunctionSamples::findFunctionSamples(
return FS;
}
-void FunctionSamples::findAllNames(DenseSet<StringRef> &NameSet) const {
- NameSet.insert(getName());
+void FunctionSamples::findAllNames(DenseSet<FunctionId> &NameSet) const {
+ NameSet.insert(getFunction());
for (const auto &BS : BodySamples)
for (const auto &TS : BS.second.getCallTargets())
- NameSet.insert(TS.getKey());
+ NameSet.insert(TS.first);
for (const auto &CS : CallsiteSamples) {
for (const auto &NameFS : CS.second) {
@@ -288,18 +280,15 @@ const FunctionSamples *FunctionSamples::findFunctionSamplesAt(
SampleProfileReaderItaniumRemapper *Remapper) const {
CalleeName = getCanonicalFnName(CalleeName);
- std::string CalleeGUID;
- CalleeName = getRepInFormat(CalleeName, UseMD5, CalleeGUID);
-
auto iter = CallsiteSamples.find(mapIRLocToProfileLoc(Loc));
if (iter == CallsiteSamples.end())
return nullptr;
- auto FS = iter->second.find(CalleeName);
+ auto FS = iter->second.find(getRepInFormat(CalleeName));
if (FS != iter->second.end())
return &FS->second;
if (Remapper) {
if (auto NameInProfile = Remapper->lookUpNameInProfile(CalleeName)) {
- auto FS = iter->second.find(*NameInProfile);
+ auto FS = iter->second.find(getRepInFormat(*NameInProfile));
if (FS != iter->second.end())
return &FS->second;
}
@@ -357,13 +346,13 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
// Filter the cold profiles from ProfileMap and move them into a tmp
// container
- std::vector<std::pair<SampleContext, const FunctionSamples *>> ColdProfiles;
+ std::vector<std::pair<hash_code, const FunctionSamples *>> ColdProfiles;
for (const auto &I : ProfileMap) {
- const SampleContext &Context = I.first;
+ const SampleContext &Context = I.second.getContext();
const FunctionSamples &FunctionProfile = I.second;
if (FunctionProfile.getTotalSamples() < ColdCountThreshold &&
(!TrimBaseProfileOnly || Context.isBaseContext()))
- ColdProfiles.emplace_back(Context, &I.second);
+ ColdProfiles.emplace_back(I.first, &I.second);
}
// Remove the cold profile from ProfileMap and merge them into
@@ -374,8 +363,8 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
auto MergedContext = I.second->getContext().getContextFrames();
if (ColdContextFrameLength < MergedContext.size())
MergedContext = MergedContext.take_back(ColdContextFrameLength);
- auto Ret = MergedProfileMap.emplace(MergedContext, FunctionSamples());
- FunctionSamples &MergedProfile = Ret.first->second;
+ // Need to set MergedProfile's context here, otherwise it will be lost.
+ FunctionSamples &MergedProfile = MergedProfileMap.Create(MergedContext);
MergedProfile.merge(*I.second);
}
ProfileMap.erase(I.first);
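
take_back keeps only the innermost frames: with ColdContextFrameLength == 1, a cold context main @ foo @ bar is merged into the base context bar. A minimal sketch of the truncation, assuming an ArrayRef-like frame list:

#include "llvm/ADT/ArrayRef.h"
#include <cstddef>

// Keep at most N innermost frames of a calling context (Frame is a
// stand-in for SampleContextFrame).
template <typename Frame>
llvm::ArrayRef<Frame> truncateColdContext(llvm::ArrayRef<Frame> Context,
                                          size_t ColdContextFrameLength) {
  if (ColdContextFrameLength < Context.size())
    Context = Context.take_back(ColdContextFrameLength); // innermost frames
  return Context;
}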
@@ -385,57 +374,17 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
for (const auto &I : MergedProfileMap) {
// Filter the cold merged profile
if (TrimColdContext && I.second.getTotalSamples() < ColdCountThreshold &&
- ProfileMap.find(I.first) == ProfileMap.end())
+ ProfileMap.find(I.second.getContext()) == ProfileMap.end())
continue;
// Merge the profile if the original profile exists, otherwise just insert
- // as a new profile
- auto Ret = ProfileMap.emplace(I.first, FunctionSamples());
- if (Ret.second) {
- SampleContext FContext(Ret.first->first, RawContext);
- FunctionSamples &FProfile = Ret.first->second;
- FProfile.setContext(FContext);
- }
+ // as a new profile. If inserted as a new profile from MergedProfileMap, it
+ // already has the right context.
+ auto Ret = ProfileMap.emplace(I.second.getContext(), FunctionSamples());
FunctionSamples &OrigProfile = Ret.first->second;
OrigProfile.merge(I.second);
}
}
-void SampleContextTrimmer::canonicalizeContextProfiles() {
- std::vector<SampleContext> ProfilesToBeRemoved;
- SampleProfileMap ProfilesToBeAdded;
- for (auto &I : ProfileMap) {
- FunctionSamples &FProfile = I.second;
- SampleContext &Context = FProfile.getContext();
- if (I.first == Context)
- continue;
-
- // Use the context string from FunctionSamples to update the keys of
- // ProfileMap. They can get out of sync after context profile promotion
- // through pre-inliner.
- // Duplicate the function profile for later insertion to avoid a conflict
- // caused by a context both to be add and to be removed. This could happen
- // when a context is promoted to another context which is also promoted to
- // the third context. For example, given an original context A @ B @ C that
- // is promoted to B @ C and the original context B @ C which is promoted to
- // just C, adding B @ C to the profile map while removing same context (but
- // with different profiles) from the map can cause a conflict if they are
- // not handled in a right order. This can be solved by just caching the
- // profiles to be added.
- auto Ret = ProfilesToBeAdded.emplace(Context, FProfile);
- (void)Ret;
- assert(Ret.second && "Context conflict during canonicalization");
- ProfilesToBeRemoved.push_back(I.first);
- }
-
- for (auto &I : ProfilesToBeRemoved) {
- ProfileMap.erase(I);
- }
-
- for (auto &I : ProfilesToBeAdded) {
- ProfileMap.emplace(I.first, I.second);
- }
-}
-
std::error_code ProfileSymbolList::write(raw_ostream &OS) {
// Sort the symbols before output; if doing compression,
// it will make the compression much more effective.
@@ -463,7 +412,7 @@ void ProfileSymbolList::dump(raw_ostream &OS) const {
ProfileConverter::FrameNode *
ProfileConverter::FrameNode::getOrCreateChildFrame(const LineLocation &CallSite,
- StringRef CalleeName) {
+ FunctionId CalleeName) {
uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite);
auto It = AllChildFrames.find(Hash);
if (It != AllChildFrames.end()) {
@@ -491,7 +440,7 @@ ProfileConverter::getOrCreateContextPath(const SampleContext &Context) {
auto Node = &RootFrame;
LineLocation CallSiteLoc(0, 0);
for (auto &Callsite : Context.getContextFrames()) {
- Node = Node->getOrCreateChildFrame(CallSiteLoc, Callsite.FuncName);
+ Node = Node->getOrCreateChildFrame(CallSiteLoc, Callsite.Func);
CallSiteLoc = Callsite.Location;
}
return Node;
@@ -509,21 +458,23 @@ void ProfileConverter::convertCSProfiles(ProfileConverter::FrameNode &Node) {
if (!ChildProfile)
continue;
SampleContext OrigChildContext = ChildProfile->getContext();
+ uint64_t OrigChildContextHash = OrigChildContext.getHashCode();
// Reset the child context to be contextless.
- ChildProfile->getContext().setName(OrigChildContext.getName());
+ ChildProfile->getContext().setFunction(OrigChildContext.getFunction());
if (NodeProfile) {
// Add child profile to the callsite profile map.
auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);
- SamplesMap.emplace(OrigChildContext.getName().str(), *ChildProfile);
+ SamplesMap.emplace(OrigChildContext.getFunction(), *ChildProfile);
NodeProfile->addTotalSamples(ChildProfile->getTotalSamples());
// Remove the corresponding body sample for the callsite and update the
// total weight.
auto Count = NodeProfile->removeCalledTargetAndBodySample(
ChildNode.CallSiteLoc.LineOffset, ChildNode.CallSiteLoc.Discriminator,
- OrigChildContext.getName());
+ OrigChildContext.getFunction());
NodeProfile->removeTotalSamples(Count);
}
+ uint64_t NewChildProfileHash = 0;
// Separate child profile to be a standalone profile, if the current parent
// profile doesn't exist. This is a duplicating operation when the child
// profile is already incorporated into the parent which is still useful and
@@ -532,15 +483,20 @@ void ProfileConverter::convertCSProfiles(ProfileConverter::FrameNode &Node) {
// profile in the prelink phase for to-be-fully-inlined functions.
if (!NodeProfile) {
ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
+ NewChildProfileHash = ChildProfile->getContext().getHashCode();
} else if (GenerateMergedBaseProfiles) {
ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
+ NewChildProfileHash = ChildProfile->getContext().getHashCode();
auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);
- SamplesMap[ChildProfile->getName().str()].getContext().setAttribute(
+ SamplesMap[ChildProfile->getFunction()].getContext().setAttribute(
ContextDuplicatedIntoBase);
}
- // Remove the original child profile.
- ProfileMap.erase(OrigChildContext);
+ // Remove the original child profile. If the MD5 of the new child profile
+ // collides with that of the old one, the [] operator has already
+ // overwritten the entry and no erase is needed.
+ if (NewChildProfileHash != OrigChildContextHash)
+ ProfileMap.erase(OrigChildContextHash);
}
}
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp b/contrib/llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp
index fbdd9a307321..98d0aa794529 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -61,9 +61,9 @@ static cl::opt<bool> ProfileIsFSDisciminator(
///
/// \param FS Function samples (including name and context) to print.
/// \param OS Stream to emit the output to.
-void SampleProfileReader::dumpFunctionProfile(SampleContext FContext,
+void SampleProfileReader::dumpFunctionProfile(const FunctionSamples &FS,
raw_ostream &OS) {
- OS << "Function: " << FContext.toString() << ": " << Profiles[FContext];
+ OS << "Function: " << FS.getContext().toString() << ": " << FS;
}
/// Dump all the function profiles found on stream \p OS.
@@ -71,7 +71,7 @@ void SampleProfileReader::dump(raw_ostream &OS) {
std::vector<NameFunctionSamples> V;
sortFuncProfiles(Profiles, V);
for (const auto &I : V)
- dumpFunctionProfile(I.first, OS);
+ dumpFunctionProfile(*I.second, OS);
}
static void dumpFunctionProfileJson(const FunctionSamples &S,
@@ -91,7 +91,7 @@ static void dumpFunctionProfileJson(const FunctionSamples &S,
JOS.attributeArray("calls", [&] {
for (const auto &J : CallTargets) {
JOS.object([&] {
- JOS.attribute("function", J.first);
+ JOS.attribute("function", J.first.str());
JOS.attribute("samples", J.second);
});
}
@@ -117,7 +117,7 @@ static void dumpFunctionProfileJson(const FunctionSamples &S,
};
JOS.object([&] {
- JOS.attribute("name", S.getName());
+ JOS.attribute("name", S.getFunction().str());
JOS.attribute("total", S.getTotalSamples());
if (TopLevel)
JOS.attribute("head", S.getHeadSamples());
@@ -180,12 +180,12 @@ static bool isOffsetLegal(unsigned L) { return (L & 0xffff) == L; }
/// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash,
uint32_t &Attributes) {
- if (Input.startswith("!CFGChecksum:")) {
+ if (Input.starts_with("!CFGChecksum:")) {
StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim();
return !CFGInfo.getAsInteger(10, FunctionHash);
}
- if (Input.startswith("!Attributes:")) {
+ if (Input.starts_with("!Attributes:")) {
StringRef Attrib = Input.substr(strlen("!Attributes:")).trim();
return !Attrib.getAsInteger(10, Attributes);
}
@@ -355,9 +355,7 @@ std::error_code SampleProfileReaderText::readImpl() {
SampleContext FContext(FName, CSNameTable);
if (FContext.hasContext())
++CSProfileCount;
- Profiles[FContext] = FunctionSamples();
- FunctionSamples &FProfile = Profiles[FContext];
- FProfile.setContext(FContext);
+ FunctionSamples &FProfile = Profiles.Create(FContext);
MergeResult(Result, FProfile.addTotalSamples(NumSamples));
MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
InlineStack.clear();
@@ -394,8 +392,8 @@ std::error_code SampleProfileReaderText::readImpl() {
switch (LineTy) {
case LineType::CallSiteProfile: {
FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
- LineLocation(LineOffset, Discriminator))[std::string(FName)];
- FSamples.setName(FName);
+ LineLocation(LineOffset, Discriminator))[FunctionId(FName)];
+ FSamples.setFunction(FunctionId(FName));
MergeResult(Result, FSamples.addTotalSamples(NumSamples));
InlineStack.push_back(&FSamples);
DepthMetadata = 0;
@@ -408,7 +406,8 @@ std::error_code SampleProfileReaderText::readImpl() {
FunctionSamples &FProfile = *InlineStack.back();
for (const auto &name_count : TargetCountMap) {
MergeResult(Result, FProfile.addCalledTargetSamples(
- LineOffset, Discriminator, name_count.first,
+ LineOffset, Discriminator,
+ FunctionId(name_count.first),
name_count.second));
}
MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
@@ -467,17 +466,14 @@ bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
unsigned NumBytesRead = 0;
- std::error_code EC;
uint64_t Val = decodeULEB128(Data, &NumBytesRead);
- if (Val > std::numeric_limits<T>::max())
- EC = sampleprof_error::malformed;
- else if (Data + NumBytesRead > End)
- EC = sampleprof_error::truncated;
- else
- EC = sampleprof_error::success;
-
- if (EC) {
+ if (Val > std::numeric_limits<T>::max()) {
+ std::error_code EC = sampleprof_error::malformed;
+ reportError(0, EC.message());
+ return EC;
+ } else if (Data + NumBytesRead > End) {
+ std::error_code EC = sampleprof_error::truncated;
reportError(0, EC.message());
return EC;
}
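
readNumber is built on decodeULEB128, where each byte contributes seven payload bits and the high bit marks continuation; the classic example 0xE5 0x8E 0x26 decodes to 624485. A small sketch using the same LLVM helper:

#include "llvm/Support/LEB128.h"
#include <cassert>
#include <cstdint>

// 0xE5 0x8E 0x26 -> 0x65 | (0x0E << 7) | (0x26 << 14) = 624485.
uint64_t ulebExample() {
  const uint8_t Buf[] = {0xE5, 0x8E, 0x26};
  unsigned NumBytesRead = 0;
  uint64_t Val = llvm::decodeULEB128(Buf, &NumBytesRead, Buf + sizeof(Buf));
  assert(NumBytesRead == 3 && Val == 624485);
  return Val;
}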
@@ -487,10 +483,9 @@ template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
}
ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
- std::error_code EC;
StringRef Str(reinterpret_cast<const char *>(Data));
if (Data + Str.size() + 1 > End) {
- EC = sampleprof_error::truncated;
+ std::error_code EC = sampleprof_error::truncated;
reportError(0, EC.message());
return EC;
}
@@ -501,22 +496,19 @@ ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
template <typename T>
ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
- std::error_code EC;
-
if (Data + sizeof(T) > End) {
- EC = sampleprof_error::truncated;
+ std::error_code EC = sampleprof_error::truncated;
reportError(0, EC.message());
return EC;
}
using namespace support;
- T Val = endian::readNext<T, little, unaligned>(Data);
+ T Val = endian::readNext<T, llvm::endianness::little, unaligned>(Data);
return Val;
}
template <typename T>
inline ErrorOr<size_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
- std::error_code EC;
auto Idx = readNumber<size_t>();
if (std::error_code EC = Idx.getError())
return EC;
@@ -525,48 +517,55 @@ inline ErrorOr<size_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
return *Idx;
}
-ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
+ErrorOr<FunctionId>
+SampleProfileReaderBinary::readStringFromTable(size_t *RetIdx) {
auto Idx = readStringIndex(NameTable);
if (std::error_code EC = Idx.getError())
return EC;
-
- // Lazy loading, if the string has not been materialized from memory storing
- // MD5 values, then it is default initialized with the null pointer. This can
- // only happen when using fixed length MD5, that bounds check is performed
- // while parsing the name table to ensure MD5NameMemStart points to an array
- // with enough MD5 entries.
- StringRef &SR = NameTable[*Idx];
- if (!SR.data()) {
- assert(MD5NameMemStart);
- using namespace support;
- uint64_t FID = endian::read<uint64_t, little, unaligned>(
- MD5NameMemStart + (*Idx) * sizeof(uint64_t));
- SR = MD5StringBuf.emplace_back(std::to_string(FID));
- }
- return SR;
+ if (RetIdx)
+ *RetIdx = *Idx;
+ return NameTable[*Idx];
}
-ErrorOr<SampleContextFrames> SampleProfileReaderBinary::readContextFromTable() {
+ErrorOr<SampleContextFrames>
+SampleProfileReaderBinary::readContextFromTable(size_t *RetIdx) {
auto ContextIdx = readNumber<size_t>();
if (std::error_code EC = ContextIdx.getError())
return EC;
if (*ContextIdx >= CSNameTable.size())
return sampleprof_error::truncated_name_table;
+ if (RetIdx)
+ *RetIdx = *ContextIdx;
return CSNameTable[*ContextIdx];
}
-ErrorOr<SampleContext> SampleProfileReaderBinary::readSampleContextFromTable() {
+ErrorOr<std::pair<SampleContext, uint64_t>>
+SampleProfileReaderBinary::readSampleContextFromTable() {
+ SampleContext Context;
+ size_t Idx;
if (ProfileIsCS) {
- auto FContext(readContextFromTable());
+ auto FContext(readContextFromTable(&Idx));
if (std::error_code EC = FContext.getError())
return EC;
- return SampleContext(*FContext);
+ Context = SampleContext(*FContext);
} else {
- auto FName(readStringFromTable());
+ auto FName(readStringFromTable(&Idx));
if (std::error_code EC = FName.getError())
return EC;
- return SampleContext(*FName);
+ Context = SampleContext(*FName);
+ }
+ // Since MD5SampleContextStart may point to the profile's file data, we
+ // need to make sure the same value is read on a big-endian CPU.
+ uint64_t Hash = support::endian::read64le(MD5SampleContextStart + Idx);
+ // Lazily compute the hash value and write it back to the table to cache
+ // it. The context's hash is only computed when it is referenced for the
+ // first time.
+ if (Hash == 0) {
+ assert(MD5SampleContextStart == MD5SampleContextTable.data());
+ Hash = Context.getHashCode();
+ support::endian::write64le(&MD5SampleContextTable[Idx], Hash);
}
+ return std::make_pair(Context, Hash);
}
std::error_code
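
The write-back above is lazy memoization with 0 as the not-yet-computed sentinel, stored little-endian so the table can alias on-disk data. A generic sketch of the pattern (hypothetical helper, not from the patch):

#include "llvm/Support/Endian.h"
#include <cstdint>
#include <vector>

// Return the cached 64-bit hash at Idx, computing and caching it on first
// use; 0 means "not yet computed" (no known input hashes to 0 here).
template <typename ComputeFn>
uint64_t getOrComputeHash(std::vector<uint64_t> &Table, size_t Idx,
                          ComputeFn Compute) {
  uint64_t Hash = llvm::support::endian::read64le(&Table[Idx]);
  if (Hash == 0) {
    Hash = Compute();
    llvm::support::endian::write64le(&Table[Idx], Hash);
  }
  return Hash;
}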
@@ -643,8 +642,8 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
- LineLocation(*LineOffset, DiscriminatorVal))[std::string(*FName)];
- CalleeProfile.setName(*FName);
+ LineLocation(*LineOffset, DiscriminatorVal))[*FName];
+ CalleeProfile.setFunction(*FName);
if (std::error_code EC = readProfile(CalleeProfile))
return EC;
}
@@ -659,16 +658,18 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
if (std::error_code EC = NumHeadSamples.getError())
return EC;
- ErrorOr<SampleContext> FContext(readSampleContextFromTable());
- if (std::error_code EC = FContext.getError())
+ auto FContextHash(readSampleContextFromTable());
+ if (std::error_code EC = FContextHash.getError())
return EC;
- Profiles[*FContext] = FunctionSamples();
- FunctionSamples &FProfile = Profiles[*FContext];
- FProfile.setContext(*FContext);
+ auto &[FContext, Hash] = *FContextHash;
+ // Use the cached hash value for insertion instead of recalculating it.
+ auto Res = Profiles.try_emplace(Hash, FContext, FunctionSamples());
+ FunctionSamples &FProfile = Res.first->second;
+ FProfile.setContext(FContext);
FProfile.addHeadSamples(*NumHeadSamples);
- if (FContext->hasContext())
+ if (FContext.hasContext())
CSProfileCount++;
if (std::error_code EC = readProfile(FProfile))
@@ -816,18 +817,21 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
FuncOffsetTable.reserve(*Size);
for (uint64_t I = 0; I < *Size; ++I) {
- auto FContext(readSampleContextFromTable());
- if (std::error_code EC = FContext.getError())
+ auto FContextHash(readSampleContextFromTable());
+ if (std::error_code EC = FContextHash.getError())
return EC;
+ auto &[FContext, Hash] = *FContextHash;
auto Offset = readNumber<uint64_t>();
if (std::error_code EC = Offset.getError())
return EC;
if (UseFuncOffsetList)
- FuncOffsetList.emplace_back(*FContext, *Offset);
+ FuncOffsetList.emplace_back(FContext, *Offset);
else
- FuncOffsetTable[*FContext] = *Offset;
+ // Because Profiles replaces the existing value with the new one when a
+ // collision happens, we also use the latest offset so that they stay
+ // consistent.
+ FuncOffsetTable[Hash] = *Offset;
}
return sampleprof_error::success;
@@ -877,13 +881,17 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
const SampleContext *CommonContext = nullptr;
for (const auto &NameOffset : FuncOffsetList) {
const auto &FContext = NameOffset.first;
- auto FName = FContext.getName();
+ FunctionId FName = FContext.getFunction();
+ StringRef FNameString;
+ if (!useMD5())
+ FNameString = FName.stringRef();
+
// For function in the current module, keep its farthest ancestor
// context. This can be used to load itself and its child and
// sibling contexts.
- if ((useMD5() && FuncGuidsToUse.count(std::stoull(FName.data()))) ||
- (!useMD5() && (FuncsToUse.count(FName) ||
- (Remapper && Remapper->exist(FName))))) {
+ if ((useMD5() && FuncGuidsToUse.count(FName.getHashCode())) ||
+ (!useMD5() && (FuncsToUse.count(FNameString) ||
+ (Remapper && Remapper->exist(FNameString))))) {
if (!CommonContext || !CommonContext->IsPrefixOf(FContext))
CommonContext = &FContext;
}
@@ -900,8 +908,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
} else if (useMD5()) {
assert(!useFuncOffsetList());
for (auto Name : FuncsToUse) {
- auto GUID = std::to_string(MD5Hash(Name));
- auto iter = FuncOffsetTable.find(StringRef(GUID));
+ auto GUID = MD5Hash(Name);
+ auto iter = FuncOffsetTable.find(GUID);
if (iter == FuncOffsetTable.end())
continue;
const uint8_t *FuncProfileAddr = Start + iter->second;
@@ -912,8 +920,9 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
assert(useFuncOffsetList());
for (auto NameOffset : FuncOffsetList) {
SampleContext FContext(NameOffset.first);
- auto FuncName = FContext.getName();
- if (!FuncsToUse.count(FuncName) && !Remapper->exist(FuncName))
+ auto FuncName = FContext.getFunction();
+ StringRef FuncNameStr = FuncName.stringRef();
+ if (!FuncsToUse.count(FuncNameStr) && !Remapper->exist(FuncNameStr))
continue;
const uint8_t *FuncProfileAddr = Start + NameOffset.second;
if (std::error_code EC = readFuncProfile(FuncProfileAddr))
@@ -922,7 +931,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
} else {
assert(!useFuncOffsetList());
for (auto Name : FuncsToUse) {
- auto iter = FuncOffsetTable.find(Name);
+ auto iter = FuncOffsetTable.find(MD5Hash(Name));
if (iter == FuncOffsetTable.end())
continue;
const uint8_t *FuncProfileAddr = Start + iter->second;
@@ -1045,22 +1054,33 @@ std::error_code SampleProfileReaderBinary::readNameTable() {
// tables mixing string and MD5, all of them have to be normalized to use MD5,
// because optimization passes can only handle either type.
bool UseMD5 = useMD5();
- if (UseMD5)
- MD5StringBuf.reserve(MD5StringBuf.size() + *Size);
NameTable.clear();
NameTable.reserve(*Size);
+ if (!ProfileIsCS) {
+ MD5SampleContextTable.clear();
+ if (UseMD5)
+ MD5SampleContextTable.reserve(*Size);
+ else
+ // If we are using strings, delay MD5 computation, since only a portion
+ // of the names are used by top-level functions. Use 0 to indicate that
+ // the MD5 value has not been computed yet; no known string hashes to 0.
+ MD5SampleContextTable.resize(*Size);
+ }
for (size_t I = 0; I < *Size; ++I) {
auto Name(readString());
if (std::error_code EC = Name.getError())
return EC;
if (UseMD5) {
- uint64_t FID = MD5Hash(*Name);
- NameTable.emplace_back(MD5StringBuf.emplace_back(std::to_string(FID)));
+ FunctionId FID(*Name);
+ if (!ProfileIsCS)
+ MD5SampleContextTable.emplace_back(FID.getHashCode());
+ NameTable.emplace_back(FID);
} else
- NameTable.push_back(*Name);
+ NameTable.push_back(FunctionId(*Name));
}
-
+ if (!ProfileIsCS)
+ MD5SampleContextStart = MD5SampleContextTable.data();
return sampleprof_error::success;
}
@@ -1080,14 +1100,16 @@ SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5,
if (Data + (*Size) * sizeof(uint64_t) > End)
return sampleprof_error::truncated;
- // Preallocate and initialize NameTable so we can check whether a name
- // index has been read before by checking whether the element in the
- // NameTable is empty, meanwhile readStringIndex can do the boundary
- // check using the size of NameTable.
- MD5StringBuf.reserve(MD5StringBuf.size() + *Size);
NameTable.clear();
- NameTable.resize(*Size);
- MD5NameMemStart = Data;
+ NameTable.reserve(*Size);
+ for (size_t I = 0; I < *Size; ++I) {
+ using namespace support;
+ uint64_t FID = endian::read<uint64_t, endianness::little, unaligned>(
+ Data + I * sizeof(uint64_t));
+ NameTable.emplace_back(FunctionId(FID));
+ }
+ if (!ProfileIsCS)
+ MD5SampleContextStart = reinterpret_cast<const uint64_t *>(Data);
Data = Data + (*Size) * sizeof(uint64_t);
return sampleprof_error::success;
}
@@ -1098,15 +1120,20 @@ SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5,
if (std::error_code EC = Size.getError())
return EC;
- MD5StringBuf.reserve(MD5StringBuf.size() + *Size);
NameTable.clear();
NameTable.reserve(*Size);
+ if (!ProfileIsCS)
+ MD5SampleContextTable.resize(*Size);
for (size_t I = 0; I < *Size; ++I) {
auto FID = readNumber<uint64_t>();
if (std::error_code EC = FID.getError())
return EC;
- NameTable.emplace_back(MD5StringBuf.emplace_back(std::to_string(*FID)));
+ if (!ProfileIsCS)
+ support::endian::write64le(&MD5SampleContextTable[I], *FID);
+ NameTable.emplace_back(FunctionId(*FID));
}
+ if (!ProfileIsCS)
+ MD5SampleContextStart = MD5SampleContextTable.data();
return sampleprof_error::success;
}
@@ -1124,6 +1151,14 @@ std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() {
CSNameTable.clear();
CSNameTable.reserve(*Size);
+ if (ProfileIsCS) {
+ // Delay MD5 computation of CS contexts until they are needed. Use 0 to
+ // indicate that the MD5 value has not been computed yet; no known string
+ // hashes to 0.
+ MD5SampleContextTable.clear();
+ MD5SampleContextTable.resize(*Size);
+ MD5SampleContextStart = MD5SampleContextTable.data();
+ }
for (size_t I = 0; I < *Size; ++I) {
CSNameTable.emplace_back(SampleContextFrameVector());
auto ContextSize = readNumber<uint32_t>();
@@ -1187,16 +1222,17 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
if (std::error_code EC = Discriminator.getError())
return EC;
- auto FContext(readSampleContextFromTable());
- if (std::error_code EC = FContext.getError())
+ auto FContextHash(readSampleContextFromTable());
+ if (std::error_code EC = FContextHash.getError())
return EC;
+ auto &[FContext, Hash] = *FContextHash;
FunctionSamples *CalleeProfile = nullptr;
if (FProfile) {
CalleeProfile = const_cast<FunctionSamples *>(
&FProfile->functionSamplesAt(LineLocation(
*LineOffset,
- *Discriminator))[std::string(FContext.get().getName())]);
+ *Discriminator))[FContext.getFunction()]);
}
if (std::error_code EC =
readFuncMetadata(ProfileHasAttribute, CalleeProfile))
@@ -1211,11 +1247,12 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
std::error_code
SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
while (Data < End) {
- auto FContext(readSampleContextFromTable());
- if (std::error_code EC = FContext.getError())
+ auto FContextHash(readSampleContextFromTable());
+ if (std::error_code EC = FContextHash.getError())
return EC;
+ auto &[FContext, Hash] = *FContextHash;
FunctionSamples *FProfile = nullptr;
- auto It = Profiles.find(*FContext);
+ auto It = Profiles.find(FContext);
if (It != Profiles.end())
FProfile = &It->second;
@@ -1605,7 +1642,7 @@ std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
// body, there will be identical replicated profiles for the
// original function. In this case, we simply do not bother updating
// the profile of the original function.
- FProfile = &Profiles[Name];
+ FProfile = &Profiles[FunctionId(Name)];
FProfile->addHeadSamples(HeadCount);
if (FProfile->getTotalSamples() > 0)
Update = false;
@@ -1617,9 +1654,9 @@ std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
uint32_t LineOffset = Offset >> 16;
uint32_t Discriminator = Offset & 0xffff;
FProfile = &CallerProfile->functionSamplesAt(
- LineLocation(LineOffset, Discriminator))[std::string(Name)];
+ LineLocation(LineOffset, Discriminator))[FunctionId(Name)];
}
- FProfile->setName(Name);
+ FProfile->setFunction(FunctionId(Name));
for (uint32_t I = 0; I < NumPosCounts; ++I) {
uint32_t Offset;
@@ -1675,7 +1712,8 @@ std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
if (Update)
FProfile->addCalledTargetSamples(LineOffset, Discriminator,
- TargetName, TargetCount);
+ FunctionId(TargetName),
+ TargetCount);
}
}
@@ -1736,11 +1774,13 @@ void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
// We will need to remap the entire context string.
assert(Remappings && "should be initialized while creating remapper");
for (auto &Sample : Reader.getProfiles()) {
- DenseSet<StringRef> NamesInSample;
+ DenseSet<FunctionId> NamesInSample;
Sample.second.findAllNames(NamesInSample);
- for (auto &Name : NamesInSample)
- if (auto Key = Remappings->insert(Name))
- NameMap.insert({Key, Name});
+ for (auto &Name : NamesInSample) {
+ StringRef NameStr = Name.stringRef();
+ if (auto Key = Remappings->insert(NameStr))
+ NameMap.insert({Key, NameStr});
+ }
}
RemappingApplied = true;
@@ -1748,8 +1788,11 @@ void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
std::optional<StringRef>
SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
- if (auto Key = Remappings->lookup(Fname))
- return NameMap.lookup(Key);
+ if (auto Key = Remappings->lookup(Fname)) {
+ StringRef Result = NameMap.lookup(Key);
+ if (!Result.empty())
+ return Result;
+ }
return std::nullopt;
}
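
DenseMap::lookup returns a value-initialized StringRef when the key is absent, so the added emptiness check keeps a missed remapping from being returned as a bogus empty name. A hedged sketch of the idiom with a hypothetical map:

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"
#include <optional>

// Distinguish "key not found" from a usable mapping (sketch).
std::optional<llvm::StringRef>
lookupName(const llvm::DenseMap<unsigned, llvm::StringRef> &NameMap,
           unsigned Key) {
  llvm::StringRef Result = NameMap.lookup(Key); // empty StringRef if absent
  if (!Result.empty())
    return Result;
  return std::nullopt;
}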
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/SampleProfWriter.cpp b/contrib/llvm-project/llvm/lib/ProfileData/SampleProfWriter.cpp
index 0873093ad426..625e523f13ce 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -83,11 +83,9 @@ void DefaultFunctionPruningStrategy::Erase(size_t CurrentOutputSize) {
NumToRemove = 1;
assert(NumToRemove <= SortedFunctions.size());
- llvm::for_each(
- llvm::make_range(SortedFunctions.begin() + SortedFunctions.size() -
- NumToRemove,
- SortedFunctions.end()),
- [&](const NameFunctionSamples &E) { ProfileMap.erase(E.first); });
+ for (const NameFunctionSamples &E :
+ llvm::drop_begin(SortedFunctions, SortedFunctions.size() - NumToRemove))
+ ProfileMap.erase(E.first);
SortedFunctions.resize(SortedFunctions.size() - NumToRemove);
}
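
llvm::drop_begin(R, N) iterates R without its first N elements, so the rewritten loop visits exactly the NumToRemove coldest entries at the tail of SortedFunctions. A small usage sketch:

#include "llvm/ADT/STLExtras.h"
#include <cstddef>
#include <vector>

// Visit only the last NumToRemove elements of a descending-sorted vector.
int sumTail(const std::vector<int> &Sorted, size_t NumToRemove) {
  int Sum = 0;
  for (int V : llvm::drop_begin(Sorted, Sorted.size() - NumToRemove))
    Sum += V; // e.g. {9,7,5,3,1} with NumToRemove = 2 visits 3 and 1
  return Sum;
}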
@@ -242,7 +240,7 @@ std::error_code SampleProfileWriterExtBinaryBase::writeContextIdx(
if (Context.hasContext())
return writeCSNameIdx(Context);
else
- return SampleProfileWriterBinary::writeNameIdx(Context.getName());
+ return SampleProfileWriterBinary::writeNameIdx(Context.getFunction());
}
std::error_code
@@ -348,23 +346,22 @@ std::error_code SampleProfileWriterExtBinaryBase::writeNameTable() {
return SampleProfileWriterBinary::writeNameTable();
auto &OS = *OutputStream;
- std::set<StringRef> V;
+ std::set<FunctionId> V;
stablizeNameTable(NameTable, V);
// Write out the MD5 name table. We wrote unencoded MD5 so reader can
// retrieve the name using the name index without having to read the
// whole name table.
encodeULEB128(NameTable.size(), OS);
- support::endian::Writer Writer(OS, support::little);
+ support::endian::Writer Writer(OS, llvm::endianness::little);
for (auto N : V)
- Writer.write(MD5Hash(N));
+ Writer.write(N.getHashCode());
return sampleprof_error::success;
}
std::error_code SampleProfileWriterExtBinaryBase::writeNameTableSection(
const SampleProfileMap &ProfileMap) {
for (const auto &I : ProfileMap) {
- assert(I.first == I.second.getContext() && "Inconsistent profile map");
addContext(I.second.getContext());
addNames(I.second);
}
@@ -372,10 +369,13 @@ std::error_code SampleProfileWriterExtBinaryBase::writeNameTableSection(
// If NameTable contains ".__uniq." suffix, set SecFlagUniqSuffix flag
// so compiler won't strip the suffix during profile matching after
// seeing the flag in the profile.
- for (const auto &I : NameTable) {
- if (I.first.contains(FunctionSamples::UniqSuffix)) {
- addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagUniqSuffix);
- break;
+ // Original names are unavailable when using MD5, so this flag serves no
+ // purpose.
+ if (!UseMD5) {
+ for (const auto &I : NameTable) {
+ if (I.first.stringRef().contains(FunctionSamples::UniqSuffix)) {
+ addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagUniqSuffix);
+ break;
+ }
}
}
@@ -397,12 +397,12 @@ std::error_code SampleProfileWriterExtBinaryBase::writeCSNameTableSection() {
auto &OS = *OutputStream;
encodeULEB128(OrderedContexts.size(), OS);
- support::endian::Writer Writer(OS, support::little);
+ support::endian::Writer Writer(OS, llvm::endianness::little);
for (auto Context : OrderedContexts) {
auto Frames = Context.getContextFrames();
encodeULEB128(Frames.size(), OS);
for (auto &Callsite : Frames) {
- if (std::error_code EC = writeNameIdx(Callsite.FuncName))
+ if (std::error_code EC = writeNameIdx(Callsite.Func))
return EC;
encodeULEB128(Callsite.Location.LineOffset, OS);
encodeULEB128(Callsite.Location.Discriminator, OS);
@@ -570,7 +570,7 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
if (FunctionSamples::ProfileIsCS)
OS << "[" << S.getContext().toString() << "]:" << S.getTotalSamples();
else
- OS << S.getName() << ":" << S.getTotalSamples();
+ OS << S.getFunction() << ":" << S.getTotalSamples();
if (Indent == 0)
OS << ":" << S.getHeadSamples();
@@ -630,10 +630,10 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
std::error_code
SampleProfileWriterBinary::writeContextIdx(const SampleContext &Context) {
assert(!Context.hasContext() && "cs profile is not supported");
- return writeNameIdx(Context.getName());
+ return writeNameIdx(Context.getFunction());
}
-std::error_code SampleProfileWriterBinary::writeNameIdx(StringRef FName) {
+std::error_code SampleProfileWriterBinary::writeNameIdx(FunctionId FName) {
auto &NTable = getNameTable();
const auto &Ret = NTable.find(FName);
if (Ret == NTable.end())
@@ -642,13 +642,13 @@ std::error_code SampleProfileWriterBinary::writeNameIdx(StringRef FName) {
return sampleprof_error::success;
}
-void SampleProfileWriterBinary::addName(StringRef FName) {
+void SampleProfileWriterBinary::addName(FunctionId FName) {
auto &NTable = getNameTable();
NTable.insert(std::make_pair(FName, 0));
}
void SampleProfileWriterBinary::addContext(const SampleContext &Context) {
- addName(Context.getName());
+ addName(Context.getFunction());
}
void SampleProfileWriterBinary::addNames(const FunctionSamples &S) {
@@ -656,14 +656,14 @@ void SampleProfileWriterBinary::addNames(const FunctionSamples &S) {
for (const auto &I : S.getBodySamples()) {
const SampleRecord &Sample = I.second;
for (const auto &J : Sample.getCallTargets())
- addName(J.first());
+ addName(J.first);
}
// Recursively add all the names for inlined callsites.
for (const auto &J : S.getCallsiteSamples())
for (const auto &FS : J.second) {
const FunctionSamples &CalleeSamples = FS.second;
- addName(CalleeSamples.getName());
+ addName(CalleeSamples.getFunction());
addNames(CalleeSamples);
}
}
@@ -672,26 +672,26 @@ void SampleProfileWriterExtBinaryBase::addContext(
const SampleContext &Context) {
if (Context.hasContext()) {
for (auto &Callsite : Context.getContextFrames())
- SampleProfileWriterBinary::addName(Callsite.FuncName);
+ SampleProfileWriterBinary::addName(Callsite.Func);
CSNameTable.insert(std::make_pair(Context, 0));
} else {
- SampleProfileWriterBinary::addName(Context.getName());
+ SampleProfileWriterBinary::addName(Context.getFunction());
}
}
void SampleProfileWriterBinary::stablizeNameTable(
- MapVector<StringRef, uint32_t> &NameTable, std::set<StringRef> &V) {
+ MapVector<FunctionId, uint32_t> &NameTable, std::set<FunctionId> &V) {
// Sort the names to make NameTable deterministic.
for (const auto &I : NameTable)
V.insert(I.first);
int i = 0;
- for (const StringRef &N : V)
+ for (const FunctionId &N : V)
NameTable[N] = i++;
}
std::error_code SampleProfileWriterBinary::writeNameTable() {
auto &OS = *OutputStream;
- std::set<StringRef> V;
+ std::set<FunctionId> V;
stablizeNameTable(NameTable, V);
// Write out the name table.
@@ -726,8 +726,7 @@ SampleProfileWriterBinary::writeHeader(const SampleProfileMap &ProfileMap) {
// Generate the name table for all the functions referenced in the profile.
for (const auto &I : ProfileMap) {
- assert(I.first == I.second.getContext() && "Inconsistent profile map");
- addContext(I.first);
+ addContext(I.second.getContext());
addNames(I.second);
}
@@ -745,7 +744,7 @@ void SampleProfileWriterExtBinaryBase::setToCompressSection(SecType Type) {
}
void SampleProfileWriterExtBinaryBase::allocSecHdrTable() {
- support::endian::Writer Writer(*OutputStream, support::little);
+ support::endian::Writer Writer(*OutputStream, llvm::endianness::little);
Writer.write(static_cast<uint64_t>(SectionHdrLayout.size()));
SecHdrTableOffset = OutputStream->tell();
@@ -775,7 +774,8 @@ std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() {
// but it needs to be read before SecLBRProfile (the order in
// SectionHdrLayout). So we use IndexMap above to switch the order.
support::endian::SeekableWriter Writer(
- static_cast<raw_pwrite_stream &>(*OutputStream), support::little);
+ static_cast<raw_pwrite_stream &>(*OutputStream),
+ llvm::endianness::little);
for (uint32_t LayoutIdx = 0; LayoutIdx < SectionHdrLayout.size();
LayoutIdx++) {
assert(IndexMap[LayoutIdx] < SecHdrTable.size() &&
@@ -838,7 +838,7 @@ std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) {
encodeULEB128(Sample.getSamples(), OS);
encodeULEB128(Sample.getCallTargets().size(), OS);
for (const auto &J : Sample.getSortedCallTargets()) {
- StringRef Callee = J.first;
+ FunctionId Callee = J.first;
uint64_t CalleeSamples = J.second;
if (std::error_code EC = writeNameIdx(Callee))
return EC;
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/SymbolRemappingReader.cpp b/contrib/llvm-project/llvm/lib/ProfileData/SymbolRemappingReader.cpp
index 78457beb3e49..805f66b68ce7 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/SymbolRemappingReader.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/SymbolRemappingReader.cpp
@@ -37,7 +37,7 @@ Error SymbolRemappingReader::read(MemoryBuffer &B) {
StringRef Line = *LineIt;
Line = Line.ltrim(' ');
// line_iterator only detects comments starting in column 1.
- if (Line.startswith("#") || Line.empty())
+ if (Line.starts_with("#") || Line.empty())
continue;
SmallVector<StringRef, 4> Parts;
diff --git a/contrib/llvm-project/llvm/lib/Remarks/Remark.cpp b/contrib/llvm-project/llvm/lib/Remarks/Remark.cpp
index 1b248db41747..ef42271a3c8d 100644
--- a/contrib/llvm-project/llvm/lib/Remarks/Remark.cpp
+++ b/contrib/llvm-project/llvm/lib/Remarks/Remark.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Remarks/Remark.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include <optional>
@@ -25,6 +26,16 @@ std::string Remark::getArgsAsMsg() const {
return OS.str();
}
+/// Returns the argument's value parsed as a base-10 integer, if possible.
+std::optional<int> Argument::getValAsInt() const {
+ APInt KeyVal;
+ if (Val.getAsInteger(10, KeyVal))
+ return std::nullopt;
+ return KeyVal.getSExtValue();
+}
+
+bool Argument::isValInt() const { return getValAsInt().has_value(); }
+
void RemarkLocation::print(raw_ostream &OS) const {
OS << "{ "
<< "File: " << SourceFilePath << ", Line: " << SourceLine
diff --git a/contrib/llvm-project/llvm/lib/Remarks/YAMLRemarkParser.cpp b/contrib/llvm-project/llvm/lib/Remarks/YAMLRemarkParser.cpp
index f5123b0f64ce..218b6691398b 100644
--- a/contrib/llvm-project/llvm/lib/Remarks/YAMLRemarkParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Remarks/YAMLRemarkParser.cpp
@@ -75,8 +75,7 @@ static Expected<uint64_t> parseVersion(StringRef &Buf) {
"Expecting version number.");
uint64_t Version =
- support::endian::read<uint64_t, support::little, support::unaligned>(
- Buf.data());
+ support::endian::read<uint64_t, llvm::endianness::little>(Buf.data());
if (Version != remarks::CurrentRemarkVersion)
return createStringError(std::errc::illegal_byte_sequence,
"Mismatching remark version. Got %" PRId64
@@ -91,8 +90,7 @@ static Expected<uint64_t> parseStrTabSize(StringRef &Buf) {
return createStringError(std::errc::illegal_byte_sequence,
"Expecting string table size.");
uint64_t StrTabSize =
- support::endian::read<uint64_t, support::little, support::unaligned>(
- Buf.data());
+ support::endian::read<uint64_t, llvm::endianness::little>(Buf.data());
Buf = Buf.drop_front(sizeof(uint64_t));
return StrTabSize;
}
@@ -138,7 +136,7 @@ Expected<std::unique_ptr<YAMLRemarkParser>> remarks::createYAMLParserFromMeta(
StrTab = std::move(*MaybeStrTab);
}
// If it starts with "---", there is no external file.
- if (!Buf.startswith("---")) {
+ if (!Buf.starts_with("---")) {
// At this point, we expect Buf to contain the external file path.
StringRef ExternalFilePath = Buf;
SmallString<80> FullPath;
diff --git a/contrib/llvm-project/llvm/lib/Support/APFloat.cpp b/contrib/llvm-project/llvm/lib/Support/APFloat.cpp
index 4a73739b5282..0a4f5ac01553 100644
--- a/contrib/llvm-project/llvm/lib/Support/APFloat.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/APFloat.cpp
@@ -3148,7 +3148,7 @@ bool IEEEFloat::convertFromStringSpecials(StringRef str) {
return false;
}
- if (str.startswith("nan") || str.startswith("NaN")) {
+ if (str.starts_with("nan") || str.starts_with("NaN")) {
str = str.drop_front(3);
// A NaN without payload.
@@ -4292,6 +4292,35 @@ bool IEEEFloat::getExactInverse(APFloat *inv) const {
return true;
}
+int IEEEFloat::getExactLog2Abs() const {
+ if (!isFinite() || isZero())
+ return INT_MIN;
+
+ const integerPart *Parts = significandParts();
+ const int PartCount = partCountForBits(semantics->precision);
+
+ int PopCount = 0;
+ for (int i = 0; i < PartCount; ++i) {
+ PopCount += llvm::popcount(Parts[i]);
+ if (PopCount > 1)
+ return INT_MIN;
+ }
+
+ if (exponent != semantics->minExponent)
+ return exponent;
+
+ int CountrParts = 0;
+ for (int i = 0; i < PartCount;
+ ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
+ if (Parts[i] != 0) {
+ return exponent - semantics->precision + CountrParts +
+ llvm::countr_zero(Parts[i]) + 1;
+ }
+ }
+
+ llvm_unreachable("didn't find the set bit");
+}
+
bool IEEEFloat::isSignaling() const {
if (!isNaN())
return false;
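
getExactLog2Abs reports the exponent when exactly one significand bit is set, i.e. when the value is an exact power of two (the denormal branch adjusts for a shifted significand), and INT_MIN otherwise. A usage sketch, assuming the APFloat-level wrapper added alongside these methods:

#include "llvm/ADT/APFloat.h"
#include <cassert>
#include <climits>

// Powers of two report their exponent; everything else reports INT_MIN.
void exactLog2AbsExamples() {
  assert(llvm::APFloat(8.0).getExactLog2Abs() == 3);
  assert(llvm::APFloat(0.25).getExactLog2Abs() == -2);
  assert(llvm::APFloat(-2.0).getExactLog2Abs() == 1); // sign is ignored
  assert(llvm::APFloat(3.0).getExactLog2Abs() == INT_MIN); // two bits set
}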
@@ -5087,6 +5116,16 @@ bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
return Ret;
}
+int DoubleAPFloat::getExactLog2() const {
+ // TODO: Implement me
+ return INT_MIN;
+}
+
+int DoubleAPFloat::getExactLog2Abs() const {
+ // TODO: Implement me
+ return INT_MIN;
+}
+
DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
APFloat::roundingMode RM) {
assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
diff --git a/contrib/llvm-project/llvm/lib/Support/ARMBuildAttrs.cpp b/contrib/llvm-project/llvm/lib/Support/ARMBuildAttrs.cpp
index 26f189302e37..6ff74e02820d 100644
--- a/contrib/llvm-project/llvm/lib/Support/ARMBuildAttrs.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/ARMBuildAttrs.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/ARMBuildAttributes.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/LEB128.h"
#include <iomanip>
diff --git a/contrib/llvm-project/llvm/lib/Support/AutoConvert.cpp b/contrib/llvm-project/llvm/lib/Support/AutoConvert.cpp
index 4fb7e242c348..8170e553ac6e 100644
--- a/contrib/llvm-project/llvm/lib/Support/AutoConvert.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/AutoConvert.cpp
@@ -14,21 +14,36 @@
#ifdef __MVS__
#include "llvm/Support/AutoConvert.h"
+#include <cassert>
#include <fcntl.h>
#include <sys/stat.h>
+#include <unistd.h>
-std::error_code llvm::disableAutoConversion(int FD) {
+static int savedStdHandleAutoConversionMode[3] = {-1, -1, -1};
+
+int disableAutoConversion(int FD) {
static const struct f_cnvrt Convert = {
- SETCVTOFF, // cvtcmd
- 0, // pccsid
- (short)FT_BINARY, // fccsid
+ SETCVTOFF, // cvtcmd
+ 0, // pccsid
+ 0, // fccsid
};
- if (fcntl(FD, F_CONTROL_CVT, &Convert) == -1)
- return std::error_code(errno, std::generic_category());
- return std::error_code();
+
+ return fcntl(FD, F_CONTROL_CVT, &Convert);
}
-std::error_code llvm::enableAutoConversion(int FD) {
+int restoreStdHandleAutoConversion(int FD) {
+ assert(FD == STDIN_FILENO || FD == STDOUT_FILENO || FD == STDERR_FILENO);
+ if (savedStdHandleAutoConversionMode[FD] == -1)
+ return 0;
+ struct f_cnvrt Cvt = {
+ savedStdHandleAutoConversionMode[FD], // cvtcmd
+ 0, // pccsid
+ 0, // fccsid
+ };
+ return (fcntl(FD, F_CONTROL_CVT, &Cvt));
+}
+
+int enableAutoConversion(int FD) {
struct f_cnvrt Query = {
QUERYCVT, // cvtcmd
0, // pccsid
@@ -36,17 +51,53 @@ std::error_code llvm::enableAutoConversion(int FD) {
};
if (fcntl(FD, F_CONTROL_CVT, &Query) == -1)
- return std::error_code(errno, std::generic_category());
+ return -1;
+
+ // We don't need conversion for UTF-8 tagged files.
+ // TODO: Remove the assumption of ISO8859-1 = UTF-8 here when we fully resolve
+ // problems related to UTF-8 tagged source files.
+ // When the pccsid is not ISO8859-1, autoconversion is still needed.
+ if (Query.pccsid == CCSID_ISO8859_1 &&
+ (Query.fccsid == CCSID_UTF_8 || Query.fccsid == CCSID_ISO8859_1))
+ return 0;
+
+ // Save the state of std handles before we make changes to it.
+ if ((FD == STDIN_FILENO || FD == STDOUT_FILENO || FD == STDERR_FILENO) &&
+ savedStdHandleAutoConversionMode[FD] == -1)
+ savedStdHandleAutoConversionMode[FD] = Query.cvtcmd;
+
+ if (FD == STDOUT_FILENO || FD == STDERR_FILENO)
+ Query.cvtcmd = SETCVTON;
+ else
+ Query.cvtcmd = SETCVTALL;
- Query.cvtcmd = SETCVTALL;
Query.pccsid =
(FD == STDIN_FILENO || FD == STDOUT_FILENO || FD == STDERR_FILENO)
? 0
: CCSID_UTF_8;
// Assume untagged files to be IBM-1047 encoded.
Query.fccsid = (Query.fccsid == FT_UNTAGGED) ? CCSID_IBM_1047 : Query.fccsid;
- if (fcntl(FD, F_CONTROL_CVT, &Query) == -1)
+ return fcntl(FD, F_CONTROL_CVT, &Query);
+}
+
+std::error_code llvm::disableAutoConversion(int FD) {
+ if (::disableAutoConversion(FD) == -1)
+ return std::error_code(errno, std::generic_category());
+
+ return std::error_code();
+}
+
+std::error_code llvm::enableAutoConversion(int FD) {
+ if (::enableAutoConversion(FD) == -1)
return std::error_code(errno, std::generic_category());
+
+ return std::error_code();
+}
+
+std::error_code llvm::restoreStdHandleAutoConversion(int FD) {
+ if (::restoreStdHandleAutoConversion(FD) == -1)
+ return std::error_code(errno, std::generic_category());
+
return std::error_code();
}
diff --git a/contrib/llvm-project/llvm/lib/Support/BLAKE3/blake3_impl.h b/contrib/llvm-project/llvm/lib/Support/BLAKE3/blake3_impl.h
index 8e5456d745cd..c679ecde4c4e 100644
--- a/contrib/llvm-project/llvm/lib/Support/BLAKE3/blake3_impl.h
+++ b/contrib/llvm-project/llvm/lib/Support/BLAKE3/blake3_impl.h
@@ -54,8 +54,9 @@ enum blake3_flags {
#endif
#if !defined(BLAKE3_USE_NEON)
- // If BLAKE3_USE_NEON not manually set, autodetect based on AArch64ness
- #if defined(IS_AARCH64)
+ // If BLAKE3_USE_NEON not manually set, autodetect based on
+ // AArch64ness and endianness.
+ #if defined(IS_AARCH64) && !defined(__ARM_BIG_ENDIAN)
#define BLAKE3_USE_NEON 1
#else
#define BLAKE3_USE_NEON 0
diff --git a/contrib/llvm-project/llvm/lib/Support/BalancedPartitioning.cpp b/contrib/llvm-project/llvm/lib/Support/BalancedPartitioning.cpp
index 113e9484f528..5843be949911 100644
--- a/contrib/llvm-project/llvm/lib/Support/BalancedPartitioning.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/BalancedPartitioning.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/BalancedPartitioning.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadic.h"
diff --git a/contrib/llvm-project/llvm/lib/Support/BinaryStreamReader.cpp b/contrib/llvm-project/llvm/lib/Support/BinaryStreamReader.cpp
index 2fe450db11dd..afc00864a5fb 100644
--- a/contrib/llvm-project/llvm/lib/Support/BinaryStreamReader.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/BinaryStreamReader.cpp
@@ -13,7 +13,6 @@
#include "llvm/Support/LEB128.h"
using namespace llvm;
-using endianness = llvm::support::endianness;
BinaryStreamReader::BinaryStreamReader(BinaryStreamRef Ref) : Stream(Ref) {}
diff --git a/contrib/llvm-project/llvm/lib/Support/BinaryStreamRef.cpp b/contrib/llvm-project/llvm/lib/Support/BinaryStreamRef.cpp
index 0ef00af4fcb7..a6d0bfa10b75 100644
--- a/contrib/llvm-project/llvm/lib/Support/BinaryStreamRef.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/BinaryStreamRef.cpp
@@ -10,7 +10,6 @@
#include "llvm/Support/BinaryByteStream.h"
using namespace llvm;
-using namespace llvm::support;
namespace {
@@ -18,9 +17,7 @@ class ArrayRefImpl : public BinaryStream {
public:
ArrayRefImpl(ArrayRef<uint8_t> Data, endianness Endian) : BBS(Data, Endian) {}
- llvm::support::endianness getEndian() const override {
- return BBS.getEndian();
- }
+ llvm::endianness getEndian() const override { return BBS.getEndian(); }
Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) override {
return BBS.readBytes(Offset, Size, Buffer);
@@ -41,9 +38,7 @@ public:
: BBS(Data, Endian) {}
// Inherited via WritableBinaryStream
- llvm::support::endianness getEndian() const override {
- return BBS.getEndian();
- }
+ llvm::endianness getEndian() const override { return BBS.getEndian(); }
Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) override {
return BBS.readBytes(Offset, Size, Buffer);
diff --git a/contrib/llvm-project/llvm/lib/Support/BinaryStreamWriter.cpp b/contrib/llvm-project/llvm/lib/Support/BinaryStreamWriter.cpp
index 3d87a30a86a1..dff08fee3fef 100644
--- a/contrib/llvm-project/llvm/lib/Support/BinaryStreamWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/BinaryStreamWriter.cpp
@@ -22,7 +22,7 @@ BinaryStreamWriter::BinaryStreamWriter(WritableBinaryStream &Stream)
: Stream(Stream) {}
BinaryStreamWriter::BinaryStreamWriter(MutableArrayRef<uint8_t> Data,
- llvm::support::endianness Endian)
+ llvm::endianness Endian)
: Stream(Data, Endian) {}
Error BinaryStreamWriter::writeBytes(ArrayRef<uint8_t> Buffer) {
diff --git a/contrib/llvm-project/llvm/lib/Support/BlockFrequency.cpp b/contrib/llvm-project/llvm/lib/Support/BlockFrequency.cpp
index a4a1e477d940..329f1e12cdc2 100644
--- a/contrib/llvm-project/llvm/lib/Support/BlockFrequency.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/BlockFrequency.cpp
@@ -12,6 +12,7 @@
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/MathExtras.h"
using namespace llvm;
@@ -36,3 +37,11 @@ BlockFrequency BlockFrequency::operator/(BranchProbability Prob) const {
Freq /= Prob;
return Freq;
}
+
+std::optional<BlockFrequency> BlockFrequency::mul(uint64_t Factor) const {
+ bool Overflow;
+ uint64_t ResultFrequency = SaturatingMultiply(Frequency, Factor, &Overflow);
+ if (Overflow)
+ return {};
+ return BlockFrequency(ResultFrequency);
+}
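
BlockFrequency::mul surfaces overflow through SaturatingMultiply's out-parameter instead of silently saturating. A minimal sketch of the same idiom:

#include "llvm/Support/MathExtras.h"
#include <cstdint>
#include <optional>

// Multiply with explicit overflow detection (sketch of the idiom above).
std::optional<uint64_t> checkedMul(uint64_t A, uint64_t B) {
  bool Overflow;
  uint64_t R = llvm::SaturatingMultiply(A, B, &Overflow);
  if (Overflow)
    return std::nullopt; // let the caller decide how to handle it
  return R;
}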
diff --git a/contrib/llvm-project/llvm/lib/Support/CachePruning.cpp b/contrib/llvm-project/llvm/lib/Support/CachePruning.cpp
index a56d8356d838..4eae08b18c9b 100644
--- a/contrib/llvm-project/llvm/lib/Support/CachePruning.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/CachePruning.cpp
@@ -218,7 +218,7 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy,
// This acts as a safeguard against data loss if the user specifies the
// wrong directory as their cache directory.
StringRef filename = sys::path::filename(File->path());
- if (!filename.startswith("llvmcache-") && !filename.startswith("Thin-"))
+ if (!filename.starts_with("llvmcache-") && !filename.starts_with("Thin-"))
continue;
// Look at this file. If we can't stat it, there's nothing interesting
diff --git a/contrib/llvm-project/llvm/lib/Support/Caching.cpp b/contrib/llvm-project/llvm/lib/Support/Caching.cpp
index f20f08a865c7..628e23e1cb3d 100644
--- a/contrib/llvm-project/llvm/lib/Support/Caching.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Caching.cpp
@@ -145,7 +145,9 @@ Expected<FileCache> llvm::localCache(const Twine &CacheNameRef,
// ensures the filesystem isn't mutated until the cache is.
if (std::error_code EC = sys::fs::create_directories(
CacheDirectoryPath, /*IgnoreExisting=*/true))
- return errorCodeToError(EC);
+ return createStringError(EC, Twine("can't create cache directory ") +
+ CacheDirectoryPath + ": " +
+ EC.message());
// Write to a temporary to avoid race condition
SmallString<64> TempFilenameModel;
diff --git a/contrib/llvm-project/llvm/lib/Support/Chrono.cpp b/contrib/llvm-project/llvm/lib/Support/Chrono.cpp
index 859ece8f5500..993d200675fe 100644
--- a/contrib/llvm-project/llvm/lib/Support/Chrono.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Chrono.cpp
@@ -40,6 +40,24 @@ static inline struct tm getStructTM(TimePoint<> TP) {
return Storage;
}
+static inline struct tm getStructTMUtc(UtcTime<> TP) {
+ struct tm Storage;
+ std::time_t OurTime = toTimeT(TP);
+
+#if defined(LLVM_ON_UNIX)
+ struct tm *LT = ::gmtime_r(&OurTime, &Storage);
+ assert(LT);
+ (void)LT;
+#endif
+#if defined(_WIN32)
+ int Error = ::gmtime_s(&Storage, &OurTime);
+ assert(!Error);
+ (void)Error;
+#endif
+
+ return Storage;
+}
+
raw_ostream &operator<<(raw_ostream &OS, TimePoint<> TP) {
struct tm LT = getStructTM(TP);
char Buffer[sizeof("YYYY-MM-DD HH:MM:SS")];
@@ -50,12 +68,10 @@ raw_ostream &operator<<(raw_ostream &OS, TimePoint<> TP) {
.count()));
}
-void format_provider<TimePoint<>>::format(const TimePoint<> &T, raw_ostream &OS,
- StringRef Style) {
+template <class T>
+static void format(const T &Fractional, struct tm &LT, raw_ostream &OS,
+ StringRef Style) {
using namespace std::chrono;
- TimePoint<seconds> Truncated = time_point_cast<seconds>(T);
- auto Fractional = T - Truncated;
- struct tm LT = getStructTM(Truncated);
// Handle extensions first. strftime mangles unknown %x on some platforms.
if (Style.empty()) Style = "%Y-%m-%d %H:%M:%S.%N";
std::string Format;
@@ -90,4 +106,23 @@ void format_provider<TimePoint<>>::format(const TimePoint<> &T, raw_ostream &OS,
OS << (Len ? Buffer : "BAD-DATE-FORMAT");
}
+void format_provider<UtcTime<std::chrono::seconds>>::format(
+ const UtcTime<std::chrono::seconds> &T, raw_ostream &OS, StringRef Style) {
+ using namespace std::chrono;
+ UtcTime<seconds> Truncated =
+ UtcTime<seconds>(duration_cast<seconds>(T.time_since_epoch()));
+ auto Fractional = T - Truncated;
+ struct tm LT = getStructTMUtc(Truncated);
+ llvm::format(Fractional, LT, OS, Style);
+}
+
+void format_provider<TimePoint<>>::format(const TimePoint<> &T, raw_ostream &OS,
+ StringRef Style) {
+ using namespace std::chrono;
+ TimePoint<seconds> Truncated = time_point_cast<seconds>(T);
+ auto Fractional = T - Truncated;
+ struct tm LT = getStructTM(Truncated);
+ llvm::format(Fractional, LT, OS, Style);
+}
+
} // namespace llvm
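
Either provider can then be driven through formatv; a hedged usage sketch of the style strings handled above (%N is the sub-second extension):

#include "llvm/Support/Chrono.h"
#include "llvm/Support/FormatVariadic.h"
#include <chrono>
#include <string>

// Render the current time with the TimePoint format_provider.
std::string timestampNow() {
  llvm::sys::TimePoint<> Now = std::chrono::system_clock::now();
  return llvm::formatv("{0:%Y-%m-%d %H:%M:%S.%N}", Now).str();
}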
diff --git a/contrib/llvm-project/llvm/lib/Support/CodeGenCoverage.cpp b/contrib/llvm-project/llvm/lib/Support/CodeGenCoverage.cpp
index d5ab77b9c66f..0df45b4ff2ba 100644
--- a/contrib/llvm-project/llvm/lib/Support/CodeGenCoverage.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/CodeGenCoverage.cpp
@@ -58,7 +58,8 @@ bool CodeGenCoverage::parse(MemoryBuffer &Buffer, StringRef BackendName) {
if (std::distance(CurPtr, Buffer.getBufferEnd()) < 8)
return false; // Data is invalid. Not enough bytes for another rule id.
- uint64_t RuleID = support::endian::read64(CurPtr, support::native);
+ uint64_t RuleID =
+ support::endian::read64(CurPtr, llvm::endianness::native);
CurPtr += 8;
// ~0ull terminates the rule id list.
diff --git a/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp b/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp
index d3efb8b67be5..088b4e4d755c 100644
--- a/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp
@@ -324,6 +324,13 @@ public:
return false;
}
+ bool hasNamedSubCommands() const {
+ for (const auto *S : RegisteredSubCommands)
+ if (!S->getName().empty())
+ return true;
+ return false;
+ }
+
SubCommand *getActiveSubCommand() { return ActiveSubCommand; }
void updateArgStr(Option *O, StringRef NewName, SubCommand *SC) {
@@ -425,7 +432,7 @@ private:
return nullptr;
return Opt;
}
- SubCommand *LookupSubCommand(StringRef Name);
+ SubCommand *LookupSubCommand(StringRef Name, std::string &NearestString);
};
} // namespace
@@ -550,9 +557,12 @@ Option *CommandLineParser::LookupOption(SubCommand &Sub, StringRef &Arg,
return I->second;
}
-SubCommand *CommandLineParser::LookupSubCommand(StringRef Name) {
+SubCommand *CommandLineParser::LookupSubCommand(StringRef Name,
+ std::string &NearestString) {
if (Name.empty())
return &SubCommand::getTopLevel();
+ // Find a subcommand whose name is within edit distance 1 of Name, to
+ // suggest as a near match.
+ SubCommand *NearestMatch = nullptr;
for (auto *S : RegisteredSubCommands) {
if (S == &SubCommand::getAll())
continue;
@@ -561,7 +571,14 @@ SubCommand *CommandLineParser::LookupSubCommand(StringRef Name) {
if (StringRef(S->getName()) == StringRef(Name))
return S;
+
+ if (!NearestMatch && S->getName().edit_distance(Name) < 2)
+ NearestMatch = S;
}
+
+ if (NearestMatch)
+ NearestString = NearestMatch->getName();
+
return &SubCommand::getTopLevel();
}
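
StringRef::edit_distance computes the Levenshtein distance, so with exact matches already returned above, the < 2 test keeps the first subcommand reachable by a single insertion, deletion, or substitution. A small sketch:

#include "llvm/ADT/StringRef.h"

// One-typo matching as used for subcommand suggestions (sketch).
bool isNearMiss(llvm::StringRef Candidate, llvm::StringRef Typed) {
  return Candidate != Typed && Candidate.edit_distance(Typed) < 2;
}
// isNearMiss("build", "buil") is true; isNearMiss("build", "make") is false.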
@@ -1527,10 +1544,14 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
int FirstArg = 1;
SubCommand *ChosenSubCommand = &SubCommand::getTopLevel();
- if (argc >= 2 && argv[FirstArg][0] != '-') {
+ std::string NearestSubCommandString;
+ bool MaybeNamedSubCommand =
+ argc >= 2 && argv[FirstArg][0] != '-' && hasNamedSubCommands();
+ if (MaybeNamedSubCommand) {
// If the first argument specifies a valid subcommand, start processing
// options from the second argument.
- ChosenSubCommand = LookupSubCommand(StringRef(argv[FirstArg]));
+ ChosenSubCommand =
+ LookupSubCommand(StringRef(argv[FirstArg]), NearestSubCommandString);
if (ChosenSubCommand != &SubCommand::getTopLevel())
FirstArg = 2;
}
@@ -1602,7 +1623,6 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
bool DashDashFound = false; // Have we read '--'?
for (int i = FirstArg; i < argc; ++i) {
Option *Handler = nullptr;
- Option *NearestHandler = nullptr;
std::string NearestHandlerString;
StringRef Value;
StringRef ArgName = "";
@@ -1667,6 +1687,13 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
Handler = LookupLongOption(*ChosenSubCommand, ArgName, Value,
LongOptionsUseDoubleDash, HaveDoubleDash);
+ // If the handler is not found in the specialized subcommand, look it up
+ // in the top-level subcommand: a cl::opt without cl::sub belongs to the
+ // top-level subcommand.
+ if (!Handler && ChosenSubCommand != &SubCommand::getTopLevel())
+ Handler = LookupLongOption(SubCommand::getTopLevel(), ArgName, Value,
+ LongOptionsUseDoubleDash, HaveDoubleDash);
+
// Check to see if this "option" is really a prefixed or grouped argument.
if (!Handler && !(LongOptionsUseDoubleDash && HaveDoubleDash))
Handler = HandlePrefixedOrGroupedOption(ArgName, Value, ErrorParsing,
@@ -1675,26 +1702,39 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
// Otherwise, look for the closest available option to report to the user
// in the upcoming error.
if (!Handler && SinkOpts.empty())
- NearestHandler =
- LookupNearestOption(ArgName, OptionsMap, NearestHandlerString);
+ LookupNearestOption(ArgName, OptionsMap, NearestHandlerString);
}
if (!Handler) {
- if (SinkOpts.empty()) {
- *Errs << ProgramName << ": Unknown command line argument '" << argv[i]
- << "'. Try: '" << argv[0] << " --help'\n";
-
- if (NearestHandler) {
- // If we know a near match, report it as well.
- *Errs << ProgramName << ": Did you mean '"
- << PrintArg(NearestHandlerString, 0) << "'?\n";
- }
-
- ErrorParsing = true;
- } else {
+ if (!SinkOpts.empty()) {
for (Option *SinkOpt : SinkOpts)
SinkOpt->addOccurrence(i, "", StringRef(argv[i]));
+ continue;
}
+
+ auto ReportUnknownArgument = [&](bool IsArg,
+ StringRef NearestArgumentName) {
+ *Errs << ProgramName << ": Unknown "
+ << (IsArg ? "command line argument" : "subcommand") << " '"
+ << argv[i] << "'. Try: '" << argv[0] << " --help'\n";
+
+ if (NearestArgumentName.empty())
+ return;
+
+ *Errs << ProgramName << ": Did you mean '";
+ if (IsArg)
+ *Errs << PrintArg(NearestArgumentName, 0);
+ else
+ *Errs << NearestArgumentName;
+ *Errs << "'?\n";
+ };
+
+ if (i > 1 || !MaybeNamedSubCommand)
+ ReportUnknownArgument(/*IsArg=*/true, NearestHandlerString);
+ else
+ ReportUnknownArgument(/*IsArg=*/false, NearestSubCommandString);
+
+ ErrorParsing = true;
continue;
}
@@ -2181,7 +2221,7 @@ void generic_parser_base::printGenericOptionDiff(
unsigned NumOpts = getNumOptions();
for (unsigned i = 0; i != NumOpts; ++i) {
- if (Value.compare(getOptionValue(i)))
+ if (!Value.compare(getOptionValue(i)))
continue;
outs() << "= " << getOption(i);
@@ -2189,7 +2229,7 @@ void generic_parser_base::printGenericOptionDiff(
size_t NumSpaces = MaxOptWidth > L ? MaxOptWidth - L : 0;
outs().indent(NumSpaces) << " (default: ";
for (unsigned j = 0; j != NumOpts; ++j) {
- if (Default.compare(getOptionValue(j)))
+ if (!Default.compare(getOptionValue(j)))
continue;
outs() << getOption(j);
break;
@@ -2365,7 +2405,7 @@ public:
if (Sub == &SubCommand::getTopLevel()) {
outs() << "USAGE: " << GlobalParser->ProgramName;
- if (Subs.size() > 2)
+ if (!Subs.empty())
outs() << " [subcommand]";
outs() << " [options]";
} else {
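
A minimal sketch of the behavior the CommandLine changes above enable, using a hypothetical tool with one named subcommand: a misspelled subcommand within edit distance 1 now yields a suggestion rather than a generic unknown-argument error.

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    static cl::SubCommand BuildSub("build", "Build the project");
    static cl::opt<bool> Verbose("verbose", cl::desc("Verbose output"),
                                 cl::sub(BuildSub));

    int main(int argc, char **argv) {
      // "tool biuld" now prints: Unknown subcommand 'biuld'. ...
      // tool: Did you mean 'build'?
      cl::ParseCommandLineOptions(argc, argv);
      return 0;
    }
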
diff --git a/contrib/llvm-project/llvm/lib/Support/DataExtractor.cpp b/contrib/llvm-project/llvm/lib/Support/DataExtractor.cpp
index 59a44f4071b5..eac3c32cfd3b 100644
--- a/contrib/llvm-project/llvm/lib/Support/DataExtractor.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/DataExtractor.cpp
@@ -202,7 +202,7 @@ static T getLEB128(StringRef Data, uint64_t *OffsetPtr, Error *Err,
if (isError(Err))
return T();
- const char *error;
+ const char *error = nullptr;
unsigned bytes_read;
T result =
Decoder(Bytes.data() + *OffsetPtr, &bytes_read, Bytes.end(), &error);
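
The nullptr initialization above guards the later error check against any decoder path that leaves `error` unset. A minimal sketch of the surrounding DataExtractor API, with illustrative parameters:

    #include "llvm/Support/DataExtractor.h"
    using namespace llvm;

    uint64_t readULEB128(StringRef Bytes) {
      DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
      uint64_t Offset = 0;
      Error Err = Error::success();
      uint64_t Value = DE.getULEB128(&Offset, &Err);
      if (Err) {
        consumeError(std::move(Err)); // truncated or malformed input
        return 0;
      }
      return Value;
    }
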
diff --git a/contrib/llvm-project/llvm/lib/Support/DebugCounter.cpp b/contrib/llvm-project/llvm/lib/Support/DebugCounter.cpp
index 26293bf92a42..502665d2a834 100644
--- a/contrib/llvm-project/llvm/lib/Support/DebugCounter.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/DebugCounter.cpp
@@ -100,7 +100,7 @@ void DebugCounter::push_back(const std::string &Val) {
}
// Now we need to see if this is the skip or the count, remove the suffix, and
// add it to the counter values.
- if (CounterPair.first.endswith("-skip")) {
+ if (CounterPair.first.ends_with("-skip")) {
auto CounterName = CounterPair.first.drop_back(5);
unsigned CounterID = getCounterId(std::string(CounterName));
if (!CounterID) {
@@ -113,7 +113,7 @@ void DebugCounter::push_back(const std::string &Val) {
CounterInfo &Counter = Counters[CounterID];
Counter.Skip = CounterVal;
Counter.IsSet = true;
- } else if (CounterPair.first.endswith("-count")) {
+ } else if (CounterPair.first.ends_with("-count")) {
auto CounterName = CounterPair.first.drop_back(6);
unsigned CounterID = getCounterId(std::string(CounterName));
if (!CounterID) {
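
For context, the suffixes parsed here come from option values of the form `<name>-skip=N` and `<name>-count=M`. A minimal sketch of how a counter is registered and consumed; the counter name is hypothetical:

    #include "llvm/Support/DebugCounter.h"
    using namespace llvm;

    // Registers a counter that can be driven externally via
    // -debug-counter=my-counter-skip=2,my-counter-count=1
    DEBUG_COUNTER(MyCounter, "my-counter", "Controls my transformation");

    void maybeTransform() {
      // With skip=2,count=1 this executes only on the third call.
      if (DebugCounter::shouldExecute(MyCounter)) {
        // ... perform the transformation ...
      }
    }
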
diff --git a/contrib/llvm-project/llvm/lib/Support/ELFAttributeParser.cpp b/contrib/llvm-project/llvm/lib/Support/ELFAttributeParser.cpp
index 2e90b70dc83f..d3100c9ebb21 100644
--- a/contrib/llvm-project/llvm/lib/Support/ELFAttributeParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/ELFAttributeParser.cpp
@@ -189,9 +189,9 @@ Error ELFAttributeParser::parseSubsection(uint32_t length) {
}
Error ELFAttributeParser::parse(ArrayRef<uint8_t> section,
- support::endianness endian) {
+ llvm::endianness endian) {
unsigned sectionNumber = 0;
- de = DataExtractor(section, endian == support::little, 0);
+ de = DataExtractor(section, endian == llvm::endianness::little, 0);
// For early returns, we have more specific errors, consume the Error in
// cursor.
diff --git a/contrib/llvm-project/llvm/lib/Support/ELFAttributes.cpp b/contrib/llvm-project/llvm/lib/Support/ELFAttributes.cpp
index f2a1732fc376..63d144864446 100644
--- a/contrib/llvm-project/llvm/lib/Support/ELFAttributes.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/ELFAttributes.cpp
@@ -23,7 +23,7 @@ StringRef ELFAttrs::attrTypeAsString(unsigned attr, TagNameMap tagNameMap,
std::optional<unsigned> ELFAttrs::attrTypeFromString(StringRef tag,
TagNameMap tagNameMap) {
- bool hasTagPrefix = tag.startswith("Tag_");
+ bool hasTagPrefix = tag.starts_with("Tag_");
auto tagNameIt =
find_if(tagNameMap, [tag, hasTagPrefix](const TagNameItem item) {
return item.tagName.drop_front(hasTagPrefix ? 0 : 4) == tag;
diff --git a/contrib/llvm-project/llvm/lib/Support/FileOutputBuffer.cpp b/contrib/llvm-project/llvm/lib/Support/FileOutputBuffer.cpp
index c11ee59da0dd..58a06a34e8cf 100644
--- a/contrib/llvm-project/llvm/lib/Support/FileOutputBuffer.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/FileOutputBuffer.cpp
@@ -14,6 +14,7 @@
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Memory.h"
+#include "llvm/Support/TimeProfiler.h"
#include <system_error>
#if !defined(_MSC_VER) && !defined(__MINGW32__)
@@ -43,6 +44,8 @@ public:
size_t getBufferSize() const override { return Buffer.size(); }
Error commit() override {
+ llvm::TimeTraceScope timeScope("Commit buffer to disk");
+
// Unmap buffer, letting OS flush dirty pages to file on disk.
Buffer.unmap();
diff --git a/contrib/llvm-project/llvm/lib/Support/FloatingPointMode.cpp b/contrib/llvm-project/llvm/lib/Support/FloatingPointMode.cpp
index 9543884ff46e..5a2836eb8243 100644
--- a/contrib/llvm-project/llvm/lib/Support/FloatingPointMode.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/FloatingPointMode.cpp
@@ -32,7 +32,7 @@ FPClassTest llvm::fneg(FPClassTest Mask) {
return NewMask;
}
-FPClassTest llvm::fabs(FPClassTest Mask) {
+FPClassTest llvm::inverse_fabs(FPClassTest Mask) {
FPClassTest NewMask = Mask & fcNan;
if (Mask & fcPosZero)
NewMask |= fcZero;
@@ -45,6 +45,19 @@ FPClassTest llvm::fabs(FPClassTest Mask) {
return NewMask;
}
+FPClassTest llvm::unknown_sign(FPClassTest Mask) {
+ FPClassTest NewMask = Mask & fcNan;
+ if (Mask & fcZero)
+ NewMask |= fcZero;
+ if (Mask & fcSubnormal)
+ NewMask |= fcSubnormal;
+ if (Mask & fcNormal)
+ NewMask |= fcNormal;
+ if (Mask & fcInf)
+ NewMask |= fcInf;
+ return NewMask;
+}
+
// Every bitfield has a unique name and one or more aliasing names that cover
// multiple bits. Names should be listed in order of preference, with higher
// popcounts listed first.
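
A small sketch of the new helper's semantics: `unknown_sign` keeps NaN and widens each sign-specific class to cover both signs, modeling a value whose sign bit is no longer known.

    #include "llvm/ADT/FloatingPointMode.h"
    #include <cassert>
    using namespace llvm;

    void signExample() {
      // A known positive normal could be either sign once its sign is lost.
      FPClassTest Widened = unknown_sign(fcPosNormal);
      assert(Widened == fcNormal); // fcNormal == fcPosNormal | fcNegNormal
    }
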
diff --git a/contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp b/contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp
index b8c6ea80b44c..7004adf461a0 100644
--- a/contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp
@@ -11,16 +11,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/GlobPattern.h"
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Errc.h"
using namespace llvm;
-static bool hasWildcard(StringRef S) {
- return S.find_first_of("?*[\\") != StringRef::npos;
-}
-
// Expands character ranges and returns a bitmap.
// For example, "a-cf-hz" is expanded to "abcfghz".
static Expected<BitVector> expand(StringRef S, StringRef Original) {
@@ -58,120 +53,198 @@ static Expected<BitVector> expand(StringRef S, StringRef Original) {
return BV;
}
-// This is a scanner for the glob pattern.
-// A glob pattern token is one of "*", "?", "\", "[<chars>]", "[^<chars>]"
-// (which is a negative form of "[<chars>]"), "[!<chars>]" (which is
-// equivalent to "[^<chars>]"), or a non-meta character.
-// This function returns the first token in S.
-static Expected<BitVector> scan(StringRef &S, StringRef Original) {
- switch (S[0]) {
- case '*':
- S = S.substr(1);
- // '*' is represented by an empty bitvector.
- // All other bitvectors are 256-bit long.
- return BitVector();
- case '?':
- S = S.substr(1);
- return BitVector(256, true);
- case '[': {
- // ']' is allowed as the first character of a character class. '[]' is
- // invalid. So, just skip the first character.
- size_t End = S.find(']', 2);
- if (End == StringRef::npos)
- return make_error<StringError>("invalid glob pattern: " + Original,
- errc::invalid_argument);
-
- StringRef Chars = S.substr(1, End - 1);
- S = S.substr(End + 1);
- if (Chars.startswith("^") || Chars.startswith("!")) {
- Expected<BitVector> BV = expand(Chars.substr(1), Original);
- if (!BV)
- return BV.takeError();
- return BV->flip();
+// Identify brace expansions in S and return the list of patterns they expand
+// into.
+static Expected<SmallVector<std::string, 1>>
+parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
+ SmallVector<std::string> SubPatterns = {S.str()};
+ if (!MaxSubPatterns || !S.contains('{'))
+ return std::move(SubPatterns);
+
+ struct BraceExpansion {
+ size_t Start;
+ size_t Length;
+ SmallVector<StringRef, 2> Terms;
+ };
+ SmallVector<BraceExpansion, 0> BraceExpansions;
+
+ BraceExpansion *CurrentBE = nullptr;
+ size_t TermBegin;
+ for (size_t I = 0, E = S.size(); I != E; ++I) {
+ if (S[I] == '[') {
+ I = S.find(']', I + 2);
+ if (I == std::string::npos)
+ return make_error<StringError>("invalid glob pattern, unmatched '['",
+ errc::invalid_argument);
+ } else if (S[I] == '{') {
+ if (CurrentBE)
+ return make_error<StringError>(
+ "nested brace expansions are not supported",
+ errc::invalid_argument);
+ CurrentBE = &BraceExpansions.emplace_back();
+ CurrentBE->Start = I;
+ TermBegin = I + 1;
+ } else if (S[I] == ',') {
+ if (!CurrentBE)
+ continue;
+ CurrentBE->Terms.push_back(S.substr(TermBegin, I - TermBegin));
+ TermBegin = I + 1;
+ } else if (S[I] == '}') {
+ if (!CurrentBE)
+ continue;
+ if (CurrentBE->Terms.empty())
+ return make_error<StringError>(
+ "empty or singleton brace expansions are not supported",
+ errc::invalid_argument);
+ CurrentBE->Terms.push_back(S.substr(TermBegin, I - TermBegin));
+ CurrentBE->Length = I - CurrentBE->Start + 1;
+ CurrentBE = nullptr;
+ } else if (S[I] == '\\') {
+ if (++I == E)
+ return make_error<StringError>("invalid glob pattern, stray '\\'",
+ errc::invalid_argument);
}
- return expand(Chars, Original);
}
- case '\\':
- // Eat this character and fall through below to treat it like a non-meta
- // character.
- S = S.substr(1);
- [[fallthrough]];
- default:
- BitVector BV(256, false);
- BV[(uint8_t)S[0]] = true;
- S = S.substr(1);
- return BV;
+ if (CurrentBE)
+ return make_error<StringError>("incomplete brace expansion",
+ errc::invalid_argument);
+
+ size_t NumSubPatterns = 1;
+ for (auto &BE : BraceExpansions) {
+ if (NumSubPatterns > std::numeric_limits<size_t>::max() / BE.Terms.size()) {
+ NumSubPatterns = std::numeric_limits<size_t>::max();
+ break;
+ }
+ NumSubPatterns *= BE.Terms.size();
+ }
+ if (NumSubPatterns > *MaxSubPatterns)
+ return make_error<StringError>("too many brace expansions",
+ errc::invalid_argument);
+  // Replace brace expansions in reverse order so that we don't invalidate
+  // earlier start indices.
+ for (auto &BE : reverse(BraceExpansions)) {
+ SmallVector<std::string> OrigSubPatterns;
+ std::swap(SubPatterns, OrigSubPatterns);
+ for (StringRef Term : BE.Terms)
+ for (StringRef Orig : OrigSubPatterns)
+ SubPatterns.emplace_back(Orig).replace(BE.Start, BE.Length, Term);
}
+ return std::move(SubPatterns);
}
-Expected<GlobPattern> GlobPattern::create(StringRef S) {
+Expected<GlobPattern>
+GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
GlobPattern Pat;
- // S doesn't contain any metacharacter,
- // so the regular string comparison should work.
- if (!hasWildcard(S)) {
- Pat.Exact = S;
- return Pat;
- }
-
- // S is something like "foo*", and the "* is not escaped. We can use
- // startswith().
- if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) {
- Pat.Prefix = S.drop_back();
+ // Store the prefix that does not contain any metacharacter.
+ size_t PrefixSize = S.find_first_of("?*[{\\");
+ Pat.Prefix = S.substr(0, PrefixSize);
+ if (PrefixSize == std::string::npos)
return Pat;
+ S = S.substr(PrefixSize);
+
+ SmallVector<std::string, 1> SubPats;
+ if (auto Err = parseBraceExpansions(S, MaxSubPatterns).moveInto(SubPats))
+ return std::move(Err);
+ for (StringRef SubPat : SubPats) {
+ auto SubGlobOrErr = SubGlobPattern::create(SubPat);
+ if (!SubGlobOrErr)
+ return SubGlobOrErr.takeError();
+ Pat.SubGlobs.push_back(*SubGlobOrErr);
}
- // S is something like "*foo". We can use endswith().
- if (S.startswith("*") && !hasWildcard(S.drop_front())) {
- Pat.Suffix = S.drop_front();
- return Pat;
- }
+ return Pat;
+}
- // Otherwise, we need to do real glob pattern matching.
- // Parse the pattern now.
- StringRef Original = S;
- while (!S.empty()) {
- Expected<BitVector> BV = scan(S, Original);
- if (!BV)
- return BV.takeError();
- Pat.Tokens.push_back(*BV);
+Expected<GlobPattern::SubGlobPattern>
+GlobPattern::SubGlobPattern::create(StringRef S) {
+ SubGlobPattern Pat;
+
+ // Parse brackets.
+ Pat.Pat.assign(S.begin(), S.end());
+ for (size_t I = 0, E = S.size(); I != E; ++I) {
+ if (S[I] == '[') {
+ // ']' is allowed as the first character of a character class. '[]' is
+ // invalid. So, just skip the first character.
+ ++I;
+ size_t J = S.find(']', I + 1);
+ if (J == StringRef::npos)
+ return make_error<StringError>("invalid glob pattern, unmatched '['",
+ errc::invalid_argument);
+ StringRef Chars = S.substr(I, J - I);
+ bool Invert = S[I] == '^' || S[I] == '!';
+ Expected<BitVector> BV =
+ Invert ? expand(Chars.substr(1), S) : expand(Chars, S);
+ if (!BV)
+ return BV.takeError();
+ if (Invert)
+ BV->flip();
+ Pat.Brackets.push_back(Bracket{J + 1, std::move(*BV)});
+ I = J;
+ } else if (S[I] == '\\') {
+ if (++I == E)
+ return make_error<StringError>("invalid glob pattern, stray '\\'",
+ errc::invalid_argument);
+ }
}
return Pat;
}
bool GlobPattern::match(StringRef S) const {
- if (Exact)
- return S == *Exact;
- if (Prefix)
- return S.startswith(*Prefix);
- if (Suffix)
- return S.endswith(*Suffix);
- return matchOne(Tokens, S);
+ if (!S.consume_front(Prefix))
+ return false;
+ if (SubGlobs.empty() && S.empty())
+ return true;
+ for (auto &Glob : SubGlobs)
+ if (Glob.match(S))
+ return true;
+ return false;
}
-// Runs glob pattern Pats against string S.
-bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
- for (;;) {
- if (Pats.empty())
- return S.empty();
-
- // If Pats[0] is '*', try to match Pats[1..] against all possible
- // tail strings of S to see at least one pattern succeeds.
- if (Pats[0].size() == 0) {
- Pats = Pats.slice(1);
- if (Pats.empty())
- // Fast path. If a pattern is '*', it matches anything.
- return true;
- for (size_t I = 0, E = S.size(); I < E; ++I)
- if (matchOne(Pats, S.substr(I)))
- return true;
- return false;
+// Factor the pattern into segments split by '*'. The segments are matched
+// sequentially by finding the first occurrence past the end of the previous
+// match.
+bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
+ const char *P = Pat.data(), *SegmentBegin = nullptr, *S = Str.data(),
+ *SavedS = S;
+ const char *const PEnd = P + Pat.size(), *const End = S + Str.size();
+ size_t B = 0, SavedB = 0;
+ while (S != End) {
+ if (P == PEnd)
+ ;
+ else if (*P == '*') {
+ // The non-* substring on the left of '*' matches the tail of S. Save the
+ // positions to be used by backtracking if we see a mismatch later.
+ SegmentBegin = ++P;
+ SavedS = S;
+ SavedB = B;
+ continue;
+ } else if (*P == '[') {
+ if (Brackets[B].Bytes[uint8_t(*S)]) {
+ P = Pat.data() + Brackets[B++].NextOffset;
+ ++S;
+ continue;
+ }
+ } else if (*P == '\\') {
+ if (*++P == *S) {
+ ++P;
+ ++S;
+ continue;
+ }
+ } else if (*P == *S || *P == '?') {
+ ++P;
+ ++S;
+ continue;
}
-
- // If Pats[0] is not '*', it must consume one character.
- if (S.empty() || !Pats[0][(uint8_t)S[0]])
+ if (!SegmentBegin)
return false;
- Pats = Pats.slice(1);
- S = S.substr(1);
+ // We have seen a '*'. Backtrack to the saved positions. Shift the S
+ // position to probe the next starting position in the segment.
+ P = SegmentBegin;
+ S = ++SavedS;
+ B = SavedB;
}
+  // All bytes in Str have been matched. Return true if the remainder of Pat
+  // is empty or contains only '*'.
+ return getPat().find_first_not_of('*', P - Pat.data()) == std::string::npos;
}
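
A minimal sketch of the rewritten GlobPattern API from a hypothetical caller: brace expansions are accepted only when a `MaxSubPatterns` budget is supplied, and each expanded alternative is matched as its own sub-pattern after the literal prefix.

    #include "llvm/Support/GlobPattern.h"
    using namespace llvm;

    bool isSourceFile(StringRef Path) {
      Expected<GlobPattern> Pat =
          GlobPattern::create("src/*.{c,cpp,h}", /*MaxSubPatterns=*/16);
      if (!Pat) {
        consumeError(Pat.takeError()); // e.g. unmatched '[' or nested braces
        return false;
      }
      return Pat->match(Path); // true for "src/main.cpp"
    }
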
diff --git a/contrib/llvm-project/llvm/lib/Support/InitLLVM.cpp b/contrib/llvm-project/llvm/lib/Support/InitLLVM.cpp
index 2b7173b28940..7f475f42f3cb 100644
--- a/contrib/llvm-project/llvm/lib/Support/InitLLVM.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/InitLLVM.cpp
@@ -8,6 +8,8 @@
#include "llvm/Support/InitLLVM.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/AutoConvert.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PrettyStackTrace.h"
@@ -15,15 +17,31 @@
#include "llvm/Support/SwapByteOrder.h"
#ifdef _WIN32
-#include "llvm/Support/Error.h"
#include "llvm/Support/Windows/WindowsSupport.h"
#endif
+#ifdef __MVS__
+#include <unistd.h>
+
+void CleanupStdHandles(void *Cookie) {
+ llvm::raw_ostream *Outs = &llvm::outs(), *Errs = &llvm::errs();
+ Outs->flush();
+ Errs->flush();
+ llvm::restoreStdHandleAutoConversion(STDIN_FILENO);
+ llvm::restoreStdHandleAutoConversion(STDOUT_FILENO);
+ llvm::restoreStdHandleAutoConversion(STDERR_FILENO);
+}
+#endif
+
using namespace llvm;
using namespace llvm::sys;
InitLLVM::InitLLVM(int &Argc, const char **&Argv,
bool InstallPipeSignalExitHandler) {
+#ifdef __MVS__
+ // Bring stdin/stdout/stderr into a known state.
+ sys::AddSignalHandler(CleanupStdHandles, nullptr);
+#endif
if (InstallPipeSignalExitHandler)
// The pipe signal handler must be installed before any other handlers are
// registered. This is because the Unix \ref RegisterHandlers function does
@@ -37,6 +55,20 @@ InitLLVM::InitLLVM(int &Argc, const char **&Argv,
sys::PrintStackTraceOnErrorSignal(Argv[0]);
install_out_of_memory_new_handler();
+#ifdef __MVS__
+
+  // We use UTF-8 as the internal character encoding. On z/OS, all external
+  // output is encoded in EBCDIC. To be able to read all error messages, we
+  // turn on conversion to EBCDIC for the stderr and stdout fds.
+ std::string Banner = std::string(Argv[0]) + ": ";
+ ExitOnError ExitOnErr(Banner);
+
+ // If turning on conversion for stderr fails then the error message
+ // may be garbled. There is no solution to this problem.
+ ExitOnErr(errorCodeToError(llvm::enableAutoConversion(STDERR_FILENO)));
+ ExitOnErr(errorCodeToError(llvm::enableAutoConversion(STDOUT_FILENO)));
+#endif
+
#ifdef _WIN32
// We use UTF-8 as the internal character encoding. On Windows,
// arguments passed to main() may not be encoded in UTF-8. In order
@@ -61,4 +93,9 @@ InitLLVM::InitLLVM(int &Argc, const char **&Argv,
#endif
}
-InitLLVM::~InitLLVM() { llvm_shutdown(); }
+InitLLVM::~InitLLVM() {
+#ifdef __MVS__
+ CleanupStdHandles(nullptr);
+#endif
+ llvm_shutdown();
+}
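
The z/OS hooks slot into the existing InitLLVM lifecycle, so a typical tool's `main` is unchanged; a sketch:

    #include "llvm/Support/InitLLVM.h"

    int main(int argc, char **argv) {
      // Installs signal handlers and, on z/OS, enables EBCDIC auto-conversion
      // for the standard streams; the destructor now restores those handles.
      llvm::InitLLVM X(argc, argv);
      // ... tool logic ...
      return 0;
    }
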
diff --git a/contrib/llvm-project/llvm/lib/Support/KnownBits.cpp b/contrib/llvm-project/llvm/lib/Support/KnownBits.cpp
index 097c22d33dd1..770e4051ca3f 100644
--- a/contrib/llvm-project/llvm/lib/Support/KnownBits.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/KnownBits.cpp
@@ -85,6 +85,18 @@ KnownBits KnownBits::computeForAddSub(bool Add, bool NSW,
return KnownOut;
}
+KnownBits KnownBits::computeForSubBorrow(const KnownBits &LHS, KnownBits RHS,
+ const KnownBits &Borrow) {
+ assert(Borrow.getBitWidth() == 1 && "Borrow must be 1-bit");
+
+ // LHS - RHS = LHS + ~RHS + 1
+ // Carry 1 - Borrow in ::computeForAddCarry
+ std::swap(RHS.Zero, RHS.One);
+ return ::computeForAddCarry(LHS, RHS,
+ /*CarryZero=*/Borrow.One.getBoolValue(),
+ /*CarryOne=*/Borrow.Zero.getBoolValue());
+}
+
KnownBits KnownBits::sextInReg(unsigned SrcBitWidth) const {
unsigned BitWidth = getBitWidth();
assert(0 < SrcBitWidth && SrcBitWidth <= BitWidth &&
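
A worked example of the identity in the comment above, with fully known operands chosen for illustration:

    #include "llvm/Support/KnownBits.h"
    using namespace llvm;

    void subBorrowExample() {
      KnownBits LHS(8), RHS(8), Borrow(1);
      LHS.One = APInt(8, 0x10); LHS.Zero = ~LHS.One; // LHS known == 0x10
      RHS.One = APInt(8, 0x01); RHS.Zero = ~RHS.One; // RHS known == 0x01
      Borrow.Zero.setAllBits();                      // borrow-in known == 0
      KnownBits Diff = KnownBits::computeForSubBorrow(LHS, RHS, Borrow);
      // 0x10 + ~0x01 + 1 == 0x0F, so Diff is fully known to be 0x0F.
    }
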
diff --git a/contrib/llvm-project/llvm/lib/Support/PGOOptions.cpp b/contrib/llvm-project/llvm/lib/Support/PGOOptions.cpp
index 04d50cc70d91..7e57b52e4ba2 100644
--- a/contrib/llvm-project/llvm/lib/Support/PGOOptions.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/PGOOptions.cpp
@@ -16,13 +16,14 @@ PGOOptions::PGOOptions(std::string ProfileFile, std::string CSProfileGenFile,
std::string MemoryProfile,
IntrusiveRefCntPtr<vfs::FileSystem> FS, PGOAction Action,
CSPGOAction CSAction, bool DebugInfoForProfiling,
- bool PseudoProbeForProfiling)
+ bool PseudoProbeForProfiling, bool AtomicCounterUpdate)
: ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile),
ProfileRemappingFile(ProfileRemappingFile), MemoryProfile(MemoryProfile),
Action(Action), CSAction(CSAction),
DebugInfoForProfiling(DebugInfoForProfiling ||
(Action == SampleUse && !PseudoProbeForProfiling)),
- PseudoProbeForProfiling(PseudoProbeForProfiling), FS(std::move(FS)) {
+ PseudoProbeForProfiling(PseudoProbeForProfiling),
+ AtomicCounterUpdate(AtomicCounterUpdate), FS(std::move(FS)) {
// Note, we do allow ProfileFile.empty() for Action=IRUse LTO can
// callback with IRUse action without ProfileFile.
diff --git a/contrib/llvm-project/llvm/lib/Support/Path.cpp b/contrib/llvm-project/llvm/lib/Support/Path.cpp
index 7a57c104ef10..c2456dcac097 100644
--- a/contrib/llvm-project/llvm/lib/Support/Path.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Path.cpp
@@ -263,7 +263,7 @@ const_iterator &const_iterator::operator++() {
// Root dir.
if (was_net ||
// c:/
- (is_style_windows(S) && Component.endswith(":"))) {
+ (is_style_windows(S) && Component.ends_with(":"))) {
Component = Path.substr(Position, 1);
return *this;
}
@@ -352,7 +352,7 @@ StringRef root_path(StringRef path, Style style) {
if (b != e) {
bool has_net =
b->size() > 2 && is_separator((*b)[0], style) && (*b)[1] == (*b)[0];
- bool has_drive = is_style_windows(style) && b->endswith(":");
+ bool has_drive = is_style_windows(style) && b->ends_with(":");
if (has_net || has_drive) {
if ((++pos != e) && is_separator((*pos)[0], style)) {
@@ -377,7 +377,7 @@ StringRef root_name(StringRef path, Style style) {
if (b != e) {
bool has_net =
b->size() > 2 && is_separator((*b)[0], style) && (*b)[1] == (*b)[0];
- bool has_drive = is_style_windows(style) && b->endswith(":");
+ bool has_drive = is_style_windows(style) && b->ends_with(":");
if (has_net || has_drive) {
// just {C:,//net}, return the first component.
@@ -394,7 +394,7 @@ StringRef root_directory(StringRef path, Style style) {
if (b != e) {
bool has_net =
b->size() > 2 && is_separator((*b)[0], style) && (*b)[1] == (*b)[0];
- bool has_drive = is_style_windows(style) && b->endswith(":");
+ bool has_drive = is_style_windows(style) && b->ends_with(":");
if ((has_net || has_drive) &&
// {C:,//net}, skip to the next component.
@@ -514,7 +514,7 @@ static bool starts_with(StringRef Path, StringRef Prefix,
}
return true;
}
- return Path.startswith(Prefix);
+ return Path.starts_with(Prefix);
}
bool replace_path_prefix(SmallVectorImpl<char> &Path, StringRef OldPrefix,
diff --git a/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp b/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp
index 70fab8010831..bbbaf26a7bd4 100644
--- a/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp
@@ -59,6 +59,8 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
{"i", RISCVExtensionVersion{2, 1}},
{"m", RISCVExtensionVersion{2, 0}},
+ {"smaia", RISCVExtensionVersion{1, 0}},
+ {"ssaia", RISCVExtensionVersion{1, 0}},
{"svinval", RISCVExtensionVersion{1, 0}},
{"svnapot", RISCVExtensionVersion{1, 0}},
{"svpbmt", RISCVExtensionVersion{1, 0}},
@@ -66,10 +68,19 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
{"v", RISCVExtensionVersion{1, 0}},
// vendor-defined ('X') extensions
+ {"xcvalu", RISCVExtensionVersion{1, 0}},
+ {"xcvbi", RISCVExtensionVersion{1, 0}},
{"xcvbitmanip", RISCVExtensionVersion{1, 0}},
+ {"xcvelw", RISCVExtensionVersion{1, 0}},
{"xcvmac", RISCVExtensionVersion{1, 0}},
+ {"xcvmem", RISCVExtensionVersion{1, 0}},
+ {"xcvsimd", RISCVExtensionVersion{1, 0}},
{"xsfcie", RISCVExtensionVersion{1, 0}},
{"xsfvcp", RISCVExtensionVersion{1, 0}},
+ {"xsfvfnrclipxfqf", RISCVExtensionVersion{1, 0}},
+ {"xsfvfwmaccqqq", RISCVExtensionVersion{1, 0}},
+ {"xsfvqmaccdod", RISCVExtensionVersion{1, 0}},
+ {"xsfvqmaccqoq", RISCVExtensionVersion{1, 0}},
{"xtheadba", RISCVExtensionVersion{1, 0}},
{"xtheadbb", RISCVExtensionVersion{1, 0}},
{"xtheadbs", RISCVExtensionVersion{1, 0}},
@@ -103,6 +114,7 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
{"zdinx", RISCVExtensionVersion{1, 0}},
+ {"zfa", RISCVExtensionVersion{1, 0}},
{"zfh", RISCVExtensionVersion{1, 0}},
{"zfhmin", RISCVExtensionVersion{1, 0}},
{"zfinx", RISCVExtensionVersion{1, 0}},
@@ -113,11 +125,12 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
{"zicbom", RISCVExtensionVersion{1, 0}},
{"zicbop", RISCVExtensionVersion{1, 0}},
{"zicboz", RISCVExtensionVersion{1, 0}},
- {"zicntr", RISCVExtensionVersion{1, 0}},
+ {"zicntr", RISCVExtensionVersion{2, 0}},
{"zicsr", RISCVExtensionVersion{2, 0}},
{"zifencei", RISCVExtensionVersion{2, 0}},
+ {"zihintntl", RISCVExtensionVersion{1, 0}},
{"zihintpause", RISCVExtensionVersion{2, 0}},
- {"zihpm", RISCVExtensionVersion{1, 0}},
+ {"zihpm", RISCVExtensionVersion{2, 0}},
{"zk", RISCVExtensionVersion{1, 0}},
{"zkn", RISCVExtensionVersion{1, 0}},
@@ -139,6 +152,7 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
{"zve64x", RISCVExtensionVersion{1, 0}},
{"zvfh", RISCVExtensionVersion{1, 0}},
+ {"zvfhmin", RISCVExtensionVersion{1, 0}},
{"zvl1024b", RISCVExtensionVersion{1, 0}},
{"zvl128b", RISCVExtensionVersion{1, 0}},
@@ -156,27 +170,23 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
// NOTE: This table should be sorted alphabetically by extension name.
static const RISCVSupportedExtension SupportedExperimentalExtensions[] = {
- {"smaia", RISCVExtensionVersion{1, 0}},
- {"ssaia", RISCVExtensionVersion{1, 0}},
-
{"zacas", RISCVExtensionVersion{1, 0}},
- {"zfa", RISCVExtensionVersion{0, 2}},
- {"zfbfmin", RISCVExtensionVersion{0, 6}},
+ {"zfbfmin", RISCVExtensionVersion{0, 8}},
+ {"zicfilp", RISCVExtensionVersion{0, 4}},
{"zicond", RISCVExtensionVersion{1, 0}},
- {"zihintntl", RISCVExtensionVersion{0, 2}},
-
{"ztso", RISCVExtensionVersion{0, 1}},
{"zvbb", RISCVExtensionVersion{1, 0}},
{"zvbc", RISCVExtensionVersion{1, 0}},
- {"zvfbfmin", RISCVExtensionVersion{0, 6}},
- {"zvfbfwma", RISCVExtensionVersion{0, 6}},
+ {"zvfbfmin", RISCVExtensionVersion{0, 8}},
+ {"zvfbfwma", RISCVExtensionVersion{0, 8}},
// vector crypto
+ {"zvkb", RISCVExtensionVersion{1, 0}},
{"zvkg", RISCVExtensionVersion{1, 0}},
{"zvkn", RISCVExtensionVersion{1, 0}},
{"zvknc", RISCVExtensionVersion{1, 0}},
@@ -205,6 +215,41 @@ static void verifyTables() {
#endif
}
+static void PrintExtension(const std::string Name, const std::string Version,
+ const std::string Description) {
+ outs() << " "
+ << format(Description.empty() ? "%-20s%s\n" : "%-20s%-10s%s\n",
+ Name.c_str(), Version.c_str(), Description.c_str());
+}
+
+void llvm::riscvExtensionsHelp(StringMap<StringRef> DescMap) {
+
+ outs() << "All available -march extensions for RISC-V\n\n";
+ PrintExtension("Name", "Version", (DescMap.empty() ? "" : "Description"));
+
+ RISCVISAInfo::OrderedExtensionMap ExtMap;
+ for (const auto &E : SupportedExtensions)
+ ExtMap[E.Name] = {E.Version.Major, E.Version.Minor};
+ for (const auto &E : ExtMap) {
+ std::string Version = std::to_string(E.second.MajorVersion) + "." +
+ std::to_string(E.second.MinorVersion);
+ PrintExtension(E.first, Version, DescMap[E.first].str());
+ }
+
+ outs() << "\nExperimental extensions\n";
+ ExtMap.clear();
+ for (const auto &E : SupportedExperimentalExtensions)
+ ExtMap[E.Name] = {E.Version.Major, E.Version.Minor};
+ for (const auto &E : ExtMap) {
+ std::string Version = std::to_string(E.second.MajorVersion) + "." +
+ std::to_string(E.second.MinorVersion);
+ PrintExtension(E.first, Version, DescMap["experimental-" + E.first].str());
+ }
+
+ outs() << "\nUse -march to specify the target's extension.\n"
+ "For example, clang -march=rv32i_v1p0\n";
+}
+
static bool stripExperimentalPrefix(StringRef &Ext) {
return Ext.consume_front("experimental-");
}
@@ -267,21 +312,21 @@ void RISCVISAInfo::addExtension(StringRef ExtName, unsigned MajorVersion,
}
static StringRef getExtensionTypeDesc(StringRef Ext) {
- if (Ext.startswith("s"))
+ if (Ext.starts_with("s"))
return "standard supervisor-level extension";
- if (Ext.startswith("x"))
+ if (Ext.starts_with("x"))
return "non-standard user-level extension";
- if (Ext.startswith("z"))
+ if (Ext.starts_with("z"))
return "standard user-level extension";
return StringRef();
}
static StringRef getExtensionType(StringRef Ext) {
- if (Ext.startswith("s"))
+ if (Ext.starts_with("s"))
return "s";
- if (Ext.startswith("x"))
+ if (Ext.starts_with("x"))
return "x";
- if (Ext.startswith("z"))
+ if (Ext.starts_with("z"))
return "z";
return StringRef();
}
@@ -498,7 +543,7 @@ static Error getExtensionVersion(StringRef Ext, StringRef In, unsigned &Major,
return createStringError(errc::invalid_argument, Error);
}
- // If experimental extension, require use of current version number number
+ // If experimental extension, require use of current version number
if (auto ExperimentalExtension = isExperimentalExtension(Ext)) {
if (!EnableExperimentalExtension) {
std::string Error = "requires '-menable-experimental-extensions' for "
@@ -597,9 +642,9 @@ RISCVISAInfo::parseNormalizedArchString(StringRef Arch) {
}
// Must start with a valid base ISA name.
unsigned XLen;
- if (Arch.startswith("rv32i") || Arch.startswith("rv32e"))
+ if (Arch.starts_with("rv32i") || Arch.starts_with("rv32e"))
XLen = 32;
- else if (Arch.startswith("rv64i") || Arch.startswith("rv64e"))
+ else if (Arch.starts_with("rv64i") || Arch.starts_with("rv64e"))
XLen = 64;
else
return createStringError(errc::invalid_argument,
@@ -660,9 +705,9 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
"string must be lowercase");
}
- bool HasRV64 = Arch.startswith("rv64");
+ bool HasRV64 = Arch.starts_with("rv64");
// ISA string must begin with rv32 or rv64.
- if (!(Arch.startswith("rv32") || HasRV64) || (Arch.size() < 5)) {
+ if (!(Arch.starts_with("rv32") || HasRV64) || (Arch.size() < 5)) {
return createStringError(
errc::invalid_argument,
"string must begin with rv32{i,e,g} or rv64{i,e,g}");
@@ -943,10 +988,6 @@ Error RISCVISAInfo::checkDependency() {
return createStringError(errc::invalid_argument,
"'zcf' is only supported for 'rv32'");
- // Additional dependency checks.
- // TODO: The 'q' extension requires rv64.
- // TODO: It is illegal to specify 'e' extensions with 'f' and 'd'.
-
return Error::success();
}
@@ -955,17 +996,21 @@ static const char *ImpliedExtsF[] = {"zicsr"};
static const char *ImpliedExtsV[] = {"zvl128b", "zve64d"};
static const char *ImpliedExtsXTHeadVdot[] = {"v"};
static const char *ImpliedExtsXsfvcp[] = {"zve32x"};
+static const char *ImpliedExtsXsfvfnrclipxfqf[] = {"zve32f"};
+static const char *ImpliedExtsXsfvfwmaccqqq[] = {"zve32f", "zvfbfmin"};
+static const char *ImpliedExtsXsfvqmaccdod[] = {"zve32x"};
+static const char *ImpliedExtsXsfvqmaccqoq[] = {"zve32x"};
static const char *ImpliedExtsZacas[] = {"a"};
static const char *ImpliedExtsZcb[] = {"zca"};
-static const char *ImpliedExtsZcd[] = {"zca"};
+static const char *ImpliedExtsZcd[] = {"d", "zca"};
static const char *ImpliedExtsZce[] = {"zcb", "zcmp", "zcmt"};
-static const char *ImpliedExtsZcf[] = {"zca"};
+static const char *ImpliedExtsZcf[] = {"f", "zca"};
static const char *ImpliedExtsZcmp[] = {"zca"};
static const char *ImpliedExtsZcmt[] = {"zca"};
static const char *ImpliedExtsZdinx[] = {"zfinx"};
static const char *ImpliedExtsZfa[] = {"f"};
static const char *ImpliedExtsZfbfmin[] = {"f"};
-static const char *ImpliedExtsZfh[] = {"f"};
+static const char *ImpliedExtsZfh[] = {"zfhmin"};
static const char *ImpliedExtsZfhmin[] = {"f"};
static const char *ImpliedExtsZfinx[] = {"zicsr"};
static const char *ImpliedExtsZhinx[] = {"zfinx"};
@@ -976,19 +1021,21 @@ static const char *ImpliedExtsZk[] = {"zkn", "zkt", "zkr"};
static const char *ImpliedExtsZkn[] = {"zbkb", "zbkc", "zbkx",
"zkne", "zknd", "zknh"};
static const char *ImpliedExtsZks[] = {"zbkb", "zbkc", "zbkx", "zksed", "zksh"};
+static const char *ImpliedExtsZvbb[] = {"zvkb"};
static const char *ImpliedExtsZve32f[] = {"zve32x", "f"};
static const char *ImpliedExtsZve32x[] = {"zvl32b", "zicsr"};
static const char *ImpliedExtsZve64d[] = {"zve64f", "d"};
static const char *ImpliedExtsZve64f[] = {"zve64x", "zve32f"};
static const char *ImpliedExtsZve64x[] = {"zve32x", "zvl64b"};
-static const char *ImpliedExtsZvfbfmin[] = {"zve32f"};
-static const char *ImpliedExtsZvfbfwma[] = {"zve32f"};
+static const char *ImpliedExtsZvfbfmin[] = {"zve32f", "zfbfmin"};
+static const char *ImpliedExtsZvfbfwma[] = {"zvfbfmin"};
static const char *ImpliedExtsZvfh[] = {"zve32f", "zfhmin"};
-static const char *ImpliedExtsZvkn[] = {"zvbb", "zvkned", "zvknhb", "zvkt"};
+static const char *ImpliedExtsZvfhmin[] = {"zve32f"};
+static const char *ImpliedExtsZvkn[] = {"zvkb", "zvkned", "zvknhb", "zvkt"};
static const char *ImpliedExtsZvknc[] = {"zvbc", "zvkn"};
static const char *ImpliedExtsZvkng[] = {"zvkg", "zvkn"};
-static const char *ImpliedExtsZvknhb[] = {"zvknha"};
-static const char *ImpliedExtsZvks[] = {"zvbb", "zvksed", "zvksh", "zvkt"};
+static const char *ImpliedExtsZvknhb[] = {"zve64x"};
+static const char *ImpliedExtsZvks[] = {"zvkb", "zvksed", "zvksh", "zvkt"};
static const char *ImpliedExtsZvksc[] = {"zvbc", "zvks"};
static const char *ImpliedExtsZvksg[] = {"zvkg", "zvks"};
static const char *ImpliedExtsZvl1024b[] = {"zvl512b"};
@@ -1020,6 +1067,10 @@ static constexpr ImpliedExtsEntry ImpliedExts[] = {
{{"f"}, {ImpliedExtsF}},
{{"v"}, {ImpliedExtsV}},
{{"xsfvcp"}, {ImpliedExtsXsfvcp}},
+ {{"xsfvfnrclipxfqf"}, {ImpliedExtsXsfvfnrclipxfqf}},
+ {{"xsfvfwmaccqqq"}, {ImpliedExtsXsfvfwmaccqqq}},
+ {{"xsfvqmaccdod"}, {ImpliedExtsXsfvqmaccdod}},
+ {{"xsfvqmaccqoq"}, {ImpliedExtsXsfvqmaccqoq}},
{{"xtheadvdot"}, {ImpliedExtsXTHeadVdot}},
{{"zacas"}, {ImpliedExtsZacas}},
{{"zcb"}, {ImpliedExtsZcb}},
@@ -1041,6 +1092,7 @@ static constexpr ImpliedExtsEntry ImpliedExts[] = {
{{"zk"}, {ImpliedExtsZk}},
{{"zkn"}, {ImpliedExtsZkn}},
{{"zks"}, {ImpliedExtsZks}},
+ {{"zvbb"}, {ImpliedExtsZvbb}},
{{"zve32f"}, {ImpliedExtsZve32f}},
{{"zve32x"}, {ImpliedExtsZve32x}},
{{"zve64d"}, {ImpliedExtsZve64d}},
@@ -1049,6 +1101,7 @@ static constexpr ImpliedExtsEntry ImpliedExts[] = {
{{"zvfbfmin"}, {ImpliedExtsZvfbfmin}},
{{"zvfbfwma"}, {ImpliedExtsZvfbfwma}},
{{"zvfh"}, {ImpliedExtsZvfh}},
+ {{"zvfhmin"}, {ImpliedExtsZvfhmin}},
{{"zvkn"}, {ImpliedExtsZvkn}},
{{"zvknc"}, {ImpliedExtsZvknc}},
{{"zvkng"}, {ImpliedExtsZvkng}},
@@ -1239,15 +1292,56 @@ StringRef RISCVISAInfo::computeDefaultABI() const {
if (XLen == 32) {
if (hasExtension("d"))
return "ilp32d";
+ if (hasExtension("f"))
+ return "ilp32f";
if (hasExtension("e"))
return "ilp32e";
return "ilp32";
} else if (XLen == 64) {
if (hasExtension("d"))
return "lp64d";
+ if (hasExtension("f"))
+ return "lp64f";
if (hasExtension("e"))
return "lp64e";
return "lp64";
}
llvm_unreachable("Invalid XLEN");
}
+
+bool RISCVISAInfo::isSupportedExtensionWithVersion(StringRef Ext) {
+ if (Ext.empty())
+ return false;
+
+ auto Pos = findLastNonVersionCharacter(Ext) + 1;
+ StringRef Name = Ext.substr(0, Pos);
+ StringRef Vers = Ext.substr(Pos);
+ if (Vers.empty())
+ return false;
+
+ unsigned Major, Minor, ConsumeLength;
+ if (auto E = getExtensionVersion(Name, Vers, Major, Minor, ConsumeLength,
+ true, true)) {
+ consumeError(std::move(E));
+ return false;
+ }
+
+ return true;
+}
+
+std::string RISCVISAInfo::getTargetFeatureForExtension(StringRef Ext) {
+ if (Ext.empty())
+ return std::string();
+
+ auto Pos = findLastNonVersionCharacter(Ext) + 1;
+ StringRef Name = Ext.substr(0, Pos);
+
+ if (Pos != Ext.size() && !isSupportedExtensionWithVersion(Ext))
+ return std::string();
+
+ if (!isSupportedExtension(Name))
+ return std::string();
+
+ return isExperimentalExtension(Name) ? "experimental-" + Name.str()
+ : Name.str();
+}
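
A sketch exercising the default-ABI change above: an rv32 target with 'f' but without 'd' now maps to ilp32f (and rv64 similarly to lp64f).

    #include "llvm/Support/RISCVISAInfo.h"
    using namespace llvm;

    StringRef defaultABIFor(StringRef Arch) {
      auto Info = RISCVISAInfo::parseArchString(
          Arch, /*EnableExperimentalExtension=*/false);
      if (!Info) {
        consumeError(Info.takeError());
        return "";
      }
      return (*Info)->computeDefaultABI(); // "rv32if" -> "ilp32f"
    }
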
diff --git a/contrib/llvm-project/llvm/lib/Support/Regex.cpp b/contrib/llvm-project/llvm/lib/Support/Regex.cpp
index dfbd373e4a98..5eedf95c48e3 100644
--- a/contrib/llvm-project/llvm/lib/Support/Regex.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Regex.cpp
@@ -92,6 +92,10 @@ bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches,
unsigned nmatch = Matches ? preg->re_nsub+1 : 0;
+ // Update null string to empty string.
+ if (String.data() == nullptr)
+ String = "";
+
// pmatch needs to have at least one element.
SmallVector<llvm_regmatch_t, 8> pm;
pm.resize(nmatch > 0 ? nmatch : 1);
@@ -163,6 +167,25 @@ std::string Regex::sub(StringRef Repl, StringRef String,
// FIXME: We should have a StringExtras function for mapping C99 escapes.
switch (Repl[0]) {
+
+ // Backreference with the "\g<ref>" syntax
+ case 'g':
+ if (Repl.size() >= 4 && Repl[1] == '<') {
+ size_t End = Repl.find('>');
+ StringRef Ref = Repl.slice(2, End);
+ unsigned RefValue;
+ if (End != StringRef::npos && !Ref.getAsInteger(10, RefValue)) {
+ Repl = Repl.substr(End + 1);
+ if (RefValue < Matches.size())
+ Res += Matches[RefValue];
+ else if (Error && Error->empty())
+ *Error =
+ ("invalid backreference string 'g<" + Twine(Ref) + ">'").str();
+ break;
+ }
+ }
+ [[fallthrough]];
+
// Treat all unrecognized characters as self-quoting.
default:
Res += Repl[0];
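
A sketch of the new `\g<ref>` replacement syntax handled above; it behaves like the existing bare `\N` backreferences:

    #include "llvm/Support/Regex.h"
    using namespace llvm;

    std::string swapFields() {
      Regex R("([a-z]+)-([0-9]+)");
      // "abc-123" -> "123-abc"; "\g<2>" is equivalent to "\2".
      return R.sub("\\g<2>-\\g<1>", "abc-123");
    }
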
diff --git a/contrib/llvm-project/llvm/lib/Support/SHA1.cpp b/contrib/llvm-project/llvm/lib/Support/SHA1.cpp
index 7e66063b0760..412f894492bf 100644
--- a/contrib/llvm-project/llvm/lib/Support/SHA1.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/SHA1.cpp
@@ -270,7 +270,7 @@ void SHA1::final(std::array<uint32_t, HASH_LENGTH / 4> &HashResult) {
} else {
// Swap byte order back
for (int i = 0; i < 5; i++) {
- HashResult[i] = sys::getSwappedBytes(InternalState.State[i]);
+ HashResult[i] = llvm::byteswap(InternalState.State[i]);
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/Support/SHA256.cpp b/contrib/llvm-project/llvm/lib/Support/SHA256.cpp
index b1c07f83b68f..a8019002499c 100644
--- a/contrib/llvm-project/llvm/lib/Support/SHA256.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/SHA256.cpp
@@ -250,7 +250,7 @@ void SHA256::final(std::array<uint32_t, HASH_LENGTH / 4> &HashResult) {
} else {
// Swap byte order back
for (int i = 0; i < 8; i++) {
- HashResult[i] = sys::getSwappedBytes(InternalState.State[i]);
+ HashResult[i] = llvm::byteswap(InternalState.State[i]);
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/Support/Signals.cpp b/contrib/llvm-project/llvm/lib/Support/Signals.cpp
index c681266f03b7..669a9e2a8396 100644
--- a/contrib/llvm-project/llvm/lib/Support/Signals.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Signals.cpp
@@ -72,6 +72,7 @@ void llvm::initSignalsOptions() {
constexpr char DisableSymbolizationEnv[] = "LLVM_DISABLE_SYMBOLIZATION";
constexpr char LLVMSymbolizerPathEnv[] = "LLVM_SYMBOLIZER_PATH";
+constexpr char EnableSymbolizerMarkupEnv[] = "LLVM_ENABLE_SYMBOLIZER_MARKUP";
// Callbacks to run in signal handler must be lock-free because a signal handler
// could be running as we add new callbacks. We don't add unbounded numbers of
@@ -237,12 +238,12 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
if (FunctionName.empty())
break;
PrintLineHeader();
- if (!FunctionName.startswith("??"))
+ if (!FunctionName.starts_with("??"))
OS << FunctionName << ' ';
if (CurLine == Lines.end())
return false;
StringRef FileLineInfo = *CurLine++;
- if (!FileLineInfo.startswith("??"))
+ if (!FileLineInfo.starts_with("??"))
OS << FileLineInfo;
else
OS << "(" << Modules[i] << '+' << format_hex(Offsets[i], 0) << ")";
@@ -252,6 +253,25 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
return true;
}
+static bool printMarkupContext(raw_ostream &OS, const char *MainExecutableName);
+
+LLVM_ATTRIBUTE_USED
+static bool printMarkupStackTrace(StringRef Argv0, void **StackTrace, int Depth,
+ raw_ostream &OS) {
+ const char *Env = getenv(EnableSymbolizerMarkupEnv);
+ if (!Env || !*Env)
+ return false;
+
+ std::string MainExecutableName =
+ sys::fs::exists(Argv0) ? std::string(Argv0)
+ : sys::fs::getMainExecutable(nullptr, nullptr);
+ if (!printMarkupContext(OS, MainExecutableName.c_str()))
+ return false;
+ for (int I = 0; I < Depth; I++)
+ OS << format("{{{bt:%d:%#016x}}}\n", I, StackTrace[I]);
+ return true;
+}
+
// Include the platform-specific parts of this class.
#ifdef LLVM_ON_UNIX
#include "Unix/Signals.inc"
diff --git a/contrib/llvm-project/llvm/lib/Support/SourceMgr.cpp b/contrib/llvm-project/llvm/lib/Support/SourceMgr.cpp
index 8065f0ad663a..ebeff87c3954 100644
--- a/contrib/llvm-project/llvm/lib/Support/SourceMgr.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/SourceMgr.cpp
@@ -117,7 +117,7 @@ unsigned SourceMgr::SrcBuffer::getLineNumberSpecialized(const char *Ptr) const {
return llvm::lower_bound(Offsets, PtrOffset) - Offsets.begin() + 1;
}
-/// Look up a given \p Ptr in in the buffer, determining which line it came
+/// Look up a given \p Ptr in the buffer, determining which line it came
/// from.
unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const {
size_t Sz = Buffer->getBufferSize();
diff --git a/contrib/llvm-project/llvm/lib/Support/SpecialCaseList.cpp b/contrib/llvm-project/llvm/lib/Support/SpecialCaseList.cpp
index 64f66e0f8179..7a23421eaeb8 100644
--- a/contrib/llvm-project/llvm/lib/Support/SpecialCaseList.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/SpecialCaseList.cpp
@@ -14,58 +14,70 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/SpecialCaseList.h"
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Regex.h"
#include "llvm/Support/VirtualFileSystem.h"
+#include <stdio.h>
#include <string>
#include <system_error>
#include <utility>
-#include <stdio.h>
namespace llvm {
-bool SpecialCaseList::Matcher::insert(std::string Regexp,
- unsigned LineNumber,
- std::string &REError) {
- if (Regexp.empty()) {
- REError = "Supplied regexp was blank";
- return false;
- }
+Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber,
+ bool UseGlobs) {
+ if (Pattern.empty())
+ return createStringError(errc::invalid_argument,
+ Twine("Supplied ") +
+ (UseGlobs ? "glob" : "regex") + " was blank");
+
+ if (!UseGlobs) {
+ // Replace * with .*
+ auto Regexp = Pattern.str();
+ for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
+ pos += strlen(".*")) {
+ Regexp.replace(pos, strlen("*"), ".*");
+ }
- if (Regex::isLiteralERE(Regexp)) {
- Strings[Regexp] = LineNumber;
- return true;
- }
+ Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
- // Replace * with .*
- for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
- pos += strlen(".*")) {
- Regexp.replace(pos, strlen("*"), ".*");
- }
+ // Check that the regexp is valid.
+ Regex CheckRE(Regexp);
+ std::string REError;
+ if (!CheckRE.isValid(REError))
+ return createStringError(errc::invalid_argument, REError);
- Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
+ RegExes.emplace_back(std::make_pair(
+ std::make_unique<Regex>(std::move(CheckRE)), LineNumber));
- // Check that the regexp is valid.
- Regex CheckRE(Regexp);
- if (!CheckRE.isValid(REError))
- return false;
+ return Error::success();
+ }
- RegExes.emplace_back(
- std::make_pair(std::make_unique<Regex>(std::move(CheckRE)), LineNumber));
- return true;
+ auto [It, DidEmplace] = Globs.try_emplace(Pattern);
+ if (DidEmplace) {
+    // We must be sure to use the string in the map rather than the provided
+    // reference, which could be destroyed before match() is called.
+ Pattern = It->getKey();
+ auto &Pair = It->getValue();
+ if (auto Err = GlobPattern::create(Pattern, /*MaxSubPatterns=*/1024)
+ .moveInto(Pair.first))
+ return Err;
+ Pair.second = LineNumber;
+ }
+ return Error::success();
}
unsigned SpecialCaseList::Matcher::match(StringRef Query) const {
- auto It = Strings.find(Query);
- if (It != Strings.end())
- return It->second;
- for (const auto &RegExKV : RegExes)
- if (RegExKV.first->match(Query))
- return RegExKV.second;
+ for (const auto &[Pattern, Pair] : Globs)
+ if (Pair.first.match(Query))
+ return Pair.second;
+ for (const auto &[Regex, LineNumber] : RegExes)
+ if (Regex->match(Query))
+ return LineNumber;
return 0;
}
+// TODO: Refactor this to return Expected<...>
std::unique_ptr<SpecialCaseList>
SpecialCaseList::create(const std::vector<std::string> &Paths,
llvm::vfs::FileSystem &FS, std::string &Error) {
@@ -94,7 +106,6 @@ SpecialCaseList::createOrDie(const std::vector<std::string> &Paths,
bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths,
vfs::FileSystem &VFS, std::string &Error) {
- StringMap<size_t> Sections;
for (const auto &Path : Paths) {
ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
VFS.getBufferForFile(Path);
@@ -103,7 +114,7 @@ bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths,
return false;
}
std::string ParseError;
- if (!parse(FileOrErr.get().get(), Sections, ParseError)) {
+ if (!parse(FileOrErr.get().get(), ParseError)) {
Error = (Twine("error parsing file '") + Path + "': " + ParseError).str();
return false;
}
@@ -113,82 +124,78 @@ bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths,
bool SpecialCaseList::createInternal(const MemoryBuffer *MB,
std::string &Error) {
- StringMap<size_t> Sections;
- if (!parse(MB, Sections, Error))
+ if (!parse(MB, Error))
return false;
return true;
}
-bool SpecialCaseList::parse(const MemoryBuffer *MB,
- StringMap<size_t> &SectionsMap,
- std::string &Error) {
- // Iterate through each line in the exclusion list file.
- SmallVector<StringRef, 16> Lines;
- MB->getBuffer().split(Lines, '\n');
+Expected<SpecialCaseList::Section *>
+SpecialCaseList::addSection(StringRef SectionStr, unsigned LineNo,
+ bool UseGlobs) {
+ auto [It, DidEmplace] = Sections.try_emplace(SectionStr);
+ auto &Section = It->getValue();
+ if (DidEmplace)
+ if (auto Err = Section.SectionMatcher->insert(SectionStr, LineNo, UseGlobs))
+ return createStringError(errc::invalid_argument,
+ "malformed section at line " + Twine(LineNo) +
+ ": '" + SectionStr +
+ "': " + toString(std::move(Err)));
+ return &Section;
+}
- unsigned LineNo = 1;
- StringRef Section = "*";
+bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) {
+ Section *CurrentSection;
+ if (auto Err = addSection("*", 1).moveInto(CurrentSection)) {
+ Error = toString(std::move(Err));
+ return false;
+ }
- for (auto I = Lines.begin(), E = Lines.end(); I != E; ++I, ++LineNo) {
- *I = I->trim();
- // Ignore empty lines and lines starting with "#"
- if (I->empty() || I->startswith("#"))
+ // In https://reviews.llvm.org/D154014 we added glob support and planned to
+ // remove regex support in patterns. We temporarily support the original
+ // behavior using regexes if "#!special-case-list-v1" is the first line of the
+ // file. For more details, see
+ // https://discourse.llvm.org/t/use-glob-instead-of-regex-for-specialcaselists/71666
+ bool UseGlobs = !MB->getBuffer().starts_with("#!special-case-list-v1\n");
+
+ for (line_iterator LineIt(*MB, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
+ !LineIt.is_at_eof(); LineIt++) {
+ unsigned LineNo = LineIt.line_number();
+ StringRef Line = LineIt->trim();
+ if (Line.empty())
continue;
// Save section names
- if (I->startswith("[")) {
- if (!I->endswith("]")) {
- Error = (Twine("malformed section header on line ") + Twine(LineNo) +
- ": " + *I).str();
- return false;
- }
-
- Section = I->slice(1, I->size() - 1);
-
- std::string REError;
- Regex CheckRE(Section);
- if (!CheckRE.isValid(REError)) {
+ if (Line.starts_with("[")) {
+ if (!Line.ends_with("]")) {
Error =
- (Twine("malformed regex for section ") + Section + ": '" + REError)
+ ("malformed section header on line " + Twine(LineNo) + ": " + Line)
.str();
return false;
}
+ if (auto Err = addSection(Line.drop_front().drop_back(), LineNo, UseGlobs)
+ .moveInto(CurrentSection)) {
+ Error = toString(std::move(Err));
+ return false;
+ }
continue;
}
- // Get our prefix and unparsed regexp.
- std::pair<StringRef, StringRef> SplitLine = I->split(":");
- StringRef Prefix = SplitLine.first;
- if (SplitLine.second.empty()) {
+ // Get our prefix and unparsed glob.
+ auto [Prefix, Postfix] = Line.split(":");
+ if (Postfix.empty()) {
// Missing ':' in the line.
- Error = (Twine("malformed line ") + Twine(LineNo) + ": '" +
- SplitLine.first + "'").str();
+ Error = ("malformed line " + Twine(LineNo) + ": '" + Line + "'").str();
return false;
}
- std::pair<StringRef, StringRef> SplitRegexp = SplitLine.second.split("=");
- std::string Regexp = std::string(SplitRegexp.first);
- StringRef Category = SplitRegexp.second;
-
- // Create this section if it has not been seen before.
- if (!SectionsMap.contains(Section)) {
- std::unique_ptr<Matcher> M = std::make_unique<Matcher>();
- std::string REError;
- if (!M->insert(std::string(Section), LineNo, REError)) {
- Error = (Twine("malformed section ") + Section + ": '" + REError).str();
- return false;
- }
-
- SectionsMap[Section] = Sections.size();
- Sections.emplace_back(std::move(M));
- }
-
- auto &Entry = Sections[SectionsMap[Section]].Entries[Prefix][Category];
- std::string REError;
- if (!Entry.insert(std::move(Regexp), LineNo, REError)) {
- Error = (Twine("malformed regex in line ") + Twine(LineNo) + ": '" +
- SplitLine.second + "': " + REError).str();
+ auto [Pattern, Category] = Postfix.split("=");
+ auto &Entry = CurrentSection->Entries[Prefix][Category];
+ if (auto Err = Entry.insert(Pattern, LineNo, UseGlobs)) {
+ Error =
+ (Twine("malformed ") + (UseGlobs ? "glob" : "regex") + " in line " +
+ Twine(LineNo) + ": '" + Pattern + "': " + toString(std::move(Err)))
+ .str();
return false;
}
}
@@ -205,13 +212,14 @@ bool SpecialCaseList::inSection(StringRef Section, StringRef Prefix,
unsigned SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix,
StringRef Query,
StringRef Category) const {
- for (const auto &SectionIter : Sections)
- if (SectionIter.SectionMatcher->match(Section)) {
- unsigned Blame =
- inSectionBlame(SectionIter.Entries, Prefix, Query, Category);
+ for (const auto &It : Sections) {
+ const auto &S = It.getValue();
+ if (S.SectionMatcher->match(Section)) {
+ unsigned Blame = inSectionBlame(S.Entries, Prefix, Query, Category);
if (Blame)
return Blame;
}
+ }
return 0;
}
@@ -226,4 +234,4 @@ unsigned SpecialCaseList::inSectionBlame(const SectionEntries &Entries,
return II->getValue().match(Query);
}
-} // namespace llvm
+} // namespace llvm
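
Under the new parser a special case list uses glob patterns by default, and a file opts back into regex semantics by making `#!special-case-list-v1` its first line. A small illustrative list (section and entry names are hypothetical):

    # Globs by default; put "#!special-case-list-v1" on the very first line
    # to restore the old regex interpretation.
    [cfi-vcall]
    src:third_party/*
    fun:*BadVtable*
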
diff --git a/contrib/llvm-project/llvm/lib/Support/StringExtras.cpp b/contrib/llvm-project/llvm/lib/Support/StringExtras.cpp
index 5683d7005584..6ae26267337b 100644
--- a/contrib/llvm-project/llvm/lib/Support/StringExtras.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/StringExtras.cpp
@@ -98,15 +98,16 @@ std::string llvm::convertToSnakeFromCamelCase(StringRef input) {
std::string snakeCase;
snakeCase.reserve(input.size());
- for (char c : input) {
- if (!std::isupper(c)) {
- snakeCase.push_back(c);
- continue;
- }
-
- if (!snakeCase.empty() && snakeCase.back() != '_')
+ auto check = [&input](size_t j, function_ref<bool(int)> predicate) {
+ return j < input.size() && predicate(input[j]);
+ };
+ for (size_t i = 0; i < input.size(); ++i) {
+ snakeCase.push_back(tolower(input[i]));
+ // Handles "runs" of capitals, such as in OPName -> op_name.
+ if (check(i, isupper) && check(i + 1, isupper) && check(i + 2, islower))
+ snakeCase.push_back('_');
+ if ((check(i, islower) || check(i, isdigit)) && check(i + 1, isupper))
snakeCase.push_back('_');
- snakeCase.push_back(llvm::toLower(c));
}
return snakeCase;
}
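
A sketch of the rewritten conversion, including the capital-run case called out in the comment:

    #include "llvm/ADT/StringExtras.h"
    #include <cassert>
    using namespace llvm;

    void snakeExamples() {
      assert(convertToSnakeFromCamelCase("OPName") == "op_name");
      assert(convertToSnakeFromCamelCase("camelCase") == "camel_case");
      assert(convertToSnakeFromCamelCase("op2Name") == "op2_name");
    }
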
diff --git a/contrib/llvm-project/llvm/lib/Support/StringRef.cpp b/contrib/llvm-project/llvm/lib/Support/StringRef.cpp
index 3cce83a982c4..feee47ca693b 100644
--- a/contrib/llvm-project/llvm/lib/Support/StringRef.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/StringRef.cpp
@@ -388,17 +388,17 @@ static unsigned GetAutoSenseRadix(StringRef &Str) {
if (Str.empty())
return 10;
- if (Str.startswith("0x") || Str.startswith("0X")) {
+ if (Str.starts_with("0x") || Str.starts_with("0X")) {
Str = Str.substr(2);
return 16;
}
- if (Str.startswith("0b") || Str.startswith("0B")) {
+ if (Str.starts_with("0b") || Str.starts_with("0B")) {
Str = Str.substr(2);
return 2;
}
- if (Str.startswith("0o")) {
+ if (Str.starts_with("0o")) {
Str = Str.substr(2);
return 8;
}
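
These prefixes drive radix auto-detection when callers pass radix 0; a sketch:

    #include "llvm/ADT/StringRef.h"
    #include <cassert>
    using namespace llvm;

    void radixExamples() {
      unsigned V;
      assert(!StringRef("0x1f").getAsInteger(0, V) && V == 31);  // hex
      assert(!StringRef("0b101").getAsInteger(0, V) && V == 5);  // binary
      assert(!StringRef("0o17").getAsInteger(0, V) && V == 15);  // octal
    }
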
diff --git a/contrib/llvm-project/llvm/lib/Support/StringSaver.cpp b/contrib/llvm-project/llvm/lib/Support/StringSaver.cpp
index f7ccfb97ea79..4a616d734832 100644
--- a/contrib/llvm-project/llvm/lib/Support/StringSaver.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/StringSaver.cpp
@@ -8,6 +8,8 @@
#include "llvm/Support/StringSaver.h"
+#include "llvm/ADT/SmallString.h"
+
using namespace llvm;
StringRef StringSaver::save(StringRef S) {
@@ -18,9 +20,19 @@ StringRef StringSaver::save(StringRef S) {
return StringRef(P, S.size());
}
+StringRef StringSaver::save(const Twine &S) {
+ SmallString<128> Storage;
+ return save(S.toStringRef(Storage));
+}
+
StringRef UniqueStringSaver::save(StringRef S) {
auto R = Unique.insert(S);
if (R.second) // cache miss, need to actually save the string
*R.first = Strings.save(S); // safe replacement with equal value
return *R.first;
}
+
+StringRef UniqueStringSaver::save(const Twine &S) {
+ SmallString<128> Storage;
+ return save(S.toStringRef(Storage));
+}
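
A sketch of the new Twine overloads: a composed string can now be saved without materializing a std::string at the call site.

    #include "llvm/ADT/Twine.h"
    #include "llvm/Support/Allocator.h"
    #include "llvm/Support/StringSaver.h"
    using namespace llvm;

    void saveExample() {
      BumpPtrAllocator Alloc;
      StringSaver Saver(Alloc);
      // The Twine is rendered into a small stack buffer, then copied into
      // storage owned by Alloc.
      StringRef S = Saver.save("arg" + Twine(42)); // "arg42"
      (void)S;
    }
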
diff --git a/contrib/llvm-project/llvm/lib/Support/Twine.cpp b/contrib/llvm-project/llvm/lib/Support/Twine.cpp
index 8bbfd0815a40..495b9cf2dbd6 100644
--- a/contrib/llvm-project/llvm/lib/Support/Twine.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Twine.cpp
@@ -44,6 +44,8 @@ StringRef Twine::toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const {
const std::string *str = LHS.stdString;
return StringRef(str->c_str(), str->size());
}
+ case StringLiteralKind:
+ return StringRef(LHS.ptrAndLength.ptr, LHS.ptrAndLength.length);
default:
break;
}
@@ -69,6 +71,7 @@ void Twine::printOneChild(raw_ostream &OS, Child Ptr,
OS << *Ptr.stdString;
break;
case Twine::PtrAndLengthKind:
+ case Twine::StringLiteralKind:
OS << StringRef(Ptr.ptrAndLength.ptr, Ptr.ptrAndLength.length);
break;
case Twine::FormatvObjectKind:
@@ -124,6 +127,10 @@ void Twine::printOneChildRepr(raw_ostream &OS, Child Ptr,
OS << "ptrAndLength:\""
<< StringRef(Ptr.ptrAndLength.ptr, Ptr.ptrAndLength.length) << "\"";
break;
+ case Twine::StringLiteralKind:
+ OS << "constexprPtrAndLength:\""
+ << StringRef(Ptr.ptrAndLength.ptr, Ptr.ptrAndLength.length) << "\"";
+ break;
case Twine::FormatvObjectKind:
OS << "formatv:\"" << *Ptr.formatvObject << "\"";
break;
diff --git a/contrib/llvm-project/llvm/lib/Support/UnicodeNameToCodepoint.cpp b/contrib/llvm-project/llvm/lib/Support/UnicodeNameToCodepoint.cpp
index accebf1098ab..40592660acaa 100644
--- a/contrib/llvm-project/llvm/lib/Support/UnicodeNameToCodepoint.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/UnicodeNameToCodepoint.cpp
@@ -119,11 +119,11 @@ static Node readNode(uint32_t Offset, const Node *Parent = nullptr) {
static bool startsWith(StringRef Name, StringRef Needle, bool Strict,
std::size_t &Consummed, char &PreviousCharInName,
- char &PreviousCharInNeedle, bool IsPrefix = false) {
+ bool IsPrefix = false) {
Consummed = 0;
if (Strict) {
- if (!Name.startswith(Needle))
+ if (!Name.starts_with(Needle))
return false;
Consummed = Needle.size();
return true;
@@ -135,18 +135,18 @@ static bool startsWith(StringRef Name, StringRef Needle, bool Strict,
auto NeedlePos = Needle.begin();
char PreviousCharInNameOrigin = PreviousCharInName;
- char PreviousCharInNeedleOrigin = PreviousCharInNeedle;
-
+ char PreviousCharInNeedle = *Needle.begin();
auto IgnoreSpaces = [](auto It, auto End, char &PreviousChar,
- bool IgnoreEnd = false) {
+ bool IsPrefix = false) {
while (It != End) {
const auto Next = std::next(It);
// Ignore spaces, underscore, medial hyphens
- // https://unicode.org/reports/tr44/#UAX44-LM2.
+ // The generator ensures a needle never ends (or starts) by a medial
+ // hyphen https://unicode.org/reports/tr44/#UAX44-LM2.
bool Ignore =
*It == ' ' || *It == '_' ||
(*It == '-' && isAlnum(PreviousChar) &&
- ((Next != End && isAlnum(*Next)) || (Next == End && IgnoreEnd)));
+ ((Next != End && isAlnum(*Next)) || (Next == End && IsPrefix)));
PreviousChar = *It;
if (!Ignore)
break;
@@ -171,20 +171,18 @@ static bool startsWith(StringRef Name, StringRef Needle, bool Strict,
Consummed = std::distance(Name.begin(), NamePos);
if (NeedlePos != Needle.end()) {
PreviousCharInName = PreviousCharInNameOrigin;
- PreviousCharInNeedle = PreviousCharInNeedleOrigin;
}
return NeedlePos == Needle.end();
}
static std::tuple<Node, bool, uint32_t>
compareNode(uint32_t Offset, StringRef Name, bool Strict,
- char PreviousCharInName, char PreviousCharInNeedle,
- BufferType &Buffer, const Node *Parent = nullptr) {
+ char PreviousCharInName, BufferType &Buffer,
+ const Node *Parent = nullptr) {
Node N = readNode(Offset, Parent);
std::size_t Consummed = 0;
- bool DoesStartWith =
- N.IsRoot || startsWith(Name, N.Name, Strict, Consummed,
- PreviousCharInName, PreviousCharInNeedle);
+ bool DoesStartWith = N.IsRoot || startsWith(Name, N.Name, Strict, Consummed,
+ PreviousCharInName);
if (!DoesStartWith)
return std::make_tuple(N, false, 0);
@@ -199,7 +197,7 @@ compareNode(uint32_t Offset, StringRef Name, bool Strict,
uint32_t Value;
std::tie(C, Matches, Value) =
compareNode(ChildOffset, Name.substr(Consummed), Strict,
- PreviousCharInName, PreviousCharInNeedle, Buffer, &N);
+ PreviousCharInName, Buffer, &N);
if (Matches) {
std::reverse_copy(C.Name.begin(), C.Name.end(),
std::back_inserter(Buffer));
@@ -215,7 +213,7 @@ compareNode(uint32_t Offset, StringRef Name, bool Strict,
static std::tuple<Node, bool, uint32_t>
compareNode(uint32_t Offset, StringRef Name, bool Strict, BufferType &Buffer) {
- return compareNode(Offset, Name, Strict, 0, 0, Buffer);
+ return compareNode(Offset, Name, Strict, 0, Buffer);
}
// clang-format off
@@ -262,7 +260,6 @@ static std::size_t findSyllable(StringRef Name, bool Strict,
char &PreviousInName, int &Pos, int Column) {
assert(Column == 0 || Column == 1 || Column == 2);
static std::size_t CountPerColumn[] = {LCount, VCount, TCount};
- char NeedleStart = 0;
int Len = -1;
int Prev = PreviousInName;
for (std::size_t I = 0; I < CountPerColumn[Column]; I++) {
@@ -271,8 +268,8 @@ static std::size_t findSyllable(StringRef Name, bool Strict,
continue;
std::size_t Consummed = 0;
char PreviousInNameCopy = PreviousInName;
- bool DoesStartWith = startsWith(Name, Syllable, Strict, Consummed,
- PreviousInNameCopy, NeedleStart);
+ bool DoesStartWith =
+ startsWith(Name, Syllable, Strict, Consummed, PreviousInNameCopy);
if (!DoesStartWith)
continue;
Len = Consummed;
@@ -290,9 +287,9 @@ nameToHangulCodePoint(StringRef Name, bool Strict, BufferType &Buffer) {
Buffer.clear();
// Hangul Syllable Decomposition
std::size_t Consummed = 0;
- char NameStart = 0, NeedleStart = 0;
- bool DoesStartWith = startsWith(Name, "HANGUL SYLLABLE ", Strict, Consummed,
- NameStart, NeedleStart);
+ char NameStart = 0;
+ bool DoesStartWith =
+ startsWith(Name, "HANGUL SYLLABLE ", Strict, Consummed, NameStart);
if (!DoesStartWith)
return std::nullopt;
Name = Name.substr(Consummed);
@@ -348,9 +345,9 @@ nameToGeneratedCodePoint(StringRef Name, bool Strict, BufferType &Buffer) {
for (auto &&Item : GeneratedNamesDataTable) {
Buffer.clear();
std::size_t Consummed = 0;
- char NameStart = 0, NeedleStart = 0;
+ char NameStart = 0;
bool DoesStartWith = startsWith(Name, Item.Prefix, Strict, Consummed,
- NameStart, NeedleStart, /*isPrefix*/ true);
+ NameStart, /*IsPrefix=*/true);
if (!DoesStartWith)
continue;
auto Number = Name.substr(Consummed);
@@ -390,8 +387,7 @@ static std::optional<char32_t> nameToCodepoint(StringRef Name, bool Strict,
std::reverse(Buffer.begin(), Buffer.end());
// UAX44-LM2. Ignore case, whitespace, underscore ('_'), and all medial
// hyphens except the hyphen in U+1180 HANGUL JUNGSEONG O-E.
- if (!Strict && Value == 0x116c &&
- Name.find_insensitive("O-E") != StringRef::npos) {
+ if (!Strict && Value == 0x116c && Name.contains_insensitive("O-E")) {
Buffer = "HANGUL JUNGSEONG O-E";
Value = 0x1180;
}
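With PreviousCharInNeedle gone, the state threaded through the recursive compareNode() calls is halved: the generator guarantees a needle never starts or ends with a medial hyphen, so the needle side can seed its previous-character state locally from *Needle.begin(). For reference, the UAX44-LM2 skip rule the IgnoreSpaces lambda implements, restated as a standalone sketch (plain C++, not LLVM API):

  #include <cctype>
  #include <string_view>

  // Spaces and underscores are always ignorable; a hyphen only when it is
  // medial (between alphanumerics), or trailing while matching a prefix.
  static bool isIgnorable(std::string_view S, std::size_t I, char Prev,
                          bool IsPrefix) {
    const char C = S[I];
    if (C == ' ' || C == '_')
      return true;
    if (C != '-' || !std::isalnum(static_cast<unsigned char>(Prev)))
      return false;
    const bool AtEnd = I + 1 == S.size();
    return AtEnd ? IsPrefix
                 : std::isalnum(static_cast<unsigned char>(S[I + 1])) != 0;
  }

Loose matching thus treats "latin_small letter-a" and "LATIN SMALL LETTER A" as the same name, while a trailing hyphen is only ignorable when matching a generated-name prefix such as "CJK UNIFIED IDEOGRAPH-".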
diff --git a/contrib/llvm-project/llvm/lib/Support/UnicodeNameToCodepointGenerated.cpp b/contrib/llvm-project/llvm/lib/Support/UnicodeNameToCodepointGenerated.cpp
index 656f492374c5..336318706439 100644
--- a/contrib/llvm-project/llvm/lib/Support/UnicodeNameToCodepointGenerated.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/UnicodeNameToCodepointGenerated.cpp
@@ -73,10660 +73,10666 @@ extern const uint8_t *UnicodeNameToCodepointIndex;
extern const std::size_t UnicodeNameToCodepointIndexSize;
extern const std::size_t UnicodeNameToCodepointLargestNameSize;
const char *UnicodeNameToCodepointDict =
- " _-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789PER BODY TILTING FROM HIP JOINTS "
- "BETWEEN MIDDLE AND RING FINGERSFACE WITH SYMBOLS COVERING MOUTHLE-LINE "
- "EQUAL ABOVE GREATER-THANLE COMMA QUOTATION MARK ORNAMENTLAR SIGN WITH "
- "OVERLAID BACKSLASH BAR AT END OF HORIZONTAL STROKEWNWARDS ARROW WITH TIP "
- "LEFTWARDSLAGAB TIMES U OVER LAGAB TIMES UWO DOTS ABOVE AND TWO DOTS "
- "BELOWAND LEFTWARDS OPEN CIRCLE ARROWSFT-POINTING ANGLE QUOTATION MARKWO "
- "DOTS OVER ONE DOT PUNCTUATIONUPPER AND LOWER ONE EIGHTH "
- "BLOCKTRIANGLE-HEADED RIGHTWARDS ARROWAND MIDDLE RIGHT TO LOWER CENTRE "
- "SHADOWED WHITE RIGHTWARDS ARROWLOWER LEFT CURLY BRACKET SECTIONROFLEX "
- "CLICK WITH RETROFLEX HOOKL ARABIC LETTER TAH AND TWO DOTSRONG "
- "CENTRALIZATION STROKE BELOWOWNWARDS HARPOON WITH BARB RIGHTOWER RIGHT "
- "CURLY BRACKET SECTIONGREATER-THAN ABOVE SLANTED EQUALTOP HALF DIVIDED BY "
- "VERTICAL BARAISING BOTH HANDS IN CELEBRATION DOWNWARDS EQUILATERAL "
- "ARROWHEADUG2 OVER TUG2 TUG2 OVER TUG2 PAP DOWNWARDS TRIANGLE-HEADED "
- "ARROWLL BUT UPPER LEFT QUADRANT BLACKEN ARM ENDING IN ARROW POINTING "
- "DIFIER LETTER LABIALIZATION MARKDIC KASHMIRI INDEPENDENT SVARITARIGHT "
- "SEMICIRCLE WITH THREE DOTS CONTAINING SMALL WHITE TRIANGLE HORIZONTAL "
- "STROKES TO THE RIGHTER ARROWS CIRCLING ANTICLOCKWISEER IGI SHIR OVER SHIR "
- "UD OVER UDRONT-TILTED SHADOWED WHITE ARROWER TAB NI OVER NI DISH OVER "
- "DISHDED HIGH STOP WITH FILLED CENTRELEFTWARDS HARPOON WITH BARB DOWN "
- "KORANIC STOP SIGN ISOLATED FORMESS-THAN ABOVE DOUBLE-LINE EQUAL LEFTWARDS "
- "TRIANGLE-HEADED ARROW LETTER BYELORUSSIAN-UKRAINIAN IETALLED BLACK AND "
- "WHITE FLORETTE LOWER HALF INVERSE MEDIUM SHADERIGHTWARDS TRIANGLE-HEADED "
- "ARROWD ARROW WITH TRIANGLE ARROWHEADSHT-POINTING ANGLE QUOTATION MARKED "
- "ARABIC-INDIC DIGIT FOUR ABOVETER-THAN ABOVE DOUBLE-LINE EQUALNORMAL "
- "FACTOR SEMIDIRECT PRODUCTTICAL BAR DOUBLE RIGHT TURNSTILEE SQUARED LATIN "
- "CAPITAL LETTER PIRECT PRODUCT WITH BOTTOM CLOSEDROXIMATELY NOR ACTUALLY "
- "EQUAL TOIGHTWARDS HARPOON WITH BARB DOWNATHARVAVEDIC INDEPENDENT "
- "SVARITARIANGLE-HEADED OPEN CIRCLE ARROWINTING DOWNWARDS THEN NORTH EASTED "
- "ARABIC-INDIC DIGIT FOUR BELOWHUR KAZAKH KIRGHIZ ALEF MAKSURA THIRD WHITE "
- "RIGHT POINTING INDEXVED STEM PARAGRAPH SIGN ORNAMENTS VERTICALLY BELOW "
- "AND SMALL TAHIDE AND JOINED WITH INTERSECTIONE CONTAINING BLACK SMALL "
- "LOZENGEOF UPWARDS TRIANGLE-HEADED ARROWE-POINTED BLACK RIGHTWARDS ARROWE "
- "TO MIDDLE LEFT TO LOWER CENTRE-SHADOWED WHITE RIGHTWARDS ARROW-POINTING "
- "ANGLE BRACKET ORNAMENTSEMICIRCULAR ANTICLOCKWISE ARROWH HAMZA ABOVE WITH "
- "ALEF MAKSURA H HORIZONTAL MIDDLE BLACK STRIPEMARK WITH LEFT RIGHT ARROW "
- "ABOVESWIRL BIRGA WITH DOUBLE ORNAMENTH SUPERSCRIPT ALEF ISOLATED "
- "FORMTOM-LIGHTED RIGHTWARDS ARROWHEADVE LESS-THAN ABOVE SLANTED EQUALS AND "
- "UPWARDS OPEN CIRCLE ARROWS TORTOISE SHELL BRACKET ORNAMENT TRIANGULAR "
- "THREE QUARTERS BLOCKHAND WITH MIDDLE FINGER EXTENDEDEDIUM SHADE AND RIGHT "
- "HALF BLOCKM LEFT MEMBER OF DOUBLE VERTICALTION SIGN WITH CIRCUMFLEX "
- "ACCENTEDIUM SHADE AND LOWER HALF BLOCKARDROP-SPOKED PROPELLER "
- "ASTERISKARDS ARROW ABOVE LEFTWARDS ARROWED UPWARDS EQUILATERAL "
- "ARROWHEADARDS HARPOON WITH BARB DOWNWARDSTIMES ASH2 KU OVER HI TIMES "
- "ASH2OUBLE-LINE EQUAL ABOVE LESS-THANED COMMA QUOTATION MARK ORNAMENTONE "
- "HUNDRED THIRTY-FIVE DEGREESAISED HAND WITH FINGERS SPLAYEDLEFTWARDS "
- "EQUILATERAL ARROWHEADETALLED OUTLINED BLACK FLORETTERIGHTWARDS HARPOON "
- "WITH BARB UPRING OVER TWO RINGS PUNCTUATIONRINGS OVER ONE RING "
- "PUNCTUATIONAND MIDDLE LEFT TO LOWER CENTRETNAMESE ALTERNATE READING MARK "
- "UPWARDS HARPOON WITH BARB RIGHTIN WHITE CIRCLE IN BLACK SQUAREACK-TILTED "
- "SHADOWED WHITE ARROW CROSSING ASH OVER ASH OVER ASHINDEX THUMB CURVE "
- "THUMB INSIDEHT CENTRALIZATION STROKE BELOWDOWNWARDS AND RIGHTWARDS "
- "ARROW-HIRAGANA PROLONGED SOUND MARKAGGRAVATED INDEPENDENT "
- "SVARITAGREATER-THAN ABOVE EQUALS SIGNGHT FOUR POINTED PINWHEEL STARDOT "
- "BELOW AND THREE DOTS ABOVEEFT SEMICIRCLE WITH THREE DOTSOPEN CENTRE EIGHT "
- "POINTED STARAND JOINED BY DASH WITH SUBSETMODIFIER LETTER LEFT HALF "
- "RINGMINTON RACQUET AND SHUTTLECOCKQAF WITH LAM WITH ALEF MAKSURAMAKSURA "
- "WITH SUPERSCRIPT ALEF DIVIDED BY HORIZONTAL BAR AND HIGH-REVERSED-9 "
- "QUOTATION MARKSAD WITH LAM WITH ALEF MAKSURAEART EXCLAMATION MARK "
- "ORNAMENTON WITH RIGHTWARDS ARROW ABOVEDOWN HEAVY AND RIGHT UP LIGHTONAL "
- "INDICATOR SYMBOL LETTER DOT OVER TWO DOTS PUNCTUATION WITH REVERSED "
- "NEGATION SLASHDOWN MIDDLE THUMB INDEX CROSS DOWN INDEX THUMB HOOK "
- "MIDDLENS-SERIF INTERROBANG ORNAMENTCKED FACE WITH EXPLODING HEADEFTWARDS "
- "HARPOON WITH BARB UPOMBINING ANUSVARA ABOVE RIGHTCONTAINING BLACK SMALL "
- "CIRCLEUP HEAVY AND RIGHT DOWN LIGHT OVER RIGHTWARDS ARROW TO BARWITH "
- "RIGHTWARDS ARROW AT LEFTPUNCTUATION CHINOOK FULL STOPEXTENDED "
- "ARABIC-INDIC DIGIT TEVERSED LUNATE EPSILON SYMBOLSUPERSCRIPT ALEF INITIAL "
- "FORMSEMICIRCULAR PATH AROUND POLELIGHT FOUR POINTED BLACK CUSPEFT ARC "
- "GREATER-THAN BRACKETACE DIRECTION POSITION NOSE DOWN HEAVY AND LEFT UP "
- "LIGHTFTING POINT RIGHTWARDS ARROWEDGE-TAILED RIGHTWARDS ARROWLEFTWARDS OF "
- "DOWNWARDS ARROWUPPER CENTRE TO MIDDLE RIGHTDOUBLE ANUSVARA "
- "ANTARGOMUKHASALTIRE WITH ROUNDED CORNERSBESIDE AND JOINED WITH "
- "UNIONMONOGRAMMOS TESSERA DODEKATAUP HEAVY AND LEFT DOWN LIGHTLIQUID "
- "MEASURE FIRST SUBUNITHREE HUNDRED FIFTEEN DEGREESHADED WHITE RIGHTWARDS "
- "ARROWOUND-TIPPED RIGHTWARDS ARROWU ALAYHI WAAALIHEE WA-SALLAMASTERISKS "
- "ALIGNED VERTICALLYT LITTER IN ITS PLACE SYMBOLISMILLAH AR-RAHMAN "
- "AR-RAHEEMRECTANGULAR PATH AROUND POLEIBE SYLLABLE BOUNDARY MARKERMIDDLE "
- "RING LITTLE CONJOINEDUPWARDS AND RIGHTWARDS ARROWBRDA RNYING YIG MGO MDUN "
- "MAPPY PERSON RAISING ONE HANDBRDA RNYING YIG MGO SGAB MAFINGER COVERING "
- "CLOSED LIPSOUR BALLOON-SPOKED ASTERISKHEAD MARK WITH MOON AND SUNTALIC "
- "LATIN CAPITAL LETTER ZERO FOR ODD POWERS OF FOURCONTINUOUS UNDERLINE "
- "SYMBOL AND LEFT SEMICIRCLE ARROWSLOWER MIDDLE LEFT TO LOWER STRUMENTAL "
- "NOTATION SYMBOL-KATHAKA INDEPENDENT SVARITARIGHT ARC LESS-THAN BRACKETIDE "
- "ARC ANTICLOCKWISE ARROWCIRCLE WITH NORTHWEST ARROWIDE-HEADED RIGHTWARDS "
- "ARROWLOWER TONAL RANGE INDICATORBETWEEN TWO HORIZONTAL BARSEAVY WHITE "
- "RIGHTWARDS ARROWRIPLE VERTICAL BAR OPERATORPHARYNGEAL VOICED FRICATIVEWO "
- "DOTS BELOW AND DOT ABOVEFOUR FINGERS CONJOINED BENT-FEATHERED RIGHTWARDS "
- "ARROW BARREE WITH TWO DOTS BELOWHANDED INTERLACED PENTAGRAMLARGE "
- "EQUILATERAL ARROWHEADUPPER MIDDLE LEFT TO UPPER LIGHT CENTRALIZATION "
- "STROKEDOUBLE PRIME QUOTATION MARKDOUBLE ANGLE QUOTATION MARKTWO HUNDRED "
- "SEVENTY DEGREESWALLPLANE SHOULDER HIP MOVELESS-THAN ABOVE EQUALS "
- "SIGNEMICIRCULAR CLOCKWISE ARROW DIVIDED BY HORIZONTAL RULERIANGULAR ONE "
- "QUARTER BLOCKIMPERFECTUM CUM PROLATIONE ONE LARGE AND ONE SMALL "
- "EYEENTATION FORM FOR VERTICAL CRIPT LIGATURE ET ORNAMENTU REVERSED OVER U "
- "REVERSEDMEEM WITH HAH WITH TATWEELCAT FACE WITH SMILING EYESOORPLANE "
- "SHOULDER HIP MOVELINE FEED SEPARATOR SYMBOLMARRIED PARTNERSHIP "
- "SYMBOLUNEVEN EYES AND WAVY MOUTHRIGHT PARENTHESIS ORNAMENTMODIFIER "
- "FITZPATRICK TYPE-LLALLAHOU ALAYHE WASSALLAMRIPLE DOT PUNCTUATION "
- "MARKDROP-SHADOWED WHITE SQUARETHREE DOTS ABOVE DOWNWARDS OR APPROXIMATELY "
- "EQUAL TOSSIAN ASTROLOGICAL SYMBOL AND MIDDLE FINGERS CROSSEDCLOCKWISE "
- "ARROW WITH MINUSE ONE-WAY LEFT WAY TRAFFICIDEOGRAPHIC ITERATION MARKAND "
- "RIGHT ONE EIGHTH BLOCKLEFTWARDS OF UPWARDS ARROWJECT REPLACEMENT "
- "CHARACTERLANTED SOUTH ARROW WITH HOERSTRASS ELLIPTIC FUNCTIONDOTTED "
- "SUBSTITUTION MARKERBLE TENNIS PADDLE AND BALLALEF MAKSURA ISOLATED FORM "
- "GAD OVER GAD GAR OVER GARHTORA SKLIRON CHROMA VASISDOTTED LUNATE SIGMA "
- "SYMBOL LESS THAN THE DENOMINATORWITH LEFTWARDS ARROW ABOVEINDUSTRIAL "
- "STANDARD SYMBOLOCKED FEMALE AND MALE SIGN NEGATED WITH VERTICAL "
- "BARORTHOGONAL CROSSHATCH FILLONOMICAL SYMBOL FOR URANUSOTATED FLORAL "
- "HEART BULLETYAJURVEDIC MIDLINE SVARITA WITHIN TRIANGLE ARROWHEADSTROKE "
- "AND TWO DOTS ABOVETTED SUBSTITUTION BRACKETLEFTWARDS ARROW WITH HOOKRIGHT "
- "DIAGONAL HALF BLACKTAB OVER TAB GAR OVER GARDOUBLE CANDRABINDU VIRAMABUT "
- "NOT ACTUALLY EQUAL TODIAERESIS AND HOOK SYMBOLOLD ASSYRIAN WORD "
- "DIVIDERELD HOCKEY STICK AND BALLINVERTED EXCLAMATION MARKMULTIPLICATION "
- "SIGN BELOWLEFT PARENTHESIS ORNAMENTTERNION INTEGRAL OPERATORLOW QUILT "
- "SQUARE ORNAMENTBREVE WITH INVERTED BREVERIST CIRCLE HITTING WALL "
- "REE-HUNDRED-AND-TWENTIETHFECTIVENESS OR DISTORTION WITH DOUBLE VERTICAL "
- "BAR WITH DOUBLE GRAVE ACCENTMBINING CRYPTOGRAMMIC DOTCROSSING NORTH EAST "
- "ARROW OVER NUN LAGAR TIMES SALMEDIUM TRIANGLE ARROWHEADCAT FACE WITH "
- "CLOSED EYESOUTLINED RIGHTWARDS ARROWRANCH BANK IDENTIFICATION WITH "
- "CIRCLED ONE OVERLAYOUBLE BIRGA WITH ORNAMENTING ON THE FLOOR LAUGHINGDRY "
- "MEASURE FIRST SUBUNITONE UNDER EIGHTEEN SYMBOLNIVERSAL RECYCLING "
- "SYMBOLFFICULTY AT THE BEGINNING WITH DOUBLE MIDDLE TILDERIPLE BIRGA WITH "
- "ORNAMENTWELVE POINTED BLACK STARORIZONTAL BAR WITH NOTCHUPWARD POINTING "
- "TRIANGLEGLOTTAL STOP WITH STROKEWITH CANCELLATION STROKET BLACK "
- "RIGHTWARDS ARROWEQUAL TO OR GREATER-THANTED INTERPOLATION MARKERMALL "
- "CIRCLE TO THE RIGHTUPRIGHT RECTANGULAR ZEROHEAVY BLACK HEART "
- "BULLETDESCENDING MUSICAL NOTESARM CIRCLE HITTING WALL REVERSED NINE-LIKE "
- "BHALESMALL ARABIC LETTER TAH ISOSCELES RIGHT TRIANGLESYLLABLE REPETITION "
- "MARKBLIC ADDRESS LOUDSPEAKERTEARDROP-SPOKED ASTERISKARROW POINTING "
- "DIRECTLY OTTOM-SHADED WHITE ARROWGREATER-THAN OR EQUAL TOLICATION PROGRAM "
- "COMMANDOTTOM SHADED WHITE ARROWBUSINESS SUIT LEVITATINGDIAGONAL "
- "CROSSHATCH FILLDOWNSCALING FACTOR KIIZHBLACK LENTICULAR BRACKETSTROKE "
- "THROUGH DESCENDERINTERSECTION WITH SERIFSLINE HORIZONTAL "
- "ELLIPSISSINGLE-LINE NOT EQUAL TOPERSCRIPT ALEF MOKHASSASBERKANAN BEORC "
- "BJARKAN BING FACE WITH OPEN MOUTHWITH UPWARDS ARROW ABOVECOMPATIBILITY "
- "IDEOGRAPH- OVER TOP SQUARE BRACKETPOINTING DOWNWARDS ABOVEAND MALE AND "
- "FEMALE SIGNWO DOTS VERTICALLY ABOVE POINTING BACKHAND INDEXEYES AND HAND "
- "OVER MOUTHLY-RECYCLED PAPER SYMBOLCULINE ORDINAL INDICATORIGATURE OPEN ET "
- "ORNAMENTCONSECUTIVE EQUALS SIGNSDOUBLE HORIZONTAL STROKERIGHTWARDS THEN "
- "CURVING LARGE TRIANGLE ARROWHEADALTERNATE SECTION MARKERFINGER AND THUMB "
- "CROSSEDE PLUS A PLUS SU PLUS NAMTAVRULI CAPITAL LETTER "
- "ONE-HUNDRED-AND-SIXTIETHVOICED LARYNGEAL SPIRANTLEFT DIAGONAL HALF "
- "BLACKORAH WITH NINE BRANCHESPUNCTUATION END OF TEXTRIGHT DIAGONAL "
- "ELLIPSISIVE FINGERS SPREAD OPENGREEK SMALL LETTER IOTAWESTERN PWO KAREN "
- "TONE-RIGHT HORIZONTAL SECANTRIGHT-POINTING TRIANGLEISTED RIGHTWARDS "
- "ARROWSOVERLAPPING LOGICAL ANDOVER RIGHTWARDS HARPOON WITH HALF-CIRCLE "
- "BELOWIGHT-SHADED WHITE ARROWMITIAN CONJUGATE MATRIXRTOISE SHELL BRACKETED "
- "HREE POINTED BLACK STARSOLIDUS BINARY RELATIONHANKED RIGHTWARDS "
- "ARROWININE ORDINAL INDICATORLSCHREIBER PAUSE SYMBOLUBLE VERTICAL BAR "
- "BELOWAVOURING DELICIOUS FOODORIGINAL OF OR EQUAL TOBARBED RIGHTWARDS "
- "ARROWLEFT-SHADED WHITE ARROWDOWNWARDS THEN CURVING HT TRIFOLIATE "
- "SNOWFLAKERAISED OMISSION BRACKETARABIC LETTER TAH ABOVEESS OUTLINED WHITE "
- "STARCURRENT SYMBOL FORM TWOCLUSTER-INITIAL LETTER PA OVER PA GAR OVER GAR "
- "ROTATED NINETY DEGREESANG DEPARTING TONE MARKWO-WAY LEFT WAY TRAFFICUP "
- "SPREAD THUMB FORWARDZANTINE MUSICAL SYMBOL CAKE WITH SWIRL DESIGN "
- "HUNDRED TWENTY-EIGHTH ASCENDING MUSICAL NOTESIDE ARC CLOCKWISE ARROW "
- "POINTING AT THE VIEWERAND WOMAN HOLDING HANDSSING DIAGONAL CROSSING "
- "CIRCUMFLEX ACCENT ABOVEREE VARIATION SELECTOR ARTY HORN AND PARTY HATDOT "
- "BELOW AND DOT ABOVEWHITE FOUR POINTED CUSPALEF MAKSURA FINAL FORMUPWARDS "
- "THEN NORTH WESTATTACHING VERTICAL OMETALGAMATION OR COPRODUCTVERTICAL "
- "BISECTING LINEP WITH EXCLAMATION MARKSHAPE WITH A DOT INSIDEWITH "
- "HORIZONTAL STROKEDOMAIN ANTIRESTRICTIONLONG HORIZONTAL STROKERECTILINEAR "
- "BLACK STARSHORT VERTICAL STROKESINVERTED SMALL V ABOVE WITH DECORATIVE "
- "COVERQUESTION MARK ORNAMENTLE BESIDE VERTICAL BARYIAKENG PUACHUE HMONG "
- "TWO HORIZONTAL STROKESINVERTED SMALL V BELOWNOT INCLUDING THE "
- "POLEUBSCRIPT SMALL LETTER ARMENIAN ETERNITY SIGNDOUBLE-LINED HEAD "
- "MARKDOUBLE VERTICAL STROKELEFT TRIANGLE OPERATORING HEAD IN "
- "SILHOUETTEPUNCTUATION KUNDDALIYAROUND A POINT OPERATOR WITH THREE DOTS "
- "ABOVEHORT HORIZONTAL STROKEWASALLAM ISOLATED FORMDOUBLE SOLIDUS "
- "OVERLAYCURLY BRACKET ORNAMENT WITH SINGLE ZAPYATAYAWITH JEEM INITIAL "
- "FORMHEXIFORM LONG ANUSVARAEAST-POINTING AIRPLANEIN DEPARTING TONE "
- "MARKUIGHUR KIRGHIZ YEH WITGREATER-THAN DIAERESISLOW PARAPHRASE BRACKETND "
- "RECORDING COPYRIGHTKEEPING STILL MOUNTAINCONSONANT SIGN MEDIAL CONSONANT "
- "MODIFIER BAR SYMBOL FOR LIGHTHOUSETRIPLE RIGHT TURNSTILEDOWN-POINTING "
- "TRIANGLE-ROTATED DIVISION SIGNUSTOMER ACCOUNT NUMBERI YFESIS "
- "TETARTIMORIONTRIPLE VERTICAL STROKEUPPER RIGHT AND LOWER REE-CIRCLE "
- "ALTERNATE ICJK UNIFIED IDEOGRAPH-RROW WITH ROUNDED HEADRIGHT-POINTING "
- "FLEURONREASE FONT SIZE SYMBOL OVER STAMPED ENVELOPEDRESSED TO THE "
- "SUBJECTSEMI-VOICED SOUND MARKMNYAM YIG GI MGO RGYANPRECEDED BY "
- "APOSTROPHEIGEL LONG-BRANCH-SOL S WITH CIRCUMFLEX ABOVESYMPTOTICALLY EQUAL "
- "TOINDIRECT QUESTION MARKDOWNWARDS ZIGZAG ARROW RIGHT ARROWHEAD "
- "ABOVEACKSLANTED SOUTH ARROWLATIN CAPITAL LETTER SWITH VOICED SOUND "
- "MARKSHORT RIGHTWARDS ARROWPERSET OF NOR EQUAL TOPOINTING UPWARDS BELOWBAR "
- "ABOVE INTERSECTIONOVER LEFTWARDS HARPOONOVER NU11 BUR OVER BUROVER SHIR "
- "BUR OVER BURUPPER ONE EIGHTH BLOCKMALE WITH STROKE SIGNBESIDE RIGHT "
- "TRIANGLETEEN POINTED ASTERISKINVERTED BRIDGE BELOWRELICT HOUSE BUILDINGOP "
- "SHADED WHITE ARROWOHAMMAD ISOLATED FORMHOUSAND MILLIONS SIGNINTERSECTION "
- "OPERATOROCAL NOTATION SYMBOL--SHAPED BAG DELIMITER-ROUND NOTEHEAD DOWN "
- "BOTTOM U-SHAPED ARROWTHICK LETTER SELECTORHORIZONTAL "
- "TABULATIONINTERSECTING LOGICAL YIG MGO TSHEG SHAD MAREVERSED FEATHER "
- "MARKOPPOSING AN PLUS NAGAMORPHOLOGICAL DIVIDERALLING DIAGONAL SLASHSIDE "
- "TO SIDE SCISSORSCTOR OR CROSS PRODUCTFINAL CONSONANT SIGN CRUCIFORM "
- "NUMBER FOURCROSSE STICK AND BALLVOICED ITERATION MARKLATTENED OPEN A "
- "ABOVEFLATTENED PARENTHESISSHORT LEFTWARDS ARROWRISING DIAGONAL "
- "SLASHTRIANGULAR HALF BLOCKDOWN HORIZONTAL HEAVYDOWN HORIZONTAL LIGHTAND "
- "VOWEL LENGTH MARKWO-CIRCLE ALTERNATE IPAP PLUS PAP PLUS LU3WO-CIRCLE "
- "NUKTA ABOVELESS-THAN OR EQUAL TOEIGHT SPOKED ASTERISKELATIONAL "
- "COMPOSITIONACUTE AND HOOK SYMBOLRATING SYSTEM COMMANDDIGRAMMOS EX "
- "DODEKATAEFT-POINTING TRIANGLELETTER SMALL CAPITAL EQUAL TO OR LESS-THANER "
- "RIGHT CORNER ANGLEINVERTED GLOTTAL STOPDELIMITER TSHEG BSTARAKIA TELOUS "
- "ICHIMATOSDOUBLE LEFT TURNSTILEEFT OPEN BOX OPERATORMIDDLE RING LITTLE "
- "ONQUADRANT CIRCULAR ARCEFT HORIZONTAL SECANTWITH INVERTED V ABOVEABOVE "
- "SHORT DOWN TACK AND SLANTED PARALLELDOWNWARDS ARROW ABOVEVERTICAL LINE "
- "OVERLAYXTRA SHORT VOWEL MARKBUT NOT EQUIVALENT TOGRUENT WITH DOT ABOVESH "
- "AMPERSAND ORNAMENTARYSTIAN FIVE HUNDREDND TELEPHONE RECEIVERARKENING OF "
- "THE LIGHTHALF TRIANGULAR COLONARD SHELL FLOPPY DISKVERY HEAVY BARB "
- "ARROWGHT OPEN BOX OPERATOR WITH VERTICAL STROKENTISTRY SYMBOL LIGHT HAND "
- "INTERIOR PRODUCTHOCKEY STICK AND PUCKGRAMMOS OKTO DODEKATA WITH SHORT "
- "RIGHT LEGTILDE OPERATOR ABOVE TOUCHING INSIDE MOUTHWITH FOUR DOTS "
- "ABOVELEFT-TO-RIGHT SECANTNIS RACQUET AND BALLARXIS KAI FTHORA VOUSINGLE "
- "DOT TONE MARKINVERSE WHITE CIRCLEINDEX RING LITTLE ONYIG MGO PHUR SHAD "
- "MASCRIPTION CHARACTER 79 OVER LAK-079 GUNUIRCLES HITTING WALL "
- "WARE-FUNCTION SYMBOLINVERTED CANDRABINDUE POINTED WHITE STARMULTIPLE "
- "PUNCTUATIONL FUNCTIONAL SYMBOL WET CULTIVATION SIGNANGE "
- "ANTIRESTRICTIONSTAR WITH MIDDLE DOTWORD REPETITION MARKAND SOUTH WEST "
- "ARROWAND SOUTH EAST ARROWAND NORTH WEST ARROWA- SHOG GI MGO RGYANAND "
- "NORTH EAST ARROWKHAMTI REDUPLICATIONSIDEWAYS NOON GHUNNAINSIDE MOUTH "
- "RELAXEDLASHING SWEAT SYMBOLLATALIZED HOOK BELOWINING OBLIQUE "
- "STROKEJUDEO-SPANISH VARIKAWITH SOROCHYA NOZHKASEPARATOR MIDDLE DOTLE WITH "
- "POPPING CORKSEPARATOR KEY SYMBOLLEFT AND LOWER RIGHTHIGH RATHA OR LOW "
- "PAHIGH TONE APOSTROPHERIGHT-POINTING ANGLERIGHT U-SHAPED ARROWOTLESS J "
- "WITH STROKEUBHAANAHU WA TAAALAABSET OF NOR EQUAL TOTHROUGH SMALL "
- "CIRCLEENARMONIOS ANTIFONIAHORIZONTAL BAR WITH RIGHT QUADRANT BLACKEVERSED "
- "ROTATED RANAEMESTVENNY ZADERZHKADOTLESS HEAD OF KHAHDOUBLE ANGLE "
- "BRACKETREAN STANDARD SYMBOLDOUBLE DOT TONE MARK BEGIN LOGOGRAM "
- "MARKHREE-DOT NUKTA ABOVEDOWN ARROWHEAD BELOWYRENAIC TWO DRACHMASCRESCENT "
- "MOON SYMBOLFOUR RAISED KNUCKLESCONSONANT SIGN HAARUPLUS GISH TIMES "
- "TAK4TRANSPOSITION MARKERGHTWARDS ARROW BELOWFORMS LIGHT VERTICALCHEMICAL "
- "SYMBOL FOR POTABLE WATER SYMBOL OVER TUR ZA OVER ZAGRA GCAN -CHAR "
- "RTAGSCHARACTER INTRODUCER WITH HORIZONTAL BARFIVE SPOKED ASTERISK OVER "
- "LAGAR GUNU SHEOVER LEFTWARDS ARROWCENTRE VERTICAL LINEFINAL CONSONANT "
- "MARKCURRENCY SYMBOL RIELCURVED ANGLE BRACKETHALF CIRCLE WITH DOTHAR2 "
- "TIMES GAL PLUS UP-POINTING TRIANGLERIGHT ANGLE WITH DOT ABOVE LEFT "
- "TRIANGLEE CONSONANT MODIFIERREVERSED ONE HUNDREDONCAVE-SIDED DIAMONDVAL "
- "WITH OVAL INSIDEIMAGE OF OR EQUAL TOONE MARK SGAW KAREN RUMAI PALAUNG "
- "TONE-5 AND DIAGONAL STROKEBETWEEN PALM FACINGSALTERNATE LAKH MARKSYMBOL "
- "FOR BEGINNERNETWORKED COMPUTERSVASTNESS OR WASTINGOVER E NUN OVER "
- "NUNSEVEN EIGHTHS BLOCKIMIDIA SEXTULA SIGNPREFIXED NASAL SIGNHURISAZ THURS "
- "THORNNE HUNDRED TWENTY PTO LOWER RIGHT FILLSH PLUS HU PLUS ASHNDRED "
- "POINTS SYMBOLFIVE FINGERS SPREADRROW NO-BREAK SPACED CIRCUMFLEX "
- "ACCENTEVEN POWERS OF FOURARENTHESIS NOTEHEADLEFT TO LOWER RIGHTLEFT "
- "POINTING INDEXQUADRUPLE CRESCENTSHEAD-SHAPED POINTERHAND COVERING "
- "MOUTHARABIC FORM SHAPINGDOWN AND HORIZONTALTWENTY-FIVE DEGREESTURNED "
- "SECTION MARKSEQUENCE INTRODUCERTURNED PADA PISELEHLEADING MCHAN "
- "RTAGSGAPPED CIRCLE ARROWTRANNO MALO POVYSHEVARIANT FORM ILIMMUTHANG LONG "
- "ANUSVARAPARAGRAPH SEPARATORCIRCLED SANS-SERIF IN CHEN SPUNGS SHADCIRCLED "
- "WHITE ARROWMOVES AGAINST CHEEKWOMEN HOLDING HANDSRY CULTIVATION "
- "SIGNLANTED EQUAL ABOVE MSHELL MOBILE PHONEFT-POINTING FLEURONA END "
- "LOGOGRAM MARKDOWN POINTING INDEXNAXIAN FIVE HUNDREDOW-9 QUOTATION "
- "MARKVARIATION INDICATORVARIANT WITH SQUARESLANTED NORTH ARROWANSPOSITION "
- "BRACKETANS-SERIF CAPITAL LILDING CONSTRUCTIONFLOORPLANE TWISTINGSTRATIAN "
- "FIFTY MNASSTRAIGHT THUMB BENTWITH YEH FINAL FORMGIBBOUS MOON SYMBOLKULL "
- "AND CROSSBONESREE-QUARTER CIRCLE RIGHT TO LOWER LEFTRAILING MCHAN "
- "RTAGSUBLE DOT WITHIN DOTOTATED ARDHAVISARGAING SHIRT WITH SASHVERSE FINAL "
- "BARLINEAHU ALAYHI WA-AALIHLF MADDA OVER MADDA AND PROSGEGRAMMENI WITH "
- "STRIKETHROUGHLUB-SPOKED ASTERISK IN A RECTANGLE BOXTIGHTLY-CLOSED "
- "EYESDENOMINATOR SIXTEENARTIAL DIFFERENTIALNORTH ARROW WITH HORIGHT MIDDLE "
- "STROKEBETWEEN MIDDLE RINGTART OF RUB EL HIZBOP SEMICIRCLE ARROWHORT "
- "STROKE OVERLAYINSIDE CIRCLE BELOWRATUM SUPER STRATUMIGATURE AYIN-DALETH "
- "LOVE YOU HAND SIGNATED TELLER MACHINE FLUTTERING IN WIND WITH VERTICAL "
- "TAIL AND RETROFLEX HOOKVERSAL INTERSECTIONISPUTED END OF AYAHUP "
- "HORIZONTAL HEAVYC DIGRAPH WITH CURLQUESTION MARK ABOVEUP HORIZONTAL "
- "LIGHTNOGRAPHIC FULL STOPITED LIABILITY SIGNET WITH WHITE CROSSVERTICAL "
- "TABULATIONURRENCY SYMBOL BAHTLEFT U-SHAPED ARROW KASKAL U GUNU DISHLOWER "
- "MIDDLE RIGHTCTLY EQUIVALENT TOSMALL RED TRIANGLE FOR SIMALUNGUN "
- "SAFORTY-FIVE DEGREESSOUL ISOLATED FORMLEFT MIDDLE STROKE AND SMASH "
- "PRODUCTQUARTER NOTE STEM TYPE A ELECTRONICSEVERSED CHELYUSTKA AND "
- "YPOGEGRAMMENICOPPER ANTIMONIATEUPPER MIDDLE RIGHTEVENTEEN FULL "
- "STOPCONTINUING OVERLAPLOSED CIRCLE ARROWBAARAKA WA-TAAALAACUP WITHOUT "
- "HANDLEDOUBLE PUNCTUATIONEAST POINTING LEAF AND NO DOTS ABOVESINGLE "
- "PUNCTUATIONERTICAL BAR VIRAMASMALL CIRCLE ABOVE OVER INVERTED SHUFIVE "
- "EIGHTHS BLOCKWEST POINTING LEAFWHITE VERTICAL BAREPIGRAPHIC LETTER FORKED "
- "PARAGRAPHOSPUNCTUATION BINDU UP ARROWHEAD BELOWABBREVIATION MARK "
- "DOWN-OUTPUT SYMBOLENTY-TWO POINT TWOENTERING TONE MARKU-SHAPED "
- "ORNAMENTSWITH STROKE SYMBOLMATHEMATICAL SPACE NOT LITTER SYMBOLUM WITH "
- "DRUMSTICKSLEFT-STEM TONE BARWITH JUSTIFICATIONHILOSOPHERS "
- "SULFURCANTILLATION SIGN VERY SMALL DIAMONDSYMMETRIC SWAPPINGHAIS LUS NTOG "
- "NTOGS PRESSED TOGETHERSEPTUPLE CRESCENTSNORMAL SUBGROUP OFARROW SHAFT "
- "WIDTH TO LOWER LEFT FILL WITH KAVYKA ABOVEJEEM ISOLATED FORMSET OVER "
- "BUILDINGSOU ALAYHE WASALLAMS INSIDE AND ABOVEOTLESS DALATH RISHIGATURE "
- "ZAYIN-YODHIRROR HORIZONTALLYKANTAJA NAASIKYAYAOUCHTONE TELEPHONE WITH "
- "FLOWING SANDBOTTOM RIGHT KASRAATIN SMALL LETTER HORIZONTALLY BELOWTILTING "
- "FROM WAISTORK ON THE DECAYEDNITE PART INTEGRALNISH VERSE DIVIDER THUMB "
- "INDEX THUMBITAN SMALL SCRIPT WITH CROSSED-TAILHOUSANDS SEPARATOR WITH "
- "CIRCLE BELOW WITH CIRCLE ABOVEHUNDREDS UNIT MARKNINETEEN FULL STOPIX "
- "SPOKED ASTERISKINDEPENDENT VOWEL KOREAN CHARACTER OPACING CANDRABINDU "
- "SPREAD THUMB SIDEGGLY VERTICAL LINEBASELINE ROUND DOTASTED SWEET "
- "POTATOGATIVE ACKNOWLEDGEGISH CROSSING GISHTROFLEX HOOK BELOWRYUKOVAYA "
- "SVETLAYAATERRESTRIAL ALIENPERFIXED LETTER RAVARIANT FORM LIMMUYLLABLE "
- "LENGTHENERASTROLOGICAL SIGN IGSAW PUZZLE PIECEIN POSSESSION SIGNOW TONE "
- "APOSTROPHEYPTIAN HIEROGLYPH KBAR ISOLATED FORMHOLDING BACK TEARSAND LOW "
- "RIGHT RINGRIST CIRCLE FRONT NYET THYOOM TA-ROLUTLINED BLACK STARSHAN "
- "REDUPLICATIONTOP U-SHAPED ARROWNYOOGA NAAKSIKYAYAAEUM ONE PLETHRONBRACKET "
- "EXTENSIONEMICOLON UNDERBARVERAGE WITH SLASHHORIZONTAL SINGLEHORIZONTAL "
- "DOUBLETERSYLLABIC TSHEGRIATION SELECTOR-VARIANT FORM ASH9A PLUS HA PLUS "
- "DAVARIANT FORM IMINVARIANT FORM USSUEAR SCREEN SYMBOLINEAR ANNOTATION TEN "
- "THOUSAND SIGNBE WITH MERIDIANSIGHTEEN FULL STOPING POLE AND "
- "FISHABBREVIATION SIGNATHERING TOGETHERBERBER ACADEMY YAULAR MEDIUM "
- "SHADEVEE WITH UNDERBARINITIAL LETTER RAAUKAZ LAGU LOGR LIDEOGRAPHIC "
- "COMMAICTED LEFT ENTRY-IASTRE MARK ABOVEBOTTOM HALF BLACKTARTING FROM "
- "SIGNGENERIC MATERIALSCTION APPLICATIONAPLI DYO DODEKATAGREATER-THAN "
- "SIGNFINGER-POST ARROWLD PERMIC LETTER ALTERNATE NUMBER GREATER-THAN NOR "
- "CROSS PUNCTUATIONKE BOTTLE AND CUPAMBDA WITH STROKEFLICK "
- "ALTERNATINGVOCALIZATION MARKKHAH INITIAL FORMSTRAIGHT MOVEMENTCIRCLES "
- "WITH DOTSCONTOURED OUTLINEAMPHYLIAN DIGAMMALAPPING LESS-THANCONTINUATION "
- "SIGNCLOSED LITTLE YUSTRIPLE DASH ARROWCONSONANT SIGN PALAH ISOLATED "
- "FORMLAGOLITIC LETTER COMBINING NUMBER AND BLACK SQUARESFRACTION ONE "
- "HALFURNED DAMMA BELOWBRIGHTNESS SYMBOLASTERN PWO KAREN TAN ISOLATED "
- "FORMTHREE SOUND WAVESLIAN HIEROGLYPH ALHOUETTE OF JAPANTIAL ARTS "
- "UNIFORMHIRTEEN FULL STOPDENTAL PERCUSSIVEBUT RELIEVED FACEDELPHIC FIVE "
- "MNASUP AND HORIZONTALFROM SMALL CIRCLEINVERTED MCHU CANVRE TOURNOIS "
- "SIGNURNED COMMA ABOVECAL SYMBOL BOTTOMCANCELLATION MARKUP POINTING "
- "INDEXHEART-SHAPED EYESCANDRABINDU ABOVEVERY SMALL SQUAREFACING BABY "
- "CHICKSQUARED TIMES KURHAH ISOLATED FORMTONAL RANGE MARK MANENT PAPER "
- "SIGNY ON BLACK SQUAREREATIONAL VEHICLE LAGAB TIMES ASH2 LAGAR OVER LAGAR "
- "TO BLACK DIAMONDWITH NOT EQUAL TOMILITARY AIRPLANEQUAT REVERSED ESHMEEM "
- "INITIAL FORM WITH BULLET NOSE OVER ZU PLUS SARMALL WHITE CIRCLEYEH "
- "ISOLATED FORMONE EIGHTH BLOCK-0 WHEELED CHARIOT GRAVEYARD SYMBOLPINWHEEL "
- "ASTERISKRYBLION BASE SIGNRIGHT HALF CIRCLEOURTEEN FULL STOPNE EYEBROW "
- "RAISEDNEGATIVE CIRCLED MEDIUM BARB ARROW PLUS SHA3 PLUS AOWER NUMERAL "
- "SIGNWO VERTICAL DOTS NAUDIZ NYD NAUD NSIGN O WITH CROSSPRIZNAK MODIFIER "
- "NFORMATION SOURCE ALTERNATION MARKS REVOLVING LIGHT-HEIGHT LEFT "
- "HOOKSERVER EYE SYMBOL WITH TILDE ABOVEPRESSIONLESS FACEPUNCTUATION SIGN "
- "WITH SOUND WAVESOPEN-HEADED ARROW-GAAHLAA TTUDDAAGPRECEDING SOLIDUS AND "
- "PALATAL HOOKNASALIZATION MARKWHITE PARENTHESISWHITE SHOGI PIECENG STROKE "
- "OVERLAYPLACEHOLDER MARKFLOORPLANE SPACESTRING FRETBOARDINDEPENDENT "
- "SHINFFERENCE BETWEENUPPER OVER LOWERVARIANT FORM ESHRIZONTAL "
- "ELLIPSEMIDDLE AND BELOWSTICKING OUT FARMIDDLE AND RIGHTMESTVENNY "
- "KLYUCHCOMPRESSED ARROWRISING TONE MARKFALLING DIAGONALPRECHGESANG "
- "STEMCREAMING IN FEARINES CONVERGING POSTPOSITION MENOLD TAMIL VIRAMAWITH "
- "CENTRED DOTPRECEDING SUBSETCORNER LEFTWARDSCORNER DOWNWARDSB2 TENU PLUS "
- "TABINDEX THUMB SIDESTRAIGHT STRETCHOLD WHITE CIRCLE OR THE IMAGE OF PLUS "
- "KAK PLUS ANAL DIGIT SHAPESYIR MKPARAQ MEUNPLE MEASURE RESTCONSONANT "
- "JOINERHMATULLAH ALAYHECASIAN ALBANIAN IFTEEN FULL STOP WITH BUNNY "
- "EARSSHU2 PLUS KASKALLD ASSYRIAN ONE WITH DOT INSIDEIDED GREEK CROSS WITH "
- "HEADSTROKEHIGH SPACING DOTHOLDING TOGETHERHYPHENATION MARKANABAZAR SQUARE "
- "BOTTOM HALF RINGORIZONTAL JOINERBOLD GREEK CROSSSTERISK OPERATORPEN "
- "CENTRE CROSSVOWEL LENGTHENERWITH HAMZA ABOVEWITH HEARING AIDIMENSIONAL "
- "ANGLE SMALL ROTATIONSTRIPLE CRESCENTSLEFT HALF CIRCLEWITH LEFT UPTURN "
- "TIMES DISH TENUCH WITH UMBRELLALEFT ARROW ABOVEIGHTH NOTE STEM TIMES "
- "GAN2 TENUTAKANA-HIRAGANA DOTTED CRESCENTSSMALL NOON ABOVESMALL CAPITAL "
- "ELRIGHT HALF BLACKUDLY CRYING FACENTAIGANA LETTER IRCLE X NOTEHEADRAYS "
- "AND DOTTED DITORIAL CORONIS AND LIGHT RIGHTEN MILLIONS SIGNIPPER-MOUTH "
- "FACE AND HEAVY RIGHTECIMAL SEPARATORWAVY HAMZA BELOWSANS-SERIF ARROWEND "
- "OF TEXT MARKRAIDO RAD REID RSTUCK-OUT TONGUEDVUMYA ZAPYATYMIRIGHT DOWN "
- "BARB LTERNATE HASANTADOWN RIGHT BARB EFORE COMPLETIONDOUBLE "
- "ZAPYATAYADOUBLE TURNSTILEREPETITION MARK-RECORD SEPARATOR CAPPED "
- "MOUNTAINISTOS DISC SIGN ULDERED OPEN BOXRESH-AYIN-DALETHIVE POINTED "
- "STARDOUBLE HEAD MARKDOUBLE DOT ABOVELOWER OVER UPPERDOUBLE "
- "CRESCENTSSITION INDICATORK PERMITTED HERERIGHT HALF BELOWVERY HEAVY "
- "SHAFTARENTHESES ABOVEO-MINOAN SIGN CM LIGHT MOON ARTAENTRE WHITE STARUP "
- "MIDDLE HINGEDQUALS SIGN BELOWUP-OUTPUT SYMBOLEVERSED VISARGA AFFRICATION "
- "MARKEVERY OTHER TIMESERIFS AT BOTTOMVIEWING CEREMONYSIGN RISING "
- "TONEWALLED ENCLOSURELIGHT BARB ARROWLIGHT AND RIGHT E PLUS GAN2 "
- "TENUUPERSCRIPT ALAPHQ WITH HOOK TAILMAGNIFYING GLASSENUMERATION "
- "SIGNUBJOINED LETTER ADIAN SYLLABICS DEWAYS U BRACKETUMBER SIGN "
- "ABOVEEPSILON UNDERBARYATHOS BASE SIGN INSERTION POINTERCURY SUBLIMATER "
- "PLUS GAN2 TENUNION WITH SERIFSSLANTED EQUAL TOLLOW PAN OF FOODBINDING "
- "BRACKETITH FINGERNAILSBAR ABOVE UNIONIDING ENCLOSURE-PIECE "
- "SWIMSUITNJOINING MACRONVERTICAL SECANTORIZONTAL COLONUSTER NOTEHEAD BLOCK "
- "DIAGONAL -NO-EVIL MONKEYTHREE DISH TENUIAMOND UNDERBARXTEEN FULL STOPINUS "
- "SIGN BELOWATA LINK ESCAPEINVERTED LAZY SNVERTED UBADAMA2 CHARIOT "
- "FRAMESALTER PAHLAVI OLIDUS OPERATORON TOP OF MODEMATINATE MYSLITEODIFIER "
- "DAMAGEDS ELEVATUS MARKS IN SILHOUETTEODO SOFT HYPHENING SYMBOL FOR "
- "OGOGRAM KHAMTI INGLE HEAD MARKNOT APPROXIMATEBENT OVER INDEXBELOW LONG "
- "DASHNOON FINAL FORMONTOUR INTEGRALBELGTHOR SYMBOLNOON WITH KASRAONJOINED "
- "HINGEDNO GOOD GESTUREIGN PALI VIRAMATAI LAING TONE-IZED "
- "WHEELCHAIRIRCUMFLEX BELOWOND PLACE MEDALIMISEOS CHRONOUIMPERFECTA "
- "RESTINVERTED STROKEYNCHRONOUS IDLERIGHT RERENGGANESIDE LESS-THANR WITH "
- "FISHHOOKERICAN FOOTBALLDESCENDING TONEDI ALLAHOU ANHUDIALYTIKA "
- "TONOSESSARON CHRONONUBLE RING BELOWDIGA AELA-PILLARIGHT HALF RINGDOING "
- "CARTWHEELEH INITIAL FORMRIGHTWARDS TICKPRISHTHAMATRA EPUT SYMBOL FOR "
- "TVIMADUR SYMBOLZERO WITH SLASH NEPOSTOYANNAYAEXPONENT SYMBOL DEYTEROU "
- "ICHOUQUADRUPLE ARROWQUADRUPLE DASH DAGESH OR MAPIQTWO WITH STROKEETIC "
- "VERSE SIGNRIGHTWARDS AND U PLUS U PLUS UULTIPLICATION XDOWNWARDS TREND "
- "AND LOWER LEFTREVERSE SOLIDUS AND BOTTOM ENDREVERSED OPEN EUMAI PALAUNG "
- "FAE ISOLATED FORME MUSICAL NOTES ALTERNATE FORME OVER INFINITYDOWN "
- "SEQUENTIALUGMENTATION DOTDOUBLE TRIANGLERECITATIVE MARKRECEPTIVE "
- "EARTHREAMY EYEBROWS CLOUD AND RAINEFT REPEAT SIGNEFTWARDS ARROWS CORNER "
- "BRACKETHAKING PARALLELHEH MEDIAL FORM WITH DESCENDERTIP ON THE "
- "LEFTCAPITAL LETTERSHALF FILL SPACE TOUCHING INDEX WITH DIAERESIS TIMES "
- "IGI GUNUGROUP SEPARATORUR POINTED STAR THUMB STRAIGHT THROUGH "
- "CIRCLETHROWING A KISSHOUSAND STATERS-CARRIER LETTERORT EQUALS "
- "SIGNHORIZONTAL FILLBREAKING HYPHENHORIZONTAL DASHBROWS STRAIGHT "
- "OW-FALLING TONEHOOKED INDEX UP WITH RIGHT LEGOUBLE BACKSLASHTILDE "
- "DIAERESIS PLUS HI PLUS AFTER COMPLETIONFRACTION DIGIT FOUR ENCLOSURESFOUR "
- "DOTS WITH PERTHO PEORTH PPLUS SIGN BELOWFORWARD TILTING PARESTIGMENON "
- "CORNER WITH DOT OVER SIG4 SHU2FIVE-LIKE BHALE OVER MOUNTAINSHREE DOTS "
- "BELOWCLOSED BY CURVETRIANGULAR MARKRMAN PENNY SIGNPENSION RAILWAYPEN "
- "SQUARED DOTCKET CALCULATORRNAMENT STROKE-CIRCUIT-OUTPUT GEMINATION "
- "MARKGGLY LINE BELOWGHT REPEAT SIGNOWER HALF BLACKTOP RIGHT FATHAZAIN "
- "FINAL FORMVOWEL SHORTENERSTROKE NOT SIGNALMOST EQUAL TOACHES THE "
- "LIMITALIF LENGTHENERLEFT DOWN BARB ND OF PARAGRAPHLEFT HALF BELOWLEFT "
- "HAND INDEXVOWEL SEPARATORALAYHE ASSALLAMWITH RAIN DROPSMIDDLE "
- "DIAGONALWITH DOWN ARROWWHITE DOT RIGHTLETTER CAPITAL LOSED INSULAR GLEVEL "
- "TONE MARKLEVEN FULL STOPAI LAING DIGIT SSICAL BUILDINGNA DOUBLE "
- "HELIXSTERESIS SYMBOLN-ARY SUMMATIONAND PARALLEL TOSHITA PLUS GISHMURDA "
- "MAHAPRANAKTOVIK NUMERAL ST SYRIAC CROSSST QUARTER MOONLEFT HALF "
- "BLACKSHORT OVER LONGAMOUNT OF CHECKKISIM5 TIMES BISIXTEENTH "
- "NOTESNASALIZED TONE-LATION FUNCTIONAMARITAN SOURCEMARRYING MAIDENLD TAMIL "
- "SHORT LONG VOWEL SIGNLOOK OF TRIUMPHWALLPLANE SPACELMOST EQUAL TO MEEM "
- "FINAL FORMMALAKON CHROMA LONG HOOK BELOWLIGHT AND LEFT HIRD-STAGE "
- "HLIENDED MULTIMAPSMALL LETTER DOT TILDE ABOVETHREE POINTED MOTHETIC "
- "ABOVEOSITION SYMBOLAMNUC PII KUUHENNA WITH BARSTIMES OPERATOROUR OBOLS "
- "SIGNERTICAL JOINERSQUIGGLE ARROW WITH BACK YERLOWER DIAGONALMEDIUM "
- "DIAMONDTIMES SHU TENUERIAL ARAMAIC HESIVE BANDAGEERIC INDICATORREATIVE "
- "HEAVEN WITH LONG LEG-CURRENCY SIGNOTHERS CIRCLEDSMALL LETTER JDIAGONAL "
- "PATH THALAN ETHEL OTHAKA ANUDATTAEPENTHETIC YUTOOTNOTE MARKERTETARTOS "
- "ICHOSDEYTEROS ICHOSDIAGONAL MOUTHSHESHIG TIMES UNIT SEPARATORLACE OF "
- "SAJDAHBHATTIPROLU AAIGHTWARDS VANEUNION OPERATORTERMINAL MARK-IKHAYA "
- "PUTNAYATELPIECE CLOCKYOUTHFUL FOLLYONE SOUND WAVEDI ALLAAHU ANH HASER FOR "
- "VAVTHOUSANDS MARKOUSING THUNDERMRACHNOTIKHAYAUSPENSION MARKHREE-LEGGED "
- "TEUBHAYATO MUKHADOUBLE-STRUCK THOUSANDS SIGNORANGE DIAMONDBLADE "
- "SCISSORSBLACK TRIANGLEOPPOSING PIRIGTHER CHRISTMASREE-LINE STAFFRIGHT "
- "CROSSBARREH FINAL FORMAND COLD SWEATD-UP NEWSPAPERLETTER OVERLAPEMIVOWEL "
- "SIGN TWO ENCLOSURESDIRECTION FLIPFORWARD INDEX LESS-THAN NOR PLITTING "
- "APARTPPED MIDDLE UPPLETE INFINITYPLE WITH HEARTLEFTWARDS TICKMID-LEVEL "
- "TONEWITH DOT BELOWAFETY SCISSORSVRON SNOWFLAKEUP RIGHT BARB LEFTWARDS AND "
- "UNDER RELATIONFFICE BUILDINGEXTENDED BHALERING MEGAPHONETWO DOT "
- "LEADERUPPED INDEX UPSPEECH BUBBLESFATHATAN ABOVETURKIC LETTER FFED "
- "FLATBREADPROTECTED AREAAI LENGTH MARKFINAL ANUSVARALEVEN TWELFTHSLIGHTLY "
- "SMALL EXCLAMATION OHDIO MICROPHONELINE EXTENSIONCRIFICIAL WINEMENSION "
- "ORIGINCEPTER OF JOVEDASIA PNEUMATAQUINARIUS SIGNVONIC ASTERISKSMOKING "
- "SYMBOLRD PLACE MEDALCERTAINTY SIGNOVER GUD LUGALTOP HALF BLACKWITH LEFT "
- "HOOKCENTURIAL SIGNGUNU TIMES ASHGYPTOLOGICAL AROSS ON SHIELDEND OF "
- "SECTION CROSSING ESH2ALLAJALALOUHOUSIDE-DOWN FACEUAL WHEELCHAIRCOLON "
- "OPERATORUND MARK ABOVETWO WHITE DOTSWITH FATHATAN PEN MARK BELOWLITTLE "
- "SECTIONTRIANGLE WITH DOES NOT EXISTPARTMENT STOREZAH WITH MEEM PARATED "
- "SYMBOLLOTUS POSITIONGERED TREMOLO-TRANSMIT STATEPAO KAREN TONESMALL "
- "TRIANGLEGHT WITH STARSZAKAYA LANTERNSEPARATOR MARKALF TREE "
- "TRUNKREVERSED-SCHWAIRCULAR VIRAMAANGLE OPENING SHAN MEDIAL WAED SYMBOL "
- "FOR ISSION TICKETSIVE OBOLS SIGNSEL LOCOMOTIVEXO EKFONITIKONNORTH ARABIAN "
- "DUN3 GUNU GUNUOBLIQUE HYPHENVERTICAL COLONIWAZ TIR TYR TSECOND "
- "SUBUNITSURROUND FROM VERTICAL LIGHT1 OVER LAK-081VERTICAL "
- "HEAVYAROUND-PROFILEJOINED SQUARESING HANDS SIGNINOLOGICAL DOTINSERTION "
- "SIGNISED ROUND DOT AND DIAERESISIVE KEY SYMBOLINFINITY BELOWVERTICAL "
- "ABOVEOLD RESOLUTIONAU LENGTH MARKVENIENCE STOREITA PLUS GISH WORD "
- "SEPARATORBAG MEMBERSHIPINVERTED BIRGASTRONG ISOLATEINVERTED DAMMANYI ZLA "
- "NAA DAHINESE TONE YAUDATE CHRIVIWITH INTEGRAL WITH JEGOGANMARKS "
- "CHAPTERDOUBLE MUCAADSIGN AVAGRAHA WITH OVERBARUPWARDS TRENDLEFT "
- "CROSSBARVERGREEN TREEDOUBLE HYPHENHIEROGLYPHIC VEN POINT ONEMODIFIER "
- "MARKNUMERATOR ONETOP HALF RINGSSAGE WAITINGPLUS "
- "OPERATORCHRYSANTHEMUMARRED TRIDENTURNED W BELOW WITH INK PEN OF THE "
- "HORNSWITH ASTERISKBY DEFINITIONDOUBLE STROKEVERTICAL FILLCHECKER "
- "BOARDMESSENIAN TENLATERAL CLICKUPERIMPOSED XSIGN "
- "PAMUDPODFOREMENTIONEDOGOTYPE SIGN VERLAY MIDDLEITING THROUGH WITHOUT "
- "SNOWINVERTED TURNDIAERESIZED UEHU FEOH FE F DIRECTIONAL SAMYOK "
- "SANNYASMALL LETTERSK WORK SYMBOLUFFLE PRODUCTOTEHEAD BLACKUPONDIUS "
- "SIGNINVERTED FORKS KAI APOTHESS KRYZHEM ON VERTICAL BARSWAW-AYIN-RESH "
- "WITH TEE TOP BZHI MIG CAN TIMES KASKALGYA GRAM SHADHAM DIGIT ONEISH LIRA "
- "SIGNCROSSING MUSHEAVENLY EARTHMINUS SIMILARPAIRED ARROWS VARIANT FORMALL "
- "LOST SIGNPREPONDERANCEMINUS WHITE XGTER TSHEG MAFINAL SEMKATHTIRTA "
- "TUMETESMINDER RIBBONSAL PLUS TUG2OUT MIDDLE UPH-TYPE SYMBOLGIFT "
- "ENVELOPEDOTTED ZLAMA EMPHATIC TONEPPOSING LUGALCROSSING GABAYUUKALEAPINTU "
- "CROSSING KA2HAKASSIAN CHECROSSING GAN2OURTH SUBUNITPA NJI PIPAEMOBLIQUE "
- "LINE APPED PRESENTRIPLE SVARITACULATED LORRYKAPYEOUNPIEUPMOBILE "
- "PHONESMASORA CIRCLEINTEREST SIGNCRIPTION TAKEFIXED-FORM RAHERICAL "
- "ANGLENUMERIC SIGN RANKS CASKET DOUBLE CIRCLEFINAL LETTER S UP "
- "TOGETHERROTATED BIRGALVE FULL STOPVICE CONTROL ACCOMMODATIONW RING INSIDE "
- "OPPOSING KUR DIMINUTION-1TU WAS-SALAAMFICATION CARDARROW "
- "OVERLAYSIA-AUSTRALIACRIPTIONAL PARTABLE STEREOZHOU NUMERAL OLVING "
- "HEARTSUDDISA SIRRAHNTY FULL STOPIFI ROHINGYA OMAN NUMERAL FT ARROWHEAD AR "
- "WITH QUILLSTERTIUS SIGNOON NOTEHEAD NING MOVEMENTLOCATION SIGNSELECTED "
- "AREALVEOLAR CLICKSHESH PLUS KI-OFF CALENDARED PAPERCLIPSU WITH "
- "STROKECITATION MARKIOT SYLLABLE ARGOSYNTHETONGAW KAREN SHAIVE SLOW "
- "SIGNIGHT TWELFTHSIDE LOST SIGNERTION SYMBOLFROWNING FACEGAR FRACTION "
- "-OR-PLUS SIGN-PER-EM SPACERN PENTATHLONUPSILON WITH FULL SURROUNDEDIC "
- "ANUSVARAL-TYPE SYMBOLILE SEPARATORNG TERMINATOR AND SKI BOOTIMAGE "
- "BRACKETYOD YOD PATAHWO-LINE STAFFTED HAND SIGNLOSING SPIRALONAL COMPUTER "
- "HEADED ARROWRIZONTAL TAILBALL AND HOOPTROFLEX CLICKEELING PERSONER BOARD "
- "FILLRACKETS ABOVE AND YEN SIGNIRCLED INDEX YMBOL TAU RHOLOCK WITH "
- "KEYONGRATULATIONVOWEL SIGN PAVE-LINE STAFFONG-LEGGED DERIGHT "
- "HARPOONARCHAIC KOPPANINE TWELFTHSSYNDESMOS NEOONG RIGHT LEGILABIAL "
- "CLICKBASAN LETTER MELODIC QITSAEVERSED DAMMASILI PNEUMATAARLAUG "
- "SYMBOLEUROPE-AFRICAQUARTERS SIGNMAILBOX WITH BOWING DEEPLYED DOUBLE VERTE "
- "ORDER MARKANNED LEATHERLEGETOS ICHOSBOTTOM CORNERDVOECHELNAYA USHING "
- "UPWARDAMUHU ALAYNAAMALO POVYSHE USICAL LEIMMAAWELLEMET YAZAN RUPEE "
- "SIGN-ESASA DOTTEDHREE TWELFTHSBOHAIRIC KHEICIRCLE INSIDEDIC MARK "
- "SIGNOPPOSING NAGAWAVY LOW LINEMALL LETTER Z B BAR SYMBOLSTABLE SYMBOLI "
- "WITH STROKEUE OF LIBERTYMULTIOCULAR OMEDIUM SQUARERRIAGE RETURNTHIRD "
- "SUBUNITLISION SYMBOLIVE POINT ONERPENTRY PLANEWAVY OVERLINEJES SU NGA RO "
- "BAT AND BALLNAP PIZZICATOENT ARROW POIRTY-SECOND EMELY HEAVY FOLDED "
- "HANDSEMISOFT SIGNFORMING ARTSMIDDLE PIECELER CONSTANTUBSCRIPT TWOPOETRY "
- "MARK-FOUNTAIN PENNOTCHED HOOKVICTORY HANDKANA REPEAT FT RERENGGANPORT "
- "CONTROLRISTMAS TREESAZ IS ISS ISH ZIDA TENUSUR OVER SURNOTCHED TAIL LINE "
- "SYMBOLDE MARK SIGNAR DIAERESISLLE PATTERN MEDIUM SHAFTQUIRREL TAILSMALL "
- "TRIPLELL MODIFIER-DASHED ARROWDICTION SIGNETRETES SIGNNERSHIP SIGNLINKING "
- "MARKLINGING FIRENJALA GONDI QUARTER SIGNNEPOSTOYANNYNGLE "
- "BARLINEARALLELOGRAMRAFFIC LIGHTRIGHT SINGLERAH BEN YOMOMADDA ABOVE MEDARY "
- "CAMELENOS CHRONOUM NSHUT NYAMEVERING FACEDENTAL CLICK IN TRIANGLEXIRON "
- "KLASMASMILING FACEERPENDICULARIVE TWELFTHS KLYUCHEVAYADIGRAPH YORIRIAGE "
- "SYMBOLPROTOS ICHOSCROSSING NUNCROSSING LU2CROSSING KALCROSSING GI4ACUTE "
- "ACCENT OVER KISIM5FERENCE MARK OVER MIDDLEUPPER CORNERFLAG ON POSTEAGULL "
- "BELOWCRESCENT BARPOUTING FACENORTHERN TSE OVER TWO PIMETA STAVROUSECTION "
- "SIGNQUARED ARROWENARIUS SIGNAFU LEERAEWAT MONGKEUAEQLIMBS DIGITSLICKING "
- "LIPSRANCHING OUTRING OVERLAY OF ANTIMONYRIAL TRAMWAYFALLING DOTSRION "
- "CHRONONDIGRAPH KOTOFEMININE DOT ON PEDESTALING HAND FANKHAMTI "
- "TONE-BRATION MODELARGE TRIPLEKHMIMIC KHEIOID NOTEHEADOING TO MEETING "
- "OPERATORING HITTING OK HAND SIGNLARGE DOUBLETHREE HEARTSOHAZARD SIGN-DZUD "
- "RTAGS MPHASIS MARKMPTY CENTRE HREE FINGERSING ENVELOPES ABOVE SIGNLAM "
- "WITH YEHREVERSE MARKKLYUCHEVAYA BLUE DIAMONDBLOWING FACENARROW SHAFTUN "
- "WITH RAYSALTERNATING SS-THAN SIGNHIBITED SIGNLAYING CARDS WITH INDEX "
- "INTEGRATION BYSMAL WATERWDATA SQUAREBUTTON MOUSEDUG TIMES NIOTTOM HALF "
- "OLOWERED FLAGMANNAZ MAN MTHODOX CROSSSHORT RIKRIKUSEATED FACEBSCRIPT "
- "ALEFOGOGRAM NYAJRIGHT DOUBLEMONOGRAPH UKBRIDGE ABOVE WITH UPTURNROUNDED "
- "ZEROLATALIZATIONN ELEMENT OF-SIMPLIFIED IGATURE SHRIUTH-SLAVEY KSHED "
- "BARLINEREN CROSSINGEEPING SMALLYMBOL BINDU AND OPERATORBACK OF HANDBEHIND "
- "CLOUDSMALL DOUBLEONISHED FACE-SHAPED SIGNTERNATE AYININ MIDDLE "
- "UPYOUTHFULNESS AND TOP ENDLU PLUS ESH2ANG KHANG GYON US SYMBOLILLED "
- "CIRCLEVAKRAHASANYAYOD TRIANGLE AND PICTURETEARS OF "
- "JOYBAHIRGOMUKHAHYPHEN-MINUSTE SEPARATOROQ NSHUT YUMBLACK CIRCLEUGHT "
- "BALLOONDOWN HARPOONTHIC LETTER THETA SYMBOLBINING MARK -MAIL "
- "SYMBOLOPPOSING LU2INDEX MIDDLEREFACE COLONMALL SECTIONDOWN NEUTRALOPLE "
- "HUGGINGOPENING LEFTAND CRESCENTIDENTICAL TOBETWEEN LIPSMUM TIMES PAUTH "
- "ARABIAN UH PLUS GISHDOT OPERATORRCHAIC SAMPI SKEWED LEFTLEFT "
- "HARPOONTRAGRAM FOR ROLLING EYESTOP-LIGHTED LUS "
- "NOTEHEADCHARACTER-1BCHARACTER-18WIGGLY FENCECHAIR SYMBOLOVER KASKAL GREEN "
- "DRAGONGROUND SLIDEVOLTAGE SIGNPHEME JOINERTRIPLE FLAMETRIPLE "
- "DANDALEFT-LIGHTEDIRAGANA HOKANDING PERSONKAPPA SYMBOLCK-O-LANTERNRNAM "
- "BCAD MAGAW KAREN EUOUCHES THUMBEIGHTH NOTESGBY FOOTBALLNUITY "
- "SYMBOLCIRCLES AND AA AS-SALAAMCIRCLED PLUSGEBA KAREN ICIAN LETTER OUR "
- "TWELFTHSS SUBPUNCTISHAND FORMAT CALENDAR PAD WITH DAGESHDOUBLE ARROWATNAH "
- "HAFUKHULL NOTEHEADALT PAN SIGNOUGHT BUBBLEACE INTEGRALHASIS SYMBOLDOUBLE "
- "ARCH LOWER CORNERVER EQUAL TOHEAD-BANDAGEATTOOED HEADNCK CONSTANT "
- "CROSSING GUINUSOID SIGNINVERTEBRATECABBAGE-TREECENDING "
- "NODEMBELLISHMENTGUARDED AREAYAN NUMERAL UME INTEGRALNATURAL SIGNHAH WITH "
- "DALLEADING EYESALLPOINT PEN6 LONG NGGOOWITH SMALL VMING TO MEETHALF "
- "BRACKETENG DIGRAPHEDICAL MASKRIGHT GUARDRIGHT HEAVYME LONG CANUISHED "
- "FACEZZA WA JALLLOSED ENTRYMACING FACEMED RGYINGSRESPONDS TOLOTI NAGRI "
- "SMALL CLOUDE2 TIMES ANLU PLUS IGIEEZING FACELTED FLOWERULO TWO SUMLT OF "
- "CLOTHMBLER GLASSRANSMISSIONLUPOVODNAYAUG RTAGS GYRIGHT LIGHTEAVER DENE "
- "MBA BAYANNARAISED FLAGLTERNATE AALTERNATE YALOSING MARKSINGLE AND RCHAIC "
- "JNYAUBLE TONGUEE WITH VEILUETTE BREADREAKTHROUGHEN STRAIGHTSHEQEL "
- "SIGNOPPOSING IMOPPOSING ENTHAM DIGIT MUNCIA SIGNIEN "
- "MONSTERTH-THALATHATETRAFONIASMUUSIKATOANONIAN SIGN N THE VERGETENS DIGIT "
- "IL FRAGMENTILCROW SIGNILIQUA SIGNL TIMES LALL OF THREADSTEAMY ROOMSTERED "
- "SIGNRUNNING MANHREE BALUDATIEE SHEUOQHITE DRAGONHLETIC SHOEHOKHLOM ON "
- "LATIN CROSSMONOCULAR OTHREE TIMESHORA DIGIT MOVED BELOWORT BARLINERPOON "
- "ABOVEORCE SYMBOLOR OPERATORTHIRDS SIGNI TIMES BADI TIMES NUNLACKLETTER "
- "LACK SULFURIBLE-CREE YJOYOUS LAKENUMBER ZERONUMBER SIGNIPLE TONGUENTIMONY "
- "OREIRCLED TEXTKAARA POLLUNSE CHEEKS ISIGOTHIC ZTA EQUAL "
- "TOSHAAYATHIYASECOND MARKSVASTI SIGNJIHVAMULIYANITIAL IZHEIVERY TRUCKJECT "
- "SYMBOLIX TWELFTHSSYURA SASAKNIKOLSBURG NG LEFT LEGSYMBOL "
- "VIDJIMULTANEOUSIN EQUAL TOOMAN SIYAQ KTIESELSKABRYVNIA "
- "SIGNINDERGARTENSTORIC SITEINDICESIMA OLD NUBIAN SSANGKIYEOKOHINGYA YEHOF "
- "MASHFAATOF ENVELOPENBLENDED UKNYIS -KHYILND ODD SIGNND OF PIECENUSVARA "
- "ONENUN HAFUKHALETION MARKPUT MORTUUMSPEED TRAINTUNE COOKIEFGHANI "
- "SIGNFINAL SIGMALEU SATANGAFIRE ENGINERIPLE PRIMEMFON PIPAEMPOST "
- "OFFICESPIRATED FAMICAL HEARTPODCHASHIEMRISING DOTSMIDDLE BENTFOURTH "
- "ROOTRITING HANDESH DIGRAPHENTHESIZED EORGIAN NARLON SKEWED EPIDAUREAN "
- "RADITIONAL RACHMA SIGNRA SOMPENG ERCENT SIGNRIGHT-HAND RIGHT-LIGHTPHNAEK "
- "MUANQUIQUADRATEQUERED FLAGETEI MAYEK SIMILE SIGNEUNJOMNDEUQTY "
- "THOUSANDLINE FILLERTWO FINGERSRING LIQUIDHAWH HMONG GSUM -KHYILGUISED "
- "FACEROSS ACCENTOUTHERN TSETONAL MARK HAGGAR YAZHLEEP SYMBOLLEAF "
- "CLOVERHANG KHUDAMOVERSTRUCK OUNDED FACELD SCRIPT XHEATED FACEHEAVEN "
- "MARKLD POLISH OHEELED SHOELCE TSA CANOUBLE DANDAHIGH STROKEOUBLE "
- "ACUTETIGHT ACUTETRUNCATED AMIDDLE HOOKPERISPOMENIGAP FILLER-LEFT "
- "SYMBOLLEFT SINGLEGAYANUKITTATRIAN CAMELGE AT NIGHTMIDDLE "
- "STEMPARAKALESMAGENTLE WINDTRESVETLAYATRESS SIGN GHAIN WITH TRAIGHT "
- "WAWROKUTASTI ARONTHISMATAROR-BARRED GRAMMA SIGNGRAVE-ACUTELEFT "
- "DOUBLEAILLESS PHIAESHAE NYAMCK SEXTANT--OFF SYMBOLBETA SYMBOL PLUS NUNUZ "
- "PLUS MASH2AGAZ DAEG D DECORATIONUTH CORNERSAGONAL SIGNUTING WHALEBELOW "
- "RIGHT-MINUS SIGNCOMBINATIONXAGRAM FOR VYKA ABOVE VA V CHELNUCOND "
- "SCREENCONTAIN AS DIATONON DI0 FOOTSTOOLBANK SYMBOLDOUBLE AND WRIST "
- "FLEXUSHING HANDACKED COMMABOTTOM MARKACTIVE SIGNACUTE-GRAVECHING "
- "CHICKDOUBLE SHADDOUBLE RINGDOUBLE MARKVAMAGOMUKHAWASLA ABOVE-COPPER OREWO "
- "TWELFTHS PROPORTIONDONG TSHUGSBLACK ARROWDOLLAR "
- "SIGNDOACHASHMEEAESCULAPIUSARM SPIRAL I ZAPYATOYVISARGA ONEASE TO THE "
- "YEORINHIEUH OF FORTUNECURVED BENDXTINGUISHER OF FLOWERSARPEGGIATO DE "
- "KIKAKUI ARMS RAISEDZERO THIRDSUPADHMANIYAARAM GONDI DASH "
- "SYMBOLDALETH-RESHCROSSING URVARYS ICHOSCREDIT SIGNXESTES SIGNVE OF "
- "PEACEAWNING FACECROSSING BUUNIT DIGIT CROSSING ENCROSSING IMZIGZAG "
- "LINECROSSING PIWING NEEDLEDESK PERSONUNJO WYNN WCTION MARK CTION MARK- "
- "OVER IDIM OVER BULUGATION SPACEATION POINT3 LONG NGGOXI RADICAL AMUSED "
- "FACEATH PRODUCTDENT EMBLEM WITH PLATEWHITE JOKER UPPER HALF WITH "
- "JACKSCCUMULATIONDYO CHRONON AND MACRONCELANDIC-YRWINKING EYECAN "
- "RGYINGSWITH GARDEN6 LONG NGGEDUATION CAP WITH DASIACARET TILDE TIMES ESH2 "
- "WITH CARON WITH COMMA WITH MAPIQCH AND LAMPCE OF PIZZA WITH TRILL WITH "
- "TITLO5 LONG MBOOBREVE BELOWUMAN FIGUREBZHI -KHYIL S ZAPYATOY SHOE "
- "STILEURRENT SIGN TACK BELOWBUMPY ABOVE WITH FLASHWE PALAUNG OUT INDEX "
- "TIMES SHEVERAGE BOXHAAPRAANA CURLED WAWION BOTTLEASTERISCUSTAMAN "
- "SIGNASPIRATIONCURLY HAIR1 LONG MBE1 PLASTICSTIMES SIGNING-SHIFT OLING "
- "FACEC WITH DOTGUA PI MAOTOP CORNEROLON EQUALINHERENT AOUR FIFTHS WITH "
- "BASEUR YIG MGOCROSS MARKPPOPOTAMUSBA SATANGACROPHONIC "
- "HERMOMETERATRICHISMAATTY WITH OUTHERN TAOUTER JOINVEL SLIDER OVER "
- "KAD5FFICULTIESROEZENIAN FINAL HETHPRALINEAR INITIAL RAAUTOMOBILEDA "
- "SATANGAUP NEUTRALUP OR DOWNEUTRAL YERTWO THIRDSTWO SHORTSSYLLABLE MAR "
- "ECLIPSEHAN DIGIT DA PLUS HAOANDAKHIATIXTHS DISHEVERSED PED WITH DOTT AND "
- "BOLTZH DIGRAPHNTITY MARKNTO SHRINEKAI SYMBOLKA SATANGAK2 PLUS BUNUMBER "
- "TENTYPE COLON2 LONG MBOHAMZA MARKJONG TILE ISMUTH ORERING THUMBITH DIGIT "
- "IT MBAAKETF SHE-GOATYEH BARREE OF DHARMAHEAVY BEATFAHRENHEITOF HYGIEIACY "
- "MESSAGERING ABOVED KEYBOARDCAPITAL ETTAIL GLASSTING "
- "HEARTURIPIGMENTTWENTIETHSCIRCLED CAGBAKURUNENICAL TAPERCISIVENESS WITH "
- "PLUS WITH RAINPEDAL MARK-LUE KARANTRETCHED C POVODNAYAI ARCHAION "
- "SCHROEDERBLACK FLAGGAS BZUNG OPEN SHELF WITH LOW IED SHRIMPGANDA MARK "
- "PLUS SHU2IDEOGRAPH IDDLE MARK WITH PAGETHAPASCAN GOLUBCHIK ICK "
- "FIGURETHDAY CAKEHREE ABOVE WITH TAILGHT LIFTERHOOK ABOVEBOLD "
- "SHAFTBSTRUCTIONROUNDED ERORTHERN TAOSSED SHEI WITH TICKBROKEN BAROSTAL "
- "MARKHOTIC HOOKUSEL HORSEGGRAVATIONOTING STARTRESS AND GGING FACEURVED "
- "OMETTHMIKON N ZAR AMULET WITH RINGTREFACTIONRKING FACEOROME SIGNHUNGARIAN "
- "BLUE HEART RESUPINUS RGYA GRAMGREE SLASHFORMATTING WITH BELTCHAD "
- "RTAGSUPPER HOOKCH BALLOONILE FOLDEROUND OMEGAROAD "
- "OMEGACOMPONENT-COMPONENT TEMPLATIONIKRON ISONZEIRO SIGNINDU BELOWAYER "
- "BEADSYMBOL AIVAINDEX BENTOMMA BELOWPOUND SIGNGIMEL-HETHON MEDIAL FOR "
- "RECORDRUPEE MARKPOLICE CARPASSIMBANG-SHAPED HAUR CORNERSPLUS ERIN2TRIPLE "
- "DOTTESE CROSSRSI SYMBOL WITH EGGSPLUS BELOWG MUNDARI UTH OR SPYIGATURE "
- "OECHECK MARKIGATING RAOP NKAARAEGAGE CLAIMRUDIMENTA FRONT WALLTERNATIVE "
- "FTOGGOS OUCOLATE BAROON LILITHOON SELENAUR HUNDREDBELOW LEFTPLUS NAGA AB2 "
- "TIMES ENS SYMBOLABATA TREELEFT HEAVYLTIC CROSSXED BICEPSLEFT GUARDUBLE "
- "DASH SHMIRI YEHENTHUSIASM5 PLASTICSRESH BELOWEDESTRIANSLEFT SERIFACE "
- "FILLERDIATONIKI LOWER HOOKLEFT-HAND LPAPRAANA ENETRATIONMOTORCYCLEALEF "
- "LAMEDALEF WITH EEKING EYEALENT SIGNLEFT LIGHTLTERNATE "
- "URAGGISMATASSANGARAEAED FINGERSED FIGURE-MONOGRAM BRIGHT SIGNMONOFONIASA "
- "PLUS KURNGUAGE TAGA PLUS IGISSANGPIEUP AND KNIFEUNKIA SIGNMADDA "
- "MARKSELINE ESHWITH STRAWWITH SPOONM STALLION3 PLASTICSNGLICANA "
- "WLE-DELAYEDALLY MARK AND MOUSEA TANG LAILE LETTER MONTH SIGNMONOSPACE SS "
- "OF MILKWITH WINGSEONGCHIEUMN NGGEUAETRED DRAGONLINDRICITYLOGICAL "
- "ORLIMITATION4 PLASTICSLIGHT BULBELT BUCKLELIGHT BEATNDA PA NJILHAG "
- "RTAGSNAKE BELOWRECIPITATESMILO SIGNLLOW HEARTEIGHTIETHSAEDA-PILLASPACE "
- "MARKSHARP SIGNNANGMONTHOEICH STARKNASPIRATEDRDHACANDRALEK ATTAK SGOR "
- "RTAGS DRAWINGS SMALL TAH LENDED YUSDIGIT ZERORIGHT HOOKAKKHANGYAONDU "
- "TEMPLERCHAIC KHAWO SHORTS LGIZ EOLHXLONG FINALAILED BIRDDIRGA MUREW OR "
- "MODELLONG OVER LET SYMBOLLONG TSHEGADDA WITH DOUBLE BARADAK BINDIY AND "
- "RICEQUISH QUADRHO SYMBOLSTRAL SIGNU2 PLUS BAUMAN EARTHSE-CREE "
- "SKKRYZHEVAYAEARLY FORMEARTH MARKANGKHANKHUDED PERSONUAM TSHOOJRGE "
- "SQUAREMILLE SIGNSIXTEENTHSRGE CIRCLEANDHI MARK KABA TENU2 PLUS "
- "ASHETTA-PILLAMEEM ABOVEVIE CAMERAMEL SYMBOLU CIN HAU E PLUS SUMWING "
- "HEART2 PLASTICSSTRUCTION SBUB -CHALSCAN LINE-STROM SIGNME PLUS "
- "ENEQUIHOPPERUM ROTUNDAKINDI MVOP LATE FORMNOTE WITH ANGULAR TO7 "
- "PLASTICSUP HARPOONEBENSTIMMEWHOLE NOTE AND ARROWLACK JOKERUARDEDNESSMINO "
- "TILE VIOUS PAGEST-FEEDINGST PALETTEREVERSED ISIDEWAYS IWHITE HAIR AND "
- "BREVEWHITE FLAGMALL ALEPHLANE MERGEDUOUS TREEXHEEJ CEEV6 PLASTICSLA USED "
- "AS AND ACUTEZU OVER ZUR2 PLUS SUMHANCHOLLROTATION-CAPITAL QADMA GDANHWAZ "
- "EH EHINOCEROSMETOBELUSCAPITAL ISMALL YUSMBROIDERYCAPITAL DLOND HAIRURNED "
- "GANURUZ UR UWASH TAILHARACTERSHARD SIGNMAEMGBIEEOUTH WINDOUT MOUTHBING "
- "CANE-KHYUD PAWING STARIALECT-P MUKPHRENGBITE LIPSHIMA SIMAIA "
- "SYMBOLBKHASIAN I PLUS LI TIMES UDMRACHNAYAOW KAVYKAM HE-GOAT5 CYPERUS "
- "TIMES HAMALL RINGBOTH BENTOURA SIGNHI SYMBOL TROMIKONOVER GAN2HOT "
- "SASAKSIVE FACEACE NOTE OTAL SIGNLOWER DOT5 LONG JOTIMES PAPABOVE TO BRUL "
- "SHADTIMES NA2HEADSCARFURNED AYBLVIN SIGNURLY LOOPRPORATIONCANG "
- "TE-U-AMMONIACA PLUS NAORT-TWIG-ORTH WINDZ DIGRAPHLONG TIP ADEG ADEGBO "
- "BAIMAIWITH FACE5 BATHTUBBO GYFU GOVER MUSHHREE FOR HOOK "
- "MARKHIYYAALAACARTRIDGEMARK SHADMARK CIM STAL "
- "BALLTANDSTILLVISARGAYATAKHALLUSLACKFOOT OF STIMMEIRST MARKNING SIGNAS "
- "MEMBERVINE LEAFISEN-ISENTABE SIGNL ME HANDYEH WITH ANEROSIS "
- "ODIASTOLESTEBASKETKYO TOWEROKED HEADVOETOCHIESEMICOLONNIGGAHITANIGHT OF "
- "VOCALIC RINNYIIYHESSANGSIOSVELOPMENTNIHSHVASAOKOUFISMAOKED TAILNO "
- "TELEIAATTACHED LARGEMENTVER LUGALXHAUSTIONS DIGRAPHATHAMASATLAN SIGN "
- "VERLONG AANTHAKHATKING BOOTVEUAENGAMVIGINTILE2 GARMENTJERUSALEMNVERTED "
- "RANSKRIT SANTAYALANNSERT AT NUSVARAYAKEMPHRENGAOS ICHOSK GESTUREVICE "
- "MARKNTERPRISENTESSENCEKA- SHOG NTRACTIONKAMEYTSA YEH ABOVEOCCLUSIONANG "
- "CITI ANGGEUAETKU RU KHA247 DIPTET ON BONEITRA SIGNT OF MEATY "
- "BLOSSOMKSTREPTON1 CHARIOTARM CLOCKARISTERA YEAR "
- "SIGNIVE-PULL-IVINATIONY-FOURTH ARCHAIC MY-FOURTHSSTRELNAYANDAILING "
- "WRINKLES IGH HAMZA-STACCATORTER MARKIGHTH ASHAINTBRUSHAISED DOTBEER "
- "MUGSWRY SMILEUTRA MARKSHAB CEEBRTS MEDALUURDHAJA SPIRITUS UVUZHAKKUIGN "
- "NUKTAIGN SAFHAIGN TOMPIOO DENNENAJANYALANBASE UNITIDEOGRAM NA "
- "KHONNALLABLE OM-LOW TONEICE CREAMLLABLE B0SOF PASUQ-MID "
- "TONELIVERANCELITTLE UPLISSANDO BICYCLISTBIAL "
- "SIGNXCITEMENTWORDSPACEIDEWAYS UAF PERSONAFFE FACELINE FACEYRANISMA "
- "LIGHTNINGNCLOSING OP HALF ONGER SHIPRWARI DDAALI GALI SEPTEMBERING CARD "
- "NGER ROOTING DOLLSING GLOVENG SANDAL3 PLUS ANOM SYMBOLING LANESING "
- "STONEAVE ARROWALPAPRANAINISHMENTNGUN SIGNONGSEONG ILE TILDETEH ABOVEAL "
- "RUNOUTILIPPINE ONG GRAVENFORZANDORUM CLEF-VRAKHIYA ONE THIRDONE MARK-TED "
- "PLANTALAYALAM LEFT TACK0 LONG LEVANAGARI LEFT RINGIN SQUAREON TEUAEQON "
- "GROUNDINAL NOTEINARBORASB DIGRAPHTRESVETLO APODEXIA RA OR RITREDECILEEEN "
- "WITH CHOSEONG AND TAIL OF PAPERTUTEYASATDRAM SIGN "
- "RICKSHAWPROJECTORTUKWENTISCUBE ROOTDOWN SIGNZIR SASAKDOWN "
- "STEPCULTATIONENTRY SAWUNGLASSESUNGSEONG FEH WITH ULL BLANKED DIGIT D "
- "SALTIREUMED HEADRNEY PARAZAL SASAK AND CURLRO WIDTH CHAVIYANIDEPARTUREDUS "
- "RTAGS OF BLOOD FROM BARTRAIGHT UPENTASEMEZERO SIGNPENT SIGN "
- "GARSHUNIGITTARIUSED PLANETRCHAIC RAEMBEDDINGCRESCENDO PLUS GALFLAT "
- "SIGNCOIN SIGNEIGHT OF PLUS GUDG IN HOLE PLUS KU3CLOSED PLUPPER "
- "DOTPPOINTED CORN FACEFOUR BENTELEGRAPH CONJOINERREAK HERE PLUS LALRDEL "
- "DKARCOMPLETEDELLOWSHIPUBSTITUTEPOSITIONSTTED STEMFROM WALLDOWN HAND PLUS "
- "TURFINAL NUNFINAL NGAREFORMED FINAL MEMUPTSTIMMEEFORMED TEFAIDRIN PLUS "
- "ZA7 OVER BALZENE RINGPIDERY HAPLE HEART DIVIDERSRCHAIC IIUE "
- "MAEMBAPPOSITIONFISH TAILRED JOKERFACING UP OVER LUMREDNE ON PLUS "
- "SAGEMPTY SET AND BELTZAKRYTAYAE AT LEFTETER SIGNRFUL FACE STREAMERDAD "
- "WITH DENT AND URAMAZDAAEAVY DOWNTO CORNERHAIKSUKI SPARKLERQUADCOLONTONE "
- "MAI ERTY LINERIED FACEES AKURU EURO SIGNQUEEN OF ER BUBBLEP ELAMITEESAME "
- "DOTP DIGRAPHR PLUS RAERCIAL ATPANYANGGAETTI BALLROJECTIONETRASIMOUEBIT "
- "SIGNCHANICAL PAA-PILLAGRIK SIGNHALF SIGNQUSHSHAYAUR-DE-LISHALF "
- "NOTEHALSHELETGREATER YPADE SUITL-LAKUNATTO MARKEN NTEUM OTTAVA ING ROD "
- "XCELLENTAVY BANDDANTAJA OM NTEUMVE SASAKETA SIGN1 BARLEYING BELL OVER "
- "KGANE TREEKATAKANAKASRATANLESSER YAIYANNOIYIDDISH PRECEDESNEIFORM LESS "
- "SHANO SLASHOLLOWINGDIM GUNUSUCCEEDSUNG DASHNO THUMBDAMMATANFINAL THL "
- "POLISHETRASEMEFILE BOXNED FOODNAVIYANINCE SIGNFOR "
- "STOPBACKWARDUNDERDOTINAGARI WRINKLEDNOTE PAD LANTANGESH LOOP "
- "CURRENTCOUNCIL RDEL NAGSE WEDGEIMANSIS FLOURISHDDY BEARELEPHANTONE FOR "
- "COUNTERS OVER MULF RING AY-NIGHTOMANIAN ANS SIGNNRES TOSVERGENCEING "
- "BOWLWRITING ET SHOESSPERSIONW PRINTS OVER ZIY POPPEROMMA BARAZHAAKKUEST "
- "WINDQUINTILE KEMBANGDIT CARDLFWIDTH TTENTIONNOVEMBERSCRIPT GUNDERTIEY "
- "BEETLEUAREG YAOCK SALTEOUT BOXEVEN OF ER TRUTHUBJOINER "
- "NUTILLUYAMAKKANOCALIC M HANDLESSENTAGONPUSHPIKALEANING ARTYRIA "
- "ALLIANCESWIMMINGARSI YEHODESTONETAALUJA ERAL URNSQUEEZEDITA "
- "MFONARRIVINGOCUS OF UP TRUCKVESSEL "
- "BYBEYFILIJAVIYANIARCASITEUATRILLOEVERANCENINE OF NI ABOVEAR TSHESD "
- "BUBBLET NGGEETD CROSS QAIRTHRAARKLEAN LATION XOBOOFILIEXCHANGEAMS "
- "HORNSALT OF RILLIONSASTERN WTAR EYESEREVODKARASMIAN AST WINDEUFEUAET1 "
- "HELMETCURLICUEERDIGRISFEBRUARYOHM SIGNLA LENGARAUGHTS SANYAKA 3 "
- "ARMOURSUPERSETJUNCTIONDIFONIASATH MARKJACK OF PROSTAYAATEBOARDATE "
- "MARKATAKANA APITAL FPUN IYEK FACING PUB DAWBCURSIVE YENISEI TAI LUE RAMMA "
- "GGVOMITING ON LEFTVOLUTION OF YARNIS FORM IS WHEELIS-PILLAYESIEUNGNITIAL "
- "ZTURNED MENICIAN NG RTAGSKAIYARAAANC SIGNYER YAGHIRD MARKS OCHKOM "
- "ANTENNAMARRATANABAAFILILTRY LEGBLINEAR RASWADITHOSCOPEBLED CAROVER BU "
- "PAKPAK EBLE SIGNABOAFILIPAIRTHRAHREE OF WO ABOVESMA SIGNCHINESE ROSHTHI "
- "CEILING YUQ NAEBOL SIGNPALOCHKALUB SUITSHOE JOTTRI DISHOREHEAD THIOPIC "
- "LOW STOPHAGALL HLOW DOUBCANDICUSYRILLIC BINOVILEWBOY HATORM FEEDCAL "
- "DISCZWARAKAYLOZHITIERIYOOSAN-CREE TH-EM DASHOVER GA2BOT "
- "FACEGENITIVEUKEUTNDAOVER GI4I SHAKTISHKIR KATO-LEFT TICK IN BUNDANCECER "
- "BALLMON TIMEED BRICK TTUDDAG SCOTS S SATCHELDVANTAGEOTIFIED RESVETLYC "
- "SIYAQ DS-CREE SLIDINGMIONIAN GREAT SAZAKRYTOEECH YIWNMALL "
- "AXECHATTAWAPAVIYANIMMATION EBEEFILIDUSHENNAGORAZDO HESPIAN "
- "MANDARINMANGALAMWN HEARTOT MBUAESHORT ERM ALLAAHCABLEWAYSIX DOTSA SIGN "
- "ATOWARDS 56 TURO2HOP BELLDVISVARAGLASNAYAULLS LEGROSSED OPENTAGONBRA "
- "FACEBOX TRAYME WITH HARMONICOVERRIDEHARBAHAYZWJ THAJAEN NYAMN-JOINERSNA "
- "LDANWON SIGNSIGN LAECOMBINEDDOCUMENT CEDILLATIRRUP RLOCATIVEWASH "
- "KAFCLIMBINGRED HAIRTRICOLONIGMOID SME BADGEBER POLECLOSED TV OVER "
- "MYPORROONBATBEIT FRAKTUR MEM-QOPHHAN-AKATTISMOS E-X BELOW TALENTSHALF "
- "GURE OF POO0 BRONZEEK ONKARNAMENNY 8 KANAKO PLUS DU PLUS DIOO TYPE NA "
- "METEKFRICAN D-KHIEUKHREATNESSUDAWADI TER FACELKULIZMYSOFTNESSCASSETTECK "
- "CHARTMY HOUSE AT DUSKGBASINNARSE DUNGLONG S TGARITIC BIEE FONOVER "
- "TIRLORRAINEOVER SAGOVER SHEICHAEAN BILLIONSEGORIAN PLUS RUACKSPACE "
- "POLNAYALONG BAR-PHIEUPHSIFISTONREE MARKBEVERAGEMINGKALKOMBUVAKPAK WAMBOL "
- "B0ER THANLAMITE EAD ORETIVATE Y HEART9 CLOTHMEETORUEAVY YAKOQNDONECEMBERL "
- "SEGNODEAVOURMINIMA RDO RJE2 WOMANKYLISMAANGLED SIGN UDAN MARKKORONISERNIN "
- "AE DRINK7 NGGUAANGKUOQLAGIOS SEGMENTMARCATO3 EIGHT ISLANDXOPHONENJAEMLI9 "
- "NGGAAUM IYEKRA REPAAND END26 EYYYMERICAS "
- "KEFULASTERINGWIFRUITANDERERKUTAARU3 AREPALAK-050ST TUBEE GLASSMANCHU "
- "WIGNYANR2 GUNUMASSAGEY GREEN7 NGUANDYNAMICSEXTILEAD NECKLEUT KA "
- "BUTTONZQAPHA WO WAENLEYBALLWAZ EOHWO FOR SHIFT TWO MARKACTER "
- "TAELAENGSPRINGSLEK TOONESTED EN LEAFRAYANNALENGTH-NEQUDAAVYSOKO "
- "SOLDIERREATHY RD FACESHAKINGLIGHT XWORSHIPNAYANNARD DISK CHIKI "
- "UBUFILIAETMEUNNANCIALEL PUMPDISIMOUDIPLOUNLF FACELLYFISHEIGHT "
- "KDOFONONAHAPAKHSOV ROGLJUDIJE CER-WAED RICELAYANNAAM "
- "ALEFRESILLOALTILLOLYGISMASHORT ARAKHANGDRIL BUVOICINGLE LEAFSIDDHAMA "
- "-PHRUNIKAHITLASHES 6 NGGOOMAI SATA NAME 3 OMEGA3 MONTHDU NJAAMAAYYAA3 "
- "WHEELNG MASKUKKAKHARANGKEPENSHUETRARIETYDIARGONMUNGKAHSERPINAALLOT "
- "XDHALATHMRACHNYREREKAN3 SPICE5 NGGEEUN MEUT APLOUN3 SWORDWDRIVERALESMA "
- "GOLIAN VANESE BAIRKANVAPOURSYNAMIC POMOFO TORNADO PLOPHUHIUCHUS0 "
- "SPEARPECTIVECRACKERFLUENCEPAYEROKON FACEPAYANNA0 WHEATPOVODNYRIPPLE VAV "
- "YODOMERANGVAYANNA-WELSH IL DRUMONGONANBARREKHONG "
- "UEXCONTACTTSECHKAILLEANNCOPTIC RMUKHI PEGERMAFORKINGBUFFALOBAMBOOSIMILAR "
- "BULANCEFINAGH TIKRAMAAUNTLETOLAPUK OKRYTIEAULDRONTASHEELVEMENT-AU MARK "
- "SHAKERINNABARATH OF ATAEAN WAAJIB OPEN-OASUTORUTTHACANVE DOT TCHFORKAY "
- "SIGNTAYANNAYMAIC LGRADUALAXIMATARY FACECHEINAPTTILIK PPROACHFINAL Y1 "
- "ARROWIBIFILITHIEUTHORCULUSIANGQI THESEOSBORZAYA-KIYEOKIC "
- "WAND-MACRONCHIEUCHGAYANNAICOPTERTRAINERBOURINGBIG YUSICYCLESCK "
- "LIMEHYAAUSHUSSYERUGHEUGHEORTIETH RAMBATPERVISEBOARDERTHKUQI ORKHON -CREE "
- "RI NTEUMGENERALI RTAGSRRECTUSBLACHKOPHUTHAOOREVMA RTHIAN "
- "CLOTHESGLAGOLI-SHIFT-COASTERTROLLEYPENGKALCOMING TRYASKAUYGHUR "
- "-THIRTYONTIEEN-TIKEUTIKHAHITUT TIME-MU-MO-OT NGOMCHEVRONTHALIYACLEAVEROT "
- "REPH WOLOSOPRENKHAIFIED EBREVIS BERRIESPLOYAN BERGINEOP MARKRISIMOUCLOSE "
- "E MUQDAMJIBWAY HEADINGCAYANNAVEW NOWSANGAN MUOMAEHAYANNAUP MARK MENDUTP "
- "PIEETSANDHI CAP TENEULEUNGNUMBERSTYSCAPEQUARIUSJARATI T ASHESHAARKAAUP "
- "TACKYAYANNAUP STEPROGRESSSYNAGMADA FACEJAIN "
- "OMJAYANNAURATIONJECTIVESAMPHAOUP SIGNCANDRA JERAN JROKEN LQUEEZE DANESE "
- "KEUAERIURGLASSKHA YATSCOOTERESTIVAL TEDUNGOX BACKDANCINGURFACE OWILO "
- "SDAYANNAKKURUNIYA LAMPVILIK BVILLAINANGLONGANGOLATUP HAND LONSUMSUKUUDO "
- "MAELEE2 OLIVEAPEZIUM STRIDEHALANTAYAH LI YANMAR ETNAHTAKARO BANTAINS "
- "HANGUL OW ALEFSAYANNAKAYANNAOX LINECABINETISIBLE CELSIUSPURPLE PUSHPINTA "
- "MARKOVER ANS-SAJDAITALIC OCTOBEROCTAGONFATIGUEASH FROOGDIAN TAISYOUPANESE "
- "HEXAGONF SASAKOFFICEROF SOAPFAYANNAASHTRA OUT HUBPANSIOSAS SIGNYELLOW IVE "
- "OF ITON RATALL AASA VAH ARRED BD MADDAD MOUTHPALLAWAPSTICKSARDNESSOUR OF "
- "OVER DU OCLOCKARRED OHI SIGNPTHAHA SLOWLYF DAVID5 "
- "WOOLMPLINGROCKETOSETTE6 TREEYSTICKOW TIE8 HOOUPECIALPBOARDPENCIL6 "
- "NGGERICORN7 MBEEPEAKS OTTED-7 MBUU6 HUAN SPLIT7 NDOO6 GUEI7 "
- "NGONPEPPERSICKLE AGUNG7 GUANWN BOXPALUTAWN BOW7 KAPO TIKHYPWATCHS "
- "SHOENGGONGS TENT1 WINERAKLITODHADH3 NGGAOCIETYOCENCE "
- "MURDAQAMATSQETANAOBELOSSAADIY1 GOLDNEUME EPOCHOITIC EQUID1 HORNRIISAP1 "
- "NDEEOGONEKOFOUNDNGBAT PUFFEDNTIIMUSAUCERNTEVMANSUZ ANOKHUKNSANAQ "
- "KAPALNOZHEKNOWMANNOR BUZHITSA LELET3 GBEEXIMIZEXO NEONIRUGU3 "
- "HEEIRACINGRAAKANYAKASHNISTER2 KPOO MELIKNIZKO 2 MBOO2 "
- "NGGUNKNOWNNTOGENOPEN PPIRIT WO OF YRENE OPITSARSENICN DASH-IEUNGOPEN "
- "D-PIEUPRSHANAN YANGRSIAN N-NISFOOPED RKAANU QATAN5 "
- "MERISHMAAMRENGTHRISEMEREMEDY-HIDET-HIEUHPICKET00-1024 MUANWRENCHON KEY4 "
- "KPEE DIPLINDA TA4 DEERNCH FR4 DARTYIN-DORBITSAX FACE1 GBOORAVEL-4 "
- "NYINOOMUUT CARETNACLESSHAYIMONOCLEWORKERYOMBO 0 NGGI0 NGGO4 "
- "NJOOPOKOJIPOMMEE4 NGEN0 NYON0 "
- "NYUNAPYRUSSWORDSGBASAQCKNESSGEADALGEDOLAARADDOSYNAFIARBUTAGGLINGKAYAH "
- "CODILESUCKEDCLOSETIYANNATRIKE APISMASURANGSURED APLI "
- "MGANGIAITABLEISSIMOISSHARISSANTARSEOSCHESS GLAZ HCHEMA TAIKHUIRINGUGNANT "
- "CHURCHIXTY PSYOUWAGHAMALIX OF TRAPLIVERTKAGHETTIGHEUAECHO CHCHIRETT "
- "NJAQITULUMITHER LAMADHLASTONFAMILYFATHA "
- "LAMEDHCUPPEDTUXEDOSSLESSTURBANCUMBERFF OF "
- "VIRIAMANCHORLAFRONEVENTHEXHALEEXISTSSTANCEEYANNAUPNAYAF CLEFLD MAPF "
- "MAREKNIFE KLITONANIMALSTROFOKILLERVILIANKEYCAPFORMEEFORTISKE "
- "PHOANUARYANGKATSTANCYKY WAYKUSHU2KTIKO STLERSKRISISSTOLI "
- "CREASECRAYONBETAN BGBIEETIMATEIDE ESHEUAEPHIBIT THAKKUC CLEFIB YAMIASMA "
- "BEFILICALATETERON IGGLESTEUWENHEISEIURNAMAIEVAN HERMESHO HOIHUMBS BLINK "
- "HIVETEBOFILIHUR PAHORT IBOWTIEBISCUSBISHOPHIMAHUTIKENOI MAIMI HOOKHINGE "
- "BURGERAUTUMNCEVITUTAU ROINHALETAUROSINSHIPGS-PA "
- "INGAATGURAMUTAMINGASHGABCHEIKHGO NGUGOBLINASSINGGORGONTOPBARTARGETGRASP "
- "ATTERYATTIC CHAMKOCHADINHAMILOILLAGETIRYAKILBOATCARIK "
- "HAMEDHHASHKAUZEIROUYANNAHE MGOUUMISHINDHI TE TSEINCUNXINAGMATE USEIN "
- "YEHBAFILICATAWACASTLEHALF HBANWA HAM AIENTIMAACINTHENIKI "
- "WBERRYDICINEAASHAEEN GHEUNGAAMWEORTHEMPUS EMASTIA-KARAUBLE XM BOARM "
- "BULLAGOGUEUAEQTUUP BOWDE DOGLIGIONLIGON SOUNAPAFFIX "
- "ERMATAUANGXIELLITELISHA AESURALITIKIDERMA LLIPOPAEMMAEEPACT DGEHOGLONG "
- "EEENTH-9 NJEE9 MUENED ICEED CAPDUCEUSME DIEWINDOWDVANCEMECHIK8 NYEN8 "
- "NYANEAHMUK8 MBEEE WAVESKAPI MADDAHMADR MUCIBLEUDARKAA "
- "HAAMEIGHTYSIXTHSDIESISDOKMAIMALGAMMALL FUGGAGEMANYA SPLIT U "
- "MBITSPADESLEVEL-ET KUTAKEUAEAJANI "
- "AK-668AILUREEUREUTEUNYAMAKABATEUAENASPITALSPATHIEU MBUESTAN "
- "AGRANTDAGGERZSEKAEIDON TELU CAPOEGL "
- "HDOTS-EGALIEGIONTINNEHAYINCANUSHATHIURINEFAAFUHASE-"
- "TUUMUFAIHUTIPPIHANNAEUAEM COATPSILIEKEETPAATOCCEPTCCOLIHADDATON "
- "AHAALUCECAKCECEKROGOMRDIONUBUTSGVANGTKAANHALQA TABSPTUNERONOSFEARNHALA "
- "TMAAUEISMACAUDACAUSEUDAATHAINU "
- "CHWVRIEENRILLAHISTIDWICHHIRIQTIGMAHIUTHRIEULEURAEEBALLOUNCEHIMELOTHAL "
- "MOODZYGOSBREW BSTERE GEEOTERI ALLOHOLARHOLAM AMPSEAGLERICEMDAIC "
- "RELAAUMMERF COWREPHAF EWEUKARARELA CAKESREIWACAKRAF "
- "SOWEESHIUGUSTREGIAURITYURTLEQAAFUEYBUSDSMANCALYAEYYALHI "
- "ROTILESCAANGHETHECKAGEET TUFLAGSPITERDEPTHGAZE-DENCEUNOO "
- "RACHYCKTIEGAMANPPAGEGAMALGALGA ICONGESH2TTOCK FUJIUNITYCHULA "
- "GORACHUTEDELTACIEUCRAIDA HAA PI RORITSIGEAN UBITODESTYTSERER-RUB "
- "KAWIDATUSTSEEBPOLI FLUTEFORCEU U UPONSEDBOATQUIRYESHE3 LACAESO E "
- "ILUTPLHAUCLIFFTRIOLGADOLCLONEZIDI "
- "FLICTEOPLEERINECLUBSERKHAPLUTAPLUTOPMUNKTSADIFSAAQPCHA "
- "EMLJAPASEQPATAKGORGIUNGBAPEAN UQUETTORSOGOGI "
- "TUEUMCTRICDLINGUBURUGULUSDKAR CEREK "
- "DEKARCHIDEMAKEZHAINCHADACUBEDEMBICCHESTZILDEENENGPEPETENJETCHIMEGHULUCROWN"
- "CHOOICHOOLGHNUTTTORUENUTOEO-EUPEN-PENANOEVAL "
- "SARIPEITHRATERTOYORCHERYRASHATRACKENDEPTRAIFALLEY3 RA3NGENTALGARLEASE3 "
- "VEE3 WEI3-VASAMEKHLATIKNIEUN3 NDIVRIDOSENTONGUE LAYARALPHAALOG NGMANALLI "
- "AGMA 4 LEE4 LOO4 MBO4 MONNCORA4 NDO4 KPUWAQFANASHI4 TOO4 VOOSHANGSPINE32 "
- "JELENISVZMETNENOENEMKAWUAETNEGARWU3184 ABBWAAVU4 GBI2 PTEVIET "
- "NSYONKBALLNSUAEKERETNSIEEKESH22 POOKHAPHANNON2 SEEKNOBSNTXIV2 MBUK-0202 "
- "NJANUENG2 NJUKO LASUKUNNTHA KARORSAUILLAGUSLABORSEGOL3 BOO3 FOONNAN 3 "
- "HIN3 HONNINTHXING LAMDAVITAESTORMSTNUTSOLVEANGELKURONXYOOJKUSMAKWAENXW "
- "XWXTRA-L NETL-JUZ7 FUAMINDU9 WVE6 WEE9 WVA9 PU26-VASMALONWIANG9 NUNMIEUM7 "
- "GBE7 HUN7 JEEMAQAF9 NON7 MIN6 KOOMAI KMAIZESHTINMMOTHSICLE9 NDEMISRA6 "
- "RA26 SIA6 SOO9-VASMINGO9 YEE6 TA28 NWAWISAD8 RO28 FEEWINDUMEPET8 "
- "GBUMENOEMETEG8 NANSILA3MELON8 MANMEIZIWINJA8 KPOMEEMU8 KPE9 DEE7 NEN9 "
- "NDA9 MUN7 NIN9 MEN7 TWE9 KUAWIDE MADYA7-VASMI "
- "RO8-VASMETRYLOUREADULTLOMKAWATTOLOBE ACHKA5 KEEMUOY NABLA4 WOOSHAR2SHARA4 "
- "WUISHARUNADA 4-VASAEREENA POAEMAELOAN A UNAMPIREWFISHMPAREA YUE5-VASLWAY "
- "M RAMWLINESKATEMAAEHMAALA6 GBAMACUSAAMAEWBOAT5 MBI5 NDUMUHORMUCH "
- "2-VASSHOOKMSHAEMROCKLURALAADHUAWAY "
- "IAUDAOMBIERRITOIARDS0-VASRYASOAVROSBISAHVATOROMMAEOQPENRUSH ICHONINTHUI "
- "KOIIPINGIPEHAATAF OJKI ATIYAI-RESBAARUOKARAOKEE THING1 FAN1 DWEINNA "
- "AUTHSOLD XYIZET0 DOOUTEUXVAAVU0 BEEBASA "
- "RULAIIKURUBASSATENSE-RINGIKARABENDEUTIESRUHUAIHVUSRUDAA-"
- "SIOSBEITHOPLETBACUS0 OILBALAGIMMERICRONIMMA 0 MANIMGBABHETHSUTUH0 JOO0 "
- "HEE0 HANBHADH0 GEEILVER0 GBORUMP-0 DWOILLU TEGEHOCADO2 HENORUTOARERU2 "
- "HOOYECEKJERVIOBYLAOSTERITUALHOUR HOTELITHI YSTERO KAIJANG SALADO BOXO "
- "PLAO RUASAKTAO ANGHROOMYURIISAKINNZEUMYAMOKBOOTSROWN BORZYAR "
- "AESADHEJUEUI2 KPIASAR HUMP -BEAMSURYAHUTA ASEIAJUDUL1 PEE1-VAS2 MBA2 "
- "MBEBLAKOIRACYASPER-ALAFISTLE1 YOO2 KPA1 WVIJUDGE1 TEEYENAPAPPLE1 "
- "TWOTFONZZY ULU UTTYSIKIROUTSLURRT TRSO-UHURRROIYWAAUMEAUNAHUGU "
- "URUSRPSESINKRIFYSHTAUSA "
- "UTANUTAESHYAYUKUZIZ2YUDHUNAVTURUVIYOSELFTUKISEEVZELOSEENVEDEROARVOS "
- "XEIAZETAROA YEUXTWAAXEYNVEUXVESTZATAVEUMSUABVIDAVEYZSA-IVUEQZAYNYAWNWAW "
- "ROOKSOKAZIETRUNGWDERRUISRUKUWAAKWAHAWAETTAXIRUTUTZELSEYEWULUUON "
- "SONGRUSIHEYSHEENHEEPHEROHERUHEYNHEYT2 VIHHWAHID HIINHILD2 YAHAVEHAYN2 "
- "NOHUB2HUEN2 QOHWAA2 PEHSHU2 L22 KAIANOIARA2 BU2 SOHMI 2 ROHOKEHOM "
- "HOSTHSDA3 MEFIRIFITAFFIN3 LE3 L33 KU3 MUEURI3 YU3 TAEZZO3 RI3 PA3 "
- "JOFAIBFAST3 MIFEEMFETHFEUQGORTGIDAGIEAGIR2GOALGIBAGROMGRU 3 A3FWAA3 JE3 "
- "EEGAMEGAMLGEDEGGWSGHOMGHWAKMA 1 INKOBAKOETKOKEKOKOKPEN1 HAKWAA1 GA1 DU1 "
- "DO1 DALAANKALIKAPHKCET1 KU1 "
- "KIKICKKINILFERLFIELIFULIUMLIWN028BLOLLKAKOILETILUYINORINY "
- "IPODIFATIGERIQAAIITOJOT 1 YIJEONJIIM1 VU1 SU1 SI1 SA1 RA1 QI1 "
- "POKAAFKAD31358ISI ARA3ARGIARUM7 BE7 EIAPAQAPON7 DD7 DA6 JO6 L6BAGSBALD6 "
- "LABASH6 JEBAYI6 HIBBIT6 HEATIMATYAAN X6 WU6 SE6 RU6 QA6 PO6 NAAAMU8 FO8 "
- "EN8 GU8 DU8 BO9 TU9 PA9 PI9 JA9 SE9 SI9 SO9 TA9 TOAFEL8 WE8 SU8 QEA IE8 "
- "PI8 KOA-HA8 JIALDAALTA7 JA7 VOAHAD7 ZA7 TI7 RE7 LU7 KI5 BB5 AU5 "
- "A2DZHAEAAEEEEEEENG4 ZE5 GI5 FEDGER5 FADIM2EESU5 DE4 DOEHEH4 WIEETA4 WA4 "
- "TU4 TE4 NEDEAD4 L44 KEEIPTEIRTEIWS4 FIEKAACASEBUOYBUNGCAYN5 WE5 WA5 "
- "VECHAU5 VACHEHBETH6 FUBERD6 DIBOOKBORE5 OOCWAA5 NU5 MO5 LIDAGSDAIR5 "
- "JUDDAKDDHI5 INCORE5 TOCOONHUVA5 TECRETMUINMWAA0 HOMVAT "
- "PODPLUGPLUMOPUSPOLOMUAS0 BINUUNNAG "
- "PHABNWAANAAUPHINORIIORAXOONUQASRMMU2QEF ODLEQHAU WEBMLYAQOPAO-YOOPOD0 "
- "JUMPET0 KOPRILNUNGOOTHOBATOBRO OHMNSHENHAYNGA2NSUBNNNA-ONEOXIANEO "
- "NJAMNOWCNPEANRUA-RAY-UM "
- "NCERNTOCPEEPNANANAM2PEUXOUBTPARDPAWNNTAANDAPPEEIOJOD0 NIREIARAFELUMNOJI "
- "MARULUISMARY0 PUMESOLOVO R SOFUMMFAARGU20 WI0 SAMIIMMIIN0 RA0 ZO C D0 "
- "YECIG5 UUEZ4 "
- "ECAICAHCA9UDYEIEDJAUMXUOPE80DA2D70D42RQACWIWOQDE64-"
- "0UKYZOOZJEZORQUFVOKVOYAL2VUUQARPUQQ00QIFQIGVNOQOFQOTA7A8 "
- "IZUP9E39818F0REXWI ZZEAG-72CWAU8 "
- "AXAUBIBB895-0BXGBUD550B575575B66D7POQAZUVAUAYD6-0AWX620AUJ155YOTIMNMU "
- "14DI-IY00HOJHOXIHI18D0 E0 U0B9SJELK LJE0-0LULLFAYIT04ASUS1 "
- "XSUUJHAK00IWR1211-21-0JAHJAWJEUMAUKUGKAQSIIFOMOAYFLYTUJFAJ3 D3 IO "
- "YES-X0031CXANEOWOGHXEHEYKF14F8COIX3-0305NII2-"
- "020BGVENIBHAQXWVXWG2532DD3638G3830929171648401F1D494B4E1AL0HZP0VDC09990QWG"
- "0F3R7";
-uint8_t UnicodeNameToCodepointIndex_[241561] = {
- 0x00, 0x05, 0xc0, 0x00, 0x6b, 0x15, 0xc0, 0x00, 0x95, 0x12, 0xc0, 0x00,
- 0xdd, 0x06, 0xc0, 0x01, 0x03, 0x14, 0xc0, 0x01, 0x27, 0x18, 0xc0, 0x01,
- 0x41, 0x16, 0xc0, 0x01, 0x57, 0x03, 0xc0, 0x01, 0x7b, 0x04, 0xc0, 0x01,
- 0xd8, 0x0e, 0xc0, 0x01, 0xfe, 0x17, 0xc0, 0x02, 0x22, 0x0a, 0xc0, 0x02,
- 0x3f, 0x0b, 0xc0, 0x02, 0x5d, 0x19, 0xc0, 0x02, 0x7d, 0x08, 0xc0, 0x02,
- 0x95, 0x0d, 0xc0, 0x02, 0xb1, 0x0f, 0xc0, 0x02, 0xcf, 0x10, 0xc0, 0x02,
- 0xef, 0x1a, 0xc0, 0x03, 0x15, 0x07, 0xc0, 0x03, 0x2d, 0x09, 0xc0, 0x03,
- 0x84, 0x11, 0xc0, 0x03, 0xa6, 0x1c, 0xc0, 0x04, 0x0a, 0x0c, 0xc0, 0x04,
- 0x2c, 0x42, 0x00, 0x90, 0xc0, 0x04, 0x42, 0x1b, 0x40, 0x04, 0x58, 0x03,
- 0xc0, 0x04, 0x6c, 0x43, 0x2f, 0xb2, 0xc0, 0x04, 0x9b, 0x0a, 0xc0, 0x04,
- 0xad, 0x14, 0xc0, 0x04, 0xc9, 0x11, 0xc0, 0x04, 0xe8, 0x0e, 0xc0, 0x05,
- 0x23, 0x0b, 0xc0, 0x05, 0x35, 0x17, 0xc0, 0x05, 0x4a, 0x07, 0xc0, 0x05,
- 0x70, 0x1b, 0x40, 0x05, 0x88, 0x07, 0xc0, 0x05, 0xa0, 0x0b, 0xc0, 0x05,
- 0xe7, 0x16, 0xc0, 0x06, 0x05, 0x03, 0xc0, 0x06, 0x22, 0x0d, 0xc0, 0x06,
- 0x5e, 0x0e, 0xc0, 0x06, 0x6c, 0x0a, 0xc0, 0x06, 0x7c, 0x05, 0xc0, 0x06,
- 0x98, 0x10, 0xc0, 0x06, 0xad, 0x11, 0xc0, 0x06, 0xbd, 0x42, 0x00, 0x90,
- 0xc0, 0x06, 0xef, 0x1b, 0xc0, 0x06, 0xf9, 0x12, 0xc0, 0x07, 0x0d, 0x17,
- 0xc0, 0x07, 0x2c, 0x0f, 0xc0, 0x07, 0x58, 0x19, 0xc0, 0x07, 0x66, 0xcc,
- 0x83, 0xa4, 0x01, 0x4e, 0x60, 0x14, 0xc0, 0x07, 0x76, 0x0e, 0xc0, 0x07,
- 0x88, 0x0b, 0xc0, 0x07, 0x90, 0x03, 0xc0, 0x07, 0xb9, 0x11, 0xc0, 0x07,
- 0xed, 0x07, 0xc0, 0x08, 0x1b, 0x17, 0xc0, 0x08, 0x3d, 0x4f, 0x61, 0xaf,
- 0xc0, 0x08, 0x59, 0x0a, 0x40, 0x08, 0x77, 0x07, 0xc0, 0x08, 0x85, 0x0b,
- 0xc0, 0x08, 0xb9, 0x14, 0xc0, 0x08, 0xf7, 0x11, 0xc0, 0x09, 0x11, 0x17,
- 0xc0, 0x09, 0x5b, 0x03, 0xc0, 0x09, 0x6d, 0xc2, 0xe8, 0x16, 0x0f, 0xa6,
- 0x01, 0xcf, 0x6a, 0xa6, 0x0f, 0xcf, 0x60, 0x07, 0xc0, 0x09, 0x92, 0x0b,
- 0xc0, 0x09, 0xce, 0x11, 0xc0, 0x09, 0xfe, 0x03, 0xc0, 0x0a, 0x40, 0x17,
- 0xc0, 0x0a, 0x68, 0xc9, 0xa9, 0x73, 0x0f, 0xcc, 0x78, 0x03, 0xc0, 0x0a,
- 0x90, 0x07, 0xc0, 0x0a, 0xa2, 0x0b, 0xc0, 0x0a, 0xb8, 0x11, 0xc0, 0x0a,
- 0xe0, 0x42, 0x09, 0x6f, 0x40, 0x0a, 0xea, 0x03, 0xc0, 0x0a, 0xf6, 0x02,
- 0xc0, 0x0b, 0x30, 0x17, 0xc0, 0x0b, 0x3c, 0x0a, 0xc0, 0x0b, 0x52, 0x11,
- 0xc0, 0x0b, 0x6e, 0x14, 0xc0, 0x0b, 0x9a, 0x07, 0xc0, 0x0b, 0xaa, 0x0b,
- 0xc0, 0x0b, 0xc8, 0x19, 0x40, 0x0c, 0x00, 0x14, 0xc0, 0x0c, 0x10, 0xc2,
- 0x25, 0x1f, 0x0f, 0xd4, 0x99, 0x06, 0xc0, 0x0c, 0x32, 0x0e, 0xc0, 0x0c,
- 0x54, 0x17, 0xc0, 0x0c, 0x7c, 0xc7, 0x2e, 0x34, 0x01, 0x38, 0x43, 0x00,
- 0x0c, 0x8e, 0x10, 0xc0, 0x0c, 0x92, 0x15, 0xc0, 0x0c, 0xb5, 0x16, 0xc0,
- 0x0c, 0xc9, 0xc7, 0xc9, 0x21, 0x01, 0x32, 0x91, 0x44, 0xe1, 0x73, 0xc0,
- 0x0c, 0xd5, 0x05, 0xc0, 0x0c, 0xf7, 0x12, 0xc0, 0x0d, 0x15, 0xcb, 0x91,
- 0xea, 0x01, 0x0a, 0x69, 0x18, 0xc0, 0x0d, 0x23, 0x0f, 0xc0, 0x0d, 0x2f,
- 0xcb, 0x92, 0xc6, 0x00, 0x30, 0x59, 0x07, 0xc0, 0x0d, 0x45, 0xc5, 0xde,
- 0x44, 0x0f, 0xcf, 0x70, 0x11, 0xc0, 0x0d, 0x51, 0x0e, 0xc0, 0x0d, 0x91,
- 0x03, 0xc0, 0x0d, 0x9f, 0x0b, 0xc0, 0x0d, 0xd1, 0x07, 0xc0, 0x0d, 0xfd,
- 0x17, 0xc0, 0x0e, 0x26, 0x14, 0xc0, 0x0e, 0x61, 0x1b, 0xc0, 0x0e, 0x71,
- 0x49, 0xb5, 0x67, 0x40, 0x0e, 0x7d, 0x11, 0xc0, 0x0e, 0xab, 0x07, 0xc0,
- 0x0e, 0xe9, 0x0b, 0xc0, 0x0f, 0x1e, 0x1b, 0xc0, 0x0f, 0x57, 0x03, 0xc0,
- 0x0f, 0x69, 0xcd, 0x80, 0x95, 0x01, 0x08, 0xa1, 0x17, 0xc0, 0x0f, 0x96,
- 0xc4, 0x0f, 0xfe, 0x0f, 0xcc, 0xc8, 0x12, 0xc0, 0x0f, 0xa0, 0x10, 0xc0,
- 0x0f, 0xbc, 0xc7, 0x5b, 0xab, 0x01, 0x30, 0x13, 0x00, 0x0f, 0xd6, 0xc5,
- 0x1d, 0x40, 0x01, 0x32, 0x29, 0x48, 0xc0, 0x9d, 0x40, 0x0f, 0xda, 0x07,
- 0xc0, 0x0f, 0xe6, 0x11, 0xc0, 0x10, 0x0a, 0x03, 0xc0, 0x10, 0x38, 0x0b,
- 0xc0, 0x10, 0x68, 0x1b, 0xc0, 0x10, 0x92, 0xcb, 0x91, 0xf5, 0x01, 0x05,
- 0xa1, 0x17, 0x40, 0x10, 0xa8, 0x10, 0xc0, 0x10, 0xbe, 0x42, 0x00, 0x06,
- 0xc0, 0x10, 0xea, 0x43, 0x00, 0x69, 0xc0, 0x10, 0xf6, 0x0f, 0xc0, 0x11,
- 0x06, 0xce, 0x73, 0xb9, 0x0f, 0x9f, 0x71, 0xd3, 0x45, 0xfc, 0x0f, 0xc8,
- 0xf8, 0x11, 0xc0, 0x11, 0x16, 0x0a, 0xc0, 0x11, 0x30, 0x0b, 0xc0, 0x11,
- 0x45, 0x03, 0xc0, 0x11, 0x61, 0x07, 0xc0, 0x11, 0x83, 0x14, 0x40, 0x11,
- 0x97, 0x0e, 0xc0, 0x11, 0xa7, 0x11, 0xc0, 0x11, 0xbe, 0x03, 0xc0, 0x11,
- 0xe8, 0x14, 0xc0, 0x12, 0x0e, 0x17, 0xc0, 0x12, 0x20, 0x07, 0xc0, 0x12,
- 0x36, 0x0b, 0x40, 0x12, 0x4a, 0x0a, 0xc0, 0x12, 0x6e, 0x10, 0xc0, 0x12,
- 0x8a, 0x07, 0xc0, 0x12, 0x96, 0x03, 0xc0, 0x12, 0xa3, 0x0b, 0xc0, 0x12,
- 0xcb, 0x11, 0xc0, 0x12, 0xec, 0xc5, 0xd5, 0xe8, 0x01, 0x5f, 0x18, 0x0b,
- 0xc0, 0x12, 0xf8, 0x07, 0xc0, 0x13, 0x19, 0x11, 0xc0, 0x13, 0x4b, 0x03,
- 0xc0, 0x13, 0x7a, 0x17, 0xc0, 0x13, 0xb9, 0x43, 0x15, 0xd5, 0xc0, 0x13,
- 0xc9, 0x47, 0xca, 0x6a, 0x40, 0x13, 0xd3, 0x07, 0xc0, 0x13, 0xf7, 0x03,
- 0xc0, 0x14, 0x2c, 0x11, 0xc0, 0x14, 0x61, 0x56, 0x2c, 0x6d, 0xc0, 0x14,
- 0x86, 0x17, 0xc0, 0x14, 0xa0, 0x45, 0x6a, 0xc5, 0xc0, 0x14, 0xb6, 0x43,
- 0xc0, 0x0b, 0xc0, 0x14, 0xe5, 0x0b, 0x40, 0x15, 0x0b, 0x47, 0xc6, 0xff,
- 0xc0, 0x15, 0x15, 0xd3, 0x45, 0x05, 0x01, 0x19, 0x39, 0xc2, 0x00, 0xbf,
- 0x01, 0x15, 0xd9, 0xc4, 0xe5, 0xb7, 0x0f, 0xd3, 0xd8, 0x0f, 0xc0, 0x15,
- 0x21, 0x03, 0xc0, 0x15, 0x2f, 0x09, 0xc0, 0x15, 0x42, 0x1a, 0xc0, 0x15,
- 0x4c, 0x48, 0xbc, 0xbd, 0xc0, 0x15, 0x5a, 0x0e, 0xc0, 0x15, 0x8c, 0x44,
- 0x01, 0xdc, 0xc0, 0x15, 0xa0, 0x10, 0xc0, 0x15, 0xaa, 0xcb, 0x91, 0x87,
- 0x01, 0x1e, 0x79, 0x14, 0xc0, 0x15, 0xc9, 0x42, 0x00, 0x90, 0xc0, 0x15,
- 0xdb, 0x15, 0xc0, 0x15, 0xe5, 0x17, 0xc0, 0x15, 0xf1, 0xcc, 0x89, 0x98,
- 0x0f, 0xa7, 0x39, 0xcd, 0x76, 0x87, 0x0f, 0x99, 0x91, 0xc2, 0x05, 0xd0,
- 0x0f, 0xa2, 0x0b, 0x00, 0x15, 0xfd, 0xd0, 0x5c, 0x72, 0x01, 0x70, 0x70,
- 0x17, 0xc0, 0x16, 0x07, 0x11, 0xc0, 0x16, 0x23, 0x14, 0xc0, 0x16, 0x4b,
- 0x07, 0xc0, 0x16, 0x5b, 0x0b, 0xc0, 0x16, 0x7e, 0xc4, 0xe1, 0x77, 0x0f,
- 0xa3, 0xd9, 0x03, 0xc0, 0x16, 0x8e, 0x0e, 0x40, 0x16, 0x9a, 0xc5, 0xde,
- 0xee, 0x0f, 0xcd, 0x51, 0x14, 0xc0, 0x16, 0xa8, 0x42, 0x02, 0x92, 0xc0,
- 0x16, 0xca, 0xc2, 0x02, 0x46, 0x0f, 0xcc, 0x49, 0xc7, 0xc5, 0xaf, 0x0f,
- 0xb7, 0x11, 0x10, 0xc0, 0x16, 0xd6, 0x12, 0xc0, 0x16, 0xec, 0x0e, 0xc0,
- 0x17, 0x02, 0x17, 0xc0, 0x17, 0x12, 0x05, 0xc0, 0x17, 0x1c, 0x04, 0xc0,
- 0x17, 0x2c, 0xc7, 0xba, 0xb6, 0x01, 0x09, 0x31, 0x43, 0x00, 0x7b, 0xc0,
- 0x17, 0x3e, 0x09, 0xc0, 0x17, 0x48, 0xc8, 0xb0, 0xb2, 0x0f, 0xaa, 0x49,
- 0xce, 0x70, 0xb7, 0x0f, 0x9f, 0x11, 0xc3, 0x04, 0x3b, 0x0f, 0x9b, 0x11,
- 0x9a, 0x0f, 0xa0, 0x11, 0x15, 0xc0, 0x17, 0x54, 0xcb, 0x85, 0xfd, 0x0f,
- 0xa2, 0x60, 0xd0, 0x5c, 0xa2, 0x0f, 0xc8, 0x81, 0x48, 0xbf, 0xfd, 0xc0,
- 0x17, 0x60, 0x50, 0x5a, 0xb2, 0xc0, 0x17, 0x72, 0x4a, 0x16, 0x49, 0xc0,
- 0x17, 0x9a, 0x07, 0xc0, 0x17, 0xba, 0xc5, 0xdd, 0x4f, 0x0f, 0xce, 0xf8,
- 0x03, 0xc0, 0x17, 0xcc, 0x17, 0xc0, 0x17, 0xe2, 0x11, 0xc0, 0x17, 0xf4,
- 0x07, 0xc0, 0x18, 0x00, 0xd2, 0x4e, 0x24, 0x0f, 0xcf, 0x48, 0xc6, 0xd2,
- 0xf7, 0x01, 0x35, 0xd9, 0x03, 0xc0, 0x18, 0x0c, 0x46, 0x2c, 0x43, 0xc0,
- 0x18, 0x24, 0xcc, 0x00, 0xb2, 0x00, 0x01, 0x10, 0x0b, 0xc0, 0x18, 0x2e,
- 0x07, 0xc0, 0x18, 0x38, 0xcb, 0x99, 0x01, 0x0f, 0xcb, 0x89, 0xc4, 0xe5,
- 0x5b, 0x0f, 0xd4, 0x00, 0x10, 0xc0, 0x18, 0x4a, 0xc4, 0xd1, 0x2f, 0x01,
- 0x37, 0x59, 0x14, 0xc0, 0x18, 0x66, 0x12, 0xc0, 0x18, 0x88, 0x06, 0xc0,
- 0x18, 0x94, 0x17, 0xc0, 0x18, 0xa0, 0x0f, 0xc0, 0x18, 0xac, 0x0e, 0xc0,
- 0x18, 0xbb, 0xc4, 0xc7, 0xbf, 0x0f, 0x99, 0xa9, 0x96, 0x0f, 0xa0, 0x42,
- 0x00, 0x18, 0xc7, 0x58, 0x24, 0x90, 0xc0, 0x18, 0xd0, 0x48, 0x99, 0xbf,
- 0xc0, 0x18, 0xda, 0x47, 0x02, 0xbb, 0x40, 0x19, 0x28, 0x07, 0xc0, 0x19,
- 0x62, 0x03, 0xc0, 0x19, 0x7c, 0xc4, 0xcf, 0xf3, 0x01, 0x37, 0x51, 0x0b,
- 0xc0, 0x19, 0x90, 0x11, 0xc0, 0x19, 0xb1, 0xcc, 0x82, 0x18, 0x0f, 0x9c,
- 0x20, 0x17, 0xc0, 0x19, 0xc3, 0xc2, 0x00, 0x03, 0x0f, 0xcc, 0x01, 0x1b,
- 0xc0, 0x19, 0xcf, 0x11, 0xc0, 0x19, 0xdb, 0x07, 0xc0, 0x19, 0xf3, 0xc5,
- 0x74, 0x31, 0x0f, 0xcc, 0xba, 0x00, 0x19, 0xff, 0x05, 0xc0, 0x1a, 0x05,
- 0x0f, 0xc0, 0x1a, 0x0f, 0x17, 0xc0, 0x1a, 0x23, 0xc4, 0xe1, 0x8b, 0x01,
- 0x35, 0x81, 0x10, 0xc0, 0x1a, 0x35, 0x14, 0xc0, 0x1a, 0x5b, 0x0e, 0xc0,
- 0x1a, 0x6d, 0x42, 0x01, 0x04, 0xc0, 0x1a, 0x7c, 0x99, 0x0f, 0xa0, 0x23,
- 0x00, 0x1a, 0x86, 0x12, 0xc0, 0x1a, 0x8c, 0xc2, 0x00, 0x9e, 0x0f, 0xcf,
- 0x29, 0xc2, 0x00, 0x34, 0x0f, 0xd4, 0xc8, 0x0b, 0xc0, 0x1a, 0x96, 0x11,
- 0xc0, 0x1a, 0xa2, 0xd1, 0x4f, 0xda, 0x01, 0x1c, 0xd1, 0x03, 0x40, 0x1a,
- 0xbd, 0x42, 0x01, 0x7c, 0xc0, 0x1a, 0xcf, 0xc7, 0xc9, 0x1a, 0x0f, 0x9e,
- 0xcb, 0x00, 0x1a, 0xd9, 0xc4, 0x80, 0x86, 0x0f, 0x9d, 0x30, 0x42, 0x00,
- 0x15, 0xc0, 0x1a, 0xdf, 0x48, 0xb7, 0x1d, 0xc0, 0x1a, 0xeb, 0x14, 0xc0,
- 0x1a, 0xfd, 0x12, 0xc0, 0x1b, 0x0b, 0xc7, 0xb2, 0x36, 0x01, 0x10, 0xd9,
- 0xc6, 0xd0, 0x7b, 0x0f, 0xca, 0x91, 0xc9, 0xae, 0x4d, 0x0f, 0xcb, 0x48,
- 0xca, 0xa2, 0xf2, 0x0f, 0xaa, 0x41, 0xc3, 0x1e, 0x46, 0x01, 0x35, 0x99,
- 0x42, 0x00, 0x44, 0xc0, 0x1b, 0x1b, 0x42, 0x03, 0x30, 0x40, 0x1b, 0x27,
- 0x47, 0xbc, 0xe6, 0xc0, 0x1b, 0x33, 0x42, 0x06, 0xe0, 0xc0, 0x1b, 0x55,
- 0xca, 0xa5, 0x18, 0x01, 0x19, 0x69, 0xc5, 0xd8, 0x18, 0x0f, 0x98, 0x00,
- 0x42, 0x00, 0xed, 0xc0, 0x1b, 0x61, 0xc5, 0x65, 0xbb, 0x01, 0x18, 0x9b,
- 0x00, 0x1b, 0x6d, 0xcb, 0x99, 0x4e, 0x0f, 0xd5, 0x09, 0x03, 0xc0, 0x1b,
- 0x73, 0x15, 0xc0, 0x1b, 0x7b, 0x42, 0x00, 0x50, 0xc0, 0x1b, 0x87, 0xc5,
- 0xc2, 0xb6, 0x01, 0x35, 0xc9, 0x05, 0xc0, 0x1b, 0x97, 0x14, 0xc0, 0x1b,
- 0xa1, 0x07, 0xc0, 0x1b, 0xad, 0xc3, 0x8e, 0x75, 0x01, 0x5f, 0x91, 0xce,
- 0x73, 0x57, 0x01, 0x5f, 0xd9, 0xc4, 0xe2, 0x73, 0x0f, 0xc9, 0x98, 0x10,
- 0xc0, 0x1b, 0xb9, 0x42, 0x00, 0xcd, 0xc0, 0x1b, 0xcb, 0x1a, 0xc0, 0x1b,
- 0xd7, 0x06, 0xc0, 0x1b, 0xe9, 0xd1, 0x53, 0x4e, 0x0f, 0xaf, 0xf1, 0x46,
- 0xc6, 0x0b, 0x40, 0x1b, 0xf5, 0x07, 0xc0, 0x1c, 0x07, 0x03, 0xc0, 0x1c,
- 0x19, 0x14, 0xc0, 0x1c, 0x39, 0x11, 0xc0, 0x1c, 0x47, 0x17, 0xc0, 0x1c,
- 0x53, 0xca, 0x9f, 0x96, 0x0f, 0xde, 0x2a, 0x00, 0x1c, 0x65, 0x0e, 0xc0,
- 0x1c, 0x69, 0x42, 0x02, 0x53, 0xc0, 0x1c, 0x73, 0x10, 0xc0, 0x1c, 0x7f,
- 0xc6, 0xd2, 0xbb, 0x01, 0x37, 0xa9, 0xc9, 0xb3, 0x27, 0x01, 0x32, 0x81,
- 0x16, 0xc0, 0x1c, 0x8b, 0x48, 0x6b, 0x97, 0xc0, 0x1c, 0x9a, 0xc7, 0xc2,
- 0x0c, 0x0f, 0x9d, 0xb9, 0xd1, 0x51, 0xa5, 0x0f, 0x9b, 0xb1, 0xc2, 0x00,
- 0x5b, 0x0f, 0xcb, 0xd9, 0x45, 0x70, 0x72, 0x40, 0x1c, 0xb6, 0x17, 0xc0,
- 0x1c, 0xc2, 0x0b, 0xc0, 0x1c, 0xd1, 0xc8, 0xbb, 0x0d, 0x0f, 0xb7, 0xc8,
- 0x11, 0xc0, 0x1c, 0xdd, 0x07, 0xc0, 0x1c, 0xe5, 0x0b, 0xc0, 0x1c, 0xf5,
- 0x03, 0x40, 0x1d, 0x01, 0x14, 0xc0, 0x1d, 0x0d, 0x03, 0xc0, 0x1d, 0x19,
- 0x11, 0xc0, 0x1d, 0x39, 0x0b, 0xc0, 0x1d, 0x5d, 0xcd, 0x78, 0x4e, 0x01,
- 0x4f, 0x11, 0xc3, 0x2e, 0xaa, 0x0f, 0xa0, 0x88, 0x11, 0xc0, 0x1d, 0x73,
- 0x03, 0xc0, 0x1d, 0x7f, 0x14, 0xc0, 0x1d, 0x8b, 0xc4, 0xd8, 0xfa, 0x0f,
- 0x9f, 0x5a, 0x00, 0x1d, 0xa1, 0xcb, 0x8e, 0x2d, 0x0f, 0xc9, 0x39, 0x42,
- 0x01, 0x12, 0xc0, 0x1d, 0xa7, 0x03, 0x40, 0x1d, 0xc2, 0x17, 0xc0, 0x1d,
- 0xce, 0x43, 0x27, 0xfa, 0xc0, 0x1d, 0xda, 0xde, 0x10, 0x14, 0x0f, 0xa8,
- 0xe1, 0x46, 0xce, 0xfb, 0xc0, 0x1d, 0xec, 0x05, 0xc0, 0x1e, 0x23, 0x42,
- 0x01, 0x20, 0xc0, 0x1e, 0x2f, 0xc6, 0x51, 0x06, 0x01, 0x06, 0x01, 0x4b,
- 0x93, 0xad, 0xc0, 0x1e, 0x3f, 0x46, 0xcb, 0x05, 0x40, 0x1e, 0x4b, 0x03,
- 0xc0, 0x1e, 0x69, 0xc2, 0x00, 0x51, 0x0f, 0xcc, 0x88, 0x0f, 0xc0, 0x1e,
- 0x75, 0x10, 0xc0, 0x1e, 0x81, 0x42, 0x00, 0x5b, 0xc0, 0x1e, 0x8d, 0x4b,
- 0x8e, 0x01, 0x40, 0x1e, 0x99, 0x07, 0xc0, 0x1e, 0xb1, 0x03, 0xc0, 0x1e,
- 0xc1, 0xcd, 0x7a, 0xd8, 0x01, 0x11, 0x13, 0x00, 0x1e, 0xd3, 0x0b, 0xc0,
- 0x1e, 0xd9, 0xd4, 0x3a, 0xad, 0x0f, 0xa5, 0x31, 0x11, 0x40, 0x1e, 0xe8,
- 0x43, 0x00, 0x27, 0xc0, 0x1e, 0xfe, 0x90, 0x01, 0x30, 0x4b, 0x00, 0x1f,
- 0x0e, 0x48, 0xbe, 0x1d, 0xc0, 0x1f, 0x2d, 0xc6, 0xb7, 0x47, 0x01, 0x13,
- 0xdb, 0x00, 0x1f, 0x3f, 0x42, 0x0f, 0x20, 0xc0, 0x1f, 0x43, 0x42, 0x19,
- 0x1c, 0xc0, 0x1f, 0x55, 0x15, 0x40, 0x1f, 0x61, 0x0b, 0xc0, 0x1f, 0x6d,
- 0x03, 0xc0, 0x1f, 0x77, 0xcc, 0x71, 0xfb, 0x0f, 0xb5, 0x60, 0xc8, 0xb7,
- 0x4d, 0x01, 0x02, 0x99, 0x03, 0xc0, 0x1f, 0x83, 0xc5, 0xde, 0x53, 0x0f,
- 0x9e, 0x50, 0x0b, 0xc0, 0x1f, 0x8d, 0x11, 0xc0, 0x1f, 0x9d, 0x07, 0xc0,
- 0x1f, 0xb9, 0xca, 0xa1, 0x62, 0x0f, 0xa7, 0xf8, 0x03, 0xc0, 0x1f, 0xd8,
- 0x17, 0x40, 0x1f, 0xe9, 0x10, 0xc0, 0x1f, 0xfc, 0xc2, 0x00, 0xe0, 0x01,
- 0x36, 0x7b, 0x00, 0x20, 0x18, 0x15, 0xc0, 0x20, 0x1e, 0xc7, 0xc6, 0x96,
- 0x01, 0x16, 0xa3, 0x00, 0x20, 0x2a, 0x0e, 0xc0, 0x20, 0x30, 0x89, 0x0f,
- 0xa0, 0xb3, 0x00, 0x20, 0x40, 0x87, 0x0f, 0xcb, 0x38, 0x42, 0x05, 0x88,
- 0xc0, 0x20, 0x44, 0x09, 0xc0, 0x20, 0x54, 0x14, 0xc0, 0x20, 0x61, 0x4a,
- 0xa7, 0xb6, 0xc0, 0x20, 0x75, 0x0e, 0xc0, 0x20, 0x9a, 0x4b, 0x94, 0x47,
- 0xc0, 0x20, 0xa4, 0xc5, 0xd7, 0x19, 0x0f, 0xa7, 0x31, 0xc7, 0x7c, 0xa4,
- 0x0f, 0xa6, 0x71, 0xc8, 0xb8, 0x85, 0x0f, 0xa1, 0xf1, 0x10, 0x40, 0x20,
- 0xc6, 0x16, 0xc0, 0x20, 0xd2, 0x17, 0xc0, 0x20, 0xe2, 0x44, 0x01, 0xd3,
- 0xc0, 0x21, 0x00, 0x15, 0xc0, 0x21, 0x0a, 0x12, 0xc0, 0x21, 0x1a, 0xcf,
- 0x68, 0xe4, 0x0f, 0xad, 0x49, 0xcd, 0x7b, 0xc2, 0x0f, 0xa7, 0xf1, 0x45,
- 0xa1, 0xbe, 0xc0, 0x21, 0x26, 0xc4, 0xe5, 0x47, 0x0f, 0xa1, 0x48, 0x14,
- 0xc0, 0x21, 0x35, 0x10, 0xc0, 0x21, 0x58, 0x03, 0xc0, 0x21, 0x76, 0x15,
- 0xc0, 0x21, 0x8a, 0xc8, 0xa3, 0x43, 0x0f, 0xb5, 0xb1, 0xc8, 0xbc, 0xad,
- 0x0f, 0xcf, 0x59, 0xcc, 0x89, 0xe0, 0x0f, 0xd6, 0x10, 0x44, 0x01, 0xbe,
- 0xc0, 0x21, 0x96, 0xd8, 0x22, 0xc8, 0x0f, 0xa7, 0x11, 0xc5, 0xca, 0xc7,
- 0x0f, 0xa6, 0x61, 0x14, 0xc0, 0x21, 0xa2, 0xdc, 0x14, 0x6e, 0x0f, 0xb5,
- 0x70, 0x47, 0x33, 0xef, 0xc0, 0x21, 0xae, 0x4f, 0x68, 0x03, 0xc0, 0x21,
- 0xc1, 0xd3, 0x41, 0x9b, 0x08, 0x5c, 0xd1, 0xcc, 0x25, 0xea, 0x08, 0x5c,
- 0xc9, 0x47, 0x02, 0x90, 0x40, 0x21, 0xcd, 0x49, 0xb1, 0x14, 0xc0, 0x22,
- 0x28, 0x11, 0xc0, 0x22, 0x34, 0x03, 0x40, 0x22, 0x40, 0x18, 0xc0, 0x22,
- 0x4c, 0xc2, 0x00, 0x28, 0x0f, 0xcc, 0x61, 0x15, 0xc0, 0x22, 0x58, 0x05,
- 0xc0, 0x22, 0x6a, 0x55, 0x37, 0xea, 0xc0, 0x22, 0x74, 0x0e, 0xc0, 0x22,
- 0x8c, 0x45, 0xa0, 0xcc, 0xc0, 0x22, 0x9e, 0xce, 0x73, 0x3b, 0x0f, 0x9f,
- 0x61, 0xd5, 0x32, 0x41, 0x0f, 0x9e, 0xd1, 0xc9, 0xb0, 0x18, 0x0f, 0xce,
- 0x78, 0xc7, 0xca, 0x08, 0x0f, 0xd4, 0xa1, 0x44, 0xdf, 0xd3, 0xc0, 0x22,
- 0xb0, 0x09, 0xc0, 0x22, 0xbc, 0x18, 0xc0, 0x22, 0xc8, 0x46, 0xcd, 0x2d,
- 0xc0, 0x22, 0xd8, 0x15, 0xc0, 0x22, 0xe4, 0x07, 0xc0, 0x22, 0xf4, 0x45,
- 0x06, 0xdb, 0xc0, 0x23, 0x00, 0xce, 0x71, 0xb3, 0x01, 0x19, 0x89, 0x03,
- 0xc0, 0x23, 0x0c, 0xd0, 0x58, 0x12, 0x01, 0x12, 0x79, 0xc8, 0xb6, 0x7d,
- 0x01, 0x80, 0x18, 0x11, 0xc0, 0x23, 0x16, 0x03, 0xc0, 0x23, 0x26, 0xcd,
- 0x7c, 0xed, 0x01, 0x36, 0xd1, 0xc3, 0x05, 0x87, 0x0f, 0xa2, 0xb9, 0xd2,
- 0x4a, 0x34, 0x0f, 0xca, 0x08, 0x42, 0x01, 0x07, 0xc0, 0x23, 0x3b, 0x4a,
- 0xa8, 0x92, 0xc0, 0x23, 0x4b, 0x17, 0xc0, 0x23, 0x57, 0x16, 0xc0, 0x23,
- 0x63, 0x89, 0x0f, 0xa0, 0xab, 0x00, 0x23, 0x6d, 0x47, 0x73, 0x2f, 0xc0,
- 0x23, 0x79, 0x10, 0xc0, 0x23, 0x9d, 0xc6, 0xbf, 0x37, 0x0f, 0xae, 0x73,
- 0x00, 0x23, 0xa9, 0xcb, 0x98, 0xf6, 0x0f, 0xaa, 0x51, 0x0e, 0xc0, 0x23,
- 0xaf, 0xc2, 0x00, 0xbf, 0x0f, 0xb5, 0x51, 0xd2, 0x4a, 0x22, 0x0f, 0xb5,
- 0x79, 0xc2, 0x01, 0x4a, 0x0f, 0xcd, 0x20, 0x47, 0xc8, 0x8e, 0xc0, 0x23,
- 0xbb, 0xc6, 0xcb, 0xe3, 0x0f, 0xca, 0xf9, 0xc2, 0x00, 0xe0, 0x0f, 0xcc,
- 0x30, 0x42, 0x00, 0xe5, 0xc0, 0x23, 0xdf, 0x44, 0x3f, 0x63, 0xc0, 0x23,
- 0xe9, 0xca, 0xa6, 0x94, 0x01, 0x09, 0xc1, 0xc4, 0xcc, 0x22, 0x01, 0x01,
- 0x03, 0x00, 0x23, 0xf5, 0x10, 0xc0, 0x23, 0xf9, 0xce, 0x61, 0x74, 0x00,
- 0x00, 0x80, 0x18, 0xc0, 0x24, 0x05, 0x15, 0xc0, 0x24, 0x11, 0x05, 0xc0,
- 0x24, 0x1d, 0x45, 0x5b, 0x8e, 0xc0, 0x24, 0x35, 0xcc, 0x84, 0x58, 0x01,
- 0x01, 0xd9, 0xcd, 0x79, 0x93, 0x0f, 0x9c, 0xb9, 0x42, 0x00, 0x59, 0xc0,
- 0x24, 0x47, 0x42, 0x04, 0xcb, 0xc0, 0x24, 0x53, 0x45, 0xde, 0xf8, 0xc0,
- 0x24, 0x5f, 0xcb, 0x53, 0x98, 0x0f, 0xb0, 0x61, 0xd3, 0x1b, 0xac, 0x07,
- 0xff, 0xe8, 0x43, 0x01, 0xdd, 0xc0, 0x24, 0x75, 0xc2, 0x00, 0x35, 0x0f,
- 0xa4, 0x6b, 0x00, 0x24, 0x89, 0xc4, 0x79, 0x9c, 0x0f, 0x9c, 0x03, 0x00,
- 0x24, 0x99, 0x43, 0x00, 0x69, 0xc0, 0x24, 0x9f, 0x57, 0x2a, 0x7c, 0xc0,
- 0x24, 0xab, 0xc7, 0x40, 0x3e, 0x07, 0xef, 0xe1, 0xc3, 0x02, 0x69, 0x0f,
- 0xca, 0x30, 0xc2, 0x00, 0xe0, 0x0f, 0xd5, 0x43, 0x00, 0x24, 0xb7, 0x42,
- 0x03, 0xc7, 0xc0, 0x24, 0xbd, 0xc8, 0xbd, 0x3d, 0x0f, 0xc8, 0xb1, 0x43,
- 0x0c, 0x4c, 0xc0, 0x24, 0xcd, 0x46, 0x1d, 0x46, 0xc0, 0x24, 0xd7, 0x44,
- 0x14, 0x99, 0xc0, 0x24, 0xf5, 0xd2, 0x4d, 0x70, 0x0f, 0x9b, 0x01, 0xc2,
- 0x02, 0x60, 0x0f, 0x99, 0xcb, 0x00, 0x25, 0x1b, 0xc5, 0xde, 0xd5, 0x0f,
- 0xa0, 0x99, 0xc5, 0xdd, 0x0e, 0x0f, 0xb5, 0x18, 0xc3, 0xe6, 0x97, 0x0f,
- 0xd4, 0x91, 0x0b, 0xc0, 0x25, 0x21, 0x42, 0x03, 0x30, 0xc0, 0x25, 0x34,
- 0x96, 0x0f, 0xa0, 0x03, 0x00, 0x25, 0x41, 0x05, 0xc0, 0x25, 0x47, 0xc4,
- 0xe3, 0x37, 0x0f, 0xa0, 0x3b, 0x00, 0x25, 0x53, 0x8f, 0x0f, 0xa0, 0x78,
- 0xc8, 0xbc, 0x3d, 0x01, 0x05, 0xe9, 0xc8, 0x80, 0x25, 0x01, 0x05, 0x41,
- 0x43, 0xd2, 0x0c, 0xc0, 0x25, 0x59, 0x10, 0xc0, 0x25, 0x6b, 0xcc, 0x8b,
- 0x90, 0x0f, 0x9e, 0x49, 0xca, 0xa4, 0xa0, 0x01, 0x4f, 0xa1, 0x5a, 0x1a,
- 0xef, 0x40, 0x25, 0x75, 0x51, 0x4f, 0x85, 0xc0, 0x25, 0x99, 0x42, 0x04,
- 0x32, 0xc0, 0x25, 0xd8, 0xc5, 0xdc, 0xc8, 0x0f, 0xce, 0xd8, 0x14, 0xc0,
- 0x25, 0xf6, 0xc3, 0x0e, 0xa8, 0x01, 0x35, 0xb1, 0x44, 0x04, 0x27, 0xc0,
- 0x26, 0x08, 0xd5, 0x33, 0xbb, 0x01, 0x51, 0x78, 0x07, 0xc0, 0x26, 0x14,
- 0xca, 0x81, 0xea, 0x01, 0x38, 0x61, 0xc3, 0x13, 0x4e, 0x01, 0x32, 0x69,
- 0x43, 0x1b, 0x58, 0xc0, 0x26, 0x20, 0xcc, 0x85, 0xa8, 0x0f, 0xa7, 0x99,
- 0xc4, 0x39, 0x6b, 0x0f, 0x9d, 0xd9, 0x47, 0xc7, 0x92, 0x40, 0x26, 0x2a,
- 0x0e, 0xc0, 0x26, 0x36, 0xd0, 0x5a, 0x62, 0x0f, 0xdd, 0xd8, 0x4d, 0x7d,
- 0x2e, 0xc0, 0x26, 0x48, 0xc5, 0xd5, 0x57, 0x01, 0x5f, 0x30, 0x09, 0xc0,
- 0x26, 0x62, 0xc2, 0x07, 0x49, 0x0f, 0xb4, 0xa9, 0x49, 0xa4, 0xab, 0xc0,
- 0x26, 0x72, 0x10, 0xc0, 0x26, 0x7e, 0x0f, 0xc0, 0x26, 0x88, 0x43, 0x2b,
- 0x93, 0xc0, 0x26, 0x94, 0xc4, 0xe0, 0x0b, 0x01, 0x32, 0x49, 0x0d, 0xc0,
- 0x26, 0xa0, 0x42, 0x04, 0x32, 0xc0, 0x26, 0xac, 0xda, 0x1c, 0x0d, 0x0f,
- 0x9e, 0x99, 0xc2, 0x00, 0x79, 0x0f, 0x99, 0x70, 0xc3, 0xe6, 0xd3, 0x0f,
- 0xcc, 0xb1, 0xc5, 0x44, 0x84, 0x0f, 0xa2, 0xa8, 0x14, 0xc0, 0x26, 0xbe,
- 0xc9, 0xb6, 0x2d, 0x01, 0x05, 0x71, 0xc3, 0x15, 0x7e, 0x0f, 0x99, 0xb9,
- 0xcb, 0x8e, 0x59, 0x0f, 0xca, 0x18, 0x43, 0x04, 0x5f, 0xc0, 0x26, 0xce,
- 0x0b, 0xc0, 0x26, 0xd6, 0x11, 0xc0, 0x26, 0xe0, 0x17, 0xc0, 0x26, 0xec,
- 0x42, 0x00, 0x28, 0xc0, 0x26, 0xf8, 0x03, 0x40, 0x27, 0x02, 0xc4, 0xe2,
- 0x8f, 0x0f, 0xb5, 0xe9, 0x42, 0x00, 0x3f, 0xc0, 0x27, 0x0e, 0x16, 0xc0,
- 0x27, 0x44, 0xc9, 0xac, 0x8b, 0x0f, 0xaf, 0xe1, 0x57, 0x27, 0xca, 0xc0,
- 0x27, 0x50, 0xc4, 0x36, 0x17, 0x0f, 0x9a, 0x29, 0xc4, 0x5d, 0x1e, 0x0f,
- 0xa2, 0x29, 0x11, 0x40, 0x27, 0x5c, 0x03, 0xc0, 0x27, 0x6b, 0x0b, 0xc0,
- 0x27, 0x88, 0x17, 0xc0, 0x27, 0xa6, 0x11, 0x40, 0x27, 0xb3, 0x4c, 0x8a,
- 0x70, 0xc0, 0x27, 0xc0, 0x03, 0xc0, 0x28, 0x20, 0x0e, 0xc0, 0x28, 0x30,
- 0x10, 0xc0, 0x28, 0x3a, 0xc7, 0xc2, 0x75, 0x0f, 0xcf, 0x51, 0xc8, 0xb8,
- 0x0d, 0x0f, 0xcf, 0xc0, 0x09, 0xc0, 0x28, 0x4a, 0x42, 0x00, 0xc3, 0xc0,
- 0x28, 0x59, 0xc3, 0x18, 0x9f, 0x00, 0x03, 0xf3, 0x00, 0x28, 0x65, 0x14,
- 0xc0, 0x28, 0x69, 0xc2, 0x15, 0x1c, 0x01, 0x4f, 0xf3, 0x00, 0x28, 0x7b,
- 0xc4, 0x02, 0x5b, 0x0f, 0x9d, 0x59, 0xcf, 0x65, 0x9c, 0x01, 0x4e, 0xe9,
- 0x46, 0xd1, 0x4d, 0xc0, 0x28, 0x81, 0x47, 0xc6, 0x18, 0x40, 0x28, 0xb0,
- 0xd7, 0x21, 0x31, 0x01, 0x39, 0xc9, 0x11, 0xc0, 0x28, 0xc8, 0xd7, 0x27,
- 0x40, 0x0f, 0xa8, 0x00, 0x43, 0x01, 0x11, 0xc0, 0x28, 0xd2, 0xc3, 0x91,
- 0xec, 0x01, 0x32, 0x41, 0x85, 0x01, 0x18, 0x91, 0x44, 0x02, 0x8b, 0xc0,
- 0x28, 0xde, 0x47, 0x2c, 0xdd, 0xc0, 0x28, 0xe8, 0x42, 0x00, 0x30, 0x40,
- 0x29, 0x18, 0xce, 0x6d, 0x37, 0x0f, 0xd3, 0xc9, 0xc8, 0xbd, 0x5d, 0x01,
- 0x31, 0x61, 0xd6, 0x30, 0x1f, 0x01, 0x08, 0x09, 0x0f, 0xc0, 0x29, 0x24,
- 0xc3, 0x1e, 0xa7, 0x0f, 0xce, 0x89, 0x44, 0x0f, 0x69, 0x40, 0x29, 0x30,
- 0x54, 0x3d, 0x91, 0xc0, 0x29, 0x62, 0x46, 0x0d, 0x49, 0xc0, 0x29, 0xc6,
- 0x07, 0xc0, 0x29, 0xd2, 0xc9, 0xae, 0x71, 0x01, 0x1f, 0x81, 0x42, 0x00,
- 0x93, 0xc0, 0x29, 0xe4, 0x4b, 0x69, 0x8b, 0xc0, 0x29, 0xf0, 0xcb, 0x8f,
- 0x56, 0x0f, 0xa3, 0xf0, 0x42, 0x02, 0x18, 0xc0, 0x29, 0xff, 0xca, 0xa6,
- 0xe4, 0x01, 0x05, 0x99, 0xc7, 0xc8, 0x95, 0x0f, 0x9a, 0x30, 0x00, 0x40,
- 0x2a, 0x09, 0x43, 0x10, 0x59, 0xc0, 0x2a, 0x15, 0x96, 0x0f, 0xa0, 0xe3,
- 0x00, 0x2a, 0x21, 0xca, 0x9d, 0x70, 0x01, 0x3e, 0x89, 0xc4, 0xd0, 0x8f,
- 0x01, 0x34, 0x99, 0xc2, 0x08, 0x0f, 0x01, 0x31, 0x29, 0x09, 0x40, 0x2a,
- 0x2d, 0x16, 0xc0, 0x2a, 0x4e, 0x05, 0xc0, 0x2a, 0x5e, 0xc7, 0x60, 0x35,
- 0x01, 0x15, 0x31, 0xd5, 0x30, 0xa4, 0x01, 0x12, 0x18, 0xc9, 0xb0, 0xb1,
- 0x01, 0x34, 0xd9, 0xcb, 0x90, 0x1c, 0x0f, 0xa2, 0xf8, 0x47, 0x02, 0x90,
- 0xc0, 0x2a, 0x6a, 0x15, 0xc0, 0x2a, 0xb1, 0x48, 0x9d, 0xc0, 0xc0, 0x2a,
- 0xbd, 0x46, 0x06, 0x97, 0xc0, 0x2a, 0xc9, 0x4b, 0x6f, 0xcc, 0xc0, 0x2a,
- 0xed, 0x56, 0x2e, 0xbf, 0x40, 0x2b, 0x0a, 0xc8, 0xb7, 0xdd, 0x01, 0x1f,
- 0x31, 0x42, 0x00, 0x79, 0xc0, 0x2b, 0x14, 0x47, 0xc1, 0x56, 0xc0, 0x2b,
- 0x20, 0xc9, 0x4d, 0x8b, 0x00, 0x00, 0x31, 0x45, 0x35, 0x4c, 0x40, 0x2b,
- 0x2c, 0x54, 0x39, 0xa9, 0xc0, 0x2b, 0x38, 0x12, 0xc0, 0x2b, 0x94, 0x11,
- 0x40, 0x2b, 0xa0, 0x46, 0xd5, 0x13, 0xc0, 0x2b, 0xac, 0xc5, 0xde, 0xad,
- 0x0f, 0xca, 0x88, 0xcf, 0x63, 0x8f, 0x0f, 0x9e, 0x41, 0xd7, 0x2b, 0x4b,
- 0x01, 0x51, 0xf9, 0x12, 0xc0, 0x2b, 0xb8, 0xc7, 0xc6, 0x11, 0x0f, 0xb4,
- 0x88, 0xcc, 0x85, 0x48, 0x0f, 0xb5, 0x09, 0x45, 0xd7, 0xa5, 0x40, 0x2b,
- 0xc4, 0x1a, 0xc0, 0x2b, 0xe6, 0x43, 0x1e, 0x5c, 0xc0, 0x2b, 0xf2, 0x42,
- 0x02, 0x92, 0xc0, 0x2c, 0x0e, 0x19, 0xc0, 0x2c, 0x1a, 0x9b, 0x0f, 0xa3,
- 0x33, 0x00, 0x2c, 0x2d, 0x11, 0xc0, 0x2c, 0x33, 0xc2, 0x00, 0x73, 0x0f,
- 0xa5, 0x19, 0xc5, 0xd8, 0x77, 0x0f, 0xa4, 0x83, 0x00, 0x2c, 0x40, 0xc2,
- 0x00, 0xbb, 0x0f, 0xa0, 0xb9, 0xc2, 0x00, 0x8c, 0x0f, 0xcd, 0xa1, 0x47,
- 0xc5, 0x31, 0x40, 0x2c, 0x46, 0x11, 0xc0, 0x2c, 0x52, 0x03, 0xc0, 0x2c,
- 0x64, 0x42, 0x0f, 0x4d, 0x40, 0x2c, 0x70, 0x10, 0xc0, 0x2c, 0x7a, 0x0e,
- 0xc0, 0x2c, 0x8d, 0x15, 0xc0, 0x2c, 0x97, 0x06, 0xc0, 0x2c, 0xac, 0xc2,
- 0x03, 0x38, 0x0f, 0xa3, 0xb3, 0x00, 0x2c, 0xb8, 0x44, 0x8a, 0x1c, 0xc0,
- 0x2c, 0xbc, 0x05, 0xc0, 0x2c, 0xe0, 0x96, 0x0f, 0xcc, 0x3b, 0x00, 0x2c,
- 0xf0, 0x14, 0xc0, 0x2d, 0x03, 0x09, 0x40, 0x2d, 0x0d, 0xc3, 0x15, 0x38,
- 0x0f, 0xcd, 0x61, 0xcc, 0x86, 0x20, 0x01, 0x31, 0x19, 0x16, 0xc0, 0x2d,
- 0x1f, 0xc4, 0xe2, 0x6b, 0x0f, 0xa2, 0xc9, 0x42, 0x03, 0xc7, 0xc0, 0x2d,
- 0x2b, 0x14, 0xc0, 0x2d, 0x37, 0x42, 0x00, 0x36, 0xc0, 0x2d, 0x41, 0x44,
- 0x20, 0xd7, 0x40, 0x2d, 0x4d, 0x03, 0xc0, 0x2d, 0x57, 0x10, 0xc0, 0x2d,
- 0x79, 0xc2, 0x03, 0xc7, 0x0f, 0xa8, 0xa3, 0x00, 0x2d, 0x8c, 0x16, 0xc0,
- 0x2d, 0x96, 0xc5, 0xd6, 0x06, 0x01, 0x11, 0xa9, 0x07, 0xc0, 0x2d, 0xa2,
- 0x86, 0x0f, 0xb6, 0x79, 0xca, 0x9b, 0x72, 0x0f, 0xce, 0x18, 0xc4, 0x02,
- 0x92, 0x0f, 0xce, 0x43, 0x00, 0x2d, 0xae, 0x95, 0x0f, 0xb4, 0x63, 0x00,
- 0x2d, 0xb4, 0x42, 0x03, 0xc7, 0xc0, 0x2d, 0xbe, 0x89, 0x0f, 0xa0, 0xdb,
- 0x00, 0x2d, 0xd6, 0x44, 0xe1, 0x1b, 0xc0, 0x2d, 0xdc, 0xd3, 0x43, 0x9c,
- 0x0f, 0x9e, 0xb9, 0x44, 0x71, 0x06, 0xc0, 0x2d, 0xe8, 0xc4, 0x02, 0x5b,
- 0x0f, 0xd5, 0x19, 0xc5, 0xdd, 0x54, 0x0f, 0x99, 0x78, 0x0b, 0xc0, 0x2d,
- 0xf2, 0x03, 0xc0, 0x2e, 0x02, 0x11, 0xc0, 0x2e, 0x0c, 0x07, 0x40, 0x2e,
- 0x24, 0x57, 0x29, 0xdb, 0xc0, 0x2e, 0x2e, 0xcd, 0x7f, 0x9e, 0x07, 0xf7,
- 0xf8, 0xd2, 0x4c, 0xce, 0x08, 0xe3, 0x61, 0x47, 0x33, 0xef, 0xc0, 0x2e,
- 0x82, 0x06, 0xc0, 0x2e, 0xa6, 0x4b, 0x95, 0x5a, 0xc0, 0x2e, 0xb8, 0xce,
- 0x75, 0xb1, 0x08, 0xe2, 0x19, 0x45, 0x00, 0xcb, 0xc0, 0x2e, 0xc0, 0x4b,
- 0x6f, 0xcc, 0xc0, 0x2e, 0xd0, 0x47, 0x02, 0x90, 0x40, 0x2e, 0xf0, 0x19,
- 0xc0, 0x2f, 0x57, 0x43, 0x00, 0x35, 0xc0, 0x2f, 0x61, 0xc5, 0x08, 0xc2,
- 0x01, 0x2e, 0x53, 0x00, 0x2f, 0x71, 0x46, 0x1a, 0xfc, 0xc0, 0x2f, 0x77,
- 0xc2, 0x00, 0xe0, 0x0f, 0xa8, 0x93, 0x00, 0x2f, 0x89, 0x43, 0x00, 0x98,
- 0xc0, 0x2f, 0x95, 0xc6, 0xd3, 0xe7, 0x0f, 0x9b, 0x69, 0xd0, 0x5c, 0x32,
- 0x0f, 0xb1, 0x69, 0x16, 0xc0, 0x2f, 0xa1, 0xc5, 0xd6, 0x6a, 0x0f, 0xcc,
- 0xf0, 0x42, 0x01, 0x20, 0xc0, 0x2f, 0xb3, 0x42, 0x0f, 0x61, 0xc0, 0x2f,
- 0xc1, 0x91, 0x01, 0x32, 0x63, 0x00, 0x2f, 0xcd, 0x48, 0x05, 0x88, 0xc0,
- 0x2f, 0xd3, 0x45, 0xd8, 0x54, 0xc0, 0x2f, 0xfc, 0xc4, 0xe4, 0x13, 0x0f,
- 0xa6, 0x91, 0xca, 0x9c, 0x9e, 0x0f, 0x9c, 0xd1, 0xc3, 0x13, 0x8e, 0x0f,
- 0x9a, 0x59, 0x89, 0x0f, 0xcd, 0xa8, 0xc7, 0xc5, 0x70, 0x0f, 0xcc, 0x09,
- 0x09, 0xc0, 0x30, 0x1e, 0x43, 0x26, 0x1e, 0xc0, 0x30, 0x2a, 0xc3, 0x02,
- 0x58, 0x01, 0x32, 0x71, 0xd1, 0x53, 0xe7, 0x01, 0x05, 0xb1, 0xc7, 0x78,
- 0xfd, 0x01, 0x05, 0x21, 0x10, 0xc0, 0x30, 0x36, 0x0f, 0xc0, 0x30, 0x3e,
- 0xc2, 0x12, 0x12, 0x0f, 0xaf, 0x13, 0x00, 0x30, 0x4a, 0xc4, 0x86, 0x23,
- 0x0f, 0xcc, 0x70, 0xc8, 0x24, 0x60, 0x0f, 0xc9, 0x29, 0x45, 0x5f, 0xe3,
- 0xc0, 0x30, 0x50, 0x4c, 0x8b, 0xd8, 0x40, 0x30, 0x5c, 0x14, 0xc0, 0x30,
- 0xc5, 0x44, 0x07, 0x13, 0xc0, 0x30, 0xd1, 0xca, 0xa4, 0x28, 0x70, 0x00,
- 0x09, 0xcf, 0x6b, 0x1e, 0x01, 0x31, 0xf3, 0x00, 0x30, 0xe5, 0x04, 0xc0,
- 0x30, 0xe9, 0x06, 0xc0, 0x30, 0xf5, 0xd5, 0x33, 0xfa, 0x0f, 0xca, 0x69,
- 0x42, 0x01, 0x48, 0x40, 0x31, 0x01, 0x10, 0xc0, 0x31, 0x41, 0xc5, 0xd4,
- 0xa8, 0x0f, 0xcf, 0x98, 0x44, 0x00, 0x27, 0xc0, 0x31, 0x4d, 0x46, 0x01,
- 0x09, 0xc0, 0x31, 0x81, 0x4a, 0x01, 0x89, 0xc0, 0x31, 0xbf, 0xce, 0x72,
- 0xaf, 0x0f, 0xb2, 0x19, 0x00, 0x40, 0x31, 0xdd, 0x0b, 0xc0, 0x32, 0x04,
- 0xda, 0x1a, 0x05, 0x01, 0x35, 0x79, 0x06, 0xc0, 0x32, 0x1d, 0xcb, 0x99,
- 0xc7, 0x0f, 0xb0, 0x91, 0xce, 0x72, 0x15, 0x01, 0x5e, 0x88, 0x00, 0x40,
- 0x32, 0x29, 0x47, 0x02, 0x90, 0xc0, 0x32, 0x35, 0xcc, 0x1e, 0x68, 0x08,
- 0x1c, 0xf8, 0x03, 0xc0, 0x32, 0x98, 0x0e, 0xc0, 0x32, 0xa6, 0x50, 0x5c,
- 0x42, 0xc0, 0x32, 0xb6, 0x14, 0xc0, 0x32, 0xf8, 0x45, 0xd6, 0x65, 0xc0,
- 0x33, 0x02, 0xc6, 0xd3, 0xf9, 0x0f, 0xcc, 0xa1, 0x4b, 0x96, 0xd0, 0x40,
- 0x33, 0x1c, 0x14, 0xc0, 0x33, 0x74, 0x16, 0xc0, 0x33, 0x83, 0x17, 0xc0,
- 0x33, 0x8d, 0xc8, 0x6c, 0x81, 0x01, 0x11, 0xd9, 0x0e, 0xc0, 0x33, 0x9f,
- 0xc3, 0x59, 0x80, 0x0f, 0xa9, 0x51, 0xc6, 0xd5, 0x01, 0x0f, 0x9f, 0x29,
- 0x43, 0xb2, 0xaa, 0xc0, 0x33, 0xac, 0xc2, 0x01, 0x04, 0x0f, 0xd4, 0xe8,
- 0x42, 0x01, 0x7b, 0xc0, 0x33, 0xb8, 0x0f, 0xc0, 0x33, 0xc2, 0x10, 0xc0,
- 0x33, 0xd5, 0xc4, 0xdf, 0xff, 0x0f, 0xbb, 0xd9, 0xc7, 0xc3, 0x78, 0x0f,
- 0xad, 0xa1, 0x16, 0xc0, 0x33, 0xe9, 0xdb, 0x15, 0x31, 0x0f, 0xb2, 0x59,
- 0xc3, 0x22, 0x38, 0x01, 0x5f, 0x09, 0x48, 0xb8, 0xcd, 0x40, 0x33, 0xf5,
- 0x42, 0x00, 0x09, 0xc0, 0x34, 0x31, 0x47, 0x0e, 0x37, 0xc0, 0x34, 0x39,
- 0xcb, 0x92, 0x0b, 0x01, 0x37, 0x61, 0xc6, 0xd1, 0xef, 0x0f, 0x99, 0xd1,
- 0xca, 0xa6, 0x3a, 0x0f, 0xb6, 0xa9, 0xc9, 0xad, 0x5a, 0x0f, 0xcb, 0xf1,
- 0xca, 0x9c, 0x4e, 0x0f, 0xcc, 0xd8, 0xcf, 0x6a, 0xb5, 0x01, 0x1c, 0x71,
- 0x12, 0xc0, 0x34, 0x51, 0xc4, 0xe1, 0xdb, 0x01, 0x5e, 0xd1, 0xc6, 0xd3,
- 0x39, 0x0f, 0xd5, 0xd8, 0xd3, 0x40, 0x91, 0x0f, 0xa5, 0x79, 0xc9, 0x88,
- 0x7b, 0x0f, 0xb1, 0x79, 0x96, 0x0f, 0xb6, 0xb1, 0xca, 0xa0, 0x54, 0x0f,
- 0xc8, 0xb8, 0x18, 0xc0, 0x34, 0x60, 0x4f, 0x64, 0x43, 0xc0, 0x34, 0x6c,
- 0x42, 0x00, 0x9f, 0xc0, 0x34, 0x7e, 0x15, 0xc0, 0x34, 0x8b, 0x08, 0xc0,
- 0x34, 0x97, 0x05, 0xc0, 0x34, 0xa6, 0x06, 0xc0, 0x34, 0xb2, 0x46, 0xd1,
- 0x6b, 0xc0, 0x34, 0xbf, 0xc8, 0xbe, 0xd5, 0x0f, 0xa7, 0x28, 0x43, 0x01,
- 0x8d, 0xc0, 0x34, 0xcb, 0x49, 0x1b, 0x5a, 0x40, 0x34, 0xd7, 0xc5, 0xdc,
- 0xf0, 0x01, 0x37, 0xc1, 0xd5, 0x38, 0x14, 0x0f, 0x9e, 0x91, 0x05, 0x40,
- 0x35, 0x21, 0xc6, 0x3f, 0x4b, 0x01, 0x15, 0xbb, 0x00, 0x35, 0x2d, 0x92,
- 0x0f, 0xa3, 0xfa, 0x00, 0x35, 0x33, 0x14, 0xc0, 0x35, 0x39, 0xc6, 0x0b,
- 0x2a, 0x01, 0x05, 0x49, 0x0f, 0xc0, 0x35, 0x4f, 0xc7, 0xc3, 0xb0, 0x0f,
- 0xa1, 0xd1, 0xc2, 0x00, 0x2c, 0x0f, 0xd5, 0xa8, 0x43, 0x02, 0x33, 0xc0,
- 0x35, 0x5e, 0xc3, 0x0e, 0xa4, 0x0f, 0xb6, 0xf3, 0x00, 0x35, 0x68, 0xc3,
- 0x08, 0x1a, 0x0f, 0xa0, 0x58, 0x4a, 0x16, 0xc7, 0xc0, 0x35, 0x74, 0x42,
- 0x00, 0x9c, 0xc0, 0x35, 0x98, 0x10, 0xc0, 0x35, 0xa4, 0xcb, 0x8e, 0x38,
- 0x0f, 0xca, 0x01, 0xd2, 0x4d, 0x4c, 0x01, 0x71, 0xf0, 0x16, 0xc0, 0x35,
- 0xb8, 0x10, 0xc0, 0x35, 0xc4, 0x14, 0xc0, 0x35, 0xd0, 0x18, 0xc0, 0x35,
- 0xdc, 0xc9, 0xac, 0x82, 0x0f, 0xae, 0x89, 0x45, 0xda, 0xd4, 0xc0, 0x35,
- 0xee, 0xc4, 0x78, 0x4f, 0x0f, 0xce, 0x38, 0x06, 0xc0, 0x35, 0xfa, 0xcf,
- 0x6b, 0x0f, 0x01, 0x33, 0x81, 0x0b, 0xc0, 0x36, 0x06, 0x44, 0x14, 0xd4,
- 0x40, 0x36, 0x12, 0xca, 0x93, 0x56, 0x01, 0x38, 0x69, 0x07, 0xc0, 0x36,
- 0x1e, 0xcd, 0x7a, 0x63, 0x0f, 0x9c, 0x08, 0x9b, 0x0f, 0xd5, 0x83, 0x00,
- 0x36, 0x30, 0x17, 0xc0, 0x36, 0x36, 0x03, 0xc0, 0x36, 0x42, 0x11, 0xc0,
- 0x36, 0x52, 0x07, 0x40, 0x36, 0x67, 0x42, 0x18, 0x9f, 0xc0, 0x36, 0x73,
- 0xc6, 0xce, 0xbf, 0x0f, 0xcc, 0x51, 0x17, 0xc0, 0x36, 0x7f, 0x14, 0xc0,
- 0x36, 0x89, 0xc2, 0x00, 0xb2, 0x0f, 0xcd, 0xb3, 0x00, 0x36, 0xa5, 0x89,
- 0x0f, 0x99, 0x5b, 0x00, 0x36, 0xab, 0xc4, 0x39, 0x7e, 0x0f, 0xd6, 0xa8,
- 0x05, 0xc0, 0x36, 0xb1, 0x42, 0x02, 0x6c, 0xc0, 0x36, 0xc3, 0x0e, 0xc0,
- 0x36, 0xcf, 0xca, 0xa2, 0x02, 0x01, 0x31, 0x59, 0xce, 0x6f, 0x67, 0x0f,
- 0x9c, 0x29, 0xc3, 0xd2, 0xb6, 0x0f, 0xce, 0xd1, 0xc4, 0xd0, 0x53, 0x0f,
- 0xa3, 0x50, 0x07, 0xc0, 0x36, 0xd9, 0x11, 0xc0, 0x36, 0xe5, 0x03, 0xc0,
- 0x36, 0xfa, 0xca, 0x9f, 0x46, 0x0f, 0x9b, 0x20, 0x42, 0x03, 0xc7, 0xc0,
- 0x37, 0x06, 0xc7, 0xc3, 0x9b, 0x01, 0x37, 0xe9, 0x10, 0xc0, 0x37, 0x10,
- 0xc2, 0x02, 0x60, 0x01, 0x1e, 0xd8, 0x42, 0x01, 0x10, 0xc0, 0x37, 0x1c,
- 0x0f, 0xc0, 0x37, 0x26, 0x03, 0xc0, 0x37, 0x32, 0xc4, 0xe5, 0xbf, 0x0f,
- 0xc9, 0xd0, 0x14, 0xc0, 0x37, 0x3e, 0x15, 0xc0, 0x37, 0x4b, 0x47, 0xc6,
- 0x8f, 0xc0, 0x37, 0x58, 0x45, 0xbf, 0xe2, 0xc0, 0x37, 0x64, 0x0e, 0xc0,
- 0x37, 0x70, 0xd9, 0x1e, 0x74, 0x0f, 0x9e, 0x89, 0xd2, 0x4c, 0x3e, 0x01,
- 0x50, 0x68, 0x03, 0xc0, 0x37, 0x7c, 0x52, 0x4c, 0x74, 0xc0, 0x37, 0x88,
- 0x48, 0xc0, 0x45, 0xc0, 0x37, 0x94, 0x45, 0xdd, 0xa4, 0xc0, 0x37, 0xac,
- 0x44, 0x2f, 0x47, 0x40, 0x37, 0xcc, 0xc2, 0x00, 0xb2, 0x0f, 0xd5, 0x11,
- 0xcd, 0x7e, 0x3f, 0x0f, 0xce, 0x70, 0x9b, 0x0f, 0xa8, 0x8b, 0x00, 0x37,
- 0xee, 0xc9, 0xab, 0x2c, 0x01, 0x09, 0x50, 0x4f, 0x6b, 0x00, 0xc0, 0x37,
- 0xfd, 0x46, 0x5b, 0xe2, 0xc0, 0x38, 0x22, 0x43, 0xe6, 0xd6, 0xc0, 0x38,
- 0x2c, 0x45, 0xde, 0xe9, 0xc0, 0x38, 0x4e, 0xc3, 0x5a, 0xb4, 0x0f, 0xaa,
- 0x59, 0x47, 0xca, 0x63, 0xc0, 0x38, 0x77, 0x10, 0x40, 0x38, 0x95, 0xc6,
- 0x05, 0x73, 0x01, 0x05, 0x69, 0xc2, 0x05, 0x88, 0x0f, 0xa4, 0x7b, 0x00,
- 0x38, 0x9f, 0xc4, 0x13, 0x8e, 0x0f, 0xa2, 0xc1, 0xc7, 0xc2, 0x52, 0x0f,
- 0xca, 0xe9, 0xc2, 0x00, 0x9f, 0x0f, 0xd4, 0x08, 0xc3, 0x14, 0x38, 0x0f,
- 0xa1, 0x41, 0xd4, 0x3c, 0x8d, 0x01, 0x93, 0xf8, 0xc4, 0x4b, 0x9d, 0x0f,
- 0xd4, 0xf3, 0x00, 0x38, 0xab, 0x0e, 0xc0, 0x38, 0xb1, 0x43, 0x73, 0x90,
- 0xc0, 0x38, 0xc3, 0x42, 0x0a, 0x0f, 0xc0, 0x38, 0xdb, 0x06, 0xc0, 0x38,
- 0xe3, 0x10, 0x40, 0x38, 0xef, 0x49, 0xb6, 0x1b, 0xc0, 0x38, 0xfd, 0x06,
- 0xc0, 0x39, 0x09, 0x42, 0x01, 0x02, 0xc0, 0x39, 0x13, 0x10, 0xc0, 0x39,
- 0x1d, 0x14, 0xc0, 0x39, 0x2f, 0x03, 0xc0, 0x39, 0x41, 0x4b, 0x93, 0xfa,
- 0xc0, 0x39, 0x4d, 0xc2, 0x00, 0x82, 0x0f, 0xa6, 0xe9, 0x0e, 0xc0, 0x39,
- 0x71, 0xcd, 0x76, 0xe2, 0x00, 0x04, 0xa8, 0x16, 0xc0, 0x39, 0x7d, 0x17,
- 0xc0, 0x39, 0x89, 0x06, 0xc0, 0x39, 0x9e, 0x10, 0xc0, 0x39, 0xac, 0xc3,
- 0x9f, 0x08, 0x0f, 0xaf, 0xf9, 0x11, 0xc0, 0x39, 0xc5, 0x43, 0x07, 0x09,
- 0xc0, 0x39, 0xd7, 0xca, 0x42, 0xd4, 0x0f, 0xa7, 0x8b, 0x00, 0x39, 0xe1,
- 0xca, 0xa7, 0xa2, 0x0f, 0x9d, 0x28, 0x16, 0xc0, 0x39, 0xe5, 0x4c, 0x8d,
- 0x10, 0xc0, 0x39, 0xf1, 0x0d, 0xc0, 0x3a, 0x16, 0x15, 0xc0, 0x3a, 0x22,
- 0x14, 0xc0, 0x3a, 0x3a, 0x0e, 0xc0, 0x3a, 0x52, 0x12, 0xc0, 0x3a, 0x64,
- 0x90, 0x0f, 0xa3, 0x43, 0x00, 0x3a, 0x70, 0x0a, 0xc0, 0x3a, 0x9e, 0xc6,
- 0xcd, 0x87, 0x0f, 0xae, 0xb1, 0xc4, 0x60, 0xe8, 0x00, 0x05, 0x79, 0xc5,
- 0xdb, 0x88, 0x0f, 0xcd, 0x19, 0x09, 0x40, 0x3a, 0xaa, 0x15, 0xc0, 0x3a,
- 0xba, 0x42, 0x00, 0x32, 0xc0, 0x3a, 0xc6, 0x43, 0x1d, 0x24, 0x40, 0x3a,
- 0xd0, 0x06, 0xc0, 0x3a, 0xdc, 0x47, 0x02, 0x90, 0x40, 0x3a, 0xee, 0x15,
- 0xc0, 0x3b, 0x4e, 0x0e, 0xc0, 0x3b, 0x60, 0x50, 0x0f, 0x24, 0xc0, 0x3b,
- 0x6c, 0x16, 0xc0, 0x3b, 0x78, 0x4b, 0x6f, 0xcc, 0xc0, 0x3b, 0x84, 0x4f,
- 0x2e, 0xbf, 0xc0, 0x3b, 0xc5, 0x46, 0x06, 0x97, 0x40, 0x3b, 0xcf, 0x15,
- 0xc0, 0x3b, 0xf3, 0x42, 0x00, 0x84, 0xc0, 0x3b, 0xfd, 0x19, 0xc0, 0x3c,
- 0x09, 0x43, 0x10, 0x9a, 0xc0, 0x3c, 0x1f, 0xc5, 0xd5, 0xe3, 0x01, 0x32,
- 0x33, 0x00, 0x3c, 0x2b, 0x43, 0x5c, 0xfb, 0xc0, 0x3c, 0x31, 0x46, 0xd0,
- 0x3f, 0xc0, 0x3c, 0x3d, 0xc5, 0xd7, 0x46, 0x0f, 0xa2, 0xa1, 0xc7, 0xc3,
- 0x55, 0x0f, 0xc8, 0x98, 0xcc, 0x87, 0x7c, 0x0f, 0xc9, 0x11, 0x4a, 0xa2,
- 0x20, 0xc0, 0x3c, 0x4d, 0xc2, 0x00, 0x58, 0x01, 0x15, 0xe3, 0x00, 0x3c,
- 0x5f, 0x04, 0xc0, 0x3c, 0x65, 0x0b, 0xc0, 0x3c, 0x71, 0x47, 0x35, 0x38,
- 0xc0, 0x3c, 0x7d, 0xd3, 0x40, 0xb7, 0x01, 0x01, 0x79, 0xc8, 0xbf, 0x7d,
- 0x0f, 0xa6, 0xd9, 0xca, 0xa0, 0x72, 0x0f, 0xcf, 0xf8, 0x10, 0xc0, 0x3c,
- 0x89, 0x94, 0x01, 0x15, 0xeb, 0x00, 0x3c, 0x93, 0x16, 0xc0, 0x3c, 0xa8,
- 0x00, 0xc0, 0x3c, 0xb9, 0x42, 0x00, 0x50, 0xc0, 0x3c, 0xdc, 0xc2, 0x02,
- 0x60, 0x0f, 0xa2, 0x19, 0xcc, 0x40, 0xbe, 0x00, 0x05, 0x00, 0xca, 0x9e,
- 0xa6, 0x0f, 0x0a, 0x79, 0x0e, 0xc0, 0x3c, 0xe8, 0x46, 0x06, 0x97, 0xc0,
- 0x3c, 0xf4, 0x15, 0xc0, 0x3d, 0x18, 0x45, 0x27, 0x0d, 0x40, 0x3d, 0x24,
- 0x44, 0x6d, 0x21, 0xc0, 0x3d, 0x40, 0x0f, 0xc0, 0x3d, 0x4c, 0xca, 0x9d,
- 0x7a, 0x0f, 0xa9, 0x49, 0xc2, 0x03, 0xc7, 0x00, 0x00, 0x00, 0xc5, 0x13,
- 0x89, 0x01, 0x16, 0x1b, 0x00, 0x3d, 0x58, 0xcc, 0x08, 0x9b, 0x01, 0x16,
- 0x11, 0x48, 0x1a, 0xfa, 0xc0, 0x3d, 0x5e, 0x15, 0xc0, 0x3d, 0x6a, 0x05,
- 0xc0, 0x3d, 0x76, 0xc7, 0x06, 0xe0, 0x01, 0x10, 0x79, 0xce, 0x6c, 0xb9,
- 0x01, 0x50, 0x49, 0xd2, 0x49, 0x4a, 0x01, 0x57, 0xf8, 0xca, 0x9b, 0xa4,
- 0x00, 0x3f, 0xf9, 0x06, 0xc0, 0x3d, 0x82, 0x0e, 0xc0, 0x3d, 0x94, 0xd0,
- 0x0f, 0xfb, 0x00, 0x3f, 0xc9, 0x43, 0x00, 0xaa, 0xc0, 0x3d, 0xa6, 0x47,
- 0x10, 0x5e, 0xc0, 0x3d, 0xb2, 0xd4, 0x3b, 0x75, 0x00, 0x3f, 0xa0, 0x10,
- 0xc0, 0x3d, 0xbe, 0xce, 0x73, 0xab, 0x0f, 0x98, 0x18, 0x46, 0x05, 0xef,
- 0xc0, 0x3d, 0xca, 0x44, 0x07, 0x0d, 0x40, 0x3d, 0xec, 0x44, 0xe6, 0x03,
- 0xc0, 0x3e, 0x0e, 0x12, 0xc0, 0x3e, 0x1a, 0x00, 0x40, 0x3e, 0x26, 0xc3,
- 0x01, 0x95, 0x0f, 0xcc, 0x29, 0xcf, 0x6b, 0x0f, 0x01, 0x33, 0x89, 0x94,
- 0x0f, 0xa2, 0x12, 0x00, 0x3e, 0x38, 0x89, 0x0f, 0xca, 0xd1, 0x52, 0x4e,
- 0x5a, 0x40, 0x3e, 0x45, 0x16, 0xc0, 0x3e, 0xc1, 0x05, 0xc0, 0x3e, 0xcb,
- 0xd1, 0x57, 0x17, 0x0f, 0xb0, 0x88, 0x15, 0xc0, 0x3e, 0xd7, 0x42, 0x00,
- 0x79, 0xc0, 0x3e, 0xe1, 0xc9, 0xb0, 0x06, 0x00, 0x9b, 0x09, 0xc9, 0x11,
- 0xdc, 0x00, 0x9b, 0x11, 0x12, 0xc0, 0x3e, 0xeb, 0xcd, 0x2c, 0x41, 0x00,
- 0x9b, 0x39, 0x46, 0x06, 0x97, 0xc0, 0x3e, 0xf7, 0x47, 0x33, 0xef, 0xc0,
- 0x3f, 0x15, 0x4b, 0x94, 0x7e, 0x40, 0x3f, 0x33, 0x07, 0xc0, 0x3f, 0x59,
- 0x47, 0xc6, 0xab, 0xc0, 0x3f, 0x74, 0x88, 0x0f, 0xce, 0xe9, 0x4d, 0x7f,
- 0x1c, 0x40, 0x3f, 0x80, 0x00, 0xc0, 0x3f, 0xf9, 0xc6, 0x5b, 0xc6, 0x01,
- 0x33, 0x50, 0xc6, 0x30, 0x19, 0x01, 0x38, 0x4b, 0x00, 0x40, 0x09, 0xca,
- 0x3d, 0xaf, 0x01, 0x1c, 0x31, 0x42, 0x00, 0x59, 0xc0, 0x40, 0x0f, 0x00,
- 0xc0, 0x40, 0x1b, 0xc5, 0xd7, 0xeb, 0x00, 0x00, 0x28, 0x4b, 0x98, 0x30,
- 0xc0, 0x40, 0x2d, 0x4b, 0x95, 0xff, 0xc0, 0x40, 0x39, 0x48, 0xbb, 0x85,
- 0x40, 0x40, 0x45, 0x42, 0x00, 0x92, 0xc0, 0x40, 0x51, 0x0b, 0x40, 0x40,
- 0x5b, 0x46, 0xd1, 0x59, 0xc0, 0x40, 0x67, 0xc4, 0x61, 0x7e, 0x00, 0x00,
- 0xd8, 0xcc, 0x81, 0xac, 0x01, 0x08, 0x39, 0x42, 0x00, 0x39, 0x40, 0x40,
- 0x71, 0x95, 0x0f, 0xa2, 0x01, 0xc7, 0xb2, 0x6c, 0x0f, 0xa2, 0x98, 0x0b,
- 0xc0, 0x40, 0x83, 0x4c, 0x82, 0xfc, 0xc0, 0x40, 0x8f, 0x42, 0x00, 0xbb,
- 0xc0, 0x40, 0xab, 0x47, 0xc5, 0xe7, 0xc0, 0x40, 0xb7, 0x47, 0xc9, 0x28,
- 0x40, 0x40, 0xeb, 0xc5, 0xd6, 0x1f, 0x0f, 0xcc, 0x69, 0xc4, 0xe2, 0x6f,
- 0x0f, 0x9e, 0x61, 0x03, 0xc0, 0x41, 0x15, 0xc6, 0xcc, 0xa3, 0x0f, 0xbb,
- 0xe9, 0xc5, 0xd1, 0x9c, 0x0f, 0xcb, 0xe9, 0xc3, 0x07, 0x43, 0x0f, 0xd5,
- 0xf9, 0x4c, 0x89, 0x74, 0x40, 0x41, 0x1f, 0x07, 0xc0, 0x41, 0x93, 0x03,
- 0xc0, 0x41, 0xa3, 0x0b, 0xc0, 0x41, 0xbb, 0x11, 0x40, 0x41, 0xc7, 0xc2,
- 0x00, 0xbb, 0x01, 0x34, 0xcb, 0x00, 0x41, 0xd3, 0x0f, 0xc0, 0x41, 0xd9,
- 0x11, 0xc0, 0x41, 0xe5, 0xcf, 0x68, 0xb7, 0x01, 0x05, 0x81, 0xc3, 0x75,
- 0x97, 0x0f, 0xce, 0xf1, 0xc7, 0xca, 0xcc, 0x01, 0x80, 0x98, 0xca, 0xa7,
- 0xac, 0x01, 0x09, 0xb9, 0x14, 0xc0, 0x41, 0xf1, 0xc9, 0xb0, 0x84, 0x0f,
- 0xd6, 0xd0, 0xc6, 0xd4, 0x29, 0x0f, 0x9d, 0x91, 0xc4, 0xba, 0x9f, 0x0f,
- 0xce, 0x20, 0x11, 0xc0, 0x41, 0xfe, 0xca, 0xa5, 0xcc, 0x01, 0x4f, 0x31,
- 0x03, 0x40, 0x42, 0x10, 0x43, 0x01, 0x93, 0xc0, 0x42, 0x1c, 0xc3, 0x12,
- 0x72, 0x0f, 0xbb, 0xf9, 0xd0, 0x5a, 0x52, 0x01, 0x3e, 0x39, 0xcc, 0x87,
- 0x64, 0x01, 0x31, 0x31, 0x0b, 0xc0, 0x42, 0x28, 0x45, 0x0d, 0x4c, 0x40,
- 0x42, 0x34, 0xc2, 0x00, 0x28, 0x0f, 0xcd, 0x31, 0x4b, 0x91, 0xdf, 0x40,
- 0x42, 0x40, 0x47, 0xc8, 0x09, 0xc0, 0x42, 0x58, 0x07, 0xc0, 0x42, 0x76,
- 0x52, 0x2b, 0x7a, 0xc0, 0x42, 0x80, 0xc3, 0x00, 0xb6, 0x0f, 0xce, 0x28,
- 0x07, 0xc0, 0x42, 0x86, 0xc7, 0xc5, 0x46, 0x01, 0x36, 0x71, 0xc8, 0x12,
- 0xa0, 0x01, 0x30, 0x69, 0x42, 0x00, 0x30, 0x40, 0x42, 0x90, 0x06, 0xc0,
- 0x42, 0x9f, 0x47, 0xc3, 0x7f, 0xc0, 0x42, 0xa9, 0xc3, 0x0c, 0x5b, 0x0f,
- 0xd6, 0x90, 0x16, 0xc0, 0x42, 0xd1, 0xc8, 0xb7, 0x95, 0x01, 0x09, 0x28,
- 0x42, 0x00, 0x29, 0xc0, 0x42, 0xdd, 0x16, 0xc0, 0x43, 0x01, 0xc9, 0xb2,
- 0x58, 0x0f, 0xbb, 0xa8, 0xd1, 0x56, 0xf5, 0x01, 0x1f, 0xf9, 0x46, 0x3a,
- 0xdd, 0xc0, 0x43, 0x0d, 0xda, 0x1b, 0xa5, 0x07, 0xff, 0xe0, 0x0e, 0xc0,
- 0x43, 0x19, 0xcb, 0x94, 0xd6, 0x0f, 0xcb, 0xa8, 0x44, 0x78, 0xea, 0xc0,
- 0x43, 0x28, 0xc4, 0xcd, 0x23, 0x00, 0x16, 0xd8, 0x46, 0xd4, 0xad, 0xc0,
- 0x43, 0x40, 0x44, 0x3f, 0x4b, 0x40, 0x43, 0x4c, 0x46, 0xcc, 0xf7, 0xc0,
- 0x43, 0x58, 0x51, 0x56, 0xa0, 0xc0, 0x43, 0x9b, 0x4a, 0x52, 0xb5, 0x40,
- 0x43, 0xb3, 0x15, 0xc0, 0x43, 0xcb, 0x42, 0x01, 0x7d, 0xc0, 0x43, 0xd7,
- 0x48, 0x12, 0x30, 0xc0, 0x43, 0xe3, 0x45, 0x00, 0xba, 0xc0, 0x43, 0xef,
- 0xd4, 0x3e, 0x59, 0x08, 0xd1, 0x99, 0x47, 0x02, 0x90, 0xc0, 0x44, 0x07,
- 0x46, 0x34, 0xbb, 0x40, 0x44, 0x63, 0xce, 0x75, 0xbf, 0x01, 0x17, 0xf9,
- 0x14, 0xc0, 0x44, 0x6f, 0x15, 0xc0, 0x44, 0x81, 0x45, 0x02, 0x4d, 0xc0,
- 0x44, 0x8d, 0xca, 0x9d, 0xca, 0x01, 0x4c, 0x11, 0xd6, 0x2b, 0xbd, 0x01,
- 0x53, 0x20, 0x49, 0xb2, 0xc4, 0xc0, 0x44, 0x99, 0xc2, 0x05, 0xd5, 0x01,
- 0x5f, 0x11, 0xc8, 0xbe, 0xcd, 0x0f, 0xcc, 0x98, 0x47, 0xcb, 0x12, 0xc0,
- 0x44, 0xab, 0x47, 0xc5, 0x15, 0xc0, 0x44, 0xdb, 0xcc, 0x8b, 0x54, 0x0f,
- 0x9c, 0x19, 0x94, 0x0f, 0xd6, 0xc8, 0xc2, 0x00, 0x10, 0x01, 0x35, 0xa9,
- 0xc5, 0xd7, 0x28, 0x01, 0x32, 0x19, 0xc6, 0xcf, 0x67, 0x0f, 0xc9, 0xc8,
- 0xc6, 0xcb, 0xfb, 0x0f, 0xab, 0xc9, 0xc2, 0x00, 0x34, 0x01, 0x50, 0xe8,
- 0xc3, 0x3d, 0x57, 0x0f, 0xa2, 0xb1, 0xc7, 0xc3, 0xb7, 0x0f, 0xd5, 0xe0,
- 0xc9, 0x47, 0xe5, 0x01, 0x33, 0x49, 0x42, 0x03, 0xdc, 0xc0, 0x45, 0x0b,
- 0xd9, 0x1e, 0xd8, 0x01, 0x50, 0xb1, 0xc3, 0x57, 0xfe, 0x0f, 0xbb, 0xf0,
- 0xcb, 0x60, 0x77, 0x01, 0x12, 0xf9, 0x00, 0x40, 0x45, 0x17, 0xc6, 0xce,
- 0x23, 0x01, 0x31, 0x79, 0x00, 0x40, 0x45, 0x23, 0x45, 0xd8, 0x1d, 0xc0,
- 0x45, 0x2f, 0xca, 0xa5, 0x90, 0x0f, 0xa4, 0xd9, 0xc6, 0x0b, 0x2a, 0x00,
- 0x05, 0x28, 0x42, 0x00, 0x69, 0xc0, 0x45, 0x41, 0xc8, 0xb7, 0x85, 0x0f,
- 0xcb, 0x59, 0xc2, 0x1b, 0xd8, 0x0f, 0xb7, 0xb1, 0x50, 0x5f, 0xe2, 0xc0,
- 0x45, 0x4c, 0x06, 0x40, 0x45, 0xce, 0xc8, 0xbd, 0xdd, 0x01, 0x36, 0x81,
- 0x07, 0xc0, 0x45, 0xd8, 0x42, 0x00, 0x59, 0xc0, 0x45, 0xe5, 0x11, 0xc0,
- 0x45, 0xf4, 0x12, 0xc0, 0x45, 0xfe, 0x14, 0xc0, 0x46, 0x0a, 0x4b, 0x8b,
- 0xd9, 0x40, 0x46, 0x16, 0xc6, 0xcc, 0x25, 0x01, 0x32, 0x89, 0xc6, 0xd0,
- 0x39, 0x01, 0x71, 0xf8, 0xc5, 0xd4, 0x24, 0x01, 0x31, 0x21, 0xc5, 0xdc,
- 0xc3, 0x01, 0x08, 0x30, 0xc9, 0x0b, 0x27, 0x01, 0x31, 0x09, 0x50, 0x5a,
- 0x02, 0x40, 0x46, 0x8e, 0xc3, 0x03, 0x79, 0x0f, 0xa7, 0xbb, 0x00, 0x46,
- 0x9a, 0xc4, 0x27, 0x5e, 0x0f, 0x9e, 0xa8, 0xc5, 0x7c, 0x9a, 0x0f, 0xa6,
- 0x29, 0xc9, 0xac, 0x5e, 0x0f, 0xc8, 0xc8, 0xc5, 0x11, 0x01, 0x0f, 0xa1,
- 0x8a, 0x00, 0x46, 0xa0, 0x42, 0x68, 0x6b, 0xc0, 0x46, 0xa6, 0x08, 0x40,
- 0x46, 0xb2, 0x14, 0xc0, 0x46, 0xba, 0x05, 0xc0, 0x46, 0xc4, 0x15, 0xc0,
- 0x46, 0xde, 0x12, 0xc0, 0x47, 0x02, 0x04, 0xc0, 0x47, 0x0e, 0x16, 0xc0,
- 0x47, 0x24, 0x46, 0xd0, 0xcf, 0xc0, 0x47, 0x3c, 0x06, 0xc0, 0x47, 0x48,
- 0x0e, 0xc0, 0x47, 0x5a, 0x0a, 0xc0, 0x47, 0x66, 0x0f, 0xc0, 0x47, 0x78,
- 0x19, 0xc0, 0x47, 0x80, 0x08, 0xc0, 0x47, 0x8a, 0x0c, 0xc0, 0x47, 0x96,
- 0x07, 0xc0, 0x47, 0xa2, 0x44, 0x46, 0xf2, 0xc0, 0x47, 0xb4, 0xc3, 0x1e,
- 0x5b, 0x01, 0x75, 0xc9, 0x09, 0x40, 0x47, 0xc4, 0x96, 0x01, 0x8e, 0x03,
- 0x00, 0x47, 0xd0, 0xc2, 0x23, 0x27, 0x01, 0x8e, 0x09, 0xc2, 0x79, 0x6b,
- 0x01, 0x8e, 0x11, 0xc3, 0xe6, 0xf4, 0x01, 0x8e, 0x19, 0x95, 0x01, 0x8e,
- 0x8b, 0x00, 0x47, 0xd4, 0x8a, 0x01, 0x8e, 0x83, 0x00, 0x47, 0xee, 0x90,
- 0x01, 0x8e, 0x79, 0x92, 0x01, 0x8e, 0x93, 0x00, 0x48, 0x06, 0x86, 0x01,
- 0x8e, 0xa1, 0x93, 0x01, 0x8f, 0x18, 0x42, 0x00, 0xe0, 0xc0, 0x48, 0x12,
- 0x07, 0xc0, 0x48, 0x21, 0x14, 0xc0, 0x48, 0x2d, 0xcb, 0x93, 0xef, 0x0f,
- 0x9e, 0x09, 0xc5, 0xda, 0xe8, 0x0f, 0x99, 0x80, 0x0b, 0xc0, 0x48, 0x37,
- 0x14, 0xc0, 0x48, 0x41, 0x44, 0xe2, 0x1b, 0xc0, 0x48, 0x4d, 0x42, 0x00,
- 0x3b, 0x40, 0x48, 0x77, 0xc3, 0x02, 0xa7, 0x01, 0x35, 0xb9, 0xc4, 0x7e,
- 0x89, 0x01, 0x31, 0x39, 0xc5, 0xd8, 0x45, 0x0f, 0xa1, 0xf9, 0xc4, 0x03,
- 0xe5, 0x0f, 0xa0, 0xa1, 0xc2, 0x18, 0x9f, 0x0f, 0xce, 0x92, 0x00, 0x48,
- 0x95, 0x48, 0xba, 0x7d, 0xc0, 0x48, 0x9b, 0xca, 0xa2, 0x8e, 0x0f, 0x9b,
- 0x59, 0xc7, 0xcb, 0x97, 0x0f, 0xcb, 0x10, 0xc3, 0x1d, 0x23, 0x0f, 0xd3,
- 0xe1, 0xca, 0xa1, 0x4e, 0x01, 0x05, 0x10, 0x44, 0x00, 0x34, 0xc0, 0x48,
- 0xa7, 0xc9, 0xab, 0xf2, 0x0f, 0xa9, 0x70, 0x42, 0x05, 0x88, 0xc0, 0x48,
- 0xb3, 0xc2, 0x01, 0x07, 0x0f, 0xa2, 0x89, 0xc6, 0xcf, 0x73, 0x0f, 0xa0,
- 0x51, 0xc6, 0xcf, 0xc1, 0x0f, 0xca, 0x80, 0xc8, 0xb8, 0xc5, 0x0f, 0xa5,
- 0x99, 0xca, 0x3d, 0x08, 0x0f, 0x98, 0xc8, 0xcd, 0x81, 0x3e, 0x0f, 0x9e,
- 0x78, 0xc4, 0x96, 0x8c, 0x0f, 0xcb, 0x29, 0x0d, 0x40, 0x48, 0xc3, 0x47,
- 0x11, 0x39, 0xc0, 0x48, 0xcf, 0xc2, 0x00, 0x49, 0x01, 0x30, 0x21, 0x12,
- 0xc0, 0x49, 0x35, 0x0f, 0x40, 0x49, 0x4d, 0x42, 0x00, 0x44, 0xc0, 0x49,
- 0x57, 0xce, 0x70, 0x39, 0x0f, 0xa4, 0x89, 0xcb, 0x9a, 0xb9, 0x0f, 0xb6,
- 0x58, 0xc8, 0xba, 0xe5, 0x01, 0x30, 0x61, 0x16, 0xc0, 0x49, 0x63, 0xca,
- 0x9e, 0xba, 0x01, 0x19, 0x91, 0x4a, 0xa7, 0xde, 0xc0, 0x49, 0x7b, 0xce,
- 0x75, 0x95, 0x0f, 0x9f, 0x51, 0x08, 0xc0, 0x49, 0x87, 0xd5, 0x37, 0x18,
- 0x01, 0x53, 0x68, 0xcb, 0x8d, 0xf6, 0x01, 0x12, 0xc1, 0xc2, 0x00, 0x92,
- 0x0f, 0xd5, 0xc1, 0xd2, 0x4c, 0x50, 0x01, 0x72, 0x78, 0xc2, 0x00, 0xb7,
- 0x00, 0x01, 0xd3, 0x00, 0x49, 0x99, 0xcd, 0x80, 0xfd, 0x0f, 0xa5, 0x28,
- 0x0b, 0xc0, 0x49, 0x9d, 0xc7, 0xc3, 0xe8, 0x0f, 0x9a, 0xd0, 0xc5, 0x11,
- 0x01, 0x0f, 0xa1, 0x70, 0x1b, 0xc0, 0x49, 0xa7, 0x44, 0x1b, 0xe7, 0x40,
- 0x49, 0xb3, 0x46, 0x82, 0xe6, 0xc0, 0x49, 0xd1, 0xc6, 0xcc, 0x13, 0x0f,
- 0xa6, 0x58, 0xc7, 0x72, 0xb6, 0x0f, 0xc9, 0x09, 0x42, 0x02, 0x60, 0xc0,
- 0x49, 0xdd, 0x42, 0x00, 0xe0, 0xc0, 0x49, 0xe9, 0xc2, 0x04, 0xdd, 0x01,
- 0x30, 0x0a, 0x00, 0x49, 0xf5, 0xd3, 0x42, 0xcb, 0x0f, 0xac, 0x09, 0x42,
- 0x01, 0xc8, 0xc0, 0x49, 0xfb, 0xcf, 0x6a, 0x97, 0x0f, 0x9e, 0xd8, 0x42,
- 0x00, 0x54, 0xc0, 0x4a, 0x07, 0x17, 0x40, 0x4a, 0x11, 0xc8, 0xc0, 0x7d,
- 0x0f, 0x98, 0x30, 0xc3, 0x8f, 0xe3, 0x0f, 0xb6, 0x19, 0xc3, 0x01, 0x0a,
- 0x0f, 0x9b, 0x70, 0x45, 0x00, 0xcb, 0xc0, 0x4a, 0x23, 0x51, 0x57, 0x28,
- 0xc0, 0x4a, 0x73, 0x4d, 0x7a, 0xe5, 0x40, 0x4a, 0x85, 0x0e, 0xc0, 0x4a,
- 0x9f, 0xe0, 0x08, 0x47, 0x01, 0x3b, 0x09, 0x14, 0x40, 0x4a, 0xab, 0x00,
- 0xc0, 0x4a, 0xb7, 0xc3, 0x5d, 0x04, 0x01, 0x5f, 0x01, 0xc4, 0x29, 0xf3,
- 0x0f, 0xce, 0x08, 0x42, 0x00, 0x9c, 0xc0, 0x4a, 0xc3, 0xc5, 0x00, 0xca,
- 0x00, 0x05, 0x10, 0xc5, 0x00, 0xca, 0x01, 0x05, 0xa9, 0xc3, 0x14, 0x4e,
- 0x00, 0x05, 0xc0, 0x15, 0xc0, 0x4a, 0xcf, 0x47, 0x02, 0x90, 0xc0, 0x4a,
- 0xdb, 0x49, 0x07, 0x17, 0xc0, 0x4a, 0xed, 0xce, 0x6f, 0x21, 0x00, 0x24,
- 0x11, 0xc6, 0x4b, 0x24, 0x05, 0x33, 0xf1, 0xc7, 0xc5, 0x69, 0x05, 0x33,
- 0xf8, 0x50, 0x5e, 0x92, 0xc0, 0x4a, 0xf9, 0x4d, 0x7c, 0xc6, 0x40, 0x4b,
- 0x07, 0xce, 0x73, 0x81, 0x00, 0x04, 0x99, 0xc5, 0x1b, 0x38, 0x01, 0x10,
- 0xb0, 0x49, 0xb2, 0x8e, 0x40, 0x4b, 0x4b, 0x8e, 0x0f, 0xcd, 0x69, 0x96,
- 0x0f, 0xa5, 0xd0, 0xcb, 0x93, 0xe4, 0x01, 0x35, 0xe1, 0xc7, 0xb5, 0xcc,
- 0x07, 0xf2, 0x28, 0xc7, 0xc2, 0xb4, 0x01, 0x35, 0xd1, 0x06, 0xc0, 0x4b,
- 0x6f, 0xc5, 0x37, 0x91, 0x00, 0x01, 0xd8, 0x16, 0xc0, 0x4b, 0x75, 0xcf,
- 0x63, 0x17, 0x0f, 0xca, 0x40, 0xc9, 0xad, 0xcf, 0x01, 0x09, 0x01, 0x45,
- 0x28, 0x79, 0x40, 0x4b, 0x81, 0xc5, 0xd8, 0x90, 0x0f, 0x99, 0x89, 0xcf,
- 0x61, 0x28, 0x0f, 0xb2, 0x40, 0x43, 0x01, 0x95, 0xc0, 0x4b, 0x87, 0xc6,
- 0xd4, 0x59, 0x01, 0x11, 0xf9, 0x45, 0xd5, 0xf2, 0x40, 0x4b, 0x91, 0x48,
- 0xb8, 0x6d, 0xc0, 0x4b, 0xad, 0xcd, 0x77, 0x30, 0x0f, 0xc8, 0xc0, 0x42,
- 0x01, 0xbb, 0xc0, 0x4b, 0xff, 0xd5, 0x32, 0x17, 0x01, 0x39, 0xd1, 0xcd,
- 0x7d, 0x48, 0x01, 0x00, 0x30, 0x45, 0x78, 0xe9, 0xc0, 0x4c, 0x0b, 0x46,
- 0x3a, 0x88, 0x40, 0x4c, 0x2b, 0xcd, 0x79, 0x1e, 0x01, 0x53, 0x61, 0x43,
- 0x06, 0xd2, 0xc0, 0x4c, 0x37, 0x46, 0x00, 0x95, 0x40, 0x4c, 0x43, 0xc8,
- 0xbc, 0x1d, 0x0f, 0xd3, 0xd1, 0x42, 0x00, 0xc2, 0xc0, 0x4c, 0x4f, 0xd3,
- 0x46, 0xcd, 0x01, 0x71, 0xe0, 0x16, 0xc0, 0x4c, 0x5b, 0x14, 0xc0, 0x4c,
- 0x67, 0x46, 0xd0, 0x81, 0xc0, 0x4c, 0x71, 0xcd, 0x30, 0x12, 0x0f, 0xac,
- 0x19, 0xc4, 0x06, 0x1d, 0x0f, 0x9e, 0xf9, 0xcc, 0x8b, 0x3c, 0x0f, 0xce,
- 0x68, 0xd7, 0x29, 0x23, 0x01, 0x39, 0x49, 0x03, 0xc0, 0x4c, 0x7d, 0x0b,
- 0x40, 0x4c, 0x89, 0xc6, 0xcc, 0x97, 0x01, 0x1f, 0x89, 0xc8, 0xc0, 0x6d,
- 0x0f, 0xaf, 0x00, 0xce, 0x71, 0x89, 0x0f, 0x9c, 0xc9, 0xc2, 0x03, 0xa4,
- 0x0f, 0xb6, 0x99, 0xce, 0x71, 0x27, 0x0f, 0xca, 0xc8, 0x00, 0x40, 0x4c,
- 0x95, 0x16, 0xc0, 0x4c, 0xa1, 0xca, 0x84, 0xa2, 0x0f, 0xd7, 0x08, 0xc4,
- 0xe5, 0x5f, 0x0f, 0xcc, 0xa9, 0x47, 0xcb, 0x3c, 0x40, 0x4c, 0xad, 0x48,
- 0x10, 0xe1, 0xc0, 0x4c, 0xc9, 0xc5, 0xd6, 0x29, 0x0f, 0xcb, 0x50, 0xc3,
- 0x06, 0xff, 0x01, 0x32, 0x21, 0xc6, 0xd3, 0xc9, 0x0f, 0xb7, 0x82, 0x00,
- 0x4c, 0xd5, 0x4c, 0x10, 0x7e, 0xc0, 0x4c, 0xdb, 0xd1, 0x49, 0xa4, 0x00,
- 0x41, 0xb1, 0x0f, 0xc0, 0x4d, 0x05, 0x4b, 0x6f, 0xcc, 0xc0, 0x4d, 0x11,
- 0x47, 0x02, 0x90, 0x40, 0x4d, 0x35, 0xc4, 0xe0, 0x83, 0x0f, 0xcd, 0xd1,
- 0xc3, 0x0e, 0x9f, 0x0f, 0xcf, 0xb8, 0xc2, 0x20, 0x3e, 0x0f, 0xcd, 0x41,
- 0xc2, 0x03, 0xc7, 0x0f, 0xa4, 0x02, 0x00, 0x4d, 0x8d, 0xc2, 0x00, 0x28,
- 0x01, 0x37, 0xb9, 0xcd, 0x7d, 0xb0, 0x0f, 0x9d, 0xf8, 0x16, 0xc0, 0x4d,
- 0x93, 0x12, 0x40, 0x4d, 0x9d, 0x86, 0x0f, 0xb7, 0xb9, 0xca, 0xa2, 0x2a,
- 0x0f, 0xab, 0xa9, 0x42, 0x04, 0x37, 0x40, 0x4d, 0xa7, 0x46, 0x71, 0x6f,
- 0xc0, 0x4d, 0xb3, 0xcb, 0x9a, 0xc4, 0x0f, 0x9a, 0xa8, 0x45, 0x03, 0x1d,
- 0xc0, 0x4d, 0xbf, 0xce, 0x71, 0xeb, 0x05, 0x33, 0x98, 0xc3, 0x19, 0x18,
- 0x0f, 0xcc, 0x81, 0xc2, 0x03, 0x87, 0x0f, 0xc9, 0xb8, 0x14, 0xc0, 0x4d,
- 0xcb, 0x4c, 0x02, 0x2b, 0xc0, 0x4d, 0xd5, 0xc5, 0xdd, 0x36, 0x01, 0x30,
- 0xc1, 0x18, 0xc0, 0x4d, 0xe7, 0xd0, 0x60, 0x82, 0x0f, 0xca, 0xc1, 0xc9,
- 0xa0, 0x91, 0x0f, 0xd7, 0x40, 0xc3, 0x01, 0xd3, 0x0f, 0xb5, 0xf9, 0x42,
- 0x03, 0x3d, 0xc0, 0x4d, 0xf3, 0xd0, 0x5d, 0xd2, 0x01, 0x1b, 0xe9, 0xca,
- 0xa0, 0x40, 0x0f, 0x99, 0x01, 0x46, 0x27, 0x5d, 0xc0, 0x4e, 0x07, 0xdd,
- 0x10, 0xfd, 0x0f, 0xc9, 0x78, 0xca, 0x9d, 0x98, 0x01, 0x37, 0x49, 0x43,
- 0x02, 0x4f, 0xc0, 0x4e, 0x13, 0x92, 0x0f, 0xb5, 0x11, 0xc3, 0x1e, 0x74,
- 0x0f, 0xb7, 0x08, 0x43, 0xc4, 0xbc, 0xc0, 0x4e, 0x1f, 0xc4, 0xc9, 0xcc,
- 0x0f, 0xb7, 0xa0, 0xc3, 0x0a, 0x4a, 0x01, 0x34, 0xb1, 0xc2, 0x19, 0x1c,
- 0x0f, 0xcf, 0x18, 0x44, 0x0a, 0x11, 0xc0, 0x4e, 0x2b, 0xc4, 0x41, 0x2e,
- 0x01, 0x08, 0x41, 0x07, 0xc0, 0x4e, 0x3d, 0xc3, 0x3a, 0xbf, 0x0f, 0xa6,
- 0xe0, 0xc8, 0xbd, 0xad, 0x0f, 0x9c, 0x90, 0xc5, 0x28, 0xf0, 0x01, 0x3a,
- 0x21, 0xc3, 0x14, 0x99, 0x01, 0x30, 0x1b, 0x00, 0x4e, 0x49, 0xd0, 0x5d,
- 0xb2, 0x0f, 0x9e, 0xa1, 0xc7, 0xc7, 0x8b, 0x0f, 0x9e, 0x10, 0xc2, 0x00,
- 0x31, 0x0f, 0xa0, 0x61, 0xc2, 0x01, 0x05, 0x0f, 0xa0, 0x68, 0x43, 0x00,
- 0x6e, 0xc0, 0x4e, 0x4f, 0xd6, 0x2e, 0x93, 0x01, 0x08, 0xb8, 0xd6, 0x1e,
- 0xf4, 0x0f, 0xb3, 0x53, 0x00, 0x4e, 0x5b, 0xc2, 0x05, 0xd5, 0x00, 0x01,
- 0x7a, 0x00, 0x4e, 0x61, 0x4e, 0x6e, 0x5d, 0xc0, 0x4e, 0x67, 0xdb, 0x15,
- 0x9d, 0x08, 0xd5, 0x03, 0x00, 0x4e, 0x6f, 0x45, 0x00, 0xba, 0xc0, 0x4e,
- 0x75, 0x15, 0xc0, 0x4e, 0x8d, 0xcf, 0x69, 0x11, 0x08, 0xd4, 0xc1, 0x55,
- 0x33, 0xd0, 0xc0, 0x4e, 0x99, 0x57, 0x29, 0x51, 0xc0, 0x4e, 0xc9, 0x47,
- 0x02, 0x90, 0xc0, 0x4e, 0xd9, 0x46, 0x34, 0xbb, 0x40, 0x4f, 0x33, 0xc8,
- 0xbd, 0x85, 0x01, 0x35, 0xe9, 0xc2, 0x00, 0xe1, 0x0f, 0xcf, 0x30, 0xd4,
- 0x39, 0x59, 0x01, 0x1c, 0xa1, 0x00, 0xc0, 0x4f, 0x3f, 0xc4, 0x15, 0x86,
- 0x0f, 0xca, 0x70, 0x46, 0x06, 0x97, 0xc0, 0x4f, 0x51, 0x47, 0x02, 0x90,
- 0x40, 0x4f, 0x75, 0x4c, 0x10, 0x7e, 0xc0, 0x4f, 0xef, 0x47, 0x33, 0xef,
- 0xc0, 0x50, 0x01, 0x4a, 0x52, 0xb5, 0xc0, 0x50, 0x0e, 0xd0, 0x58, 0x02,
- 0x08, 0x7a, 0x29, 0x47, 0x02, 0x90, 0x40, 0x50, 0x38, 0x42, 0x00, 0x9c,
- 0xc0, 0x50, 0x95, 0xd8, 0x21, 0xa8, 0x01, 0x3d, 0x38, 0x48, 0x1d, 0x37,
- 0xc0, 0x50, 0x9f, 0xc5, 0xdc, 0x5a, 0x01, 0x19, 0x78, 0xc6, 0xd3, 0x8d,
- 0x0f, 0xaa, 0x69, 0xcd, 0x63, 0x55, 0x00, 0x00, 0xb0, 0x43, 0x6b, 0x16,
- 0xc0, 0x50, 0xf3, 0xc3, 0x00, 0x62, 0x0f, 0xa4, 0x48, 0x47, 0x02, 0x90,
- 0xc0, 0x51, 0x4b, 0x45, 0x00, 0xcb, 0xc0, 0x51, 0xa1, 0x4b, 0x6f, 0xcc,
- 0xc0, 0x51, 0xb1, 0x4c, 0x81, 0xc4, 0x40, 0x51, 0xc7, 0x07, 0xc0, 0x51,
- 0xd7, 0xca, 0xa5, 0x86, 0x01, 0x05, 0xb9, 0x42, 0x08, 0x0e, 0x40, 0x51,
- 0xe3, 0x43, 0x34, 0x22, 0xc0, 0x51, 0xf8, 0xc6, 0xd0, 0x1b, 0x0f, 0x9a,
- 0xe9, 0xc2, 0x00, 0x69, 0x00, 0x01, 0x00, 0x49, 0x74, 0x04, 0x40, 0x52,
- 0x05, 0x44, 0x03, 0x7a, 0xc0, 0x52, 0x11, 0xc3, 0x06, 0x25, 0x0f, 0xab,
- 0xba, 0x00, 0x52, 0x23, 0xc9, 0xaf, 0x2e, 0x0f, 0x9e, 0x29, 0xcb, 0x96,
- 0xa4, 0x0f, 0xa1, 0x99, 0x11, 0xc0, 0x52, 0x29, 0xc3, 0x00, 0x62, 0x0f,
- 0xcf, 0xe8, 0x15, 0xc0, 0x52, 0x33, 0xc4, 0xe1, 0x03, 0x0f, 0xcd, 0xc1,
- 0xc7, 0xca, 0x16, 0x0f, 0xcd, 0xc8, 0x00, 0xc0, 0x52, 0x3f, 0x47, 0xc9,
- 0xad, 0xc0, 0x52, 0x4b, 0xc6, 0x8e, 0x3d, 0x0f, 0x99, 0xd9, 0xc4, 0xad,
- 0xdd, 0x0f, 0x98, 0x2b, 0x00, 0x52, 0x75, 0xd2, 0x4b, 0x42, 0x0f, 0x98,
- 0x38, 0xc6, 0x07, 0x3a, 0x01, 0x1d, 0x99, 0xc3, 0x03, 0x27, 0x01, 0x1d,
- 0x91, 0xcd, 0x76, 0x1f, 0x01, 0x50, 0x58, 0x00, 0x40, 0x52, 0x7b, 0x43,
- 0x02, 0x5d, 0xc0, 0x52, 0x93, 0x46, 0x0a, 0x0f, 0xc0, 0x52, 0xa8, 0xc6,
- 0xb4, 0x02, 0x00, 0x00, 0xd0, 0xcc, 0x8c, 0x5c, 0x01, 0x11, 0x79, 0xc2,
- 0x00, 0x28, 0x0f, 0x9e, 0x20, 0xc2, 0x00, 0x0a, 0x0f, 0x9b, 0x19, 0xcf,
- 0x68, 0xc6, 0x0f, 0xb4, 0xf8, 0x0e, 0xc0, 0x52, 0xe2, 0xca, 0xa0, 0x90,
- 0x0f, 0xb0, 0x78, 0x42, 0x03, 0xc7, 0xc0, 0x52, 0xec, 0xca, 0x47, 0xd8,
- 0x01, 0x51, 0x98, 0xd5, 0x37, 0x2d, 0x0f, 0xb3, 0xa9, 0x90, 0x0f, 0xcd,
- 0x10, 0x42, 0x04, 0x41, 0xc0, 0x52, 0xf9, 0x10, 0xc0, 0x53, 0x05, 0xc2,
- 0x00, 0xc3, 0x01, 0x01, 0x90, 0xc9, 0xa9, 0xfa, 0x0f, 0xcd, 0x79, 0xc7,
- 0xc9, 0x7c, 0x01, 0x18, 0x29, 0x12, 0xc0, 0x53, 0x12, 0xc7, 0xc4, 0xba,
- 0x01, 0x5e, 0xc1, 0xcc, 0x86, 0xe0, 0x0f, 0xb6, 0x38, 0xca, 0xa8, 0x9c,
- 0x01, 0x1c, 0xb9, 0xc5, 0xb7, 0x18, 0x01, 0x13, 0xd3, 0x00, 0x53, 0x21,
- 0x15, 0xc0, 0x53, 0x25, 0x46, 0xcf, 0xeb, 0xc0, 0x53, 0x31, 0xc4, 0xe0,
- 0xdf, 0x0f, 0xcb, 0x40, 0x05, 0xc0, 0x53, 0x43, 0xcc, 0x8c, 0xb0, 0x01,
- 0x08, 0x73, 0x00, 0x53, 0x4f, 0x1b, 0x40, 0x53, 0x55, 0xc2, 0x02, 0x18,
- 0x01, 0x32, 0x3b, 0x00, 0x53, 0x61, 0x15, 0xc0, 0x53, 0x67, 0xc4, 0x00,
- 0x62, 0x0f, 0xd5, 0x00, 0x42, 0x11, 0xd4, 0xc0, 0x53, 0x76, 0xca, 0x0e,
- 0xa2, 0x01, 0x39, 0x79, 0x07, 0xc0, 0x53, 0x82, 0xc7, 0x22, 0x2a, 0x0f,
- 0xd3, 0xb9, 0xc3, 0x13, 0xa7, 0x0f, 0xd4, 0x28, 0xc8, 0xb9, 0xbd, 0x0f,
- 0xb7, 0xd8, 0xc3, 0x57, 0xf0, 0x01, 0x32, 0x99, 0xc3, 0x23, 0x3e, 0x0f,
- 0xa9, 0x58, 0xcd, 0x78, 0xb6, 0x01, 0x56, 0xd0, 0xc8, 0xbf, 0xe5, 0x0f,
- 0xa5, 0x49, 0x8e, 0x0f, 0xa4, 0x51, 0xc9, 0x8f, 0xb0, 0x00, 0x05, 0xb0,
- 0x00, 0x40, 0x53, 0x8e, 0xcc, 0x82, 0x0c, 0x0f, 0xb6, 0x11, 0x49, 0xae,
- 0x8c, 0xc0, 0x53, 0x9a, 0x07, 0x40, 0x53, 0xa6, 0x87, 0x0f, 0xae, 0x7b,
- 0x00, 0x53, 0xb2, 0xc3, 0x7b, 0x53, 0x0f, 0xb6, 0xa0, 0x16, 0xc0, 0x53,
- 0xbe, 0x4b, 0x93, 0x6b, 0xc0, 0x53, 0xd6, 0x03, 0xc0, 0x53, 0xfa, 0xc3,
- 0x29, 0x80, 0x0f, 0xcc, 0xe0, 0xcc, 0x21, 0x78, 0x08, 0xd7, 0xab, 0x00,
- 0x54, 0x0c, 0x0e, 0xc0, 0x54, 0x10, 0xce, 0x70, 0x55, 0x08, 0xd7, 0x7b,
- 0x00, 0x54, 0x1f, 0x47, 0xc8, 0xfe, 0xc0, 0x54, 0x23, 0xcb, 0x58, 0xa2,
- 0x08, 0xd7, 0x32, 0x00, 0x54, 0x35, 0xc3, 0x00, 0xe3, 0x01, 0x35, 0xa1,
- 0x0f, 0x40, 0x54, 0x39, 0x05, 0xc0, 0x54, 0x49, 0x45, 0x00, 0xcb, 0xc0,
- 0x54, 0x55, 0x47, 0x33, 0xef, 0xc0, 0x54, 0x8d, 0x46, 0x06, 0x97, 0xc0,
- 0x54, 0x9d, 0x49, 0xab, 0xe9, 0xc0, 0x54, 0xc1, 0x47, 0x4d, 0x10, 0x40,
- 0x54, 0xd3, 0xc7, 0xc9, 0x60, 0x0f, 0xa1, 0xe1, 0xc5, 0xd6, 0xbf, 0x0f,
- 0xca, 0xf0, 0x03, 0xc0, 0x54, 0xeb, 0xc8, 0x60, 0x8a, 0x0f, 0x9b, 0x91,
- 0xc9, 0xb1, 0x53, 0x0f, 0xd5, 0xa0, 0x45, 0x00, 0x33, 0xc0, 0x54, 0xf7,
- 0xc8, 0xbc, 0x05, 0x0f, 0x9a, 0xb9, 0xc7, 0x46, 0x08, 0x00, 0x05, 0x19,
- 0xcb, 0x94, 0x3c, 0x0f, 0xd6, 0xb9, 0xc2, 0x11, 0xd4, 0x0f, 0xa2, 0xe8,
- 0x15, 0xc0, 0x55, 0x03, 0x42, 0x00, 0xb7, 0x40, 0x55, 0x0f, 0xcf, 0x5e,
- 0x43, 0x01, 0x18, 0xb1, 0x16, 0xc0, 0x55, 0x1b, 0xc5, 0xd5, 0x2a, 0x01,
- 0x5f, 0x38, 0x4d, 0x78, 0x1a, 0xc0, 0x55, 0x27, 0xc4, 0x13, 0x6b, 0x0f,
- 0x9b, 0xf8, 0xc3, 0x64, 0x0d, 0x0f, 0xb4, 0x9b, 0x00, 0x55, 0x33, 0xc7,
- 0xcb, 0x27, 0x0f, 0xa3, 0x70, 0xca, 0x83, 0xca, 0x01, 0x3e, 0x13, 0x00,
- 0x55, 0x39, 0x15, 0xc0, 0x55, 0x3f, 0xd1, 0x54, 0xb3, 0x01, 0x33, 0xf1,
- 0x00, 0xc0, 0x55, 0x51, 0xcc, 0x81, 0x94, 0x0f, 0x9d, 0x69, 0xc9, 0x93,
- 0xba, 0x00, 0x01, 0x28, 0xc3, 0xb1, 0xdf, 0x01, 0x38, 0x79, 0xc6, 0x17,
- 0xe9, 0x01, 0x37, 0x21, 0xd6, 0x30, 0x09, 0x0f, 0xac, 0x31, 0xc9, 0xaa,
- 0xc9, 0x0f, 0xb0, 0xa1, 0xc4, 0x8c, 0xc6, 0x0f, 0xa1, 0x38, 0x05, 0xc0,
- 0x55, 0x63, 0x94, 0x0f, 0x9a, 0x81, 0xc4, 0xda, 0xea, 0x0f, 0xca, 0xe1,
- 0xc4, 0xe4, 0xfb, 0x0f, 0xd6, 0xd8, 0xc6, 0xa1, 0x52, 0x01, 0x05, 0x89,
- 0xc8, 0xb6, 0xc5, 0x01, 0x05, 0x38, 0xcb, 0x99, 0x9b, 0x01, 0x00, 0x41,
- 0xcf, 0x61, 0xfa, 0x01, 0x72, 0x70, 0xc9, 0xb4, 0x98, 0x0f, 0xa4, 0xe1,
- 0xc2, 0x02, 0x60, 0x0f, 0xa2, 0xd8, 0x16, 0xc0, 0x55, 0x73, 0xc3, 0x01,
- 0xb4, 0x08, 0x5d, 0x4b, 0x00, 0x55, 0x83, 0xc4, 0x06, 0x9d, 0x08, 0x5d,
- 0x60, 0xc3, 0x01, 0x4a, 0x08, 0x5c, 0xe1, 0xc5, 0x0d, 0xbc, 0x08, 0x5c,
- 0xd8, 0xc3, 0x26, 0xf9, 0x08, 0x5c, 0x89, 0x15, 0xc0, 0x55, 0x89, 0xc2,
- 0x00, 0x27, 0x08, 0x5c, 0x71, 0xc3, 0x1f, 0xd8, 0x08, 0x5c, 0x61, 0xc8,
- 0xbf, 0xb5, 0x08, 0x5c, 0x59, 0xc6, 0xd0, 0x5d, 0x08, 0x5c, 0x51, 0xc4,
- 0xe2, 0x57, 0x08, 0x5c, 0x49, 0xc4, 0x4b, 0x98, 0x08, 0x5c, 0x41, 0xc2,
- 0x01, 0xf0, 0x08, 0x5c, 0x23, 0x00, 0x55, 0x93, 0xc5, 0x4b, 0x92, 0x08,
- 0x5c, 0x31, 0xcd, 0x78, 0xa9, 0x08, 0x5c, 0x29, 0xc6, 0x45, 0xf6, 0x08,
- 0x5c, 0x19, 0xc5, 0xa1, 0x94, 0x08, 0x5c, 0x11, 0xc4, 0xe4, 0x8f, 0x08,
- 0x5c, 0x09, 0xc5, 0xa8, 0xf1, 0x08, 0x5c, 0x00, 0xd2, 0x48, 0xa8, 0x00,
- 0xb9, 0xb1, 0xd2, 0x48, 0xde, 0x00, 0xb9, 0xa8, 0x48, 0xbb, 0xb5, 0xc0,
- 0x55, 0x99, 0xc3, 0x26, 0x13, 0x01, 0x5e, 0xd8, 0x46, 0xd2, 0x43, 0xc0,
- 0x55, 0xab, 0x50, 0x5d, 0xc2, 0x40, 0x55, 0xc1, 0x4c, 0x7b, 0x41, 0xc0,
- 0x56, 0x15, 0x48, 0xb1, 0x78, 0x40, 0x56, 0x2b, 0xcc, 0x8c, 0xec, 0x01,
- 0x30, 0x59, 0x45, 0x69, 0x4c, 0xc0, 0x56, 0x5f, 0x42, 0x00, 0x28, 0x40,
- 0x56, 0x6b, 0x0b, 0xc0, 0x56, 0x78, 0xd6, 0x2f, 0xf3, 0x0f, 0xae, 0xd8,
- 0x49, 0x03, 0x3b, 0xc0, 0x56, 0x84, 0xd1, 0x53, 0xb4, 0x01, 0x1e, 0x53,
- 0x00, 0x56, 0x90, 0xd3, 0x41, 0x75, 0x01, 0x1e, 0x4a, 0x00, 0x56, 0x96,
- 0xcb, 0x91, 0x3a, 0x01, 0x12, 0xe1, 0xc3, 0x06, 0xa6, 0x00, 0x03, 0xf9,
- 0xcb, 0x91, 0x7c, 0x0f, 0xb4, 0xd0, 0xca, 0x9e, 0x42, 0x01, 0x08, 0x49,
- 0xc7, 0xca, 0xb7, 0x01, 0x08, 0x19, 0xc4, 0x00, 0xcb, 0x00, 0x05, 0x80,
- 0xc4, 0x00, 0x67, 0x0f, 0xb1, 0xa9, 0xc6, 0x00, 0x71, 0x0f, 0xa5, 0x58,
- 0x48, 0x8a, 0x74, 0xc0, 0x56, 0x9c, 0x43, 0x06, 0x9a, 0x40, 0x56, 0xb5,
- 0x49, 0xb5, 0x9d, 0xc0, 0x56, 0xe5, 0xcb, 0x90, 0x7f, 0x01, 0x35, 0x71,
- 0x0b, 0x40, 0x57, 0x17, 0x51, 0x56, 0x29, 0xc0, 0x57, 0x29, 0x53, 0x42,
- 0x46, 0x40, 0x57, 0x3b, 0x03, 0xc0, 0x57, 0x47, 0xdb, 0x15, 0xee, 0x01,
- 0x1c, 0x11, 0xcb, 0x94, 0x5d, 0x0f, 0xcb, 0xc0, 0x46, 0x88, 0x7f, 0xc0,
- 0x57, 0x53, 0xce, 0x74, 0x45, 0x0f, 0xb7, 0x90, 0xd7, 0x29, 0x3a, 0x01,
- 0x1c, 0x99, 0xc3, 0x02, 0x32, 0x0f, 0x9d, 0x78, 0x0f, 0xc0, 0x57, 0x6b,
- 0xc6, 0x1e, 0x45, 0x00, 0x05, 0x40, 0x12, 0xc0, 0x57, 0x77, 0xca, 0x9b,
- 0xea, 0x0f, 0xc9, 0x21, 0xcc, 0x83, 0x68, 0x0f, 0xa1, 0x50, 0xdc, 0x12,
- 0xca, 0x01, 0x3c, 0xd9, 0xc9, 0x93, 0xa4, 0x01, 0x05, 0x79, 0xc3, 0x1d,
- 0x16, 0x0f, 0xa0, 0x4a, 0x00, 0x57, 0x83, 0x44, 0x01, 0x09, 0xc0, 0x57,
- 0x89, 0x00, 0xc0, 0x57, 0x95, 0x4a, 0x01, 0x89, 0x40, 0x57, 0xb0, 0x4a,
- 0x00, 0xf6, 0xc0, 0x57, 0xc2, 0x48, 0x03, 0x3b, 0x40, 0x57, 0xce, 0x43,
- 0x05, 0x99, 0xc0, 0x57, 0xda, 0xc5, 0xd8, 0xfe, 0x0f, 0x9b, 0x48, 0x44,
- 0x03, 0x1e, 0xc0, 0x57, 0xe8, 0x00, 0x40, 0x58, 0x0e, 0x43, 0x08, 0x24,
- 0xc0, 0x58, 0x26, 0xc5, 0x11, 0x01, 0x0f, 0xa1, 0xb0, 0x4b, 0x96, 0xfc,
- 0xc0, 0x58, 0x3e, 0xc7, 0xb7, 0x45, 0x01, 0x14, 0x0b, 0x00, 0x58, 0x4d,
- 0x42, 0x06, 0xe0, 0xc0, 0x58, 0x53, 0xc5, 0xd7, 0xd7, 0x01, 0x15, 0x71,
- 0xc6, 0x03, 0x30, 0x01, 0x11, 0x22, 0x00, 0x58, 0x62, 0xc6, 0xac, 0x34,
- 0x01, 0x05, 0x59, 0xc3, 0x1e, 0x7b, 0x0f, 0xd6, 0x78, 0x46, 0x00, 0x6b,
- 0x40, 0x58, 0x68, 0xc4, 0xe5, 0x2b, 0x0f, 0xa1, 0x61, 0xc8, 0x00, 0xc7,
- 0x00, 0x01, 0x20, 0xdd, 0x11, 0xc8, 0x0d, 0xe4, 0xf9, 0xcb, 0x97, 0x96,
- 0x0d, 0xe4, 0xf1, 0xd5, 0x32, 0xfe, 0x0d, 0xe4, 0xe9, 0xd1, 0x56, 0x8f,
- 0x0d, 0xe4, 0xe1, 0x46, 0xd3, 0xb7, 0xc0, 0x58, 0x77, 0x47, 0x02, 0x90,
- 0x40, 0x58, 0x93, 0x43, 0x01, 0xf4, 0xc0, 0x59, 0x30, 0x00, 0x40, 0x59,
- 0x42, 0xc4, 0x06, 0x23, 0x01, 0x2c, 0x99, 0xc9, 0xb2, 0x6a, 0x0f, 0xab,
- 0xb0, 0x00, 0x40, 0x59, 0x4e, 0xc3, 0x3d, 0xf2, 0x0f, 0xa4, 0x19, 0xc2,
- 0x13, 0x4f, 0x0f, 0x9b, 0x08, 0x44, 0x06, 0x16, 0xc0, 0x59, 0x5a, 0xcd,
- 0x7b, 0xdc, 0x0f, 0xa4, 0xf0, 0x42, 0x01, 0x02, 0xc0, 0x59, 0x64, 0xc5,
- 0xd7, 0xe1, 0x01, 0x08, 0xf8, 0x43, 0x20, 0xd8, 0xc0, 0x59, 0x70, 0xcd,
- 0x5d, 0xa5, 0x00, 0x00, 0xf1, 0xd1, 0x54, 0xd5, 0x0f, 0xb4, 0xc9, 0xc4,
- 0xe4, 0x53, 0x0f, 0xcf, 0xf0, 0xc6, 0x00, 0x71, 0x01, 0x1e, 0x71, 0xc4,
- 0x01, 0x1e, 0x01, 0x5c, 0x81, 0xc5, 0x01, 0xf7, 0x01, 0x5c, 0x88, 0xc5,
- 0xdf, 0xc5, 0x0f, 0x9a, 0x71, 0xcd, 0x79, 0xc7, 0x0f, 0xcf, 0x38, 0x5d,
- 0x10, 0x4f, 0xc0, 0x59, 0x7c, 0xcb, 0x8f, 0xe5, 0x00, 0x05, 0x70, 0xcc,
- 0x25, 0xea, 0x05, 0x4a, 0xf9, 0x18, 0xc0, 0x59, 0xe4, 0x4f, 0x2e, 0xbf,
- 0xc0, 0x59, 0xf0, 0x47, 0x02, 0x90, 0x40, 0x59, 0xff, 0x00, 0xc0, 0x5a,
- 0x5f, 0x46, 0x01, 0x09, 0xc0, 0x5a, 0xae, 0x02, 0xc0, 0x5a, 0xf3, 0xd5,
- 0x37, 0xff, 0x01, 0x51, 0xe8, 0x00, 0xc0, 0x5b, 0x0f, 0xc8, 0xb6, 0xd5,
- 0x0f, 0xab, 0x69, 0xc9, 0xb3, 0x30, 0x0f, 0xd4, 0x80, 0x47, 0x01, 0x0e,
- 0x40, 0x5b, 0x33, 0xc4, 0x15, 0x86, 0x0f, 0x9a, 0xc9, 0xc7, 0xc5, 0x5b,
- 0x0f, 0x9a, 0xc0, 0xd0, 0x58, 0xb2, 0x01, 0x49, 0x59, 0xd0, 0x3a, 0x25,
- 0x01, 0x49, 0x80, 0xc2, 0x00, 0x49, 0x0f, 0xb4, 0x00, 0xd9, 0x20, 0x81,
- 0x0f, 0xc9, 0x19, 0x07, 0xc0, 0x5b, 0x4b, 0xc9, 0xb1, 0xfe, 0x0f, 0xcf,
- 0xd8, 0x00, 0xc0, 0x5b, 0x57, 0x4e, 0x74, 0x1b, 0x40, 0x5b, 0x63, 0xd3,
- 0x1d, 0x4c, 0x01, 0x3b, 0x39, 0xd8, 0x22, 0x20, 0x01, 0x3b, 0x29, 0xc9,
- 0xa9, 0x58, 0x01, 0x09, 0xd1, 0xdd, 0x12, 0x59, 0x01, 0x5e, 0x69, 0xd7,
- 0x2a, 0xef, 0x01, 0x5e, 0x78, 0x48, 0x56, 0x61, 0xc0, 0x5b, 0x81, 0x15,
- 0xc0, 0x5b, 0xa6, 0xca, 0x8f, 0x4c, 0x08, 0x0c, 0x89, 0x06, 0xc0, 0x5b,
- 0xb0, 0xce, 0x71, 0xdd, 0x08, 0x0c, 0xb9, 0xc7, 0xcb, 0x51, 0x08, 0x0c,
- 0xd1, 0xce, 0x72, 0x4d, 0x08, 0x0c, 0xd8, 0xc3, 0x02, 0x92, 0x0f, 0x9f,
- 0xa8, 0x45, 0xdc, 0xe1, 0xc0, 0x5b, 0xc2, 0x44, 0x0c, 0x68, 0xc0, 0x5b,
- 0xce, 0x90, 0x01, 0x36, 0x32, 0x00, 0x5c, 0x02, 0x91, 0x0f, 0xa7, 0xdb,
- 0x00, 0x5c, 0x08, 0xd1, 0x54, 0x09, 0x01, 0x1d, 0xb8, 0xc2, 0x00, 0xb6,
- 0x01, 0x11, 0xb0, 0x44, 0x00, 0x34, 0xc0, 0x5c, 0x14, 0xc4, 0xe4, 0xdb,
- 0x0f, 0xcc, 0xe8, 0xc5, 0x11, 0x01, 0x0f, 0xa1, 0x80, 0x49, 0x52, 0xd7,
- 0xc0, 0x5c, 0x20, 0x47, 0x33, 0xef, 0xc0, 0x5c, 0x2c, 0x46, 0x06, 0x97,
- 0x40, 0x5c, 0x4a, 0x43, 0x02, 0x8d, 0xc0, 0x5c, 0x68, 0x10, 0x40, 0x5c,
- 0x92, 0xc9, 0xad, 0x6c, 0x01, 0x5f, 0x99, 0xc6, 0xbc, 0xef, 0x01, 0x5f,
- 0xa1, 0xc8, 0xb8, 0xb5, 0x01, 0x5f, 0xa9, 0xc8, 0xbc, 0xed, 0x01, 0x5f,
- 0xb1, 0xc8, 0xb9, 0x4d, 0x01, 0x5f, 0xb9, 0xc9, 0xb1, 0xda, 0x01, 0x5f,
- 0xc0, 0x9e, 0x07, 0xf0, 0x03, 0x00, 0x5c, 0x9e, 0x9f, 0x07, 0xf0, 0x0b,
- 0x00, 0x5c, 0xe4, 0xa6, 0x07, 0xf0, 0x43, 0x00, 0x5d, 0x1e, 0xa5, 0x07,
- 0xf0, 0x3b, 0x00, 0x5d, 0x46, 0xa4, 0x07, 0xf0, 0x33, 0x00, 0x5d, 0x6e,
- 0xa3, 0x07, 0xf0, 0x2b, 0x00, 0x5d, 0x96, 0xa2, 0x07, 0xf0, 0x23, 0x00,
- 0x5d, 0xbe, 0xa1, 0x07, 0xf0, 0x1b, 0x00, 0x5d, 0xe6, 0xa0, 0x07, 0xf0,
- 0x12, 0x00, 0x5e, 0x0e, 0x42, 0x00, 0x71, 0xc0, 0x5e, 0x36, 0xc5, 0x00,
- 0xaa, 0x05, 0x30, 0x69, 0xc9, 0x11, 0xdc, 0x05, 0x30, 0x71, 0xcd, 0x2c,
- 0x41, 0x05, 0x30, 0x79, 0x46, 0x06, 0x97, 0x40, 0x5e, 0x42, 0x46, 0x03,
- 0x3e, 0xc0, 0x5e, 0x66, 0x42, 0x00, 0x2e, 0xc0, 0x5e, 0xa9, 0xc5, 0xdb,
- 0x8d, 0x01, 0x09, 0x18, 0x45, 0x00, 0xcb, 0xc0, 0x5e, 0xbb, 0x45, 0x0c,
- 0x46, 0x40, 0x5e, 0xf9, 0x5f, 0x0d, 0x3f, 0xc0, 0x5f, 0x2d, 0xcc, 0x87,
- 0x1c, 0x01, 0x18, 0xb8, 0xc8, 0xc0, 0x65, 0x0f, 0xa7, 0xe1, 0x00, 0x40,
- 0x5f, 0x39, 0x4f, 0x07, 0x17, 0xc0, 0x5f, 0x45, 0x4d, 0x26, 0xea, 0x40,
- 0x5f, 0xc5, 0xcc, 0x8d, 0x1c, 0x01, 0x11, 0x81, 0xc7, 0xc2, 0xde, 0x0f,
- 0x9e, 0x81, 0xc4, 0x95, 0x0a, 0x0f, 0x98, 0x58, 0xcb, 0x98, 0x93, 0x01,
- 0x0c, 0x49, 0xcd, 0x40, 0x0c, 0x01, 0x0a, 0xf1, 0x08, 0xc0, 0x60, 0x45,
- 0x16, 0xc0, 0x60, 0x51, 0x44, 0x01, 0xb4, 0x40, 0x60, 0x5d, 0x00, 0xc0,
- 0x60, 0x83, 0x46, 0xd3, 0x27, 0xc0, 0x60, 0xcd, 0x45, 0xd9, 0xa3, 0x40,
- 0x60, 0xd9, 0xc4, 0x0c, 0x5a, 0x0e, 0x9b, 0xc1, 0xc3, 0x01, 0xb4, 0x0e,
- 0x9b, 0xb8, 0x09, 0xc0, 0x60, 0xeb, 0xca, 0xa2, 0xde, 0x0f, 0x9c, 0x58,
- 0x43, 0x46, 0x46, 0xc0, 0x60, 0xfd, 0xc3, 0x08, 0x1a, 0x0f, 0xd6, 0xa0,
- 0xc5, 0xc5, 0x01, 0x01, 0x38, 0x39, 0xc9, 0xab, 0xd7, 0x0f, 0xad, 0x68,
- 0x43, 0x04, 0x31, 0xc0, 0x61, 0x51, 0xc8, 0xb9, 0x0d, 0x0f, 0xcb, 0x08,
- 0x45, 0x8f, 0x40, 0xc0, 0x61, 0x6f, 0x4a, 0xa4, 0xc8, 0xc0, 0x61, 0x93,
- 0x45, 0xda, 0x34, 0x40, 0x61, 0xf9, 0x0d, 0xc0, 0x62, 0x17, 0x44, 0x08,
- 0x92, 0xc0, 0x62, 0x23, 0xc3, 0x0f, 0x59, 0x0f, 0xa1, 0x10, 0x00, 0xc0,
- 0x62, 0x51, 0x02, 0x40, 0x62, 0x7b, 0x10, 0xc0, 0x62, 0x8d, 0xce, 0x72,
- 0x07, 0x0f, 0xca, 0x48, 0xcc, 0x89, 0x5c, 0x0f, 0xa5, 0x69, 0xc9, 0xb0,
- 0x9f, 0x0f, 0xd3, 0xa0, 0x44, 0x15, 0xfa, 0xc0, 0x62, 0x97, 0x44, 0x85,
- 0x0e, 0x40, 0x62, 0xa3, 0x07, 0xc0, 0x62, 0xaf, 0x42, 0x00, 0x82, 0x40,
- 0x62, 0xb9, 0x44, 0x0e, 0x3a, 0xc0, 0x62, 0xc5, 0x42, 0x04, 0x32, 0x40,
- 0x62, 0xe9, 0xd8, 0x24, 0x78, 0x0f, 0xa8, 0xe9, 0xd6, 0x0a, 0x88, 0x01,
- 0x1f, 0x01, 0xcd, 0x02, 0x52, 0x01, 0x1e, 0xf1, 0xcb, 0x1a, 0x3f, 0x01,
- 0x1e, 0xe1, 0xce, 0x24, 0xb2, 0x01, 0x1d, 0xa1, 0x42, 0x00, 0xa4, 0xc0,
- 0x62, 0xf3, 0x46, 0x02, 0x12, 0xc0, 0x62, 0xfd, 0x45, 0x02, 0x4d, 0xc0,
- 0x63, 0x07, 0x44, 0x12, 0x7a, 0x40, 0x63, 0x11, 0x42, 0x01, 0x48, 0xc0,
- 0x63, 0x20, 0xc9, 0xb1, 0x0b, 0x01, 0x19, 0x80, 0x56, 0x2f, 0xb1, 0xc0,
- 0x63, 0x2c, 0xd6, 0x31, 0x11, 0x0f, 0x89, 0x50, 0xc2, 0x00, 0x6e, 0x0f,
- 0xcd, 0xbb, 0x00, 0x63, 0x3e, 0xc4, 0x7f, 0x7a, 0x0f, 0xcf, 0x80, 0x8f,
- 0x0f, 0xb4, 0x53, 0x00, 0x63, 0x44, 0xc2, 0x00, 0x34, 0x0f, 0xb4, 0x31,
- 0xcc, 0x82, 0x60, 0x01, 0x09, 0x11, 0x05, 0xc0, 0x63, 0x4a, 0x42, 0x07,
- 0x01, 0x40, 0x63, 0x56, 0x43, 0x01, 0x93, 0xc0, 0x63, 0x62, 0x49, 0x8a,
- 0x73, 0xc0, 0x63, 0x6e, 0x44, 0x17, 0x34, 0xc0, 0x63, 0x96, 0xc5, 0x37,
- 0x91, 0x01, 0x02, 0xe9, 0xcb, 0x99, 0xdd, 0x0f, 0xa9, 0x88, 0x87, 0x01,
- 0x15, 0x43, 0x00, 0x63, 0xca, 0xc4, 0xe5, 0x67, 0x0f, 0x9d, 0xd0, 0x12,
- 0xc0, 0x63, 0xd0, 0xc2, 0x03, 0xc7, 0x0f, 0xce, 0x62, 0x00, 0x63, 0xdc,
- 0x08, 0xc0, 0x63, 0xe2, 0x0e, 0xc0, 0x63, 0xf8, 0x06, 0xc0, 0x64, 0x02,
- 0x11, 0xc0, 0x64, 0x1c, 0x05, 0xc0, 0x64, 0x28, 0x03, 0xc0, 0x64, 0x3e,
- 0x0a, 0xc0, 0x64, 0x56, 0x15, 0xc0, 0x64, 0x62, 0x07, 0xc0, 0x64, 0x72,
- 0x42, 0x00, 0x34, 0xc0, 0x64, 0x8e, 0x42, 0x01, 0x09, 0xc0, 0x64, 0x9a,
- 0x0f, 0xc0, 0x64, 0xa6, 0x09, 0xc0, 0x64, 0xb8, 0xc5, 0xd7, 0x73, 0x0e,
- 0x99, 0xd9, 0xd3, 0x3f, 0xe6, 0x0e, 0x99, 0xb9, 0x14, 0xc0, 0x64, 0xd3,
- 0x12, 0xc0, 0x64, 0xdd, 0x0d, 0xc0, 0x64, 0xed, 0x04, 0xc0, 0x64, 0xf9,
- 0xc3, 0x89, 0x1d, 0x0e, 0x98, 0xe9, 0xcc, 0x88, 0xa8, 0x0e, 0x98, 0x88,
- 0x14, 0xc0, 0x65, 0x0b, 0xd2, 0x48, 0x96, 0x0f, 0x9b, 0xa9, 0xc3, 0x3d,
- 0xa5, 0x0f, 0xd6, 0xb0, 0x07, 0xc0, 0x65, 0x17, 0x44, 0xe0, 0x07, 0x40,
- 0x65, 0x29, 0x96, 0x01, 0x37, 0xd1, 0xc7, 0x8a, 0xf9, 0x01, 0x05, 0xc1,
- 0xd4, 0x38, 0xa5, 0x0f, 0x9d, 0xf0, 0xd7, 0x28, 0xde, 0x01, 0x3a, 0x29,
- 0xc2, 0x00, 0x28, 0x0f, 0xa0, 0x2a, 0x00, 0x65, 0x4d, 0xc7, 0x18, 0xd1,
- 0x01, 0x1f, 0x91, 0x47, 0x30, 0x8c, 0x40, 0x65, 0x53, 0x00, 0x40, 0x65,
- 0x5f, 0x45, 0xdf, 0x6b, 0xc0, 0x65, 0x6e, 0x4b, 0x8f, 0x82, 0xc0, 0x65,
- 0x96, 0xc7, 0x10, 0xff, 0x0f, 0xb1, 0x58, 0x42, 0x00, 0x2f, 0x40, 0x65,
- 0xa2, 0x15, 0xc0, 0x65, 0xa8, 0x45, 0x00, 0xba, 0xc0, 0x65, 0xb8, 0x0e,
- 0xc0, 0x66, 0x04, 0x52, 0x4e, 0x12, 0xc0, 0x66, 0x10, 0x46, 0x06, 0x97,
- 0xc0, 0x66, 0x1a, 0x4b, 0x6f, 0xcc, 0xc0, 0x66, 0x44, 0xc9, 0xae, 0x29,
- 0x00, 0x7d, 0xf3, 0x00, 0x66, 0x75, 0x52, 0x4a, 0x7c, 0x40, 0x66, 0x7b,
- 0x47, 0x02, 0x90, 0xc0, 0x66, 0x93, 0x42, 0x00, 0x82, 0xc0, 0x66, 0xa5,
- 0xce, 0x73, 0xc7, 0x01, 0x6b, 0x81, 0xd0, 0x59, 0xe2, 0x01, 0x6b, 0xf8,
- 0x00, 0xc0, 0x66, 0xab, 0xc8, 0xbd, 0x2d, 0x01, 0x71, 0xd0, 0xd3, 0x41,
- 0x3c, 0x0f, 0xdd, 0x81, 0x4a, 0x03, 0xfd, 0x40, 0x66, 0xed, 0x00, 0xc0,
- 0x66, 0xff, 0x47, 0x06, 0x90, 0x40, 0x67, 0x66, 0x47, 0x08, 0xba, 0xc0,
- 0x67, 0x7e, 0xc9, 0xb2, 0x10, 0x00, 0x2c, 0x79, 0xc6, 0x5a, 0xa2, 0x00,
- 0x2c, 0x51, 0xc9, 0x11, 0xdc, 0x00, 0x2c, 0x49, 0x03, 0xc0, 0x67, 0x8a,
- 0xcd, 0x2c, 0x41, 0x00, 0x2a, 0xf1, 0x05, 0xc0, 0x67, 0x96, 0x07, 0xc0,
- 0x67, 0xa2, 0xde, 0x0f, 0x24, 0x00, 0x2a, 0xc8, 0xca, 0xa8, 0xb0, 0x0f,
- 0x9d, 0x41, 0xcd, 0x7a, 0x7d, 0x0f, 0xb4, 0xd8, 0xce, 0x74, 0x29, 0x0f,
- 0x9c, 0xf9, 0xc4, 0x7f, 0xbb, 0x01, 0x5f, 0x28, 0x05, 0xc0, 0x67, 0xae,
- 0x4d, 0x26, 0xea, 0xc0, 0x67, 0xba, 0xcf, 0x69, 0xa7, 0x0f, 0x4a, 0x21,
- 0xd0, 0x5b, 0x12, 0x0f, 0x4a, 0x29, 0x47, 0x69, 0x11, 0xc0, 0x68, 0x3a,
- 0xc5, 0x09, 0x89, 0x0f, 0x4a, 0x39, 0x10, 0xc0, 0x68, 0x46, 0x46, 0x06,
- 0x97, 0xc0, 0x68, 0x52, 0x48, 0x12, 0x30, 0x40, 0x68, 0x76, 0x04, 0xc0,
- 0x68, 0x82, 0x05, 0xc0, 0x68, 0xa3, 0x06, 0xc0, 0x68, 0xb7, 0x12, 0xc0,
- 0x68, 0xc3, 0x16, 0xc0, 0x68, 0xd7, 0x14, 0xc0, 0x68, 0xf2, 0x18, 0xc0,
- 0x68, 0xff, 0x15, 0xc0, 0x69, 0x09, 0x03, 0xc0, 0x69, 0x2f, 0x0e, 0xc0,
- 0x69, 0x5d, 0x42, 0x04, 0x6e, 0xc0, 0x69, 0x69, 0x0f, 0xc0, 0x69, 0x75,
- 0x42, 0x01, 0x09, 0xc0, 0x69, 0x8a, 0xc5, 0x65, 0xf1, 0x0f, 0xb8, 0x19,
- 0x43, 0x03, 0x73, 0xc0, 0x69, 0x94, 0xc4, 0x82, 0xf8, 0x0f, 0xb8, 0x11,
- 0x09, 0xc0, 0x69, 0xa0, 0x44, 0x1c, 0x64, 0xc0, 0x69, 0xac, 0xc3, 0xd2,
- 0xae, 0x0f, 0xba, 0x31, 0xc5, 0xd5, 0x61, 0x0f, 0xba, 0xa9, 0x0a, 0x40,
- 0x69, 0xbb, 0xda, 0x1b, 0x3d, 0x01, 0x36, 0xa9, 0xce, 0x75, 0x4f, 0x01,
- 0x1c, 0x38, 0xc4, 0xd8, 0xc8, 0x01, 0x34, 0xb9, 0xc8, 0x96, 0x4f, 0x01,
- 0x09, 0xa9, 0xc2, 0x03, 0x3d, 0x00, 0x00, 0x38, 0xce, 0x74, 0xed, 0x01,
- 0x19, 0x71, 0xc8, 0x07, 0x5f, 0x01, 0x12, 0x60, 0xcb, 0x23, 0x35, 0x01,
- 0x12, 0x51, 0xc2, 0x02, 0x18, 0x01, 0x12, 0x42, 0x00, 0x69, 0xc5, 0xc9,
- 0xb2, 0x19, 0x0f, 0xb7, 0xd1, 0x0f, 0x40, 0x69, 0xcb, 0xc8, 0xb9, 0xad,
- 0x0f, 0xb7, 0x61, 0xc9, 0xb2, 0xd6, 0x0f, 0xb7, 0x58, 0x51, 0x53, 0x3d,
- 0xc0, 0x69, 0xd7, 0xcb, 0x93, 0x1e, 0x0f, 0xd6, 0x00, 0x4b, 0x09, 0x77,
- 0xc0, 0x69, 0xef, 0xce, 0x6c, 0xab, 0x0f, 0xa7, 0xb0, 0xc2, 0x00, 0x54,
- 0x01, 0x11, 0x03, 0x00, 0x6a, 0x0f, 0xca, 0xa7, 0xfc, 0x01, 0x09, 0x59,
- 0xc9, 0x25, 0x47, 0x0f, 0xa5, 0x11, 0xc7, 0xc6, 0xc0, 0x0f, 0xb1, 0x01,
- 0xcb, 0x8d, 0xbf, 0x0f, 0xb1, 0x38, 0x14, 0xc0, 0x6a, 0x15, 0x44, 0x0b,
- 0x82, 0xc0, 0x6a, 0x21, 0xcc, 0x88, 0x78, 0x0f, 0xb1, 0x90, 0xcb, 0x8c,
- 0xed, 0x01, 0x30, 0x51, 0xc9, 0xaa, 0x39, 0x08, 0x0c, 0xe0, 0x0e, 0xc0,
- 0x6a, 0x2c, 0x10, 0xc0, 0x6a, 0x36, 0x06, 0xc0, 0x6a, 0x4c, 0x16, 0xc0,
- 0x6a, 0x5a, 0x05, 0xc0, 0x6a, 0x68, 0x83, 0x08, 0xb8, 0x93, 0x00, 0x6a,
- 0x72, 0x0c, 0xc0, 0x6a, 0x78, 0x04, 0xc0, 0x6a, 0x82, 0x09, 0xc0, 0x6a,
- 0x8c, 0xc2, 0x00, 0xa4, 0x08, 0xb8, 0x89, 0xc2, 0x0c, 0x65, 0x08, 0xb8,
- 0x79, 0xc2, 0x02, 0x59, 0x08, 0xb8, 0x69, 0xc2, 0x00, 0xad, 0x08, 0xb8,
- 0x49, 0x12, 0xc0, 0x6a, 0x96, 0x0d, 0x40, 0x6a, 0xa0, 0xc8, 0x91, 0xbf,
- 0x08, 0xb9, 0xf9, 0x44, 0x00, 0xcc, 0x40, 0x6a, 0xaa, 0xc5, 0x2a, 0x13,
- 0x08, 0xb9, 0xd9, 0xc2, 0x00, 0x4d, 0x08, 0xb9, 0xd0, 0xc4, 0x22, 0x71,
- 0x08, 0xb9, 0xc9, 0xc5, 0x01, 0xdb, 0x08, 0xb9, 0xc1, 0x15, 0xc0, 0x6a,
- 0xba, 0x08, 0xc0, 0x6a, 0xc6, 0x16, 0xc0, 0x6a, 0xd2, 0xc3, 0x01, 0xb4,
- 0x08, 0xb9, 0x89, 0xc4, 0x15, 0xd3, 0x08, 0xb9, 0x80, 0x83, 0x08, 0xb9,
- 0x03, 0x00, 0x6a, 0xde, 0x91, 0x08, 0xb9, 0x41, 0x87, 0x08, 0xb9, 0x31,
- 0x97, 0x08, 0xb9, 0x23, 0x00, 0x6a, 0xee, 0x8b, 0x08, 0xb9, 0x12, 0x00,
- 0x6a, 0xf2, 0x0e, 0xc0, 0x6a, 0xf6, 0xc2, 0x02, 0x59, 0x08, 0xb8, 0xf0,
- 0xc6, 0x6b, 0x4e, 0x01, 0x08, 0x01, 0xc5, 0xd5, 0xc5, 0x0f, 0xd4, 0xb8,
- 0xd3, 0x41, 0x62, 0x01, 0x03, 0x69, 0xd2, 0x4a, 0xa0, 0x01, 0x03, 0x58,
- 0xc4, 0x01, 0x94, 0x01, 0x4c, 0xf9, 0xc5, 0x0b, 0x42, 0x00, 0x05, 0xa0,
- 0x42, 0x00, 0x90, 0xc0, 0x6b, 0x00, 0xc5, 0xd9, 0x9e, 0x01, 0x1b, 0xd3,
- 0x00, 0x6b, 0x0f, 0xc5, 0xa1, 0x17, 0x01, 0x1b, 0xab, 0x00, 0x6b, 0x15,
- 0x0b, 0xc0, 0x6b, 0x1b, 0xd0, 0x60, 0x12, 0x01, 0x1b, 0xb9, 0x14, 0xc0,
- 0x6b, 0x2a, 0x42, 0x01, 0xc7, 0xc0, 0x6b, 0x36, 0x06, 0xc0, 0x6b, 0x40,
- 0x15, 0xc0, 0x6b, 0x52, 0xc5, 0xd8, 0xd1, 0x01, 0x1b, 0x61, 0x05, 0xc0,
- 0x6b, 0x68, 0xd6, 0x2e, 0x67, 0x01, 0x1b, 0x49, 0xcf, 0x67, 0xb8, 0x01,
- 0x1b, 0x41, 0x44, 0x01, 0x1e, 0xc0, 0x6b, 0x74, 0x44, 0xe2, 0xb3, 0xc0,
- 0x6b, 0x80, 0xcd, 0x00, 0xde, 0x01, 0x1a, 0x00, 0x42, 0x00, 0x39, 0xc0,
- 0x6b, 0x8c, 0xd8, 0x23, 0x40, 0x00, 0x04, 0xf8, 0xc7, 0x30, 0x70, 0x00,
- 0x01, 0x39, 0xc4, 0x1d, 0x29, 0x01, 0x5f, 0x20, 0xd1, 0x49, 0xa4, 0x08,
- 0x59, 0xc9, 0x47, 0x02, 0x90, 0x40, 0x6b, 0x98, 0xc4, 0x38, 0xc5, 0x0f,
- 0x9f, 0xd1, 0xc6, 0x37, 0x33, 0x00, 0x01, 0x30, 0xca, 0xa2, 0x3e, 0x08,
- 0x08, 0x11, 0x47, 0x33, 0xef, 0xc0, 0x6c, 0x19, 0x19, 0xc0, 0x6c, 0x40,
- 0xd9, 0x20, 0x9a, 0x08, 0x09, 0xe1, 0xdc, 0x13, 0xc6, 0x08, 0x09, 0xe9,
- 0x48, 0x13, 0xcd, 0x40, 0x6c, 0x4c, 0x4a, 0xa5, 0xfe, 0xc0, 0x6c, 0x58,
- 0xc9, 0xb0, 0x96, 0x0f, 0xca, 0x50, 0xd4, 0x3e, 0x09, 0x0f, 0xbd, 0x89,
- 0xcb, 0x5a, 0x67, 0x0f, 0xbd, 0x21, 0x46, 0x02, 0x31, 0xc0, 0x6c, 0x7a,
- 0x15, 0xc0, 0x6c, 0x86, 0xd5, 0x35, 0x20, 0x0f, 0xbd, 0xe8, 0x43, 0x00,
- 0x3a, 0xc0, 0x6c, 0x92, 0xd4, 0x3b, 0x25, 0x0f, 0x9b, 0xf0, 0xc3, 0x1f,
- 0x50, 0x01, 0x16, 0x43, 0x00, 0x6c, 0xc5, 0x0e, 0xc0, 0x6c, 0xcb, 0xca,
- 0xa8, 0x74, 0x0f, 0x9f, 0xc8, 0xc8, 0x40, 0x9c, 0x0f, 0xb6, 0x48, 0x8d,
- 0x0f, 0xab, 0x73, 0x00, 0x6c, 0xd5, 0xc6, 0xc5, 0x9b, 0x0f, 0xd4, 0x18,
- 0xcb, 0x98, 0x67, 0x0f, 0x9c, 0xa8, 0x47, 0x02, 0x90, 0xc0, 0x6c, 0xe2,
- 0x4d, 0x7b, 0x0c, 0x40, 0x6d, 0x6c, 0x4b, 0x96, 0x41, 0xc0, 0x6d, 0x80,
- 0xc4, 0xae, 0x5b, 0x0f, 0x99, 0xe1, 0xc5, 0xdc, 0xcd, 0x0f, 0xa1, 0x08,
- 0x42, 0x00, 0xe0, 0xc0, 0x6d, 0xa7, 0xc9, 0x97, 0x1f, 0x01, 0x21, 0x10,
- 0x00, 0xc0, 0x6d, 0xaf, 0xc7, 0xc8, 0x87, 0x0f, 0xd6, 0x80, 0xc2, 0x00,
- 0x41, 0x0f, 0xd4, 0xa9, 0x8d, 0x0f, 0x9f, 0x33, 0x00, 0x6d, 0xbb, 0xc3,
- 0x0e, 0x3a, 0x0f, 0x9a, 0x60, 0x0e, 0xc0, 0x6d, 0xc1, 0x46, 0x76, 0x06,
- 0x40, 0x6d, 0xd1, 0xc3, 0x02, 0x5c, 0x0f, 0xcf, 0xd3, 0x00, 0x6e, 0x07,
- 0xc5, 0xd6, 0x47, 0x01, 0x35, 0xf1, 0x47, 0xcb, 0x74, 0x40, 0x6e, 0x0d,
- 0xc3, 0x00, 0x63, 0x0f, 0xcd, 0x09, 0xde, 0x0f, 0x42, 0x0f, 0x9f, 0xc0,
- 0x00, 0x40, 0x6e, 0x1f, 0x47, 0x02, 0x90, 0xc0, 0x6e, 0x37, 0x42, 0x00,
- 0x79, 0xc0, 0x6e, 0x7c, 0xc7, 0xc2, 0x13, 0x05, 0x37, 0x91, 0xc9, 0x11,
- 0xdc, 0x05, 0x37, 0x99, 0xc9, 0xac, 0xa6, 0x05, 0x37, 0xb1, 0xcd, 0x2c,
- 0x41, 0x05, 0x37, 0xb8, 0x0d, 0xc0, 0x6e, 0x86, 0xcb, 0x95, 0x86, 0x0f,
- 0xa1, 0x59, 0xc2, 0x00, 0xb7, 0x0f, 0xca, 0x98, 0x43, 0x40, 0xc2, 0xc0,
- 0x6e, 0x94, 0xc4, 0xd2, 0x93, 0x0f, 0xa8, 0x59, 0x8a, 0x0f, 0xb6, 0x02,
- 0x00, 0x6e, 0xb0, 0x00, 0xc0, 0x6e, 0xb6, 0xc8, 0xbf, 0x85, 0x0f, 0xa4,
- 0x40, 0xca, 0xa2, 0x5c, 0x0f, 0xb6, 0x21, 0xcb, 0x8e, 0xfe, 0x0f, 0xca,
- 0xb1, 0xc2, 0x01, 0x63, 0x0f, 0xcb, 0x78, 0xc9, 0xb3, 0x66, 0x01, 0x05,
- 0xf9, 0xc7, 0x85, 0x1c, 0x0f, 0xd7, 0x30, 0xc5, 0xdd, 0x59, 0x0f, 0x9d,
- 0x89, 0xc6, 0xd4, 0x1d, 0x0f, 0xcf, 0x10, 0xca, 0x9f, 0xa0, 0x0f, 0x9c,
- 0x11, 0x86, 0x0f, 0xa1, 0x30, 0xcf, 0x65, 0x33, 0x01, 0x4f, 0xc9, 0xc7,
- 0x27, 0xf8, 0x01, 0x4f, 0xc0, 0x87, 0x0f, 0xb5, 0x91, 0xc3, 0x1f, 0x24,
- 0x0f, 0xb5, 0xa0, 0xc3, 0x00, 0x7b, 0x0f, 0xcd, 0x59, 0x44, 0x8d, 0xeb,
- 0xc0, 0x6e, 0xc2, 0xca, 0x9e, 0x88, 0x0f, 0xa4, 0x99, 0xd0, 0x5b, 0xa2,
- 0x0f, 0x9e, 0xb1, 0x14, 0xc0, 0x6e, 0xda, 0xc2, 0x07, 0x01, 0x0f, 0xd6,
- 0xc0, 0xc9, 0xb4, 0x86, 0x01, 0x19, 0x63, 0x00, 0x6e, 0xe6, 0x45, 0xb0,
- 0x6d, 0xc0, 0x6e, 0xec, 0x16, 0x40, 0x6f, 0x1e, 0x00, 0xc0, 0x6f, 0x2a,
- 0xc8, 0xbe, 0x9d, 0x0f, 0xb6, 0x70, 0xc4, 0x00, 0x4b, 0x01, 0x13, 0x61,
- 0xc7, 0x00, 0x70, 0x01, 0x09, 0xb0, 0xc5, 0xaf, 0x0d, 0x0f, 0x9b, 0xd1,
- 0xc3, 0x0f, 0x59, 0x0f, 0xd5, 0x90, 0xc3, 0xe7, 0x99, 0x0f, 0xcc, 0x58,
- 0xc5, 0x05, 0x2f, 0x0f, 0xb4, 0x79, 0x16, 0x40, 0x6f, 0x3c, 0xc4, 0xe0,
- 0xe3, 0x01, 0x2e, 0x71, 0xc2, 0x00, 0x49, 0x01, 0x01, 0x13, 0x00, 0x6f,
- 0x48, 0xc4, 0x2a, 0xc6, 0x0f, 0xab, 0x5a, 0x00, 0x6f, 0x4e, 0x46, 0x76,
- 0x06, 0x40, 0x6f, 0x54, 0x4b, 0x6f, 0xcc, 0xc0, 0x6f, 0x6c, 0x47, 0x02,
- 0x90, 0x40, 0x6f, 0x74, 0xc4, 0x4e, 0x8a, 0x0f, 0xce, 0x59, 0x95, 0x0f,
- 0xd7, 0x38, 0x06, 0xc0, 0x6f, 0xd2, 0x42, 0x00, 0x07, 0xc0, 0x6f, 0xde,
- 0xc2, 0x00, 0xe0, 0x0f, 0xcf, 0x88, 0x0b, 0xc0, 0x6f, 0xe8, 0x44, 0xe1,
- 0x67, 0x40, 0x6f, 0xf2, 0x44, 0x03, 0xe6, 0xc0, 0x70, 0x12, 0xc8, 0xbe,
- 0x55, 0x0f, 0xc8, 0x71, 0xc5, 0xd5, 0xa2, 0x0f, 0xcb, 0x31, 0xc2, 0x00,
- 0x3a, 0x0f, 0xcf, 0xc8, 0x03, 0xc0, 0x70, 0x24, 0xc2, 0x00, 0x7b, 0x00,
- 0x16, 0xc0, 0x09, 0xc0, 0x70, 0x34, 0x0d, 0xc0, 0x70, 0x46, 0x03, 0xc0,
- 0x70, 0x69, 0x15, 0xc0, 0x70, 0x7b, 0x06, 0xc0, 0x70, 0x98, 0x1b, 0xc0,
- 0x70, 0xa8, 0x08, 0xc0, 0x70, 0xb2, 0x42, 0x11, 0xd4, 0xc0, 0x70, 0xc4,
- 0x0b, 0xc0, 0x70, 0xd6, 0x07, 0xc0, 0x70, 0xe6, 0x0f, 0xc0, 0x71, 0x08,
- 0x16, 0xc0, 0x71, 0x14, 0x0e, 0xc0, 0x71, 0x26, 0x11, 0xc0, 0x71, 0x30,
- 0x12, 0xc0, 0x71, 0x48, 0xcc, 0x83, 0xb0, 0x0e, 0x83, 0x51, 0x42, 0x04,
- 0x41, 0xc0, 0x71, 0x5e, 0xc4, 0xe5, 0xcb, 0x0e, 0x82, 0x01, 0x14, 0x40,
- 0x71, 0x6a, 0xc4, 0x22, 0x71, 0x08, 0xe3, 0x13, 0x00, 0x71, 0x76, 0xc5,
- 0x01, 0xdb, 0x08, 0xe3, 0x0b, 0x00, 0x71, 0x7c, 0x15, 0xc0, 0x71, 0x80,
- 0x08, 0xc0, 0x71, 0x92, 0x16, 0xc0, 0x71, 0x9a, 0xc3, 0x01, 0xb4, 0x08,
- 0xe2, 0xd0, 0x45, 0x06, 0x98, 0xc0, 0x71, 0xa8, 0xcb, 0x95, 0x0d, 0x08,
- 0xe2, 0x11, 0xc4, 0x1c, 0xd0, 0x08, 0xe2, 0x08, 0x9f, 0x08, 0xe2, 0x29,
- 0x9e, 0x08, 0xe2, 0x20, 0x03, 0xc0, 0x71, 0xcc, 0x42, 0x03, 0x32, 0xc0,
- 0x71, 0xd8, 0xcb, 0x1e, 0x17, 0x08, 0xe1, 0xe0, 0x03, 0xc0, 0x71, 0xe4,
- 0x91, 0x08, 0xe1, 0xd1, 0x87, 0x08, 0xe1, 0xc1, 0x48, 0xac, 0xc1, 0xc0,
- 0x71, 0xf0, 0x97, 0x08, 0xe1, 0x93, 0x00, 0x71, 0xfb, 0x8b, 0x08, 0xe1,
- 0x82, 0x00, 0x71, 0xff, 0xc2, 0x00, 0xa4, 0x08, 0xe1, 0x71, 0x15, 0xc0,
- 0x72, 0x03, 0x18, 0xc0, 0x72, 0x13, 0xc2, 0x00, 0xc7, 0x08, 0xe1, 0x49,
- 0xc2, 0x02, 0x59, 0x08, 0xe1, 0x41, 0xc2, 0x1d, 0x5f, 0x08, 0xe1, 0x39,
- 0xc2, 0x00, 0xad, 0x08, 0xe1, 0x31, 0x04, 0xc0, 0x72, 0x1d, 0x12, 0xc0,
- 0x72, 0x27, 0x10, 0xc0, 0x72, 0x31, 0x06, 0xc0, 0x72, 0x47, 0x16, 0xc0,
- 0x72, 0x55, 0x0c, 0xc0, 0x72, 0x63, 0x05, 0xc0, 0x72, 0x6d, 0x09, 0xc0,
- 0x72, 0x77, 0x0d, 0xc0, 0x72, 0x81, 0x83, 0x08, 0xe0, 0x03, 0x00, 0x72,
- 0x8b, 0x91, 0x08, 0xe0, 0x61, 0x87, 0x08, 0xe0, 0x51, 0x97, 0x08, 0xe0,
- 0x23, 0x00, 0x72, 0x97, 0x8b, 0x08, 0xe0, 0x12, 0x00, 0x72, 0x9b, 0x43,
- 0x00, 0x28, 0xc0, 0x72, 0x9f, 0x00, 0x40, 0x72, 0xcd, 0x45, 0x01, 0xf7,
- 0xc0, 0x72, 0xec, 0x44, 0x01, 0x1e, 0xc0, 0x72, 0xf8, 0x06, 0x40, 0x73,
- 0x02, 0xdb, 0x18, 0xe2, 0x01, 0x3f, 0x00, 0xc2, 0x00, 0xbf, 0x01, 0x11,
- 0x43, 0x00, 0x73, 0x14, 0xc3, 0x00, 0x57, 0x01, 0x11, 0x3a, 0x00, 0x73,
- 0x18, 0xcd, 0x77, 0x23, 0x0f, 0xa8, 0x79, 0x4a, 0x9b, 0xe0, 0x40, 0x73,
- 0x1e, 0xc6, 0x02, 0x90, 0x0f, 0xa4, 0x61, 0xc5, 0xde, 0xe4, 0x0f, 0x9f,
- 0x48, 0xcb, 0x98, 0x46, 0x0f, 0xbb, 0xa1, 0xca, 0x9b, 0x90, 0x0f, 0xcf,
- 0xa1, 0xc2, 0x05, 0xd5, 0x0f, 0xd5, 0xb8, 0x00, 0xc0, 0x73, 0x2a, 0x46,
- 0x01, 0x09, 0xc0, 0x73, 0x79, 0x02, 0x40, 0x73, 0xbe, 0xc7, 0xc2, 0x9f,
- 0x0f, 0xcb, 0x61, 0xd3, 0x46, 0x22, 0x0f, 0x9a, 0x18, 0xc4, 0xe5, 0xdb,
- 0x0f, 0xa0, 0x30, 0x4b, 0x37, 0x03, 0xc0, 0x73, 0xda, 0xd8, 0x21, 0xc0,
- 0x01, 0x16, 0xd1, 0x45, 0x00, 0x6c, 0xc0, 0x73, 0xe6, 0x11, 0xc0, 0x73,
- 0xf8, 0x03, 0xc0, 0x74, 0x04, 0xc4, 0x00, 0xcb, 0x00, 0x01, 0xe1, 0xcf,
- 0x68, 0x99, 0x01, 0x55, 0x32, 0x00, 0x74, 0x10, 0x47, 0x02, 0x90, 0xc0,
- 0x74, 0x16, 0x46, 0x06, 0x97, 0xc0, 0x74, 0x6e, 0x4c, 0x10, 0x7e, 0xc0,
- 0x74, 0x92, 0x15, 0xc0, 0x74, 0xa2, 0x4f, 0x2e, 0xbf, 0xc0, 0x74, 0xae,
- 0x4b, 0x6f, 0xcc, 0x40, 0x74, 0xd0, 0x42, 0x01, 0xde, 0xc0, 0x74, 0xec,
- 0xd6, 0x25, 0x6a, 0x0f, 0xb3, 0x90, 0x47, 0x02, 0x90, 0xc0, 0x74, 0xf9,
- 0x4c, 0x10, 0x7e, 0x40, 0x75, 0x6f, 0x07, 0xc0, 0x75, 0x7b, 0x0d, 0x40,
- 0x75, 0x85, 0x43, 0xbc, 0x32, 0xc0, 0x75, 0x91, 0xd3, 0x46, 0xe0, 0x01,
- 0x96, 0x78, 0xc4, 0x1e, 0x80, 0x0f, 0xa4, 0x20, 0xcf, 0x68, 0xa8, 0x08,
- 0x49, 0xf9, 0x47, 0x02, 0x90, 0x40, 0x75, 0xb3, 0x83, 0x08, 0x14, 0x03,
- 0x00, 0x76, 0x15, 0x87, 0x08, 0x14, 0x0b, 0x00, 0x76, 0x19, 0x84, 0x08,
- 0x14, 0x13, 0x00, 0x76, 0x1d, 0x89, 0x08, 0x14, 0x21, 0x86, 0x08, 0x14,
- 0x29, 0x8b, 0x08, 0x14, 0x31, 0x99, 0x08, 0x14, 0x39, 0x9c, 0x08, 0x14,
- 0x41, 0x96, 0x08, 0x14, 0xbb, 0x00, 0x76, 0x21, 0x8c, 0x08, 0x14, 0x51,
- 0x8d, 0x08, 0x14, 0x5b, 0x00, 0x76, 0x29, 0x93, 0x08, 0x14, 0x61, 0x8e,
- 0x08, 0x14, 0x69, 0x8f, 0x08, 0x14, 0x73, 0x00, 0x76, 0x2d, 0x90, 0x08,
- 0x14, 0x7b, 0x00, 0x76, 0x31, 0x97, 0x08, 0x14, 0x91, 0x92, 0x08, 0x14,
- 0x99, 0x94, 0x08, 0x14, 0xa9, 0x95, 0x08, 0x14, 0xb1, 0x8a, 0x08, 0x14,
- 0xd9, 0x9a, 0x08, 0x14, 0xe0, 0x42, 0x00, 0x63, 0xc0, 0x76, 0x35, 0xc6,
- 0x91, 0x60, 0x01, 0x05, 0xf0, 0x15, 0xc0, 0x76, 0x42, 0x47, 0x02, 0x90,
- 0xc0, 0x76, 0x4e, 0x05, 0xc0, 0x76, 0x9e, 0x52, 0x49, 0x5c, 0x40, 0x76,
- 0xaa, 0x00, 0x40, 0x76, 0xc0, 0xc2, 0x01, 0x63, 0x0f, 0x9f, 0xb9, 0xc5,
- 0xd5, 0x89, 0x0f, 0xcb, 0xe0, 0xc8, 0xb8, 0xf5, 0x0f, 0xa0, 0xf1, 0xc3,
- 0x06, 0x25, 0x0f, 0xd4, 0xe0, 0x47, 0x02, 0x90, 0xc0, 0x76, 0xcc, 0xc8,
- 0x24, 0x10, 0x00, 0x75, 0x79, 0x4b, 0x6f, 0xcc, 0xc0, 0x77, 0x23, 0x15,
- 0xc0, 0x77, 0x50, 0xc5, 0xda, 0x8e, 0x00, 0x76, 0x31, 0x49, 0xb5, 0x82,
- 0xc0, 0x77, 0x5c, 0xd1, 0x54, 0x1a, 0x00, 0x76, 0x61, 0xc9, 0xac, 0xaf,
- 0x00, 0x76, 0x69, 0xc8, 0xb9, 0x35, 0x00, 0x76, 0x71, 0x46, 0x06, 0x97,
- 0xc0, 0x77, 0x6c, 0x43, 0x68, 0x16, 0x40, 0x77, 0x90, 0xca, 0x9d, 0x34,
- 0x0f, 0xbb, 0xb1, 0xc2, 0x0c, 0xf3, 0x0f, 0xd6, 0x08, 0x46, 0x02, 0x12,
- 0xc0, 0x77, 0x9c, 0x45, 0x02, 0x4d, 0xc0, 0x77, 0xc4, 0x44, 0x00, 0x57,
- 0xc0, 0x77, 0xe0, 0x45, 0x03, 0x2b, 0xc0, 0x77, 0xea, 0xce, 0x6f, 0x3d,
- 0x01, 0x38, 0x09, 0x44, 0x01, 0xb4, 0xc0, 0x78, 0x05, 0x16, 0xc0, 0x78,
- 0x11, 0xd2, 0x49, 0x26, 0x0f, 0xdc, 0x21, 0xd3, 0x40, 0x0c, 0x0f, 0xdc,
- 0x30, 0x46, 0x02, 0x31, 0xc0, 0x78, 0x1d, 0x16, 0xc0, 0x78, 0x2f, 0x15,
- 0xc0, 0x78, 0x3b, 0xd0, 0x58, 0x92, 0x0f, 0xc1, 0xe9, 0xd1, 0x51, 0x50,
- 0x0f, 0xc1, 0xa9, 0x03, 0xc0, 0x78, 0x47, 0xcf, 0x64, 0x9d, 0x01, 0x3f,
- 0x81, 0x06, 0xc0, 0x78, 0x56, 0xcd, 0x79, 0x2b, 0x01, 0x0e, 0x41, 0x0a,
- 0xc0, 0x78, 0x62, 0xc6, 0xcb, 0xe9, 0x0f, 0xb3, 0x69, 0x46, 0x05, 0xef,
- 0x40, 0x78, 0x6e, 0x46, 0x04, 0x73, 0xc0, 0x78, 0x7a, 0x4e, 0x73, 0x1f,
- 0xc0, 0x78, 0x86, 0xcc, 0x47, 0x7c, 0x0f, 0xa9, 0xd1, 0xd1, 0x55, 0x2a,
- 0x0f, 0xb7, 0x31, 0xc8, 0x2e, 0x33, 0x0f, 0xb7, 0x38, 0xc4, 0x36, 0xab,
- 0x01, 0x15, 0x2b, 0x00, 0x78, 0x92, 0x45, 0x01, 0x0f, 0xc0, 0x78, 0x98,
- 0xd7, 0x26, 0xb6, 0x01, 0x17, 0x81, 0x45, 0x05, 0x63, 0xc0, 0x78, 0xa7,
- 0xc9, 0xb4, 0xc5, 0x01, 0x4b, 0xf1, 0x45, 0x02, 0xdd, 0x40, 0x78, 0xce,
- 0xc9, 0xb3, 0xae, 0x0f, 0xcc, 0x21, 0xd7, 0x20, 0xce, 0x01, 0x33, 0x91,
- 0xc2, 0x00, 0xb7, 0x01, 0x11, 0x53, 0x00, 0x78, 0xda, 0x16, 0x40, 0x78,
- 0xde, 0xc8, 0xa6, 0xdc, 0x01, 0x1c, 0x61, 0xc5, 0xb8, 0xf0, 0x01, 0x01,
- 0xf8, 0xc9, 0xb1, 0x9b, 0x01, 0x37, 0x89, 0xcf, 0x6a, 0x10, 0x01, 0x30,
- 0xa0, 0x03, 0xc0, 0x78, 0xea, 0xc4, 0x96, 0x57, 0x08, 0x1c, 0x09, 0x09,
- 0xc0, 0x78, 0xf6, 0x0d, 0xc0, 0x79, 0x02, 0x06, 0xc0, 0x79, 0x0e, 0xc2,
- 0x00, 0xf6, 0x08, 0x1c, 0x2b, 0x00, 0x79, 0x1a, 0xc2, 0x01, 0x47, 0x08,
- 0x1c, 0x31, 0x1c, 0xc0, 0x79, 0x20, 0x16, 0xc0, 0x79, 0x2a, 0xc3, 0x4b,
- 0x98, 0x08, 0x1c, 0x51, 0x15, 0xc0, 0x79, 0x3a, 0xc5, 0xda, 0xd9, 0x08,
- 0x1c, 0x69, 0xc3, 0x0b, 0x0e, 0x08, 0x1c, 0x71, 0xc3, 0x1f, 0xd8, 0x08,
- 0x1c, 0x81, 0xc2, 0x01, 0xbc, 0x08, 0x1c, 0xa1, 0xc4, 0xe5, 0x57, 0x08,
- 0x1c, 0xb1, 0xc5, 0xd9, 0x3a, 0x08, 0x1c, 0xb9, 0x8b, 0x08, 0x1c, 0xd9,
- 0x97, 0x08, 0x1c, 0xe0, 0x43, 0x10, 0x3a, 0xc0, 0x79, 0x4a, 0x06, 0xc0,
- 0x79, 0xa6, 0x14, 0x40, 0x79, 0xb5, 0xc7, 0xc7, 0x37, 0x0f, 0xb4, 0x09,
- 0x0f, 0xc0, 0x79, 0xc1, 0xd7, 0x28, 0x3d, 0x01, 0x5f, 0xf8, 0x14, 0xc0,
- 0x79, 0xcd, 0x0a, 0xc0, 0x79, 0xeb, 0x10, 0xc0, 0x7a, 0x09, 0x0d, 0xc0,
- 0x7a, 0x2d, 0x42, 0x29, 0xae, 0xc0, 0x7a, 0x4b, 0x42, 0x00, 0x88, 0xc0,
- 0x7a, 0x57, 0x42, 0x32, 0xf0, 0xc0, 0x7a, 0x6f, 0x42, 0x30, 0x38, 0xc0,
- 0x7a, 0x83, 0x42, 0x14, 0x4a, 0xc0, 0x7a, 0x93, 0x19, 0xc0, 0x7a, 0xa5,
- 0x1b, 0xc0, 0x7a, 0xbd, 0x0f, 0xc0, 0x7a, 0xcf, 0x16, 0xc0, 0x7a, 0xed,
- 0x15, 0x40, 0x7b, 0x0b, 0xd7, 0x27, 0xb3, 0x01, 0x15, 0xc9, 0x84, 0x0f,
- 0x99, 0xf8, 0x0e, 0xc0, 0x7b, 0x29, 0x12, 0xc0, 0x7b, 0x35, 0xcc, 0x88,
- 0xfc, 0x00, 0x2f, 0x79, 0x45, 0x00, 0xba, 0xc0, 0x7b, 0x41, 0x47, 0x2a,
- 0x9e, 0x40, 0x7b, 0x53, 0x16, 0xc0, 0x7b, 0x9d, 0x06, 0xc0, 0x7b, 0xa9,
- 0xce, 0x6e, 0x87, 0x02, 0x6e, 0x19, 0x19, 0xc0, 0x7b, 0xbd, 0x42, 0x00,
- 0x79, 0xc0, 0x7b, 0xc9, 0xd0, 0x5a, 0x92, 0x02, 0x6e, 0x39, 0x15, 0xc0,
- 0x7b, 0xd3, 0x12, 0xc0, 0x7b, 0xe5, 0x08, 0xc0, 0x7b, 0xf7, 0x09, 0xc0,
- 0x7c, 0x03, 0x42, 0x00, 0x82, 0xc0, 0x7c, 0x0d, 0xca, 0xa3, 0x24, 0x02,
- 0x6e, 0x79, 0x03, 0xc0, 0x7c, 0x19, 0x04, 0xc0, 0x7c, 0x2b, 0x42, 0x00,
- 0x9c, 0xc0, 0x7c, 0x3d, 0x42, 0x00, 0x34, 0xc0, 0x7c, 0x47, 0x11, 0xc0,
- 0x7c, 0x57, 0xca, 0xa5, 0x2c, 0x02, 0x6f, 0xd8, 0x48, 0x00, 0xf9, 0xc0,
- 0x7c, 0x63, 0xc2, 0x02, 0x60, 0x0f, 0xa0, 0x72, 0x00, 0x7c, 0x89, 0x00,
- 0xc0, 0x7c, 0x8d, 0xc2, 0x01, 0x63, 0x0f, 0x9f, 0x40, 0xc6, 0xc9, 0xbc,
- 0x01, 0x18, 0xdb, 0x00, 0x7c, 0xa5, 0xc2, 0x02, 0x60, 0x01, 0x18, 0x12,
- 0x00, 0x7c, 0xab, 0xd9, 0x1e, 0xf1, 0x0f, 0xb3, 0x43, 0x00, 0x7c, 0xaf,
- 0x87, 0x0f, 0xab, 0x98, 0xc4, 0x3d, 0xa4, 0x0f, 0x9b, 0x79, 0xc3, 0xaf,
- 0x0a, 0x0f, 0xa0, 0xe8, 0x00, 0xc0, 0x7c, 0xb5, 0xc3, 0x25, 0x85, 0x0f,
- 0xa4, 0x38, 0x15, 0xc0, 0x7c, 0xc1, 0xc3, 0x2f, 0x47, 0x0f, 0xa9, 0x43,
- 0x00, 0x7c, 0xcb, 0xc6, 0xd2, 0x19, 0x0f, 0x9a, 0xa0, 0x06, 0xc0, 0x7c,
- 0xd1, 0x4d, 0x7c, 0x03, 0xc0, 0x7c, 0xe3, 0x45, 0xd7, 0x3c, 0xc0, 0x7d,
- 0x01, 0x09, 0x40, 0x7d, 0x13, 0x44, 0xc9, 0x9a, 0xc0, 0x7d, 0x1f, 0xcb,
- 0x97, 0x75, 0x0f, 0xa1, 0x18, 0x4c, 0x1b, 0x57, 0xc0, 0x7d, 0x2b, 0x44,
- 0x01, 0x1e, 0xc0, 0x7d, 0x37, 0x45, 0x01, 0xf7, 0xc0, 0x7d, 0x43, 0x48,
- 0xb6, 0xed, 0xc0, 0x7d, 0x4f, 0x47, 0xca, 0x86, 0xc0, 0x7d, 0x59, 0xd4,
- 0x3d, 0x7d, 0x07, 0xff, 0x41, 0xcd, 0x1b, 0x98, 0x07, 0xff, 0x51, 0xcf,
- 0x13, 0x0f, 0x07, 0xff, 0x61, 0xcc, 0x0d, 0x90, 0x07, 0xff, 0x69, 0xcc,
- 0x0d, 0x80, 0x07, 0xff, 0x70, 0x02, 0xc0, 0x7d, 0x65, 0x00, 0x40, 0x7d,
- 0x74, 0x47, 0x02, 0x90, 0xc0, 0x7d, 0x80, 0xce, 0x1b, 0x63, 0x01, 0x84,
- 0xe9, 0xd5, 0x34, 0x0f, 0x01, 0x84, 0xf1, 0xcc, 0x83, 0xec, 0x01, 0x84,
- 0xf8, 0xc3, 0x07, 0xd9, 0x01, 0x00, 0x83, 0x00, 0x7d, 0xd8, 0xc9, 0xac,
- 0x79, 0x01, 0x70, 0x90, 0x42, 0x00, 0x28, 0xc0, 0x7d, 0xe8, 0x47, 0xca,
- 0xb0, 0x40, 0x7d, 0xf4, 0x46, 0x07, 0x11, 0xc0, 0x7e, 0x06, 0xc7, 0x00,
- 0x71, 0x0f, 0xa9, 0x19, 0xc7, 0xc9, 0x13, 0x0f, 0xa9, 0x10, 0x14, 0xc0,
- 0x7e, 0x18, 0xc4, 0x1e, 0xe4, 0x01, 0x11, 0x5a, 0x00, 0x7e, 0x37, 0xcd,
- 0x7d, 0x14, 0x01, 0x1c, 0x01, 0x4d, 0x7b, 0xb5, 0x40, 0x7e, 0x3b, 0xc5,
- 0x65, 0xa6, 0x01, 0x10, 0xf3, 0x00, 0x7e, 0x47, 0x49, 0x56, 0xb3, 0x40,
- 0x7e, 0x4d, 0x42, 0x00, 0x9c, 0xc0, 0x7e, 0x57, 0x42, 0x00, 0x3a, 0x40,
- 0x7e, 0x63, 0x0b, 0xc0, 0x7e, 0x6f, 0xc2, 0x02, 0x6b, 0x00, 0x04, 0x22,
- 0x00, 0x7e, 0x7b, 0xd3, 0x41, 0x62, 0x01, 0x03, 0x61, 0xd2, 0x4a, 0xa0,
- 0x01, 0x03, 0x50, 0xcd, 0x7b, 0x8e, 0x0f, 0xd5, 0x51, 0x44, 0x03, 0x40,
- 0x40, 0x7e, 0x81, 0x16, 0xc0, 0x7e, 0x90, 0x42, 0x00, 0x06, 0xc0, 0x7e,
- 0x9c, 0xc5, 0x40, 0xc5, 0x01, 0x80, 0x01, 0x05, 0xc0, 0x7e, 0xa8, 0xc9,
- 0x11, 0xdc, 0x01, 0x80, 0x11, 0xce, 0x1b, 0x63, 0x01, 0x80, 0x29, 0xcb,
- 0x90, 0xe2, 0x01, 0x80, 0x39, 0xcf, 0x6a, 0x6a, 0x01, 0x81, 0x51, 0xd0,
- 0x58, 0xa2, 0x01, 0x81, 0x59, 0xd2, 0x49, 0xda, 0x01, 0x81, 0x69, 0xd3,
- 0x43, 0x3d, 0x01, 0x81, 0xf1, 0xcf, 0x66, 0x8c, 0x01, 0x81, 0xf9, 0x4b,
- 0x4f, 0xf1, 0x40, 0x7e, 0xb4, 0xc4, 0x58, 0xd3, 0x0f, 0x9b, 0x41, 0xc3,
- 0xb2, 0x35, 0x0f, 0xce, 0x50, 0xda, 0x1a, 0xd5, 0x01, 0x12, 0x98, 0x4e,
- 0x6d, 0x29, 0x40, 0x7e, 0xea, 0x8f, 0x0f, 0xd5, 0x89, 0x42, 0x00, 0x59,
- 0xc0, 0x7e, 0xfc, 0xc6, 0xd0, 0xc3, 0x0f, 0xaf, 0xd1, 0xc9, 0xb5, 0x94,
- 0x0f, 0xb0, 0xf8, 0xc2, 0x00, 0x57, 0x0f, 0xa3, 0x4b, 0x00, 0x7f, 0x08,
- 0xca, 0xa2, 0xd4, 0x0f, 0xb5, 0xd0, 0x00, 0xc0, 0x7f, 0x14, 0xdb, 0x17,
- 0x9e, 0x01, 0x3d, 0x98, 0xcc, 0x8a, 0xc4, 0x01, 0x33, 0xf9, 0xca, 0x9e,
- 0x2e, 0x01, 0x31, 0xc0, 0x46, 0x1b, 0xc6, 0xc0, 0x7f, 0x66, 0x46, 0x07,
- 0xdd, 0xc0, 0x7f, 0x72, 0x4a, 0x03, 0x68, 0xc0, 0x7f, 0x7e, 0x4b, 0x01,
- 0xf7, 0xc0, 0x7f, 0x9c, 0x4a, 0x01, 0x6b, 0xc0, 0x7f, 0xba, 0x48, 0x08,
- 0xca, 0x40, 0x7f, 0xd8, 0xca, 0x9e, 0x6a, 0x0f, 0xad, 0x71, 0xc4, 0x0a,
- 0x4a, 0x0f, 0xb6, 0xe0, 0x06, 0xc0, 0x7f, 0xf6, 0xc7, 0xc2, 0x7c, 0x0f,
- 0x9b, 0xb9, 0xc9, 0xa0, 0x91, 0x0f, 0xb0, 0x49, 0x89, 0x0f, 0xd5, 0xe8,
- 0x42, 0x00, 0x28, 0xc0, 0x80, 0x00, 0xc2, 0x11, 0xd4, 0x01, 0x18, 0xd0,
- 0x44, 0xd0, 0x27, 0xc0, 0x80, 0x0a, 0x44, 0x00, 0x34, 0x40, 0x80, 0x22,
- 0x49, 0xae, 0x17, 0xc0, 0x80, 0x2e, 0xc9, 0xac, 0x31, 0x01, 0x35, 0x00,
- 0x42, 0x00, 0x2e, 0xc0, 0x80, 0x4c, 0x44, 0x00, 0x34, 0xc0, 0x80, 0x5c,
- 0x42, 0x00, 0x55, 0x40, 0x80, 0x6e, 0xd3, 0x43, 0xfb, 0x0f, 0x98, 0xa1,
- 0xd4, 0x3d, 0x05, 0x0f, 0x98, 0x90, 0xda, 0x12, 0xe8, 0x01, 0x3d, 0xe1,
- 0xc4, 0x03, 0xf0, 0x0f, 0xa4, 0x90, 0xda, 0x1b, 0xd9, 0x01, 0x08, 0xc1,
- 0xca, 0x9f, 0xbe, 0x0f, 0x9e, 0x58, 0xc4, 0x00, 0x67, 0x0f, 0xb1, 0x49,
- 0xc8, 0x19, 0xb7, 0x0f, 0xb2, 0x00, 0xcb, 0x99, 0xd2, 0x01, 0x12, 0x01,
- 0xc3, 0x20, 0x38, 0x0f, 0xa9, 0x39, 0xc6, 0xd1, 0x2f, 0x0f, 0xc9, 0xe0,
- 0x44, 0x00, 0x34, 0x40, 0x80, 0x7a, 0xc2, 0x00, 0x9f, 0x0f, 0xd4, 0x41,
- 0xc9, 0x88, 0x7b, 0x0f, 0xb1, 0x98, 0xc5, 0xdb, 0xb0, 0x0f, 0xcd, 0x49,
- 0x16, 0xc0, 0x80, 0x8c, 0xc9, 0xb3, 0x81, 0x01, 0x37, 0x98, 0xc9, 0x1d,
- 0x4f, 0x01, 0x3b, 0x31, 0xc3, 0x01, 0xd3, 0x01, 0x34, 0xc3, 0x00, 0x80,
- 0x9e, 0xc8, 0x37, 0xa3, 0x0f, 0xa5, 0xf0, 0xc9, 0xb6, 0x48, 0x01, 0x34,
- 0xe1, 0xca, 0xa2, 0xfc, 0x0f, 0xa5, 0x50, 0xcc, 0x85, 0x90, 0x0f, 0xd5,
- 0x69, 0xc2, 0x00, 0x28, 0x0f, 0xae, 0x00, 0x14, 0xc0, 0x80, 0xa4, 0xc5,
- 0x04, 0x6a, 0x01, 0x37, 0x90, 0xc3, 0x57, 0xf0, 0x01, 0x15, 0x49, 0xc4,
- 0x63, 0xaf, 0x01, 0x10, 0x01, 0x0d, 0xc0, 0x80, 0xb4, 0xc6, 0xbc, 0xff,
- 0x00, 0x00, 0x61, 0xcb, 0x92, 0xbb, 0x0f, 0xcb, 0x00, 0xc6, 0xb8, 0x87,
- 0x0f, 0xa3, 0x18, 0xc2, 0x30, 0x5d, 0x0f, 0x98, 0x08, 0x42, 0x00, 0x55,
- 0xc0, 0x80, 0xc9, 0xcb, 0x94, 0x05, 0x01, 0x09, 0xd9, 0xc4, 0x81, 0xeb,
- 0x0f, 0x9f, 0x68, 0xc7, 0x46, 0x1b, 0x0f, 0xa7, 0x01, 0xc4, 0xd7, 0x60,
- 0x0f, 0xad, 0xb8, 0x0e, 0xc0, 0x80, 0xeb, 0xc4, 0xe3, 0x9f, 0x0f, 0xce,
- 0x30, 0xca, 0x8e, 0x2e, 0x0f, 0xcb, 0xb1, 0x46, 0xce, 0xa7, 0x40, 0x80,
- 0xf7, 0x43, 0x04, 0xe9, 0xc0, 0x81, 0x03, 0xc2, 0x01, 0x07, 0x01, 0x19,
- 0x13, 0x00, 0x81, 0x0f, 0xc6, 0x24, 0x62, 0x0f, 0xa1, 0xc0, 0x46, 0x12,
- 0x9a, 0xc0, 0x81, 0x15, 0x48, 0xbe, 0xdd, 0x40, 0x81, 0x21, 0x00, 0xc0,
- 0x81, 0x33, 0x46, 0x4b, 0x06, 0x40, 0x81, 0x4b, 0xc8, 0xb9, 0x95, 0x01,
- 0x35, 0x89, 0xd1, 0x51, 0x1d, 0x01, 0x03, 0x08, 0x9b, 0x01, 0x37, 0xa1,
- 0xc8, 0xbb, 0x8d, 0x0f, 0x9d, 0x08, 0xc8, 0x1c, 0xef, 0x01, 0x32, 0x01,
- 0xd7, 0x28, 0x26, 0x00, 0x05, 0x50, 0xc9, 0xb5, 0x28, 0x0f, 0xb1, 0x41,
- 0xc4, 0x17, 0xa2, 0x0f, 0xd5, 0xb0, 0x43, 0x13, 0xd8, 0xc0, 0x81, 0xab,
- 0x87, 0x0f, 0xa9, 0x2a, 0x00, 0x81, 0xc0, 0x8a, 0x0f, 0xa0, 0xfb, 0x00,
- 0x81, 0xd2, 0xcd, 0x7d, 0xe4, 0x0f, 0xa2, 0x50, 0xcb, 0x01, 0xbc, 0x01,
- 0x02, 0xc9, 0xc4, 0x00, 0xba, 0x01, 0x71, 0x68, 0xc4, 0x0f, 0x14, 0x01,
- 0x00, 0x91, 0xc5, 0x40, 0xc5, 0x01, 0x00, 0x38, 0x42, 0x00, 0x55, 0xc0,
- 0x81, 0xe4, 0x42, 0x00, 0x3b, 0x40, 0x81, 0xf6, 0xc3, 0x14, 0xd2, 0x0f,
- 0xd5, 0x79, 0x48, 0xbc, 0x75, 0x40, 0x82, 0x02, 0x4c, 0x8a, 0xac, 0xc0,
- 0x82, 0x2a, 0xc6, 0x94, 0x2b, 0x0b, 0x7f, 0x20, 0x46, 0x06, 0x97, 0xc0,
- 0x82, 0x32, 0x45, 0x00, 0xcb, 0xc0, 0x82, 0x56, 0x4b, 0x6f, 0xcc, 0xc0,
- 0x82, 0x68, 0x47, 0x02, 0x90, 0x40, 0x82, 0x82, 0x4b, 0x6f, 0xcc, 0xc0,
- 0x82, 0xe9, 0x47, 0x02, 0x90, 0xc0, 0x83, 0x06, 0x15, 0xc0, 0x83, 0x6d,
- 0xd1, 0x50, 0x40, 0x08, 0x91, 0xe9, 0x06, 0xc0, 0x83, 0x79, 0xce, 0x75,
- 0xb1, 0x08, 0x91, 0xd0, 0x15, 0xc0, 0x83, 0x85, 0x46, 0x06, 0x97, 0xc0,
- 0x83, 0x91, 0xd4, 0x3e, 0x6d, 0x00, 0xbe, 0xd9, 0x46, 0x34, 0xbb, 0xc0,
- 0x83, 0xb5, 0x52, 0x4d, 0x04, 0xc0, 0x83, 0xc1, 0x47, 0x02, 0x90, 0x40,
- 0x83, 0xd7, 0xc7, 0xc9, 0x05, 0x0f, 0xa8, 0xf9, 0xc5, 0x57, 0xfd, 0x01,
- 0x19, 0x42, 0x00, 0x84, 0x21, 0xc4, 0x22, 0x71, 0x0e, 0x96, 0x4b, 0x00,
- 0x84, 0x27, 0x07, 0xc0, 0x84, 0x2d, 0x15, 0xc0, 0x84, 0x3c, 0x08, 0xc0,
- 0x84, 0x4e, 0x16, 0xc0, 0x84, 0x5b, 0xc3, 0x01, 0xb4, 0x0e, 0x96, 0x09,
- 0xc4, 0x15, 0xd3, 0x0e, 0x96, 0x00, 0x00, 0xc0, 0x84, 0x69, 0x4a, 0x0e,
- 0x34, 0x40, 0x84, 0x87, 0x06, 0xc0, 0x84, 0x9f, 0x4c, 0x10, 0x7e, 0xc0,
- 0x84, 0xb1, 0xc9, 0xb3, 0xc9, 0x08, 0xfa, 0x11, 0x45, 0x00, 0xcb, 0xc0,
- 0x84, 0xcf, 0x4b, 0x6f, 0xcc, 0xc0, 0x84, 0xed, 0x47, 0x02, 0x90, 0x40,
- 0x85, 0x0c, 0xcb, 0x90, 0xed, 0x08, 0x85, 0xeb, 0x00, 0x85, 0x7b, 0x4b,
- 0x6f, 0xcc, 0xc0, 0x85, 0x81, 0x06, 0xc0, 0x85, 0xa1, 0x15, 0xc0, 0x85,
- 0xad, 0xd0, 0x5f, 0xc2, 0x08, 0x85, 0xe1, 0xd1, 0x50, 0x40, 0x08, 0x85,
- 0xd9, 0x47, 0x02, 0x90, 0x40, 0x85, 0xb9, 0x45, 0x00, 0xcb, 0xc0, 0x86,
- 0x20, 0x45, 0x0c, 0x46, 0xc0, 0x86, 0x2c, 0x46, 0x34, 0xbb, 0xc0, 0x86,
- 0x3b, 0x47, 0x02, 0x90, 0xc0, 0x86, 0x4d, 0x46, 0x06, 0x97, 0x40, 0x86,
- 0xb3, 0x45, 0xdc, 0xa5, 0xc0, 0x86, 0xd7, 0x09, 0x40, 0x86, 0xf5, 0xc5,
- 0x00, 0xb9, 0x0f, 0xa4, 0x59, 0x44, 0x00, 0x34, 0x40, 0x87, 0x01, 0xc5,
- 0x15, 0x85, 0x0f, 0xd5, 0x48, 0x46, 0x55, 0x2d, 0xc0, 0x87, 0x10, 0xc6,
- 0x40, 0x3f, 0x01, 0x05, 0x29, 0xc6, 0xd1, 0x1d, 0x0f, 0x98, 0x60, 0x47,
- 0x02, 0x90, 0xc0, 0x87, 0x1c, 0x45, 0x0c, 0x46, 0xc0, 0x87, 0x76, 0x4b,
- 0x6f, 0xcc, 0xc0, 0x87, 0x8e, 0x45, 0x00, 0xcb, 0x40, 0x87, 0xd5, 0x00,
- 0xc0, 0x87, 0xe7, 0x11, 0x40, 0x87, 0xf3, 0xd8, 0x24, 0x00, 0x01, 0x17,
- 0x79, 0x44, 0x00, 0x53, 0x40, 0x88, 0x0b, 0x42, 0x05, 0xd5, 0xc0, 0x88,
- 0x17, 0x0b, 0xc0, 0x88, 0x21, 0x9b, 0x01, 0x4f, 0xf8, 0xc3, 0x03, 0xea,
- 0x0f, 0xcd, 0xf1, 0xc3, 0x21, 0x76, 0x0f, 0xcd, 0xf8, 0x0b, 0xc0, 0x88,
- 0x33, 0x49, 0xb4, 0x6b, 0x40, 0x88, 0x3f, 0x91, 0x0f, 0xb4, 0x39, 0x45,
- 0x03, 0x3f, 0x40, 0x88, 0x5f, 0x4b, 0x98, 0x7d, 0xc0, 0x88, 0x7b, 0xd7,
- 0x26, 0x88, 0x0f, 0xaa, 0x71, 0xc8, 0x40, 0x9c, 0x0f, 0xb5, 0xc8, 0xc4,
- 0x60, 0x44, 0x01, 0x31, 0xf9, 0x46, 0xcd, 0x03, 0xc0, 0x88, 0x8d, 0xc6,
- 0x15, 0x35, 0x0f, 0xce, 0xe0, 0x46, 0xcf, 0x9d, 0xc0, 0x88, 0x99, 0xc9,
- 0xae, 0x3b, 0x0f, 0x9a, 0xb0, 0x46, 0x06, 0x97, 0xc0, 0x88, 0xae, 0x03,
- 0xc0, 0x88, 0xd2, 0x18, 0xc0, 0x88, 0xe4, 0x0e, 0xc0, 0x88, 0xf0, 0xd4,
- 0x39, 0xf9, 0x05, 0x57, 0xa1, 0xd8, 0x22, 0xb0, 0x05, 0x57, 0x99, 0x46,
- 0xd2, 0x61, 0x40, 0x88, 0xfc, 0xc2, 0x00, 0xb7, 0x0f, 0x9a, 0x41, 0xc9,
- 0x84, 0xa3, 0x0f, 0xd7, 0x00, 0x42, 0x00, 0xbf, 0xc0, 0x89, 0x08, 0xcd,
- 0x6f, 0x68, 0x0f, 0xc9, 0xb0, 0x42, 0x00, 0x44, 0xc0, 0x89, 0x18, 0xc2,
- 0x00, 0x6e, 0x0f, 0xa2, 0x21, 0xc2, 0x02, 0x60, 0x0f, 0xa0, 0x0a, 0x00,
- 0x89, 0x27, 0x0b, 0xc0, 0x89, 0x2b, 0x07, 0xc0, 0x89, 0x35, 0xcb, 0x8e,
- 0x43, 0x01, 0x50, 0x50, 0x11, 0xc0, 0x89, 0x41, 0x47, 0xc5, 0x0e, 0xc0,
- 0x89, 0x53, 0x42, 0x19, 0x4b, 0xc0, 0x89, 0xa2, 0xc3, 0x1e, 0x7b, 0x0f,
- 0xa0, 0x92, 0x00, 0x89, 0xac, 0xc8, 0xba, 0x0d, 0x0f, 0xaf, 0x81, 0x42,
- 0x00, 0x4e, 0xc0, 0x89, 0xb2, 0xc2, 0x02, 0x60, 0x0f, 0xd6, 0x70, 0x87,
- 0x0f, 0xaa, 0x61, 0xc3, 0x20, 0xb1, 0x0f, 0xcc, 0xf8, 0x00, 0x40, 0x89,
- 0xbe, 0x4a, 0x4a, 0x13, 0xc0, 0x89, 0xca, 0xc7, 0xc1, 0xd4, 0x0f, 0xce,
- 0x48, 0xc4, 0x22, 0x71, 0x0e, 0x97, 0x4b, 0x00, 0x89, 0xf6, 0x07, 0xc0,
- 0x89, 0xfc, 0x15, 0xc0, 0x8a, 0x0b, 0x08, 0xc0, 0x8a, 0x1d, 0x16, 0xc0,
- 0x8a, 0x2a, 0xc3, 0x01, 0xb4, 0x0e, 0x97, 0x09, 0xc4, 0x15, 0xd3, 0x0e,
- 0x97, 0x00, 0x45, 0xdf, 0x57, 0xc0, 0x8a, 0x38, 0xc5, 0xd8, 0xb3, 0x0f,
- 0xbb, 0xe0, 0x4b, 0x98, 0xa9, 0xc0, 0x8a, 0x56, 0xcd, 0x78, 0x41, 0x0f,
- 0x8d, 0x69, 0xd8, 0x25, 0x50, 0x00, 0x05, 0xd1, 0xc6, 0xc6, 0x51, 0x01,
- 0x81, 0xe0, 0x45, 0x42, 0x5c, 0xc0, 0x8a, 0x70, 0xcc, 0x83, 0xf8, 0x01,
- 0x35, 0x69, 0xd1, 0x53, 0x5f, 0x0f, 0xca, 0x59, 0xc4, 0xd3, 0x1a, 0x0f,
- 0xd4, 0x38, 0xca, 0xa1, 0xf8, 0x01, 0x39, 0x01, 0x42, 0x00, 0x55, 0xc0,
- 0x8a, 0x8c, 0x47, 0xb1, 0x5e, 0x40, 0x8a, 0x9e, 0xd6, 0x2e, 0xeb, 0x01,
- 0x37, 0x79, 0xc7, 0xc4, 0x2e, 0x0f, 0x9a, 0x08, 0xc7, 0x66, 0x1c, 0x01,
- 0x05, 0xe1, 0x48, 0xc0, 0xcd, 0xc0, 0x8a, 0xc6, 0x00, 0xc0, 0x8a, 0xe4,
- 0xce, 0x6e, 0x79, 0x0f, 0xab, 0x81, 0x45, 0xd6, 0x97, 0xc0, 0x8a, 0xfc,
- 0xc2, 0x13, 0x4f, 0x0f, 0xcb, 0x69, 0xce, 0x72, 0xbd, 0x0f, 0xcd, 0xe9,
- 0xc6, 0xcc, 0x9d, 0x0f, 0xa2, 0xf0, 0x46, 0xd4, 0xd7, 0xc0, 0x8b, 0x1a,
- 0x4a, 0x9d, 0xe8, 0x40, 0x8b, 0x28, 0x87, 0x0f, 0xce, 0xc9, 0xc3, 0x29,
- 0x8a, 0x0f, 0xcf, 0x91, 0xc7, 0xc7, 0x1b, 0x0f, 0xd4, 0x20, 0x42, 0x02,
- 0x70, 0xc0, 0x8b, 0x6e, 0xc5, 0xdf, 0x1b, 0x0f, 0x9a, 0x20, 0x0b, 0xc0,
- 0x8b, 0x78, 0x44, 0x92, 0xc9, 0x40, 0x8b, 0x8d, 0xcc, 0x00, 0x9b, 0x01,
- 0x13, 0x59, 0xc9, 0x0a, 0x4a, 0x01, 0x13, 0x50, 0xcb, 0x95, 0x0d, 0x0b,
- 0x53, 0x79, 0xc4, 0x1c, 0xd0, 0x0b, 0x53, 0x71, 0x45, 0x06, 0x98, 0x40,
- 0x8b, 0x99, 0x16, 0xc0, 0x8b, 0xbd, 0x14, 0xc0, 0x8b, 0xcd, 0x42, 0x00,
- 0xa4, 0xc0, 0x8b, 0xd5, 0xc2, 0x00, 0xc7, 0x0b, 0x52, 0xdb, 0x00, 0x8b,
- 0xdd, 0x0d, 0xc0, 0x8b, 0xe1, 0x87, 0x0b, 0x52, 0xc3, 0x00, 0x8b, 0xf1,
- 0xc2, 0x01, 0x09, 0x0b, 0x52, 0xb9, 0xc3, 0x04, 0xce, 0x0b, 0x52, 0xa1,
- 0x91, 0x0b, 0x52, 0x93, 0x00, 0x8b, 0xf5, 0x12, 0xc0, 0x8b, 0xfd, 0x10,
- 0xc0, 0x8c, 0x07, 0x0f, 0xc0, 0x8c, 0x13, 0xc3, 0x21, 0x5e, 0x0b, 0x52,
- 0x59, 0xc2, 0x0b, 0xc6, 0x0b, 0x52, 0x2b, 0x00, 0x8c, 0x1f, 0x83, 0x0b,
- 0x52, 0x31, 0xc2, 0x00, 0xde, 0x0b, 0x52, 0x21, 0xc2, 0x20, 0x67, 0x0b,
- 0x52, 0x10, 0x44, 0x00, 0xcc, 0xc0, 0x8c, 0x23, 0x46, 0x10, 0x5f, 0xc0,
- 0x8c, 0x5b, 0x4a, 0x9d, 0x3e, 0x40, 0x8c, 0x77, 0x46, 0x02, 0x91, 0xc0,
- 0x8c, 0x9b, 0x4f, 0x62, 0x36, 0x40, 0x8d, 0x05, 0xd4, 0x3a, 0x71, 0x05,
- 0x53, 0x81, 0xd2, 0x4e, 0xd8, 0x05, 0x4f, 0x30, 0x4f, 0x6a, 0x88, 0xc0,
- 0x8d, 0x17, 0x54, 0x3f, 0x5d, 0x40, 0x8d, 0x3b, 0xc7, 0xcb, 0x6d, 0x00,
- 0x81, 0x59, 0x03, 0xc0, 0x8d, 0x47, 0x8b, 0x00, 0x81, 0x6b, 0x00, 0x8d,
- 0x52, 0x97, 0x00, 0x81, 0x7b, 0x00, 0x8d, 0x56, 0x87, 0x00, 0x81, 0x8b,
- 0x00, 0x8d, 0x5a, 0x44, 0xbd, 0x8d, 0xc0, 0x8d, 0x60, 0x48, 0xac, 0xc1,
- 0xc0, 0x8d, 0x6a, 0x15, 0xc0, 0x8d, 0x78, 0x52, 0x26, 0xfb, 0xc0, 0x8d,
- 0x84, 0xcc, 0x8b, 0xcc, 0x00, 0x83, 0x89, 0x46, 0xcf, 0x6d, 0x40, 0x8d,
- 0x90, 0x0f, 0xc0, 0x8d, 0xa0, 0xce, 0x74, 0x0d, 0x00, 0x84, 0x10, 0xc4,
- 0x15, 0xd3, 0x00, 0x82, 0x01, 0xc3, 0x01, 0xb4, 0x00, 0x82, 0x09, 0x16,
- 0xc0, 0x8d, 0xac, 0x08, 0xc0, 0x8d, 0xb8, 0x15, 0xc0, 0x8d, 0xc4, 0xc5,
- 0x01, 0xdb, 0x00, 0x82, 0x41, 0xc4, 0x22, 0x71, 0x00, 0x82, 0x48, 0x16,
- 0xc0, 0x8d, 0xd0, 0xc3, 0xe7, 0x63, 0x01, 0x5e, 0xe0, 0x44, 0x03, 0x7a,
- 0xc0, 0x8d, 0xdc, 0xc2, 0x00, 0x28, 0x01, 0x35, 0x90, 0xc6, 0x6f, 0xb5,
- 0x0f, 0xa7, 0x81, 0x42, 0x00, 0xae, 0xc0, 0x8d, 0xe8, 0x00, 0xc0, 0x8e,
- 0x20, 0x45, 0x00, 0x8a, 0x40, 0x8e, 0x38, 0x44, 0x0c, 0x5b, 0xc0, 0x8e,
- 0x44, 0x4d, 0x7f, 0x91, 0x40, 0x8e, 0x5c, 0xc9, 0x29, 0x48, 0x01, 0x5e,
- 0x48, 0xc4, 0x9f, 0x7e, 0x01, 0x1c, 0xc1, 0xc4, 0x00, 0x8a, 0x00, 0x04,
- 0x28, 0x03, 0xc0, 0x8e, 0x62, 0x51, 0x51, 0x83, 0xc0, 0x8e, 0x6e, 0x4e,
- 0x70, 0x01, 0x40, 0x8e, 0x7a, 0x46, 0x06, 0x97, 0xc0, 0x8e, 0x86, 0x45,
- 0x00, 0xcb, 0xc0, 0x8e, 0xaa, 0x47, 0x02, 0x90, 0x40, 0x8e, 0xc8, 0x48,
- 0xb7, 0xb5, 0x40, 0x8e, 0xeb, 0xc2, 0x00, 0xc7, 0x01, 0x10, 0x39, 0x47,
- 0xc6, 0x6c, 0x40, 0x8f, 0x03, 0xc7, 0x78, 0xfd, 0x01, 0x05, 0x31, 0xc8,
- 0xb7, 0x6d, 0x0f, 0xa4, 0x28, 0xcc, 0x59, 0xb6, 0x01, 0x03, 0x71, 0xc4,
- 0xb5, 0x72, 0x0f, 0x9e, 0xf0, 0x02, 0xc0, 0x8f, 0x15, 0xc7, 0xc5, 0xf5,
- 0x01, 0x56, 0xe8, 0x42, 0x00, 0x6e, 0xc0, 0x8f, 0x21, 0xcf, 0x4a, 0xeb,
- 0x01, 0x15, 0x93, 0x00, 0x8f, 0x2b, 0xcd, 0x80, 0x61, 0x01, 0x05, 0xd8,
- 0x45, 0x81, 0xdf, 0xc0, 0x8f, 0x31, 0x00, 0xc0, 0x8f, 0x41, 0x87, 0x0f,
- 0xae, 0x42, 0x00, 0x8f, 0x7a, 0xd9, 0x20, 0xb3, 0x0f, 0xa8, 0xf1, 0xc5,
- 0x50, 0xd3, 0x01, 0x36, 0xa3, 0x00, 0x8f, 0x89, 0x12, 0xc0, 0x8f, 0x8f,
- 0xcd, 0x7a, 0x97, 0x0f, 0xa7, 0xa9, 0x04, 0xc0, 0x8f, 0x9b, 0xce, 0x71,
- 0xf9, 0x0f, 0xb5, 0x68, 0xd0, 0x59, 0xb2, 0x01, 0x03, 0x79, 0xc8, 0xbc,
- 0x65, 0x08, 0x0c, 0x70, 0xcc, 0x87, 0x94, 0x0f, 0x0a, 0x71, 0x46, 0x02,
- 0x91, 0x40, 0x8f, 0xa7, 0xc4, 0x22, 0x71, 0x0f, 0x0a, 0x49, 0xc5, 0x01,
- 0xdb, 0x0f, 0x0a, 0x41, 0x15, 0xc0, 0x90, 0x29, 0x08, 0xc0, 0x90, 0x35,
- 0x16, 0xc0, 0x90, 0x41, 0xc3, 0x01, 0xb4, 0x0f, 0x0a, 0x09, 0xc4, 0x15,
- 0xd3, 0x0f, 0x0a, 0x00, 0xd2, 0x4e, 0x00, 0x0f, 0x09, 0xe9, 0x44, 0x00,
- 0xcc, 0x40, 0x90, 0x4d, 0x86, 0x0f, 0x09, 0xb1, 0x89, 0x0f, 0x09, 0xa9,
- 0x95, 0x0f, 0x09, 0xa1, 0x98, 0x0f, 0x09, 0x99, 0x8c, 0x0f, 0x09, 0x91,
- 0x8f, 0x0f, 0x09, 0x89, 0x84, 0x0f, 0x09, 0x80, 0x4c, 0x8a, 0xa0, 0xc0,
- 0x90, 0x59, 0xce, 0x1b, 0x63, 0x0b, 0x7f, 0x08, 0x44, 0x0b, 0x11, 0xc0,
- 0x90, 0x61, 0xc8, 0xb3, 0x0d, 0x01, 0x08, 0xb0, 0x4f, 0x2d, 0x6c, 0x40,
- 0x90, 0x77, 0xc2, 0x00, 0xbf, 0x01, 0x16, 0x09, 0xc3, 0x00, 0x57, 0x01,
- 0x16, 0x00, 0xc8, 0x6a, 0xcb, 0x01, 0x10, 0x89, 0x46, 0x1e, 0xfc, 0x40,
- 0x90, 0x83, 0xc8, 0x2b, 0x5a, 0x01, 0x10, 0x81, 0x47, 0x20, 0x56, 0x40,
- 0x90, 0x8f, 0xca, 0xa0, 0x9a, 0x00, 0x3f, 0xf1, 0xc9, 0xad, 0x99, 0x00,
- 0x3f, 0xe9, 0x45, 0x06, 0x98, 0x40, 0x90, 0xa1, 0xc9, 0xaf, 0x76, 0x00,
- 0x3f, 0xd1, 0xd2, 0x4e, 0x48, 0x00, 0x3f, 0xa9, 0x46, 0x02, 0x91, 0x40,
- 0x90, 0xc5, 0xc2, 0x00, 0xad, 0x00, 0x3f, 0xc1, 0x47, 0x11, 0x39, 0x40,
- 0x91, 0x45, 0xca, 0x9e, 0xb0, 0x00, 0x3f, 0xb9, 0xc9, 0xaf, 0x6d, 0x00,
- 0x3f, 0xb0, 0xd4, 0x39, 0x81, 0x0f, 0xbe, 0xc9, 0xc2, 0x24, 0x58, 0x0f,
- 0xcb, 0xb8, 0xc7, 0xc3, 0x47, 0x0f, 0xd3, 0x69, 0xc7, 0xc7, 0xae, 0x0f,
- 0xd3, 0x39, 0xc8, 0xbe, 0x45, 0x0f, 0xd3, 0x41, 0xc8, 0xb8, 0x15, 0x0f,
- 0xd3, 0x49, 0xc5, 0xa0, 0x31, 0x0f, 0xd3, 0x51, 0x05, 0x40, 0x91, 0x5d,
- 0xc5, 0xa0, 0x31, 0x0f, 0xd3, 0x19, 0xc7, 0xc7, 0xae, 0x0f, 0xd3, 0x01,
- 0xc8, 0xbe, 0x45, 0x0f, 0xd3, 0x09, 0xc8, 0xb8, 0x15, 0x0f, 0xd3, 0x11,
- 0x05, 0xc0, 0x91, 0x69, 0xc7, 0xc3, 0x47, 0x0f, 0xd3, 0x30, 0x4a, 0xa1,
- 0x30, 0xc0, 0x91, 0x75, 0x5a, 0x1a, 0x53, 0x40, 0x91, 0x8d, 0xcc, 0x8c,
- 0x68, 0x01, 0x1c, 0x19, 0x43, 0x18, 0x87, 0x40, 0x91, 0xa3, 0xc4, 0x0f,
- 0x14, 0x01, 0x00, 0xa1, 0xc5, 0x40, 0xc5, 0x01, 0x00, 0x19, 0xc4, 0x03,
- 0xd9, 0x01, 0x00, 0x08, 0xc2, 0x00, 0x6e, 0x01, 0x32, 0x0b, 0x00, 0x91,
- 0xbf, 0x00, 0x40, 0x91, 0xc5, 0x0f, 0xc0, 0x91, 0xd1, 0x19, 0xc0, 0x91,
- 0xe4, 0x16, 0xc0, 0x91, 0xee, 0x0a, 0xc0, 0x91, 0xf8, 0x0e, 0xc0, 0x92,
- 0x0a, 0x08, 0xc0, 0x92, 0x16, 0x07, 0xc0, 0x92, 0x20, 0x04, 0xc0, 0x92,
- 0x2a, 0x0b, 0xc0, 0x92, 0x36, 0x11, 0xc0, 0x92, 0x40, 0x18, 0xc0, 0x92,
- 0x4a, 0x03, 0xc0, 0x92, 0x54, 0x42, 0x00, 0x1c, 0xc0, 0x92, 0x5e, 0x43,
- 0xe7, 0x33, 0xc0, 0x92, 0x66, 0x43, 0xe7, 0xae, 0xc0, 0x92, 0x89, 0x42,
- 0xdd, 0xf3, 0xc0, 0x92, 0xb2, 0x42, 0xc5, 0x4c, 0xc0, 0x92, 0xc6, 0x42,
- 0xde, 0x93, 0xc0, 0x92, 0xda, 0x43, 0xe6, 0xaf, 0xc0, 0x92, 0xe6, 0x42,
- 0xe8, 0x14, 0xc0, 0x93, 0x02, 0x10, 0xc0, 0x93, 0x0a, 0x43, 0xe7, 0x6f,
- 0xc0, 0x93, 0x1a, 0x42, 0xe8, 0x20, 0xc0, 0x93, 0x3a, 0x42, 0xe5, 0x62,
- 0xc0, 0x93, 0x52, 0x42, 0xe8, 0x18, 0x40, 0x93, 0x6e, 0x14, 0xc0, 0x93,
- 0x7a, 0x59, 0x11, 0xe8, 0x40, 0x93, 0x86, 0xc3, 0x05, 0x87, 0x01, 0x11,
- 0xc9, 0x49, 0x0f, 0xfe, 0x40, 0x93, 0xaa, 0x48, 0x14, 0xc7, 0xc0, 0x93,
- 0xb6, 0x07, 0x40, 0x94, 0x0a, 0x0f, 0xc0, 0x94, 0x16, 0xc3, 0x0e, 0x41,
- 0x00, 0x9b, 0x28, 0xcc, 0x89, 0xbc, 0x00, 0x9b, 0x31, 0xd2, 0x42, 0x34,
- 0x00, 0x9b, 0x40, 0xc3, 0x01, 0xb4, 0x00, 0x9b, 0x49, 0x16, 0xc0, 0x94,
- 0x22, 0x08, 0xc0, 0x94, 0x2e, 0x15, 0xc0, 0x94, 0x3a, 0xc5, 0x01, 0xdb,
- 0x00, 0x9b, 0x81, 0xc4, 0x22, 0x71, 0x00, 0x9b, 0x88, 0x16, 0xc0, 0x94,
- 0x46, 0x08, 0xc0, 0x94, 0x5b, 0x15, 0xc0, 0x94, 0x67, 0xc6, 0xd4, 0x83,
- 0x00, 0x9b, 0xc9, 0xc6, 0x29, 0x88, 0x00, 0x9b, 0xd1, 0xc7, 0x0c, 0x4b,
- 0x00, 0x9b, 0xd8, 0xc5, 0xdd, 0xe0, 0x00, 0x9c, 0x81, 0x06, 0xc0, 0x94,
- 0x73, 0xc6, 0x87, 0x76, 0x00, 0x9c, 0x91, 0xcc, 0x87, 0x70, 0x00, 0x9c,
- 0x99, 0x0d, 0xc0, 0x94, 0x82, 0xc6, 0xd0, 0x2d, 0x00, 0x9c, 0xb1, 0xc5,
- 0xce, 0x66, 0x00, 0x9c, 0xb8, 0xc7, 0x87, 0xed, 0x01, 0x10, 0x43, 0x00,
- 0x94, 0x8e, 0x45, 0xd8, 0x8b, 0xc0, 0x94, 0x92, 0xc5, 0xb8, 0x18, 0x0f,
- 0xa0, 0xc1, 0xc5, 0xdd, 0x72, 0x0f, 0xb6, 0xb8, 0xd2, 0x4b, 0x8a, 0x08,
- 0x7f, 0xb1, 0x46, 0x02, 0x91, 0x40, 0x94, 0x9c, 0x83, 0x08, 0x28, 0x01,
- 0xc2, 0x00, 0x48, 0x08, 0x28, 0x09, 0x05, 0xc0, 0x94, 0xff, 0x06, 0xc0,
- 0x95, 0x09, 0x10, 0xc0, 0x95, 0x13, 0x87, 0x08, 0x28, 0x43, 0x00, 0x95,
- 0x27, 0xc2, 0x17, 0x9f, 0x08, 0x28, 0x49, 0x09, 0xc0, 0x95, 0x2b, 0xc2,
- 0x01, 0xf0, 0x08, 0x28, 0x61, 0x8b, 0x08, 0x28, 0x69, 0xc2, 0x1b, 0xa5,
- 0x08, 0x28, 0x71, 0x0d, 0xc0, 0x95, 0x39, 0x0e, 0xc0, 0x95, 0x43, 0xc2,
- 0x00, 0xc3, 0x08, 0x28, 0x91, 0x91, 0x08, 0x28, 0xb1, 0xc2, 0x00, 0x27,
- 0x08, 0x28, 0xb9, 0xc2, 0x8f, 0x22, 0x08, 0x28, 0xc1, 0x14, 0xc0, 0x95,
- 0x4d, 0x15, 0xc0, 0x95, 0x57, 0x16, 0xc0, 0x95, 0x61, 0x97, 0x08, 0x28,
- 0xf9, 0xc2, 0x00, 0x7b, 0x08, 0x29, 0x01, 0xc2, 0x25, 0x1f, 0x08, 0x29,
- 0x09, 0x9b, 0x08, 0x29, 0x11, 0x1c, 0x40, 0x95, 0x6b, 0x42, 0x00, 0x9f,
- 0xc0, 0x95, 0x75, 0x12, 0xc0, 0x95, 0x7b, 0xcf, 0x15, 0x8e, 0x01, 0x39,
- 0x98, 0x46, 0x00, 0x6b, 0x40, 0x95, 0x87, 0x43, 0x00, 0xf3, 0xc0, 0x95,
- 0x93, 0xda, 0x1c, 0xa9, 0x0f, 0xa8, 0xd0, 0xc4, 0x0f, 0x14, 0x01, 0x00,
- 0x99, 0xc5, 0x40, 0xc5, 0x01, 0x00, 0x11, 0xc4, 0x03, 0xd9, 0x01, 0x00,
- 0x00, 0xc4, 0x01, 0x1e, 0x01, 0x19, 0x59, 0xc5, 0x01, 0xf7, 0x01, 0x19,
- 0x30, 0x46, 0x05, 0xef, 0xc0, 0x95, 0xb5, 0x46, 0x02, 0x31, 0x40, 0x95,
- 0xc7, 0xc3, 0x01, 0xb4, 0x01, 0x5f, 0x81, 0xc3, 0x01, 0x59, 0x01, 0x5f,
- 0x88, 0x00, 0xc0, 0x95, 0xd9, 0x42, 0x00, 0x46, 0x40, 0x95, 0xe5, 0xca,
- 0xa8, 0x42, 0x01, 0x12, 0xd1, 0x47, 0x37, 0x0f, 0x40, 0x95, 0xfa, 0x95,
- 0x01, 0x12, 0xc9, 0xc8, 0x1c, 0xd5, 0x01, 0x09, 0x70, 0xc5, 0x00, 0xca,
- 0x01, 0x05, 0x61, 0xce, 0x6d, 0x6f, 0x01, 0x05, 0x01, 0x45, 0xd8, 0x72,
- 0x40, 0x96, 0x06, 0xc6, 0xd3, 0x9f, 0x0f, 0xcd, 0x71, 0xc3, 0x04, 0x45,
- 0x0f, 0x9d, 0xc0, 0x46, 0x06, 0x97, 0xc0, 0x96, 0x12, 0xc2, 0x00, 0x3a,
- 0x08, 0xec, 0xc1, 0x18, 0xc0, 0x96, 0x36, 0x45, 0x00, 0xcb, 0xc0, 0x96,
- 0x42, 0x47, 0x02, 0x90, 0x40, 0x96, 0x4e, 0xc8, 0x92, 0xc9, 0x01, 0x05,
- 0x91, 0xc5, 0xd6, 0xf6, 0x0f, 0xa4, 0x10, 0x45, 0x00, 0xcb, 0xc0, 0x96,
- 0xbb, 0x47, 0x02, 0x90, 0xc0, 0x96, 0xdf, 0x4b, 0x6f, 0xcc, 0xc0, 0x97,
- 0x54, 0x46, 0x06, 0x97, 0xc0, 0x97, 0x72, 0xc5, 0xd5, 0x7a, 0x00, 0x53,
- 0x81, 0x03, 0xc0, 0x97, 0x96, 0xc3, 0x04, 0x30, 0x00, 0x53, 0x91, 0xc3,
- 0x08, 0x23, 0x00, 0x53, 0x99, 0xc8, 0xbf, 0xf5, 0x00, 0x53, 0xa0, 0x45,
- 0x00, 0xcb, 0xc0, 0x97, 0xa2, 0x47, 0x02, 0x90, 0xc0, 0x97, 0xc4, 0x46,
- 0x34, 0xbb, 0xc0, 0x98, 0x2f, 0xc2, 0x00, 0x3a, 0x00, 0x56, 0x81, 0x46,
- 0x06, 0x97, 0xc0, 0x98, 0x3b, 0xd1, 0x50, 0x40, 0x00, 0x57, 0x81, 0xca,
- 0x80, 0x23, 0x00, 0x57, 0x88, 0x96, 0x0f, 0xa0, 0x81, 0xc5, 0xdb, 0x15,
- 0x0f, 0xca, 0x28, 0xc4, 0xe4, 0xbb, 0x08, 0x19, 0x99, 0x03, 0xc0, 0x98,
- 0x5f, 0xc8, 0xba, 0x25, 0x08, 0x19, 0xa9, 0x0b, 0xc0, 0x98, 0x6b, 0x0a,
- 0xc0, 0x98, 0x77, 0x16, 0xc0, 0x98, 0x83, 0xc3, 0x27, 0x3f, 0x08, 0x19,
- 0xc9, 0xc5, 0xdb, 0x01, 0x08, 0x19, 0xd1, 0xc5, 0xda, 0xa7, 0x08, 0x19,
- 0xd9, 0xc5, 0x87, 0x58, 0x08, 0x19, 0xe1, 0x10, 0xc0, 0x98, 0x8f, 0xc3,
- 0xad, 0x7e, 0x08, 0x19, 0xf1, 0xc4, 0xe0, 0x0b, 0x08, 0x19, 0xf9, 0xc8,
- 0xbc, 0x4d, 0x08, 0x1a, 0x01, 0xc5, 0xd7, 0x91, 0x08, 0x1a, 0x11, 0xc5,
- 0xda, 0xa2, 0x08, 0x1a, 0x19, 0xc5, 0xdc, 0x05, 0x08, 0x1a, 0x29, 0xc5,
- 0xd5, 0x75, 0x08, 0x1a, 0x31, 0xc5, 0xd6, 0x56, 0x08, 0x1a, 0x49, 0xc7,
- 0xc5, 0x1c, 0x08, 0x19, 0x89, 0xc4, 0xe1, 0xe7, 0x08, 0x19, 0x90, 0x07,
- 0xc0, 0x98, 0x9b, 0x4a, 0x03, 0x0a, 0xc0, 0x98, 0xa7, 0xc7, 0xc1, 0x5d,
- 0x0f, 0xd3, 0xb0, 0x45, 0xda, 0x98, 0xc0, 0x98, 0xce, 0xcb, 0x9a, 0x6c,
- 0x0f, 0x9c, 0x99, 0xc3, 0x59, 0xbf, 0x0f, 0x9a, 0x39, 0xc9, 0x1f, 0x97,
- 0x00, 0x03, 0x00, 0x46, 0x20, 0xf9, 0xc0, 0x98, 0xec, 0xcb, 0x8d, 0xe0,
- 0x0f, 0xb1, 0x60, 0xca, 0xa7, 0xca, 0x0f, 0xa4, 0xb9, 0x43, 0x10, 0x47,
- 0x40, 0x98, 0xfb, 0x45, 0x00, 0x6b, 0x40, 0x99, 0x07, 0xc3, 0x04, 0x6d,
- 0x01, 0x32, 0x51, 0xc6, 0xb0, 0xc6, 0x0f, 0xa4, 0x70, 0x46, 0x4a, 0xa3,
- 0xc0, 0x99, 0x13, 0x46, 0x93, 0x78, 0x40, 0x99, 0x1f, 0x8e, 0x0f, 0xa3,
- 0x3b, 0x00, 0x99, 0x3d, 0xc9, 0xb0, 0x21, 0x0f, 0xcc, 0x90, 0xc9, 0xaa,
- 0x0c, 0x0f, 0x98, 0xf9, 0xd1, 0x50, 0x0d, 0x0f, 0x98, 0x81, 0xc3, 0x09,
- 0xe5, 0x0f, 0xcf, 0x20, 0x48, 0x52, 0xa6, 0xc0, 0x99, 0x43, 0xca, 0xa4,
- 0xe6, 0x0f, 0xca, 0xd8, 0xc4, 0xe0, 0x23, 0x0f, 0xcd, 0x39, 0x42, 0x00,
- 0x55, 0x40, 0x99, 0x4f, 0xc8, 0x28, 0x85, 0x01, 0x15, 0xb1, 0x43, 0x3b,
- 0x08, 0x40, 0x99, 0x5b, 0xd0, 0x1e, 0xbf, 0x07, 0xe9, 0xf1, 0xd1, 0x1a,
- 0x39, 0x07, 0xe9, 0xf8, 0x4d, 0x52, 0xd7, 0xc0, 0x99, 0x83, 0x47, 0x33,
- 0xef, 0xc0, 0x99, 0x8f, 0xc8, 0xbb, 0xf5, 0x0f, 0x69, 0x71, 0x51, 0x51,
- 0x72, 0x40, 0x99, 0xb6, 0xc4, 0xe0, 0xd7, 0x0f, 0xb4, 0xb1, 0xc3, 0x23,
- 0x70, 0x0f, 0xb4, 0x69, 0xca, 0xa1, 0xbc, 0x0f, 0xb4, 0xa1, 0xca, 0x9c,
- 0xe4, 0x0f, 0xb4, 0xc1, 0xcb, 0x92, 0xe7, 0x0f, 0xb7, 0x88, 0x00, 0xc0,
- 0x99, 0xce, 0xcf, 0x60, 0xce, 0x0f, 0xd3, 0x88, 0xe0, 0x09, 0xc7, 0x0f,
- 0xa8, 0xd8, 0x10, 0xc0, 0x99, 0xda, 0xd5, 0x35, 0x5f, 0x00, 0x04, 0xe8,
- 0xc6, 0xce, 0x89, 0x01, 0x19, 0x29, 0xc8, 0xbd, 0x05, 0x0f, 0xa5, 0xfa,
- 0x00, 0x99, 0xe2, 0x00, 0xc0, 0x99, 0xe8, 0x43, 0x00, 0x28, 0x40, 0x9a,
- 0x1e, 0x12, 0xc0, 0x9a, 0x30, 0xc4, 0xe3, 0xd3, 0x00, 0xe3, 0xe9, 0xc5,
- 0xd6, 0x9c, 0x00, 0xe3, 0xd9, 0x42, 0x13, 0x51, 0xc0, 0x9a, 0x3c, 0xd0,
- 0x57, 0x5c, 0x00, 0xe3, 0xc9, 0x47, 0x02, 0x90, 0xc0, 0x9a, 0x48, 0x46,
- 0x06, 0x97, 0x40, 0x9a, 0x60, 0x46, 0x0c, 0xaf, 0xc0, 0x9a, 0x84, 0xc8,
- 0xbe, 0xd5, 0x0f, 0xa7, 0x20, 0x06, 0xc0, 0x9a, 0x9c, 0x05, 0xc0, 0x9a,
- 0xa8, 0xcf, 0x6b, 0x4b, 0x01, 0x22, 0x39, 0x04, 0xc0, 0x9a, 0xb4, 0xcd,
- 0x78, 0x75, 0x01, 0x22, 0x19, 0xc4, 0x49, 0x6e, 0x01, 0x22, 0x11, 0xc4,
- 0x02, 0x83, 0x01, 0x22, 0x00, 0xc4, 0xe5, 0x07, 0x0f, 0xa0, 0xc9, 0xcb,
- 0x96, 0x99, 0x0f, 0xb6, 0x88, 0x4e, 0x73, 0xff, 0xc0, 0x9a, 0xc6, 0xc6,
- 0x5a, 0xa2, 0x01, 0x72, 0xe8, 0xc3, 0x00, 0x8b, 0x01, 0x01, 0xf1, 0xc2,
- 0x03, 0xa5, 0x0f, 0xae, 0xba, 0x00, 0x9a, 0xd2, 0xd5, 0x33, 0x52, 0x00,
- 0xb4, 0xe1, 0xcc, 0x33, 0x5b, 0x00, 0xb4, 0xd9, 0x47, 0x02, 0x90, 0xc0,
- 0x9a, 0xd8, 0xca, 0xa5, 0xae, 0x00, 0xb4, 0x00, 0x47, 0x02, 0x90, 0xc0,
- 0x9b, 0x32, 0x46, 0x06, 0x97, 0x40, 0x9b, 0xb5, 0x4f, 0x07, 0x17, 0xc0,
- 0x9b, 0xd9, 0x4d, 0x26, 0xea, 0x40, 0x9c, 0x40, 0x12, 0xc0, 0x9c, 0xa7,
- 0xc5, 0xda, 0x0c, 0x0e, 0x7e, 0x11, 0x06, 0xc0, 0x9c, 0xb8, 0x11, 0xc0,
- 0x9c, 0xce, 0x0d, 0xc0, 0x9c, 0xdd, 0x15, 0xc0, 0x9c, 0xfb, 0xc6, 0xd4,
- 0x3b, 0x0e, 0x7d, 0x3b, 0x00, 0x9d, 0x0e, 0x1c, 0xc0, 0x9d, 0x12, 0xc4,
- 0xe1, 0x87, 0x0e, 0x7c, 0x19, 0x14, 0xc0, 0x9d, 0x1c, 0x42, 0x11, 0xd4,
- 0xc0, 0x9d, 0x28, 0x49, 0xb1, 0x38, 0xc0, 0x9d, 0x34, 0x4a, 0x9f, 0x8c,
- 0x40, 0x9d, 0x52, 0xc3, 0x23, 0x4a, 0x0e, 0x7a, 0x31, 0xc5, 0x78, 0x91,
- 0x0e, 0x7a, 0x29, 0xce, 0x6f, 0xe5, 0x0e, 0x7a, 0x21, 0x46, 0xd4, 0xc5,
- 0x40, 0x9d, 0x68, 0xdb, 0x16, 0xe1, 0x0e, 0x7a, 0x09, 0x45, 0x00, 0xba,
- 0xc0, 0x9d, 0x70, 0xd7, 0x2b, 0x34, 0x0e, 0x79, 0xf1, 0x51, 0x54, 0xa2,
- 0x40, 0x9d, 0xc2, 0xc8, 0xb9, 0x25, 0x08, 0xd2, 0x39, 0x44, 0x00, 0xcc,
- 0x40, 0x9d, 0xd4, 0x46, 0x33, 0x6a, 0xc0, 0x9d, 0xe6, 0x46, 0x29, 0x53,
- 0x40, 0x9d, 0xf2, 0xd6, 0x2c, 0xf1, 0x08, 0xd2, 0x29, 0xc9, 0x15, 0x9d,
- 0x08, 0xd1, 0xf8, 0xca, 0xa6, 0x9e, 0x08, 0xd2, 0x21, 0xcb, 0x97, 0x80,
- 0x08, 0xd2, 0x19, 0xc4, 0x02, 0x28, 0x08, 0xd2, 0x11, 0xc5, 0x33, 0x45,
- 0x08, 0xd2, 0x08, 0x0d, 0xc0, 0x9d, 0xfe, 0xc2, 0x00, 0xa4, 0x08, 0xd1,
- 0x89, 0x15, 0xc0, 0x9e, 0x0e, 0xc2, 0x04, 0x41, 0x08, 0xd1, 0x69, 0xc2,
- 0x00, 0xc7, 0x08, 0xd1, 0x61, 0xc2, 0x02, 0x59, 0x08, 0xd1, 0x59, 0xc2,
- 0x1d, 0x5f, 0x08, 0xd1, 0x51, 0xc2, 0x00, 0x02, 0x08, 0xd1, 0x49, 0x1c,
- 0xc0, 0x9e, 0x1e, 0x06, 0xc0, 0x9e, 0x28, 0x16, 0xc0, 0x9e, 0x3a, 0xc2,
- 0x00, 0xad, 0x08, 0xd1, 0x11, 0x04, 0xc0, 0x9e, 0x4c, 0x12, 0xc0, 0x9e,
- 0x56, 0x10, 0xc0, 0x9e, 0x60, 0xc2, 0x24, 0x58, 0x08, 0xd0, 0x91, 0x05,
- 0xc0, 0x9e, 0x76, 0x09, 0xc0, 0x9e, 0x80, 0x83, 0x08, 0xd0, 0x00, 0xcb,
- 0x34, 0xc1, 0x08, 0xd0, 0x51, 0x45, 0x00, 0xcb, 0x40, 0x9e, 0x8a, 0xd5,
- 0x35, 0x35, 0x01, 0x51, 0xf1, 0x45, 0x02, 0x13, 0xc0, 0x9e, 0xaa, 0xd4,
- 0x39, 0xd1, 0x01, 0x53, 0x28, 0x46, 0xcf, 0xd9, 0xc0, 0x9e, 0xb6, 0xc3,
- 0x3d, 0xa5, 0x01, 0x4c, 0x08, 0xcf, 0x60, 0x92, 0x01, 0x4c, 0x49, 0xcd,
- 0x7d, 0xbd, 0x01, 0x4c, 0x38, 0xc6, 0x59, 0xec, 0x01, 0x00, 0x69, 0x42,
- 0x00, 0x10, 0xc0, 0x9e, 0xc0, 0xc5, 0x40, 0xc5, 0x01, 0x00, 0x58, 0xcb,
- 0x97, 0x1d, 0x01, 0x37, 0xd9, 0xd3, 0x3f, 0xc0, 0x0f, 0xa9, 0x81, 0xc6,
- 0xd2, 0x6d, 0x0f, 0xa3, 0xd1, 0xc4, 0xc9, 0x53, 0x0f, 0xa3, 0xc9, 0xcb,
- 0x93, 0x08, 0x0f, 0x9f, 0x19, 0xc5, 0xb0, 0x91, 0x0f, 0x9c, 0x71, 0xc6,
- 0xd3, 0x1b, 0x0f, 0x9f, 0x79, 0xda, 0x1c, 0xc3, 0x01, 0x80, 0x20, 0x42,
- 0x03, 0xa4, 0xc0, 0x9e, 0xcc, 0x42, 0x00, 0x54, 0xc0, 0x9e, 0xd8, 0x46,
- 0x06, 0x97, 0xc0, 0x9e, 0xe4, 0xd3, 0x41, 0xc1, 0x05, 0x4e, 0x69, 0xcf,
- 0x63, 0x62, 0x05, 0x4e, 0x11, 0x4f, 0x2e, 0xbf, 0xc0, 0x9f, 0x08, 0x4b,
- 0x6f, 0xcc, 0xc0, 0x9f, 0x1a, 0x45, 0x00, 0xcb, 0x40, 0x9f, 0x3c, 0x44,
- 0x03, 0xde, 0xc0, 0x9f, 0x57, 0x45, 0x41, 0x2e, 0x40, 0x9f, 0x63, 0xd0,
- 0x0f, 0xfb, 0x01, 0x02, 0x41, 0xc4, 0x00, 0xba, 0x00, 0x01, 0xf8, 0x49,
- 0x14, 0xc6, 0xc0, 0x9f, 0x6f, 0x48, 0x99, 0xbf, 0x40, 0x9f, 0xe8, 0x47,
- 0x02, 0x90, 0xc0, 0xa0, 0x3a, 0xd0, 0x5a, 0xa2, 0x08, 0x75, 0x69, 0x4a,
- 0x52, 0xb5, 0x40, 0xa0, 0xbf, 0x8e, 0x00, 0x00, 0xc3, 0x00, 0xa0, 0xcb,
- 0x94, 0x01, 0x32, 0x58, 0x95, 0x00, 0xa8, 0x2b, 0x00, 0xa0, 0xd5, 0x90,
- 0x00, 0xa6, 0x83, 0x00, 0xa1, 0x00, 0x85, 0x00, 0xa5, 0x0b, 0x00, 0xa1,
- 0x3d, 0x04, 0xc0, 0xa1, 0x60, 0x96, 0x00, 0xa3, 0x33, 0x00, 0xa1, 0x72,
- 0x19, 0xc0, 0xa1, 0xa4, 0x94, 0x00, 0xaa, 0x83, 0x00, 0xa1, 0xc0, 0x88,
- 0x00, 0xaa, 0xeb, 0x00, 0xa1, 0xe3, 0x87, 0x00, 0xa0, 0x0b, 0x00, 0xa2,
- 0x08, 0x91, 0x00, 0xa0, 0x2b, 0x00, 0xa2, 0x12, 0x9b, 0x00, 0xa9, 0xf3,
- 0x00, 0xa2, 0x24, 0x8e, 0x00, 0xa7, 0x53, 0x00, 0xa2, 0x47, 0x8f, 0x00,
- 0xa5, 0xdb, 0x00, 0xa2, 0x6b, 0x8d, 0x00, 0xa4, 0x1b, 0x00, 0xa2, 0x8f,
- 0x92, 0x00, 0xa2, 0x4b, 0x00, 0xa2, 0xaf, 0x83, 0x00, 0xa0, 0x53, 0x00,
- 0xa2, 0xcc, 0x93, 0x00, 0xac, 0x2b, 0x00, 0xa2, 0xe8, 0x0a, 0xc0, 0xa2,
- 0xfd, 0x8b, 0x00, 0xa0, 0x1b, 0x00, 0xa3, 0x07, 0xcc, 0x21, 0x78, 0x00,
- 0xa0, 0xf0, 0xc2, 0x00, 0x54, 0x0f, 0xab, 0x79, 0x9b, 0x0f, 0x9b, 0x60,
- 0xc3, 0x00, 0xf2, 0x01, 0x08, 0x29, 0x96, 0x01, 0x01, 0xc2, 0x00, 0xa3,
- 0x0f, 0xc8, 0xb7, 0x7d, 0x0f, 0xae, 0x19, 0xc5, 0x08, 0x42, 0x0f, 0xa6,
- 0x3a, 0x00, 0xa3, 0x15, 0xca, 0xa0, 0x2c, 0x0f, 0x9d, 0x01, 0x90, 0x00,
- 0x16, 0x38, 0xc9, 0xb5, 0x31, 0x0f, 0x9c, 0x79, 0xc9, 0xb2, 0x61, 0x0f,
- 0xd4, 0xd0, 0xcb, 0x80, 0xe5, 0x00, 0x00, 0x69, 0xc2, 0x00, 0xb2, 0x0f,
- 0xca, 0xa8, 0x97, 0x08, 0x15, 0x93, 0x00, 0xa3, 0x1b, 0x94, 0x08, 0x15,
- 0x2b, 0x00, 0xa3, 0x22, 0x8e, 0x08, 0x15, 0x1b, 0x00, 0xa3, 0x26, 0x83,
- 0x08, 0x15, 0x03, 0x00, 0xa3, 0x2d, 0x93, 0x08, 0x15, 0x41, 0x84, 0x08,
- 0x15, 0x49, 0x8f, 0x08, 0x15, 0x53, 0x00, 0xa3, 0x31, 0x91, 0x08, 0x15,
- 0x59, 0x86, 0x08, 0x15, 0x13, 0x00, 0xa3, 0x38, 0x96, 0x08, 0x15, 0x6b,
- 0x00, 0xa3, 0x3c, 0x95, 0x08, 0x15, 0x83, 0x00, 0xa3, 0x43, 0x42, 0x06,
- 0x8f, 0xc0, 0xa3, 0x55, 0x90, 0x08, 0x15, 0xab, 0x00, 0xa3, 0x61, 0x9a,
- 0x08, 0x15, 0xa1, 0x92, 0x08, 0x15, 0xbb, 0x00, 0xa3, 0x6d, 0x8b, 0x08,
- 0x15, 0xcb, 0x00, 0xa3, 0x71, 0x87, 0x08, 0x15, 0xd3, 0x00, 0xa3, 0x75,
- 0x8d, 0x08, 0x15, 0xe3, 0x00, 0xa3, 0x79, 0x89, 0x08, 0x16, 0x02, 0x00,
- 0xa3, 0x7d, 0x47, 0x02, 0x90, 0xc0, 0xa3, 0x81, 0xcd, 0x7c, 0xb9, 0x08,
- 0x2b, 0x78, 0xcb, 0x9b, 0x48, 0x0f, 0xa7, 0xc0, 0x46, 0x00, 0x6b, 0x40,
- 0xa3, 0xf5, 0x26, 0xc0, 0xa4, 0x01, 0x25, 0xc0, 0xa4, 0x41, 0x03, 0x40,
- 0xa4, 0x81, 0x03, 0xc0, 0xa4, 0x89, 0x26, 0x40, 0xa4, 0xc1, 0xc5, 0x66,
- 0x19, 0x01, 0x74, 0x01, 0x03, 0x40, 0xa5, 0x01, 0x0e, 0xc0, 0xa5, 0x0f,
- 0xc4, 0xe1, 0x6f, 0x01, 0x74, 0xd9, 0x0b, 0xc0, 0xa5, 0x1b, 0xc2, 0x01,
- 0x12, 0x01, 0x75, 0x39, 0x4c, 0x87, 0xf4, 0x40, 0xa5, 0x27, 0x07, 0xc0,
- 0xa5, 0x5d, 0x45, 0x04, 0x74, 0xc0, 0xa5, 0x69, 0x10, 0xc0, 0xa5, 0x75,
- 0xc2, 0x01, 0xbd, 0x01, 0x74, 0xe1, 0x0b, 0xc0, 0xa5, 0x81, 0x46, 0xcd,
- 0xe7, 0xc0, 0xa5, 0x8d, 0xc4, 0xe0, 0xff, 0x01, 0x75, 0xb0, 0xc5, 0x15,
- 0x36, 0x01, 0x74, 0x29, 0x43, 0x3f, 0x68, 0x40, 0xa5, 0x99, 0x11, 0xc0,
- 0xa5, 0xa5, 0xc5, 0xc6, 0x04, 0x01, 0x75, 0x71, 0x45, 0xdd, 0x81, 0xc0,
- 0xa5, 0xb5, 0xc3, 0x86, 0x99, 0x01, 0x76, 0xc0, 0xc4, 0x14, 0xca, 0x01,
- 0x74, 0x39, 0xc5, 0x8f, 0xd5, 0x01, 0x74, 0x99, 0xc4, 0xe2, 0xa7, 0x01,
- 0x76, 0x09, 0xc5, 0xd6, 0xe7, 0x01, 0x77, 0x88, 0xc3, 0x01, 0xb4, 0x01,
- 0x74, 0x41, 0xc3, 0x01, 0x59, 0x01, 0x74, 0x48, 0xc9, 0xae, 0x95, 0x01,
- 0x74, 0x51, 0xc4, 0x17, 0xa0, 0x01, 0x74, 0xf1, 0xc2, 0x13, 0x91, 0x01,
- 0x75, 0x40, 0x44, 0xb5, 0xcc, 0xc0, 0xa5, 0xc1, 0x44, 0x02, 0xa8, 0x40,
- 0xa5, 0xd1, 0x42, 0x00, 0x9d, 0xc0, 0xa5, 0xdd, 0xc3, 0x00, 0x57, 0x01,
- 0x74, 0xc1, 0xc3, 0x00, 0xbf, 0x01, 0x76, 0x38, 0x11, 0xc0, 0xa5, 0xe7,
- 0x07, 0x40, 0xa5, 0xff, 0x03, 0xc0, 0xa6, 0x0b, 0x44, 0x16, 0xbd, 0x40,
- 0xa6, 0x17, 0xc3, 0x06, 0xda, 0x01, 0x75, 0x19, 0xc3, 0x63, 0x97, 0x01,
- 0x76, 0x50, 0xc3, 0x01, 0xf2, 0x01, 0x75, 0x49, 0x4c, 0x87, 0xf4, 0x40,
- 0xa6, 0x23, 0xc2, 0x05, 0xd0, 0x01, 0x75, 0x59, 0xc2, 0x00, 0x4b, 0x01,
- 0x75, 0xc1, 0x43, 0x00, 0xec, 0x40, 0xa6, 0x33, 0xc3, 0x01, 0xb4, 0x01,
- 0x75, 0x89, 0x16, 0xc0, 0xa6, 0x3d, 0xc4, 0x06, 0x9d, 0x01, 0x75, 0xa0,
- 0x45, 0x1b, 0xdd, 0xc0, 0xa6, 0x49, 0xc4, 0xe1, 0x77, 0x01, 0x77, 0x20,
- 0x90, 0x01, 0x8e, 0xe8, 0x99, 0x01, 0x8e, 0x23, 0x00, 0xa6, 0x53, 0x9c,
- 0x01, 0x8e, 0xbb, 0x00, 0xa6, 0x5b, 0x92, 0x01, 0x8e, 0x99, 0x96, 0x01,
- 0x8e, 0xc9, 0x89, 0x01, 0x8e, 0xd0, 0x9c, 0x01, 0x8e, 0xab, 0x00, 0xa6,
- 0x65, 0x92, 0x01, 0x8e, 0x3b, 0x00, 0xa6, 0x7b, 0x89, 0x01, 0x8e, 0xb1,
- 0xc3, 0xe7, 0xe7, 0x01, 0x8f, 0x00, 0x86, 0x01, 0x8e, 0xd9, 0x9c, 0x01,
- 0x8e, 0xe1, 0x89, 0x01, 0x8f, 0x10, 0xc8, 0x77, 0x76, 0x0f, 0xb3, 0xf3,
- 0x00, 0xa6, 0x81, 0xc5, 0x00, 0xb9, 0x01, 0x38, 0x98, 0xce, 0x70, 0xd3,
- 0x0f, 0xa7, 0x19, 0xc8, 0xb7, 0xf5, 0x0f, 0xce, 0x00, 0x45, 0xdd, 0xbd,
- 0xc0, 0xa6, 0x87, 0x14, 0x40, 0xa6, 0x93, 0x94, 0x0f, 0xd4, 0x89, 0xc2,
- 0x07, 0x01, 0x01, 0x36, 0x98, 0x47, 0xc3, 0x01, 0xc0, 0xa6, 0x9f, 0x47,
- 0x07, 0x33, 0x40, 0xa6, 0xae, 0x47, 0x02, 0x90, 0xc0, 0xa6, 0xbd, 0x18,
- 0xc0, 0xa7, 0x1f, 0xcd, 0x2c, 0x41, 0x08, 0x8a, 0x19, 0x06, 0xc0, 0xa7,
- 0x2b, 0x15, 0xc0, 0xa7, 0x3d, 0xc7, 0xc4, 0x7b, 0x08, 0x89, 0xa1, 0xc7,
- 0xc6, 0x50, 0x08, 0x89, 0x91, 0xc6, 0xb6, 0x7f, 0x08, 0x89, 0x88, 0x4f,
- 0x2e, 0xbf, 0xc0, 0xa7, 0x49, 0x4b, 0x6f, 0xcc, 0xc0, 0xa7, 0x67, 0x47,
- 0x02, 0x90, 0xc0, 0xa7, 0x86, 0x4c, 0x10, 0x7e, 0xc0, 0xa7, 0xef, 0x46,
- 0x06, 0x97, 0x40, 0xa7, 0xff, 0xcc, 0x88, 0x24, 0x0f, 0xb5, 0xc0, 0x47,
- 0x33, 0xef, 0xc0, 0xa8, 0x23, 0x47, 0x02, 0x90, 0x40, 0xa8, 0x36, 0xc8,
- 0x19, 0xb7, 0x0f, 0xb1, 0xf9, 0xc4, 0x00, 0x67, 0x0f, 0xb1, 0x10, 0x00,
- 0xc0, 0xa8, 0x9b, 0xc9, 0xb0, 0xba, 0x01, 0x36, 0x61, 0x43, 0x01, 0xf4,
- 0x40, 0xa8, 0xab, 0xca, 0x9e, 0x7e, 0x0f, 0x9b, 0xc1, 0xc5, 0xc6, 0xd7,
- 0x0f, 0xd5, 0x98, 0x05, 0xc0, 0xa8, 0xbd, 0x09, 0xc0, 0xa8, 0xe0, 0x03,
- 0xc0, 0xa8, 0xea, 0x14, 0xc0, 0xa9, 0x00, 0x0e, 0xc0, 0xa9, 0x08, 0x42,
- 0x00, 0x6c, 0xc0, 0xa9, 0x1e, 0x16, 0xc0, 0xa9, 0x2a, 0x06, 0xc0, 0xa9,
- 0x45, 0x07, 0xc0, 0xa9, 0x56, 0x08, 0xc0, 0xa9, 0x62, 0x15, 0xc0, 0xa9,
- 0x6e, 0x04, 0xc0, 0xa9, 0x90, 0x42, 0x04, 0x2b, 0xc0, 0xa9, 0x9a, 0x17,
- 0xc0, 0xa9, 0xa6, 0x0b, 0xc0, 0xa9, 0xb6, 0x47, 0x30, 0xd9, 0xc0, 0xa9,
- 0xc0, 0x11, 0xc0, 0xa9, 0xcc, 0x0f, 0xc0, 0xa9, 0xe7, 0x12, 0xc0, 0xa9,
- 0xf6, 0x10, 0xc0, 0xaa, 0x00, 0x1a, 0xc0, 0xaa, 0x0c, 0x42, 0x00, 0xf6,
- 0xc0, 0xaa, 0x16, 0x49, 0x03, 0x3b, 0x40, 0xaa, 0x28, 0xce, 0x6c, 0x8f,
- 0x01, 0x1c, 0x21, 0xc6, 0x87, 0xeb, 0x01, 0x10, 0x09, 0xc7, 0x57, 0x18,
- 0x0f, 0xae, 0xe1, 0xc3, 0x1a, 0x7f, 0x0f, 0xcf, 0x68, 0x47, 0xb5, 0xe5,
- 0xc0, 0xaa, 0x34, 0x83, 0x00, 0x01, 0x60, 0x48, 0xb6, 0x95, 0xc0, 0xaa,
- 0x40, 0x42, 0x00, 0x28, 0x40, 0xaa, 0x4c, 0xd7, 0x15, 0xf2, 0x01, 0x1c,
- 0x09, 0x45, 0xca, 0x81, 0xc0, 0xaa, 0x58, 0xcc, 0x62, 0x93, 0x01, 0x11,
- 0x71, 0x44, 0x7b, 0x49, 0x40, 0xaa, 0x64, 0xc6, 0xce, 0xf5, 0x0f, 0xa3,
- 0xb9, 0xc4, 0x00, 0xcb, 0x0f, 0xb5, 0x38, 0xc9, 0xb5, 0xf7, 0x0f, 0x9c,
- 0x51, 0xcb, 0x94, 0xc0, 0x0f, 0xb0, 0xb1, 0xc9, 0x99, 0xc9, 0x0f, 0xb0,
- 0xa8, 0x00, 0x40, 0xaa, 0x70, 0xc2, 0x00, 0x35, 0x0f, 0x9b, 0x99, 0x87,
- 0x0f, 0x9b, 0x50, 0xcb, 0x95, 0x65, 0x0f, 0x89, 0x79, 0xca, 0xa4, 0x0a,
- 0x00, 0x05, 0x48, 0x15, 0xc0, 0xaa, 0x7c, 0x05, 0xc0, 0xaa, 0x88, 0x46,
- 0xd3, 0xf3, 0xc0, 0xaa, 0x94, 0x4b, 0x92, 0x21, 0xc0, 0xaa, 0xa6, 0x08,
- 0xc0, 0xaa, 0xbe, 0xd5, 0x33, 0x7c, 0x01, 0x67, 0xf8, 0xc7, 0xb2, 0x6c,
- 0x0f, 0xca, 0x11, 0xc9, 0xaf, 0x0a, 0x0f, 0x9b, 0xd8, 0x42, 0x00, 0x59,
- 0xc0, 0xaa, 0xca, 0xc3, 0x03, 0xcd, 0x01, 0x02, 0x80, 0x45, 0x09, 0x7d,
- 0xc0, 0xaa, 0xec, 0x46, 0x11, 0x01, 0x40, 0xab, 0x12, 0x46, 0x00, 0x6b,
- 0x40, 0xab, 0x2e, 0xce, 0x75, 0x09, 0x0f, 0xa2, 0x79, 0xc8, 0x77, 0x76,
- 0x0f, 0x9d, 0x60, 0x42, 0x00, 0x59, 0xc0, 0xab, 0x46, 0x00, 0x40, 0xab,
- 0xa8, 0xc6, 0xcc, 0xa9, 0x0f, 0x9d, 0x51, 0xcf, 0x6b, 0x87, 0x01, 0x50,
- 0x81, 0xcc, 0x0b, 0x3b, 0x00, 0x02, 0xf0, 0x1c, 0xc0, 0xab, 0xb4, 0x97,
- 0x09, 0x18, 0x5b, 0x00, 0xab, 0xcf, 0x16, 0xc0, 0xac, 0x0a, 0x15, 0xc0,
- 0xac, 0x26, 0x10, 0xc0, 0xac, 0x3f, 0x0f, 0xc0, 0xac, 0x5b, 0x0e, 0xc0,
- 0xac, 0x77, 0x0d, 0xc0, 0xac, 0x8c, 0x0a, 0xc0, 0xac, 0xad, 0x09, 0xc0,
- 0xac, 0xc2, 0x87, 0x09, 0x04, 0x53, 0x00, 0xac, 0xdb, 0x06, 0xc0, 0xad,
- 0x13, 0x04, 0xc0, 0xad, 0x28, 0x83, 0x09, 0x00, 0x03, 0x00, 0xad, 0x3d,
- 0x12, 0xc0, 0xad, 0x81, 0x14, 0xc0, 0xad, 0x98, 0x8b, 0x09, 0x09, 0xfa,
- 0x00, 0xad, 0xa7, 0x49, 0x1d, 0xe4, 0xc0, 0xad, 0xdd, 0xce, 0x74, 0x8b,
- 0x09, 0x23, 0x89, 0xd9, 0x1e, 0x5b, 0x09, 0x23, 0x80, 0x42, 0x04, 0x6e,
- 0xc0, 0xad, 0xef, 0x07, 0xc0, 0xad, 0xfb, 0x15, 0xc0, 0xae, 0x07, 0x08,
- 0xc0, 0xae, 0x19, 0x11, 0xc0, 0xae, 0x25, 0x16, 0x40, 0xae, 0x31, 0x42,
- 0x00, 0x2e, 0xc0, 0xae, 0x3d, 0xc9, 0xb0, 0xc3, 0x0f, 0xca, 0x60, 0x45,
- 0x3e, 0x71, 0xc0, 0xae, 0x49, 0xca, 0xa6, 0xc6, 0x0f, 0x9a, 0xd8, 0xcf,
- 0x54, 0xc6, 0x01, 0x37, 0xf1, 0xca, 0xa4, 0x6e, 0x0f, 0xcb, 0x20, 0xcc,
- 0x82, 0x54, 0x01, 0x08, 0x21, 0x45, 0x00, 0x8a, 0x40, 0xae, 0x55, 0x56,
- 0x2c, 0xc5, 0xc0, 0xae, 0x61, 0x46, 0x08, 0xbb, 0x40, 0xae, 0xc0, 0xd0,
- 0x5c, 0x12, 0x00, 0xe9, 0x59, 0xc8, 0xbc, 0x95, 0x00, 0x26, 0x01, 0xcd,
- 0x80, 0xc9, 0x05, 0x33, 0x70, 0x46, 0x02, 0x91, 0xc0, 0xae, 0xcc, 0x48,
- 0x19, 0x70, 0x40, 0xaf, 0x4b, 0x42, 0xe7, 0xcf, 0xc0, 0xaf, 0x5d, 0x1e,
- 0xc0, 0xaf, 0x65, 0x1d, 0x40, 0xaf, 0x6d, 0x19, 0xc0, 0xaf, 0x95, 0x1a,
- 0xc0, 0xaf, 0xa5, 0x1c, 0xc0, 0xaf, 0xad, 0x83, 0x08, 0x40, 0x01, 0x87,
- 0x08, 0x40, 0x09, 0x8b, 0x08, 0x40, 0x11, 0x91, 0x08, 0x40, 0x19, 0x97,
- 0x08, 0x40, 0x21, 0x0c, 0xc0, 0xaf, 0xb5, 0x0d, 0xc0, 0xaf, 0xbd, 0x0e,
- 0xc0, 0xaf, 0xd1, 0x0f, 0xc0, 0xaf, 0xe5, 0x10, 0xc0, 0xaf, 0xf9, 0x12,
- 0xc0, 0xb0, 0x0d, 0x14, 0xc0, 0xb0, 0x21, 0x15, 0xc0, 0xb0, 0x35, 0x16,
- 0x40, 0xb0, 0x49, 0xc4, 0x22, 0x71, 0x0f, 0xdf, 0xc9, 0xc4, 0x15, 0xd3,
- 0x0f, 0xdf, 0x81, 0xc3, 0x01, 0xb4, 0x0f, 0xdf, 0x89, 0x16, 0xc0, 0xb0,
- 0x5d, 0x08, 0xc0, 0xb0, 0x69, 0x15, 0xc0, 0xb0, 0x75, 0xc5, 0x01, 0xdb,
- 0x0f, 0xdf, 0xc0, 0xe0, 0x07, 0x27, 0x01, 0x51, 0x90, 0xc2, 0x00, 0xbf,
- 0x01, 0x18, 0xa1, 0xc8, 0x0b, 0x28, 0x00, 0x05, 0x38, 0xe0, 0x00, 0x67,
- 0x0f, 0xc9, 0x60, 0x47, 0xc7, 0xed, 0xc0, 0xb0, 0x81, 0x00, 0x40, 0xb0,
- 0x89, 0x48, 0x7e, 0xdb, 0xc0, 0xb0, 0xa5, 0x45, 0x00, 0xcb, 0xc0, 0xb0,
- 0xb1, 0x0e, 0xc0, 0xb0, 0xc1, 0x4b, 0x6f, 0xcc, 0xc0, 0xb0, 0xcd, 0xd6,
- 0x2d, 0x49, 0x00, 0x6f, 0xa0, 0x14, 0xc0, 0xb0, 0xe3, 0x08, 0xc0, 0xb0,
- 0xef, 0xcb, 0x1a, 0x3f, 0x0e, 0xd4, 0x59, 0x05, 0xc0, 0xb1, 0x09, 0x15,
- 0xc0, 0xb1, 0x13, 0x0e, 0xc0, 0xb1, 0x31, 0x42, 0x01, 0xc7, 0xc0, 0xb1,
- 0x3b, 0x16, 0xc0, 0xb1, 0x41, 0xdb, 0x18, 0x91, 0x0e, 0xd3, 0x79, 0x07,
- 0xc0, 0xb1, 0x4f, 0x0a, 0xc0, 0xb1, 0x61, 0x10, 0xc0, 0xb1, 0x6e, 0x42,
- 0x00, 0x82, 0xc0, 0xb1, 0x7a, 0x42, 0x02, 0x58, 0xc0, 0xb1, 0x86, 0x44,
- 0x8a, 0xc6, 0xc0, 0xb1, 0x92, 0x06, 0xc0, 0xb1, 0x9e, 0x46, 0xd2, 0x8b,
- 0x40, 0xb1, 0xaa, 0xe0, 0x05, 0xe7, 0x01, 0x39, 0xf1, 0x47, 0x08, 0xea,
- 0x40, 0xb1, 0xbc, 0x4b, 0x6f, 0xcc, 0xc0, 0xb1, 0xce, 0x47, 0x02, 0x90,
- 0xc0, 0xb1, 0xf1, 0x15, 0xc0, 0xb2, 0x58, 0xd0, 0x5c, 0xf2, 0x08, 0xae,
- 0x49, 0x50, 0x5d, 0x92, 0xc0, 0xb2, 0x62, 0x06, 0x40, 0xb2, 0x6e, 0x46,
- 0x05, 0xef, 0xc0, 0xb2, 0x7a, 0x46, 0x02, 0x31, 0x40, 0xb2, 0x92, 0xc9,
- 0x0a, 0x4a, 0x01, 0x54, 0xe9, 0xcc, 0x00, 0x9b, 0x01, 0x54, 0xf0, 0xdb,
- 0x18, 0xac, 0x01, 0x54, 0xf9, 0xde, 0x0e, 0x70, 0x01, 0x55, 0x00, 0xcb,
- 0x74, 0x48, 0x0f, 0xb4, 0x11, 0xc8, 0xb8, 0x65, 0x0f, 0x9a, 0xe0, 0xc3,
- 0x00, 0xb6, 0x0f, 0xb4, 0x49, 0xcd, 0x80, 0xbc, 0x0f, 0xaf, 0xe8, 0x00,
- 0xc0, 0xb2, 0xaa, 0x45, 0x31, 0x58, 0x40, 0xb2, 0xc0, 0xc6, 0xd1, 0xb9,
- 0x01, 0x34, 0xd1, 0xcb, 0x98, 0x04, 0x01, 0x34, 0xa8, 0x44, 0x01, 0xdc,
- 0xc0, 0xb2, 0xdc, 0xc6, 0xd3, 0x45, 0x0f, 0x9a, 0x98, 0xd2, 0x47, 0x64,
- 0x01, 0x13, 0x19, 0xcd, 0x7d, 0xa3, 0x00, 0x04, 0xe0, 0x45, 0x00, 0x6c,
- 0xc0, 0xb2, 0xe8, 0x48, 0xb9, 0x5d, 0x40, 0xb2, 0xf4, 0xc7, 0xc6, 0x57,
- 0x0f, 0xce, 0x11, 0xc3, 0x06, 0xda, 0x01, 0x30, 0x98, 0x45, 0x00, 0xcb,
- 0xc0, 0xb3, 0x00, 0x4b, 0x6f, 0xcc, 0xc0, 0xb3, 0x12, 0x47, 0x02, 0x90,
- 0xc0, 0xb3, 0x38, 0xd4, 0x3d, 0x2d, 0x05, 0x45, 0xa1, 0x06, 0x40, 0xb3,
- 0xa3, 0xd4, 0x10, 0xe9, 0x0f, 0xb3, 0xd1, 0x46, 0x10, 0x37, 0x40, 0xb3,
- 0xb5, 0xc8, 0xbb, 0xed, 0x0f, 0xa7, 0x08, 0x03, 0xc0, 0xb3, 0xc1, 0x15,
- 0xc0, 0xb3, 0xd7, 0xc4, 0xe0, 0x3f, 0x00, 0x41, 0xd9, 0x1c, 0xc0, 0xb3,
- 0xe3, 0xc5, 0x7f, 0x31, 0x00, 0x41, 0xc9, 0xcd, 0x7f, 0x29, 0x00, 0x41,
- 0xb9, 0xc3, 0xa6, 0xf6, 0x00, 0x41, 0x99, 0xc7, 0xc3, 0x39, 0x00, 0x41,
- 0x80, 0x44, 0x00, 0xbb, 0xc0, 0xb3, 0xef, 0x4f, 0x0f, 0x25, 0x40, 0xb4,
- 0x10, 0x15, 0xc0, 0xb4, 0x20, 0x91, 0x00, 0x41, 0x5b, 0x00, 0xb4, 0x2c,
- 0x8b, 0x00, 0x41, 0x51, 0x45, 0x2b, 0xd3, 0xc0, 0xb4, 0x35, 0x97, 0x00,
- 0x41, 0x39, 0x83, 0x00, 0x41, 0x1b, 0x00, 0xb4, 0x48, 0x87, 0x00, 0x40,
- 0xe8, 0x16, 0xc0, 0xb4, 0x4c, 0x15, 0xc0, 0xb4, 0x5e, 0xc4, 0x4b, 0x86,
- 0x00, 0x40, 0x99, 0xc3, 0xe6, 0x9a, 0x00, 0x40, 0x91, 0xc2, 0x00, 0x60,
- 0x00, 0x40, 0x81, 0x0b, 0xc0, 0xb4, 0x6a, 0xc3, 0x1f, 0xd8, 0x00, 0x40,
- 0x69, 0xc3, 0x85, 0xc2, 0x00, 0x40, 0x61, 0xc5, 0xa2, 0xdd, 0x00, 0x40,
- 0x59, 0xc4, 0xe2, 0xdf, 0x00, 0x40, 0x51, 0xc3, 0x6d, 0xce, 0x00, 0x40,
- 0x49, 0xc3, 0x08, 0xc2, 0x00, 0x40, 0x31, 0x04, 0xc0, 0xb4, 0x76, 0xc5,
- 0x4b, 0x7f, 0x00, 0x40, 0x19, 0xc5, 0xd7, 0x4b, 0x00, 0x40, 0x11, 0xc4,
- 0xdf, 0x9e, 0x00, 0x40, 0x00, 0xcf, 0x44, 0x12, 0x01, 0x31, 0x00, 0x8a,
- 0x0f, 0xcd, 0x29, 0xc8, 0x46, 0x1a, 0x0f, 0x9d, 0x80, 0x87, 0x01, 0x19,
- 0x99, 0x4a, 0x25, 0xc4, 0x40, 0xb4, 0x82, 0x44, 0x00, 0x34, 0xc0, 0xb4,
- 0x8e, 0xc6, 0xc6, 0xc1, 0x0f, 0xb1, 0x50, 0xcc, 0x83, 0xbc, 0x0f, 0xb2,
- 0x11, 0xcd, 0x7d, 0x21, 0x0f, 0xb2, 0x08, 0x4c, 0x21, 0xc0, 0xc0, 0xb4,
- 0xa0, 0x53, 0x45, 0x51, 0x40, 0xb4, 0xb2, 0x8d, 0x0f, 0xcc, 0x41, 0x44,
- 0x41, 0xc9, 0x40, 0xb4, 0xbe, 0xc6, 0x01, 0x7a, 0x01, 0x3a, 0x69, 0xc4,
- 0x0e, 0xa8, 0x01, 0x39, 0x81, 0xcb, 0x90, 0x32, 0x01, 0x38, 0xf0, 0xc6,
- 0xd4, 0x17, 0x0f, 0x9b, 0x39, 0x4b, 0x8b, 0xd9, 0x40, 0xb4, 0xee, 0x4c,
- 0x8b, 0xfc, 0xc0, 0xb5, 0x6e, 0xc4, 0x29, 0xf3, 0x0f, 0x9b, 0x81, 0x00,
- 0xc0, 0xb5, 0x86, 0x95, 0x0f, 0xd3, 0x98, 0xc4, 0xe3, 0x1b, 0x0f, 0xb6,
- 0x69, 0xc7, 0xc7, 0xfb, 0x0f, 0xb6, 0x90, 0xc2, 0x00, 0x34, 0x00, 0x00,
- 0x79, 0xc3, 0x00, 0x83, 0x00, 0x00, 0x70, 0xc2, 0x00, 0xb7, 0x0f, 0xcc,
- 0x11, 0xc2, 0x05, 0xd5, 0x01, 0x32, 0x78, 0x46, 0x04, 0x73, 0xc0, 0xb5,
- 0xae, 0x48, 0x07, 0x17, 0xc0, 0xb5, 0xbe, 0xd4, 0x19, 0x6f, 0x0f, 0xb3,
- 0x80, 0xc2, 0x00, 0x4d, 0x0f, 0xad, 0xa9, 0xc7, 0xc5, 0x00, 0x0f, 0xd4,
- 0xd8, 0xcd, 0x77, 0xe6, 0x01, 0x36, 0x20, 0x45, 0x16, 0xbc, 0xc0, 0xb5,
- 0xe2, 0x45, 0x1f, 0xc8, 0x40, 0xb6, 0x12, 0xd0, 0x0d, 0x8c, 0x0f, 0xb3,
- 0x58, 0xcd, 0x76, 0x60, 0x01, 0x4f, 0xb0, 0x9f, 0x08, 0xd5, 0x11, 0x9e,
- 0x08, 0xd5, 0x08, 0x45, 0x00, 0x56, 0x40, 0xb6, 0x42, 0xc5, 0xd8, 0x3b,
- 0x08, 0xd4, 0xe9, 0xcb, 0x97, 0x80, 0x08, 0xd4, 0xe1, 0xc4, 0x02, 0x28,
- 0x08, 0xd4, 0xd9, 0xc5, 0x33, 0x45, 0x08, 0xd4, 0xd0, 0xc8, 0xb9, 0x25,
- 0x08, 0xd4, 0xc9, 0x44, 0x00, 0xcc, 0x40, 0xb6, 0x4e, 0xc2, 0x00, 0x02,
- 0x08, 0xd4, 0xa9, 0x95, 0x08, 0xd4, 0xa3, 0x00, 0xb6, 0x66, 0x8e, 0x08,
- 0xd4, 0x91, 0x94, 0x08, 0xd4, 0x89, 0x8f, 0x08, 0xd4, 0x81, 0x84, 0x08,
- 0xd4, 0x79, 0x90, 0x08, 0xd4, 0x73, 0x00, 0xb6, 0x6a, 0x86, 0x08, 0xd4,
- 0x69, 0x8d, 0x08, 0xd4, 0x59, 0x89, 0x08, 0xd4, 0x50, 0x15, 0xc0, 0xb6,
- 0x6e, 0xc2, 0x00, 0xc7, 0x08, 0xd4, 0x39, 0xc2, 0x02, 0x59, 0x08, 0xd4,
- 0x30, 0x0d, 0xc0, 0xb6, 0x78, 0xc2, 0x00, 0xa4, 0x08, 0xd4, 0x11, 0x15,
- 0xc0, 0xb6, 0x88, 0xc2, 0x04, 0x41, 0x08, 0xd3, 0xf1, 0xc2, 0x00, 0xc7,
- 0x08, 0xd3, 0xe9, 0xc2, 0x02, 0x59, 0x08, 0xd3, 0xe1, 0xc2, 0x1d, 0x5f,
- 0x08, 0xd3, 0xd9, 0xc2, 0x00, 0x02, 0x08, 0xd3, 0xd1, 0x1c, 0xc0, 0xb6,
- 0x98, 0x06, 0xc0, 0xb6, 0xa2, 0x16, 0xc0, 0xb6, 0xb6, 0xc2, 0x00, 0xad,
- 0x08, 0xd3, 0xa1, 0x04, 0xc0, 0xb6, 0xc8, 0x12, 0xc0, 0xb6, 0xd2, 0x10,
- 0xc0, 0xb6, 0xdc, 0x0c, 0xc0, 0xb6, 0xf2, 0x05, 0xc0, 0xb6, 0xfc, 0x09,
- 0xc0, 0xb7, 0x06, 0x83, 0x08, 0xd2, 0x80, 0xcb, 0x34, 0xc1, 0x08, 0xd2,
- 0xd9, 0x45, 0x00, 0xcb, 0x40, 0xb7, 0x10, 0xd1, 0x37, 0x9a, 0x0f, 0xad,
- 0x61, 0xc9, 0xaf, 0xac, 0x0f, 0x9b, 0x31, 0xc6, 0x5a, 0xa2, 0x00, 0x05,
- 0x68, 0xc4, 0x22, 0x71, 0x08, 0x87, 0xc9, 0xc5, 0x01, 0xdb, 0x08, 0x87,
- 0xc1, 0x15, 0xc0, 0xb7, 0x30, 0x08, 0xc0, 0xb7, 0x3c, 0x16, 0xc0, 0xb7,
- 0x48, 0xc3, 0x01, 0xb4, 0x08, 0x87, 0x89, 0xc4, 0x15, 0xd3, 0x08, 0x87,
- 0x80, 0x42, 0x00, 0xad, 0xc0, 0xb7, 0x54, 0x07, 0xc0, 0xb7, 0x5c, 0xc2,
- 0x22, 0x1f, 0x08, 0x87, 0x31, 0xc2, 0x6c, 0xa9, 0x08, 0x87, 0x29, 0xc2,
- 0x14, 0x44, 0x08, 0x87, 0x21, 0xc2, 0x02, 0x98, 0x08, 0x87, 0x11, 0x10,
- 0xc0, 0xb7, 0x66, 0xc3, 0xe7, 0x7e, 0x08, 0x87, 0x01, 0xc3, 0xc4, 0x45,
- 0x08, 0x86, 0xf9, 0xc3, 0x19, 0xa7, 0x08, 0x86, 0xf1, 0xc3, 0x14, 0x96,
- 0x08, 0x86, 0xe9, 0xc3, 0x46, 0x7e, 0x08, 0x86, 0xe1, 0xc3, 0x3b, 0x73,
- 0x08, 0x86, 0xd9, 0xc3, 0xcb, 0x77, 0x08, 0x86, 0xd1, 0xc3, 0x14, 0x8f,
- 0x08, 0x86, 0xc1, 0xc3, 0xac, 0xb2, 0x08, 0x86, 0xa9, 0xc3, 0x6e, 0x30,
- 0x08, 0x86, 0xa1, 0xc3, 0xe6, 0x5e, 0x08, 0x86, 0x99, 0xc3, 0x47, 0x28,
- 0x08, 0x86, 0x91, 0xc3, 0x02, 0x97, 0x08, 0x86, 0x89, 0xc3, 0xdd, 0x6f,
- 0x08, 0x86, 0x80, 0xd4, 0x3e, 0x95, 0x08, 0x7a, 0xc9, 0x44, 0x01, 0x59,
- 0xc0, 0xb7, 0x78, 0xcf, 0x3e, 0x9a, 0x08, 0x7a, 0xb8, 0xc3, 0x01, 0xb4,
- 0x08, 0x7a, 0x8b, 0x00, 0xb7, 0x87, 0x16, 0x40, 0xb7, 0x8d, 0xcc, 0x02,
- 0xbb, 0x08, 0x7a, 0x81, 0xca, 0xa3, 0x38, 0x08, 0x7a, 0x79, 0xcf, 0x6c,
- 0x1d, 0x08, 0x7a, 0x71, 0x45, 0x10, 0xd5, 0xc0, 0xb7, 0x99, 0x46, 0x0d,
- 0xe6, 0xc0, 0xb7, 0xa5, 0x49, 0x01, 0x59, 0xc0, 0xb7, 0xb1, 0x44, 0x01,
- 0xb8, 0x40, 0xb7, 0xbd, 0x0e, 0xc0, 0xb7, 0xc9, 0xc4, 0xe1, 0xcb, 0x08,
- 0x7a, 0x19, 0xc3, 0x26, 0xf9, 0x08, 0x7a, 0x11, 0x15, 0xc0, 0xb7, 0xd5,
- 0xc9, 0x5d, 0xe2, 0x08, 0x7a, 0x01, 0xc2, 0x00, 0x27, 0x08, 0x79, 0xf1,
- 0x03, 0xc0, 0xb7, 0xdf, 0xc3, 0x1f, 0xd8, 0x08, 0x79, 0xd9, 0xc3, 0x0b,
- 0x0e, 0x08, 0x79, 0xd1, 0xc4, 0xe2, 0x57, 0x08, 0x79, 0xc1, 0xc4, 0x4b,
- 0x98, 0x08, 0x79, 0xb9, 0xc2, 0x01, 0xf0, 0x08, 0x79, 0x9b, 0x00, 0xb7,
- 0xeb, 0xc5, 0x4b, 0x92, 0x08, 0x79, 0xa9, 0xc3, 0x78, 0xa9, 0x08, 0x79,
- 0xa1, 0xc5, 0xa1, 0x94, 0x08, 0x79, 0x91, 0xc4, 0xe4, 0x8f, 0x08, 0x79,
- 0x88, 0x00, 0xc0, 0xb7, 0xf1, 0x42, 0x00, 0x59, 0x40, 0xb8, 0x4d, 0xcd,
- 0x76, 0x39, 0x0f, 0xaa, 0x29, 0x15, 0xc0, 0xb8, 0xa5, 0x06, 0xc0, 0xb8,
- 0xcc, 0x10, 0xc0, 0xb8, 0xd6, 0xce, 0x6e, 0x17, 0x01, 0x20, 0xf9, 0xd0,
- 0x5d, 0xa2, 0x01, 0x20, 0xf1, 0xcf, 0x66, 0xc8, 0x01, 0x20, 0xe9, 0x08,
- 0xc0, 0xb8, 0xe0, 0x07, 0xc0, 0xb8, 0xec, 0x42, 0x03, 0x40, 0xc0, 0xb8,
- 0xf6, 0xd3, 0x47, 0x06, 0x01, 0x20, 0x59, 0xc9, 0x19, 0xeb, 0x01, 0x20,
- 0x51, 0xd5, 0x33, 0x13, 0x01, 0x20, 0x49, 0x04, 0xc0, 0xb9, 0x02, 0xcb,
- 0x4d, 0x89, 0x01, 0x20, 0x31, 0xd2, 0x47, 0xf4, 0x01, 0x5c, 0xb8, 0x47,
- 0x02, 0x90, 0xc0, 0xb9, 0x0e, 0x0a, 0xc0, 0xb9, 0x80, 0x4d, 0x79, 0xd4,
- 0xc0, 0xb9, 0x92, 0x14, 0xc0, 0xb9, 0x9e, 0x47, 0xc2, 0xd0, 0xc0, 0xb9,
- 0xb0, 0x47, 0xcb, 0xc8, 0xc0, 0xb9, 0xc2, 0xd1, 0x49, 0xa4, 0x00, 0x38,
- 0x79, 0x42, 0x00, 0x79, 0xc0, 0xb9, 0xd4, 0x42, 0x08, 0x22, 0xc0, 0xb9,
- 0xe0, 0x07, 0xc0, 0xb9, 0xec, 0xc7, 0xc5, 0xbd, 0x00, 0x3a, 0x51, 0xc5,
- 0x22, 0x43, 0x00, 0x3a, 0x49, 0xcc, 0x85, 0x78, 0x00, 0x3a, 0x01, 0xc9,
- 0xb6, 0x3f, 0x00, 0x3a, 0x09, 0x16, 0xc0, 0xb9, 0xf8, 0x4d, 0x7a, 0x56,
- 0x40, 0xba, 0x04, 0x83, 0x05, 0x40, 0x01, 0x8b, 0x05, 0x40, 0x09, 0x97,
- 0x05, 0x40, 0x19, 0x87, 0x05, 0x40, 0x21, 0x91, 0x05, 0x40, 0x29, 0x0d,
- 0xc0, 0xba, 0x10, 0x09, 0xc0, 0xba, 0x1a, 0x05, 0xc0, 0xba, 0x24, 0x16,
- 0xc0, 0xba, 0x2e, 0x06, 0xc0, 0xba, 0x3c, 0xc2, 0x00, 0xf6, 0x05, 0x41,
- 0x11, 0x0c, 0xc0, 0xba, 0x4a, 0xc2, 0x00, 0x10, 0x05, 0x40, 0xc1, 0x12,
- 0xc0, 0xba, 0x54, 0x04, 0xc0, 0xba, 0x5e, 0xc2, 0x00, 0x82, 0x05, 0x40,
- 0xe9, 0x14, 0xc0, 0xba, 0x68, 0xc2, 0x01, 0x64, 0x05, 0x40, 0xf9, 0xc2,
- 0x05, 0x7b, 0x05, 0x41, 0x08, 0xc8, 0xbe, 0xa5, 0x05, 0x40, 0x11, 0xc7,
- 0x5d, 0x4b, 0x05, 0x40, 0x31, 0x03, 0x40, 0xba, 0x72, 0x83, 0x05, 0x41,
- 0x19, 0x8b, 0x05, 0x41, 0x21, 0x97, 0x05, 0x41, 0x29, 0x87, 0x05, 0x41,
- 0x31, 0xc2, 0x02, 0x84, 0x05, 0x41, 0x38, 0x9e, 0x05, 0x41, 0x41, 0x9f,
- 0x05, 0x41, 0x49, 0xa0, 0x05, 0x41, 0x51, 0xa1, 0x05, 0x41, 0x58, 0xca,
- 0xa0, 0xfe, 0x0f, 0xa5, 0x61, 0xc5, 0xd7, 0xe6, 0x0f, 0xb5, 0x20, 0xd6,
- 0x2d, 0x33, 0x0f, 0xaf, 0x19, 0xc2, 0x00, 0x28, 0x0f, 0xa8, 0x43, 0x00,
- 0xba, 0x7e, 0xcf, 0x61, 0x28, 0x0f, 0xb2, 0x50, 0x87, 0x01, 0x3a, 0x3b,
- 0x00, 0xba, 0x84, 0xc9, 0x7b, 0xdf, 0x0f, 0xa4, 0xb0, 0xc2, 0x01, 0xc7,
- 0x01, 0x4d, 0x09, 0xc4, 0x01, 0x1e, 0x01, 0x4d, 0x00, 0xcc, 0x8c, 0x08,
- 0x0f, 0xae, 0x99, 0xc8, 0xb7, 0xc5, 0x0f, 0xae, 0x91, 0xc5, 0x0a, 0x91,
- 0x0f, 0xa0, 0xd0, 0xc4, 0xe5, 0x4f, 0x0f, 0xab, 0xc0, 0x90, 0x0f, 0xca,
- 0x21, 0xcb, 0x8f, 0xcf, 0x0f, 0xcf, 0xa8, 0x43, 0x02, 0x5d, 0xc0, 0xba,
- 0x88, 0x46, 0x0a, 0x0f, 0x40, 0xba, 0xa9, 0xcc, 0x88, 0x60, 0x01, 0x36,
- 0x29, 0xc9, 0xab, 0x98, 0x0f, 0x98, 0xf0, 0x52, 0x49, 0x80, 0xc0, 0xba,
- 0xe1, 0x47, 0x02, 0x90, 0xc0, 0xbb, 0x09, 0xc8, 0x76, 0x58, 0x00, 0xdd,
- 0xd1, 0x46, 0x06, 0x97, 0xc0, 0xbb, 0x93, 0x51, 0x52, 0x82, 0xc0, 0xbb,
- 0xb7, 0x45, 0x00, 0xcb, 0xc0, 0xbb, 0xc9, 0x4d, 0x7e, 0xa7, 0x40, 0xbb,
- 0xd5, 0xcf, 0x68, 0x7b, 0x0f, 0x98, 0x20, 0xd5, 0x37, 0x03, 0x01, 0x17,
- 0x49, 0xce, 0x70, 0xa9, 0x01, 0x15, 0x89, 0x46, 0x23, 0x35, 0xc0, 0xbb,
- 0xdf, 0x46, 0x00, 0x95, 0x40, 0xbb, 0xeb, 0xc2, 0x00, 0xf3, 0x01, 0x14,
- 0x13, 0x00, 0xbc, 0x03, 0x46, 0x00, 0x95, 0xc0, 0xbc, 0x07, 0x45, 0x00,
- 0x6c, 0x40, 0xbc, 0x13, 0xd1, 0x1a, 0x39, 0x01, 0x04, 0x71, 0xd0, 0x1e,
- 0xbf, 0x01, 0x04, 0x69, 0x07, 0xc0, 0xbc, 0x25, 0xc5, 0x1b, 0x38, 0x01,
- 0x04, 0x59, 0xc9, 0x68, 0x21, 0x01, 0x04, 0x51, 0xc4, 0x22, 0x71, 0x01,
- 0x04, 0x49, 0x15, 0xc0, 0xbc, 0x31, 0x08, 0xc0, 0xbc, 0x3d, 0x16, 0xc0,
- 0xbc, 0x49, 0xc3, 0x01, 0xb4, 0x01, 0x04, 0x09, 0xc4, 0x15, 0xd3, 0x01,
- 0x04, 0x00, 0x87, 0x01, 0x19, 0x19, 0x44, 0x00, 0x34, 0x40, 0xbc, 0x55,
- 0x00, 0xc0, 0xbc, 0x61, 0xc7, 0xc1, 0x41, 0x01, 0x55, 0x52, 0x00, 0xbc,
- 0xc3, 0x46, 0xd3, 0x03, 0xc0, 0xbc, 0xc9, 0xca, 0x9e, 0x56, 0x00, 0x04,
- 0xf0, 0x16, 0xc0, 0xbc, 0xd1, 0xc2, 0x00, 0x69, 0x0f, 0xc9, 0xa2, 0x00,
- 0xbc, 0xe0, 0xc6, 0x1f, 0x68, 0x01, 0x11, 0xbb, 0x00, 0xbc, 0xe6, 0xc9,
- 0xb5, 0x8b, 0x01, 0x0a, 0x50, 0x00, 0x40, 0xbc, 0xec, 0xcd, 0x7a, 0xbe,
- 0x01, 0x08, 0xf1, 0x5b, 0x19, 0x4e, 0x40, 0xbd, 0x04, 0xc5, 0x2a, 0x69,
- 0x0f, 0xc9, 0x81, 0xc3, 0x14, 0x99, 0x0f, 0xd6, 0x19, 0xc6, 0x15, 0x35,
- 0x0f, 0xd6, 0x20, 0xc3, 0x01, 0x0a, 0x0f, 0xd5, 0x39, 0x45, 0x3f, 0x4d,
- 0x40, 0xbd, 0x3c, 0xcc, 0x84, 0xb8, 0x01, 0x08, 0x78, 0x49, 0xb0, 0x7b,
- 0xc0, 0xbd, 0x48, 0xcc, 0x8a, 0xe8, 0x0f, 0xb6, 0xe8, 0x46, 0x19, 0x56,
- 0x40, 0xbd, 0x86, 0xc5, 0x00, 0xca, 0x00, 0x01, 0x5b, 0x00, 0xbd, 0x8e,
- 0xcb, 0x96, 0xba, 0x00, 0x05, 0x88, 0xc8, 0x2a, 0x73, 0x0f, 0xc8, 0x79,
- 0xca, 0xa3, 0xec, 0x0f, 0xc8, 0x60, 0xcb, 0x96, 0x83, 0x0f, 0x9c, 0x69,
- 0xc5, 0xdf, 0xc5, 0x0f, 0x9a, 0x68, 0xc4, 0x12, 0xa9, 0x0f, 0xa1, 0xe9,
- 0xc4, 0x00, 0x67, 0x0f, 0xa1, 0xb8, 0xd0, 0x5e, 0xe2, 0x01, 0x1c, 0x91,
- 0xd2, 0x49, 0xb6, 0x01, 0x1c, 0x88, 0xc8, 0x1c, 0xd5, 0x01, 0x5f, 0xe9,
- 0xc9, 0xb0, 0x57, 0x0f, 0xb7, 0x98, 0x94, 0x0f, 0xa6, 0xf9, 0x00, 0xc0,
- 0xbd, 0x92, 0x95, 0x0f, 0xae, 0x80, 0x43, 0x04, 0x18, 0xc0, 0xbd, 0x9e,
- 0xc8, 0xb8, 0x95, 0x0f, 0x9c, 0x49, 0xd1, 0x56, 0xc2, 0x01, 0x81, 0xe9,
- 0xcc, 0x82, 0xd8, 0x01, 0x92, 0x80, 0x46, 0x07, 0x11, 0xc0, 0xbd, 0xa8,
- 0x47, 0x33, 0xef, 0xc0, 0xbd, 0xb4, 0x46, 0x06, 0x97, 0xc0, 0xbd, 0xca,
- 0x47, 0xca, 0x86, 0xc0, 0xbd, 0xe8, 0x52, 0x4d, 0x16, 0xc0, 0xbe, 0x2e,
- 0x4a, 0x9f, 0x64, 0x40, 0xbe, 0x3a, 0x45, 0x6a, 0xdb, 0xc0, 0xbe, 0x78,
- 0x45, 0x08, 0x54, 0xc0, 0xbe, 0x84, 0xc5, 0xd7, 0x82, 0x0f, 0xd4, 0x10,
- 0x00, 0x40, 0xbe, 0x96, 0xcf, 0x67, 0x7c, 0x08, 0xd7, 0xa3, 0x00, 0xbe,
- 0xa2, 0x46, 0x02, 0x91, 0x40, 0xbe, 0xa6, 0x00, 0x40, 0xbf, 0x14, 0xc4,
- 0x0c, 0x46, 0x08, 0xd7, 0x63, 0x00, 0xbf, 0x20, 0xcc, 0x21, 0x78, 0x08,
- 0xd7, 0x3a, 0x00, 0xbf, 0x24, 0x00, 0x40, 0xbf, 0x2a, 0x00, 0xc0, 0xbf,
- 0x39, 0x46, 0xcd, 0xf3, 0xc0, 0xbf, 0x51, 0xcd, 0x7b, 0x19, 0x0f, 0xc9,
- 0x90, 0x49, 0xad, 0x3f, 0xc0, 0xbf, 0x63, 0x49, 0x2e, 0xc0, 0x40, 0xbf,
- 0x95, 0x44, 0xab, 0xee, 0xc0, 0xbf, 0xd7, 0x0f, 0xc0, 0xbf, 0xf1, 0xc3,
- 0x07, 0x42, 0x0b, 0x5b, 0x81, 0x16, 0xc0, 0xbf, 0xfd, 0xc2, 0x07, 0x8d,
- 0x0b, 0x5b, 0x61, 0x10, 0xc0, 0xc0, 0x0f, 0x1a, 0xc0, 0xc0, 0x1b, 0x0a,
- 0xc0, 0xc0, 0x2b, 0xc8, 0xbe, 0xfd, 0x0b, 0x5b, 0x39, 0x44, 0xe0, 0x63,
- 0xc0, 0xc0, 0x37, 0xc6, 0xd1, 0x7d, 0x0b, 0x5a, 0x18, 0x16, 0xc0, 0xc0,
- 0x53, 0x47, 0x0c, 0x4b, 0xc0, 0xc0, 0x5f, 0xc8, 0x32, 0x88, 0x0b, 0x5a,
- 0xf0, 0xc4, 0x22, 0x71, 0x0b, 0x5a, 0xc9, 0xc5, 0x01, 0xdb, 0x0b, 0x5a,
- 0xc1, 0x15, 0xc0, 0xc0, 0x69, 0x08, 0xc0, 0xc0, 0x75, 0x16, 0xc0, 0xc0,
- 0x81, 0xc3, 0x01, 0xb4, 0x0b, 0x5a, 0x89, 0xc4, 0x15, 0xd3, 0x0b, 0x5a,
- 0x80, 0x16, 0xc0, 0xc0, 0x8d, 0xc3, 0xe1, 0x73, 0x0b, 0x59, 0xa9, 0x15,
- 0xc0, 0xc0, 0x99, 0x0d, 0x40, 0xc0, 0xa3, 0x03, 0xc0, 0xc0, 0xaf, 0x19,
- 0xc0, 0xc0, 0xc7, 0x0b, 0xc0, 0xc0, 0xcf, 0x11, 0xc0, 0xc0, 0xdb, 0x17,
- 0xc0, 0xc0, 0xe7, 0x07, 0x40, 0xc0, 0xf3, 0xd0, 0x3d, 0xa9, 0x0f, 0xb5,
- 0x81, 0xc2, 0x02, 0x18, 0x0f, 0xca, 0xa0, 0xc8, 0x19, 0xb7, 0x0f, 0xb1,
- 0xf1, 0xc4, 0x00, 0x67, 0x0f, 0xb1, 0x08, 0xcb, 0x94, 0x94, 0x01, 0x1f,
- 0xf1, 0xc5, 0x00, 0x72, 0x01, 0x1f, 0xd8, 0xc7, 0x00, 0x70, 0x01, 0x1f,
- 0xe9, 0xcb, 0x96, 0x4c, 0x01, 0x1f, 0xe0, 0x43, 0x00, 0x92, 0xc0, 0xc0,
- 0xff, 0xc3, 0x36, 0x3a, 0x0f, 0xa7, 0x70, 0xc7, 0x05, 0x7a, 0x01, 0x03,
- 0x49, 0xca, 0xa0, 0xd6, 0x01, 0x01, 0x60, 0xd1, 0x56, 0xd3, 0x0f, 0xb5,
- 0x40, 0xc7, 0x00, 0x6b, 0x01, 0x57, 0x08, 0x42, 0x00, 0xb7, 0xc0, 0xc1,
- 0x0e, 0xc7, 0xc5, 0x54, 0x01, 0x18, 0x31, 0xcc, 0x83, 0x8c, 0x0f, 0xb1,
- 0x18, 0xc4, 0x00, 0xcb, 0x01, 0x0a, 0x61, 0xd1, 0x4f, 0xfc, 0x01, 0x01,
- 0x89, 0xca, 0xa7, 0x52, 0x01, 0x01, 0x80, 0xc8, 0x14, 0x82, 0x01, 0x31,
- 0x71, 0x8a, 0x0f, 0x9a, 0x89, 0xc3, 0x01, 0xe4, 0x0f, 0xcc, 0xd0, 0xc4,
- 0x04, 0x5e, 0x08, 0x5d, 0x59, 0x19, 0xc0, 0xc1, 0x18, 0xc2, 0x00, 0x4d,
- 0x08, 0x5d, 0x68, 0xc8, 0x0c, 0x4a, 0x08, 0x5d, 0x78, 0xc3, 0x0d, 0xd9,
- 0x08, 0x5c, 0x81, 0x03, 0x40, 0xc1, 0x22, 0xc2, 0x00, 0x6e, 0x08, 0x5c,
- 0x38, 0xce, 0x75, 0xb1, 0x08, 0x48, 0xf9, 0x47, 0x33, 0xef, 0xc0, 0xc1,
- 0x2e, 0x47, 0x02, 0x90, 0x40, 0xc1, 0x3b, 0x47, 0x02, 0x90, 0xc0, 0xc1,
- 0x9e, 0x15, 0xc0, 0xc2, 0x24, 0xd0, 0x5e, 0x02, 0x05, 0x43, 0xa9, 0x45,
- 0x00, 0xba, 0x40, 0xc2, 0x2e, 0x12, 0xc0, 0xc2, 0x3a, 0x16, 0xc0, 0xc2,
- 0x4a, 0x05, 0xc0, 0xc2, 0x5c, 0x19, 0xc0, 0xc2, 0x70, 0x0a, 0xc0, 0xc2,
- 0x7c, 0x04, 0xc0, 0xc2, 0x8e, 0x15, 0xc0, 0xc2, 0xa1, 0x42, 0x00, 0xad,
- 0xc0, 0xc2, 0xbf, 0x42, 0x00, 0x8a, 0xc0, 0xc2, 0xcb, 0x42, 0x01, 0x4a,
- 0xc0, 0xc2, 0xd5, 0x14, 0xc0, 0xc2, 0xe1, 0xc5, 0xd6, 0x8d, 0x08, 0x0f,
- 0x71, 0xc4, 0xac, 0x43, 0x08, 0x0f, 0x99, 0xc7, 0xc9, 0xde, 0x08, 0x0f,
- 0xb9, 0x09, 0xc0, 0xc2, 0xed, 0xc5, 0x01, 0x0f, 0x08, 0x0e, 0xc9, 0xc5,
- 0xd7, 0xd2, 0x08, 0x0f, 0xc0, 0xc6, 0x57, 0xf2, 0x00, 0x04, 0x81, 0xc4,
- 0x06, 0x9d, 0x00, 0x00, 0xa1, 0x16, 0xc0, 0xc2, 0xf9, 0xc3, 0x01, 0xb4,
- 0x00, 0x00, 0x88, 0x15, 0xc0, 0xc3, 0x05, 0x44, 0x01, 0xf0, 0xc0, 0xc3,
- 0x11, 0x03, 0xc0, 0xc3, 0x20, 0x09, 0xc0, 0xc3, 0x2c, 0xc2, 0x00, 0x3a,
- 0x00, 0x4a, 0x81, 0x4b, 0x6f, 0xcc, 0xc0, 0xc3, 0x38, 0x47, 0x02, 0x90,
- 0xc0, 0xc3, 0x6d, 0xc7, 0xc9, 0x67, 0x05, 0x47, 0xe9, 0xc5, 0x9a, 0x82,
- 0x05, 0x47, 0xd1, 0x06, 0x40, 0xc3, 0xe2, 0xc6, 0xd4, 0x2f, 0x0f, 0xae,
- 0xa1, 0xc8, 0x3f, 0xdd, 0x0f, 0xad, 0x28, 0x96, 0x0f, 0x9e, 0xe3, 0x00,
- 0xc3, 0xf4, 0x43, 0x02, 0x5d, 0x40, 0xc3, 0xfa, 0x44, 0x06, 0xca, 0xc0,
- 0xc4, 0x06, 0xca, 0xa9, 0x00, 0x0f, 0x99, 0x98, 0x44, 0x00, 0x57, 0xc0,
- 0xc4, 0x12, 0x45, 0x00, 0x6c, 0x40, 0xc4, 0x24, 0x46, 0x00, 0x6b, 0x40,
- 0xc4, 0x30, 0x46, 0x00, 0x6b, 0x40, 0xc4, 0x42, 0xc5, 0x65, 0xf1, 0x0e,
- 0x98, 0x2b, 0x00, 0xc4, 0x54, 0x0a, 0xc0, 0xc4, 0x5a, 0x49, 0xb6, 0x63,
- 0xc0, 0xc4, 0x66, 0x48, 0xb6, 0xfd, 0x40, 0xc4, 0x72, 0xc4, 0x22, 0x71,
- 0x00, 0x01, 0xcb, 0x00, 0xc4, 0x7e, 0xc5, 0x01, 0xdb, 0x00, 0x01, 0xc3,
- 0x00, 0xc4, 0x82, 0x15, 0xc0, 0xc4, 0x86, 0x08, 0xc0, 0xc4, 0x98, 0x16,
- 0xc0, 0xc4, 0xaa, 0xc3, 0x01, 0xb4, 0x00, 0x01, 0x8b, 0x00, 0xc4, 0xbc,
- 0xc4, 0x15, 0xd3, 0x00, 0x01, 0x82, 0x00, 0xc4, 0xc0, 0x06, 0xc0, 0xc4,
- 0xc4, 0xd0, 0x5c, 0xf2, 0x08, 0xca, 0x31, 0xca, 0x95, 0x5a, 0x08, 0xca,
- 0x29, 0x45, 0x00, 0xcb, 0xc0, 0xc4, 0xd0, 0x47, 0x2e, 0xce, 0xc0, 0xc4,
- 0xe8, 0xca, 0x9c, 0xda, 0x08, 0xca, 0x09, 0xd3, 0x40, 0x32, 0x08, 0xc9,
- 0xf9, 0x18, 0xc0, 0xc4, 0xf4, 0x47, 0x02, 0x90, 0x40, 0xc5, 0x00, 0x45,
- 0x29, 0x00, 0xc0, 0xc5, 0x6d, 0xc3, 0x22, 0x38, 0x01, 0x11, 0x19, 0xc7,
- 0xc4, 0x89, 0x0f, 0xc9, 0xf8, 0x4b, 0x42, 0x4e, 0xc0, 0xc5, 0x77, 0xca,
- 0x9d, 0xc0, 0x01, 0x3b, 0xf9, 0x46, 0x06, 0x97, 0x40, 0xc5, 0x83, 0xca,
- 0x9d, 0xc0, 0x01, 0x3c, 0x49, 0x46, 0x06, 0x97, 0x40, 0xc5, 0xa1, 0xc8,
- 0xbc, 0x25, 0x01, 0x36, 0x69, 0x49, 0xb3, 0xa5, 0x40, 0xc5, 0xc5, 0xa3,
- 0x01, 0x34, 0x29, 0xa2, 0x01, 0x34, 0x21, 0xa1, 0x01, 0x34, 0x19, 0xa0,
- 0x01, 0x34, 0x11, 0x9f, 0x01, 0x34, 0x09, 0x9e, 0x01, 0x34, 0x00, 0xc9,
- 0xb5, 0x1f, 0x01, 0x18, 0x01, 0x44, 0x61, 0x49, 0x40, 0xc5, 0xd1, 0xc9,
- 0xb2, 0xe8, 0x0f, 0xd3, 0xc1, 0xc3, 0x02, 0x90, 0x0f, 0xa5, 0x38, 0xc5,
- 0x11, 0x01, 0x0f, 0xa1, 0x90, 0x48, 0xbb, 0x35, 0xc0, 0xc5, 0xe9, 0x42,
- 0x00, 0x46, 0x40, 0xc5, 0xfb, 0xc9, 0x03, 0x7e, 0x01, 0x18, 0x21, 0xd7,
- 0x26, 0xb6, 0x01, 0x17, 0x89, 0xc4, 0x36, 0xab, 0x01, 0x15, 0x23, 0x00,
- 0xc6, 0x42, 0xc9, 0xb4, 0xc5, 0x01, 0x4b, 0xf8, 0xd2, 0x47, 0x76, 0x0f,
- 0xa9, 0xe9, 0xcc, 0x47, 0x7c, 0x0f, 0xa9, 0xd9, 0x4e, 0x73, 0x1f, 0x40,
- 0xc6, 0x48, 0x42, 0x39, 0x3c, 0xc0, 0xc6, 0x54, 0xc5, 0x00, 0xdd, 0x0f,
- 0x81, 0x80, 0xc5, 0x00, 0xdd, 0x0f, 0x83, 0x11, 0x42, 0x39, 0x3c, 0x40,
- 0xc6, 0x7e, 0x00, 0xc0, 0xc6, 0xa8, 0x42, 0x00, 0x59, 0xc0, 0xc6, 0xfa,
- 0x02, 0x40, 0xc7, 0x0c, 0x05, 0xc0, 0xc7, 0x1e, 0xc5, 0x8a, 0xbf, 0x01,
- 0x4c, 0xc9, 0x15, 0xc0, 0xc7, 0x2a, 0xc9, 0xb0, 0x2a, 0x0f, 0xd7, 0x29,
- 0xd4, 0x3d, 0x55, 0x01, 0x70, 0x41, 0xc6, 0xcc, 0xe5, 0x01, 0x70, 0x99,
- 0xd4, 0x3b, 0x89, 0x01, 0x70, 0xb0, 0xc8, 0x17, 0x45, 0x01, 0x16, 0x29,
- 0xc5, 0x1b, 0x38, 0x01, 0x11, 0xc1, 0xc4, 0x26, 0x12, 0x01, 0x10, 0xa1,
- 0xc5, 0x00, 0x95, 0x00, 0x16, 0xc8, 0xd1, 0x50, 0x40, 0x08, 0xc1, 0xd9,
- 0x45, 0x00, 0xcb, 0xc0, 0xc7, 0x36, 0x4b, 0x6f, 0xcc, 0xc0, 0xc7, 0x48,
- 0x47, 0x02, 0x90, 0x40, 0xc7, 0x6b, 0xcf, 0x4a, 0xe8, 0x01, 0x17, 0x5b,
- 0x00, 0xc7, 0xd2, 0xc6, 0x0b, 0x0e, 0x01, 0x10, 0x60, 0xc9, 0x23, 0x34,
- 0x01, 0x17, 0x08, 0xc5, 0x30, 0x63, 0x01, 0x14, 0x03, 0x00, 0xc7, 0xd8,
- 0xc3, 0x00, 0x7a, 0x01, 0x15, 0x60, 0xdd, 0x10, 0x89, 0x01, 0x57, 0x70,
- 0xc7, 0x89, 0x0d, 0x0f, 0xad, 0xd9, 0xc4, 0x26, 0xb2, 0x0f, 0xad, 0xca,
- 0x00, 0xc7, 0xde, 0x0e, 0xc0, 0xc7, 0xe4, 0x45, 0x05, 0x2b, 0xc0, 0xc7,
- 0xf0, 0x49, 0xad, 0x09, 0xc0, 0xc8, 0x21, 0x44, 0xaf, 0xc8, 0xc0, 0xc8,
- 0x3f, 0xd7, 0x27, 0x12, 0x0d, 0xe3, 0x90, 0x99, 0x0d, 0xe1, 0xc3, 0x00,
- 0xc8, 0x4b, 0x96, 0x0d, 0xe0, 0x1b, 0x00, 0xc8, 0x6a, 0x95, 0x0d, 0xe0,
- 0xe3, 0x00, 0xc8, 0x72, 0x8c, 0x0d, 0xe0, 0xdb, 0x00, 0xc8, 0x82, 0x90,
- 0x0d, 0xe0, 0xd3, 0x00, 0xc8, 0x86, 0x8f, 0x0d, 0xe0, 0xcb, 0x00, 0xc8,
- 0x90, 0x94, 0x0d, 0xe0, 0x5b, 0x00, 0xc8, 0x94, 0x8e, 0x0d, 0xe0, 0x33,
- 0x00, 0xc8, 0xa4, 0x8a, 0x0d, 0xe0, 0x03, 0x00, 0xc8, 0xae, 0x8d, 0x0d,
- 0xe0, 0x2b, 0x00, 0xc8, 0xb2, 0x86, 0x0d, 0xe0, 0x43, 0x00, 0xc8, 0xba,
- 0x88, 0x0d, 0xe0, 0x23, 0x00, 0xc8, 0xc4, 0x92, 0x0d, 0xe0, 0x13, 0x00,
- 0xc8, 0xca, 0x89, 0x0d, 0xe0, 0x53, 0x00, 0xc8, 0xd6, 0x98, 0x0d, 0xe0,
- 0x4b, 0x00, 0xc8, 0xdc, 0x84, 0x0d, 0xe0, 0x39, 0x9a, 0x0d, 0xe0, 0x0b,
- 0x00, 0xc8, 0xe2, 0x91, 0x0d, 0xe2, 0x23, 0x00, 0xc8, 0xe6, 0x97, 0x0d,
- 0xe2, 0x8b, 0x00, 0xc8, 0xf8, 0x87, 0x0d, 0xe2, 0x3b, 0x00, 0xc9, 0x06,
- 0xc2, 0x05, 0xd0, 0x0d, 0xe2, 0x81, 0x8b, 0x0d, 0xe2, 0x33, 0x00, 0xc9,
- 0x0e, 0x83, 0x0d, 0xe2, 0x0a, 0x00, 0xc9, 0x12, 0xe0, 0x01, 0xe7, 0x01,
- 0x3c, 0xf9, 0xc8, 0x81, 0x29, 0x07, 0xf2, 0x49, 0xc8, 0x80, 0x80, 0x07,
- 0xf2, 0x68, 0xc6, 0x00, 0x71, 0x0f, 0xa5, 0x41, 0xd0, 0x5b, 0x82, 0x01,
- 0x72, 0x18, 0xc5, 0xa4, 0x4b, 0x0f, 0xaf, 0x09, 0x45, 0x00, 0x6c, 0x40,
- 0xc9, 0x18, 0x00, 0xc0, 0xc9, 0x24, 0x42, 0x00, 0x59, 0x40, 0xc9, 0x45,
- 0x51, 0x50, 0xc8, 0xc0, 0xc9, 0x8e, 0xc3, 0x4e, 0xbc, 0x0f, 0xb5, 0xd8,
- 0xcf, 0x25, 0x41, 0x01, 0x33, 0xe1, 0x4f, 0x62, 0x27, 0x40, 0xc9, 0x96,
- 0x9c, 0x0f, 0x8f, 0xf9, 0x9b, 0x0f, 0x8f, 0xf1, 0x9a, 0x0f, 0x8f, 0xe9,
- 0x99, 0x0f, 0x8f, 0xe1, 0x98, 0x0f, 0x8f, 0xd9, 0x97, 0x0f, 0x8f, 0xd1,
- 0x96, 0x0f, 0x8f, 0xc9, 0x95, 0x0f, 0x8f, 0xc1, 0x94, 0x0f, 0x8f, 0xb9,
- 0x93, 0x0f, 0x8f, 0xb1, 0x92, 0x0f, 0x8f, 0xa9, 0x91, 0x0f, 0x8f, 0xa1,
- 0x90, 0x0f, 0x8f, 0x99, 0x8f, 0x0f, 0x8f, 0x91, 0x8e, 0x0f, 0x8f, 0x89,
- 0x8d, 0x0f, 0x8f, 0x81, 0x8c, 0x0f, 0x8f, 0x79, 0x8b, 0x0f, 0x8f, 0x71,
- 0x8a, 0x0f, 0x8f, 0x69, 0x89, 0x0f, 0x8f, 0x61, 0x88, 0x0f, 0x8f, 0x59,
- 0x87, 0x0f, 0x8f, 0x51, 0x86, 0x0f, 0x8f, 0x49, 0x85, 0x0f, 0x8f, 0x41,
- 0x84, 0x0f, 0x8f, 0x39, 0x83, 0x0f, 0x8f, 0x30, 0xc5, 0x1e, 0x24, 0x05,
- 0x4a, 0x99, 0x4a, 0x6f, 0xcd, 0x40, 0xc9, 0xa2, 0x8a, 0x05, 0x4a, 0x91,
- 0x94, 0x05, 0x4a, 0x89, 0x90, 0x05, 0x4a, 0x82, 0x00, 0xc9, 0xb9, 0x83,
- 0x05, 0x4a, 0x31, 0x10, 0xc0, 0xc9, 0xbd, 0x0f, 0xc0, 0xc9, 0xcf, 0xc2,
- 0x00, 0xa4, 0x05, 0x4a, 0x09, 0xc2, 0x01, 0x09, 0x05, 0x4a, 0x01, 0xc2,
- 0x1d, 0x5f, 0x05, 0x49, 0xf9, 0xc2, 0x00, 0xc7, 0x05, 0x49, 0xf1, 0xc2,
- 0x02, 0x59, 0x05, 0x49, 0xe9, 0xc2, 0x0c, 0x65, 0x05, 0x49, 0xe1, 0xc2,
- 0x24, 0x58, 0x05, 0x49, 0xd1, 0xc2, 0x03, 0x40, 0x05, 0x49, 0xc9, 0xc2,
- 0x00, 0xde, 0x05, 0x49, 0xb9, 0xc2, 0x03, 0xa4, 0x05, 0x49, 0xb1, 0xc2,
- 0x0b, 0xc6, 0x05, 0x49, 0xa1, 0xc2, 0x00, 0xb3, 0x05, 0x49, 0x99, 0xc2,
- 0x01, 0x29, 0x05, 0x49, 0x89, 0xc2, 0x04, 0x2b, 0x05, 0x49, 0x80, 0x15,
- 0xc0, 0xc9, 0xd9, 0x03, 0xc0, 0xc9, 0xfc, 0x11, 0xc0, 0xca, 0x04, 0x42,
- 0x00, 0xa4, 0xc0, 0xca, 0x16, 0x4a, 0x03, 0x3b, 0xc0, 0xca, 0x22, 0x05,
- 0xc0, 0xca, 0x2e, 0xcb, 0x1a, 0x3f, 0x00, 0x01, 0x4b, 0x00, 0xca, 0x43,
- 0x08, 0xc0, 0xca, 0x47, 0xe0, 0x06, 0xc7, 0x01, 0x16, 0x51, 0x16, 0xc0,
- 0xca, 0x51, 0x42, 0x01, 0x4a, 0xc0, 0xca, 0x65, 0x19, 0xc0, 0xca, 0x71,
- 0x46, 0x05, 0xef, 0xc0, 0xca, 0x7d, 0xd7, 0x28, 0xf5, 0x01, 0x70, 0x69,
- 0xd6, 0x2e, 0x7d, 0x01, 0x70, 0xe8, 0x19, 0xc0, 0xca, 0x89, 0x16, 0xc0,
- 0xca, 0x98, 0x15, 0xc0, 0xca, 0xaa, 0x0a, 0xc0, 0xca, 0xb6, 0xd0, 0x58,
- 0x92, 0x0f, 0xc1, 0xf1, 0xc5, 0x01, 0x0f, 0x01, 0x0c, 0x93, 0x00, 0xca,
- 0xc0, 0xd1, 0x57, 0x4a, 0x01, 0x0f, 0xf1, 0x06, 0xc0, 0xca, 0xca, 0x12,
- 0xc0, 0xca, 0xd6, 0x14, 0xc0, 0xca, 0xe2, 0xcf, 0x64, 0x9d, 0x01, 0x5a,
- 0x31, 0x04, 0xc0, 0xca, 0xee, 0x08, 0xc0, 0xcb, 0x00, 0xd7, 0x28, 0xb0,
- 0x0f, 0xc5, 0x38, 0x49, 0x01, 0x8a, 0xc0, 0xcb, 0x0c, 0x15, 0xc0, 0xcb,
- 0x24, 0xdb, 0x17, 0xd4, 0x01, 0x37, 0x29, 0x48, 0xbd, 0x6d, 0xc0, 0xcb,
- 0x30, 0x47, 0x54, 0x6f, 0x40, 0xcb, 0x48, 0xc8, 0x07, 0x5f, 0x01, 0x12,
- 0xb9, 0xcb, 0x91, 0xd4, 0x01, 0x12, 0xb1, 0xc8, 0x17, 0x45, 0x01, 0x10,
- 0xc1, 0xc5, 0x00, 0x95, 0x00, 0x16, 0xd1, 0xc4, 0xe4, 0x6b, 0x0f, 0xb6,
- 0xf9, 0xc5, 0x01, 0x8a, 0x01, 0x71, 0x80, 0x45, 0x05, 0x63, 0xc0, 0xcb,
- 0x5d, 0x43, 0x10, 0x47, 0xc0, 0xcb, 0x69, 0x45, 0x02, 0x4d, 0xc0, 0xcb,
- 0x75, 0x46, 0x02, 0x12, 0x40, 0xcb, 0x81, 0xce, 0x6f, 0xad, 0x0f, 0xae,
- 0xf1, 0x42, 0x00, 0x29, 0x40, 0xcb, 0x8d, 0xc6, 0xd2, 0x85, 0x0f, 0xbc,
- 0x59, 0xc7, 0xca, 0xc5, 0x0f, 0xa6, 0x68, 0xc3, 0xe6, 0x5b, 0x0f, 0x93,
- 0x29, 0x42, 0x00, 0xe5, 0xc0, 0xcb, 0x99, 0xc2, 0x07, 0x49, 0x0f, 0x93,
- 0x19, 0xc2, 0x12, 0x0a, 0x0f, 0x93, 0x09, 0xc2, 0x11, 0xdc, 0x0f, 0x93,
- 0x00, 0xc3, 0x01, 0xb4, 0x01, 0x0b, 0x03, 0x00, 0xcb, 0xa5, 0x08, 0xc0,
- 0xcb, 0xa9, 0x15, 0xc0, 0xcb, 0xb3, 0xd4, 0x3f, 0x0d, 0x01, 0x0c, 0x19,
- 0x16, 0xc0, 0xcb, 0xc2, 0x07, 0xc0, 0xcb, 0xd5, 0xc4, 0x22, 0x71, 0x01,
- 0x0b, 0x40, 0x07, 0xc0, 0xcb, 0xe1, 0xcb, 0x8f, 0xb9, 0x08, 0x0c, 0xa8,
- 0xd3, 0x40, 0x1f, 0x08, 0x0c, 0xa1, 0xcc, 0x84, 0xe8, 0x08, 0x0c, 0xb1,
- 0xcd, 0x78, 0x68, 0x08, 0x0c, 0xc8, 0xc3, 0x64, 0x0d, 0x0f, 0xb4, 0x19,
- 0xc5, 0xd9, 0x3f, 0x0f, 0xb7, 0x20, 0xc4, 0x03, 0x53, 0x01, 0x38, 0x5b,
- 0x00, 0xcb, 0xf3, 0xc4, 0xc1, 0x0f, 0x01, 0x38, 0x51, 0x0f, 0xc0, 0xcb,
- 0xf9, 0xcc, 0x88, 0x54, 0x0f, 0xc8, 0xd1, 0xd4, 0x21, 0xdc, 0x01, 0x70,
- 0x31, 0xc3, 0x01, 0x4a, 0x01, 0x71, 0x9b, 0x00, 0xcc, 0x0b, 0xc6, 0x07,
- 0x09, 0x01, 0x70, 0x59, 0xc5, 0x00, 0xaa, 0x01, 0x71, 0xa0, 0xc3, 0x7f,
- 0x84, 0x0f, 0x98, 0x40, 0xcb, 0x97, 0x5f, 0x01, 0x31, 0x11, 0xc7, 0xc2,
- 0xc9, 0x0f, 0xa8, 0xc0, 0xc3, 0x64, 0x0d, 0x0f, 0x9e, 0x71, 0xca, 0xa3,
- 0x92, 0x0f, 0x9e, 0x68, 0xca, 0x9d, 0x16, 0x08, 0x73, 0xf1, 0x44, 0x01,
- 0xb4, 0x40, 0xcc, 0x11, 0x44, 0x22, 0x71, 0xc0, 0xcc, 0x23, 0x45, 0x01,
- 0xdb, 0xc0, 0xcc, 0x2f, 0x15, 0xc0, 0xcc, 0x39, 0x08, 0xc0, 0xcc, 0x45,
- 0x16, 0xc0, 0xcc, 0x4d, 0xcb, 0x0c, 0x47, 0x08, 0x73, 0x90, 0xc4, 0x22,
- 0x71, 0x08, 0x73, 0x41, 0xc5, 0x01, 0xdb, 0x08, 0x73, 0x39, 0x15, 0xc0,
- 0xcc, 0x5b, 0x08, 0xc0, 0xcc, 0x67, 0x16, 0xc0, 0xcc, 0x73, 0xc3, 0x01,
- 0xb4, 0x08, 0x73, 0x00, 0x47, 0x02, 0x90, 0xc0, 0xcc, 0x7f, 0xcf, 0x62,
- 0x9f, 0x00, 0xb7, 0x81, 0xcf, 0x64, 0x52, 0x00, 0xb7, 0x79, 0xcd, 0x7f,
- 0x50, 0x00, 0xb7, 0x71, 0xd1, 0x51, 0x94, 0x00, 0xb7, 0x69, 0xd4, 0x39,
- 0x95, 0x00, 0xb7, 0x61, 0xd2, 0x48, 0xde, 0x00, 0xb7, 0x58, 0xc2, 0x00,
- 0x28, 0x0f, 0x9e, 0x19, 0xd3, 0x44, 0x80, 0x0f, 0x9d, 0xe8, 0xa2, 0x07,
- 0xf0, 0x73, 0x00, 0xcd, 0x0f, 0x9e, 0x07, 0xf0, 0x53, 0x00, 0xcd, 0x37,
- 0x9d, 0x07, 0xf0, 0x4b, 0x00, 0xcd, 0x5f, 0xa6, 0x70, 0x08, 0x13, 0x00,
- 0xcd, 0x87, 0xa5, 0x70, 0x08, 0x0b, 0x00, 0xcd, 0xaf, 0xa4, 0x70, 0x08,
- 0x03, 0x00, 0xcd, 0xd7, 0xa3, 0x07, 0xf0, 0x7b, 0x00, 0xcd, 0xff, 0xa1,
- 0x07, 0xf0, 0x6b, 0x00, 0xce, 0x27, 0xa0, 0x07, 0xf0, 0x63, 0x00, 0xce,
- 0x4f, 0x9f, 0x07, 0xf0, 0x5a, 0x00, 0xce, 0x77, 0xa2, 0x70, 0x08, 0x43,
- 0x00, 0xce, 0x9f, 0xa1, 0x70, 0x08, 0x3b, 0x00, 0xce, 0xbb, 0xa0, 0x70,
- 0x08, 0x33, 0x00, 0xce, 0xe3, 0x9f, 0x70, 0x08, 0x2b, 0x00, 0xcf, 0x0b,
- 0x9e, 0x70, 0x08, 0x23, 0x00, 0xcf, 0x33, 0x9d, 0x70, 0x08, 0x1b, 0x00,
- 0xcf, 0x5b, 0xa6, 0x70, 0x08, 0x61, 0xa5, 0x70, 0x08, 0x59, 0xa4, 0x70,
- 0x08, 0x51, 0xa3, 0x70, 0x08, 0x48, 0xa6, 0x70, 0x0a, 0x91, 0xa5, 0x70,
- 0x0a, 0x89, 0xa4, 0x70, 0x0a, 0x81, 0xa3, 0x70, 0x0a, 0x79, 0xa2, 0x70,
- 0x0a, 0x71, 0xa1, 0x70, 0x0a, 0x69, 0xa0, 0x70, 0x0a, 0x61, 0x9f, 0x70,
- 0x0a, 0x59, 0x9e, 0x70, 0x0a, 0x51, 0x9d, 0x70, 0x0a, 0x48, 0xa6, 0x70,
- 0x0a, 0x41, 0xa5, 0x70, 0x0a, 0x39, 0xa4, 0x70, 0x0a, 0x31, 0xa3, 0x70,
- 0x0a, 0x29, 0xa2, 0x70, 0x0a, 0x21, 0xa1, 0x70, 0x0a, 0x19, 0xa0, 0x70,
- 0x0a, 0x11, 0x9f, 0x70, 0x0a, 0x09, 0x9e, 0x70, 0x0a, 0x01, 0x9d, 0x70,
- 0x09, 0xf8, 0xa6, 0x70, 0x09, 0xf1, 0xa5, 0x70, 0x09, 0xe9, 0xa4, 0x70,
- 0x09, 0xe1, 0xa3, 0x70, 0x09, 0xd9, 0xa2, 0x70, 0x09, 0xd1, 0xa1, 0x70,
- 0x09, 0xc9, 0xa0, 0x70, 0x09, 0xc1, 0x9f, 0x70, 0x09, 0xb9, 0x9e, 0x70,
- 0x09, 0xb1, 0x9d, 0x70, 0x09, 0xa8, 0xa6, 0x70, 0x09, 0xa1, 0xa5, 0x70,
- 0x09, 0x99, 0xa4, 0x70, 0x09, 0x91, 0xa3, 0x70, 0x09, 0x89, 0xa2, 0x70,
- 0x09, 0x81, 0xa1, 0x70, 0x09, 0x79, 0xa0, 0x70, 0x09, 0x71, 0x9f, 0x70,
- 0x09, 0x69, 0x9e, 0x70, 0x09, 0x61, 0x9d, 0x70, 0x09, 0x58, 0xa6, 0x70,
- 0x09, 0x51, 0xa5, 0x70, 0x09, 0x49, 0xa4, 0x70, 0x09, 0x41, 0xa3, 0x70,
- 0x09, 0x39, 0xa2, 0x70, 0x09, 0x31, 0xa1, 0x70, 0x09, 0x29, 0xa0, 0x70,
- 0x09, 0x21, 0x9f, 0x70, 0x09, 0x19, 0x9e, 0x70, 0x09, 0x11, 0x9d, 0x70,
- 0x09, 0x08, 0xa6, 0x70, 0x09, 0x01, 0xa5, 0x70, 0x08, 0xf9, 0xa4, 0x70,
- 0x08, 0xf1, 0xa3, 0x70, 0x08, 0xe9, 0xa2, 0x70, 0x08, 0xe1, 0xa1, 0x70,
- 0x08, 0xd9, 0xa0, 0x70, 0x08, 0xd1, 0x9f, 0x70, 0x08, 0xc9, 0x9e, 0x70,
- 0x08, 0xc1, 0x9d, 0x70, 0x08, 0xb8, 0xa6, 0x70, 0x08, 0xb1, 0xa5, 0x70,
- 0x08, 0xa9, 0xa4, 0x70, 0x08, 0xa1, 0xa3, 0x70, 0x08, 0x99, 0xa2, 0x70,
- 0x08, 0x91, 0xa1, 0x70, 0x08, 0x89, 0xa0, 0x70, 0x08, 0x81, 0x9f, 0x70,
- 0x08, 0x79, 0x9e, 0x70, 0x08, 0x71, 0x9d, 0x70, 0x08, 0x68, 0x47, 0x14,
- 0xc8, 0xc0, 0xcf, 0x83, 0x45, 0x10, 0x60, 0x40, 0xcf, 0xf2, 0xc4, 0x15,
- 0xd3, 0x05, 0x31, 0x01, 0xc3, 0x01, 0xb4, 0x05, 0x31, 0x09, 0x16, 0xc0,
- 0xd0, 0x14, 0x08, 0xc0, 0xd0, 0x20, 0x15, 0xc0, 0xd0, 0x2c, 0xc5, 0x01,
- 0xdb, 0x05, 0x31, 0x41, 0xc4, 0x22, 0x71, 0x05, 0x31, 0x48, 0x51, 0x55,
- 0xa1, 0xc0, 0xd0, 0x38, 0x44, 0x00, 0xe8, 0xc0, 0xd0, 0x50, 0xd5, 0x31,
- 0xed, 0x01, 0x35, 0x41, 0xc4, 0x00, 0x8a, 0x00, 0x03, 0xe3, 0x00, 0xd0,
- 0x68, 0xc8, 0x24, 0x10, 0x01, 0x17, 0x71, 0xc9, 0x38, 0x82, 0x01, 0x02,
- 0xf1, 0x16, 0xc0, 0xd0, 0x6c, 0xcb, 0x99, 0x38, 0x01, 0x4c, 0xd1, 0xc8,
- 0xbe, 0x7d, 0x01, 0x71, 0xe9, 0x4c, 0x81, 0xf4, 0xc0, 0xd0, 0x7e, 0xda,
- 0x1b, 0x57, 0x01, 0x81, 0xd8, 0x46, 0x10, 0x37, 0xc0, 0xd0, 0x90, 0xd0,
- 0x5a, 0xe2, 0x0f, 0xbd, 0x29, 0x45, 0xdd, 0x22, 0x40, 0xd0, 0xb2, 0xdc,
- 0x13, 0x3a, 0x00, 0xe7, 0xd1, 0x03, 0xc0, 0xd0, 0xbe, 0xcb, 0x98, 0x9e,
- 0x00, 0xe7, 0xb1, 0xcb, 0x91, 0x66, 0x00, 0xe7, 0xa9, 0x14, 0xc0, 0xd0,
- 0xd0, 0xcd, 0x2e, 0x18, 0x00, 0xe7, 0x79, 0xd6, 0x2e, 0x0f, 0x00, 0xe7,
- 0x71, 0xc6, 0xd2, 0xb5, 0x00, 0xe7, 0x69, 0x48, 0x5e, 0xfa, 0xc0, 0xd0,
- 0xe2, 0xda, 0x1d, 0x5f, 0x00, 0xe6, 0xa1, 0xc9, 0xac, 0xe5, 0x00, 0xe6,
- 0x98, 0x42, 0x01, 0x4a, 0xc0, 0xd0, 0xfa, 0x42, 0x00, 0x5b, 0xc0, 0xd1,
- 0x06, 0x47, 0xc9, 0x91, 0xc0, 0xd1, 0x12, 0xe0, 0x07, 0x87, 0x00, 0xe7,
- 0x09, 0x16, 0xc0, 0xd1, 0x1e, 0x42, 0x04, 0x2b, 0xc0, 0xd1, 0x30, 0x4b,
- 0x1d, 0x5f, 0xc0, 0xd1, 0x3c, 0xc7, 0xc8, 0x72, 0x00, 0xe6, 0x91, 0xc5,
- 0xdc, 0x96, 0x00, 0xe6, 0x88, 0xc4, 0xe5, 0x8b, 0x0b, 0x7f, 0x89, 0xc2,
- 0x03, 0x40, 0x0b, 0x7f, 0x80, 0xc6, 0xa7, 0xa6, 0x0f, 0xa7, 0xc9, 0xc4,
- 0xe2, 0x03, 0x0f, 0x9d, 0x70, 0x83, 0x08, 0x2b, 0x81, 0x04, 0xc0, 0xd1,
- 0x51, 0x05, 0xc0, 0xd1, 0x5b, 0x06, 0xc0, 0xd1, 0x65, 0x87, 0x08, 0x2b,
- 0xc3, 0x00, 0xd1, 0x6f, 0xc2, 0x17, 0x9f, 0x08, 0x2b, 0xc9, 0xc2, 0x01,
- 0x29, 0x08, 0x2b, 0xd1, 0x0a, 0xc0, 0xd1, 0x73, 0x8b, 0x08, 0x2b, 0xf3,
- 0x00, 0xd1, 0x7d, 0xc2, 0x1b, 0xa5, 0x08, 0x2c, 0x01, 0x0e, 0xc0, 0xd1,
- 0x83, 0xc2, 0x00, 0xc3, 0x08, 0x2c, 0x21, 0x10, 0xc0, 0xd1, 0x8d, 0x91,
- 0x08, 0x2c, 0x39, 0xc2, 0x00, 0x27, 0x08, 0x2c, 0x41, 0xc2, 0x0f, 0x60,
- 0x08, 0x2c, 0x49, 0x15, 0xc0, 0xd1, 0x97, 0x16, 0xc0, 0xd1, 0xa1, 0x97,
- 0x08, 0x2c, 0x81, 0x9b, 0x08, 0x2c, 0xa1, 0xc2, 0x08, 0xc2, 0x08, 0x2c,
- 0xa9, 0xc2, 0x04, 0x2b, 0x08, 0x2c, 0x09, 0xc2, 0x00, 0x9c, 0x08, 0x2c,
- 0x51, 0xc2, 0x00, 0x7b, 0x08, 0x2c, 0x89, 0xc2, 0x25, 0x1f, 0x08, 0x2c,
- 0x90, 0x83, 0x08, 0x2c, 0xb9, 0x04, 0xc0, 0xd1, 0xab, 0x05, 0xc0, 0xd1,
- 0xb5, 0x06, 0xc0, 0xd1, 0xbf, 0x87, 0x08, 0x2c, 0xfb, 0x00, 0xd1, 0xc9,
- 0xc2, 0x17, 0x9f, 0x08, 0x2d, 0x01, 0xc2, 0x01, 0x29, 0x08, 0x2d, 0x09,
- 0x0a, 0xc0, 0xd1, 0xcd, 0x8b, 0x08, 0x2d, 0x2b, 0x00, 0xd1, 0xd7, 0xc2,
- 0x1b, 0xa5, 0x08, 0x2d, 0x39, 0xc2, 0x04, 0x2b, 0x08, 0x2d, 0x41, 0x0e,
- 0xc0, 0xd1, 0xdd, 0xc2, 0x00, 0xc3, 0x08, 0x2d, 0x59, 0x10, 0xc0, 0xd1,
- 0xe7, 0x91, 0x08, 0x2d, 0x71, 0xc2, 0x00, 0x27, 0x08, 0x2d, 0x79, 0xc2,
- 0x0f, 0x60, 0x08, 0x2d, 0x81, 0xc2, 0x00, 0x9c, 0x08, 0x2d, 0x89, 0x15,
- 0xc0, 0xd1, 0xf1, 0x16, 0xc0, 0xd1, 0xfb, 0x97, 0x08, 0x2d, 0xb9, 0xc2,
- 0x00, 0x7b, 0x08, 0x2d, 0xc1, 0xc2, 0x25, 0x1f, 0x08, 0x2d, 0xc9, 0x9b,
- 0x08, 0x2d, 0xd9, 0xc2, 0x08, 0xc2, 0x08, 0x2d, 0xe0, 0x44, 0x0c, 0x5b,
- 0xc0, 0xd2, 0x05, 0xca, 0x9c, 0x26, 0x01, 0x0a, 0xc0, 0x45, 0x04, 0x5e,
- 0xc0, 0xd2, 0x11, 0x43, 0x01, 0x47, 0x40, 0xd2, 0x23, 0xc6, 0x01, 0xdb,
- 0x01, 0x0a, 0xd9, 0x15, 0xc0, 0xd2, 0x2f, 0xc5, 0x9c, 0x2a, 0x01, 0x0a,
- 0xa9, 0x16, 0xc0, 0xd2, 0x3b, 0xc5, 0xda, 0xcf, 0x01, 0x0a, 0x89, 0xc7,
- 0x0a, 0xb9, 0x00, 0x05, 0xe1, 0xc4, 0x03, 0x2b, 0x00, 0x05, 0xe8, 0x42,
- 0x00, 0x9b, 0xc0, 0xd2, 0x47, 0x0e, 0xc0, 0xd2, 0x53, 0x05, 0xc0, 0xd2,
- 0x63, 0x14, 0xc0, 0xd2, 0x6d, 0x42, 0x00, 0x90, 0xc0, 0xd2, 0x79, 0x07,
- 0xc0, 0xd2, 0x85, 0x15, 0xc0, 0xd2, 0x91, 0x06, 0xc0, 0xd2, 0xa3, 0xc9,
- 0x11, 0xdc, 0x70, 0x01, 0x71, 0xcc, 0x89, 0x2c, 0x70, 0x01, 0x69, 0x12,
- 0xc0, 0xd2, 0xaf, 0x03, 0xc0, 0xd2, 0xbb, 0xc5, 0x21, 0x12, 0x70, 0x03,
- 0xf1, 0xcd, 0x36, 0xd9, 0x70, 0x03, 0xe1, 0xcb, 0x90, 0xed, 0x70, 0x01,
- 0x18, 0x4b, 0x6f, 0xcc, 0xc0, 0xd2, 0xcd, 0x47, 0x02, 0x90, 0x40, 0xd2,
- 0xd5, 0x47, 0x02, 0x90, 0xc0, 0xd3, 0x27, 0x45, 0x00, 0xcb, 0xc0, 0xd3,
- 0x88, 0x4b, 0x6f, 0xcc, 0x40, 0xd3, 0x94, 0x43, 0x03, 0xcb, 0xc0, 0xd3,
- 0x9c, 0x43, 0x7b, 0xed, 0xc0, 0xd3, 0xa8, 0xc5, 0xd8, 0x2c, 0x0f, 0x9a,
- 0x50, 0xd7, 0x26, 0x9f, 0x08, 0xff, 0xf9, 0x15, 0xc0, 0xd3, 0xb4, 0xd2,
- 0x4e, 0x36, 0x08, 0xff, 0x71, 0x16, 0xc0, 0xd3, 0xcc, 0x03, 0xc0, 0xd3,
- 0xd8, 0x05, 0xc0, 0xd3, 0xea, 0x0e, 0xc0, 0xd3, 0xf6, 0x06, 0xc0, 0xd4,
- 0x02, 0xd4, 0x39, 0xbd, 0x08, 0xff, 0x21, 0x49, 0x52, 0xd7, 0xc0, 0xd4,
- 0x1a, 0x4b, 0x6f, 0xcc, 0xc0, 0xd4, 0x2c, 0xc2, 0x00, 0x3a, 0x00, 0x5e,
- 0x81, 0x47, 0x33, 0xef, 0xc0, 0xd4, 0x4c, 0xca, 0xa4, 0xd2, 0x00, 0x5f,
- 0xa1, 0xc9, 0xae, 0x83, 0x00, 0x5f, 0xa9, 0xca, 0x80, 0x23, 0x00, 0x5f,
- 0xc8, 0x46, 0x06, 0x97, 0xc0, 0xd4, 0x5e, 0xd1, 0x50, 0x40, 0x08, 0xb5,
- 0xc9, 0x47, 0x02, 0x90, 0xc0, 0xd4, 0x82, 0x45, 0x00, 0xcb, 0xc0, 0xd4,
- 0xe9, 0x4b, 0x6f, 0xcc, 0x40, 0xd4, 0xfb, 0x45, 0x00, 0xcb, 0xc0, 0xd5,
- 0x15, 0x4b, 0x8f, 0x40, 0xc0, 0xd5, 0x48, 0x4b, 0x90, 0x53, 0xc0, 0xd5,
- 0x6c, 0x42, 0x00, 0x79, 0xc0, 0xd5, 0x90, 0x4b, 0x6f, 0xcc, 0xc0, 0xd5,
- 0x9c, 0x47, 0x02, 0x90, 0x40, 0xd5, 0xc6, 0x16, 0xc0, 0xd6, 0x14, 0x83,
- 0x00, 0xcb, 0x1b, 0x00, 0xd6, 0x28, 0x87, 0x00, 0xcb, 0x5b, 0x00, 0xd6,
- 0x32, 0x97, 0x00, 0xcb, 0x3b, 0x00, 0xd6, 0x3a, 0x91, 0x00, 0xcb, 0x4b,
- 0x00, 0xd6, 0x3e, 0x8b, 0x00, 0xcb, 0x21, 0x10, 0xc0, 0xd6, 0x42, 0x0d,
- 0xc0, 0xd6, 0x4c, 0xc2, 0x0f, 0x60, 0x00, 0xca, 0xf9, 0xc2, 0x00, 0xa4,
- 0x00, 0xca, 0xf1, 0xc2, 0x04, 0x41, 0x00, 0xca, 0xe9, 0xc2, 0x00, 0x67,
- 0x00, 0xca, 0xe1, 0xc2, 0x00, 0xad, 0x00, 0xca, 0xd9, 0x12, 0xc0, 0xd6,
- 0x56, 0xc2, 0x00, 0xc7, 0x00, 0xca, 0xc1, 0xc2, 0x1d, 0x5f, 0x00, 0xca,
- 0xa9, 0xc2, 0x0c, 0x65, 0x00, 0xca, 0xa1, 0xc2, 0x96, 0xd0, 0x00, 0xca,
- 0x88, 0x47, 0x10, 0x5e, 0xc0, 0xd6, 0x60, 0x49, 0xb5, 0x82, 0xc0, 0xd6,
- 0x78, 0x46, 0x34, 0xbb, 0xc0, 0xd6, 0x90, 0x45, 0xdb, 0x79, 0xc0, 0xd6,
- 0xaa, 0x47, 0x02, 0x90, 0x40, 0xd6, 0xb6, 0xc2, 0x19, 0x4b, 0x0f, 0xcc,
- 0x19, 0xcd, 0x78, 0xf7, 0x01, 0x05, 0xd0, 0x46, 0x05, 0xef, 0xc0, 0xd6,
- 0xc2, 0xd1, 0x57, 0xb0, 0x01, 0x36, 0x49, 0x42, 0x00, 0x10, 0xc0, 0xd6,
- 0xce, 0x06, 0xc0, 0xd6, 0xda, 0x15, 0xc0, 0xd6, 0xe6, 0x03, 0xc0, 0xd6,
- 0xfe, 0x05, 0xc0, 0xd7, 0x0a, 0xd7, 0x26, 0xe4, 0x01, 0x09, 0x49, 0xcc,
- 0x85, 0xfc, 0x0f, 0xac, 0x78, 0xd2, 0x21, 0x36, 0x0f, 0xbe, 0x11, 0x06,
- 0xc0, 0xd7, 0x16, 0x0e, 0xc0, 0xd7, 0x22, 0x14, 0xc0, 0xd7, 0x2e, 0xce,
- 0x70, 0xfd, 0x0f, 0xaf, 0x59, 0xcc, 0x87, 0x28, 0x0f, 0xad, 0x89, 0xd3,
- 0x3f, 0xd3, 0x0f, 0xad, 0x39, 0xd8, 0x25, 0x80, 0x01, 0x53, 0xb0, 0x42,
- 0x00, 0x59, 0xc0, 0xd7, 0x3a, 0xcc, 0x7d, 0x49, 0x01, 0x00, 0x21, 0xc7,
- 0xbd, 0x2e, 0x01, 0x71, 0xd8, 0x00, 0xc0, 0xd7, 0x52, 0xc9, 0xa0, 0x91,
- 0x0f, 0xc8, 0xa0, 0xcf, 0x66, 0x05, 0x01, 0x36, 0x41, 0xc5, 0xda, 0xe3,
- 0x01, 0x30, 0x40, 0xc9, 0xb2, 0x3d, 0x0f, 0xa2, 0x71, 0xc7, 0xc9, 0x8a,
- 0x0f, 0xa2, 0x68, 0xc4, 0x5d, 0x63, 0x01, 0x11, 0xa1, 0x00, 0x40, 0xd7,
- 0x5c, 0xc5, 0x9c, 0x71, 0x0f, 0x99, 0x09, 0xc7, 0xc2, 0xad, 0x01, 0x4f,
- 0x38, 0x11, 0xc0, 0xd7, 0x68, 0xc7, 0xc3, 0x86, 0x00, 0x3d, 0x51, 0x07,
- 0xc0, 0xd7, 0x7a, 0xc7, 0xc6, 0xea, 0x00, 0x3d, 0x41, 0x03, 0xc0, 0xd7,
- 0x8c, 0x47, 0x02, 0x90, 0xc0, 0xd7, 0x98, 0xc5, 0xda, 0x93, 0x00, 0x3d,
- 0x80, 0x05, 0xc0, 0xd8, 0x02, 0x46, 0x06, 0x97, 0x40, 0xd8, 0x0e, 0x43,
- 0x03, 0x2d, 0xc0, 0xd8, 0x32, 0x96, 0x0f, 0x9d, 0x48, 0x05, 0xc0, 0xd8,
- 0x50, 0xcc, 0x8d, 0x7c, 0x01, 0x71, 0x18, 0x05, 0xc0, 0xd8, 0x5c, 0xcc,
- 0x8d, 0x7c, 0x01, 0x71, 0x10, 0xd3, 0x09, 0x74, 0x01, 0x49, 0xd3, 0x00,
- 0xd8, 0x68, 0xda, 0x1b, 0x23, 0x01, 0x49, 0xe0, 0xd0, 0x5f, 0x42, 0x0f,
- 0x15, 0x71, 0x47, 0x02, 0x90, 0x40, 0xd8, 0x6e, 0x42, 0xe8, 0x0c, 0xc0,
- 0xd8, 0xe7, 0x23, 0xc0, 0xd8, 0xf3, 0x22, 0xc0, 0xd9, 0x05, 0x24, 0x40,
- 0xd9, 0x11, 0xc5, 0xae, 0xfc, 0x0f, 0xd5, 0x28, 0xc4, 0x64, 0x0c, 0x0f,
- 0xb4, 0x58, 0xc5, 0xda, 0x3e, 0x0f, 0xad, 0x91, 0xc3, 0x06, 0xd1, 0x0f,
- 0xb4, 0xe0, 0xd3, 0x46, 0x5b, 0x01, 0x56, 0xd9, 0xc5, 0xd8, 0x40, 0x01,
- 0x5e, 0xb8, 0x42, 0x00, 0x54, 0xc0, 0xd9, 0x1d, 0x45, 0x09, 0x6f, 0x40,
- 0xd9, 0x29, 0xc5, 0x65, 0xf1, 0x01, 0x31, 0xb9, 0xc8, 0x2e, 0xb7, 0x01,
- 0x31, 0xb1, 0x19, 0xc0, 0xd9, 0x3b, 0xc7, 0x6e, 0xd4, 0x01, 0x31, 0x99,
- 0xc4, 0x82, 0xf8, 0x01, 0x31, 0x91, 0xc4, 0x28, 0xf1, 0x01, 0x31, 0x89,
- 0xc6, 0x6d, 0x5b, 0x01, 0x31, 0x80, 0x4d, 0x17, 0x38, 0xc0, 0xd9, 0x47,
- 0xc5, 0x21, 0x12, 0x01, 0x12, 0x59, 0xc8, 0x1e, 0xe0, 0x01, 0x11, 0x69,
- 0x12, 0xc0, 0xd9, 0x5f, 0x54, 0x3c, 0x15, 0xc0, 0xd9, 0x6b, 0xce, 0x72,
- 0xcb, 0x01, 0x57, 0xb1, 0x47, 0xc3, 0x1d, 0xc0, 0xd9, 0x77, 0xd7, 0x27,
- 0xf8, 0x01, 0x57, 0xd9, 0xc6, 0xd5, 0x1f, 0x01, 0x72, 0x58, 0xd0, 0x5e,
- 0xa2, 0x01, 0x5e, 0xf8, 0xc2, 0x3b, 0x07, 0x0f, 0x9e, 0x31, 0x45, 0x03,
- 0x3f, 0x40, 0xd9, 0x83, 0xc5, 0xd6, 0xf1, 0x0f, 0xb4, 0x70, 0x11, 0xc0,
- 0xd9, 0x8f, 0xc6, 0xd4, 0xe3, 0x0e, 0x9a, 0x81, 0xc5, 0x0a, 0x2b, 0x0e,
- 0x99, 0xb1, 0x43, 0x11, 0xdd, 0x40, 0xd9, 0x9b, 0x03, 0xc0, 0xd9, 0xa7,
- 0xc5, 0xd5, 0x48, 0x0e, 0x99, 0x28, 0x0b, 0xc0, 0xd9, 0xb3, 0xc8, 0x37,
- 0x6c, 0x0e, 0x9a, 0x41, 0x07, 0xc0, 0xd9, 0xc3, 0xc4, 0xe5, 0xd7, 0x0e,
- 0x9a, 0x19, 0xc5, 0xde, 0x21, 0x0e, 0x99, 0x00, 0xcb, 0x8f, 0x8d, 0x0e,
- 0x9a, 0x99, 0xc9, 0xb4, 0xbc, 0x0e, 0x98, 0x68, 0x11, 0xc0, 0xd9, 0xd5,
- 0x43, 0x07, 0x42, 0xc0, 0xd9, 0xdf, 0xc5, 0xba, 0x48, 0x0e, 0x99, 0x09,
- 0xc5, 0x02, 0x22, 0x0e, 0x98, 0x30, 0xca, 0xa0, 0x36, 0x0e, 0x9a, 0x89,
- 0xcb, 0x9a, 0x14, 0x0e, 0x9a, 0x09, 0xc6, 0xd4, 0x35, 0x0e, 0x98, 0xc9,
- 0xc5, 0x3d, 0x08, 0x0e, 0x98, 0x60, 0xc7, 0xcb, 0x9e, 0x0e, 0x9a, 0x69,
- 0xcb, 0x4e, 0x7f, 0x0e, 0x98, 0xb0, 0x16, 0xc0, 0xd9, 0xe9, 0xc8, 0xb9,
- 0xed, 0x0e, 0x9a, 0x59, 0xc6, 0x82, 0xe5, 0x0e, 0x9a, 0x28, 0xc9, 0xad,
- 0x24, 0x0e, 0x9a, 0x51, 0xcc, 0x8c, 0xf8, 0x0e, 0x9a, 0x11, 0xc7, 0x2c,
- 0xe5, 0x0e, 0x99, 0xd1, 0x10, 0xc0, 0xd9, 0xf3, 0xc3, 0x2e, 0x88, 0x0e,
- 0x98, 0xe0, 0xc3, 0x13, 0x6e, 0x0e, 0x9a, 0x31, 0xc6, 0xd1, 0x41, 0x0e,
- 0x98, 0x90, 0xc3, 0x1d, 0x23, 0x0e, 0x9a, 0x21, 0xc5, 0x76, 0xea, 0x0e,
- 0x98, 0xb8, 0xc6, 0xd2, 0x73, 0x0e, 0x9a, 0x01, 0xc6, 0x13, 0xce, 0x0e,
- 0x99, 0xc9, 0xc4, 0x79, 0x2d, 0x0e, 0x98, 0x40, 0xc8, 0x50, 0x51, 0x0e,
- 0x99, 0x43, 0x00, 0xda, 0x05, 0xca, 0xa8, 0x88, 0x0e, 0x99, 0xf1, 0xc8,
- 0xc0, 0x3d, 0x0e, 0x99, 0x91, 0xcc, 0x85, 0xd8, 0x0e, 0x99, 0x78, 0xc5,
- 0xde, 0xda, 0x0e, 0x99, 0xa9, 0x07, 0x40, 0xda, 0x0b, 0x03, 0xc0, 0xda,
- 0x1b, 0xc5, 0xd6, 0xe2, 0x0e, 0x99, 0x51, 0xca, 0xa3, 0x88, 0x0e, 0x98,
- 0x98, 0xc6, 0xd2, 0x3d, 0x0e, 0x99, 0x39, 0xcc, 0x88, 0x30, 0x0e, 0x98,
- 0x50, 0xce, 0x75, 0x79, 0x0e, 0x99, 0x19, 0xcc, 0x85, 0x24, 0x0e, 0x98,
- 0x71, 0xc6, 0x67, 0x24, 0x0e, 0x98, 0x48, 0x45, 0x0b, 0x69, 0xc0, 0xda,
- 0x27, 0xcd, 0x7c, 0x92, 0x0f, 0xa6, 0x30, 0x46, 0x37, 0x5e, 0xc0, 0xda,
- 0x33, 0xc5, 0xbc, 0x20, 0x0f, 0xa9, 0x69, 0xc6, 0x2f, 0x38, 0x0f, 0xa7,
- 0xd0, 0x45, 0x00, 0xcb, 0xc0, 0xda, 0x4b, 0x42, 0x00, 0x54, 0xc0, 0xda,
- 0x6b, 0x4b, 0x6f, 0xcc, 0xc0, 0xda, 0x77, 0xce, 0x71, 0x43, 0x00, 0x62,
- 0xb1, 0x46, 0x06, 0x97, 0xc0, 0xda, 0x9d, 0x4f, 0x67, 0xe5, 0x40, 0xda,
- 0xc1, 0xc5, 0x11, 0x01, 0x0f, 0xa1, 0x78, 0xd0, 0x5c, 0x22, 0x01, 0x4e,
- 0xa9, 0xcf, 0x6b, 0x2d, 0x01, 0x4e, 0xa0, 0xc8, 0x17, 0x45, 0x01, 0x11,
- 0xe3, 0x00, 0xda, 0xd1, 0x45, 0x00, 0x6c, 0x40, 0xda, 0xd5, 0x46, 0x06,
- 0x97, 0xc0, 0xda, 0xe1, 0xc2, 0x00, 0x3a, 0x08, 0xa6, 0x39, 0x03, 0xc0,
- 0xdb, 0x05, 0xc5, 0xd5, 0xcf, 0x08, 0xa6, 0x29, 0x45, 0x00, 0xcb, 0xc0,
- 0xdb, 0x11, 0x4b, 0x6f, 0xcc, 0xc0, 0xdb, 0x27, 0x47, 0x02, 0x90, 0x40,
- 0xdb, 0x4d, 0xc2, 0x00, 0x49, 0x01, 0x02, 0x51, 0xca, 0x9e, 0x74, 0x01,
- 0x72, 0x90, 0xe0, 0x01, 0xa7, 0x08, 0x59, 0xd0, 0x1b, 0xc0, 0xdb, 0xb4,
- 0x44, 0x00, 0xcc, 0xc0, 0xdb, 0xc0, 0x49, 0x5f, 0xd2, 0x40, 0xdb, 0xec,
- 0x09, 0xc0, 0xdb, 0xf8, 0x42, 0x00, 0x34, 0xc0, 0xdc, 0x04, 0x05, 0xc0,
- 0xdc, 0x10, 0xd5, 0x33, 0x3d, 0x00, 0x78, 0x39, 0x15, 0xc0, 0xdc, 0x22,
- 0x04, 0xc0, 0xdc, 0x2e, 0xd5, 0x35, 0xf2, 0x00, 0x78, 0x61, 0x10, 0xc0,
- 0xdc, 0x38, 0x16, 0xc0, 0xdc, 0x44, 0x14, 0xc0, 0xdc, 0x4e, 0x4c, 0x88,
- 0xcc, 0xc0, 0xdc, 0x5a, 0xc7, 0xca, 0x5c, 0x00, 0x7c, 0x21, 0xc6, 0xcc,
- 0x7f, 0x00, 0x7c, 0x29, 0xd6, 0x30, 0x4b, 0x00, 0x7e, 0x89, 0xd3, 0x41,
- 0xd4, 0x00, 0x7e, 0xc8, 0x4d, 0x77, 0xbf, 0xc0, 0xdc, 0x66, 0x46, 0x02,
- 0x91, 0x40, 0xdc, 0x72, 0x15, 0xc0, 0xdc, 0xd2, 0xc9, 0xaa, 0x03, 0x00,
- 0x78, 0xc0, 0xc4, 0x15, 0xd3, 0x00, 0x79, 0x01, 0xc3, 0x01, 0xb4, 0x00,
- 0x79, 0x09, 0x16, 0xc0, 0xdc, 0xde, 0x08, 0xc0, 0xdc, 0xea, 0x15, 0xc0,
- 0xdc, 0xf6, 0xc5, 0x01, 0xdb, 0x00, 0x79, 0x41, 0xc4, 0x22, 0x71, 0x00,
- 0x79, 0x49, 0x45, 0x03, 0x2b, 0x40, 0xdd, 0x02, 0xc2, 0x14, 0x40, 0x00,
- 0x7b, 0x89, 0x8b, 0x00, 0x7b, 0x93, 0x00, 0xdd, 0x26, 0x97, 0x00, 0x7b,
- 0xa3, 0x00, 0xdd, 0x2a, 0x48, 0xac, 0xc1, 0xc0, 0xdd, 0x2e, 0x87, 0x00,
- 0x7b, 0xd3, 0x00, 0xdd, 0x3c, 0x91, 0x00, 0x7b, 0xe3, 0x00, 0xdd, 0x40,
- 0xca, 0xa8, 0xba, 0x00, 0x7c, 0x02, 0x00, 0xdd, 0x44, 0xcd, 0x78, 0xc3,
- 0x00, 0x7d, 0xf8, 0xca, 0x9e, 0x38, 0x00, 0x7e, 0x01, 0xca, 0xa5, 0x54,
- 0x00, 0x7e, 0x09, 0xc9, 0xab, 0x47, 0x00, 0x7e, 0x11, 0xca, 0xa7, 0xe8,
- 0x00, 0x7e, 0x18, 0x1b, 0xc0, 0xdd, 0x48, 0x51, 0x50, 0x62, 0xc0, 0xdd,
- 0x62, 0x16, 0xc0, 0xdd, 0x6a, 0x03, 0x40, 0xdd, 0x76, 0xe0, 0x04, 0x07,
- 0x01, 0x6b, 0x78, 0x43, 0x01, 0x4a, 0xc0, 0xdd, 0x82, 0xdc, 0x14, 0x52,
- 0x01, 0x02, 0x89, 0xce, 0x70, 0xfd, 0x0f, 0xaf, 0x51, 0xcc, 0x87, 0x28,
- 0x0f, 0xad, 0x81, 0xc6, 0x7b, 0xe3, 0x0f, 0xa4, 0xa9, 0x55, 0x33, 0x28,
- 0xc0, 0xdd, 0x8c, 0x48, 0x1a, 0xfa, 0xc0, 0xdd, 0x98, 0xce, 0x74, 0xfb,
- 0x01, 0x4e, 0x49, 0xd8, 0x25, 0x80, 0x01, 0x53, 0xa9, 0xd1, 0x42, 0x94,
- 0x0f, 0xa3, 0x61, 0xd3, 0x42, 0x92, 0x0f, 0xa3, 0x68, 0xd7, 0x2b, 0x1d,
- 0x0f, 0xc5, 0x81, 0x58, 0x25, 0xb0, 0xc0, 0xdd, 0xa4, 0x57, 0x28, 0xc7,
- 0x40, 0xdd, 0xb6, 0x15, 0xc0, 0xdd, 0xc2, 0x0e, 0xc0, 0xdd, 0xed, 0x42,
- 0x00, 0xad, 0xc0, 0xdd, 0xfd, 0x06, 0xc0, 0xde, 0x0f, 0x14, 0xc0, 0xde,
- 0x25, 0xc5, 0x4b, 0xf1, 0x00, 0x32, 0x83, 0x00, 0xde, 0x3b, 0x08, 0xc0,
- 0xde, 0x48, 0x45, 0x01, 0x75, 0xc0, 0xde, 0x63, 0x16, 0xc0, 0xde, 0x75,
- 0x05, 0xc0, 0xde, 0x91, 0x42, 0x00, 0xa4, 0xc0, 0xde, 0x9d, 0x12, 0xc0,
- 0xde, 0xa9, 0x18, 0xc0, 0xde, 0xbf, 0xd2, 0x4d, 0x5e, 0x00, 0x44, 0x39,
- 0x07, 0xc0, 0xde, 0xcb, 0xd0, 0x5c, 0xd2, 0x00, 0x32, 0xf9, 0xc8, 0xbd,
- 0x0d, 0x00, 0x32, 0xc9, 0xce, 0x75, 0xe9, 0x00, 0x32, 0xb9, 0xcd, 0x2c,
- 0x41, 0x00, 0x30, 0xf9, 0x47, 0x33, 0xef, 0x40, 0xde, 0xd7, 0x46, 0x06,
- 0x97, 0xc0, 0xde, 0xe3, 0x44, 0x00, 0x27, 0xc0, 0xdf, 0x07, 0xcb, 0x93,
- 0x4a, 0x00, 0x30, 0x39, 0xc9, 0xb2, 0x34, 0x00, 0x30, 0x30, 0x48, 0x19,
- 0x70, 0xc0, 0xdf, 0x13, 0x46, 0x02, 0x91, 0x40, 0xdf, 0x25, 0xd0, 0x49,
- 0xa5, 0x00, 0x2a, 0xf9, 0xc9, 0x30, 0x6e, 0x00, 0x2a, 0xd0, 0xc4, 0x00,
- 0xab, 0x00, 0x2a, 0xe9, 0x4e, 0x07, 0x18, 0x40, 0xdf, 0x9e, 0xcf, 0x0f,
- 0xfc, 0x00, 0x2a, 0xe1, 0xcc, 0x86, 0x38, 0x00, 0x2a, 0xd8, 0x4e, 0x07,
- 0x18, 0xc0, 0xe0, 0x17, 0xd1, 0x2e, 0xd6, 0x0f, 0x4a, 0x40, 0xc4, 0x69,
- 0xa7, 0x0f, 0x49, 0x11, 0x06, 0xc0, 0xe0, 0x97, 0xc4, 0x7b, 0x8a, 0x0f,
- 0x49, 0x21, 0xc4, 0xe6, 0x37, 0x0f, 0x49, 0x29, 0x04, 0xc0, 0xe0, 0xa3,
- 0x15, 0xc0, 0xe0, 0xad, 0xc2, 0x00, 0x27, 0x0f, 0x49, 0x41, 0xc2, 0x02,
- 0x59, 0x0f, 0x49, 0x51, 0x87, 0x0f, 0x49, 0x59, 0xc2, 0x00, 0x67, 0x0f,
- 0x49, 0x61, 0x8b, 0x0f, 0x49, 0x69, 0x91, 0x0f, 0x49, 0x71, 0x1b, 0xc0,
- 0xe0, 0xb9, 0xc3, 0x78, 0xa9, 0x0f, 0x49, 0x89, 0x10, 0xc0, 0xe0, 0xc3,
- 0x0d, 0xc0, 0xe0, 0xd5, 0x97, 0x0f, 0x49, 0xa9, 0xc4, 0xe2, 0xbf, 0x0f,
- 0x49, 0xb1, 0xc3, 0x11, 0xd4, 0x0f, 0x49, 0xb9, 0xc2, 0x00, 0xa4, 0x0f,
- 0x49, 0xc1, 0xc4, 0xd6, 0xec, 0x0f, 0x49, 0xc9, 0x09, 0xc0, 0xe0, 0xe7,
- 0xc2, 0x00, 0x16, 0x0f, 0x49, 0xe1, 0xc2, 0x04, 0x41, 0x0f, 0x49, 0xf1,
- 0xc3, 0xb2, 0xb2, 0x0f, 0x4a, 0x08, 0xc8, 0x00, 0xb6, 0x0f, 0x4a, 0x31,
- 0xd4, 0x3e, 0xf9, 0x0f, 0x4a, 0x48, 0xc4, 0x35, 0x01, 0x0f, 0x4a, 0x51,
- 0xd0, 0x57, 0x8f, 0x0f, 0x4a, 0x58, 0xc4, 0x15, 0xd3, 0x0f, 0x4a, 0x81,
- 0xc3, 0x01, 0xb4, 0x0f, 0x4a, 0x89, 0x16, 0xc0, 0xe0, 0xf1, 0x08, 0xc0,
- 0xe0, 0xfd, 0x15, 0xc0, 0xe1, 0x09, 0xc5, 0x01, 0xdb, 0x0f, 0x4a, 0xc1,
- 0xc4, 0x22, 0x71, 0x0f, 0x4a, 0xc8, 0xd0, 0x0f, 0xfb, 0x0f, 0x4a, 0xf1,
- 0xcd, 0x2c, 0x41, 0x0f, 0x4a, 0xf8, 0x47, 0xc6, 0x65, 0xc0, 0xe1, 0x15,
- 0xc4, 0xe5, 0x33, 0x0f, 0xba, 0x13, 0x00, 0xe1, 0x21, 0xcb, 0x90, 0xc1,
- 0x0f, 0xb8, 0x79, 0xca, 0x9d, 0xf2, 0x0f, 0xb9, 0xf1, 0xc4, 0x1a, 0x63,
- 0x0f, 0xba, 0xc8, 0x14, 0xc0, 0xe1, 0x25, 0xc7, 0xc6, 0x5e, 0x0f, 0xb8,
- 0x99, 0x46, 0x48, 0x2b, 0xc0, 0xe1, 0x34, 0x03, 0x40, 0xe1, 0x40, 0x42,
- 0x03, 0x48, 0xc0, 0xe1, 0x52, 0xc8, 0xb8, 0x3d, 0x0f, 0xbb, 0x80, 0x11,
- 0xc0, 0xe1, 0x61, 0xd2, 0x4a, 0x6a, 0x0f, 0xb8, 0x71, 0xca, 0xa5, 0x7c,
- 0x0f, 0xba, 0xf9, 0x17, 0x40, 0xe1, 0x70, 0xc5, 0xd2, 0x83, 0x0f, 0xb9,
- 0xfb, 0x00, 0xe1, 0x7c, 0x42, 0x00, 0x34, 0xc0, 0xe1, 0x82, 0xc4, 0xdf,
- 0xdb, 0x0f, 0xba, 0x69, 0xc6, 0x77, 0x10, 0x0f, 0xba, 0x88, 0x07, 0xc0,
- 0xe1, 0x8e, 0xc8, 0xb9, 0x05, 0x0f, 0xb8, 0xc2, 0x00, 0xe1, 0xa6, 0x0b,
- 0xc0, 0xe1, 0xac, 0xc8, 0xba, 0xa5, 0x0f, 0xb9, 0x40, 0x17, 0xc0, 0xe1,
- 0xbe, 0x42, 0x00, 0x92, 0xc0, 0xe1, 0xca, 0xc5, 0xde, 0x8f, 0x0f, 0xb8,
- 0xd9, 0xc5, 0xaf, 0x41, 0x0f, 0xba, 0x39, 0xce, 0x71, 0xc1, 0x0f, 0xba,
- 0x79, 0x16, 0xc0, 0xe1, 0xd7, 0xc3, 0xcb, 0x32, 0x0f, 0xba, 0xa0, 0xcb,
- 0x91, 0x03, 0x0f, 0xb9, 0x59, 0x43, 0x00, 0x90, 0xc0, 0xe1, 0xe6, 0xc2,
- 0x01, 0x7b, 0x0f, 0xb8, 0x09, 0x0e, 0xc0, 0xe1, 0xf0, 0xc6, 0xcd, 0xff,
- 0x0f, 0xb9, 0xd1, 0xca, 0x9e, 0x92, 0x0f, 0xb9, 0xe9, 0xc4, 0x6e, 0x09,
- 0x0f, 0xba, 0xb9, 0xc6, 0xd4, 0x9b, 0x0f, 0xba, 0xd8, 0xc7, 0xc1, 0x4f,
- 0x0f, 0xb9, 0x51, 0xc8, 0xb9, 0x85, 0x0f, 0xba, 0x98, 0xc3, 0x02, 0x24,
- 0x0f, 0xb8, 0xa9, 0xc3, 0x01, 0xdd, 0x0f, 0xbb, 0x78, 0xd0, 0x60, 0x42,
- 0x0f, 0xb8, 0x83, 0x00, 0xe2, 0x05, 0xc8, 0xb9, 0xdd, 0x0f, 0xb9, 0xc1,
- 0xc4, 0x89, 0x72, 0x0f, 0xbb, 0x88, 0xc3, 0x00, 0x9f, 0x0f, 0xb8, 0x21,
- 0x9a, 0x0f, 0xba, 0x50, 0xc9, 0xad, 0xe1, 0x0f, 0xb8, 0x01, 0xc7, 0xc7,
- 0x5a, 0x0f, 0xba, 0x08, 0xc3, 0x1e, 0x5b, 0x0f, 0xb8, 0xd1, 0xc2, 0x06,
- 0x1f, 0x0f, 0xba, 0x48, 0xc4, 0x91, 0x0a, 0x0f, 0xb8, 0xe3, 0x00, 0xe2,
- 0x09, 0xcb, 0x97, 0xb7, 0x0f, 0xb9, 0x08, 0x11, 0xc0, 0xe2, 0x0f, 0x44,
- 0x03, 0x2c, 0x40, 0xe2, 0x1b, 0xd7, 0x0b, 0x30, 0x01, 0x53, 0x78, 0xd3,
- 0x46, 0x0f, 0x0f, 0x9f, 0x39, 0xc5, 0x42, 0xd3, 0x0f, 0xb4, 0xb8, 0x1d,
- 0xc0, 0xe2, 0x27, 0x1e, 0xc0, 0xe2, 0x4f, 0x1f, 0xc0, 0xe2, 0x77, 0x20,
- 0xc0, 0xe2, 0x9f, 0x21, 0xc0, 0xe2, 0xc7, 0x22, 0x40, 0xe2, 0xef, 0xd3,
- 0x41, 0xe7, 0x01, 0x3f, 0x91, 0x05, 0xc0, 0xe3, 0x01, 0xd1, 0x01, 0x75,
- 0x01, 0x0d, 0xd1, 0x16, 0xc0, 0xe3, 0x0d, 0x48, 0x03, 0x68, 0xc0, 0xe3,
- 0x19, 0xcb, 0x87, 0x04, 0x01, 0x50, 0x88, 0x46, 0x00, 0x6b, 0x40, 0xe3,
- 0x1f, 0xda, 0x1d, 0x2b, 0x01, 0x37, 0x11, 0xc3, 0x0f, 0xf5, 0x01, 0x5e,
- 0xc8, 0x8d, 0x00, 0x01, 0x53, 0x00, 0xe3, 0x2b, 0x8f, 0x01, 0x02, 0x10,
- 0xc2, 0x00, 0xc7, 0x08, 0xba, 0x31, 0x83, 0x08, 0xb8, 0x70, 0xc2, 0x00,
- 0xc1, 0x08, 0xba, 0x29, 0xc2, 0x1d, 0x5f, 0x08, 0xb8, 0x81, 0x83, 0x08,
- 0xb8, 0x19, 0xc2, 0x01, 0x29, 0x08, 0xb8, 0x10, 0x06, 0xc0, 0xe3, 0x31,
- 0xc2, 0x00, 0xa4, 0x08, 0xb8, 0xa1, 0x83, 0x08, 0xb8, 0x98, 0x16, 0xc0,
- 0xe3, 0x3b, 0xc2, 0x00, 0xa4, 0x08, 0xb8, 0x61, 0x83, 0x08, 0xb8, 0x20,
- 0x83, 0x08, 0xba, 0x01, 0xc2, 0x00, 0xa4, 0x08, 0xb8, 0x58, 0x49, 0x0d,
- 0x48, 0x40, 0xe3, 0x45, 0xc2, 0x00, 0xa4, 0x08, 0xb8, 0xc9, 0x83, 0x08,
- 0xb8, 0x50, 0xc2, 0x00, 0xa4, 0x08, 0xb8, 0xc1, 0x83, 0x08, 0xb8, 0x40,
- 0xc2, 0x00, 0xa4, 0x08, 0xb8, 0xb9, 0x83, 0x08, 0xb8, 0xa8, 0xc2, 0x00,
- 0xa4, 0x08, 0xb8, 0x39, 0x83, 0x08, 0xb8, 0x30, 0xc2, 0x00, 0xa4, 0x08,
- 0xb8, 0x09, 0x83, 0x08, 0xb8, 0x00, 0xc5, 0xdd, 0xfe, 0x08, 0xb9, 0xf1,
- 0x15, 0xc0, 0xe3, 0x57, 0xc6, 0xd0, 0xe7, 0x08, 0xb9, 0x58, 0xc4, 0x18,
- 0x83, 0x08, 0xb9, 0xb9, 0xc2, 0x26, 0x51, 0x08, 0xb9, 0xb0, 0xc3, 0x0c,
- 0x5b, 0x08, 0xb9, 0xa9, 0xc3, 0x06, 0x9e, 0x08, 0xb9, 0xa0, 0xc4, 0x04,
- 0x5e, 0x08, 0xb9, 0x99, 0xc2, 0x01, 0x47, 0x08, 0xb9, 0x90, 0x8f, 0x08,
- 0xb9, 0x51, 0x8b, 0x08, 0xb9, 0x49, 0x99, 0x08, 0xb9, 0x39, 0x83, 0x08,
- 0xb9, 0x08, 0x97, 0x08, 0xb9, 0x28, 0x8b, 0x08, 0xb9, 0x18, 0xca, 0xa2,
- 0x48, 0x08, 0xb8, 0xf9, 0x83, 0x08, 0xb8, 0xe8, 0xc2, 0x01, 0xf2, 0x01,
- 0x1c, 0xab, 0x00, 0xe3, 0x63, 0x44, 0x47, 0xeb, 0x40, 0xe3, 0x67, 0xc9,
- 0x4f, 0x38, 0x01, 0x1b, 0xb0, 0xc9, 0x4f, 0x38, 0x01, 0x1b, 0xc8, 0xc3,
- 0x00, 0xb2, 0x01, 0x1b, 0x9b, 0x00, 0xe3, 0x73, 0xc5, 0xdf, 0x61, 0x01,
- 0x19, 0xb0, 0xc2, 0x00, 0xf6, 0x01, 0x1b, 0xa1, 0xce, 0x6e, 0x41, 0x01,
- 0x1a, 0x30, 0x00, 0xc0, 0xe3, 0x79, 0xca, 0x6e, 0x45, 0x01, 0x1a, 0x78,
- 0x43, 0x02, 0xc7, 0xc0, 0xe3, 0x8b, 0x42, 0x01, 0x63, 0xc0, 0xe3, 0x95,
- 0xcf, 0x61, 0x46, 0x01, 0x1a, 0xd0, 0xd1, 0x4f, 0x30, 0x01, 0x1b, 0x71,
- 0x16, 0xc0, 0xe3, 0x9f, 0xc8, 0x00, 0xe3, 0x01, 0x19, 0xf9, 0xca, 0xa6,
- 0xd0, 0x01, 0x19, 0xb8, 0xc8, 0xb8, 0x9d, 0x01, 0x1b, 0x51, 0x46, 0x01,
- 0x7b, 0x40, 0xe3, 0xab, 0xcb, 0x9b, 0x1c, 0x01, 0x1b, 0x39, 0xca, 0x6e,
- 0x45, 0x01, 0x1a, 0x28, 0xc9, 0x1e, 0x42, 0x01, 0x1b, 0x21, 0xc8, 0x4f,
- 0x39, 0x01, 0x1a, 0xd8, 0x49, 0x07, 0x49, 0xc0, 0xe3, 0xc9, 0xcf, 0x69,
- 0x98, 0x01, 0x12, 0x80, 0x0a, 0xc0, 0xe3, 0xd5, 0x15, 0xc0, 0xe3, 0xdf,
- 0xc2, 0x00, 0x7b, 0x08, 0x59, 0x61, 0x1b, 0xc0, 0xe3, 0xed, 0xc2, 0x00,
- 0xc3, 0x08, 0x59, 0x41, 0x10, 0xc0, 0xe3, 0xf7, 0x06, 0xc0, 0xe4, 0x0b,
- 0x16, 0xc0, 0xe4, 0x15, 0xc2, 0x1b, 0xa5, 0x08, 0x58, 0xc1, 0xc2, 0x00,
- 0x69, 0x08, 0x58, 0xb9, 0x09, 0xc0, 0xe4, 0x25, 0x1a, 0xc0, 0xe4, 0x35,
- 0xc2, 0x01, 0x05, 0x08, 0x58, 0x81, 0x97, 0x08, 0x58, 0x73, 0x00, 0xe4,
- 0x45, 0x8b, 0x08, 0x58, 0x63, 0x00, 0xe4, 0x49, 0x91, 0x08, 0x58, 0x53,
- 0x00, 0xe4, 0x4d, 0x87, 0x08, 0x58, 0x43, 0x00, 0xe4, 0x51, 0x83, 0x08,
- 0x58, 0x03, 0x00, 0xe4, 0x55, 0xc2, 0x00, 0x27, 0x08, 0x58, 0xf1, 0xc2,
- 0x17, 0x9f, 0x08, 0x58, 0xf9, 0x04, 0xc0, 0xe4, 0x6b, 0xc2, 0x00, 0x9c,
- 0x08, 0x59, 0x69, 0xc2, 0x00, 0x54, 0x08, 0x59, 0x71, 0x1c, 0x40, 0xe4,
- 0x75, 0xc3, 0x01, 0xb4, 0x08, 0x08, 0x3b, 0x00, 0xe4, 0x7f, 0x16, 0xc0,
- 0xe4, 0x83, 0x08, 0xc0, 0xe4, 0x94, 0x15, 0xc0, 0xe4, 0x9c, 0xc5, 0x01,
- 0xdb, 0x08, 0x08, 0x73, 0x00, 0xe4, 0xae, 0xc4, 0x22, 0x71, 0x08, 0x08,
- 0x7a, 0x00, 0xe4, 0xb9, 0x46, 0x0e, 0xf4, 0xc0, 0xe4, 0xc6, 0x4e, 0x5d,
- 0xa5, 0x40, 0xe4, 0xdc, 0xce, 0x74, 0xa7, 0x08, 0x09, 0xf1, 0xcd, 0x80,
- 0xf0, 0x08, 0x09, 0xf8, 0x0e, 0xc0, 0xe4, 0xe8, 0x46, 0x10, 0x37, 0xc0,
- 0xe4, 0xf4, 0x42, 0x01, 0x4a, 0xc0, 0xe5, 0x2d, 0x49, 0x03, 0x3b, 0xc0,
- 0xe5, 0x39, 0x43, 0x10, 0x47, 0xc0, 0xe5, 0x51, 0x46, 0x02, 0x12, 0x40,
- 0xe5, 0x69, 0xc6, 0x07, 0x09, 0x0f, 0xbc, 0x81, 0xc6, 0x01, 0x7a, 0x0f,
- 0xbc, 0x30, 0xc6, 0x13, 0x57, 0x0f, 0xbd, 0x59, 0xd2, 0x4c, 0xf2, 0x0f,
- 0xbd, 0xb8, 0xd6, 0x0a, 0x88, 0x01, 0x1f, 0x09, 0xcd, 0x02, 0x52, 0x01,
- 0x1e, 0xf9, 0xcb, 0x1a, 0x3f, 0x01, 0x1e, 0xe9, 0xce, 0x24, 0xb2, 0x01,
- 0x1d, 0xab, 0x00, 0xe5, 0x81, 0x45, 0x03, 0x2b, 0xc0, 0xe5, 0x87, 0x46,
- 0x02, 0x12, 0xc0, 0xe5, 0x9f, 0x45, 0x02, 0x4d, 0xc0, 0xe5, 0xa9, 0xd7,
- 0x16, 0x94, 0x01, 0x49, 0xd8, 0x46, 0x00, 0x6b, 0x40, 0xe5, 0xb3, 0x00,
- 0xc0, 0xe5, 0xbf, 0xc3, 0x00, 0x34, 0x0f, 0x9d, 0x98, 0xc4, 0x00, 0xba,
- 0x0f, 0xa8, 0xb3, 0x00, 0xe5, 0xcb, 0x95, 0x0f, 0xa6, 0xd0, 0x84, 0x01,
- 0x88, 0x2b, 0x00, 0xe5, 0xd1, 0x92, 0x01, 0x88, 0x31, 0x8f, 0x01, 0x88,
- 0x39, 0x88, 0x01, 0x88, 0x41, 0x86, 0x01, 0x88, 0x49, 0x96, 0x01, 0x88,
- 0x51, 0x90, 0x01, 0x88, 0x5b, 0x00, 0xe5, 0xd5, 0x8e, 0x01, 0x88, 0x63,
- 0x00, 0xe5, 0xe0, 0x89, 0x01, 0x88, 0x6b, 0x00, 0xe5, 0xe4, 0x8d, 0x01,
- 0x88, 0x73, 0x00, 0xe5, 0xf4, 0x8a, 0x01, 0x88, 0x79, 0x8c, 0x01, 0x88,
- 0x83, 0x00, 0xe5, 0xf8, 0x93, 0x01, 0x88, 0x89, 0x9a, 0x01, 0x88, 0x91,
- 0x9c, 0x01, 0x88, 0xbb, 0x00, 0xe5, 0xfc, 0x85, 0x01, 0x88, 0xc3, 0x00,
- 0xe6, 0x08, 0x95, 0x01, 0x88, 0xcb, 0x00, 0xe6, 0x0c, 0x94, 0x01, 0x88,
- 0xb1, 0x83, 0x01, 0x88, 0xd3, 0x00, 0xe6, 0x10, 0x91, 0x01, 0x88, 0xdb,
- 0x00, 0xe6, 0x2d, 0x87, 0x01, 0x88, 0xe3, 0x00, 0xe6, 0x47, 0x8b, 0x01,
- 0x89, 0x3b, 0x00, 0xe6, 0x5e, 0x97, 0x01, 0x89, 0x43, 0x00, 0xe6, 0x77,
- 0x98, 0x01, 0x89, 0x50, 0x92, 0x01, 0x8d, 0xa1, 0x96, 0x01, 0x8d, 0xa9,
- 0x8d, 0x01, 0x8d, 0xb1, 0x8a, 0x01, 0x8d, 0xb9, 0x89, 0x01, 0x8d, 0xd8,
- 0x9e, 0x0f, 0xd8, 0x03, 0x00, 0xe6, 0x7d, 0xa0, 0x0f, 0xd8, 0x1b, 0x00,
- 0xe6, 0x9d, 0x9f, 0x0f, 0xd8, 0x0b, 0x00, 0xe6, 0xaf, 0xa2, 0x0f, 0xd8,
- 0x7b, 0x00, 0xe6, 0xc8, 0xa1, 0x0f, 0xd8, 0x3b, 0x00, 0xe6, 0xcc, 0xa3,
- 0x0f, 0xd8, 0xf0, 0x00, 0xc0, 0xe6, 0xd7, 0x02, 0x40, 0xe7, 0x21, 0xc4,
- 0xe4, 0x9f, 0x0f, 0xa6, 0xc1, 0xc5, 0x1d, 0x53, 0x0f, 0xa4, 0xc8, 0x4a,
- 0xa8, 0x38, 0x40, 0xe7, 0x2d, 0xc8, 0xb8, 0x75, 0x0f, 0xd3, 0x81, 0xc8,
- 0xbb, 0x7d, 0x0f, 0xcf, 0xb1, 0x11, 0x40, 0xe7, 0x45, 0x42, 0x03, 0xa4,
- 0xc0, 0xe7, 0x54, 0x4f, 0x29, 0xe3, 0xc0, 0xe7, 0x61, 0x46, 0xd2, 0xc1,
- 0xc0, 0xe7, 0x77, 0xc5, 0xdb, 0xd3, 0x00, 0xda, 0xe1, 0x46, 0x06, 0x97,
- 0xc0, 0xe7, 0x83, 0x47, 0x02, 0x90, 0xc0, 0xe7, 0xa7, 0xc9, 0xab, 0x86,
- 0x00, 0xda, 0x21, 0x4b, 0x6f, 0xcc, 0xc0, 0xe8, 0x4b, 0x45, 0x00, 0xcb,
- 0x40, 0xe8, 0x7c, 0xcd, 0x7e, 0x25, 0x0f, 0x9e, 0x00, 0xc9, 0x11, 0xdc,
- 0x0b, 0x57, 0xa9, 0x4a, 0x52, 0xb5, 0xc0, 0xe8, 0x9a, 0x47, 0x02, 0x90,
- 0x40, 0xe8, 0xac, 0xc6, 0x00, 0x71, 0x0f, 0xb5, 0xe1, 0xc5, 0xda, 0xf2,
- 0x0f, 0xa3, 0xe1, 0xc6, 0x51, 0xa8, 0x0f, 0x9b, 0xe1, 0xc5, 0x54, 0x7b,
- 0x0f, 0xa1, 0x20, 0x12, 0xc0, 0xe9, 0x24, 0x83, 0x05, 0x35, 0x01, 0x0d,
- 0xc0, 0xe9, 0x3a, 0x97, 0x05, 0x35, 0x11, 0xc2, 0x00, 0x4c, 0x05, 0x35,
- 0x21, 0x14, 0xc0, 0xe9, 0x5d, 0x16, 0xc0, 0xe9, 0x6f, 0x91, 0x05, 0x35,
- 0x39, 0x10, 0xc0, 0xe9, 0x7b, 0x8b, 0x05, 0x35, 0x49, 0x0e, 0xc0, 0xe9,
- 0xa8, 0x8f, 0x05, 0x35, 0x9b, 0x00, 0xe9, 0xc0, 0x15, 0xc0, 0xe9, 0xd8,
- 0x1b, 0xc0, 0xe9, 0xf2, 0x19, 0xc0, 0xea, 0x02, 0x08, 0x40, 0xea, 0x0c,
- 0x0f, 0xc0, 0xea, 0x22, 0xc3, 0x0e, 0x41, 0x05, 0x37, 0xa0, 0x47, 0x04,
- 0xeb, 0xc0, 0xea, 0x2e, 0x00, 0xc0, 0xea, 0x34, 0x15, 0x40, 0xea, 0x40,
- 0x15, 0xc0, 0xea, 0x4c, 0x43, 0x3a, 0xf0, 0xc0, 0xea, 0x58, 0x4f, 0x2e,
- 0xbf, 0xc0, 0xea, 0x64, 0x4b, 0x6f, 0xcc, 0xc0, 0xea, 0x6e, 0x47, 0x02,
- 0x90, 0x40, 0xea, 0x90, 0xc3, 0x81, 0xb7, 0x0f, 0xb6, 0x08, 0xc5, 0xc0,
- 0x70, 0x0f, 0xa6, 0x51, 0xc7, 0xcb, 0x2e, 0x0f, 0xcf, 0xe0, 0xcf, 0x6b,
- 0x69, 0x01, 0x33, 0x61, 0xcc, 0x8b, 0x84, 0x01, 0x33, 0x59, 0xd8, 0x22,
- 0x38, 0x0f, 0x9c, 0xe9, 0xd7, 0x2a, 0x20, 0x0f, 0x9c, 0xe0, 0xc5, 0x11,
- 0x01, 0x0f, 0xa1, 0xd9, 0xca, 0xa7, 0x34, 0x0f, 0xce, 0xa0, 0xcc, 0x20,
- 0x4f, 0x01, 0x1f, 0x18, 0x47, 0x02, 0x90, 0xc0, 0xea, 0xf3, 0x15, 0xc0,
- 0xeb, 0x56, 0x4b, 0x6f, 0xcc, 0xc0, 0xeb, 0x62, 0x03, 0xc0, 0xeb, 0x82,
- 0x46, 0x06, 0x97, 0xc0, 0xeb, 0x94, 0x46, 0x80, 0x23, 0xc0, 0xeb, 0xb8,
- 0x49, 0x3e, 0x6d, 0xc0, 0xeb, 0xc4, 0xc6, 0xcf, 0xbb, 0x00, 0x4f, 0xd1,
- 0xca, 0x9f, 0x50, 0x00, 0x4f, 0xd8, 0xc5, 0xdf, 0x02, 0x0f, 0x9b, 0x89,
- 0x49, 0x03, 0xf7, 0x40, 0xeb, 0xd0, 0xc6, 0x00, 0x71, 0x01, 0x1b, 0xf1,
- 0xd8, 0x21, 0x90, 0x0f, 0xa8, 0xa9, 0xc6, 0xcc, 0x61, 0x0f, 0xd6, 0x88,
- 0xcf, 0x62, 0x09, 0x0f, 0xa3, 0x29, 0xce, 0x2d, 0x3b, 0x0f, 0xa3, 0x20,
- 0xc9, 0x17, 0x44, 0x01, 0x10, 0xc8, 0xd1, 0x57, 0x39, 0x0f, 0xab, 0x60,
- 0xce, 0x72, 0x85, 0x00, 0xd0, 0xf9, 0xc7, 0xcb, 0x90, 0x00, 0xd0, 0xf1,
- 0x4b, 0x6f, 0xcc, 0xc0, 0xeb, 0xd6, 0x47, 0x02, 0x90, 0x40, 0xeb, 0xec,
- 0x97, 0x00, 0xba, 0x99, 0x8b, 0x00, 0xba, 0x90, 0xc2, 0x00, 0xa4, 0x00,
- 0xba, 0x89, 0xc2, 0x0c, 0x65, 0x00, 0xba, 0x81, 0xc2, 0x01, 0x09, 0x00,
- 0xba, 0x79, 0xc2, 0x00, 0xc7, 0x00, 0xba, 0x71, 0xc2, 0x02, 0x59, 0x00,
- 0xba, 0x69, 0xc2, 0x1d, 0x5f, 0x00, 0xba, 0x61, 0xc2, 0x00, 0xad, 0x00,
- 0xba, 0x59, 0xc2, 0x00, 0xde, 0x00, 0xba, 0x51, 0xc2, 0x03, 0xa4, 0x00,
- 0xba, 0x49, 0x10, 0xc0, 0xec, 0x4c, 0xc2, 0x0b, 0xc6, 0x00, 0xba, 0x39,
- 0xc2, 0x00, 0xb3, 0x00, 0xba, 0x31, 0xc2, 0x01, 0x29, 0x00, 0xba, 0x21,
- 0xc2, 0x04, 0x2b, 0x00, 0xba, 0x19, 0x97, 0x00, 0xba, 0x11, 0x8b, 0x00,
- 0xba, 0x09, 0x83, 0x00, 0xba, 0x00, 0xcb, 0x8e, 0xf3, 0x0f, 0xa3, 0x81,
- 0xcb, 0x95, 0x91, 0x0f, 0x98, 0x48, 0xc4, 0xe4, 0x67, 0x0f, 0xa5, 0xe1,
- 0x95, 0x0f, 0xd3, 0x90, 0x4c, 0x82, 0x78, 0xc0, 0xec, 0x56, 0x90, 0x0f,
- 0xcf, 0x00, 0x47, 0x33, 0xef, 0xc0, 0xec, 0x62, 0x47, 0x02, 0x90, 0xc0,
- 0xec, 0x8f, 0x18, 0xc0, 0xec, 0xf7, 0x45, 0x00, 0xcb, 0xc0, 0xed, 0x03,
- 0x06, 0xc0, 0xed, 0x27, 0x4c, 0x10, 0x7e, 0x40, 0xed, 0x39, 0xdb, 0x16,
- 0xab, 0x01, 0x1c, 0x59, 0xc5, 0x1d, 0x53, 0x0f, 0xa4, 0xa1, 0xc3, 0x00,
- 0xe8, 0x00, 0x05, 0x30, 0x86, 0x0f, 0x9a, 0xf1, 0xd0, 0x5e, 0x52, 0x00,
- 0x04, 0x11, 0xca, 0xa8, 0xa6, 0x0f, 0xc9, 0x88, 0x42, 0x00, 0xbf, 0xc0,
- 0xed, 0x49, 0x46, 0xd3, 0x3f, 0xc0, 0xed, 0x55, 0xcb, 0x95, 0xf4, 0x0e,
- 0x82, 0x28, 0xc5, 0x83, 0xb7, 0x0e, 0x81, 0x23, 0x00, 0xed, 0x61, 0x46,
- 0xd3, 0x33, 0xc0, 0xed, 0x65, 0x11, 0xc0, 0xed, 0x72, 0x14, 0xc0, 0xed,
- 0x87, 0x42, 0x00, 0x9e, 0xc0, 0xed, 0x93, 0xc6, 0xc1, 0xab, 0x0e, 0x83,
- 0x08, 0x14, 0xc0, 0xed, 0x9f, 0x12, 0xc0, 0xed, 0xab, 0x45, 0xd8, 0x81,
- 0xc0, 0xed, 0xbb, 0x10, 0x40, 0xed, 0xd3, 0x16, 0xc0, 0xed, 0xdf, 0x48,
- 0xb7, 0xfd, 0xc0, 0xed, 0xf4, 0xc5, 0xd5, 0xfc, 0x0e, 0x81, 0x4b, 0x00,
- 0xee, 0x06, 0x1b, 0xc0, 0xee, 0x0c, 0xc7, 0xc6, 0xb9, 0x0e, 0x80, 0xe8,
- 0x0b, 0xc0, 0xee, 0x19, 0xc2, 0x20, 0x67, 0x0e, 0x81, 0x79, 0xc5, 0xdf,
- 0x70, 0x0e, 0x80, 0x08, 0x42, 0x17, 0x9f, 0xc0, 0xee, 0x36, 0x12, 0x40,
- 0xee, 0x42, 0x46, 0x38, 0xc4, 0xc0, 0xee, 0x4c, 0xda, 0x1c, 0x5b, 0x0e,
- 0x86, 0x29, 0x49, 0xac, 0x70, 0x40, 0xee, 0x77, 0x44, 0xe0, 0x6f, 0xc0,
- 0xee, 0x89, 0x47, 0xc7, 0xd1, 0xc0, 0xee, 0x9b, 0x44, 0x59, 0x07, 0x40,
- 0xee, 0xa7, 0x42, 0x00, 0x50, 0xc0, 0xee, 0xb1, 0x15, 0xc0, 0xee, 0xbb,
- 0xc6, 0xd2, 0xa3, 0x0e, 0x81, 0xf8, 0x10, 0xc0, 0xee, 0xc7, 0x46, 0xd1,
- 0x95, 0xc0, 0xee, 0xd3, 0xc7, 0xc5, 0xee, 0x0e, 0x83, 0x41, 0xc9, 0xae,
- 0x5f, 0x0e, 0x83, 0x21, 0xc6, 0xd0, 0x93, 0x0e, 0x82, 0xa9, 0xce, 0x74,
- 0x53, 0x0e, 0x80, 0x70, 0x48, 0xb9, 0x65, 0xc0, 0xee, 0xdf, 0xca, 0xa1,
- 0x44, 0x0e, 0x82, 0xb8, 0x14, 0xc0, 0xee, 0xff, 0x07, 0xc0, 0xef, 0x09,
- 0x0a, 0xc0, 0xef, 0x1b, 0xc6, 0xd2, 0xfd, 0x0e, 0x81, 0x38, 0x07, 0xc0,
- 0xef, 0x25, 0xc6, 0xc4, 0x0c, 0x0e, 0x82, 0xe8, 0x49, 0xb0, 0x33, 0xc0,
- 0xef, 0x31, 0xc5, 0xdb, 0x42, 0x0e, 0x82, 0xd9, 0x44, 0xe0, 0x67, 0xc0,
- 0xef, 0x3d, 0x46, 0xd3, 0xab, 0x40, 0xef, 0x47, 0x42, 0x00, 0xcb, 0xc0,
- 0xef, 0x53, 0x42, 0x00, 0xbb, 0xc0, 0xef, 0x5d, 0x46, 0xd1, 0x83, 0xc0,
- 0xef, 0x69, 0x07, 0x40, 0xef, 0x75, 0x44, 0xe5, 0xf7, 0xc0, 0xef, 0x8a,
- 0xc3, 0x4e, 0xb9, 0x0e, 0x80, 0xc8, 0xc6, 0xcf, 0x91, 0x0e, 0x81, 0xe1,
- 0xc4, 0xc7, 0xd3, 0x0e, 0x81, 0x28, 0xc2, 0x0c, 0x57, 0x08, 0xe3, 0x58,
- 0x9b, 0x08, 0xe3, 0x50, 0xc4, 0x18, 0x83, 0x08, 0xe3, 0x03, 0x00, 0xef,
- 0x94, 0xc2, 0x26, 0x51, 0x08, 0xe2, 0xfa, 0x00, 0xef, 0x9a, 0x0b, 0xc0,
- 0xef, 0xa0, 0x11, 0x40, 0xef, 0xac, 0x0a, 0xc0, 0xef, 0xb8, 0x19, 0xc0,
- 0xef, 0xc4, 0xc2, 0x00, 0x4d, 0x08, 0xe3, 0x18, 0xc4, 0x22, 0x71, 0x08,
- 0xe2, 0xc9, 0xc5, 0x01, 0xdb, 0x08, 0xe2, 0xc1, 0x15, 0xc0, 0xef, 0xce,
- 0x08, 0xc0, 0xef, 0xda, 0x16, 0xc0, 0xef, 0xe6, 0xc3, 0x01, 0xb4, 0x08,
- 0xe2, 0x89, 0xc4, 0x15, 0xd3, 0x08, 0xe2, 0x80, 0xc7, 0x76, 0x59, 0x08,
- 0xe2, 0x01, 0xc7, 0x11, 0x41, 0x08, 0xe1, 0xe8, 0xc4, 0x0f, 0x7c, 0x08,
- 0xe1, 0xf9, 0xc5, 0x44, 0x7b, 0x08, 0xe1, 0xf0, 0x97, 0x08, 0xe1, 0xd9,
- 0x8b, 0x08, 0xe1, 0xc9, 0x83, 0x08, 0xe1, 0x78, 0x8e, 0x08, 0xe1, 0xb1,
- 0x94, 0x08, 0xe1, 0xa2, 0x00, 0xef, 0xf2, 0x97, 0x08, 0xe1, 0x98, 0x8b,
- 0x08, 0xe1, 0x88, 0x83, 0x08, 0xe1, 0x69, 0xc2, 0x0c, 0x65, 0x08, 0xe1,
- 0x61, 0xc2, 0x00, 0xa4, 0x08, 0xe1, 0x58, 0x83, 0x08, 0xe1, 0x51, 0x47,
- 0xac, 0xc2, 0x40, 0xef, 0xf6, 0xc2, 0x00, 0xa4, 0x08, 0xe1, 0x29, 0x83,
- 0x08, 0xe1, 0x20, 0xc2, 0x00, 0xa4, 0x08, 0xe1, 0x19, 0x83, 0x08, 0xe1,
- 0x10, 0x83, 0x08, 0xe1, 0x09, 0xc2, 0x00, 0xc1, 0x08, 0xe0, 0xe1, 0xc2,
- 0x1d, 0x5f, 0x08, 0xe0, 0xb9, 0xc2, 0x01, 0x29, 0x08, 0xe0, 0x90, 0xc2,
- 0x00, 0xa4, 0x08, 0xe1, 0x01, 0x83, 0x08, 0xe0, 0xf9, 0x06, 0x40, 0xf0,
- 0x01, 0xc2, 0x00, 0xa4, 0x08, 0xe0, 0xf1, 0x83, 0x08, 0xe0, 0xe9, 0x16,
- 0x40, 0xf0, 0x0b, 0xc2, 0x00, 0xa4, 0x08, 0xe0, 0xb1, 0x83, 0x08, 0xe0,
- 0xa8, 0xc2, 0x00, 0xa4, 0x08, 0xe0, 0xa1, 0x83, 0x08, 0xe0, 0x98, 0xc2,
- 0x00, 0xa4, 0x08, 0xe0, 0x89, 0x83, 0x08, 0xe0, 0x80, 0xc2, 0x00, 0xa4,
- 0x08, 0xe0, 0x79, 0x83, 0x08, 0xe0, 0x70, 0x97, 0x08, 0xe0, 0x69, 0x8b,
- 0x08, 0xe0, 0x59, 0x83, 0x08, 0xe0, 0x08, 0x97, 0x08, 0xe0, 0x28, 0x8b,
- 0x08, 0xe0, 0x18, 0x45, 0x02, 0x4d, 0xc0, 0xf0, 0x15, 0x46, 0x02, 0x12,
- 0xc0, 0xf0, 0x3b, 0x16, 0xc0, 0xf0, 0x63, 0xce, 0x6f, 0x3d, 0x01, 0x38,
- 0x19, 0x45, 0x03, 0x2b, 0xc0, 0xf0, 0x6f, 0xd3, 0x40, 0x0c, 0x01, 0x2c,
- 0x39, 0xd2, 0x49, 0x26, 0x01, 0x2c, 0x29, 0x44, 0x01, 0xb4, 0x40, 0xf0,
- 0x87, 0x04, 0xc0, 0xf0, 0x93, 0xc8, 0x0b, 0x7f, 0x01, 0x02, 0x71, 0xc4,
- 0x00, 0x8a, 0x00, 0x02, 0xf9, 0xc6, 0x4b, 0x24, 0x01, 0x72, 0x3b, 0x00,
- 0xf0, 0x9f, 0xdb, 0x18, 0x40, 0x01, 0x80, 0xf8, 0x46, 0x01, 0x09, 0xc0,
- 0xf0, 0xa5, 0xc5, 0x36, 0xaa, 0x01, 0x3e, 0xe8, 0x46, 0x01, 0x09, 0xc0,
- 0xf0, 0xbd, 0x00, 0x40, 0xf0, 0xd5, 0xc7, 0x2f, 0x37, 0x01, 0x3e, 0x61,
- 0x47, 0xca, 0xfd, 0xc0, 0xf0, 0xe1, 0xc3, 0x16, 0x41, 0x0f, 0xd4, 0xc0,
- 0x00, 0x40, 0xf0, 0xe7, 0x46, 0x00, 0x6b, 0x40, 0xf0, 0xf3, 0xc4, 0x15,
- 0xd3, 0x00, 0x00, 0x79, 0xc3, 0x01, 0xb4, 0x00, 0x00, 0x70, 0x03, 0xc0,
- 0xf1, 0x0b, 0x42, 0x00, 0xa4, 0xc0, 0xf1, 0x13, 0x14, 0xc0, 0xf1, 0x1f,
- 0xc8, 0x6c, 0x4f, 0x01, 0x3e, 0xe1, 0x11, 0xc0, 0xf1, 0x2b, 0x15, 0xc0,
- 0xf1, 0x37, 0x05, 0xc0, 0xf1, 0x5a, 0x16, 0xc0, 0xf1, 0x75, 0x08, 0xc0,
- 0xf1, 0x89, 0x4a, 0x03, 0x3b, 0xc0, 0xf1, 0x93, 0xcb, 0x1a, 0x3f, 0x00,
- 0x01, 0x43, 0x00, 0xf1, 0x9f, 0xe0, 0x06, 0xc7, 0x01, 0x16, 0x49, 0x42,
- 0x01, 0x4a, 0xc0, 0xf1, 0xa3, 0x19, 0xc0, 0xf1, 0xaf, 0x04, 0xc0, 0xf1,
- 0xc1, 0x0e, 0x40, 0xf1, 0xcd, 0x19, 0xc0, 0xf1, 0xd9, 0x16, 0xc0, 0xf1,
- 0xe8, 0xd0, 0x58, 0x92, 0x0f, 0xc1, 0xe1, 0xc5, 0x01, 0x0f, 0x01, 0x0c,
- 0x83, 0x00, 0xf1, 0xfa, 0x14, 0xc0, 0xf2, 0x04, 0xd1, 0x57, 0x4a, 0x01,
- 0x0f, 0xe9, 0x06, 0xc0, 0xf2, 0x10, 0x15, 0xc0, 0xf2, 0x1c, 0x0a, 0xc0,
- 0xf2, 0x28, 0x12, 0xc0, 0xf2, 0x32, 0x04, 0xc0, 0xf2, 0x3e, 0xcf, 0x64,
- 0x9d, 0x01, 0x5a, 0x29, 0x08, 0xc0, 0xf2, 0x50, 0xd7, 0x28, 0xb0, 0x0f,
- 0xc5, 0x20, 0x49, 0x01, 0x8a, 0xc0, 0xf2, 0x5c, 0x15, 0xc0, 0xf2, 0x74,
- 0xdb, 0x17, 0xd4, 0x01, 0x37, 0x31, 0x49, 0x38, 0x96, 0xc0, 0xf2, 0x80,
- 0x47, 0x54, 0x6f, 0x40, 0xf2, 0x98, 0xca, 0x37, 0x0e, 0x01, 0x17, 0x31,
- 0xc5, 0x07, 0x62, 0x01, 0x13, 0x40, 0xc3, 0x01, 0x4a, 0x01, 0x16, 0xb1,
- 0xcd, 0x80, 0x54, 0x01, 0x53, 0xc9, 0xd3, 0x46, 0xa7, 0x01, 0x53, 0xd8,
- 0x42, 0x00, 0x29, 0xc0, 0xf2, 0xad, 0xcc, 0x8c, 0x8c, 0x01, 0x13, 0x30,
- 0x45, 0x00, 0x96, 0xc0, 0xf2, 0xc8, 0x43, 0x00, 0x58, 0x40, 0xf2, 0xde,
- 0xd4, 0x03, 0x13, 0x01, 0x55, 0x40, 0x06, 0xc0, 0xf2, 0xea, 0x16, 0xc0,
- 0xf2, 0xfa, 0x83, 0x00, 0xe1, 0x19, 0xc2, 0x01, 0x09, 0x00, 0xe1, 0x11,
- 0x15, 0xc0, 0xf3, 0x0c, 0xc2, 0x04, 0x41, 0x00, 0xe0, 0xf9, 0x0a, 0xc0,
- 0xf3, 0x16, 0xc2, 0x00, 0xc7, 0x00, 0xe0, 0xe1, 0xc2, 0x02, 0x59, 0x00,
- 0xe0, 0xd9, 0xc2, 0x1d, 0x5f, 0x00, 0xe0, 0xd1, 0x0f, 0xc0, 0xf3, 0x20,
- 0x04, 0xc0, 0xf3, 0x2a, 0x08, 0xc0, 0xf3, 0x34, 0x12, 0xc0, 0xf3, 0x3e,
- 0x10, 0xc0, 0xf3, 0x4e, 0xc2, 0x24, 0x58, 0x00, 0xe0, 0x41, 0x05, 0xc0,
- 0xf3, 0x5e, 0x09, 0xc0, 0xf3, 0x68, 0x0d, 0x40, 0xf3, 0x72, 0xc4, 0x22,
- 0x71, 0x00, 0xe2, 0x49, 0xc5, 0x01, 0xdb, 0x00, 0xe2, 0x41, 0x15, 0xc0,
- 0xf3, 0x82, 0x08, 0xc0, 0xf3, 0x8e, 0x16, 0xc0, 0xf3, 0x9a, 0xc3, 0x01,
- 0xb4, 0x00, 0xe2, 0x09, 0xc4, 0x15, 0xd3, 0x00, 0xe2, 0x00, 0x16, 0xc0,
- 0xf3, 0xa6, 0xc6, 0xc3, 0xe2, 0x00, 0xe1, 0xe9, 0xd2, 0x4e, 0xb4, 0x00,
- 0xe1, 0xe0, 0x44, 0x00, 0xcc, 0xc0, 0xf3, 0xb5, 0x50, 0x5f, 0xd2, 0x40,
- 0xf3, 0xc1, 0x8d, 0x00, 0xe1, 0x6b, 0x00, 0xf3, 0xcd, 0x90, 0x00, 0xe1,
- 0x83, 0x00, 0xf3, 0xd3, 0x96, 0x00, 0xe1, 0x99, 0x94, 0x00, 0xe1, 0x91,
- 0x92, 0x00, 0xe1, 0x89, 0x8e, 0x00, 0xe1, 0x79, 0x8f, 0x00, 0xe1, 0x70,
- 0x87, 0x00, 0xe1, 0x61, 0x97, 0x00, 0xe1, 0x53, 0x00, 0xf3, 0xd9, 0x91,
- 0x00, 0xe1, 0x43, 0x00, 0xf3, 0xdd, 0x8b, 0x00, 0xe1, 0x39, 0xc2, 0x14,
- 0x40, 0x00, 0xe1, 0x30, 0x00, 0xc0, 0xf3, 0xe1, 0xc4, 0x04, 0x6e, 0x01,
- 0x30, 0x3a, 0x00, 0xf4, 0x1b, 0x1b, 0xc0, 0xf4, 0x24, 0xc2, 0x00, 0xde,
- 0x05, 0x26, 0x81, 0x12, 0xc0, 0xf4, 0x2e, 0x06, 0xc0, 0xf4, 0x38, 0x16,
- 0xc0, 0xf4, 0x42, 0x09, 0xc0, 0xf4, 0x56, 0x0d, 0xc0, 0xf4, 0x60, 0xc2,
- 0x24, 0x58, 0x05, 0x26, 0xc9, 0x05, 0xc0, 0xf4, 0x6a, 0xc2, 0x00, 0xad,
- 0x05, 0x26, 0xf9, 0x10, 0xc0, 0xf4, 0x74, 0xc2, 0x00, 0xc7, 0x05, 0x27,
- 0x09, 0x15, 0xc0, 0xf4, 0x7e, 0x1c, 0xc0, 0xf4, 0x88, 0x0a, 0xc0, 0xf4,
- 0x92, 0xc2, 0x96, 0xd0, 0x05, 0x27, 0x39, 0xc2, 0x00, 0x67, 0x05, 0x27,
- 0x49, 0xc2, 0x01, 0x09, 0x05, 0x27, 0x51, 0x83, 0x05, 0x27, 0x73, 0x00,
- 0xf4, 0x9c, 0x87, 0x05, 0x27, 0x83, 0x00, 0xf4, 0xa0, 0x8b, 0x05, 0x27,
- 0x91, 0x91, 0x05, 0x27, 0x9b, 0x00, 0xf4, 0xa4, 0x97, 0x05, 0x27, 0xa2,
- 0x00, 0xf4, 0xa8, 0xc5, 0x00, 0xaa, 0x05, 0x27, 0xf1, 0xc9, 0x11, 0xdc,
- 0x05, 0x27, 0xf8, 0x00, 0xc0, 0xf4, 0xb0, 0x43, 0x00, 0xc8, 0x40, 0xf4,
- 0xcb, 0xcd, 0x7c, 0x9f, 0x0f, 0xac, 0x39, 0xc7, 0x00, 0x70, 0x0f, 0xa8,
- 0xb8, 0x46, 0x06, 0x97, 0xc0, 0xf4, 0xd7, 0xcd, 0x2c, 0x41, 0x00, 0xca,
- 0x29, 0xd0, 0x0f, 0xfb, 0x00, 0xca, 0x21, 0x15, 0xc0, 0xf4, 0xfb, 0x45,
- 0x34, 0xbb, 0xc0, 0xf5, 0x0d, 0x47, 0x02, 0x90, 0x40, 0xf5, 0x19, 0x85,
- 0x08, 0x49, 0xc9, 0x90, 0x08, 0x49, 0x5b, 0x00, 0xf5, 0x68, 0x8e, 0x08,
- 0x49, 0x4b, 0x00, 0xf5, 0x6c, 0x87, 0x08, 0x49, 0x23, 0x00, 0xf5, 0x70,
- 0x83, 0x08, 0x49, 0x03, 0x00, 0xf5, 0x74, 0x96, 0x08, 0x49, 0x7b, 0x00,
- 0xf5, 0x78, 0x95, 0x08, 0x49, 0x9b, 0x00, 0xf5, 0x7c, 0x93, 0x08, 0x49,
- 0x91, 0x88, 0x08, 0x49, 0x89, 0x97, 0x08, 0x49, 0x81, 0x94, 0x08, 0x49,
- 0x69, 0x91, 0x08, 0x49, 0x61, 0x8f, 0x08, 0x49, 0x51, 0x8d, 0x08, 0x49,
- 0x41, 0x9b, 0x08, 0x49, 0x39, 0x8b, 0x08, 0x49, 0x31, 0x98, 0x08, 0x49,
- 0x29, 0x86, 0x08, 0x49, 0x19, 0x89, 0x08, 0x49, 0x11, 0x84, 0x08, 0x49,
- 0x08, 0x90, 0x08, 0x14, 0xc8, 0x90, 0x08, 0x14, 0xd0, 0x8a, 0x08, 0x14,
- 0x18, 0x8a, 0x08, 0x14, 0x49, 0x96, 0x08, 0x14, 0xc0, 0x8d, 0x08, 0x14,
- 0xa0, 0x8f, 0x08, 0x14, 0x80, 0x90, 0x08, 0x14, 0x88, 0x00, 0xc0, 0xf5,
- 0x80, 0xc6, 0xc1, 0x42, 0x01, 0x55, 0x5a, 0x00, 0xf5, 0xbc, 0x45, 0x04,
- 0x74, 0xc0, 0xf5, 0xc2, 0x56, 0x2c, 0xc5, 0x40, 0xf5, 0xcc, 0x15, 0xc0,
- 0xf6, 0x13, 0xd5, 0x35, 0xdd, 0x00, 0x14, 0xb3, 0x00, 0xf6, 0x28, 0x42,
- 0x00, 0x9c, 0xc0, 0xf6, 0x2e, 0x03, 0xc0, 0xf6, 0x3d, 0xd8, 0x26, 0x58,
- 0x00, 0xe9, 0x21, 0xcc, 0x21, 0x78, 0x00, 0x14, 0xa3, 0x00, 0xf6, 0x49,
- 0xdb, 0x17, 0x4d, 0x00, 0x14, 0xa9, 0x42, 0x02, 0x8c, 0xc0, 0xf6, 0x4f,
- 0xc2, 0x1e, 0x62, 0x00, 0x0d, 0x31, 0xcf, 0x64, 0xca, 0x00, 0x0d, 0xd9,
- 0xc4, 0x99, 0x69, 0x00, 0x0d, 0xf9, 0xcc, 0x83, 0x98, 0x00, 0x0e, 0x01,
- 0xcd, 0x77, 0x8b, 0x00, 0x0e, 0x08, 0xc4, 0x0d, 0xbd, 0x01, 0x38, 0xe9,
- 0x48, 0x07, 0x18, 0x40, 0xf6, 0x5b, 0xca, 0xa8, 0xc4, 0x05, 0x3f, 0xb9,
- 0x49, 0x10, 0x8f, 0xc0, 0xf6, 0x67, 0x0b, 0xc0, 0xf6, 0x6f, 0xc9, 0xae,
- 0xa7, 0x05, 0x3f, 0xf8, 0xc9, 0xab, 0x98, 0x0f, 0x98, 0xe1, 0xc6, 0x00,
- 0x71, 0x0f, 0x98, 0xb8, 0x0d, 0xc0, 0xf6, 0x7b, 0x12, 0xc0, 0xf6, 0x83,
- 0x10, 0xc0, 0xf6, 0x93, 0xc2, 0x00, 0x79, 0x00, 0x74, 0x41, 0x15, 0xc0,
- 0xf6, 0xa3, 0xc2, 0x01, 0x4a, 0x00, 0x74, 0xa1, 0x16, 0xc0, 0xf6, 0xaf,
- 0xc2, 0x00, 0x2b, 0x00, 0x74, 0xd1, 0x43, 0xc5, 0x35, 0xc0, 0xf6, 0xb9,
- 0xc2, 0x00, 0x82, 0x00, 0x75, 0x09, 0xc2, 0x20, 0x67, 0x00, 0x75, 0x11,
- 0xc2, 0x00, 0x39, 0x00, 0x75, 0x19, 0xc2, 0x01, 0x64, 0x00, 0x75, 0x2b,
- 0x00, 0xf6, 0xc9, 0xc2, 0x01, 0x47, 0x00, 0x75, 0x39, 0x43, 0x68, 0x16,
- 0xc0, 0xf6, 0xcf, 0x91, 0x00, 0x75, 0x68, 0x83, 0x00, 0x75, 0x83, 0x00,
- 0xf6, 0xdb, 0x45, 0xdb, 0x79, 0xc0, 0xf6, 0xeb, 0x8b, 0x00, 0x75, 0xa3,
- 0x00, 0xf6, 0xf7, 0x9b, 0x00, 0x75, 0xb3, 0x00, 0xf6, 0xfb, 0x97, 0x00,
- 0x75, 0xc3, 0x00, 0xf6, 0xff, 0x87, 0x00, 0x76, 0x03, 0x00, 0xf7, 0x03,
- 0x91, 0x00, 0x76, 0x10, 0xcf, 0x62, 0xdb, 0x00, 0x75, 0xd1, 0x4e, 0x6f,
- 0xc9, 0x40, 0xf7, 0x07, 0xc2, 0x13, 0xa5, 0x00, 0x76, 0x41, 0x16, 0xc0,
- 0xf7, 0x13, 0xc6, 0xd3, 0x15, 0x00, 0x76, 0x58, 0xc4, 0x15, 0xd3, 0x00,
- 0x76, 0x81, 0xc3, 0x01, 0xb4, 0x00, 0x76, 0x89, 0x16, 0xc0, 0xf7, 0x1d,
- 0x08, 0xc0, 0xf7, 0x29, 0x15, 0xc0, 0xf7, 0x35, 0xc5, 0x01, 0xdb, 0x00,
- 0x76, 0xc1, 0xc4, 0x22, 0x71, 0x00, 0x76, 0xc8, 0xc2, 0x00, 0x10, 0x00,
- 0x76, 0xe1, 0xc2, 0x00, 0x82, 0x00, 0x76, 0xe8, 0x16, 0xc0, 0xf7, 0x41,
- 0x4f, 0x61, 0x19, 0xc0, 0xf7, 0x4d, 0x4f, 0x02, 0x28, 0xc0, 0xf7, 0x59,
- 0xda, 0x1a, 0xa1, 0x01, 0x3a, 0x81, 0xc6, 0xcc, 0x19, 0x01, 0x38, 0x81,
- 0xd5, 0x36, 0x5b, 0x01, 0x2e, 0xe9, 0x43, 0x06, 0xd2, 0x40, 0xf7, 0x65,
- 0x16, 0xc0, 0xf7, 0x6b, 0x4f, 0x61, 0x19, 0xc0, 0xf7, 0x77, 0xcf, 0x68,
- 0x4e, 0x01, 0x3e, 0xa1, 0xd5, 0x36, 0x5b, 0x01, 0x2e, 0xe1, 0x44, 0x20,
- 0x8f, 0x40, 0xf7, 0x83, 0x0e, 0xc0, 0xf7, 0x89, 0x4f, 0x2f, 0x8b, 0x40,
- 0xf7, 0x95, 0x48, 0x06, 0x13, 0xc0, 0xf7, 0x9b, 0xc5, 0x01, 0xe2, 0x01,
- 0x2c, 0x03, 0x00, 0xf7, 0xa5, 0xc6, 0x01, 0x7a, 0x01, 0x2f, 0x01, 0xcc,
- 0x06, 0x1b, 0x0f, 0xdc, 0x70, 0xcc, 0x01, 0xdb, 0x01, 0x2c, 0xa1, 0xcd,
- 0x19, 0x0b, 0x0f, 0xdc, 0x10, 0xdb, 0x18, 0xfd, 0x0f, 0xdb, 0x69, 0x45,
- 0x04, 0x5e, 0x40, 0xf7, 0xab, 0xc5, 0x01, 0x0f, 0x01, 0x0f, 0x3b, 0x00,
- 0xf7, 0xb7, 0xcc, 0x8c, 0x20, 0x01, 0x0f, 0x72, 0x00, 0xf7, 0xbb, 0x42,
- 0x00, 0x5b, 0xc0, 0xf7, 0xc1, 0x42, 0x01, 0x47, 0x40, 0xf7, 0xcd, 0xcf,
- 0x5c, 0xe3, 0x0f, 0xc2, 0x89, 0xcc, 0x84, 0xdc, 0x0f, 0xc1, 0xc8, 0xc4,
- 0x01, 0x10, 0x01, 0x0c, 0x8b, 0x00, 0xf7, 0xd9, 0xc5, 0xd9, 0xcb, 0x01,
- 0x70, 0xa8, 0xcb, 0x82, 0xb5, 0x01, 0x0f, 0x09, 0xcb, 0x8c, 0x21, 0x01,
- 0x0e, 0x88, 0x51, 0x02, 0xd1, 0xc0, 0xf7, 0xdd, 0x45, 0x10, 0x38, 0x40,
- 0xf7, 0xe9, 0xc5, 0x01, 0x0f, 0x01, 0x58, 0x31, 0xd3, 0x42, 0x6c, 0x01,
- 0x5c, 0x48, 0xc8, 0x2e, 0x33, 0x0f, 0xb7, 0x41, 0xcc, 0x47, 0x7c, 0x0f,
- 0xa9, 0xe0, 0xd0, 0x5c, 0x22, 0x01, 0x2f, 0x71, 0xcf, 0x6b, 0x2d, 0x01,
- 0x2f, 0x68, 0xd2, 0x4c, 0xbc, 0x01, 0x3e, 0xf8, 0xc4, 0x01, 0xf0, 0x01,
- 0x18, 0x1b, 0x00, 0xf7, 0xf5, 0xcf, 0x66, 0xf5, 0x01, 0x4d, 0xe8, 0xcb,
- 0x04, 0xfc, 0x01, 0x0f, 0x99, 0xcc, 0x8c, 0x20, 0x01, 0x0e, 0xa9, 0xc5,
- 0x01, 0x0f, 0x01, 0x0c, 0xab, 0x00, 0xf7, 0xf9, 0xcb, 0x97, 0xe3, 0x01,
- 0x58, 0x69, 0xd5, 0x03, 0xb2, 0x01, 0x5b, 0x29, 0xd0, 0x5c, 0xe2, 0x0f,
- 0xc2, 0xc8, 0x4f, 0x69, 0xb6, 0xc0, 0xf7, 0xff, 0x50, 0x5d, 0x32, 0x40,
- 0xf8, 0x0b, 0x00, 0x40, 0xf8, 0x17, 0xca, 0x19, 0xf4, 0x00, 0x00, 0xf9,
- 0xc9, 0x73, 0x5c, 0x01, 0x5f, 0xd0, 0xc3, 0xa3, 0x7e, 0x08, 0x1c, 0x01,
- 0xc2, 0x00, 0x34, 0x08, 0x1c, 0x98, 0xc4, 0x17, 0xed, 0x08, 0x1c, 0x11,
- 0xc4, 0x95, 0xd4, 0x08, 0x1c, 0xc8, 0xc2, 0x00, 0xa4, 0x08, 0x1c, 0x19,
- 0xc2, 0x0f, 0x61, 0x08, 0x1c, 0x58, 0xc4, 0xd7, 0x88, 0x08, 0x1c, 0x21,
- 0xc3, 0x03, 0x2b, 0x08, 0x1c, 0x78, 0xc2, 0x00, 0xb3, 0x08, 0x1c, 0x40,
- 0xc3, 0x05, 0xe7, 0x08, 0x1c, 0x39, 0x97, 0x08, 0x1c, 0x88, 0xc2, 0x00,
- 0x49, 0x08, 0x1c, 0x49, 0xc5, 0xd5, 0x7f, 0x08, 0x1c, 0xc1, 0x91, 0x08,
- 0x1c, 0xd0, 0xc3, 0x0d, 0xd9, 0x08, 0x1c, 0x61, 0x03, 0xc0, 0xf8, 0x29,
- 0xc2, 0x08, 0x22, 0x08, 0x1c, 0xe8, 0x0a, 0xc0, 0xf8, 0x35, 0x07, 0xc0,
- 0xf8, 0x41, 0x19, 0xc0, 0xf8, 0x53, 0x15, 0xc0, 0xf8, 0x65, 0x46, 0x07,
- 0xdd, 0xc0, 0xf8, 0x7f, 0x0e, 0xc0, 0xf8, 0x8b, 0x16, 0xc0, 0xf8, 0xa1,
- 0x04, 0xc0, 0xf8, 0xb3, 0x42, 0x01, 0xc7, 0xc0, 0xf8, 0xbf, 0x05, 0xc0,
- 0xf8, 0xcb, 0x06, 0xc0, 0xf8, 0xe0, 0x14, 0xc0, 0xf8, 0xf0, 0x0f, 0xc0,
- 0xf8, 0xfc, 0xc9, 0x68, 0x21, 0x01, 0x3c, 0xa9, 0xcc, 0x03, 0x3b, 0x01,
- 0x3a, 0xd1, 0x03, 0xc0, 0xf9, 0x08, 0x11, 0xc0, 0xf9, 0x1a, 0x08, 0xc0,
- 0xf9, 0x2c, 0xcb, 0x5a, 0x67, 0x01, 0x38, 0xd1, 0xd4, 0x10, 0xe9, 0x0f,
- 0xb3, 0xc8, 0xc5, 0xb0, 0xc7, 0x0f, 0xd5, 0x33, 0x00, 0xf9, 0x38, 0xc5,
- 0x37, 0x5e, 0x0f, 0x9d, 0x38, 0x42, 0x00, 0xed, 0xc0, 0xf9, 0x3e, 0xcf,
- 0x61, 0x28, 0x0f, 0xb2, 0x48, 0xd3, 0x46, 0xf3, 0x01, 0x36, 0x89, 0xc7,
- 0x00, 0x70, 0x01, 0x1c, 0x40, 0x42, 0x32, 0xf0, 0xc0, 0xf9, 0x50, 0x42,
- 0x30, 0x38, 0xc0, 0xf9, 0x68, 0x42, 0x14, 0x4a, 0xc0, 0xf9, 0x84, 0x42,
- 0x29, 0xae, 0xc0, 0xf9, 0x94, 0x42, 0x00, 0x88, 0x40, 0xf9, 0xac, 0x42,
- 0x29, 0xae, 0xc0, 0xf9, 0xbc, 0x42, 0x00, 0x88, 0xc0, 0xf9, 0xdc, 0x42,
- 0x32, 0xf0, 0xc0, 0xf9, 0xf8, 0x42, 0x30, 0x38, 0xc0, 0xfa, 0x04, 0x42,
- 0x14, 0x4a, 0x40, 0xfa, 0x20, 0x42, 0x29, 0xae, 0xc0, 0xfa, 0x47, 0x42,
- 0x00, 0x88, 0xc0, 0xfa, 0x5b, 0x42, 0x32, 0xf0, 0xc0, 0xfa, 0x79, 0x42,
- 0x30, 0x38, 0xc0, 0xfa, 0x85, 0x42, 0x14, 0x4a, 0xc0, 0xfa, 0xa7, 0x47,
- 0xc8, 0x41, 0x40, 0xfa, 0xcb, 0x42, 0x29, 0xae, 0xc0, 0xfa, 0xd3, 0x42,
- 0x00, 0x88, 0xc0, 0xfa, 0xe5, 0x42, 0x32, 0xf0, 0xc0, 0xfa, 0xfd, 0x42,
- 0x30, 0x38, 0xc0, 0xfb, 0x19, 0x42, 0x14, 0x4a, 0x40, 0xfb, 0x39, 0xa0,
- 0x0d, 0x80, 0xb1, 0x9f, 0x0d, 0x80, 0xa9, 0x9e, 0x0d, 0x80, 0xa0, 0xa3,
- 0x0d, 0x80, 0x99, 0xa2, 0x0d, 0x80, 0x91, 0xa1, 0x0d, 0x80, 0x89, 0xa0,
- 0x0d, 0x80, 0x81, 0x9f, 0x0d, 0x80, 0x79, 0x9e, 0x0d, 0x80, 0x08, 0xa2,
- 0x0d, 0x80, 0x71, 0xa1, 0x0d, 0x80, 0x69, 0xa0, 0x0d, 0x80, 0x61, 0x9f,
- 0x0d, 0x80, 0x59, 0x9e, 0x0d, 0x80, 0x50, 0xa1, 0x0d, 0x80, 0x49, 0xa0,
- 0x0d, 0x80, 0x41, 0x9f, 0x0d, 0x80, 0x39, 0x9e, 0x0d, 0x80, 0x30, 0xc2,
- 0x01, 0x47, 0x0d, 0x80, 0x29, 0xa0, 0x0d, 0x80, 0x21, 0x9f, 0x0d, 0x80,
- 0x19, 0x9e, 0x0d, 0x80, 0x10, 0x42, 0x29, 0xae, 0xc0, 0xfb, 0x66, 0x42,
- 0x00, 0x88, 0xc0, 0xfb, 0x82, 0x42, 0x30, 0x38, 0xc0, 0xfb, 0x92, 0x42,
- 0x14, 0x4a, 0x40, 0xfb, 0xa6, 0x42, 0x14, 0x4a, 0xc0, 0xfb, 0xba, 0x42,
- 0x32, 0xf0, 0xc0, 0xfb, 0xd4, 0x42, 0x29, 0xae, 0x40, 0xfb, 0xe4, 0x42,
- 0x29, 0xae, 0xc0, 0xfb, 0xfc, 0x42, 0x00, 0x88, 0xc0, 0xfc, 0x14, 0x42,
- 0x32, 0xf0, 0xc0, 0xfc, 0x22, 0x42, 0x30, 0x38, 0xc0, 0xfc, 0x32, 0x42,
- 0x14, 0x4a, 0x40, 0xfc, 0x4e, 0x42, 0x29, 0xae, 0xc0, 0xfc, 0x6a, 0x42,
- 0x00, 0x88, 0xc0, 0xfc, 0x88, 0x42, 0x30, 0x38, 0xc0, 0xfc, 0xac, 0x42,
- 0x14, 0x4a, 0xc0, 0xfc, 0xc8, 0x42, 0x32, 0xf0, 0x40, 0xfc, 0xd8, 0x42,
- 0x29, 0xae, 0xc0, 0xfc, 0xee, 0x42, 0x00, 0x88, 0xc0, 0xfd, 0x0a, 0x42,
- 0x32, 0xf0, 0xc0, 0xfd, 0x1e, 0x42, 0x30, 0x38, 0xc0, 0xfd, 0x3e, 0x42,
- 0x14, 0x4a, 0x40, 0xfd, 0x56, 0x48, 0x19, 0x70, 0xc0, 0xfd, 0x76, 0x46,
- 0x02, 0x91, 0x40, 0xfd, 0x82, 0x45, 0x12, 0xd1, 0xc0, 0xfe, 0x18, 0x4b,
- 0x10, 0x7f, 0x40, 0xfe, 0x48, 0xc9, 0xaa, 0xe4, 0x00, 0x2e, 0x29, 0xc9,
- 0xb3, 0x9c, 0x00, 0x2e, 0x21, 0xcd, 0x7a, 0xa4, 0x00, 0x2d, 0x78, 0x1c,
- 0xc0, 0xfe, 0x66, 0x06, 0xc0, 0xfe, 0x70, 0xc4, 0xe2, 0x93, 0x00, 0x2d,
- 0x61, 0xc3, 0x12, 0x56, 0x00, 0x2d, 0x59, 0x42, 0x05, 0xd0, 0xc0, 0xfe,
- 0x7c, 0x16, 0xc0, 0xfe, 0x88, 0x42, 0x0f, 0x60, 0xc0, 0xfe, 0x92, 0xcc,
- 0x8c, 0x2c, 0x00, 0x2d, 0x11, 0x42, 0x03, 0xa4, 0xc0, 0xfe, 0x9e, 0xc5,
- 0x49, 0xa7, 0x00, 0x2c, 0xb9, 0x15, 0xc0, 0xfe, 0xaa, 0xc7, 0xca, 0x71,
- 0x00, 0x2c, 0x89, 0x43, 0x00, 0x63, 0xc0, 0xfe, 0xb6, 0x0f, 0x40, 0xfe,
- 0xc5, 0x43, 0x04, 0x9f, 0xc0, 0xfe, 0xda, 0xc7, 0x0d, 0x51, 0x02, 0x6e,
- 0x48, 0x0b, 0xc0, 0xff, 0x0a, 0xc7, 0xc9, 0x75, 0x02, 0x6e, 0xf9, 0xd5,
- 0x37, 0x6c, 0x02, 0x6f, 0x19, 0x07, 0x40, 0xff, 0x16, 0xc6, 0x76, 0xe9,
- 0x02, 0x6e, 0x21, 0xd2, 0x4c, 0x2c, 0x02, 0x6e, 0x88, 0x10, 0xc0, 0xff,
- 0x28, 0xcc, 0x8d, 0x70, 0x02, 0x6f, 0x58, 0x45, 0x04, 0x74, 0xc0, 0xff,
- 0x34, 0xc9, 0xab, 0xfb, 0x02, 0x6e, 0x59, 0xce, 0x70, 0x0f, 0x02, 0x6e,
- 0xb0, 0xc4, 0x8c, 0x5b, 0x02, 0x6e, 0x51, 0xc7, 0xc9, 0x52, 0x02, 0x6f,
- 0x11, 0xcd, 0x7f, 0xdf, 0x02, 0x6f, 0x68, 0xc9, 0xb3, 0xf6, 0x02, 0x6e,
- 0x61, 0xc8, 0xb7, 0x35, 0x02, 0x6e, 0x80, 0x14, 0xc0, 0xff, 0x40, 0xd1,
- 0x50, 0x51, 0x02, 0x6f, 0x60, 0xc5, 0xd7, 0xaf, 0x02, 0x6e, 0x71, 0xcb,
- 0x96, 0x8e, 0x02, 0x6e, 0xd0, 0xc7, 0xc6, 0xd5, 0x02, 0x6e, 0x91, 0xc8,
- 0xbd, 0x7d, 0x02, 0x6f, 0xb1, 0xcf, 0x67, 0xd6, 0x02, 0x6f, 0xf0, 0xcd,
- 0x77, 0xd9, 0x02, 0x6e, 0xa1, 0xcb, 0x8f, 0x09, 0x02, 0x6f, 0x51, 0xd0,
- 0x5d, 0x62, 0x02, 0x6f, 0xf8, 0x16, 0xc0, 0xff, 0x4c, 0xc8, 0xbb, 0x75,
- 0x02, 0x6f, 0x80, 0x10, 0xc0, 0xff, 0x58, 0xc7, 0xc5, 0x62, 0x02, 0x6e,
- 0xf1, 0xc6, 0xd1, 0x41, 0x02, 0x6f, 0x48, 0x42, 0x01, 0xc8, 0xc0, 0xff,
- 0x64, 0xca, 0x9f, 0xdc, 0x02, 0x6f, 0x30, 0x51, 0x55, 0xa1, 0xc0, 0xff,
- 0x70, 0x04, 0xc0, 0xff, 0x8e, 0xd5, 0x31, 0xed, 0x01, 0x35, 0x49, 0x4a,
- 0xa7, 0xf2, 0xc0, 0xff, 0x9a, 0xce, 0x71, 0x97, 0x01, 0x1d, 0x79, 0xc8,
- 0x24, 0x10, 0x01, 0x01, 0x31, 0x16, 0x40, 0xff, 0xaa, 0x00, 0x40, 0xff,
- 0xb6, 0xc7, 0xc3, 0x0f, 0x01, 0x33, 0x41, 0xc8, 0xc1, 0x1d, 0x01, 0x30,
- 0xa9, 0xc6, 0xcc, 0x61, 0x0f, 0x99, 0xb1, 0xc3, 0x77, 0xbe, 0x0f, 0x99,
- 0x68, 0xd2, 0x4b, 0xd2, 0x01, 0x1f, 0x98, 0x00, 0x40, 0xff, 0xc2, 0xd0,
- 0x0d, 0x8c, 0x0f, 0xb3, 0x48, 0xc4, 0xce, 0x71, 0x0f, 0xd5, 0x71, 0xc5,
- 0x40, 0xc5, 0x01, 0x00, 0x50, 0x83, 0x0f, 0xd5, 0x61, 0xc8, 0xc0, 0x4d,
- 0x0f, 0xa1, 0xc8, 0x45, 0x00, 0x56, 0x40, 0xff, 0xd1, 0x42, 0x00, 0xde,
- 0xc0, 0xff, 0xe3, 0xc5, 0xc6, 0x58, 0x0f, 0xc8, 0xe9, 0x4c, 0x86, 0xf8,
- 0x40, 0xff, 0xed, 0x46, 0x06, 0x97, 0xc0, 0xff, 0xf9, 0x45, 0x00, 0xcb,
- 0xc1, 0x00, 0x1d, 0x45, 0x00, 0xba, 0xc1, 0x00, 0x29, 0x46, 0x34, 0xbb,
- 0xc1, 0x00, 0x35, 0x47, 0x02, 0x90, 0x41, 0x00, 0x49, 0xcd, 0x77, 0xa5,
- 0x00, 0xb9, 0xa1, 0x4b, 0x6f, 0xcc, 0xc1, 0x00, 0xb3, 0x47, 0x02, 0x90,
- 0x41, 0x00, 0xbb, 0x43, 0x47, 0xae, 0xc1, 0x01, 0x19, 0x4d, 0x7b, 0xcf,
- 0x41, 0x01, 0x3b, 0x47, 0x33, 0xef, 0xc1, 0x01, 0x59, 0x47, 0x02, 0x90,
- 0x41, 0x01, 0x6c, 0xc9, 0x11, 0xdc, 0x07, 0xfb, 0x09, 0xc5, 0x00, 0xaa,
- 0x07, 0xfb, 0x20, 0xcf, 0x66, 0x32, 0x07, 0xfb, 0x11, 0xcb, 0x01, 0x09,
- 0x07, 0xff, 0x48, 0xcf, 0x66, 0x32, 0x07, 0xfb, 0x19, 0xcb, 0x01, 0x09,
- 0x07, 0xff, 0x58, 0x00, 0xc1, 0x01, 0xc9, 0xde, 0x0e, 0x34, 0x07, 0xfb,
- 0x80, 0xc6, 0x94, 0x2b, 0x07, 0xfd, 0x01, 0x47, 0x02, 0x90, 0x41, 0x01,
- 0xe1, 0xcb, 0x92, 0xb0, 0x0f, 0xb4, 0x23, 0x01, 0x02, 0x3b, 0xcb, 0x94,
- 0xf7, 0x0f, 0xa3, 0x00, 0xcc, 0x8a, 0xf4, 0x01, 0x35, 0x09, 0xd1, 0x52,
- 0xf9, 0x0f, 0xa8, 0x30, 0x15, 0xc1, 0x02, 0x41, 0x83, 0x01, 0x82, 0x13,
- 0x01, 0x02, 0x5b, 0x8b, 0x01, 0x82, 0x21, 0x97, 0x01, 0x82, 0x31, 0x87,
- 0x01, 0x82, 0x41, 0x91, 0x01, 0x82, 0x51, 0x0d, 0xc1, 0x02, 0x61, 0x09,
- 0xc1, 0x02, 0x75, 0x1c, 0xc1, 0x02, 0x89, 0x16, 0xc1, 0x02, 0x9d, 0x06,
- 0xc1, 0x02, 0xb1, 0x90, 0x01, 0x84, 0x9b, 0x01, 0x02, 0xc5, 0x0a, 0xc1,
- 0x02, 0xd9, 0x04, 0xc1, 0x02, 0xed, 0x12, 0xc1, 0x03, 0x01, 0x0f, 0xc1,
- 0x03, 0x15, 0x1b, 0xc1, 0x03, 0x29, 0x14, 0xc1, 0x03, 0x35, 0x19, 0xc1,
- 0x03, 0x49, 0xc2, 0x5d, 0x23, 0x01, 0x84, 0xa0, 0x00, 0xc1, 0x03, 0x59,
- 0xcb, 0x99, 0xa6, 0x01, 0x01, 0x39, 0xc6, 0x89, 0x32, 0x00, 0x01, 0x68,
- 0x43, 0x06, 0x18, 0xc1, 0x03, 0x65, 0x44, 0x03, 0x1e, 0x41, 0x03, 0x83,
- 0xc4, 0x26, 0x12, 0x01, 0x03, 0x21, 0xc9, 0x19, 0xf5, 0x01, 0x03, 0x19,
- 0xc5, 0x01, 0x2d, 0x01, 0x03, 0x10, 0xcf, 0x66, 0x7d, 0x0f, 0xa9, 0x01,
- 0xc7, 0x66, 0x85, 0x0f, 0xa9, 0x21, 0xcd, 0x78, 0x34, 0x0f, 0xa9, 0x08,
- 0x0e, 0xc1, 0x03, 0xab, 0xc6, 0xd1, 0x8f, 0x01, 0x15, 0xd1, 0xc7, 0x02,
- 0x60, 0x01, 0x11, 0x4b, 0x01, 0x03, 0xb7, 0xc6, 0x10, 0xee, 0x01, 0x01,
- 0xe9, 0xcb, 0x38, 0x09, 0x01, 0x51, 0xe0, 0x00, 0x41, 0x03, 0xbb, 0x46,
- 0x61, 0xb8, 0xc1, 0x03, 0xcb, 0x47, 0xc7, 0xd8, 0x41, 0x03, 0xd7, 0xda,
- 0x1c, 0xf7, 0x01, 0x4e, 0xf0, 0x15, 0xc1, 0x03, 0xe3, 0xcb, 0x99, 0x59,
- 0x0f, 0xa4, 0x08, 0xc4, 0x00, 0xc3, 0x01, 0x10, 0x31, 0x43, 0x2e, 0x88,
- 0x41, 0x03, 0xef, 0xcc, 0x86, 0x5c, 0x0f, 0xa7, 0x41, 0xce, 0x70, 0x2b,
- 0x01, 0x4e, 0xe0, 0xcd, 0x80, 0x20, 0x01, 0x05, 0xc9, 0x48, 0xbd, 0xcd,
- 0x41, 0x03, 0xfb, 0xd7, 0x2a, 0x4e, 0x0f, 0xd7, 0xa8, 0xc2, 0x02, 0x18,
- 0x01, 0x13, 0x0b, 0x01, 0x04, 0x1f, 0xce, 0x37, 0x1f, 0x01, 0x53, 0x38,
- 0x4a, 0xa4, 0xaa, 0xc1, 0x04, 0x25, 0x49, 0xb3, 0xc0, 0x41, 0x04, 0x33,
- 0x54, 0x39, 0x1d, 0xc1, 0x04, 0x3f, 0xd1, 0x29, 0x9c, 0x01, 0x81, 0x60,
- 0xc4, 0x00, 0xab, 0x01, 0x80, 0x09, 0xcb, 0x8e, 0xc7, 0x01, 0x80, 0x30,
- 0xcc, 0x82, 0xe4, 0x01, 0x8c, 0x81, 0xcc, 0x86, 0x80, 0x01, 0x8c, 0x89,
- 0xc8, 0x29, 0xa5, 0x01, 0x8c, 0x91, 0x16, 0xc1, 0x04, 0x5d, 0x08, 0xc1,
- 0x04, 0x6d, 0x0f, 0xc1, 0x04, 0x79, 0xcb, 0x97, 0x54, 0x01, 0x8c, 0xc1,
- 0xcb, 0x91, 0x50, 0x01, 0x8c, 0xd1, 0xcb, 0x94, 0xe1, 0x01, 0x8c, 0xe9,
- 0xca, 0xa7, 0x20, 0x01, 0x8c, 0xf0, 0x47, 0x33, 0xef, 0xc1, 0x04, 0x85,
- 0xcc, 0x84, 0xd0, 0x08, 0x42, 0xb9, 0x47, 0x02, 0x90, 0x41, 0x04, 0x92,
- 0xc6, 0x59, 0xec, 0x01, 0x03, 0x01, 0xd4, 0x3b, 0x11, 0x01, 0x71, 0x88,
- 0x42, 0x00, 0x46, 0xc1, 0x04, 0xf5, 0xd0, 0x5a, 0x22, 0x0f, 0xa3, 0x78,
- 0x05, 0xc1, 0x05, 0x0d, 0x0a, 0xc1, 0x05, 0x2b, 0x52, 0x4a, 0xfa, 0xc1,
- 0x05, 0x39, 0x15, 0xc1, 0x05, 0x45, 0x0e, 0xc1, 0x05, 0x79, 0x06, 0xc1,
- 0x05, 0x89, 0x16, 0xc1, 0x05, 0x9e, 0xd9, 0x0f, 0xfb, 0x01, 0x3a, 0xa9,
- 0xd6, 0x2c, 0x41, 0x01, 0x3a, 0xa1, 0x08, 0xc1, 0x05, 0xb4, 0xc3, 0xe7,
- 0xde, 0x01, 0x38, 0x89, 0x14, 0xc1, 0x05, 0xc4, 0x42, 0x01, 0xc7, 0xc1,
- 0x05, 0xd0, 0x0f, 0xc1, 0x05, 0xdc, 0xc6, 0x1d, 0x59, 0x01, 0x2f, 0x31,
- 0x12, 0xc1, 0x05, 0xe8, 0x43, 0x00, 0x7b, 0x41, 0x05, 0xf4, 0x45, 0x16,
- 0xbc, 0xc1, 0x06, 0x00, 0x45, 0x1f, 0xc8, 0x41, 0x06, 0x1e, 0x45, 0x1f,
- 0xc8, 0xc1, 0x06, 0x3c, 0x45, 0x16, 0xbc, 0x41, 0x06, 0x5a, 0xd5, 0x37,
- 0xab, 0x0f, 0xc4, 0x19, 0xca, 0x37, 0xb6, 0x0f, 0xc3, 0x59, 0xd0, 0x5f,
- 0x62, 0x0f, 0xc3, 0x19, 0xd1, 0x56, 0x3a, 0x0f, 0xc3, 0x99, 0xd0, 0x37,
- 0xb0, 0x0f, 0xc3, 0xd8, 0xd5, 0x37, 0xab, 0x0f, 0xc4, 0x11, 0xd0, 0x37,
- 0xb0, 0x0f, 0xc3, 0xd1, 0xd0, 0x5f, 0x62, 0x0f, 0xc3, 0x11, 0xca, 0x37,
- 0xb6, 0x0f, 0xc3, 0x51, 0xd1, 0x56, 0x3a, 0x0f, 0xc3, 0x90, 0xd5, 0x37,
- 0xab, 0x0f, 0xc4, 0x01, 0xd0, 0x5f, 0x62, 0x0f, 0xc3, 0x01, 0xca, 0x37,
- 0xb6, 0x0f, 0xc3, 0x41, 0xd1, 0x56, 0x3a, 0x0f, 0xc3, 0x81, 0xd0, 0x37,
- 0xb0, 0x0f, 0xc3, 0xc0, 0xd0, 0x5f, 0x62, 0x0f, 0xc3, 0x09, 0xca, 0x37,
- 0xb6, 0x0f, 0xc3, 0x49, 0xd1, 0x56, 0x3a, 0x0f, 0xc3, 0x89, 0xd0, 0x37,
- 0xb0, 0x0f, 0xc3, 0xc9, 0xd5, 0x37, 0xab, 0x0f, 0xc4, 0x08, 0x00, 0xc1,
- 0x06, 0x78, 0xc2, 0x01, 0x12, 0x0f, 0xd4, 0xf8, 0x00, 0xc1, 0x06, 0x84,
- 0xc5, 0xdb, 0xf6, 0x0f, 0x9a, 0x48, 0xc9, 0xb0, 0xe7, 0x0f, 0x17, 0xf9,
- 0x46, 0x06, 0x97, 0xc1, 0x06, 0x9c, 0x45, 0x0c, 0x46, 0xc1, 0x06, 0xc0,
- 0x47, 0x02, 0x90, 0x41, 0x06, 0xd2, 0xd4, 0x3d, 0x05, 0x0f, 0x98, 0xc1,
- 0xd3, 0x43, 0xfb, 0x0f, 0x98, 0xb0, 0xc2, 0x00, 0x3a, 0x08, 0xc7, 0xf9,
- 0x47, 0x33, 0xef, 0xc1, 0x07, 0x59, 0x46, 0x06, 0x97, 0xc1, 0x07, 0x71,
- 0x4d, 0x26, 0xea, 0xc1, 0x07, 0x95, 0x4f, 0x07, 0x17, 0x41, 0x07, 0xf4,
- 0x0e, 0xc1, 0x08, 0x53, 0xc8, 0x81, 0x29, 0x07, 0xf2, 0x59, 0xc4, 0x0f,
- 0x14, 0x01, 0x81, 0x80, 0xca, 0xa8, 0xe2, 0x0f, 0x9f, 0x99, 0xca, 0x9f,
- 0x1e, 0x0f, 0x9f, 0xa1, 0xc9, 0x46, 0x06, 0x0f, 0xa2, 0x58, 0x58, 0x22,
- 0xf8, 0xc1, 0x08, 0x5f, 0xc4, 0x0f, 0x14, 0x01, 0x80, 0xe0, 0xc8, 0x30,
- 0x17, 0x0f, 0xac, 0x29, 0xc6, 0xcd, 0x3f, 0x0f, 0xb7, 0xc1, 0xc4, 0x5d,
- 0xc8, 0x0f, 0xca, 0x78, 0xc5, 0x90, 0x22, 0x0f, 0xcb, 0xf9, 0xc4, 0x1f,
- 0x5c, 0x01, 0x1f, 0x29, 0xc5, 0x71, 0x30, 0x0f, 0xd6, 0x98, 0x42, 0x01,
- 0xbb, 0x41, 0x08, 0x6b, 0x00, 0xc1, 0x08, 0x77, 0xc7, 0x93, 0x4e, 0x01,
- 0x10, 0xe1, 0xcd, 0x7d, 0x48, 0x01, 0x00, 0x28, 0xca, 0xa4, 0x46, 0x0f,
- 0x9b, 0xa3, 0x01, 0x08, 0x99, 0xc3, 0x00, 0x34, 0x01, 0x56, 0xe1, 0xce,
- 0x49, 0x72, 0x01, 0x70, 0x80, 0x44, 0x00, 0x6c, 0xc1, 0x08, 0x9f, 0xc4,
- 0x3f, 0xa1, 0x0f, 0xc9, 0x31, 0xc7, 0xc2, 0x98, 0x0f, 0xa4, 0x31, 0xcf,
- 0x67, 0x04, 0x0f, 0xb0, 0xc1, 0x15, 0xc1, 0x08, 0xa9, 0xd2, 0x4e, 0x7e,
- 0x0f, 0xcb, 0xc8, 0x4d, 0x2a, 0x7d, 0xc1, 0x08, 0xb5, 0xc7, 0xc3, 0x24,
- 0x0f, 0x9a, 0x10, 0xc8, 0xbb, 0xcd, 0x01, 0x05, 0x19, 0xc3, 0x91, 0xec,
- 0x0f, 0x9a, 0xf8, 0x46, 0x04, 0xec, 0xc1, 0x08, 0xc1, 0xd1, 0x54, 0x6f,
- 0x0f, 0xa1, 0x28, 0xd8, 0x24, 0x60, 0x0f, 0xb1, 0x30, 0xcd, 0x76, 0xc8,
- 0x01, 0x0a, 0xf9, 0xc5, 0x00, 0xe2, 0x01, 0x02, 0x20, 0xc4, 0xe3, 0x47,
- 0x0f, 0xad, 0xf1, 0xc5, 0xd7, 0x0a, 0x0f, 0xad, 0xe9, 0xc7, 0x89, 0x0d,
- 0x0f, 0xad, 0xe0, 0xca, 0x9c, 0x94, 0x01, 0x3e, 0xb9, 0xc5, 0x01, 0xe2,
- 0x01, 0x2c, 0x41, 0x45, 0x15, 0xac, 0xc1, 0x08, 0xc7, 0xc4, 0x05, 0x30,
- 0x00, 0x01, 0x70, 0x10, 0xc1, 0x08, 0xd3, 0x03, 0xc1, 0x08, 0xdf, 0x06,
- 0xc1, 0x08, 0xf1, 0x05, 0xc1, 0x08, 0xfd, 0x15, 0xc1, 0x09, 0x0d, 0x0e,
- 0xc1, 0x09, 0x19, 0x07, 0xc1, 0x09, 0x29, 0x42, 0x00, 0x9b, 0xc1, 0x09,
- 0x35, 0x42, 0x00, 0x90, 0xc1, 0x09, 0x41, 0x14, 0xc1, 0x09, 0x4d, 0xc5,
- 0x21, 0x12, 0x07, 0xfa, 0xf1, 0x12, 0xc1, 0x09, 0x59, 0xc6, 0x60, 0xe6,
- 0x07, 0xff, 0x19, 0xca, 0xa0, 0x0e, 0x07, 0xff, 0x21, 0xc8, 0x7e, 0x6b,
- 0x07, 0xff, 0x29, 0xc8, 0xbf, 0x1d, 0x07, 0xff, 0x31, 0xcc, 0x89, 0x2c,
- 0x07, 0xf8, 0x69, 0xc9, 0x11, 0xdc, 0x07, 0xf8, 0x71, 0xcd, 0x36, 0xd9,
- 0x07, 0xfa, 0xe0, 0xcc, 0x6b, 0x21, 0x01, 0x31, 0xeb, 0x01, 0x09, 0x6b,
- 0xce, 0x75, 0xdb, 0x01, 0x03, 0x41, 0xcb, 0x63, 0x1b, 0x0f, 0xca, 0x38,
- 0x44, 0x3f, 0xd6, 0xc1, 0x09, 0x6f, 0x42, 0x00, 0x8e, 0xc1, 0x09, 0x79,
- 0xc7, 0xc5, 0x5b, 0x0f, 0xcf, 0x40, 0xc3, 0x19, 0x4b, 0x01, 0x2e, 0x49,
- 0xd1, 0x50, 0x2f, 0x0f, 0x9d, 0x19, 0xd7, 0x29, 0xf2, 0x0f, 0x9b, 0x28,
- 0xc7, 0xca, 0xa9, 0x0f, 0xae, 0x21, 0xc6, 0xa1, 0x0c, 0x0f, 0xa6, 0x09,
- 0xc9, 0x19, 0xf5, 0x00, 0x00, 0xe0, 0xc9, 0xb2, 0x22, 0x0f, 0xa7, 0xe9,
- 0xc6, 0x61, 0xaa, 0x0f, 0x9c, 0xf0, 0x4c, 0x10, 0x7e, 0xc1, 0x09, 0x85,
- 0xd1, 0x52, 0xd7, 0x08, 0x52, 0x41, 0x47, 0x33, 0xef, 0xc1, 0x09, 0x9d,
- 0x46, 0x06, 0x97, 0xc1, 0x09, 0xa7, 0x18, 0xc1, 0x09, 0xb7, 0x45, 0x00,
- 0xcb, 0xc1, 0x09, 0xc3, 0x47, 0x02, 0x90, 0x41, 0x09, 0xe1, 0x05, 0xc1,
- 0x0a, 0x37, 0x04, 0x41, 0x0a, 0x6f, 0xc4, 0x22, 0x71, 0x08, 0x97, 0xc9,
- 0x15, 0xc1, 0x0a, 0xaf, 0x08, 0xc1, 0x0a, 0xbb, 0x16, 0xc1, 0x0a, 0xc7,
- 0xc3, 0x01, 0xb4, 0x08, 0x97, 0x89, 0xc4, 0x15, 0xd3, 0x08, 0x97, 0x81,
- 0xc5, 0x01, 0xdb, 0x08, 0x97, 0xc0, 0xc6, 0x1e, 0x23, 0x08, 0x97, 0x51,
- 0xc5, 0x35, 0x00, 0x08, 0x97, 0x49, 0xc8, 0x11, 0x40, 0x08, 0x96, 0xf8,
- 0x91, 0x08, 0x97, 0x39, 0x03, 0xc1, 0x0a, 0xd3, 0x87, 0x08, 0x97, 0x29,
- 0x97, 0x08, 0x97, 0x1b, 0x01, 0x0a, 0xdf, 0x8b, 0x08, 0x97, 0x0a, 0x01,
- 0x0a, 0xe3, 0xc2, 0x00, 0xa4, 0x08, 0x96, 0xf1, 0x15, 0xc1, 0x0a, 0xe7,
- 0xc2, 0x04, 0x41, 0x08, 0x96, 0xd9, 0xc2, 0x00, 0xc7, 0x08, 0x96, 0xd1,
- 0x14, 0xc1, 0x0a, 0xf1, 0xc2, 0x1d, 0x5f, 0x08, 0x96, 0xc1, 0xc2, 0x00,
- 0xad, 0x08, 0x96, 0xb9, 0x04, 0xc1, 0x0a, 0xfb, 0x12, 0xc1, 0x0b, 0x0b,
- 0x10, 0xc1, 0x0b, 0x15, 0x06, 0xc1, 0x0b, 0x2b, 0x16, 0xc1, 0x0b, 0x39,
- 0x0c, 0xc1, 0x0b, 0x47, 0x05, 0xc1, 0x0b, 0x57, 0x09, 0xc1, 0x0b, 0x61,
- 0x0d, 0xc1, 0x0b, 0x71, 0x83, 0x08, 0x95, 0x83, 0x01, 0x0b, 0x7b, 0x91,
- 0x08, 0x95, 0xc1, 0x87, 0x08, 0x95, 0xb1, 0x97, 0x08, 0x95, 0xa3, 0x01,
- 0x0b, 0x87, 0x8b, 0x08, 0x95, 0x92, 0x01, 0x0b, 0x8b, 0xc9, 0xac, 0xc1,
- 0x08, 0x92, 0x09, 0x03, 0xc1, 0x0b, 0x8f, 0x91, 0x08, 0x91, 0x91, 0x87,
- 0x08, 0x91, 0x81, 0x97, 0x08, 0x91, 0x79, 0x8b, 0x08, 0x91, 0x6a, 0x01,
- 0x0b, 0x9b, 0x15, 0xc1, 0x0b, 0x9f, 0xc2, 0x0f, 0x60, 0x08, 0x91, 0xf9,
- 0x0e, 0xc1, 0x0b, 0xa9, 0xc2, 0x00, 0xa4, 0x08, 0x91, 0x51, 0xc2, 0x04,
- 0x41, 0x08, 0x91, 0x41, 0xc2, 0x02, 0x59, 0x08, 0x91, 0x31, 0xc2, 0x1d,
- 0x5f, 0x08, 0x91, 0x29, 0xc2, 0x00, 0xad, 0x08, 0x91, 0x21, 0x04, 0xc1,
- 0x0b, 0xb3, 0x12, 0xc1, 0x0b, 0xc3, 0x10, 0xc1, 0x0b, 0xcd, 0x06, 0xc1,
- 0x0b, 0xe3, 0x16, 0xc1, 0x0b, 0xf1, 0x0c, 0xc1, 0x0b, 0xff, 0x05, 0xc1,
- 0x0c, 0x09, 0x09, 0xc1, 0x0c, 0x13, 0x0d, 0xc1, 0x0c, 0x23, 0x83, 0x08,
- 0x90, 0x03, 0x01, 0x0c, 0x2d, 0x91, 0x08, 0x90, 0x31, 0x87, 0x08, 0x90,
- 0x21, 0x97, 0x08, 0x90, 0x19, 0x8b, 0x08, 0x90, 0x10, 0x44, 0x00, 0xcc,
- 0xc1, 0x0c, 0x39, 0xcb, 0x25, 0xeb, 0x08, 0x91, 0xd8, 0x46, 0x06, 0xf2,
- 0xc1, 0x0c, 0x4f, 0xc4, 0x1c, 0xd0, 0x08, 0x91, 0xc0, 0x46, 0x10, 0x5f,
- 0xc1, 0x0c, 0x5b, 0x44, 0x00, 0xcc, 0x41, 0x0c, 0x7b, 0xc4, 0x22, 0x71,
- 0x00, 0xbf, 0x49, 0xc5, 0x01, 0xdb, 0x00, 0xbf, 0x41, 0x15, 0xc1, 0x0c,
- 0xbd, 0x08, 0xc1, 0x0c, 0xc9, 0x16, 0xc1, 0x0c, 0xd5, 0xc3, 0x01, 0xb4,
- 0x00, 0xbf, 0x09, 0xc4, 0x15, 0xd3, 0x00, 0xbf, 0x00, 0x45, 0x00, 0xcb,
- 0xc1, 0x0c, 0xe1, 0x4a, 0x9c, 0x1c, 0x41, 0x0d, 0x02, 0x13, 0xc1, 0x0d,
- 0x0a, 0xc2, 0x02, 0x55, 0x00, 0xbd, 0x6b, 0x01, 0x0d, 0x26, 0xc2, 0x14,
- 0xd5, 0x00, 0xbd, 0x5a, 0x01, 0x0d, 0x2a, 0xc2, 0x0f, 0x60, 0x00, 0xbd,
- 0x11, 0x0e, 0xc1, 0x0d, 0x2e, 0xc2, 0x00, 0xa4, 0x00, 0xbd, 0x01, 0x15,
- 0xc1, 0x0d, 0x36, 0xc2, 0x17, 0x58, 0x00, 0xbc, 0xe1, 0xc2, 0x00, 0x39,
- 0x00, 0xbc, 0xd1, 0xc2, 0x20, 0x67, 0x00, 0xbc, 0xc9, 0xc2, 0x00, 0x82,
- 0x00, 0xbc, 0xc1, 0x12, 0xc1, 0x0d, 0x46, 0xc2, 0x00, 0xde, 0x00, 0xbc,
- 0xa1, 0x10, 0xc1, 0x0d, 0x4e, 0x16, 0xc1, 0x0d, 0x64, 0x06, 0xc1, 0x0d,
- 0x76, 0x05, 0xc1, 0x0d, 0x7e, 0x0d, 0x41, 0x0d, 0x8a, 0xca, 0xa4, 0xb4,
- 0x0f, 0xad, 0x30, 0xc4, 0x13, 0xf2, 0x0e, 0x96, 0x98, 0xc4, 0x01, 0xdc,
- 0x0e, 0x96, 0x43, 0x01, 0x0d, 0x96, 0xc5, 0x6a, 0x79, 0x0e, 0x96, 0x58,
- 0xc4, 0x18, 0x83, 0x0e, 0x96, 0x3b, 0x01, 0x0d, 0x9c, 0xc2, 0x26, 0x51,
- 0x0e, 0x96, 0x32, 0x01, 0x0d, 0xa2, 0x0b, 0xc1, 0x0d, 0xa8, 0xc3, 0x06,
- 0x9e, 0x0e, 0x96, 0x22, 0x01, 0x0d, 0xb4, 0x0a, 0xc1, 0x0d, 0xba, 0x19,
- 0xc1, 0x0d, 0xc6, 0xc2, 0x00, 0x4d, 0x0e, 0x96, 0x50, 0x47, 0x02, 0x90,
- 0xc1, 0x0d, 0xd0, 0xca, 0x39, 0xef, 0x01, 0x87, 0xd9, 0xce, 0x1b, 0x63,
- 0x01, 0x87, 0xe9, 0xd5, 0x34, 0x0f, 0x01, 0x87, 0xf1, 0xcc, 0x85, 0x6c,
- 0x01, 0x87, 0xf8, 0xd1, 0x30, 0x3a, 0x01, 0x84, 0xd9, 0xd6, 0x30, 0x35,
- 0x01, 0x84, 0xe1, 0xcd, 0x76, 0x94, 0x01, 0x85, 0x01, 0xd4, 0x0e, 0x3e,
- 0x01, 0x87, 0xe0, 0x45, 0x06, 0x98, 0xc1, 0x0e, 0x26, 0xcb, 0x95, 0x0d,
- 0x08, 0xfa, 0x21, 0xc4, 0x1c, 0xd0, 0x08, 0xfa, 0x18, 0x05, 0xc1, 0x0e,
- 0x4a, 0x15, 0xc1, 0x0e, 0x56, 0x08, 0xc1, 0x0e, 0x60, 0xca, 0xa1, 0xee,
- 0x08, 0xfa, 0x59, 0x42, 0x01, 0x4a, 0xc1, 0x0e, 0x6c, 0xd8, 0x25, 0xe0,
- 0x08, 0xfa, 0x30, 0xc6, 0xd0, 0xe7, 0x08, 0xfa, 0x09, 0xc7, 0x44, 0x79,
- 0x08, 0xf8, 0x19, 0xc5, 0xd6, 0xab, 0x08, 0xf8, 0x11, 0xc8, 0x11, 0x40,
- 0x08, 0xf8, 0x09, 0xcb, 0x1e, 0x17, 0x08, 0xf8, 0x00, 0x87, 0x08, 0xf9,
- 0xf3, 0x01, 0x0e, 0x76, 0x03, 0xc1, 0x0e, 0x7a, 0xc9, 0xac, 0xc1, 0x08,
- 0xf9, 0xd1, 0x97, 0x08, 0xf9, 0xc3, 0x01, 0x0e, 0x88, 0x8b, 0x08, 0xf9,
- 0xb2, 0x01, 0x0e, 0x8c, 0x0c, 0xc1, 0x0e, 0x90, 0xc2, 0x00, 0xa4, 0x08,
- 0xf9, 0x91, 0x15, 0xc1, 0x0e, 0xa0, 0xc2, 0x01, 0x09, 0x08, 0xf9, 0x71,
- 0xc2, 0x00, 0xc7, 0x08, 0xf9, 0x69, 0xc2, 0x02, 0x59, 0x08, 0xf9, 0x61,
- 0xc2, 0x1d, 0x5f, 0x08, 0xf9, 0x59, 0xc2, 0x00, 0xad, 0x08, 0xf9, 0x51,
- 0x04, 0xc1, 0x0e, 0xb0, 0x12, 0xc1, 0x0e, 0xba, 0x10, 0xc1, 0x0e, 0xc4,
- 0x06, 0xc1, 0x0e, 0xda, 0x16, 0xc1, 0x0e, 0xe8, 0x05, 0xc1, 0x0e, 0xf6,
- 0x09, 0xc1, 0x0f, 0x00, 0x0d, 0xc1, 0x0f, 0x0a, 0x91, 0x08, 0xf8, 0x81,
- 0x83, 0x08, 0xf8, 0x23, 0x01, 0x0f, 0x14, 0x87, 0x08, 0xf8, 0x71, 0x48,
- 0xac, 0xc1, 0xc1, 0x0f, 0x1c, 0x97, 0x08, 0xf8, 0x43, 0x01, 0x0f, 0x2a,
- 0x8b, 0x08, 0xf8, 0x32, 0x01, 0x0f, 0x2e, 0xc6, 0x00, 0x94, 0x08, 0x86,
- 0x68, 0xc9, 0xac, 0xc1, 0x08, 0x86, 0x11, 0x03, 0xc1, 0x0f, 0x32, 0x91,
- 0x08, 0x85, 0xb9, 0x87, 0x08, 0x85, 0xa9, 0x97, 0x08, 0x85, 0x9b, 0x01,
- 0x0f, 0x3e, 0x8b, 0x08, 0x85, 0x8a, 0x01, 0x0f, 0x42, 0x46, 0x06, 0xf2,
- 0xc1, 0x0f, 0x46, 0xc4, 0x1c, 0xd0, 0x08, 0x86, 0x00, 0xcb, 0x25, 0xeb,
- 0x08, 0x85, 0xf1, 0x44, 0x00, 0xcc, 0x41, 0x0f, 0x52, 0xc2, 0x00, 0xa4,
- 0x08, 0x85, 0x79, 0x15, 0xc1, 0x0f, 0x6a, 0xc2, 0x04, 0x41, 0x08, 0x85,
- 0x59, 0xc2, 0x00, 0xc7, 0x08, 0x85, 0x51, 0x14, 0xc1, 0x0f, 0x7a, 0xc2,
- 0x1d, 0x5f, 0x08, 0x85, 0x41, 0xc2, 0x00, 0xad, 0x08, 0x85, 0x39, 0x04,
- 0xc1, 0x0f, 0x84, 0x12, 0xc1, 0x0f, 0x8e, 0x10, 0xc1, 0x0f, 0x98, 0x06,
- 0xc1, 0x0f, 0xae, 0x16, 0xc1, 0x0f, 0xbc, 0x0c, 0xc1, 0x0f, 0xca, 0x05,
- 0xc1, 0x0f, 0xd4, 0x09, 0xc1, 0x0f, 0xde, 0x0d, 0xc1, 0x0f, 0xe8, 0x83,
- 0x08, 0x84, 0x1b, 0x01, 0x0f, 0xf2, 0x91, 0x08, 0x84, 0x59, 0x87, 0x08,
- 0x84, 0x49, 0x97, 0x08, 0x84, 0x3b, 0x01, 0x0f, 0xfe, 0x8b, 0x08, 0x84,
- 0x2a, 0x01, 0x10, 0x02, 0xc4, 0xe0, 0x2b, 0x05, 0x49, 0x79, 0xc3, 0xe6,
- 0x82, 0x05, 0x49, 0x70, 0xc5, 0xd6, 0xfb, 0x05, 0x49, 0x63, 0x01, 0x10,
- 0x06, 0xc6, 0xc5, 0x40, 0x05, 0x49, 0x58, 0x91, 0x05, 0x49, 0x51, 0x87,
- 0x05, 0x49, 0x3b, 0x01, 0x10, 0x0c, 0x97, 0x05, 0x49, 0x42, 0x01, 0x10,
- 0x10, 0x11, 0xc1, 0x10, 0x14, 0x8b, 0x05, 0x49, 0x21, 0x83, 0x05, 0x49,
- 0x11, 0xc2, 0x03, 0x40, 0x05, 0x49, 0x09, 0xc2, 0x04, 0x41, 0x05, 0x49,
- 0x01, 0x0a, 0xc1, 0x10, 0x1c, 0x16, 0xc1, 0x10, 0x26, 0xc2, 0x01, 0x09,
- 0x05, 0x48, 0xe9, 0xc2, 0x00, 0xc7, 0x05, 0x48, 0xe1, 0xc2, 0x1d, 0x5f,
- 0x05, 0x48, 0xd9, 0xc2, 0x02, 0x59, 0x05, 0x48, 0xd1, 0xc2, 0x00, 0xde,
- 0x05, 0x48, 0xc9, 0xc2, 0x0b, 0xc6, 0x05, 0x48, 0xc1, 0xc2, 0x00, 0xad,
- 0x05, 0x48, 0xb9, 0x12, 0xc1, 0x10, 0x30, 0x10, 0xc1, 0x10, 0x3a, 0xc2,
- 0x02, 0xb4, 0x05, 0x48, 0x81, 0x15, 0xc1, 0x10, 0x4a, 0xc2, 0x01, 0x29,
- 0x05, 0x48, 0x61, 0x0d, 0x41, 0x10, 0x54, 0xc4, 0x22, 0x71, 0x05, 0x48,
- 0x49, 0xc5, 0x01, 0xdb, 0x05, 0x48, 0x41, 0x15, 0xc1, 0x10, 0x5e, 0x08,
- 0xc1, 0x10, 0x6a, 0x16, 0xc1, 0x10, 0x76, 0xc3, 0x01, 0xb4, 0x05, 0x48,
- 0x09, 0xc4, 0x15, 0xd3, 0x05, 0x48, 0x00, 0x45, 0x00, 0xcb, 0xc1, 0x10,
- 0x82, 0x42, 0x00, 0x54, 0xc1, 0x10, 0xa6, 0x4b, 0x6f, 0xcc, 0xc1, 0x10,
- 0xb2, 0xce, 0x71, 0x43, 0x00, 0x66, 0xb1, 0x46, 0x06, 0x97, 0x41, 0x10,
- 0xd8, 0xc4, 0xa2, 0x96, 0x0f, 0xcc, 0xc1, 0x4b, 0x99, 0xbc, 0x41, 0x10,
- 0xfc, 0xc4, 0x00, 0x67, 0x0f, 0xb0, 0xbb, 0x01, 0x11, 0x60, 0xd9, 0x20,
- 0x04, 0x0f, 0xb1, 0xe8, 0xc6, 0xba, 0x97, 0x0f, 0xd4, 0xb1, 0xc5, 0x63,
- 0x21, 0x0f, 0x9c, 0xb0, 0x14, 0xc1, 0x11, 0x66, 0x16, 0xc1, 0x11, 0x72,
- 0x10, 0xc1, 0x11, 0x90, 0x06, 0xc1, 0x11, 0xa9, 0x15, 0xc1, 0x11, 0xbd,
- 0x04, 0xc1, 0x11, 0xd3, 0x0a, 0xc1, 0x11, 0xdd, 0x03, 0xc1, 0x11, 0xe7,
- 0xc2, 0x01, 0x09, 0x0b, 0x7a, 0x11, 0x1c, 0xc1, 0x11, 0xf1, 0x43, 0x75,
- 0xf8, 0xc1, 0x12, 0x03, 0x09, 0xc1, 0x12, 0x1f, 0xc2, 0x96, 0xd0, 0x0b,
- 0x79, 0x39, 0x13, 0xc1, 0x12, 0x27, 0xc2, 0x04, 0x2b, 0x0b, 0x78, 0xf1,
- 0x0e, 0xc1, 0x12, 0x31, 0x18, 0xc1, 0x12, 0x3f, 0xc2, 0x00, 0x67, 0x0b,
- 0x78, 0x39, 0x0f, 0xc1, 0x12, 0x49, 0x12, 0x41, 0x12, 0x53, 0xc5, 0x01,
- 0x62, 0x0b, 0x7c, 0x91, 0xc5, 0x00, 0x95, 0x0b, 0x7c, 0x89, 0xc9, 0x69,
- 0x4d, 0x0b, 0x7c, 0x81, 0xc5, 0x01, 0xf7, 0x0b, 0x7c, 0x78, 0x97, 0x0b,
- 0x7b, 0x53, 0x01, 0x12, 0x5d, 0x8b, 0x0b, 0x7b, 0x0b, 0x01, 0x12, 0x7e,
- 0x87, 0x0b, 0x7a, 0xeb, 0x01, 0x12, 0xa2, 0xc2, 0x00, 0x18, 0x0b, 0x7c,
- 0x19, 0x91, 0x0b, 0x7a, 0xcb, 0x01, 0x12, 0xb8, 0x9b, 0x0b, 0x7b, 0x8b,
- 0x01, 0x12, 0xc8, 0x90, 0x0b, 0x7b, 0xeb, 0x01, 0x12, 0xd2, 0x83, 0x0b,
- 0x7a, 0xa3, 0x01, 0x12, 0xd6, 0xca, 0x9f, 0xe6, 0x0b, 0x7b, 0xc3, 0x01,
- 0x12, 0xf6, 0x99, 0x0b, 0x7a, 0xe2, 0x01, 0x12, 0xfa, 0x49, 0xb4, 0x47,
- 0xc1, 0x12, 0xfe, 0xca, 0x9b, 0xae, 0x0b, 0x7a, 0x89, 0xd6, 0x2e, 0xd5,
- 0x0b, 0x7a, 0x78, 0xcb, 0x98, 0xb4, 0x01, 0x22, 0x49, 0xcc, 0x88, 0xd8,
- 0x01, 0x22, 0x40, 0xc5, 0xbc, 0x20, 0x0f, 0xa9, 0x61, 0xc5, 0x37, 0x5e,
- 0x0f, 0x9d, 0x21, 0xc5, 0x00, 0xca, 0x00, 0x05, 0xa9, 0xc2, 0x00, 0x48,
- 0x0f, 0xcd, 0x00, 0xc3, 0x01, 0x4a, 0x00, 0x05, 0xb9, 0xe0, 0x08, 0x27,
- 0x0f, 0xde, 0x10, 0x00, 0xc1, 0x13, 0x0a, 0xcd, 0x7d, 0x3b, 0x01, 0x10,
- 0x98, 0xc4, 0xcd, 0x89, 0x0f, 0xae, 0xa9, 0xc4, 0x5d, 0xc8, 0x0f, 0xa5,
- 0xe9, 0xc3, 0x23, 0x70, 0x0f, 0xb4, 0x80, 0x43, 0x06, 0x1f, 0xc1, 0x13,
- 0x19, 0x45, 0xd9, 0x1c, 0x41, 0x13, 0x55, 0xce, 0x71, 0x7b, 0x0b, 0x74,
- 0xd1, 0x15, 0xc1, 0x13, 0x67, 0xc9, 0x11, 0xdc, 0x0b, 0x74, 0xc1, 0x05,
- 0xc1, 0x13, 0x73, 0x46, 0x06, 0x97, 0xc1, 0x13, 0x7f, 0x47, 0x33, 0xef,
- 0x41, 0x13, 0xa6, 0xc9, 0xb3, 0x03, 0x01, 0x1e, 0xc9, 0x16, 0xc1, 0x13,
- 0xbc, 0x4a, 0xa6, 0x8a, 0xc1, 0x13, 0xce, 0xcf, 0x6b, 0x3c, 0x01, 0x1e,
- 0x99, 0xc5, 0x1f, 0x0a, 0x01, 0x1e, 0x88, 0x4a, 0x9d, 0x2a, 0xc1, 0x13,
- 0xda, 0x46, 0x06, 0x97, 0xc1, 0x13, 0xe2, 0x51, 0x52, 0xb5, 0x41, 0x14,
- 0x00, 0x48, 0xbb, 0x45, 0xc1, 0x14, 0x10, 0x4d, 0x76, 0xa1, 0x41, 0x14,
- 0x20, 0xc2, 0x03, 0x38, 0x01, 0x12, 0xf1, 0xc5, 0x01, 0x93, 0x01, 0x11,
- 0x0b, 0x01, 0x14, 0x2f, 0xd4, 0x3e, 0xd1, 0x01, 0x4c, 0xe8, 0xc4, 0x15,
- 0xd3, 0x05, 0x5f, 0x81, 0xc4, 0x22, 0x71, 0x05, 0x5f, 0xc9, 0xc3, 0x01,
- 0xb4, 0x05, 0x5f, 0x89, 0x16, 0xc1, 0x14, 0x33, 0x08, 0xc1, 0x14, 0x3f,
- 0x15, 0xc1, 0x14, 0x4b, 0xc5, 0x01, 0xdb, 0x05, 0x5f, 0xc0, 0xc8, 0xbb,
- 0x2d, 0x05, 0x5f, 0x69, 0xc3, 0x7a, 0x4c, 0x05, 0x57, 0x91, 0xcb, 0x94,
- 0xaa, 0x05, 0x57, 0x88, 0x4a, 0x6f, 0xcd, 0xc1, 0x14, 0x57, 0xc5, 0x1e,
- 0x24, 0x05, 0x57, 0xb0, 0x46, 0x02, 0x91, 0xc1, 0x14, 0x87, 0xc7, 0xc2,
- 0x21, 0x05, 0x5f, 0x60, 0xc2, 0x00, 0x57, 0x05, 0x57, 0x81, 0xc2, 0x01,
- 0xdb, 0x05, 0x5f, 0x58, 0x00, 0xc1, 0x14, 0xf6, 0xc3, 0x1b, 0x43, 0x0f,
- 0xb7, 0x19, 0xcf, 0x62, 0xf9, 0x0f, 0xcd, 0xe0, 0xc3, 0x04, 0x6c, 0x01,
- 0x37, 0x83, 0x01, 0x15, 0x02, 0xc5, 0xd7, 0x6e, 0x0f, 0xaf, 0xd8, 0x00,
- 0x41, 0x15, 0x06, 0x45, 0x04, 0x09, 0xc1, 0x15, 0x12, 0x00, 0x41, 0x15,
- 0x1e, 0xc2, 0x00, 0x76, 0x01, 0x15, 0x39, 0xcd, 0x7d, 0x55, 0x0f, 0xc9,
- 0xd8, 0x49, 0x8a, 0x73, 0xc1, 0x15, 0x3a, 0xcd, 0x80, 0xa2, 0x01, 0x1c,
- 0x69, 0xc4, 0x44, 0x47, 0x0f, 0xb4, 0xe8, 0x16, 0xc1, 0x15, 0x44, 0x15,
- 0xc1, 0x15, 0x56, 0xce, 0x75, 0xcd, 0x08, 0xb3, 0x3b, 0x01, 0x15, 0x65,
- 0xcd, 0x7b, 0x26, 0x08, 0xb3, 0x0b, 0x01, 0x15, 0x6b, 0xc5, 0x09, 0xed,
- 0x00, 0xc0, 0x03, 0x01, 0x15, 0x71, 0x06, 0xc1, 0x15, 0x77, 0x47, 0x02,
- 0x90, 0xc1, 0x15, 0x83, 0x08, 0xc1, 0x16, 0x0e, 0xcf, 0x69, 0xf2, 0x00,
- 0xc0, 0x71, 0xc6, 0xcd, 0x93, 0x00, 0xc0, 0x51, 0x47, 0xc2, 0x83, 0xc1,
- 0x16, 0x20, 0x42, 0x00, 0x79, 0xc1, 0x16, 0x2c, 0xc8, 0x24, 0x10, 0x00,
- 0xc0, 0x08, 0x00, 0xc1, 0x16, 0x38, 0xcb, 0x5c, 0xa7, 0x0f, 0xc8, 0x88,
- 0xc5, 0x11, 0x01, 0x0f, 0xa1, 0xa8, 0xd0, 0x5f, 0x32, 0x0f, 0x9c, 0x89,
- 0xc4, 0x29, 0xf3, 0x0f, 0xcb, 0x70, 0xc3, 0x7c, 0x93, 0x0f, 0xa7, 0xa1,
- 0xdd, 0x11, 0xab, 0x0f, 0xa7, 0x90, 0x47, 0xca, 0xe1, 0xc1, 0x16, 0x44,
- 0x45, 0x5a, 0xe2, 0xc1, 0x16, 0x72, 0x4a, 0xa4, 0xdc, 0xc1, 0x16, 0xb0,
- 0x15, 0xc1, 0x16, 0xc2, 0x4e, 0x6f, 0x13, 0xc1, 0x16, 0xce, 0x08, 0xc1,
- 0x16, 0xe0, 0x42, 0x00, 0x5b, 0xc1, 0x16, 0xec, 0x45, 0x02, 0x4d, 0x41,
- 0x16, 0xf8, 0xc4, 0x13, 0xf2, 0x0e, 0x97, 0x98, 0xc4, 0x01, 0xdc, 0x0e,
- 0x97, 0x43, 0x01, 0x17, 0x10, 0xc5, 0x6a, 0x79, 0x0e, 0x97, 0x58, 0xc4,
- 0x18, 0x83, 0x0e, 0x97, 0x3b, 0x01, 0x17, 0x16, 0xc2, 0x26, 0x51, 0x0e,
- 0x97, 0x32, 0x01, 0x17, 0x1c, 0x0b, 0xc1, 0x17, 0x22, 0xc3, 0x06, 0x9e,
- 0x0e, 0x97, 0x22, 0x01, 0x17, 0x2e, 0x0a, 0xc1, 0x17, 0x34, 0x19, 0xc1,
- 0x17, 0x40, 0xc2, 0x00, 0x4d, 0x0e, 0x97, 0x50, 0xce, 0x72, 0x85, 0x08,
- 0xf7, 0xc1, 0xca, 0xa1, 0xc6, 0x08, 0xf7, 0xb9, 0x4b, 0x6f, 0xcc, 0xc1,
- 0x17, 0x4a, 0xc5, 0xd1, 0x11, 0x08, 0xf7, 0x91, 0x47, 0x02, 0x90, 0x41,
- 0x17, 0x5a, 0x46, 0x06, 0x97, 0xc1, 0x17, 0xb6, 0x14, 0xc1, 0x17, 0xda,
- 0x18, 0xc1, 0x17, 0xe6, 0x45, 0x00, 0xcb, 0xc1, 0x17, 0xf2, 0x47, 0x02,
- 0x90, 0x41, 0x18, 0x10, 0x15, 0xc1, 0x18, 0x77, 0x4b, 0x6f, 0xcc, 0xc1,
- 0x18, 0x83, 0x47, 0x02, 0x90, 0xc1, 0x18, 0x99, 0xc9, 0xab, 0xe0, 0x08,
- 0xe3, 0x89, 0xc9, 0x15, 0x9d, 0x08, 0xe3, 0x80, 0x4c, 0x31, 0xf2, 0xc1,
- 0x18, 0xf9, 0xcf, 0x24, 0xe1, 0x01, 0x35, 0x29, 0xc4, 0x00, 0xcb, 0x01,
- 0x32, 0x10, 0x45, 0x00, 0xcb, 0xc1, 0x19, 0x05, 0x47, 0x02, 0x90, 0xc1,
- 0x19, 0x17, 0x4b, 0x6f, 0xcc, 0xc1, 0x19, 0x80, 0xce, 0x75, 0x87, 0x00,
- 0x6a, 0xb9, 0x49, 0x52, 0xd7, 0xc1, 0x19, 0xa6, 0x06, 0xc1, 0x19, 0xb2,
- 0x47, 0x33, 0xef, 0x41, 0x19, 0xbe, 0x4c, 0x10, 0x7e, 0xc1, 0x19, 0xca,
- 0x47, 0x33, 0xef, 0xc1, 0x19, 0xe8, 0x52, 0x49, 0xa4, 0xc1, 0x19, 0xfb,
- 0x47, 0x02, 0x90, 0xc1, 0x1a, 0x07, 0xc7, 0xc1, 0xb8, 0x08, 0x56, 0x40,
- 0xc7, 0xc9, 0xe5, 0x0f, 0xab, 0xd1, 0x43, 0x03, 0x59, 0xc1, 0x1a, 0x6c,
- 0x45, 0x00, 0x6c, 0xc1, 0x1a, 0x78, 0xd7, 0x2a, 0x65, 0x0f, 0xa3, 0x58,
- 0xcb, 0x01, 0xbc, 0x00, 0x42, 0xf1, 0xcf, 0x69, 0x11, 0x00, 0x42, 0xd9,
- 0xd1, 0x51, 0xd8, 0x00, 0x42, 0xd1, 0xd0, 0x5f, 0x02, 0x00, 0x42, 0xc9,
- 0x47, 0x02, 0x90, 0x41, 0x1a, 0x84, 0x0e, 0xc1, 0x1a, 0xa4, 0x15, 0xc1,
- 0x1a, 0xb0, 0xd1, 0x50, 0x40, 0x08, 0x8b, 0xa0, 0xc5, 0x99, 0xf9, 0x0f,
- 0x81, 0x51, 0x19, 0xc1, 0x1a, 0xbc, 0x07, 0xc1, 0x1a, 0xce, 0x15, 0xc1,
- 0x1a, 0xda, 0x10, 0xc1, 0x1a, 0xf8, 0xca, 0xa5, 0x0e, 0x0f, 0x80, 0x21,
- 0xcc, 0x8a, 0xdc, 0x0f, 0x80, 0x29, 0x11, 0xc1, 0x1b, 0x04, 0x16, 0xc1,
- 0x1b, 0x10, 0x08, 0xc1, 0x1b, 0x1c, 0xc4, 0xe5, 0x03, 0x0f, 0x81, 0x11,
- 0xcd, 0x76, 0xfc, 0x0f, 0x81, 0x29, 0x42, 0x00, 0xde, 0xc1, 0x1b, 0x28,
- 0xc6, 0xd2, 0x1f, 0x0f, 0x81, 0x40, 0x43, 0x00, 0x92, 0xc1, 0x1b, 0x34,
- 0x00, 0x41, 0x1b, 0x47, 0x42, 0x0c, 0x7f, 0xc1, 0x1b, 0x59, 0xc3, 0x15,
- 0x2f, 0x01, 0x15, 0xc1, 0xc3, 0x0f, 0x21, 0x01, 0x14, 0x62, 0x01, 0x1b,
- 0x65, 0xcc, 0x25, 0xea, 0x08, 0x95, 0x49, 0x47, 0x02, 0x90, 0x41, 0x1b,
- 0x69, 0xc4, 0x22, 0x71, 0x0b, 0x53, 0x49, 0xc5, 0x01, 0xdb, 0x0b, 0x53,
- 0x41, 0x15, 0xc1, 0x1b, 0xc5, 0x08, 0xc1, 0x1b, 0xd1, 0x16, 0xc1, 0x1b,
- 0xdd, 0xc3, 0x01, 0xb4, 0x0b, 0x53, 0x09, 0xc4, 0x15, 0xd3, 0x0b, 0x53,
- 0x00, 0xc2, 0x13, 0xa5, 0x0b, 0x52, 0xf1, 0xc3, 0x01, 0xf0, 0x0b, 0x52,
- 0xa9, 0x83, 0x0b, 0x52, 0x00, 0x8b, 0x0b, 0x52, 0xe9, 0x91, 0x0b, 0x52,
- 0x98, 0x8b, 0x0b, 0x52, 0xe1, 0x91, 0x0b, 0x52, 0x48, 0x90, 0x0b, 0x52,
- 0xd0, 0x91, 0x0b, 0x52, 0xc9, 0xc4, 0xe3, 0xfb, 0x0b, 0x52, 0x61, 0xc3,
- 0x4a, 0xb2, 0x0b, 0x52, 0x40, 0x83, 0x0b, 0x52, 0xb0, 0x91, 0x0b, 0x52,
- 0x89, 0x8e, 0x0b, 0x52, 0x68, 0x83, 0x0b, 0x52, 0x81, 0xc2, 0x00, 0x0a,
- 0x0b, 0x52, 0x38, 0xc2, 0x00, 0x34, 0x0b, 0x52, 0x79, 0xc2, 0x04, 0xcb,
- 0x0b, 0x52, 0x08, 0xc3, 0x3c, 0x50, 0x0b, 0x52, 0x71, 0xc2, 0x01, 0x2e,
- 0x0b, 0x52, 0x18, 0x8b, 0x0b, 0x52, 0x50, 0x4f, 0x62, 0xea, 0xc1, 0x1b,
- 0xe9, 0xce, 0x73, 0x8f, 0x05, 0x53, 0xd9, 0x15, 0xc1, 0x1b, 0xf1, 0x03,
- 0xc1, 0x1b, 0xfd, 0xc9, 0x0e, 0xac, 0x00, 0x81, 0xb9, 0x42, 0x03, 0x32,
- 0xc1, 0x1c, 0x09, 0xce, 0x73, 0x11, 0x00, 0x82, 0x51, 0x57, 0x26, 0xfb,
- 0xc1, 0x1c, 0x15, 0xd4, 0x3f, 0x71, 0x00, 0x84, 0x79, 0x4c, 0x85, 0x9c,
- 0x41, 0x1c, 0x29, 0x03, 0xc1, 0x1c, 0x31, 0xc8, 0xbf, 0x4d, 0x00, 0x82,
- 0x61, 0xc9, 0xb3, 0xed, 0x00, 0x82, 0x69, 0xc8, 0xbd, 0x45, 0x00, 0x82,
- 0x79, 0x45, 0x4e, 0xd8, 0x41, 0x1c, 0x3d, 0xc4, 0x15, 0xd3, 0x00, 0x84,
- 0x81, 0xc3, 0x01, 0xb4, 0x00, 0x84, 0x89, 0x16, 0xc1, 0x1c, 0x49, 0x08,
- 0xc1, 0x1c, 0x55, 0x15, 0xc1, 0x1c, 0x61, 0xc5, 0x01, 0xdb, 0x00, 0x84,
- 0xc1, 0xc4, 0x22, 0x71, 0x00, 0x84, 0xc8, 0x83, 0x00, 0x81, 0x0b, 0x01,
- 0x1c, 0x6d, 0x0d, 0xc1, 0x1c, 0x77, 0x16, 0xc1, 0x1c, 0x84, 0x15, 0xc1,
- 0x1c, 0x95, 0x09, 0xc1, 0x1c, 0xa9, 0x10, 0xc1, 0x1c, 0xb9, 0x05, 0xc1,
- 0x1c, 0xcd, 0x0c, 0xc1, 0x1c, 0xd7, 0x06, 0xc1, 0x1c, 0xe1, 0x12, 0xc1,
- 0x1c, 0xef, 0x04, 0xc1, 0x1c, 0xf9, 0x0f, 0xc1, 0x1d, 0x03, 0xc2, 0x1d,
- 0x5f, 0x00, 0x80, 0xd1, 0x14, 0xc1, 0x1d, 0x0d, 0x0e, 0xc1, 0x1d, 0x17,
- 0x19, 0xc1, 0x1d, 0x21, 0xc2, 0x00, 0xa4, 0x00, 0x80, 0xf9, 0x8b, 0x00,
- 0x81, 0x1b, 0x01, 0x1d, 0x2b, 0x97, 0x00, 0x81, 0x2b, 0x01, 0x1d, 0x2f,
- 0x87, 0x00, 0x81, 0x3b, 0x01, 0x1d, 0x33, 0x91, 0x00, 0x81, 0x49, 0x48,
- 0xac, 0xc1, 0x41, 0x1d, 0x39, 0xc2, 0x04, 0x2e, 0x05, 0x53, 0xb1, 0xc2,
- 0xc1, 0x7b, 0x05, 0x53, 0xa9, 0xc3, 0xe7, 0x96, 0x05, 0x53, 0xa0, 0xc4,
- 0x22, 0x71, 0x05, 0x4f, 0xc9, 0xc5, 0x01, 0xdb, 0x05, 0x4f, 0xc1, 0x15,
- 0xc1, 0x1d, 0x47, 0x08, 0xc1, 0x1d, 0x53, 0x16, 0xc1, 0x1d, 0x5f, 0xc3,
- 0x01, 0xb4, 0x05, 0x4f, 0x89, 0xc4, 0x15, 0xd3, 0x05, 0x4f, 0x80, 0xc5,
- 0xd5, 0x5c, 0x00, 0x83, 0x19, 0xc6, 0xd1, 0x05, 0x00, 0x83, 0x20, 0x83,
- 0x00, 0x81, 0x61, 0x8b, 0x00, 0x81, 0x92, 0x01, 0x1d, 0x6b, 0x8b, 0x00,
- 0x81, 0x70, 0x97, 0x00, 0x81, 0x80, 0xc6, 0x00, 0x94, 0x00, 0x81, 0xa8,
- 0xc2, 0x23, 0xb4, 0x00, 0x81, 0x99, 0x91, 0x00, 0x81, 0xa0, 0x94, 0x00,
- 0x82, 0xb3, 0x01, 0x1d, 0x74, 0x8e, 0x00, 0x82, 0xc2, 0x01, 0x1d, 0x78,
- 0xcc, 0x8b, 0x6c, 0x00, 0x83, 0x11, 0x44, 0x03, 0x10, 0x41, 0x1d, 0x7c,
- 0xc2, 0x11, 0x70, 0x00, 0x83, 0x39, 0xc2, 0x0f, 0x4d, 0x00, 0x83, 0x40,
- 0xc2, 0x1b, 0xd8, 0x00, 0x83, 0x91, 0x97, 0x00, 0x83, 0x99, 0xc2, 0x00,
- 0x4c, 0x00, 0x83, 0xa0, 0x46, 0x2e, 0xcf, 0xc1, 0x1d, 0x8f, 0x4a, 0xa1,
- 0x9e, 0x41, 0x1d, 0xa7, 0xc2, 0x01, 0x47, 0x00, 0x82, 0x11, 0xc4, 0x04,
- 0x5e, 0x00, 0x82, 0x18, 0xc3, 0x06, 0x9e, 0x00, 0x82, 0x21, 0xc3, 0x0c,
- 0x5b, 0x00, 0x82, 0x28, 0xc2, 0x26, 0x51, 0x00, 0x82, 0x31, 0xc4, 0x18,
- 0x83, 0x00, 0x82, 0x38, 0xc9, 0xb0, 0x8d, 0x0f, 0xd4, 0x31, 0xca, 0xa2,
- 0x0c, 0x0f, 0xd5, 0xd0, 0x46, 0xcf, 0xcd, 0xc1, 0x1d, 0xb9, 0xc4, 0x00,
- 0x67, 0x0f, 0xb0, 0x80, 0x15, 0xc1, 0x1d, 0xf0, 0x47, 0x02, 0x90, 0xc1,
- 0x1d, 0xfa, 0xce, 0x75, 0x25, 0x08, 0xa2, 0xe9, 0xd0, 0x57, 0xd2, 0x08,
- 0xa2, 0xd9, 0x06, 0xc1, 0x1e, 0x61, 0xd1, 0x50, 0x40, 0x08, 0xa2, 0x79,
- 0xca, 0x95, 0x5a, 0x08, 0xa2, 0x71, 0xc5, 0x00, 0xaa, 0x08, 0xa2, 0x69,
- 0xc2, 0x00, 0x3a, 0x08, 0xa2, 0x49, 0x4b, 0x6f, 0xcc, 0x41, 0x1e, 0x73,
- 0xcb, 0x8f, 0x1f, 0x01, 0x05, 0x51, 0x48, 0xbb, 0x55, 0xc1, 0x1e, 0x93,
- 0x45, 0x15, 0xac, 0xc1, 0x1e, 0xb2, 0xc4, 0x00, 0x8a, 0x00, 0x00, 0x50,
- 0xc4, 0x01, 0x1e, 0x01, 0x5c, 0x91, 0xc5, 0x01, 0xf7, 0x01, 0x5c, 0x98,
- 0x48, 0x07, 0x09, 0xc1, 0x1e, 0xbe, 0x48, 0x20, 0x55, 0xc1, 0x1e, 0xee,
- 0xcb, 0x4d, 0x89, 0x00, 0x00, 0xa9, 0x49, 0x1d, 0xe4, 0x41, 0x1f, 0x0c,
- 0xe0, 0x06, 0xe7, 0x01, 0x15, 0x78, 0x43, 0x0a, 0x08, 0xc1, 0x1f, 0x1e,
- 0x42, 0x01, 0xc8, 0x41, 0x1f, 0x2a, 0xc9, 0x0a, 0x4a, 0x01, 0x13, 0xc9,
- 0x43, 0x00, 0x8f, 0x41, 0x1f, 0x30, 0xcc, 0x00, 0x9b, 0x01, 0x13, 0xc1,
- 0x43, 0x00, 0x8f, 0x41, 0x1f, 0x3c, 0xc4, 0x22, 0x71, 0x0f, 0x27, 0xc9,
- 0xc5, 0x01, 0xdb, 0x0f, 0x27, 0xc1, 0x15, 0xc1, 0x1f, 0x48, 0x08, 0xc1,
- 0x1f, 0x54, 0x16, 0xc1, 0x1f, 0x60, 0xc3, 0x01, 0xb4, 0x0f, 0x27, 0x89,
- 0xc4, 0x15, 0xd3, 0x0f, 0x27, 0x80, 0xc5, 0xde, 0x71, 0x0f, 0x27, 0x79,
- 0xc4, 0xdf, 0xe0, 0x0f, 0x27, 0x71, 0xc5, 0xd9, 0x30, 0x0f, 0x27, 0x69,
- 0xc5, 0xdd, 0x1d, 0x0f, 0x27, 0x61, 0xc4, 0xe5, 0xef, 0x0f, 0x27, 0x58,
- 0x87, 0x0f, 0x27, 0x23, 0x01, 0x1f, 0x6c, 0x97, 0x0f, 0x26, 0xfb, 0x01,
- 0x1f, 0x87, 0x8b, 0x0f, 0x26, 0xd3, 0x01, 0x1f, 0x97, 0x83, 0x0f, 0x26,
- 0xab, 0x01, 0x1f, 0xa9, 0x91, 0x0f, 0x26, 0x82, 0x01, 0x1f, 0xbb, 0x4b,
- 0x6f, 0xcc, 0xc1, 0x1f, 0xcd, 0xca, 0x1d, 0x8f, 0x08, 0xcf, 0x19, 0x45,
- 0x00, 0xcb, 0xc1, 0x1f, 0xf6, 0x47, 0x02, 0x90, 0x41, 0x20, 0x06, 0x47,
- 0x33, 0xef, 0xc1, 0x20, 0x69, 0xd5, 0x33, 0xe5, 0x08, 0x45, 0x59, 0x47,
- 0x02, 0x90, 0x41, 0x20, 0x7a, 0xd4, 0x3d, 0xa5, 0x0f, 0xb5, 0x89, 0xcf,
- 0x67, 0x4f, 0x01, 0x00, 0x88, 0x00, 0xc1, 0x20, 0xe3, 0xd6, 0x2e, 0x25,
- 0x0f, 0xb7, 0x50, 0xcc, 0x23, 0x34, 0x01, 0x15, 0xa0, 0xe0, 0x04, 0x47,
- 0x0f, 0xaa, 0x21, 0x0e, 0xc1, 0x20, 0xf5, 0x4b, 0x2f, 0x85, 0x41, 0x21,
- 0x01, 0xca, 0xa2, 0x3e, 0x01, 0x1b, 0xd9, 0xd2, 0x4a, 0xe8, 0x01, 0x17,
- 0x53, 0x01, 0x21, 0x07, 0x15, 0xc1, 0x21, 0x0d, 0x16, 0xc1, 0x21, 0x19,
- 0x03, 0xc1, 0x21, 0x25, 0xcc, 0x00, 0x9b, 0x01, 0x13, 0x79, 0xc9, 0x0a,
- 0x4a, 0x01, 0x13, 0x71, 0x43, 0x00, 0x8f, 0xc1, 0x21, 0x3d, 0xcc, 0x8a,
- 0x04, 0x01, 0x13, 0x11, 0xcb, 0x6a, 0xd7, 0x01, 0x11, 0x30, 0x43, 0x03,
- 0xa3, 0xc1, 0x21, 0x49, 0xc4, 0xe4, 0x9f, 0x0f, 0xa6, 0x9a, 0x01, 0x21,
- 0x53, 0xc5, 0x00, 0xca, 0x0f, 0xb5, 0x58, 0xc5, 0xdf, 0x93, 0x0f, 0xab,
- 0x91, 0xca, 0xa3, 0x42, 0x0f, 0xb5, 0xb8, 0xc9, 0xb3, 0xd2, 0x00, 0x04,
- 0x19, 0xc7, 0xc7, 0x53, 0x0f, 0xb5, 0x98, 0x99, 0x0f, 0x09, 0x61, 0x87,
- 0x0f, 0x09, 0x53, 0x01, 0x21, 0x59, 0x91, 0x0f, 0x09, 0x43, 0x01, 0x21,
- 0x5d, 0x97, 0x0f, 0x09, 0x39, 0x8b, 0x0f, 0x09, 0x31, 0x83, 0x0f, 0x09,
- 0x23, 0x01, 0x21, 0x61, 0x14, 0xc1, 0x21, 0x65, 0xc2, 0x01, 0x29, 0x0f,
- 0x09, 0x11, 0x12, 0xc1, 0x21, 0x6f, 0x0f, 0xc1, 0x21, 0x79, 0xc2, 0x00,
- 0xa4, 0x0f, 0x08, 0x23, 0x01, 0x21, 0x83, 0x10, 0xc1, 0x21, 0x87, 0x06,
- 0xc1, 0x21, 0xb1, 0x1a, 0xc1, 0x21, 0xbb, 0xc2, 0x1d, 0x5f, 0x0f, 0x08,
- 0xc1, 0xc2, 0x0f, 0x60, 0x0f, 0x08, 0xb9, 0xc2, 0x00, 0x67, 0x0f, 0x08,
- 0xa9, 0x16, 0xc1, 0x21, 0xc5, 0xc2, 0x04, 0x41, 0x0f, 0x08, 0x91, 0xc2,
- 0x04, 0x2b, 0x0f, 0x08, 0x71, 0xc2, 0x02, 0xb4, 0x0f, 0x08, 0x59, 0xc2,
- 0x0c, 0x65, 0x0f, 0x08, 0x51, 0xc2, 0x00, 0xc7, 0x0f, 0x08, 0x49, 0xc2,
- 0x03, 0x40, 0x0f, 0x08, 0x40, 0xc4, 0x18, 0x83, 0x0f, 0x0a, 0x39, 0xc2,
- 0x26, 0x51, 0x0f, 0x0a, 0x30, 0xc3, 0x0c, 0x5b, 0x0f, 0x0a, 0x29, 0xc3,
- 0x06, 0x9e, 0x0f, 0x0a, 0x20, 0xc4, 0x04, 0x5e, 0x0f, 0x0a, 0x19, 0xc2,
- 0x01, 0x47, 0x0f, 0x0a, 0x10, 0xc5, 0xdb, 0x0b, 0x0f, 0x09, 0xe1, 0x44,
- 0x15, 0xd8, 0x41, 0x21, 0xd5, 0x1f, 0xc1, 0x21, 0xf3, 0x1e, 0x41, 0x22,
- 0x33, 0x16, 0xc1, 0x22, 0x57, 0xd2, 0x4c, 0xe0, 0x01, 0x24, 0xd1, 0x07,
- 0xc1, 0x22, 0x69, 0x15, 0xc1, 0x22, 0x75, 0x08, 0x41, 0x22, 0x7f, 0xc4,
- 0x26, 0x12, 0x01, 0x50, 0x21, 0xc3, 0x01, 0x4a, 0x01, 0x50, 0x18, 0xce,
- 0x6e, 0x4f, 0x01, 0x50, 0x31, 0xd5, 0x32, 0x95, 0x01, 0x50, 0x28, 0xce,
- 0x6c, 0xb9, 0x01, 0x50, 0x11, 0xcd, 0x76, 0xef, 0x01, 0x50, 0x09, 0xcc,
- 0x8a, 0x40, 0x01, 0x50, 0x00, 0xc4, 0x22, 0x71, 0x00, 0x3e, 0x49, 0xc5,
- 0x01, 0xdb, 0x00, 0x3e, 0x41, 0x15, 0xc1, 0x22, 0x8b, 0x08, 0xc1, 0x22,
- 0x97, 0x16, 0xc1, 0x22, 0xa3, 0xc3, 0x01, 0xb4, 0x00, 0x3e, 0x09, 0xc4,
- 0x15, 0xd3, 0x00, 0x3e, 0x00, 0x0c, 0xc1, 0x22, 0xaf, 0x90, 0x00, 0x3e,
- 0x93, 0x01, 0x22, 0xb9, 0xc2, 0x1d, 0x5f, 0x00, 0x3f, 0x31, 0xc2, 0x01,
- 0x09, 0x00, 0x3f, 0x29, 0xc2, 0x00, 0xa4, 0x00, 0x3f, 0x21, 0xc2, 0x00,
- 0xad, 0x00, 0x3f, 0x09, 0xc2, 0x00, 0xc7, 0x00, 0x3e, 0xf9, 0xc2, 0x04,
- 0x2b, 0x00, 0x3e, 0xf1, 0xc2, 0x00, 0x67, 0x00, 0x3e, 0xe9, 0xc3, 0x9e,
- 0xb0, 0x00, 0x3e, 0xe1, 0xc2, 0x0c, 0x65, 0x00, 0x3e, 0xd9, 0x14, 0xc1,
- 0x22, 0xc9, 0xc2, 0x0b, 0xc6, 0x00, 0x3e, 0xc3, 0x01, 0x22, 0xd3, 0xc3,
- 0x1b, 0xb6, 0x00, 0x3e, 0xb9, 0xc2, 0x00, 0xb3, 0x00, 0x3e, 0xa9, 0xc2,
- 0x03, 0xa4, 0x00, 0x3e, 0xa1, 0xc2, 0x00, 0xde, 0x00, 0x3e, 0x99, 0x91,
- 0x00, 0x3e, 0x83, 0x01, 0x22, 0xd9, 0x97, 0x00, 0x3e, 0x71, 0x87, 0x00,
- 0x3e, 0x6b, 0x01, 0x22, 0xdd, 0x8b, 0x00, 0x3e, 0x61, 0x83, 0x00, 0x3e,
- 0x50, 0xd0, 0x5e, 0x12, 0x00, 0x3f, 0x99, 0xd1, 0x57, 0x8e, 0x00, 0x3f,
- 0x91, 0x45, 0x2b, 0xd3, 0xc1, 0x22, 0xe1, 0x46, 0x2b, 0xff, 0x41, 0x22,
- 0xf9, 0xc6, 0x55, 0xbd, 0x0f, 0xd3, 0x59, 0xc5, 0xda, 0x61, 0x0f, 0xd3,
- 0x60, 0xc6, 0x55, 0xbd, 0x0f, 0xd3, 0x21, 0xc5, 0xda, 0x61, 0x0f, 0xd3,
- 0x28, 0xc8, 0xbf, 0x65, 0x0f, 0xcd, 0x81, 0xca, 0x9b, 0xb8, 0x0f, 0xcd,
- 0x89, 0xc4, 0xe3, 0x1f, 0x0f, 0xcd, 0x91, 0xca, 0xa8, 0xce, 0x0f, 0xcd,
- 0x98, 0xa3, 0x0f, 0x9f, 0xf9, 0xa2, 0x0f, 0x9f, 0xf1, 0xa1, 0x0f, 0x9f,
- 0xe9, 0xa0, 0x0f, 0x9f, 0xe1, 0xc3, 0xe7, 0x78, 0x0f, 0x9f, 0xd8, 0xc3,
- 0x0f, 0x21, 0x01, 0x10, 0x2b, 0x01, 0x23, 0x0b, 0xc4, 0x9f, 0x7e, 0x0f,
- 0xae, 0x63, 0x01, 0x23, 0x11, 0xc8, 0xbf, 0x35, 0x0f, 0xae, 0x59, 0x10,
- 0x41, 0x23, 0x15, 0x42, 0x00, 0x9a, 0x41, 0x23, 0x24, 0x43, 0x00, 0xf3,
- 0xc1, 0x23, 0x30, 0xd0, 0x5b, 0x32, 0x0f, 0xcd, 0xd8, 0xcf, 0x61, 0xeb,
- 0x09, 0xa2, 0xab, 0x01, 0x23, 0x3c, 0xd2, 0x4b, 0x9c, 0x09, 0xa2, 0x01,
- 0x1d, 0x41, 0x23, 0x42, 0xcd, 0x7d, 0x07, 0x09, 0xa2, 0x31, 0x1d, 0x41,
- 0x23, 0x56, 0xcd, 0x79, 0x45, 0x09, 0xa2, 0x29, 0x1d, 0x41, 0x23, 0x62,
- 0x44, 0x03, 0x2c, 0xc1, 0x23, 0x72, 0xd0, 0x5a, 0xd2, 0x09, 0xa1, 0x89,
- 0x42, 0xce, 0x77, 0x41, 0x23, 0x7e, 0xc8, 0x79, 0x4a, 0x09, 0xa2, 0x19,
- 0x42, 0xce, 0x77, 0x41, 0x23, 0xa1, 0xc9, 0xb2, 0x85, 0x09, 0xa2, 0x09,
- 0x1d, 0x41, 0x23, 0xc7, 0x43, 0x00, 0x58, 0xc1, 0x23, 0xdf, 0x1d, 0x41,
- 0x23, 0xf1, 0x45, 0x3c, 0xb7, 0xc1, 0x24, 0x01, 0x42, 0xce, 0x77, 0x41,
- 0x24, 0x13, 0x49, 0xad, 0xa2, 0xc1, 0x24, 0x3a, 0x1d, 0x41, 0x24, 0x52,
- 0xcd, 0x77, 0xcc, 0x09, 0xa1, 0xb1, 0x1d, 0x41, 0x24, 0x5a, 0xce, 0x6c,
- 0xd5, 0x09, 0xa1, 0x81, 0x1d, 0x41, 0x24, 0x72, 0x42, 0xc5, 0x7d, 0xc1,
- 0x24, 0x8b, 0x1d, 0x41, 0x24, 0x9b, 0x1e, 0xc1, 0x24, 0xbd, 0x1d, 0x41,
- 0x24, 0xdf, 0xa5, 0x09, 0x9f, 0x19, 0xa4, 0x09, 0x9f, 0x11, 0xa3, 0x09,
- 0x9f, 0x09, 0xa2, 0x09, 0x9f, 0x01, 0xa1, 0x09, 0x9e, 0xf9, 0xa0, 0x09,
- 0x9e, 0xf1, 0x9f, 0x09, 0x9e, 0xe9, 0x9e, 0x09, 0x9e, 0xda, 0x01, 0x25,
- 0x0f, 0xa5, 0x09, 0x9e, 0xcb, 0x01, 0x25, 0x13, 0xa4, 0x09, 0x9e, 0xc1,
- 0xa3, 0x09, 0x9e, 0xb3, 0x01, 0x25, 0x17, 0xa2, 0x09, 0x9e, 0xa9, 0xa1,
- 0x09, 0x9e, 0x93, 0x01, 0x25, 0x1b, 0xa0, 0x09, 0x9e, 0x89, 0x9f, 0x09,
- 0x9e, 0x81, 0x9e, 0x09, 0x9e, 0x78, 0x21, 0xc1, 0x25, 0x23, 0x20, 0xc1,
- 0x25, 0x2f, 0x1f, 0xc1, 0x25, 0x5a, 0x1e, 0xc1, 0x25, 0x88, 0x1d, 0x41,
- 0x25, 0xb0, 0x21, 0xc1, 0x25, 0xd7, 0x20, 0xc1, 0x25, 0xf3, 0x1f, 0xc1,
- 0x26, 0x1e, 0x1e, 0xc1, 0x26, 0x49, 0x1d, 0x41, 0x26, 0x77, 0x1f, 0xc1,
- 0x26, 0xa1, 0x1e, 0xc1, 0x26, 0xc9, 0x1d, 0x41, 0x26, 0xf7, 0xa4, 0x09,
- 0x95, 0x71, 0xa3, 0x09, 0x95, 0x69, 0xa2, 0x09, 0x95, 0x61, 0xa1, 0x09,
- 0x95, 0x59, 0xa0, 0x09, 0x95, 0x51, 0x9f, 0x09, 0x95, 0x49, 0x9e, 0x09,
- 0x95, 0x40, 0x1e, 0xc1, 0x27, 0x21, 0x1d, 0x41, 0x27, 0x29, 0x42, 0xdd,
- 0xf3, 0xc1, 0x27, 0x53, 0x42, 0xe8, 0x10, 0xc1, 0x27, 0x5f, 0x1d, 0x41,
- 0x27, 0x6d, 0xa5, 0x09, 0x8d, 0x11, 0xa4, 0x09, 0x8d, 0x09, 0xa3, 0x09,
- 0x8d, 0x01, 0xa2, 0x09, 0x8c, 0xf9, 0xa1, 0x09, 0x8c, 0xf1, 0xa0, 0x09,
- 0x8c, 0xe9, 0x9f, 0x09, 0x8c, 0xe1, 0x9e, 0x09, 0x8c, 0xd8, 0x22, 0xc1,
- 0x27, 0x81, 0x21, 0xc1, 0x27, 0x95, 0x20, 0xc1, 0x27, 0xc3, 0x1f, 0xc1,
- 0x27, 0xf1, 0x1e, 0xc1, 0x28, 0x1f, 0x1d, 0x41, 0x28, 0x4a, 0x23, 0xc1,
- 0x28, 0x74, 0x22, 0xc1, 0x28, 0x97, 0x21, 0xc1, 0x28, 0xc8, 0x20, 0xc1,
- 0x28, 0xf6, 0x1f, 0xc1, 0x29, 0x24, 0x1e, 0xc1, 0x29, 0x4f, 0x1d, 0x41,
- 0x29, 0x77, 0x1f, 0xc1, 0x29, 0x9e, 0x1e, 0xc1, 0x29, 0xb2, 0x1d, 0x41,
- 0x29, 0xdd, 0x4c, 0x81, 0x70, 0xc1, 0x2a, 0x04, 0xd2, 0x4d, 0xca, 0x0f,
- 0xa3, 0xe8, 0xc4, 0x22, 0x71, 0x00, 0x37, 0xc9, 0xc5, 0x01, 0xdb, 0x00,
- 0x37, 0xc1, 0x15, 0xc1, 0x2a, 0x1a, 0x08, 0xc1, 0x2a, 0x26, 0x16, 0xc1,
- 0x2a, 0x32, 0xc3, 0x01, 0xb4, 0x00, 0x37, 0x89, 0xc4, 0x15, 0xd3, 0x00,
- 0x37, 0x80, 0xcd, 0x2c, 0x41, 0x01, 0x02, 0x49, 0xc4, 0x00, 0xba, 0x00,
- 0x01, 0x08, 0x09, 0xc1, 0x2a, 0x3e, 0x0a, 0xc1, 0x2a, 0x70, 0x04, 0xc1,
- 0x2a, 0x91, 0x05, 0xc1, 0x2a, 0xb6, 0x06, 0xc1, 0x2a, 0xe1, 0x16, 0xc1,
- 0x2b, 0x0c, 0x0e, 0xc1, 0x2b, 0x41, 0x0f, 0xc1, 0x2b, 0x64, 0x15, 0xc1,
- 0x2b, 0x8b, 0x14, 0xc1, 0x2b, 0xba, 0x13, 0xc1, 0x2b, 0xe3, 0x18, 0xc1,
- 0x2c, 0x0c, 0x1a, 0xc1, 0x2c, 0x2c, 0x10, 0xc1, 0x2c, 0x51, 0x0d, 0xc1,
- 0x2c, 0x78, 0x19, 0xc1, 0x2c, 0xa1, 0x12, 0xc1, 0x2c, 0xbe, 0x1c, 0xc1,
- 0x2c, 0xe3, 0x1b, 0xc1, 0x2d, 0x0e, 0x0c, 0xc1, 0x2d, 0x2b, 0x08, 0x41,
- 0x2d, 0x4e, 0xca, 0x25, 0xec, 0x00, 0x9b, 0x01, 0xc7, 0x4f, 0x31, 0x00,
- 0x9b, 0x20, 0x47, 0x11, 0x39, 0xc1, 0x2d, 0x72, 0xc2, 0x00, 0xad, 0x00,
- 0x9b, 0x18, 0xc2, 0x01, 0x47, 0x00, 0x9b, 0x51, 0xc4, 0x04, 0x5e, 0x00,
- 0x9b, 0x58, 0xc3, 0x06, 0x9e, 0x00, 0x9b, 0x61, 0xc3, 0x0c, 0x5b, 0x00,
- 0x9b, 0x68, 0xc2, 0x26, 0x51, 0x00, 0x9b, 0x71, 0xc4, 0x18, 0x83, 0x00,
- 0x9b, 0x78, 0xc2, 0x00, 0x4d, 0x00, 0x9b, 0x93, 0x01, 0x2d, 0x7e, 0xc5,
- 0x2a, 0x13, 0x00, 0x9b, 0x99, 0xc5, 0x0c, 0x54, 0x00, 0x9b, 0xa0, 0xc4,
- 0x47, 0x9b, 0x00, 0x9b, 0xa9, 0xc4, 0x43, 0xcc, 0x00, 0x9b, 0xb0, 0xc4,
- 0xcf, 0xf7, 0x00, 0x9b, 0xb9, 0xc6, 0x18, 0x83, 0x00, 0x9b, 0xc0, 0xc4,
- 0xda, 0x49, 0x00, 0x9c, 0x8b, 0x01, 0x2d, 0x84, 0xc4, 0xe2, 0xa3, 0x00,
- 0x9c, 0xa0, 0xc4, 0x5a, 0xa6, 0x00, 0x9c, 0xa9, 0xc3, 0x56, 0x08, 0x00,
- 0x9c, 0xc8, 0x00, 0x41, 0x2d, 0x8a, 0xcf, 0x46, 0x5f, 0x01, 0x1f, 0x39,
- 0x00, 0x41, 0x2d, 0x96, 0x16, 0xc1, 0x2d, 0xae, 0x15, 0xc1, 0x2d, 0xba,
- 0xc4, 0x5d, 0xe2, 0x08, 0x7f, 0x99, 0xc4, 0xbf, 0xb9, 0x08, 0x7f, 0x91,
- 0xc2, 0x00, 0x27, 0x08, 0x7f, 0x81, 0xc3, 0x1f, 0xd8, 0x08, 0x7f, 0x69,
- 0xc3, 0x0b, 0x0e, 0x08, 0x7f, 0x61, 0xc6, 0xd0, 0x5d, 0x08, 0x7f, 0x59,
- 0xc4, 0xe2, 0x57, 0x08, 0x7f, 0x51, 0xc4, 0x4b, 0x98, 0x08, 0x7f, 0x49,
- 0xc2, 0x01, 0xf0, 0x08, 0x7f, 0x23, 0x01, 0x2d, 0xc4, 0xc5, 0x4b, 0x92,
- 0x08, 0x7f, 0x31, 0xc3, 0x78, 0xa9, 0x08, 0x7f, 0x29, 0xc6, 0x45, 0xf6,
- 0x08, 0x7f, 0x19, 0xc5, 0xa1, 0x94, 0x08, 0x7f, 0x11, 0xc4, 0xe4, 0x8f,
- 0x08, 0x7f, 0x09, 0x03, 0x41, 0x2d, 0xca, 0x87, 0x08, 0x28, 0x11, 0xc2,
- 0x01, 0xf0, 0x08, 0x28, 0x18, 0x87, 0x08, 0x28, 0x21, 0xc2, 0x01, 0xf0,
- 0x08, 0x28, 0x30, 0xc2, 0x00, 0x06, 0x08, 0x28, 0x29, 0x87, 0x08, 0x28,
- 0x99, 0x83, 0x08, 0x28, 0xa1, 0xc2, 0x1b, 0xa5, 0x08, 0x28, 0xa8, 0x8b,
- 0x08, 0x28, 0x38, 0x87, 0x08, 0x28, 0x51, 0xc2, 0x1b, 0xa5, 0x08, 0x28,
- 0x59, 0x0a, 0x41, 0x2d, 0xd6, 0x87, 0x08, 0x28, 0x79, 0xc2, 0x01, 0xf0,
- 0x08, 0x29, 0x38, 0x87, 0x08, 0x28, 0x81, 0xc2, 0x00, 0x54, 0x08, 0x28,
- 0x88, 0x87, 0x08, 0x28, 0xc9, 0xc2, 0x00, 0x9c, 0x08, 0x28, 0xd0, 0x87,
- 0x08, 0x28, 0xd9, 0xc2, 0x01, 0xf0, 0x08, 0x28, 0xe0, 0x87, 0x08, 0x28,
- 0xe9, 0xc2, 0x01, 0xf0, 0x08, 0x28, 0xf0, 0x87, 0x08, 0x29, 0x19, 0xc2,
- 0x01, 0xf0, 0x08, 0x29, 0x20, 0xe0, 0x0b, 0x67, 0x01, 0x3a, 0x50, 0xdf,
- 0x0c, 0xa4, 0x01, 0x3a, 0x09, 0x47, 0x08, 0xea, 0x41, 0x2d, 0xe0, 0xc9,
- 0xb0, 0x3c, 0x0f, 0xac, 0x21, 0xd5, 0x36, 0xc4, 0x0f, 0xa7, 0x48, 0x43,
- 0x06, 0xe0, 0xc1, 0x2d, 0xf2, 0xc6, 0x06, 0x1b, 0x00, 0x00, 0xc9, 0x16,
- 0xc1, 0x2d, 0xfe, 0xc4, 0x00, 0x8a, 0x00, 0x00, 0x51, 0xcd, 0x7c, 0x6b,
- 0x00, 0x04, 0x39, 0xcc, 0x8d, 0x04, 0x00, 0x04, 0xb8, 0xc6, 0x01, 0x7a,
- 0x01, 0x4f, 0x99, 0xc7, 0x3f, 0x2e, 0x01, 0x4f, 0x89, 0xc6, 0x07, 0x09,
- 0x01, 0x4f, 0x78, 0xc6, 0x01, 0x7a, 0x01, 0x4f, 0x91, 0xc7, 0x3f, 0x2e,
- 0x01, 0x4f, 0x81, 0xc6, 0x07, 0x09, 0x01, 0x4f, 0x70, 0x43, 0x02, 0x18,
- 0xc1, 0x2e, 0x0d, 0xcf, 0x6a, 0xd3, 0x01, 0x16, 0xa8, 0xc5, 0x37, 0x91,
- 0x01, 0x12, 0xa9, 0xc4, 0x00, 0xcb, 0x00, 0x01, 0xeb, 0x01, 0x2e, 0x19,
- 0xcd, 0x77, 0x3d, 0x01, 0x53, 0x70, 0xc2, 0x02, 0x18, 0x01, 0x12, 0x69,
- 0xd4, 0x38, 0x7d, 0x01, 0x53, 0xc0, 0xcb, 0x93, 0x08, 0x0f, 0x9f, 0x21,
- 0xc6, 0xd3, 0x1b, 0x0f, 0x9f, 0x80, 0xc4, 0x22, 0x71, 0x08, 0xed, 0x49,
- 0xc5, 0x01, 0xdb, 0x08, 0xed, 0x41, 0x15, 0xc1, 0x2e, 0x1d, 0x08, 0xc1,
- 0x2e, 0x29, 0x16, 0xc1, 0x2e, 0x35, 0xc3, 0x01, 0xb4, 0x08, 0xed, 0x09,
- 0xc4, 0x15, 0xd3, 0x08, 0xed, 0x00, 0xc5, 0x1e, 0x24, 0x08, 0xec, 0xb9,
- 0x4a, 0x6f, 0xcd, 0x41, 0x2e, 0x41, 0xc7, 0x44, 0x79, 0x08, 0xec, 0xb1,
- 0xc8, 0x11, 0x40, 0x08, 0xec, 0xa8, 0xc2, 0x0c, 0x65, 0x08, 0xec, 0x49,
- 0xc2, 0x02, 0x59, 0x08, 0xec, 0x41, 0xc2, 0x00, 0xa4, 0x08, 0xec, 0x39,
- 0x12, 0xc1, 0x2e, 0x5f, 0x10, 0xc1, 0x2e, 0x69, 0x06, 0xc1, 0x2e, 0x73,
- 0x0c, 0xc1, 0x2e, 0x81, 0x0e, 0xc1, 0x2e, 0x8b, 0x16, 0xc1, 0x2e, 0x95,
- 0x05, 0xc1, 0x2e, 0xa3, 0x09, 0xc1, 0x2e, 0xad, 0x0d, 0xc1, 0x2e, 0xb7,
- 0xc2, 0x00, 0xad, 0x08, 0xeb, 0x81, 0x04, 0xc1, 0x2e, 0xc1, 0xc2, 0x04,
- 0x41, 0x08, 0xeb, 0x69, 0xc2, 0x1d, 0x5f, 0x08, 0xeb, 0x61, 0x83, 0x08,
- 0xeb, 0x03, 0x01, 0x2e, 0xcb, 0xc2, 0x02, 0x84, 0x08, 0xeb, 0x51, 0xc2,
- 0x00, 0x4c, 0x08, 0xeb, 0x39, 0x97, 0x08, 0xeb, 0x23, 0x01, 0x2e, 0xd7,
- 0x8b, 0x08, 0xeb, 0x12, 0x01, 0x2e, 0xdb, 0xca, 0xa6, 0xbc, 0x00, 0x50,
- 0x09, 0xc5, 0x60, 0x92, 0x00, 0x50, 0x11, 0x42, 0x03, 0x32, 0xc1, 0x2e,
- 0xdf, 0xc5, 0x35, 0x00, 0x00, 0x51, 0xe1, 0xc5, 0xd6, 0x0b, 0x00, 0x52,
- 0x89, 0xc6, 0xcd, 0xab, 0x00, 0x53, 0xa8, 0x83, 0x00, 0x50, 0x2b, 0x01,
- 0x2e, 0xeb, 0x8b, 0x00, 0x50, 0x3b, 0x01, 0x2e, 0xf7, 0x97, 0x00, 0x50,
- 0x4b, 0x01, 0x2e, 0xfb, 0xc2, 0x00, 0x4c, 0x00, 0x50, 0x79, 0xc2, 0x02,
- 0x84, 0x00, 0x50, 0x99, 0x0d, 0xc1, 0x2e, 0xff, 0x09, 0xc1, 0x2f, 0x07,
- 0x10, 0xc1, 0x2f, 0x0f, 0x05, 0xc1, 0x2f, 0x25, 0x0c, 0xc1, 0x2f, 0x2f,
- 0x16, 0xc1, 0x2f, 0x39, 0x06, 0xc1, 0x2f, 0x47, 0x12, 0xc1, 0x2f, 0x55,
- 0x04, 0xc1, 0x2f, 0x5f, 0xc2, 0x00, 0xad, 0x00, 0x51, 0x71, 0xc2, 0x1d,
- 0x5f, 0x00, 0x51, 0x79, 0x14, 0xc1, 0x2f, 0x69, 0x0e, 0xc1, 0x2f, 0x73,
- 0xc2, 0x04, 0x41, 0x00, 0x51, 0xa9, 0x15, 0xc1, 0x2f, 0x7d, 0xc2, 0x00,
- 0xa4, 0x00, 0x51, 0xc9, 0xc2, 0x02, 0xb4, 0x00, 0x52, 0xd9, 0xc2, 0x00,
- 0x67, 0x00, 0x52, 0xf0, 0x03, 0xc1, 0x2f, 0x87, 0x8b, 0x00, 0x51, 0xfb,
- 0x01, 0x2f, 0x93, 0x97, 0x00, 0x52, 0x0b, 0x01, 0x2f, 0x97, 0xc2, 0x00,
- 0x4c, 0x00, 0x52, 0x39, 0xc2, 0x02, 0x84, 0x00, 0x52, 0x58, 0xc4, 0x15,
- 0xd3, 0x00, 0x53, 0x31, 0xc3, 0x01, 0xb4, 0x00, 0x53, 0x39, 0x16, 0xc1,
- 0x2f, 0x9b, 0x08, 0xc1, 0x2f, 0xa7, 0x15, 0xc1, 0x2f, 0xb3, 0xc5, 0x01,
- 0xdb, 0x00, 0x53, 0x71, 0xc4, 0x22, 0x71, 0x00, 0x53, 0x78, 0xc4, 0xe4,
- 0xc7, 0x00, 0x53, 0x89, 0xd0, 0x50, 0x41, 0x00, 0x53, 0xb0, 0x05, 0xc1,
- 0x2f, 0xbf, 0x03, 0xc1, 0x2f, 0xcb, 0x42, 0x03, 0x32, 0xc1, 0x2f, 0xd7,
- 0xc5, 0x35, 0x00, 0x00, 0x55, 0xe1, 0x15, 0xc1, 0x2f, 0xe3, 0xc6, 0xd4,
- 0x65, 0x00, 0x57, 0xe1, 0x16, 0x41, 0x2f, 0xef, 0x83, 0x00, 0x54, 0x2b,
- 0x01, 0x2f, 0xfb, 0x8b, 0x00, 0x54, 0x3b, 0x01, 0x30, 0x07, 0x97, 0x00,
- 0x54, 0x4b, 0x01, 0x30, 0x0b, 0x18, 0xc1, 0x30, 0x0f, 0x87, 0x00, 0x54,
- 0x79, 0x91, 0x00, 0x54, 0x99, 0x0d, 0xc1, 0x30, 0x19, 0x09, 0xc1, 0x30,
- 0x23, 0x10, 0xc1, 0x30, 0x2d, 0x05, 0xc1, 0x30, 0x43, 0x0c, 0xc1, 0x30,
- 0x4d, 0x16, 0xc1, 0x30, 0x57, 0x06, 0xc1, 0x30, 0x65, 0x12, 0xc1, 0x30,
- 0x73, 0x04, 0xc1, 0x30, 0x7d, 0xc2, 0x00, 0xad, 0x00, 0x55, 0x71, 0xc2,
- 0x1d, 0x5f, 0x00, 0x55, 0x79, 0xc2, 0x02, 0x59, 0x00, 0x55, 0x81, 0x0e,
- 0xc1, 0x30, 0x87, 0x15, 0xc1, 0x30, 0x91, 0xc2, 0x00, 0xa4, 0x00, 0x55,
- 0xc9, 0xc3, 0xaf, 0x4c, 0x00, 0x57, 0xc8, 0x47, 0xc9, 0x91, 0xc1, 0x30,
- 0xa1, 0x45, 0x00, 0xcb, 0x41, 0x30, 0xa9, 0xc4, 0x15, 0xd3, 0x00, 0x57,
- 0x31, 0xc3, 0x01, 0xb4, 0x00, 0x57, 0x39, 0x16, 0xc1, 0x30, 0xcf, 0x08,
- 0xc1, 0x30, 0xdb, 0x15, 0xc1, 0x30, 0xe7, 0xc5, 0x01, 0xdb, 0x00, 0x57,
- 0x71, 0xc4, 0x22, 0x71, 0x00, 0x57, 0x78, 0xc5, 0xde, 0x2b, 0x08, 0x19,
- 0xa1, 0xc3, 0xa5, 0xa2, 0x08, 0x19, 0x80, 0xc3, 0xb9, 0x91, 0x08, 0x19,
- 0xb1, 0xc4, 0xe2, 0x0f, 0x08, 0x1a, 0x38, 0xc3, 0x8e, 0xaf, 0x08, 0x19,
- 0xb9, 0xc4, 0x73, 0xef, 0x08, 0x1a, 0x40, 0xc5, 0xd6, 0x38, 0x08, 0x19,
- 0xc1, 0xc4, 0xe4, 0x5b, 0x08, 0x1a, 0x20, 0xc5, 0xdd, 0xd6, 0x08, 0x19,
- 0xe9, 0x43, 0x00, 0x8b, 0x41, 0x30, 0xf3, 0x42, 0x00, 0xbd, 0xc1, 0x30,
- 0xff, 0x42, 0x00, 0x4e, 0x41, 0x31, 0x69, 0x04, 0xc1, 0x31, 0x81, 0xd5,
- 0x35, 0xb3, 0x01, 0x16, 0xd9, 0x45, 0x00, 0x6c, 0xc1, 0x31, 0x8d, 0x11,
- 0xc1, 0x31, 0x9f, 0x03, 0xc1, 0x31, 0xab, 0xc4, 0x00, 0xcb, 0x00, 0x01,
- 0xf1, 0xcf, 0x68, 0x99, 0x01, 0x55, 0x3a, 0x01, 0x31, 0xb7, 0x4b, 0x6f,
- 0xcc, 0xc1, 0x31, 0xbd, 0x47, 0x02, 0x90, 0xc1, 0x31, 0xe1, 0x45, 0x00,
- 0xcb, 0xc1, 0x32, 0x4a, 0xce, 0x75, 0x87, 0x08, 0x9a, 0xb9, 0xc2, 0x00,
- 0x3a, 0x08, 0x9a, 0x80, 0xc4, 0x00, 0x67, 0x0f, 0xb0, 0x03, 0x01, 0x32,
- 0x64, 0xda, 0x19, 0xb7, 0x0f, 0xb1, 0xc0, 0xc9, 0x19, 0xf5, 0x00, 0x00,
- 0xe9, 0xc4, 0x00, 0xba, 0x01, 0x5e, 0x90, 0xc8, 0xb9, 0x45, 0x01, 0x37,
- 0x71, 0xc7, 0xcb, 0x43, 0x01, 0x37, 0x68, 0x48, 0x07, 0x5a, 0xc1, 0x32,
- 0x6a, 0xcb, 0x97, 0xcd, 0x01, 0x11, 0xd0, 0x58, 0x26, 0x28, 0xc1, 0x32,
- 0x76, 0x4f, 0x07, 0x17, 0xc1, 0x32, 0xfc, 0x47, 0x02, 0x90, 0xc1, 0x33,
- 0x80, 0xd3, 0x42, 0x33, 0x00, 0x87, 0xd9, 0x4d, 0x26, 0xea, 0x41, 0x34,
- 0x06, 0xc8, 0x40, 0x9c, 0x0f, 0xb6, 0x50, 0x4f, 0x07, 0x17, 0xc1, 0x34,
- 0x8a, 0x4d, 0x26, 0xea, 0x41, 0x34, 0xf3, 0xc4, 0xe4, 0x9f, 0x0f, 0xa6,
- 0xc9, 0xc5, 0x1d, 0x53, 0x0f, 0xcf, 0x08, 0x45, 0x00, 0xcb, 0xc1, 0x35,
- 0x5c, 0x47, 0x02, 0x90, 0xc1, 0x35, 0x78, 0x4b, 0x6f, 0xcc, 0xc1, 0x35,
- 0xdf, 0x03, 0xc1, 0x35, 0xff, 0x46, 0x06, 0x97, 0xc1, 0x36, 0x0b, 0xc6,
- 0xcf, 0xbb, 0x00, 0x5b, 0x81, 0x49, 0x52, 0xd7, 0x41, 0x36, 0x2f, 0xc5,
- 0xd4, 0x89, 0x0f, 0x69, 0xe9, 0xc4, 0x03, 0x2b, 0x0f, 0x69, 0xe0, 0x16,
- 0xc1, 0x36, 0x3b, 0x08, 0xc1, 0x36, 0x4c, 0xc3, 0x01, 0xb4, 0x0f, 0x68,
- 0x0b, 0x01, 0x36, 0x54, 0x15, 0xc1, 0x36, 0x58, 0xc5, 0x01, 0xdb, 0x0f,
- 0x68, 0x43, 0x01, 0x36, 0x6a, 0xc4, 0x22, 0x71, 0x0f, 0x68, 0x4a, 0x01,
- 0x36, 0x75, 0x16, 0xc1, 0x36, 0x82, 0x08, 0xc1, 0x36, 0x9a, 0x15, 0xc1,
- 0x36, 0xa9, 0xc5, 0x01, 0xdb, 0x0f, 0x69, 0xa9, 0xc4, 0x22, 0x71, 0x0f,
- 0x69, 0xb0, 0x44, 0x01, 0xb8, 0xc1, 0x36, 0xb8, 0xcc, 0x87, 0x28, 0x0f,
- 0xad, 0x78, 0x00, 0xc1, 0x36, 0xc4, 0x02, 0x41, 0x36, 0xec, 0xc5, 0xd7,
- 0x5f, 0x0f, 0xad, 0xc0, 0x48, 0xbc, 0x6d, 0xc1, 0x36, 0xf8, 0x47, 0xc8,
- 0x17, 0xc1, 0x37, 0x04, 0x42, 0x05, 0x7b, 0xc1, 0x37, 0x16, 0x4a, 0xa0,
- 0xa4, 0xc1, 0x37, 0x22, 0x4e, 0x71, 0x19, 0xc1, 0x37, 0x34, 0x4e, 0x74,
- 0x61, 0xc1, 0x37, 0x40, 0xc3, 0x1e, 0x7b, 0x0f, 0xae, 0xe9, 0x43, 0x00,
- 0x27, 0xc1, 0x37, 0x4c, 0x47, 0xca, 0xe1, 0x41, 0x37, 0x56, 0xc5, 0x2a,
- 0x69, 0x0f, 0xa3, 0xa9, 0xc3, 0x14, 0x99, 0x0f, 0xa3, 0xa1, 0xc5, 0xdc,
- 0x64, 0x0f, 0xce, 0x98, 0x4b, 0x10, 0x7f, 0xc1, 0x37, 0x62, 0xc7, 0xc9,
- 0x36, 0x00, 0xe3, 0xe0, 0xd1, 0x57, 0x5b, 0x00, 0xe3, 0xd1, 0xc8, 0xbd,
- 0x9d, 0x00, 0xe3, 0xc0, 0x11, 0xc1, 0x37, 0x6e, 0x0e, 0xc1, 0x37, 0x80,
- 0x07, 0xc1, 0x37, 0x97, 0x17, 0xc1, 0x37, 0xab, 0x0b, 0xc1, 0x37, 0xbd,
- 0x03, 0x41, 0x37, 0xcf, 0xc4, 0x22, 0x71, 0x00, 0xe2, 0xc9, 0xc5, 0x01,
- 0xdb, 0x00, 0xe2, 0xc1, 0x15, 0xc1, 0x37, 0xe5, 0x08, 0xc1, 0x37, 0xf1,
- 0x16, 0xc1, 0x37, 0xfd, 0xc3, 0x01, 0xb4, 0x00, 0xe2, 0x89, 0xc4, 0x15,
- 0xd3, 0x00, 0xe2, 0x80, 0xca, 0x21, 0x3e, 0x01, 0x39, 0x69, 0xcb, 0x90,
- 0x32, 0x01, 0x38, 0xf9, 0xcb, 0x5a, 0x67, 0x01, 0x38, 0xc9, 0xca, 0x29,
- 0x30, 0x01, 0x34, 0xe8, 0xcf, 0x67, 0xa9, 0x01, 0x22, 0x51, 0xc3, 0x00,
- 0xe4, 0x01, 0x22, 0x40, 0xd6, 0x2f, 0x43, 0x01, 0x22, 0x49, 0xc4, 0x63,
- 0x04, 0x01, 0x22, 0x08, 0xd9, 0x20, 0x36, 0x01, 0x22, 0x31, 0xc6, 0xcc,
- 0x01, 0x01, 0x22, 0x29, 0xca, 0xa5, 0x4a, 0x01, 0x22, 0x20, 0xc4, 0x03,
- 0x68, 0x01, 0x4d, 0x39, 0xc2, 0x01, 0xc7, 0x01, 0x4d, 0x30, 0x45, 0x27,
- 0x5e, 0x41, 0x38, 0x09, 0xc5, 0xd9, 0x26, 0x00, 0xb4, 0xd1, 0x42, 0x00,
- 0x9d, 0xc1, 0x38, 0x15, 0x0b, 0xc1, 0x38, 0x27, 0x17, 0xc1, 0x38, 0x33,
- 0x11, 0xc1, 0x38, 0x43, 0xc4, 0x5f, 0xaf, 0x00, 0xb4, 0x81, 0xc4, 0xe0,
- 0xc3, 0x00, 0xb4, 0x79, 0x15, 0xc1, 0x38, 0x4d, 0x10, 0xc1, 0x38, 0x59,
- 0xc4, 0xe1, 0xd3, 0x00, 0xb4, 0x61, 0xc4, 0xe4, 0xeb, 0x00, 0xb4, 0x59,
- 0x05, 0xc1, 0x38, 0x65, 0xc5, 0xd5, 0x4d, 0x00, 0xb4, 0x41, 0xc4, 0xe4,
- 0xbf, 0x00, 0xb4, 0x39, 0xc5, 0xd5, 0xed, 0x00, 0xb4, 0x19, 0xc4, 0xe6,
- 0x0b, 0x00, 0xb4, 0x11, 0xc5, 0xde, 0x3a, 0x00, 0xb4, 0x08, 0x83, 0x08,
- 0x24, 0xb3, 0x01, 0x38, 0x71, 0xc2, 0x00, 0xde, 0x08, 0x24, 0x09, 0xc2,
- 0x00, 0xb3, 0x08, 0x24, 0x11, 0xc2, 0x24, 0x58, 0x08, 0x24, 0x19, 0xc2,
- 0x96, 0xd0, 0x08, 0x24, 0x21, 0x0d, 0xc1, 0x38, 0x7b, 0x06, 0xc1, 0x38,
- 0x87, 0xc2, 0x02, 0x59, 0x08, 0x24, 0x39, 0x15, 0xc1, 0x38, 0x93, 0xc4,
- 0xe4, 0x73, 0x08, 0x24, 0x59, 0xc2, 0x01, 0x29, 0x08, 0x24, 0x61, 0xc2,
- 0x00, 0x67, 0x08, 0x24, 0x69, 0xc4, 0xd6, 0xec, 0x08, 0x24, 0x71, 0xc4,
- 0xe2, 0x4f, 0x08, 0x24, 0x81, 0xc4, 0xe6, 0x3b, 0x08, 0x24, 0x89, 0xc4,
- 0xe5, 0x17, 0x08, 0x24, 0x91, 0xc3, 0x78, 0xa9, 0x08, 0x24, 0x99, 0xc2,
- 0x00, 0xa4, 0x08, 0x24, 0xa1, 0xc2, 0x1d, 0x5f, 0x08, 0x24, 0xa9, 0x87,
- 0x08, 0x24, 0xbb, 0x01, 0x38, 0x9d, 0x8b, 0x08, 0x24, 0xc1, 0x91, 0x08,
- 0x24, 0xcb, 0x01, 0x38, 0xa1, 0x97, 0x08, 0x24, 0xd0, 0xc4, 0x15, 0xd3,
- 0x08, 0x25, 0x01, 0xc3, 0x01, 0xb4, 0x08, 0x25, 0x09, 0x16, 0xc1, 0x38,
- 0xa5, 0x08, 0xc1, 0x38, 0xb1, 0x15, 0xc1, 0x38, 0xbd, 0xc5, 0x01, 0xdb,
- 0x08, 0x25, 0x41, 0xc4, 0x22, 0x71, 0x08, 0x25, 0x48, 0x83, 0x08, 0x25,
- 0x83, 0x01, 0x38, 0xc9, 0xc3, 0x02, 0x58, 0x08, 0x25, 0xa1, 0xc3, 0x1b,
- 0xb6, 0x08, 0x25, 0xa9, 0x87, 0x08, 0x25, 0xbb, 0x01, 0x38, 0xd4, 0x0a,
- 0xc1, 0x38, 0xde, 0x8b, 0x08, 0x25, 0xd9, 0x0d, 0xc1, 0x38, 0xe8, 0xc2,
- 0x00, 0xc7, 0x08, 0x25, 0xf9, 0xc2, 0x00, 0xad, 0x08, 0x26, 0x01, 0xc2,
- 0x00, 0xc1, 0x08, 0x26, 0x09, 0x91, 0x08, 0x26, 0x13, 0x01, 0x38, 0xf8,
- 0xc2, 0x03, 0xa4, 0x08, 0x26, 0x21, 0x15, 0xc1, 0x38, 0xfe, 0x16, 0xc1,
- 0x39, 0x08, 0xc3, 0x44, 0x76, 0x08, 0x26, 0x69, 0x97, 0x08, 0x26, 0x71,
- 0xc2, 0x01, 0x09, 0x08, 0x26, 0x79, 0xc3, 0x4a, 0xb1, 0x08, 0x26, 0x89,
- 0x1c, 0x41, 0x39, 0x10, 0x83, 0x08, 0x26, 0xc3, 0x01, 0x39, 0x1a, 0xc3,
- 0x02, 0x58, 0x08, 0x26, 0xe1, 0xc3, 0x1b, 0xb6, 0x08, 0x26, 0xe9, 0x87,
- 0x08, 0x26, 0xfb, 0x01, 0x39, 0x25, 0x0a, 0xc1, 0x39, 0x2f, 0x8b, 0x08,
- 0x27, 0x19, 0x0d, 0xc1, 0x39, 0x39, 0xc2, 0x00, 0xc7, 0x08, 0x27, 0x39,
- 0xc2, 0x00, 0xad, 0x08, 0x27, 0x41, 0xc2, 0x00, 0xc1, 0x08, 0x27, 0x49,
- 0x91, 0x08, 0x27, 0x53, 0x01, 0x39, 0x49, 0xc2, 0x03, 0xa4, 0x08, 0x27,
- 0x61, 0x15, 0xc1, 0x39, 0x4f, 0x16, 0xc1, 0x39, 0x59, 0xc3, 0x44, 0x76,
- 0x08, 0x27, 0xa9, 0x97, 0x08, 0x27, 0xb1, 0xc2, 0x01, 0x09, 0x08, 0x27,
- 0xb9, 0xc3, 0x4a, 0xb1, 0x08, 0x27, 0xc9, 0x1c, 0x41, 0x39, 0x61, 0x03,
- 0xc1, 0x39, 0x6b, 0x11, 0xc1, 0x39, 0x7d, 0xc8, 0xba, 0x75, 0x0e, 0x7a,
- 0xc2, 0x01, 0x39, 0x89, 0xc3, 0x6d, 0xbd, 0x0e, 0x7e, 0x09, 0x07, 0xc1,
- 0x39, 0x8f, 0xcf, 0x5d, 0x73, 0x0e, 0x7b, 0x59, 0xcb, 0x96, 0xe6, 0x0e,
- 0x7a, 0x98, 0xc5, 0xdf, 0x8e, 0x0e, 0x7e, 0x01, 0xc4, 0xe0, 0xb3, 0x0e,
- 0x7d, 0x7a, 0x01, 0x39, 0x9b, 0xc6, 0xac, 0xf1, 0x0e, 0x7d, 0xf9, 0xc5,
- 0xde, 0xd0, 0x0e, 0x7c, 0x21, 0x42, 0x14, 0xd5, 0xc1, 0x39, 0x9f, 0xc6,
- 0xd2, 0xa9, 0x0e, 0x7b, 0x71, 0xc5, 0x58, 0x8d, 0x0e, 0x7a, 0xa0, 0x16,
- 0xc1, 0x39, 0xae, 0xc8, 0xbd, 0x15, 0x0e, 0x7b, 0xeb, 0x01, 0x39, 0xc6,
- 0x49, 0xad, 0xfc, 0x41, 0x39, 0xca, 0x00, 0x41, 0x39, 0xe6, 0xc6, 0xad,
- 0xfe, 0x0e, 0x7c, 0x29, 0x03, 0x41, 0x39, 0xf2, 0xc2, 0x13, 0x91, 0x0e,
- 0x7c, 0x11, 0xd2, 0x48, 0x06, 0x0e, 0x7b, 0x60, 0xc5, 0xd2, 0xce, 0x0e,
- 0x7b, 0x79, 0xc8, 0x48, 0x10, 0x0e, 0x7a, 0xd8, 0x4c, 0x86, 0x8c, 0xc1,
- 0x39, 0xfe, 0xcb, 0x95, 0xbd, 0x0e, 0x7b, 0x31, 0xc8, 0x4d, 0xc2, 0x0e,
- 0x7b, 0x29, 0xc9, 0xaa, 0x5d, 0x0e, 0x7b, 0x21, 0xc8, 0xbb, 0x05, 0x0e,
- 0x7b, 0x18, 0x16, 0xc1, 0x3a, 0x16, 0xc6, 0xbd, 0xc7, 0x0e, 0x7b, 0x09,
- 0xc7, 0xc4, 0xcf, 0x0e, 0x7b, 0x01, 0xc5, 0xdf, 0x3e, 0x0e, 0x7a, 0xf0,
- 0xa0, 0x0e, 0x7a, 0x19, 0x9f, 0x0e, 0x7a, 0x10, 0x0d, 0xc1, 0x3a, 0x22,
- 0x05, 0xc1, 0x3a, 0x37, 0x06, 0xc1, 0x3a, 0x46, 0x16, 0xc1, 0x3a, 0x52,
- 0x15, 0xc1, 0x3a, 0x64, 0x11, 0xc1, 0x3a, 0x7c, 0x42, 0x01, 0x8a, 0xc1,
- 0x3a, 0x8c, 0x1c, 0xc1, 0x3a, 0x96, 0x42, 0x02, 0x59, 0xc1, 0x3a, 0xa0,
- 0xc5, 0xdc, 0x69, 0x0e, 0x79, 0x39, 0xc6, 0xd4, 0x77, 0x0e, 0x79, 0x29,
- 0xc7, 0xc7, 0x0d, 0x0e, 0x79, 0x21, 0x48, 0xbe, 0x35, 0xc1, 0x3a, 0xac,
- 0x4d, 0x7f, 0xf9, 0xc1, 0x3a, 0xb8, 0x47, 0xc3, 0x40, 0xc1, 0x3a, 0xc2,
- 0x46, 0xcd, 0xc3, 0x41, 0x3a, 0xce, 0xc9, 0xb1, 0xbf, 0x0e, 0x79, 0x91,
- 0xc6, 0xb1, 0xc2, 0x0e, 0x79, 0x89, 0xc7, 0x6e, 0xdb, 0x0e, 0x79, 0x80,
- 0x42, 0x03, 0x32, 0xc1, 0x3a, 0xda, 0xc8, 0x11, 0x40, 0x08, 0xd1, 0xc1,
- 0x46, 0x1e, 0x17, 0x41, 0x3a, 0xe6, 0xd6, 0x2c, 0xf1, 0x08, 0xd2, 0x31,
- 0xc9, 0x15, 0x9d, 0x08, 0xd2, 0x00, 0x4d, 0x7b, 0x0c, 0xc1, 0x3a, 0xf5,
- 0xd1, 0x50, 0x95, 0x08, 0xd1, 0xd0, 0xc3, 0x1a, 0x80, 0x08, 0xd1, 0x91,
- 0xc2, 0x00, 0xa4, 0x08, 0xd0, 0x61, 0x83, 0x08, 0xd0, 0x58, 0x83, 0x08,
- 0xd1, 0x81, 0xc2, 0x0c, 0x65, 0x08, 0xd1, 0x79, 0xc2, 0x00, 0xa4, 0x08,
- 0xd1, 0x70, 0x83, 0x08, 0xd1, 0x41, 0xc2, 0x00, 0xa4, 0x08, 0xd1, 0x38,
- 0x1c, 0xc1, 0x3b, 0x0d, 0xc2, 0x00, 0xa4, 0x08, 0xd0, 0xe1, 0x83, 0x08,
- 0xd0, 0xd9, 0x06, 0x41, 0x3b, 0x17, 0x15, 0xc1, 0x3b, 0x21, 0xc2, 0x00,
- 0xa4, 0x08, 0xd0, 0xd1, 0x83, 0x08, 0xd0, 0xc9, 0x16, 0x41, 0x3b, 0x2b,
- 0xc2, 0x00, 0xa4, 0x08, 0xd1, 0x09, 0x83, 0x08, 0xd1, 0x00, 0xc2, 0x00,
- 0xa4, 0x08, 0xd0, 0xf9, 0x83, 0x08, 0xd0, 0xf0, 0x83, 0x08, 0xd0, 0xe9,
- 0xc2, 0x00, 0xc1, 0x08, 0xd0, 0xc1, 0xc2, 0x1d, 0x5f, 0x08, 0xd0, 0x99,
- 0xc2, 0x01, 0x29, 0x08, 0xd0, 0x78, 0xc2, 0x00, 0xa4, 0x08, 0xd0, 0x89,
- 0x83, 0x08, 0xd0, 0x80, 0xc2, 0x00, 0xa4, 0x08, 0xd0, 0x71, 0x83, 0x08,
- 0xd0, 0x68, 0xca, 0xa8, 0xba, 0x08, 0xd0, 0x49, 0x03, 0xc1, 0x3b, 0x35,
- 0x91, 0x08, 0xd0, 0x33, 0x01, 0x3b, 0x3d, 0x87, 0x08, 0xd0, 0x21, 0x97,
- 0x08, 0xd0, 0x1b, 0x01, 0x3b, 0x41, 0x8b, 0x08, 0xd0, 0x08, 0xcf, 0x60,
- 0x92, 0x01, 0x4c, 0x51, 0xcd, 0x7d, 0xbd, 0x01, 0x4c, 0x40, 0x12, 0xc1,
- 0x3b, 0x45, 0xcb, 0x35, 0x3f, 0x01, 0x50, 0xf8, 0xc8, 0xbf, 0x0d, 0x01,
- 0x00, 0x61, 0xcc, 0x40, 0xbe, 0x07, 0xf7, 0xf8, 0x43, 0x15, 0x18, 0xc1,
- 0x3b, 0x51, 0x42, 0x00, 0x35, 0x41, 0x3b, 0x75, 0x45, 0x02, 0x92, 0xc1,
- 0x3b, 0x81, 0xcc, 0x82, 0x00, 0x05, 0x4e, 0x08, 0x16, 0xc1, 0x3c, 0x0d,
- 0xc3, 0x01, 0xb4, 0x05, 0x4e, 0x89, 0xc4, 0x15, 0xd3, 0x05, 0x4e, 0x81,
- 0x08, 0xc1, 0x3c, 0x19, 0x15, 0xc1, 0x3c, 0x25, 0xc5, 0x01, 0xdb, 0x05,
- 0x4e, 0xc1, 0xc4, 0x22, 0x71, 0x05, 0x4e, 0xc8, 0xc5, 0xd6, 0xc9, 0x05,
- 0x4d, 0xf9, 0xc7, 0xc8, 0x02, 0x05, 0x4d, 0xf1, 0xc5, 0xda, 0x48, 0x05,
- 0x4d, 0xe8, 0xc5, 0xd8, 0xdb, 0x05, 0x4d, 0xe1, 0xca, 0xa6, 0x76, 0x05,
- 0x4d, 0xd9, 0x16, 0xc1, 0x3c, 0x31, 0xc4, 0xca, 0x39, 0x05, 0x4d, 0xc3,
- 0x01, 0x3c, 0x3b, 0xc4, 0xe0, 0xe7, 0x05, 0x4d, 0xb2, 0x01, 0x3c, 0x41,
- 0xc5, 0xd5, 0xb6, 0x05, 0x4c, 0x0b, 0x01, 0x3c, 0x47, 0xc7, 0xc2, 0x8a,
- 0x05, 0x4c, 0x19, 0xc5, 0xd9, 0x99, 0x05, 0x4c, 0x11, 0xc9, 0xb5, 0xee,
- 0x05, 0x4c, 0x00, 0x46, 0x01, 0xc7, 0xc1, 0x3c, 0x4d, 0x46, 0x01, 0xd1,
- 0x41, 0x3c, 0x5f, 0xc5, 0x18, 0x47, 0x01, 0x02, 0xb9, 0xd1, 0x1e, 0xe0,
- 0x01, 0x50, 0x60, 0x10, 0xc1, 0x3c, 0x6b, 0x0c, 0xc1, 0x3c, 0xaa, 0x13,
- 0xc1, 0x3c, 0xca, 0x14, 0xc1, 0x3c, 0xe6, 0x15, 0xc1, 0x3d, 0x0d, 0x05,
- 0xc1, 0x3d, 0x3f, 0x1c, 0xc1, 0x3d, 0x6d, 0x19, 0xc1, 0x3d, 0x9f, 0x0a,
- 0xc1, 0x3d, 0xbb, 0x1b, 0xc1, 0x3d, 0xed, 0x1a, 0xc1, 0x3e, 0x09, 0x0f,
- 0xc1, 0x3e, 0x27, 0x8b, 0x05, 0x00, 0x13, 0x01, 0x3e, 0x55, 0x83, 0x05,
- 0x00, 0x53, 0x01, 0x3e, 0x6b, 0xc2, 0x00, 0xb1, 0x05, 0x00, 0x6b, 0x01,
- 0x3e, 0x77, 0x91, 0x05, 0x00, 0x8b, 0x01, 0x3e, 0x7f, 0x87, 0x05, 0x00,
- 0xa3, 0x01, 0x3e, 0x8b, 0x04, 0xc1, 0x3e, 0x8f, 0x12, 0xc1, 0x3e, 0xbd,
- 0x08, 0xc1, 0x3e, 0xe0, 0x18, 0xc1, 0x3f, 0x03, 0x06, 0xc1, 0x3f, 0x2a,
- 0x16, 0xc1, 0x3f, 0x51, 0x0e, 0xc1, 0x3f, 0x74, 0x09, 0xc1, 0x3f, 0x9e,
- 0x0d, 0x41, 0x3f, 0xc5, 0xc3, 0xe6, 0xbe, 0x05, 0x24, 0x81, 0x0e, 0xc1,
- 0x3f, 0xe8, 0x0d, 0xc1, 0x3f, 0xf5, 0x10, 0xc1, 0x3f, 0xff, 0x05, 0xc1,
- 0x40, 0x0f, 0x15, 0xc1, 0x40, 0x28, 0x09, 0xc1, 0x40, 0x32, 0x0f, 0xc1,
- 0x40, 0x46, 0x0a, 0xc1, 0x40, 0x50, 0x04, 0xc1, 0x40, 0x5a, 0x1b, 0xc1,
- 0x40, 0x66, 0x12, 0xc1, 0x40, 0x70, 0x16, 0xc1, 0x40, 0x7c, 0x1c, 0xc1,
- 0x40, 0x86, 0x06, 0xc1, 0x40, 0x9a, 0xc2, 0x00, 0x11, 0x05, 0x25, 0x49,
- 0x0c, 0xc1, 0x40, 0xa4, 0x18, 0xc1, 0x40, 0xac, 0xc2, 0x01, 0x47, 0x05,
- 0x25, 0xc0, 0xc3, 0xe7, 0x24, 0x08, 0x75, 0x43, 0x01, 0x40, 0xb8, 0xc3,
- 0x0f, 0x69, 0x08, 0x75, 0x03, 0x01, 0x40, 0xbe, 0x07, 0xc1, 0x40, 0xc4,
- 0x0a, 0xc1, 0x40, 0xd8, 0xc2, 0x01, 0x12, 0x08, 0x75, 0x29, 0xc3, 0x78,
- 0xa9, 0x08, 0x75, 0x21, 0xc2, 0x06, 0x1f, 0x08, 0x75, 0x19, 0xc3, 0x1f,
- 0xd8, 0x08, 0x75, 0x11, 0xc3, 0x85, 0xc2, 0x08, 0x75, 0x09, 0xc3, 0x92,
- 0xe5, 0x08, 0x74, 0xf9, 0x0d, 0xc1, 0x40, 0xe4, 0xc3, 0x0f, 0x60, 0x08,
- 0x74, 0xe1, 0xc2, 0x04, 0x41, 0x08, 0x74, 0xd3, 0x01, 0x40, 0xf0, 0xc2,
- 0x00, 0x67, 0x08, 0x74, 0xc9, 0x1a, 0xc1, 0x40, 0xf6, 0x1c, 0xc1, 0x41,
- 0x00, 0x16, 0xc1, 0x41, 0x0b, 0x42, 0x0b, 0xc6, 0xc1, 0x41, 0x15, 0x15,
- 0xc1, 0x41, 0x1d, 0xc2, 0x24, 0x58, 0x08, 0x74, 0x81, 0x14, 0xc1, 0x41,
- 0x33, 0x05, 0xc1, 0x41, 0x3d, 0x12, 0xc1, 0x41, 0x47, 0xc2, 0x00, 0x48,
- 0x08, 0x74, 0x08, 0xca, 0xa4, 0x5a, 0x08, 0x75, 0x61, 0xca, 0x9d, 0xde,
- 0x08, 0x75, 0x58, 0x00, 0xc1, 0x41, 0x51, 0xc8, 0xba, 0x35, 0x0f, 0xae,
- 0xc8, 0x12, 0xc1, 0x41, 0x5d, 0x83, 0x00, 0xa7, 0xa3, 0x01, 0x41, 0x6d,
- 0x8a, 0x00, 0xa9, 0x2b, 0x01, 0x41, 0x7b, 0x91, 0x00, 0xa7, 0x8b, 0x01,
- 0x41, 0x98, 0x99, 0x00, 0xa8, 0x3b, 0x01, 0x41, 0xa6, 0x87, 0x00, 0xa7,
- 0x69, 0x8b, 0x00, 0xa7, 0x7a, 0x01, 0x41, 0xbf, 0x83, 0x00, 0xa6, 0x3b,
- 0x01, 0x41, 0xc3, 0x19, 0xc1, 0x41, 0xda, 0x91, 0x00, 0xa6, 0x23, 0x01,
- 0x41, 0xf3, 0xc2, 0x00, 0x35, 0x00, 0xac, 0xb3, 0x01, 0x41, 0xfb, 0x89,
- 0x00, 0xac, 0xab, 0x01, 0x42, 0x10, 0x44, 0xe0, 0x3b, 0xc1, 0x42, 0x25,
- 0x48, 0xbd, 0x25, 0xc1, 0x42, 0x34, 0x87, 0x00, 0xa6, 0x01, 0x8b, 0x00,
- 0xa6, 0x13, 0x01, 0x42, 0x3f, 0x8a, 0x00, 0xa6, 0x90, 0x83, 0x00, 0xa4,
- 0x83, 0x01, 0x42, 0x43, 0xc7, 0xcb, 0xc1, 0x00, 0xb3, 0x69, 0x19, 0xc1,
- 0x42, 0x50, 0x91, 0x00, 0xa4, 0x6b, 0x01, 0x42, 0x69, 0x8b, 0x00, 0xa4,
- 0x5b, 0x01, 0x42, 0x6d, 0x87, 0x00, 0xa4, 0x48, 0x4b, 0x8e, 0x90, 0xc1,
- 0x42, 0x71, 0x49, 0xac, 0x16, 0xc1, 0x42, 0x79, 0xcb, 0x90, 0xcc, 0x00,
- 0xa9, 0xf8, 0x42, 0x2d, 0x41, 0xc1, 0x42, 0x9c, 0x16, 0xc1, 0x42, 0xb5,
- 0x8a, 0x00, 0xab, 0x53, 0x01, 0x42, 0xcc, 0x83, 0x00, 0xa2, 0xab, 0x01,
- 0x42, 0xf2, 0x1b, 0xc1, 0x42, 0xfd, 0x19, 0xc1, 0x43, 0x0d, 0x91, 0x00,
- 0xa2, 0x83, 0x01, 0x43, 0x26, 0x8b, 0x00, 0xa2, 0x73, 0x01, 0x43, 0x2a,
- 0x87, 0x00, 0xa2, 0x60, 0x87, 0x00, 0xa0, 0x63, 0x01, 0x43, 0x2e, 0x83,
- 0x00, 0xa0, 0xbb, 0x01, 0x43, 0x34, 0x91, 0x00, 0xa0, 0x93, 0x01, 0x43,
- 0x3c, 0x8b, 0x00, 0xa0, 0x72, 0x01, 0x43, 0x43, 0x47, 0xc7, 0xa0, 0xc1,
- 0x43, 0x47, 0x19, 0xc1, 0x43, 0x51, 0x83, 0x00, 0xaa, 0x5b, 0x01, 0x43,
- 0x6c, 0x91, 0x00, 0xaa, 0x43, 0x01, 0x43, 0x77, 0x8b, 0x00, 0xaa, 0x33,
- 0x01, 0x43, 0x7b, 0x87, 0x00, 0xaa, 0x10, 0x8b, 0x00, 0xaa, 0xab, 0x01,
- 0x43, 0x7f, 0xc8, 0x11, 0xdd, 0x00, 0xb3, 0x71, 0xc3, 0x14, 0x3f, 0x00,
- 0xaa, 0xd9, 0x83, 0x00, 0xaa, 0xcb, 0x01, 0x43, 0x89, 0x91, 0x00, 0xaa,
- 0xbb, 0x01, 0x43, 0x90, 0x87, 0x00, 0xaa, 0x98, 0xc8, 0xba, 0x65, 0x00,
- 0xc6, 0xe1, 0x90, 0x00, 0xa1, 0x58, 0x47, 0xc8, 0xb8, 0xc1, 0x43, 0x94,
- 0x9b, 0x00, 0xc5, 0x81, 0x91, 0x00, 0xa0, 0x31, 0x90, 0x00, 0xa1, 0x68,
- 0x83, 0x00, 0xa9, 0x6b, 0x01, 0x43, 0xb6, 0x91, 0x00, 0xa9, 0x53, 0x01,
- 0x43, 0xc1, 0x19, 0xc1, 0x43, 0xc9, 0x46, 0x90, 0xd0, 0xc1, 0x43, 0xe2,
- 0x8b, 0x00, 0xa9, 0x43, 0x01, 0x44, 0x20, 0x87, 0x00, 0xa9, 0x30, 0x83,
- 0x00, 0xa6, 0xd3, 0x01, 0x44, 0x24, 0x8a, 0x00, 0xad, 0x33, 0x01, 0x44,
- 0x2f, 0x87, 0x00, 0xa6, 0x99, 0x8b, 0x00, 0xa6, 0xab, 0x01, 0x44, 0x44,
- 0x91, 0x00, 0xa6, 0xbb, 0x01, 0x44, 0x48, 0x19, 0x41, 0x44, 0x4c, 0x83,
- 0x00, 0xa5, 0x53, 0x01, 0x44, 0x65, 0x87, 0x00, 0xa5, 0x1b, 0x01, 0x44,
- 0x70, 0x91, 0x00, 0xa5, 0x3b, 0x01, 0x44, 0x76, 0x8b, 0x00, 0xa5, 0x2b,
- 0x01, 0x44, 0x7d, 0x19, 0xc1, 0x44, 0x81, 0x8a, 0x00, 0xa5, 0xe8, 0x99,
- 0x00, 0xa4, 0x23, 0x01, 0x44, 0x9a, 0x83, 0x00, 0xa3, 0x93, 0x01, 0x44,
- 0xb3, 0x87, 0x00, 0xa3, 0x59, 0x8b, 0x00, 0xa3, 0x6b, 0x01, 0x44, 0xbe,
- 0x91, 0x00, 0xa3, 0x7a, 0x01, 0x44, 0xc2, 0x19, 0xc1, 0x44, 0xc6, 0x83,
- 0x00, 0xa1, 0xc3, 0x01, 0x44, 0xdf, 0x91, 0x00, 0xa1, 0x9b, 0x01, 0x44,
- 0xea, 0x87, 0x00, 0xa1, 0x79, 0x8b, 0x00, 0xa1, 0x8a, 0x01, 0x44, 0xf2,
- 0x83, 0x00, 0xa0, 0x5b, 0x01, 0x44, 0xf6, 0x9b, 0x00, 0xc5, 0x89, 0x8b,
- 0x00, 0xa0, 0xe3, 0x01, 0x44, 0xfe, 0x4a, 0x9f, 0x82, 0xc1, 0x45, 0x04,
- 0x90, 0x00, 0xa1, 0x70, 0x83, 0x00, 0xac, 0x1b, 0x01, 0x45, 0x0c, 0x91,
- 0x00, 0xac, 0x0b, 0x01, 0x45, 0x17, 0x8b, 0x00, 0xab, 0xfa, 0x01, 0x45,
- 0x1b, 0x8d, 0x00, 0xab, 0xe9, 0xc5, 0x5a, 0xa3, 0x00, 0xa0, 0x00, 0x8b,
- 0x00, 0xa0, 0x21, 0x90, 0x00, 0xa1, 0x60, 0xd0, 0x60, 0x32, 0x01, 0x02,
- 0x08, 0xc9, 0xb4, 0xa1, 0x0f, 0xae, 0x10, 0x97, 0x08, 0x15, 0xfa, 0x01,
- 0x45, 0x1f, 0x94, 0x08, 0x16, 0x48, 0x86, 0x08, 0x15, 0x32, 0x01, 0x45,
- 0x26, 0x9f, 0x08, 0x15, 0x38, 0x84, 0x08, 0x16, 0x52, 0x01, 0x45, 0x2a,
- 0x9f, 0x08, 0x15, 0x60, 0x96, 0x08, 0x16, 0x3a, 0x01, 0x45, 0x36, 0x8a,
- 0x08, 0x15, 0x73, 0x01, 0x45, 0x3a, 0x95, 0x08, 0x15, 0xc1, 0x96, 0x08,
- 0x16, 0x12, 0x01, 0x45, 0x3e, 0xc2, 0x8a, 0xb6, 0x08, 0x15, 0x89, 0xc2,
- 0xe6, 0xcc, 0x08, 0x16, 0x30, 0x90, 0x08, 0x15, 0x99, 0x86, 0x08, 0x15,
- 0xf1, 0x89, 0x08, 0x16, 0x20, 0x9f, 0x08, 0x15, 0x08, 0x8b, 0x08, 0x16,
- 0x28, 0x9f, 0x08, 0x16, 0x78, 0x9f, 0x08, 0x15, 0xe8, 0x9f, 0x08, 0x16,
- 0x08, 0x03, 0xc1, 0x45, 0x42, 0xc3, 0x00, 0x48, 0x08, 0x29, 0x89, 0x09,
- 0xc1, 0x45, 0x4e, 0x06, 0xc1, 0x45, 0x5a, 0x07, 0xc1, 0x45, 0x6a, 0x1c,
- 0xc1, 0x45, 0x74, 0x16, 0xc1, 0x45, 0x7e, 0x05, 0xc1, 0x45, 0x90, 0x1b,
- 0xc1, 0x45, 0x9e, 0x0b, 0xc1, 0x45, 0xaa, 0x15, 0xc1, 0x45, 0xbc, 0x0e,
- 0xc1, 0x45, 0xc6, 0xc4, 0xe0, 0x7b, 0x08, 0x2a, 0x01, 0x0c, 0xc1, 0x45,
- 0xd2, 0x0d, 0xc1, 0x45, 0xde, 0xc4, 0xe1, 0x0f, 0x08, 0x2a, 0x31, 0x42,
- 0x0f, 0x60, 0xc1, 0x45, 0xea, 0xc3, 0xdb, 0x65, 0x08, 0x2a, 0x61, 0xc4,
- 0xe5, 0xab, 0x08, 0x2a, 0x71, 0xc2, 0x00, 0xb7, 0x08, 0x2a, 0x91, 0xc3,
- 0x93, 0x1c, 0x08, 0x2a, 0xa1, 0x12, 0xc1, 0x45, 0xf2, 0xc3, 0x03, 0x61,
- 0x08, 0x2a, 0xc9, 0xc4, 0xe0, 0x97, 0x08, 0x2a, 0xd8, 0xcc, 0x89, 0x14,
- 0x0f, 0xb1, 0xc9, 0xc9, 0xaf, 0x13, 0x0f, 0xb1, 0xe0, 0x07, 0xc1, 0x45,
- 0xfe, 0x06, 0xc1, 0x46, 0x3e, 0x03, 0xc1, 0x46, 0x7e, 0x08, 0xc1, 0x46,
- 0xbe, 0x24, 0xc1, 0x46, 0xfe, 0x23, 0xc1, 0x47, 0x3e, 0x20, 0xc1, 0x47,
- 0x7e, 0x1f, 0xc1, 0x47, 0xbe, 0x1e, 0xc1, 0x47, 0xfe, 0x1d, 0xc1, 0x48,
- 0x3e, 0x05, 0xc1, 0x48, 0x7e, 0x04, 0xc1, 0x48, 0xbe, 0x26, 0xc1, 0x48,
- 0xfe, 0x25, 0xc1, 0x49, 0x3e, 0x22, 0xc1, 0x49, 0x7e, 0x21, 0x41, 0x49,
- 0xbe, 0x24, 0xc1, 0x49, 0xfe, 0x23, 0xc1, 0x4a, 0x3e, 0x22, 0xc1, 0x4a,
- 0x7e, 0x21, 0xc1, 0x4a, 0xbe, 0x1f, 0xc1, 0x4a, 0xfe, 0x1d, 0xc1, 0x4b,
- 0x3e, 0x08, 0xc1, 0x4b, 0x7e, 0x04, 0xc1, 0x4b, 0xbe, 0x03, 0xc1, 0x4b,
- 0xfe, 0x26, 0xc1, 0x4c, 0x3e, 0x25, 0xc1, 0x4c, 0x7e, 0x07, 0xc1, 0x4c,
- 0xbe, 0x06, 0xc1, 0x4c, 0xfe, 0x05, 0xc1, 0x4d, 0x3e, 0x20, 0xc1, 0x4d,
- 0x7e, 0x1e, 0x41, 0x4d, 0xbe, 0x1e, 0xc1, 0x4d, 0xfe, 0x1d, 0x41, 0x4e,
- 0x36, 0x06, 0xc1, 0x4e, 0x76, 0x05, 0xc1, 0x4e, 0x9e, 0x04, 0xc1, 0x4e,
- 0xde, 0x03, 0xc1, 0x4f, 0x1e, 0x26, 0xc1, 0x4f, 0x5e, 0x25, 0xc1, 0x4f,
- 0x9e, 0x24, 0xc1, 0x4f, 0xde, 0x23, 0xc1, 0x50, 0x1e, 0x22, 0xc1, 0x50,
- 0x56, 0x21, 0xc1, 0x50, 0x96, 0x20, 0xc1, 0x50, 0xd6, 0x1f, 0xc1, 0x51,
- 0x16, 0x1e, 0xc1, 0x51, 0x56, 0x1d, 0x41, 0x51, 0x96, 0x08, 0xc1, 0x51,
- 0xd6, 0x07, 0xc1, 0x52, 0x16, 0x06, 0xc1, 0x52, 0x56, 0x05, 0xc1, 0x52,
- 0x96, 0x04, 0xc1, 0x52, 0xd6, 0x03, 0xc1, 0x53, 0x16, 0x26, 0xc1, 0x53,
- 0x56, 0x25, 0xc1, 0x53, 0x96, 0x24, 0xc1, 0x53, 0xd6, 0x23, 0xc1, 0x54,
- 0x16, 0x22, 0xc1, 0x54, 0x56, 0x21, 0xc1, 0x54, 0x96, 0x20, 0xc1, 0x54,
- 0xd6, 0x1f, 0xc1, 0x55, 0x16, 0x1e, 0xc1, 0x55, 0x56, 0x1d, 0x41, 0x55,
- 0x96, 0x92, 0x01, 0x74, 0xc9, 0x8f, 0x01, 0x75, 0xb9, 0xc2, 0x00, 0x34,
- 0x01, 0x76, 0xb8, 0xc3, 0x45, 0x46, 0x01, 0x74, 0x09, 0xc5, 0x78, 0x8a,
- 0x01, 0x76, 0x10, 0xc6, 0xd0, 0xed, 0x01, 0x75, 0x01, 0xc2, 0x0c, 0x57,
- 0x01, 0x76, 0x78, 0x15, 0xc1, 0x55, 0xd6, 0xc4, 0x64, 0x0d, 0x01, 0x76,
- 0x59, 0x09, 0xc1, 0x55, 0xf4, 0x0e, 0xc1, 0x56, 0x00, 0x16, 0xc1, 0x56,
- 0x0c, 0xc4, 0x46, 0x31, 0x01, 0x76, 0xd9, 0x08, 0xc1, 0x56, 0x1e, 0x07,
- 0xc1, 0x56, 0x30, 0xc5, 0xa0, 0x31, 0x01, 0x77, 0x11, 0xc4, 0xa6, 0x72,
- 0x01, 0x77, 0x31, 0xc6, 0x8a, 0xe2, 0x01, 0x77, 0x80, 0x45, 0x74, 0xa9,
- 0xc1, 0x56, 0x3c, 0xc2, 0x00, 0x92, 0x01, 0x74, 0x58, 0xc3, 0x01, 0xb4,
- 0x01, 0x74, 0x61, 0xc3, 0x01, 0x59, 0x01, 0x74, 0x68, 0xc3, 0x22, 0xdc,
- 0x01, 0x74, 0x91, 0x44, 0x48, 0x9e, 0x41, 0x56, 0x46, 0x49, 0x87, 0xf7,
- 0xc1, 0x56, 0x52, 0xc2, 0x44, 0x0d, 0x01, 0x75, 0x78, 0xc3, 0x01, 0xb4,
- 0x01, 0x75, 0x61, 0xc3, 0x01, 0x59, 0x01, 0x75, 0x68, 0xc3, 0x01, 0xb4,
- 0x01, 0x75, 0x21, 0xc3, 0x01, 0x59, 0x01, 0x75, 0x28, 0x9a, 0x01, 0x74,
- 0x31, 0xcb, 0x8e, 0x4e, 0x01, 0x75, 0x51, 0xc2, 0x00, 0x8c, 0x01, 0x77,
- 0x18, 0xc3, 0x01, 0xb4, 0x01, 0x75, 0xd1, 0xc3, 0x01, 0x59, 0x01, 0x75,
- 0xd8, 0xc3, 0x01, 0xb4, 0x01, 0x74, 0x71, 0x16, 0xc1, 0x56, 0x60, 0xc4,
- 0x06, 0x9d, 0x01, 0x74, 0x88, 0xc3, 0x01, 0xb4, 0x01, 0x76, 0x89, 0xc3,
- 0x01, 0x59, 0x01, 0x76, 0x90, 0x43, 0x0f, 0xf8, 0xc1, 0x56, 0x6c, 0x86,
- 0x01, 0x77, 0x08, 0xc2, 0x00, 0xb7, 0x01, 0x74, 0xe9, 0xc4, 0x17, 0xa2,
- 0x01, 0x74, 0xf9, 0xc4, 0xd2, 0x84, 0x01, 0x75, 0xe9, 0x44, 0x0e, 0x4a,
- 0x41, 0x56, 0x78, 0xc2, 0x00, 0xe5, 0x01, 0x75, 0xa9, 0xc2, 0x00, 0x9e,
- 0x01, 0x75, 0xe0, 0x44, 0x02, 0x93, 0xc1, 0x56, 0x84, 0x43, 0xe7, 0x4e,
- 0x41, 0x56, 0x90, 0xc3, 0x01, 0xb4, 0x01, 0x76, 0x19, 0xc3, 0x01, 0x59,
- 0x01, 0x76, 0x20, 0xc4, 0x18, 0x83, 0x01, 0x77, 0x59, 0x16, 0xc1, 0x56,
- 0x9c, 0xc6, 0x8a, 0xe2, 0x01, 0x77, 0x78, 0xc3, 0x01, 0xb4, 0x01, 0x76,
- 0xe9, 0x16, 0x41, 0x56, 0xa8, 0xc2, 0x01, 0x47, 0x01, 0x75, 0x91, 0xc4,
- 0x04, 0x5e, 0x01, 0x75, 0x98, 0xc3, 0x01, 0xb4, 0x01, 0x75, 0xf1, 0x16,
- 0x41, 0x56, 0xb4, 0x9c, 0x01, 0x8e, 0xc1, 0x89, 0x01, 0x8e, 0xf8, 0xc2,
- 0x23, 0x27, 0x01, 0x8e, 0x49, 0x9c, 0x01, 0x8e, 0xf0, 0x9c, 0x01, 0x8e,
- 0x2b, 0x01, 0x56, 0xc0, 0x89, 0x01, 0x8e, 0x31, 0x99, 0x01, 0x8e, 0x6b,
- 0x01, 0x56, 0xcb, 0x96, 0x01, 0x8e, 0x50, 0xc2, 0x23, 0x27, 0x01, 0x8e,
- 0x60, 0xc5, 0x05, 0x39, 0x0f, 0xdc, 0xa8, 0x4d, 0x26, 0xea, 0xc1, 0x56,
- 0xcf, 0x47, 0x02, 0x90, 0x41, 0x57, 0x1e, 0xc3, 0x91, 0xec, 0x0f, 0x9a,
- 0x91, 0xc9, 0xae, 0x56, 0x0f, 0x99, 0xc0, 0xc2, 0x04, 0x0a, 0x01, 0x02,
- 0x01, 0xc9, 0x33, 0x1f, 0x00, 0x00, 0x4a, 0x01, 0x57, 0x6d, 0xcf, 0x65,
- 0x15, 0x0f, 0xa6, 0x49, 0xcd, 0x76, 0x6d, 0x0f, 0xa6, 0x42, 0x01, 0x57,
- 0x71, 0xc3, 0xdd, 0xef, 0x08, 0x8a, 0x39, 0x0e, 0xc1, 0x57, 0x77, 0xc3,
- 0x3b, 0xc7, 0x08, 0x89, 0x31, 0xc3, 0xcc, 0xeb, 0x08, 0x89, 0x29, 0xc3,
- 0x14, 0x3f, 0x08, 0x89, 0x21, 0xc3, 0x4b, 0xf4, 0x08, 0x89, 0x11, 0x1b,
- 0xc1, 0x57, 0x83, 0xc3, 0x73, 0xfd, 0x08, 0x88, 0xf9, 0x04, 0xc1, 0x57,
- 0x8f, 0x12, 0xc1, 0x57, 0x9b, 0x10, 0xc1, 0x57, 0xa7, 0x06, 0xc1, 0x57,
- 0xbf, 0x16, 0xc1, 0x57, 0xcf, 0x0c, 0xc1, 0x57, 0xdf, 0x05, 0xc1, 0x57,
- 0xeb, 0x09, 0xc1, 0x57, 0xf7, 0x0d, 0xc1, 0x58, 0x03, 0x87, 0x08, 0x88,
- 0x31, 0x97, 0x08, 0x88, 0x29, 0x8b, 0x08, 0x88, 0x21, 0xc2, 0x14, 0x40,
- 0x08, 0x88, 0x18, 0x4a, 0x6f, 0xcd, 0xc1, 0x58, 0x0f, 0xc5, 0x1e, 0x24,
- 0x08, 0x89, 0x98, 0xcb, 0x95, 0x0d, 0x08, 0x8a, 0x11, 0xc4, 0x1c, 0xd0,
- 0x08, 0x8a, 0x09, 0x45, 0x06, 0x98, 0x41, 0x58, 0x32, 0xcb, 0x25, 0xeb,
- 0x08, 0x8a, 0x01, 0x44, 0x00, 0xcc, 0x41, 0x58, 0x56, 0xc2, 0x01, 0x09,
- 0x05, 0x51, 0xb1, 0xc2, 0x00, 0xc7, 0x05, 0x51, 0xa9, 0xc2, 0x02, 0x59,
- 0x05, 0x51, 0xa1, 0xc2, 0x1d, 0x5f, 0x05, 0x51, 0x99, 0x46, 0x2b, 0x13,
- 0x41, 0x58, 0x68, 0x97, 0x05, 0x51, 0x6b, 0x01, 0x58, 0x76, 0x03, 0xc1,
- 0x58, 0x7a, 0x91, 0x05, 0x51, 0x7b, 0x01, 0x58, 0x86, 0xc2, 0x01, 0xdb,
- 0x05, 0x51, 0x61, 0x8b, 0x05, 0x51, 0x52, 0x01, 0x58, 0x8a, 0xc2, 0x00,
- 0xa4, 0x05, 0x51, 0x41, 0x15, 0xc1, 0x58, 0x8e, 0x10, 0xc1, 0x58, 0x98,
- 0x09, 0xc1, 0x58, 0xaa, 0x0d, 0xc1, 0x58, 0xb4, 0x91, 0x05, 0x50, 0x29,
- 0x83, 0x05, 0x50, 0x03, 0x01, 0x58, 0xbe, 0x87, 0x05, 0x50, 0x19, 0x46,
- 0x2b, 0x13, 0xc1, 0x58, 0xc2, 0xc2, 0x04, 0x41, 0x05, 0x51, 0x29, 0xc2,
- 0x00, 0xc7, 0x05, 0x51, 0x21, 0xc2, 0x02, 0x59, 0x05, 0x51, 0x19, 0xc2,
- 0x1d, 0x5f, 0x05, 0x51, 0x11, 0x04, 0xc1, 0x58, 0xf1, 0x0f, 0xc1, 0x59,
- 0x01, 0x12, 0xc1, 0x59, 0x0b, 0x06, 0xc1, 0x59, 0x1b, 0x16, 0xc1, 0x59,
- 0x2b, 0x0c, 0xc1, 0x59, 0x35, 0x42, 0x11, 0xd4, 0xc1, 0x59, 0x3f, 0x97,
- 0x05, 0x50, 0x11, 0x8b, 0x05, 0x50, 0x08, 0xcc, 0x8b, 0x18, 0x05, 0x52,
- 0xf9, 0x06, 0xc1, 0x59, 0x49, 0xc6, 0x7d, 0xf8, 0x05, 0x52, 0xe0, 0xc4,
- 0x22, 0x71, 0x05, 0x52, 0xc9, 0xc5, 0x01, 0xdb, 0x05, 0x52, 0xc1, 0x15,
- 0xc1, 0x59, 0x55, 0x08, 0xc1, 0x59, 0x61, 0x16, 0xc1, 0x59, 0x6d, 0xc4,
- 0x15, 0xd3, 0x05, 0x52, 0x81, 0xc3, 0x01, 0xb4, 0x05, 0x52, 0x88, 0xc3,
- 0x01, 0xb4, 0x08, 0x7e, 0x2b, 0x01, 0x59, 0x79, 0x16, 0xc1, 0x59, 0x7f,
- 0xc4, 0x06, 0x9d, 0x08, 0x7e, 0x40, 0xc3, 0x26, 0xf9, 0x08, 0x7e, 0x21,
- 0x15, 0xc1, 0x59, 0x8f, 0xc4, 0xe2, 0x57, 0x08, 0x7d, 0xd9, 0xc4, 0x4b,
- 0x98, 0x08, 0x7d, 0xd1, 0xc2, 0x01, 0xf0, 0x08, 0x7d, 0xab, 0x01, 0x59,
- 0xa1, 0xc5, 0x4b, 0x92, 0x08, 0x7d, 0xc1, 0xca, 0x9b, 0x86, 0x08, 0x7d,
- 0xb9, 0xc3, 0x78, 0xa9, 0x08, 0x7d, 0xb1, 0xc6, 0x45, 0xf6, 0x08, 0x7d,
- 0xa1, 0xc5, 0xa1, 0x94, 0x08, 0x7d, 0x99, 0xc4, 0xe4, 0x8f, 0x08, 0x7d,
- 0x91, 0x03, 0xc1, 0x59, 0xa7, 0xc6, 0xd0, 0x5d, 0x08, 0x7d, 0xe1, 0xc3,
- 0x0b, 0x0e, 0x08, 0x7d, 0xe9, 0xc3, 0x1f, 0xd8, 0x08, 0x7d, 0xf1, 0xc2,
- 0x00, 0x27, 0x08, 0x7e, 0x09, 0xc4, 0x5d, 0xe2, 0x08, 0x7e, 0x10, 0xc4,
- 0x00, 0xba, 0x01, 0x3a, 0x61, 0x43, 0x00, 0xf3, 0xc1, 0x59, 0xb3, 0x12,
- 0x41, 0x59, 0xbf, 0xc6, 0xcf, 0x37, 0x01, 0x34, 0xa1, 0xc5, 0xd7, 0x23,
- 0x0f, 0x9c, 0x61, 0x47, 0x56, 0x29, 0x41, 0x59, 0xce, 0x48, 0xbc, 0xe5,
- 0xc1, 0x59, 0xd4, 0x49, 0x09, 0x79, 0xc1, 0x5a, 0x06, 0xd0, 0x0b, 0x37,
- 0x00, 0x18, 0x13, 0x01, 0x5a, 0x12, 0x03, 0xc1, 0x5a, 0x18, 0x11, 0xc1,
- 0x5a, 0x27, 0xc6, 0xbf, 0x3f, 0x00, 0x19, 0x38, 0x51, 0x52, 0xa4, 0xc1,
- 0x5a, 0x36, 0x14, 0x41, 0x5a, 0xa7, 0x48, 0x3e, 0x16, 0xc1, 0x5a, 0xb1,
- 0x10, 0xc1, 0x5a, 0xbd, 0x4f, 0x6b, 0xf0, 0xc1, 0x5a, 0xc9, 0x44, 0x35,
- 0x4b, 0x41, 0x5a, 0xd5, 0x0b, 0xc1, 0x5a, 0xdd, 0x07, 0x41, 0x5a, 0xe9,
- 0x43, 0x01, 0x1f, 0xc1, 0x5a, 0xf5, 0x11, 0xc1, 0x5a, 0xff, 0x45, 0x07,
- 0x12, 0xc1, 0x5b, 0x0b, 0x42, 0x00, 0xcc, 0x41, 0x5b, 0x17, 0x43, 0x08,
- 0x88, 0xc1, 0x5b, 0x23, 0xcf, 0x69, 0x20, 0x00, 0xd5, 0xb0, 0x46, 0x17,
- 0x32, 0xc1, 0x5b, 0x2f, 0xcf, 0x0e, 0xbb, 0x01, 0x06, 0xd9, 0xc4, 0x21,
- 0x13, 0x00, 0x18, 0x1b, 0x01, 0x5b, 0x41, 0xd1, 0x53, 0xf8, 0x00, 0x18,
- 0x90, 0x11, 0xc1, 0x5b, 0x45, 0x07, 0xc1, 0x5b, 0x55, 0xc8, 0x1e, 0x43,
- 0x00, 0x18, 0x42, 0x01, 0x5b, 0x61, 0x49, 0xb0, 0x45, 0xc1, 0x5b, 0x6d,
- 0xd0, 0x5e, 0xd2, 0x00, 0x1a, 0x38, 0xce, 0x38, 0x83, 0x01, 0x06, 0xe1,
- 0xc6, 0xd3, 0xbd, 0x00, 0x1a, 0x90, 0x45, 0x2c, 0x00, 0xc1, 0x5b, 0x8c,
- 0xce, 0x6e, 0xe9, 0x00, 0xee, 0x19, 0xca, 0xa5, 0x72, 0x00, 0xee, 0x11,
- 0x47, 0x24, 0xb3, 0xc1, 0x5b, 0x96, 0x16, 0xc1, 0x5b, 0xa2, 0xcc, 0x84,
- 0x88, 0x00, 0x19, 0xe0, 0xca, 0xa1, 0x58, 0x08, 0x99, 0xd9, 0x14, 0x41,
- 0x5b, 0xa8, 0x4b, 0x96, 0xdb, 0xc1, 0x5b, 0xb7, 0x50, 0x5b, 0xe2, 0x41,
- 0x5b, 0xc3, 0x12, 0xc1, 0x5b, 0xcf, 0xc7, 0x01, 0x4d, 0x00, 0xee, 0x91,
- 0xc7, 0x06, 0xa0, 0x00, 0xee, 0x88, 0xc7, 0x01, 0x60, 0x00, 0xee, 0x81,
- 0x10, 0x41, 0x5b, 0xdb, 0xc5, 0x01, 0x62, 0x00, 0xee, 0x79, 0xc5, 0x00,
- 0x95, 0x00, 0x1a, 0xd8, 0xc5, 0xcd, 0x22, 0x00, 0x19, 0x43, 0x01, 0x5b,
- 0xe7, 0xce, 0x73, 0x03, 0x00, 0xd5, 0xb9, 0xc7, 0x81, 0x2a, 0x00, 0x18,
- 0x29, 0x51, 0x51, 0x61, 0x41, 0x5b, 0xed, 0xc5, 0x60, 0xe7, 0x00, 0x18,
- 0x23, 0x01, 0x5c, 0x0b, 0xcf, 0x61, 0x64, 0x00, 0x19, 0x00, 0x49, 0x68,
- 0x22, 0xc1, 0x5c, 0x13, 0x03, 0x41, 0x5c, 0x1f, 0xd0, 0x60, 0x02, 0x00,
- 0xd6, 0x31, 0xce, 0x6c, 0x65, 0x00, 0x1a, 0x50, 0xc8, 0xbf, 0xcd, 0x00,
- 0xd5, 0xa9, 0x00, 0x41, 0x5c, 0x2b, 0xc8, 0x9f, 0xca, 0x00, 0x18, 0x49,
- 0xc2, 0x00, 0xc0, 0x00, 0x18, 0xd9, 0xce, 0x6c, 0x81, 0x00, 0x1a, 0x58,
- 0x45, 0x00, 0x8a, 0xc1, 0x5c, 0x37, 0xc5, 0x21, 0x12, 0x00, 0x19, 0xf0,
- 0xca, 0x96, 0xbb, 0x01, 0x02, 0x91, 0xc2, 0x00, 0x9e, 0x00, 0x02, 0x00,
- 0x4b, 0x8f, 0x98, 0xc1, 0x5c, 0x43, 0x4b, 0x99, 0x17, 0x41, 0x5c, 0x61,
- 0xc4, 0xe0, 0x13, 0x01, 0x19, 0xa9, 0xc4, 0xe4, 0xa3, 0x01, 0x19, 0xa0,
- 0x45, 0x00, 0x6c, 0xc1, 0x5c, 0x7f, 0x43, 0x82, 0x1d, 0x41, 0x5c, 0x91,
- 0xc5, 0xda, 0x6b, 0x0f, 0x9c, 0xd9, 0xd3, 0x41, 0xae, 0x00, 0x04, 0xd8,
- 0xc6, 0x0a, 0x62, 0x01, 0x12, 0xa1, 0xc4, 0x00, 0xcb, 0x01, 0x05, 0x08,
- 0x4c, 0x26, 0xeb, 0xc1, 0x5c, 0xa0, 0x46, 0x10, 0x5f, 0x41, 0x5d, 0x0d,
- 0x4e, 0x07, 0x18, 0xc1, 0x5d, 0x27, 0x49, 0x11, 0x37, 0x41, 0x5d, 0x94,
- 0xce, 0x6e, 0xbf, 0x08, 0x17, 0x01, 0x46, 0x06, 0x97, 0xc1, 0x5d, 0xa0,
- 0x47, 0x33, 0xef, 0x41, 0x5d, 0xbe, 0xc9, 0x11, 0xdc, 0x01, 0x67, 0xc9,
- 0xd4, 0x30, 0xbb, 0x01, 0x67, 0xd1, 0xd6, 0x30, 0xb9, 0x01, 0x67, 0xd9,
- 0xcd, 0x4c, 0x55, 0x01, 0x67, 0xe0, 0xd0, 0x52, 0xd8, 0x01, 0x67, 0xe9,
- 0xc8, 0x11, 0xdd, 0x01, 0x67, 0xf0, 0xcd, 0x7e, 0x8d, 0x0f, 0xa8, 0x81,
- 0x4d, 0x7f, 0x77, 0xc1, 0x5d, 0xdc, 0xc4, 0xe4, 0x9f, 0x0f, 0xa6, 0xa9,
- 0x17, 0xc1, 0x5d, 0xe8, 0xd8, 0x23, 0xe8, 0x01, 0x52, 0x69, 0x42, 0x08,
- 0x22, 0x41, 0x5d, 0xf7, 0xd3, 0x41, 0xe7, 0x01, 0x3f, 0x99, 0x05, 0xc1,
- 0x5e, 0x09, 0xc8, 0x1e, 0xe0, 0x01, 0x11, 0x89, 0xd1, 0x01, 0x75, 0x01,
- 0x0d, 0xd9, 0x16, 0xc1, 0x5e, 0x15, 0x45, 0x01, 0xf7, 0xc1, 0x5e, 0x21,
- 0x48, 0x03, 0x68, 0x41, 0x5e, 0x2d, 0x16, 0xc1, 0x5e, 0x33, 0x07, 0xc1,
- 0x5e, 0x43, 0x44, 0x22, 0x71, 0xc1, 0x5e, 0x4f, 0x15, 0xc1, 0x5e, 0x5b,
- 0x08, 0xc1, 0x5e, 0x67, 0x43, 0x01, 0xb4, 0x41, 0x5e, 0x73, 0xc9, 0xb0,
- 0x3c, 0x0f, 0x99, 0x49, 0xc4, 0x28, 0xec, 0x0f, 0x99, 0x41, 0xc4, 0x05,
- 0xde, 0x0f, 0x99, 0x39, 0xc7, 0xc5, 0x38, 0x0f, 0x99, 0x50, 0x05, 0xc1,
- 0x5e, 0x7f, 0x0a, 0xc1, 0x5e, 0x93, 0xde, 0x0e, 0xe8, 0x01, 0x3a, 0x11,
- 0x19, 0xc1, 0x5e, 0xab, 0x06, 0xc1, 0x5e, 0xb5, 0x0e, 0xc1, 0x5e, 0xc3,
- 0x47, 0x33, 0xef, 0xc1, 0x5e, 0xcf, 0x16, 0xc1, 0x5e, 0xe5, 0xc6, 0x0a,
- 0x62, 0x01, 0x14, 0xe1, 0x03, 0xc1, 0x5e, 0xf4, 0x14, 0xc1, 0x5f, 0x00,
- 0x0f, 0xc1, 0x5f, 0x0c, 0x12, 0xc1, 0x5f, 0x18, 0x0b, 0xc1, 0x5f, 0x30,
- 0xcc, 0x00, 0x9b, 0x01, 0x4e, 0x09, 0x04, 0xc1, 0x5f, 0x42, 0xcc, 0x03,
- 0x3b, 0x01, 0x4d, 0xb1, 0x9a, 0x01, 0x5d, 0xf1, 0xcf, 0x64, 0x61, 0x0f,
- 0x88, 0x69, 0xc6, 0x07, 0x09, 0x0f, 0xbe, 0xb9, 0x0d, 0x41, 0x5f, 0x4e,
- 0x45, 0x00, 0x6c, 0xc1, 0x5f, 0x5a, 0x5e, 0x0f, 0x9c, 0x41, 0x5f, 0x84,
- 0x97, 0x09, 0x1b, 0x53, 0x01, 0x5f, 0x8a, 0x83, 0x09, 0x1a, 0xeb, 0x01,
- 0x5f, 0xa1, 0x8b, 0x09, 0x1b, 0x1b, 0x01, 0x5f, 0xb3, 0xc2, 0x8e, 0x17,
- 0x09, 0x1b, 0x10, 0x94, 0x09, 0x19, 0x43, 0x01, 0x5f, 0xce, 0x00, 0xc1,
- 0x5f, 0xeb, 0x8f, 0x09, 0x18, 0xeb, 0x01, 0x5f, 0xfe, 0x1c, 0xc1, 0x60,
- 0x13, 0xc4, 0xe0, 0x4b, 0x09, 0x1a, 0xc9, 0xc2, 0x00, 0xe5, 0x09, 0x1a,
- 0x8b, 0x01, 0x60, 0x1e, 0x90, 0x09, 0x19, 0x33, 0x01, 0x60, 0x32, 0x86,
- 0x09, 0x18, 0x9b, 0x01, 0x60, 0x38, 0x84, 0x09, 0x18, 0x91, 0x9f, 0x09,
- 0x18, 0x88, 0x97, 0x09, 0x18, 0x2b, 0x01, 0x60, 0x42, 0x83, 0x09, 0x17,
- 0x5b, 0x01, 0x60, 0x5a, 0x8b, 0x09, 0x17, 0xf3, 0x01, 0x60, 0x79, 0x87,
- 0x09, 0x17, 0xe2, 0x01, 0x60, 0x8e, 0x8b, 0x09, 0x16, 0xdb, 0x01, 0x60,
- 0x94, 0x0a, 0xc1, 0x60, 0xab, 0x83, 0x09, 0x14, 0x9b, 0x01, 0x60, 0xc4,
- 0x97, 0x09, 0x17, 0x12, 0x01, 0x60, 0xdc, 0x8b, 0x09, 0x12, 0x63, 0x01,
- 0x60, 0xfd, 0x97, 0x09, 0x13, 0x0b, 0x01, 0x61, 0x1b, 0x83, 0x09, 0x11,
- 0xf3, 0x01, 0x61, 0x2b, 0x87, 0x09, 0x12, 0x42, 0x01, 0x61, 0x43, 0x97,
- 0x09, 0x11, 0x63, 0x01, 0x61, 0x47, 0x8b, 0x09, 0x11, 0x53, 0x01, 0x61,
- 0x69, 0x87, 0x09, 0x11, 0x43, 0x01, 0x61, 0x73, 0x83, 0x09, 0x11, 0x02,
- 0x01, 0x61, 0x7a, 0x97, 0x09, 0x0f, 0xdb, 0x01, 0x61, 0x93, 0x83, 0x09,
- 0x0d, 0xbb, 0x01, 0x61, 0xbc, 0x8b, 0x09, 0x0f, 0xba, 0x01, 0x61, 0xdc,
- 0x83, 0x09, 0x0a, 0xbb, 0x01, 0x61, 0xec, 0xc5, 0xda, 0x1b, 0x09, 0x0d,
- 0xb1, 0x97, 0x09, 0x0d, 0x53, 0x01, 0x62, 0x22, 0x8b, 0x09, 0x0d, 0x03,
- 0x01, 0x62, 0x4f, 0xc4, 0x72, 0x9d, 0x09, 0x0c, 0xf8, 0x8b, 0x09, 0x09,
- 0x6b, 0x01, 0x62, 0x61, 0x83, 0x09, 0x09, 0x4b, 0x01, 0x62, 0x67, 0x97,
- 0x09, 0x09, 0xba, 0x01, 0x62, 0x6f, 0x97, 0x09, 0x08, 0xb3, 0x01, 0x62,
- 0x84, 0x8b, 0x09, 0x08, 0x03, 0x01, 0x62, 0xaa, 0x07, 0xc1, 0x62, 0xc7,
- 0x83, 0x09, 0x05, 0xaa, 0x01, 0x62, 0xd6, 0xc3, 0x08, 0xc2, 0x09, 0x05,
- 0x0b, 0x01, 0x63, 0x12, 0xc3, 0x0b, 0xee, 0x09, 0x05, 0x03, 0x01, 0x63,
- 0x16, 0x14, 0xc1, 0x63, 0x1c, 0x9f, 0x09, 0x04, 0x6b, 0x01, 0x63, 0x2b,
- 0x90, 0x09, 0x04, 0xbb, 0x01, 0x63, 0x31, 0x8e, 0x09, 0x04, 0xb1, 0xc3,
- 0xe1, 0xdf, 0x09, 0x04, 0xa9, 0xc3, 0x03, 0xf0, 0x09, 0x04, 0xa1, 0x00,
- 0x41, 0x63, 0x35, 0x97, 0x09, 0x03, 0xd3, 0x01, 0x63, 0x41, 0x8b, 0x09,
- 0x03, 0x93, 0x01, 0x63, 0x64, 0x83, 0x09, 0x02, 0xaa, 0x01, 0x63, 0x7f,
- 0x97, 0x09, 0x02, 0x6b, 0x01, 0x63, 0x97, 0x83, 0x09, 0x02, 0x03, 0x01,
- 0x63, 0xab, 0x8b, 0x09, 0x02, 0x4a, 0x01, 0x63, 0xcf, 0x86, 0x09, 0x00,
- 0xe3, 0x01, 0x63, 0xd5, 0x84, 0x09, 0x00, 0x53, 0x01, 0x63, 0xdb, 0xc3,
- 0x00, 0xba, 0x09, 0x01, 0x5b, 0x01, 0x63, 0xe6, 0x15, 0xc1, 0x63, 0xec,
- 0x14, 0xc1, 0x63, 0xf9, 0xc3, 0x0e, 0x9f, 0x09, 0x01, 0x99, 0x90, 0x09,
- 0x01, 0x6b, 0x01, 0x64, 0x08, 0x8e, 0x09, 0x01, 0x03, 0x01, 0x64, 0x12,
- 0x8d, 0x09, 0x00, 0xeb, 0x01, 0x64, 0x24, 0x9f, 0x09, 0x00, 0x49, 0x47,
- 0x01, 0x2c, 0x41, 0x64, 0x2a, 0x8b, 0x09, 0x13, 0xfb, 0x01, 0x64, 0x58,
- 0xc4, 0x72, 0x9d, 0x09, 0x13, 0xf3, 0x01, 0x64, 0x60, 0x83, 0x09, 0x13,
- 0xd2, 0x01, 0x64, 0x66, 0x97, 0x09, 0x14, 0x91, 0x8b, 0x09, 0x14, 0x89,
- 0x83, 0x09, 0x14, 0x7a, 0x01, 0x64, 0x72, 0xc2, 0x00, 0xe5, 0x09, 0x0a,
- 0xb1, 0x94, 0x09, 0x0a, 0xa9, 0x90, 0x09, 0x0a, 0xa1, 0x8f, 0x09, 0x0a,
- 0x73, 0x01, 0x64, 0x76, 0x8e, 0x09, 0x0a, 0x5b, 0x01, 0x64, 0x80, 0x89,
- 0x09, 0x0a, 0x2b, 0x01, 0x64, 0x8a, 0xc3, 0x5b, 0x41, 0x09, 0x0a, 0x13,
- 0x01, 0x64, 0x91, 0x84, 0x09, 0x0a, 0x09, 0xc2, 0x00, 0x56, 0x09, 0x0a,
- 0x00, 0xc9, 0xb5, 0x79, 0x09, 0x23, 0xa1, 0xc8, 0xbf, 0x6d, 0x09, 0x23,
- 0x99, 0xc5, 0x37, 0x91, 0x09, 0x23, 0x90, 0x43, 0x00, 0x8c, 0xc1, 0x64,
- 0x97, 0x44, 0xe1, 0xd7, 0x41, 0x64, 0xbf, 0x45, 0x02, 0x13, 0xc1, 0x64,
- 0xcb, 0x47, 0xc1, 0x48, 0x41, 0x64, 0xf3, 0x45, 0x1a, 0x21, 0xc1, 0x65,
- 0x03, 0x43, 0x4c, 0xf2, 0xc1, 0x65, 0x28, 0x54, 0x3e, 0xa9, 0x41, 0x65,
- 0x50, 0x44, 0x0c, 0x5b, 0xc1, 0x65, 0x5c, 0x44, 0x06, 0x9e, 0x41, 0x65,
- 0x80, 0x43, 0x00, 0x8c, 0xc1, 0x65, 0xaf, 0x50, 0x5a, 0x42, 0x41, 0x65,
- 0xd5, 0x43, 0x01, 0x47, 0xc1, 0x65, 0xe1, 0x45, 0x04, 0x5e, 0x41, 0x66,
- 0x06, 0x42, 0x01, 0x64, 0xc1, 0x66, 0x2b, 0xd1, 0x4f, 0x1f, 0x01, 0x1d,
- 0x50, 0xc8, 0xba, 0x45, 0x0f, 0xa5, 0x89, 0xc4, 0x00, 0xcb, 0x00, 0x05,
- 0x20, 0xc8, 0x81, 0x29, 0x07, 0xf2, 0x51, 0xc8, 0x80, 0x80, 0x07, 0xf2,
- 0x70, 0x06, 0xc1, 0x66, 0x37, 0x04, 0xc1, 0x66, 0x3f, 0xc3, 0x8f, 0x12,
- 0x0f, 0x02, 0xa3, 0x01, 0x66, 0x49, 0xc4, 0xc7, 0x6b, 0x0f, 0x03, 0x31,
- 0xc2, 0x00, 0xa4, 0x0f, 0x03, 0x0b, 0x01, 0x66, 0x4f, 0xc3, 0x02, 0x28,
- 0x0f, 0x03, 0x21, 0xc3, 0x26, 0x9b, 0x0f, 0x03, 0x19, 0xc3, 0x09, 0x66,
- 0x0f, 0x03, 0x11, 0x07, 0xc1, 0x66, 0x55, 0x97, 0x0f, 0x02, 0xf9, 0xc2,
- 0x00, 0x27, 0x0f, 0x02, 0xe9, 0x91, 0x0f, 0x02, 0xe1, 0xc2, 0x04, 0x2b,
- 0x0f, 0x02, 0xd1, 0x8b, 0x0f, 0x02, 0xcb, 0x01, 0x66, 0x61, 0x1c, 0xc1,
- 0x66, 0x65, 0xc2, 0x00, 0x7b, 0x0f, 0x02, 0x99, 0x83, 0x0f, 0x02, 0x88,
- 0x46, 0x02, 0x91, 0xc1, 0x66, 0x6f, 0x48, 0x19, 0x70, 0x41, 0x66, 0xec,
- 0x87, 0x00, 0x21, 0x6b, 0x01, 0x66, 0xfe, 0x06, 0xc1, 0x67, 0x2b, 0x15,
- 0xc1, 0x67, 0x4e, 0x12, 0xc1, 0x67, 0x70, 0x83, 0x00, 0x20, 0x83, 0x01,
- 0x67, 0x7d, 0xc2, 0x00, 0x4b, 0x00, 0x28, 0xe1, 0xc2, 0x0f, 0x60, 0x00,
- 0x28, 0xd1, 0x1b, 0xc1, 0x67, 0x8f, 0x14, 0xc1, 0x67, 0xab, 0x0e, 0xc1,
- 0x67, 0xbd, 0x0d, 0xc1, 0x67, 0xcf, 0x0a, 0xc1, 0x67, 0xec, 0x09, 0xc1,
- 0x67, 0xf9, 0x05, 0xc1, 0x68, 0x08, 0x97, 0x00, 0x21, 0x1b, 0x01, 0x68,
- 0x23, 0x04, 0xc1, 0x68, 0x30, 0x91, 0x00, 0x20, 0xf3, 0x01, 0x68, 0x4e,
- 0x8b, 0x00, 0x20, 0xc3, 0x01, 0x68, 0x61, 0x1c, 0xc1, 0x68, 0x7e, 0x16,
- 0xc1, 0x68, 0x89, 0xc2, 0x1b, 0xa5, 0x00, 0x20, 0x41, 0x10, 0xc1, 0x68,
- 0xa0, 0xc2, 0x00, 0x7b, 0x00, 0x20, 0x91, 0x44, 0x13, 0x8e, 0xc1, 0x68,
- 0xac, 0xc4, 0xe1, 0x87, 0x00, 0x23, 0x90, 0xc4, 0xe3, 0xaf, 0x00, 0x26,
- 0xa1, 0xc6, 0xd2, 0xf1, 0x00, 0x25, 0xa1, 0xc6, 0xd3, 0x57, 0x00, 0x25,
- 0x20, 0x9f, 0x09, 0x7f, 0x91, 0x9e, 0x09, 0x7f, 0x88, 0x1e, 0xc1, 0x68,
- 0xb8, 0x1d, 0x41, 0x68, 0xc4, 0x26, 0xc1, 0x68, 0xe8, 0x25, 0xc1, 0x69,
- 0x0c, 0x24, 0xc1, 0x69, 0x34, 0x23, 0xc1, 0x69, 0x5b, 0x22, 0xc1, 0x69,
- 0x7f, 0x21, 0xc1, 0x69, 0xa3, 0x20, 0xc1, 0x69, 0xbb, 0x1f, 0xc1, 0x69,
- 0xdb, 0x1e, 0xc1, 0x69, 0xfb, 0x1d, 0x41, 0x6a, 0x1a, 0x87, 0x08, 0x41,
- 0x99, 0x8b, 0x08, 0x41, 0xa1, 0x91, 0x08, 0x41, 0xa9, 0x83, 0x08, 0x41,
- 0x90, 0x83, 0x08, 0x41, 0xb9, 0x87, 0x08, 0x41, 0xc0, 0x83, 0x08, 0x41,
- 0xe1, 0x91, 0x08, 0x41, 0xf8, 0x83, 0x08, 0x40, 0x29, 0x91, 0x08, 0x40,
- 0x40, 0x83, 0x08, 0x40, 0x51, 0x87, 0x08, 0x40, 0x59, 0x8b, 0x08, 0x40,
- 0x61, 0x91, 0x08, 0x40, 0x69, 0x97, 0x08, 0x40, 0x70, 0x83, 0x08, 0x40,
- 0x79, 0x87, 0x08, 0x40, 0x81, 0x8b, 0x08, 0x40, 0x89, 0x91, 0x08, 0x40,
- 0x91, 0x97, 0x08, 0x40, 0x98, 0x83, 0x08, 0x40, 0xa1, 0x87, 0x08, 0x40,
- 0xa9, 0x8b, 0x08, 0x40, 0xb1, 0x91, 0x08, 0x40, 0xb9, 0x97, 0x08, 0x40,
- 0xc0, 0x83, 0x08, 0x40, 0xc9, 0x87, 0x08, 0x40, 0xd1, 0x8b, 0x08, 0x40,
- 0xd9, 0x91, 0x08, 0x40, 0xe1, 0x97, 0x08, 0x40, 0xe8, 0x83, 0x08, 0x40,
- 0xf1, 0x87, 0x08, 0x40, 0xf9, 0x8b, 0x08, 0x41, 0x01, 0x91, 0x08, 0x41,
- 0x09, 0x97, 0x08, 0x41, 0x10, 0x83, 0x08, 0x41, 0x19, 0x87, 0x08, 0x41,
- 0x21, 0x8b, 0x08, 0x41, 0x29, 0x91, 0x08, 0x41, 0x31, 0x97, 0x08, 0x41,
- 0x38, 0x83, 0x08, 0x41, 0x41, 0x87, 0x08, 0x41, 0x49, 0x8b, 0x08, 0x41,
- 0x51, 0x91, 0x08, 0x41, 0x59, 0x97, 0x08, 0x41, 0x60, 0x83, 0x08, 0x41,
- 0x69, 0x87, 0x08, 0x41, 0x71, 0x8b, 0x08, 0x41, 0x79, 0x91, 0x08, 0x41,
- 0x81, 0x97, 0x08, 0x41, 0x88, 0xc2, 0x01, 0x47, 0x0f, 0xdf, 0x91, 0xc4,
- 0x04, 0x5e, 0x0f, 0xdf, 0x98, 0xc3, 0x06, 0x9e, 0x0f, 0xdf, 0xa1, 0xc3,
- 0x0c, 0x5b, 0x0f, 0xdf, 0xa8, 0xc2, 0x26, 0x51, 0x0f, 0xdf, 0xb1, 0xc4,
- 0x18, 0x83, 0x0f, 0xdf, 0xb8, 0xa0, 0x00, 0x04, 0x79, 0x9f, 0x00, 0x04,
- 0x70, 0x47, 0xc2, 0xf3, 0xc1, 0x6a, 0x3a, 0x43, 0x01, 0xf7, 0xc1, 0x6a,
- 0x46, 0x0e, 0xc1, 0x6a, 0x4c, 0xde, 0x0f, 0xba, 0x01, 0x00, 0xd9, 0xd4,
- 0x3d, 0xe1, 0x00, 0x04, 0xd0, 0x47, 0x33, 0xef, 0xc1, 0x6a, 0x56, 0x46,
- 0x06, 0x97, 0x41, 0x6a, 0x74, 0xcb, 0x1e, 0x17, 0x00, 0x6c, 0x09, 0x03,
- 0xc1, 0x6a, 0x92, 0xc9, 0xac, 0x04, 0x00, 0x6c, 0x18, 0x46, 0x02, 0x91,
- 0xc1, 0x6a, 0x9e, 0x4a, 0x9e, 0x06, 0x41, 0x6a, 0xec, 0xca, 0x63, 0xee,
- 0x00, 0x6e, 0x79, 0x0d, 0xc1, 0x6b, 0x10, 0x45, 0x63, 0xe9, 0xc1, 0x6b,
- 0x1c, 0x42, 0x01, 0x29, 0x41, 0x6b, 0x3a, 0x47, 0x00, 0xb2, 0xc1, 0x6b,
- 0x46, 0x43, 0x45, 0x93, 0x41, 0x6b, 0x50, 0x0b, 0xc1, 0x6b, 0x62, 0xc8,
- 0x11, 0xdd, 0x0e, 0xd4, 0x41, 0x0e, 0xc1, 0x6b, 0x6e, 0x48, 0xbc, 0xb5,
- 0xc1, 0x6b, 0x7a, 0x5c, 0x12, 0x92, 0x41, 0x6b, 0x8c, 0x11, 0xc1, 0x6b,
- 0x9b, 0x46, 0x91, 0x29, 0x41, 0x6b, 0xa7, 0xc8, 0x4f, 0x30, 0x0e, 0xd4,
- 0x49, 0x48, 0x18, 0x9c, 0xc1, 0x6b, 0xb9, 0x47, 0xc9, 0xa6, 0xc1, 0x6b,
- 0xc5, 0x47, 0xc9, 0xec, 0xc1, 0x6b, 0xd5, 0x46, 0xcf, 0x8b, 0x41, 0x6b,
- 0xe1, 0x47, 0x7c, 0x5f, 0xc1, 0x6b, 0xf3, 0x0b, 0x41, 0x6b, 0xfb, 0xe0,
- 0x00, 0x27, 0x0e, 0xd3, 0xa8, 0x11, 0xc1, 0x6c, 0x05, 0x07, 0xc1, 0x6c,
- 0x17, 0x46, 0xce, 0xcb, 0x41, 0x6c, 0x26, 0xc9, 0xaf, 0xfd, 0x0e, 0xd3,
- 0x61, 0xc3, 0x17, 0x14, 0x0e, 0xd1, 0x81, 0x42, 0x05, 0xd0, 0x41, 0x6c,
- 0x32, 0x03, 0xc1, 0x6c, 0x4e, 0xc3, 0x01, 0xf1, 0x0e, 0xcf, 0xfa, 0x01,
- 0x6c, 0x5a, 0xc3, 0x6b, 0x57, 0x0e, 0xd3, 0x51, 0x44, 0x12, 0xaa, 0x41,
- 0x6c, 0x5e, 0x47, 0xc6, 0x49, 0xc1, 0x6c, 0x6e, 0x44, 0x1b, 0xc8, 0x41,
- 0x6c, 0x86, 0x45, 0x99, 0xd1, 0xc1, 0x6c, 0xba, 0x44, 0xdd, 0x82, 0x41,
- 0x6c, 0xc6, 0x44, 0xcf, 0x79, 0xc1, 0x6c, 0xd8, 0x44, 0x86, 0xa4, 0x41,
- 0x6c, 0xe4, 0x4f, 0x65, 0xf6, 0xc1, 0x6c, 0xf0, 0x47, 0xc5, 0x2a, 0x41,
- 0x6d, 0x02, 0xc7, 0x00, 0x48, 0x0e, 0xc8, 0x51, 0xc8, 0x39, 0x95, 0x0e,
- 0xc8, 0x49, 0xc6, 0x24, 0x18, 0x0e, 0xc8, 0x40, 0xca, 0x21, 0x3e, 0x01,
- 0x39, 0xb1, 0xd4, 0x39, 0xe5, 0x0f, 0xa9, 0x79, 0xcd, 0x0e, 0x9f, 0x0f,
- 0xbe, 0x68, 0x03, 0xc1, 0x6d, 0x2a, 0x91, 0x08, 0xad, 0xd1, 0x87, 0x08,
- 0xad, 0xc1, 0xc9, 0xac, 0xc1, 0x08, 0xad, 0xa3, 0x01, 0x6d, 0x3f, 0x97,
- 0x08, 0xad, 0x93, 0x01, 0x6d, 0x43, 0x8b, 0x08, 0xad, 0x82, 0x01, 0x6d,
- 0x47, 0x83, 0x08, 0xac, 0x03, 0x01, 0x6d, 0x4b, 0x16, 0xc1, 0x6d, 0x5d,
- 0xc2, 0x00, 0xa4, 0x08, 0xad, 0x71, 0x15, 0xc1, 0x6d, 0x72, 0x18, 0xc1,
- 0x6d, 0x82, 0xc2, 0x00, 0xc7, 0x08, 0xad, 0x49, 0xc2, 0x02, 0x59, 0x08,
- 0xad, 0x41, 0xc2, 0x1d, 0x5f, 0x08, 0xad, 0x39, 0xc2, 0x00, 0xad, 0x08,
- 0xad, 0x31, 0x04, 0xc1, 0x6d, 0x8c, 0x12, 0xc1, 0x6d, 0x96, 0x10, 0xc1,
- 0x6d, 0xa0, 0x06, 0xc1, 0x6d, 0xb6, 0x0c, 0xc1, 0x6d, 0xc4, 0x05, 0xc1,
- 0x6d, 0xce, 0x09, 0xc1, 0x6d, 0xd8, 0x0d, 0xc1, 0x6d, 0xe2, 0x91, 0x08,
- 0xac, 0x61, 0x87, 0x08, 0xac, 0x51, 0x97, 0x08, 0xac, 0x23, 0x01, 0x6d,
- 0xec, 0x8b, 0x08, 0xac, 0x12, 0x01, 0x6d, 0xf0, 0x07, 0xc1, 0x6d, 0xf4,
- 0x44, 0x00, 0xcc, 0x41, 0x6e, 0x00, 0xa0, 0x08, 0xae, 0x41, 0x9f, 0x08,
- 0xae, 0x39, 0x9e, 0x08, 0xae, 0x30, 0xcb, 0x95, 0x0d, 0x08, 0xae, 0x19,
- 0xc4, 0x1c, 0xd0, 0x08, 0xae, 0x10, 0xd3, 0x43, 0x04, 0x0f, 0xad, 0x09,
- 0xd1, 0x54, 0x2b, 0x0f, 0xad, 0x01, 0xd4, 0x08, 0x33, 0x0f, 0xac, 0xd9,
- 0xd3, 0x41, 0x16, 0x0f, 0xac, 0xd0, 0xd3, 0x43, 0x04, 0x0f, 0xac, 0xf9,
- 0xd1, 0x54, 0x2b, 0x0f, 0xac, 0xf1, 0xd4, 0x08, 0x33, 0x0f, 0xac, 0xc9,
- 0xd3, 0x41, 0x16, 0x0f, 0xac, 0xc0, 0x11, 0xc1, 0x6e, 0x1e, 0xcc, 0x89,
- 0xf8, 0x01, 0x31, 0x51, 0xc6, 0x0a, 0x62, 0x01, 0x12, 0xd9, 0x45, 0x00,
- 0x6c, 0x41, 0x6e, 0x2a, 0xc4, 0x26, 0xb2, 0x00, 0x00, 0x11, 0xc7, 0xc8,
- 0xbf, 0x00, 0x00, 0x09, 0x15, 0xc1, 0x6e, 0x36, 0xce, 0x71, 0x35, 0x00,
- 0x04, 0xb1, 0xcc, 0x8d, 0x04, 0x00, 0x04, 0xb0, 0xc4, 0x1f, 0x5c, 0x01,
- 0x1f, 0x21, 0xc6, 0x18, 0xf8, 0x0f, 0xa6, 0x78, 0xcb, 0x98, 0x88, 0x0f,
- 0xde, 0x31, 0xc5, 0x22, 0xcf, 0x0f, 0xde, 0x48, 0xc4, 0x01, 0x1e, 0x0f,
- 0xde, 0x39, 0xc5, 0x01, 0xf7, 0x0f, 0xde, 0x40, 0xcb, 0x1e, 0x17, 0x05,
- 0x46, 0x29, 0x42, 0x03, 0x32, 0xc1, 0x6e, 0x42, 0xc8, 0x11, 0x40, 0x05,
- 0x44, 0x00, 0x03, 0xc1, 0x6e, 0x4e, 0x91, 0x05, 0x46, 0x0b, 0x01, 0x6e,
- 0x5a, 0x87, 0x05, 0x45, 0xf3, 0x01, 0x6e, 0x5e, 0x48, 0xac, 0xc1, 0xc1,
- 0x6e, 0x62, 0x8b, 0x05, 0x45, 0xb3, 0x01, 0x6e, 0x70, 0x97, 0x05, 0x45,
- 0xc2, 0x01, 0x6e, 0x74, 0x15, 0xc1, 0x6e, 0x78, 0xc2, 0x00, 0xa4, 0x05,
- 0x45, 0x91, 0x0e, 0xc1, 0x6e, 0x88, 0x83, 0x05, 0x44, 0x13, 0x01, 0x6e,
- 0x92, 0x8b, 0x05, 0x44, 0x23, 0x01, 0x6e, 0x9e, 0x97, 0x05, 0x44, 0x33,
- 0x01, 0x6e, 0xa2, 0x18, 0xc1, 0x6e, 0xa6, 0x87, 0x05, 0x44, 0x63, 0x01,
- 0x6e, 0xb0, 0x91, 0x05, 0x44, 0x7b, 0x01, 0x6e, 0xb4, 0x0d, 0xc1, 0x6e,
- 0xb8, 0x09, 0xc1, 0x6e, 0xc2, 0x10, 0xc1, 0x6e, 0xcc, 0x05, 0xc1, 0x6e,
- 0xe2, 0x0c, 0xc1, 0x6e, 0xec, 0x16, 0xc1, 0x6e, 0xf6, 0x06, 0xc1, 0x6f,
- 0x04, 0x12, 0xc1, 0x6f, 0x12, 0x04, 0xc1, 0x6f, 0x1c, 0xc2, 0x00, 0xad,
- 0x05, 0x45, 0x51, 0xc2, 0x1d, 0x5f, 0x05, 0x45, 0x59, 0xc2, 0x02, 0x59,
- 0x05, 0x45, 0x60, 0xc4, 0x1c, 0xd0, 0x05, 0x46, 0x71, 0xcb, 0x95, 0x0d,
- 0x05, 0x46, 0x79, 0x45, 0x06, 0x98, 0x41, 0x6f, 0x26, 0x47, 0x06, 0xf1,
- 0xc1, 0x6f, 0x4a, 0x48, 0xbc, 0xd5, 0x41, 0x6f, 0x56, 0x10, 0xc1, 0x6f,
- 0x5c, 0xc6, 0xce, 0x3b, 0x00, 0x41, 0xe1, 0xc5, 0xd5, 0xf7, 0x00, 0x41,
- 0xa1, 0xc5, 0xd8, 0x4f, 0x00, 0x41, 0x88, 0xcb, 0x92, 0x42, 0x00, 0x41,
- 0xe9, 0xc9, 0xab, 0xce, 0x00, 0x41, 0xa8, 0xc3, 0x35, 0x88, 0x00, 0x41,
- 0xd1, 0xc4, 0xe2, 0xab, 0x00, 0x41, 0xc0, 0xc7, 0xc3, 0x39, 0x00, 0x41,
- 0x69, 0xce, 0x6d, 0xc3, 0x00, 0x40, 0xd9, 0xc6, 0x64, 0xbb, 0x00, 0x40,
- 0xc9, 0xc9, 0xae, 0x0e, 0x00, 0x40, 0xc1, 0xc2, 0x00, 0x34, 0x00, 0x40,
- 0xb2, 0x01, 0x6f, 0x68, 0x8b, 0x00, 0x41, 0x41, 0xc7, 0xc4, 0x12, 0x00,
- 0x41, 0x21, 0xce, 0x6d, 0xc3, 0x00, 0x40, 0xd0, 0xc4, 0xda, 0x94, 0x00,
- 0x41, 0x61, 0xc6, 0xc4, 0x13, 0x00, 0x41, 0x28, 0xc9, 0xad, 0x48, 0x00,
- 0x41, 0x0a, 0x01, 0x6f, 0x6e, 0x8b, 0x00, 0x41, 0x49, 0x97, 0x00, 0x41,
- 0x31, 0x83, 0x00, 0x41, 0x13, 0x01, 0x6f, 0x72, 0x87, 0x00, 0x40, 0xe0,
- 0x83, 0x00, 0x41, 0x00, 0xc3, 0xbb, 0xff, 0x00, 0x40, 0xa9, 0xc6, 0xcc,
- 0xeb, 0x00, 0x40, 0x89, 0xc2, 0x00, 0x6d, 0x00, 0x40, 0x40, 0xc3, 0x00,
- 0xa4, 0x00, 0x40, 0xa1, 0xc6, 0xd2, 0x49, 0x00, 0x40, 0x70, 0x90, 0x00,
- 0x40, 0x79, 0x96, 0x00, 0x40, 0x39, 0x9b, 0x00, 0x40, 0x20, 0xc2, 0x14,
- 0x40, 0x00, 0x40, 0x29, 0xc2, 0x00, 0x6d, 0x00, 0x40, 0x08, 0xc3, 0x00,
- 0x57, 0x01, 0x52, 0xc1, 0xc2, 0x00, 0xbf, 0x01, 0x52, 0xb8, 0xc6, 0x00,
- 0x71, 0x0f, 0xa5, 0x21, 0xc4, 0x00, 0x67, 0x0f, 0xb1, 0xa1, 0xcd, 0x7b,
- 0x4d, 0x0f, 0xb6, 0x60, 0xc9, 0x0a, 0x4a, 0x01, 0x54, 0xab, 0x01, 0x6f,
- 0x76, 0xcc, 0x00, 0x9b, 0x01, 0x54, 0xb2, 0x01, 0x6f, 0x7c, 0xc9, 0xac,
- 0x94, 0x01, 0x5a, 0xd1, 0xcd, 0x7e, 0x18, 0x01, 0x5a, 0xe0, 0x15, 0xc1,
- 0x6f, 0x82, 0xd1, 0x52, 0x4f, 0x08, 0x8e, 0xe9, 0xca, 0x1d, 0x8f, 0x08,
- 0x8e, 0xe1, 0x07, 0xc1, 0x6f, 0x98, 0x06, 0xc1, 0x6f, 0xa4, 0x46, 0x34,
- 0xbb, 0xc1, 0x6f, 0xb6, 0xd1, 0x50, 0x40, 0x08, 0x8e, 0x39, 0xc2, 0x00,
- 0x3a, 0x08, 0x8e, 0x21, 0x47, 0x02, 0x90, 0x41, 0x6f, 0xc2, 0xc4, 0xe5,
- 0xc7, 0x08, 0x22, 0x81, 0x16, 0xc1, 0x70, 0x27, 0xc4, 0xe2, 0x67, 0x08,
- 0x22, 0x91, 0xc3, 0x19, 0xf0, 0x08, 0x22, 0x99, 0x15, 0xc1, 0x70, 0x31,
- 0xc6, 0xcf, 0xf1, 0x08, 0x22, 0xb9, 0x42, 0x05, 0xd0, 0xc1, 0x70, 0x3b,
- 0x0a, 0xc1, 0x70, 0x43, 0xc3, 0xe6, 0xeb, 0x08, 0x22, 0xd1, 0xc4, 0xe4,
- 0x47, 0x08, 0x22, 0xd9, 0xc3, 0xa2, 0x5c, 0x08, 0x22, 0xe1, 0xc3, 0x34,
- 0xbb, 0x08, 0x22, 0xe9, 0xc3, 0xe6, 0x91, 0x08, 0x22, 0xf9, 0x0f, 0xc1,
- 0x70, 0x4f, 0xc5, 0xdf, 0xb6, 0x08, 0x23, 0x09, 0x42, 0x01, 0x47, 0xc1,
- 0x70, 0x5b, 0xc4, 0xe2, 0x87, 0x08, 0x23, 0x21, 0x0b, 0xc1, 0x70, 0x65,
- 0x07, 0xc1, 0x70, 0x75, 0x03, 0xc1, 0x70, 0x85, 0x11, 0xc1, 0x70, 0xab,
- 0xc4, 0xe0, 0x5f, 0x08, 0x23, 0x71, 0xc3, 0x1f, 0xd8, 0x08, 0x23, 0x79,
- 0xc2, 0x01, 0xc7, 0x08, 0x23, 0x98, 0xc7, 0xc9, 0x4b, 0x0d, 0xe5, 0x19,
- 0xc9, 0xb2, 0x4f, 0x0d, 0xe5, 0x11, 0xd2, 0x48, 0x60, 0x0d, 0xe5, 0x09,
- 0xce, 0x6f, 0xbb, 0x0d, 0xe5, 0x00, 0x46, 0x01, 0xf7, 0xc1, 0x70, 0xcb,
- 0xc9, 0xb1, 0x65, 0x01, 0x56, 0xf1, 0xc9, 0x36, 0xa6, 0x01, 0x56, 0xfb,
- 0x01, 0x70, 0xd1, 0xc7, 0xc9, 0x3d, 0x01, 0x57, 0x03, 0x01, 0x70, 0xd7,
- 0xd3, 0x43, 0x63, 0x01, 0x5a, 0x71, 0x04, 0x41, 0x70, 0xdb, 0x91, 0x01,
- 0x09, 0xa1, 0x87, 0x01, 0x09, 0x79, 0x8e, 0x01, 0x08, 0x99, 0x89, 0x01,
- 0x08, 0x50, 0x8f, 0x01, 0x09, 0x99, 0x88, 0x01, 0x09, 0x89, 0x87, 0x01,
- 0x09, 0x81, 0x84, 0x01, 0x09, 0x61, 0x94, 0x01, 0x08, 0xd9, 0x92, 0x01,
- 0x08, 0xc1, 0x8e, 0x01, 0x08, 0x91, 0x8b, 0x01, 0x08, 0x81, 0x8a, 0x01,
- 0x08, 0x58, 0xd0, 0x5c, 0xe2, 0x0f, 0xc2, 0xb9, 0xcc, 0x8c, 0x20, 0x01,
- 0x0e, 0xc9, 0xc5, 0x01, 0x0f, 0x01, 0x0c, 0xcb, 0x01, 0x70, 0xe7, 0x49,
- 0x01, 0x8a, 0xc1, 0x70, 0xeb, 0xcb, 0x04, 0xfc, 0x01, 0x58, 0x19, 0xcb,
- 0x97, 0xe3, 0x01, 0x58, 0x59, 0xd5, 0x03, 0xb2, 0x01, 0x5b, 0x4a, 0x01,
- 0x70, 0xfd, 0xd0, 0x5c, 0xe2, 0x0f, 0xc2, 0xb1, 0xc5, 0x01, 0x0f, 0x01,
- 0x0c, 0xc3, 0x01, 0x71, 0x03, 0xcc, 0x8c, 0x20, 0x01, 0x0e, 0xc1, 0x49,
- 0x01, 0x8a, 0xc1, 0x71, 0x07, 0xcb, 0x04, 0xfc, 0x01, 0x58, 0x11, 0xcb,
- 0x97, 0xe3, 0x01, 0x58, 0x51, 0xd5, 0x03, 0xb2, 0x01, 0x5b, 0x42, 0x01,
- 0x71, 0x19, 0xc5, 0x8b, 0x13, 0x08, 0xd4, 0xf9, 0xcc, 0x8b, 0x0c, 0x08,
- 0xd4, 0xf0, 0xc7, 0x44, 0x79, 0x08, 0xd4, 0xb9, 0xc8, 0x11, 0x40, 0x08,
- 0xd4, 0xb1, 0xcb, 0x98, 0x9e, 0x08, 0xd4, 0x29, 0xcb, 0x91, 0x66, 0x08,
- 0xd4, 0x20, 0x8a, 0x08, 0xd4, 0x98, 0x89, 0x08, 0xd4, 0x60, 0x83, 0x08,
- 0xd4, 0x49, 0xc2, 0x00, 0xa4, 0x08, 0xd4, 0x40, 0xc3, 0x1a, 0x80, 0x08,
- 0xd4, 0x19, 0xc2, 0x00, 0xa4, 0x08, 0xd2, 0xe9, 0x83, 0x08, 0xd2, 0xe0,
- 0x83, 0x08, 0xd4, 0x09, 0xc2, 0x0c, 0x65, 0x08, 0xd4, 0x01, 0xc2, 0x00,
- 0xa4, 0x08, 0xd3, 0xf8, 0x83, 0x08, 0xd3, 0xc9, 0xc2, 0x00, 0xa4, 0x08,
- 0xd3, 0xc0, 0xc2, 0x02, 0xb4, 0x08, 0xd3, 0xb9, 0xc2, 0x00, 0xa4, 0x08,
- 0xd3, 0x71, 0x83, 0x08, 0xd3, 0x69, 0x06, 0x41, 0x71, 0x1f, 0x15, 0xc1,
- 0x71, 0x29, 0xc2, 0x00, 0xa4, 0x08, 0xd3, 0x61, 0x83, 0x08, 0xd3, 0x59,
- 0x16, 0x41, 0x71, 0x33, 0xc2, 0x00, 0xa4, 0x08, 0xd3, 0x99, 0x83, 0x08,
- 0xd3, 0x90, 0xc2, 0x00, 0xa4, 0x08, 0xd3, 0x89, 0x83, 0x08, 0xd3, 0x80,
- 0x83, 0x08, 0xd3, 0x79, 0xc2, 0x00, 0xc1, 0x08, 0xd3, 0x51, 0xc2, 0x1d,
- 0x5f, 0x08, 0xd3, 0x29, 0xc2, 0x01, 0x29, 0x08, 0xd3, 0x00, 0xc2, 0x00,
- 0xa4, 0x08, 0xd3, 0x21, 0x83, 0x08, 0xd3, 0x18, 0xc2, 0x00, 0xa4, 0x08,
- 0xd3, 0x11, 0x83, 0x08, 0xd3, 0x08, 0xc2, 0x00, 0xa4, 0x08, 0xd2, 0xf9,
- 0x83, 0x08, 0xd2, 0xf0, 0x48, 0xac, 0xc1, 0xc1, 0x71, 0x3d, 0x03, 0xc1,
- 0x71, 0x45, 0x91, 0x08, 0xd2, 0xab, 0x01, 0x71, 0x4d, 0x87, 0x08, 0xd2,
- 0xa1, 0x97, 0x08, 0xd2, 0x9b, 0x01, 0x71, 0x51, 0x8b, 0x08, 0xd2, 0x88,
- 0xc4, 0x18, 0x83, 0x08, 0x87, 0xb9, 0xc2, 0x26, 0x51, 0x08, 0x87, 0xb0,
- 0xc3, 0x0c, 0x5b, 0x08, 0x87, 0xa9, 0xc3, 0x06, 0x9e, 0x08, 0x87, 0xa0,
- 0xc4, 0x04, 0x5e, 0x08, 0x87, 0x99, 0xc2, 0x01, 0x47, 0x08, 0x87, 0x90,
- 0x87, 0x08, 0x87, 0x41, 0x8a, 0x08, 0x86, 0xb0, 0x8a, 0x08, 0x87, 0x39,
- 0xc2, 0x0d, 0xf7, 0x08, 0x87, 0x18, 0xc3, 0x46, 0x7e, 0x08, 0x87, 0x09,
- 0xc2, 0x02, 0x98, 0x08, 0x86, 0xc9, 0xc3, 0xac, 0xb2, 0x08, 0x86, 0xb8,
- 0xd1, 0x52, 0x0b, 0x08, 0x7a, 0xc1, 0xcd, 0x78, 0x9c, 0x08, 0x7a, 0xaa,
- 0x01, 0x71, 0x55, 0xc8, 0x0c, 0x4a, 0x08, 0x7a, 0xa0, 0xc5, 0x2a, 0x13,
- 0x08, 0x7a, 0x99, 0xc2, 0x00, 0x4d, 0x08, 0x7a, 0x90, 0xc5, 0x01, 0x62,
- 0x08, 0x7a, 0x69, 0xc5, 0x00, 0x95, 0x08, 0x7a, 0x60, 0xc5, 0x01, 0x62,
- 0x08, 0x7a, 0x59, 0xc5, 0x00, 0x95, 0x08, 0x7a, 0x50, 0xc5, 0x00, 0x95,
- 0x08, 0x7a, 0x49, 0xc5, 0x01, 0x62, 0x08, 0x7a, 0x38, 0xc5, 0x00, 0x95,
- 0x08, 0x7a, 0x41, 0xc5, 0x01, 0x62, 0x08, 0x7a, 0x30, 0xc3, 0x13, 0xfc,
- 0x08, 0x7a, 0x21, 0xc5, 0xd0, 0x5e, 0x08, 0x79, 0xc8, 0xc3, 0x0d, 0xd9,
- 0x08, 0x7a, 0x09, 0x03, 0x41, 0x71, 0x5b, 0xc3, 0x15, 0x1d, 0x08, 0x79,
- 0xe9, 0xc4, 0x37, 0x5c, 0x08, 0x79, 0x80, 0xc2, 0x00, 0x6e, 0x08, 0x79,
- 0xb0, 0x16, 0xc1, 0x71, 0x67, 0x08, 0xc1, 0x71, 0x79, 0x19, 0xc1, 0x71,
- 0x81, 0x0e, 0xc1, 0x71, 0x91, 0x11, 0xc1, 0x71, 0xa7, 0x0b, 0xc1, 0x71,
- 0xc0, 0x05, 0xc1, 0x71, 0xd4, 0x14, 0xc1, 0x71, 0xfa, 0x0a, 0xc1, 0x72,
- 0x15, 0x06, 0xc1, 0x72, 0x3d, 0x12, 0xc1, 0x72, 0x63, 0x07, 0xc1, 0x72,
- 0x9c, 0x03, 0xc1, 0x72, 0xb0, 0xc3, 0xe0, 0xeb, 0x01, 0x98, 0x31, 0x0d,
- 0xc1, 0x72, 0xd6, 0x09, 0xc1, 0x73, 0x37, 0x15, 0xc1, 0x73, 0x5c, 0x10,
- 0xc1, 0x73, 0x74, 0x04, 0xc1, 0x73, 0x95, 0x0f, 0xc1, 0x73, 0xb5, 0x1b,
- 0xc1, 0x74, 0x08, 0xc8, 0xbf, 0x95, 0x01, 0x9e, 0xf0, 0x0e, 0xc1, 0x74,
- 0x14, 0x15, 0xc1, 0x74, 0x1e, 0x0d, 0xc1, 0x74, 0x4e, 0xcc, 0x8a, 0x40,
- 0x01, 0x15, 0x09, 0x16, 0xc1, 0x74, 0x5a, 0x0f, 0xc1, 0x74, 0x6a, 0x12,
- 0xc1, 0x74, 0x74, 0x05, 0xc1, 0x74, 0x80, 0x18, 0xc1, 0x74, 0x90, 0x17,
- 0xc1, 0x74, 0x9a, 0x0a, 0xc1, 0x74, 0xa6, 0x11, 0xc1, 0x74, 0xba, 0x08,
- 0xc1, 0x74, 0xc4, 0xc7, 0xc3, 0xbe, 0x0f, 0x8c, 0xf9, 0x10, 0xc1, 0x74,
- 0xdc, 0xc2, 0x00, 0x51, 0x0f, 0x8c, 0xa1, 0xc8, 0x0b, 0x7f, 0x01, 0x4e,
- 0x31, 0xd5, 0x34, 0x63, 0x01, 0x4e, 0x21, 0xc2, 0x16, 0xaa, 0x0f, 0x8a,
- 0x78, 0xc9, 0xb3, 0xff, 0x01, 0x20, 0xd3, 0x01, 0x74, 0xe6, 0xc4, 0x40,
- 0xc6, 0x01, 0x21, 0x01, 0xcf, 0x63, 0x53, 0x01, 0x20, 0xb1, 0x45, 0x9b,
- 0xe5, 0xc1, 0x74, 0xec, 0x48, 0x45, 0x8a, 0xc1, 0x74, 0xf8, 0xcf, 0x6b,
- 0x96, 0x01, 0x0a, 0x78, 0x07, 0xc1, 0x75, 0x04, 0xcf, 0x61, 0x73, 0x01,
- 0x20, 0x80, 0x07, 0xc1, 0x75, 0x13, 0xc3, 0x11, 0xdd, 0x01, 0x20, 0x00,
- 0xcd, 0x7d, 0x96, 0x01, 0x20, 0xe1, 0xc8, 0xbc, 0xfd, 0x01, 0x20, 0x60,
- 0xc5, 0x61, 0x7d, 0x01, 0x20, 0xd9, 0x10, 0x41, 0x75, 0x1f, 0xc4, 0x21,
- 0x97, 0x01, 0x20, 0xc1, 0xcd, 0x80, 0xe3, 0x01, 0x20, 0x68, 0xc8, 0xc0,
- 0xed, 0x01, 0x20, 0x41, 0xc3, 0x0a, 0x93, 0x01, 0x20, 0x38, 0x0f, 0xc1,
- 0x75, 0x2b, 0xc2, 0x00, 0x27, 0x00, 0x39, 0x33, 0x01, 0x75, 0x37, 0x16,
- 0xc1, 0x75, 0x3d, 0x15, 0xc1, 0x75, 0x4c, 0x14, 0xc1, 0x75, 0x6a, 0xc4,
- 0xc2, 0xd1, 0x00, 0x39, 0x49, 0x87, 0x00, 0x39, 0x29, 0xcd, 0x79, 0x79,
- 0x00, 0x39, 0x21, 0xc3, 0x1f, 0xd8, 0x00, 0x39, 0x11, 0xc6, 0xd0, 0x45,
- 0x00, 0x39, 0x01, 0xc4, 0xe2, 0x57, 0x00, 0x38, 0xf9, 0xc4, 0xe0, 0x37,
- 0x00, 0x38, 0xeb, 0x01, 0x75, 0x76, 0xc2, 0x01, 0xf0, 0x00, 0x38, 0xbb,
- 0x01, 0x75, 0x7c, 0xc4, 0x69, 0x5c, 0x00, 0x38, 0xc9, 0xc3, 0x78, 0xa9,
- 0x00, 0x38, 0xc1, 0x06, 0xc1, 0x75, 0x82, 0xc5, 0xd7, 0x55, 0x00, 0x38,
- 0x9b, 0x01, 0x75, 0x8e, 0xc4, 0xe4, 0x8f, 0x00, 0x38, 0x91, 0xc5, 0x5f,
- 0x9d, 0x00, 0x38, 0x80, 0x44, 0x7f, 0x1c, 0xc1, 0x75, 0x94, 0x48, 0xba,
- 0x2d, 0xc1, 0x75, 0x9e, 0xcf, 0x60, 0xfb, 0x00, 0x38, 0x28, 0xc7, 0x0a,
- 0xab, 0x00, 0x39, 0xc9, 0xca, 0x00, 0xf6, 0x00, 0x39, 0xc0, 0x45, 0xdd,
- 0xf9, 0xc1, 0x75, 0xb0, 0xc4, 0xe0, 0xcf, 0x00, 0x39, 0xf9, 0xc7, 0xc4,
- 0x90, 0x00, 0x3a, 0x10, 0xc6, 0x1b, 0xf3, 0x00, 0x39, 0xa9, 0xc5, 0x01,
- 0x62, 0x00, 0x39, 0xa1, 0xc5, 0x00, 0x95, 0x00, 0x39, 0x98, 0xc6, 0x1b,
- 0xf3, 0x00, 0x39, 0x91, 0xc5, 0x01, 0x62, 0x00, 0x39, 0x89, 0xc5, 0x00,
- 0x95, 0x00, 0x39, 0x80, 0xc9, 0xad, 0xf3, 0x00, 0x38, 0x51, 0x4b, 0x93,
- 0x81, 0x41, 0x75, 0xbc, 0x48, 0xbc, 0x0d, 0xc1, 0x75, 0xc8, 0x4a, 0x9c,
- 0xd0, 0x41, 0x75, 0xd7, 0xcf, 0x69, 0xc5, 0x00, 0x38, 0x01, 0x45, 0x80,
- 0x2f, 0x41, 0x75, 0xe6, 0x51, 0x56, 0x6d, 0xc1, 0x75, 0xf2, 0x4a, 0x04,
- 0x5e, 0x41, 0x75, 0xfe, 0xc5, 0x00, 0x95, 0x00, 0x3a, 0x39, 0xc5, 0x01,
- 0x62, 0x00, 0x3a, 0x40, 0x91, 0x05, 0x40, 0x39, 0xc2, 0x00, 0xf6, 0x05,
- 0x40, 0x40, 0x91, 0x05, 0x40, 0x49, 0xc2, 0x00, 0xf6, 0x05, 0x40, 0x50,
- 0x91, 0x05, 0x40, 0x61, 0xc2, 0x00, 0xf6, 0x05, 0x40, 0x68, 0x16, 0xc1,
- 0x76, 0x0a, 0x91, 0x05, 0x40, 0xa1, 0xc2, 0x00, 0xf6, 0x05, 0x40, 0xa8,
- 0x06, 0xc1, 0x76, 0x14, 0x91, 0x05, 0x40, 0xb1, 0xc2, 0x00, 0xf6, 0x05,
- 0x40, 0xb8, 0x91, 0x05, 0x40, 0x71, 0xc2, 0x00, 0xf6, 0x05, 0x40, 0x78,
- 0x91, 0x05, 0x40, 0xc9, 0xc2, 0x00, 0xf6, 0x05, 0x40, 0xd0, 0x91, 0x05,
- 0x40, 0xd9, 0xc2, 0x00, 0xf6, 0x05, 0x40, 0xe0, 0x91, 0x05, 0x40, 0xf1,
- 0xc2, 0x00, 0x39, 0x05, 0x41, 0x00, 0xc7, 0x11, 0x41, 0x05, 0x40, 0x59,
- 0xd0, 0x5d, 0x42, 0x05, 0x41, 0x60, 0x46, 0x00, 0x6b, 0x41, 0x76, 0x1e,
- 0x95, 0x01, 0x39, 0x40, 0xd1, 0x57, 0x6c, 0x01, 0x3e, 0x49, 0xc2, 0x00,
- 0xf3, 0x01, 0x14, 0x1b, 0x01, 0x76, 0x30, 0x46, 0x00, 0x95, 0xc1, 0x76,
- 0x34, 0x45, 0x00, 0x6c, 0xc1, 0x76, 0x40, 0x47, 0x13, 0x72, 0x41, 0x76,
- 0x52, 0x0e, 0xc1, 0x76, 0x5e, 0xd1, 0x1a, 0x39, 0x01, 0x03, 0xf1, 0x07,
- 0xc1, 0x76, 0x6a, 0xc5, 0x1b, 0x38, 0x01, 0x03, 0xd9, 0xc9, 0x68, 0x21,
- 0x01, 0x03, 0xd1, 0xc4, 0x22, 0x71, 0x01, 0x03, 0xc9, 0x15, 0xc1, 0x76,
- 0x76, 0x08, 0xc1, 0x76, 0x82, 0xc4, 0x15, 0xd3, 0x01, 0x03, 0x81, 0x16,
- 0xc1, 0x76, 0x8e, 0xc3, 0x01, 0xb4, 0x00, 0x05, 0xc8, 0xca, 0x9c, 0x58,
- 0x00, 0xe6, 0x39, 0xca, 0x9c, 0xee, 0x00, 0xe6, 0x31, 0xca, 0x9d, 0xac,
- 0x00, 0xe6, 0x29, 0xcb, 0x92, 0xdc, 0x00, 0xe6, 0x21, 0xc5, 0xd6, 0xc9,
- 0x00, 0xe6, 0x19, 0x12, 0xc1, 0x76, 0x9a, 0xc5, 0xdf, 0x66, 0x00, 0xe6,
- 0x00, 0x08, 0xc1, 0x76, 0xa6, 0x83, 0x00, 0xdc, 0x1b, 0x01, 0x76, 0xb0,
- 0x04, 0xc1, 0x76, 0xba, 0x0e, 0xc1, 0x76, 0xc4, 0x14, 0xc1, 0x76, 0xce,
- 0x15, 0xc1, 0x76, 0xd8, 0x0d, 0xc1, 0x76, 0xe2, 0xc2, 0x00, 0xa4, 0x00,
- 0xdd, 0x01, 0xc2, 0x96, 0xd0, 0x00, 0xdc, 0xf9, 0xc2, 0x01, 0x09, 0x00,
- 0xdc, 0xe9, 0xc2, 0x1d, 0x5f, 0x00, 0xdc, 0xd1, 0xc2, 0x00, 0xad, 0x00,
- 0xdc, 0xc9, 0xc2, 0x04, 0x41, 0x00, 0xdc, 0xb9, 0xc2, 0x03, 0xa4, 0x00,
- 0xdc, 0xa9, 0x10, 0xc1, 0x76, 0xec, 0xc2, 0x0b, 0xc6, 0x00, 0xdc, 0x99,
- 0xc2, 0x00, 0xb3, 0x00, 0xdc, 0x91, 0xc2, 0x02, 0xb4, 0x00, 0xdc, 0x81,
- 0xc2, 0x24, 0x58, 0x00, 0xdc, 0x79, 0xc2, 0x03, 0x40, 0x00, 0xdc, 0x71,
- 0xc2, 0x01, 0x29, 0x00, 0xdc, 0x61, 0xc2, 0x0f, 0x60, 0x00, 0xdc, 0x59,
- 0x87, 0x00, 0xdc, 0x43, 0x01, 0x76, 0xfc, 0x91, 0x00, 0xdc, 0x39, 0x97,
- 0x00, 0xdc, 0x29, 0x8b, 0x00, 0xdc, 0x20, 0xc4, 0x22, 0x71, 0x00, 0xdd,
- 0xc9, 0xc5, 0x01, 0xdb, 0x00, 0xdd, 0xc1, 0x15, 0xc1, 0x77, 0x00, 0x08,
- 0xc1, 0x77, 0x0c, 0x16, 0xc1, 0x77, 0x18, 0xc3, 0x01, 0xb4, 0x00, 0xdd,
- 0x89, 0xc4, 0x15, 0xd3, 0x00, 0xdd, 0x80, 0x47, 0xc8, 0xd4, 0xc1, 0x77,
- 0x24, 0x42, 0x15, 0x1c, 0xc1, 0x77, 0x30, 0xc7, 0xc1, 0x25, 0x00, 0xdd,
- 0x08, 0xc6, 0x1e, 0x23, 0x00, 0xdd, 0x59, 0x42, 0x03, 0xa4, 0x41, 0x77,
- 0x3c, 0x10, 0xc1, 0x77, 0x46, 0xc5, 0xdb, 0xd8, 0x00, 0xdd, 0x40, 0xca,
- 0x37, 0x0e, 0x01, 0x13, 0xf9, 0xc5, 0x07, 0x62, 0x01, 0x13, 0xe8, 0x4c,
- 0x24, 0x18, 0xc1, 0x77, 0x64, 0xcb, 0x0e, 0x83, 0x01, 0x55, 0xa1, 0x44,
- 0x1e, 0x2d, 0xc1, 0x77, 0x70, 0xcf, 0x69, 0x89, 0x01, 0x55, 0xc0, 0x00,
- 0x41, 0x77, 0x7c, 0xd0, 0x01, 0xf7, 0x01, 0x4b, 0xc9, 0x42, 0x08, 0x22,
- 0x41, 0x77, 0x91, 0xc3, 0x01, 0x4a, 0x01, 0x55, 0xe9, 0xcf, 0x68, 0x21,
- 0x01, 0x55, 0xf9, 0xd9, 0x1e, 0xa6, 0x01, 0x56, 0x08, 0xca, 0x0e, 0x84,
- 0x01, 0x04, 0x61, 0xc4, 0x01, 0xdc, 0x01, 0x04, 0x40, 0xc4, 0x18, 0x83,
- 0x01, 0x04, 0x39, 0xc2, 0x26, 0x51, 0x01, 0x04, 0x30, 0xc3, 0x0c, 0x5b,
- 0x01, 0x04, 0x29, 0xc3, 0x06, 0x9e, 0x01, 0x04, 0x20, 0xc4, 0x04, 0x5e,
- 0x01, 0x04, 0x19, 0xc2, 0x01, 0x47, 0x01, 0x04, 0x10, 0x4a, 0x00, 0x67,
- 0xc1, 0x77, 0x9d, 0x4e, 0x19, 0xb7, 0x41, 0x77, 0xb4, 0x42, 0x00, 0x79,
- 0xc1, 0x77, 0xc0, 0x07, 0xc1, 0x77, 0xd2, 0x14, 0xc1, 0x77, 0xed, 0x16,
- 0xc1, 0x77, 0xff, 0xcc, 0x86, 0x98, 0x0f, 0xa9, 0xc9, 0xce, 0x6f, 0x2f,
- 0x0f, 0xa9, 0xc1, 0xd1, 0x50, 0x84, 0x01, 0x53, 0x09, 0x03, 0xc1, 0x78,
- 0x0b, 0xd1, 0x50, 0xb7, 0x07, 0xf2, 0x89, 0xc9, 0x11, 0xdc, 0x07, 0xf2,
- 0x91, 0xc9, 0xac, 0xa6, 0x07, 0xf2, 0xa1, 0xcd, 0x2c, 0x41, 0x07, 0xf2,
- 0xb1, 0x42, 0x00, 0x54, 0xc1, 0x78, 0x1d, 0xcb, 0x90, 0xed, 0x07, 0xf2,
- 0xf9, 0x12, 0xc1, 0x78, 0x29, 0xcc, 0x89, 0x2c, 0x07, 0xf3, 0x19, 0xd1,
- 0x51, 0x3f, 0x07, 0xf3, 0x29, 0xcb, 0x97, 0xee, 0x07, 0xf3, 0x48, 0xcc,
- 0x23, 0x34, 0x01, 0x55, 0x60, 0x02, 0xc1, 0x78, 0x35, 0x00, 0x41, 0x78,
- 0x3d, 0xce, 0x4f, 0xdd, 0x01, 0x1c, 0xc9, 0xc2, 0x00, 0x28, 0x0f, 0xad,
- 0x42, 0x01, 0x78, 0x49, 0xc2, 0x05, 0x88, 0x0f, 0xa3, 0xc0, 0xc5, 0x07,
- 0x62, 0x01, 0x10, 0xe8, 0xd5, 0x37, 0x03, 0x01, 0x17, 0x41, 0xce, 0x70,
- 0xa9, 0x01, 0x15, 0x81, 0x46, 0x23, 0x35, 0xc1, 0x78, 0x4f, 0x46, 0x00,
- 0x95, 0x41, 0x78, 0x5b, 0x42, 0x00, 0x79, 0xc1, 0x78, 0x73, 0xc9, 0xac,
- 0xa6, 0x07, 0xf0, 0xa1, 0x07, 0xc1, 0x78, 0x7f, 0xcd, 0x2c, 0x41, 0x07,
- 0xf0, 0xb1, 0xd3, 0x24, 0x05, 0x07, 0xf0, 0xc9, 0xce, 0x70, 0xe1, 0x07,
- 0xf1, 0x81, 0xcd, 0x80, 0x7b, 0x07, 0xf1, 0xa1, 0x0e, 0xc1, 0x78, 0x91,
- 0x46, 0x02, 0x12, 0xc1, 0x78, 0x9d, 0x4c, 0x1b, 0x57, 0x41, 0x78, 0xcb,
- 0xcd, 0x7a, 0xb1, 0x01, 0x18, 0xc1, 0xc7, 0xc3, 0x6a, 0x0f, 0xb6, 0x80,
- 0x04, 0xc1, 0x78, 0xd7, 0x47, 0x6e, 0x26, 0xc1, 0x78, 0xe3, 0x16, 0xc1,
- 0x78, 0xfb, 0x08, 0xc1, 0x79, 0x13, 0x15, 0xc1, 0x79, 0x1d, 0x49, 0xb3,
- 0x6f, 0xc1, 0x79, 0x29, 0x48, 0xb9, 0xf5, 0xc1, 0x79, 0x41, 0x48, 0xba,
- 0xfd, 0xc1, 0x79, 0x59, 0x0d, 0xc1, 0x79, 0x71, 0x49, 0xb5, 0xaf, 0xc1,
- 0x79, 0x7d, 0xc9, 0xb4, 0xce, 0x0f, 0x85, 0xf9, 0xcb, 0x99, 0xf3, 0x0f,
- 0x86, 0xf8, 0x16, 0xc1, 0x79, 0x95, 0x08, 0x41, 0x79, 0xa1, 0x00, 0x41,
- 0x79, 0xad, 0x46, 0x0b, 0x31, 0xc1, 0x79, 0xbf, 0xc9, 0xb4, 0xd7, 0x0f,
- 0xa6, 0x20, 0x00, 0xc1, 0x79, 0xcb, 0xd8, 0x25, 0x38, 0x01, 0x33, 0xe8,
- 0x4d, 0x26, 0xea, 0xc1, 0x79, 0xd7, 0x4f, 0x07, 0x17, 0x41, 0x7a, 0x3f,
- 0x16, 0xc1, 0x7a, 0xa7, 0xc8, 0x4c, 0xe0, 0x01, 0x24, 0x31, 0x07, 0xc1,
- 0x7a, 0xb9, 0x15, 0xc1, 0x7a, 0xc5, 0x08, 0x41, 0x7a, 0xd1, 0xc4, 0x22,
- 0x71, 0x01, 0x23, 0xe1, 0xc5, 0x01, 0xdb, 0x01, 0x23, 0xd9, 0x15, 0xc1,
- 0x7a, 0xdd, 0x08, 0xc1, 0x7a, 0xe9, 0x16, 0xc1, 0x7a, 0xf5, 0xc3, 0x01,
- 0xb4, 0x01, 0x23, 0xa0, 0x0d, 0xc1, 0x7b, 0x01, 0xc5, 0xd9, 0x80, 0x01,
- 0x90, 0x0b, 0x01, 0x7b, 0x13, 0x16, 0xc1, 0x7b, 0x19, 0xc5, 0xd6, 0x3d,
- 0x01, 0x90, 0x1b, 0x01, 0x7b, 0x2b, 0xc5, 0xdb, 0x51, 0x01, 0x90, 0x23,
- 0x01, 0x7b, 0x31, 0x12, 0xc1, 0x7b, 0x37, 0xc4, 0xac, 0xd8, 0x01, 0x90,
- 0x33, 0x01, 0x7b, 0x49, 0xc5, 0xbb, 0xa0, 0x01, 0x90, 0x3b, 0x01, 0x7b,
- 0x4f, 0x05, 0xc1, 0x7b, 0x55, 0xc5, 0x98, 0x41, 0x01, 0x90, 0x6a, 0x01,
- 0x7b, 0x67, 0xc4, 0xe2, 0xbb, 0x01, 0x90, 0xe9, 0xc3, 0x0c, 0x4a, 0x01,
- 0x90, 0xf0, 0xc3, 0x01, 0xb4, 0x01, 0x91, 0x01, 0x16, 0xc1, 0x7b, 0x6d,
- 0x08, 0xc1, 0x7b, 0x7f, 0x15, 0xc1, 0x7b, 0x8f, 0x07, 0xc1, 0x7b, 0xad,
- 0x10, 0xc1, 0x7b, 0xbf, 0x0f, 0xc1, 0x7b, 0xcb, 0x19, 0xc1, 0x7b, 0xd7,
- 0xc4, 0xe1, 0x2b, 0x01, 0x91, 0x91, 0x05, 0xc1, 0x7b, 0xe3, 0xc5, 0xda,
- 0xac, 0x01, 0x91, 0xc1, 0x42, 0x00, 0x9c, 0xc1, 0x7b, 0xef, 0xc8, 0xb9,
- 0x6d, 0x01, 0x91, 0xf8, 0xc2, 0x02, 0x18, 0x01, 0x11, 0x29, 0x45, 0x00,
- 0x6c, 0x41, 0x7b, 0xff, 0xca, 0x19, 0xf4, 0x01, 0x01, 0x49, 0xc2, 0x07,
- 0x43, 0x01, 0x70, 0x79, 0xc7, 0x62, 0x02, 0x01, 0x72, 0x68, 0xc5, 0x2b,
- 0x13, 0x08, 0xd7, 0xc1, 0xc7, 0x42, 0x0d, 0x08, 0xd7, 0x80, 0x00, 0x41,
- 0x7c, 0x0b, 0x08, 0xc1, 0x7c, 0x1a, 0x8b, 0x08, 0xd6, 0xbb, 0x01, 0x7c,
- 0x24, 0x97, 0x08, 0xd6, 0xcb, 0x01, 0x7c, 0x28, 0x91, 0x08, 0xd6, 0xc1,
- 0x87, 0x08, 0xd6, 0xb1, 0x83, 0x08, 0xd6, 0xa9, 0x05, 0xc1, 0x7c, 0x2c,
- 0xc2, 0x02, 0x59, 0x08, 0xd6, 0x91, 0x12, 0xc1, 0x7c, 0x36, 0x10, 0xc1,
- 0x7c, 0x40, 0x16, 0xc1, 0x7c, 0x4a, 0xc2, 0x00, 0xde, 0x08, 0xd6, 0x61,
- 0xc2, 0x0c, 0x65, 0x08, 0xd6, 0x59, 0x0d, 0xc1, 0x7c, 0x54, 0xc2, 0x01,
- 0x29, 0x08, 0xd6, 0x49, 0xc2, 0x00, 0xa4, 0x08, 0xd6, 0x41, 0xc2, 0x04,
- 0x41, 0x08, 0xd6, 0x31, 0xc2, 0x02, 0xb4, 0x08, 0xd6, 0x29, 0xc2, 0x0b,
- 0xc6, 0x08, 0xd6, 0x21, 0xc2, 0x00, 0xad, 0x08, 0xd6, 0x19, 0xc2, 0x00,
- 0xc7, 0x08, 0xd6, 0x10, 0xc5, 0x2b, 0x13, 0x08, 0xd7, 0x91, 0xca, 0xa6,
- 0x62, 0x08, 0xd7, 0x88, 0x00, 0x41, 0x7c, 0x5e, 0xc6, 0x2b, 0x12, 0x08,
- 0xd7, 0x50, 0xc5, 0x2b, 0x13, 0x08, 0xd7, 0x49, 0xc4, 0x0e, 0x41, 0x08,
- 0xd7, 0x2a, 0x01, 0x7c, 0x6d, 0xc4, 0x73, 0xd9, 0x0f, 0x99, 0xa1, 0xc9,
- 0xb4, 0x23, 0x0f, 0xd7, 0x99, 0xc7, 0xca, 0x2b, 0x0f, 0xd7, 0xa1, 0xc6,
- 0x26, 0x97, 0x01, 0x70, 0xc8, 0x47, 0x33, 0xef, 0xc1, 0x7c, 0x73, 0xd6,
- 0x2f, 0xdd, 0x08, 0x43, 0xc1, 0x42, 0x00, 0x54, 0x41, 0x7c, 0x81, 0x18,
- 0xc1, 0x7c, 0x8d, 0x0d, 0xc1, 0x7c, 0x99, 0x16, 0xc1, 0x7c, 0xab, 0x1b,
- 0xc1, 0x7c, 0xb5, 0xc3, 0xe7, 0x9f, 0x0b, 0x5c, 0x59, 0x42, 0x00, 0xa4,
- 0xc1, 0x7c, 0xc1, 0xc4, 0xe5, 0x1f, 0x0b, 0x5c, 0x39, 0xc4, 0xe5, 0x0f,
- 0x0b, 0x5c, 0x21, 0xc5, 0xda, 0xfc, 0x0b, 0x5c, 0x09, 0x0e, 0x41, 0x7c,
- 0xcb, 0x05, 0xc1, 0x7c, 0xd7, 0xc3, 0xe6, 0xe8, 0x0b, 0x59, 0x71, 0xc2,
- 0x13, 0xa9, 0x0b, 0x59, 0x69, 0x10, 0xc1, 0x7c, 0xe3, 0xc5, 0xd8, 0x04,
- 0x0b, 0x59, 0x51, 0x0a, 0xc1, 0x7c, 0xff, 0xc3, 0xe7, 0x87, 0x0b, 0x59,
- 0x31, 0xc3, 0x50, 0x71, 0x0b, 0x59, 0x21, 0xc4, 0xe5, 0x4b, 0x0b, 0x59,
- 0x19, 0xc3, 0xba, 0xc5, 0x0b, 0x59, 0x09, 0xc3, 0x20, 0x92, 0x0b, 0x58,
- 0xf1, 0xc3, 0xe7, 0x0f, 0x0b, 0x58, 0xe0, 0xc8, 0xb8, 0x55, 0x0b, 0x5b,
- 0xb9, 0xc8, 0xbb, 0x3d, 0x0b, 0x5b, 0xb1, 0x16, 0xc1, 0x7d, 0x11, 0x05,
- 0xc1, 0x7d, 0x20, 0xd2, 0x4a, 0xb2, 0x0b, 0x5b, 0x90, 0xc2, 0x05, 0xd5,
- 0x0b, 0x5b, 0x89, 0x44, 0xa9, 0x0c, 0x41, 0x7d, 0x2c, 0xc2, 0x13, 0xa9,
- 0x0b, 0x5b, 0x79, 0xca, 0xa9, 0x0a, 0x0b, 0x5b, 0x69, 0xce, 0x6c, 0x3b,
- 0x0b, 0x5b, 0x30, 0xc3, 0xe7, 0x9c, 0x0b, 0x5b, 0x59, 0xc3, 0xe6, 0xb5,
- 0x0b, 0x5b, 0x48, 0xc3, 0x46, 0xe6, 0x0b, 0x5b, 0x51, 0x1b, 0xc1, 0x7d,
- 0x38, 0xc3, 0x28, 0x49, 0x0b, 0x5a, 0x20, 0xc3, 0x5f, 0xd1, 0x0b, 0x5b,
- 0x41, 0xc2, 0x00, 0x8a, 0x0b, 0x5b, 0x28, 0xc3, 0x28, 0x53, 0x0b, 0x5b,
- 0x19, 0xc4, 0xe5, 0xb3, 0x0b, 0x5a, 0x11, 0xc4, 0xe0, 0x4f, 0x0b, 0x5a,
- 0x01, 0xc4, 0xe1, 0xc7, 0x0b, 0x59, 0xd9, 0x16, 0x41, 0x7d, 0x44, 0xc8,
- 0xba, 0x5d, 0x0b, 0x5b, 0x09, 0x42, 0x00, 0x4d, 0x41, 0x7d, 0x4e, 0xc9,
- 0x32, 0x87, 0x0b, 0x5a, 0xf9, 0x95, 0x0b, 0x5a, 0xe0, 0xc4, 0x18, 0x83,
- 0x0b, 0x5a, 0xb9, 0xc2, 0x26, 0x51, 0x0b, 0x5a, 0xb0, 0xc3, 0x0c, 0x5b,
- 0x0b, 0x5a, 0xa9, 0xc3, 0x06, 0x9e, 0x0b, 0x5a, 0xa0, 0xc4, 0x04, 0x5e,
- 0x0b, 0x5a, 0x99, 0xc2, 0x01, 0x47, 0x0b, 0x5a, 0x90, 0xc3, 0xa6, 0xf7,
- 0x0b, 0x59, 0xb1, 0xc2, 0x05, 0x98, 0x0b, 0x59, 0x80, 0xc3, 0xa7, 0x3e,
- 0x0b, 0x59, 0xa1, 0x91, 0x0b, 0x59, 0x88, 0xc3, 0x44, 0x77, 0x0b, 0x59,
- 0x99, 0xc2, 0x01, 0x30, 0x0b, 0x59, 0x90, 0x03, 0xc1, 0x7d, 0x56, 0x98,
- 0x0b, 0x58, 0xb9, 0x84, 0x0b, 0x58, 0xb1, 0x19, 0xc1, 0x7d, 0x5e, 0x0b,
- 0xc1, 0x7d, 0x66, 0x17, 0x41, 0x7d, 0x6e, 0x98, 0x0b, 0x58, 0xc9, 0x84,
- 0x0b, 0x58, 0xc0, 0x03, 0xc1, 0x7d, 0x76, 0x98, 0x0b, 0x58, 0x19, 0x84,
- 0x0b, 0x58, 0x10, 0x98, 0x0b, 0x58, 0x99, 0x84, 0x0b, 0x58, 0x91, 0x11,
- 0x41, 0x7d, 0x7e, 0x03, 0xc1, 0x7d, 0x86, 0x98, 0x0b, 0x58, 0x39, 0x84,
- 0x0b, 0x58, 0x30, 0x98, 0x0b, 0x58, 0x49, 0x84, 0x0b, 0x58, 0x41, 0x07,
- 0x41, 0x7d, 0x8e, 0xc4, 0x2a, 0xc6, 0x0f, 0xa7, 0x79, 0xc4, 0x00, 0xba,
- 0x01, 0x80, 0x92, 0x01, 0x7d, 0x96, 0x00, 0xc1, 0x7d, 0x9c, 0xcb, 0x7e,
- 0x00, 0x0f, 0xa5, 0xd8, 0x91, 0x08, 0x5d, 0x51, 0xc4, 0x18, 0x85, 0x08,
- 0x5d, 0x70, 0xc3, 0xdf, 0x4a, 0x08, 0x5c, 0x79, 0xc4, 0xd9, 0x77, 0x08,
- 0x5c, 0x68, 0x16, 0xc1, 0x7d, 0xc4, 0xc3, 0x01, 0xb4, 0x08, 0x48, 0xb2,
- 0x01, 0x7d, 0xd4, 0x16, 0xc1, 0x7d, 0xda, 0x15, 0xc1, 0x7d, 0xe6, 0xc4,
- 0xbc, 0x75, 0x08, 0x48, 0x99, 0xc3, 0xe6, 0xbb, 0x08, 0x48, 0x91, 0xc2,
- 0x00, 0x27, 0x08, 0x48, 0x81, 0x03, 0xc1, 0x7d, 0xf8, 0xc3, 0x1f, 0xd8,
- 0x08, 0x48, 0x69, 0xc3, 0x0b, 0x0e, 0x08, 0x48, 0x61, 0xc4, 0xda, 0xd9,
- 0x08, 0x48, 0x59, 0xc3, 0xbf, 0x5a, 0x08, 0x48, 0x51, 0xc3, 0x4b, 0x98,
- 0x08, 0x48, 0x49, 0xc2, 0x01, 0xf0, 0x08, 0x48, 0x23, 0x01, 0x7e, 0x04,
- 0xc3, 0x69, 0x5c, 0x08, 0x48, 0x31, 0xc3, 0xe6, 0xe2, 0x08, 0x48, 0x29,
- 0xc4, 0xd7, 0x87, 0x08, 0x48, 0x19, 0xc4, 0xe2, 0x07, 0x08, 0x48, 0x11,
- 0xc3, 0x00, 0x48, 0x08, 0x48, 0x08, 0x0d, 0xc1, 0x7e, 0x08, 0x09, 0xc1,
- 0x7e, 0x12, 0x10, 0xc1, 0x7e, 0x1c, 0x05, 0xc1, 0x7e, 0x32, 0xc2, 0x24,
- 0x58, 0x05, 0x42, 0x31, 0x16, 0xc1, 0x7e, 0x3f, 0x06, 0xc1, 0x7e, 0x51,
- 0x12, 0xc1, 0x7e, 0x61, 0xc2, 0x00, 0xde, 0x05, 0x42, 0x71, 0xc2, 0x00,
- 0xad, 0x05, 0x42, 0x79, 0xc2, 0x01, 0x09, 0x05, 0x42, 0x99, 0x1c, 0xc1,
- 0x7e, 0x6b, 0x15, 0xc1, 0x7e, 0x75, 0xc2, 0x1d, 0x5f, 0x05, 0x42, 0xb9,
- 0xc2, 0x02, 0x59, 0x05, 0x42, 0xc1, 0xc2, 0x00, 0xc7, 0x05, 0x42, 0xc9,
- 0xc2, 0x00, 0xa4, 0x05, 0x42, 0xe1, 0x83, 0x05, 0x42, 0xeb, 0x01, 0x7e,
- 0x85, 0x8b, 0x05, 0x42, 0xf1, 0x97, 0x05, 0x42, 0xf9, 0x87, 0x05, 0x43,
- 0x03, 0x01, 0x7e, 0x91, 0x91, 0x05, 0x43, 0x09, 0xc2, 0x0f, 0x60, 0x05,
- 0x43, 0x11, 0xc2, 0x96, 0xd0, 0x05, 0x43, 0x19, 0xc2, 0x00, 0x67, 0x05,
- 0x43, 0x21, 0x45, 0x17, 0x58, 0x41, 0x7e, 0x95, 0x17, 0xc1, 0x7e, 0xa1,
- 0xcf, 0x62, 0x45, 0x05, 0x43, 0xa0, 0xc4, 0x02, 0x28, 0x05, 0x43, 0xb1,
- 0xcb, 0x97, 0x80, 0x05, 0x43, 0xb8, 0xc9, 0xa3, 0x42, 0x08, 0x0e, 0x81,
- 0x0e, 0xc1, 0x7e, 0xad, 0xc6, 0xcf, 0x31, 0x08, 0x0f, 0xa0, 0xcc, 0x8c,
- 0xa4, 0x08, 0x0e, 0x91, 0xc4, 0xe1, 0x5b, 0x08, 0x0e, 0xc1, 0xc4, 0x5a,
- 0x29, 0x08, 0x0f, 0x80, 0x03, 0xc1, 0x7e, 0xb9, 0xc4, 0xe1, 0x23, 0x08,
- 0x0e, 0xa1, 0xc3, 0x28, 0x53, 0x08, 0x0e, 0xe1, 0x11, 0x41, 0x7e, 0xc9,
- 0xc4, 0x2a, 0x6a, 0x08, 0x0e, 0xa9, 0xc8, 0xb6, 0xa5, 0x08, 0x0f, 0xe0,
- 0xc5, 0xba, 0x98, 0x08, 0x0e, 0xb1, 0xc3, 0x00, 0xbf, 0x08, 0x0f, 0x49,
- 0xc3, 0x03, 0x33, 0x08, 0x0f, 0x50, 0x11, 0xc1, 0x7e, 0xd8, 0xc2, 0x00,
- 0x4c, 0x08, 0x0f, 0x8b, 0x01, 0x7e, 0xe2, 0xc8, 0xbe, 0xb5, 0x08, 0x0f,
- 0x58, 0x42, 0x00, 0x0a, 0xc1, 0x7e, 0xe8, 0xc2, 0x26, 0xfa, 0x08, 0x0e,
- 0xf9, 0xc4, 0x04, 0xb5, 0x08, 0x0f, 0x29, 0xc8, 0xbe, 0x05, 0x08, 0x0f,
- 0xd9, 0xc7, 0xc7, 0x3e, 0x08, 0x0f, 0xd0, 0xc6, 0xce, 0xe3, 0x08, 0x0e,
- 0xe9, 0xc5, 0xd7, 0x69, 0x08, 0x0e, 0xf0, 0x86, 0x08, 0x0f, 0x01, 0xc2,
- 0x02, 0x55, 0x08, 0x0f, 0xb0, 0xc4, 0xe2, 0x7b, 0x08, 0x0f, 0x19, 0xc2,
- 0x00, 0x7b, 0x08, 0x0f, 0x78, 0xc2, 0x00, 0xc2, 0x08, 0x0f, 0x69, 0xc6,
- 0xcb, 0xef, 0x08, 0x0f, 0xa8, 0xc5, 0xd9, 0x2b, 0x08, 0x0f, 0xc9, 0xc7,
- 0xc6, 0x26, 0x08, 0x0e, 0xb8, 0xc4, 0x04, 0x5e, 0x00, 0x00, 0x99, 0xc2,
- 0x01, 0x47, 0x00, 0x00, 0x90, 0x44, 0x00, 0xcc, 0xc1, 0x7e, 0xf2, 0x4b,
- 0x95, 0xc8, 0x41, 0x7f, 0x4b, 0xc5, 0x00, 0xb9, 0x08, 0xd8, 0x03, 0x01,
- 0x7f, 0x57, 0xc6, 0x01, 0x01, 0x05, 0x47, 0xd8, 0xcb, 0x84, 0x41, 0x00,
- 0x4a, 0xa1, 0xd0, 0x50, 0x41, 0x00, 0x4b, 0x80, 0xcb, 0x1f, 0x95, 0x00,
- 0x4a, 0x99, 0xc9, 0x95, 0x5b, 0x05, 0x47, 0xc8, 0x03, 0xc1, 0x7f, 0x5d,
- 0xcf, 0x64, 0x34, 0x00, 0x4a, 0x71, 0x91, 0x00, 0x4a, 0x5b, 0x01, 0x7f,
- 0x71, 0x46, 0x2b, 0xff, 0xc1, 0x7f, 0x7b, 0x47, 0xc9, 0x91, 0xc1, 0x7f,
- 0x83, 0x87, 0x00, 0x4a, 0x39, 0x48, 0xac, 0xc1, 0xc1, 0x7f, 0x91, 0x97,
- 0x00, 0x4a, 0x0b, 0x01, 0x7f, 0x9f, 0x8b, 0x00, 0x49, 0xfa, 0x01, 0x7f,
- 0xaa, 0x0a, 0xc1, 0x7f, 0xae, 0x15, 0xc1, 0x7f, 0xb8, 0x18, 0xc1, 0x7f,
- 0xc6, 0x0e, 0xc1, 0x7f, 0xd0, 0x14, 0xc1, 0x7f, 0xd8, 0x1b, 0xc1, 0x7f,
- 0xe8, 0xc2, 0x00, 0xad, 0x00, 0x49, 0x73, 0x01, 0x7f, 0xf2, 0x04, 0xc1,
- 0x7f, 0xf8, 0x12, 0xc1, 0x80, 0x08, 0x10, 0xc1, 0x80, 0x12, 0x06, 0xc1,
- 0x80, 0x26, 0x16, 0xc1, 0x80, 0x34, 0x0c, 0xc1, 0x80, 0x42, 0x05, 0xc1,
- 0x80, 0x52, 0x09, 0xc1, 0x80, 0x5f, 0x0d, 0xc1, 0x80, 0x73, 0x83, 0x00,
- 0x48, 0x2b, 0x01, 0x80, 0x7b, 0x91, 0x00, 0x48, 0x9b, 0x01, 0x80, 0x8f,
- 0x87, 0x00, 0x48, 0x79, 0x97, 0x00, 0x48, 0x4b, 0x01, 0x80, 0x99, 0x8b,
- 0x00, 0x48, 0x3b, 0x01, 0x80, 0xa4, 0xc2, 0x0f, 0x60, 0x00, 0x4a, 0xc1,
- 0x1c, 0xc1, 0x80, 0xa8, 0xc2, 0x00, 0x67, 0x00, 0x4a, 0xf0, 0x45, 0x06,
- 0x98, 0xc1, 0x80, 0xb2, 0xcb, 0x95, 0x0d, 0x00, 0x4b, 0x29, 0xc4, 0x1c,
- 0xd0, 0x00, 0x4b, 0x20, 0xc7, 0xc2, 0x05, 0x0f, 0x9e, 0xe8, 0x4f, 0x07,
- 0x17, 0xc1, 0x80, 0xd6, 0x4d, 0x26, 0xea, 0x41, 0x81, 0x38, 0xcf, 0x64,
- 0x7f, 0x01, 0x1f, 0x41, 0xd4, 0x3b, 0x39, 0x01, 0x1c, 0xb0, 0x47, 0x07,
- 0x3a, 0xc1, 0x81, 0x9a, 0x44, 0x03, 0x27, 0xc1, 0x81, 0xa6, 0xc4, 0x53,
- 0x38, 0x01, 0x1e, 0x30, 0xc8, 0x01, 0xe7, 0x01, 0x1e, 0x19, 0xc6, 0x01,
- 0x7a, 0x01, 0x1e, 0x00, 0xc4, 0x53, 0x38, 0x01, 0x1e, 0x41, 0xc8, 0x01,
- 0xe7, 0x01, 0x1e, 0x29, 0xc6, 0x01, 0x7a, 0x01, 0x1e, 0x10, 0xc4, 0x53,
- 0x38, 0x01, 0x1e, 0x39, 0xc8, 0x01, 0xe7, 0x01, 0x1e, 0x21, 0xc6, 0x01,
- 0x7a, 0x01, 0x1e, 0x08, 0x44, 0x81, 0x73, 0x41, 0x81, 0xb2, 0xca, 0xa6,
- 0xf8, 0x0e, 0x98, 0x11, 0xcd, 0x79, 0x11, 0x0e, 0x98, 0x08, 0xc2, 0x00,
- 0x34, 0x01, 0x34, 0x79, 0xc3, 0x01, 0x93, 0x01, 0x34, 0x60, 0xc3, 0x01,
- 0x93, 0x01, 0x34, 0x71, 0xc2, 0x00, 0x34, 0x01, 0x34, 0x68, 0x00, 0x41,
- 0x81, 0xbe, 0x00, 0x41, 0x81, 0xca, 0xc4, 0x18, 0x83, 0x00, 0x01, 0xbb,
- 0x01, 0x81, 0xd6, 0xc2, 0x26, 0x51, 0x00, 0x01, 0xb2, 0x01, 0x81, 0xda,
- 0xc3, 0x0c, 0x5b, 0x00, 0x01, 0xab, 0x01, 0x81, 0xde, 0xc3, 0x06, 0x9e,
- 0x00, 0x01, 0xa2, 0x01, 0x81, 0xe2, 0xc4, 0x04, 0x5e, 0x00, 0x01, 0x9b,
- 0x01, 0x81, 0xe6, 0xc2, 0x01, 0x47, 0x00, 0x01, 0x92, 0x01, 0x81, 0xea,
- 0x00, 0x41, 0x81, 0xee, 0x00, 0x41, 0x81, 0xfa, 0x45, 0x06, 0x98, 0xc1,
- 0x82, 0x06, 0xcb, 0x95, 0x0d, 0x08, 0xca, 0x20, 0xc5, 0x35, 0x00, 0x08,
- 0xca, 0x19, 0xc7, 0xca, 0x5c, 0x08, 0xc9, 0xe9, 0xcb, 0x1e, 0x17, 0x08,
- 0xc9, 0xe1, 0xc8, 0x11, 0x40, 0x08, 0xc9, 0xd8, 0xc2, 0x02, 0x59, 0x08,
- 0xca, 0x11, 0xc2, 0x1d, 0x5f, 0x08, 0xca, 0x00, 0xc5, 0x1e, 0x24, 0x08,
- 0xc9, 0xf1, 0x4a, 0x6f, 0xcd, 0x41, 0x82, 0x2a, 0xc2, 0x02, 0xb4, 0x08,
- 0xc9, 0x79, 0x0e, 0xc1, 0x82, 0x44, 0xc2, 0x00, 0xa4, 0x08, 0xc9, 0x69,
- 0x15, 0xc1, 0x82, 0x4e, 0xc2, 0x04, 0x41, 0x08, 0xc9, 0x49, 0xc2, 0x02,
- 0x59, 0x08, 0xc9, 0x39, 0x1b, 0xc1, 0x82, 0x5e, 0xc2, 0x00, 0xad, 0x08,
- 0xc9, 0x21, 0x04, 0xc1, 0x82, 0x68, 0x12, 0xc1, 0x82, 0x72, 0x10, 0xc1,
- 0x82, 0x7c, 0x06, 0xc1, 0x82, 0x92, 0x16, 0xc1, 0x82, 0xa0, 0xc2, 0x24,
- 0x58, 0x08, 0xc8, 0x99, 0x05, 0xc1, 0x82, 0xb0, 0x09, 0xc1, 0x82, 0xba,
- 0x0d, 0xc1, 0x82, 0xc4, 0x91, 0x08, 0xc8, 0x49, 0x87, 0x08, 0xc8, 0x31,
- 0x97, 0x08, 0xc8, 0x23, 0x01, 0x82, 0xce, 0x8b, 0x08, 0xc8, 0x13, 0x01,
- 0x82, 0xd2, 0x83, 0x08, 0xc8, 0x02, 0x01, 0x82, 0xd6, 0xc5, 0x01, 0x2d,
- 0x01, 0x16, 0x39, 0x15, 0x41, 0x82, 0xda, 0xca, 0x9d, 0xc0, 0x01, 0x3c,
- 0x99, 0x46, 0x06, 0x97, 0x41, 0x82, 0xe6, 0xc4, 0x22, 0x71, 0x01, 0x3b,
- 0xf1, 0xc5, 0x01, 0xdb, 0x01, 0x3b, 0xe9, 0x15, 0xc1, 0x83, 0x0a, 0x08,
- 0xc1, 0x83, 0x16, 0x16, 0xc1, 0x83, 0x22, 0xc3, 0x01, 0xb4, 0x01, 0x3b,
- 0xb0, 0xc4, 0x22, 0x71, 0x01, 0x3c, 0x41, 0xc5, 0x01, 0xdb, 0x01, 0x3c,
- 0x39, 0x15, 0xc1, 0x83, 0x2e, 0x08, 0xc1, 0x83, 0x3a, 0x16, 0xc1, 0x83,
- 0x46, 0xc3, 0x01, 0xb4, 0x01, 0x3c, 0x01, 0xc4, 0x15, 0xd3, 0x0f, 0x88,
- 0x58, 0xc4, 0x00, 0x67, 0x0f, 0xb0, 0xf1, 0xd1, 0x53, 0x92, 0x0f, 0xb1,
- 0x28, 0xc8, 0x17, 0x45, 0x01, 0x16, 0x21, 0xd7, 0x2b, 0x90, 0x0f, 0xa5,
- 0x01, 0x45, 0x00, 0x6c, 0xc1, 0x83, 0x52, 0xc6, 0xd2, 0x85, 0x0f, 0xbc,
- 0xe0, 0xc4, 0x02, 0x83, 0x0f, 0xc8, 0x43, 0x01, 0x83, 0x6a, 0xcc, 0x81,
- 0xdc, 0x0f, 0xc8, 0x4a, 0x01, 0x83, 0x70, 0x16, 0xc1, 0x83, 0x76, 0x15,
- 0xc1, 0x83, 0x82, 0x0a, 0xc1, 0x83, 0x8e, 0x03, 0xc1, 0x83, 0x9a, 0xcf,
- 0x64, 0x9d, 0x01, 0x3f, 0x89, 0xcb, 0x04, 0xfc, 0x01, 0x0f, 0x4b, 0x01,
- 0x83, 0xa9, 0x06, 0xc1, 0x83, 0xaf, 0xcd, 0x79, 0x2b, 0x01, 0x0e, 0x51,
- 0xcc, 0x30, 0xd9, 0x01, 0x0d, 0x79, 0xc6, 0xcb, 0xe9, 0x0f, 0xb3, 0x79,
- 0x46, 0x05, 0xef, 0xc1, 0x83, 0xbb, 0xd1, 0x51, 0x50, 0x0f, 0xc1, 0xb9,
- 0xd0, 0x58, 0x92, 0x0f, 0xc1, 0xf8, 0xd2, 0x4c, 0xaa, 0x01, 0x57, 0x88,
- 0xd0, 0x5c, 0x22, 0x01, 0x4f, 0x49, 0xcf, 0x6b, 0x2d, 0x01, 0x4f, 0x40,
- 0x43, 0xe6, 0x8b, 0xc1, 0x83, 0xc7, 0x43, 0xe7, 0x7b, 0xc1, 0x83, 0xe3,
- 0x43, 0xe7, 0x54, 0xc1, 0x83, 0xff, 0x43, 0xe7, 0xd5, 0xc1, 0x84, 0x1b,
- 0x43, 0xe7, 0xcc, 0xc1, 0x84, 0x37, 0x43, 0xe6, 0xf1, 0xc1, 0x84, 0x53,
- 0x43, 0xe7, 0x15, 0x41, 0x84, 0x6f, 0x43, 0xe7, 0x54, 0xc1, 0x84, 0x8b,
- 0x43, 0xe7, 0x7b, 0xc1, 0x84, 0xa7, 0x43, 0xe7, 0xd5, 0xc1, 0x84, 0xc3,
- 0x43, 0xe7, 0xcc, 0xc1, 0x84, 0xdf, 0x43, 0xe6, 0x8b, 0xc1, 0x84, 0xfb,
- 0x43, 0xe6, 0xf1, 0xc1, 0x85, 0x17, 0x43, 0xe7, 0x15, 0x41, 0x85, 0x33,
- 0x05, 0xc1, 0x85, 0x4f, 0x49, 0x03, 0x3b, 0xc1, 0x85, 0x61, 0x17, 0xc1,
- 0x85, 0x70, 0x44, 0x08, 0x9b, 0xc1, 0x85, 0x7c, 0x15, 0xc1, 0x85, 0x88,
- 0xcd, 0x2c, 0x41, 0x01, 0x02, 0x39, 0xd0, 0x0f, 0xfb, 0x01, 0x01, 0xe1,
- 0x12, 0xc1, 0x85, 0x9c, 0x06, 0xc1, 0x85, 0xa6, 0x0a, 0xc1, 0x85, 0xb2,
- 0x0e, 0xc1, 0x85, 0xbe, 0xdb, 0x16, 0x75, 0x01, 0x4c, 0xb1, 0x47, 0xc3,
- 0x1d, 0xc1, 0x85, 0xc8, 0xcc, 0x84, 0x40, 0x00, 0x16, 0xe9, 0xcd, 0x81,
- 0x24, 0x07, 0xf2, 0x61, 0xce, 0x74, 0x7d, 0x01, 0x70, 0xb8, 0xc9, 0x1c,
- 0xee, 0x01, 0x35, 0x19, 0xcb, 0x24, 0xe5, 0x01, 0x35, 0x11, 0xc6, 0x00,
- 0x71, 0x01, 0x5f, 0xe0, 0x47, 0x6f, 0x1a, 0xc1, 0x85, 0xd7, 0xce, 0x6c,
- 0x49, 0x01, 0x4e, 0xf9, 0x45, 0x00, 0x8a, 0x41, 0x85, 0xef, 0xc5, 0x01,
- 0x7b, 0x01, 0x2e, 0x61, 0xc4, 0x0d, 0xbd, 0x01, 0x02, 0xe0, 0xc5, 0x07,
- 0x0a, 0x01, 0x58, 0xd1, 0xc6, 0x27, 0xf9, 0x01, 0x72, 0x50, 0xc5, 0x35,
- 0x00, 0x08, 0xc1, 0xd1, 0x42, 0x03, 0x32, 0xc1, 0x85, 0xfb, 0xc8, 0x11,
- 0x40, 0x08, 0xc1, 0xb8, 0x03, 0xc1, 0x86, 0x07, 0x91, 0x08, 0xc1, 0xa9,
- 0x87, 0x08, 0xc1, 0x99, 0xc9, 0xac, 0xc1, 0x08, 0xc1, 0x8b, 0x01, 0x86,
- 0x13, 0x97, 0x08, 0xc1, 0x7b, 0x01, 0x86, 0x17, 0x8b, 0x08, 0xc1, 0x6a,
- 0x01, 0x86, 0x1b, 0x14, 0xc1, 0x86, 0x1f, 0xc2, 0x00, 0xa4, 0x08, 0xc1,
- 0x51, 0x15, 0xc1, 0x86, 0x29, 0xc2, 0x04, 0x41, 0x08, 0xc1, 0x31, 0xc2,
- 0x00, 0xc7, 0x08, 0xc1, 0x29, 0xc2, 0x1d, 0x5f, 0x08, 0xc1, 0x19, 0xc2,
- 0x00, 0xad, 0x08, 0xc1, 0x11, 0x04, 0xc1, 0x86, 0x39, 0x12, 0xc1, 0x86,
- 0x43, 0x10, 0xc1, 0x86, 0x4d, 0x06, 0xc1, 0x86, 0x63, 0x16, 0xc1, 0x86,
- 0x71, 0x0c, 0xc1, 0x86, 0x7f, 0x05, 0xc1, 0x86, 0x89, 0x09, 0xc1, 0x86,
- 0x93, 0x0d, 0xc1, 0x86, 0x9d, 0x83, 0x08, 0xc0, 0x03, 0x01, 0x86, 0xa7,
- 0x91, 0x08, 0xc0, 0x41, 0x87, 0x08, 0xc0, 0x31, 0x97, 0x08, 0xc0, 0x23,
- 0x01, 0x86, 0xb3, 0x8b, 0x08, 0xc0, 0x12, 0x01, 0x86, 0xb7, 0xc9, 0x23,
- 0x34, 0x01, 0x17, 0x68, 0xc9, 0x23, 0x34, 0x01, 0x17, 0x00, 0xcc, 0x89,
- 0x08, 0x0f, 0xad, 0xd0, 0x43, 0x01, 0x12, 0xc1, 0x86, 0xbb, 0xd5, 0x36,
- 0x70, 0x0d, 0xe3, 0x80, 0xc8, 0x03, 0x3b, 0x0d, 0xe4, 0x43, 0x01, 0x86,
- 0xea, 0xc4, 0x53, 0x38, 0x0d, 0xe4, 0x39, 0x0e, 0xc1, 0x86, 0xf0, 0xc6,
- 0x01, 0x7a, 0x0d, 0xe4, 0x29, 0xc3, 0x01, 0x4a, 0x0d, 0xe4, 0x21, 0xc5,
- 0x1f, 0x94, 0x0d, 0xe4, 0x11, 0xcb, 0x95, 0x2e, 0x0d, 0xe4, 0x09, 0xc5,
- 0x35, 0x4a, 0x0d, 0xe4, 0x00, 0x42, 0x00, 0xb3, 0xc1, 0x86, 0xfc, 0xc6,
- 0xd2, 0x0d, 0x0d, 0xe3, 0xd9, 0xc6, 0x95, 0x49, 0x0d, 0xe3, 0xd1, 0xd4,
- 0x38, 0x91, 0x0d, 0xe3, 0xb9, 0xc6, 0x27, 0x23, 0x0d, 0xe3, 0xb0, 0xcf,
- 0x60, 0xec, 0x0d, 0xe3, 0xa1, 0xd1, 0x27, 0x18, 0x0d, 0xe3, 0x88, 0xc2,
- 0x00, 0x5a, 0x0d, 0xe1, 0xd1, 0x8a, 0x0d, 0xe1, 0xc9, 0x91, 0x0d, 0xe2,
- 0xeb, 0x01, 0x87, 0x0b, 0xc2, 0x01, 0xdb, 0x0d, 0xe2, 0xf9, 0x8b, 0x0d,
- 0xe2, 0xf1, 0x83, 0x0d, 0xe2, 0xe0, 0x00, 0xc1, 0x87, 0x0f, 0x8a, 0x0d,
- 0xe0, 0x88, 0x00, 0xc1, 0x87, 0x19, 0x45, 0xdc, 0xbe, 0xc1, 0x87, 0x4a,
- 0xc6, 0xd1, 0xe3, 0x0d, 0xe2, 0x48, 0x00, 0x41, 0x87, 0x66, 0x00, 0xc1,
- 0x87, 0x84, 0x45, 0x40, 0x3c, 0x41, 0x87, 0x95, 0x00, 0x41, 0x87, 0xa5,
- 0x8a, 0x0d, 0xe0, 0xc1, 0xc2, 0x00, 0x70, 0x0d, 0xe0, 0x81, 0x48, 0xb8,
- 0x45, 0x41, 0x87, 0xb6, 0x8a, 0x0d, 0xe0, 0xb9, 0x44, 0x02, 0xa8, 0x41,
- 0x87, 0xbe, 0x8e, 0x0d, 0xe0, 0xb0, 0x8d, 0x0d, 0xe0, 0xa1, 0x00, 0x41,
- 0x87, 0xc6, 0x8a, 0x0d, 0xe0, 0x99, 0xc2, 0x00, 0x70, 0x0d, 0xe0, 0x68,
- 0xc2, 0x05, 0x0d, 0x0d, 0xe0, 0x70, 0xc2, 0x05, 0x0d, 0x0d, 0xe0, 0x61,
- 0x47, 0xc1, 0xcd, 0x41, 0x87, 0xd0, 0xc4, 0xe6, 0x1f, 0x0d, 0xe1, 0xf0,
- 0xc8, 0xb9, 0x3d, 0x0d, 0xe3, 0x50, 0x99, 0x0d, 0xe2, 0x98, 0x97, 0x0d,
- 0xe2, 0xd9, 0x99, 0x0d, 0xe2, 0xd1, 0xc2, 0x22, 0x1f, 0x0d, 0xe2, 0xc9,
- 0x83, 0x0d, 0xe2, 0x18, 0x8a, 0x0d, 0xe2, 0xb9, 0xc2, 0x05, 0x0d, 0x0d,
- 0xe2, 0xa1, 0x8b, 0x0d, 0xe2, 0x50, 0x97, 0x0d, 0xe2, 0x91, 0x87, 0x0d,
- 0xe2, 0x58, 0x87, 0x0d, 0xe2, 0x40, 0xc2, 0x00, 0x83, 0x0d, 0xe2, 0x28,
- 0xca, 0xa3, 0x06, 0x01, 0x71, 0xb1, 0xcb, 0x8d, 0x9e, 0x01, 0x71, 0xb8,
- 0xc5, 0x08, 0x42, 0x00, 0x04, 0x69, 0x42, 0x00, 0x8a, 0xc1, 0x87, 0xd8,
- 0xc7, 0x27, 0xf8, 0x00, 0x02, 0xe3, 0x01, 0x87, 0xe4, 0xcd, 0x77, 0x71,
- 0x0f, 0xb3, 0xf9, 0x55, 0x38, 0x53, 0x41, 0x87, 0xe8, 0x14, 0xc1, 0x87,
- 0xf4, 0xc8, 0x69, 0x81, 0x01, 0x18, 0x81, 0x16, 0xc1, 0x88, 0x06, 0x15,
- 0xc1, 0x88, 0x1b, 0x12, 0xc1, 0x88, 0x27, 0x47, 0x06, 0xf1, 0xc1, 0x88,
- 0x33, 0xe0, 0x0a, 0xc7, 0x0f, 0xac, 0xa9, 0xcc, 0x81, 0xe8, 0x0f, 0xac,
- 0xa1, 0xc9, 0xb4, 0xfb, 0x01, 0x4d, 0x81, 0xc5, 0x01, 0x93, 0x01, 0x4d,
- 0x1b, 0x01, 0x88, 0x42, 0xd2, 0x49, 0x6e, 0x01, 0x70, 0x89, 0xcd, 0x2c,
- 0x41, 0x01, 0x71, 0x71, 0xc5, 0x00, 0xaa, 0x01, 0x72, 0x08, 0x9f, 0x01,
- 0x37, 0x09, 0x9e, 0x01, 0x37, 0x00, 0xd1, 0x51, 0x0c, 0x01, 0x33, 0xd1,
- 0x45, 0x1a, 0x68, 0x41, 0x88, 0x48, 0x87, 0x05, 0x4a, 0x4b, 0x01, 0x88,
- 0x72, 0x03, 0xc1, 0x88, 0x7a, 0x91, 0x05, 0x4a, 0x59, 0x97, 0x05, 0x4a,
+ " _-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789S VERTICALLY BELOW AND SMALL "
+ "TAHRIGHTWARDS TRIANGLE-HEADED ARROW DOWNWARDS TRIANGLE-HEADED ARROW "
+ "DOWNWARDS EQUILATERAL ARROWHEADSEMICIRCULAR ANTICLOCKWISE ARROWLOWER LEFT "
+ "CURLY BRACKET SECTIONLE COMMA QUOTATION MARK ORNAMENTEDIUM SHADE AND "
+ "RIGHT HALF BLOCK SHADOWED WHITE RIGHTWARDS ARROWTHIRD WHITE RIGHT "
+ "POINTING INDEXEDIUM SHADE AND LOWER HALF BLOCKE TO MIDDLE LEFT TO LOWER "
+ "CENTREFACE WITH SYMBOLS COVERING MOUTH CONTAINING SMALL WHITE TRIANGLEE "
+ "SQUARED LATIN CAPITAL LETTER POWNWARDS HARPOON WITH BARB RIGHTAISING BOTH "
+ "HANDS IN CELEBRATIONTOP HALF DIVIDED BY VERTICAL BARED UPWARDS "
+ "EQUILATERAL ARROWHEADTICAL BAR DOUBLE RIGHT TURNSTILEER ARROWS CIRCLING "
+ "ANTICLOCKWISELAR SIGN WITH OVERLAID BACKSLASHHAND WITH MIDDLE FINGER "
+ "EXTENDEDPER BODY TILTING FROM HIP JOINTS BETWEEN MIDDLE AND RING "
+ "FINGERSESS-THAN ABOVE DOUBLE-LINE EQUAL BAR AT END OF HORIZONTAL "
+ "STROKEWNWARDS ARROW WITH TIP LEFTWARDSER IGI SHIR OVER SHIR UD OVER "
+ "UDINTING DOWNWARDS THEN NORTH EASTS AND UPWARDS OPEN CIRCLE ARROWS LOWER "
+ "HALF INVERSE MEDIUM SHADE LETTER BYELORUSSIAN-UKRAINIAN IROFLEX CLICK "
+ "WITH RETROFLEX HOOK LEFTWARDS TRIANGLE-HEADED ARROWE-POINTED BLACK "
+ "RIGHTWARDS ARROWM LEFT MEMBER OF DOUBLE VERTICALIGHTWARDS HARPOON WITH "
+ "BARB DOWNTRIANGLE-HEADED RIGHTWARDS ARROWUPPER AND LOWER ONE EIGHTH BLOCK "
+ "KORANIC STOP SIGN ISOLATED FORMDIFIER LETTER LABIALIZATION "
+ "MARKRIANGLE-HEADED OPEN CIRCLE ARROWD ARROW WITH TRIANGLE ARROWHEADSIDE "
+ "AND JOINED WITH INTERSECTIONRIGHT SEMICIRCLE WITH THREE DOTSFT-POINTING "
+ "ANGLE QUOTATION MARK HORIZONTAL STROKES TO THE RIGHTAND MIDDLE RIGHT TO "
+ "LOWER CENTRETER-THAN ABOVE DOUBLE-LINE EQUALHUR KAZAKH KIRGHIZ ALEF "
+ "MAKSURA ATHARVAVEDIC INDEPENDENT SVARITAOWER RIGHT CURLY BRACKET "
+ "SECTIONHT-POINTING ANGLE QUOTATION MARKLE-LINE EQUAL ABOVE "
+ "GREATER-THANAND LEFTWARDS OPEN CIRCLE ARROWSUG2 OVER TUG2 TUG2 OVER TUG2 "
+ "PAPNORMAL FACTOR SEMIDIRECT PRODUCTLAGAB TIMES U OVER LAGAB TIMES UTION "
+ "SIGN WITH CIRCUMFLEX ACCENTIRECT PRODUCT WITH BOTTOM CLOSEDER TAB NI OVER "
+ "NI DISH OVER DISHTIMES ASH2 KU OVER HI TIMES ASH2 TRIANGULAR THREE "
+ "QUARTERS BLOCKH SUPERSCRIPT ALEF ISOLATED FORMGREATER-THAN ABOVE SLANTED "
+ "EQUALH HORIZONTAL MIDDLE BLACK STRIPEH HAMZA ABOVE WITH ALEF MAKSURA DIC "
+ "KASHMIRI INDEPENDENT SVARITAED ARABIC-INDIC DIGIT FOUR ABOVEROXIMATELY "
+ "NOR ACTUALLY EQUAL TOED COMMA QUOTATION MARK ORNAMENTVED STEM PARAGRAPH "
+ "SIGN ORNAMENTARDS ARROW ABOVE LEFTWARDS ARROWSWIRL BIRGA WITH DOUBLE "
+ "ORNAMENTED ARABIC-INDIC DIGIT FOUR BELOWMARK WITH LEFT RIGHT ARROW "
+ "ABOVEOUBLE-LINE EQUAL ABOVE LESS-THANRONT-TILTED SHADOWED WHITE ARROWL "
+ "ARABIC LETTER TAH AND TWO DOTSEN ARM ENDING IN ARROW POINTING "
+ "ARDROP-SPOKED PROPELLER ASTERISKRONG CENTRALIZATION STROKE BELOWVE "
+ "LESS-THAN ABOVE SLANTED EQUALOF UPWARDS TRIANGLE-HEADED ARROWLEFTWARDS "
+ "HARPOON WITH BARB DOWN TORTOISE SHELL BRACKET ORNAMENTLL BUT UPPER LEFT "
+ "QUADRANT BLACKDED HIGH STOP WITH FILLED CENTREETALLED BLACK AND WHITE "
+ "FLORETTEWO DOTS ABOVE AND TWO DOTS BELOWE CONTAINING BLACK SMALL "
+ "LOZENGEARDS HARPOON WITH BARB DOWNWARDSTOM-LIGHTED RIGHTWARDS ARROWHEADWO "
+ "DOTS OVER ONE DOT PUNCTUATIONAISED HAND WITH FINGERS SPLAYEDA-HIRAGANA "
+ "PROLONGED SOUND MARKRING OVER TWO RINGS PUNCTUATIONRIGHTWARDS HARPOON "
+ "WITH BARB UPIN WHITE CIRCLE IN BLACK SQUARE CROSSING ASH OVER ASH OVER "
+ "ASHLEFTWARDS EQUILATERAL ARROWHEADAND MIDDLE LEFT TO LOWER CENTREUPWARDS "
+ "HARPOON WITH BARB RIGHTTNAMESE ALTERNATE READING MARK RINGS OVER ONE RING "
+ "PUNCTUATIONACK-TILTED SHADOWED WHITE ARROWONE HUNDRED THIRTY-FIVE "
+ "DEGREESETALLED OUTLINED BLACK FLORETTEAND JOINED BY DASH WITH SUBSETDOT "
+ "BELOW AND THREE DOTS ABOVEHT CENTRALIZATION STROKE BELOWQAF WITH LAM WITH "
+ "ALEF MAKSURAAGGRAVATED INDEPENDENT SVARITAON WITH RIGHTWARDS ARROW "
+ "ABOVEMINTON RACQUET AND SHUTTLECOCKOPEN CENTRE EIGHT POINTED "
+ "STARGREATER-THAN ABOVE EQUALS SIGNINDEX THUMB CURVE THUMB INSIDESAD WITH "
+ "LAM WITH ALEF MAKSURADIVIDED BY HORIZONTAL BAR AND DOWNWARDS AND "
+ "RIGHTWARDS ARROWGHT FOUR POINTED PINWHEEL STAREART EXCLAMATION MARK "
+ "ORNAMENTOINTING ANGLE BRACKET ORNAMENTMODIFIER LETTER LEFT HALF "
+ "RINGMAKSURA WITH SUPERSCRIPT ALEF HIGH-REVERSED-9 QUOTATION MARKEFT "
+ "SEMICIRCLE WITH THREE DOTSCKED FACE WITH EXPLODING HEADDOT OVER TWO DOTS "
+ "PUNCTUATIONLIGHT FOUR POINTED BLACK CUSPSUPERSCRIPT ALEF INITIAL "
+ "FORMEFTWARDS HARPOON WITH BARB UPOMBINING ANUSVARA ABOVE RIGHTDOWN MIDDLE "
+ "THUMB INDEX CROSSEXTENDED ARABIC-INDIC DIGIT T DOWN INDEX THUMB HOOK "
+ "MIDDLENS-SERIF INTERROBANG ORNAMENTPUNCTUATION CHINOOK FULL "
+ "STOPCONTAINING BLACK SMALL CIRCLET-HANDED INTERLACED PENTAGRAMUP HEAVY "
+ "AND RIGHT DOWN LIGHT WITH REVERSED NEGATION SLASHWITH RIGHTWARDS ARROW AT "
+ "LEFTONAL INDICATOR SYMBOL LETTER DOWN HEAVY AND RIGHT UP "
+ "LIGHTSEMICIRCULAR PATH AROUND POLE OVER RIGHTWARDS ARROW TO BAREVERSED "
+ "LUNATE EPSILON SYMBOLSALTIRE WITH ROUNDED CORNERSRECTANGULAR PATH AROUND "
+ "POLEASTERISKS ALIGNED VERTICALLYUPPER CENTRE TO MIDDLE RIGHTHREE HUNDRED "
+ "FIFTEEN DEGREESEFT ARC GREATER-THAN BRACKETMONOGRAMMOS TESSERA "
+ "DODEKATALEFTWARDS OF DOWNWARDS ARROWIBE SYLLABLE BOUNDARY "
+ "MARKEROUND-TIPPED RIGHTWARDS ARROWHADED WHITE RIGHTWARDS ARROWUP HEAVY "
+ "AND LEFT DOWN LIGHTEDGE-TAILED RIGHTWARDS ARROWDOUBLE ANUSVARA "
+ "ANTARGOMUKHAE-FEATHERED RIGHTWARDS ARROWBESIDE AND JOINED WITH UNIONACE "
+ "DIRECTION POSITION NOSE UPWARDS AND RIGHTWARDS ARROWT LITTER IN ITS PLACE "
+ "SYMBOLU ALAYHI WAAALIHEE WA-SALLAMDOWN HEAVY AND LEFT UP LIGHTMIDDLE RING "
+ "LITTLE CONJOINEDISMILLAH AR-RAHMAN AR-RAHEEMLIQUID MEASURE FIRST "
+ "SUBUNITFTING POINT RIGHTWARDS ARROWLARGE EQUILATERAL "
+ "ARROWHEADW-REVERSED-9 QUOTATION MARKRIANGULAR ONE QUARTER BLOCKPPY PERSON "
+ "RAISING ONE HANDTWO HUNDRED SEVENTY DEGREESTALIC LATIN CAPITAL LETTER "
+ "RIPLE VERTICAL BAR OPERATORFOUR FINGERS CONJOINED BENTPHARYNGEAL VOICED "
+ "FRICATIVEBETWEEN TWO HORIZONTAL BARSRIGHT ARC LESS-THAN BRACKETZERO FOR "
+ "ODD POWERS OF FOURIMPERFECTUM CUM PROLATIONE DOUBLE PRIME QUOTATION "
+ "MARKHEAD MARK WITH MOON AND SUNDOUBLE ANGLE QUOTATION MARK AND LEFT "
+ "SEMICIRCLE ARROWSLESS-THAN ABOVE EQUALS SIGNIDE ARC ANTICLOCKWISE "
+ "ARROWEMICIRCULAR CLOCKWISE ARROWLOWER TONAL RANGE INDICATORENTATION FORM "
+ "FOR VERTICAL UPPER MIDDLE LEFT TO UPPER DIVIDED BY HORIZONTAL "
+ "RULEKATHAKA INDEPENDENT SVARITALIGHT CENTRALIZATION STROKELOWER MIDDLE "
+ "LEFT TO LOWER WO DOTS BELOW AND DOT ABOVEOUR BALLOON-SPOKED ASTERISKONE "
+ "LARGE AND ONE SMALL EYE BARREE WITH TWO DOTS BELOWBRDA RNYING YIG MGO "
+ "MDUN MABRDA RNYING YIG MGO SGAB MAN-OUTLINED RIGHTWARDS ARROWCIRCLE WITH "
+ "NORTHWEST ARROWFINGER COVERING CLOSED LIPSCONTINUOUS UNDERLINE SYMBOLEAVY "
+ "WHITE RIGHTWARDS ARROWWALLPLANE SHOULDER HIP MOVEIDE-HEADED RIGHTWARDS "
+ "ARROWOTATED FLORAL HEART BULLETDOTTED LUNATE SIGMA SYMBOLUNEVEN EYES AND "
+ "WAVY MOUTHOCKED FEMALE AND MALE SIGN OR APPROXIMATELY EQUAL TOWITH "
+ "LEFTWARDS ARROW ABOVEONOMICAL SYMBOL FOR URANUSTHREE DOTS ABOVE "
+ "DOWNWARDSYAJURVEDIC MIDLINE SVARITACAT FACE WITH SMILING EYESMEEM WITH "
+ "HAH WITH TATWEELSSIAN ASTROLOGICAL SYMBOL MARRIED PARTNERSHIP "
+ "SYMBOLDOTTED SUBSTITUTION MARKERU REVERSED OVER U REVERSEDLANTED SOUTH "
+ "ARROW WITH HOCRIPT LIGATURE ET ORNAMENTAND MIDDLE FINGERS CROSSEDE "
+ "ONE-WAY LEFT WAY TRAFFIC LESS THAN THE DENOMINATORIDEOGRAPHIC ITERATION "
+ "MARKRIPLE DOT PUNCTUATION MARKLINE FEED SEPARATOR SYMBOLLEFTWARDS OF "
+ "UPWARDS ARROWDROP-SHADOWED WHITE SQUAREALEF MAKSURA ISOLATED FORMOORPLANE "
+ "SHOULDER HIP MOVE NEGATED WITH VERTICAL BARJECT REPLACEMENT CHARACTERBLE "
+ "TENNIS PADDLE AND BALLERSTRASS ELLIPTIC FUNCTIONCLOCKWISE ARROW WITH "
+ "MINUSINDUSTRIAL STANDARD SYMBOLORTHOGONAL CROSSHATCH FILLRIGHT "
+ "PARENTHESIS ORNAMENTAND RIGHT ONE EIGHTH BLOCKHTORA SKLIRON CHROMA VASIS "
+ "WITHIN TRIANGLE ARROWHEADLLALLAHOU ALAYHE WASSALLAM GAD OVER GAD GAR OVER "
+ "GARRIST CIRCLE HITTING WALL FFICULTY AT THE BEGINNINGMBINING "
+ "CRYPTOGRAMMIC DOTP-BARBED RIGHTWARDS ARROWRANCH BANK "
+ "IDENTIFICATIONINVERTED EXCLAMATION MARKLEFTWARDS ARROW WITH HOOKOUBLE "
+ "BIRGA WITH ORNAMENTREE-HUNDRED-AND-TWENTIETH WITH DOUBLE GRAVE ACCENTELD "
+ "HOCKEY STICK AND BALLDIAERESIS AND HOOK SYMBOLLOW QUILT SQUARE "
+ "ORNAMENTFECTIVENESS OR DISTORTION WITH DOUBLE VERTICAL BAR WITH DOUBLE "
+ "MIDDLE TILDETTED SUBSTITUTION BRACKETLEFT PARENTHESIS ORNAMENTDRY MEASURE "
+ "FIRST SUBUNITTAB OVER TAB GAR OVER GAR WITH CIRCLED ONE OVERLAYSTRUMENTAL "
+ "NOTATION SYMBOBREVE WITH INVERTED BREVEOLD ASSYRIAN WORD DIVIDERONE UNDER "
+ "EIGHTEEN SYMBOLBUT NOT ACTUALLY EQUAL TOTERNION INTEGRAL OPERATORSTROKE "
+ "AND TWO DOTS ABOVEMULTIPLICATION SIGN BELOWCAT FACE WITH CLOSED EYESRIGHT "
+ "DIAGONAL HALF BLACKCROSSING NORTH EAST ARROWING ON THE FLOOR "
+ "LAUGHINGRIPLE BIRGA WITH ORNAMENTMEDIUM TRIANGLE ARROWHEADNIVERSAL "
+ "RECYCLING SYMBOL OVER NUN LAGAR TIMES SALDOUBLE CANDRABINDU VIRAMASMALL "
+ "ARABIC LETTER TAH ISOSCELES RIGHT TRIANGLEE PLUS A PLUS SU PLUS "
+ "NALY-RECYCLED PAPER SYMBOLOTTOM-SHADED WHITE ARROWOTTOM SHADED WHITE "
+ "ARROWSTROKE THROUGH DESCENDERSYLLABLE REPETITION MARKARROW POINTING "
+ "DIRECTLY ARM CIRCLE HITTING WALL MALL CIRCLE TO THE "
+ "RIGHTONE-HUNDRED-AND-SIXTIETHRIGHTWARDS THEN CURVING LARGE TRIANGLE "
+ "ARROWHEADFINGER AND THUMB CROSSEDPOINTING DOWNWARDS ABOVEPERSCRIPT ALEF "
+ "MOKHASSASHEAVY BLACK HEART BULLETORIZONTAL BAR WITH NOTCHING FACE WITH "
+ "OPEN MOUTHDESCENDING MUSICAL NOTESSINGLE-LINE NOT EQUAL TOBLACK "
+ "LENTICULAR BRACKETDOWNSCALING FACTOR KIIZHMTAVRULI CAPITAL LETTER BLIC "
+ "ADDRESS LOUDSPEAKERLINE HORIZONTAL ELLIPSISEQUAL TO OR GREATER-THAN "
+ "POINTING BACKHAND INDEXAND MALE AND FEMALE SIGNCULINE ORDINAL "
+ "INDICATOREYES AND HAND OVER MOUTHREVERSED NINE-LIKE BHALELICATION PROGRAM "
+ "COMMANDIGATURE OPEN ET ORNAMENTDIAGONAL CROSSHATCH FILLWITH CANCELLATION "
+ "STROKETED INTERPOLATION MARKERLEFT DIAGONAL HALF BLACKMODIFIER "
+ "FITZPATRICK TYPTEARDROP-SPOKED ASTERISKBUSINESS SUIT LEVITATINGE HUNDRED "
+ "TWENTY-EIGHTH INTERSECTION WITH SERIFSWELVE POINTED BLACK "
+ "STARGREATER-THAN OR EQUAL TOALTERNATE SECTION MARKERCONSECUTIVE EQUALS "
+ "SIGNSWO DOTS VERTICALLY ABOVEUPWARD POINTING TRIANGLET BLACK RIGHTWARDS "
+ "ARROWVOICED LARYNGEAL SPIRANTGLOTTAL STOP WITH STROKE OVER TOP SQUARE "
+ "BRACKETDOUBLE HORIZONTAL STROKEWITH UPWARDS ARROW ABOVEUPRIGHT "
+ "RECTANGULAR ZEROBERKANAN BEORC BJARKAN BWO-WAY LEFT WAY TRAFFIC POINTING "
+ "AT THE VIEWERARTY HORN AND PARTY HATCURRENT SYMBOL FORM TWOARABIC LETTER "
+ "TAH ABOVEPUNCTUATION END OF TEXTALGAMATION OR COPRODUCTLEFT-SHADED WHITE "
+ "ARROWREE VARIATION SELECTOR GREEK SMALL LETTER IOTAHT TRIFOLIATE "
+ "SNOWFLAKECLUSTER-INITIAL LETTER ORAH WITH NINE BRANCHESUPWARDS THEN NORTH "
+ "WESTCIRCUMFLEX ACCENT ABOVEHREE POINTED BLACK STARRAISED OMISSION "
+ "BRACKETORIGINAL OF OR EQUAL TOHANKED RIGHTWARDS ARROW CAKE WITH SWIRL "
+ "DESIGNP WITH EXCLAMATION MARKSHAPE WITH A DOT INSIDEININE ORDINAL "
+ "INDICATORWHITE FOUR POINTED CUSPESS OUTLINED WHITE STARATTACHING VERTICAL "
+ "OMETSING DIAGONAL CROSSING RIGHT-POINTING TRIANGLE ROTATED NINETY "
+ "DEGREESAND WOMAN HOLDING HANDSRIGHT HORIZONTAL SECANTIVE FINGERS SPREAD "
+ "OPENZANTINE MUSICAL SYMBOL DOWNWARDS THEN CURVING UP SPREAD THUMB "
+ "FORWARDISTED RIGHTWARDS ARROWSAVOURING DELICIOUS FOODPA OVER PA GAR OVER "
+ "GAR WITH HALF-CIRCLE BELOWRTOISE SHELL BRACKETED LSCHREIBER PAUSE "
+ "SYMBOLSOLIDUS BINARY RELATIONASCENDING MUSICAL NOTESIDE ARC CLOCKWISE "
+ "ARROWVERTICAL BISECTING LINEALEF MAKSURA FINAL FORME-ROTATED DIVISION "
+ "SIGNANG DEPARTING TONE MARKOVER RIGHTWARDS HARPOONOVERLAPPING LOGICAL "
+ "ANDUBLE VERTICAL BAR BELOWIGHT-SHADED WHITE ARROWMITIAN CONJUGATE "
+ "MATRIXRIGHT DIAGONAL ELLIPSISDOT BELOW AND DOT ABOVE OVER STAMPED "
+ "ENVELOPELATIN CAPITAL LETTER S WITH THREE DOTS ABOVELE BESIDE VERTICAL "
+ "BAR WITH DECORATIVE COVER WITH CIRCUMFLEX ABOVELEFT TRIANGLE OPERATORLONG "
+ "HORIZONTAL STROKE WITH SINGLE ZAPYATAYAMNYAM YIG GI MGO RGYANNOT "
+ "INCLUDING THE POLEI YFESIS TETARTIMORIONWITH HORIZONTAL STROKEUPPER RIGHT "
+ "AND LOWER CURLY BRACKET ORNAMENTPUNCTUATION KUNDDALIYAE-ROUND NOTEHEAD "
+ "DOWN USTOMER ACCOUNT NUMBERUPPER ONE EIGHTH BLOCKPRECEDED BY "
+ "APOSTROPHEIGEL LONG-BRANCH-SOL SYIAKENG PUACHUE HMONG DOWN-POINTING "
+ "TRIANGLEIN DEPARTING TONE MARKINDIRECT QUESTION MARKDOMAIN "
+ "ANTIRESTRICTIONING HEAD IN SILHOUETTEPERSET OF NOR EQUAL TOS-SHAPED BAG "
+ "DELIMITERRECTILINEAR BLACK STARREASE FONT SIZE SYMBOLCONSONANT SIGN "
+ "MEDIAL DOUBLE SOLIDUS OVERLAYCONSONANT MODIFIER BARGREATER-THAN "
+ "DIAERESISCOMPATIBILITY IDEOGRAPND RECORDING COPYRIGHTREE-CIRCLE ALTERNATE "
+ "IDOUBLE VERTICAL STROKEPOINTING UPWARDS BELOWDOUBLE-LINED HEAD MARKUIGHUR "
+ "KIRGHIZ YEH WITEAST-POINTING AIRPLANEN-POTABLE WATER SYMBOLWITH VOICED "
+ "SOUND MARKHEXIFORM LONG ANUSVARASEMI-VOICED SOUND MARKQUESTION MARK "
+ "ORNAMENTHORT HORIZONTAL STROKEWITH JEEM INITIAL FORMSHORT RIGHTWARDS "
+ "ARROW RIGHT ARROWHEAD ABOVEACKSLANTED SOUTH ARROWRIGHT-POINTING "
+ "FLEURONDRESSED TO THE SUBJECTTRIPLE RIGHT TURNSTILETRIPLE VERTICAL "
+ "STROKEROUND A POINT OPERATOROVER SHIR BUR OVER BUROVER NU11 BUR OVER "
+ "BUROVER LEFTWARDS HARPOON SYMBOL FOR LIGHTHOUSESHORT VERTICAL "
+ "STROKESWASALLAM ISOLATED FORMKEEPING STILL MOUNTAINARMENIAN ETERNITY "
+ "SIGNLOW PARAPHRASE BRACKETRROW WITH ROUNDED HEADTWO HORIZONTAL STROKESBAR "
+ "ABOVE INTERSECTIONSYMPTOTICALLY EQUAL TOUBSCRIPT SMALL LETTER INVERTED "
+ "SMALL V ABOVEDOWNWARDS ZIGZAG ARROWINVERTED SMALL V BELOWLETTER SMALL "
+ "CAPITAL SH AMPERSAND ORNAMENTEQUAL TO OR LESS-THANLESS-THAN OR EQUAL "
+ "TOABOVE SHORT DOWN TACKMIDDLE RING LITTLE ONDIGRAMMOS EX DODEKATAOPPOSING "
+ "AN PLUS NAGAND TELEPHONE RECEIVERSIDE TO SIDE SCISSORSER RIGHT CORNER "
+ "ANGLEWESTERN PWO KAREN TONWO-CIRCLE ALTERNATE IWO-CIRCLE NUKTA "
+ "ABOVEO-FLOORPLANE TWISTINGXTRA SHORT VOWEL MARKOHAMMAD ISOLATED "
+ "FORMMORPHOLOGICAL DIVIDERYIG MGO TSHEG SHAD MAOP SHADED WHITE ARROWDOWN "
+ "HORIZONTAL LIGHTDOWN HORIZONTAL HEAVYMALE WITH STROKE SIGNDOWNWARDS ARROW "
+ "ABOVEWITH INVERTED V ABOVENTISTRY SYMBOL LIGHT ACUTE AND HOOK SYMBOLSHORT "
+ "LEFTWARDS ARROWALLING DIAGONAL SLASHAKIA TELOUS ICHIMATOSDOUBLE LEFT "
+ "TURNSTILEHALF TRIANGULAR COLONLATTENED OPEN A ABOVEINVERTED BRIDGE "
+ "BELOWTHICK LETTER SELECTOREFT HORIZONTAL SECANTINTERSECTION "
+ "OPERATORDELIMITER TSHEG BSTARGRUENT WITH DOT ABOVEEFT OPEN BOX "
+ "OPERATORINTERSECTING LOGICAL EIGHT SPOKED ASTERISKFINAL CONSONANT SIGN "
+ "HOCKEY STICK AND PUCK AND SLANTED PARALLELARD SHELL FLOPPY DISKBESIDE "
+ "RIGHT TRIANGLETILDE OPERATOR ABOVE ELATIONAL COMPOSITIONARKENING OF THE "
+ "LIGHTVERY HEAVY BARB ARROWRATING SYSTEM COMMANDEFT-POINTING TRIANGLEGHT "
+ "OPEN BOX OPERATORVERTICAL LINE OVERLAYARYSTIAN FIVE HUNDREDPAP PLUS PAP "
+ "PLUS LU3RELICT HOUSE BUILDING WITH VERTICAL STROKEHOUSAND MILLIONS "
+ "SIGNTEEN POINTED ASTERISKVOICED ITERATION MARKBUT NOT EQUIVALENT TOHAND "
+ "INTERIOR PRODUCTCRUCIFORM NUMBER FOURREVERSED FEATHER MARKINVERTED "
+ "GLOTTAL STOPCROSSE STICK AND BALLAND VOWEL LENGTH MARKHORIZONTAL "
+ "TABULATIONQUADRANT CIRCULAR ARCFLATTENED PARENTHESISBOTTOM U-SHAPED ARROW "
+ "WITH SHORT RIGHT LEGRISING DIAGONAL SLASHCTOR OR CROSS PRODUCTTRIANGULAR "
+ "HALF BLOCKTOUCHING INSIDE MOUTHGRAMMOS OKTO DODEKATAVAL WITH OVAL "
+ "INSIDEIRCLES HITTING WALL WET CULTIVATION SIGNINSIDE MOUTH RELAXEDFORMS "
+ "LIGHT VERTICALTHROUGH SMALL CIRCLECHEMICAL SYMBOL FOR UP-POINTING "
+ "TRIANGLECHARACTER INTRODUCERBETWEEN PALM FACINGSINVERSE WHITE "
+ "CIRCLEINVERTED CANDRABINDUDOTLESS HEAD OF KHAHHREE-DOT NUKTA "
+ "ABOVESEPARATOR MIDDLE DOTREVERSED ONE HUNDREDWITH FOUR DOTS "
+ "ABOVESEPARATOR KEY SYMBOLTRANSPOSITION MARKERCURRENCY SYMBOL RIELBSET OF "
+ "NOR EQUAL TOCURVED ANGLE BRACKETMULTIPLE PUNCTUATIONHORIZONTAL BAR WITH "
+ "PLUS GISH TIMES TAK4IMAGE OF OR EQUAL TODOUBLE ANGLE BRACKET79 OVER "
+ "LAK-079 GUNUCENTRE VERTICAL LINEWITH SOROCHYA NOZHKAINDEX RING LITTLE "
+ "ONRIGHT ANGLE WITH DOTRUMAI PALAUNG TONE-5A- SHOG GI MGO RGYANHIGH TONE "
+ "APOSTROPHEFOUR RAISED KNUCKLES OVER LAGAR GUNU SHEHIGH RATHA OR LOW "
+ "PADOWN ARROWHEAD BELOWINING OBLIQUE STROKE OVER TUR ZA OVER ZAUBHAANAHU "
+ "WA TAAALAAENARMONIOS ANTIFONIAGRA GCAN -CHAR RTAGSLEFT AND LOWER "
+ "RIGHTANGE ANTIRESTRICTIONLE WITH POPPING CORK AND DIAGONAL STROKESINGLE "
+ "DOT TONE MARKAND SOUTH WEST ARROW WITH HORIZONTAL BARLATALIZED HOOK "
+ "BELOWLASHING SWEAT SYMBOLRIGHT QUADRANT BLACKOTLESS J WITH STROKEONE MARK "
+ "SGAW KAREN FINAL CONSONANT MARKYIG MGO PHUR SHAD MAAND SOUTH EAST "
+ "ARROWAND NORTH WEST ARROWEMESTVENNY ZADERZHKAEVERSED ROTATED RANAAND "
+ "NORTH EAST ARROWWORD REPETITION MARKREAN STANDARD SYMBOLYRENAIC TWO "
+ "DRACHMASCONSONANT SIGN HAARUE CONSONANT MODIFIER ABOVE LEFT "
+ "TRIANGLELEFT-TO-RIGHT SECANTL FUNCTIONAL SYMBOL HAR2 TIMES GAL PLUS RIGHT "
+ "U-SHAPED ARROWRIGHT-POINTING ANGLENIS RACQUET AND BALL BEGIN LOGOGRAM "
+ "MARKSCRIPTION CHARACTER DOUBLE DOT TONE MARKOVER LEFTWARDS "
+ "ARROWJUDEO-SPANISH VARIKACRESCENT MOON SYMBOLHALF CIRCLE WITH DOTFIVE "
+ "SPOKED ASTERISKSIDEWAYS NOON GHUNNAARXIS KAI FTHORA VOUL-DOWN-OUTPUT "
+ "SYMBOLGHTWARDS ARROW BELOWE POINTED WHITE STARWARE-FUNCTION "
+ "SYMBOLONCAVE-SIDED DIAMONDCJK UNIFIED IDEOGRAPKHAMTI REDUPLICATIONSTAR "
+ "WITH MIDDLE DOTTIGHTLY-CLOSED EYESHORT STROKE OVERLAYURRENCY SYMBOL "
+ "BAHTMSHELL MOBILE PHONEHAND COVERING MOUTHNE HUNDRED TWENTY PHEAD-SHAPED "
+ "POINTERWITH YEH FINAL FORMGIBBOUS MOON SYMBOLTO LOWER RIGHT FILLCIRCLED "
+ "SANS-SERIF NDRED POINTS SYMBOLTHANG LONG ANUSVARANAXIAN FIVE "
+ "HUNDREDCIRCLED WHITE ARROWTWENTY-FIVE DEGREESAHU ALAYHI "
+ "WA-AALIHARENTHESIS NOTEHEADKULL AND CROSSBONESUP HORIZONTAL HEAVYUP "
+ "HORIZONTAL LIGHTARTIAL DIFFERENTIALVERTICAL TABULATIONFIVE FINGERS "
+ "SPREADEVEN POWERS OF FOURLUB-SPOKED ASTERISKSTRAIGHT THUMB BENTSTRATIAN "
+ "FIFTY MNASATED TELLER MACHINETURNED PADA PISELEHLEFT U-SHAPED "
+ "ARROWALTERNATE LAKH MARKLEFT TO LOWER RIGHTSLANTED NORTH ARROWTURNED "
+ "SECTION MARKLEFT POINTING INDEXLF MADDA OVER MADDAVERSE FINAL "
+ "BARLINELEADING MCHAN RTAGSANS-SERIF CAPITAL LANSPOSITION BRACKETLANTED "
+ "EQUAL ABOVE DENOMINATOR SIXTEENARABIC FORM SHAPINGIGATURE "
+ "AYIN-DALETHFT-POINTING FLEURONIN CHEN SPUNGS SHADA END LOGOGRAM "
+ "MARKIMIDIA SEXTULA SIGND CIRCUMFLEX ACCENTILDING CONSTRUCTIONSH PLUS HU "
+ "PLUS ASHSEVEN EIGHTHS BLOCKTART OF RUB EL HIZBTRANNO MALO POVYSHEGAPPED "
+ "CIRCLE ARROWC DIGRAPH WITH CURLMOVES AGAINST CHEEKSEQUENCE "
+ "INTRODUCERHURISAZ THURS THORNBETWEEN MIDDLE RINGING SHIRT WITH "
+ "SASHT-FACING BABY CHICKINSIDE CIRCLE BELOWET WITH WHITE CROSSVARIANT FORM "
+ "ILIMMUVARIANT WITH SQUAREVARIATION INDICATORVASTNESS OR WASTINGSYMBOL FOR "
+ "BEGINNERISPUTED END OF AYAHITED LIABILITY SIGNVERSAL INTERSECTIONRIGHT "
+ "MIDDLE STROKEOP SEMICIRCLE ARROW AND RETROFLEX HOOK WITH VERTICAL "
+ "TAILOCAL NOTATION SYMBO WITH STRIKETHROUGHRROW NO-BREAK SPACER-INITIAL "
+ "LETTER RAOTATED ARDHAVISARGADOWN AND HORIZONTAL AND "
+ "PROSGEGRAMMENIQUADRUPLE CRESCENTSUBLE DOT WITHIN DOTNOGRAPHIC FULL STOP "
+ "FLUTTERING IN WINDPARAGRAPH SEPARATORNORTH ARROW WITH HOOVER E NUN OVER "
+ "NUNRY CULTIVATION SIGNOW-9 QUOTATION MARKQUESTION MARK ABOVERIGHT TO "
+ "LOWER LEFTDOWN POINTING INDEXPREFIXED NASAL SIGN IN A RECTANGLE "
+ "BOXRAILING MCHAN RTAGSREE-QUARTER CIRCLE NETWORKED COMPUTERS LOVE YOU "
+ "HAND SIGNWOMEN HOLDING HANDS KASKAL U GUNU DISHENTY-TWO POINT TWOERTICAL "
+ "BAR VIRAMAKBAR ISOLATED FORMARROW SHAFT WIDTH THUMB INDEX THUMBNYET "
+ "THYOOM TA-ROLKANTAJA NAASIKYAYALOWER MIDDLE RIGHTEPIGRAPHIC LETTER "
+ "TILTING FROM WAISTNYOOGA NAAKSIKYAYAVERY SMALL DIAMONDKOREAN CHARACTER "
+ "OOUCHTONE TELEPHONEHILOSOPHERS SULFUR AND NO DOTS ABOVEHAIS LUS NTOG "
+ "NTOGJEEM ISOLATED FORMHORIZONTALLY BELOWUM WITH DRUMSTICKSATIN SMALL "
+ "LETTER IX SPOKED ASTERISKTROFLEX HOOK BELOW AND YPOGEGRAMMENIATERRESTRIAL "
+ "ALIENSET OVER BUILDINGSRYUKOVAYA SVETLAYAFIVE EIGHTHS BLOCKOW TONE "
+ "APOSTROPHE SPREAD THUMB SIDEASTROLOGICAL SIGN ASTED SWEET POTATORATUM "
+ "SUPER STRATUHOLDING BACK TEARSSOUL ISOLATED FORMENTERING TONE MARKSMALL "
+ "CIRCLE ABOVEWEST POINTING LEAFEAST POINTING LEAFU-GAAHLAA "
+ "TTUDDAAGABBREVIATION MARK EVERSED CHELYUSTKAAND LOW RIGHT RINGORK ON THE "
+ "DECAYEDLEFT MIDDLE STROKEGGLY VERTICAL LINERIST CIRCLE FRONT MATHEMATICAL "
+ "SPACER-STRING FRETBOARDLEFT-STEM TONE BARYPTIAN HIEROGLYPH AND SMASH "
+ "PRODUCTSINGLE PUNCTUATIONYLLABLE LENGTHENER WITH CROSSED-TAILOU ALAYHE "
+ "WASALLAMWHITE VERTICAL BARLOSED CIRCLE ARROWSHAN REDUPLICATIONL-UP-OUTPUT "
+ "SYMBOL WITH CIRCLE ABOVE WITH CIRCLE BELOWUP ARROWHEAD BELOWTYPE A "
+ "ELECTRONICSU-SHAPED ORNAMENTSOTLESS DALATH RISH WITH FLOWING SANDEVENTEEN "
+ "FULL STOPTOP U-SHAPED ARROW WITH KAVYKA ABOVESMALL RED TRIANGLEBOTTOM "
+ "RIGHT KASRAQUARTER NOTE STEM DOUBLE PUNCTUATIONTO LOWER LEFT FILLNINETEEN "
+ "FULL STOPFORKED PARAGRAPHOSNORMAL SUBGROUP OFCUP WITHOUT HANDLEWITH "
+ "JUSTIFICATIONBAARAKA WA-TAAALAAFORTY-FIVE DEGREESIGATURE ZAYIN-YODHIRROR "
+ "HORIZONTALLYCONTINUING OVERLAP NOT LITTER SYMBOLPERFIXED LETTER "
+ "RAD-HEIGHT LEFT HOOKHUNDREDS UNIT MARKS PRESSED TOGETHERGISH CROSSING "
+ "GISHVARIANT FORM LIMMU FOR SIMALUNGUN SAWITH STROKE SYMBOLBASELINE ROUND "
+ "DOTIGSAW PUZZLE PIECEIN POSSESSION SIGNUTLINED BLACK STARPUNCTUATION "
+ "BINDU COPPER ANTIMONIATENISH VERSE DIVIDERN-FLOORPLANE SPACEITAN SMALL "
+ "SCRIPT INDEPENDENT VOWEL X-STRING FRETBOARDPACING CANDRABINDUCANTILLATION "
+ "SIGN GATIVE ACKNOWLEDGENITE PART INTEGRALUPPER MIDDLE RIGHTSEPTUPLE "
+ "CRESCENTS OVER INVERTED SHUS INSIDE AND ABOVECTLY EQUIVALENT TOSYMMETRIC "
+ "SWAPPINGHOUSANDS SEPARATORTEN THOUSAND SIGN GRAVEYARD SYMBOLAMPHYLIAN "
+ "DIGAMMAHEART-SHAPED EYESLHOUETTE OF JAPAN WITH TILDE ABOVEEMICOLON "
+ "UNDERBARCROSS PUNCTUATIONPINWHEEL ASTERISKAND BLACK SQUARESVOCALIZATION "
+ "MARKSIGN O WITH CROSSPRESSIONLESS FACEIDEOGRAPHIC COMMAPRIZNAK MODIFIER "
+ "CIRCLES WITH DOTSHAH ISOLATED FORMBRACKET EXTENSIONBRIGHTNESS SYMBOL "
+ "LAGAR OVER LAGARYEH ISOLATED FORM LAGAB TIMES ASH2AEUM ONE "
+ "PLETHRONTARTING FROM SIGNPUNCTUATION SIGN IASTRE MARK ABOVE0 WHEELED "
+ "CHARIOTIGHTEEN FULL STOPAMBDA WITH STROKETIAL ARTS UNIFORMALTERNATE "
+ "NUMBER ULAR MEDIUM SHADEOPEN-HEADED ARROWBUT RELIEVED FACEVRE TOURNOIS "
+ "SIGNLAPPING LESS-THANBOTTOM HALF BLACKLIAN HIEROGLYPH APRECEDING "
+ "SOLIDUSTAN ISOLATED FORMGENERIC MATERIALSASTERN PWO KAREN CTION "
+ "APPLICATIONCANCELLATION MARKSTRAIGHT MOVEMENTK-LETTER CAPITAL VEE WITH "
+ "UNDERBAROURTEEN FULL STOPTHREE SOUND WAVES PLUS SHA3 PLUS ACAL SYMBOL "
+ "BOTTOMINVERTED MCHU CANKE BOTTLE AND CUPKHAH INITIAL FORMVERAGE WITH "
+ "SLASHAUKAZ LAGU LOGR LHORIZONTAL SINGLEURNED DAMMA BELOWFLICK "
+ "ALTERNATINGQUAT REVERSED ESHATHERING TOGETHEROWER NUMERAL SIGNURNED COMMA "
+ "ABOVEHORIZONTAL DOUBLEUP POINTING INDEXCANDRABINDU ABOVETRIPLE DASH "
+ "ARROWEAR SCREEN SYMBOLDELPHIC FIVE MNASLAGOLITIC LETTER LAH ISOLATED "
+ "FORMUP AND HORIZONTALAPLI DYO DODEKATABERBER ACADEMY YA OVER ZU PLUS "
+ "SARDENTAL PERCUSSIVEING POLE AND FISHINEAR ANNOTATION LD PERMIC LETTER "
+ "FRACTION ONE HALF WITH SOUND WAVESFROM SMALL CIRCLEBE WITH MERIDIANSRIGHT "
+ "HALF CIRCLE WITH BULLET NOSETERSYLLABIC TSHEGVARIANT FORM ASH9 TO BLACK "
+ "DIAMONDVARIANT FORM IMINSQUARED TIMES KURVARIANT FORM USSUFINGER-POST "
+ "ARROWHIRTEEN FULL STOPVERY SMALL SQUAREMANENT PAPER SIGN ALTERNATION "
+ "MARKCONSONANT SIGN PAABBREVIATION SIGNN-WALLPLANE SPACECLOSED LITTLE "
+ "YUSMEDIUM BARB ARROWMALL WHITE CIRCLENASALIZATION MARKREATIONAL "
+ "VEHICLENAUDIZ NYD NAUD NCONTINUATION SIGNCONTOURED OUTLINENG STROKE "
+ "OVERLAYNFORMATION SOURCEY ON BLACK SQUARESERVER EYE SYMBOLGREATER-THAN "
+ "NOR GREATER-THAN SIGNCOMBINING NUMBER WITH NOT EQUAL TORYBLION BASE "
+ "SIGNTONAL RANGE MARK S REVOLVING LIGHTMILITARY AIRPLANENEGATIVE CIRCLED A "
+ "PLUS HA PLUS DA AND PALATAL HOOKWHITE SHOGI PIECEWHITE PARENTHESISMEEM "
+ "INITIAL FORMN-BREAKING HYPHENN-CIRCUIT-OUTPUT WO VERTICAL DOTS NE EYEBROW "
+ "RAISEDK PERMITTED HERELOWER OVER UPPERRIGHT HALF BLACKDOUBLE HEAD "
+ "MARKUMBER SIGN ABOVEMIDDLE AND BELOWQ WITH HOOK TAILRIZONTAL "
+ "ELLIPSEHYPHENATION MARK AND LIGHT RIGHT WITH HEADSTROKEIMENSIONAL "
+ "ANGLEQUALS SIGN BELOWSTRAIGHT STRETCH PLUS KAK PLUS AEPSILON "
+ "UNDERBARK-NO-EVIL MONKEYNAL DIGIT SHAPESINES CONVERGING RIGHT DOWN BARB "
+ "WAVY HAMZA BELOWEVERSED VISARGA STUCK-OUT TONGUEE-NO-EVIL MONKEYCORNER "
+ "LEFTWARDSSLANTED EQUAL TO CAPPED MOUNTAINDVUMYA ZAPYATYMIEVERY OTHER TIME "
+ "WITH DOT INSIDEOLD TAMIL VIRAMAADIAN SYLLABICS EN MILLIONS SIGNRAYS AND "
+ "DOTTED UP MIDDLE HINGED OR THE IMAGE OFVOWEL LENGTHENERCREAMING IN "
+ "FEARPLACEHOLDER MARKINDEX THUMB SIDEVARIANT FORM ESHLD ASSYRIAN ONE SHU2 "
+ "PLUS KASKALHMATULLAH ALAYHEULDERED OPEN BOXLEFT ARROW ABOVESMALL CAPITAL "
+ "ELDEWAYS U BRACKETFALLING DIAGONALEFORE COMPLETIONPEN CENTRE CROSS TIMES "
+ "DISH TENUINDEPENDENT SHINECIMAL SEPARATORDOUBLE CRESCENTSVERY HEAVY "
+ "SHAFTDOUBLE DOT ABOVENION WITH SERIFSORIZONTAL JOINERCASIAN ALBANIAN "
+ "ANABAZAR SQUARE R-NO-EVIL MONKEYHOLDING TOGETHERSTICKING OUT FARRECORD "
+ "SEPARATORMIDDLE AND RIGHTPLE MEASURE RESTSMALL NOON ABOVE TIMES GAN2 "
+ "TENUDITORIAL CORONISRISING TONE MARKR PLUS GAN2 TENUSTERISK "
+ "OPERATORAFFRICATION MARKLEFT HALF CIRCLELLOW PAN OF FOODCOMPRESSED "
+ "ARROWB2 TENU PLUS TABDOUBLE ZAPYATAYASIGN RISING TONEISTOS DISC SIGN "
+ "PRECHGESANG STEMIRCLE X NOTEHEADE PLUS GAN2 TENURIGHT HALF BELOWPRECEDING "
+ "SUBSETIFTEEN FULL STOPTAKANA-HIRAGANA YATHOS BASE SIGNENTRE WHITE "
+ "STARUBJOINED LETTER UPERSCRIPT ALAPHRESH-AYIN-DALETHRAIDO RAD REID RYIR "
+ "MKPARAQ MEUNIPPER-MOUTH FACEWITH LEFT UPTURNMESTVENNY KLYUCHE-ENDED "
+ "MULTIMAPTRIPLE CRESCENTSIDED GREEK CROSSUPPER OVER LOWERENUMERATION "
+ "SIGNHIGH SPACING DOT WITH BUNNY EARSCORNER DOWNWARDSEND OF TEXT "
+ "MARKWALLED ENCLOSUREDOUBLE TURNSTILEVIEWING CEREMONYL-CARRIER "
+ "LETTERDOTTED CRESCENTS LIGHT MOON ARTAWITH HEARING AIDMAGNIFYING "
+ "GLASSWITH HAMZA ABOVEIGHTH NOTE STEM UDLY CRYING FACEPOSTPOSITION "
+ "MENSERIFS AT BOTTOMO-MINOAN SIGN CMARENTHESES ABOVEE-PIECE "
+ "SWIMSUITCONSONANT JOINERNTAIGANA LETTER LIGHT BARB ARROWOLD WHITE "
+ "CIRCLELIGHT AND RIGHT BOLD GREEK CROSSSITION INDICATOR INSERTION "
+ "POINTLTERNATE HASANTA AND HEAVY RIGHTFFERENCE BETWEENCH WITH UMBRELLAWITH "
+ "CENTRED DOTSANS-SERIF ARROW SMALL ROTATIONSDOWN RIGHT BARB BOTTOM HALF "
+ "RINGIVE POINTED STARDOING CARTWHEELST SYRIAC CROSSGROUP SEPARATORMIDDLE "
+ "DIAGONALST QUARTER MOONSSICAL BUILDING PARESTIGMENON OUBLE BACKSLASH "
+ "TIMES IGI GUNU DEYTEROU ICHOUR-FINAL LETTER FOUR DOTS WITH NJOINING "
+ "MACRONFOUR ENCLOSURESBELGTHOR SYMBOLKISIM5 TIMES BIPEN SQUARED DOTPENSION "
+ "RAILWAYKTOVIK NUMERAL QUADRUPLE ARROWERCURY SUBLIMAT TOUCHING "
+ "INDEXATINATE MYSLITEFORWARD TILTINGINVERTED STROKECAPITAL LETTERSXTEEN "
+ "FULL STOPUMAI PALAUNG FARIGHTWARDS AND NOON FINAL FORM AND BOTTOM "
+ "ENDRIGHTWARDS TICKHORIZONTAL FILLOWER HALF BLACKIZED WHEELCHAIROW-FALLING "
+ "TONEDOWN SEQUENTIALOLIDUS OPERATORS IN SILHOUETTERIGHT RERENGGANEH "
+ "INITIAL FORMIRCUMFLEX BELOWNOT APPROXIMATEHREE DOTS BELOWHOUSAND "
+ "STATERSATA LINK ESCAPEULTIPLICATION XS ELEVATUS MARKITH FINGERNAILSNOON "
+ "WITH KASRAWITH DOWN ARROWSTERESIS SYMBOLGEMINATION MARKVERTICAL SECANT "
+ "THROUGH CIRCLE2 CHARIOT FRAMETIP ON THE LEFTNVERTED UBADAMA AND LOWER "
+ "LEFTQUADRUPLE DASH THUMB STRAIGHTR WITH FISHHOOKRIGHT HALF RINGUBLE RING "
+ "BELOWUGMENTATION DOTINUS SIGN BELOWFIVE-LIKE BHALEHORIZONTAL DASHCORNER "
+ "WITH DOTWITH RAIN DROPSNO GOOD GESTURESTROKE NOT SIGNINVERTED LAZY "
+ "SSALTER PAHLAVI ZAIN FINAL FORMERICAN FOOTBALLMURDA MAHAPRANADAGESH OR "
+ "MAPIQBAR ABOVE UNION PLUS HI PLUS AHOOKED INDEX UPRMAN PENNY SIGNBLOCK "
+ "DIAGONAL ALTERNATE FORMTRIANGULAR MARKPLUS SIGN BELOWDI ALLAHOU "
+ "ANHUVOWEL SEPARATORVOWEL SHORTENERPUT SYMBOL FOR ALMOST EQUAL TOOGOGRAM "
+ "KHAMTI DOWNWARDS TRENDLEVEL TONE MARKLEVEN FULL STOPSIXTEENTH NOTESCKET "
+ "CALCULATORACHES THE LIMITALIF LENGTHENERON TOP OF MODEMIGN PALI "
+ "VIRAMAAMOUNT OF CHECKORIZONTAL COLONNA DOUBLE HELIXIMISEOS "
+ "CHRONOUMARRYING MAIDENODO SOFT HYPHENEXPONENT SYMBOLCLOSED BY CURVEBROWS "
+ "STRAIGHT BELOW LONG DASHTHROWING A KISS NEPOSTOYANNAYAAMARITAN SOURCEZERO "
+ "WITH SLASHIAMOND UNDERBARLONG HOOK BELOWRECEPTIVE EARTHIDING "
+ "ENCLOSURELONG VOWEL SIGNLOOK OF TRIUMPHHAKING PARALLELGHT REPEAT "
+ "SIGNPRISHTHAMATRA ELOSED INSULAR GDOUBLE TRIANGLEAI LAING DIGIT TVIMADUR "
+ "SYMBOLOND PLACE MEDALDIALYTIKA TONOSYNCHRONOUS IDLELIGHT AND LEFT "
+ "S-KAPYEOUNPIEUPONTOUR INTEGRALALAYHE ASSALLAMONJOINED HINGEDSHITA PLUS "
+ "GISHRECITATIVE MARKETIC VERSE SIGNUR POINTED STARMALAKON CHROMA LMOST "
+ "EQUAL TO HALF FILL SPACEND OF PARAGRAPHLD TAMIL SHORT TWO WITH "
+ "STROKEPERTHO PEORTH P WITH DIAERESISN-ARY SUMMATIONING SYMBOL FOR WHITE "
+ "DOT RIGHTESSARON CHRONONTILDE DIAERESISU PLUS U PLUS UBINDING "
+ "BRACKETREVERSE SOLIDUSUSTER NOTEHEAD WITH RIGHT LEGORT EQUALS SIGNEFT "
+ "REPEAT SIGN OPERATOR WITH HEH MEDIAL FORM CORNER BRACKETLATION "
+ "FUNCTIONFRACTION DIGIT REAMY EYEBROWS DESCENDING TONETOP RIGHT FATHATHREE "
+ "DISH TENUGGLY LINE BELOWMEEM FINAL FORMDIGA AELA-PILLAODIFIER DAMAGEDAND "
+ "PARALLEL TOO-CURRENCY SIGNESIDE LESS-THANFTER COMPLETIONREVERSED OPEN E "
+ "WITH DESCENDER OVER SIG4 SHU2E OVER INFINITYICTED LEFT ENTR CLOUD AND "
+ "RAINLEFT HAND INDEXIMPERFECTA RESTE MUSICAL NOTESBENT OVER INDEXO-HEADED "
+ "ARROW INGLE HEAD MARKE ISOLATED FORMLEFT DOWN BARB LEFT HALF BELOW OVER "
+ "MOUNTAINSLEFT HALF BLACKSHORT OVER LONGIRCULAR VIRAMAINVERTED "
+ "BIRGAUSPENSION MARKNORTH ARABIAN SECOND SUBUNITRIGHT CROSSBARWITH LEFT "
+ "HOOKD-UP NEWSPAPERINSERTION SIGNS-OR-PLUS SIGNPARATED SYMBOLDOUBLE-STRUCK "
+ "ING HANDS SIGNINOLOGICAL DOTPAO KAREN TONEERIAL ARAMAIC UPPED INDEX UP "
+ "HASER FOR VAVWORD SEPARATORPARTMENT STOREERIC INDICATORSEL "
+ "LOCOMOTIVEPLITTING APARTBLADE SCISSORSBLACK TRIANGLETELPIECE "
+ "CLOCKINVERTED DAMMAPLETE INFINITYPLE WITH HEARTREVERSED-SCHWATETARTOS "
+ "ICHOSREATIVE HEAVENINFINITY BELOWHREE-LEGGED TEPROTECTED AREABAG "
+ "MEMBERSHIPTRIANGLE WITH PPED MIDDLE UPBHATTIPROLU AAFORWARD INDEX "
+ "E-PER-EM SPACEIGHTWARDS VANEX-PER-EM SPACERD PLACE MEDALIKHAYA PUTNAYAPEN "
+ "MARK BELOWTRANSMIT STATEDEYTEROS ICHOSEXTENDED BHALEAND COLD SWEATORD "
+ "SEPARATOR SHESHIG TIMES ORANGE DIAMONDR-OFF CALENDARR-PER-EM "
+ "SPACEEMIVOWEL SIGN EXCLAMATION OHAMNUC PII KUUHVONIC ASTERISKLEFTWARDS "
+ "AND OPPOSING PIRIGLEFTWARDS TICKDI ALLAAHU ANHTIMES SHU TENUSEPARATOR "
+ "MARKOTHERS CIRCLEDTHOUSANDS MARKUBHAYATO MUKHATHOUSANDS SIGNSMALL "
+ "TRIANGLEUNDER RELATIONMEDIUM DIAMOND WITH LONG LEGOBLIQUE HYPHENFATHATAN "
+ "ABOVELESS-THAN NOR SMALL LETTER DGYPTOLOGICAL ATHREE POINTED OT TILDE "
+ "ABOVEANGLE OPENING OSITION SYMBOLTWO WHITE DOTSTURKIC LETTER REE-LINE "
+ "STAFFONE SOUND WAVE1 OVER LAK-081TWO ENCLOSURESUNIT SEPARATORENNA WITH "
+ "BARSAI LENGTH MARKREH FINAL FORMLOTUS POSITIONDIO MICROPHONELOWER "
+ "DIAGONALROSS ON SHIELDAFETY SCISSORSUNION OPERATOROLD RESOLUTIONSIDE-DOWN "
+ "FACETWO DOT LEADERGHT WITH STARSUAL WHEELCHAIRYOUTHFUL FOLLYLETTER "
+ "OVERLAPLEVEN TWELFTHSALLAJALALOUHOUTIMES OPERATORVRON SNOWFLAKEED SYMBOL "
+ "FOR TOP HALF BLACKALF TREE TRUNKOOTNOTE MARKERSMALL LETTER JLIGHTLY SMALL "
+ "DIRECTION FLIPLINE EXTENSIONDIAGONAL MOUTHCRIFICIAL WINEDIAGONAL PATH END "
+ "OF SECTIONLITTLE SECTIONDUN3 GUNU GUNUEPENTHETIC YUTVERTICAL HEAVYJOINED "
+ "SQUARESZAKAYA LANTERNCOLON OPERATORQUINARIUS SIGNOVER GUD LUGALVERTICAL "
+ "COLONTHAKA ANUDATTATHALAN ETHEL ODASIA PNEUMATAVERTICAL ABOVEHESIVE "
+ "BANDAGERING MEGAPHONEOUSING THUNDERNYI ZLA NAA DAERTICAL JOINEROUR OBOLS "
+ "SIGNTHER CHRISTMASAROUND-PROFILEO HEADED ARROWWITH DOT BELOWUP RIGHT BARB "
+ "MRACHNOTIKHAYAISED ROUND DOTVENIENCE STOREISSION TICKETSITA PLUS GISH "
+ "WITH FATHATAN XO EKFONITIKONVERTICAL LIGHTMOTHETIC ABOVEAU LENGTH MARK "
+ "AND DIAERESISIVE KEY SYMBOLIVE OBOLS SIGNIWAZ TIR TYR TSURROUND FROM "
+ "STRONG ISOLATEMENSION ORIGINA-ESASA DOTTEDZAH WITH MEEM CENTURIAL "
+ "SIGNCEPTER OF JOVESPEECH BUBBLES WITH BACK YERSHAN MEDIAL WACERTAINTY "
+ "SIGNFINAL ANUSVARALACE OF SAJDAHFFICE BUILDINGGUNU TIMES ASHSMOKING "
+ "SYMBOLFFED FLATBREADDOES NOT EXIST CROSSING ESH2HIRD-STAGE HLIMID-LEVEL "
+ "TONEUND MARK ABOVESQUIGGLE ARROWHERICAL ANGLEUE OF LIBERTYCROSSING "
+ "MUSHQUARTERS SIGNGYA GRAM SHADCROSSING GABACIRCLE INSIDE "
+ "DIMINUTION-1HAKASSIAN CHE B BAR SYMBOLED DOUBLE VERURNED W BELOWGAW KAREN "
+ "SHAUFFLE PRODUCTEELING PERSONGTER TSHEG MAUSICAL LEIMMATOP HALF "
+ "RINGUPWARDS TRENDCITATION MARKUDDISA SIRRAHEAVENLY EARTHTIRTA TUMETESHREE "
+ "TWELFTHSUSHING UPWARDC-SIMPLIFIED RANKS CASKET CROSSING GAN2GIFT "
+ "ENVELOPEWO-LINE STAFFCHECKER BOARDH-SPEED TRAINUPPER RIGHT-SI WITH STROKE "
+ "BZHI MIG CANH-HEELED SHOEUPSILON WITH CROSSING KA2CRIPTION TAKEGAR "
+ "FRACTION TED HAND SIGNRACKETS ABOVEEDIC ANUSVARA DIRECTIONAL THIRD "
+ "SUBUNITHIEROGLYPHIC HAM DIGIT ONECRIPTIONAL PAH-TYPE SYMBOLCULATED LORRY "
+ "BAT AND BALLED PAPERCLIPSHINESE TONE Y AND YEN SIGN AND SKI BOOTBY "
+ "DEFINITIONCHRYSANTHEMUMLATERAL CLICKOTEHEAD BLACK WITH INK PEN WITH "
+ "JEGOGANSMALL LETTERSANNED LEATHERTU WAS-SALAAM WITH OVERBARLEFT CROSSBAR "
+ "WITH TEE TOPRIPLE SVARITA WITHOUT SNOWAN RUPEE SIGNERTION SYMBOLAMUHU "
+ "ALAYNAAEMPHATIC TONERIZONTAL TAILOPPOSING NAGAMESSENIAN TENLEGETOS "
+ "ICHOSNTY FULL STOPMODIFIER MARKARLAUG SYMBOLMOBILE PHONES TIMES "
+ "KASKALSSAGE WAITINGARGOSYNTHETONVICE CONTROL VARIANT "
+ "FORMYUUKALEAPINTUFINAL SEMKATHARCHAIC KOPPADVOECHELNAYA L-FLOORPLANE "
+ "L-TYPE SYMBOLL-YEORINHIEUHAR WITH QUILLNUMERATOR ONEFICATION CARDMINUS "
+ "WHITE XAPPED PRESENTMINUS SIMILARNUMERIC SIGN MINDER RIBBONOGOTYPE SIGN "
+ "OBLIQUE LINE SHESH PLUS KIDIC MARK SIGNMASORA CIRCLEMARKS CHAPTERRRIAGE "
+ "RETURNONAL COMPUTERACCOMMODATIONRPENTRY PLANEMALO POVYSHE OMAN NUMERAL "
+ "EUROPE-AFRICAROTATED BIRGALOSING SPIRALOLVING HEARTSSIA-AUSTRALIALVE FULL "
+ "STOPLVEOLAR CLICKWAVY LOW LINEWAVY OVERLINEMALL LETTER "
+ "ZWAW-AYIN-RESHMAILBOX WITH U WITH STROKEONGRATULATIONDIAERESIZED UALL "
+ "LOST SIGNVOWEL SIGN PAYOD YOD PATAHMELODIC QITSARN PENTATHLONYMBOL TAU "
+ "RHORNAMENT STROKOON NOTEHEAD LISION SYMBOLW RING INSIDEEVERSED DAMMAWITH "
+ "ASTERISKONG-LEGGED DEONG RIGHT LEGSILI PNEUMATALOCATION SIGNLOCK WITH "
+ "KEYRTABLE STEREOMEDIUM SQUARESIGN PAMUDPODSIGN AVAGRAHAUPERIMPOSED "
+ "XFROWNING FACET-FLOORPLANE OPPOSING KURUPONDIUS SIGNE-GRAVE-ACUTEDOUBLE "
+ "MUCAADE-FLOORPLANE INTEREST SIGNFT ARROWHEAD BASAN LETTER RIGHT "
+ "HARPOONINVERTED FORKBALL AND HOOPDOUBLE HYPHENINVERTED TURNIOT SYLLABLE "
+ "BACKSLASH BARNASALIZED TONIRCLED INDEX E-ACUTE-GRAVEVE-LINE STAFFILABIAL "
+ "CLICKZHOU NUMERAL G-SSANGKIYEOKIDE LOST SIGNPREPONDERANCEIFI ROHINGYA "
+ "BOWING DEEPLYPPOSING LUGALBOTTOM CORNERIGHT TWELFTHSTAI LAING TONBOHAIRIC "
+ "KHEITE ORDER MARKE-MAIL SYMBOLSYNDESMOS NEOILE SEPARATORSAMYOK SANNYAPLUS "
+ "OPERATORFULL SURROUNDDOUBLE STROKESAL PLUS TUG2T-SHAPED SIGNIMAGE BRACKET "
+ "OF THE HORNSNG TERMINATORS KAI APOTHESS UP "
+ "TOGETHERFOREMENTIONEDJ-SIMPLIFIED TROFLEX CLICKDOTTED ZLAMA JES SU NGA "
+ "ROFIXED-FORM RAS KRYZHEM ON VERTICAL BARSMULTIOCULAR OWITH INTEGRALK WORK "
+ "SYMBOLSELECTED AREAVERTICAL FILLOUT MIDDLE UPOURTH SUBUNITSTERTIUS "
+ "SIGNARROW OVERLAYSTABLE SYMBOLARRED TRIDENTIVE SLOW SIGNNINE TWELFTHSVEN "
+ "POINT ONEISH LIRA SIGNNAP PIZZICATONING MOVEMENTAWELLEMET YAZPAIRED "
+ "ARROWSER BOARD FILLPA NJI PIPAEMDOUBLE CIRCLEVERGREEN TREEITING "
+ "THROUGHVERLAY MIDDLEIVE POINT ONEAUDATE CHRIVIEHU FEOH FE FDOUBLE ARCH UH "
+ "PLUS GISHDOT OPERATORDOWN NEUTRALDOUBLE ARROWERPENDICULARDOWN HARPOONE-2 "
+ "PLASTICSE-6 PLASTICSE-5 PLASTICSFOLDED HANDSTRIPLE FLAMEE-4 PLASTICSE-3 "
+ "PLASTICSFLAG ON POSTDASHED ARROWUGHT BALLOONEAGULL BELOWE-1 PLASTICSGEBA "
+ "KAREN IEIGHTH NOTESUPPER CORNERE-WALLPLANE EEPING SMALLTRAGRAM FOR FT "
+ "RERENGGANGAW KAREN EUULL NOTEHEADCROSSING NUNFOUNTAIN PENUBSCRIPT "
+ "TWOFORMING ARTSTRIPLE DANDAGBY FOOTBALLE-7 PLASTICSDICTION SIGNUME "
+ "INTEGRALENARIUS SIGNCRESCENT BAREVERING FACEDUG TIMES NIENOS "
+ "CHRONOUEMISOFT SIGNENT ARROW POUN WITH RAYSDIGRAPH KOTODIGRAPH "
+ "YORIFERENCE MARKCROSSING LU2CROSSING KALGERED TREMOLDE MARK SIGNCROSSING "
+ "GI4TOP-LIGHTED ETRETES SIGNFEMININE DOTDENTAL CLICKFALLING DOTSEMELY "
+ "HEAVY META STAVROUWITH SMALL VMUM TIMES PAMPTY CENTRE MPHASIS "
+ "MARKMONOGRAPH UKMING TO MEETSH ZIDA TENUMIDDLE PIECEWIGGLY FENCEAA "
+ "AS-SALAAMMEDIUM SHAFTMEDARY CAMELSHED BARLINEMBELLISHMENTMANNAZ MAN MACE "
+ "INTEGRALMALL SECTION6 LONG NGGOOS SUBPUNCTISNJALA GONDI X-LINE "
+ "STAFFS-MINUS SIGNNGLE BARLINENERSHIP SIGNNEPOSTOYANNYNDING PERSONNCK "
+ "CONSTANTNATURAL SIGNNARROW SHAFTSAZ IS ISS IN-OFF SYMBOLN-COPPER OREN "
+ "ELEMENT OFSECTION SIGNLEFT-LIGHTEDVOLTAGE SIGNLEFT HARPOONAND CRESCENTAND "
+ "OPERATORANG KHANG GYLEADING EYESSMALL DOUBLELAYING CARDSALTERNATING "
+ "LATALIZATIONSMALL TRIPLELARGE TRIPLELARGE DOUBLELAM WITH YEHSMILING "
+ "FACEAR DIAERESISACUTE ACCENTWDATA SQUAREMADDA ABOVE M NSHUT NYAMSHORT "
+ "RIKRIKLUS NOTEHEADLU PLUS ESH2LOWERED FLAGAFU LEERAEWALOWER CORNERLLE "
+ "PATTERN LINKING MARKLINGING FIRELIMBS DIGITSLICKING LIPSALLPOINT PENALT "
+ "PAN SIGNLER CONSTANT OVER MIDDLEPROTOS ICHOS LINE SYMBOLPOUTING FACEPORT "
+ "CONTROL OF ANTIMONY ON PEDESTALPHEME JOINERRIGHT DOUBLE OVER KISIM5 "
+ "KLYUCHEVAYA OVER TWO PIRIGHT SINGLEOWER RIGHT-S SKEWED LEFTR-LINE "
+ "STAFFRCHAIC SAMPI AND PICTURE AND TOP ENDRANCHING OUTREFACE COLONRAH BEN "
+ "YOMORAFFIC LIGHTREN CROSSINGOVER KASKAL CROSSING GUQUIRREL TAILQUARTER "
+ "SIGNQUARED ARROWREVERSE MARKRIAGE SYMBOLRIAL TRAMWAY IN TRIANGLEOGOGRAM "
+ "NYAJRNAM BCAD MAONISHED FACEROLLING EYESON US SYMBOLOK HAND SIGNROUNDED "
+ "ZEROOING TO MEETOID NOTEHEADOHAZARD SIGNYMBOL BINDU YAN NUMERAL Y-MOUTH "
+ "FACEO-WALLPLANE NUITY SYMBOLS ABOVE SIGNXIRON KLASMANOTCHED TAILNOTCHED "
+ "HOOKNORTHERN TSEOUR TWELFTHSRING OVERLAYOUGHT BUBBLEOUCHES THUMBRION "
+ "CHRONONOTTOM HALF O WITH DAGESH WITH INDEX WITH UPTURN-DZUD RTAGS "
+ "RISTMAS TREEOQ NSHUT YUMOPPOSING LU2OPLE HUGGINGOPENING "
+ "LEFTYOUTHFULNESSYOD TRIANGLEINTEGRATION HIBITED SIGNCENDING NODETHIC "
+ "LETTER BAHIRGOMUKHAINUSOID SIGNTHETA SYMBOLVAKRAHASANYABACK OF HANDBEHIND "
+ "CLOUDING OPERATORING HITTING ING HAND FANING ENVELOPETHODOX CROSST "
+ "MONGKEUAEQIRAGANA HOKACHAIR SYMBOLCHARACTER-18CHARACTER-1BIRTY-SECOND "
+ "THREE HEARTSVER EQUAL TOHEAD-BANDAGEATTOOED HEADIVE TWELFTHSHASIS "
+ "SYMBOLATNAH HAFUKHTERMINAL MARC-SSANGPIEUPHYPHEN-MINUSIGATURE SHRIBRATION "
+ "MODEIDENTICAL TOUSEATED FACET-WALLPLANE BRIDGE ABOVEBYSMAL WATERBSCRIPT "
+ "ALEFTEARS OF JOYTE SEPARATORBUTTON MOUSEINVERTEBRATEUTH ARABIAN BLUE "
+ "DIAMONDCABBAGE-TREEBLOWING FACETERNATE AYINILLED CIRCLEBLACK "
+ "CIRCLECALENDAR PADUTH-SLAVEY KHREE FINGERSIN MIDDLE UPBINING MARK INDEX "
+ "MIDDLEBETWEEN LIPSKANA REPEAT KHMIMIC KHEICK-O-LANTERNCIAN LETTER HALF "
+ "BRACKETARALLELOGRAMVICTORY HANDGREEN DRAGONHAH WITH DALCIRCLED "
+ "PLUSCIRCLES AND K-FEATHERED SS-THAN SIGNGROUND SLIDEL-WALLPLANE SUR OVER "
+ "SURGUARDED AREAHAND FORMAT KAPPA SYMBOLZZA WA JALLOR OPERATORRACHMA "
+ "SIGNRITING HANDHREE BALUDARANSMISSIONORCE SYMBOLRIPLE PRIMERADITIONAL "
+ "RISING DOTSGSUM -KHYILORT BARLINERAISED FLAGGE AT "
+ "NIGHTGAYANUKITTATETRAFONIASONIAN SIGN OLD NUBIAN ENTHESIZED OMAN SIYAQ "
+ "ROSS ACCENTROR-BARRED RONTHISMATATONAL MARK ENG DIGRAPHOPPOSING IMTRAIGHT "
+ "WAWROKUTASTI AREAKTHROUGHTENS DIGIT I TIMES NUNGRAMMA SIGNRCHAIC JNYAEN "
+ "STRAIGHTI TIMES BADOPPOSING ENPUT MORTUUMPERISPOMENIEEZING FACERIGHT "
+ "HEAVYRIGHT LIGHTRIGHT GUARDPARAKALESMATHIRDS SIGNHIGH STROKEUETTE BREADUG "
+ "RTAGS GYTIGHT ACUTEHEAVEN MARKPHNAEK MUANHEATED FACETHREE "
+ "TIMESPODCHASHIEMGHAIN WITH HAWH HMONG POST OFFICEHANG KHUDAMTIEE "
+ "SHEUOQOUNDED FACEGENTLE WINDHOKHLOM ON EDICAL MASKGUISED FACEOUTHERN "
+ "TSETHAM DIGIT OUBLE DANDAOUBLE ACUTEHORA DIGIT TH-THALATHARA SOMPENG RING "
+ "LIQUIDUBLE TONGUERESPONDS TOHAGGAR YAZHOVERSTRUCK HLETIC SHOEHITE "
+ "DRAGONQUIQUADRATERIGHT-LIGHTRIGHT-HAND QUERED FLAGLUPOVODNAYALU PLUS "
+ "IGISVASTI SIGNLTERNATE YAEUNJOMNDEUQLTERNATE AALTED FLOWERLT OF "
+ "CLOTHIVERY TRUCKLOTI NAGRI LOSING MARKLOSED ENTRYLON SKEWED SIMILE SIGNIX "
+ "TWELFTHSSINGLE AND JECT SYMBOLTWO FINGERSMED RGYINGSME LONG CANIPLE "
+ "TONGUEMBLER GLASSMBA BAYANNASHEQEL SIGNIRCLED TEXTJIHVAMULIYASYURA "
+ "SASAKETEI MAYEK MACING FACEISIGOTHIC ZLEAF CLOVERTUNE COOKIESMALL CLOUDLD "
+ "SCRIPT XLD POLISH OLCE TSA CANKRAINIAN IELATIN CROSSLEEP "
+ "SYMBOLKTIESELSKABSPIRATED FAFINAL SIGMAL OF THREADL TIMES LALL-SHAPED "
+ "HALACKLETTER LACK SULFURLINE FILLERLEU SATANGAJOYOUS LAKELETION "
+ "MARKSTORIC SITEKAARA POLLULEFT SYMBOLLEFT SINGLETRUNCATED ASTERED "
+ "SIGNLEFT DOUBLESTEAMY ROOMFIRE ENGINETA EQUAL TONTIMONY ORETRIAN CAMELNSE "
+ "CHEEKS FGHANI SIGNIEN MONSTERRYVNIA SIGNNITIAL IZHENIKOLSBURG IL "
+ "FRAGMENTS-SSANGSIOSNG LEFT LEGILCROW SIGNILIQUA SIGNOF MASHFAATEORGIAN "
+ "NARIBLE-CREE YOHINGYA YEHEPIDAUREAN RPOON ABOVETRESS SIGN OF "
+ "ENVELOPETRESVETLAYANYIS -KHYILRUNNING MANNUSVARA ONENUN HAFUKHAG-BJARKAN "
+ "BNUMBER ZERONUMBER SIGNMIDDLE HOOKMOVED BELOWMONOCULAR OT-DIAGONAL "
+ "SHAAYATHIYAMIDDLE STEMFOURTH ROOTMIDDLE BENTMICAL HEARTMFON PIPAEMESH "
+ "DIGRAPHTY THOUSANDERCENT SIGNND OF PIECEND ODD SIGNNBLENDED "
+ "UKIMULTANEOUSIN EQUAL TON-LUE KARANSECOND MARKN THE "
+ "VERGEINDERGARTENINDICESIMA MUUSIKATOANMUNCIA SIGN0 FOOTSTOOLATION POINTE "
+ "WITH VEILATION SPACEAESHAE NYAMCH AND LAMPAESCULAPIUSCELANDIC-YR WITH "
+ "TITLO WITH TRILL WRIST FLEX WITH PLATECE OF PIZZAWASLA "
+ "ABOVECCUMULATIONCARET TILDEACTIVE SIGNCAN RGYINGSURRENT SIGN WITH DASIA "
+ "TIMES ESH2DOUBLE RINGULO TWO SUMDOUBLE SHAD UPPER HALF WITH CARON WITH "
+ "COMMACHING CHICKWE PALAUNG DENT EMBLEM WITH FLASHAGONAL SIGN WITH "
+ "JACKSUMAN FIGURE WITH MAPIQATH PRODUCTAGAZ DAEG DWING NEEDLEBANK SYMBOLWO "
+ "TWELFTHSBOTTOM MARKDYO CHRONONUNIT DIGIT DIATONON DI6 LONG "
+ "NGGEVAMAGOMUKHABELOW RIGHT5 LONG MBOOUTH CORNERSDUATION CAPWINKING "
+ "EYEUNJO WYNN WVA V CHELNUBLACK ARROWWITH GARDENBETA SYMBOLUTING "
+ "WHALEBUMPY ABOVEACKED COMMADESK PERSONAWNING FACEE NOTEHEAD BZHI -KHYIL3 "
+ "LONG NGGOUSHING HANDAILLESS PHIVE OF PEACEXTINGUISHERXI RADICAL XESTES "
+ "SIGNBREVE BELOWXAGRAM FOR VARYS ICHOSWHITE JOKER OF FORTUNECROSSING "
+ "PICROSSING IMCROSSING ENCROSSING BUEAVER DENE OF FLOWERSZERO THIRDSDE "
+ "KIKAKUI OVER BULUG OVER IDIM VYKA ABOVE ARM SPIRAL AMUSED FACEZIGZAG "
+ "LINEDASH SYMBOLDONG TSHUGS I ZAPYATOYCURVED BENDDOLLAR SIGNUISHED "
+ "FACEARAM GONDI CTION MARK DECORATIONCROSSING URCOMBINATION S "
+ "ZAPYATOYDOACHASHMEECREDIT SIGNVISARGA ONE SHOE STILE TACK BELOWDOUBLE AND "
+ "DALETH-RESHASE TO THE DOUBLE MARKARPEGGIATO ARMS RAISEDCONTAIN AS E2 "
+ "TIMES AN PLUS MASH2 PLUS NUNUZ AND MACRONCOND SCREENUPADHMANIYA "
+ "PROPORTIONAR ECLIPSEINITIAL RABA SATANGAL-AMMONIACION BOTTLEINDU "
+ "BELOWBELOW LEFTLA USED ASW OR MODELSMILO SIGNLACK JOKERING-SHIFT VIE "
+ "CAMERAINHERENT AST PALETTEATTY WITH ATRICHISMAIXTHS DISHKHAMTI "
+ "TONST-FEEDINGSTRUCTION LANE MERGEKINDI MVOPKAI SYMBOLSTROM SIGNJONG TILE "
+ "ASTERISCUSKA SATANGAASPIRATIONK2 PLUS BUSTRAL SIGNSSANGARAEASPACE MARKT "
+ "AND BOLTAYER BEADSVEL SLIDERKRYZHEVAYASYMBOL VIDSS OF MILKISMUTH OREIT "
+ "MBAAKETVERAGE BOXSYLLABLE MAUTOMOBILEITH DIGIT H-HAGALL HCROSS "
+ "MARKCROPHONIC TOP CORNERGHT LIFTERGIMEL-HETHGOLUBCHIK COMPONENT GREE "
+ "SLASHCOLATE BARGUA PI MAOTING HEARTTIMES SIGNGGRAVATIONH-LOW "
+ "TONEHAAPRAANA CISIVENESSCIRCLED CAUR CORNERSUR HUNDREDHAMZA MARKHAN DIGIT "
+ "UR YIG MGOUPPER HOOKTRIPLE DOTUP OR DOWNFOR RECORDFORMATTINGDA SATANGADA "
+ "PLUS HAD WITH DOTFRONT WALLD KEYBOARDFTOGGOS OUCY MESSAGEG MUNDARI "
+ "G-HAGALL HTRETCHED CTRESS AND TREFACTIONCURLY HAIRCURLED WAWGAGE "
+ "CLAIMGANDA MARKGAP FILLERGAS BZUNG GBAKURUNENGGING FACEICK FIGUREIDDLE "
+ "MARKBSTRUCTIONBROKEN BARIDEOGRAPH IED SHRIMPTAMAN SIGNIGATING RAIGATURE "
+ "OETAIL GLASSBOLD SHAFTICAL TAPERIKRON ISONT-TO-LEFT BLUE HEARTILE "
+ "FOLDERT-STACCATOUTH OR SPYBLACK FLAGINDEX BENTHEAVY BEATCHECK "
+ "MARKHERMOMETERCHAD RTAGSCH BALLOONTHMIKON N URIPIGMENTTHDAY CAKETHAPASCAN "
+ "HOOK ABOVECAPITAL ETHOTIC HOOKHREE ABOVETESE CROSSTERNATIVE HUNGARIAN I "
+ "ARCHAIONURVED OMETC WITH DOTTEMPLATIONUSEL HORSE WITH LOW WITH BASE WITH "
+ "BELTOTING STAR WITH EGGSRING THUMB WITH PAGEOSTAL MARKOSSED SHEI WITH "
+ "RAINORTHERN TA WITH RINGOUND OMEGA TIMES SHEOUR FIFTHS THOUSANDSOUT INDEX "
+ "OUTER JOINOUTHERN TARING ABOVEYEH BARREERUPEE MARKRUDIMENTA O-STACCATORSI "
+ "SYMBOLOANDAKHIATOF HYGIEIA2 PLUS ASHROUNDED ERROUND DOT 2 LONG MBOOLING "
+ "FACEOLON EQUAL SCHROEDEROMMA BELOWON MEDIAL 1 LONG MBEROEZENIAN ROAD "
+ "OMEGAOON LILITHOON SELENAYMBOL AIVAOP NKAARAEOPEN SHELFRKING FACEOROME "
+ "SIGN WITH TICK WITH TAIL LATE FORM KABA TENURHO SYMBOLRGE SQUARERGE "
+ "CIRCLEREVERSED I DRAWINGS QUISH QUADRESH BELOWR2 PLUS SUZH "
+ "DIGRAPHRAGGISMATA AND MOUSE AND KNIFE AND BREVERCHAIC KHA AND "
+ "ARROWRDHACANDRA AND ACUTEZU OVER ZURECIPITATERED DRAGON OVER KAD5 RGYA "
+ "GRAM RESUPINUSRIGHT SIGNZAR AMULET POVODNAYA PLUS SHU2PASSIMBANGPEDAL "
+ "MARKRIGHT HOOKZEIRO SIGN OF DHARMAPLUS BELOWPLUS ERIN2PLUS NAGA POETRY "
+ "MARPOLICE CARPOUND SIGNPPOPOTAMUSPRALINEAR LPAPRAANA EBENSTIMMELL "
+ "MODIFIEW-MID TONEAKKHANGYAOLLOW HEARTLOGICAL ORLONG FINALLONG OVER LONG "
+ "TSHEGAILED BIRDLOWER HOOKALEF LAMEDSIDEWAYS ILTERNATE ULTIC "
+ "CROSSAEDA-PILLAM STALLIONADDA WITH ADAK BINDIMADDA MARKMALL ALEPHACE "
+ "FILLERSHMIRI YEHLEK ATTAK SMALL TAH ANGULAR TOLE LETTER "
+ "LE-DELAYEDANGKHANKHUANDHI MARKLEFT GUARDLEFT HEAVYLEFT LIGHTLEFT "
+ "SERIFLEFT-HAND ABATA TREELENDED YUSLET SYMBOLALLY MARK LGIZ "
+ "EOLHXSIXTEENTHSLHAG RTAGSLIGHT BEATLIGHT BULBALENT SIGNALEF WITH "
+ "LIMITATIONLINDRICITYWO SHORTS SELINE ESHWITH STRAWN NGGEUAETSE-CREE "
+ "SKWITH WINGSNAKE BELOWSBUB -CHALNANGMONTHONASPIRATEDNDA PA NJINDU "
+ "TEMPLEWITH SPOONNGLICANA WNGUAGE TAGXED BICEPSXHEEJ CEEVNOTE WITH NTITY "
+ "MARKNTO SHRINENUMBER TENY AND RICEMILLE SIGNAB2 TIMES ME PLUS ENSHARP "
+ "SIGNMEEM ABOVEWHITE FLAGWHITE HAIRMEL SYMBOLWHOLE NOTEWING HEARTA TANG "
+ "LAIA PLUS KURA PLUS IGIVIOUS PAGEMINO TILE SGOR RTAGSMONOFONIASMONOGRAM "
+ "BMONOSPACE MONTH SIGNMOTORCYCLEED FINGERSUP NEUTRALEQUIHOPPEREICH "
+ "STARKENETRATIONF SHE-GOATUMAN EARTHEIGHTIETHSUBLE DASH DIRGA "
+ "MUREDIATONIKI ELT BUCKLEEUTRAL YERU2 PLUS BAENTHUSIASMTWENTIETHSDED "
+ "PERSONTWO SHORTSDUOUS TREEUM ROTUNDADIGIT ZEROEDESTRIANSTWO THIRDSEARLY "
+ "FORMEEKING EYEUAM TSHOOJEONGCHIEUMFAHRENHEITTYPE COLONENS "
+ "SYMBOLFFICULTIESUP HARPOONEVERSED PEUARDEDNESSEARTH MARKUNKIA SIGNE PLUS "
+ "SUMETTA-PILLAU CIN HAU DOUBLE BARFINAL HETHVEUAENGAMARISTERA URNED AYB0 "
+ "LONG LEEEN WITH CARTRIDGEETER SIGN PLUS GALED PLANETSTAL BALLKU RU "
+ "KHAHIMA SIMA PLUS SAGS DIGRAPHHOT SASAKKSTREPTONXHAUSTIONCANG "
+ "TE-UHINOCEROSUNGLASSESDUS RTAGSYRANISMA ONE THIRDKING BOOTE AT "
+ "LEFTQUADCOLONARM CLOCKKEMPHRENG GARSHUNICAPITAL QCAPITAL IRFUL FACE PLUS "
+ "KU3HOOK MARK FROM BARCAPITAL D PLUS GUDURLY "
+ "LOOPMUKPHRENGSTEBASKETSEMICOLONURNED GANHIYYAALAA PLUS LALLARGEMENTAOS "
+ "ICHOSVINE LEAFLAN SIGN RA OR RITTED STEMHARD SIGNDENT AND SMALL "
+ "YUSEMBEDDINGELEGRAPH PADE SUITR PLUS RAHEADSCARF DIVIDERSANTAYALANHALF "
+ "SIGNHALSHELET RICKSHAWVISARGAYANVERTED RANSKRIT SON GROUNDRWARI DDA3 PLUS "
+ "ANNUSVARAYAY BLOSSOMANTHAKHATCHOSEONG UR-DE-LISED DIGIT "
+ "PAA-PILLAHARACTERSARCHAIC MPANYANGGANTESSENCENTERPRISE-KHYUD PAHI "
+ "SYMBOLSEPTEMBERQUEEN OF WITH FACEURUZ UR UL ME HANDVICE MARK PLUS ZA7KYO "
+ "TOWERMRACHNAYASPIRITUS NSERT AT PLUS TURVIGINTILEON TEUAEQFEH WITH "
+ "LACKFOOT QUSHSHAYASOF PASUQCHAVIYANION-JOINERNTRACTIONCHANICAL "
+ "L-THIEUTHDEPARTUREL-PHIEUPHZIR SASAKL-KHIEUKHBASE UNITNGER SHIPNGER "
+ "ROOTIGHTH ASHROJECTIONVANAGARI NCLOSING BEER MUGSIGN NUKTAIGN SAFHAIGN "
+ "TOMPIPLE HEART5 LONG JONG SANDAL OF BLOODTABE SIGNWRINKLES B "
+ "DIGRAPHTANDSTILLPIDERY HAT OF MEATTAKHALLUST ON BONEPOSITIONSPPOSITIONOP "
+ "HALF O OF PAPERPPOINTED IGH HAMZABOTH BENTZERO SIGNUTRA MARKBIAL "
+ "SIGNWORDSPACETUTEYASATBICYCLISTRO WIDTH E SCOTS SUMED HEADBKHASIAN BITE "
+ "LIPSRIED FACEINARBORASIN SQUAREINAL NOTEBING CANEUVUZHAKKUINNYIIYHEOO "
+ "DENNENINISHMENTBO GYFU GBO BAIMAIING STONENFORZANDONDAILING ING "
+ "LANEST-THIEUTHILE TILDEUURDHAJA ILIPPINE ING GLOVEING DOLLSING CARD RNEY "
+ "PARAPENTASEMEUE MAEMBAPENT SIGNHWAZ EH EETTI "
+ "BALLJERUSALEMTUKWENTISXCITEMENTI PLUS LIN-CREE THX-X BELOWATHAMASATTEH "
+ "ABOVE OVER LUMIVINATIONONGSEONG TED PLANTN MODIFIEFACING UPKAMEYTSA AS "
+ "MEMBERHREE FOR NO TELEIAKA- SHOG UNGSEONG ATTACHED "
+ "TROMIKONK-KHIEUKHK-CHIEUCHETRASIMOUSTRELNAYAK GESTUREIRST MARKVER "
+ "LUGALZENE RINGNIHSHVASANIGHT OF 1 CHARIOTISEN-ISENNIGGAHITAWRY "
+ "SMILEVELOPMENTICE CREAMEURO SIGNBRUL SHADNGUN SIGNIDEOGRAM IDEWAYS "
+ "UN-PANSIOSIA SYMBOLEMPTY SETVERLONG AIALECT-P ITRA SIGNNING SIGNONG GRAVE "
+ "OVER BAL5 BATHTUB5 CYPERUSNA KHONNAAVE ARROWPROJECTORER BUBBLE "
+ "STREAMEROTAL SIGNAINTBRUSHOCCLUSIONAISED DOT APODEXIAFINAL NUN AND "
+ "TAILLONG TIP CRESCENDOFINAL NGAFINAL MEM247 DIPTELOND HAIRAJANYALANWASH "
+ "TAILAF PERSONCUBE ROOTRCHAIC IIFISH TAILAFFE FACE AND CURLODIASTOLEDOWN "
+ "HANDLOWER DOTOVER MUSH2 GARMENTALAYALAM CONJOINEREAVY DOWNLIVERANCEERCIAL "
+ "ATREFORMED LITTLE UP SPARKLERLISSANDO EIGHT OF LINE FACEGREATER "
+ "YCOMPLETEDLIGHTNINGOW KAVYKATONE MAI Y-FOURTHSY-FOURTH CORN "
+ "FACEUPTSTIMMEGITTARIUSMETOBELUSLLABLE OMLLABLE B0TO CORNERAL "
+ "RUNOUTMHANCHOLLD SALTIRE TIMES HAMALL RINGOKOUFISMAMBROIDERYFROM WALLE-EM "
+ "DASHRED JOKERFLAT SIGNMAEMGBIEEABOVE TO TIMES UDMARK SHADMARK CIM DAD "
+ "WITH Z DIGRAPHOKED HEADULL BLANKOKED TAILFOUR BENTEBIT SIGNDRAM "
+ "SIGNELLOWSHIPACE NOTE OVER GAN2LVIN SIGNOUTH WINDTREDECILEENTRY SAWRDEL "
+ "DKARTRAIGHT U AND BELTDOWN STEPRCHAIC RACULTATIONDOWN SIGNUPPER DOTG IN "
+ "HOLEOURA SIGNREAK HEREUBSTITUTEADEG ADEGM HE-GOATHALF NOTEREDNE ON "
+ "G-PANSIOSADMA GDANRPORATIONU TTUDDAGOF STIMMETRESVETLOOUT MOUTHP "
+ "DIGRAPHES AKURU YEH ABOVEOM SYMBOLRTS MEDALP ELAMITECLOSED "
+ "PLALPAPRANAO-EM DASHRTER MARKHAIKSUKI ESAME DOTSHAB CEEBANEROSIS WING "
+ "STAREFORMED TORTH WINDEFAIDRIN H-KHIEUKHZAL SASAKZAKRYTAYAH-CREE "
+ "THH-THIEUTHYEH WITH ERTY LINELEFT TACKVOETOCHIELEFT RINGTIMES NA2TIMES "
+ "PAPVOCALIC RCK SEXTANA PLUS NAALI GALI COIN SIGNP-PHIEUPHYEAR "
+ "SIGNANGGEUAETVRAKHIYA GRIK SIGNANG CITI SIVE FACEEOUT BOXNAVIYANIAPITAL "
+ "FEGORIAN ESH LOOPDIFONIASAMS HORNMIONIAN VE SASAKNAMENNY NCE "
+ "SIGNM-KIYEOKWO ABOVEANC SIGNBACKWARDM ALLAAHDOCUMENTT OCLOCKBIEE FON ON "
+ "LEFT OF YARNT NGGEETOVER BU DVANTAGEAEN NYAMEK ONKARAY-NIGHTIS FORM IS "
+ "WHEELIS-PILLAZAKRYTOELUB SUITOVER GA2LTRY LEGOVER GI4A SIGN AORM FEEDIRD "
+ "MARKBILLIONSE-MACRONVOMITINGAZHAAKKU "
+ "TALENTSVOLUTIONDVISVARAT-KIYEOKOTIFIED SCAN LINMARRATAN "
+ "RASWADIEVERANCEMANGALAMV OVER MACKSPACE NUTILLUABAAFILIMALL AXESHOE JOTME "
+ "BADGEABOAFILIANE TREEDIAMONDSLATION XP-KIYEOKING BOWLING BELLING ROD "
+ "MANDARINEN NTEUMBER POLEBEVERAGEM-TIKEUTWBOY HATE-WELSH ANS SIGNT-HINGE "
+ "P-TIKEUTUKEUTNDAMMATION ULLS LEGSHORT ERSANYAKA EVEN OF 56 TURO2INAGARI "
+ "MON TIMESHKIR KAT-BISHOPBATBEIT LEANING OT MBUAEMY HOUSEKASRATAN "
+ "POLNAYALF RING ER TRUTHLOCATIVERIYOOSANARCASITEERAL URNARRIVINGL "
+ "POLISHDUSHENNA PLUS DI PLUS DUL-LAKUNAL-KIYEOKAIYANNOILONG "
+ "BARKAIYARAAARTYRIA ALLIANCEARSI YEHYPORROONOVERRIDEOREHEAD YRILLIC ETA "
+ "SIGNUNG DASH SLIDINGSENTAGONARKLEAN UDAWADI SQUEEZED0 BRONZEEREVODKA PLUS "
+ "RUDIT CARDLKULIZMYW PRINTSPAVIYANILFWIDTH SPERSIONERDIGRIS8 KANAKOET "
+ "SHOESVESSEL BSIX DOTSRILLIONS YUQ NAELOZHITIEEST "
+ "WINDMEM-QOPHEUFEUAETSOFTNESSPALOCHKAE-THIRTYLA LENGADS-CREE LOW STOP OVER "
+ "KGAR TSHES OVER MUSUPERSETN-KIYEOKPAIRTHRAAVY BAND SATCHELSCRIPT GENICIAN "
+ "UNDERDOT OTTAVA UNDERTIENA METEKLOW DOUBITA MFONVERGENCEWN HEARTPAKPAK "
+ "EN-TIKEUTN-THIRTYSWIMMINGWASH KAFLESS SHAOVER SHEASTERN WLORRAINEAST "
+ "WINDDIM GUNUL-TIKEUTJUNCTIONSIGN LAEOVER TIRN OCLOCKLESSER YLONG S "
+ "TETRASEMEK-TIKEUTSE WEDGESIFISTONJACK OF OVER ZISUCCEEDSJAVIYANIATH "
+ "MARKSNA LDANSMA SIGNATEBOARDATE MARKATAKANA PENTAGONOVER SAGOBOOFILIY "
+ "POPPEROCALIC MICHAEAN LANTANGS-KIYEOK ANTENNAFINAL THRAUGHTS "
+ "HANDLESOLLOWINGHOP BELLCOUNCIL COUNTERSNINE OF RASMIAN "
+ "BUNDANCEELEPHANTPROSTAYAE OF POOUATRILLOBRA FACETAR "
+ "EYESPRECEDESR-THIRTYECH YIWNHALF GURROSHTHI TOWARDS GARITIC ON "
+ "CROSSCEILING BOX TRAYHAN-AKATDDY BEARNOTE PADED BRICKGENITIVEWRITING NI "
+ "ABOVE CEDILLAOCUS OF NOVEMBERODESTONEREE MARKGBASINNATIRRUP RNO THUMBHREE "
+ "OF FILE BOXXCELLENT AT DUSK3 ARMOURCANDICUSTER FACEUBJOINERCOMBINED "
+ "KEMBANGS OCHKOMQAIRTHRAYBEYFILII SHAKTICABLEWAYOM NTEUMCLIMBING1 "
+ "HELMETOMANIAN GREAT SACLOSED TPUSHPIKAH-MADR MNO SLASHCAL DISCCK CHARTE "
+ "OCLOCKRAMMA GGX OCLOCKGORAZDO ROSSED ORSE DUNGTTO MARKOMMA BARUAREG "
+ "YACASSETTEGLASNAYAC SIYAQ NITIAL ZO-THIRTYCHATTAWAO OCLOCKPUN "
+ "IYEKX-THIRTYPUB DAWBTISMOS ETAI LUE FEBRUARYFRAKTUR NED FOODOHM SIGNNG "
+ "RTAGSRESVETLYBOL SIGNONE FOR UP TRUCKWON SIGNIGMOID SQUINTILEYENISEI "
+ "FLOURISHFRICAN DBLE SIGNURAMAZDATRI DISHS-TIKEUTG-MADR MYIDDISH RED HAIR1 "
+ "BARLEYDAMMATANTRICOLOND BUBBLEZWARAKAYCHINESE D CROSS HESPIAN "
+ "G-KIYEOKIMANSIS DANTAJA TAALUJA YESIEUNGEXCHANGENEUTRAL R-CREE RTHIOPIC "
+ "CURSIVE REATNESSEBEEFILIYAMAKKANBINOVILERDEL NAGCURLICUEG-NAUD NBLINEAR "
+ "TURNED M CURRENTBLED CARNRES TOSZWJ THAJED FIGURHARMONICBOT FACEYER YAGHR "
+ "OCLOCKTTENTIONTHOSCOPENFORKINGHARBAHAYSALT OF Y BEETLECER BALLNEIFORM "
+ "TICK IN T-THIRTYOO TYPE WRINKLED FACING FOR STOPANDERERH-RIEUL "
+ "BUTTONCLEAVERP-CIEUCLAGIOS PALLAWALEK TOOH-HIEUHALTILLOVILIK BH-AR "
+ "AEORTIETH3 SPICEURFACE UP HANDWIFRUITH-NIEUNH-OSS OORT-TWICHEINAPMINIMA "
+ "H-PIEUPVOWEL KMINGKALH-MIEUMVILLAINAM ALEFTYSCAPER2 "
+ "GUNUANGOLATANGLONGLASHES 3 MONTHANGLED LE LEAFANGKUOQSERPINADOFONONRY "
+ "FACEAPEZIUM9 NGGAAUN MEUTLAYANNAON FACE CHIKI HALANTASEXTILEHANGUL "
+ "REREKAN CER-WAY GREENY HEART RAMBATENSHUETURATIONCHEVRONAND ENDCK "
+ "LIMEHEADINGRUM CLEOMERANGLAK-050ZQAPHA P-HIEUHHAARKAAAN MARKORKHON "
+ "RESILLOP-NIEUNP-MUOY RA REPALAMITE P-MIEUM WOLOSOVOICINGTIKRAMAHAYANNA3 "
+ "OMEGANUMBERSYELLOW ECEMBER2 WOMANAELAENGDAYANNARDO RJEOF SOAPG-TYR TG-SOL "
+ "S TEDUNGLYGISMAOUT HUBG-RIEULG-OSS OG-MIEUMWAZ "
+ "EOHG-HIEUHOFFICERM-HIEUHOLAPUK M-MUOY TROLLEYYA LAMPYAH LI "
+ "TRYASKAYAYANNARIPPLE GAYANNAEAVY YAAETMEUNM-NIEUNRRECTUSMEETORURD DISKRD "
+ "FACEOVER DUSIDDHAMTRAINERUM IYEKUP MARKOVER ANOKRYTIED-ANGLEMBOL B02 "
+ "OLIVEMANCHU REATHY WAAJIBDA FACEUP TACKUP STEPFLUENCEUP SIGNSHIFT "
+ "TMASSAGESHORT AM-PIEUPM-RIEULMAAYYAAYANMAR OGDIAN DANESE AD NECKOUR OF "
+ "MAI SATWDRIVERDRIL BUU-EO-EUACTER TD MADDADANCINGD MOUTHOX LINEOX BACKA "
+ "NAME 3 AREPATSECHKAFINAGH ALESMA OWILO SRAKHANGLIGHT XCOMING DU NJAA "
+ "SLOWLYWIGNYANVYSOKO DEAVOURUBUFILIL-MIEUMGRADUAL SHAKERCLOSE EALLOT XLEUT "
+ "KARTHIAN 3 EIGHTTIVATE LEYBALLRISIMOULF FACEY-CREE CLOTHESERNIN AA "
+ "-PHRUCOASTERSIGN UDCRACKERA-HIGH MERICASGHEUGHE APLOUNFINAL Y STRIDEOCK "
+ "SALAHAPAKHOCTAGONOCTOBERGENERALRAYANNATORNADOSHAKINGLJUDIJEGOLIAN "
+ "CONTACTRANGKEPCOPTIC P PIEETOT NGOMER THANLLYFISHGLAGOLIEIGHT KOW "
+ "ALEFRARIETYOT REPH26 EYYYBUFFALONIKAHITF DAVIDIC "
+ "WANDTAYANNATCHFORKBULANCEIBIFILIS-IEUNGISIBLE ITALIC S-HIEUHIANGQI "
+ "UKKAKHAS-CIEUCEULEUNGAUNTLETAULDRONPRENKHAT ASHESPHUTHAOBREVIS "
+ "S-RIEULS-PIEUPSYNAGMAITON RAS-NIEUNTASHEEL0 SPEARROKEN "
+ "LS-MIEUMICYCLESUSSYERUAY SIGNSCOOTERICOPTERAXIMATAN-CIEUCATH OF JAIN OMI "
+ "RTAGSI NTEUMJARATI JAYANNAC-HIEUHJECTIVESUKUUDON-BREVENJAEMLI KEFULAJERAN "
+ "JC-IEUNGJIBWAY ATAEAN ASUTORUPTHAHA PERVISEN-RIEULF SASAKAU "
+ "MARKPSTICKSN-PIEUPIVE-PULONGONANN-HIEUHN-GRAVEIL DRUMSANDHI DHALATHSANGAN "
+ "BERRIESBERGINEPOMOFO UYGHUR DIARGONILLEANNWO MARKDYNAMICWO FOR PLOYAN "
+ "BOARDERIKHAHITINNABARTA MARKT-CREE NG MASK MENDUT MUQDAMT-MIEUMSAMPHAOWO "
+ "WAENYMAIC LT-PIEUPBLACHKOEL PUMPBIG YUST-QUEEN6 NGGOOIMILAR MUOMAEEN "
+ "LEAFT-HIEUHT-RIEULT-HIDETNEQUDAAT-SHIRT5 NGGEENESTED TALL AAOP "
+ "MARKPPROACHVAV YOD MAELEEYNAMIC UT TIMEEAD ORE0 WHEATIFIED "
+ "EVAYANNASAYANNA OPEN-OVE DOT NANCIAL LONSUMS-SAJDABORZAYAT-CIEUCROGRESSSA "
+ "VAH POVODNYVANESE VAPOURS1 ARROWWORSHIPONTIEENBARREKHTAISYOURMUKHI "
+ "BAMBOOSBOURINGNAYANNABAIRKANTTILIK THALIYADISIMOUE "
+ "DRINKTTHACANORCULUSHIUCHUS FLEXUSTHESEOSKOMBUVAKOQNDONKORONISQUARIUSKPAK "
+ "WAKAYANNASTERING7 NGUANCAP TENKARO "
+ "BASEGMENTFATIGUEPAYEROKPAYANNAKEUAERIDIPLOUNKHA YATST "
+ "TUBECAYANNAMUNGKAHARRED OARRED BHEXAGONOREVMA QUEEZE THKUQI SOV ROGHI "
+ "SIGNL-CIEUCNTAINS ESTIVALL-HIEUHURGLASS9 CLOTHL-NIEUNL-PIEUPPANESE 3 "
+ "WHEELE GLASS3 SWORDSOLDIERCELSIUSVEW "
+ "NOWSPRINGSMRACHNYFAYANNAKKURUNIKUTAARUKYLISMAXOPHONEARDNESSL "
+ "SEGNOK-PIEUPPURPLE N-ACUTE7 NGGUAPENGKALED "
+ "RICEPEGERMAPUSHPINK-RIEULC-PIEUP PLOPHUASHTRA K-NIEUNK-HIEUHASH "
+ "FROCABINETONG UEXAS SIGN ISLANDPECTIVEETNAHTA9 NJEEMECHIKYSTICK6 HUAN8 "
+ "NYANSHAYIMELLITEMPLING1 GBOO3 NGGA6 GUEI9 MUENSAADIYOOPED 5 MERI2 MBOOON "
+ "KEYOFOUNDNSANAQ7 MBEENGBAT SSLESS7 GUAN8 MBEE0 NGGO7 NDOO0 NYUNXO NEOS "
+ "SHOE0 NYON6 NGGEAASHAEWINDOWENTIMA1 GOLDNKNOWNRISEMENTIIMUOSETTEA "
+ "HAAMNTEVMAOITIC 1 WINEEN GHES TENTN-ALAFOOMUUTN YANGEMASTI5 WOOL2 "
+ "NGGUOGONEK8 NYENWORKERNIZKO NSUZ AME DIENDA TARSIAN 7 MBUUNEUME WO OF "
+ "NTOGENENIKI RKAANUROCKETWN BOW4 KPEE1 NDEE0 NGGIN DASH4 DEERN-SIOSOPEN "
+ "POBELOSDVANCES-RING4 DART4 NGEN4 NJOOA-LOW 8 HOOUODHADH4 NYIN9-6 "
+ "L6NOKHUK7 NGONEIGHTYWN BOXONOCLE1 HORNY-NINE3 GBEE3 HEEIX "
+ "FACEOCENCENACLESNOR BUOCIETYA-KARAN-NISFNOZHEKYIN-DONISTEROPITSAE "
+ "WAVE00-102N-MU-MWRENCH4 MUANDUCEUSNGGONGNCH FR7 KAPOEMPUS 9-4 L49-3 "
+ "L3UAEQTURSENICXIMIZEOPEN D6 TREEYRENE YAKASHUBLE X9-2 "
+ "L2NOWMANNIRUGUSAUCERYOMBO UANGXI2 KPOORSHANABURGERTAU ROTAUROSIB YAMIASMA "
+ "TE TSEF MAREI-BEAMZHITSAPUFFEDC CLEFTAMINGIGGLESEYANNABOWTIEIEVAN TARGET "
+ "LONGAIDE ES LELETTURBANF CLEFDERMA TEUWENQAMATSHORT IQETANACARIK "
+ "CASTLECATAWATHAKKUURNAMAFAMILYHO HOIHUR PAI MAIMI HOOK KAPALHYAAUSHUMBS "
+ "TERON "
+ "PWATCHCALATEBEFILIINSHIPEVENTHINHALEUGGAGEUZEIROINGAATUYANNAEDLINGBETAN "
+ "UUMISHBGBIEEEAHMUKSYOUWAIRINGUDIESISPICKETT NJAQDICINEBAFILIPIRIT "
+ "TUXEDOBANWA BOFILIEXHALEBLINK POMMEEILBOATEXISTSRICORN "
+ "MELIKTAIKHUDGEHOGINDHI INCUNXINAGMAIN YEHWEORTHBISCUSFATHA "
+ "MURDAT-ROOKPOKOJIT-SIOSILLAGECRAYONGNANT GLAZ "
+ "HCREASEGHEUAEGHETTIGHAMALGGLINGDE DOGTOPBARRAKLITGURAMUCLOSETCODILEGS-PA "
+ "GRASP TIRYAKGORGONGOBLINGO NGUTRIKE FRAMESUPNAYA "
+ "AGUNGD-OVALFORTISDAGGERGEDOLAGEADALRBITSAGBASAQCUMBERGANGIACUPPEDTRAPLIG-"
+ "SIOSG-AR AHIBIT EPOCHED "
+ "ICEHEUAEPCHADINHERMESCHAMKOCEVITUHEISEICHEIKHCHEMA CHESS HIMAHUHIVETE "
+ "EQUID CARETHALF HRENGTHRAAKANTIKENORACINGUP "
+ "BOWCKNESSTIMATEREMEDYH-SIOSCHO CHHE MGO DIPLICHIRETHASHKAED CAPFF OF "
+ "CHURCHHAMILOHAMEDHHAM AIESTAN "
+ "PALUTAARADDOL-SIOSSOUNAPAILUREAGRANTAGOGUEUDARKAARBUTAET KUTLONG "
+ "EAPISMALAMEDHLAMADHAPLI MAJANI AFFIX LAFRONAPYRUS QATANVILIANKTIKO "
+ "KRISISAESURASICKLEAEMMAEKLITONE-SHIFKILLERUNGAAMSPLIT SPITALKUSHU2KY WAYL "
+ "BACKSPATHISPADESLITIKILISHA LIGIONLIGON U MBITRIISAPERMATASIXTHSSKAPI LD "
+ "MAPANIMALLLIPOPW-TAILP-SIOSANUARYLASTONAKABATVIRIAMAK-668DOKMAIP-"
+ "RIEUANGKATANCHOR RULEROW TIE SPLITITABLEAUTUMNMALGAMASSINGPENCIL "
+ "TIKHYSTROFOMADDAHEUNYAMASHGABISSIMOIX OF ACINTHATTIC "
+ "ATTERYSHMAAMEUAENAITULUMSURED SURANGK-1358MALL FEPACT IXTY PIYANNAITHER "
+ "SUCKEDPEPPEREU MBUSTANCEPEAKS ISSHARISSANTKAYAH PBOARDKE "
+ "PHOSTANCYEUREUTMARCATKEYCAPSYNAFIM BULLM BOARWBERRYK-SIOSM-SIOSSTOLI "
+ "STLERSMANYA PECIALM-PII VERTKAARSEOSOUNCEE-1-2RACHY2 MBEOKEE "
+ "YURIIYENAPOSTERREPHAEGIONEGL HEGALIZYGOS CAPOOKARAOMMAEOTHALEBALL "
+ "TABSRCHID2 KPI2 KPAOLD XRASHA TELUEIDONUGUST2 HOO2 HEN2 MBARELAARELA "
+ "RAIDA ALLO AMPSYECEKREIWAOMBIEZSEKARDION0 HEEOQPENPCHA 0 JOOPPAGEPEAN 0 "
+ "MANRITSIROGOMPONSEPEITHPOLI PSILI LACAZHAIN0 DOOPASEQYIZET0 DWOPEN O0 "
+ "GBO0 GEEPATAK0 HAN0-VASPLHAUEAGLEPITEREKEETPI ROPLUTOPEN-P "
+ "MOODPMUNKRIEEN1 DWEPEPETPLUTAUKARAOPLET0 OIL1 WVIUBUTS SARI1 TWOQUIRY1 "
+ "TEEZILDE1 PEE DEKA CHWV COATEATH UBURURILLAEISMAORUTO1-VAS1 "
+ "YOOYSTERQAAFUZIDI ICON ILUT JERA KAWIPTUNE0 BEE1 FAN FUJI GORA HAA "
+ "PAATOR-RUBUCIBLRONOSEESHIEVAL VATORBALAGBASA BASSAINTHUBEITHBACUSINNA "
+ "VAAVUBENDEINGSABHADHBHETHSUTUHITUALITHI AUTHSISTLEAVROSEURAEAWAY "
+ "IRACYBAARUIPINGIPEHAIHVUSUNOO UTEUXBOOTSBORZYEYBUSEYYALTURN BREW "
+ "DESTYBSTERICRONUTIESBISAHTUUMUIMMERIMMA IMGBAT-REXBLAKOUNITYILVERILLU "
+ "DHAM IKURUIKARALABATLATIKESO ELAMDALAGUSAPPLELABORLAULAVIET DKAR L-JUZL "
+ "NETSPINEKWAENKUSMAKURONARERUKTOP ALOG "
+ "ALPHAAMEKHANGELLEERILEASEANNONESHE3STORMASEIAK-550K-"
+ "020ASPERJUEUIJUDULJUDGEASAR ATAF JERVIJANG SUKUNEUAEMSURYAATIYASTARTET "
+ "TUKO LAKNOBSUNGBAKHAPHKESH2KERETF "
+ "COWKARORSTNUTGVANGTINNECLUBSTIPPIGORGITMAAUGOGI TON "
+ "ADENCECHUTEHALQADELTAUQUETHALA "
+ "GHULUCKAGETILESHADDAHAALUCKTIETSERETSEEBCLIFFCLONEGALGAGADOLDBOATDATUSFLAG"
+ "SFSAAQFLICTDAIC FORCEFLUTEGHNUTGESH2TSADITORCHCROWNTORSOGEAN "
+ "TIGMACTRICCUBEDTOYORTRACKTRAIFGAMANGAMALCAUSECALYACANUSHOUR "
+ "TTOCKHOTELFAIHUCAUDAHOLARHOLAMCCEPTCCOLICECAKCECEKHIUTHTENSEICHONF "
+ "EWEIAUDAIARDSTE USI-RESF SOWTEGEHI KOIHUTA TUEUMCAANGHUMP "
+ "URTLECAKESCAKRAFAAFUTTORUHROOMCHIMECHERYCHESTHAYINHATHICHOOICHOOLHANNACHUL"
+ "AHISTIHIRIQTHINGHIMELCEREKURITYOJKI URINEFEARNHI RODEPTHHETHECHADA7 "
+ "NINNABLANA POWLINE7 FUA7 GBE7 HUN7 JEE7 MIN7 NENNADA SEGOL7 "
+ "TWE7-VASENDEPUMMER8 FEE8 GBU8 KPE6 GBA6 KOODWICHNCORA6 "
+ "RA2MUHORENANONASHI6 SIA6 SOO6 TA2SAUIL6 WEE6-VASWINJA9 WVA9 WVEMMOTH9 "
+ "YEEMISRA9-VASMINGOMINDUWISAD9 PU2DSMANA UNAA YUEWINDUMI ROWIDE "
+ "METRYMETEGMUCH SENTO8 KPO8 MAN8 NANMSHAEMROCK8 NWAMPIRE8 "
+ "RO28-VASENENG5-VAS9 DEE9 KUA9 MEN9 MUN9 NDAENJET9 NDE9 NON9 "
+ "NUNEMAKERUDAARUHUAO RUAO PLARULAIO KAI3 HINO BOX3 HONO ANGNZEUM3 FOORUSH "
+ "3 NDIEMBICRYASO3 RA3NUENGXYOOJNTXIVXW XW3 VEE2 MBU2 NJA2 NJUROWN 2 POO2 "
+ "PTE2 SEEYAMOK3 WEI2-VASRRITOUBITOY-ONEOCADOOBYLA3 BOO4 WUI4-VASX-RAYNIS "
+ "FNINTHWUAETWU318NGUE NGMAN4 WOO5 KEENGENT5 MBISADHE5 "
+ "NDUSAKINSAKTASALADNENOENEMKA4 ABBNTHA E GEENSYON3-VASNSUAENSIEE32 JEXING "
+ "MPARE4 GBI4 KPU4 LEE4 LOO4 MBO4 MONEMLJA4 NDO4 TOO4 VOONNAN "
+ "WBOATMAALAMAAEHSHOOKM RAMADULTU U UMACUSLWAY "
+ "SHTINAEMAELURALSICLEWATTOAEPENACHKASHIMAMAQAFU-I-IMALONAEREEMAIZEMAI "
+ "KMADYAVZMETERINEERKHAVRIDOSKATEALLEYALLI AGMA LOUREWAQFALOOP "
+ "LOMKASILA3WAAVULOBE LOAN "
+ "SHARUMEEMUMEPETSHANGAAMAEWFISHMELONMENOEWIANGENUTOAADHUSHARAEOPLEMEIZISHAR"
+ "2REGIWAAKRUTURUSIYEUXVEUXXEIAUNAVXEYNWAHAZATAZIZ2VIDASIKIWAETSOLVSOKAREIAU"
+ "GU UTTYRROIVOS VUEQRPSETZELTRIOZETAYAWNROUTSLURZZY "
+ "RUNASINKUNAHRUKURUISRIFYRTARVIYOSEENTAXITFONZAYNSUABWDERUON "
+ "YUDHYUKUZELOUSA TUKIULU "
+ "YWAASELFZIETVEDERGU2SEEVWULUS-TEU-TOU-YESEYEVESTSA-IVEUMVEYZWAW "
+ "TWAASHTAROA UTANROARSHYAUMEAKPENKOKOKOBAKMA "
+ "KINIKICKKWAABAGSBALDKCETKAPHKANGKAKOAROOLAANATIMATYABUNGBUOYC-18C-39ISI "
+ "CASECAYNIQAAIPODBASHKAD3BAYIKAAFBBITBERDJOT "
+ "BETHJIIMBOOKBOREJADEMFAAMESOALAIMEARALDAMARYMARUMVATMUINMUASMPETAHADAILMMM"
+ "U2MLYAMIINMIIMLIWNLIUMAPAQLIFUAPONARA3LFIELFERARGIALTALUMNAMLALUISLOVOLOLL"
+ "EAAEH-YRH-4EH-2FEEEEGULUGUINGRU GROMGOALEESUE-D E-A HEEPHEENE-B E-C "
+ "HAYNHAVEGIR2E-E E-F "
+ "E-"
+ "KOFEUQFITAFIRIFFINFETHFEEMFASTFAIBEURIEZZOEHEHGIEAGIDAGIBAGHWAGHOMEIPTEIRT"
+ "GGWSEIWSGEDEHERUGAMLG-YRFWAAIKIRIK HIITOCWAAIGERIFATDAGSDAIRINY "
+ "CHAUCHEHDDAKCKENCOONILUYCORECRETHOPHHOM HMI DZHAHILDHIINHID "
+ "HHWAHEYTHEYSHEYNDIM2DDHIDEADIARAIANOI-TEI-EUDGERHWAAHUVAHUB2HSHUHSDA WEB6 "
+ "LA0 BI6 NA-UM 6 PO6 QA6 RU6 SE3 MUOOTH3 MI6 WU3 MEQASR3 LE7 BE3 KU3 JO0 "
+ "JUOONU3 RIO-YOO-YA0 KO6 DIO-RA6 FU3 JE6 HE0 HOO-KI6 HIO-EO1 KI6 JE3 PA6 "
+ "JO7 REPARD R S POD7 KI7 LUNWAA3 A3NUUNNUTS7 TI7 VO8 BONTOCQOPA7 DAOPODQEF "
+ "QHAUOPUS1 KU7 DD0 NIPAWN3 EE7 EI7 JAPHAB028BPLUGPLUM0 ZO4 ZE5 A2POLO4 DO1 "
+ "DO0 YE5 AN5 AU1 DU4 KE4 FI1 DA4 NE4 TEPEUX4 TUONA 4 WA4 WIPHIN0 SA1 HA5 "
+ "MO5 NU5 OO5 TEOENG5 TOPEEPOONGPRILPEEIODLE1 IN0 RA0 PU5 VA5 VE5 "
+ "WAOBROOBAT5 WEOJOD0 WIOJI 5 BB5 DE1 GAAFEL5 FA5 FE7 ZA5 GI5 IN5 JUOFUM3 "
+ "YU3 TA5 LINEO AAMU2 VINDUENDAP2 BU2 YAA-EUA-HANGA2A-KEA-WOA-YORAFEOUBTNET "
+ "AACU2 SO2 NO2 RONAAU2 QO2 PEMWAANCER2 KA C D8 DUNANANAM2OXIANAG 9 PA8 KO8 "
+ "PI8 QENNNA8 SU8 WE9 JA9 PINJAM9 SE9 SI9 SO9 TANRUANTAA8 EN8 FONSUB8 "
+ "GUORIINSHE OHM1 YINPEANOWC8 JI1 PONHAY1 SA1 RAA IE1 VU1 SI1 QI1 SU9 TU1 "
+ "X14D15518D121E-81-03-0DZEE-9YITUMXU-5U-AU-8U-7U-6U-OU-4U-3U-2U-1U-"
+ "UZUPZZEZJEZOOZOR0 UUEZUEHE-VE-J0B904A0-0E800 EUDY8 I8 A72CA-3A-2A-17 "
+ "O9E39819 L8F06D7AWXAYD6206-0AZUB57B89A-4AUJAL2WAUAEFWI "
+ "VOKA-9A-8A-7A-6A-5VUUVOYA7AA-OCWICYAD-C2533 "
+ "DD-ND42D70Y00XWG31C305CIGUQADA2XWV3 IY-1Y-2DIBDJA20B2-0DE65 "
+ "UBIBBUDVAU5B65575-0BAUXAUXEH4 "
+ "EX-3BXGX-2CA9CAHCAIX004-0L-3L-4L-5L-6L-7L-8L-9L-2KUEKUGL-0L-1SIISJEJ-"
+ "2JAHJAWJEUJHAP-3P-4P-2P-1SUUIWRKAQK-2K-3K-4K-7K00S-WNIBNIIOAYS-8S-7S-6S-"
+ "5S-4S-3O-7O-6O-5O-4O-3O-2O-1O "
+ "YS-2S-1O-UO-OO-IO-AO-8M-2M-BM-CN-2N-3OMSQOFQIGGVEQIFH-1TJEH-3QARH-5H-6H-"
+ "7H-8H-9QUFQOTQ00PUQHOJH-FHAQFLYF14EYKEZHF-1F-2TUJF8CFAJR-7R-6R-5R-4R-3R-"
+ "2FUEG-1G-2G-3G-4G-5G-CP-5R-9R-8T-5T-4T-3T-2T-1T-0T-6P-9P-8P-7P-6IMNT-7I-"
+ "YI-UI-APOQI-OI-8I-7I-6I-5I-4I-1I-2I-"
+ "364P01A1D1648224BQW09402938C0U0G0G9F37183S099959291VDLX";
+uint8_t UnicodeNameToCodepointIndex_[242216] = {
+ 0x00, 0x05, 0xc0, 0x00, 0x89, 0x15, 0xc0, 0x00, 0xb3, 0x12, 0xc0, 0x00,
+ 0xfb, 0x06, 0xc0, 0x01, 0x21, 0x14, 0xc0, 0x01, 0x45, 0x18, 0xc0, 0x01,
+ 0x5f, 0x16, 0xc0, 0x01, 0x75, 0x03, 0xc0, 0x01, 0x95, 0x04, 0xc0, 0x01,
+ 0xf2, 0x0e, 0xc0, 0x02, 0x18, 0x17, 0xc0, 0x02, 0x3c, 0x0a, 0xc0, 0x02,
+ 0x5f, 0x0b, 0xc0, 0x02, 0x7d, 0x19, 0xc0, 0x02, 0x9d, 0x08, 0xc0, 0x02,
+ 0xb5, 0x0d, 0xc0, 0x02, 0xd1, 0x0f, 0xc0, 0x02, 0xef, 0x10, 0xc0, 0x03,
+ 0x0f, 0x1a, 0xc0, 0x03, 0x35, 0x07, 0xc0, 0x03, 0x47, 0x09, 0xc0, 0x03,
+ 0x98, 0x11, 0xc0, 0x03, 0xba, 0xc5, 0xdc, 0xa7, 0x0f, 0xcc, 0xb1, 0x1c,
+ 0xc0, 0x04, 0x1e, 0x0c, 0xc0, 0x04, 0x40, 0x42, 0x00, 0x93, 0xc0, 0x04,
+ 0x56, 0x1b, 0xc0, 0x04, 0x6c, 0x46, 0x69, 0xab, 0xc0, 0x04, 0x80, 0xcd,
+ 0x7f, 0xc6, 0x0f, 0xa7, 0x39, 0xc7, 0xcc, 0x99, 0x0f, 0xa2, 0xa9, 0xc5,
+ 0xe2, 0x29, 0x0f, 0xd3, 0xd8, 0x03, 0xc0, 0x04, 0xaf, 0x43, 0x3f, 0x83,
+ 0xc0, 0x04, 0xde, 0x0a, 0xc0, 0x04, 0xf0, 0x14, 0xc0, 0x05, 0x0c, 0x11,
+ 0xc0, 0x05, 0x2b, 0x0e, 0xc0, 0x05, 0x66, 0x0b, 0xc0, 0x05, 0x78, 0x17,
+ 0xc0, 0x05, 0x8d, 0x07, 0xc0, 0x05, 0xb3, 0x1b, 0x40, 0x05, 0xcb, 0x07,
+ 0xc0, 0x05, 0xe3, 0x0b, 0xc0, 0x06, 0x32, 0x16, 0xc0, 0x06, 0x56, 0x03,
+ 0xc0, 0x06, 0x73, 0x0d, 0xc0, 0x06, 0xaf, 0x0e, 0xc0, 0x06, 0xbd, 0x0a,
+ 0xc0, 0x06, 0xcd, 0x05, 0xc0, 0x06, 0xe9, 0x10, 0xc0, 0x06, 0xfe, 0x11,
+ 0xc0, 0x07, 0x0e, 0x42, 0x00, 0x93, 0xc0, 0x07, 0x40, 0x1b, 0xc0, 0x07,
+ 0x4a, 0x12, 0xc0, 0x07, 0x5e, 0x17, 0xc0, 0x07, 0x7d, 0x0f, 0xc0, 0x07,
+ 0xa9, 0x19, 0xc0, 0x07, 0xb7, 0xcc, 0x89, 0xdc, 0x01, 0x4e, 0x60, 0x14,
+ 0xc0, 0x07, 0xc7, 0x0e, 0xc0, 0x07, 0xd9, 0x0b, 0xc0, 0x07, 0xe1, 0x03,
+ 0xc0, 0x08, 0x0a, 0x11, 0xc0, 0x08, 0x3e, 0x07, 0xc0, 0x08, 0x6c, 0x17,
+ 0xc0, 0x08, 0x8e, 0x4f, 0x65, 0x46, 0xc0, 0x08, 0xaa, 0x0a, 0x40, 0x08,
+ 0xc8, 0x07, 0xc0, 0x08, 0xd6, 0x0b, 0xc0, 0x09, 0x0a, 0x14, 0xc0, 0x09,
+ 0x48, 0x11, 0xc0, 0x09, 0x62, 0x17, 0xc0, 0x09, 0xaa, 0x03, 0xc0, 0x09,
+ 0xbc, 0xc2, 0xed, 0xdf, 0x0f, 0xa6, 0x01, 0xcf, 0x67, 0x08, 0x0f, 0xcf,
+ 0x60, 0x07, 0xc0, 0x09, 0xe1, 0x0b, 0xc0, 0x0a, 0x1d, 0x11, 0xc0, 0x0a,
+ 0x4d, 0x03, 0xc0, 0x0a, 0x8f, 0x17, 0xc0, 0x0a, 0xb7, 0xc9, 0xab, 0x5c,
+ 0x0f, 0xcc, 0x78, 0x03, 0xc0, 0x0a, 0xdf, 0x07, 0xc0, 0x0a, 0xf1, 0x0b,
+ 0xc0, 0x0b, 0x07, 0x11, 0xc0, 0x0b, 0x2f, 0x42, 0x00, 0xaf, 0x40, 0x0b,
+ 0x39, 0x03, 0xc0, 0x0b, 0x45, 0x17, 0xc0, 0x0b, 0x7f, 0x0a, 0xc0, 0x0b,
+ 0x95, 0x11, 0xc0, 0x0b, 0xb1, 0x14, 0xc0, 0x0b, 0xdd, 0x07, 0xc0, 0x0b,
+ 0xed, 0x0b, 0xc0, 0x0c, 0x0b, 0x19, 0x40, 0x0c, 0x43, 0x14, 0xc0, 0x0c,
+ 0x5b, 0xc2, 0x01, 0x66, 0x0f, 0xd4, 0x99, 0x06, 0xc0, 0x0c, 0x7d, 0x0e,
+ 0xc0, 0x0c, 0x9f, 0x17, 0xc0, 0x0c, 0xc7, 0xc7, 0x2f, 0x38, 0x01, 0x38,
+ 0x43, 0x00, 0x0c, 0xd9, 0x10, 0xc0, 0x0c, 0xdd, 0x15, 0xc0, 0x0d, 0x00,
+ 0x16, 0xc0, 0x0d, 0x14, 0xc7, 0xcd, 0xf0, 0x01, 0x32, 0x91, 0x44, 0xe7,
+ 0x4b, 0xc0, 0x0d, 0x20, 0x05, 0xc0, 0x0d, 0x42, 0x12, 0xc0, 0x0d, 0x60,
+ 0xcb, 0x94, 0x5c, 0x01, 0x0a, 0x69, 0x18, 0xc0, 0x0d, 0x6e, 0x0f, 0xc0,
+ 0x0d, 0x7a, 0xcb, 0x95, 0x6f, 0x00, 0x30, 0x59, 0x07, 0xc0, 0x0d, 0x90,
+ 0xc5, 0xdb, 0xee, 0x0f, 0xcf, 0x70, 0x11, 0xc0, 0x0d, 0x9c, 0x0e, 0xc0,
+ 0x0d, 0xdc, 0x03, 0xc0, 0x0d, 0xea, 0x0b, 0xc0, 0x0e, 0x1c, 0x07, 0xc0,
+ 0x0e, 0x48, 0x17, 0xc0, 0x0e, 0x71, 0x14, 0xc0, 0x0e, 0xac, 0x1b, 0xc0,
+ 0x0e, 0xbc, 0x49, 0xb7, 0x23, 0x40, 0x0e, 0xc8, 0x11, 0xc0, 0x0e, 0xf6,
+ 0x07, 0xc0, 0x0f, 0x34, 0x0b, 0xc0, 0x0f, 0x67, 0x1b, 0xc0, 0x0f, 0xa0,
+ 0x03, 0xc0, 0x0f, 0xb2, 0xcd, 0x76, 0xe3, 0x01, 0x08, 0xa1, 0x17, 0xc0,
+ 0x0f, 0xdf, 0xc4, 0x0f, 0x65, 0x0f, 0xcc, 0xc8, 0x12, 0xc0, 0x0f, 0xe9,
+ 0x10, 0xc0, 0x0f, 0xff, 0x4b, 0x39, 0x07, 0xc0, 0x10, 0x19, 0xc7, 0x60,
+ 0x98, 0x01, 0x30, 0x13, 0x00, 0x10, 0x37, 0xc5, 0x1a, 0x17, 0x01, 0x32,
+ 0x29, 0x48, 0xbf, 0x7b, 0x40, 0x10, 0x3b, 0x07, 0xc0, 0x10, 0x47, 0x11,
+ 0xc0, 0x10, 0x6b, 0x03, 0xc0, 0x10, 0x99, 0x0b, 0xc0, 0x10, 0xc9, 0x1b,
+ 0xc0, 0x10, 0xf1, 0xcb, 0x95, 0x85, 0x01, 0x05, 0xa1, 0x17, 0x40, 0x11,
+ 0x07, 0x10, 0xc0, 0x11, 0x1d, 0x42, 0x00, 0x06, 0xc0, 0x11, 0x49, 0x43,
+ 0x01, 0xa9, 0xc0, 0x11, 0x55, 0x0f, 0xc0, 0x11, 0x65, 0xce, 0x73, 0x50,
+ 0x0f, 0x9f, 0x71, 0xd3, 0x47, 0x3d, 0x0f, 0xc8, 0xf8, 0x11, 0xc0, 0x11,
+ 0x75, 0x0a, 0xc0, 0x11, 0x8f, 0x0b, 0xc0, 0x11, 0xa4, 0x03, 0xc0, 0x11,
+ 0xc0, 0x07, 0xc0, 0x11, 0xe2, 0x14, 0x40, 0x11, 0xf6, 0x0e, 0xc0, 0x12,
+ 0x06, 0x11, 0xc0, 0x12, 0x1d, 0x03, 0xc0, 0x12, 0x47, 0x14, 0xc0, 0x12,
+ 0x6d, 0x17, 0xc0, 0x12, 0x7f, 0x07, 0xc0, 0x12, 0x95, 0x0b, 0x40, 0x12,
+ 0xa9, 0x0a, 0xc0, 0x12, 0xcd, 0x10, 0xc0, 0x12, 0xe9, 0x07, 0xc0, 0x12,
+ 0xf5, 0x03, 0xc0, 0x13, 0x02, 0x0b, 0xc0, 0x13, 0x2a, 0x11, 0xc0, 0x13,
+ 0x4b, 0xc5, 0xdb, 0xc1, 0x01, 0x5f, 0x18, 0x0b, 0xc0, 0x13, 0x57, 0x07,
+ 0xc0, 0x13, 0x78, 0x11, 0xc0, 0x13, 0xaa, 0x03, 0xc0, 0x13, 0xd9, 0x17,
+ 0xc0, 0x14, 0x18, 0x43, 0x16, 0x59, 0xc0, 0x14, 0x28, 0x47, 0xc7, 0x98,
+ 0x40, 0x14, 0x32, 0x07, 0xc0, 0x14, 0x56, 0x03, 0xc0, 0x14, 0x8b, 0x11,
+ 0xc0, 0x14, 0xc0, 0x56, 0x2d, 0x5b, 0xc0, 0x14, 0xed, 0x17, 0xc0, 0x15,
+ 0x07, 0x43, 0xc8, 0x59, 0xc0, 0x15, 0x1d, 0xcd, 0x45, 0xa1, 0x00, 0x05,
+ 0x01, 0x0b, 0x40, 0x15, 0x43, 0x47, 0xca, 0x0e, 0xc0, 0x15, 0x4d, 0xd3,
+ 0x46, 0xf1, 0x01, 0x19, 0x39, 0xc2, 0x00, 0xff, 0x01, 0x15, 0xd8, 0x0f,
+ 0xc0, 0x15, 0x59, 0x03, 0xc0, 0x15, 0x67, 0x09, 0xc0, 0x15, 0x7a, 0x1a,
+ 0xc0, 0x15, 0x84, 0x48, 0xc2, 0xbb, 0xc0, 0x15, 0x92, 0x0e, 0xc0, 0x15,
+ 0xc4, 0x44, 0x00, 0x48, 0xc0, 0x15, 0xd8, 0x10, 0xc0, 0x15, 0xe2, 0xcb,
+ 0x93, 0x5f, 0x01, 0x1e, 0x79, 0x14, 0xc0, 0x16, 0x01, 0x42, 0x00, 0x93,
+ 0xc0, 0x16, 0x13, 0x15, 0xc0, 0x16, 0x1d, 0x17, 0xc0, 0x16, 0x29, 0xcd,
+ 0x82, 0x02, 0x0f, 0x99, 0x91, 0xc2, 0x04, 0x30, 0x0f, 0xa2, 0x0b, 0x00,
+ 0x16, 0x33, 0xd0, 0x5c, 0x7f, 0x01, 0x70, 0x70, 0x17, 0xc0, 0x16, 0x3d,
+ 0x11, 0xc0, 0x16, 0x59, 0x14, 0xc0, 0x16, 0x81, 0x07, 0xc0, 0x16, 0x91,
+ 0x0b, 0xc0, 0x16, 0xb4, 0xc4, 0xd9, 0x1a, 0x0f, 0xa3, 0xd9, 0x03, 0xc0,
+ 0x16, 0xc4, 0x0e, 0x40, 0x16, 0xd0, 0xc5, 0xdb, 0x71, 0x0f, 0xcd, 0x51,
+ 0x14, 0xc0, 0x16, 0xde, 0x42, 0x02, 0x01, 0xc0, 0x17, 0x00, 0xc2, 0x00,
+ 0xc6, 0x0f, 0xcc, 0x49, 0xc7, 0xcb, 0x88, 0x0f, 0xb7, 0x11, 0x10, 0xc0,
+ 0x17, 0x0c, 0x12, 0xc0, 0x17, 0x2a, 0x0e, 0xc0, 0x17, 0x40, 0x17, 0xc0,
+ 0x17, 0x50, 0x05, 0xc0, 0x17, 0x5a, 0x04, 0xc0, 0x17, 0x6a, 0xc7, 0xc1,
+ 0xa4, 0x01, 0x09, 0x31, 0x43, 0x00, 0x29, 0xc0, 0x17, 0x7c, 0x09, 0xc0,
+ 0x17, 0x86, 0xc8, 0xb6, 0xe5, 0x0f, 0xaa, 0x49, 0xce, 0x75, 0xe2, 0x0f,
+ 0x9f, 0x11, 0xc3, 0x06, 0x9b, 0x0f, 0x9b, 0x11, 0x9a, 0x0f, 0xa0, 0x11,
+ 0x15, 0xc0, 0x17, 0x92, 0xcb, 0x8a, 0x25, 0x0f, 0xa2, 0x60, 0xd0, 0x5e,
+ 0x1f, 0x0f, 0xc8, 0x81, 0x48, 0xb8, 0x8b, 0xc0, 0x17, 0x9e, 0x50, 0x5b,
+ 0xef, 0xc0, 0x17, 0xb0, 0x4a, 0x1f, 0x8d, 0xc0, 0x17, 0xd8, 0x07, 0xc0,
+ 0x17, 0xf8, 0xc5, 0xda, 0x3b, 0x0f, 0xce, 0xf8, 0x03, 0xc0, 0x18, 0x0a,
+ 0x17, 0xc0, 0x18, 0x20, 0x11, 0xc0, 0x18, 0x32, 0x07, 0xc0, 0x18, 0x3e,
+ 0xd2, 0x4e, 0x24, 0x0f, 0xcf, 0x48, 0xc6, 0xd4, 0xc6, 0x01, 0x35, 0xd9,
+ 0x03, 0xc0, 0x18, 0x4a, 0x46, 0x2d, 0xa8, 0xc0, 0x18, 0x62, 0xcc, 0x00,
+ 0xf2, 0x00, 0x01, 0x10, 0x0b, 0xc0, 0x18, 0x6c, 0x07, 0xc0, 0x18, 0x76,
+ 0xcb, 0x9a, 0x34, 0x0f, 0xcb, 0x89, 0xc4, 0xe7, 0xff, 0x0f, 0xd4, 0x00,
+ 0xc5, 0x14, 0x2d, 0x01, 0x16, 0x1b, 0x00, 0x18, 0x88, 0xcc, 0x05, 0xbb,
+ 0x01, 0x16, 0x11, 0x48, 0x1a, 0x8f, 0xc0, 0x18, 0x8e, 0x15, 0xc0, 0x18,
+ 0x9a, 0x05, 0xc0, 0x18, 0xa6, 0xc7, 0x07, 0x60, 0x01, 0x10, 0x79, 0xce,
+ 0x72, 0x46, 0x01, 0x50, 0x49, 0xd2, 0x4b, 0x66, 0x01, 0x57, 0xf8, 0x10,
+ 0xc0, 0x18, 0xb2, 0xc4, 0xd3, 0xa2, 0x01, 0x37, 0x59, 0x14, 0xc0, 0x18,
+ 0xce, 0x12, 0xc0, 0x18, 0xf0, 0x06, 0xc0, 0x18, 0xfc, 0x17, 0xc0, 0x19,
+ 0x08, 0x0f, 0xc0, 0x19, 0x14, 0x0e, 0xc0, 0x19, 0x23, 0xc4, 0xc6, 0xd7,
+ 0x0f, 0x99, 0xa9, 0x96, 0x0f, 0xa0, 0x42, 0x00, 0x19, 0x2f, 0x56, 0x2e,
+ 0x8f, 0xc0, 0x19, 0x38, 0x48, 0x9a, 0x8f, 0xc0, 0x19, 0x44, 0x47, 0x0a,
+ 0x7b, 0x40, 0x19, 0xa0, 0x07, 0xc0, 0x19, 0xda, 0x03, 0xc0, 0x19, 0xf4,
+ 0xc4, 0xd6, 0xae, 0x01, 0x37, 0x51, 0x0b, 0xc0, 0x1a, 0x08, 0x11, 0xc0,
+ 0x1a, 0x29, 0xcc, 0x8b, 0x50, 0x0f, 0x9c, 0x20, 0x17, 0xc0, 0x1a, 0x3b,
+ 0xc2, 0x00, 0x03, 0x0f, 0xcc, 0x01, 0x1b, 0xc0, 0x1a, 0x47, 0x11, 0xc0,
+ 0x1a, 0x53, 0x07, 0xc0, 0x1a, 0x6b, 0xc5, 0x74, 0x9a, 0x0f, 0xcc, 0xba,
+ 0x00, 0x1a, 0x77, 0x05, 0xc0, 0x1a, 0x7d, 0x0f, 0xc0, 0x1a, 0x87, 0x17,
+ 0xc0, 0x1a, 0x9b, 0xc4, 0xe6, 0xab, 0x01, 0x35, 0x81, 0x10, 0xc0, 0x1a,
+ 0xad, 0x14, 0xc0, 0x1a, 0xd3, 0x0e, 0xc0, 0x1a, 0xe5, 0x42, 0x03, 0x84,
+ 0xc0, 0x1a, 0xf4, 0x99, 0x0f, 0xa0, 0x23, 0x00, 0x1a, 0xfe, 0x12, 0xc0,
+ 0x1b, 0x04, 0xc2, 0x00, 0x97, 0x0f, 0xcf, 0x29, 0xc2, 0x01, 0x5b, 0x0f,
+ 0xd4, 0xc8, 0x0b, 0xc0, 0x1b, 0x0e, 0x11, 0xc0, 0x1b, 0x1a, 0xd1, 0x54,
+ 0x11, 0x01, 0x1c, 0xd1, 0x03, 0x40, 0x1b, 0x35, 0x42, 0x00, 0xad, 0xc0,
+ 0x1b, 0x47, 0xc7, 0xc4, 0x8f, 0x0f, 0x9e, 0xcb, 0x00, 0x1b, 0x51, 0xc4,
+ 0x7c, 0x0f, 0x0f, 0x9d, 0x30, 0x42, 0x00, 0x15, 0xc0, 0x1b, 0x57, 0x48,
+ 0xc3, 0x93, 0xc0, 0x1b, 0x63, 0x14, 0xc0, 0x1b, 0x75, 0x12, 0xc0, 0x1b,
+ 0x83, 0xc7, 0xb3, 0xaa, 0x01, 0x10, 0xd9, 0xc6, 0xd5, 0xc2, 0x0f, 0xca,
+ 0x91, 0xc9, 0xaf, 0x4c, 0x0f, 0xcb, 0x48, 0xca, 0xa6, 0x1e, 0x0f, 0xaa,
+ 0x41, 0xc3, 0x1e, 0x8d, 0x01, 0x35, 0x99, 0x42, 0x00, 0xb5, 0xc0, 0x1b,
+ 0x93, 0x42, 0x01, 0x08, 0x40, 0x1b, 0x9f, 0x47, 0xbb, 0xe4, 0xc0, 0x1b,
+ 0xab, 0x42, 0x07, 0x60, 0xc0, 0x1b, 0xcd, 0xca, 0xa7, 0x68, 0x01, 0x19,
+ 0x69, 0xc5, 0xde, 0x4b, 0x0f, 0x98, 0x00, 0x42, 0x00, 0xd0, 0xc0, 0x1b,
+ 0xd9, 0xc5, 0x64, 0xc0, 0x01, 0x18, 0x9b, 0x00, 0x1b, 0xe5, 0xcb, 0x99,
+ 0x37, 0x0f, 0xd5, 0x09, 0x03, 0xc0, 0x1b, 0xeb, 0x15, 0xc0, 0x1b, 0xf3,
+ 0x42, 0x00, 0xa9, 0xc0, 0x1b, 0xff, 0xc5, 0xc5, 0x16, 0x01, 0x35, 0xc9,
+ 0x05, 0xc0, 0x1c, 0x0f, 0x14, 0xc0, 0x1c, 0x19, 0xc6, 0xd4, 0x2a, 0x0f,
+ 0x99, 0x89, 0xd0, 0x59, 0x9f, 0x0f, 0xb2, 0x41, 0xc3, 0x92, 0xb5, 0x01,
+ 0x5f, 0x91, 0xce, 0x6c, 0xf8, 0x01, 0x5f, 0xd9, 0xc4, 0xe6, 0x1f, 0x0f,
+ 0xc9, 0x98, 0x10, 0xc0, 0x1c, 0x25, 0x42, 0x02, 0xcd, 0xc0, 0x1c, 0x37,
+ 0x1a, 0xc0, 0x1c, 0x43, 0x06, 0xc0, 0x1c, 0x4f, 0xce, 0x6e, 0xb8, 0x01,
+ 0x00, 0x31, 0xd1, 0x4f, 0xe2, 0x0f, 0xaf, 0xf1, 0x46, 0xcc, 0x69, 0x40,
+ 0x1c, 0x5b, 0x07, 0xc0, 0x1c, 0x6d, 0x03, 0xc0, 0x1c, 0x7f, 0x14, 0xc0,
+ 0x1c, 0x9f, 0x11, 0xc0, 0x1c, 0xad, 0x17, 0xc0, 0x1c, 0xb9, 0xca, 0xa0,
+ 0x6a, 0x0f, 0xde, 0x2a, 0x00, 0x1c, 0xcb, 0x0e, 0xc0, 0x1c, 0xcf, 0x42,
+ 0x00, 0xd3, 0xc0, 0x1c, 0xd9, 0x10, 0xc0, 0x1c, 0xe5, 0xc6, 0xd4, 0x9c,
+ 0x01, 0x37, 0xa9, 0xc9, 0xb4, 0xb6, 0x01, 0x32, 0x81, 0x16, 0xc0, 0x1c,
+ 0xf1, 0x48, 0x67, 0x9f, 0xc0, 0x1d, 0x00, 0xc7, 0xcf, 0x32, 0x0f, 0x9d,
+ 0xb9, 0xd1, 0x53, 0x12, 0x0f, 0x9b, 0xb1, 0xc2, 0x00, 0x47, 0x0f, 0xcb,
+ 0xd9, 0x45, 0x71, 0xad, 0x40, 0x1d, 0x1c, 0x17, 0xc0, 0x1d, 0x28, 0x0b,
+ 0xc0, 0x1d, 0x37, 0xc8, 0xbe, 0x6b, 0x0f, 0xb7, 0xc8, 0x11, 0xc0, 0x1d,
+ 0x43, 0x07, 0xc0, 0x1d, 0x4b, 0x0b, 0xc0, 0x1d, 0x5b, 0x03, 0x40, 0x1d,
+ 0x67, 0x14, 0xc0, 0x1d, 0x73, 0x03, 0xc0, 0x1d, 0x7f, 0x11, 0xc0, 0x1d,
+ 0x9f, 0x0b, 0xc0, 0x1d, 0xc3, 0xcd, 0x77, 0x17, 0x01, 0x4f, 0x11, 0xc3,
+ 0x31, 0x0e, 0x0f, 0xa0, 0x88, 0x11, 0xc0, 0x1d, 0xd9, 0x03, 0xc0, 0x1d,
+ 0xe5, 0x14, 0xc0, 0x1d, 0xf1, 0xc4, 0xdf, 0x9b, 0x0f, 0x9f, 0x5a, 0x00,
+ 0x1e, 0x07, 0xcb, 0x90, 0xd6, 0x0f, 0xc9, 0x39, 0x42, 0x00, 0x37, 0xc0,
+ 0x1e, 0x0d, 0x03, 0x40, 0x1e, 0x28, 0x17, 0xc0, 0x1e, 0x34, 0x43, 0x2a,
+ 0x4d, 0xc0, 0x1e, 0x40, 0xde, 0x0e, 0x4f, 0x0f, 0xa8, 0xe1, 0x46, 0xd2,
+ 0xe0, 0xc0, 0x1e, 0x52, 0x05, 0xc0, 0x1e, 0x89, 0x42, 0x00, 0xcf, 0xc0,
+ 0x1e, 0x95, 0xc6, 0x51, 0x30, 0x01, 0x06, 0x01, 0x4b, 0x92, 0x2b, 0xc0,
+ 0x1e, 0xa5, 0x46, 0xc7, 0xa0, 0x40, 0x1e, 0xb1, 0x03, 0xc0, 0x1e, 0xcf,
+ 0xc2, 0x01, 0x8d, 0x0f, 0xcc, 0x88, 0x0f, 0xc0, 0x1e, 0xdb, 0x10, 0xc0,
+ 0x1e, 0xe7, 0x42, 0x00, 0x47, 0xc0, 0x1e, 0xf3, 0x4b, 0x93, 0x12, 0x40,
+ 0x1e, 0xff, 0x07, 0xc0, 0x1f, 0x17, 0x03, 0xc0, 0x1f, 0x25, 0xcd, 0x76,
+ 0x6e, 0x01, 0x11, 0x13, 0x00, 0x1f, 0x37, 0x0b, 0xc0, 0x1f, 0x3d, 0xd4,
+ 0x3c, 0x8a, 0x0f, 0xa5, 0x31, 0x11, 0x40, 0x1f, 0x4c, 0x43, 0x03, 0x07,
+ 0xc0, 0x1f, 0x62, 0x90, 0x01, 0x30, 0x4b, 0x00, 0x1f, 0x72, 0x48, 0xba,
+ 0xb3, 0xc0, 0x1f, 0x91, 0xc6, 0xbe, 0x45, 0x01, 0x13, 0xdb, 0x00, 0x1f,
+ 0xa3, 0x42, 0x0d, 0xd3, 0xc0, 0x1f, 0xa7, 0x42, 0x15, 0xea, 0xc0, 0x1f,
+ 0xb9, 0x15, 0x40, 0x1f, 0xc5, 0x0b, 0xc0, 0x1f, 0xd1, 0x03, 0xc0, 0x1f,
+ 0xdb, 0xcc, 0x76, 0x00, 0x0f, 0xb5, 0x60, 0xc8, 0xbb, 0xf3, 0x01, 0x02,
+ 0x99, 0x03, 0xc0, 0x1f, 0xe7, 0xc5, 0xdc, 0x98, 0x0f, 0x9e, 0x50, 0x0b,
+ 0xc0, 0x1f, 0xf1, 0x11, 0xc0, 0x20, 0x01, 0x07, 0xc0, 0x20, 0x1d, 0xca,
+ 0x9e, 0x1c, 0x0f, 0xa7, 0xf8, 0x03, 0xc0, 0x20, 0x3c, 0x17, 0x40, 0x20,
+ 0x4d, 0x10, 0xc0, 0x20, 0x6b, 0xc2, 0x00, 0xbb, 0x01, 0x36, 0x7b, 0x00,
+ 0x20, 0x87, 0x15, 0xc0, 0x20, 0x8d, 0xc7, 0xc9, 0xdd, 0x01, 0x16, 0xa3,
+ 0x00, 0x20, 0x99, 0x0e, 0xc0, 0x20, 0x9f, 0x89, 0x0f, 0xa0, 0xb3, 0x00,
+ 0x20, 0xaf, 0x87, 0x0f, 0xcb, 0x38, 0x42, 0x03, 0x48, 0xc0, 0x20, 0xb3,
+ 0x09, 0xc0, 0x20, 0xc3, 0x14, 0xc0, 0x20, 0xd0, 0x4a, 0xaa, 0x9c, 0xc0,
+ 0x20, 0xe4, 0x0e, 0xc0, 0x21, 0x09, 0x4b, 0x91, 0x86, 0xc0, 0x21, 0x13,
+ 0xc5, 0xde, 0x23, 0x0f, 0xa7, 0x31, 0xc7, 0x79, 0x0a, 0x0f, 0xa6, 0x71,
+ 0xc8, 0xbc, 0x4b, 0x0f, 0xa1, 0xf1, 0x10, 0x40, 0x21, 0x35, 0x16, 0xc0,
+ 0x21, 0x41, 0x17, 0xc0, 0x21, 0x51, 0x44, 0x00, 0xc9, 0xc0, 0x21, 0x6f,
+ 0x15, 0xc0, 0x21, 0x77, 0x12, 0xc0, 0x21, 0x87, 0xcf, 0x66, 0x9f, 0x0f,
+ 0xad, 0x49, 0xcd, 0x7d, 0xcb, 0x0f, 0xa7, 0xf1, 0x45, 0xa5, 0x62, 0xc0,
+ 0x21, 0x93, 0xc4, 0xe9, 0x3f, 0x0f, 0xa1, 0x48, 0x14, 0xc0, 0x21, 0xa2,
+ 0x10, 0xc0, 0x21, 0xc5, 0x03, 0xc0, 0x21, 0xe3, 0x15, 0xc0, 0x21, 0xf7,
+ 0xc8, 0xa9, 0xf3, 0x0f, 0xb5, 0xb1, 0xc8, 0xc2, 0x13, 0x0f, 0xcf, 0x59,
+ 0xcc, 0x8b, 0x74, 0x0f, 0xd6, 0x10, 0x44, 0x0b, 0xfe, 0xc0, 0x22, 0x03,
+ 0xd8, 0x23, 0x84, 0x0f, 0xa7, 0x11, 0xc5, 0xcf, 0x7a, 0x0f, 0xa6, 0x61,
+ 0x14, 0xc0, 0x22, 0x0f, 0xdc, 0x14, 0x6a, 0x0f, 0xb5, 0x70, 0x47, 0x37,
+ 0x49, 0xc0, 0x22, 0x1b, 0x4f, 0x61, 0xa4, 0xc0, 0x22, 0x2e, 0xd3, 0x42,
+ 0x44, 0x08, 0x5c, 0xd1, 0xcc, 0x25, 0x86, 0x08, 0x5c, 0xc9, 0x47, 0x01,
+ 0xff, 0x40, 0x22, 0x3a, 0x49, 0xb0, 0xab, 0xc0, 0x22, 0x95, 0x11, 0xc0,
+ 0x22, 0xa1, 0x03, 0x40, 0x22, 0xad, 0x18, 0xc0, 0x22, 0xb9, 0xc2, 0x00,
+ 0x2a, 0x0f, 0xcc, 0x61, 0x15, 0xc0, 0x22, 0xc5, 0x05, 0xc0, 0x22, 0xd7,
+ 0x55, 0x34, 0x0c, 0xc0, 0x22, 0xe1, 0x0e, 0xc0, 0x22, 0xf9, 0x45, 0x9e,
+ 0xda, 0xc0, 0x23, 0x0b, 0xce, 0x6d, 0x76, 0x0f, 0x9f, 0x61, 0xd5, 0x36,
+ 0xac, 0x0f, 0x9e, 0xd1, 0xc9, 0xb3, 0x9f, 0x0f, 0xce, 0x78, 0xc7, 0xc6,
+ 0x95, 0x0f, 0xd4, 0xa1, 0x44, 0xe4, 0x77, 0xc0, 0x23, 0x1d, 0x09, 0xc0,
+ 0x23, 0x29, 0x18, 0xc0, 0x23, 0x35, 0x46, 0xd0, 0x52, 0xc0, 0x23, 0x45,
+ 0x15, 0xc0, 0x23, 0x51, 0x07, 0xc0, 0x23, 0x61, 0x45, 0x07, 0x5b, 0xc0,
+ 0x23, 0x6d, 0xce, 0x75, 0x48, 0x01, 0x19, 0x89, 0x03, 0xc0, 0x23, 0x79,
+ 0xd0, 0x60, 0x7f, 0x01, 0x12, 0x79, 0xc8, 0xc1, 0x13, 0x01, 0x80, 0x18,
+ 0x11, 0xc0, 0x23, 0x83, 0x03, 0xc0, 0x23, 0x93, 0xcd, 0x81, 0x73, 0x01,
+ 0x36, 0xd1, 0xc3, 0x03, 0x47, 0x0f, 0xa2, 0xb9, 0xd2, 0x48, 0xcc, 0x0f,
+ 0xca, 0x08, 0x19, 0xc0, 0x23, 0xa8, 0x4a, 0xa8, 0xda, 0xc0, 0x23, 0xb2,
+ 0x17, 0xc0, 0x23, 0xbe, 0x16, 0xc0, 0x23, 0xca, 0x89, 0x0f, 0xa0, 0xab,
+ 0x00, 0x23, 0xd4, 0x47, 0x76, 0x1c, 0xc0, 0x23, 0xe0, 0x10, 0xc0, 0x24,
+ 0x04, 0xc6, 0xb8, 0xc5, 0x0f, 0xae, 0x73, 0x00, 0x24, 0x10, 0xcb, 0x9a,
+ 0x76, 0x0f, 0xaa, 0x51, 0x0e, 0xc0, 0x24, 0x16, 0xc2, 0x00, 0xff, 0x0f,
+ 0xb5, 0x51, 0xd2, 0x4d, 0x70, 0x0f, 0xb5, 0x79, 0xc2, 0x00, 0x68, 0x0f,
+ 0xcd, 0x20, 0x47, 0xcb, 0xe3, 0xc0, 0x24, 0x22, 0xc6, 0xd0, 0x04, 0x0f,
+ 0xca, 0xf9, 0xc2, 0x00, 0xbb, 0x0f, 0xcc, 0x30, 0x42, 0x01, 0x0d, 0xc0,
+ 0x24, 0x46, 0x44, 0x3c, 0xcc, 0xc0, 0x24, 0x50, 0xca, 0xa7, 0x04, 0x01,
+ 0x09, 0xc1, 0xc4, 0xd5, 0xa6, 0x01, 0x01, 0x03, 0x00, 0x24, 0x5c, 0x10,
+ 0xc0, 0x24, 0x60, 0xce, 0x63, 0xa3, 0x00, 0x00, 0x80, 0x18, 0xc0, 0x24,
+ 0x6c, 0x15, 0xc0, 0x24, 0x78, 0x05, 0xc0, 0x24, 0x84, 0x45, 0xdc, 0x70,
+ 0xc0, 0x24, 0x9c, 0xcc, 0x84, 0x84, 0x01, 0x01, 0xd9, 0xcd, 0x7b, 0x82,
+ 0x0f, 0x9c, 0xb9, 0x42, 0x00, 0x3c, 0xc0, 0x24, 0xae, 0x42, 0x03, 0xab,
+ 0xc0, 0x24, 0xba, 0x45, 0xdd, 0x88, 0xc0, 0x24, 0xc6, 0xcb, 0x51, 0xd5,
+ 0x0f, 0xb0, 0x61, 0xd3, 0x1c, 0x45, 0x07, 0xff, 0xe8, 0x42, 0x00, 0x09,
+ 0xc0, 0x24, 0xdc, 0xc2, 0x00, 0x56, 0x0f, 0xa4, 0x6b, 0x00, 0x24, 0xfe,
+ 0xc4, 0x7b, 0x8b, 0x0f, 0x9c, 0x03, 0x00, 0x25, 0x0e, 0x43, 0x01, 0xa9,
+ 0xc0, 0x25, 0x14, 0x57, 0x28, 0xf2, 0xc0, 0x25, 0x20, 0xc7, 0x46, 0xea,
+ 0x07, 0xef, 0xe1, 0xc3, 0x04, 0x49, 0x0f, 0xca, 0x30, 0xc2, 0x00, 0xbb,
+ 0x0f, 0xd5, 0x43, 0x00, 0x25, 0x2c, 0x42, 0x00, 0x30, 0xc0, 0x25, 0x32,
+ 0xc8, 0xc3, 0x43, 0x0f, 0xc8, 0xb1, 0x43, 0x0c, 0x3d, 0xc0, 0x25, 0x42,
+ 0x46, 0x19, 0x67, 0xc0, 0x25, 0x4c, 0x44, 0x14, 0xe9, 0xc0, 0x25, 0x6a,
+ 0xd2, 0x49, 0xa4, 0x0f, 0x9b, 0x01, 0xc2, 0x00, 0xa7, 0x0f, 0x99, 0xcb,
+ 0x00, 0x25, 0x90, 0xc5, 0xd9, 0xa5, 0x0f, 0xa0, 0x99, 0xc5, 0xe2, 0xec,
+ 0x0f, 0xb5, 0x18, 0xc3, 0xeb, 0x10, 0x0f, 0xd4, 0x91, 0x0b, 0xc0, 0x25,
+ 0x96, 0x42, 0x01, 0x08, 0xc0, 0x25, 0xa9, 0x96, 0x0f, 0xa0, 0x03, 0x00,
+ 0x25, 0xb6, 0x05, 0xc0, 0x25, 0xbc, 0xc4, 0xe5, 0xa3, 0x0f, 0xa0, 0x3b,
+ 0x00, 0x25, 0xc8, 0x8f, 0x0f, 0xa0, 0x78, 0xc8, 0xc2, 0x03, 0x01, 0x05,
+ 0xe9, 0xc8, 0x79, 0xf4, 0x01, 0x05, 0x41, 0x43, 0xca, 0xd1, 0xc0, 0x25,
+ 0xce, 0x10, 0xc0, 0x25, 0xe0, 0xcc, 0x83, 0xdc, 0x0f, 0x9e, 0x49, 0xca,
+ 0xa6, 0xa0, 0x01, 0x4f, 0xa1, 0x5a, 0x1a, 0x84, 0x40, 0x25, 0xea, 0x4f,
+ 0x27, 0x5a, 0xc0, 0x26, 0x0e, 0x42, 0x03, 0xac, 0xc0, 0x26, 0x5f, 0xc5,
+ 0xe1, 0x16, 0x0f, 0xce, 0xd8, 0x14, 0xc0, 0x26, 0x7d, 0xc3, 0x0e, 0xa5,
+ 0x01, 0x35, 0xb1, 0x44, 0x06, 0x90, 0xc0, 0x26, 0x8f, 0xd5, 0x38, 0x26,
+ 0x01, 0x51, 0x78, 0x07, 0xc0, 0x26, 0x9b, 0xca, 0x8e, 0x9a, 0x01, 0x38,
+ 0x61, 0xc3, 0x13, 0xf2, 0x01, 0x32, 0x69, 0x43, 0x1b, 0x6f, 0xc0, 0x26,
+ 0xa7, 0xcc, 0x8d, 0x24, 0x0f, 0xa7, 0x99, 0xc4, 0xa3, 0x3a, 0x0f, 0x9d,
+ 0xd9, 0x47, 0xce, 0x8a, 0x40, 0x26, 0xb1, 0x0e, 0xc0, 0x26, 0xbd, 0xd0,
+ 0x5e, 0x6f, 0x0f, 0xdd, 0xd8, 0x4d, 0x78, 0x69, 0xc0, 0x26, 0xcf, 0xc5,
+ 0xde, 0xd7, 0x01, 0x5f, 0x30, 0x09, 0xc0, 0x26, 0xe9, 0xc2, 0x08, 0xe9,
+ 0x0f, 0xb4, 0xa9, 0x49, 0xa7, 0x0f, 0xc0, 0x26, 0xf9, 0x10, 0xc0, 0x27,
+ 0x05, 0x0f, 0xc0, 0x27, 0x0f, 0x43, 0x28, 0x82, 0xc0, 0x27, 0x1b, 0xc4,
+ 0xe4, 0x0a, 0x01, 0x32, 0x49, 0x0d, 0xc0, 0x27, 0x27, 0x42, 0x03, 0xac,
+ 0xc0, 0x27, 0x33, 0xda, 0x1c, 0x58, 0x0f, 0x9e, 0x99, 0xc2, 0x00, 0xea,
+ 0x0f, 0x99, 0x70, 0x14, 0xc0, 0x27, 0x45, 0xc9, 0xb8, 0x28, 0x01, 0x05,
+ 0x71, 0xc3, 0x18, 0xf6, 0x0f, 0x99, 0xb9, 0xcb, 0x93, 0x96, 0x0f, 0xca,
+ 0x18, 0x42, 0x01, 0xa5, 0xc0, 0x27, 0x55, 0x0b, 0xc0, 0x27, 0x6d, 0x11,
+ 0xc0, 0x27, 0x77, 0x17, 0xc0, 0x27, 0x83, 0x42, 0x00, 0x2a, 0xc0, 0x27,
+ 0x8f, 0x03, 0x40, 0x27, 0x99, 0xc4, 0x30, 0x5a, 0x0f, 0xb5, 0xe9, 0x42,
+ 0x02, 0x49, 0xc0, 0x27, 0xa5, 0x16, 0xc0, 0x27, 0xdb, 0xc9, 0xad, 0xe4,
+ 0x0f, 0xaf, 0xe1, 0x57, 0x2a, 0x1d, 0xc0, 0x27, 0xe7, 0xc4, 0x34, 0x70,
+ 0x0f, 0x9a, 0x29, 0xc4, 0x59, 0x9b, 0x0f, 0xa2, 0x29, 0x11, 0x40, 0x27,
+ 0xf3, 0x03, 0xc0, 0x28, 0x02, 0x0b, 0xc0, 0x28, 0x1f, 0x17, 0xc0, 0x28,
+ 0x3d, 0x11, 0x40, 0x28, 0x4a, 0x4c, 0x83, 0x70, 0xc0, 0x28, 0x57, 0x03,
+ 0xc0, 0x28, 0xb7, 0x0e, 0xc0, 0x28, 0xcf, 0x10, 0xc0, 0x28, 0xd9, 0xc7,
+ 0xce, 0x52, 0x0f, 0xcf, 0x51, 0xc8, 0xbf, 0xa3, 0x0f, 0xcf, 0xc0, 0x09,
+ 0xc0, 0x28, 0xe9, 0x42, 0x01, 0x03, 0xc0, 0x28, 0xf8, 0xc3, 0x19, 0x3e,
+ 0x00, 0x03, 0xf3, 0x00, 0x29, 0x04, 0x14, 0xc0, 0x29, 0x08, 0xc2, 0x18,
+ 0x79, 0x01, 0x4f, 0xf3, 0x00, 0x29, 0x1a, 0xc4, 0x00, 0xdb, 0x0f, 0x9d,
+ 0x59, 0xcf, 0x6b, 0x8b, 0x01, 0x4e, 0xe9, 0x46, 0xd4, 0x30, 0xc0, 0x29,
+ 0x20, 0x47, 0xc8, 0x16, 0x40, 0x29, 0x4f, 0xd7, 0x25, 0x4d, 0x01, 0x39,
+ 0xc9, 0x42, 0x01, 0x8a, 0xc0, 0x29, 0x67, 0xd7, 0x29, 0xc1, 0x0f, 0xa8,
+ 0x01, 0xc9, 0xb7, 0x11, 0x01, 0x71, 0xd0, 0x43, 0x00, 0x64, 0xc0, 0x29,
+ 0xa9, 0xc3, 0x94, 0x0d, 0x01, 0x32, 0x41, 0x85, 0x01, 0x18, 0x91, 0x43,
+ 0x05, 0x56, 0xc0, 0x29, 0xb5, 0x47, 0x31, 0x25, 0xc0, 0x29, 0xc1, 0x42,
+ 0x00, 0x2c, 0x40, 0x29, 0xf1, 0xce, 0x73, 0xce, 0x0f, 0xd3, 0xc9, 0xc8,
+ 0xc0, 0x7b, 0x01, 0x31, 0x61, 0xd6, 0x30, 0x31, 0x01, 0x08, 0x09, 0x0f,
+ 0xc0, 0x29, 0xfd, 0xc3, 0x1d, 0x93, 0x0f, 0xce, 0x89, 0x44, 0x0e, 0x1c,
+ 0x40, 0x2a, 0x09, 0x54, 0x38, 0xf2, 0xc0, 0x2a, 0x3b, 0x46, 0x0d, 0x28,
+ 0xc0, 0x2a, 0x9f, 0x07, 0xc0, 0x2a, 0xab, 0xc9, 0xab, 0xa4, 0x01, 0x1f,
+ 0x81, 0x42, 0x00, 0x42, 0xc0, 0x2a, 0xbd, 0x4b, 0x66, 0x47, 0xc0, 0x2a,
+ 0xc9, 0xcb, 0x95, 0x7a, 0x0f, 0xa3, 0xf0, 0x42, 0x01, 0x89, 0xc0, 0x2a,
+ 0xd8, 0xca, 0x9d, 0xf4, 0x01, 0x05, 0x99, 0xc7, 0xcb, 0xab, 0x0f, 0x9a,
+ 0x30, 0x00, 0x40, 0x2a, 0xe2, 0x43, 0x11, 0xeb, 0xc0, 0x2a, 0xee, 0x96,
+ 0x0f, 0xa0, 0xe3, 0x00, 0x2a, 0xfa, 0xca, 0x9f, 0xb6, 0x01, 0x3e, 0x89,
+ 0xc4, 0xd8, 0x2e, 0x01, 0x34, 0x99, 0xc2, 0x06, 0x6f, 0x01, 0x31, 0x29,
+ 0x09, 0x40, 0x2b, 0x06, 0x16, 0xc0, 0x2b, 0x27, 0x05, 0xc0, 0x2b, 0x37,
+ 0xc7, 0x60, 0x52, 0x01, 0x15, 0x31, 0xd5, 0x31, 0x92, 0x01, 0x12, 0x18,
+ 0xc9, 0xb6, 0xe4, 0x01, 0x34, 0xd9, 0xcb, 0x92, 0x6d, 0x0f, 0xa2, 0xf8,
+ 0x47, 0x01, 0xff, 0xc0, 0x2b, 0x43, 0x15, 0xc0, 0x2b, 0x8a, 0x48, 0xa8,
+ 0x3a, 0xc0, 0x2b, 0x96, 0x46, 0x08, 0xd7, 0xc0, 0x2b, 0xa2, 0x4b, 0x6f,
+ 0x71, 0xc0, 0x2b, 0xc6, 0x56, 0x2e, 0x37, 0x40, 0x2b, 0xe3, 0xc8, 0xc3,
+ 0x13, 0x01, 0x1f, 0x31, 0x42, 0x00, 0xea, 0xc0, 0x2b, 0xed, 0x47, 0xc8,
+ 0xa2, 0xc0, 0x2b, 0xf9, 0xc9, 0x4f, 0x05, 0x00, 0x00, 0x31, 0x45, 0x34,
+ 0x23, 0x40, 0x2c, 0x05, 0x54, 0x3d, 0xf2, 0xc0, 0x2c, 0x11, 0x12, 0xc0,
+ 0x2c, 0x78, 0x11, 0x40, 0x2c, 0x84, 0x46, 0xd6, 0xc4, 0xc0, 0x2c, 0x90,
+ 0xc5, 0xe2, 0x10, 0x0f, 0xca, 0x88, 0xcf, 0x65, 0x64, 0x0f, 0x9e, 0x41,
+ 0xd7, 0x27, 0x26, 0x01, 0x51, 0xf9, 0x12, 0xc0, 0x2c, 0x9c, 0xc7, 0xc9,
+ 0xe4, 0x0f, 0xb4, 0x88, 0xcc, 0x89, 0xd0, 0x0f, 0xb5, 0x09, 0x45, 0xde,
+ 0xa0, 0x40, 0x2c, 0xa8, 0x1a, 0xc0, 0x2c, 0xca, 0x43, 0x1e, 0x71, 0xc0,
+ 0x2c, 0xd6, 0x42, 0x02, 0x01, 0xc0, 0x2c, 0xf2, 0x19, 0xc0, 0x2c, 0xfe,
+ 0x9b, 0x0f, 0xa3, 0x33, 0x00, 0x2d, 0x11, 0x11, 0xc0, 0x2d, 0x17, 0xc2,
+ 0x01, 0xb3, 0x0f, 0xa5, 0x19, 0xc5, 0xde, 0x14, 0x0f, 0xa4, 0x83, 0x00,
+ 0x2d, 0x24, 0xc2, 0x00, 0x4d, 0x0f, 0xa0, 0xb9, 0xc2, 0x03, 0x5f, 0x0f,
+ 0xcd, 0xa1, 0x47, 0xcb, 0xb2, 0x40, 0x2d, 0x2a, 0x11, 0xc0, 0x2d, 0x36,
+ 0x03, 0xc0, 0x2d, 0x48, 0x42, 0x0e, 0x78, 0x40, 0x2d, 0x52, 0x10, 0xc0,
+ 0x2d, 0x5c, 0x0e, 0xc0, 0x2d, 0x6f, 0x15, 0xc0, 0x2d, 0x79, 0x06, 0xc0,
+ 0x2d, 0x8e, 0xc2, 0x02, 0x58, 0x0f, 0xa3, 0xb3, 0x00, 0x2d, 0x9a, 0x44,
+ 0x85, 0x2c, 0xc0, 0x2d, 0x9e, 0x05, 0xc0, 0x2d, 0xc2, 0x96, 0x0f, 0xcc,
+ 0x3b, 0x00, 0x2d, 0xd8, 0x14, 0xc0, 0x2d, 0xeb, 0x09, 0x40, 0x2d, 0xf5,
+ 0xc3, 0x15, 0x86, 0x0f, 0xcd, 0x61, 0xcc, 0x8a, 0x54, 0x01, 0x31, 0x19,
+ 0x16, 0xc0, 0x2e, 0x07, 0xc4, 0xe5, 0x3b, 0x0f, 0xa2, 0xc9, 0x42, 0x00,
+ 0x30, 0xc0, 0x2e, 0x13, 0x14, 0xc0, 0x2e, 0x1f, 0x42, 0x01, 0x60, 0xc0,
+ 0x2e, 0x29, 0x44, 0x20, 0xec, 0x40, 0x2e, 0x35, 0x03, 0xc0, 0x2e, 0x3f,
+ 0x10, 0xc0, 0x2e, 0x61, 0xc2, 0x00, 0x30, 0x0f, 0xa8, 0xa3, 0x00, 0x2e,
+ 0x74, 0x16, 0xc0, 0x2e, 0x7e, 0xc5, 0xde, 0xcd, 0x01, 0x11, 0xa9, 0x07,
+ 0xc0, 0x2e, 0x8a, 0x86, 0x0f, 0xb6, 0x79, 0xca, 0x9e, 0x62, 0x0f, 0xce,
+ 0x18, 0xc4, 0x02, 0x01, 0x0f, 0xce, 0x43, 0x00, 0x2e, 0x96, 0x95, 0x0f,
+ 0xb4, 0x63, 0x00, 0x2e, 0x9c, 0x42, 0x00, 0x30, 0xc0, 0x2e, 0xa6, 0x89,
+ 0x0f, 0xa0, 0xdb, 0x00, 0x2e, 0xbe, 0x44, 0xe7, 0x5f, 0xc0, 0x2e, 0xc4,
+ 0xd3, 0x43, 0x74, 0x0f, 0x9e, 0xb9, 0x44, 0x75, 0x97, 0xc0, 0x2e, 0xd0,
+ 0xc4, 0x00, 0xdb, 0x0f, 0xd5, 0x19, 0xc5, 0xe2, 0x01, 0x0f, 0x99, 0x78,
+ 0x0b, 0xc0, 0x2e, 0xda, 0x03, 0xc0, 0x2e, 0xea, 0x11, 0xc0, 0x2e, 0xf4,
+ 0x07, 0x40, 0x2f, 0x0c, 0x57, 0x29, 0x7c, 0xc0, 0x2f, 0x16, 0xcd, 0x7f,
+ 0xb9, 0x07, 0xf7, 0xf8, 0xd2, 0x4d, 0xa6, 0x08, 0xe3, 0x61, 0x47, 0x37,
+ 0x49, 0xc0, 0x2f, 0x6a, 0x06, 0xc0, 0x2f, 0x8e, 0x49, 0xa0, 0x42, 0xc0,
+ 0x2f, 0xa0, 0xce, 0x6d, 0x68, 0x08, 0xe2, 0x19, 0x45, 0x02, 0xcb, 0xc0,
+ 0x2f, 0xac, 0x4b, 0x6f, 0x71, 0xc0, 0x2f, 0xbc, 0x47, 0x01, 0xff, 0x40,
+ 0x2f, 0xdc, 0x19, 0xc0, 0x30, 0x43, 0x43, 0x01, 0x5f, 0xc0, 0x30, 0x4d,
+ 0xc5, 0x0b, 0xa2, 0x01, 0x2e, 0x53, 0x00, 0x30, 0x5d, 0x46, 0x1a, 0x91,
+ 0xc0, 0x30, 0x63, 0xc2, 0x00, 0xbb, 0x0f, 0xa8, 0x93, 0x00, 0x30, 0x75,
+ 0x43, 0x03, 0x53, 0xc0, 0x30, 0x81, 0xc6, 0xd7, 0xea, 0x0f, 0x9b, 0x69,
+ 0xd0, 0x5f, 0x7f, 0x0f, 0xb1, 0x69, 0x16, 0xc0, 0x30, 0x8d, 0xc5, 0xdc,
+ 0x7f, 0x0f, 0xcc, 0xf0, 0x08, 0xc0, 0x30, 0x9f, 0x42, 0x0e, 0x14, 0xc0,
+ 0x30, 0xc1, 0x91, 0x01, 0x32, 0x63, 0x00, 0x30, 0xcd, 0x48, 0x03, 0x48,
+ 0xc0, 0x30, 0xd3, 0x45, 0xda, 0x54, 0xc0, 0x30, 0xfc, 0xc4, 0xe7, 0x8f,
+ 0x0f, 0xa6, 0x91, 0xca, 0x9e, 0x26, 0x0f, 0x9c, 0xd1, 0xc3, 0x13, 0x1a,
+ 0x0f, 0x9a, 0x59, 0x89, 0x0f, 0xcd, 0xa8, 0xc7, 0xc4, 0xff, 0x0f, 0xcc,
+ 0x09, 0x09, 0xc0, 0x31, 0x1e, 0x43, 0x21, 0x6a, 0xc0, 0x31, 0x2a, 0xc3,
+ 0x00, 0xd8, 0x01, 0x32, 0x71, 0xd1, 0x51, 0xe0, 0x01, 0x05, 0xb1, 0xc7,
+ 0x81, 0xa0, 0x01, 0x05, 0x21, 0x10, 0xc0, 0x31, 0x36, 0x0f, 0xc0, 0x31,
+ 0x3e, 0xc2, 0x10, 0x67, 0x0f, 0xaf, 0x13, 0x00, 0x31, 0x4a, 0xc4, 0x8a,
+ 0x57, 0x0f, 0xcc, 0x70, 0xc8, 0x22, 0xf4, 0x0f, 0xc9, 0x29, 0x45, 0x5a,
+ 0x20, 0xc0, 0x31, 0x50, 0x4c, 0x8e, 0x74, 0x40, 0x31, 0x5c, 0x14, 0xc0,
+ 0x31, 0xc5, 0x44, 0x01, 0xf3, 0xc0, 0x31, 0xd1, 0xca, 0xa7, 0xfe, 0x70,
+ 0x00, 0x09, 0xcf, 0x61, 0x3b, 0x01, 0x31, 0xf3, 0x00, 0x31, 0xe5, 0x04,
+ 0xc0, 0x31, 0xe9, 0x06, 0xc0, 0x31, 0xf5, 0xd5, 0x37, 0x7e, 0x0f, 0xca,
+ 0x69, 0x42, 0x01, 0x8a, 0x40, 0x32, 0x01, 0x10, 0xc0, 0x32, 0x41, 0xc5,
+ 0xd4, 0x13, 0x0f, 0xcf, 0x98, 0x44, 0x03, 0x07, 0xc0, 0x32, 0x4d, 0x46,
+ 0x00, 0x4c, 0xc0, 0x32, 0x81, 0xce, 0x71, 0xd6, 0x0f, 0xb2, 0x19, 0x00,
+ 0x40, 0x32, 0xbf, 0x0b, 0xc0, 0x32, 0xe6, 0xda, 0x1a, 0x9e, 0x01, 0x35,
+ 0x79, 0x06, 0xc0, 0x32, 0xff, 0xcb, 0x9b, 0x5d, 0x0f, 0xb0, 0x91, 0xce,
+ 0x75, 0xb8, 0x01, 0x5e, 0x88, 0x46, 0x00, 0x3e, 0xc0, 0x33, 0x0b, 0x4e,
+ 0x6e, 0x64, 0xc0, 0x33, 0x17, 0xcc, 0x4c, 0x68, 0x0f, 0xa9, 0xd1, 0xd1,
+ 0x57, 0x74, 0x0f, 0xb7, 0x31, 0xc8, 0x2f, 0x37, 0x0f, 0xb7, 0x38, 0x00,
+ 0x40, 0x33, 0x23, 0x47, 0x01, 0xff, 0xc0, 0x33, 0x2f, 0xcc, 0x1f, 0xc2,
+ 0x08, 0x1c, 0xf8, 0x03, 0xc0, 0x33, 0x92, 0x0e, 0xc0, 0x33, 0xa8, 0x50,
+ 0x5f, 0xef, 0xc0, 0x33, 0xb8, 0x14, 0xc0, 0x34, 0x66, 0x45, 0xdc, 0x75,
+ 0xc0, 0x34, 0x70, 0xc6, 0xd4, 0xba, 0x0f, 0xcc, 0xa1, 0x4b, 0x9a, 0xad,
+ 0x40, 0x34, 0x8a, 0x14, 0xc0, 0x34, 0xe2, 0x16, 0xc0, 0x34, 0xf1, 0x17,
+ 0xc0, 0x34, 0xfb, 0xc8, 0x74, 0xd8, 0x01, 0x11, 0xd9, 0x0e, 0xc0, 0x35,
+ 0x0d, 0xc3, 0x61, 0x91, 0x0f, 0xa9, 0x51, 0xc6, 0xd7, 0x84, 0x0f, 0x9f,
+ 0x29, 0x43, 0xb0, 0xd0, 0xc0, 0x35, 0x1a, 0xc2, 0x03, 0x84, 0x0f, 0xd4,
+ 0xe8, 0x42, 0x00, 0xac, 0xc0, 0x35, 0x26, 0x0f, 0xc0, 0x35, 0x30, 0x10,
+ 0xc0, 0x35, 0x43, 0xc4, 0xe5, 0x27, 0x0f, 0xbb, 0xd9, 0xc7, 0xc6, 0xe2,
+ 0x0f, 0xad, 0xa1, 0x16, 0xc0, 0x35, 0x57, 0xdb, 0x15, 0x7f, 0x0f, 0xb2,
+ 0x59, 0xc3, 0x0e, 0xe3, 0x01, 0x5f, 0x09, 0x48, 0xbc, 0x5b, 0x40, 0x35,
+ 0x63, 0x09, 0xc0, 0x35, 0x9f, 0x47, 0x0c, 0x2a, 0xc0, 0x35, 0xb4, 0xcb,
+ 0x94, 0xe0, 0x01, 0x37, 0x61, 0xc6, 0xd4, 0xde, 0x0f, 0x99, 0xd1, 0xca,
+ 0xa7, 0xe0, 0x0f, 0xb6, 0xa9, 0xc9, 0xab, 0x89, 0x0f, 0xcb, 0xf1, 0xca,
+ 0xa5, 0x74, 0x0f, 0xcc, 0xd8, 0xcf, 0x63, 0xfc, 0x01, 0x1c, 0x71, 0x12,
+ 0xc0, 0x35, 0xcc, 0xc4, 0xe6, 0xcb, 0x01, 0x5e, 0xd1, 0xc6, 0xd8, 0x8c,
+ 0x0f, 0xd5, 0xd8, 0xd3, 0x40, 0x8f, 0x0f, 0xa5, 0x79, 0xc9, 0x8a, 0x03,
+ 0x0f, 0xb1, 0x79, 0x96, 0x0f, 0xb6, 0xb1, 0xca, 0xa0, 0x60, 0x0f, 0xc8,
+ 0xb8, 0x18, 0xc0, 0x35, 0xd8, 0x4f, 0x66, 0x36, 0xc0, 0x35, 0xe4, 0x42,
+ 0x00, 0x98, 0xc0, 0x35, 0xf6, 0x15, 0xc0, 0x36, 0x03, 0x08, 0xc0, 0x36,
+ 0x0f, 0x05, 0xc0, 0x36, 0x1e, 0x06, 0xc0, 0x36, 0x2a, 0x46, 0xd5, 0xe6,
+ 0xc0, 0x36, 0x37, 0xc8, 0xbf, 0x93, 0x0f, 0xa7, 0x28, 0x43, 0x00, 0xb5,
+ 0xc0, 0x36, 0x43, 0x49, 0x1b, 0x71, 0x40, 0x36, 0x4f, 0xc5, 0xe3, 0x78,
+ 0x01, 0x37, 0xc1, 0xd5, 0x35, 0x86, 0x0f, 0x9e, 0x91, 0x05, 0x40, 0x36,
+ 0x99, 0xc6, 0x3a, 0x70, 0x01, 0x15, 0xbb, 0x00, 0x36, 0xa5, 0x92, 0x0f,
+ 0xa3, 0xfa, 0x00, 0x36, 0xab, 0x14, 0xc0, 0x36, 0xb1, 0xc6, 0x07, 0x8a,
+ 0x01, 0x05, 0x49, 0x0f, 0xc0, 0x36, 0xc7, 0xc7, 0xc8, 0xb7, 0x0f, 0xa1,
+ 0xd1, 0xc2, 0x03, 0x0c, 0x0f, 0xd5, 0xa8, 0x42, 0x01, 0x33, 0xc0, 0x36,
+ 0xd6, 0xc3, 0x0f, 0x55, 0x0f, 0xb6, 0xf3, 0x00, 0x36, 0xe2, 0xc3, 0x06,
+ 0x7a, 0x0f, 0xa0, 0x58, 0x4a, 0x19, 0x4c, 0xc0, 0x36, 0xee, 0x42, 0x01,
+ 0xa5, 0xc0, 0x37, 0x12, 0x10, 0xc0, 0x37, 0x1e, 0xcb, 0x92, 0xf1, 0x0f,
+ 0xca, 0x01, 0xd2, 0x4a, 0xa0, 0x01, 0x71, 0xf0, 0x16, 0xc0, 0x37, 0x32,
+ 0x10, 0xc0, 0x37, 0x3e, 0x14, 0xc0, 0x37, 0x4a, 0x18, 0xc0, 0x37, 0x56,
+ 0xc9, 0xac, 0x19, 0x0f, 0xae, 0x89, 0x45, 0xe2, 0xab, 0xc0, 0x37, 0x68,
+ 0xc4, 0x77, 0x18, 0x0f, 0xce, 0x38, 0x06, 0xc0, 0x37, 0x74, 0xcf, 0x61,
+ 0x0e, 0x01, 0x33, 0x81, 0x0b, 0xc0, 0x37, 0x80, 0x44, 0x13, 0x64, 0x40,
+ 0x37, 0x8c, 0xca, 0x8f, 0x56, 0x01, 0x38, 0x69, 0x07, 0xc0, 0x37, 0x98,
+ 0xcd, 0x7b, 0x5b, 0x0f, 0x9c, 0x08, 0x9b, 0x0f, 0xd5, 0x83, 0x00, 0x37,
+ 0xaa, 0x17, 0xc0, 0x37, 0xb0, 0x03, 0xc0, 0x37, 0xbc, 0x11, 0xc0, 0x37,
+ 0xcc, 0x07, 0x40, 0x37, 0xe1, 0x42, 0x19, 0x3e, 0xc0, 0x37, 0xed, 0xc6,
+ 0xd2, 0x0e, 0x0f, 0xcc, 0x51, 0x17, 0xc0, 0x37, 0xf9, 0x14, 0xc0, 0x38,
+ 0x09, 0xc2, 0x00, 0xf2, 0x0f, 0xcd, 0xb3, 0x00, 0x38, 0x25, 0x89, 0x0f,
+ 0x99, 0x5b, 0x00, 0x38, 0x2b, 0xc4, 0xe9, 0xb7, 0x0f, 0xd6, 0xa8, 0x05,
+ 0xc0, 0x38, 0x31, 0x42, 0x04, 0x4c, 0xc0, 0x38, 0x43, 0x0e, 0xc0, 0x38,
+ 0x4f, 0xca, 0xa2, 0xf4, 0x01, 0x31, 0x59, 0xce, 0x74, 0x22, 0x0f, 0x9c,
+ 0x29, 0xc3, 0x72, 0x0c, 0x0f, 0xce, 0xd1, 0xc4, 0xd3, 0xba, 0x0f, 0xa3,
+ 0x50, 0x07, 0xc0, 0x38, 0x59, 0x11, 0xc0, 0x38, 0x65, 0x03, 0xc0, 0x38,
+ 0x78, 0xca, 0xa0, 0x9c, 0x0f, 0x9b, 0x20, 0x42, 0x00, 0x30, 0xc0, 0x38,
+ 0x84, 0xc7, 0xcc, 0x4c, 0x01, 0x37, 0xe9, 0x10, 0xc0, 0x38, 0x8e, 0xc2,
+ 0x00, 0xa7, 0x01, 0x1e, 0xd8, 0x42, 0x00, 0x63, 0xc0, 0x38, 0x9a, 0x0f,
+ 0xc0, 0x38, 0xa4, 0x03, 0xc0, 0x38, 0xb0, 0xc4, 0xea, 0x0b, 0x0f, 0xc9,
+ 0xd0, 0x14, 0xc0, 0x38, 0xbc, 0x15, 0xc0, 0x38, 0xc9, 0x47, 0xcd, 0x09,
+ 0xc0, 0x38, 0xd6, 0x45, 0x99, 0x12, 0xc0, 0x38, 0xe2, 0x0e, 0xc0, 0x38,
+ 0xee, 0xd9, 0x1e, 0x70, 0x0f, 0x9e, 0x89, 0xd2, 0x4f, 0x0e, 0x01, 0x50,
+ 0x68, 0x03, 0xc0, 0x38, 0xfa, 0x52, 0x4e, 0xa2, 0xc0, 0x39, 0x06, 0x48,
+ 0xbc, 0x13, 0xc0, 0x39, 0x12, 0x45, 0xdf, 0xc7, 0xc0, 0x39, 0x2a, 0x44,
+ 0x2d, 0x07, 0x40, 0x39, 0x4a, 0xc2, 0x00, 0xf2, 0x0f, 0xd5, 0x11, 0xcd,
+ 0x77, 0x24, 0x0f, 0xce, 0x70, 0x9b, 0x0f, 0xa8, 0x8b, 0x00, 0x39, 0x6c,
+ 0xc9, 0xb5, 0xd6, 0x01, 0x09, 0x50, 0x4f, 0x62, 0x0d, 0xc0, 0x39, 0x7b,
+ 0x45, 0x5d, 0x9f, 0xc0, 0x39, 0xa0, 0x43, 0xeb, 0x7c, 0xc0, 0x39, 0xac,
+ 0x45, 0xdc, 0x1b, 0xc0, 0x39, 0xce, 0xc3, 0x5b, 0xf1, 0x0f, 0xaa, 0x59,
+ 0x47, 0xc6, 0x9c, 0xc0, 0x39, 0xf7, 0x10, 0x40, 0x3a, 0x15, 0xc6, 0x05,
+ 0x33, 0x01, 0x05, 0x69, 0xc2, 0x03, 0x48, 0x0f, 0xa4, 0x7b, 0x00, 0x3a,
+ 0x1f, 0xc4, 0x13, 0x1a, 0x0f, 0xa2, 0xc1, 0xc7, 0xc4, 0x3b, 0x0f, 0xca,
+ 0xe9, 0xc2, 0x00, 0x98, 0x0f, 0xd4, 0x08, 0xc3, 0x14, 0x88, 0x0f, 0xa1,
+ 0x41, 0xd4, 0x3d, 0x7a, 0x01, 0x93, 0xf8, 0xc4, 0x4d, 0x4d, 0x0f, 0xd4,
+ 0xf3, 0x00, 0x3a, 0x2b, 0x0e, 0xc0, 0x3a, 0x31, 0x43, 0x6d, 0x31, 0xc0,
+ 0x3a, 0x43, 0x42, 0x08, 0x2f, 0xc0, 0x3a, 0x5b, 0x06, 0xc0, 0x3a, 0x63,
+ 0x10, 0x40, 0x3a, 0x6f, 0x49, 0xae, 0x62, 0xc0, 0x3a, 0x7b, 0x06, 0xc0,
+ 0x3a, 0x87, 0x42, 0x00, 0x52, 0xc0, 0x3a, 0x91, 0x10, 0xc0, 0x3a, 0x9b,
+ 0x14, 0xc0, 0x3a, 0xad, 0x03, 0xc0, 0x3a, 0xbf, 0x4b, 0x93, 0xd8, 0xc0,
+ 0x3a, 0xcb, 0xc2, 0x01, 0xc2, 0x0f, 0xa6, 0xe9, 0x0e, 0xc0, 0x3a, 0xef,
+ 0xcd, 0x7a, 0x98, 0x00, 0x04, 0xa8, 0x16, 0xc0, 0x3a, 0xfb, 0x17, 0xc0,
+ 0x3b, 0x07, 0x06, 0xc0, 0x3b, 0x1c, 0x10, 0xc0, 0x3b, 0x2a, 0xc3, 0x74,
+ 0x74, 0x0f, 0xaf, 0xf9, 0x11, 0xc0, 0x3b, 0x41, 0x43, 0x01, 0xe9, 0xc0,
+ 0x3b, 0x53, 0xca, 0x40, 0x00, 0x0f, 0xa7, 0x8b, 0x00, 0x3b, 0x5d, 0xca,
+ 0x9d, 0x40, 0x0f, 0x9d, 0x28, 0x16, 0xc0, 0x3b, 0x61, 0x4c, 0x8a, 0x6c,
+ 0xc0, 0x3b, 0x6d, 0x0d, 0xc0, 0x3b, 0x92, 0x15, 0xc0, 0x3b, 0x9e, 0x14,
+ 0xc0, 0x3b, 0xb6, 0x0e, 0xc0, 0x3b, 0xce, 0x12, 0xc0, 0x3b, 0xe0, 0x90,
+ 0x0f, 0xa3, 0x43, 0x00, 0x3b, 0xec, 0x0a, 0xc0, 0x3c, 0x1a, 0xc6, 0xd2,
+ 0xa4, 0x0f, 0xae, 0xb1, 0xc4, 0x61, 0xbe, 0x00, 0x05, 0x79, 0xc5, 0xe0,
+ 0x9e, 0x0f, 0xcd, 0x19, 0x09, 0x40, 0x3c, 0x26, 0x15, 0xc0, 0x3c, 0x36,
+ 0x42, 0x03, 0x12, 0xc0, 0x3c, 0x42, 0x43, 0x1c, 0xd3, 0x40, 0x3c, 0x4c,
+ 0x06, 0xc0, 0x3c, 0x58, 0x47, 0x01, 0xff, 0x40, 0x3c, 0x6a, 0x15, 0xc0,
+ 0x3c, 0xca, 0x0e, 0xc0, 0x3c, 0xdc, 0x50, 0x0f, 0x99, 0xc0, 0x3c, 0xe8,
+ 0x16, 0xc0, 0x3c, 0xf4, 0x4b, 0x6f, 0x71, 0xc0, 0x3d, 0x00, 0x4f, 0x2e,
+ 0x37, 0xc0, 0x3d, 0x41, 0x46, 0x08, 0xd7, 0x40, 0x3d, 0x4b, 0x15, 0xc0,
+ 0x3d, 0x6f, 0x42, 0x01, 0xc4, 0xc0, 0x3d, 0x79, 0x19, 0xc0, 0x3d, 0x85,
+ 0x43, 0x11, 0xb8, 0xc0, 0x3d, 0x9b, 0xc5, 0xdb, 0x94, 0x01, 0x32, 0x33,
+ 0x00, 0x3d, 0xa7, 0x43, 0x5e, 0xd8, 0xc0, 0x3d, 0xad, 0x46, 0xd8, 0xda,
+ 0xc0, 0x3d, 0xb9, 0xc5, 0xde, 0x37, 0x0f, 0xa2, 0xa1, 0xc7, 0xc6, 0xe9,
+ 0x0f, 0xc8, 0x98, 0xcc, 0x8d, 0x3c, 0x0f, 0xc9, 0x11, 0x4a, 0x9f, 0xe8,
+ 0xc0, 0x3d, 0xc9, 0xc2, 0x00, 0x3b, 0x01, 0x15, 0xe3, 0x00, 0x3d, 0xdb,
+ 0x04, 0xc0, 0x3d, 0xe1, 0x0b, 0xc0, 0x3d, 0xed, 0x47, 0x35, 0xf2, 0xc0,
+ 0x3d, 0xf9, 0xd3, 0x45, 0x9b, 0x01, 0x01, 0x79, 0xc8, 0xba, 0x13, 0x0f,
+ 0xa6, 0xd9, 0xca, 0xa4, 0xe8, 0x0f, 0xcf, 0xf8, 0xd6, 0x2f, 0x3f, 0x0f,
+ 0xb5, 0x89, 0x94, 0x01, 0x15, 0xeb, 0x00, 0x3e, 0x05, 0x16, 0xc0, 0x3e,
+ 0x1a, 0x00, 0xc0, 0x3e, 0x2b, 0x42, 0x00, 0xa9, 0xc0, 0x3e, 0x4e, 0xd1,
+ 0x57, 0xeb, 0x01, 0x00, 0x89, 0xc2, 0x00, 0xa7, 0x0f, 0xa2, 0x19, 0xc8,
+ 0xc3, 0x6b, 0x01, 0x56, 0xe8, 0xca, 0x9f, 0x34, 0x0f, 0x0a, 0x79, 0x0e,
+ 0xc0, 0x3e, 0x5a, 0x46, 0x08, 0xd7, 0xc0, 0x3e, 0x66, 0x15, 0xc0, 0x3e,
+ 0x8a, 0x43, 0x0e, 0x70, 0x40, 0x3e, 0x96, 0x44, 0x6f, 0xe4, 0xc0, 0x3e,
+ 0xc0, 0x0f, 0xc0, 0x3e, 0xcc, 0xca, 0x9e, 0x12, 0x0f, 0xa9, 0x49, 0xc2,
+ 0x00, 0x30, 0x00, 0x00, 0x00, 0xca, 0xa0, 0xa6, 0x00, 0x3f, 0xf9, 0x06,
+ 0xc0, 0x3e, 0xd8, 0x0e, 0xc0, 0x3e, 0xea, 0xd0, 0x0f, 0x62, 0x00, 0x3f,
+ 0xc9, 0x43, 0x00, 0xea, 0xc0, 0x3e, 0xfc, 0x47, 0x11, 0xf0, 0xc0, 0x3f,
+ 0x08, 0xd4, 0x3b, 0x22, 0x00, 0x3f, 0xa0, 0x10, 0xc0, 0x3f, 0x14, 0xce,
+ 0x71, 0xf2, 0x0f, 0x98, 0x18, 0x46, 0x04, 0x91, 0xc0, 0x3f, 0x20, 0x44,
+ 0x01, 0xed, 0x40, 0x3f, 0x42, 0x44, 0xe9, 0x6f, 0xc0, 0x3f, 0x64, 0x12,
+ 0xc0, 0x3f, 0x70, 0x00, 0x40, 0x3f, 0x7c, 0xc3, 0x00, 0x57, 0x0f, 0xcc,
+ 0x29, 0xcf, 0x61, 0x0e, 0x01, 0x33, 0x89, 0x94, 0x0f, 0xa2, 0x12, 0x00,
+ 0x3f, 0x8e, 0x89, 0x0f, 0xca, 0xd1, 0x52, 0x4a, 0xfa, 0x40, 0x3f, 0x9b,
+ 0x16, 0xc0, 0x40, 0x17, 0x05, 0xc0, 0x40, 0x21, 0xd1, 0x50, 0x6a, 0x0f,
+ 0xb0, 0x88, 0x15, 0xc0, 0x40, 0x2d, 0x42, 0x00, 0xea, 0xc0, 0x40, 0x37,
+ 0xc9, 0xaf, 0xc1, 0x00, 0x9b, 0x09, 0xc9, 0x11, 0x47, 0x00, 0x9b, 0x11,
+ 0x12, 0xc0, 0x40, 0x41, 0xcd, 0x2d, 0xa6, 0x00, 0x9b, 0x39, 0x46, 0x08,
+ 0xd7, 0xc0, 0x40, 0x4d, 0x47, 0x37, 0x49, 0xc0, 0x40, 0x6b, 0x4b, 0x90,
+ 0x31, 0x40, 0x40, 0x89, 0x07, 0xc0, 0x40, 0xaf, 0x47, 0xcc, 0x37, 0xc0,
+ 0x40, 0xca, 0x88, 0x0f, 0xce, 0xe9, 0x4d, 0x7e, 0x81, 0x40, 0x40, 0xd6,
+ 0x00, 0xc0, 0x41, 0x4f, 0xc6, 0x5f, 0x73, 0x01, 0x33, 0x50, 0xc6, 0x2b,
+ 0x9d, 0x01, 0x38, 0x4b, 0x00, 0x41, 0x5f, 0xca, 0x2f, 0x4b, 0x01, 0x1c,
+ 0x31, 0x42, 0x00, 0x3c, 0xc0, 0x41, 0x65, 0x00, 0xc0, 0x41, 0x71, 0xc5,
+ 0xdb, 0x2b, 0x00, 0x00, 0x28, 0x4b, 0x9c, 0x44, 0xc0, 0x41, 0x83, 0x4b,
+ 0x90, 0x1b, 0xc0, 0x41, 0x8f, 0x48, 0xb9, 0x1b, 0x40, 0x41, 0x9b, 0x42,
+ 0x00, 0x2f, 0xc0, 0x41, 0xa7, 0x0b, 0x40, 0x41, 0xb1, 0x46, 0xd6, 0x70,
+ 0xc0, 0x41, 0xbd, 0xc4, 0x63, 0xad, 0x00, 0x00, 0xd8, 0xcc, 0x88, 0x50,
+ 0x01, 0x08, 0x39, 0x14, 0x40, 0x41, 0xc7, 0x95, 0x0f, 0xa2, 0x01, 0xc7,
+ 0xab, 0x67, 0x0f, 0xa2, 0x98, 0x0b, 0xc0, 0x41, 0xd1, 0x4c, 0x86, 0x04,
+ 0xc0, 0x41, 0xdd, 0x42, 0x00, 0x4d, 0xc0, 0x41, 0xf9, 0x47, 0xcd, 0x79,
+ 0xc0, 0x42, 0x05, 0x47, 0xca, 0xe0, 0x40, 0x42, 0x39, 0xc5, 0xdb, 0x58,
+ 0x0f, 0xcc, 0x69, 0xc4, 0xe6, 0x23, 0x0f, 0x9e, 0x61, 0x03, 0xc0, 0x42,
+ 0x63, 0xc6, 0xd2, 0x74, 0x0f, 0xbb, 0xe9, 0xc5, 0xd3, 0x41, 0x0f, 0xcb,
+ 0xe9, 0xc3, 0x07, 0xc3, 0x0f, 0xd5, 0xf9, 0x4c, 0x8b, 0xc8, 0x40, 0x42,
+ 0x6d, 0x07, 0xc0, 0x42, 0xe1, 0x03, 0xc0, 0x42, 0xf1, 0x0b, 0xc0, 0x43,
+ 0x09, 0x11, 0x40, 0x43, 0x15, 0xc2, 0x00, 0x4d, 0x01, 0x34, 0xcb, 0x00,
+ 0x43, 0x21, 0x0f, 0xc0, 0x43, 0x27, 0x11, 0xc0, 0x43, 0x33, 0xcf, 0x65,
+ 0xbe, 0x01, 0x05, 0x81, 0xc3, 0x74, 0x86, 0x0f, 0xce, 0xf1, 0xc7, 0xcb,
+ 0xff, 0x01, 0x80, 0x98, 0xca, 0xa8, 0x94, 0x01, 0x09, 0xb9, 0x14, 0xc0,
+ 0x43, 0x3f, 0xc9, 0xae, 0xaa, 0x0f, 0xd6, 0xd0, 0xc6, 0xd1, 0x2a, 0x0f,
+ 0x9d, 0x91, 0xc4, 0xc2, 0xf5, 0x0f, 0xce, 0x20, 0x11, 0xc0, 0x43, 0x4c,
+ 0xca, 0xa9, 0x3e, 0x01, 0x4f, 0x31, 0x03, 0x40, 0x43, 0x5e, 0x43, 0x00,
+ 0x55, 0xc0, 0x43, 0x6a, 0xc3, 0x10, 0x64, 0x0f, 0xbb, 0xf9, 0xd0, 0x59,
+ 0xff, 0x01, 0x3e, 0x39, 0xcc, 0x8c, 0x4c, 0x01, 0x31, 0x31, 0x0b, 0xc0,
+ 0x43, 0x76, 0x45, 0x0d, 0x2b, 0x40, 0x43, 0x82, 0xc2, 0x00, 0x2a, 0x0f,
+ 0xcd, 0x31, 0x4b, 0x90, 0x05, 0x40, 0x43, 0x8e, 0x47, 0xc8, 0x39, 0xc0,
+ 0x43, 0xa6, 0x42, 0x00, 0xc0, 0xc0, 0x43, 0xc4, 0x52, 0x28, 0x69, 0xc0,
+ 0x43, 0xd0, 0xc3, 0x00, 0xe4, 0x0f, 0xce, 0x29, 0xd0, 0x5f, 0xcf, 0x0f,
+ 0xd3, 0x88, 0x07, 0xc0, 0x43, 0xd6, 0xc7, 0xcd, 0xc6, 0x01, 0x36, 0x71,
+ 0xc8, 0x14, 0x40, 0x01, 0x30, 0x69, 0x42, 0x00, 0x2c, 0x40, 0x43, 0xee,
+ 0x06, 0xc0, 0x43, 0xfd, 0x47, 0xc5, 0x06, 0xc0, 0x44, 0x07, 0xc3, 0x0d,
+ 0x8f, 0x0f, 0xd6, 0x90, 0x16, 0xc0, 0x44, 0x2d, 0xc8, 0xb8, 0x93, 0x01,
+ 0x09, 0x28, 0x42, 0x00, 0xb2, 0xc0, 0x44, 0x39, 0x16, 0xc0, 0x44, 0x5d,
+ 0xc9, 0xb6, 0x27, 0x0f, 0xbb, 0xa8, 0xd1, 0x56, 0xec, 0x01, 0x1f, 0xf9,
+ 0x46, 0x3b, 0x8e, 0xc0, 0x44, 0x69, 0xda, 0x1c, 0x3e, 0x07, 0xff, 0xe0,
+ 0x0e, 0xc0, 0x44, 0x75, 0xcb, 0x91, 0x5a, 0x0f, 0xcb, 0xa8, 0x44, 0x78,
+ 0xc4, 0xc0, 0x44, 0x84, 0xc4, 0xd1, 0x0e, 0x00, 0x16, 0xd8, 0x46, 0xd9,
+ 0x64, 0xc0, 0x44, 0x9c, 0x44, 0x3a, 0x70, 0x40, 0x44, 0xa8, 0x46, 0xd1,
+ 0x42, 0xc0, 0x44, 0xb4, 0x51, 0x50, 0x8c, 0xc0, 0x44, 0xf7, 0x4a, 0x57,
+ 0x1f, 0x40, 0x45, 0x0f, 0x15, 0xc0, 0x45, 0x27, 0x42, 0x00, 0xb8, 0xc0,
+ 0x45, 0x33, 0x48, 0x10, 0x79, 0xc0, 0x45, 0x3f, 0x45, 0x00, 0xfa, 0xc0,
+ 0x45, 0x4b, 0xd4, 0x3c, 0xda, 0x08, 0xd1, 0x99, 0x47, 0x01, 0xff, 0xc0,
+ 0x45, 0x63, 0x46, 0x33, 0x45, 0x40, 0x45, 0xbf, 0xce, 0x6e, 0x56, 0x01,
+ 0x17, 0xf9, 0x14, 0xc0, 0x45, 0xcb, 0x15, 0xc0, 0x45, 0xdd, 0x45, 0x00,
+ 0xcd, 0xc0, 0x45, 0xe9, 0xca, 0xaa, 0x38, 0x01, 0x4c, 0x11, 0xd6, 0x2d,
+ 0xb3, 0x01, 0x53, 0x20, 0x49, 0xaf, 0xdc, 0xc0, 0x45, 0xf5, 0xc2, 0x04,
+ 0x35, 0x01, 0x5f, 0x11, 0xc8, 0xbf, 0x3b, 0x0f, 0xcc, 0x98, 0x47, 0xce,
+ 0xd7, 0xc0, 0x46, 0x05, 0x47, 0xcd, 0x48, 0xc0, 0x46, 0x35, 0xcc, 0x8e,
+ 0x68, 0x0f, 0x9c, 0x19, 0x94, 0x0f, 0xd6, 0xc8, 0xc2, 0x00, 0x10, 0x01,
+ 0x35, 0xa9, 0xc5, 0xda, 0xd1, 0x01, 0x32, 0x19, 0xc6, 0xd5, 0x32, 0x0f,
+ 0xc9, 0xc8, 0xc6, 0xcf, 0xe6, 0x0f, 0xab, 0xc9, 0xc2, 0x01, 0x5b, 0x01,
+ 0x50, 0xe8, 0xc3, 0x39, 0xe4, 0x0f, 0xa2, 0xb1, 0xc7, 0xc9, 0x89, 0x0f,
+ 0xd5, 0xe0, 0xc9, 0x4c, 0x89, 0x01, 0x33, 0x49, 0x42, 0x0a, 0x49, 0xc0,
+ 0x46, 0x65, 0xd9, 0x20, 0x00, 0x01, 0x50, 0xb1, 0xc3, 0x26, 0xf7, 0x0f,
+ 0xbb, 0xf0, 0xcb, 0x59, 0xc4, 0x01, 0x12, 0xf9, 0x00, 0x40, 0x46, 0x71,
+ 0xc6, 0xd0, 0xf4, 0x01, 0x31, 0x79, 0x00, 0x40, 0x46, 0x7d, 0x45, 0xdb,
+ 0x7b, 0xc0, 0x46, 0x89, 0xca, 0xa5, 0xba, 0x0f, 0xa4, 0xd9, 0xc6, 0x07,
+ 0x8a, 0x00, 0x05, 0x28, 0x4f, 0x6a, 0x5f, 0x40, 0x46, 0x9b, 0xc2, 0x00,
+ 0xff, 0x01, 0x16, 0x09, 0xc3, 0x00, 0x3a, 0x01, 0x16, 0x00, 0xc8, 0x69,
+ 0xb2, 0x01, 0x10, 0x89, 0x46, 0x0c, 0x9d, 0x40, 0x46, 0xa7, 0xc8, 0x27,
+ 0x35, 0x01, 0x10, 0x81, 0x47, 0x1f, 0x71, 0x40, 0x46, 0xb3, 0x42, 0x01,
+ 0xa1, 0xc0, 0x46, 0xc5, 0xc8, 0xc1, 0x9b, 0x0f, 0xcb, 0x59, 0xc2, 0x09,
+ 0x06, 0x0f, 0xb7, 0xb1, 0x50, 0x5a, 0x1f, 0xc0, 0x46, 0xd0, 0x06, 0x40,
+ 0x47, 0x64, 0xc8, 0xbb, 0xfb, 0x01, 0x36, 0x81, 0x07, 0xc0, 0x47, 0x6e,
+ 0x42, 0x00, 0x3c, 0xc0, 0x47, 0x7b, 0x11, 0xc0, 0x47, 0x8a, 0x12, 0xc0,
+ 0x47, 0x94, 0x14, 0xc0, 0x47, 0xa0, 0x4b, 0x8e, 0x75, 0x40, 0x47, 0xac,
+ 0xc6, 0xd4, 0xa8, 0x01, 0x32, 0x89, 0xc6, 0xd8, 0xaa, 0x01, 0x71, 0xf8,
+ 0xc5, 0xd2, 0x6f, 0x01, 0x31, 0x21, 0xc5, 0xe0, 0xcb, 0x01, 0x08, 0x30,
+ 0xc9, 0x07, 0x87, 0x01, 0x31, 0x09, 0x50, 0x5b, 0xdf, 0x40, 0x48, 0x2a,
+ 0xc3, 0x00, 0x99, 0x0f, 0xa7, 0xbb, 0x00, 0x48, 0x36, 0xc4, 0x2b, 0x0a,
+ 0x0f, 0x9e, 0xa8, 0xc5, 0x6f, 0x5b, 0x0f, 0xa6, 0x29, 0xc9, 0xad, 0xc9,
+ 0x0f, 0xc8, 0xc8, 0xc5, 0x10, 0x15, 0x0f, 0xa1, 0x8a, 0x00, 0x48, 0x3c,
+ 0x44, 0xe6, 0x4f, 0xc0, 0x48, 0x42, 0x43, 0xed, 0x0e, 0x40, 0x48, 0x4e,
+ 0x14, 0xc0, 0x48, 0x56, 0x05, 0xc0, 0x48, 0x60, 0x15, 0xc0, 0x48, 0x74,
+ 0x12, 0xc0, 0x48, 0x98, 0x04, 0xc0, 0x48, 0xa4, 0x16, 0xc0, 0x48, 0xba,
+ 0x46, 0xa4, 0x6b, 0xc0, 0x48, 0xd2, 0x06, 0xc0, 0x48, 0xde, 0x0e, 0xc0,
+ 0x48, 0xf0, 0x0a, 0xc0, 0x48, 0xfc, 0x0f, 0xc0, 0x49, 0x0e, 0x19, 0xc0,
+ 0x49, 0x16, 0x08, 0xc0, 0x49, 0x20, 0xc4, 0xe5, 0xbf, 0x01, 0x75, 0x49,
+ 0x07, 0xc0, 0x49, 0x2c, 0x4d, 0x77, 0xb3, 0xc0, 0x49, 0x3e, 0x44, 0xe9,
+ 0xe7, 0xc0, 0x49, 0x74, 0xc3, 0x1a, 0xb6, 0x01, 0x75, 0xc9, 0x09, 0xc0,
+ 0x49, 0x84, 0x4d, 0x80, 0x89, 0x40, 0x49, 0x90, 0x96, 0x01, 0x8e, 0x03,
+ 0x00, 0x49, 0xa0, 0xc2, 0x0f, 0x3e, 0x01, 0x8e, 0x09, 0xc2, 0xec, 0x0a,
+ 0x01, 0x8e, 0x11, 0xc3, 0xec, 0x09, 0x01, 0x8e, 0x19, 0x95, 0x01, 0x8e,
+ 0x8b, 0x00, 0x49, 0xa4, 0x8a, 0x01, 0x8e, 0x83, 0x00, 0x49, 0xbe, 0x90,
+ 0x01, 0x8e, 0x79, 0x92, 0x01, 0x8e, 0x93, 0x00, 0x49, 0xd6, 0x86, 0x01,
+ 0x8e, 0xa1, 0x93, 0x01, 0x8f, 0x18, 0x42, 0x00, 0xbb, 0xc0, 0x49, 0xe2,
+ 0x07, 0xc0, 0x49, 0xf1, 0x14, 0xc0, 0x49, 0xfd, 0xcb, 0x92, 0xa4, 0x0f,
+ 0x9e, 0x09, 0xc5, 0xdd, 0xd3, 0x0f, 0x99, 0x80, 0x0b, 0xc0, 0x4a, 0x07,
+ 0x14, 0xc0, 0x4a, 0x11, 0x44, 0xe5, 0x37, 0xc0, 0x4a, 0x1d, 0x42, 0x01,
+ 0x0b, 0x40, 0x4a, 0x47, 0xc3, 0x09, 0xe7, 0x01, 0x35, 0xb9, 0xc4, 0x7d,
+ 0x38, 0x01, 0x31, 0x39, 0xc5, 0xda, 0xef, 0x0f, 0xa1, 0xf9, 0xc4, 0xe7,
+ 0x33, 0x0f, 0xa0, 0xa1, 0xc2, 0x19, 0x3e, 0x0f, 0xce, 0x92, 0x00, 0x4a,
+ 0x65, 0x48, 0xbf, 0x0b, 0xc0, 0x4a, 0x6b, 0xca, 0x9e, 0xe4, 0x0f, 0x9b,
+ 0x59, 0xc7, 0xcb, 0x5e, 0x0f, 0xcb, 0x10, 0xc3, 0x1c, 0xd2, 0x0f, 0xd3,
+ 0xe1, 0xca, 0xa5, 0x24, 0x01, 0x05, 0x10, 0x44, 0x01, 0x5e, 0xc0, 0x4a,
+ 0x77, 0xc9, 0xab, 0x0b, 0x0f, 0xa9, 0x70, 0x42, 0x03, 0x48, 0xc0, 0x4a,
+ 0x83, 0xc2, 0x00, 0x6a, 0x0f, 0xa2, 0x89, 0xc6, 0xd5, 0x56, 0x0f, 0xa0,
+ 0x51, 0xc6, 0xd9, 0x04, 0x0f, 0xca, 0x80, 0xc8, 0xbc, 0x3b, 0x0f, 0xa5,
+ 0x99, 0xca, 0x3e, 0xbd, 0x0f, 0x98, 0xc8, 0xcd, 0x78, 0xf8, 0x0f, 0x9e,
+ 0x78, 0xc4, 0xdc, 0x87, 0x0f, 0xcb, 0x29, 0x0d, 0x40, 0x4a, 0x93, 0x47,
+ 0x10, 0xa4, 0xc0, 0x4a, 0x9f, 0xc2, 0x00, 0xdd, 0x01, 0x30, 0x21, 0x12,
+ 0xc0, 0x4b, 0x0d, 0x0f, 0x40, 0x4b, 0x25, 0x42, 0x00, 0xb5, 0xc0, 0x4b,
+ 0x2f, 0xce, 0x6d, 0xf4, 0x0f, 0xa4, 0x89, 0xcb, 0x97, 0xd7, 0x0f, 0xb6,
+ 0x58, 0xc8, 0xbd, 0xe3, 0x01, 0x30, 0x61, 0x16, 0xc0, 0x4b, 0x3b, 0xca,
+ 0xa0, 0xd8, 0x01, 0x19, 0x91, 0x4a, 0x9d, 0x90, 0xc0, 0x4b, 0x53, 0xce,
+ 0x74, 0x84, 0x0f, 0x9f, 0x51, 0x08, 0xc0, 0x4b, 0x5f, 0xd5, 0x35, 0x1d,
+ 0x01, 0x53, 0x68, 0xcb, 0x92, 0x4c, 0x01, 0x12, 0xc1, 0xc2, 0x00, 0x2f,
+ 0x0f, 0xd5, 0xc1, 0xd2, 0x4e, 0x7e, 0x01, 0x72, 0x78, 0xc2, 0x00, 0xe5,
+ 0x00, 0x01, 0xd3, 0x00, 0x4b, 0x71, 0xcd, 0x7d, 0x56, 0x0f, 0xa5, 0x28,
+ 0x0b, 0xc0, 0x4b, 0x75, 0xc7, 0xcf, 0x6a, 0x0f, 0x9a, 0xd0, 0xc5, 0x10,
+ 0x15, 0x0f, 0xa1, 0x70, 0x1b, 0xc0, 0x4b, 0x7f, 0x44, 0x1c, 0x80, 0x40,
+ 0x4b, 0x8b, 0x46, 0x88, 0x0a, 0xc0, 0x4b, 0xa9, 0xc6, 0xd9, 0x10, 0x0f,
+ 0xa6, 0x58, 0xc7, 0x71, 0xdd, 0x0f, 0xc9, 0x09, 0x42, 0x00, 0xa7, 0xc0,
+ 0x4b, 0xb5, 0x42, 0x00, 0xbb, 0xc0, 0x4b, 0xc1, 0xc2, 0x03, 0xbd, 0x01,
+ 0x30, 0x0a, 0x00, 0x4b, 0xcd, 0xd3, 0x3f, 0xf7, 0x0f, 0xac, 0x09, 0x42,
+ 0x05, 0x08, 0xc0, 0x4b, 0xd3, 0xcf, 0x61, 0x4a, 0x0f, 0x9e, 0xd8, 0x42,
+ 0x00, 0x58, 0xc0, 0x4b, 0xdf, 0x17, 0x40, 0x4b, 0xe9, 0xc8, 0xc0, 0x2b,
+ 0x0f, 0x98, 0x30, 0xc3, 0xec, 0xd5, 0x0f, 0xb6, 0x19, 0xc3, 0x00, 0x4d,
+ 0x0f, 0x9b, 0x70, 0x45, 0x02, 0xcb, 0xc0, 0x4b, 0xfb, 0x51, 0x51, 0x36,
+ 0xc0, 0x4c, 0x4b, 0x4d, 0x7b, 0x75, 0x40, 0x4c, 0x5d, 0x0e, 0xc0, 0x4c,
+ 0x77, 0xe0, 0x09, 0x27, 0x01, 0x3b, 0x09, 0x14, 0x40, 0x4c, 0x83, 0x00,
+ 0xc0, 0x4c, 0x8f, 0xc3, 0x2e, 0xd0, 0x01, 0x5f, 0x01, 0xc4, 0x28, 0x52,
+ 0x0f, 0xce, 0x08, 0x42, 0x01, 0xa5, 0xc0, 0x4c, 0x9b, 0xc5, 0x02, 0xca,
+ 0x00, 0x05, 0x10, 0xc5, 0x02, 0xca, 0x01, 0x05, 0xa9, 0xc3, 0x13, 0x58,
+ 0x00, 0x05, 0xc0, 0x15, 0xc0, 0x4c, 0xa7, 0x47, 0x01, 0xff, 0xc0, 0x4c,
+ 0xb3, 0x49, 0x01, 0xf7, 0xc0, 0x4c, 0xc5, 0xce, 0x70, 0x24, 0x00, 0x24,
+ 0x11, 0xc6, 0x4c, 0x56, 0x05, 0x33, 0xf1, 0xc7, 0xce, 0x2f, 0x05, 0x33,
+ 0xf8, 0x50, 0x5f, 0xaf, 0xc0, 0x4c, 0xd1, 0x4d, 0x7e, 0xcf, 0x40, 0x4c,
+ 0xdf, 0xce, 0x6e, 0xf0, 0x00, 0x04, 0x99, 0xc5, 0x1c, 0xa1, 0x01, 0x10,
+ 0xb0, 0x49, 0xad, 0x54, 0x40, 0x4d, 0x23, 0x8e, 0x0f, 0xcd, 0x69, 0x96,
+ 0x0f, 0xa5, 0xd0, 0xcb, 0x92, 0x83, 0x01, 0x35, 0xe1, 0xc7, 0xb7, 0x2e,
+ 0x07, 0xf2, 0x28, 0xc7, 0xc5, 0x14, 0x01, 0x35, 0xd1, 0x06, 0xc0, 0x4d,
+ 0x47, 0xc5, 0x34, 0x9a, 0x00, 0x01, 0xd8, 0x16, 0xc0, 0x4d, 0x4d, 0xcf,
+ 0x68, 0x7f, 0x0f, 0xca, 0x40, 0xc9, 0xad, 0xd2, 0x01, 0x09, 0x01, 0x45,
+ 0x29, 0xe6, 0x40, 0x4d, 0x59, 0x42, 0x00, 0x57, 0xc0, 0x4d, 0x5f, 0xc6,
+ 0xd2, 0x50, 0x01, 0x11, 0xf9, 0x45, 0xde, 0x19, 0x40, 0x4d, 0x6b, 0x48,
+ 0xbf, 0xc3, 0xc0, 0x4d, 0x87, 0xcd, 0x80, 0x48, 0x0f, 0xc8, 0xc0, 0x42,
+ 0x01, 0x58, 0xc0, 0x4d, 0xd9, 0xd5, 0x36, 0xeb, 0x01, 0x39, 0xd0, 0x45,
+ 0xdc, 0xc0, 0xc0, 0x4d, 0xe5, 0x46, 0x3e, 0xf9, 0x40, 0x4e, 0x05, 0xcd,
+ 0x7b, 0x68, 0x01, 0x53, 0x61, 0x43, 0x07, 0x52, 0xc0, 0x4e, 0x11, 0x46,
+ 0x03, 0x50, 0x40, 0x4e, 0x1d, 0xc8, 0xc3, 0x63, 0x0f, 0xd3, 0xd1, 0x42,
+ 0x01, 0x02, 0xc0, 0x4e, 0x29, 0xd3, 0x46, 0x20, 0x01, 0x71, 0xe0, 0x16,
+ 0xc0, 0x4e, 0x35, 0x14, 0xc0, 0x4e, 0x41, 0x46, 0xd6, 0xa6, 0xc0, 0x4e,
+ 0x4b, 0xcd, 0x2b, 0x96, 0x0f, 0xac, 0x19, 0xc4, 0x01, 0x08, 0x0f, 0x9e,
+ 0xf9, 0xcc, 0x86, 0x4c, 0x0f, 0xce, 0x68, 0xd7, 0x28, 0xc4, 0x01, 0x39,
+ 0x49, 0x03, 0xc0, 0x4e, 0x57, 0x0b, 0x40, 0x4e, 0x63, 0xc6, 0xd3, 0xee,
+ 0x01, 0x1f, 0x89, 0xc8, 0xc0, 0xdb, 0x0f, 0xaf, 0x00, 0xce, 0x71, 0x82,
+ 0x0f, 0x9c, 0xc9, 0xc2, 0x07, 0x44, 0x0f, 0xb6, 0x99, 0xce, 0x76, 0x0c,
+ 0x0f, 0xca, 0xc8, 0x00, 0x40, 0x4e, 0x6f, 0x16, 0xc0, 0x4e, 0x7b, 0xca,
+ 0x88, 0x82, 0x0f, 0xd7, 0x08, 0xc4, 0xe8, 0x7f, 0x0f, 0xcc, 0xa9, 0x47,
+ 0xcf, 0x94, 0x40, 0x4e, 0x87, 0x48, 0x11, 0x17, 0xc0, 0x4e, 0xa3, 0xc5,
+ 0xe0, 0x53, 0x0f, 0xcb, 0x50, 0xc3, 0x02, 0x9f, 0x01, 0x32, 0x21, 0xc6,
+ 0xcf, 0xfe, 0x0f, 0xb7, 0x82, 0x00, 0x4e, 0xaf, 0x4c, 0x11, 0x33, 0xc0,
+ 0x4e, 0xb5, 0xd1, 0x4a, 0x46, 0x00, 0x41, 0xb1, 0x0f, 0xc0, 0x4e, 0xdf,
+ 0x4b, 0x6f, 0x71, 0xc0, 0x4e, 0xeb, 0x47, 0x01, 0xff, 0x40, 0x4f, 0x0f,
+ 0xc4, 0xe4, 0xfb, 0x0f, 0xcd, 0xd1, 0xc3, 0x0c, 0x82, 0x0f, 0xcf, 0xb8,
+ 0xc2, 0x1d, 0xe2, 0x0f, 0xcd, 0x41, 0xc2, 0x00, 0x30, 0x0f, 0xa4, 0x02,
+ 0x00, 0x4f, 0x67, 0xc2, 0x00, 0x2a, 0x01, 0x37, 0xb9, 0xcd, 0x79, 0x2c,
+ 0x0f, 0x9d, 0xf8, 0x16, 0xc0, 0x4f, 0x6d, 0x12, 0x40, 0x4f, 0x77, 0x86,
+ 0x0f, 0xb7, 0xb9, 0xca, 0xa1, 0x14, 0x0f, 0xab, 0xa9, 0x42, 0x06, 0x97,
+ 0x40, 0x4f, 0x81, 0x46, 0x72, 0xaa, 0xc0, 0x4f, 0x8d, 0xcb, 0x98, 0x24,
+ 0x0f, 0x9a, 0xa8, 0x45, 0x04, 0x8c, 0xc0, 0x4f, 0x99, 0xce, 0x6f, 0x98,
+ 0x05, 0x33, 0x98, 0xc3, 0x16, 0x72, 0x0f, 0xcc, 0x81, 0xc2, 0x07, 0x27,
+ 0x0f, 0xc9, 0xb8, 0x14, 0xc0, 0x4f, 0xa5, 0x4c, 0x01, 0x2b, 0xc0, 0x4f,
+ 0xaf, 0xc5, 0xe1, 0x0c, 0x01, 0x30, 0xc1, 0x18, 0xc0, 0x4f, 0xc1, 0xd0,
+ 0x5c, 0xdf, 0x0f, 0xca, 0xc1, 0xc9, 0xa3, 0xbd, 0x0f, 0xd7, 0x40, 0xc3,
+ 0x00, 0xc9, 0x0f, 0xb5, 0xf9, 0x42, 0x00, 0x2b, 0xc0, 0x4f, 0xcd, 0xd0,
+ 0x5a, 0xef, 0x01, 0x1b, 0xe9, 0xca, 0xa2, 0x22, 0x0f, 0x99, 0x01, 0x46,
+ 0x2b, 0x09, 0xc0, 0x4f, 0xe1, 0xdd, 0x10, 0x11, 0x0f, 0xc9, 0x78, 0xca,
+ 0xa8, 0x30, 0x01, 0x37, 0x49, 0x43, 0x00, 0xcf, 0xc0, 0x4f, 0xed, 0x92,
+ 0x0f, 0xb5, 0x11, 0xc3, 0x1e, 0x70, 0x0f, 0xb7, 0x08, 0x43, 0xc4, 0xd0,
+ 0xc0, 0x4f, 0xf9, 0xc4, 0x1f, 0xff, 0x0f, 0xb7, 0xa0, 0xc3, 0x09, 0xde,
+ 0x01, 0x34, 0xb1, 0xc2, 0x15, 0xea, 0x0f, 0xcf, 0x18, 0x44, 0x08, 0x31,
+ 0xc0, 0x50, 0x05, 0xc4, 0x45, 0xff, 0x01, 0x08, 0x41, 0x07, 0xc0, 0x50,
+ 0x17, 0xc3, 0x1b, 0xba, 0x0f, 0xa6, 0xe0, 0xc8, 0xbd, 0x2b, 0x0f, 0x9c,
+ 0x90, 0xc5, 0x27, 0x94, 0x01, 0x3a, 0x21, 0xc3, 0x14, 0xe9, 0x01, 0x30,
+ 0x1b, 0x00, 0x50, 0x23, 0xd0, 0x59, 0xcf, 0x0f, 0x9e, 0xa1, 0xc7, 0xcb,
+ 0xea, 0x0f, 0x9e, 0x10, 0xc2, 0x00, 0x95, 0x0f, 0xa0, 0x61, 0xc2, 0x00,
+ 0xdc, 0x0f, 0xa0, 0x68, 0x43, 0x01, 0xae, 0xc0, 0x50, 0x29, 0xd6, 0x2e,
+ 0xa5, 0x01, 0x08, 0xb8, 0xd6, 0x1e, 0xa5, 0x0f, 0xb3, 0x53, 0x00, 0x50,
+ 0x35, 0xc2, 0x04, 0x35, 0x00, 0x01, 0x7a, 0x00, 0x50, 0x3b, 0x4c, 0x8c,
+ 0xf4, 0xc0, 0x50, 0x41, 0xdb, 0x16, 0xa8, 0x08, 0xd5, 0x03, 0x00, 0x50,
+ 0x4d, 0x45, 0x00, 0xfa, 0xc0, 0x50, 0x53, 0x15, 0xc0, 0x50, 0x6b, 0xcf,
+ 0x64, 0x0b, 0x08, 0xd4, 0xc1, 0x55, 0x35, 0x71, 0xc0, 0x50, 0x77, 0x57,
+ 0x27, 0x99, 0xc0, 0x50, 0xa7, 0x47, 0x01, 0xff, 0xc0, 0x50, 0xb7, 0x46,
+ 0x33, 0x45, 0x40, 0x51, 0x11, 0xc8, 0xc3, 0x8b, 0x01, 0x35, 0xe9, 0xc2,
+ 0x02, 0xe1, 0x0f, 0xcf, 0x30, 0xd4, 0x3f, 0x5a, 0x01, 0x1c, 0xa1, 0x00,
+ 0xc0, 0x51, 0x1d, 0xc4, 0x18, 0x26, 0x0f, 0xca, 0x70, 0x46, 0x08, 0xd7,
+ 0xc0, 0x51, 0x2f, 0x47, 0x01, 0xff, 0x40, 0x51, 0x53, 0x4c, 0x11, 0x33,
+ 0xc0, 0x51, 0xcd, 0x47, 0x37, 0x49, 0xc0, 0x51, 0xdf, 0x4a, 0x57, 0x1f,
+ 0xc0, 0x51, 0xec, 0xd0, 0x5b, 0x6f, 0x08, 0x7a, 0x29, 0x47, 0x01, 0xff,
+ 0x40, 0x52, 0x16, 0x42, 0x01, 0xa5, 0xc0, 0x52, 0x73, 0xd8, 0x25, 0xdc,
+ 0x01, 0x3d, 0x38, 0x48, 0x1a, 0x0e, 0xc0, 0x52, 0x7d, 0xc5, 0xe0, 0xe4,
+ 0x01, 0x19, 0x78, 0xc6, 0xd6, 0xee, 0x0f, 0xaa, 0x69, 0xcd, 0x68, 0x9f,
+ 0x00, 0x00, 0xb0, 0x43, 0x61, 0x15, 0xc0, 0x52, 0xd1, 0xc3, 0x02, 0xfa,
+ 0x0f, 0xa4, 0x48, 0x47, 0x01, 0xff, 0xc0, 0x53, 0x29, 0x45, 0x02, 0xcb,
+ 0xc0, 0x53, 0x7f, 0x4b, 0x6f, 0x71, 0xc0, 0x53, 0x8f, 0x4a, 0xa5, 0x56,
+ 0x40, 0x53, 0xa5, 0x07, 0xc0, 0x53, 0xbd, 0xca, 0x9d, 0x22, 0x01, 0x05,
+ 0xb9, 0x03, 0x40, 0x53, 0xc9, 0x43, 0x06, 0xe5, 0xc0, 0x53, 0xd3, 0xc6,
+ 0xd5, 0x26, 0x0f, 0x9a, 0xe9, 0xc2, 0x01, 0xa1, 0x00, 0x01, 0x00, 0x49,
+ 0x70, 0xd1, 0x40, 0x53, 0xe0, 0x44, 0x00, 0x9a, 0xc0, 0x53, 0xec, 0xc3,
+ 0x07, 0xc6, 0x0f, 0xab, 0xba, 0x00, 0x53, 0xfe, 0xc9, 0xb6, 0xed, 0x0f,
+ 0x9e, 0x29, 0xcb, 0x9a, 0x08, 0x0f, 0xa1, 0x99, 0x11, 0xc0, 0x54, 0x04,
+ 0xc3, 0x02, 0xfa, 0x0f, 0xcf, 0xe8, 0x15, 0xc0, 0x54, 0x0e, 0xc4, 0x53,
+ 0xa8, 0x0f, 0xcd, 0xc1, 0xc7, 0xc4, 0x81, 0x0f, 0xcd, 0xc8, 0x00, 0xc0,
+ 0x54, 0x1a, 0x47, 0xc7, 0xa6, 0xc0, 0x54, 0x26, 0xc6, 0x92, 0xf6, 0x0f,
+ 0x99, 0xd9, 0xc4, 0xad, 0x8f, 0x0f, 0x98, 0x2b, 0x00, 0x54, 0x50, 0xd2,
+ 0x49, 0x38, 0x0f, 0x98, 0x38, 0xc6, 0x07, 0xba, 0x01, 0x1d, 0x99, 0xc3,
+ 0x02, 0x47, 0x01, 0x1d, 0x91, 0xcd, 0x80, 0xf1, 0x01, 0x50, 0x58, 0x00,
+ 0x40, 0x54, 0x56, 0x43, 0x00, 0xdd, 0xc0, 0x54, 0x6e, 0x46, 0x08, 0x2f,
+ 0xc0, 0x54, 0x83, 0xc6, 0xb6, 0x60, 0x00, 0x00, 0xd0, 0xcc, 0x85, 0xd4,
+ 0x01, 0x11, 0x79, 0xc2, 0x00, 0x2a, 0x0f, 0x9e, 0x20, 0xc2, 0x00, 0x0a,
+ 0x0f, 0x9b, 0x19, 0xcf, 0x61, 0xfe, 0x0f, 0xb4, 0xf8, 0x0e, 0xc0, 0x54,
+ 0xbd, 0xca, 0xa3, 0xbc, 0x0f, 0xb0, 0x78, 0x42, 0x00, 0x30, 0xc0, 0x54,
+ 0xc7, 0xca, 0x4b, 0x14, 0x01, 0x51, 0x98, 0xd5, 0x32, 0x14, 0x0f, 0xb3,
+ 0xa9, 0x90, 0x0f, 0xcd, 0x10, 0x42, 0x06, 0x8c, 0xc0, 0x54, 0xd4, 0x10,
+ 0xc0, 0x54, 0xe0, 0xc2, 0x01, 0x03, 0x01, 0x01, 0x90, 0xc9, 0xb0, 0x2d,
+ 0x0f, 0xcd, 0x79, 0xc7, 0xca, 0xf5, 0x01, 0x18, 0x29, 0x12, 0xc0, 0x54,
+ 0xed, 0xc7, 0xc4, 0xce, 0x01, 0x5e, 0xc1, 0xcc, 0x8b, 0xb0, 0x0f, 0xb6,
+ 0x38, 0xca, 0xa8, 0xd0, 0x01, 0x1c, 0xb9, 0xc5, 0xbf, 0x4e, 0x01, 0x13,
+ 0xd3, 0x00, 0x54, 0xfc, 0x15, 0xc0, 0x55, 0x00, 0x46, 0xd5, 0x0e, 0xc0,
+ 0x55, 0x0c, 0xc4, 0xe4, 0x5f, 0x0f, 0xcb, 0x40, 0x05, 0xc0, 0x55, 0x1e,
+ 0xcc, 0x86, 0x58, 0x01, 0x08, 0x73, 0x00, 0x55, 0x2a, 0x1b, 0x40, 0x55,
+ 0x30, 0xc2, 0x01, 0x89, 0x01, 0x32, 0x3b, 0x00, 0x55, 0x3c, 0xc6, 0x0e,
+ 0xc1, 0x00, 0x01, 0x5b, 0x00, 0x55, 0x42, 0xcc, 0x86, 0x1c, 0x00, 0x05,
+ 0x89, 0xc4, 0x02, 0xfa, 0x0f, 0xd5, 0x00, 0x42, 0x11, 0x3f, 0xc0, 0x55,
+ 0x46, 0xca, 0x0f, 0x53, 0x01, 0x39, 0x79, 0x07, 0xc0, 0x55, 0x52, 0xc7,
+ 0x22, 0xce, 0x0f, 0xd3, 0xb9, 0xc3, 0x13, 0x33, 0x0f, 0xd4, 0x28, 0xc8,
+ 0xc1, 0xcb, 0x0f, 0xb7, 0xd8, 0xc3, 0x97, 0x83, 0x01, 0x32, 0x99, 0xc3,
+ 0x70, 0xa5, 0x0f, 0xa9, 0x58, 0xcd, 0x79, 0xc8, 0x01, 0x56, 0xd0, 0xc8,
+ 0xbf, 0x2b, 0x0f, 0xa5, 0x49, 0x8e, 0x0f, 0xa4, 0x51, 0xc9, 0x95, 0xc9,
+ 0x00, 0x05, 0xb0, 0x00, 0x40, 0x55, 0x5e, 0xcc, 0x88, 0x8c, 0x0f, 0xb6,
+ 0x11, 0x47, 0xcb, 0x6c, 0xc0, 0x55, 0x6a, 0x07, 0x40, 0x55, 0x76, 0x87,
+ 0x0f, 0xae, 0x7b, 0x00, 0x55, 0x82, 0xc3, 0x73, 0xb1, 0x0f, 0xb6, 0xa0,
+ 0x16, 0xc0, 0x55, 0x8e, 0x4b, 0x8f, 0xfa, 0xc0, 0x55, 0xa6, 0x03, 0xc0,
+ 0x55, 0xca, 0xc3, 0x29, 0x21, 0x0f, 0xcc, 0xe0, 0xcc, 0x26, 0x0c, 0x08,
+ 0xd7, 0xab, 0x00, 0x55, 0xdc, 0x0e, 0xc0, 0x55, 0xe0, 0xce, 0x76, 0x44,
+ 0x08, 0xd7, 0x7b, 0x00, 0x55, 0xef, 0x47, 0xcb, 0x8f, 0xc0, 0x55, 0xf3,
+ 0xcb, 0x5c, 0x8f, 0x08, 0xd7, 0x32, 0x00, 0x56, 0x05, 0xc3, 0x02, 0xe3,
+ 0x01, 0x35, 0xa1, 0x0f, 0x40, 0x56, 0x09, 0x05, 0xc0, 0x56, 0x19, 0x45,
+ 0x02, 0xcb, 0xc0, 0x56, 0x25, 0x47, 0x37, 0x49, 0xc0, 0x56, 0x5d, 0x46,
+ 0x08, 0xd7, 0xc0, 0x56, 0x6d, 0x49, 0xb5, 0x6a, 0xc0, 0x56, 0x91, 0x47,
+ 0xc4, 0x6c, 0x40, 0x56, 0xa3, 0xc7, 0xc7, 0x4b, 0x0f, 0xa1, 0xe1, 0xc5,
+ 0xdf, 0x63, 0x0f, 0xca, 0xf0, 0x03, 0xc0, 0x56, 0xbb, 0xc8, 0x5c, 0xe7,
+ 0x0f, 0x9b, 0x91, 0xc9, 0xb1, 0x68, 0x0f, 0xd5, 0xa0, 0x45, 0x01, 0x5d,
+ 0xc0, 0x56, 0xc7, 0xc8, 0xb9, 0x4b, 0x0f, 0x9a, 0xb9, 0xc7, 0x47, 0x49,
+ 0x00, 0x05, 0x19, 0xcb, 0x92, 0x36, 0x0f, 0xd6, 0xb9, 0xc2, 0x11, 0x3f,
+ 0x0f, 0xa2, 0xe8, 0x15, 0xc0, 0x56, 0xd3, 0x11, 0x40, 0x56, 0xdf, 0xcf,
+ 0x60, 0x40, 0x01, 0x18, 0xb1, 0x16, 0xc0, 0x56, 0xeb, 0xc5, 0xd9, 0xff,
+ 0x01, 0x5f, 0x38, 0x4d, 0x78, 0x9d, 0xc0, 0x56, 0xf7, 0xc4, 0x12, 0x87,
+ 0x0f, 0x9b, 0xf8, 0xc3, 0x5f, 0x11, 0x0f, 0xb4, 0x9b, 0x00, 0x57, 0x03,
+ 0xc7, 0xc6, 0x72, 0x0f, 0xa3, 0x70, 0xca, 0x82, 0x8e, 0x01, 0x3e, 0x13,
+ 0x00, 0x57, 0x09, 0x15, 0xc0, 0x57, 0x0f, 0xd1, 0x55, 0xdc, 0x01, 0x33,
+ 0xf1, 0x00, 0xc0, 0x57, 0x21, 0xcc, 0x83, 0xc4, 0x0f, 0x9d, 0x69, 0xc9,
+ 0x97, 0x13, 0x00, 0x01, 0x28, 0xc3, 0xb5, 0xed, 0x01, 0x38, 0x79, 0xc6,
+ 0x11, 0x84, 0x01, 0x37, 0x21, 0xd6, 0x2b, 0x8d, 0x0f, 0xac, 0x31, 0xc9,
+ 0xb8, 0x3a, 0x0f, 0xb0, 0xa1, 0xc4, 0xe6, 0x5b, 0x0f, 0xa1, 0x38, 0x05,
+ 0xc0, 0x57, 0x33, 0x94, 0x0f, 0x9a, 0x81, 0xc4, 0xe8, 0x63, 0x0f, 0xca,
+ 0xe1, 0xc4, 0xe8, 0x4b, 0x0f, 0xd6, 0xd8, 0xc6, 0x9d, 0x26, 0x01, 0x05,
+ 0x89, 0xc8, 0xbb, 0xeb, 0x01, 0x05, 0x38, 0xcb, 0x97, 0xc1, 0x01, 0x00,
+ 0x41, 0xcf, 0x63, 0xc0, 0x01, 0x72, 0x70, 0xc9, 0xae, 0xfb, 0x0f, 0xa4,
+ 0xe1, 0xc2, 0x00, 0xa7, 0x0f, 0xa2, 0xd8, 0x16, 0xc0, 0x57, 0x43, 0xc3,
+ 0x05, 0x17, 0x08, 0x5d, 0x4b, 0x00, 0x57, 0x53, 0xc4, 0x08, 0xdd, 0x08,
+ 0x5d, 0x60, 0xc3, 0x05, 0xe3, 0x08, 0x5c, 0xe1, 0xc5, 0x0c, 0xa3, 0x08,
+ 0x5c, 0xd8, 0xc3, 0x5f, 0x3d, 0x08, 0x5c, 0x89, 0x15, 0xc0, 0x57, 0x59,
+ 0xc2, 0x03, 0x07, 0x08, 0x5c, 0x71, 0xc3, 0x21, 0x00, 0x08, 0x5c, 0x61,
+ 0xc8, 0xbc, 0xb3, 0x08, 0x5c, 0x59, 0xc6, 0xd7, 0x12, 0x08, 0x5c, 0x51,
+ 0xc4, 0xe5, 0x53, 0x08, 0x5c, 0x49, 0xc4, 0x4d, 0x48, 0x08, 0x5c, 0x41,
+ 0xc2, 0x00, 0x5b, 0x08, 0x5c, 0x23, 0x00, 0x57, 0x63, 0xc5, 0x4d, 0x42,
+ 0x08, 0x5c, 0x31, 0xcd, 0x7c, 0xad, 0x08, 0x5c, 0x29, 0xc6, 0x43, 0x0f,
+ 0x08, 0x5c, 0x19, 0xc5, 0x9e, 0xbc, 0x08, 0x5c, 0x11, 0xc4, 0xe5, 0xaf,
+ 0x08, 0x5c, 0x09, 0xc5, 0xa6, 0x5f, 0x08, 0x5c, 0x00, 0xd2, 0x4c, 0x98,
+ 0x00, 0xb9, 0xb1, 0xd2, 0x4b, 0x1e, 0x00, 0xb9, 0xa8, 0x48, 0xbd, 0x3b,
+ 0xc0, 0x57, 0x69, 0xc3, 0x21, 0x5f, 0x01, 0x5e, 0xd8, 0x46, 0xd5, 0x5c,
+ 0xc0, 0x57, 0x7b, 0x50, 0x5d, 0x2f, 0x40, 0x57, 0x91, 0x4c, 0x7a, 0xb3,
+ 0xc0, 0x57, 0xe5, 0x48, 0xae, 0xc6, 0x40, 0x57, 0xfb, 0xcc, 0x8b, 0xbc,
+ 0x01, 0x30, 0x59, 0x45, 0xdd, 0x24, 0xc0, 0x58, 0x2f, 0x42, 0x00, 0x2a,
+ 0x40, 0x58, 0x3b, 0x0b, 0xc0, 0x58, 0x48, 0xd6, 0x2e, 0x21, 0x0f, 0xae,
+ 0xd8, 0x49, 0x02, 0x5b, 0xc0, 0x58, 0x54, 0xd1, 0x54, 0x55, 0x01, 0x1e,
+ 0x53, 0x00, 0x58, 0x60, 0xd3, 0x45, 0xd4, 0x01, 0x1e, 0x4a, 0x00, 0x58,
+ 0x66, 0xcb, 0x95, 0x43, 0x01, 0x12, 0xe1, 0xc3, 0x06, 0x46, 0x00, 0x03,
+ 0xf9, 0xcb, 0x93, 0x07, 0x0f, 0xb4, 0xd0, 0xca, 0xaa, 0x2e, 0x01, 0x08,
+ 0x49, 0xc7, 0xce, 0xfa, 0x01, 0x08, 0x19, 0xc4, 0x02, 0xcb, 0x00, 0x05,
+ 0x80, 0xc4, 0x01, 0xa7, 0x0f, 0xb1, 0xa9, 0xc6, 0x01, 0xb1, 0x0f, 0xa5,
+ 0x58, 0x48, 0x83, 0x74, 0xc0, 0x58, 0x6c, 0x43, 0x08, 0xda, 0x40, 0x58,
+ 0x85, 0x49, 0xb6, 0xd2, 0xc0, 0x58, 0xb5, 0xcb, 0x8f, 0x76, 0x01, 0x35,
+ 0x71, 0x0b, 0x40, 0x58, 0xe7, 0x51, 0x57, 0x85, 0xc0, 0x58, 0xf9, 0x53,
+ 0x40, 0x7c, 0x40, 0x59, 0x0b, 0x03, 0xc0, 0x59, 0x17, 0xdb, 0x18, 0xfa,
+ 0x01, 0x1c, 0x11, 0xcb, 0x91, 0xde, 0x0f, 0xcb, 0xc0, 0x44, 0x07, 0x4d,
+ 0xc0, 0x59, 0x23, 0xce, 0x6d, 0x92, 0x0f, 0xb7, 0x90, 0xd7, 0x26, 0xe1,
+ 0x01, 0x1c, 0x99, 0xc3, 0x01, 0x32, 0x0f, 0x9d, 0x78, 0x0f, 0xc0, 0x59,
+ 0x47, 0xc6, 0x1e, 0x8c, 0x00, 0x05, 0x40, 0x12, 0xc0, 0x59, 0x53, 0xca,
+ 0xa3, 0x3a, 0x0f, 0xc9, 0x21, 0xcc, 0x85, 0xa4, 0x0f, 0xa1, 0x50, 0xdc,
+ 0x15, 0x12, 0x01, 0x3c, 0xd9, 0xc9, 0x8f, 0x4c, 0x01, 0x05, 0x79, 0xc3,
+ 0x1c, 0xc5, 0x0f, 0xa0, 0x4a, 0x00, 0x59, 0x5f, 0x10, 0xc0, 0x59, 0x65,
+ 0x4b, 0x2d, 0x74, 0x40, 0x59, 0x6f, 0x49, 0x03, 0x76, 0xc0, 0x59, 0x81,
+ 0x47, 0x00, 0x29, 0x40, 0x59, 0x8d, 0x42, 0x01, 0x22, 0xc0, 0x59, 0x99,
+ 0xc5, 0xde, 0x82, 0x0f, 0x9b, 0x48, 0x44, 0x04, 0x8d, 0xc0, 0x59, 0xaf,
+ 0x00, 0x40, 0x59, 0xd5, 0x43, 0x06, 0x84, 0xc0, 0x59, 0xed, 0xc5, 0x10,
+ 0x15, 0x0f, 0xa1, 0xb0, 0x4b, 0x9c, 0x70, 0xc0, 0x5a, 0x05, 0xc7, 0xbe,
+ 0x43, 0x01, 0x14, 0x0b, 0x00, 0x5a, 0x14, 0x42, 0x07, 0x60, 0xc0, 0x5a,
+ 0x1a, 0xc5, 0xde, 0x78, 0x01, 0x15, 0x71, 0xc6, 0x02, 0x50, 0x01, 0x11,
+ 0x22, 0x00, 0x5a, 0x29, 0xc6, 0xb2, 0xc1, 0x01, 0x05, 0x59, 0xc3, 0x1e,
+ 0x77, 0x0f, 0xd6, 0x78, 0x46, 0x01, 0xab, 0x40, 0x5a, 0x2f, 0xc4, 0xe9,
+ 0x0b, 0x0f, 0xa1, 0x61, 0xc8, 0x02, 0xc7, 0x00, 0x01, 0x20, 0xdd, 0x11,
+ 0x33, 0x0d, 0xe4, 0xf9, 0xcb, 0x9c, 0x4f, 0x0d, 0xe4, 0xf1, 0xd5, 0x34,
+ 0xc9, 0x0d, 0xe4, 0xe9, 0xd1, 0x50, 0x59, 0x0d, 0xe4, 0xe1, 0x46, 0xd7,
+ 0x2a, 0xc0, 0x5a, 0x3e, 0x47, 0x01, 0xff, 0x40, 0x5a, 0x5a, 0x43, 0x00,
+ 0x5f, 0xc0, 0x5a, 0xf7, 0x00, 0x40, 0x5b, 0x09, 0xc4, 0x01, 0x0e, 0x01,
+ 0x2c, 0x99, 0xc9, 0xab, 0x65, 0x0f, 0xab, 0xb0, 0x00, 0x40, 0x5b, 0x15,
+ 0xc3, 0x39, 0x2b, 0x0f, 0xa4, 0x19, 0xc2, 0x13, 0xf3, 0x0f, 0x9b, 0x08,
+ 0x44, 0x03, 0x44, 0xc0, 0x5b, 0x21, 0xcd, 0x7c, 0x52, 0x0f, 0xa4, 0xf0,
+ 0x42, 0x00, 0x52, 0xc0, 0x5b, 0x2b, 0xc5, 0xda, 0x77, 0x01, 0x08, 0xf8,
+ 0x43, 0x20, 0xed, 0xc0, 0x5b, 0x37, 0xcd, 0x5c, 0x32, 0x00, 0x00, 0xf1,
+ 0xd1, 0x56, 0x75, 0x0f, 0xb4, 0xc9, 0xc4, 0xe6, 0xdf, 0x0f, 0xcf, 0xf0,
+ 0xc6, 0x01, 0xb1, 0x01, 0x1e, 0x71, 0xc4, 0x00, 0xcd, 0x01, 0x5c, 0x81,
+ 0xc5, 0x00, 0x47, 0x01, 0x5c, 0x88, 0xc5, 0xdc, 0xe8, 0x0f, 0x9a, 0x71,
+ 0xcd, 0x77, 0xda, 0x0f, 0xcf, 0x38, 0x5d, 0x11, 0xe1, 0xc0, 0x5b, 0x43,
+ 0xcb, 0x95, 0x17, 0x00, 0x05, 0x70, 0xcc, 0x25, 0x86, 0x05, 0x4a, 0xf9,
+ 0x18, 0xc0, 0x5b, 0xab, 0x4f, 0x2e, 0x37, 0xc0, 0x5b, 0xb7, 0x47, 0x01,
+ 0xff, 0x40, 0x5b, 0xc6, 0x16, 0xc0, 0x5c, 0x26, 0x4b, 0x05, 0xe8, 0xc0,
+ 0x5c, 0x36, 0x43, 0x27, 0x40, 0xc0, 0x5c, 0x4e, 0xdd, 0x11, 0x6d, 0x01,
+ 0x37, 0x29, 0x4a, 0xa0, 0xec, 0xc0, 0x5c, 0x5a, 0x49, 0x44, 0x58, 0x40,
+ 0x5c, 0x72, 0x00, 0xc0, 0x5c, 0x87, 0xc8, 0xba, 0x4b, 0x0f, 0xab, 0x69,
+ 0xc9, 0xab, 0x02, 0x0f, 0xd4, 0x80, 0x47, 0x00, 0x61, 0x40, 0x5c, 0xab,
+ 0xc4, 0x18, 0x26, 0x0f, 0x9a, 0xc9, 0xc7, 0xc8, 0xe8, 0x0f, 0x9a, 0xc0,
+ 0xd0, 0x5b, 0x2f, 0x01, 0x49, 0x59, 0xd0, 0x3d, 0x06, 0x01, 0x49, 0x80,
+ 0xc2, 0x00, 0xdd, 0x0f, 0xb4, 0x00, 0xd9, 0x20, 0x96, 0x0f, 0xc9, 0x19,
+ 0x07, 0xc0, 0x5c, 0xc3, 0xc9, 0xaf, 0x82, 0x0f, 0xcf, 0xd8, 0x00, 0xc0,
+ 0x5c, 0xcf, 0x4e, 0x72, 0x62, 0x40, 0x5c, 0xdb, 0xd3, 0x19, 0x6d, 0x01,
+ 0x3b, 0x39, 0xd8, 0x22, 0xc4, 0x01, 0x3b, 0x29, 0xc9, 0xab, 0xbf, 0x01,
+ 0x09, 0xd1, 0xdd, 0x10, 0x4b, 0x01, 0x5e, 0x69, 0xd7, 0x28, 0xad, 0x01,
+ 0x5e, 0x78, 0x48, 0x53, 0xb0, 0xc0, 0x5c, 0xf9, 0x15, 0xc0, 0x5d, 0x1e,
+ 0xca, 0x97, 0x96, 0x08, 0x0c, 0x89, 0x06, 0xc0, 0x5d, 0x28, 0xce, 0x73,
+ 0x6c, 0x08, 0x0c, 0xb9, 0xc7, 0xcf, 0xbe, 0x08, 0x0c, 0xd1, 0xce, 0x75,
+ 0x72, 0x08, 0x0c, 0xd8, 0xc3, 0x02, 0x01, 0x0f, 0x9f, 0xa8, 0x45, 0xe3,
+ 0x14, 0xc0, 0x5d, 0x3a, 0x44, 0x0c, 0x09, 0xc0, 0x5d, 0x46, 0x90, 0x01,
+ 0x36, 0x32, 0x00, 0x5d, 0x7a, 0x91, 0x0f, 0xa7, 0xdb, 0x00, 0x5d, 0x80,
+ 0xd1, 0x52, 0xf0, 0x01, 0x1d, 0xb8, 0xc2, 0x00, 0xe4, 0x01, 0x11, 0xb0,
+ 0x44, 0x01, 0x5e, 0xc0, 0x5d, 0x8c, 0xc4, 0xe7, 0x37, 0x0f, 0xcc, 0xe8,
+ 0xc5, 0x10, 0x15, 0x0f, 0xa1, 0x80, 0x49, 0x54, 0xdd, 0xc0, 0x5d, 0x98,
+ 0x47, 0x37, 0x49, 0xc0, 0x5d, 0xa4, 0x46, 0x08, 0xd7, 0x40, 0x5d, 0xc2,
+ 0x43, 0x05, 0x2d, 0xc0, 0x5d, 0xe0, 0x10, 0x40, 0x5e, 0x0a, 0xc9, 0xae,
+ 0x11, 0x01, 0x5f, 0x99, 0xc6, 0xc2, 0xe5, 0x01, 0x5f, 0xa1, 0xc8, 0xc1,
+ 0xe3, 0x01, 0x5f, 0xa9, 0xc8, 0xc2, 0xe3, 0x01, 0x5f, 0xb1, 0xc8, 0xbc,
+ 0x03, 0x01, 0x5f, 0xb9, 0xc9, 0xb5, 0xe8, 0x01, 0x5f, 0xc0, 0xc3, 0x8c,
+ 0x84, 0x07, 0xf0, 0x03, 0x00, 0x5e, 0x16, 0xc3, 0xed, 0x3e, 0x07, 0xf0,
+ 0x0b, 0x00, 0x5e, 0x5c, 0xc3, 0xed, 0x59, 0x07, 0xf0, 0x43, 0x00, 0x5e,
+ 0x96, 0xc3, 0xed, 0x5c, 0x07, 0xf0, 0x3b, 0x00, 0x5e, 0xbe, 0xc3, 0xed,
+ 0x2f, 0x07, 0xf0, 0x33, 0x00, 0x5e, 0xe6, 0xc3, 0xed, 0x32, 0x07, 0xf0,
+ 0x2b, 0x00, 0x5f, 0x0e, 0xc3, 0xed, 0x35, 0x07, 0xf0, 0x23, 0x00, 0x5f,
+ 0x36, 0xc3, 0xed, 0x38, 0x07, 0xf0, 0x1b, 0x00, 0x5f, 0x5e, 0xc3, 0xed,
+ 0x3b, 0x07, 0xf0, 0x12, 0x00, 0x5f, 0x86, 0x42, 0x01, 0xb1, 0xc0, 0x5f,
+ 0xae, 0xc5, 0x00, 0xea, 0x05, 0x30, 0x69, 0xc9, 0x11, 0x47, 0x05, 0x30,
+ 0x71, 0xcd, 0x2d, 0xa6, 0x05, 0x30, 0x79, 0x46, 0x08, 0xd7, 0x40, 0x5f,
+ 0xba, 0x46, 0x02, 0x5e, 0xc0, 0x5f, 0xde, 0x42, 0x00, 0x32, 0xc0, 0x60,
+ 0x21, 0xc5, 0xe3, 0x28, 0x01, 0x09, 0x18, 0x45, 0x02, 0xcb, 0xc0, 0x60,
+ 0x33, 0x45, 0x2a, 0xe3, 0x40, 0x60, 0x71, 0x5f, 0x0d, 0x1e, 0xc0, 0x60,
+ 0xa5, 0xcc, 0x87, 0x90, 0x01, 0x18, 0xb8, 0xc8, 0xc1, 0x2b, 0x0f, 0xa7,
+ 0xe1, 0x00, 0x40, 0x60, 0xb1, 0x4f, 0x01, 0xf7, 0xc0, 0x60, 0xbd, 0x4d,
+ 0x27, 0x71, 0x40, 0x61, 0x3d, 0xcc, 0x84, 0x00, 0x01, 0x11, 0x81, 0xc7,
+ 0xc8, 0xa9, 0x0f, 0x9e, 0x81, 0xc4, 0xcd, 0xbc, 0x0f, 0x98, 0x58, 0xcb,
+ 0x9b, 0x1b, 0x01, 0x0c, 0x49, 0xcd, 0x43, 0x9a, 0x01, 0x0a, 0xf1, 0x08,
+ 0xc0, 0x61, 0xbd, 0x16, 0xc0, 0x61, 0xc9, 0x44, 0x05, 0x17, 0x40, 0x61,
+ 0xd5, 0x00, 0xc0, 0x61, 0xfb, 0x46, 0xd4, 0x7e, 0xc0, 0x62, 0x45, 0x45,
+ 0xdd, 0x29, 0x40, 0x62, 0x51, 0xc4, 0x0d, 0x8e, 0x0e, 0x9b, 0xc1, 0xc3,
+ 0x05, 0x17, 0x0e, 0x9b, 0xb8, 0x09, 0xc0, 0x62, 0x63, 0xca, 0xa6, 0xf0,
+ 0x0f, 0x9c, 0x58, 0x43, 0x59, 0x36, 0xc0, 0x62, 0x75, 0xc3, 0x06, 0x7a,
+ 0x0f, 0xd6, 0xa0, 0xc5, 0xc7, 0xc3, 0x01, 0x38, 0x39, 0xc9, 0xaa, 0xe7,
+ 0x0f, 0xad, 0x68, 0x43, 0x08, 0xb1, 0xc0, 0x62, 0xc9, 0xc8, 0xb8, 0x43,
+ 0x0f, 0xcb, 0x08, 0x45, 0x91, 0xf4, 0xc0, 0x62, 0xe7, 0x4a, 0xa6, 0x96,
+ 0xc0, 0x63, 0x0b, 0x45, 0xdc, 0xf7, 0x40, 0x63, 0x71, 0x0d, 0xc0, 0x63,
+ 0x8f, 0x44, 0x05, 0xb2, 0xc0, 0x63, 0x9b, 0xc3, 0x0e, 0x84, 0x0f, 0xa1,
+ 0x10, 0x42, 0x00, 0xc0, 0xc0, 0x63, 0xc9, 0x44, 0xe6, 0x6f, 0xc0, 0x63,
+ 0xf3, 0xce, 0x6e, 0x9c, 0x01, 0x00, 0x21, 0xc9, 0xb5, 0x2b, 0x01, 0x71,
+ 0xd8, 0x10, 0xc0, 0x64, 0x0b, 0xce, 0x6e, 0xc6, 0x0f, 0xca, 0x48, 0xcc,
+ 0x83, 0x10, 0x0f, 0xa5, 0x69, 0xc9, 0xaf, 0x0d, 0x0f, 0xd3, 0xa0, 0x44,
+ 0x19, 0x06, 0xc0, 0x64, 0x15, 0x44, 0x88, 0x22, 0x40, 0x64, 0x21, 0x07,
+ 0xc0, 0x64, 0x2d, 0x42, 0x01, 0xc2, 0x40, 0x64, 0x37, 0x44, 0x0c, 0x2d,
+ 0xc0, 0x64, 0x43, 0x42, 0x03, 0xac, 0x40, 0x64, 0x67, 0xd8, 0x26, 0x54,
+ 0x0f, 0xa8, 0xe9, 0xd6, 0x0a, 0xe8, 0x01, 0x1f, 0x01, 0xcd, 0x00, 0xd2,
+ 0x01, 0x1e, 0xf1, 0xcb, 0x1c, 0xe0, 0x01, 0x1e, 0xe1, 0xce, 0x26, 0x2e,
+ 0x01, 0x1d, 0xa1, 0x42, 0x01, 0x0e, 0xc0, 0x64, 0x71, 0x46, 0x01, 0x17,
+ 0xc0, 0x64, 0x7b, 0x45, 0x00, 0xcd, 0xc0, 0x64, 0x85, 0x44, 0x13, 0x02,
+ 0x40, 0x64, 0x8f, 0x42, 0x01, 0x8a, 0xc0, 0x64, 0x9e, 0xc9, 0xb4, 0xe3,
+ 0x01, 0x19, 0x80, 0x54, 0x3f, 0x82, 0xc0, 0x64, 0xaa, 0xd6, 0x2b, 0xa3,
+ 0x0f, 0x89, 0x50, 0xc2, 0x01, 0x47, 0x0f, 0xcd, 0xbb, 0x00, 0x64, 0xc2,
+ 0xc4, 0x7c, 0xbd, 0x0f, 0xcf, 0x80, 0x8f, 0x0f, 0xb4, 0x53, 0x00, 0x64,
+ 0xc8, 0xc2, 0x01, 0x5b, 0x0f, 0xb4, 0x31, 0xcc, 0x84, 0xb4, 0x01, 0x09,
+ 0x11, 0x05, 0xc0, 0x64, 0xce, 0x42, 0x02, 0xa1, 0x40, 0x64, 0xda, 0x43,
+ 0x00, 0x55, 0xc0, 0x64, 0xe6, 0x49, 0x83, 0x73, 0xc0, 0x64, 0xf0, 0x44,
+ 0x15, 0xd2, 0xc0, 0x65, 0x18, 0xc5, 0x34, 0x9a, 0x01, 0x02, 0xe9, 0xcb,
+ 0x98, 0xdf, 0x0f, 0xa9, 0x88, 0x87, 0x01, 0x15, 0x43, 0x00, 0x65, 0x4c,
+ 0xc4, 0xe5, 0xeb, 0x0f, 0x9d, 0xd0, 0x12, 0xc0, 0x65, 0x52, 0xc2, 0x00,
+ 0x30, 0x0f, 0xce, 0x62, 0x00, 0x65, 0x5e, 0x08, 0xc0, 0x65, 0x64, 0x0e,
+ 0xc0, 0x65, 0x7a, 0x06, 0xc0, 0x65, 0x84, 0x11, 0xc0, 0x65, 0x9e, 0x05,
+ 0xc0, 0x65, 0xaa, 0x03, 0xc0, 0x65, 0xc0, 0x0a, 0xc0, 0x65, 0xd8, 0x15,
+ 0xc0, 0x65, 0xe4, 0x07, 0xc0, 0x65, 0xf4, 0x42, 0x01, 0x5b, 0xc0, 0x66,
+ 0x10, 0x42, 0x00, 0x4c, 0xc0, 0x66, 0x1c, 0x0f, 0xc0, 0x66, 0x28, 0x09,
+ 0xc0, 0x66, 0x3a, 0xc5, 0xdc, 0xb1, 0x0e, 0x99, 0xd9, 0xd3, 0x44, 0xca,
+ 0x0e, 0x99, 0xb9, 0x14, 0xc0, 0x66, 0x55, 0x12, 0xc0, 0x66, 0x5f, 0x0d,
+ 0xc0, 0x66, 0x6f, 0x04, 0xc0, 0x66, 0x7b, 0xc3, 0x8d, 0x81, 0x0e, 0x98,
+ 0xe9, 0xcc, 0x8b, 0x8c, 0x0e, 0x98, 0x88, 0x44, 0x0a, 0x48, 0xc0, 0x66,
+ 0x8d, 0xce, 0x6f, 0x52, 0x0f, 0xa6, 0x31, 0xd2, 0x4c, 0xf2, 0x0f, 0x9b,
+ 0xa9, 0xc3, 0x2f, 0x41, 0x0f, 0xd6, 0xb0, 0x07, 0xc0, 0x66, 0x99, 0x44,
+ 0xe4, 0x47, 0x40, 0x66, 0xab, 0x96, 0x01, 0x37, 0xd1, 0xc7, 0x86, 0xc9,
+ 0x01, 0x05, 0xc1, 0xd4, 0x3e, 0x42, 0x0f, 0x9d, 0xf0, 0xd7, 0x27, 0x82,
+ 0x01, 0x3a, 0x29, 0xc2, 0x00, 0x2a, 0x0f, 0xa0, 0x2a, 0x00, 0x66, 0xcf,
+ 0xc7, 0x17, 0x39, 0x01, 0x1f, 0x91, 0x47, 0x2b, 0xb8, 0x40, 0x66, 0xd5,
+ 0x00, 0x40, 0x66, 0xe1, 0x45, 0xdf, 0x4a, 0xc0, 0x66, 0xf0, 0x4b, 0x8f,
+ 0xe4, 0xc0, 0x67, 0x18, 0xc7, 0x10, 0x13, 0x0f, 0xb1, 0x58, 0x42, 0x00,
+ 0x43, 0x40, 0x67, 0x24, 0x15, 0xc0, 0x67, 0x2a, 0x45, 0x00, 0xfa, 0xc0,
+ 0x67, 0x3a, 0x0e, 0xc0, 0x67, 0x86, 0x52, 0x49, 0x92, 0xc0, 0x67, 0x92,
+ 0x46, 0x08, 0xd7, 0xc0, 0x67, 0x9c, 0x4b, 0x6f, 0x71, 0xc0, 0x67, 0xc6,
+ 0xc9, 0xab, 0x14, 0x00, 0x7d, 0xf3, 0x00, 0x67, 0xf7, 0x52, 0x4e, 0xea,
+ 0x40, 0x67, 0xfd, 0x47, 0x01, 0xff, 0xc0, 0x68, 0x15, 0x42, 0x01, 0xc2,
+ 0xc0, 0x68, 0x27, 0xce, 0x6f, 0xec, 0x01, 0x6b, 0x81, 0xd0, 0x5f, 0xdf,
+ 0x01, 0x6b, 0xf8, 0x43, 0x05, 0xe3, 0xc0, 0x68, 0x2d, 0xdc, 0x12, 0xaa,
+ 0x01, 0x02, 0x89, 0xce, 0x75, 0x8e, 0x0f, 0xaf, 0x51, 0xcc, 0x8d, 0x90,
+ 0x0f, 0xad, 0x81, 0xc6, 0x7c, 0x59, 0x0f, 0xa4, 0xa9, 0x55, 0x35, 0x47,
+ 0xc0, 0x68, 0x37, 0x48, 0x1a, 0x8f, 0xc0, 0x68, 0x43, 0xce, 0x73, 0x42,
+ 0x01, 0x4e, 0x49, 0xd8, 0x25, 0x94, 0x01, 0x53, 0xa9, 0xd1, 0x47, 0x52,
+ 0x0f, 0xa3, 0x61, 0xd3, 0x47, 0x50, 0x0f, 0xa3, 0x68, 0xd3, 0x40, 0x30,
+ 0x0f, 0xdd, 0x81, 0x4a, 0x01, 0x58, 0x40, 0x68, 0x4f, 0x42, 0x05, 0x2e,
+ 0xc0, 0x68, 0x61, 0x48, 0x08, 0xcf, 0x40, 0x68, 0xc8, 0x47, 0x0b, 0x9a,
+ 0xc0, 0x68, 0xe0, 0xc9, 0xb5, 0xb2, 0x00, 0x2c, 0x79, 0xc6, 0x57, 0xf6,
+ 0x00, 0x2c, 0x51, 0xc9, 0x11, 0x47, 0x00, 0x2c, 0x49, 0x03, 0xc0, 0x68,
+ 0xec, 0xcd, 0x2d, 0xa6, 0x00, 0x2a, 0xf1, 0x05, 0xc0, 0x68, 0xf8, 0x07,
+ 0xc0, 0x69, 0x04, 0xde, 0x0f, 0x99, 0x00, 0x2a, 0xc8, 0xca, 0x9d, 0x54,
+ 0x0f, 0x9d, 0x41, 0xcd, 0x78, 0xeb, 0x0f, 0xb4, 0xd8, 0xce, 0x74, 0x92,
+ 0x0f, 0x9c, 0xf9, 0xc4, 0x7a, 0x4d, 0x01, 0x5f, 0x28, 0x05, 0xc0, 0x69,
+ 0x10, 0x4d, 0x27, 0x71, 0xc0, 0x69, 0x1c, 0xcf, 0x66, 0xbd, 0x0f, 0x4a,
+ 0x21, 0xd0, 0x5a, 0x6f, 0x0f, 0x4a, 0x29, 0x47, 0x64, 0x0b, 0xc0, 0x69,
+ 0x9c, 0xc5, 0x08, 0x89, 0x0f, 0x4a, 0x39, 0x10, 0xc0, 0x69, 0xa8, 0x46,
+ 0x08, 0xd7, 0xc0, 0x69, 0xb4, 0x48, 0x10, 0x79, 0x40, 0x69, 0xd8, 0x04,
+ 0xc0, 0x69, 0xe4, 0x05, 0xc0, 0x6a, 0x02, 0x06, 0xc0, 0x6a, 0x16, 0x12,
+ 0xc0, 0x6a, 0x22, 0x16, 0xc0, 0x6a, 0x36, 0x14, 0xc0, 0x6a, 0x4e, 0x18,
+ 0xc0, 0x6a, 0x58, 0x15, 0xc0, 0x6a, 0x62, 0x03, 0xc0, 0x6a, 0x86, 0x0e,
+ 0xc0, 0x6a, 0xb4, 0x42, 0x01, 0xce, 0xc0, 0x6a, 0xc0, 0x0f, 0xc0, 0x6a,
+ 0xcc, 0x42, 0x00, 0x4c, 0xc0, 0x6a, 0xde, 0xc5, 0x67, 0xe4, 0x0f, 0xb8,
+ 0x19, 0x43, 0x00, 0x93, 0xc0, 0x6a, 0xe8, 0xc4, 0x88, 0x1c, 0x0f, 0xb8,
+ 0x11, 0x09, 0xc0, 0x6a, 0xf4, 0x43, 0x04, 0x46, 0xc0, 0x6b, 0x00, 0xc3,
+ 0x7f, 0xdf, 0x0f, 0xba, 0x31, 0xc5, 0xdf, 0xcc, 0x0f, 0xba, 0xa9, 0x0a,
+ 0x40, 0x6b, 0x0c, 0xda, 0x1b, 0x3a, 0x01, 0x36, 0xa9, 0xce, 0x75, 0x02,
+ 0x01, 0x1c, 0x38, 0xc4, 0xe1, 0xa8, 0x01, 0x34, 0xb9, 0xc8, 0x86, 0x8c,
+ 0x01, 0x09, 0xa9, 0xc2, 0x00, 0x2b, 0x00, 0x00, 0x38, 0xce, 0x74, 0x30,
+ 0x01, 0x19, 0x71, 0xc8, 0x08, 0xff, 0x01, 0x12, 0x60, 0xcb, 0x25, 0x71,
+ 0x01, 0x12, 0x51, 0xc2, 0x01, 0x89, 0x01, 0x12, 0x42, 0x00, 0x6b, 0x16,
+ 0xc9, 0xac, 0xe8, 0x0f, 0xb7, 0xd1, 0x0f, 0x40, 0x6b, 0x1c, 0xc8, 0xbb,
+ 0x63, 0x0f, 0xb7, 0x61, 0xc9, 0xae, 0x74, 0x0f, 0xb7, 0x58, 0x51, 0x52,
+ 0x13, 0xc0, 0x6b, 0x28, 0xcb, 0x96, 0xe5, 0x0f, 0xd6, 0x00, 0x4b, 0x00,
+ 0xb7, 0xc0, 0x6b, 0x40, 0xce, 0x71, 0x4a, 0x0f, 0xa7, 0xb0, 0xc2, 0x00,
+ 0x58, 0x01, 0x11, 0x03, 0x00, 0x6b, 0x60, 0xca, 0x9d, 0xb8, 0x01, 0x09,
+ 0x59, 0xc9, 0x21, 0x83, 0x0f, 0xa5, 0x11, 0xc7, 0xc4, 0xdc, 0x0f, 0xb1,
+ 0x01, 0xcb, 0x9b, 0xaa, 0x0f, 0xb1, 0x38, 0x14, 0xc0, 0x6b, 0x66, 0x44,
+ 0x0a, 0x62, 0xc0, 0x6b, 0x72, 0xcc, 0x8a, 0x00, 0x0f, 0xb1, 0x90, 0xcb,
+ 0x8b, 0xbd, 0x01, 0x30, 0x51, 0xc9, 0xb2, 0x91, 0x08, 0x0c, 0xe0, 0x0e,
+ 0xc0, 0x6b, 0x7d, 0x10, 0xc0, 0x6b, 0x87, 0x06, 0xc0, 0x6b, 0x9d, 0x16,
+ 0xc0, 0x6b, 0xab, 0x05, 0xc0, 0x6b, 0xb9, 0x83, 0x08, 0xb8, 0x93, 0x00,
+ 0x6b, 0xc3, 0x0c, 0xc0, 0x6b, 0xc9, 0x04, 0xc0, 0x6b, 0xd3, 0x09, 0xc0,
+ 0x6b, 0xdd, 0xc2, 0x01, 0x0e, 0x08, 0xb8, 0x89, 0xc2, 0x0e, 0xe5, 0x08,
+ 0xb8, 0x79, 0xc2, 0x00, 0x9a, 0x08, 0xb8, 0x69, 0xc2, 0x00, 0x3f, 0x08,
+ 0xb8, 0x49, 0x12, 0xc0, 0x6b, 0xe7, 0x0d, 0x40, 0x6b, 0xf1, 0xc8, 0x9e,
+ 0x3b, 0x08, 0xb9, 0xf9, 0x44, 0x02, 0xcc, 0x40, 0x6b, 0xfb, 0xc5, 0x25,
+ 0x27, 0x08, 0xb9, 0xd9, 0xc2, 0x01, 0x04, 0x08, 0xb9, 0xd0, 0xc4, 0x24,
+ 0x35, 0x08, 0xb9, 0xc9, 0xc5, 0x05, 0x1b, 0x08, 0xb9, 0xc1, 0x15, 0xc0,
+ 0x6c, 0x0b, 0x08, 0xc0, 0x6c, 0x17, 0x16, 0xc0, 0x6c, 0x23, 0xc3, 0x05,
+ 0x17, 0x08, 0xb9, 0x89, 0xc4, 0x16, 0x57, 0x08, 0xb9, 0x80, 0x83, 0x08,
+ 0xb9, 0x03, 0x00, 0x6c, 0x2f, 0x91, 0x08, 0xb9, 0x41, 0x87, 0x08, 0xb9,
+ 0x31, 0x97, 0x08, 0xb9, 0x23, 0x00, 0x6c, 0x3f, 0x8b, 0x08, 0xb9, 0x12,
+ 0x00, 0x6c, 0x43, 0x0e, 0xc0, 0x6c, 0x47, 0xc2, 0x00, 0x9a, 0x08, 0xb8,
+ 0xf0, 0xc6, 0x66, 0xed, 0x01, 0x08, 0x01, 0xc5, 0xda, 0x45, 0x0f, 0xd4,
+ 0xb8, 0xd3, 0x42, 0xef, 0x01, 0x03, 0x69, 0xd2, 0x4f, 0x7a, 0x01, 0x03,
+ 0x58, 0xc4, 0x00, 0x56, 0x01, 0x4c, 0xf9, 0xc5, 0x07, 0xa2, 0x00, 0x05,
+ 0xa0, 0x42, 0x00, 0x93, 0xc0, 0x6c, 0x51, 0xc5, 0xdd, 0x2e, 0x01, 0x1b,
+ 0xd3, 0x00, 0x6c, 0x60, 0xc5, 0xa2, 0x81, 0x01, 0x1b, 0xab, 0x00, 0x6c,
+ 0x66, 0xc4, 0x27, 0x7e, 0x01, 0x1b, 0x9b, 0x00, 0x6c, 0x6c, 0xd0, 0x59,
+ 0x1f, 0x01, 0x1b, 0xb9, 0x14, 0xc0, 0x6c, 0x72, 0x42, 0x02, 0x6a, 0xc0,
+ 0x6c, 0x7e, 0x06, 0xc0, 0x6c, 0x88, 0x15, 0xc0, 0x6c, 0x9a, 0xc5, 0xdb,
+ 0x35, 0x01, 0x1b, 0x61, 0x05, 0xc0, 0x6c, 0xb0, 0xd6, 0x2e, 0x79, 0x01,
+ 0x1b, 0x49, 0xcf, 0x69, 0xe7, 0x01, 0x1b, 0x41, 0x44, 0x00, 0xcd, 0xc0,
+ 0x6c, 0xbc, 0x44, 0xe5, 0xab, 0xc0, 0x6c, 0xc8, 0xcd, 0x7e, 0xdc, 0x01,
+ 0x1a, 0x01, 0xc6, 0xd3, 0x22, 0x01, 0x19, 0xb0, 0x42, 0x00, 0x64, 0xc0,
+ 0x6c, 0xd4, 0xd8, 0x24, 0x44, 0x00, 0x04, 0xf8, 0xc7, 0x2d, 0x3e, 0x00,
+ 0x01, 0x39, 0xc4, 0xa5, 0xd7, 0x01, 0x5f, 0x20, 0xd1, 0x4a, 0x46, 0x08,
+ 0x59, 0xc9, 0x47, 0x01, 0xff, 0x40, 0x6c, 0xe0, 0xc4, 0x3f, 0x16, 0x0f,
+ 0x9f, 0xd1, 0xc6, 0x32, 0x1a, 0x00, 0x01, 0x30, 0xca, 0xa1, 0x3c, 0x08,
+ 0x08, 0x11, 0x47, 0x37, 0x49, 0xc0, 0x6d, 0x61, 0x19, 0xc0, 0x6d, 0x88,
+ 0xd9, 0x1f, 0x38, 0x08, 0x09, 0xe1, 0xdc, 0x14, 0xf6, 0x08, 0x09, 0xe9,
+ 0x48, 0x14, 0xfd, 0x40, 0x6d, 0x94, 0x4a, 0xa4, 0x20, 0xc0, 0x6d, 0xa0,
+ 0xc9, 0xb0, 0xb4, 0x0f, 0xca, 0x50, 0xd4, 0x3e, 0xe2, 0x0f, 0xbd, 0x89,
+ 0xcb, 0x5e, 0x74, 0x0f, 0xbd, 0x21, 0x46, 0x01, 0x31, 0xc0, 0x6d, 0xc2,
+ 0x15, 0xc0, 0x6d, 0xce, 0xd5, 0x35, 0x5c, 0x0f, 0xbd, 0xe8, 0x43, 0x03,
+ 0x1a, 0xc0, 0x6d, 0xda, 0xd4, 0x3c, 0x12, 0x0f, 0x9b, 0xf0, 0xc3, 0x1e,
+ 0x52, 0x01, 0x16, 0x43, 0x00, 0x6e, 0x0d, 0x0e, 0xc0, 0x6e, 0x13, 0xca,
+ 0xa4, 0x84, 0x0f, 0x9f, 0xc8, 0xc8, 0x40, 0x9a, 0x0f, 0xb6, 0x48, 0x8d,
+ 0x0f, 0xab, 0x73, 0x00, 0x6e, 0x1d, 0xc6, 0xc5, 0x77, 0x0f, 0xd4, 0x18,
+ 0xcb, 0x9b, 0x10, 0x0f, 0x9c, 0xa8, 0x47, 0x01, 0xff, 0xc0, 0x6e, 0x2a,
+ 0x4d, 0x61, 0x97, 0x40, 0x6e, 0xb4, 0x49, 0xb7, 0xe0, 0xc0, 0x6e, 0xc8,
+ 0xc4, 0xad, 0x35, 0x0f, 0x99, 0xe1, 0xc5, 0xe3, 0xcd, 0x0f, 0xa1, 0x08,
+ 0x05, 0xc0, 0x6e, 0xfb, 0xc9, 0x99, 0x44, 0x01, 0x21, 0x10, 0x00, 0xc0,
+ 0x6f, 0x0d, 0xc7, 0xcb, 0xa4, 0x0f, 0xd6, 0x80, 0xc2, 0x03, 0x21, 0x0f,
+ 0xd4, 0xa9, 0x8d, 0x0f, 0x9f, 0x33, 0x00, 0x6f, 0x19, 0xc3, 0x07, 0x05,
+ 0x0f, 0x9a, 0x60, 0x0e, 0xc0, 0x6f, 0x1f, 0x46, 0x79, 0x13, 0x40, 0x6f,
+ 0x2f, 0xc3, 0x00, 0xdc, 0x0f, 0xcf, 0xd3, 0x00, 0x6f, 0x65, 0xc5, 0xd9,
+ 0xd7, 0x01, 0x35, 0xf1, 0x47, 0xcd, 0x3a, 0x40, 0x6f, 0x6b, 0xc3, 0x02,
+ 0xfb, 0x0f, 0xcd, 0x09, 0xde, 0x0e, 0x6d, 0x0f, 0x9f, 0xc0, 0x00, 0x40,
+ 0x6f, 0x7d, 0x47, 0x01, 0xff, 0xc0, 0x6f, 0x95, 0x42, 0x00, 0xea, 0xc0,
+ 0x6f, 0xda, 0xc7, 0xcb, 0x0a, 0x05, 0x37, 0x91, 0xc9, 0x11, 0x47, 0x05,
+ 0x37, 0x99, 0xc9, 0xac, 0x22, 0x05, 0x37, 0xb1, 0xcd, 0x2d, 0xa6, 0x05,
+ 0x37, 0xb8, 0x48, 0x0d, 0x5e, 0xc0, 0x6f, 0xe4, 0x0d, 0xc0, 0x6f, 0xea,
+ 0xcb, 0x95, 0x59, 0x0f, 0xa1, 0x59, 0xc2, 0x00, 0xe5, 0x0f, 0xca, 0x98,
+ 0x43, 0x45, 0xa6, 0xc0, 0x6f, 0xf2, 0xc4, 0xd8, 0x9a, 0x0f, 0xa8, 0x59,
+ 0x8a, 0x0f, 0xb6, 0x02, 0x00, 0x70, 0x0e, 0x00, 0xc0, 0x70, 0x14, 0xc8,
+ 0xba, 0x6b, 0x0f, 0xa4, 0x40, 0xca, 0xa0, 0x2e, 0x0f, 0xb6, 0x21, 0xcb,
+ 0x91, 0x23, 0x0f, 0xca, 0xb1, 0xc2, 0x00, 0x35, 0x0f, 0xcb, 0x78, 0xc9,
+ 0xb7, 0xfb, 0x01, 0x05, 0xf9, 0xc7, 0x88, 0x30, 0x0f, 0xd7, 0x30, 0xc5,
+ 0xdf, 0x2c, 0x0f, 0x9d, 0x89, 0xc6, 0xd6, 0xa0, 0x0f, 0xcf, 0x10, 0xca,
+ 0xa1, 0x78, 0x0f, 0x9c, 0x11, 0x86, 0x0f, 0xa1, 0x30, 0xcf, 0x6a, 0x14,
+ 0x01, 0x4f, 0xc9, 0xc7, 0x2a, 0x4b, 0x01, 0x4f, 0xc0, 0x87, 0x0f, 0xb5,
+ 0x91, 0xc3, 0x1d, 0x77, 0x0f, 0xb5, 0xa0, 0xc3, 0x00, 0x29, 0x0f, 0xcd,
+ 0x59, 0x44, 0x93, 0x75, 0xc0, 0x70, 0x20, 0xca, 0x9e, 0xf8, 0x0f, 0xa4,
+ 0x99, 0xd0, 0x60, 0x8f, 0x0f, 0x9e, 0xb1, 0x14, 0xc0, 0x70, 0x38, 0xc2,
+ 0x02, 0xa1, 0x0f, 0xd6, 0xc0, 0xc9, 0xb2, 0x0a, 0x01, 0x19, 0x63, 0x00,
+ 0x70, 0x44, 0x45, 0xb7, 0xf6, 0xc0, 0x70, 0x4a, 0x16, 0x40, 0x70, 0x7c,
+ 0x00, 0xc0, 0x70, 0x88, 0xc8, 0xbe, 0xeb, 0x0f, 0xb6, 0x70, 0xc4, 0x03,
+ 0x2b, 0x01, 0x13, 0x61, 0xc7, 0x01, 0xb0, 0x01, 0x09, 0xb0, 0xc5, 0xae,
+ 0xda, 0x0f, 0x9b, 0xd1, 0xc3, 0x0e, 0x84, 0x0f, 0xd5, 0x90, 0xc3, 0xed,
+ 0x14, 0x0f, 0xcc, 0x58, 0xc5, 0x05, 0x2f, 0x0f, 0xb4, 0x79, 0x16, 0x40,
+ 0x70, 0x9a, 0xc4, 0xe4, 0xf7, 0x01, 0x2e, 0x71, 0xc2, 0x00, 0xdd, 0x01,
+ 0x01, 0x13, 0x00, 0x70, 0xa6, 0xc4, 0x26, 0xcf, 0x0f, 0xab, 0x5a, 0x00,
+ 0x70, 0xac, 0x46, 0x79, 0x13, 0x40, 0x70, 0xb2, 0x4b, 0x6f, 0x71, 0xc0,
+ 0x70, 0xca, 0x47, 0x01, 0xff, 0x40, 0x70, 0xd2, 0xc4, 0x49, 0xd4, 0x0f,
+ 0xce, 0x59, 0x95, 0x0f, 0xd7, 0x38, 0x06, 0xc0, 0x71, 0x30, 0x42, 0x00,
+ 0x07, 0xc0, 0x71, 0x3c, 0xc2, 0x00, 0xbb, 0x0f, 0xcf, 0x88, 0x0b, 0xc0,
+ 0x71, 0x46, 0x44, 0xe7, 0x4f, 0x40, 0x71, 0x50, 0x44, 0xa0, 0x8b, 0xc0,
+ 0x71, 0x70, 0xc8, 0xbd, 0x7b, 0x0f, 0xc8, 0x71, 0xc5, 0xdf, 0x04, 0x0f,
+ 0xcb, 0x31, 0xc2, 0x00, 0xeb, 0x0f, 0xcf, 0xc8, 0x03, 0xc0, 0x71, 0x82,
+ 0xc2, 0x00, 0x29, 0x00, 0x16, 0xc0, 0x09, 0xc0, 0x71, 0x92, 0x0d, 0xc0,
+ 0x71, 0xa4, 0x03, 0xc0, 0x71, 0xc7, 0x15, 0xc0, 0x71, 0xd9, 0x06, 0xc0,
+ 0x71, 0xf6, 0x1b, 0xc0, 0x72, 0x06, 0x08, 0xc0, 0x72, 0x10, 0x42, 0x11,
+ 0x3f, 0xc0, 0x72, 0x22, 0x0b, 0xc0, 0x72, 0x34, 0x07, 0xc0, 0x72, 0x44,
+ 0x0f, 0xc0, 0x72, 0x66, 0x16, 0xc0, 0x72, 0x72, 0x0e, 0xc0, 0x72, 0x84,
+ 0x11, 0xc0, 0x72, 0x8e, 0x12, 0xc0, 0x72, 0xa6, 0xcc, 0x8a, 0xa8, 0x0e,
+ 0x83, 0x51, 0x42, 0x06, 0x8c, 0xc0, 0x72, 0xbc, 0xc4, 0xea, 0x1b, 0x0e,
+ 0x82, 0x01, 0x14, 0x40, 0x72, 0xc8, 0xc4, 0x24, 0x35, 0x08, 0xe3, 0x13,
+ 0x00, 0x72, 0xd4, 0xc5, 0x05, 0x1b, 0x08, 0xe3, 0x0b, 0x00, 0x72, 0xda,
+ 0x15, 0xc0, 0x72, 0xde, 0x08, 0xc0, 0x72, 0xf0, 0x16, 0xc0, 0x72, 0xf8,
+ 0xc3, 0x05, 0x17, 0x08, 0xe2, 0xd0, 0x45, 0x08, 0xd8, 0xc0, 0x73, 0x06,
+ 0xcb, 0x91, 0xff, 0x08, 0xe2, 0x11, 0xc4, 0x1c, 0xb3, 0x08, 0xe2, 0x08,
+ 0xc3, 0xed, 0x3e, 0x08, 0xe2, 0x29, 0xc3, 0x8c, 0x84, 0x08, 0xe2, 0x20,
+ 0x03, 0xc0, 0x73, 0x2a, 0x42, 0x02, 0x52, 0xc0, 0x73, 0x36, 0xcb, 0x21,
+ 0x1a, 0x08, 0xe1, 0xe0, 0x03, 0xc0, 0x73, 0x42, 0x91, 0x08, 0xe1, 0xd1,
+ 0x87, 0x08, 0xe1, 0xc1, 0x48, 0xb7, 0xd7, 0xc0, 0x73, 0x4e, 0x97, 0x08,
+ 0xe1, 0x93, 0x00, 0x73, 0x59, 0x8b, 0x08, 0xe1, 0x82, 0x00, 0x73, 0x5d,
+ 0xc2, 0x01, 0x0e, 0x08, 0xe1, 0x71, 0x15, 0xc0, 0x73, 0x61, 0x18, 0xc0,
+ 0x73, 0x71, 0xc2, 0x00, 0x96, 0x08, 0xe1, 0x49, 0xc2, 0x00, 0x9a, 0x08,
+ 0xe1, 0x41, 0xc2, 0x1a, 0x36, 0x08, 0xe1, 0x39, 0xc2, 0x00, 0x3f, 0x08,
+ 0xe1, 0x31, 0x04, 0xc0, 0x73, 0x7b, 0x12, 0xc0, 0x73, 0x85, 0x10, 0xc0,
+ 0x73, 0x8f, 0x06, 0xc0, 0x73, 0xa5, 0x16, 0xc0, 0x73, 0xb3, 0x0c, 0xc0,
+ 0x73, 0xc1, 0x05, 0xc0, 0x73, 0xcb, 0x09, 0xc0, 0x73, 0xd5, 0x0d, 0xc0,
+ 0x73, 0xdf, 0x83, 0x08, 0xe0, 0x03, 0x00, 0x73, 0xe9, 0x91, 0x08, 0xe0,
+ 0x61, 0x87, 0x08, 0xe0, 0x51, 0x97, 0x08, 0xe0, 0x23, 0x00, 0x73, 0xf5,
+ 0x8b, 0x08, 0xe0, 0x12, 0x00, 0x73, 0xf9, 0x43, 0x00, 0xca, 0xc0, 0x73,
+ 0xfd, 0x00, 0x40, 0x74, 0x2b, 0x45, 0x00, 0x47, 0xc0, 0x74, 0x4a, 0x44,
+ 0x00, 0xcd, 0xc0, 0x74, 0x56, 0x06, 0x40, 0x74, 0x60, 0xdb, 0x17, 0x9b,
+ 0x01, 0x3f, 0x00, 0xc2, 0x00, 0xff, 0x01, 0x11, 0x43, 0x00, 0x74, 0x72,
+ 0xc3, 0x00, 0x3a, 0x01, 0x11, 0x3a, 0x00, 0x74, 0x76, 0xcd, 0x79, 0x6d,
+ 0x0f, 0xa8, 0x79, 0x4a, 0x9d, 0x36, 0x40, 0x74, 0x7c, 0xc6, 0x01, 0xff,
+ 0x0f, 0xa4, 0x61, 0xc5, 0xde, 0xe6, 0x0f, 0x9f, 0x48, 0xcb, 0x9a, 0xce,
+ 0x0f, 0xbb, 0xa1, 0xca, 0x9c, 0xf0, 0x0f, 0xcf, 0xa1, 0xc2, 0x04, 0x35,
+ 0x0f, 0xd5, 0xb8, 0x16, 0xc0, 0x74, 0x88, 0x4b, 0x05, 0xe8, 0xc0, 0x74,
+ 0x92, 0x43, 0x27, 0x40, 0xc0, 0x74, 0xaa, 0xdd, 0x11, 0x6d, 0x01, 0x37,
+ 0x31, 0x4b, 0x3d, 0xe1, 0xc0, 0x74, 0xb6, 0x49, 0x44, 0x58, 0x40, 0x74,
+ 0xce, 0xc7, 0xc5, 0x30, 0x0f, 0xcb, 0x61, 0xd3, 0x46, 0x33, 0x0f, 0x9a,
+ 0x18, 0xc4, 0xe8, 0x43, 0x0f, 0xa0, 0x30, 0x4b, 0x37, 0x15, 0xc0, 0x74,
+ 0xe3, 0xd8, 0x23, 0xb4, 0x01, 0x16, 0xd1, 0x45, 0x01, 0xac, 0xc0, 0x74,
+ 0xef, 0x11, 0xc0, 0x75, 0x01, 0x03, 0xc0, 0x75, 0x0d, 0xc4, 0x02, 0xcb,
+ 0x00, 0x01, 0xe1, 0xcf, 0x67, 0x53, 0x01, 0x55, 0x32, 0x00, 0x75, 0x19,
+ 0x47, 0x01, 0xff, 0xc0, 0x75, 0x1f, 0x46, 0x08, 0xd7, 0xc0, 0x75, 0x77,
+ 0x4c, 0x11, 0x33, 0xc0, 0x75, 0x9b, 0x15, 0xc0, 0x75, 0xab, 0x4f, 0x2e,
+ 0x37, 0xc0, 0x75, 0xb7, 0x4b, 0x6f, 0x71, 0x40, 0x75, 0xd9, 0x42, 0x00,
+ 0x4a, 0xc0, 0x75, 0xf5, 0xd6, 0x24, 0x5e, 0x0f, 0xb3, 0x90, 0x47, 0x01,
+ 0xff, 0xc0, 0x76, 0x02, 0x4c, 0x11, 0x33, 0x40, 0x76, 0x78, 0x07, 0xc0,
+ 0x76, 0x84, 0x0d, 0x40, 0x76, 0x8e, 0x43, 0xb8, 0xf8, 0xc0, 0x76, 0x9a,
+ 0xd3, 0x45, 0x03, 0x01, 0x96, 0x78, 0xc4, 0x1e, 0x7c, 0x0f, 0xa4, 0x20,
+ 0xcf, 0x65, 0xeb, 0x08, 0x49, 0xf9, 0x47, 0x01, 0xff, 0x40, 0x76, 0xbc,
+ 0x83, 0x08, 0x14, 0x03, 0x00, 0x77, 0x1e, 0x87, 0x08, 0x14, 0x0b, 0x00,
+ 0x77, 0x22, 0x84, 0x08, 0x14, 0x13, 0x00, 0x77, 0x26, 0x89, 0x08, 0x14,
+ 0x21, 0x86, 0x08, 0x14, 0x29, 0x8b, 0x08, 0x14, 0x31, 0x99, 0x08, 0x14,
+ 0x39, 0x9c, 0x08, 0x14, 0x41, 0x96, 0x08, 0x14, 0xbb, 0x00, 0x77, 0x2a,
+ 0x8c, 0x08, 0x14, 0x51, 0x8d, 0x08, 0x14, 0x5b, 0x00, 0x77, 0x32, 0x93,
+ 0x08, 0x14, 0x61, 0x8e, 0x08, 0x14, 0x69, 0x8f, 0x08, 0x14, 0x73, 0x00,
+ 0x77, 0x36, 0x90, 0x08, 0x14, 0x7b, 0x00, 0x77, 0x3a, 0x97, 0x08, 0x14,
+ 0x91, 0x92, 0x08, 0x14, 0x99, 0x94, 0x08, 0x14, 0xa9, 0x95, 0x08, 0x14,
+ 0xb1, 0x8a, 0x08, 0x14, 0xd9, 0x9a, 0x08, 0x14, 0xe0, 0x42, 0x02, 0xfb,
+ 0xc0, 0x77, 0x3e, 0xc6, 0x92, 0xca, 0x01, 0x05, 0xf0, 0x15, 0xc0, 0x77,
+ 0x4b, 0x47, 0x01, 0xff, 0xc0, 0x77, 0x57, 0x05, 0xc0, 0x77, 0xa7, 0x52,
+ 0x48, 0x06, 0x40, 0x77, 0xb3, 0x00, 0x40, 0x77, 0xc9, 0xc2, 0x00, 0x35,
+ 0x0f, 0x9f, 0xb9, 0xc5, 0xdb, 0x49, 0x0f, 0xcb, 0xe0, 0xc8, 0xc3, 0x83,
+ 0x0f, 0xa0, 0xf1, 0xc3, 0x07, 0xc6, 0x0f, 0xd4, 0xe0, 0x47, 0x01, 0xff,
+ 0xc0, 0x77, 0xd5, 0xc8, 0x23, 0xac, 0x00, 0x75, 0x79, 0x4b, 0x6f, 0x71,
+ 0xc0, 0x78, 0x2c, 0x15, 0xc0, 0x78, 0x59, 0xc5, 0xdd, 0xab, 0x00, 0x76,
+ 0x31, 0x49, 0xb4, 0x89, 0xc0, 0x78, 0x65, 0xd1, 0x52, 0x79, 0x00, 0x76,
+ 0x61, 0xc9, 0xb2, 0x37, 0x00, 0x76, 0x69, 0xc8, 0xc2, 0xdb, 0x00, 0x76,
+ 0x71, 0x46, 0x08, 0xd7, 0xc0, 0x78, 0x75, 0x43, 0x69, 0x91, 0x40, 0x78,
+ 0x99, 0xca, 0x9c, 0xc8, 0x0f, 0xbb, 0xb1, 0xc2, 0x0c, 0x56, 0x0f, 0xd6,
+ 0x08, 0x46, 0x01, 0x17, 0xc0, 0x78, 0xa5, 0x45, 0x00, 0xcd, 0xc0, 0x78,
+ 0xcd, 0x44, 0x00, 0x3a, 0xc0, 0x78, 0xe9, 0x45, 0x01, 0x1d, 0xc0, 0x78,
+ 0xf3, 0xce, 0x6d, 0xae, 0x01, 0x38, 0x09, 0x44, 0x05, 0x17, 0xc0, 0x79,
+ 0x0e, 0x16, 0xc0, 0x79, 0x1a, 0xd2, 0x49, 0x5c, 0x0f, 0xdc, 0x21, 0xd3,
+ 0x43, 0x9a, 0x0f, 0xdc, 0x30, 0x46, 0x01, 0x31, 0xc0, 0x79, 0x26, 0x16,
+ 0xc0, 0x79, 0x38, 0x15, 0xc0, 0x79, 0x42, 0xd0, 0x5c, 0xef, 0x0f, 0xc1,
+ 0xe9, 0xd1, 0x55, 0xa9, 0x0f, 0xc1, 0xa9, 0x03, 0xc0, 0x79, 0x4e, 0xcf,
+ 0x62, 0x1c, 0x01, 0x3f, 0x81, 0x06, 0xc0, 0x79, 0x5d, 0xcd, 0x81, 0xce,
+ 0x01, 0x0e, 0x41, 0x0a, 0xc0, 0x79, 0x69, 0xc6, 0xd1, 0x60, 0x0f, 0xb3,
+ 0x69, 0x46, 0x04, 0x91, 0x40, 0x79, 0x75, 0xc4, 0x32, 0x64, 0x01, 0x15,
+ 0x2b, 0x00, 0x79, 0x81, 0x45, 0x00, 0x62, 0xc0, 0x79, 0x87, 0xd7, 0x2b,
+ 0x5f, 0x01, 0x17, 0x81, 0x45, 0x0a, 0xe3, 0xc0, 0x79, 0x96, 0xc9, 0xb3,
+ 0xba, 0x01, 0x4b, 0xf1, 0x45, 0x02, 0x1d, 0x40, 0x79, 0xbd, 0xc9, 0xb4,
+ 0xa4, 0x0f, 0xcc, 0x21, 0xd7, 0x20, 0xe3, 0x01, 0x33, 0x91, 0xc2, 0x00,
+ 0xe5, 0x01, 0x11, 0x53, 0x00, 0x79, 0xc9, 0x16, 0x40, 0x79, 0xcd, 0xc8,
+ 0xa3, 0xfa, 0x01, 0x1c, 0x61, 0xc5, 0xbd, 0x56, 0x01, 0x01, 0xf8, 0xc8,
+ 0x2f, 0x37, 0x0f, 0xb7, 0x41, 0xcc, 0x4c, 0x68, 0x0f, 0xa9, 0xe0, 0xd0,
+ 0x58, 0x4f, 0x01, 0x2f, 0x71, 0xcf, 0x6c, 0x4e, 0x01, 0x2f, 0x68, 0xc9,
+ 0xad, 0x0c, 0x01, 0x37, 0x89, 0xcf, 0x65, 0x0a, 0x01, 0x30, 0xa0, 0x03,
+ 0xc0, 0x79, 0xd9, 0xc4, 0x99, 0xfd, 0x08, 0x1c, 0x09, 0x09, 0xc0, 0x79,
+ 0xe5, 0x0d, 0xc0, 0x79, 0xf1, 0x06, 0xc0, 0x79, 0xfd, 0xc2, 0x03, 0x76,
+ 0x08, 0x1c, 0x2b, 0x00, 0x7a, 0x09, 0xc2, 0x0a, 0x20, 0x08, 0x1c, 0x31,
+ 0x1c, 0xc0, 0x7a, 0x0f, 0x16, 0xc0, 0x7a, 0x19, 0xc3, 0x4d, 0x48, 0x08,
+ 0x1c, 0x51, 0x15, 0xc0, 0x7a, 0x29, 0xc5, 0xdc, 0xde, 0x08, 0x1c, 0x69,
+ 0xc3, 0x04, 0xae, 0x08, 0x1c, 0x71, 0xc3, 0x21, 0x00, 0x08, 0x1c, 0x81,
+ 0xc2, 0x0b, 0xfc, 0x08, 0x1c, 0xa1, 0xc4, 0xe8, 0x77, 0x08, 0x1c, 0xb1,
+ 0xc5, 0xd9, 0xf5, 0x08, 0x1c, 0xb9, 0x8b, 0x08, 0x1c, 0xd9, 0x97, 0x08,
+ 0x1c, 0xe0, 0x43, 0x11, 0x90, 0xc0, 0x7a, 0x39, 0x06, 0xc0, 0x7a, 0x95,
+ 0x43, 0x0f, 0x5f, 0xc0, 0x7a, 0xa4, 0xd0, 0x5b, 0xff, 0x0f, 0xb2, 0x48,
+ 0xc7, 0xca, 0xaf, 0x0f, 0xb4, 0x09, 0x0f, 0xc0, 0x7a, 0xb6, 0xd7, 0x2a,
+ 0x34, 0x01, 0x5f, 0xf8, 0x14, 0xc0, 0x7a, 0xc2, 0x0a, 0xc0, 0x7b, 0x64,
+ 0x10, 0xc0, 0x7c, 0x33, 0x0d, 0xc0, 0x7c, 0xf3, 0xc3, 0xec, 0xa5, 0x0d,
+ 0x80, 0xb1, 0xc3, 0xec, 0xa8, 0x0d, 0x80, 0xa9, 0xc3, 0xec, 0xab, 0x0d,
+ 0x80, 0xa1, 0xc3, 0x82, 0xb0, 0x0d, 0x80, 0x99, 0xc3, 0x3b, 0x0b, 0x0d,
+ 0x80, 0x91, 0xc3, 0x82, 0xe0, 0x0d, 0x80, 0x89, 0xc3, 0x82, 0xec, 0x0d,
+ 0x80, 0x81, 0xc3, 0x82, 0xa4, 0x0d, 0x80, 0x79, 0xc3, 0xea, 0xe3, 0x0d,
+ 0x80, 0x71, 0xc3, 0xea, 0xf5, 0x0d, 0x80, 0x69, 0xc3, 0xea, 0xf8, 0x0d,
+ 0x80, 0x61, 0xc3, 0xea, 0xfb, 0x0d, 0x80, 0x59, 0xc3, 0xea, 0xfe, 0x0d,
+ 0x80, 0x51, 0xc3, 0xed, 0xa1, 0x0d, 0x80, 0x49, 0xc3, 0xed, 0xaa, 0x0d,
+ 0x80, 0x41, 0xc3, 0xed, 0xa7, 0x0d, 0x80, 0x39, 0xc3, 0xed, 0xa4, 0x0d,
+ 0x80, 0x31, 0xc4, 0xe9, 0xd7, 0x0d, 0x80, 0x29, 0xc3, 0xeb, 0x3d, 0x0d,
+ 0x80, 0x21, 0xc3, 0xeb, 0x40, 0x0d, 0x80, 0x19, 0xc3, 0xeb, 0x43, 0x0d,
+ 0x80, 0x11, 0xc3, 0x83, 0x28, 0x0d, 0x80, 0x09, 0x46, 0xd2, 0x5c, 0xc0,
+ 0x7d, 0xc8, 0x19, 0xc0, 0x7d, 0xd4, 0x1b, 0xc0, 0x7e, 0x52, 0x0f, 0xc0,
+ 0x7e, 0xb2, 0x16, 0xc0, 0x7f, 0x54, 0x15, 0x40, 0x80, 0x14, 0xd7, 0x2b,
+ 0x48, 0x01, 0x15, 0xc9, 0x84, 0x0f, 0x99, 0xf8, 0x0e, 0xc0, 0x80, 0xe0,
+ 0x12, 0xc0, 0x80, 0xec, 0xcc, 0x8b, 0x98, 0x00, 0x2f, 0x79, 0x45, 0x00,
+ 0xfa, 0xc0, 0x80, 0xf8, 0x47, 0x27, 0xe9, 0x40, 0x81, 0x0a, 0x16, 0xc0,
+ 0x81, 0x54, 0x06, 0xc0, 0x81, 0x60, 0xce, 0x72, 0x0e, 0x02, 0x6e, 0x19,
+ 0x19, 0xc0, 0x81, 0x74, 0x42, 0x00, 0xea, 0xc0, 0x81, 0x80, 0xd0, 0x5c,
+ 0x0f, 0x02, 0x6e, 0x39, 0x15, 0xc0, 0x81, 0x8a, 0x12, 0xc0, 0x81, 0x9c,
+ 0x08, 0xc0, 0x81, 0xae, 0x09, 0xc0, 0x81, 0xba, 0x42, 0x01, 0xc2, 0xc0,
+ 0x81, 0xc4, 0xca, 0xa9, 0xac, 0x02, 0x6e, 0x79, 0x03, 0xc0, 0x81, 0xd0,
+ 0x04, 0xc0, 0x81, 0xe2, 0x42, 0x01, 0xa5, 0xc0, 0x81, 0xf4, 0x42, 0x01,
+ 0x5b, 0xc0, 0x81, 0xfe, 0x11, 0xc0, 0x82, 0x0e, 0xca, 0xa7, 0x5e, 0x02,
+ 0x6f, 0xd8, 0x48, 0x03, 0x79, 0xc0, 0x82, 0x1a, 0xc2, 0x00, 0xa7, 0x0f,
+ 0xa0, 0x72, 0x00, 0x82, 0x40, 0x00, 0xc0, 0x82, 0x44, 0xc2, 0x00, 0x35,
+ 0x0f, 0x9f, 0x40, 0xc6, 0xce, 0xbc, 0x01, 0x18, 0xdb, 0x00, 0x82, 0x5c,
+ 0xc2, 0x00, 0xa7, 0x01, 0x18, 0x12, 0x00, 0x82, 0x62, 0xd9, 0x1e, 0xa2,
+ 0x0f, 0xb3, 0x43, 0x00, 0x82, 0x66, 0x87, 0x0f, 0xab, 0x98, 0xc4, 0x49,
+ 0xaf, 0x0f, 0x9b, 0x79, 0xc3, 0xae, 0xd7, 0x0f, 0xa0, 0xe8, 0x00, 0xc0,
+ 0x82, 0x6c, 0xc3, 0x25, 0x99, 0x0f, 0xa4, 0x38, 0x15, 0xc0, 0x82, 0x78,
+ 0xc3, 0x2d, 0x07, 0x0f, 0xa9, 0x43, 0x00, 0x82, 0x82, 0xc6, 0xd2, 0xf8,
+ 0x0f, 0x9a, 0xa0, 0x06, 0xc0, 0x82, 0x88, 0x4d, 0x7f, 0x5e, 0xc0, 0x82,
+ 0x9a, 0x45, 0xdc, 0x52, 0xc0, 0x82, 0xb8, 0x09, 0x40, 0x82, 0xca, 0x44,
+ 0xcb, 0x1a, 0xc0, 0x82, 0xd6, 0xcb, 0x98, 0xc9, 0x0f, 0xa1, 0x18, 0x4c,
+ 0x1b, 0x6e, 0xc0, 0x82, 0xe2, 0x44, 0x00, 0xcd, 0xc0, 0x82, 0xee, 0x45,
+ 0x00, 0x47, 0xc0, 0x82, 0xfa, 0x47, 0xbe, 0x7a, 0xc0, 0x83, 0x06, 0x47,
+ 0xc5, 0x1b, 0xc0, 0x83, 0x12, 0xd4, 0x38, 0xca, 0x07, 0xff, 0x41, 0xcd,
+ 0x1b, 0xc9, 0x07, 0xff, 0x51, 0xcf, 0x13, 0x43, 0x07, 0xff, 0x61, 0xcc,
+ 0x0c, 0x96, 0x07, 0xff, 0x69, 0xcc, 0x0c, 0x86, 0x07, 0xff, 0x70, 0xcd,
+ 0x78, 0x01, 0x0f, 0xb4, 0x23, 0x00, 0x83, 0x1e, 0x42, 0x01, 0xaf, 0xc0,
+ 0x83, 0x24, 0xcd, 0x78, 0x35, 0x0f, 0xa3, 0x00, 0x47, 0x01, 0xff, 0xc0,
+ 0x83, 0x30, 0xce, 0x1b, 0x7a, 0x01, 0x84, 0xe9, 0xd5, 0x37, 0x00, 0x01,
+ 0x84, 0xf1, 0xcc, 0x84, 0x78, 0x01, 0x84, 0xf8, 0x42, 0x00, 0x5b, 0xc0,
+ 0x83, 0x88, 0xc9, 0xb3, 0xd5, 0x01, 0x70, 0x90, 0x42, 0x00, 0x2a, 0xc0,
+ 0x83, 0x95, 0x47, 0xc9, 0xf9, 0x40, 0x83, 0xa1, 0x46, 0x01, 0xf1, 0xc0,
+ 0x83, 0xb3, 0xc7, 0x01, 0xb1, 0x0f, 0xa9, 0x19, 0xc7, 0xc5, 0xf4, 0x0f,
+ 0xa9, 0x10, 0x14, 0xc0, 0x83, 0xc5, 0xc4, 0x20, 0x0c, 0x01, 0x11, 0x5a,
+ 0x00, 0x83, 0xe4, 0xcd, 0x79, 0xfc, 0x01, 0x1c, 0x01, 0x4d, 0x78, 0xd1,
+ 0x40, 0x83, 0xe8, 0xc5, 0x6b, 0x95, 0x01, 0x10, 0xf3, 0x00, 0x83, 0xf4,
+ 0x49, 0x56, 0xcc, 0x40, 0x83, 0xfa, 0x42, 0x01, 0xa5, 0xc0, 0x84, 0x04,
+ 0x42, 0x00, 0xeb, 0x40, 0x84, 0x10, 0x0b, 0xc0, 0x84, 0x1c, 0xc2, 0x01,
+ 0x65, 0x00, 0x04, 0x22, 0x00, 0x84, 0x28, 0xd3, 0x42, 0xef, 0x01, 0x03,
+ 0x61, 0xd2, 0x4f, 0x7a, 0x01, 0x03, 0x50, 0xcd, 0x7b, 0x41, 0x0f, 0xd5,
+ 0x51, 0x44, 0x02, 0x60, 0x40, 0x84, 0x2e, 0x16, 0xc0, 0x84, 0x3d, 0x42,
+ 0x00, 0x06, 0xc0, 0x84, 0x49, 0xc5, 0x45, 0xa9, 0x01, 0x80, 0x01, 0x05,
+ 0xc0, 0x84, 0x55, 0xc9, 0x11, 0x47, 0x01, 0x80, 0x11, 0xce, 0x1b, 0x7a,
+ 0x01, 0x80, 0x29, 0xcb, 0x96, 0x77, 0x01, 0x80, 0x39, 0xcf, 0x66, 0x72,
+ 0x01, 0x81, 0x51, 0xd0, 0x5c, 0x8f, 0x01, 0x81, 0x59, 0xd2, 0x49, 0xec,
+ 0x01, 0x81, 0x69, 0xd3, 0x44, 0xb7, 0x01, 0x81, 0xf1, 0xcf, 0x69, 0x51,
+ 0x01, 0x81, 0xf9, 0x4b, 0x54, 0xc1, 0x40, 0x84, 0x61, 0xc4, 0x5a, 0x80,
+ 0x0f, 0x9b, 0x41, 0xc3, 0xb3, 0xa9, 0x0f, 0xce, 0x50, 0xda, 0x19, 0xce,
+ 0x01, 0x12, 0x98, 0x4e, 0x6d, 0x3e, 0x40, 0x84, 0x97, 0x8f, 0x0f, 0xd5,
+ 0x89, 0x42, 0x00, 0x3c, 0xc0, 0x84, 0xa9, 0xc6, 0xd7, 0xde, 0x0f, 0xaf,
+ 0xd1, 0xc9, 0xb0, 0x09, 0x0f, 0xb0, 0xf8, 0xc2, 0x00, 0x3a, 0x0f, 0xa3,
+ 0x4b, 0x00, 0x84, 0xb5, 0xca, 0xaa, 0x42, 0x0f, 0xb5, 0xd0, 0x42, 0x00,
+ 0xc0, 0xc0, 0x84, 0xc1, 0xdc, 0x13, 0xfa, 0x01, 0x3d, 0x98, 0xcc, 0x8c,
+ 0x70, 0x01, 0x33, 0xf9, 0xca, 0xa5, 0x2e, 0x01, 0x31, 0xc0, 0x46, 0x1a,
+ 0xf3, 0xc0, 0x85, 0x11, 0x46, 0x03, 0xdd, 0xc0, 0x85, 0x1d, 0x4a, 0x00,
+ 0x68, 0xc0, 0x85, 0x29, 0x4b, 0x00, 0x47, 0xc0, 0x85, 0x47, 0x4a, 0x04,
+ 0x68, 0xc0, 0x85, 0x65, 0x48, 0x02, 0x6a, 0x40, 0x85, 0x83, 0xca, 0x9f,
+ 0xca, 0x0f, 0xad, 0x71, 0xc4, 0x09, 0xde, 0x0f, 0xb6, 0xe0, 0x06, 0xc0,
+ 0x85, 0xa1, 0xc7, 0xce, 0xe5, 0x0f, 0x9b, 0xb9, 0xc9, 0xa3, 0xbd, 0x0f,
+ 0xb0, 0x49, 0x89, 0x0f, 0xd5, 0xe8, 0x42, 0x00, 0x2a, 0xc0, 0x85, 0xab,
+ 0xc2, 0x11, 0x3f, 0x01, 0x18, 0xd0, 0x44, 0xd6, 0x82, 0xc0, 0x85, 0xb5,
+ 0x44, 0x01, 0x5e, 0x40, 0x85, 0xcd, 0x49, 0xb8, 0x31, 0xc0, 0x85, 0xd9,
+ 0xc9, 0xb2, 0xbe, 0x01, 0x35, 0x00, 0x42, 0x00, 0x32, 0xc0, 0x85, 0xf7,
+ 0x44, 0x01, 0x5e, 0xc0, 0x86, 0x07, 0x42, 0x00, 0xc0, 0x40, 0x86, 0x19,
+ 0xd3, 0x40, 0x56, 0x0f, 0x98, 0xa1, 0xd4, 0x3e, 0xba, 0x0f, 0x98, 0x90,
+ 0xda, 0x13, 0xc4, 0x01, 0x3d, 0xe1, 0xc4, 0x0a, 0x30, 0x0f, 0xa4, 0x90,
+ 0xda, 0x1c, 0x72, 0x01, 0x08, 0xc1, 0xca, 0x9e, 0xb2, 0x0f, 0x9e, 0x58,
+ 0xc4, 0x01, 0xa7, 0x0f, 0xb1, 0x49, 0xc8, 0x1a, 0x50, 0x0f, 0xb2, 0x00,
+ 0xcb, 0x99, 0x21, 0x01, 0x12, 0x01, 0xc3, 0x1d, 0xdc, 0x0f, 0xa9, 0x39,
+ 0xc6, 0xd9, 0x5e, 0x0f, 0xc9, 0xe0, 0x44, 0x01, 0x5e, 0x40, 0x86, 0x25,
+ 0xc2, 0x00, 0x98, 0x0f, 0xd4, 0x41, 0xc9, 0x8a, 0x03, 0x0f, 0xb1, 0x98,
+ 0xc5, 0xe0, 0xb2, 0x0f, 0xcd, 0x49, 0x16, 0xc0, 0x86, 0x37, 0xc9, 0xb6,
+ 0x42, 0x01, 0x37, 0x98, 0xc9, 0x19, 0x70, 0x01, 0x3b, 0x31, 0xc3, 0x00,
+ 0xc9, 0x01, 0x34, 0xc3, 0x00, 0x86, 0x49, 0xc8, 0x35, 0xbd, 0x0f, 0xa5,
+ 0xf0, 0xc9, 0xad, 0x4b, 0x01, 0x34, 0xe1, 0xca, 0xa8, 0x08, 0x0f, 0xa5,
+ 0x50, 0xcc, 0x8c, 0x34, 0x0f, 0xd5, 0x69, 0xc2, 0x00, 0x2a, 0x0f, 0xae,
+ 0x00, 0x14, 0xc0, 0x86, 0x4f, 0xc5, 0x01, 0xca, 0x01, 0x37, 0x91, 0xce,
+ 0x6f, 0x60, 0x01, 0x00, 0x28, 0xc3, 0x97, 0x83, 0x01, 0x15, 0x49, 0xc4,
+ 0x66, 0x0b, 0x01, 0x10, 0x01, 0x0d, 0xc0, 0x86, 0x59, 0xc6, 0xb9, 0x65,
+ 0x00, 0x00, 0x61, 0xcb, 0x94, 0x04, 0x0f, 0xcb, 0x00, 0xc6, 0xbc, 0x4d,
+ 0x0f, 0xa3, 0x18, 0xc2, 0x2c, 0x65, 0x0f, 0x98, 0x08, 0x42, 0x00, 0xc0,
+ 0xc0, 0x86, 0x6e, 0xcb, 0x93, 0x3e, 0x01, 0x09, 0xd9, 0xc4, 0x57, 0x3f,
+ 0x0f, 0x9f, 0x68, 0xc7, 0x41, 0xde, 0x0f, 0xa7, 0x01, 0xc4, 0xdb, 0x81,
+ 0x0f, 0xad, 0xb8, 0x0e, 0xc0, 0x86, 0x90, 0xc4, 0xe9, 0x7f, 0x0f, 0xce,
+ 0x30, 0xca, 0x90, 0xd7, 0x0f, 0xcb, 0xb1, 0x46, 0xd2, 0x7a, 0x40, 0x86,
+ 0x9c, 0x10, 0xc0, 0x86, 0xa8, 0xc2, 0x00, 0x6a, 0x01, 0x19, 0x13, 0x00,
+ 0x86, 0xb4, 0xc6, 0x22, 0xf6, 0x0f, 0xa1, 0xc0, 0x46, 0x14, 0x3a, 0xc0,
+ 0x86, 0xba, 0x48, 0x59, 0xfd, 0x40, 0x86, 0xc6, 0x00, 0xc0, 0x86, 0xd8,
+ 0x46, 0x47, 0xb8, 0x40, 0x86, 0xf0, 0xc8, 0xbb, 0x5b, 0x01, 0x35, 0x89,
+ 0xd1, 0x52, 0x68, 0x01, 0x03, 0x08, 0x9b, 0x01, 0x37, 0xa1, 0xc8, 0xb9,
+ 0x23, 0x0f, 0x9d, 0x08, 0xc8, 0x19, 0xc6, 0x01, 0x32, 0x01, 0xd7, 0x28,
+ 0x96, 0x00, 0x05, 0x50, 0xc9, 0xab, 0xd1, 0x0f, 0xb1, 0x41, 0xc4, 0x13,
+ 0xff, 0x0f, 0xd5, 0xb0, 0x43, 0x15, 0x08, 0xc0, 0x87, 0x50, 0x87, 0x0f,
+ 0xa9, 0x2a, 0x00, 0x87, 0x65, 0x8a, 0x0f, 0xa0, 0xfb, 0x00, 0x87, 0x77,
+ 0xcd, 0x78, 0x76, 0x0f, 0xa2, 0x50, 0xcb, 0x0b, 0xfc, 0x01, 0x02, 0xc9,
+ 0xc4, 0x00, 0xfa, 0x01, 0x71, 0x68, 0xc4, 0x0d, 0xc7, 0x01, 0x00, 0x91,
+ 0xc5, 0x45, 0xa9, 0x01, 0x00, 0x38, 0x42, 0x00, 0xc0, 0xc0, 0x87, 0x89,
+ 0x42, 0x01, 0x0b, 0x40, 0x87, 0x9b, 0xc3, 0x13, 0x62, 0x0f, 0xd5, 0x79,
+ 0x48, 0xbf, 0x6b, 0x40, 0x87, 0xa7, 0x4c, 0x8c, 0x7c, 0xc0, 0x87, 0xcf,
+ 0xc6, 0x94, 0xb9, 0x0b, 0x7f, 0x20, 0x46, 0x08, 0xd7, 0xc0, 0x87, 0xd7,
+ 0x45, 0x02, 0xcb, 0xc0, 0x87, 0xfb, 0x4b, 0x6f, 0x71, 0xc0, 0x88, 0x0d,
+ 0x47, 0x01, 0xff, 0x40, 0x88, 0x27, 0x4b, 0x6f, 0x71, 0xc0, 0x88, 0x8e,
+ 0x47, 0x01, 0xff, 0xc0, 0x88, 0xab, 0x15, 0xc0, 0x89, 0x12, 0xd1, 0x56,
+ 0x0f, 0x08, 0x91, 0xe9, 0x06, 0xc0, 0x89, 0x1e, 0xce, 0x6d, 0x68, 0x08,
+ 0x91, 0xd0, 0x15, 0xc0, 0x89, 0x2a, 0x46, 0x08, 0xd7, 0xc0, 0x89, 0x36,
+ 0xd4, 0x39, 0xf6, 0x00, 0xbe, 0xd9, 0x46, 0x33, 0x45, 0xc0, 0x89, 0x5a,
+ 0x52, 0x4e, 0xb4, 0xc0, 0x89, 0x66, 0x47, 0x01, 0xff, 0x40, 0x89, 0x7c,
+ 0xc7, 0xce, 0x13, 0x0f, 0xa8, 0xf9, 0xc5, 0x4a, 0xe3, 0x01, 0x19, 0x42,
+ 0x00, 0x89, 0xc6, 0xc4, 0x24, 0x35, 0x0e, 0x96, 0x4b, 0x00, 0x89, 0xcc,
+ 0x07, 0xc0, 0x89, 0xd2, 0x15, 0xc0, 0x89, 0xe1, 0x08, 0xc0, 0x89, 0xf3,
+ 0x16, 0xc0, 0x8a, 0x00, 0xc3, 0x05, 0x17, 0x0e, 0x96, 0x09, 0xc4, 0x16,
+ 0x57, 0x0e, 0x96, 0x00, 0x42, 0x00, 0xee, 0xc0, 0x8a, 0x0e, 0x4b, 0x0c,
+ 0x26, 0x40, 0x8a, 0x2c, 0x06, 0xc0, 0x8a, 0x44, 0x4c, 0x11, 0x33, 0xc0,
+ 0x8a, 0x56, 0xc9, 0xb4, 0x0b, 0x08, 0xfa, 0x11, 0x45, 0x02, 0xcb, 0xc0,
+ 0x8a, 0x74, 0x4b, 0x6f, 0x71, 0xc0, 0x8a, 0x92, 0x47, 0x01, 0xff, 0x40,
+ 0x8a, 0xb1, 0xcb, 0x96, 0x82, 0x08, 0x85, 0xeb, 0x00, 0x8b, 0x20, 0x4b,
+ 0x6f, 0x71, 0xc0, 0x8b, 0x26, 0x06, 0xc0, 0x8b, 0x46, 0x15, 0xc0, 0x8b,
+ 0x52, 0xd0, 0x5e, 0x8f, 0x08, 0x85, 0xe1, 0xd1, 0x56, 0x0f, 0x08, 0x85,
+ 0xd9, 0x47, 0x01, 0xff, 0x40, 0x8b, 0x5e, 0x45, 0x02, 0xcb, 0xc0, 0x8b,
+ 0xc5, 0x45, 0x2a, 0xe3, 0xc0, 0x8b, 0xd1, 0x46, 0x33, 0x45, 0xc0, 0x8b,
+ 0xe0, 0x47, 0x01, 0xff, 0xc0, 0x8b, 0xf2, 0x46, 0x08, 0xd7, 0x40, 0x8c,
+ 0x58, 0x45, 0xe0, 0x1c, 0xc0, 0x8c, 0x7c, 0x09, 0x40, 0x8c, 0x9a, 0xc5,
+ 0x00, 0xf9, 0x0f, 0xa4, 0x59, 0x44, 0x01, 0x5e, 0x40, 0x8c, 0xa6, 0xc5,
+ 0x18, 0x25, 0x0f, 0xd5, 0x48, 0x46, 0x57, 0x77, 0xc0, 0x8c, 0xb5, 0xc6,
+ 0x46, 0xeb, 0x01, 0x05, 0x29, 0xc6, 0xd7, 0x90, 0x0f, 0x98, 0x60, 0x47,
+ 0x01, 0xff, 0xc0, 0x8c, 0xc1, 0x45, 0x2a, 0xe3, 0xc0, 0x8d, 0x1b, 0x4b,
+ 0x6f, 0x71, 0xc0, 0x8d, 0x33, 0x45, 0x02, 0xcb, 0x40, 0x8d, 0x7a, 0x00,
+ 0xc0, 0x8d, 0x8c, 0x11, 0x40, 0x8d, 0x98, 0xd8, 0x23, 0x9c, 0x01, 0x17,
+ 0x79, 0x44, 0x01, 0x8f, 0x40, 0x8d, 0xb0, 0x17, 0xc0, 0x8d, 0xbc, 0x0b,
+ 0xc0, 0x8d, 0xc8, 0x9b, 0x01, 0x4f, 0xf8, 0xc3, 0x0a, 0x2a, 0x0f, 0xcd,
+ 0xf1, 0xc3, 0x36, 0x56, 0x0f, 0xcd, 0xf8, 0x0b, 0xc0, 0x8d, 0xda, 0x49,
+ 0xb7, 0x62, 0x40, 0x8d, 0xe6, 0x91, 0x0f, 0xb4, 0x39, 0x45, 0x02, 0x5f,
+ 0x40, 0x8e, 0x06, 0x4b, 0x9b, 0x26, 0xc0, 0x8e, 0x22, 0xd7, 0x27, 0xb0,
+ 0x0f, 0xaa, 0x71, 0xc8, 0x40, 0x9a, 0x0f, 0xb5, 0xc8, 0xc4, 0x62, 0x2d,
+ 0x01, 0x31, 0xf9, 0x46, 0xd0, 0xd0, 0xc0, 0x8e, 0x34, 0xc6, 0x15, 0x83,
+ 0x0f, 0xce, 0xe0, 0x46, 0xd8, 0xb0, 0xc0, 0x8e, 0x40, 0xc9, 0xaf, 0x5e,
+ 0x0f, 0x9a, 0xb0, 0x46, 0x08, 0xd7, 0xc0, 0x8e, 0x55, 0x03, 0xc0, 0x8e,
+ 0x79, 0x18, 0xc0, 0x8e, 0x8b, 0x0e, 0xc0, 0x8e, 0x97, 0xd4, 0x3d, 0x66,
+ 0x05, 0x57, 0xa1, 0xd8, 0x21, 0xd4, 0x05, 0x57, 0x99, 0x46, 0xd6, 0x1c,
+ 0x40, 0x8e, 0xa3, 0xc2, 0x00, 0xe5, 0x0f, 0x9a, 0x41, 0xc9, 0x88, 0x83,
+ 0x0f, 0xd7, 0x00, 0x42, 0x00, 0xff, 0xc0, 0x8e, 0xaf, 0xcd, 0x74, 0x23,
+ 0x0f, 0xc9, 0xb0, 0x42, 0x00, 0xb5, 0xc0, 0x8e, 0xbf, 0xc2, 0x01, 0x47,
+ 0x0f, 0xa2, 0x21, 0xc2, 0x00, 0xa7, 0x0f, 0xa0, 0x0a, 0x00, 0x8e, 0xce,
+ 0x0b, 0xc0, 0x8e, 0xd2, 0x07, 0xc0, 0x8e, 0xdc, 0xcb, 0x98, 0x92, 0x01,
+ 0x50, 0x50, 0x11, 0xc0, 0x8e, 0xe8, 0x47, 0xc9, 0x58, 0xc0, 0x8e, 0xfa,
+ 0x07, 0xc0, 0x8f, 0x49, 0xc3, 0x1e, 0x77, 0x0f, 0xa0, 0x92, 0x00, 0x8f,
+ 0x55, 0xc8, 0xc2, 0x53, 0x0f, 0xaf, 0x81, 0x42, 0x00, 0xf8, 0xc0, 0x8f,
+ 0x5b, 0xc2, 0x00, 0xa7, 0x0f, 0xd6, 0x70, 0x87, 0x0f, 0xaa, 0x61, 0xc3,
+ 0x5c, 0x80, 0x0f, 0xcc, 0xf8, 0x00, 0x40, 0x8f, 0x67, 0x4a, 0x4a, 0xc7,
+ 0xc0, 0x8f, 0x73, 0xc7, 0xcd, 0xb1, 0x0f, 0xce, 0x48, 0xc4, 0x24, 0x35,
+ 0x0e, 0x97, 0x4b, 0x00, 0x8f, 0x9f, 0x07, 0xc0, 0x8f, 0xa5, 0x15, 0xc0,
+ 0x8f, 0xb4, 0x08, 0xc0, 0x8f, 0xc6, 0x16, 0xc0, 0x8f, 0xd3, 0xc3, 0x05,
+ 0x17, 0x0e, 0x97, 0x09, 0xc4, 0x16, 0x57, 0x0e, 0x97, 0x00, 0x45, 0xdd,
+ 0x79, 0xc0, 0x8f, 0xe1, 0xc5, 0xe1, 0x5c, 0x0f, 0xbb, 0xe0, 0x4b, 0x9b,
+ 0xb5, 0xc0, 0x8f, 0xff, 0xcd, 0x80, 0xfe, 0x0f, 0x8d, 0x69, 0xd8, 0x23,
+ 0xfc, 0x00, 0x05, 0xd1, 0xc6, 0xcb, 0x58, 0x01, 0x81, 0xe0, 0x45, 0x43,
+ 0x2b, 0xc0, 0x90, 0x19, 0xcc, 0x89, 0xc4, 0x01, 0x35, 0x69, 0xd1, 0x51,
+ 0x8b, 0x0f, 0xca, 0x59, 0xc4, 0xc1, 0x2a, 0x0f, 0xd4, 0x38, 0xca, 0xa1,
+ 0xb4, 0x01, 0x39, 0x01, 0x42, 0x00, 0xc0, 0xc0, 0x90, 0x35, 0x47, 0xb4,
+ 0x04, 0x40, 0x90, 0x47, 0xd6, 0x30, 0xcb, 0x01, 0x37, 0x79, 0xc7, 0xc4,
+ 0xc0, 0x0f, 0x9a, 0x08, 0xc7, 0x68, 0x2d, 0x01, 0x05, 0xe1, 0x48, 0xbe,
+ 0xab, 0xc0, 0x90, 0x6f, 0x00, 0xc0, 0x90, 0x8d, 0xce, 0x6d, 0xca, 0x0f,
+ 0xab, 0x81, 0x45, 0xde, 0x73, 0xc0, 0x90, 0xa5, 0xc2, 0x13, 0xf3, 0x0f,
+ 0xcb, 0x69, 0xce, 0x72, 0x00, 0x0f, 0xcd, 0xe9, 0xc6, 0xd0, 0x82, 0x0f,
+ 0xa2, 0xf0, 0x46, 0xd7, 0x24, 0xc0, 0x90, 0xc3, 0x4a, 0x9d, 0xc2, 0x40,
+ 0x90, 0xd1, 0x87, 0x0f, 0xce, 0xc9, 0xc3, 0x29, 0x2b, 0x0f, 0xcf, 0x91,
+ 0xc7, 0xc9, 0xcf, 0x0f, 0xd4, 0x20, 0x42, 0x00, 0x2d, 0xc0, 0x91, 0x17,
+ 0xc5, 0xdf, 0x77, 0x0f, 0x9a, 0x20, 0x0b, 0xc0, 0x91, 0x21, 0x44, 0x95,
+ 0x72, 0x40, 0x91, 0x36, 0xcc, 0x06, 0xfb, 0x01, 0x13, 0x59, 0xc9, 0x09,
+ 0xde, 0x01, 0x13, 0x50, 0xcb, 0x91, 0xff, 0x0b, 0x53, 0x79, 0xc4, 0x1c,
+ 0xb3, 0x0b, 0x53, 0x71, 0x45, 0x08, 0xd8, 0x40, 0x91, 0x42, 0x16, 0xc0,
+ 0x91, 0x66, 0x14, 0xc0, 0x91, 0x76, 0x42, 0x01, 0x0e, 0xc0, 0x91, 0x7e,
+ 0xc2, 0x00, 0x96, 0x0b, 0x52, 0xdb, 0x00, 0x91, 0x86, 0x0d, 0xc0, 0x91,
+ 0x8a, 0x87, 0x0b, 0x52, 0xc3, 0x00, 0x91, 0x9a, 0xc2, 0x00, 0x4c, 0x0b,
+ 0x52, 0xb9, 0xc3, 0x03, 0xae, 0x0b, 0x52, 0xa1, 0x91, 0x0b, 0x52, 0x93,
+ 0x00, 0x91, 0x9e, 0x12, 0xc0, 0x91, 0xa6, 0x10, 0xc0, 0x91, 0xb0, 0x0f,
+ 0xc0, 0x91, 0xbc, 0xc3, 0x2d, 0x66, 0x0b, 0x52, 0x59, 0xc2, 0x0c, 0x25,
+ 0x0b, 0x52, 0x2b, 0x00, 0x91, 0xc8, 0x83, 0x0b, 0x52, 0x31, 0xc2, 0x02,
+ 0x1d, 0x0b, 0x52, 0x21, 0xc2, 0x47, 0x43, 0x0b, 0x52, 0x10, 0x44, 0x02,
+ 0xcc, 0xc0, 0x91, 0xcc, 0x46, 0x11, 0xf1, 0xc0, 0x92, 0x04, 0x4a, 0x9f,
+ 0x5c, 0x40, 0x92, 0x20, 0x46, 0x02, 0x00, 0xc0, 0x92, 0x44, 0x4f, 0x66,
+ 0x54, 0x40, 0x92, 0xae, 0xd4, 0x3f, 0x96, 0x05, 0x53, 0x81, 0xd2, 0x4b,
+ 0x8a, 0x05, 0x4f, 0x30, 0x4f, 0x68, 0x61, 0xc0, 0x92, 0xc0, 0x54, 0x3c,
+ 0xc6, 0x40, 0x92, 0xe4, 0xc7, 0xcc, 0xae, 0x00, 0x81, 0x59, 0x03, 0xc0,
+ 0x92, 0xf0, 0x8b, 0x00, 0x81, 0x6b, 0x00, 0x92, 0xfb, 0x97, 0x00, 0x81,
+ 0x7b, 0x00, 0x92, 0xff, 0x87, 0x00, 0x81, 0x8b, 0x00, 0x93, 0x03, 0x44,
+ 0xba, 0xeb, 0xc0, 0x93, 0x09, 0x48, 0xb7, 0xd7, 0xc0, 0x93, 0x13, 0x15,
+ 0xc0, 0x93, 0x21, 0x52, 0x32, 0xe6, 0xc0, 0x93, 0x2d, 0xcc, 0x83, 0x34,
+ 0x00, 0x83, 0x89, 0x46, 0xd9, 0x0a, 0x40, 0x93, 0x39, 0x0f, 0xc0, 0x93,
+ 0x49, 0xce, 0x75, 0xaa, 0x00, 0x84, 0x10, 0xc4, 0x16, 0x57, 0x00, 0x82,
+ 0x01, 0xc3, 0x05, 0x17, 0x00, 0x82, 0x09, 0x16, 0xc0, 0x93, 0x55, 0x08,
+ 0xc0, 0x93, 0x61, 0x15, 0xc0, 0x93, 0x6d, 0xc5, 0x05, 0x1b, 0x00, 0x82,
+ 0x41, 0xc4, 0x24, 0x35, 0x00, 0x82, 0x48, 0x16, 0xc0, 0x93, 0x79, 0xc3,
+ 0x6a, 0x22, 0x01, 0x5e, 0xe0, 0x44, 0x00, 0x9a, 0xc0, 0x93, 0x85, 0xc2,
+ 0x00, 0x2a, 0x01, 0x35, 0x90, 0xc6, 0x6c, 0xd6, 0x0f, 0xa7, 0x81, 0x42,
+ 0x00, 0xee, 0xc0, 0x93, 0x91, 0x00, 0xc0, 0x93, 0xc9, 0x45, 0x03, 0x5d,
+ 0x40, 0x93, 0xe1, 0x44, 0x0d, 0x8f, 0xc0, 0x93, 0xed, 0x4d, 0x76, 0xf0,
+ 0x40, 0x94, 0x05, 0xc9, 0x26, 0xef, 0x01, 0x5e, 0x48, 0xc4, 0xa2, 0x46,
+ 0x01, 0x1c, 0xc1, 0xc4, 0x03, 0x5d, 0x00, 0x04, 0x28, 0x03, 0xc0, 0x94,
+ 0x0b, 0x51, 0x56, 0xfd, 0xc0, 0x94, 0x17, 0x4e, 0x70, 0x86, 0x40, 0x94,
+ 0x23, 0x46, 0x08, 0xd7, 0xc0, 0x94, 0x2f, 0x45, 0x02, 0xcb, 0xc0, 0x94,
+ 0x53, 0x47, 0x01, 0xff, 0x40, 0x94, 0x71, 0x48, 0xba, 0xe3, 0x40, 0x94,
+ 0x94, 0xc2, 0x00, 0x96, 0x01, 0x10, 0x39, 0x47, 0xcb, 0x2d, 0x40, 0x94,
+ 0xac, 0xc7, 0x81, 0xa0, 0x01, 0x05, 0x31, 0xc8, 0xbb, 0x6b, 0x0f, 0xa4,
+ 0x28, 0xcc, 0x59, 0x43, 0x01, 0x03, 0x71, 0xc4, 0xb4, 0x43, 0x0f, 0x9e,
+ 0xf0, 0x42, 0x01, 0x47, 0xc0, 0x94, 0xbe, 0xcf, 0x4c, 0xe3, 0x01, 0x15,
+ 0x93, 0x00, 0x94, 0xc8, 0xcd, 0x7b, 0xb6, 0x01, 0x05, 0xd8, 0x45, 0x8a,
+ 0xb7, 0xc0, 0x94, 0xce, 0x00, 0xc0, 0x94, 0xde, 0x87, 0x0f, 0xae, 0x42,
+ 0x00, 0x95, 0x17, 0xd9, 0x1f, 0xce, 0x0f, 0xa8, 0xf1, 0xc5, 0x7c, 0x06,
+ 0x01, 0x36, 0xa3, 0x00, 0x95, 0x26, 0x12, 0xc0, 0x95, 0x2c, 0xcd, 0x7a,
+ 0x7e, 0x0f, 0xa7, 0xa9, 0x04, 0xc0, 0x95, 0x38, 0xce, 0x75, 0xfe, 0x0f,
+ 0xb5, 0x68, 0xd0, 0x59, 0x3f, 0x01, 0x03, 0x79, 0xc8, 0xbe, 0x63, 0x08,
+ 0x0c, 0x70, 0xcc, 0x89, 0xe8, 0x0f, 0x0a, 0x71, 0x46, 0x02, 0x00, 0x40,
+ 0x95, 0x44, 0xc4, 0x24, 0x35, 0x0f, 0x0a, 0x49, 0xc5, 0x05, 0x1b, 0x0f,
+ 0x0a, 0x41, 0x15, 0xc0, 0x95, 0xc6, 0x08, 0xc0, 0x95, 0xd2, 0x16, 0xc0,
+ 0x95, 0xde, 0xc3, 0x05, 0x17, 0x0f, 0x0a, 0x09, 0xc4, 0x16, 0x57, 0x0f,
+ 0x0a, 0x00, 0xd2, 0x4b, 0x30, 0x0f, 0x09, 0xe9, 0x44, 0x02, 0xcc, 0x40,
+ 0x95, 0xea, 0xc3, 0x39, 0x81, 0x0f, 0x09, 0xb1, 0xc3, 0x7e, 0x40, 0x0f,
+ 0x09, 0xa9, 0xc3, 0x3f, 0x73, 0x0f, 0x09, 0xa1, 0xc3, 0xeb, 0x1c, 0x0f,
+ 0x09, 0x99, 0xc3, 0xeb, 0x1f, 0x0f, 0x09, 0x91, 0xc3, 0x7f, 0xc6, 0x0f,
+ 0x09, 0x89, 0xc3, 0xe6, 0x7f, 0x0f, 0x09, 0x80, 0x4c, 0x8c, 0x88, 0xc0,
+ 0x95, 0xf6, 0xce, 0x1b, 0x7a, 0x0b, 0x7f, 0x08, 0x44, 0x04, 0xb1, 0xc0,
+ 0x95, 0xfe, 0xc8, 0xaf, 0xa7, 0x01, 0x08, 0xb0, 0xca, 0xa3, 0xc6, 0x00,
+ 0x3f, 0xf1, 0xc9, 0xac, 0xcd, 0x00, 0x3f, 0xe9, 0x45, 0x08, 0xd8, 0x40,
+ 0x96, 0x14, 0xc9, 0xb3, 0x8d, 0x00, 0x3f, 0xd1, 0xd2, 0x49, 0x6e, 0x00,
+ 0x3f, 0xa9, 0x46, 0x02, 0x00, 0x40, 0x96, 0x38, 0xc2, 0x00, 0x3f, 0x00,
+ 0x3f, 0xc1, 0x47, 0x10, 0xa4, 0x40, 0x96, 0xb8, 0xca, 0xa0, 0x56, 0x00,
+ 0x3f, 0xb9, 0xc9, 0xb0, 0x48, 0x00, 0x3f, 0xb0, 0xd4, 0x3f, 0x46, 0x0f,
+ 0xbe, 0xc9, 0xc2, 0x26, 0x94, 0x0f, 0xcb, 0xb8, 0xc7, 0xce, 0xf3, 0x0f,
+ 0xd3, 0x69, 0xc7, 0xc9, 0x35, 0x0f, 0xd3, 0x39, 0xc8, 0xba, 0x5b, 0x0f,
+ 0xd3, 0x41, 0xc8, 0xbf, 0x1b, 0x0f, 0xd3, 0x49, 0xc5, 0xa1, 0xff, 0x0f,
+ 0xd3, 0x51, 0x05, 0x40, 0x96, 0xd0, 0xc5, 0xa1, 0xff, 0x0f, 0xd3, 0x19,
+ 0xc7, 0xc9, 0x35, 0x0f, 0xd3, 0x01, 0xc8, 0xba, 0x5b, 0x0f, 0xd3, 0x09,
+ 0xc8, 0xbf, 0x1b, 0x0f, 0xd3, 0x11, 0x05, 0xc0, 0x96, 0xdc, 0xc7, 0xce,
+ 0xf3, 0x0f, 0xd3, 0x30, 0x4a, 0x9e, 0xd0, 0xc0, 0x96, 0xe8, 0x58, 0x24,
+ 0xd4, 0x40, 0x97, 0x00, 0xcc, 0x8c, 0xdc, 0x01, 0x1c, 0x19, 0x43, 0x15,
+ 0xab, 0x40, 0x97, 0x1e, 0xc4, 0x0d, 0xc7, 0x01, 0x00, 0xa1, 0xc5, 0x45,
+ 0xa9, 0x01, 0x00, 0x19, 0xc4, 0x0b, 0x19, 0x01, 0x00, 0x08, 0xc2, 0x01,
+ 0x47, 0x01, 0x32, 0x0b, 0x00, 0x97, 0x3a, 0x00, 0x40, 0x97, 0x40, 0x0f,
+ 0xc0, 0x97, 0x4c, 0x19, 0xc0, 0x97, 0x5f, 0x16, 0xc0, 0x97, 0x69, 0x0a,
+ 0xc0, 0x97, 0x73, 0x0e, 0xc0, 0x97, 0x85, 0x08, 0xc0, 0x97, 0x91, 0x07,
+ 0xc0, 0x97, 0x9b, 0x04, 0xc0, 0x97, 0xa5, 0x0b, 0xc0, 0x97, 0xb1, 0x11,
+ 0xc0, 0x97, 0xbb, 0x18, 0xc0, 0x97, 0xc5, 0x03, 0xc0, 0x97, 0xcf, 0x42,
+ 0x00, 0x1c, 0xc0, 0x97, 0xd9, 0x43, 0xeb, 0xb5, 0xc0, 0x97, 0xe1, 0x43,
+ 0xec, 0x18, 0xc0, 0x98, 0x04, 0x42, 0xed, 0xc9, 0xc0, 0x98, 0x2d, 0x42,
+ 0xed, 0xd5, 0xc0, 0x98, 0x41, 0x42, 0xeb, 0x12, 0xc0, 0x98, 0x55, 0x43,
+ 0xed, 0x05, 0xc0, 0x98, 0x61, 0x42, 0xed, 0xaf, 0xc0, 0x98, 0x7d, 0x10,
+ 0xc0, 0x98, 0x85, 0x43, 0xec, 0x78, 0xc0, 0x98, 0x95, 0x42, 0xed, 0xcb,
+ 0xc0, 0x98, 0xb5, 0x42, 0xbc, 0x22, 0xc0, 0x98, 0xcd, 0x42, 0xed, 0xc7,
+ 0x40, 0x98, 0xe9, 0x14, 0xc0, 0x98, 0xf5, 0x59, 0x10, 0xdf, 0x40, 0x99,
+ 0x01, 0xc3, 0x03, 0x47, 0x01, 0x11, 0xc9, 0x49, 0x0f, 0x65, 0x40, 0x99,
+ 0x25, 0x48, 0x13, 0x57, 0xc0, 0x99, 0x31, 0x07, 0x40, 0x99, 0x85, 0x0f,
+ 0xc0, 0x99, 0x91, 0xc3, 0x0c, 0x34, 0x00, 0x9b, 0x28, 0xcc, 0x89, 0x4c,
+ 0x00, 0x9b, 0x31, 0xd2, 0x46, 0x47, 0x00, 0x9b, 0x40, 0xc3, 0x05, 0x17,
+ 0x00, 0x9b, 0x49, 0x16, 0xc0, 0x99, 0x9d, 0x08, 0xc0, 0x99, 0xa9, 0x15,
+ 0xc0, 0x99, 0xb5, 0xc5, 0x05, 0x1b, 0x00, 0x9b, 0x81, 0xc4, 0x24, 0x35,
+ 0x00, 0x9b, 0x88, 0x16, 0xc0, 0x99, 0xc1, 0x08, 0xc0, 0x99, 0xd6, 0x15,
+ 0xc0, 0x99, 0xe2, 0xc6, 0xd1, 0xe4, 0x00, 0x9b, 0xc9, 0xc6, 0x29, 0x29,
+ 0x00, 0x9b, 0xd1, 0xc7, 0x0d, 0x7f, 0x00, 0x9b, 0xd8, 0xc5, 0xda, 0x9f,
+ 0x00, 0x9c, 0x81, 0x06, 0xc0, 0x99, 0xee, 0xc6, 0x87, 0xba, 0x00, 0x9c,
+ 0x91, 0xcc, 0x87, 0xb4, 0x00, 0x9c, 0x99, 0x0d, 0xc0, 0x99, 0xfa, 0xc6,
+ 0xd6, 0x94, 0x00, 0x9c, 0xb1, 0xc5, 0xcc, 0x8d, 0x00, 0x9c, 0xb8, 0xc7,
+ 0x86, 0xa5, 0x01, 0x10, 0x43, 0x00, 0x9a, 0x06, 0x45, 0xde, 0xaa, 0xc0,
+ 0x9a, 0x0a, 0xc5, 0xbf, 0x1e, 0x0f, 0xa0, 0xc1, 0xc5, 0xdb, 0xd0, 0x0f,
+ 0xb6, 0xb8, 0xd2, 0x4d, 0x3a, 0x08, 0x7f, 0xb1, 0x46, 0x02, 0x00, 0x40,
+ 0x9a, 0x14, 0x83, 0x08, 0x28, 0x01, 0xc2, 0x00, 0x34, 0x08, 0x28, 0x09,
+ 0x05, 0xc0, 0x9a, 0x77, 0x06, 0xc0, 0x9a, 0x81, 0x10, 0xc0, 0x9a, 0x8b,
+ 0x87, 0x08, 0x28, 0x43, 0x00, 0x9a, 0x9f, 0xc2, 0x13, 0xfc, 0x08, 0x28,
+ 0x49, 0x09, 0xc0, 0x9a, 0xa3, 0xc2, 0x00, 0x5b, 0x08, 0x28, 0x61, 0x8b,
+ 0x08, 0x28, 0x69, 0xc2, 0x1c, 0x3e, 0x08, 0x28, 0x71, 0x0d, 0xc0, 0x9a,
+ 0xb1, 0x0e, 0xc0, 0x9a, 0xbb, 0xc2, 0x01, 0x03, 0x08, 0x28, 0x91, 0x91,
+ 0x08, 0x28, 0xb1, 0xc2, 0x03, 0x07, 0x08, 0x28, 0xb9, 0xc2, 0x93, 0xaf,
+ 0x08, 0x28, 0xc1, 0x14, 0xc0, 0x9a, 0xc5, 0x15, 0xc0, 0x9a, 0xcf, 0x16,
+ 0xc0, 0x9a, 0xd9, 0x97, 0x08, 0x28, 0xf9, 0xc2, 0x00, 0x29, 0x08, 0x29,
+ 0x01, 0xc2, 0x01, 0x66, 0x08, 0x29, 0x09, 0x9b, 0x08, 0x29, 0x11, 0x1c,
+ 0x40, 0x9a, 0xe3, 0x42, 0x00, 0x98, 0xc0, 0x9a, 0xed, 0x12, 0xc0, 0x9a,
+ 0xf3, 0xcf, 0x18, 0x2e, 0x01, 0x39, 0x98, 0x46, 0x01, 0xab, 0x40, 0x9a,
+ 0xff, 0x43, 0x03, 0x73, 0xc0, 0x9b, 0x0b, 0xda, 0x19, 0xe8, 0x0f, 0xa8,
+ 0xd0, 0xc4, 0x0d, 0xc7, 0x01, 0x00, 0x99, 0xc5, 0x45, 0xa9, 0x01, 0x00,
+ 0x11, 0xc4, 0x0b, 0x19, 0x01, 0x00, 0x00, 0xc4, 0x00, 0xcd, 0x01, 0x19,
+ 0x59, 0xc5, 0x00, 0x47, 0x01, 0x19, 0x30, 0x46, 0x04, 0x91, 0xc0, 0x9b,
+ 0x2d, 0x46, 0x01, 0x31, 0x40, 0x9b, 0x3f, 0xc3, 0x05, 0x17, 0x01, 0x5f,
+ 0x81, 0xc3, 0x0a, 0x1f, 0x01, 0x5f, 0x88, 0x00, 0xc0, 0x9b, 0x51, 0x42,
+ 0x00, 0x27, 0x40, 0x9b, 0x5d, 0xca, 0xa6, 0x8c, 0x01, 0x12, 0xd1, 0x47,
+ 0x37, 0x21, 0x40, 0x9b, 0x72, 0x95, 0x01, 0x12, 0xc9, 0xc8, 0x1c, 0xb8,
+ 0x01, 0x09, 0x70, 0x11, 0xc0, 0x9b, 0x7e, 0xcf, 0x6b, 0x31, 0x01, 0x05,
+ 0x00, 0xc6, 0xd5, 0x38, 0x0f, 0xcd, 0x71, 0xc3, 0x09, 0x46, 0x0f, 0x9d,
+ 0xc0, 0x46, 0x08, 0xd7, 0xc0, 0x9b, 0x8a, 0xc2, 0x00, 0xeb, 0x08, 0xec,
+ 0xc1, 0x18, 0xc0, 0x9b, 0xae, 0x45, 0x02, 0xcb, 0xc0, 0x9b, 0xba, 0x47,
+ 0x01, 0xff, 0x40, 0x9b, 0xc6, 0xc8, 0x95, 0x72, 0x01, 0x05, 0x91, 0xc5,
+ 0xe0, 0xc6, 0x0f, 0xa4, 0x10, 0x45, 0x02, 0xcb, 0xc0, 0x9c, 0x33, 0x47,
+ 0x01, 0xff, 0xc0, 0x9c, 0x57, 0x4b, 0x6f, 0x71, 0xc0, 0x9c, 0xcc, 0x46,
+ 0x08, 0xd7, 0xc0, 0x9c, 0xea, 0xc5, 0xdd, 0xe7, 0x00, 0x53, 0x81, 0x03,
+ 0xc0, 0x9d, 0x0e, 0xc3, 0x08, 0xb0, 0x00, 0x53, 0x91, 0xc3, 0x06, 0x83,
+ 0x00, 0x53, 0x99, 0xc8, 0xb9, 0x0b, 0x00, 0x53, 0xa0, 0x45, 0x02, 0xcb,
+ 0xc0, 0x9d, 0x1a, 0x47, 0x01, 0xff, 0xc0, 0x9d, 0x3c, 0x46, 0x33, 0x45,
+ 0xc0, 0x9d, 0xa7, 0xc2, 0x00, 0xeb, 0x00, 0x56, 0x81, 0x46, 0x08, 0xd7,
+ 0xc0, 0x9d, 0xb3, 0xd1, 0x56, 0x0f, 0x00, 0x57, 0x81, 0xca, 0x79, 0xf2,
+ 0x00, 0x57, 0x88, 0x96, 0x0f, 0xa0, 0x81, 0xc5, 0xdd, 0x06, 0x0f, 0xca,
+ 0x28, 0xc4, 0xe7, 0x1b, 0x08, 0x19, 0x99, 0x03, 0xc0, 0x9d, 0xd7, 0xc8,
+ 0xc0, 0x6b, 0x08, 0x19, 0xa9, 0x0b, 0xc0, 0x9d, 0xe3, 0x0a, 0xc0, 0x9d,
+ 0xef, 0x16, 0xc0, 0x9d, 0xfb, 0xc3, 0x5d, 0x2e, 0x08, 0x19, 0xc9, 0xc5,
+ 0xdd, 0x15, 0x08, 0x19, 0xd1, 0xc5, 0xdc, 0xe3, 0x08, 0x19, 0xd9, 0xc5,
+ 0x85, 0xc8, 0x08, 0x19, 0xe1, 0x10, 0xc0, 0x9e, 0x07, 0xc3, 0xb1, 0x05,
+ 0x08, 0x19, 0xf1, 0xc4, 0xe4, 0x0a, 0x08, 0x19, 0xf9, 0xc8, 0xbd, 0x1b,
+ 0x08, 0x1a, 0x01, 0xc5, 0xda, 0x22, 0x08, 0x1a, 0x11, 0xc5, 0xe0, 0x80,
+ 0x08, 0x1a, 0x19, 0xc5, 0xe0, 0x8f, 0x08, 0x1a, 0x29, 0xc5, 0xde, 0xeb,
+ 0x08, 0x1a, 0x31, 0xc5, 0xd9, 0xd2, 0x08, 0x1a, 0x49, 0xc7, 0xcd, 0x95,
+ 0x08, 0x19, 0x89, 0xc4, 0xe6, 0xd3, 0x08, 0x19, 0x90, 0x07, 0xc0, 0x9e,
+ 0x13, 0x4a, 0x08, 0x4a, 0xc0, 0x9e, 0x1f, 0xc7, 0xc5, 0x37, 0x0f, 0xd3,
+ 0xb0, 0x45, 0xe2, 0x88, 0xc0, 0x9e, 0x46, 0xcb, 0x99, 0xbb, 0x0f, 0x9c,
+ 0x99, 0xc3, 0x28, 0xc3, 0x0f, 0x9a, 0x39, 0xc9, 0x1e, 0x67, 0x00, 0x03,
+ 0x00, 0x46, 0x96, 0x4d, 0xc0, 0x9e, 0x64, 0xcb, 0x93, 0xe3, 0x0f, 0xb1,
+ 0x60, 0xca, 0xa8, 0xa8, 0x0f, 0xa4, 0xb9, 0x43, 0x11, 0x8a, 0x40, 0x9e,
+ 0x73, 0x45, 0x01, 0xab, 0x40, 0x9e, 0x7f, 0xc3, 0x01, 0xcd, 0x01, 0x32,
+ 0x51, 0xc6, 0xb0, 0x6f, 0x0f, 0xa4, 0x70, 0x46, 0x4f, 0x7d, 0xc0, 0x9e,
+ 0x8b, 0x46, 0x95, 0xea, 0x40, 0x9e, 0x97, 0x8e, 0x0f, 0xa3, 0x3b, 0x00,
+ 0x9e, 0xb5, 0xc9, 0xb3, 0xc3, 0x0f, 0xcc, 0x90, 0xc9, 0xb7, 0x47, 0x0f,
+ 0x98, 0xf9, 0xd1, 0x55, 0x10, 0x0f, 0x98, 0x81, 0xc3, 0x39, 0x90, 0x0f,
+ 0xcf, 0x20, 0x48, 0x54, 0x35, 0xc0, 0x9e, 0xbb, 0xca, 0x9e, 0x44, 0x0f,
+ 0xca, 0xd8, 0xc4, 0xe5, 0x1b, 0x0f, 0xcd, 0x39, 0x42, 0x00, 0xc0, 0x40,
+ 0x9e, 0xc7, 0xc8, 0x28, 0x26, 0x01, 0x15, 0xb1, 0x43, 0x3a, 0xc9, 0x40,
+ 0x9e, 0xd3, 0xd0, 0x1f, 0x1f, 0x07, 0xe9, 0xf1, 0xd1, 0x1c, 0xda, 0x07,
+ 0xe9, 0xf8, 0x4d, 0x54, 0xdd, 0xc0, 0x9e, 0xfb, 0x47, 0x37, 0x49, 0xc0,
+ 0x9f, 0x07, 0xc8, 0xb9, 0xc3, 0x0f, 0x69, 0x71, 0x51, 0x51, 0x9c, 0x40,
+ 0x9f, 0x2e, 0xc4, 0xe4, 0x9f, 0x0f, 0xb4, 0xb1, 0xc3, 0x25, 0x04, 0x0f,
+ 0xb4, 0x69, 0xca, 0xa5, 0x60, 0x0f, 0xb4, 0xa1, 0xca, 0x9e, 0x76, 0x0f,
+ 0xb4, 0xc1, 0xcb, 0x95, 0x38, 0x0f, 0xb7, 0x88, 0x44, 0x0b, 0xf8, 0xc0,
+ 0x9f, 0x46, 0xcc, 0x8d, 0x90, 0x0f, 0xad, 0x78, 0xe0, 0x09, 0xa7, 0x0f,
+ 0xa8, 0xd8, 0x42, 0x00, 0xf8, 0xc0, 0x9f, 0x52, 0xdb, 0x18, 0xa9, 0x01,
+ 0x3d, 0xf1, 0x51, 0x57, 0xfc, 0xc0, 0x9f, 0x7a, 0xd5, 0x36, 0x2e, 0x00,
+ 0x04, 0xe8, 0xc6, 0xd0, 0x3a, 0x01, 0x19, 0x29, 0xc8, 0xc0, 0xd3, 0x0f,
+ 0xa5, 0xfa, 0x00, 0x9f, 0x86, 0x00, 0xc0, 0x9f, 0x8c, 0x43, 0x00, 0xca,
+ 0x40, 0x9f, 0xc2, 0x12, 0xc0, 0x9f, 0xd4, 0xc4, 0xe5, 0xef, 0x00, 0xe3,
+ 0xe9, 0xc5, 0xda, 0x18, 0x00, 0xe3, 0xd9, 0x0f, 0xc0, 0x9f, 0xe0, 0xd0,
+ 0x4a, 0x36, 0x00, 0xe3, 0xc9, 0x47, 0x01, 0xff, 0xc0, 0x9f, 0xec, 0x46,
+ 0x08, 0xd7, 0x40, 0xa0, 0x04, 0x46, 0x0d, 0xa5, 0xc0, 0xa0, 0x28, 0xc8,
+ 0xbf, 0x93, 0x0f, 0xa7, 0x20, 0x06, 0xc0, 0xa0, 0x40, 0x05, 0xc0, 0xa0,
+ 0x4c, 0xcf, 0x66, 0xea, 0x01, 0x22, 0x39, 0x04, 0xc0, 0xa0, 0x58, 0xcd,
+ 0x7e, 0x9b, 0x01, 0x22, 0x19, 0xc4, 0x4c, 0xce, 0x01, 0x22, 0x11, 0xc4,
+ 0x04, 0x63, 0x01, 0x22, 0x00, 0xc4, 0xe8, 0x8b, 0x0f, 0xa0, 0xc9, 0xcb,
+ 0x98, 0xf5, 0x0f, 0xb6, 0x88, 0x4e, 0x70, 0xcc, 0xc0, 0xa0, 0x6a, 0xc6,
+ 0x57, 0xf6, 0x01, 0x72, 0xe8, 0xc3, 0x03, 0x5e, 0x01, 0x01, 0xf1, 0xc2,
+ 0x01, 0xf8, 0x0f, 0xae, 0xba, 0x00, 0xa0, 0x76, 0xd5, 0x37, 0x54, 0x00,
+ 0xb4, 0xe1, 0xcc, 0x37, 0x5d, 0x00, 0xb4, 0xd9, 0x47, 0x01, 0xff, 0xc0,
+ 0xa0, 0x7c, 0xca, 0x9e, 0x08, 0x00, 0xb4, 0x00, 0x47, 0x01, 0xff, 0xc0,
+ 0xa0, 0xd6, 0x46, 0x08, 0xd7, 0x40, 0xa1, 0x59, 0x4f, 0x01, 0xf7, 0xc0,
+ 0xa1, 0x7d, 0x4d, 0x27, 0x71, 0x40, 0xa1, 0xe4, 0x12, 0xc0, 0xa2, 0x4b,
+ 0xc5, 0xe2, 0x7e, 0x0e, 0x7e, 0x11, 0x06, 0xc0, 0xa2, 0x5c, 0x11, 0xc0,
+ 0xa2, 0x72, 0x0d, 0xc0, 0xa2, 0x81, 0x15, 0xc0, 0xa2, 0x9f, 0xc6, 0xcf,
+ 0xe0, 0x0e, 0x7d, 0x3b, 0x00, 0xa2, 0xb2, 0x1c, 0xc0, 0xa2, 0xb6, 0xc4,
+ 0xe6, 0xa3, 0x0e, 0x7c, 0x19, 0x14, 0xc0, 0xa2, 0xc0, 0x42, 0x11, 0x3f,
+ 0xc0, 0xa2, 0xcc, 0x49, 0xb8, 0x1f, 0xc0, 0xa2, 0xd8, 0x4a, 0x9e, 0xc6,
+ 0x40, 0xa2, 0xf6, 0xc3, 0x24, 0x4e, 0x0e, 0x7a, 0x31, 0xc5, 0x80, 0xcc,
+ 0x0e, 0x7a, 0x29, 0xce, 0x72, 0xb6, 0x0e, 0x7a, 0x21, 0x44, 0x66, 0x72,
+ 0x40, 0xa3, 0x0c, 0xdb, 0x17, 0x4a, 0x0e, 0x7a, 0x09, 0x45, 0x00, 0xfa,
+ 0xc0, 0xa3, 0x18, 0xd7, 0x28, 0xdb, 0x0e, 0x79, 0xf1, 0x51, 0x57, 0x52,
+ 0x40, 0xa3, 0x6a, 0xc8, 0xc0, 0x4b, 0x08, 0xd2, 0x39, 0x44, 0x02, 0xcc,
+ 0x40, 0xa3, 0x7c, 0x46, 0x32, 0x95, 0xc0, 0xa3, 0x8e, 0x44, 0x27, 0x9b,
+ 0x40, 0xa3, 0x9a, 0xd6, 0x2e, 0xfd, 0x08, 0xd2, 0x29, 0xc9, 0x16, 0xa8,
+ 0x08, 0xd1, 0xf8, 0xca, 0xa5, 0xe2, 0x08, 0xd2, 0x21, 0xcb, 0x98, 0x9d,
+ 0x08, 0xd2, 0x19, 0xc4, 0x01, 0x0d, 0x08, 0xd2, 0x11, 0xc5, 0x33, 0x81,
+ 0x08, 0xd2, 0x08, 0x0d, 0xc0, 0xa3, 0xa6, 0xc2, 0x01, 0x0e, 0x08, 0xd1,
+ 0x89, 0x15, 0xc0, 0xa3, 0xb6, 0xc2, 0x06, 0x8c, 0x08, 0xd1, 0x69, 0xc2,
+ 0x00, 0x96, 0x08, 0xd1, 0x61, 0xc2, 0x00, 0x9a, 0x08, 0xd1, 0x59, 0xc2,
+ 0x1a, 0x36, 0x08, 0xd1, 0x51, 0xc2, 0x00, 0x02, 0x08, 0xd1, 0x49, 0x1c,
+ 0xc0, 0xa3, 0xc6, 0x06, 0xc0, 0xa3, 0xd0, 0x16, 0xc0, 0xa3, 0xe2, 0xc2,
+ 0x00, 0x3f, 0x08, 0xd1, 0x11, 0x04, 0xc0, 0xa3, 0xf4, 0x12, 0xc0, 0xa3,
+ 0xfe, 0x10, 0xc0, 0xa4, 0x08, 0xc2, 0x26, 0x94, 0x08, 0xd0, 0x91, 0x05,
+ 0xc0, 0xa4, 0x1e, 0x09, 0xc0, 0xa4, 0x28, 0x83, 0x08, 0xd0, 0x00, 0xcb,
+ 0x37, 0x9d, 0x08, 0xd0, 0x51, 0x45, 0x02, 0xcb, 0x40, 0xa4, 0x32, 0xd5,
+ 0x35, 0xef, 0x01, 0x51, 0xf1, 0x45, 0x01, 0x18, 0xc0, 0xa4, 0x52, 0xd4,
+ 0x3b, 0xfe, 0x01, 0x53, 0x28, 0x46, 0xd6, 0x22, 0xc0, 0xa4, 0x5e, 0xc3,
+ 0x2f, 0x41, 0x01, 0x4c, 0x08, 0xcf, 0x6a, 0x05, 0x01, 0x4c, 0x49, 0xcd,
+ 0x80, 0x3b, 0x01, 0x4c, 0x38, 0xc6, 0x5b, 0xd9, 0x01, 0x00, 0x69, 0x10,
+ 0xc0, 0xa4, 0x68, 0xc5, 0x45, 0xa9, 0x01, 0x00, 0x58, 0xcb, 0x99, 0x42,
+ 0x01, 0x37, 0xd9, 0xd3, 0x44, 0xdd, 0x0f, 0xa9, 0x81, 0xc6, 0xd5, 0x74,
+ 0x0f, 0xa3, 0xd1, 0xc4, 0xcd, 0x34, 0x0f, 0xa3, 0xc9, 0xcb, 0x91, 0x91,
+ 0x0f, 0x9f, 0x19, 0xc5, 0xb0, 0xc1, 0x0f, 0x9c, 0x71, 0xc6, 0xd3, 0xa0,
+ 0x0f, 0x9f, 0x79, 0xda, 0x1c, 0xa6, 0x01, 0x80, 0x20, 0x42, 0x07, 0x44,
+ 0xc0, 0xa4, 0x74, 0x42, 0x00, 0x58, 0xc0, 0xa4, 0x80, 0x46, 0x08, 0xd7,
+ 0xc0, 0xa4, 0x8c, 0xd3, 0x41, 0xe5, 0x05, 0x4e, 0x69, 0xcf, 0x63, 0x48,
+ 0x05, 0x4e, 0x11, 0x4f, 0x2e, 0x37, 0xc0, 0xa4, 0xb0, 0x4b, 0x6f, 0x71,
+ 0xc0, 0xa4, 0xc2, 0x45, 0x02, 0xcb, 0x40, 0xa4, 0xe4, 0x44, 0x0b, 0x1e,
+ 0xc0, 0xa4, 0xff, 0x45, 0x45, 0xff, 0x40, 0xa5, 0x0b, 0xd0, 0x0f, 0x62,
+ 0x01, 0x02, 0x41, 0xc4, 0x00, 0xfa, 0x00, 0x01, 0xf8, 0x49, 0x13, 0x56,
+ 0xc0, 0xa5, 0x17, 0x48, 0x9a, 0x8f, 0x40, 0xa5, 0x90, 0x47, 0x01, 0xff,
+ 0xc0, 0xa5, 0xe2, 0xd0, 0x58, 0xaf, 0x08, 0x75, 0x69, 0x4a, 0x57, 0x1f,
+ 0x40, 0xa6, 0x67, 0xc4, 0x21, 0x5e, 0x01, 0x50, 0x21, 0xc3, 0x05, 0xe3,
+ 0x01, 0x50, 0x18, 0xce, 0x71, 0xba, 0x01, 0x50, 0x31, 0xd5, 0x34, 0xf3,
+ 0x01, 0x50, 0x28, 0xce, 0x72, 0x46, 0x01, 0x50, 0x11, 0xcd, 0x7f, 0xfa,
+ 0x01, 0x50, 0x09, 0xcc, 0x82, 0x68, 0x01, 0x50, 0x00, 0x8e, 0x00, 0x00,
+ 0xc3, 0x00, 0xa6, 0x73, 0x94, 0x01, 0x32, 0x58, 0x95, 0x00, 0xa8, 0x2b,
+ 0x00, 0xa6, 0x7d, 0x90, 0x00, 0xa6, 0x83, 0x00, 0xa6, 0xa8, 0x85, 0x00,
+ 0xa5, 0x0b, 0x00, 0xa6, 0xdf, 0x04, 0xc0, 0xa7, 0x02, 0x96, 0x00, 0xa3,
+ 0x33, 0x00, 0xa7, 0x14, 0x19, 0xc0, 0xa7, 0x4f, 0x48, 0xc2, 0xb3, 0xc0,
+ 0xa7, 0x6b, 0x88, 0x00, 0xaa, 0xeb, 0x00, 0xa7, 0x75, 0x87, 0x00, 0xa0,
+ 0x0b, 0x00, 0xa7, 0x9a, 0x91, 0x00, 0xa0, 0x2b, 0x00, 0xa7, 0xa4, 0x94,
+ 0x00, 0xaa, 0x83, 0x00, 0xa7, 0xb6, 0x9b, 0x00, 0xa9, 0xf3, 0x00, 0xa7,
+ 0xd3, 0x8e, 0x00, 0xa7, 0x53, 0x00, 0xa7, 0xf0, 0x8f, 0x00, 0xa5, 0xdb,
+ 0x00, 0xa8, 0x14, 0x8d, 0x00, 0xa4, 0x1b, 0x00, 0xa8, 0x38, 0x92, 0x00,
+ 0xa2, 0x4b, 0x00, 0xa8, 0x58, 0x83, 0x00, 0xa0, 0x53, 0x00, 0xa8, 0x75,
+ 0x93, 0x00, 0xac, 0x2b, 0x00, 0xa8, 0x91, 0x0a, 0xc0, 0xa8, 0xa6, 0x49,
+ 0xb1, 0x29, 0xc0, 0xa8, 0xb0, 0x47, 0xc8, 0xbe, 0xc0, 0xa8, 0xbb, 0x8b,
+ 0x00, 0xa0, 0x1b, 0x00, 0xa8, 0xf9, 0xcc, 0x26, 0x0c, 0x00, 0xa0, 0xf0,
+ 0xc2, 0x00, 0x58, 0x0f, 0xab, 0x79, 0x9b, 0x0f, 0x9b, 0x60, 0xc3, 0x03,
+ 0x72, 0x01, 0x08, 0x29, 0x96, 0x01, 0x01, 0xc2, 0x00, 0xa9, 0x01, 0xc8,
+ 0xc0, 0x1b, 0x0f, 0xae, 0x19, 0xc5, 0x01, 0x62, 0x0f, 0xa6, 0x3a, 0x00,
+ 0xa9, 0x07, 0xca, 0xa1, 0xfa, 0x0f, 0x9d, 0x01, 0x90, 0x00, 0x16, 0x38,
+ 0xc9, 0xb3, 0x0f, 0x0f, 0x9c, 0x79, 0xc9, 0xb5, 0xf1, 0x0f, 0xd4, 0xd0,
+ 0xcb, 0x7b, 0xdf, 0x00, 0x00, 0x69, 0xc2, 0x00, 0xf2, 0x0f, 0xca, 0xa8,
+ 0x97, 0x08, 0x15, 0x93, 0x00, 0xa9, 0x0d, 0x94, 0x08, 0x15, 0x2b, 0x00,
+ 0xa9, 0x14, 0x8e, 0x08, 0x15, 0x1b, 0x00, 0xa9, 0x18, 0x83, 0x08, 0x15,
+ 0x03, 0x00, 0xa9, 0x1f, 0x93, 0x08, 0x15, 0x41, 0x84, 0x08, 0x15, 0x49,
+ 0x8f, 0x08, 0x15, 0x53, 0x00, 0xa9, 0x23, 0x91, 0x08, 0x15, 0x59, 0x86,
+ 0x08, 0x15, 0x13, 0x00, 0xa9, 0x2a, 0x96, 0x08, 0x15, 0x6b, 0x00, 0xa9,
+ 0x2e, 0x95, 0x08, 0x15, 0x83, 0x00, 0xa9, 0x35, 0xc4, 0xe5, 0x77, 0x08,
+ 0x15, 0x89, 0x90, 0x08, 0x15, 0xab, 0x00, 0xa9, 0x47, 0x9a, 0x08, 0x15,
+ 0xa1, 0x92, 0x08, 0x15, 0xbb, 0x00, 0xa9, 0x53, 0x8b, 0x08, 0x15, 0xcb,
+ 0x00, 0xa9, 0x57, 0x87, 0x08, 0x15, 0xd3, 0x00, 0xa9, 0x5b, 0x8d, 0x08,
+ 0x15, 0xe3, 0x00, 0xa9, 0x5f, 0x89, 0x08, 0x16, 0x03, 0x00, 0xa9, 0x63,
+ 0xc4, 0xe5, 0x7b, 0x08, 0x16, 0x30, 0x47, 0x01, 0xff, 0xc0, 0xa9, 0x67,
+ 0xcd, 0x77, 0x65, 0x08, 0x2b, 0x78, 0xcb, 0x98, 0xea, 0x0f, 0xa7, 0xc0,
+ 0x46, 0x01, 0xab, 0x40, 0xa9, 0xdb, 0x26, 0xc0, 0xa9, 0xe7, 0x25, 0xc0,
+ 0xaa, 0x27, 0x03, 0x40, 0xaa, 0x67, 0x03, 0xc0, 0xaa, 0x6f, 0x26, 0x40,
+ 0xaa, 0xa7, 0xc5, 0x68, 0x2a, 0x01, 0x74, 0x01, 0x03, 0x40, 0xaa, 0xe7,
+ 0x0e, 0xc0, 0xaa, 0xf5, 0xc4, 0xb5, 0x84, 0x01, 0x74, 0xd9, 0x0b, 0xc0,
+ 0xab, 0x01, 0xc2, 0x00, 0x37, 0x01, 0x75, 0x38, 0x07, 0xc0, 0xab, 0x0d,
+ 0x45, 0x00, 0x3f, 0xc0, 0xab, 0x19, 0x10, 0xc0, 0xab, 0x25, 0xc2, 0x0b,
+ 0xfd, 0x01, 0x74, 0xe1, 0x0b, 0xc0, 0xab, 0x31, 0x46, 0xd4, 0x72, 0xc0,
+ 0xab, 0x3d, 0xc4, 0xe6, 0x77, 0x01, 0x75, 0xb0, 0xc5, 0x15, 0x84, 0x01,
+ 0x74, 0x29, 0x43, 0x3c, 0xd1, 0x40, 0xab, 0x49, 0x11, 0xc0, 0xab, 0x55,
+ 0xc5, 0xcd, 0x81, 0x01, 0x75, 0x71, 0x45, 0xe1, 0x9d, 0xc0, 0xab, 0x65,
+ 0xc3, 0x8d, 0xb5, 0x01, 0x76, 0xc0, 0xc4, 0x13, 0x5a, 0x01, 0x74, 0x39,
+ 0xc5, 0x94, 0x83, 0x01, 0x74, 0x99, 0xc4, 0xe7, 0x13, 0x01, 0x76, 0x09,
+ 0xc5, 0xdf, 0x5e, 0x01, 0x77, 0x88, 0xc3, 0x05, 0x17, 0x01, 0x74, 0x41,
+ 0xc3, 0x0a, 0x1f, 0x01, 0x74, 0x48, 0xc9, 0xb1, 0x56, 0x01, 0x74, 0x51,
+ 0xc4, 0x13, 0xfd, 0x01, 0x74, 0xf1, 0xc2, 0x13, 0x1d, 0x01, 0x75, 0x40,
+ 0x44, 0xb7, 0x2e, 0xc0, 0xab, 0x71, 0x44, 0x0a, 0x68, 0x40, 0xab, 0x81,
+ 0x42, 0x00, 0x5c, 0xc0, 0xab, 0x8d, 0xc3, 0x00, 0x3a, 0x01, 0x74, 0xc1,
+ 0xc3, 0x00, 0xff, 0x01, 0x76, 0x38, 0x11, 0xc0, 0xab, 0x97, 0x07, 0x40,
+ 0xab, 0xaf, 0x03, 0xc0, 0xab, 0xbb, 0x44, 0x18, 0xd6, 0x40, 0xab, 0xc7,
+ 0xc3, 0x07, 0x5a, 0x01, 0x75, 0x19, 0xc3, 0x65, 0x6c, 0x01, 0x76, 0x50,
+ 0xc2, 0x04, 0x30, 0x01, 0x75, 0x59, 0xc2, 0x00, 0xc9, 0x01, 0x75, 0xc1,
+ 0x43, 0x03, 0x6c, 0x40, 0xab, 0xd3, 0x15, 0xc0, 0xab, 0xdd, 0xc4, 0x61,
+ 0x05, 0x01, 0x76, 0x59, 0x09, 0xc0, 0xab, 0xfb, 0x0e, 0xc0, 0xac, 0x07,
+ 0x16, 0xc0, 0xac, 0x13, 0xc4, 0x46, 0x42, 0x01, 0x76, 0xd9, 0x08, 0xc0,
+ 0xac, 0x25, 0x07, 0xc0, 0xac, 0x37, 0xc5, 0xa1, 0xff, 0x01, 0x77, 0x11,
+ 0xc4, 0xa5, 0xf2, 0x01, 0x77, 0x31, 0xc6, 0x8e, 0xaa, 0x01, 0x77, 0x80,
+ 0xc3, 0x05, 0x17, 0x01, 0x75, 0x89, 0x16, 0xc0, 0xac, 0x43, 0xc4, 0x08,
+ 0xdd, 0x01, 0x75, 0xa0, 0x45, 0x1c, 0x76, 0xc0, 0xac, 0x4f, 0xc4, 0xd9,
+ 0x1a, 0x01, 0x77, 0x20, 0xc4, 0x15, 0xa7, 0x01, 0x77, 0x59, 0x16, 0xc0,
+ 0xac, 0x59, 0xc6, 0x8e, 0xaa, 0x01, 0x77, 0x78, 0x90, 0x01, 0x8e, 0xe8,
+ 0x99, 0x01, 0x8e, 0x23, 0x00, 0xac, 0x65, 0x9c, 0x01, 0x8e, 0xbb, 0x00,
+ 0xac, 0x6d, 0x92, 0x01, 0x8e, 0x99, 0x96, 0x01, 0x8e, 0xc9, 0x89, 0x01,
+ 0x8e, 0xd0, 0x9c, 0x01, 0x8e, 0xab, 0x00, 0xac, 0x77, 0x92, 0x01, 0x8e,
+ 0x3b, 0x00, 0xac, 0x8d, 0x89, 0x01, 0x8e, 0xb1, 0xc3, 0xeb, 0xb8, 0x01,
+ 0x8f, 0x00, 0x86, 0x01, 0x8e, 0xd9, 0x9c, 0x01, 0x8e, 0xe1, 0x89, 0x01,
+ 0x8f, 0x10, 0xc8, 0x77, 0xf9, 0x0f, 0xb3, 0xf3, 0x00, 0xac, 0x93, 0xc5,
+ 0x00, 0xf9, 0x01, 0x38, 0x98, 0xce, 0x73, 0xdc, 0x0f, 0xa7, 0x19, 0xc8,
+ 0xbe, 0x23, 0x0f, 0xce, 0x00, 0x45, 0xd9, 0x96, 0xc0, 0xac, 0x99, 0x14,
+ 0x40, 0xac, 0xa5, 0x94, 0x0f, 0xd4, 0x89, 0xc2, 0x02, 0xa1, 0x01, 0x36,
+ 0x98, 0x47, 0xc7, 0xd7, 0xc0, 0xac, 0xb1, 0x47, 0x07, 0xb3, 0x40, 0xac,
+ 0xc0, 0x47, 0x01, 0xff, 0xc0, 0xac, 0xcf, 0x18, 0xc0, 0xad, 0x31, 0xcd,
+ 0x2d, 0xa6, 0x08, 0x8a, 0x19, 0x06, 0xc0, 0xad, 0x3d, 0x15, 0xc0, 0xad,
+ 0x4f, 0xc7, 0xc7, 0x91, 0x08, 0x89, 0xa1, 0xc7, 0xcb, 0x57, 0x08, 0x89,
+ 0x91, 0xc6, 0xc1, 0x15, 0x08, 0x89, 0x88, 0x4f, 0x2e, 0x37, 0xc0, 0xad,
+ 0x5b, 0x4b, 0x6f, 0x71, 0xc0, 0xad, 0x79, 0x47, 0x01, 0xff, 0xc0, 0xad,
+ 0x98, 0x4c, 0x11, 0x33, 0xc0, 0xae, 0x01, 0x46, 0x08, 0xd7, 0x40, 0xae,
+ 0x11, 0xcc, 0x89, 0x70, 0x0f, 0xb5, 0xc0, 0x47, 0x37, 0x49, 0xc0, 0xae,
+ 0x35, 0x47, 0x01, 0xff, 0x40, 0xae, 0x48, 0xc8, 0x1a, 0x50, 0x0f, 0xb1,
+ 0xf9, 0xc4, 0x01, 0xa7, 0x0f, 0xb1, 0x10, 0x00, 0xc0, 0xae, 0xad, 0xc9,
+ 0xb0, 0x87, 0x01, 0x36, 0x61, 0x43, 0x00, 0x5f, 0x40, 0xae, 0xbd, 0xca,
+ 0xa0, 0xc4, 0x0f, 0x9b, 0xc1, 0xc5, 0xcc, 0xbe, 0x0f, 0xd5, 0x98, 0x05,
+ 0xc0, 0xae, 0xcf, 0x09, 0xc0, 0xae, 0xf2, 0x03, 0xc0, 0xae, 0xfc, 0x14,
+ 0xc0, 0xaf, 0x12, 0x0e, 0xc0, 0xaf, 0x1a, 0x42, 0x00, 0xbd, 0xc0, 0xaf,
+ 0x30, 0x16, 0xc0, 0xaf, 0x3c, 0x06, 0xc0, 0xaf, 0x57, 0x07, 0xc0, 0xaf,
+ 0x65, 0x08, 0xc0, 0xaf, 0x71, 0x15, 0xc0, 0xaf, 0x7d, 0x04, 0xc0, 0xaf,
+ 0x9f, 0x42, 0x06, 0x6b, 0xc0, 0xaf, 0xa9, 0x17, 0xc0, 0xaf, 0xb5, 0x0b,
+ 0xc0, 0xaf, 0xc5, 0x47, 0x31, 0xdd, 0xc0, 0xaf, 0xcf, 0x11, 0xc0, 0xaf,
+ 0xdb, 0x0f, 0xc0, 0xaf, 0xf6, 0x12, 0xc0, 0xb0, 0x02, 0x10, 0xc0, 0xb0,
+ 0x0c, 0xc9, 0xb1, 0x32, 0x00, 0xd5, 0xa9, 0x42, 0x03, 0x76, 0xc0, 0xb0,
+ 0x18, 0x49, 0x02, 0x5b, 0xc0, 0xb0, 0x2a, 0x42, 0x04, 0x4c, 0x40, 0xb0,
+ 0x36, 0xce, 0x70, 0xda, 0x01, 0x1c, 0x21, 0xc6, 0x86, 0xa3, 0x01, 0x10,
+ 0x09, 0xc7, 0x50, 0x6b, 0x0f, 0xae, 0xe1, 0xc3, 0x1c, 0x77, 0x0f, 0xcf,
+ 0x68, 0x47, 0xb4, 0x26, 0xc0, 0xb0, 0x42, 0x83, 0x00, 0x01, 0x60, 0x48,
+ 0xba, 0x53, 0xc0, 0xb0, 0x4e, 0x42, 0x00, 0x2a, 0x40, 0xb0, 0x5a, 0xd7,
+ 0x18, 0xfe, 0x01, 0x1c, 0x09, 0x45, 0xce, 0xa8, 0xc0, 0xb0, 0x66, 0xcc,
+ 0x68, 0xcd, 0x01, 0x11, 0x71, 0x44, 0x7a, 0xbb, 0x40, 0xb0, 0x72, 0xc6,
+ 0xd1, 0x18, 0x0f, 0xa3, 0xb9, 0xc4, 0x02, 0xcb, 0x0f, 0xb5, 0x38, 0xc9,
+ 0xb0, 0xfc, 0x0f, 0x9c, 0x51, 0xcb, 0x91, 0xb2, 0x0f, 0xb0, 0xb1, 0xc9,
+ 0x9b, 0x5f, 0x0f, 0xb0, 0xa8, 0x00, 0x40, 0xb0, 0x7e, 0xc2, 0x00, 0x56,
+ 0x0f, 0x9b, 0x99, 0x87, 0x0f, 0x9b, 0x50, 0xcb, 0x94, 0xf6, 0x0f, 0x89,
+ 0x79, 0xca, 0xa4, 0xde, 0x00, 0x05, 0x48, 0x15, 0xc0, 0xb0, 0x8a, 0x05,
+ 0xc0, 0xb0, 0x96, 0x46, 0xd8, 0xc8, 0xc0, 0xb0, 0xa2, 0x4b, 0x8f, 0xef,
+ 0xc0, 0xb0, 0xb4, 0x08, 0xc0, 0xb0, 0xcc, 0xd5, 0x33, 0x64, 0x01, 0x67,
+ 0xf8, 0xc7, 0xab, 0x67, 0x0f, 0xca, 0x11, 0xc9, 0xae, 0xd7, 0x0f, 0x9b,
+ 0xd8, 0x42, 0x00, 0x3c, 0xc0, 0xb0, 0xd8, 0xc3, 0x02, 0x69, 0x01, 0x02,
+ 0x80, 0x45, 0x00, 0xbd, 0xc0, 0xb0, 0xfa, 0x46, 0x10, 0x15, 0x40, 0xb1,
+ 0x20, 0x46, 0x01, 0xab, 0x40, 0xb1, 0x3c, 0xce, 0x6d, 0x14, 0x0f, 0xa2,
+ 0x79, 0xc8, 0x77, 0xf9, 0x0f, 0x9d, 0x60, 0x42, 0x00, 0x3c, 0xc0, 0xb1,
+ 0x54, 0x00, 0x40, 0xb1, 0xb6, 0xc6, 0xd0, 0xe2, 0x0f, 0x9d, 0x51, 0xcf,
+ 0x6a, 0x8c, 0x01, 0x50, 0x81, 0xcc, 0x07, 0x9b, 0x00, 0x02, 0xf0, 0x1c,
+ 0xc0, 0xb1, 0xc2, 0x97, 0x09, 0x18, 0x5b, 0x00, 0xb1, 0xdd, 0x16, 0xc0,
+ 0xb2, 0x18, 0x15, 0xc0, 0xb2, 0x34, 0x10, 0xc0, 0xb2, 0x4d, 0x0f, 0xc0,
+ 0xb2, 0x69, 0x0e, 0xc0, 0xb2, 0x85, 0x0d, 0xc0, 0xb2, 0x9a, 0x0a, 0xc0,
+ 0xb2, 0xbb, 0x09, 0xc0, 0xb2, 0xd0, 0x87, 0x09, 0x04, 0x53, 0x00, 0xb2,
+ 0xe9, 0x06, 0xc0, 0xb3, 0x21, 0x04, 0xc0, 0xb3, 0x36, 0x83, 0x09, 0x00,
+ 0x03, 0x00, 0xb3, 0x4b, 0x12, 0xc0, 0xb3, 0x8f, 0x14, 0xc0, 0xb3, 0xa6,
+ 0x8b, 0x09, 0x09, 0xfa, 0x00, 0xb3, 0xb5, 0x49, 0x20, 0x6a, 0xc0, 0xb3,
+ 0xeb, 0xce, 0x73, 0x88, 0x09, 0x23, 0x89, 0xd9, 0x1f, 0xb5, 0x09, 0x23,
+ 0x80, 0x42, 0x01, 0xce, 0xc0, 0xb3, 0xfd, 0x07, 0xc0, 0xb4, 0x09, 0x15,
+ 0xc0, 0xb4, 0x15, 0x08, 0xc0, 0xb4, 0x27, 0x11, 0xc0, 0xb4, 0x33, 0x16,
+ 0x40, 0xb4, 0x3f, 0x42, 0x00, 0x32, 0xc0, 0xb4, 0x4b, 0xc9, 0xb0, 0x6c,
+ 0x0f, 0xca, 0x60, 0x45, 0x39, 0xfa, 0xc0, 0xb4, 0x57, 0xca, 0xa8, 0x44,
+ 0x0f, 0x9a, 0xd8, 0xcf, 0x56, 0xdd, 0x01, 0x37, 0xf1, 0xca, 0xa7, 0x86,
+ 0x0f, 0xcb, 0x20, 0xcc, 0x88, 0x74, 0x01, 0x08, 0x21, 0x45, 0x03, 0x5d,
+ 0x40, 0xb4, 0x63, 0x56, 0x31, 0xa7, 0xc0, 0xb4, 0x6f, 0x46, 0x0b, 0x9b,
+ 0x40, 0xb4, 0xce, 0xd0, 0x5b, 0x0f, 0x00, 0xe9, 0x59, 0xc8, 0xbc, 0xcb,
+ 0x00, 0x26, 0x01, 0xcd, 0x80, 0xe4, 0x05, 0x33, 0x70, 0x46, 0x02, 0x00,
+ 0xc0, 0xb4, 0xda, 0x48, 0x1b, 0x0d, 0x40, 0xb5, 0x59, 0x42, 0xeb, 0xbe,
+ 0xc0, 0xb5, 0x6b, 0x1e, 0xc0, 0xb5, 0x73, 0x1d, 0x40, 0xb5, 0x7b, 0x19,
+ 0xc0, 0xb5, 0xa3, 0x1a, 0xc0, 0xb5, 0xb3, 0x1c, 0xc0, 0xb5, 0xbb, 0x83,
+ 0x08, 0x40, 0x01, 0x87, 0x08, 0x40, 0x09, 0x8b, 0x08, 0x40, 0x11, 0x91,
+ 0x08, 0x40, 0x19, 0x97, 0x08, 0x40, 0x21, 0x0c, 0xc0, 0xb5, 0xc3, 0x0d,
+ 0xc0, 0xb5, 0xcb, 0x0e, 0xc0, 0xb5, 0xdf, 0x0f, 0xc0, 0xb5, 0xf3, 0x10,
+ 0xc0, 0xb6, 0x07, 0x12, 0xc0, 0xb6, 0x1b, 0x14, 0xc0, 0xb6, 0x2f, 0x15,
+ 0xc0, 0xb6, 0x43, 0x16, 0x40, 0xb6, 0x57, 0xc4, 0x24, 0x35, 0x0f, 0xdf,
+ 0xc9, 0xc4, 0x16, 0x57, 0x0f, 0xdf, 0x81, 0xc3, 0x05, 0x17, 0x0f, 0xdf,
+ 0x89, 0x16, 0xc0, 0xb6, 0x6b, 0x08, 0xc0, 0xb6, 0x77, 0x15, 0xc0, 0xb6,
+ 0x83, 0xc5, 0x05, 0x1b, 0x0f, 0xdf, 0xc0, 0xe0, 0x07, 0xa7, 0x01, 0x51,
+ 0x90, 0xc2, 0x00, 0xff, 0x01, 0x18, 0xa1, 0xc8, 0x07, 0x88, 0x00, 0x05,
+ 0x38, 0xe0, 0x01, 0xa7, 0x0f, 0xc9, 0x60, 0x46, 0xd7, 0x6c, 0xc0, 0xb6,
+ 0x8f, 0x42, 0x00, 0xc0, 0x40, 0xb6, 0x9b, 0x48, 0x7a, 0xe6, 0xc0, 0xb6,
+ 0xb7, 0x45, 0x02, 0xcb, 0xc0, 0xb6, 0xc3, 0x0e, 0xc0, 0xb6, 0xd3, 0x4b,
+ 0x6f, 0x71, 0xc0, 0xb6, 0xdf, 0xd6, 0x2c, 0xd7, 0x00, 0x6f, 0xa0, 0x14,
+ 0xc0, 0xb6, 0xf5, 0x08, 0xc0, 0xb7, 0x01, 0xcb, 0x1c, 0xe0, 0x0e, 0xd4,
+ 0x59, 0x05, 0xc0, 0xb7, 0x1b, 0x15, 0xc0, 0xb7, 0x25, 0x0e, 0xc0, 0xb7,
+ 0x43, 0x42, 0x02, 0x6a, 0xc0, 0xb7, 0x4d, 0x16, 0xc0, 0xb7, 0x53, 0xdb,
+ 0x19, 0x30, 0x0e, 0xd3, 0x79, 0x07, 0xc0, 0xb7, 0x61, 0x0a, 0xc0, 0xb7,
+ 0x73, 0x10, 0xc0, 0xb7, 0x80, 0x42, 0x01, 0xc2, 0xc0, 0xb7, 0x8c, 0x42,
+ 0x00, 0xd8, 0xc0, 0xb7, 0x98, 0x44, 0x8c, 0x72, 0xc0, 0xb7, 0xa4, 0x06,
+ 0xc0, 0xb7, 0xb0, 0x46, 0xd5, 0x62, 0x40, 0xb7, 0xbc, 0xe0, 0x0b, 0x47,
+ 0x01, 0x39, 0xf1, 0x47, 0x04, 0x8a, 0x40, 0xb7, 0xce, 0x4b, 0x6f, 0x71,
+ 0xc0, 0xb7, 0xe0, 0x47, 0x01, 0xff, 0xc0, 0xb8, 0x03, 0x15, 0xc0, 0xb8,
+ 0x6a, 0xd0, 0x5e, 0xcf, 0x08, 0xae, 0x49, 0x4e, 0x21, 0xdd, 0xc0, 0xb8,
+ 0x74, 0x06, 0x40, 0xb8, 0x86, 0x46, 0x04, 0x91, 0xc0, 0xb8, 0x92, 0x46,
+ 0x01, 0x31, 0x40, 0xb8, 0xaa, 0xc9, 0x09, 0xde, 0x01, 0x54, 0xe9, 0xcc,
+ 0x06, 0xfb, 0x01, 0x54, 0xf0, 0xdb, 0x16, 0xf9, 0x01, 0x54, 0xf9, 0xde,
+ 0x0e, 0xa9, 0x01, 0x55, 0x00, 0xcb, 0x6d, 0x95, 0x0f, 0xb4, 0x11, 0xc8,
+ 0xba, 0x43, 0x0f, 0x9a, 0xe0, 0xc3, 0x00, 0xe4, 0x0f, 0xb4, 0x49, 0xcd,
+ 0x76, 0x7b, 0x0f, 0xaf, 0xe8, 0x00, 0xc0, 0xb8, 0xc2, 0x45, 0x2d, 0xe4,
+ 0x40, 0xb8, 0xd8, 0xc6, 0xd6, 0x04, 0x01, 0x34, 0xd1, 0xcb, 0x97, 0xe2,
+ 0x01, 0x34, 0xa8, 0x44, 0x00, 0x48, 0xc0, 0xb8, 0xf4, 0xc6, 0xd9, 0x46,
+ 0x0f, 0x9a, 0x98, 0xd2, 0x4f, 0x68, 0x01, 0x13, 0x19, 0xcd, 0x80, 0x55,
+ 0x00, 0x04, 0xe0, 0x45, 0x01, 0xac, 0xc0, 0xb9, 0x00, 0x48, 0xbb, 0x0b,
+ 0x40, 0xb9, 0x0c, 0xc7, 0xc8, 0x78, 0x0f, 0xce, 0x11, 0xc3, 0x07, 0x5a,
+ 0x01, 0x30, 0x98, 0x45, 0x02, 0xcb, 0xc0, 0xb9, 0x18, 0x4b, 0x6f, 0x71,
+ 0xc0, 0xb9, 0x2a, 0x47, 0x01, 0xff, 0xc0, 0xb9, 0x50, 0xd4, 0x3d, 0xa2,
+ 0x05, 0x45, 0xa1, 0x06, 0x40, 0xb9, 0xbb, 0xd4, 0x11, 0x1f, 0x0f, 0xb3,
+ 0xd1, 0x46, 0x11, 0x8d, 0x40, 0xb9, 0xcd, 0xc8, 0xbe, 0xc3, 0x0f, 0xa7,
+ 0x08, 0x03, 0xc0, 0xb9, 0xd9, 0x15, 0xc0, 0xb9, 0xef, 0xc4, 0xe4, 0x09,
+ 0x00, 0x41, 0xd9, 0x1c, 0xc0, 0xb9, 0xfb, 0xc5, 0x7d, 0x1d, 0x00, 0x41,
+ 0xc9, 0xcd, 0x7d, 0x15, 0x00, 0x41, 0xb9, 0xc3, 0xeb, 0xfa, 0x00, 0x41,
+ 0x99, 0xc7, 0xcc, 0x92, 0x00, 0x41, 0x80, 0x44, 0x00, 0xfb, 0xc0, 0xba,
+ 0x07, 0x4f, 0x0f, 0x9a, 0x40, 0xba, 0x23, 0x15, 0xc0, 0xba, 0x33, 0x91,
+ 0x00, 0x41, 0x5b, 0x00, 0xba, 0x3f, 0x8b, 0x00, 0x41, 0x51, 0x45, 0x2c,
+ 0x27, 0xc0, 0xba, 0x48, 0x97, 0x00, 0x41, 0x39, 0x83, 0x00, 0x41, 0x1b,
+ 0x00, 0xba, 0x5b, 0x87, 0x00, 0x40, 0xe8, 0x16, 0xc0, 0xba, 0x5f, 0x15,
+ 0xc0, 0xba, 0x71, 0xc4, 0x4c, 0x16, 0x00, 0x40, 0x99, 0xc3, 0xec, 0xff,
+ 0x00, 0x40, 0x91, 0xc2, 0x02, 0xf8, 0x00, 0x40, 0x81, 0x0b, 0xc0, 0xba,
+ 0x7d, 0xc3, 0x21, 0x00, 0x00, 0x40, 0x69, 0xc3, 0x8e, 0x5e, 0x00, 0x40,
+ 0x61, 0xc5, 0xdc, 0xcf, 0x00, 0x40, 0x59, 0xc4, 0xe5, 0x9f, 0x00, 0x40,
+ 0x51, 0xc3, 0x73, 0x31, 0x00, 0x40, 0x49, 0xc3, 0x0b, 0xa2, 0x00, 0x40,
+ 0x31, 0x04, 0xc0, 0xba, 0x89, 0xc5, 0x4c, 0x0f, 0x00, 0x40, 0x19, 0xc5,
+ 0xde, 0xc3, 0x00, 0x40, 0x11, 0xc4, 0xd0, 0xea, 0x00, 0x40, 0x00, 0xcf,
+ 0x41, 0x18, 0x01, 0x31, 0x00, 0x8a, 0x0f, 0xcd, 0x29, 0xc8, 0x41, 0xdd,
+ 0x0f, 0x9d, 0x80, 0x87, 0x01, 0x19, 0x99, 0x4a, 0x22, 0x60, 0x40, 0xba,
+ 0x95, 0x44, 0x01, 0x5e, 0xc0, 0xba, 0xa1, 0xc6, 0xc4, 0xdd, 0x0f, 0xb1,
+ 0x50, 0xcc, 0x87, 0x6c, 0x0f, 0xb2, 0x11, 0xcd, 0x7e, 0x0c, 0x0f, 0xb2,
+ 0x08, 0x4c, 0x23, 0xb4, 0xc0, 0xba, 0xb3, 0x53, 0x46, 0x59, 0x40, 0xba,
+ 0xc5, 0x8d, 0x0f, 0xcc, 0x41, 0x44, 0x41, 0xed, 0x40, 0xba, 0xd1, 0xc6,
+ 0x03, 0xfa, 0x01, 0x3a, 0x69, 0xc4, 0x0e, 0xa5, 0x01, 0x39, 0x81, 0xcb,
+ 0x94, 0x46, 0x01, 0x38, 0xf0, 0xc6, 0xd5, 0xf2, 0x0f, 0x9b, 0x39, 0x4b,
+ 0x8e, 0x75, 0x40, 0xbb, 0x01, 0x4c, 0x8f, 0x1c, 0xc0, 0xbb, 0x81, 0xc4,
+ 0x28, 0x52, 0x0f, 0x9b, 0x81, 0x00, 0xc0, 0xbb, 0x99, 0x95, 0x0f, 0xd3,
+ 0x98, 0xc4, 0xe5, 0x47, 0x0f, 0xb6, 0x69, 0xc7, 0xc6, 0x8e, 0x0f, 0xb6,
+ 0x90, 0xc2, 0x01, 0x5b, 0x00, 0x00, 0x79, 0xc3, 0x01, 0xc3, 0x00, 0x00,
+ 0x70, 0xc2, 0x00, 0xe5, 0x0f, 0xcc, 0x11, 0xc2, 0x04, 0x35, 0x01, 0x32,
+ 0x78, 0x46, 0x00, 0x3e, 0xc0, 0xbb, 0xc1, 0x48, 0x01, 0xf7, 0xc0, 0xbb,
+ 0xd1, 0xd4, 0x1b, 0x0c, 0x0f, 0xb3, 0x80, 0xc2, 0x01, 0x04, 0x0f, 0xad,
+ 0xa9, 0xc7, 0xc7, 0xc2, 0x0f, 0xd4, 0xd8, 0xcd, 0x79, 0xe2, 0x01, 0x36,
+ 0x20, 0x45, 0x18, 0xd5, 0xc0, 0xbb, 0xf5, 0x45, 0x20, 0x8c, 0x40, 0xbc,
+ 0x25, 0xd0, 0x0c, 0x92, 0x0f, 0xb3, 0x58, 0xcd, 0x79, 0xae, 0x01, 0x4f,
+ 0xb0, 0xc3, 0xec, 0x6c, 0x08, 0xd5, 0x11, 0xc3, 0xd8, 0xbc, 0x08, 0xd5,
+ 0x08, 0x45, 0x00, 0x39, 0x40, 0xbc, 0x55, 0xc5, 0xdb, 0x03, 0x08, 0xd4,
+ 0xe9, 0xcb, 0x98, 0x9d, 0x08, 0xd4, 0xe1, 0xc4, 0x01, 0x0d, 0x08, 0xd4,
+ 0xd9, 0xc5, 0x33, 0x81, 0x08, 0xd4, 0xd0, 0xc8, 0xc0, 0x4b, 0x08, 0xd4,
+ 0xc9, 0x44, 0x02, 0xcc, 0x40, 0xbc, 0x61, 0xc2, 0x00, 0x02, 0x08, 0xd4,
+ 0xa9, 0x95, 0x08, 0xd4, 0xa3, 0x00, 0xbc, 0x79, 0x8e, 0x08, 0xd4, 0x91,
+ 0x94, 0x08, 0xd4, 0x89, 0x8f, 0x08, 0xd4, 0x81, 0x84, 0x08, 0xd4, 0x79,
+ 0x90, 0x08, 0xd4, 0x73, 0x00, 0xbc, 0x7d, 0x86, 0x08, 0xd4, 0x69, 0x8d,
+ 0x08, 0xd4, 0x59, 0x89, 0x08, 0xd4, 0x50, 0x15, 0xc0, 0xbc, 0x81, 0xc2,
+ 0x00, 0x96, 0x08, 0xd4, 0x39, 0xc2, 0x00, 0x9a, 0x08, 0xd4, 0x30, 0x0d,
+ 0xc0, 0xbc, 0x8b, 0xc2, 0x01, 0x0e, 0x08, 0xd4, 0x11, 0x15, 0xc0, 0xbc,
+ 0x9b, 0xc2, 0x06, 0x8c, 0x08, 0xd3, 0xf1, 0xc2, 0x00, 0x96, 0x08, 0xd3,
+ 0xe9, 0xc2, 0x00, 0x9a, 0x08, 0xd3, 0xe1, 0xc2, 0x1a, 0x36, 0x08, 0xd3,
+ 0xd9, 0xc2, 0x00, 0x02, 0x08, 0xd3, 0xd1, 0x1c, 0xc0, 0xbc, 0xab, 0x06,
+ 0xc0, 0xbc, 0xb5, 0x16, 0xc0, 0xbc, 0xc9, 0xc2, 0x00, 0x3f, 0x08, 0xd3,
+ 0xa1, 0x04, 0xc0, 0xbc, 0xdb, 0x12, 0xc0, 0xbc, 0xe5, 0x10, 0xc0, 0xbc,
+ 0xef, 0x0c, 0xc0, 0xbd, 0x05, 0x05, 0xc0, 0xbd, 0x0f, 0x09, 0xc0, 0xbd,
+ 0x19, 0x83, 0x08, 0xd2, 0x80, 0xcb, 0x37, 0x9d, 0x08, 0xd2, 0xd9, 0x45,
+ 0x02, 0xcb, 0x40, 0xbd, 0x23, 0xd1, 0x35, 0xb4, 0x0f, 0xad, 0x61, 0xc9,
+ 0xb2, 0x52, 0x0f, 0x9b, 0x31, 0xc6, 0x57, 0xf6, 0x00, 0x05, 0x68, 0xc4,
+ 0x24, 0x35, 0x08, 0x87, 0xc9, 0xc5, 0x05, 0x1b, 0x08, 0x87, 0xc1, 0x15,
+ 0xc0, 0xbd, 0x43, 0x08, 0xc0, 0xbd, 0x4f, 0x16, 0xc0, 0xbd, 0x5b, 0xc3,
+ 0x05, 0x17, 0x08, 0x87, 0x89, 0xc4, 0x16, 0x57, 0x08, 0x87, 0x80, 0x42,
+ 0x00, 0x3f, 0xc0, 0xbd, 0x67, 0x07, 0xc0, 0xbd, 0x6f, 0xc2, 0x28, 0x39,
+ 0x08, 0x87, 0x31, 0xc2, 0x49, 0xc7, 0x08, 0x87, 0x29, 0xc2, 0x14, 0x94,
+ 0x08, 0x87, 0x21, 0xc2, 0x00, 0x45, 0x08, 0x87, 0x11, 0x10, 0xc0, 0xbd,
+ 0x79, 0xc3, 0xec, 0x4b, 0x08, 0x87, 0x01, 0xc3, 0xc0, 0xc1, 0x08, 0x86,
+ 0xf9, 0xc3, 0x1a, 0x74, 0x08, 0x86, 0xf1, 0xc3, 0x14, 0xe6, 0x08, 0x86,
+ 0xe9, 0xc3, 0x45, 0x00, 0x08, 0x86, 0xe1, 0xc3, 0x65, 0x4d, 0x08, 0x86,
+ 0xd9, 0xc3, 0xcd, 0x3d, 0x08, 0x86, 0xd1, 0xc3, 0x14, 0xdf, 0x08, 0x86,
+ 0xc1, 0xc3, 0xb2, 0x3a, 0x08, 0x86, 0xa9, 0xc3, 0x75, 0xdf, 0x08, 0x86,
+ 0xa1, 0xc3, 0xec, 0x12, 0x08, 0x86, 0x99, 0xc3, 0x3f, 0xf3, 0x08, 0x86,
+ 0x91, 0xc3, 0x00, 0x44, 0x08, 0x86, 0x89, 0xc3, 0x40, 0xed, 0x08, 0x86,
+ 0x80, 0xd4, 0x3e, 0xce, 0x08, 0x7a, 0xc9, 0x44, 0x0a, 0x1f, 0xc0, 0xbd,
+ 0x8b, 0xcf, 0x3e, 0xd3, 0x08, 0x7a, 0xb8, 0xc3, 0x05, 0x17, 0x08, 0x7a,
+ 0x8b, 0x00, 0xbd, 0x9a, 0x16, 0x40, 0xbd, 0xa0, 0xcc, 0x0a, 0x7b, 0x08,
+ 0x7a, 0x81, 0xca, 0xa4, 0x34, 0x08, 0x7a, 0x79, 0xcf, 0x67, 0xcb, 0x08,
+ 0x7a, 0x71, 0x45, 0x04, 0x63, 0xc0, 0xbd, 0xac, 0x46, 0x0e, 0xd3, 0xc0,
+ 0xbd, 0xb8, 0x49, 0x0b, 0x79, 0xc0, 0xbd, 0xc4, 0x44, 0x0b, 0xf8, 0x40,
+ 0xbd, 0xd0, 0x0e, 0xc0, 0xbd, 0xdc, 0xc4, 0xe6, 0xaf, 0x08, 0x7a, 0x19,
+ 0xc3, 0x5f, 0x3d, 0x08, 0x7a, 0x11, 0x15, 0xc0, 0xbd, 0xe8, 0xc9, 0x5d,
+ 0xef, 0x08, 0x7a, 0x01, 0xc2, 0x03, 0x07, 0x08, 0x79, 0xf1, 0x03, 0xc0,
+ 0xbd, 0xf2, 0xc3, 0x21, 0x00, 0x08, 0x79, 0xd9, 0xc3, 0x04, 0xae, 0x08,
+ 0x79, 0xd1, 0xc4, 0xe5, 0x53, 0x08, 0x79, 0xc1, 0xc4, 0x4d, 0x48, 0x08,
+ 0x79, 0xb9, 0xc2, 0x00, 0x5b, 0x08, 0x79, 0x9b, 0x00, 0xbd, 0xfe, 0xc5,
+ 0x4d, 0x42, 0x08, 0x79, 0xa9, 0xc3, 0x7c, 0xad, 0x08, 0x79, 0xa1, 0xc5,
+ 0x9e, 0xbc, 0x08, 0x79, 0x91, 0xc4, 0xe5, 0xaf, 0x08, 0x79, 0x88, 0x00,
+ 0xc0, 0xbe, 0x04, 0x42, 0x00, 0x3c, 0x40, 0xbe, 0x60, 0xcd, 0x7b, 0xd0,
+ 0x0f, 0xaa, 0x29, 0x15, 0xc0, 0xbe, 0xb8, 0x06, 0xc0, 0xbe, 0xdf, 0x10,
+ 0xc0, 0xbe, 0xe9, 0xce, 0x71, 0x3c, 0x01, 0x20, 0xf9, 0xd0, 0x5c, 0x2f,
+ 0x01, 0x20, 0xf1, 0xcf, 0x61, 0x1d, 0x01, 0x20, 0xe9, 0x08, 0xc0, 0xbe,
+ 0xf3, 0x07, 0xc0, 0xbe, 0xff, 0x42, 0x00, 0x2e, 0xc0, 0xbf, 0x09, 0xd3,
+ 0x41, 0x60, 0x01, 0x20, 0x59, 0xc9, 0x1b, 0xa2, 0x01, 0x20, 0x51, 0xd5,
+ 0x37, 0xa8, 0x01, 0x20, 0x49, 0x04, 0xc0, 0xbf, 0x15, 0xcb, 0x4f, 0x03,
+ 0x01, 0x20, 0x31, 0xd2, 0x4b, 0xe4, 0x01, 0x5c, 0xb8, 0x47, 0x01, 0xff,
+ 0xc0, 0xbf, 0x21, 0x0a, 0xc0, 0xbf, 0x93, 0x4d, 0x80, 0xa3, 0xc0, 0xbf,
+ 0xa5, 0x14, 0xc0, 0xbf, 0xb1, 0x47, 0xc5, 0x84, 0xc0, 0xbf, 0xc3, 0x47,
+ 0xcb, 0x3b, 0xc0, 0xbf, 0xd5, 0xd1, 0x4a, 0x46, 0x00, 0x38, 0x79, 0x42,
+ 0x00, 0xea, 0xc0, 0xbf, 0xe7, 0x42, 0x06, 0x82, 0xc0, 0xbf, 0xf3, 0x07,
+ 0xc0, 0xbf, 0xff, 0xc7, 0xcd, 0x6b, 0x00, 0x3a, 0x51, 0xc5, 0x23, 0x17,
+ 0x00, 0x3a, 0x49, 0xcc, 0x84, 0xe4, 0x00, 0x3a, 0x01, 0xc9, 0xae, 0x35,
+ 0x00, 0x3a, 0x09, 0x16, 0xc0, 0xc0, 0x0b, 0x4d, 0x7b, 0x9c, 0x40, 0xc0,
+ 0x17, 0x83, 0x05, 0x40, 0x01, 0x8b, 0x05, 0x40, 0x09, 0x97, 0x05, 0x40,
+ 0x19, 0x87, 0x05, 0x40, 0x21, 0x91, 0x05, 0x40, 0x29, 0x0d, 0xc0, 0xc0,
+ 0x23, 0x09, 0xc0, 0xc0, 0x2d, 0x05, 0xc0, 0xc0, 0x37, 0x16, 0xc0, 0xc0,
+ 0x41, 0x06, 0xc0, 0xc0, 0x4f, 0xc2, 0x03, 0x76, 0x05, 0x41, 0x11, 0x0c,
+ 0xc0, 0xc0, 0x5d, 0xc2, 0x00, 0x10, 0x05, 0x40, 0xc1, 0x12, 0xc0, 0xc0,
+ 0x67, 0x04, 0xc0, 0xc0, 0x71, 0xc2, 0x01, 0xc2, 0x05, 0x40, 0xe9, 0x14,
+ 0xc0, 0xc0, 0x7b, 0xc2, 0x00, 0x36, 0x05, 0x40, 0xf9, 0xc2, 0x05, 0x3b,
+ 0x05, 0x41, 0x08, 0xc8, 0xb9, 0xa3, 0x05, 0x40, 0x11, 0xc7, 0x60, 0x68,
+ 0x05, 0x40, 0x31, 0x03, 0x40, 0xc0, 0x85, 0x83, 0x05, 0x41, 0x19, 0x8b,
+ 0x05, 0x41, 0x21, 0x97, 0x05, 0x41, 0x29, 0x87, 0x05, 0x41, 0x31, 0xc2,
+ 0x02, 0x14, 0x05, 0x41, 0x38, 0xc3, 0xd8, 0xbc, 0x05, 0x41, 0x41, 0xc3,
+ 0xec, 0x6c, 0x05, 0x41, 0x49, 0xc3, 0xec, 0x6f, 0x05, 0x41, 0x51, 0xc3,
+ 0xec, 0x72, 0x05, 0x41, 0x58, 0xca, 0xa1, 0x5a, 0x0f, 0xa5, 0x61, 0xc5,
+ 0xde, 0x5a, 0x0f, 0xb5, 0x20, 0x0d, 0xc0, 0xc0, 0x91, 0xd0, 0x59, 0x2f,
+ 0x0f, 0xb2, 0x50, 0x87, 0x01, 0x3a, 0x3b, 0x00, 0xc0, 0xa0, 0xc9, 0x7c,
+ 0x55, 0x0f, 0xa4, 0xb0, 0xc2, 0x02, 0x6a, 0x01, 0x4d, 0x09, 0xc4, 0x00,
+ 0xcd, 0x01, 0x4d, 0x00, 0xcc, 0x8d, 0xfc, 0x0f, 0xae, 0x99, 0xc8, 0xbf,
+ 0xab, 0x0f, 0xae, 0x91, 0xc5, 0x0a, 0xf1, 0x0f, 0xa0, 0xd0, 0xc4, 0xe7,
+ 0xa7, 0x0f, 0xab, 0xc0, 0x90, 0x0f, 0xca, 0x21, 0xcb, 0x94, 0x7d, 0x0f,
+ 0xcf, 0xa8, 0x43, 0x00, 0xdd, 0xc0, 0xc0, 0xa4, 0x46, 0x08, 0x2f, 0x40,
+ 0xc0, 0xc5, 0xcc, 0x8c, 0x10, 0x01, 0x36, 0x29, 0xc9, 0xad, 0xb7, 0x0f,
+ 0x98, 0xf0, 0x52, 0x4e, 0x5a, 0xc0, 0xc0, 0xfd, 0x47, 0x01, 0xff, 0xc0,
+ 0xc1, 0x25, 0xc8, 0x7d, 0xf7, 0x00, 0xdd, 0xd1, 0x46, 0x08, 0xd7, 0xc0,
+ 0xc1, 0xaf, 0x51, 0x55, 0xfe, 0xc0, 0xc1, 0xd3, 0x45, 0x02, 0xcb, 0xc0,
+ 0xc1, 0xe5, 0x4d, 0x7c, 0xfb, 0x40, 0xc1, 0xf1, 0xcf, 0x6c, 0x3f, 0x0f,
+ 0x98, 0x20, 0xd5, 0x37, 0x15, 0x01, 0x17, 0x49, 0xce, 0x70, 0x40, 0x01,
+ 0x15, 0x89, 0x46, 0x25, 0x71, 0xc0, 0xc1, 0xfb, 0x46, 0x03, 0x50, 0x40,
+ 0xc2, 0x07, 0xc2, 0x03, 0x73, 0x01, 0x14, 0x13, 0x00, 0xc2, 0x1f, 0x46,
+ 0x03, 0x50, 0xc0, 0xc2, 0x23, 0x45, 0x01, 0xac, 0x40, 0xc2, 0x2f, 0xd1,
+ 0x1c, 0xda, 0x01, 0x04, 0x71, 0xd0, 0x1f, 0x1f, 0x01, 0x04, 0x69, 0x07,
+ 0xc0, 0xc2, 0x41, 0xc5, 0x1c, 0xa1, 0x01, 0x04, 0x59, 0xc9, 0x65, 0xfa,
+ 0x01, 0x04, 0x51, 0xc4, 0x24, 0x35, 0x01, 0x04, 0x49, 0x15, 0xc0, 0xc2,
+ 0x4d, 0x08, 0xc0, 0xc2, 0x59, 0x16, 0xc0, 0xc2, 0x65, 0xc3, 0x05, 0x17,
+ 0x01, 0x04, 0x09, 0xc4, 0x16, 0x57, 0x01, 0x04, 0x00, 0x87, 0x01, 0x19,
+ 0x19, 0x44, 0x01, 0x5e, 0x40, 0xc2, 0x71, 0x00, 0xc0, 0xc2, 0x7d, 0xc7,
+ 0xc9, 0x82, 0x01, 0x55, 0x52, 0x00, 0xc2, 0xdf, 0x45, 0xdf, 0x31, 0xc0,
+ 0xc2, 0xe5, 0xca, 0x9f, 0xde, 0x00, 0x04, 0xf0, 0x16, 0xc0, 0xc2, 0xf7,
+ 0xc2, 0x01, 0xa1, 0x0f, 0xc9, 0xa2, 0x00, 0xc3, 0x06, 0xc6, 0x1e, 0xce,
+ 0x01, 0x11, 0xbb, 0x00, 0xc3, 0x0c, 0xc9, 0xb7, 0xa1, 0x01, 0x0a, 0x50,
+ 0x00, 0x40, 0xc3, 0x12, 0xcd, 0x78, 0x5c, 0x01, 0x08, 0xf1, 0x5b, 0x17,
+ 0x65, 0x40, 0xc3, 0x2a, 0xc5, 0x29, 0x3b, 0x0f, 0xc9, 0x81, 0xc3, 0x14,
+ 0xe9, 0x0f, 0xd6, 0x19, 0xc6, 0x15, 0x83, 0x0f, 0xd6, 0x20, 0xc3, 0x00,
+ 0x4d, 0x0f, 0xd5, 0x39, 0x45, 0x3a, 0x72, 0x40, 0xc3, 0x62, 0xcc, 0x88,
+ 0xe0, 0x01, 0x08, 0x78, 0x49, 0xb0, 0xc6, 0xc0, 0xc3, 0x6e, 0xcc, 0x8e,
+ 0xec, 0x0f, 0xb6, 0xe8, 0x46, 0x17, 0x6d, 0x40, 0xc3, 0xac, 0x00, 0x40,
+ 0xc3, 0xb4, 0xc8, 0x29, 0x45, 0x0f, 0xc8, 0x79, 0xca, 0xa9, 0x20, 0x0f,
+ 0xc8, 0x60, 0xcb, 0x9b, 0xcb, 0x0f, 0x9c, 0x69, 0xc5, 0xdc, 0xe8, 0x0f,
+ 0x9a, 0x68, 0xc4, 0x14, 0x49, 0x0f, 0xa1, 0xe9, 0xc4, 0x01, 0xa7, 0x0f,
+ 0xa1, 0xb8, 0xd2, 0x4b, 0x9c, 0x01, 0x1c, 0x91, 0xd4, 0x3f, 0x1e, 0x01,
+ 0x1c, 0x88, 0xc8, 0x1c, 0xb8, 0x01, 0x5f, 0xe9, 0xc9, 0xae, 0xa1, 0x0f,
+ 0xb7, 0x98, 0x94, 0x0f, 0xa6, 0xf9, 0x00, 0xc0, 0xc3, 0xc6, 0x95, 0x0f,
+ 0xae, 0x80, 0x43, 0x05, 0x58, 0xc0, 0xc3, 0xd2, 0xc8, 0xbe, 0x9b, 0x0f,
+ 0x9c, 0x49, 0xd1, 0x55, 0xed, 0x01, 0x81, 0xe9, 0xcc, 0x86, 0x34, 0x01,
+ 0x92, 0x80, 0x46, 0x01, 0xf1, 0xc0, 0xc3, 0xdc, 0x47, 0x37, 0x49, 0xc0,
+ 0xc3, 0xe8, 0x46, 0x08, 0xd7, 0xc0, 0xc3, 0xfe, 0x47, 0xc5, 0x1b, 0xc0,
+ 0xc4, 0x1c, 0x52, 0x48, 0x4e, 0xc0, 0xc4, 0x62, 0x4a, 0xa0, 0x92, 0x40,
+ 0xc4, 0x6e, 0x45, 0x6b, 0x2a, 0xc0, 0xc4, 0xac, 0x45, 0x09, 0x34, 0xc0,
+ 0xc4, 0xb8, 0xc5, 0xde, 0x05, 0x0f, 0xd4, 0x10, 0x00, 0x40, 0xc4, 0xca,
+ 0xcf, 0x63, 0x0c, 0x08, 0xd7, 0xa3, 0x00, 0xc4, 0xd6, 0x46, 0x02, 0x00,
+ 0x40, 0xc4, 0xda, 0x00, 0x40, 0xc5, 0x48, 0xc4, 0x22, 0x33, 0x08, 0xd7,
+ 0x63, 0x00, 0xc5, 0x54, 0xcc, 0x26, 0x0c, 0x08, 0xd7, 0x3a, 0x00, 0xc5,
+ 0x58, 0x00, 0x40, 0xc5, 0x5e, 0x00, 0xc0, 0xc5, 0x6d, 0x46, 0xd2, 0xb6,
+ 0xc0, 0xc5, 0x85, 0xcd, 0x80, 0x6f, 0x0f, 0xc9, 0x90, 0x49, 0xac, 0x61,
+ 0xc0, 0xc5, 0x97, 0x49, 0x2e, 0x38, 0x40, 0xc5, 0xc9, 0x44, 0xb5, 0x6f,
+ 0xc0, 0xc6, 0x0b, 0x0f, 0xc0, 0xc6, 0x25, 0xc3, 0x07, 0xc2, 0x0b, 0x5b,
+ 0x81, 0x16, 0xc0, 0xc6, 0x31, 0xc2, 0x06, 0x8d, 0x0b, 0x5b, 0x61, 0x10,
+ 0xc0, 0xc6, 0x43, 0x1a, 0xc0, 0xc6, 0x4f, 0x0a, 0xc0, 0xc6, 0x5f, 0xc8,
+ 0xc3, 0x2b, 0x0b, 0x5b, 0x39, 0x44, 0xe4, 0x53, 0xc0, 0xc6, 0x6b, 0xc6,
+ 0xd3, 0x0a, 0x0b, 0x5a, 0x18, 0x16, 0xc0, 0xc6, 0x87, 0x47, 0x0d, 0x7f,
+ 0xc0, 0xc6, 0x93, 0xc8, 0x36, 0xde, 0x0b, 0x5a, 0xf0, 0xc4, 0x24, 0x35,
+ 0x0b, 0x5a, 0xc9, 0xc5, 0x05, 0x1b, 0x0b, 0x5a, 0xc1, 0x15, 0xc0, 0xc6,
+ 0x9d, 0x08, 0xc0, 0xc6, 0xa9, 0x16, 0xc0, 0xc6, 0xb5, 0xc3, 0x05, 0x17,
+ 0x0b, 0x5a, 0x89, 0xc4, 0x16, 0x57, 0x0b, 0x5a, 0x80, 0x16, 0xc0, 0xc6,
+ 0xc1, 0xc3, 0xe6, 0xdc, 0x0b, 0x59, 0xa9, 0x15, 0xc0, 0xc6, 0xcd, 0x0d,
+ 0x40, 0xc6, 0xd7, 0x03, 0xc0, 0xc6, 0xe3, 0x19, 0xc0, 0xc6, 0xfb, 0x0b,
+ 0xc0, 0xc7, 0x03, 0x11, 0xc0, 0xc7, 0x0f, 0x17, 0xc0, 0xc7, 0x1b, 0x07,
+ 0x40, 0xc7, 0x27, 0xd0, 0x2f, 0x45, 0x0f, 0xb5, 0x81, 0xc2, 0x01, 0x89,
+ 0x0f, 0xca, 0xa0, 0xc8, 0x1a, 0x50, 0x0f, 0xb1, 0xf1, 0xc4, 0x01, 0xa7,
+ 0x0f, 0xb1, 0x08, 0xcb, 0x94, 0x51, 0x01, 0x1f, 0xf1, 0xc5, 0x01, 0xb2,
+ 0x01, 0x1f, 0xd8, 0xc8, 0x12, 0x6a, 0x01, 0x1f, 0xe9, 0xcc, 0x86, 0x88,
+ 0x01, 0x1f, 0xe0, 0x43, 0x00, 0x9b, 0xc0, 0xc7, 0x33, 0xc3, 0x35, 0x3b,
+ 0x0f, 0xa7, 0x70, 0xc7, 0x05, 0x3a, 0x01, 0x03, 0x49, 0xca, 0x9f, 0x98,
+ 0x01, 0x01, 0x60, 0xd1, 0x57, 0x63, 0x0f, 0xb5, 0x40, 0xc7, 0x01, 0xab,
+ 0x01, 0x57, 0x08, 0x42, 0x00, 0xe5, 0xc0, 0xc7, 0x42, 0xc7, 0xcf, 0xcc,
+ 0x01, 0x18, 0x31, 0xcc, 0x84, 0x24, 0x0f, 0xb1, 0x18, 0xc4, 0x02, 0xcb,
+ 0x01, 0x0a, 0x61, 0xd1, 0x4f, 0x9e, 0x01, 0x01, 0x89, 0xca, 0xa8, 0x4e,
+ 0x01, 0x01, 0x80, 0xc8, 0x14, 0x7e, 0x01, 0x31, 0x71, 0x8a, 0x0f, 0x9a,
+ 0x89, 0xc3, 0x00, 0xba, 0x0f, 0xcc, 0xd0, 0xc4, 0x05, 0xde, 0x08, 0x5d,
+ 0x59, 0x19, 0xc0, 0xc7, 0x4c, 0xc2, 0x01, 0x04, 0x08, 0x5d, 0x68, 0xc8,
+ 0x0d, 0x7e, 0x08, 0x5d, 0x78, 0xc3, 0x11, 0x40, 0x08, 0x5c, 0x81, 0x03,
+ 0x40, 0xc7, 0x56, 0xc2, 0x01, 0x47, 0x08, 0x5c, 0x38, 0xce, 0x6d, 0x68,
+ 0x08, 0x48, 0xf9, 0x47, 0x37, 0x49, 0xc0, 0xc7, 0x62, 0x47, 0x01, 0xff,
+ 0x40, 0xc7, 0x6f, 0x47, 0x01, 0xff, 0xc0, 0xc7, 0xd2, 0x15, 0xc0, 0xc8,
+ 0x58, 0xd0, 0x58, 0x5f, 0x05, 0x43, 0xa9, 0x45, 0x00, 0xfa, 0x40, 0xc8,
+ 0x62, 0x12, 0xc0, 0xc8, 0x6e, 0x16, 0xc0, 0xc8, 0x7e, 0x05, 0xc0, 0xc8,
+ 0x90, 0x19, 0xc0, 0xc8, 0xa4, 0x0a, 0xc0, 0xc8, 0xb0, 0x04, 0xc0, 0xc8,
+ 0xc2, 0x15, 0xc0, 0xc8, 0xd5, 0x42, 0x00, 0x3f, 0xc0, 0xc8, 0xf3, 0x42,
+ 0x02, 0xb5, 0xc0, 0xc8, 0xff, 0x42, 0x00, 0x68, 0xc0, 0xc9, 0x09, 0x14,
+ 0xc0, 0xc9, 0x15, 0xc5, 0xda, 0xcc, 0x08, 0x0f, 0x71, 0xc4, 0xac, 0x58,
+ 0x08, 0x0f, 0x99, 0xc7, 0xc7, 0xfa, 0x08, 0x0f, 0xb9, 0x09, 0xc0, 0xc9,
+ 0x21, 0xc5, 0x00, 0x62, 0x08, 0x0e, 0xc9, 0xc5, 0xde, 0x7d, 0x08, 0x0f,
+ 0xc0, 0xc6, 0x4a, 0xd8, 0x00, 0x04, 0x81, 0xc4, 0x08, 0xdd, 0x00, 0x00,
+ 0xa1, 0x16, 0xc0, 0xc9, 0x2d, 0xc3, 0x05, 0x17, 0x00, 0x00, 0x88, 0x15,
+ 0xc0, 0xc9, 0x39, 0x44, 0x00, 0x5b, 0xc0, 0xc9, 0x45, 0x03, 0xc0, 0xc9,
+ 0x54, 0x09, 0xc0, 0xc9, 0x60, 0xc2, 0x00, 0xeb, 0x00, 0x4a, 0x81, 0x4b,
+ 0x6f, 0x71, 0xc0, 0xc9, 0x6c, 0x47, 0x01, 0xff, 0xc0, 0xc9, 0xa1, 0xc7,
+ 0xca, 0xcb, 0x05, 0x47, 0xe9, 0xc5, 0x98, 0x45, 0x05, 0x47, 0xd1, 0x06,
+ 0x40, 0xca, 0x16, 0xc6, 0xd0, 0x9a, 0x0f, 0xae, 0xa1, 0xc8, 0x47, 0x34,
+ 0x0f, 0xad, 0x28, 0x96, 0x0f, 0x9e, 0xe3, 0x00, 0xca, 0x28, 0x43, 0x00,
+ 0xdd, 0x40, 0xca, 0x2e, 0x44, 0x07, 0x4a, 0xc0, 0xca, 0x3a, 0xca, 0xa9,
+ 0xd4, 0x0f, 0x99, 0x98, 0x44, 0x00, 0x3a, 0xc0, 0xca, 0x46, 0x45, 0x01,
+ 0xac, 0x40, 0xca, 0x58, 0x46, 0x01, 0xab, 0x40, 0xca, 0x64, 0x46, 0x01,
+ 0xab, 0x40, 0xca, 0x76, 0xc5, 0x67, 0xe4, 0x0e, 0x98, 0x2b, 0x00, 0xca,
+ 0x88, 0x0a, 0xc0, 0xca, 0x8e, 0x49, 0xb4, 0x65, 0xc0, 0xca, 0x9a, 0x48,
+ 0xbe, 0x03, 0x40, 0xca, 0xa6, 0xc4, 0x24, 0x35, 0x00, 0x01, 0xcb, 0x00,
+ 0xca, 0xb2, 0xc5, 0x05, 0x1b, 0x00, 0x01, 0xc3, 0x00, 0xca, 0xb6, 0x15,
+ 0xc0, 0xca, 0xba, 0x08, 0xc0, 0xca, 0xcc, 0x16, 0xc0, 0xca, 0xde, 0xc3,
+ 0x05, 0x17, 0x00, 0x01, 0x8b, 0x00, 0xca, 0xf0, 0xc4, 0x16, 0x57, 0x00,
+ 0x01, 0x82, 0x00, 0xca, 0xf4, 0x06, 0xc0, 0xca, 0xf8, 0xd0, 0x5e, 0xcf,
+ 0x08, 0xca, 0x31, 0xca, 0xa0, 0x42, 0x08, 0xca, 0x29, 0x45, 0x02, 0xcb,
+ 0xc0, 0xcb, 0x04, 0x47, 0x2e, 0x46, 0xc0, 0xcb, 0x1c, 0xca, 0x9c, 0xd2,
+ 0x08, 0xca, 0x09, 0xd3, 0x46, 0xde, 0x08, 0xc9, 0xf9, 0x18, 0xc0, 0xcb,
+ 0x28, 0x47, 0x01, 0xff, 0x40, 0xcb, 0x34, 0x45, 0x28, 0x17, 0xc0, 0xcb,
+ 0xa1, 0xc3, 0x0e, 0xe3, 0x01, 0x11, 0x19, 0xc7, 0xcc, 0x0d, 0x0f, 0xc9,
+ 0xf8, 0x4b, 0x40, 0x84, 0xc0, 0xcb, 0xab, 0xca, 0xa8, 0x3a, 0x01, 0x3b,
+ 0xf9, 0x46, 0x08, 0xd7, 0x40, 0xcb, 0xb7, 0xca, 0xa8, 0x3a, 0x01, 0x3c,
+ 0x49, 0x46, 0x08, 0xd7, 0x40, 0xcb, 0xd5, 0xc8, 0xc3, 0x1b, 0x01, 0x36,
+ 0x69, 0x49, 0xaf, 0x8b, 0x40, 0xcb, 0xf9, 0xc3, 0x82, 0xb0, 0x01, 0x34,
+ 0x29, 0xc3, 0x3b, 0x0b, 0x01, 0x34, 0x21, 0xc3, 0x82, 0xe0, 0x01, 0x34,
+ 0x19, 0xc3, 0x82, 0xec, 0x01, 0x34, 0x11, 0xc3, 0x82, 0xa4, 0x01, 0x34,
+ 0x09, 0xc3, 0x83, 0x28, 0x01, 0x34, 0x00, 0xc9, 0xaa, 0xf0, 0x01, 0x18,
+ 0x01, 0x44, 0x67, 0xbf, 0x40, 0xcc, 0x05, 0xc9, 0xaf, 0x16, 0x0f, 0xd3,
+ 0xc1, 0xc3, 0x01, 0xff, 0x0f, 0xa5, 0x38, 0xc5, 0x10, 0x15, 0x0f, 0xa1,
+ 0x90, 0x44, 0x00, 0x4c, 0xc0, 0xcc, 0x1d, 0x00, 0x40, 0xcc, 0x29, 0xd2,
+ 0x4c, 0x62, 0x0f, 0xa9, 0xe9, 0xcc, 0x4c, 0x68, 0x0f, 0xa9, 0xd9, 0x4e,
+ 0x6e, 0x64, 0x40, 0xcc, 0x44, 0x43, 0xec, 0x3c, 0xc0, 0xcc, 0x50, 0xc6,
+ 0xd7, 0x96, 0x0f, 0x81, 0x80, 0xc6, 0xd7, 0x96, 0x0f, 0x83, 0x11, 0x43,
+ 0xec, 0x3c, 0x40, 0xcc, 0x7a, 0x07, 0xc0, 0xcc, 0xa4, 0x49, 0x6d, 0x0b,
+ 0xc0, 0xcc, 0xae, 0xd0, 0x5e, 0x4f, 0x01, 0x4e, 0xf9, 0x47, 0x03, 0x5b,
+ 0x40, 0xcc, 0xc6, 0x05, 0xc0, 0xcc, 0xd2, 0xc5, 0x85, 0x87, 0x01, 0x4c,
+ 0xc9, 0x15, 0xc0, 0xcc, 0xde, 0xc9, 0xb4, 0x5c, 0x0f, 0xd7, 0x29, 0xd4,
+ 0x39, 0xe2, 0x01, 0x70, 0x41, 0xc6, 0xd1, 0x96, 0x01, 0x70, 0x99, 0xd4,
+ 0x3e, 0x2e, 0x01, 0x70, 0xb0, 0xc8, 0x15, 0xe3, 0x01, 0x16, 0x29, 0xc5,
+ 0x1c, 0xa1, 0x01, 0x11, 0xc1, 0xc4, 0x21, 0x5e, 0x01, 0x10, 0xa1, 0xc5,
+ 0x03, 0x50, 0x00, 0x16, 0xc8, 0xd1, 0x56, 0x0f, 0x08, 0xc1, 0xd9, 0x45,
+ 0x02, 0xcb, 0xc0, 0xcc, 0xea, 0x4b, 0x6f, 0x71, 0xc0, 0xcc, 0xfc, 0x47,
+ 0x01, 0xff, 0x40, 0xcd, 0x1f, 0xcf, 0x4c, 0xe0, 0x01, 0x17, 0x5b, 0x00,
+ 0xcd, 0x86, 0xc6, 0x04, 0xae, 0x01, 0x10, 0x60, 0xc9, 0x25, 0x70, 0x01,
+ 0x17, 0x08, 0xc5, 0x2d, 0x31, 0x01, 0x14, 0x03, 0x00, 0xcd, 0x8c, 0xc3,
+ 0x01, 0xba, 0x01, 0x15, 0x60, 0xdd, 0x11, 0xa7, 0x01, 0x57, 0x70, 0xc7,
+ 0x89, 0x2d, 0x0f, 0xad, 0xd9, 0xc4, 0x27, 0x22, 0x0f, 0xad, 0xca, 0x00,
+ 0xcd, 0x92, 0x0e, 0xc0, 0xcd, 0x98, 0x45, 0x0b, 0x2b, 0xc0, 0xcd, 0xa4,
+ 0x49, 0xb1, 0xb9, 0xc0, 0xcd, 0xd5, 0x44, 0xa5, 0xa8, 0xc0, 0xcd, 0xf3,
+ 0xd7, 0x29, 0x4e, 0x0d, 0xe3, 0x90, 0x99, 0x0d, 0xe1, 0xc3, 0x00, 0xcd,
+ 0xff, 0x96, 0x0d, 0xe0, 0x1b, 0x00, 0xce, 0x1e, 0x95, 0x0d, 0xe0, 0xe3,
+ 0x00, 0xce, 0x26, 0x8c, 0x0d, 0xe0, 0xdb, 0x00, 0xce, 0x36, 0x90, 0x0d,
+ 0xe0, 0xd3, 0x00, 0xce, 0x3a, 0x8f, 0x0d, 0xe0, 0xcb, 0x00, 0xce, 0x44,
+ 0x94, 0x0d, 0xe0, 0x5b, 0x00, 0xce, 0x48, 0x8e, 0x0d, 0xe0, 0x33, 0x00,
+ 0xce, 0x58, 0x8a, 0x0d, 0xe0, 0x03, 0x00, 0xce, 0x62, 0x8d, 0x0d, 0xe0,
+ 0x2b, 0x00, 0xce, 0x66, 0x86, 0x0d, 0xe0, 0x43, 0x00, 0xce, 0x6e, 0x88,
+ 0x0d, 0xe0, 0x23, 0x00, 0xce, 0x78, 0x92, 0x0d, 0xe0, 0x13, 0x00, 0xce,
+ 0x7e, 0x89, 0x0d, 0xe0, 0x53, 0x00, 0xce, 0x8a, 0x98, 0x0d, 0xe0, 0x4b,
+ 0x00, 0xce, 0x90, 0x84, 0x0d, 0xe0, 0x39, 0x9a, 0x0d, 0xe0, 0x0b, 0x00,
+ 0xce, 0x96, 0x91, 0x0d, 0xe2, 0x23, 0x00, 0xce, 0x9a, 0x97, 0x0d, 0xe2,
+ 0x8b, 0x00, 0xce, 0xac, 0x87, 0x0d, 0xe2, 0x3b, 0x00, 0xce, 0xba, 0xc2,
+ 0x04, 0x30, 0x0d, 0xe2, 0x81, 0x8b, 0x0d, 0xe2, 0x33, 0x00, 0xce, 0xc2,
+ 0x83, 0x0d, 0xe2, 0x0a, 0x00, 0xce, 0xc6, 0xe0, 0x04, 0xe7, 0x01, 0x3c,
+ 0xf9, 0xc8, 0x7c, 0x98, 0x07, 0xf2, 0x49, 0xc8, 0x7c, 0x8b, 0x07, 0xf2,
+ 0x68, 0xc6, 0x01, 0xb1, 0x0f, 0xa5, 0x41, 0xd0, 0x5e, 0x2f, 0x01, 0x72,
+ 0x18, 0xc5, 0xa4, 0x6b, 0x0f, 0xaf, 0x09, 0x45, 0x01, 0xac, 0x40, 0xce,
+ 0xcc, 0x00, 0xc0, 0xce, 0xd8, 0x42, 0x00, 0x3c, 0x40, 0xce, 0xf9, 0x4f,
+ 0x6b, 0x9a, 0xc0, 0xcf, 0x42, 0xc3, 0x47, 0xd8, 0x0f, 0xb5, 0xd8, 0xcf,
+ 0x21, 0x7d, 0x01, 0x33, 0xe1, 0x4f, 0x69, 0xba, 0x40, 0xcf, 0x4e, 0x9c,
+ 0x0f, 0x8f, 0xf9, 0x9b, 0x0f, 0x8f, 0xf1, 0x9a, 0x0f, 0x8f, 0xe9, 0x99,
+ 0x0f, 0x8f, 0xe1, 0x98, 0x0f, 0x8f, 0xd9, 0x97, 0x0f, 0x8f, 0xd1, 0x96,
+ 0x0f, 0x8f, 0xc9, 0x95, 0x0f, 0x8f, 0xc1, 0x94, 0x0f, 0x8f, 0xb9, 0x93,
+ 0x0f, 0x8f, 0xb1, 0x92, 0x0f, 0x8f, 0xa9, 0x91, 0x0f, 0x8f, 0xa1, 0x90,
+ 0x0f, 0x8f, 0x99, 0x8f, 0x0f, 0x8f, 0x91, 0x8e, 0x0f, 0x8f, 0x89, 0x8d,
+ 0x0f, 0x8f, 0x81, 0x8c, 0x0f, 0x8f, 0x79, 0x8b, 0x0f, 0x8f, 0x71, 0x8a,
+ 0x0f, 0x8f, 0x69, 0x89, 0x0f, 0x8f, 0x61, 0x88, 0x0f, 0x8f, 0x59, 0x87,
+ 0x0f, 0x8f, 0x51, 0x86, 0x0f, 0x8f, 0x49, 0x85, 0x0f, 0x8f, 0x41, 0x84,
+ 0x0f, 0x8f, 0x39, 0x83, 0x0f, 0x8f, 0x30, 0xc5, 0x21, 0x27, 0x05, 0x4a,
+ 0x99, 0x4a, 0x6f, 0x72, 0x40, 0xcf, 0x5a, 0x8a, 0x05, 0x4a, 0x91, 0x94,
+ 0x05, 0x4a, 0x89, 0x90, 0x05, 0x4a, 0x82, 0x00, 0xcf, 0x71, 0x83, 0x05,
+ 0x4a, 0x31, 0x10, 0xc0, 0xcf, 0x75, 0x0f, 0xc0, 0xcf, 0x87, 0xc2, 0x01,
+ 0x0e, 0x05, 0x4a, 0x09, 0xc2, 0x00, 0x4c, 0x05, 0x4a, 0x01, 0xc2, 0x1a,
+ 0x36, 0x05, 0x49, 0xf9, 0xc2, 0x00, 0x96, 0x05, 0x49, 0xf1, 0xc2, 0x00,
+ 0x9a, 0x05, 0x49, 0xe9, 0xc2, 0x0e, 0xe5, 0x05, 0x49, 0xe1, 0xc2, 0x26,
+ 0x94, 0x05, 0x49, 0xd1, 0xc2, 0x00, 0x2e, 0x05, 0x49, 0xc9, 0xc2, 0x02,
+ 0x1d, 0x05, 0x49, 0xb9, 0xc2, 0x07, 0x44, 0x05, 0x49, 0xb1, 0xc2, 0x0c,
+ 0x25, 0x05, 0x49, 0xa1, 0xc2, 0x00, 0x44, 0x05, 0x49, 0x99, 0xc2, 0x07,
+ 0x69, 0x05, 0x49, 0x89, 0xc2, 0x06, 0x6b, 0x05, 0x49, 0x80, 0x00, 0xc0,
+ 0xcf, 0x91, 0x46, 0x00, 0x4c, 0xc0, 0xcf, 0xe6, 0xd5, 0x37, 0x2a, 0x01,
+ 0x51, 0xe8, 0xd0, 0x5f, 0x4f, 0x0f, 0xa8, 0x71, 0xcd, 0x0f, 0x83, 0x01,
+ 0x19, 0x51, 0xd4, 0x3a, 0x1e, 0x01, 0x4f, 0xe9, 0xdb, 0x16, 0xc3, 0x00,
+ 0x05, 0xd8, 0xdc, 0x13, 0x8a, 0x01, 0x3d, 0x49, 0xd7, 0x2a, 0x79, 0x01,
+ 0x49, 0xc0, 0xc7, 0x05, 0x3a, 0x01, 0x03, 0x39, 0xc8, 0xbb, 0xd3, 0x01,
+ 0x01, 0x71, 0xc9, 0xac, 0x97, 0x01, 0x01, 0x59, 0xc4, 0x00, 0xfa, 0x01,
+ 0x00, 0x78, 0xd6, 0x31, 0x23, 0x00, 0x2c, 0x69, 0xc4, 0xbe, 0x2d, 0x0f,
+ 0xc8, 0xe1, 0xcb, 0x92, 0xc5, 0x00, 0x7e, 0xaa, 0x00, 0xd0, 0x28, 0xc8,
+ 0x08, 0xff, 0x01, 0x12, 0xb9, 0xcb, 0x97, 0x48, 0x01, 0x12, 0xb1, 0xc8,
+ 0x15, 0xe3, 0x01, 0x10, 0xc1, 0xc5, 0x03, 0x50, 0x00, 0x16, 0xd1, 0xc4,
+ 0xe5, 0x73, 0x0f, 0xb6, 0xf9, 0xc5, 0x01, 0x59, 0x01, 0x71, 0x80, 0x45,
+ 0x0a, 0xe3, 0xc0, 0xd0, 0x2e, 0x43, 0x11, 0x8a, 0xc0, 0xd0, 0x3a, 0x45,
+ 0x00, 0xcd, 0xc0, 0xd0, 0x46, 0x46, 0x01, 0x17, 0x40, 0xd0, 0x52, 0xce,
+ 0x6c, 0xce, 0x0f, 0xae, 0xf1, 0x42, 0x00, 0xb2, 0x40, 0xd0, 0x5e, 0xc6,
+ 0xd3, 0x58, 0x0f, 0xbc, 0x59, 0xc7, 0xcf, 0x78, 0x0f, 0xa6, 0x68, 0xc3,
+ 0xec, 0x15, 0x0f, 0x93, 0x29, 0x42, 0x01, 0x0d, 0xc0, 0xd0, 0x6a, 0xc2,
+ 0x08, 0xe9, 0x0f, 0x93, 0x19, 0xc2, 0x12, 0x5d, 0x0f, 0x93, 0x09, 0xc2,
+ 0x11, 0x47, 0x0f, 0x93, 0x00, 0xc3, 0x05, 0x17, 0x01, 0x0b, 0x03, 0x00,
+ 0xd0, 0x76, 0x08, 0xc0, 0xd0, 0x7a, 0x15, 0xc0, 0xd0, 0x84, 0xd4, 0x39,
+ 0xa6, 0x01, 0x0c, 0x19, 0x16, 0xc0, 0xd0, 0x93, 0x07, 0xc0, 0xd0, 0xa6,
+ 0xc4, 0x24, 0x35, 0x01, 0x0b, 0x40, 0x07, 0xc0, 0xd0, 0xb2, 0xcb, 0x95,
+ 0xd2, 0x08, 0x0c, 0xa8, 0xd3, 0x43, 0x4e, 0x08, 0x0c, 0xa1, 0xcc, 0x84,
+ 0x0c, 0x08, 0x0c, 0xb1, 0xcd, 0x7e, 0x33, 0x08, 0x0c, 0xc8, 0xc3, 0x5f,
+ 0x11, 0x0f, 0xb4, 0x19, 0xc5, 0xde, 0xb9, 0x0f, 0xb7, 0x20, 0xc4, 0x02,
+ 0x33, 0x01, 0x38, 0x5b, 0x00, 0xd0, 0xc4, 0xc4, 0xbe, 0x2d, 0x01, 0x38,
+ 0x51, 0x0f, 0xc0, 0xd0, 0xca, 0xcc, 0x8c, 0x04, 0x0f, 0xc8, 0xd1, 0xd4,
+ 0x24, 0xa8, 0x01, 0x70, 0x31, 0xc3, 0x05, 0xe3, 0x01, 0x71, 0x9b, 0x00,
+ 0xd0, 0xdc, 0xc6, 0x01, 0xe9, 0x01, 0x70, 0x59, 0xc5, 0x00, 0xea, 0x01,
+ 0x71, 0xa0, 0xc3, 0x7f, 0x6b, 0x0f, 0x98, 0x40, 0xcb, 0x98, 0x50, 0x01,
+ 0x31, 0x11, 0xc7, 0xc3, 0xd9, 0x0f, 0xa8, 0xc0, 0xc3, 0x5f, 0x11, 0x0f,
+ 0x9e, 0x71, 0xca, 0xa9, 0x16, 0x0f, 0x9e, 0x68, 0xca, 0xa9, 0xfc, 0x08,
+ 0x73, 0xf1, 0x44, 0x05, 0x17, 0x40, 0xd0, 0xe2, 0x44, 0x24, 0x35, 0xc0,
+ 0xd0, 0xf4, 0x45, 0x05, 0x1b, 0xc0, 0xd1, 0x00, 0x15, 0xc0, 0xd1, 0x0a,
+ 0x08, 0xc0, 0xd1, 0x16, 0x16, 0xc0, 0xd1, 0x1e, 0xcb, 0x0d, 0x7b, 0x08,
+ 0x73, 0x90, 0xc4, 0x24, 0x35, 0x08, 0x73, 0x41, 0xc5, 0x05, 0x1b, 0x08,
+ 0x73, 0x39, 0x15, 0xc0, 0xd1, 0x2c, 0x08, 0xc0, 0xd1, 0x38, 0x16, 0xc0,
+ 0xd1, 0x44, 0xc3, 0x05, 0x17, 0x08, 0x73, 0x00, 0x47, 0x01, 0xff, 0xc0,
+ 0xd1, 0x50, 0xcf, 0x61, 0xd1, 0x00, 0xb7, 0x81, 0xcf, 0x68, 0x70, 0x00,
+ 0xb7, 0x79, 0xcd, 0x7a, 0x71, 0x00, 0xb7, 0x71, 0xd1, 0x50, 0x15, 0x00,
+ 0xb7, 0x69, 0xd4, 0x3a, 0x32, 0x00, 0xb7, 0x61, 0xd2, 0x4b, 0x1e, 0x00,
+ 0xb7, 0x58, 0xc2, 0x00, 0x2a, 0x0f, 0x9e, 0x19, 0xd3, 0x44, 0x45, 0x0f,
+ 0x9d, 0xe8, 0xa2, 0x07, 0xf0, 0x73, 0x00, 0xd1, 0xe0, 0x9e, 0x07, 0xf0,
+ 0x53, 0x00, 0xd2, 0x08, 0x9d, 0x07, 0xf0, 0x4b, 0x00, 0xd2, 0x30, 0xa6,
+ 0x70, 0x08, 0x13, 0x00, 0xd2, 0x58, 0xa5, 0x70, 0x08, 0x0b, 0x00, 0xd2,
+ 0x80, 0xa4, 0x70, 0x08, 0x03, 0x00, 0xd2, 0xa8, 0xa3, 0x07, 0xf0, 0x7b,
+ 0x00, 0xd2, 0xd0, 0xa1, 0x07, 0xf0, 0x6b, 0x00, 0xd2, 0xf8, 0xa0, 0x07,
+ 0xf0, 0x63, 0x00, 0xd3, 0x20, 0x9f, 0x07, 0xf0, 0x5a, 0x00, 0xd3, 0x48,
+ 0xa2, 0x70, 0x08, 0x43, 0x00, 0xd3, 0x70, 0xa1, 0x70, 0x08, 0x3b, 0x00,
+ 0xd3, 0x8c, 0xa0, 0x70, 0x08, 0x33, 0x00, 0xd3, 0xb4, 0x9f, 0x70, 0x08,
+ 0x2b, 0x00, 0xd3, 0xdc, 0x9e, 0x70, 0x08, 0x23, 0x00, 0xd4, 0x04, 0x9d,
+ 0x70, 0x08, 0x1b, 0x00, 0xd4, 0x2c, 0xa6, 0x70, 0x08, 0x61, 0xa5, 0x70,
+ 0x08, 0x59, 0xa4, 0x70, 0x08, 0x51, 0xa3, 0x70, 0x08, 0x48, 0xa6, 0x70,
+ 0x0a, 0x91, 0xa5, 0x70, 0x0a, 0x89, 0xa4, 0x70, 0x0a, 0x81, 0xa3, 0x70,
+ 0x0a, 0x79, 0xa2, 0x70, 0x0a, 0x71, 0xa1, 0x70, 0x0a, 0x69, 0xa0, 0x70,
+ 0x0a, 0x61, 0x9f, 0x70, 0x0a, 0x59, 0x9e, 0x70, 0x0a, 0x51, 0x9d, 0x70,
+ 0x0a, 0x48, 0xa6, 0x70, 0x0a, 0x41, 0xa5, 0x70, 0x0a, 0x39, 0xa4, 0x70,
+ 0x0a, 0x31, 0xa3, 0x70, 0x0a, 0x29, 0xa2, 0x70, 0x0a, 0x21, 0xa1, 0x70,
+ 0x0a, 0x19, 0xa0, 0x70, 0x0a, 0x11, 0x9f, 0x70, 0x0a, 0x09, 0x9e, 0x70,
+ 0x0a, 0x01, 0x9d, 0x70, 0x09, 0xf8, 0xa6, 0x70, 0x09, 0xf1, 0xa5, 0x70,
+ 0x09, 0xe9, 0xa4, 0x70, 0x09, 0xe1, 0xa3, 0x70, 0x09, 0xd9, 0xa2, 0x70,
+ 0x09, 0xd1, 0xa1, 0x70, 0x09, 0xc9, 0xa0, 0x70, 0x09, 0xc1, 0x9f, 0x70,
+ 0x09, 0xb9, 0x9e, 0x70, 0x09, 0xb1, 0x9d, 0x70, 0x09, 0xa8, 0xa6, 0x70,
+ 0x09, 0xa1, 0xa5, 0x70, 0x09, 0x99, 0xa4, 0x70, 0x09, 0x91, 0xa3, 0x70,
+ 0x09, 0x89, 0xa2, 0x70, 0x09, 0x81, 0xa1, 0x70, 0x09, 0x79, 0xa0, 0x70,
+ 0x09, 0x71, 0x9f, 0x70, 0x09, 0x69, 0x9e, 0x70, 0x09, 0x61, 0x9d, 0x70,
+ 0x09, 0x58, 0xa6, 0x70, 0x09, 0x51, 0xa5, 0x70, 0x09, 0x49, 0xa4, 0x70,
+ 0x09, 0x41, 0xa3, 0x70, 0x09, 0x39, 0xa2, 0x70, 0x09, 0x31, 0xa1, 0x70,
+ 0x09, 0x29, 0xa0, 0x70, 0x09, 0x21, 0x9f, 0x70, 0x09, 0x19, 0x9e, 0x70,
+ 0x09, 0x11, 0x9d, 0x70, 0x09, 0x08, 0xa6, 0x70, 0x09, 0x01, 0xa5, 0x70,
+ 0x08, 0xf9, 0xa4, 0x70, 0x08, 0xf1, 0xa3, 0x70, 0x08, 0xe9, 0xa2, 0x70,
+ 0x08, 0xe1, 0xa1, 0x70, 0x08, 0xd9, 0xa0, 0x70, 0x08, 0xd1, 0x9f, 0x70,
+ 0x08, 0xc9, 0x9e, 0x70, 0x08, 0xc1, 0x9d, 0x70, 0x08, 0xb8, 0xa6, 0x70,
+ 0x08, 0xb1, 0xa5, 0x70, 0x08, 0xa9, 0xa4, 0x70, 0x08, 0xa1, 0xa3, 0x70,
+ 0x08, 0x99, 0xa2, 0x70, 0x08, 0x91, 0xa1, 0x70, 0x08, 0x89, 0xa0, 0x70,
+ 0x08, 0x81, 0x9f, 0x70, 0x08, 0x79, 0x9e, 0x70, 0x08, 0x71, 0x9d, 0x70,
+ 0x08, 0x68, 0x47, 0x13, 0x58, 0xc0, 0xd4, 0x54, 0x45, 0x11, 0xf2, 0x40,
+ 0xd4, 0xc3, 0xc4, 0x16, 0x57, 0x05, 0x31, 0x01, 0xc3, 0x05, 0x17, 0x05,
+ 0x31, 0x09, 0x16, 0xc0, 0xd4, 0xe5, 0x08, 0xc0, 0xd4, 0xf1, 0x15, 0xc0,
+ 0xd4, 0xfd, 0xc5, 0x05, 0x1b, 0x05, 0x31, 0x41, 0xc4, 0x24, 0x35, 0x05,
+ 0x31, 0x48, 0x4f, 0x05, 0x17, 0xc0, 0xd5, 0x09, 0x44, 0x02, 0x8d, 0xc0,
+ 0xd5, 0x2d, 0xd5, 0x33, 0xcd, 0x01, 0x35, 0x41, 0xc4, 0x03, 0x5d, 0x00,
+ 0x03, 0xe3, 0x00, 0xd5, 0x45, 0xc8, 0x23, 0xac, 0x01, 0x17, 0x71, 0xc9,
+ 0x39, 0xbf, 0x01, 0x02, 0xf1, 0x16, 0xc0, 0xd5, 0x49, 0xcb, 0x9b, 0x68,
+ 0x01, 0x4c, 0xd1, 0xc8, 0xbc, 0x8b, 0x01, 0x71, 0xe9, 0x4c, 0x8e, 0x50,
+ 0xc0, 0xd5, 0x5b, 0xda, 0x1b, 0x6e, 0x01, 0x81, 0xd8, 0x46, 0x11, 0x8d,
+ 0xc0, 0xd5, 0x6d, 0xd0, 0x60, 0x2f, 0x0f, 0xbd, 0x29, 0x45, 0xe0, 0xee,
+ 0x40, 0xd5, 0x8f, 0xdc, 0x13, 0xde, 0x00, 0xe7, 0xd1, 0x03, 0xc0, 0xd5,
+ 0x9b, 0xcb, 0x9c, 0xb2, 0x00, 0xe7, 0xb1, 0xcb, 0x93, 0xc2, 0x00, 0xe7,
+ 0xa9, 0x14, 0xc0, 0xd5, 0xad, 0xcd, 0x2f, 0x74, 0x00, 0xe7, 0x79, 0xd6,
+ 0x2f, 0x6b, 0x00, 0xe7, 0x71, 0xc6, 0xd5, 0x68, 0x00, 0xe7, 0x69, 0x48,
+ 0x59, 0x87, 0xc0, 0xd5, 0xbf, 0xda, 0x1a, 0x36, 0x00, 0xe6, 0xa1, 0xc9,
+ 0xb2, 0x13, 0x00, 0xe6, 0x98, 0x42, 0x00, 0x68, 0xc0, 0xd5, 0xd7, 0x42,
+ 0x00, 0x47, 0xc0, 0xd5, 0xe3, 0x47, 0xa4, 0x92, 0xc0, 0xd5, 0xef, 0xe0,
+ 0x06, 0x87, 0x00, 0xe7, 0x09, 0x16, 0xc0, 0xd5, 0xfb, 0x42, 0x06, 0x6b,
+ 0xc0, 0xd6, 0x0d, 0x4b, 0x1a, 0x36, 0xc0, 0xd6, 0x19, 0xc7, 0xca, 0x38,
+ 0x00, 0xe6, 0x91, 0xc5, 0xe3, 0xeb, 0x00, 0xe6, 0x88, 0xc4, 0xea, 0x9b,
+ 0x0b, 0x7f, 0x89, 0xc2, 0x00, 0x2e, 0x0b, 0x7f, 0x80, 0xc6, 0x9d, 0x44,
+ 0x0f, 0xa7, 0xc9, 0xc4, 0x7a, 0x3b, 0x0f, 0x9d, 0x70, 0x83, 0x08, 0x2b,
+ 0x81, 0x04, 0xc0, 0xd6, 0x2e, 0x05, 0xc0, 0xd6, 0x38, 0x06, 0xc0, 0xd6,
+ 0x42, 0x87, 0x08, 0x2b, 0xc3, 0x00, 0xd6, 0x4c, 0xc2, 0x13, 0xfc, 0x08,
+ 0x2b, 0xc9, 0xc2, 0x07, 0x69, 0x08, 0x2b, 0xd1, 0x0a, 0xc0, 0xd6, 0x50,
+ 0x8b, 0x08, 0x2b, 0xf3, 0x00, 0xd6, 0x5a, 0xc2, 0x1c, 0x3e, 0x08, 0x2c,
+ 0x01, 0x0e, 0xc0, 0xd6, 0x60, 0xc2, 0x01, 0x03, 0x08, 0x2c, 0x21, 0x10,
+ 0xc0, 0xd6, 0x6a, 0x91, 0x08, 0x2c, 0x39, 0xc2, 0x03, 0x07, 0x08, 0x2c,
+ 0x41, 0xc2, 0x0e, 0x13, 0x08, 0x2c, 0x49, 0x15, 0xc0, 0xd6, 0x74, 0x16,
+ 0xc0, 0xd6, 0x7e, 0x97, 0x08, 0x2c, 0x81, 0x9b, 0x08, 0x2c, 0xa1, 0xc2,
+ 0x0b, 0xa2, 0x08, 0x2c, 0xa9, 0xc2, 0x06, 0x6b, 0x08, 0x2c, 0x09, 0xc2,
+ 0x01, 0xa5, 0x08, 0x2c, 0x51, 0xc2, 0x00, 0x29, 0x08, 0x2c, 0x89, 0xc2,
+ 0x01, 0x66, 0x08, 0x2c, 0x90, 0x83, 0x08, 0x2c, 0xb9, 0x04, 0xc0, 0xd6,
+ 0x88, 0x05, 0xc0, 0xd6, 0x92, 0x06, 0xc0, 0xd6, 0x9c, 0x87, 0x08, 0x2c,
+ 0xfb, 0x00, 0xd6, 0xa6, 0xc2, 0x13, 0xfc, 0x08, 0x2d, 0x01, 0xc2, 0x07,
+ 0x69, 0x08, 0x2d, 0x09, 0x0a, 0xc0, 0xd6, 0xaa, 0x8b, 0x08, 0x2d, 0x2b,
+ 0x00, 0xd6, 0xb4, 0xc2, 0x1c, 0x3e, 0x08, 0x2d, 0x39, 0xc2, 0x06, 0x6b,
+ 0x08, 0x2d, 0x41, 0x0e, 0xc0, 0xd6, 0xba, 0xc2, 0x01, 0x03, 0x08, 0x2d,
+ 0x59, 0x10, 0xc0, 0xd6, 0xc4, 0x91, 0x08, 0x2d, 0x71, 0xc2, 0x03, 0x07,
+ 0x08, 0x2d, 0x79, 0xc2, 0x0e, 0x13, 0x08, 0x2d, 0x81, 0xc2, 0x01, 0xa5,
+ 0x08, 0x2d, 0x89, 0x15, 0xc0, 0xd6, 0xce, 0x16, 0xc0, 0xd6, 0xd8, 0x97,
+ 0x08, 0x2d, 0xb9, 0xc2, 0x00, 0x29, 0x08, 0x2d, 0xc1, 0xc2, 0x01, 0x66,
+ 0x08, 0x2d, 0xc9, 0x9b, 0x08, 0x2d, 0xd9, 0xc2, 0x0b, 0xa2, 0x08, 0x2d,
+ 0xe0, 0x44, 0x0d, 0x8f, 0xc0, 0xd6, 0xe2, 0xca, 0xa2, 0x90, 0x01, 0x0a,
+ 0xc0, 0x45, 0x05, 0xde, 0xc0, 0xd6, 0xee, 0x43, 0x0a, 0x20, 0x40, 0xd7,
+ 0x00, 0xc6, 0x05, 0x1b, 0x01, 0x0a, 0xd9, 0x15, 0xc0, 0xd7, 0x0c, 0xc5,
+ 0xa2, 0x94, 0x01, 0x0a, 0xa9, 0x16, 0xc0, 0xd7, 0x18, 0xc5, 0xe2, 0x33,
+ 0x01, 0x0a, 0x89, 0xc7, 0x08, 0x19, 0x00, 0x05, 0xe1, 0xc4, 0x01, 0x1d,
+ 0x00, 0x05, 0xe8, 0x42, 0x06, 0xfb, 0xc0, 0xd7, 0x24, 0x0e, 0xc0, 0xd7,
+ 0x30, 0x05, 0xc0, 0xd7, 0x40, 0x14, 0xc0, 0xd7, 0x4a, 0x42, 0x00, 0x93,
+ 0xc0, 0xd7, 0x56, 0x07, 0xc0, 0xd7, 0x62, 0x15, 0xc0, 0xd7, 0x6e, 0x06,
+ 0xc0, 0xd7, 0x80, 0xc9, 0x11, 0x47, 0x70, 0x01, 0x71, 0xcc, 0x8d, 0x0c,
+ 0x70, 0x01, 0x69, 0x12, 0xc0, 0xd7, 0x8c, 0x03, 0xc0, 0xd7, 0x98, 0xc5,
+ 0x1f, 0x01, 0x70, 0x03, 0xf1, 0xcd, 0x36, 0x6d, 0x70, 0x03, 0xe1, 0xcb,
+ 0x96, 0x82, 0x70, 0x01, 0x18, 0x4b, 0x6f, 0x71, 0xc0, 0xd7, 0xaa, 0x47,
+ 0x01, 0xff, 0x40, 0xd7, 0xb2, 0x47, 0x01, 0xff, 0xc0, 0xd8, 0x04, 0x45,
+ 0x02, 0xcb, 0xc0, 0xd8, 0x65, 0x4b, 0x6f, 0x71, 0x40, 0xd8, 0x71, 0x43,
+ 0x0b, 0x0b, 0xc0, 0xd8, 0x79, 0x43, 0x75, 0x5b, 0xc0, 0xd8, 0x85, 0xc5,
+ 0xe3, 0x69, 0x0f, 0x9a, 0x50, 0xd7, 0x27, 0x0f, 0x08, 0xff, 0xf9, 0x15,
+ 0xc0, 0xd8, 0x91, 0xd2, 0x4e, 0x36, 0x08, 0xff, 0x71, 0x16, 0xc0, 0xd8,
+ 0xa9, 0x03, 0xc0, 0xd8, 0xb5, 0x05, 0xc0, 0xd8, 0xc7, 0x0e, 0xc0, 0xd8,
+ 0xd3, 0x06, 0xc0, 0xd8, 0xdf, 0xd4, 0x38, 0xa2, 0x08, 0xff, 0x21, 0x49,
+ 0x54, 0xdd, 0xc0, 0xd8, 0xf7, 0x4b, 0x6f, 0x71, 0xc0, 0xd9, 0x09, 0xc2,
+ 0x00, 0xeb, 0x00, 0x5e, 0x81, 0x47, 0x37, 0x49, 0xc0, 0xd9, 0x29, 0xca,
+ 0xa9, 0x0c, 0x00, 0x5f, 0xa1, 0xc9, 0xb8, 0x0d, 0x00, 0x5f, 0xa9, 0xca,
+ 0x79, 0xf2, 0x00, 0x5f, 0xc8, 0x46, 0x08, 0xd7, 0xc0, 0xd9, 0x3b, 0xd1,
+ 0x56, 0x0f, 0x08, 0xb5, 0xc9, 0x47, 0x01, 0xff, 0xc0, 0xd9, 0x5f, 0x45,
+ 0x02, 0xcb, 0xc0, 0xd9, 0xc6, 0x4b, 0x6f, 0x71, 0x40, 0xd9, 0xd8, 0x45,
+ 0x02, 0xcb, 0xc0, 0xd9, 0xf2, 0x4b, 0x91, 0xf4, 0xc0, 0xda, 0x25, 0x4b,
+ 0x92, 0x15, 0xc0, 0xda, 0x49, 0x42, 0x00, 0xea, 0xc0, 0xda, 0x6d, 0x4b,
+ 0x6f, 0x71, 0xc0, 0xda, 0x79, 0x47, 0x01, 0xff, 0x40, 0xda, 0xa3, 0x16,
+ 0xc0, 0xda, 0xf1, 0x83, 0x00, 0xcb, 0x1b, 0x00, 0xdb, 0x05, 0x87, 0x00,
+ 0xcb, 0x5b, 0x00, 0xdb, 0x0f, 0x97, 0x00, 0xcb, 0x3b, 0x00, 0xdb, 0x17,
+ 0x91, 0x00, 0xcb, 0x4b, 0x00, 0xdb, 0x1b, 0x8b, 0x00, 0xcb, 0x21, 0x10,
+ 0xc0, 0xdb, 0x1f, 0x0d, 0xc0, 0xdb, 0x29, 0xc2, 0x0e, 0x13, 0x00, 0xca,
+ 0xf9, 0xc2, 0x01, 0x0e, 0x00, 0xca, 0xf1, 0xc2, 0x06, 0x8c, 0x00, 0xca,
+ 0xe9, 0xc2, 0x01, 0xa7, 0x00, 0xca, 0xe1, 0xc2, 0x00, 0x3f, 0x00, 0xca,
+ 0xd9, 0x12, 0xc0, 0xdb, 0x33, 0xc2, 0x00, 0x96, 0x00, 0xca, 0xc1, 0xc2,
+ 0x1a, 0x36, 0x00, 0xca, 0xa9, 0xc2, 0x0e, 0xe5, 0x00, 0xca, 0xa1, 0xc2,
+ 0x23, 0xe3, 0x00, 0xca, 0x88, 0x47, 0x11, 0xf0, 0xc0, 0xdb, 0x3d, 0x49,
+ 0xb4, 0x89, 0xc0, 0xdb, 0x55, 0x46, 0x33, 0x45, 0xc0, 0xdb, 0x6d, 0x45,
+ 0xe3, 0x5a, 0xc0, 0xdb, 0x87, 0x47, 0x01, 0xff, 0x40, 0xdb, 0x93, 0xc2,
+ 0x18, 0x55, 0x0f, 0xcc, 0x19, 0xcd, 0x81, 0x9a, 0x01, 0x05, 0xd0, 0x46,
+ 0x04, 0x91, 0xc0, 0xdb, 0x9f, 0xd1, 0x57, 0xb8, 0x01, 0x36, 0x49, 0x42,
+ 0x00, 0x10, 0xc0, 0xdb, 0xab, 0x06, 0xc0, 0xdb, 0xb7, 0x15, 0xc0, 0xdb,
+ 0xc3, 0x03, 0xc0, 0xdb, 0xdb, 0x05, 0xc0, 0xdb, 0xe7, 0xd7, 0x27, 0x6b,
+ 0x01, 0x09, 0x49, 0xcc, 0x8a, 0x24, 0x0f, 0xac, 0x78, 0xd2, 0x25, 0x52,
+ 0x0f, 0xbe, 0x11, 0x06, 0xc0, 0xdb, 0xf3, 0x0e, 0xc0, 0xdb, 0xff, 0x14,
+ 0xc0, 0xdc, 0x0b, 0xce, 0x75, 0x8e, 0x0f, 0xaf, 0x59, 0xcc, 0x8d, 0x90,
+ 0x0f, 0xad, 0x89, 0xd3, 0x47, 0x2a, 0x0f, 0xad, 0x39, 0xd8, 0x25, 0x94,
+ 0x01, 0x53, 0xb0, 0x4c, 0x84, 0xcc, 0xc0, 0xdc, 0x17, 0x4b, 0x92, 0x8e,
+ 0xc0, 0xdc, 0x23, 0x43, 0x02, 0x2e, 0xc0, 0xdc, 0x29, 0x4c, 0x86, 0xb8,
+ 0x40, 0xdc, 0x2f, 0x00, 0xc0, 0xdc, 0x35, 0xc9, 0xa3, 0xbd, 0x0f, 0xc8,
+ 0xa0, 0xcf, 0x6b, 0xa9, 0x01, 0x36, 0x41, 0xc5, 0xdd, 0x51, 0x01, 0x30,
+ 0x40, 0xc9, 0xb6, 0x30, 0x0f, 0xa2, 0x71, 0xc7, 0xc7, 0x67, 0x0f, 0xa2,
+ 0x68, 0xc4, 0x5b, 0x40, 0x01, 0x11, 0xa1, 0x00, 0x40, 0xdc, 0x3f, 0xc5,
+ 0xa1, 0x4b, 0x0f, 0x99, 0x09, 0xc7, 0xcb, 0xd5, 0x01, 0x4f, 0x38, 0x11,
+ 0xc0, 0xdc, 0x4b, 0xc7, 0xc8, 0x63, 0x00, 0x3d, 0x51, 0x07, 0xc0, 0xdc,
+ 0x5d, 0xc7, 0xc9, 0xeb, 0x00, 0x3d, 0x41, 0x03, 0xc0, 0xdc, 0x6f, 0x47,
+ 0x01, 0xff, 0xc0, 0xdc, 0x7b, 0xc5, 0xdd, 0x8d, 0x00, 0x3d, 0x80, 0x05,
+ 0xc0, 0xdc, 0xe5, 0x46, 0x08, 0xd7, 0x40, 0xdc, 0xf1, 0x43, 0x01, 0x1f,
+ 0xc0, 0xdd, 0x15, 0x96, 0x0f, 0x9d, 0x48, 0x05, 0xc0, 0xdd, 0x33, 0xcc,
+ 0x8e, 0x80, 0x01, 0x71, 0x18, 0x05, 0xc0, 0xdd, 0x3f, 0xcc, 0x8e, 0x80,
+ 0x01, 0x71, 0x10, 0xd3, 0x00, 0xb4, 0x01, 0x49, 0xd3, 0x00, 0xdd, 0x4b,
+ 0xda, 0x1c, 0x8c, 0x01, 0x49, 0xe0, 0xd0, 0x5d, 0x1f, 0x0f, 0x15, 0x71,
+ 0x47, 0x01, 0xff, 0x40, 0xdd, 0x51, 0x44, 0xe6, 0x4b, 0xc0, 0xdd, 0xca,
+ 0x43, 0xec, 0xf3, 0xc0, 0xdd, 0xd6, 0x43, 0xec, 0xf0, 0xc0, 0xdd, 0xe8,
+ 0x43, 0xec, 0xf6, 0x40, 0xdd, 0xf4, 0xc5, 0xb3, 0x25, 0x0f, 0xd5, 0x28,
+ 0xc4, 0x61, 0x04, 0x0f, 0xb4, 0x58, 0xc5, 0x7e, 0xa7, 0x0f, 0xad, 0x91,
+ 0xc3, 0x07, 0x51, 0x0f, 0xb4, 0xe0, 0xd3, 0x45, 0x16, 0x01, 0x56, 0xd9,
+ 0xc5, 0xda, 0xe0, 0x01, 0x5e, 0xb8, 0x0e, 0xc0, 0xde, 0x00, 0x45, 0x00,
+ 0xaf, 0x40, 0xde, 0x0c, 0xc5, 0x67, 0xe4, 0x01, 0x31, 0xb9, 0xc8, 0x31,
+ 0x1b, 0x01, 0x31, 0xb1, 0x19, 0xc0, 0xde, 0x1e, 0xc7, 0x73, 0xf1, 0x01,
+ 0x31, 0x99, 0xc4, 0x88, 0x1c, 0x01, 0x31, 0x91, 0xc4, 0x27, 0x95, 0x01,
+ 0x31, 0x89, 0xc6, 0x6e, 0x26, 0x01, 0x31, 0x80, 0x4d, 0x15, 0xd6, 0xc0,
+ 0xde, 0x2a, 0xc5, 0x1f, 0x01, 0x01, 0x12, 0x59, 0xc8, 0x20, 0x08, 0x01,
+ 0x11, 0x69, 0x12, 0xc0, 0xde, 0x42, 0x54, 0x3a, 0x46, 0xc0, 0xde, 0x4e,
+ 0xce, 0x73, 0x5e, 0x01, 0x57, 0xb1, 0x47, 0xcc, 0xa7, 0xc0, 0xde, 0x5a,
+ 0xd7, 0x2a, 0x4b, 0x01, 0x57, 0xd9, 0xc6, 0xd5, 0xa4, 0x01, 0x72, 0x58,
+ 0xd0, 0x5f, 0x2f, 0x01, 0x5e, 0xf8, 0xc2, 0x3a, 0xc8, 0x0f, 0x9e, 0x31,
+ 0x45, 0x02, 0x5f, 0x40, 0xde, 0x66, 0xc5, 0xdc, 0x66, 0x0f, 0xb4, 0x70,
+ 0x11, 0xc0, 0xde, 0x72, 0xc6, 0xd6, 0xe2, 0x0e, 0x9a, 0x81, 0xc5, 0x04,
+ 0xc6, 0x0e, 0x99, 0xb1, 0x43, 0x11, 0x48, 0x40, 0xde, 0x7e, 0x03, 0xc0,
+ 0xde, 0x8a, 0xc5, 0xd9, 0xaf, 0x0e, 0x99, 0x28, 0x0b, 0xc0, 0xde, 0x96,
+ 0xc8, 0x36, 0x04, 0x0e, 0x9a, 0x41, 0x07, 0xc0, 0xde, 0xa6, 0xc4, 0xe9,
+ 0xe3, 0x0e, 0x9a, 0x19, 0xc5, 0xdc, 0x89, 0x0e, 0x99, 0x00, 0xcb, 0x97,
+ 0x69, 0x0e, 0x9a, 0x99, 0xc9, 0xaf, 0x70, 0x0e, 0x98, 0x68, 0x11, 0xc0,
+ 0xde, 0xb8, 0x43, 0x07, 0xc2, 0xc0, 0xde, 0xc2, 0xc5, 0xc2, 0xa6, 0x0e,
+ 0x99, 0x09, 0xc5, 0x01, 0xa2, 0x0e, 0x98, 0x30, 0xca, 0x9f, 0x0c, 0x0e,
+ 0x9a, 0x89, 0xcb, 0x98, 0x3a, 0x0e, 0x9a, 0x09, 0xc6, 0xd1, 0x9c, 0x0e,
+ 0x98, 0xc9, 0xc5, 0x3e, 0xbd, 0x0e, 0x98, 0x60, 0xc7, 0xcf, 0x39, 0x0e,
+ 0x9a, 0x69, 0xcb, 0x49, 0xc9, 0x0e, 0x98, 0xb0, 0x16, 0xc0, 0xde, 0xcc,
+ 0xc8, 0xb9, 0xd3, 0x0e, 0x9a, 0x59, 0xc6, 0x88, 0x09, 0x0e, 0x9a, 0x28,
+ 0xc9, 0xab, 0x4a, 0x0e, 0x9a, 0x51, 0xcc, 0x85, 0xbc, 0x0e, 0x9a, 0x11,
+ 0xc7, 0x31, 0x2d, 0x0e, 0x99, 0xd1, 0x10, 0xc0, 0xde, 0xd6, 0xc3, 0x2e,
+ 0x23, 0x0e, 0x98, 0xe0, 0xc3, 0x12, 0x8a, 0x0e, 0x9a, 0x31, 0xc6, 0xd5,
+ 0x1a, 0x0e, 0x98, 0x90, 0xc3, 0x1c, 0xd2, 0x0e, 0x9a, 0x21, 0xc5, 0x7a,
+ 0xa0, 0x0e, 0x98, 0xb8, 0xc6, 0xd8, 0x56, 0x0e, 0x9a, 0x01, 0xc6, 0x14,
+ 0xfe, 0x0e, 0x99, 0xc9, 0xc4, 0x81, 0xd0, 0x0e, 0x98, 0x40, 0xc8, 0x53,
+ 0x9a, 0x0e, 0x99, 0x43, 0x00, 0xde, 0xe8, 0xca, 0xaa, 0x6a, 0x0e, 0x99,
+ 0xf1, 0xc8, 0xc2, 0xcb, 0x0e, 0x99, 0x91, 0xcc, 0x8a, 0x3c, 0x0e, 0x99,
+ 0x78, 0xc5, 0xdc, 0x16, 0x0e, 0x99, 0xa9, 0x07, 0x40, 0xde, 0xee, 0x03,
+ 0xc0, 0xde, 0xfe, 0xc5, 0xdf, 0xc2, 0x0e, 0x99, 0x51, 0xca, 0xa9, 0x48,
+ 0x0e, 0x98, 0x98, 0xc6, 0xd4, 0x00, 0x0e, 0x99, 0x39, 0xcc, 0x83, 0x64,
+ 0x0e, 0x98, 0x50, 0xce, 0x71, 0xc8, 0x0e, 0x99, 0x19, 0xcc, 0x89, 0x40,
+ 0x0e, 0x98, 0x71, 0xc6, 0x5f, 0x12, 0x0e, 0x98, 0x48, 0xd9, 0x1d, 0xc1,
+ 0x01, 0x3d, 0xd1, 0xd1, 0x24, 0xf3, 0x01, 0x39, 0xd8, 0x46, 0x32, 0xae,
+ 0xc0, 0xdf, 0x0a, 0xc5, 0xc3, 0x66, 0x0f, 0xa9, 0x69, 0xc6, 0x2a, 0xca,
+ 0x0f, 0xa7, 0xd0, 0x45, 0x02, 0xcb, 0xc0, 0xdf, 0x22, 0x42, 0x00, 0x58,
+ 0xc0, 0xdf, 0x42, 0x4b, 0x6f, 0x71, 0xc0, 0xdf, 0x4e, 0xce, 0x71, 0x58,
+ 0x00, 0x62, 0xb1, 0x46, 0x08, 0xd7, 0xc0, 0xdf, 0x74, 0x4f, 0x6a, 0x9b,
+ 0x40, 0xdf, 0x98, 0xc5, 0x10, 0x15, 0x0f, 0xa1, 0x78, 0xd0, 0x58, 0x4f,
+ 0x01, 0x4e, 0xa9, 0xcf, 0x6c, 0x4e, 0x01, 0x4e, 0xa0, 0xc8, 0x15, 0xe3,
+ 0x01, 0x11, 0xe3, 0x00, 0xdf, 0xa8, 0x45, 0x01, 0xac, 0x40, 0xdf, 0xac,
+ 0x46, 0x08, 0xd7, 0xc0, 0xdf, 0xb8, 0xc2, 0x00, 0xeb, 0x08, 0xa6, 0x39,
+ 0x03, 0xc0, 0xdf, 0xdc, 0xc5, 0xdd, 0xd8, 0x08, 0xa6, 0x29, 0x45, 0x02,
+ 0xcb, 0xc0, 0xdf, 0xe8, 0x4b, 0x6f, 0x71, 0xc0, 0xdf, 0xfe, 0x47, 0x01,
+ 0xff, 0x40, 0xe0, 0x24, 0xc2, 0x00, 0xdd, 0x01, 0x02, 0x51, 0xca, 0xa1,
+ 0x96, 0x01, 0x72, 0x90, 0xe0, 0x0b, 0xe7, 0x08, 0x59, 0xd0, 0x1b, 0xc0,
+ 0xe0, 0x8b, 0x44, 0x02, 0xcc, 0xc0, 0xe0, 0x97, 0x49, 0x5d, 0xcf, 0x40,
+ 0xe0, 0xc3, 0x09, 0xc0, 0xe0, 0xcf, 0x42, 0x01, 0x5b, 0xc0, 0xe0, 0xdb,
+ 0x05, 0xc0, 0xe0, 0xe7, 0xd5, 0x33, 0x79, 0x00, 0x78, 0x39, 0x15, 0xc0,
+ 0xe0, 0xf9, 0x04, 0xc0, 0xe1, 0x05, 0xd5, 0x35, 0x08, 0x00, 0x78, 0x61,
+ 0x10, 0xc0, 0xe1, 0x0f, 0x16, 0xc0, 0xe1, 0x1b, 0x14, 0xc0, 0xe1, 0x25,
+ 0x4c, 0x86, 0xf4, 0xc0, 0xe1, 0x31, 0xc7, 0xc5, 0x0d, 0x00, 0x7c, 0x21,
+ 0xc6, 0xd6, 0xca, 0x00, 0x7c, 0x29, 0xd6, 0x2c, 0x53, 0x00, 0x7e, 0x89,
+ 0xd3, 0x42, 0x90, 0x00, 0x7e, 0xc8, 0x4d, 0x7b, 0x8f, 0xc0, 0xe1, 0x3d,
+ 0x46, 0x02, 0x00, 0x40, 0xe1, 0x49, 0x15, 0xc0, 0xe1, 0xa9, 0xc9, 0xad,
+ 0x93, 0x00, 0x78, 0xc0, 0xc4, 0x16, 0x57, 0x00, 0x79, 0x01, 0xc3, 0x05,
+ 0x17, 0x00, 0x79, 0x09, 0x16, 0xc0, 0xe1, 0xb5, 0x08, 0xc0, 0xe1, 0xc1,
+ 0x15, 0xc0, 0xe1, 0xcd, 0xc5, 0x05, 0x1b, 0x00, 0x79, 0x41, 0xc4, 0x24,
+ 0x35, 0x00, 0x79, 0x49, 0x45, 0x01, 0x1d, 0x40, 0xe1, 0xd9, 0xc2, 0x0e,
+ 0x30, 0x00, 0x7b, 0x89, 0x8b, 0x00, 0x7b, 0x93, 0x00, 0xe1, 0xfd, 0x97,
+ 0x00, 0x7b, 0xa3, 0x00, 0xe2, 0x01, 0x48, 0xb7, 0xd7, 0xc0, 0xe2, 0x05,
+ 0x87, 0x00, 0x7b, 0xd3, 0x00, 0xe2, 0x13, 0x91, 0x00, 0x7b, 0xe3, 0x00,
+ 0xe2, 0x17, 0xca, 0xa4, 0x16, 0x00, 0x7c, 0x02, 0x00, 0xe2, 0x1b, 0xcd,
+ 0x78, 0x28, 0x00, 0x7d, 0xf8, 0xca, 0xa1, 0x32, 0x00, 0x7e, 0x01, 0xca,
+ 0xa7, 0x36, 0x00, 0x7e, 0x09, 0xc9, 0xab, 0x53, 0x00, 0x7e, 0x11, 0xca,
+ 0xa7, 0xb8, 0x00, 0x7e, 0x18, 0x1b, 0xc0, 0xe2, 0x1f, 0x51, 0x54, 0x77,
+ 0xc0, 0xe2, 0x39, 0x16, 0xc0, 0xe2, 0x41, 0x03, 0x40, 0xe2, 0x4d, 0xe0,
+ 0x05, 0x47, 0x01, 0x6b, 0x78, 0x00, 0xc0, 0xe2, 0x59, 0xda, 0x0b, 0xed,
+ 0x01, 0x71, 0x50, 0xc2, 0x00, 0xff, 0x01, 0x52, 0xb1, 0xc3, 0x00, 0x3a,
+ 0x01, 0x52, 0xa8, 0xcb, 0x8f, 0x3f, 0x01, 0x50, 0x41, 0xcc, 0x86, 0xe8,
+ 0x01, 0x50, 0x38, 0xd7, 0x27, 0xc7, 0x0f, 0xc5, 0x81, 0x58, 0x22, 0x4c,
+ 0xc0, 0xe2, 0x65, 0x57, 0x29, 0x93, 0x40, 0xe2, 0x77, 0x15, 0xc0, 0xe2,
+ 0x83, 0x0e, 0xc0, 0xe2, 0xae, 0x42, 0x00, 0x3f, 0xc0, 0xe2, 0xbe, 0x06,
+ 0xc0, 0xe2, 0xd0, 0x14, 0xc0, 0xe2, 0xe6, 0xc5, 0x4c, 0x81, 0x00, 0x32,
+ 0x83, 0x00, 0xe2, 0xfc, 0x08, 0xc0, 0xe3, 0x09, 0x45, 0x03, 0xf5, 0xc0,
+ 0xe3, 0x24, 0x16, 0xc0, 0xe3, 0x36, 0x05, 0xc0, 0xe3, 0x52, 0x42, 0x01,
+ 0x0e, 0xc0, 0xe3, 0x5e, 0x12, 0xc0, 0xe3, 0x6a, 0x18, 0xc0, 0xe3, 0x80,
+ 0xd2, 0x4e, 0x12, 0x00, 0x44, 0x39, 0x07, 0xc0, 0xe3, 0x8c, 0xd0, 0x59,
+ 0x6f, 0x00, 0x32, 0xf9, 0xc8, 0xc2, 0x5b, 0x00, 0x32, 0xc9, 0xce, 0x6d,
+ 0xd8, 0x00, 0x32, 0xb9, 0xcd, 0x2d, 0xa6, 0x00, 0x30, 0xf9, 0x47, 0x37,
+ 0x49, 0x40, 0xe3, 0x98, 0x46, 0x08, 0xd7, 0xc0, 0xe3, 0xa4, 0x44, 0x03,
+ 0x07, 0xc0, 0xe3, 0xc8, 0xcb, 0x96, 0xcf, 0x00, 0x30, 0x39, 0xc9, 0xb3,
+ 0xa8, 0x00, 0x30, 0x30, 0x48, 0x1b, 0x0d, 0xc0, 0xe3, 0xd4, 0x46, 0x02,
+ 0x00, 0x40, 0xe3, 0xe6, 0xd0, 0x4a, 0x47, 0x00, 0x2a, 0xf9, 0xc9, 0x2d,
+ 0x3c, 0x00, 0x2a, 0xd0, 0xc4, 0x00, 0xeb, 0x00, 0x2a, 0xe9, 0x4e, 0x01,
+ 0xf8, 0x40, 0xe4, 0x5f, 0xcf, 0x0f, 0x63, 0x00, 0x2a, 0xe1, 0xcc, 0x85,
+ 0x44, 0x00, 0x2a, 0xd8, 0x4e, 0x01, 0xf8, 0xc0, 0xe4, 0xd8, 0xd1, 0x2e,
+ 0x64, 0x0f, 0x4a, 0x40, 0xc4, 0x66, 0xbd, 0x0f, 0x49, 0x11, 0x06, 0xc0,
+ 0xe5, 0x58, 0xc4, 0x79, 0xaa, 0x0f, 0x49, 0x21, 0xc4, 0xe6, 0x03, 0x0f,
+ 0x49, 0x29, 0x04, 0xc0, 0xe5, 0x64, 0x15, 0xc0, 0xe5, 0x6e, 0xc2, 0x03,
+ 0x07, 0x0f, 0x49, 0x41, 0xc2, 0x00, 0x9a, 0x0f, 0x49, 0x51, 0x87, 0x0f,
+ 0x49, 0x59, 0xc2, 0x01, 0xa7, 0x0f, 0x49, 0x61, 0x8b, 0x0f, 0x49, 0x69,
+ 0x91, 0x0f, 0x49, 0x71, 0x1b, 0xc0, 0xe5, 0x7a, 0xc3, 0x7c, 0xad, 0x0f,
+ 0x49, 0x89, 0x10, 0xc0, 0xe5, 0x84, 0x0d, 0xc0, 0xe5, 0x96, 0x97, 0x0f,
+ 0x49, 0xa9, 0xc4, 0xe5, 0xb3, 0x0f, 0x49, 0xb1, 0xc3, 0x11, 0x3f, 0x0f,
+ 0x49, 0xb9, 0xc2, 0x01, 0x0e, 0x0f, 0x49, 0xc1, 0xc4, 0xdb, 0x76, 0x0f,
+ 0x49, 0xc9, 0x09, 0xc0, 0xe5, 0xa8, 0xc2, 0x00, 0x16, 0x0f, 0x49, 0xe1,
+ 0xc2, 0x06, 0x8c, 0x0f, 0x49, 0xf1, 0xc3, 0xb7, 0x74, 0x0f, 0x4a, 0x08,
+ 0xc8, 0x00, 0xf6, 0x0f, 0x4a, 0x31, 0xd4, 0x3d, 0xb6, 0x0f, 0x4a, 0x48,
+ 0xc4, 0x33, 0x1b, 0x0f, 0x4a, 0x51, 0xd0, 0x56, 0x65, 0x0f, 0x4a, 0x58,
+ 0xc4, 0x16, 0x57, 0x0f, 0x4a, 0x81, 0xc3, 0x05, 0x17, 0x0f, 0x4a, 0x89,
+ 0x16, 0xc0, 0xe5, 0xb2, 0x08, 0xc0, 0xe5, 0xbe, 0x15, 0xc0, 0xe5, 0xca,
+ 0xc5, 0x05, 0x1b, 0x0f, 0x4a, 0xc1, 0xc4, 0x24, 0x35, 0x0f, 0x4a, 0xc8,
+ 0xd0, 0x0f, 0x62, 0x0f, 0x4a, 0xf1, 0xcd, 0x2d, 0xa6, 0x0f, 0x4a, 0xf8,
+ 0x47, 0xca, 0xc4, 0xc0, 0xe5, 0xd6, 0x43, 0x05, 0x29, 0xc0, 0xe5, 0xe2,
+ 0xcb, 0x94, 0xa9, 0x0f, 0xb8, 0x79, 0xca, 0x9e, 0x4e, 0x0f, 0xb9, 0xf1,
+ 0xc4, 0x24, 0xe4, 0x0f, 0xba, 0xc8, 0x14, 0xc0, 0xe5, 0xf2, 0xc7, 0xcb,
+ 0xf8, 0x0f, 0xb8, 0x99, 0x46, 0x4e, 0x6d, 0xc0, 0xe5, 0xfe, 0x03, 0x40,
+ 0xe6, 0x0a, 0x42, 0x00, 0xbe, 0xc0, 0xe6, 0x1c, 0xc8, 0xb9, 0x13, 0x0f,
+ 0xbb, 0x80, 0x11, 0xc0, 0xe6, 0x28, 0xd2, 0x48, 0x72, 0x0f, 0xb8, 0x71,
+ 0xca, 0xa4, 0xac, 0x0f, 0xba, 0xf9, 0x17, 0x40, 0xe6, 0x37, 0x44, 0x5f,
+ 0x3b, 0xc0, 0xe6, 0x43, 0x42, 0x01, 0x5b, 0xc0, 0xe6, 0x4d, 0xc4, 0xe4,
+ 0x4b, 0x0f, 0xba, 0x69, 0xc6, 0x81, 0x6d, 0x0f, 0xba, 0x88, 0x07, 0xc0,
+ 0xe6, 0x59, 0x47, 0xc9, 0x19, 0x40, 0xe6, 0x6b, 0x0b, 0xc0, 0xe6, 0x75,
+ 0xc8, 0xbc, 0x6b, 0x0f, 0xb9, 0x40, 0x17, 0xc0, 0xe6, 0x81, 0x03, 0xc0,
+ 0xe6, 0x8d, 0xc5, 0xdc, 0xb6, 0x0f, 0xb8, 0xd9, 0xc5, 0xad, 0xf7, 0x0f,
+ 0xba, 0x39, 0xce, 0x75, 0x80, 0x0f, 0xba, 0x79, 0x16, 0xc0, 0xe6, 0x9c,
+ 0xc3, 0xc6, 0x29, 0x0f, 0xba, 0xa0, 0xcb, 0x95, 0x4e, 0x0f, 0xb9, 0x59,
+ 0x43, 0x01, 0xea, 0xc0, 0xe6, 0xa8, 0xc2, 0x00, 0xac, 0x0f, 0xb8, 0x09,
+ 0x0e, 0xc0, 0xe6, 0xb2, 0xc6, 0xd2, 0x9e, 0x0f, 0xb9, 0xd1, 0xca, 0xa1,
+ 0x6e, 0x0f, 0xb9, 0xe9, 0xc4, 0x6f, 0x36, 0x0f, 0xba, 0xb9, 0xc6, 0xd8,
+ 0x50, 0x0f, 0xba, 0xd8, 0xc7, 0xcc, 0xdf, 0x0f, 0xb9, 0x51, 0xc8, 0xbf,
+ 0xeb, 0x0f, 0xba, 0x98, 0xc3, 0x01, 0xa4, 0x0f, 0xb8, 0xa9, 0xc3, 0x00,
+ 0x49, 0x0f, 0xbb, 0x78, 0x4f, 0x62, 0x2b, 0xc0, 0xe6, 0xc4, 0xc8, 0xbb,
+ 0x53, 0x0f, 0xb9, 0xc1, 0xc4, 0x73, 0x94, 0x0f, 0xbb, 0x88, 0xc3, 0x00,
+ 0x98, 0x0f, 0xb8, 0x21, 0x9a, 0x0f, 0xba, 0x50, 0xc9, 0xad, 0x81, 0x0f,
+ 0xb8, 0x01, 0xc7, 0xc5, 0x61, 0x0f, 0xba, 0x08, 0xc3, 0x1a, 0xb6, 0x0f,
+ 0xb8, 0xd1, 0xc2, 0x01, 0x0a, 0x0f, 0xba, 0x48, 0x44, 0x09, 0x3d, 0xc0,
+ 0xe6, 0xd4, 0xcc, 0x86, 0x94, 0x0f, 0xb9, 0x08, 0x11, 0xc0, 0xe6, 0xde,
+ 0x44, 0x01, 0x1e, 0x40, 0xe6, 0xea, 0xd7, 0x07, 0x90, 0x01, 0x53, 0x78,
+ 0xd3, 0x41, 0xd2, 0x0f, 0x9f, 0x39, 0xc5, 0x3f, 0xff, 0x0f, 0xb4, 0xb8,
+ 0x1d, 0xc0, 0xe6, 0xf6, 0x1e, 0xc0, 0xe7, 0x1e, 0x1f, 0xc0, 0xe7, 0x46,
+ 0x20, 0xc0, 0xe7, 0x6e, 0x21, 0xc0, 0xe7, 0x96, 0x22, 0x40, 0xe7, 0xbe,
+ 0xd3, 0x43, 0xd3, 0x01, 0x3f, 0x91, 0x05, 0xc0, 0xe7, 0xd0, 0xd1, 0x03,
+ 0xf5, 0x01, 0x0d, 0xd1, 0x16, 0xc0, 0xe7, 0xdc, 0x48, 0x00, 0x68, 0xc0,
+ 0xe7, 0xe8, 0xcb, 0x8b, 0xa4, 0x01, 0x50, 0x88, 0x46, 0x01, 0xab, 0x40,
+ 0xe7, 0xee, 0xda, 0x1a, 0x02, 0x01, 0x37, 0x11, 0xc3, 0x9e, 0x05, 0x01,
+ 0x5e, 0xc8, 0x8d, 0x00, 0x01, 0x53, 0x00, 0xe7, 0xfa, 0x8f, 0x01, 0x02,
+ 0x10, 0xc2, 0x00, 0x96, 0x08, 0xba, 0x31, 0x83, 0x08, 0xb8, 0x70, 0xc2,
+ 0x01, 0x01, 0x08, 0xba, 0x29, 0xc2, 0x1a, 0x36, 0x08, 0xb8, 0x81, 0x83,
+ 0x08, 0xb8, 0x19, 0xc2, 0x07, 0x69, 0x08, 0xb8, 0x10, 0x06, 0xc0, 0xe8,
+ 0x00, 0xc2, 0x01, 0x0e, 0x08, 0xb8, 0xa1, 0x83, 0x08, 0xb8, 0x98, 0x16,
+ 0xc0, 0xe8, 0x0a, 0xc2, 0x01, 0x0e, 0x08, 0xb8, 0x61, 0x83, 0x08, 0xb8,
+ 0x20, 0x83, 0x08, 0xba, 0x01, 0xc2, 0x01, 0x0e, 0x08, 0xb8, 0x58, 0x49,
+ 0x0d, 0x27, 0x40, 0xe8, 0x14, 0xc2, 0x01, 0x0e, 0x08, 0xb8, 0xc9, 0x83,
+ 0x08, 0xb8, 0x50, 0xc2, 0x01, 0x0e, 0x08, 0xb8, 0xc1, 0x83, 0x08, 0xb8,
+ 0x40, 0xc2, 0x01, 0x0e, 0x08, 0xb8, 0xb9, 0x83, 0x08, 0xb8, 0xa8, 0xc2,
+ 0x01, 0x0e, 0x08, 0xb8, 0x39, 0x83, 0x08, 0xb8, 0x30, 0xc2, 0x01, 0x0e,
+ 0x08, 0xb8, 0x09, 0x83, 0x08, 0xb8, 0x00, 0xc5, 0xe1, 0x75, 0x08, 0xb9,
+ 0xf1, 0x15, 0xc0, 0xe8, 0x26, 0xc6, 0xd7, 0x72, 0x08, 0xb9, 0x58, 0xc4,
+ 0x15, 0xa7, 0x08, 0xb9, 0xb9, 0xc2, 0x22, 0x45, 0x08, 0xb9, 0xb0, 0xc3,
+ 0x0d, 0x8f, 0x08, 0xb9, 0xa9, 0xc3, 0x08, 0xde, 0x08, 0xb9, 0xa0, 0xc4,
+ 0x05, 0xde, 0x08, 0xb9, 0x99, 0xc2, 0x0a, 0x20, 0x08, 0xb9, 0x90, 0x8f,
+ 0x08, 0xb9, 0x51, 0x8b, 0x08, 0xb9, 0x49, 0x99, 0x08, 0xb9, 0x39, 0x83,
+ 0x08, 0xb9, 0x08, 0x97, 0x08, 0xb9, 0x28, 0x8b, 0x08, 0xb9, 0x18, 0xca,
+ 0xa0, 0xb0, 0x08, 0xb8, 0xf9, 0x83, 0x08, 0xb8, 0xe8, 0xc2, 0x00, 0x5d,
+ 0x01, 0x1c, 0xab, 0x00, 0xe8, 0x32, 0x44, 0x4c, 0x8f, 0x40, 0xe8, 0x36,
+ 0xc9, 0x50, 0x0c, 0x01, 0x1b, 0xb0, 0xc9, 0x50, 0x0c, 0x01, 0x1b, 0xc8,
+ 0xc9, 0x50, 0x0c, 0x01, 0x1b, 0xc0, 0xc2, 0x03, 0x76, 0x01, 0x1b, 0xa1,
+ 0xce, 0x6e, 0xaa, 0x01, 0x1a, 0x30, 0x00, 0xc0, 0xe8, 0x42, 0xca, 0x6e,
+ 0xae, 0x01, 0x1a, 0x78, 0x43, 0x00, 0x69, 0xc0, 0xe8, 0x54, 0x42, 0x00,
+ 0x35, 0xc0, 0xe8, 0x5e, 0xcf, 0x67, 0xbc, 0x01, 0x1a, 0xd0, 0xd1, 0x50,
+ 0x04, 0x01, 0x1b, 0x71, 0x16, 0xc0, 0xe8, 0x68, 0xc8, 0x7e, 0xe1, 0x01,
+ 0x19, 0xf9, 0xca, 0xa4, 0x2a, 0x01, 0x19, 0xb8, 0xc8, 0xc1, 0x1b, 0x01,
+ 0x1b, 0x51, 0x46, 0x03, 0xfb, 0x40, 0xe8, 0x74, 0xcb, 0x9c, 0x18, 0x01,
+ 0x1b, 0x39, 0xca, 0x6e, 0xae, 0x01, 0x1a, 0x28, 0xc9, 0x1e, 0x89, 0x01,
+ 0x1b, 0x21, 0xc8, 0x50, 0x0d, 0x01, 0x1a, 0xd8, 0x49, 0x08, 0xe9, 0xc0,
+ 0xe8, 0x92, 0xcf, 0x66, 0xae, 0x01, 0x12, 0x80, 0x0a, 0xc0, 0xe8, 0x9e,
+ 0x15, 0xc0, 0xe8, 0xa8, 0xc2, 0x00, 0x29, 0x08, 0x59, 0x61, 0x1b, 0xc0,
+ 0xe8, 0xb6, 0xc2, 0x01, 0x03, 0x08, 0x59, 0x41, 0x10, 0xc0, 0xe8, 0xc0,
+ 0x06, 0xc0, 0xe8, 0xd4, 0x16, 0xc0, 0xe8, 0xde, 0xc2, 0x1c, 0x3e, 0x08,
+ 0x58, 0xc1, 0xc2, 0x01, 0xa1, 0x08, 0x58, 0xb9, 0x09, 0xc0, 0xe8, 0xee,
+ 0x1a, 0xc0, 0xe8, 0xfe, 0xc2, 0x00, 0xdc, 0x08, 0x58, 0x81, 0x97, 0x08,
+ 0x58, 0x73, 0x00, 0xe9, 0x0e, 0x8b, 0x08, 0x58, 0x63, 0x00, 0xe9, 0x12,
+ 0x91, 0x08, 0x58, 0x53, 0x00, 0xe9, 0x16, 0x87, 0x08, 0x58, 0x43, 0x00,
+ 0xe9, 0x1a, 0x83, 0x08, 0x58, 0x03, 0x00, 0xe9, 0x1e, 0xc2, 0x03, 0x07,
+ 0x08, 0x58, 0xf1, 0xc2, 0x13, 0xfc, 0x08, 0x58, 0xf9, 0x04, 0xc0, 0xe9,
+ 0x34, 0xc2, 0x01, 0xa5, 0x08, 0x59, 0x69, 0xc2, 0x00, 0x58, 0x08, 0x59,
+ 0x71, 0x1c, 0x40, 0xe9, 0x3e, 0xc3, 0x05, 0x17, 0x08, 0x08, 0x3b, 0x00,
+ 0xe9, 0x48, 0x16, 0xc0, 0xe9, 0x4c, 0x08, 0xc0, 0xe9, 0x5d, 0x15, 0xc0,
+ 0xe9, 0x65, 0xc5, 0x05, 0x1b, 0x08, 0x08, 0x73, 0x00, 0xe9, 0x77, 0xc4,
+ 0x24, 0x35, 0x08, 0x08, 0x7a, 0x00, 0xe9, 0x82, 0x46, 0x0e, 0x97, 0xc0,
+ 0xe9, 0x8f, 0x4e, 0x6f, 0x28, 0x40, 0xe9, 0xa5, 0xce, 0x6c, 0xa4, 0x08,
+ 0x09, 0xf1, 0xcd, 0x78, 0xaa, 0x08, 0x09, 0xf8, 0x0e, 0xc0, 0xe9, 0xb1,
+ 0x46, 0x11, 0x8d, 0xc0, 0xe9, 0xbd, 0x42, 0x00, 0x68, 0xc0, 0xe9, 0xf6,
+ 0x49, 0x02, 0x5b, 0xc0, 0xea, 0x02, 0x43, 0x11, 0x8a, 0xc0, 0xea, 0x1a,
+ 0x46, 0x01, 0x17, 0x40, 0xea, 0x32, 0xc6, 0x01, 0xe9, 0x0f, 0xbc, 0x81,
+ 0xc6, 0x03, 0xfa, 0x0f, 0xbc, 0x30, 0xc6, 0x12, 0x73, 0x0f, 0xbd, 0x59,
+ 0xd2, 0x48, 0xf0, 0x0f, 0xbd, 0xb8, 0xd6, 0x0a, 0xe8, 0x01, 0x1f, 0x09,
+ 0xcd, 0x00, 0xd2, 0x01, 0x1e, 0xf9, 0xcb, 0x1c, 0xe0, 0x01, 0x1e, 0xe9,
+ 0xce, 0x26, 0x2e, 0x01, 0x1d, 0xab, 0x00, 0xea, 0x4a, 0x45, 0x01, 0x1d,
+ 0xc0, 0xea, 0x50, 0x46, 0x01, 0x17, 0xc0, 0xea, 0x68, 0x45, 0x00, 0xcd,
+ 0xc0, 0xea, 0x72, 0xd7, 0x17, 0x18, 0x01, 0x49, 0xd8, 0x46, 0x01, 0xab,
+ 0x40, 0xea, 0x7c, 0x00, 0xc0, 0xea, 0x88, 0xc3, 0x01, 0x5e, 0x0f, 0x9d,
+ 0x98, 0xc4, 0x00, 0xfa, 0x0f, 0xa8, 0xb3, 0x00, 0xea, 0x94, 0x95, 0x0f,
+ 0xa6, 0xd0, 0x84, 0x01, 0x88, 0x2b, 0x00, 0xea, 0x9a, 0x92, 0x01, 0x88,
+ 0x31, 0x8f, 0x01, 0x88, 0x39, 0x88, 0x01, 0x88, 0x41, 0x86, 0x01, 0x88,
+ 0x49, 0x96, 0x01, 0x88, 0x51, 0x90, 0x01, 0x88, 0x5b, 0x00, 0xea, 0x9e,
+ 0x8e, 0x01, 0x88, 0x63, 0x00, 0xea, 0xa9, 0x89, 0x01, 0x88, 0x6b, 0x00,
+ 0xea, 0xad, 0x8d, 0x01, 0x88, 0x73, 0x00, 0xea, 0xbd, 0x8a, 0x01, 0x88,
+ 0x79, 0x8c, 0x01, 0x88, 0x83, 0x00, 0xea, 0xc1, 0x93, 0x01, 0x88, 0x89,
+ 0x9a, 0x01, 0x88, 0x91, 0x9c, 0x01, 0x88, 0xbb, 0x00, 0xea, 0xc5, 0x85,
+ 0x01, 0x88, 0xc3, 0x00, 0xea, 0xd1, 0x95, 0x01, 0x88, 0xcb, 0x00, 0xea,
+ 0xd5, 0x94, 0x01, 0x88, 0xb1, 0x83, 0x01, 0x88, 0xd3, 0x00, 0xea, 0xd9,
+ 0x91, 0x01, 0x88, 0xdb, 0x00, 0xea, 0xf6, 0x87, 0x01, 0x88, 0xe3, 0x00,
+ 0xeb, 0x10, 0x8b, 0x01, 0x89, 0x3b, 0x00, 0xeb, 0x27, 0x97, 0x01, 0x89,
+ 0x43, 0x00, 0xeb, 0x40, 0x98, 0x01, 0x89, 0x50, 0x92, 0x01, 0x8d, 0xa1,
+ 0x96, 0x01, 0x8d, 0xa9, 0x8d, 0x01, 0x8d, 0xb1, 0x8a, 0x01, 0x8d, 0xb9,
+ 0x89, 0x01, 0x8d, 0xd8, 0xc3, 0xed, 0x6b, 0x0f, 0xd8, 0x03, 0x00, 0xeb,
+ 0x46, 0xc3, 0xed, 0x65, 0x0f, 0xd8, 0x1b, 0x00, 0xeb, 0x66, 0xc3, 0xed,
+ 0x68, 0x0f, 0xd8, 0x0b, 0x00, 0xeb, 0x78, 0xc3, 0xed, 0x5f, 0x0f, 0xd8,
+ 0x7b, 0x00, 0xeb, 0x91, 0xc3, 0xed, 0x62, 0x0f, 0xd8, 0x3b, 0x00, 0xeb,
+ 0x95, 0xc3, 0xed, 0x71, 0x0f, 0xd8, 0xf0, 0x42, 0x00, 0xfd, 0xc0, 0xeb,
+ 0xa0, 0x4c, 0x8e, 0xd4, 0xc0, 0xeb, 0xea, 0x51, 0x52, 0x9b, 0x40, 0xeb,
+ 0xfc, 0xc4, 0xe5, 0xb7, 0x0f, 0xa6, 0xc1, 0xc5, 0x19, 0x74, 0x0f, 0xa4,
+ 0xc8, 0x4a, 0xa8, 0x1c, 0x40, 0xec, 0x10, 0xc8, 0xbc, 0x7b, 0x0f, 0xd3,
+ 0x81, 0xc8, 0xb8, 0xe3, 0x0f, 0xcf, 0xb1, 0x11, 0x40, 0xec, 0x28, 0x42,
+ 0x07, 0x44, 0xc0, 0xec, 0x32, 0x4f, 0x29, 0x84, 0xc0, 0xec, 0x3f, 0x46,
+ 0xd3, 0x9a, 0xc0, 0xec, 0x55, 0xc5, 0xe0, 0xd5, 0x00, 0xda, 0xe1, 0x46,
+ 0x08, 0xd7, 0xc0, 0xec, 0x61, 0x47, 0x01, 0xff, 0xc0, 0xec, 0x85, 0xc9,
+ 0xb6, 0x66, 0x00, 0xda, 0x21, 0x4b, 0x6f, 0x71, 0xc0, 0xed, 0x29, 0x45,
+ 0x02, 0xcb, 0x40, 0xed, 0x5a, 0xcd, 0x7e, 0xa8, 0x0f, 0x9e, 0x00, 0xc9,
+ 0x11, 0x47, 0x0b, 0x57, 0xa9, 0x4a, 0x57, 0x1f, 0xc0, 0xed, 0x78, 0x47,
+ 0x01, 0xff, 0x40, 0xed, 0x8a, 0xc6, 0x01, 0xb1, 0x0f, 0xb5, 0xe1, 0xc5,
+ 0xdd, 0x38, 0x0f, 0xa3, 0xe1, 0xc6, 0x53, 0x15, 0x0f, 0x9b, 0xe1, 0xc5,
+ 0x44, 0x66, 0x0f, 0xa1, 0x20, 0x12, 0xc0, 0xee, 0x02, 0x83, 0x05, 0x35,
+ 0x01, 0x0d, 0xc0, 0xee, 0x18, 0x97, 0x05, 0x35, 0x11, 0xc2, 0x01, 0xe6,
+ 0x05, 0x35, 0x21, 0x14, 0xc0, 0xee, 0x3b, 0x16, 0xc0, 0xee, 0x4d, 0x91,
+ 0x05, 0x35, 0x39, 0x10, 0xc0, 0xee, 0x59, 0x8b, 0x05, 0x35, 0x49, 0x0e,
+ 0xc0, 0xee, 0x86, 0x8f, 0x05, 0x35, 0x9b, 0x00, 0xee, 0x9e, 0x15, 0xc0,
+ 0xee, 0xb6, 0x1b, 0xc0, 0xee, 0xd0, 0x19, 0xc0, 0xee, 0xe0, 0x08, 0x40,
+ 0xee, 0xea, 0x0f, 0xc0, 0xef, 0x00, 0xc3, 0x0c, 0x34, 0x05, 0x37, 0xa0,
+ 0xe0, 0x01, 0x27, 0x01, 0x3d, 0x58, 0x00, 0xc0, 0xef, 0x0c, 0x15, 0x40,
+ 0xef, 0x18, 0x15, 0xc0, 0xef, 0x24, 0x43, 0x3e, 0xad, 0xc0, 0xef, 0x30,
+ 0x4f, 0x2e, 0x37, 0xc0, 0xef, 0x3c, 0x4b, 0x6f, 0x71, 0xc0, 0xef, 0x46,
+ 0x47, 0x01, 0xff, 0x40, 0xef, 0x68, 0xc3, 0xb2, 0xdf, 0x0f, 0xb6, 0x08,
+ 0xc5, 0xc0, 0xde, 0x0f, 0xa6, 0x51, 0xc7, 0xc6, 0x25, 0x0f, 0xcf, 0xe0,
+ 0xcf, 0x66, 0x90, 0x01, 0x33, 0x61, 0xcc, 0x83, 0x40, 0x01, 0x33, 0x59,
+ 0xd8, 0x23, 0x0c, 0x0f, 0x9c, 0xe9, 0xd7, 0x2a, 0x62, 0x0f, 0x9c, 0xe0,
+ 0xc5, 0x10, 0x15, 0x0f, 0xa1, 0xd9, 0xca, 0xa9, 0xc0, 0x0f, 0xce, 0xa0,
+ 0xcc, 0x1f, 0x6a, 0x01, 0x1f, 0x18, 0x47, 0x01, 0xff, 0xc0, 0xef, 0xcb,
+ 0x15, 0xc0, 0xf0, 0x2e, 0x4b, 0x6f, 0x71, 0xc0, 0xf0, 0x3a, 0x03, 0xc0,
+ 0xf0, 0x5a, 0x46, 0x08, 0xd7, 0xc0, 0xf0, 0x6c, 0x46, 0x79, 0xf2, 0xc0,
+ 0xf0, 0x90, 0x49, 0x39, 0xf6, 0xc0, 0xf0, 0x9c, 0xc6, 0xd8, 0xfe, 0x00,
+ 0x4f, 0xd1, 0xca, 0xa0, 0x38, 0x00, 0x4f, 0xd8, 0xc5, 0xe1, 0x84, 0x0f,
+ 0x9b, 0x89, 0x49, 0x0a, 0x37, 0x40, 0xf0, 0xa8, 0xc6, 0x01, 0xb1, 0x01,
+ 0x1b, 0xf1, 0xd8, 0x24, 0x8c, 0x0f, 0xa8, 0xa9, 0xc6, 0xd8, 0xe6, 0x0f,
+ 0xd6, 0x88, 0xcf, 0x63, 0x39, 0x0f, 0xa3, 0x29, 0xce, 0x2d, 0xd1, 0x0f,
+ 0xa3, 0x20, 0xc9, 0x15, 0xe2, 0x01, 0x10, 0xc8, 0xd1, 0x54, 0xee, 0x0f,
+ 0xab, 0x60, 0xce, 0x72, 0xfc, 0x00, 0xd0, 0xf9, 0xc7, 0xc3, 0xf5, 0x00,
+ 0xd0, 0xf1, 0x4b, 0x6f, 0x71, 0xc0, 0xf0, 0xae, 0x47, 0x01, 0xff, 0x40,
+ 0xf0, 0xc4, 0x97, 0x00, 0xba, 0x99, 0x8b, 0x00, 0xba, 0x90, 0xc2, 0x01,
+ 0x0e, 0x00, 0xba, 0x89, 0xc2, 0x0e, 0xe5, 0x00, 0xba, 0x81, 0xc2, 0x00,
+ 0x4c, 0x00, 0xba, 0x79, 0xc2, 0x00, 0x96, 0x00, 0xba, 0x71, 0xc2, 0x00,
+ 0x9a, 0x00, 0xba, 0x69, 0xc2, 0x1a, 0x36, 0x00, 0xba, 0x61, 0xc2, 0x00,
+ 0x3f, 0x00, 0xba, 0x59, 0xc2, 0x02, 0x1d, 0x00, 0xba, 0x51, 0xc2, 0x07,
+ 0x44, 0x00, 0xba, 0x49, 0x10, 0xc0, 0xf1, 0x24, 0xc2, 0x0c, 0x25, 0x00,
+ 0xba, 0x39, 0xc2, 0x00, 0x44, 0x00, 0xba, 0x31, 0xc2, 0x07, 0x69, 0x00,
+ 0xba, 0x21, 0xc2, 0x06, 0x6b, 0x00, 0xba, 0x19, 0x97, 0x00, 0xba, 0x11,
+ 0x8b, 0x00, 0xba, 0x09, 0x83, 0x00, 0xba, 0x00, 0xcb, 0x97, 0xb6, 0x0f,
+ 0xa3, 0x81, 0xcb, 0x8f, 0xc3, 0x0f, 0x98, 0x48, 0xc4, 0xe5, 0x83, 0x0f,
+ 0xa5, 0xe1, 0x95, 0x0f, 0xd3, 0x90, 0x4c, 0x87, 0xfc, 0xc0, 0xf1, 0x2e,
+ 0x90, 0x0f, 0xcf, 0x00, 0x47, 0x37, 0x49, 0xc0, 0xf1, 0x3a, 0x47, 0x01,
+ 0xff, 0xc0, 0xf1, 0x67, 0x18, 0xc0, 0xf1, 0xcf, 0x45, 0x02, 0xcb, 0xc0,
+ 0xf1, 0xdb, 0x06, 0xc0, 0xf1, 0xff, 0x4c, 0x11, 0x33, 0x40, 0xf2, 0x11,
+ 0xdb, 0x18, 0xc4, 0x01, 0x1c, 0x59, 0xc5, 0x19, 0x74, 0x0f, 0xa4, 0xa1,
+ 0xc3, 0x02, 0x1d, 0x00, 0x05, 0x30, 0x86, 0x0f, 0x9a, 0xf1, 0xd0, 0x58,
+ 0x2f, 0x00, 0x04, 0x11, 0xca, 0x9d, 0x86, 0x0f, 0xc9, 0x88, 0x42, 0x00,
+ 0xff, 0xc0, 0xf2, 0x21, 0x46, 0xd1, 0x54, 0xc0, 0xf2, 0x2d, 0xcb, 0x90,
+ 0x26, 0x0e, 0x82, 0x28, 0xc5, 0x8a, 0xaf, 0x0e, 0x81, 0x23, 0x00, 0xf2,
+ 0x39, 0x46, 0xd0, 0xa0, 0xc0, 0xf2, 0x3d, 0x11, 0xc0, 0xf2, 0x4a, 0x14,
+ 0xc0, 0xf2, 0x5f, 0x42, 0x00, 0x97, 0xc0, 0xf2, 0x6b, 0xc6, 0xcf, 0x2c,
+ 0x0e, 0x83, 0x08, 0x14, 0xc0, 0xf2, 0x77, 0x12, 0xc0, 0xf2, 0x83, 0x45,
+ 0xdd, 0xf6, 0xc0, 0xf2, 0x93, 0x10, 0x40, 0xf2, 0xab, 0x16, 0xc0, 0xf2,
+ 0xb7, 0x48, 0xc2, 0x83, 0xc0, 0xf2, 0xcc, 0xc5, 0xdb, 0x5d, 0x0e, 0x81,
+ 0x4b, 0x00, 0xf2, 0xde, 0x1b, 0xc0, 0xf2, 0xe4, 0xc7, 0xca, 0xb6, 0x0e,
+ 0x80, 0xe8, 0x0b, 0xc0, 0xf2, 0xf1, 0xc2, 0x47, 0x43, 0x0e, 0x81, 0x79,
+ 0xc5, 0xdd, 0x56, 0x0e, 0x80, 0x08, 0x42, 0x13, 0xfc, 0xc0, 0xf3, 0x0e,
+ 0x12, 0x40, 0xf3, 0x1a, 0x46, 0x3f, 0x15, 0xc0, 0xf3, 0x24, 0xda, 0x1d,
+ 0x0e, 0x0e, 0x86, 0x29, 0x49, 0xb7, 0x3e, 0x40, 0xf3, 0x4f, 0x44, 0xe5,
+ 0x17, 0xc0, 0xf3, 0x61, 0x47, 0xce, 0x7c, 0xc0, 0xf3, 0x73, 0x44, 0x5a,
+ 0x14, 0x40, 0xf3, 0x7f, 0x42, 0x00, 0xa9, 0xc0, 0xf3, 0x89, 0x15, 0xc0,
+ 0xf3, 0x93, 0xc6, 0xd5, 0xfe, 0x0e, 0x81, 0xf8, 0x10, 0xc0, 0xf3, 0x9f,
+ 0x46, 0xd3, 0xe8, 0xc0, 0xf3, 0xab, 0xc7, 0xcf, 0x71, 0x0e, 0x83, 0x41,
+ 0xc9, 0xab, 0x41, 0x0e, 0x83, 0x21, 0xc6, 0xd7, 0x30, 0x0e, 0x82, 0xa9,
+ 0xce, 0x74, 0xbc, 0x0e, 0x80, 0x70, 0x48, 0xbb, 0xb3, 0xc0, 0xf3, 0xb7,
+ 0xca, 0xa0, 0xe2, 0x0e, 0x82, 0xb8, 0x14, 0xc0, 0xf3, 0xd7, 0x07, 0xc0,
+ 0xf3, 0xe1, 0x0a, 0xc0, 0xf3, 0xf3, 0xc6, 0xd4, 0xcc, 0x0e, 0x81, 0x38,
+ 0x07, 0xc0, 0xf3, 0xfd, 0xc6, 0xc6, 0x42, 0x0e, 0x82, 0xe8, 0x49, 0xab,
+ 0x77, 0xc0, 0xf4, 0x09, 0xc5, 0xe3, 0x4b, 0x0e, 0x82, 0xd9, 0x44, 0xe4,
+ 0x17, 0xc0, 0xf4, 0x15, 0x46, 0xd7, 0xba, 0x40, 0xf4, 0x1f, 0x42, 0x02,
+ 0x29, 0xc0, 0xf4, 0x2b, 0x42, 0x00, 0x4d, 0xc0, 0xf4, 0x35, 0x46, 0xd3,
+ 0x10, 0xc0, 0xf4, 0x41, 0x07, 0x40, 0xf4, 0x4d, 0x44, 0xe4, 0x43, 0xc0,
+ 0xf4, 0x62, 0xc3, 0x47, 0xd5, 0x0e, 0x80, 0xc8, 0xc6, 0xd7, 0x0c, 0x0e,
+ 0x81, 0xe1, 0xc4, 0xce, 0x7e, 0x0e, 0x81, 0x28, 0xc2, 0x0d, 0x8b, 0x08,
+ 0xe3, 0x58, 0x9b, 0x08, 0xe3, 0x50, 0xc4, 0x15, 0xa7, 0x08, 0xe3, 0x03,
+ 0x00, 0xf4, 0x6c, 0xc2, 0x22, 0x45, 0x08, 0xe2, 0xfa, 0x00, 0xf4, 0x72,
+ 0x0b, 0xc0, 0xf4, 0x78, 0x11, 0x40, 0xf4, 0x84, 0x0a, 0xc0, 0xf4, 0x90,
+ 0x19, 0xc0, 0xf4, 0x9c, 0xc2, 0x01, 0x04, 0x08, 0xe3, 0x18, 0xc4, 0x24,
+ 0x35, 0x08, 0xe2, 0xc9, 0xc5, 0x05, 0x1b, 0x08, 0xe2, 0xc1, 0x15, 0xc0,
+ 0xf4, 0xa6, 0x08, 0xc0, 0xf4, 0xb2, 0x16, 0xc0, 0xf4, 0xbe, 0xc3, 0x05,
+ 0x17, 0x08, 0xe2, 0x89, 0xc4, 0x16, 0x57, 0x08, 0xe2, 0x80, 0xc7, 0x7d,
+ 0xf8, 0x08, 0xe2, 0x01, 0xc7, 0x10, 0xac, 0x08, 0xe1, 0xe8, 0xc4, 0x21,
+ 0x28, 0x08, 0xe1, 0xf9, 0xc5, 0x45, 0xcf, 0x08, 0xe1, 0xf0, 0x97, 0x08,
+ 0xe1, 0xd9, 0x8b, 0x08, 0xe1, 0xc9, 0x83, 0x08, 0xe1, 0x78, 0x8e, 0x08,
+ 0xe1, 0xb1, 0x94, 0x08, 0xe1, 0xa2, 0x00, 0xf4, 0xca, 0x97, 0x08, 0xe1,
+ 0x98, 0x8b, 0x08, 0xe1, 0x88, 0x83, 0x08, 0xe1, 0x69, 0xc2, 0x0e, 0xe5,
+ 0x08, 0xe1, 0x61, 0xc2, 0x01, 0x0e, 0x08, 0xe1, 0x58, 0x83, 0x08, 0xe1,
+ 0x51, 0x47, 0xb7, 0xd8, 0x40, 0xf4, 0xce, 0xc2, 0x01, 0x0e, 0x08, 0xe1,
+ 0x29, 0x83, 0x08, 0xe1, 0x20, 0xc2, 0x01, 0x0e, 0x08, 0xe1, 0x19, 0x83,
+ 0x08, 0xe1, 0x10, 0x83, 0x08, 0xe1, 0x09, 0xc2, 0x01, 0x01, 0x08, 0xe0,
+ 0xe1, 0xc2, 0x1a, 0x36, 0x08, 0xe0, 0xb9, 0xc2, 0x07, 0x69, 0x08, 0xe0,
+ 0x90, 0xc2, 0x01, 0x0e, 0x08, 0xe1, 0x01, 0x83, 0x08, 0xe0, 0xf9, 0x06,
+ 0x40, 0xf4, 0xd9, 0xc2, 0x01, 0x0e, 0x08, 0xe0, 0xf1, 0x83, 0x08, 0xe0,
+ 0xe9, 0x16, 0x40, 0xf4, 0xe3, 0xc2, 0x01, 0x0e, 0x08, 0xe0, 0xb1, 0x83,
+ 0x08, 0xe0, 0xa8, 0xc2, 0x01, 0x0e, 0x08, 0xe0, 0xa1, 0x83, 0x08, 0xe0,
+ 0x98, 0xc2, 0x01, 0x0e, 0x08, 0xe0, 0x89, 0x83, 0x08, 0xe0, 0x80, 0xc2,
+ 0x01, 0x0e, 0x08, 0xe0, 0x79, 0x83, 0x08, 0xe0, 0x70, 0x97, 0x08, 0xe0,
+ 0x69, 0x8b, 0x08, 0xe0, 0x59, 0x83, 0x08, 0xe0, 0x08, 0x97, 0x08, 0xe0,
+ 0x28, 0x8b, 0x08, 0xe0, 0x18, 0x45, 0x00, 0xcd, 0xc0, 0xf4, 0xed, 0x46,
+ 0x01, 0x17, 0xc0, 0xf5, 0x13, 0x16, 0xc0, 0xf5, 0x3b, 0xce, 0x6d, 0xae,
+ 0x01, 0x38, 0x19, 0x45, 0x01, 0x1d, 0xc0, 0xf5, 0x47, 0xd3, 0x43, 0x9a,
+ 0x01, 0x2c, 0x39, 0xd2, 0x49, 0x5c, 0x01, 0x2c, 0x29, 0x44, 0x05, 0x17,
+ 0x40, 0xf5, 0x5f, 0x04, 0xc0, 0xf5, 0x6b, 0xc8, 0x0a, 0x5f, 0x01, 0x02,
+ 0x71, 0xc4, 0x03, 0x5d, 0x00, 0x02, 0xf9, 0xc6, 0x4c, 0x56, 0x01, 0x72,
+ 0x3b, 0x00, 0xf5, 0x77, 0xdb, 0x16, 0x8d, 0x01, 0x80, 0xf8, 0x46, 0x00,
+ 0x4c, 0xc0, 0xf5, 0x7d, 0xc5, 0x32, 0x63, 0x01, 0x3e, 0xe8, 0x46, 0x00,
+ 0x4c, 0xc0, 0xf5, 0x95, 0x00, 0x40, 0xf5, 0xad, 0xc7, 0x2a, 0xc9, 0x01,
+ 0x3e, 0x61, 0x47, 0xcf, 0xa9, 0xc0, 0xf5, 0xb9, 0xc3, 0x1f, 0x85, 0x0f,
+ 0xd4, 0xc0, 0x00, 0x40, 0xf5, 0xbf, 0x46, 0x01, 0xab, 0x40, 0xf5, 0xcb,
+ 0xc4, 0x16, 0x57, 0x00, 0x00, 0x79, 0xc3, 0x05, 0x17, 0x00, 0x00, 0x70,
+ 0x00, 0xc0, 0xf5, 0xe3, 0x46, 0x00, 0x4c, 0x40, 0xf6, 0x38, 0xd0, 0x5f,
+ 0x4f, 0x0f, 0xa8, 0x69, 0xcd, 0x0f, 0x83, 0x01, 0x19, 0x49, 0xd4, 0x3a,
+ 0x1e, 0x01, 0x4f, 0xe1, 0xdb, 0x16, 0xc3, 0x00, 0x05, 0x58, 0xdc, 0x13,
+ 0x8a, 0x01, 0x3d, 0x51, 0xdb, 0x17, 0x14, 0x01, 0x49, 0xc8, 0xc7, 0x05,
+ 0x3a, 0x01, 0x03, 0x31, 0xc8, 0xbb, 0xd3, 0x01, 0x01, 0x69, 0xc9, 0xac,
+ 0x97, 0x01, 0x01, 0x51, 0xc4, 0x00, 0xfa, 0x01, 0x00, 0x70, 0xd6, 0x31,
+ 0x23, 0x00, 0x2c, 0x71, 0xc4, 0xbe, 0x2d, 0x0f, 0xc8, 0xd9, 0xcb, 0x92,
+ 0xc5, 0x00, 0x7e, 0xb2, 0x00, 0xf6, 0x7a, 0xca, 0x37, 0x20, 0x01, 0x17,
+ 0x31, 0xc5, 0x09, 0x02, 0x01, 0x13, 0x40, 0xc3, 0x05, 0xe3, 0x01, 0x16,
+ 0xb1, 0xcd, 0x76, 0xbc, 0x01, 0x53, 0xc9, 0xd3, 0x46, 0xa5, 0x01, 0x53,
+ 0xd8, 0x42, 0x00, 0xb2, 0xc0, 0xf6, 0x80, 0xcc, 0x8c, 0xac, 0x01, 0x13,
+ 0x30, 0x45, 0x03, 0x51, 0xc0, 0xf6, 0x9b, 0x43, 0x00, 0x3b, 0x40, 0xf6,
+ 0xb1, 0xd4, 0x08, 0x53, 0x01, 0x55, 0x40, 0x06, 0xc0, 0xf6, 0xbd, 0x16,
+ 0xc0, 0xf6, 0xcd, 0x83, 0x00, 0xe1, 0x19, 0xc2, 0x00, 0x4c, 0x00, 0xe1,
+ 0x11, 0x15, 0xc0, 0xf6, 0xdf, 0xc2, 0x06, 0x8c, 0x00, 0xe0, 0xf9, 0x0a,
+ 0xc0, 0xf6, 0xe9, 0xc2, 0x00, 0x96, 0x00, 0xe0, 0xe1, 0xc2, 0x00, 0x9a,
+ 0x00, 0xe0, 0xd9, 0xc2, 0x1a, 0x36, 0x00, 0xe0, 0xd1, 0x0f, 0xc0, 0xf6,
+ 0xf3, 0x04, 0xc0, 0xf6, 0xfd, 0x08, 0xc0, 0xf7, 0x07, 0x12, 0xc0, 0xf7,
+ 0x11, 0x10, 0xc0, 0xf7, 0x21, 0xc2, 0x26, 0x94, 0x00, 0xe0, 0x41, 0x05,
+ 0xc0, 0xf7, 0x31, 0x09, 0xc0, 0xf7, 0x3b, 0x0d, 0x40, 0xf7, 0x45, 0xc4,
+ 0x24, 0x35, 0x00, 0xe2, 0x49, 0xc5, 0x05, 0x1b, 0x00, 0xe2, 0x41, 0x15,
+ 0xc0, 0xf7, 0x55, 0x08, 0xc0, 0xf7, 0x61, 0x16, 0xc0, 0xf7, 0x6d, 0xc3,
+ 0x05, 0x17, 0x00, 0xe2, 0x09, 0xc4, 0x16, 0x57, 0x00, 0xe2, 0x00, 0x16,
+ 0xc0, 0xf7, 0x79, 0xc6, 0xc5, 0x2a, 0x00, 0xe1, 0xe9, 0xd2, 0x47, 0xd0,
+ 0x00, 0xe1, 0xe0, 0x44, 0x02, 0xcc, 0xc0, 0xf7, 0x88, 0x50, 0x5d, 0xcf,
+ 0x40, 0xf7, 0x94, 0x8d, 0x00, 0xe1, 0x6b, 0x00, 0xf7, 0xa0, 0x90, 0x00,
+ 0xe1, 0x83, 0x00, 0xf7, 0xa6, 0x96, 0x00, 0xe1, 0x99, 0x94, 0x00, 0xe1,
+ 0x91, 0x92, 0x00, 0xe1, 0x89, 0x8e, 0x00, 0xe1, 0x79, 0x8f, 0x00, 0xe1,
+ 0x70, 0x87, 0x00, 0xe1, 0x61, 0x97, 0x00, 0xe1, 0x53, 0x00, 0xf7, 0xac,
+ 0x91, 0x00, 0xe1, 0x43, 0x00, 0xf7, 0xb0, 0x8b, 0x00, 0xe1, 0x39, 0xc2,
+ 0x0e, 0x30, 0x00, 0xe1, 0x30, 0x00, 0xc0, 0xf7, 0xb4, 0xc4, 0x01, 0xce,
+ 0x01, 0x30, 0x3a, 0x00, 0xf7, 0xee, 0x1b, 0xc0, 0xf7, 0xf7, 0xc2, 0x02,
+ 0x1d, 0x05, 0x26, 0x81, 0x12, 0xc0, 0xf8, 0x01, 0x06, 0xc0, 0xf8, 0x0b,
+ 0x16, 0xc0, 0xf8, 0x15, 0x09, 0xc0, 0xf8, 0x29, 0x0d, 0xc0, 0xf8, 0x33,
+ 0xc2, 0x26, 0x94, 0x05, 0x26, 0xc9, 0x05, 0xc0, 0xf8, 0x3d, 0xc2, 0x00,
+ 0x3f, 0x05, 0x26, 0xf9, 0x10, 0xc0, 0xf8, 0x47, 0xc2, 0x00, 0x96, 0x05,
+ 0x27, 0x09, 0x15, 0xc0, 0xf8, 0x51, 0x1c, 0xc0, 0xf8, 0x5b, 0x0a, 0xc0,
+ 0xf8, 0x65, 0xc2, 0x23, 0xe3, 0x05, 0x27, 0x39, 0xc2, 0x01, 0xa7, 0x05,
+ 0x27, 0x49, 0xc2, 0x00, 0x4c, 0x05, 0x27, 0x51, 0x83, 0x05, 0x27, 0x73,
+ 0x00, 0xf8, 0x6f, 0x87, 0x05, 0x27, 0x83, 0x00, 0xf8, 0x73, 0x8b, 0x05,
+ 0x27, 0x91, 0x91, 0x05, 0x27, 0x9b, 0x00, 0xf8, 0x77, 0x97, 0x05, 0x27,
+ 0xa2, 0x00, 0xf8, 0x7b, 0xc5, 0x00, 0xea, 0x05, 0x27, 0xf1, 0xc9, 0x11,
+ 0x47, 0x05, 0x27, 0xf8, 0x00, 0xc0, 0xf8, 0x83, 0x43, 0x00, 0xb1, 0x40,
+ 0xf8, 0x9e, 0xcd, 0x79, 0x05, 0x0f, 0xac, 0x39, 0xc7, 0x01, 0xb0, 0x0f,
+ 0xa8, 0xb8, 0x46, 0x08, 0xd7, 0xc0, 0xf8, 0xaa, 0xcd, 0x2d, 0xa6, 0x00,
+ 0xca, 0x29, 0xd0, 0x0f, 0x62, 0x00, 0xca, 0x21, 0x15, 0xc0, 0xf8, 0xce,
+ 0x44, 0x33, 0x45, 0xc0, 0xf8, 0xe0, 0x47, 0x01, 0xff, 0x40, 0xf8, 0xec,
+ 0x85, 0x08, 0x49, 0xc9, 0x90, 0x08, 0x49, 0x5b, 0x00, 0xf9, 0x3b, 0x8e,
+ 0x08, 0x49, 0x4b, 0x00, 0xf9, 0x3f, 0x87, 0x08, 0x49, 0x23, 0x00, 0xf9,
+ 0x43, 0x83, 0x08, 0x49, 0x03, 0x00, 0xf9, 0x47, 0x96, 0x08, 0x49, 0x7b,
+ 0x00, 0xf9, 0x4b, 0x95, 0x08, 0x49, 0x9b, 0x00, 0xf9, 0x4f, 0x93, 0x08,
+ 0x49, 0x91, 0x88, 0x08, 0x49, 0x89, 0x97, 0x08, 0x49, 0x81, 0x94, 0x08,
+ 0x49, 0x69, 0x91, 0x08, 0x49, 0x61, 0x8f, 0x08, 0x49, 0x51, 0x8d, 0x08,
+ 0x49, 0x41, 0x9b, 0x08, 0x49, 0x39, 0x8b, 0x08, 0x49, 0x31, 0x98, 0x08,
+ 0x49, 0x29, 0x86, 0x08, 0x49, 0x19, 0x89, 0x08, 0x49, 0x11, 0x84, 0x08,
+ 0x49, 0x08, 0x90, 0x08, 0x14, 0xc8, 0x90, 0x08, 0x14, 0xd0, 0x8a, 0x08,
+ 0x14, 0x18, 0x8a, 0x08, 0x14, 0x49, 0x96, 0x08, 0x14, 0xc0, 0x8d, 0x08,
+ 0x14, 0xa0, 0x8f, 0x08, 0x14, 0x80, 0x90, 0x08, 0x14, 0x88, 0x00, 0xc0,
+ 0xf9, 0x53, 0xc6, 0xc9, 0x83, 0x01, 0x55, 0x5a, 0x00, 0xf9, 0x8f, 0x45,
+ 0x00, 0x3f, 0xc0, 0xf9, 0x95, 0x56, 0x31, 0xa7, 0x40, 0xf9, 0x9f, 0x15,
+ 0xc0, 0xf9, 0xe6, 0xd5, 0x37, 0x69, 0x00, 0x14, 0xb3, 0x00, 0xf9, 0xfb,
+ 0x42, 0x01, 0xa5, 0xc0, 0xfa, 0x01, 0x03, 0xc0, 0xfa, 0x10, 0xd8, 0x25,
+ 0xf4, 0x00, 0xe9, 0x21, 0xcc, 0x26, 0x0c, 0x00, 0x14, 0xa3, 0x00, 0xfa,
+ 0x1c, 0xdb, 0x16, 0x06, 0x00, 0x14, 0xa9, 0x42, 0x05, 0x57, 0xc0, 0xfa,
+ 0x22, 0xc2, 0x1f, 0xbc, 0x00, 0x0d, 0x31, 0xcf, 0x69, 0x7e, 0x00, 0x0d,
+ 0xd9, 0xc4, 0x99, 0xd6, 0x00, 0x0d, 0xf9, 0xcc, 0x84, 0xf0, 0x00, 0x0e,
+ 0x01, 0xcd, 0x79, 0x53, 0x00, 0x0e, 0x08, 0xc4, 0x0c, 0xa4, 0x01, 0x38,
+ 0xe9, 0x48, 0x01, 0xf8, 0x40, 0xfa, 0x2e, 0xca, 0xa6, 0x0a, 0x05, 0x3f,
+ 0xb9, 0x49, 0x11, 0xad, 0xc0, 0xfa, 0x3a, 0x0b, 0xc0, 0xfa, 0x42, 0xc9,
+ 0xad, 0x6f, 0x05, 0x3f, 0xf8, 0xc9, 0xad, 0xb7, 0x0f, 0x98, 0xe1, 0xc6,
+ 0x01, 0xb1, 0x0f, 0x98, 0xb8, 0x0d, 0xc0, 0xfa, 0x4e, 0x12, 0xc0, 0xfa,
+ 0x56, 0x10, 0xc0, 0xfa, 0x66, 0xc2, 0x00, 0xea, 0x00, 0x74, 0x41, 0x15,
+ 0xc0, 0xfa, 0x76, 0xc2, 0x00, 0x68, 0x00, 0x74, 0xa1, 0x16, 0xc0, 0xfa,
+ 0x82, 0xc2, 0x01, 0xb4, 0x00, 0x74, 0xd1, 0x43, 0xcb, 0xb6, 0xc0, 0xfa,
+ 0x8c, 0xc2, 0x01, 0xc2, 0x00, 0x75, 0x09, 0xc2, 0x47, 0x43, 0x00, 0x75,
+ 0x11, 0xc2, 0x00, 0x64, 0x00, 0x75, 0x19, 0xc2, 0x00, 0x36, 0x00, 0x75,
+ 0x2b, 0x00, 0xfa, 0x9c, 0xc2, 0x0a, 0x20, 0x00, 0x75, 0x39, 0x43, 0x69,
+ 0x91, 0xc0, 0xfa, 0xa2, 0x91, 0x00, 0x75, 0x68, 0x83, 0x00, 0x75, 0x83,
+ 0x00, 0xfa, 0xae, 0x45, 0xe3, 0x5a, 0xc0, 0xfa, 0xbe, 0x8b, 0x00, 0x75,
+ 0xa3, 0x00, 0xfa, 0xca, 0x9b, 0x00, 0x75, 0xb3, 0x00, 0xfa, 0xce, 0x97,
+ 0x00, 0x75, 0xc3, 0x00, 0xfa, 0xd2, 0x87, 0x00, 0x76, 0x03, 0x00, 0xfa,
+ 0xd6, 0x91, 0x00, 0x76, 0x10, 0xcf, 0x66, 0xdb, 0x00, 0x75, 0xd1, 0x4e,
+ 0x6f, 0x6e, 0x40, 0xfa, 0xda, 0xc2, 0x13, 0x31, 0x00, 0x76, 0x41, 0x16,
+ 0xc0, 0xfa, 0xe6, 0xc6, 0xd3, 0xa6, 0x00, 0x76, 0x58, 0xc4, 0x16, 0x57,
+ 0x00, 0x76, 0x81, 0xc3, 0x05, 0x17, 0x00, 0x76, 0x89, 0x16, 0xc0, 0xfa,
+ 0xf0, 0x08, 0xc0, 0xfa, 0xfc, 0x15, 0xc0, 0xfb, 0x08, 0xc5, 0x05, 0x1b,
+ 0x00, 0x76, 0xc1, 0xc4, 0x24, 0x35, 0x00, 0x76, 0xc8, 0xc2, 0x00, 0x10,
+ 0x00, 0x76, 0xe1, 0xc2, 0x01, 0xc2, 0x00, 0x76, 0xe8, 0x16, 0xc0, 0xfb,
+ 0x14, 0x4f, 0x65, 0xcd, 0xc0, 0xfb, 0x20, 0x4f, 0x01, 0x28, 0xc0, 0xfb,
+ 0x2c, 0xda, 0x1b, 0xd6, 0x01, 0x3a, 0x81, 0xc6, 0xd8, 0x5c, 0x01, 0x38,
+ 0x81, 0xd5, 0x37, 0xbd, 0x01, 0x2e, 0xe9, 0x43, 0x07, 0x52, 0x40, 0xfb,
+ 0x38, 0x16, 0xc0, 0xfb, 0x3e, 0x4f, 0x65, 0xcd, 0xc0, 0xfb, 0x4a, 0xcf,
+ 0x64, 0xfb, 0x01, 0x3e, 0xa1, 0xd5, 0x37, 0xbd, 0x01, 0x2e, 0xe1, 0x44,
+ 0x20, 0xa4, 0x40, 0xfb, 0x56, 0x0e, 0xc0, 0xfb, 0x5c, 0x4f, 0x2c, 0xb1,
+ 0x40, 0xfb, 0x68, 0x48, 0x04, 0x13, 0xc0, 0xfb, 0x6e, 0xc5, 0x01, 0x22,
+ 0x01, 0x2c, 0x03, 0x00, 0xfb, 0x78, 0xc6, 0x03, 0xfa, 0x01, 0x2f, 0x01,
+ 0xcc, 0x04, 0x1b, 0x0f, 0xdc, 0x70, 0xcc, 0x05, 0x1b, 0x01, 0x2c, 0xa1,
+ 0xcd, 0x15, 0x72, 0x0f, 0xdc, 0x10, 0xdb, 0x15, 0x64, 0x0f, 0xdb, 0x69,
+ 0x45, 0x05, 0xde, 0x40, 0xfb, 0x7e, 0xc5, 0x00, 0x62, 0x01, 0x0f, 0x3b,
+ 0x00, 0xfb, 0x8a, 0xcc, 0x82, 0x80, 0x01, 0x0f, 0x72, 0x00, 0xfb, 0x8e,
+ 0x42, 0x00, 0x47, 0xc0, 0xfb, 0x94, 0x19, 0x40, 0xfb, 0xa0, 0xcf, 0x60,
+ 0xb0, 0x0f, 0xc2, 0x89, 0xcc, 0x89, 0xac, 0x0f, 0xc1, 0xc8, 0xc4, 0x00,
+ 0x63, 0x01, 0x0c, 0x8b, 0x00, 0xfb, 0xac, 0xc5, 0xe0, 0x58, 0x01, 0x70,
+ 0xa8, 0xcb, 0x83, 0x05, 0x01, 0x0f, 0x09, 0xcb, 0x82, 0x81, 0x01, 0x0e,
+ 0x88, 0x51, 0x02, 0x11, 0xc0, 0xfb, 0xb0, 0x45, 0x11, 0x8e, 0x40, 0xfb,
+ 0xbc, 0xc5, 0x00, 0x62, 0x01, 0x58, 0x31, 0xd3, 0x40, 0xc8, 0x01, 0x5c,
+ 0x48, 0xd2, 0x4b, 0xae, 0x01, 0x3e, 0xf8, 0xc4, 0x00, 0x5b, 0x01, 0x18,
+ 0x1b, 0x00, 0xfb, 0xc8, 0xcf, 0x64, 0x29, 0x01, 0x4d, 0xe8, 0xcb, 0x09,
+ 0xfc, 0x01, 0x0f, 0x99, 0xcc, 0x82, 0x80, 0x01, 0x0e, 0xa9, 0xc5, 0x00,
+ 0x62, 0x01, 0x0c, 0xab, 0x00, 0xfb, 0xcc, 0xcb, 0x99, 0xe7, 0x01, 0x58,
+ 0x69, 0xd5, 0x00, 0x52, 0x01, 0x5b, 0x29, 0xd0, 0x60, 0xaf, 0x0f, 0xc2,
+ 0xc8, 0x4f, 0x6c, 0x21, 0xc0, 0xfb, 0xd2, 0x50, 0x59, 0x5f, 0x40, 0xfb,
+ 0xde, 0x00, 0x40, 0xfb, 0xea, 0xca, 0x1b, 0xab, 0x00, 0x00, 0xf9, 0xc9,
+ 0x6c, 0xfd, 0x01, 0x5f, 0xd0, 0xc3, 0x81, 0xe7, 0x08, 0x1c, 0x01, 0xc2,
+ 0x01, 0x5b, 0x08, 0x1c, 0x98, 0xc4, 0xe6, 0x33, 0x08, 0x1c, 0x11, 0xc4,
+ 0x8b, 0xde, 0x08, 0x1c, 0xc8, 0xc2, 0x01, 0x0e, 0x08, 0x1c, 0x19, 0xc2,
+ 0x0e, 0x14, 0x08, 0x1c, 0x58, 0xc4, 0xde, 0x10, 0x08, 0x1c, 0x21, 0xc3,
+ 0x01, 0x1d, 0x08, 0x1c, 0x78, 0xc2, 0x00, 0x44, 0x08, 0x1c, 0x40, 0xc3,
+ 0x0b, 0x47, 0x08, 0x1c, 0x39, 0x97, 0x08, 0x1c, 0x88, 0xc2, 0x00, 0xdd,
+ 0x08, 0x1c, 0x49, 0xc5, 0xdf, 0x9f, 0x08, 0x1c, 0xc1, 0x91, 0x08, 0x1c,
+ 0xd0, 0xc3, 0x11, 0x40, 0x08, 0x1c, 0x61, 0x03, 0xc0, 0xfb, 0xfc, 0xc2,
+ 0x06, 0x82, 0x08, 0x1c, 0xe8, 0x0a, 0xc0, 0xfc, 0x08, 0x07, 0xc0, 0xfc,
+ 0x14, 0x19, 0xc0, 0xfc, 0x26, 0x15, 0xc0, 0xfc, 0x38, 0x46, 0x03, 0xdd,
+ 0xc0, 0xfc, 0x52, 0x0e, 0xc0, 0xfc, 0x5e, 0x16, 0xc0, 0xfc, 0x74, 0x04,
+ 0xc0, 0xfc, 0x86, 0x42, 0x02, 0x6a, 0xc0, 0xfc, 0x92, 0x05, 0xc0, 0xfc,
+ 0x9e, 0x06, 0xc0, 0xfc, 0xb3, 0x14, 0xc0, 0xfc, 0xc3, 0x0f, 0xc0, 0xfc,
+ 0xcf, 0xc9, 0x65, 0xfa, 0x01, 0x3c, 0xa9, 0xcc, 0x02, 0x5b, 0x01, 0x3a,
+ 0xd1, 0x03, 0xc0, 0xfc, 0xdb, 0x11, 0xc0, 0xfc, 0xed, 0x08, 0xc0, 0xfc,
+ 0xff, 0xcb, 0x5e, 0x74, 0x01, 0x38, 0xd1, 0xd4, 0x11, 0x1f, 0x0f, 0xb3,
+ 0xc8, 0xc5, 0xb0, 0x70, 0x0f, 0xd5, 0x33, 0x00, 0xfd, 0x0b, 0xc5, 0x32,
+ 0xae, 0x0f, 0x9d, 0x38, 0xca, 0x9b, 0xcc, 0x0f, 0xa4, 0xf9, 0x45, 0x01,
+ 0xac, 0xc0, 0xfd, 0x11, 0xc5, 0x02, 0x33, 0x0f, 0xd7, 0xb0, 0xd3, 0x44,
+ 0x7e, 0x01, 0x36, 0x89, 0xc7, 0x01, 0xb0, 0x01, 0x1c, 0x40, 0xc3, 0xea,
+ 0xf8, 0x0d, 0x87, 0xd1, 0xc3, 0xea, 0xfb, 0x0d, 0x87, 0xc9, 0xc3, 0xea,
+ 0xfe, 0x0d, 0x87, 0xc1, 0xc3, 0xed, 0x98, 0x0d, 0x87, 0xb9, 0xc3, 0xed,
+ 0x9b, 0x0d, 0x87, 0xb1, 0xc3, 0xed, 0x9e, 0x0d, 0x87, 0xa9, 0xc3, 0xed,
+ 0xa1, 0x0d, 0x87, 0xa1, 0xc3, 0xed, 0xaa, 0x0d, 0x87, 0x99, 0xc3, 0xed,
+ 0xa7, 0x0d, 0x87, 0x91, 0xc3, 0xed, 0xa4, 0x0d, 0x87, 0x89, 0xc3, 0xeb,
+ 0x6d, 0x0d, 0x87, 0x81, 0xc3, 0xeb, 0x3d, 0x0d, 0x87, 0x79, 0xc3, 0xeb,
+ 0x40, 0x0d, 0x87, 0x71, 0xc3, 0xec, 0x9c, 0x0d, 0x88, 0x39, 0xc3, 0xec,
+ 0x9f, 0x0d, 0x88, 0x31, 0xc3, 0xec, 0xa2, 0x0d, 0x88, 0x29, 0xc3, 0xec,
+ 0xa5, 0x0d, 0x88, 0x21, 0xc3, 0xec, 0xa8, 0x0d, 0x88, 0x19, 0xc3, 0xec,
+ 0xab, 0x0d, 0x88, 0x11, 0xc3, 0x82, 0xe0, 0x0d, 0x88, 0x09, 0xc3, 0x82,
+ 0xec, 0x0d, 0x88, 0x01, 0xc3, 0x82, 0xa4, 0x0d, 0x87, 0xf9, 0xc3, 0x83,
+ 0x28, 0x0d, 0x87, 0xf1, 0xc3, 0xea, 0xef, 0x0d, 0x87, 0xe9, 0xc3, 0xea,
+ 0xe3, 0x0d, 0x87, 0xe1, 0xc3, 0xea, 0xf5, 0x0d, 0x87, 0xd9, 0xc3, 0xeb,
+ 0x43, 0x0d, 0x87, 0x68, 0xc3, 0xec, 0xab, 0x0d, 0x85, 0xd1, 0xc3, 0x83,
+ 0xe8, 0x0d, 0x85, 0xc9, 0xc3, 0x82, 0xb0, 0x0d, 0x85, 0xc1, 0xc3, 0x3b,
+ 0x0b, 0x0d, 0x85, 0xb9, 0xc3, 0x82, 0xe0, 0x0d, 0x85, 0xb1, 0xc3, 0x82,
+ 0xec, 0x0d, 0x85, 0xa9, 0xc3, 0x82, 0xa4, 0x0d, 0x85, 0xa1, 0xc3, 0x83,
+ 0x28, 0x0d, 0x85, 0x99, 0xc3, 0xea, 0xf8, 0x0d, 0x85, 0x91, 0xc3, 0xea,
+ 0xfb, 0x0d, 0x85, 0x89, 0xc3, 0xea, 0xfe, 0x0d, 0x85, 0x81, 0xc3, 0xed,
+ 0x98, 0x0d, 0x85, 0x79, 0xc3, 0xed, 0x9b, 0x0d, 0x85, 0x71, 0xc3, 0xed,
+ 0x9e, 0x0d, 0x85, 0x69, 0xc3, 0xed, 0xa1, 0x0d, 0x85, 0x61, 0xc3, 0xed,
+ 0xaa, 0x0d, 0x85, 0x59, 0xc3, 0xed, 0xa7, 0x0d, 0x85, 0x51, 0xc3, 0xed,
+ 0xa4, 0x0d, 0x85, 0x49, 0xc3, 0xeb, 0x43, 0x0d, 0x84, 0xf3, 0x00, 0xfd,
+ 0x23, 0xc3, 0xeb, 0x82, 0x0d, 0x85, 0x31, 0xc3, 0xeb, 0x85, 0x0d, 0x85,
+ 0x29, 0xc3, 0xeb, 0x88, 0x0d, 0x85, 0x21, 0xc3, 0xeb, 0x8b, 0x0d, 0x85,
+ 0x19, 0xc3, 0xeb, 0x8e, 0x0d, 0x85, 0x11, 0xc3, 0xeb, 0x6d, 0x0d, 0x85,
+ 0x09, 0xc3, 0xeb, 0x3d, 0x0d, 0x85, 0x01, 0xc3, 0xeb, 0x40, 0x0d, 0x84,
+ 0xf9, 0xc3, 0xec, 0xc3, 0x0d, 0x86, 0x09, 0xc3, 0xec, 0x99, 0x0d, 0x86,
+ 0x01, 0xc3, 0xec, 0x9c, 0x0d, 0x85, 0xf9, 0xc3, 0xec, 0x9f, 0x0d, 0x85,
+ 0xf1, 0xc3, 0xec, 0xa2, 0x0d, 0x85, 0xe9, 0xc3, 0xec, 0xa5, 0x0d, 0x85,
+ 0xe1, 0xc3, 0xec, 0xa8, 0x0d, 0x85, 0xd8, 0xc3, 0xec, 0x9f, 0x0d, 0x84,
+ 0xe9, 0xc3, 0xec, 0xa2, 0x0d, 0x84, 0xe1, 0xc3, 0xec, 0xa5, 0x0d, 0x84,
+ 0xd9, 0xc3, 0xec, 0xa8, 0x0d, 0x84, 0xd1, 0xc3, 0xec, 0xab, 0x0d, 0x84,
+ 0xc9, 0xc4, 0xe6, 0x9b, 0x0d, 0x84, 0xc1, 0xc3, 0x82, 0xb0, 0x0d, 0x84,
+ 0xb9, 0xc3, 0x3b, 0x0b, 0x0d, 0x84, 0xb1, 0xc3, 0x82, 0xe0, 0x0d, 0x84,
+ 0xa9, 0xc3, 0x82, 0xec, 0x0d, 0x84, 0xa1, 0xc3, 0x82, 0xa4, 0x0d, 0x84,
+ 0x99, 0xc3, 0x83, 0x28, 0x0d, 0x84, 0x91, 0xc3, 0xea, 0xf8, 0x0d, 0x84,
+ 0x89, 0xc3, 0xea, 0xfb, 0x0d, 0x84, 0x81, 0xc3, 0xea, 0xfe, 0x0d, 0x84,
+ 0x79, 0xc4, 0xe7, 0x87, 0x0d, 0x84, 0x71, 0xc3, 0xed, 0x98, 0x0d, 0x84,
+ 0x69, 0xc3, 0xed, 0x9b, 0x0d, 0x84, 0x61, 0xc3, 0xed, 0x9e, 0x0d, 0x84,
+ 0x59, 0xc3, 0xed, 0xa1, 0x0d, 0x84, 0x51, 0xc3, 0xed, 0xaa, 0x0d, 0x84,
+ 0x49, 0xc3, 0xed, 0xa7, 0x0d, 0x84, 0x41, 0xc3, 0xed, 0xa4, 0x0d, 0x84,
+ 0x39, 0xc3, 0xeb, 0x82, 0x0d, 0x84, 0x31, 0xc3, 0xeb, 0x85, 0x0d, 0x84,
+ 0x29, 0xc3, 0xeb, 0x88, 0x0d, 0x84, 0x21, 0xc3, 0xeb, 0x8b, 0x0d, 0x84,
+ 0x19, 0xc3, 0xeb, 0x8e, 0x0d, 0x84, 0x11, 0xc3, 0xeb, 0x6d, 0x0d, 0x84,
+ 0x09, 0xc3, 0xeb, 0x3d, 0x0d, 0x84, 0x01, 0xc3, 0xeb, 0x40, 0x0d, 0x83,
+ 0xf9, 0xc3, 0xeb, 0x43, 0x0d, 0x83, 0xf0, 0xc3, 0xec, 0xa5, 0x0d, 0x81,
+ 0xd1, 0xc3, 0xec, 0xa8, 0x0d, 0x81, 0xc9, 0xc3, 0xec, 0xab, 0x0d, 0x81,
+ 0xc1, 0xc3, 0x82, 0xb0, 0x0d, 0x81, 0xb9, 0xc3, 0x3b, 0x0b, 0x0d, 0x81,
+ 0xb1, 0xc3, 0x82, 0xe0, 0x0d, 0x81, 0xa9, 0xc3, 0x82, 0xec, 0x0d, 0x81,
+ 0xa1, 0xc3, 0x82, 0xa4, 0x0d, 0x81, 0x99, 0xc3, 0x83, 0x28, 0x0d, 0x81,
+ 0x91, 0xc3, 0xea, 0xec, 0x0d, 0x81, 0x89, 0xc3, 0xea, 0xef, 0x0d, 0x81,
+ 0x81, 0xc3, 0xea, 0xe3, 0x0d, 0x81, 0x79, 0xc3, 0xea, 0xf5, 0x0d, 0x81,
+ 0x71, 0xc3, 0xea, 0xf8, 0x0d, 0x81, 0x69, 0xc3, 0xea, 0xfb, 0x0d, 0x81,
+ 0x61, 0xc3, 0xea, 0xfe, 0x0d, 0x81, 0x59, 0xc3, 0xed, 0x95, 0x0d, 0x81,
+ 0x51, 0xc3, 0xed, 0x98, 0x0d, 0x81, 0x49, 0xc3, 0xed, 0x9b, 0x0d, 0x81,
+ 0x41, 0xc3, 0xed, 0x9e, 0x0d, 0x81, 0x39, 0xc3, 0xed, 0xa1, 0x0d, 0x81,
+ 0x31, 0xc3, 0xed, 0xaa, 0x0d, 0x81, 0x29, 0xc3, 0xed, 0xa7, 0x0d, 0x81,
+ 0x21, 0xc3, 0xed, 0xa4, 0x0d, 0x81, 0x19, 0xc4, 0xe9, 0xd3, 0x0d, 0x81,
+ 0x11, 0xc3, 0xeb, 0x43, 0x0d, 0x80, 0xbb, 0x00, 0xfd, 0x2b, 0xc3, 0xeb,
+ 0x82, 0x0d, 0x80, 0xf9, 0xc3, 0xeb, 0x85, 0x0d, 0x80, 0xf1, 0xc3, 0xeb,
+ 0x88, 0x0d, 0x80, 0xe9, 0xc3, 0xeb, 0x8b, 0x0d, 0x80, 0xe1, 0xc3, 0xeb,
+ 0x8e, 0x0d, 0x80, 0xd9, 0xc3, 0xeb, 0x6d, 0x0d, 0x80, 0xd1, 0xc3, 0xeb,
+ 0x3d, 0x0d, 0x80, 0xc9, 0xc3, 0xeb, 0x40, 0x0d, 0x80, 0xc1, 0xc4, 0xe8,
+ 0x23, 0x0d, 0x81, 0xd8, 0xc3, 0xec, 0xa8, 0x0d, 0x88, 0xf1, 0xc3, 0xec,
+ 0xab, 0x0d, 0x88, 0xe8, 0xc3, 0xec, 0xa2, 0x0d, 0x88, 0xc9, 0xc3, 0xec,
+ 0xa5, 0x0d, 0x88, 0xc1, 0xc3, 0xec, 0xa8, 0x0d, 0x88, 0xb9, 0xc3, 0xec,
+ 0xab, 0x0d, 0x88, 0xb1, 0xc3, 0x82, 0xe0, 0x0d, 0x88, 0xa9, 0xc3, 0x82,
+ 0xec, 0x0d, 0x88, 0xa1, 0xc3, 0x82, 0xa4, 0x0d, 0x88, 0x99, 0xc3, 0x83,
+ 0x28, 0x0d, 0x88, 0x91, 0xc3, 0xed, 0x9e, 0x0d, 0x88, 0x89, 0xc3, 0xed,
+ 0xa1, 0x0d, 0x88, 0x81, 0xc3, 0xed, 0xaa, 0x0d, 0x88, 0x79, 0xc3, 0xed,
+ 0xa7, 0x0d, 0x88, 0x71, 0xc3, 0xed, 0xa4, 0x0d, 0x88, 0x69, 0xc3, 0xeb,
+ 0x8e, 0x0d, 0x88, 0x61, 0xc3, 0xeb, 0x6d, 0x0d, 0x88, 0x59, 0xc3, 0xeb,
+ 0x3d, 0x0d, 0x88, 0x51, 0xc3, 0xeb, 0x40, 0x0d, 0x88, 0x49, 0xc3, 0xeb,
+ 0x43, 0x0d, 0x88, 0x41, 0xc3, 0xec, 0x9f, 0x0d, 0x88, 0xd1, 0xc3, 0xec,
+ 0x9c, 0x0d, 0x88, 0xd9, 0xc3, 0xec, 0x99, 0x0d, 0x88, 0xe0, 0xc4, 0xe9,
+ 0xdb, 0x0d, 0x87, 0x11, 0xc3, 0xeb, 0x8e, 0x0d, 0x87, 0x09, 0xc3, 0xeb,
+ 0x6d, 0x0d, 0x87, 0x01, 0xc3, 0xeb, 0x3d, 0x0d, 0x86, 0xf9, 0xc3, 0xeb,
+ 0x40, 0x0d, 0x86, 0xf1, 0xc3, 0xeb, 0x43, 0x0d, 0x86, 0xe9, 0xc3, 0xea,
+ 0xfe, 0x0d, 0x87, 0x19, 0xc3, 0xea, 0xfb, 0x0d, 0x87, 0x21, 0xc3, 0xea,
+ 0xf8, 0x0d, 0x87, 0x29, 0xc3, 0xea, 0xf5, 0x0d, 0x87, 0x31, 0xc3, 0xec,
+ 0xab, 0x0d, 0x87, 0x39, 0xc3, 0xec, 0xa8, 0x0d, 0x87, 0x41, 0xc3, 0xec,
+ 0xa5, 0x0d, 0x87, 0x49, 0xc3, 0xec, 0xa2, 0x0d, 0x87, 0x51, 0xc3, 0xec,
+ 0x9f, 0x0d, 0x87, 0x59, 0xc3, 0xec, 0x9c, 0x0d, 0x87, 0x60, 0xc3, 0xec,
+ 0x9f, 0x0d, 0x86, 0xd9, 0xc3, 0xec, 0xa2, 0x0d, 0x86, 0xd1, 0xc3, 0xec,
+ 0xa5, 0x0d, 0x86, 0xc9, 0xc3, 0xec, 0xa8, 0x0d, 0x86, 0xc1, 0xc3, 0xec,
+ 0xab, 0x0d, 0x86, 0xb9, 0xc4, 0x7f, 0xc6, 0x0d, 0x86, 0xb1, 0xc3, 0x82,
+ 0xa4, 0x0d, 0x86, 0xa9, 0xc3, 0x83, 0x28, 0x0d, 0x86, 0xa1, 0xc3, 0xea,
+ 0xf5, 0x0d, 0x86, 0x99, 0xc3, 0xea, 0xf8, 0x0d, 0x86, 0x91, 0xc3, 0xea,
+ 0xfb, 0x0d, 0x86, 0x89, 0xc3, 0xea, 0xfe, 0x0d, 0x86, 0x81, 0xc3, 0xed,
+ 0x98, 0x0d, 0x86, 0x79, 0xc3, 0xed, 0x9b, 0x0d, 0x86, 0x71, 0xc3, 0xed,
+ 0x9e, 0x0d, 0x86, 0x69, 0xc3, 0xed, 0xa1, 0x0d, 0x86, 0x61, 0xc3, 0xed,
+ 0xaa, 0x0d, 0x86, 0x59, 0xc3, 0xed, 0xa7, 0x0d, 0x86, 0x51, 0xc3, 0xed,
+ 0xa4, 0x0d, 0x86, 0x49, 0xc3, 0xeb, 0x88, 0x0d, 0x86, 0x41, 0xc3, 0xeb,
+ 0x8b, 0x0d, 0x86, 0x39, 0xc3, 0xeb, 0x8e, 0x0d, 0x86, 0x31, 0xc3, 0xeb,
+ 0x6d, 0x0d, 0x86, 0x29, 0xc3, 0xeb, 0x3d, 0x0d, 0x86, 0x21, 0xc3, 0xeb,
+ 0x40, 0x0d, 0x86, 0x19, 0xc3, 0xeb, 0x43, 0x0d, 0x86, 0x11, 0xc3, 0xec,
+ 0x9c, 0x0d, 0x86, 0xe0, 0xc4, 0xe8, 0x0f, 0x0d, 0x83, 0xe9, 0xc3, 0xec,
+ 0x9c, 0x0d, 0x83, 0xe1, 0xc3, 0xec, 0x9f, 0x0d, 0x83, 0xd9, 0xc3, 0xec,
+ 0xa2, 0x0d, 0x83, 0xd1, 0xc3, 0xec, 0xa5, 0x0d, 0x83, 0xc9, 0xc3, 0xec,
+ 0xa8, 0x0d, 0x83, 0xc1, 0xc3, 0xec, 0xab, 0x0d, 0x83, 0xb9, 0xc3, 0xea,
+ 0xda, 0x0d, 0x83, 0xb1, 0xc3, 0xea, 0xce, 0x0d, 0x83, 0xa9, 0xc3, 0x83,
+ 0xe8, 0x0d, 0x83, 0xa1, 0xc3, 0x82, 0xb0, 0x0d, 0x83, 0x99, 0xc3, 0x3b,
+ 0x0b, 0x0d, 0x83, 0x91, 0xc3, 0x82, 0xe0, 0x0d, 0x83, 0x89, 0xc3, 0x82,
+ 0xec, 0x0d, 0x83, 0x81, 0xc3, 0x82, 0xa4, 0x0d, 0x83, 0x79, 0xc3, 0x83,
+ 0x28, 0x0d, 0x83, 0x71, 0xc3, 0xed, 0xa7, 0x0d, 0x83, 0x19, 0xc3, 0xed,
+ 0xa4, 0x0d, 0x83, 0x11, 0xc3, 0xeb, 0x6d, 0x0d, 0x83, 0x09, 0xc3, 0xeb,
+ 0x3d, 0x0d, 0x83, 0x01, 0xc3, 0xeb, 0x40, 0x0d, 0x82, 0xf9, 0xc3, 0xeb,
+ 0x43, 0x0d, 0x82, 0xf1, 0xc3, 0xed, 0xaa, 0x0d, 0x83, 0x21, 0xc3, 0xed,
+ 0xa1, 0x0d, 0x83, 0x29, 0xc3, 0xed, 0x9e, 0x0d, 0x83, 0x31, 0xc3, 0xed,
+ 0x9b, 0x0d, 0x83, 0x39, 0xc3, 0xed, 0x98, 0x0d, 0x83, 0x41, 0xc3, 0xea,
+ 0xfe, 0x0d, 0x83, 0x49, 0xc3, 0xea, 0xfb, 0x0d, 0x83, 0x51, 0xc3, 0xea,
+ 0xf8, 0x0d, 0x83, 0x59, 0xc3, 0xea, 0xf5, 0x0d, 0x83, 0x61, 0xc4, 0xe4,
+ 0xef, 0x0d, 0x83, 0x68, 0xc3, 0xec, 0x99, 0x0d, 0x82, 0xe9, 0xc3, 0xec,
+ 0x9c, 0x0d, 0x82, 0xe1, 0xc3, 0xec, 0x9f, 0x0d, 0x82, 0xd9, 0xc3, 0xec,
+ 0xa2, 0x0d, 0x82, 0xd1, 0xc3, 0xec, 0xa5, 0x0d, 0x82, 0xc9, 0xc3, 0xec,
+ 0xa8, 0x0d, 0x82, 0xc1, 0xc3, 0xec, 0xab, 0x0d, 0x82, 0xb9, 0xc3, 0x3b,
+ 0x0b, 0x0d, 0x82, 0xb1, 0xc3, 0x82, 0xe0, 0x0d, 0x82, 0xa9, 0xc3, 0x82,
+ 0xec, 0x0d, 0x82, 0xa1, 0xc3, 0x82, 0xa4, 0x0d, 0x82, 0x99, 0xc3, 0x83,
+ 0x28, 0x0d, 0x82, 0x91, 0xc3, 0xea, 0xe9, 0x0d, 0x82, 0x89, 0xc3, 0xea,
+ 0xec, 0x0d, 0x82, 0x81, 0xc3, 0xea, 0xef, 0x0d, 0x82, 0x79, 0xc3, 0xea,
+ 0xe3, 0x0d, 0x82, 0x71, 0xc3, 0xea, 0xf5, 0x0d, 0x82, 0x69, 0xc3, 0xea,
+ 0xf8, 0x0d, 0x82, 0x61, 0xc3, 0xea, 0xfb, 0x0d, 0x82, 0x59, 0xc3, 0xea,
+ 0xfe, 0x0d, 0x82, 0x51, 0xc3, 0xed, 0x9b, 0x0d, 0x82, 0x49, 0xc3, 0xed,
+ 0x9e, 0x0d, 0x82, 0x41, 0xc3, 0xed, 0xa1, 0x0d, 0x82, 0x39, 0xc3, 0xed,
+ 0xaa, 0x0d, 0x82, 0x31, 0xc3, 0xed, 0xa7, 0x0d, 0x82, 0x29, 0xc3, 0xed,
+ 0xa4, 0x0d, 0x82, 0x21, 0xc3, 0xeb, 0x85, 0x0d, 0x82, 0x19, 0xc3, 0xeb,
+ 0x88, 0x0d, 0x82, 0x11, 0xc3, 0xeb, 0x8b, 0x0d, 0x82, 0x09, 0xc3, 0xeb,
+ 0x8e, 0x0d, 0x82, 0x01, 0xc3, 0xeb, 0x6d, 0x0d, 0x81, 0xf9, 0xc3, 0xeb,
+ 0x3d, 0x0d, 0x81, 0xf1, 0xc3, 0xeb, 0x40, 0x0d, 0x81, 0xe9, 0xc3, 0xeb,
+ 0x43, 0x0d, 0x81, 0xe0, 0x48, 0x1b, 0x0d, 0xc0, 0xfd, 0x33, 0x46, 0x02,
+ 0x00, 0x40, 0xfd, 0x3f, 0x45, 0x15, 0x19, 0xc0, 0xfd, 0xd5, 0x4b, 0x11,
+ 0x34, 0x40, 0xfe, 0x05, 0xc9, 0xb3, 0xe7, 0x00, 0x2e, 0x29, 0xc9, 0xb6,
+ 0x39, 0x00, 0x2e, 0x21, 0xcd, 0x7b, 0xc3, 0x00, 0x2d, 0x78, 0x1c, 0xc0,
+ 0xfe, 0x23, 0x06, 0xc0, 0xfe, 0x2d, 0xc4, 0xe7, 0x3b, 0x00, 0x2d, 0x61,
+ 0xc3, 0x12, 0x35, 0x00, 0x2d, 0x59, 0x42, 0x04, 0x30, 0xc0, 0xfe, 0x39,
+ 0x16, 0xc0, 0xfe, 0x45, 0x42, 0x0e, 0x13, 0xc0, 0xfe, 0x4f, 0xcc, 0x8c,
+ 0xe8, 0x00, 0x2d, 0x11, 0x42, 0x07, 0x44, 0xc0, 0xfe, 0x5b, 0xc5, 0x4a,
+ 0x49, 0x00, 0x2c, 0xb9, 0x15, 0xc0, 0xfe, 0x67, 0xc7, 0xcf, 0xd3, 0x00,
+ 0x2c, 0x89, 0x43, 0x02, 0xfb, 0xc0, 0xfe, 0x73, 0x0f, 0x40, 0xfe, 0x82,
+ 0x43, 0x06, 0x1f, 0xc0, 0xfe, 0x97, 0xc7, 0x0d, 0x30, 0x02, 0x6e, 0x48,
+ 0x0b, 0xc0, 0xfe, 0xc7, 0xc7, 0xc5, 0x4c, 0x02, 0x6e, 0xf9, 0xd5, 0x36,
+ 0x04, 0x02, 0x6f, 0x19, 0x07, 0x40, 0xfe, 0xd3, 0xc6, 0x7a, 0x9f, 0x02,
+ 0x6e, 0x21, 0xd2, 0x4a, 0x7c, 0x02, 0x6e, 0x88, 0x10, 0xc0, 0xfe, 0xe5,
+ 0xcc, 0x85, 0x5c, 0x02, 0x6f, 0x58, 0x45, 0x00, 0x3f, 0xc0, 0xfe, 0xf1,
+ 0xc9, 0xaf, 0x3a, 0x02, 0x6e, 0x59, 0xce, 0x6d, 0xa0, 0x02, 0x6e, 0xb0,
+ 0xc4, 0x9a, 0x7d, 0x02, 0x6e, 0x51, 0xc7, 0xcd, 0x33, 0x02, 0x6f, 0x11,
+ 0xcd, 0x77, 0xa6, 0x02, 0x6f, 0x68, 0xc9, 0xb5, 0xbb, 0x02, 0x6e, 0x61,
+ 0xc8, 0xbe, 0xe3, 0x02, 0x6e, 0x80, 0x14, 0xc0, 0xfe, 0xfd, 0xd1, 0x53,
+ 0x9a, 0x02, 0x6f, 0x60, 0xc5, 0xdc, 0x7a, 0x02, 0x6e, 0x71, 0xcb, 0x99,
+ 0xb0, 0x02, 0x6e, 0xd0, 0xc7, 0xcc, 0xbc, 0x02, 0x6e, 0x91, 0xc8, 0xbf,
+ 0x13, 0x02, 0x6f, 0xb1, 0xcf, 0x6b, 0x4f, 0x02, 0x6f, 0xf0, 0xcd, 0x82,
+ 0x0f, 0x02, 0x6e, 0xa1, 0xcb, 0x90, 0x68, 0x02, 0x6f, 0x51, 0xd0, 0x5b,
+ 0x3f, 0x02, 0x6f, 0xf8, 0x16, 0xc0, 0xff, 0x09, 0xc8, 0xb9, 0x9b, 0x02,
+ 0x6f, 0x80, 0x10, 0xc0, 0xff, 0x15, 0xc7, 0xc7, 0x60, 0x02, 0x6e, 0xf1,
+ 0xc6, 0xd5, 0x1a, 0x02, 0x6f, 0x48, 0x42, 0x05, 0x08, 0xc0, 0xff, 0x21,
+ 0xca, 0xa0, 0x7e, 0x02, 0x6f, 0x30, 0x4f, 0x05, 0x17, 0xc0, 0xff, 0x2d,
+ 0x04, 0xc0, 0xff, 0x57, 0xd5, 0x33, 0xcd, 0x01, 0x35, 0x49, 0x48, 0xb9,
+ 0xbb, 0xc0, 0xff, 0x63, 0xce, 0x72, 0xc4, 0x01, 0x1d, 0x79, 0xc8, 0x23,
+ 0xac, 0x01, 0x01, 0x31, 0x16, 0x40, 0xff, 0x7b, 0x00, 0x40, 0xff, 0x87,
+ 0xc7, 0xcf, 0x08, 0x01, 0x33, 0x41, 0xc8, 0xba, 0x73, 0x01, 0x30, 0xa9,
+ 0xc6, 0xd8, 0xe6, 0x0f, 0x99, 0xb1, 0xc3, 0xd5, 0x3b, 0x0f, 0x99, 0x68,
+ 0xd2, 0x4c, 0x1a, 0x01, 0x1f, 0x98, 0x00, 0x40, 0xff, 0x93, 0xd0, 0x0c,
+ 0x92, 0x0f, 0xb3, 0x48, 0xc4, 0xd4, 0x5a, 0x0f, 0xd5, 0x71, 0xc5, 0x45,
+ 0xa9, 0x01, 0x00, 0x50, 0x83, 0x0f, 0xd5, 0x61, 0xc8, 0xc0, 0x43, 0x0f,
+ 0xa1, 0xc8, 0x45, 0x00, 0x39, 0x40, 0xff, 0xa2, 0x42, 0x02, 0x1d, 0xc0,
+ 0xff, 0xb4, 0xc5, 0xc8, 0x79, 0x0f, 0xc8, 0xe9, 0x4c, 0x8b, 0x2c, 0x40,
+ 0xff, 0xbe, 0x46, 0x08, 0xd7, 0xc0, 0xff, 0xca, 0x45, 0x02, 0xcb, 0xc0,
+ 0xff, 0xee, 0x45, 0x00, 0xfa, 0xc0, 0xff, 0xfa, 0x46, 0x33, 0x45, 0xc1,
+ 0x00, 0x06, 0x47, 0x01, 0xff, 0x41, 0x00, 0x1a, 0xcd, 0x7d, 0xe5, 0x00,
+ 0xb9, 0xa1, 0x4b, 0x6f, 0x71, 0xc1, 0x00, 0x84, 0x47, 0x01, 0xff, 0x41,
+ 0x00, 0x8c, 0x43, 0x49, 0xdc, 0xc1, 0x00, 0xea, 0x4d, 0x7f, 0x2a, 0x41,
+ 0x01, 0x0c, 0x47, 0x37, 0x49, 0xc1, 0x01, 0x2a, 0x47, 0x01, 0xff, 0x41,
+ 0x01, 0x3d, 0xc9, 0x11, 0x47, 0x07, 0xfb, 0x09, 0xc5, 0x00, 0xea, 0x07,
+ 0xfb, 0x20, 0xcf, 0x6a, 0x7d, 0x07, 0xfb, 0x11, 0xcb, 0x01, 0x3c, 0x07,
+ 0xff, 0x48, 0xcf, 0x6a, 0x7d, 0x07, 0xfb, 0x19, 0xcb, 0x01, 0x3c, 0x07,
+ 0xff, 0x58, 0x42, 0x00, 0xee, 0xc1, 0x01, 0x9a, 0xdf, 0x0c, 0x26, 0x07,
+ 0xfb, 0x80, 0xc6, 0x94, 0xb9, 0x07, 0xfd, 0x01, 0x47, 0x01, 0xff, 0x41,
+ 0x01, 0xb2, 0xd1, 0x55, 0x32, 0x0f, 0xb4, 0x28, 0xcc, 0x86, 0xc4, 0x01,
+ 0x35, 0x09, 0xd1, 0x50, 0xd0, 0x0f, 0xa8, 0x30, 0x15, 0xc1, 0x02, 0x06,
+ 0x83, 0x01, 0x82, 0x13, 0x01, 0x02, 0x20, 0x8b, 0x01, 0x82, 0x21, 0x97,
+ 0x01, 0x82, 0x31, 0x87, 0x01, 0x82, 0x41, 0x91, 0x01, 0x82, 0x51, 0x0d,
+ 0xc1, 0x02, 0x26, 0x09, 0xc1, 0x02, 0x3a, 0x1c, 0xc1, 0x02, 0x4e, 0x16,
+ 0xc1, 0x02, 0x62, 0x06, 0xc1, 0x02, 0x76, 0x90, 0x01, 0x84, 0x9b, 0x01,
+ 0x02, 0x8a, 0x0a, 0xc1, 0x02, 0x9e, 0x04, 0xc1, 0x02, 0xb2, 0x12, 0xc1,
+ 0x02, 0xc6, 0x0f, 0xc1, 0x02, 0xda, 0x1b, 0xc1, 0x02, 0xee, 0x14, 0xc1,
+ 0x02, 0xfa, 0x19, 0xc1, 0x03, 0x0e, 0xc2, 0x59, 0xe0, 0x01, 0x84, 0xa0,
+ 0x90, 0x01, 0x00, 0x83, 0x01, 0x03, 0x1e, 0xc7, 0x8d, 0x11, 0x00, 0x01,
+ 0x68, 0x43, 0x00, 0xbf, 0xc1, 0x03, 0x28, 0x44, 0x04, 0x8d, 0x41, 0x03,
+ 0x46, 0xc4, 0x21, 0x5e, 0x01, 0x03, 0x21, 0xc9, 0x1b, 0xac, 0x01, 0x03,
+ 0x19, 0xc5, 0x07, 0x6d, 0x01, 0x03, 0x10, 0xcf, 0x62, 0x76, 0x0f, 0xa9,
+ 0x01, 0xc7, 0x62, 0x7e, 0x0f, 0xa9, 0x21, 0xcd, 0x79, 0x87, 0x0f, 0xa9,
+ 0x08, 0x0e, 0xc1, 0x03, 0x6e, 0xc6, 0xd3, 0xf4, 0x01, 0x15, 0xd1, 0xc7,
+ 0x00, 0xe0, 0x01, 0x11, 0x4b, 0x01, 0x03, 0x7a, 0xc6, 0x11, 0x24, 0x01,
+ 0x01, 0xe9, 0xcb, 0x37, 0x34, 0x01, 0x51, 0xe0, 0x00, 0x41, 0x03, 0x7e,
+ 0x46, 0x65, 0x4f, 0xc1, 0x03, 0x8e, 0x47, 0xc8, 0x94, 0x41, 0x03, 0x9a,
+ 0xda, 0x1c, 0x24, 0x01, 0x4e, 0xf0, 0x15, 0xc1, 0x03, 0xa6, 0xcb, 0x9a,
+ 0x29, 0x0f, 0xa4, 0x08, 0xc4, 0x01, 0x03, 0x01, 0x10, 0x31, 0x43, 0x2e,
+ 0x23, 0x41, 0x03, 0xb2, 0xcc, 0x8c, 0x40, 0x0f, 0xa7, 0x41, 0xce, 0x6d,
+ 0xe6, 0x01, 0x4e, 0xe0, 0xcd, 0x79, 0xef, 0x01, 0x05, 0xc9, 0x48, 0xc1,
+ 0x3b, 0x41, 0x03, 0xbe, 0xd7, 0x26, 0xb3, 0x0f, 0xd7, 0xa8, 0xc2, 0x01,
+ 0x89, 0x01, 0x13, 0x0b, 0x01, 0x03, 0xe2, 0xce, 0x35, 0x24, 0x01, 0x53,
+ 0x38, 0x4a, 0xa7, 0x0e, 0xc1, 0x03, 0xe8, 0x49, 0xac, 0xa0, 0x41, 0x03,
+ 0xf6, 0x54, 0x3e, 0x6a, 0xc1, 0x04, 0x02, 0xd1, 0x2a, 0xdb, 0x01, 0x81,
+ 0x60, 0xc4, 0x00, 0xeb, 0x01, 0x80, 0x09, 0xcb, 0x93, 0x1d, 0x01, 0x80,
+ 0x30, 0xcc, 0x88, 0x08, 0x01, 0x8c, 0x81, 0xcc, 0x89, 0xb8, 0x01, 0x8c,
+ 0x89, 0xc8, 0x2a, 0xe4, 0x01, 0x8c, 0x91, 0x16, 0xc1, 0x04, 0x20, 0x08,
+ 0xc1, 0x04, 0x30, 0x0f, 0xc1, 0x04, 0x3c, 0xcb, 0x99, 0x58, 0x01, 0x8c,
+ 0xc1, 0xcb, 0x97, 0x5e, 0x01, 0x8c, 0xd1, 0xcb, 0x91, 0x44, 0x01, 0x8c,
+ 0xe9, 0xca, 0xaa, 0x74, 0x01, 0x8c, 0xf0, 0x47, 0x37, 0x49, 0xc1, 0x04,
+ 0x48, 0xcc, 0x86, 0xac, 0x08, 0x42, 0xb9, 0x47, 0x01, 0xff, 0x41, 0x04,
+ 0x55, 0xc6, 0x5b, 0xd9, 0x01, 0x03, 0x01, 0xd4, 0x39, 0x92, 0x01, 0x71,
+ 0x88, 0x42, 0x00, 0x27, 0xc1, 0x04, 0xb8, 0xd0, 0x5e, 0xaf, 0x0f, 0xa3,
+ 0x78, 0x05, 0xc1, 0x04, 0xd0, 0x0a, 0xc1, 0x04, 0xee, 0x52, 0x47, 0xac,
+ 0xc1, 0x04, 0xfc, 0x15, 0xc1, 0x05, 0x08, 0x0e, 0xc1, 0x05, 0x3c, 0x06,
+ 0xc1, 0x05, 0x4c, 0x16, 0xc1, 0x05, 0x61, 0xd9, 0x0f, 0x62, 0x01, 0x3a,
+ 0xa9, 0xd6, 0x2f, 0x97, 0x01, 0x3a, 0xa1, 0x08, 0xc1, 0x05, 0x77, 0xc3,
+ 0xec, 0x7e, 0x01, 0x38, 0x89, 0x14, 0xc1, 0x05, 0x87, 0x17, 0xc1, 0x05,
+ 0x93, 0x0f, 0xc1, 0x05, 0x9f, 0xc6, 0x19, 0x7a, 0x01, 0x2f, 0x31, 0x12,
+ 0xc1, 0x05, 0xab, 0x43, 0x00, 0x29, 0x41, 0x05, 0xb7, 0x45, 0x18, 0xd5,
+ 0xc1, 0x05, 0xc3, 0x45, 0x20, 0x8c, 0x41, 0x05, 0xe1, 0x45, 0x20, 0x8c,
+ 0xc1, 0x05, 0xff, 0x45, 0x18, 0xd5, 0x41, 0x06, 0x1d, 0xd5, 0x36, 0x19,
+ 0x0f, 0xc4, 0x19, 0xca, 0x36, 0x24, 0x0f, 0xc3, 0x59, 0xd0, 0x5f, 0xff,
+ 0x0f, 0xc3, 0x19, 0xd1, 0x56, 0x42, 0x0f, 0xc3, 0x99, 0xd0, 0x36, 0x1e,
+ 0x0f, 0xc3, 0xd8, 0xd5, 0x36, 0x19, 0x0f, 0xc4, 0x11, 0xd0, 0x36, 0x1e,
+ 0x0f, 0xc3, 0xd1, 0xd0, 0x5f, 0xff, 0x0f, 0xc3, 0x11, 0xca, 0x36, 0x24,
+ 0x0f, 0xc3, 0x51, 0xd1, 0x56, 0x42, 0x0f, 0xc3, 0x90, 0xd5, 0x36, 0x19,
+ 0x0f, 0xc4, 0x01, 0xd0, 0x5f, 0xff, 0x0f, 0xc3, 0x01, 0xca, 0x36, 0x24,
+ 0x0f, 0xc3, 0x41, 0xd1, 0x56, 0x42, 0x0f, 0xc3, 0x81, 0xd0, 0x36, 0x1e,
+ 0x0f, 0xc3, 0xc0, 0xd0, 0x5f, 0xff, 0x0f, 0xc3, 0x09, 0xca, 0x36, 0x24,
+ 0x0f, 0xc3, 0x49, 0xd1, 0x56, 0x42, 0x0f, 0xc3, 0x89, 0xd0, 0x36, 0x1e,
+ 0x0f, 0xc3, 0xc9, 0xd5, 0x36, 0x19, 0x0f, 0xc4, 0x08, 0x00, 0xc1, 0x06,
+ 0x3b, 0xc2, 0x00, 0x37, 0x0f, 0xd4, 0xf8, 0x00, 0xc1, 0x06, 0x47, 0xc5,
+ 0xe3, 0xd2, 0x0f, 0x9a, 0x48, 0xc9, 0xb2, 0x6d, 0x0f, 0x17, 0xf9, 0x46,
+ 0x08, 0xd7, 0xc1, 0x06, 0x5f, 0x45, 0x2a, 0xe3, 0xc1, 0x06, 0x83, 0x47,
+ 0x01, 0xff, 0x41, 0x06, 0x95, 0xd4, 0x3e, 0xba, 0x0f, 0x98, 0xc1, 0xd3,
+ 0x40, 0x56, 0x0f, 0x98, 0xb0, 0xc2, 0x00, 0xeb, 0x08, 0xc7, 0xf9, 0x47,
+ 0x37, 0x49, 0xc1, 0x07, 0x1c, 0x46, 0x08, 0xd7, 0xc1, 0x07, 0x34, 0x4d,
+ 0x27, 0x71, 0xc1, 0x07, 0x58, 0x4f, 0x01, 0xf7, 0x41, 0x07, 0xb7, 0x0e,
+ 0xc1, 0x08, 0x16, 0xc8, 0x7c, 0x98, 0x07, 0xf2, 0x59, 0xc4, 0x0d, 0xc7,
+ 0x01, 0x81, 0x80, 0xca, 0xa8, 0x80, 0x0f, 0x9f, 0x99, 0xca, 0xa1, 0x1e,
+ 0x0f, 0x9f, 0xa1, 0xc9, 0x47, 0x47, 0x0f, 0xa2, 0x58, 0x58, 0x21, 0xec,
+ 0xc1, 0x08, 0x22, 0xc4, 0x0d, 0xc7, 0x01, 0x80, 0xe0, 0xc8, 0x2b, 0x9b,
+ 0x0f, 0xac, 0x29, 0xc6, 0xd2, 0xda, 0x0f, 0xb7, 0xc1, 0xc4, 0x5d, 0x35,
+ 0x0f, 0xca, 0x78, 0xc5, 0x78, 0x3d, 0x0f, 0xcb, 0xf9, 0xc4, 0x1e, 0xc2,
+ 0x01, 0x1f, 0x29, 0xc5, 0x76, 0x15, 0x0f, 0xd6, 0x98, 0x42, 0x01, 0x58,
+ 0x41, 0x08, 0x2e, 0x00, 0xc1, 0x08, 0x3a, 0xc7, 0x96, 0xd3, 0x01, 0x10,
+ 0xe0, 0xca, 0xa4, 0x66, 0x0f, 0x9b, 0xa3, 0x01, 0x08, 0x5c, 0xc3, 0x01,
+ 0x5e, 0x01, 0x56, 0xe1, 0xce, 0x4c, 0xd2, 0x01, 0x70, 0x80, 0x44, 0x01,
+ 0xac, 0xc1, 0x08, 0x62, 0xc4, 0x39, 0x36, 0x0f, 0xc9, 0x31, 0xc7, 0xc7,
+ 0x75, 0x0f, 0xa4, 0x31, 0xcf, 0x67, 0x80, 0x0f, 0xb0, 0xc1, 0x15, 0xc1,
+ 0x08, 0x6c, 0xd2, 0x49, 0xc8, 0x0f, 0xcb, 0xc8, 0x4d, 0x28, 0xf3, 0xc1,
+ 0x08, 0x78, 0xc7, 0xcc, 0x76, 0x0f, 0x9a, 0x10, 0xc8, 0xb8, 0xab, 0x01,
+ 0x05, 0x19, 0xc3, 0x94, 0x0d, 0x0f, 0x9a, 0xf8, 0x48, 0x09, 0xea, 0xc1,
+ 0x08, 0x84, 0xd3, 0x44, 0x58, 0x0f, 0xa1, 0x28, 0xd8, 0x22, 0xf4, 0x0f,
+ 0xb1, 0x30, 0xcd, 0x7b, 0x34, 0x01, 0x0a, 0xf9, 0xc5, 0x02, 0xe2, 0x01,
+ 0x02, 0x20, 0xc4, 0x74, 0xb9, 0x0f, 0xad, 0xf1, 0xc5, 0xde, 0x28, 0x0f,
+ 0xad, 0xe9, 0xc7, 0x89, 0x2d, 0x0f, 0xad, 0xe0, 0xca, 0xa2, 0xae, 0x01,
+ 0x3e, 0xb9, 0xc5, 0x01, 0x22, 0x01, 0x2c, 0x41, 0x45, 0x16, 0xb7, 0xc1,
+ 0x08, 0x8a, 0xc4, 0x05, 0x30, 0x00, 0x01, 0x70, 0x10, 0xc1, 0x08, 0x96,
+ 0x03, 0xc1, 0x08, 0xa2, 0x06, 0xc1, 0x08, 0xb4, 0x05, 0xc1, 0x08, 0xc0,
+ 0x15, 0xc1, 0x08, 0xd0, 0x0e, 0xc1, 0x08, 0xdc, 0x07, 0xc1, 0x08, 0xec,
+ 0x42, 0x06, 0xfb, 0xc1, 0x08, 0xf8, 0x42, 0x00, 0x93, 0xc1, 0x09, 0x04,
+ 0x14, 0xc1, 0x09, 0x10, 0xc5, 0x1f, 0x01, 0x07, 0xfa, 0xf1, 0x12, 0xc1,
+ 0x09, 0x1c, 0xc6, 0x61, 0xbc, 0x07, 0xff, 0x19, 0xca, 0xa0, 0x88, 0x07,
+ 0xff, 0x21, 0xc8, 0x79, 0x24, 0x07, 0xff, 0x29, 0xc8, 0xc1, 0xd3, 0x07,
+ 0xff, 0x31, 0xcc, 0x8d, 0x0c, 0x07, 0xf8, 0x69, 0xc9, 0x11, 0x47, 0x07,
+ 0xf8, 0x71, 0xcd, 0x36, 0x6d, 0x07, 0xfa, 0xe0, 0xcc, 0x61, 0x3e, 0x01,
+ 0x31, 0xeb, 0x01, 0x09, 0x2e, 0xce, 0x75, 0x3a, 0x01, 0x03, 0x41, 0xcb,
+ 0x68, 0x83, 0x0f, 0xca, 0x38, 0x44, 0x47, 0x2d, 0xc1, 0x09, 0x32, 0x42,
+ 0x00, 0x91, 0xc1, 0x09, 0x3c, 0xc7, 0xc8, 0xe8, 0x0f, 0xcf, 0x40, 0xc3,
+ 0x18, 0x55, 0x01, 0x2e, 0x49, 0xd1, 0x54, 0xaa, 0x0f, 0x9d, 0x19, 0xd7,
+ 0x28, 0x51, 0x0f, 0x9b, 0x28, 0xc7, 0xcf, 0xb0, 0x0f, 0xae, 0x21, 0xc6,
+ 0xa1, 0x04, 0x0f, 0xa6, 0x09, 0xc9, 0x1b, 0xac, 0x00, 0x00, 0xe0, 0xc9,
+ 0xb2, 0xfd, 0x0f, 0xa7, 0xe9, 0xc6, 0xd5, 0x86, 0x0f, 0x9c, 0xf0, 0x4c,
+ 0x11, 0x33, 0xc1, 0x09, 0x48, 0xd1, 0x54, 0xdd, 0x08, 0x52, 0x41, 0x47,
+ 0x37, 0x49, 0xc1, 0x09, 0x60, 0x46, 0x08, 0xd7, 0xc1, 0x09, 0x6a, 0x18,
+ 0xc1, 0x09, 0x7a, 0x45, 0x02, 0xcb, 0xc1, 0x09, 0x86, 0x47, 0x01, 0xff,
+ 0x41, 0x09, 0xa4, 0x05, 0xc1, 0x09, 0xfa, 0x04, 0x41, 0x0a, 0x32, 0xc4,
+ 0x24, 0x35, 0x08, 0x97, 0xc9, 0x15, 0xc1, 0x0a, 0x72, 0x08, 0xc1, 0x0a,
+ 0x7e, 0x16, 0xc1, 0x0a, 0x8a, 0xc3, 0x05, 0x17, 0x08, 0x97, 0x89, 0xc4,
+ 0x16, 0x57, 0x08, 0x97, 0x81, 0xc5, 0x05, 0x1b, 0x08, 0x97, 0xc0, 0xc6,
+ 0x21, 0x26, 0x08, 0x97, 0x51, 0xc5, 0x33, 0x1a, 0x08, 0x97, 0x49, 0xc8,
+ 0x10, 0xab, 0x08, 0x96, 0xf8, 0x91, 0x08, 0x97, 0x39, 0x03, 0xc1, 0x0a,
+ 0x96, 0x87, 0x08, 0x97, 0x29, 0x97, 0x08, 0x97, 0x1b, 0x01, 0x0a, 0xa2,
+ 0x8b, 0x08, 0x97, 0x0a, 0x01, 0x0a, 0xa6, 0xc2, 0x01, 0x0e, 0x08, 0x96,
+ 0xf1, 0x15, 0xc1, 0x0a, 0xaa, 0xc2, 0x06, 0x8c, 0x08, 0x96, 0xd9, 0xc2,
+ 0x00, 0x96, 0x08, 0x96, 0xd1, 0x14, 0xc1, 0x0a, 0xb4, 0xc2, 0x1a, 0x36,
+ 0x08, 0x96, 0xc1, 0xc2, 0x00, 0x3f, 0x08, 0x96, 0xb9, 0x04, 0xc1, 0x0a,
+ 0xbe, 0x12, 0xc1, 0x0a, 0xce, 0x10, 0xc1, 0x0a, 0xd8, 0x06, 0xc1, 0x0a,
+ 0xee, 0x16, 0xc1, 0x0a, 0xfc, 0x0c, 0xc1, 0x0b, 0x0a, 0x05, 0xc1, 0x0b,
+ 0x1a, 0x09, 0xc1, 0x0b, 0x24, 0x0d, 0xc1, 0x0b, 0x34, 0x83, 0x08, 0x95,
+ 0x83, 0x01, 0x0b, 0x3e, 0x91, 0x08, 0x95, 0xc1, 0x87, 0x08, 0x95, 0xb1,
+ 0x97, 0x08, 0x95, 0xa3, 0x01, 0x0b, 0x4a, 0x8b, 0x08, 0x95, 0x92, 0x01,
+ 0x0b, 0x4e, 0xc9, 0xb7, 0xd7, 0x08, 0x92, 0x09, 0x03, 0xc1, 0x0b, 0x52,
+ 0x91, 0x08, 0x91, 0x91, 0x87, 0x08, 0x91, 0x81, 0x97, 0x08, 0x91, 0x79,
+ 0x8b, 0x08, 0x91, 0x6a, 0x01, 0x0b, 0x5e, 0x15, 0xc1, 0x0b, 0x62, 0xc2,
+ 0x0e, 0x13, 0x08, 0x91, 0xf9, 0x0e, 0xc1, 0x0b, 0x6c, 0xc2, 0x01, 0x0e,
+ 0x08, 0x91, 0x51, 0xc2, 0x06, 0x8c, 0x08, 0x91, 0x41, 0xc2, 0x00, 0x9a,
+ 0x08, 0x91, 0x31, 0xc2, 0x1a, 0x36, 0x08, 0x91, 0x29, 0xc2, 0x00, 0x3f,
+ 0x08, 0x91, 0x21, 0x04, 0xc1, 0x0b, 0x76, 0x12, 0xc1, 0x0b, 0x86, 0x10,
+ 0xc1, 0x0b, 0x90, 0x06, 0xc1, 0x0b, 0xa6, 0x16, 0xc1, 0x0b, 0xb4, 0x0c,
+ 0xc1, 0x0b, 0xc2, 0x05, 0xc1, 0x0b, 0xcc, 0x09, 0xc1, 0x0b, 0xd6, 0x0d,
+ 0xc1, 0x0b, 0xe6, 0x83, 0x08, 0x90, 0x03, 0x01, 0x0b, 0xf0, 0x91, 0x08,
+ 0x90, 0x31, 0x87, 0x08, 0x90, 0x21, 0x97, 0x08, 0x90, 0x19, 0x8b, 0x08,
+ 0x90, 0x10, 0x44, 0x02, 0xcc, 0xc1, 0x0b, 0xfc, 0xcb, 0x25, 0x87, 0x08,
+ 0x91, 0xd8, 0x46, 0x02, 0x92, 0xc1, 0x0c, 0x12, 0xc4, 0x1c, 0xb3, 0x08,
+ 0x91, 0xc0, 0x46, 0x11, 0xf1, 0xc1, 0x0c, 0x1e, 0x44, 0x02, 0xcc, 0x41,
+ 0x0c, 0x3e, 0xc4, 0x24, 0x35, 0x00, 0xbf, 0x49, 0xc5, 0x05, 0x1b, 0x00,
+ 0xbf, 0x41, 0x15, 0xc1, 0x0c, 0x80, 0x08, 0xc1, 0x0c, 0x8c, 0x16, 0xc1,
+ 0x0c, 0x98, 0xc3, 0x05, 0x17, 0x00, 0xbf, 0x09, 0xc4, 0x16, 0x57, 0x00,
+ 0xbf, 0x00, 0x45, 0x02, 0xcb, 0xc1, 0x0c, 0xa4, 0x4a, 0x9d, 0x4a, 0x41,
+ 0x0c, 0xc5, 0x13, 0xc1, 0x0c, 0xcd, 0xc2, 0x00, 0x31, 0x00, 0xbd, 0x6b,
+ 0x01, 0x0c, 0xe9, 0xc2, 0x13, 0x65, 0x00, 0xbd, 0x5a, 0x01, 0x0c, 0xed,
+ 0xc2, 0x0e, 0x13, 0x00, 0xbd, 0x11, 0x0e, 0xc1, 0x0c, 0xf1, 0xc2, 0x01,
+ 0x0e, 0x00, 0xbd, 0x01, 0x15, 0xc1, 0x0c, 0xf9, 0xc2, 0x16, 0x11, 0x00,
+ 0xbc, 0xe1, 0xc2, 0x00, 0x64, 0x00, 0xbc, 0xd1, 0xc2, 0x47, 0x43, 0x00,
+ 0xbc, 0xc9, 0xc2, 0x01, 0xc2, 0x00, 0xbc, 0xc1, 0x12, 0xc1, 0x0d, 0x09,
+ 0xc2, 0x02, 0x1d, 0x00, 0xbc, 0xa1, 0x10, 0xc1, 0x0d, 0x11, 0x16, 0xc1,
+ 0x0d, 0x27, 0x06, 0xc1, 0x0d, 0x39, 0x05, 0xc1, 0x0d, 0x41, 0x0d, 0x41,
+ 0x0d, 0x4d, 0xca, 0xa4, 0x5c, 0x0f, 0xad, 0x30, 0xc4, 0x12, 0xf2, 0x0e,
+ 0x96, 0x98, 0xc4, 0x00, 0x48, 0x0e, 0x96, 0x43, 0x01, 0x0d, 0x59, 0xc5,
+ 0x66, 0x81, 0x0e, 0x96, 0x58, 0xc4, 0x15, 0xa7, 0x0e, 0x96, 0x3b, 0x01,
+ 0x0d, 0x5f, 0xc2, 0x22, 0x45, 0x0e, 0x96, 0x32, 0x01, 0x0d, 0x65, 0x0b,
+ 0xc1, 0x0d, 0x6b, 0xc3, 0x08, 0xde, 0x0e, 0x96, 0x22, 0x01, 0x0d, 0x77,
+ 0x0a, 0xc1, 0x0d, 0x7d, 0x19, 0xc1, 0x0d, 0x89, 0xc2, 0x01, 0x04, 0x0e,
+ 0x96, 0x50, 0x47, 0x01, 0xff, 0xc1, 0x0d, 0x93, 0xca, 0x39, 0x9c, 0x01,
+ 0x87, 0xd9, 0xce, 0x1b, 0x7a, 0x01, 0x87, 0xe9, 0xd5, 0x37, 0x00, 0x01,
+ 0x87, 0xf1, 0xcc, 0x84, 0x6c, 0x01, 0x87, 0xf8, 0xd1, 0x2f, 0x5a, 0x01,
+ 0x84, 0xd9, 0xd6, 0x2f, 0x81, 0x01, 0x84, 0xe1, 0xcd, 0x7e, 0xb5, 0x01,
+ 0x85, 0x01, 0xd4, 0x0c, 0x31, 0x01, 0x87, 0xe0, 0x45, 0x08, 0xd8, 0xc1,
+ 0x0d, 0xe9, 0xcb, 0x91, 0xff, 0x08, 0xfa, 0x21, 0xc4, 0x1c, 0xb3, 0x08,
+ 0xfa, 0x18, 0x05, 0xc1, 0x0e, 0x0d, 0x15, 0xc1, 0x0e, 0x19, 0x08, 0xc1,
+ 0x0e, 0x23, 0xca, 0x9f, 0x7a, 0x08, 0xfa, 0x59, 0x42, 0x00, 0x68, 0xc1,
+ 0x0e, 0x2f, 0xd8, 0x25, 0x7c, 0x08, 0xfa, 0x30, 0xc6, 0xd7, 0x72, 0x08,
+ 0xfa, 0x09, 0xc7, 0x45, 0xcd, 0x08, 0xf8, 0x19, 0xc5, 0xd9, 0xaa, 0x08,
+ 0xf8, 0x11, 0xc8, 0x10, 0xab, 0x08, 0xf8, 0x09, 0xcb, 0x21, 0x1a, 0x08,
+ 0xf8, 0x00, 0x87, 0x08, 0xf9, 0xf3, 0x01, 0x0e, 0x39, 0x03, 0xc1, 0x0e,
+ 0x3d, 0xc9, 0xb7, 0xd7, 0x08, 0xf9, 0xd1, 0x97, 0x08, 0xf9, 0xc3, 0x01,
+ 0x0e, 0x4b, 0x8b, 0x08, 0xf9, 0xb2, 0x01, 0x0e, 0x4f, 0x0c, 0xc1, 0x0e,
+ 0x53, 0xc2, 0x01, 0x0e, 0x08, 0xf9, 0x91, 0x15, 0xc1, 0x0e, 0x63, 0xc2,
+ 0x00, 0x4c, 0x08, 0xf9, 0x71, 0xc2, 0x00, 0x96, 0x08, 0xf9, 0x69, 0xc2,
+ 0x00, 0x9a, 0x08, 0xf9, 0x61, 0xc2, 0x1a, 0x36, 0x08, 0xf9, 0x59, 0xc2,
+ 0x00, 0x3f, 0x08, 0xf9, 0x51, 0x04, 0xc1, 0x0e, 0x73, 0x12, 0xc1, 0x0e,
+ 0x7d, 0x10, 0xc1, 0x0e, 0x87, 0x06, 0xc1, 0x0e, 0x9d, 0x16, 0xc1, 0x0e,
+ 0xab, 0x05, 0xc1, 0x0e, 0xb9, 0x09, 0xc1, 0x0e, 0xc3, 0x0d, 0xc1, 0x0e,
+ 0xcd, 0x91, 0x08, 0xf8, 0x81, 0x83, 0x08, 0xf8, 0x23, 0x01, 0x0e, 0xd7,
+ 0x87, 0x08, 0xf8, 0x71, 0x48, 0xb7, 0xd7, 0xc1, 0x0e, 0xdf, 0x97, 0x08,
+ 0xf8, 0x43, 0x01, 0x0e, 0xed, 0x8b, 0x08, 0xf8, 0x32, 0x01, 0x0e, 0xf1,
+ 0xc6, 0x03, 0x4f, 0x08, 0x86, 0x68, 0xc9, 0xb7, 0xd7, 0x08, 0x86, 0x11,
+ 0x03, 0xc1, 0x0e, 0xf5, 0x91, 0x08, 0x85, 0xb9, 0x87, 0x08, 0x85, 0xa9,
+ 0x97, 0x08, 0x85, 0x9b, 0x01, 0x0f, 0x01, 0x8b, 0x08, 0x85, 0x8a, 0x01,
+ 0x0f, 0x05, 0x46, 0x02, 0x92, 0xc1, 0x0f, 0x09, 0xc4, 0x1c, 0xb3, 0x08,
+ 0x86, 0x00, 0xcb, 0x25, 0x87, 0x08, 0x85, 0xf1, 0x44, 0x02, 0xcc, 0x41,
+ 0x0f, 0x15, 0xc2, 0x01, 0x0e, 0x08, 0x85, 0x79, 0x15, 0xc1, 0x0f, 0x2d,
+ 0xc2, 0x06, 0x8c, 0x08, 0x85, 0x59, 0xc2, 0x00, 0x96, 0x08, 0x85, 0x51,
+ 0x14, 0xc1, 0x0f, 0x3d, 0xc2, 0x1a, 0x36, 0x08, 0x85, 0x41, 0xc2, 0x00,
+ 0x3f, 0x08, 0x85, 0x39, 0x04, 0xc1, 0x0f, 0x47, 0x12, 0xc1, 0x0f, 0x51,
+ 0x10, 0xc1, 0x0f, 0x5b, 0x06, 0xc1, 0x0f, 0x71, 0x16, 0xc1, 0x0f, 0x7f,
+ 0x0c, 0xc1, 0x0f, 0x8d, 0x05, 0xc1, 0x0f, 0x97, 0x09, 0xc1, 0x0f, 0xa1,
+ 0x0d, 0xc1, 0x0f, 0xab, 0x83, 0x08, 0x84, 0x1b, 0x01, 0x0f, 0xb5, 0x91,
+ 0x08, 0x84, 0x59, 0x87, 0x08, 0x84, 0x49, 0x97, 0x08, 0x84, 0x3b, 0x01,
+ 0x0f, 0xc1, 0x8b, 0x08, 0x84, 0x2a, 0x01, 0x0f, 0xc5, 0xc4, 0xe5, 0x23,
+ 0x05, 0x49, 0x79, 0xc3, 0xeb, 0x9d, 0x05, 0x49, 0x70, 0xc5, 0xde, 0xd2,
+ 0x05, 0x49, 0x63, 0x01, 0x0f, 0xc9, 0xc6, 0xcf, 0x8e, 0x05, 0x49, 0x58,
+ 0x91, 0x05, 0x49, 0x51, 0x87, 0x05, 0x49, 0x3b, 0x01, 0x0f, 0xcf, 0x97,
+ 0x05, 0x49, 0x42, 0x01, 0x0f, 0xd3, 0x11, 0xc1, 0x0f, 0xd7, 0x8b, 0x05,
+ 0x49, 0x21, 0x83, 0x05, 0x49, 0x11, 0xc2, 0x00, 0x2e, 0x05, 0x49, 0x09,
+ 0xc2, 0x06, 0x8c, 0x05, 0x49, 0x01, 0x0a, 0xc1, 0x0f, 0xdf, 0x16, 0xc1,
+ 0x0f, 0xe9, 0xc2, 0x00, 0x4c, 0x05, 0x48, 0xe9, 0xc2, 0x00, 0x96, 0x05,
+ 0x48, 0xe1, 0xc2, 0x1a, 0x36, 0x05, 0x48, 0xd9, 0xc2, 0x00, 0x9a, 0x05,
+ 0x48, 0xd1, 0xc2, 0x02, 0x1d, 0x05, 0x48, 0xc9, 0xc2, 0x0c, 0x25, 0x05,
+ 0x48, 0xc1, 0xc2, 0x00, 0x3f, 0x05, 0x48, 0xb9, 0x12, 0xc1, 0x0f, 0xf3,
+ 0x10, 0xc1, 0x0f, 0xfd, 0xc2, 0x05, 0x5c, 0x05, 0x48, 0x81, 0x15, 0xc1,
+ 0x10, 0x0d, 0xc2, 0x07, 0x69, 0x05, 0x48, 0x61, 0x0d, 0x41, 0x10, 0x17,
+ 0xc4, 0x24, 0x35, 0x05, 0x48, 0x49, 0xc5, 0x05, 0x1b, 0x05, 0x48, 0x41,
+ 0x15, 0xc1, 0x10, 0x21, 0x08, 0xc1, 0x10, 0x2d, 0x16, 0xc1, 0x10, 0x39,
+ 0xc3, 0x05, 0x17, 0x05, 0x48, 0x09, 0xc4, 0x16, 0x57, 0x05, 0x48, 0x00,
+ 0x45, 0x02, 0xcb, 0xc1, 0x10, 0x45, 0x42, 0x00, 0x58, 0xc1, 0x10, 0x69,
+ 0x4b, 0x6f, 0x71, 0xc1, 0x10, 0x75, 0xce, 0x71, 0x58, 0x00, 0x66, 0xb1,
+ 0x46, 0x08, 0xd7, 0x41, 0x10, 0x9b, 0xc4, 0xe5, 0x5f, 0x0f, 0xcc, 0xc1,
+ 0x4b, 0x9a, 0x8c, 0x41, 0x10, 0xbf, 0xc4, 0x01, 0xa7, 0x0f, 0xb0, 0xbb,
+ 0x01, 0x11, 0x23, 0xd9, 0x20, 0x4b, 0x0f, 0xb1, 0xe8, 0xc6, 0xc0, 0x9d,
+ 0x0f, 0xd4, 0xb1, 0xc5, 0x68, 0x89, 0x0f, 0x9c, 0xb0, 0x14, 0xc1, 0x11,
+ 0x29, 0x16, 0xc1, 0x11, 0x35, 0x10, 0xc1, 0x11, 0x53, 0x06, 0xc1, 0x11,
+ 0x6c, 0x15, 0xc1, 0x11, 0x80, 0x04, 0xc1, 0x11, 0x96, 0x0a, 0xc1, 0x11,
+ 0xa0, 0x03, 0xc1, 0x11, 0xaa, 0xc2, 0x00, 0x4c, 0x0b, 0x7a, 0x11, 0x1c,
+ 0xc1, 0x11, 0xb4, 0x43, 0x73, 0xf9, 0xc1, 0x11, 0xc6, 0x09, 0xc1, 0x11,
+ 0xe2, 0xc2, 0x23, 0xe3, 0x0b, 0x79, 0x39, 0x13, 0xc1, 0x11, 0xea, 0xc2,
+ 0x06, 0x6b, 0x0b, 0x78, 0xf1, 0x0e, 0xc1, 0x11, 0xf4, 0x18, 0xc1, 0x12,
+ 0x02, 0xc2, 0x01, 0xa7, 0x0b, 0x78, 0x39, 0x0f, 0xc1, 0x12, 0x0c, 0x12,
+ 0x41, 0x12, 0x16, 0xc5, 0x00, 0x34, 0x0b, 0x7c, 0x91, 0xc5, 0x03, 0x50,
+ 0x0b, 0x7c, 0x89, 0xc9, 0x6a, 0xc8, 0x0b, 0x7c, 0x81, 0xc5, 0x00, 0x47,
+ 0x0b, 0x7c, 0x78, 0x97, 0x0b, 0x7b, 0x53, 0x01, 0x12, 0x20, 0x8b, 0x0b,
+ 0x7b, 0x0b, 0x01, 0x12, 0x41, 0x87, 0x0b, 0x7a, 0xeb, 0x01, 0x12, 0x65,
+ 0xc2, 0x00, 0x18, 0x0b, 0x7c, 0x19, 0x91, 0x0b, 0x7a, 0xcb, 0x01, 0x12,
+ 0x7b, 0x9b, 0x0b, 0x7b, 0x8b, 0x01, 0x12, 0x8b, 0x90, 0x0b, 0x7b, 0xeb,
+ 0x01, 0x12, 0x95, 0x83, 0x0b, 0x7a, 0xa3, 0x01, 0x12, 0x99, 0xca, 0xa3,
+ 0x1c, 0x0b, 0x7b, 0xc3, 0x01, 0x12, 0xb9, 0x99, 0x0b, 0x7a, 0xe2, 0x01,
+ 0x12, 0xbd, 0x49, 0xb4, 0x2f, 0xc1, 0x12, 0xc1, 0xca, 0x9d, 0xe0, 0x0b,
+ 0x7a, 0x89, 0xd6, 0x2e, 0x63, 0x0b, 0x7a, 0x78, 0xcb, 0x9b, 0x73, 0x01,
+ 0x22, 0x49, 0xcc, 0x8a, 0x18, 0x01, 0x22, 0x40, 0xc5, 0xc3, 0x66, 0x0f,
+ 0xa9, 0x61, 0xc5, 0x32, 0xae, 0x0f, 0x9d, 0x21, 0xc5, 0x02, 0xca, 0x00,
+ 0x05, 0xa9, 0xc2, 0x00, 0x34, 0x0f, 0xcd, 0x00, 0xc3, 0x05, 0xe3, 0x00,
+ 0x05, 0xb9, 0xe0, 0x01, 0x47, 0x0f, 0xde, 0x10, 0x42, 0x00, 0x27, 0xc1,
+ 0x12, 0xcd, 0xce, 0x6c, 0xea, 0x01, 0x10, 0x98, 0xc4, 0xd2, 0xa6, 0x0f,
+ 0xae, 0xa9, 0xc4, 0x5d, 0x35, 0x0f, 0xa5, 0xe9, 0xc3, 0x25, 0x04, 0x0f,
+ 0xb4, 0x80, 0x43, 0x01, 0x0a, 0xc1, 0x12, 0xdc, 0x45, 0xdb, 0xcb, 0x41,
+ 0x13, 0x18, 0xce, 0x6f, 0x7c, 0x0b, 0x74, 0xd1, 0x15, 0xc1, 0x13, 0x2a,
+ 0xc9, 0x11, 0x47, 0x0b, 0x74, 0xc1, 0x05, 0xc1, 0x13, 0x36, 0x46, 0x08,
+ 0xd7, 0xc1, 0x13, 0x42, 0x47, 0x37, 0x49, 0x41, 0x13, 0x69, 0xc9, 0xb0,
+ 0xd8, 0x01, 0x1e, 0xc9, 0x16, 0xc1, 0x13, 0x7f, 0x4a, 0xa5, 0xd8, 0xc1,
+ 0x13, 0x91, 0xcf, 0x6c, 0x5d, 0x01, 0x1e, 0x99, 0xc5, 0x1f, 0x9c, 0x01,
+ 0x1e, 0x88, 0x4a, 0x9e, 0x6c, 0xc1, 0x13, 0x9d, 0x46, 0x08, 0xd7, 0xc1,
+ 0x13, 0xa5, 0x51, 0x57, 0x1f, 0x41, 0x13, 0xc3, 0x48, 0xc2, 0xc3, 0xc1,
+ 0x13, 0xd3, 0x4d, 0x78, 0xb7, 0x41, 0x13, 0xe3, 0xc2, 0x02, 0x58, 0x01,
+ 0x12, 0xf1, 0xc5, 0x00, 0x55, 0x01, 0x11, 0x0b, 0x01, 0x13, 0xef, 0xd4,
+ 0x3a, 0xe6, 0x01, 0x4c, 0xe8, 0xc4, 0x16, 0x57, 0x05, 0x5f, 0x81, 0xc4,
+ 0x24, 0x35, 0x05, 0x5f, 0xc9, 0xc3, 0x05, 0x17, 0x05, 0x5f, 0x89, 0x16,
+ 0xc1, 0x13, 0xf3, 0x08, 0xc1, 0x13, 0xff, 0x15, 0xc1, 0x14, 0x0b, 0xc5,
+ 0x05, 0x1b, 0x05, 0x5f, 0xc0, 0xc8, 0xc1, 0x63, 0x05, 0x5f, 0x69, 0xc3,
+ 0x81, 0xeb, 0x05, 0x57, 0x91, 0xcb, 0x91, 0x9c, 0x05, 0x57, 0x88, 0x4a,
+ 0x6f, 0x72, 0xc1, 0x14, 0x17, 0xc5, 0x21, 0x27, 0x05, 0x57, 0xb0, 0x46,
+ 0x02, 0x00, 0xc1, 0x14, 0x47, 0xc7, 0xc7, 0x05, 0x05, 0x5f, 0x60, 0xc2,
+ 0x00, 0x3a, 0x05, 0x57, 0x81, 0xc2, 0x05, 0x1b, 0x05, 0x5f, 0x58, 0x00,
+ 0xc1, 0x14, 0xb6, 0xc3, 0x1b, 0x40, 0x0f, 0xb7, 0x19, 0xcf, 0x62, 0xfd,
+ 0x0f, 0xcd, 0xe0, 0xc3, 0x01, 0xcc, 0x01, 0x37, 0x83, 0x01, 0x14, 0xc2,
+ 0xc5, 0xdb, 0xa3, 0x0f, 0xaf, 0xd8, 0x00, 0x41, 0x14, 0xc6, 0x45, 0x05,
+ 0x49, 0xc1, 0x14, 0xd2, 0x00, 0x41, 0x14, 0xde, 0xc2, 0x01, 0xb6, 0x01,
+ 0x15, 0x39, 0xcd, 0x7d, 0x22, 0x0f, 0xc9, 0xd8, 0x49, 0x83, 0x73, 0xc1,
+ 0x14, 0xfa, 0xcd, 0x81, 0x59, 0x01, 0x1c, 0x69, 0xc4, 0x47, 0x04, 0x0f,
+ 0xb4, 0xe8, 0x16, 0xc1, 0x15, 0x04, 0x15, 0xc1, 0x15, 0x16, 0xce, 0x6c,
+ 0x7a, 0x08, 0xb3, 0x3b, 0x01, 0x15, 0x25, 0xcd, 0x7c, 0x38, 0x08, 0xb3,
+ 0x0b, 0x01, 0x15, 0x2b, 0xc5, 0x09, 0x6d, 0x00, 0xc0, 0x03, 0x01, 0x15,
+ 0x31, 0x06, 0xc1, 0x15, 0x37, 0x47, 0x01, 0xff, 0xc1, 0x15, 0x43, 0x08,
+ 0xc1, 0x15, 0xce, 0xcf, 0x66, 0x18, 0x00, 0xc0, 0x71, 0xc6, 0xd2, 0xd4,
+ 0x00, 0xc0, 0x51, 0x47, 0xc7, 0x36, 0xc1, 0x15, 0xe0, 0x42, 0x00, 0xea,
+ 0xc1, 0x15, 0xec, 0xc8, 0x23, 0xac, 0x00, 0xc0, 0x08, 0x42, 0x00, 0x32,
+ 0xc1, 0x15, 0xf8, 0xcc, 0x8a, 0x78, 0x0f, 0xc8, 0x88, 0xc5, 0x10, 0x15,
+ 0x0f, 0xa1, 0xa8, 0xd0, 0x5e, 0xff, 0x0f, 0x9c, 0x89, 0xc4, 0x28, 0x52,
+ 0x0f, 0xcb, 0x70, 0xc3, 0x6f, 0x54, 0x0f, 0xa7, 0xa1, 0xdd, 0x11, 0xc4,
+ 0x0f, 0xa7, 0x90, 0x47, 0xca, 0x00, 0xc1, 0x16, 0x04, 0x45, 0x60, 0x2f,
+ 0xc1, 0x16, 0x32, 0x4a, 0xa9, 0x02, 0xc1, 0x16, 0x70, 0x15, 0xc1, 0x16,
+ 0x82, 0x4e, 0x6d, 0x06, 0xc1, 0x16, 0x8e, 0x08, 0xc1, 0x16, 0xa0, 0x42,
+ 0x00, 0x47, 0xc1, 0x16, 0xac, 0x45, 0x00, 0xcd, 0x41, 0x16, 0xb8, 0xc4,
+ 0x12, 0xf2, 0x0e, 0x97, 0x98, 0xc4, 0x00, 0x48, 0x0e, 0x97, 0x43, 0x01,
+ 0x16, 0xd0, 0xc5, 0x66, 0x81, 0x0e, 0x97, 0x58, 0xc4, 0x15, 0xa7, 0x0e,
+ 0x97, 0x3b, 0x01, 0x16, 0xd6, 0xc2, 0x22, 0x45, 0x0e, 0x97, 0x32, 0x01,
+ 0x16, 0xdc, 0x0b, 0xc1, 0x16, 0xe2, 0xc3, 0x08, 0xde, 0x0e, 0x97, 0x22,
+ 0x01, 0x16, 0xee, 0x0a, 0xc1, 0x16, 0xf4, 0x19, 0xc1, 0x17, 0x00, 0xc2,
+ 0x01, 0x04, 0x0e, 0x97, 0x50, 0xce, 0x72, 0xfc, 0x08, 0xf7, 0xc1, 0xca,
+ 0xa5, 0x06, 0x08, 0xf7, 0xb9, 0x4b, 0x6f, 0x71, 0xc1, 0x17, 0x0a, 0xc5,
+ 0xd8, 0x26, 0x08, 0xf7, 0x91, 0x47, 0x01, 0xff, 0x41, 0x17, 0x1a, 0x46,
+ 0x08, 0xd7, 0xc1, 0x17, 0x76, 0x14, 0xc1, 0x17, 0x9a, 0x18, 0xc1, 0x17,
+ 0xa6, 0x45, 0x02, 0xcb, 0xc1, 0x17, 0xb2, 0x47, 0x01, 0xff, 0x41, 0x17,
+ 0xd0, 0x15, 0xc1, 0x18, 0x37, 0x4b, 0x6f, 0x71, 0xc1, 0x18, 0x43, 0x47,
+ 0x01, 0xff, 0xc1, 0x18, 0x59, 0xc9, 0xb5, 0x61, 0x08, 0xe3, 0x89, 0xc9,
+ 0x16, 0xa8, 0x08, 0xe3, 0x80, 0x4c, 0x26, 0x19, 0xc1, 0x18, 0xb9, 0xcf,
+ 0x23, 0xed, 0x01, 0x35, 0x29, 0xc4, 0x02, 0xcb, 0x01, 0x32, 0x10, 0x45,
+ 0x02, 0xcb, 0xc1, 0x18, 0xc5, 0x47, 0x01, 0xff, 0xc1, 0x18, 0xd7, 0x4b,
+ 0x6f, 0x71, 0xc1, 0x19, 0x40, 0xce, 0x74, 0xe6, 0x00, 0x6a, 0xb9, 0x49,
+ 0x54, 0xdd, 0xc1, 0x19, 0x66, 0x06, 0xc1, 0x19, 0x72, 0x47, 0x37, 0x49,
+ 0x41, 0x19, 0x7e, 0x4c, 0x11, 0x33, 0xc1, 0x19, 0x8a, 0x47, 0x37, 0x49,
+ 0xc1, 0x19, 0xa8, 0x52, 0x4a, 0x46, 0xc1, 0x19, 0xbb, 0x47, 0x01, 0xff,
+ 0xc1, 0x19, 0xc7, 0xc7, 0xc8, 0xe1, 0x08, 0x56, 0x40, 0xc7, 0xc7, 0xe5,
+ 0x0f, 0xab, 0xd1, 0x43, 0x01, 0xf4, 0xc1, 0x1a, 0x2c, 0x45, 0x01, 0xac,
+ 0xc1, 0x1a, 0x38, 0xd7, 0x29, 0x37, 0x0f, 0xa3, 0x58, 0xcb, 0x0b, 0xfc,
+ 0x00, 0x42, 0xf1, 0xcf, 0x64, 0x0b, 0x00, 0x42, 0xd9, 0xd1, 0x50, 0x48,
+ 0x00, 0x42, 0xd1, 0xd0, 0x5c, 0xbf, 0x00, 0x42, 0xc9, 0x47, 0x01, 0xff,
+ 0x41, 0x1a, 0x44, 0x0e, 0xc1, 0x1a, 0x64, 0x15, 0xc1, 0x1a, 0x70, 0xd1,
+ 0x56, 0x0f, 0x08, 0x8b, 0xa0, 0xc5, 0x9a, 0xc9, 0x0f, 0x81, 0x51, 0x19,
+ 0xc1, 0x1a, 0x7c, 0x07, 0xc1, 0x1a, 0x8e, 0x15, 0xc1, 0x1a, 0x9a, 0x10,
+ 0xc1, 0x1a, 0xb8, 0xca, 0xa4, 0xb6, 0x0f, 0x80, 0x21, 0xcc, 0x8e, 0xa4,
+ 0x0f, 0x80, 0x29, 0x11, 0xc1, 0x1a, 0xc4, 0x16, 0xc1, 0x1a, 0xd0, 0x08,
+ 0xc1, 0x1a, 0xdc, 0xc4, 0xe8, 0xb7, 0x0f, 0x81, 0x11, 0xcd, 0x79, 0x46,
+ 0x0f, 0x81, 0x29, 0x42, 0x02, 0x1d, 0xc1, 0x1a, 0xe8, 0xc6, 0xd8, 0x4a,
+ 0x0f, 0x81, 0x40, 0x43, 0x00, 0x9b, 0xc1, 0x1a, 0xf4, 0x00, 0x41, 0x1b,
+ 0x07, 0x42, 0x0c, 0x20, 0xc1, 0x1b, 0x19, 0xc3, 0x50, 0x8a, 0x01, 0x15,
+ 0xc1, 0xc3, 0x0d, 0x99, 0x01, 0x14, 0x62, 0x01, 0x1b, 0x25, 0xcc, 0x25,
+ 0x86, 0x08, 0x95, 0x49, 0x47, 0x01, 0xff, 0x41, 0x1b, 0x29, 0xc4, 0x24,
+ 0x35, 0x0b, 0x53, 0x49, 0xc5, 0x05, 0x1b, 0x0b, 0x53, 0x41, 0x15, 0xc1,
+ 0x1b, 0x85, 0x08, 0xc1, 0x1b, 0x91, 0x16, 0xc1, 0x1b, 0x9d, 0xc3, 0x05,
+ 0x17, 0x0b, 0x53, 0x09, 0xc4, 0x16, 0x57, 0x0b, 0x53, 0x00, 0xc2, 0x13,
+ 0x31, 0x0b, 0x52, 0xf1, 0xc3, 0x00, 0x5b, 0x0b, 0x52, 0xa9, 0x83, 0x0b,
+ 0x52, 0x00, 0x8b, 0x0b, 0x52, 0xe9, 0x91, 0x0b, 0x52, 0x98, 0x8b, 0x0b,
+ 0x52, 0xe1, 0x91, 0x0b, 0x52, 0x48, 0x90, 0x0b, 0x52, 0xd0, 0x91, 0x0b,
+ 0x52, 0xc9, 0xc4, 0xe6, 0x43, 0x0b, 0x52, 0x61, 0xc3, 0x48, 0x96, 0x0b,
+ 0x52, 0x40, 0x83, 0x0b, 0x52, 0xb0, 0x91, 0x0b, 0x52, 0x89, 0x8e, 0x0b,
+ 0x52, 0x68, 0x83, 0x0b, 0x52, 0x81, 0xc2, 0x00, 0x0a, 0x0b, 0x52, 0x38,
+ 0xc2, 0x01, 0x5b, 0x0b, 0x52, 0x79, 0xc2, 0x03, 0xab, 0x0b, 0x52, 0x08,
+ 0xc3, 0x7a, 0x15, 0x0b, 0x52, 0x71, 0xc2, 0x07, 0x6e, 0x0b, 0x52, 0x18,
+ 0x8b, 0x0b, 0x52, 0x50, 0x4d, 0x7f, 0x9f, 0xc1, 0x1b, 0xa9, 0xce, 0x6d,
+ 0x30, 0x05, 0x53, 0xd9, 0x15, 0xc1, 0x1b, 0xb5, 0x03, 0xc1, 0x1b, 0xc1,
+ 0xc9, 0x0d, 0xd7, 0x00, 0x81, 0xb9, 0x42, 0x02, 0x52, 0xc1, 0x1b, 0xcd,
+ 0xce, 0x73, 0x0a, 0x00, 0x82, 0x51, 0x55, 0x32, 0xe6, 0xc1, 0x1b, 0xd9,
+ 0xd4, 0x3a, 0xfa, 0x00, 0x84, 0x79, 0x4a, 0x9d, 0x7c, 0x41, 0x1b, 0xf7,
+ 0x03, 0xc1, 0x1c, 0x03, 0xc8, 0xbb, 0x43, 0x00, 0x82, 0x61, 0xc9, 0xb4,
+ 0x6e, 0x00, 0x82, 0x69, 0xc8, 0xbf, 0xbb, 0x00, 0x82, 0x79, 0x45, 0x4b,
+ 0x8a, 0x41, 0x1c, 0x0f, 0xc4, 0x16, 0x57, 0x00, 0x84, 0x81, 0xc3, 0x05,
+ 0x17, 0x00, 0x84, 0x89, 0x16, 0xc1, 0x1c, 0x1b, 0x08, 0xc1, 0x1c, 0x27,
+ 0x15, 0xc1, 0x1c, 0x33, 0xc5, 0x05, 0x1b, 0x00, 0x84, 0xc1, 0xc4, 0x24,
+ 0x35, 0x00, 0x84, 0xc8, 0x83, 0x00, 0x81, 0x0b, 0x01, 0x1c, 0x3f, 0x0d,
+ 0xc1, 0x1c, 0x49, 0x16, 0xc1, 0x1c, 0x56, 0x15, 0xc1, 0x1c, 0x67, 0x09,
+ 0xc1, 0x1c, 0x7b, 0x10, 0xc1, 0x1c, 0x8b, 0x05, 0xc1, 0x1c, 0x9f, 0x0c,
+ 0xc1, 0x1c, 0xa9, 0x06, 0xc1, 0x1c, 0xb3, 0x12, 0xc1, 0x1c, 0xc1, 0x04,
+ 0xc1, 0x1c, 0xcb, 0x0f, 0xc1, 0x1c, 0xd5, 0xc2, 0x1a, 0x36, 0x00, 0x80,
+ 0xd1, 0x14, 0xc1, 0x1c, 0xdf, 0x0e, 0xc1, 0x1c, 0xe9, 0x19, 0xc1, 0x1c,
+ 0xf3, 0xc2, 0x01, 0x0e, 0x00, 0x80, 0xf9, 0x8b, 0x00, 0x81, 0x1b, 0x01,
+ 0x1c, 0xfd, 0x97, 0x00, 0x81, 0x2b, 0x01, 0x1d, 0x01, 0x87, 0x00, 0x81,
+ 0x3b, 0x01, 0x1d, 0x05, 0x91, 0x00, 0x81, 0x49, 0x48, 0xb7, 0xd7, 0x41,
+ 0x1d, 0x0b, 0xc2, 0x08, 0xae, 0x05, 0x53, 0xb1, 0xc2, 0xcd, 0xe4, 0x05,
+ 0x53, 0xa9, 0xc3, 0xec, 0x84, 0x05, 0x53, 0xa0, 0xc4, 0x24, 0x35, 0x05,
+ 0x4f, 0xc9, 0xc5, 0x05, 0x1b, 0x05, 0x4f, 0xc1, 0x15, 0xc1, 0x1d, 0x19,
+ 0x08, 0xc1, 0x1d, 0x25, 0x16, 0xc1, 0x1d, 0x31, 0xc3, 0x05, 0x17, 0x05,
+ 0x4f, 0x89, 0xc4, 0x16, 0x57, 0x05, 0x4f, 0x80, 0xc5, 0xdf, 0x90, 0x00,
+ 0x83, 0x19, 0xc6, 0xd9, 0x16, 0x00, 0x83, 0x20, 0x83, 0x00, 0x81, 0x61,
+ 0x8b, 0x00, 0x81, 0x92, 0x01, 0x1d, 0x3d, 0x8b, 0x00, 0x81, 0x70, 0x97,
+ 0x00, 0x81, 0x80, 0xc6, 0x03, 0x4f, 0x00, 0x81, 0xa8, 0xc2, 0x23, 0x68,
+ 0x00, 0x81, 0x99, 0x91, 0x00, 0x81, 0xa0, 0x94, 0x00, 0x82, 0xb3, 0x01,
+ 0x1d, 0x46, 0x8e, 0x00, 0x82, 0xc2, 0x01, 0x1d, 0x4a, 0xcc, 0x83, 0x88,
+ 0x00, 0x83, 0x11, 0x44, 0x03, 0x4c, 0x41, 0x1d, 0x4e, 0xc2, 0x0c, 0xfe,
+ 0x00, 0x83, 0x39, 0xc2, 0x0e, 0x78, 0x00, 0x83, 0x40, 0xc2, 0x09, 0x06,
+ 0x00, 0x83, 0x91, 0x97, 0x00, 0x83, 0x99, 0xc2, 0x01, 0xe6, 0x00, 0x83,
+ 0xa0, 0x46, 0x2e, 0x47, 0xc1, 0x1d, 0x61, 0x4a, 0xa3, 0x62, 0x41, 0x1d,
+ 0x79, 0xc2, 0x0a, 0x20, 0x00, 0x82, 0x11, 0xc4, 0x05, 0xde, 0x00, 0x82,
+ 0x18, 0xc3, 0x08, 0xde, 0x00, 0x82, 0x21, 0xc3, 0x0d, 0x8f, 0x00, 0x82,
+ 0x28, 0xc2, 0x22, 0x45, 0x00, 0x82, 0x31, 0xc4, 0x15, 0xa7, 0x00, 0x82,
+ 0x38, 0xc9, 0xb0, 0xbd, 0x0f, 0xd4, 0x31, 0xca, 0xa2, 0x2c, 0x0f, 0xd5,
+ 0xd0, 0x46, 0xd6, 0x28, 0xc1, 0x1d, 0x8b, 0xc4, 0x01, 0xa7, 0x0f, 0xb0,
+ 0x80, 0x15, 0xc1, 0x1d, 0xc2, 0x47, 0x01, 0xff, 0xc1, 0x1d, 0xcc, 0xce,
+ 0x6c, 0xdc, 0x08, 0xa2, 0xe9, 0xd0, 0x5a, 0x8f, 0x08, 0xa2, 0xd9, 0x06,
+ 0xc1, 0x1e, 0x33, 0xd1, 0x56, 0x0f, 0x08, 0xa2, 0x79, 0xca, 0xa0, 0x42,
+ 0x08, 0xa2, 0x71, 0xc5, 0x00, 0xea, 0x08, 0xa2, 0x69, 0xc2, 0x00, 0xeb,
+ 0x08, 0xa2, 0x49, 0x4b, 0x6f, 0x71, 0x41, 0x1e, 0x45, 0xcb, 0x93, 0xac,
+ 0x01, 0x05, 0x51, 0x48, 0xc1, 0x83, 0xc1, 0x1e, 0x65, 0x45, 0x16, 0xb7,
+ 0xc1, 0x1e, 0x84, 0xc4, 0x03, 0x5d, 0x00, 0x00, 0x50, 0xc4, 0x00, 0xcd,
+ 0x01, 0x5c, 0x91, 0xc5, 0x00, 0x47, 0x01, 0x5c, 0x98, 0x48, 0x01, 0xe9,
+ 0xc1, 0x1e, 0x90, 0x48, 0x1f, 0x70, 0xc1, 0x1e, 0xc0, 0xcb, 0x4f, 0x03,
+ 0x00, 0x00, 0xa9, 0x49, 0x20, 0x6a, 0x41, 0x1e, 0xde, 0xe0, 0x02, 0x87,
+ 0x01, 0x15, 0x78, 0x43, 0x08, 0x28, 0xc1, 0x1e, 0xf0, 0x42, 0x05, 0x08,
+ 0x41, 0x1e, 0xfc, 0xc9, 0x09, 0xde, 0x01, 0x13, 0xc9, 0x43, 0x00, 0x92,
+ 0x41, 0x1f, 0x02, 0xcc, 0x06, 0xfb, 0x01, 0x13, 0xc1, 0x43, 0x00, 0x92,
+ 0x41, 0x1f, 0x0e, 0xc4, 0x24, 0x35, 0x0f, 0x27, 0xc9, 0xc5, 0x05, 0x1b,
+ 0x0f, 0x27, 0xc1, 0x15, 0xc1, 0x1f, 0x1a, 0x08, 0xc1, 0x1f, 0x26, 0x16,
+ 0xc1, 0x1f, 0x32, 0xc3, 0x05, 0x17, 0x0f, 0x27, 0x89, 0xc4, 0x16, 0x57,
+ 0x0f, 0x27, 0x80, 0xc5, 0xdc, 0x11, 0x0f, 0x27, 0x79, 0xc4, 0xe7, 0x03,
+ 0x0f, 0x27, 0x71, 0xc5, 0xde, 0xb4, 0x0f, 0x27, 0x69, 0xc5, 0xe0, 0x62,
+ 0x0f, 0x27, 0x61, 0xc4, 0xe9, 0x67, 0x0f, 0x27, 0x58, 0x87, 0x0f, 0x27,
+ 0x23, 0x01, 0x1f, 0x3e, 0x97, 0x0f, 0x26, 0xfb, 0x01, 0x1f, 0x59, 0x8b,
+ 0x0f, 0x26, 0xd3, 0x01, 0x1f, 0x69, 0x83, 0x0f, 0x26, 0xab, 0x01, 0x1f,
+ 0x7b, 0x91, 0x0f, 0x26, 0x82, 0x01, 0x1f, 0x8d, 0x4b, 0x6f, 0x71, 0xc1,
+ 0x1f, 0x9f, 0xca, 0x58, 0xd5, 0x08, 0xcf, 0x19, 0x45, 0x02, 0xcb, 0xc1,
+ 0x1f, 0xc8, 0x47, 0x01, 0xff, 0x41, 0x1f, 0xd8, 0x47, 0x37, 0x49, 0xc1,
+ 0x20, 0x3b, 0xd5, 0x37, 0x3f, 0x08, 0x45, 0x59, 0x47, 0x01, 0xff, 0x41,
+ 0x20, 0x4c, 0x00, 0xc1, 0x20, 0xb5, 0xd6, 0x2f, 0x29, 0x0f, 0xb7, 0x50,
+ 0xcc, 0x25, 0x70, 0x01, 0x15, 0xa0, 0xe0, 0x05, 0xc7, 0x0f, 0xaa, 0x21,
+ 0x0e, 0xc1, 0x20, 0xc7, 0x4d, 0x78, 0x0e, 0x41, 0x20, 0xd3, 0xca, 0xa1,
+ 0x3c, 0x01, 0x1b, 0xd9, 0xd2, 0x4c, 0xe0, 0x01, 0x17, 0x53, 0x01, 0x20,
+ 0xd9, 0x15, 0xc1, 0x20, 0xdf, 0x16, 0xc1, 0x20, 0xeb, 0x03, 0xc1, 0x20,
+ 0xf7, 0xcc, 0x06, 0xfb, 0x01, 0x13, 0x79, 0xc9, 0x09, 0xde, 0x01, 0x13,
+ 0x71, 0x43, 0x00, 0x92, 0xc1, 0x21, 0x0f, 0xcc, 0x8d, 0x30, 0x01, 0x13,
+ 0x11, 0xcb, 0x6b, 0x26, 0x01, 0x11, 0x30, 0x43, 0x07, 0x43, 0xc1, 0x21,
+ 0x1b, 0xc4, 0xe5, 0xb7, 0x0f, 0xa6, 0x9a, 0x01, 0x21, 0x25, 0xc5, 0x02,
+ 0xca, 0x0f, 0xb5, 0x58, 0xc5, 0xdc, 0x39, 0x0f, 0xab, 0x91, 0xca, 0xa9,
+ 0xf2, 0x0f, 0xb5, 0xb8, 0xc9, 0xb6, 0x54, 0x00, 0x04, 0x19, 0xc7, 0xca,
+ 0x93, 0x0f, 0xb5, 0x98, 0x99, 0x0f, 0x09, 0x61, 0x87, 0x0f, 0x09, 0x53,
+ 0x01, 0x21, 0x2b, 0x91, 0x0f, 0x09, 0x43, 0x01, 0x21, 0x2f, 0x97, 0x0f,
+ 0x09, 0x39, 0x8b, 0x0f, 0x09, 0x31, 0x83, 0x0f, 0x09, 0x23, 0x01, 0x21,
+ 0x33, 0x14, 0xc1, 0x21, 0x37, 0xc2, 0x07, 0x69, 0x0f, 0x09, 0x11, 0x12,
+ 0xc1, 0x21, 0x41, 0x0f, 0xc1, 0x21, 0x4b, 0xc2, 0x01, 0x0e, 0x0f, 0x08,
+ 0x23, 0x01, 0x21, 0x55, 0x10, 0xc1, 0x21, 0x59, 0x06, 0xc1, 0x21, 0x83,
+ 0x1a, 0xc1, 0x21, 0x8d, 0xc2, 0x1a, 0x36, 0x0f, 0x08, 0xc1, 0xc2, 0x0e,
+ 0x13, 0x0f, 0x08, 0xb9, 0xc2, 0x01, 0xa7, 0x0f, 0x08, 0xa9, 0x16, 0xc1,
+ 0x21, 0x97, 0xc2, 0x06, 0x8c, 0x0f, 0x08, 0x91, 0xc2, 0x06, 0x6b, 0x0f,
+ 0x08, 0x71, 0xc2, 0x05, 0x5c, 0x0f, 0x08, 0x59, 0xc2, 0x0e, 0xe5, 0x0f,
+ 0x08, 0x51, 0xc2, 0x00, 0x96, 0x0f, 0x08, 0x49, 0xc2, 0x00, 0x2e, 0x0f,
+ 0x08, 0x40, 0xc4, 0x15, 0xa7, 0x0f, 0x0a, 0x39, 0xc2, 0x22, 0x45, 0x0f,
+ 0x0a, 0x30, 0xc3, 0x0d, 0x8f, 0x0f, 0x0a, 0x29, 0xc3, 0x08, 0xde, 0x0f,
+ 0x0a, 0x20, 0xc4, 0x05, 0xde, 0x0f, 0x0a, 0x19, 0xc2, 0x0a, 0x20, 0x0f,
+ 0x0a, 0x10, 0xc5, 0xe1, 0xc5, 0x0f, 0x09, 0xe1, 0x44, 0x16, 0x5c, 0x41,
+ 0x21, 0xa7, 0x1f, 0xc1, 0x21, 0xc5, 0x1e, 0x41, 0x22, 0x05, 0x16, 0xc1,
+ 0x22, 0x29, 0xd2, 0x4c, 0xbc, 0x01, 0x24, 0xd1, 0x07, 0xc1, 0x22, 0x3b,
+ 0x15, 0xc1, 0x22, 0x47, 0x08, 0x41, 0x22, 0x51, 0xc4, 0x24, 0x35, 0x00,
+ 0x3e, 0x49, 0xc5, 0x05, 0x1b, 0x00, 0x3e, 0x41, 0x15, 0xc1, 0x22, 0x5d,
+ 0x08, 0xc1, 0x22, 0x69, 0x16, 0xc1, 0x22, 0x75, 0xc3, 0x05, 0x17, 0x00,
+ 0x3e, 0x09, 0xc4, 0x16, 0x57, 0x00, 0x3e, 0x00, 0x0c, 0xc1, 0x22, 0x81,
+ 0x90, 0x00, 0x3e, 0x93, 0x01, 0x22, 0x8b, 0xc2, 0x1a, 0x36, 0x00, 0x3f,
+ 0x31, 0xc2, 0x00, 0x4c, 0x00, 0x3f, 0x29, 0xc2, 0x01, 0x0e, 0x00, 0x3f,
+ 0x21, 0xc2, 0x00, 0x3f, 0x00, 0x3f, 0x09, 0xc2, 0x00, 0x96, 0x00, 0x3e,
+ 0xf9, 0xc2, 0x06, 0x6b, 0x00, 0x3e, 0xf1, 0xc2, 0x01, 0xa7, 0x00, 0x3e,
+ 0xe9, 0xc3, 0xa0, 0x56, 0x00, 0x3e, 0xe1, 0xc2, 0x0e, 0xe5, 0x00, 0x3e,
+ 0xd9, 0x14, 0xc1, 0x22, 0x9b, 0xc2, 0x0c, 0x25, 0x00, 0x3e, 0xc3, 0x01,
+ 0x22, 0xa5, 0xc3, 0x1c, 0x4f, 0x00, 0x3e, 0xb9, 0xc2, 0x00, 0x44, 0x00,
+ 0x3e, 0xa9, 0xc2, 0x07, 0x44, 0x00, 0x3e, 0xa1, 0xc2, 0x02, 0x1d, 0x00,
+ 0x3e, 0x99, 0x91, 0x00, 0x3e, 0x83, 0x01, 0x22, 0xab, 0x97, 0x00, 0x3e,
+ 0x71, 0x87, 0x00, 0x3e, 0x6b, 0x01, 0x22, 0xaf, 0x8b, 0x00, 0x3e, 0x61,
+ 0x83, 0x00, 0x3e, 0x50, 0xd0, 0x5b, 0xaf, 0x00, 0x3f, 0x99, 0xd1, 0x56,
+ 0x64, 0x00, 0x3f, 0x91, 0x45, 0x2c, 0x27, 0xc1, 0x22, 0xb3, 0x46, 0x2f,
+ 0xd9, 0x41, 0x22, 0xcb, 0xc6, 0x51, 0x63, 0x0f, 0xd3, 0x59, 0xc5, 0xdd,
+ 0x47, 0x0f, 0xd3, 0x60, 0xc6, 0x51, 0x63, 0x0f, 0xd3, 0x21, 0xc5, 0xdd,
+ 0x47, 0x0f, 0xd3, 0x28, 0xc8, 0xc2, 0x33, 0x0f, 0xcd, 0x81, 0xca, 0xa0,
+ 0x1a, 0x0f, 0xcd, 0x89, 0xc4, 0xe5, 0x4b, 0x0f, 0xcd, 0x91, 0xca, 0xa8,
+ 0x8a, 0x0f, 0xcd, 0x98, 0xc3, 0x82, 0xb0, 0x0f, 0x9f, 0xf9, 0xc3, 0x3b,
+ 0x0b, 0x0f, 0x9f, 0xf1, 0xc3, 0x82, 0xe0, 0x0f, 0x9f, 0xe9, 0xc3, 0x82,
+ 0xec, 0x0f, 0x9f, 0xe1, 0xc5, 0xd9, 0x87, 0x0f, 0x9f, 0xd8, 0xc3, 0x0d,
+ 0x99, 0x01, 0x10, 0x2b, 0x01, 0x22, 0xdd, 0xc4, 0xa2, 0x46, 0x0f, 0xae,
+ 0x63, 0x01, 0x22, 0xe3, 0xc8, 0xb8, 0xc3, 0x0f, 0xae, 0x59, 0x10, 0x41,
+ 0x22, 0xe7, 0x42, 0x06, 0xfa, 0x41, 0x22, 0xf6, 0x43, 0x03, 0x73, 0xc1,
+ 0x23, 0x02, 0xd0, 0x5f, 0x3f, 0x0f, 0xcd, 0xd8, 0xcf, 0x6b, 0x13, 0x09,
+ 0xa2, 0xab, 0x01, 0x23, 0x0e, 0xd2, 0x4d, 0x4c, 0x09, 0xa2, 0x01, 0x1d,
+ 0x41, 0x23, 0x14, 0xcd, 0x7f, 0x44, 0x09, 0xa2, 0x31, 0x1d, 0x41, 0x23,
+ 0x28, 0xcd, 0x7c, 0xee, 0x09, 0xa2, 0x29, 0x1d, 0x41, 0x23, 0x34, 0x44,
+ 0x01, 0x1e, 0xc1, 0x23, 0x44, 0xd0, 0x5b, 0xcf, 0x09, 0xa1, 0x89, 0x42,
+ 0xd2, 0x56, 0x41, 0x23, 0x50, 0xc8, 0x7c, 0xf3, 0x09, 0xa2, 0x19, 0x42,
+ 0xd2, 0x56, 0x41, 0x23, 0x73, 0xc9, 0xb5, 0x8e, 0x09, 0xa2, 0x09, 0x1d,
+ 0x41, 0x23, 0x99, 0x43, 0x00, 0x3b, 0xc1, 0x23, 0xb1, 0x1d, 0x41, 0x23,
+ 0xc3, 0x45, 0x3e, 0x58, 0xc1, 0x23, 0xd3, 0x42, 0xd2, 0x56, 0x41, 0x23,
+ 0xe5, 0x49, 0xad, 0xff, 0xc1, 0x24, 0x0c, 0x1d, 0x41, 0x24, 0x24, 0xcd,
+ 0x82, 0x1c, 0x09, 0xa1, 0xb1, 0x1d, 0x41, 0x24, 0x2c, 0xce, 0x74, 0x06,
+ 0x09, 0xa1, 0x81, 0x1d, 0x41, 0x24, 0x44, 0x42, 0xe7, 0xae, 0xc1, 0x24,
+ 0x5d, 0x1d, 0x41, 0x24, 0x6d, 0x1e, 0xc1, 0x24, 0x8f, 0x1d, 0x41, 0x24,
+ 0xb1, 0xa5, 0x09, 0x9f, 0x19, 0xa4, 0x09, 0x9f, 0x11, 0xa3, 0x09, 0x9f,
+ 0x09, 0xa2, 0x09, 0x9f, 0x01, 0xa1, 0x09, 0x9e, 0xf9, 0xa0, 0x09, 0x9e,
+ 0xf1, 0x9f, 0x09, 0x9e, 0xe9, 0x9e, 0x09, 0x9e, 0xda, 0x01, 0x24, 0xe1,
+ 0xa5, 0x09, 0x9e, 0xcb, 0x01, 0x24, 0xe5, 0xa4, 0x09, 0x9e, 0xc1, 0xa3,
+ 0x09, 0x9e, 0xb3, 0x01, 0x24, 0xe9, 0xa2, 0x09, 0x9e, 0xa9, 0xa1, 0x09,
+ 0x9e, 0x93, 0x01, 0x24, 0xed, 0xa0, 0x09, 0x9e, 0x89, 0x9f, 0x09, 0x9e,
+ 0x81, 0x9e, 0x09, 0x9e, 0x78, 0x21, 0xc1, 0x24, 0xf5, 0x20, 0xc1, 0x25,
+ 0x01, 0x1f, 0xc1, 0x25, 0x2c, 0x1e, 0xc1, 0x25, 0x5a, 0x1d, 0x41, 0x25,
+ 0x82, 0x21, 0xc1, 0x25, 0xa9, 0x20, 0xc1, 0x25, 0xc5, 0x1f, 0xc1, 0x25,
+ 0xf0, 0x1e, 0xc1, 0x26, 0x1b, 0x1d, 0x41, 0x26, 0x49, 0x1f, 0xc1, 0x26,
+ 0x73, 0x1e, 0xc1, 0x26, 0x9b, 0x1d, 0x41, 0x26, 0xc9, 0xa4, 0x09, 0x95,
+ 0x71, 0xa3, 0x09, 0x95, 0x69, 0xa2, 0x09, 0x95, 0x61, 0xa1, 0x09, 0x95,
+ 0x59, 0xa0, 0x09, 0x95, 0x51, 0x9f, 0x09, 0x95, 0x49, 0x9e, 0x09, 0x95,
+ 0x40, 0x1e, 0xc1, 0x26, 0xf3, 0x1d, 0x41, 0x26, 0xfb, 0x42, 0xed, 0xc9,
+ 0xc1, 0x27, 0x25, 0x42, 0xca, 0x7d, 0xc1, 0x27, 0x31, 0x1d, 0x41, 0x27,
+ 0x3f, 0xa5, 0x09, 0x8d, 0x11, 0xa4, 0x09, 0x8d, 0x09, 0xa3, 0x09, 0x8d,
+ 0x01, 0xa2, 0x09, 0x8c, 0xf9, 0xa1, 0x09, 0x8c, 0xf1, 0xa0, 0x09, 0x8c,
+ 0xe9, 0x9f, 0x09, 0x8c, 0xe1, 0x9e, 0x09, 0x8c, 0xd8, 0x22, 0xc1, 0x27,
+ 0x53, 0x21, 0xc1, 0x27, 0x67, 0x20, 0xc1, 0x27, 0x95, 0x1f, 0xc1, 0x27,
+ 0xc3, 0x1e, 0xc1, 0x27, 0xf1, 0x1d, 0x41, 0x28, 0x1c, 0x23, 0xc1, 0x28,
+ 0x46, 0x22, 0xc1, 0x28, 0x69, 0x21, 0xc1, 0x28, 0x9a, 0x20, 0xc1, 0x28,
+ 0xc8, 0x1f, 0xc1, 0x28, 0xf6, 0x1e, 0xc1, 0x29, 0x21, 0x1d, 0x41, 0x29,
+ 0x49, 0x1f, 0xc1, 0x29, 0x70, 0x1e, 0xc1, 0x29, 0x84, 0x1d, 0x41, 0x29,
+ 0xaf, 0x4c, 0x85, 0x08, 0xc1, 0x29, 0xd6, 0xd2, 0x49, 0x26, 0x0f, 0xa3,
+ 0xe8, 0xc4, 0x24, 0x35, 0x00, 0x37, 0xc9, 0xc5, 0x05, 0x1b, 0x00, 0x37,
+ 0xc1, 0x15, 0xc1, 0x29, 0xec, 0x08, 0xc1, 0x29, 0xf8, 0x16, 0xc1, 0x2a,
+ 0x04, 0xc3, 0x05, 0x17, 0x00, 0x37, 0x89, 0xc4, 0x16, 0x57, 0x00, 0x37,
+ 0x80, 0xcd, 0x2d, 0xa6, 0x01, 0x02, 0x49, 0xc4, 0x00, 0xfa, 0x00, 0x01,
+ 0x08, 0x09, 0xc1, 0x2a, 0x10, 0x0a, 0xc1, 0x2a, 0x42, 0x04, 0xc1, 0x2a,
+ 0x63, 0x05, 0xc1, 0x2a, 0x88, 0x06, 0xc1, 0x2a, 0xb3, 0x16, 0xc1, 0x2a,
+ 0xde, 0x0e, 0xc1, 0x2b, 0x13, 0x0f, 0xc1, 0x2b, 0x36, 0x15, 0xc1, 0x2b,
+ 0x5d, 0x14, 0xc1, 0x2b, 0x8c, 0x13, 0xc1, 0x2b, 0xb5, 0x18, 0xc1, 0x2b,
+ 0xde, 0x1a, 0xc1, 0x2b, 0xfe, 0x10, 0xc1, 0x2c, 0x23, 0x0d, 0xc1, 0x2c,
+ 0x4a, 0x19, 0xc1, 0x2c, 0x73, 0x12, 0xc1, 0x2c, 0x90, 0x1c, 0xc1, 0x2c,
+ 0xb5, 0x1b, 0xc1, 0x2c, 0xe0, 0x0c, 0xc1, 0x2c, 0xfd, 0x08, 0x41, 0x2d,
+ 0x20, 0xca, 0x25, 0x88, 0x00, 0x9b, 0x01, 0xc7, 0x50, 0x05, 0x00, 0x9b,
+ 0x20, 0x47, 0x10, 0xa4, 0xc1, 0x2d, 0x44, 0xc2, 0x00, 0x3f, 0x00, 0x9b,
+ 0x18, 0xc2, 0x0a, 0x20, 0x00, 0x9b, 0x51, 0xc4, 0x05, 0xde, 0x00, 0x9b,
+ 0x58, 0xc3, 0x08, 0xde, 0x00, 0x9b, 0x61, 0xc3, 0x0d, 0x8f, 0x00, 0x9b,
+ 0x68, 0xc2, 0x22, 0x45, 0x00, 0x9b, 0x71, 0xc4, 0x15, 0xa7, 0x00, 0x9b,
+ 0x78, 0xc2, 0x01, 0x04, 0x00, 0x9b, 0x93, 0x01, 0x2d, 0x50, 0xc5, 0x25,
+ 0x27, 0x00, 0x9b, 0x99, 0xc5, 0x0d, 0x88, 0x00, 0x9b, 0xa0, 0xc4, 0x4d,
+ 0x29, 0x00, 0x9b, 0xa9, 0xc4, 0x41, 0xc9, 0x00, 0x9b, 0xb0, 0xc4, 0xd8,
+ 0xce, 0x00, 0x9b, 0xb9, 0xc6, 0x15, 0xa7, 0x00, 0x9b, 0xc0, 0x43, 0x14,
+ 0x01, 0xc1, 0x2d, 0x56, 0xc4, 0xe7, 0x17, 0x00, 0x9c, 0xa0, 0xc4, 0x58,
+ 0xb3, 0x00, 0x9c, 0xa9, 0xc3, 0x52, 0xbe, 0x00, 0x9c, 0xc8, 0x00, 0x41,
+ 0x2d, 0x60, 0xcf, 0x45, 0x1a, 0x01, 0x1f, 0x39, 0x00, 0x41, 0x2d, 0x6c,
+ 0x16, 0xc1, 0x2d, 0x84, 0x15, 0xc1, 0x2d, 0x90, 0xc4, 0x5d, 0xef, 0x08,
+ 0x7f, 0x99, 0xc4, 0xbc, 0xb7, 0x08, 0x7f, 0x91, 0xc2, 0x03, 0x07, 0x08,
+ 0x7f, 0x81, 0xc3, 0x21, 0x00, 0x08, 0x7f, 0x69, 0xc3, 0x04, 0xae, 0x08,
+ 0x7f, 0x61, 0xc6, 0xd7, 0x12, 0x08, 0x7f, 0x59, 0xc4, 0xe5, 0x53, 0x08,
+ 0x7f, 0x51, 0xc4, 0x4d, 0x48, 0x08, 0x7f, 0x49, 0xc2, 0x00, 0x5b, 0x08,
+ 0x7f, 0x23, 0x01, 0x2d, 0x9a, 0xc5, 0x4d, 0x42, 0x08, 0x7f, 0x31, 0xc3,
+ 0x7c, 0xad, 0x08, 0x7f, 0x29, 0xc6, 0x43, 0x0f, 0x08, 0x7f, 0x19, 0xc5,
+ 0x9e, 0xbc, 0x08, 0x7f, 0x11, 0xc4, 0xe5, 0xaf, 0x08, 0x7f, 0x09, 0x03,
+ 0x41, 0x2d, 0xa0, 0x87, 0x08, 0x28, 0x11, 0xc2, 0x00, 0x5b, 0x08, 0x28,
+ 0x18, 0x87, 0x08, 0x28, 0x21, 0xc2, 0x00, 0x5b, 0x08, 0x28, 0x30, 0xc2,
+ 0x00, 0x06, 0x08, 0x28, 0x29, 0x87, 0x08, 0x28, 0x99, 0x83, 0x08, 0x28,
+ 0xa1, 0xc2, 0x1c, 0x3e, 0x08, 0x28, 0xa8, 0x8b, 0x08, 0x28, 0x38, 0x87,
+ 0x08, 0x28, 0x51, 0xc2, 0x1c, 0x3e, 0x08, 0x28, 0x59, 0x0a, 0x41, 0x2d,
+ 0xac, 0x87, 0x08, 0x28, 0x79, 0xc2, 0x00, 0x5b, 0x08, 0x29, 0x38, 0x87,
+ 0x08, 0x28, 0x81, 0xc2, 0x00, 0x58, 0x08, 0x28, 0x88, 0x87, 0x08, 0x28,
+ 0xc9, 0xc2, 0x01, 0xa5, 0x08, 0x28, 0xd0, 0x87, 0x08, 0x28, 0xd9, 0xc2,
+ 0x00, 0x5b, 0x08, 0x28, 0xe0, 0x87, 0x08, 0x28, 0xe9, 0xc2, 0x00, 0x5b,
+ 0x08, 0x28, 0xf0, 0x87, 0x08, 0x29, 0x19, 0xc2, 0x00, 0x5b, 0x08, 0x29,
+ 0x20, 0xe0, 0x0a, 0x47, 0x01, 0x3a, 0x50, 0xdf, 0x0d, 0x9a, 0x01, 0x3a,
+ 0x09, 0x47, 0x04, 0x8a, 0x41, 0x2d, 0xb6, 0xc9, 0xb4, 0x77, 0x0f, 0xac,
+ 0x21, 0xd5, 0x33, 0xe2, 0x0f, 0xa7, 0x48, 0x43, 0x07, 0x60, 0xc1, 0x2d,
+ 0xc8, 0xc6, 0x04, 0x1b, 0x00, 0x00, 0xc9, 0x16, 0xc1, 0x2d, 0xd4, 0xc4,
+ 0x03, 0x5d, 0x00, 0x00, 0x51, 0xcd, 0x81, 0x0b, 0x00, 0x04, 0x39, 0xcc,
+ 0x8f, 0x10, 0x00, 0x04, 0xb8, 0xc6, 0x03, 0xfa, 0x01, 0x4f, 0x99, 0xc7,
+ 0x3f, 0x7b, 0x01, 0x4f, 0x89, 0xc6, 0x01, 0xe9, 0x01, 0x4f, 0x78, 0xc6,
+ 0x03, 0xfa, 0x01, 0x4f, 0x91, 0xc7, 0x3f, 0x7b, 0x01, 0x4f, 0x81, 0xc6,
+ 0x01, 0xe9, 0x01, 0x4f, 0x70, 0x43, 0x01, 0x89, 0xc1, 0x2d, 0xe3, 0xcf,
+ 0x6b, 0x22, 0x01, 0x16, 0xa8, 0xc5, 0x34, 0x9a, 0x01, 0x12, 0xa9, 0xc4,
+ 0x02, 0xcb, 0x00, 0x01, 0xeb, 0x01, 0x2d, 0xef, 0xcd, 0x7d, 0x7d, 0x01,
+ 0x53, 0x70, 0xc2, 0x01, 0x89, 0x01, 0x12, 0x69, 0xd4, 0x39, 0xba, 0x01,
+ 0x53, 0xc0, 0xc5, 0x02, 0xca, 0x01, 0x05, 0x61, 0x45, 0xda, 0x63, 0x41,
+ 0x2d, 0xf3, 0xc4, 0x24, 0x35, 0x08, 0xed, 0x49, 0xc5, 0x05, 0x1b, 0x08,
+ 0xed, 0x41, 0x15, 0xc1, 0x2d, 0xff, 0x08, 0xc1, 0x2e, 0x0b, 0x16, 0xc1,
+ 0x2e, 0x17, 0xc3, 0x05, 0x17, 0x08, 0xed, 0x09, 0xc4, 0x16, 0x57, 0x08,
+ 0xed, 0x00, 0xc5, 0x21, 0x27, 0x08, 0xec, 0xb9, 0x4a, 0x6f, 0x72, 0x41,
+ 0x2e, 0x23, 0xc7, 0x45, 0xcd, 0x08, 0xec, 0xb1, 0xc8, 0x10, 0xab, 0x08,
+ 0xec, 0xa8, 0xc2, 0x0e, 0xe5, 0x08, 0xec, 0x49, 0xc2, 0x00, 0x9a, 0x08,
+ 0xec, 0x41, 0xc2, 0x01, 0x0e, 0x08, 0xec, 0x39, 0x12, 0xc1, 0x2e, 0x41,
+ 0x10, 0xc1, 0x2e, 0x4b, 0x06, 0xc1, 0x2e, 0x55, 0x0c, 0xc1, 0x2e, 0x63,
+ 0x0e, 0xc1, 0x2e, 0x6d, 0x16, 0xc1, 0x2e, 0x77, 0x05, 0xc1, 0x2e, 0x85,
+ 0x09, 0xc1, 0x2e, 0x8f, 0x0d, 0xc1, 0x2e, 0x99, 0xc2, 0x00, 0x3f, 0x08,
+ 0xeb, 0x81, 0x04, 0xc1, 0x2e, 0xa3, 0xc2, 0x06, 0x8c, 0x08, 0xeb, 0x69,
+ 0xc2, 0x1a, 0x36, 0x08, 0xeb, 0x61, 0x83, 0x08, 0xeb, 0x03, 0x01, 0x2e,
+ 0xad, 0xc2, 0x02, 0x14, 0x08, 0xeb, 0x51, 0xc2, 0x01, 0xe6, 0x08, 0xeb,
+ 0x39, 0x97, 0x08, 0xeb, 0x23, 0x01, 0x2e, 0xb9, 0x8b, 0x08, 0xeb, 0x12,
+ 0x01, 0x2e, 0xbd, 0xca, 0xa6, 0x46, 0x00, 0x50, 0x09, 0xc5, 0x6a, 0x05,
+ 0x00, 0x50, 0x11, 0x42, 0x02, 0x52, 0xc1, 0x2e, 0xc1, 0xc5, 0x33, 0x1a,
+ 0x00, 0x51, 0xe1, 0xc5, 0xcc, 0x95, 0x00, 0x52, 0x89, 0xc6, 0xd2, 0xbc,
+ 0x00, 0x53, 0xa8, 0x83, 0x00, 0x50, 0x2b, 0x01, 0x2e, 0xcd, 0x8b, 0x00,
+ 0x50, 0x3b, 0x01, 0x2e, 0xd9, 0x97, 0x00, 0x50, 0x4b, 0x01, 0x2e, 0xdd,
+ 0xc2, 0x01, 0xe6, 0x00, 0x50, 0x79, 0xc2, 0x02, 0x14, 0x00, 0x50, 0x99,
+ 0x0d, 0xc1, 0x2e, 0xe1, 0x09, 0xc1, 0x2e, 0xe9, 0x10, 0xc1, 0x2e, 0xf1,
+ 0x05, 0xc1, 0x2f, 0x07, 0x0c, 0xc1, 0x2f, 0x11, 0x16, 0xc1, 0x2f, 0x1b,
+ 0x06, 0xc1, 0x2f, 0x29, 0x12, 0xc1, 0x2f, 0x37, 0x04, 0xc1, 0x2f, 0x41,
+ 0xc2, 0x00, 0x3f, 0x00, 0x51, 0x71, 0xc2, 0x1a, 0x36, 0x00, 0x51, 0x79,
+ 0x14, 0xc1, 0x2f, 0x4b, 0x0e, 0xc1, 0x2f, 0x55, 0xc2, 0x06, 0x8c, 0x00,
+ 0x51, 0xa9, 0x15, 0xc1, 0x2f, 0x5f, 0xc2, 0x01, 0x0e, 0x00, 0x51, 0xc9,
+ 0xc2, 0x05, 0x5c, 0x00, 0x52, 0xd9, 0xc2, 0x01, 0xa7, 0x00, 0x52, 0xf0,
+ 0x03, 0xc1, 0x2f, 0x69, 0x8b, 0x00, 0x51, 0xfb, 0x01, 0x2f, 0x75, 0x97,
+ 0x00, 0x52, 0x0b, 0x01, 0x2f, 0x79, 0xc2, 0x01, 0xe6, 0x00, 0x52, 0x39,
+ 0xc2, 0x02, 0x14, 0x00, 0x52, 0x58, 0xc4, 0x16, 0x57, 0x00, 0x53, 0x31,
+ 0xc3, 0x05, 0x17, 0x00, 0x53, 0x39, 0x16, 0xc1, 0x2f, 0x7d, 0x08, 0xc1,
+ 0x2f, 0x89, 0x15, 0xc1, 0x2f, 0x95, 0xc5, 0x05, 0x1b, 0x00, 0x53, 0x71,
+ 0xc4, 0x24, 0x35, 0x00, 0x53, 0x78, 0xc4, 0xe7, 0x2f, 0x00, 0x53, 0x89,
+ 0xd0, 0x56, 0x10, 0x00, 0x53, 0xb0, 0x05, 0xc1, 0x2f, 0xa1, 0x03, 0xc1,
+ 0x2f, 0xad, 0x42, 0x02, 0x52, 0xc1, 0x2f, 0xb9, 0xc5, 0x33, 0x1a, 0x00,
+ 0x55, 0xe1, 0x15, 0xc1, 0x2f, 0xc5, 0xc6, 0xd8, 0x6e, 0x00, 0x57, 0xe1,
+ 0x16, 0x41, 0x2f, 0xd1, 0x83, 0x00, 0x54, 0x2b, 0x01, 0x2f, 0xdd, 0x8b,
+ 0x00, 0x54, 0x3b, 0x01, 0x2f, 0xe9, 0x97, 0x00, 0x54, 0x4b, 0x01, 0x2f,
+ 0xed, 0x18, 0xc1, 0x2f, 0xf1, 0x87, 0x00, 0x54, 0x79, 0x91, 0x00, 0x54,
+ 0x99, 0x0d, 0xc1, 0x2f, 0xfb, 0x09, 0xc1, 0x30, 0x05, 0x10, 0xc1, 0x30,
+ 0x0f, 0x05, 0xc1, 0x30, 0x25, 0x0c, 0xc1, 0x30, 0x2f, 0x16, 0xc1, 0x30,
+ 0x39, 0x06, 0xc1, 0x30, 0x47, 0x12, 0xc1, 0x30, 0x55, 0x04, 0xc1, 0x30,
+ 0x5f, 0xc2, 0x00, 0x3f, 0x00, 0x55, 0x71, 0xc2, 0x1a, 0x36, 0x00, 0x55,
+ 0x79, 0xc2, 0x00, 0x9a, 0x00, 0x55, 0x81, 0x0e, 0xc1, 0x30, 0x69, 0x15,
+ 0xc1, 0x30, 0x73, 0xc2, 0x01, 0x0e, 0x00, 0x55, 0xc9, 0xc3, 0xb0, 0x39,
+ 0x00, 0x57, 0xc8, 0x47, 0xa4, 0x92, 0xc1, 0x30, 0x83, 0x45, 0x02, 0xcb,
+ 0x41, 0x30, 0x8b, 0xc4, 0x16, 0x57, 0x00, 0x57, 0x31, 0xc3, 0x05, 0x17,
+ 0x00, 0x57, 0x39, 0x16, 0xc1, 0x30, 0xb1, 0x08, 0xc1, 0x30, 0xbd, 0x15,
+ 0xc1, 0x30, 0xc9, 0xc5, 0x05, 0x1b, 0x00, 0x57, 0x71, 0xc4, 0x24, 0x35,
+ 0x00, 0x57, 0x78, 0xc5, 0xdc, 0x4d, 0x08, 0x19, 0xa1, 0xc3, 0x72, 0x36,
+ 0x08, 0x19, 0x80, 0xc3, 0xc2, 0x97, 0x08, 0x19, 0xb1, 0xc4, 0xe6, 0xe7,
+ 0x08, 0x1a, 0x38, 0xc3, 0x92, 0xad, 0x08, 0x19, 0xb9, 0xc4, 0xd4, 0x4c,
+ 0x08, 0x1a, 0x40, 0xc5, 0xdf, 0x13, 0x08, 0x19, 0xc1, 0xc4, 0xe6, 0xeb,
+ 0x08, 0x1a, 0x20, 0xc5, 0xdc, 0x20, 0x08, 0x19, 0xe9, 0x43, 0x03, 0x5e,
+ 0x41, 0x30, 0xd5, 0x42, 0x00, 0xfd, 0xc1, 0x30, 0xe1, 0x42, 0x00, 0xf8,
+ 0x41, 0x31, 0x4b, 0x04, 0xc1, 0x31, 0x63, 0xd5, 0x32, 0x29, 0x01, 0x16,
+ 0xd9, 0x45, 0x01, 0xac, 0xc1, 0x31, 0x6f, 0x11, 0xc1, 0x31, 0x81, 0x03,
+ 0xc1, 0x31, 0x8d, 0xc4, 0x02, 0xcb, 0x00, 0x01, 0xf1, 0xcf, 0x67, 0x53,
+ 0x01, 0x55, 0x3a, 0x01, 0x31, 0x99, 0x4b, 0x6f, 0x71, 0xc1, 0x31, 0x9f,
+ 0x47, 0x01, 0xff, 0xc1, 0x31, 0xc3, 0x45, 0x02, 0xcb, 0xc1, 0x32, 0x2c,
+ 0xce, 0x74, 0xe6, 0x08, 0x9a, 0xb9, 0xc2, 0x00, 0xeb, 0x08, 0x9a, 0x80,
+ 0xc4, 0x01, 0xa7, 0x0f, 0xb0, 0x03, 0x01, 0x32, 0x46, 0xda, 0x1a, 0x50,
+ 0x0f, 0xb1, 0xc0, 0xc9, 0x1b, 0xac, 0x00, 0x00, 0xe9, 0xc4, 0x00, 0xfa,
+ 0x01, 0x5e, 0x90, 0xc8, 0xbe, 0xdb, 0x01, 0x37, 0x71, 0xc7, 0xc6, 0x48,
+ 0x01, 0x37, 0x68, 0x48, 0x08, 0xfa, 0xc1, 0x32, 0x4c, 0xcb, 0x9c, 0xbd,
+ 0x01, 0x11, 0xd0, 0x58, 0x23, 0x6c, 0xc1, 0x32, 0x58, 0x4f, 0x01, 0xf7,
+ 0xc1, 0x32, 0xde, 0x47, 0x01, 0xff, 0xc1, 0x33, 0x62, 0xd3, 0x46, 0x46,
+ 0x00, 0x87, 0xd9, 0x4d, 0x27, 0x71, 0x41, 0x33, 0xe8, 0xc8, 0x40, 0x9a,
+ 0x0f, 0xb6, 0x50, 0x4f, 0x01, 0xf7, 0xc1, 0x34, 0x6c, 0x4d, 0x27, 0x71,
+ 0x41, 0x34, 0xd5, 0xc4, 0xe5, 0xb7, 0x0f, 0xa6, 0xc9, 0xc5, 0x19, 0x74,
+ 0x0f, 0xcf, 0x08, 0x45, 0x02, 0xcb, 0xc1, 0x35, 0x3e, 0x47, 0x01, 0xff,
+ 0xc1, 0x35, 0x5a, 0x4b, 0x6f, 0x71, 0xc1, 0x35, 0xc1, 0x03, 0xc1, 0x35,
+ 0xe1, 0x46, 0x08, 0xd7, 0xc1, 0x35, 0xed, 0xc6, 0xd8, 0xfe, 0x00, 0x5b,
+ 0x81, 0x49, 0x54, 0xdd, 0x41, 0x36, 0x11, 0xc5, 0xd7, 0xd2, 0x0f, 0x69,
+ 0xe9, 0xc4, 0x01, 0x1d, 0x0f, 0x69, 0xe0, 0x16, 0xc1, 0x36, 0x1d, 0x08,
+ 0xc1, 0x36, 0x2e, 0xc3, 0x05, 0x17, 0x0f, 0x68, 0x0b, 0x01, 0x36, 0x36,
+ 0x15, 0xc1, 0x36, 0x3a, 0xc5, 0x05, 0x1b, 0x0f, 0x68, 0x43, 0x01, 0x36,
+ 0x4c, 0xc4, 0x24, 0x35, 0x0f, 0x68, 0x4a, 0x01, 0x36, 0x57, 0x16, 0xc1,
+ 0x36, 0x64, 0x08, 0xc1, 0x36, 0x7c, 0x15, 0xc1, 0x36, 0x8b, 0xc5, 0x05,
+ 0x1b, 0x0f, 0x69, 0xa9, 0xc4, 0x24, 0x35, 0x0f, 0x69, 0xb0, 0xc6, 0x71,
+ 0xec, 0x01, 0x01, 0x21, 0xd9, 0x10, 0x32, 0x01, 0x71, 0x58, 0x42, 0x06,
+ 0x82, 0xc1, 0x36, 0x9a, 0x47, 0x0e, 0x90, 0xc1, 0x36, 0xa6, 0x42, 0x01,
+ 0xb4, 0xc1, 0x36, 0xbe, 0x08, 0xc1, 0x36, 0xc8, 0xc4, 0x00, 0xb9, 0x0f,
+ 0xa8, 0x99, 0x4d, 0x7c, 0xba, 0xc1, 0x36, 0xd4, 0xca, 0x6d, 0x18, 0x0f,
+ 0xa2, 0x80, 0xcd, 0x7b, 0x0d, 0x01, 0x1c, 0x81, 0xcd, 0x78, 0xde, 0x01,
+ 0x1c, 0x78, 0xc5, 0xdb, 0x80, 0x0f, 0xad, 0xc0, 0x48, 0xc2, 0x63, 0xc1,
+ 0x36, 0xe0, 0x47, 0xcb, 0xb9, 0xc1, 0x36, 0xec, 0x42, 0x05, 0x3b, 0xc1,
+ 0x36, 0xfe, 0x4a, 0xa1, 0xc8, 0xc1, 0x37, 0x0a, 0x4e, 0x70, 0xf6, 0xc1,
+ 0x37, 0x1c, 0x4e, 0x6c, 0x96, 0xc1, 0x37, 0x28, 0xc3, 0x1e, 0x77, 0x0f,
+ 0xae, 0xe9, 0x43, 0x03, 0x07, 0xc1, 0x37, 0x34, 0x47, 0xca, 0x00, 0x41,
+ 0x37, 0x3e, 0xc5, 0x29, 0x3b, 0x0f, 0xa3, 0xa9, 0xc3, 0x14, 0xe9, 0x0f,
+ 0xa3, 0xa1, 0xc5, 0xe3, 0x05, 0x0f, 0xce, 0x98, 0x4b, 0x11, 0x34, 0xc1,
+ 0x37, 0x4a, 0xc7, 0xc5, 0x92, 0x00, 0xe3, 0xe0, 0xd2, 0x4a, 0x34, 0x00,
+ 0xe3, 0xd1, 0xc9, 0xb6, 0xa5, 0x00, 0xe3, 0xc0, 0x11, 0xc1, 0x37, 0x56,
+ 0x0e, 0xc1, 0x37, 0x68, 0x07, 0xc1, 0x37, 0x7f, 0x17, 0xc1, 0x37, 0x93,
+ 0x0b, 0xc1, 0x37, 0xa5, 0x03, 0x41, 0x37, 0xb7, 0xc4, 0x24, 0x35, 0x00,
+ 0xe2, 0xc9, 0xc5, 0x05, 0x1b, 0x00, 0xe2, 0xc1, 0x15, 0xc1, 0x37, 0xcd,
+ 0x08, 0xc1, 0x37, 0xd9, 0x16, 0xc1, 0x37, 0xe5, 0xc3, 0x05, 0x17, 0x00,
+ 0xe2, 0x89, 0xc4, 0x16, 0x57, 0x00, 0xe2, 0x80, 0xca, 0x25, 0x5a, 0x01,
+ 0x39, 0x69, 0xcb, 0x94, 0x46, 0x01, 0x38, 0xf9, 0xcb, 0x5e, 0x74, 0x01,
+ 0x38, 0xc9, 0xca, 0x28, 0xd1, 0x01, 0x34, 0xe8, 0xcf, 0x61, 0x68, 0x01,
+ 0x22, 0x51, 0xc3, 0x02, 0xe4, 0x01, 0x22, 0x40, 0xd6, 0x2d, 0x03, 0x01,
+ 0x22, 0x49, 0xc4, 0x63, 0x08, 0x01, 0x22, 0x08, 0xd9, 0x1d, 0xda, 0x01,
+ 0x22, 0x31, 0xc6, 0xd8, 0x38, 0x01, 0x22, 0x29, 0xca, 0xa9, 0x8e, 0x01,
+ 0x22, 0x20, 0xc4, 0x00, 0x68, 0x01, 0x4d, 0x39, 0xc2, 0x02, 0x6a, 0x01,
+ 0x4d, 0x30, 0x45, 0x2b, 0x0a, 0x41, 0x37, 0xf1, 0xc5, 0xda, 0x7c, 0x00,
+ 0xb4, 0xd1, 0x42, 0x00, 0x5c, 0xc1, 0x37, 0xfd, 0x0b, 0xc1, 0x38, 0x0f,
+ 0x17, 0xc1, 0x38, 0x1b, 0x11, 0xc1, 0x38, 0x2b, 0xc4, 0xe5, 0xf3, 0x00,
+ 0xb4, 0x81, 0xc4, 0xe4, 0x8b, 0x00, 0xb4, 0x79, 0x15, 0xc1, 0x38, 0x35,
+ 0x10, 0xc1, 0x38, 0x41, 0xc4, 0x6a, 0x40, 0x00, 0xb4, 0x61, 0xc4, 0xe5,
+ 0xe3, 0x00, 0xb4, 0x59, 0x05, 0xc1, 0x38, 0x4d, 0xc5, 0xdd, 0xdd, 0x00,
+ 0xb4, 0x41, 0xc4, 0xe7, 0x1f, 0x00, 0xb4, 0x39, 0xc5, 0xdf, 0xd1, 0x00,
+ 0xb4, 0x19, 0xc4, 0xe6, 0x37, 0x00, 0xb4, 0x11, 0xc5, 0xdb, 0xe9, 0x00,
+ 0xb4, 0x08, 0x83, 0x08, 0x24, 0xb3, 0x01, 0x38, 0x59, 0xc2, 0x02, 0x1d,
+ 0x08, 0x24, 0x09, 0xc2, 0x00, 0x44, 0x08, 0x24, 0x11, 0xc2, 0x26, 0x94,
+ 0x08, 0x24, 0x19, 0xc2, 0x23, 0xe3, 0x08, 0x24, 0x21, 0x0d, 0xc1, 0x38,
+ 0x63, 0x06, 0xc1, 0x38, 0x6f, 0xc2, 0x00, 0x9a, 0x08, 0x24, 0x39, 0x15,
+ 0xc1, 0x38, 0x7b, 0xc4, 0xe5, 0x87, 0x08, 0x24, 0x59, 0xc2, 0x07, 0x69,
+ 0x08, 0x24, 0x61, 0xc2, 0x01, 0xa7, 0x08, 0x24, 0x69, 0xc4, 0xdb, 0x76,
+ 0x08, 0x24, 0x71, 0xc4, 0xe5, 0x63, 0x08, 0x24, 0x81, 0xc4, 0xe5, 0xff,
+ 0x08, 0x24, 0x89, 0xc4, 0xe8, 0x5f, 0x08, 0x24, 0x91, 0xc3, 0x7c, 0xad,
+ 0x08, 0x24, 0x99, 0xc2, 0x01, 0x0e, 0x08, 0x24, 0xa1, 0xc2, 0x1a, 0x36,
+ 0x08, 0x24, 0xa9, 0x87, 0x08, 0x24, 0xbb, 0x01, 0x38, 0x85, 0x8b, 0x08,
+ 0x24, 0xc1, 0x91, 0x08, 0x24, 0xcb, 0x01, 0x38, 0x89, 0x97, 0x08, 0x24,
+ 0xd0, 0xc4, 0x16, 0x57, 0x08, 0x25, 0x01, 0xc3, 0x05, 0x17, 0x08, 0x25,
+ 0x09, 0x16, 0xc1, 0x38, 0x8d, 0x08, 0xc1, 0x38, 0x99, 0x15, 0xc1, 0x38,
+ 0xa5, 0xc5, 0x05, 0x1b, 0x08, 0x25, 0x41, 0xc4, 0x24, 0x35, 0x08, 0x25,
+ 0x48, 0x83, 0x08, 0x25, 0x83, 0x01, 0x38, 0xb1, 0xc3, 0x00, 0xd8, 0x08,
+ 0x25, 0xa1, 0xc3, 0x1c, 0x4f, 0x08, 0x25, 0xa9, 0x87, 0x08, 0x25, 0xbb,
+ 0x01, 0x38, 0xbc, 0x0a, 0xc1, 0x38, 0xc6, 0x8b, 0x08, 0x25, 0xd9, 0x0d,
+ 0xc1, 0x38, 0xd0, 0xc2, 0x00, 0x96, 0x08, 0x25, 0xf9, 0xc2, 0x00, 0x3f,
+ 0x08, 0x26, 0x01, 0xc2, 0x01, 0x01, 0x08, 0x26, 0x09, 0x91, 0x08, 0x26,
+ 0x13, 0x01, 0x38, 0xe0, 0xc2, 0x07, 0x44, 0x08, 0x26, 0x21, 0x15, 0xc1,
+ 0x38, 0xe6, 0x16, 0xc1, 0x38, 0xf0, 0xc3, 0x45, 0xca, 0x08, 0x26, 0x69,
+ 0x97, 0x08, 0x26, 0x71, 0xc2, 0x00, 0x4c, 0x08, 0x26, 0x79, 0xc3, 0x91,
+ 0x7b, 0x08, 0x26, 0x89, 0x1c, 0x41, 0x38, 0xf8, 0x83, 0x08, 0x26, 0xc3,
+ 0x01, 0x39, 0x02, 0xc3, 0x00, 0xd8, 0x08, 0x26, 0xe1, 0xc3, 0x1c, 0x4f,
+ 0x08, 0x26, 0xe9, 0x87, 0x08, 0x26, 0xfb, 0x01, 0x39, 0x0d, 0x0a, 0xc1,
+ 0x39, 0x17, 0x8b, 0x08, 0x27, 0x19, 0x0d, 0xc1, 0x39, 0x21, 0xc2, 0x00,
+ 0x96, 0x08, 0x27, 0x39, 0xc2, 0x00, 0x3f, 0x08, 0x27, 0x41, 0xc2, 0x01,
+ 0x01, 0x08, 0x27, 0x49, 0x91, 0x08, 0x27, 0x53, 0x01, 0x39, 0x31, 0xc2,
+ 0x07, 0x44, 0x08, 0x27, 0x61, 0x15, 0xc1, 0x39, 0x37, 0x16, 0xc1, 0x39,
+ 0x41, 0xc3, 0x45, 0xca, 0x08, 0x27, 0xa9, 0x97, 0x08, 0x27, 0xb1, 0xc2,
+ 0x00, 0x4c, 0x08, 0x27, 0xb9, 0xc3, 0x91, 0x7b, 0x08, 0x27, 0xc9, 0x1c,
+ 0x41, 0x39, 0x49, 0x03, 0xc1, 0x39, 0x53, 0x11, 0xc1, 0x39, 0x65, 0xc8,
+ 0xbc, 0x2b, 0x0e, 0x7a, 0xc2, 0x01, 0x39, 0x71, 0xc3, 0x73, 0x9e, 0x0e,
+ 0x7e, 0x09, 0x07, 0xc1, 0x39, 0x77, 0xcf, 0x5d, 0x10, 0x0e, 0x7b, 0x59,
+ 0xcb, 0x99, 0xdc, 0x0e, 0x7a, 0x98, 0xc5, 0xdc, 0xac, 0x0e, 0x7e, 0x01,
+ 0xc4, 0xe4, 0x3f, 0x0e, 0x7d, 0x7a, 0x01, 0x39, 0x83, 0xc6, 0xb5, 0x13,
+ 0x0e, 0x7d, 0xf9, 0xc5, 0xe2, 0x15, 0x0e, 0x7c, 0x21, 0x42, 0x13, 0x65,
+ 0xc1, 0x39, 0x87, 0xc6, 0xd6, 0xb2, 0x0e, 0x7b, 0x71, 0xc5, 0x5e, 0x4a,
+ 0x0e, 0x7a, 0xa0, 0x16, 0xc1, 0x39, 0x96, 0xc8, 0xbc, 0xa3, 0x0e, 0x7b,
+ 0xeb, 0x01, 0x39, 0xae, 0x49, 0xb1, 0x83, 0x41, 0x39, 0xb2, 0x00, 0x41,
+ 0x39, 0xce, 0xc6, 0xb1, 0x85, 0x0e, 0x7c, 0x29, 0x03, 0x41, 0x39, 0xda,
+ 0xc2, 0x13, 0x1d, 0x0e, 0x7c, 0x11, 0xd2, 0x4a, 0x58, 0x0e, 0x7b, 0x60,
+ 0xc5, 0xd6, 0x9b, 0x0e, 0x7b, 0x79, 0xc8, 0x4a, 0x62, 0x0e, 0x7a, 0xd8,
+ 0x4c, 0x88, 0xd5, 0xc1, 0x39, 0xe6, 0xcb, 0x96, 0x35, 0x0e, 0x7b, 0x31,
+ 0xc8, 0x49, 0x54, 0x0e, 0x7b, 0x29, 0xc9, 0xad, 0xed, 0x0e, 0x7b, 0x21,
+ 0xc8, 0xbf, 0x23, 0x0e, 0x7b, 0x18, 0x16, 0xc1, 0x39, 0xfe, 0xc6, 0xc1,
+ 0xb5, 0x0e, 0x7b, 0x09, 0xc7, 0xcf, 0x0f, 0x0e, 0x7b, 0x01, 0xc5, 0xdc,
+ 0x61, 0x0e, 0x7a, 0xf0, 0xc3, 0xec, 0x1e, 0x0e, 0x7a, 0x19, 0xc3, 0xec,
+ 0x33, 0x0e, 0x7a, 0x10, 0x0d, 0xc1, 0x3a, 0x0a, 0x05, 0xc1, 0x3a, 0x1f,
+ 0x06, 0xc1, 0x3a, 0x2e, 0x16, 0xc1, 0x3a, 0x3a, 0x15, 0xc1, 0x3a, 0x4c,
+ 0x11, 0xc1, 0x3a, 0x64, 0x42, 0x01, 0x59, 0xc1, 0x3a, 0x74, 0x1c, 0xc1,
+ 0x3a, 0x7e, 0x42, 0x00, 0x9a, 0xc1, 0x3a, 0x88, 0xc5, 0xe3, 0x9b, 0x0e,
+ 0x79, 0x39, 0xc6, 0xd6, 0xf4, 0x0e, 0x79, 0x29, 0xc7, 0xcd, 0x25, 0x0e,
+ 0x79, 0x21, 0x48, 0xc0, 0xfb, 0xc1, 0x3a, 0x94, 0x4d, 0x7c, 0x11, 0xc1,
+ 0x3a, 0xa0, 0x47, 0xc8, 0x55, 0xc1, 0x3a, 0xaa, 0x46, 0xd1, 0x1e, 0x41,
+ 0x3a, 0xb6, 0xc9, 0xb6, 0xb7, 0x0e, 0x79, 0x91, 0xc6, 0xb6, 0xba, 0x0e,
+ 0x79, 0x89, 0xc7, 0x74, 0x68, 0x0e, 0x79, 0x80, 0x42, 0x02, 0x52, 0xc1,
+ 0x3a, 0xc2, 0xc8, 0x10, 0xab, 0x08, 0xd1, 0xc1, 0x46, 0x21, 0x1a, 0x41,
+ 0x3a, 0xce, 0xd6, 0x2e, 0xfd, 0x08, 0xd2, 0x31, 0xc9, 0x16, 0xa8, 0x08,
+ 0xd2, 0x00, 0x4f, 0x61, 0x95, 0xc1, 0x3a, 0xdd, 0xd3, 0x45, 0xae, 0x08,
+ 0xd1, 0xd0, 0xc3, 0x1d, 0x55, 0x08, 0xd1, 0x91, 0xc2, 0x01, 0x0e, 0x08,
+ 0xd0, 0x61, 0x83, 0x08, 0xd0, 0x58, 0x83, 0x08, 0xd1, 0x81, 0xc2, 0x0e,
+ 0xe5, 0x08, 0xd1, 0x79, 0xc2, 0x01, 0x0e, 0x08, 0xd1, 0x70, 0x83, 0x08,
+ 0xd1, 0x41, 0xc2, 0x01, 0x0e, 0x08, 0xd1, 0x38, 0x1c, 0xc1, 0x3a, 0xf5,
+ 0xc2, 0x01, 0x0e, 0x08, 0xd0, 0xe1, 0x83, 0x08, 0xd0, 0xd9, 0x06, 0x41,
+ 0x3a, 0xff, 0x15, 0xc1, 0x3b, 0x09, 0xc2, 0x01, 0x0e, 0x08, 0xd0, 0xd1,
+ 0x83, 0x08, 0xd0, 0xc9, 0x16, 0x41, 0x3b, 0x13, 0xc2, 0x01, 0x0e, 0x08,
+ 0xd1, 0x09, 0x83, 0x08, 0xd1, 0x00, 0xc2, 0x01, 0x0e, 0x08, 0xd0, 0xf9,
+ 0x83, 0x08, 0xd0, 0xf0, 0x83, 0x08, 0xd0, 0xe9, 0xc2, 0x01, 0x01, 0x08,
+ 0xd0, 0xc1, 0xc2, 0x1a, 0x36, 0x08, 0xd0, 0x99, 0xc2, 0x07, 0x69, 0x08,
+ 0xd0, 0x78, 0xc2, 0x01, 0x0e, 0x08, 0xd0, 0x89, 0x83, 0x08, 0xd0, 0x80,
+ 0xc2, 0x01, 0x0e, 0x08, 0xd0, 0x71, 0x83, 0x08, 0xd0, 0x68, 0xca, 0xa4,
+ 0x16, 0x08, 0xd0, 0x49, 0x03, 0xc1, 0x3b, 0x1d, 0x91, 0x08, 0xd0, 0x33,
+ 0x01, 0x3b, 0x25, 0x87, 0x08, 0xd0, 0x21, 0x97, 0x08, 0xd0, 0x1b, 0x01,
+ 0x3b, 0x29, 0x8b, 0x08, 0xd0, 0x08, 0xcf, 0x6a, 0x05, 0x01, 0x4c, 0x51,
+ 0xcd, 0x80, 0x3b, 0x01, 0x4c, 0x40, 0x12, 0xc1, 0x3b, 0x2d, 0xcb, 0x35,
+ 0xf9, 0x01, 0x50, 0xf8, 0xc9, 0xae, 0x50, 0x01, 0x00, 0x61, 0xcd, 0x45,
+ 0xa1, 0x07, 0xf7, 0xf8, 0x43, 0x18, 0x75, 0xc1, 0x3b, 0x39, 0x42, 0x00,
+ 0x56, 0x41, 0x3b, 0x5d, 0x45, 0x02, 0x01, 0xc1, 0x3b, 0x69, 0xcc, 0x83,
+ 0x7c, 0x05, 0x4e, 0x08, 0x16, 0xc1, 0x3b, 0xf5, 0xc3, 0x05, 0x17, 0x05,
+ 0x4e, 0x89, 0xc4, 0x16, 0x57, 0x05, 0x4e, 0x81, 0x08, 0xc1, 0x3c, 0x01,
+ 0x15, 0xc1, 0x3c, 0x0d, 0xc5, 0x05, 0x1b, 0x05, 0x4e, 0xc1, 0xc4, 0x24,
+ 0x35, 0x05, 0x4e, 0xc8, 0xc5, 0xdf, 0x68, 0x05, 0x4d, 0xf9, 0xc7, 0xcf,
+ 0x63, 0x05, 0x4d, 0xf1, 0xc5, 0xdd, 0xc4, 0x05, 0x4d, 0xe8, 0xc5, 0xda,
+ 0xfe, 0x05, 0x4d, 0xe1, 0xca, 0xa9, 0x7a, 0x05, 0x4d, 0xd9, 0x16, 0xc1,
+ 0x3c, 0x19, 0xc4, 0xca, 0xfc, 0x05, 0x4d, 0xc3, 0x01, 0x3c, 0x23, 0xc4,
+ 0xe4, 0xe7, 0x05, 0x4d, 0xb2, 0x01, 0x3c, 0x29, 0xc5, 0xdf, 0x09, 0x05,
+ 0x4c, 0x0b, 0x01, 0x3c, 0x2f, 0xc7, 0xc8, 0x4e, 0x05, 0x4c, 0x19, 0xc5,
+ 0x36, 0x7f, 0x05, 0x4c, 0x11, 0xc9, 0xad, 0x78, 0x05, 0x4c, 0x00, 0x46,
+ 0x05, 0x07, 0xc1, 0x3c, 0x35, 0x46, 0x00, 0xc7, 0x41, 0x3c, 0x47, 0xc5,
+ 0x16, 0x94, 0x01, 0x02, 0xb9, 0xd1, 0x20, 0x08, 0x01, 0x50, 0x60, 0x10,
+ 0xc1, 0x3c, 0x53, 0x0c, 0xc1, 0x3c, 0x92, 0x13, 0xc1, 0x3c, 0xb2, 0x14,
+ 0xc1, 0x3c, 0xce, 0x15, 0xc1, 0x3c, 0xf5, 0x05, 0xc1, 0x3d, 0x27, 0x1c,
+ 0xc1, 0x3d, 0x55, 0x19, 0xc1, 0x3d, 0x87, 0x0a, 0xc1, 0x3d, 0xa3, 0x1b,
+ 0xc1, 0x3d, 0xd5, 0x1a, 0xc1, 0x3d, 0xf1, 0x0f, 0xc1, 0x3e, 0x0f, 0x8b,
+ 0x05, 0x00, 0x13, 0x01, 0x3e, 0x3d, 0x83, 0x05, 0x00, 0x53, 0x01, 0x3e,
+ 0x53, 0xc2, 0x00, 0xf1, 0x05, 0x00, 0x6b, 0x01, 0x3e, 0x5f, 0x91, 0x05,
+ 0x00, 0x8b, 0x01, 0x3e, 0x67, 0x87, 0x05, 0x00, 0xa3, 0x01, 0x3e, 0x73,
+ 0x04, 0xc1, 0x3e, 0x77, 0x12, 0xc1, 0x3e, 0xa5, 0x08, 0xc1, 0x3e, 0xc8,
+ 0x18, 0xc1, 0x3e, 0xeb, 0x06, 0xc1, 0x3f, 0x12, 0x16, 0xc1, 0x3f, 0x39,
+ 0x0e, 0xc1, 0x3f, 0x5c, 0x09, 0xc1, 0x3f, 0x86, 0x0d, 0x41, 0x3f, 0xad,
+ 0xc3, 0xed, 0x02, 0x05, 0x24, 0x81, 0x0e, 0xc1, 0x3f, 0xd0, 0x0d, 0xc1,
+ 0x3f, 0xdd, 0x10, 0xc1, 0x3f, 0xe7, 0x05, 0xc1, 0x3f, 0xf7, 0x15, 0xc1,
+ 0x40, 0x10, 0x09, 0xc1, 0x40, 0x1a, 0x0f, 0xc1, 0x40, 0x2e, 0x0a, 0xc1,
+ 0x40, 0x38, 0x04, 0xc1, 0x40, 0x42, 0x1b, 0xc1, 0x40, 0x4e, 0x12, 0xc1,
+ 0x40, 0x58, 0x16, 0xc1, 0x40, 0x64, 0x1c, 0xc1, 0x40, 0x6e, 0x06, 0xc1,
+ 0x40, 0x82, 0xc2, 0x00, 0x11, 0x05, 0x25, 0x49, 0x0c, 0xc1, 0x40, 0x8c,
+ 0x18, 0xc1, 0x40, 0x94, 0xc2, 0x0a, 0x20, 0x05, 0x25, 0xc0, 0xc3, 0xc9,
+ 0xab, 0x08, 0x75, 0x43, 0x01, 0x40, 0xa0, 0xc3, 0x0e, 0x1c, 0x08, 0x75,
+ 0x03, 0x01, 0x40, 0xa6, 0x07, 0xc1, 0x40, 0xac, 0x0a, 0xc1, 0x40, 0xc0,
+ 0xc2, 0x00, 0x37, 0x08, 0x75, 0x29, 0xc3, 0x7c, 0xad, 0x08, 0x75, 0x21,
+ 0xc2, 0x01, 0x0a, 0x08, 0x75, 0x19, 0xc3, 0x21, 0x00, 0x08, 0x75, 0x11,
+ 0xc3, 0x8e, 0x5e, 0x08, 0x75, 0x09, 0xc3, 0x87, 0xe3, 0x08, 0x74, 0xf9,
+ 0x0d, 0xc1, 0x40, 0xcc, 0xc3, 0x0e, 0x13, 0x08, 0x74, 0xe1, 0xc2, 0x06,
+ 0x8c, 0x08, 0x74, 0xd3, 0x01, 0x40, 0xd8, 0xc2, 0x01, 0xa7, 0x08, 0x74,
+ 0xc9, 0x1a, 0xc1, 0x40, 0xde, 0x1c, 0xc1, 0x40, 0xe8, 0x16, 0xc1, 0x40,
+ 0xf3, 0x42, 0x0c, 0x25, 0xc1, 0x40, 0xfd, 0x15, 0xc1, 0x41, 0x05, 0xc2,
+ 0x26, 0x94, 0x08, 0x74, 0x81, 0x14, 0xc1, 0x41, 0x1b, 0x05, 0xc1, 0x41,
+ 0x25, 0x12, 0xc1, 0x41, 0x2f, 0xc2, 0x00, 0x34, 0x08, 0x74, 0x08, 0xca,
+ 0xa6, 0x50, 0x08, 0x75, 0x61, 0xca, 0x9f, 0x52, 0x08, 0x75, 0x58, 0x00,
+ 0xc1, 0x41, 0x39, 0xc8, 0xba, 0x33, 0x0f, 0xae, 0xc8, 0x12, 0xc1, 0x41,
+ 0x45, 0x83, 0x00, 0xa7, 0xa3, 0x01, 0x41, 0x55, 0x8a, 0x00, 0xa9, 0x2b,
+ 0x01, 0x41, 0x63, 0x91, 0x00, 0xa7, 0x8b, 0x01, 0x41, 0x80, 0x99, 0x00,
+ 0xa8, 0x3b, 0x01, 0x41, 0x8e, 0x87, 0x00, 0xa7, 0x69, 0x8b, 0x00, 0xa7,
+ 0x7a, 0x01, 0x41, 0xa7, 0x83, 0x00, 0xa6, 0x3b, 0x01, 0x41, 0xab, 0x19,
+ 0xc1, 0x41, 0xc2, 0x91, 0x00, 0xa6, 0x23, 0x01, 0x41, 0xdb, 0xc2, 0x00,
+ 0x56, 0x00, 0xac, 0xb3, 0x01, 0x41, 0xe3, 0x89, 0x00, 0xac, 0xab, 0x01,
+ 0x41, 0xf8, 0x44, 0xe4, 0x1b, 0xc1, 0x42, 0x0d, 0x87, 0x00, 0xa6, 0x01,
+ 0x8b, 0x00, 0xa6, 0x13, 0x01, 0x42, 0x1c, 0x8a, 0x00, 0xa6, 0x90, 0x83,
+ 0x00, 0xa4, 0x83, 0x01, 0x42, 0x20, 0xc7, 0xce, 0x98, 0x00, 0xb3, 0x69,
+ 0x19, 0xc1, 0x42, 0x2d, 0x91, 0x00, 0xa4, 0x6b, 0x01, 0x42, 0x46, 0x8b,
+ 0x00, 0xa4, 0x5b, 0x01, 0x42, 0x4a, 0x87, 0x00, 0xa4, 0x48, 0x4b, 0x9b,
+ 0x05, 0xc1, 0x42, 0x4e, 0x49, 0xae, 0x2c, 0xc1, 0x42, 0x56, 0xcb, 0x95,
+ 0xf3, 0x00, 0xa9, 0xf8, 0x42, 0x06, 0x66, 0xc1, 0x42, 0x79, 0x16, 0xc1,
+ 0x42, 0x92, 0x8a, 0x00, 0xab, 0x53, 0x01, 0x42, 0xa9, 0x83, 0x00, 0xa2,
+ 0xab, 0x01, 0x42, 0xc6, 0xc9, 0xb7, 0x86, 0x00, 0xad, 0x73, 0x01, 0x42,
+ 0xd1, 0x1b, 0xc1, 0x42, 0xea, 0x19, 0xc1, 0x42, 0xfa, 0x91, 0x00, 0xa2,
+ 0x83, 0x01, 0x43, 0x13, 0x8b, 0x00, 0xa2, 0x73, 0x01, 0x43, 0x17, 0x87,
+ 0x00, 0xa2, 0x60, 0x87, 0x00, 0xa0, 0x63, 0x01, 0x43, 0x1b, 0x83, 0x00,
+ 0xa0, 0xbb, 0x01, 0x43, 0x1f, 0x91, 0x00, 0xa0, 0x93, 0x01, 0x43, 0x27,
+ 0x8b, 0x00, 0xa0, 0x72, 0x01, 0x43, 0x2e, 0xc2, 0x00, 0xc9, 0x00, 0xc7,
+ 0x01, 0x87, 0x00, 0xaa, 0x18, 0x8b, 0x00, 0xaa, 0xab, 0x01, 0x43, 0x32,
+ 0xc8, 0x11, 0x48, 0x00, 0xb3, 0x71, 0xc3, 0x14, 0x8f, 0x00, 0xaa, 0xd9,
+ 0x83, 0x00, 0xaa, 0xcb, 0x01, 0x43, 0x3c, 0x91, 0x00, 0xaa, 0xbb, 0x01,
+ 0x43, 0x43, 0x87, 0x00, 0xaa, 0x98, 0xc8, 0xbd, 0xbb, 0x00, 0xc6, 0xe1,
+ 0x90, 0x00, 0xa1, 0x58, 0x47, 0xcb, 0x26, 0xc1, 0x43, 0x47, 0x9b, 0x00,
+ 0xc5, 0x81, 0x91, 0x00, 0xa0, 0x31, 0x90, 0x00, 0xa1, 0x68, 0x19, 0xc1,
+ 0x43, 0x69, 0x83, 0x00, 0xaa, 0x5b, 0x01, 0x43, 0x84, 0x91, 0x00, 0xaa,
+ 0x43, 0x01, 0x43, 0x8f, 0x8b, 0x00, 0xaa, 0x33, 0x01, 0x43, 0x93, 0x87,
+ 0x00, 0xaa, 0x10, 0x83, 0x00, 0xa9, 0x6b, 0x01, 0x43, 0x97, 0x91, 0x00,
+ 0xa9, 0x53, 0x01, 0x43, 0xa2, 0x19, 0xc1, 0x43, 0xaa, 0x8b, 0x00, 0xa9,
+ 0x43, 0x01, 0x43, 0xc3, 0x87, 0x00, 0xa9, 0x30, 0x83, 0x00, 0xa6, 0xd3,
+ 0x01, 0x43, 0xc7, 0x8a, 0x00, 0xad, 0x33, 0x01, 0x43, 0xd2, 0x87, 0x00,
+ 0xa6, 0x99, 0x8b, 0x00, 0xa6, 0xab, 0x01, 0x43, 0xe7, 0x91, 0x00, 0xa6,
+ 0xbb, 0x01, 0x43, 0xeb, 0x19, 0x41, 0x43, 0xef, 0x83, 0x00, 0xa5, 0x53,
+ 0x01, 0x44, 0x08, 0x87, 0x00, 0xa5, 0x1b, 0x01, 0x44, 0x13, 0x91, 0x00,
+ 0xa5, 0x3b, 0x01, 0x44, 0x19, 0x8b, 0x00, 0xa5, 0x2b, 0x01, 0x44, 0x20,
+ 0x19, 0xc1, 0x44, 0x24, 0x8a, 0x00, 0xa5, 0xe8, 0x99, 0x00, 0xa4, 0x23,
+ 0x01, 0x44, 0x3d, 0x83, 0x00, 0xa3, 0x93, 0x01, 0x44, 0x56, 0x87, 0x00,
+ 0xa3, 0x59, 0x8b, 0x00, 0xa3, 0x6b, 0x01, 0x44, 0x61, 0x91, 0x00, 0xa3,
+ 0x7a, 0x01, 0x44, 0x65, 0x19, 0xc1, 0x44, 0x69, 0x83, 0x00, 0xa1, 0xc3,
+ 0x01, 0x44, 0x82, 0x91, 0x00, 0xa1, 0x9b, 0x01, 0x44, 0x8d, 0x87, 0x00,
+ 0xa1, 0x79, 0x8b, 0x00, 0xa1, 0x8a, 0x01, 0x44, 0x95, 0x83, 0x00, 0xa0,
+ 0x5b, 0x01, 0x44, 0x99, 0x9b, 0x00, 0xc5, 0x89, 0x8b, 0x00, 0xa0, 0xe3,
+ 0x01, 0x44, 0xa1, 0x4a, 0xa1, 0x82, 0xc1, 0x44, 0xa7, 0x90, 0x00, 0xa1,
+ 0x70, 0x83, 0x00, 0xac, 0x1b, 0x01, 0x44, 0xaf, 0x91, 0x00, 0xac, 0x0b,
+ 0x01, 0x44, 0xba, 0x8b, 0x00, 0xab, 0xfa, 0x01, 0x44, 0xbe, 0x8d, 0x00,
+ 0xab, 0xe9, 0xc5, 0x57, 0xf7, 0x00, 0xa0, 0x00, 0x8b, 0x00, 0xab, 0x0b,
+ 0x01, 0x44, 0xc2, 0x87, 0x00, 0xaa, 0xf8, 0xc3, 0x47, 0xd7, 0x00, 0xa9,
+ 0x61, 0xc3, 0x66, 0x52, 0x00, 0xa2, 0x91, 0x12, 0xc1, 0x44, 0xc6, 0xc3,
+ 0x94, 0x09, 0x00, 0xa4, 0x79, 0xc2, 0x02, 0x14, 0x00, 0xa0, 0x39, 0x99,
+ 0x00, 0xa0, 0xe9, 0xc3, 0x16, 0xb7, 0x00, 0xa5, 0x49, 0xc3, 0x11, 0x42,
+ 0x00, 0xa6, 0x31, 0xc3, 0x18, 0x29, 0x00, 0xa6, 0xc9, 0xc3, 0xe0, 0x78,
+ 0x00, 0xa7, 0x99, 0xc3, 0x72, 0x8b, 0x00, 0xa3, 0x88, 0x8b, 0x00, 0xa0,
+ 0x21, 0x90, 0x00, 0xa1, 0x60, 0xd0, 0x60, 0x4f, 0x01, 0x02, 0x08, 0xc9,
+ 0xac, 0xc4, 0x0f, 0xae, 0x10, 0x97, 0x08, 0x15, 0xfa, 0x01, 0x44, 0xd2,
+ 0x94, 0x08, 0x16, 0x48, 0x86, 0x08, 0x15, 0x32, 0x01, 0x44, 0xd9, 0x9f,
+ 0x08, 0x15, 0x38, 0x84, 0x08, 0x16, 0x52, 0x01, 0x44, 0xdd, 0x9f, 0x08,
+ 0x15, 0x60, 0x96, 0x08, 0x16, 0x3a, 0x01, 0x44, 0xe9, 0x8a, 0x08, 0x15,
+ 0x73, 0x01, 0x44, 0xed, 0x95, 0x08, 0x15, 0xc1, 0x96, 0x08, 0x16, 0x12,
+ 0x01, 0x44, 0xf1, 0x90, 0x08, 0x15, 0x99, 0x86, 0x08, 0x15, 0xf1, 0x89,
+ 0x08, 0x16, 0x20, 0x9f, 0x08, 0x15, 0x08, 0x8b, 0x08, 0x16, 0x28, 0x9f,
+ 0x08, 0x16, 0x78, 0x9f, 0x08, 0x15, 0xe8, 0x9f, 0x08, 0x16, 0x08, 0x03,
+ 0xc1, 0x44, 0xf5, 0xc3, 0x03, 0x28, 0x08, 0x29, 0x89, 0x09, 0xc1, 0x45,
+ 0x01, 0x06, 0xc1, 0x45, 0x0d, 0x07, 0xc1, 0x45, 0x1d, 0x1c, 0xc1, 0x45,
+ 0x27, 0x16, 0xc1, 0x45, 0x31, 0x05, 0xc1, 0x45, 0x43, 0x1b, 0xc1, 0x45,
+ 0x51, 0x0b, 0xc1, 0x45, 0x5d, 0x15, 0xc1, 0x45, 0x6f, 0x0e, 0xc1, 0x45,
+ 0x79, 0xc4, 0xe4, 0x1f, 0x08, 0x2a, 0x01, 0x0c, 0xc1, 0x45, 0x85, 0x0d,
+ 0xc1, 0x45, 0x91, 0xc4, 0xe7, 0x67, 0x08, 0x2a, 0x31, 0x42, 0x0e, 0x13,
+ 0xc1, 0x45, 0x9d, 0xc3, 0xe3, 0x41, 0x08, 0x2a, 0x61, 0xc4, 0xea, 0x8f,
+ 0x08, 0x2a, 0x71, 0xc2, 0x00, 0xe5, 0x08, 0x2a, 0x91, 0xc3, 0xd3, 0xb8,
+ 0x08, 0x2a, 0xa1, 0x12, 0xc1, 0x45, 0xa5, 0xc3, 0x02, 0x41, 0x08, 0x2a,
+ 0xc9, 0xc4, 0xe5, 0x07, 0x08, 0x2a, 0xd8, 0xcc, 0x8d, 0x78, 0x0f, 0xb1,
+ 0xc9, 0xc9, 0xb2, 0x40, 0x0f, 0xb1, 0xe0, 0x07, 0xc1, 0x45, 0xb1, 0x06,
+ 0xc1, 0x45, 0xf1, 0x03, 0xc1, 0x46, 0x31, 0x08, 0xc1, 0x46, 0x71, 0x24,
+ 0xc1, 0x46, 0xb1, 0x23, 0xc1, 0x46, 0xf1, 0x20, 0xc1, 0x47, 0x31, 0x1f,
+ 0xc1, 0x47, 0x71, 0x1e, 0xc1, 0x47, 0xb1, 0x1d, 0xc1, 0x47, 0xf1, 0x05,
+ 0xc1, 0x48, 0x31, 0x04, 0xc1, 0x48, 0x71, 0x26, 0xc1, 0x48, 0xb1, 0x25,
+ 0xc1, 0x48, 0xf1, 0x22, 0xc1, 0x49, 0x31, 0x21, 0x41, 0x49, 0x71, 0x24,
+ 0xc1, 0x49, 0xb1, 0x23, 0xc1, 0x49, 0xf1, 0x22, 0xc1, 0x4a, 0x31, 0x21,
+ 0xc1, 0x4a, 0x71, 0x1f, 0xc1, 0x4a, 0xb1, 0x1d, 0xc1, 0x4a, 0xf1, 0x08,
+ 0xc1, 0x4b, 0x31, 0x04, 0xc1, 0x4b, 0x71, 0x03, 0xc1, 0x4b, 0xb1, 0x26,
+ 0xc1, 0x4b, 0xf1, 0x25, 0xc1, 0x4c, 0x31, 0x07, 0xc1, 0x4c, 0x71, 0x06,
+ 0xc1, 0x4c, 0xb1, 0x05, 0xc1, 0x4c, 0xf1, 0x20, 0xc1, 0x4d, 0x31, 0x1e,
+ 0x41, 0x4d, 0x71, 0x1e, 0xc1, 0x4d, 0xb1, 0x1d, 0x41, 0x4d, 0xe9, 0x06,
+ 0xc1, 0x4e, 0x29, 0x05, 0xc1, 0x4e, 0x51, 0x04, 0xc1, 0x4e, 0x91, 0x03,
+ 0xc1, 0x4e, 0xd1, 0x26, 0xc1, 0x4f, 0x11, 0x25, 0xc1, 0x4f, 0x51, 0x24,
+ 0xc1, 0x4f, 0x91, 0x23, 0xc1, 0x4f, 0xd1, 0x22, 0xc1, 0x50, 0x09, 0x21,
+ 0xc1, 0x50, 0x49, 0x20, 0xc1, 0x50, 0x89, 0x1f, 0xc1, 0x50, 0xc9, 0x1e,
+ 0xc1, 0x51, 0x09, 0x1d, 0x41, 0x51, 0x49, 0x08, 0xc1, 0x51, 0x89, 0x07,
+ 0xc1, 0x51, 0xc9, 0x06, 0xc1, 0x52, 0x09, 0x05, 0xc1, 0x52, 0x49, 0x04,
+ 0xc1, 0x52, 0x89, 0x03, 0xc1, 0x52, 0xc9, 0x26, 0xc1, 0x53, 0x09, 0x25,
+ 0xc1, 0x53, 0x49, 0x24, 0xc1, 0x53, 0x89, 0x23, 0xc1, 0x53, 0xc9, 0x22,
+ 0xc1, 0x54, 0x09, 0x21, 0xc1, 0x54, 0x49, 0x20, 0xc1, 0x54, 0x89, 0x1f,
+ 0xc1, 0x54, 0xc9, 0x1e, 0xc1, 0x55, 0x09, 0x1d, 0x41, 0x55, 0x49, 0x92,
+ 0x01, 0x74, 0xc9, 0x8f, 0x01, 0x75, 0xb9, 0xc2, 0x01, 0x5b, 0x01, 0x76,
+ 0xb8, 0xc3, 0x41, 0x55, 0x01, 0x74, 0x09, 0xc5, 0x80, 0x6a, 0x01, 0x76,
+ 0x10, 0xc6, 0xd7, 0x42, 0x01, 0x75, 0x01, 0xc2, 0x0d, 0x8b, 0x01, 0x76,
+ 0x78, 0x45, 0x6c, 0xa6, 0xc1, 0x55, 0x89, 0xc2, 0x00, 0x2f, 0x01, 0x74,
+ 0x58, 0xc3, 0x05, 0x17, 0x01, 0x74, 0x61, 0xc3, 0x0a, 0x1f, 0x01, 0x74,
+ 0x68, 0xc3, 0x23, 0x98, 0x01, 0x74, 0x91, 0x44, 0x4c, 0xfa, 0x41, 0x55,
+ 0x93, 0x49, 0x77, 0xb7, 0xc1, 0x55, 0x9f, 0xc2, 0x47, 0xe1, 0x01, 0x75,
+ 0x78, 0xc3, 0x05, 0x17, 0x01, 0x75, 0x61, 0xc3, 0x0a, 0x1f, 0x01, 0x75,
+ 0x68, 0xc3, 0x05, 0x17, 0x01, 0x75, 0x21, 0xc3, 0x0a, 0x1f, 0x01, 0x75,
+ 0x28, 0x9a, 0x01, 0x74, 0x31, 0xcb, 0x92, 0xfc, 0x01, 0x75, 0x51, 0xc2,
+ 0x03, 0x5f, 0x01, 0x77, 0x18, 0xc3, 0x05, 0x17, 0x01, 0x75, 0xd1, 0xc3,
+ 0x0a, 0x1f, 0x01, 0x75, 0xd8, 0xc3, 0x05, 0x17, 0x01, 0x74, 0x71, 0x16,
+ 0xc1, 0x55, 0xad, 0xc4, 0x08, 0xdd, 0x01, 0x74, 0x88, 0xc3, 0x05, 0x17,
+ 0x01, 0x76, 0x89, 0xc3, 0x0a, 0x1f, 0x01, 0x76, 0x90, 0x43, 0x0f, 0x5f,
+ 0xc1, 0x55, 0xb9, 0x86, 0x01, 0x77, 0x08, 0xc2, 0x00, 0xe5, 0x01, 0x74,
+ 0xe9, 0xc4, 0x13, 0xff, 0x01, 0x74, 0xf9, 0xc4, 0xe4, 0x93, 0x01, 0x75,
+ 0xe9, 0x44, 0x0c, 0x3d, 0x41, 0x55, 0xc5, 0xc2, 0x01, 0x0d, 0x01, 0x75,
+ 0xa9, 0xc2, 0x00, 0x97, 0x01, 0x75, 0xe0, 0x44, 0x02, 0x02, 0xc1, 0x55,
+ 0xd1, 0x43, 0x80, 0xfd, 0x41, 0x55, 0xdd, 0xc3, 0x05, 0x17, 0x01, 0x76,
+ 0x19, 0xc3, 0x0a, 0x1f, 0x01, 0x76, 0x20, 0xc3, 0x05, 0x17, 0x01, 0x76,
+ 0xe9, 0x16, 0x41, 0x55, 0xe9, 0xc3, 0x9e, 0x4b, 0x01, 0x75, 0x81, 0xc2,
+ 0x01, 0xe6, 0x01, 0x76, 0x29, 0xc5, 0x75, 0x8f, 0x01, 0x76, 0x41, 0xc4,
+ 0x0a, 0xf2, 0x01, 0x76, 0x49, 0xc3, 0x0d, 0x26, 0x01, 0x77, 0x38, 0xc3,
+ 0x1a, 0xb6, 0x01, 0x76, 0x81, 0xc3, 0x00, 0x97, 0x01, 0x76, 0xa0, 0xc3,
+ 0x0a, 0x68, 0x01, 0x76, 0x99, 0xc3, 0x4a, 0x1f, 0x01, 0x76, 0xd0, 0xcd,
+ 0x79, 0x94, 0x01, 0x76, 0xc9, 0xc4, 0xe7, 0xcf, 0x01, 0x77, 0x71, 0xc5,
+ 0xdf, 0x5e, 0x01, 0x77, 0x98, 0xc2, 0x00, 0x31, 0x01, 0x76, 0xe1, 0xc3,
+ 0x07, 0xda, 0x01, 0x77, 0x29, 0xc3, 0x24, 0x4e, 0x01, 0x77, 0x50, 0xc2,
+ 0x00, 0x97, 0x01, 0x77, 0x01, 0xc3, 0x15, 0xa8, 0x01, 0x77, 0x60, 0xc2,
+ 0x0a, 0x20, 0x01, 0x75, 0x91, 0xc4, 0x05, 0xde, 0x01, 0x75, 0x98, 0xc3,
+ 0x05, 0x17, 0x01, 0x75, 0xf1, 0x16, 0x41, 0x55, 0xf5, 0xc4, 0xe7, 0xcf,
+ 0x01, 0x77, 0x69, 0xc5, 0xdf, 0x5e, 0x01, 0x77, 0x90, 0x9c, 0x01, 0x8e,
+ 0xc1, 0x89, 0x01, 0x8e, 0xf8, 0xc2, 0x0f, 0x3e, 0x01, 0x8e, 0x49, 0x9c,
+ 0x01, 0x8e, 0xf0, 0x9c, 0x01, 0x8e, 0x2b, 0x01, 0x56, 0x01, 0x89, 0x01,
+ 0x8e, 0x31, 0x99, 0x01, 0x8e, 0x6b, 0x01, 0x56, 0x0c, 0x96, 0x01, 0x8e,
+ 0x50, 0xc2, 0x0f, 0x3e, 0x01, 0x8e, 0x60, 0xc5, 0x0b, 0x39, 0x0f, 0xdc,
+ 0xa8, 0x4d, 0x27, 0x71, 0xc1, 0x56, 0x10, 0x47, 0x01, 0xff, 0x41, 0x56,
+ 0x5f, 0xc3, 0x94, 0x0d, 0x0f, 0x9a, 0x91, 0xc9, 0xad, 0x30, 0x0f, 0x99,
+ 0xc0, 0xc2, 0x05, 0x4a, 0x01, 0x02, 0x01, 0xc9, 0x37, 0xb4, 0x00, 0x00,
+ 0x4a, 0x01, 0x56, 0xae, 0xcf, 0x66, 0x63, 0x0f, 0xa6, 0x49, 0xcd, 0x77,
+ 0x58, 0x0f, 0xa6, 0x42, 0x01, 0x56, 0xb2, 0xc3, 0xdb, 0xf8, 0x08, 0x8a,
+ 0x39, 0x0e, 0xc1, 0x56, 0xb8, 0xc3, 0x3b, 0xb0, 0x08, 0x89, 0x31, 0xc3,
+ 0xd0, 0x22, 0x08, 0x89, 0x29, 0xc3, 0x14, 0x8f, 0x08, 0x89, 0x21, 0xc3,
+ 0x0e, 0x2f, 0x08, 0x89, 0x11, 0x1b, 0xc1, 0x56, 0xc4, 0xc3, 0x3d, 0x00,
+ 0x08, 0x88, 0xf9, 0x04, 0xc1, 0x56, 0xd0, 0x12, 0xc1, 0x56, 0xdc, 0x10,
+ 0xc1, 0x56, 0xe8, 0x06, 0xc1, 0x57, 0x00, 0x16, 0xc1, 0x57, 0x10, 0x0c,
+ 0xc1, 0x57, 0x20, 0x05, 0xc1, 0x57, 0x2c, 0x09, 0xc1, 0x57, 0x38, 0x0d,
+ 0xc1, 0x57, 0x44, 0x87, 0x08, 0x88, 0x31, 0x97, 0x08, 0x88, 0x29, 0x8b,
+ 0x08, 0x88, 0x21, 0xc2, 0x0e, 0x30, 0x08, 0x88, 0x18, 0x4a, 0x6f, 0x72,
+ 0xc1, 0x57, 0x50, 0xc5, 0x21, 0x27, 0x08, 0x89, 0x98, 0xcb, 0x91, 0xff,
+ 0x08, 0x8a, 0x11, 0xc4, 0x1c, 0xb3, 0x08, 0x8a, 0x09, 0x45, 0x08, 0xd8,
+ 0x41, 0x57, 0x73, 0xcb, 0x25, 0x87, 0x08, 0x8a, 0x01, 0x44, 0x02, 0xcc,
+ 0x41, 0x57, 0x97, 0xc2, 0x00, 0x4c, 0x05, 0x51, 0xb1, 0xc2, 0x00, 0x96,
+ 0x05, 0x51, 0xa9, 0xc2, 0x00, 0x9a, 0x05, 0x51, 0xa1, 0xc2, 0x1a, 0x36,
+ 0x05, 0x51, 0x99, 0x46, 0x2a, 0xb4, 0x41, 0x57, 0xa9, 0x97, 0x05, 0x51,
+ 0x6b, 0x01, 0x57, 0xb7, 0x03, 0xc1, 0x57, 0xbb, 0x91, 0x05, 0x51, 0x7b,
+ 0x01, 0x57, 0xc7, 0xc2, 0x05, 0x1b, 0x05, 0x51, 0x61, 0x8b, 0x05, 0x51,
+ 0x52, 0x01, 0x57, 0xcb, 0xc2, 0x01, 0x0e, 0x05, 0x51, 0x41, 0x15, 0xc1,
+ 0x57, 0xcf, 0x10, 0xc1, 0x57, 0xd9, 0x09, 0xc1, 0x57, 0xeb, 0x0d, 0xc1,
+ 0x57, 0xf5, 0x91, 0x05, 0x50, 0x29, 0x83, 0x05, 0x50, 0x03, 0x01, 0x57,
+ 0xff, 0x87, 0x05, 0x50, 0x19, 0x46, 0x2a, 0xb4, 0xc1, 0x58, 0x03, 0xc2,
+ 0x06, 0x8c, 0x05, 0x51, 0x29, 0xc2, 0x00, 0x96, 0x05, 0x51, 0x21, 0xc2,
+ 0x00, 0x9a, 0x05, 0x51, 0x19, 0xc2, 0x1a, 0x36, 0x05, 0x51, 0x11, 0x04,
+ 0xc1, 0x58, 0x32, 0x0f, 0xc1, 0x58, 0x42, 0x12, 0xc1, 0x58, 0x4c, 0x06,
+ 0xc1, 0x58, 0x5c, 0x16, 0xc1, 0x58, 0x6c, 0x0c, 0xc1, 0x58, 0x76, 0x42,
+ 0x11, 0x3f, 0xc1, 0x58, 0x80, 0x97, 0x05, 0x50, 0x11, 0x8b, 0x05, 0x50,
+ 0x08, 0xcc, 0x83, 0xd0, 0x05, 0x52, 0xf9, 0x06, 0xc1, 0x58, 0x8a, 0xc6,
+ 0x7c, 0x4c, 0x05, 0x52, 0xe0, 0xc4, 0x24, 0x35, 0x05, 0x52, 0xc9, 0xc5,
+ 0x05, 0x1b, 0x05, 0x52, 0xc1, 0x15, 0xc1, 0x58, 0x96, 0x08, 0xc1, 0x58,
+ 0xa2, 0x16, 0xc1, 0x58, 0xae, 0xc4, 0x16, 0x57, 0x05, 0x52, 0x81, 0xc3,
+ 0x05, 0x17, 0x05, 0x52, 0x88, 0xc3, 0x05, 0x17, 0x08, 0x7e, 0x2b, 0x01,
+ 0x58, 0xba, 0x16, 0xc1, 0x58, 0xc0, 0xc4, 0x08, 0xdd, 0x08, 0x7e, 0x40,
+ 0xc3, 0x5f, 0x3d, 0x08, 0x7e, 0x21, 0x15, 0xc1, 0x58, 0xd0, 0xc4, 0xe5,
+ 0x53, 0x08, 0x7d, 0xd9, 0xc4, 0x4d, 0x48, 0x08, 0x7d, 0xd1, 0xc2, 0x00,
+ 0x5b, 0x08, 0x7d, 0xab, 0x01, 0x58, 0xe2, 0xc5, 0x4d, 0x42, 0x08, 0x7d,
+ 0xc1, 0xca, 0xa0, 0x24, 0x08, 0x7d, 0xb9, 0xc3, 0x7c, 0xad, 0x08, 0x7d,
+ 0xb1, 0xc6, 0x43, 0x0f, 0x08, 0x7d, 0xa1, 0xc5, 0x9e, 0xbc, 0x08, 0x7d,
+ 0x99, 0xc4, 0xe5, 0xaf, 0x08, 0x7d, 0x91, 0x03, 0xc1, 0x58, 0xe8, 0xc6,
+ 0xd7, 0x12, 0x08, 0x7d, 0xe1, 0xc3, 0x04, 0xae, 0x08, 0x7d, 0xe9, 0xc3,
+ 0x21, 0x00, 0x08, 0x7d, 0xf1, 0xc2, 0x03, 0x07, 0x08, 0x7e, 0x09, 0xc4,
+ 0x5d, 0xef, 0x08, 0x7e, 0x10, 0xc4, 0x00, 0xfa, 0x01, 0x3a, 0x61, 0x43,
+ 0x03, 0x73, 0xc1, 0x58, 0xf4, 0x12, 0x41, 0x59, 0x00, 0xc6, 0xce, 0xee,
+ 0x01, 0x34, 0xa1, 0xc5, 0xde, 0x64, 0x0f, 0x9c, 0x61, 0x47, 0x57, 0x85,
+ 0x41, 0x59, 0x0f, 0x48, 0xbb, 0xe3, 0xc1, 0x59, 0x15, 0x49, 0x00, 0xb9,
+ 0xc1, 0x59, 0x47, 0xd0, 0x07, 0x97, 0x00, 0x18, 0x13, 0x01, 0x59, 0x53,
+ 0x03, 0xc1, 0x59, 0x59, 0x11, 0xc1, 0x59, 0x68, 0xc6, 0xbf, 0xd5, 0x00,
+ 0x19, 0x38, 0x51, 0x54, 0x33, 0xc1, 0x59, 0x77, 0x14, 0x41, 0x59, 0xe8,
+ 0x48, 0x5c, 0xaf, 0xc1, 0x59, 0xf2, 0x10, 0xc1, 0x59, 0xfe, 0x4f, 0x69,
+ 0x42, 0xc1, 0x5a, 0x0a, 0x43, 0x25, 0x99, 0x41, 0x5a, 0x16, 0x0b, 0xc1,
+ 0x5a, 0x28, 0x07, 0x41, 0x5a, 0x34, 0x43, 0x00, 0xce, 0xc1, 0x5a, 0x40,
+ 0x11, 0xc1, 0x5a, 0x4a, 0x45, 0x01, 0xf2, 0xc1, 0x5a, 0x56, 0x42, 0x00,
+ 0x48, 0x41, 0x5a, 0x62, 0x43, 0x01, 0x10, 0xc1, 0x5a, 0x6e, 0xcf, 0x6a,
+ 0xe6, 0x00, 0xd5, 0xb0, 0x46, 0x15, 0xd0, 0xc1, 0x5a, 0x7a, 0xcf, 0x0d,
+ 0xe6, 0x01, 0x06, 0xd9, 0xc4, 0x1f, 0x02, 0x00, 0x18, 0x1b, 0x01, 0x5a,
+ 0x8c, 0xd1, 0x53, 0xbc, 0x00, 0x18, 0x90, 0x11, 0xc1, 0x5a, 0x90, 0x07,
+ 0xc1, 0x5a, 0xa0, 0x47, 0x1e, 0x8a, 0x41, 0x5a, 0xac, 0x49, 0xae, 0xce,
+ 0xc1, 0x5a, 0xb9, 0xd0, 0x58, 0xef, 0x00, 0x1a, 0x38, 0xce, 0x39, 0xc0,
+ 0x01, 0x06, 0xe1, 0xc6, 0xd7, 0xcc, 0x00, 0x1a, 0x90, 0x45, 0x2f, 0xad,
+ 0xc1, 0x5a, 0xd8, 0xce, 0x6c, 0x88, 0x00, 0xee, 0x19, 0xca, 0xa7, 0xae,
+ 0x00, 0xee, 0x11, 0x47, 0x26, 0x2f, 0xc1, 0x5a, 0xe2, 0x16, 0xc1, 0x5a,
+ 0xee, 0xcc, 0x83, 0x1c, 0x00, 0x19, 0xe0, 0xca, 0x9c, 0xfa, 0x08, 0x99,
+ 0xd9, 0x14, 0x41, 0x5a, 0xf4, 0x4b, 0x9b, 0x47, 0xc1, 0x5b, 0x00, 0x50,
+ 0x5d, 0x9f, 0x41, 0x5b, 0x0c, 0x12, 0xc1, 0x5b, 0x18, 0xc7, 0x0b, 0x6d,
+ 0x00, 0xee, 0x91, 0xc7, 0x08, 0xe0, 0x00, 0xee, 0x88, 0xc7, 0x0b, 0x80,
+ 0x00, 0xee, 0x81, 0x10, 0x41, 0x5b, 0x24, 0xc5, 0x00, 0x34, 0x00, 0xee,
+ 0x79, 0xc5, 0x03, 0x50, 0x00, 0x1a, 0xd8, 0xc5, 0xd1, 0x0d, 0x00, 0x19,
+ 0x43, 0x01, 0x5b, 0x30, 0xce, 0x6e, 0xe2, 0x00, 0xd5, 0xb9, 0xc7, 0x7c,
+ 0x99, 0x00, 0x18, 0x29, 0x51, 0x54, 0xcc, 0x41, 0x5b, 0x36, 0x44, 0x61,
+ 0xbd, 0xc1, 0x5b, 0x54, 0xcf, 0x64, 0xce, 0x00, 0x19, 0x00, 0x49, 0x65,
+ 0xfb, 0xc1, 0x5b, 0x6d, 0x03, 0x41, 0x5b, 0x79, 0xd0, 0x58, 0x6f, 0x00,
+ 0xd6, 0x31, 0xce, 0x70, 0xbe, 0x00, 0x1a, 0x50, 0xc8, 0xa1, 0x8e, 0x00,
+ 0x18, 0x49, 0xc2, 0x01, 0x00, 0x00, 0x18, 0xd9, 0xce, 0x74, 0xd8, 0x00,
+ 0x1a, 0x58, 0x45, 0x03, 0x5d, 0xc1, 0x5b, 0x85, 0xc5, 0x1f, 0x01, 0x00,
+ 0x19, 0xf0, 0xc5, 0x03, 0x50, 0x00, 0x19, 0xe9, 0xc5, 0x00, 0x34, 0x00,
+ 0x1a, 0x98, 0xca, 0x86, 0x1e, 0x01, 0x02, 0x91, 0xc2, 0x00, 0x97, 0x00,
+ 0x02, 0x00, 0x4b, 0x90, 0x73, 0xc1, 0x5b, 0x91, 0x4b, 0x99, 0x6e, 0x41,
+ 0x5b, 0xaf, 0xc4, 0xe4, 0x7f, 0x01, 0x19, 0xa9, 0xc4, 0xe5, 0xbb, 0x01,
+ 0x19, 0xa0, 0x45, 0x01, 0xac, 0xc1, 0x5b, 0xcd, 0x43, 0x80, 0xc9, 0x41,
+ 0x5b, 0xdf, 0xc5, 0xdd, 0xb0, 0x0f, 0x9c, 0xd9, 0xd3, 0x44, 0x0c, 0x00,
+ 0x04, 0xd8, 0xc6, 0x0e, 0xbc, 0x01, 0x12, 0xa1, 0xc4, 0x02, 0xcb, 0x01,
+ 0x05, 0x08, 0x4c, 0x27, 0x72, 0xc1, 0x5b, 0xee, 0x46, 0x11, 0xf1, 0x41,
+ 0x5c, 0x63, 0x4e, 0x01, 0xf8, 0xc1, 0x5c, 0x7d, 0x49, 0x10, 0xa2, 0x41,
+ 0x5c, 0xf2, 0xce, 0x70, 0x08, 0x08, 0x17, 0x01, 0x46, 0x08, 0xd7, 0xc1,
+ 0x5c, 0xfe, 0x47, 0x37, 0x49, 0x41, 0x5d, 0x1c, 0xc9, 0x11, 0x47, 0x01,
+ 0x67, 0xc9, 0xd4, 0x2d, 0x9f, 0x01, 0x67, 0xd1, 0xd6, 0x2d, 0x9d, 0x01,
+ 0x67, 0xd9, 0xcd, 0x4e, 0x83, 0x01, 0x67, 0xe0, 0xd0, 0x54, 0xde, 0x01,
+ 0x67, 0xe9, 0xc8, 0x11, 0x48, 0x01, 0x67, 0xf0, 0xcd, 0x7d, 0xbe, 0x0f,
+ 0xa8, 0x81, 0x4d, 0x7c, 0xba, 0xc1, 0x5d, 0x3a, 0xc4, 0xe5, 0xb7, 0x0f,
+ 0xa6, 0xa9, 0x17, 0xc1, 0x5d, 0x46, 0xd8, 0x25, 0x34, 0x01, 0x52, 0x69,
+ 0x42, 0x06, 0x82, 0x41, 0x5d, 0x55, 0xd3, 0x43, 0xd3, 0x01, 0x3f, 0x99,
+ 0x05, 0xc1, 0x5d, 0x67, 0xc8, 0x20, 0x08, 0x01, 0x11, 0x89, 0xd1, 0x03,
+ 0xf5, 0x01, 0x0d, 0xd9, 0x16, 0xc1, 0x5d, 0x73, 0x45, 0x00, 0x47, 0xc1,
+ 0x5d, 0x7f, 0x48, 0x00, 0x68, 0x41, 0x5d, 0x8b, 0x16, 0xc1, 0x5d, 0x91,
+ 0x07, 0xc1, 0x5d, 0x9f, 0x43, 0x01, 0xce, 0xc1, 0x5d, 0xab, 0x15, 0xc1,
+ 0x5d, 0xb7, 0x08, 0xc1, 0x5d, 0xc1, 0x42, 0x00, 0xe5, 0x41, 0x5d, 0xcd,
+ 0xc9, 0xb4, 0x77, 0x0f, 0x99, 0x49, 0xc4, 0x27, 0x90, 0x0f, 0x99, 0x41,
+ 0xc4, 0x04, 0x3e, 0x0f, 0x99, 0x39, 0xc7, 0xc9, 0x43, 0x0f, 0x99, 0x50,
+ 0x05, 0xc1, 0x5d, 0xd9, 0x0a, 0xc1, 0x5d, 0xed, 0xde, 0x0e, 0x8b, 0x01,
+ 0x3a, 0x11, 0x19, 0xc1, 0x5e, 0x05, 0x06, 0xc1, 0x5e, 0x0f, 0x0e, 0xc1,
+ 0x5e, 0x1d, 0x47, 0x37, 0x49, 0xc1, 0x5e, 0x29, 0x16, 0xc1, 0x5e, 0x3f,
+ 0xc6, 0x0e, 0xbc, 0x01, 0x14, 0xe1, 0x03, 0xc1, 0x5e, 0x4e, 0x14, 0xc1,
+ 0x5e, 0x5a, 0x0f, 0xc1, 0x5e, 0x66, 0x12, 0xc1, 0x5e, 0x72, 0x0b, 0xc1,
+ 0x5e, 0x8a, 0xcc, 0x06, 0xfb, 0x01, 0x4e, 0x09, 0x04, 0xc1, 0x5e, 0x9c,
+ 0xcc, 0x02, 0x5b, 0x01, 0x4d, 0xb1, 0x9a, 0x01, 0x5d, 0xf1, 0xcf, 0x67,
+ 0xad, 0x0f, 0x88, 0x69, 0xc6, 0x01, 0xe9, 0x0f, 0xbe, 0xb9, 0x0d, 0x41,
+ 0x5e, 0xa8, 0x45, 0x01, 0xac, 0xc1, 0x5e, 0xb4, 0x5e, 0x0f, 0x03, 0x41,
+ 0x5e, 0xde, 0x97, 0x09, 0x1b, 0x53, 0x01, 0x5e, 0xe4, 0x83, 0x09, 0x1a,
+ 0xeb, 0x01, 0x5e, 0xfb, 0x8b, 0x09, 0x1b, 0x1b, 0x01, 0x5f, 0x0d, 0xc2,
+ 0x64, 0x37, 0x09, 0x1b, 0x10, 0x94, 0x09, 0x19, 0x43, 0x01, 0x5f, 0x28,
+ 0x00, 0xc1, 0x5f, 0x45, 0x8f, 0x09, 0x18, 0xeb, 0x01, 0x5f, 0x58, 0x1c,
+ 0xc1, 0x5f, 0x6d, 0xc4, 0xe4, 0xc7, 0x09, 0x1a, 0xc9, 0xc2, 0x01, 0x0d,
+ 0x09, 0x1a, 0x8b, 0x01, 0x5f, 0x78, 0x90, 0x09, 0x19, 0x33, 0x01, 0x5f,
+ 0x8c, 0x86, 0x09, 0x18, 0x9b, 0x01, 0x5f, 0x92, 0x84, 0x09, 0x18, 0x91,
+ 0x9f, 0x09, 0x18, 0x88, 0x97, 0x09, 0x18, 0x2b, 0x01, 0x5f, 0x9c, 0x83,
+ 0x09, 0x17, 0x5b, 0x01, 0x5f, 0xb4, 0x8b, 0x09, 0x17, 0xf3, 0x01, 0x5f,
+ 0xd3, 0x87, 0x09, 0x17, 0xe2, 0x01, 0x5f, 0xe8, 0x8b, 0x09, 0x16, 0xdb,
+ 0x01, 0x5f, 0xee, 0x0a, 0xc1, 0x60, 0x05, 0x83, 0x09, 0x14, 0x9b, 0x01,
+ 0x60, 0x1e, 0x97, 0x09, 0x17, 0x12, 0x01, 0x60, 0x36, 0x8b, 0x09, 0x12,
+ 0x63, 0x01, 0x60, 0x57, 0x97, 0x09, 0x13, 0x0b, 0x01, 0x60, 0x75, 0x83,
+ 0x09, 0x11, 0xf3, 0x01, 0x60, 0x85, 0x87, 0x09, 0x12, 0x42, 0x01, 0x60,
+ 0x9d, 0x97, 0x09, 0x11, 0x63, 0x01, 0x60, 0xa1, 0x8b, 0x09, 0x11, 0x53,
+ 0x01, 0x60, 0xc3, 0x87, 0x09, 0x11, 0x43, 0x01, 0x60, 0xcd, 0x83, 0x09,
+ 0x11, 0x02, 0x01, 0x60, 0xd4, 0x97, 0x09, 0x0f, 0xdb, 0x01, 0x60, 0xed,
+ 0x83, 0x09, 0x0d, 0xbb, 0x01, 0x61, 0x16, 0x8b, 0x09, 0x0f, 0xba, 0x01,
+ 0x61, 0x60, 0x83, 0x09, 0x0a, 0xbb, 0x01, 0x61, 0x70, 0xc5, 0xe2, 0x3d,
+ 0x09, 0x0d, 0xb1, 0x97, 0x09, 0x0d, 0x53, 0x01, 0x61, 0xa6, 0x8b, 0x09,
+ 0x0d, 0x03, 0x01, 0x61, 0xd3, 0xc4, 0x76, 0x32, 0x09, 0x0c, 0xf8, 0x8b,
+ 0x09, 0x09, 0x6b, 0x01, 0x61, 0xe5, 0x83, 0x09, 0x09, 0x4b, 0x01, 0x61,
+ 0xeb, 0x97, 0x09, 0x09, 0xba, 0x01, 0x61, 0xf3, 0x97, 0x09, 0x08, 0xb3,
+ 0x01, 0x62, 0x08, 0x8b, 0x09, 0x08, 0x03, 0x01, 0x62, 0x2e, 0x07, 0xc1,
+ 0x62, 0x4b, 0x83, 0x09, 0x05, 0xaa, 0x01, 0x62, 0x5a, 0xc3, 0x0b, 0xa2,
+ 0x09, 0x05, 0x0b, 0x01, 0x62, 0x96, 0xc3, 0x07, 0xee, 0x09, 0x05, 0x03,
+ 0x01, 0x62, 0x9a, 0x14, 0xc1, 0x62, 0xa0, 0x9f, 0x09, 0x04, 0x6b, 0x01,
+ 0x62, 0xaf, 0x90, 0x09, 0x04, 0xbb, 0x01, 0x62, 0xb5, 0x8e, 0x09, 0x04,
+ 0xb1, 0xc3, 0x6c, 0x6b, 0x09, 0x04, 0xa9, 0xc3, 0x03, 0xc6, 0x09, 0x04,
+ 0xa1, 0x00, 0x41, 0x62, 0xb9, 0x97, 0x09, 0x03, 0xd3, 0x01, 0x62, 0xc5,
+ 0x8b, 0x09, 0x03, 0x93, 0x01, 0x62, 0xe8, 0x83, 0x09, 0x02, 0xaa, 0x01,
+ 0x63, 0x03, 0x97, 0x09, 0x02, 0x6b, 0x01, 0x63, 0x1b, 0x83, 0x09, 0x02,
+ 0x03, 0x01, 0x63, 0x2f, 0x8b, 0x09, 0x02, 0x4a, 0x01, 0x63, 0x53, 0x86,
+ 0x09, 0x00, 0xe3, 0x01, 0x63, 0x59, 0x84, 0x09, 0x00, 0x53, 0x01, 0x63,
+ 0x5f, 0xc3, 0x00, 0xfa, 0x09, 0x01, 0x5b, 0x01, 0x63, 0x6a, 0x15, 0xc1,
+ 0x63, 0x70, 0x14, 0xc1, 0x63, 0x7d, 0xc3, 0x0c, 0x82, 0x09, 0x01, 0x99,
+ 0x90, 0x09, 0x01, 0x6b, 0x01, 0x63, 0x8c, 0x8e, 0x09, 0x01, 0x03, 0x01,
+ 0x63, 0x96, 0x8d, 0x09, 0x00, 0xeb, 0x01, 0x63, 0xa8, 0x9f, 0x09, 0x00,
+ 0x49, 0x47, 0x07, 0x6c, 0x41, 0x63, 0xae, 0x8b, 0x09, 0x13, 0xfb, 0x01,
+ 0x63, 0xdc, 0xc4, 0x76, 0x32, 0x09, 0x13, 0xf3, 0x01, 0x63, 0xe4, 0x83,
+ 0x09, 0x13, 0xd2, 0x01, 0x63, 0xea, 0x97, 0x09, 0x14, 0x91, 0x8b, 0x09,
+ 0x14, 0x89, 0x83, 0x09, 0x14, 0x7a, 0x01, 0x63, 0xf6, 0xc2, 0x01, 0x0d,
+ 0x09, 0x0a, 0xb1, 0x94, 0x09, 0x0a, 0xa9, 0x90, 0x09, 0x0a, 0xa1, 0x8f,
+ 0x09, 0x0a, 0x73, 0x01, 0x63, 0xfa, 0x8e, 0x09, 0x0a, 0x5b, 0x01, 0x64,
+ 0x04, 0x89, 0x09, 0x0a, 0x2b, 0x01, 0x64, 0x0e, 0xc3, 0x76, 0xca, 0x09,
+ 0x0a, 0x13, 0x01, 0x64, 0x15, 0x84, 0x09, 0x0a, 0x09, 0xc2, 0x00, 0x39,
+ 0x09, 0x0a, 0x00, 0xc9, 0xab, 0x9b, 0x09, 0x23, 0xa1, 0xc8, 0xc2, 0x4b,
+ 0x09, 0x23, 0x99, 0xc5, 0x34, 0x9a, 0x09, 0x23, 0x90, 0x43, 0x03, 0x5f,
+ 0xc1, 0x64, 0x1b, 0x44, 0xe6, 0xcf, 0x41, 0x64, 0x43, 0x45, 0x01, 0x18,
+ 0xc1, 0x64, 0x4f, 0x47, 0xc5, 0xc3, 0x41, 0x64, 0x77, 0x45, 0x19, 0x9c,
+ 0xc1, 0x64, 0x87, 0x43, 0x48, 0xf0, 0xc1, 0x64, 0xac, 0x54, 0x3e, 0x06,
+ 0x41, 0x64, 0xd4, 0x44, 0x0d, 0x8f, 0xc1, 0x64, 0xe0, 0x44, 0x08, 0xde,
+ 0x41, 0x65, 0x04, 0x43, 0x03, 0x5f, 0xc1, 0x65, 0x33, 0x50, 0x5a, 0xbf,
+ 0x41, 0x65, 0x59, 0x43, 0x0a, 0x20, 0xc1, 0x65, 0x65, 0x45, 0x05, 0xde,
+ 0x41, 0x65, 0x8a, 0x42, 0x00, 0x36, 0xc1, 0x65, 0xaf, 0xd1, 0x50, 0xbf,
+ 0x01, 0x1d, 0x50, 0xc8, 0xc2, 0xa3, 0x0f, 0xa5, 0x89, 0xc4, 0x02, 0xcb,
+ 0x00, 0x05, 0x20, 0xc8, 0x7c, 0x98, 0x07, 0xf2, 0x51, 0xc8, 0x7c, 0x8b,
+ 0x07, 0xf2, 0x70, 0x06, 0xc1, 0x65, 0xbb, 0x04, 0xc1, 0x65, 0xc3, 0xc3,
+ 0xc8, 0xfd, 0x0f, 0x02, 0xa3, 0x01, 0x65, 0xcd, 0xc4, 0xa9, 0x9f, 0x0f,
+ 0x03, 0x31, 0xc2, 0x01, 0x0e, 0x0f, 0x03, 0x0b, 0x01, 0x65, 0xd3, 0xc3,
+ 0x01, 0x0d, 0x0f, 0x03, 0x21, 0xc3, 0x27, 0xc3, 0x0f, 0x03, 0x19, 0xc3,
+ 0x0a, 0x25, 0x0f, 0x03, 0x11, 0x07, 0xc1, 0x65, 0xd9, 0x97, 0x0f, 0x02,
+ 0xf9, 0xc2, 0x03, 0x07, 0x0f, 0x02, 0xe9, 0x91, 0x0f, 0x02, 0xe1, 0xc2,
+ 0x06, 0x6b, 0x0f, 0x02, 0xd1, 0x8b, 0x0f, 0x02, 0xcb, 0x01, 0x65, 0xe5,
+ 0x1c, 0xc1, 0x65, 0xe9, 0xc2, 0x00, 0x29, 0x0f, 0x02, 0x99, 0x83, 0x0f,
+ 0x02, 0x88, 0x46, 0x02, 0x00, 0xc1, 0x65, 0xf3, 0x48, 0x1b, 0x0d, 0x41,
+ 0x66, 0x70, 0x87, 0x00, 0x21, 0x6b, 0x01, 0x66, 0x82, 0x06, 0xc1, 0x66,
+ 0xaf, 0x15, 0xc1, 0x66, 0xd2, 0x12, 0xc1, 0x66, 0xf4, 0x83, 0x00, 0x20,
+ 0x83, 0x01, 0x67, 0x01, 0xc2, 0x00, 0xc9, 0x00, 0x28, 0xe1, 0xc2, 0x0e,
+ 0x13, 0x00, 0x28, 0xd1, 0x1b, 0xc1, 0x67, 0x13, 0x14, 0xc1, 0x67, 0x2f,
+ 0x0e, 0xc1, 0x67, 0x41, 0x0d, 0xc1, 0x67, 0x53, 0x0a, 0xc1, 0x67, 0x70,
+ 0x09, 0xc1, 0x67, 0x7d, 0x05, 0xc1, 0x67, 0x8c, 0x97, 0x00, 0x21, 0x1b,
+ 0x01, 0x67, 0xa7, 0x04, 0xc1, 0x67, 0xb4, 0x91, 0x00, 0x20, 0xf3, 0x01,
+ 0x67, 0xd2, 0x8b, 0x00, 0x20, 0xc3, 0x01, 0x67, 0xe5, 0x1c, 0xc1, 0x68,
+ 0x02, 0x16, 0xc1, 0x68, 0x0d, 0xc2, 0x1c, 0x3e, 0x00, 0x20, 0x41, 0x10,
+ 0xc1, 0x68, 0x24, 0xc2, 0x00, 0x29, 0x00, 0x20, 0x91, 0x44, 0x13, 0x1a,
+ 0xc1, 0x68, 0x30, 0xc4, 0xe6, 0xa3, 0x00, 0x23, 0x90, 0xc4, 0xea, 0xa7,
+ 0x00, 0x26, 0xa1, 0xc6, 0xd3, 0x16, 0x00, 0x25, 0xa1, 0xc6, 0xd0, 0xdc,
+ 0x00, 0x25, 0x20, 0x9f, 0x09, 0x7f, 0x91, 0x9e, 0x09, 0x7f, 0x88, 0x1e,
+ 0xc1, 0x68, 0x3c, 0x1d, 0x41, 0x68, 0x48, 0x26, 0xc1, 0x68, 0x6c, 0x25,
+ 0xc1, 0x68, 0x90, 0x24, 0xc1, 0x68, 0xb8, 0x23, 0xc1, 0x68, 0xdf, 0x22,
+ 0xc1, 0x69, 0x03, 0x21, 0xc1, 0x69, 0x27, 0x20, 0xc1, 0x69, 0x3f, 0x1f,
+ 0xc1, 0x69, 0x5f, 0x1e, 0xc1, 0x69, 0x7f, 0x1d, 0x41, 0x69, 0x9e, 0x87,
+ 0x08, 0x41, 0x99, 0x8b, 0x08, 0x41, 0xa1, 0x91, 0x08, 0x41, 0xa9, 0x83,
+ 0x08, 0x41, 0x90, 0x83, 0x08, 0x41, 0xb9, 0x87, 0x08, 0x41, 0xc0, 0x83,
+ 0x08, 0x41, 0xe1, 0x91, 0x08, 0x41, 0xf8, 0x83, 0x08, 0x40, 0x29, 0x91,
+ 0x08, 0x40, 0x40, 0x83, 0x08, 0x40, 0x51, 0x87, 0x08, 0x40, 0x59, 0x8b,
+ 0x08, 0x40, 0x61, 0x91, 0x08, 0x40, 0x69, 0x97, 0x08, 0x40, 0x70, 0x83,
+ 0x08, 0x40, 0x79, 0x87, 0x08, 0x40, 0x81, 0x8b, 0x08, 0x40, 0x89, 0x91,
+ 0x08, 0x40, 0x91, 0x97, 0x08, 0x40, 0x98, 0x83, 0x08, 0x40, 0xa1, 0x87,
+ 0x08, 0x40, 0xa9, 0x8b, 0x08, 0x40, 0xb1, 0x91, 0x08, 0x40, 0xb9, 0x97,
+ 0x08, 0x40, 0xc0, 0x83, 0x08, 0x40, 0xc9, 0x87, 0x08, 0x40, 0xd1, 0x8b,
+ 0x08, 0x40, 0xd9, 0x91, 0x08, 0x40, 0xe1, 0x97, 0x08, 0x40, 0xe8, 0x83,
+ 0x08, 0x40, 0xf1, 0x87, 0x08, 0x40, 0xf9, 0x8b, 0x08, 0x41, 0x01, 0x91,
+ 0x08, 0x41, 0x09, 0x97, 0x08, 0x41, 0x10, 0x83, 0x08, 0x41, 0x19, 0x87,
+ 0x08, 0x41, 0x21, 0x8b, 0x08, 0x41, 0x29, 0x91, 0x08, 0x41, 0x31, 0x97,
+ 0x08, 0x41, 0x38, 0x83, 0x08, 0x41, 0x41, 0x87, 0x08, 0x41, 0x49, 0x8b,
+ 0x08, 0x41, 0x51, 0x91, 0x08, 0x41, 0x59, 0x97, 0x08, 0x41, 0x60, 0x83,
+ 0x08, 0x41, 0x69, 0x87, 0x08, 0x41, 0x71, 0x8b, 0x08, 0x41, 0x79, 0x91,
+ 0x08, 0x41, 0x81, 0x97, 0x08, 0x41, 0x88, 0xc2, 0x0a, 0x20, 0x0f, 0xdf,
+ 0x91, 0xc4, 0x05, 0xde, 0x0f, 0xdf, 0x98, 0xc3, 0x08, 0xde, 0x0f, 0xdf,
+ 0xa1, 0xc3, 0x0d, 0x8f, 0x0f, 0xdf, 0xa8, 0xc2, 0x22, 0x45, 0x0f, 0xdf,
+ 0xb1, 0xc4, 0x15, 0xa7, 0x0f, 0xdf, 0xb8, 0xc3, 0xed, 0x65, 0x00, 0x04,
+ 0x79, 0xc3, 0xed, 0x68, 0x00, 0x04, 0x70, 0x47, 0xc7, 0x6e, 0xc1, 0x69,
+ 0xbe, 0x43, 0x00, 0x47, 0xc1, 0x69, 0xca, 0x0e, 0xc1, 0x69, 0xd0, 0xde,
+ 0x0f, 0xd5, 0x01, 0x00, 0xd9, 0xd4, 0x39, 0x1a, 0x00, 0x04, 0xd0, 0x47,
+ 0x37, 0x49, 0xc1, 0x69, 0xda, 0x46, 0x08, 0xd7, 0x41, 0x69, 0xf8, 0xcb,
+ 0x21, 0x1a, 0x00, 0x6c, 0x09, 0x03, 0xc1, 0x6a, 0x16, 0xc9, 0xac, 0xf1,
+ 0x00, 0x6c, 0x18, 0x46, 0x02, 0x00, 0xc1, 0x6a, 0x22, 0x4a, 0x9e, 0x80,
+ 0x41, 0x6a, 0x70, 0xca, 0x6b, 0x09, 0x00, 0x6e, 0x79, 0x0d, 0xc1, 0x6a,
+ 0x94, 0x45, 0x6b, 0x04, 0xc1, 0x6a, 0xa0, 0x42, 0x07, 0x69, 0x41, 0x6a,
+ 0xbe, 0x46, 0x00, 0xf2, 0xc1, 0x6a, 0xca, 0x43, 0x43, 0xb6, 0x41, 0x6a,
+ 0xdc, 0x0b, 0xc1, 0x6a, 0xee, 0xc8, 0x11, 0x48, 0x0e, 0xd4, 0x41, 0x0e,
+ 0xc1, 0x6a, 0xfa, 0x48, 0xbb, 0xdb, 0xc1, 0x6b, 0x06, 0x5c, 0x14, 0x32,
+ 0x41, 0x6b, 0x18, 0x11, 0xc1, 0x6b, 0x27, 0x46, 0x95, 0x69, 0x41, 0x6b,
+ 0x33, 0xc8, 0x50, 0x04, 0x0e, 0xd4, 0x49, 0x48, 0x19, 0x3b, 0xc1, 0x6b,
+ 0x45, 0x47, 0xce, 0x83, 0xc1, 0x6b, 0x51, 0x47, 0xc4, 0x2d, 0xc1, 0x6b,
+ 0x61, 0x46, 0xd5, 0x80, 0x41, 0x6b, 0x6d, 0x46, 0x7d, 0xb2, 0xc1, 0x6b,
+ 0x7f, 0x0b, 0x41, 0x6b, 0x91, 0xe0, 0x03, 0x07, 0x0e, 0xd3, 0xa8, 0x11,
+ 0xc1, 0x6b, 0x9b, 0x07, 0xc1, 0x6b, 0xad, 0x44, 0x1e, 0x65, 0x41, 0x6b,
+ 0xbc, 0xc9, 0xb1, 0x17, 0x0e, 0xd3, 0x61, 0xc3, 0x16, 0x39, 0x0e, 0xd1,
+ 0x81, 0x42, 0x04, 0x30, 0x41, 0x6b, 0xc8, 0x03, 0xc1, 0x6b, 0xe4, 0xc3,
+ 0x00, 0x5c, 0x0e, 0xcf, 0xfa, 0x01, 0x6b, 0xee, 0xc3, 0x66, 0xf6, 0x0e,
+ 0xd3, 0x51, 0x44, 0x14, 0x4a, 0x41, 0x6b, 0xf2, 0x45, 0x52, 0x95, 0xc1,
+ 0x6c, 0x02, 0x44, 0x01, 0xc4, 0x41, 0x6c, 0x1a, 0x45, 0xdb, 0x4e, 0xc1,
+ 0x6c, 0x4e, 0x44, 0xe1, 0x9e, 0x41, 0x6c, 0x5a, 0x44, 0xd8, 0xe0, 0xc1,
+ 0x6c, 0x6c, 0x44, 0x8d, 0xcc, 0x41, 0x6c, 0x78, 0x4f, 0x6a, 0xaa, 0xc1,
+ 0x6c, 0x84, 0x47, 0xcc, 0xd1, 0x41, 0x6c, 0x96, 0xc7, 0x03, 0x28, 0x0e,
+ 0xc8, 0x51, 0xc8, 0x3a, 0x32, 0x0e, 0xc8, 0x49, 0xc6, 0x23, 0x24, 0x0e,
+ 0xc8, 0x40, 0xca, 0x25, 0x5a, 0x01, 0x39, 0xb1, 0xd4, 0x3f, 0xaa, 0x0f,
+ 0xa9, 0x79, 0xcd, 0x0f, 0x50, 0x0f, 0xbe, 0x68, 0x03, 0xc1, 0x6c, 0xbe,
+ 0x91, 0x08, 0xad, 0xd1, 0x87, 0x08, 0xad, 0xc1, 0xc9, 0xb7, 0xd7, 0x08,
+ 0xad, 0xa3, 0x01, 0x6c, 0xd3, 0x97, 0x08, 0xad, 0x93, 0x01, 0x6c, 0xd7,
+ 0x8b, 0x08, 0xad, 0x82, 0x01, 0x6c, 0xdb, 0x83, 0x08, 0xac, 0x03, 0x01,
+ 0x6c, 0xdf, 0x16, 0xc1, 0x6c, 0xf1, 0xc2, 0x01, 0x0e, 0x08, 0xad, 0x71,
+ 0x15, 0xc1, 0x6d, 0x06, 0x18, 0xc1, 0x6d, 0x16, 0xc2, 0x00, 0x96, 0x08,
+ 0xad, 0x49, 0xc2, 0x00, 0x9a, 0x08, 0xad, 0x41, 0xc2, 0x1a, 0x36, 0x08,
+ 0xad, 0x39, 0xc2, 0x00, 0x3f, 0x08, 0xad, 0x31, 0x04, 0xc1, 0x6d, 0x20,
+ 0x12, 0xc1, 0x6d, 0x2a, 0x10, 0xc1, 0x6d, 0x34, 0x06, 0xc1, 0x6d, 0x4a,
+ 0x0c, 0xc1, 0x6d, 0x58, 0x05, 0xc1, 0x6d, 0x62, 0x09, 0xc1, 0x6d, 0x6c,
+ 0x0d, 0xc1, 0x6d, 0x76, 0x91, 0x08, 0xac, 0x61, 0x87, 0x08, 0xac, 0x51,
+ 0x97, 0x08, 0xac, 0x23, 0x01, 0x6d, 0x80, 0x8b, 0x08, 0xac, 0x12, 0x01,
+ 0x6d, 0x84, 0x07, 0xc1, 0x6d, 0x88, 0x44, 0x02, 0xcc, 0x41, 0x6d, 0x94,
+ 0xc3, 0xec, 0x6f, 0x08, 0xae, 0x41, 0xc3, 0xec, 0x6c, 0x08, 0xae, 0x39,
+ 0xc3, 0xd8, 0xbc, 0x08, 0xae, 0x30, 0xcb, 0x91, 0xff, 0x08, 0xae, 0x19,
+ 0xc4, 0x1c, 0xb3, 0x08, 0xae, 0x10, 0xd3, 0x46, 0xcb, 0x0f, 0xad, 0x09,
+ 0xd1, 0x53, 0xde, 0x0f, 0xad, 0x01, 0xd4, 0x01, 0x53, 0x0f, 0xac, 0xd9,
+ 0xd3, 0x42, 0x57, 0x0f, 0xac, 0xd0, 0xd3, 0x46, 0xcb, 0x0f, 0xac, 0xf9,
+ 0xd1, 0x53, 0xde, 0x0f, 0xac, 0xf1, 0xd4, 0x01, 0x53, 0x0f, 0xac, 0xc9,
+ 0xd3, 0x42, 0x57, 0x0f, 0xac, 0xc0, 0x11, 0xc1, 0x6d, 0xb2, 0xcc, 0x86,
+ 0xdc, 0x01, 0x31, 0x51, 0xc6, 0x0e, 0xbc, 0x01, 0x12, 0xd9, 0x45, 0x01,
+ 0xac, 0x41, 0x6d, 0xbe, 0xc4, 0x27, 0x22, 0x00, 0x00, 0x11, 0xc7, 0xc5,
+ 0x68, 0x00, 0x00, 0x09, 0x15, 0xc1, 0x6d, 0xca, 0xce, 0x6e, 0x48, 0x00,
+ 0x04, 0xb1, 0xcc, 0x8f, 0x10, 0x00, 0x04, 0xb0, 0xc4, 0x1e, 0xc2, 0x01,
+ 0x1f, 0x21, 0xc6, 0xd8, 0x32, 0x0f, 0xa6, 0x78, 0xcb, 0x9c, 0x65, 0x0f,
+ 0xde, 0x31, 0xc5, 0x23, 0x8b, 0x0f, 0xde, 0x48, 0xc4, 0x00, 0xcd, 0x0f,
+ 0xde, 0x39, 0xc5, 0x00, 0x47, 0x0f, 0xde, 0x40, 0xcb, 0x21, 0x1a, 0x05,
+ 0x46, 0x29, 0x42, 0x02, 0x52, 0xc1, 0x6d, 0xd6, 0xc8, 0x10, 0xab, 0x05,
+ 0x44, 0x00, 0x03, 0xc1, 0x6d, 0xe2, 0x91, 0x05, 0x46, 0x0b, 0x01, 0x6d,
+ 0xee, 0x87, 0x05, 0x45, 0xf3, 0x01, 0x6d, 0xf2, 0x48, 0xb7, 0xd7, 0xc1,
+ 0x6d, 0xf6, 0x8b, 0x05, 0x45, 0xb3, 0x01, 0x6e, 0x04, 0x97, 0x05, 0x45,
+ 0xc2, 0x01, 0x6e, 0x08, 0x15, 0xc1, 0x6e, 0x0c, 0xc2, 0x01, 0x0e, 0x05,
+ 0x45, 0x91, 0x0e, 0xc1, 0x6e, 0x1c, 0x83, 0x05, 0x44, 0x13, 0x01, 0x6e,
+ 0x26, 0x8b, 0x05, 0x44, 0x23, 0x01, 0x6e, 0x32, 0x97, 0x05, 0x44, 0x33,
+ 0x01, 0x6e, 0x36, 0x18, 0xc1, 0x6e, 0x3a, 0x87, 0x05, 0x44, 0x63, 0x01,
+ 0x6e, 0x44, 0x91, 0x05, 0x44, 0x7b, 0x01, 0x6e, 0x48, 0x0d, 0xc1, 0x6e,
+ 0x4c, 0x09, 0xc1, 0x6e, 0x56, 0x10, 0xc1, 0x6e, 0x60, 0x05, 0xc1, 0x6e,
+ 0x76, 0x0c, 0xc1, 0x6e, 0x80, 0x16, 0xc1, 0x6e, 0x8a, 0x06, 0xc1, 0x6e,
+ 0x98, 0x12, 0xc1, 0x6e, 0xa6, 0x04, 0xc1, 0x6e, 0xb0, 0xc2, 0x00, 0x3f,
+ 0x05, 0x45, 0x51, 0xc2, 0x1a, 0x36, 0x05, 0x45, 0x59, 0xc2, 0x00, 0x9a,
+ 0x05, 0x45, 0x60, 0xc4, 0x1c, 0xb3, 0x05, 0x46, 0x71, 0xcb, 0x91, 0xff,
+ 0x05, 0x46, 0x79, 0x45, 0x08, 0xd8, 0x41, 0x6e, 0xba, 0x47, 0x02, 0x91,
+ 0xc1, 0x6e, 0xde, 0x48, 0xbd, 0x63, 0x41, 0x6e, 0xea, 0x10, 0xc1, 0x6e,
+ 0xf0, 0xc6, 0xd1, 0x5a, 0x00, 0x41, 0xe1, 0xc5, 0xdd, 0xf1, 0x00, 0x41,
+ 0xa1, 0xc5, 0xde, 0x69, 0x00, 0x41, 0x88, 0xcb, 0x95, 0xdd, 0x00, 0x41,
+ 0xe9, 0xc9, 0xac, 0x34, 0x00, 0x41, 0xa8, 0xc3, 0xeb, 0x79, 0x00, 0x41,
+ 0xd1, 0xc4, 0xe5, 0x8b, 0x00, 0x41, 0xc0, 0xc7, 0xcc, 0x92, 0x00, 0x41,
+ 0x69, 0xce, 0x73, 0x26, 0x00, 0x40, 0xd9, 0xc6, 0x65, 0x82, 0x00, 0x40,
+ 0xc9, 0xc9, 0xb3, 0x2a, 0x00, 0x40, 0xc1, 0x0b, 0x41, 0x6e, 0xfc, 0x8b,
+ 0x00, 0x41, 0x41, 0xc7, 0xc7, 0x7c, 0x00, 0x41, 0x21, 0xce, 0x73, 0x26,
+ 0x00, 0x40, 0xd0, 0xc4, 0xdd, 0x8e, 0x00, 0x41, 0x61, 0xc6, 0xc7, 0x7d,
+ 0x00, 0x41, 0x28, 0xc9, 0xb2, 0xa3, 0x00, 0x41, 0x0a, 0x01, 0x6f, 0x06,
+ 0x8b, 0x00, 0x41, 0x49, 0x97, 0x00, 0x41, 0x31, 0x83, 0x00, 0x41, 0x13,
+ 0x01, 0x6f, 0x0a, 0x87, 0x00, 0x40, 0xe0, 0x83, 0x00, 0x41, 0x00, 0xc3,
+ 0x4d, 0x26, 0x00, 0x40, 0xa9, 0xc6, 0xd0, 0x22, 0x00, 0x40, 0x89, 0xc2,
+ 0x01, 0x33, 0x00, 0x40, 0x40, 0xc3, 0x02, 0x33, 0x00, 0x40, 0xa1, 0xc6,
+ 0xd4, 0x1e, 0x00, 0x40, 0x70, 0x90, 0x00, 0x40, 0x79, 0x96, 0x00, 0x40,
+ 0x39, 0x9b, 0x00, 0x40, 0x20, 0xc2, 0x0e, 0x30, 0x00, 0x40, 0x29, 0xc2,
+ 0x01, 0x33, 0x00, 0x40, 0x08, 0xc3, 0x00, 0x3a, 0x01, 0x52, 0xc1, 0xc2,
+ 0x00, 0xff, 0x01, 0x52, 0xb8, 0xc6, 0x01, 0xb1, 0x0f, 0xa5, 0x21, 0xc4,
+ 0x01, 0xa7, 0x0f, 0xb1, 0xa1, 0xcd, 0x7b, 0xf7, 0x0f, 0xb6, 0x60, 0xc9,
+ 0x09, 0xde, 0x01, 0x54, 0xab, 0x01, 0x6f, 0x0e, 0xcc, 0x06, 0xfb, 0x01,
+ 0x54, 0xb2, 0x01, 0x6f, 0x14, 0xc9, 0xb5, 0x85, 0x01, 0x5a, 0xd1, 0xcd,
+ 0x7a, 0x23, 0x01, 0x5a, 0xe0, 0x15, 0xc1, 0x6f, 0x1a, 0xd1, 0x56, 0x97,
+ 0x08, 0x8e, 0xe9, 0xca, 0x58, 0xd5, 0x08, 0x8e, 0xe1, 0x07, 0xc1, 0x6f,
+ 0x30, 0x06, 0xc1, 0x6f, 0x3c, 0x46, 0x33, 0x45, 0xc1, 0x6f, 0x4e, 0xd1,
+ 0x56, 0x0f, 0x08, 0x8e, 0x39, 0xc2, 0x00, 0xeb, 0x08, 0x8e, 0x21, 0x47,
+ 0x01, 0xff, 0x41, 0x6f, 0x5a, 0xc4, 0xe9, 0x2f, 0x08, 0x22, 0x81, 0x16,
+ 0xc1, 0x6f, 0xbf, 0xc4, 0xe5, 0x3f, 0x08, 0x22, 0x91, 0xc3, 0x1b, 0xa7,
+ 0x08, 0x22, 0x99, 0x15, 0xc1, 0x6f, 0xc9, 0xc6, 0xd6, 0xac, 0x08, 0x22,
+ 0xb9, 0x42, 0x04, 0x30, 0xc1, 0x6f, 0xd3, 0x0a, 0xc1, 0x6f, 0xdb, 0xc3,
+ 0xeb, 0xe8, 0x08, 0x22, 0xd1, 0xc4, 0xe7, 0x7b, 0x08, 0x22, 0xd9, 0xc3,
+ 0xa0, 0x2e, 0x08, 0x22, 0xe1, 0xc3, 0x33, 0x45, 0x08, 0x22, 0xe9, 0xc3,
+ 0xeb, 0x0d, 0x08, 0x22, 0xf9, 0x0f, 0xc1, 0x6f, 0xe7, 0xc5, 0xdd, 0x74,
+ 0x08, 0x23, 0x09, 0x42, 0x0a, 0x20, 0xc1, 0x6f, 0xf3, 0xc4, 0xe6, 0x3f,
+ 0x08, 0x23, 0x21, 0x0b, 0xc1, 0x6f, 0xfd, 0x07, 0xc1, 0x70, 0x0d, 0x03,
+ 0xc1, 0x70, 0x1d, 0x11, 0xc1, 0x70, 0x43, 0xc4, 0xe5, 0x1f, 0x08, 0x23,
+ 0x71, 0xc3, 0x21, 0x00, 0x08, 0x23, 0x79, 0xc2, 0x02, 0x6a, 0x08, 0x23,
+ 0x98, 0xc7, 0xc7, 0x59, 0x0d, 0xe5, 0x19, 0xc9, 0xb6, 0x15, 0x0d, 0xe5,
+ 0x11, 0xd2, 0x4d, 0x5e, 0x0d, 0xe5, 0x09, 0xce, 0x72, 0x1c, 0x0d, 0xe5,
+ 0x00, 0x46, 0x00, 0x47, 0xc1, 0x70, 0x63, 0xc9, 0xb7, 0xaa, 0x01, 0x56,
+ 0xf1, 0xc9, 0x32, 0x5f, 0x01, 0x56, 0xfb, 0x01, 0x70, 0x69, 0xc7, 0xc7,
+ 0x52, 0x01, 0x57, 0x03, 0x01, 0x70, 0x6f, 0xd3, 0x42, 0x31, 0x01, 0x5a,
+ 0x71, 0x04, 0x41, 0x70, 0x73, 0x91, 0x01, 0x09, 0xa1, 0x87, 0x01, 0x09,
+ 0x79, 0x8e, 0x01, 0x08, 0x99, 0x89, 0x01, 0x08, 0x50, 0x8f, 0x01, 0x09,
+ 0x99, 0x88, 0x01, 0x09, 0x89, 0x87, 0x01, 0x09, 0x81, 0x84, 0x01, 0x09,
+ 0x61, 0x94, 0x01, 0x08, 0xd9, 0x92, 0x01, 0x08, 0xc1, 0x8e, 0x01, 0x08,
+ 0x91, 0x8b, 0x01, 0x08, 0x81, 0x8a, 0x01, 0x08, 0x58, 0xd0, 0x60, 0xaf,
+ 0x0f, 0xc2, 0xb9, 0xcc, 0x82, 0x80, 0x01, 0x0e, 0xc9, 0xc5, 0x00, 0x62,
+ 0x01, 0x0c, 0xcb, 0x01, 0x70, 0x7f, 0x49, 0x01, 0x59, 0xc1, 0x70, 0x83,
+ 0xcb, 0x09, 0xfc, 0x01, 0x58, 0x19, 0xcb, 0x99, 0xe7, 0x01, 0x58, 0x59,
+ 0xd5, 0x00, 0x52, 0x01, 0x5b, 0x4a, 0x01, 0x70, 0x95, 0xd0, 0x60, 0xaf,
+ 0x0f, 0xc2, 0xb1, 0xc5, 0x00, 0x62, 0x01, 0x0c, 0xc3, 0x01, 0x70, 0x9b,
+ 0xcc, 0x82, 0x80, 0x01, 0x0e, 0xc1, 0x49, 0x01, 0x59, 0xc1, 0x70, 0x9f,
+ 0xcb, 0x09, 0xfc, 0x01, 0x58, 0x11, 0xcb, 0x99, 0xe7, 0x01, 0x58, 0x51,
+ 0xd5, 0x00, 0x52, 0x01, 0x5b, 0x42, 0x01, 0x70, 0xb1, 0xc5, 0x82, 0xdb,
+ 0x08, 0xd4, 0xf9, 0xcc, 0x82, 0xd4, 0x08, 0xd4, 0xf0, 0xc7, 0x45, 0xcd,
+ 0x08, 0xd4, 0xb9, 0xc8, 0x10, 0xab, 0x08, 0xd4, 0xb1, 0xcb, 0x9c, 0xb2,
+ 0x08, 0xd4, 0x29, 0xcb, 0x93, 0xc2, 0x08, 0xd4, 0x20, 0x8a, 0x08, 0xd4,
+ 0x98, 0x89, 0x08, 0xd4, 0x60, 0x83, 0x08, 0xd4, 0x49, 0xc2, 0x01, 0x0e,
+ 0x08, 0xd4, 0x40, 0xc3, 0x1d, 0x55, 0x08, 0xd4, 0x19, 0xc2, 0x01, 0x0e,
+ 0x08, 0xd2, 0xe9, 0x83, 0x08, 0xd2, 0xe0, 0x83, 0x08, 0xd4, 0x09, 0xc2,
+ 0x0e, 0xe5, 0x08, 0xd4, 0x01, 0xc2, 0x01, 0x0e, 0x08, 0xd3, 0xf8, 0x83,
+ 0x08, 0xd3, 0xc9, 0xc2, 0x01, 0x0e, 0x08, 0xd3, 0xc0, 0xc2, 0x05, 0x5c,
+ 0x08, 0xd3, 0xb9, 0xc2, 0x01, 0x0e, 0x08, 0xd3, 0x71, 0x83, 0x08, 0xd3,
+ 0x69, 0x06, 0x41, 0x70, 0xb7, 0x15, 0xc1, 0x70, 0xc1, 0xc2, 0x01, 0x0e,
+ 0x08, 0xd3, 0x61, 0x83, 0x08, 0xd3, 0x59, 0x16, 0x41, 0x70, 0xcb, 0xc2,
+ 0x01, 0x0e, 0x08, 0xd3, 0x99, 0x83, 0x08, 0xd3, 0x90, 0xc2, 0x01, 0x0e,
+ 0x08, 0xd3, 0x89, 0x83, 0x08, 0xd3, 0x80, 0x83, 0x08, 0xd3, 0x79, 0xc2,
+ 0x01, 0x01, 0x08, 0xd3, 0x51, 0xc2, 0x1a, 0x36, 0x08, 0xd3, 0x29, 0xc2,
+ 0x07, 0x69, 0x08, 0xd3, 0x00, 0xc2, 0x01, 0x0e, 0x08, 0xd3, 0x21, 0x83,
+ 0x08, 0xd3, 0x18, 0xc2, 0x01, 0x0e, 0x08, 0xd3, 0x11, 0x83, 0x08, 0xd3,
+ 0x08, 0xc2, 0x01, 0x0e, 0x08, 0xd2, 0xf9, 0x83, 0x08, 0xd2, 0xf0, 0x48,
+ 0xb7, 0xd7, 0xc1, 0x70, 0xd5, 0x03, 0xc1, 0x70, 0xdd, 0x91, 0x08, 0xd2,
+ 0xab, 0x01, 0x70, 0xe5, 0x87, 0x08, 0xd2, 0xa1, 0x97, 0x08, 0xd2, 0x9b,
+ 0x01, 0x70, 0xe9, 0x8b, 0x08, 0xd2, 0x88, 0xc4, 0x15, 0xa7, 0x08, 0x87,
+ 0xb9, 0xc2, 0x22, 0x45, 0x08, 0x87, 0xb0, 0xc3, 0x0d, 0x8f, 0x08, 0x87,
+ 0xa9, 0xc3, 0x08, 0xde, 0x08, 0x87, 0xa0, 0xc4, 0x05, 0xde, 0x08, 0x87,
+ 0x99, 0xc2, 0x0a, 0x20, 0x08, 0x87, 0x90, 0x87, 0x08, 0x87, 0x41, 0x8a,
+ 0x08, 0x86, 0xb0, 0x8a, 0x08, 0x87, 0x39, 0xc2, 0x08, 0x86, 0x08, 0x87,
+ 0x18, 0xc3, 0x45, 0x00, 0x08, 0x87, 0x09, 0xc2, 0x00, 0x45, 0x08, 0x86,
+ 0xc9, 0xc3, 0xb2, 0x3a, 0x08, 0x86, 0xb8, 0xd1, 0x50, 0x9d, 0x08, 0x7a,
+ 0xc1, 0xcd, 0x80, 0xd7, 0x08, 0x7a, 0xaa, 0x01, 0x70, 0xed, 0xc8, 0x0d,
+ 0x7e, 0x08, 0x7a, 0xa0, 0xc5, 0x25, 0x27, 0x08, 0x7a, 0x99, 0xc2, 0x01,
+ 0x04, 0x08, 0x7a, 0x90, 0xc5, 0x00, 0x34, 0x08, 0x7a, 0x69, 0xc5, 0x03,
+ 0x50, 0x08, 0x7a, 0x60, 0xc5, 0x00, 0x34, 0x08, 0x7a, 0x59, 0xc5, 0x03,
+ 0x50, 0x08, 0x7a, 0x50, 0xc5, 0x03, 0x50, 0x08, 0x7a, 0x49, 0xc5, 0x00,
+ 0x34, 0x08, 0x7a, 0x38, 0xc5, 0x03, 0x50, 0x08, 0x7a, 0x41, 0xc5, 0x00,
+ 0x34, 0x08, 0x7a, 0x30, 0xc3, 0x30, 0xe0, 0x08, 0x7a, 0x21, 0xc5, 0xd6,
+ 0xb9, 0x08, 0x79, 0xc8, 0xc3, 0x11, 0x40, 0x08, 0x7a, 0x09, 0x03, 0x41,
+ 0x70, 0xf3, 0xc3, 0x18, 0x7a, 0x08, 0x79, 0xe9, 0xc4, 0x32, 0xac, 0x08,
+ 0x79, 0x80, 0xc2, 0x01, 0x47, 0x08, 0x79, 0xb0, 0x16, 0xc1, 0x70, 0xff,
+ 0x08, 0xc1, 0x71, 0x11, 0x19, 0xc1, 0x71, 0x19, 0x0e, 0xc1, 0x71, 0x29,
+ 0x11, 0xc1, 0x71, 0x3f, 0x0b, 0xc1, 0x71, 0x58, 0x05, 0xc1, 0x71, 0x6c,
+ 0x14, 0xc1, 0x71, 0x92, 0x0a, 0xc1, 0x71, 0xad, 0x06, 0xc1, 0x71, 0xd5,
+ 0x12, 0xc1, 0x71, 0xfb, 0x07, 0xc1, 0x72, 0x34, 0x03, 0xc1, 0x72, 0x48,
+ 0xc3, 0xe4, 0xb3, 0x01, 0x98, 0x31, 0x0d, 0xc1, 0x72, 0x6e, 0x09, 0xc1,
+ 0x72, 0xcf, 0x15, 0xc1, 0x72, 0xf4, 0x10, 0xc1, 0x73, 0x0c, 0x04, 0xc1,
+ 0x73, 0x2d, 0x0f, 0xc1, 0x73, 0x4d, 0x1b, 0xc1, 0x73, 0xa0, 0xc8, 0xb9,
+ 0xe3, 0x01, 0x9e, 0xf0, 0x0e, 0xc1, 0x73, 0xac, 0x15, 0xc1, 0x73, 0xb6,
+ 0x0d, 0xc1, 0x73, 0xe6, 0xcc, 0x82, 0x68, 0x01, 0x15, 0x09, 0x16, 0xc1,
+ 0x73, 0xf2, 0x0f, 0xc1, 0x74, 0x02, 0x12, 0xc1, 0x74, 0x0c, 0x05, 0xc1,
+ 0x74, 0x18, 0x18, 0xc1, 0x74, 0x28, 0x17, 0xc1, 0x74, 0x32, 0x0a, 0xc1,
+ 0x74, 0x3e, 0x11, 0xc1, 0x74, 0x52, 0x08, 0xc1, 0x74, 0x5c, 0xc7, 0xc9,
+ 0x97, 0x0f, 0x8c, 0xf9, 0x10, 0xc1, 0x74, 0x74, 0xc2, 0x01, 0x8d, 0x0f,
+ 0x8c, 0xa1, 0xc8, 0x0a, 0x5f, 0x01, 0x4e, 0x31, 0xd5, 0x38, 0x11, 0x01,
+ 0x4e, 0x21, 0xc2, 0x18, 0xc3, 0x0f, 0x8a, 0x78, 0xc9, 0xb6, 0x5d, 0x01,
+ 0x20, 0xd3, 0x01, 0x74, 0x7e, 0xc4, 0x45, 0xaa, 0x01, 0x21, 0x01, 0xcf,
+ 0x68, 0x9d, 0x01, 0x20, 0xb1, 0x45, 0x9d, 0x3b, 0xc1, 0x74, 0x84, 0x48,
+ 0x43, 0xad, 0xc1, 0x74, 0x90, 0xcf, 0x67, 0x9e, 0x01, 0x0a, 0x78, 0x07,
+ 0xc1, 0x74, 0x9c, 0xcf, 0x63, 0xa2, 0x01, 0x20, 0x80, 0x07, 0xc1, 0x74,
+ 0xab, 0xc3, 0x11, 0x48, 0x01, 0x20, 0x00, 0xcd, 0x7f, 0xe0, 0x01, 0x20,
+ 0xe1, 0xc8, 0xb9, 0x63, 0x01, 0x20, 0x60, 0xc5, 0x63, 0xac, 0x01, 0x20,
+ 0xd9, 0x10, 0x41, 0x74, 0xb7, 0xc4, 0x24, 0x93, 0x01, 0x20, 0xc1, 0xcd,
+ 0x7b, 0xdd, 0x01, 0x20, 0x68, 0xc8, 0xb9, 0xeb, 0x01, 0x20, 0x41, 0xc3,
+ 0x0a, 0x59, 0x01, 0x20, 0x38, 0x0f, 0xc1, 0x74, 0xc3, 0xc2, 0x03, 0x07,
+ 0x00, 0x39, 0x33, 0x01, 0x74, 0xcf, 0x16, 0xc1, 0x74, 0xd5, 0x15, 0xc1,
+ 0x74, 0xe4, 0x14, 0xc1, 0x75, 0x02, 0xc4, 0xc5, 0x85, 0x00, 0x39, 0x49,
+ 0x87, 0x00, 0x39, 0x29, 0xcd, 0x7a, 0xd9, 0x00, 0x39, 0x21, 0xc3, 0x21,
+ 0x00, 0x00, 0x39, 0x11, 0xc6, 0xd7, 0x18, 0x00, 0x39, 0x01, 0xc4, 0xe5,
+ 0x53, 0x00, 0x38, 0xf9, 0xc4, 0xe4, 0xb7, 0x00, 0x38, 0xeb, 0x01, 0x75,
+ 0x0e, 0xc2, 0x00, 0x5b, 0x00, 0x38, 0xbb, 0x01, 0x75, 0x14, 0xc4, 0x65,
+ 0x55, 0x00, 0x38, 0xc9, 0xc3, 0x7c, 0xad, 0x00, 0x38, 0xc1, 0x06, 0xc1,
+ 0x75, 0x1a, 0xc5, 0xde, 0xc8, 0x00, 0x38, 0x9b, 0x01, 0x75, 0x26, 0xc4,
+ 0xe5, 0xaf, 0x00, 0x38, 0x91, 0xc5, 0x5d, 0xea, 0x00, 0x38, 0x80, 0x43,
+ 0x4e, 0x12, 0xc1, 0x75, 0x2c, 0x48, 0xbc, 0x0b, 0xc1, 0x75, 0x38, 0xcf,
+ 0x66, 0xf9, 0x00, 0x38, 0x28, 0xc7, 0x08, 0x0b, 0x00, 0x39, 0xc9, 0xca,
+ 0x03, 0x76, 0x00, 0x39, 0xc0, 0x45, 0xdb, 0xda, 0xc1, 0x75, 0x4a, 0xc4,
+ 0xe4, 0x23, 0x00, 0x39, 0xf9, 0xc7, 0xca, 0x15, 0x00, 0x3a, 0x10, 0xc6,
+ 0x19, 0x80, 0x00, 0x39, 0xa9, 0xc5, 0x00, 0x34, 0x00, 0x39, 0xa1, 0xc5,
+ 0x03, 0x50, 0x00, 0x39, 0x98, 0xc6, 0x19, 0x80, 0x00, 0x39, 0x91, 0xc5,
+ 0x00, 0x34, 0x00, 0x39, 0x89, 0xc5, 0x03, 0x50, 0x00, 0x39, 0x80, 0xc9,
+ 0xae, 0x59, 0x00, 0x38, 0x51, 0x4b, 0x93, 0x33, 0x41, 0x75, 0x56, 0x48,
+ 0xc3, 0x03, 0xc1, 0x75, 0x62, 0x4a, 0xa5, 0x7e, 0x41, 0x75, 0x71, 0xcf,
+ 0x69, 0x60, 0x00, 0x38, 0x01, 0x45, 0x75, 0x59, 0x41, 0x75, 0x80, 0x51,
+ 0x58, 0x0d, 0xc1, 0x75, 0x8c, 0x4a, 0x0d, 0xe6, 0x41, 0x75, 0x98, 0xc5,
+ 0x03, 0x50, 0x00, 0x3a, 0x39, 0xc5, 0x00, 0x34, 0x00, 0x3a, 0x40, 0x91,
+ 0x05, 0x40, 0x39, 0xc2, 0x03, 0x76, 0x05, 0x40, 0x40, 0x91, 0x05, 0x40,
+ 0x49, 0xc2, 0x03, 0x76, 0x05, 0x40, 0x50, 0x91, 0x05, 0x40, 0x61, 0xc2,
+ 0x03, 0x76, 0x05, 0x40, 0x68, 0x16, 0xc1, 0x75, 0xa4, 0x91, 0x05, 0x40,
+ 0xa1, 0xc2, 0x03, 0x76, 0x05, 0x40, 0xa8, 0x06, 0xc1, 0x75, 0xae, 0x91,
+ 0x05, 0x40, 0xb1, 0xc2, 0x03, 0x76, 0x05, 0x40, 0xb8, 0x91, 0x05, 0x40,
+ 0x71, 0xc2, 0x03, 0x76, 0x05, 0x40, 0x78, 0x91, 0x05, 0x40, 0xc9, 0xc2,
+ 0x03, 0x76, 0x05, 0x40, 0xd0, 0x91, 0x05, 0x40, 0xd9, 0xc2, 0x03, 0x76,
+ 0x05, 0x40, 0xe0, 0x91, 0x05, 0x40, 0xf1, 0xc2, 0x00, 0x64, 0x05, 0x41,
+ 0x00, 0xc7, 0x10, 0xac, 0x05, 0x40, 0x59, 0xd0, 0x60, 0x5f, 0x05, 0x41,
+ 0x60, 0xd6, 0x2d, 0xc9, 0x0f, 0xaf, 0x19, 0xc2, 0x00, 0x2a, 0x0f, 0xa8,
+ 0x42, 0x01, 0x75, 0xb8, 0x95, 0x01, 0x39, 0x40, 0xd1, 0x52, 0x24, 0x01,
+ 0x3e, 0x49, 0xc2, 0x03, 0x73, 0x01, 0x14, 0x1b, 0x01, 0x75, 0xbe, 0x46,
+ 0x03, 0x50, 0xc1, 0x75, 0xc2, 0x45, 0x01, 0xac, 0xc1, 0x75, 0xce, 0x47,
+ 0x14, 0x16, 0x41, 0x75, 0xe0, 0x0e, 0xc1, 0x75, 0xec, 0xd1, 0x1c, 0xda,
+ 0x01, 0x03, 0xf1, 0x07, 0xc1, 0x75, 0xf8, 0xc5, 0x1c, 0xa1, 0x01, 0x03,
+ 0xd9, 0xc9, 0x65, 0xfa, 0x01, 0x03, 0xd1, 0xc4, 0x24, 0x35, 0x01, 0x03,
+ 0xc9, 0x15, 0xc1, 0x76, 0x04, 0x08, 0xc1, 0x76, 0x10, 0xc4, 0x16, 0x57,
+ 0x01, 0x03, 0x81, 0x16, 0xc1, 0x76, 0x1c, 0xc3, 0x05, 0x17, 0x00, 0x05,
+ 0xc8, 0xca, 0x9c, 0xdc, 0x00, 0xe6, 0x39, 0xca, 0x9f, 0xa2, 0x00, 0xe6,
+ 0x31, 0xca, 0x9d, 0xd6, 0x00, 0xe6, 0x29, 0xcb, 0x94, 0xbf, 0x00, 0xe6,
+ 0x21, 0xc5, 0xdf, 0x68, 0x00, 0xe6, 0x19, 0x12, 0xc1, 0x76, 0x28, 0xc5,
+ 0xdd, 0x97, 0x00, 0xe6, 0x00, 0x08, 0xc1, 0x76, 0x34, 0x83, 0x00, 0xdc,
+ 0x1b, 0x01, 0x76, 0x3e, 0x04, 0xc1, 0x76, 0x48, 0x0e, 0xc1, 0x76, 0x52,
+ 0x14, 0xc1, 0x76, 0x5c, 0x15, 0xc1, 0x76, 0x66, 0x0d, 0xc1, 0x76, 0x70,
+ 0xc2, 0x01, 0x0e, 0x00, 0xdd, 0x01, 0xc2, 0x23, 0xe3, 0x00, 0xdc, 0xf9,
+ 0xc2, 0x00, 0x4c, 0x00, 0xdc, 0xe9, 0xc2, 0x1a, 0x36, 0x00, 0xdc, 0xd1,
+ 0xc2, 0x00, 0x3f, 0x00, 0xdc, 0xc9, 0xc2, 0x06, 0x8c, 0x00, 0xdc, 0xb9,
+ 0xc2, 0x07, 0x44, 0x00, 0xdc, 0xa9, 0x10, 0xc1, 0x76, 0x7a, 0xc2, 0x0c,
+ 0x25, 0x00, 0xdc, 0x99, 0xc2, 0x00, 0x44, 0x00, 0xdc, 0x91, 0xc2, 0x05,
+ 0x5c, 0x00, 0xdc, 0x81, 0xc2, 0x26, 0x94, 0x00, 0xdc, 0x79, 0xc2, 0x00,
+ 0x2e, 0x00, 0xdc, 0x71, 0xc2, 0x07, 0x69, 0x00, 0xdc, 0x61, 0xc2, 0x0e,
+ 0x13, 0x00, 0xdc, 0x59, 0x87, 0x00, 0xdc, 0x43, 0x01, 0x76, 0x8a, 0x91,
+ 0x00, 0xdc, 0x39, 0x97, 0x00, 0xdc, 0x29, 0x8b, 0x00, 0xdc, 0x20, 0xc4,
+ 0x24, 0x35, 0x00, 0xdd, 0xc9, 0xc5, 0x05, 0x1b, 0x00, 0xdd, 0xc1, 0x15,
+ 0xc1, 0x76, 0x8e, 0x08, 0xc1, 0x76, 0x9a, 0x16, 0xc1, 0x76, 0xa6, 0xc3,
+ 0x05, 0x17, 0x00, 0xdd, 0x89, 0xc4, 0x16, 0x57, 0x00, 0xdd, 0x80, 0x47,
+ 0xcb, 0x9d, 0xc1, 0x76, 0xb2, 0x42, 0x18, 0x79, 0xc1, 0x76, 0xbe, 0xc7,
+ 0xc4, 0x73, 0x00, 0xdd, 0x08, 0xc6, 0x21, 0x26, 0x00, 0xdd, 0x59, 0x42,
+ 0x07, 0x44, 0x41, 0x76, 0xca, 0x10, 0xc1, 0x76, 0xd4, 0xc5, 0xe3, 0xbe,
+ 0x00, 0xdd, 0x40, 0xca, 0x37, 0x20, 0x01, 0x13, 0xf9, 0xc5, 0x09, 0x02,
+ 0x01, 0x13, 0xe8, 0x4c, 0x23, 0x24, 0xc1, 0x76, 0xf2, 0xcb, 0x0e, 0xbc,
+ 0x01, 0x55, 0xa1, 0x44, 0x1f, 0xeb, 0xc1, 0x76, 0xfe, 0xcf, 0x66, 0x45,
+ 0x01, 0x55, 0xc0, 0x00, 0x41, 0x77, 0x0a, 0xd0, 0x01, 0x37, 0x01, 0x4b,
+ 0xc9, 0x42, 0x06, 0x82, 0x41, 0x77, 0x1f, 0xc3, 0x05, 0xe3, 0x01, 0x55,
+ 0xe9, 0xcf, 0x65, 0xfa, 0x01, 0x55, 0xf9, 0xd9, 0x20, 0x32, 0x01, 0x56,
+ 0x08, 0xca, 0x0e, 0xbd, 0x01, 0x04, 0x61, 0xc4, 0x00, 0x48, 0x01, 0x04,
+ 0x40, 0xc4, 0x15, 0xa7, 0x01, 0x04, 0x39, 0xc2, 0x22, 0x45, 0x01, 0x04,
+ 0x30, 0xc3, 0x0d, 0x8f, 0x01, 0x04, 0x29, 0xc3, 0x08, 0xde, 0x01, 0x04,
+ 0x20, 0xc4, 0x05, 0xde, 0x01, 0x04, 0x19, 0xc2, 0x0a, 0x20, 0x01, 0x04,
+ 0x10, 0x4a, 0x01, 0xa7, 0xc1, 0x77, 0x2b, 0x4e, 0x1a, 0x50, 0x41, 0x77,
+ 0x42, 0x42, 0x00, 0xea, 0xc1, 0x77, 0x4e, 0x07, 0xc1, 0x77, 0x60, 0x14,
+ 0xc1, 0x77, 0x7b, 0x16, 0xc1, 0x77, 0x8d, 0xcc, 0x8d, 0xb4, 0x0f, 0xa9,
+ 0xc9, 0xce, 0x6f, 0x44, 0x0f, 0xa9, 0xc1, 0xd1, 0x52, 0xac, 0x01, 0x53,
+ 0x09, 0x03, 0xc1, 0x77, 0x99, 0xd1, 0x50, 0x7b, 0x07, 0xf2, 0x89, 0xc9,
+ 0x11, 0x47, 0x07, 0xf2, 0x91, 0xc9, 0xac, 0x22, 0x07, 0xf2, 0xa1, 0xcd,
+ 0x2d, 0xa6, 0x07, 0xf2, 0xb1, 0x42, 0x00, 0x58, 0xc1, 0x77, 0xab, 0xcb,
+ 0x96, 0x82, 0x07, 0xf2, 0xf9, 0x12, 0xc1, 0x77, 0xb7, 0xcc, 0x8d, 0x0c,
+ 0x07, 0xf3, 0x19, 0xd1, 0x57, 0x0e, 0x07, 0xf3, 0x29, 0xcb, 0x9b, 0x9f,
+ 0x07, 0xf3, 0x48, 0xcc, 0x25, 0x70, 0x01, 0x55, 0x60, 0xc3, 0x82, 0xa4,
+ 0x00, 0x04, 0x91, 0x42, 0x00, 0xc0, 0xc1, 0x77, 0xc3, 0xc3, 0x83, 0x28,
+ 0x00, 0x04, 0x88, 0xce, 0x54, 0x14, 0x01, 0x1c, 0xc9, 0xc2, 0x00, 0x2a,
+ 0x0f, 0xad, 0x42, 0x01, 0x77, 0xcf, 0xc2, 0x03, 0x48, 0x0f, 0xa3, 0xc0,
+ 0xc5, 0x09, 0x02, 0x01, 0x10, 0xe8, 0xd5, 0x37, 0x15, 0x01, 0x17, 0x41,
+ 0xce, 0x70, 0x40, 0x01, 0x15, 0x81, 0x46, 0x25, 0x71, 0xc1, 0x77, 0xd5,
+ 0x46, 0x03, 0x50, 0x41, 0x77, 0xe1, 0x42, 0x00, 0xea, 0xc1, 0x77, 0xf9,
+ 0xc9, 0xac, 0x22, 0x07, 0xf0, 0xa1, 0x07, 0xc1, 0x78, 0x05, 0xcd, 0x2d,
+ 0xa6, 0x07, 0xf0, 0xb1, 0xd3, 0x23, 0xa1, 0x07, 0xf0, 0xc9, 0xce, 0x71,
+ 0xe4, 0x07, 0xf1, 0x81, 0xcd, 0x7c, 0x86, 0x07, 0xf1, 0xa1, 0x0e, 0xc1,
+ 0x78, 0x17, 0x46, 0x01, 0x17, 0xc1, 0x78, 0x23, 0x4c, 0x1b, 0x6e, 0x41,
+ 0x78, 0x51, 0xcd, 0x7e, 0x67, 0x01, 0x18, 0xc1, 0xc7, 0xcd, 0x5d, 0x0f,
+ 0xb6, 0x80, 0x04, 0xc1, 0x78, 0x5d, 0x47, 0x75, 0xd5, 0xc1, 0x78, 0x69,
+ 0x16, 0xc1, 0x78, 0x81, 0x08, 0xc1, 0x78, 0x99, 0x15, 0xc1, 0x78, 0xa3,
+ 0x49, 0xb4, 0x53, 0xc1, 0x78, 0xaf, 0x48, 0xbf, 0x03, 0xc1, 0x78, 0xc7,
+ 0x48, 0xbe, 0x33, 0xc1, 0x78, 0xdf, 0x0d, 0xc1, 0x78, 0xf7, 0x49, 0xad,
+ 0xae, 0xc1, 0x79, 0x03, 0xc9, 0xb5, 0x34, 0x0f, 0x85, 0xf9, 0xcb, 0x9a,
+ 0xc3, 0x0f, 0x86, 0xf8, 0x16, 0xc1, 0x79, 0x1b, 0x08, 0x41, 0x79, 0x27,
+ 0x43, 0x01, 0xf4, 0xc1, 0x79, 0x33, 0x45, 0x01, 0xac, 0xc1, 0x79, 0x45,
+ 0xd1, 0x0e, 0xb6, 0x01, 0x53, 0x90, 0x46, 0x07, 0x91, 0xc1, 0x79, 0x61,
+ 0xc9, 0xb1, 0x7a, 0x0f, 0xa6, 0x20, 0x00, 0xc1, 0x79, 0x6d, 0xd8, 0x21,
+ 0x74, 0x01, 0x33, 0xe8, 0x4d, 0x27, 0x71, 0xc1, 0x79, 0x79, 0x4f, 0x01,
+ 0xf7, 0x41, 0x79, 0xe1, 0x16, 0xc1, 0x7a, 0x49, 0xc8, 0x4c, 0xbc, 0x01,
+ 0x24, 0x31, 0x07, 0xc1, 0x7a, 0x5b, 0x15, 0xc1, 0x7a, 0x67, 0x08, 0x41,
+ 0x7a, 0x73, 0xc4, 0x24, 0x35, 0x01, 0x23, 0xe1, 0xc5, 0x05, 0x1b, 0x01,
+ 0x23, 0xd9, 0x15, 0xc1, 0x7a, 0x7f, 0x08, 0xc1, 0x7a, 0x8b, 0x16, 0xc1,
+ 0x7a, 0x97, 0xc3, 0x05, 0x17, 0x01, 0x23, 0xa0, 0x0d, 0xc1, 0x7a, 0xa3,
+ 0xc5, 0xc4, 0x44, 0x01, 0x90, 0x0b, 0x01, 0x7a, 0xb5, 0x16, 0xc1, 0x7a,
+ 0xbb, 0xc5, 0xc3, 0xd4, 0x01, 0x90, 0x1b, 0x01, 0x7a, 0xcd, 0xc5, 0xc4,
+ 0x7c, 0x01, 0x90, 0x23, 0x01, 0x7a, 0xd3, 0x12, 0xc1, 0x7a, 0xd9, 0xc4,
+ 0x95, 0xb8, 0x01, 0x90, 0x33, 0x01, 0x7a, 0xeb, 0xc5, 0xc2, 0x9e, 0x01,
+ 0x90, 0x3b, 0x01, 0x7a, 0xf1, 0x05, 0xc1, 0x7a, 0xf7, 0xc5, 0x7b, 0x22,
+ 0x01, 0x90, 0x6a, 0x01, 0x7b, 0x09, 0xc4, 0xaa, 0x23, 0x01, 0x90, 0xe9,
+ 0xc3, 0x0d, 0x7e, 0x01, 0x90, 0xf0, 0xc3, 0x05, 0x17, 0x01, 0x91, 0x01,
+ 0x16, 0xc1, 0x7b, 0x0f, 0x08, 0xc1, 0x7b, 0x21, 0x15, 0xc1, 0x7b, 0x31,
+ 0x07, 0xc1, 0x7b, 0x4f, 0x10, 0xc1, 0x7b, 0x61, 0x0f, 0xc1, 0x7b, 0x6d,
+ 0x19, 0xc1, 0x7b, 0x79, 0xc4, 0xe6, 0x8b, 0x01, 0x91, 0x91, 0x05, 0xc1,
+ 0x7b, 0x85, 0xc5, 0xdc, 0xed, 0x01, 0x91, 0xc1, 0x42, 0x01, 0xa5, 0xc1,
+ 0x7b, 0x91, 0xc8, 0xbb, 0xbb, 0x01, 0x91, 0xf8, 0xc2, 0x01, 0x89, 0x01,
+ 0x11, 0x29, 0x45, 0x01, 0xac, 0x41, 0x7b, 0xa1, 0xca, 0x1b, 0xab, 0x01,
+ 0x01, 0x49, 0xc2, 0x07, 0xc3, 0x01, 0x70, 0x79, 0xc7, 0x63, 0xc8, 0x01,
+ 0x72, 0x68, 0xc5, 0x2a, 0xb4, 0x08, 0xd7, 0xc1, 0xc7, 0x44, 0x91, 0x08,
+ 0xd7, 0x80, 0x00, 0x41, 0x7b, 0xad, 0x08, 0xc1, 0x7b, 0xbc, 0x8b, 0x08,
+ 0xd6, 0xbb, 0x01, 0x7b, 0xc6, 0x97, 0x08, 0xd6, 0xcb, 0x01, 0x7b, 0xca,
+ 0x91, 0x08, 0xd6, 0xc1, 0x87, 0x08, 0xd6, 0xb1, 0x83, 0x08, 0xd6, 0xa9,
+ 0x05, 0xc1, 0x7b, 0xce, 0xc2, 0x00, 0x9a, 0x08, 0xd6, 0x91, 0x12, 0xc1,
+ 0x7b, 0xd8, 0x10, 0xc1, 0x7b, 0xe2, 0x16, 0xc1, 0x7b, 0xec, 0xc2, 0x02,
+ 0x1d, 0x08, 0xd6, 0x61, 0xc2, 0x0e, 0xe5, 0x08, 0xd6, 0x59, 0x0d, 0xc1,
+ 0x7b, 0xf6, 0xc2, 0x07, 0x69, 0x08, 0xd6, 0x49, 0xc2, 0x01, 0x0e, 0x08,
+ 0xd6, 0x41, 0xc2, 0x06, 0x8c, 0x08, 0xd6, 0x31, 0xc2, 0x05, 0x5c, 0x08,
+ 0xd6, 0x29, 0xc2, 0x0c, 0x25, 0x08, 0xd6, 0x21, 0xc2, 0x00, 0x3f, 0x08,
+ 0xd6, 0x19, 0xc2, 0x00, 0x96, 0x08, 0xd6, 0x10, 0xc5, 0x2a, 0xb4, 0x08,
+ 0xd7, 0x91, 0xca, 0xa5, 0xce, 0x08, 0xd7, 0x88, 0x00, 0x41, 0x7c, 0x00,
+ 0xc6, 0x2a, 0xb3, 0x08, 0xd7, 0x50, 0xc5, 0x2a, 0xb4, 0x08, 0xd7, 0x49,
+ 0xc4, 0x0c, 0x34, 0x08, 0xd7, 0x2a, 0x01, 0x7c, 0x0f, 0xc4, 0x0b, 0x44,
+ 0x0f, 0x99, 0xa1, 0xc9, 0xb3, 0xde, 0x0f, 0xd7, 0x99, 0xc7, 0xc4, 0x34,
+ 0x0f, 0xd7, 0xa1, 0xc6, 0x27, 0xbf, 0x01, 0x70, 0xc8, 0x47, 0x37, 0x49,
+ 0xc1, 0x7c, 0x15, 0xd6, 0x30, 0x1b, 0x08, 0x43, 0xc1, 0x42, 0x00, 0x58,
+ 0x41, 0x7c, 0x23, 0x18, 0xc1, 0x7c, 0x2f, 0x0d, 0xc1, 0x7c, 0x3b, 0x16,
+ 0xc1, 0x7c, 0x4d, 0x1b, 0xc1, 0x7c, 0x57, 0xc3, 0xed, 0x2c, 0x0b, 0x5c,
+ 0x59, 0x42, 0x01, 0x0e, 0xc1, 0x7c, 0x63, 0xc4, 0xe8, 0xab, 0x0b, 0x5c,
+ 0x39, 0xc4, 0xe5, 0xe7, 0x0b, 0x5c, 0x21, 0xc5, 0xe1, 0xbb, 0x0b, 0x5c,
+ 0x09, 0x0e, 0x41, 0x7c, 0x6d, 0x05, 0xc1, 0x7c, 0x79, 0xc3, 0xeb, 0xfd,
+ 0x0b, 0x59, 0x71, 0xc2, 0x20, 0xa8, 0x0b, 0x59, 0x69, 0x10, 0xc1, 0x7c,
+ 0x85, 0xc5, 0xda, 0xc7, 0x0b, 0x59, 0x51, 0x0a, 0xc1, 0x7c, 0xa1, 0xc3,
+ 0x77, 0x3c, 0x0b, 0x59, 0x31, 0xc3, 0xe5, 0x25, 0x0b, 0x59, 0x21, 0xc4,
+ 0xe8, 0x87, 0x0b, 0x59, 0x19, 0xc3, 0xbe, 0xd3, 0x0b, 0x59, 0x09, 0xc3,
+ 0x20, 0xa7, 0x0b, 0x58, 0xf1, 0xc3, 0xeb, 0xee, 0x0b, 0x58, 0xe0, 0xc8,
+ 0xc3, 0x23, 0x0b, 0x5b, 0xb9, 0xc8, 0xc1, 0x73, 0x0b, 0x5b, 0xb1, 0x16,
+ 0xc1, 0x7c, 0xb3, 0x05, 0xc1, 0x7c, 0xc2, 0xd2, 0x48, 0x96, 0x0b, 0x5b,
+ 0x90, 0xc2, 0x04, 0x35, 0x0b, 0x5b, 0x89, 0x44, 0xa8, 0x14, 0x41, 0x7c,
+ 0xce, 0xc2, 0x20, 0xa8, 0x0b, 0x5b, 0x79, 0xca, 0xa8, 0x12, 0x0b, 0x5b,
+ 0x69, 0xce, 0x76, 0x36, 0x0b, 0x5b, 0x30, 0xc3, 0xed, 0x26, 0x0b, 0x5b,
+ 0x59, 0xc3, 0xec, 0xdb, 0x0b, 0x5b, 0x48, 0xc3, 0x45, 0x09, 0x0b, 0x5b,
+ 0x51, 0x1b, 0xc1, 0x7c, 0xda, 0xc3, 0x2a, 0x40, 0x0b, 0x5a, 0x20, 0xc3,
+ 0x83, 0xb7, 0x0b, 0x5b, 0x41, 0xc2, 0x02, 0xb5, 0x0b, 0x5b, 0x28, 0xc3,
+ 0x41, 0x99, 0x0b, 0x5b, 0x19, 0xc4, 0xea, 0x63, 0x0b, 0x5a, 0x11, 0xc4,
+ 0xe4, 0xe3, 0x0b, 0x5a, 0x01, 0xc4, 0xe6, 0xb3, 0x0b, 0x59, 0xd9, 0x16,
+ 0x41, 0x7c, 0xe6, 0xc8, 0xbc, 0x93, 0x0b, 0x5b, 0x09, 0x42, 0x01, 0x04,
+ 0x41, 0x7c, 0xf0, 0xc9, 0x36, 0xdd, 0x0b, 0x5a, 0xf9, 0x95, 0x0b, 0x5a,
+ 0xe0, 0xc4, 0x15, 0xa7, 0x0b, 0x5a, 0xb9, 0xc2, 0x22, 0x45, 0x0b, 0x5a,
+ 0xb0, 0xc3, 0x0d, 0x8f, 0x0b, 0x5a, 0xa9, 0xc3, 0x08, 0xde, 0x0b, 0x5a,
+ 0xa0, 0xc4, 0x05, 0xde, 0x0b, 0x5a, 0x99, 0xc2, 0x0a, 0x20, 0x0b, 0x5a,
+ 0x90, 0xc3, 0xe5, 0x26, 0x0b, 0x59, 0xb1, 0xc2, 0x02, 0x93, 0x0b, 0x59,
+ 0x80, 0xc3, 0xaa, 0x1a, 0x0b, 0x59, 0xa1, 0x91, 0x0b, 0x59, 0x88, 0xc3,
+ 0x45, 0xcb, 0x0b, 0x59, 0x99, 0xc2, 0x03, 0x47, 0x0b, 0x59, 0x90, 0x03,
+ 0xc1, 0x7c, 0xf8, 0x98, 0x0b, 0x58, 0xb9, 0x84, 0x0b, 0x58, 0xb1, 0x19,
+ 0xc1, 0x7d, 0x00, 0x0b, 0xc1, 0x7d, 0x08, 0x17, 0x41, 0x7d, 0x10, 0x98,
+ 0x0b, 0x58, 0xc9, 0x84, 0x0b, 0x58, 0xc0, 0x03, 0xc1, 0x7d, 0x18, 0x98,
+ 0x0b, 0x58, 0x19, 0x84, 0x0b, 0x58, 0x10, 0x98, 0x0b, 0x58, 0x99, 0x84,
+ 0x0b, 0x58, 0x91, 0x11, 0x41, 0x7d, 0x20, 0x03, 0xc1, 0x7d, 0x28, 0x98,
+ 0x0b, 0x58, 0x39, 0x84, 0x0b, 0x58, 0x30, 0x98, 0x0b, 0x58, 0x49, 0x84,
+ 0x0b, 0x58, 0x41, 0x07, 0x41, 0x7d, 0x30, 0xc4, 0x26, 0xcf, 0x0f, 0xa7,
+ 0x79, 0xc4, 0x00, 0xfa, 0x01, 0x80, 0x92, 0x01, 0x7d, 0x38, 0x00, 0xc1,
+ 0x7d, 0x3e, 0xcb, 0x7b, 0xec, 0x0f, 0xa5, 0xd8, 0x91, 0x08, 0x5d, 0x51,
+ 0xc4, 0x15, 0xa9, 0x08, 0x5d, 0x70, 0xc3, 0xe2, 0x62, 0x08, 0x5c, 0x79,
+ 0xc4, 0xdd, 0x34, 0x08, 0x5c, 0x68, 0x16, 0xc1, 0x7d, 0x66, 0xc3, 0x05,
+ 0x17, 0x08, 0x48, 0xb2, 0x01, 0x7d, 0x76, 0x16, 0xc1, 0x7d, 0x7c, 0x15,
+ 0xc1, 0x7d, 0x88, 0xc4, 0xbf, 0x6b, 0x08, 0x48, 0x99, 0xc3, 0xec, 0xd8,
+ 0x08, 0x48, 0x91, 0xc2, 0x03, 0x07, 0x08, 0x48, 0x81, 0x03, 0xc1, 0x7d,
+ 0x9a, 0xc3, 0x21, 0x00, 0x08, 0x48, 0x69, 0xc3, 0x04, 0xae, 0x08, 0x48,
+ 0x61, 0xc4, 0x14, 0x9f, 0x08, 0x48, 0x59, 0xc3, 0xbd, 0xa8, 0x08, 0x48,
+ 0x51, 0xc3, 0x4d, 0x48, 0x08, 0x48, 0x49, 0xc2, 0x00, 0x5b, 0x08, 0x48,
+ 0x23, 0x01, 0x7d, 0xa6, 0xc3, 0x65, 0x55, 0x08, 0x48, 0x31, 0xc3, 0xeb,
+ 0x76, 0x08, 0x48, 0x29, 0xc4, 0xde, 0x0f, 0x08, 0x48, 0x19, 0xc4, 0xe6,
+ 0xf7, 0x08, 0x48, 0x11, 0xc3, 0x03, 0x28, 0x08, 0x48, 0x08, 0x0d, 0xc1,
+ 0x7d, 0xaa, 0x09, 0xc1, 0x7d, 0xb4, 0x10, 0xc1, 0x7d, 0xbe, 0x05, 0xc1,
+ 0x7d, 0xd4, 0xc2, 0x26, 0x94, 0x05, 0x42, 0x31, 0x16, 0xc1, 0x7d, 0xe1,
+ 0x06, 0xc1, 0x7d, 0xf3, 0x12, 0xc1, 0x7e, 0x03, 0xc2, 0x02, 0x1d, 0x05,
+ 0x42, 0x71, 0xc2, 0x00, 0x3f, 0x05, 0x42, 0x79, 0xc2, 0x00, 0x4c, 0x05,
+ 0x42, 0x99, 0x1c, 0xc1, 0x7e, 0x0d, 0x15, 0xc1, 0x7e, 0x17, 0xc2, 0x1a,
+ 0x36, 0x05, 0x42, 0xb9, 0xc2, 0x00, 0x9a, 0x05, 0x42, 0xc1, 0xc2, 0x00,
+ 0x96, 0x05, 0x42, 0xc9, 0xc2, 0x01, 0x0e, 0x05, 0x42, 0xe1, 0x83, 0x05,
+ 0x42, 0xeb, 0x01, 0x7e, 0x27, 0x8b, 0x05, 0x42, 0xf1, 0x97, 0x05, 0x42,
+ 0xf9, 0x87, 0x05, 0x43, 0x03, 0x01, 0x7e, 0x33, 0x91, 0x05, 0x43, 0x09,
+ 0xc2, 0x0e, 0x13, 0x05, 0x43, 0x11, 0xc2, 0x23, 0xe3, 0x05, 0x43, 0x19,
+ 0xc2, 0x01, 0xa7, 0x05, 0x43, 0x21, 0x45, 0x16, 0x11, 0x41, 0x7e, 0x37,
+ 0x17, 0xc1, 0x7e, 0x43, 0xcf, 0x6c, 0x03, 0x05, 0x43, 0xa0, 0xc4, 0x01,
+ 0x0d, 0x05, 0x43, 0xb1, 0xcb, 0x98, 0x9d, 0x05, 0x43, 0xb8, 0xc9, 0xa9,
+ 0xf2, 0x08, 0x0e, 0x81, 0x0e, 0xc1, 0x7e, 0x4f, 0xc6, 0xd7, 0x36, 0x08,
+ 0x0f, 0xa0, 0xcc, 0x8c, 0xc4, 0x08, 0x0e, 0x91, 0xc4, 0xe7, 0x7f, 0x08,
+ 0x0e, 0xc1, 0xc4, 0x5e, 0xb6, 0x08, 0x0f, 0x80, 0x03, 0xc1, 0x7e, 0x5b,
+ 0xc4, 0xe7, 0x57, 0x08, 0x0e, 0xa1, 0xc3, 0x41, 0x99, 0x08, 0x0e, 0xe1,
+ 0x11, 0x41, 0x7e, 0x6b, 0xc4, 0x29, 0x3c, 0x08, 0x0e, 0xa9, 0xc8, 0xbd,
+ 0x23, 0x08, 0x0f, 0xe0, 0xc5, 0xc0, 0x9e, 0x08, 0x0e, 0xb1, 0xc3, 0x00,
+ 0xff, 0x08, 0x0f, 0x49, 0xc3, 0x02, 0x53, 0x08, 0x0f, 0x50, 0x11, 0xc1,
+ 0x7e, 0x7a, 0xc2, 0x01, 0xe6, 0x08, 0x0f, 0x8b, 0x01, 0x7e, 0x84, 0xc8,
+ 0xba, 0xbb, 0x08, 0x0f, 0x58, 0x42, 0x00, 0x0a, 0xc1, 0x7e, 0x8a, 0xc2,
+ 0x3c, 0xd1, 0x08, 0x0e, 0xf9, 0xc4, 0x02, 0xb5, 0x08, 0x0f, 0x29, 0xc8,
+ 0xba, 0x03, 0x08, 0x0f, 0xd9, 0xc7, 0xc6, 0xfe, 0x08, 0x0f, 0xd0, 0xc6,
+ 0xd2, 0x1a, 0x08, 0x0e, 0xe9, 0xc5, 0xde, 0xe1, 0x08, 0x0e, 0xf0, 0x86,
+ 0x08, 0x0f, 0x01, 0xc2, 0x00, 0x31, 0x08, 0x0f, 0xb0, 0xc4, 0xe6, 0x0b,
+ 0x08, 0x0f, 0x19, 0xc2, 0x00, 0x29, 0x08, 0x0f, 0x78, 0xc2, 0x01, 0x02,
+ 0x08, 0x0f, 0x69, 0xc6, 0xd0, 0xbe, 0x08, 0x0f, 0xa8, 0xc5, 0x92, 0x8a,
+ 0x08, 0x0f, 0xc9, 0xc7, 0xca, 0x2a, 0x08, 0x0e, 0xb8, 0xc4, 0x05, 0xde,
+ 0x00, 0x00, 0x99, 0xc2, 0x0a, 0x20, 0x00, 0x00, 0x90, 0x44, 0x02, 0xcc,
+ 0xc1, 0x7e, 0x94, 0x4b, 0x96, 0x1f, 0x41, 0x7e, 0xed, 0xc5, 0x00, 0xf9,
+ 0x08, 0xd8, 0x03, 0x01, 0x7e, 0xf9, 0xc6, 0x03, 0x81, 0x05, 0x47, 0xd8,
+ 0xcb, 0x87, 0x85, 0x00, 0x4a, 0xa1, 0xd0, 0x56, 0x10, 0x00, 0x4b, 0x80,
+ 0xcb, 0x1e, 0x65, 0x00, 0x4a, 0x99, 0xc9, 0xa0, 0x43, 0x05, 0x47, 0xc8,
+ 0x03, 0xc1, 0x7e, 0xff, 0xcf, 0x68, 0x34, 0x00, 0x4a, 0x71, 0x91, 0x00,
+ 0x4a, 0x5b, 0x01, 0x7f, 0x13, 0x46, 0x2f, 0xd9, 0xc1, 0x7f, 0x1d, 0x47,
+ 0xa4, 0x92, 0xc1, 0x7f, 0x25, 0x87, 0x00, 0x4a, 0x39, 0x48, 0xb7, 0xd7,
+ 0xc1, 0x7f, 0x33, 0x97, 0x00, 0x4a, 0x0b, 0x01, 0x7f, 0x41, 0x8b, 0x00,
+ 0x49, 0xfa, 0x01, 0x7f, 0x4c, 0x0a, 0xc1, 0x7f, 0x50, 0x15, 0xc1, 0x7f,
+ 0x5a, 0x18, 0xc1, 0x7f, 0x68, 0x0e, 0xc1, 0x7f, 0x72, 0x14, 0xc1, 0x7f,
+ 0x7a, 0x1b, 0xc1, 0x7f, 0x8a, 0xc2, 0x00, 0x3f, 0x00, 0x49, 0x73, 0x01,
+ 0x7f, 0x94, 0x04, 0xc1, 0x7f, 0x9a, 0x12, 0xc1, 0x7f, 0xaa, 0x10, 0xc1,
+ 0x7f, 0xb4, 0x06, 0xc1, 0x7f, 0xc8, 0x16, 0xc1, 0x7f, 0xd6, 0x0c, 0xc1,
+ 0x7f, 0xe4, 0x05, 0xc1, 0x7f, 0xf4, 0x09, 0xc1, 0x80, 0x01, 0x0d, 0xc1,
+ 0x80, 0x15, 0x83, 0x00, 0x48, 0x2b, 0x01, 0x80, 0x1d, 0x91, 0x00, 0x48,
+ 0x9b, 0x01, 0x80, 0x31, 0x87, 0x00, 0x48, 0x79, 0x97, 0x00, 0x48, 0x4b,
+ 0x01, 0x80, 0x3b, 0x8b, 0x00, 0x48, 0x3b, 0x01, 0x80, 0x46, 0xc2, 0x0e,
+ 0x13, 0x00, 0x4a, 0xc1, 0x1c, 0xc1, 0x80, 0x4a, 0xc2, 0x01, 0xa7, 0x00,
+ 0x4a, 0xf0, 0x45, 0x08, 0xd8, 0xc1, 0x80, 0x54, 0xcb, 0x91, 0xff, 0x00,
+ 0x4b, 0x29, 0xc4, 0x1c, 0xb3, 0x00, 0x4b, 0x20, 0xc7, 0xcf, 0xc5, 0x0f,
+ 0x9e, 0xe8, 0x4f, 0x01, 0xf7, 0xc1, 0x80, 0x78, 0x4d, 0x27, 0x71, 0x41,
+ 0x80, 0xda, 0xcf, 0x67, 0x44, 0x01, 0x1f, 0x41, 0xd4, 0x39, 0xce, 0x01,
+ 0x1c, 0xb0, 0x47, 0x07, 0xba, 0xc1, 0x81, 0x3c, 0x44, 0x02, 0x47, 0xc1,
+ 0x81, 0x48, 0xc4, 0x52, 0xda, 0x01, 0x1e, 0x30, 0xc8, 0x00, 0x52, 0x01,
+ 0x1e, 0x19, 0xc6, 0x03, 0xfa, 0x01, 0x1e, 0x00, 0xc4, 0x52, 0xda, 0x01,
+ 0x1e, 0x41, 0xc8, 0x00, 0x52, 0x01, 0x1e, 0x29, 0xc6, 0x03, 0xfa, 0x01,
+ 0x1e, 0x10, 0xc4, 0x52, 0xda, 0x01, 0x1e, 0x39, 0xc8, 0x00, 0x52, 0x01,
+ 0x1e, 0x21, 0xc6, 0x03, 0xfa, 0x01, 0x1e, 0x08, 0x44, 0x85, 0x0b, 0x41,
+ 0x81, 0x54, 0xca, 0xa9, 0x5c, 0x0e, 0x98, 0x11, 0xcd, 0x77, 0x7f, 0x0e,
+ 0x98, 0x08, 0xc2, 0x01, 0x5b, 0x01, 0x34, 0x79, 0xc3, 0x00, 0x55, 0x01,
+ 0x34, 0x60, 0xc3, 0x00, 0x55, 0x01, 0x34, 0x71, 0xc2, 0x01, 0x5b, 0x01,
+ 0x34, 0x68, 0x00, 0x41, 0x81, 0x60, 0x00, 0x41, 0x81, 0x6c, 0xc4, 0x15,
+ 0xa7, 0x00, 0x01, 0xbb, 0x01, 0x81, 0x78, 0xc2, 0x22, 0x45, 0x00, 0x01,
+ 0xb2, 0x01, 0x81, 0x7c, 0xc3, 0x0d, 0x8f, 0x00, 0x01, 0xab, 0x01, 0x81,
+ 0x80, 0xc3, 0x08, 0xde, 0x00, 0x01, 0xa2, 0x01, 0x81, 0x84, 0xc4, 0x05,
+ 0xde, 0x00, 0x01, 0x9b, 0x01, 0x81, 0x88, 0xc2, 0x0a, 0x20, 0x00, 0x01,
+ 0x92, 0x01, 0x81, 0x8c, 0x00, 0x41, 0x81, 0x90, 0x00, 0x41, 0x81, 0x9c,
+ 0x45, 0x08, 0xd8, 0xc1, 0x81, 0xa8, 0xcb, 0x91, 0xff, 0x08, 0xca, 0x20,
+ 0xc5, 0x33, 0x1a, 0x08, 0xca, 0x19, 0xc7, 0xc5, 0x0d, 0x08, 0xc9, 0xe9,
+ 0xcb, 0x21, 0x1a, 0x08, 0xc9, 0xe1, 0xc8, 0x10, 0xab, 0x08, 0xc9, 0xd8,
+ 0xc2, 0x00, 0x9a, 0x08, 0xca, 0x11, 0xc2, 0x1a, 0x36, 0x08, 0xca, 0x00,
+ 0xc5, 0x21, 0x27, 0x08, 0xc9, 0xf1, 0x4a, 0x6f, 0x72, 0x41, 0x81, 0xcc,
+ 0xc2, 0x05, 0x5c, 0x08, 0xc9, 0x79, 0x0e, 0xc1, 0x81, 0xe6, 0xc2, 0x01,
+ 0x0e, 0x08, 0xc9, 0x69, 0x15, 0xc1, 0x81, 0xf0, 0xc2, 0x06, 0x8c, 0x08,
+ 0xc9, 0x49, 0xc2, 0x00, 0x9a, 0x08, 0xc9, 0x39, 0x1b, 0xc1, 0x82, 0x00,
+ 0xc2, 0x00, 0x3f, 0x08, 0xc9, 0x21, 0x04, 0xc1, 0x82, 0x0a, 0x12, 0xc1,
+ 0x82, 0x14, 0x10, 0xc1, 0x82, 0x1e, 0x06, 0xc1, 0x82, 0x34, 0x16, 0xc1,
+ 0x82, 0x42, 0xc2, 0x26, 0x94, 0x08, 0xc8, 0x99, 0x05, 0xc1, 0x82, 0x52,
+ 0x09, 0xc1, 0x82, 0x5c, 0x0d, 0xc1, 0x82, 0x66, 0x91, 0x08, 0xc8, 0x49,
+ 0x87, 0x08, 0xc8, 0x31, 0x97, 0x08, 0xc8, 0x23, 0x01, 0x82, 0x70, 0x8b,
+ 0x08, 0xc8, 0x13, 0x01, 0x82, 0x74, 0x83, 0x08, 0xc8, 0x02, 0x01, 0x82,
+ 0x78, 0xc5, 0x07, 0x6d, 0x01, 0x16, 0x39, 0x15, 0x41, 0x82, 0x7c, 0xca,
+ 0xa8, 0x3a, 0x01, 0x3c, 0x99, 0x46, 0x08, 0xd7, 0x41, 0x82, 0x88, 0xc4,
+ 0x24, 0x35, 0x01, 0x3b, 0xf1, 0xc5, 0x05, 0x1b, 0x01, 0x3b, 0xe9, 0x15,
+ 0xc1, 0x82, 0xac, 0x08, 0xc1, 0x82, 0xb8, 0x16, 0xc1, 0x82, 0xc4, 0xc3,
+ 0x05, 0x17, 0x01, 0x3b, 0xb0, 0xc4, 0x24, 0x35, 0x01, 0x3c, 0x41, 0xc5,
+ 0x05, 0x1b, 0x01, 0x3c, 0x39, 0x15, 0xc1, 0x82, 0xd0, 0x08, 0xc1, 0x82,
+ 0xdc, 0x16, 0xc1, 0x82, 0xe8, 0xc3, 0x05, 0x17, 0x01, 0x3c, 0x01, 0xc4,
+ 0x16, 0x57, 0x0f, 0x88, 0x58, 0xc4, 0x01, 0xa7, 0x0f, 0xb0, 0xf1, 0xd1,
+ 0x51, 0xcf, 0x0f, 0xb1, 0x28, 0xc8, 0x15, 0xe3, 0x01, 0x16, 0x21, 0xd7,
+ 0x28, 0x7f, 0x0f, 0xa5, 0x01, 0x45, 0x01, 0xac, 0xc1, 0x82, 0xf4, 0xc6,
+ 0xd3, 0x58, 0x0f, 0xbc, 0xe0, 0x48, 0xc3, 0xbb, 0xc1, 0x83, 0x0c, 0x42,
+ 0x00, 0x27, 0x41, 0x83, 0x1e, 0xc9, 0x00, 0x9e, 0x01, 0x18, 0x21, 0xd7,
+ 0x2b, 0x5f, 0x01, 0x17, 0x89, 0xc4, 0x32, 0x64, 0x01, 0x15, 0x23, 0x01,
+ 0x83, 0x65, 0xc9, 0xb3, 0xba, 0x01, 0x4b, 0xf8, 0xd0, 0x58, 0x4f, 0x01,
+ 0x4f, 0x49, 0xcf, 0x6c, 0x4e, 0x01, 0x4f, 0x40, 0x43, 0xec, 0x1b, 0xc1,
+ 0x83, 0x6b, 0x43, 0xea, 0xd1, 0xc1, 0x83, 0x87, 0x43, 0xeb, 0x28, 0xc1,
+ 0x83, 0xa3, 0x43, 0xeb, 0xdf, 0xc1, 0x83, 0xbf, 0x43, 0xea, 0xd4, 0xc1,
+ 0x83, 0xdb, 0x43, 0xeb, 0xf7, 0xc1, 0x83, 0xf7, 0x43, 0xeb, 0x61, 0x41,
+ 0x84, 0x13, 0x43, 0xeb, 0x28, 0xc1, 0x84, 0x2f, 0x43, 0xea, 0xd1, 0xc1,
+ 0x84, 0x4b, 0x43, 0xeb, 0xdf, 0xc1, 0x84, 0x67, 0x43, 0xea, 0xd4, 0xc1,
+ 0x84, 0x83, 0x43, 0xec, 0x1b, 0xc1, 0x84, 0x9f, 0x43, 0xeb, 0xf7, 0xc1,
+ 0x84, 0xbb, 0x43, 0xeb, 0x61, 0x41, 0x84, 0xd7, 0x00, 0xc1, 0x84, 0xf3,
+ 0x42, 0x00, 0x3c, 0x41, 0x85, 0x45, 0x47, 0xca, 0x00, 0xc1, 0x85, 0x57,
+ 0xcf, 0x69, 0xab, 0x01, 0x0a, 0x01, 0x48, 0x01, 0xf7, 0xc1, 0x85, 0x63,
+ 0x46, 0x00, 0x3e, 0x41, 0x85, 0x88, 0x4c, 0x23, 0xb4, 0xc1, 0x85, 0x94,
+ 0x48, 0x08, 0x5a, 0x41, 0x85, 0xa0, 0xc5, 0x03, 0xfb, 0x01, 0x2e, 0x61,
+ 0xc4, 0x0c, 0xa4, 0x01, 0x02, 0xe0, 0xc5, 0x01, 0xea, 0x01, 0x58, 0xd1,
+ 0xc6, 0x2a, 0x4c, 0x01, 0x72, 0x50, 0xc5, 0x33, 0x1a, 0x08, 0xc1, 0xd1,
+ 0x42, 0x02, 0x52, 0xc1, 0x85, 0xb2, 0xc8, 0x10, 0xab, 0x08, 0xc1, 0xb8,
+ 0x03, 0xc1, 0x85, 0xbe, 0x91, 0x08, 0xc1, 0xa9, 0x87, 0x08, 0xc1, 0x99,
+ 0xc9, 0xb7, 0xd7, 0x08, 0xc1, 0x8b, 0x01, 0x85, 0xca, 0x97, 0x08, 0xc1,
+ 0x7b, 0x01, 0x85, 0xce, 0x8b, 0x08, 0xc1, 0x6a, 0x01, 0x85, 0xd2, 0x14,
+ 0xc1, 0x85, 0xd6, 0xc2, 0x01, 0x0e, 0x08, 0xc1, 0x51, 0x15, 0xc1, 0x85,
+ 0xe0, 0xc2, 0x06, 0x8c, 0x08, 0xc1, 0x31, 0xc2, 0x00, 0x96, 0x08, 0xc1,
+ 0x29, 0xc2, 0x1a, 0x36, 0x08, 0xc1, 0x19, 0xc2, 0x00, 0x3f, 0x08, 0xc1,
+ 0x11, 0x04, 0xc1, 0x85, 0xf0, 0x12, 0xc1, 0x85, 0xfa, 0x10, 0xc1, 0x86,
+ 0x04, 0x06, 0xc1, 0x86, 0x1a, 0x16, 0xc1, 0x86, 0x28, 0x0c, 0xc1, 0x86,
+ 0x36, 0x05, 0xc1, 0x86, 0x40, 0x09, 0xc1, 0x86, 0x4a, 0x0d, 0xc1, 0x86,
+ 0x54, 0x83, 0x08, 0xc0, 0x03, 0x01, 0x86, 0x5e, 0x91, 0x08, 0xc0, 0x41,
+ 0x87, 0x08, 0xc0, 0x31, 0x97, 0x08, 0xc0, 0x23, 0x01, 0x86, 0x6a, 0x8b,
+ 0x08, 0xc0, 0x12, 0x01, 0x86, 0x6e, 0xc9, 0x25, 0x70, 0x01, 0x17, 0x68,
+ 0xc9, 0x25, 0x70, 0x01, 0x17, 0x00, 0xcc, 0x89, 0x28, 0x0f, 0xad, 0xd0,
+ 0x43, 0x00, 0x37, 0xc1, 0x86, 0x72, 0xd5, 0x34, 0xde, 0x0d, 0xe3, 0x80,
+ 0xc8, 0x00, 0x29, 0x0d, 0xe4, 0x43, 0x01, 0x86, 0xa1, 0xc4, 0x52, 0xda,
+ 0x0d, 0xe4, 0x39, 0x0e, 0xc1, 0x86, 0xa7, 0xc6, 0x03, 0xfa, 0x0d, 0xe4,
+ 0x29, 0xc3, 0x05, 0xe3, 0x0d, 0xe4, 0x21, 0xc5, 0x1e, 0x64, 0x0d, 0xe4,
+ 0x11, 0xcb, 0x91, 0x39, 0x0d, 0xe4, 0x09, 0xc5, 0x34, 0x21, 0x0d, 0xe4,
+ 0x00, 0x42, 0x00, 0x44, 0xc1, 0x86, 0xb3, 0xc6, 0xd3, 0xd0, 0x0d, 0xe3,
+ 0xd9, 0xc6, 0x96, 0x92, 0x0d, 0xe3, 0xd1, 0xd4, 0x3d, 0xde, 0x0d, 0xe3,
+ 0xb9, 0xc6, 0x29, 0x5f, 0x0d, 0xe3, 0xb0, 0xcf, 0x64, 0x1a, 0x0d, 0xe3,
+ 0xa1, 0xd1, 0x29, 0x54, 0x0d, 0xe3, 0x88, 0xc2, 0x01, 0x16, 0x0d, 0xe1,
+ 0xd1, 0x8a, 0x0d, 0xe1, 0xc9, 0x91, 0x0d, 0xe2, 0xeb, 0x01, 0x86, 0xc2,
+ 0xc2, 0x05, 0x1b, 0x0d, 0xe2, 0xf9, 0x8b, 0x0d, 0xe2, 0xf1, 0x83, 0x0d,
+ 0xe2, 0xe0, 0x00, 0xc1, 0x86, 0xc6, 0x8a, 0x0d, 0xe0, 0x88, 0x00, 0xc1,
+ 0x86, 0xd0, 0x45, 0xe3, 0xaf, 0xc1, 0x87, 0x01, 0xc6, 0xd3, 0x8e, 0x0d,
+ 0xe2, 0x48, 0x00, 0x41, 0x87, 0x1d, 0x00, 0xc1, 0x87, 0x3b, 0x45, 0x46,
+ 0xe8, 0x41, 0x87, 0x4c, 0x00, 0x41, 0x87, 0x5c, 0x8a, 0x0d, 0xe0, 0xc1,
+ 0xc2, 0x00, 0x3d, 0x0d, 0xe0, 0x81, 0x48, 0xc0, 0xa3, 0x41, 0x87, 0x6d,
+ 0x8a, 0x0d, 0xe0, 0xb9, 0x44, 0x0a, 0x68, 0x41, 0x87, 0x75, 0x8e, 0x0d,
+ 0xe0, 0xb0, 0x8d, 0x0d, 0xe0, 0xa1, 0x00, 0x41, 0x87, 0x7d, 0x8a, 0x0d,
+ 0xe0, 0x99, 0xc2, 0x00, 0x3d, 0x0d, 0xe0, 0x68, 0xc2, 0x03, 0xdc, 0x0d,
+ 0xe0, 0x70, 0xc2, 0x03, 0xdc, 0x0d, 0xe0, 0x61, 0x47, 0xc8, 0xcc, 0x41,
+ 0x87, 0x87, 0xc4, 0xe8, 0x47, 0x0d, 0xe1, 0xf0, 0xc8, 0xbe, 0xa3, 0x0d,
+ 0xe3, 0x50, 0x99, 0x0d, 0xe2, 0x98, 0x97, 0x0d, 0xe2, 0xd9, 0x99, 0x0d,
+ 0xe2, 0xd1, 0xc2, 0x28, 0x39, 0x0d, 0xe2, 0xc9, 0x83, 0x0d, 0xe2, 0x18,
+ 0x8a, 0x0d, 0xe2, 0xb9, 0xc2, 0x03, 0xdc, 0x0d, 0xe2, 0xa1, 0x8b, 0x0d,
+ 0xe2, 0x50, 0x97, 0x0d, 0xe2, 0x91, 0x87, 0x0d, 0xe2, 0x58, 0x87, 0x0d,
+ 0xe2, 0x40, 0xc2, 0x01, 0xc3, 0x0d, 0xe2, 0x28, 0xca, 0xa6, 0xbe, 0x01,
+ 0x71, 0xb1, 0xcb, 0x90, 0xf7, 0x01, 0x71, 0xb8, 0xc5, 0x01, 0x62, 0x00,
+ 0x04, 0x69, 0x42, 0x02, 0xb5, 0xc1, 0x87, 0x8f, 0xc7, 0x2a, 0x4b, 0x00,
+ 0x02, 0xe3, 0x01, 0x87, 0x9b, 0xcd, 0x77, 0xf4, 0x0f, 0xb3, 0xf9, 0x55,
+ 0x35, 0xda, 0x41, 0x87, 0x9f, 0x14, 0xc1, 0x87, 0xab, 0xc8, 0x65, 0x2f,
+ 0x01, 0x18, 0x81, 0x16, 0xc1, 0x87, 0xbd, 0x15, 0xc1, 0x87, 0xd2, 0x12,
+ 0xc1, 0x87, 0xde, 0x47, 0x02, 0x91, 0xc1, 0x87, 0xea, 0xe0, 0x02, 0xe7,
+ 0x0f, 0xac, 0xa9, 0xcc, 0x8e, 0x98, 0x0f, 0xac, 0xa1, 0xc9, 0xb2, 0x9a,
+ 0x01, 0x4d, 0x81, 0xc5, 0x00, 0x55, 0x01, 0x4d, 0x1b, 0x01, 0x87, 0xf9,
+ 0xd2, 0x4c, 0xce, 0x01, 0x70, 0x89, 0xcd, 0x2d, 0xa6, 0x01, 0x71, 0x71,
+ 0xc5, 0x00, 0xea, 0x01, 0x72, 0x08, 0xc3, 0xeb, 0xd3, 0x01, 0x37, 0x09,
+ 0xc3, 0xeb, 0xd0, 0x01, 0x37, 0x00, 0xd1, 0x52, 0x46, 0x01, 0x33, 0xd1,
+ 0x43, 0x24, 0xe9, 0x41, 0x87, 0xff, 0x87, 0x05, 0x4a, 0x4b, 0x01, 0x88,
+ 0x29, 0x03, 0xc1, 0x88, 0x31, 0x91, 0x05, 0x4a, 0x59, 0x97, 0x05, 0x4a,
0x41, 0x8b, 0x05, 0x4a, 0x38, 0x89, 0x05, 0x4a, 0x78, 0x1b, 0xc1, 0x88,
- 0x82, 0xc2, 0x0b, 0xc6, 0x05, 0x4a, 0x21, 0x09, 0xc1, 0x88, 0x8c, 0x83,
- 0x05, 0x49, 0xa8, 0xc2, 0x00, 0xde, 0x05, 0x4a, 0x11, 0x83, 0x05, 0x49,
- 0xc0, 0x07, 0xc1, 0x88, 0x96, 0xd5, 0x32, 0xbf, 0x01, 0x3e, 0x31, 0xcd,
- 0x24, 0xb3, 0x00, 0x02, 0xeb, 0x01, 0x88, 0xa2, 0x0b, 0xc1, 0x88, 0xa6,
- 0x42, 0x00, 0x27, 0xc1, 0x88, 0xb2, 0xd3, 0x1d, 0xb2, 0x01, 0x70, 0x18,
- 0x14, 0xc1, 0x88, 0xc1, 0x10, 0x41, 0x88, 0xcd, 0xc9, 0x9c, 0x95, 0x01,
- 0x3e, 0xb1, 0x43, 0x00, 0x8c, 0xc1, 0x88, 0xd7, 0xcf, 0x68, 0xd5, 0x0f,
- 0xdd, 0xe0, 0x43, 0x03, 0x2d, 0xc1, 0x88, 0xe3, 0xd5, 0x37, 0x57, 0x0f,
- 0xab, 0xe8, 0xc7, 0xca, 0xa2, 0x01, 0x1d, 0xc9, 0xcd, 0x7c, 0x2a, 0x01,
- 0x71, 0x08, 0xcc, 0x02, 0x53, 0x00, 0x03, 0xeb, 0x01, 0x88, 0xfb, 0xc6,
- 0xbc, 0x7e, 0x01, 0x18, 0x49, 0xcd, 0x66, 0x34, 0x01, 0x80, 0x68, 0x00,
- 0x41, 0x88, 0xff, 0xc4, 0x20, 0x8d, 0x01, 0x18, 0x59, 0x0b, 0x41, 0x89,
- 0x11, 0x14, 0xc1, 0x89, 0x1d, 0xc3, 0x00, 0xdf, 0x01, 0x15, 0x11, 0x0a,
- 0xc1, 0x89, 0x29, 0xd5, 0x0a, 0x89, 0x01, 0x80, 0xa8, 0x45, 0x06, 0xf3,
- 0xc1, 0x89, 0x3b, 0xd9, 0x1d, 0xac, 0x01, 0x70, 0x28, 0xcb, 0x8a, 0xb9,
- 0x01, 0x4e, 0xc9, 0x45, 0x02, 0x32, 0x41, 0x89, 0x51, 0xd6, 0x0a, 0x88,
- 0x01, 0x4c, 0xc1, 0xd2, 0x23, 0xbe, 0x01, 0x80, 0x88, 0xca, 0x04, 0xfd,
- 0x01, 0x0f, 0x43, 0x01, 0x89, 0x6d, 0xc9, 0xb0, 0xcc, 0x01, 0x0c, 0xe8,
- 0x42, 0x00, 0x5b, 0xc1, 0x89, 0x71, 0x42, 0x01, 0x47, 0xc1, 0x89, 0x7d,
- 0xd5, 0x32, 0x56, 0x0f, 0xc5, 0x18, 0xcf, 0x5c, 0xe3, 0x0f, 0xc2, 0x91,
- 0x42, 0x00, 0x90, 0x41, 0x89, 0x89, 0x45, 0x10, 0x38, 0xc1, 0x89, 0x95,
- 0x03, 0x41, 0x89, 0xa1, 0x00, 0xc1, 0x89, 0xad, 0xc5, 0x12, 0xea, 0x01,
- 0x48, 0xd0, 0xcb, 0x82, 0xb5, 0x01, 0x0f, 0x11, 0x46, 0x06, 0xf2, 0x41,
- 0x89, 0xca, 0xcc, 0x79, 0x2c, 0x01, 0x0e, 0x49, 0xcb, 0x97, 0x3e, 0x0f,
- 0xd7, 0xc0, 0xc5, 0xcb, 0xea, 0x0f, 0xb3, 0x71, 0xd7, 0x27, 0x9c, 0x0f,
- 0xc5, 0x28, 0x45, 0x05, 0xf0, 0xc1, 0x89, 0xd9, 0xd8, 0x23, 0x58, 0x0f,
- 0xc5, 0x09, 0xdf, 0x0d, 0x9c, 0x0f, 0xc5, 0x48, 0xd0, 0x51, 0x51, 0x0f,
- 0xc1, 0xb1, 0xe0, 0x04, 0xe7, 0x0f, 0xc5, 0x58, 0xd0, 0x5f, 0xb2, 0x0f,
- 0xa8, 0x71, 0xcd, 0x09, 0x51, 0x01, 0x19, 0x51, 0xd4, 0x3e, 0x81, 0x01,
- 0x4f, 0xe9, 0xdb, 0x18, 0x5b, 0x00, 0x05, 0xd8, 0xdc, 0x13, 0xfe, 0x01,
- 0x3d, 0x49, 0xd7, 0x2a, 0x37, 0x01, 0x49, 0xc0, 0xc7, 0x05, 0x7a, 0x01,
- 0x03, 0x39, 0xc8, 0xbe, 0xed, 0x01, 0x01, 0x71, 0xc9, 0xb3, 0x42, 0x01,
- 0x01, 0x59, 0xc4, 0x00, 0xba, 0x01, 0x00, 0x78, 0xd6, 0x2c, 0xdb, 0x00,
- 0x2c, 0x69, 0xc4, 0xc1, 0x0f, 0x0f, 0xc8, 0xe1, 0xcb, 0x91, 0x5b, 0x00,
- 0x7e, 0xaa, 0x01, 0x89, 0xe5, 0xc4, 0x01, 0x1e, 0x01, 0x5d, 0x81, 0xc5,
- 0x01, 0xf7, 0x01, 0x5d, 0x88, 0xc4, 0x01, 0x1e, 0x01, 0x5d, 0x91, 0xc5,
- 0x01, 0xf7, 0x01, 0x5d, 0x98, 0xc2, 0x01, 0xc7, 0x01, 0x5d, 0xa1, 0xc4,
- 0x03, 0x68, 0x01, 0x5d, 0xb0, 0xc2, 0x01, 0xc7, 0x01, 0x5d, 0xa9, 0xc4,
- 0x03, 0x68, 0x01, 0x5d, 0xb8, 0xc7, 0xc7, 0xf4, 0x0f, 0x9d, 0x11, 0xc5,
- 0xdc, 0xf0, 0x0f, 0xb7, 0xe0, 0xc6, 0xd3, 0xc3, 0x0f, 0x93, 0x21, 0xc2,
- 0x00, 0x83, 0x0f, 0x93, 0x10, 0x00, 0x41, 0x89, 0xeb, 0x0b, 0xc1, 0x89,
- 0xfd, 0xc3, 0x06, 0x9e, 0x01, 0x0b, 0x18, 0xc2, 0x26, 0x51, 0x01, 0x0b,
- 0x2b, 0x01, 0x8a, 0x0f, 0xc4, 0x18, 0x83, 0x01, 0x0b, 0x30, 0xc2, 0x00,
- 0x4d, 0x01, 0x0b, 0x4b, 0x01, 0x8a, 0x15, 0x19, 0xc1, 0x8a, 0x1b, 0xc4,
- 0x04, 0x5e, 0x01, 0x0b, 0x10, 0xc5, 0x6a, 0x79, 0x01, 0x0b, 0x51, 0xc4,
- 0x01, 0xdc, 0x01, 0x0b, 0x38, 0x42, 0x0a, 0xe0, 0xc1, 0x8a, 0x25, 0xcb,
- 0x8f, 0x4b, 0x08, 0x0c, 0x91, 0xcd, 0x7c, 0x37, 0x08, 0x0c, 0xc0, 0x46,
- 0x00, 0x6b, 0x41, 0x8a, 0x31, 0xc6, 0x00, 0xc9, 0x0f, 0x8b, 0x61, 0xc6,
- 0x46, 0x09, 0x0f, 0x8b, 0x59, 0xc6, 0x5d, 0xcb, 0x0f, 0x8b, 0x50, 0xd8,
- 0x21, 0xd8, 0x01, 0x70, 0x38, 0xc5, 0x08, 0x27, 0x08, 0x73, 0xe9, 0xc7,
- 0x0a, 0xb9, 0x08, 0x73, 0xe1, 0xc4, 0x03, 0x2b, 0x08, 0x73, 0xd8, 0xc8,
- 0x0c, 0x4a, 0x08, 0x73, 0xd1, 0xc2, 0x0c, 0x57, 0x08, 0x73, 0x88, 0xc8,
- 0x0c, 0x4a, 0x08, 0x73, 0xc9, 0x9b, 0x08, 0x73, 0x80, 0x44, 0x18, 0x83,
- 0xc1, 0x8a, 0x3d, 0x42, 0x26, 0x51, 0x41, 0x8a, 0x49, 0x0b, 0xc1, 0x8a,
- 0x55, 0x11, 0x41, 0x8a, 0x61, 0x0a, 0xc1, 0x8a, 0x6d, 0x19, 0xc1, 0x8a,
- 0x79, 0xc2, 0x00, 0x4d, 0x08, 0x73, 0x48, 0xc4, 0x18, 0x83, 0x08, 0x73,
- 0x31, 0xc2, 0x26, 0x51, 0x08, 0x73, 0x28, 0xc3, 0x0c, 0x5b, 0x08, 0x73,
- 0x21, 0xc3, 0x06, 0x9e, 0x08, 0x73, 0x18, 0xc4, 0x04, 0x5e, 0x08, 0x73,
- 0x11, 0xc2, 0x01, 0x47, 0x08, 0x73, 0x08, 0x08, 0xc1, 0x8a, 0x85, 0x91,
- 0x00, 0xb5, 0x73, 0x01, 0x8a, 0x91, 0x15, 0xc1, 0x8a, 0xaf, 0x8d, 0x00,
- 0xb7, 0x8b, 0x01, 0x8a, 0xc8, 0x9a, 0x00, 0xb7, 0x51, 0x93, 0x00, 0xb7,
- 0x49, 0x0b, 0xc1, 0x8a, 0xce, 0x0e, 0xc1, 0x8a, 0xef, 0x85, 0x00, 0xb6,
- 0x6b, 0x01, 0x8a, 0xfb, 0x87, 0x00, 0xb6, 0x13, 0x01, 0x8b, 0x0b, 0x86,
- 0x00, 0xb6, 0x8b, 0x01, 0x8b, 0x23, 0xcc, 0x87, 0x58, 0x00, 0xb6, 0xb9,
- 0xd8, 0x24, 0x48, 0x00, 0xb6, 0x91, 0x16, 0xc1, 0x8b, 0x2f, 0x9c, 0x00,
- 0xb6, 0x71, 0x03, 0xc1, 0x8b, 0x3b, 0xcf, 0x68, 0x12, 0x00, 0xb6, 0x41,
- 0x89, 0x00, 0xb5, 0xab, 0x01, 0x8b, 0x53, 0xc7, 0xc9, 0x98, 0x00, 0xb6,
- 0x19, 0xd1, 0x56, 0x7e, 0x00, 0xb5, 0xf1, 0x42, 0x00, 0xa4, 0xc1, 0x8b,
- 0x5d, 0x99, 0x00, 0xb5, 0x2b, 0x01, 0x8b, 0x69, 0xd0, 0x5d, 0x02, 0x00,
- 0xb5, 0x89, 0x9b, 0x00, 0xb5, 0x23, 0x01, 0x8b, 0x6f, 0xc9, 0xa9, 0xbb,
- 0x00, 0xb5, 0x11, 0x98, 0x00, 0xb5, 0x08, 0xa1, 0x70, 0x0c, 0x49, 0xa0,
- 0x70, 0x0c, 0x41, 0xa6, 0x70, 0x0c, 0x71, 0xa5, 0x70, 0x0c, 0x69, 0xa4,
- 0x70, 0x0c, 0x61, 0xa3, 0x70, 0x0c, 0x59, 0xa2, 0x70, 0x0c, 0x51, 0x9f,
- 0x70, 0x0c, 0x39, 0x9e, 0x70, 0x0c, 0x31, 0x9d, 0x70, 0x0c, 0x28, 0xa0,
- 0x70, 0x0b, 0x01, 0x9f, 0x70, 0x0a, 0xf9, 0x9e, 0x70, 0x0a, 0xf1, 0x9d,
- 0x70, 0x0a, 0xe9, 0xa6, 0x70, 0x0b, 0x31, 0xa5, 0x70, 0x0b, 0x29, 0xa4,
- 0x70, 0x0b, 0x21, 0xa3, 0x70, 0x0b, 0x19, 0xa2, 0x70, 0x0b, 0x11, 0xa1,
- 0x70, 0x0b, 0x08, 0xa6, 0x70, 0x0a, 0xe1, 0xa5, 0x70, 0x0a, 0xd9, 0xa4,
- 0x70, 0x0a, 0xd1, 0xa3, 0x70, 0x0a, 0xc9, 0xa2, 0x70, 0x0a, 0xc1, 0xa1,
- 0x70, 0x0a, 0xb9, 0xa0, 0x70, 0x0a, 0xb1, 0x9f, 0x70, 0x0a, 0xa9, 0x9e,
- 0x70, 0x0a, 0xa1, 0x9d, 0x70, 0x0a, 0x98, 0xa6, 0x70, 0x0d, 0xb1, 0xa5,
- 0x70, 0x0d, 0xa9, 0xa4, 0x70, 0x0d, 0xa1, 0xa3, 0x70, 0x0d, 0x99, 0xa2,
- 0x70, 0x0d, 0x91, 0xa1, 0x70, 0x0d, 0x89, 0xa0, 0x70, 0x0d, 0x81, 0x9f,
- 0x70, 0x0d, 0x79, 0x9e, 0x70, 0x0d, 0x71, 0x9d, 0x70, 0x0d, 0x68, 0xa6,
- 0x70, 0x0d, 0x61, 0xa5, 0x70, 0x0d, 0x59, 0xa4, 0x70, 0x0d, 0x51, 0xa3,
- 0x70, 0x0d, 0x49, 0xa2, 0x70, 0x0d, 0x41, 0xa1, 0x70, 0x0d, 0x39, 0xa0,
- 0x70, 0x0d, 0x31, 0x9f, 0x70, 0x0d, 0x29, 0x9e, 0x70, 0x0d, 0x21, 0x9d,
- 0x70, 0x0d, 0x18, 0xa6, 0x70, 0x0d, 0x11, 0xa5, 0x70, 0x0d, 0x09, 0xa4,
- 0x70, 0x0d, 0x01, 0xa3, 0x70, 0x0c, 0xf9, 0xa2, 0x70, 0x0c, 0xf1, 0xa1,
- 0x70, 0x0c, 0xe9, 0xa0, 0x70, 0x0c, 0xe1, 0x9f, 0x70, 0x0c, 0xd9, 0x9e,
- 0x70, 0x0c, 0xd1, 0x9d, 0x70, 0x0c, 0xc8, 0xa6, 0x70, 0x0c, 0xc1, 0xa5,
- 0x70, 0x0c, 0xb9, 0xa4, 0x70, 0x0c, 0xb1, 0xa3, 0x70, 0x0c, 0xa9, 0xa2,
- 0x70, 0x0c, 0xa1, 0xa1, 0x70, 0x0c, 0x99, 0xa0, 0x70, 0x0c, 0x91, 0x9f,
- 0x70, 0x0c, 0x89, 0x9e, 0x70, 0x0c, 0x81, 0x9d, 0x70, 0x0c, 0x78, 0xa6,
- 0x70, 0x0c, 0x21, 0xa5, 0x70, 0x0c, 0x19, 0xa4, 0x70, 0x0c, 0x11, 0xa3,
- 0x70, 0x0c, 0x09, 0xa2, 0x70, 0x0c, 0x01, 0xa1, 0x70, 0x0b, 0xf9, 0xa0,
- 0x70, 0x0b, 0xf1, 0x9f, 0x70, 0x0b, 0xe9, 0x9e, 0x70, 0x0b, 0xe1, 0x9d,
- 0x70, 0x0b, 0xd8, 0xa6, 0x70, 0x0b, 0xd1, 0xa5, 0x70, 0x0b, 0xc9, 0xa4,
- 0x70, 0x0b, 0xc1, 0xa3, 0x70, 0x0b, 0xb9, 0xa2, 0x70, 0x0b, 0xb1, 0xa1,
- 0x70, 0x0b, 0xa9, 0xa0, 0x70, 0x0b, 0xa1, 0x9f, 0x70, 0x0b, 0x99, 0x9e,
- 0x70, 0x0b, 0x91, 0x9d, 0x70, 0x0b, 0x88, 0xa6, 0x70, 0x0b, 0x81, 0xa5,
- 0x70, 0x0b, 0x79, 0xa4, 0x70, 0x0b, 0x71, 0xa3, 0x70, 0x0b, 0x69, 0xa2,
- 0x70, 0x0b, 0x61, 0xa1, 0x70, 0x0b, 0x59, 0xa0, 0x70, 0x0b, 0x51, 0x9f,
- 0x70, 0x0b, 0x49, 0x9e, 0x70, 0x0b, 0x41, 0x9d, 0x70, 0x0b, 0x38, 0xa3,
- 0x70, 0x0f, 0x79, 0xa2, 0x70, 0x0f, 0x71, 0xa1, 0x70, 0x0f, 0x69, 0xa0,
- 0x70, 0x0f, 0x61, 0x9f, 0x70, 0x0f, 0x59, 0x9e, 0x70, 0x0f, 0x51, 0x9d,
- 0x70, 0x0f, 0x48, 0xa6, 0x70, 0x0f, 0x41, 0xa5, 0x70, 0x0f, 0x39, 0xa4,
- 0x70, 0x0f, 0x31, 0xa3, 0x70, 0x0f, 0x29, 0xa2, 0x70, 0x0f, 0x21, 0xa1,
- 0x70, 0x0f, 0x19, 0xa0, 0x70, 0x0f, 0x11, 0x9f, 0x70, 0x0f, 0x09, 0x9e,
- 0x70, 0x0f, 0x01, 0x9d, 0x70, 0x0e, 0xf8, 0xa6, 0x70, 0x0e, 0xf1, 0xa5,
- 0x70, 0x0e, 0xe9, 0xa4, 0x70, 0x0e, 0xe1, 0xa3, 0x70, 0x0e, 0xd9, 0xa2,
- 0x70, 0x0e, 0xd1, 0xa1, 0x70, 0x0e, 0xc9, 0xa0, 0x70, 0x0e, 0xc1, 0x9f,
- 0x70, 0x0e, 0xb9, 0x9e, 0x70, 0x0e, 0xb1, 0x9d, 0x70, 0x0e, 0xa8, 0xa6,
- 0x70, 0x0e, 0xa1, 0xa5, 0x70, 0x0e, 0x99, 0xa4, 0x70, 0x0e, 0x91, 0xa3,
- 0x70, 0x0e, 0x89, 0xa2, 0x70, 0x0e, 0x81, 0xa1, 0x70, 0x0e, 0x79, 0xa0,
- 0x70, 0x0e, 0x71, 0x9f, 0x70, 0x0e, 0x69, 0x9e, 0x70, 0x0e, 0x61, 0x9d,
- 0x70, 0x0e, 0x58, 0xa6, 0x70, 0x0e, 0x51, 0xa5, 0x70, 0x0e, 0x49, 0xa4,
- 0x70, 0x0e, 0x41, 0xa3, 0x70, 0x0e, 0x39, 0xa2, 0x70, 0x0e, 0x31, 0xa1,
- 0x70, 0x0e, 0x29, 0xa0, 0x70, 0x0e, 0x21, 0x9f, 0x70, 0x0e, 0x19, 0x9e,
- 0x70, 0x0e, 0x11, 0x9d, 0x70, 0x0e, 0x08, 0xa6, 0x70, 0x0e, 0x01, 0xa5,
- 0x70, 0x0d, 0xf9, 0xa4, 0x70, 0x0d, 0xf1, 0xa3, 0x70, 0x0d, 0xe9, 0xa2,
- 0x70, 0x0d, 0xe1, 0xa1, 0x70, 0x0d, 0xd9, 0xa0, 0x70, 0x0d, 0xd1, 0x9f,
- 0x70, 0x0d, 0xc9, 0x9e, 0x70, 0x0d, 0xc1, 0x9d, 0x70, 0x0d, 0xb8, 0x87,
- 0x05, 0x2f, 0x0b, 0x01, 0x8b, 0x73, 0x0a, 0xc1, 0x8b, 0x7e, 0x19, 0xc1,
- 0x8b, 0xa1, 0x12, 0xc1, 0x8b, 0xc4, 0x04, 0xc1, 0x8b, 0xde, 0x0f, 0xc1,
- 0x8b, 0xfc, 0x0d, 0xc1, 0x8c, 0x20, 0x09, 0xc1, 0x8c, 0x41, 0x08, 0xc1,
- 0x8c, 0x5f, 0x18, 0xc1, 0x8c, 0x79, 0x16, 0xc1, 0x8c, 0x93, 0x06, 0xc1,
- 0x8c, 0xb1, 0x0e, 0xc1, 0x8c, 0xcf, 0x14, 0xc1, 0x8c, 0xe9, 0x10, 0xc1,
- 0x8d, 0x03, 0x15, 0xc1, 0x8d, 0x30, 0x1c, 0xc1, 0x8d, 0x4e, 0x05, 0xc1,
- 0x8d, 0x6c, 0x0c, 0xc1, 0x8d, 0x86, 0x1b, 0xc1, 0x8d, 0xa0, 0x8b, 0x05,
- 0x29, 0x23, 0x01, 0x8d, 0xba, 0x83, 0x05, 0x2a, 0x4b, 0x01, 0x8d, 0xbe,
- 0x91, 0x05, 0x2d, 0xd3, 0x01, 0x8d, 0xc2, 0x97, 0x05, 0x2c, 0xaa, 0x01,
- 0x8d, 0xcd, 0x08, 0xc1, 0x8d, 0xd1, 0x0d, 0xc1, 0x8d, 0xdd, 0x16, 0xc1,
- 0x8d, 0xe9, 0xc3, 0xe7, 0xd2, 0x05, 0x30, 0xb1, 0xc4, 0x10, 0xf0, 0x05,
- 0x30, 0xb9, 0x06, 0xc1, 0x8d, 0xfb, 0xc4, 0x9d, 0xe8, 0x05, 0x30, 0xf8,
- 0xc2, 0x01, 0x47, 0x05, 0x31, 0x11, 0xc4, 0x04, 0x5e, 0x05, 0x31, 0x18,
- 0xc3, 0x06, 0x9e, 0x05, 0x31, 0x21, 0xc3, 0x0c, 0x5b, 0x05, 0x31, 0x28,
- 0xc2, 0x26, 0x51, 0x05, 0x31, 0x31, 0xc4, 0x18, 0x83, 0x05, 0x31, 0x38,
- 0x9f, 0x0f, 0xdb, 0x81, 0xa0, 0x0f, 0xdb, 0x89, 0xa1, 0x0f, 0xdb, 0x91,
- 0xa2, 0x0f, 0xdb, 0x99, 0xa3, 0x0f, 0xdb, 0xa1, 0xa4, 0x0f, 0xdb, 0xa8,
- 0xd6, 0x2b, 0xa7, 0x01, 0x3e, 0x51, 0xd5, 0x32, 0x02, 0x01, 0x4e, 0x81,
- 0xd6, 0x2f, 0x01, 0x01, 0x57, 0x11, 0xd5, 0x36, 0x1c, 0x01, 0x57, 0x20,
- 0x00, 0x41, 0x8e, 0x05, 0x42, 0x00, 0x03, 0xc1, 0x8e, 0x11, 0xcc, 0x83,
- 0x38, 0x0f, 0xb5, 0x31, 0xc4, 0x21, 0x13, 0x01, 0x71, 0x78, 0xc4, 0x00,
- 0xba, 0x01, 0x81, 0x8b, 0x01, 0x8e, 0x20, 0xd6, 0x31, 0x27, 0x01, 0x81,
- 0x92, 0x01, 0x8e, 0x24, 0x46, 0x0e, 0xf4, 0xc1, 0x8e, 0x2a, 0xcb, 0x5a,
- 0x67, 0x0f, 0xbd, 0x31, 0x46, 0x02, 0x31, 0xc1, 0x8e, 0x36, 0xcf, 0x65,
- 0x33, 0x0f, 0xb3, 0xe9, 0x15, 0xc1, 0x8e, 0x42, 0xd4, 0x3e, 0x09, 0x0f,
- 0xbd, 0x98, 0xcc, 0x00, 0x9b, 0x01, 0x16, 0xc9, 0xc9, 0x0a, 0x4a, 0x01,
- 0x16, 0xc0, 0xc7, 0xc6, 0x1f, 0x00, 0xe7, 0xb9, 0xcb, 0x44, 0x75, 0x00,
- 0xe7, 0x91, 0x48, 0x11, 0x41, 0x41, 0x8e, 0x54, 0xd3, 0x44, 0x6d, 0x00,
- 0xe7, 0x99, 0xd3, 0x42, 0x20, 0x00, 0xe7, 0x81, 0x50, 0x5e, 0xf2, 0x41,
- 0x8e, 0x6f, 0xc8, 0x6d, 0xbb, 0x00, 0xe7, 0x2b, 0x01, 0x8e, 0x7b, 0xc6,
- 0x6d, 0xbd, 0x00, 0xe7, 0x1b, 0x01, 0x8e, 0x81, 0xc7, 0x04, 0x40, 0x00,
- 0xe7, 0x10, 0x45, 0x06, 0xf3, 0xc1, 0x8e, 0x87, 0xc7, 0x0e, 0xae, 0x00,
- 0xe6, 0xe8, 0xc8, 0x9e, 0x62, 0x00, 0xe7, 0xc1, 0x43, 0xe7, 0xdb, 0x41,
- 0x8e, 0x93, 0xc5, 0x00, 0x95, 0x00, 0xe7, 0xa1, 0xc5, 0x01, 0x62, 0x00,
- 0xe6, 0xc0, 0xcf, 0x68, 0x8a, 0x00, 0xe6, 0xf9, 0xcd, 0x01, 0x5a, 0x00,
- 0xe6, 0xf1, 0xcd, 0x7a, 0x70, 0x00, 0xe6, 0xd8, 0xce, 0x6d, 0xb5, 0x00,
- 0xe6, 0xe1, 0xc6, 0xce, 0x1d, 0x00, 0xe6, 0x80, 0xdb, 0x16, 0x5a, 0x00,
- 0xe6, 0xbb, 0x01, 0x8e, 0x99, 0xd3, 0x04, 0x34, 0x00, 0xe6, 0xb1, 0xde,
- 0x0e, 0x52, 0x00, 0xe6, 0xa8, 0xc2, 0x00, 0x48, 0x08, 0x2b, 0x89, 0x87,
- 0x08, 0x2b, 0x90, 0x87, 0x08, 0x2b, 0x99, 0xc2, 0x01, 0xf0, 0x08, 0x2b,
- 0xa0, 0x87, 0x08, 0x2b, 0xa9, 0xc2, 0x01, 0xf0, 0x08, 0x2b, 0xb0, 0x8b,
- 0x08, 0x2b, 0xb8, 0xc2, 0x00, 0xa4, 0x08, 0x2b, 0xe9, 0x83, 0x08, 0x2b,
- 0xe0, 0xc2, 0x1b, 0xa5, 0x08, 0x2b, 0xf8, 0xc2, 0x00, 0xc7, 0x08, 0x2c,
- 0x19, 0x83, 0x08, 0x2c, 0x10, 0x87, 0x08, 0x2c, 0x29, 0xc2, 0x1b, 0xa5,
- 0x08, 0x2c, 0x30, 0xc2, 0x01, 0xf0, 0x08, 0x2c, 0x69, 0x87, 0x08, 0x2c,
- 0x60, 0x87, 0x08, 0x2c, 0x71, 0xc2, 0x01, 0xf0, 0x08, 0x2c, 0x78, 0xc2,
- 0x00, 0x48, 0x08, 0x2c, 0xc1, 0x87, 0x08, 0x2c, 0xc8, 0x87, 0x08, 0x2c,
- 0xd1, 0xc2, 0x01, 0xf0, 0x08, 0x2c, 0xd8, 0x87, 0x08, 0x2c, 0xe1, 0xc2,
- 0x01, 0xf0, 0x08, 0x2c, 0xe8, 0x8b, 0x08, 0x2c, 0xf0, 0x83, 0x08, 0x2d,
- 0x19, 0xc2, 0x00, 0xa4, 0x08, 0x2d, 0x20, 0xc2, 0x1b, 0xa5, 0x08, 0x2d,
- 0x30, 0x83, 0x08, 0x2d, 0x49, 0xc2, 0x00, 0xc7, 0x08, 0x2d, 0x50, 0x87,
- 0x08, 0x2d, 0x61, 0xc2, 0x1b, 0xa5, 0x08, 0x2d, 0x68, 0x87, 0x08, 0x2d,
- 0x99, 0xc2, 0x01, 0xf0, 0x08, 0x2d, 0xa0, 0x87, 0x08, 0x2d, 0xa9, 0xc2,
- 0x01, 0xf0, 0x08, 0x2d, 0xb0, 0xc7, 0x40, 0x12, 0x01, 0x0a, 0xe9, 0xc6,
- 0xd4, 0x89, 0x01, 0x0a, 0xd0, 0xc7, 0x40, 0x12, 0x01, 0x0a, 0xe1, 0xc6,
- 0x9c, 0x2a, 0x01, 0x0a, 0xb9, 0xc8, 0x0a, 0xb9, 0x00, 0x05, 0xf0, 0xc6,
- 0x9c, 0x2a, 0x01, 0x0a, 0xb1, 0xc6, 0x90, 0x95, 0x01, 0x0a, 0xa0, 0xc4,
- 0x9d, 0x5c, 0x01, 0x0a, 0xc9, 0xc6, 0xd0, 0x99, 0x01, 0x0a, 0x80, 0xc4,
- 0x08, 0x28, 0x01, 0x0a, 0x99, 0xc4, 0x0a, 0x04, 0x01, 0x0a, 0x90, 0xca,
- 0x1f, 0x96, 0x70, 0x03, 0x01, 0xcf, 0x51, 0x41, 0x70, 0x01, 0xf0, 0xc7,
- 0x80, 0x81, 0x70, 0x02, 0xf9, 0x07, 0xc1, 0x8e, 0x9f, 0x45, 0x07, 0x12,
- 0x41, 0x8e, 0xab, 0xd0, 0x0b, 0x37, 0x70, 0x02, 0xf1, 0x11, 0x41, 0x8e,
- 0xb7, 0x45, 0x02, 0x13, 0xc1, 0x8e, 0xc3, 0xce, 0x65, 0x34, 0x70, 0x02,
- 0xe0, 0xcb, 0x2c, 0x43, 0x70, 0x01, 0xf9, 0xcc, 0x00, 0xb2, 0x70, 0x01,
- 0x10, 0xca, 0x0e, 0x84, 0x70, 0x01, 0xe9, 0xcf, 0x0f, 0xfc, 0x70, 0x01,
- 0x08, 0xc8, 0x4f, 0x30, 0x70, 0x01, 0xd9, 0xc6, 0x27, 0xf9, 0x70, 0x01,
- 0x79, 0xc4, 0x40, 0xc6, 0x70, 0x01, 0x00, 0x45, 0x06, 0x98, 0xc1, 0x8e,
- 0xd5, 0xca, 0x97, 0xef, 0x70, 0x01, 0x20, 0xc8, 0x68, 0x22, 0x70, 0x01,
- 0x59, 0xcb, 0x93, 0xb8, 0x70, 0x01, 0x28, 0xc7, 0x0b, 0x80, 0x70, 0x01,
- 0x51, 0xc9, 0x30, 0x6e, 0x70, 0x01, 0x39, 0xc8, 0x37, 0x31, 0x70, 0x01,
- 0x30, 0x97, 0x00, 0xbb, 0x99, 0x8b, 0x00, 0xbb, 0x90, 0xc2, 0x0c, 0x65,
- 0x00, 0xbb, 0x81, 0xc2, 0x01, 0x09, 0x00, 0xbb, 0x79, 0xc2, 0x00, 0xc7,
- 0x00, 0xbb, 0x71, 0xc2, 0x1d, 0x5f, 0x00, 0xbb, 0x61, 0xc2, 0x00, 0xad,
- 0x00, 0xbb, 0x59, 0xc2, 0x00, 0xde, 0x00, 0xbb, 0x51, 0xc2, 0x03, 0xa4,
- 0x00, 0xbb, 0x49, 0x10, 0xc1, 0x8e, 0xf9, 0xc2, 0x0b, 0xc6, 0x00, 0xbb,
- 0x39, 0xc2, 0x00, 0xb3, 0x00, 0xbb, 0x31, 0xc2, 0x01, 0x29, 0x00, 0xbb,
- 0x21, 0xc2, 0x04, 0x2b, 0x00, 0xbb, 0x19, 0x97, 0x00, 0xbb, 0x11, 0x8b,
- 0x00, 0xbb, 0x09, 0x83, 0x00, 0xbb, 0x00, 0x83, 0x00, 0xb8, 0x03, 0x01,
- 0x8f, 0x03, 0xc2, 0x00, 0xa4, 0x00, 0xb8, 0x89, 0xc2, 0x0c, 0x65, 0x00,
- 0xb8, 0x81, 0xc2, 0x01, 0x09, 0x00, 0xb8, 0x79, 0xc2, 0x00, 0xc7, 0x00,
- 0xb8, 0x71, 0xc2, 0x02, 0x59, 0x00, 0xb8, 0x69, 0xc2, 0x1d, 0x5f, 0x00,
- 0xb8, 0x61, 0xc2, 0x00, 0xad, 0x00, 0xb8, 0x59, 0xc2, 0x00, 0xde, 0x00,
- 0xb8, 0x51, 0xc2, 0x03, 0xa4, 0x00, 0xb8, 0x49, 0x10, 0xc1, 0x8f, 0x09,
- 0xc2, 0x0b, 0xc6, 0x00, 0xb8, 0x39, 0xc2, 0x00, 0xb3, 0x00, 0xb8, 0x31,
- 0xc2, 0x01, 0x29, 0x00, 0xb8, 0x21, 0xc2, 0x04, 0x2b, 0x00, 0xb8, 0x19,
- 0x97, 0x00, 0xb8, 0x11, 0x8b, 0x00, 0xb8, 0x08, 0xc8, 0x77, 0xaa, 0x00,
- 0xb8, 0xa9, 0xc6, 0x1e, 0x23, 0x00, 0xb8, 0xa0, 0x97, 0x00, 0xb8, 0x99,
- 0x8b, 0x00, 0xb8, 0x90, 0x4a, 0xa1, 0x26, 0xc1, 0x8f, 0x13, 0xce, 0x1b,
- 0x63, 0x0b, 0x7f, 0x00, 0x46, 0x06, 0x97, 0xc1, 0x8f, 0x33, 0x47, 0x02,
- 0x90, 0x41, 0x8f, 0x57, 0x44, 0x00, 0xcc, 0xc1, 0x8f, 0xc3, 0xd1, 0x50,
- 0xfb, 0x08, 0xff, 0x79, 0xc9, 0xb3, 0x15, 0x08, 0xff, 0x61, 0xcc, 0x8c,
- 0x44, 0x08, 0xff, 0x38, 0xc9, 0xaa, 0xdb, 0x08, 0xff, 0x69, 0x4b, 0x93,
- 0x97, 0x41, 0x8f, 0xeb, 0xcb, 0x92, 0x6e, 0x08, 0xff, 0x59, 0xcd, 0x75,
- 0x88, 0x00, 0x5e, 0xb9, 0xcc, 0x86, 0x68, 0x00, 0x5f, 0xc0, 0xcb, 0x9b,
- 0x27, 0x08, 0xff, 0x51, 0xca, 0x98, 0xe1, 0x00, 0x5f, 0xb8, 0xc8, 0x46,
- 0x07, 0x08, 0xff, 0x31, 0x46, 0x02, 0x91, 0x41, 0x8f, 0xf7, 0xd3, 0x42,
- 0xa5, 0x08, 0xff, 0x29, 0x45, 0x06, 0x98, 0xc1, 0x90, 0x5e, 0xc7, 0xc6,
- 0x9d, 0x00, 0x5f, 0x99, 0xc9, 0xb6, 0x12, 0x00, 0x5f, 0xb0, 0xd8, 0x23,
- 0xa0, 0x08, 0xfe, 0xa1, 0x46, 0x04, 0x5d, 0xc1, 0x90, 0x82, 0x44, 0x01,
- 0xb4, 0x41, 0x90, 0x9a, 0x03, 0xc1, 0x90, 0xc0, 0x8b, 0x00, 0x5d, 0xfb,
- 0x01, 0x90, 0xcc, 0x97, 0x00, 0x5e, 0x0b, 0x01, 0x90, 0xd0, 0x87, 0x00,
- 0x5e, 0x33, 0x01, 0x90, 0xd4, 0x91, 0x00, 0x5e, 0x52, 0x01, 0x90, 0xd8,
- 0xc3, 0x0a, 0xe1, 0x00, 0x5f, 0x81, 0x44, 0x01, 0xb4, 0xc1, 0x90, 0xdc,
- 0xc4, 0x00, 0xcb, 0x00, 0x5f, 0xd0, 0xc4, 0x22, 0x71, 0x08, 0xb6, 0x49,
- 0xc5, 0x01, 0xdb, 0x08, 0xb6, 0x41, 0x15, 0xc1, 0x90, 0xe8, 0x08, 0xc1,
- 0x90, 0xf4, 0x16, 0xc1, 0x91, 0x00, 0xc3, 0x01, 0xb4, 0x08, 0xb6, 0x09,
- 0xc4, 0x15, 0xd3, 0x08, 0xb6, 0x00, 0x83, 0x08, 0xb4, 0x03, 0x01, 0x91,
- 0x0c, 0x14, 0xc1, 0x91, 0x1e, 0xc2, 0x00, 0xa4, 0x08, 0xb5, 0x49, 0x15,
- 0xc1, 0x91, 0x28, 0xc2, 0x04, 0x41, 0x08, 0xb5, 0x31, 0xc2, 0x00, 0xc7,
- 0x08, 0xb5, 0x29, 0xc2, 0x1d, 0x5f, 0x08, 0xb5, 0x19, 0xc2, 0x00, 0xad,
- 0x08, 0xb5, 0x11, 0x04, 0xc1, 0x91, 0x32, 0x12, 0xc1, 0x91, 0x3c, 0x10,
- 0xc1, 0x91, 0x46, 0x06, 0xc1, 0x91, 0x5c, 0x16, 0xc1, 0x91, 0x6a, 0x0c,
- 0xc1, 0x91, 0x78, 0x05, 0xc1, 0x91, 0x82, 0x09, 0xc1, 0x91, 0x8c, 0x0d,
- 0xc1, 0x91, 0x96, 0x91, 0x08, 0xb4, 0x41, 0x87, 0x08, 0xb4, 0x31, 0x97,
- 0x08, 0xb4, 0x23, 0x01, 0x91, 0xa0, 0x8b, 0x08, 0xb4, 0x12, 0x01, 0x91,
- 0xa4, 0xc5, 0x35, 0x00, 0x08, 0xb5, 0xb9, 0x42, 0x03, 0x32, 0xc1, 0x91,
- 0xa8, 0xc8, 0x11, 0x40, 0x08, 0xb5, 0x58, 0x03, 0xc1, 0x91, 0xb4, 0x91,
- 0x08, 0xb5, 0xa1, 0x87, 0x08, 0xb5, 0x91, 0x97, 0x08, 0xb5, 0x83, 0x01,
- 0x91, 0xc0, 0x8b, 0x08, 0xb5, 0x72, 0x01, 0x91, 0xc4, 0xc5, 0xd7, 0x0f,
- 0x00, 0xd5, 0x69, 0x0a, 0xc1, 0x91, 0xc8, 0x42, 0x0c, 0x65, 0xc1, 0x91,
- 0xd4, 0x0d, 0xc1, 0x91, 0xe9, 0x44, 0x3f, 0x73, 0xc1, 0x91, 0xfe, 0x14,
- 0xc1, 0x92, 0x13, 0xc6, 0xd4, 0x95, 0x00, 0xd5, 0x29, 0xc5, 0xdb, 0x47,
- 0x00, 0xd5, 0x03, 0x01, 0x92, 0x1f, 0x45, 0x27, 0x0d, 0x41, 0x92, 0x25,
- 0xc4, 0x22, 0x71, 0x00, 0xd4, 0xc9, 0xc5, 0x01, 0xdb, 0x00, 0xd4, 0xc1,
- 0x15, 0xc1, 0x92, 0x2d, 0x08, 0xc1, 0x92, 0x39, 0x16, 0xc1, 0x92, 0x45,
- 0xc3, 0x01, 0xb4, 0x00, 0xd4, 0x89, 0xc4, 0x15, 0xd3, 0x00, 0xd4, 0x80,
- 0xc4, 0x22, 0x71, 0x00, 0xd4, 0x49, 0xc5, 0x01, 0xdb, 0x00, 0xd4, 0x41,
- 0x15, 0xc1, 0x92, 0x51, 0x08, 0xc1, 0x92, 0x5d, 0x16, 0xc1, 0x92, 0x69,
- 0xc3, 0x01, 0xb4, 0x00, 0xd4, 0x09, 0xc4, 0x15, 0xd3, 0x00, 0xd4, 0x00,
- 0xd9, 0x1f, 0xa0, 0x00, 0xd3, 0xf9, 0x4d, 0x2e, 0xc1, 0x41, 0x92, 0x75,
- 0x91, 0x00, 0xd3, 0x5b, 0x01, 0x92, 0x95, 0x16, 0xc1, 0x92, 0xa3, 0x83,
- 0x00, 0xd3, 0x0b, 0x01, 0x92, 0xaf, 0x87, 0x00, 0xd3, 0x71, 0x97, 0x00,
- 0xd3, 0x4b, 0x01, 0x92, 0xbb, 0x8b, 0x00, 0xd3, 0x2b, 0x01, 0x92, 0xc6,
- 0xc7, 0xc4, 0x58, 0x00, 0xd3, 0x10, 0xc8, 0xbd, 0xed, 0x00, 0xd2, 0xa1,
- 0x0e, 0xc1, 0x92, 0xca, 0xc2, 0x02, 0x84, 0x00, 0xd2, 0x91, 0xc2, 0x00,
- 0x4c, 0x00, 0xd2, 0x89, 0x97, 0x00, 0xd2, 0x7b, 0x01, 0x92, 0xe3, 0x8b,
- 0x00, 0xd2, 0x6b, 0x01, 0x92, 0xe7, 0x83, 0x00, 0xd2, 0x59, 0x45, 0x05,
- 0x2b, 0xc1, 0x92, 0xeb, 0xc2, 0x01, 0x09, 0x00, 0xd2, 0x29, 0x14, 0xc1,
- 0x93, 0x17, 0xc2, 0x00, 0xad, 0x00, 0xd1, 0xf1, 0xc2, 0x00, 0xde, 0x00,
- 0xd1, 0xb9, 0x10, 0xc1, 0x93, 0x24, 0xc2, 0x0b, 0xc6, 0x00, 0xd1, 0x78,
- 0x44, 0x1b, 0x3f, 0xc1, 0x93, 0x34, 0x15, 0xc1, 0x93, 0x48, 0xc2, 0x00,
- 0xa4, 0x00, 0xca, 0xb9, 0x83, 0x00, 0xca, 0xb0, 0x8b, 0x00, 0xcb, 0x69,
- 0xc2, 0x0f, 0x4d, 0x00, 0xcb, 0x60, 0x8a, 0x00, 0xcb, 0x31, 0x87, 0x00,
- 0xcb, 0x28, 0x87, 0x00, 0xcb, 0x50, 0x91, 0x00, 0xcb, 0x40, 0x83, 0x00,
- 0xcb, 0x11, 0xc2, 0x01, 0x29, 0x00, 0xca, 0x90, 0xc2, 0x00, 0xa4, 0x00,
- 0xcb, 0x01, 0x83, 0x00, 0xca, 0x80, 0xc2, 0x00, 0xa4, 0x00, 0xca, 0xd1,
- 0x83, 0x00, 0xca, 0xc8, 0x42, 0x05, 0x68, 0xc1, 0x93, 0x52, 0xc6, 0xd1,
- 0xbf, 0x05, 0x56, 0xf1, 0xc3, 0x78, 0x27, 0x05, 0x56, 0xe9, 0xc5, 0xda,
- 0x84, 0x05, 0x56, 0xe0, 0xc4, 0xe0, 0xef, 0x05, 0x56, 0x11, 0xc3, 0x1d,
- 0x13, 0x05, 0x56, 0x09, 0xc5, 0xda, 0x84, 0x05, 0x56, 0x01, 0xc2, 0x13,
- 0xa5, 0x05, 0x55, 0xf8, 0x03, 0xc1, 0x93, 0x5c, 0x97, 0x05, 0x55, 0xa3,
- 0x01, 0x93, 0x72, 0x8b, 0x05, 0x55, 0x93, 0x01, 0x93, 0x7d, 0x87, 0x05,
- 0x55, 0xa9, 0x91, 0x05, 0x55, 0xb0, 0xc3, 0x01, 0x93, 0x05, 0x55, 0x81,
- 0xc3, 0x02, 0x32, 0x05, 0x55, 0xb8, 0x45, 0x05, 0x2b, 0xc1, 0x93, 0x81,
- 0x44, 0x08, 0x76, 0x41, 0x93, 0xdb, 0xcb, 0x57, 0xb6, 0x01, 0x36, 0x51,
- 0xc8, 0xbe, 0xc5, 0x01, 0x5e, 0x10, 0xc6, 0x2e, 0xc7, 0x01, 0x18, 0xc9,
- 0x44, 0x07, 0xdf, 0x41, 0x94, 0x35, 0x46, 0x11, 0xfc, 0xc1, 0x94, 0x41,
- 0xc5, 0xd5, 0x20, 0x01, 0x71, 0xc0, 0xc6, 0xd4, 0xa1, 0x01, 0x0a, 0x71,
- 0x52, 0x43, 0x89, 0xc1, 0x94, 0x4d, 0x45, 0x1b, 0xc7, 0xc1, 0x94, 0x59,
- 0xc8, 0x4f, 0x30, 0x01, 0x71, 0xa8, 0xc8, 0x37, 0x31, 0x01, 0x0a, 0x59,
- 0xc4, 0x01, 0x94, 0x01, 0x4d, 0x10, 0xc8, 0xbb, 0x25, 0x01, 0x09, 0x91,
- 0xc4, 0x00, 0xab, 0x01, 0x71, 0x90, 0xd0, 0x5b, 0x42, 0x01, 0x3e, 0x01,
- 0xce, 0x01, 0xb9, 0x01, 0x02, 0xb0, 0x50, 0x58, 0xe2, 0xc1, 0x94, 0x65,
- 0xcf, 0x66, 0x23, 0x01, 0x59, 0x88, 0xd0, 0x27, 0x47, 0x01, 0x0f, 0xb1,
- 0x44, 0x3a, 0x8a, 0x41, 0x94, 0x71, 0x4c, 0x8a, 0x88, 0xc1, 0x94, 0x89,
- 0x4b, 0x93, 0xce, 0xc1, 0x94, 0x95, 0x43, 0x03, 0x4e, 0xc1, 0x94, 0x9b,
- 0x4c, 0x8b, 0x24, 0x41, 0x94, 0xa1, 0x15, 0xc1, 0x94, 0xa7, 0xcb, 0x5a,
- 0x67, 0x0f, 0xbd, 0x08, 0xce, 0x73, 0x2d, 0x01, 0x10, 0x21, 0xc6, 0xd0,
- 0xa5, 0x01, 0x10, 0x18, 0xc8, 0xbc, 0x45, 0x00, 0x3d, 0x79, 0xc6, 0xd1,
- 0xd7, 0x00, 0x3d, 0x71, 0xc8, 0xba, 0x3d, 0x00, 0x3d, 0x58, 0xc8, 0xbe,
- 0x25, 0x00, 0x3d, 0x49, 0xc6, 0xd1, 0x89, 0x00, 0x3d, 0x61, 0xc8, 0xb9,
- 0xcd, 0x00, 0x3d, 0x68, 0xc8, 0xbb, 0xfd, 0x00, 0x3d, 0x39, 0xc6, 0xd3,
- 0x0f, 0x00, 0x3d, 0x30, 0xc5, 0xda, 0x25, 0x00, 0x3d, 0x29, 0xc5, 0xd6,
- 0xec, 0x00, 0x3d, 0x21, 0x09, 0xc1, 0x94, 0xb3, 0x16, 0xc1, 0x94, 0xc5,
- 0x06, 0xc1, 0x94, 0xde, 0x15, 0xc1, 0x94, 0xe8, 0x0a, 0xc1, 0x94, 0xf8,
- 0xc9, 0xb2, 0xcd, 0x00, 0x3c, 0xb9, 0xc8, 0xb9, 0xd5, 0x00, 0x3c, 0xb1,
- 0xc8, 0xbe, 0x15, 0x00, 0x3c, 0xa9, 0xc3, 0x4d, 0xc8, 0x00, 0x3c, 0xa1,
- 0x1c, 0xc1, 0x95, 0x04, 0x0e, 0xc1, 0x95, 0x0c, 0xc5, 0xd5, 0x66, 0x00,
- 0x3c, 0x51, 0xc5, 0xdc, 0x0f, 0x00, 0x3c, 0x49, 0xc5, 0xdd, 0xef, 0x00,
- 0x3c, 0x41, 0x03, 0xc1, 0x95, 0x18, 0x0d, 0xc1, 0x95, 0x24, 0xc3, 0x48,
- 0x84, 0x00, 0x3c, 0x21, 0xc3, 0x4b, 0xf4, 0x00, 0x3c, 0x19, 0x10, 0x41,
- 0x95, 0x30, 0x49, 0x39, 0x28, 0xc1, 0x95, 0x3c, 0xd3, 0x47, 0x19, 0x00,
- 0x71, 0xf8, 0xc4, 0x15, 0xd3, 0x00, 0x72, 0x81, 0xc3, 0x01, 0xb4, 0x00,
- 0x72, 0x89, 0x16, 0xc1, 0x95, 0x90, 0x08, 0xc1, 0x95, 0x9c, 0x15, 0xc1,
- 0x95, 0xa8, 0xc5, 0x01, 0xdb, 0x00, 0x72, 0xc1, 0xc4, 0x22, 0x71, 0x00,
- 0x72, 0xc8, 0xc8, 0x1e, 0xe0, 0x01, 0x19, 0x01, 0xcc, 0x89, 0x50, 0x01,
- 0x5e, 0x51, 0xcc, 0x84, 0xd0, 0x01, 0x71, 0xc9, 0xd0, 0x1e, 0xbf, 0x01,
- 0x72, 0xc9, 0xd1, 0x1a, 0x39, 0x01, 0x72, 0xd0, 0xc5, 0x13, 0x6c, 0x01,
- 0x18, 0xe9, 0xc3, 0x0b, 0x6a, 0x01, 0x18, 0x70, 0xc5, 0x13, 0x6c, 0x01,
- 0x18, 0xe1, 0xc3, 0x0b, 0x6a, 0x01, 0x18, 0x78, 0xca, 0x9e, 0xce, 0x01,
- 0x49, 0xe8, 0x83, 0x0f, 0x15, 0x6b, 0x01, 0x95, 0xb4, 0x04, 0xc1, 0x95,
- 0xb8, 0x91, 0x0f, 0x15, 0x51, 0x87, 0x0f, 0x15, 0x33, 0x01, 0x95, 0xc2,
- 0x97, 0x0f, 0x15, 0x29, 0x8b, 0x0f, 0x15, 0x0b, 0x01, 0x95, 0xc6, 0xc2,
- 0x00, 0xc7, 0x0f, 0x15, 0x01, 0xc2, 0x02, 0x59, 0x0f, 0x14, 0xf9, 0xc2,
- 0x00, 0xa4, 0x0f, 0x14, 0xf1, 0xc2, 0x24, 0x58, 0x0f, 0x14, 0xe9, 0xc2,
- 0x01, 0x09, 0x0f, 0x14, 0xe1, 0xc2, 0x1d, 0x5f, 0x0f, 0x14, 0xd9, 0xc3,
- 0x1b, 0xb6, 0x0f, 0x14, 0xd1, 0xc2, 0x0c, 0x65, 0x0f, 0x14, 0xc9, 0x10,
- 0xc1, 0x95, 0xca, 0xc2, 0x00, 0xad, 0x0f, 0x14, 0xb1, 0xc2, 0x01, 0x29,
- 0x0f, 0x14, 0xa9, 0xc2, 0x04, 0x2b, 0x0f, 0x14, 0xa1, 0xc2, 0x0b, 0xc6,
- 0x0f, 0x14, 0x99, 0xc2, 0x00, 0xb3, 0x0f, 0x14, 0x91, 0xc2, 0x03, 0xa4,
- 0x0f, 0x14, 0x80, 0xc2, 0xe7, 0xf8, 0x0f, 0x92, 0x09, 0xc2, 0xe7, 0xc7,
- 0x0f, 0x92, 0x10, 0xc3, 0xe7, 0x00, 0x0f, 0x92, 0x41, 0xc3, 0xe7, 0xea,
- 0x0f, 0x92, 0x29, 0xc3, 0xe6, 0xdf, 0x0f, 0x92, 0x00, 0xc3, 0xe7, 0xed,
- 0x0f, 0x92, 0x39, 0xc3, 0xe6, 0xee, 0x0f, 0x92, 0x18, 0xc3, 0xe7, 0x06,
- 0x0f, 0x92, 0x31, 0xc3, 0xe7, 0x48, 0x0f, 0x92, 0x20, 0xd8, 0x01, 0xef,
- 0x01, 0x3c, 0xe9, 0x46, 0x00, 0x6b, 0x41, 0x95, 0xd4, 0xc6, 0x1d, 0x59,
- 0x01, 0x01, 0x19, 0xc5, 0x18, 0xf9, 0x0f, 0xa6, 0x81, 0xcc, 0x84, 0x7c,
- 0x0f, 0xb5, 0x48, 0xc4, 0x00, 0x9e, 0x01, 0x31, 0xa9, 0xc3, 0x04, 0x34,
- 0x01, 0x31, 0xa0, 0xcf, 0x06, 0xf8, 0x01, 0x15, 0x51, 0xc9, 0x32, 0xcb,
- 0x01, 0x4c, 0x01, 0xcf, 0x28, 0x00, 0x01, 0x57, 0xa1, 0xd6, 0x2b, 0xa7,
- 0x01, 0x57, 0xa8, 0xc4, 0x18, 0x48, 0x01, 0x01, 0xa1, 0xc3, 0x26, 0x13,
- 0x01, 0x4f, 0xd8, 0xd6, 0x2d, 0x07, 0x01, 0x53, 0x41, 0xd6, 0x2f, 0x6f,
- 0x01, 0x53, 0x48, 0xc9, 0x0a, 0x4a, 0x01, 0x57, 0xb9, 0xcc, 0x00, 0x9b,
- 0x01, 0x57, 0xc0, 0xc5, 0xc1, 0xd6, 0x0f, 0x9b, 0xc9, 0xc4, 0x50, 0x3c,
- 0x0f, 0xa1, 0x00, 0xc7, 0xc2, 0x4b, 0x0e, 0x9a, 0xb1, 0xc7, 0xb7, 0x36,
- 0x0e, 0x98, 0xc0, 0xc4, 0x1f, 0x5c, 0x0e, 0x99, 0x59, 0xc7, 0x01, 0x79,
- 0x0e, 0x98, 0x38, 0xc7, 0xc7, 0x45, 0x0e, 0x9a, 0xa9, 0xca, 0xa6, 0x80,
- 0x0e, 0x99, 0x68, 0xca, 0x9c, 0xb2, 0x0e, 0x9a, 0xa1, 0x0f, 0xc1, 0x95,
- 0xec, 0xc8, 0xb8, 0x5d, 0x0e, 0x98, 0x80, 0xc7, 0xb2, 0xd8, 0x0e, 0x9a,
- 0x39, 0xca, 0x9e, 0xc4, 0x0e, 0x99, 0x11, 0xd9, 0x1f, 0x55, 0x0e, 0x98,
- 0x78, 0x43, 0x5d, 0x6a, 0xc1, 0x95, 0xf8, 0x10, 0x41, 0x96, 0x04, 0xc3,
- 0x13, 0xd1, 0x0e, 0x9a, 0x79, 0x07, 0x41, 0x96, 0x0e, 0x11, 0xc1, 0x96,
- 0x1a, 0xc6, 0xce, 0x53, 0x0e, 0x99, 0x48, 0xc9, 0xad, 0x12, 0x0e, 0x99,
- 0x99, 0xc8, 0xb8, 0x25, 0x0e, 0x99, 0x81, 0xc7, 0xc1, 0x8e, 0x0e, 0x98,
- 0xf8, 0xc3, 0x03, 0x58, 0x0e, 0x99, 0xf8, 0x15, 0xc1, 0x96, 0x26, 0xc5,
- 0xd9, 0x62, 0x0e, 0x98, 0xd1, 0xc3, 0x2b, 0x37, 0x0e, 0x98, 0xa0, 0xc5,
- 0x1e, 0xd7, 0x0e, 0x99, 0xa1, 0xc5, 0x58, 0x65, 0x0e, 0x99, 0x20, 0xd7,
- 0x28, 0x99, 0x01, 0x3d, 0xd1, 0xcf, 0x15, 0x8e, 0x01, 0x39, 0xd8, 0xcd,
- 0x7c, 0x5e, 0x01, 0x38, 0x31, 0x43, 0x06, 0xdb, 0xc1, 0x96, 0x30, 0xc4,
- 0x00, 0xcb, 0x01, 0x09, 0x09, 0xcf, 0x61, 0xcd, 0x0f, 0xac, 0x00, 0x05,
- 0xc1, 0x96, 0x3f, 0x03, 0xc1, 0x96, 0x4b, 0x42, 0x03, 0x32, 0xc1, 0x96,
- 0x57, 0xc5, 0x35, 0x00, 0x00, 0x61, 0xe1, 0xc7, 0xc4, 0x35, 0x00, 0x63,
- 0xb9, 0xc5, 0xd5, 0x70, 0x00, 0x63, 0xf8, 0x45, 0x02, 0x92, 0xc1, 0x96,
- 0x63, 0xc9, 0x34, 0xc3, 0x00, 0x62, 0xa8, 0x03, 0xc1, 0x96, 0xcc, 0x8b,
- 0x00, 0x61, 0xfb, 0x01, 0x96, 0xd8, 0x97, 0x00, 0x62, 0x0b, 0x01, 0x96,
- 0xdc, 0x48, 0xac, 0xc1, 0xc1, 0x96, 0xe0, 0x87, 0x00, 0x62, 0x33, 0x01,
- 0x96, 0xee, 0x91, 0x00, 0x62, 0x52, 0x01, 0x96, 0xf2, 0xc4, 0x15, 0xd3,
- 0x00, 0x63, 0x31, 0xc3, 0x01, 0xb4, 0x00, 0x63, 0x39, 0x16, 0xc1, 0x96,
- 0xf6, 0x08, 0xc1, 0x97, 0x02, 0x15, 0xc1, 0x97, 0x0e, 0xc5, 0x01, 0xdb,
- 0x00, 0x63, 0x71, 0xc4, 0x22, 0x71, 0x00, 0x63, 0x78, 0xdb, 0x15, 0xd3,
- 0x00, 0x63, 0xc1, 0x48, 0xb8, 0x1d, 0xc1, 0x97, 0x1a, 0x16, 0x41, 0x97,
- 0x26, 0x00, 0x41, 0x97, 0x32, 0xca, 0x9e, 0x60, 0x01, 0x70, 0xd9, 0x44,
- 0x01, 0xb8, 0x41, 0x97, 0x3e, 0xc4, 0x22, 0x71, 0x08, 0xa6, 0xc9, 0xc5,
- 0x01, 0xdb, 0x08, 0xa6, 0xc1, 0x15, 0xc1, 0x97, 0x4a, 0x08, 0xc1, 0x97,
- 0x56, 0x16, 0xc1, 0x97, 0x62, 0xc3, 0x01, 0xb4, 0x08, 0xa6, 0x89, 0xc4,
- 0x15, 0xd3, 0x08, 0xa6, 0x80, 0xd0, 0x50, 0x41, 0x08, 0xa6, 0x31, 0xc3,
- 0x7a, 0x4c, 0x08, 0xa4, 0x00, 0x03, 0xc1, 0x97, 0x6e, 0xc5, 0x35, 0x00,
- 0x08, 0xa6, 0x19, 0xcb, 0x1e, 0x17, 0x08, 0xa5, 0xf9, 0x42, 0x03, 0x32,
- 0x41, 0x97, 0x7a, 0x03, 0xc1, 0x97, 0x86, 0x46, 0x2b, 0xff, 0xc1, 0x97,
- 0x92, 0x91, 0x08, 0xa5, 0xe1, 0x87, 0x08, 0xa5, 0xc9, 0x48, 0xac, 0xc1,
- 0xc1, 0x97, 0x9a, 0x97, 0x08, 0xa5, 0x9b, 0x01, 0x97, 0xa8, 0x8b, 0x08,
- 0xa5, 0x8a, 0x01, 0x97, 0xac, 0xc2, 0x00, 0xa4, 0x08, 0xa5, 0x79, 0x15,
- 0xc1, 0x97, 0xb0, 0x18, 0xc1, 0x97, 0xc0, 0xc2, 0x00, 0xc7, 0x08, 0xa5,
- 0x51, 0xc2, 0x02, 0x59, 0x08, 0xa5, 0x49, 0xc2, 0x1d, 0x5f, 0x08, 0xa5,
- 0x41, 0xc2, 0x00, 0xad, 0x08, 0xa5, 0x39, 0x04, 0xc1, 0x97, 0xca, 0x12,
- 0xc1, 0x97, 0xd4, 0x10, 0xc1, 0x97, 0xde, 0x06, 0xc1, 0x97, 0xf4, 0x16,
- 0xc1, 0x98, 0x02, 0x0c, 0xc1, 0x98, 0x10, 0x05, 0xc1, 0x98, 0x1a, 0x09,
- 0xc1, 0x98, 0x24, 0x0d, 0xc1, 0x98, 0x2e, 0x83, 0x08, 0xa4, 0x0b, 0x01,
- 0x98, 0x38, 0x91, 0x08, 0xa4, 0x69, 0x87, 0x08, 0xa4, 0x59, 0x97, 0x08,
- 0xa4, 0x2b, 0x01, 0x98, 0x44, 0x8b, 0x08, 0xa4, 0x1a, 0x01, 0x98, 0x48,
- 0xc9, 0xaf, 0x9a, 0x00, 0x78, 0x01, 0x45, 0x10, 0x60, 0x41, 0x98, 0x4c,
- 0x14, 0xc1, 0x98, 0x68, 0x42, 0x1d, 0x5f, 0xc1, 0x98, 0x7a, 0x0f, 0xc1,
- 0x98, 0x86, 0xce, 0x75, 0xf7, 0x00, 0x7c, 0x11, 0xc8, 0xbf, 0x15, 0x00,
- 0x7c, 0x19, 0x42, 0x19, 0xb6, 0xc1, 0x98, 0x92, 0x44, 0xe1, 0xef, 0xc1,
- 0x98, 0x9e, 0xd1, 0x53, 0xd6, 0x00, 0x7c, 0x60, 0x45, 0x00, 0xcb, 0xc1,
- 0x98, 0xaa, 0x47, 0x02, 0x90, 0x41, 0x98, 0xbc, 0x44, 0x02, 0x93, 0xc1,
- 0x99, 0x1e, 0x4b, 0x8e, 0x7a, 0x41, 0x99, 0x2a, 0x46, 0x12, 0x32, 0xc1,
- 0x99, 0x36, 0xd1, 0x4f, 0x74, 0x00, 0x78, 0x58, 0x47, 0x7d, 0xf1, 0xc1,
- 0x99, 0x42, 0x45, 0x9a, 0x83, 0xc1, 0x99, 0x4e, 0xc6, 0xd2, 0xdf, 0x00,
- 0x79, 0xc0, 0xc9, 0xab, 0x08, 0x00, 0x78, 0x41, 0xc3, 0x02, 0x29, 0x00,
- 0x78, 0x68, 0x15, 0xc1, 0x99, 0x5a, 0x49, 0xad, 0xea, 0x41, 0x99, 0x64,
- 0x44, 0x92, 0x64, 0xc1, 0x99, 0x70, 0x4a, 0x9f, 0x28, 0x41, 0x99, 0x7f,
- 0x15, 0xc1, 0x99, 0x8b, 0xd3, 0x44, 0x47, 0x00, 0x7e, 0xd0, 0xd3, 0x42,
- 0x59, 0x00, 0x78, 0x89, 0xcd, 0x78, 0xdd, 0x00, 0x78, 0x90, 0xc2, 0x00,
- 0xb7, 0x00, 0x79, 0xe1, 0xc2, 0x00, 0xe4, 0x00, 0x79, 0xe8, 0xca, 0xa0,
- 0xea, 0x00, 0x78, 0xa9, 0xca, 0xa5, 0x68, 0x00, 0x78, 0xb0, 0x0d, 0xc1,
- 0x99, 0x97, 0x09, 0xc1, 0x99, 0xad, 0x10, 0xc1, 0x99, 0xb7, 0x05, 0xc1,
- 0x99, 0xcd, 0xc2, 0x24, 0x58, 0x00, 0x7a, 0x39, 0x16, 0xc1, 0x99, 0xd7,
- 0x06, 0xc1, 0x99, 0xe9, 0x12, 0xc1, 0x99, 0xfb, 0x04, 0xc1, 0x9a, 0x05,
- 0xc2, 0x00, 0xad, 0x00, 0x7a, 0xc1, 0xc2, 0x01, 0x09, 0x00, 0x7a, 0xe9,
- 0x1c, 0xc1, 0x9a, 0x0f, 0xc2, 0x00, 0x02, 0x00, 0x7b, 0x01, 0xc2, 0x1d,
- 0x5f, 0x00, 0x7b, 0x09, 0x14, 0xc1, 0x9a, 0x19, 0xc2, 0x00, 0xc7, 0x00,
- 0x7b, 0x19, 0x15, 0xc1, 0x9a, 0x23, 0xc2, 0x00, 0xa4, 0x00, 0x7b, 0x39,
- 0x83, 0x00, 0x7b, 0x41, 0xcd, 0x7a, 0xcb, 0x00, 0x7b, 0x50, 0xd4, 0x3d,
- 0xcd, 0x00, 0x78, 0xb9, 0xcb, 0x97, 0xd8, 0x00, 0x78, 0xc8, 0xc2, 0x01,
- 0x47, 0x00, 0x79, 0x11, 0xc4, 0x04, 0x5e, 0x00, 0x79, 0x18, 0xc3, 0x06,
- 0x9e, 0x00, 0x79, 0x21, 0xc3, 0x0c, 0x5b, 0x00, 0x79, 0x28, 0xc2, 0x26,
- 0x51, 0x00, 0x79, 0x31, 0xc4, 0x18, 0x83, 0x00, 0x79, 0x38, 0xc3, 0x01,
- 0xb4, 0x00, 0x79, 0x51, 0x16, 0xc1, 0x9a, 0x33, 0x08, 0xc1, 0x9a, 0x3f,
- 0x15, 0xc1, 0x9a, 0x4b, 0xc5, 0x01, 0xdb, 0x00, 0x79, 0x89, 0xc4, 0x22,
- 0x71, 0x00, 0x79, 0x91, 0xc4, 0x15, 0xd3, 0x00, 0x79, 0x98, 0x8b, 0x00,
- 0x7b, 0x98, 0x97, 0x00, 0x7b, 0xa8, 0x94, 0x00, 0x7b, 0xb3, 0x01, 0x9a,
- 0x57, 0x8e, 0x00, 0x7b, 0xc2, 0x01, 0x9a, 0x5b, 0x87, 0x00, 0x7b, 0xd8,
- 0x91, 0x00, 0x7b, 0xe8, 0x8b, 0x00, 0x7c, 0x08, 0x83, 0x01, 0x69, 0x83,
- 0x01, 0x9a, 0x5f, 0x87, 0x01, 0x6b, 0x33, 0x01, 0x9a, 0xd0, 0x8b, 0x01,
- 0x6a, 0x49, 0x97, 0x01, 0x6a, 0x99, 0x91, 0x01, 0x6b, 0x38, 0x8c, 0x01,
- 0x69, 0xa9, 0x8a, 0x01, 0x6a, 0x08, 0x48, 0xb8, 0xfd, 0xc1, 0x9a, 0xd4,
- 0xcd, 0x80, 0x13, 0x01, 0x6b, 0x20, 0xcb, 0x94, 0x89, 0x01, 0x6a, 0x59,
- 0xc8, 0xbb, 0xd5, 0x01, 0x6a, 0xc0, 0x00, 0xc1, 0x9a, 0xf3, 0xda, 0x01,
- 0xad, 0x01, 0x71, 0x50, 0xc2, 0x00, 0xbf, 0x01, 0x52, 0xb1, 0xc3, 0x00,
- 0x57, 0x01, 0x52, 0xa8, 0xcb, 0x90, 0x8a, 0x01, 0x50, 0x41, 0xcc, 0x88,
- 0x48, 0x01, 0x50, 0x38, 0xc7, 0x08, 0xca, 0x01, 0x49, 0xa1, 0xc9, 0x03,
- 0x68, 0x01, 0x49, 0xa9, 0xca, 0x3a, 0x11, 0x0f, 0xc5, 0x88, 0xc9, 0x01,
- 0x1e, 0x01, 0x49, 0xb1, 0xca, 0x01, 0xf7, 0x01, 0x49, 0xb8, 0x45, 0x04,
- 0x74, 0xc1, 0x9a, 0xff, 0x17, 0xc1, 0x9b, 0x29, 0x46, 0x10, 0x5f, 0xc1,
- 0x9b, 0x3e, 0x44, 0x00, 0xcc, 0xc1, 0x9b, 0x60, 0xd3, 0x45, 0x8a, 0x00,
- 0x36, 0xf1, 0xc5, 0xd5, 0xa7, 0x00, 0x32, 0x8b, 0x01, 0x9b, 0x7c, 0xc8,
- 0x4f, 0x30, 0x00, 0x30, 0xd8, 0x48, 0x19, 0x70, 0xc1, 0x9b, 0x80, 0x07,
- 0xc1, 0x9b, 0xde, 0x45, 0x17, 0xf0, 0x41, 0x9b, 0xea, 0x43, 0x00, 0xbc,
- 0xc1, 0x9b, 0xf6, 0x43, 0x2d, 0x58, 0xc1, 0x9c, 0x02, 0x4b, 0x4a, 0x12,
- 0x41, 0x9c, 0x0e, 0x03, 0xc1, 0x9c, 0x7a, 0x45, 0x05, 0x97, 0xc1, 0x9c,
- 0x89, 0xd3, 0x46, 0x6e, 0x00, 0x47, 0x11, 0xd0, 0x5c, 0xc2, 0x00, 0x33,
- 0x58, 0x4f, 0x30, 0xe7, 0xc1, 0x9c, 0x98, 0x03, 0xc1, 0x9c, 0xa7, 0x43,
- 0x0e, 0x49, 0xc1, 0x9c, 0xb1, 0xcd, 0x7f, 0x36, 0x00, 0x32, 0xe8, 0x00,
- 0xc1, 0x9c, 0xb7, 0xc3, 0x14, 0xa9, 0x00, 0x32, 0x6a, 0x01, 0x9c, 0xc9,
- 0xc4, 0x07, 0x87, 0x00, 0x32, 0x73, 0x01, 0x9c, 0xcf, 0xc8, 0x11, 0xdd,
- 0x00, 0x36, 0xa1, 0xd0, 0x5d, 0xf2, 0x00, 0x33, 0x69, 0xce, 0x6d, 0xd1,
- 0x00, 0x30, 0x10, 0xc8, 0xb6, 0xf5, 0x00, 0x47, 0x91, 0xc8, 0xb7, 0x5d,
- 0x00, 0x47, 0x89, 0xc8, 0x71, 0x0b, 0x00, 0x47, 0x80, 0x44, 0x01, 0xb4,
- 0xc1, 0x9c, 0xdc, 0xd1, 0x52, 0xe8, 0x00, 0x47, 0x19, 0x03, 0xc1, 0x9c,
- 0xee, 0xd2, 0x4c, 0x98, 0x00, 0x33, 0x61, 0xda, 0x1a, 0x87, 0x00, 0x30,
- 0xf0, 0x45, 0x02, 0x53, 0xc1, 0x9c, 0xfd, 0xc4, 0x00, 0xab, 0x00, 0x30,
- 0x60, 0xd3, 0x44, 0xb9, 0x00, 0x44, 0xf9, 0x44, 0x09, 0x8b, 0x41, 0x9d,
- 0x18, 0xd1, 0x50, 0xd9, 0x00, 0x44, 0x89, 0x11, 0xc1, 0x9d, 0x24, 0xce,
- 0x6e, 0x25, 0x00, 0x37, 0x49, 0xcb, 0x93, 0xb8, 0x00, 0x33, 0x50, 0xcc,
- 0x46, 0x3c, 0x00, 0x44, 0x71, 0x4a, 0x6f, 0xcd, 0x41, 0x9d, 0x30, 0x4c,
- 0x86, 0x44, 0xc1, 0x9d, 0x42, 0x46, 0x00, 0xf0, 0x41, 0x9d, 0x4e, 0xca,
- 0x46, 0xb0, 0x00, 0x30, 0x29, 0xc4, 0x00, 0xcb, 0x00, 0x30, 0x00, 0xc4,
- 0x22, 0x71, 0x00, 0x33, 0x49, 0xc5, 0x01, 0xdb, 0x00, 0x33, 0x41, 0x15,
- 0xc1, 0x9d, 0x5a, 0x08, 0xc1, 0x9d, 0x66, 0x16, 0xc1, 0x9d, 0x72, 0xc3,
- 0x01, 0xb4, 0x00, 0x33, 0x09, 0xc4, 0x15, 0xd3, 0x00, 0x33, 0x00, 0xd1,
- 0x4f, 0xfc, 0x00, 0x30, 0x51, 0xca, 0xa7, 0x52, 0x00, 0x30, 0x48, 0x44,
- 0x42, 0x94, 0xc1, 0x9d, 0x7e, 0xc7, 0xc8, 0xcd, 0x07, 0xd8, 0xb1, 0xc8,
- 0xbd, 0xfd, 0x00, 0x2c, 0x38, 0xc2, 0x15, 0x1d, 0x00, 0x2b, 0xab, 0x01,
- 0x9d, 0x96, 0xc3, 0xb2, 0x7c, 0x00, 0x2c, 0x31, 0xc2, 0x22, 0x1f, 0x00,
- 0x2c, 0x29, 0x42, 0x01, 0x05, 0xc1, 0x9d, 0xa2, 0x12, 0xc1, 0x9d, 0xaa,
- 0x05, 0xc1, 0x9d, 0xb6, 0x14, 0xc1, 0x9d, 0xc2, 0x16, 0xc1, 0x9d, 0xcc,
- 0x18, 0xc1, 0x9d, 0xdc, 0x15, 0xc1, 0x9d, 0xe6, 0x0c, 0xc1, 0x9d, 0xf2,
- 0xc3, 0x28, 0xed, 0x00, 0x2b, 0xb1, 0xc3, 0x00, 0xc3, 0x00, 0x2b, 0xa1,
- 0x09, 0xc1, 0x9d, 0xfc, 0xc2, 0x00, 0xf6, 0x00, 0x2b, 0x81, 0xc3, 0xe7,
- 0xbd, 0x00, 0x2b, 0x69, 0xc4, 0xe2, 0x7f, 0x00, 0x2b, 0x61, 0xc3, 0x04,
- 0x6d, 0x00, 0x2b, 0x59, 0x1c, 0xc1, 0x9e, 0x08, 0x07, 0xc1, 0x9e, 0x12,
- 0xc2, 0x0b, 0xc6, 0x00, 0x2b, 0x21, 0xc3, 0x17, 0x9a, 0x00, 0x2b, 0x11,
- 0xc3, 0xab, 0x29, 0x00, 0x2b, 0x08, 0xc3, 0xb2, 0x7c, 0x00, 0x2a, 0xb1,
- 0xc2, 0x22, 0x1f, 0x00, 0x2a, 0xa9, 0x42, 0x01, 0x05, 0xc1, 0x9e, 0x20,
- 0x12, 0xc1, 0x9e, 0x28, 0xc2, 0x15, 0x1d, 0x00, 0x2a, 0x2b, 0x01, 0x9e,
- 0x34, 0x05, 0xc1, 0x9e, 0x3a, 0x14, 0xc1, 0x9e, 0x46, 0x16, 0xc1, 0x9e,
- 0x50, 0x18, 0xc1, 0x9e, 0x5a, 0x15, 0xc1, 0x9e, 0x64, 0x0c, 0xc1, 0x9e,
- 0x70, 0xc3, 0x28, 0xed, 0x00, 0x2a, 0x31, 0xc3, 0x00, 0xc3, 0x00, 0x2a,
- 0x21, 0x09, 0xc1, 0x9e, 0x7a, 0xc2, 0x00, 0xf6, 0x00, 0x2a, 0x01, 0xc3,
- 0xe7, 0xbd, 0x00, 0x29, 0xe9, 0xc4, 0xe2, 0x7f, 0x00, 0x29, 0xe1, 0xc3,
- 0x04, 0x6d, 0x00, 0x29, 0xd9, 0x1c, 0xc1, 0x9e, 0x86, 0x07, 0xc1, 0x9e,
- 0x90, 0xc2, 0x0b, 0xc6, 0x00, 0x29, 0xa1, 0xc3, 0xab, 0x29, 0x00, 0x29,
- 0x89, 0xc3, 0x17, 0x9a, 0x00, 0x29, 0x90, 0xc4, 0x69, 0xa7, 0x0f, 0x48,
- 0x01, 0x06, 0xc1, 0x9e, 0x9e, 0xc4, 0x7b, 0x8a, 0x0f, 0x48, 0x11, 0xc4,
- 0xe6, 0x37, 0x0f, 0x48, 0x19, 0x04, 0xc1, 0x9e, 0xaa, 0x15, 0xc1, 0x9e,
- 0xb4, 0xc2, 0x00, 0x27, 0x0f, 0x48, 0x31, 0xc2, 0x02, 0x59, 0x0f, 0x48,
- 0x41, 0x87, 0x0f, 0x48, 0x49, 0xc2, 0x00, 0x67, 0x0f, 0x48, 0x51, 0x8b,
- 0x0f, 0x48, 0x59, 0x91, 0x0f, 0x48, 0x61, 0x1b, 0xc1, 0x9e, 0xc0, 0xc3,
- 0x78, 0xa9, 0x0f, 0x48, 0x79, 0x10, 0xc1, 0x9e, 0xca, 0x0d, 0xc1, 0x9e,
- 0xdc, 0x97, 0x0f, 0x48, 0x99, 0xc4, 0xe2, 0xbf, 0x0f, 0x48, 0xa1, 0xc3,
- 0x11, 0xd4, 0x0f, 0x48, 0xa9, 0xc2, 0x00, 0xa4, 0x0f, 0x48, 0xb1, 0xc4,
- 0xd6, 0xec, 0x0f, 0x48, 0xb9, 0x09, 0xc1, 0x9e, 0xee, 0xc2, 0x00, 0x16,
- 0x0f, 0x48, 0xd1, 0xc2, 0x04, 0x41, 0x0f, 0x48, 0xe1, 0xc3, 0xb2, 0xb2,
- 0x0f, 0x48, 0xf8, 0xc4, 0x14, 0x41, 0x0f, 0x49, 0x19, 0xc2, 0x00, 0xa4,
- 0x0f, 0x49, 0x78, 0x83, 0x0f, 0x49, 0x31, 0xc2, 0x01, 0xf0, 0x0f, 0x49,
- 0x48, 0xc9, 0xac, 0xca, 0x0f, 0x49, 0x39, 0xc2, 0x00, 0xa4, 0x0f, 0x4a,
- 0x18, 0xc2, 0x01, 0xf0, 0x0f, 0x49, 0x81, 0x83, 0x0f, 0x49, 0xa0, 0xc2,
- 0x01, 0xbd, 0x0f, 0x49, 0x91, 0xc2, 0x1d, 0x5f, 0x0f, 0x49, 0xd9, 0xc2,
- 0x00, 0xa4, 0x0f, 0x49, 0xe8, 0xc2, 0x0f, 0x61, 0x0f, 0x49, 0x99, 0xc2,
- 0x00, 0xa4, 0x0f, 0x49, 0xf9, 0xc2, 0x01, 0x8a, 0x0f, 0x4a, 0x10, 0x83,
- 0x0f, 0x49, 0xd1, 0xc2, 0x00, 0x48, 0x0f, 0x4a, 0x00, 0xc2, 0x01, 0x47,
- 0x0f, 0x4a, 0x91, 0xc4, 0x04, 0x5e, 0x0f, 0x4a, 0x98, 0xc3, 0x06, 0x9e,
- 0x0f, 0x4a, 0xa1, 0xc3, 0x0c, 0x5b, 0x0f, 0x4a, 0xa8, 0xc2, 0x26, 0x51,
- 0x0f, 0x4a, 0xb1, 0xc4, 0x18, 0x83, 0x0f, 0x4a, 0xb8, 0xc7, 0xc5, 0x23,
- 0x0f, 0xbb, 0x61, 0xc4, 0xe6, 0x0f, 0x0f, 0xbb, 0x58, 0x02, 0x41, 0x9e,
- 0xf8, 0xc6, 0xd4, 0x71, 0x0f, 0xbb, 0x2b, 0x01, 0x9f, 0x00, 0x48, 0xb9,
- 0xb5, 0x41, 0x9f, 0x04, 0xc3, 0x06, 0x01, 0x0f, 0xb9, 0x01, 0xcb, 0x48,
- 0x31, 0x0f, 0xb9, 0x28, 0xc2, 0xad, 0x23, 0x0f, 0xba, 0x61, 0xcb, 0x92,
- 0xa5, 0x0f, 0xba, 0x71, 0xc6, 0xd4, 0x23, 0x0f, 0xba, 0x80, 0xc5, 0xda,
- 0xed, 0x0f, 0xbb, 0x0b, 0x01, 0x9f, 0x13, 0xc4, 0x2e, 0xb2, 0x0f, 0xbb,
- 0x00, 0xc4, 0xe0, 0xbf, 0x0f, 0xba, 0x5b, 0x01, 0x9f, 0x19, 0xc7, 0xc9,
- 0x2f, 0x0f, 0xba, 0xc0, 0xc4, 0xe0, 0x17, 0x0f, 0xbb, 0x19, 0xca, 0xa0,
- 0x86, 0x0f, 0xbb, 0x20, 0xc2, 0xe7, 0x79, 0x0f, 0xba, 0x00, 0xc4, 0x91,
- 0x0a, 0x0f, 0xb9, 0x49, 0xc5, 0x89, 0x0f, 0x0f, 0xba, 0x40, 0xc5, 0xd8,
- 0x9a, 0x0f, 0xb9, 0x93, 0x01, 0x9f, 0x1f, 0xc5, 0xd9, 0x5d, 0x0f, 0xb9,
- 0xdb, 0x01, 0x9f, 0x29, 0xc4, 0x0a, 0x88, 0x0f, 0xbb, 0x68, 0xc2, 0xe7,
- 0x79, 0x0f, 0xb8, 0xc8, 0xc5, 0xda, 0x16, 0x0f, 0xb8, 0x53, 0x01, 0x9f,
- 0x2f, 0xc5, 0xd8, 0x0e, 0x0f, 0xb8, 0xb2, 0x01, 0x9f, 0x39, 0x46, 0x60,
- 0x4b, 0xc1, 0x9f, 0x3f, 0xc4, 0x4a, 0x78, 0x0f, 0xb8, 0x68, 0x96, 0x0f,
- 0xb8, 0xa3, 0x01, 0x9f, 0x4b, 0xc9, 0xab, 0x50, 0x0f, 0xb9, 0xc8, 0xcd,
- 0x77, 0x09, 0x0f, 0xba, 0x91, 0xd3, 0x45, 0xd6, 0x0f, 0xba, 0xe2, 0x01,
- 0x9f, 0x51, 0x00, 0xc1, 0x9f, 0x57, 0xc6, 0xd0, 0xff, 0x0f, 0xb8, 0x28,
- 0xc4, 0xe2, 0x53, 0x0f, 0xb9, 0xb3, 0x01, 0x9f, 0x69, 0xc2, 0x06, 0x1f,
- 0x0f, 0xba, 0x29, 0xc5, 0xd8, 0xc7, 0x0f, 0xbb, 0x50, 0x02, 0x41, 0x9f,
- 0x6f, 0xc2, 0xe7, 0x79, 0x0f, 0xb8, 0xe8, 0xc8, 0xc0, 0x8d, 0x0f, 0xba,
- 0xb1, 0xc2, 0x02, 0x53, 0x0f, 0xbb, 0x70, 0xc4, 0xb2, 0x10, 0x0f, 0xbb,
- 0x91, 0xc5, 0xd6, 0x4c, 0x0f, 0xbb, 0x98, 0x22, 0xc1, 0x9f, 0x77, 0x21,
- 0xc1, 0x9f, 0x9f, 0x20, 0xc1, 0x9f, 0xd0, 0x1f, 0xc1, 0x9f, 0xfb, 0x1e,
- 0xc1, 0xa0, 0x26, 0x1d, 0xc1, 0xa0, 0x51, 0x23, 0xc1, 0xa0, 0x75, 0x24,
- 0xc1, 0xa0, 0xa0, 0x25, 0xc1, 0xa0, 0xc8, 0x26, 0x41, 0xa0, 0xf0, 0x1d,
- 0xc1, 0xa1, 0x1e, 0x1e, 0xc1, 0xa1, 0x58, 0x1f, 0xc1, 0xa1, 0x86, 0x20,
- 0xc1, 0xa1, 0xb1, 0x21, 0xc1, 0xa1, 0xdc, 0x22, 0xc1, 0xa2, 0x04, 0x23,
- 0xc1, 0xa2, 0x2c, 0x24, 0xc1, 0xa2, 0x54, 0x25, 0xc1, 0xa2, 0x7c, 0x26,
- 0x41, 0xa2, 0xa4, 0x1d, 0xc1, 0xa2, 0xcc, 0x1e, 0xc1, 0xa2, 0xfd, 0x1f,
- 0xc1, 0xa3, 0x2b, 0x20, 0xc1, 0xa3, 0x56, 0x21, 0xc1, 0xa3, 0x7e, 0x22,
- 0xc1, 0xa3, 0xa6, 0x23, 0xc1, 0xa3, 0xce, 0x24, 0xc1, 0xa3, 0xf9, 0x25,
- 0xc1, 0xa4, 0x21, 0x26, 0x41, 0xa4, 0x4c, 0x1d, 0xc1, 0xa4, 0x7a, 0x1e,
- 0xc1, 0xa4, 0xa5, 0x1f, 0xc1, 0xa4, 0xcd, 0x20, 0xc1, 0xa4, 0xf8, 0x21,
- 0xc1, 0xa5, 0x23, 0x22, 0xc1, 0xa5, 0x4b, 0x23, 0xc1, 0xa5, 0x76, 0x24,
- 0xc1, 0xa5, 0xa4, 0x25, 0xc1, 0xa5, 0xcf, 0x26, 0x41, 0xa5, 0xfd, 0x1d,
- 0xc1, 0xa6, 0x27, 0x1e, 0xc1, 0xa6, 0x4f, 0x1f, 0xc1, 0xa6, 0x77, 0x20,
- 0xc1, 0xa6, 0x9f, 0x21, 0xc1, 0xa6, 0xc7, 0x22, 0xc1, 0xa6, 0xef, 0x23,
- 0xc1, 0xa7, 0x1d, 0x24, 0xc1, 0xa7, 0x45, 0x25, 0xc1, 0xa7, 0x6d, 0x26,
- 0x41, 0xa7, 0x95, 0x1d, 0xc1, 0xa7, 0xb5, 0x1e, 0xc1, 0xa7, 0xd9, 0x1f,
- 0xc1, 0xa8, 0x01, 0xc2, 0xe7, 0xcf, 0x0a, 0x32, 0x30, 0xcf, 0x62, 0x90,
- 0x01, 0x11, 0x99, 0xd2, 0x48, 0x72, 0x01, 0x4a, 0x00, 0xd3, 0x45, 0x9d,
- 0x01, 0x0d, 0xb1, 0x4f, 0x01, 0xe8, 0x41, 0xa8, 0x29, 0xe0, 0x0a, 0x67,
- 0x0f, 0xa8, 0x20, 0xc8, 0x4f, 0x39, 0x01, 0x4d, 0x21, 0xc8, 0x4b, 0x66,
- 0x01, 0x4c, 0xf0, 0xc9, 0x17, 0x44, 0x01, 0x10, 0xb8, 0xc2, 0x00, 0xa4,
- 0x08, 0xba, 0x21, 0x83, 0x08, 0xba, 0x18, 0xc2, 0x00, 0xa4, 0x08, 0xba,
- 0x11, 0x83, 0x08, 0xba, 0x08, 0xc2, 0x00, 0xde, 0x08, 0xb8, 0xd1, 0xc2,
- 0x01, 0x29, 0x08, 0xb8, 0xb1, 0xc2, 0x00, 0xb3, 0x08, 0xb8, 0x28, 0xc6,
- 0x02, 0x61, 0x08, 0xb9, 0xe9, 0xcc, 0x89, 0xc8, 0x08, 0xb9, 0xe0, 0x00,
- 0x41, 0xa8, 0x47, 0xc4, 0x03, 0xd9, 0x01, 0x1a, 0xf1, 0xc8, 0x4f, 0x39,
- 0x01, 0x1a, 0xc0, 0xc9, 0x4f, 0x38, 0x01, 0x1b, 0xc0, 0xcb, 0x9a, 0x82,
- 0x01, 0x1b, 0x91, 0x45, 0x36, 0xab, 0xc1, 0xa8, 0x8b, 0xc8, 0xbc, 0xa5,
- 0x01, 0x1a, 0xe8, 0x00, 0xc1, 0xa8, 0x9d, 0xca, 0x6e, 0x45, 0x01, 0x1a,
- 0xb0, 0x00, 0xc1, 0xa8, 0xaf, 0x43, 0x1d, 0x77, 0x41, 0xa8, 0xc1, 0xc9,
- 0xb0, 0xf9, 0x01, 0x1b, 0x69, 0xcc, 0x82, 0x6c, 0x01, 0x1b, 0x18, 0xc9,
- 0x1e, 0x42, 0x01, 0x1b, 0x29, 0x42, 0x00, 0x15, 0xc1, 0xa8, 0xcd, 0xc8,
- 0x4f, 0x39, 0x01, 0x1a, 0xe1, 0xc9, 0x00, 0xde, 0x01, 0x1a, 0x49, 0xc3,
- 0xaa, 0xf4, 0x01, 0x19, 0xf0, 0x46, 0x00, 0x8f, 0xc1, 0xa8, 0xd9, 0xd9,
- 0x1e, 0x29, 0x01, 0x12, 0x30, 0x87, 0x08, 0x59, 0xa9, 0xc2, 0x00, 0xc3,
- 0x08, 0x59, 0x48, 0xc3, 0x0a, 0x91, 0x08, 0x59, 0xa1, 0x0a, 0xc1, 0xa8,
- 0xe8, 0x87, 0x08, 0x59, 0x78, 0x87, 0x08, 0x59, 0x59, 0xc2, 0x05, 0xd0,
- 0x08, 0x59, 0x50, 0xc2, 0x00, 0x8c, 0x08, 0x59, 0x39, 0xc2, 0x05, 0xd0,
- 0x08, 0x59, 0x31, 0x87, 0x08, 0x59, 0x29, 0x09, 0x41, 0xa8, 0xf2, 0xc2,
- 0x01, 0xf0, 0x08, 0x58, 0xe1, 0x87, 0x08, 0x58, 0xd8, 0xc2, 0x01, 0xf0,
- 0x08, 0x58, 0xd1, 0x87, 0x08, 0x58, 0xc9, 0xc2, 0x00, 0x9f, 0x08, 0x58,
- 0xe8, 0xc2, 0x01, 0xf0, 0x08, 0x58, 0xb1, 0xc2, 0x00, 0x63, 0x08, 0x58,
- 0xa9, 0x87, 0x08, 0x58, 0xa0, 0xc2, 0x00, 0x7b, 0x08, 0x58, 0x99, 0x87,
- 0x08, 0x58, 0x89, 0xc2, 0x05, 0xd0, 0x08, 0x58, 0x90, 0x97, 0x08, 0x58,
- 0x78, 0x8b, 0x08, 0x58, 0x68, 0x91, 0x08, 0x58, 0x58, 0x87, 0x08, 0x58,
- 0x48, 0x87, 0x08, 0x58, 0x33, 0x01, 0xa9, 0x02, 0x83, 0x08, 0x58, 0x0b,
- 0x01, 0xa9, 0x06, 0x90, 0x08, 0x58, 0x21, 0x91, 0x08, 0x58, 0x10, 0x87,
- 0x08, 0x59, 0x01, 0xc2, 0x01, 0xf0, 0x08, 0x59, 0x08, 0x87, 0x08, 0x59,
- 0x81, 0xc2, 0x01, 0xf0, 0x08, 0x59, 0x90, 0x00, 0x41, 0xa9, 0x0e, 0x0a,
- 0xc1, 0xa9, 0x1a, 0xc2, 0x00, 0x4d, 0x08, 0x08, 0x83, 0x01, 0xa9, 0x2c,
- 0x19, 0x41, 0xa9, 0x32, 0x0b, 0xc1, 0xa9, 0x42, 0x11, 0x41, 0xa9, 0x54,
- 0xc2, 0x26, 0x51, 0x08, 0x08, 0x63, 0x01, 0xa9, 0x66, 0xc4, 0x18, 0x83,
- 0x08, 0x08, 0x6a, 0x01, 0xa9, 0x73, 0x00, 0xc1, 0xa9, 0x80, 0x9b, 0x08,
- 0x08, 0xba, 0x01, 0xa9, 0x8c, 0x00, 0xc1, 0xa9, 0x92, 0xc2, 0x0c, 0x57,
- 0x08, 0x08, 0xc2, 0x01, 0xa9, 0x9e, 0xc9, 0xaf, 0x7f, 0x08, 0x09, 0xb9,
- 0x08, 0xc1, 0xa9, 0xa4, 0xce, 0x74, 0xa7, 0x08, 0x09, 0xc9, 0xcd, 0x80,
- 0xf0, 0x08, 0x09, 0xd0, 0xc4, 0x00, 0x8a, 0x08, 0x08, 0x01, 0xc3, 0x01,
- 0x4a, 0x08, 0x08, 0x08, 0x45, 0x02, 0x13, 0xc1, 0xa9, 0xb0, 0x44, 0x02,
- 0x4e, 0x41, 0xa9, 0xf0, 0xc2, 0x01, 0xc7, 0x01, 0x2b, 0xcb, 0x01, 0xaa,
- 0x08, 0xc4, 0x01, 0x1e, 0x01, 0x2b, 0xc3, 0x01, 0xaa, 0x0e, 0x42, 0x01,
- 0x4a, 0xc1, 0xaa, 0x14, 0xc5, 0x01, 0xf7, 0x01, 0x2b, 0xd1, 0xc8, 0x03,
- 0x3b, 0x01, 0x28, 0x1b, 0x01, 0xaa, 0x23, 0x4f, 0x64, 0xac, 0xc1, 0xaa,
- 0x29, 0x4c, 0x52, 0x71, 0xc1, 0xaa, 0x35, 0xca, 0x00, 0xf6, 0x01, 0x28,
- 0x08, 0x45, 0x06, 0xf3, 0xc1, 0xaa, 0x41, 0x43, 0x05, 0x65, 0x41, 0xaa,
- 0x5c, 0x4b, 0x97, 0x28, 0xc1, 0xaa, 0x74, 0x4b, 0x8e, 0xd2, 0xc1, 0xaa,
- 0x86, 0x4a, 0x10, 0x37, 0xc1, 0xaa, 0x98, 0x4a, 0x5f, 0x72, 0x41, 0xaa,
- 0xaa, 0x4b, 0x97, 0x28, 0xc1, 0xaa, 0xbc, 0x4b, 0x8e, 0xd2, 0xc1, 0xaa,
- 0xce, 0x4a, 0x5f, 0x72, 0xc1, 0xaa, 0xe0, 0x4a, 0x10, 0x37, 0x41, 0xaa,
- 0xf8, 0x4f, 0x6c, 0x2c, 0xc1, 0xab, 0x10, 0xdc, 0x12, 0xae, 0x01, 0x2a,
- 0x31, 0xdc, 0x13, 0xaa, 0x01, 0x2a, 0x21, 0x4f, 0x12, 0xb3, 0x41, 0xab,
- 0x22, 0xd8, 0x24, 0xa8, 0x01, 0x1d, 0xb0, 0xc8, 0x1e, 0xe0, 0x01, 0x19,
- 0x09, 0xcc, 0x89, 0x50, 0x01, 0x5e, 0x59, 0xd0, 0x1e, 0xbf, 0x01, 0x72,
- 0xd9, 0xd1, 0x1a, 0x39, 0x01, 0x72, 0xe0, 0x05, 0xc1, 0xab, 0x34, 0xcc,
- 0x8d, 0x7c, 0x01, 0x71, 0x28, 0x05, 0xc1, 0xab, 0x40, 0xcc, 0x8d, 0x7c,
- 0x01, 0x71, 0x20, 0xd0, 0x5c, 0x22, 0x01, 0x4e, 0x91, 0xcf, 0x6b, 0x2d,
- 0x01, 0x4e, 0x88, 0xca, 0x9e, 0x4c, 0x0f, 0xaa, 0x79, 0xca, 0xa4, 0x78,
- 0x0f, 0xcb, 0x18, 0xc5, 0xd5, 0xde, 0x0f, 0xa6, 0x88, 0x97, 0x01, 0x8d,
- 0x00, 0x89, 0x01, 0x89, 0x5b, 0x01, 0xab, 0x4c, 0x90, 0x01, 0x89, 0x78,
- 0x8a, 0x01, 0x8d, 0xc8, 0x90, 0x01, 0x89, 0x61, 0x97, 0x01, 0x8d, 0x19,
- 0x8a, 0x01, 0x8d, 0xc1, 0x99, 0x01, 0x8d, 0xe0, 0x99, 0x01, 0x8d, 0xe8,
- 0x8b, 0x01, 0x8d, 0x10, 0x8a, 0x01, 0x88, 0x99, 0x8b, 0x01, 0x8d, 0x09,
- 0x9b, 0x01, 0x8d, 0xd0, 0x8a, 0x01, 0x88, 0xa0, 0x8a, 0x01, 0x88, 0xa8,
- 0x8b, 0x01, 0x88, 0xf3, 0x01, 0xab, 0x50, 0x97, 0x01, 0x89, 0x03, 0x01,
- 0xab, 0x56, 0x90, 0x01, 0x89, 0x13, 0x01, 0xab, 0x5c, 0x8f, 0x01, 0x8d,
- 0x81, 0x8a, 0x01, 0x8d, 0xf8, 0x97, 0x01, 0x89, 0x09, 0xcf, 0x37, 0x1e,
- 0x01, 0x89, 0x71, 0x91, 0x01, 0x8d, 0x31, 0x10, 0xc1, 0xab, 0x64, 0x8f,
- 0x01, 0x8d, 0x89, 0x87, 0x01, 0x8d, 0xf0, 0x8a, 0x01, 0x88, 0xe9, 0x8b,
- 0x01, 0x88, 0xf9, 0x90, 0x01, 0x89, 0x1b, 0x01, 0xab, 0x6c, 0x94, 0x01,
- 0x89, 0x31, 0x87, 0x01, 0x8d, 0x20, 0x97, 0x01, 0x89, 0x49, 0x8a, 0x01,
- 0x89, 0x69, 0x94, 0x01, 0x8d, 0x41, 0xc2, 0x1c, 0x13, 0x01, 0x8d, 0x53,
- 0x01, 0xab, 0x74, 0x8f, 0x01, 0x8d, 0x60, 0xc2, 0x1c, 0x13, 0x01, 0x8d,
- 0x58, 0xa1, 0x0f, 0xd8, 0x43, 0x01, 0xab, 0x78, 0x9f, 0x0f, 0xd8, 0x13,
- 0x01, 0xab, 0x83, 0xa2, 0x0f, 0xd8, 0x83, 0x01, 0xab, 0x9c, 0xa0, 0x0f,
- 0xd8, 0x23, 0x01, 0xab, 0xa0, 0xa3, 0x0f, 0xd8, 0xf8, 0xa2, 0x0f, 0xd8,
- 0x9b, 0x01, 0xab, 0xb1, 0xa1, 0x0f, 0xd8, 0x5b, 0x01, 0xab, 0xb5, 0xa3,
- 0x0f, 0xd9, 0x10, 0xa2, 0x0f, 0xd8, 0x8b, 0x01, 0xab, 0xc0, 0xa0, 0x0f,
- 0xd8, 0x2b, 0x01, 0xab, 0xc4, 0xa3, 0x0f, 0xd9, 0x01, 0xa1, 0x0f, 0xd8,
- 0x4a, 0x01, 0xab, 0xd6, 0xa3, 0x0f, 0xd9, 0x68, 0xa3, 0x0f, 0xd9, 0x31,
- 0xa2, 0x0f, 0xd8, 0xb2, 0x01, 0xab, 0xdd, 0x05, 0xc1, 0xab, 0xe1, 0x15,
- 0xc1, 0xac, 0x08, 0x16, 0xc1, 0xac, 0x4b, 0x06, 0xc1, 0xac, 0x69, 0x14,
- 0xc1, 0xac, 0x7c, 0x0e, 0xc1, 0xac, 0x8e, 0xd6, 0x2c, 0x41, 0x01, 0x3a,
- 0x99, 0x08, 0xc1, 0xac, 0x9e, 0xc3, 0xe7, 0xde, 0x01, 0x38, 0x91, 0x0f,
- 0xc1, 0xac, 0xa6, 0x17, 0xc1, 0xac, 0xb2, 0x0a, 0xc1, 0xac, 0xbc, 0x12,
- 0xc1, 0xac, 0xca, 0x43, 0x00, 0x7b, 0xc1, 0xac, 0xdc, 0xc6, 0xd1, 0xe9,
- 0x01, 0x4e, 0x99, 0xc7, 0xca, 0xef, 0x01, 0x5e, 0x20, 0x4a, 0x17, 0x9f,
- 0xc1, 0xac, 0xe8, 0x4f, 0x6a, 0x4c, 0x41, 0xac, 0xfa, 0xca, 0xa1, 0x8a,
- 0x0f, 0xa5, 0xb9, 0xc9, 0xb5, 0xa6, 0x0f, 0xa5, 0xb1, 0xcb, 0x97, 0xee,
- 0x0f, 0xa5, 0xa9, 0xc8, 0x7e, 0x6b, 0x0f, 0xa5, 0xa0, 0xc2, 0x00, 0xb7,
- 0x0f, 0x9c, 0x43, 0x01, 0xad, 0x0e, 0x42, 0x00, 0xed, 0x41, 0xad, 0x14,
- 0x0f, 0xc1, 0xad, 0x24, 0xc3, 0x01, 0x8d, 0x00, 0xda, 0xd2, 0x01, 0xad,
- 0x33, 0x4a, 0xa3, 0x74, 0xc1, 0xad, 0x39, 0x4b, 0x93, 0xc3, 0xc1, 0xad,
- 0x45, 0x4a, 0x52, 0xb5, 0xc1, 0xad, 0x51, 0x06, 0x41, 0xad, 0x75, 0x42,
- 0x03, 0xa4, 0xc1, 0xad, 0x8f, 0xc4, 0xdf, 0xdf, 0x00, 0xda, 0xf0, 0xc4,
- 0x22, 0x71, 0x00, 0xda, 0xc9, 0xc5, 0x01, 0xdb, 0x00, 0xda, 0xc1, 0x15,
- 0xc1, 0xad, 0x9b, 0x08, 0xc1, 0xad, 0xa7, 0x16, 0xc1, 0xad, 0xb3, 0xc3,
- 0x01, 0xb4, 0x00, 0xda, 0x89, 0xc4, 0x15, 0xd3, 0x00, 0xda, 0x80, 0x03,
- 0xc1, 0xad, 0xbf, 0xc9, 0xb2, 0xb2, 0x00, 0xda, 0x51, 0xc8, 0xb6, 0xbd,
- 0x00, 0xda, 0x49, 0x07, 0xc1, 0xad, 0xda, 0x16, 0xc1, 0xad, 0xe6, 0x0d,
- 0xc1, 0xad, 0xf3, 0xc2, 0x00, 0xa4, 0x00, 0xd9, 0x99, 0xc2, 0x0c, 0x65,
- 0x00, 0xd9, 0x93, 0x01, 0xae, 0x00, 0xc2, 0x01, 0x09, 0x00, 0xd9, 0x79,
- 0xc2, 0x00, 0xc7, 0x00, 0xd9, 0x73, 0x01, 0xae, 0x06, 0xc2, 0x02, 0x59,
- 0x00, 0xd9, 0x6b, 0x01, 0xae, 0x0f, 0xc2, 0x1d, 0x5f, 0x00, 0xd9, 0x61,
- 0xc2, 0x00, 0xad, 0x00, 0xd9, 0x59, 0xc2, 0x00, 0xde, 0x00, 0xd9, 0x4b,
- 0x01, 0xae, 0x18, 0xc2, 0x03, 0xa4, 0x00, 0xd9, 0x3b, 0x01, 0xae, 0x1e,
- 0x10, 0xc1, 0xae, 0x24, 0xc2, 0x0b, 0xc6, 0x00, 0xd9, 0x23, 0x01, 0xae,
- 0x37, 0xc2, 0x24, 0x58, 0x00, 0xd8, 0xd3, 0x01, 0xae, 0x3d, 0xc2, 0x03,
- 0x40, 0x00, 0xd8, 0xc3, 0x01, 0xae, 0x43, 0xc2, 0x01, 0x29, 0x00, 0xd8,
- 0xab, 0x01, 0xae, 0x49, 0xc5, 0xdd, 0xb8, 0x00, 0xd8, 0x8b, 0x01, 0xae,
- 0x4f, 0xc5, 0xd6, 0xb5, 0x00, 0xd8, 0x4b, 0x01, 0xae, 0x55, 0xc5, 0xde,
- 0x17, 0x00, 0xd8, 0x3a, 0x01, 0xae, 0x5b, 0xc5, 0xd8, 0xdb, 0x00, 0xda,
- 0x13, 0x01, 0xae, 0x61, 0x16, 0xc1, 0xae, 0x67, 0xc8, 0xba, 0xbd, 0x00,
- 0xd9, 0xe3, 0x01, 0xae, 0x76, 0xc7, 0xc2, 0x28, 0x00, 0xd9, 0xd3, 0x01,
- 0xae, 0x7c, 0xc4, 0xca, 0x39, 0x00, 0xd9, 0xc3, 0x01, 0xae, 0x82, 0xc3,
- 0x99, 0x97, 0x00, 0xd9, 0xb2, 0x01, 0xae, 0x88, 0xc7, 0xc4, 0xd6, 0x00,
- 0xd9, 0xa1, 0xc5, 0xdd, 0x6d, 0x00, 0xd8, 0x21, 0xc6, 0xcf, 0x97, 0x00,
- 0xd8, 0x19, 0xc5, 0xd5, 0xbb, 0x00, 0xd8, 0x11, 0x44, 0xdf, 0xd7, 0x41,
- 0xae, 0x8e, 0x44, 0x05, 0x2b, 0xc1, 0xae, 0x9a, 0x43, 0x01, 0x64, 0xc1,
- 0xae, 0xa6, 0xc8, 0xaf, 0xc8, 0x0b, 0x57, 0x90, 0x8b, 0x0b, 0x57, 0x69,
- 0x87, 0x0b, 0x57, 0x63, 0x01, 0xae, 0xb2, 0x97, 0x0b, 0x57, 0x53, 0x01,
- 0xae, 0xbc, 0x91, 0x0b, 0x57, 0x43, 0x01, 0xae, 0xc2, 0x83, 0x0b, 0x57,
- 0x39, 0xc2, 0x01, 0x09, 0x0b, 0x56, 0xdb, 0x01, 0xae, 0xc6, 0xc2, 0x03,
- 0xa4, 0x0b, 0x57, 0x29, 0x1b, 0xc1, 0xae, 0xcc, 0xc2, 0x5d, 0x23, 0x0b,
- 0x57, 0x19, 0xc2, 0x00, 0xde, 0x0b, 0x57, 0x11, 0xc2, 0x02, 0x18, 0x0b,
- 0x57, 0x09, 0xc2, 0x00, 0x69, 0x0b, 0x56, 0xf9, 0x06, 0xc1, 0xae, 0xd8,
- 0x09, 0xc1, 0xae, 0xe2, 0xc2, 0x00, 0xfa, 0x0b, 0x56, 0xe1, 0xc4, 0xe1,
- 0x18, 0x0b, 0x56, 0xd1, 0xc2, 0x00, 0x41, 0x0b, 0x56, 0xc9, 0x0d, 0xc1,
- 0xae, 0xee, 0xc3, 0x0b, 0x10, 0x0b, 0x56, 0xa1, 0xc2, 0x00, 0x67, 0x0b,
- 0x56, 0x99, 0xc2, 0x02, 0x60, 0x0b, 0x56, 0x90, 0x45, 0xd5, 0x6b, 0xc1,
- 0xae, 0xf8, 0x83, 0x05, 0x35, 0x59, 0x07, 0xc1, 0xaf, 0x1c, 0x17, 0xc1,
- 0xaf, 0x26, 0x8b, 0x05, 0x36, 0xe8, 0x83, 0x05, 0x35, 0x09, 0x97, 0x05,
- 0x35, 0x19, 0xc3, 0x19, 0x4c, 0x05, 0x35, 0xd1, 0x07, 0xc1, 0xaf, 0x30,
- 0x91, 0x05, 0x36, 0xfb, 0x01, 0xaf, 0x3e, 0x8b, 0x05, 0x37, 0x29, 0xc2,
- 0x03, 0xa4, 0x05, 0x37, 0x48, 0x07, 0xc1, 0xaf, 0x4a, 0x0b, 0xc1, 0xaf,
- 0x58, 0x97, 0x05, 0x36, 0x61, 0xc2, 0x0f, 0xf5, 0x05, 0x36, 0x88, 0x03,
- 0xc1, 0xaf, 0x62, 0x8b, 0x05, 0x37, 0x21, 0x07, 0x41, 0xaf, 0x6a, 0xc2,
- 0x15, 0x1d, 0x05, 0x35, 0x41, 0xc3, 0x57, 0x68, 0x05, 0x35, 0x89, 0x0c,
- 0xc1, 0xaf, 0x72, 0x97, 0x05, 0x35, 0xeb, 0x01, 0xaf, 0x84, 0xc3, 0x02,
- 0x28, 0x05, 0x36, 0x19, 0x16, 0xc1, 0xaf, 0x8a, 0x8b, 0x05, 0x36, 0x79,
- 0x09, 0xc1, 0xaf, 0x96, 0x83, 0x05, 0x36, 0xd8, 0x83, 0x05, 0x35, 0x51,
- 0xc4, 0xe3, 0xfe, 0x05, 0x35, 0x71, 0x97, 0x05, 0x36, 0x69, 0x8b, 0x05,
- 0x36, 0xe1, 0xc2, 0x89, 0x44, 0x05, 0x36, 0xf0, 0x07, 0xc1, 0xaf, 0xa6,
- 0x97, 0x05, 0x35, 0xa9, 0x8b, 0x05, 0x36, 0x71, 0x04, 0xc1, 0xaf, 0xb0,
- 0x83, 0x05, 0x37, 0x19, 0x91, 0x05, 0x37, 0x30, 0xc2, 0x5f, 0x91, 0x05,
- 0x35, 0xa1, 0x0a, 0xc1, 0xaf, 0xbc, 0x8b, 0x05, 0x35, 0xb9, 0xc3, 0xdd,
- 0xec, 0x05, 0x35, 0xc9, 0xc4, 0xc7, 0x7f, 0x05, 0x37, 0x60, 0xc2, 0x89,
- 0x44, 0x05, 0x35, 0xf9, 0xc2, 0x64, 0x9c, 0x05, 0x36, 0x09, 0x83, 0x05,
- 0x36, 0x10, 0xc2, 0x0f, 0x4d, 0x05, 0x36, 0x49, 0x83, 0x05, 0x36, 0xd0,
- 0xc2, 0x00, 0x4c, 0x05, 0x36, 0x59, 0x97, 0x05, 0x36, 0xc1, 0xc2, 0x00,
- 0x3a, 0x05, 0x36, 0xc9, 0xc5, 0xdd, 0x09, 0x05, 0x37, 0x68, 0x4c, 0x89,
- 0x8c, 0xc1, 0xaf, 0xd0, 0xc2, 0x00, 0xad, 0x05, 0x37, 0xa8, 0xe0, 0x02,
- 0x27, 0x01, 0x3d, 0x58, 0xcb, 0x92, 0x4d, 0x0f, 0xac, 0x11, 0xda, 0x1c,
- 0xa9, 0x0f, 0xa8, 0xc8, 0xc4, 0x40, 0xc6, 0x00, 0x00, 0x41, 0x5a, 0x1b,
- 0xbf, 0x41, 0xaf, 0xdc, 0x4c, 0x88, 0x3c, 0xc1, 0xaf, 0xe8, 0xc9, 0xaf,
- 0x64, 0x00, 0xdf, 0x30, 0xc7, 0xc5, 0xb6, 0x00, 0xdf, 0x99, 0xc5, 0xca,
- 0x26, 0x00, 0xdf, 0x90, 0x8a, 0x00, 0xdf, 0x89, 0xc2, 0x00, 0x35, 0x00,
- 0xdf, 0x80, 0x97, 0x00, 0xdf, 0x73, 0x01, 0xaf, 0xf8, 0x45, 0xca, 0x78,
- 0xc1, 0xaf, 0xfe, 0x91, 0x00, 0xdf, 0x61, 0x8b, 0x00, 0xdf, 0x51, 0x87,
- 0x00, 0xdf, 0x3b, 0x01, 0xb0, 0x06, 0xc8, 0xbc, 0x35, 0x00, 0xdf, 0x40,
- 0x97, 0x00, 0xdf, 0x29, 0x8b, 0x00, 0xdf, 0x21, 0x0f, 0xc1, 0xb0, 0x0a,
- 0x10, 0xc1, 0xb0, 0x17, 0xc2, 0x03, 0x40, 0x00, 0xdf, 0x09, 0x15, 0xc1,
- 0xb0, 0x33, 0xc2, 0x00, 0xc7, 0x00, 0xde, 0xf1, 0xc2, 0x1d, 0x5f, 0x00,
- 0xde, 0xd9, 0xc2, 0x02, 0x59, 0x00, 0xde, 0x91, 0xc2, 0x0b, 0xc6, 0x00,
- 0xde, 0x89, 0xc2, 0x24, 0x58, 0x00, 0xde, 0x81, 0xc2, 0x01, 0x29, 0x00,
- 0xde, 0x71, 0xc2, 0x03, 0xa4, 0x00, 0xde, 0x3b, 0x01, 0xb0, 0x43, 0xc2,
- 0x01, 0x09, 0x00, 0xde, 0x59, 0xc7, 0xca, 0x78, 0x00, 0xde, 0x31, 0xc2,
- 0x00, 0xde, 0x00, 0xde, 0x29, 0xc2, 0x00, 0xa4, 0x00, 0xde, 0x11, 0x83,
- 0x00, 0xde, 0x00, 0x0d, 0xc1, 0xb0, 0x49, 0xc2, 0x00, 0xa4, 0x00, 0x4d,
- 0xc9, 0x15, 0xc1, 0xb0, 0x56, 0xc2, 0x00, 0xc7, 0x00, 0x4d, 0x91, 0x14,
- 0xc1, 0xb0, 0x66, 0x1b, 0xc1, 0xb0, 0x79, 0xc2, 0x00, 0xad, 0x00, 0x4d,
- 0x71, 0x04, 0xc1, 0xb0, 0x83, 0x12, 0xc1, 0xb0, 0x8d, 0x10, 0xc1, 0xb0,
- 0x97, 0x06, 0xc1, 0xb0, 0xad, 0x16, 0xc1, 0xb0, 0xbb, 0x0c, 0xc1, 0xb0,
- 0xc9, 0x05, 0xc1, 0xb0, 0xd3, 0x09, 0xc1, 0xb0, 0xdd, 0x83, 0x00, 0x4c,
- 0x2b, 0x01, 0xb0, 0xe7, 0x91, 0x00, 0x4c, 0x99, 0x8b, 0x00, 0x4c, 0x3b,
- 0x01, 0xb0, 0xf3, 0x97, 0x00, 0x4c, 0x4b, 0x01, 0xb0, 0xf7, 0x18, 0xc1,
- 0xb0, 0xfb, 0x87, 0x00, 0x4c, 0x78, 0x44, 0x00, 0xcc, 0xc1, 0xb1, 0x07,
- 0xca, 0xa7, 0x70, 0x00, 0x4f, 0xf0, 0x03, 0xc1, 0xb1, 0x1d, 0x91, 0x00,
- 0x4e, 0x59, 0x87, 0x00, 0x4e, 0x39, 0x48, 0xac, 0xc1, 0xc1, 0xb1, 0x29,
- 0x97, 0x00, 0x4e, 0x0b, 0x01, 0xb1, 0x37, 0x8b, 0x00, 0x4d, 0xfa, 0x01,
- 0xb1, 0x3b, 0xcd, 0x75, 0x88, 0x00, 0x4e, 0xb9, 0xc3, 0x7a, 0x4c, 0x00,
- 0x4c, 0x01, 0xd0, 0x50, 0x41, 0x00, 0x4f, 0xe8, 0xc4, 0x15, 0xd3, 0x00,
- 0x4f, 0x31, 0xc3, 0x01, 0xb4, 0x00, 0x4f, 0x39, 0x16, 0xc1, 0xb1, 0x3f,
- 0x08, 0xc1, 0xb1, 0x4b, 0x15, 0xc1, 0xb1, 0x57, 0xc5, 0x01, 0xdb, 0x00,
- 0x4f, 0x71, 0xc4, 0x22, 0x71, 0x00, 0x4f, 0x78, 0xc4, 0x00, 0xba, 0x00,
- 0x4f, 0x91, 0xc4, 0x00, 0xcb, 0x00, 0x4f, 0x98, 0x4a, 0x76, 0xc8, 0xc1,
- 0xb1, 0x63, 0xd3, 0x45, 0x2b, 0x00, 0x4f, 0xc8, 0xe0, 0x07, 0xc7, 0x01,
- 0x5a, 0xf0, 0xc2, 0x0f, 0xf5, 0x00, 0xd0, 0xd9, 0x91, 0x00, 0xd0, 0xd1,
- 0x87, 0x00, 0xd0, 0xc9, 0x97, 0x00, 0xd0, 0xc1, 0x8b, 0x00, 0xd0, 0xb8,
- 0xc2, 0x00, 0xa4, 0x00, 0xd0, 0xb1, 0x83, 0x00, 0xd0, 0xa9, 0xc2, 0x0c,
- 0x65, 0x00, 0xd0, 0xa1, 0xc2, 0x04, 0x41, 0x00, 0xd0, 0x99, 0xc2, 0x00,
- 0xc7, 0x00, 0xd0, 0x91, 0xc2, 0x02, 0x59, 0x00, 0xd0, 0x89, 0xc2, 0x1d,
- 0x5f, 0x00, 0xd0, 0x81, 0x10, 0xc1, 0xb1, 0x76, 0xc2, 0x24, 0x58, 0x00,
- 0xd0, 0x69, 0xc2, 0x03, 0x40, 0x00, 0xd0, 0x61, 0xc2, 0x0b, 0xc6, 0x00,
- 0xd0, 0x49, 0xc2, 0x00, 0xb3, 0x00, 0xd0, 0x41, 0x0f, 0xc1, 0xb1, 0x88,
- 0xc2, 0x00, 0xde, 0x00, 0xd0, 0x29, 0xc2, 0x03, 0xa4, 0x00, 0xd0, 0x21,
- 0xc2, 0x01, 0x29, 0x00, 0xd0, 0x09, 0xc2, 0x04, 0x2b, 0x00, 0xd0, 0x00,
- 0x83, 0x00, 0xba, 0x41, 0xc2, 0x01, 0x29, 0x00, 0xba, 0x28, 0x45, 0xd5,
- 0x3e, 0xc1, 0xb1, 0x92, 0xc5, 0xb2, 0x89, 0x01, 0x40, 0x00, 0xc6, 0x59,
- 0xec, 0x08, 0x83, 0xf9, 0xc3, 0x01, 0xb4, 0x08, 0x82, 0x93, 0x01, 0xb1,
- 0xc7, 0xc4, 0x22, 0x71, 0x08, 0x82, 0xd3, 0x01, 0xb1, 0xcb, 0xc5, 0x01,
- 0xdb, 0x08, 0x82, 0xcb, 0x01, 0xb1, 0xd1, 0x15, 0xc1, 0xb1, 0xd5, 0x08,
- 0xc1, 0xb1, 0xe7, 0x16, 0x41, 0xb1, 0xef, 0x91, 0x08, 0x80, 0x8b, 0x01,
- 0xb1, 0xfd, 0x0e, 0xc1, 0xb2, 0x03, 0xc2, 0x00, 0xa4, 0x08, 0x81, 0x99,
- 0xc2, 0x02, 0x59, 0x08, 0x81, 0x69, 0xc2, 0x1d, 0x5f, 0x08, 0x81, 0x61,
- 0xc2, 0x00, 0xad, 0x08, 0x81, 0x59, 0x04, 0xc1, 0xb2, 0x0d, 0x12, 0xc1,
- 0xb2, 0x17, 0x10, 0xc1, 0xb2, 0x21, 0x06, 0xc1, 0xb2, 0x37, 0x16, 0xc1,
- 0xb2, 0x45, 0x0c, 0xc1, 0xb2, 0x53, 0x05, 0xc1, 0xb2, 0x5d, 0x09, 0xc1,
- 0xb2, 0x67, 0x0d, 0xc1, 0xb2, 0x71, 0x83, 0x08, 0x80, 0x2b, 0x01, 0xb2,
- 0x7b, 0x87, 0x08, 0x80, 0x79, 0x18, 0xc1, 0xb2, 0x87, 0x97, 0x08, 0x80,
- 0x4b, 0x01, 0xb2, 0x91, 0x8b, 0x08, 0x80, 0x3b, 0x01, 0xb2, 0x95, 0x15,
- 0x41, 0xb2, 0x99, 0x4a, 0x6f, 0xcd, 0xc1, 0xb2, 0xa9, 0xc5, 0x1e, 0x24,
- 0x08, 0x82, 0x30, 0xd0, 0x59, 0x02, 0x08, 0x83, 0x81, 0xcb, 0x98, 0x9e,
- 0x08, 0x80, 0x21, 0xcb, 0x91, 0x66, 0x08, 0x80, 0x19, 0xcb, 0x1e, 0x17,
- 0x08, 0x80, 0x01, 0xc8, 0x11, 0x40, 0x08, 0x80, 0x09, 0xc7, 0x44, 0x79,
- 0x08, 0x80, 0x10, 0x45, 0x06, 0x98, 0xc1, 0xb2, 0xd2, 0xcb, 0x95, 0x0d,
- 0x08, 0x82, 0x41, 0xc4, 0x1c, 0xd0, 0x08, 0x82, 0x38, 0x0e, 0xc1, 0xb2,
- 0xf6, 0xcc, 0x84, 0x94, 0x08, 0x82, 0x61, 0x42, 0x01, 0x4a, 0x41, 0xb3,
- 0x02, 0x42, 0x13, 0x4f, 0xc1, 0xb3, 0x0c, 0x4a, 0xa0, 0x68, 0x41, 0xb3,
- 0x18, 0xc6, 0x2f, 0x5b, 0x0e, 0x86, 0xc9, 0xc6, 0xd4, 0x8f, 0x0e, 0x86,
- 0xc0, 0x00, 0x41, 0xb3, 0x24, 0x00, 0xc1, 0xb3, 0x30, 0xc2, 0x00, 0xb3,
- 0x0e, 0x80, 0x82, 0x01, 0xb3, 0x3c, 0xc5, 0x5c, 0x7d, 0x0e, 0x84, 0x49,
- 0xc6, 0xac, 0xf1, 0x0e, 0x82, 0x51, 0xc6, 0xcd, 0x45, 0x0e, 0x81, 0xd2,
- 0x01, 0xb3, 0x40, 0x44, 0xe3, 0x3f, 0xc1, 0xb3, 0x46, 0xc6, 0xd3, 0x6f,
- 0x0e, 0x80, 0x60, 0x43, 0x0e, 0x56, 0xc1, 0xb3, 0x4e, 0xc5, 0xd6, 0x24,
- 0x0e, 0x80, 0x38, 0x46, 0xd1, 0x29, 0xc1, 0xb3, 0x5a, 0x42, 0x13, 0x4f,
- 0x41, 0xb3, 0x84, 0x11, 0xc1, 0xb3, 0x8e, 0xc2, 0x00, 0x8a, 0x0e, 0x84,
- 0x29, 0x45, 0xd7, 0xfa, 0x41, 0xb3, 0xa0, 0x45, 0xd7, 0xcd, 0xc1, 0xb3,
- 0xac, 0x44, 0xd2, 0x79, 0xc1, 0xb3, 0xb8, 0x42, 0x00, 0xc3, 0xc1, 0xb3,
- 0xc2, 0x43, 0x13, 0x4d, 0x41, 0xb3, 0xce, 0x46, 0xd2, 0x01, 0xc1, 0xb3,
- 0xd8, 0xca, 0x9c, 0x76, 0x0e, 0x81, 0x40, 0xc4, 0x1c, 0x84, 0x0e, 0x87,
- 0x41, 0xc5, 0xdd, 0x68, 0x0e, 0x83, 0xf3, 0x01, 0xb3, 0xe4, 0xca, 0xa3,
- 0xd8, 0x0e, 0x82, 0x20, 0xc6, 0xcf, 0xc7, 0x0e, 0x87, 0x13, 0x01, 0xb3,
- 0xea, 0xc7, 0xc7, 0x06, 0x0e, 0x86, 0xf2, 0x01, 0xb3, 0xee, 0xc4, 0x7e,
- 0xfe, 0x0e, 0x83, 0x48, 0xc3, 0x06, 0xc9, 0x0e, 0x83, 0x33, 0x01, 0xb3,
- 0xf2, 0x10, 0x41, 0xb3, 0xf8, 0xca, 0xa2, 0x84, 0x0e, 0x87, 0x39, 0x09,
- 0xc1, 0xb4, 0x04, 0x03, 0xc1, 0xb4, 0x13, 0x45, 0x1a, 0x46, 0xc1, 0xb4,
- 0x1f, 0xc3, 0x1e, 0xab, 0x0e, 0x84, 0x32, 0x01, 0xb4, 0x35, 0x44, 0x1a,
- 0x47, 0xc1, 0xb4, 0x3b, 0x42, 0x00, 0x4e, 0x41, 0xb4, 0x53, 0x11, 0xc1,
- 0xb4, 0x5f, 0xc4, 0x7f, 0x43, 0x0e, 0x82, 0x80, 0xd4, 0x3c, 0x01, 0x0e,
- 0x86, 0x61, 0xd6, 0x2f, 0x59, 0x0e, 0x86, 0x59, 0x10, 0xc1, 0xb4, 0x6e,
- 0x48, 0x1c, 0x61, 0xc1, 0xb4, 0x7a, 0x4f, 0x6c, 0x0e, 0xc1, 0xb4, 0x86,
- 0x4a, 0xa3, 0x60, 0xc1, 0xb4, 0x92, 0xc8, 0x9f, 0x0c, 0x0e, 0x81, 0xa2,
- 0x01, 0xb4, 0xae, 0xc8, 0xba, 0xed, 0x0e, 0x85, 0x81, 0xca, 0xa4, 0x14,
- 0x0e, 0x85, 0x79, 0xcb, 0x8f, 0x6c, 0x0e, 0x85, 0x70, 0xc6, 0xd5, 0x07,
- 0x0e, 0x86, 0x51, 0xc6, 0xd0, 0xd5, 0x0e, 0x86, 0x49, 0xc5, 0xd6, 0x60,
- 0x0e, 0x86, 0x40, 0xc3, 0x7e, 0xff, 0x0e, 0x83, 0x39, 0xc8, 0x9f, 0x0c,
- 0x0e, 0x81, 0xd8, 0x8b, 0x0e, 0x82, 0xb1, 0xc2, 0x00, 0xb7, 0x0e, 0x80,
- 0xc0, 0x08, 0xc1, 0xb4, 0xb4, 0xc7, 0xc4, 0xac, 0x0e, 0x84, 0xc0, 0xd5,
- 0x36, 0x07, 0x0e, 0x85, 0x61, 0x43, 0x00, 0xb7, 0x41, 0xb4, 0xc0, 0xd4,
- 0x38, 0xb9, 0x0e, 0x85, 0xb1, 0xc7, 0xc3, 0xc5, 0x0e, 0x83, 0xd8, 0xcd,
- 0x7c, 0xd3, 0x0e, 0x83, 0xa1, 0xcb, 0x95, 0xa7, 0x0e, 0x83, 0x00, 0x12,
- 0xc1, 0xb4, 0xcc, 0xcb, 0x98, 0xd5, 0x0e, 0x85, 0x89, 0xcd, 0x7f, 0xb8,
- 0x0e, 0x85, 0x51, 0x16, 0xc1, 0xb4, 0xd8, 0x45, 0xd9, 0xad, 0xc1, 0xb4,
- 0xe4, 0xce, 0x6d, 0xed, 0x0e, 0x85, 0x20, 0x0b, 0xc1, 0xb4, 0xf0, 0x45,
- 0xaa, 0xb1, 0x41, 0xb5, 0x00, 0xc6, 0xd0, 0x0f, 0x0e, 0x84, 0x41, 0xc5,
- 0x13, 0x9c, 0x0e, 0x81, 0x89, 0xc4, 0xad, 0x05, 0x0e, 0x80, 0x78, 0x07,
- 0xc1, 0xb5, 0x16, 0xc3, 0x04, 0x44, 0x0e, 0x80, 0xa0, 0x45, 0xde, 0x5d,
- 0xc1, 0xb5, 0x25, 0xc3, 0xbf, 0x77, 0x0e, 0x81, 0x70, 0xc3, 0x7e, 0xff,
- 0x0e, 0x83, 0xa9, 0xc8, 0x9f, 0x0c, 0x0e, 0x81, 0x60, 0x00, 0xc1, 0xb5,
- 0x3b, 0xca, 0x9f, 0x0a, 0x0e, 0x81, 0x00, 0xc3, 0x7e, 0xff, 0x0e, 0x82,
- 0x39, 0xc8, 0x9f, 0x0c, 0x0e, 0x80, 0xa8, 0x45, 0xc1, 0x0f, 0xc1, 0xb5,
- 0x4d, 0x0e, 0x41, 0xb5, 0x66, 0x42, 0x08, 0x0e, 0xc1, 0xb5, 0x70, 0xc5,
- 0xdd, 0x86, 0x0e, 0x80, 0xf0, 0xc3, 0x7e, 0xff, 0x0e, 0x82, 0xc9, 0xc8,
- 0x9f, 0x0c, 0x0e, 0x81, 0x30, 0xc6, 0xd0, 0x4b, 0x0e, 0x81, 0xc3, 0x01,
- 0xb5, 0x7f, 0x43, 0x4d, 0x6f, 0xc1, 0xb5, 0x85, 0xc9, 0x95, 0x51, 0x0e,
- 0x80, 0x10, 0x00, 0xc1, 0xb5, 0x8f, 0xca, 0x9f, 0x0a, 0x0e, 0x81, 0x08,
- 0xc2, 0x0c, 0x57, 0x08, 0xe3, 0x48, 0xc2, 0x0c, 0x57, 0x08, 0xe3, 0x40,
- 0xc3, 0x43, 0xcd, 0x08, 0xe3, 0x39, 0xc2, 0x00, 0x7b, 0x08, 0xe2, 0xf0,
- 0xc3, 0x0c, 0x56, 0x08, 0xe3, 0x31, 0xc2, 0x02, 0x53, 0x08, 0xe2, 0xe8,
- 0xc4, 0x0c, 0x55, 0x08, 0xe3, 0x29, 0xc3, 0x04, 0x5f, 0x08, 0xe2, 0xe0,
- 0xc4, 0x18, 0x85, 0x08, 0xe3, 0x21, 0x91, 0x08, 0xe2, 0xd8, 0xc4, 0x18,
- 0x83, 0x08, 0xe2, 0xb9, 0xc2, 0x26, 0x51, 0x08, 0xe2, 0xb0, 0xc3, 0x0c,
- 0x5b, 0x08, 0xe2, 0xa9, 0xc3, 0x06, 0x9e, 0x08, 0xe2, 0xa0, 0xc4, 0x04,
- 0x5e, 0x08, 0xe2, 0x99, 0xc2, 0x01, 0x47, 0x08, 0xe2, 0x90, 0x94, 0x08,
- 0xe1, 0xa8, 0x8e, 0x08, 0xe0, 0x41, 0x94, 0x08, 0xe0, 0x32, 0x01, 0xb5,
- 0xa1, 0xc2, 0x00, 0xa4, 0x08, 0xe0, 0xd9, 0x83, 0x08, 0xe0, 0xd0, 0xc2,
- 0x00, 0xa4, 0x08, 0xe0, 0xc9, 0x83, 0x08, 0xe0, 0xc0, 0x46, 0x01, 0xe7,
- 0xc1, 0xb5, 0xa5, 0x04, 0xc1, 0xb5, 0xb1, 0xd5, 0x36, 0x5b, 0x01, 0x2e,
- 0xf9, 0xc6, 0xd1, 0x47, 0x0f, 0xac, 0x69, 0x12, 0xc1, 0xb5, 0xbd, 0xcc,
- 0x81, 0xd0, 0x0f, 0xac, 0x59, 0xe0, 0x09, 0x67, 0x01, 0x49, 0xf8, 0x46,
- 0x01, 0xe7, 0xc1, 0xb5, 0xc9, 0xcf, 0x68, 0x4e, 0x01, 0x3e, 0x99, 0x15,
- 0xc1, 0xb5, 0xd5, 0xda, 0x1a, 0xa1, 0x01, 0x3a, 0x79, 0xc6, 0xcc, 0x19,
- 0x01, 0x38, 0x71, 0xd5, 0x36, 0x5b, 0x01, 0x2e, 0xf1, 0x4f, 0x61, 0x19,
- 0x41, 0xb5, 0xe1, 0xdb, 0x18, 0xfd, 0x0f, 0xdb, 0x79, 0x45, 0x04, 0x5e,
- 0x41, 0xb5, 0xed, 0xc6, 0x01, 0x7a, 0x01, 0x2f, 0x09, 0xd4, 0x38, 0xe1,
- 0x01, 0x2e, 0xd9, 0xc5, 0x01, 0xe2, 0x01, 0x2c, 0x21, 0xcc, 0x06, 0x1b,
- 0x0f, 0xdc, 0x78, 0xcd, 0x19, 0x0b, 0x01, 0x2c, 0x11, 0xcc, 0x01, 0xdb,
- 0x01, 0x2c, 0x08, 0xc6, 0xd2, 0x91, 0x0f, 0xd5, 0x59, 0xd0, 0x52, 0xfa,
- 0x0f, 0xa8, 0x28, 0xc9, 0x37, 0x1e, 0x01, 0x72, 0x40, 0xce, 0x6c, 0xe3,
- 0x01, 0x3f, 0xf9, 0xcc, 0x8c, 0x20, 0x01, 0x3f, 0xcb, 0x01, 0xb5, 0xf9,
- 0xc5, 0x01, 0x0f, 0x01, 0x3f, 0xb2, 0x01, 0xb5, 0xff, 0xcc, 0x8c, 0x20,
- 0x01, 0x3f, 0xc3, 0x01, 0xb6, 0x05, 0xc5, 0x01, 0x0f, 0x01, 0x3f, 0xab,
- 0x01, 0xb6, 0x0b, 0xce, 0x6c, 0xe3, 0x01, 0x59, 0x98, 0x46, 0x02, 0x12,
- 0xc1, 0xb6, 0x11, 0xc4, 0x36, 0xab, 0x01, 0x3e, 0xf0, 0xe0, 0x0b, 0x07,
- 0x01, 0x57, 0x30, 0x45, 0x00, 0x6c, 0xc1, 0xb6, 0x1d, 0xd7, 0x27, 0x57,
- 0x01, 0x52, 0xc8, 0xcf, 0x67, 0x5e, 0x01, 0x52, 0xe1, 0xcb, 0x95, 0x9c,
- 0x01, 0x52, 0xd1, 0x42, 0x01, 0x4a, 0xc1, 0xb6, 0x2f, 0xc8, 0x4f, 0x39,
- 0x01, 0x52, 0xf8, 0x10, 0xc1, 0xb6, 0x3b, 0x14, 0x41, 0xb6, 0x45, 0x43,
- 0x03, 0x2d, 0xc1, 0xb6, 0x51, 0xd5, 0x37, 0x57, 0x0f, 0xab, 0xd8, 0x45,
- 0x02, 0x13, 0xc1, 0xb6, 0x78, 0xd6, 0x28, 0xf6, 0x01, 0x70, 0x60, 0xc9,
- 0x9c, 0x95, 0x01, 0x3e, 0xa9, 0x43, 0x00, 0x8c, 0x41, 0xb6, 0xa6, 0xd5,
- 0x32, 0xbf, 0x01, 0x3e, 0x29, 0x07, 0xc1, 0xb6, 0xb2, 0xcd, 0x24, 0xb3,
- 0x00, 0x02, 0xdb, 0x01, 0xb6, 0xbe, 0x0b, 0xc1, 0xb6, 0xc2, 0xcc, 0x70,
- 0xfe, 0x0f, 0xaf, 0x41, 0xd3, 0x1d, 0xb2, 0x01, 0x70, 0x10, 0xcb, 0x8d,
- 0xd5, 0x01, 0x36, 0xe1, 0xcc, 0x02, 0x53, 0x00, 0x03, 0xdb, 0x01, 0xb6,
- 0xce, 0xc6, 0xbc, 0x7e, 0x01, 0x18, 0x41, 0xcd, 0x66, 0x34, 0x01, 0x80,
- 0x60, 0x0a, 0xc1, 0xb6, 0xd2, 0xc3, 0x00, 0xdf, 0x01, 0x15, 0x19, 0x14,
- 0xc1, 0xb6, 0xe4, 0xd5, 0x0a, 0x89, 0x01, 0x80, 0xa0, 0x0b, 0xc1, 0xb6,
- 0xf0, 0xc4, 0x20, 0x8d, 0x01, 0x18, 0x50, 0xc7, 0xca, 0xa2, 0x01, 0x1d,
- 0xc1, 0xcd, 0x7c, 0x2a, 0x01, 0x71, 0x00, 0x00, 0x41, 0xb6, 0xfc, 0x45,
- 0x06, 0xf3, 0xc1, 0xb7, 0x0e, 0xd9, 0x1d, 0xac, 0x01, 0x70, 0x20, 0xcb,
- 0x93, 0x55, 0x0f, 0xac, 0x71, 0xcb, 0x8a, 0xb9, 0x01, 0x4e, 0xc1, 0x45,
- 0x02, 0x32, 0x41, 0xb7, 0x26, 0x45, 0x05, 0xf0, 0xc1, 0xb7, 0x42, 0x44,
- 0x02, 0xde, 0x41, 0xb7, 0x4e, 0xc6, 0xd4, 0xa7, 0x0f, 0xb6, 0x29, 0xd5,
- 0x2e, 0x7e, 0x01, 0x70, 0xe0, 0xca, 0x04, 0xfd, 0x01, 0x0f, 0x33, 0x01,
- 0xb7, 0x5a, 0xc9, 0xb0, 0xcc, 0x01, 0x0c, 0xe0, 0x42, 0x00, 0x5b, 0xc1,
- 0xb7, 0x60, 0x42, 0x01, 0x47, 0xc1, 0xb7, 0x6c, 0xd5, 0x32, 0x56, 0x0f,
- 0xc5, 0x10, 0x00, 0xc1, 0xb7, 0x78, 0xc5, 0x12, 0xea, 0x01, 0x48, 0xc8,
- 0xc5, 0xcb, 0xea, 0x0f, 0xb3, 0x61, 0xd7, 0x27, 0x9c, 0x0f, 0xc5, 0x30,
- 0xcb, 0x82, 0xb5, 0x01, 0x0f, 0x01, 0x46, 0x06, 0xf2, 0x41, 0xb7, 0x95,
- 0x42, 0x00, 0x90, 0xc1, 0xb7, 0xa4, 0xcf, 0x5c, 0xe3, 0x0f, 0xc2, 0x80,
- 0x03, 0xc1, 0xb7, 0xb0, 0x45, 0x10, 0x38, 0x41, 0xb7, 0xbc, 0xcc, 0x79,
- 0x2c, 0x01, 0x0e, 0x39, 0xcb, 0x97, 0x3e, 0x0f, 0xd7, 0xb8, 0x45, 0x05,
- 0xf0, 0xc1, 0xb7, 0xc8, 0xd8, 0x23, 0x10, 0x0f, 0xc5, 0x01, 0xdf, 0x0d,
- 0x9c, 0x0f, 0xc5, 0x40, 0xd0, 0x51, 0x51, 0x0f, 0xc1, 0xa1, 0xe0, 0x04,
- 0xe7, 0x0f, 0xc5, 0x50, 0xd0, 0x5f, 0xb2, 0x0f, 0xa8, 0x69, 0xcd, 0x09,
- 0x51, 0x01, 0x19, 0x49, 0xd4, 0x3e, 0x81, 0x01, 0x4f, 0xe1, 0xdb, 0x18,
- 0x5b, 0x00, 0x05, 0x58, 0xdc, 0x13, 0xfe, 0x01, 0x3d, 0x51, 0xdb, 0x16,
- 0x90, 0x01, 0x49, 0xc8, 0xc7, 0x05, 0x7a, 0x01, 0x03, 0x31, 0xc8, 0xbe,
- 0xed, 0x01, 0x01, 0x69, 0xc9, 0xb3, 0x42, 0x01, 0x01, 0x51, 0xc4, 0x00,
- 0xba, 0x01, 0x00, 0x70, 0xd6, 0x2c, 0xdb, 0x00, 0x2c, 0x71, 0xc4, 0xc1,
- 0x0f, 0x0f, 0xc8, 0xd9, 0xcb, 0x91, 0x5b, 0x00, 0x7e, 0xb2, 0x01, 0xb7,
- 0xd4, 0xcc, 0x00, 0x9b, 0x01, 0x13, 0xb1, 0x43, 0x00, 0x8f, 0xc1, 0xb7,
- 0xda, 0xd0, 0x60, 0x72, 0x01, 0x53, 0xeb, 0x01, 0xb7, 0xe6, 0xcb, 0x1a,
- 0xd9, 0x01, 0x54, 0x28, 0xcf, 0x0b, 0x98, 0x01, 0x4b, 0xb1, 0x44, 0x05,
- 0x96, 0xc1, 0xb7, 0xec, 0x15, 0xc1, 0xb7, 0xf2, 0x44, 0x00, 0x9b, 0x41,
- 0xb7, 0xfe, 0xd8, 0x24, 0x18, 0x01, 0x54, 0x39, 0xcf, 0x62, 0x54, 0x01,
- 0x54, 0x48, 0xc2, 0x0b, 0xc6, 0x00, 0xe2, 0x79, 0xc2, 0x02, 0xb4, 0x00,
- 0xe0, 0xc9, 0x83, 0x00, 0xe0, 0x60, 0x16, 0xc1, 0xb8, 0x04, 0x15, 0xc1,
- 0xb8, 0x0e, 0xc2, 0x00, 0xa4, 0x00, 0xe0, 0x59, 0x83, 0x00, 0xe0, 0x50,
- 0xc2, 0x00, 0xa4, 0x00, 0xe1, 0x09, 0x83, 0x00, 0xe1, 0x00, 0xc2, 0x00,
- 0xc7, 0x00, 0xe0, 0xf1, 0x83, 0x00, 0xe0, 0xe8, 0xc2, 0x00, 0xc7, 0x00,
- 0xe0, 0xb1, 0x83, 0x00, 0xe0, 0xa8, 0xc2, 0x00, 0xc7, 0x00, 0xe0, 0xa1,
- 0x83, 0x00, 0xe0, 0x98, 0xc2, 0x00, 0xc7, 0x00, 0xe0, 0x91, 0x83, 0x00,
- 0xe0, 0x88, 0xc2, 0x00, 0xa4, 0x00, 0xe0, 0x81, 0xc2, 0x00, 0xc7, 0x00,
- 0xe0, 0x79, 0x83, 0x00, 0xe0, 0x70, 0x83, 0x00, 0xe0, 0x69, 0xc2, 0x1d,
- 0x5f, 0x00, 0xe0, 0x49, 0xc2, 0x01, 0x29, 0x00, 0xe0, 0x28, 0xc2, 0x00,
- 0xa4, 0x00, 0xe0, 0x39, 0x83, 0x00, 0xe0, 0x30, 0xc2, 0x00, 0xc7, 0x00,
- 0xe0, 0x21, 0x83, 0x00, 0xe0, 0x18, 0xc2, 0x00, 0xa4, 0x00, 0xe0, 0x11,
- 0xc2, 0x00, 0xc7, 0x00, 0xe0, 0x09, 0x83, 0x00, 0xe0, 0x00, 0xc4, 0x18,
- 0x83, 0x00, 0xe2, 0x39, 0xc2, 0x26, 0x51, 0x00, 0xe2, 0x30, 0xc3, 0x0c,
- 0x5b, 0x00, 0xe2, 0x29, 0xc3, 0x06, 0x9e, 0x00, 0xe2, 0x20, 0xc4, 0x04,
- 0x5e, 0x00, 0xe2, 0x19, 0xc2, 0x01, 0x47, 0x00, 0xe2, 0x10, 0xc5, 0xdd,
- 0x2c, 0x00, 0xe1, 0xfb, 0x01, 0xb8, 0x18, 0xc5, 0x4e, 0xc1, 0x00, 0xe1,
- 0xd8, 0xc5, 0x35, 0x00, 0x00, 0xe1, 0xb9, 0xc3, 0x03, 0xdd, 0x00, 0xe1,
- 0xb0, 0xc2, 0x02, 0x59, 0x00, 0xe1, 0x29, 0xc2, 0x1d, 0x5f, 0x00, 0xe1,
- 0x20, 0xc3, 0x01, 0x93, 0x00, 0xe1, 0xa8, 0xc6, 0xce, 0xb3, 0x00, 0xe1,
- 0xa0, 0x97, 0x00, 0xe1, 0x58, 0x91, 0x00, 0xe1, 0x48, 0x15, 0xc1, 0xb8,
- 0x1e, 0xcc, 0x1a, 0xaf, 0x0f, 0xbc, 0x71, 0x14, 0xc1, 0xb8, 0x30, 0x44,
- 0x01, 0x1e, 0xc1, 0xb8, 0x3c, 0xcc, 0x03, 0x3b, 0x01, 0x3a, 0xc1, 0xca,
- 0xa2, 0x3e, 0x0f, 0xaf, 0xc1, 0x08, 0xc1, 0xb8, 0x42, 0xcb, 0x5a, 0x67,
- 0x0f, 0xbd, 0x11, 0xd5, 0x35, 0x20, 0x0f, 0xbd, 0xd9, 0x16, 0xc1, 0xb8,
- 0x4e, 0xca, 0xa0, 0xae, 0x0f, 0xd3, 0xa8, 0xc5, 0xd6, 0x5b, 0x0f, 0xaf,
- 0x92, 0x01, 0xb8, 0x5a, 0xc2, 0x00, 0xa4, 0x08, 0xfd, 0x81, 0x83, 0x05,
- 0x27, 0x60, 0x83, 0x05, 0x26, 0x89, 0xc2, 0x00, 0xa4, 0x05, 0x26, 0x90,
- 0x83, 0x05, 0x26, 0x99, 0xc2, 0x02, 0xb4, 0x05, 0x26, 0xe0, 0x83, 0x05,
- 0x26, 0xa1, 0xc2, 0x00, 0xa4, 0x05, 0x26, 0xa9, 0x15, 0xc1, 0xb8, 0x60,
- 0x44, 0x01, 0xb4, 0x41, 0xb8, 0x6a, 0x83, 0x05, 0x26, 0xb1, 0xc2, 0x00,
- 0xa4, 0x05, 0x27, 0x68, 0x83, 0x05, 0x26, 0xb9, 0xc2, 0x00, 0xa4, 0x05,
- 0x26, 0xc0, 0x83, 0x05, 0x26, 0xd1, 0xc2, 0x00, 0xa4, 0x05, 0x26, 0xd8,
- 0x83, 0x05, 0x27, 0x01, 0xc2, 0x01, 0x29, 0x05, 0x27, 0x28, 0x83, 0x05,
- 0x27, 0x11, 0xc2, 0x00, 0xa4, 0x05, 0x27, 0x58, 0xc2, 0x00, 0xa4, 0x05,
- 0x27, 0x19, 0x83, 0x05, 0x27, 0x20, 0x83, 0x05, 0x27, 0x31, 0xc2, 0x00,
- 0xa4, 0x05, 0x27, 0x40, 0x87, 0x05, 0x27, 0x78, 0x97, 0x05, 0x27, 0x88,
- 0x87, 0x05, 0x27, 0xb8, 0x87, 0x05, 0x27, 0xa9, 0x8a, 0x05, 0x27, 0xb0,
- 0xc9, 0x19, 0xf5, 0x01, 0x01, 0x41, 0xca, 0x33, 0x1e, 0x00, 0x00, 0x5b,
- 0x01, 0xb8, 0x76, 0xc4, 0x19, 0xf0, 0x00, 0x00, 0x51, 0x4c, 0x87, 0x04,
- 0x41, 0xb8, 0x7c, 0x48, 0xbe, 0x85, 0xc1, 0xb8, 0x88, 0x42, 0x01, 0x2b,
- 0x41, 0xb8, 0xb0, 0xc4, 0x22, 0x71, 0x00, 0xca, 0x79, 0xc5, 0x01, 0xdb,
- 0x00, 0xca, 0x71, 0x15, 0xc1, 0xb8, 0xc2, 0x08, 0xc1, 0xb8, 0xce, 0x16,
- 0xc1, 0xb8, 0xda, 0xc3, 0x01, 0xb4, 0x00, 0xca, 0x39, 0xc4, 0x15, 0xd3,
- 0x00, 0xca, 0x30, 0x44, 0x00, 0xcc, 0xc1, 0xb8, 0xe6, 0x4c, 0x26, 0xeb,
- 0xc1, 0xb8, 0xfe, 0x50, 0x5f, 0xd2, 0x41, 0xb9, 0x2c, 0x46, 0x00, 0xca,
- 0xc1, 0xb9, 0x3e, 0xcf, 0x67, 0x22, 0x00, 0xc8, 0x00, 0x16, 0xc1, 0xb9,
- 0x5b, 0x09, 0xc1, 0xb9, 0x6b, 0xc2, 0x00, 0xa4, 0x00, 0xc8, 0xe1, 0x15,
- 0xc1, 0xb9, 0x7b, 0xc2, 0x01, 0x09, 0x00, 0xc8, 0xc1, 0xc2, 0x00, 0xc7,
- 0x00, 0xc8, 0xb9, 0xc2, 0x02, 0x59, 0x00, 0xc8, 0xb1, 0xc2, 0x1d, 0x5f,
- 0x00, 0xc8, 0xab, 0x01, 0xb9, 0x8b, 0xc2, 0x00, 0xad, 0x00, 0xc8, 0xa1,
- 0x04, 0xc1, 0xb9, 0x8f, 0x12, 0xc1, 0xb9, 0x99, 0x10, 0xc1, 0xb9, 0xa3,
- 0x06, 0xc1, 0xb9, 0xad, 0x0c, 0xc1, 0xb9, 0xb7, 0x05, 0xc1, 0xb9, 0xc1,
- 0x0d, 0x41, 0xb9, 0xcb, 0x90, 0x08, 0x49, 0xc0, 0x9b, 0x08, 0x49, 0xb8,
- 0x90, 0x08, 0x49, 0xb0, 0x90, 0x08, 0x49, 0xa8, 0x96, 0x08, 0x49, 0xa0,
- 0x95, 0x08, 0x49, 0x70, 0x04, 0xc1, 0xb9, 0xd5, 0x44, 0x07, 0x0d, 0xc1,
- 0xb9, 0xe1, 0x46, 0x76, 0x8a, 0xc1, 0xb9, 0xed, 0xc9, 0x36, 0xa6, 0x01,
- 0x3e, 0xc9, 0xc7, 0xc9, 0x3d, 0x01, 0x3e, 0xc1, 0xc6, 0x01, 0x7a, 0x01,
- 0x2f, 0x79, 0x11, 0xc1, 0xb9, 0xf9, 0x16, 0xc1, 0xba, 0x05, 0xd6, 0x2d,
- 0x1d, 0x01, 0x50, 0xf1, 0x47, 0xcb, 0x58, 0xc1, 0xba, 0x11, 0x47, 0xca,
- 0xbe, 0x41, 0xba, 0x1d, 0xcc, 0x23, 0x34, 0x01, 0x55, 0x68, 0x0e, 0xc1,
- 0xba, 0x29, 0x4f, 0x07, 0x17, 0x41, 0xba, 0x35, 0x96, 0x01, 0x04, 0xe1,
- 0x95, 0x01, 0x04, 0xdb, 0x01, 0xba, 0x41, 0x92, 0x01, 0x04, 0xd1, 0x90,
- 0x01, 0x04, 0xc9, 0x8f, 0x01, 0x04, 0xc1, 0x8e, 0x01, 0x04, 0xb9, 0x8d,
- 0x01, 0x04, 0xb1, 0x8a, 0x01, 0x04, 0xa9, 0x9a, 0x01, 0x04, 0x99, 0x91,
- 0x01, 0x04, 0x91, 0x87, 0x01, 0x04, 0x89, 0x83, 0x01, 0x04, 0x81, 0x98,
- 0x00, 0xeb, 0x29, 0x97, 0x00, 0xeb, 0x21, 0x94, 0x00, 0xeb, 0x19, 0x8b,
- 0x00, 0xeb, 0x11, 0x8c, 0x01, 0x63, 0xe0, 0x4d, 0x35, 0xa6, 0xc1, 0xba,
- 0x47, 0xca, 0x9e, 0xf6, 0x00, 0x14, 0xbb, 0x01, 0xba, 0xc6, 0xce, 0x75,
- 0x17, 0x05, 0x3c, 0x78, 0x46, 0x00, 0x6b, 0x41, 0xba, 0xcc, 0xcd, 0x7e,
- 0x32, 0x00, 0x0e, 0x1b, 0x01, 0xba, 0xd8, 0x47, 0x10, 0x91, 0x41, 0xba,
- 0xde, 0xc2, 0x00, 0x34, 0x00, 0xe9, 0x29, 0xcd, 0x7c, 0x78, 0x00, 0x0e,
- 0x10, 0xcc, 0x21, 0x84, 0x00, 0x15, 0x08, 0x47, 0x7f, 0x10, 0xc1, 0xba,
- 0xea, 0xd1, 0x53, 0x81, 0x00, 0x15, 0x68, 0x46, 0x02, 0x91, 0xc1, 0xba,
- 0xf6, 0x48, 0x19, 0x70, 0x41, 0xbb, 0xac, 0x88, 0x05, 0x3f, 0xd9, 0x92,
- 0x05, 0x3f, 0xe0, 0xc9, 0x53, 0xd7, 0x05, 0x3f, 0xe9, 0xc6, 0x6b, 0x46,
- 0x05, 0x3f, 0xf0, 0x91, 0x00, 0x74, 0x09, 0x0a, 0x41, 0xbb, 0xb8, 0x44,
- 0x62, 0xe0, 0xc1, 0xbb, 0xc4, 0x91, 0x00, 0x74, 0xd9, 0x43, 0x68, 0x16,
- 0x41, 0xbb, 0xf0, 0xc2, 0x13, 0x4f, 0x00, 0x74, 0x39, 0xc2, 0x20, 0x67,
- 0x00, 0x74, 0x69, 0x91, 0x00, 0x74, 0xc8, 0x42, 0x01, 0x48, 0xc1, 0xbb,
- 0xfc, 0x49, 0xad, 0x90, 0x41, 0xbc, 0x08, 0x91, 0x00, 0x74, 0xa9, 0x43,
- 0x68, 0x16, 0x41, 0xbc, 0x14, 0x08, 0xc1, 0xbc, 0x20, 0xc3, 0x30, 0x10,
- 0x00, 0x74, 0xe9, 0xc4, 0xb2, 0x72, 0x00, 0x74, 0xf8, 0x42, 0x01, 0x1d,
- 0x41, 0xbc, 0x2c, 0xc4, 0xb2, 0x72, 0x00, 0x75, 0x59, 0xc3, 0x30, 0x10,
- 0x00, 0x75, 0x70, 0x83, 0x00, 0x75, 0x91, 0x8f, 0x00, 0x75, 0x99, 0x9b,
- 0x00, 0x76, 0x19, 0x8b, 0x00, 0x76, 0x20, 0xc2, 0x00, 0x57, 0x00, 0x75,
- 0x89, 0xc2, 0x00, 0xb7, 0x00, 0x75, 0xd8, 0x8b, 0x00, 0x75, 0xa8, 0x9b,
- 0x00, 0x75, 0xb8, 0x97, 0x00, 0x75, 0xc8, 0x8b, 0x00, 0x76, 0x08, 0xc2,
- 0x01, 0x64, 0x00, 0x75, 0xe1, 0xc3, 0x4e, 0xfc, 0x00, 0x75, 0xe8, 0xc2,
- 0x00, 0xf6, 0x00, 0x76, 0x49, 0x8b, 0x00, 0x76, 0x50, 0xc2, 0x01, 0x47,
- 0x00, 0x76, 0x91, 0xc4, 0x04, 0x5e, 0x00, 0x76, 0x98, 0xc3, 0x06, 0x9e,
- 0x00, 0x76, 0xa1, 0xc3, 0x0c, 0x5b, 0x00, 0x76, 0xa8, 0xc2, 0x26, 0x51,
- 0x00, 0x76, 0xb1, 0xc4, 0x18, 0x83, 0x00, 0x76, 0xb8, 0x45, 0x01, 0xe8,
- 0xc1, 0xbc, 0x38, 0xd1, 0x4b, 0x0d, 0x0f, 0xdc, 0xc8, 0x46, 0x01, 0xc7,
- 0xc1, 0xbc, 0x44, 0x5b, 0x16, 0x24, 0x41, 0xbc, 0x56, 0xc6, 0x07, 0x09,
- 0x01, 0x3a, 0x91, 0xc6, 0x01, 0x7a, 0x0f, 0xa9, 0xf8, 0xe0, 0x02, 0x47,
- 0x01, 0x1d, 0x88, 0x45, 0x01, 0xe8, 0xc1, 0xbc, 0x62, 0xd2, 0x40, 0x6c,
- 0x0f, 0xdc, 0xc0, 0x5b, 0x18, 0x0a, 0xc1, 0xbc, 0x6e, 0x46, 0x01, 0xd1,
- 0x41, 0xbc, 0x7a, 0xe0, 0x02, 0xe7, 0x01, 0x1d, 0x80, 0x45, 0x01, 0xd2,
- 0xc1, 0xbc, 0x8c, 0x4d, 0x3b, 0x4e, 0x41, 0xbc, 0x98, 0xe0, 0x0a, 0xa7,
- 0x0f, 0xdb, 0x40, 0x0f, 0xc1, 0xbc, 0x9e, 0xcc, 0x0d, 0x80, 0x01, 0x2e,
- 0xd0, 0x44, 0x00, 0x56, 0x41, 0xbc, 0xa4, 0xcd, 0x40, 0x12, 0x0f, 0xdc,
- 0x19, 0xce, 0x0a, 0xb9, 0x0f, 0xdc, 0x28, 0x00, 0x41, 0xbc, 0xaa, 0xcc,
- 0x85, 0x84, 0x01, 0x0f, 0x78, 0x45, 0x01, 0x93, 0xc1, 0xbc, 0xc2, 0xc9,
- 0x64, 0xa3, 0x01, 0x48, 0x50, 0xcd, 0x7e, 0x0b, 0x01, 0x0c, 0xf9, 0x4e,
- 0x05, 0xba, 0x41, 0xbc, 0xce, 0x00, 0x41, 0xbc, 0xda, 0x44, 0x01, 0x1e,
- 0xc1, 0xbc, 0xf8, 0x45, 0x01, 0xf7, 0x41, 0xbd, 0x02, 0xd0, 0x58, 0x92,
- 0x0f, 0xc2, 0x09, 0xc5, 0x01, 0x0f, 0x0f, 0xc2, 0x28, 0x00, 0x41, 0xbd,
- 0x0c, 0xca, 0x9c, 0x30, 0x01, 0x0d, 0x40, 0xcc, 0x8a, 0x64, 0x01, 0x4a,
- 0x89, 0xcd, 0x7e, 0xce, 0x01, 0x4a, 0x68, 0xcd, 0x7e, 0xce, 0x01, 0x4a,
- 0x79, 0xcc, 0x8a, 0x64, 0x01, 0x4a, 0x60, 0xdc, 0x13, 0x72, 0x01, 0x52,
- 0x51, 0x46, 0x00, 0x95, 0xc1, 0xbd, 0x18, 0x45, 0x00, 0x6c, 0x41, 0xbd,
- 0x24, 0xc3, 0x79, 0x81, 0x08, 0x1c, 0x91, 0xc2, 0x00, 0x06, 0x08, 0x1c,
- 0xa8, 0xce, 0x67, 0x41, 0x0f, 0xdc, 0xb9, 0xde, 0x0f, 0xf6, 0x01, 0x3b,
- 0x18, 0x45, 0x02, 0x13, 0xc1, 0xbd, 0x36, 0x50, 0x0f, 0xfc, 0xc1, 0xbd,
- 0x48, 0xca, 0x0e, 0x84, 0x0f, 0xbf, 0x80, 0x45, 0x02, 0x32, 0xc1, 0xbd,
- 0x54, 0xdc, 0x12, 0xe6, 0x01, 0x3d, 0xe9, 0xdb, 0x16, 0xc6, 0x01, 0x3c,
- 0xa0, 0x03, 0xc1, 0xbd, 0x66, 0x45, 0x1b, 0xc7, 0xc1, 0xbd, 0x72, 0x0b,
- 0xc1, 0xbd, 0x7e, 0xc6, 0xb5, 0x72, 0x01, 0x3a, 0x41, 0xda, 0x19, 0x69,
- 0x0f, 0xb3, 0x88, 0x45, 0x1f, 0xc8, 0xc1, 0xbd, 0x8a, 0x4e, 0x49, 0x38,
- 0x41, 0xbd, 0x96, 0x03, 0xc1, 0xbd, 0xa2, 0x42, 0x01, 0x12, 0xc1, 0xbd,
- 0xae, 0x43, 0x01, 0x1f, 0xc1, 0xbd, 0xb8, 0xd8, 0x25, 0x68, 0x0f, 0xb3,
- 0x98, 0x49, 0x22, 0xe1, 0xc1, 0xbd, 0xc4, 0xdf, 0x01, 0xe8, 0x01, 0x3c,
- 0xf1, 0x4e, 0x21, 0x30, 0x41, 0xbd, 0xd0, 0x44, 0x03, 0xe3, 0xc1, 0xbd,
- 0xdc, 0xc7, 0xc4, 0xc1, 0x01, 0x38, 0xc0, 0x49, 0x2f, 0x87, 0xc1, 0xbd,
- 0xe6, 0x51, 0x01, 0x09, 0x41, 0xbd, 0xec, 0x45, 0x3f, 0x21, 0xc1, 0xbd,
- 0xf8, 0x42, 0x01, 0xf0, 0xc1, 0xbd, 0xfe, 0xc5, 0x01, 0x7b, 0x01, 0x5a,
- 0xc2, 0x01, 0xbe, 0x0a, 0x46, 0x82, 0xb5, 0xc1, 0xbe, 0x16, 0xcc, 0x2f,
- 0x37, 0x01, 0x3c, 0xb9, 0x11, 0x41, 0xbe, 0x1c, 0xdc, 0x14, 0x1a, 0x01,
- 0x3c, 0xe1, 0x44, 0x01, 0xdc, 0x41, 0xbe, 0x2e, 0xc9, 0x61, 0x64, 0x01,
- 0x3c, 0xb1, 0xcf, 0x65, 0x06, 0x01, 0x38, 0xb0, 0xc7, 0x0b, 0x80, 0x01,
- 0x39, 0x89, 0xd1, 0x37, 0x31, 0x0f, 0xb3, 0xa1, 0x51, 0x4a, 0xfb, 0x41,
- 0xbe, 0x3d, 0xd2, 0x4e, 0xc6, 0x01, 0x39, 0x71, 0xd0, 0x5b, 0x02, 0x01,
- 0x38, 0xe1, 0xd4, 0x3f, 0x35, 0x01, 0x5a, 0xb0, 0xdb, 0x15, 0x82, 0x01,
- 0x39, 0x21, 0x44, 0x0c, 0x5b, 0x41, 0xbe, 0x4c, 0xd1, 0x55, 0xc3, 0x01,
- 0x37, 0xe0, 0xca, 0x96, 0x84, 0x0f, 0xa4, 0xf9, 0x45, 0x00, 0x6c, 0xc1,
- 0xbe, 0x58, 0xc5, 0x03, 0x53, 0x0f, 0xd7, 0xb0, 0xa0, 0x0d, 0x87, 0xd1,
- 0x9f, 0x0d, 0x87, 0xc9, 0x9e, 0x0d, 0x87, 0xc1, 0xa3, 0x0d, 0x87, 0xe9,
- 0xa2, 0x0d, 0x87, 0xe1, 0xa1, 0x0d, 0x87, 0xd8, 0xa4, 0x0d, 0x87, 0xb9,
- 0xa3, 0x0d, 0x87, 0xb1, 0xa2, 0x0d, 0x87, 0xa9, 0xa1, 0x0d, 0x87, 0xa1,
- 0xa0, 0x0d, 0x87, 0x99, 0x9f, 0x0d, 0x87, 0x91, 0x9e, 0x0d, 0x87, 0x88,
- 0xa1, 0x0d, 0x87, 0x81, 0xa0, 0x0d, 0x87, 0x79, 0x9f, 0x0d, 0x87, 0x71,
- 0x9e, 0x0d, 0x87, 0x68, 0xa3, 0x0d, 0x88, 0x39, 0xa2, 0x0d, 0x88, 0x31,
- 0xa1, 0x0d, 0x88, 0x29, 0xa0, 0x0d, 0x88, 0x21, 0x9f, 0x0d, 0x88, 0x19,
- 0x9e, 0x0d, 0x88, 0x10, 0xa1, 0x0d, 0x88, 0x09, 0xa0, 0x0d, 0x88, 0x01,
- 0x9f, 0x0d, 0x87, 0xf9, 0x9e, 0x0d, 0x87, 0xf0, 0x9e, 0x0d, 0x85, 0xd1,
- 0xa5, 0x0d, 0x86, 0x09, 0xa4, 0x0d, 0x86, 0x01, 0xa3, 0x0d, 0x85, 0xf9,
- 0xa2, 0x0d, 0x85, 0xf1, 0xa1, 0x0d, 0x85, 0xe9, 0xa0, 0x0d, 0x85, 0xe1,
- 0x9f, 0x0d, 0x85, 0xd8, 0xa4, 0x0d, 0x85, 0xc9, 0xa3, 0x0d, 0x85, 0xc1,
- 0xa2, 0x0d, 0x85, 0xb9, 0xa1, 0x0d, 0x85, 0xb1, 0xa0, 0x0d, 0x85, 0xa9,
- 0x9f, 0x0d, 0x85, 0xa1, 0x9e, 0x0d, 0x85, 0x98, 0xa0, 0x0d, 0x85, 0x91,
- 0x9f, 0x0d, 0x85, 0x89, 0x9e, 0x0d, 0x85, 0x80, 0xa4, 0x0d, 0x85, 0x79,
- 0xa3, 0x0d, 0x85, 0x71, 0xa2, 0x0d, 0x85, 0x69, 0xa1, 0x0d, 0x85, 0x61,
- 0xa0, 0x0d, 0x85, 0x59, 0x9f, 0x0d, 0x85, 0x51, 0x9e, 0x0d, 0x85, 0x48,
- 0x9e, 0x0d, 0x84, 0xf3, 0x01, 0xbe, 0x6a, 0xa6, 0x0d, 0x85, 0x31, 0xa5,
- 0x0d, 0x85, 0x29, 0xa4, 0x0d, 0x85, 0x21, 0xa3, 0x0d, 0x85, 0x19, 0xa2,
- 0x0d, 0x85, 0x11, 0xa1, 0x0d, 0x85, 0x09, 0xa0, 0x0d, 0x85, 0x01, 0x9f,
- 0x0d, 0x84, 0xf8, 0xa2, 0x0d, 0x84, 0xe9, 0xa1, 0x0d, 0x84, 0xe1, 0xa0,
- 0x0d, 0x84, 0xd9, 0x9f, 0x0d, 0x84, 0xd1, 0x9e, 0x0d, 0x84, 0xc8, 0xc2,
- 0x05, 0x68, 0x0d, 0x84, 0xc1, 0xa3, 0x0d, 0x84, 0xb9, 0xa2, 0x0d, 0x84,
- 0xb1, 0xa1, 0x0d, 0x84, 0xa9, 0xa0, 0x0d, 0x84, 0xa1, 0x9f, 0x0d, 0x84,
- 0x99, 0x9e, 0x0d, 0x84, 0x90, 0xa0, 0x0d, 0x84, 0x89, 0x9f, 0x0d, 0x84,
- 0x81, 0x9e, 0x0d, 0x84, 0x78, 0xc2, 0x00, 0x9f, 0x0d, 0x84, 0x71, 0xa4,
- 0x0d, 0x84, 0x69, 0xa3, 0x0d, 0x84, 0x61, 0xa2, 0x0d, 0x84, 0x59, 0xa1,
- 0x0d, 0x84, 0x51, 0xa0, 0x0d, 0x84, 0x49, 0x9f, 0x0d, 0x84, 0x41, 0x9e,
- 0x0d, 0x84, 0x38, 0xa6, 0x0d, 0x84, 0x31, 0xa5, 0x0d, 0x84, 0x29, 0xa4,
- 0x0d, 0x84, 0x21, 0xa3, 0x0d, 0x84, 0x19, 0xa2, 0x0d, 0x84, 0x11, 0xa1,
- 0x0d, 0x84, 0x09, 0xa0, 0x0d, 0x84, 0x01, 0x9f, 0x0d, 0x83, 0xf9, 0x9e,
- 0x0d, 0x83, 0xf0, 0x9f, 0x0d, 0x88, 0xf1, 0x9e, 0x0d, 0x88, 0xe8, 0xa0,
- 0x0d, 0x81, 0xd1, 0x9f, 0x0d, 0x81, 0xc9, 0x9e, 0x0d, 0x81, 0xc1, 0xc2,
- 0x08, 0x12, 0x0d, 0x81, 0xd8, 0xa3, 0x0d, 0x81, 0xb9, 0xa2, 0x0d, 0x81,
- 0xb1, 0xa1, 0x0d, 0x81, 0xa9, 0xa0, 0x0d, 0x81, 0xa1, 0x9f, 0x0d, 0x81,
- 0x99, 0x9e, 0x0d, 0x81, 0x90, 0xa4, 0x0d, 0x81, 0x89, 0xa3, 0x0d, 0x81,
- 0x81, 0xa2, 0x0d, 0x81, 0x79, 0xa1, 0x0d, 0x81, 0x71, 0xa0, 0x0d, 0x81,
- 0x69, 0x9f, 0x0d, 0x81, 0x61, 0x9e, 0x0d, 0x81, 0x58, 0xa5, 0x0d, 0x81,
- 0x51, 0xa4, 0x0d, 0x81, 0x49, 0xa3, 0x0d, 0x81, 0x41, 0xa2, 0x0d, 0x81,
- 0x39, 0xa1, 0x0d, 0x81, 0x31, 0xa0, 0x0d, 0x81, 0x29, 0x9f, 0x0d, 0x81,
- 0x21, 0x9e, 0x0d, 0x81, 0x18, 0xc2, 0x01, 0x05, 0x0d, 0x81, 0x11, 0x9e,
- 0x0d, 0x80, 0xbb, 0x01, 0xbe, 0x72, 0xa6, 0x0d, 0x80, 0xf9, 0xa5, 0x0d,
- 0x80, 0xf1, 0xa4, 0x0d, 0x80, 0xe9, 0xa3, 0x0d, 0x80, 0xe1, 0xa2, 0x0d,
- 0x80, 0xd9, 0xa1, 0x0d, 0x80, 0xd1, 0xa0, 0x0d, 0x80, 0xc9, 0x9f, 0x0d,
- 0x80, 0xc0, 0xa1, 0x0d, 0x88, 0xc9, 0xa0, 0x0d, 0x88, 0xc1, 0x9f, 0x0d,
- 0x88, 0xb9, 0x9e, 0x0d, 0x88, 0xb1, 0xa2, 0x0d, 0x88, 0xd1, 0xa3, 0x0d,
- 0x88, 0xd9, 0xa4, 0x0d, 0x88, 0xe0, 0xa1, 0x0d, 0x88, 0xa9, 0xa0, 0x0d,
- 0x88, 0xa1, 0x9f, 0x0d, 0x88, 0x99, 0x9e, 0x0d, 0x88, 0x90, 0xa2, 0x0d,
- 0x88, 0x89, 0xa1, 0x0d, 0x88, 0x81, 0xa0, 0x0d, 0x88, 0x79, 0x9f, 0x0d,
- 0x88, 0x71, 0x9e, 0x0d, 0x88, 0x68, 0xa2, 0x0d, 0x88, 0x61, 0xa1, 0x0d,
- 0x88, 0x59, 0xa0, 0x0d, 0x88, 0x51, 0x9f, 0x0d, 0x88, 0x49, 0x9e, 0x0d,
- 0x88, 0x40, 0xc2, 0x20, 0x67, 0x0d, 0x87, 0x11, 0xa2, 0x0d, 0x87, 0x09,
- 0xa1, 0x0d, 0x87, 0x01, 0xa0, 0x0d, 0x86, 0xf9, 0x9f, 0x0d, 0x86, 0xf1,
- 0x9e, 0x0d, 0x86, 0xe8, 0x9e, 0x0d, 0x87, 0x19, 0x9f, 0x0d, 0x87, 0x21,
- 0xa0, 0x0d, 0x87, 0x29, 0xa1, 0x0d, 0x87, 0x30, 0x9e, 0x0d, 0x87, 0x39,
- 0x9f, 0x0d, 0x87, 0x41, 0xa0, 0x0d, 0x87, 0x49, 0xa1, 0x0d, 0x87, 0x51,
- 0xa2, 0x0d, 0x87, 0x59, 0xa3, 0x0d, 0x87, 0x60, 0xa2, 0x0d, 0x86, 0xd9,
- 0xa1, 0x0d, 0x86, 0xd1, 0xa0, 0x0d, 0x86, 0xc9, 0x9f, 0x0d, 0x86, 0xc1,
- 0x9e, 0x0d, 0x86, 0xb9, 0xa3, 0x0d, 0x86, 0xe0, 0xc2, 0x00, 0xad, 0x0d,
- 0x86, 0xb1, 0x9f, 0x0d, 0x86, 0xa9, 0x9e, 0x0d, 0x86, 0xa0, 0xa1, 0x0d,
- 0x86, 0x99, 0xa0, 0x0d, 0x86, 0x91, 0x9f, 0x0d, 0x86, 0x89, 0x9e, 0x0d,
- 0x86, 0x80, 0xa4, 0x0d, 0x86, 0x79, 0xa3, 0x0d, 0x86, 0x71, 0xa2, 0x0d,
- 0x86, 0x69, 0xa1, 0x0d, 0x86, 0x61, 0xa0, 0x0d, 0x86, 0x59, 0x9f, 0x0d,
- 0x86, 0x51, 0x9e, 0x0d, 0x86, 0x48, 0xa4, 0x0d, 0x86, 0x41, 0xa3, 0x0d,
- 0x86, 0x39, 0xa2, 0x0d, 0x86, 0x31, 0xa1, 0x0d, 0x86, 0x29, 0xa0, 0x0d,
- 0x86, 0x21, 0x9f, 0x0d, 0x86, 0x19, 0x9e, 0x0d, 0x86, 0x10, 0xc2, 0x02,
- 0x59, 0x0d, 0x83, 0xe9, 0xa3, 0x0d, 0x83, 0xe1, 0xa2, 0x0d, 0x83, 0xd9,
- 0xa1, 0x0d, 0x83, 0xd1, 0xa0, 0x0d, 0x83, 0xc9, 0x9f, 0x0d, 0x83, 0xc1,
- 0x9e, 0x0d, 0x83, 0xb8, 0xa6, 0x0d, 0x83, 0xb1, 0xa5, 0x0d, 0x83, 0xa9,
- 0xa4, 0x0d, 0x83, 0xa1, 0xa3, 0x0d, 0x83, 0x99, 0xa2, 0x0d, 0x83, 0x91,
- 0xa1, 0x0d, 0x83, 0x89, 0xa0, 0x0d, 0x83, 0x81, 0x9f, 0x0d, 0x83, 0x79,
- 0x9e, 0x0d, 0x83, 0x70, 0x9f, 0x0d, 0x83, 0x19, 0x9e, 0x0d, 0x83, 0x11,
- 0xa0, 0x0d, 0x83, 0x21, 0xa1, 0x0d, 0x83, 0x29, 0xa2, 0x0d, 0x83, 0x31,
- 0xa3, 0x0d, 0x83, 0x39, 0xa4, 0x0d, 0x83, 0x40, 0xa1, 0x0d, 0x83, 0x09,
- 0xa0, 0x0d, 0x83, 0x01, 0x9f, 0x0d, 0x82, 0xf9, 0x9e, 0x0d, 0x82, 0xf0,
- 0x9e, 0x0d, 0x83, 0x49, 0x9f, 0x0d, 0x83, 0x51, 0xa0, 0x0d, 0x83, 0x59,
- 0xa1, 0x0d, 0x83, 0x61, 0xc2, 0x02, 0x18, 0x0d, 0x83, 0x68, 0xa4, 0x0d,
- 0x82, 0xe9, 0xa3, 0x0d, 0x82, 0xe1, 0xa2, 0x0d, 0x82, 0xd9, 0xa1, 0x0d,
- 0x82, 0xd1, 0xa0, 0x0d, 0x82, 0xc9, 0x9f, 0x0d, 0x82, 0xc1, 0x9e, 0x0d,
- 0x82, 0xb8, 0xa2, 0x0d, 0x82, 0xb1, 0xa1, 0x0d, 0x82, 0xa9, 0xa0, 0x0d,
- 0x82, 0xa1, 0x9f, 0x0d, 0x82, 0x99, 0x9e, 0x0d, 0x82, 0x90, 0xa5, 0x0d,
- 0x82, 0x89, 0xa4, 0x0d, 0x82, 0x81, 0xa3, 0x0d, 0x82, 0x79, 0xa2, 0x0d,
- 0x82, 0x71, 0xa1, 0x0d, 0x82, 0x69, 0xa0, 0x0d, 0x82, 0x61, 0x9f, 0x0d,
- 0x82, 0x59, 0x9e, 0x0d, 0x82, 0x50, 0xa3, 0x0d, 0x82, 0x49, 0xa2, 0x0d,
- 0x82, 0x41, 0xa1, 0x0d, 0x82, 0x39, 0xa0, 0x0d, 0x82, 0x31, 0x9f, 0x0d,
- 0x82, 0x29, 0x9e, 0x0d, 0x82, 0x20, 0xa5, 0x0d, 0x82, 0x19, 0xa4, 0x0d,
- 0x82, 0x11, 0xa3, 0x0d, 0x82, 0x09, 0xa2, 0x0d, 0x82, 0x01, 0xa1, 0x0d,
- 0x81, 0xf9, 0xa0, 0x0d, 0x81, 0xf1, 0x9f, 0x0d, 0x81, 0xe9, 0x9e, 0x0d,
- 0x81, 0xe0, 0xca, 0xa3, 0x9c, 0x07, 0xda, 0x79, 0x48, 0xb7, 0x0d, 0x41,
- 0xbe, 0x7a, 0xc2, 0x00, 0x27, 0x00, 0x2f, 0x23, 0x01, 0xbe, 0x8c, 0xc3,
- 0xbf, 0x5a, 0x00, 0x2e, 0xdb, 0x01, 0xbe, 0x92, 0xc3, 0x00, 0x48, 0x00,
- 0x2e, 0x8b, 0x01, 0xbe, 0x98, 0xc3, 0x07, 0x8c, 0x00, 0x2e, 0xab, 0x01,
- 0xbe, 0x9e, 0x16, 0xc1, 0xbe, 0xa4, 0x15, 0xc1, 0xbe, 0xbf, 0xc4, 0x5d,
- 0xe2, 0x00, 0x2f, 0x43, 0x01, 0xbe, 0xd1, 0xc3, 0xe6, 0xbb, 0x00, 0x2f,
- 0x3b, 0x01, 0xbe, 0xd7, 0x46, 0x2b, 0x13, 0xc1, 0xbe, 0xdd, 0xc3, 0x1f,
- 0xd8, 0x00, 0x2f, 0x03, 0x01, 0xbf, 0x01, 0xc3, 0x0b, 0x0e, 0x00, 0x2e,
- 0xf3, 0x01, 0xbf, 0x07, 0xc5, 0xa3, 0xa1, 0x00, 0x2e, 0xe3, 0x01, 0xbf,
- 0x0d, 0xc3, 0x4b, 0x98, 0x00, 0x2e, 0xcb, 0x01, 0xbf, 0x13, 0xc5, 0x4b,
- 0x92, 0x00, 0x2e, 0xb3, 0x01, 0xbf, 0x19, 0xc2, 0x01, 0xf0, 0x00, 0x2e,
- 0xa3, 0x01, 0xbf, 0x1f, 0xc5, 0x45, 0xf6, 0x00, 0x2e, 0x9b, 0x01, 0xbf,
- 0x29, 0xc5, 0xa1, 0x94, 0x00, 0x2e, 0x93, 0x01, 0xbf, 0x2f, 0x03, 0xc1,
- 0xbf, 0x35, 0x45, 0xdc, 0x41, 0x41, 0xbf, 0x3f, 0xd4, 0x3a, 0xe9, 0x07,
- 0xd8, 0xf1, 0x13, 0xc1, 0xbf, 0x6f, 0x15, 0xc1, 0xbf, 0x7e, 0xc4, 0xe5,
- 0xfb, 0x00, 0x2d, 0xf9, 0xc5, 0xdb, 0xe7, 0x00, 0x2d, 0xe9, 0xcf, 0x64,
- 0xbb, 0x00, 0x2d, 0xe1, 0x0a, 0xc1, 0xbf, 0x8e, 0xc5, 0x7d, 0xd2, 0x00,
- 0x2d, 0xb9, 0xc5, 0xd7, 0xb4, 0x00, 0x2d, 0xa8, 0x43, 0x00, 0x63, 0xc1,
- 0xbf, 0xa3, 0xcb, 0x92, 0x8f, 0x00, 0x2e, 0x31, 0xc9, 0xaf, 0xbe, 0x00,
- 0x2e, 0x19, 0xc5, 0xd8, 0x5e, 0x00, 0x2e, 0x01, 0xc5, 0xdb, 0x65, 0x00,
- 0x2d, 0xf0, 0xc4, 0xe2, 0x97, 0x00, 0x2d, 0x71, 0x03, 0x41, 0xbf, 0xaf,
- 0xc3, 0x54, 0xe0, 0x00, 0x2d, 0x69, 0xc4, 0x44, 0x7c, 0x00, 0x2d, 0x38,
- 0xcc, 0x83, 0x50, 0x00, 0x2d, 0x51, 0xc3, 0x17, 0x64, 0x00, 0x2c, 0xd0,
- 0x07, 0xc1, 0xbf, 0xbb, 0xc5, 0xdd, 0x9a, 0x00, 0x2c, 0xb0, 0xc3, 0xa9,
- 0x62, 0x00, 0x2d, 0x41, 0xc9, 0xb2, 0xa9, 0x00, 0x2c, 0xf8, 0xc3, 0x15,
- 0xd3, 0x00, 0x2d, 0x09, 0xc4, 0xe0, 0x1b, 0x00, 0x2c, 0xc8, 0xc9, 0xb6,
- 0x5a, 0x00, 0x2c, 0x99, 0xc4, 0xda, 0xb2, 0x00, 0x2c, 0x90, 0xc3, 0x13,
- 0xfc, 0x00, 0x2c, 0xe3, 0x01, 0xbf, 0xc7, 0xc6, 0xce, 0xe9, 0x00, 0x2c,
- 0xf0, 0xc4, 0xe0, 0x03, 0x00, 0x2d, 0x19, 0xc7, 0xc3, 0xcc, 0x00, 0x2d,
- 0x21, 0xc5, 0xd8, 0x36, 0x00, 0x2d, 0x2a, 0x01, 0xbf, 0xcd, 0x05, 0xc1,
- 0xbf, 0xd3, 0xcf, 0x65, 0xe7, 0x02, 0x6e, 0x09, 0x03, 0xc1, 0xbf, 0xe5,
- 0xc6, 0xd0, 0x51, 0x02, 0x6f, 0x21, 0x19, 0xc1, 0xbf, 0xef, 0xd6, 0x2e,
- 0xa9, 0x02, 0x6f, 0x99, 0xcf, 0x6b, 0xa5, 0x02, 0x6f, 0xa9, 0xcb, 0x95,
- 0xb2, 0x02, 0x6f, 0xc1, 0xcb, 0x90, 0xd7, 0x02, 0x6f, 0xc8, 0xd9, 0x20,
- 0xe5, 0x02, 0x6e, 0x11, 0xc8, 0xb8, 0x7d, 0x02, 0x6f, 0xd0, 0xc9, 0xaf,
- 0xd0, 0x02, 0x6f, 0x39, 0xc6, 0xd1, 0x41, 0x02, 0x6f, 0x41, 0xc9, 0xac,
- 0xdc, 0x02, 0x6f, 0xa0, 0xc5, 0xd8, 0x22, 0x02, 0x6e, 0x29, 0xca, 0xa1,
- 0x3a, 0x02, 0x6e, 0x98, 0xc6, 0xd2, 0x55, 0x02, 0x6e, 0x41, 0xcd, 0x79,
- 0x52, 0x02, 0x6f, 0xe8, 0x44, 0x3a, 0xb7, 0xc1, 0xbf, 0xfb, 0xc3, 0x00,
- 0x68, 0x02, 0x6e, 0xa8, 0xc3, 0x06, 0xff, 0x02, 0x6e, 0xb9, 0xc4, 0x00,
- 0x9c, 0x02, 0x6f, 0x00, 0xc6, 0xcc, 0xcd, 0x02, 0x6e, 0xc1, 0xc8, 0xb9,
- 0x1d, 0x02, 0x6f, 0xe0, 0xc7, 0x12, 0xa1, 0x02, 0x6f, 0x29, 0xc7, 0x57,
- 0x18, 0x02, 0x6f, 0x70, 0xa1, 0x0f, 0xdb, 0xc1, 0x9f, 0x0f, 0xdb, 0xb1,
- 0xa0, 0x0f, 0xdb, 0xb9, 0xa2, 0x0f, 0xdb, 0xc9, 0xa3, 0x0f, 0xdb, 0xd1,
- 0xa4, 0x0f, 0xdb, 0xd9, 0xc4, 0xe2, 0xe7, 0x0f, 0xdc, 0x08, 0x45, 0x05,
- 0xf0, 0xc1, 0xc0, 0x05, 0xc2, 0x00, 0xbb, 0x01, 0x00, 0xa8, 0xa6, 0x01,
- 0x1d, 0xe9, 0xa4, 0x01, 0x1d, 0xe1, 0xa0, 0x01, 0x1d, 0xd9, 0x9e, 0x01,
- 0x1d, 0xd0, 0x42, 0x00, 0x03, 0xc1, 0xc0, 0x11, 0xcc, 0x83, 0x38, 0x0f,
- 0xb5, 0x28, 0xc6, 0xcd, 0x9f, 0x0f, 0x9e, 0x39, 0xc4, 0x00, 0x67, 0x0f,
- 0xa1, 0xa0, 0xcb, 0x9a, 0x56, 0x0f, 0x9f, 0x09, 0xc8, 0x32, 0x4e, 0x0f,
- 0x9f, 0x02, 0x01, 0xc0, 0x20, 0xc4, 0xce, 0x71, 0x01, 0x34, 0x91, 0xc6,
- 0xcc, 0x67, 0x01, 0x31, 0x69, 0xc6, 0xce, 0x83, 0x0f, 0xb7, 0x00, 0xc2,
- 0x03, 0xc7, 0x0f, 0xc9, 0xf1, 0x89, 0x0f, 0xa2, 0xe0, 0xda, 0x1b, 0x09,
- 0x0f, 0xc8, 0xf1, 0xd8, 0x25, 0xf8, 0x0f, 0xd7, 0x80, 0xc4, 0x22, 0x71,
- 0x08, 0x69, 0xc9, 0xc5, 0x01, 0xdb, 0x08, 0x69, 0xc1, 0x15, 0xc1, 0xc0,
- 0x24, 0x08, 0xc1, 0xc0, 0x30, 0x16, 0xc1, 0xc0, 0x3c, 0xc3, 0x01, 0xb4,
- 0x08, 0x69, 0x89, 0xc4, 0x15, 0xd3, 0x08, 0x69, 0x80, 0x42, 0x00, 0xb3,
- 0xc1, 0xc0, 0x48, 0xc8, 0xbe, 0xf5, 0x08, 0x69, 0x20, 0xc9, 0xaf, 0x91,
- 0x08, 0x69, 0x19, 0xc5, 0xdf, 0x25, 0x08, 0x69, 0x10, 0x91, 0x08, 0x69,
- 0x09, 0x87, 0x08, 0x69, 0x01, 0x97, 0x08, 0x68, 0xf9, 0x8b, 0x08, 0x68,
- 0xf1, 0x83, 0x08, 0x68, 0xe8, 0xc2, 0x04, 0x41, 0x08, 0x68, 0xe1, 0x10,
- 0xc1, 0xc0, 0x5a, 0x0d, 0xc1, 0xc0, 0x6a, 0xc2, 0x1d, 0x5f, 0x08, 0x68,
- 0xc1, 0xc2, 0x01, 0x09, 0x08, 0x68, 0xb1, 0xc2, 0x00, 0xad, 0x08, 0x68,
- 0xa1, 0xc2, 0x00, 0xc7, 0x08, 0x68, 0x99, 0xc2, 0x01, 0x29, 0x08, 0x68,
- 0x91, 0x14, 0xc1, 0xc0, 0x7a, 0x06, 0xc1, 0xc0, 0x84, 0xc2, 0x00, 0x67,
- 0x08, 0x68, 0x49, 0xc2, 0x00, 0xa4, 0x08, 0x68, 0x39, 0xc2, 0x03, 0x40,
- 0x08, 0x68, 0x31, 0xc2, 0x24, 0x58, 0x08, 0x68, 0x29, 0x16, 0xc1, 0xc0,
- 0x8e, 0x83, 0x08, 0x68, 0x01, 0xc2, 0x00, 0xde, 0x08, 0x68, 0x09, 0xc2,
- 0x03, 0xa4, 0x08, 0x68, 0x11, 0xc2, 0x02, 0xb4, 0x08, 0x68, 0x71, 0x15,
- 0x41, 0xc0, 0x98, 0x97, 0x00, 0xb9, 0x99, 0x8b, 0x00, 0xb9, 0x90, 0xc2,
- 0x00, 0xa4, 0x00, 0xb9, 0x89, 0xc2, 0x0c, 0x65, 0x00, 0xb9, 0x81, 0xc2,
- 0x01, 0x09, 0x00, 0xb9, 0x79, 0xc2, 0x00, 0xc7, 0x00, 0xb9, 0x71, 0xc2,
- 0x02, 0x59, 0x00, 0xb9, 0x69, 0xc2, 0x1d, 0x5f, 0x00, 0xb9, 0x61, 0xc2,
- 0x00, 0xad, 0x00, 0xb9, 0x59, 0xc2, 0x00, 0xde, 0x00, 0xb9, 0x51, 0xc2,
- 0x03, 0xa4, 0x00, 0xb9, 0x49, 0x10, 0xc1, 0xc0, 0xa2, 0xc2, 0x0b, 0xc6,
- 0x00, 0xb9, 0x39, 0xc2, 0x00, 0xb3, 0x00, 0xb9, 0x31, 0xc2, 0x01, 0x29,
- 0x00, 0xb9, 0x21, 0xc2, 0x04, 0x2b, 0x00, 0xb9, 0x19, 0x97, 0x00, 0xb9,
- 0x11, 0x8b, 0x00, 0xb9, 0x09, 0x83, 0x00, 0xb9, 0x00, 0x49, 0xb1, 0xec,
- 0xc1, 0xc0, 0xac, 0x0c, 0xc1, 0xc0, 0xf9, 0xd4, 0x38, 0xcd, 0x01, 0x81,
- 0x71, 0xd4, 0x3c, 0xa1, 0x01, 0x81, 0x79, 0x47, 0x02, 0x90, 0xc1, 0xc1,
- 0x05, 0xc6, 0x94, 0x2b, 0x01, 0x8b, 0x20, 0xc3, 0x01, 0xb4, 0x01, 0x81,
- 0x09, 0x16, 0xc1, 0xc1, 0x62, 0x08, 0xc1, 0xc1, 0x70, 0x15, 0xc1, 0xc1,
- 0x7c, 0xc5, 0x01, 0xdb, 0x01, 0x81, 0x41, 0xc4, 0x22, 0x71, 0x01, 0x81,
- 0x48, 0xc3, 0x01, 0xb4, 0x08, 0x47, 0xdb, 0x01, 0xc1, 0x88, 0x16, 0xc1,
- 0xc1, 0x8e, 0xc4, 0x0c, 0x5a, 0x08, 0x47, 0xe0, 0x16, 0xc1, 0xc1, 0x9a,
- 0x15, 0xc1, 0xc1, 0xa6, 0xc4, 0xbf, 0xb9, 0x08, 0x47, 0x91, 0xc2, 0x00,
- 0x27, 0x08, 0x47, 0x81, 0x03, 0xc1, 0xc1, 0xb0, 0xc3, 0x1f, 0xd8, 0x08,
- 0x47, 0x69, 0xc3, 0x0b, 0x0e, 0x08, 0x47, 0x61, 0xc6, 0xd0, 0x5d, 0x08,
- 0x47, 0x59, 0xc4, 0xe2, 0x57, 0x08, 0x47, 0x51, 0xc4, 0x4b, 0x98, 0x08,
- 0x47, 0x49, 0xc2, 0x01, 0xf0, 0x08, 0x47, 0x23, 0x01, 0xc1, 0xbc, 0xc4,
- 0xe0, 0xa3, 0x08, 0x47, 0x31, 0xc3, 0x78, 0xa9, 0x08, 0x47, 0x29, 0xcb,
- 0x98, 0xbf, 0x08, 0x47, 0x19, 0xc5, 0xa1, 0x94, 0x08, 0x47, 0x11, 0xc4,
- 0xe4, 0x8f, 0x08, 0x47, 0x08, 0xca, 0x39, 0xef, 0x07, 0xfb, 0x29, 0x47,
- 0x02, 0x90, 0xc1, 0xc1, 0xc2, 0xd1, 0x30, 0x3a, 0x07, 0xfc, 0xf1, 0xd6,
- 0x30, 0x35, 0x07, 0xfc, 0xf8, 0x0d, 0xc1, 0xc1, 0xfd, 0x15, 0xc1, 0xc2,
- 0x0c, 0xc5, 0xd6, 0x3d, 0x07, 0xfd, 0x4b, 0x01, 0xc2, 0x18, 0xc5, 0xdb,
- 0x51, 0x07, 0xfd, 0x89, 0x12, 0xc1, 0xc2, 0x1c, 0x8b, 0x07, 0xfe, 0xe3,
- 0x01, 0xc2, 0x2b, 0x05, 0xc1, 0xc2, 0x31, 0x16, 0xc1, 0xc2, 0x3d, 0xc5,
- 0x98, 0x41, 0x07, 0xfd, 0xf1, 0x83, 0x07, 0xfe, 0x13, 0x01, 0xc2, 0x49,
- 0x1b, 0xc1, 0xc2, 0x4d, 0x87, 0x07, 0xfe, 0x3b, 0x01, 0xc2, 0x67, 0x91,
- 0x07, 0xfe, 0x63, 0x01, 0xc2, 0x6f, 0x19, 0xc1, 0xc2, 0x73, 0x97, 0x07,
- 0xfe, 0x99, 0xc5, 0xd9, 0x80, 0x07, 0xfd, 0x22, 0x01, 0xc2, 0x85, 0xd1,
- 0x55, 0x5d, 0x0f, 0xb4, 0x28, 0x45, 0x04, 0x74, 0xc1, 0xc2, 0x89, 0x83,
- 0x01, 0x82, 0xa9, 0x8b, 0x01, 0x82, 0xb9, 0x97, 0x01, 0x82, 0xc9, 0x87,
- 0x01, 0x82, 0xd9, 0x91, 0x01, 0x82, 0xe8, 0x47, 0x7e, 0xdc, 0x41, 0xc2,
- 0xaf, 0x83, 0x01, 0x82, 0x59, 0x8b, 0x01, 0x82, 0x69, 0x97, 0x01, 0x82,
- 0x79, 0x87, 0x01, 0x82, 0x89, 0x91, 0x01, 0x82, 0x98, 0x83, 0x01, 0x82,
- 0x61, 0x8b, 0x01, 0x82, 0x71, 0x97, 0x01, 0x82, 0x81, 0x87, 0x01, 0x82,
- 0x91, 0x91, 0x01, 0x82, 0xa0, 0x83, 0x01, 0x82, 0xb1, 0x8b, 0x01, 0x82,
- 0xc1, 0x97, 0x01, 0x82, 0xd1, 0x87, 0x01, 0x82, 0xe1, 0x91, 0x01, 0x82,
- 0xf0, 0x83, 0x01, 0x82, 0xf9, 0x8b, 0x01, 0x83, 0x09, 0x97, 0x01, 0x83,
- 0x21, 0x87, 0x01, 0x83, 0x31, 0x91, 0x01, 0x83, 0x40, 0x83, 0x01, 0x83,
- 0x01, 0x8b, 0x01, 0x83, 0x11, 0x97, 0x01, 0x83, 0x29, 0x87, 0x01, 0x83,
- 0x39, 0x91, 0x01, 0x83, 0x48, 0x83, 0x01, 0x83, 0x51, 0x8b, 0x01, 0x83,
- 0x59, 0x97, 0x01, 0x83, 0x61, 0x87, 0x01, 0x83, 0x69, 0x91, 0x01, 0x83,
- 0x70, 0x83, 0x01, 0x83, 0x79, 0x8b, 0x01, 0x83, 0x91, 0x97, 0x01, 0x83,
- 0xa9, 0x87, 0x01, 0x83, 0xc1, 0x91, 0x01, 0x83, 0xd8, 0x83, 0x01, 0x83,
- 0x81, 0x8b, 0x01, 0x83, 0x99, 0x97, 0x01, 0x83, 0xb1, 0x87, 0x01, 0x83,
- 0xc9, 0x91, 0x01, 0x83, 0xe0, 0x83, 0x01, 0x83, 0x89, 0x8b, 0x01, 0x83,
- 0xa1, 0x97, 0x01, 0x83, 0xb9, 0x87, 0x01, 0x83, 0xd1, 0x91, 0x01, 0x83,
- 0xe8, 0x83, 0x01, 0x83, 0xf1, 0x8b, 0x01, 0x83, 0xf9, 0x97, 0x01, 0x84,
- 0x01, 0x87, 0x01, 0x84, 0x09, 0x91, 0x01, 0x84, 0x10, 0x83, 0x01, 0x84,
- 0x21, 0x97, 0x01, 0x84, 0x31, 0x91, 0x01, 0x84, 0x40, 0x83, 0x01, 0x84,
- 0x49, 0x8b, 0x01, 0x84, 0x51, 0x97, 0x01, 0x84, 0x59, 0x87, 0x01, 0x84,
- 0x61, 0x91, 0x01, 0x84, 0x68, 0x83, 0x01, 0x84, 0x79, 0x8b, 0x01, 0x84,
- 0x81, 0x87, 0x01, 0x84, 0x89, 0x91, 0x01, 0x84, 0x90, 0xc6, 0x1d, 0x59,
- 0x01, 0x02, 0x19, 0xce, 0x66, 0xab, 0x01, 0x70, 0xd0, 0x45, 0x6b, 0x55,
- 0xc1, 0xc2, 0xbb, 0xcc, 0x0d, 0x80, 0x01, 0x2e, 0xc9, 0xc6, 0x1d, 0x59,
- 0x01, 0x2e, 0xc1, 0xcc, 0x06, 0x1b, 0x0f, 0xdc, 0x81, 0x42, 0x01, 0x4a,
- 0x41, 0xc2, 0xc7, 0xc9, 0x17, 0xe6, 0x01, 0x37, 0x39, 0x0e, 0xc1, 0xc2,
- 0xcd, 0xc8, 0xba, 0xb5, 0x01, 0x09, 0x39, 0xc8, 0xb8, 0xed, 0x01, 0x02,
- 0xa1, 0xd0, 0x0f, 0xfb, 0x00, 0x05, 0x09, 0xcd, 0x2c, 0x41, 0x00, 0x05,
- 0xf9, 0xcb, 0x10, 0xe9, 0x01, 0x70, 0xc0, 0xda, 0x1c, 0xdd, 0x01, 0x35,
- 0x21, 0x51, 0x4f, 0xeb, 0x41, 0xc2, 0xdc, 0x00, 0x41, 0xc2, 0xee, 0xc9,
- 0x4f, 0x27, 0x01, 0x1d, 0x71, 0x45, 0x00, 0x6c, 0xc1, 0xc3, 0x00, 0x03,
- 0x41, 0xc3, 0x24, 0x47, 0x33, 0xef, 0xc1, 0xc3, 0x30, 0x47, 0x02, 0x90,
- 0x41, 0xc3, 0x43, 0x47, 0x33, 0xef, 0xc1, 0xc3, 0x9c, 0x47, 0x02, 0x90,
- 0x41, 0xc3, 0xaf, 0xc5, 0x56, 0xbd, 0x01, 0x09, 0xc9, 0x49, 0x19, 0xf6,
- 0x41, 0xc4, 0x12, 0xd1, 0x2f, 0xf8, 0x0f, 0xae, 0xd1, 0xc4, 0x0b, 0xeb,
- 0x01, 0x4f, 0x08, 0xd3, 0x3f, 0xad, 0x0f, 0x65, 0xa1, 0x47, 0x33, 0xef,
- 0xc1, 0xc4, 0x22, 0xca, 0xa1, 0xb2, 0x0f, 0x65, 0x81, 0x49, 0x52, 0xd7,
- 0xc1, 0xc4, 0x67, 0xcb, 0x57, 0xd2, 0x0f, 0x65, 0x61, 0xc9, 0x3f, 0xb7,
- 0x0f, 0x65, 0x00, 0xd5, 0x36, 0xaf, 0x01, 0x4f, 0x28, 0x08, 0xc1, 0xc4,
- 0x73, 0x16, 0xc1, 0xc4, 0x7f, 0xc3, 0x01, 0xb4, 0x0e, 0x9b, 0x90, 0xda,
- 0x19, 0xeb, 0x01, 0x81, 0xb9, 0x4b, 0x1d, 0x34, 0x41, 0xc4, 0x8b, 0x48,
- 0x09, 0x13, 0xc1, 0xc4, 0xbb, 0x49, 0xaa, 0xff, 0xc1, 0xc4, 0xc7, 0xcd,
- 0x7d, 0x6f, 0x01, 0x7f, 0xa1, 0x4e, 0x74, 0xb5, 0xc1, 0xc4, 0xd3, 0xc8,
- 0x00, 0xd5, 0x01, 0x7f, 0xd8, 0xc7, 0xc2, 0xfa, 0x01, 0x8c, 0x99, 0x0a,
- 0xc1, 0xc4, 0xe9, 0xc7, 0xc8, 0x9c, 0x01, 0x8c, 0xb0, 0x43, 0x06, 0x9e,
- 0xc1, 0xc4, 0xf5, 0xc9, 0xac, 0x28, 0x01, 0x8c, 0xc8, 0xca, 0x9f, 0x6e,
- 0x01, 0x8c, 0xb9, 0xc7, 0xc1, 0xbf, 0x01, 0x8c, 0xf8, 0x16, 0xc1, 0xc5,
- 0x01, 0xc3, 0x01, 0xb4, 0x08, 0x42, 0xc2, 0x01, 0xc5, 0x14, 0x16, 0xc1,
- 0xc5, 0x18, 0x15, 0xc1, 0xc5, 0x24, 0x03, 0xc1, 0xc5, 0x2e, 0xc3, 0x1f,
- 0xd8, 0x08, 0x42, 0x69, 0xc3, 0x0b, 0x0e, 0x08, 0x42, 0x61, 0xc6, 0xd0,
- 0x5d, 0x08, 0x42, 0x59, 0xc4, 0xe2, 0x57, 0x08, 0x42, 0x51, 0xc4, 0x4b,
- 0x98, 0x08, 0x42, 0x49, 0xc2, 0x01, 0xf0, 0x08, 0x42, 0x23, 0x01, 0xc5,
- 0x3a, 0xc5, 0x4b, 0x92, 0x08, 0x42, 0x31, 0xc3, 0x78, 0xa9, 0x08, 0x42,
- 0x29, 0xc6, 0x45, 0xf6, 0x08, 0x42, 0x19, 0xc5, 0xa1, 0x94, 0x08, 0x42,
- 0x11, 0xc4, 0xe4, 0x8f, 0x08, 0x42, 0x09, 0xc2, 0x00, 0x27, 0x08, 0x42,
- 0x81, 0xc4, 0xbf, 0xb9, 0x08, 0x42, 0x91, 0xc4, 0x5d, 0xe2, 0x08, 0x42,
- 0x98, 0xc7, 0xc7, 0xdf, 0x0f, 0xa2, 0xd1, 0xc3, 0x1d, 0x21, 0x0f, 0xa2,
- 0x91, 0xc6, 0xb0, 0xa2, 0x0f, 0xa3, 0x09, 0xc5, 0xdf, 0x34, 0x0f, 0xa3,
- 0x10, 0x45, 0x78, 0x8c, 0xc1, 0xc5, 0x40, 0xc5, 0x01, 0x7b, 0x01, 0x2e,
- 0x5b, 0x01, 0xc5, 0x77, 0xd4, 0x3f, 0x21, 0x01, 0x3f, 0x0b, 0x01, 0xc5,
- 0x7b, 0xc8, 0xbc, 0x9d, 0x01, 0x33, 0x38, 0x07, 0xc1, 0xc5, 0x81, 0xd5,
- 0x37, 0x96, 0x0f, 0xad, 0x59, 0x11, 0x41, 0xc5, 0x8b, 0xca, 0x9d, 0x16,
- 0x0f, 0xc5, 0x69, 0xc3, 0x01, 0xb4, 0x0f, 0xc5, 0x60, 0xc5, 0x07, 0x0a,
- 0x01, 0x2d, 0x0b, 0x01, 0xc5, 0x97, 0xc7, 0x33, 0xb4, 0x01, 0x38, 0x21,
- 0xc9, 0xb6, 0x6c, 0x01, 0x33, 0x21, 0xc2, 0x01, 0xbd, 0x0f, 0x99, 0x1b,
- 0x01, 0xc5, 0x9b, 0x0f, 0xc1, 0xc5, 0x9f, 0xca, 0x57, 0xb7, 0x01, 0x30,
- 0xb1, 0xc3, 0x04, 0x45, 0x01, 0x30, 0x31, 0xc9, 0xb5, 0xca, 0x07, 0xf2,
- 0x30, 0x03, 0xc1, 0xc5, 0xab, 0x43, 0x01, 0x1f, 0xc1, 0xc5, 0xb7, 0x45,
- 0x08, 0xc1, 0x41, 0xc5, 0xc1, 0xc6, 0x3f, 0x2f, 0x01, 0x2e, 0x3b, 0x01,
- 0xc5, 0xc7, 0x48, 0xba, 0xc5, 0xc1, 0xc5, 0xcb, 0x43, 0x02, 0xc7, 0x41,
- 0xc5, 0xd7, 0x14, 0xc1, 0xc5, 0xe3, 0xd7, 0x29, 0xad, 0x01, 0x36, 0xb9,
- 0xc8, 0x37, 0x5b, 0x01, 0x30, 0x79, 0xd2, 0x4b, 0xc0, 0x0f, 0xab, 0xf0,
- 0x0e, 0xc1, 0xc5, 0xef, 0x4c, 0x0e, 0x93, 0xc1, 0xc5, 0xfc, 0xcc, 0x7d,
- 0x22, 0x01, 0x31, 0xc8, 0x44, 0x01, 0xdc, 0xc1, 0xc6, 0x08, 0xc8, 0x45,
- 0x0c, 0x01, 0x2d, 0x68, 0x4a, 0x03, 0xfd, 0xc1, 0xc6, 0x14, 0x4a, 0x01,
- 0x89, 0x41, 0xc6, 0x20, 0x46, 0x06, 0x1c, 0xc1, 0xc6, 0x35, 0xca, 0xa2,
- 0xa2, 0x01, 0x5e, 0xe8, 0xcc, 0x83, 0x2c, 0x01, 0x2d, 0x89, 0x42, 0x00,
- 0x4d, 0x41, 0xc6, 0x45, 0x46, 0x03, 0x3e, 0xc1, 0xc6, 0x51, 0xce, 0x54,
- 0x61, 0x01, 0x58, 0xf0, 0xd5, 0x37, 0xab, 0x0f, 0xc4, 0x39, 0xd0, 0x37,
- 0xb0, 0x0f, 0xc3, 0xf9, 0xd0, 0x5f, 0x62, 0x0f, 0xc3, 0x39, 0xca, 0x37,
- 0xb6, 0x0f, 0xc3, 0x79, 0xd1, 0x56, 0x3a, 0x0f, 0xc3, 0xb8, 0xd5, 0x37,
- 0xab, 0x0f, 0xc4, 0x31, 0xd1, 0x56, 0x3a, 0x0f, 0xc3, 0xb1, 0xca, 0x37,
- 0xb6, 0x0f, 0xc3, 0x71, 0xd0, 0x5f, 0x62, 0x0f, 0xc3, 0x31, 0xd0, 0x37,
- 0xb0, 0x0f, 0xc3, 0xf0, 0xd5, 0x37, 0xab, 0x0f, 0xc4, 0x29, 0xd1, 0x56,
- 0x3a, 0x0f, 0xc3, 0xa9, 0xca, 0x37, 0xb6, 0x0f, 0xc3, 0x69, 0xd0, 0x5f,
- 0x62, 0x0f, 0xc3, 0x29, 0xd0, 0x37, 0xb0, 0x0f, 0xc3, 0xe8, 0xd5, 0x37,
- 0xab, 0x0f, 0xc4, 0x21, 0xd1, 0x56, 0x3a, 0x0f, 0xc3, 0xa1, 0xca, 0x37,
- 0xb6, 0x0f, 0xc3, 0x61, 0xd0, 0x5f, 0x62, 0x0f, 0xc3, 0x21, 0xd0, 0x37,
- 0xb0, 0x0f, 0xc3, 0xe0, 0xc5, 0xd8, 0xe5, 0x0f, 0x9c, 0x81, 0xcc, 0x86,
- 0xa4, 0x0f, 0x99, 0x60, 0xc6, 0xcf, 0x7f, 0x0f, 0xb5, 0xf1, 0xc4, 0x53,
- 0x38, 0x0f, 0x98, 0x51, 0xc7, 0xc5, 0xfc, 0x0f, 0xa0, 0x19, 0xc4, 0xe5,
- 0x0b, 0x0f, 0xc9, 0xe8, 0xc4, 0x22, 0x71, 0x0f, 0x17, 0xc9, 0xc5, 0x01,
- 0xdb, 0x0f, 0x17, 0xc1, 0x15, 0xc1, 0xc6, 0x63, 0x08, 0xc1, 0xc6, 0x6f,
- 0x16, 0xc1, 0xc6, 0x7b, 0xc3, 0x01, 0xb4, 0x0f, 0x17, 0x89, 0xc4, 0x15,
- 0xd3, 0x0f, 0x17, 0x80, 0xc3, 0xdd, 0x92, 0x0f, 0x17, 0x73, 0x01, 0xc6,
- 0x87, 0xc3, 0x22, 0x07, 0x0f, 0x17, 0x62, 0x01, 0xc6, 0x8d, 0x1b, 0xc1,
- 0xc6, 0x93, 0x97, 0x0f, 0x16, 0xf3, 0x01, 0xc6, 0x9d, 0x10, 0xc1, 0xc6,
- 0xa3, 0x83, 0x0f, 0x16, 0x0b, 0x01, 0xc6, 0xb3, 0x87, 0x0f, 0x16, 0xdb,
- 0x01, 0xc6, 0xc4, 0x91, 0x0f, 0x16, 0xab, 0x01, 0xc6, 0xc8, 0x8b, 0x0f,
- 0x16, 0xe3, 0x01, 0xc6, 0xcf, 0x16, 0xc1, 0xc6, 0xd5, 0x0e, 0xc1, 0xc6,
- 0xeb, 0xc2, 0x00, 0xa4, 0x0f, 0x16, 0xd1, 0x0d, 0xc1, 0xc6, 0xf5, 0xc2,
- 0x00, 0xad, 0x0f, 0x16, 0xc1, 0xc2, 0x02, 0x59, 0x0f, 0x16, 0xb9, 0xc2,
- 0x04, 0x41, 0x0f, 0x16, 0x99, 0xc2, 0x01, 0x09, 0x0f, 0x16, 0x91, 0xc2,
- 0x02, 0xb4, 0x0f, 0x16, 0x89, 0xc2, 0x24, 0x58, 0x0f, 0x16, 0x81, 0x15,
- 0xc1, 0xc6, 0xff, 0xc2, 0x00, 0x67, 0x0f, 0x16, 0x69, 0x12, 0xc1, 0xc7,
- 0x09, 0xc2, 0x01, 0x29, 0x0f, 0x16, 0x29, 0xc2, 0x0b, 0xc6, 0x0f, 0x16,
- 0x21, 0xc2, 0x03, 0x40, 0x0f, 0x16, 0x19, 0xc2, 0x00, 0xde, 0x0f, 0x16,
- 0x10, 0xc6, 0x29, 0x88, 0x08, 0xc7, 0x91, 0xc6, 0xd4, 0x83, 0x08, 0xc7,
- 0x89, 0x15, 0xc1, 0xc7, 0x13, 0x08, 0xc1, 0xc7, 0x1f, 0x16, 0x41, 0xc7,
- 0x2b, 0xc4, 0x22, 0x71, 0x08, 0xc7, 0x49, 0xc5, 0x01, 0xdb, 0x08, 0xc7,
- 0x41, 0x15, 0xc1, 0xc7, 0x3d, 0x08, 0xc1, 0xc7, 0x49, 0x16, 0xc1, 0xc7,
- 0x55, 0xc3, 0x01, 0xb4, 0x08, 0xc7, 0x09, 0xc4, 0x15, 0xd3, 0x08, 0xc7,
- 0x00, 0xc4, 0xe0, 0x43, 0x08, 0xc6, 0xf9, 0x15, 0xc1, 0xc7, 0x61, 0x0a,
- 0xc1, 0xc7, 0x6d, 0xc2, 0x01, 0xbc, 0x08, 0xc6, 0xc1, 0xc2, 0x03, 0xca,
- 0x08, 0xc6, 0xb9, 0x83, 0x08, 0xc6, 0x0b, 0x01, 0xc7, 0x7d, 0xc2, 0x0b,
- 0xc6, 0x08, 0xc6, 0xa1, 0x10, 0xc1, 0xc7, 0x8b, 0xc3, 0x02, 0x92, 0x08,
- 0xc6, 0x91, 0x91, 0x08, 0xc6, 0x4b, 0x01, 0xc7, 0x97, 0x87, 0x08, 0xc6,
- 0x43, 0x01, 0xc7, 0x9d, 0x17, 0xc1, 0xc7, 0xa1, 0x1b, 0xc1, 0xc7, 0xa9,
- 0xc2, 0x05, 0x68, 0x08, 0xc6, 0x61, 0xc2, 0x01, 0x29, 0x08, 0xc6, 0x59,
- 0xc2, 0x23, 0xb4, 0x08, 0xc6, 0x31, 0xc2, 0x00, 0x6c, 0x08, 0xc6, 0x10,
- 0xc4, 0xe0, 0x43, 0x08, 0xc5, 0xf9, 0x15, 0xc1, 0xc7, 0xb8, 0x0a, 0xc1,
- 0xc7, 0xc4, 0xc2, 0x01, 0xbc, 0x08, 0xc5, 0xc1, 0xc2, 0x03, 0xca, 0x08,
- 0xc5, 0xb9, 0x83, 0x08, 0xc5, 0x0b, 0x01, 0xc7, 0xd4, 0xc2, 0x0b, 0xc6,
- 0x08, 0xc5, 0xa1, 0x10, 0xc1, 0xc7, 0xe2, 0xc3, 0x02, 0x92, 0x08, 0xc5,
- 0x91, 0x91, 0x08, 0xc5, 0x4b, 0x01, 0xc7, 0xee, 0x87, 0x08, 0xc5, 0x43,
- 0x01, 0xc7, 0xf4, 0x17, 0xc1, 0xc7, 0xf8, 0x1b, 0xc1, 0xc8, 0x00, 0xc2,
- 0x05, 0x68, 0x08, 0xc5, 0x61, 0xc2, 0x01, 0x29, 0x08, 0xc5, 0x59, 0xc2,
- 0x23, 0xb4, 0x08, 0xc5, 0x31, 0xc2, 0x00, 0x6c, 0x08, 0xc5, 0x10, 0xc3,
- 0x00, 0x8b, 0x01, 0x18, 0x39, 0xc7, 0x80, 0x81, 0x07, 0xf2, 0x78, 0xc5,
- 0x01, 0xf7, 0x01, 0x49, 0x99, 0xc4, 0x01, 0x1e, 0x01, 0x59, 0xf8, 0xcf,
- 0x1a, 0x92, 0x01, 0x02, 0xa9, 0xcc, 0x86, 0xec, 0x0f, 0x9d, 0xa0, 0x05,
- 0xc1, 0xc8, 0x0f, 0xd7, 0x15, 0x86, 0x01, 0x39, 0x19, 0xd8, 0x22, 0xe0,
- 0x01, 0x39, 0x11, 0x44, 0x01, 0xb8, 0xc1, 0xc8, 0x1b, 0xcb, 0x94, 0x9f,
- 0x0f, 0x9a, 0x01, 0xd2, 0x21, 0x36, 0x0f, 0xbe, 0x30, 0xcb, 0x99, 0xe8,
- 0x0f, 0x9b, 0xe8, 0x00, 0xc1, 0xc8, 0x27, 0xc9, 0xa9, 0xf1, 0x0f, 0xb1,
- 0xb0, 0xd7, 0x28, 0x6b, 0x0f, 0xb0, 0x59, 0xd0, 0x58, 0xd2, 0x0f, 0xb1,
- 0x88, 0xdf, 0x0d, 0x7d, 0x01, 0x36, 0xf1, 0x49, 0x0d, 0xbc, 0x41, 0xc8,
- 0x70, 0xe0, 0x02, 0x27, 0x01, 0x3d, 0x60, 0xc9, 0xab, 0x98, 0x0f, 0x98,
- 0xe9, 0xc6, 0x00, 0x71, 0x0f, 0x98, 0xa8, 0xca, 0x60, 0x02, 0x07, 0xf8,
- 0x19, 0xc7, 0x69, 0x82, 0x07, 0xff, 0x10, 0xc7, 0x0b, 0x80, 0x07, 0xf8,
- 0x51, 0xc8, 0x37, 0x31, 0x07, 0xf8, 0x31, 0xc9, 0x30, 0x6e, 0x07, 0xf8,
- 0x38, 0x45, 0x06, 0x98, 0xc1, 0xc8, 0x7c, 0xca, 0x97, 0xef, 0x07, 0xf8,
- 0x20, 0x11, 0xc1, 0xc8, 0xa0, 0xd0, 0x0b, 0x37, 0x07, 0xf9, 0xf1, 0xc8,
- 0x93, 0xbb, 0x07, 0xff, 0x00, 0xc8, 0x4f, 0x30, 0x07, 0xf8, 0xd9, 0xc6,
- 0x27, 0xf9, 0x07, 0xf8, 0x78, 0x07, 0xc1, 0xc8, 0xac, 0x45, 0x07, 0x12,
- 0xc1, 0xc8, 0xb8, 0xc7, 0x80, 0x81, 0x07, 0xf9, 0xf8, 0xca, 0x0e, 0x84,
- 0x07, 0xf8, 0xe9, 0xcf, 0x0f, 0xfc, 0x07, 0xf8, 0x08, 0xcf, 0x51, 0x41,
- 0x07, 0xf8, 0xf1, 0xca, 0x1f, 0x96, 0x07, 0xfa, 0x00, 0xcb, 0x2c, 0x43,
- 0x07, 0xf8, 0xf9, 0xcc, 0x00, 0xb2, 0x07, 0xf8, 0x10, 0xce, 0x65, 0x34,
- 0x07, 0xf9, 0xe1, 0x45, 0x02, 0x13, 0x41, 0xc8, 0xc4, 0xc9, 0xa1, 0x8b,
- 0x07, 0xff, 0x09, 0xcb, 0x93, 0xb8, 0x07, 0xf8, 0x29, 0xc8, 0x68, 0x22,
- 0x07, 0xf8, 0x58, 0x00, 0x41, 0xc8, 0xdc, 0xc9, 0xb5, 0x70, 0x0f, 0x9c,
- 0x39, 0x95, 0x0f, 0x9c, 0x30, 0xc5, 0x92, 0xed, 0x0f, 0xb4, 0x91, 0xcb,
- 0x98, 0x5c, 0x0f, 0xcf, 0x78, 0x0e, 0xc1, 0xc8, 0xe8, 0x06, 0xc1, 0xc8,
- 0xf4, 0xc8, 0xbe, 0x4d, 0x08, 0x52, 0xa1, 0x05, 0xc1, 0xc8, 0xfe, 0xcc,
- 0x11, 0x65, 0x08, 0x52, 0x88, 0x44, 0x01, 0xb4, 0xc1, 0xc9, 0x0a, 0x16,
- 0x41, 0xc9, 0x16, 0xc4, 0x06, 0x9d, 0x08, 0x52, 0x19, 0x16, 0xc1, 0xc9,
- 0x22, 0xc3, 0x01, 0xb4, 0x08, 0x52, 0x00, 0xc5, 0x1e, 0x24, 0x08, 0x51,
- 0xf9, 0x45, 0x34, 0xbc, 0x41, 0xc9, 0x2e, 0x42, 0x01, 0x4a, 0xc1, 0xc9,
- 0x3a, 0xc5, 0xd6, 0x01, 0x08, 0x51, 0xc9, 0xc9, 0x31, 0x7f, 0x08, 0x51,
- 0xc1, 0xc7, 0x44, 0x79, 0x08, 0x50, 0x79, 0xc8, 0x11, 0x40, 0x08, 0x50,
- 0x70, 0x18, 0xc1, 0xc9, 0x46, 0x16, 0xc1, 0xc9, 0x50, 0xc2, 0x00, 0xc7,
- 0x08, 0x51, 0x59, 0xc2, 0x02, 0x59, 0x08, 0x51, 0x51, 0xc2, 0x1d, 0x5f,
- 0x08, 0x51, 0x49, 0xc2, 0x00, 0xad, 0x08, 0x51, 0x41, 0x04, 0xc1, 0xc9,
- 0x5e, 0x12, 0xc1, 0xc9, 0x68, 0x10, 0xc1, 0xc9, 0x72, 0x06, 0xc1, 0xc9,
- 0x82, 0xc2, 0x24, 0x58, 0x08, 0x50, 0xb9, 0x05, 0xc1, 0xc9, 0x90, 0x09,
- 0xc1, 0xc9, 0x9a, 0x0d, 0xc1, 0xc9, 0xa4, 0x83, 0x08, 0x50, 0x01, 0x15,
- 0xc1, 0xc9, 0xb4, 0xc2, 0x02, 0xb4, 0x08, 0x51, 0x81, 0xc2, 0x00, 0xa4,
- 0x08, 0x51, 0x88, 0x06, 0xc1, 0xc9, 0xc4, 0x05, 0xc1, 0xc9, 0xdc, 0x04,
- 0xc1, 0xca, 0x1c, 0x03, 0xc1, 0xca, 0x5c, 0x26, 0xc1, 0xca, 0x9c, 0x25,
- 0xc1, 0xca, 0xdc, 0x24, 0xc1, 0xcb, 0x1c, 0x23, 0xc1, 0xcb, 0x5c, 0x22,
- 0xc1, 0xcb, 0x9c, 0x21, 0xc1, 0xcb, 0xdc, 0x20, 0xc1, 0xcc, 0x1c, 0x1f,
- 0xc1, 0xcc, 0x5c, 0x1e, 0xc1, 0xcc, 0x9c, 0x1d, 0x41, 0xcc, 0xdc, 0x08,
- 0xc1, 0xcd, 0x1c, 0x07, 0xc1, 0xcd, 0x5c, 0x06, 0xc1, 0xcd, 0x9c, 0x05,
- 0xc1, 0xcd, 0xdc, 0x04, 0xc1, 0xce, 0x1c, 0x03, 0xc1, 0xce, 0x5c, 0x26,
- 0xc1, 0xce, 0x9c, 0x25, 0xc1, 0xce, 0xdc, 0x24, 0xc1, 0xcf, 0x1c, 0x23,
- 0xc1, 0xcf, 0x5c, 0x22, 0xc1, 0xcf, 0x9c, 0x21, 0xc1, 0xcf, 0xdc, 0x20,
- 0xc1, 0xd0, 0x1c, 0x1f, 0xc1, 0xd0, 0x5c, 0x1e, 0xc1, 0xd0, 0x9c, 0x1d,
- 0x41, 0xd0, 0xdc, 0xc4, 0x18, 0x83, 0x08, 0x97, 0xb9, 0xc2, 0x26, 0x51,
- 0x08, 0x97, 0xb0, 0xc3, 0x0c, 0x5b, 0x08, 0x97, 0xa9, 0xc3, 0x06, 0x9e,
- 0x08, 0x97, 0xa0, 0xc4, 0x04, 0x5e, 0x08, 0x97, 0x99, 0xc2, 0x01, 0x47,
- 0x08, 0x97, 0x90, 0x8b, 0x08, 0x97, 0x31, 0x83, 0x08, 0x97, 0x01, 0x97,
- 0x08, 0x97, 0x40, 0x97, 0x08, 0x97, 0x20, 0x8b, 0x08, 0x97, 0x10, 0x83,
- 0x08, 0x96, 0xe9, 0xc2, 0x00, 0xa4, 0x08, 0x96, 0xe0, 0x83, 0x08, 0x96,
- 0xc9, 0xc2, 0x02, 0x59, 0x08, 0x96, 0x50, 0xc2, 0x00, 0xa4, 0x08, 0x96,
- 0xb1, 0xc2, 0x00, 0xde, 0x08, 0x96, 0xa9, 0x83, 0x08, 0x96, 0xa0, 0xc2,
- 0x00, 0xa4, 0x08, 0x96, 0x99, 0x83, 0x08, 0x96, 0x90, 0x83, 0x08, 0x96,
- 0x89, 0xc2, 0x00, 0xc1, 0x08, 0x96, 0x61, 0xc2, 0x1d, 0x5f, 0x08, 0x96,
- 0x29, 0xc2, 0x01, 0x29, 0x08, 0x95, 0xf8, 0xc2, 0x00, 0xa4, 0x08, 0x96,
- 0x81, 0x83, 0x08, 0x96, 0x79, 0x06, 0x41, 0xd1, 0x1c, 0xc2, 0x00, 0xa4,
- 0x08, 0x96, 0x71, 0x83, 0x08, 0x96, 0x69, 0x16, 0x41, 0xd1, 0x2c, 0xc2,
- 0x00, 0xa4, 0x08, 0x96, 0x21, 0xc2, 0x24, 0x58, 0x08, 0x96, 0x19, 0x83,
- 0x08, 0x96, 0x10, 0xc2, 0x00, 0xa4, 0x08, 0x96, 0x09, 0x83, 0x08, 0x96,
- 0x00, 0xc2, 0x00, 0xa4, 0x08, 0x95, 0xf1, 0xc2, 0x01, 0x29, 0x08, 0x95,
- 0xe9, 0x83, 0x08, 0x95, 0xe0, 0xc2, 0x00, 0xa4, 0x08, 0x95, 0xd9, 0x83,
- 0x08, 0x95, 0xd0, 0x97, 0x08, 0x95, 0xc9, 0x8b, 0x08, 0x95, 0xb9, 0x83,
- 0x08, 0x95, 0x88, 0x97, 0x08, 0x95, 0xa8, 0x8b, 0x08, 0x95, 0x98, 0x97,
- 0x08, 0x91, 0x99, 0x8b, 0x08, 0x91, 0x89, 0x83, 0x08, 0x91, 0x60, 0x8b,
- 0x08, 0x91, 0x70, 0xc6, 0xd1, 0xe3, 0x08, 0x92, 0x01, 0x83, 0x08, 0x91,
- 0x48, 0xc2, 0x00, 0xc7, 0x08, 0x91, 0x59, 0x83, 0x08, 0x91, 0x38, 0xc2,
- 0x00, 0xa4, 0x08, 0x91, 0x19, 0xc2, 0x00, 0xde, 0x08, 0x91, 0x11, 0x83,
- 0x08, 0x91, 0x08, 0xc2, 0x00, 0xa4, 0x08, 0x91, 0x01, 0x83, 0x08, 0x90,
- 0xf8, 0x83, 0x08, 0x90, 0xf1, 0xc2, 0x00, 0xc1, 0x08, 0x90, 0xc1, 0xc2,
- 0x1d, 0x5f, 0x08, 0x90, 0x99, 0xc2, 0x01, 0x29, 0x08, 0x90, 0x68, 0xc2,
- 0x00, 0xa4, 0x08, 0x90, 0xe9, 0x06, 0xc1, 0xd1, 0x36, 0x83, 0x08, 0x90,
- 0xd8, 0xc2, 0x00, 0xa4, 0x08, 0x90, 0xd1, 0x83, 0x08, 0x90, 0xc9, 0x16,
- 0x41, 0xd1, 0x46, 0xc2, 0x24, 0x58, 0x08, 0x90, 0x89, 0x83, 0x08, 0x90,
- 0x80, 0xc2, 0x00, 0xa4, 0x08, 0x90, 0x79, 0x83, 0x08, 0x90, 0x70, 0xc2,
- 0x00, 0xa4, 0x08, 0x90, 0x61, 0xc2, 0x01, 0x29, 0x08, 0x90, 0x59, 0x83,
- 0x08, 0x90, 0x50, 0xc2, 0x00, 0xa4, 0x08, 0x90, 0x49, 0x83, 0x08, 0x90,
- 0x40, 0x97, 0x08, 0x90, 0x39, 0x8b, 0x08, 0x90, 0x29, 0x83, 0x08, 0x90,
- 0x08, 0x15, 0xc1, 0xd1, 0x50, 0xc5, 0x35, 0x00, 0x08, 0x91, 0xb1, 0xc6,
- 0x1e, 0x23, 0x08, 0x91, 0xa9, 0xc8, 0x11, 0x40, 0x08, 0x91, 0xa0, 0xcc,
- 0x25, 0xea, 0x08, 0x91, 0xe1, 0xc5, 0x8b, 0x1f, 0x08, 0x91, 0xc8, 0x43,
- 0xe5, 0xe8, 0xc1, 0xd1, 0x5c, 0x12, 0xc1, 0xd1, 0x64, 0x04, 0xc1, 0xd1,
- 0x76, 0x45, 0xdc, 0x82, 0xc1, 0xd1, 0x82, 0xc9, 0xb2, 0x07, 0x00, 0xcf,
- 0x81, 0x4a, 0xa5, 0xea, 0x41, 0xd1, 0x8e, 0x03, 0xc1, 0xd1, 0xa2, 0x0d,
- 0xc1, 0xd1, 0xb4, 0xcb, 0x93, 0xd9, 0x00, 0xbe, 0xc9, 0x04, 0xc1, 0xd1,
- 0xc6, 0xc7, 0xc3, 0x16, 0x00, 0xbe, 0xb9, 0x05, 0xc1, 0xd1, 0xd0, 0xc6,
- 0xd0, 0x87, 0x00, 0xbe, 0x89, 0xcd, 0x78, 0x27, 0x00, 0xbe, 0x81, 0x16,
- 0xc1, 0xd1, 0xdc, 0x14, 0xc1, 0xd1, 0xe8, 0xcb, 0x8f, 0x77, 0x00, 0xbe,
- 0x49, 0xcd, 0x7a, 0x08, 0x00, 0xbe, 0x41, 0xc7, 0xc4, 0x43, 0x00, 0xbe,
- 0x30, 0xc4, 0x18, 0x83, 0x00, 0xbf, 0x39, 0xc2, 0x26, 0x51, 0x00, 0xbf,
- 0x30, 0xc3, 0x0c, 0x5b, 0x00, 0xbf, 0x29, 0xc3, 0x06, 0x9e, 0x00, 0xbf,
- 0x20, 0xc4, 0x04, 0x5e, 0x00, 0xbf, 0x19, 0xc2, 0x01, 0x47, 0x00, 0xbf,
- 0x10, 0x03, 0xc1, 0xd1, 0xf4, 0x11, 0xc1, 0xd2, 0x04, 0x87, 0x00, 0xbe,
- 0x09, 0x8b, 0x00, 0xbd, 0xbb, 0x01, 0xd2, 0x0c, 0x9b, 0x00, 0xbd, 0xcb,
- 0x01, 0xd2, 0x14, 0x97, 0x00, 0xbd, 0xda, 0x01, 0xd2, 0x1c, 0x83, 0x00,
- 0xbd, 0xa9, 0x93, 0x00, 0xbd, 0xa0, 0x03, 0xc1, 0xd2, 0x24, 0x48, 0xc0,
- 0x1d, 0xc1, 0xd2, 0x34, 0x87, 0x00, 0xbd, 0x79, 0x97, 0x00, 0xbd, 0x3b,
- 0x01, 0xd2, 0x40, 0x8b, 0x00, 0xbd, 0x2a, 0x01, 0xd2, 0x4b, 0x9b, 0x00,
- 0xbd, 0x70, 0x9b, 0x00, 0xbd, 0x60, 0x83, 0x00, 0xbd, 0x09, 0x91, 0x00,
- 0xbc, 0xd8, 0x83, 0x00, 0xbc, 0xf9, 0xc2, 0x05, 0x7b, 0x00, 0xbc, 0xf1,
- 0xc2, 0x00, 0xa4, 0x00, 0xbc, 0xe8, 0x0a, 0xc1, 0xd2, 0x4f, 0x91, 0x00,
- 0xbc, 0xb0, 0x91, 0x00, 0xbc, 0x99, 0xc2, 0x00, 0x10, 0x00, 0xbc, 0x71,
- 0xc2, 0x20, 0x67, 0x00, 0xbc, 0x49, 0xc2, 0x13, 0x4f, 0x00, 0xbc, 0x20,
- 0x0a, 0xc1, 0xd2, 0x57, 0x91, 0x00, 0xbc, 0x89, 0x83, 0x00, 0xbc, 0x79,
- 0x42, 0x00, 0x6e, 0x41, 0xd2, 0x5f, 0x91, 0x00, 0xbc, 0x61, 0x83, 0x00,
- 0xbc, 0x50, 0x0a, 0xc1, 0xd2, 0x67, 0x91, 0x00, 0xbc, 0x39, 0x83, 0x00,
- 0xbc, 0x28, 0x0a, 0xc1, 0xd2, 0x6f, 0x91, 0x00, 0xbc, 0x11, 0x83, 0x00,
- 0xbc, 0x00, 0xc3, 0x00, 0x4c, 0x0e, 0x96, 0x90, 0xc4, 0x13, 0xf2, 0x0e,
- 0x96, 0x88, 0xc4, 0x13, 0xf2, 0x0e, 0x96, 0x80, 0xc5, 0x13, 0xf1, 0x0e,
- 0x96, 0x79, 0xc2, 0x00, 0x7b, 0x0e, 0x96, 0x28, 0xc4, 0x13, 0xf2, 0x0e,
- 0x96, 0x70, 0xc6, 0x53, 0x71, 0x0e, 0x96, 0x69, 0xc3, 0x04, 0x5f, 0x0e,
- 0x96, 0x18, 0xc4, 0x21, 0x31, 0x0e, 0x96, 0x61, 0x91, 0x0e, 0x96, 0x10,
- 0x15, 0xc1, 0xd2, 0x77, 0x83, 0x01, 0x85, 0x13, 0x01, 0xd2, 0x91, 0x0f,
- 0xc1, 0xd2, 0x97, 0x8b, 0x01, 0x85, 0x21, 0x97, 0x01, 0x85, 0x31, 0x87,
- 0x01, 0x85, 0x41, 0x91, 0x01, 0x85, 0x51, 0x0d, 0xc1, 0xd2, 0xae, 0x09,
- 0xc1, 0xd2, 0xc2, 0x1c, 0xc1, 0xd2, 0xd6, 0x16, 0xc1, 0xd2, 0xea, 0x06,
- 0xc1, 0xd2, 0xfe, 0x90, 0x01, 0x87, 0x9b, 0x01, 0xd3, 0x12, 0x0a, 0xc1,
- 0xd3, 0x26, 0x04, 0xc1, 0xd3, 0x3a, 0x12, 0xc1, 0xd3, 0x4e, 0x1b, 0xc1,
- 0xd3, 0x62, 0x14, 0xc1, 0xd3, 0x6e, 0x19, 0xc1, 0xd3, 0x82, 0x18, 0x41,
- 0xd3, 0x92, 0xc4, 0x22, 0x71, 0x08, 0xfa, 0xc9, 0xc5, 0x01, 0xdb, 0x08,
- 0xfa, 0xc1, 0x15, 0xc1, 0xd3, 0xa6, 0x08, 0xc1, 0xd3, 0xb2, 0x16, 0xc1,
- 0xd3, 0xbe, 0xc3, 0x01, 0xb4, 0x08, 0xfa, 0x89, 0xc4, 0x15, 0xd3, 0x08,
- 0xfa, 0x80, 0xcd, 0x7d, 0xf1, 0x08, 0xfa, 0x79, 0xc5, 0x01, 0x7b, 0x08,
- 0xfa, 0x60, 0x12, 0xc1, 0xd3, 0xca, 0xcd, 0x25, 0xeb, 0x08, 0xfa, 0x28,
- 0xcc, 0x88, 0xe4, 0x08, 0xfa, 0x69, 0xc5, 0x01, 0xd1, 0x08, 0xfa, 0x38,
- 0xc8, 0x3c, 0xa3, 0x08, 0xfa, 0x51, 0x96, 0x08, 0xfa, 0x48, 0x97, 0x08,
- 0xfa, 0x00, 0x8b, 0x08, 0xf9, 0xf9, 0xcb, 0x8e, 0xb1, 0x08, 0xf9, 0xa9,
- 0x83, 0x08, 0xf9, 0xa0, 0x97, 0x08, 0xf9, 0xc8, 0x8b, 0x08, 0xf9, 0xb8,
- 0xc3, 0x30, 0x4c, 0x08, 0xf9, 0x99, 0xc2, 0x00, 0xa4, 0x08, 0xf8, 0xd1,
- 0x83, 0x08, 0xf8, 0xc8, 0x83, 0x08, 0xf9, 0x89, 0xc2, 0x0c, 0x65, 0x08,
- 0xf9, 0x81, 0xc2, 0x00, 0xa4, 0x08, 0xf9, 0x78, 0xc2, 0x00, 0xa4, 0x08,
- 0xf9, 0x49, 0x83, 0x08, 0xf9, 0x40, 0xc2, 0x00, 0xa4, 0x08, 0xf9, 0x39,
- 0x83, 0x08, 0xf9, 0x30, 0x83, 0x08, 0xf9, 0x29, 0xc2, 0x00, 0xc1, 0x08,
- 0xf9, 0x01, 0xc2, 0x1d, 0x5f, 0x08, 0xf8, 0xd9, 0xc2, 0x01, 0x29, 0x08,
- 0xf8, 0xb0, 0xc2, 0x00, 0xa4, 0x08, 0xf9, 0x21, 0x83, 0x08, 0xf9, 0x19,
- 0x06, 0x41, 0xd3, 0xd6, 0xc2, 0x00, 0xa4, 0x08, 0xf9, 0x11, 0x83, 0x08,
- 0xf9, 0x09, 0x16, 0x41, 0xd3, 0xe0, 0xc2, 0x00, 0xa4, 0x08, 0xf8, 0xc1,
- 0x83, 0x08, 0xf8, 0xb8, 0xc2, 0x00, 0xa4, 0x08, 0xf8, 0xa9, 0x83, 0x08,
- 0xf8, 0xa0, 0xc2, 0x00, 0xa4, 0x08, 0xf8, 0x99, 0x83, 0x08, 0xf8, 0x90,
- 0x8b, 0x08, 0xf8, 0x79, 0x83, 0x08, 0xf8, 0x28, 0x8e, 0x08, 0xf8, 0x63,
- 0x01, 0xd3, 0xea, 0x94, 0x08, 0xf8, 0x52, 0x01, 0xd3, 0xee, 0x97, 0x08,
- 0xf8, 0x48, 0x8b, 0x08, 0xf8, 0x38, 0x97, 0x08, 0x85, 0xc1, 0x8b, 0x08,
- 0x85, 0xb1, 0x83, 0x08, 0x85, 0x80, 0x97, 0x08, 0x85, 0xa0, 0x8b, 0x08,
- 0x85, 0x90, 0xc5, 0x8b, 0x1f, 0x08, 0x86, 0x09, 0xcc, 0x25, 0xea, 0x08,
- 0x85, 0xf8, 0xc5, 0x35, 0x00, 0x08, 0x85, 0xd1, 0x42, 0x03, 0x32, 0xc1,
- 0xd3, 0xf2, 0xc8, 0x11, 0x40, 0x08, 0x84, 0x09, 0xcb, 0x1e, 0x17, 0x08,
- 0x84, 0x00, 0x83, 0x08, 0x85, 0x71, 0xc2, 0x0c, 0x65, 0x08, 0x85, 0x69,
- 0xc2, 0x00, 0xa4, 0x08, 0x85, 0x60, 0x83, 0x08, 0x85, 0x49, 0xc2, 0x00,
- 0xa4, 0x08, 0x84, 0xe0, 0xc2, 0x00, 0xa4, 0x08, 0x85, 0x31, 0x83, 0x08,
- 0x85, 0x28, 0xc2, 0x00, 0xa4, 0x08, 0x85, 0x21, 0x83, 0x08, 0x85, 0x18,
- 0x83, 0x08, 0x85, 0x11, 0xc2, 0x00, 0xc1, 0x08, 0x84, 0xe9, 0xc2, 0x1d,
- 0x5f, 0x08, 0x84, 0xb1, 0xc2, 0x01, 0x29, 0x08, 0x84, 0x88, 0xc2, 0x00,
- 0xa4, 0x08, 0x85, 0x09, 0x83, 0x08, 0x85, 0x01, 0x06, 0x41, 0xd3, 0xfe,
- 0xc2, 0x00, 0xa4, 0x08, 0x84, 0xf9, 0x83, 0x08, 0x84, 0xf1, 0x16, 0x41,
- 0xd4, 0x0e, 0xc2, 0x00, 0xa4, 0x08, 0x84, 0xa9, 0x83, 0x08, 0x84, 0xa0,
- 0xc2, 0x00, 0xa4, 0x08, 0x84, 0x99, 0x83, 0x08, 0x84, 0x90, 0xc2, 0x00,
- 0xa4, 0x08, 0x84, 0x81, 0x83, 0x08, 0x84, 0x78, 0xc2, 0x00, 0xa4, 0x08,
- 0x84, 0x71, 0x83, 0x08, 0x84, 0x68, 0x97, 0x08, 0x84, 0x61, 0x8b, 0x08,
- 0x84, 0x51, 0x83, 0x08, 0x84, 0x20, 0x97, 0x08, 0x84, 0x40, 0x8b, 0x08,
- 0x84, 0x30, 0xc7, 0xc5, 0x3f, 0x05, 0x49, 0x68, 0x87, 0x05, 0x49, 0x48,
- 0x87, 0x05, 0x49, 0x30, 0x91, 0x05, 0x49, 0x29, 0x87, 0x05, 0x49, 0x18,
- 0x83, 0x05, 0x48, 0xf9, 0xc2, 0x00, 0xb3, 0x05, 0x48, 0x98, 0xc2, 0x00,
- 0xa4, 0x05, 0x48, 0xf1, 0x83, 0x05, 0x48, 0x90, 0xc2, 0x00, 0xa4, 0x05,
- 0x48, 0xb1, 0x83, 0x05, 0x48, 0xa8, 0x83, 0x05, 0x48, 0xa1, 0xc2, 0x1d,
- 0x5f, 0x05, 0x48, 0x89, 0xc2, 0x01, 0x29, 0x05, 0x48, 0x68, 0xc2, 0x00,
- 0xa4, 0x05, 0x48, 0x79, 0x83, 0x05, 0x48, 0x70, 0xc2, 0x00, 0xa4, 0x05,
- 0x48, 0x59, 0x83, 0x05, 0x48, 0x50, 0xc4, 0x18, 0x83, 0x05, 0x48, 0x39,
- 0xc2, 0x26, 0x51, 0x05, 0x48, 0x30, 0xc3, 0x0c, 0x5b, 0x05, 0x48, 0x29,
- 0xc3, 0x06, 0x9e, 0x05, 0x48, 0x20, 0xc4, 0x04, 0x5e, 0x05, 0x48, 0x19,
- 0xc2, 0x01, 0x47, 0x05, 0x48, 0x10, 0x15, 0xc1, 0xd4, 0x18, 0x05, 0xc1,
- 0xd4, 0x24, 0x03, 0xc1, 0xd4, 0x30, 0x42, 0x03, 0x32, 0xc1, 0xd4, 0x3c,
- 0xc5, 0x35, 0x00, 0x00, 0x65, 0xe1, 0xcb, 0x91, 0x66, 0x00, 0x67, 0x89,
- 0xcb, 0x98, 0x9e, 0x00, 0x67, 0x90, 0x45, 0x02, 0x92, 0xc1, 0xd4, 0x48,
- 0xc9, 0x34, 0xc3, 0x00, 0x66, 0xa8, 0x03, 0xc1, 0xd4, 0xb7, 0x8b, 0x00,
- 0x65, 0xfb, 0x01, 0xd4, 0xc3, 0x97, 0x00, 0x66, 0x0b, 0x01, 0xd4, 0xc7,
- 0x48, 0xac, 0xc1, 0xc1, 0xd4, 0xcb, 0x87, 0x00, 0x66, 0x33, 0x01, 0xd4,
- 0xd9, 0x91, 0x00, 0x66, 0x52, 0x01, 0xd4, 0xdd, 0xc4, 0x15, 0xd3, 0x00,
- 0x67, 0x31, 0xc3, 0x01, 0xb4, 0x00, 0x67, 0x39, 0x16, 0xc1, 0xd4, 0xe1,
- 0x08, 0xc1, 0xd4, 0xed, 0x15, 0xc1, 0xd4, 0xf9, 0xc5, 0x01, 0xdb, 0x00,
- 0x67, 0x71, 0xc4, 0x22, 0x71, 0x00, 0x67, 0x78, 0x11, 0xc1, 0xd5, 0x05,
- 0x0e, 0xc1, 0xd5, 0x18, 0x06, 0xc1, 0xd5, 0x2d, 0x15, 0xc1, 0xd5, 0x3d,
- 0x0a, 0xc1, 0xd5, 0x87, 0x16, 0xc1, 0xd5, 0x99, 0x0f, 0xc1, 0xd5, 0xbe,
- 0x07, 0xc1, 0xd5, 0xd0, 0x05, 0xc1, 0xd5, 0xf3, 0x0b, 0xc1, 0xd6, 0x0b,
- 0xc5, 0xa4, 0x4b, 0x01, 0x78, 0x89, 0x12, 0xc1, 0xd6, 0x15, 0x19, 0xc1,
- 0xd6, 0x2b, 0x14, 0xc1, 0xd6, 0x45, 0x03, 0xc1, 0xd6, 0x5f, 0x09, 0xc1,
- 0xd6, 0x77, 0x04, 0xc1, 0xd6, 0x90, 0x10, 0xc1, 0xd6, 0xaa, 0x08, 0xc1,
- 0xd6, 0xb4, 0x42, 0x24, 0x58, 0xc1, 0xd6, 0xd6, 0xc3, 0x1b, 0x3b, 0x01,
- 0x7b, 0x21, 0x18, 0xc1, 0xd6, 0xe0, 0xc6, 0xcb, 0x58, 0x01, 0x7e, 0x40,
- 0x46, 0x00, 0x6b, 0x41, 0xd6, 0xec, 0x49, 0xb4, 0x62, 0xc1, 0xd6, 0xf8,
- 0xc2, 0x00, 0x9f, 0x0b, 0x7a, 0x50, 0x44, 0x1b, 0x3f, 0xc1, 0xd7, 0x04,
- 0x15, 0xc1, 0xd7, 0x20, 0x87, 0x0b, 0x7a, 0x41, 0x42, 0x2d, 0x41, 0xc1,
- 0xd7, 0x34, 0xc2, 0x00, 0xb3, 0x0b, 0x78, 0x71, 0x83, 0x0b, 0x78, 0x50,
- 0x83, 0x0b, 0x78, 0x83, 0x01, 0xd7, 0x3e, 0x1b, 0xc1, 0xd7, 0x44, 0x09,
- 0xc1, 0xd7, 0x4e, 0x10, 0xc1, 0xd7, 0x58, 0xc2, 0x00, 0xa4, 0x0b, 0x78,
- 0x88, 0x1c, 0xc1, 0xd7, 0x62, 0x42, 0x2d, 0x41, 0xc1, 0xd7, 0x78, 0xc2,
- 0x0b, 0xc6, 0x0b, 0x78, 0x79, 0x83, 0x0b, 0x78, 0x58, 0xc2, 0x15, 0x1d,
- 0x0b, 0x7a, 0x31, 0x83, 0x0b, 0x79, 0xd1, 0xc2, 0x0c, 0x65, 0x0b, 0x79,
- 0xa1, 0xc2, 0x00, 0xa4, 0x0b, 0x79, 0x98, 0xc2, 0x00, 0x5b, 0x0b, 0x7a,
- 0x29, 0x83, 0x0b, 0x78, 0x08, 0xc2, 0x00, 0xa4, 0x0b, 0x7a, 0x21, 0x83,
- 0x0b, 0x79, 0x30, 0x8a, 0x0b, 0x7a, 0x19, 0x47, 0x7e, 0xdc, 0x41, 0xd7,
- 0x82, 0x1c, 0xc1, 0xd7, 0x92, 0x15, 0xc1, 0xd7, 0xa0, 0x83, 0x0b, 0x79,
- 0xd9, 0xc2, 0x00, 0xa4, 0x0b, 0x79, 0xa8, 0x16, 0xc1, 0xd7, 0xaa, 0xc4,
- 0xe3, 0xf7, 0x0b, 0x79, 0x89, 0xc2, 0x04, 0x2b, 0x0b, 0x79, 0x01, 0xc3,
- 0x3a, 0x96, 0x0b, 0x78, 0x91, 0xc2, 0x03, 0xa4, 0x0b, 0x78, 0x10, 0x0a,
- 0xc1, 0xd7, 0xb8, 0x83, 0x0b, 0x78, 0xf8, 0xc2, 0x01, 0x29, 0x0b, 0x79,
- 0x11, 0x83, 0x0b, 0x79, 0x08, 0x0a, 0xc1, 0xd7, 0xc2, 0xc2, 0x1d, 0x5f,
- 0x0b, 0x78, 0xb9, 0x83, 0x0b, 0x78, 0xb0, 0xc2, 0x00, 0x67, 0x0b, 0x78,
- 0x49, 0x83, 0x0b, 0x78, 0x40, 0xc2, 0x00, 0xa4, 0x0b, 0x78, 0x29, 0x83,
- 0x0b, 0x78, 0x20, 0xc2, 0x00, 0xc7, 0x0b, 0x78, 0x19, 0x83, 0x0b, 0x78,
- 0x00, 0x8b, 0x0b, 0x7c, 0x39, 0xc2, 0x13, 0x91, 0x0b, 0x7b, 0xf9, 0xc2,
- 0x00, 0x35, 0x0b, 0x7b, 0x81, 0xc2, 0x01, 0xdb, 0x0b, 0x7b, 0x79, 0x97,
- 0x0b, 0x7b, 0x71, 0x83, 0x0b, 0x7b, 0x5a, 0x01, 0xd7, 0xcc, 0x91, 0x0b,
- 0x7b, 0x2b, 0x01, 0xd7, 0xd3, 0x89, 0x0b, 0x7c, 0x21, 0xc2, 0x00, 0x35,
- 0x0b, 0x7b, 0x49, 0x97, 0x0b, 0x7b, 0x41, 0x8b, 0x0b, 0x7b, 0x39, 0x87,
- 0x0b, 0x7b, 0x31, 0x83, 0x0b, 0x7b, 0x12, 0x01, 0xd7, 0xd9, 0x83, 0x0b,
- 0x7c, 0x29, 0x8b, 0x0b, 0x7b, 0xd1, 0x94, 0x0b, 0x7b, 0xbb, 0x01, 0xd7,
- 0xe0, 0x90, 0x0b, 0x7a, 0xf2, 0x01, 0xd7, 0xe4, 0x07, 0xc1, 0xd7, 0xe8,
- 0x89, 0x0b, 0x7c, 0x09, 0x97, 0x0b, 0x7b, 0xe1, 0x91, 0x0b, 0x7a, 0xd0,
- 0xc2, 0x03, 0x74, 0x0b, 0x7c, 0x01, 0x8b, 0x0b, 0x7b, 0x90, 0x89, 0x0b,
- 0x7b, 0xf0, 0x97, 0x0b, 0x7b, 0xd9, 0x8b, 0x0b, 0x7b, 0xc9, 0x87, 0x0b,
- 0x7b, 0x9b, 0x01, 0xd7, 0xf0, 0x90, 0x0b, 0x7a, 0xbb, 0x01, 0xd7, 0xf4,
- 0xc2, 0x4f, 0x51, 0x0b, 0x7a, 0xb1, 0x83, 0x0b, 0x7a, 0xa8, 0x94, 0x0b,
- 0x7b, 0xb0, 0x91, 0x0b, 0x7a, 0xd8, 0xca, 0x9b, 0xae, 0x0b, 0x7a, 0x99,
- 0xc7, 0xc4, 0x27, 0x0b, 0x7a, 0x90, 0xc5, 0x21, 0x12, 0x01, 0x12, 0x11,
- 0xc4, 0x00, 0xcb, 0x01, 0x10, 0x92, 0x01, 0xd7, 0xf8, 0x4e, 0x6c, 0x73,
- 0xc1, 0xd7, 0xfc, 0xcb, 0x5a, 0x67, 0x0f, 0xbd, 0x19, 0x46, 0x02, 0x31,
- 0xc1, 0xd8, 0x08, 0x04, 0xc1, 0xd8, 0x14, 0x45, 0x01, 0xf7, 0xc1, 0xd8,
- 0x20, 0x44, 0x01, 0x1e, 0xc1, 0xd8, 0x2a, 0x08, 0xc1, 0xd8, 0x34, 0xcc,
- 0x03, 0x3b, 0x01, 0x3a, 0xc9, 0x15, 0xc1, 0xd8, 0x46, 0xd2, 0x4a, 0x10,
- 0x01, 0x02, 0xf9, 0x46, 0x0e, 0xf4, 0x41, 0xd8, 0x5e, 0xc5, 0x00, 0xaa,
- 0x01, 0x72, 0x61, 0xd0, 0x0f, 0xfb, 0x01, 0x72, 0x99, 0xcd, 0x2c, 0x41,
- 0x01, 0x72, 0xa0, 0xca, 0xa1, 0x6c, 0x0b, 0x74, 0xc9, 0x4c, 0x26, 0xeb,
- 0x41, 0xd8, 0x6a, 0xc4, 0x00, 0xab, 0x0b, 0x74, 0xb9, 0x4e, 0x07, 0x18,
- 0x41, 0xd8, 0xe4, 0x16, 0xc1, 0xd9, 0x5e, 0xc3, 0x01, 0xb4, 0x0b, 0x74,
- 0x0b, 0x01, 0xd9, 0x70, 0xc4, 0x22, 0x71, 0x0b, 0x74, 0x49, 0xc5, 0x01,
- 0xdb, 0x0b, 0x74, 0x41, 0x15, 0xc1, 0xd9, 0x76, 0x08, 0xc1, 0xd9, 0x82,
- 0xc4, 0x15, 0xd3, 0x0b, 0x74, 0x00, 0xc8, 0x4c, 0xe0, 0x0b, 0x74, 0x99,
- 0x07, 0xc1, 0xd9, 0x8e, 0x15, 0xc1, 0xd9, 0x9a, 0x08, 0xc1, 0xd9, 0xa6,
- 0x16, 0x41, 0xd9, 0xb2, 0xc8, 0xb7, 0x75, 0x01, 0x1e, 0xc1, 0xc6, 0xce,
- 0x59, 0x01, 0x1e, 0xb9, 0x4a, 0xa6, 0x4e, 0x41, 0xd9, 0xc4, 0xca, 0x9d,
- 0x20, 0x01, 0x1e, 0xa1, 0xc5, 0x2b, 0xff, 0x01, 0x1e, 0x90, 0x1d, 0xc1,
- 0xd9, 0xd0, 0x1e, 0x41, 0xd9, 0xf8, 0xc3, 0x01, 0xb4, 0x0f, 0x46, 0x39,
- 0x16, 0xc1, 0xda, 0x20, 0x08, 0xc1, 0xda, 0x2c, 0x15, 0xc1, 0xda, 0x38,
- 0xc5, 0x01, 0xdb, 0x0f, 0x46, 0x71, 0xc4, 0x22, 0x71, 0x0f, 0x46, 0x78,
- 0x16, 0xc1, 0xda, 0x44, 0x47, 0x0c, 0x4b, 0xc1, 0xda, 0x4e, 0xc8, 0x32,
- 0x88, 0x0f, 0x46, 0xb0, 0x49, 0x52, 0xd7, 0xc1, 0xda, 0x58, 0x47, 0x33,
- 0xef, 0xc1, 0xda, 0x74, 0x0e, 0x41, 0xda, 0x9b, 0xcb, 0x91, 0xbe, 0x08,
- 0x4c, 0xf3, 0x01, 0xda, 0xa7, 0x47, 0x02, 0x90, 0x41, 0xda, 0xad, 0x00,
- 0x41, 0xdb, 0x0f, 0xc2, 0x01, 0x47, 0x05, 0x5f, 0x91, 0xc4, 0x04, 0x5e,
- 0x05, 0x5f, 0x98, 0xc3, 0x06, 0x9e, 0x05, 0x5f, 0xa1, 0xc3, 0x0c, 0x5b,
- 0x05, 0x5f, 0xa8, 0xc2, 0x26, 0x51, 0x05, 0x5f, 0xb1, 0xc4, 0x18, 0x83,
- 0x05, 0x5f, 0xb8, 0xc4, 0xe5, 0x73, 0x05, 0x5f, 0x51, 0xc7, 0xc6, 0xc7,
- 0x05, 0x5f, 0x49, 0xc5, 0xdf, 0xc0, 0x05, 0x5f, 0x31, 0x03, 0xc1, 0xdb,
- 0x1b, 0x0b, 0xc1, 0xdb, 0x29, 0xc4, 0x51, 0x2c, 0x05, 0x5f, 0x19, 0xc7,
- 0x44, 0x79, 0x05, 0x57, 0xa9, 0x17, 0xc1, 0xdb, 0x33, 0xc6, 0xd3, 0xb1,
- 0x05, 0x5f, 0x38, 0x8b, 0x05, 0x5e, 0x7b, 0x01, 0xdb, 0x3d, 0x10, 0xc1,
- 0xdb, 0x43, 0x16, 0xc1, 0xdb, 0x5f, 0x12, 0xc1, 0xdb, 0x72, 0x0d, 0xc1,
- 0xdb, 0x7f, 0x04, 0xc1, 0xdb, 0x8e, 0x06, 0xc1, 0xdb, 0x98, 0x09, 0xc1,
- 0xdb, 0xa8, 0x15, 0xc1, 0xdb, 0xb4, 0x42, 0x11, 0xd4, 0xc1, 0xdb, 0xc6,
- 0x91, 0x05, 0x57, 0x09, 0x87, 0x05, 0x57, 0x01, 0xc3, 0x03, 0x46, 0x05,
- 0x5e, 0xa1, 0xc5, 0xdd, 0xa9, 0x05, 0x5e, 0x89, 0xc2, 0x01, 0xbd, 0x05,
- 0x5e, 0x71, 0xc3, 0xcd, 0x54, 0x05, 0x5e, 0x69, 0xc4, 0xb5, 0xf1, 0x05,
- 0x5e, 0x61, 0xc3, 0x27, 0xb3, 0x05, 0x5e, 0x1b, 0x01, 0xdb, 0xd0, 0xc3,
- 0x00, 0xd9, 0x05, 0x5e, 0x13, 0x01, 0xdb, 0xd6, 0xc3, 0x4c, 0x27, 0x05,
- 0x5e, 0x59, 0x0c, 0x41, 0xdb, 0xdc, 0xc7, 0xc9, 0xc9, 0x0f, 0xb7, 0xa9,
- 0xc4, 0xd2, 0xbd, 0x0f, 0xb7, 0x28, 0x00, 0x41, 0xdb, 0xe8, 0xc4, 0x00,
- 0x67, 0x0f, 0xa1, 0x69, 0xc4, 0xd0, 0x0f, 0x0f, 0xd5, 0x20, 0x47, 0x02,
- 0x90, 0xc1, 0xdb, 0xfa, 0xd9, 0x1f, 0x0a, 0x05, 0x5a, 0xd8, 0x06, 0xc1,
- 0xdc, 0x40, 0x45, 0x00, 0xcb, 0xc1, 0xdc, 0x52, 0xd1, 0x50, 0x40, 0x08,
- 0xb2, 0x19, 0x4b, 0x6f, 0xcc, 0xc1, 0xdc, 0x62, 0x47, 0x02, 0x90, 0x41,
- 0xdc, 0x82, 0xc5, 0x65, 0xf1, 0x0e, 0x98, 0x01, 0x1b, 0x41, 0xdc, 0xe7,
- 0x46, 0x41, 0x9c, 0xc1, 0xdc, 0xf3, 0xd9, 0x21, 0x17, 0x08, 0xb3, 0x19,
- 0xcf, 0x62, 0x18, 0x00, 0xc0, 0x30, 0xca, 0x09, 0xe8, 0x08, 0xb3, 0x4b,
- 0x01, 0xdc, 0xf9, 0xdc, 0x14, 0xc2, 0x00, 0xc0, 0x38, 0xd5, 0x09, 0xf2,
- 0x08, 0xb3, 0x40, 0x46, 0x00, 0x6b, 0x41, 0xdc, 0xff, 0x46, 0x00, 0x6b,
- 0x41, 0xdd, 0x0b, 0xd9, 0x20, 0x68, 0x08, 0xb3, 0x11, 0x45, 0x06, 0x98,
- 0x41, 0xdd, 0x17, 0xc2, 0x00, 0xad, 0x00, 0xc1, 0x73, 0x01, 0xdd, 0x3b,
- 0x83, 0x00, 0xc1, 0x03, 0x01, 0xdd, 0x41, 0x16, 0xc1, 0xdd, 0x4d, 0x42,
- 0x11, 0xd4, 0xc1, 0xdd, 0x5d, 0x15, 0xc1, 0xdd, 0x68, 0x1c, 0xc1, 0xdd,
- 0x78, 0x0e, 0xc1, 0xdd, 0x88, 0xc3, 0x3b, 0xc7, 0x00, 0xc1, 0xf1, 0x0d,
- 0xc1, 0xdd, 0x92, 0xc2, 0x00, 0x67, 0x00, 0xc1, 0xc9, 0xc2, 0x01, 0x09,
- 0x00, 0xc1, 0xc1, 0xc2, 0x02, 0x59, 0x00, 0xc1, 0xb9, 0xc2, 0x1d, 0x5f,
- 0x00, 0xc1, 0xb1, 0xc2, 0x24, 0x58, 0x00, 0xc1, 0xa9, 0xc2, 0x0b, 0xc6,
- 0x00, 0xc1, 0x99, 0xc2, 0x01, 0x29, 0x00, 0xc1, 0x69, 0xc2, 0x0f, 0x60,
- 0x00, 0xc1, 0x61, 0xc2, 0x03, 0xa4, 0x00, 0xc1, 0x59, 0xc2, 0x00, 0xde,
- 0x00, 0xc1, 0x51, 0xc2, 0x00, 0xc1, 0x00, 0xc1, 0x41, 0x87, 0x00, 0xc1,
- 0x0b, 0x01, 0xdd, 0x9c, 0x97, 0x00, 0xc1, 0x23, 0x01, 0xdd, 0xa0, 0x91,
- 0x00, 0xc1, 0x1b, 0x01, 0xdd, 0xa4, 0x8b, 0x00, 0xc1, 0x10, 0x57, 0x2a,
- 0xaa, 0xc1, 0xdd, 0xa8, 0xc8, 0x38, 0x83, 0x00, 0xc0, 0x29, 0xc8, 0x11,
- 0xdd, 0x00, 0xc0, 0x18, 0xc9, 0x11, 0xdc, 0x00, 0xc0, 0x49, 0xc5, 0x00,
- 0xaa, 0x00, 0xc0, 0x40, 0xc3, 0x0e, 0x41, 0x00, 0xc0, 0x21, 0xc3, 0x00,
- 0xac, 0x00, 0xc0, 0x10, 0xca, 0xa4, 0xf0, 0x0f, 0xa5, 0xc1, 0xc3, 0x32,
- 0xc7, 0x0f, 0xa5, 0x80, 0x48, 0x07, 0x17, 0xc1, 0xdd, 0xb8, 0x12, 0xc1,
- 0xde, 0x59, 0xca, 0xa6, 0xda, 0x0e, 0xb8, 0xd1, 0xcc, 0x8b, 0x48, 0x0e,
- 0xb8, 0xc1, 0xcc, 0x89, 0x80, 0x0e, 0xb8, 0xb9, 0xce, 0x12, 0x11, 0x0e,
- 0xb8, 0xb1, 0x46, 0x04, 0x73, 0xc1, 0xde, 0x6b, 0xc5, 0xdc, 0x87, 0x0e,
- 0xb7, 0xd8, 0x15, 0xc1, 0xdf, 0x0b, 0x46, 0x06, 0x97, 0xc1, 0xdf, 0x17,
- 0x48, 0x07, 0x17, 0xc1, 0xdf, 0x3b, 0x47, 0xca, 0xe1, 0xc1, 0xdf, 0xdc,
- 0x12, 0xc1, 0xe0, 0x0a, 0xca, 0xa6, 0xda, 0x0e, 0xb7, 0x01, 0xcc, 0x8b,
- 0x48, 0x0e, 0xb6, 0xf1, 0xcc, 0x89, 0x80, 0x0e, 0xb6, 0xe9, 0xce, 0x12,
- 0x11, 0x0e, 0xb6, 0xe1, 0xc5, 0xdc, 0x87, 0x0e, 0xb6, 0x09, 0x48, 0xbf,
- 0xad, 0x41, 0xe0, 0x1c, 0x46, 0x06, 0x97, 0xc1, 0xe0, 0x28, 0x46, 0x04,
- 0x73, 0xc1, 0xe0, 0x4c, 0x48, 0x07, 0x17, 0x41, 0xe0, 0xb4, 0x4a, 0x42,
- 0x4f, 0xc1, 0xe1, 0x1c, 0x46, 0x0a, 0x0f, 0x41, 0xe1, 0x3a, 0x46, 0x06,
- 0x97, 0xc1, 0xe1, 0x46, 0x46, 0x04, 0x73, 0xc1, 0xe1, 0x6a, 0x48, 0x07,
- 0x17, 0x41, 0xe1, 0xd2, 0x47, 0xbf, 0xae, 0xc1, 0xe2, 0x1e, 0xcf, 0x33,
- 0x91, 0x01, 0x3e, 0x68, 0x44, 0x02, 0x14, 0xc1, 0xe2, 0x2a, 0xcd, 0x2a,
- 0x7c, 0x01, 0x3e, 0x58, 0xd5, 0x34, 0x39, 0x01, 0x3f, 0x71, 0x46, 0x02,
- 0x31, 0xc1, 0xe2, 0x42, 0xd4, 0x3c, 0x79, 0x01, 0x3f, 0x51, 0xcd, 0x09,
- 0x51, 0x01, 0x3f, 0x40, 0xc3, 0x00, 0x4c, 0x0e, 0x97, 0x90, 0xc4, 0x13,
- 0xf2, 0x0e, 0x97, 0x88, 0xc4, 0x13, 0xf2, 0x0e, 0x97, 0x80, 0xc5, 0x13,
- 0xf1, 0x0e, 0x97, 0x79, 0xc2, 0x00, 0x7b, 0x0e, 0x97, 0x28, 0xc4, 0x13,
- 0xf2, 0x0e, 0x97, 0x70, 0xc6, 0x53, 0x71, 0x0e, 0x97, 0x69, 0xc3, 0x04,
- 0x5f, 0x0e, 0x97, 0x18, 0xc4, 0x21, 0x31, 0x0e, 0x97, 0x61, 0x91, 0x0e,
- 0x97, 0x10, 0x91, 0x08, 0xf7, 0xb1, 0x87, 0x08, 0xf7, 0xa9, 0x97, 0x08,
- 0xf7, 0xa1, 0x8b, 0x08, 0xf7, 0x98, 0x83, 0x08, 0xf7, 0x89, 0xc2, 0x0c,
- 0x65, 0x08, 0xf7, 0x81, 0xc2, 0x04, 0x41, 0x08, 0xf7, 0x79, 0xc2, 0x00,
- 0xc7, 0x08, 0xf7, 0x71, 0xc2, 0x02, 0x59, 0x08, 0xf7, 0x69, 0xc2, 0x1d,
- 0x5f, 0x08, 0xf7, 0x61, 0x10, 0xc1, 0xe2, 0x4e, 0xc2, 0x24, 0x58, 0x08,
- 0xf7, 0x51, 0xc2, 0x03, 0x40, 0x08, 0xf7, 0x49, 0xc2, 0x0b, 0xc6, 0x08,
- 0xf7, 0x39, 0xc2, 0x00, 0xb3, 0x08, 0xf7, 0x31, 0xc2, 0x00, 0xad, 0x08,
- 0xf7, 0x29, 0xc2, 0x00, 0xde, 0x08, 0xf7, 0x21, 0xc2, 0x03, 0xa4, 0x08,
- 0xf7, 0x19, 0xc2, 0x01, 0x29, 0x08, 0xf7, 0x09, 0xc2, 0x04, 0x2b, 0x08,
- 0xf7, 0x00, 0xc4, 0x22, 0x71, 0x08, 0xea, 0xc9, 0xc5, 0x01, 0xdb, 0x08,
- 0xea, 0xc1, 0x15, 0xc1, 0xe2, 0x5e, 0x08, 0xc1, 0xe2, 0x6a, 0x16, 0xc1,
- 0xe2, 0x76, 0xc3, 0x01, 0xb4, 0x08, 0xea, 0x89, 0xc4, 0x15, 0xd3, 0x08,
- 0xea, 0x80, 0xc6, 0xd3, 0x75, 0x08, 0xea, 0x39, 0xc4, 0xb8, 0x17, 0x08,
- 0xea, 0x30, 0xc5, 0x1e, 0x24, 0x08, 0xea, 0x29, 0x4a, 0x6f, 0xcd, 0x41,
- 0xe2, 0x82, 0xc7, 0xca, 0x5c, 0x08, 0xea, 0x21, 0xc6, 0x1e, 0x17, 0x08,
- 0xea, 0x19, 0xc5, 0x35, 0x00, 0x08, 0xea, 0x11, 0xc7, 0x44, 0x79, 0x08,
- 0xea, 0x09, 0xc8, 0x11, 0x40, 0x08, 0xea, 0x00, 0x16, 0xc1, 0xe2, 0xa2,
- 0x0c, 0xc1, 0xe2, 0xb6, 0x0d, 0xc1, 0xe2, 0xc6, 0x0e, 0xc1, 0xe2, 0xd6,
- 0xc2, 0x00, 0xa4, 0x08, 0xe9, 0x61, 0x15, 0xc1, 0xe2, 0xe0, 0xc2, 0x04,
- 0x41, 0x08, 0xe9, 0x41, 0xc2, 0x02, 0x59, 0x08, 0xe9, 0x31, 0xc2, 0x1d,
- 0x5f, 0x08, 0xe9, 0x29, 0xc2, 0x00, 0xad, 0x08, 0xe9, 0x21, 0x04, 0xc1,
- 0xe2, 0xf0, 0x12, 0xc1, 0xe2, 0xfa, 0x10, 0xc1, 0xe3, 0x04, 0x06, 0xc1,
- 0xe3, 0x1a, 0x05, 0xc1, 0xe3, 0x28, 0x09, 0xc1, 0xe3, 0x32, 0x83, 0x08,
- 0xe8, 0x03, 0x01, 0xe3, 0x3c, 0x91, 0x08, 0xe8, 0x49, 0x87, 0x08, 0xe8,
- 0x31, 0x97, 0x08, 0xe8, 0x23, 0x01, 0xe3, 0x48, 0x8b, 0x08, 0xe8, 0x12,
- 0x01, 0xe3, 0x4c, 0x44, 0x00, 0xcc, 0xc1, 0xe3, 0x50, 0x50, 0x5f, 0xd2,
- 0x41, 0xe3, 0x5c, 0x91, 0x08, 0xe5, 0xa1, 0x87, 0x08, 0xe5, 0x99, 0x97,
- 0x08, 0xe5, 0x91, 0x8b, 0x08, 0xe5, 0x89, 0xc2, 0x14, 0x40, 0x08, 0xe5,
- 0x80, 0x83, 0x08, 0xe4, 0x79, 0xc2, 0x00, 0xa4, 0x08, 0xe4, 0x71, 0x15,
- 0xc1, 0xe3, 0xb6, 0xc2, 0x00, 0xc7, 0x08, 0xe4, 0x59, 0xc2, 0x02, 0x59,
- 0x08, 0xe4, 0x51, 0xc2, 0x1d, 0x5f, 0x08, 0xe4, 0x49, 0xc2, 0x00, 0x02,
- 0x08, 0xe4, 0x41, 0x1c, 0xc1, 0xe3, 0xc0, 0xc2, 0x01, 0x09, 0x08, 0xe4,
- 0x29, 0x06, 0xc1, 0xe3, 0xca, 0x16, 0xc1, 0xe3, 0xd4, 0xc2, 0x00, 0xad,
- 0x08, 0xe4, 0x09, 0xc2, 0x00, 0xde, 0x08, 0xe4, 0x01, 0x12, 0xc1, 0xe3,
- 0xe2, 0x10, 0xc1, 0xe3, 0xec, 0xc2, 0x24, 0x58, 0x08, 0xe3, 0xc1, 0x05,
- 0xc1, 0xe3, 0xfc, 0xc2, 0x01, 0x29, 0x08, 0xe3, 0xa1, 0x0d, 0x41, 0xe4,
- 0x06, 0xd8, 0x24, 0xd8, 0x01, 0x35, 0x39, 0xc4, 0x00, 0xcb, 0x01, 0x35,
- 0x30, 0x05, 0xc1, 0xe4, 0x10, 0x03, 0xc1, 0xe4, 0x22, 0x18, 0xc1, 0xe4,
- 0x2e, 0xc4, 0x08, 0x50, 0x00, 0x6a, 0x78, 0x18, 0xc1, 0xe4, 0x38, 0x83,
- 0x00, 0x68, 0x2b, 0x01, 0xe4, 0x48, 0x8b, 0x00, 0x68, 0x3b, 0x01, 0xe4,
- 0x5a, 0x97, 0x00, 0x68, 0x4b, 0x01, 0xe4, 0x5e, 0x87, 0x00, 0x68, 0x73,
- 0x01, 0xe4, 0x62, 0x91, 0x00, 0x68, 0x93, 0x01, 0xe4, 0x66, 0x0d, 0xc1,
- 0xe4, 0x6a, 0x09, 0xc1, 0xe4, 0x74, 0x10, 0xc1, 0xe4, 0x7e, 0x05, 0xc1,
- 0xe4, 0x92, 0x0c, 0xc1, 0xe4, 0x9a, 0x16, 0xc1, 0xe4, 0xa4, 0x06, 0xc1,
- 0xe4, 0xb2, 0x12, 0xc1, 0xe4, 0xc6, 0x04, 0xc1, 0xe4, 0xd0, 0xc2, 0x00,
- 0xad, 0x00, 0x69, 0x71, 0xc2, 0x1d, 0x5f, 0x00, 0x69, 0x79, 0x14, 0xc1,
- 0xe4, 0xda, 0x0e, 0xc1, 0xe4, 0xe4, 0x15, 0xc1, 0xe4, 0xec, 0xc2, 0x00,
- 0xa4, 0x00, 0x69, 0xc8, 0x03, 0xc1, 0xe4, 0xfc, 0x8b, 0x00, 0x69, 0xfb,
- 0x01, 0xe5, 0x08, 0x97, 0x00, 0x6a, 0x0b, 0x01, 0xe5, 0x0c, 0x48, 0xac,
- 0xc1, 0xc1, 0xe5, 0x10, 0x87, 0x00, 0x6a, 0x33, 0x01, 0xe5, 0x1e, 0x91,
- 0x00, 0x6a, 0x52, 0x01, 0xe5, 0x22, 0x44, 0x01, 0xb4, 0xc1, 0xe5, 0x26,
- 0x46, 0x04, 0x5d, 0x41, 0xe5, 0x4c, 0x45, 0x06, 0x98, 0xc1, 0xe5, 0x64,
- 0xc8, 0xbb, 0x15, 0x00, 0x6b, 0xc8, 0xc3, 0x0a, 0xe1, 0x00, 0x6b, 0x81,
- 0x44, 0x01, 0xb4, 0x41, 0xe5, 0x88, 0xcb, 0x94, 0x26, 0x08, 0x57, 0xb1,
- 0xc8, 0x01, 0x59, 0x08, 0x57, 0xa9, 0x42, 0x01, 0x4a, 0xc1, 0xe5, 0x94,
- 0xc7, 0x2f, 0xec, 0x08, 0x57, 0x89, 0xc4, 0x0e, 0xa8, 0x08, 0x57, 0x80,
- 0xc3, 0x01, 0xb4, 0x08, 0x57, 0x5b, 0x01, 0xe5, 0xa1, 0x16, 0xc1, 0xe5,
- 0xa7, 0xc4, 0x0c, 0x5a, 0x08, 0x57, 0x60, 0xc5, 0x01, 0x62, 0x08, 0x57,
- 0x31, 0xc5, 0x00, 0x95, 0x08, 0x57, 0x28, 0x16, 0xc1, 0xe5, 0xb3, 0x15,
- 0xc1, 0xe5, 0xc5, 0xc4, 0x5d, 0xe2, 0x08, 0x57, 0x09, 0x13, 0xc1, 0xe5,
- 0xd5, 0x1a, 0xc1, 0xe5, 0xe1, 0xc2, 0x17, 0x9f, 0x08, 0x56, 0xe1, 0xc2,
- 0x00, 0x27, 0x08, 0x56, 0xd9, 0x03, 0xc1, 0xe5, 0xed, 0xc3, 0x1f, 0xd8,
- 0x08, 0x56, 0xb9, 0xc3, 0x0b, 0x0e, 0x08, 0x56, 0xb1, 0x06, 0xc1, 0xe5,
- 0xff, 0xc6, 0xd0, 0x5d, 0x08, 0x56, 0x99, 0x0d, 0xc1, 0xe6, 0x0b, 0xc4,
- 0x4b, 0x98, 0x08, 0x56, 0x79, 0xc2, 0x01, 0xf0, 0x08, 0x56, 0x33, 0x01,
- 0xe6, 0x17, 0x0c, 0xc1, 0xe6, 0x1d, 0x1c, 0xc1, 0xe6, 0x29, 0xc3, 0x78,
- 0xa9, 0x08, 0x56, 0x39, 0x09, 0xc1, 0xe6, 0x35, 0x04, 0x41, 0xe6, 0x41,
- 0xd8, 0x23, 0x70, 0x0f, 0xab, 0xa1, 0xc6, 0xd0, 0x69, 0x0f, 0xc9, 0xa8,
- 0xc6, 0xd0, 0x75, 0x0f, 0xa3, 0x99, 0xca, 0x9b, 0xfe, 0x0f, 0xa3, 0x90,
- 0x03, 0xc1, 0xe6, 0x4d, 0xc3, 0x3c, 0x63, 0x00, 0x42, 0xb9, 0xc8, 0xbe,
- 0x2d, 0x00, 0x42, 0xb1, 0x0b, 0xc1, 0xe6, 0x94, 0xc7, 0xbe, 0x2e, 0x00,
- 0x42, 0x29, 0xc5, 0xd5, 0xd9, 0x00, 0x42, 0x00, 0xcc, 0x88, 0x00, 0x08,
- 0x8b, 0xb1, 0x46, 0x02, 0x91, 0x41, 0xe6, 0x9c, 0xcb, 0x25, 0xeb, 0x08,
- 0x8b, 0xa9, 0xc9, 0xaf, 0x52, 0x08, 0x8b, 0x98, 0xc5, 0x08, 0x9b, 0x0f,
- 0x81, 0x49, 0xc8, 0xb8, 0xad, 0x0f, 0x80, 0x11, 0xcb, 0x90, 0x11, 0x0f,
- 0x80, 0x30, 0xc8, 0xba, 0x85, 0x0f, 0x80, 0x01, 0x48, 0xac, 0xb9, 0x41,
- 0xe6, 0xf6, 0xc9, 0xa9, 0xe8, 0x0f, 0x80, 0x09, 0x46, 0xd0, 0x09, 0xc1,
- 0xe7, 0x00, 0x48, 0xb9, 0x15, 0xc1, 0xe7, 0x0a, 0xc5, 0x0c, 0xe1, 0x0f,
- 0x81, 0x31, 0xc5, 0xd6, 0xa1, 0x0f, 0x81, 0x38, 0xc9, 0xab, 0x6b, 0x0f,
- 0x80, 0x19, 0x47, 0xb9, 0xf6, 0x41, 0xe7, 0x14, 0x46, 0xb9, 0xf7, 0xc1,
- 0xe7, 0x1e, 0xc5, 0xd8, 0xae, 0x0f, 0x81, 0x18, 0x46, 0xcd, 0xed, 0xc1,
- 0xe7, 0x28, 0x48, 0xbc, 0x55, 0x41, 0xe7, 0x32, 0x47, 0xcb, 0xa5, 0xc1,
- 0xe7, 0x3c, 0x47, 0xcb, 0x5f, 0x41, 0xe7, 0x46, 0xc2, 0x00, 0xe0, 0x0f,
- 0x81, 0x59, 0xc4, 0x9a, 0xec, 0x0f, 0x81, 0x20, 0x15, 0xc1, 0xe7, 0x50,
- 0xc8, 0x9e, 0x6c, 0x0f, 0x9d, 0xcb, 0x01, 0xe7, 0x5c, 0xc4, 0x22, 0x4b,
- 0x0f, 0x9d, 0xa8, 0xca, 0xa5, 0xb8, 0x01, 0x33, 0x79, 0xcc, 0x8d, 0x28,
- 0x01, 0x33, 0x71, 0xc9, 0xb3, 0x5d, 0x01, 0x33, 0x68, 0x48, 0x1e, 0xad,
- 0xc1, 0xe7, 0x62, 0xcf, 0x65, 0x7e, 0x0f, 0x9d, 0xb0, 0x00, 0x41, 0xe7,
- 0x6f, 0x14, 0xc1, 0xe7, 0x7b, 0xc2, 0x00, 0xa4, 0x08, 0x95, 0x31, 0xc2,
- 0x0c, 0x65, 0x08, 0x95, 0x29, 0xc2, 0x04, 0x41, 0x08, 0x95, 0x21, 0xc2,
- 0x00, 0xc7, 0x08, 0x95, 0x19, 0xc2, 0x1d, 0x5f, 0x08, 0x95, 0x09, 0xc2,
- 0x00, 0xad, 0x08, 0x95, 0x01, 0x04, 0xc1, 0xe7, 0x8b, 0x12, 0xc1, 0xe7,
- 0x95, 0x10, 0xc1, 0xe7, 0x9f, 0x06, 0xc1, 0xe7, 0xaf, 0x16, 0xc1, 0xe7,
- 0xbd, 0x0c, 0xc1, 0xe7, 0xcb, 0x05, 0xc1, 0xe7, 0xd5, 0x09, 0xc1, 0xe7,
- 0xdf, 0x0d, 0xc1, 0xe7, 0xe9, 0x87, 0x08, 0x94, 0x19, 0x83, 0x08, 0x94,
- 0x01, 0x8b, 0x08, 0x94, 0x09, 0x97, 0x08, 0x94, 0x10, 0xc4, 0x18, 0x83,
- 0x0b, 0x53, 0x39, 0xc2, 0x26, 0x51, 0x0b, 0x53, 0x30, 0xc3, 0x0c, 0x5b,
- 0x0b, 0x53, 0x29, 0xc3, 0x06, 0x9e, 0x0b, 0x53, 0x20, 0xc4, 0x04, 0x5e,
- 0x0b, 0x53, 0x19, 0xc2, 0x01, 0x47, 0x0b, 0x53, 0x10, 0xa2, 0x05, 0x53,
- 0xe9, 0x9f, 0x05, 0x53, 0xe0, 0x44, 0x03, 0x10, 0xc1, 0xe7, 0xf3, 0xc6,
- 0x02, 0x61, 0x00, 0x82, 0x58, 0xc7, 0x11, 0x41, 0x00, 0x81, 0xb1, 0xc3,
- 0x92, 0xe0, 0x00, 0x81, 0xd0, 0xc5, 0x44, 0x7b, 0x00, 0x81, 0xc1, 0xc4,
- 0x0f, 0x7c, 0x00, 0x81, 0xc8, 0x9e, 0x00, 0x83, 0x49, 0x9f, 0x00, 0x83,
- 0x51, 0xa0, 0x00, 0x83, 0x59, 0xa1, 0x00, 0x83, 0x61, 0xa2, 0x00, 0x83,
- 0x68, 0x9e, 0x00, 0x84, 0xd1, 0xa0, 0x00, 0x84, 0xd8, 0x45, 0xcb, 0x66,
- 0xc1, 0xe8, 0x05, 0xcd, 0x77, 0xb2, 0x00, 0x82, 0x70, 0xc3, 0x01, 0xb4,
- 0x00, 0x84, 0xf1, 0xcb, 0x0f, 0xfb, 0x00, 0x84, 0xf8, 0xc2, 0x01, 0x47,
- 0x00, 0x84, 0x91, 0xc4, 0x04, 0x5e, 0x00, 0x84, 0x98, 0xc3, 0x06, 0x9e,
- 0x00, 0x84, 0xa1, 0xc3, 0x0c, 0x5b, 0x00, 0x84, 0xa8, 0xc2, 0x26, 0x51,
- 0x00, 0x84, 0xb1, 0xc4, 0x18, 0x83, 0x00, 0x84, 0xb8, 0xc7, 0xcb, 0x66,
- 0x05, 0x53, 0xd1, 0x97, 0x00, 0x81, 0x50, 0xc2, 0x00, 0xa4, 0x00, 0x80,
- 0x0b, 0x01, 0xe8, 0x17, 0x83, 0x00, 0x80, 0x00, 0x83, 0x00, 0x80, 0x83,
- 0x01, 0xe8, 0x1d, 0x16, 0xc1, 0xe8, 0x23, 0xc2, 0x00, 0xa4, 0x00, 0x80,
- 0x88, 0x0a, 0xc1, 0xe8, 0x2d, 0x83, 0x00, 0x80, 0xf1, 0xc2, 0x0c, 0x65,
- 0x00, 0x82, 0x89, 0xcd, 0x7c, 0xe0, 0x00, 0x83, 0x08, 0x83, 0x00, 0x80,
- 0x11, 0xc2, 0x00, 0xa4, 0x00, 0x80, 0x19, 0xc7, 0xbd, 0xee, 0x00, 0x81,
- 0xf8, 0xc2, 0x01, 0x29, 0x00, 0x80, 0x21, 0xc2, 0x1d, 0x5f, 0x00, 0x80,
- 0x49, 0x10, 0xc1, 0xe8, 0x3a, 0x83, 0x00, 0x80, 0xa0, 0x83, 0x00, 0x80,
- 0x29, 0xc2, 0x00, 0xa4, 0x00, 0x80, 0x30, 0x83, 0x00, 0x80, 0x39, 0xc2,
- 0x00, 0xa4, 0x00, 0x80, 0x40, 0x06, 0xc1, 0xe8, 0x44, 0x83, 0x00, 0x80,
- 0x91, 0xc2, 0x00, 0xa4, 0x00, 0x80, 0x98, 0x83, 0x00, 0x80, 0xa9, 0xc2,
- 0x00, 0xa4, 0x00, 0x80, 0xb0, 0x83, 0x00, 0x80, 0xb9, 0xc2, 0x00, 0xa4,
- 0x00, 0x80, 0xc0, 0x83, 0x00, 0x80, 0xc9, 0x43, 0x00, 0xb7, 0x41, 0xe8,
- 0x4e, 0x83, 0x00, 0x80, 0xd9, 0xcf, 0x65, 0x60, 0x00, 0x84, 0x70, 0x83,
- 0x00, 0x80, 0xe1, 0xc2, 0x00, 0xc7, 0x00, 0x81, 0x00, 0x83, 0x00, 0x80,
- 0xe9, 0x51, 0x26, 0xfc, 0x41, 0xe8, 0x64, 0x8b, 0x00, 0x81, 0x20, 0x97,
- 0x00, 0x81, 0x30, 0x51, 0x53, 0x0a, 0x41, 0xe8, 0x70, 0x94, 0x00, 0x82,
- 0x93, 0x01, 0xe8, 0x82, 0x8e, 0x00, 0x82, 0xa2, 0x01, 0xe8, 0x86, 0xc4,
- 0x18, 0x83, 0x05, 0x4f, 0xb9, 0xc2, 0x26, 0x51, 0x05, 0x4f, 0xb0, 0xc3,
- 0x0c, 0x5b, 0x05, 0x4f, 0xa9, 0xc3, 0x06, 0x9e, 0x05, 0x4f, 0xa0, 0xc4,
- 0x04, 0x5e, 0x05, 0x4f, 0x99, 0xc2, 0x01, 0x47, 0x05, 0x4f, 0x90, 0xc5,
- 0xd5, 0xac, 0x00, 0x84, 0xe2, 0x01, 0xe8, 0x8a, 0x94, 0x00, 0x82, 0xb8,
- 0x8e, 0x00, 0x82, 0xc8, 0xc2, 0x14, 0x40, 0x00, 0x84, 0x19, 0x87, 0x00,
- 0x84, 0x23, 0x01, 0xe8, 0x8e, 0xc7, 0xc6, 0xdc, 0x00, 0x84, 0x30, 0xc2,
- 0x1d, 0x5f, 0x00, 0x81, 0xd9, 0xc2, 0x02, 0x59, 0x00, 0x81, 0xe1, 0xc2,
- 0x01, 0x09, 0x00, 0x81, 0xe9, 0xc2, 0x00, 0xa4, 0x00, 0x81, 0xf0, 0xc2,
- 0x00, 0xc1, 0x00, 0x82, 0xf1, 0xc2, 0x00, 0xad, 0x00, 0x82, 0xf9, 0xc2,
- 0x00, 0xc7, 0x00, 0x83, 0x00, 0xca, 0xa8, 0x10, 0x0f, 0xd2, 0x53, 0x01,
- 0xe8, 0x94, 0xc5, 0xb5, 0xaf, 0x0f, 0xd0, 0x0b, 0x01, 0xe8, 0x9a, 0x0d,
- 0xc1, 0xe8, 0xa0, 0xc6, 0xd1, 0xf5, 0x0f, 0xd0, 0x1b, 0x01, 0xe8, 0xb2,
- 0xc4, 0xe0, 0xaf, 0x0f, 0xd0, 0x13, 0x01, 0xe8, 0xb8, 0xc4, 0xe5, 0xdf,
- 0x0f, 0xd0, 0x2b, 0x01, 0xe8, 0xbe, 0x47, 0x41, 0x9b, 0x41, 0xe8, 0xc4,
- 0x0b, 0xc1, 0xe8, 0xe0, 0xca, 0xa7, 0x70, 0x08, 0xa2, 0xf0, 0x18, 0xc1,
- 0xe8, 0xec, 0xc2, 0x00, 0xa4, 0x08, 0xa1, 0xa1, 0x15, 0xc1, 0xe8, 0xf8,
- 0x10, 0xc1, 0xe9, 0x08, 0x06, 0xc1, 0xe9, 0x20, 0x16, 0xc1, 0xe9, 0x2e,
- 0x0c, 0xc1, 0xe9, 0x3c, 0x05, 0xc1, 0xe9, 0x46, 0x09, 0xc1, 0xe9, 0x50,
- 0x0d, 0xc1, 0xe9, 0x5a, 0x83, 0x08, 0xa0, 0x03, 0x01, 0xe9, 0x64, 0x91,
- 0x08, 0xa0, 0x61, 0x87, 0x08, 0xa0, 0x51, 0x97, 0x08, 0xa0, 0x23, 0x01,
- 0xe9, 0x70, 0x8b, 0x08, 0xa0, 0x13, 0x01, 0xe9, 0x74, 0x12, 0xc1, 0xe9,
- 0x78, 0x04, 0xc1, 0xe9, 0x82, 0x0f, 0xc1, 0xe9, 0x8c, 0xc2, 0x1d, 0x5f,
- 0x08, 0xa1, 0x59, 0x14, 0xc1, 0xe9, 0x96, 0x0e, 0xc1, 0xe9, 0xa0, 0xc2,
- 0x01, 0x09, 0x08, 0xa1, 0x80, 0x46, 0x06, 0xf2, 0xc1, 0xe9, 0xaa, 0x45,
- 0x06, 0x98, 0xc1, 0xe9, 0xb6, 0xc4, 0x1c, 0xd0, 0x08, 0xa2, 0x58, 0x03,
- 0xc1, 0xe9, 0xda, 0x91, 0x08, 0xa2, 0x01, 0x87, 0x08, 0xa1, 0xf1, 0x48,
- 0xac, 0xc1, 0xc1, 0xe9, 0xe6, 0x97, 0x08, 0xa1, 0xc3, 0x01, 0xe9, 0xf4,
- 0x8b, 0x08, 0xa1, 0xb2, 0x01, 0xe9, 0xf8, 0xc8, 0xbf, 0x25, 0x00, 0xce,
- 0xf3, 0x01, 0xe9, 0xfc, 0x16, 0xc1, 0xea, 0x00, 0x46, 0x06, 0x97, 0xc1,
- 0xea, 0x0c, 0x47, 0x02, 0x90, 0xc1, 0xea, 0x30, 0x4b, 0x6f, 0xcc, 0x41,
- 0xea, 0x42, 0xc9, 0xab, 0x98, 0x0f, 0x98, 0xd1, 0xc6, 0x00, 0x71, 0x0f,
- 0x98, 0x88, 0xca, 0x9c, 0x44, 0x01, 0x3a, 0x71, 0xc2, 0x16, 0xaa, 0x0f,
- 0x8c, 0x79, 0xc2, 0x00, 0x03, 0x0f, 0x8c, 0x71, 0xc2, 0x0c, 0x65, 0x0f,
- 0x8c, 0x69, 0xc2, 0x03, 0xa4, 0x0f, 0x8c, 0x61, 0xc2, 0x02, 0x70, 0x0f,
- 0x8c, 0x59, 0x55, 0x07, 0x11, 0xc1, 0xea, 0x62, 0xcd, 0x2c, 0x41, 0x0f,
- 0xde, 0x20, 0xca, 0xa6, 0x1c, 0x01, 0x27, 0xf9, 0x47, 0x33, 0xef, 0xc1,
- 0xea, 0xca, 0x55, 0x07, 0x11, 0xc1, 0xea, 0xe0, 0xc8, 0x01, 0xe7, 0x0f,
- 0xbe, 0xb1, 0xc6, 0x07, 0x09, 0x0f, 0xbe, 0xc0, 0xc5, 0x0d, 0xbc, 0x0f,
- 0xdd, 0xe9, 0xdc, 0x02, 0x0b, 0x0f, 0xdd, 0xf1, 0xc7, 0x3f, 0x2e, 0x0f,
- 0xdd, 0xf8, 0xd6, 0x31, 0x53, 0x01, 0x14, 0x49, 0xd4, 0x3b, 0xd9, 0x01,
- 0x14, 0x40, 0xe0, 0x07, 0x47, 0x01, 0x12, 0x38, 0xca, 0x37, 0x0e, 0x01,
- 0x13, 0xa9, 0xc5, 0x07, 0x62, 0x01, 0x13, 0x88, 0xca, 0x37, 0x0e, 0x01,
- 0x13, 0xa1, 0xc5, 0x07, 0x62, 0x01, 0x13, 0x80, 0xc4, 0x18, 0x83, 0x0f,
- 0x27, 0xb9, 0xc2, 0x26, 0x51, 0x0f, 0x27, 0xb0, 0xc3, 0x0c, 0x5b, 0x0f,
- 0x27, 0xa9, 0xc3, 0x06, 0x9e, 0x0f, 0x27, 0xa0, 0xc4, 0x04, 0x5e, 0x0f,
- 0x27, 0x99, 0xc2, 0x01, 0x47, 0x0f, 0x27, 0x90, 0xc2, 0x03, 0xc7, 0x0f,
- 0x27, 0x51, 0xc2, 0x02, 0x92, 0x0f, 0x27, 0x49, 0x90, 0x0f, 0x27, 0x43,
- 0x01, 0xeb, 0x48, 0x8f, 0x0f, 0x27, 0x39, 0x89, 0x0f, 0x27, 0x30, 0x94,
- 0x0f, 0x27, 0x19, 0x8d, 0x0f, 0x27, 0x11, 0x86, 0x0f, 0x27, 0x09, 0x85,
- 0x0f, 0x27, 0x00, 0x8a, 0x0f, 0x26, 0xf1, 0x96, 0x0f, 0x26, 0xe9, 0xc2,
- 0x00, 0x52, 0x0f, 0x26, 0xe1, 0x95, 0x0f, 0x26, 0xd8, 0x8a, 0x0f, 0x26,
- 0xc9, 0xc2, 0x15, 0x1c, 0x0f, 0x26, 0xc1, 0x84, 0x0f, 0x26, 0xb9, 0x8c,
- 0x0f, 0x26, 0xb0, 0xc2, 0x00, 0x35, 0x0f, 0x26, 0xa1, 0x9b, 0x0f, 0x26,
- 0x99, 0x8e, 0x0f, 0x26, 0x91, 0x92, 0x0f, 0x26, 0x88, 0xcf, 0x64, 0x34,
- 0x08, 0xcf, 0x21, 0x03, 0xc1, 0xeb, 0x4c, 0x91, 0x08, 0xce, 0xe1, 0x87,
- 0x08, 0xce, 0xd1, 0xc9, 0xac, 0xc1, 0x08, 0xce, 0xb3, 0x01, 0xeb, 0x58,
- 0x97, 0x08, 0xce, 0xa3, 0x01, 0xeb, 0x5c, 0x8b, 0x08, 0xce, 0x92, 0x01,
- 0xeb, 0x60, 0xc7, 0xc4, 0x35, 0x08, 0xcf, 0x11, 0x03, 0xc1, 0xeb, 0x64,
- 0x42, 0x03, 0x32, 0x41, 0xeb, 0x70, 0x14, 0xc1, 0xeb, 0x7c, 0x0e, 0xc1,
- 0xeb, 0x86, 0xc2, 0x00, 0xa4, 0x08, 0xce, 0x71, 0x15, 0xc1, 0xeb, 0x90,
- 0x18, 0xc1, 0xeb, 0xa0, 0xc2, 0x1d, 0x5f, 0x08, 0xce, 0x39, 0xc2, 0x00,
- 0xad, 0x08, 0xce, 0x31, 0x04, 0xc1, 0xeb, 0xad, 0x12, 0xc1, 0xeb, 0xb7,
- 0x10, 0xc1, 0xeb, 0xc1, 0x06, 0xc1, 0xeb, 0xd7, 0x16, 0xc1, 0xeb, 0xe5,
- 0x0c, 0xc1, 0xeb, 0xf3, 0x05, 0xc1, 0xeb, 0xfd, 0x09, 0xc1, 0xec, 0x07,
- 0x0d, 0xc1, 0xec, 0x11, 0x83, 0x08, 0xcd, 0x03, 0x01, 0xec, 0x1b, 0x91,
- 0x08, 0xcd, 0x61, 0x87, 0x08, 0xcd, 0x51, 0x97, 0x08, 0xcd, 0x23, 0x01,
- 0xec, 0x27, 0x8b, 0x08, 0xcd, 0x12, 0x01, 0xec, 0x2b, 0xc3, 0x01, 0xb4,
- 0x08, 0x45, 0x3b, 0x01, 0xec, 0x2f, 0x16, 0xc1, 0xec, 0x35, 0x08, 0x41,
- 0xec, 0x45, 0x16, 0xc1, 0xec, 0x51, 0x15, 0xc1, 0xec, 0x5d, 0x46, 0x2b,
- 0x13, 0xc1, 0xec, 0x67, 0xc4, 0x5d, 0xe2, 0x08, 0x44, 0xd9, 0xc4, 0xbf,
- 0xb9, 0x08, 0x44, 0xd1, 0xc2, 0x00, 0x27, 0x08, 0x44, 0xc1, 0x03, 0xc1,
- 0xec, 0x9d, 0xc3, 0x1f, 0xd8, 0x08, 0x44, 0xa9, 0xc3, 0x0b, 0x0e, 0x08,
- 0x44, 0x99, 0xc6, 0xd0, 0x5d, 0x08, 0x44, 0x89, 0xc4, 0xe2, 0x57, 0x08,
- 0x44, 0x79, 0xc4, 0x4b, 0x98, 0x08, 0x44, 0x69, 0xc2, 0x01, 0xf0, 0x08,
- 0x44, 0x3b, 0x01, 0xec, 0xa9, 0xc5, 0x4b, 0x92, 0x08, 0x44, 0x49, 0xc3,
- 0x78, 0xa9, 0x08, 0x44, 0x41, 0xc6, 0x45, 0xf6, 0x08, 0x44, 0x29, 0xc5,
- 0xa1, 0x94, 0x08, 0x44, 0x21, 0xc4, 0xe4, 0x8f, 0x08, 0x44, 0x18, 0x45,
- 0x1f, 0xc8, 0xc1, 0xec, 0xaf, 0x45, 0x16, 0xbc, 0xc1, 0xec, 0xda, 0x46,
- 0x06, 0x91, 0x41, 0xed, 0x05, 0xde, 0x0e, 0xca, 0x0f, 0xaa, 0x19, 0x4a,
- 0x02, 0xe7, 0x41, 0xed, 0x1d, 0xe0, 0x09, 0x27, 0x01, 0x3d, 0x88, 0xcc,
- 0x23, 0x34, 0x01, 0x17, 0x60, 0x46, 0x1e, 0xfc, 0xc1, 0xed, 0x23, 0xc3,
- 0x00, 0xcc, 0x00, 0x05, 0x60, 0xc3, 0x37, 0x19, 0x01, 0x15, 0x69, 0xc4,
- 0x21, 0x13, 0x01, 0x12, 0x08, 0x43, 0x0a, 0x08, 0xc1, 0xed, 0x2f, 0xce,
- 0x69, 0x8a, 0x01, 0x12, 0x49, 0xd6, 0x30, 0xa3, 0x01, 0x12, 0x21, 0xcc,
- 0x87, 0xe8, 0x01, 0x10, 0x48, 0xca, 0x37, 0x0e, 0x01, 0x13, 0x69, 0xc5,
- 0x07, 0x62, 0x01, 0x13, 0x00, 0x86, 0x0f, 0xae, 0x51, 0xc2, 0x00, 0x63,
- 0x0f, 0xae, 0x48, 0xd6, 0x2c, 0x2b, 0x0f, 0xa6, 0xa0, 0x87, 0x0f, 0x09,
- 0x58, 0x91, 0x0f, 0x09, 0x48, 0x83, 0x0f, 0x09, 0x28, 0xc2, 0x02, 0x59,
- 0x0f, 0x09, 0x19, 0x83, 0x0f, 0x08, 0xb0, 0xc2, 0x00, 0xc7, 0x0f, 0x09,
- 0x09, 0x83, 0x0f, 0x08, 0xd0, 0xc2, 0x00, 0xc7, 0x0f, 0x09, 0x01, 0x83,
- 0x0f, 0x08, 0x00, 0x8a, 0x0f, 0x08, 0xf8, 0x12, 0xc1, 0xed, 0x3b, 0xc2,
- 0x0f, 0x60, 0x0f, 0x08, 0xc9, 0x16, 0xc1, 0xed, 0x45, 0xc2, 0x02, 0x59,
- 0x0f, 0x08, 0x89, 0xc2, 0x1d, 0x5f, 0x0f, 0x08, 0x81, 0xc2, 0x03, 0x40,
- 0x0f, 0x08, 0x61, 0xc2, 0x04, 0x2b, 0x0f, 0x08, 0x39, 0x83, 0x0f, 0x08,
- 0x28, 0xc2, 0x00, 0xc7, 0x0f, 0x08, 0xe9, 0x83, 0x0f, 0x08, 0x78, 0xc2,
- 0x1d, 0x5f, 0x0f, 0x08, 0xd9, 0x83, 0x0f, 0x08, 0x30, 0xc2, 0x96, 0xd0,
- 0x0f, 0x08, 0xa1, 0x83, 0x0f, 0x08, 0x19, 0xc2, 0x0c, 0x65, 0x0f, 0x08,
- 0x08, 0xcc, 0x8c, 0xd4, 0x0f, 0x09, 0xd9, 0xc6, 0xd0, 0xdb, 0x0f, 0x09,
- 0xd1, 0xc8, 0x7c, 0x5e, 0x0f, 0x09, 0xc9, 0xc5, 0xdd, 0xc2, 0x0f, 0x09,
- 0xc1, 0xc6, 0x15, 0x35, 0x0f, 0x09, 0xb8, 0x08, 0xc1, 0xed, 0x55, 0x07,
- 0xc1, 0xed, 0x85, 0x04, 0xc1, 0xed, 0xc5, 0x26, 0xc1, 0xee, 0x05, 0x25,
- 0xc1, 0xee, 0x45, 0x24, 0xc1, 0xee, 0x85, 0x23, 0xc1, 0xee, 0xc5, 0x22,
- 0xc1, 0xef, 0x05, 0x21, 0xc1, 0xef, 0x45, 0x20, 0xc1, 0xef, 0x85, 0x1f,
- 0xc1, 0xef, 0xc5, 0x1e, 0xc1, 0xf0, 0x05, 0x1d, 0xc1, 0xf0, 0x45, 0x06,
- 0xc1, 0xf0, 0x85, 0x05, 0xc1, 0xf0, 0xc5, 0x03, 0x41, 0xf1, 0x05, 0x08,
- 0xc1, 0xf1, 0x45, 0x07, 0xc1, 0xf1, 0x85, 0x06, 0xc1, 0xf1, 0xc5, 0x05,
- 0xc1, 0xf2, 0x05, 0x04, 0xc1, 0xf2, 0x45, 0x03, 0xc1, 0xf2, 0x85, 0x26,
- 0xc1, 0xf2, 0xc5, 0x25, 0xc1, 0xf3, 0x05, 0x24, 0x41, 0xf3, 0x45, 0x42,
- 0x00, 0x4b, 0xc1, 0xf3, 0x85, 0xd1, 0x53, 0x70, 0x01, 0x24, 0xa1, 0xcc,
- 0x48, 0x54, 0x01, 0x24, 0x88, 0xd1, 0x50, 0x1e, 0x01, 0x24, 0xc9, 0xcf,
- 0x6a, 0x79, 0x01, 0x24, 0x90, 0xd2, 0x48, 0x4e, 0x01, 0x24, 0xc1, 0x0b,
- 0x41, 0xf3, 0x91, 0xd0, 0x5a, 0x12, 0x01, 0x24, 0xb1, 0xd1, 0x56, 0x07,
- 0x01, 0x24, 0xa8, 0xc4, 0x18, 0x83, 0x00, 0x3e, 0x39, 0xc2, 0x26, 0x51,
- 0x00, 0x3e, 0x30, 0xc3, 0x0c, 0x5b, 0x00, 0x3e, 0x29, 0xc3, 0x06, 0x9e,
- 0x00, 0x3e, 0x20, 0xc4, 0x04, 0x5e, 0x00, 0x3e, 0x19, 0xc2, 0x01, 0x47,
- 0x00, 0x3e, 0x10, 0x44, 0x42, 0xef, 0xc1, 0xf3, 0x9d, 0x83, 0x00, 0x3e,
- 0xb0, 0xc2, 0x1d, 0x5f, 0x00, 0x3f, 0x13, 0x01, 0xf3, 0xaf, 0x83, 0x00,
- 0x3f, 0x1a, 0x01, 0xf3, 0xb5, 0xc2, 0x02, 0x59, 0x00, 0x3e, 0xd1, 0x83,
- 0x00, 0x3e, 0xc8, 0xc8, 0xc0, 0x85, 0x00, 0x3e, 0x88, 0x91, 0x00, 0x3e,
- 0x78, 0x87, 0x00, 0x3e, 0x58, 0xcb, 0x58, 0xa2, 0x00, 0x3f, 0x89, 0xc8,
- 0xaf, 0xa4, 0x00, 0x3f, 0x81, 0xc9, 0x3b, 0x75, 0x00, 0x3f, 0x79, 0xcf,
- 0x63, 0x9e, 0x00, 0x3f, 0x70, 0xcb, 0x58, 0xa2, 0x00, 0x3f, 0x69, 0xc8,
- 0xaf, 0xa4, 0x00, 0x3f, 0x61, 0xc9, 0x3b, 0x75, 0x00, 0x3f, 0x58, 0x46,
- 0x00, 0x6b, 0x41, 0xf3, 0xbb, 0x95, 0x0f, 0xae, 0x68, 0xc3, 0x22, 0x4c,
- 0x0f, 0xae, 0x2b, 0x01, 0xf3, 0xd3, 0xc3, 0x16, 0xbd, 0x0f, 0xd5, 0xc8,
- 0xc5, 0x12, 0x4f, 0x01, 0x1e, 0xd1, 0x45, 0xdc, 0x73, 0x41, 0xf3, 0xd9,
- 0xc4, 0xa6, 0xcc, 0x0f, 0x99, 0xf1, 0xc5, 0xdb, 0x7e, 0x0f, 0x99, 0xe8,
- 0x44, 0x00, 0xeb, 0x41, 0xf3, 0xe3, 0x21, 0xc1, 0xf4, 0x04, 0x20, 0xc1,
- 0xf4, 0x1b, 0x1f, 0xc1, 0xf4, 0x49, 0x1e, 0xc1, 0xf4, 0x7a, 0x1d, 0x41,
- 0xf4, 0xb1, 0x1f, 0xc1, 0xf4, 0xdb, 0x1e, 0xc1, 0xf4, 0xf6, 0x1d, 0x41,
- 0xf5, 0x2a, 0x20, 0xc1, 0xf5, 0x54, 0x1f, 0xc1, 0xf5, 0x76, 0x1e, 0xc1,
- 0xf5, 0x9e, 0x1d, 0x41, 0xf5, 0xcc, 0xc9, 0x79, 0x49, 0x09, 0xa2, 0x21,
- 0xc5, 0xb2, 0x89, 0x09, 0xa2, 0x10, 0xa5, 0x09, 0x8c, 0x39, 0xa4, 0x09,
- 0x8c, 0x31, 0xa3, 0x09, 0x8c, 0x23, 0x01, 0xf5, 0xfc, 0xa2, 0x09, 0x8c,
- 0x19, 0xa1, 0x09, 0x8c, 0x11, 0xa0, 0x09, 0x8c, 0x09, 0x9f, 0x09, 0x8c,
- 0x01, 0x9e, 0x09, 0x8b, 0xf8, 0xa5, 0x09, 0x8d, 0x61, 0xa4, 0x09, 0x8d,
- 0x59, 0xa3, 0x09, 0x8d, 0x4b, 0x01, 0xf6, 0x00, 0xa2, 0x09, 0x8d, 0x41,
- 0xa1, 0x09, 0x8d, 0x39, 0xa0, 0x09, 0x8d, 0x31, 0x9f, 0x09, 0x8d, 0x23,
- 0x01, 0xf6, 0x04, 0x9e, 0x09, 0x8d, 0x18, 0x22, 0xc1, 0xf6, 0x08, 0x21,
- 0xc1, 0xf6, 0x1b, 0x20, 0xc1, 0xf6, 0x4c, 0x1f, 0xc1, 0xf6, 0x7d, 0x1e,
- 0xc1, 0xf6, 0xa8, 0x1d, 0x41, 0xf6, 0xd3, 0xd0, 0x5f, 0x52, 0x09, 0xa1,
- 0xf9, 0xc9, 0x5f, 0x59, 0x09, 0xa1, 0xe9, 0xc7, 0xc1, 0xf0, 0x09, 0xa1,
- 0xc0, 0x20, 0xc1, 0xf6, 0xfa, 0x1f, 0xc1, 0xf7, 0x1d, 0x1e, 0xc1, 0xf7,
- 0x4b, 0x1d, 0x41, 0xf7, 0x79, 0xd0, 0x5f, 0x52, 0x09, 0xa1, 0xf1, 0xc9,
- 0x5f, 0x59, 0x09, 0xa1, 0xe1, 0xc7, 0xc1, 0xf0, 0x09, 0xa1, 0xb8, 0xa6,
- 0x09, 0x82, 0xc9, 0xa5, 0x09, 0x82, 0xc1, 0xa4, 0x09, 0x82, 0xb9, 0xa3,
- 0x09, 0x82, 0xb1, 0xa2, 0x09, 0x82, 0xa3, 0x01, 0xf7, 0xa3, 0xa1, 0x09,
- 0x82, 0x99, 0xa0, 0x09, 0x82, 0x91, 0x9f, 0x09, 0x82, 0x89, 0x9e, 0x09,
- 0x82, 0x80, 0xc6, 0x07, 0x3a, 0x09, 0xa1, 0xdb, 0x01, 0xf7, 0xa7, 0xc3,
- 0x03, 0x27, 0x09, 0xa1, 0xd3, 0x01, 0xf7, 0xab, 0xc6, 0x00, 0x50, 0x09,
- 0xa1, 0xc8, 0x1e, 0xc1, 0xf7, 0xaf, 0x1d, 0x41, 0xf7, 0xcd, 0x22, 0xc1,
- 0xf7, 0xf7, 0x21, 0xc1, 0xf8, 0x02, 0x20, 0xc1, 0xf8, 0x2a, 0x1f, 0xc1,
- 0xf8, 0x5b, 0x1e, 0xc1, 0xf8, 0x8f, 0x1d, 0x41, 0xf8, 0xbd, 0x1e, 0xc1,
- 0xf8, 0xea, 0xc2, 0xe8, 0x02, 0x09, 0x9d, 0x6b, 0x01, 0xf9, 0x18, 0x20,
- 0xc1, 0xf9, 0x1c, 0x1f, 0xc1, 0xf9, 0x50, 0x1d, 0x41, 0xf9, 0x84, 0x20,
- 0xc1, 0xf9, 0xb1, 0x1f, 0xc1, 0xf9, 0xbd, 0x1e, 0xc1, 0xf9, 0xe5, 0x1d,
- 0x41, 0xfa, 0x0d, 0xc2, 0xe6, 0x7a, 0x09, 0x82, 0x79, 0x23, 0xc1, 0xfa,
- 0x34, 0x22, 0xc1, 0xfa, 0x5c, 0x21, 0xc1, 0xfa, 0x84, 0x20, 0xc1, 0xfa,
- 0xb8, 0x1f, 0xc1, 0xfa, 0xe3, 0x1e, 0xc1, 0xfb, 0x0b, 0x1d, 0x41, 0xfb,
- 0x39, 0xa3, 0x09, 0xa0, 0x23, 0x01, 0xfb, 0x63, 0xa2, 0x09, 0x9f, 0xd3,
- 0x01, 0xfb, 0x83, 0xa1, 0x09, 0x9f, 0xc9, 0xa0, 0x09, 0x9f, 0xc1, 0x9f,
- 0x09, 0x9f, 0xb9, 0x9e, 0x09, 0x9f, 0xb1, 0x9d, 0x09, 0x9f, 0xa8, 0xa6,
- 0x09, 0x9f, 0xa1, 0xa5, 0x09, 0x9f, 0x99, 0xa4, 0x09, 0x9f, 0x91, 0xa3,
- 0x09, 0x9f, 0x89, 0xa2, 0x09, 0x9f, 0x7b, 0x01, 0xfb, 0xa7, 0xa1, 0x09,
- 0x9f, 0x6b, 0x01, 0xfb, 0xab, 0xa0, 0x09, 0x9f, 0x53, 0x01, 0xfb, 0xaf,
- 0x9f, 0x09, 0x9f, 0x2b, 0x01, 0xfb, 0xb7, 0x9e, 0x09, 0x9f, 0x20, 0x83,
- 0x09, 0x9e, 0xe0, 0x83, 0x09, 0x9e, 0xd0, 0x83, 0x09, 0x9e, 0xb8, 0x84,
- 0x09, 0x9e, 0xa1, 0x83, 0x09, 0x9e, 0x98, 0x9f, 0x09, 0x9b, 0x09, 0x9e,
- 0x09, 0x9b, 0x01, 0x9d, 0x09, 0x9a, 0xf8, 0xa6, 0x09, 0x9a, 0xf1, 0xa5,
- 0x09, 0x9a, 0xe9, 0xa4, 0x09, 0x9a, 0xe1, 0xa3, 0x09, 0x9a, 0xd9, 0xa2,
- 0x09, 0x9a, 0xd1, 0xa1, 0x09, 0x9a, 0xc9, 0xa0, 0x09, 0x9a, 0xc1, 0x9f,
- 0x09, 0x9a, 0xb3, 0x01, 0xfb, 0xc7, 0x9e, 0x09, 0x9a, 0xa9, 0x9d, 0x09,
- 0x9a, 0xa0, 0xa6, 0x09, 0x9a, 0x93, 0x01, 0xfb, 0xcb, 0xa5, 0x09, 0x9a,
- 0x89, 0xa4, 0x09, 0x9a, 0x81, 0xa3, 0x09, 0x9a, 0x79, 0xa2, 0x09, 0x9a,
- 0x71, 0xa1, 0x09, 0x9a, 0x69, 0xa0, 0x09, 0x9a, 0x5b, 0x01, 0xfb, 0xcf,
- 0x9f, 0x09, 0x9a, 0x51, 0x9e, 0x09, 0x9a, 0x49, 0x9d, 0x09, 0x9a, 0x40,
- 0xa6, 0x09, 0x9a, 0x39, 0xa5, 0x09, 0x9a, 0x31, 0xa4, 0x09, 0x9a, 0x29,
- 0xa3, 0x09, 0x9a, 0x21, 0xa2, 0x09, 0x9a, 0x19, 0xa1, 0x09, 0x9a, 0x11,
- 0xa0, 0x09, 0x9a, 0x09, 0x9f, 0x09, 0x9a, 0x01, 0x9e, 0x09, 0x99, 0xf9,
- 0x9d, 0x09, 0x99, 0xf0, 0xa6, 0x09, 0x99, 0xe9, 0xa5, 0x09, 0x99, 0xe1,
- 0xa4, 0x09, 0x99, 0xd9, 0xa3, 0x09, 0x99, 0xc3, 0x01, 0xfb, 0xd3, 0xa2,
- 0x09, 0x99, 0xb9, 0xa1, 0x09, 0x99, 0xb1, 0xa0, 0x09, 0x99, 0xa9, 0x9f,
- 0x09, 0x99, 0xa1, 0x9e, 0x09, 0x99, 0x98, 0xa3, 0x09, 0x98, 0x31, 0xa2,
- 0x09, 0x98, 0x29, 0xa1, 0x09, 0x98, 0x21, 0xa0, 0x09, 0x98, 0x19, 0x9f,
- 0x09, 0x98, 0x11, 0x9e, 0x09, 0x98, 0x09, 0x9d, 0x09, 0x98, 0x00, 0xa6,
- 0x09, 0x97, 0xf9, 0xa5, 0x09, 0x97, 0xf1, 0xa4, 0x09, 0x97, 0xe9, 0xa3,
- 0x09, 0x97, 0xe1, 0xa2, 0x09, 0x97, 0xd3, 0x01, 0xfb, 0xdb, 0xa1, 0x09,
- 0x97, 0xc9, 0xa0, 0x09, 0x97, 0xc1, 0x9f, 0x09, 0x97, 0xb9, 0x9e, 0x09,
- 0x97, 0xb1, 0x9d, 0x09, 0x97, 0xa8, 0xa6, 0x09, 0x97, 0xa1, 0xa5, 0x09,
- 0x97, 0x99, 0xa4, 0x09, 0x97, 0x91, 0xa3, 0x09, 0x97, 0x7b, 0x01, 0xfb,
- 0xdf, 0xa2, 0x09, 0x97, 0x71, 0xa1, 0x09, 0x97, 0x69, 0xa0, 0x09, 0x97,
- 0x61, 0x9f, 0x09, 0x97, 0x59, 0x9e, 0x09, 0x97, 0x51, 0x9d, 0x09, 0x97,
- 0x48, 0xa6, 0x09, 0x97, 0x41, 0xa5, 0x09, 0x97, 0x39, 0xa4, 0x09, 0x97,
- 0x2b, 0x01, 0xfb, 0xe7, 0xa3, 0x09, 0x97, 0x21, 0xa2, 0x09, 0x97, 0x19,
- 0xa1, 0x09, 0x97, 0x03, 0x01, 0xfb, 0xeb, 0xa0, 0x09, 0x96, 0xf9, 0x9f,
- 0x09, 0x96, 0xf1, 0x9e, 0x09, 0x96, 0xe9, 0x9d, 0x09, 0x96, 0xe0, 0xa6,
- 0x09, 0x96, 0xd9, 0xa5, 0x09, 0x96, 0xd1, 0xa4, 0x09, 0x96, 0xc9, 0xa3,
- 0x09, 0x96, 0xbb, 0x01, 0xfb, 0xf3, 0xa2, 0x09, 0x96, 0xb1, 0xa1, 0x09,
- 0x96, 0xa9, 0xa0, 0x09, 0x96, 0xa1, 0x9f, 0x09, 0x96, 0x93, 0x01, 0xfb,
- 0xf7, 0x9e, 0x09, 0x96, 0x88, 0xa6, 0x09, 0x96, 0x81, 0xa5, 0x09, 0x96,
- 0x79, 0xa4, 0x09, 0x96, 0x71, 0xa3, 0x09, 0x96, 0x69, 0xa2, 0x09, 0x96,
- 0x61, 0xa1, 0x09, 0x96, 0x59, 0xa0, 0x09, 0x96, 0x51, 0x9f, 0x09, 0x96,
- 0x49, 0x9e, 0x09, 0x96, 0x41, 0x9d, 0x09, 0x96, 0x38, 0xa6, 0x09, 0x96,
- 0x31, 0xa5, 0x09, 0x96, 0x29, 0xa4, 0x09, 0x96, 0x21, 0xa3, 0x09, 0x96,
- 0x13, 0x01, 0xfb, 0xfb, 0xa2, 0x09, 0x96, 0x09, 0xa1, 0x09, 0x96, 0x01,
- 0xa0, 0x09, 0x95, 0xf9, 0x9f, 0x09, 0x95, 0xf1, 0x9e, 0x09, 0x95, 0xe9,
- 0x9d, 0x09, 0x95, 0xda, 0x01, 0xfb, 0xff, 0xa6, 0x09, 0x95, 0xd1, 0xa5,
- 0x09, 0x95, 0xc9, 0xa4, 0x09, 0x95, 0xc1, 0xa3, 0x09, 0x95, 0xb9, 0xa2,
- 0x09, 0x95, 0xb1, 0xa1, 0x09, 0x95, 0xa9, 0xa0, 0x09, 0x95, 0x93, 0x01,
- 0xfc, 0x03, 0x9f, 0x09, 0x95, 0x83, 0x01, 0xfc, 0x0b, 0x9e, 0x09, 0x95,
- 0x78, 0x9e, 0x09, 0x95, 0x39, 0x9d, 0x09, 0x95, 0x30, 0xa6, 0x09, 0x95,
- 0x29, 0xa5, 0x09, 0x95, 0x21, 0xa4, 0x09, 0x95, 0x19, 0xa3, 0x09, 0x95,
- 0x11, 0xa2, 0x09, 0x95, 0x09, 0xa1, 0x09, 0x95, 0x01, 0xa0, 0x09, 0x94,
- 0xf3, 0x01, 0xfc, 0x0f, 0x9f, 0x09, 0x94, 0xe9, 0x9e, 0x09, 0x94, 0xda,
- 0x01, 0xfc, 0x13, 0x1f, 0xc1, 0xfc, 0x17, 0x1e, 0xc1, 0xfc, 0x26, 0x1d,
- 0x41, 0xfc, 0x57, 0xc2, 0xda, 0x7d, 0x09, 0x91, 0xa9, 0x1e, 0xc1, 0xfc,
- 0x7b, 0x1d, 0x41, 0xfc, 0xa6, 0x21, 0xc1, 0xfc, 0xcd, 0x20, 0xc1, 0xfc,
- 0xd9, 0x1f, 0xc1, 0xfd, 0x0d, 0x1e, 0xc1, 0xfd, 0x38, 0x1d, 0x41, 0xfd,
- 0x63, 0xa1, 0x09, 0x8b, 0xf1, 0xa0, 0x09, 0x8b, 0xe9, 0x9f, 0x09, 0x8b,
- 0xe1, 0x9e, 0x09, 0x8b, 0xd9, 0x9d, 0x09, 0x8b, 0xd0, 0xa6, 0x09, 0x8b,
- 0xc9, 0xa5, 0x09, 0x8b, 0xc1, 0xa4, 0x09, 0x8b, 0xb9, 0xa3, 0x09, 0x8b,
- 0xb1, 0xa2, 0x09, 0x8b, 0xa3, 0x01, 0xfd, 0x87, 0xa1, 0x09, 0x8b, 0x99,
- 0xa0, 0x09, 0x8b, 0x8b, 0x01, 0xfd, 0x8b, 0x9f, 0x09, 0x8b, 0x81, 0x9e,
- 0x09, 0x8b, 0x79, 0x9d, 0x09, 0x8b, 0x70, 0xa6, 0x09, 0x8b, 0x69, 0xa5,
- 0x09, 0x8b, 0x61, 0xa4, 0x09, 0x8b, 0x53, 0x01, 0xfd, 0x8f, 0xa3, 0x09,
- 0x8b, 0x43, 0x01, 0xfd, 0x93, 0xa2, 0x09, 0x8b, 0x39, 0xa1, 0x09, 0x8b,
- 0x31, 0xa0, 0x09, 0x8b, 0x29, 0x9f, 0x09, 0x8b, 0x21, 0x9e, 0x09, 0x8b,
- 0x19, 0x9d, 0x09, 0x8b, 0x10, 0xa6, 0x09, 0x8b, 0x09, 0xa5, 0x09, 0x8b,
- 0x01, 0xa4, 0x09, 0x8a, 0xf9, 0xa3, 0x09, 0x8a, 0xeb, 0x01, 0xfd, 0x97,
- 0xa2, 0x09, 0x8a, 0xe1, 0xa1, 0x09, 0x8a, 0xd9, 0xa0, 0x09, 0x8a, 0xd1,
- 0x9f, 0x09, 0x8a, 0xc9, 0x9e, 0x09, 0x8a, 0xc1, 0x9d, 0x09, 0x8a, 0xb2,
- 0x01, 0xfd, 0x9b, 0xa6, 0x09, 0x8a, 0xa9, 0xa5, 0x09, 0x8a, 0xa1, 0xa4,
- 0x09, 0x8a, 0x99, 0xa3, 0x09, 0x8a, 0x91, 0xa2, 0x09, 0x8a, 0x89, 0xa1,
- 0x09, 0x8a, 0x81, 0xa0, 0x09, 0x8a, 0x79, 0x9f, 0x09, 0x8a, 0x71, 0x9e,
- 0x09, 0x8a, 0x63, 0x01, 0xfd, 0x9f, 0x9d, 0x09, 0x8a, 0x58, 0xa6, 0x09,
- 0x8a, 0x51, 0xa5, 0x09, 0x8a, 0x49, 0xa4, 0x09, 0x8a, 0x33, 0x01, 0xfd,
- 0xa3, 0xa3, 0x09, 0x8a, 0x23, 0x01, 0xfd, 0xab, 0xa2, 0x09, 0x8a, 0x19,
- 0xa1, 0x09, 0x8a, 0x11, 0xa0, 0x09, 0x8a, 0x09, 0x9f, 0x09, 0x8a, 0x01,
- 0x9e, 0x09, 0x89, 0xf8, 0xa4, 0x09, 0x86, 0x4b, 0x01, 0xfd, 0xaf, 0xa3,
- 0x09, 0x86, 0x41, 0xa2, 0x09, 0x86, 0x39, 0xa1, 0x09, 0x86, 0x31, 0xa0,
- 0x09, 0x86, 0x29, 0x9f, 0x09, 0x86, 0x21, 0x9e, 0x09, 0x86, 0x19, 0x9d,
- 0x09, 0x86, 0x10, 0xa6, 0x09, 0x86, 0x09, 0xa5, 0x09, 0x86, 0x01, 0xa4,
- 0x09, 0x85, 0xf9, 0xa3, 0x09, 0x85, 0xf1, 0xa2, 0x09, 0x85, 0xe9, 0xa1,
- 0x09, 0x85, 0xdb, 0x01, 0xfd, 0xcf, 0xa0, 0x09, 0x85, 0xd1, 0x9f, 0x09,
- 0x85, 0xc3, 0x01, 0xfd, 0xd3, 0x9e, 0x09, 0x85, 0xb9, 0x9d, 0x09, 0x85,
- 0x6a, 0x01, 0xfd, 0xd7, 0xa6, 0x09, 0x85, 0x61, 0xa5, 0x09, 0x85, 0x53,
- 0x01, 0xfd, 0xfb, 0xa4, 0x09, 0x85, 0x49, 0xa3, 0x09, 0x85, 0x3b, 0x01,
- 0xfd, 0xff, 0xa2, 0x09, 0x85, 0x31, 0xa1, 0x09, 0x85, 0x29, 0xa0, 0x09,
- 0x85, 0x21, 0x9f, 0x09, 0x85, 0x19, 0x9e, 0x09, 0x85, 0x11, 0x9d, 0x09,
- 0x85, 0x08, 0xa6, 0x09, 0x85, 0x01, 0xa5, 0x09, 0x84, 0xf9, 0xa4, 0x09,
- 0x84, 0xf1, 0xa3, 0x09, 0x84, 0xe9, 0xa2, 0x09, 0x84, 0xe1, 0xa1, 0x09,
- 0x84, 0xd3, 0x01, 0xfe, 0x03, 0xa0, 0x09, 0x84, 0xc9, 0x9f, 0x09, 0x84,
- 0xc1, 0x9e, 0x09, 0x84, 0xb3, 0x01, 0xfe, 0x07, 0x9d, 0x09, 0x84, 0xa8,
- 0xa6, 0x09, 0x84, 0xa1, 0xa5, 0x09, 0x84, 0x99, 0xa4, 0x09, 0x84, 0x8b,
- 0x01, 0xfe, 0x0b, 0xa3, 0x09, 0x84, 0x81, 0xa2, 0x09, 0x84, 0x79, 0xa1,
- 0x09, 0x84, 0x71, 0xa0, 0x09, 0x84, 0x69, 0x9f, 0x09, 0x84, 0x61, 0x9e,
- 0x09, 0x84, 0x59, 0x9d, 0x09, 0x84, 0x50, 0xa6, 0x09, 0x84, 0x49, 0xa5,
- 0x09, 0x84, 0x41, 0xa4, 0x09, 0x84, 0x39, 0xa3, 0x09, 0x84, 0x31, 0xa2,
- 0x09, 0x84, 0x29, 0xa1, 0x09, 0x84, 0x21, 0xa0, 0x09, 0x84, 0x19, 0x9f,
- 0x09, 0x84, 0x11, 0x9e, 0x09, 0x84, 0x09, 0x9d, 0x09, 0x84, 0x00, 0xa6,
- 0x09, 0x83, 0xf9, 0xa5, 0x09, 0x83, 0xeb, 0x01, 0xfe, 0x0f, 0xa4, 0x09,
- 0x83, 0xe1, 0xa3, 0x09, 0x83, 0xd9, 0xa2, 0x09, 0x83, 0xd1, 0xa1, 0x09,
- 0x83, 0xc9, 0xa0, 0x09, 0x83, 0xc1, 0x9f, 0x09, 0x83, 0xb9, 0x9e, 0x09,
- 0x83, 0xb0, 0xa1, 0x09, 0x83, 0xa9, 0xa0, 0x09, 0x83, 0xa1, 0x9f, 0x09,
- 0x83, 0x99, 0x9e, 0x09, 0x83, 0x91, 0x9d, 0x09, 0x83, 0x88, 0xa6, 0x09,
- 0x83, 0x81, 0xa5, 0x09, 0x83, 0x79, 0xa4, 0x09, 0x83, 0x71, 0xa3, 0x09,
- 0x83, 0x69, 0xa2, 0x09, 0x83, 0x61, 0xa1, 0x09, 0x83, 0x59, 0xa0, 0x09,
- 0x83, 0x51, 0x9f, 0x09, 0x83, 0x49, 0x9e, 0x09, 0x83, 0x41, 0x9d, 0x09,
- 0x83, 0x32, 0x01, 0xfe, 0x13, 0xa6, 0x09, 0x83, 0x29, 0xa5, 0x09, 0x83,
- 0x21, 0xa4, 0x09, 0x83, 0x19, 0xa3, 0x09, 0x83, 0x11, 0xa2, 0x09, 0x83,
- 0x09, 0xa1, 0x09, 0x83, 0x01, 0xa0, 0x09, 0x82, 0xf9, 0x9f, 0x09, 0x82,
- 0xdb, 0x01, 0xfe, 0x17, 0x9e, 0x09, 0x82, 0xd0, 0xcb, 0x5a, 0x67, 0x0f,
- 0xbd, 0x39, 0x46, 0x02, 0x31, 0xc1, 0xfe, 0x23, 0x15, 0xc1, 0xfe, 0x2f,
- 0xd4, 0x3e, 0x09, 0x0f, 0xbd, 0xa0, 0xc4, 0x18, 0x83, 0x00, 0x37, 0xb9,
- 0xc2, 0x26, 0x51, 0x00, 0x37, 0xb0, 0xc3, 0x0c, 0x5b, 0x00, 0x37, 0xa9,
- 0xc3, 0x06, 0x9e, 0x00, 0x37, 0xa0, 0xc4, 0x04, 0x5e, 0x00, 0x37, 0x99,
- 0xc2, 0x01, 0x47, 0x00, 0x37, 0x90, 0x97, 0x00, 0x98, 0x4b, 0x01, 0xfe,
- 0x3b, 0x47, 0x21, 0x79, 0xc1, 0xfe, 0x41, 0x83, 0x00, 0x98, 0x43, 0x01,
- 0xfe, 0x64, 0x8b, 0x00, 0x98, 0x51, 0x87, 0x00, 0x98, 0x6b, 0x01, 0xfe,
- 0x68, 0x91, 0x00, 0x98, 0x73, 0x01, 0xfe, 0x6c, 0x19, 0xc1, 0xfe, 0x70,
- 0x09, 0xc1, 0xfe, 0x82, 0x1b, 0x41, 0xfe, 0xa0, 0x0a, 0xc1, 0xfe, 0xba,
- 0x83, 0x00, 0x90, 0x03, 0x01, 0xfe, 0xdc, 0x97, 0x00, 0x90, 0x09, 0x8b,
- 0x00, 0x90, 0x11, 0x87, 0x00, 0x90, 0x2b, 0x01, 0xfe, 0xe0, 0x91, 0x00,
- 0x90, 0x32, 0x01, 0xfe, 0xe4, 0x04, 0xc1, 0xfe, 0xe8, 0x83, 0x00, 0x93,
- 0x03, 0x01, 0xff, 0x02, 0x97, 0x00, 0x93, 0x09, 0x8b, 0x00, 0x93, 0x11,
- 0x87, 0x00, 0x93, 0x2b, 0x01, 0xff, 0x06, 0x91, 0x00, 0x93, 0x33, 0x01,
- 0xff, 0x0a, 0x19, 0x41, 0xff, 0x0e, 0x05, 0xc1, 0xff, 0x1d, 0x83, 0x00,
- 0x93, 0xc3, 0x01, 0xff, 0x3b, 0x97, 0x00, 0x93, 0xc9, 0x8b, 0x00, 0x93,
- 0xd1, 0x87, 0x00, 0x93, 0xeb, 0x01, 0xff, 0x3f, 0x91, 0x00, 0x93, 0xf3,
- 0x01, 0xff, 0x43, 0xc2, 0x01, 0x09, 0x00, 0x93, 0xf9, 0x0a, 0x41, 0xff,
- 0x47, 0x1c, 0xc1, 0xff, 0x6a, 0x06, 0xc1, 0xff, 0x83, 0x83, 0x00, 0x97,
- 0x83, 0x01, 0xff, 0xaa, 0x97, 0x00, 0x97, 0x89, 0x8b, 0x00, 0x97, 0x91,
- 0x87, 0x00, 0x97, 0xab, 0x01, 0xff, 0xae, 0x91, 0x00, 0x97, 0xb3, 0x01,
- 0xff, 0xb2, 0xc2, 0x01, 0x09, 0x00, 0x97, 0xb8, 0x42, 0x00, 0x6e, 0xc1,
- 0xff, 0xb6, 0x83, 0x00, 0x93, 0x83, 0x01, 0xff, 0xcf, 0x97, 0x00, 0x93,
- 0x89, 0x8b, 0x00, 0x93, 0x91, 0x87, 0x00, 0x93, 0xab, 0x01, 0xff, 0xd3,
- 0x91, 0x00, 0x93, 0xb3, 0x01, 0xff, 0xd7, 0xc2, 0x01, 0x09, 0x00, 0x93,
- 0xb9, 0x0a, 0xc1, 0xff, 0xdb, 0x15, 0xc1, 0xff, 0xfe, 0x1c, 0x42, 0x00,
- 0x1e, 0x83, 0x00, 0x90, 0x43, 0x02, 0x00, 0x3b, 0x97, 0x00, 0x90, 0x49,
- 0x8b, 0x00, 0x90, 0x51, 0x87, 0x00, 0x90, 0x6b, 0x02, 0x00, 0x3f, 0x91,
- 0x00, 0x90, 0x73, 0x02, 0x00, 0x43, 0xc2, 0x01, 0x09, 0x00, 0x90, 0x78,
- 0x83, 0x00, 0x90, 0xc3, 0x02, 0x00, 0x47, 0x97, 0x00, 0x90, 0xc9, 0x8b,
- 0x00, 0x90, 0xd1, 0x87, 0x00, 0x90, 0xeb, 0x02, 0x00, 0x4b, 0x91, 0x00,
- 0x90, 0xf3, 0x02, 0x00, 0x4f, 0x19, 0xc2, 0x00, 0x53, 0xc2, 0x1d, 0x5f,
- 0x00, 0x9a, 0xc8, 0x1c, 0xc2, 0x00, 0x62, 0x83, 0x00, 0x91, 0x83, 0x02,
- 0x00, 0x82, 0x97, 0x00, 0x91, 0x89, 0x8b, 0x00, 0x91, 0x91, 0x87, 0x00,
- 0x91, 0xab, 0x02, 0x00, 0x86, 0x91, 0x00, 0x91, 0xb3, 0x02, 0x00, 0x90,
- 0xc2, 0x01, 0x09, 0x00, 0x91, 0xb9, 0x0a, 0xc2, 0x00, 0x94, 0x15, 0x42,
- 0x00, 0xb7, 0x83, 0x00, 0x91, 0x43, 0x02, 0x00, 0xd1, 0x97, 0x00, 0x91,
- 0x49, 0x8b, 0x00, 0x91, 0x51, 0x87, 0x00, 0x91, 0x6b, 0x02, 0x00, 0xd5,
- 0x91, 0x00, 0x91, 0x73, 0x02, 0x00, 0xd9, 0xc2, 0x01, 0x09, 0x00, 0x91,
- 0x79, 0xc2, 0x1d, 0x5f, 0x00, 0x9a, 0xc0, 0x83, 0x00, 0x92, 0x03, 0x02,
- 0x00, 0xdd, 0x97, 0x00, 0x92, 0x09, 0x8b, 0x00, 0x92, 0x11, 0x87, 0x00,
- 0x92, 0x2b, 0x02, 0x00, 0xe1, 0x91, 0x00, 0x92, 0x33, 0x02, 0x00, 0xe5,
- 0x19, 0xc2, 0x00, 0xe9, 0x0a, 0xc2, 0x00, 0xfb, 0x1b, 0x42, 0x01, 0x19,
- 0x83, 0x00, 0x93, 0x43, 0x02, 0x01, 0x33, 0x97, 0x00, 0x93, 0x49, 0x8b,
- 0x00, 0x93, 0x51, 0x87, 0x00, 0x93, 0x6b, 0x02, 0x01, 0x37, 0x91, 0x00,
- 0x93, 0x71, 0xc2, 0x01, 0x09, 0x00, 0x93, 0x78, 0x83, 0x00, 0x94, 0x03,
- 0x02, 0x01, 0x3b, 0x97, 0x00, 0x94, 0x09, 0x8b, 0x00, 0x94, 0x11, 0x87,
- 0x00, 0x94, 0x2b, 0x02, 0x01, 0x3f, 0x91, 0x00, 0x94, 0x33, 0x02, 0x01,
- 0x43, 0x19, 0xc2, 0x01, 0x47, 0x1b, 0x42, 0x01, 0x59, 0x83, 0x00, 0x94,
- 0x83, 0x02, 0x01, 0x73, 0x97, 0x00, 0x94, 0x89, 0x8b, 0x00, 0x94, 0x91,
- 0x87, 0x00, 0x94, 0xab, 0x02, 0x01, 0x77, 0x91, 0x00, 0x94, 0xb3, 0x02,
- 0x01, 0x7b, 0xc2, 0x01, 0x09, 0x00, 0x94, 0xb9, 0x1b, 0x42, 0x01, 0x7f,
- 0x83, 0x00, 0x95, 0x43, 0x02, 0x01, 0xa2, 0x97, 0x00, 0x95, 0x49, 0x8b,
- 0x00, 0x95, 0x51, 0x87, 0x00, 0x95, 0x6b, 0x02, 0x01, 0xa6, 0x91, 0x00,
- 0x95, 0x73, 0x02, 0x01, 0xaa, 0x19, 0xc2, 0x01, 0xae, 0x1a, 0xc2, 0x01,
- 0xc0, 0x1b, 0x42, 0x01, 0xde, 0x83, 0x00, 0x96, 0x43, 0x02, 0x01, 0xf8,
- 0x97, 0x00, 0x96, 0x49, 0x8b, 0x00, 0x96, 0x51, 0x87, 0x00, 0x96, 0x6b,
- 0x02, 0x01, 0xfc, 0x91, 0x00, 0x96, 0x72, 0x02, 0x02, 0x00, 0x0a, 0xc2,
- 0x02, 0x04, 0x83, 0x00, 0x9a, 0x83, 0x02, 0x02, 0x27, 0x97, 0x00, 0x9a,
- 0x89, 0x8b, 0x00, 0x9a, 0x91, 0x87, 0x00, 0x9a, 0xab, 0x02, 0x02, 0x2b,
- 0x91, 0x00, 0x9a, 0xb3, 0x02, 0x02, 0x2f, 0x19, 0x42, 0x02, 0x33, 0x83,
- 0x00, 0x96, 0xc3, 0x02, 0x02, 0x42, 0x97, 0x00, 0x96, 0xc9, 0x8b, 0x00,
- 0x96, 0xd1, 0x87, 0x00, 0x96, 0xeb, 0x02, 0x02, 0x46, 0x91, 0x00, 0x96,
- 0xf3, 0x02, 0x02, 0x4a, 0xc2, 0x01, 0x09, 0x00, 0x96, 0xf9, 0x0a, 0xc2,
- 0x02, 0x4e, 0x1c, 0x42, 0x02, 0x6e, 0x83, 0x00, 0x97, 0x43, 0x02, 0x02,
- 0x88, 0x97, 0x00, 0x97, 0x49, 0x8b, 0x00, 0x97, 0x51, 0x87, 0x00, 0x97,
- 0x6b, 0x02, 0x02, 0x8c, 0x91, 0x00, 0x97, 0x72, 0x02, 0x02, 0x90, 0x83,
- 0x00, 0x98, 0x03, 0x02, 0x02, 0x94, 0x97, 0x00, 0x98, 0x09, 0x8b, 0x00,
- 0x98, 0x11, 0x87, 0x00, 0x98, 0x2b, 0x02, 0x02, 0x98, 0x91, 0x00, 0x98,
- 0x33, 0x02, 0x02, 0x9c, 0xc2, 0x01, 0x09, 0x00, 0x98, 0x38, 0x83, 0x00,
- 0x9a, 0x43, 0x02, 0x02, 0xa0, 0x97, 0x00, 0x9a, 0x49, 0x8b, 0x00, 0x9a,
- 0x51, 0x87, 0x00, 0x9a, 0x6b, 0x02, 0x02, 0xa4, 0x91, 0x00, 0x9a, 0x71,
- 0x19, 0xc2, 0x02, 0xa8, 0xc2, 0x1d, 0x5f, 0x00, 0x9a, 0xd0, 0x4b, 0x69,
- 0x11, 0xc2, 0x02, 0xb7, 0xd1, 0x34, 0xbb, 0x00, 0x9a, 0xf0, 0xc9, 0x4f,
- 0xff, 0x00, 0x9b, 0xe0, 0xc6, 0xce, 0x65, 0x00, 0x9c, 0xc0, 0x48, 0x74,
- 0x05, 0xc2, 0x02, 0xc3, 0x45, 0x00, 0x6c, 0x42, 0x02, 0xcf, 0xc5, 0x01,
- 0x0f, 0x01, 0x18, 0x09, 0xc5, 0x76, 0xf9, 0x0f, 0xa9, 0x31, 0xc4, 0xe4,
- 0xff, 0x0f, 0xa8, 0x61, 0xca, 0xa5, 0x40, 0x0f, 0xa5, 0x08, 0xc2, 0x26,
- 0xfa, 0x08, 0x7f, 0xa9, 0xc3, 0x1a, 0xba, 0x08, 0x7f, 0x40, 0xc3, 0x0d,
- 0xd9, 0x08, 0x7f, 0xa1, 0x03, 0x42, 0x02, 0xf3, 0xc2, 0x00, 0x6e, 0x08,
- 0x7f, 0x38, 0xc4, 0x37, 0x5c, 0x08, 0x7f, 0x01, 0xc3, 0x15, 0x1d, 0x08,
- 0x7f, 0x78, 0x87, 0x08, 0x29, 0x29, 0xc4, 0x32, 0x6d, 0x08, 0x29, 0x30,
- 0xd6, 0x2b, 0xe9, 0x01, 0x39, 0xb9, 0xcd, 0x0e, 0x9f, 0x01, 0x39, 0xa9,
- 0xca, 0x21, 0x3e, 0x01, 0x39, 0xa0, 0xc2, 0x00, 0xf3, 0x01, 0x10, 0x71,
- 0xcb, 0x71, 0x38, 0x00, 0x04, 0xb8, 0xcb, 0x8e, 0x64, 0x00, 0x00, 0x23,
- 0x02, 0x02, 0xff, 0xc3, 0x0a, 0xdf, 0x00, 0x00, 0x18, 0x43, 0x06, 0xd2,
- 0xc2, 0x03, 0x05, 0xcd, 0x77, 0x4a, 0x01, 0x12, 0xe8, 0x00, 0x42, 0x03,
- 0x1d, 0xc4, 0x18, 0x83, 0x08, 0xed, 0x39, 0xc2, 0x26, 0x51, 0x08, 0xed,
- 0x30, 0xc3, 0x0c, 0x5b, 0x08, 0xed, 0x29, 0xc3, 0x06, 0x9e, 0x08, 0xed,
- 0x20, 0xc4, 0x04, 0x5e, 0x08, 0xed, 0x19, 0xc2, 0x01, 0x47, 0x08, 0xed,
- 0x10, 0x03, 0xc2, 0x03, 0x27, 0xc2, 0x02, 0x84, 0x08, 0xec, 0x99, 0xc2,
- 0x00, 0x4c, 0x08, 0xec, 0x81, 0x97, 0x08, 0xec, 0x6b, 0x02, 0x03, 0x33,
- 0x8b, 0x08, 0xec, 0x5a, 0x02, 0x03, 0x37, 0xc2, 0x00, 0xa4, 0x08, 0xec,
- 0x31, 0x83, 0x08, 0xec, 0x28, 0xc2, 0x01, 0x29, 0x08, 0xec, 0x21, 0x83,
- 0x08, 0xeb, 0xd0, 0x06, 0xc2, 0x03, 0x3b, 0xc2, 0x00, 0xa4, 0x08, 0xeb,
- 0xc9, 0x83, 0x08, 0xeb, 0xc0, 0xc2, 0x00, 0xa4, 0x08, 0xec, 0x09, 0x83,
- 0x08, 0xec, 0x00, 0xc2, 0x00, 0xc7, 0x08, 0xeb, 0xf9, 0x83, 0x08, 0xeb,
- 0xa8, 0x16, 0xc2, 0x03, 0x45, 0xc2, 0x00, 0xa4, 0x08, 0xeb, 0xa1, 0x83,
- 0x08, 0xeb, 0x98, 0xc2, 0x00, 0xa4, 0x08, 0xeb, 0xe1, 0x83, 0x08, 0xeb,
- 0xd8, 0xc2, 0x00, 0xa4, 0x08, 0xeb, 0xb9, 0x83, 0x08, 0xeb, 0xb0, 0xc2,
- 0x00, 0xa4, 0x08, 0xeb, 0x91, 0x83, 0x08, 0xeb, 0x88, 0xc2, 0x00, 0xa4,
- 0x08, 0xeb, 0x79, 0x83, 0x08, 0xeb, 0x70, 0x97, 0x08, 0xeb, 0x59, 0x8b,
- 0x08, 0xeb, 0x41, 0x83, 0x08, 0xeb, 0x08, 0x97, 0x08, 0xeb, 0x28, 0x8b,
- 0x08, 0xeb, 0x18, 0xc5, 0x44, 0x7b, 0x00, 0x50, 0x19, 0xc4, 0x0f, 0x7c,
- 0x00, 0x52, 0x68, 0x83, 0x00, 0x50, 0x31, 0x8b, 0x00, 0x50, 0x81, 0x97,
- 0x00, 0x50, 0xa0, 0x8b, 0x00, 0x50, 0x40, 0x97, 0x00, 0x50, 0x50, 0x83,
- 0x00, 0x50, 0xa9, 0x0a, 0x42, 0x03, 0x4f, 0x83, 0x00, 0x50, 0xb9, 0x0a,
- 0x42, 0x03, 0x59, 0xc2, 0x01, 0x29, 0x00, 0x50, 0xc9, 0xc2, 0x1d, 0x5f,
- 0x00, 0x50, 0xf1, 0xc2, 0x00, 0xc1, 0x00, 0x51, 0x19, 0x83, 0x00, 0x51,
- 0x40, 0x83, 0x00, 0x50, 0xd1, 0xc2, 0x00, 0xa4, 0x00, 0x50, 0xd8, 0x83,
- 0x00, 0x50, 0xe1, 0xc2, 0x00, 0xa4, 0x00, 0x50, 0xe8, 0x16, 0xc2, 0x03,
- 0x63, 0x83, 0x00, 0x51, 0x21, 0xc2, 0x00, 0xa4, 0x00, 0x51, 0x28, 0x06,
- 0xc2, 0x03, 0x6d, 0x83, 0x00, 0x51, 0x31, 0xc2, 0x00, 0xa4, 0x00, 0x51,
- 0x38, 0x83, 0x00, 0x51, 0x51, 0xc2, 0x00, 0xa4, 0x00, 0x51, 0x58, 0x83,
- 0x00, 0x51, 0x61, 0xc2, 0x00, 0xa4, 0x00, 0x51, 0x68, 0x83, 0x00, 0x51,
- 0x81, 0xc2, 0x02, 0x59, 0x00, 0x52, 0xe0, 0x83, 0x00, 0x51, 0x91, 0xc2,
- 0x00, 0xc7, 0x00, 0x51, 0x98, 0xc2, 0x00, 0xa4, 0x00, 0x51, 0xb1, 0x83,
- 0x00, 0x51, 0xc0, 0x83, 0x00, 0x51, 0xf1, 0x8b, 0x00, 0x52, 0x41, 0x97,
- 0x00, 0x52, 0x60, 0x8b, 0x00, 0x52, 0x00, 0x97, 0x00, 0x52, 0x10, 0xc2,
- 0x01, 0x47, 0x00, 0x53, 0x41, 0xc4, 0x04, 0x5e, 0x00, 0x53, 0x48, 0xc3,
- 0x06, 0x9e, 0x00, 0x53, 0x51, 0xc3, 0x0c, 0x5b, 0x00, 0x53, 0x58, 0xc2,
- 0x26, 0x51, 0x00, 0x53, 0x61, 0xc4, 0x18, 0x83, 0x00, 0x53, 0x68, 0xca,
- 0x1e, 0x18, 0x00, 0x54, 0x09, 0xd1, 0x34, 0xfa, 0x00, 0x57, 0xf0, 0xc7,
- 0x11, 0x41, 0x00, 0x54, 0x11, 0xc7, 0x76, 0x59, 0x00, 0x55, 0xe8, 0xc5,
- 0x44, 0x7b, 0x00, 0x54, 0x19, 0xc4, 0x0f, 0x7c, 0x00, 0x56, 0x68, 0xc4,
- 0xda, 0x94, 0x00, 0x57, 0xd1, 0xc5, 0xd5, 0xa7, 0x00, 0x57, 0xd8, 0xd4,
- 0x3c, 0xc9, 0x00, 0x57, 0xe9, 0xd5, 0x34, 0xf6, 0x00, 0x57, 0xf8, 0x83,
- 0x00, 0x54, 0x31, 0x8b, 0x00, 0x54, 0x81, 0x97, 0x00, 0x54, 0xa0, 0x8b,
- 0x00, 0x54, 0x40, 0x97, 0x00, 0x54, 0x50, 0x47, 0xac, 0xc2, 0xc2, 0x03,
- 0x77, 0x83, 0x00, 0x55, 0xa8, 0x83, 0x00, 0x54, 0xa9, 0xc2, 0x00, 0xa4,
- 0x00, 0x54, 0xb0, 0x83, 0x00, 0x54, 0xb9, 0xc2, 0x00, 0xa4, 0x00, 0x54,
- 0xc0, 0xc2, 0x01, 0x29, 0x00, 0x54, 0xc9, 0xc2, 0x1d, 0x5f, 0x00, 0x54,
- 0xf1, 0xc2, 0x00, 0xc1, 0x00, 0x55, 0x19, 0x83, 0x00, 0x55, 0x40, 0x83,
- 0x00, 0x54, 0xd1, 0xc2, 0x00, 0xa4, 0x00, 0x54, 0xd8, 0x83, 0x00, 0x54,
- 0xe1, 0xc2, 0x00, 0xa4, 0x00, 0x54, 0xe8, 0x16, 0xc2, 0x03, 0x85, 0x83,
- 0x00, 0x55, 0x21, 0xc2, 0x00, 0xa4, 0x00, 0x55, 0x28, 0x06, 0xc2, 0x03,
- 0x8f, 0x83, 0x00, 0x55, 0x31, 0xc2, 0x00, 0xa4, 0x00, 0x55, 0x38, 0x83,
- 0x00, 0x55, 0x51, 0xc2, 0x00, 0xa4, 0x00, 0x55, 0x58, 0x83, 0x00, 0x55,
- 0x61, 0xc2, 0x00, 0xa4, 0x00, 0x55, 0x68, 0x83, 0x00, 0x55, 0x91, 0xc2,
- 0x00, 0xc7, 0x00, 0x55, 0x98, 0xc2, 0x00, 0xa4, 0x00, 0x55, 0xb1, 0xc2,
- 0x0c, 0x65, 0x00, 0x55, 0xb9, 0x83, 0x00, 0x55, 0xc0, 0x87, 0x00, 0x54,
- 0x69, 0x91, 0x00, 0x54, 0x88, 0x03, 0xc2, 0x03, 0x99, 0x8b, 0x00, 0x55,
- 0xfb, 0x02, 0x03, 0xa5, 0x97, 0x00, 0x56, 0x0b, 0x02, 0x03, 0xa9, 0x48,
- 0xac, 0xc1, 0xc2, 0x03, 0xad, 0x47, 0xc9, 0x91, 0xc2, 0x03, 0xbb, 0x87,
- 0x00, 0x56, 0x39, 0x91, 0x00, 0x56, 0x58, 0xc2, 0x01, 0x47, 0x00, 0x57,
- 0x41, 0xc4, 0x04, 0x5e, 0x00, 0x57, 0x48, 0xc3, 0x06, 0x9e, 0x00, 0x57,
- 0x51, 0xc3, 0x0c, 0x5b, 0x00, 0x57, 0x58, 0xc2, 0x26, 0x51, 0x00, 0x57,
- 0x61, 0xc4, 0x18, 0x83, 0x00, 0x57, 0x68, 0xc2, 0x0c, 0x57, 0x08, 0x1a,
- 0x09, 0xc8, 0x0c, 0x4a, 0x08, 0x1a, 0x50, 0x0f, 0xc2, 0x03, 0xc3, 0x42,
- 0x00, 0x34, 0xc2, 0x03, 0xcf, 0x18, 0xc2, 0x03, 0xdb, 0x06, 0xc2, 0x03,
- 0xe7, 0x11, 0xc2, 0x03, 0xfc, 0x48, 0x07, 0x17, 0xc2, 0x04, 0x14, 0x15,
- 0xc2, 0x04, 0x30, 0x12, 0xc2, 0x04, 0x48, 0x0d, 0xc2, 0x04, 0x69, 0x0e,
- 0xc2, 0x04, 0x79, 0xcc, 0x56, 0x61, 0x00, 0x1b, 0xa1, 0x1b, 0xc2, 0x04,
- 0x91, 0xcd, 0x2c, 0x41, 0x00, 0x1b, 0xf1, 0x16, 0xc2, 0x04, 0x9d, 0x03,
- 0xc2, 0x04, 0xb9, 0xcb, 0x96, 0x57, 0x00, 0x1e, 0x81, 0x14, 0xc2, 0x04,
- 0xc9, 0x08, 0xc2, 0x04, 0xd5, 0xcb, 0x98, 0xeb, 0x08, 0x0c, 0x29, 0xcb,
- 0x96, 0x0a, 0x08, 0x0c, 0x41, 0xc9, 0xb3, 0x0c, 0x08, 0x0c, 0x51, 0x4d,
- 0x7d, 0x62, 0x42, 0x04, 0xe1, 0xc4, 0xe4, 0x9f, 0x0f, 0xa6, 0xb9, 0xc5,
- 0x1d, 0x53, 0x0f, 0xa4, 0xd1, 0xc5, 0xdf, 0xc5, 0x0f, 0x9a, 0x79, 0xc5,
- 0xde, 0xfd, 0x0f, 0xca, 0xb8, 0x4a, 0x37, 0x04, 0xc2, 0x04, 0xf3, 0xcf,
- 0x63, 0x71, 0x01, 0x55, 0x28, 0xc3, 0x01, 0x4a, 0x01, 0x16, 0xb9, 0xcd,
- 0x80, 0x54, 0x01, 0x53, 0xd1, 0xd3, 0x46, 0xa7, 0x01, 0x53, 0xe0, 0x42,
- 0x00, 0x29, 0xc2, 0x04, 0xff, 0x43, 0x00, 0x7b, 0x42, 0x05, 0x1a, 0x45,
- 0x00, 0x96, 0xc2, 0x05, 0x26, 0x43, 0x00, 0x58, 0x42, 0x05, 0x38, 0xd4,
- 0x03, 0x13, 0x01, 0x55, 0x48, 0x48, 0xac, 0xc1, 0xc2, 0x05, 0x44, 0x03,
- 0xc2, 0x05, 0x52, 0xc2, 0x02, 0x84, 0x08, 0x9a, 0x59, 0xc2, 0x00, 0x4c,
- 0x08, 0x9a, 0x39, 0x97, 0x08, 0x9a, 0x0b, 0x02, 0x05, 0x5e, 0x8b, 0x08,
- 0x99, 0xfa, 0x02, 0x05, 0x62, 0x18, 0xc2, 0x05, 0x66, 0xc2, 0x00, 0xa4,
- 0x08, 0x99, 0xc9, 0x15, 0xc2, 0x05, 0x76, 0x0e, 0xc2, 0x05, 0x86, 0xc2,
- 0x02, 0x59, 0x08, 0x99, 0x81, 0xc2, 0x1d, 0x5f, 0x08, 0x99, 0x79, 0xc2,
- 0x00, 0xad, 0x08, 0x99, 0x71, 0x04, 0xc2, 0x05, 0x90, 0x12, 0xc2, 0x05,
- 0x9a, 0x06, 0xc2, 0x05, 0xa4, 0x16, 0xc2, 0x05, 0xb2, 0x10, 0xc2, 0x05,
- 0xc0, 0x0c, 0xc2, 0x05, 0xd6, 0x05, 0xc2, 0x05, 0xe0, 0x09, 0xc2, 0x05,
- 0xea, 0x0d, 0xc2, 0x05, 0xf4, 0x83, 0x08, 0x98, 0x2b, 0x02, 0x05, 0xfe,
- 0xc2, 0x02, 0x84, 0x08, 0x98, 0x99, 0x97, 0x08, 0x98, 0x4b, 0x02, 0x06,
- 0x0a, 0x8b, 0x08, 0x98, 0x3b, 0x02, 0x06, 0x0e, 0xc2, 0x00, 0x4c, 0x08,
- 0x98, 0x78, 0xc5, 0xd8, 0x3b, 0x08, 0x9a, 0xe9, 0x42, 0x03, 0x32, 0xc2,
- 0x06, 0x12, 0x03, 0xc2, 0x06, 0x1e, 0xc5, 0x35, 0x00, 0x08, 0x99, 0xe1,
- 0x05, 0x42, 0x06, 0x2a, 0x46, 0x00, 0x6b, 0x42, 0x06, 0x36, 0xc5, 0x07,
- 0x62, 0x01, 0x12, 0x89, 0xca, 0x37, 0x0e, 0x01, 0x12, 0x70, 0x42, 0x00,
- 0xc7, 0xc2, 0x06, 0x40, 0x0a, 0xc2, 0x06, 0x4a, 0x03, 0xc2, 0x06, 0x5e,
- 0x16, 0xc2, 0x06, 0x6e, 0x07, 0xc2, 0x06, 0x78, 0xc2, 0x17, 0x51, 0x00,
- 0xe5, 0xb9, 0xc2, 0x00, 0x60, 0x00, 0xe5, 0xb1, 0xc2, 0x00, 0x4b, 0x00,
- 0xe5, 0x99, 0x0c, 0xc2, 0x06, 0x82, 0xc3, 0xe7, 0xb4, 0x00, 0xe5, 0x71,
- 0x05, 0xc2, 0x06, 0x8e, 0x15, 0xc2, 0x06, 0x9e, 0xc3, 0xe6, 0xa9, 0x00,
- 0xe5, 0x39, 0x09, 0xc2, 0x06, 0xaa, 0x0d, 0xc2, 0x06, 0xb6, 0x12, 0xc2,
- 0x06, 0xc2, 0xc2, 0x01, 0xbd, 0x00, 0xe5, 0x19, 0xc3, 0x0f, 0xf4, 0x00,
- 0xe5, 0x01, 0x1c, 0xc2, 0x06, 0xce, 0xc2, 0x00, 0xb7, 0x00, 0xe4, 0xe9,
- 0xc3, 0x28, 0x02, 0x00, 0xe4, 0xe1, 0xc3, 0x14, 0x99, 0x00, 0xe4, 0xd9,
- 0xc2, 0x00, 0x34, 0x00, 0xe4, 0xc1, 0xc3, 0x25, 0xc3, 0x00, 0xe4, 0xa9,
- 0xc3, 0x3f, 0x20, 0x00, 0xe4, 0x99, 0xc3, 0x10, 0xf0, 0x00, 0xe4, 0x88,
- 0x03, 0xc2, 0x06, 0xda, 0xc3, 0x10, 0xf0, 0x00, 0x85, 0x09, 0x09, 0xc2,
- 0x06, 0xe4, 0xc3, 0x3f, 0x20, 0x00, 0x85, 0x19, 0xc2, 0x00, 0x4d, 0x00,
- 0x85, 0x21, 0xc3, 0x25, 0xc3, 0x00, 0x85, 0x29, 0x1c, 0xc2, 0x06, 0xf0,
- 0x42, 0x00, 0xb3, 0xc2, 0x06, 0xfc, 0xc2, 0x00, 0x34, 0x00, 0x85, 0x41,
- 0x0d, 0xc2, 0x07, 0x04, 0xc3, 0x00, 0xe3, 0x00, 0x85, 0x51, 0xc3, 0x14,
- 0x99, 0x00, 0x85, 0x59, 0xc3, 0x28, 0x02, 0x00, 0x85, 0x61, 0xc2, 0x00,
- 0xb7, 0x00, 0x85, 0x69, 0x12, 0xc2, 0x07, 0x10, 0xc3, 0x0f, 0xf4, 0x00,
- 0x85, 0x81, 0x15, 0xc2, 0x07, 0x1c, 0xc2, 0x01, 0xbd, 0x00, 0x85, 0x99,
- 0xc3, 0xe6, 0xa9, 0x00, 0x85, 0xb9, 0x05, 0xc2, 0x07, 0x28, 0x0c, 0xc2,
- 0x07, 0x38, 0xc3, 0xe7, 0xb4, 0x00, 0x85, 0xf1, 0x0a, 0xc2, 0x07, 0x44,
- 0xc2, 0x00, 0x4b, 0x00, 0x86, 0x19, 0xc2, 0x17, 0x51, 0x00, 0x86, 0x38,
- 0x03, 0xc2, 0x07, 0x58, 0xc3, 0x10, 0xf0, 0x00, 0x86, 0x89, 0x09, 0xc2,
- 0x07, 0x68, 0xc3, 0x3f, 0x20, 0x00, 0x86, 0x99, 0x07, 0xc2, 0x07, 0x74,
- 0xc3, 0x25, 0xc3, 0x00, 0x86, 0xa9, 0x1c, 0xc2, 0x07, 0x7e, 0x16, 0xc2,
- 0x07, 0x8a, 0xc2, 0x00, 0x34, 0x00, 0x86, 0xc1, 0x0d, 0xc2, 0x07, 0x94,
- 0x42, 0x00, 0xc7, 0xc2, 0x07, 0xa0, 0xc3, 0x14, 0x99, 0x00, 0x86, 0xd9,
- 0xc3, 0x28, 0x02, 0x00, 0x86, 0xe1, 0xc2, 0x00, 0xb7, 0x00, 0x86, 0xe9,
- 0x12, 0xc2, 0x07, 0xaa, 0xc3, 0x0f, 0xf4, 0x00, 0x87, 0x01, 0x15, 0xc2,
- 0x07, 0xb6, 0xc2, 0x01, 0xbd, 0x00, 0x87, 0x19, 0xc3, 0xe6, 0xa9, 0x00,
- 0x87, 0x39, 0x05, 0xc2, 0x07, 0xc2, 0x0c, 0xc2, 0x07, 0xd2, 0xc3, 0xe7,
- 0xb4, 0x00, 0x87, 0x71, 0x0a, 0xc2, 0x07, 0xde, 0xc2, 0x00, 0x4b, 0x00,
- 0x87, 0x99, 0xc2, 0x00, 0x60, 0x00, 0x87, 0xb1, 0xc2, 0x17, 0x51, 0x00,
- 0x87, 0xb8, 0x03, 0xc2, 0x07, 0xf2, 0xc3, 0x10, 0xf0, 0x01, 0x68, 0x09,
- 0x09, 0xc2, 0x07, 0xfc, 0xc3, 0x3f, 0x20, 0x01, 0x68, 0x19, 0xc2, 0x00,
- 0x4d, 0x01, 0x68, 0x21, 0xc3, 0x25, 0xc3, 0x01, 0x68, 0x29, 0x1c, 0xc2,
- 0x08, 0x08, 0x42, 0x00, 0xb3, 0xc2, 0x08, 0x14, 0xc2, 0x00, 0x34, 0x01,
- 0x68, 0x41, 0x0d, 0xc2, 0x08, 0x1c, 0xc3, 0x00, 0xe3, 0x01, 0x68, 0x51,
- 0xc3, 0x14, 0x99, 0x01, 0x68, 0x59, 0xc3, 0x28, 0x02, 0x01, 0x68, 0x61,
- 0xc2, 0x00, 0xb7, 0x01, 0x68, 0x69, 0x12, 0xc2, 0x08, 0x28, 0xc3, 0x0f,
- 0xf4, 0x01, 0x68, 0x81, 0x15, 0xc2, 0x08, 0x34, 0xc2, 0x01, 0xbd, 0x01,
- 0x68, 0x99, 0xc3, 0xe6, 0xa9, 0x01, 0x68, 0xb9, 0x05, 0xc2, 0x08, 0x40,
- 0x0c, 0xc2, 0x08, 0x50, 0xc3, 0xe7, 0xb4, 0x01, 0x68, 0xf1, 0x0a, 0xc2,
- 0x08, 0x5c, 0xc2, 0x00, 0x4b, 0x01, 0x69, 0x19, 0xc2, 0x17, 0x51, 0x01,
- 0x69, 0x38, 0xc3, 0xe7, 0x0c, 0x01, 0x60, 0x01, 0x04, 0xc2, 0x08, 0x70,
- 0xc4, 0xe0, 0x5b, 0x01, 0x60, 0x11, 0xc7, 0xc7, 0xe6, 0x01, 0x60, 0x19,
- 0x06, 0xc2, 0x08, 0x7c, 0x1b, 0xc2, 0x08, 0x8e, 0x1c, 0xc2, 0x08, 0xa0,
- 0x8b, 0x01, 0x60, 0x5b, 0x02, 0x08, 0xac, 0xc4, 0xe2, 0x8b, 0x01, 0x60,
- 0x69, 0x0e, 0xc2, 0x08, 0xbe, 0xc7, 0x61, 0xe4, 0x01, 0x60, 0x79, 0xc5,
- 0xd9, 0xdf, 0x01, 0x60, 0x81, 0x11, 0xc2, 0x08, 0xca, 0x12, 0xc2, 0x08,
- 0xd6, 0xc5, 0xd7, 0xa0, 0x01, 0x60, 0x99, 0x15, 0xc2, 0x08, 0xe0, 0x16,
- 0xc2, 0x08, 0xf9, 0xc3, 0xca, 0x3a, 0x01, 0x60, 0xb1, 0x08, 0xc2, 0x09,
- 0x0b, 0xc4, 0xe1, 0x07, 0x01, 0x60, 0xc1, 0x05, 0x42, 0x09, 0x17, 0xc3,
- 0xe7, 0x0c, 0x01, 0x61, 0x81, 0x04, 0xc2, 0x09, 0x23, 0xc4, 0xe0, 0x5b,
- 0x01, 0x61, 0x91, 0xc7, 0xc7, 0xe6, 0x01, 0x61, 0x99, 0x06, 0xc2, 0x09,
- 0x2f, 0x1b, 0xc2, 0x09, 0x41, 0x1c, 0xc2, 0x09, 0x53, 0x8b, 0x01, 0x61,
- 0xdb, 0x02, 0x09, 0x5f, 0xc4, 0xe2, 0x8b, 0x01, 0x61, 0xe9, 0x0e, 0xc2,
- 0x09, 0x71, 0xc7, 0x61, 0xe4, 0x01, 0x61, 0xf9, 0xc5, 0xd9, 0xdf, 0x01,
- 0x62, 0x01, 0x11, 0xc2, 0x09, 0x7d, 0x12, 0xc2, 0x09, 0x89, 0xc5, 0xd7,
- 0xa0, 0x01, 0x62, 0x19, 0x15, 0xc2, 0x09, 0x93, 0x16, 0xc2, 0x09, 0xac,
- 0xc3, 0xca, 0x3a, 0x01, 0x62, 0x31, 0x08, 0xc2, 0x09, 0xbe, 0xc4, 0xe1,
- 0x07, 0x01, 0x62, 0x41, 0x05, 0x42, 0x09, 0xca, 0xcb, 0x1e, 0x17, 0x00,
- 0x58, 0x09, 0x03, 0xc2, 0x09, 0xd6, 0x42, 0x03, 0x32, 0xc2, 0x09, 0xe2,
- 0xc5, 0x35, 0x00, 0x00, 0x59, 0xe1, 0xc8, 0x81, 0x29, 0x00, 0x5a, 0xa8,
- 0x83, 0x00, 0x58, 0x2b, 0x02, 0x09, 0xee, 0x8b, 0x00, 0x58, 0x3b, 0x02,
- 0x09, 0xfa, 0x97, 0x00, 0x58, 0x4b, 0x02, 0x09, 0xfe, 0x18, 0xc2, 0x0a,
- 0x02, 0x87, 0x00, 0x58, 0x79, 0x91, 0x00, 0x58, 0x99, 0x0d, 0xc2, 0x0a,
- 0x0c, 0x09, 0xc2, 0x0a, 0x16, 0x10, 0xc2, 0x0a, 0x20, 0x05, 0xc2, 0x0a,
- 0x36, 0x0c, 0xc2, 0x0a, 0x40, 0x16, 0xc2, 0x0a, 0x4a, 0x06, 0xc2, 0x0a,
- 0x58, 0x12, 0xc2, 0x0a, 0x66, 0x04, 0xc2, 0x0a, 0x70, 0xc2, 0x00, 0xad,
- 0x00, 0x59, 0x71, 0x1b, 0xc2, 0x0a, 0x7a, 0x14, 0xc2, 0x0a, 0x84, 0x0e,
- 0xc2, 0x0a, 0x94, 0x15, 0xc2, 0x0a, 0x9e, 0xc2, 0x00, 0xa4, 0x00, 0x59,
- 0xc9, 0xc2, 0x01, 0x09, 0x00, 0x5b, 0x88, 0x03, 0xc2, 0x0a, 0xae, 0x8b,
- 0x00, 0x59, 0xfb, 0x02, 0x0a, 0xba, 0x97, 0x00, 0x5a, 0x0b, 0x02, 0x0a,
- 0xbe, 0x48, 0xac, 0xc1, 0xc2, 0x0a, 0xc2, 0x87, 0x00, 0x5a, 0x39, 0x91,
- 0x00, 0x5a, 0x58, 0xcd, 0x71, 0x44, 0x00, 0x5a, 0xb1, 0xcd, 0x75, 0x88,
- 0x00, 0x5a, 0xb8, 0xc4, 0x15, 0xd3, 0x00, 0x5b, 0x31, 0xc3, 0x01, 0xb4,
- 0x00, 0x5b, 0x39, 0x16, 0xc2, 0x0a, 0xd0, 0x08, 0xc2, 0x0a, 0xdc, 0x15,
- 0xc2, 0x0a, 0xe8, 0xc5, 0x01, 0xdb, 0x00, 0x5b, 0x71, 0xc4, 0x22, 0x71,
- 0x00, 0x5b, 0x78, 0x44, 0x01, 0xb4, 0xc2, 0x0a, 0xf4, 0x46, 0x04, 0x5d,
- 0x42, 0x0b, 0x0c, 0x0a, 0xc2, 0x0b, 0x18, 0x19, 0xc2, 0x0b, 0x2a, 0xc2,
- 0x00, 0x4d, 0x0f, 0x68, 0x52, 0x02, 0x0b, 0x3a, 0x11, 0xc2, 0x0b, 0x40,
- 0x0b, 0x42, 0x0b, 0x52, 0x00, 0x42, 0x0b, 0x64, 0xc2, 0x26, 0x51, 0x0f,
- 0x68, 0x33, 0x02, 0x0b, 0x70, 0xc4, 0x18, 0x83, 0x0f, 0x68, 0x3a, 0x02,
- 0x0b, 0x7d, 0x9b, 0x0f, 0x68, 0x8b, 0x02, 0x0b, 0x8a, 0x00, 0x42, 0x0b,
- 0x90, 0xc2, 0x0c, 0x57, 0x0f, 0x68, 0x93, 0x02, 0x0b, 0x9c, 0x00, 0x42,
- 0x0b, 0xa2, 0xc2, 0x01, 0x47, 0x0f, 0x69, 0x7b, 0x02, 0x0b, 0xae, 0xc4,
- 0x04, 0x5e, 0x0f, 0x69, 0x81, 0xc2, 0x00, 0x4d, 0x0f, 0x69, 0xba, 0x02,
- 0x0b, 0xb4, 0xc3, 0x06, 0x9e, 0x0f, 0x69, 0x8b, 0x02, 0x0b, 0xba, 0xc3,
- 0x0c, 0x5b, 0x0f, 0x69, 0x90, 0xc2, 0x26, 0x51, 0x0f, 0x69, 0x9b, 0x02,
- 0x0b, 0xc0, 0xc4, 0x18, 0x83, 0x0f, 0x69, 0xa0, 0xc6, 0x70, 0xe9, 0x01,
- 0x01, 0x21, 0xd9, 0x10, 0x70, 0x01, 0x71, 0x58, 0x42, 0x08, 0x22, 0xc2,
- 0x0b, 0xc6, 0x47, 0x02, 0x21, 0xc2, 0x0b, 0xd2, 0x42, 0x00, 0x2b, 0xc2,
- 0x0b, 0xea, 0x08, 0xc2, 0x0b, 0xf4, 0xc4, 0x01, 0xe3, 0x0f, 0xa8, 0x99,
- 0x4d, 0x7f, 0x77, 0xc2, 0x0c, 0x00, 0xca, 0x75, 0x0d, 0x0f, 0xa2, 0x80,
- 0xd9, 0x20, 0x1d, 0x01, 0x3d, 0xf1, 0x4f, 0x69, 0x02, 0x42, 0x0c, 0x0c,
- 0xce, 0x1b, 0x63, 0x0b, 0x7f, 0x19, 0xc9, 0xab, 0xc5, 0x0b, 0x7f, 0x10,
- 0x4c, 0x10, 0x7e, 0xc2, 0x0c, 0x18, 0x4a, 0x52, 0xb5, 0xc2, 0x0c, 0x2a,
- 0x47, 0x02, 0x90, 0x42, 0x0c, 0x36, 0x46, 0xcb, 0x05, 0xc2, 0x0c, 0x8c,
- 0x4c, 0x8a, 0x28, 0x42, 0x0c, 0x9c, 0x47, 0x33, 0xef, 0xc2, 0x0c, 0xa8,
- 0x4d, 0x26, 0xea, 0xc2, 0x0c, 0xbd, 0x4f, 0x07, 0x17, 0x42, 0x0c, 0xf8,
- 0x47, 0xc7, 0x99, 0xc2, 0x0d, 0x33, 0x48, 0xbb, 0x4d, 0x42, 0x0d, 0x52,
- 0x47, 0x33, 0xef, 0xc2, 0x0d, 0x6b, 0x47, 0x02, 0x90, 0x42, 0x0d, 0x75,
- 0x15, 0xc2, 0x0d, 0xd7, 0x4b, 0x51, 0x67, 0x42, 0x0d, 0xe3, 0x47, 0x02,
- 0x90, 0xc2, 0x0e, 0x56, 0x48, 0x56, 0x61, 0x42, 0x0e, 0xb3, 0xcd, 0x76,
- 0x46, 0x00, 0xe3, 0xf9, 0xc6, 0x76, 0x4d, 0x00, 0xe3, 0xf0, 0x8a, 0x00,
+ 0x39, 0xc2, 0x0c, 0x25, 0x05, 0x4a, 0x21, 0x09, 0xc1, 0x88, 0x43, 0x83,
+ 0x05, 0x49, 0xa8, 0xc2, 0x02, 0x1d, 0x05, 0x4a, 0x11, 0x83, 0x05, 0x49,
+ 0xc0, 0x15, 0xc1, 0x88, 0x4d, 0x03, 0xc1, 0x88, 0x6a, 0x11, 0xc1, 0x88,
+ 0x72, 0xd6, 0x2d, 0xf5, 0x01, 0x3e, 0x31, 0x42, 0x01, 0x0e, 0xc1, 0x88,
+ 0x84, 0x4a, 0x02, 0x5b, 0xc1, 0x88, 0x90, 0x05, 0xc1, 0x88, 0x9c, 0xcb,
+ 0x1c, 0xe0, 0x00, 0x01, 0x4b, 0x01, 0x88, 0xb1, 0x08, 0xc1, 0x88, 0xb5,
+ 0xe0, 0x07, 0x47, 0x01, 0x16, 0x51, 0x16, 0xc1, 0x88, 0xbf, 0x42, 0x00,
+ 0x68, 0xc1, 0x88, 0xd3, 0x19, 0xc1, 0x88, 0xdf, 0x46, 0x04, 0x91, 0xc1,
+ 0x88, 0xeb, 0xd7, 0x28, 0x0c, 0x01, 0x70, 0x69, 0xd6, 0x31, 0x39, 0x01,
+ 0x70, 0xe8, 0x19, 0xc1, 0x88, 0xf7, 0x16, 0xc1, 0x89, 0x06, 0x15, 0xc1,
+ 0x89, 0x16, 0x0a, 0xc1, 0x89, 0x22, 0xd0, 0x5c, 0xef, 0x0f, 0xc1, 0xf1,
+ 0x44, 0x00, 0x62, 0xc1, 0x89, 0x2c, 0xd1, 0x51, 0xbe, 0x01, 0x0f, 0xf1,
+ 0x06, 0xc1, 0x89, 0x39, 0x12, 0xc1, 0x89, 0x45, 0x14, 0xc1, 0x89, 0x51,
+ 0xcf, 0x62, 0x1c, 0x01, 0x5a, 0x31, 0x04, 0xc1, 0x89, 0x5d, 0x08, 0xc1,
+ 0x89, 0x6f, 0xd7, 0x27, 0x3d, 0x0f, 0xc5, 0x38, 0xca, 0x50, 0xa4, 0x00,
+ 0x7e, 0xb8, 0xc4, 0x00, 0xcd, 0x01, 0x5d, 0x81, 0xc5, 0x00, 0x47, 0x01,
+ 0x5d, 0x88, 0xc4, 0x00, 0xcd, 0x01, 0x5d, 0x91, 0xc5, 0x00, 0x47, 0x01,
+ 0x5d, 0x98, 0xc2, 0x02, 0x6a, 0x01, 0x5d, 0xa1, 0xc4, 0x00, 0x68, 0x01,
+ 0x5d, 0xb0, 0xc2, 0x02, 0x6a, 0x01, 0x5d, 0xa9, 0xc4, 0x00, 0x68, 0x01,
+ 0x5d, 0xb8, 0xc7, 0xc8, 0xda, 0x0f, 0x9d, 0x11, 0xc5, 0xe3, 0x78, 0x0f,
+ 0xb7, 0xe0, 0xc6, 0xd2, 0xe6, 0x0f, 0x93, 0x21, 0xc2, 0x01, 0xc3, 0x0f,
+ 0x93, 0x10, 0x00, 0x41, 0x89, 0x7b, 0x0b, 0xc1, 0x89, 0x8d, 0xc3, 0x08,
+ 0xde, 0x01, 0x0b, 0x18, 0xc2, 0x22, 0x45, 0x01, 0x0b, 0x2b, 0x01, 0x89,
+ 0x9f, 0xc4, 0x15, 0xa7, 0x01, 0x0b, 0x30, 0xc2, 0x01, 0x04, 0x01, 0x0b,
+ 0x4b, 0x01, 0x89, 0xa5, 0x19, 0xc1, 0x89, 0xab, 0xc4, 0x05, 0xde, 0x01,
+ 0x0b, 0x10, 0xc5, 0x66, 0x81, 0x01, 0x0b, 0x51, 0xc4, 0x00, 0x48, 0x01,
+ 0x0b, 0x38, 0x42, 0x03, 0x00, 0xc1, 0x89, 0xb5, 0xcb, 0x97, 0x95, 0x08,
+ 0x0c, 0x91, 0xcd, 0x81, 0x3f, 0x08, 0x0c, 0xc0, 0x46, 0x01, 0xab, 0x41,
+ 0x89, 0xc1, 0xc6, 0x02, 0xc9, 0x0f, 0x8b, 0x61, 0xc6, 0x47, 0x4a, 0x0f,
+ 0x8b, 0x59, 0xc6, 0x5d, 0x38, 0x0f, 0x8b, 0x50, 0xd8, 0x24, 0xa4, 0x01,
+ 0x70, 0x38, 0xc5, 0x01, 0x47, 0x08, 0x73, 0xe9, 0xc7, 0x08, 0x19, 0x08,
+ 0x73, 0xe1, 0xc4, 0x01, 0x1d, 0x08, 0x73, 0xd8, 0xc8, 0x0d, 0x7e, 0x08,
+ 0x73, 0xd1, 0xc2, 0x0d, 0x8b, 0x08, 0x73, 0x88, 0xc8, 0x0d, 0x7e, 0x08,
+ 0x73, 0xc9, 0x9b, 0x08, 0x73, 0x80, 0x44, 0x15, 0xa7, 0xc1, 0x89, 0xcd,
+ 0x42, 0x22, 0x45, 0x41, 0x89, 0xd9, 0x0b, 0xc1, 0x89, 0xe5, 0x11, 0x41,
+ 0x89, 0xf1, 0x0a, 0xc1, 0x89, 0xfd, 0x19, 0xc1, 0x8a, 0x09, 0xc2, 0x01,
+ 0x04, 0x08, 0x73, 0x48, 0xc4, 0x15, 0xa7, 0x08, 0x73, 0x31, 0xc2, 0x22,
+ 0x45, 0x08, 0x73, 0x28, 0xc3, 0x0d, 0x8f, 0x08, 0x73, 0x21, 0xc3, 0x08,
+ 0xde, 0x08, 0x73, 0x18, 0xc4, 0x05, 0xde, 0x08, 0x73, 0x11, 0xc2, 0x0a,
+ 0x20, 0x08, 0x73, 0x08, 0x08, 0xc1, 0x8a, 0x15, 0x91, 0x00, 0xb5, 0x73,
+ 0x01, 0x8a, 0x21, 0x15, 0xc1, 0x8a, 0x3f, 0x8d, 0x00, 0xb7, 0x8b, 0x01,
+ 0x8a, 0x58, 0x9a, 0x00, 0xb7, 0x51, 0x93, 0x00, 0xb7, 0x49, 0x0b, 0xc1,
+ 0x8a, 0x5e, 0x0e, 0xc1, 0x8a, 0x7f, 0x85, 0x00, 0xb6, 0x6b, 0x01, 0x8a,
+ 0x8b, 0x87, 0x00, 0xb6, 0x13, 0x01, 0x8a, 0x9b, 0x86, 0x00, 0xb6, 0x8b,
+ 0x01, 0x8a, 0xb3, 0xcc, 0x85, 0xc8, 0x00, 0xb6, 0xb9, 0xd8, 0x26, 0x84,
+ 0x00, 0xb6, 0x91, 0x16, 0xc1, 0x8a, 0xbf, 0x9c, 0x00, 0xb6, 0x71, 0x03,
+ 0xc1, 0x8a, 0xcb, 0xcf, 0x69, 0x8d, 0x00, 0xb6, 0x41, 0x89, 0x00, 0xb5,
+ 0xab, 0x01, 0x8a, 0xe3, 0xc7, 0xcb, 0x18, 0x00, 0xb6, 0x19, 0xd1, 0x56,
+ 0x86, 0x00, 0xb5, 0xf1, 0x42, 0x01, 0x0e, 0xc1, 0x8a, 0xed, 0x99, 0x00,
+ 0xb5, 0x2b, 0x01, 0x8a, 0xf9, 0xd0, 0x5d, 0xff, 0x00, 0xb5, 0x89, 0x9b,
+ 0x00, 0xb5, 0x23, 0x01, 0x8a, 0xff, 0xc9, 0xad, 0xc0, 0x00, 0xb5, 0x11,
+ 0x98, 0x00, 0xb5, 0x08, 0xa1, 0x70, 0x0c, 0x49, 0xa0, 0x70, 0x0c, 0x41,
+ 0xa6, 0x70, 0x0c, 0x71, 0xa5, 0x70, 0x0c, 0x69, 0xa4, 0x70, 0x0c, 0x61,
+ 0xa3, 0x70, 0x0c, 0x59, 0xa2, 0x70, 0x0c, 0x51, 0x9f, 0x70, 0x0c, 0x39,
+ 0x9e, 0x70, 0x0c, 0x31, 0x9d, 0x70, 0x0c, 0x28, 0xa0, 0x70, 0x0b, 0x01,
+ 0x9f, 0x70, 0x0a, 0xf9, 0x9e, 0x70, 0x0a, 0xf1, 0x9d, 0x70, 0x0a, 0xe9,
+ 0xa6, 0x70, 0x0b, 0x31, 0xa5, 0x70, 0x0b, 0x29, 0xa4, 0x70, 0x0b, 0x21,
+ 0xa3, 0x70, 0x0b, 0x19, 0xa2, 0x70, 0x0b, 0x11, 0xa1, 0x70, 0x0b, 0x08,
+ 0xa6, 0x70, 0x0a, 0xe1, 0xa5, 0x70, 0x0a, 0xd9, 0xa4, 0x70, 0x0a, 0xd1,
+ 0xa3, 0x70, 0x0a, 0xc9, 0xa2, 0x70, 0x0a, 0xc1, 0xa1, 0x70, 0x0a, 0xb9,
+ 0xa0, 0x70, 0x0a, 0xb1, 0x9f, 0x70, 0x0a, 0xa9, 0x9e, 0x70, 0x0a, 0xa1,
+ 0x9d, 0x70, 0x0a, 0x98, 0xa6, 0x70, 0x0d, 0xb1, 0xa5, 0x70, 0x0d, 0xa9,
+ 0xa4, 0x70, 0x0d, 0xa1, 0xa3, 0x70, 0x0d, 0x99, 0xa2, 0x70, 0x0d, 0x91,
+ 0xa1, 0x70, 0x0d, 0x89, 0xa0, 0x70, 0x0d, 0x81, 0x9f, 0x70, 0x0d, 0x79,
+ 0x9e, 0x70, 0x0d, 0x71, 0x9d, 0x70, 0x0d, 0x68, 0xa6, 0x70, 0x0d, 0x61,
+ 0xa5, 0x70, 0x0d, 0x59, 0xa4, 0x70, 0x0d, 0x51, 0xa3, 0x70, 0x0d, 0x49,
+ 0xa2, 0x70, 0x0d, 0x41, 0xa1, 0x70, 0x0d, 0x39, 0xa0, 0x70, 0x0d, 0x31,
+ 0x9f, 0x70, 0x0d, 0x29, 0x9e, 0x70, 0x0d, 0x21, 0x9d, 0x70, 0x0d, 0x18,
+ 0xa6, 0x70, 0x0d, 0x11, 0xa5, 0x70, 0x0d, 0x09, 0xa4, 0x70, 0x0d, 0x01,
+ 0xa3, 0x70, 0x0c, 0xf9, 0xa2, 0x70, 0x0c, 0xf1, 0xa1, 0x70, 0x0c, 0xe9,
+ 0xa0, 0x70, 0x0c, 0xe1, 0x9f, 0x70, 0x0c, 0xd9, 0x9e, 0x70, 0x0c, 0xd1,
+ 0x9d, 0x70, 0x0c, 0xc8, 0xa6, 0x70, 0x0c, 0xc1, 0xa5, 0x70, 0x0c, 0xb9,
+ 0xa4, 0x70, 0x0c, 0xb1, 0xa3, 0x70, 0x0c, 0xa9, 0xa2, 0x70, 0x0c, 0xa1,
+ 0xa1, 0x70, 0x0c, 0x99, 0xa0, 0x70, 0x0c, 0x91, 0x9f, 0x70, 0x0c, 0x89,
+ 0x9e, 0x70, 0x0c, 0x81, 0x9d, 0x70, 0x0c, 0x78, 0xa6, 0x70, 0x0c, 0x21,
+ 0xa5, 0x70, 0x0c, 0x19, 0xa4, 0x70, 0x0c, 0x11, 0xa3, 0x70, 0x0c, 0x09,
+ 0xa2, 0x70, 0x0c, 0x01, 0xa1, 0x70, 0x0b, 0xf9, 0xa0, 0x70, 0x0b, 0xf1,
+ 0x9f, 0x70, 0x0b, 0xe9, 0x9e, 0x70, 0x0b, 0xe1, 0x9d, 0x70, 0x0b, 0xd8,
+ 0xa6, 0x70, 0x0b, 0xd1, 0xa5, 0x70, 0x0b, 0xc9, 0xa4, 0x70, 0x0b, 0xc1,
+ 0xa3, 0x70, 0x0b, 0xb9, 0xa2, 0x70, 0x0b, 0xb1, 0xa1, 0x70, 0x0b, 0xa9,
+ 0xa0, 0x70, 0x0b, 0xa1, 0x9f, 0x70, 0x0b, 0x99, 0x9e, 0x70, 0x0b, 0x91,
+ 0x9d, 0x70, 0x0b, 0x88, 0xa6, 0x70, 0x0b, 0x81, 0xa5, 0x70, 0x0b, 0x79,
+ 0xa4, 0x70, 0x0b, 0x71, 0xa3, 0x70, 0x0b, 0x69, 0xa2, 0x70, 0x0b, 0x61,
+ 0xa1, 0x70, 0x0b, 0x59, 0xa0, 0x70, 0x0b, 0x51, 0x9f, 0x70, 0x0b, 0x49,
+ 0x9e, 0x70, 0x0b, 0x41, 0x9d, 0x70, 0x0b, 0x38, 0xa3, 0x70, 0x0f, 0x79,
+ 0xa2, 0x70, 0x0f, 0x71, 0xa1, 0x70, 0x0f, 0x69, 0xa0, 0x70, 0x0f, 0x61,
+ 0x9f, 0x70, 0x0f, 0x59, 0x9e, 0x70, 0x0f, 0x51, 0x9d, 0x70, 0x0f, 0x48,
+ 0xa6, 0x70, 0x0f, 0x41, 0xa5, 0x70, 0x0f, 0x39, 0xa4, 0x70, 0x0f, 0x31,
+ 0xa3, 0x70, 0x0f, 0x29, 0xa2, 0x70, 0x0f, 0x21, 0xa1, 0x70, 0x0f, 0x19,
+ 0xa0, 0x70, 0x0f, 0x11, 0x9f, 0x70, 0x0f, 0x09, 0x9e, 0x70, 0x0f, 0x01,
+ 0x9d, 0x70, 0x0e, 0xf8, 0xa6, 0x70, 0x0e, 0xf1, 0xa5, 0x70, 0x0e, 0xe9,
+ 0xa4, 0x70, 0x0e, 0xe1, 0xa3, 0x70, 0x0e, 0xd9, 0xa2, 0x70, 0x0e, 0xd1,
+ 0xa1, 0x70, 0x0e, 0xc9, 0xa0, 0x70, 0x0e, 0xc1, 0x9f, 0x70, 0x0e, 0xb9,
+ 0x9e, 0x70, 0x0e, 0xb1, 0x9d, 0x70, 0x0e, 0xa8, 0xa6, 0x70, 0x0e, 0xa1,
+ 0xa5, 0x70, 0x0e, 0x99, 0xa4, 0x70, 0x0e, 0x91, 0xa3, 0x70, 0x0e, 0x89,
+ 0xa2, 0x70, 0x0e, 0x81, 0xa1, 0x70, 0x0e, 0x79, 0xa0, 0x70, 0x0e, 0x71,
+ 0x9f, 0x70, 0x0e, 0x69, 0x9e, 0x70, 0x0e, 0x61, 0x9d, 0x70, 0x0e, 0x58,
+ 0xa6, 0x70, 0x0e, 0x51, 0xa5, 0x70, 0x0e, 0x49, 0xa4, 0x70, 0x0e, 0x41,
+ 0xa3, 0x70, 0x0e, 0x39, 0xa2, 0x70, 0x0e, 0x31, 0xa1, 0x70, 0x0e, 0x29,
+ 0xa0, 0x70, 0x0e, 0x21, 0x9f, 0x70, 0x0e, 0x19, 0x9e, 0x70, 0x0e, 0x11,
+ 0x9d, 0x70, 0x0e, 0x08, 0xa6, 0x70, 0x0e, 0x01, 0xa5, 0x70, 0x0d, 0xf9,
+ 0xa4, 0x70, 0x0d, 0xf1, 0xa3, 0x70, 0x0d, 0xe9, 0xa2, 0x70, 0x0d, 0xe1,
+ 0xa1, 0x70, 0x0d, 0xd9, 0xa0, 0x70, 0x0d, 0xd1, 0x9f, 0x70, 0x0d, 0xc9,
+ 0x9e, 0x70, 0x0d, 0xc1, 0x9d, 0x70, 0x0d, 0xb8, 0x87, 0x05, 0x2f, 0x0b,
+ 0x01, 0x8b, 0x03, 0x0a, 0xc1, 0x8b, 0x0e, 0x19, 0xc1, 0x8b, 0x31, 0x12,
+ 0xc1, 0x8b, 0x54, 0x04, 0xc1, 0x8b, 0x6e, 0x0f, 0xc1, 0x8b, 0x8c, 0x0d,
+ 0xc1, 0x8b, 0xb0, 0x09, 0xc1, 0x8b, 0xd1, 0x08, 0xc1, 0x8b, 0xef, 0x18,
+ 0xc1, 0x8c, 0x09, 0x16, 0xc1, 0x8c, 0x23, 0x06, 0xc1, 0x8c, 0x41, 0x0e,
+ 0xc1, 0x8c, 0x5f, 0x14, 0xc1, 0x8c, 0x79, 0x10, 0xc1, 0x8c, 0x93, 0x15,
+ 0xc1, 0x8c, 0xc0, 0x1c, 0xc1, 0x8c, 0xde, 0x05, 0xc1, 0x8c, 0xfc, 0x0c,
+ 0xc1, 0x8d, 0x16, 0x1b, 0xc1, 0x8d, 0x30, 0x8b, 0x05, 0x29, 0x23, 0x01,
+ 0x8d, 0x4a, 0x83, 0x05, 0x2a, 0x4b, 0x01, 0x8d, 0x4e, 0x91, 0x05, 0x2d,
+ 0xd3, 0x01, 0x8d, 0x52, 0x97, 0x05, 0x2c, 0xaa, 0x01, 0x8d, 0x5d, 0x08,
+ 0xc1, 0x8d, 0x61, 0x0d, 0xc1, 0x8d, 0x6d, 0x16, 0xc1, 0x8d, 0x79, 0xc3,
+ 0xec, 0x81, 0x05, 0x30, 0xb1, 0xc4, 0x11, 0x26, 0x05, 0x30, 0xb9, 0x06,
+ 0xc1, 0x8d, 0x8b, 0xc4, 0x9d, 0xc2, 0x05, 0x30, 0xf8, 0xc2, 0x0a, 0x20,
+ 0x05, 0x31, 0x11, 0xc4, 0x05, 0xde, 0x05, 0x31, 0x18, 0xc3, 0x08, 0xde,
+ 0x05, 0x31, 0x21, 0xc3, 0x0d, 0x8f, 0x05, 0x31, 0x28, 0xc2, 0x22, 0x45,
+ 0x05, 0x31, 0x31, 0xc4, 0x15, 0xa7, 0x05, 0x31, 0x38, 0xc3, 0xec, 0x6c,
+ 0x0f, 0xdb, 0x81, 0xc3, 0xec, 0x6f, 0x0f, 0xdb, 0x89, 0xc3, 0xec, 0x72,
+ 0x0f, 0xdb, 0x91, 0xc3, 0xdd, 0x5b, 0x0f, 0xdb, 0x99, 0xc3, 0xd8, 0x15,
+ 0x0f, 0xdb, 0xa1, 0xc3, 0xec, 0x75, 0x0f, 0xdb, 0xa8, 0xd6, 0x2c, 0x95,
+ 0x01, 0x3e, 0x51, 0xd5, 0x35, 0xc5, 0x01, 0x4e, 0x81, 0xd6, 0x30, 0x47,
+ 0x01, 0x57, 0x11, 0xd5, 0x34, 0x75, 0x01, 0x57, 0x20, 0x00, 0x41, 0x8d,
+ 0x9d, 0x42, 0x00, 0x03, 0xc1, 0x8d, 0xa9, 0xcc, 0x89, 0x64, 0x0f, 0xb5,
+ 0x31, 0xc4, 0x1f, 0x02, 0x01, 0x71, 0x78, 0xc4, 0x00, 0xfa, 0x01, 0x81,
+ 0x8b, 0x01, 0x8d, 0xb8, 0xd6, 0x2f, 0x55, 0x01, 0x81, 0x92, 0x01, 0x8d,
+ 0xbc, 0x46, 0x0e, 0x97, 0xc1, 0x8d, 0xc2, 0xcb, 0x5e, 0x74, 0x0f, 0xbd,
+ 0x31, 0x46, 0x01, 0x31, 0xc1, 0x8d, 0xce, 0xcf, 0x6a, 0x14, 0x0f, 0xb3,
+ 0xe9, 0x15, 0xc1, 0x8d, 0xda, 0xd4, 0x3e, 0xe2, 0x0f, 0xbd, 0x98, 0xcc,
+ 0x06, 0xfb, 0x01, 0x16, 0xc9, 0xc9, 0x09, 0xde, 0x01, 0x16, 0xc0, 0xc7,
+ 0xc5, 0xdf, 0x00, 0xe7, 0xb9, 0xcb, 0x45, 0xc9, 0x00, 0xe7, 0x91, 0x48,
+ 0x10, 0xac, 0x41, 0x8d, 0xec, 0xd3, 0x45, 0xc1, 0x00, 0xe7, 0x99, 0xd3,
+ 0x40, 0xa2, 0x00, 0xe7, 0x81, 0x50, 0x59, 0x7f, 0x41, 0x8e, 0x07, 0xc8,
+ 0x73, 0x9c, 0x00, 0xe7, 0x2b, 0x01, 0x8e, 0x13, 0xc6, 0x73, 0x9e, 0x00,
+ 0xe7, 0x1b, 0x01, 0x8e, 0x19, 0xc7, 0x06, 0xa0, 0x00, 0xe7, 0x10, 0x45,
+ 0x02, 0x93, 0xc1, 0x8e, 0x1f, 0xc7, 0x0d, 0xd9, 0x00, 0xe6, 0xe8, 0xc8,
+ 0xa2, 0xc4, 0x00, 0xe7, 0xc1, 0x43, 0xec, 0xde, 0x41, 0x8e, 0x2b, 0xc5,
+ 0x03, 0x50, 0x00, 0xe7, 0xa1, 0xc5, 0x00, 0x34, 0x00, 0xe6, 0xc0, 0xcf,
+ 0x63, 0x84, 0x00, 0xe6, 0xf9, 0xcd, 0x0b, 0x7a, 0x00, 0xe6, 0xf1, 0xcd,
+ 0x79, 0xd5, 0x00, 0xe6, 0xd8, 0xce, 0x73, 0x96, 0x00, 0xe6, 0xe1, 0xc6,
+ 0xd2, 0xf2, 0x00, 0xe6, 0x80, 0xdb, 0x17, 0xb6, 0x00, 0xe6, 0xbb, 0x01,
+ 0x8e, 0x31, 0xd3, 0x06, 0x94, 0x00, 0xe6, 0xb1, 0xde, 0x0e, 0x31, 0x00,
+ 0xe6, 0xa8, 0xc2, 0x00, 0x34, 0x08, 0x2b, 0x89, 0x87, 0x08, 0x2b, 0x90,
+ 0x87, 0x08, 0x2b, 0x99, 0xc2, 0x00, 0x5b, 0x08, 0x2b, 0xa0, 0x87, 0x08,
+ 0x2b, 0xa9, 0xc2, 0x00, 0x5b, 0x08, 0x2b, 0xb0, 0x8b, 0x08, 0x2b, 0xb8,
+ 0xc2, 0x01, 0x0e, 0x08, 0x2b, 0xe9, 0x83, 0x08, 0x2b, 0xe0, 0xc2, 0x1c,
+ 0x3e, 0x08, 0x2b, 0xf8, 0xc2, 0x00, 0x96, 0x08, 0x2c, 0x19, 0x83, 0x08,
+ 0x2c, 0x10, 0x87, 0x08, 0x2c, 0x29, 0xc2, 0x1c, 0x3e, 0x08, 0x2c, 0x30,
+ 0xc2, 0x00, 0x5b, 0x08, 0x2c, 0x69, 0x87, 0x08, 0x2c, 0x60, 0x87, 0x08,
+ 0x2c, 0x71, 0xc2, 0x00, 0x5b, 0x08, 0x2c, 0x78, 0xc2, 0x00, 0x34, 0x08,
+ 0x2c, 0xc1, 0x87, 0x08, 0x2c, 0xc8, 0x87, 0x08, 0x2c, 0xd1, 0xc2, 0x00,
+ 0x5b, 0x08, 0x2c, 0xd8, 0x87, 0x08, 0x2c, 0xe1, 0xc2, 0x00, 0x5b, 0x08,
+ 0x2c, 0xe8, 0x8b, 0x08, 0x2c, 0xf0, 0x83, 0x08, 0x2d, 0x19, 0xc2, 0x01,
+ 0x0e, 0x08, 0x2d, 0x20, 0xc2, 0x1c, 0x3e, 0x08, 0x2d, 0x30, 0x83, 0x08,
+ 0x2d, 0x49, 0xc2, 0x00, 0x96, 0x08, 0x2d, 0x50, 0x87, 0x08, 0x2d, 0x61,
+ 0xc2, 0x1c, 0x3e, 0x08, 0x2d, 0x68, 0x87, 0x08, 0x2d, 0x99, 0xc2, 0x00,
+ 0x5b, 0x08, 0x2d, 0xa0, 0x87, 0x08, 0x2d, 0xa9, 0xc2, 0x00, 0x5b, 0x08,
+ 0x2d, 0xb0, 0xc7, 0x43, 0xa0, 0x01, 0x0a, 0xe9, 0xc6, 0xd7, 0xd2, 0x01,
+ 0x0a, 0xd0, 0xc7, 0x43, 0xa0, 0x01, 0x0a, 0xe1, 0xc6, 0xa2, 0x94, 0x01,
+ 0x0a, 0xb9, 0xc8, 0x08, 0x19, 0x00, 0x05, 0xf0, 0xc6, 0xa2, 0x94, 0x01,
+ 0x0a, 0xb1, 0xc6, 0x91, 0x0d, 0x01, 0x0a, 0xa0, 0xc4, 0x9d, 0x72, 0x01,
+ 0x0a, 0xc9, 0xc6, 0xd4, 0x06, 0x01, 0x0a, 0x80, 0xc4, 0x01, 0x48, 0x01,
+ 0x0a, 0x99, 0xc4, 0x1c, 0xe3, 0x01, 0x0a, 0x90, 0xca, 0x1e, 0x66, 0x70,
+ 0x03, 0x01, 0xcf, 0x57, 0x10, 0x70, 0x01, 0xf0, 0xc7, 0x7c, 0x8c, 0x70,
+ 0x02, 0xf9, 0x07, 0xc1, 0x8e, 0x37, 0x45, 0x01, 0xf2, 0x41, 0x8e, 0x43,
+ 0xd0, 0x07, 0x97, 0x70, 0x02, 0xf1, 0x11, 0x41, 0x8e, 0x4f, 0x45, 0x01,
+ 0x18, 0xc1, 0x8e, 0x5b, 0xce, 0x6a, 0x15, 0x70, 0x02, 0xe0, 0xcb, 0x2d,
+ 0xa8, 0x70, 0x01, 0xf9, 0xcc, 0x00, 0xf2, 0x70, 0x01, 0x10, 0xca, 0x0e,
+ 0xbd, 0x70, 0x01, 0xe9, 0xcf, 0x0f, 0x63, 0x70, 0x01, 0x08, 0xc8, 0x50,
+ 0x04, 0x70, 0x01, 0xd9, 0xc6, 0x2a, 0x4c, 0x70, 0x01, 0x79, 0xc4, 0x45,
+ 0xaa, 0x70, 0x01, 0x00, 0x45, 0x08, 0xd8, 0xc1, 0x8e, 0x6d, 0xca, 0x9b,
+ 0xa0, 0x70, 0x01, 0x20, 0xc8, 0x65, 0xfb, 0x70, 0x01, 0x59, 0xcb, 0x97,
+ 0x11, 0x70, 0x01, 0x28, 0xc7, 0x0a, 0x60, 0x70, 0x01, 0x51, 0xc9, 0x2d,
+ 0x3c, 0x70, 0x01, 0x39, 0xc8, 0x32, 0x18, 0x70, 0x01, 0x30, 0x97, 0x00,
+ 0xbb, 0x99, 0x8b, 0x00, 0xbb, 0x90, 0xc2, 0x0e, 0xe5, 0x00, 0xbb, 0x81,
+ 0xc2, 0x00, 0x4c, 0x00, 0xbb, 0x79, 0xc2, 0x00, 0x96, 0x00, 0xbb, 0x71,
+ 0xc2, 0x1a, 0x36, 0x00, 0xbb, 0x61, 0xc2, 0x00, 0x3f, 0x00, 0xbb, 0x59,
+ 0xc2, 0x02, 0x1d, 0x00, 0xbb, 0x51, 0xc2, 0x07, 0x44, 0x00, 0xbb, 0x49,
+ 0x10, 0xc1, 0x8e, 0x91, 0xc2, 0x0c, 0x25, 0x00, 0xbb, 0x39, 0xc2, 0x00,
+ 0x44, 0x00, 0xbb, 0x31, 0xc2, 0x07, 0x69, 0x00, 0xbb, 0x21, 0xc2, 0x06,
+ 0x6b, 0x00, 0xbb, 0x19, 0x97, 0x00, 0xbb, 0x11, 0x8b, 0x00, 0xbb, 0x09,
+ 0x83, 0x00, 0xbb, 0x00, 0x83, 0x00, 0xb8, 0x03, 0x01, 0x8e, 0x9b, 0xc2,
+ 0x01, 0x0e, 0x00, 0xb8, 0x89, 0xc2, 0x0e, 0xe5, 0x00, 0xb8, 0x81, 0xc2,
+ 0x00, 0x4c, 0x00, 0xb8, 0x79, 0xc2, 0x00, 0x96, 0x00, 0xb8, 0x71, 0xc2,
+ 0x00, 0x9a, 0x00, 0xb8, 0x69, 0xc2, 0x1a, 0x36, 0x00, 0xb8, 0x61, 0xc2,
+ 0x00, 0x3f, 0x00, 0xb8, 0x59, 0xc2, 0x02, 0x1d, 0x00, 0xb8, 0x51, 0xc2,
+ 0x07, 0x44, 0x00, 0xb8, 0x49, 0x10, 0xc1, 0x8e, 0xa1, 0xc2, 0x0c, 0x25,
+ 0x00, 0xb8, 0x39, 0xc2, 0x00, 0x44, 0x00, 0xb8, 0x31, 0xc2, 0x07, 0x69,
+ 0x00, 0xb8, 0x21, 0xc2, 0x06, 0x6b, 0x00, 0xb8, 0x19, 0x97, 0x00, 0xb8,
+ 0x11, 0x8b, 0x00, 0xb8, 0x08, 0xc8, 0x7d, 0xea, 0x00, 0xb8, 0xa9, 0xc6,
+ 0x21, 0x26, 0x00, 0xb8, 0xa0, 0x97, 0x00, 0xb8, 0x99, 0x8b, 0x00, 0xb8,
+ 0x90, 0x48, 0x9e, 0xd0, 0xc1, 0x8e, 0xab, 0xce, 0x1b, 0x7a, 0x0b, 0x7f,
+ 0x00, 0x46, 0x08, 0xd7, 0xc1, 0x8e, 0xdb, 0x47, 0x01, 0xff, 0x41, 0x8e,
+ 0xff, 0x44, 0x02, 0xcc, 0xc1, 0x8f, 0x6b, 0xd1, 0x51, 0x25, 0x08, 0xff,
+ 0x79, 0xc9, 0xb0, 0xea, 0x08, 0xff, 0x61, 0xcc, 0x88, 0x44, 0x08, 0xff,
+ 0x38, 0xc9, 0xb3, 0x18, 0x08, 0xff, 0x69, 0x4b, 0x8f, 0x8c, 0x41, 0x8f,
+ 0x93, 0xcb, 0x97, 0x27, 0x08, 0xff, 0x59, 0xcd, 0x74, 0xe7, 0x00, 0x5e,
+ 0xb9, 0xcc, 0x8a, 0x9c, 0x00, 0x5f, 0xc0, 0xcb, 0x98, 0x66, 0x08, 0xff,
+ 0x51, 0xca, 0x9c, 0x03, 0x00, 0x5f, 0xb8, 0xc8, 0x47, 0x48, 0x08, 0xff,
+ 0x31, 0x46, 0x02, 0x00, 0x41, 0x8f, 0x9f, 0xd3, 0x46, 0x7f, 0x08, 0xff,
+ 0x29, 0x45, 0x08, 0xd8, 0xc1, 0x90, 0x06, 0xc7, 0xca, 0xa1, 0x00, 0x5f,
+ 0x99, 0xc9, 0xb5, 0xa9, 0x00, 0x5f, 0xb0, 0xd8, 0x23, 0x54, 0x08, 0xfe,
+ 0xa1, 0x46, 0x05, 0xdd, 0xc1, 0x90, 0x2a, 0x44, 0x05, 0x17, 0x41, 0x90,
+ 0x42, 0x03, 0xc1, 0x90, 0x68, 0x8b, 0x00, 0x5d, 0xfb, 0x01, 0x90, 0x74,
+ 0x97, 0x00, 0x5e, 0x0b, 0x01, 0x90, 0x78, 0x87, 0x00, 0x5e, 0x33, 0x01,
+ 0x90, 0x7c, 0x91, 0x00, 0x5e, 0x52, 0x01, 0x90, 0x80, 0xc3, 0x03, 0x01,
+ 0x00, 0x5f, 0x81, 0x44, 0x05, 0x17, 0xc1, 0x90, 0x84, 0xc4, 0x02, 0xcb,
+ 0x00, 0x5f, 0xd0, 0xc4, 0x24, 0x35, 0x08, 0xb6, 0x49, 0xc5, 0x05, 0x1b,
+ 0x08, 0xb6, 0x41, 0x15, 0xc1, 0x90, 0x90, 0x08, 0xc1, 0x90, 0x9c, 0x16,
+ 0xc1, 0x90, 0xa8, 0xc3, 0x05, 0x17, 0x08, 0xb6, 0x09, 0xc4, 0x16, 0x57,
+ 0x08, 0xb6, 0x00, 0x83, 0x08, 0xb4, 0x03, 0x01, 0x90, 0xb4, 0x14, 0xc1,
+ 0x90, 0xc6, 0xc2, 0x01, 0x0e, 0x08, 0xb5, 0x49, 0x15, 0xc1, 0x90, 0xd0,
+ 0xc2, 0x06, 0x8c, 0x08, 0xb5, 0x31, 0xc2, 0x00, 0x96, 0x08, 0xb5, 0x29,
+ 0xc2, 0x1a, 0x36, 0x08, 0xb5, 0x19, 0xc2, 0x00, 0x3f, 0x08, 0xb5, 0x11,
+ 0x04, 0xc1, 0x90, 0xda, 0x12, 0xc1, 0x90, 0xe4, 0x10, 0xc1, 0x90, 0xee,
+ 0x06, 0xc1, 0x91, 0x04, 0x16, 0xc1, 0x91, 0x12, 0x0c, 0xc1, 0x91, 0x20,
+ 0x05, 0xc1, 0x91, 0x2a, 0x09, 0xc1, 0x91, 0x34, 0x0d, 0xc1, 0x91, 0x3e,
+ 0x91, 0x08, 0xb4, 0x41, 0x87, 0x08, 0xb4, 0x31, 0x97, 0x08, 0xb4, 0x23,
+ 0x01, 0x91, 0x48, 0x8b, 0x08, 0xb4, 0x12, 0x01, 0x91, 0x4c, 0xc5, 0x33,
+ 0x1a, 0x08, 0xb5, 0xb9, 0x42, 0x02, 0x52, 0xc1, 0x91, 0x50, 0xc8, 0x10,
+ 0xab, 0x08, 0xb5, 0x58, 0x03, 0xc1, 0x91, 0x5c, 0x91, 0x08, 0xb5, 0xa1,
+ 0x87, 0x08, 0xb5, 0x91, 0x97, 0x08, 0xb5, 0x83, 0x01, 0x91, 0x68, 0x8b,
+ 0x08, 0xb5, 0x72, 0x01, 0x91, 0x6c, 0xc5, 0xdf, 0x54, 0x00, 0xd5, 0x69,
+ 0x0a, 0xc1, 0x91, 0x70, 0x42, 0x0e, 0xe5, 0xc1, 0x91, 0x7c, 0x0d, 0xc1,
+ 0x91, 0x91, 0x44, 0x3a, 0xfc, 0xc1, 0x91, 0xa6, 0x14, 0xc1, 0x91, 0xbb,
+ 0xc6, 0xd8, 0x1a, 0x00, 0xd5, 0x29, 0xc5, 0xe3, 0xdc, 0x00, 0xd5, 0x03,
+ 0x01, 0x91, 0xc7, 0x43, 0x0e, 0x70, 0x41, 0x91, 0xcd, 0xc4, 0x24, 0x35,
+ 0x00, 0xd4, 0xc9, 0xc5, 0x05, 0x1b, 0x00, 0xd4, 0xc1, 0x15, 0xc1, 0x91,
+ 0xd9, 0x08, 0xc1, 0x91, 0xe5, 0x16, 0xc1, 0x91, 0xf1, 0xc3, 0x05, 0x17,
+ 0x00, 0xd4, 0x89, 0xc4, 0x16, 0x57, 0x00, 0xd4, 0x80, 0xc4, 0x24, 0x35,
+ 0x00, 0xd4, 0x49, 0xc5, 0x05, 0x1b, 0x00, 0xd4, 0x41, 0x15, 0xc1, 0x91,
+ 0xfd, 0x08, 0xc1, 0x92, 0x09, 0x16, 0xc1, 0x92, 0x15, 0xc3, 0x05, 0x17,
+ 0x00, 0xd4, 0x09, 0xc4, 0x16, 0x57, 0x00, 0xd4, 0x00, 0xd9, 0x1d, 0xa8,
+ 0x00, 0xd3, 0xf9, 0x4d, 0x2e, 0x39, 0x41, 0x92, 0x21, 0x91, 0x00, 0xd3,
+ 0x5b, 0x01, 0x92, 0x41, 0x16, 0xc1, 0x92, 0x4f, 0x83, 0x00, 0xd3, 0x0b,
+ 0x01, 0x92, 0x5b, 0x87, 0x00, 0xd3, 0x71, 0x97, 0x00, 0xd3, 0x4b, 0x01,
+ 0x92, 0x67, 0x8b, 0x00, 0xd3, 0x2b, 0x01, 0x92, 0x72, 0xc7, 0xc7, 0xbb,
+ 0x00, 0xd3, 0x10, 0xc8, 0xc0, 0xab, 0x00, 0xd2, 0xa1, 0x0e, 0xc1, 0x92,
+ 0x76, 0xc2, 0x02, 0x14, 0x00, 0xd2, 0x91, 0xc2, 0x01, 0xe6, 0x00, 0xd2,
+ 0x89, 0x97, 0x00, 0xd2, 0x7b, 0x01, 0x92, 0x8f, 0x8b, 0x00, 0xd2, 0x6b,
+ 0x01, 0x92, 0x93, 0x83, 0x00, 0xd2, 0x59, 0x45, 0x0b, 0x2b, 0xc1, 0x92,
+ 0x97, 0xc2, 0x00, 0x4c, 0x00, 0xd2, 0x29, 0x14, 0xc1, 0x92, 0xc3, 0xc2,
+ 0x00, 0x3f, 0x00, 0xd1, 0xf1, 0xc2, 0x02, 0x1d, 0x00, 0xd1, 0xb9, 0x10,
+ 0xc1, 0x92, 0xd0, 0xc2, 0x0c, 0x25, 0x00, 0xd1, 0x78, 0x42, 0x00, 0xe5,
+ 0xc1, 0x92, 0xe0, 0x15, 0xc1, 0x92, 0xfe, 0xc2, 0x01, 0x0e, 0x00, 0xca,
+ 0xb9, 0x83, 0x00, 0xca, 0xb0, 0x8b, 0x00, 0xcb, 0x69, 0xc2, 0x0e, 0x78,
+ 0x00, 0xcb, 0x60, 0x8a, 0x00, 0xcb, 0x31, 0x87, 0x00, 0xcb, 0x28, 0x87,
+ 0x00, 0xcb, 0x50, 0x91, 0x00, 0xcb, 0x40, 0x83, 0x00, 0xcb, 0x11, 0xc2,
+ 0x07, 0x69, 0x00, 0xca, 0x90, 0xc2, 0x01, 0x0e, 0x00, 0xcb, 0x01, 0x83,
+ 0x00, 0xca, 0x80, 0xc2, 0x01, 0x0e, 0x00, 0xca, 0xd1, 0x83, 0x00, 0xca,
+ 0xc8, 0x42, 0x05, 0x28, 0xc1, 0x93, 0x08, 0xc6, 0xd3, 0xbe, 0x05, 0x56,
+ 0xf1, 0xc3, 0x7f, 0xed, 0x05, 0x56, 0xe9, 0xc5, 0xe1, 0xb6, 0x05, 0x56,
+ 0xe0, 0xc4, 0x77, 0x2e, 0x05, 0x56, 0x11, 0xc3, 0x19, 0xb2, 0x05, 0x56,
+ 0x09, 0xc5, 0xe1, 0xb6, 0x05, 0x56, 0x01, 0xc2, 0x13, 0x31, 0x05, 0x55,
+ 0xf8, 0x03, 0xc1, 0x93, 0x12, 0x97, 0x05, 0x55, 0xa3, 0x01, 0x93, 0x28,
+ 0x8b, 0x05, 0x55, 0x93, 0x01, 0x93, 0x33, 0x87, 0x05, 0x55, 0xa9, 0x91,
+ 0x05, 0x55, 0xb0, 0xc3, 0x00, 0x55, 0x05, 0x55, 0x81, 0xc3, 0x01, 0x32,
+ 0x05, 0x55, 0xb8, 0x45, 0x0b, 0x2b, 0xc1, 0x93, 0x37, 0x44, 0x00, 0x36,
+ 0x41, 0x93, 0x91, 0xcb, 0x57, 0xbe, 0x01, 0x36, 0x51, 0xc8, 0xbe, 0x83,
+ 0x01, 0x5e, 0x10, 0xc6, 0x2e, 0x3f, 0x01, 0x18, 0xc9, 0x44, 0x03, 0xdf,
+ 0x41, 0x93, 0xeb, 0x46, 0x10, 0xf3, 0xc1, 0x93, 0xf7, 0xc5, 0xd5, 0xa5,
+ 0x01, 0x71, 0xc0, 0xc6, 0xd8, 0xc2, 0x01, 0x0a, 0x71, 0x52, 0x42, 0xa3,
+ 0xc1, 0x94, 0x03, 0x45, 0x01, 0xc3, 0xc1, 0x94, 0x0f, 0xc8, 0x50, 0x04,
+ 0x01, 0x71, 0xa8, 0xc8, 0x32, 0x18, 0x01, 0x0a, 0x59, 0xc4, 0x00, 0x56,
+ 0x01, 0x4d, 0x10, 0xc8, 0xb8, 0x53, 0x01, 0x09, 0x91, 0xc4, 0x00, 0xeb,
+ 0x01, 0x71, 0x90, 0xd0, 0x58, 0xdf, 0x01, 0x3e, 0x01, 0xce, 0x0b, 0xf9,
+ 0x01, 0x02, 0xb0, 0x50, 0x59, 0x4f, 0xc1, 0x94, 0x1b, 0xcf, 0x09, 0x59,
+ 0x01, 0x59, 0x88, 0xd0, 0x29, 0xc8, 0x01, 0x0f, 0xb1, 0x44, 0x3e, 0xfb,
+ 0x41, 0x94, 0x27, 0x4b, 0x00, 0x47, 0xc1, 0x94, 0x3f, 0xdf, 0x0c, 0xc1,
+ 0x01, 0x5c, 0xc0, 0xe0, 0x02, 0x67, 0x01, 0x5c, 0xc8, 0xe0, 0x0b, 0xc7,
+ 0x01, 0x3d, 0x18, 0xe0, 0x00, 0x87, 0x01, 0x5c, 0xd8, 0x15, 0xc1, 0x94,
+ 0x4b, 0xcb, 0x5e, 0x74, 0x0f, 0xbd, 0x08, 0xce, 0x76, 0x1a, 0x01, 0x10,
+ 0x21, 0xc6, 0xd4, 0xa2, 0x01, 0x10, 0x18, 0xc8, 0xba, 0x1b, 0x00, 0x3d,
+ 0x79, 0xc6, 0xd4, 0x84, 0x00, 0x3d, 0x71, 0xc8, 0xbe, 0x93, 0x00, 0x3d,
+ 0x58, 0xc8, 0xc2, 0xd3, 0x00, 0x3d, 0x49, 0xc6, 0xd3, 0xfa, 0x00, 0x3d,
+ 0x61, 0xc8, 0xc0, 0x73, 0x00, 0x3d, 0x68, 0xc8, 0xb9, 0xfb, 0x00, 0x3d,
+ 0x39, 0xc6, 0xd4, 0x6c, 0x00, 0x3d, 0x30, 0xc5, 0xe3, 0xa5, 0x00, 0x3d,
+ 0x29, 0xc5, 0xdb, 0x76, 0x00, 0x3d, 0x21, 0x09, 0xc1, 0x94, 0x57, 0x16,
+ 0xc1, 0x94, 0x69, 0x06, 0xc1, 0x94, 0x82, 0x15, 0xc1, 0x94, 0x8c, 0x0a,
+ 0xc1, 0x94, 0x9c, 0xc9, 0xae, 0x47, 0x00, 0x3c, 0xb9, 0xc8, 0xbe, 0x4b,
+ 0x00, 0x3c, 0xb1, 0xc8, 0xbc, 0x53, 0x00, 0x3c, 0xa9, 0xc3, 0x67, 0x9c,
+ 0x00, 0x3c, 0xa1, 0x1c, 0xc1, 0x94, 0xa8, 0x0e, 0xc1, 0x94, 0xb0, 0xc5,
+ 0xdf, 0x6d, 0x00, 0x3c, 0x51, 0xc5, 0xe3, 0xb9, 0x00, 0x3c, 0x49, 0xc5,
+ 0xdb, 0xf8, 0x00, 0x3c, 0x41, 0x03, 0xc1, 0x94, 0xbc, 0x0d, 0xc1, 0x94,
+ 0xc8, 0xc3, 0x4d, 0x16, 0x00, 0x3c, 0x21, 0xc3, 0x0e, 0x2f, 0x00, 0x3c,
+ 0x19, 0x10, 0x41, 0x94, 0xd4, 0x49, 0x39, 0x1b, 0xc1, 0x94, 0xe0, 0xd3,
+ 0x3f, 0xe4, 0x00, 0x71, 0xf8, 0xc4, 0x16, 0x57, 0x00, 0x72, 0x81, 0xc3,
+ 0x05, 0x17, 0x00, 0x72, 0x89, 0x16, 0xc1, 0x95, 0x34, 0x08, 0xc1, 0x95,
+ 0x40, 0x15, 0xc1, 0x95, 0x4c, 0xc5, 0x05, 0x1b, 0x00, 0x72, 0xc1, 0xc4,
+ 0x24, 0x35, 0x00, 0x72, 0xc8, 0xc8, 0x20, 0x08, 0x01, 0x19, 0x01, 0xcc,
+ 0x8d, 0xf0, 0x01, 0x5e, 0x51, 0xcc, 0x86, 0xac, 0x01, 0x71, 0xc9, 0xd0,
+ 0x1f, 0x1f, 0x01, 0x72, 0xc9, 0xd1, 0x1c, 0xda, 0x01, 0x72, 0xd0, 0xc5,
+ 0x12, 0x88, 0x01, 0x18, 0xe9, 0xc3, 0x0a, 0x4a, 0x01, 0x18, 0x70, 0xc5,
+ 0x12, 0x88, 0x01, 0x18, 0xe1, 0xc3, 0x0a, 0x4a, 0x01, 0x18, 0x78, 0xca,
+ 0x3a, 0x54, 0x01, 0x49, 0xe8, 0x83, 0x0f, 0x15, 0x6b, 0x01, 0x95, 0x58,
+ 0x04, 0xc1, 0x95, 0x5c, 0x91, 0x0f, 0x15, 0x51, 0x87, 0x0f, 0x15, 0x33,
+ 0x01, 0x95, 0x66, 0x97, 0x0f, 0x15, 0x29, 0x8b, 0x0f, 0x15, 0x0b, 0x01,
+ 0x95, 0x6a, 0xc2, 0x00, 0x96, 0x0f, 0x15, 0x01, 0xc2, 0x00, 0x9a, 0x0f,
+ 0x14, 0xf9, 0xc2, 0x01, 0x0e, 0x0f, 0x14, 0xf1, 0xc2, 0x26, 0x94, 0x0f,
+ 0x14, 0xe9, 0xc2, 0x00, 0x4c, 0x0f, 0x14, 0xe1, 0xc2, 0x1a, 0x36, 0x0f,
+ 0x14, 0xd9, 0xc3, 0x1c, 0x4f, 0x0f, 0x14, 0xd1, 0xc2, 0x0e, 0xe5, 0x0f,
+ 0x14, 0xc9, 0x10, 0xc1, 0x95, 0x6e, 0xc2, 0x00, 0x3f, 0x0f, 0x14, 0xb1,
+ 0xc2, 0x07, 0x69, 0x0f, 0x14, 0xa9, 0xc2, 0x06, 0x6b, 0x0f, 0x14, 0xa1,
+ 0xc2, 0x0c, 0x25, 0x0f, 0x14, 0x99, 0xc2, 0x00, 0x44, 0x0f, 0x14, 0x91,
+ 0xc2, 0x07, 0x44, 0x0f, 0x14, 0x80, 0xc2, 0xed, 0xbf, 0x0f, 0x92, 0x09,
+ 0xc2, 0x8c, 0x87, 0x0f, 0x92, 0x10, 0xc3, 0xeb, 0xf4, 0x0f, 0x92, 0x41,
+ 0xc3, 0xeb, 0xa6, 0x0f, 0x92, 0x29, 0xc3, 0xeb, 0x3a, 0x0f, 0x92, 0x00,
+ 0xc3, 0xe7, 0x76, 0x0f, 0x92, 0x39, 0xc3, 0xeb, 0x6a, 0x0f, 0x92, 0x18,
+ 0xc3, 0xeb, 0x55, 0x0f, 0x92, 0x31, 0xc3, 0xeb, 0x22, 0x0f, 0x92, 0x20,
+ 0xd9, 0x04, 0xee, 0x01, 0x3c, 0xe9, 0x47, 0x01, 0xaa, 0x41, 0x95, 0x78,
+ 0xc6, 0x19, 0x7a, 0x01, 0x01, 0x19, 0xc5, 0xd8, 0x33, 0x0f, 0xa6, 0x81,
+ 0xcc, 0x82, 0xf8, 0x0f, 0xb5, 0x48, 0xc4, 0x00, 0x97, 0x01, 0x31, 0xa9,
+ 0xc3, 0x01, 0x62, 0x01, 0x31, 0xa0, 0xcf, 0x02, 0x98, 0x01, 0x15, 0x51,
+ 0xc9, 0x2e, 0x02, 0x01, 0x4c, 0x01, 0xcf, 0x2a, 0x53, 0x01, 0x57, 0xa1,
+ 0xd6, 0x2c, 0x95, 0x01, 0x57, 0xa8, 0xc4, 0x16, 0x95, 0x01, 0x01, 0xa1,
+ 0xc3, 0x21, 0x5f, 0x01, 0x4f, 0xd8, 0xd6, 0x2e, 0xd1, 0x01, 0x53, 0x41,
+ 0xd6, 0x30, 0x5d, 0x01, 0x53, 0x48, 0xc9, 0x09, 0xde, 0x01, 0x57, 0xb9,
+ 0xcc, 0x06, 0xfb, 0x01, 0x57, 0xc0, 0xc5, 0xcd, 0xb3, 0x0f, 0x9b, 0xc9,
+ 0xc4, 0x54, 0xb7, 0x0f, 0xa1, 0x00, 0xc7, 0xce, 0x05, 0x0e, 0x9a, 0xb1,
+ 0xc7, 0xbe, 0xe4, 0x0e, 0x98, 0xc0, 0xc4, 0x1e, 0xc2, 0x0e, 0x99, 0x59,
+ 0xc7, 0x03, 0xf9, 0x0e, 0x98, 0x38, 0xc7, 0xcd, 0x87, 0x0e, 0x9a, 0xa9,
+ 0xca, 0x9d, 0x18, 0x0e, 0x99, 0x68, 0xca, 0xaa, 0x4c, 0x0e, 0x9a, 0xa1,
+ 0x0f, 0xc1, 0x95, 0x90, 0xc8, 0xbd, 0x73, 0x0e, 0x98, 0x80, 0xc7, 0xae,
+ 0x76, 0x0e, 0x9a, 0x39, 0xca, 0x9f, 0x2a, 0x0e, 0x99, 0x11, 0xd9, 0x1e,
+ 0xbb, 0x0e, 0x98, 0x78, 0x43, 0x5b, 0x47, 0xc1, 0x95, 0x9c, 0x10, 0x41,
+ 0x95, 0xa8, 0xc3, 0x15, 0x01, 0x0e, 0x9a, 0x79, 0x07, 0x41, 0x95, 0xb2,
+ 0x11, 0xc1, 0x95, 0xbe, 0xc6, 0xd6, 0x4c, 0x0e, 0x99, 0x48, 0xc9, 0xac,
+ 0x46, 0x0e, 0x99, 0x99, 0xc8, 0xbe, 0xfb, 0x0e, 0x99, 0x81, 0xc7, 0xc8,
+ 0x5c, 0x0e, 0x98, 0xf8, 0xc3, 0x01, 0x61, 0x0e, 0x99, 0xf8, 0x15, 0xc1,
+ 0x95, 0xca, 0xc5, 0xdd, 0x42, 0x0e, 0x98, 0xd1, 0xc3, 0x28, 0xde, 0x0e,
+ 0x98, 0xa0, 0xc5, 0x88, 0x02, 0x0e, 0x99, 0xa1, 0xc5, 0x5c, 0x22, 0x0e,
+ 0x99, 0x20, 0xcd, 0x7d, 0xb1, 0x01, 0x38, 0x31, 0x43, 0x07, 0x5b, 0xc1,
+ 0x95, 0xd4, 0xc4, 0x02, 0xcb, 0x01, 0x09, 0x09, 0xcf, 0x66, 0xcc, 0x0f,
+ 0xac, 0x00, 0x05, 0xc1, 0x95, 0xe3, 0x03, 0xc1, 0x95, 0xef, 0x42, 0x02,
+ 0x52, 0xc1, 0x95, 0xfb, 0xc5, 0x33, 0x1a, 0x00, 0x61, 0xe1, 0xc7, 0xc6,
+ 0xf7, 0x00, 0x63, 0xb9, 0xc5, 0xdc, 0x93, 0x00, 0x63, 0xf8, 0x45, 0x02,
+ 0x01, 0xc1, 0x96, 0x07, 0xc9, 0x37, 0x9f, 0x00, 0x62, 0xa8, 0x03, 0xc1,
+ 0x96, 0x70, 0x8b, 0x00, 0x61, 0xfb, 0x01, 0x96, 0x7c, 0x97, 0x00, 0x62,
+ 0x0b, 0x01, 0x96, 0x80, 0x48, 0xb7, 0xd7, 0xc1, 0x96, 0x84, 0x87, 0x00,
+ 0x62, 0x33, 0x01, 0x96, 0x92, 0x91, 0x00, 0x62, 0x52, 0x01, 0x96, 0x96,
+ 0xc4, 0x16, 0x57, 0x00, 0x63, 0x31, 0xc3, 0x05, 0x17, 0x00, 0x63, 0x39,
+ 0x16, 0xc1, 0x96, 0x9a, 0x08, 0xc1, 0x96, 0xa6, 0x15, 0xc1, 0x96, 0xb2,
+ 0xc5, 0x05, 0x1b, 0x00, 0x63, 0x71, 0xc4, 0x24, 0x35, 0x00, 0x63, 0x78,
+ 0xdb, 0x16, 0x57, 0x00, 0x63, 0xc1, 0x48, 0xc1, 0xc3, 0xc1, 0x96, 0xbe,
+ 0x16, 0x41, 0x96, 0xca, 0x00, 0x41, 0x96, 0xd6, 0xca, 0xa2, 0xc2, 0x01,
+ 0x70, 0xd9, 0x44, 0x0b, 0xf8, 0x41, 0x96, 0xe2, 0xc4, 0x24, 0x35, 0x08,
+ 0xa6, 0xc9, 0xc5, 0x05, 0x1b, 0x08, 0xa6, 0xc1, 0x15, 0xc1, 0x96, 0xee,
+ 0x08, 0xc1, 0x96, 0xfa, 0x16, 0xc1, 0x97, 0x06, 0xc3, 0x05, 0x17, 0x08,
+ 0xa6, 0x89, 0xc4, 0x16, 0x57, 0x08, 0xa6, 0x80, 0xd0, 0x56, 0x10, 0x08,
+ 0xa6, 0x31, 0xc3, 0x81, 0xeb, 0x08, 0xa4, 0x00, 0x03, 0xc1, 0x97, 0x12,
+ 0xc5, 0x33, 0x1a, 0x08, 0xa6, 0x19, 0xcb, 0x21, 0x1a, 0x08, 0xa5, 0xf9,
+ 0x42, 0x02, 0x52, 0x41, 0x97, 0x1e, 0x03, 0xc1, 0x97, 0x2a, 0x46, 0x2f,
+ 0xd9, 0xc1, 0x97, 0x36, 0x91, 0x08, 0xa5, 0xe1, 0x87, 0x08, 0xa5, 0xc9,
+ 0x48, 0xb7, 0xd7, 0xc1, 0x97, 0x3e, 0x97, 0x08, 0xa5, 0x9b, 0x01, 0x97,
+ 0x4c, 0x8b, 0x08, 0xa5, 0x8a, 0x01, 0x97, 0x50, 0xc2, 0x01, 0x0e, 0x08,
+ 0xa5, 0x79, 0x15, 0xc1, 0x97, 0x54, 0x18, 0xc1, 0x97, 0x64, 0xc2, 0x00,
+ 0x96, 0x08, 0xa5, 0x51, 0xc2, 0x00, 0x9a, 0x08, 0xa5, 0x49, 0xc2, 0x1a,
+ 0x36, 0x08, 0xa5, 0x41, 0xc2, 0x00, 0x3f, 0x08, 0xa5, 0x39, 0x04, 0xc1,
+ 0x97, 0x6e, 0x12, 0xc1, 0x97, 0x78, 0x10, 0xc1, 0x97, 0x82, 0x06, 0xc1,
+ 0x97, 0x98, 0x16, 0xc1, 0x97, 0xa6, 0x0c, 0xc1, 0x97, 0xb4, 0x05, 0xc1,
+ 0x97, 0xbe, 0x09, 0xc1, 0x97, 0xc8, 0x0d, 0xc1, 0x97, 0xd2, 0x83, 0x08,
+ 0xa4, 0x0b, 0x01, 0x97, 0xdc, 0x91, 0x08, 0xa4, 0x69, 0x87, 0x08, 0xa4,
+ 0x59, 0x97, 0x08, 0xa4, 0x2b, 0x01, 0x97, 0xe8, 0x8b, 0x08, 0xa4, 0x1a,
+ 0x01, 0x97, 0xec, 0xc9, 0xb4, 0xc8, 0x00, 0x78, 0x01, 0x45, 0x11, 0xf2,
+ 0x41, 0x97, 0xf0, 0x14, 0xc1, 0x98, 0x0c, 0x42, 0x1a, 0x36, 0xc1, 0x98,
+ 0x1e, 0x0f, 0xc1, 0x98, 0x2a, 0xce, 0x73, 0xf8, 0x00, 0x7c, 0x11, 0xc8,
+ 0xbe, 0x5b, 0x00, 0x7c, 0x19, 0x42, 0x2e, 0x36, 0xc1, 0x98, 0x36, 0x44,
+ 0xe6, 0x5f, 0xc1, 0x98, 0x42, 0xd1, 0x53, 0x01, 0x00, 0x7c, 0x60, 0x45,
+ 0x02, 0xcb, 0xc1, 0x98, 0x4e, 0x47, 0x01, 0xff, 0x41, 0x98, 0x60, 0x44,
+ 0x02, 0x02, 0xc1, 0x98, 0xc2, 0x4b, 0x91, 0x2e, 0x41, 0x98, 0xce, 0x46,
+ 0x10, 0x7b, 0xc1, 0x98, 0xda, 0xd1, 0x55, 0x43, 0x00, 0x78, 0x58, 0x47,
+ 0x7c, 0x45, 0xc1, 0x98, 0xe6, 0x45, 0x98, 0x46, 0xc1, 0x98, 0xf2, 0xc6,
+ 0xd6, 0x88, 0x00, 0x79, 0xc0, 0xc9, 0xb2, 0x64, 0x00, 0x78, 0x41, 0xc3,
+ 0x01, 0x0e, 0x00, 0x78, 0x68, 0x15, 0xc1, 0x98, 0xfe, 0x49, 0xb1, 0xa7,
+ 0x41, 0x99, 0x08, 0x44, 0x96, 0x41, 0xc1, 0x99, 0x14, 0x4a, 0xa0, 0x4c,
+ 0x41, 0x99, 0x23, 0x15, 0xc1, 0x99, 0x2f, 0xd3, 0x47, 0x04, 0x00, 0x7e,
+ 0xd0, 0xd3, 0x43, 0x28, 0x00, 0x78, 0x89, 0xcd, 0x76, 0xa2, 0x00, 0x78,
+ 0x90, 0xc2, 0x00, 0xe5, 0x00, 0x79, 0xe1, 0xc2, 0x02, 0xe4, 0x00, 0x79,
+ 0xe8, 0xca, 0xa1, 0x50, 0x00, 0x78, 0xa9, 0xca, 0xa7, 0x2c, 0x00, 0x78,
+ 0xb0, 0x0d, 0xc1, 0x99, 0x3b, 0x09, 0xc1, 0x99, 0x51, 0x10, 0xc1, 0x99,
+ 0x5b, 0x05, 0xc1, 0x99, 0x71, 0xc2, 0x26, 0x94, 0x00, 0x7a, 0x39, 0x16,
+ 0xc1, 0x99, 0x7b, 0x06, 0xc1, 0x99, 0x8d, 0x12, 0xc1, 0x99, 0x9f, 0x04,
+ 0xc1, 0x99, 0xa9, 0xc2, 0x00, 0x3f, 0x00, 0x7a, 0xc1, 0xc2, 0x00, 0x4c,
+ 0x00, 0x7a, 0xe9, 0x1c, 0xc1, 0x99, 0xb3, 0xc2, 0x00, 0x02, 0x00, 0x7b,
+ 0x01, 0xc2, 0x1a, 0x36, 0x00, 0x7b, 0x09, 0x14, 0xc1, 0x99, 0xbd, 0xc2,
+ 0x00, 0x96, 0x00, 0x7b, 0x19, 0x15, 0xc1, 0x99, 0xc7, 0xc2, 0x01, 0x0e,
+ 0x00, 0x7b, 0x39, 0x83, 0x00, 0x7b, 0x41, 0xcd, 0x80, 0xbd, 0x00, 0x7b,
+ 0x50, 0xd4, 0x3b, 0xd6, 0x00, 0x78, 0xb9, 0xcb, 0x9b, 0x7e, 0x00, 0x78,
+ 0xc8, 0xc2, 0x0a, 0x20, 0x00, 0x79, 0x11, 0xc4, 0x05, 0xde, 0x00, 0x79,
+ 0x18, 0xc3, 0x08, 0xde, 0x00, 0x79, 0x21, 0xc3, 0x0d, 0x8f, 0x00, 0x79,
+ 0x28, 0xc2, 0x22, 0x45, 0x00, 0x79, 0x31, 0xc4, 0x15, 0xa7, 0x00, 0x79,
+ 0x38, 0xc3, 0x05, 0x17, 0x00, 0x79, 0x51, 0x16, 0xc1, 0x99, 0xd7, 0x08,
+ 0xc1, 0x99, 0xe3, 0x15, 0xc1, 0x99, 0xef, 0xc5, 0x05, 0x1b, 0x00, 0x79,
+ 0x89, 0xc4, 0x24, 0x35, 0x00, 0x79, 0x91, 0xc4, 0x16, 0x57, 0x00, 0x79,
+ 0x98, 0x8b, 0x00, 0x7b, 0x98, 0x97, 0x00, 0x7b, 0xa8, 0x94, 0x00, 0x7b,
+ 0xb3, 0x01, 0x99, 0xfb, 0x8e, 0x00, 0x7b, 0xc2, 0x01, 0x99, 0xff, 0x87,
+ 0x00, 0x7b, 0xd8, 0x91, 0x00, 0x7b, 0xe8, 0x8b, 0x00, 0x7c, 0x08, 0x83,
+ 0x01, 0x69, 0x83, 0x01, 0x9a, 0x03, 0x87, 0x01, 0x6b, 0x33, 0x01, 0x9a,
+ 0x74, 0x8b, 0x01, 0x6a, 0x49, 0x97, 0x01, 0x6a, 0x99, 0x91, 0x01, 0x6b,
+ 0x38, 0x8c, 0x01, 0x69, 0xa9, 0x8a, 0x01, 0x6a, 0x08, 0x48, 0xc1, 0x23,
+ 0xc1, 0x9a, 0x78, 0xcd, 0x81, 0xc1, 0x01, 0x6b, 0x20, 0xcb, 0x92, 0x57,
+ 0x01, 0x6a, 0x59, 0xc8, 0xc3, 0x4b, 0x01, 0x6a, 0xc0, 0xcb, 0x0b, 0xfc,
+ 0x01, 0x02, 0xd1, 0xc6, 0x71, 0xec, 0x01, 0x01, 0x28, 0xc7, 0x02, 0x6a,
+ 0x01, 0x49, 0xa1, 0xc9, 0x00, 0x68, 0x01, 0x49, 0xa9, 0xca, 0x3c, 0x52,
+ 0x0f, 0xc5, 0x88, 0xc9, 0x03, 0x9e, 0x01, 0x49, 0xb1, 0xca, 0x00, 0x47,
+ 0x01, 0x49, 0xb8, 0x45, 0x00, 0x3f, 0xc1, 0x9a, 0x97, 0x17, 0xc1, 0x9a,
+ 0xc1, 0x46, 0x11, 0xf1, 0xc1, 0x9a, 0xd6, 0x44, 0x02, 0xcc, 0xc1, 0x9a,
+ 0xf8, 0xd3, 0x43, 0xad, 0x00, 0x36, 0xf1, 0xc5, 0xde, 0x2d, 0x00, 0x32,
+ 0x8b, 0x01, 0x9b, 0x14, 0xc8, 0x50, 0x04, 0x00, 0x30, 0xd8, 0x48, 0x1b,
+ 0x0d, 0xc1, 0x9b, 0x18, 0x07, 0xc1, 0x9b, 0x76, 0x45, 0x15, 0x2f, 0x41,
+ 0x9b, 0x82, 0x43, 0x00, 0xfc, 0xc1, 0x9b, 0x8e, 0x43, 0x2c, 0xe6, 0xc1,
+ 0x9b, 0x9a, 0x4b, 0x4a, 0xc6, 0x41, 0x9b, 0xa6, 0x03, 0xc1, 0x9c, 0x12,
+ 0x45, 0x02, 0x92, 0xc1, 0x9c, 0x21, 0xd3, 0x44, 0xf0, 0x00, 0x47, 0x11,
+ 0xd0, 0x5b, 0x7f, 0x00, 0x33, 0x58, 0x4f, 0x2f, 0xf1, 0xc1, 0x9c, 0x30,
+ 0x03, 0xc1, 0x9c, 0x3f, 0x43, 0x0c, 0x3c, 0xc1, 0x9c, 0x49, 0xcd, 0x7d,
+ 0x70, 0x00, 0x32, 0xe8, 0x00, 0xc1, 0x9c, 0x4f, 0xc3, 0x06, 0x26, 0x00,
+ 0x32, 0x6a, 0x01, 0x9c, 0x61, 0xc4, 0x06, 0x87, 0x00, 0x32, 0x73, 0x01,
+ 0x9c, 0x67, 0xc8, 0x11, 0x48, 0x00, 0x36, 0xa1, 0xd0, 0x60, 0xef, 0x00,
+ 0x33, 0x69, 0xce, 0x72, 0x8c, 0x00, 0x30, 0x10, 0xc8, 0xbb, 0x23, 0x00,
+ 0x47, 0x91, 0xc8, 0xc2, 0x43, 0x00, 0x47, 0x89, 0xc8, 0x70, 0x78, 0x00,
+ 0x47, 0x80, 0x44, 0x05, 0x17, 0xc1, 0x9c, 0x74, 0xd1, 0x53, 0x67, 0x00,
+ 0x47, 0x19, 0x03, 0xc1, 0x9c, 0x86, 0xd2, 0x4f, 0x8c, 0x00, 0x33, 0x61,
+ 0xda, 0x1b, 0x88, 0x00, 0x30, 0xf0, 0x45, 0x00, 0xd3, 0xc1, 0x9c, 0x95,
+ 0xc4, 0x00, 0xeb, 0x00, 0x30, 0x60, 0xd3, 0x42, 0x6a, 0x00, 0x44, 0xf9,
+ 0x44, 0x08, 0x8b, 0x41, 0x9c, 0xb0, 0xd1, 0x51, 0x47, 0x00, 0x44, 0x89,
+ 0x11, 0xc1, 0x9c, 0xbc, 0xce, 0x75, 0xd4, 0x00, 0x37, 0x49, 0xcb, 0x97,
+ 0x11, 0x00, 0x33, 0x50, 0xcc, 0x45, 0x69, 0x00, 0x44, 0x71, 0x4a, 0x6f,
+ 0x72, 0x41, 0x9c, 0xc8, 0x4c, 0x85, 0x38, 0xc1, 0x9c, 0xda, 0x46, 0x03,
+ 0x70, 0x41, 0x9c, 0xe6, 0xca, 0x46, 0xae, 0x00, 0x30, 0x29, 0xc4, 0x02,
+ 0xcb, 0x00, 0x30, 0x00, 0xc4, 0x24, 0x35, 0x00, 0x33, 0x49, 0xc5, 0x05,
+ 0x1b, 0x00, 0x33, 0x41, 0x15, 0xc1, 0x9c, 0xf2, 0x08, 0xc1, 0x9c, 0xfe,
+ 0x16, 0xc1, 0x9d, 0x0a, 0xc3, 0x05, 0x17, 0x00, 0x33, 0x09, 0xc4, 0x16,
+ 0x57, 0x00, 0x33, 0x00, 0xd1, 0x4f, 0x9e, 0x00, 0x30, 0x51, 0xca, 0xa8,
+ 0x4e, 0x00, 0x30, 0x48, 0x44, 0x47, 0x52, 0xc1, 0x9d, 0x16, 0xc7, 0xcf,
+ 0x01, 0x07, 0xd8, 0xb1, 0xc8, 0xbf, 0x5b, 0x00, 0x2c, 0x38, 0xc2, 0x18,
+ 0x7a, 0x00, 0x2b, 0xab, 0x01, 0x9d, 0x2e, 0xc3, 0xae, 0x23, 0x00, 0x2c,
+ 0x31, 0xc2, 0x28, 0x39, 0x00, 0x2c, 0x29, 0x42, 0x00, 0xdc, 0xc1, 0x9d,
+ 0x3a, 0x12, 0xc1, 0x9d, 0x42, 0x05, 0xc1, 0x9d, 0x4e, 0x14, 0xc1, 0x9d,
+ 0x5a, 0x16, 0xc1, 0x9d, 0x64, 0x18, 0xc1, 0x9d, 0x74, 0x15, 0xc1, 0x9d,
+ 0x7e, 0x0c, 0xc1, 0x9d, 0x8a, 0xc3, 0x27, 0x91, 0x00, 0x2b, 0xb1, 0xc3,
+ 0x01, 0x03, 0x00, 0x2b, 0xa1, 0x09, 0xc1, 0x9d, 0x94, 0xc2, 0x03, 0x76,
+ 0x00, 0x2b, 0x81, 0xc3, 0xec, 0x00, 0x00, 0x2b, 0x69, 0xc4, 0xe6, 0x07,
+ 0x00, 0x2b, 0x61, 0xc3, 0x01, 0xcd, 0x00, 0x2b, 0x59, 0x1c, 0xc1, 0x9d,
+ 0xa0, 0x07, 0xc1, 0x9d, 0xaa, 0xc2, 0x0c, 0x25, 0x00, 0x2b, 0x21, 0xc3,
+ 0x16, 0x02, 0x00, 0x2b, 0x11, 0xc3, 0xaa, 0xd2, 0x00, 0x2b, 0x08, 0xc3,
+ 0xae, 0x23, 0x00, 0x2a, 0xb1, 0xc2, 0x28, 0x39, 0x00, 0x2a, 0xa9, 0x42,
+ 0x00, 0xdc, 0xc1, 0x9d, 0xb8, 0x12, 0xc1, 0x9d, 0xc0, 0xc2, 0x18, 0x7a,
+ 0x00, 0x2a, 0x2b, 0x01, 0x9d, 0xcc, 0x05, 0xc1, 0x9d, 0xd2, 0x14, 0xc1,
+ 0x9d, 0xde, 0x16, 0xc1, 0x9d, 0xe8, 0x18, 0xc1, 0x9d, 0xf2, 0x15, 0xc1,
+ 0x9d, 0xfc, 0x0c, 0xc1, 0x9e, 0x08, 0xc3, 0x27, 0x91, 0x00, 0x2a, 0x31,
+ 0xc3, 0x01, 0x03, 0x00, 0x2a, 0x21, 0x09, 0xc1, 0x9e, 0x12, 0xc2, 0x03,
+ 0x76, 0x00, 0x2a, 0x01, 0xc3, 0xec, 0x00, 0x00, 0x29, 0xe9, 0xc4, 0xe6,
+ 0x07, 0x00, 0x29, 0xe1, 0xc3, 0x01, 0xcd, 0x00, 0x29, 0xd9, 0x1c, 0xc1,
+ 0x9e, 0x1e, 0x07, 0xc1, 0x9e, 0x28, 0xc2, 0x0c, 0x25, 0x00, 0x29, 0xa1,
+ 0xc3, 0xaa, 0xd2, 0x00, 0x29, 0x89, 0xc3, 0x16, 0x02, 0x00, 0x29, 0x90,
+ 0xc4, 0x66, 0xbd, 0x0f, 0x48, 0x01, 0x06, 0xc1, 0x9e, 0x36, 0xc4, 0x79,
+ 0xaa, 0x0f, 0x48, 0x11, 0xc4, 0xe6, 0x03, 0x0f, 0x48, 0x19, 0x04, 0xc1,
+ 0x9e, 0x42, 0x15, 0xc1, 0x9e, 0x4c, 0xc2, 0x03, 0x07, 0x0f, 0x48, 0x31,
+ 0xc2, 0x00, 0x9a, 0x0f, 0x48, 0x41, 0x87, 0x0f, 0x48, 0x49, 0xc2, 0x01,
+ 0xa7, 0x0f, 0x48, 0x51, 0x8b, 0x0f, 0x48, 0x59, 0x91, 0x0f, 0x48, 0x61,
+ 0x1b, 0xc1, 0x9e, 0x58, 0xc3, 0x7c, 0xad, 0x0f, 0x48, 0x79, 0x10, 0xc1,
+ 0x9e, 0x62, 0x0d, 0xc1, 0x9e, 0x74, 0x97, 0x0f, 0x48, 0x99, 0xc4, 0xe5,
+ 0xb3, 0x0f, 0x48, 0xa1, 0xc3, 0x11, 0x3f, 0x0f, 0x48, 0xa9, 0xc2, 0x01,
+ 0x0e, 0x0f, 0x48, 0xb1, 0xc4, 0xdb, 0x76, 0x0f, 0x48, 0xb9, 0x09, 0xc1,
+ 0x9e, 0x86, 0xc2, 0x00, 0x16, 0x0f, 0x48, 0xd1, 0xc2, 0x06, 0x8c, 0x0f,
+ 0x48, 0xe1, 0xc3, 0xb7, 0x74, 0x0f, 0x48, 0xf8, 0xc4, 0x14, 0x91, 0x0f,
+ 0x49, 0x19, 0xc2, 0x01, 0x0e, 0x0f, 0x49, 0x78, 0x83, 0x0f, 0x49, 0x31,
+ 0xc2, 0x00, 0x5b, 0x0f, 0x49, 0x48, 0xc9, 0xb0, 0x3f, 0x0f, 0x49, 0x39,
+ 0xc2, 0x01, 0x0e, 0x0f, 0x4a, 0x18, 0xc2, 0x00, 0x5b, 0x0f, 0x49, 0x81,
+ 0x83, 0x0f, 0x49, 0xa0, 0xc2, 0x0b, 0xfd, 0x0f, 0x49, 0x91, 0xc2, 0x1a,
+ 0x36, 0x0f, 0x49, 0xd9, 0xc2, 0x01, 0x0e, 0x0f, 0x49, 0xe8, 0xc2, 0x0e,
+ 0x14, 0x0f, 0x49, 0x99, 0xc2, 0x01, 0x0e, 0x0f, 0x49, 0xf9, 0xc2, 0x01,
+ 0x59, 0x0f, 0x4a, 0x10, 0x83, 0x0f, 0x49, 0xd1, 0xc2, 0x00, 0x34, 0x0f,
+ 0x4a, 0x00, 0xc2, 0x0a, 0x20, 0x0f, 0x4a, 0x91, 0xc4, 0x05, 0xde, 0x0f,
+ 0x4a, 0x98, 0xc3, 0x08, 0xde, 0x0f, 0x4a, 0xa1, 0xc3, 0x0d, 0x8f, 0x0f,
+ 0x4a, 0xa8, 0xc2, 0x22, 0x45, 0x0f, 0x4a, 0xb1, 0xc4, 0x15, 0xa7, 0x0f,
+ 0x4a, 0xb8, 0xc7, 0xcd, 0x4f, 0x0f, 0xbb, 0x61, 0xc4, 0xe5, 0xd7, 0x0f,
+ 0xbb, 0x58, 0xc3, 0xec, 0x0c, 0x0f, 0xba, 0x19, 0x9a, 0x0f, 0xba, 0x11,
+ 0xc3, 0xec, 0x06, 0x0f, 0xba, 0x20, 0x45, 0xdb, 0xbc, 0xc1, 0x9e, 0x90,
+ 0x48, 0xbf, 0xdb, 0x41, 0x9e, 0xac, 0xc3, 0x0b, 0x61, 0x0f, 0xb9, 0x01,
+ 0xcb, 0x4e, 0x73, 0x0f, 0xb9, 0x28, 0xc2, 0xed, 0xe1, 0x0f, 0xba, 0x61,
+ 0xcb, 0x90, 0xc0, 0x0f, 0xba, 0x71, 0xc6, 0xd2, 0x6e, 0x0f, 0xba, 0x80,
+ 0x44, 0xe4, 0x3b, 0xc1, 0x9e, 0xb8, 0xc4, 0x31, 0x16, 0x0f, 0xbb, 0x00,
+ 0xc4, 0xe4, 0xaf, 0x0f, 0xba, 0x5b, 0x01, 0x9e, 0xc2, 0xc7, 0xca, 0x3f,
+ 0x0f, 0xba, 0xc0, 0xc4, 0xe4, 0x8f, 0x0f, 0xbb, 0x19, 0xca, 0xa0, 0x10,
+ 0x0f, 0xbb, 0x20, 0x94, 0x0f, 0xb9, 0xf9, 0xc3, 0xed, 0x3e, 0x0f, 0xba,
+ 0x00, 0xc4, 0x86, 0x9c, 0x0f, 0xb9, 0x49, 0xc5, 0x89, 0x2f, 0x0f, 0xba,
+ 0x40, 0x44, 0xe6, 0x57, 0xc1, 0x9e, 0xc8, 0x44, 0x27, 0x26, 0xc1, 0x9e,
+ 0xe1, 0xc4, 0x0a, 0xe8, 0x0f, 0xbb, 0x68, 0x96, 0x0f, 0xb8, 0xc1, 0xc3,
+ 0xed, 0x68, 0x0f, 0xb8, 0xc8, 0x44, 0x11, 0xb6, 0xc1, 0x9e, 0xeb, 0x44,
+ 0xe4, 0x63, 0x41, 0x9e, 0xfe, 0x46, 0x62, 0x34, 0xc1, 0x9f, 0x08, 0xc4,
+ 0x48, 0x80, 0x0f, 0xb8, 0x68, 0xc2, 0x03, 0x12, 0x0f, 0xb8, 0xa3, 0x01,
+ 0x9f, 0x14, 0xca, 0x9c, 0xe6, 0x0f, 0xb9, 0xc8, 0xcd, 0x81, 0x66, 0x0f,
+ 0xba, 0x91, 0x52, 0x49, 0xb6, 0x41, 0x9f, 0x1a, 0x00, 0xc1, 0x9f, 0x24,
+ 0xc6, 0xd5, 0x9e, 0x0f, 0xb8, 0x28, 0x43, 0x47, 0x67, 0xc1, 0x9f, 0x30,
+ 0xc2, 0x01, 0x0a, 0x0f, 0xba, 0x29, 0xc5, 0xe1, 0xa7, 0x0f, 0xbb, 0x50,
+ 0xc3, 0x82, 0xec, 0x0f, 0xb8, 0x91, 0xc3, 0x82, 0xa4, 0x0f, 0xb8, 0x89,
+ 0x87, 0x0f, 0xb8, 0x80, 0x87, 0x0f, 0xb8, 0xe1, 0xc3, 0x82, 0xa4, 0x0f,
+ 0xb8, 0xe8, 0xc8, 0xc1, 0x0b, 0x0f, 0xba, 0xb1, 0xc2, 0x00, 0xd3, 0x0f,
+ 0xbb, 0x70, 0xc4, 0xb5, 0xb2, 0x0f, 0xbb, 0x91, 0xc5, 0xd9, 0x82, 0x0f,
+ 0xbb, 0x98, 0x22, 0xc1, 0x9f, 0x3a, 0x21, 0xc1, 0x9f, 0x62, 0x20, 0xc1,
+ 0x9f, 0x93, 0x1f, 0xc1, 0x9f, 0xbe, 0x1e, 0xc1, 0x9f, 0xe9, 0x1d, 0xc1,
+ 0xa0, 0x14, 0x23, 0xc1, 0xa0, 0x38, 0x24, 0xc1, 0xa0, 0x63, 0x25, 0xc1,
+ 0xa0, 0x8b, 0x26, 0x41, 0xa0, 0xb3, 0x1d, 0xc1, 0xa0, 0xe1, 0x1e, 0xc1,
+ 0xa1, 0x1b, 0x1f, 0xc1, 0xa1, 0x49, 0x20, 0xc1, 0xa1, 0x74, 0x21, 0xc1,
+ 0xa1, 0x9f, 0x22, 0xc1, 0xa1, 0xc7, 0x23, 0xc1, 0xa1, 0xef, 0x24, 0xc1,
+ 0xa2, 0x17, 0x25, 0xc1, 0xa2, 0x3f, 0x26, 0x41, 0xa2, 0x67, 0x1d, 0xc1,
+ 0xa2, 0x8f, 0x1e, 0xc1, 0xa2, 0xc0, 0x1f, 0xc1, 0xa2, 0xee, 0x20, 0xc1,
+ 0xa3, 0x19, 0x21, 0xc1, 0xa3, 0x41, 0x22, 0xc1, 0xa3, 0x69, 0x23, 0xc1,
+ 0xa3, 0x91, 0x24, 0xc1, 0xa3, 0xbc, 0x25, 0xc1, 0xa3, 0xe4, 0x26, 0x41,
+ 0xa4, 0x0f, 0x1d, 0xc1, 0xa4, 0x3d, 0x1e, 0xc1, 0xa4, 0x68, 0x1f, 0xc1,
+ 0xa4, 0x90, 0x20, 0xc1, 0xa4, 0xbb, 0x21, 0xc1, 0xa4, 0xe6, 0x22, 0xc1,
+ 0xa5, 0x0e, 0x23, 0xc1, 0xa5, 0x39, 0x24, 0xc1, 0xa5, 0x67, 0x25, 0xc1,
+ 0xa5, 0x92, 0x26, 0x41, 0xa5, 0xc0, 0x1d, 0xc1, 0xa5, 0xea, 0x1e, 0xc1,
+ 0xa6, 0x12, 0x1f, 0xc1, 0xa6, 0x3a, 0x20, 0xc1, 0xa6, 0x62, 0x21, 0xc1,
+ 0xa6, 0x8a, 0x22, 0xc1, 0xa6, 0xb2, 0x23, 0xc1, 0xa6, 0xe0, 0x24, 0xc1,
+ 0xa7, 0x08, 0x25, 0xc1, 0xa7, 0x30, 0x26, 0x41, 0xa7, 0x58, 0x1d, 0xc1,
+ 0xa7, 0x78, 0x1e, 0xc1, 0xa7, 0x9c, 0x1f, 0xc1, 0xa7, 0xc4, 0xc2, 0xeb,
+ 0xbe, 0x0a, 0x32, 0x30, 0xcf, 0x68, 0xca, 0x01, 0x11, 0x99, 0xd2, 0x4b,
+ 0x78, 0x01, 0x4a, 0x00, 0xd3, 0x45, 0x3c, 0x01, 0x0d, 0xb1, 0x4f, 0x00,
+ 0x53, 0x41, 0xa7, 0xec, 0xe0, 0x03, 0xe7, 0x0f, 0xa8, 0x20, 0xc8, 0x50,
+ 0x0d, 0x01, 0x4d, 0x21, 0xc8, 0x4f, 0x56, 0x01, 0x4c, 0xf0, 0xc9, 0x15,
+ 0xe2, 0x01, 0x10, 0xb8, 0xc2, 0x01, 0x0e, 0x08, 0xba, 0x21, 0x83, 0x08,
+ 0xba, 0x18, 0xc2, 0x01, 0x0e, 0x08, 0xba, 0x11, 0x83, 0x08, 0xba, 0x08,
+ 0xc2, 0x02, 0x1d, 0x08, 0xb8, 0xd1, 0xc2, 0x07, 0x69, 0x08, 0xb8, 0xb1,
+ 0xc2, 0x00, 0x44, 0x08, 0xb8, 0x28, 0xc6, 0x00, 0xe1, 0x08, 0xb9, 0xe9,
+ 0xcc, 0x85, 0xe0, 0x08, 0xb9, 0xe0, 0x00, 0x41, 0xa8, 0x0a, 0xc4, 0x0b,
+ 0x19, 0x01, 0x1a, 0xf1, 0xc8, 0x50, 0x0d, 0x01, 0x1a, 0xc0, 0xcb, 0x98,
+ 0x45, 0x01, 0x1b, 0x91, 0x45, 0x9c, 0x24, 0xc1, 0xa8, 0x4e, 0xc8, 0xba,
+ 0x0b, 0x01, 0x1a, 0xe8, 0x00, 0xc1, 0xa8, 0x60, 0xca, 0x6e, 0xae, 0x01,
+ 0x1a, 0xb0, 0x00, 0xc1, 0xa8, 0x72, 0x43, 0x33, 0x1d, 0x41, 0xa8, 0x84,
+ 0xc9, 0xb0, 0x99, 0x01, 0x1b, 0x69, 0xcc, 0x87, 0x78, 0x01, 0x1b, 0x18,
+ 0xc9, 0x1e, 0x89, 0x01, 0x1b, 0x29, 0x42, 0x00, 0x15, 0xc1, 0xa8, 0x90,
+ 0xc8, 0x50, 0x0d, 0x01, 0x1a, 0xe1, 0xc9, 0x02, 0xde, 0x01, 0x1a, 0x49,
+ 0xc3, 0xba, 0x10, 0x01, 0x19, 0xf0, 0x46, 0x03, 0x62, 0xc1, 0xa8, 0x9c,
+ 0xd9, 0x1f, 0xe7, 0x01, 0x12, 0x30, 0x87, 0x08, 0x59, 0xa9, 0xc2, 0x01,
+ 0x03, 0x08, 0x59, 0x48, 0xc3, 0x0a, 0xf1, 0x08, 0x59, 0xa1, 0x0a, 0xc1,
+ 0xa8, 0xab, 0x87, 0x08, 0x59, 0x78, 0x87, 0x08, 0x59, 0x59, 0xc2, 0x04,
+ 0x30, 0x08, 0x59, 0x50, 0xc2, 0x03, 0x5f, 0x08, 0x59, 0x39, 0xc2, 0x04,
+ 0x30, 0x08, 0x59, 0x31, 0x87, 0x08, 0x59, 0x29, 0x09, 0x41, 0xa8, 0xb5,
+ 0xc2, 0x00, 0x5b, 0x08, 0x58, 0xe1, 0x87, 0x08, 0x58, 0xd8, 0xc2, 0x00,
+ 0x5b, 0x08, 0x58, 0xd1, 0x87, 0x08, 0x58, 0xc9, 0xc2, 0x00, 0x98, 0x08,
+ 0x58, 0xe8, 0xc2, 0x00, 0x5b, 0x08, 0x58, 0xb1, 0xc2, 0x02, 0xfb, 0x08,
+ 0x58, 0xa9, 0x87, 0x08, 0x58, 0xa0, 0xc2, 0x00, 0x29, 0x08, 0x58, 0x99,
+ 0x87, 0x08, 0x58, 0x89, 0xc2, 0x04, 0x30, 0x08, 0x58, 0x90, 0x97, 0x08,
+ 0x58, 0x78, 0x8b, 0x08, 0x58, 0x68, 0x91, 0x08, 0x58, 0x58, 0x87, 0x08,
+ 0x58, 0x48, 0x87, 0x08, 0x58, 0x33, 0x01, 0xa8, 0xc5, 0x83, 0x08, 0x58,
+ 0x0b, 0x01, 0xa8, 0xc9, 0x90, 0x08, 0x58, 0x21, 0x91, 0x08, 0x58, 0x10,
+ 0x87, 0x08, 0x59, 0x01, 0xc2, 0x00, 0x5b, 0x08, 0x59, 0x08, 0x87, 0x08,
+ 0x59, 0x81, 0xc2, 0x00, 0x5b, 0x08, 0x59, 0x90, 0x00, 0x41, 0xa8, 0xd1,
+ 0x0a, 0xc1, 0xa8, 0xdd, 0xc2, 0x01, 0x04, 0x08, 0x08, 0x83, 0x01, 0xa8,
+ 0xef, 0x19, 0x41, 0xa8, 0xf5, 0x0b, 0xc1, 0xa9, 0x05, 0x11, 0x41, 0xa9,
+ 0x17, 0xc2, 0x22, 0x45, 0x08, 0x08, 0x63, 0x01, 0xa9, 0x29, 0xc4, 0x15,
+ 0xa7, 0x08, 0x08, 0x6a, 0x01, 0xa9, 0x36, 0x00, 0xc1, 0xa9, 0x43, 0x9b,
+ 0x08, 0x08, 0xba, 0x01, 0xa9, 0x4f, 0x00, 0xc1, 0xa9, 0x55, 0xc2, 0x0d,
+ 0x8b, 0x08, 0x08, 0xc2, 0x01, 0xa9, 0x61, 0xc9, 0xae, 0x98, 0x08, 0x09,
+ 0xb9, 0x08, 0xc1, 0xa9, 0x67, 0xce, 0x6c, 0xa4, 0x08, 0x09, 0xc9, 0xcd,
+ 0x78, 0xaa, 0x08, 0x09, 0xd0, 0xc4, 0x03, 0x5d, 0x08, 0x08, 0x01, 0xc3,
+ 0x05, 0xe3, 0x08, 0x08, 0x08, 0x45, 0x01, 0x18, 0xc1, 0xa9, 0x73, 0x44,
+ 0x00, 0xce, 0x41, 0xa9, 0xb3, 0xc2, 0x02, 0x6a, 0x01, 0x2b, 0xcb, 0x01,
+ 0xa9, 0xcb, 0xc4, 0x00, 0xcd, 0x01, 0x2b, 0xc3, 0x01, 0xa9, 0xd1, 0x42,
+ 0x00, 0x68, 0xc1, 0xa9, 0xd7, 0xc5, 0x00, 0x47, 0x01, 0x2b, 0xd1, 0xc8,
+ 0x00, 0x29, 0x01, 0x28, 0x1b, 0x01, 0xa9, 0xe6, 0x4f, 0x64, 0x74, 0xc1,
+ 0xa9, 0xec, 0x4c, 0x54, 0x00, 0xc1, 0xa9, 0xf8, 0xca, 0x03, 0x76, 0x01,
+ 0x28, 0x08, 0x45, 0x02, 0x93, 0xc1, 0xaa, 0x04, 0x43, 0x0a, 0xe5, 0x41,
+ 0xaa, 0x1f, 0x4b, 0x9c, 0x2e, 0xc1, 0xaa, 0x37, 0x4b, 0x93, 0x54, 0xc1,
+ 0xaa, 0x49, 0x4a, 0x11, 0x8d, 0xc1, 0xaa, 0x5b, 0x4a, 0x60, 0x1f, 0x41,
+ 0xaa, 0x6d, 0x4b, 0x9c, 0x2e, 0xc1, 0xaa, 0x7f, 0x4b, 0x93, 0x54, 0xc1,
+ 0xaa, 0x91, 0x4a, 0x60, 0x1f, 0xc1, 0xaa, 0xa3, 0x4a, 0x11, 0x8d, 0x41,
+ 0xaa, 0xbb, 0x4f, 0x68, 0xac, 0xc1, 0xaa, 0xd3, 0xdc, 0x14, 0xa2, 0x01,
+ 0x2a, 0x31, 0xdc, 0x13, 0xa6, 0x01, 0x2a, 0x21, 0x4f, 0x13, 0xa9, 0x41,
+ 0xaa, 0xe5, 0xd8, 0x26, 0x24, 0x01, 0x1d, 0xb0, 0xc8, 0x20, 0x08, 0x01,
+ 0x19, 0x09, 0xcc, 0x8d, 0xf0, 0x01, 0x5e, 0x59, 0xd0, 0x1f, 0x1f, 0x01,
+ 0x72, 0xd9, 0xd1, 0x1c, 0xda, 0x01, 0x72, 0xe0, 0x05, 0xc1, 0xaa, 0xf7,
+ 0xcc, 0x8e, 0x80, 0x01, 0x71, 0x28, 0x05, 0xc1, 0xab, 0x03, 0xcc, 0x8e,
+ 0x80, 0x01, 0x71, 0x20, 0xd0, 0x58, 0x4f, 0x01, 0x4e, 0x91, 0xcf, 0x6c,
+ 0x4e, 0x01, 0x4e, 0x88, 0xca, 0xa3, 0x08, 0x0f, 0xaa, 0x79, 0xca, 0xa7,
+ 0xea, 0x0f, 0xcb, 0x18, 0xc5, 0xd9, 0xdc, 0x0f, 0xa6, 0x88, 0x97, 0x01,
+ 0x8d, 0x00, 0x89, 0x01, 0x89, 0x5b, 0x01, 0xab, 0x0f, 0x90, 0x01, 0x89,
+ 0x78, 0x8a, 0x01, 0x8d, 0xc8, 0x90, 0x01, 0x89, 0x61, 0x97, 0x01, 0x8d,
+ 0x19, 0x8a, 0x01, 0x8d, 0xc1, 0x99, 0x01, 0x8d, 0xe0, 0x99, 0x01, 0x8d,
+ 0xe8, 0x8b, 0x01, 0x8d, 0x10, 0x8a, 0x01, 0x88, 0x99, 0x8b, 0x01, 0x8d,
+ 0x09, 0x9b, 0x01, 0x8d, 0xd0, 0x8a, 0x01, 0x88, 0xa0, 0x8a, 0x01, 0x88,
+ 0xa8, 0x8b, 0x01, 0x88, 0xf3, 0x01, 0xab, 0x13, 0x97, 0x01, 0x89, 0x03,
+ 0x01, 0xab, 0x19, 0x90, 0x01, 0x89, 0x13, 0x01, 0xab, 0x1f, 0x8f, 0x01,
+ 0x8d, 0x81, 0x8a, 0x01, 0x8d, 0xf8, 0x97, 0x01, 0x89, 0x09, 0xcf, 0x35,
+ 0x23, 0x01, 0x89, 0x71, 0x91, 0x01, 0x8d, 0x31, 0x10, 0xc1, 0xab, 0x27,
+ 0x8f, 0x01, 0x8d, 0x89, 0x87, 0x01, 0x8d, 0xf0, 0x8a, 0x01, 0x88, 0xe9,
+ 0x8b, 0x01, 0x88, 0xf9, 0x90, 0x01, 0x89, 0x1b, 0x01, 0xab, 0x2f, 0x94,
+ 0x01, 0x89, 0x31, 0x87, 0x01, 0x8d, 0x20, 0x97, 0x01, 0x89, 0x49, 0x8a,
+ 0x01, 0x89, 0x69, 0x94, 0x01, 0x8d, 0x41, 0xc2, 0x1c, 0x5e, 0x01, 0x8d,
+ 0x53, 0x01, 0xab, 0x37, 0x8f, 0x01, 0x8d, 0x60, 0xc2, 0x1c, 0x5e, 0x01,
+ 0x8d, 0x58, 0xa1, 0x0f, 0xd8, 0x43, 0x01, 0xab, 0x3b, 0x9f, 0x0f, 0xd8,
+ 0x13, 0x01, 0xab, 0x46, 0xa2, 0x0f, 0xd8, 0x83, 0x01, 0xab, 0x5f, 0xa0,
+ 0x0f, 0xd8, 0x23, 0x01, 0xab, 0x63, 0xa3, 0x0f, 0xd8, 0xf8, 0xa2, 0x0f,
+ 0xd8, 0x9b, 0x01, 0xab, 0x74, 0xa1, 0x0f, 0xd8, 0x5b, 0x01, 0xab, 0x78,
+ 0xa3, 0x0f, 0xd9, 0x10, 0xa2, 0x0f, 0xd8, 0x8b, 0x01, 0xab, 0x83, 0xa0,
+ 0x0f, 0xd8, 0x2b, 0x01, 0xab, 0x87, 0xa3, 0x0f, 0xd9, 0x01, 0xa1, 0x0f,
+ 0xd8, 0x4a, 0x01, 0xab, 0x99, 0xa3, 0x0f, 0xd9, 0x68, 0xa3, 0x0f, 0xd9,
+ 0x31, 0xa2, 0x0f, 0xd8, 0xb2, 0x01, 0xab, 0xa0, 0x05, 0xc1, 0xab, 0xa4,
+ 0x15, 0xc1, 0xab, 0xcb, 0x16, 0xc1, 0xac, 0x0e, 0x06, 0xc1, 0xac, 0x2c,
+ 0x14, 0xc1, 0xac, 0x3f, 0x0e, 0xc1, 0xac, 0x51, 0xd6, 0x2f, 0x97, 0x01,
+ 0x3a, 0x99, 0x08, 0xc1, 0xac, 0x61, 0xc3, 0xec, 0x7e, 0x01, 0x38, 0x91,
+ 0x0f, 0xc1, 0xac, 0x69, 0x17, 0xc1, 0xac, 0x75, 0x0a, 0xc1, 0xac, 0x85,
+ 0x12, 0xc1, 0xac, 0x93, 0x43, 0x00, 0x29, 0xc1, 0xac, 0xa5, 0xc6, 0xd3,
+ 0x4c, 0x01, 0x4e, 0x99, 0xc7, 0xc9, 0x27, 0x01, 0x5e, 0x20, 0xd0, 0x20,
+ 0x86, 0x01, 0x3d, 0xb1, 0xd0, 0x01, 0x37, 0x01, 0x3d, 0xa9, 0xd0, 0x3d,
+ 0x06, 0x01, 0x3d, 0xa0, 0x85, 0x01, 0x09, 0x69, 0x9c, 0x01, 0x09, 0x41,
+ 0x94, 0x01, 0x08, 0xe1, 0x8b, 0x01, 0x08, 0x89, 0x8a, 0x01, 0x08, 0x60,
+ 0xca, 0xa5, 0x6a, 0x0f, 0xa5, 0xb9, 0xc9, 0xb2, 0x5b, 0x0f, 0xa5, 0xb1,
+ 0xcb, 0x9b, 0x9f, 0x0f, 0xa5, 0xa9, 0xc8, 0x79, 0x24, 0x0f, 0xa5, 0xa0,
+ 0x11, 0xc1, 0xac, 0xb1, 0x42, 0x00, 0xd0, 0x41, 0xac, 0xbb, 0x0f, 0xc1,
+ 0xac, 0xcb, 0xc3, 0x00, 0xb5, 0x00, 0xda, 0xd2, 0x01, 0xac, 0xda, 0x4a,
+ 0xa6, 0xe6, 0xc1, 0xac, 0xe0, 0x4b, 0x92, 0x99, 0xc1, 0xac, 0xec, 0x4a,
+ 0x57, 0x1f, 0xc1, 0xac, 0xf8, 0x06, 0x41, 0xad, 0x1c, 0x42, 0x07, 0x44,
+ 0xc1, 0xad, 0x36, 0xc4, 0xe4, 0x33, 0x00, 0xda, 0xf0, 0xc4, 0x24, 0x35,
+ 0x00, 0xda, 0xc9, 0xc5, 0x05, 0x1b, 0x00, 0xda, 0xc1, 0x15, 0xc1, 0xad,
+ 0x42, 0x08, 0xc1, 0xad, 0x4e, 0x16, 0xc1, 0xad, 0x5a, 0xc3, 0x05, 0x17,
+ 0x00, 0xda, 0x89, 0xc4, 0x16, 0x57, 0x00, 0xda, 0x80, 0x03, 0xc1, 0xad,
+ 0x66, 0xc9, 0xb7, 0x74, 0x00, 0xda, 0x51, 0xc8, 0xb8, 0x83, 0x00, 0xda,
+ 0x49, 0x07, 0xc1, 0xad, 0x81, 0x16, 0xc1, 0xad, 0x8d, 0x0d, 0xc1, 0xad,
+ 0x9a, 0xc2, 0x01, 0x0e, 0x00, 0xd9, 0x99, 0xc2, 0x0e, 0xe5, 0x00, 0xd9,
+ 0x93, 0x01, 0xad, 0xa7, 0xc2, 0x00, 0x4c, 0x00, 0xd9, 0x79, 0xc2, 0x00,
+ 0x96, 0x00, 0xd9, 0x73, 0x01, 0xad, 0xad, 0xc2, 0x00, 0x9a, 0x00, 0xd9,
+ 0x6b, 0x01, 0xad, 0xb6, 0xc2, 0x1a, 0x36, 0x00, 0xd9, 0x61, 0xc2, 0x00,
+ 0x3f, 0x00, 0xd9, 0x59, 0xc2, 0x02, 0x1d, 0x00, 0xd9, 0x4b, 0x01, 0xad,
+ 0xbf, 0xc2, 0x07, 0x44, 0x00, 0xd9, 0x3b, 0x01, 0xad, 0xc5, 0x10, 0xc1,
+ 0xad, 0xcb, 0xc2, 0x0c, 0x25, 0x00, 0xd9, 0x23, 0x01, 0xad, 0xde, 0xc2,
+ 0x26, 0x94, 0x00, 0xd8, 0xd3, 0x01, 0xad, 0xe4, 0xc2, 0x00, 0x2e, 0x00,
+ 0xd8, 0xc3, 0x01, 0xad, 0xea, 0xc2, 0x07, 0x69, 0x00, 0xd8, 0xab, 0x01,
+ 0xad, 0xf0, 0xc5, 0xd9, 0xc8, 0x00, 0xd8, 0x8b, 0x01, 0xad, 0xf6, 0xc5,
+ 0xdb, 0x08, 0x00, 0xd8, 0x4b, 0x01, 0xad, 0xfc, 0xc5, 0xdc, 0xca, 0x00,
+ 0xd8, 0x3a, 0x01, 0xae, 0x02, 0xc5, 0xda, 0xfe, 0x00, 0xda, 0x13, 0x01,
+ 0xae, 0x08, 0x16, 0xc1, 0xae, 0x0e, 0xc8, 0xbc, 0xdb, 0x00, 0xd9, 0xe3,
+ 0x01, 0xae, 0x1d, 0xc7, 0xc5, 0xbc, 0x00, 0xd9, 0xd3, 0x01, 0xae, 0x23,
+ 0xc4, 0xca, 0xfc, 0x00, 0xd9, 0xc3, 0x01, 0xae, 0x29, 0xc3, 0x9b, 0x38,
+ 0x00, 0xd9, 0xb2, 0x01, 0xae, 0x2f, 0xc7, 0xc5, 0x22, 0x00, 0xd9, 0xa1,
+ 0xc5, 0xdc, 0x8e, 0x00, 0xd8, 0x21, 0xc6, 0xd8, 0xb6, 0x00, 0xd8, 0x19,
+ 0xc5, 0xdf, 0x0e, 0x00, 0xd8, 0x11, 0x44, 0xe4, 0xcb, 0x41, 0xae, 0x35,
+ 0x43, 0x0b, 0x2b, 0xc1, 0xae, 0x41, 0x42, 0x00, 0x36, 0xc1, 0xae, 0x4d,
+ 0xc8, 0xa5, 0xa8, 0x0b, 0x57, 0x90, 0x8b, 0x0b, 0x57, 0x69, 0x87, 0x0b,
+ 0x57, 0x63, 0x01, 0xae, 0x59, 0x97, 0x0b, 0x57, 0x53, 0x01, 0xae, 0x63,
+ 0x91, 0x0b, 0x57, 0x43, 0x01, 0xae, 0x69, 0x83, 0x0b, 0x57, 0x39, 0xc2,
+ 0x00, 0x4c, 0x0b, 0x56, 0xdb, 0x01, 0xae, 0x6d, 0xc2, 0x07, 0x44, 0x0b,
+ 0x57, 0x29, 0x1b, 0xc1, 0xae, 0x73, 0xc2, 0x59, 0xe0, 0x0b, 0x57, 0x19,
+ 0xc2, 0x02, 0x1d, 0x0b, 0x57, 0x11, 0xc2, 0x01, 0x89, 0x0b, 0x57, 0x09,
+ 0xc2, 0x01, 0xa1, 0x0b, 0x56, 0xf9, 0x06, 0xc1, 0xae, 0x7f, 0x09, 0xc1,
+ 0xae, 0x89, 0xc2, 0x03, 0x7a, 0x0b, 0x56, 0xe1, 0xc4, 0xe7, 0x64, 0x0b,
+ 0x56, 0xd1, 0xc2, 0x03, 0x21, 0x0b, 0x56, 0xc9, 0x0d, 0xc1, 0xae, 0x95,
+ 0xc3, 0x04, 0xb0, 0x0b, 0x56, 0xa1, 0xc2, 0x01, 0xa7, 0x0b, 0x56, 0x99,
+ 0xc2, 0x00, 0xa7, 0x0b, 0x56, 0x90, 0x43, 0x22, 0xbe, 0xc1, 0xae, 0x9f,
+ 0x83, 0x05, 0x35, 0x59, 0x07, 0xc1, 0xae, 0xc3, 0x17, 0xc1, 0xae, 0xcd,
+ 0x8b, 0x05, 0x36, 0xe8, 0x83, 0x05, 0x35, 0x09, 0x97, 0x05, 0x35, 0x19,
+ 0xc3, 0xe0, 0xa5, 0x05, 0x35, 0xd1, 0x07, 0xc1, 0xae, 0xd7, 0x91, 0x05,
+ 0x36, 0xfb, 0x01, 0xae, 0xe5, 0x8b, 0x05, 0x37, 0x29, 0xc2, 0x07, 0x44,
+ 0x05, 0x37, 0x48, 0x07, 0xc1, 0xae, 0xf1, 0x0b, 0xc1, 0xae, 0xff, 0x97,
+ 0x05, 0x36, 0x61, 0xc2, 0x08, 0xc6, 0x05, 0x36, 0x88, 0x03, 0xc1, 0xaf,
+ 0x09, 0x8b, 0x05, 0x37, 0x21, 0x07, 0x41, 0xaf, 0x11, 0xc2, 0x18, 0x7a,
+ 0x05, 0x35, 0x41, 0xc3, 0x4a, 0x42, 0x05, 0x35, 0x89, 0x0c, 0xc1, 0xaf,
+ 0x19, 0x97, 0x05, 0x35, 0xeb, 0x01, 0xaf, 0x2b, 0xc3, 0x01, 0x0d, 0x05,
+ 0x36, 0x19, 0x16, 0xc1, 0xaf, 0x31, 0x8b, 0x05, 0x36, 0x79, 0x09, 0xc1,
+ 0xaf, 0x3d, 0x83, 0x05, 0x36, 0xd8, 0x83, 0x05, 0x35, 0x51, 0xc4, 0xe6,
+ 0x53, 0x05, 0x35, 0x71, 0x97, 0x05, 0x36, 0x69, 0x8b, 0x05, 0x36, 0xe1,
+ 0xc2, 0x8b, 0x5c, 0x05, 0x36, 0xf0, 0x07, 0xc1, 0xaf, 0x4d, 0x97, 0x05,
+ 0x35, 0xa9, 0x8b, 0x05, 0x36, 0x71, 0x04, 0xc1, 0xaf, 0x57, 0x83, 0x05,
+ 0x37, 0x19, 0x91, 0x05, 0x37, 0x30, 0xc2, 0x6f, 0x95, 0x05, 0x35, 0xa1,
+ 0x0a, 0xc1, 0xaf, 0x63, 0x8b, 0x05, 0x35, 0xb9, 0xc3, 0xdc, 0x59, 0x05,
+ 0x35, 0xc9, 0xc4, 0xc5, 0x40, 0x05, 0x37, 0x60, 0xc2, 0x8b, 0x5c, 0x05,
+ 0x35, 0xf9, 0xc2, 0x89, 0x93, 0x05, 0x36, 0x09, 0x83, 0x05, 0x36, 0x10,
+ 0xc2, 0x0e, 0x78, 0x05, 0x36, 0x49, 0x83, 0x05, 0x36, 0xd0, 0xc2, 0x01,
+ 0xe6, 0x05, 0x36, 0x59, 0x97, 0x05, 0x36, 0xc1, 0xc2, 0x00, 0xeb, 0x05,
+ 0x36, 0xc9, 0xc5, 0xe3, 0xc8, 0x05, 0x37, 0x68, 0x4c, 0x8e, 0x2c, 0xc1,
+ 0xaf, 0x77, 0xc2, 0x00, 0x3f, 0x05, 0x37, 0xa8, 0xcb, 0x96, 0x2a, 0x0f,
+ 0xac, 0x11, 0xda, 0x19, 0xe8, 0x0f, 0xa8, 0xc8, 0xc4, 0x45, 0xaa, 0x00,
+ 0x00, 0x41, 0x5a, 0x1a, 0xec, 0x41, 0xaf, 0x83, 0x4c, 0x8a, 0x60, 0xc1,
+ 0xaf, 0x8f, 0xc9, 0xae, 0xf2, 0x00, 0xdf, 0x30, 0xc7, 0xcb, 0x73, 0x00,
+ 0xdf, 0x99, 0xc5, 0xc4, 0x9f, 0x00, 0xdf, 0x90, 0x8a, 0x00, 0xdf, 0x89,
+ 0xc2, 0x00, 0x56, 0x00, 0xdf, 0x80, 0x97, 0x00, 0xdf, 0x73, 0x01, 0xaf,
+ 0x9f, 0x45, 0xce, 0x1a, 0xc1, 0xaf, 0xa5, 0x91, 0x00, 0xdf, 0x61, 0x8b,
+ 0x00, 0xdf, 0x51, 0x87, 0x00, 0xdf, 0x3b, 0x01, 0xaf, 0xad, 0xc8, 0xbd,
+ 0x83, 0x00, 0xdf, 0x40, 0x97, 0x00, 0xdf, 0x29, 0x8b, 0x00, 0xdf, 0x21,
+ 0x0f, 0xc1, 0xaf, 0xb1, 0x10, 0xc1, 0xaf, 0xbe, 0xc2, 0x00, 0x2e, 0x00,
+ 0xdf, 0x09, 0x15, 0xc1, 0xaf, 0xda, 0xc2, 0x00, 0x96, 0x00, 0xde, 0xf1,
+ 0xc2, 0x1a, 0x36, 0x00, 0xde, 0xd9, 0xc2, 0x00, 0x9a, 0x00, 0xde, 0x91,
+ 0xc2, 0x0c, 0x25, 0x00, 0xde, 0x89, 0xc2, 0x26, 0x94, 0x00, 0xde, 0x81,
+ 0xc2, 0x07, 0x69, 0x00, 0xde, 0x71, 0xc2, 0x07, 0x44, 0x00, 0xde, 0x3b,
+ 0x01, 0xaf, 0xea, 0xc2, 0x00, 0x4c, 0x00, 0xde, 0x59, 0xc7, 0xce, 0x1a,
+ 0x00, 0xde, 0x31, 0xc2, 0x02, 0x1d, 0x00, 0xde, 0x29, 0xc2, 0x01, 0x0e,
+ 0x00, 0xde, 0x11, 0x83, 0x00, 0xde, 0x00, 0x0d, 0xc1, 0xaf, 0xf0, 0xc2,
+ 0x01, 0x0e, 0x00, 0x4d, 0xc9, 0x15, 0xc1, 0xaf, 0xfd, 0xc2, 0x00, 0x96,
+ 0x00, 0x4d, 0x91, 0x14, 0xc1, 0xb0, 0x0d, 0x1b, 0xc1, 0xb0, 0x20, 0xc2,
+ 0x00, 0x3f, 0x00, 0x4d, 0x71, 0x04, 0xc1, 0xb0, 0x2a, 0x12, 0xc1, 0xb0,
+ 0x34, 0x10, 0xc1, 0xb0, 0x3e, 0x06, 0xc1, 0xb0, 0x54, 0x16, 0xc1, 0xb0,
+ 0x62, 0x0c, 0xc1, 0xb0, 0x70, 0x05, 0xc1, 0xb0, 0x7a, 0x09, 0xc1, 0xb0,
+ 0x84, 0x83, 0x00, 0x4c, 0x2b, 0x01, 0xb0, 0x8e, 0x91, 0x00, 0x4c, 0x99,
+ 0x8b, 0x00, 0x4c, 0x3b, 0x01, 0xb0, 0x9a, 0x97, 0x00, 0x4c, 0x4b, 0x01,
+ 0xb0, 0x9e, 0x18, 0xc1, 0xb0, 0xa2, 0x87, 0x00, 0x4c, 0x78, 0x44, 0x02,
+ 0xcc, 0xc1, 0xb0, 0xae, 0xca, 0xa6, 0xb4, 0x00, 0x4f, 0xf0, 0x03, 0xc1,
+ 0xb0, 0xc4, 0x91, 0x00, 0x4e, 0x59, 0x87, 0x00, 0x4e, 0x39, 0x48, 0xb7,
+ 0xd7, 0xc1, 0xb0, 0xd0, 0x97, 0x00, 0x4e, 0x0b, 0x01, 0xb0, 0xde, 0x8b,
+ 0x00, 0x4d, 0xfa, 0x01, 0xb0, 0xe2, 0xcd, 0x74, 0xe7, 0x00, 0x4e, 0xb9,
+ 0xc3, 0x81, 0xeb, 0x00, 0x4c, 0x01, 0xd0, 0x56, 0x10, 0x00, 0x4f, 0xe8,
+ 0xc4, 0x16, 0x57, 0x00, 0x4f, 0x31, 0xc3, 0x05, 0x17, 0x00, 0x4f, 0x39,
+ 0x16, 0xc1, 0xb0, 0xe6, 0x08, 0xc1, 0xb0, 0xf2, 0x15, 0xc1, 0xb0, 0xfe,
+ 0xc5, 0x05, 0x1b, 0x00, 0x4f, 0x71, 0xc4, 0x24, 0x35, 0x00, 0x4f, 0x78,
+ 0xc4, 0x00, 0xfa, 0x00, 0x4f, 0x91, 0xc4, 0x02, 0xcb, 0x00, 0x4f, 0x98,
+ 0x4a, 0x7b, 0x34, 0xc1, 0xb1, 0x0a, 0xd3, 0x42, 0xdc, 0x00, 0x4f, 0xc8,
+ 0xe0, 0x03, 0xc7, 0x01, 0x5a, 0xf0, 0xc2, 0x08, 0xc6, 0x00, 0xd0, 0xd9,
+ 0x91, 0x00, 0xd0, 0xd1, 0x87, 0x00, 0xd0, 0xc9, 0x97, 0x00, 0xd0, 0xc1,
+ 0x8b, 0x00, 0xd0, 0xb8, 0xc2, 0x01, 0x0e, 0x00, 0xd0, 0xb1, 0x83, 0x00,
+ 0xd0, 0xa9, 0xc2, 0x0e, 0xe5, 0x00, 0xd0, 0xa1, 0xc2, 0x06, 0x8c, 0x00,
+ 0xd0, 0x99, 0xc2, 0x00, 0x96, 0x00, 0xd0, 0x91, 0xc2, 0x00, 0x9a, 0x00,
+ 0xd0, 0x89, 0xc2, 0x1a, 0x36, 0x00, 0xd0, 0x81, 0x10, 0xc1, 0xb1, 0x1d,
+ 0xc2, 0x26, 0x94, 0x00, 0xd0, 0x69, 0xc2, 0x00, 0x2e, 0x00, 0xd0, 0x61,
+ 0xc2, 0x0c, 0x25, 0x00, 0xd0, 0x49, 0xc2, 0x00, 0x44, 0x00, 0xd0, 0x41,
+ 0x0f, 0xc1, 0xb1, 0x2f, 0xc2, 0x02, 0x1d, 0x00, 0xd0, 0x29, 0xc2, 0x07,
+ 0x44, 0x00, 0xd0, 0x21, 0xc2, 0x07, 0x69, 0x00, 0xd0, 0x09, 0xc2, 0x06,
+ 0x6b, 0x00, 0xd0, 0x00, 0x83, 0x00, 0xba, 0x41, 0xc2, 0x07, 0x69, 0x00,
+ 0xba, 0x28, 0x43, 0x05, 0xe3, 0xc1, 0xb1, 0x39, 0xc5, 0xb5, 0x92, 0x01,
+ 0x40, 0x00, 0xc6, 0x5b, 0xd9, 0x08, 0x83, 0xf9, 0xc3, 0x05, 0x17, 0x08,
+ 0x82, 0x93, 0x01, 0xb1, 0x7e, 0xc4, 0x24, 0x35, 0x08, 0x82, 0xd3, 0x01,
+ 0xb1, 0x82, 0xc5, 0x05, 0x1b, 0x08, 0x82, 0xcb, 0x01, 0xb1, 0x88, 0x15,
+ 0xc1, 0xb1, 0x8c, 0x08, 0xc1, 0xb1, 0x9e, 0x16, 0x41, 0xb1, 0xa6, 0x91,
+ 0x08, 0x80, 0x8b, 0x01, 0xb1, 0xb4, 0x0e, 0xc1, 0xb1, 0xba, 0xc2, 0x01,
+ 0x0e, 0x08, 0x81, 0x99, 0xc2, 0x00, 0x9a, 0x08, 0x81, 0x69, 0xc2, 0x1a,
+ 0x36, 0x08, 0x81, 0x61, 0xc2, 0x00, 0x3f, 0x08, 0x81, 0x59, 0x04, 0xc1,
+ 0xb1, 0xc4, 0x12, 0xc1, 0xb1, 0xce, 0x10, 0xc1, 0xb1, 0xd8, 0x06, 0xc1,
+ 0xb1, 0xee, 0x16, 0xc1, 0xb1, 0xfc, 0x0c, 0xc1, 0xb2, 0x0a, 0x05, 0xc1,
+ 0xb2, 0x14, 0x09, 0xc1, 0xb2, 0x1e, 0x0d, 0xc1, 0xb2, 0x28, 0x83, 0x08,
+ 0x80, 0x2b, 0x01, 0xb2, 0x32, 0x87, 0x08, 0x80, 0x79, 0x18, 0xc1, 0xb2,
+ 0x3e, 0x97, 0x08, 0x80, 0x4b, 0x01, 0xb2, 0x48, 0x8b, 0x08, 0x80, 0x3b,
+ 0x01, 0xb2, 0x4c, 0x15, 0x41, 0xb2, 0x50, 0x4a, 0x6f, 0x72, 0xc1, 0xb2,
+ 0x60, 0xc5, 0x21, 0x27, 0x08, 0x82, 0x30, 0xd0, 0x5a, 0x0f, 0x08, 0x83,
+ 0x81, 0xcb, 0x9c, 0xb2, 0x08, 0x80, 0x21, 0xcb, 0x93, 0xc2, 0x08, 0x80,
+ 0x19, 0xcb, 0x21, 0x1a, 0x08, 0x80, 0x01, 0xc8, 0x10, 0xab, 0x08, 0x80,
+ 0x09, 0xc7, 0x45, 0xcd, 0x08, 0x80, 0x10, 0x45, 0x08, 0xd8, 0xc1, 0xb2,
+ 0x89, 0xcb, 0x91, 0xff, 0x08, 0x82, 0x41, 0xc4, 0x1c, 0xb3, 0x08, 0x82,
+ 0x38, 0x0e, 0xc1, 0xb2, 0xad, 0xcc, 0x84, 0x18, 0x08, 0x82, 0x61, 0x42,
+ 0x00, 0x68, 0x41, 0xb2, 0xb9, 0x42, 0x13, 0xf3, 0xc1, 0xb2, 0xc3, 0x4a,
+ 0xa1, 0x64, 0x41, 0xb2, 0xcf, 0xc6, 0x2c, 0x81, 0x0e, 0x86, 0xc9, 0xc6,
+ 0xd4, 0x54, 0x0e, 0x86, 0xc0, 0x00, 0x41, 0xb2, 0xdb, 0x00, 0xc1, 0xb2,
+ 0xe7, 0xc2, 0x00, 0x44, 0x0e, 0x80, 0x82, 0x01, 0xb2, 0xf3, 0xc5, 0x5c,
+ 0x8a, 0x0e, 0x84, 0x49, 0xc6, 0xb5, 0x13, 0x0e, 0x82, 0x51, 0xc6, 0xd0,
+ 0xca, 0x0e, 0x81, 0xd2, 0x01, 0xb2, 0xf7, 0x44, 0xe5, 0x67, 0xc1, 0xb2,
+ 0xfd, 0xc6, 0xd0, 0xfa, 0x0e, 0x80, 0x60, 0x43, 0x0e, 0x35, 0xc1, 0xb3,
+ 0x05, 0xc5, 0xdf, 0xa9, 0x0e, 0x80, 0x38, 0x46, 0xd7, 0x48, 0xc1, 0xb3,
+ 0x11, 0x42, 0x13, 0xf3, 0x41, 0xb3, 0x3b, 0x11, 0xc1, 0xb3, 0x45, 0xc2,
+ 0x02, 0xb5, 0x0e, 0x84, 0x29, 0x45, 0xdc, 0xd9, 0x41, 0xb3, 0x57, 0x45,
+ 0xda, 0x81, 0xc1, 0xb3, 0x63, 0x44, 0xd5, 0x6e, 0xc1, 0xb3, 0x6f, 0x42,
+ 0x01, 0x03, 0xc1, 0xb3, 0x79, 0x43, 0x0e, 0xa7, 0x41, 0xb3, 0x85, 0x46,
+ 0xd6, 0x58, 0xc1, 0xb3, 0x8f, 0xca, 0x9d, 0x68, 0x0e, 0x81, 0x40, 0xc4,
+ 0x19, 0x8f, 0x0e, 0x87, 0x41, 0xc5, 0xdc, 0x2a, 0x0e, 0x83, 0xf3, 0x01,
+ 0xb3, 0x9b, 0xca, 0xa4, 0x52, 0x0e, 0x82, 0x20, 0xc6, 0xd9, 0x7c, 0x0e,
+ 0x87, 0x13, 0x01, 0xb3, 0xa1, 0xc7, 0xcd, 0xd4, 0x0e, 0x86, 0xf2, 0x01,
+ 0xb3, 0xa5, 0xc4, 0x7f, 0xdc, 0x0e, 0x83, 0x48, 0xc3, 0x07, 0x49, 0x0e,
+ 0x83, 0x33, 0x01, 0xb3, 0xa9, 0x10, 0x41, 0xb3, 0xaf, 0xca, 0x9f, 0xd4,
+ 0x0e, 0x87, 0x39, 0x09, 0xc1, 0xb3, 0xbb, 0x03, 0xc1, 0xb3, 0xca, 0x45,
+ 0x1c, 0xe7, 0xc1, 0xb3, 0xd6, 0xc3, 0x20, 0x37, 0x0e, 0x84, 0x32, 0x01,
+ 0xb3, 0xec, 0x44, 0x1c, 0xe8, 0xc1, 0xb3, 0xf2, 0x42, 0x00, 0xf8, 0x41,
+ 0xb4, 0x0a, 0x11, 0xc1, 0xb4, 0x16, 0xc4, 0x7d, 0xa4, 0x0e, 0x82, 0x80,
+ 0xd4, 0x3b, 0xc2, 0x0e, 0x86, 0x61, 0xd6, 0x2c, 0x7f, 0x0e, 0x86, 0x59,
+ 0x10, 0xc1, 0xb4, 0x25, 0x48, 0x1d, 0x14, 0xc1, 0xb4, 0x31, 0x4f, 0x69,
+ 0x33, 0xc1, 0xb4, 0x3d, 0x4a, 0xa9, 0x84, 0xc1, 0xb4, 0x49, 0xc8, 0xa1,
+ 0xd4, 0x0e, 0x81, 0xa2, 0x01, 0xb4, 0x65, 0xc8, 0xb8, 0x6b, 0x0e, 0x85,
+ 0x81, 0xca, 0xa8, 0xee, 0x0e, 0x85, 0x79, 0xcb, 0x8f, 0xd9, 0x0e, 0x85,
+ 0x70, 0xc6, 0xd7, 0x9c, 0x0e, 0x86, 0x51, 0xc6, 0xd7, 0x66, 0x0e, 0x86,
+ 0x49, 0xc5, 0xd9, 0xbe, 0x0e, 0x86, 0x40, 0xc3, 0x73, 0xe8, 0x0e, 0x83,
+ 0x39, 0xc8, 0xa1, 0xd4, 0x0e, 0x81, 0xd8, 0x8b, 0x0e, 0x82, 0xb1, 0xc2,
+ 0x00, 0xe5, 0x0e, 0x80, 0xc0, 0x08, 0xc1, 0xb4, 0x6b, 0xc7, 0xcb, 0xc0,
+ 0x0e, 0x84, 0xc0, 0xd5, 0x34, 0x60, 0x0e, 0x85, 0x61, 0x43, 0x00, 0xf7,
+ 0x41, 0xb4, 0x77, 0xd4, 0x3f, 0x0a, 0x0e, 0x85, 0xb1, 0xc7, 0xc4, 0xd5,
+ 0x0e, 0x83, 0xd8, 0xcd, 0x7a, 0xa5, 0x0e, 0x83, 0xa1, 0xcb, 0x91, 0x02,
+ 0x0e, 0x83, 0x00, 0x12, 0xc1, 0xb4, 0x83, 0xcb, 0x9a, 0xb8, 0x0e, 0x85,
+ 0x89, 0xcd, 0x7a, 0x4a, 0x0e, 0x85, 0x51, 0x16, 0xc1, 0xb4, 0x8f, 0x45,
+ 0xe3, 0x82, 0xc1, 0xb4, 0x9b, 0xce, 0x6e, 0xfe, 0x0e, 0x85, 0x20, 0x0b,
+ 0xc1, 0xb4, 0xa7, 0x45, 0xb1, 0xc5, 0x41, 0xb4, 0xb7, 0xc6, 0xd5, 0xd4,
+ 0x0e, 0x84, 0x41, 0xc5, 0x13, 0x28, 0x0e, 0x81, 0x89, 0xc4, 0xb1, 0xa3,
+ 0x0e, 0x80, 0x78, 0x07, 0xc1, 0xb4, 0xcd, 0xc3, 0x01, 0xfa, 0x0e, 0x80,
+ 0xa0, 0x45, 0xdc, 0x9d, 0xc1, 0xb4, 0xdc, 0xc3, 0xc1, 0xdd, 0x0e, 0x81,
+ 0x70, 0xc3, 0x73, 0xe8, 0x0e, 0x83, 0xa9, 0xc8, 0xa1, 0xd4, 0x0e, 0x81,
+ 0x60, 0x00, 0xc1, 0xb4, 0xf2, 0xca, 0xa1, 0xd2, 0x0e, 0x81, 0x00, 0xc3,
+ 0x73, 0xe8, 0x0e, 0x82, 0x39, 0xc8, 0xa1, 0xd4, 0x0e, 0x80, 0xa8, 0x45,
+ 0xbe, 0x2d, 0xc1, 0xb5, 0x04, 0x0e, 0x41, 0xb5, 0x1d, 0x42, 0x06, 0x6e,
+ 0xc1, 0xb5, 0x27, 0xc5, 0xdf, 0x1d, 0x0e, 0x80, 0xf0, 0xc3, 0x73, 0xe8,
+ 0x0e, 0x82, 0xc9, 0xc8, 0xa1, 0xd4, 0x0e, 0x81, 0x30, 0xc6, 0xd8, 0x02,
+ 0x0e, 0x81, 0xc3, 0x01, 0xb5, 0x36, 0x43, 0x90, 0x61, 0xc1, 0xb5, 0x3c,
+ 0xc9, 0x90, 0xcd, 0x0e, 0x80, 0x10, 0x00, 0xc1, 0xb5, 0x46, 0xca, 0xa1,
+ 0xd2, 0x0e, 0x81, 0x08, 0xc2, 0x0d, 0x8b, 0x08, 0xe3, 0x48, 0xc2, 0x0d,
+ 0x8b, 0x08, 0xe3, 0x40, 0xc3, 0x41, 0xca, 0x08, 0xe3, 0x39, 0xc2, 0x00,
+ 0x29, 0x08, 0xe2, 0xf0, 0xc3, 0x0d, 0x8a, 0x08, 0xe3, 0x31, 0xc2, 0x00,
+ 0xd3, 0x08, 0xe2, 0xe8, 0xc4, 0x0d, 0x89, 0x08, 0xe3, 0x29, 0xc3, 0x05,
+ 0xdf, 0x08, 0xe2, 0xe0, 0xc4, 0x15, 0xa9, 0x08, 0xe3, 0x21, 0x91, 0x08,
+ 0xe2, 0xd8, 0xc4, 0x15, 0xa7, 0x08, 0xe2, 0xb9, 0xc2, 0x22, 0x45, 0x08,
+ 0xe2, 0xb0, 0xc3, 0x0d, 0x8f, 0x08, 0xe2, 0xa9, 0xc3, 0x08, 0xde, 0x08,
+ 0xe2, 0xa0, 0xc4, 0x05, 0xde, 0x08, 0xe2, 0x99, 0xc2, 0x0a, 0x20, 0x08,
+ 0xe2, 0x90, 0x94, 0x08, 0xe1, 0xa8, 0x8e, 0x08, 0xe0, 0x41, 0x94, 0x08,
+ 0xe0, 0x32, 0x01, 0xb5, 0x58, 0xc2, 0x01, 0x0e, 0x08, 0xe0, 0xd9, 0x83,
+ 0x08, 0xe0, 0xd0, 0xc2, 0x01, 0x0e, 0x08, 0xe0, 0xc9, 0x83, 0x08, 0xe0,
+ 0xc0, 0x46, 0x00, 0x52, 0xc1, 0xb5, 0x5c, 0x04, 0xc1, 0xb5, 0x68, 0xd5,
+ 0x37, 0xbd, 0x01, 0x2e, 0xf9, 0xc6, 0xd5, 0x08, 0x0f, 0xac, 0x69, 0x12,
+ 0xc1, 0xb5, 0x74, 0xcc, 0x83, 0xac, 0x0f, 0xac, 0x59, 0xe0, 0x00, 0xa7,
+ 0x01, 0x49, 0xf8, 0x46, 0x00, 0x52, 0xc1, 0xb5, 0x80, 0xcf, 0x64, 0xfb,
+ 0x01, 0x3e, 0x99, 0x15, 0xc1, 0xb5, 0x8c, 0xda, 0x1b, 0xd6, 0x01, 0x3a,
+ 0x79, 0xc6, 0xd8, 0x5c, 0x01, 0x38, 0x71, 0xd5, 0x37, 0xbd, 0x01, 0x2e,
+ 0xf1, 0x4f, 0x65, 0xcd, 0x41, 0xb5, 0x98, 0xdb, 0x15, 0x64, 0x0f, 0xdb,
+ 0x79, 0x45, 0x05, 0xde, 0x41, 0xb5, 0xa4, 0xc6, 0x03, 0xfa, 0x01, 0x2f,
+ 0x09, 0xd4, 0x39, 0x42, 0x01, 0x2e, 0xd9, 0xc5, 0x01, 0x22, 0x01, 0x2c,
+ 0x21, 0xcc, 0x04, 0x1b, 0x0f, 0xdc, 0x78, 0xcd, 0x15, 0x72, 0x01, 0x2c,
+ 0x11, 0xcc, 0x05, 0x1b, 0x01, 0x2c, 0x08, 0xc6, 0xd8, 0x98, 0x0f, 0xd5,
+ 0x59, 0xd0, 0x50, 0xd1, 0x0f, 0xa8, 0x28, 0xc9, 0x35, 0x23, 0x01, 0x72,
+ 0x40, 0xce, 0x76, 0x60, 0x01, 0x3f, 0xf9, 0xcc, 0x82, 0x80, 0x01, 0x3f,
+ 0xcb, 0x01, 0xb5, 0xb0, 0xc5, 0x00, 0x62, 0x01, 0x3f, 0xb2, 0x01, 0xb5,
+ 0xb6, 0xcc, 0x82, 0x80, 0x01, 0x3f, 0xc3, 0x01, 0xb5, 0xbc, 0xc5, 0x00,
+ 0x62, 0x01, 0x3f, 0xab, 0x01, 0xb5, 0xc2, 0xce, 0x76, 0x60, 0x01, 0x59,
+ 0x98, 0x46, 0x01, 0x17, 0xc1, 0xb5, 0xc8, 0xc4, 0x32, 0x64, 0x01, 0x3e,
+ 0xf0, 0xe0, 0x04, 0xa7, 0x01, 0x57, 0x30, 0x45, 0x01, 0xac, 0xc1, 0xb5,
+ 0xd4, 0xd7, 0x2b, 0x03, 0x01, 0x52, 0xc8, 0xcf, 0x64, 0xec, 0x01, 0x52,
+ 0xe1, 0xcb, 0x96, 0xc4, 0x01, 0x52, 0xd1, 0x42, 0x00, 0x68, 0xc1, 0xb5,
+ 0xe6, 0xc8, 0x50, 0x0d, 0x01, 0x52, 0xf8, 0x03, 0xc1, 0xb5, 0xf2, 0x42,
+ 0x01, 0x0e, 0xc1, 0xb5, 0xfa, 0x14, 0xc1, 0xb6, 0x06, 0xc8, 0x5e, 0x57,
+ 0x01, 0x3e, 0xe1, 0x11, 0xc1, 0xb6, 0x12, 0xd6, 0x2d, 0xf5, 0x01, 0x3e,
+ 0x29, 0x05, 0xc1, 0xb6, 0x1e, 0x16, 0xc1, 0xb6, 0x39, 0x08, 0xc1, 0xb6,
+ 0x4d, 0x15, 0xc1, 0xb6, 0x57, 0x4a, 0x02, 0x5b, 0xc1, 0xb6, 0x74, 0xcb,
+ 0x1c, 0xe0, 0x00, 0x01, 0x43, 0x01, 0xb6, 0x80, 0xe0, 0x07, 0x47, 0x01,
+ 0x16, 0x49, 0x42, 0x00, 0x68, 0xc1, 0xb6, 0x84, 0x19, 0xc1, 0xb6, 0x90,
+ 0x04, 0xc1, 0xb6, 0xa2, 0x0e, 0x41, 0xb6, 0xae, 0x19, 0xc1, 0xb6, 0xba,
+ 0x16, 0xc1, 0xb6, 0xc9, 0xd0, 0x5c, 0xef, 0x0f, 0xc1, 0xe1, 0x44, 0x00,
+ 0x62, 0xc1, 0xb6, 0xd9, 0x14, 0xc1, 0xb6, 0xe6, 0xd1, 0x51, 0xbe, 0x01,
+ 0x0f, 0xe9, 0x06, 0xc1, 0xb6, 0xf2, 0x15, 0xc1, 0xb6, 0xfe, 0x0a, 0xc1,
+ 0xb7, 0x0a, 0x12, 0xc1, 0xb7, 0x14, 0x04, 0xc1, 0xb7, 0x20, 0xcf, 0x62,
+ 0x1c, 0x01, 0x5a, 0x29, 0x08, 0xc1, 0xb7, 0x32, 0xd7, 0x27, 0x3d, 0x0f,
+ 0xc5, 0x20, 0xca, 0x50, 0xa4, 0x00, 0x7e, 0xc0, 0xcc, 0x06, 0xfb, 0x01,
+ 0x13, 0xb1, 0x43, 0x00, 0x92, 0xc1, 0xb7, 0x3e, 0xd0, 0x59, 0xbf, 0x01,
+ 0x53, 0xeb, 0x01, 0xb7, 0x4a, 0xcb, 0x19, 0xd2, 0x01, 0x54, 0x28, 0xcf,
+ 0x09, 0x58, 0x01, 0x4b, 0xb1, 0x44, 0x02, 0x91, 0xc1, 0xb7, 0x50, 0x15,
+ 0xc1, 0xb7, 0x56, 0x44, 0x06, 0xfb, 0x41, 0xb7, 0x62, 0xd8, 0x23, 0x24,
+ 0x01, 0x54, 0x39, 0xcf, 0x63, 0x75, 0x01, 0x54, 0x48, 0xc2, 0x0c, 0x25,
+ 0x00, 0xe2, 0x79, 0xc2, 0x05, 0x5c, 0x00, 0xe0, 0xc9, 0x83, 0x00, 0xe0,
+ 0x60, 0x16, 0xc1, 0xb7, 0x68, 0x15, 0xc1, 0xb7, 0x72, 0xc2, 0x01, 0x0e,
+ 0x00, 0xe0, 0x59, 0x83, 0x00, 0xe0, 0x50, 0xc2, 0x01, 0x0e, 0x00, 0xe1,
+ 0x09, 0x83, 0x00, 0xe1, 0x00, 0xc2, 0x00, 0x96, 0x00, 0xe0, 0xf1, 0x83,
+ 0x00, 0xe0, 0xe8, 0xc2, 0x00, 0x96, 0x00, 0xe0, 0xb1, 0x83, 0x00, 0xe0,
+ 0xa8, 0xc2, 0x00, 0x96, 0x00, 0xe0, 0xa1, 0x83, 0x00, 0xe0, 0x98, 0xc2,
+ 0x00, 0x96, 0x00, 0xe0, 0x91, 0x83, 0x00, 0xe0, 0x88, 0xc2, 0x01, 0x0e,
+ 0x00, 0xe0, 0x81, 0xc2, 0x00, 0x96, 0x00, 0xe0, 0x79, 0x83, 0x00, 0xe0,
+ 0x70, 0x83, 0x00, 0xe0, 0x69, 0xc2, 0x1a, 0x36, 0x00, 0xe0, 0x49, 0xc2,
+ 0x07, 0x69, 0x00, 0xe0, 0x28, 0xc2, 0x01, 0x0e, 0x00, 0xe0, 0x39, 0x83,
+ 0x00, 0xe0, 0x30, 0xc2, 0x00, 0x96, 0x00, 0xe0, 0x21, 0x83, 0x00, 0xe0,
+ 0x18, 0xc2, 0x01, 0x0e, 0x00, 0xe0, 0x11, 0xc2, 0x00, 0x96, 0x00, 0xe0,
+ 0x09, 0x83, 0x00, 0xe0, 0x00, 0xc4, 0x15, 0xa7, 0x00, 0xe2, 0x39, 0xc2,
+ 0x22, 0x45, 0x00, 0xe2, 0x30, 0xc3, 0x0d, 0x8f, 0x00, 0xe2, 0x29, 0xc3,
+ 0x08, 0xde, 0x00, 0xe2, 0x20, 0xc4, 0x05, 0xde, 0x00, 0xe2, 0x19, 0xc2,
+ 0x0a, 0x20, 0x00, 0xe2, 0x10, 0xc5, 0xe2, 0xfb, 0x00, 0xe1, 0xfb, 0x01,
+ 0xb7, 0x7c, 0xc5, 0x47, 0xdd, 0x00, 0xe1, 0xd8, 0xc5, 0x33, 0x1a, 0x00,
+ 0xe1, 0xb9, 0xc3, 0x05, 0x2a, 0x00, 0xe1, 0xb0, 0xc2, 0x00, 0x9a, 0x00,
+ 0xe1, 0x29, 0xc2, 0x1a, 0x36, 0x00, 0xe1, 0x20, 0xc3, 0x00, 0x55, 0x00,
+ 0xe1, 0xa8, 0xc6, 0xd2, 0x3e, 0x00, 0xe1, 0xa0, 0x97, 0x00, 0xe1, 0x58,
+ 0x91, 0x00, 0xe1, 0x48, 0x15, 0xc1, 0xb7, 0x82, 0xcc, 0x1b, 0xe4, 0x0f,
+ 0xbc, 0x71, 0x14, 0xc1, 0xb7, 0x94, 0x44, 0x00, 0xcd, 0xc1, 0xb7, 0xa0,
+ 0xcc, 0x02, 0x5b, 0x01, 0x3a, 0xc1, 0xca, 0xa1, 0x3c, 0x0f, 0xaf, 0xc1,
+ 0x08, 0xc1, 0xb7, 0xa6, 0xcb, 0x5e, 0x74, 0x0f, 0xbd, 0x11, 0xd5, 0x35,
+ 0x5c, 0x0f, 0xbd, 0xd9, 0x16, 0xc1, 0xb7, 0xb2, 0xca, 0xa0, 0xf6, 0x0f,
+ 0xd3, 0xa8, 0xc5, 0xda, 0xea, 0x0f, 0xaf, 0x92, 0x01, 0xb7, 0xbe, 0xc2,
+ 0x01, 0x0e, 0x08, 0xfd, 0x81, 0x83, 0x05, 0x27, 0x60, 0x83, 0x05, 0x26,
+ 0x89, 0xc2, 0x01, 0x0e, 0x05, 0x26, 0x90, 0x83, 0x05, 0x26, 0x99, 0xc2,
+ 0x05, 0x5c, 0x05, 0x26, 0xe0, 0x83, 0x05, 0x26, 0xa1, 0xc2, 0x01, 0x0e,
+ 0x05, 0x26, 0xa9, 0x15, 0xc1, 0xb7, 0xc4, 0x44, 0x05, 0x17, 0x41, 0xb7,
+ 0xce, 0x83, 0x05, 0x26, 0xb1, 0xc2, 0x01, 0x0e, 0x05, 0x27, 0x68, 0x83,
+ 0x05, 0x26, 0xb9, 0xc2, 0x01, 0x0e, 0x05, 0x26, 0xc0, 0x83, 0x05, 0x26,
+ 0xd1, 0xc2, 0x01, 0x0e, 0x05, 0x26, 0xd8, 0x83, 0x05, 0x27, 0x01, 0xc2,
+ 0x07, 0x69, 0x05, 0x27, 0x28, 0x83, 0x05, 0x27, 0x11, 0xc2, 0x01, 0x0e,
+ 0x05, 0x27, 0x58, 0xc2, 0x01, 0x0e, 0x05, 0x27, 0x19, 0x83, 0x05, 0x27,
+ 0x20, 0x83, 0x05, 0x27, 0x31, 0xc2, 0x01, 0x0e, 0x05, 0x27, 0x40, 0x87,
+ 0x05, 0x27, 0x78, 0x97, 0x05, 0x27, 0x88, 0x87, 0x05, 0x27, 0xb8, 0x87,
+ 0x05, 0x27, 0xa9, 0x8a, 0x05, 0x27, 0xb0, 0xc9, 0x1b, 0xac, 0x01, 0x01,
+ 0x41, 0xca, 0x37, 0xb3, 0x00, 0x00, 0x5b, 0x01, 0xb7, 0xda, 0xc4, 0x1b,
+ 0xa7, 0x00, 0x00, 0x51, 0x4c, 0x8b, 0xa4, 0x41, 0xb7, 0xe0, 0x48, 0xb9,
+ 0x5b, 0xc1, 0xb7, 0xec, 0x42, 0x02, 0x20, 0x41, 0xb8, 0x14, 0xc4, 0x24,
+ 0x35, 0x00, 0xca, 0x79, 0xc5, 0x05, 0x1b, 0x00, 0xca, 0x71, 0x15, 0xc1,
+ 0xb8, 0x26, 0x08, 0xc1, 0xb8, 0x32, 0x16, 0xc1, 0xb8, 0x3e, 0xc3, 0x05,
+ 0x17, 0x00, 0xca, 0x39, 0xc4, 0x16, 0x57, 0x00, 0xca, 0x30, 0x44, 0x02,
+ 0xcc, 0xc1, 0xb8, 0x4a, 0x4c, 0x27, 0x72, 0xc1, 0xb8, 0x62, 0x50, 0x5d,
+ 0xcf, 0x41, 0xb8, 0x90, 0x47, 0x46, 0xeb, 0xc1, 0xb8, 0xa2, 0xd0, 0x5f,
+ 0x0f, 0x00, 0xc8, 0x00, 0x16, 0xc1, 0xb8, 0xbf, 0x09, 0xc1, 0xb8, 0xcf,
+ 0xc2, 0x01, 0x0e, 0x00, 0xc8, 0xe1, 0x15, 0xc1, 0xb8, 0xdf, 0xc2, 0x00,
+ 0x4c, 0x00, 0xc8, 0xc1, 0xc2, 0x00, 0x96, 0x00, 0xc8, 0xb9, 0xc2, 0x00,
+ 0x9a, 0x00, 0xc8, 0xb1, 0xc2, 0x1a, 0x36, 0x00, 0xc8, 0xab, 0x01, 0xb8,
+ 0xef, 0xc2, 0x00, 0x3f, 0x00, 0xc8, 0xa1, 0x04, 0xc1, 0xb8, 0xf3, 0x12,
+ 0xc1, 0xb8, 0xfd, 0x10, 0xc1, 0xb9, 0x07, 0x06, 0xc1, 0xb9, 0x11, 0x0c,
+ 0xc1, 0xb9, 0x1b, 0x05, 0xc1, 0xb9, 0x25, 0x0d, 0x41, 0xb9, 0x2f, 0x90,
+ 0x08, 0x49, 0xc0, 0x9b, 0x08, 0x49, 0xb8, 0x90, 0x08, 0x49, 0xb0, 0x90,
+ 0x08, 0x49, 0xa8, 0x96, 0x08, 0x49, 0xa0, 0x95, 0x08, 0x49, 0x70, 0x04,
+ 0xc1, 0xb9, 0x39, 0x44, 0x01, 0xed, 0xc1, 0xb9, 0x45, 0x46, 0x82, 0x05,
+ 0xc1, 0xb9, 0x51, 0xc9, 0x32, 0x5f, 0x01, 0x3e, 0xc9, 0xc7, 0xc7, 0x52,
+ 0x01, 0x3e, 0xc1, 0xc6, 0x03, 0xfa, 0x01, 0x2f, 0x79, 0x11, 0xc1, 0xb9,
+ 0x5d, 0x16, 0xc1, 0xb9, 0x69, 0xd6, 0x2c, 0x11, 0x01, 0x50, 0xf1, 0x47,
+ 0xc5, 0xfb, 0xc1, 0xb9, 0x75, 0x47, 0xcf, 0x4e, 0x41, 0xb9, 0x81, 0xcc,
+ 0x25, 0x70, 0x01, 0x55, 0x68, 0x0e, 0xc1, 0xb9, 0x8d, 0x4f, 0x01, 0xf7,
+ 0x41, 0xb9, 0x99, 0x96, 0x01, 0x04, 0xe1, 0x95, 0x01, 0x04, 0xdb, 0x01,
+ 0xb9, 0xa5, 0x92, 0x01, 0x04, 0xd1, 0x90, 0x01, 0x04, 0xc9, 0x8f, 0x01,
+ 0x04, 0xc1, 0x8e, 0x01, 0x04, 0xb9, 0x8d, 0x01, 0x04, 0xb1, 0x8a, 0x01,
+ 0x04, 0xa9, 0x9a, 0x01, 0x04, 0x99, 0x91, 0x01, 0x04, 0x91, 0x87, 0x01,
+ 0x04, 0x89, 0x83, 0x01, 0x04, 0x81, 0x98, 0x00, 0xeb, 0x29, 0x97, 0x00,
+ 0xeb, 0x21, 0x94, 0x00, 0xeb, 0x19, 0x8b, 0x00, 0xeb, 0x11, 0x8c, 0x01,
+ 0x63, 0xe0, 0x4d, 0x32, 0x07, 0xc1, 0xb9, 0xab, 0xca, 0x9f, 0xfc, 0x00,
+ 0x14, 0xbb, 0x01, 0xba, 0x2a, 0xce, 0x6d, 0x22, 0x05, 0x3c, 0x78, 0x46,
+ 0x01, 0xab, 0x41, 0xba, 0x30, 0xcd, 0x80, 0x96, 0x00, 0x0e, 0x1b, 0x01,
+ 0xba, 0x3c, 0x47, 0x11, 0xaf, 0x41, 0xba, 0x42, 0xc2, 0x01, 0x5b, 0x00,
+ 0xe9, 0x29, 0xcd, 0x7c, 0x79, 0x00, 0x0e, 0x10, 0xcc, 0x26, 0x18, 0x00,
+ 0x15, 0x08, 0x47, 0x7f, 0x1e, 0xc1, 0xba, 0x4e, 0xd1, 0x54, 0x99, 0x00,
+ 0x15, 0x68, 0x46, 0x02, 0x00, 0xc1, 0xba, 0x5a, 0x48, 0x1b, 0x0d, 0x41,
+ 0xbb, 0x10, 0x88, 0x05, 0x3f, 0xd9, 0x92, 0x05, 0x3f, 0xe0, 0xc9, 0x53,
+ 0x02, 0x05, 0x3f, 0xe9, 0xc6, 0xd3, 0x5e, 0x05, 0x3f, 0xf0, 0x91, 0x00,
+ 0x74, 0x09, 0x0a, 0x41, 0xbb, 0x1c, 0x44, 0x66, 0xe0, 0xc1, 0xbb, 0x28,
+ 0x91, 0x00, 0x74, 0xd9, 0x43, 0x69, 0x91, 0x41, 0xbb, 0x54, 0xc2, 0x13,
+ 0xf3, 0x00, 0x74, 0x39, 0xc2, 0x47, 0x43, 0x00, 0x74, 0x69, 0x91, 0x00,
+ 0x74, 0xc8, 0x42, 0x01, 0x8a, 0xc1, 0xbb, 0x60, 0x49, 0xad, 0x03, 0x41,
+ 0xbb, 0x6c, 0x91, 0x00, 0x74, 0xa9, 0x43, 0x69, 0x91, 0x41, 0xbb, 0x78,
+ 0x08, 0xc1, 0xbb, 0x84, 0xc3, 0x2b, 0x94, 0x00, 0x74, 0xe9, 0xc4, 0xab,
+ 0x64, 0x00, 0x74, 0xf8, 0x42, 0x00, 0xcc, 0x41, 0xbb, 0x90, 0xc4, 0xab,
+ 0x64, 0x00, 0x75, 0x59, 0xc3, 0x2b, 0x94, 0x00, 0x75, 0x70, 0x83, 0x00,
+ 0x75, 0x91, 0x8f, 0x00, 0x75, 0x99, 0x9b, 0x00, 0x76, 0x19, 0x8b, 0x00,
+ 0x76, 0x20, 0xc2, 0x00, 0x3a, 0x00, 0x75, 0x89, 0xc2, 0x00, 0xe5, 0x00,
+ 0x75, 0xd8, 0x8b, 0x00, 0x75, 0xa8, 0x9b, 0x00, 0x75, 0xb8, 0x97, 0x00,
+ 0x75, 0xc8, 0x8b, 0x00, 0x76, 0x08, 0xc2, 0x00, 0x36, 0x00, 0x75, 0xe1,
+ 0xc3, 0x48, 0x2a, 0x00, 0x75, 0xe8, 0xc2, 0x03, 0x76, 0x00, 0x76, 0x49,
+ 0x8b, 0x00, 0x76, 0x50, 0xc2, 0x0a, 0x20, 0x00, 0x76, 0x91, 0xc4, 0x05,
+ 0xde, 0x00, 0x76, 0x98, 0xc3, 0x08, 0xde, 0x00, 0x76, 0xa1, 0xc3, 0x0d,
+ 0x8f, 0x00, 0x76, 0xa8, 0xc2, 0x22, 0x45, 0x00, 0x76, 0xb1, 0xc4, 0x15,
+ 0xa7, 0x00, 0x76, 0xb8, 0x45, 0x00, 0x53, 0xc1, 0xbb, 0x9c, 0xd1, 0x4c,
+ 0xab, 0x0f, 0xdc, 0xc8, 0x46, 0x05, 0x07, 0xc1, 0xbb, 0xa8, 0x5b, 0x17,
+ 0xec, 0x41, 0xbb, 0xba, 0xc6, 0x01, 0xe9, 0x01, 0x3a, 0x91, 0xc6, 0x03,
+ 0xfa, 0x0f, 0xa9, 0xf8, 0xe0, 0x00, 0xc7, 0x01, 0x1d, 0x88, 0x45, 0x00,
+ 0x53, 0xc1, 0xbb, 0xc6, 0xd2, 0x40, 0x6a, 0x0f, 0xdc, 0xc0, 0x5b, 0x17,
+ 0x80, 0xc1, 0xbb, 0xd2, 0x46, 0x00, 0xc7, 0x41, 0xbb, 0xde, 0xe0, 0x06,
+ 0xa7, 0x01, 0x1d, 0x80, 0x45, 0x00, 0xc8, 0xc1, 0xbb, 0xf0, 0x4d, 0x3b,
+ 0xeb, 0x41, 0xbb, 0xfc, 0xe0, 0x08, 0x07, 0x0f, 0xdb, 0x40, 0x0f, 0xc1,
+ 0xbc, 0x02, 0xcc, 0x0c, 0x86, 0x01, 0x2e, 0xd0, 0x44, 0x00, 0x39, 0x41,
+ 0xbc, 0x08, 0xcd, 0x43, 0xa0, 0x0f, 0xdc, 0x19, 0xce, 0x08, 0x19, 0x0f,
+ 0xdc, 0x28, 0x00, 0x41, 0xbc, 0x0e, 0xcc, 0x88, 0xa4, 0x01, 0x0f, 0x78,
+ 0x44, 0x00, 0x55, 0xc1, 0xbc, 0x26, 0xc9, 0x62, 0x22, 0x01, 0x48, 0x50,
+ 0xce, 0x74, 0x3e, 0x01, 0x0c, 0xf9, 0x4f, 0x6b, 0xf4, 0x41, 0xbc, 0x32,
+ 0x00, 0x41, 0xbc, 0x3e, 0x44, 0x00, 0xcd, 0xc1, 0xbc, 0x5c, 0x45, 0x00,
+ 0x47, 0x41, 0xbc, 0x66, 0xd0, 0x5c, 0xef, 0x0f, 0xc2, 0x09, 0xc5, 0x00,
+ 0x62, 0x0f, 0xc2, 0x28, 0x00, 0x41, 0xbc, 0x70, 0xca, 0xa2, 0x0e, 0x01,
+ 0x0d, 0x40, 0xcc, 0x86, 0xd0, 0x01, 0x4a, 0x89, 0xcd, 0x7e, 0x8e, 0x01,
+ 0x4a, 0x68, 0xcd, 0x7e, 0x8e, 0x01, 0x4a, 0x79, 0xcc, 0x86, 0xd0, 0x01,
+ 0x4a, 0x60, 0xdc, 0x14, 0x16, 0x01, 0x52, 0x51, 0x46, 0x03, 0x50, 0xc1,
+ 0xbc, 0x7c, 0x45, 0x01, 0xac, 0x41, 0xbc, 0x88, 0xc3, 0x7a, 0xe1, 0x08,
+ 0x1c, 0x91, 0xc2, 0x00, 0x06, 0x08, 0x1c, 0xa8, 0xce, 0x62, 0xe0, 0x0f,
+ 0xdc, 0xb9, 0xde, 0x0f, 0x5d, 0x01, 0x3b, 0x18, 0x45, 0x01, 0x18, 0xc1,
+ 0xbc, 0x9a, 0x50, 0x0f, 0x63, 0xc1, 0xbc, 0xac, 0xca, 0x0e, 0xbd, 0x0f,
+ 0xbf, 0x80, 0x45, 0x01, 0x32, 0xc1, 0xbc, 0xb8, 0xdc, 0x13, 0xc2, 0x01,
+ 0x3d, 0xe9, 0xdb, 0x19, 0x4b, 0x01, 0x3c, 0xa0, 0x03, 0xc1, 0xbc, 0xca,
+ 0x45, 0x01, 0xc3, 0xc1, 0xbc, 0xd6, 0x0b, 0xc1, 0xbc, 0xe2, 0xc6, 0xb4,
+ 0x43, 0x01, 0x3a, 0x41, 0xda, 0x1b, 0x06, 0x0f, 0xb3, 0x88, 0x45, 0x20,
+ 0x8c, 0xc1, 0xbc, 0xee, 0x4e, 0x4a, 0x10, 0x41, 0xbc, 0xfa, 0x03, 0xc1,
+ 0xbd, 0x06, 0x42, 0x00, 0x37, 0xc1, 0xbd, 0x12, 0x42, 0x00, 0x07, 0xc1,
+ 0xbd, 0x1c, 0xd8, 0x24, 0x5c, 0x0f, 0xb3, 0x98, 0x49, 0x24, 0xed, 0xc1,
+ 0xbd, 0x28, 0xdf, 0x04, 0xe8, 0x01, 0x3c, 0xf1, 0x4e, 0x25, 0x4c, 0x41,
+ 0xbd, 0x34, 0x43, 0x04, 0x92, 0xc1, 0xbd, 0x40, 0xc7, 0xc8, 0x86, 0x01,
+ 0x38, 0xc0, 0x4b, 0x78, 0x10, 0xc1, 0xbd, 0x4c, 0x51, 0x03, 0x89, 0x41,
+ 0xbd, 0x52, 0x45, 0x3f, 0x6e, 0xc1, 0xbd, 0x5e, 0x42, 0x00, 0x5b, 0xc1,
+ 0xbd, 0x64, 0xc5, 0x03, 0xfb, 0x01, 0x5a, 0xc2, 0x01, 0xbd, 0x70, 0x46,
+ 0x83, 0x05, 0xc1, 0xbd, 0x7c, 0xcc, 0x2a, 0xc9, 0x01, 0x3c, 0xb9, 0x11,
+ 0x41, 0xbd, 0x82, 0xdc, 0x13, 0x6e, 0x01, 0x3c, 0xe1, 0x43, 0x00, 0x48,
+ 0x41, 0xbd, 0x94, 0xc9, 0x64, 0xce, 0x01, 0x3c, 0xb1, 0xcf, 0x63, 0xb1,
+ 0x01, 0x38, 0xb0, 0xc7, 0x0a, 0x60, 0x01, 0x39, 0x89, 0xd1, 0x32, 0x18,
+ 0x0f, 0xb3, 0xa1, 0x51, 0x47, 0xad, 0x41, 0xbd, 0xa3, 0xd2, 0x4e, 0x48,
+ 0x01, 0x39, 0x71, 0xd0, 0x5b, 0x4f, 0x01, 0x38, 0xe1, 0xd4, 0x38, 0x7a,
+ 0x01, 0x5a, 0xb0, 0xdb, 0x18, 0x22, 0x01, 0x39, 0x21, 0x44, 0x0d, 0x8f,
+ 0x41, 0xbd, 0xb2, 0xd1, 0x4f, 0xaf, 0x01, 0x37, 0xe0, 0xc6, 0x7b, 0x89,
+ 0x0f, 0xa4, 0xe9, 0xc5, 0x00, 0x62, 0x0f, 0xa4, 0xc1, 0xcf, 0x64, 0x47,
+ 0x0f, 0x9c, 0xa0, 0x9e, 0x0d, 0x85, 0x41, 0x9d, 0x0d, 0x85, 0x38, 0x9e,
+ 0x0d, 0x81, 0x09, 0x9d, 0x0d, 0x81, 0x00, 0xca, 0xa6, 0x00, 0x07, 0xda,
+ 0x79, 0x48, 0xc2, 0x2b, 0x41, 0xbd, 0xbe, 0xc2, 0x03, 0x07, 0x00, 0x2f,
+ 0x23, 0x01, 0xbd, 0xd0, 0xc3, 0xbd, 0xa8, 0x00, 0x2e, 0xdb, 0x01, 0xbd,
+ 0xd6, 0xc3, 0x03, 0x28, 0x00, 0x2e, 0x8b, 0x01, 0xbd, 0xdc, 0xc3, 0x06,
+ 0x8c, 0x00, 0x2e, 0xab, 0x01, 0xbd, 0xe2, 0x16, 0xc1, 0xbd, 0xe8, 0x15,
+ 0xc1, 0xbe, 0x03, 0xc4, 0x5d, 0xef, 0x00, 0x2f, 0x43, 0x01, 0xbe, 0x15,
+ 0xc3, 0xec, 0xd8, 0x00, 0x2f, 0x3b, 0x01, 0xbe, 0x1b, 0x46, 0x2a, 0xb4,
+ 0xc1, 0xbe, 0x21, 0xc3, 0x21, 0x00, 0x00, 0x2f, 0x03, 0x01, 0xbe, 0x45,
+ 0xc3, 0x04, 0xae, 0x00, 0x2e, 0xf3, 0x01, 0xbe, 0x4b, 0xc5, 0xa6, 0x05,
+ 0x00, 0x2e, 0xe3, 0x01, 0xbe, 0x51, 0xc3, 0x4d, 0x48, 0x00, 0x2e, 0xcb,
+ 0x01, 0xbe, 0x57, 0xc5, 0x4d, 0x42, 0x00, 0x2e, 0xb3, 0x01, 0xbe, 0x5d,
+ 0xc2, 0x00, 0x5b, 0x00, 0x2e, 0xa3, 0x01, 0xbe, 0x63, 0xc5, 0x43, 0x0f,
+ 0x00, 0x2e, 0x9b, 0x01, 0xbe, 0x6d, 0xc5, 0x9e, 0xbc, 0x00, 0x2e, 0x93,
+ 0x01, 0xbe, 0x73, 0x03, 0xc1, 0xbe, 0x79, 0x45, 0xe0, 0xdf, 0x41, 0xbe,
+ 0x83, 0xd4, 0x3e, 0xa6, 0x07, 0xd8, 0xf1, 0x13, 0xc1, 0xbe, 0xb3, 0x15,
+ 0xc1, 0xbe, 0xc2, 0xc4, 0xe9, 0xdf, 0x00, 0x2d, 0xf9, 0xc5, 0xe0, 0xe9,
+ 0x00, 0x2d, 0xe9, 0xcf, 0x65, 0x82, 0x00, 0x2d, 0xe1, 0x0a, 0xc1, 0xbe,
+ 0xd2, 0xc5, 0x7d, 0x10, 0x00, 0x2d, 0xb9, 0xc5, 0xde, 0x3c, 0x00, 0x2d,
+ 0xa8, 0x43, 0x02, 0xfb, 0xc1, 0xbe, 0xe7, 0xcb, 0x96, 0x61, 0x00, 0x2e,
+ 0x31, 0xc9, 0xae, 0x3e, 0x00, 0x2e, 0x19, 0xc5, 0xda, 0x9a, 0x00, 0x2e,
+ 0x01, 0xc5, 0xe3, 0x41, 0x00, 0x2d, 0xf0, 0xc4, 0xa2, 0x66, 0x00, 0x2d,
+ 0x71, 0x03, 0x41, 0xbe, 0xf3, 0xc3, 0x3b, 0x5d, 0x00, 0x2d, 0x69, 0xc4,
+ 0x45, 0xd0, 0x00, 0x2d, 0x38, 0xcc, 0x89, 0x58, 0x00, 0x2d, 0x51, 0xc3,
+ 0x16, 0x1d, 0x00, 0x2c, 0xd0, 0x07, 0xc1, 0xbe, 0xff, 0xc5, 0xdc, 0x48,
+ 0x00, 0x2c, 0xb0, 0xc3, 0x5f, 0x4e, 0x00, 0x2d, 0x41, 0xc9, 0xb0, 0xcf,
+ 0x00, 0x2c, 0xf8, 0xc3, 0x16, 0x57, 0x00, 0x2d, 0x09, 0xc4, 0xe5, 0x13,
+ 0x00, 0x2c, 0xc8, 0xc9, 0xac, 0xdf, 0x00, 0x2c, 0x99, 0xc4, 0xc9, 0x57,
+ 0x00, 0x2c, 0x90, 0xc3, 0x30, 0xe0, 0x00, 0x2c, 0xe3, 0x01, 0xbf, 0x0b,
+ 0xc6, 0xcf, 0xf8, 0x00, 0x2c, 0xf0, 0xc4, 0xe4, 0x83, 0x00, 0x2d, 0x19,
+ 0xc7, 0xc9, 0x20, 0x00, 0x2d, 0x21, 0xc5, 0xe3, 0x6e, 0x00, 0x2d, 0x2a,
+ 0x01, 0xbf, 0x11, 0x05, 0xc1, 0xbf, 0x17, 0xcf, 0x67, 0xda, 0x02, 0x6e,
+ 0x09, 0x03, 0xc1, 0xbf, 0x29, 0xc6, 0xd3, 0xb8, 0x02, 0x6f, 0x21, 0x19,
+ 0xc1, 0xbf, 0x33, 0xd6, 0x31, 0x0d, 0x02, 0x6f, 0x99, 0xcf, 0x67, 0x26,
+ 0x02, 0x6f, 0xa9, 0xcb, 0x91, 0xbd, 0x02, 0x6f, 0xc1, 0xcb, 0x94, 0xca,
+ 0x02, 0x6f, 0xc8, 0xd9, 0x1d, 0x8f, 0x02, 0x6e, 0x11, 0xc8, 0xbc, 0x63,
+ 0x02, 0x6f, 0xd0, 0xc9, 0xb4, 0x1d, 0x02, 0x6f, 0x39, 0xc6, 0xd5, 0x1a,
+ 0x02, 0x6f, 0x41, 0xc9, 0xb2, 0x49, 0x02, 0x6f, 0xa0, 0xc5, 0xde, 0x6e,
+ 0x02, 0x6e, 0x29, 0xca, 0xa1, 0xf0, 0x02, 0x6e, 0x98, 0xc6, 0xd3, 0x3a,
+ 0x02, 0x6e, 0x41, 0xcd, 0x7f, 0x51, 0x02, 0x6f, 0xe8, 0x44, 0x3c, 0x94,
+ 0xc1, 0xbf, 0x3f, 0xc3, 0x01, 0xa8, 0x02, 0x6e, 0xa8, 0xc3, 0x02, 0x9f,
+ 0x02, 0x6e, 0xb9, 0xc4, 0x06, 0xfc, 0x02, 0x6f, 0x00, 0xc6, 0xd2, 0x14,
+ 0x02, 0x6e, 0xc1, 0xc8, 0xbb, 0x3b, 0x02, 0x6f, 0xe0, 0xc7, 0x14, 0x41,
+ 0x02, 0x6f, 0x29, 0xc7, 0x50, 0x6b, 0x02, 0x6f, 0x70, 0xc3, 0xec, 0x72,
+ 0x0f, 0xdb, 0xc1, 0xc3, 0xec, 0x6c, 0x0f, 0xdb, 0xb1, 0xc3, 0xec, 0x6f,
+ 0x0f, 0xdb, 0xb9, 0xc3, 0xdd, 0x5b, 0x0f, 0xdb, 0xc9, 0xc3, 0xd8, 0x15,
+ 0x0f, 0xdb, 0xd1, 0xc3, 0xec, 0x75, 0x0f, 0xdb, 0xd9, 0xc6, 0xd8, 0xbc,
+ 0x0f, 0xdc, 0x08, 0x45, 0x04, 0x92, 0xc1, 0xbf, 0x49, 0xc2, 0x00, 0x4d,
+ 0x01, 0x00, 0xa8, 0xc3, 0xea, 0xda, 0x01, 0x1d, 0xe9, 0xc3, 0x83, 0xe8,
+ 0x01, 0x1d, 0xe1, 0xc3, 0x82, 0xec, 0x01, 0x1d, 0xd9, 0xc3, 0x83, 0x28,
+ 0x01, 0x1d, 0xd0, 0x42, 0x00, 0x03, 0xc1, 0xbf, 0x55, 0xcc, 0x89, 0x64,
+ 0x0f, 0xb5, 0x28, 0xc6, 0xd6, 0x5e, 0x0f, 0x9e, 0x39, 0xc4, 0x01, 0xa7,
+ 0x0f, 0xa1, 0xa0, 0xcb, 0x99, 0xf2, 0x0f, 0x9f, 0x09, 0xc8, 0x36, 0xb9,
+ 0x0f, 0x9f, 0x02, 0x01, 0xbf, 0x64, 0xc4, 0xd4, 0x5a, 0x01, 0x34, 0x91,
+ 0xc6, 0xd7, 0x5a, 0x01, 0x31, 0x69, 0xc6, 0xd2, 0x62, 0x0f, 0xb7, 0x00,
+ 0xc2, 0x00, 0x30, 0x0f, 0xc9, 0xf1, 0x89, 0x0f, 0xa2, 0xe0, 0xda, 0x1b,
+ 0x20, 0x0f, 0xc8, 0xf1, 0xd8, 0x22, 0x7c, 0x0f, 0xd7, 0x80, 0xc4, 0x24,
+ 0x35, 0x08, 0x69, 0xc9, 0xc5, 0x05, 0x1b, 0x08, 0x69, 0xc1, 0x15, 0xc1,
+ 0xbf, 0x68, 0x08, 0xc1, 0xbf, 0x74, 0x16, 0xc1, 0xbf, 0x80, 0xc3, 0x05,
+ 0x17, 0x08, 0x69, 0x89, 0xc4, 0x16, 0x57, 0x08, 0x69, 0x80, 0x42, 0x00,
+ 0x44, 0xc1, 0xbf, 0x8c, 0xc8, 0xc3, 0x73, 0x08, 0x69, 0x20, 0xc9, 0xb2,
+ 0xeb, 0x08, 0x69, 0x19, 0xc5, 0xe2, 0x6a, 0x08, 0x69, 0x10, 0x91, 0x08,
+ 0x69, 0x09, 0x87, 0x08, 0x69, 0x01, 0x97, 0x08, 0x68, 0xf9, 0x8b, 0x08,
+ 0x68, 0xf1, 0x83, 0x08, 0x68, 0xe8, 0xc2, 0x06, 0x8c, 0x08, 0x68, 0xe1,
+ 0x10, 0xc1, 0xbf, 0x9e, 0x0d, 0xc1, 0xbf, 0xae, 0xc2, 0x1a, 0x36, 0x08,
+ 0x68, 0xc1, 0xc2, 0x00, 0x4c, 0x08, 0x68, 0xb1, 0xc2, 0x00, 0x3f, 0x08,
+ 0x68, 0xa1, 0xc2, 0x00, 0x96, 0x08, 0x68, 0x99, 0xc2, 0x07, 0x69, 0x08,
+ 0x68, 0x91, 0x14, 0xc1, 0xbf, 0xbe, 0x06, 0xc1, 0xbf, 0xc8, 0xc2, 0x01,
+ 0xa7, 0x08, 0x68, 0x49, 0xc2, 0x01, 0x0e, 0x08, 0x68, 0x39, 0xc2, 0x00,
+ 0x2e, 0x08, 0x68, 0x31, 0xc2, 0x26, 0x94, 0x08, 0x68, 0x29, 0x16, 0xc1,
+ 0xbf, 0xd2, 0x83, 0x08, 0x68, 0x01, 0xc2, 0x02, 0x1d, 0x08, 0x68, 0x09,
+ 0xc2, 0x07, 0x44, 0x08, 0x68, 0x11, 0xc2, 0x05, 0x5c, 0x08, 0x68, 0x71,
+ 0x15, 0x41, 0xbf, 0xdc, 0x97, 0x00, 0xb9, 0x99, 0x8b, 0x00, 0xb9, 0x90,
+ 0xc2, 0x01, 0x0e, 0x00, 0xb9, 0x89, 0xc2, 0x0e, 0xe5, 0x00, 0xb9, 0x81,
+ 0xc2, 0x00, 0x4c, 0x00, 0xb9, 0x79, 0xc2, 0x00, 0x96, 0x00, 0xb9, 0x71,
+ 0xc2, 0x00, 0x9a, 0x00, 0xb9, 0x69, 0xc2, 0x1a, 0x36, 0x00, 0xb9, 0x61,
+ 0xc2, 0x00, 0x3f, 0x00, 0xb9, 0x59, 0xc2, 0x02, 0x1d, 0x00, 0xb9, 0x51,
+ 0xc2, 0x07, 0x44, 0x00, 0xb9, 0x49, 0x10, 0xc1, 0xbf, 0xe6, 0xc2, 0x0c,
+ 0x25, 0x00, 0xb9, 0x39, 0xc2, 0x00, 0x44, 0x00, 0xb9, 0x31, 0xc2, 0x07,
+ 0x69, 0x00, 0xb9, 0x21, 0xc2, 0x06, 0x6b, 0x00, 0xb9, 0x19, 0x97, 0x00,
+ 0xb9, 0x11, 0x8b, 0x00, 0xb9, 0x09, 0x83, 0x00, 0xb9, 0x00, 0x49, 0xad,
+ 0x42, 0xc1, 0xbf, 0xf0, 0x0c, 0xc1, 0xc0, 0x2e, 0xd4, 0x3c, 0x3a, 0x01,
+ 0x81, 0x71, 0xd4, 0x3e, 0x7e, 0x01, 0x81, 0x79, 0x47, 0x01, 0xff, 0xc1,
+ 0xc0, 0x3a, 0xc6, 0x94, 0xb9, 0x01, 0x8b, 0x20, 0xc3, 0x05, 0x17, 0x01,
+ 0x81, 0x09, 0x16, 0xc1, 0xc0, 0x8e, 0x08, 0xc1, 0xc0, 0x9c, 0x15, 0xc1,
+ 0xc0, 0xa8, 0xc5, 0x05, 0x1b, 0x01, 0x81, 0x41, 0xc4, 0x24, 0x35, 0x01,
+ 0x81, 0x48, 0xc3, 0x05, 0x17, 0x08, 0x47, 0xdb, 0x01, 0xc0, 0xb4, 0x16,
+ 0xc1, 0xc0, 0xba, 0xc4, 0x0d, 0x8e, 0x08, 0x47, 0xe0, 0x16, 0xc1, 0xc0,
+ 0xc6, 0x15, 0xc1, 0xc0, 0xd2, 0xc4, 0xbc, 0xb7, 0x08, 0x47, 0x91, 0xc2,
+ 0x03, 0x07, 0x08, 0x47, 0x81, 0x03, 0xc1, 0xc0, 0xdc, 0xc3, 0x21, 0x00,
+ 0x08, 0x47, 0x69, 0xc3, 0x04, 0xae, 0x08, 0x47, 0x61, 0xc6, 0xd7, 0x12,
+ 0x08, 0x47, 0x59, 0xc4, 0xe5, 0x53, 0x08, 0x47, 0x51, 0xc4, 0x4d, 0x48,
+ 0x08, 0x47, 0x49, 0xc2, 0x00, 0x5b, 0x08, 0x47, 0x23, 0x01, 0xc0, 0xe8,
+ 0xc4, 0xe4, 0xa7, 0x08, 0x47, 0x31, 0xc3, 0x7c, 0xad, 0x08, 0x47, 0x29,
+ 0xcb, 0x9c, 0x39, 0x08, 0x47, 0x19, 0xc5, 0x9e, 0xbc, 0x08, 0x47, 0x11,
+ 0xc4, 0xe5, 0xaf, 0x08, 0x47, 0x08, 0xca, 0x39, 0x9c, 0x07, 0xfb, 0x29,
+ 0x47, 0x01, 0xff, 0xc1, 0xc0, 0xee, 0xd1, 0x2f, 0x5a, 0x07, 0xfc, 0xf1,
+ 0xd6, 0x2f, 0x81, 0x07, 0xfc, 0xf8, 0x0d, 0xc1, 0xc1, 0x29, 0x15, 0xc1,
+ 0xc1, 0x35, 0x44, 0xc3, 0xd4, 0xc1, 0xc1, 0x41, 0xc5, 0xc4, 0x7c, 0x07,
+ 0xfd, 0x89, 0x12, 0xc1, 0xc1, 0x69, 0x8b, 0x07, 0xfe, 0xe3, 0x01, 0xc1,
+ 0x75, 0x05, 0xc1, 0xc1, 0x7b, 0x16, 0xc1, 0xc1, 0x87, 0xc5, 0x7b, 0x22,
+ 0x07, 0xfd, 0xf1, 0x83, 0x07, 0xfe, 0x13, 0x01, 0xc1, 0x93, 0x1b, 0xc1,
+ 0xc1, 0x97, 0x87, 0x07, 0xfe, 0x3b, 0x01, 0xc1, 0xb1, 0x91, 0x07, 0xfe,
+ 0x63, 0x01, 0xc1, 0xb9, 0x19, 0xc1, 0xc1, 0xbd, 0x97, 0x07, 0xfe, 0x99,
+ 0x44, 0xc4, 0x44, 0x41, 0xc1, 0xcf, 0x45, 0x00, 0x3f, 0xc1, 0xc1, 0xdf,
+ 0x83, 0x01, 0x82, 0xa9, 0x8b, 0x01, 0x82, 0xb9, 0x97, 0x01, 0x82, 0xc9,
+ 0x87, 0x01, 0x82, 0xd9, 0x91, 0x01, 0x82, 0xe8, 0x47, 0x7a, 0xe7, 0x41,
+ 0xc2, 0x05, 0x83, 0x01, 0x82, 0x59, 0x8b, 0x01, 0x82, 0x69, 0x97, 0x01,
+ 0x82, 0x79, 0x87, 0x01, 0x82, 0x89, 0x91, 0x01, 0x82, 0x98, 0x83, 0x01,
+ 0x82, 0x61, 0x8b, 0x01, 0x82, 0x71, 0x97, 0x01, 0x82, 0x81, 0x87, 0x01,
+ 0x82, 0x91, 0x91, 0x01, 0x82, 0xa0, 0x83, 0x01, 0x82, 0xb1, 0x8b, 0x01,
+ 0x82, 0xc1, 0x97, 0x01, 0x82, 0xd1, 0x87, 0x01, 0x82, 0xe1, 0x91, 0x01,
+ 0x82, 0xf0, 0x83, 0x01, 0x82, 0xf9, 0x8b, 0x01, 0x83, 0x09, 0x97, 0x01,
+ 0x83, 0x21, 0x87, 0x01, 0x83, 0x31, 0x91, 0x01, 0x83, 0x40, 0x83, 0x01,
+ 0x83, 0x01, 0x8b, 0x01, 0x83, 0x11, 0x97, 0x01, 0x83, 0x29, 0x87, 0x01,
+ 0x83, 0x39, 0x91, 0x01, 0x83, 0x48, 0x83, 0x01, 0x83, 0x51, 0x8b, 0x01,
+ 0x83, 0x59, 0x97, 0x01, 0x83, 0x61, 0x87, 0x01, 0x83, 0x69, 0x91, 0x01,
+ 0x83, 0x70, 0x83, 0x01, 0x83, 0x79, 0x8b, 0x01, 0x83, 0x91, 0x97, 0x01,
+ 0x83, 0xa9, 0x87, 0x01, 0x83, 0xc1, 0x91, 0x01, 0x83, 0xd8, 0x83, 0x01,
+ 0x83, 0x81, 0x8b, 0x01, 0x83, 0x99, 0x97, 0x01, 0x83, 0xb1, 0x87, 0x01,
+ 0x83, 0xc9, 0x91, 0x01, 0x83, 0xe0, 0x83, 0x01, 0x83, 0x89, 0x8b, 0x01,
+ 0x83, 0xa1, 0x97, 0x01, 0x83, 0xb9, 0x87, 0x01, 0x83, 0xd1, 0x91, 0x01,
+ 0x83, 0xe8, 0x83, 0x01, 0x83, 0xf1, 0x8b, 0x01, 0x83, 0xf9, 0x97, 0x01,
+ 0x84, 0x01, 0x87, 0x01, 0x84, 0x09, 0x91, 0x01, 0x84, 0x10, 0x83, 0x01,
+ 0x84, 0x21, 0x97, 0x01, 0x84, 0x31, 0x91, 0x01, 0x84, 0x40, 0x83, 0x01,
+ 0x84, 0x49, 0x8b, 0x01, 0x84, 0x51, 0x97, 0x01, 0x84, 0x59, 0x87, 0x01,
+ 0x84, 0x61, 0x91, 0x01, 0x84, 0x68, 0x83, 0x01, 0x84, 0x79, 0x8b, 0x01,
+ 0x84, 0x81, 0x87, 0x01, 0x84, 0x89, 0x91, 0x01, 0x84, 0x90, 0x00, 0xc1,
+ 0xc2, 0x11, 0xcb, 0x97, 0xab, 0x01, 0x01, 0x38, 0x45, 0x66, 0xf4, 0xc1,
+ 0xc2, 0x1d, 0xcc, 0x0c, 0x86, 0x01, 0x2e, 0xc9, 0xc6, 0x19, 0x7a, 0x01,
+ 0x2e, 0xc1, 0xcc, 0x04, 0x1b, 0x0f, 0xdc, 0x81, 0x42, 0x00, 0x68, 0x41,
+ 0xc2, 0x29, 0xc9, 0x11, 0x81, 0x01, 0x37, 0x39, 0x0e, 0xc1, 0xc2, 0x2f,
+ 0xc8, 0xc1, 0xa3, 0x01, 0x09, 0x39, 0xc8, 0xbd, 0x53, 0x01, 0x02, 0xa1,
+ 0xd0, 0x0f, 0x62, 0x00, 0x05, 0x09, 0xcd, 0x2d, 0xa6, 0x00, 0x05, 0xf9,
+ 0xcb, 0x11, 0x1f, 0x01, 0x70, 0xc0, 0xda, 0x19, 0xb4, 0x01, 0x35, 0x21,
+ 0x51, 0x54, 0xbb, 0x41, 0xc2, 0x3e, 0x00, 0x41, 0xc2, 0x50, 0xc9, 0x50,
+ 0xc7, 0x01, 0x1d, 0x71, 0x45, 0x01, 0xac, 0xc1, 0xc2, 0x62, 0x03, 0x41,
+ 0xc2, 0x86, 0x47, 0x37, 0x49, 0xc1, 0xc2, 0x92, 0x47, 0x01, 0xff, 0x41,
+ 0xc2, 0xa5, 0x47, 0x37, 0x49, 0xc1, 0xc2, 0xfe, 0x47, 0x01, 0xff, 0x41,
+ 0xc3, 0x11, 0xc5, 0x56, 0xd6, 0x01, 0x09, 0xc9, 0x49, 0x1b, 0xad, 0x41,
+ 0xc3, 0x74, 0xd1, 0x2e, 0x26, 0x0f, 0xae, 0xd1, 0xc4, 0x07, 0xeb, 0x01,
+ 0x4f, 0x08, 0xd3, 0x42, 0x0b, 0x0f, 0x65, 0xa1, 0x47, 0x37, 0x49, 0xc1,
+ 0xc3, 0x84, 0xca, 0xa2, 0xd6, 0x0f, 0x65, 0x81, 0x49, 0x54, 0xdd, 0xc1,
+ 0xc3, 0xc9, 0xcb, 0x5a, 0x8f, 0x0f, 0x65, 0x61, 0xc9, 0x42, 0x15, 0x0f,
+ 0x65, 0x00, 0xd5, 0x35, 0x9b, 0x01, 0x4f, 0x28, 0x08, 0xc1, 0xc3, 0xd5,
+ 0x16, 0xc1, 0xc3, 0xe1, 0xc3, 0x05, 0x17, 0x0e, 0x9b, 0x90, 0xda, 0x1b,
+ 0xa2, 0x01, 0x81, 0xb9, 0x4b, 0x1a, 0x0b, 0x41, 0xc3, 0xed, 0x48, 0x01,
+ 0x93, 0xc1, 0xc4, 0x1d, 0x49, 0xb5, 0x4f, 0xc1, 0xc4, 0x29, 0xcd, 0x80,
+ 0x07, 0x01, 0x7f, 0xa1, 0x4e, 0x75, 0x2c, 0xc1, 0xc4, 0x35, 0xc8, 0x02,
+ 0xd5, 0x01, 0x7f, 0xd8, 0xc7, 0xcb, 0xce, 0x01, 0x8c, 0x99, 0x0a, 0xc1,
+ 0xc4, 0x4b, 0xc7, 0xcc, 0xb5, 0x01, 0x8c, 0xb0, 0x43, 0x08, 0xde, 0xc1,
+ 0xc4, 0x57, 0xc9, 0xb1, 0xf8, 0x01, 0x8c, 0xc8, 0xca, 0xa0, 0x74, 0x01,
+ 0x8c, 0xb9, 0xc7, 0xc5, 0x99, 0x01, 0x8c, 0xf8, 0x16, 0xc1, 0xc4, 0x63,
+ 0xc3, 0x05, 0x17, 0x08, 0x42, 0xc2, 0x01, 0xc4, 0x76, 0x16, 0xc1, 0xc4,
+ 0x7a, 0x15, 0xc1, 0xc4, 0x86, 0x03, 0xc1, 0xc4, 0x90, 0xc3, 0x21, 0x00,
+ 0x08, 0x42, 0x69, 0xc3, 0x04, 0xae, 0x08, 0x42, 0x61, 0xc6, 0xd7, 0x12,
+ 0x08, 0x42, 0x59, 0xc4, 0xe5, 0x53, 0x08, 0x42, 0x51, 0xc4, 0x4d, 0x48,
+ 0x08, 0x42, 0x49, 0xc2, 0x00, 0x5b, 0x08, 0x42, 0x23, 0x01, 0xc4, 0x9c,
+ 0xc5, 0x4d, 0x42, 0x08, 0x42, 0x31, 0xc3, 0x7c, 0xad, 0x08, 0x42, 0x29,
+ 0xc6, 0x43, 0x0f, 0x08, 0x42, 0x19, 0xc5, 0x9e, 0xbc, 0x08, 0x42, 0x11,
+ 0xc4, 0xe5, 0xaf, 0x08, 0x42, 0x09, 0xc2, 0x03, 0x07, 0x08, 0x42, 0x81,
+ 0xc4, 0xbc, 0xb7, 0x08, 0x42, 0x91, 0xc4, 0x5d, 0xef, 0x08, 0x42, 0x98,
+ 0xc7, 0xc8, 0xc5, 0x0f, 0xa2, 0xd1, 0xc3, 0x1c, 0xd0, 0x0f, 0xa2, 0x91,
+ 0xc6, 0xaf, 0x10, 0x0f, 0xa3, 0x09, 0xc5, 0xdc, 0x5c, 0x0f, 0xa3, 0x10,
+ 0x45, 0x80, 0x6c, 0xc1, 0xc4, 0xa2, 0xc5, 0x03, 0xfb, 0x01, 0x2e, 0x5b,
+ 0x01, 0xc4, 0xd9, 0xd4, 0x3f, 0x6e, 0x01, 0x3f, 0x0b, 0x01, 0xc4, 0xdd,
+ 0xc8, 0xb9, 0x3b, 0x01, 0x33, 0x38, 0x07, 0xc1, 0xc4, 0xe3, 0xd5, 0x35,
+ 0xb0, 0x0f, 0xad, 0x59, 0x11, 0x41, 0xc4, 0xed, 0xca, 0xa9, 0xfc, 0x0f,
+ 0xc5, 0x69, 0xc3, 0x05, 0x17, 0x0f, 0xc5, 0x60, 0xc5, 0x01, 0xea, 0x01,
+ 0x2d, 0x0b, 0x01, 0xc4, 0xf9, 0xc7, 0x32, 0xca, 0x01, 0x38, 0x21, 0xc9,
+ 0xac, 0xa9, 0x01, 0x33, 0x21, 0xc2, 0x0b, 0xfd, 0x0f, 0x99, 0x1b, 0x01,
+ 0xc4, 0xfd, 0x0f, 0xc1, 0xc5, 0x01, 0xca, 0x57, 0xbf, 0x01, 0x30, 0xb1,
+ 0xc3, 0x09, 0x46, 0x01, 0x30, 0x31, 0xc9, 0xb7, 0x2c, 0x07, 0xf2, 0x30,
+ 0x03, 0xc1, 0xc5, 0x0d, 0x42, 0x00, 0x07, 0xc1, 0xc5, 0x19, 0x45, 0x0b,
+ 0xa1, 0x41, 0xc5, 0x25, 0xc6, 0x3f, 0x7c, 0x01, 0x2e, 0x3b, 0x01, 0xc5,
+ 0x2b, 0x48, 0xbe, 0xd3, 0xc1, 0xc5, 0x2f, 0x42, 0x00, 0x37, 0x41, 0xc5,
+ 0x3b, 0x14, 0xc1, 0xc5, 0x47, 0xd7, 0x26, 0x9c, 0x01, 0x36, 0xb9, 0xc8,
+ 0x32, 0xab, 0x01, 0x30, 0x79, 0xd2, 0x48, 0x60, 0x0f, 0xab, 0xf0, 0x0e,
+ 0xc1, 0xc5, 0x53, 0x4c, 0x0f, 0x44, 0xc1, 0xc5, 0x60, 0xcc, 0x7e, 0x0d,
+ 0x01, 0x31, 0xc8, 0x43, 0x00, 0x48, 0xc1, 0xc5, 0x6c, 0xc8, 0x46, 0xf8,
+ 0x01, 0x2d, 0x68, 0x4b, 0x53, 0xdf, 0xc1, 0xc5, 0x78, 0x4b, 0x39, 0x07,
+ 0x41, 0xc5, 0x84, 0x46, 0x01, 0x07, 0xc1, 0xc5, 0x99, 0xca, 0xa3, 0x94,
+ 0x01, 0x5e, 0xe8, 0xcc, 0x8e, 0x8c, 0x01, 0x2d, 0x89, 0x42, 0x01, 0x04,
+ 0x41, 0xc5, 0xa9, 0x46, 0x02, 0x5e, 0xc1, 0xc5, 0xb5, 0xce, 0x55, 0xce,
+ 0x01, 0x58, 0xf0, 0xd5, 0x36, 0x19, 0x0f, 0xc4, 0x39, 0xd0, 0x36, 0x1e,
+ 0x0f, 0xc3, 0xf9, 0xd0, 0x5f, 0xff, 0x0f, 0xc3, 0x39, 0xca, 0x36, 0x24,
+ 0x0f, 0xc3, 0x79, 0xd1, 0x56, 0x42, 0x0f, 0xc3, 0xb8, 0xd5, 0x36, 0x19,
+ 0x0f, 0xc4, 0x31, 0xd1, 0x56, 0x42, 0x0f, 0xc3, 0xb1, 0xca, 0x36, 0x24,
+ 0x0f, 0xc3, 0x71, 0xd0, 0x5f, 0xff, 0x0f, 0xc3, 0x31, 0xd0, 0x36, 0x1e,
+ 0x0f, 0xc3, 0xf0, 0xd5, 0x36, 0x19, 0x0f, 0xc4, 0x29, 0xd1, 0x56, 0x42,
+ 0x0f, 0xc3, 0xa9, 0xca, 0x36, 0x24, 0x0f, 0xc3, 0x69, 0xd0, 0x5f, 0xff,
+ 0x0f, 0xc3, 0x29, 0xd0, 0x36, 0x1e, 0x0f, 0xc3, 0xe8, 0xd5, 0x36, 0x19,
+ 0x0f, 0xc4, 0x21, 0xd1, 0x56, 0x42, 0x0f, 0xc3, 0xa1, 0xca, 0x36, 0x24,
+ 0x0f, 0xc3, 0x61, 0xd0, 0x5f, 0xff, 0x0f, 0xc3, 0x21, 0xd0, 0x36, 0x1e,
+ 0x0f, 0xc3, 0xe0, 0xc5, 0xdf, 0x7c, 0x0f, 0x9c, 0x81, 0xcc, 0x8d, 0xcc,
+ 0x0f, 0x99, 0x60, 0xc6, 0xd5, 0x50, 0x0f, 0xb5, 0xf1, 0xc4, 0x52, 0xda,
+ 0x0f, 0x98, 0x51, 0xc7, 0xc9, 0xba, 0x0f, 0xa0, 0x19, 0xc4, 0xe8, 0xc7,
+ 0x0f, 0xc9, 0xe8, 0xc4, 0x24, 0x35, 0x0f, 0x17, 0xc9, 0xc5, 0x05, 0x1b,
+ 0x0f, 0x17, 0xc1, 0x15, 0xc1, 0xc5, 0xc7, 0x08, 0xc1, 0xc5, 0xd3, 0x16,
+ 0xc1, 0xc5, 0xdf, 0xc3, 0x05, 0x17, 0x0f, 0x17, 0x89, 0xc4, 0x16, 0x57,
+ 0x0f, 0x17, 0x80, 0xc3, 0xdf, 0x47, 0x0f, 0x17, 0x73, 0x01, 0xc5, 0xeb,
+ 0xc3, 0x4f, 0x35, 0x0f, 0x17, 0x62, 0x01, 0xc5, 0xf1, 0x1b, 0xc1, 0xc5,
+ 0xf7, 0x97, 0x0f, 0x16, 0xf3, 0x01, 0xc6, 0x01, 0x10, 0xc1, 0xc6, 0x07,
+ 0x83, 0x0f, 0x16, 0x0b, 0x01, 0xc6, 0x17, 0x87, 0x0f, 0x16, 0xdb, 0x01,
+ 0xc6, 0x28, 0x91, 0x0f, 0x16, 0xab, 0x01, 0xc6, 0x2c, 0x8b, 0x0f, 0x16,
+ 0xe3, 0x01, 0xc6, 0x33, 0x16, 0xc1, 0xc6, 0x39, 0x0e, 0xc1, 0xc6, 0x4f,
+ 0xc2, 0x01, 0x0e, 0x0f, 0x16, 0xd1, 0x0d, 0xc1, 0xc6, 0x59, 0xc2, 0x00,
+ 0x3f, 0x0f, 0x16, 0xc1, 0xc2, 0x00, 0x9a, 0x0f, 0x16, 0xb9, 0xc2, 0x06,
+ 0x8c, 0x0f, 0x16, 0x99, 0xc2, 0x00, 0x4c, 0x0f, 0x16, 0x91, 0xc2, 0x05,
+ 0x5c, 0x0f, 0x16, 0x89, 0xc2, 0x26, 0x94, 0x0f, 0x16, 0x81, 0x15, 0xc1,
+ 0xc6, 0x63, 0xc2, 0x01, 0xa7, 0x0f, 0x16, 0x69, 0x12, 0xc1, 0xc6, 0x6d,
+ 0xc2, 0x07, 0x69, 0x0f, 0x16, 0x29, 0xc2, 0x0c, 0x25, 0x0f, 0x16, 0x21,
+ 0xc2, 0x00, 0x2e, 0x0f, 0x16, 0x19, 0xc2, 0x02, 0x1d, 0x0f, 0x16, 0x10,
+ 0xc6, 0x29, 0x29, 0x08, 0xc7, 0x91, 0xc6, 0xd1, 0xe4, 0x08, 0xc7, 0x89,
+ 0x15, 0xc1, 0xc6, 0x77, 0x08, 0xc1, 0xc6, 0x83, 0x16, 0x41, 0xc6, 0x8f,
+ 0xc4, 0x24, 0x35, 0x08, 0xc7, 0x49, 0xc5, 0x05, 0x1b, 0x08, 0xc7, 0x41,
+ 0x15, 0xc1, 0xc6, 0xa1, 0x08, 0xc1, 0xc6, 0xad, 0x16, 0xc1, 0xc6, 0xb9,
+ 0xc3, 0x05, 0x17, 0x08, 0xc7, 0x09, 0xc4, 0x16, 0x57, 0x08, 0xc7, 0x00,
+ 0xc4, 0xe4, 0x97, 0x08, 0xc6, 0xf9, 0x15, 0xc1, 0xc6, 0xc5, 0x0a, 0xc1,
+ 0xc6, 0xd1, 0xc2, 0x0b, 0xfc, 0x08, 0xc6, 0xc1, 0xc2, 0x0b, 0x0a, 0x08,
+ 0xc6, 0xb9, 0x83, 0x08, 0xc6, 0x0b, 0x01, 0xc6, 0xe1, 0xc2, 0x0c, 0x25,
+ 0x08, 0xc6, 0xa1, 0x10, 0xc1, 0xc6, 0xef, 0xc3, 0x02, 0x01, 0x08, 0xc6,
+ 0x91, 0x91, 0x08, 0xc6, 0x4b, 0x01, 0xc6, 0xfb, 0x87, 0x08, 0xc6, 0x43,
+ 0x01, 0xc7, 0x01, 0x17, 0xc1, 0xc7, 0x05, 0x1b, 0xc1, 0xc7, 0x0d, 0xc2,
+ 0x05, 0x28, 0x08, 0xc6, 0x61, 0xc2, 0x07, 0x69, 0x08, 0xc6, 0x59, 0xc2,
+ 0x23, 0x68, 0x08, 0xc6, 0x31, 0xc2, 0x00, 0xbd, 0x08, 0xc6, 0x10, 0xc4,
+ 0xe4, 0x97, 0x08, 0xc5, 0xf9, 0x15, 0xc1, 0xc7, 0x1c, 0x0a, 0xc1, 0xc7,
+ 0x28, 0xc2, 0x0b, 0xfc, 0x08, 0xc5, 0xc1, 0xc2, 0x0b, 0x0a, 0x08, 0xc5,
+ 0xb9, 0x83, 0x08, 0xc5, 0x0b, 0x01, 0xc7, 0x38, 0xc2, 0x0c, 0x25, 0x08,
+ 0xc5, 0xa1, 0x10, 0xc1, 0xc7, 0x46, 0xc3, 0x02, 0x01, 0x08, 0xc5, 0x91,
+ 0x91, 0x08, 0xc5, 0x4b, 0x01, 0xc7, 0x52, 0x87, 0x08, 0xc5, 0x43, 0x01,
+ 0xc7, 0x58, 0x17, 0xc1, 0xc7, 0x5c, 0x1b, 0xc1, 0xc7, 0x64, 0xc2, 0x05,
+ 0x28, 0x08, 0xc5, 0x61, 0xc2, 0x07, 0x69, 0x08, 0xc5, 0x59, 0xc2, 0x23,
+ 0x68, 0x08, 0xc5, 0x31, 0xc2, 0x00, 0xbd, 0x08, 0xc5, 0x10, 0xc3, 0x03,
+ 0x5e, 0x01, 0x18, 0x39, 0xc7, 0x7c, 0x8c, 0x07, 0xf2, 0x78, 0xc5, 0x00,
+ 0x47, 0x01, 0x49, 0x99, 0xc4, 0x00, 0xcd, 0x01, 0x59, 0xf8, 0xcf, 0x1b,
+ 0x93, 0x01, 0x02, 0xa9, 0xcc, 0x87, 0x18, 0x0f, 0x9d, 0xa0, 0x05, 0xc1,
+ 0xc7, 0x73, 0xd7, 0x18, 0x26, 0x01, 0x39, 0x19, 0xd8, 0x24, 0xec, 0x01,
+ 0x39, 0x11, 0x44, 0x0b, 0xf8, 0xc1, 0xc7, 0x7f, 0xcb, 0x93, 0xf9, 0x0f,
+ 0x9a, 0x01, 0xd2, 0x25, 0x52, 0x0f, 0xbe, 0x30, 0xcb, 0x98, 0x19, 0x0f,
+ 0x9b, 0xe8, 0x00, 0xc1, 0xc7, 0x8b, 0xc9, 0xb6, 0xc0, 0x0f, 0xb1, 0xb0,
+ 0xd7, 0x29, 0xd8, 0x0f, 0xb0, 0x59, 0xd0, 0x5a, 0x7f, 0x0f, 0xb1, 0x88,
+ 0xdf, 0x0c, 0x83, 0x01, 0x36, 0xf1, 0x49, 0x0c, 0xa3, 0x41, 0xc7, 0xd4,
+ 0xe0, 0x01, 0x27, 0x01, 0x3d, 0x60, 0xc9, 0xad, 0xb7, 0x0f, 0x98, 0xe9,
+ 0xc6, 0x01, 0xb1, 0x0f, 0x98, 0xa8, 0xca, 0x58, 0x6f, 0x07, 0xf8, 0x19,
+ 0xc7, 0x65, 0x30, 0x07, 0xff, 0x10, 0xc7, 0x0a, 0x60, 0x07, 0xf8, 0x51,
+ 0xc8, 0x32, 0x18, 0x07, 0xf8, 0x31, 0xc9, 0x2d, 0x3c, 0x07, 0xf8, 0x38,
+ 0x45, 0x08, 0xd8, 0xc1, 0xc7, 0xe0, 0xca, 0x9b, 0xa0, 0x07, 0xf8, 0x20,
+ 0x11, 0xc1, 0xc8, 0x04, 0xd0, 0x07, 0x97, 0x07, 0xf9, 0xf1, 0xc8, 0x97,
+ 0x14, 0x07, 0xff, 0x00, 0xc8, 0x50, 0x04, 0x07, 0xf8, 0xd9, 0xc6, 0x2a,
+ 0x4c, 0x07, 0xf8, 0x78, 0x07, 0xc1, 0xc8, 0x10, 0x45, 0x01, 0xf2, 0xc1,
+ 0xc8, 0x1c, 0xc7, 0x7c, 0x8c, 0x07, 0xf9, 0xf8, 0xca, 0x0e, 0xbd, 0x07,
+ 0xf8, 0xe9, 0xcf, 0x0f, 0x63, 0x07, 0xf8, 0x08, 0xcf, 0x57, 0x10, 0x07,
+ 0xf8, 0xf1, 0xca, 0x1e, 0x66, 0x07, 0xfa, 0x00, 0xcb, 0x2d, 0xa8, 0x07,
+ 0xf8, 0xf9, 0xcc, 0x00, 0xf2, 0x07, 0xf8, 0x10, 0xce, 0x6a, 0x15, 0x07,
+ 0xf9, 0xe1, 0x45, 0x01, 0x18, 0x41, 0xc8, 0x28, 0xc9, 0xa5, 0x6b, 0x07,
+ 0xff, 0x09, 0xcb, 0x97, 0x11, 0x07, 0xf8, 0x29, 0xc8, 0x65, 0xfb, 0x07,
+ 0xf8, 0x58, 0x00, 0x41, 0xc8, 0x40, 0xc9, 0xb4, 0x41, 0x0f, 0x9c, 0x39,
+ 0x95, 0x0f, 0x9c, 0x30, 0xc5, 0x95, 0x3e, 0x0f, 0xb4, 0x91, 0xcb, 0x9a,
+ 0x81, 0x0f, 0xcf, 0x78, 0x0e, 0xc1, 0xc8, 0x4c, 0x06, 0xc1, 0xc8, 0x58,
+ 0xc8, 0xb9, 0xdb, 0x08, 0x52, 0xa1, 0x05, 0xc1, 0xc8, 0x62, 0xcc, 0x11,
+ 0x61, 0x08, 0x52, 0x88, 0x44, 0x05, 0x17, 0xc1, 0xc8, 0x6e, 0x16, 0x41,
+ 0xc8, 0x7a, 0xc4, 0x08, 0xdd, 0x08, 0x52, 0x19, 0x16, 0xc1, 0xc8, 0x86,
+ 0xc3, 0x05, 0x17, 0x08, 0x52, 0x00, 0xc5, 0x21, 0x27, 0x08, 0x51, 0xf9,
+ 0x45, 0x33, 0x46, 0x41, 0xc8, 0x92, 0x42, 0x00, 0x68, 0xc1, 0xc8, 0x9e,
+ 0xc5, 0xde, 0xf0, 0x08, 0x51, 0xc9, 0xc9, 0x31, 0x7b, 0x08, 0x51, 0xc1,
+ 0xc7, 0x45, 0xcd, 0x08, 0x50, 0x79, 0xc8, 0x10, 0xab, 0x08, 0x50, 0x70,
+ 0x18, 0xc1, 0xc8, 0xaa, 0x16, 0xc1, 0xc8, 0xb4, 0xc2, 0x00, 0x96, 0x08,
+ 0x51, 0x59, 0xc2, 0x00, 0x9a, 0x08, 0x51, 0x51, 0xc2, 0x1a, 0x36, 0x08,
+ 0x51, 0x49, 0xc2, 0x00, 0x3f, 0x08, 0x51, 0x41, 0x04, 0xc1, 0xc8, 0xc2,
+ 0x12, 0xc1, 0xc8, 0xcc, 0x10, 0xc1, 0xc8, 0xd6, 0x06, 0xc1, 0xc8, 0xe6,
+ 0xc2, 0x26, 0x94, 0x08, 0x50, 0xb9, 0x05, 0xc1, 0xc8, 0xf4, 0x09, 0xc1,
+ 0xc8, 0xfe, 0x0d, 0xc1, 0xc9, 0x08, 0x83, 0x08, 0x50, 0x01, 0x15, 0xc1,
+ 0xc9, 0x18, 0xc2, 0x05, 0x5c, 0x08, 0x51, 0x81, 0xc2, 0x01, 0x0e, 0x08,
+ 0x51, 0x88, 0x06, 0xc1, 0xc9, 0x28, 0x05, 0xc1, 0xc9, 0x40, 0x04, 0xc1,
+ 0xc9, 0x80, 0x03, 0xc1, 0xc9, 0xc0, 0x26, 0xc1, 0xca, 0x00, 0x25, 0xc1,
+ 0xca, 0x40, 0x24, 0xc1, 0xca, 0x80, 0x23, 0xc1, 0xca, 0xc0, 0x22, 0xc1,
+ 0xcb, 0x00, 0x21, 0xc1, 0xcb, 0x40, 0x20, 0xc1, 0xcb, 0x80, 0x1f, 0xc1,
+ 0xcb, 0xc0, 0x1e, 0xc1, 0xcc, 0x00, 0x1d, 0x41, 0xcc, 0x40, 0x08, 0xc1,
+ 0xcc, 0x80, 0x07, 0xc1, 0xcc, 0xc0, 0x06, 0xc1, 0xcd, 0x00, 0x05, 0xc1,
+ 0xcd, 0x40, 0x04, 0xc1, 0xcd, 0x80, 0x03, 0xc1, 0xcd, 0xc0, 0x26, 0xc1,
+ 0xce, 0x00, 0x25, 0xc1, 0xce, 0x40, 0x24, 0xc1, 0xce, 0x80, 0x23, 0xc1,
+ 0xce, 0xc0, 0x22, 0xc1, 0xcf, 0x00, 0x21, 0xc1, 0xcf, 0x40, 0x20, 0xc1,
+ 0xcf, 0x80, 0x1f, 0xc1, 0xcf, 0xc0, 0x1e, 0xc1, 0xd0, 0x00, 0x1d, 0x41,
+ 0xd0, 0x40, 0xc4, 0x15, 0xa7, 0x08, 0x97, 0xb9, 0xc2, 0x22, 0x45, 0x08,
+ 0x97, 0xb0, 0xc3, 0x0d, 0x8f, 0x08, 0x97, 0xa9, 0xc3, 0x08, 0xde, 0x08,
+ 0x97, 0xa0, 0xc4, 0x05, 0xde, 0x08, 0x97, 0x99, 0xc2, 0x0a, 0x20, 0x08,
+ 0x97, 0x90, 0x8b, 0x08, 0x97, 0x31, 0x83, 0x08, 0x97, 0x01, 0x97, 0x08,
+ 0x97, 0x40, 0x97, 0x08, 0x97, 0x20, 0x8b, 0x08, 0x97, 0x10, 0x83, 0x08,
+ 0x96, 0xe9, 0xc2, 0x01, 0x0e, 0x08, 0x96, 0xe0, 0x83, 0x08, 0x96, 0xc9,
+ 0xc2, 0x00, 0x9a, 0x08, 0x96, 0x50, 0xc2, 0x01, 0x0e, 0x08, 0x96, 0xb1,
+ 0xc2, 0x02, 0x1d, 0x08, 0x96, 0xa9, 0x83, 0x08, 0x96, 0xa0, 0xc2, 0x01,
+ 0x0e, 0x08, 0x96, 0x99, 0x83, 0x08, 0x96, 0x90, 0x83, 0x08, 0x96, 0x89,
+ 0xc2, 0x01, 0x01, 0x08, 0x96, 0x61, 0xc2, 0x1a, 0x36, 0x08, 0x96, 0x29,
+ 0xc2, 0x07, 0x69, 0x08, 0x95, 0xf8, 0xc2, 0x01, 0x0e, 0x08, 0x96, 0x81,
+ 0x83, 0x08, 0x96, 0x79, 0x06, 0x41, 0xd0, 0x80, 0xc2, 0x01, 0x0e, 0x08,
+ 0x96, 0x71, 0x83, 0x08, 0x96, 0x69, 0x16, 0x41, 0xd0, 0x90, 0xc2, 0x01,
+ 0x0e, 0x08, 0x96, 0x21, 0xc2, 0x26, 0x94, 0x08, 0x96, 0x19, 0x83, 0x08,
+ 0x96, 0x10, 0xc2, 0x01, 0x0e, 0x08, 0x96, 0x09, 0x83, 0x08, 0x96, 0x00,
+ 0xc2, 0x01, 0x0e, 0x08, 0x95, 0xf1, 0xc2, 0x07, 0x69, 0x08, 0x95, 0xe9,
+ 0x83, 0x08, 0x95, 0xe0, 0xc2, 0x01, 0x0e, 0x08, 0x95, 0xd9, 0x83, 0x08,
+ 0x95, 0xd0, 0x97, 0x08, 0x95, 0xc9, 0x8b, 0x08, 0x95, 0xb9, 0x83, 0x08,
+ 0x95, 0x88, 0x97, 0x08, 0x95, 0xa8, 0x8b, 0x08, 0x95, 0x98, 0x97, 0x08,
+ 0x91, 0x99, 0x8b, 0x08, 0x91, 0x89, 0x83, 0x08, 0x91, 0x60, 0x8b, 0x08,
+ 0x91, 0x70, 0xc6, 0xd3, 0x8e, 0x08, 0x92, 0x01, 0x83, 0x08, 0x91, 0x48,
+ 0xc2, 0x00, 0x96, 0x08, 0x91, 0x59, 0x83, 0x08, 0x91, 0x38, 0xc2, 0x01,
+ 0x0e, 0x08, 0x91, 0x19, 0xc2, 0x02, 0x1d, 0x08, 0x91, 0x11, 0x83, 0x08,
+ 0x91, 0x08, 0xc2, 0x01, 0x0e, 0x08, 0x91, 0x01, 0x83, 0x08, 0x90, 0xf8,
+ 0x83, 0x08, 0x90, 0xf1, 0xc2, 0x01, 0x01, 0x08, 0x90, 0xc1, 0xc2, 0x1a,
+ 0x36, 0x08, 0x90, 0x99, 0xc2, 0x07, 0x69, 0x08, 0x90, 0x68, 0xc2, 0x01,
+ 0x0e, 0x08, 0x90, 0xe9, 0x06, 0xc1, 0xd0, 0x9a, 0x83, 0x08, 0x90, 0xd8,
+ 0xc2, 0x01, 0x0e, 0x08, 0x90, 0xd1, 0x83, 0x08, 0x90, 0xc9, 0x16, 0x41,
+ 0xd0, 0xaa, 0xc2, 0x26, 0x94, 0x08, 0x90, 0x89, 0x83, 0x08, 0x90, 0x80,
+ 0xc2, 0x01, 0x0e, 0x08, 0x90, 0x79, 0x83, 0x08, 0x90, 0x70, 0xc2, 0x01,
+ 0x0e, 0x08, 0x90, 0x61, 0xc2, 0x07, 0x69, 0x08, 0x90, 0x59, 0x83, 0x08,
+ 0x90, 0x50, 0xc2, 0x01, 0x0e, 0x08, 0x90, 0x49, 0x83, 0x08, 0x90, 0x40,
+ 0x97, 0x08, 0x90, 0x39, 0x8b, 0x08, 0x90, 0x29, 0x83, 0x08, 0x90, 0x08,
+ 0x15, 0xc1, 0xd0, 0xb4, 0xc5, 0x33, 0x1a, 0x08, 0x91, 0xb1, 0xc6, 0x21,
+ 0x26, 0x08, 0x91, 0xa9, 0xc8, 0x10, 0xab, 0x08, 0x91, 0xa0, 0xcc, 0x25,
+ 0x86, 0x08, 0x91, 0xe1, 0xc5, 0x83, 0xd7, 0x08, 0x91, 0xc8, 0x42, 0x0c,
+ 0x25, 0xc1, 0xd0, 0xc0, 0x12, 0xc1, 0xd0, 0xd8, 0x04, 0xc1, 0xd0, 0xea,
+ 0x45, 0xc5, 0xb7, 0xc1, 0xd0, 0xf6, 0xc9, 0xaf, 0xca, 0x00, 0xcf, 0x81,
+ 0x4a, 0xa6, 0x78, 0x41, 0xd1, 0x02, 0x03, 0xc1, 0xd1, 0x16, 0x0d, 0xc1,
+ 0xd1, 0x28, 0xcb, 0x91, 0x4f, 0x00, 0xbe, 0xc9, 0x04, 0xc1, 0xd1, 0x3a,
+ 0xc7, 0xc3, 0xfc, 0x00, 0xbe, 0xb9, 0x05, 0xc1, 0xd1, 0x44, 0xc6, 0xd8,
+ 0x0e, 0x00, 0xbe, 0x89, 0xcd, 0x7f, 0xed, 0x00, 0xbe, 0x81, 0x16, 0xc1,
+ 0xd1, 0x50, 0x14, 0xc1, 0xd1, 0x5c, 0xcb, 0x97, 0x8a, 0x00, 0xbe, 0x49,
+ 0xcd, 0x7a, 0xcc, 0x00, 0xbe, 0x41, 0xc7, 0xc9, 0xc1, 0x00, 0xbe, 0x30,
+ 0xc4, 0x15, 0xa7, 0x00, 0xbf, 0x39, 0xc2, 0x22, 0x45, 0x00, 0xbf, 0x30,
+ 0xc3, 0x0d, 0x8f, 0x00, 0xbf, 0x29, 0xc3, 0x08, 0xde, 0x00, 0xbf, 0x20,
+ 0xc4, 0x05, 0xde, 0x00, 0xbf, 0x19, 0xc2, 0x0a, 0x20, 0x00, 0xbf, 0x10,
+ 0x03, 0xc1, 0xd1, 0x68, 0x11, 0xc1, 0xd1, 0x78, 0x87, 0x00, 0xbe, 0x09,
+ 0x8b, 0x00, 0xbd, 0xbb, 0x01, 0xd1, 0x80, 0x9b, 0x00, 0xbd, 0xcb, 0x01,
+ 0xd1, 0x88, 0x97, 0x00, 0xbd, 0xda, 0x01, 0xd1, 0x90, 0x83, 0x00, 0xbd,
+ 0xa9, 0x93, 0x00, 0xbd, 0xa0, 0x03, 0xc1, 0xd1, 0x98, 0x48, 0xc3, 0xab,
+ 0xc1, 0xd1, 0xa8, 0x87, 0x00, 0xbd, 0x79, 0x97, 0x00, 0xbd, 0x3b, 0x01,
+ 0xd1, 0xb4, 0x8b, 0x00, 0xbd, 0x2a, 0x01, 0xd1, 0xbf, 0x9b, 0x00, 0xbd,
+ 0x70, 0x9b, 0x00, 0xbd, 0x60, 0x83, 0x00, 0xbd, 0x09, 0x91, 0x00, 0xbc,
+ 0xd8, 0x83, 0x00, 0xbc, 0xf9, 0xc2, 0x05, 0x3b, 0x00, 0xbc, 0xf1, 0xc2,
+ 0x01, 0x0e, 0x00, 0xbc, 0xe8, 0x0a, 0xc1, 0xd1, 0xc3, 0x91, 0x00, 0xbc,
+ 0xb0, 0x91, 0x00, 0xbc, 0x99, 0xc2, 0x00, 0x10, 0x00, 0xbc, 0x71, 0xc2,
+ 0x47, 0x43, 0x00, 0xbc, 0x49, 0xc2, 0x13, 0xf3, 0x00, 0xbc, 0x20, 0x0a,
+ 0xc1, 0xd1, 0xcb, 0x91, 0x00, 0xbc, 0x89, 0x83, 0x00, 0xbc, 0x79, 0x42,
+ 0x01, 0x47, 0x41, 0xd1, 0xd3, 0x91, 0x00, 0xbc, 0x61, 0x83, 0x00, 0xbc,
+ 0x50, 0x0a, 0xc1, 0xd1, 0xdb, 0x91, 0x00, 0xbc, 0x39, 0x83, 0x00, 0xbc,
+ 0x28, 0x0a, 0xc1, 0xd1, 0xe3, 0x91, 0x00, 0xbc, 0x11, 0x83, 0x00, 0xbc,
+ 0x00, 0xc3, 0x03, 0x2c, 0x0e, 0x96, 0x90, 0xc4, 0x12, 0xf2, 0x0e, 0x96,
+ 0x88, 0xc4, 0x12, 0xf2, 0x0e, 0x96, 0x80, 0xc5, 0x12, 0xf1, 0x0e, 0x96,
+ 0x79, 0xc2, 0x00, 0x29, 0x0e, 0x96, 0x28, 0xc4, 0x12, 0xf2, 0x0e, 0x96,
+ 0x70, 0xc6, 0x55, 0xbb, 0x0e, 0x96, 0x69, 0xc3, 0x05, 0xdf, 0x0e, 0x96,
+ 0x18, 0xc4, 0x25, 0x4d, 0x0e, 0x96, 0x61, 0x91, 0x0e, 0x96, 0x10, 0x15,
+ 0xc1, 0xd1, 0xeb, 0x83, 0x01, 0x85, 0x13, 0x01, 0xd2, 0x05, 0x0f, 0xc1,
+ 0xd2, 0x0b, 0x8b, 0x01, 0x85, 0x21, 0x97, 0x01, 0x85, 0x31, 0x87, 0x01,
+ 0x85, 0x41, 0x91, 0x01, 0x85, 0x51, 0x0d, 0xc1, 0xd2, 0x22, 0x09, 0xc1,
+ 0xd2, 0x36, 0x1c, 0xc1, 0xd2, 0x4a, 0x16, 0xc1, 0xd2, 0x5e, 0x06, 0xc1,
+ 0xd2, 0x72, 0x90, 0x01, 0x87, 0x9b, 0x01, 0xd2, 0x86, 0x0a, 0xc1, 0xd2,
+ 0x9a, 0x04, 0xc1, 0xd2, 0xae, 0x12, 0xc1, 0xd2, 0xc2, 0x1b, 0xc1, 0xd2,
+ 0xd6, 0x14, 0xc1, 0xd2, 0xe2, 0x19, 0xc1, 0xd2, 0xf6, 0x18, 0x41, 0xd3,
+ 0x06, 0xc4, 0x24, 0x35, 0x08, 0xfa, 0xc9, 0xc5, 0x05, 0x1b, 0x08, 0xfa,
+ 0xc1, 0x15, 0xc1, 0xd3, 0x1a, 0x08, 0xc1, 0xd3, 0x26, 0x16, 0xc1, 0xd3,
+ 0x32, 0xc3, 0x05, 0x17, 0x08, 0xfa, 0x89, 0xc4, 0x16, 0x57, 0x08, 0xfa,
+ 0x80, 0xcd, 0x7c, 0x45, 0x08, 0xfa, 0x79, 0xc5, 0x03, 0xfb, 0x08, 0xfa,
+ 0x60, 0x12, 0xc1, 0xd3, 0x3e, 0xcd, 0x25, 0x87, 0x08, 0xfa, 0x28, 0xcc,
+ 0x8d, 0xe4, 0x08, 0xfa, 0x69, 0xc5, 0x00, 0xc7, 0x08, 0xfa, 0x38, 0xc8,
+ 0x3e, 0x80, 0x08, 0xfa, 0x51, 0x96, 0x08, 0xfa, 0x48, 0x97, 0x08, 0xfa,
+ 0x00, 0x8b, 0x08, 0xf9, 0xf9, 0xcb, 0x92, 0xe6, 0x08, 0xf9, 0xa9, 0x83,
+ 0x08, 0xf9, 0xa0, 0x97, 0x08, 0xf9, 0xc8, 0x8b, 0x08, 0xf9, 0xb8, 0xc3,
+ 0x2c, 0x54, 0x08, 0xf9, 0x99, 0xc2, 0x01, 0x0e, 0x08, 0xf8, 0xd1, 0x83,
+ 0x08, 0xf8, 0xc8, 0x83, 0x08, 0xf9, 0x89, 0xc2, 0x0e, 0xe5, 0x08, 0xf9,
+ 0x81, 0xc2, 0x01, 0x0e, 0x08, 0xf9, 0x78, 0xc2, 0x01, 0x0e, 0x08, 0xf9,
+ 0x49, 0x83, 0x08, 0xf9, 0x40, 0xc2, 0x01, 0x0e, 0x08, 0xf9, 0x39, 0x83,
+ 0x08, 0xf9, 0x30, 0x83, 0x08, 0xf9, 0x29, 0xc2, 0x01, 0x01, 0x08, 0xf9,
+ 0x01, 0xc2, 0x1a, 0x36, 0x08, 0xf8, 0xd9, 0xc2, 0x07, 0x69, 0x08, 0xf8,
+ 0xb0, 0xc2, 0x01, 0x0e, 0x08, 0xf9, 0x21, 0x83, 0x08, 0xf9, 0x19, 0x06,
+ 0x41, 0xd3, 0x4a, 0xc2, 0x01, 0x0e, 0x08, 0xf9, 0x11, 0x83, 0x08, 0xf9,
+ 0x09, 0x16, 0x41, 0xd3, 0x54, 0xc2, 0x01, 0x0e, 0x08, 0xf8, 0xc1, 0x83,
+ 0x08, 0xf8, 0xb8, 0xc2, 0x01, 0x0e, 0x08, 0xf8, 0xa9, 0x83, 0x08, 0xf8,
+ 0xa0, 0xc2, 0x01, 0x0e, 0x08, 0xf8, 0x99, 0x83, 0x08, 0xf8, 0x90, 0x8b,
+ 0x08, 0xf8, 0x79, 0x83, 0x08, 0xf8, 0x28, 0x8e, 0x08, 0xf8, 0x63, 0x01,
+ 0xd3, 0x5e, 0x94, 0x08, 0xf8, 0x52, 0x01, 0xd3, 0x62, 0x97, 0x08, 0xf8,
+ 0x48, 0x8b, 0x08, 0xf8, 0x38, 0x97, 0x08, 0x85, 0xc1, 0x8b, 0x08, 0x85,
+ 0xb1, 0x83, 0x08, 0x85, 0x80, 0x97, 0x08, 0x85, 0xa0, 0x8b, 0x08, 0x85,
+ 0x90, 0xc5, 0x83, 0xd7, 0x08, 0x86, 0x09, 0xcc, 0x25, 0x86, 0x08, 0x85,
+ 0xf8, 0xc5, 0x33, 0x1a, 0x08, 0x85, 0xd1, 0x42, 0x02, 0x52, 0xc1, 0xd3,
+ 0x66, 0xc8, 0x10, 0xab, 0x08, 0x84, 0x09, 0xcb, 0x21, 0x1a, 0x08, 0x84,
+ 0x00, 0x83, 0x08, 0x85, 0x71, 0xc2, 0x0e, 0xe5, 0x08, 0x85, 0x69, 0xc2,
+ 0x01, 0x0e, 0x08, 0x85, 0x60, 0x83, 0x08, 0x85, 0x49, 0xc2, 0x01, 0x0e,
+ 0x08, 0x84, 0xe0, 0xc2, 0x01, 0x0e, 0x08, 0x85, 0x31, 0x83, 0x08, 0x85,
+ 0x28, 0xc2, 0x01, 0x0e, 0x08, 0x85, 0x21, 0x83, 0x08, 0x85, 0x18, 0x83,
+ 0x08, 0x85, 0x11, 0xc2, 0x01, 0x01, 0x08, 0x84, 0xe9, 0xc2, 0x1a, 0x36,
+ 0x08, 0x84, 0xb1, 0xc2, 0x07, 0x69, 0x08, 0x84, 0x88, 0xc2, 0x01, 0x0e,
+ 0x08, 0x85, 0x09, 0x83, 0x08, 0x85, 0x01, 0x06, 0x41, 0xd3, 0x72, 0xc2,
+ 0x01, 0x0e, 0x08, 0x84, 0xf9, 0x83, 0x08, 0x84, 0xf1, 0x16, 0x41, 0xd3,
+ 0x82, 0xc2, 0x01, 0x0e, 0x08, 0x84, 0xa9, 0x83, 0x08, 0x84, 0xa0, 0xc2,
+ 0x01, 0x0e, 0x08, 0x84, 0x99, 0x83, 0x08, 0x84, 0x90, 0xc2, 0x01, 0x0e,
+ 0x08, 0x84, 0x81, 0x83, 0x08, 0x84, 0x78, 0xc2, 0x01, 0x0e, 0x08, 0x84,
+ 0x71, 0x83, 0x08, 0x84, 0x68, 0x97, 0x08, 0x84, 0x61, 0x8b, 0x08, 0x84,
+ 0x51, 0x83, 0x08, 0x84, 0x20, 0x97, 0x08, 0x84, 0x40, 0x8b, 0x08, 0x84,
+ 0x30, 0xc7, 0xcf, 0x8d, 0x05, 0x49, 0x68, 0x87, 0x05, 0x49, 0x48, 0x87,
+ 0x05, 0x49, 0x30, 0x91, 0x05, 0x49, 0x29, 0x87, 0x05, 0x49, 0x18, 0x83,
+ 0x05, 0x48, 0xf9, 0xc2, 0x00, 0x44, 0x05, 0x48, 0x98, 0xc2, 0x01, 0x0e,
+ 0x05, 0x48, 0xf1, 0x83, 0x05, 0x48, 0x90, 0xc2, 0x01, 0x0e, 0x05, 0x48,
+ 0xb1, 0x83, 0x05, 0x48, 0xa8, 0x83, 0x05, 0x48, 0xa1, 0xc2, 0x1a, 0x36,
+ 0x05, 0x48, 0x89, 0xc2, 0x07, 0x69, 0x05, 0x48, 0x68, 0xc2, 0x01, 0x0e,
+ 0x05, 0x48, 0x79, 0x83, 0x05, 0x48, 0x70, 0xc2, 0x01, 0x0e, 0x05, 0x48,
+ 0x59, 0x83, 0x05, 0x48, 0x50, 0xc4, 0x15, 0xa7, 0x05, 0x48, 0x39, 0xc2,
+ 0x22, 0x45, 0x05, 0x48, 0x30, 0xc3, 0x0d, 0x8f, 0x05, 0x48, 0x29, 0xc3,
+ 0x08, 0xde, 0x05, 0x48, 0x20, 0xc4, 0x05, 0xde, 0x05, 0x48, 0x19, 0xc2,
+ 0x0a, 0x20, 0x05, 0x48, 0x10, 0x15, 0xc1, 0xd3, 0x8c, 0x05, 0xc1, 0xd3,
+ 0x98, 0x03, 0xc1, 0xd3, 0xa4, 0x42, 0x02, 0x52, 0xc1, 0xd3, 0xb0, 0xc5,
+ 0x33, 0x1a, 0x00, 0x65, 0xe1, 0xcb, 0x93, 0xc2, 0x00, 0x67, 0x89, 0xcb,
+ 0x9c, 0xb2, 0x00, 0x67, 0x90, 0x45, 0x02, 0x01, 0xc1, 0xd3, 0xbc, 0xc9,
+ 0x37, 0x9f, 0x00, 0x66, 0xa8, 0x03, 0xc1, 0xd4, 0x2b, 0x8b, 0x00, 0x65,
+ 0xfb, 0x01, 0xd4, 0x37, 0x97, 0x00, 0x66, 0x0b, 0x01, 0xd4, 0x3b, 0x48,
+ 0xb7, 0xd7, 0xc1, 0xd4, 0x3f, 0x87, 0x00, 0x66, 0x33, 0x01, 0xd4, 0x4d,
+ 0x91, 0x00, 0x66, 0x52, 0x01, 0xd4, 0x51, 0xc4, 0x16, 0x57, 0x00, 0x67,
+ 0x31, 0xc3, 0x05, 0x17, 0x00, 0x67, 0x39, 0x16, 0xc1, 0xd4, 0x55, 0x08,
+ 0xc1, 0xd4, 0x61, 0x15, 0xc1, 0xd4, 0x6d, 0xc5, 0x05, 0x1b, 0x00, 0x67,
+ 0x71, 0xc4, 0x24, 0x35, 0x00, 0x67, 0x78, 0x11, 0xc1, 0xd4, 0x79, 0x0e,
+ 0xc1, 0xd4, 0x8c, 0x06, 0xc1, 0xd4, 0xa1, 0x15, 0xc1, 0xd4, 0xb1, 0x0a,
+ 0xc1, 0xd4, 0xfb, 0x16, 0xc1, 0xd5, 0x0d, 0x0f, 0xc1, 0xd5, 0x32, 0x07,
+ 0xc1, 0xd5, 0x44, 0x05, 0xc1, 0xd5, 0x67, 0x0b, 0xc1, 0xd5, 0x7f, 0xc5,
+ 0xa4, 0x6b, 0x01, 0x78, 0x89, 0x12, 0xc1, 0xd5, 0x89, 0x19, 0xc1, 0xd5,
+ 0x9f, 0x14, 0xc1, 0xd5, 0xb9, 0x03, 0xc1, 0xd5, 0xd3, 0x09, 0xc1, 0xd5,
+ 0xeb, 0x04, 0xc1, 0xd6, 0x04, 0x10, 0xc1, 0xd6, 0x1e, 0x08, 0xc1, 0xd6,
+ 0x28, 0x42, 0x26, 0x94, 0xc1, 0xd6, 0x4a, 0xc3, 0x2a, 0x41, 0x01, 0x7b,
+ 0x21, 0x18, 0xc1, 0xd6, 0x54, 0xc6, 0xc5, 0xfb, 0x01, 0x7e, 0x40, 0x46,
+ 0x01, 0xab, 0x41, 0xd6, 0x60, 0x49, 0xb7, 0x50, 0xc1, 0xd6, 0x6c, 0xc2,
+ 0x00, 0x98, 0x0b, 0x7a, 0x50, 0x42, 0x00, 0xe5, 0xc1, 0xd6, 0x78, 0x15,
+ 0xc1, 0xd6, 0xa2, 0x87, 0x0b, 0x7a, 0x41, 0x42, 0x06, 0x66, 0xc1, 0xd6,
+ 0xb6, 0xc2, 0x00, 0x44, 0x0b, 0x78, 0x71, 0x83, 0x0b, 0x78, 0x50, 0x83,
+ 0x0b, 0x78, 0x83, 0x01, 0xd6, 0xc0, 0x1b, 0xc1, 0xd6, 0xc6, 0x09, 0xc1,
+ 0xd6, 0xd0, 0x10, 0xc1, 0xd6, 0xda, 0xc2, 0x01, 0x0e, 0x0b, 0x78, 0x88,
+ 0x1c, 0xc1, 0xd6, 0xe4, 0x42, 0x06, 0x66, 0xc1, 0xd6, 0xfa, 0xc2, 0x0c,
+ 0x25, 0x0b, 0x78, 0x79, 0x83, 0x0b, 0x78, 0x58, 0xc2, 0x18, 0x7a, 0x0b,
+ 0x7a, 0x31, 0x83, 0x0b, 0x79, 0xd1, 0xc2, 0x0e, 0xe5, 0x0b, 0x79, 0xa1,
+ 0xc2, 0x01, 0x0e, 0x0b, 0x79, 0x98, 0xc2, 0x00, 0x47, 0x0b, 0x7a, 0x29,
+ 0x83, 0x0b, 0x78, 0x08, 0xc2, 0x01, 0x0e, 0x0b, 0x7a, 0x21, 0x83, 0x0b,
+ 0x79, 0x30, 0x8a, 0x0b, 0x7a, 0x19, 0x47, 0x7a, 0xe7, 0x41, 0xd7, 0x04,
+ 0x1c, 0xc1, 0xd7, 0x14, 0x15, 0xc1, 0xd7, 0x22, 0x83, 0x0b, 0x79, 0xd9,
+ 0xc2, 0x01, 0x0e, 0x0b, 0x79, 0xa8, 0x16, 0xc1, 0xd7, 0x2c, 0xc4, 0xe7,
+ 0x53, 0x0b, 0x79, 0x89, 0xc2, 0x06, 0x6b, 0x0b, 0x79, 0x01, 0xc3, 0x3f,
+ 0x07, 0x0b, 0x78, 0x91, 0xc2, 0x07, 0x44, 0x0b, 0x78, 0x10, 0x0a, 0xc1,
+ 0xd7, 0x3a, 0x83, 0x0b, 0x78, 0xf8, 0xc2, 0x07, 0x69, 0x0b, 0x79, 0x11,
+ 0x83, 0x0b, 0x79, 0x08, 0x0a, 0xc1, 0xd7, 0x44, 0xc2, 0x1a, 0x36, 0x0b,
+ 0x78, 0xb9, 0x83, 0x0b, 0x78, 0xb0, 0xc2, 0x01, 0xa7, 0x0b, 0x78, 0x49,
+ 0x83, 0x0b, 0x78, 0x40, 0xc2, 0x01, 0x0e, 0x0b, 0x78, 0x29, 0x83, 0x0b,
+ 0x78, 0x20, 0xc2, 0x00, 0x96, 0x0b, 0x78, 0x19, 0x83, 0x0b, 0x78, 0x00,
+ 0x8b, 0x0b, 0x7c, 0x39, 0xc2, 0x13, 0x1d, 0x0b, 0x7b, 0xf9, 0xc2, 0x00,
+ 0x56, 0x0b, 0x7b, 0x81, 0xc2, 0x05, 0x1b, 0x0b, 0x7b, 0x79, 0x97, 0x0b,
+ 0x7b, 0x71, 0x83, 0x0b, 0x7b, 0x5a, 0x01, 0xd7, 0x4e, 0x91, 0x0b, 0x7b,
+ 0x2b, 0x01, 0xd7, 0x55, 0x89, 0x0b, 0x7c, 0x21, 0xc2, 0x00, 0x56, 0x0b,
+ 0x7b, 0x49, 0x97, 0x0b, 0x7b, 0x41, 0x8b, 0x0b, 0x7b, 0x39, 0x87, 0x0b,
+ 0x7b, 0x31, 0x83, 0x0b, 0x7b, 0x12, 0x01, 0xd7, 0x5b, 0x83, 0x0b, 0x7c,
+ 0x29, 0x8b, 0x0b, 0x7b, 0xd1, 0x94, 0x0b, 0x7b, 0xbb, 0x01, 0xd7, 0x62,
+ 0x90, 0x0b, 0x7a, 0xf2, 0x01, 0xd7, 0x66, 0x07, 0xc1, 0xd7, 0x6a, 0x89,
+ 0x0b, 0x7c, 0x09, 0x97, 0x0b, 0x7b, 0xe1, 0x91, 0x0b, 0x7a, 0xd0, 0xc2,
+ 0x00, 0x94, 0x0b, 0x7c, 0x01, 0x8b, 0x0b, 0x7b, 0x90, 0x89, 0x0b, 0x7b,
+ 0xf0, 0x97, 0x0b, 0x7b, 0xd9, 0x8b, 0x0b, 0x7b, 0xc9, 0x87, 0x0b, 0x7b,
+ 0x9b, 0x01, 0xd7, 0x72, 0x90, 0x0b, 0x7a, 0xbb, 0x01, 0xd7, 0x76, 0xc2,
+ 0x02, 0xe6, 0x0b, 0x7a, 0xb1, 0x83, 0x0b, 0x7a, 0xa8, 0x94, 0x0b, 0x7b,
+ 0xb0, 0x91, 0x0b, 0x7a, 0xd8, 0xca, 0x9d, 0xe0, 0x0b, 0x7a, 0x99, 0xc7,
+ 0xc5, 0xd8, 0x0b, 0x7a, 0x90, 0xc5, 0x1f, 0x01, 0x01, 0x12, 0x11, 0xc4,
+ 0x02, 0xcb, 0x01, 0x10, 0x92, 0x01, 0xd7, 0x7a, 0x4e, 0x70, 0xb0, 0xc1,
+ 0xd7, 0x7e, 0xcb, 0x5e, 0x74, 0x0f, 0xbd, 0x19, 0x46, 0x01, 0x31, 0xc1,
+ 0xd7, 0x8a, 0x04, 0xc1, 0xd7, 0x96, 0x44, 0x00, 0x47, 0xc1, 0xd7, 0xa2,
+ 0x43, 0x00, 0xcd, 0xc1, 0xd7, 0xae, 0x08, 0xc1, 0xd7, 0xba, 0xcc, 0x02,
+ 0x5b, 0x01, 0x3a, 0xc9, 0x15, 0xc1, 0xd7, 0xcc, 0xd2, 0x4a, 0xc4, 0x01,
+ 0x02, 0xf9, 0x46, 0x0e, 0x97, 0x41, 0xd7, 0xe4, 0xc5, 0x00, 0xea, 0x01,
+ 0x72, 0x61, 0xd0, 0x0f, 0x62, 0x01, 0x72, 0x99, 0xcd, 0x2d, 0xa6, 0x01,
+ 0x72, 0xa0, 0xca, 0xa3, 0x9e, 0x0b, 0x74, 0xc9, 0x4c, 0x27, 0x72, 0x41,
+ 0xd7, 0xf0, 0xc4, 0x00, 0xeb, 0x0b, 0x74, 0xb9, 0x4e, 0x01, 0xf8, 0x41,
+ 0xd8, 0x6a, 0x16, 0xc1, 0xd8, 0xe4, 0xc3, 0x05, 0x17, 0x0b, 0x74, 0x0b,
+ 0x01, 0xd8, 0xf6, 0xc4, 0x24, 0x35, 0x0b, 0x74, 0x49, 0xc5, 0x05, 0x1b,
+ 0x0b, 0x74, 0x41, 0x15, 0xc1, 0xd8, 0xfc, 0x08, 0xc1, 0xd9, 0x08, 0xc4,
+ 0x16, 0x57, 0x0b, 0x74, 0x00, 0xc8, 0x4c, 0xbc, 0x0b, 0x74, 0x99, 0x07,
+ 0xc1, 0xd9, 0x14, 0x15, 0xc1, 0xd9, 0x20, 0x08, 0xc1, 0xd9, 0x2c, 0x16,
+ 0x41, 0xd9, 0x38, 0xc8, 0xbe, 0x13, 0x01, 0x1e, 0xc1, 0xc6, 0xd0, 0xb2,
+ 0x01, 0x1e, 0xb9, 0x4a, 0xa7, 0x72, 0x41, 0xd9, 0x4a, 0xca, 0xa9, 0xca,
+ 0x01, 0x1e, 0xa1, 0xc5, 0x2f, 0xd9, 0x01, 0x1e, 0x90, 0x1d, 0xc1, 0xd9,
+ 0x56, 0x1e, 0x41, 0xd9, 0x7e, 0xc3, 0x05, 0x17, 0x0f, 0x46, 0x39, 0x16,
+ 0xc1, 0xd9, 0xa6, 0x08, 0xc1, 0xd9, 0xb2, 0x15, 0xc1, 0xd9, 0xbe, 0xc5,
+ 0x05, 0x1b, 0x0f, 0x46, 0x71, 0xc4, 0x24, 0x35, 0x0f, 0x46, 0x78, 0x16,
+ 0xc1, 0xd9, 0xca, 0x47, 0x0d, 0x7f, 0xc1, 0xd9, 0xd4, 0xc8, 0x36, 0xde,
+ 0x0f, 0x46, 0xb0, 0x49, 0x54, 0xdd, 0xc1, 0xd9, 0xde, 0x47, 0x37, 0x49,
+ 0xc1, 0xd9, 0xfa, 0x0e, 0x41, 0xda, 0x21, 0x4a, 0x9e, 0x3a, 0xc1, 0xda,
+ 0x2d, 0x47, 0x01, 0xff, 0x41, 0xda, 0x37, 0x00, 0x41, 0xda, 0x8f, 0xc2,
+ 0x0a, 0x20, 0x05, 0x5f, 0x91, 0xc4, 0x05, 0xde, 0x05, 0x5f, 0x98, 0xc3,
+ 0x08, 0xde, 0x05, 0x5f, 0xa1, 0xc3, 0x0d, 0x8f, 0x05, 0x5f, 0xa8, 0xc2,
+ 0x22, 0x45, 0x05, 0x5f, 0xb1, 0xc4, 0x15, 0xa7, 0x05, 0x5f, 0xb8, 0xc4,
+ 0x90, 0x86, 0x05, 0x5f, 0x51, 0xc7, 0xc4, 0x57, 0x05, 0x5f, 0x49, 0xc5,
+ 0xd9, 0xa0, 0x05, 0x5f, 0x31, 0x03, 0xc1, 0xda, 0x9b, 0x0b, 0xc1, 0xda,
+ 0xa9, 0xc4, 0xd7, 0x34, 0x05, 0x5f, 0x19, 0xc7, 0x45, 0xcd, 0x05, 0x57,
+ 0xa9, 0x17, 0xc1, 0xda, 0xb3, 0xc6, 0xd6, 0xdc, 0x05, 0x5f, 0x38, 0x8b,
+ 0x05, 0x5e, 0x7b, 0x01, 0xda, 0xbd, 0x10, 0xc1, 0xda, 0xc3, 0x16, 0xc1,
+ 0xda, 0xdf, 0x12, 0xc1, 0xda, 0xf2, 0x0d, 0xc1, 0xda, 0xff, 0x04, 0xc1,
+ 0xdb, 0x0e, 0x06, 0xc1, 0xdb, 0x18, 0x09, 0xc1, 0xdb, 0x28, 0x15, 0xc1,
+ 0xdb, 0x34, 0x42, 0x11, 0x3f, 0xc1, 0xdb, 0x46, 0x91, 0x05, 0x57, 0x09,
+ 0x87, 0x05, 0x57, 0x01, 0xc3, 0x04, 0x3e, 0x05, 0x5e, 0xa1, 0xc5, 0xdd,
+ 0x9c, 0x05, 0x5e, 0x89, 0xc2, 0x0b, 0xfd, 0x05, 0x5e, 0x71, 0xc3, 0xd1,
+ 0xdb, 0x05, 0x5e, 0x69, 0xc4, 0xad, 0x7b, 0x05, 0x5e, 0x61, 0xc3, 0x2b,
+ 0x48, 0x05, 0x5e, 0x1b, 0x01, 0xdb, 0x50, 0xc3, 0x02, 0xd9, 0x05, 0x5e,
+ 0x13, 0x01, 0xdb, 0x56, 0xc3, 0x48, 0x25, 0x05, 0x5e, 0x59, 0x0c, 0x41,
+ 0xdb, 0x5c, 0xc7, 0xca, 0xa8, 0x0f, 0xb7, 0xa9, 0xc4, 0xd4, 0x9e, 0x0f,
+ 0xb7, 0x28, 0x00, 0x41, 0xdb, 0x68, 0xc4, 0x01, 0xa7, 0x0f, 0xa1, 0x69,
+ 0xc4, 0xd5, 0xd4, 0x0f, 0xd5, 0x20, 0x47, 0x01, 0xff, 0xc1, 0xdb, 0x7a,
+ 0xd9, 0x1f, 0x9c, 0x05, 0x5a, 0xd8, 0x06, 0xc1, 0xdb, 0xc0, 0x45, 0x02,
+ 0xcb, 0xc1, 0xdb, 0xd2, 0xd1, 0x56, 0x0f, 0x08, 0xb2, 0x19, 0x4b, 0x6f,
+ 0x71, 0xc1, 0xdb, 0xe2, 0x47, 0x01, 0xff, 0x41, 0xdc, 0x02, 0xc5, 0x67,
+ 0xe4, 0x0e, 0x98, 0x01, 0x1b, 0x41, 0xdc, 0x67, 0x46, 0x41, 0xe6, 0xc1,
+ 0xdc, 0x73, 0xd9, 0x20, 0xaf, 0x08, 0xb3, 0x19, 0xcf, 0x67, 0x35, 0x00,
+ 0xc0, 0x30, 0xca, 0x09, 0x68, 0x08, 0xb3, 0x4b, 0x01, 0xdc, 0x79, 0xdc,
+ 0x13, 0x52, 0x00, 0xc0, 0x38, 0xd5, 0x09, 0x72, 0x08, 0xb3, 0x40, 0x46,
+ 0x01, 0xab, 0x41, 0xdc, 0x7f, 0x46, 0x01, 0xab, 0x41, 0xdc, 0x8b, 0xd9,
+ 0x1e, 0x25, 0x08, 0xb3, 0x11, 0x45, 0x08, 0xd8, 0x41, 0xdc, 0x97, 0xc2,
+ 0x00, 0x3f, 0x00, 0xc1, 0x73, 0x01, 0xdc, 0xbb, 0x83, 0x00, 0xc1, 0x03,
+ 0x01, 0xdc, 0xc1, 0x16, 0xc1, 0xdc, 0xcd, 0x42, 0x11, 0x3f, 0xc1, 0xdc,
+ 0xdd, 0x15, 0xc1, 0xdc, 0xe8, 0x1c, 0xc1, 0xdc, 0xf8, 0x0e, 0xc1, 0xdd,
+ 0x08, 0xc3, 0x3b, 0xb0, 0x00, 0xc1, 0xf1, 0x0d, 0xc1, 0xdd, 0x12, 0xc2,
+ 0x01, 0xa7, 0x00, 0xc1, 0xc9, 0xc2, 0x00, 0x4c, 0x00, 0xc1, 0xc1, 0xc2,
+ 0x00, 0x9a, 0x00, 0xc1, 0xb9, 0xc2, 0x1a, 0x36, 0x00, 0xc1, 0xb1, 0xc2,
+ 0x26, 0x94, 0x00, 0xc1, 0xa9, 0xc2, 0x0c, 0x25, 0x00, 0xc1, 0x99, 0xc2,
+ 0x07, 0x69, 0x00, 0xc1, 0x69, 0xc2, 0x0e, 0x13, 0x00, 0xc1, 0x61, 0xc2,
+ 0x07, 0x44, 0x00, 0xc1, 0x59, 0xc2, 0x02, 0x1d, 0x00, 0xc1, 0x51, 0xc2,
+ 0x01, 0x01, 0x00, 0xc1, 0x41, 0x87, 0x00, 0xc1, 0x0b, 0x01, 0xdd, 0x1c,
+ 0x97, 0x00, 0xc1, 0x23, 0x01, 0xdd, 0x20, 0x91, 0x00, 0xc1, 0x1b, 0x01,
+ 0xdd, 0x24, 0x8b, 0x00, 0xc1, 0x10, 0x57, 0x27, 0x54, 0xc1, 0xdd, 0x28,
+ 0xc8, 0x39, 0xc0, 0x00, 0xc0, 0x29, 0xc8, 0x11, 0x48, 0x00, 0xc0, 0x18,
+ 0xc9, 0x11, 0x47, 0x00, 0xc0, 0x49, 0xc5, 0x00, 0xea, 0x00, 0xc0, 0x40,
+ 0xc3, 0x0c, 0x34, 0x00, 0xc0, 0x21, 0xc3, 0x00, 0xec, 0x00, 0xc0, 0x10,
+ 0xca, 0xa7, 0xa4, 0x0f, 0xa5, 0xc1, 0xc3, 0x2d, 0xfe, 0x0f, 0xa5, 0x80,
+ 0x48, 0x01, 0xf7, 0xc1, 0xdd, 0x38, 0x12, 0xc1, 0xdd, 0xd9, 0xca, 0xa3,
+ 0xf8, 0x0e, 0xb8, 0xd1, 0xcc, 0x8f, 0x28, 0x0e, 0xb8, 0xc1, 0xcc, 0x8b,
+ 0xec, 0x0e, 0xb8, 0xb9, 0xce, 0x12, 0x64, 0x0e, 0xb8, 0xb1, 0x46, 0x00,
+ 0x3e, 0xc1, 0xdd, 0xeb, 0xc5, 0xdf, 0xef, 0x0e, 0xb7, 0xd8, 0x15, 0xc1,
+ 0xde, 0x8b, 0x46, 0x08, 0xd7, 0xc1, 0xde, 0x97, 0x48, 0x01, 0xf7, 0xc1,
+ 0xde, 0xbb, 0x47, 0xca, 0x00, 0xc1, 0xdf, 0x5c, 0x12, 0xc1, 0xdf, 0x8a,
+ 0xca, 0xa3, 0xf8, 0x0e, 0xb7, 0x01, 0xcc, 0x8f, 0x28, 0x0e, 0xb6, 0xf1,
+ 0xcc, 0x8b, 0xec, 0x0e, 0xb6, 0xe9, 0xce, 0x12, 0x64, 0x0e, 0xb6, 0xe1,
+ 0xc5, 0xdf, 0xef, 0x0e, 0xb6, 0x09, 0x48, 0xc1, 0x93, 0x41, 0xdf, 0x9c,
+ 0x46, 0x08, 0xd7, 0xc1, 0xdf, 0xa8, 0x46, 0x00, 0x3e, 0xc1, 0xdf, 0xcc,
+ 0x48, 0x01, 0xf7, 0x41, 0xe0, 0x34, 0x4a, 0x40, 0x85, 0xc1, 0xe0, 0x9c,
+ 0x46, 0x08, 0x2f, 0x41, 0xe0, 0xba, 0x46, 0x08, 0xd7, 0xc1, 0xe0, 0xc6,
+ 0x46, 0x00, 0x3e, 0xc1, 0xe0, 0xea, 0x48, 0x01, 0xf7, 0x41, 0xe1, 0x52,
+ 0x47, 0xc1, 0x94, 0xc1, 0xe1, 0x9e, 0xcf, 0x34, 0x4b, 0x01, 0x3e, 0x68,
+ 0x44, 0x01, 0x19, 0xc1, 0xe1, 0xaa, 0xcd, 0x28, 0xf2, 0x01, 0x3e, 0x58,
+ 0xd5, 0x37, 0xd2, 0x01, 0x3f, 0x71, 0x46, 0x01, 0x31, 0xc1, 0xe1, 0xc2,
+ 0xd4, 0x3a, 0x82, 0x01, 0x3f, 0x51, 0xcd, 0x0f, 0x83, 0x01, 0x3f, 0x40,
+ 0xc3, 0x03, 0x2c, 0x0e, 0x97, 0x90, 0xc4, 0x12, 0xf2, 0x0e, 0x97, 0x88,
+ 0xc4, 0x12, 0xf2, 0x0e, 0x97, 0x80, 0xc5, 0x12, 0xf1, 0x0e, 0x97, 0x79,
+ 0xc2, 0x00, 0x29, 0x0e, 0x97, 0x28, 0xc4, 0x12, 0xf2, 0x0e, 0x97, 0x70,
+ 0xc6, 0x55, 0xbb, 0x0e, 0x97, 0x69, 0xc3, 0x05, 0xdf, 0x0e, 0x97, 0x18,
+ 0xc4, 0x25, 0x4d, 0x0e, 0x97, 0x61, 0x91, 0x0e, 0x97, 0x10, 0x91, 0x08,
+ 0xf7, 0xb1, 0x87, 0x08, 0xf7, 0xa9, 0x97, 0x08, 0xf7, 0xa1, 0x8b, 0x08,
+ 0xf7, 0x98, 0x83, 0x08, 0xf7, 0x89, 0xc2, 0x0e, 0xe5, 0x08, 0xf7, 0x81,
+ 0xc2, 0x06, 0x8c, 0x08, 0xf7, 0x79, 0xc2, 0x00, 0x96, 0x08, 0xf7, 0x71,
+ 0xc2, 0x00, 0x9a, 0x08, 0xf7, 0x69, 0xc2, 0x1a, 0x36, 0x08, 0xf7, 0x61,
+ 0x10, 0xc1, 0xe1, 0xce, 0xc2, 0x26, 0x94, 0x08, 0xf7, 0x51, 0xc2, 0x00,
+ 0x2e, 0x08, 0xf7, 0x49, 0xc2, 0x0c, 0x25, 0x08, 0xf7, 0x39, 0xc2, 0x00,
+ 0x44, 0x08, 0xf7, 0x31, 0xc2, 0x00, 0x3f, 0x08, 0xf7, 0x29, 0xc2, 0x02,
+ 0x1d, 0x08, 0xf7, 0x21, 0xc2, 0x07, 0x44, 0x08, 0xf7, 0x19, 0xc2, 0x07,
+ 0x69, 0x08, 0xf7, 0x09, 0xc2, 0x06, 0x6b, 0x08, 0xf7, 0x00, 0xc4, 0x24,
+ 0x35, 0x08, 0xea, 0xc9, 0xc5, 0x05, 0x1b, 0x08, 0xea, 0xc1, 0x15, 0xc1,
+ 0xe1, 0xde, 0x08, 0xc1, 0xe1, 0xea, 0x16, 0xc1, 0xe1, 0xf6, 0xc3, 0x05,
+ 0x17, 0x08, 0xea, 0x89, 0xc4, 0x16, 0x57, 0x08, 0xea, 0x80, 0xc6, 0xd2,
+ 0x2c, 0x08, 0xea, 0x39, 0xc4, 0xa6, 0x61, 0x08, 0xea, 0x30, 0xc5, 0x21,
+ 0x27, 0x08, 0xea, 0x29, 0x4a, 0x6f, 0x72, 0x41, 0xe2, 0x02, 0xc7, 0xc5,
+ 0x0d, 0x08, 0xea, 0x21, 0xc6, 0x21, 0x1a, 0x08, 0xea, 0x19, 0xc5, 0x33,
+ 0x1a, 0x08, 0xea, 0x11, 0xc7, 0x45, 0xcd, 0x08, 0xea, 0x09, 0xc8, 0x10,
+ 0xab, 0x08, 0xea, 0x00, 0x16, 0xc1, 0xe2, 0x22, 0x0c, 0xc1, 0xe2, 0x36,
+ 0x0d, 0xc1, 0xe2, 0x46, 0x0e, 0xc1, 0xe2, 0x56, 0xc2, 0x01, 0x0e, 0x08,
+ 0xe9, 0x61, 0x15, 0xc1, 0xe2, 0x60, 0xc2, 0x06, 0x8c, 0x08, 0xe9, 0x41,
+ 0xc2, 0x00, 0x9a, 0x08, 0xe9, 0x31, 0xc2, 0x1a, 0x36, 0x08, 0xe9, 0x29,
+ 0xc2, 0x00, 0x3f, 0x08, 0xe9, 0x21, 0x04, 0xc1, 0xe2, 0x70, 0x12, 0xc1,
+ 0xe2, 0x7a, 0x10, 0xc1, 0xe2, 0x84, 0x06, 0xc1, 0xe2, 0x9a, 0x05, 0xc1,
+ 0xe2, 0xa8, 0x09, 0xc1, 0xe2, 0xb2, 0x83, 0x08, 0xe8, 0x03, 0x01, 0xe2,
+ 0xbc, 0x91, 0x08, 0xe8, 0x49, 0x87, 0x08, 0xe8, 0x31, 0x97, 0x08, 0xe8,
+ 0x23, 0x01, 0xe2, 0xc8, 0x8b, 0x08, 0xe8, 0x12, 0x01, 0xe2, 0xcc, 0x44,
+ 0x02, 0xcc, 0xc1, 0xe2, 0xd0, 0x50, 0x5d, 0xcf, 0x41, 0xe2, 0xdc, 0x91,
+ 0x08, 0xe5, 0xa1, 0x87, 0x08, 0xe5, 0x99, 0x97, 0x08, 0xe5, 0x91, 0x8b,
+ 0x08, 0xe5, 0x89, 0xc2, 0x0e, 0x30, 0x08, 0xe5, 0x80, 0x83, 0x08, 0xe4,
+ 0x79, 0xc2, 0x01, 0x0e, 0x08, 0xe4, 0x71, 0x15, 0xc1, 0xe3, 0x36, 0xc2,
+ 0x00, 0x96, 0x08, 0xe4, 0x59, 0xc2, 0x00, 0x9a, 0x08, 0xe4, 0x51, 0xc2,
+ 0x1a, 0x36, 0x08, 0xe4, 0x49, 0xc2, 0x00, 0x02, 0x08, 0xe4, 0x41, 0x1c,
+ 0xc1, 0xe3, 0x40, 0xc2, 0x00, 0x4c, 0x08, 0xe4, 0x29, 0x06, 0xc1, 0xe3,
+ 0x4a, 0x16, 0xc1, 0xe3, 0x54, 0xc2, 0x00, 0x3f, 0x08, 0xe4, 0x09, 0xc2,
+ 0x02, 0x1d, 0x08, 0xe4, 0x01, 0x12, 0xc1, 0xe3, 0x62, 0x10, 0xc1, 0xe3,
+ 0x6c, 0xc2, 0x26, 0x94, 0x08, 0xe3, 0xc1, 0x05, 0xc1, 0xe3, 0x7c, 0xc2,
+ 0x07, 0x69, 0x08, 0xe3, 0xa1, 0x0d, 0x41, 0xe3, 0x86, 0xd8, 0x23, 0xe4,
+ 0x01, 0x35, 0x39, 0xc4, 0x02, 0xcb, 0x01, 0x35, 0x30, 0x05, 0xc1, 0xe3,
+ 0x90, 0x03, 0xc1, 0xe3, 0xa2, 0x18, 0xc1, 0xe3, 0xae, 0xc4, 0x09, 0x30,
+ 0x00, 0x6a, 0x78, 0x18, 0xc1, 0xe3, 0xb8, 0x83, 0x00, 0x68, 0x2b, 0x01,
+ 0xe3, 0xc8, 0x8b, 0x00, 0x68, 0x3b, 0x01, 0xe3, 0xda, 0x97, 0x00, 0x68,
+ 0x4b, 0x01, 0xe3, 0xde, 0x87, 0x00, 0x68, 0x73, 0x01, 0xe3, 0xe2, 0x91,
+ 0x00, 0x68, 0x93, 0x01, 0xe3, 0xe6, 0x0d, 0xc1, 0xe3, 0xea, 0x09, 0xc1,
+ 0xe3, 0xf4, 0x10, 0xc1, 0xe3, 0xfe, 0x05, 0xc1, 0xe4, 0x12, 0x0c, 0xc1,
+ 0xe4, 0x1a, 0x16, 0xc1, 0xe4, 0x24, 0x06, 0xc1, 0xe4, 0x32, 0x12, 0xc1,
+ 0xe4, 0x46, 0x04, 0xc1, 0xe4, 0x50, 0xc2, 0x00, 0x3f, 0x00, 0x69, 0x71,
+ 0xc2, 0x1a, 0x36, 0x00, 0x69, 0x79, 0x14, 0xc1, 0xe4, 0x5a, 0x0e, 0xc1,
+ 0xe4, 0x64, 0x15, 0xc1, 0xe4, 0x6c, 0xc2, 0x01, 0x0e, 0x00, 0x69, 0xc8,
+ 0x03, 0xc1, 0xe4, 0x7c, 0x8b, 0x00, 0x69, 0xfb, 0x01, 0xe4, 0x88, 0x97,
+ 0x00, 0x6a, 0x0b, 0x01, 0xe4, 0x8c, 0x48, 0xb7, 0xd7, 0xc1, 0xe4, 0x90,
+ 0x87, 0x00, 0x6a, 0x33, 0x01, 0xe4, 0x9e, 0x91, 0x00, 0x6a, 0x52, 0x01,
+ 0xe4, 0xa2, 0x44, 0x05, 0x17, 0xc1, 0xe4, 0xa6, 0x46, 0x05, 0xdd, 0x41,
+ 0xe4, 0xcc, 0x45, 0x08, 0xd8, 0xc1, 0xe4, 0xe4, 0xc8, 0xbe, 0x73, 0x00,
+ 0x6b, 0xc8, 0xc3, 0x03, 0x01, 0x00, 0x6b, 0x81, 0x44, 0x05, 0x17, 0x41,
+ 0xe5, 0x08, 0xcb, 0x94, 0xb4, 0x08, 0x57, 0xb1, 0xc8, 0x0a, 0x1f, 0x08,
+ 0x57, 0xa9, 0x42, 0x00, 0x68, 0xc1, 0xe5, 0x14, 0xc7, 0x30, 0x2a, 0x08,
+ 0x57, 0x89, 0xc4, 0x0e, 0xa5, 0x08, 0x57, 0x80, 0xc3, 0x05, 0x17, 0x08,
+ 0x57, 0x5b, 0x01, 0xe5, 0x21, 0x16, 0xc1, 0xe5, 0x27, 0xc4, 0x0d, 0x8e,
+ 0x08, 0x57, 0x60, 0xc5, 0x00, 0x34, 0x08, 0x57, 0x31, 0xc5, 0x03, 0x50,
+ 0x08, 0x57, 0x28, 0x16, 0xc1, 0xe5, 0x33, 0x15, 0xc1, 0xe5, 0x45, 0xc4,
+ 0x5d, 0xef, 0x08, 0x57, 0x09, 0x13, 0xc1, 0xe5, 0x55, 0x1a, 0xc1, 0xe5,
+ 0x61, 0xc2, 0x13, 0xfc, 0x08, 0x56, 0xe1, 0xc2, 0x03, 0x07, 0x08, 0x56,
+ 0xd9, 0x03, 0xc1, 0xe5, 0x6d, 0xc3, 0x21, 0x00, 0x08, 0x56, 0xb9, 0xc3,
+ 0x04, 0xae, 0x08, 0x56, 0xb1, 0x06, 0xc1, 0xe5, 0x7f, 0xc6, 0xd7, 0x12,
+ 0x08, 0x56, 0x99, 0x0d, 0xc1, 0xe5, 0x8b, 0xc4, 0x4d, 0x48, 0x08, 0x56,
+ 0x79, 0xc2, 0x00, 0x5b, 0x08, 0x56, 0x33, 0x01, 0xe5, 0x97, 0x0c, 0xc1,
+ 0xe5, 0x9d, 0x1c, 0xc1, 0xe5, 0xa9, 0xc3, 0x7c, 0xad, 0x08, 0x56, 0x39,
+ 0x09, 0xc1, 0xe5, 0xb5, 0x04, 0x41, 0xe5, 0xc1, 0xd8, 0x25, 0x04, 0x0f,
+ 0xab, 0xa1, 0xc6, 0xd4, 0x78, 0x0f, 0xc9, 0xa8, 0xc6, 0xd3, 0x70, 0x0f,
+ 0xa3, 0x99, 0xca, 0x9e, 0xee, 0x0f, 0xa3, 0x90, 0x03, 0xc1, 0xe5, 0xcd,
+ 0xc3, 0xa4, 0xc6, 0x00, 0x42, 0xb9, 0xc8, 0xbb, 0x73, 0x00, 0x42, 0xb1,
+ 0x0b, 0xc1, 0xe6, 0x14, 0xc7, 0xbb, 0x74, 0x00, 0x42, 0x29, 0xc5, 0xde,
+ 0x0a, 0x00, 0x42, 0x00, 0xcc, 0x8d, 0x18, 0x08, 0x8b, 0xb1, 0x46, 0x02,
+ 0x00, 0x41, 0xe6, 0x1c, 0xcb, 0x25, 0x87, 0x08, 0x8b, 0xa9, 0xc9, 0xae,
+ 0xe0, 0x08, 0x8b, 0x98, 0xc5, 0x05, 0xbb, 0x0f, 0x81, 0x49, 0xc8, 0xbc,
+ 0xab, 0x0f, 0x80, 0x11, 0xcb, 0x92, 0x78, 0x0f, 0x80, 0x30, 0xc8, 0xbd,
+ 0xcb, 0x0f, 0x80, 0x01, 0x48, 0xb2, 0x1d, 0x41, 0xe6, 0x76, 0xc9, 0xb5,
+ 0xdf, 0x0f, 0x80, 0x09, 0x46, 0xd8, 0x86, 0xc1, 0xe6, 0x80, 0x48, 0xba,
+ 0xd3, 0xc1, 0xe6, 0x8a, 0xc5, 0xcf, 0x09, 0x0f, 0x81, 0x31, 0xc5, 0xe0,
+ 0x35, 0x0f, 0x81, 0x38, 0xc9, 0xb7, 0x59, 0x0f, 0x80, 0x19, 0x47, 0xbf,
+ 0x04, 0x41, 0xe6, 0x94, 0x46, 0xbf, 0x05, 0xc1, 0xe6, 0x9e, 0xc5, 0xd9,
+ 0xe1, 0x0f, 0x81, 0x18, 0x46, 0xd1, 0x48, 0xc1, 0xe6, 0xa8, 0x48, 0xc0,
+ 0x13, 0x41, 0xe6, 0xb2, 0x47, 0xc7, 0xb4, 0xc1, 0xe6, 0xbc, 0x47, 0x9a,
+ 0x75, 0x41, 0xe6, 0xc6, 0xc2, 0x00, 0xbb, 0x0f, 0x81, 0x59, 0xc4, 0x99,
+ 0xac, 0x0f, 0x81, 0x20, 0x15, 0xc1, 0xe6, 0xd0, 0xc8, 0x9f, 0xcc, 0x0f,
+ 0x9d, 0xcb, 0x01, 0xe6, 0xdc, 0xc4, 0x23, 0x1f, 0x0f, 0x9d, 0xa8, 0xca,
+ 0xa8, 0x6c, 0x01, 0x33, 0x79, 0xcc, 0x86, 0x64, 0x01, 0x33, 0x71, 0xc9,
+ 0xb5, 0x3d, 0x01, 0x33, 0x68, 0x48, 0x20, 0x39, 0xc1, 0xe6, 0xe2, 0xcf,
+ 0x6b, 0xd6, 0x0f, 0x9d, 0xb0, 0x00, 0x41, 0xe6, 0xef, 0x14, 0xc1, 0xe6,
+ 0xfb, 0xc2, 0x01, 0x0e, 0x08, 0x95, 0x31, 0xc2, 0x0e, 0xe5, 0x08, 0x95,
+ 0x29, 0xc2, 0x06, 0x8c, 0x08, 0x95, 0x21, 0xc2, 0x00, 0x96, 0x08, 0x95,
+ 0x19, 0xc2, 0x1a, 0x36, 0x08, 0x95, 0x09, 0xc2, 0x00, 0x3f, 0x08, 0x95,
+ 0x01, 0x04, 0xc1, 0xe7, 0x0b, 0x12, 0xc1, 0xe7, 0x15, 0x10, 0xc1, 0xe7,
+ 0x1f, 0x06, 0xc1, 0xe7, 0x2f, 0x16, 0xc1, 0xe7, 0x3d, 0x0c, 0xc1, 0xe7,
+ 0x4b, 0x05, 0xc1, 0xe7, 0x55, 0x09, 0xc1, 0xe7, 0x5f, 0x0d, 0xc1, 0xe7,
+ 0x69, 0x87, 0x08, 0x94, 0x19, 0x83, 0x08, 0x94, 0x01, 0x8b, 0x08, 0x94,
+ 0x09, 0x97, 0x08, 0x94, 0x10, 0xc4, 0x15, 0xa7, 0x0b, 0x53, 0x39, 0xc2,
+ 0x22, 0x45, 0x0b, 0x53, 0x30, 0xc3, 0x0d, 0x8f, 0x0b, 0x53, 0x29, 0xc3,
+ 0x08, 0xde, 0x0b, 0x53, 0x20, 0xc4, 0x05, 0xde, 0x0b, 0x53, 0x19, 0xc2,
+ 0x0a, 0x20, 0x0b, 0x53, 0x10, 0xc3, 0x3b, 0x0b, 0x05, 0x53, 0xe9, 0xc3,
+ 0x82, 0xa4, 0x05, 0x53, 0xe0, 0x44, 0x03, 0x4c, 0xc1, 0xe7, 0x73, 0xc6,
+ 0x00, 0xe1, 0x00, 0x82, 0x58, 0xc7, 0x10, 0xac, 0x00, 0x81, 0xb1, 0xc3,
+ 0x41, 0xd1, 0x00, 0x81, 0xd0, 0xc5, 0x45, 0xcf, 0x00, 0x81, 0xc1, 0xc4,
+ 0x21, 0x28, 0x00, 0x81, 0xc8, 0xc3, 0x83, 0x28, 0x00, 0x83, 0x49, 0xc3,
+ 0x82, 0xa4, 0x00, 0x83, 0x51, 0xc3, 0x82, 0xec, 0x00, 0x83, 0x59, 0xc3,
+ 0x82, 0xe0, 0x00, 0x83, 0x61, 0xc3, 0x3b, 0x0b, 0x00, 0x83, 0x68, 0xc3,
+ 0x83, 0x28, 0x00, 0x84, 0xd1, 0xc3, 0x82, 0xec, 0x00, 0x84, 0xd8, 0x45,
+ 0xca, 0x69, 0xc1, 0xe7, 0x85, 0xcd, 0x80, 0x7c, 0x00, 0x82, 0x70, 0xc3,
+ 0x05, 0x17, 0x00, 0x84, 0xf1, 0xcb, 0x0f, 0x62, 0x00, 0x84, 0xf8, 0xc2,
+ 0x0a, 0x20, 0x00, 0x84, 0x91, 0xc4, 0x05, 0xde, 0x00, 0x84, 0x98, 0xc3,
+ 0x08, 0xde, 0x00, 0x84, 0xa1, 0xc3, 0x0d, 0x8f, 0x00, 0x84, 0xa8, 0xc2,
+ 0x22, 0x45, 0x00, 0x84, 0xb1, 0xc4, 0x15, 0xa7, 0x00, 0x84, 0xb8, 0xc7,
+ 0xca, 0x69, 0x05, 0x53, 0xd1, 0x97, 0x00, 0x81, 0x50, 0xc2, 0x01, 0x0e,
+ 0x00, 0x80, 0x0b, 0x01, 0xe7, 0x97, 0x83, 0x00, 0x80, 0x00, 0x83, 0x00,
+ 0x80, 0x83, 0x01, 0xe7, 0x9d, 0x16, 0xc1, 0xe7, 0xa3, 0xc2, 0x01, 0x0e,
+ 0x00, 0x80, 0x88, 0x0a, 0xc1, 0xe7, 0xad, 0x83, 0x00, 0x80, 0xf1, 0xc2,
+ 0x0e, 0xe5, 0x00, 0x82, 0x89, 0xcd, 0x77, 0x0a, 0x00, 0x83, 0x08, 0x83,
+ 0x00, 0x80, 0x11, 0xc2, 0x01, 0x0e, 0x00, 0x80, 0x19, 0xc7, 0xc0, 0xac,
+ 0x00, 0x81, 0xf8, 0xc2, 0x07, 0x69, 0x00, 0x80, 0x21, 0xc2, 0x1a, 0x36,
+ 0x00, 0x80, 0x49, 0x10, 0xc1, 0xe7, 0xba, 0x83, 0x00, 0x80, 0xa0, 0x83,
+ 0x00, 0x80, 0x29, 0xc2, 0x01, 0x0e, 0x00, 0x80, 0x30, 0x83, 0x00, 0x80,
+ 0x39, 0xc2, 0x01, 0x0e, 0x00, 0x80, 0x40, 0x06, 0xc1, 0xe7, 0xc4, 0x83,
+ 0x00, 0x80, 0x91, 0xc2, 0x01, 0x0e, 0x00, 0x80, 0x98, 0x83, 0x00, 0x80,
+ 0xa9, 0xc2, 0x01, 0x0e, 0x00, 0x80, 0xb0, 0x83, 0x00, 0x80, 0xb9, 0xc2,
+ 0x01, 0x0e, 0x00, 0x80, 0xc0, 0x83, 0x00, 0x80, 0xc9, 0x43, 0x00, 0xf7,
+ 0x41, 0xe7, 0xce, 0x83, 0x00, 0x80, 0xd9, 0xcf, 0x62, 0x94, 0x00, 0x84,
+ 0x70, 0x83, 0x00, 0x80, 0xe1, 0xc2, 0x00, 0x96, 0x00, 0x81, 0x00, 0x83,
+ 0x00, 0x80, 0xe9, 0x51, 0x32, 0xe7, 0x41, 0xe7, 0xe4, 0x8b, 0x00, 0x81,
+ 0x20, 0x97, 0x00, 0x81, 0x30, 0x51, 0x52, 0x57, 0x41, 0xe7, 0xf0, 0x94,
+ 0x00, 0x82, 0x93, 0x01, 0xe8, 0x02, 0x8e, 0x00, 0x82, 0xa2, 0x01, 0xe8,
+ 0x06, 0xc4, 0x15, 0xa7, 0x05, 0x4f, 0xb9, 0xc2, 0x22, 0x45, 0x05, 0x4f,
+ 0xb0, 0xc3, 0x0d, 0x8f, 0x05, 0x4f, 0xa9, 0xc3, 0x08, 0xde, 0x05, 0x4f,
+ 0xa0, 0xc4, 0x05, 0xde, 0x05, 0x4f, 0x99, 0xc2, 0x0a, 0x20, 0x05, 0x4f,
+ 0x90, 0xc5, 0xdd, 0xfb, 0x00, 0x84, 0xe2, 0x01, 0xe8, 0x0a, 0x94, 0x00,
+ 0x82, 0xb8, 0x8e, 0x00, 0x82, 0xc8, 0xc2, 0x0e, 0x30, 0x00, 0x84, 0x19,
+ 0x87, 0x00, 0x84, 0x23, 0x01, 0xe8, 0x0e, 0xc7, 0xc9, 0x0b, 0x00, 0x84,
+ 0x30, 0xc2, 0x1a, 0x36, 0x00, 0x81, 0xd9, 0xc2, 0x00, 0x9a, 0x00, 0x81,
+ 0xe1, 0xc2, 0x00, 0x4c, 0x00, 0x81, 0xe9, 0xc2, 0x01, 0x0e, 0x00, 0x81,
+ 0xf0, 0xc2, 0x01, 0x01, 0x00, 0x82, 0xf1, 0xc2, 0x00, 0x3f, 0x00, 0x82,
+ 0xf9, 0xc2, 0x00, 0x96, 0x00, 0x83, 0x00, 0xca, 0xa9, 0x34, 0x0f, 0xd2,
+ 0x53, 0x01, 0xe8, 0x14, 0xc5, 0xad, 0xae, 0x0f, 0xd0, 0x0b, 0x01, 0xe8,
+ 0x1a, 0x0d, 0xc1, 0xe8, 0x20, 0xc6, 0xba, 0xfd, 0x0f, 0xd0, 0x1b, 0x01,
+ 0xe8, 0x32, 0xc4, 0xd4, 0xf2, 0x0f, 0xd0, 0x13, 0x01, 0xe8, 0x38, 0xc4,
+ 0xe8, 0x9b, 0x0f, 0xd0, 0x2b, 0x01, 0xe8, 0x3e, 0x47, 0x41, 0xe5, 0x41,
+ 0xe8, 0x44, 0x0b, 0xc1, 0xe8, 0x60, 0xca, 0xa6, 0xb4, 0x08, 0xa2, 0xf0,
+ 0x18, 0xc1, 0xe8, 0x6c, 0xc2, 0x01, 0x0e, 0x08, 0xa1, 0xa1, 0x15, 0xc1,
+ 0xe8, 0x78, 0x10, 0xc1, 0xe8, 0x88, 0x06, 0xc1, 0xe8, 0xa0, 0x16, 0xc1,
+ 0xe8, 0xae, 0x0c, 0xc1, 0xe8, 0xbc, 0x05, 0xc1, 0xe8, 0xc6, 0x09, 0xc1,
+ 0xe8, 0xd0, 0x0d, 0xc1, 0xe8, 0xda, 0x83, 0x08, 0xa0, 0x03, 0x01, 0xe8,
+ 0xe4, 0x91, 0x08, 0xa0, 0x61, 0x87, 0x08, 0xa0, 0x51, 0x97, 0x08, 0xa0,
+ 0x23, 0x01, 0xe8, 0xf0, 0x8b, 0x08, 0xa0, 0x13, 0x01, 0xe8, 0xf4, 0x12,
+ 0xc1, 0xe8, 0xf8, 0x04, 0xc1, 0xe9, 0x02, 0x0f, 0xc1, 0xe9, 0x0c, 0xc2,
+ 0x1a, 0x36, 0x08, 0xa1, 0x59, 0x14, 0xc1, 0xe9, 0x16, 0x0e, 0xc1, 0xe9,
+ 0x20, 0xc2, 0x00, 0x4c, 0x08, 0xa1, 0x80, 0x46, 0x02, 0x92, 0xc1, 0xe9,
+ 0x2a, 0x45, 0x08, 0xd8, 0xc1, 0xe9, 0x36, 0xc4, 0x1c, 0xb3, 0x08, 0xa2,
+ 0x58, 0x03, 0xc1, 0xe9, 0x5a, 0x91, 0x08, 0xa2, 0x01, 0x87, 0x08, 0xa1,
+ 0xf1, 0x48, 0xb7, 0xd7, 0xc1, 0xe9, 0x66, 0x97, 0x08, 0xa1, 0xc3, 0x01,
+ 0xe9, 0x74, 0x8b, 0x08, 0xa1, 0xb2, 0x01, 0xe9, 0x78, 0xc8, 0xbd, 0xeb,
+ 0x00, 0xce, 0xf3, 0x01, 0xe9, 0x7c, 0x16, 0xc1, 0xe9, 0x80, 0x46, 0x08,
+ 0xd7, 0xc1, 0xe9, 0x8c, 0x47, 0x01, 0xff, 0xc1, 0xe9, 0xb0, 0x4b, 0x6f,
+ 0x71, 0x41, 0xe9, 0xc2, 0xc9, 0xad, 0xb7, 0x0f, 0x98, 0xd1, 0xc6, 0x01,
+ 0xb1, 0x0f, 0x98, 0x88, 0xca, 0x9e, 0x94, 0x01, 0x3a, 0x71, 0xc2, 0x18,
+ 0xc3, 0x0f, 0x8c, 0x79, 0xc2, 0x00, 0x03, 0x0f, 0x8c, 0x71, 0xc2, 0x0e,
+ 0xe5, 0x0f, 0x8c, 0x69, 0xc2, 0x07, 0x44, 0x0f, 0x8c, 0x61, 0xc2, 0x00,
+ 0x2d, 0x0f, 0x8c, 0x59, 0x55, 0x01, 0xf1, 0xc1, 0xe9, 0xe2, 0xcd, 0x2d,
+ 0xa6, 0x0f, 0xde, 0x20, 0xca, 0xa9, 0xe8, 0x01, 0x27, 0xf9, 0x47, 0x37,
+ 0x49, 0xc1, 0xea, 0x4a, 0x55, 0x01, 0xf1, 0xc1, 0xea, 0x60, 0xc8, 0x00,
+ 0x52, 0x0f, 0xbe, 0xb1, 0xc6, 0x01, 0xe9, 0x0f, 0xbe, 0xc0, 0xc5, 0x0c,
+ 0xa3, 0x0f, 0xdd, 0xe9, 0xdc, 0x06, 0x2b, 0x0f, 0xdd, 0xf1, 0xc7, 0x3f,
+ 0x7b, 0x0f, 0xdd, 0xf8, 0xd6, 0x2d, 0xdf, 0x01, 0x14, 0x49, 0xd4, 0x3a,
+ 0x0a, 0x01, 0x14, 0x40, 0xe0, 0x08, 0xe7, 0x01, 0x12, 0x38, 0xca, 0x37,
+ 0x20, 0x01, 0x13, 0xa9, 0xc5, 0x09, 0x02, 0x01, 0x13, 0x88, 0xca, 0x37,
+ 0x20, 0x01, 0x13, 0xa1, 0xc5, 0x09, 0x02, 0x01, 0x13, 0x80, 0xc4, 0x15,
+ 0xa7, 0x0f, 0x27, 0xb9, 0xc2, 0x22, 0x45, 0x0f, 0x27, 0xb0, 0xc3, 0x0d,
+ 0x8f, 0x0f, 0x27, 0xa9, 0xc3, 0x08, 0xde, 0x0f, 0x27, 0xa0, 0xc4, 0x05,
+ 0xde, 0x0f, 0x27, 0x99, 0xc2, 0x0a, 0x20, 0x0f, 0x27, 0x90, 0xc2, 0x00,
+ 0x30, 0x0f, 0x27, 0x51, 0xc2, 0x02, 0x01, 0x0f, 0x27, 0x49, 0x90, 0x0f,
+ 0x27, 0x43, 0x01, 0xea, 0xc8, 0x8f, 0x0f, 0x27, 0x39, 0x89, 0x0f, 0x27,
+ 0x30, 0x94, 0x0f, 0x27, 0x19, 0x8d, 0x0f, 0x27, 0x11, 0x86, 0x0f, 0x27,
+ 0x09, 0x85, 0x0f, 0x27, 0x00, 0x8a, 0x0f, 0x26, 0xf1, 0x96, 0x0f, 0x26,
+ 0xe9, 0xc2, 0x01, 0x8e, 0x0f, 0x26, 0xe1, 0x95, 0x0f, 0x26, 0xd8, 0x8a,
+ 0x0f, 0x26, 0xc9, 0xc2, 0x18, 0x79, 0x0f, 0x26, 0xc1, 0x84, 0x0f, 0x26,
+ 0xb9, 0x8c, 0x0f, 0x26, 0xb0, 0xc2, 0x00, 0x56, 0x0f, 0x26, 0xa1, 0x9b,
+ 0x0f, 0x26, 0x99, 0x8e, 0x0f, 0x26, 0x91, 0x92, 0x0f, 0x26, 0x88, 0xcf,
+ 0x68, 0x34, 0x08, 0xcf, 0x21, 0x03, 0xc1, 0xea, 0xcc, 0x91, 0x08, 0xce,
+ 0xe1, 0x87, 0x08, 0xce, 0xd1, 0xc9, 0xb7, 0xd7, 0x08, 0xce, 0xb3, 0x01,
+ 0xea, 0xd8, 0x97, 0x08, 0xce, 0xa3, 0x01, 0xea, 0xdc, 0x8b, 0x08, 0xce,
+ 0x92, 0x01, 0xea, 0xe0, 0xc7, 0xc6, 0xf7, 0x08, 0xcf, 0x11, 0x03, 0xc1,
+ 0xea, 0xe4, 0x42, 0x02, 0x52, 0x41, 0xea, 0xf0, 0x14, 0xc1, 0xea, 0xfc,
+ 0x0e, 0xc1, 0xeb, 0x06, 0xc2, 0x01, 0x0e, 0x08, 0xce, 0x71, 0x15, 0xc1,
+ 0xeb, 0x10, 0x18, 0xc1, 0xeb, 0x20, 0xc2, 0x1a, 0x36, 0x08, 0xce, 0x39,
+ 0xc2, 0x00, 0x3f, 0x08, 0xce, 0x31, 0x04, 0xc1, 0xeb, 0x2d, 0x12, 0xc1,
+ 0xeb, 0x37, 0x10, 0xc1, 0xeb, 0x41, 0x06, 0xc1, 0xeb, 0x57, 0x16, 0xc1,
+ 0xeb, 0x65, 0x0c, 0xc1, 0xeb, 0x73, 0x05, 0xc1, 0xeb, 0x7d, 0x09, 0xc1,
+ 0xeb, 0x87, 0x0d, 0xc1, 0xeb, 0x91, 0x83, 0x08, 0xcd, 0x03, 0x01, 0xeb,
+ 0x9b, 0x91, 0x08, 0xcd, 0x61, 0x87, 0x08, 0xcd, 0x51, 0x97, 0x08, 0xcd,
+ 0x23, 0x01, 0xeb, 0xa7, 0x8b, 0x08, 0xcd, 0x12, 0x01, 0xeb, 0xab, 0xc3,
+ 0x05, 0x17, 0x08, 0x45, 0x3b, 0x01, 0xeb, 0xaf, 0x16, 0xc1, 0xeb, 0xb5,
+ 0x08, 0x41, 0xeb, 0xc5, 0x16, 0xc1, 0xeb, 0xd1, 0x15, 0xc1, 0xeb, 0xdd,
+ 0x46, 0x2a, 0xb4, 0xc1, 0xeb, 0xe7, 0xc4, 0x5d, 0xef, 0x08, 0x44, 0xd9,
+ 0xc4, 0xbc, 0xb7, 0x08, 0x44, 0xd1, 0xc2, 0x03, 0x07, 0x08, 0x44, 0xc1,
+ 0x03, 0xc1, 0xec, 0x1d, 0xc3, 0x21, 0x00, 0x08, 0x44, 0xa9, 0xc3, 0x04,
+ 0xae, 0x08, 0x44, 0x99, 0xc6, 0xd7, 0x12, 0x08, 0x44, 0x89, 0xc4, 0xe5,
+ 0x53, 0x08, 0x44, 0x79, 0xc4, 0x4d, 0x48, 0x08, 0x44, 0x69, 0xc2, 0x00,
+ 0x5b, 0x08, 0x44, 0x3b, 0x01, 0xec, 0x29, 0xc5, 0x4d, 0x42, 0x08, 0x44,
+ 0x49, 0xc3, 0x7c, 0xad, 0x08, 0x44, 0x41, 0xc6, 0x43, 0x0f, 0x08, 0x44,
+ 0x29, 0xc5, 0x9e, 0xbc, 0x08, 0x44, 0x21, 0xc4, 0xe5, 0xaf, 0x08, 0x44,
+ 0x18, 0x45, 0x20, 0x8c, 0xc1, 0xec, 0x2f, 0x45, 0x18, 0xd5, 0xc1, 0xec,
+ 0x5a, 0x46, 0x08, 0xd1, 0x41, 0xec, 0x85, 0xde, 0x0f, 0xf3, 0x0f, 0xaa,
+ 0x19, 0x4c, 0x88, 0xf8, 0x41, 0xec, 0x9d, 0xde, 0x01, 0x29, 0x01, 0x3d,
+ 0x88, 0xcc, 0x25, 0x70, 0x01, 0x17, 0x60, 0x46, 0x0c, 0x9d, 0xc1, 0xec,
+ 0xa3, 0xc3, 0x02, 0xcc, 0x00, 0x05, 0x60, 0xc3, 0x35, 0x1e, 0x01, 0x15,
+ 0x69, 0xc4, 0x1f, 0x02, 0x01, 0x12, 0x08, 0x43, 0x08, 0x28, 0xc1, 0xec,
+ 0xaf, 0xce, 0x66, 0x46, 0x01, 0x12, 0x49, 0xd6, 0x31, 0x91, 0x01, 0x12,
+ 0x21, 0xcc, 0x86, 0xa0, 0x01, 0x10, 0x48, 0xca, 0x37, 0x20, 0x01, 0x13,
+ 0x69, 0xc5, 0x09, 0x02, 0x01, 0x13, 0x00, 0x86, 0x0f, 0xae, 0x51, 0xc2,
+ 0x02, 0xfb, 0x0f, 0xae, 0x48, 0xd6, 0x2b, 0xe5, 0x0f, 0xa6, 0xa0, 0x87,
+ 0x0f, 0x09, 0x58, 0x91, 0x0f, 0x09, 0x48, 0x83, 0x0f, 0x09, 0x28, 0xc2,
+ 0x00, 0x9a, 0x0f, 0x09, 0x19, 0x83, 0x0f, 0x08, 0xb0, 0xc2, 0x00, 0x96,
+ 0x0f, 0x09, 0x09, 0x83, 0x0f, 0x08, 0xd0, 0xc2, 0x00, 0x96, 0x0f, 0x09,
+ 0x01, 0x83, 0x0f, 0x08, 0x00, 0x8a, 0x0f, 0x08, 0xf8, 0x12, 0xc1, 0xec,
+ 0xbb, 0xc2, 0x0e, 0x13, 0x0f, 0x08, 0xc9, 0x16, 0xc1, 0xec, 0xc5, 0xc2,
+ 0x00, 0x9a, 0x0f, 0x08, 0x89, 0xc2, 0x1a, 0x36, 0x0f, 0x08, 0x81, 0xc2,
+ 0x00, 0x2e, 0x0f, 0x08, 0x61, 0xc2, 0x06, 0x6b, 0x0f, 0x08, 0x39, 0x83,
+ 0x0f, 0x08, 0x28, 0xc2, 0x00, 0x96, 0x0f, 0x08, 0xe9, 0x83, 0x0f, 0x08,
+ 0x78, 0xc2, 0x1a, 0x36, 0x0f, 0x08, 0xd9, 0x83, 0x0f, 0x08, 0x30, 0xc2,
+ 0x23, 0xe3, 0x0f, 0x08, 0xa1, 0x83, 0x0f, 0x08, 0x19, 0xc2, 0x0e, 0xe5,
+ 0x0f, 0x08, 0x08, 0xcc, 0x8d, 0x9c, 0x0f, 0x09, 0xd9, 0xc6, 0xd7, 0xe4,
+ 0x0f, 0x09, 0xd1, 0xc8, 0x7d, 0xb1, 0x0f, 0x09, 0xc9, 0xc5, 0xdf, 0xb3,
+ 0x0f, 0x09, 0xc1, 0xc6, 0x15, 0x83, 0x0f, 0x09, 0xb8, 0x08, 0xc1, 0xec,
+ 0xd5, 0x07, 0xc1, 0xed, 0x05, 0x04, 0xc1, 0xed, 0x45, 0x26, 0xc1, 0xed,
+ 0x85, 0x25, 0xc1, 0xed, 0xc5, 0x24, 0xc1, 0xee, 0x05, 0x23, 0xc1, 0xee,
+ 0x45, 0x22, 0xc1, 0xee, 0x85, 0x21, 0xc1, 0xee, 0xc5, 0x20, 0xc1, 0xef,
+ 0x05, 0x1f, 0xc1, 0xef, 0x45, 0x1e, 0xc1, 0xef, 0x85, 0x1d, 0xc1, 0xef,
+ 0xc5, 0x06, 0xc1, 0xf0, 0x05, 0x05, 0xc1, 0xf0, 0x45, 0x03, 0x41, 0xf0,
+ 0x85, 0x08, 0xc1, 0xf0, 0xc5, 0x07, 0xc1, 0xf1, 0x05, 0x06, 0xc1, 0xf1,
+ 0x45, 0x05, 0xc1, 0xf1, 0x85, 0x04, 0xc1, 0xf1, 0xc5, 0x03, 0xc1, 0xf2,
+ 0x05, 0x26, 0xc1, 0xf2, 0x45, 0x25, 0xc1, 0xf2, 0x85, 0x24, 0x41, 0xf2,
+ 0xc5, 0x42, 0x00, 0xc9, 0xc1, 0xf3, 0x05, 0xd1, 0x55, 0xba, 0x01, 0x24,
+ 0xa1, 0xcc, 0x4c, 0x32, 0x01, 0x24, 0x88, 0xd1, 0x51, 0x69, 0x01, 0x24,
+ 0xc9, 0xcf, 0x66, 0x81, 0x01, 0x24, 0x90, 0xd2, 0x4c, 0x2c, 0x01, 0x24,
+ 0xc1, 0x0b, 0x41, 0xf3, 0x11, 0xd0, 0x5d, 0x8f, 0x01, 0x24, 0xb1, 0xd1,
+ 0x52, 0xbd, 0x01, 0x24, 0xa8, 0xc4, 0x15, 0xa7, 0x00, 0x3e, 0x39, 0xc2,
+ 0x22, 0x45, 0x00, 0x3e, 0x30, 0xc3, 0x0d, 0x8f, 0x00, 0x3e, 0x29, 0xc3,
+ 0x08, 0xde, 0x00, 0x3e, 0x20, 0xc4, 0x05, 0xde, 0x00, 0x3e, 0x19, 0xc2,
+ 0x0a, 0x20, 0x00, 0x3e, 0x10, 0x44, 0xe8, 0xff, 0xc1, 0xf3, 0x1d, 0x83,
+ 0x00, 0x3e, 0xb0, 0xc2, 0x1a, 0x36, 0x00, 0x3f, 0x13, 0x01, 0xf3, 0x2f,
+ 0x83, 0x00, 0x3f, 0x1a, 0x01, 0xf3, 0x35, 0xc2, 0x00, 0x9a, 0x00, 0x3e,
+ 0xd1, 0x83, 0x00, 0x3e, 0xc8, 0xc8, 0xbf, 0xfb, 0x00, 0x3e, 0x88, 0x91,
+ 0x00, 0x3e, 0x78, 0x87, 0x00, 0x3e, 0x58, 0xcb, 0x5c, 0x8f, 0x00, 0x3f,
+ 0x89, 0xc8, 0x9f, 0x18, 0x00, 0x3f, 0x81, 0xc9, 0x3b, 0x22, 0x00, 0x3f,
+ 0x79, 0xcf, 0x6a, 0xb9, 0x00, 0x3f, 0x70, 0xcb, 0x5c, 0x8f, 0x00, 0x3f,
+ 0x69, 0xc8, 0x9f, 0x18, 0x00, 0x3f, 0x61, 0xc9, 0x3b, 0x22, 0x00, 0x3f,
+ 0x58, 0x46, 0x01, 0xab, 0x41, 0xf3, 0x3b, 0x95, 0x0f, 0xae, 0x68, 0xc3,
+ 0x1f, 0xff, 0x0f, 0xae, 0x2b, 0x01, 0xf3, 0x53, 0xc3, 0x15, 0xb3, 0x0f,
+ 0xd5, 0xc8, 0xc5, 0x12, 0x2e, 0x01, 0x1e, 0xd1, 0x45, 0xe3, 0xaa, 0x41,
+ 0xf3, 0x59, 0xc4, 0xa8, 0x4a, 0x0f, 0x99, 0xf1, 0xc5, 0xe3, 0x55, 0x0f,
+ 0x99, 0xe8, 0x44, 0x03, 0x6b, 0x41, 0xf3, 0x63, 0x21, 0xc1, 0xf3, 0x84,
+ 0x20, 0xc1, 0xf3, 0x9b, 0x1f, 0xc1, 0xf3, 0xc9, 0x1e, 0xc1, 0xf3, 0xfa,
+ 0x1d, 0x41, 0xf4, 0x31, 0x1f, 0xc1, 0xf4, 0x5b, 0x1e, 0xc1, 0xf4, 0x76,
+ 0x1d, 0x41, 0xf4, 0xaa, 0x20, 0xc1, 0xf4, 0xd4, 0x1f, 0xc1, 0xf4, 0xf6,
+ 0x1e, 0xc1, 0xf5, 0x1e, 0x1d, 0x41, 0xf5, 0x4c, 0xc9, 0x7c, 0xf2, 0x09,
+ 0xa2, 0x21, 0xc5, 0xb5, 0x92, 0x09, 0xa2, 0x10, 0xa5, 0x09, 0x8c, 0x39,
+ 0xa4, 0x09, 0x8c, 0x31, 0xa3, 0x09, 0x8c, 0x23, 0x01, 0xf5, 0x7c, 0xa2,
+ 0x09, 0x8c, 0x19, 0xa1, 0x09, 0x8c, 0x11, 0xa0, 0x09, 0x8c, 0x09, 0x9f,
+ 0x09, 0x8c, 0x01, 0x9e, 0x09, 0x8b, 0xf8, 0xa5, 0x09, 0x8d, 0x61, 0xa4,
+ 0x09, 0x8d, 0x59, 0xa3, 0x09, 0x8d, 0x4b, 0x01, 0xf5, 0x80, 0xa2, 0x09,
+ 0x8d, 0x41, 0xa1, 0x09, 0x8d, 0x39, 0xa0, 0x09, 0x8d, 0x31, 0x9f, 0x09,
+ 0x8d, 0x23, 0x01, 0xf5, 0x84, 0x9e, 0x09, 0x8d, 0x18, 0x22, 0xc1, 0xf5,
+ 0x88, 0x21, 0xc1, 0xf5, 0x9b, 0x20, 0xc1, 0xf5, 0xcc, 0x1f, 0xc1, 0xf5,
+ 0xfd, 0x1e, 0xc1, 0xf6, 0x28, 0x1d, 0x41, 0xf6, 0x53, 0xd0, 0x5e, 0xdf,
+ 0x09, 0xa1, 0xf9, 0xc9, 0x5e, 0xe6, 0x09, 0xa1, 0xe9, 0xc7, 0xce, 0x21,
+ 0x09, 0xa1, 0xc0, 0x20, 0xc1, 0xf6, 0x7a, 0x1f, 0xc1, 0xf6, 0x9d, 0x1e,
+ 0xc1, 0xf6, 0xcb, 0x1d, 0x41, 0xf6, 0xf9, 0xd0, 0x5e, 0xdf, 0x09, 0xa1,
+ 0xf1, 0xc9, 0x5e, 0xe6, 0x09, 0xa1, 0xe1, 0xc7, 0xce, 0x21, 0x09, 0xa1,
+ 0xb8, 0xa6, 0x09, 0x82, 0xc9, 0xa5, 0x09, 0x82, 0xc1, 0xa4, 0x09, 0x82,
+ 0xb9, 0xa3, 0x09, 0x82, 0xb1, 0xa2, 0x09, 0x82, 0xa3, 0x01, 0xf7, 0x23,
+ 0xa1, 0x09, 0x82, 0x99, 0xa0, 0x09, 0x82, 0x91, 0x9f, 0x09, 0x82, 0x89,
+ 0x9e, 0x09, 0x82, 0x80, 0xc6, 0x07, 0xba, 0x09, 0xa1, 0xdb, 0x01, 0xf7,
+ 0x27, 0xc3, 0x02, 0x47, 0x09, 0xa1, 0xd3, 0x01, 0xf7, 0x2b, 0xc6, 0x01,
+ 0x8c, 0x09, 0xa1, 0xc8, 0x1e, 0xc1, 0xf7, 0x2f, 0x1d, 0x41, 0xf7, 0x4d,
+ 0x22, 0xc1, 0xf7, 0x77, 0x21, 0xc1, 0xf7, 0x82, 0x20, 0xc1, 0xf7, 0xaa,
+ 0x1f, 0xc1, 0xf7, 0xdb, 0x1e, 0xc1, 0xf8, 0x0f, 0x1d, 0x41, 0xf8, 0x3d,
+ 0x1e, 0xc1, 0xf8, 0x6a, 0xc2, 0xed, 0xc1, 0x09, 0x9d, 0x6b, 0x01, 0xf8,
+ 0x98, 0x20, 0xc1, 0xf8, 0x9c, 0x1f, 0xc1, 0xf8, 0xd0, 0x1d, 0x41, 0xf9,
+ 0x04, 0x20, 0xc1, 0xf9, 0x31, 0x1f, 0xc1, 0xf9, 0x3d, 0x1e, 0xc1, 0xf9,
+ 0x65, 0x1d, 0x41, 0xf9, 0x8d, 0xc2, 0xeb, 0xb3, 0x09, 0x82, 0x79, 0x23,
+ 0xc1, 0xf9, 0xb4, 0x22, 0xc1, 0xf9, 0xdc, 0x21, 0xc1, 0xfa, 0x04, 0x20,
+ 0xc1, 0xfa, 0x38, 0x1f, 0xc1, 0xfa, 0x63, 0x1e, 0xc1, 0xfa, 0x8b, 0x1d,
+ 0x41, 0xfa, 0xb9, 0xa3, 0x09, 0xa0, 0x23, 0x01, 0xfa, 0xe3, 0xa2, 0x09,
+ 0x9f, 0xd3, 0x01, 0xfb, 0x03, 0xa1, 0x09, 0x9f, 0xc9, 0xa0, 0x09, 0x9f,
+ 0xc1, 0x9f, 0x09, 0x9f, 0xb9, 0x9e, 0x09, 0x9f, 0xb1, 0x9d, 0x09, 0x9f,
+ 0xa8, 0xa6, 0x09, 0x9f, 0xa1, 0xa5, 0x09, 0x9f, 0x99, 0xa4, 0x09, 0x9f,
+ 0x91, 0xa3, 0x09, 0x9f, 0x89, 0xa2, 0x09, 0x9f, 0x7b, 0x01, 0xfb, 0x27,
+ 0xa1, 0x09, 0x9f, 0x6b, 0x01, 0xfb, 0x2b, 0xa0, 0x09, 0x9f, 0x53, 0x01,
+ 0xfb, 0x2f, 0x9f, 0x09, 0x9f, 0x2b, 0x01, 0xfb, 0x37, 0x9e, 0x09, 0x9f,
+ 0x20, 0x83, 0x09, 0x9e, 0xe0, 0x83, 0x09, 0x9e, 0xd0, 0x83, 0x09, 0x9e,
+ 0xb8, 0x84, 0x09, 0x9e, 0xa1, 0x83, 0x09, 0x9e, 0x98, 0x9f, 0x09, 0x9b,
+ 0x09, 0x9e, 0x09, 0x9b, 0x01, 0x9d, 0x09, 0x9a, 0xf8, 0xa6, 0x09, 0x9a,
+ 0xf1, 0xa5, 0x09, 0x9a, 0xe9, 0xa4, 0x09, 0x9a, 0xe1, 0xa3, 0x09, 0x9a,
+ 0xd9, 0xa2, 0x09, 0x9a, 0xd1, 0xa1, 0x09, 0x9a, 0xc9, 0xa0, 0x09, 0x9a,
+ 0xc1, 0x9f, 0x09, 0x9a, 0xb3, 0x01, 0xfb, 0x47, 0x9e, 0x09, 0x9a, 0xa9,
+ 0x9d, 0x09, 0x9a, 0xa0, 0xa6, 0x09, 0x9a, 0x93, 0x01, 0xfb, 0x4b, 0xa5,
+ 0x09, 0x9a, 0x89, 0xa4, 0x09, 0x9a, 0x81, 0xa3, 0x09, 0x9a, 0x79, 0xa2,
+ 0x09, 0x9a, 0x71, 0xa1, 0x09, 0x9a, 0x69, 0xa0, 0x09, 0x9a, 0x5b, 0x01,
+ 0xfb, 0x4f, 0x9f, 0x09, 0x9a, 0x51, 0x9e, 0x09, 0x9a, 0x49, 0x9d, 0x09,
+ 0x9a, 0x40, 0xa6, 0x09, 0x9a, 0x39, 0xa5, 0x09, 0x9a, 0x31, 0xa4, 0x09,
+ 0x9a, 0x29, 0xa3, 0x09, 0x9a, 0x21, 0xa2, 0x09, 0x9a, 0x19, 0xa1, 0x09,
+ 0x9a, 0x11, 0xa0, 0x09, 0x9a, 0x09, 0x9f, 0x09, 0x9a, 0x01, 0x9e, 0x09,
+ 0x99, 0xf9, 0x9d, 0x09, 0x99, 0xf0, 0xa6, 0x09, 0x99, 0xe9, 0xa5, 0x09,
+ 0x99, 0xe1, 0xa4, 0x09, 0x99, 0xd9, 0xa3, 0x09, 0x99, 0xc3, 0x01, 0xfb,
+ 0x53, 0xa2, 0x09, 0x99, 0xb9, 0xa1, 0x09, 0x99, 0xb1, 0xa0, 0x09, 0x99,
+ 0xa9, 0x9f, 0x09, 0x99, 0xa1, 0x9e, 0x09, 0x99, 0x98, 0xa3, 0x09, 0x98,
+ 0x31, 0xa2, 0x09, 0x98, 0x29, 0xa1, 0x09, 0x98, 0x21, 0xa0, 0x09, 0x98,
+ 0x19, 0x9f, 0x09, 0x98, 0x11, 0x9e, 0x09, 0x98, 0x09, 0x9d, 0x09, 0x98,
+ 0x00, 0xa6, 0x09, 0x97, 0xf9, 0xa5, 0x09, 0x97, 0xf1, 0xa4, 0x09, 0x97,
+ 0xe9, 0xa3, 0x09, 0x97, 0xe1, 0xa2, 0x09, 0x97, 0xd3, 0x01, 0xfb, 0x5b,
+ 0xa1, 0x09, 0x97, 0xc9, 0xa0, 0x09, 0x97, 0xc1, 0x9f, 0x09, 0x97, 0xb9,
+ 0x9e, 0x09, 0x97, 0xb1, 0x9d, 0x09, 0x97, 0xa8, 0xa6, 0x09, 0x97, 0xa1,
+ 0xa5, 0x09, 0x97, 0x99, 0xa4, 0x09, 0x97, 0x91, 0xa3, 0x09, 0x97, 0x7b,
+ 0x01, 0xfb, 0x5f, 0xa2, 0x09, 0x97, 0x71, 0xa1, 0x09, 0x97, 0x69, 0xa0,
+ 0x09, 0x97, 0x61, 0x9f, 0x09, 0x97, 0x59, 0x9e, 0x09, 0x97, 0x51, 0x9d,
+ 0x09, 0x97, 0x48, 0xa6, 0x09, 0x97, 0x41, 0xa5, 0x09, 0x97, 0x39, 0xa4,
+ 0x09, 0x97, 0x2b, 0x01, 0xfb, 0x67, 0xa3, 0x09, 0x97, 0x21, 0xa2, 0x09,
+ 0x97, 0x19, 0xa1, 0x09, 0x97, 0x03, 0x01, 0xfb, 0x6b, 0xa0, 0x09, 0x96,
+ 0xf9, 0x9f, 0x09, 0x96, 0xf1, 0x9e, 0x09, 0x96, 0xe9, 0x9d, 0x09, 0x96,
+ 0xe0, 0xa6, 0x09, 0x96, 0xd9, 0xa5, 0x09, 0x96, 0xd1, 0xa4, 0x09, 0x96,
+ 0xc9, 0xa3, 0x09, 0x96, 0xbb, 0x01, 0xfb, 0x73, 0xa2, 0x09, 0x96, 0xb1,
+ 0xa1, 0x09, 0x96, 0xa9, 0xa0, 0x09, 0x96, 0xa1, 0x9f, 0x09, 0x96, 0x93,
+ 0x01, 0xfb, 0x77, 0x9e, 0x09, 0x96, 0x88, 0xa6, 0x09, 0x96, 0x81, 0xa5,
+ 0x09, 0x96, 0x79, 0xa4, 0x09, 0x96, 0x71, 0xa3, 0x09, 0x96, 0x69, 0xa2,
+ 0x09, 0x96, 0x61, 0xa1, 0x09, 0x96, 0x59, 0xa0, 0x09, 0x96, 0x51, 0x9f,
+ 0x09, 0x96, 0x49, 0x9e, 0x09, 0x96, 0x41, 0x9d, 0x09, 0x96, 0x38, 0xa6,
+ 0x09, 0x96, 0x31, 0xa5, 0x09, 0x96, 0x29, 0xa4, 0x09, 0x96, 0x21, 0xa3,
+ 0x09, 0x96, 0x13, 0x01, 0xfb, 0x7b, 0xa2, 0x09, 0x96, 0x09, 0xa1, 0x09,
+ 0x96, 0x01, 0xa0, 0x09, 0x95, 0xf9, 0x9f, 0x09, 0x95, 0xf1, 0x9e, 0x09,
+ 0x95, 0xe9, 0x9d, 0x09, 0x95, 0xda, 0x01, 0xfb, 0x7f, 0xa6, 0x09, 0x95,
+ 0xd1, 0xa5, 0x09, 0x95, 0xc9, 0xa4, 0x09, 0x95, 0xc1, 0xa3, 0x09, 0x95,
+ 0xb9, 0xa2, 0x09, 0x95, 0xb1, 0xa1, 0x09, 0x95, 0xa9, 0xa0, 0x09, 0x95,
+ 0x93, 0x01, 0xfb, 0x83, 0x9f, 0x09, 0x95, 0x83, 0x01, 0xfb, 0x8b, 0x9e,
+ 0x09, 0x95, 0x78, 0x9e, 0x09, 0x95, 0x39, 0x9d, 0x09, 0x95, 0x30, 0xa6,
+ 0x09, 0x95, 0x29, 0xa5, 0x09, 0x95, 0x21, 0xa4, 0x09, 0x95, 0x19, 0xa3,
+ 0x09, 0x95, 0x11, 0xa2, 0x09, 0x95, 0x09, 0xa1, 0x09, 0x95, 0x01, 0xa0,
+ 0x09, 0x94, 0xf3, 0x01, 0xfb, 0x8f, 0x9f, 0x09, 0x94, 0xe9, 0x9e, 0x09,
+ 0x94, 0xda, 0x01, 0xfb, 0x93, 0x1f, 0xc1, 0xfb, 0x97, 0x1e, 0xc1, 0xfb,
+ 0xa6, 0x1d, 0x41, 0xfb, 0xd7, 0xc2, 0xdd, 0x63, 0x09, 0x91, 0xa9, 0x1e,
+ 0xc1, 0xfb, 0xfb, 0x1d, 0x41, 0xfc, 0x26, 0x21, 0xc1, 0xfc, 0x4d, 0x20,
+ 0xc1, 0xfc, 0x59, 0x1f, 0xc1, 0xfc, 0x8d, 0x1e, 0xc1, 0xfc, 0xb8, 0x1d,
+ 0x41, 0xfc, 0xe3, 0xa1, 0x09, 0x8b, 0xf1, 0xa0, 0x09, 0x8b, 0xe9, 0x9f,
+ 0x09, 0x8b, 0xe1, 0x9e, 0x09, 0x8b, 0xd9, 0x9d, 0x09, 0x8b, 0xd0, 0xa6,
+ 0x09, 0x8b, 0xc9, 0xa5, 0x09, 0x8b, 0xc1, 0xa4, 0x09, 0x8b, 0xb9, 0xa3,
+ 0x09, 0x8b, 0xb1, 0xa2, 0x09, 0x8b, 0xa3, 0x01, 0xfd, 0x07, 0xa1, 0x09,
+ 0x8b, 0x99, 0xa0, 0x09, 0x8b, 0x8b, 0x01, 0xfd, 0x0b, 0x9f, 0x09, 0x8b,
+ 0x81, 0x9e, 0x09, 0x8b, 0x79, 0x9d, 0x09, 0x8b, 0x70, 0xa6, 0x09, 0x8b,
+ 0x69, 0xa5, 0x09, 0x8b, 0x61, 0xa4, 0x09, 0x8b, 0x53, 0x01, 0xfd, 0x0f,
+ 0xa3, 0x09, 0x8b, 0x43, 0x01, 0xfd, 0x13, 0xa2, 0x09, 0x8b, 0x39, 0xa1,
+ 0x09, 0x8b, 0x31, 0xa0, 0x09, 0x8b, 0x29, 0x9f, 0x09, 0x8b, 0x21, 0x9e,
+ 0x09, 0x8b, 0x19, 0x9d, 0x09, 0x8b, 0x10, 0xa6, 0x09, 0x8b, 0x09, 0xa5,
+ 0x09, 0x8b, 0x01, 0xa4, 0x09, 0x8a, 0xf9, 0xa3, 0x09, 0x8a, 0xeb, 0x01,
+ 0xfd, 0x17, 0xa2, 0x09, 0x8a, 0xe1, 0xa1, 0x09, 0x8a, 0xd9, 0xa0, 0x09,
+ 0x8a, 0xd1, 0x9f, 0x09, 0x8a, 0xc9, 0x9e, 0x09, 0x8a, 0xc1, 0x9d, 0x09,
+ 0x8a, 0xb2, 0x01, 0xfd, 0x1b, 0xa6, 0x09, 0x8a, 0xa9, 0xa5, 0x09, 0x8a,
+ 0xa1, 0xa4, 0x09, 0x8a, 0x99, 0xa3, 0x09, 0x8a, 0x91, 0xa2, 0x09, 0x8a,
+ 0x89, 0xa1, 0x09, 0x8a, 0x81, 0xa0, 0x09, 0x8a, 0x79, 0x9f, 0x09, 0x8a,
+ 0x71, 0x9e, 0x09, 0x8a, 0x63, 0x01, 0xfd, 0x1f, 0x9d, 0x09, 0x8a, 0x58,
+ 0xa6, 0x09, 0x8a, 0x51, 0xa5, 0x09, 0x8a, 0x49, 0xa4, 0x09, 0x8a, 0x33,
+ 0x01, 0xfd, 0x23, 0xa3, 0x09, 0x8a, 0x23, 0x01, 0xfd, 0x2b, 0xa2, 0x09,
+ 0x8a, 0x19, 0xa1, 0x09, 0x8a, 0x11, 0xa0, 0x09, 0x8a, 0x09, 0x9f, 0x09,
+ 0x8a, 0x01, 0x9e, 0x09, 0x89, 0xf8, 0xa4, 0x09, 0x86, 0x4b, 0x01, 0xfd,
+ 0x2f, 0xa3, 0x09, 0x86, 0x41, 0xa2, 0x09, 0x86, 0x39, 0xa1, 0x09, 0x86,
+ 0x31, 0xa0, 0x09, 0x86, 0x29, 0x9f, 0x09, 0x86, 0x21, 0x9e, 0x09, 0x86,
+ 0x19, 0x9d, 0x09, 0x86, 0x10, 0xa6, 0x09, 0x86, 0x09, 0xa5, 0x09, 0x86,
+ 0x01, 0xa4, 0x09, 0x85, 0xf9, 0xa3, 0x09, 0x85, 0xf1, 0xa2, 0x09, 0x85,
+ 0xe9, 0xa1, 0x09, 0x85, 0xdb, 0x01, 0xfd, 0x4f, 0xa0, 0x09, 0x85, 0xd1,
+ 0x9f, 0x09, 0x85, 0xc3, 0x01, 0xfd, 0x53, 0x9e, 0x09, 0x85, 0xb9, 0x9d,
+ 0x09, 0x85, 0x6a, 0x01, 0xfd, 0x57, 0xa6, 0x09, 0x85, 0x61, 0xa5, 0x09,
+ 0x85, 0x53, 0x01, 0xfd, 0x7b, 0xa4, 0x09, 0x85, 0x49, 0xa3, 0x09, 0x85,
+ 0x3b, 0x01, 0xfd, 0x7f, 0xa2, 0x09, 0x85, 0x31, 0xa1, 0x09, 0x85, 0x29,
+ 0xa0, 0x09, 0x85, 0x21, 0x9f, 0x09, 0x85, 0x19, 0x9e, 0x09, 0x85, 0x11,
+ 0x9d, 0x09, 0x85, 0x08, 0xa6, 0x09, 0x85, 0x01, 0xa5, 0x09, 0x84, 0xf9,
+ 0xa4, 0x09, 0x84, 0xf1, 0xa3, 0x09, 0x84, 0xe9, 0xa2, 0x09, 0x84, 0xe1,
+ 0xa1, 0x09, 0x84, 0xd3, 0x01, 0xfd, 0x83, 0xa0, 0x09, 0x84, 0xc9, 0x9f,
+ 0x09, 0x84, 0xc1, 0x9e, 0x09, 0x84, 0xb3, 0x01, 0xfd, 0x87, 0x9d, 0x09,
+ 0x84, 0xa8, 0xa6, 0x09, 0x84, 0xa1, 0xa5, 0x09, 0x84, 0x99, 0xa4, 0x09,
+ 0x84, 0x8b, 0x01, 0xfd, 0x8b, 0xa3, 0x09, 0x84, 0x81, 0xa2, 0x09, 0x84,
+ 0x79, 0xa1, 0x09, 0x84, 0x71, 0xa0, 0x09, 0x84, 0x69, 0x9f, 0x09, 0x84,
+ 0x61, 0x9e, 0x09, 0x84, 0x59, 0x9d, 0x09, 0x84, 0x50, 0xa6, 0x09, 0x84,
+ 0x49, 0xa5, 0x09, 0x84, 0x41, 0xa4, 0x09, 0x84, 0x39, 0xa3, 0x09, 0x84,
+ 0x31, 0xa2, 0x09, 0x84, 0x29, 0xa1, 0x09, 0x84, 0x21, 0xa0, 0x09, 0x84,
+ 0x19, 0x9f, 0x09, 0x84, 0x11, 0x9e, 0x09, 0x84, 0x09, 0x9d, 0x09, 0x84,
+ 0x00, 0xa6, 0x09, 0x83, 0xf9, 0xa5, 0x09, 0x83, 0xeb, 0x01, 0xfd, 0x8f,
+ 0xa4, 0x09, 0x83, 0xe1, 0xa3, 0x09, 0x83, 0xd9, 0xa2, 0x09, 0x83, 0xd1,
+ 0xa1, 0x09, 0x83, 0xc9, 0xa0, 0x09, 0x83, 0xc1, 0x9f, 0x09, 0x83, 0xb9,
+ 0x9e, 0x09, 0x83, 0xb0, 0xa1, 0x09, 0x83, 0xa9, 0xa0, 0x09, 0x83, 0xa1,
+ 0x9f, 0x09, 0x83, 0x99, 0x9e, 0x09, 0x83, 0x91, 0x9d, 0x09, 0x83, 0x88,
+ 0xa6, 0x09, 0x83, 0x81, 0xa5, 0x09, 0x83, 0x79, 0xa4, 0x09, 0x83, 0x71,
+ 0xa3, 0x09, 0x83, 0x69, 0xa2, 0x09, 0x83, 0x61, 0xa1, 0x09, 0x83, 0x59,
+ 0xa0, 0x09, 0x83, 0x51, 0x9f, 0x09, 0x83, 0x49, 0x9e, 0x09, 0x83, 0x41,
+ 0x9d, 0x09, 0x83, 0x32, 0x01, 0xfd, 0x93, 0xa6, 0x09, 0x83, 0x29, 0xa5,
+ 0x09, 0x83, 0x21, 0xa4, 0x09, 0x83, 0x19, 0xa3, 0x09, 0x83, 0x11, 0xa2,
+ 0x09, 0x83, 0x09, 0xa1, 0x09, 0x83, 0x01, 0xa0, 0x09, 0x82, 0xf9, 0x9f,
+ 0x09, 0x82, 0xdb, 0x01, 0xfd, 0x97, 0x9e, 0x09, 0x82, 0xd0, 0xcb, 0x5e,
+ 0x74, 0x0f, 0xbd, 0x39, 0x46, 0x01, 0x31, 0xc1, 0xfd, 0xa3, 0x15, 0xc1,
+ 0xfd, 0xaf, 0xd4, 0x3e, 0xe2, 0x0f, 0xbd, 0xa0, 0xc4, 0x15, 0xa7, 0x00,
+ 0x37, 0xb9, 0xc2, 0x22, 0x45, 0x00, 0x37, 0xb0, 0xc3, 0x0d, 0x8f, 0x00,
+ 0x37, 0xa9, 0xc3, 0x08, 0xde, 0x00, 0x37, 0xa0, 0xc4, 0x05, 0xde, 0x00,
+ 0x37, 0x99, 0xc2, 0x0a, 0x20, 0x00, 0x37, 0x90, 0x97, 0x00, 0x98, 0x4b,
+ 0x01, 0xfd, 0xbb, 0x47, 0x26, 0x0d, 0xc1, 0xfd, 0xc1, 0x83, 0x00, 0x98,
+ 0x43, 0x01, 0xfd, 0xe4, 0x8b, 0x00, 0x98, 0x51, 0x87, 0x00, 0x98, 0x6b,
+ 0x01, 0xfd, 0xe8, 0x91, 0x00, 0x98, 0x73, 0x01, 0xfd, 0xec, 0x19, 0xc1,
+ 0xfd, 0xf0, 0x09, 0xc1, 0xfe, 0x02, 0x1b, 0x41, 0xfe, 0x20, 0x0a, 0xc1,
+ 0xfe, 0x3a, 0x83, 0x00, 0x90, 0x03, 0x01, 0xfe, 0x5c, 0x97, 0x00, 0x90,
+ 0x09, 0x8b, 0x00, 0x90, 0x11, 0x87, 0x00, 0x90, 0x2b, 0x01, 0xfe, 0x60,
+ 0x91, 0x00, 0x90, 0x32, 0x01, 0xfe, 0x64, 0x04, 0xc1, 0xfe, 0x68, 0x83,
+ 0x00, 0x93, 0x03, 0x01, 0xfe, 0x82, 0x97, 0x00, 0x93, 0x09, 0x8b, 0x00,
+ 0x93, 0x11, 0x87, 0x00, 0x93, 0x2b, 0x01, 0xfe, 0x86, 0x91, 0x00, 0x93,
+ 0x33, 0x01, 0xfe, 0x8a, 0x19, 0x41, 0xfe, 0x8e, 0x05, 0xc1, 0xfe, 0x9d,
+ 0x83, 0x00, 0x93, 0xc3, 0x01, 0xfe, 0xbb, 0x97, 0x00, 0x93, 0xc9, 0x8b,
+ 0x00, 0x93, 0xd1, 0x87, 0x00, 0x93, 0xeb, 0x01, 0xfe, 0xbf, 0x91, 0x00,
+ 0x93, 0xf3, 0x01, 0xfe, 0xc3, 0xc2, 0x00, 0x4c, 0x00, 0x93, 0xf9, 0x0a,
+ 0x41, 0xfe, 0xc7, 0x1c, 0xc1, 0xfe, 0xea, 0x06, 0xc1, 0xff, 0x03, 0x83,
+ 0x00, 0x97, 0x83, 0x01, 0xff, 0x2a, 0x97, 0x00, 0x97, 0x89, 0x8b, 0x00,
+ 0x97, 0x91, 0x87, 0x00, 0x97, 0xab, 0x01, 0xff, 0x2e, 0x91, 0x00, 0x97,
+ 0xb3, 0x01, 0xff, 0x32, 0xc2, 0x00, 0x4c, 0x00, 0x97, 0xb8, 0x42, 0x01,
+ 0x47, 0xc1, 0xff, 0x36, 0x83, 0x00, 0x93, 0x83, 0x01, 0xff, 0x4f, 0x97,
+ 0x00, 0x93, 0x89, 0x8b, 0x00, 0x93, 0x91, 0x87, 0x00, 0x93, 0xab, 0x01,
+ 0xff, 0x53, 0x91, 0x00, 0x93, 0xb3, 0x01, 0xff, 0x57, 0xc2, 0x00, 0x4c,
+ 0x00, 0x93, 0xb9, 0x0a, 0xc1, 0xff, 0x5b, 0x15, 0xc1, 0xff, 0x7e, 0x1c,
+ 0x41, 0xff, 0x9e, 0x83, 0x00, 0x90, 0x43, 0x01, 0xff, 0xbb, 0x97, 0x00,
+ 0x90, 0x49, 0x8b, 0x00, 0x90, 0x51, 0x87, 0x00, 0x90, 0x6b, 0x01, 0xff,
+ 0xbf, 0x91, 0x00, 0x90, 0x73, 0x01, 0xff, 0xc3, 0xc2, 0x00, 0x4c, 0x00,
+ 0x90, 0x78, 0x83, 0x00, 0x90, 0xc3, 0x01, 0xff, 0xc7, 0x97, 0x00, 0x90,
+ 0xc9, 0x8b, 0x00, 0x90, 0xd1, 0x87, 0x00, 0x90, 0xeb, 0x01, 0xff, 0xcb,
+ 0x91, 0x00, 0x90, 0xf3, 0x01, 0xff, 0xcf, 0x19, 0xc1, 0xff, 0xd3, 0xc2,
+ 0x1a, 0x36, 0x00, 0x9a, 0xc8, 0x1c, 0xc1, 0xff, 0xe2, 0x83, 0x00, 0x91,
+ 0x83, 0x02, 0x00, 0x02, 0x97, 0x00, 0x91, 0x89, 0x8b, 0x00, 0x91, 0x91,
+ 0x87, 0x00, 0x91, 0xab, 0x02, 0x00, 0x06, 0x91, 0x00, 0x91, 0xb3, 0x02,
+ 0x00, 0x10, 0xc2, 0x00, 0x4c, 0x00, 0x91, 0xb9, 0x0a, 0xc2, 0x00, 0x14,
+ 0x15, 0x42, 0x00, 0x37, 0x83, 0x00, 0x91, 0x43, 0x02, 0x00, 0x51, 0x97,
+ 0x00, 0x91, 0x49, 0x8b, 0x00, 0x91, 0x51, 0x87, 0x00, 0x91, 0x6b, 0x02,
+ 0x00, 0x55, 0x91, 0x00, 0x91, 0x73, 0x02, 0x00, 0x59, 0xc2, 0x00, 0x4c,
+ 0x00, 0x91, 0x79, 0xc2, 0x1a, 0x36, 0x00, 0x9a, 0xc0, 0x83, 0x00, 0x92,
+ 0x03, 0x02, 0x00, 0x5d, 0x97, 0x00, 0x92, 0x09, 0x8b, 0x00, 0x92, 0x11,
+ 0x87, 0x00, 0x92, 0x2b, 0x02, 0x00, 0x61, 0x91, 0x00, 0x92, 0x33, 0x02,
+ 0x00, 0x65, 0x19, 0xc2, 0x00, 0x69, 0x0a, 0xc2, 0x00, 0x7b, 0x1b, 0x42,
+ 0x00, 0x99, 0x83, 0x00, 0x93, 0x43, 0x02, 0x00, 0xb3, 0x97, 0x00, 0x93,
+ 0x49, 0x8b, 0x00, 0x93, 0x51, 0x87, 0x00, 0x93, 0x6b, 0x02, 0x00, 0xb7,
+ 0x91, 0x00, 0x93, 0x71, 0xc2, 0x00, 0x4c, 0x00, 0x93, 0x78, 0x83, 0x00,
+ 0x94, 0x03, 0x02, 0x00, 0xbb, 0x97, 0x00, 0x94, 0x09, 0x8b, 0x00, 0x94,
+ 0x11, 0x87, 0x00, 0x94, 0x2b, 0x02, 0x00, 0xbf, 0x91, 0x00, 0x94, 0x33,
+ 0x02, 0x00, 0xc3, 0x19, 0xc2, 0x00, 0xc7, 0x1b, 0x42, 0x00, 0xd9, 0x83,
+ 0x00, 0x94, 0x83, 0x02, 0x00, 0xf3, 0x97, 0x00, 0x94, 0x89, 0x8b, 0x00,
+ 0x94, 0x91, 0x87, 0x00, 0x94, 0xab, 0x02, 0x00, 0xf7, 0x91, 0x00, 0x94,
+ 0xb3, 0x02, 0x00, 0xfb, 0xc2, 0x00, 0x4c, 0x00, 0x94, 0xb9, 0x1b, 0x42,
+ 0x00, 0xff, 0x83, 0x00, 0x95, 0x43, 0x02, 0x01, 0x22, 0x97, 0x00, 0x95,
+ 0x49, 0x8b, 0x00, 0x95, 0x51, 0x87, 0x00, 0x95, 0x6b, 0x02, 0x01, 0x26,
+ 0x91, 0x00, 0x95, 0x73, 0x02, 0x01, 0x2a, 0x19, 0xc2, 0x01, 0x2e, 0x1a,
+ 0xc2, 0x01, 0x40, 0x1b, 0x42, 0x01, 0x5e, 0x83, 0x00, 0x96, 0x43, 0x02,
+ 0x01, 0x78, 0x97, 0x00, 0x96, 0x49, 0x8b, 0x00, 0x96, 0x51, 0x87, 0x00,
+ 0x96, 0x6b, 0x02, 0x01, 0x7c, 0x91, 0x00, 0x96, 0x72, 0x02, 0x01, 0x80,
+ 0x0a, 0xc2, 0x01, 0x84, 0x83, 0x00, 0x9a, 0x83, 0x02, 0x01, 0xa7, 0x97,
+ 0x00, 0x9a, 0x89, 0x8b, 0x00, 0x9a, 0x91, 0x87, 0x00, 0x9a, 0xab, 0x02,
+ 0x01, 0xab, 0x91, 0x00, 0x9a, 0xb3, 0x02, 0x01, 0xaf, 0x19, 0x42, 0x01,
+ 0xb3, 0x83, 0x00, 0x96, 0xc3, 0x02, 0x01, 0xc2, 0x97, 0x00, 0x96, 0xc9,
+ 0x8b, 0x00, 0x96, 0xd1, 0x87, 0x00, 0x96, 0xeb, 0x02, 0x01, 0xc6, 0x91,
+ 0x00, 0x96, 0xf3, 0x02, 0x01, 0xca, 0xc2, 0x00, 0x4c, 0x00, 0x96, 0xf9,
+ 0x0a, 0xc2, 0x01, 0xce, 0x1c, 0x42, 0x01, 0xee, 0x83, 0x00, 0x97, 0x43,
+ 0x02, 0x02, 0x08, 0x97, 0x00, 0x97, 0x49, 0x8b, 0x00, 0x97, 0x51, 0x87,
+ 0x00, 0x97, 0x6b, 0x02, 0x02, 0x0c, 0x91, 0x00, 0x97, 0x72, 0x02, 0x02,
+ 0x10, 0x83, 0x00, 0x98, 0x03, 0x02, 0x02, 0x14, 0x97, 0x00, 0x98, 0x09,
+ 0x8b, 0x00, 0x98, 0x11, 0x87, 0x00, 0x98, 0x2b, 0x02, 0x02, 0x18, 0x91,
+ 0x00, 0x98, 0x33, 0x02, 0x02, 0x1c, 0xc2, 0x00, 0x4c, 0x00, 0x98, 0x38,
+ 0x83, 0x00, 0x9a, 0x43, 0x02, 0x02, 0x20, 0x97, 0x00, 0x9a, 0x49, 0x8b,
+ 0x00, 0x9a, 0x51, 0x87, 0x00, 0x9a, 0x6b, 0x02, 0x02, 0x24, 0x91, 0x00,
+ 0x9a, 0x71, 0x19, 0xc2, 0x02, 0x28, 0xc2, 0x1a, 0x36, 0x00, 0x9a, 0xd0,
+ 0x4b, 0x64, 0x0b, 0xc2, 0x02, 0x37, 0xd1, 0x37, 0x97, 0x00, 0x9a, 0xf0,
+ 0xc9, 0x4f, 0xa1, 0x00, 0x9b, 0xe0, 0x96, 0x00, 0x9c, 0x89, 0xc7, 0xcc,
+ 0x8b, 0x00, 0x9c, 0xc0, 0x48, 0x70, 0xd2, 0xc2, 0x02, 0x43, 0x45, 0x01,
+ 0xac, 0x42, 0x02, 0x4f, 0xc5, 0x00, 0x62, 0x01, 0x18, 0x09, 0xc5, 0xde,
+ 0x91, 0x0f, 0xa9, 0x31, 0xc4, 0xe8, 0xb3, 0x0f, 0xa8, 0x61, 0xca, 0xa7,
+ 0x40, 0x0f, 0xa5, 0x08, 0xc2, 0x3c, 0xd1, 0x08, 0x7f, 0xa9, 0xc3, 0x1e,
+ 0x54, 0x08, 0x7f, 0x40, 0xc3, 0x11, 0x40, 0x08, 0x7f, 0xa1, 0x03, 0x42,
+ 0x02, 0x73, 0xc2, 0x01, 0x47, 0x08, 0x7f, 0x38, 0xc4, 0x32, 0xac, 0x08,
+ 0x7f, 0x01, 0xc3, 0x18, 0x7a, 0x08, 0x7f, 0x78, 0x87, 0x08, 0x29, 0x29,
+ 0xc4, 0x33, 0x51, 0x08, 0x29, 0x30, 0xd6, 0x2e, 0x0b, 0x01, 0x39, 0xb9,
+ 0xcd, 0x0f, 0x50, 0x01, 0x39, 0xa9, 0xca, 0x25, 0x5a, 0x01, 0x39, 0xa0,
+ 0xc2, 0x03, 0x73, 0x01, 0x10, 0x71, 0xcb, 0x6e, 0x4b, 0x00, 0x04, 0xb8,
+ 0xcb, 0x8f, 0x6b, 0x00, 0x00, 0x23, 0x02, 0x02, 0x7f, 0xc3, 0x02, 0xff,
+ 0x00, 0x00, 0x18, 0x43, 0x07, 0x52, 0xc2, 0x02, 0x85, 0xcd, 0x79, 0x39,
+ 0x01, 0x12, 0xe8, 0x00, 0x42, 0x02, 0x9d, 0xcb, 0x91, 0x91, 0x0f, 0x9f,
+ 0x21, 0xc6, 0xd3, 0xa0, 0x0f, 0x9f, 0x80, 0xc4, 0x15, 0xa7, 0x08, 0xed,
+ 0x39, 0xc2, 0x22, 0x45, 0x08, 0xed, 0x30, 0xc3, 0x0d, 0x8f, 0x08, 0xed,
+ 0x29, 0xc3, 0x08, 0xde, 0x08, 0xed, 0x20, 0xc4, 0x05, 0xde, 0x08, 0xed,
+ 0x19, 0xc2, 0x0a, 0x20, 0x08, 0xed, 0x10, 0x03, 0xc2, 0x02, 0xa7, 0xc2,
+ 0x02, 0x14, 0x08, 0xec, 0x99, 0xc2, 0x01, 0xe6, 0x08, 0xec, 0x81, 0x97,
+ 0x08, 0xec, 0x6b, 0x02, 0x02, 0xb3, 0x8b, 0x08, 0xec, 0x5a, 0x02, 0x02,
+ 0xb7, 0xc2, 0x01, 0x0e, 0x08, 0xec, 0x31, 0x83, 0x08, 0xec, 0x28, 0xc2,
+ 0x07, 0x69, 0x08, 0xec, 0x21, 0x83, 0x08, 0xeb, 0xd0, 0x06, 0xc2, 0x02,
+ 0xbb, 0xc2, 0x01, 0x0e, 0x08, 0xeb, 0xc9, 0x83, 0x08, 0xeb, 0xc0, 0xc2,
+ 0x01, 0x0e, 0x08, 0xec, 0x09, 0x83, 0x08, 0xec, 0x00, 0xc2, 0x00, 0x96,
+ 0x08, 0xeb, 0xf9, 0x83, 0x08, 0xeb, 0xa8, 0x16, 0xc2, 0x02, 0xc5, 0xc2,
+ 0x01, 0x0e, 0x08, 0xeb, 0xa1, 0x83, 0x08, 0xeb, 0x98, 0xc2, 0x01, 0x0e,
+ 0x08, 0xeb, 0xe1, 0x83, 0x08, 0xeb, 0xd8, 0xc2, 0x01, 0x0e, 0x08, 0xeb,
+ 0xb9, 0x83, 0x08, 0xeb, 0xb0, 0xc2, 0x01, 0x0e, 0x08, 0xeb, 0x91, 0x83,
+ 0x08, 0xeb, 0x88, 0xc2, 0x01, 0x0e, 0x08, 0xeb, 0x79, 0x83, 0x08, 0xeb,
+ 0x70, 0x97, 0x08, 0xeb, 0x59, 0x8b, 0x08, 0xeb, 0x41, 0x83, 0x08, 0xeb,
+ 0x08, 0x97, 0x08, 0xeb, 0x28, 0x8b, 0x08, 0xeb, 0x18, 0xc5, 0x45, 0xcf,
+ 0x00, 0x50, 0x19, 0xc4, 0x21, 0x28, 0x00, 0x52, 0x68, 0x83, 0x00, 0x50,
+ 0x31, 0x8b, 0x00, 0x50, 0x81, 0x97, 0x00, 0x50, 0xa0, 0x8b, 0x00, 0x50,
+ 0x40, 0x97, 0x00, 0x50, 0x50, 0x83, 0x00, 0x50, 0xa9, 0x0a, 0x42, 0x02,
+ 0xcf, 0x83, 0x00, 0x50, 0xb9, 0x0a, 0x42, 0x02, 0xd9, 0xc2, 0x07, 0x69,
+ 0x00, 0x50, 0xc9, 0xc2, 0x1a, 0x36, 0x00, 0x50, 0xf1, 0xc2, 0x01, 0x01,
+ 0x00, 0x51, 0x19, 0x83, 0x00, 0x51, 0x40, 0x83, 0x00, 0x50, 0xd1, 0xc2,
+ 0x01, 0x0e, 0x00, 0x50, 0xd8, 0x83, 0x00, 0x50, 0xe1, 0xc2, 0x01, 0x0e,
+ 0x00, 0x50, 0xe8, 0x16, 0xc2, 0x02, 0xe3, 0x83, 0x00, 0x51, 0x21, 0xc2,
+ 0x01, 0x0e, 0x00, 0x51, 0x28, 0x06, 0xc2, 0x02, 0xed, 0x83, 0x00, 0x51,
+ 0x31, 0xc2, 0x01, 0x0e, 0x00, 0x51, 0x38, 0x83, 0x00, 0x51, 0x51, 0xc2,
+ 0x01, 0x0e, 0x00, 0x51, 0x58, 0x83, 0x00, 0x51, 0x61, 0xc2, 0x01, 0x0e,
+ 0x00, 0x51, 0x68, 0x83, 0x00, 0x51, 0x81, 0xc2, 0x00, 0x9a, 0x00, 0x52,
+ 0xe0, 0x83, 0x00, 0x51, 0x91, 0xc2, 0x00, 0x96, 0x00, 0x51, 0x98, 0xc2,
+ 0x01, 0x0e, 0x00, 0x51, 0xb1, 0x83, 0x00, 0x51, 0xc0, 0x83, 0x00, 0x51,
+ 0xf1, 0x8b, 0x00, 0x52, 0x41, 0x97, 0x00, 0x52, 0x60, 0x8b, 0x00, 0x52,
+ 0x00, 0x97, 0x00, 0x52, 0x10, 0xc2, 0x0a, 0x20, 0x00, 0x53, 0x41, 0xc4,
+ 0x05, 0xde, 0x00, 0x53, 0x48, 0xc3, 0x08, 0xde, 0x00, 0x53, 0x51, 0xc3,
+ 0x0d, 0x8f, 0x00, 0x53, 0x58, 0xc2, 0x22, 0x45, 0x00, 0x53, 0x61, 0xc4,
+ 0x15, 0xa7, 0x00, 0x53, 0x68, 0xca, 0x21, 0x1b, 0x00, 0x54, 0x09, 0xd1,
+ 0x33, 0x14, 0x00, 0x57, 0xf0, 0xc7, 0x10, 0xac, 0x00, 0x54, 0x11, 0xc7,
+ 0x7d, 0xf8, 0x00, 0x55, 0xe8, 0xc5, 0x45, 0xcf, 0x00, 0x54, 0x19, 0xc4,
+ 0x21, 0x28, 0x00, 0x56, 0x68, 0xc4, 0xdd, 0x8e, 0x00, 0x57, 0xd1, 0xc5,
+ 0xde, 0x2d, 0x00, 0x57, 0xd8, 0xd4, 0x39, 0x7e, 0x00, 0x57, 0xe9, 0xd5,
+ 0x33, 0x10, 0x00, 0x57, 0xf8, 0x83, 0x00, 0x54, 0x31, 0x8b, 0x00, 0x54,
+ 0x81, 0x97, 0x00, 0x54, 0xa0, 0x8b, 0x00, 0x54, 0x40, 0x97, 0x00, 0x54,
+ 0x50, 0x47, 0xb7, 0xd8, 0xc2, 0x02, 0xf7, 0x83, 0x00, 0x55, 0xa8, 0x83,
+ 0x00, 0x54, 0xa9, 0xc2, 0x01, 0x0e, 0x00, 0x54, 0xb0, 0x83, 0x00, 0x54,
+ 0xb9, 0xc2, 0x01, 0x0e, 0x00, 0x54, 0xc0, 0xc2, 0x07, 0x69, 0x00, 0x54,
+ 0xc9, 0xc2, 0x1a, 0x36, 0x00, 0x54, 0xf1, 0xc2, 0x01, 0x01, 0x00, 0x55,
+ 0x19, 0x83, 0x00, 0x55, 0x40, 0x83, 0x00, 0x54, 0xd1, 0xc2, 0x01, 0x0e,
+ 0x00, 0x54, 0xd8, 0x83, 0x00, 0x54, 0xe1, 0xc2, 0x01, 0x0e, 0x00, 0x54,
+ 0xe8, 0x16, 0xc2, 0x03, 0x05, 0x83, 0x00, 0x55, 0x21, 0xc2, 0x01, 0x0e,
+ 0x00, 0x55, 0x28, 0x06, 0xc2, 0x03, 0x0f, 0x83, 0x00, 0x55, 0x31, 0xc2,
+ 0x01, 0x0e, 0x00, 0x55, 0x38, 0x83, 0x00, 0x55, 0x51, 0xc2, 0x01, 0x0e,
+ 0x00, 0x55, 0x58, 0x83, 0x00, 0x55, 0x61, 0xc2, 0x01, 0x0e, 0x00, 0x55,
+ 0x68, 0x83, 0x00, 0x55, 0x91, 0xc2, 0x00, 0x96, 0x00, 0x55, 0x98, 0xc2,
+ 0x01, 0x0e, 0x00, 0x55, 0xb1, 0xc2, 0x0e, 0xe5, 0x00, 0x55, 0xb9, 0x83,
+ 0x00, 0x55, 0xc0, 0x87, 0x00, 0x54, 0x69, 0x91, 0x00, 0x54, 0x88, 0x03,
+ 0xc2, 0x03, 0x19, 0x8b, 0x00, 0x55, 0xfb, 0x02, 0x03, 0x25, 0x97, 0x00,
+ 0x56, 0x0b, 0x02, 0x03, 0x29, 0x48, 0xb7, 0xd7, 0xc2, 0x03, 0x2d, 0x47,
+ 0xa4, 0x92, 0xc2, 0x03, 0x3b, 0x87, 0x00, 0x56, 0x39, 0x91, 0x00, 0x56,
+ 0x58, 0xc2, 0x0a, 0x20, 0x00, 0x57, 0x41, 0xc4, 0x05, 0xde, 0x00, 0x57,
+ 0x48, 0xc3, 0x08, 0xde, 0x00, 0x57, 0x51, 0xc3, 0x0d, 0x8f, 0x00, 0x57,
+ 0x58, 0xc2, 0x22, 0x45, 0x00, 0x57, 0x61, 0xc4, 0x15, 0xa7, 0x00, 0x57,
+ 0x68, 0xc2, 0x0d, 0x8b, 0x08, 0x1a, 0x09, 0xc8, 0x0d, 0x7e, 0x08, 0x1a,
+ 0x50, 0x0f, 0xc2, 0x03, 0x43, 0x42, 0x01, 0x5b, 0xc2, 0x03, 0x4f, 0x18,
+ 0xc2, 0x03, 0x5b, 0x06, 0xc2, 0x03, 0x67, 0x11, 0xc2, 0x03, 0x7c, 0x48,
+ 0x01, 0xf7, 0xc2, 0x03, 0x94, 0x15, 0xc2, 0x03, 0xb0, 0x12, 0xc2, 0x03,
+ 0xc8, 0x0d, 0xc2, 0x03, 0xe9, 0x0e, 0xc2, 0x03, 0xf9, 0xcc, 0x53, 0xb0,
+ 0x00, 0x1b, 0xa1, 0x1b, 0xc2, 0x04, 0x11, 0xcd, 0x2d, 0xa6, 0x00, 0x1b,
+ 0xf1, 0x16, 0xc2, 0x04, 0x1d, 0x03, 0xc2, 0x04, 0x39, 0xcb, 0x99, 0xfd,
+ 0x00, 0x1e, 0x81, 0x14, 0xc2, 0x04, 0x49, 0x08, 0xc2, 0x04, 0x55, 0xcb,
+ 0x9a, 0x97, 0x08, 0x0c, 0x29, 0xcb, 0x90, 0x89, 0x08, 0x0c, 0x41, 0xc9,
+ 0xaf, 0xa6, 0x08, 0x0c, 0x51, 0x4d, 0x78, 0x42, 0x42, 0x04, 0x61, 0xc4,
+ 0xe5, 0xb7, 0x0f, 0xa6, 0xb9, 0xc5, 0x19, 0x74, 0x0f, 0xa4, 0xd1, 0xc5,
+ 0xdc, 0xe8, 0x0f, 0x9a, 0x79, 0xc5, 0xe2, 0x74, 0x0f, 0xca, 0xb8, 0x4a,
+ 0x37, 0x16, 0xc2, 0x04, 0x73, 0xcf, 0x6b, 0x40, 0x01, 0x55, 0x28, 0xc3,
+ 0x05, 0xe3, 0x01, 0x16, 0xb9, 0xcd, 0x76, 0xbc, 0x01, 0x53, 0xd1, 0xd3,
+ 0x46, 0xa5, 0x01, 0x53, 0xe0, 0x42, 0x00, 0xb2, 0xc2, 0x04, 0x7f, 0x43,
+ 0x00, 0x29, 0x42, 0x04, 0x9a, 0x45, 0x03, 0x51, 0xc2, 0x04, 0xa6, 0x43,
+ 0x00, 0x3b, 0x42, 0x04, 0xb8, 0xd4, 0x08, 0x53, 0x01, 0x55, 0x48, 0x48,
+ 0xb7, 0xd7, 0xc2, 0x04, 0xc4, 0x03, 0xc2, 0x04, 0xd2, 0xc2, 0x02, 0x14,
+ 0x08, 0x9a, 0x59, 0xc2, 0x01, 0xe6, 0x08, 0x9a, 0x39, 0x97, 0x08, 0x9a,
+ 0x0b, 0x02, 0x04, 0xde, 0x8b, 0x08, 0x99, 0xfa, 0x02, 0x04, 0xe2, 0x18,
+ 0xc2, 0x04, 0xe6, 0xc2, 0x01, 0x0e, 0x08, 0x99, 0xc9, 0x15, 0xc2, 0x04,
+ 0xf6, 0x0e, 0xc2, 0x05, 0x06, 0xc2, 0x00, 0x9a, 0x08, 0x99, 0x81, 0xc2,
+ 0x1a, 0x36, 0x08, 0x99, 0x79, 0xc2, 0x00, 0x3f, 0x08, 0x99, 0x71, 0x04,
+ 0xc2, 0x05, 0x10, 0x12, 0xc2, 0x05, 0x1a, 0x06, 0xc2, 0x05, 0x24, 0x16,
+ 0xc2, 0x05, 0x32, 0x10, 0xc2, 0x05, 0x40, 0x0c, 0xc2, 0x05, 0x56, 0x05,
+ 0xc2, 0x05, 0x60, 0x09, 0xc2, 0x05, 0x6a, 0x0d, 0xc2, 0x05, 0x74, 0x83,
+ 0x08, 0x98, 0x2b, 0x02, 0x05, 0x7e, 0xc2, 0x02, 0x14, 0x08, 0x98, 0x99,
+ 0x97, 0x08, 0x98, 0x4b, 0x02, 0x05, 0x8a, 0x8b, 0x08, 0x98, 0x3b, 0x02,
+ 0x05, 0x8e, 0xc2, 0x01, 0xe6, 0x08, 0x98, 0x78, 0xc5, 0xdb, 0x03, 0x08,
+ 0x9a, 0xe9, 0x42, 0x02, 0x52, 0xc2, 0x05, 0x92, 0x03, 0xc2, 0x05, 0x9e,
+ 0xc5, 0x33, 0x1a, 0x08, 0x99, 0xe1, 0x05, 0x42, 0x05, 0xaa, 0x46, 0x01,
+ 0xab, 0x42, 0x05, 0xb6, 0xc5, 0x09, 0x02, 0x01, 0x12, 0x89, 0xca, 0x37,
+ 0x20, 0x01, 0x12, 0x70, 0x42, 0x00, 0x96, 0xc2, 0x05, 0xc0, 0x0a, 0xc2,
+ 0x05, 0xca, 0x03, 0xc2, 0x05, 0xde, 0x16, 0xc2, 0x05, 0xee, 0x07, 0xc2,
+ 0x05, 0xf8, 0xc2, 0x16, 0x0a, 0x00, 0xe5, 0xb9, 0xc2, 0x02, 0xf8, 0x00,
+ 0xe5, 0xb1, 0xc2, 0x00, 0xc9, 0x00, 0xe5, 0x99, 0x0c, 0xc2, 0x06, 0x02,
+ 0xc3, 0x23, 0xe3, 0x00, 0xe5, 0x71, 0x05, 0xc2, 0x06, 0x0e, 0x15, 0xc2,
+ 0x06, 0x1e, 0xc3, 0xec, 0xed, 0x00, 0xe5, 0x39, 0x09, 0xc2, 0x06, 0x2a,
+ 0x0d, 0xc2, 0x06, 0x36, 0x12, 0xc2, 0x06, 0x42, 0xc2, 0x0b, 0xfd, 0x00,
+ 0xe5, 0x19, 0xc3, 0x87, 0xeb, 0x00, 0xe5, 0x01, 0x1c, 0xc2, 0x06, 0x4e,
+ 0xc2, 0x00, 0xe5, 0x00, 0xe4, 0xe9, 0xc3, 0x2a, 0x55, 0x00, 0xe4, 0xe1,
+ 0xc3, 0x14, 0xe9, 0x00, 0xe4, 0xd9, 0xc2, 0x01, 0x5b, 0x00, 0xe4, 0xc1,
+ 0xc3, 0x22, 0x5f, 0x00, 0xe4, 0xa9, 0xc3, 0x9b, 0x7e, 0x00, 0xe4, 0x99,
+ 0xc3, 0x11, 0x26, 0x00, 0xe4, 0x88, 0x03, 0xc2, 0x06, 0x5a, 0xc3, 0x11,
+ 0x26, 0x00, 0x85, 0x09, 0x09, 0xc2, 0x06, 0x64, 0xc3, 0x9b, 0x7e, 0x00,
+ 0x85, 0x19, 0xc2, 0x01, 0x04, 0x00, 0x85, 0x21, 0xc3, 0x22, 0x5f, 0x00,
+ 0x85, 0x29, 0x1c, 0xc2, 0x06, 0x70, 0x42, 0x00, 0x44, 0xc2, 0x06, 0x7c,
+ 0xc2, 0x01, 0x5b, 0x00, 0x85, 0x41, 0x0d, 0xc2, 0x06, 0x84, 0xc3, 0x02,
+ 0xe3, 0x00, 0x85, 0x51, 0xc3, 0x14, 0xe9, 0x00, 0x85, 0x59, 0xc3, 0x2a,
+ 0x55, 0x00, 0x85, 0x61, 0xc2, 0x00, 0xe5, 0x00, 0x85, 0x69, 0x12, 0xc2,
+ 0x06, 0x90, 0xc3, 0x87, 0xeb, 0x00, 0x85, 0x81, 0x15, 0xc2, 0x06, 0x9c,
+ 0xc2, 0x0b, 0xfd, 0x00, 0x85, 0x99, 0xc3, 0xec, 0xed, 0x00, 0x85, 0xb9,
+ 0x05, 0xc2, 0x06, 0xa8, 0x0c, 0xc2, 0x06, 0xb8, 0xc3, 0x23, 0xe3, 0x00,
+ 0x85, 0xf1, 0x0a, 0xc2, 0x06, 0xc4, 0xc2, 0x00, 0xc9, 0x00, 0x86, 0x19,
+ 0xc2, 0x16, 0x0a, 0x00, 0x86, 0x38, 0x03, 0xc2, 0x06, 0xd8, 0xc3, 0x11,
+ 0x26, 0x00, 0x86, 0x89, 0x09, 0xc2, 0x06, 0xe8, 0xc3, 0x9b, 0x7e, 0x00,
+ 0x86, 0x99, 0x07, 0xc2, 0x06, 0xf4, 0xc3, 0x22, 0x5f, 0x00, 0x86, 0xa9,
+ 0x1c, 0xc2, 0x06, 0xfe, 0x16, 0xc2, 0x07, 0x0a, 0xc2, 0x01, 0x5b, 0x00,
+ 0x86, 0xc1, 0x0d, 0xc2, 0x07, 0x14, 0x42, 0x00, 0x96, 0xc2, 0x07, 0x20,
+ 0xc3, 0x14, 0xe9, 0x00, 0x86, 0xd9, 0xc3, 0x2a, 0x55, 0x00, 0x86, 0xe1,
+ 0xc2, 0x00, 0xe5, 0x00, 0x86, 0xe9, 0x12, 0xc2, 0x07, 0x2a, 0xc3, 0x87,
+ 0xeb, 0x00, 0x87, 0x01, 0x15, 0xc2, 0x07, 0x36, 0xc2, 0x0b, 0xfd, 0x00,
+ 0x87, 0x19, 0xc3, 0xec, 0xed, 0x00, 0x87, 0x39, 0x05, 0xc2, 0x07, 0x42,
+ 0x0c, 0xc2, 0x07, 0x52, 0xc3, 0x23, 0xe3, 0x00, 0x87, 0x71, 0x0a, 0xc2,
+ 0x07, 0x5e, 0xc2, 0x00, 0xc9, 0x00, 0x87, 0x99, 0xc2, 0x02, 0xf8, 0x00,
+ 0x87, 0xb1, 0xc2, 0x16, 0x0a, 0x00, 0x87, 0xb8, 0x03, 0xc2, 0x07, 0x72,
+ 0xc3, 0x11, 0x26, 0x01, 0x68, 0x09, 0x09, 0xc2, 0x07, 0x7c, 0xc3, 0x9b,
+ 0x7e, 0x01, 0x68, 0x19, 0xc2, 0x01, 0x04, 0x01, 0x68, 0x21, 0xc3, 0x22,
+ 0x5f, 0x01, 0x68, 0x29, 0x1c, 0xc2, 0x07, 0x88, 0x42, 0x00, 0x44, 0xc2,
+ 0x07, 0x94, 0xc2, 0x01, 0x5b, 0x01, 0x68, 0x41, 0x0d, 0xc2, 0x07, 0x9c,
+ 0xc3, 0x02, 0xe3, 0x01, 0x68, 0x51, 0xc3, 0x14, 0xe9, 0x01, 0x68, 0x59,
+ 0xc3, 0x2a, 0x55, 0x01, 0x68, 0x61, 0xc2, 0x00, 0xe5, 0x01, 0x68, 0x69,
+ 0x12, 0xc2, 0x07, 0xa8, 0xc3, 0x87, 0xeb, 0x01, 0x68, 0x81, 0x15, 0xc2,
+ 0x07, 0xb4, 0xc2, 0x0b, 0xfd, 0x01, 0x68, 0x99, 0xc3, 0xec, 0xed, 0x01,
+ 0x68, 0xb9, 0x05, 0xc2, 0x07, 0xc0, 0x0c, 0xc2, 0x07, 0xd0, 0xc3, 0x23,
+ 0xe3, 0x01, 0x68, 0xf1, 0x0a, 0xc2, 0x07, 0xdc, 0xc2, 0x00, 0xc9, 0x01,
+ 0x69, 0x19, 0xc2, 0x16, 0x0a, 0x01, 0x69, 0x38, 0xc3, 0xeb, 0x64, 0x01,
+ 0x60, 0x01, 0x04, 0xc2, 0x07, 0xf0, 0xc4, 0xe4, 0xdb, 0x01, 0x60, 0x11,
+ 0xc7, 0xc9, 0x90, 0x01, 0x60, 0x19, 0x06, 0xc2, 0x07, 0xfc, 0x1b, 0xc2,
+ 0x08, 0x0e, 0x1c, 0xc2, 0x08, 0x20, 0x8b, 0x01, 0x60, 0x5b, 0x02, 0x08,
+ 0x2c, 0xc4, 0xe5, 0x5b, 0x01, 0x60, 0x69, 0x0e, 0xc2, 0x08, 0x3e, 0xc7,
+ 0x62, 0x51, 0x01, 0x60, 0x79, 0xc5, 0xe0, 0x6c, 0x01, 0x60, 0x81, 0x11,
+ 0xc2, 0x08, 0x4a, 0x12, 0xc2, 0x08, 0x56, 0xc5, 0xda, 0x6d, 0x01, 0x60,
+ 0x99, 0x15, 0xc2, 0x08, 0x60, 0x16, 0xc2, 0x08, 0x79, 0xc3, 0xca, 0xfd,
+ 0x01, 0x60, 0xb1, 0x08, 0xc2, 0x08, 0x8b, 0xc4, 0xe6, 0xf3, 0x01, 0x60,
+ 0xc1, 0x05, 0x42, 0x08, 0x97, 0xc3, 0xeb, 0x64, 0x01, 0x61, 0x81, 0x04,
+ 0xc2, 0x08, 0xa3, 0xc4, 0xe4, 0xdb, 0x01, 0x61, 0x91, 0xc7, 0xc9, 0x90,
+ 0x01, 0x61, 0x99, 0x06, 0xc2, 0x08, 0xaf, 0x1b, 0xc2, 0x08, 0xc1, 0x1c,
+ 0xc2, 0x08, 0xd3, 0x8b, 0x01, 0x61, 0xdb, 0x02, 0x08, 0xdf, 0xc4, 0xe5,
+ 0x5b, 0x01, 0x61, 0xe9, 0x0e, 0xc2, 0x08, 0xf1, 0xc7, 0x62, 0x51, 0x01,
+ 0x61, 0xf9, 0xc5, 0xe0, 0x6c, 0x01, 0x62, 0x01, 0x11, 0xc2, 0x08, 0xfd,
+ 0x12, 0xc2, 0x09, 0x09, 0xc5, 0xda, 0x6d, 0x01, 0x62, 0x19, 0x15, 0xc2,
+ 0x09, 0x13, 0x16, 0xc2, 0x09, 0x2c, 0xc3, 0xca, 0xfd, 0x01, 0x62, 0x31,
+ 0x08, 0xc2, 0x09, 0x3e, 0xc4, 0xe6, 0xf3, 0x01, 0x62, 0x41, 0x05, 0x42,
+ 0x09, 0x4a, 0xcb, 0x21, 0x1a, 0x00, 0x58, 0x09, 0x03, 0xc2, 0x09, 0x56,
+ 0x42, 0x02, 0x52, 0xc2, 0x09, 0x62, 0xc5, 0x33, 0x1a, 0x00, 0x59, 0xe1,
+ 0xc8, 0x7c, 0x98, 0x00, 0x5a, 0xa8, 0x83, 0x00, 0x58, 0x2b, 0x02, 0x09,
+ 0x6e, 0x8b, 0x00, 0x58, 0x3b, 0x02, 0x09, 0x7a, 0x97, 0x00, 0x58, 0x4b,
+ 0x02, 0x09, 0x7e, 0x18, 0xc2, 0x09, 0x82, 0x87, 0x00, 0x58, 0x79, 0x91,
+ 0x00, 0x58, 0x99, 0x0d, 0xc2, 0x09, 0x8c, 0x09, 0xc2, 0x09, 0x96, 0x10,
+ 0xc2, 0x09, 0xa0, 0x05, 0xc2, 0x09, 0xb6, 0x0c, 0xc2, 0x09, 0xc0, 0x16,
+ 0xc2, 0x09, 0xca, 0x06, 0xc2, 0x09, 0xd8, 0x12, 0xc2, 0x09, 0xe6, 0x04,
+ 0xc2, 0x09, 0xf0, 0xc2, 0x00, 0x3f, 0x00, 0x59, 0x71, 0x1b, 0xc2, 0x09,
+ 0xfa, 0x14, 0xc2, 0x0a, 0x04, 0x0e, 0xc2, 0x0a, 0x14, 0x15, 0xc2, 0x0a,
+ 0x1e, 0xc2, 0x01, 0x0e, 0x00, 0x59, 0xc9, 0xc2, 0x00, 0x4c, 0x00, 0x5b,
+ 0x88, 0x03, 0xc2, 0x0a, 0x2e, 0x8b, 0x00, 0x59, 0xfb, 0x02, 0x0a, 0x3a,
+ 0x97, 0x00, 0x5a, 0x0b, 0x02, 0x0a, 0x3e, 0x48, 0xb7, 0xd7, 0xc2, 0x0a,
+ 0x42, 0x87, 0x00, 0x5a, 0x39, 0x91, 0x00, 0x5a, 0x58, 0xcd, 0x71, 0x59,
+ 0x00, 0x5a, 0xb1, 0xcd, 0x74, 0xe7, 0x00, 0x5a, 0xb8, 0xc4, 0x16, 0x57,
+ 0x00, 0x5b, 0x31, 0xc3, 0x05, 0x17, 0x00, 0x5b, 0x39, 0x16, 0xc2, 0x0a,
+ 0x50, 0x08, 0xc2, 0x0a, 0x5c, 0x15, 0xc2, 0x0a, 0x68, 0xc5, 0x05, 0x1b,
+ 0x00, 0x5b, 0x71, 0xc4, 0x24, 0x35, 0x00, 0x5b, 0x78, 0x44, 0x05, 0x17,
+ 0xc2, 0x0a, 0x74, 0x46, 0x05, 0xdd, 0x42, 0x0a, 0x8c, 0x0a, 0xc2, 0x0a,
+ 0x98, 0x19, 0xc2, 0x0a, 0xaa, 0xc2, 0x01, 0x04, 0x0f, 0x68, 0x52, 0x02,
+ 0x0a, 0xba, 0x11, 0xc2, 0x0a, 0xc0, 0x0b, 0x42, 0x0a, 0xd2, 0x00, 0x42,
+ 0x0a, 0xe4, 0xc2, 0x22, 0x45, 0x0f, 0x68, 0x33, 0x02, 0x0a, 0xf0, 0xc4,
+ 0x15, 0xa7, 0x0f, 0x68, 0x3a, 0x02, 0x0a, 0xfd, 0x9b, 0x0f, 0x68, 0x8b,
+ 0x02, 0x0b, 0x0a, 0x00, 0x42, 0x0b, 0x10, 0xc2, 0x0d, 0x8b, 0x0f, 0x68,
+ 0x93, 0x02, 0x0b, 0x1c, 0x00, 0x42, 0x0b, 0x22, 0xc2, 0x0a, 0x20, 0x0f,
+ 0x69, 0x7b, 0x02, 0x0b, 0x2e, 0xc4, 0x05, 0xde, 0x0f, 0x69, 0x81, 0xc2,
+ 0x01, 0x04, 0x0f, 0x69, 0xba, 0x02, 0x0b, 0x34, 0xc3, 0x08, 0xde, 0x0f,
+ 0x69, 0x8b, 0x02, 0x0b, 0x3a, 0xc3, 0x0d, 0x8f, 0x0f, 0x69, 0x90, 0xc2,
+ 0x22, 0x45, 0x0f, 0x69, 0x9b, 0x02, 0x0b, 0x40, 0xc4, 0x15, 0xa7, 0x0f,
+ 0x69, 0xa0, 0xc6, 0x2d, 0xdf, 0x01, 0x3e, 0x21, 0xc4, 0x0d, 0xd3, 0x01,
+ 0x3e, 0x18, 0xd8, 0x24, 0xec, 0x01, 0x39, 0xe1, 0xc8, 0x0a, 0x5f, 0x01,
+ 0x39, 0x91, 0xca, 0x25, 0x5a, 0x01, 0x39, 0x59, 0xc5, 0x0c, 0xa3, 0x01,
+ 0x38, 0xd8, 0x9a, 0x01, 0x21, 0x19, 0xc2, 0x03, 0x84, 0x0f, 0xa6, 0xb0,
+ 0xc5, 0x5a, 0x95, 0x0f, 0xae, 0x09, 0xca, 0xa1, 0x00, 0x0f, 0xa6, 0x10,
+ 0xcc, 0x87, 0xd8, 0x0f, 0xa7, 0x69, 0xcb, 0x8f, 0xb8, 0x0f, 0xa7, 0x60,
+ 0xce, 0x1b, 0x7a, 0x0b, 0x7f, 0x19, 0xc9, 0xab, 0xe3, 0x0b, 0x7f, 0x10,
+ 0x4c, 0x11, 0x33, 0xc2, 0x0b, 0x46, 0x4a, 0x57, 0x1f, 0xc2, 0x0b, 0x58,
+ 0x47, 0x01, 0xff, 0x42, 0x0b, 0x64, 0x46, 0xc7, 0xa0, 0xc2, 0x0b, 0xba,
+ 0x4c, 0x8d, 0xa8, 0x42, 0x0b, 0xca, 0x47, 0x37, 0x49, 0xc2, 0x0b, 0xd6,
+ 0x4d, 0x27, 0x71, 0xc2, 0x0b, 0xeb, 0x4f, 0x01, 0xf7, 0x42, 0x0c, 0x26,
+ 0x47, 0xc5, 0xa0, 0xc2, 0x0c, 0x61, 0x48, 0xc1, 0xeb, 0x42, 0x0c, 0x80,
+ 0x47, 0x37, 0x49, 0xc2, 0x0c, 0x99, 0x47, 0x01, 0xff, 0x42, 0x0c, 0xa3,
+ 0x15, 0xc2, 0x0d, 0x03, 0x4b, 0x54, 0xd2, 0x42, 0x0d, 0x0f, 0x47, 0x01,
+ 0xff, 0xc2, 0x0d, 0x82, 0x48, 0x53, 0xb0, 0x42, 0x0d, 0xdf, 0xcd, 0x7e,
+ 0x4d, 0x00, 0xe3, 0xf9, 0xc6, 0x7e, 0x54, 0x00, 0xe3, 0xf0, 0x8a, 0x00,
0xe3, 0xb9, 0x98, 0x00, 0xe3, 0xb1, 0x84, 0x00, 0xe3, 0xa9, 0xc2, 0x02,
- 0x92, 0x00, 0xe3, 0xa0, 0x91, 0x00, 0xe3, 0x99, 0x87, 0x00, 0xe3, 0x71,
+ 0x01, 0x00, 0xe3, 0xa0, 0x91, 0x00, 0xe3, 0x99, 0x87, 0x00, 0xe3, 0x71,
0x97, 0x00, 0xe3, 0x49, 0x8b, 0x00, 0xe3, 0x21, 0x83, 0x00, 0xe2, 0xd2,
- 0x02, 0x0e, 0xc5, 0xc2, 0x01, 0x10, 0x00, 0xe3, 0x91, 0x90, 0x00, 0xe3,
- 0x89, 0xc2, 0x00, 0x52, 0x00, 0xe3, 0x81, 0x92, 0x00, 0xe3, 0x78, 0x9b,
- 0x00, 0xe3, 0x69, 0xc2, 0x1c, 0x13, 0x00, 0xe3, 0x61, 0x86, 0x00, 0xe3,
- 0x59, 0x85, 0x00, 0xe3, 0x50, 0x94, 0x00, 0xe3, 0x41, 0xc2, 0x15, 0x1c,
+ 0x02, 0x0d, 0xf1, 0xc2, 0x00, 0x63, 0x00, 0xe3, 0x91, 0x90, 0x00, 0xe3,
+ 0x89, 0xc2, 0x01, 0x8e, 0x00, 0xe3, 0x81, 0x92, 0x00, 0xe3, 0x78, 0x9b,
+ 0x00, 0xe3, 0x69, 0xc2, 0x1c, 0x5e, 0x00, 0xe3, 0x61, 0x86, 0x00, 0xe3,
+ 0x59, 0x85, 0x00, 0xe3, 0x50, 0x94, 0x00, 0xe3, 0x41, 0xc2, 0x18, 0x79,
0x00, 0xe3, 0x39, 0x8a, 0x00, 0xe3, 0x31, 0x95, 0x00, 0xe3, 0x28, 0x03,
- 0xc2, 0x0e, 0xc9, 0x8e, 0x00, 0xe2, 0xf1, 0xc2, 0x00, 0x35, 0x00, 0xe2,
- 0xe9, 0x89, 0x00, 0xe2, 0xe1, 0x96, 0x00, 0xe2, 0xd8, 0xc4, 0x18, 0x83,
- 0x00, 0xe2, 0xb9, 0xc2, 0x26, 0x51, 0x00, 0xe2, 0xb0, 0xc3, 0x0c, 0x5b,
- 0x00, 0xe2, 0xa9, 0xc3, 0x06, 0x9e, 0x00, 0xe2, 0xa0, 0xc4, 0x04, 0x5e,
- 0x00, 0xe2, 0x99, 0xc2, 0x01, 0x47, 0x00, 0xe2, 0x90, 0x46, 0x02, 0x31,
- 0xc2, 0x0e, 0xd9, 0xcd, 0x52, 0xca, 0x01, 0x5d, 0xe0, 0xc9, 0xa9, 0x46,
- 0x00, 0xb4, 0xc9, 0xc5, 0xde, 0x85, 0x00, 0xb4, 0xa9, 0xc5, 0xcc, 0xbc,
- 0x00, 0xb4, 0x98, 0xc3, 0x00, 0x60, 0x00, 0xb4, 0xc1, 0xc6, 0xcc, 0xbb,
- 0x00, 0xb4, 0xa0, 0xc7, 0xc5, 0xd9, 0x00, 0xb4, 0xb9, 0x94, 0x00, 0xb4,
- 0x91, 0xc3, 0x07, 0x87, 0x00, 0xb4, 0x30, 0x94, 0x00, 0xb4, 0xb1, 0xc2,
- 0x1c, 0x13, 0x00, 0xb4, 0x88, 0xc5, 0xd9, 0x49, 0x00, 0xb4, 0x71, 0xc3,
- 0x12, 0xec, 0x00, 0xb4, 0x20, 0xc6, 0xcf, 0x49, 0x00, 0xb4, 0x69, 0xc3,
- 0x00, 0xb6, 0x00, 0xb4, 0x28, 0xc4, 0xe4, 0x57, 0x00, 0xb4, 0x51, 0xc3,
- 0x3a, 0xbf, 0x00, 0xb4, 0x48, 0xc3, 0x01, 0x1e, 0x08, 0x24, 0x01, 0x83,
- 0x08, 0x24, 0xd8, 0xc2, 0x00, 0xa4, 0x08, 0x24, 0x29, 0xc3, 0xbb, 0xff,
- 0x08, 0x24, 0x78, 0xc3, 0x0e, 0xa4, 0x08, 0x24, 0x31, 0xc2, 0x00, 0xa4,
- 0x08, 0x24, 0x50, 0x83, 0x08, 0x24, 0x41, 0xc4, 0xe1, 0x1f, 0x08, 0x24,
- 0x48, 0x87, 0x08, 0x24, 0xe0, 0x91, 0x08, 0x24, 0xe8, 0xc2, 0x01, 0x47,
- 0x08, 0x25, 0x11, 0xc4, 0x04, 0x5e, 0x08, 0x25, 0x18, 0xc3, 0x06, 0x9e,
- 0x08, 0x25, 0x21, 0xc3, 0x0c, 0x5b, 0x08, 0x25, 0x28, 0xc2, 0x26, 0x51,
- 0x08, 0x25, 0x31, 0xc4, 0x18, 0x83, 0x08, 0x25, 0x38, 0x8b, 0x08, 0x25,
- 0x8b, 0x02, 0x0e, 0xe5, 0x8a, 0x08, 0x25, 0x98, 0x0a, 0xc2, 0x0e, 0xe9,
- 0xc2, 0x00, 0x34, 0x08, 0x25, 0xc0, 0x83, 0x08, 0x25, 0xc9, 0xc2, 0x1d,
- 0x5f, 0x08, 0x25, 0xd0, 0x83, 0x08, 0x25, 0xe1, 0xc2, 0x1d, 0x5f, 0x08,
- 0x25, 0xf1, 0xc2, 0x00, 0xa4, 0x08, 0x26, 0x80, 0xc2, 0x00, 0x34, 0x08,
- 0x26, 0x18, 0x83, 0x08, 0x26, 0x31, 0xc2, 0x00, 0xa4, 0x08, 0x26, 0x38,
- 0x83, 0x08, 0x26, 0x41, 0x15, 0x42, 0x0e, 0xff, 0x83, 0x08, 0x26, 0x91,
- 0xc2, 0x00, 0xa4, 0x08, 0x26, 0x98, 0x8b, 0x08, 0x26, 0xcb, 0x02, 0x0f,
- 0x09, 0x8a, 0x08, 0x26, 0xd8, 0x0a, 0xc2, 0x0f, 0x0d, 0xc2, 0x00, 0x34,
- 0x08, 0x27, 0x00, 0x83, 0x08, 0x27, 0x09, 0xc2, 0x1d, 0x5f, 0x08, 0x27,
- 0x10, 0x83, 0x08, 0x27, 0x21, 0xc2, 0x1d, 0x5f, 0x08, 0x27, 0x31, 0xc2,
- 0x00, 0xa4, 0x08, 0x27, 0xc0, 0xc2, 0x00, 0x34, 0x08, 0x27, 0x58, 0x83,
- 0x08, 0x27, 0x71, 0xc2, 0x00, 0xa4, 0x08, 0x27, 0x78, 0x83, 0x08, 0x27,
- 0x81, 0x15, 0x42, 0x0f, 0x23, 0x83, 0x08, 0x27, 0xd1, 0xc2, 0x00, 0xa4,
- 0x08, 0x27, 0xd8, 0xc2, 0x05, 0xdc, 0x0e, 0x7e, 0x19, 0xc3, 0x9d, 0xab,
- 0x0e, 0x7a, 0xe1, 0xc6, 0xcc, 0xb5, 0x0e, 0x7a, 0x90, 0xc8, 0xc0, 0x55,
- 0x0e, 0x7c, 0x81, 0xc8, 0x93, 0x2b, 0x0e, 0x7b, 0x80, 0xcf, 0x64, 0x70,
- 0x0e, 0x7a, 0xc8, 0xd0, 0x58, 0x82, 0x0e, 0x7b, 0xa9, 0xc6, 0xce, 0xb9,
- 0x0e, 0x7b, 0x68, 0x00, 0x42, 0x0f, 0x2d, 0xc2, 0x23, 0xb6, 0x0e, 0x7c,
- 0x09, 0xc2, 0x05, 0xdc, 0x0e, 0x7a, 0x82, 0x02, 0x0f, 0x3d, 0x45, 0xd6,
- 0xba, 0xc2, 0x0f, 0x43, 0xc4, 0xe3, 0x43, 0x0e, 0x7c, 0x33, 0x02, 0x0f,
- 0x67, 0xc6, 0xcd, 0xf9, 0x0e, 0x7a, 0xb2, 0x02, 0x0f, 0x6b, 0x00, 0x42,
- 0x0f, 0x6f, 0x4d, 0x7f, 0xd2, 0xc2, 0x0f, 0x7b, 0x47, 0x83, 0xe1, 0xc2,
- 0x0f, 0x93, 0x16, 0xc2, 0x0f, 0x9f, 0xc8, 0x4d, 0xc2, 0x0e, 0x7b, 0x91,
- 0xc9, 0xaa, 0x5d, 0x0e, 0x7b, 0x88, 0x47, 0x83, 0xe1, 0xc2, 0x0f, 0xab,
- 0xc7, 0xc5, 0x85, 0x0e, 0x7d, 0x40, 0xc7, 0x2d, 0xf2, 0x0e, 0x7a, 0xe9,
- 0xc6, 0xcd, 0x63, 0x0e, 0x7a, 0xa8, 0xcb, 0x95, 0xbd, 0x0e, 0x7b, 0x51,
- 0xc8, 0x4d, 0xc2, 0x0e, 0x7b, 0x49, 0xc9, 0xaa, 0x5d, 0x0e, 0x7b, 0x41,
- 0xc8, 0xbb, 0x05, 0x0e, 0x7b, 0x38, 0xc8, 0xbd, 0xc5, 0x0e, 0x7b, 0x11,
- 0xc4, 0xcc, 0x93, 0x0e, 0x7a, 0xf8, 0xc4, 0x78, 0x92, 0x0e, 0x7a, 0x03,
- 0x02, 0x0f, 0xbd, 0xc5, 0xdc, 0x78, 0x0e, 0x79, 0x49, 0xc6, 0xd0, 0xb7,
- 0x0e, 0x79, 0x40, 0xca, 0xa0, 0x5e, 0x0e, 0x79, 0xf9, 0xc6, 0xd2, 0xcd,
- 0x0e, 0x79, 0xc2, 0x02, 0x0f, 0xc3, 0xc9, 0xac, 0x9d, 0x0e, 0x79, 0xe9,
- 0xd4, 0x3c, 0x51, 0x0e, 0x79, 0xa0, 0xc5, 0xbc, 0x98, 0x0e, 0x79, 0xe1,
- 0xc6, 0x6e, 0x6b, 0x0e, 0x79, 0x19, 0x45, 0xc4, 0x5c, 0x42, 0x0f, 0xc9,
- 0xce, 0x3b, 0x03, 0x0e, 0x79, 0xd9, 0xc4, 0xe2, 0x23, 0x0e, 0x79, 0x59,
- 0xd3, 0x41, 0xfa, 0x0e, 0x78, 0xd1, 0x49, 0xb4, 0xe9, 0x42, 0x0f, 0xd5,
- 0xc7, 0xc7, 0xc3, 0x0e, 0x79, 0xd1, 0xc7, 0xc5, 0xd2, 0x0e, 0x79, 0xa9,
- 0x90, 0x0e, 0x79, 0x08, 0x06, 0xc2, 0x0f, 0xe1, 0x46, 0x80, 0x00, 0x42,
- 0x0f, 0xf0, 0xc8, 0x3c, 0x5d, 0x0e, 0x79, 0x99, 0x07, 0x42, 0x0f, 0xfa,
- 0xc5, 0xd5, 0x25, 0x0e, 0x79, 0x61, 0xc3, 0xe6, 0xb8, 0x0e, 0x79, 0x10,
- 0xc6, 0xc3, 0x40, 0x0e, 0x78, 0xf9, 0x46, 0xcd, 0xc3, 0x42, 0x10, 0x06,
- 0x15, 0xc2, 0x10, 0x12, 0x43, 0x00, 0xb7, 0x42, 0x10, 0x1e, 0x43, 0x38,
- 0xbd, 0xc2, 0x10, 0x2a, 0x43, 0x00, 0xb7, 0x42, 0x10, 0x36, 0x43, 0x00,
- 0xb7, 0xc2, 0x10, 0x42, 0x4d, 0x78, 0x8f, 0x42, 0x10, 0x4e, 0xc5, 0x44,
- 0x7b, 0x08, 0xd1, 0xc9, 0xc4, 0x0f, 0x7c, 0x08, 0xd1, 0xa0, 0xce, 0x20,
- 0x73, 0x08, 0xd1, 0xb9, 0xc5, 0x1e, 0x1d, 0x08, 0xd1, 0xaa, 0x02, 0x10,
- 0x5a, 0xc2, 0x04, 0x41, 0x08, 0xd1, 0xf1, 0xc2, 0x00, 0xc7, 0x08, 0xd1,
- 0xe9, 0xc2, 0x02, 0x59, 0x08, 0xd1, 0xe1, 0xc2, 0x1d, 0x5f, 0x08, 0xd1,
- 0xd8, 0xc2, 0x00, 0xa4, 0x08, 0xd1, 0x31, 0x83, 0x08, 0xd1, 0x28, 0xc2,
- 0x00, 0xa4, 0x08, 0xd0, 0xb9, 0x83, 0x08, 0xd0, 0xb0, 0xc2, 0x00, 0xa4,
- 0x08, 0xd1, 0x21, 0x83, 0x08, 0xd1, 0x18, 0xc2, 0x00, 0xa4, 0x08, 0xd0,
+ 0xc2, 0x0d, 0xf5, 0x8e, 0x00, 0xe2, 0xf1, 0xc2, 0x00, 0x56, 0x00, 0xe2,
+ 0xe9, 0x89, 0x00, 0xe2, 0xe1, 0x96, 0x00, 0xe2, 0xd8, 0xc4, 0x15, 0xa7,
+ 0x00, 0xe2, 0xb9, 0xc2, 0x22, 0x45, 0x00, 0xe2, 0xb0, 0xc3, 0x0d, 0x8f,
+ 0x00, 0xe2, 0xa9, 0xc3, 0x08, 0xde, 0x00, 0xe2, 0xa0, 0xc4, 0x05, 0xde,
+ 0x00, 0xe2, 0x99, 0xc2, 0x0a, 0x20, 0x00, 0xe2, 0x90, 0x46, 0x01, 0x31,
+ 0xc2, 0x0e, 0x05, 0xcd, 0x50, 0x3b, 0x01, 0x5d, 0xe0, 0xc9, 0xb4, 0xec,
+ 0x00, 0xb4, 0xc9, 0xc5, 0xdc, 0x07, 0x00, 0xb4, 0xa9, 0xc5, 0xd1, 0xc7,
+ 0x00, 0xb4, 0x98, 0xc3, 0x02, 0xf8, 0x00, 0xb4, 0xc1, 0xc6, 0xd1, 0xc6,
+ 0x00, 0xb4, 0xa0, 0xc7, 0xcb, 0xc7, 0x00, 0xb4, 0xb9, 0x94, 0x00, 0xb4,
+ 0x91, 0xc3, 0x06, 0x87, 0x00, 0xb4, 0x30, 0x94, 0x00, 0xb4, 0xb1, 0xc2,
+ 0x1c, 0x5e, 0x00, 0xb4, 0x88, 0xc5, 0xde, 0xbe, 0x00, 0xb4, 0x71, 0xc3,
+ 0x13, 0xc8, 0x00, 0xb4, 0x20, 0xc6, 0xd5, 0xb0, 0x00, 0xb4, 0x69, 0xc3,
+ 0x00, 0xe4, 0x00, 0xb4, 0x28, 0xc4, 0xe6, 0xe3, 0x00, 0xb4, 0x51, 0xc3,
+ 0x1b, 0xba, 0x00, 0xb4, 0x48, 0xc3, 0x00, 0xcd, 0x08, 0x24, 0x01, 0x83,
+ 0x08, 0x24, 0xd8, 0xc2, 0x01, 0x0e, 0x08, 0x24, 0x29, 0xc3, 0x4d, 0x26,
+ 0x08, 0x24, 0x78, 0xc3, 0x0f, 0x55, 0x08, 0x24, 0x31, 0xc2, 0x01, 0x0e,
+ 0x08, 0x24, 0x50, 0x83, 0x08, 0x24, 0x41, 0xc4, 0xe7, 0x5b, 0x08, 0x24,
+ 0x48, 0x87, 0x08, 0x24, 0xe0, 0x91, 0x08, 0x24, 0xe8, 0xc2, 0x0a, 0x20,
+ 0x08, 0x25, 0x11, 0xc4, 0x05, 0xde, 0x08, 0x25, 0x18, 0xc3, 0x08, 0xde,
+ 0x08, 0x25, 0x21, 0xc3, 0x0d, 0x8f, 0x08, 0x25, 0x28, 0xc2, 0x22, 0x45,
+ 0x08, 0x25, 0x31, 0xc4, 0x15, 0xa7, 0x08, 0x25, 0x38, 0x8b, 0x08, 0x25,
+ 0x8b, 0x02, 0x0e, 0x11, 0x8a, 0x08, 0x25, 0x98, 0x0a, 0xc2, 0x0e, 0x15,
+ 0xc2, 0x01, 0x5b, 0x08, 0x25, 0xc0, 0x83, 0x08, 0x25, 0xc9, 0xc2, 0x1a,
+ 0x36, 0x08, 0x25, 0xd0, 0x83, 0x08, 0x25, 0xe1, 0xc2, 0x1a, 0x36, 0x08,
+ 0x25, 0xf1, 0xc2, 0x01, 0x0e, 0x08, 0x26, 0x80, 0xc2, 0x01, 0x5b, 0x08,
+ 0x26, 0x18, 0x83, 0x08, 0x26, 0x31, 0xc2, 0x01, 0x0e, 0x08, 0x26, 0x38,
+ 0x83, 0x08, 0x26, 0x41, 0x15, 0x42, 0x0e, 0x2b, 0x83, 0x08, 0x26, 0x91,
+ 0xc2, 0x01, 0x0e, 0x08, 0x26, 0x98, 0x8b, 0x08, 0x26, 0xcb, 0x02, 0x0e,
+ 0x35, 0x8a, 0x08, 0x26, 0xd8, 0x0a, 0xc2, 0x0e, 0x39, 0xc2, 0x01, 0x5b,
+ 0x08, 0x27, 0x00, 0x83, 0x08, 0x27, 0x09, 0xc2, 0x1a, 0x36, 0x08, 0x27,
+ 0x10, 0x83, 0x08, 0x27, 0x21, 0xc2, 0x1a, 0x36, 0x08, 0x27, 0x31, 0xc2,
+ 0x01, 0x0e, 0x08, 0x27, 0xc0, 0xc2, 0x01, 0x5b, 0x08, 0x27, 0x58, 0x83,
+ 0x08, 0x27, 0x71, 0xc2, 0x01, 0x0e, 0x08, 0x27, 0x78, 0x83, 0x08, 0x27,
+ 0x81, 0x15, 0x42, 0x0e, 0x4f, 0x83, 0x08, 0x27, 0xd1, 0xc2, 0x01, 0x0e,
+ 0x08, 0x27, 0xd8, 0xc2, 0x04, 0x3c, 0x0e, 0x7e, 0x19, 0xc3, 0x47, 0xe1,
+ 0x0e, 0x7a, 0xe1, 0xc6, 0xd5, 0x44, 0x0e, 0x7a, 0x90, 0xc8, 0xbc, 0x43,
+ 0x0e, 0x7c, 0x81, 0xc8, 0x91, 0x72, 0x0e, 0x7b, 0x80, 0xcf, 0x67, 0x8f,
+ 0x0e, 0x7a, 0xc8, 0xd0, 0x5e, 0x3f, 0x0e, 0x7b, 0xa9, 0xc6, 0xd5, 0xb6,
+ 0x0e, 0x7b, 0x68, 0x00, 0x42, 0x0e, 0x59, 0xc2, 0x23, 0x6a, 0x0e, 0x7c,
+ 0x09, 0xc2, 0x04, 0x3c, 0x0e, 0x7a, 0x82, 0x02, 0x0e, 0x69, 0x45, 0xda,
+ 0x1d, 0xc2, 0x0e, 0x6f, 0xc4, 0xe5, 0x6b, 0x0e, 0x7c, 0x33, 0x02, 0x0e,
+ 0x93, 0xc6, 0xd2, 0x4a, 0x0e, 0x7a, 0xb2, 0x02, 0x0e, 0x97, 0x00, 0x42,
+ 0x0e, 0x9b, 0x4d, 0x7a, 0xf3, 0xc2, 0x0e, 0xa7, 0x47, 0x5e, 0x49, 0xc2,
+ 0x0e, 0xbf, 0x16, 0xc2, 0x0e, 0xcb, 0xc8, 0x49, 0x54, 0x0e, 0x7b, 0x91,
+ 0xc9, 0xad, 0xed, 0x0e, 0x7b, 0x88, 0x47, 0x5e, 0x49, 0xc2, 0x0e, 0xd7,
+ 0xc7, 0xcd, 0x41, 0x0e, 0x7d, 0x40, 0xc7, 0x2c, 0x4c, 0x0e, 0x7a, 0xe9,
+ 0xc6, 0xd2, 0x38, 0x0e, 0x7a, 0xa8, 0xcb, 0x96, 0x35, 0x0e, 0x7b, 0x51,
+ 0xc8, 0x49, 0x54, 0x0e, 0x7b, 0x49, 0xc9, 0xad, 0xed, 0x0e, 0x7b, 0x41,
+ 0xc8, 0xbf, 0x23, 0x0e, 0x7b, 0x38, 0xc8, 0xc1, 0xb3, 0x0e, 0x7b, 0x11,
+ 0xc4, 0xd8, 0x64, 0x0e, 0x7a, 0xf8, 0xc4, 0x80, 0xcd, 0x0e, 0x7a, 0x03,
+ 0x02, 0x0e, 0xe9, 0xc5, 0xe3, 0x37, 0x0e, 0x79, 0x49, 0xc6, 0xd5, 0x8c,
+ 0x0e, 0x79, 0x40, 0xca, 0xa1, 0xdc, 0x0e, 0x79, 0xf9, 0xc6, 0xd6, 0x9a,
+ 0x0e, 0x79, 0xc2, 0x02, 0x0e, 0xef, 0xc9, 0xb7, 0xb3, 0x0e, 0x79, 0xe9,
+ 0xd4, 0x3d, 0x2a, 0x0e, 0x79, 0xa0, 0xc5, 0xbc, 0xce, 0x0e, 0x79, 0xe1,
+ 0xc6, 0x6e, 0xd4, 0x0e, 0x79, 0x19, 0x45, 0x63, 0xa1, 0x42, 0x0e, 0xf5,
+ 0xce, 0x3a, 0xc4, 0x0e, 0x79, 0xd9, 0xc4, 0xe5, 0x33, 0x0e, 0x79, 0x59,
+ 0xd3, 0x43, 0xc0, 0x0e, 0x78, 0xd1, 0x49, 0xb6, 0x81, 0x42, 0x0f, 0x01,
+ 0xc7, 0xcc, 0x45, 0x0e, 0x79, 0xd1, 0xc7, 0xc8, 0x0f, 0x0e, 0x79, 0xa9,
+ 0x90, 0x0e, 0x79, 0x08, 0x06, 0xc2, 0x0f, 0x0d, 0x46, 0x7c, 0x18, 0x42,
+ 0x0f, 0x1c, 0xc8, 0x3d, 0x36, 0x0e, 0x79, 0x99, 0x07, 0x42, 0x0f, 0x26,
+ 0xc5, 0xda, 0x40, 0x0e, 0x79, 0x61, 0xc3, 0xa8, 0x1b, 0x0e, 0x79, 0x10,
+ 0xc6, 0xc8, 0x55, 0x0e, 0x78, 0xf9, 0x46, 0xd1, 0x1e, 0x42, 0x0f, 0x32,
+ 0x15, 0xc2, 0x0f, 0x3e, 0x43, 0x00, 0xf7, 0x42, 0x0f, 0x4a, 0x43, 0x3f,
+ 0x0e, 0xc2, 0x0f, 0x56, 0x43, 0x00, 0xf7, 0x42, 0x0f, 0x62, 0x43, 0x00,
+ 0xf7, 0xc2, 0x0f, 0x6e, 0x4d, 0x80, 0xca, 0x42, 0x0f, 0x7a, 0xc5, 0x45,
+ 0xcf, 0x08, 0xd1, 0xc9, 0xc4, 0x21, 0x28, 0x08, 0xd1, 0xa0, 0xce, 0x1e,
+ 0x30, 0x08, 0xd1, 0xb9, 0xc5, 0x21, 0x20, 0x08, 0xd1, 0xaa, 0x02, 0x0f,
+ 0x86, 0xc2, 0x06, 0x8c, 0x08, 0xd1, 0xf1, 0xc2, 0x00, 0x96, 0x08, 0xd1,
+ 0xe9, 0xc2, 0x00, 0x9a, 0x08, 0xd1, 0xe1, 0xc2, 0x1a, 0x36, 0x08, 0xd1,
+ 0xd8, 0xc2, 0x01, 0x0e, 0x08, 0xd1, 0x31, 0x83, 0x08, 0xd1, 0x28, 0xc2,
+ 0x01, 0x0e, 0x08, 0xd0, 0xb9, 0x83, 0x08, 0xd0, 0xb0, 0xc2, 0x01, 0x0e,
+ 0x08, 0xd1, 0x21, 0x83, 0x08, 0xd1, 0x18, 0xc2, 0x01, 0x0e, 0x08, 0xd0,
0xa9, 0x83, 0x08, 0xd0, 0xa0, 0x97, 0x08, 0xd0, 0x41, 0x8b, 0x08, 0xd0,
- 0x38, 0x87, 0x08, 0xd0, 0x28, 0x87, 0x08, 0xd0, 0x10, 0xc9, 0xb6, 0x00,
- 0x01, 0x51, 0x09, 0xc5, 0xdd, 0x95, 0x01, 0x51, 0x00, 0x03, 0xc2, 0x10,
- 0x60, 0x12, 0xc2, 0x10, 0x6f, 0xc5, 0xdb, 0xd3, 0x05, 0x4e, 0x31, 0x0e,
- 0xc2, 0x10, 0x7b, 0xc5, 0xdc, 0x46, 0x05, 0x4e, 0x21, 0xcd, 0x79, 0x86,
- 0x05, 0x4e, 0xf1, 0xc9, 0xac, 0x4c, 0x05, 0x4e, 0xf8, 0xc7, 0xc4, 0x97,
- 0x05, 0x4e, 0x79, 0xc3, 0x6c, 0x12, 0x05, 0x4e, 0x00, 0xc2, 0x01, 0x29,
- 0x05, 0x4c, 0x93, 0x02, 0x10, 0x85, 0xc2, 0x00, 0xa4, 0x05, 0x4d, 0x91,
- 0xc2, 0x0c, 0x65, 0x05, 0x4d, 0x8b, 0x02, 0x10, 0x8b, 0xc2, 0x01, 0x09,
- 0x05, 0x4d, 0x71, 0xc2, 0x00, 0xc7, 0x05, 0x4d, 0x69, 0xc2, 0x02, 0x59,
- 0x05, 0x4d, 0x5b, 0x02, 0x10, 0x91, 0xc2, 0x1d, 0x5f, 0x05, 0x4d, 0x51,
- 0xc2, 0x00, 0xad, 0x05, 0x4d, 0x49, 0xc2, 0x00, 0xde, 0x05, 0x4d, 0x3b,
- 0x02, 0x10, 0x97, 0xc2, 0x03, 0xa4, 0x05, 0x4d, 0x2b, 0x02, 0x10, 0x9d,
- 0x10, 0xc2, 0x10, 0xa1, 0x06, 0xc2, 0x10, 0xba, 0x16, 0xc2, 0x10, 0xca,
- 0xc2, 0x24, 0x58, 0x05, 0x4c, 0xbb, 0x02, 0x10, 0xda, 0xc2, 0x03, 0x40,
- 0x05, 0x4c, 0xab, 0x02, 0x10, 0xe0, 0xc2, 0x04, 0x2b, 0x05, 0x4c, 0x7b,
- 0x02, 0x10, 0xe6, 0x91, 0x05, 0x4c, 0x71, 0x83, 0x05, 0x4c, 0x23, 0x02,
- 0x10, 0xea, 0x87, 0x05, 0x4c, 0x61, 0x97, 0x05, 0x4c, 0x41, 0x8b, 0x05,
- 0x4c, 0x32, 0x02, 0x10, 0xee, 0xc4, 0x04, 0x5e, 0x05, 0x4e, 0x99, 0xc2,
- 0x01, 0x47, 0x05, 0x4e, 0x90, 0xc3, 0x06, 0x9e, 0x05, 0x4e, 0xa1, 0xc3,
- 0x0c, 0x5b, 0x05, 0x4e, 0xa8, 0xc2, 0x26, 0x51, 0x05, 0x4e, 0xb1, 0xc4,
- 0x18, 0x83, 0x05, 0x4e, 0xb8, 0x03, 0xc2, 0x10, 0xf8, 0xc5, 0x0e, 0x40,
- 0x05, 0x4d, 0xa8, 0xc7, 0xc8, 0xf0, 0x05, 0x4d, 0xc8, 0xc6, 0xcd, 0xbd,
- 0x05, 0x4d, 0xb8, 0xc5, 0xd5, 0x2f, 0x05, 0x4d, 0x98, 0xc5, 0x01, 0xf7,
- 0x01, 0x2c, 0xeb, 0x02, 0x11, 0x04, 0xc4, 0x01, 0x1e, 0x01, 0x2c, 0xc2,
- 0x02, 0x11, 0x0d, 0xc5, 0x01, 0xf7, 0x01, 0x2c, 0xb9, 0xc4, 0x01, 0x1e,
- 0x01, 0x2c, 0xb0, 0x1b, 0xc2, 0x11, 0x13, 0x0c, 0xc2, 0x11, 0x28, 0x14,
- 0xc2, 0x11, 0x44, 0x09, 0xc2, 0x11, 0x67, 0x1c, 0xc2, 0x11, 0x8e, 0x04,
- 0xc2, 0x11, 0xb5, 0x06, 0xc2, 0x11, 0xd8, 0x8b, 0x05, 0x0b, 0xfb, 0x02,
- 0x11, 0xfb, 0x83, 0x05, 0x0c, 0x2b, 0x02, 0x12, 0x0e, 0x97, 0x05, 0x0c,
- 0x9b, 0x02, 0x12, 0x16, 0x91, 0x05, 0x0c, 0x63, 0x02, 0x12, 0x30, 0x87,
- 0x05, 0x0c, 0x7a, 0x02, 0x12, 0x3c, 0x0c, 0xc2, 0x12, 0x44, 0x9b, 0x05,
- 0x1f, 0xc3, 0x02, 0x12, 0x60, 0x97, 0x05, 0x1f, 0x93, 0x02, 0x12, 0x73,
- 0x91, 0x05, 0x1f, 0x73, 0x02, 0x12, 0x8d, 0x8b, 0x05, 0x1f, 0x12, 0x02,
- 0x12, 0x99, 0x9b, 0x05, 0x20, 0xa3, 0x02, 0x12, 0xac, 0x97, 0x05, 0x20,
- 0x73, 0x02, 0x12, 0xbf, 0x91, 0x05, 0x20, 0x53, 0x02, 0x12, 0xd9, 0x8b,
- 0x05, 0x1f, 0xf2, 0x02, 0x12, 0xe5, 0x9b, 0x05, 0x1e, 0xe3, 0x02, 0x12,
- 0xf8, 0x97, 0x05, 0x1e, 0xb3, 0x02, 0x13, 0x0b, 0x87, 0x05, 0x1e, 0x93,
- 0x02, 0x13, 0x25, 0x91, 0x05, 0x1e, 0x7b, 0x02, 0x13, 0x2d, 0x83, 0x05,
- 0x1e, 0x43, 0x02, 0x13, 0x39, 0x14, 0x42, 0x13, 0x45, 0x0a, 0xc2, 0x13,
- 0x68, 0x15, 0xc2, 0x13, 0x8b, 0x8b, 0x05, 0x18, 0x5b, 0x02, 0x13, 0xb5,
- 0x83, 0x05, 0x18, 0x93, 0x02, 0x13, 0xc8, 0x97, 0x05, 0x19, 0x03, 0x02,
- 0x13, 0xd4, 0x91, 0x05, 0x18, 0xcb, 0x02, 0x13, 0xee, 0x87, 0x05, 0x18,
- 0xe3, 0x02, 0x13, 0xfa, 0x9b, 0x05, 0x19, 0x32, 0x02, 0x14, 0x02, 0x0a,
- 0xc2, 0x14, 0x15, 0x9b, 0x05, 0x16, 0x63, 0x02, 0x14, 0x38, 0x87, 0x05,
- 0x16, 0x13, 0x02, 0x14, 0x4b, 0x97, 0x05, 0x16, 0x33, 0x02, 0x14, 0x53,
- 0x8b, 0x05, 0x15, 0x83, 0x02, 0x14, 0x6d, 0x83, 0x05, 0x15, 0xc3, 0x02,
- 0x14, 0x80, 0x91, 0x05, 0x15, 0xfa, 0x02, 0x14, 0x8c, 0x87, 0x05, 0x15,
- 0x03, 0x02, 0x14, 0x98, 0x91, 0x05, 0x14, 0xeb, 0x02, 0x14, 0xa0, 0x97,
- 0x05, 0x15, 0x23, 0x02, 0x14, 0xac, 0x83, 0x05, 0x14, 0xb3, 0x02, 0x14,
- 0xc6, 0x8b, 0x05, 0x14, 0x7b, 0x02, 0x14, 0xd2, 0x1c, 0xc2, 0x14, 0xe5,
- 0x0a, 0xc2, 0x15, 0x0f, 0x9b, 0x05, 0x15, 0x52, 0x02, 0x15, 0x32, 0x87,
- 0x05, 0x14, 0x5b, 0x02, 0x15, 0x45, 0x91, 0x05, 0x14, 0x43, 0x02, 0x15,
- 0x4d, 0x97, 0x05, 0x00, 0xab, 0x02, 0x15, 0x55, 0x83, 0x05, 0x14, 0x12,
- 0x02, 0x15, 0x5c, 0x87, 0x05, 0x13, 0xf3, 0x02, 0x15, 0x68, 0x1a, 0xc2,
- 0x15, 0x70, 0x0b, 0xc2, 0x15, 0x95, 0x83, 0x05, 0x13, 0x9b, 0x02, 0x15,
- 0xa0, 0xc2, 0x00, 0xb1, 0x05, 0x13, 0xbb, 0x02, 0x15, 0xac, 0x91, 0x05,
- 0x13, 0xdb, 0x02, 0x15, 0xb8, 0x0f, 0xc2, 0x15, 0xc4, 0x10, 0xc2, 0x15,
- 0xe7, 0x0e, 0x42, 0x16, 0x04, 0x8b, 0x05, 0x23, 0x9b, 0x02, 0x16, 0x2e,
- 0x97, 0x05, 0x24, 0x1b, 0x02, 0x16, 0x41, 0x91, 0x05, 0x23, 0xfb, 0x02,
- 0x16, 0x5b, 0x9b, 0x05, 0x24, 0x4a, 0x02, 0x16, 0x67, 0x9b, 0x05, 0x23,
- 0x6b, 0x02, 0x16, 0x7a, 0x8b, 0x05, 0x22, 0xfb, 0x02, 0x16, 0x8d, 0x91,
- 0x05, 0x23, 0x4b, 0x02, 0x16, 0xa0, 0xc2, 0x00, 0xb1, 0x05, 0x23, 0x32,
- 0x02, 0x16, 0xac, 0x09, 0xc2, 0x16, 0xb0, 0x8b, 0x05, 0x05, 0x83, 0x02,
- 0x16, 0xd5, 0x83, 0x05, 0x05, 0xbb, 0x02, 0x16, 0xe8, 0x97, 0x05, 0x06,
- 0x2b, 0x02, 0x16, 0xf4, 0x91, 0x05, 0x05, 0xfb, 0x02, 0x17, 0x0e, 0x87,
- 0x05, 0x06, 0x13, 0x02, 0x17, 0x1a, 0x9b, 0x05, 0x06, 0x5a, 0x02, 0x17,
- 0x1e, 0x96, 0x05, 0x00, 0x03, 0x02, 0x17, 0x2a, 0x9a, 0x05, 0x00, 0x09,
- 0x92, 0x05, 0x00, 0x19, 0x87, 0x05, 0x00, 0x32, 0x02, 0x17, 0x30, 0x96,
+ 0x38, 0x87, 0x08, 0xd0, 0x28, 0x87, 0x08, 0xd0, 0x10, 0xc9, 0xae, 0xbc,
+ 0x01, 0x51, 0x09, 0xc5, 0xdc, 0x43, 0x01, 0x51, 0x00, 0x03, 0xc2, 0x0f,
+ 0x8c, 0x12, 0xc2, 0x0f, 0x9b, 0xc5, 0xe0, 0xd5, 0x05, 0x4e, 0x31, 0x0e,
+ 0xc2, 0x0f, 0xa7, 0xc5, 0xe3, 0x5f, 0x05, 0x4e, 0x21, 0xcd, 0x77, 0x8c,
+ 0x05, 0x4e, 0xf1, 0xc9, 0xb2, 0x2e, 0x05, 0x4e, 0xf8, 0xc7, 0xc9, 0x66,
+ 0x05, 0x4e, 0x79, 0xc3, 0x18, 0x3c, 0x05, 0x4e, 0x00, 0xc2, 0x07, 0x69,
+ 0x05, 0x4c, 0x93, 0x02, 0x0f, 0xb1, 0xc2, 0x01, 0x0e, 0x05, 0x4d, 0x91,
+ 0xc2, 0x0e, 0xe5, 0x05, 0x4d, 0x8b, 0x02, 0x0f, 0xb7, 0xc2, 0x00, 0x4c,
+ 0x05, 0x4d, 0x71, 0xc2, 0x00, 0x96, 0x05, 0x4d, 0x69, 0xc2, 0x00, 0x9a,
+ 0x05, 0x4d, 0x5b, 0x02, 0x0f, 0xbd, 0xc2, 0x1a, 0x36, 0x05, 0x4d, 0x51,
+ 0xc2, 0x00, 0x3f, 0x05, 0x4d, 0x49, 0xc2, 0x02, 0x1d, 0x05, 0x4d, 0x3b,
+ 0x02, 0x0f, 0xc3, 0xc2, 0x07, 0x44, 0x05, 0x4d, 0x2b, 0x02, 0x0f, 0xc9,
+ 0x10, 0xc2, 0x0f, 0xcd, 0x06, 0xc2, 0x0f, 0xe6, 0x16, 0xc2, 0x0f, 0xf6,
+ 0xc2, 0x26, 0x94, 0x05, 0x4c, 0xbb, 0x02, 0x10, 0x06, 0xc2, 0x00, 0x2e,
+ 0x05, 0x4c, 0xab, 0x02, 0x10, 0x0c, 0xc2, 0x06, 0x6b, 0x05, 0x4c, 0x7b,
+ 0x02, 0x10, 0x12, 0x91, 0x05, 0x4c, 0x71, 0x83, 0x05, 0x4c, 0x23, 0x02,
+ 0x10, 0x16, 0x87, 0x05, 0x4c, 0x61, 0x97, 0x05, 0x4c, 0x41, 0x8b, 0x05,
+ 0x4c, 0x32, 0x02, 0x10, 0x1a, 0xc4, 0x05, 0xde, 0x05, 0x4e, 0x99, 0xc2,
+ 0x0a, 0x20, 0x05, 0x4e, 0x90, 0xc3, 0x08, 0xde, 0x05, 0x4e, 0xa1, 0xc3,
+ 0x0d, 0x8f, 0x05, 0x4e, 0xa8, 0xc2, 0x22, 0x45, 0x05, 0x4e, 0xb1, 0xc4,
+ 0x15, 0xa7, 0x05, 0x4e, 0xb8, 0x03, 0xc2, 0x10, 0x24, 0xc5, 0x0c, 0x33,
+ 0x05, 0x4d, 0xa8, 0xc7, 0xcc, 0x14, 0x05, 0x4d, 0xc8, 0xc6, 0xd4, 0xae,
+ 0x05, 0x4d, 0xb8, 0xc5, 0xd9, 0xfa, 0x05, 0x4d, 0x98, 0xc5, 0x00, 0x47,
+ 0x01, 0x2c, 0xeb, 0x02, 0x10, 0x30, 0xc4, 0x00, 0xcd, 0x01, 0x2c, 0xc2,
+ 0x02, 0x10, 0x39, 0xc5, 0x00, 0x47, 0x01, 0x2c, 0xb9, 0xc4, 0x00, 0xcd,
+ 0x01, 0x2c, 0xb0, 0x1b, 0xc2, 0x10, 0x3f, 0x0c, 0xc2, 0x10, 0x54, 0x14,
+ 0xc2, 0x10, 0x70, 0x09, 0xc2, 0x10, 0x93, 0x1c, 0xc2, 0x10, 0xba, 0x04,
+ 0xc2, 0x10, 0xe1, 0x06, 0xc2, 0x11, 0x04, 0x8b, 0x05, 0x0b, 0xfb, 0x02,
+ 0x11, 0x27, 0x83, 0x05, 0x0c, 0x2b, 0x02, 0x11, 0x3a, 0x97, 0x05, 0x0c,
+ 0x9b, 0x02, 0x11, 0x42, 0x91, 0x05, 0x0c, 0x63, 0x02, 0x11, 0x5c, 0x87,
+ 0x05, 0x0c, 0x7a, 0x02, 0x11, 0x68, 0x0c, 0xc2, 0x11, 0x70, 0x9b, 0x05,
+ 0x1f, 0xc3, 0x02, 0x11, 0x8c, 0x97, 0x05, 0x1f, 0x93, 0x02, 0x11, 0x9f,
+ 0x91, 0x05, 0x1f, 0x73, 0x02, 0x11, 0xb9, 0x8b, 0x05, 0x1f, 0x12, 0x02,
+ 0x11, 0xc5, 0x9b, 0x05, 0x20, 0xa3, 0x02, 0x11, 0xd8, 0x97, 0x05, 0x20,
+ 0x73, 0x02, 0x11, 0xeb, 0x91, 0x05, 0x20, 0x53, 0x02, 0x12, 0x05, 0x8b,
+ 0x05, 0x1f, 0xf2, 0x02, 0x12, 0x11, 0x9b, 0x05, 0x1e, 0xe3, 0x02, 0x12,
+ 0x24, 0x97, 0x05, 0x1e, 0xb3, 0x02, 0x12, 0x37, 0x87, 0x05, 0x1e, 0x93,
+ 0x02, 0x12, 0x51, 0x91, 0x05, 0x1e, 0x7b, 0x02, 0x12, 0x59, 0x83, 0x05,
+ 0x1e, 0x43, 0x02, 0x12, 0x65, 0x14, 0x42, 0x12, 0x71, 0x0a, 0xc2, 0x12,
+ 0x94, 0x15, 0xc2, 0x12, 0xb7, 0x8b, 0x05, 0x18, 0x5b, 0x02, 0x12, 0xe1,
+ 0x83, 0x05, 0x18, 0x93, 0x02, 0x12, 0xf4, 0x97, 0x05, 0x19, 0x03, 0x02,
+ 0x13, 0x00, 0x91, 0x05, 0x18, 0xcb, 0x02, 0x13, 0x1a, 0x87, 0x05, 0x18,
+ 0xe3, 0x02, 0x13, 0x26, 0x9b, 0x05, 0x19, 0x32, 0x02, 0x13, 0x2e, 0x0a,
+ 0xc2, 0x13, 0x41, 0x9b, 0x05, 0x16, 0x63, 0x02, 0x13, 0x64, 0x87, 0x05,
+ 0x16, 0x13, 0x02, 0x13, 0x77, 0x97, 0x05, 0x16, 0x33, 0x02, 0x13, 0x7f,
+ 0x8b, 0x05, 0x15, 0x83, 0x02, 0x13, 0x99, 0x83, 0x05, 0x15, 0xc3, 0x02,
+ 0x13, 0xac, 0x91, 0x05, 0x15, 0xfa, 0x02, 0x13, 0xb8, 0x87, 0x05, 0x15,
+ 0x03, 0x02, 0x13, 0xc4, 0x91, 0x05, 0x14, 0xeb, 0x02, 0x13, 0xcc, 0x97,
+ 0x05, 0x15, 0x23, 0x02, 0x13, 0xd8, 0x83, 0x05, 0x14, 0xb3, 0x02, 0x13,
+ 0xf2, 0x8b, 0x05, 0x14, 0x7b, 0x02, 0x13, 0xfe, 0x1c, 0xc2, 0x14, 0x11,
+ 0x0a, 0xc2, 0x14, 0x3b, 0x9b, 0x05, 0x15, 0x52, 0x02, 0x14, 0x5e, 0x87,
+ 0x05, 0x14, 0x5b, 0x02, 0x14, 0x71, 0x91, 0x05, 0x14, 0x43, 0x02, 0x14,
+ 0x79, 0x97, 0x05, 0x00, 0xab, 0x02, 0x14, 0x81, 0x83, 0x05, 0x14, 0x12,
+ 0x02, 0x14, 0x88, 0x87, 0x05, 0x13, 0xf3, 0x02, 0x14, 0x94, 0x1a, 0xc2,
+ 0x14, 0x9c, 0x0b, 0xc2, 0x14, 0xc1, 0x83, 0x05, 0x13, 0x9b, 0x02, 0x14,
+ 0xcc, 0xc2, 0x00, 0xf1, 0x05, 0x13, 0xbb, 0x02, 0x14, 0xd8, 0x91, 0x05,
+ 0x13, 0xdb, 0x02, 0x14, 0xe4, 0x0f, 0xc2, 0x14, 0xf0, 0x10, 0xc2, 0x15,
+ 0x13, 0x0e, 0x42, 0x15, 0x30, 0x8b, 0x05, 0x23, 0x9b, 0x02, 0x15, 0x5a,
+ 0x97, 0x05, 0x24, 0x1b, 0x02, 0x15, 0x6d, 0x91, 0x05, 0x23, 0xfb, 0x02,
+ 0x15, 0x87, 0x9b, 0x05, 0x24, 0x4a, 0x02, 0x15, 0x93, 0x9b, 0x05, 0x23,
+ 0x6b, 0x02, 0x15, 0xa6, 0x8b, 0x05, 0x22, 0xfb, 0x02, 0x15, 0xb9, 0x91,
+ 0x05, 0x23, 0x4b, 0x02, 0x15, 0xcc, 0xc2, 0x00, 0xf1, 0x05, 0x23, 0x32,
+ 0x02, 0x15, 0xd8, 0x09, 0xc2, 0x15, 0xdc, 0x8b, 0x05, 0x05, 0x83, 0x02,
+ 0x16, 0x01, 0x83, 0x05, 0x05, 0xbb, 0x02, 0x16, 0x14, 0x97, 0x05, 0x06,
+ 0x2b, 0x02, 0x16, 0x20, 0x91, 0x05, 0x05, 0xfb, 0x02, 0x16, 0x3a, 0x87,
+ 0x05, 0x06, 0x13, 0x02, 0x16, 0x46, 0x9b, 0x05, 0x06, 0x5a, 0x02, 0x16,
+ 0x4a, 0x96, 0x05, 0x00, 0x03, 0x02, 0x16, 0x56, 0x9a, 0x05, 0x00, 0x09,
+ 0x92, 0x05, 0x00, 0x19, 0x87, 0x05, 0x00, 0x32, 0x02, 0x16, 0x5c, 0x96,
0x05, 0x00, 0x41, 0x9a, 0x05, 0x00, 0x49, 0x92, 0x05, 0x00, 0x58, 0x9a,
0x05, 0x00, 0x61, 0x92, 0x05, 0x00, 0x70, 0x96, 0x05, 0x00, 0x79, 0x9a,
0x05, 0x00, 0x81, 0x92, 0x05, 0x00, 0x90, 0x9a, 0x05, 0x00, 0x98, 0x8b,
- 0x05, 0x00, 0xc3, 0x02, 0x17, 0x3c, 0x83, 0x05, 0x01, 0x03, 0x02, 0x17,
- 0x4f, 0x97, 0x05, 0x01, 0x73, 0x02, 0x17, 0x5b, 0x91, 0x05, 0x01, 0x3b,
- 0x02, 0x17, 0x75, 0x87, 0x05, 0x01, 0x53, 0x02, 0x17, 0x81, 0x9b, 0x05,
- 0x01, 0xa3, 0x02, 0x17, 0x89, 0x04, 0x42, 0x17, 0x9c, 0x8b, 0x05, 0x01,
- 0xd3, 0x02, 0x17, 0xc6, 0x83, 0x05, 0x02, 0x0b, 0x02, 0x17, 0xd9, 0x97,
- 0x05, 0x02, 0x63, 0x02, 0x17, 0xe5, 0x91, 0x05, 0x02, 0x43, 0x02, 0x17,
- 0xff, 0x9b, 0x05, 0x02, 0x92, 0x02, 0x18, 0x0b, 0x8b, 0x05, 0x06, 0x7b,
- 0x02, 0x18, 0x1e, 0x83, 0x05, 0x06, 0x9b, 0x02, 0x18, 0x2a, 0x91, 0x05,
- 0x06, 0xb3, 0x02, 0x18, 0x36, 0x97, 0x05, 0x06, 0xd3, 0x02, 0x18, 0x3e,
- 0x9b, 0x05, 0x07, 0x02, 0x02, 0x18, 0x51, 0x8b, 0x05, 0x07, 0x23, 0x02,
- 0x18, 0x5d, 0x83, 0x05, 0x07, 0x63, 0x02, 0x18, 0x70, 0x91, 0x05, 0x07,
- 0x83, 0x02, 0x18, 0x7c, 0x07, 0xc2, 0x18, 0x88, 0x97, 0x05, 0x07, 0xb3,
- 0x02, 0x18, 0x90, 0x9b, 0x05, 0x07, 0xe2, 0x02, 0x18, 0xa3, 0x8b, 0x05,
- 0x08, 0x13, 0x02, 0x18, 0xb6, 0x83, 0x05, 0x08, 0x4b, 0x02, 0x18, 0xc9,
- 0x97, 0x05, 0x08, 0xb3, 0x02, 0x18, 0xd5, 0x91, 0x05, 0x08, 0x7b, 0x02,
- 0x18, 0xef, 0x87, 0x05, 0x08, 0x93, 0x02, 0x18, 0xfb, 0x06, 0x42, 0x19,
- 0x03, 0x8b, 0x05, 0x08, 0xe3, 0x02, 0x19, 0x26, 0x83, 0x05, 0x09, 0x1b,
- 0x02, 0x19, 0x39, 0x97, 0x05, 0x09, 0x93, 0x02, 0x19, 0x45, 0x91, 0x05,
- 0x09, 0x5b, 0x02, 0x19, 0x5f, 0x87, 0x05, 0x09, 0x72, 0x02, 0x19, 0x6b,
- 0x8b, 0x05, 0x0d, 0xcb, 0x02, 0x19, 0x73, 0x83, 0x05, 0x0e, 0x0b, 0x02,
- 0x19, 0x86, 0x97, 0x05, 0x0e, 0x83, 0x02, 0x19, 0x92, 0x91, 0x05, 0x0e,
- 0x4b, 0x02, 0x19, 0xac, 0x87, 0x05, 0x0e, 0x63, 0x02, 0x19, 0xb8, 0x9b,
- 0x05, 0x0e, 0xb2, 0x02, 0x19, 0xc0, 0x8b, 0x05, 0x0e, 0xe3, 0x02, 0x19,
- 0xd3, 0x83, 0x05, 0x0f, 0x23, 0x02, 0x19, 0xe6, 0x97, 0x05, 0x0f, 0xa3,
- 0x02, 0x19, 0xf2, 0x91, 0x05, 0x0f, 0x63, 0x02, 0x1a, 0x0c, 0x87, 0x05,
- 0x0f, 0x83, 0x02, 0x1a, 0x18, 0x09, 0x42, 0x1a, 0x24, 0x8b, 0x05, 0x0f,
- 0xd3, 0x02, 0x1a, 0x47, 0x83, 0x05, 0x10, 0x0b, 0x02, 0x1a, 0x5a, 0x97,
- 0x05, 0x10, 0x83, 0x02, 0x1a, 0x66, 0x91, 0x05, 0x10, 0x43, 0x02, 0x1a,
- 0x80, 0x87, 0x05, 0x10, 0x62, 0x02, 0x1a, 0x8c, 0x8b, 0x05, 0x24, 0x8b,
- 0x02, 0x1a, 0x98, 0xc2, 0x1e, 0x62, 0x05, 0x24, 0xd0, 0xc2, 0x00, 0x6d,
- 0x05, 0x24, 0x91, 0x87, 0x05, 0x26, 0x30, 0x1b, 0xc2, 0x1a, 0x9c, 0xc3,
- 0xe6, 0xc7, 0x05, 0x25, 0xa1, 0xc3, 0xa9, 0xe4, 0x05, 0x26, 0x28, 0x9b,
- 0x05, 0x25, 0xe3, 0x02, 0x1a, 0xa8, 0xc3, 0xe6, 0x70, 0x05, 0x25, 0xe9,
- 0xc2, 0x00, 0x3e, 0x05, 0x25, 0xf1, 0xc2, 0x01, 0xf0, 0x05, 0x26, 0x18,
- 0xc2, 0x00, 0xcb, 0x05, 0x24, 0xa9, 0x0a, 0x42, 0x1a, 0xb0, 0x09, 0xc2,
- 0x1a, 0xc6, 0xc2, 0x04, 0x37, 0x05, 0x24, 0xb9, 0x83, 0x05, 0x25, 0x09,
- 0xc2, 0x00, 0xb2, 0x05, 0x25, 0xb0, 0x8b, 0x05, 0x24, 0xc1, 0xc2, 0x00,
- 0x11, 0x05, 0x24, 0xe0, 0x1a, 0xc2, 0x1a, 0xd2, 0xc2, 0x00, 0x82, 0x05,
- 0x25, 0x68, 0xc3, 0x03, 0xca, 0x05, 0x24, 0xd9, 0xc2, 0x02, 0x53, 0x05,
- 0x25, 0x28, 0x91, 0x05, 0x24, 0xe9, 0xc2, 0x00, 0x6d, 0x05, 0x25, 0x70,
- 0xc2, 0x00, 0x84, 0x05, 0x24, 0xf1, 0xc2, 0x14, 0x6d, 0x05, 0x25, 0x60,
- 0xc2, 0x00, 0x9e, 0x05, 0x25, 0x01, 0x97, 0x05, 0x25, 0x40, 0x17, 0xc2,
- 0x1a, 0xe4, 0xc2, 0x00, 0xb2, 0x05, 0x25, 0x59, 0x83, 0x05, 0x25, 0x91,
- 0xc4, 0xe0, 0xb7, 0x05, 0x26, 0x20, 0xc3, 0x64, 0x57, 0x05, 0x25, 0x21,
- 0x97, 0x05, 0x25, 0xc8, 0x0c, 0xc2, 0x1a, 0xec, 0x91, 0x05, 0x25, 0x98,
- 0xc2, 0x02, 0x53, 0x05, 0x25, 0x79, 0xc2, 0x04, 0x37, 0x05, 0x25, 0x88,
- 0xd6, 0x30, 0x8d, 0x08, 0x75, 0x88, 0xcf, 0x37, 0x1e, 0x08, 0x75, 0x80,
- 0x96, 0x08, 0x75, 0x49, 0x99, 0x08, 0x75, 0x31, 0xc2, 0x17, 0x51, 0x08,
- 0x74, 0xb9, 0xc3, 0x69, 0xa8, 0x08, 0x74, 0x00, 0xc2, 0x0c, 0x81, 0x08,
- 0x75, 0x39, 0xc2, 0x00, 0xa4, 0x08, 0x74, 0x48, 0xc3, 0x4b, 0x01, 0x08,
- 0x74, 0xf1, 0xc2, 0x0f, 0x61, 0x08, 0x74, 0xe8, 0xcf, 0x65, 0x8d, 0x08,
- 0x74, 0xd8, 0xc4, 0xe1, 0x0b, 0x08, 0x74, 0xc1, 0x83, 0x08, 0x74, 0x50,
- 0x87, 0x08, 0x74, 0xb1, 0x83, 0x08, 0x74, 0x7a, 0x02, 0x1a, 0xfc, 0x83,
- 0x08, 0x74, 0xa9, 0xc2, 0x01, 0xf0, 0x08, 0x74, 0x20, 0x86, 0x08, 0x74,
- 0xa1, 0x8e, 0x08, 0x74, 0x58, 0xc2, 0x01, 0xf2, 0x08, 0x74, 0x99, 0xc3,
- 0x0d, 0xd9, 0x08, 0x74, 0x91, 0xc2, 0x00, 0x34, 0x08, 0x74, 0x89, 0x87,
- 0x08, 0x74, 0x28, 0xc2, 0x00, 0xa4, 0x08, 0x74, 0x71, 0x83, 0x08, 0x74,
- 0x68, 0x0a, 0xc2, 0x1b, 0x00, 0xc2, 0x01, 0x2e, 0x08, 0x74, 0x30, 0xc2,
- 0x01, 0xf0, 0x08, 0x74, 0x19, 0x87, 0x08, 0x74, 0x10, 0xc9, 0x1b, 0xb6,
- 0x00, 0x04, 0xa1, 0xc3, 0x0e, 0x6e, 0x70, 0x03, 0xf8, 0x83, 0x08, 0xd5,
+ 0x05, 0x00, 0xc3, 0x02, 0x16, 0x68, 0x83, 0x05, 0x01, 0x03, 0x02, 0x16,
+ 0x7b, 0x97, 0x05, 0x01, 0x73, 0x02, 0x16, 0x87, 0x91, 0x05, 0x01, 0x3b,
+ 0x02, 0x16, 0xa1, 0x87, 0x05, 0x01, 0x53, 0x02, 0x16, 0xad, 0x9b, 0x05,
+ 0x01, 0xa3, 0x02, 0x16, 0xb5, 0x04, 0x42, 0x16, 0xc8, 0x8b, 0x05, 0x01,
+ 0xd3, 0x02, 0x16, 0xf2, 0x83, 0x05, 0x02, 0x0b, 0x02, 0x17, 0x05, 0x97,
+ 0x05, 0x02, 0x63, 0x02, 0x17, 0x11, 0x91, 0x05, 0x02, 0x43, 0x02, 0x17,
+ 0x2b, 0x9b, 0x05, 0x02, 0x92, 0x02, 0x17, 0x37, 0x8b, 0x05, 0x06, 0x7b,
+ 0x02, 0x17, 0x4a, 0x83, 0x05, 0x06, 0x9b, 0x02, 0x17, 0x56, 0x91, 0x05,
+ 0x06, 0xb3, 0x02, 0x17, 0x62, 0x97, 0x05, 0x06, 0xd3, 0x02, 0x17, 0x6a,
+ 0x9b, 0x05, 0x07, 0x02, 0x02, 0x17, 0x7d, 0x8b, 0x05, 0x07, 0x23, 0x02,
+ 0x17, 0x89, 0x83, 0x05, 0x07, 0x63, 0x02, 0x17, 0x9c, 0x91, 0x05, 0x07,
+ 0x83, 0x02, 0x17, 0xa8, 0x07, 0xc2, 0x17, 0xb4, 0x97, 0x05, 0x07, 0xb3,
+ 0x02, 0x17, 0xbc, 0x9b, 0x05, 0x07, 0xe2, 0x02, 0x17, 0xcf, 0x8b, 0x05,
+ 0x08, 0x13, 0x02, 0x17, 0xe2, 0x83, 0x05, 0x08, 0x4b, 0x02, 0x17, 0xf5,
+ 0x97, 0x05, 0x08, 0xb3, 0x02, 0x18, 0x01, 0x91, 0x05, 0x08, 0x7b, 0x02,
+ 0x18, 0x1b, 0x87, 0x05, 0x08, 0x93, 0x02, 0x18, 0x27, 0x06, 0x42, 0x18,
+ 0x2f, 0x8b, 0x05, 0x08, 0xe3, 0x02, 0x18, 0x52, 0x83, 0x05, 0x09, 0x1b,
+ 0x02, 0x18, 0x65, 0x97, 0x05, 0x09, 0x93, 0x02, 0x18, 0x71, 0x91, 0x05,
+ 0x09, 0x5b, 0x02, 0x18, 0x8b, 0x87, 0x05, 0x09, 0x72, 0x02, 0x18, 0x97,
+ 0x8b, 0x05, 0x0d, 0xcb, 0x02, 0x18, 0x9f, 0x83, 0x05, 0x0e, 0x0b, 0x02,
+ 0x18, 0xb2, 0x97, 0x05, 0x0e, 0x83, 0x02, 0x18, 0xbe, 0x91, 0x05, 0x0e,
+ 0x4b, 0x02, 0x18, 0xd8, 0x87, 0x05, 0x0e, 0x63, 0x02, 0x18, 0xe4, 0x9b,
+ 0x05, 0x0e, 0xb2, 0x02, 0x18, 0xec, 0x8b, 0x05, 0x0e, 0xe3, 0x02, 0x18,
+ 0xff, 0x83, 0x05, 0x0f, 0x23, 0x02, 0x19, 0x12, 0x97, 0x05, 0x0f, 0xa3,
+ 0x02, 0x19, 0x1e, 0x91, 0x05, 0x0f, 0x63, 0x02, 0x19, 0x38, 0x87, 0x05,
+ 0x0f, 0x83, 0x02, 0x19, 0x44, 0x09, 0x42, 0x19, 0x50, 0x8b, 0x05, 0x0f,
+ 0xd3, 0x02, 0x19, 0x73, 0x83, 0x05, 0x10, 0x0b, 0x02, 0x19, 0x86, 0x97,
+ 0x05, 0x10, 0x83, 0x02, 0x19, 0x92, 0x91, 0x05, 0x10, 0x43, 0x02, 0x19,
+ 0xac, 0x87, 0x05, 0x10, 0x62, 0x02, 0x19, 0xb8, 0x8b, 0x05, 0x24, 0x8b,
+ 0x02, 0x19, 0xc4, 0xc2, 0x1f, 0xbc, 0x05, 0x24, 0xd0, 0xc2, 0x01, 0x33,
+ 0x05, 0x24, 0x91, 0x87, 0x05, 0x26, 0x30, 0x1b, 0xc2, 0x19, 0xc8, 0xc3,
+ 0xeb, 0x04, 0x05, 0x25, 0xa1, 0xc3, 0xb5, 0x4b, 0x05, 0x26, 0x28, 0x9b,
+ 0x05, 0x25, 0xe3, 0x02, 0x19, 0xd4, 0xc3, 0xe8, 0x8a, 0x05, 0x25, 0xe9,
+ 0xc2, 0x03, 0x1e, 0x05, 0x25, 0xf1, 0xc2, 0x00, 0x5b, 0x05, 0x26, 0x18,
+ 0xc2, 0x02, 0x29, 0x05, 0x24, 0xa9, 0x0a, 0x42, 0x19, 0xdc, 0x09, 0xc2,
+ 0x19, 0xf2, 0xc2, 0x06, 0x97, 0x05, 0x24, 0xb9, 0x83, 0x05, 0x25, 0x09,
+ 0xc2, 0x00, 0xf2, 0x05, 0x25, 0xb0, 0x8b, 0x05, 0x24, 0xc1, 0xc2, 0x00,
+ 0x11, 0x05, 0x24, 0xe0, 0x1a, 0xc2, 0x19, 0xfe, 0xc2, 0x01, 0xc2, 0x05,
+ 0x25, 0x68, 0xc3, 0x0b, 0x0a, 0x05, 0x24, 0xd9, 0xc2, 0x00, 0xd3, 0x05,
+ 0x25, 0x28, 0x91, 0x05, 0x24, 0xe9, 0xc2, 0x01, 0x33, 0x05, 0x25, 0x70,
+ 0xc2, 0x01, 0xc4, 0x05, 0x24, 0xf1, 0xc2, 0x61, 0x89, 0x05, 0x25, 0x60,
+ 0xc2, 0x00, 0x97, 0x05, 0x25, 0x01, 0x97, 0x05, 0x25, 0x40, 0x17, 0xc2,
+ 0x1a, 0x10, 0xc2, 0x00, 0xf2, 0x05, 0x25, 0x59, 0x83, 0x05, 0x25, 0x91,
+ 0xc4, 0xe4, 0xd7, 0x05, 0x26, 0x20, 0xc3, 0x68, 0x75, 0x05, 0x25, 0x21,
+ 0x97, 0x05, 0x25, 0xc8, 0x0c, 0xc2, 0x1a, 0x18, 0x91, 0x05, 0x25, 0x98,
+ 0xc2, 0x00, 0xd3, 0x05, 0x25, 0x79, 0xc2, 0x06, 0x97, 0x05, 0x25, 0x88,
+ 0xd6, 0x2b, 0xfb, 0x08, 0x75, 0x88, 0xcf, 0x35, 0x23, 0x08, 0x75, 0x80,
+ 0x96, 0x08, 0x75, 0x49, 0x99, 0x08, 0x75, 0x31, 0xc2, 0x16, 0x0a, 0x08,
+ 0x74, 0xb9, 0xc3, 0x66, 0xbe, 0x08, 0x74, 0x00, 0xc2, 0x0c, 0x22, 0x08,
+ 0x75, 0x39, 0xc2, 0x01, 0x0e, 0x08, 0x74, 0x48, 0xc3, 0x47, 0xb3, 0x08,
+ 0x74, 0xf1, 0xc2, 0x0e, 0x14, 0x08, 0x74, 0xe8, 0xcf, 0x65, 0xdc, 0x08,
+ 0x74, 0xd8, 0xc4, 0xe7, 0x6f, 0x08, 0x74, 0xc1, 0x83, 0x08, 0x74, 0x50,
+ 0x87, 0x08, 0x74, 0xb1, 0x83, 0x08, 0x74, 0x7a, 0x02, 0x1a, 0x28, 0x83,
+ 0x08, 0x74, 0xa9, 0xc2, 0x00, 0x5b, 0x08, 0x74, 0x20, 0x86, 0x08, 0x74,
+ 0xa1, 0x8e, 0x08, 0x74, 0x58, 0xc2, 0x00, 0x5d, 0x08, 0x74, 0x99, 0xc3,
+ 0x11, 0x40, 0x08, 0x74, 0x91, 0xc2, 0x01, 0x5b, 0x08, 0x74, 0x89, 0x87,
+ 0x08, 0x74, 0x28, 0xc2, 0x01, 0x0e, 0x08, 0x74, 0x71, 0x83, 0x08, 0x74,
+ 0x68, 0x0a, 0xc2, 0x1a, 0x2c, 0xc2, 0x07, 0x6e, 0x08, 0x74, 0x30, 0xc2,
+ 0x00, 0x5b, 0x08, 0x74, 0x19, 0x87, 0x08, 0x74, 0x10, 0xc9, 0x1c, 0x4f,
+ 0x00, 0x04, 0xa1, 0xc3, 0x11, 0x84, 0x70, 0x03, 0xf8, 0x83, 0x08, 0xd5,
0xf9, 0x91, 0x08, 0xd5, 0xf1, 0x8b, 0x08, 0xd5, 0xe9, 0x87, 0x08, 0xd5,
- 0xe0, 0x9b, 0x00, 0xc5, 0xfb, 0x02, 0x1b, 0x0c, 0x83, 0x00, 0xa7, 0xaa,
- 0x02, 0x1b, 0x12, 0x19, 0xc2, 0x1b, 0x16, 0x83, 0x00, 0xa8, 0xab, 0x02,
- 0x1b, 0x2f, 0x91, 0x00, 0xa8, 0x9b, 0x02, 0x1b, 0x37, 0x8b, 0x00, 0xa8,
- 0x8b, 0x02, 0x1b, 0x3f, 0x87, 0x00, 0xa8, 0x80, 0x9b, 0x00, 0xc5, 0xf1,
- 0x4c, 0x88, 0x0c, 0xc2, 0x1b, 0x43, 0x91, 0x00, 0xa7, 0x90, 0x83, 0x00,
- 0xa8, 0x03, 0x02, 0x1b, 0x5b, 0x87, 0x00, 0xa7, 0xb1, 0x8b, 0x00, 0xa7,
- 0xc3, 0x02, 0x1b, 0x5f, 0x91, 0x00, 0xa7, 0xe2, 0x02, 0x1b, 0x63, 0x8b,
- 0x00, 0xa7, 0x80, 0x47, 0xc6, 0xce, 0xc2, 0x1b, 0x67, 0x9b, 0x00, 0xc5,
- 0xe1, 0x46, 0xd4, 0x5f, 0xc2, 0x1b, 0x71, 0x83, 0x00, 0xa6, 0x42, 0x02,
- 0x1b, 0x9d, 0x91, 0x00, 0xc6, 0x53, 0x02, 0x1b, 0xa1, 0x8b, 0x00, 0xc6,
- 0x33, 0x02, 0x1b, 0xa5, 0x87, 0x00, 0xa6, 0x49, 0x83, 0x00, 0xa6, 0x5a,
- 0x02, 0x1b, 0xa9, 0x9b, 0x00, 0xc5, 0xd9, 0x91, 0x00, 0xa6, 0x28, 0x83,
- 0x00, 0xb3, 0xab, 0x02, 0x1b, 0xad, 0x91, 0x00, 0xb3, 0x9b, 0x02, 0x1b,
- 0xb1, 0x8b, 0x00, 0xb3, 0x8a, 0x02, 0x1b, 0xb5, 0x83, 0x00, 0xac, 0x9b,
- 0x02, 0x1b, 0xb9, 0x91, 0x00, 0xac, 0x8b, 0x02, 0x1b, 0xc4, 0x8b, 0x00,
- 0xac, 0x7a, 0x02, 0x1b, 0xc8, 0xc4, 0x48, 0x9f, 0x00, 0xab, 0xe1, 0xc4,
- 0xd2, 0xc4, 0x00, 0xab, 0xda, 0x02, 0x1b, 0xcc, 0x8b, 0x00, 0xab, 0x0b,
- 0x02, 0x1b, 0xe5, 0x87, 0x00, 0xaa, 0xf8, 0x8b, 0x00, 0xa6, 0x18, 0x46,
- 0x67, 0x25, 0xc2, 0x1b, 0xe9, 0x83, 0x00, 0xa4, 0x8a, 0x02, 0x1c, 0x41,
- 0x91, 0x00, 0xa4, 0xc3, 0x02, 0x1c, 0x45, 0x8b, 0x00, 0xa4, 0xa3, 0x02,
- 0x1c, 0x49, 0x87, 0x00, 0xa4, 0x91, 0x83, 0x00, 0xa4, 0xe2, 0x02, 0x1c,
- 0x4d, 0x91, 0x00, 0xa4, 0x70, 0x8b, 0x00, 0xa4, 0x60, 0x94, 0x00, 0xc7,
- 0xa1, 0x8e, 0x00, 0xc7, 0x98, 0x99, 0x00, 0xb3, 0xfb, 0x02, 0x1c, 0x51,
- 0x0d, 0xc2, 0x1c, 0x61, 0x10, 0xc2, 0x1c, 0x71, 0x83, 0x00, 0xad, 0x99,
- 0x91, 0x00, 0xad, 0x91, 0x8b, 0x00, 0xad, 0x89, 0x87, 0x00, 0xad, 0x81,
- 0x95, 0x00, 0xa8, 0x40, 0x91, 0x00, 0xac, 0x43, 0x02, 0x1c, 0x81, 0xc2,
- 0x00, 0x4b, 0x00, 0xc7, 0x41, 0x83, 0x00, 0xac, 0x49, 0x8b, 0x00, 0xac,
- 0x39, 0x87, 0x00, 0xac, 0x30, 0x8a, 0x00, 0xab, 0x7b, 0x02, 0x1c, 0x85,
- 0x87, 0x00, 0xa3, 0x39, 0x8b, 0x00, 0xa3, 0x41, 0x91, 0x00, 0xa3, 0x49,
- 0x83, 0x00, 0xa3, 0x50, 0x19, 0xc2, 0x1c, 0xa1, 0xc8, 0xbd, 0x25, 0x00,
- 0xad, 0x73, 0x02, 0x1c, 0xac, 0x83, 0x00, 0xab, 0x33, 0x02, 0x1c, 0xc5,
- 0x91, 0x00, 0xab, 0x23, 0x02, 0x1c, 0xc9, 0x8b, 0x00, 0xab, 0x03, 0x02,
- 0x1c, 0xcd, 0x87, 0x00, 0xaa, 0xf0, 0x9b, 0x00, 0xc5, 0xb9, 0x83, 0x00,
- 0xa2, 0xb2, 0x02, 0x1c, 0xd1, 0x83, 0x00, 0xab, 0x99, 0x91, 0x00, 0xab,
- 0x91, 0x8b, 0x00, 0xab, 0x89, 0x87, 0x00, 0xab, 0x80, 0x91, 0x00, 0xa2,
- 0xeb, 0x02, 0x1c, 0xd5, 0x8b, 0x00, 0xa2, 0xcb, 0x02, 0x1c, 0xd9, 0x87,
- 0x00, 0xa2, 0xb9, 0x83, 0x00, 0xa3, 0x0a, 0x02, 0x1c, 0xdd, 0x91, 0x00,
- 0xa2, 0x88, 0x8b, 0x00, 0xa2, 0x78, 0x42, 0x00, 0x15, 0x42, 0x1c, 0xe1,
- 0x9b, 0x00, 0xc5, 0x99, 0x83, 0x00, 0xa0, 0xc8, 0x91, 0x00, 0xa0, 0xa2,
- 0x02, 0x1c, 0xed, 0x8b, 0x00, 0xa0, 0x80, 0xc2, 0x00, 0x4b, 0x00, 0xc7,
- 0x01, 0x87, 0x00, 0xaa, 0x18, 0x83, 0x00, 0xc6, 0x9b, 0x02, 0x1c, 0xf3,
- 0x91, 0x00, 0xc6, 0x8b, 0x02, 0x1c, 0xf7, 0x8b, 0x00, 0xc6, 0x7b, 0x02,
- 0x1c, 0xfb, 0xc2, 0x00, 0x4c, 0x00, 0xc6, 0x70, 0x9b, 0x00, 0xc6, 0x29,
- 0x83, 0x00, 0xaa, 0x62, 0x02, 0x1c, 0xff, 0x91, 0x00, 0xaa, 0x48, 0x8b,
- 0x00, 0xaa, 0x38, 0x44, 0x10, 0x50, 0xc2, 0x1d, 0x03, 0x8b, 0x00, 0xaa,
- 0xb0, 0x83, 0x00, 0xaa, 0xd2, 0x02, 0x1d, 0x35, 0x91, 0x00, 0xaa, 0xc0,
- 0x95, 0x00, 0xc6, 0xd3, 0x02, 0x1d, 0x39, 0x90, 0x00, 0xc6, 0xcb, 0x02,
- 0x1d, 0x3d, 0x8f, 0x00, 0xc6, 0xc1, 0x85, 0x00, 0xc6, 0xb9, 0x8d, 0x00,
- 0xc6, 0xb1, 0x96, 0x00, 0xc6, 0xa9, 0x92, 0x00, 0xc6, 0xa0, 0x9b, 0x00,
- 0xc6, 0x21, 0x83, 0x00, 0xa9, 0x72, 0x02, 0x1d, 0x41, 0x9b, 0x00, 0xc6,
- 0x19, 0x91, 0x00, 0xa9, 0x58, 0x83, 0x00, 0xa9, 0xcb, 0x02, 0x1d, 0x45,
- 0x91, 0x00, 0xa9, 0xab, 0x02, 0x1d, 0x49, 0x8b, 0x00, 0xa9, 0x8b, 0x02,
- 0x1d, 0x4d, 0x87, 0x00, 0xa9, 0x78, 0xc3, 0x4e, 0xbb, 0x00, 0xa9, 0x61,
- 0xc3, 0x3f, 0x5b, 0x00, 0xa2, 0x91, 0x12, 0xc2, 0x1d, 0x51, 0xc3, 0x92,
- 0xc0, 0x00, 0xa4, 0x79, 0xc2, 0x02, 0x84, 0x00, 0xa0, 0x39, 0x99, 0x00,
- 0xa0, 0xe9, 0xc3, 0x15, 0xac, 0x00, 0xa5, 0x49, 0xc3, 0x11, 0xd7, 0x00,
- 0xa6, 0x31, 0xc3, 0x15, 0x89, 0x00, 0xa6, 0xc9, 0xc3, 0x19, 0xd0, 0x00,
- 0xa7, 0x99, 0xc3, 0xdb, 0x76, 0x00, 0xa3, 0x88, 0x8b, 0x00, 0xa9, 0x48,
- 0x9b, 0x00, 0xc5, 0xe9, 0x83, 0x00, 0xa6, 0xda, 0x02, 0x1d, 0x5d, 0x83,
- 0x00, 0xad, 0x23, 0x02, 0x1d, 0x61, 0x91, 0x00, 0xad, 0x13, 0x02, 0x1d,
- 0x65, 0x8b, 0x00, 0xad, 0x02, 0x02, 0x1d, 0x69, 0x8b, 0x00, 0xa6, 0xb0,
- 0x91, 0x00, 0xa6, 0xc0, 0x87, 0x00, 0xa6, 0xe1, 0x8b, 0x00, 0xa6, 0xf3,
- 0x02, 0x1d, 0x6d, 0x91, 0x00, 0xa7, 0x13, 0x02, 0x1d, 0x71, 0x83, 0x00,
- 0xa7, 0x32, 0x02, 0x1d, 0x75, 0x9b, 0x00, 0xc5, 0xd1, 0x83, 0x00, 0xa5,
- 0x5a, 0x02, 0x1d, 0x79, 0x45, 0x2e, 0xd0, 0x42, 0x1d, 0x7d, 0x91, 0x00,
- 0xa5, 0x42, 0x02, 0x1d, 0x85, 0x8b, 0x00, 0xa5, 0x30, 0x87, 0x00, 0xa5,
- 0x61, 0x8b, 0x00, 0xa5, 0x73, 0x02, 0x1d, 0x8b, 0x91, 0x00, 0xa5, 0x93,
- 0x02, 0x1d, 0x8f, 0x83, 0x00, 0xa5, 0xb2, 0x02, 0x1d, 0x93, 0x83, 0x00,
- 0xa3, 0xf3, 0x02, 0x1d, 0x97, 0x87, 0x00, 0xa3, 0xa1, 0x8b, 0x00, 0xa3,
- 0xb3, 0x02, 0x1d, 0x9f, 0x91, 0x00, 0xa3, 0xd2, 0x02, 0x1d, 0xa3, 0x9b,
- 0x00, 0xc5, 0xc1, 0x83, 0x00, 0xa3, 0x9a, 0x02, 0x1d, 0xa7, 0x8b, 0x00,
- 0xa3, 0x70, 0x91, 0x00, 0xa3, 0x80, 0x91, 0x00, 0xa2, 0x03, 0x02, 0x1d,
- 0xab, 0x83, 0x00, 0xa2, 0x23, 0x02, 0x1d, 0xb3, 0x8b, 0x00, 0xa1, 0xe3,
- 0x02, 0x1d, 0xb7, 0x87, 0x00, 0xa1, 0xd0, 0x9b, 0x00, 0xc5, 0xa9, 0x83,
- 0x00, 0xa1, 0xca, 0x02, 0x1d, 0xbb, 0x9b, 0x00, 0xc5, 0xa1, 0x91, 0x00,
- 0xa1, 0xa0, 0x8b, 0x00, 0xa1, 0x90, 0x9b, 0x00, 0xc5, 0x91, 0x8b, 0x00,
- 0xa0, 0x10, 0xc7, 0xca, 0x0f, 0x00, 0xad, 0x78, 0x95, 0x00, 0xa8, 0x31,
- 0x8f, 0x00, 0xa5, 0xf0, 0x8b, 0x00, 0xb3, 0x79, 0x83, 0x00, 0xac, 0x22,
- 0x02, 0x1d, 0xbf, 0x91, 0x00, 0xac, 0x10, 0x8b, 0x00, 0xac, 0x00, 0x97,
- 0x08, 0x15, 0x22, 0x02, 0x1d, 0xc3, 0x9f, 0x08, 0x16, 0x70, 0xa0, 0x08,
- 0x16, 0x61, 0xa1, 0x08, 0x16, 0x69, 0x9f, 0x08, 0x16, 0x58, 0x9f, 0x08,
- 0x15, 0xb0, 0x9f, 0x08, 0x15, 0x78, 0x9f, 0x08, 0x16, 0x18, 0xc2, 0x00,
- 0x32, 0x08, 0x29, 0x81, 0xc2, 0x00, 0xbf, 0x08, 0x2a, 0x40, 0xc2, 0x01,
- 0x2e, 0x08, 0x29, 0x91, 0xc4, 0xe0, 0xf7, 0x08, 0x2a, 0xc0, 0xc2, 0x00,
- 0x9e, 0x08, 0x29, 0x99, 0xc3, 0x30, 0x5e, 0x08, 0x2a, 0x09, 0x1c, 0x42,
- 0x1d, 0xcf, 0x84, 0x08, 0x29, 0xa1, 0xc2, 0x17, 0x51, 0x08, 0x29, 0xb0,
- 0xc3, 0x44, 0xa0, 0x08, 0x29, 0xa9, 0x0a, 0x42, 0x1d, 0xdb, 0xc2, 0x00,
- 0xe4, 0x08, 0x29, 0xc1, 0xc3, 0x4b, 0xbc, 0x08, 0x2a, 0x99, 0xc3, 0xe7,
- 0x72, 0x08, 0x2a, 0xe0, 0x0a, 0xc2, 0x1d, 0xe5, 0x03, 0xc2, 0x1d, 0xf6,
- 0x42, 0x1d, 0x5f, 0x42, 0x1e, 0x00, 0xc3, 0x06, 0x45, 0x08, 0x29, 0xd1,
- 0xc3, 0xe7, 0x12, 0x08, 0x2b, 0x08, 0xc2, 0x02, 0xdf, 0x08, 0x29, 0xe1,
- 0xc3, 0x30, 0x4c, 0x08, 0x29, 0xf9, 0xc2, 0x01, 0x07, 0x08, 0x2a, 0xf0,
- 0x0a, 0xc2, 0x1e, 0x08, 0xc3, 0xe7, 0xc0, 0x08, 0x2a, 0xd0, 0xc2, 0x00,
- 0x57, 0x08, 0x29, 0xf1, 0xc3, 0xe7, 0x5d, 0x08, 0x2a, 0x28, 0xc3, 0xe7,
- 0x39, 0x08, 0x2a, 0x19, 0xc3, 0x7f, 0xf4, 0x08, 0x2a, 0x88, 0xc2, 0x00,
- 0xbb, 0x08, 0x2a, 0x21, 0xc2, 0x74, 0x99, 0x08, 0x2b, 0x18, 0x9b, 0x08,
- 0x2a, 0x39, 0x94, 0x08, 0x2a, 0x68, 0xc2, 0x00, 0x4d, 0x08, 0x2a, 0xb9,
- 0xc3, 0xe7, 0x72, 0x08, 0x2b, 0x10, 0x9d, 0x17, 0xcf, 0x01, 0x88, 0x17,
- 0xcf, 0x79, 0x87, 0x17, 0xcf, 0x71, 0x86, 0x17, 0xcf, 0x69, 0x85, 0x17,
- 0xcf, 0x61, 0x84, 0x17, 0xcf, 0x59, 0x83, 0x17, 0xcf, 0x51, 0xa6, 0x17,
- 0xcf, 0x49, 0xa5, 0x17, 0xcf, 0x41, 0xa4, 0x17, 0xcf, 0x39, 0xa3, 0x17,
- 0xcf, 0x31, 0xa2, 0x17, 0xcf, 0x29, 0xa1, 0x17, 0xcf, 0x21, 0xa0, 0x17,
- 0xcf, 0x19, 0x9f, 0x17, 0xcf, 0x11, 0x9e, 0x17, 0xcf, 0x08, 0x88, 0x17,
- 0xce, 0xf9, 0x87, 0x17, 0xce, 0xf1, 0xa6, 0x17, 0xce, 0xc9, 0x86, 0x17,
- 0xce, 0xe9, 0x85, 0x17, 0xce, 0xe1, 0x84, 0x17, 0xce, 0xd9, 0x83, 0x17,
- 0xce, 0xd1, 0xa5, 0x17, 0xce, 0xc1, 0xa4, 0x17, 0xce, 0xb9, 0xa3, 0x17,
- 0xce, 0xb1, 0xa2, 0x17, 0xce, 0xa9, 0xa1, 0x17, 0xce, 0xa1, 0xa0, 0x17,
- 0xce, 0x99, 0x9f, 0x17, 0xce, 0x91, 0x9e, 0x17, 0xce, 0x89, 0x9d, 0x17,
- 0xce, 0x80, 0x83, 0x17, 0xcd, 0x51, 0xa6, 0x17, 0xcd, 0x49, 0xa4, 0x17,
- 0xcd, 0x39, 0xa3, 0x17, 0xcd, 0x31, 0xa2, 0x17, 0xcd, 0x29, 0xa1, 0x17,
- 0xcd, 0x21, 0xa5, 0x17, 0xcd, 0x41, 0x86, 0x17, 0xcd, 0x69, 0x85, 0x17,
- 0xcd, 0x61, 0x84, 0x17, 0xcd, 0x59, 0xa0, 0x17, 0xcd, 0x19, 0x9f, 0x17,
- 0xcd, 0x11, 0x9e, 0x17, 0xcd, 0x09, 0x9d, 0x17, 0xcd, 0x01, 0x87, 0x17,
- 0xcd, 0x71, 0x88, 0x17, 0xcd, 0x78, 0x88, 0x17, 0xcf, 0xf9, 0x87, 0x17,
- 0xcf, 0xf1, 0x86, 0x17, 0xcf, 0xe9, 0x85, 0x17, 0xcf, 0xe1, 0x84, 0x17,
- 0xcf, 0xd9, 0x83, 0x17, 0xcf, 0xd1, 0xa6, 0x17, 0xcf, 0xc9, 0xa5, 0x17,
- 0xcf, 0xc1, 0xa4, 0x17, 0xcf, 0xb9, 0xa3, 0x17, 0xcf, 0xb1, 0xa2, 0x17,
- 0xcf, 0xa9, 0xa1, 0x17, 0xcf, 0xa1, 0xa0, 0x17, 0xcf, 0x99, 0x9f, 0x17,
- 0xcf, 0x91, 0x9e, 0x17, 0xcf, 0x89, 0x9d, 0x17, 0xcf, 0x80, 0x9d, 0x17,
- 0xcb, 0x81, 0x88, 0x17, 0xcb, 0xf9, 0x87, 0x17, 0xcb, 0xf1, 0x86, 0x17,
- 0xcb, 0xe9, 0x85, 0x17, 0xcb, 0xe1, 0x84, 0x17, 0xcb, 0xd9, 0x83, 0x17,
- 0xcb, 0xd1, 0xa6, 0x17, 0xcb, 0xc9, 0xa5, 0x17, 0xcb, 0xc1, 0xa4, 0x17,
- 0xcb, 0xb9, 0xa3, 0x17, 0xcb, 0xb1, 0xa2, 0x17, 0xcb, 0xa9, 0xa1, 0x17,
- 0xcb, 0xa1, 0xa0, 0x17, 0xcb, 0x99, 0x9f, 0x17, 0xcb, 0x91, 0x9e, 0x17,
- 0xcb, 0x88, 0x88, 0x17, 0xcb, 0x79, 0x87, 0x17, 0xcb, 0x71, 0x86, 0x17,
- 0xcb, 0x69, 0x85, 0x17, 0xcb, 0x61, 0x84, 0x17, 0xcb, 0x59, 0x83, 0x17,
- 0xcb, 0x51, 0xa6, 0x17, 0xcb, 0x49, 0xa5, 0x17, 0xcb, 0x41, 0xa4, 0x17,
- 0xcb, 0x39, 0xa3, 0x17, 0xcb, 0x31, 0xa2, 0x17, 0xcb, 0x29, 0xa1, 0x17,
- 0xcb, 0x21, 0x9d, 0x17, 0xcb, 0x01, 0x9e, 0x17, 0xcb, 0x09, 0x9f, 0x17,
- 0xcb, 0x11, 0xa0, 0x17, 0xcb, 0x18, 0x9d, 0x17, 0xc9, 0x81, 0x88, 0x17,
- 0xc9, 0xf9, 0x87, 0x17, 0xc9, 0xf1, 0x86, 0x17, 0xc9, 0xe9, 0x85, 0x17,
- 0xc9, 0xe1, 0x84, 0x17, 0xc9, 0xd9, 0x83, 0x17, 0xc9, 0xd1, 0xa6, 0x17,
- 0xc9, 0xc9, 0xa5, 0x17, 0xc9, 0xc1, 0xa4, 0x17, 0xc9, 0xb9, 0xa3, 0x17,
- 0xc9, 0xb1, 0xa2, 0x17, 0xc9, 0xa9, 0xa1, 0x17, 0xc9, 0xa1, 0xa0, 0x17,
- 0xc9, 0x99, 0x9f, 0x17, 0xc9, 0x91, 0x9e, 0x17, 0xc9, 0x88, 0x88, 0x17,
- 0xc9, 0x79, 0x87, 0x17, 0xc9, 0x71, 0x86, 0x17, 0xc9, 0x69, 0x85, 0x17,
- 0xc9, 0x61, 0x84, 0x17, 0xc9, 0x59, 0x83, 0x17, 0xc9, 0x51, 0xa6, 0x17,
- 0xc9, 0x49, 0xa5, 0x17, 0xc9, 0x41, 0xa4, 0x17, 0xc9, 0x39, 0xa3, 0x17,
- 0xc9, 0x31, 0xa2, 0x17, 0xc9, 0x29, 0xa1, 0x17, 0xc9, 0x21, 0xa0, 0x17,
- 0xc9, 0x19, 0x9f, 0x17, 0xc9, 0x11, 0x9e, 0x17, 0xc9, 0x09, 0x9d, 0x17,
- 0xc9, 0x00, 0x88, 0x17, 0xc8, 0xf9, 0x87, 0x17, 0xc8, 0xf1, 0x86, 0x17,
- 0xc8, 0xe9, 0x85, 0x17, 0xc8, 0xe1, 0x84, 0x17, 0xc8, 0xd9, 0x83, 0x17,
- 0xc8, 0xd1, 0xa6, 0x17, 0xc8, 0xc9, 0xa5, 0x17, 0xc8, 0xc1, 0xa4, 0x17,
- 0xc8, 0xb9, 0xa3, 0x17, 0xc8, 0xb1, 0xa2, 0x17, 0xc8, 0xa9, 0xa1, 0x17,
- 0xc8, 0xa1, 0xa0, 0x17, 0xc8, 0x99, 0x9f, 0x17, 0xc8, 0x91, 0x9e, 0x17,
- 0xc8, 0x89, 0x9d, 0x17, 0xc8, 0x80, 0x88, 0x17, 0xc8, 0x79, 0x87, 0x17,
- 0xc8, 0x71, 0x86, 0x17, 0xc8, 0x69, 0x85, 0x17, 0xc8, 0x61, 0x84, 0x17,
- 0xc8, 0x59, 0x83, 0x17, 0xc8, 0x51, 0xa6, 0x17, 0xc8, 0x49, 0xa5, 0x17,
- 0xc8, 0x41, 0xa4, 0x17, 0xc8, 0x39, 0xa3, 0x17, 0xc8, 0x31, 0xa2, 0x17,
- 0xc8, 0x29, 0xa1, 0x17, 0xc8, 0x21, 0xa0, 0x17, 0xc8, 0x19, 0x9f, 0x17,
- 0xc8, 0x11, 0x9e, 0x17, 0xc8, 0x09, 0x9d, 0x17, 0xc8, 0x00, 0x88, 0x17,
- 0xce, 0x79, 0x87, 0x17, 0xce, 0x71, 0x86, 0x17, 0xce, 0x69, 0x85, 0x17,
- 0xce, 0x61, 0x84, 0x17, 0xce, 0x59, 0x83, 0x17, 0xce, 0x51, 0xa6, 0x17,
- 0xce, 0x49, 0xa5, 0x17, 0xce, 0x41, 0xa4, 0x17, 0xce, 0x39, 0xa3, 0x17,
- 0xce, 0x31, 0xa2, 0x17, 0xce, 0x29, 0xa1, 0x17, 0xce, 0x21, 0xa0, 0x17,
- 0xce, 0x19, 0x9f, 0x17, 0xce, 0x11, 0x9d, 0x17, 0xce, 0x01, 0x9e, 0x17,
- 0xce, 0x08, 0x87, 0x17, 0xcd, 0xf1, 0x86, 0x17, 0xcd, 0xe9, 0x85, 0x17,
- 0xcd, 0xe1, 0x84, 0x17, 0xcd, 0xd9, 0x83, 0x17, 0xcd, 0xd1, 0xa6, 0x17,
- 0xcd, 0xc9, 0xa5, 0x17, 0xcd, 0xc1, 0xa4, 0x17, 0xcd, 0xb9, 0xa3, 0x17,
- 0xcd, 0xb1, 0xa2, 0x17, 0xcd, 0xa9, 0xa1, 0x17, 0xcd, 0xa1, 0x9d, 0x17,
- 0xcd, 0x81, 0x9e, 0x17, 0xcd, 0x89, 0x9f, 0x17, 0xcd, 0x91, 0xa0, 0x17,
- 0xcd, 0x99, 0x88, 0x17, 0xcd, 0xf8, 0x88, 0x17, 0xcc, 0xf9, 0x87, 0x17,
- 0xcc, 0xf1, 0x86, 0x17, 0xcc, 0xe9, 0x85, 0x17, 0xcc, 0xe1, 0x84, 0x17,
- 0xcc, 0xd9, 0x83, 0x17, 0xcc, 0xd1, 0xa6, 0x17, 0xcc, 0xc9, 0xa5, 0x17,
- 0xcc, 0xc1, 0xa4, 0x17, 0xcc, 0xb9, 0xa3, 0x17, 0xcc, 0xb1, 0xa2, 0x17,
- 0xcc, 0xa9, 0xa1, 0x17, 0xcc, 0xa1, 0x9d, 0x17, 0xcc, 0x81, 0x9e, 0x17,
- 0xcc, 0x89, 0x9f, 0x17, 0xcc, 0x91, 0xa0, 0x17, 0xcc, 0x98, 0x88, 0x17,
- 0xcc, 0x79, 0x87, 0x17, 0xcc, 0x71, 0x86, 0x17, 0xcc, 0x69, 0x85, 0x17,
- 0xcc, 0x61, 0x84, 0x17, 0xcc, 0x59, 0x83, 0x17, 0xcc, 0x51, 0xa6, 0x17,
- 0xcc, 0x49, 0xa5, 0x17, 0xcc, 0x41, 0xa4, 0x17, 0xcc, 0x39, 0xa3, 0x17,
- 0xcc, 0x31, 0xa2, 0x17, 0xcc, 0x29, 0xa1, 0x17, 0xcc, 0x21, 0xa0, 0x17,
- 0xcc, 0x19, 0x9f, 0x17, 0xcc, 0x11, 0x9e, 0x17, 0xcc, 0x09, 0x9d, 0x17,
- 0xcc, 0x00, 0xa5, 0x17, 0xca, 0xc1, 0xa4, 0x17, 0xca, 0xb9, 0xa3, 0x17,
- 0xca, 0xb1, 0xa2, 0x17, 0xca, 0xa9, 0xa1, 0x17, 0xca, 0xa1, 0x9e, 0x17,
- 0xca, 0x89, 0x9d, 0x17, 0xca, 0x81, 0x9f, 0x17, 0xca, 0x91, 0xa0, 0x17,
- 0xca, 0x99, 0xa6, 0x17, 0xca, 0xc9, 0x83, 0x17, 0xca, 0xd1, 0x84, 0x17,
- 0xca, 0xd9, 0x85, 0x17, 0xca, 0xe1, 0x86, 0x17, 0xca, 0xe9, 0x87, 0x17,
- 0xca, 0xf1, 0x88, 0x17, 0xca, 0xf8, 0x88, 0x17, 0xca, 0x79, 0x87, 0x17,
- 0xca, 0x71, 0x86, 0x17, 0xca, 0x69, 0x85, 0x17, 0xca, 0x61, 0x84, 0x17,
- 0xca, 0x59, 0x83, 0x17, 0xca, 0x51, 0xa6, 0x17, 0xca, 0x49, 0xa5, 0x17,
- 0xca, 0x41, 0xa4, 0x17, 0xca, 0x39, 0xa3, 0x17, 0xca, 0x31, 0xa2, 0x17,
- 0xca, 0x29, 0xa1, 0x17, 0xca, 0x21, 0xa0, 0x17, 0xca, 0x19, 0x9f, 0x17,
- 0xca, 0x11, 0x9e, 0x17, 0xca, 0x09, 0x9d, 0x17, 0xca, 0x00, 0xa2, 0x17,
- 0xc3, 0xa9, 0x9f, 0x17, 0xc3, 0x91, 0x88, 0x17, 0xc3, 0xf9, 0x87, 0x17,
- 0xc3, 0xf1, 0x86, 0x17, 0xc3, 0xe9, 0x85, 0x17, 0xc3, 0xe1, 0x84, 0x17,
- 0xc3, 0xd9, 0x83, 0x17, 0xc3, 0xd1, 0xa6, 0x17, 0xc3, 0xc9, 0xa5, 0x17,
- 0xc3, 0xc1, 0xa4, 0x17, 0xc3, 0xb9, 0xa3, 0x17, 0xc3, 0xb1, 0xa1, 0x17,
- 0xc3, 0xa1, 0xa0, 0x17, 0xc3, 0x99, 0x9e, 0x17, 0xc3, 0x89, 0x9d, 0x17,
- 0xc3, 0x80, 0x83, 0x17, 0xc3, 0x51, 0xa2, 0x17, 0xc3, 0x29, 0xa1, 0x17,
- 0xc3, 0x21, 0xa0, 0x17, 0xc3, 0x19, 0x9f, 0x17, 0xc3, 0x11, 0x9e, 0x17,
- 0xc3, 0x09, 0x88, 0x17, 0xc3, 0x79, 0x87, 0x17, 0xc3, 0x71, 0x86, 0x17,
- 0xc3, 0x69, 0x85, 0x17, 0xc3, 0x61, 0x84, 0x17, 0xc3, 0x59, 0xa6, 0x17,
- 0xc3, 0x49, 0xa5, 0x17, 0xc3, 0x41, 0xa4, 0x17, 0xc3, 0x39, 0xa3, 0x17,
- 0xc3, 0x31, 0x9d, 0x17, 0xc3, 0x00, 0xa6, 0x17, 0xc2, 0xc9, 0xa5, 0x17,
- 0xc2, 0xc1, 0xa4, 0x17, 0xc2, 0xb9, 0xa3, 0x17, 0xc2, 0xb1, 0xa2, 0x17,
- 0xc2, 0xa9, 0xa1, 0x17, 0xc2, 0xa1, 0xa0, 0x17, 0xc2, 0x99, 0x9f, 0x17,
- 0xc2, 0x91, 0x9e, 0x17, 0xc2, 0x89, 0x9d, 0x17, 0xc2, 0x81, 0x85, 0x17,
- 0xc2, 0xe1, 0x84, 0x17, 0xc2, 0xd9, 0x83, 0x17, 0xc2, 0xd1, 0x86, 0x17,
- 0xc2, 0xe9, 0x87, 0x17, 0xc2, 0xf1, 0x88, 0x17, 0xc2, 0xf8, 0x88, 0x17,
- 0xc2, 0x79, 0x87, 0x17, 0xc2, 0x71, 0xa6, 0x17, 0xc2, 0x49, 0xa5, 0x17,
- 0xc2, 0x41, 0xa4, 0x17, 0xc2, 0x39, 0xa3, 0x17, 0xc2, 0x31, 0xa2, 0x17,
- 0xc2, 0x29, 0xa1, 0x17, 0xc2, 0x21, 0xa0, 0x17, 0xc2, 0x19, 0x86, 0x17,
- 0xc2, 0x69, 0x85, 0x17, 0xc2, 0x61, 0x84, 0x17, 0xc2, 0x59, 0x83, 0x17,
- 0xc2, 0x51, 0x9f, 0x17, 0xc2, 0x11, 0x9e, 0x17, 0xc2, 0x09, 0x9d, 0x17,
- 0xc2, 0x00, 0xa5, 0x17, 0xc1, 0x41, 0xa4, 0x17, 0xc1, 0x39, 0xa3, 0x17,
- 0xc1, 0x31, 0xa2, 0x17, 0xc1, 0x29, 0xa1, 0x17, 0xc1, 0x21, 0x88, 0x17,
- 0xc1, 0x79, 0x87, 0x17, 0xc1, 0x71, 0x86, 0x17, 0xc1, 0x69, 0x85, 0x17,
- 0xc1, 0x61, 0x84, 0x17, 0xc1, 0x59, 0x83, 0x17, 0xc1, 0x51, 0xa6, 0x17,
- 0xc1, 0x49, 0xa0, 0x17, 0xc1, 0x19, 0x9f, 0x17, 0xc1, 0x11, 0x9e, 0x17,
- 0xc1, 0x09, 0x9d, 0x17, 0xc1, 0x00, 0xa5, 0x17, 0xc0, 0x41, 0xa4, 0x17,
- 0xc0, 0x39, 0x88, 0x17, 0xc0, 0x79, 0x87, 0x17, 0xc0, 0x71, 0x86, 0x17,
- 0xc0, 0x69, 0x85, 0x17, 0xc0, 0x61, 0x84, 0x17, 0xc0, 0x59, 0x83, 0x17,
- 0xc0, 0x51, 0xa6, 0x17, 0xc0, 0x49, 0xa3, 0x17, 0xc0, 0x31, 0xa2, 0x17,
- 0xc0, 0x29, 0xa1, 0x17, 0xc0, 0x21, 0x9d, 0x17, 0xc0, 0x01, 0x9e, 0x17,
- 0xc0, 0x09, 0x9f, 0x17, 0xc0, 0x11, 0xa0, 0x17, 0xc0, 0x18, 0x88, 0x17,
- 0xc7, 0xf9, 0x87, 0x17, 0xc7, 0xf1, 0x86, 0x17, 0xc7, 0xe9, 0x85, 0x17,
- 0xc7, 0xe1, 0x84, 0x17, 0xc7, 0xd9, 0x83, 0x17, 0xc7, 0xd1, 0xa6, 0x17,
- 0xc7, 0xc9, 0xa5, 0x17, 0xc7, 0xc1, 0xa4, 0x17, 0xc7, 0xb9, 0xa3, 0x17,
- 0xc7, 0xb1, 0xa2, 0x17, 0xc7, 0xa9, 0xa1, 0x17, 0xc7, 0xa1, 0xa0, 0x17,
- 0xc7, 0x99, 0x9f, 0x17, 0xc7, 0x91, 0x9e, 0x17, 0xc7, 0x89, 0x9d, 0x17,
- 0xc7, 0x80, 0x9d, 0x17, 0xc5, 0x81, 0x88, 0x17, 0xc5, 0xf9, 0x87, 0x17,
- 0xc5, 0xf1, 0x86, 0x17, 0xc5, 0xe9, 0x85, 0x17, 0xc5, 0xe1, 0x84, 0x17,
- 0xc5, 0xd9, 0x83, 0x17, 0xc5, 0xd1, 0xa6, 0x17, 0xc5, 0xc9, 0xa5, 0x17,
- 0xc5, 0xc1, 0xa4, 0x17, 0xc5, 0xb9, 0xa3, 0x17, 0xc5, 0xb1, 0xa2, 0x17,
- 0xc5, 0xa9, 0xa1, 0x17, 0xc5, 0xa1, 0xa0, 0x17, 0xc5, 0x99, 0x9f, 0x17,
- 0xc5, 0x91, 0x9e, 0x17, 0xc5, 0x88, 0x88, 0x17, 0xc5, 0x79, 0x87, 0x17,
- 0xc5, 0x71, 0x86, 0x17, 0xc5, 0x69, 0x85, 0x17, 0xc5, 0x61, 0x84, 0x17,
- 0xc5, 0x59, 0x83, 0x17, 0xc5, 0x51, 0xa6, 0x17, 0xc5, 0x49, 0xa5, 0x17,
- 0xc5, 0x41, 0xa4, 0x17, 0xc5, 0x39, 0xa3, 0x17, 0xc5, 0x31, 0xa2, 0x17,
- 0xc5, 0x29, 0xa1, 0x17, 0xc5, 0x21, 0xa0, 0x17, 0xc5, 0x19, 0x9f, 0x17,
- 0xc5, 0x11, 0x9e, 0x17, 0xc5, 0x09, 0x9d, 0x17, 0xc5, 0x00, 0x88, 0x17,
- 0xc4, 0xf9, 0x87, 0x17, 0xc4, 0xf1, 0x86, 0x17, 0xc4, 0xe9, 0x85, 0x17,
- 0xc4, 0xe1, 0x84, 0x17, 0xc4, 0xd9, 0x83, 0x17, 0xc4, 0xd1, 0xa6, 0x17,
- 0xc4, 0xc9, 0xa5, 0x17, 0xc4, 0xc1, 0xa4, 0x17, 0xc4, 0xb9, 0xa3, 0x17,
- 0xc4, 0xb1, 0xa2, 0x17, 0xc4, 0xa9, 0xa1, 0x17, 0xc4, 0xa1, 0xa0, 0x17,
- 0xc4, 0x99, 0x9f, 0x17, 0xc4, 0x91, 0x9e, 0x17, 0xc4, 0x89, 0x9d, 0x17,
- 0xc4, 0x80, 0x88, 0x17, 0xc4, 0x79, 0x87, 0x17, 0xc4, 0x71, 0x86, 0x17,
- 0xc4, 0x69, 0x85, 0x17, 0xc4, 0x61, 0x84, 0x17, 0xc4, 0x59, 0x83, 0x17,
- 0xc4, 0x51, 0xa6, 0x17, 0xc4, 0x49, 0xa5, 0x17, 0xc4, 0x41, 0xa4, 0x17,
- 0xc4, 0x39, 0xa3, 0x17, 0xc4, 0x31, 0xa2, 0x17, 0xc4, 0x29, 0xa1, 0x17,
- 0xc4, 0x21, 0xa0, 0x17, 0xc4, 0x19, 0x9f, 0x17, 0xc4, 0x11, 0x9e, 0x17,
- 0xc4, 0x09, 0x9d, 0x17, 0xc4, 0x00, 0x88, 0x17, 0xc7, 0x79, 0x87, 0x17,
- 0xc7, 0x71, 0x86, 0x17, 0xc7, 0x69, 0x85, 0x17, 0xc7, 0x61, 0x84, 0x17,
- 0xc7, 0x59, 0x83, 0x17, 0xc7, 0x51, 0xa6, 0x17, 0xc7, 0x49, 0xa5, 0x17,
- 0xc7, 0x41, 0xa4, 0x17, 0xc7, 0x39, 0xa3, 0x17, 0xc7, 0x31, 0xa2, 0x17,
- 0xc7, 0x29, 0xa1, 0x17, 0xc7, 0x21, 0x9d, 0x17, 0xc7, 0x01, 0x9e, 0x17,
- 0xc7, 0x09, 0x9f, 0x17, 0xc7, 0x11, 0xa0, 0x17, 0xc7, 0x18, 0xa6, 0x17,
- 0xc6, 0xc9, 0xa5, 0x17, 0xc6, 0xc1, 0xa4, 0x17, 0xc6, 0xb9, 0xa3, 0x17,
- 0xc6, 0xb1, 0xa2, 0x17, 0xc6, 0xa9, 0xa1, 0x17, 0xc6, 0xa1, 0xa0, 0x17,
- 0xc6, 0x99, 0x9f, 0x17, 0xc6, 0x91, 0x9e, 0x17, 0xc6, 0x89, 0x9d, 0x17,
- 0xc6, 0x81, 0x83, 0x17, 0xc6, 0xd1, 0x84, 0x17, 0xc6, 0xd9, 0x85, 0x17,
- 0xc6, 0xe1, 0x86, 0x17, 0xc6, 0xe9, 0x87, 0x17, 0xc6, 0xf1, 0x88, 0x17,
- 0xc6, 0xf8, 0x88, 0x17, 0xc6, 0x79, 0x87, 0x17, 0xc6, 0x71, 0x86, 0x17,
- 0xc6, 0x69, 0x85, 0x17, 0xc6, 0x61, 0x84, 0x17, 0xc6, 0x59, 0x83, 0x17,
- 0xc6, 0x51, 0xa6, 0x17, 0xc6, 0x49, 0xa5, 0x17, 0xc6, 0x41, 0xa4, 0x17,
- 0xc6, 0x39, 0xa3, 0x17, 0xc6, 0x31, 0xa2, 0x17, 0xc6, 0x29, 0xa1, 0x17,
- 0xc6, 0x21, 0xa0, 0x17, 0xc6, 0x19, 0x9f, 0x17, 0xc6, 0x11, 0x9e, 0x17,
- 0xc6, 0x09, 0x9d, 0x17, 0xc6, 0x00, 0x88, 0x17, 0xc1, 0xf9, 0x87, 0x17,
- 0xc1, 0xf1, 0x86, 0x17, 0xc1, 0xe9, 0x85, 0x17, 0xc1, 0xe1, 0x84, 0x17,
- 0xc1, 0xd9, 0x83, 0x17, 0xc1, 0xd1, 0xa6, 0x17, 0xc1, 0xc9, 0xa5, 0x17,
- 0xc1, 0xc1, 0xa4, 0x17, 0xc1, 0xb9, 0xa3, 0x17, 0xc1, 0xb1, 0xa2, 0x17,
- 0xc1, 0xa9, 0xa1, 0x17, 0xc1, 0xa1, 0xa0, 0x17, 0xc1, 0x99, 0x9f, 0x17,
- 0xc1, 0x91, 0x9e, 0x17, 0xc1, 0x89, 0x9d, 0x17, 0xc1, 0x80, 0x88, 0x17,
- 0xc0, 0xf9, 0x87, 0x17, 0xc0, 0xf1, 0x86, 0x17, 0xc0, 0xe9, 0x85, 0x17,
- 0xc0, 0xe1, 0x84, 0x17, 0xc0, 0xd9, 0x83, 0x17, 0xc0, 0xd1, 0xa6, 0x17,
- 0xc0, 0xc9, 0xa5, 0x17, 0xc0, 0xc1, 0xa4, 0x17, 0xc0, 0xb9, 0xa3, 0x17,
- 0xc0, 0xb1, 0xa2, 0x17, 0xc0, 0xa9, 0xa1, 0x17, 0xc0, 0xa1, 0xa0, 0x17,
- 0xc0, 0x99, 0x9f, 0x17, 0xc0, 0x91, 0x9e, 0x17, 0xc0, 0x89, 0x9d, 0x17,
- 0xc0, 0x80, 0x86, 0x17, 0xd0, 0xe9, 0x85, 0x17, 0xd0, 0xe1, 0x84, 0x17,
- 0xd0, 0xd9, 0x83, 0x17, 0xd0, 0xd1, 0xa6, 0x17, 0xd0, 0xc9, 0xa5, 0x17,
- 0xd0, 0xc1, 0xa4, 0x17, 0xd0, 0xb9, 0xa3, 0x17, 0xd0, 0xb1, 0xa2, 0x17,
- 0xd0, 0xa9, 0xa1, 0x17, 0xd0, 0xa1, 0xa0, 0x17, 0xd0, 0x99, 0x9f, 0x17,
- 0xd0, 0x91, 0x9e, 0x17, 0xd0, 0x89, 0x9d, 0x17, 0xd0, 0x80, 0x88, 0x17,
- 0xd0, 0x79, 0x87, 0x17, 0xd0, 0x71, 0x86, 0x17, 0xd0, 0x69, 0x85, 0x17,
- 0xd0, 0x61, 0x84, 0x17, 0xd0, 0x59, 0x83, 0x17, 0xd0, 0x51, 0xa6, 0x17,
- 0xd0, 0x49, 0xa5, 0x17, 0xd0, 0x41, 0xa4, 0x17, 0xd0, 0x39, 0xa3, 0x17,
- 0xd0, 0x31, 0xa2, 0x17, 0xd0, 0x29, 0xa1, 0x17, 0xd0, 0x21, 0xa0, 0x17,
- 0xd0, 0x19, 0x9f, 0x17, 0xd0, 0x11, 0x9e, 0x17, 0xd0, 0x09, 0x9d, 0x17,
- 0xd0, 0x00, 0xa6, 0x07, 0xd6, 0xc9, 0xa5, 0x07, 0xd6, 0xc1, 0xa4, 0x07,
- 0xd6, 0xb9, 0xa3, 0x07, 0xd6, 0xb1, 0xa2, 0x07, 0xd6, 0xa9, 0xa1, 0x07,
- 0xd6, 0xa1, 0xa0, 0x07, 0xd6, 0x99, 0x9f, 0x07, 0xd6, 0x91, 0x9e, 0x07,
- 0xd6, 0x89, 0x9d, 0x07, 0xd6, 0x80, 0x88, 0x07, 0xd6, 0x79, 0x87, 0x07,
- 0xd6, 0x71, 0x86, 0x07, 0xd6, 0x69, 0x85, 0x07, 0xd6, 0x61, 0x84, 0x07,
- 0xd6, 0x59, 0x83, 0x07, 0xd6, 0x51, 0xa6, 0x07, 0xd6, 0x49, 0xa5, 0x07,
- 0xd6, 0x41, 0xa4, 0x07, 0xd6, 0x39, 0xa3, 0x07, 0xd6, 0x31, 0xa2, 0x07,
- 0xd6, 0x29, 0xa1, 0x07, 0xd6, 0x21, 0xa0, 0x07, 0xd6, 0x19, 0x9f, 0x07,
- 0xd6, 0x11, 0x9e, 0x07, 0xd6, 0x09, 0x9d, 0x07, 0xd6, 0x00, 0x88, 0x07,
- 0xd5, 0xf9, 0x87, 0x07, 0xd5, 0xf1, 0x86, 0x07, 0xd5, 0xe9, 0x85, 0x07,
- 0xd5, 0xe1, 0x84, 0x07, 0xd5, 0xd9, 0x83, 0x07, 0xd5, 0xd1, 0xa6, 0x07,
- 0xd5, 0xc9, 0xa5, 0x07, 0xd5, 0xc1, 0xa4, 0x07, 0xd5, 0xb9, 0xa3, 0x07,
- 0xd5, 0xb1, 0xa2, 0x07, 0xd5, 0xa9, 0xa1, 0x07, 0xd5, 0xa1, 0xa0, 0x07,
- 0xd5, 0x99, 0x9f, 0x07, 0xd5, 0x91, 0x9e, 0x07, 0xd5, 0x89, 0x9d, 0x07,
- 0xd5, 0x80, 0x88, 0x07, 0xd5, 0x79, 0x87, 0x07, 0xd5, 0x71, 0x86, 0x07,
- 0xd5, 0x69, 0x85, 0x07, 0xd5, 0x61, 0x84, 0x07, 0xd5, 0x59, 0x83, 0x07,
- 0xd5, 0x51, 0xa6, 0x07, 0xd5, 0x49, 0xa5, 0x07, 0xd5, 0x41, 0xa4, 0x07,
- 0xd5, 0x39, 0xa3, 0x07, 0xd5, 0x31, 0xa2, 0x07, 0xd5, 0x29, 0xa1, 0x07,
- 0xd5, 0x21, 0xa0, 0x07, 0xd5, 0x19, 0x9f, 0x07, 0xd5, 0x11, 0x9e, 0x07,
- 0xd5, 0x09, 0x9d, 0x07, 0xd5, 0x00, 0x88, 0x07, 0xd4, 0xf9, 0x87, 0x07,
- 0xd4, 0xf1, 0x86, 0x07, 0xd4, 0xe9, 0x85, 0x07, 0xd4, 0xe1, 0x84, 0x07,
- 0xd4, 0xd9, 0x83, 0x07, 0xd4, 0xd1, 0xa6, 0x07, 0xd4, 0xc9, 0xa5, 0x07,
- 0xd4, 0xc1, 0xa4, 0x07, 0xd4, 0xb9, 0xa3, 0x07, 0xd4, 0xb1, 0xa2, 0x07,
- 0xd4, 0xa9, 0xa1, 0x07, 0xd4, 0xa1, 0xa0, 0x07, 0xd4, 0x99, 0x9f, 0x07,
- 0xd4, 0x91, 0x9e, 0x07, 0xd4, 0x89, 0x9d, 0x07, 0xd4, 0x80, 0x88, 0x07,
- 0xd4, 0x79, 0x87, 0x07, 0xd4, 0x71, 0x86, 0x07, 0xd4, 0x69, 0x85, 0x07,
- 0xd4, 0x61, 0x84, 0x07, 0xd4, 0x59, 0x83, 0x07, 0xd4, 0x51, 0xa6, 0x07,
- 0xd4, 0x49, 0xa5, 0x07, 0xd4, 0x41, 0xa4, 0x07, 0xd4, 0x39, 0xa3, 0x07,
- 0xd4, 0x31, 0xa2, 0x07, 0xd4, 0x29, 0xa1, 0x07, 0xd4, 0x21, 0xa0, 0x07,
- 0xd4, 0x19, 0x9f, 0x07, 0xd4, 0x11, 0x9e, 0x07, 0xd4, 0x09, 0x9d, 0x07,
- 0xd4, 0x00, 0x86, 0x07, 0xd3, 0xe9, 0x85, 0x07, 0xd3, 0xe1, 0x84, 0x07,
- 0xd3, 0xd9, 0x83, 0x07, 0xd3, 0xd1, 0xa6, 0x07, 0xd3, 0xc9, 0xa5, 0x07,
- 0xd3, 0xc1, 0xa4, 0x07, 0xd3, 0xb9, 0xa3, 0x07, 0xd3, 0xb1, 0xa2, 0x07,
- 0xd3, 0xa9, 0xa1, 0x07, 0xd3, 0xa1, 0xa0, 0x07, 0xd3, 0x99, 0x9f, 0x07,
- 0xd3, 0x91, 0x9e, 0x07, 0xd3, 0x89, 0x9d, 0x07, 0xd3, 0x81, 0x87, 0x07,
- 0xd3, 0xf1, 0x88, 0x07, 0xd3, 0xf8, 0x86, 0x07, 0xd3, 0x69, 0x85, 0x07,
- 0xd3, 0x61, 0x84, 0x07, 0xd3, 0x59, 0x83, 0x07, 0xd3, 0x51, 0xa6, 0x07,
- 0xd3, 0x49, 0xa5, 0x07, 0xd3, 0x41, 0xa4, 0x07, 0xd3, 0x39, 0xa3, 0x07,
- 0xd3, 0x31, 0xa2, 0x07, 0xd3, 0x29, 0xa1, 0x07, 0xd3, 0x21, 0xa0, 0x07,
- 0xd3, 0x19, 0x9f, 0x07, 0xd3, 0x11, 0x9e, 0x07, 0xd3, 0x09, 0x9d, 0x07,
- 0xd3, 0x00, 0x88, 0x07, 0xd2, 0xf9, 0x87, 0x07, 0xd2, 0xf1, 0x86, 0x07,
- 0xd2, 0xe9, 0x85, 0x07, 0xd2, 0xe1, 0x84, 0x07, 0xd2, 0xd9, 0x83, 0x07,
- 0xd2, 0xd1, 0xa6, 0x07, 0xd2, 0xc9, 0xa5, 0x07, 0xd2, 0xc1, 0xa4, 0x07,
- 0xd2, 0xb9, 0xa3, 0x07, 0xd2, 0xb1, 0xa2, 0x07, 0xd2, 0xa9, 0xa1, 0x07,
- 0xd2, 0xa1, 0xa0, 0x07, 0xd2, 0x99, 0x9f, 0x07, 0xd2, 0x91, 0x9e, 0x07,
- 0xd2, 0x89, 0x9d, 0x07, 0xd2, 0x80, 0x88, 0x07, 0xd2, 0x79, 0x87, 0x07,
- 0xd2, 0x71, 0x86, 0x07, 0xd2, 0x69, 0x85, 0x07, 0xd2, 0x61, 0x84, 0x07,
- 0xd2, 0x59, 0x83, 0x07, 0xd2, 0x51, 0xa6, 0x07, 0xd2, 0x49, 0xa5, 0x07,
- 0xd2, 0x41, 0xa4, 0x07, 0xd2, 0x39, 0xa3, 0x07, 0xd2, 0x31, 0xa2, 0x07,
- 0xd2, 0x29, 0xa1, 0x07, 0xd2, 0x21, 0xa0, 0x07, 0xd2, 0x19, 0x9f, 0x07,
- 0xd2, 0x11, 0x9d, 0x07, 0xd2, 0x01, 0x9e, 0x07, 0xd2, 0x08, 0x88, 0x07,
- 0xd1, 0xf9, 0x87, 0x07, 0xd1, 0xf1, 0x86, 0x07, 0xd1, 0xe9, 0x85, 0x07,
- 0xd1, 0xe1, 0x84, 0x07, 0xd1, 0xd9, 0x83, 0x07, 0xd1, 0xd1, 0xa6, 0x07,
- 0xd1, 0xc9, 0xa5, 0x07, 0xd1, 0xc1, 0xa4, 0x07, 0xd1, 0xb9, 0xa3, 0x07,
- 0xd1, 0xb1, 0xa2, 0x07, 0xd1, 0xa9, 0xa1, 0x07, 0xd1, 0xa1, 0xa0, 0x07,
- 0xd1, 0x99, 0x9f, 0x07, 0xd1, 0x91, 0x9e, 0x07, 0xd1, 0x89, 0x9d, 0x07,
- 0xd1, 0x80, 0x88, 0x07, 0xd1, 0x79, 0x87, 0x07, 0xd1, 0x71, 0x86, 0x07,
- 0xd1, 0x69, 0x85, 0x07, 0xd1, 0x61, 0x84, 0x07, 0xd1, 0x59, 0x83, 0x07,
- 0xd1, 0x51, 0xa6, 0x07, 0xd1, 0x49, 0xa5, 0x07, 0xd1, 0x41, 0xa4, 0x07,
- 0xd1, 0x39, 0xa3, 0x07, 0xd1, 0x31, 0xa2, 0x07, 0xd1, 0x29, 0xa1, 0x07,
- 0xd1, 0x21, 0xa0, 0x07, 0xd1, 0x19, 0x9f, 0x07, 0xd1, 0x11, 0x9e, 0x07,
- 0xd1, 0x09, 0x9d, 0x07, 0xd1, 0x00, 0x88, 0x07, 0xd0, 0xf9, 0x87, 0x07,
- 0xd0, 0xf1, 0x86, 0x07, 0xd0, 0xe9, 0x85, 0x07, 0xd0, 0xe1, 0x84, 0x07,
- 0xd0, 0xd9, 0x83, 0x07, 0xd0, 0xd1, 0xa6, 0x07, 0xd0, 0xc9, 0xa5, 0x07,
- 0xd0, 0xc1, 0xa4, 0x07, 0xd0, 0xb9, 0xa3, 0x07, 0xd0, 0xb1, 0xa2, 0x07,
- 0xd0, 0xa9, 0xa1, 0x07, 0xd0, 0xa1, 0xa0, 0x07, 0xd0, 0x99, 0x9f, 0x07,
- 0xd0, 0x91, 0x9e, 0x07, 0xd0, 0x89, 0x9d, 0x07, 0xd0, 0x80, 0x88, 0x07,
- 0xd0, 0x79, 0x87, 0x07, 0xd0, 0x71, 0x86, 0x07, 0xd0, 0x69, 0x85, 0x07,
- 0xd0, 0x61, 0x84, 0x07, 0xd0, 0x59, 0x83, 0x07, 0xd0, 0x51, 0xa6, 0x07,
- 0xd0, 0x49, 0xa5, 0x07, 0xd0, 0x41, 0xa4, 0x07, 0xd0, 0x39, 0xa3, 0x07,
- 0xd0, 0x31, 0xa2, 0x07, 0xd0, 0x29, 0xa1, 0x07, 0xd0, 0x21, 0xa0, 0x07,
- 0xd0, 0x19, 0x9f, 0x07, 0xd0, 0x11, 0x9e, 0x07, 0xd0, 0x09, 0x9d, 0x07,
- 0xd0, 0x00, 0x88, 0x07, 0xcf, 0xf9, 0x87, 0x07, 0xcf, 0xf1, 0x86, 0x07,
- 0xcf, 0xe9, 0x85, 0x07, 0xcf, 0xe1, 0x84, 0x07, 0xcf, 0xd9, 0x83, 0x07,
- 0xcf, 0xd1, 0xa6, 0x07, 0xcf, 0xc9, 0xa5, 0x07, 0xcf, 0xc1, 0xa4, 0x07,
- 0xcf, 0xb9, 0xa3, 0x07, 0xcf, 0xb1, 0xa2, 0x07, 0xcf, 0xa9, 0xa1, 0x07,
- 0xcf, 0xa1, 0xa0, 0x07, 0xcf, 0x99, 0x9f, 0x07, 0xcf, 0x91, 0x9e, 0x07,
- 0xcf, 0x89, 0x9d, 0x07, 0xcf, 0x80, 0x88, 0x07, 0xcf, 0x79, 0x87, 0x07,
- 0xcf, 0x71, 0x86, 0x07, 0xcf, 0x69, 0x85, 0x07, 0xcf, 0x61, 0x84, 0x07,
- 0xcf, 0x59, 0x83, 0x07, 0xcf, 0x51, 0xa6, 0x07, 0xcf, 0x49, 0xa5, 0x07,
- 0xcf, 0x41, 0xa4, 0x07, 0xcf, 0x39, 0xa3, 0x07, 0xcf, 0x31, 0xa2, 0x07,
- 0xcf, 0x29, 0xa1, 0x07, 0xcf, 0x21, 0xa0, 0x07, 0xcf, 0x19, 0x9f, 0x07,
- 0xcf, 0x11, 0x9e, 0x07, 0xcf, 0x09, 0x9d, 0x07, 0xcf, 0x00, 0x88, 0x07,
- 0xce, 0xf9, 0x87, 0x07, 0xce, 0xf1, 0x86, 0x07, 0xce, 0xe9, 0x85, 0x07,
- 0xce, 0xe1, 0x84, 0x07, 0xce, 0xd9, 0x83, 0x07, 0xce, 0xd1, 0xa6, 0x07,
- 0xce, 0xc9, 0xa5, 0x07, 0xce, 0xc1, 0xa4, 0x07, 0xce, 0xb9, 0xa3, 0x07,
- 0xce, 0xb1, 0xa2, 0x07, 0xce, 0xa9, 0xa1, 0x07, 0xce, 0xa1, 0xa0, 0x07,
- 0xce, 0x99, 0x9f, 0x07, 0xce, 0x91, 0x9e, 0x07, 0xce, 0x89, 0x9d, 0x07,
- 0xce, 0x80, 0x88, 0x07, 0xce, 0x79, 0x87, 0x07, 0xce, 0x71, 0x86, 0x07,
- 0xce, 0x69, 0x85, 0x07, 0xce, 0x61, 0x84, 0x07, 0xce, 0x59, 0x83, 0x07,
- 0xce, 0x51, 0xa6, 0x07, 0xce, 0x49, 0xa5, 0x07, 0xce, 0x41, 0xa4, 0x07,
- 0xce, 0x39, 0xa3, 0x07, 0xce, 0x31, 0xa2, 0x07, 0xce, 0x29, 0xa1, 0x07,
- 0xce, 0x21, 0xa0, 0x07, 0xce, 0x19, 0x9f, 0x07, 0xce, 0x11, 0x9e, 0x07,
- 0xce, 0x09, 0x9d, 0x07, 0xce, 0x00, 0x88, 0x07, 0xcd, 0xf9, 0x87, 0x07,
- 0xcd, 0xf1, 0x86, 0x07, 0xcd, 0xe9, 0x85, 0x07, 0xcd, 0xe1, 0x84, 0x07,
- 0xcd, 0xd9, 0x83, 0x07, 0xcd, 0xd1, 0xa6, 0x07, 0xcd, 0xc9, 0xa5, 0x07,
- 0xcd, 0xc1, 0xa4, 0x07, 0xcd, 0xb9, 0xa3, 0x07, 0xcd, 0xb1, 0xa2, 0x07,
- 0xcd, 0xa9, 0xa1, 0x07, 0xcd, 0xa1, 0xa0, 0x07, 0xcd, 0x99, 0x9f, 0x07,
- 0xcd, 0x91, 0x9e, 0x07, 0xcd, 0x89, 0x9d, 0x07, 0xcd, 0x80, 0x88, 0x07,
- 0xcd, 0x79, 0x87, 0x07, 0xcd, 0x71, 0x86, 0x07, 0xcd, 0x69, 0x85, 0x07,
- 0xcd, 0x61, 0x84, 0x07, 0xcd, 0x59, 0x83, 0x07, 0xcd, 0x51, 0xa6, 0x07,
- 0xcd, 0x49, 0xa5, 0x07, 0xcd, 0x41, 0xa4, 0x07, 0xcd, 0x39, 0xa3, 0x07,
- 0xcd, 0x31, 0xa2, 0x07, 0xcd, 0x29, 0xa1, 0x07, 0xcd, 0x21, 0xa0, 0x07,
- 0xcd, 0x19, 0x9f, 0x07, 0xcd, 0x11, 0x9e, 0x07, 0xcd, 0x09, 0x9d, 0x07,
- 0xcd, 0x00, 0x88, 0x07, 0xcc, 0xf9, 0x87, 0x07, 0xcc, 0xf1, 0x86, 0x07,
- 0xcc, 0xe9, 0x85, 0x07, 0xcc, 0xe1, 0x84, 0x07, 0xcc, 0xd9, 0x83, 0x07,
- 0xcc, 0xd1, 0xa6, 0x07, 0xcc, 0xc9, 0xa5, 0x07, 0xcc, 0xc1, 0xa4, 0x07,
- 0xcc, 0xb9, 0xa3, 0x07, 0xcc, 0xb1, 0xa2, 0x07, 0xcc, 0xa9, 0xa1, 0x07,
- 0xcc, 0xa1, 0xa0, 0x07, 0xcc, 0x99, 0x9f, 0x07, 0xcc, 0x91, 0x9e, 0x07,
- 0xcc, 0x89, 0x9d, 0x07, 0xcc, 0x80, 0x88, 0x07, 0xcc, 0x79, 0x87, 0x07,
- 0xcc, 0x71, 0x86, 0x07, 0xcc, 0x69, 0x85, 0x07, 0xcc, 0x61, 0x84, 0x07,
- 0xcc, 0x59, 0x83, 0x07, 0xcc, 0x51, 0xa6, 0x07, 0xcc, 0x49, 0xa5, 0x07,
- 0xcc, 0x41, 0xa4, 0x07, 0xcc, 0x39, 0xa3, 0x07, 0xcc, 0x31, 0xa2, 0x07,
- 0xcc, 0x29, 0xa1, 0x07, 0xcc, 0x21, 0xa0, 0x07, 0xcc, 0x19, 0x9f, 0x07,
- 0xcc, 0x11, 0x9e, 0x07, 0xcc, 0x09, 0x9d, 0x07, 0xcc, 0x00, 0x88, 0x07,
- 0xcb, 0xf9, 0x87, 0x07, 0xcb, 0xf1, 0x86, 0x07, 0xcb, 0xe9, 0x85, 0x07,
- 0xcb, 0xe1, 0x84, 0x07, 0xcb, 0xd9, 0x83, 0x07, 0xcb, 0xd1, 0xa6, 0x07,
- 0xcb, 0xc9, 0xa5, 0x07, 0xcb, 0xc1, 0xa4, 0x07, 0xcb, 0xb9, 0xa3, 0x07,
- 0xcb, 0xb1, 0xa2, 0x07, 0xcb, 0xa9, 0xa1, 0x07, 0xcb, 0xa1, 0xa0, 0x07,
- 0xcb, 0x99, 0x9f, 0x07, 0xcb, 0x91, 0x9e, 0x07, 0xcb, 0x89, 0x9d, 0x07,
- 0xcb, 0x80, 0x88, 0x07, 0xcb, 0x79, 0x87, 0x07, 0xcb, 0x71, 0x86, 0x07,
- 0xcb, 0x69, 0x85, 0x07, 0xcb, 0x61, 0x84, 0x07, 0xcb, 0x59, 0x83, 0x07,
- 0xcb, 0x51, 0xa6, 0x07, 0xcb, 0x49, 0xa5, 0x07, 0xcb, 0x41, 0xa4, 0x07,
- 0xcb, 0x39, 0xa3, 0x07, 0xcb, 0x31, 0xa2, 0x07, 0xcb, 0x29, 0xa1, 0x07,
- 0xcb, 0x21, 0xa0, 0x07, 0xcb, 0x19, 0x9f, 0x07, 0xcb, 0x11, 0x9e, 0x07,
- 0xcb, 0x09, 0x9d, 0x07, 0xcb, 0x00, 0x88, 0x07, 0xca, 0xf9, 0x87, 0x07,
- 0xca, 0xf1, 0x86, 0x07, 0xca, 0xe9, 0x85, 0x07, 0xca, 0xe1, 0x84, 0x07,
- 0xca, 0xd9, 0x83, 0x07, 0xca, 0xd1, 0xa6, 0x07, 0xca, 0xc9, 0xa5, 0x07,
- 0xca, 0xc1, 0xa4, 0x07, 0xca, 0xb9, 0xa3, 0x07, 0xca, 0xb1, 0xa2, 0x07,
- 0xca, 0xa9, 0xa1, 0x07, 0xca, 0xa1, 0xa0, 0x07, 0xca, 0x99, 0x9f, 0x07,
- 0xca, 0x91, 0x9e, 0x07, 0xca, 0x89, 0x9d, 0x07, 0xca, 0x80, 0x88, 0x07,
- 0xca, 0x79, 0x87, 0x07, 0xca, 0x71, 0x86, 0x07, 0xca, 0x69, 0x85, 0x07,
- 0xca, 0x61, 0x84, 0x07, 0xca, 0x59, 0x83, 0x07, 0xca, 0x51, 0xa6, 0x07,
- 0xca, 0x49, 0xa5, 0x07, 0xca, 0x41, 0xa4, 0x07, 0xca, 0x39, 0xa3, 0x07,
- 0xca, 0x31, 0xa2, 0x07, 0xca, 0x29, 0xa1, 0x07, 0xca, 0x21, 0xa0, 0x07,
- 0xca, 0x19, 0x9f, 0x07, 0xca, 0x11, 0x9e, 0x07, 0xca, 0x09, 0x9d, 0x07,
- 0xca, 0x00, 0x88, 0x07, 0xc9, 0xf9, 0x87, 0x07, 0xc9, 0xf1, 0x86, 0x07,
- 0xc9, 0xe9, 0x85, 0x07, 0xc9, 0xe1, 0x84, 0x07, 0xc9, 0xd9, 0x83, 0x07,
- 0xc9, 0xd1, 0xa6, 0x07, 0xc9, 0xc9, 0xa5, 0x07, 0xc9, 0xc1, 0xa4, 0x07,
- 0xc9, 0xb9, 0xa3, 0x07, 0xc9, 0xb1, 0xa2, 0x07, 0xc9, 0xa9, 0xa1, 0x07,
- 0xc9, 0xa1, 0xa0, 0x07, 0xc9, 0x99, 0x9d, 0x07, 0xc9, 0x81, 0x9e, 0x07,
- 0xc9, 0x89, 0x9f, 0x07, 0xc9, 0x90, 0xa4, 0x07, 0xc9, 0x39, 0xa3, 0x07,
- 0xc9, 0x31, 0xa2, 0x07, 0xc9, 0x29, 0xa1, 0x07, 0xc9, 0x21, 0xa0, 0x07,
- 0xc9, 0x19, 0x9f, 0x07, 0xc9, 0x11, 0x9d, 0x07, 0xc9, 0x01, 0x9e, 0x07,
- 0xc9, 0x09, 0xa5, 0x07, 0xc9, 0x41, 0xa6, 0x07, 0xc9, 0x49, 0x83, 0x07,
- 0xc9, 0x51, 0x84, 0x07, 0xc9, 0x59, 0x85, 0x07, 0xc9, 0x61, 0x86, 0x07,
- 0xc9, 0x69, 0x87, 0x07, 0xc9, 0x71, 0x88, 0x07, 0xc9, 0x78, 0x86, 0x07,
- 0xc8, 0xe9, 0x85, 0x07, 0xc8, 0xe1, 0x84, 0x07, 0xc8, 0xd9, 0x83, 0x07,
- 0xc8, 0xd1, 0xa6, 0x07, 0xc8, 0xc9, 0xa5, 0x07, 0xc8, 0xc1, 0xa4, 0x07,
- 0xc8, 0xb9, 0xa3, 0x07, 0xc8, 0xb1, 0xa2, 0x07, 0xc8, 0xa9, 0xa1, 0x07,
- 0xc8, 0xa1, 0xa0, 0x07, 0xc8, 0x99, 0x9f, 0x07, 0xc8, 0x91, 0x9e, 0x07,
- 0xc8, 0x89, 0x9d, 0x07, 0xc8, 0x81, 0x87, 0x07, 0xc8, 0xf1, 0x88, 0x07,
- 0xc8, 0xf8, 0x88, 0x07, 0xc8, 0x79, 0x87, 0x07, 0xc8, 0x71, 0x86, 0x07,
- 0xc8, 0x69, 0x85, 0x07, 0xc8, 0x61, 0x84, 0x07, 0xc8, 0x59, 0x83, 0x07,
- 0xc8, 0x51, 0xa6, 0x07, 0xc8, 0x49, 0xa5, 0x07, 0xc8, 0x41, 0xa4, 0x07,
- 0xc8, 0x39, 0xa3, 0x07, 0xc8, 0x31, 0xa2, 0x07, 0xc8, 0x29, 0xa1, 0x07,
- 0xc8, 0x21, 0xa0, 0x07, 0xc8, 0x19, 0x9d, 0x07, 0xc8, 0x01, 0x9e, 0x07,
- 0xc8, 0x09, 0x9f, 0x07, 0xc8, 0x10, 0xc3, 0xa4, 0xed, 0x01, 0x75, 0x81,
- 0xc2, 0x00, 0x4c, 0x01, 0x76, 0x29, 0xc5, 0x70, 0xfe, 0x01, 0x76, 0x41,
- 0xc4, 0x0a, 0x92, 0x01, 0x76, 0x49, 0xc3, 0x03, 0x25, 0x01, 0x77, 0x38,
- 0xc3, 0x1e, 0x5b, 0x01, 0x76, 0x81, 0xc3, 0x00, 0x9e, 0x01, 0x76, 0xa0,
- 0xc3, 0x02, 0xa8, 0x01, 0x76, 0x99, 0xc3, 0x48, 0xc9, 0x01, 0x76, 0xd0,
- 0xcd, 0x7f, 0xab, 0x01, 0x76, 0xc9, 0xc4, 0xe5, 0x77, 0x01, 0x77, 0x71,
- 0xc5, 0xd6, 0xe7, 0x01, 0x77, 0x98, 0xc2, 0x02, 0x55, 0x01, 0x76, 0xe1,
- 0xc3, 0x05, 0x1a, 0x01, 0x77, 0x29, 0xc3, 0x23, 0x4a, 0x01, 0x77, 0x50,
- 0xc2, 0x00, 0x9e, 0x01, 0x77, 0x01, 0xc3, 0x18, 0x84, 0x01, 0x77, 0x60,
- 0xc3, 0x01, 0xb4, 0x01, 0x74, 0x11, 0x16, 0x42, 0x1e, 0x15, 0xc3, 0x01,
- 0xb4, 0x01, 0x74, 0xa1, 0xc3, 0x01, 0x59, 0x01, 0x74, 0xa8, 0x0a, 0xc2,
- 0x1e, 0x21, 0x19, 0xc2, 0x1e, 0x2d, 0xc6, 0xcb, 0x58, 0x01, 0x77, 0x48,
- 0xc2, 0x01, 0x47, 0x01, 0x74, 0x79, 0xc4, 0x04, 0x5e, 0x01, 0x74, 0x80,
- 0xc3, 0x01, 0xb4, 0x01, 0x74, 0xb1, 0xc3, 0x01, 0x59, 0x01, 0x74, 0xb8,
- 0xc3, 0x01, 0xb4, 0x01, 0x76, 0xa9, 0xc3, 0x01, 0x59, 0x01, 0x76, 0xb0,
- 0xc3, 0x01, 0xb4, 0x01, 0x75, 0x09, 0xc3, 0x01, 0x59, 0x01, 0x75, 0x10,
- 0xc3, 0x01, 0xb4, 0x01, 0x76, 0x69, 0xc3, 0x01, 0x59, 0x01, 0x76, 0x70,
- 0xc4, 0xe5, 0x77, 0x01, 0x77, 0x69, 0xc5, 0xd6, 0xe7, 0x01, 0x77, 0x90,
- 0xc2, 0x01, 0x47, 0x01, 0x76, 0xf1, 0xc4, 0x04, 0x5e, 0x01, 0x76, 0xf8,
- 0xc2, 0x01, 0x47, 0x01, 0x75, 0xf9, 0xc4, 0x04, 0x5e, 0x01, 0x76, 0x00,
- 0x92, 0x01, 0x8e, 0x59, 0x9c, 0x01, 0x8e, 0x72, 0x02, 0x1e, 0x39, 0x89,
- 0x01, 0x8e, 0x40, 0x09, 0xc2, 0x1e, 0x3d, 0x98, 0x05, 0x5b, 0xa9, 0x97,
- 0x05, 0x5b, 0xa1, 0x91, 0x05, 0x5b, 0x99, 0x8b, 0x05, 0x5b, 0x91, 0x87,
- 0x05, 0x5b, 0x89, 0x83, 0x05, 0x5b, 0x81, 0x1b, 0xc2, 0x1e, 0x55, 0x19,
- 0xc2, 0x1e, 0x6d, 0x16, 0xc2, 0x1e, 0x85, 0x10, 0xc2, 0x1e, 0x99, 0x0a,
- 0xc2, 0x1e, 0xb4, 0x0f, 0xc2, 0x1e, 0xd2, 0x0e, 0xc2, 0x1e, 0xea, 0xc2,
- 0x04, 0x2b, 0x05, 0x5b, 0xb9, 0x42, 0x00, 0x90, 0xc2, 0x1f, 0x02, 0x95,
- 0x05, 0x5c, 0xeb, 0x02, 0x1f, 0x1a, 0x06, 0x42, 0x1f, 0x32, 0x83, 0x00,
- 0x9d, 0x01, 0x87, 0x00, 0x9d, 0x09, 0x8b, 0x00, 0x9d, 0x11, 0x91, 0x00,
- 0x9d, 0x19, 0x97, 0x00, 0x9d, 0x21, 0x98, 0x00, 0x9d, 0x29, 0x09, 0xc2,
- 0x1f, 0x50, 0xc2, 0x04, 0x2b, 0x00, 0x9d, 0x39, 0x0a, 0xc2, 0x1f, 0x68,
- 0x0e, 0xc2, 0x1f, 0x86, 0x0f, 0xc2, 0x1f, 0x9e, 0x10, 0xc2, 0x1f, 0xb6,
- 0x42, 0x00, 0x90, 0xc2, 0x1f, 0xd1, 0x95, 0x00, 0x9e, 0x6b, 0x02, 0x1f,
- 0xe9, 0x06, 0xc2, 0x20, 0x01, 0x16, 0xc2, 0x20, 0x1f, 0x19, 0xc2, 0x20,
- 0x33, 0x1b, 0x42, 0x20, 0x4b, 0x00, 0x42, 0x20, 0x63, 0xcd, 0x7e, 0x66,
- 0x0f, 0xa5, 0xc8, 0xc3, 0x3b, 0xc7, 0x08, 0x8a, 0x21, 0xc2, 0x14, 0x40,
- 0x08, 0x89, 0x18, 0xc2, 0x14, 0x40, 0x08, 0x89, 0x09, 0xc3, 0x4d, 0xc8,
- 0x08, 0x89, 0x00, 0xc3, 0x3b, 0xc7, 0x08, 0x88, 0xf1, 0xc2, 0x14, 0x40,
- 0x08, 0x88, 0xe8, 0xc3, 0x3b, 0xc7, 0x08, 0x88, 0xe1, 0xc2, 0x14, 0x40,
- 0x08, 0x88, 0xd8, 0xc2, 0x14, 0x40, 0x08, 0x88, 0xd1, 0xc3, 0x4b, 0xb6,
- 0x08, 0x88, 0xa9, 0xc3, 0x4d, 0xc8, 0x08, 0x88, 0x81, 0xc3, 0x57, 0x5c,
- 0x08, 0x88, 0x58, 0xc3, 0x3b, 0xc7, 0x08, 0x88, 0xc9, 0xc2, 0x14, 0x40,
- 0x08, 0x88, 0xc1, 0x06, 0x42, 0x20, 0x6f, 0xc3, 0x3b, 0xc7, 0x08, 0x88,
- 0xb9, 0xc2, 0x14, 0x40, 0x08, 0x88, 0xb1, 0x16, 0x42, 0x20, 0x7b, 0xc3,
- 0x3b, 0xc7, 0x08, 0x88, 0x79, 0xc2, 0x14, 0x40, 0x08, 0x88, 0x70, 0xc3,
- 0x3b, 0xc7, 0x08, 0x88, 0x69, 0xc2, 0x14, 0x40, 0x08, 0x88, 0x60, 0xc3,
- 0x3b, 0xc7, 0x08, 0x88, 0x51, 0xc2, 0x14, 0x40, 0x08, 0x88, 0x48, 0xc3,
- 0x3b, 0xc7, 0x08, 0x88, 0x41, 0xc2, 0x14, 0x40, 0x08, 0x88, 0x38, 0x87,
- 0x08, 0x89, 0x63, 0x02, 0x20, 0x87, 0x83, 0x08, 0x89, 0x3b, 0x02, 0x20,
- 0x8b, 0x91, 0x08, 0x89, 0x73, 0x02, 0x20, 0x97, 0x97, 0x08, 0x89, 0x53,
- 0x02, 0x20, 0x9b, 0x8b, 0x08, 0x89, 0x42, 0x02, 0x20, 0x9f, 0xc4, 0x22,
- 0x71, 0x08, 0x89, 0xf9, 0xc5, 0x01, 0xdb, 0x08, 0x89, 0xf1, 0x15, 0xc2,
- 0x20, 0xa3, 0x08, 0xc2, 0x20, 0xaf, 0x16, 0xc2, 0x20, 0xbb, 0xc3, 0x01,
- 0xb4, 0x08, 0x89, 0xb9, 0xc4, 0x15, 0xd3, 0x08, 0x89, 0xb0, 0xc7, 0x44,
- 0x79, 0x08, 0x88, 0x11, 0xc8, 0x11, 0x40, 0x08, 0x88, 0x09, 0xcb, 0x1e,
- 0x17, 0x08, 0x88, 0x00, 0x8a, 0x05, 0x52, 0x69, 0x8f, 0x05, 0x52, 0x61,
- 0xc2, 0x00, 0x35, 0x05, 0x52, 0x18, 0x87, 0x05, 0x51, 0x90, 0x97, 0x05,
- 0x51, 0x89, 0x8b, 0x05, 0x51, 0x81, 0x83, 0x05, 0x51, 0x48, 0x87, 0x05,
- 0x51, 0x70, 0x8b, 0x05, 0x51, 0x58, 0x83, 0x05, 0x51, 0x39, 0xc2, 0x0c,
- 0x65, 0x05, 0x51, 0x30, 0x09, 0xc2, 0x20, 0xc7, 0x83, 0x05, 0x50, 0xc1,
- 0xc2, 0x0f, 0x4d, 0x05, 0x50, 0xb9, 0x0a, 0x42, 0x20, 0xd1, 0xc2, 0x00,
- 0xa4, 0x05, 0x50, 0x49, 0x83, 0x05, 0x50, 0x40, 0xc2, 0x00, 0xa4, 0x05,
- 0x50, 0x39, 0x83, 0x05, 0x50, 0x30, 0x8b, 0x05, 0x50, 0x20, 0xc2, 0x05,
- 0x88, 0x05, 0x52, 0x59, 0x8e, 0x05, 0x52, 0x51, 0x94, 0x05, 0x52, 0x49,
- 0x9b, 0x05, 0x52, 0x41, 0x92, 0x05, 0x52, 0x39, 0x90, 0x05, 0x52, 0x33,
- 0x02, 0x20, 0xe1, 0x96, 0x05, 0x52, 0x29, 0xc2, 0x11, 0xd4, 0x05, 0x52,
- 0x21, 0x89, 0x05, 0x52, 0x09, 0x8d, 0x05, 0x52, 0x00, 0xc2, 0x00, 0xde,
- 0x05, 0x51, 0x09, 0x83, 0x05, 0x50, 0xe9, 0xc2, 0x00, 0xa4, 0x05, 0x50,
- 0xf0, 0x83, 0x05, 0x51, 0x01, 0xc2, 0x0f, 0x4d, 0x05, 0x50, 0xf8, 0xc2,
- 0x00, 0xa4, 0x05, 0x50, 0xe1, 0xc2, 0x03, 0xa4, 0x05, 0x50, 0xd9, 0x83,
- 0x05, 0x50, 0xd0, 0xc2, 0x0b, 0xc6, 0x05, 0x50, 0xc9, 0xc2, 0x00, 0xa4,
- 0x05, 0x50, 0xb1, 0x83, 0x05, 0x50, 0xa8, 0xc2, 0x00, 0xa4, 0x05, 0x50,
- 0xa1, 0x83, 0x05, 0x50, 0x98, 0xc2, 0x00, 0xa4, 0x05, 0x50, 0x79, 0x83,
- 0x05, 0x50, 0x70, 0xc2, 0x00, 0xa4, 0x05, 0x50, 0x69, 0x83, 0x05, 0x50,
- 0x60, 0xcb, 0x95, 0x0d, 0x05, 0x52, 0xf1, 0xc4, 0x1c, 0xd0, 0x05, 0x52,
- 0xe8, 0xc4, 0x18, 0x83, 0x05, 0x52, 0xb9, 0xc2, 0x26, 0x51, 0x05, 0x52,
- 0xb0, 0xc3, 0x0c, 0x5b, 0x05, 0x52, 0xa9, 0xc3, 0x06, 0x9e, 0x05, 0x52,
- 0xa0, 0xc4, 0x04, 0x5e, 0x05, 0x52, 0x99, 0xc2, 0x01, 0x47, 0x05, 0x52,
- 0x90, 0xc8, 0x0c, 0x4a, 0x08, 0x7e, 0x58, 0x19, 0xc2, 0x20, 0xe5, 0xc2,
- 0x00, 0x4d, 0x08, 0x7e, 0x49, 0xc4, 0x04, 0x5e, 0x08, 0x7e, 0x38, 0xc3,
- 0x0d, 0xd9, 0x08, 0x7e, 0x19, 0xca, 0xa8, 0xec, 0x08, 0x7d, 0x89, 0xc5,
- 0xd9, 0x76, 0x08, 0x7d, 0xf8, 0xc2, 0x00, 0x6e, 0x08, 0x7d, 0xc8, 0xc4,
- 0x37, 0x5c, 0x08, 0x7d, 0x81, 0xc3, 0x15, 0x1d, 0x08, 0x7e, 0x00, 0xc9,
- 0xad, 0x7e, 0x01, 0x31, 0x49, 0xc8, 0xc0, 0xb5, 0x01, 0x31, 0x40, 0xc5,
- 0xcf, 0x1a, 0x0f, 0xaa, 0x13, 0x02, 0x20, 0xef, 0x4a, 0x9c, 0x80, 0x42,
- 0x20, 0xf5, 0xe0, 0x07, 0x07, 0x0f, 0x8c, 0x50, 0x45, 0x04, 0x73, 0xc2,
- 0x21, 0x01, 0xcd, 0x32, 0x88, 0x00, 0x24, 0x49, 0x48, 0x0c, 0x4b, 0xc2,
- 0x21, 0x07, 0x12, 0xc2, 0x21, 0x13, 0xce, 0x71, 0xcf, 0x00, 0x24, 0x29,
- 0x16, 0xc2, 0x21, 0x23, 0x47, 0x02, 0x90, 0xc2, 0x21, 0x38, 0xc5, 0xda,
- 0x02, 0x05, 0x33, 0x79, 0xc6, 0x4b, 0x24, 0x05, 0x33, 0xe0, 0xcc, 0x85,
- 0x30, 0x01, 0x06, 0xc9, 0xcb, 0x09, 0xdc, 0x01, 0x06, 0xa8, 0xc6, 0x01,
- 0x61, 0x00, 0x19, 0x68, 0xc3, 0x02, 0xa7, 0x00, 0x18, 0x63, 0x02, 0x21,
- 0xa6, 0xc9, 0x1e, 0x19, 0x00, 0x18, 0x80, 0x44, 0x00, 0xac, 0xc2, 0x21,
- 0xac, 0xcf, 0x60, 0xdd, 0x07, 0xf1, 0x32, 0x02, 0x21, 0xbb, 0x08, 0xc2,
- 0x21, 0xc1, 0x8b, 0x0f, 0x00, 0x5b, 0x02, 0x21, 0xcd, 0x04, 0xc2, 0x21,
- 0xdf, 0x1b, 0xc2, 0x21, 0xeb, 0x15, 0xc2, 0x21, 0xfd, 0xc6, 0x76, 0x19,
- 0x0f, 0x00, 0xe9, 0x16, 0xc2, 0x22, 0x0d, 0xc4, 0xe1, 0x07, 0x0f, 0x00,
- 0xc1, 0xc3, 0xca, 0x3a, 0x0f, 0x00, 0xb1, 0xc5, 0xd7, 0xa0, 0x0f, 0x00,
- 0x99, 0xc6, 0xcf, 0x13, 0x0f, 0x00, 0x91, 0xc3, 0x01, 0xc5, 0x0f, 0x00,
- 0x89, 0xc5, 0xd9, 0xdf, 0x0f, 0x00, 0x81, 0xc7, 0x61, 0xe4, 0x0f, 0x00,
- 0x79, 0xc7, 0xc3, 0xda, 0x0f, 0x00, 0x71, 0xc4, 0xe2, 0x8b, 0x0f, 0x00,
- 0x69, 0x06, 0xc2, 0x22, 0x19, 0x1c, 0xc2, 0x22, 0x25, 0xc7, 0xc7, 0xe6,
- 0x0f, 0x00, 0x19, 0xc4, 0xe0, 0x5b, 0x0f, 0x00, 0x11, 0xc3, 0xe7, 0x0c,
- 0x0f, 0x00, 0x00, 0x44, 0x26, 0xe6, 0xc2, 0x22, 0x31, 0x03, 0x42, 0x22,
- 0x4f, 0xc5, 0x00, 0x95, 0x01, 0x07, 0x81, 0xc5, 0x01, 0x62, 0x00, 0x1a,
- 0xc8, 0xcc, 0x8b, 0x9c, 0x01, 0x07, 0x39, 0x4c, 0x09, 0x76, 0x42, 0x22,
- 0x61, 0xc5, 0x01, 0x62, 0x00, 0xef, 0xe9, 0xc5, 0x00, 0x95, 0x00, 0x1a,
- 0x60, 0x02, 0xc2, 0x22, 0x6d, 0x00, 0x42, 0x22, 0x79, 0x43, 0x01, 0xdd,
- 0xc2, 0x22, 0x88, 0x43, 0x00, 0x35, 0x42, 0x22, 0x90, 0x45, 0x06, 0x15,
- 0xc2, 0x22, 0xa2, 0xd2, 0x4d, 0xa6, 0x00, 0x19, 0x10, 0x00, 0xc2, 0x22,
- 0xae, 0x46, 0x01, 0x09, 0x42, 0x22, 0xca, 0x43, 0x00, 0x35, 0xc2, 0x22,
- 0xd6, 0xc6, 0x80, 0x82, 0x00, 0x19, 0x90, 0x4d, 0x26, 0xea, 0xc2, 0x22,
- 0xe6, 0x55, 0x35, 0x9e, 0x42, 0x23, 0x69, 0xde, 0x0d, 0xf8, 0x00, 0xd5,
- 0xc9, 0x46, 0x19, 0x72, 0x42, 0x23, 0x7d, 0xcc, 0x87, 0xb8, 0x01, 0x07,
- 0x49, 0xd5, 0x32, 0x2c, 0x00, 0xef, 0xc8, 0xc8, 0xb7, 0xad, 0x01, 0x07,
- 0x41, 0xcc, 0x84, 0x40, 0x00, 0xd6, 0x59, 0xc3, 0x01, 0x4a, 0x00, 0xd5,
- 0xa0, 0x00, 0x42, 0x23, 0x8f, 0x44, 0x05, 0x98, 0xc2, 0x23, 0xa7, 0x16,
- 0xc2, 0x23, 0xb1, 0x42, 0x01, 0x07, 0x42, 0x23, 0xbb, 0xcb, 0x92, 0x9a,
- 0x00, 0xef, 0xd9, 0x49, 0xb1, 0x77, 0x42, 0x23, 0xc7, 0xc5, 0xde, 0x12,
- 0x00, 0xd5, 0x89, 0xc6, 0x01, 0x61, 0x00, 0x19, 0x20, 0xd8, 0x21, 0x60,
- 0x01, 0x07, 0x21, 0xc6, 0xd0, 0xf3, 0x01, 0x07, 0x19, 0x15, 0xc2, 0x23,
- 0xd9, 0xc6, 0x01, 0x7a, 0x01, 0x06, 0xeb, 0x02, 0x23, 0xe5, 0xc7, 0x3f,
- 0x2e, 0x01, 0x06, 0xf8, 0xd5, 0x36, 0xd9, 0x01, 0x06, 0x99, 0x15, 0x42,
- 0x23, 0xeb, 0xcd, 0x7e, 0x59, 0x00, 0xd6, 0x29, 0xc4, 0x01, 0x63, 0x00,
- 0x19, 0xd8, 0xe0, 0x02, 0xa7, 0x00, 0xd5, 0xd0, 0xc3, 0x0f, 0xc0, 0x00,
- 0x18, 0x33, 0x02, 0x23, 0xf7, 0x45, 0x32, 0x37, 0x42, 0x24, 0x03, 0xc4,
- 0x01, 0x1e, 0x00, 0xef, 0xb9, 0xc5, 0x01, 0xf7, 0x00, 0xef, 0xb0, 0xd1,
- 0x30, 0x3a, 0x01, 0x84, 0xc9, 0xd6, 0x30, 0x35, 0x01, 0x84, 0xd0, 0x46,
- 0x36, 0xaa, 0xc2, 0x24, 0x0f, 0xd1, 0x3d, 0x6c, 0x00, 0x1a, 0x70, 0x47,
- 0x1e, 0x8f, 0xc2, 0x24, 0x1b, 0xc6, 0x65, 0xa5, 0x00, 0xd5, 0x90, 0xc6,
- 0x00, 0x94, 0x00, 0xee, 0x70, 0xc2, 0x00, 0x57, 0x08, 0x1b, 0xb1, 0xc3,
- 0x64, 0x07, 0x08, 0x1b, 0xb9, 0xc4, 0xe0, 0x87, 0x08, 0x1b, 0xc1, 0xc5,
- 0xda, 0x07, 0x08, 0x1b, 0xc9, 0xc3, 0xe7, 0x90, 0x08, 0x1b, 0xd0, 0x02,
- 0xc2, 0x24, 0x27, 0x00, 0x42, 0x24, 0x39, 0xc5, 0x00, 0x95, 0x00, 0xd6,
- 0x41, 0xc5, 0x01, 0x62, 0x00, 0x18, 0xf8, 0x4a, 0x5e, 0x83, 0xc2, 0x24,
- 0x51, 0xd4, 0x3a, 0xc1, 0x00, 0x19, 0x08, 0xc5, 0x00, 0x95, 0x00, 0x19,
- 0xe9, 0xc5, 0x01, 0x62, 0x00, 0x1a, 0x98, 0xc5, 0x00, 0x95, 0x00, 0x18,
- 0x69, 0xc5, 0x01, 0x62, 0x00, 0x19, 0x48, 0xc4, 0x22, 0x71, 0x0e, 0x9b,
- 0x89, 0xc5, 0x01, 0xdb, 0x0e, 0x9b, 0x81, 0x15, 0xc2, 0x24, 0x63, 0x08,
- 0xc2, 0x24, 0x6f, 0x16, 0xc2, 0x24, 0x7b, 0xc3, 0x01, 0xb4, 0x0e, 0x9b,
- 0x48, 0xc4, 0x22, 0x71, 0x0e, 0x9b, 0x41, 0xc5, 0x01, 0xdb, 0x0e, 0x9b,
- 0x39, 0x15, 0xc2, 0x24, 0x87, 0x08, 0xc2, 0x24, 0x93, 0x16, 0xc2, 0x24,
- 0x9f, 0xc3, 0x01, 0xb4, 0x0e, 0x9b, 0x00, 0xc7, 0x76, 0x66, 0x01, 0x17,
- 0xe9, 0x48, 0x03, 0x3b, 0xc2, 0x24, 0xab, 0xd6, 0x2b, 0xd3, 0x01, 0x17,
- 0xd0, 0xcf, 0x4a, 0xe8, 0x01, 0x15, 0x9b, 0x02, 0x24, 0xb1, 0xc6, 0x0b,
- 0x0e, 0x01, 0x10, 0x58, 0x0d, 0xc2, 0x24, 0xb7, 0x0a, 0xc2, 0x24, 0xc7,
- 0x42, 0x01, 0x29, 0xc2, 0x24, 0xd3, 0x15, 0xc2, 0x24, 0xdf, 0x06, 0xc2,
- 0x24, 0xf5, 0x03, 0xc2, 0x25, 0x07, 0xc4, 0xe0, 0x93, 0x01, 0x64, 0x19,
- 0xc3, 0xe6, 0x67, 0x01, 0x64, 0x49, 0xc4, 0xe0, 0x87, 0x01, 0x64, 0x69,
- 0x16, 0xc2, 0x25, 0x13, 0xc5, 0xdd, 0x4a, 0x01, 0x64, 0x99, 0x0e, 0xc2,
- 0x25, 0x1f, 0xc2, 0x00, 0x50, 0x01, 0x64, 0xc9, 0xc2, 0x04, 0x6e, 0x01,
- 0x64, 0xd9, 0x91, 0x01, 0x64, 0xfb, 0x02, 0x25, 0x2b, 0x12, 0xc2, 0x25,
- 0x37, 0xc2, 0x00, 0x39, 0x01, 0x65, 0x19, 0xc2, 0x00, 0x91, 0x01, 0x65,
- 0x49, 0x08, 0xc2, 0x25, 0x41, 0x42, 0x0a, 0x0f, 0xc2, 0x25, 0x4b, 0xcd,
- 0x80, 0x47, 0x01, 0x67, 0x98, 0x0d, 0xc2, 0x25, 0x57, 0xc5, 0xdc, 0x50,
- 0x01, 0x67, 0x29, 0xc5, 0xd7, 0x9b, 0x01, 0x67, 0x31, 0x15, 0xc2, 0x25,
- 0x63, 0xc6, 0xd2, 0x2b, 0x01, 0x67, 0x40, 0x0a, 0xc2, 0x25, 0x6f, 0x42,
- 0x01, 0x29, 0xc2, 0x25, 0x7b, 0x15, 0xc2, 0x25, 0x87, 0x06, 0xc2, 0x25,
- 0x9d, 0x03, 0xc2, 0x25, 0xaf, 0xc4, 0xe0, 0x93, 0x01, 0x64, 0x11, 0xc3,
- 0xe6, 0x67, 0x01, 0x64, 0x41, 0xc4, 0xe0, 0x87, 0x01, 0x64, 0x61, 0x16,
- 0xc2, 0x25, 0xbb, 0xc5, 0xdd, 0x4a, 0x01, 0x64, 0x91, 0x0d, 0xc2, 0x25,
- 0xc7, 0x0e, 0xc2, 0x25, 0xd7, 0xc2, 0x00, 0x50, 0x01, 0x64, 0xc1, 0xc2,
- 0x04, 0x6e, 0x01, 0x64, 0xd1, 0x91, 0x01, 0x64, 0xf3, 0x02, 0x25, 0xe3,
- 0x12, 0xc2, 0x25, 0xef, 0xc2, 0x00, 0x39, 0x01, 0x65, 0x11, 0xc2, 0x00,
- 0x91, 0x01, 0x65, 0x41, 0x08, 0xc2, 0x25, 0xf9, 0x42, 0x0a, 0x0f, 0xc2,
- 0x26, 0x03, 0xcd, 0x80, 0x47, 0x01, 0x67, 0x90, 0xc8, 0xb9, 0xfd, 0x01,
- 0x67, 0x79, 0x49, 0xaf, 0x40, 0x42, 0x26, 0x0f, 0xc3, 0x01, 0xb4, 0x08,
- 0x17, 0x09, 0x16, 0xc2, 0x26, 0x1b, 0x08, 0xc2, 0x26, 0x27, 0x15, 0xc2,
- 0x26, 0x33, 0xc5, 0x01, 0xdb, 0x08, 0x17, 0x41, 0xc4, 0x22, 0x71, 0x08,
- 0x17, 0x48, 0x16, 0xc2, 0x26, 0x3f, 0x08, 0xc2, 0x26, 0x4d, 0x15, 0xc2,
- 0x26, 0x55, 0x45, 0x01, 0xdb, 0xc2, 0x26, 0x61, 0x44, 0x22, 0x71, 0xc2,
- 0x26, 0x6b, 0xcb, 0x0c, 0x47, 0x08, 0x17, 0x98, 0xcb, 0x8e, 0xa6, 0x0f,
- 0xa7, 0x59, 0xcc, 0x87, 0x4c, 0x0f, 0xa7, 0x50, 0xc7, 0x5b, 0xab, 0x0f,
- 0x98, 0x11, 0xd0, 0x60, 0x62, 0x01, 0x52, 0x62, 0x02, 0x26, 0x77, 0xc4,
- 0x0f, 0x20, 0x01, 0x56, 0x7b, 0x02, 0x26, 0x7d, 0xc6, 0x31, 0x53, 0x01,
- 0x56, 0x82, 0x02, 0x26, 0x83, 0xcf, 0x62, 0x90, 0x01, 0x11, 0x91, 0xd2,
- 0x48, 0x72, 0x01, 0x4a, 0x08, 0xd3, 0x45, 0x9d, 0x01, 0x0d, 0xb9, 0xe0,
- 0x07, 0xa7, 0x01, 0x5b, 0x70, 0xdb, 0x16, 0x09, 0x0f, 0xae, 0xc1, 0x46,
- 0x01, 0x09, 0x42, 0x26, 0x89, 0xe0, 0x0a, 0x67, 0x0f, 0xa8, 0x18, 0x19,
- 0xc2, 0x26, 0x92, 0x42, 0x00, 0x4d, 0xc2, 0x26, 0x9c, 0x44, 0x04, 0x5e,
- 0x42, 0x26, 0xa8, 0x45, 0x6a, 0x79, 0xc2, 0x26, 0xb4, 0x44, 0x01, 0xdc,
- 0x42, 0x26, 0xc0, 0xc7, 0xc8, 0x1e, 0x0f, 0xab, 0x21, 0xc7, 0xcb, 0xb3,
- 0x0f, 0xaa, 0xc0, 0x44, 0x18, 0x83, 0xc2, 0x26, 0xcc, 0x42, 0x26, 0x51,
- 0x42, 0x26, 0xd8, 0x43, 0x0c, 0x5b, 0xc2, 0x26, 0xe4, 0x43, 0x06, 0x9e,
- 0x42, 0x26, 0xf0, 0xc7, 0xc8, 0x1e, 0x0f, 0xaa, 0xe1, 0xc7, 0xcb, 0xb3,
- 0x0f, 0xaa, 0x80, 0x44, 0x0d, 0xbd, 0xc2, 0x26, 0xfc, 0xd8, 0x00, 0xcf,
- 0x0f, 0x8b, 0x71, 0x85, 0x0f, 0x8b, 0x69, 0x86, 0x0f, 0x89, 0x68, 0xdb,
- 0x17, 0x17, 0x01, 0x3d, 0x91, 0xd8, 0x21, 0x48, 0x01, 0x1c, 0x49, 0xcb,
- 0x9a, 0xfb, 0x0f, 0x8b, 0x79, 0x46, 0xca, 0x87, 0x42, 0x27, 0x06, 0x45,
- 0x02, 0x32, 0xc2, 0x27, 0x4c, 0x9c, 0x0f, 0x89, 0x70, 0x0b, 0xc2, 0x27,
- 0x58, 0xc3, 0x00, 0xe4, 0x01, 0x14, 0xe9, 0x11, 0x42, 0x27, 0x64, 0x45,
- 0x07, 0x12, 0xc2, 0x27, 0x6e, 0xc8, 0x05, 0x87, 0x01, 0x4e, 0x00, 0x16,
- 0xc2, 0x27, 0x7a, 0xc8, 0x4c, 0xe0, 0x01, 0x23, 0x91, 0x07, 0xc2, 0x27,
- 0x8f, 0x15, 0xc2, 0x27, 0x9b, 0x08, 0x42, 0x27, 0xa7, 0xc7, 0x01, 0xe8,
- 0x0f, 0xbe, 0xab, 0x02, 0x27, 0xb1, 0xc4, 0x01, 0x2e, 0x01, 0x14, 0xb8,
- 0xd0, 0x5a, 0xf2, 0x01, 0x14, 0xd9, 0x4c, 0x04, 0xbb, 0x42, 0x27, 0xb7,
- 0xcc, 0x85, 0xe4, 0x01, 0x14, 0xd1, 0xce, 0x65, 0x34, 0x01, 0x4d, 0xc0,
- 0xc4, 0x1b, 0x39, 0x01, 0x14, 0xb1, 0x49, 0x1e, 0xa7, 0x42, 0x27, 0xc3,
- 0xc3, 0x26, 0x13, 0x01, 0x14, 0xa9, 0xcc, 0x83, 0xc8, 0x01, 0x4d, 0xc9,
- 0xc7, 0x36, 0xbd, 0x01, 0x4d, 0xb9, 0xca, 0xa0, 0x18, 0x01, 0x81, 0xb0,
- 0x49, 0x9f, 0x65, 0xc2, 0x27, 0xc9, 0x5b, 0x15, 0xb8, 0xc2, 0x28, 0x15,
- 0xd1, 0x56, 0xb1, 0x0f, 0xb6, 0x40, 0xc5, 0x1d, 0x5a, 0x01, 0x4d, 0xf9,
- 0xc5, 0xdd, 0xdb, 0x01, 0x5d, 0xf8, 0x50, 0x4d, 0x17, 0xc2, 0x28, 0x1d,
- 0x48, 0xbb, 0x1d, 0x42, 0x28, 0x29, 0x03, 0xc2, 0x28, 0x61, 0x46, 0x01,
- 0xc7, 0xc2, 0x28, 0x67, 0x0e, 0xc2, 0x28, 0x73, 0xd0, 0x5c, 0x22, 0x01,
- 0x2e, 0x89, 0xcd, 0x77, 0x64, 0x01, 0x2e, 0x69, 0x43, 0x01, 0x59, 0xc2,
- 0x28, 0x7f, 0x15, 0xc2, 0x28, 0x85, 0xce, 0x0f, 0xa7, 0x01, 0x4d, 0xa8,
- 0xe0, 0x03, 0x27, 0x01, 0x4d, 0xd0, 0xa2, 0x09, 0x1b, 0x5b, 0x02, 0x28,
- 0x91, 0xd1, 0x55, 0x6e, 0x09, 0x2a, 0x11, 0x8f, 0x09, 0x1b, 0x71, 0xc3,
- 0x31, 0xb5, 0x09, 0x1b, 0x68, 0xa4, 0x09, 0x2a, 0x09, 0xc2, 0xe6, 0x6e,
- 0x09, 0x1b, 0x09, 0x89, 0x09, 0x1b, 0x01, 0x00, 0x42, 0x28, 0x97, 0xc2,
- 0xe0, 0x35, 0x09, 0x1b, 0x49, 0x89, 0x09, 0x1b, 0x41, 0x84, 0x09, 0x1b,
- 0x33, 0x02, 0x28, 0xa3, 0xa0, 0x09, 0x1b, 0x29, 0xc8, 0xb8, 0x8d, 0x09,
- 0x1b, 0x20, 0x97, 0x09, 0x19, 0xbb, 0x02, 0x28, 0xa9, 0x9f, 0x09, 0x19,
- 0x5b, 0x02, 0x28, 0xb8, 0x8b, 0x09, 0x19, 0xab, 0x02, 0x28, 0xbc, 0xa1,
- 0x09, 0x19, 0xa1, 0x00, 0x42, 0x28, 0xc0, 0x97, 0x09, 0x1c, 0xcb, 0x02,
- 0x28, 0xcc, 0x47, 0x19, 0x8e, 0xc2, 0x28, 0xd2, 0xc3, 0x72, 0x28, 0x09,
- 0x18, 0x60, 0x47, 0x01, 0x2c, 0xc2, 0x28, 0xe4, 0xc2, 0x06, 0x1f, 0x09,
- 0x19, 0x1b, 0x02, 0x28, 0xfd, 0xc3, 0x11, 0x39, 0x09, 0x19, 0x10, 0x97,
- 0x09, 0x1a, 0xe1, 0xa0, 0x09, 0x1a, 0xd2, 0x02, 0x29, 0x03, 0xc3, 0xe6,
- 0x6d, 0x09, 0x1a, 0xc1, 0x9f, 0x09, 0x1a, 0xb9, 0x9a, 0x09, 0x1a, 0xb1,
- 0x47, 0x01, 0x2c, 0x42, 0x29, 0x09, 0xc5, 0x39, 0x40, 0x09, 0x19, 0x38,
- 0xc2, 0x03, 0x87, 0x09, 0x18, 0xe1, 0x00, 0x42, 0x29, 0x1c, 0x8f, 0x09,
- 0x18, 0x43, 0x02, 0x29, 0x37, 0x94, 0x09, 0x18, 0x4b, 0x02, 0x29, 0x3d,
- 0x8d, 0x09, 0x18, 0x39, 0xc2, 0x03, 0x88, 0x09, 0x18, 0x30, 0xc2, 0x3d,
- 0x53, 0x09, 0x17, 0xd3, 0x02, 0x29, 0x43, 0x94, 0x09, 0x17, 0xd9, 0x89,
- 0x09, 0x17, 0x9b, 0x02, 0x29, 0x49, 0x84, 0x09, 0x17, 0x83, 0x02, 0x29,
- 0x4f, 0x00, 0x42, 0x29, 0x53, 0x9f, 0x09, 0x1c, 0xb9, 0x94, 0x09, 0x18,
- 0x0b, 0x02, 0x29, 0x65, 0x8e, 0x09, 0x18, 0x01, 0xc5, 0x59, 0x54, 0x09,
- 0x17, 0xf8, 0xc5, 0x39, 0x40, 0x09, 0x17, 0xe8, 0x00, 0xc2, 0x29, 0x69,
- 0xc3, 0xdb, 0xf3, 0x09, 0x17, 0x09, 0xc2, 0x9d, 0xb6, 0x09, 0x17, 0x01,
- 0x89, 0x09, 0x16, 0xea, 0x02, 0x29, 0x75, 0x97, 0x09, 0x16, 0xbb, 0x02,
- 0x29, 0x7c, 0x87, 0x09, 0x15, 0xd3, 0x02, 0x29, 0x8f, 0x83, 0x09, 0x15,
- 0x6b, 0x02, 0x29, 0xa6, 0x0b, 0x42, 0x29, 0xc0, 0x89, 0x09, 0x14, 0xab,
- 0x02, 0x29, 0xe1, 0x94, 0x09, 0x15, 0x61, 0xc4, 0xe5, 0x8f, 0x09, 0x15,
- 0x59, 0x8e, 0x09, 0x15, 0x4a, 0x02, 0x29, 0xe5, 0x94, 0x09, 0x17, 0x4b,
- 0x02, 0x29, 0xeb, 0x8f, 0x09, 0x17, 0x3b, 0x02, 0x29, 0xef, 0xc3, 0x08,
- 0x07, 0x09, 0x17, 0x31, 0x86, 0x09, 0x17, 0x23, 0x02, 0x29, 0xf5, 0xc8,
- 0x82, 0x3f, 0x09, 0x17, 0x18, 0x90, 0x09, 0x1c, 0x7b, 0x02, 0x29, 0xf9,
- 0xc3, 0x76, 0xe3, 0x09, 0x13, 0x01, 0x8f, 0x09, 0x12, 0x7b, 0x02, 0x2a,
- 0x06, 0x9f, 0x09, 0x12, 0x71, 0xc8, 0x9a, 0x8d, 0x09, 0x12, 0x68, 0xc2,
- 0x31, 0xb2, 0x09, 0x13, 0x13, 0x02, 0x2a, 0x0c, 0x90, 0x09, 0x13, 0x1a,
- 0x02, 0x2a, 0x10, 0xa1, 0x09, 0x1c, 0x71, 0x8f, 0x09, 0x12, 0x33, 0x02,
- 0x2a, 0x1d, 0xc2, 0x01, 0x29, 0x09, 0x12, 0x03, 0x02, 0x2a, 0x27, 0x9f,
- 0x09, 0x11, 0xf8, 0x00, 0x42, 0x2a, 0x2f, 0xc2, 0x00, 0xe5, 0x09, 0x11,
- 0x93, 0x02, 0x2a, 0x3b, 0xc4, 0xe6, 0x2b, 0x09, 0x11, 0x89, 0xc4, 0xe5,
- 0x93, 0x09, 0x11, 0x81, 0x89, 0x09, 0x11, 0x73, 0x02, 0x2a, 0x46, 0xc8,
- 0xb8, 0x2d, 0x09, 0x11, 0x68, 0xc9, 0xb4, 0x74, 0x09, 0x28, 0xf9, 0x90,
- 0x09, 0x11, 0x58, 0x95, 0x09, 0x11, 0x4a, 0x02, 0x2a, 0x4c, 0xc2, 0x00,
- 0xe5, 0x09, 0x11, 0x33, 0x02, 0x2a, 0x50, 0x94, 0x09, 0x11, 0x29, 0x8a,
- 0x09, 0x11, 0x21, 0x9f, 0x09, 0x11, 0x19, 0x00, 0x42, 0x2a, 0x54, 0x9f,
- 0x09, 0x0f, 0xeb, 0x02, 0x2a, 0x60, 0x8f, 0x09, 0x10, 0xeb, 0x02, 0x2a,
- 0x64, 0x8e, 0x09, 0x10, 0xe1, 0x8a, 0x09, 0x10, 0xd9, 0xc3, 0x3e, 0xb4,
- 0x09, 0x10, 0xbb, 0x02, 0x2a, 0x6d, 0xa0, 0x09, 0x10, 0xb1, 0xca, 0x90,
- 0xa1, 0x09, 0x0f, 0xe0, 0x42, 0x0d, 0x9e, 0xc2, 0x2a, 0x71, 0x42, 0x01,
- 0x29, 0xc2, 0x2a, 0x93, 0x8f, 0x09, 0x0f, 0xa3, 0x02, 0x2a, 0xa1, 0x8e,
- 0x09, 0x0f, 0x93, 0x02, 0x2a, 0xaa, 0xc4, 0xe1, 0x4b, 0x09, 0x0f, 0x88,
- 0xc2, 0x00, 0xe5, 0x09, 0x0f, 0xd1, 0xc4, 0xe5, 0x3f, 0x09, 0x0f, 0xc9,
- 0x8e, 0x09, 0x0f, 0xc0, 0x47, 0x01, 0x2c, 0xc2, 0x2a, 0xb0, 0xc9, 0xb5,
- 0xc1, 0x09, 0x1b, 0x79, 0xc4, 0x47, 0x42, 0x09, 0x0c, 0xe3, 0x02, 0x2a,
- 0xfc, 0x0f, 0xc2, 0x2b, 0x00, 0x8e, 0x09, 0x0c, 0xbb, 0x02, 0x2b, 0x08,
- 0x8d, 0x09, 0x0c, 0xab, 0x02, 0x2b, 0x0c, 0x06, 0xc2, 0x2b, 0x12, 0x84,
- 0x09, 0x0c, 0x79, 0x9f, 0x09, 0x0c, 0x6a, 0x02, 0x2b, 0x25, 0xc4, 0x5a,
- 0x32, 0x09, 0x0d, 0xa9, 0x94, 0x09, 0x0d, 0x9b, 0x02, 0x2b, 0x2b, 0x90,
- 0x09, 0x0d, 0x91, 0x8e, 0x09, 0x0d, 0x83, 0x02, 0x2b, 0x31, 0xa4, 0x09,
- 0x0d, 0x79, 0xa1, 0x09, 0x0d, 0x6b, 0x02, 0x2b, 0x37, 0xa0, 0x09, 0x0d,
- 0x61, 0x49, 0x0b, 0xf4, 0x42, 0x2b, 0x3d, 0x15, 0xc2, 0x2b, 0x43, 0x90,
- 0x09, 0x0d, 0x29, 0x86, 0x09, 0x0d, 0x21, 0x47, 0x01, 0x2c, 0x42, 0x2b,
- 0x56, 0x47, 0x01, 0x2c, 0x42, 0x2b, 0x63, 0x00, 0xc2, 0x2b, 0x94, 0x8e,
- 0x09, 0x09, 0x60, 0xc2, 0x00, 0xe5, 0x09, 0x1b, 0xe9, 0xc2, 0xe1, 0x51,
- 0x09, 0x09, 0xf1, 0xc2, 0x59, 0x52, 0x09, 0x09, 0xc2, 0x02, 0x2b, 0xa3,
- 0x86, 0x09, 0x08, 0xf3, 0x02, 0x2b, 0xa9, 0x9f, 0x09, 0x08, 0xc3, 0x02,
- 0x2b, 0xad, 0x94, 0x09, 0x09, 0x2b, 0x02, 0x2b, 0xb1, 0x8f, 0x09, 0x09,
- 0x1b, 0x02, 0x2b, 0xb9, 0x8e, 0x09, 0x09, 0x11, 0xcc, 0x8c, 0xbc, 0x09,
- 0x08, 0xb8, 0x15, 0xc2, 0x2b, 0xbf, 0x89, 0x09, 0x1b, 0xe1, 0x14, 0xc2,
- 0x2b, 0xcc, 0xc3, 0x5b, 0x41, 0x09, 0x08, 0x39, 0xa1, 0x09, 0x08, 0x23,
- 0x02, 0x2b, 0xda, 0x00, 0x42, 0x2b, 0xde, 0xc5, 0xdb, 0x83, 0x09, 0x07,
- 0xf3, 0x02, 0x2b, 0xea, 0xc2, 0xda, 0x56, 0x09, 0x1b, 0xd8, 0xc2, 0x00,
- 0xde, 0x09, 0x07, 0x73, 0x02, 0x2b, 0xf0, 0x9f, 0x09, 0x05, 0xbb, 0x02,
- 0x2b, 0xf4, 0xc4, 0x4e, 0xd8, 0x09, 0x07, 0xe9, 0x94, 0x09, 0x07, 0xdb,
- 0x02, 0x2b, 0xf8, 0x90, 0x09, 0x07, 0xb3, 0x02, 0x2b, 0xfc, 0x8f, 0x09,
- 0x07, 0xa9, 0x8e, 0x09, 0x07, 0x93, 0x02, 0x2c, 0x03, 0x86, 0x09, 0x07,
- 0x83, 0x02, 0x2c, 0x0f, 0xc5, 0x39, 0x40, 0x09, 0x05, 0xb0, 0x00, 0x42,
- 0x2c, 0x15, 0xce, 0x72, 0x93, 0x09, 0x25, 0x60, 0xc3, 0xa1, 0xeb, 0x09,
- 0x04, 0xfb, 0x02, 0x2c, 0x21, 0xc2, 0x00, 0x4d, 0x09, 0x04, 0xf0, 0x47,
- 0x01, 0x2c, 0x42, 0x2c, 0x27, 0x00, 0x42, 0x2c, 0x4d, 0xd3, 0x3f, 0xf9,
- 0x09, 0x04, 0x61, 0xc9, 0xaa, 0xf6, 0x09, 0x04, 0x58, 0x89, 0x09, 0x04,
- 0x0b, 0x02, 0x2c, 0x65, 0x84, 0x09, 0x03, 0xf3, 0x02, 0x2c, 0x71, 0xc2,
- 0x3e, 0xab, 0x09, 0x04, 0x49, 0x90, 0x09, 0x04, 0x23, 0x02, 0x2c, 0x7b,
- 0x8a, 0x09, 0x04, 0x19, 0x00, 0x42, 0x2c, 0x86, 0x8f, 0x09, 0x03, 0xa3,
- 0x02, 0x2c, 0x98, 0xc2, 0x00, 0xe5, 0x09, 0x03, 0xcb, 0x02, 0x2c, 0xa5,
- 0x90, 0x09, 0x03, 0xbb, 0x02, 0x2c, 0xab, 0x84, 0x09, 0x03, 0x98, 0x89,
- 0x09, 0x02, 0xb3, 0x02, 0x2c, 0xb1, 0xcb, 0x3d, 0x4a, 0x09, 0x24, 0x41,
- 0x94, 0x09, 0x03, 0x7b, 0x02, 0x2c, 0xb9, 0x8f, 0x09, 0x03, 0x70, 0x00,
- 0xc2, 0x2c, 0xbd, 0x94, 0x09, 0x02, 0x9b, 0x02, 0x2c, 0xc9, 0xc3, 0x65,
- 0xb9, 0x09, 0x02, 0x8a, 0x02, 0x2c, 0xcd, 0xc4, 0x3e, 0xa9, 0x09, 0x02,
- 0x1b, 0x02, 0x2c, 0xd3, 0x86, 0x09, 0x02, 0x0b, 0x02, 0x2c, 0xd9, 0x94,
- 0x09, 0x02, 0x3b, 0x02, 0x2c, 0xdf, 0x8e, 0x09, 0x02, 0x23, 0x02, 0x2c,
- 0xe5, 0xc2, 0xe7, 0xf4, 0x09, 0x02, 0x10, 0x47, 0x01, 0x2c, 0x42, 0x2c,
- 0xf1, 0xcb, 0x9a, 0x8d, 0x09, 0x24, 0x10, 0x00, 0xc2, 0x2d, 0x01, 0x9f,
- 0x09, 0x00, 0xb2, 0x02, 0x2d, 0x0d, 0x47, 0x01, 0x2c, 0x42, 0x2d, 0x13,
- 0x8a, 0x09, 0x01, 0xc3, 0x02, 0x2d, 0x1f, 0xc3, 0xe6, 0xa3, 0x09, 0x01,
- 0xb8, 0xc3, 0x91, 0xf2, 0x09, 0x01, 0xb1, 0xc2, 0x01, 0xf2, 0x09, 0x01,
- 0xa2, 0x02, 0x2d, 0x2d, 0xc3, 0x0a, 0x91, 0x09, 0x01, 0x91, 0x00, 0x42,
- 0x2d, 0x33, 0xc3, 0x37, 0x5d, 0x09, 0x01, 0x51, 0xc2, 0x00, 0x57, 0x09,
- 0x01, 0x49, 0x47, 0x01, 0x2c, 0x42, 0x2d, 0x45, 0x47, 0x01, 0x2c, 0x42,
- 0x2d, 0x6d, 0xc3, 0x76, 0xe3, 0x09, 0x00, 0x41, 0xc4, 0x79, 0x0d, 0x09,
- 0x00, 0x39, 0xca, 0x3e, 0x23, 0x09, 0x00, 0x31, 0xc3, 0x04, 0xca, 0x09,
- 0x00, 0x29, 0xc2, 0x00, 0xa4, 0x09, 0x00, 0x21, 0xc9, 0x5b, 0xd9, 0x09,
- 0x00, 0x19, 0xc3, 0x61, 0x9a, 0x09, 0x00, 0x11, 0x83, 0x09, 0x00, 0x08,
- 0x14, 0xc2, 0x2d, 0x79, 0x00, 0x42, 0x2d, 0x86, 0xc9, 0x0b, 0x7e, 0x09,
- 0x1c, 0xa0, 0x92, 0x09, 0x13, 0xe9, 0x90, 0x09, 0x13, 0xe1, 0x86, 0x09,
- 0x13, 0xd8, 0x84, 0x09, 0x14, 0x80, 0xc2, 0x00, 0x34, 0x09, 0x0a, 0x99,
- 0x00, 0x42, 0x2d, 0x92, 0x9f, 0x09, 0x0a, 0x69, 0xd0, 0x5b, 0xd2, 0x09,
- 0x0a, 0x60, 0x8b, 0x09, 0x0a, 0x32, 0x02, 0x2d, 0xaa, 0x4b, 0x99, 0x85,
- 0x42, 0x2d, 0xae, 0x97, 0x09, 0x20, 0xa3, 0x02, 0x2d, 0xba, 0xd3, 0x42,
- 0x0d, 0x09, 0x22, 0x33, 0x02, 0x2d, 0xc0, 0xc5, 0xdc, 0x91, 0x09, 0x21,
- 0x59, 0xc5, 0xd7, 0x64, 0x09, 0x20, 0xe9, 0xc4, 0x05, 0x19, 0x09, 0x20,
- 0x71, 0xc3, 0x00, 0xe4, 0x09, 0x20, 0x38, 0xc3, 0x13, 0xfc, 0x09, 0x22,
- 0xb9, 0xc3, 0x0f, 0x42, 0x09, 0x22, 0xb0, 0x97, 0x09, 0x20, 0x9b, 0x02,
- 0x2d, 0xce, 0xd1, 0x4f, 0xc9, 0x09, 0x22, 0x23, 0x02, 0x2d, 0xd4, 0xc5,
- 0xdc, 0x91, 0x09, 0x21, 0x51, 0xc5, 0xd7, 0x64, 0x09, 0x20, 0xe1, 0xc4,
- 0x05, 0x19, 0x09, 0x20, 0x69, 0xc3, 0x00, 0xe4, 0x09, 0x20, 0x30, 0x08,
- 0xc2, 0x2d, 0xd8, 0xca, 0x9d, 0x16, 0x09, 0x23, 0x31, 0xc9, 0xb1, 0x41,
- 0x09, 0x23, 0x28, 0x97, 0x09, 0x20, 0x93, 0x02, 0x2d, 0xe4, 0x51, 0x4f,
- 0xb8, 0xc2, 0x2d, 0xea, 0xc5, 0xdc, 0x91, 0x09, 0x21, 0x49, 0xc5, 0xd7,
- 0x64, 0x09, 0x20, 0xd9, 0xc4, 0x05, 0x19, 0x09, 0x20, 0x61, 0xc3, 0x00,
- 0xe4, 0x09, 0x20, 0x28, 0x97, 0x09, 0x20, 0x8b, 0x02, 0x2d, 0xf2, 0xc3,
- 0x00, 0xe4, 0x09, 0x20, 0x23, 0x02, 0x2d, 0xf8, 0xd1, 0x4f, 0x96, 0x09,
- 0x22, 0x01, 0xc5, 0xdc, 0x91, 0x09, 0x21, 0x41, 0xc5, 0xd7, 0x64, 0x09,
- 0x20, 0xd1, 0xc4, 0x05, 0x19, 0x09, 0x20, 0x58, 0xc3, 0x0f, 0x42, 0x09,
- 0x21, 0x99, 0xc4, 0x05, 0x19, 0x09, 0x21, 0x90, 0x97, 0x09, 0x20, 0x83,
- 0x02, 0x2d, 0xfe, 0x15, 0xc2, 0x2e, 0x04, 0x04, 0xc2, 0x2e, 0x10, 0xc3,
- 0x00, 0xe4, 0x09, 0x20, 0x1b, 0x02, 0x2e, 0x1f, 0x44, 0x64, 0xbd, 0xc2,
- 0x2e, 0x25, 0xc4, 0x05, 0x19, 0x09, 0x20, 0x50, 0x97, 0x09, 0x20, 0x7b,
- 0x02, 0x2e, 0x2d, 0x04, 0xc2, 0x2e, 0x33, 0xc3, 0x00, 0xe4, 0x09, 0x20,
- 0x13, 0x02, 0x2e, 0x42, 0xd2, 0x4d, 0xee, 0x09, 0x21, 0xe3, 0x02, 0x2e,
- 0x48, 0x44, 0xa5, 0xb8, 0xc2, 0x2e, 0x50, 0x44, 0x64, 0xbd, 0xc2, 0x2e,
- 0x58, 0xc4, 0x05, 0x19, 0x09, 0x20, 0x48, 0xc8, 0xbf, 0xdd, 0x09, 0x23,
- 0x21, 0x48, 0x19, 0x0b, 0xc2, 0x2e, 0x60, 0x07, 0xc2, 0x2e, 0x6c, 0x46,
- 0x08, 0x27, 0xc2, 0x2e, 0x78, 0x04, 0xc2, 0x2e, 0x84, 0xc5, 0xdc, 0xa0,
- 0x09, 0x21, 0x61, 0x44, 0x64, 0xbd, 0x42, 0x2e, 0x90, 0xc7, 0x0a, 0xb9,
- 0x09, 0x23, 0x11, 0xc5, 0xd4, 0x89, 0x09, 0x23, 0x08, 0x47, 0x90, 0x95,
- 0xc2, 0x2e, 0x98, 0xc5, 0xd7, 0xf0, 0x09, 0x22, 0xc9, 0x04, 0xc2, 0x2e,
- 0xa4, 0xc3, 0x00, 0xe4, 0x09, 0x20, 0x03, 0x02, 0x2e, 0xb0, 0x44, 0xa5,
- 0xb8, 0xc2, 0x2e, 0xb6, 0x44, 0x64, 0xbd, 0x42, 0x2e, 0xbe, 0x04, 0xc2,
- 0x2e, 0xc6, 0xc3, 0x00, 0xe4, 0x09, 0x20, 0x0b, 0x02, 0x2e, 0xd5, 0x50,
- 0x58, 0x32, 0xc2, 0x2e, 0xdb, 0x44, 0xa5, 0xb8, 0xc2, 0x2e, 0xe7, 0x44,
- 0x64, 0xbd, 0xc2, 0x2e, 0xf5, 0xc4, 0x05, 0x19, 0x09, 0x20, 0x40, 0xc2,
- 0x00, 0x11, 0x01, 0x3d, 0x81, 0x46, 0x1a, 0xfc, 0x42, 0x2e, 0xfd, 0x1c,
- 0xc2, 0x2f, 0x09, 0x87, 0x0f, 0x02, 0xa8, 0xd7, 0x05, 0xd0, 0x0f, 0x03,
- 0x41, 0x87, 0x0f, 0x02, 0x90, 0xcc, 0x87, 0xc4, 0x0f, 0x03, 0x38, 0xc7,
- 0x86, 0x25, 0x0f, 0x03, 0x28, 0x88, 0x0f, 0x03, 0x01, 0x95, 0x0f, 0x02,
- 0xf1, 0x8e, 0x0f, 0x02, 0xd8, 0x87, 0x0f, 0x02, 0xb0, 0x87, 0x0f, 0x02,
- 0xc1, 0xc2, 0x01, 0xf0, 0x0f, 0x02, 0xb8, 0x97, 0x00, 0x22, 0x1b, 0x02,
- 0x2f, 0x13, 0x16, 0xc2, 0x2f, 0x26, 0x19, 0xc2, 0x2f, 0x49, 0x10, 0xc2,
- 0x2f, 0x53, 0x0e, 0xc2, 0x2f, 0x65, 0x14, 0xc2, 0x2f, 0x7d, 0x87, 0x00,
- 0x22, 0x6b, 0x02, 0x2f, 0x8f, 0x06, 0xc2, 0x2f, 0xbc, 0x15, 0xc2, 0x2f,
- 0xdf, 0x12, 0xc2, 0x30, 0x01, 0x83, 0x00, 0x21, 0x83, 0x02, 0x30, 0x14,
- 0xc2, 0x0f, 0x60, 0x00, 0x28, 0xd9, 0x1b, 0xc2, 0x30, 0x26, 0x0d, 0xc2,
- 0x30, 0x42, 0x0a, 0xc2, 0x30, 0x5f, 0x09, 0xc2, 0x30, 0x6c, 0x04, 0xc2,
- 0x30, 0x7b, 0x91, 0x00, 0x21, 0xf3, 0x02, 0x30, 0x99, 0x8b, 0x00, 0x21,
- 0xc3, 0x02, 0x30, 0xac, 0x1c, 0xc2, 0x30, 0xc9, 0x05, 0xc2, 0x30, 0xd4,
- 0x44, 0x13, 0x8e, 0xc2, 0x30, 0xef, 0xc2, 0x00, 0x7b, 0x00, 0x21, 0x91,
- 0xc2, 0x1b, 0xa5, 0x00, 0x22, 0xc1, 0xc4, 0xe1, 0x87, 0x00, 0x23, 0x98,
- 0xc4, 0xe3, 0xaf, 0x00, 0x26, 0xa9, 0xc6, 0xd2, 0xf1, 0x00, 0x25, 0xa9,
- 0xc6, 0xd3, 0x57, 0x00, 0x25, 0x28, 0x8e, 0x00, 0x20, 0xdb, 0x02, 0x30,
- 0xfb, 0x90, 0x00, 0x20, 0xeb, 0x02, 0x31, 0x01, 0xcf, 0x66, 0xaa, 0x00,
- 0x27, 0x61, 0x8f, 0x00, 0x20, 0xe3, 0x02, 0x31, 0x07, 0x95, 0x00, 0x21,
- 0x0b, 0x02, 0x31, 0x0d, 0x94, 0x00, 0x21, 0x03, 0x02, 0x31, 0x13, 0x88,
- 0x00, 0x21, 0x20, 0xc3, 0x26, 0x9b, 0x00, 0x29, 0x61, 0x1c, 0xc2, 0x31,
- 0x19, 0x46, 0x06, 0xf2, 0xc2, 0x31, 0x30, 0xc2, 0x1b, 0xa5, 0x00, 0x20,
- 0x13, 0x02, 0x31, 0x3a, 0x87, 0x00, 0x20, 0xa1, 0xc2, 0x00, 0x4b, 0x05,
- 0x34, 0x00, 0x0a, 0xc2, 0x31, 0x40, 0xc4, 0x73, 0xed, 0x00, 0x26, 0xc3,
- 0x02, 0x31, 0x5f, 0xc9, 0xb2, 0xfa, 0x00, 0x25, 0x73, 0x02, 0x31, 0x65,
- 0xcc, 0x81, 0x88, 0x00, 0x24, 0x61, 0x44, 0x62, 0x1d, 0x42, 0x31, 0x6b,
- 0x87, 0x00, 0x20, 0xfb, 0x02, 0x31, 0x7b, 0xc2, 0x00, 0xcb, 0x00, 0x23,
- 0x80, 0xc7, 0xc2, 0xc2, 0x00, 0x28, 0xf1, 0x49, 0xaa, 0x42, 0xc2, 0x31,
- 0x81, 0x46, 0x00, 0x6b, 0x42, 0x31, 0x96, 0x83, 0x00, 0x21, 0x7b, 0x02,
- 0x31, 0xa2, 0xc3, 0x70, 0xed, 0x00, 0x21, 0x5b, 0x02, 0x31, 0xaa, 0x8b,
- 0x00, 0x20, 0x39, 0x97, 0x00, 0x21, 0x71, 0x90, 0x05, 0x32, 0xf0, 0xc2,
- 0x00, 0xa4, 0x00, 0x28, 0xb1, 0x48, 0x10, 0x90, 0xc2, 0x31, 0xb0, 0xca,
- 0xa1, 0x12, 0x00, 0x23, 0xd0, 0xc2, 0x00, 0xa4, 0x00, 0x28, 0xa1, 0xc2,
- 0x1b, 0xa5, 0x00, 0x20, 0x49, 0xc9, 0x52, 0x68, 0x00, 0x23, 0x30, 0x11,
- 0xc2, 0x31, 0xc8, 0xcd, 0x7a, 0x22, 0x00, 0x26, 0x59, 0x83, 0x00, 0x20,
- 0xd3, 0x02, 0x31, 0xd4, 0xc2, 0x1b, 0xa5, 0x00, 0x20, 0x61, 0xc2, 0x00,
- 0xcb, 0x00, 0x23, 0x70, 0x83, 0x00, 0x21, 0x2b, 0x02, 0x31, 0xda, 0xc2,
- 0x00, 0x4b, 0x05, 0x34, 0xa0, 0xc2, 0x01, 0xf0, 0x00, 0x20, 0x9b, 0x02,
- 0x31, 0xe6, 0xc2, 0x1b, 0xa5, 0x00, 0x20, 0x18, 0xc2, 0x01, 0xf0, 0x00,
- 0x21, 0x3b, 0x02, 0x31, 0xec, 0xc8, 0xbe, 0xbd, 0x05, 0x34, 0xd1, 0xd0,
- 0x52, 0x61, 0x05, 0x32, 0xc1, 0xc3, 0x26, 0x9b, 0x05, 0x34, 0x30, 0x46,
- 0x00, 0x6b, 0xc2, 0x31, 0xf2, 0x8d, 0x00, 0x23, 0xc2, 0x02, 0x31, 0xfc,
- 0x03, 0xc2, 0x32, 0x02, 0xd7, 0x05, 0xd0, 0x00, 0x20, 0x31, 0x87, 0x00,
- 0x20, 0x89, 0xca, 0xa1, 0x1c, 0x05, 0x32, 0x61, 0xca, 0xa6, 0x12, 0x05,
- 0x32, 0xd1, 0x0b, 0x42, 0x32, 0x11, 0xcf, 0x66, 0xaa, 0x00, 0x27, 0x31,
- 0xc4, 0x70, 0xd8, 0x00, 0x23, 0x03, 0x02, 0x32, 0x1d, 0x96, 0x00, 0x23,
- 0xf0, 0x46, 0x00, 0x6b, 0xc2, 0x32, 0x23, 0x87, 0x00, 0x20, 0xab, 0x02,
- 0x32, 0x35, 0xc6, 0xcd, 0x75, 0x00, 0x23, 0xa3, 0x02, 0x32, 0x3b, 0x91,
- 0x00, 0x20, 0x0a, 0x02, 0x32, 0x41, 0x87, 0x00, 0x20, 0xbb, 0x02, 0x32,
- 0x45, 0x0a, 0x42, 0x32, 0x51, 0x87, 0x00, 0x21, 0x13, 0x02, 0x32, 0x5e,
- 0x15, 0xc2, 0x32, 0x64, 0xc2, 0x00, 0x4b, 0x05, 0x34, 0x61, 0xc3, 0x26,
- 0x9b, 0x05, 0x34, 0x90, 0xc2, 0x1b, 0xa5, 0x00, 0x20, 0x51, 0xca, 0x9d,
- 0x0c, 0x05, 0x32, 0x70, 0xc8, 0x87, 0xb0, 0x05, 0x32, 0x51, 0xc7, 0x80,
- 0xcf, 0x05, 0x33, 0x40, 0xa1, 0x09, 0x7f, 0x81, 0x9f, 0x09, 0x7f, 0x79,
- 0x9d, 0x09, 0x7f, 0x70, 0xa6, 0x09, 0x7f, 0x69, 0xa5, 0x09, 0x7f, 0x61,
- 0xa4, 0x09, 0x7f, 0x59, 0xa2, 0x09, 0x7f, 0x51, 0xa1, 0x09, 0x7f, 0x49,
- 0xa0, 0x09, 0x7f, 0x41, 0x9f, 0x09, 0x7f, 0x39, 0x9e, 0x09, 0x7f, 0x31,
- 0x9d, 0x09, 0x7f, 0x28, 0xa6, 0x09, 0x7f, 0x21, 0xa5, 0x09, 0x7f, 0x19,
- 0xa4, 0x09, 0x7f, 0x11, 0xa3, 0x09, 0x7f, 0x09, 0xa2, 0x09, 0x7f, 0x01,
- 0xa1, 0x09, 0x7e, 0xf9, 0x9f, 0x09, 0x7e, 0xf1, 0x9e, 0x09, 0x7e, 0xe9,
- 0x9d, 0x09, 0x7e, 0xe0, 0xa6, 0x09, 0x7e, 0xd9, 0xa5, 0x09, 0x7e, 0xd1,
- 0xa4, 0x09, 0x7e, 0xc9, 0xa3, 0x09, 0x7e, 0xc1, 0xa2, 0x09, 0x7e, 0xb9,
- 0xa1, 0x09, 0x7e, 0xb1, 0xa0, 0x09, 0x7e, 0xa9, 0x9f, 0x09, 0x7e, 0xa1,
- 0x9e, 0x09, 0x7e, 0x99, 0x9d, 0x09, 0x7e, 0x90, 0xa6, 0x09, 0x7e, 0x89,
- 0xa5, 0x09, 0x7e, 0x81, 0xa3, 0x09, 0x7e, 0x79, 0xa2, 0x09, 0x7e, 0x6b,
- 0x02, 0x32, 0x7a, 0xa1, 0x09, 0x7e, 0x61, 0xa0, 0x09, 0x7e, 0x59, 0x9f,
- 0x09, 0x7e, 0x51, 0x9e, 0x09, 0x7e, 0x49, 0x9d, 0x09, 0x7e, 0x40, 0xa6,
- 0x09, 0x7e, 0x39, 0xa5, 0x09, 0x7e, 0x31, 0xa4, 0x09, 0x7e, 0x29, 0xa3,
- 0x09, 0x7e, 0x21, 0xa1, 0x09, 0x7e, 0x19, 0xa0, 0x09, 0x7e, 0x11, 0x9f,
- 0x09, 0x7e, 0x09, 0x9e, 0x09, 0x7e, 0x01, 0x9d, 0x09, 0x7d, 0xf8, 0xa6,
- 0x09, 0x7d, 0xf1, 0xa5, 0x09, 0x7d, 0xe9, 0xa3, 0x09, 0x7d, 0xe1, 0xa2,
- 0x09, 0x7d, 0xd9, 0xa1, 0x09, 0x7d, 0xd1, 0xa0, 0x09, 0x7d, 0xc9, 0x9f,
- 0x09, 0x7d, 0xc1, 0x9e, 0x09, 0x7d, 0xb9, 0x9d, 0x09, 0x7d, 0xb0, 0xa6,
- 0x09, 0x7d, 0xa9, 0xa4, 0x09, 0x7d, 0xa1, 0xa3, 0x09, 0x7d, 0x99, 0xa1,
- 0x09, 0x7d, 0x91, 0x9e, 0x09, 0x7d, 0x89, 0x9d, 0x09, 0x7d, 0x80, 0xa6,
- 0x09, 0x7d, 0x79, 0xa5, 0x09, 0x7d, 0x71, 0xa4, 0x09, 0x7d, 0x69, 0xa3,
- 0x09, 0x7d, 0x61, 0xa2, 0x09, 0x7d, 0x59, 0xa1, 0x09, 0x7d, 0x51, 0xa0,
- 0x09, 0x7d, 0x49, 0x9d, 0x09, 0x7d, 0x40, 0xa6, 0x09, 0x7d, 0x39, 0xa5,
- 0x09, 0x7d, 0x31, 0xa4, 0x09, 0x7d, 0x29, 0xa3, 0x09, 0x7d, 0x21, 0xa2,
- 0x09, 0x7d, 0x19, 0xa1, 0x09, 0x7d, 0x11, 0xa0, 0x09, 0x7d, 0x09, 0x9e,
- 0x09, 0x7d, 0x00, 0xa6, 0x09, 0x7c, 0xf9, 0xa4, 0x09, 0x7c, 0xf1, 0xa2,
- 0x09, 0x7c, 0xe9, 0xa0, 0x09, 0x7c, 0xe1, 0x9f, 0x09, 0x7c, 0xd3, 0x02,
- 0x32, 0x7e, 0x9e, 0x09, 0x7c, 0xc9, 0x9d, 0x09, 0x7c, 0xc0, 0xa6, 0x09,
- 0x7c, 0xb9, 0xa5, 0x09, 0x7c, 0xb1, 0xa4, 0x09, 0x7c, 0xa9, 0xa3, 0x09,
- 0x7c, 0xa1, 0xa2, 0x09, 0x7c, 0x99, 0xa1, 0x09, 0x7c, 0x91, 0x9f, 0x09,
- 0x7c, 0x89, 0x9e, 0x09, 0x7c, 0x80, 0xc4, 0x04, 0x5e, 0x00, 0x04, 0x79,
- 0xc2, 0x01, 0x47, 0x00, 0x04, 0x70, 0xe0, 0x06, 0x67, 0x01, 0x01, 0xd0,
- 0x07, 0xc2, 0x32, 0x82, 0xd3, 0x43, 0x2a, 0x01, 0x00, 0xd0, 0x44, 0x01,
- 0xb4, 0xc2, 0x32, 0x88, 0xc6, 0x29, 0x88, 0x08, 0x8f, 0x91, 0xc6, 0xd4,
- 0x83, 0x08, 0x8f, 0x89, 0x15, 0xc2, 0x32, 0x94, 0x08, 0xc2, 0x32, 0xa0,
- 0x16, 0x42, 0x32, 0xac, 0xc4, 0x22, 0x71, 0x08, 0x8f, 0x49, 0xc5, 0x01,
- 0xdb, 0x08, 0x8f, 0x41, 0x15, 0xc2, 0x32, 0xbe, 0x08, 0xc2, 0x32, 0xca,
- 0x16, 0xc2, 0x32, 0xd6, 0xc3, 0x01, 0xb4, 0x08, 0x8f, 0x08, 0xc9, 0xad,
- 0xab, 0x00, 0x6c, 0x11, 0xc8, 0xb6, 0x75, 0x00, 0x6e, 0x50, 0x03, 0xc2,
- 0x32, 0xe2, 0x0b, 0xc2, 0x33, 0x0a, 0x17, 0xc2, 0x33, 0x22, 0x07, 0xc2,
- 0x33, 0x2e, 0x11, 0xc2, 0x33, 0x3a, 0x0f, 0xc2, 0x33, 0x46, 0xd2, 0x4b,
- 0xae, 0x00, 0x6c, 0xf1, 0x48, 0xba, 0xcd, 0xc2, 0x33, 0x50, 0x48, 0xb9,
- 0x8d, 0xc2, 0x33, 0x60, 0x48, 0xb6, 0xad, 0xc2, 0x33, 0x6c, 0xc7, 0xc9,
- 0x44, 0x00, 0x6d, 0xd1, 0xc7, 0xc3, 0x2b, 0x00, 0x6d, 0xd9, 0xc7, 0xc5,
- 0xa1, 0x00, 0x6e, 0x01, 0xc7, 0xc8, 0xe2, 0x00, 0x6e, 0x21, 0xc7, 0xcb,
- 0x35, 0x00, 0x6e, 0x30, 0xc4, 0x15, 0xd3, 0x00, 0x6f, 0x31, 0xc3, 0x01,
- 0xb4, 0x00, 0x6f, 0x39, 0x16, 0xc2, 0x33, 0x7e, 0x08, 0xc2, 0x33, 0x8a,
- 0x15, 0xc2, 0x33, 0x96, 0xc5, 0x01, 0xdb, 0x00, 0x6f, 0x71, 0xc4, 0x22,
- 0x71, 0x00, 0x6f, 0x78, 0x45, 0xb5, 0xf7, 0xc2, 0x33, 0xa2, 0x44, 0xc1,
- 0x2d, 0x42, 0x33, 0xb4, 0xca, 0xa5, 0xa4, 0x00, 0x6e, 0x89, 0xc8, 0xbb,
- 0x95, 0x00, 0x6e, 0x99, 0xc9, 0xb6, 0x24, 0x00, 0x6e, 0xb1, 0xc7, 0xc1,
- 0x2c, 0x00, 0x6e, 0xd1, 0x42, 0x01, 0x29, 0x42, 0x33, 0xc3, 0xca, 0xa7,
- 0x8e, 0x00, 0x6e, 0xc1, 0xc9, 0x95, 0x7d, 0x00, 0x6e, 0xf8, 0x4a, 0x82,
- 0xaa, 0xc2, 0x33, 0xcf, 0x02, 0x42, 0x33, 0xf3, 0xc7, 0x00, 0x48, 0x0e,
- 0xc8, 0x99, 0xc8, 0x39, 0x95, 0x0e, 0xc8, 0x91, 0xc6, 0x24, 0x18, 0x0e,
- 0xc8, 0x88, 0x4c, 0x82, 0xa8, 0xc2, 0x33, 0xff, 0xc4, 0x00, 0x62, 0x0e,
- 0xd3, 0xf0, 0xda, 0x19, 0xd1, 0x0e, 0xd3, 0x81, 0x44, 0x02, 0x70, 0x42,
- 0x34, 0x13, 0xc8, 0xb7, 0xbd, 0x0e, 0xd0, 0x99, 0xc7, 0xc5, 0xcb, 0x0e,
- 0xd0, 0x91, 0xc7, 0x89, 0xd9, 0x0e, 0xd0, 0x88, 0xca, 0x9d, 0x02, 0x0e,
- 0xd0, 0x43, 0x02, 0x34, 0x1d, 0xcf, 0x68, 0x30, 0x0e, 0xd0, 0x38, 0xc3,
- 0x0e, 0x41, 0x0e, 0xd4, 0x51, 0xc3, 0x00, 0xac, 0x0e, 0xd4, 0x38, 0xc6,
- 0xcf, 0x79, 0x0e, 0xd1, 0x61, 0xc7, 0x89, 0xd9, 0x0e, 0xd1, 0x59, 0xc6,
- 0xcd, 0x33, 0x0e, 0xd1, 0x50, 0xd2, 0x4c, 0x1a, 0x0e, 0xd3, 0x89, 0x44,
- 0x00, 0x3d, 0x42, 0x34, 0x23, 0xd1, 0x51, 0xc7, 0x0e, 0xc9, 0x01, 0x15,
- 0xc2, 0x34, 0x2f, 0x46, 0x17, 0xef, 0x42, 0x34, 0x3b, 0xc7, 0x00, 0x48,
- 0x0e, 0xc8, 0xa9, 0xc7, 0x00, 0x71, 0x0e, 0xc8, 0xa0, 0xc7, 0x00, 0x48,
- 0x0e, 0xc8, 0x69, 0xc8, 0x39, 0x95, 0x0e, 0xc8, 0x61, 0xc6, 0x24, 0x18,
- 0x0e, 0xc8, 0x58, 0x00, 0xc2, 0x34, 0x47, 0x02, 0x42, 0x34, 0x65, 0x43,
- 0x0d, 0xe3, 0xc2, 0x34, 0x71, 0x12, 0x42, 0x34, 0x7d, 0x44, 0xdf, 0xef,
- 0xc2, 0x34, 0x87, 0x45, 0xd9, 0x94, 0xc2, 0x34, 0x93, 0x44, 0x5b, 0xa1,
- 0x42, 0x34, 0xb7, 0xc3, 0x1a, 0xba, 0x0e, 0xd3, 0x0b, 0x02, 0x34, 0xc9,
- 0x4b, 0x91, 0x24, 0x42, 0x34, 0xcd, 0x4b, 0x43, 0xaf, 0xc2, 0x34, 0xdf,
- 0x4a, 0x18, 0x91, 0x42, 0x34, 0xeb, 0x45, 0xd7, 0x32, 0xc2, 0x34, 0xfd,
- 0x47, 0xc4, 0x4a, 0xc2, 0x35, 0x09, 0x00, 0xc2, 0x35, 0x1b, 0x42, 0x00,
- 0x46, 0xc2, 0x35, 0x27, 0x4f, 0x67, 0x6d, 0x42, 0x35, 0x45, 0xc2, 0x01,
- 0x7b, 0x0e, 0xd3, 0x59, 0x43, 0x14, 0x1c, 0x42, 0x35, 0x57, 0x00, 0x42,
- 0x35, 0x75, 0x19, 0xc2, 0x35, 0x81, 0xc7, 0xc5, 0xcb, 0x0e, 0xd1, 0x91,
- 0xc7, 0x89, 0xd9, 0x0e, 0xd1, 0x88, 0x4b, 0x43, 0xaf, 0xc2, 0x35, 0x8d,
- 0x4a, 0x18, 0x91, 0xc2, 0x35, 0xd3, 0x49, 0x1d, 0xe4, 0xc2, 0x36, 0x19,
- 0x46, 0xd2, 0x13, 0x42, 0x36, 0x2b, 0x49, 0xae, 0xcb, 0xc2, 0x36, 0x3d,
- 0x05, 0xc2, 0x36, 0x49, 0xc5, 0xde, 0x0d, 0x0e, 0xd2, 0x83, 0x02, 0x36,
- 0x55, 0xc4, 0x67, 0x0f, 0x0e, 0xd2, 0x6b, 0x02, 0x36, 0x59, 0x45, 0x01,
- 0x75, 0xc2, 0x36, 0x5d, 0xc5, 0x7d, 0x21, 0x0e, 0xd2, 0x0b, 0x02, 0x36,
- 0x81, 0xc5, 0xaf, 0x17, 0x0e, 0xd1, 0xf2, 0x02, 0x36, 0x85, 0xc6, 0xd0,
- 0x9f, 0x0e, 0xd1, 0xd1, 0xc6, 0xd2, 0x31, 0x0e, 0xd1, 0xc8, 0xc7, 0x00,
- 0x48, 0x0e, 0xc8, 0x81, 0xc8, 0x39, 0x95, 0x0e, 0xc8, 0x79, 0xc6, 0x24,
- 0x18, 0x0e, 0xc8, 0x70, 0xd0, 0x5b, 0x52, 0x0e, 0xd1, 0xc1, 0xc6, 0x03,
- 0xf1, 0x0e, 0xd1, 0xb0, 0xd0, 0x5b, 0x52, 0x0e, 0xd1, 0xb9, 0xc7, 0x58,
- 0x67, 0x0e, 0xd1, 0xa8, 0x48, 0x9d, 0x0b, 0xc2, 0x36, 0x89, 0xca, 0x9c,
- 0xf8, 0x0e, 0xd0, 0x79, 0xcc, 0x89, 0xd4, 0x0e, 0xd0, 0x70, 0xc7, 0xc6,
- 0xb2, 0x0e, 0xcf, 0xf1, 0xd0, 0x5f, 0x12, 0x0e, 0xcf, 0xe9, 0x15, 0xc2,
- 0x36, 0x95, 0xc7, 0x3a, 0xa6, 0x0e, 0xcf, 0xd1, 0xc5, 0xde, 0x0d, 0x0e,
- 0xcf, 0xc9, 0xc4, 0xe1, 0xbf, 0x0e, 0xcf, 0xb9, 0x4a, 0x30, 0xec, 0x42,
- 0x36, 0xa4, 0xca, 0xa3, 0xce, 0x08, 0xae, 0xe3, 0x02, 0x36, 0xb0, 0x97,
+ 0xe0, 0x9b, 0x00, 0xc5, 0xfb, 0x02, 0x1a, 0x38, 0x83, 0x00, 0xa7, 0xaa,
+ 0x02, 0x1a, 0x3e, 0x19, 0xc2, 0x1a, 0x42, 0x83, 0x00, 0xa8, 0xab, 0x02,
+ 0x1a, 0x5b, 0x91, 0x00, 0xa8, 0x9b, 0x02, 0x1a, 0x63, 0x8b, 0x00, 0xa8,
+ 0x8b, 0x02, 0x1a, 0x6b, 0x87, 0x00, 0xa8, 0x80, 0x9b, 0x00, 0xc5, 0xf1,
+ 0x4c, 0x8e, 0x08, 0xc2, 0x1a, 0x6f, 0x91, 0x00, 0xa7, 0x90, 0x83, 0x00,
+ 0xa8, 0x03, 0x02, 0x1a, 0x87, 0x87, 0x00, 0xa7, 0xb1, 0x8b, 0x00, 0xa7,
+ 0xc3, 0x02, 0x1a, 0x8b, 0x91, 0x00, 0xa7, 0xe2, 0x02, 0x1a, 0x8f, 0x8b,
+ 0x00, 0xa7, 0x80, 0x47, 0xcd, 0x9c, 0xc2, 0x1a, 0x93, 0x9b, 0x00, 0xc5,
+ 0xe1, 0x46, 0xd7, 0xd8, 0xc2, 0x1a, 0x9d, 0x83, 0x00, 0xa6, 0x42, 0x02,
+ 0x1a, 0xcf, 0x91, 0x00, 0xc6, 0x53, 0x02, 0x1a, 0xd3, 0x8b, 0x00, 0xc6,
+ 0x33, 0x02, 0x1a, 0xd7, 0x87, 0x00, 0xa6, 0x49, 0x83, 0x00, 0xa6, 0x5a,
+ 0x02, 0x1a, 0xdb, 0x9b, 0x00, 0xc5, 0xd9, 0x91, 0x00, 0xa6, 0x28, 0x83,
+ 0x00, 0xb3, 0xab, 0x02, 0x1a, 0xdf, 0x91, 0x00, 0xb3, 0x9b, 0x02, 0x1a,
+ 0xe3, 0x8b, 0x00, 0xb3, 0x8a, 0x02, 0x1a, 0xe7, 0x83, 0x00, 0xac, 0x9b,
+ 0x02, 0x1a, 0xeb, 0x91, 0x00, 0xac, 0x8b, 0x02, 0x1a, 0xf6, 0x8b, 0x00,
+ 0xac, 0x7a, 0x02, 0x1a, 0xfa, 0xc4, 0x4c, 0xfb, 0x00, 0xab, 0xe1, 0xc4,
+ 0xe7, 0x07, 0x00, 0xab, 0xda, 0x02, 0x1a, 0xfe, 0x8b, 0x00, 0xa6, 0x18,
+ 0x46, 0x5f, 0x13, 0xc2, 0x1b, 0x17, 0x83, 0x00, 0xa4, 0x8a, 0x02, 0x1b,
+ 0x6f, 0x91, 0x00, 0xa4, 0xc3, 0x02, 0x1b, 0x73, 0x8b, 0x00, 0xa4, 0xa3,
+ 0x02, 0x1b, 0x77, 0x87, 0x00, 0xa4, 0x91, 0x83, 0x00, 0xa4, 0xe2, 0x02,
+ 0x1b, 0x7b, 0x91, 0x00, 0xa4, 0x70, 0x8b, 0x00, 0xa4, 0x60, 0x94, 0x00,
+ 0xc7, 0xa1, 0x8e, 0x00, 0xc7, 0x98, 0x99, 0x00, 0xb3, 0xfb, 0x02, 0x1b,
+ 0x7f, 0x0d, 0xc2, 0x1b, 0x8f, 0x10, 0xc2, 0x1b, 0x9f, 0x83, 0x00, 0xad,
+ 0x99, 0x91, 0x00, 0xad, 0x91, 0x8b, 0x00, 0xad, 0x89, 0x87, 0x00, 0xad,
+ 0x81, 0x95, 0x00, 0xa8, 0x40, 0x91, 0x00, 0xac, 0x43, 0x02, 0x1b, 0xaf,
+ 0xc2, 0x00, 0xc9, 0x00, 0xc7, 0x41, 0x83, 0x00, 0xac, 0x49, 0x8b, 0x00,
+ 0xac, 0x39, 0x87, 0x00, 0xac, 0x30, 0x8a, 0x00, 0xab, 0x7b, 0x02, 0x1b,
+ 0xb3, 0x87, 0x00, 0xa3, 0x39, 0x8b, 0x00, 0xa3, 0x41, 0x91, 0x00, 0xa3,
+ 0x49, 0x83, 0x00, 0xa3, 0x50, 0x19, 0xc2, 0x1b, 0xcf, 0x83, 0x00, 0xab,
+ 0x33, 0x02, 0x1b, 0xda, 0x91, 0x00, 0xab, 0x23, 0x02, 0x1b, 0xde, 0x8b,
+ 0x00, 0xab, 0x03, 0x02, 0x1b, 0xe2, 0x87, 0x00, 0xaa, 0xf0, 0x9b, 0x00,
+ 0xc5, 0xb9, 0x83, 0x00, 0xa2, 0xb2, 0x02, 0x1b, 0xe6, 0x83, 0x00, 0xad,
+ 0x63, 0x02, 0x1b, 0xea, 0x91, 0x00, 0xad, 0x53, 0x02, 0x1b, 0xee, 0x8b,
+ 0x00, 0xad, 0x43, 0x02, 0x1b, 0xf2, 0x87, 0x00, 0xad, 0x38, 0x83, 0x00,
+ 0xab, 0x99, 0x91, 0x00, 0xab, 0x91, 0x8b, 0x00, 0xab, 0x89, 0x87, 0x00,
+ 0xab, 0x80, 0x91, 0x00, 0xa2, 0xeb, 0x02, 0x1b, 0xf6, 0x8b, 0x00, 0xa2,
+ 0xcb, 0x02, 0x1b, 0xfa, 0x87, 0x00, 0xa2, 0xb9, 0x83, 0x00, 0xa3, 0x0a,
+ 0x02, 0x1b, 0xfe, 0x91, 0x00, 0xa2, 0x88, 0x8b, 0x00, 0xa2, 0x78, 0x15,
+ 0x42, 0x1c, 0x02, 0x9b, 0x00, 0xc5, 0x99, 0x83, 0x00, 0xa0, 0xc8, 0x91,
+ 0x00, 0xa0, 0xa2, 0x02, 0x1c, 0x0e, 0x8b, 0x00, 0xa0, 0x80, 0x44, 0x11,
+ 0xe2, 0xc2, 0x1c, 0x14, 0x8b, 0x00, 0xaa, 0xb0, 0x83, 0x00, 0xaa, 0xd2,
+ 0x02, 0x1c, 0x46, 0x91, 0x00, 0xaa, 0xc0, 0x95, 0x00, 0xc6, 0xd3, 0x02,
+ 0x1c, 0x4a, 0x90, 0x00, 0xc6, 0xcb, 0x02, 0x1c, 0x4e, 0x8f, 0x00, 0xc6,
+ 0xc1, 0x85, 0x00, 0xc6, 0xb9, 0x8d, 0x00, 0xc6, 0xb1, 0x96, 0x00, 0xc6,
+ 0xa9, 0x92, 0x00, 0xc6, 0xa0, 0x83, 0x00, 0xc6, 0x9b, 0x02, 0x1c, 0x52,
+ 0x91, 0x00, 0xc6, 0x8b, 0x02, 0x1c, 0x56, 0x8b, 0x00, 0xc6, 0x7b, 0x02,
+ 0x1c, 0x5a, 0xc2, 0x01, 0xe6, 0x00, 0xc6, 0x70, 0x9b, 0x00, 0xc6, 0x29,
+ 0x83, 0x00, 0xaa, 0x62, 0x02, 0x1c, 0x5e, 0x91, 0x00, 0xaa, 0x48, 0x8b,
+ 0x00, 0xaa, 0x38, 0x9b, 0x00, 0xc6, 0x21, 0x83, 0x00, 0xa9, 0x72, 0x02,
+ 0x1c, 0x62, 0x9b, 0x00, 0xc6, 0x19, 0x91, 0x00, 0xa9, 0x58, 0x83, 0x00,
+ 0xa9, 0xcb, 0x02, 0x1c, 0x66, 0x91, 0x00, 0xa9, 0xab, 0x02, 0x1c, 0x6a,
+ 0x8b, 0x00, 0xa9, 0x8b, 0x02, 0x1c, 0x6e, 0x87, 0x00, 0xa9, 0x78, 0x8b,
+ 0x00, 0xa9, 0x48, 0x9b, 0x00, 0xc5, 0xe9, 0x83, 0x00, 0xa6, 0xda, 0x02,
+ 0x1c, 0x72, 0x83, 0x00, 0xad, 0x23, 0x02, 0x1c, 0x76, 0x91, 0x00, 0xad,
+ 0x13, 0x02, 0x1c, 0x7a, 0x8b, 0x00, 0xad, 0x02, 0x02, 0x1c, 0x7e, 0x8b,
+ 0x00, 0xa6, 0xb0, 0x91, 0x00, 0xa6, 0xc0, 0x87, 0x00, 0xa6, 0xe1, 0x8b,
+ 0x00, 0xa6, 0xf3, 0x02, 0x1c, 0x82, 0x91, 0x00, 0xa7, 0x13, 0x02, 0x1c,
+ 0x86, 0x83, 0x00, 0xa7, 0x32, 0x02, 0x1c, 0x8a, 0x9b, 0x00, 0xc5, 0xd1,
+ 0x83, 0x00, 0xa5, 0x5a, 0x02, 0x1c, 0x8e, 0x45, 0x2e, 0x48, 0x42, 0x1c,
+ 0x92, 0x91, 0x00, 0xa5, 0x42, 0x02, 0x1c, 0x9a, 0x8b, 0x00, 0xa5, 0x30,
+ 0x87, 0x00, 0xa5, 0x61, 0x8b, 0x00, 0xa5, 0x73, 0x02, 0x1c, 0xa0, 0x91,
+ 0x00, 0xa5, 0x93, 0x02, 0x1c, 0xa4, 0x83, 0x00, 0xa5, 0xb2, 0x02, 0x1c,
+ 0xa8, 0x83, 0x00, 0xa3, 0xf3, 0x02, 0x1c, 0xac, 0x87, 0x00, 0xa3, 0xa1,
+ 0x8b, 0x00, 0xa3, 0xb3, 0x02, 0x1c, 0xb4, 0x91, 0x00, 0xa3, 0xd2, 0x02,
+ 0x1c, 0xb8, 0x9b, 0x00, 0xc5, 0xc1, 0x83, 0x00, 0xa3, 0x9a, 0x02, 0x1c,
+ 0xbc, 0x8b, 0x00, 0xa3, 0x70, 0x91, 0x00, 0xa3, 0x80, 0x91, 0x00, 0xa2,
+ 0x03, 0x02, 0x1c, 0xc0, 0x83, 0x00, 0xa2, 0x23, 0x02, 0x1c, 0xc8, 0x8b,
+ 0x00, 0xa1, 0xe3, 0x02, 0x1c, 0xcc, 0x87, 0x00, 0xa1, 0xd0, 0x9b, 0x00,
+ 0xc5, 0xa9, 0x83, 0x00, 0xa1, 0xca, 0x02, 0x1c, 0xd0, 0x9b, 0x00, 0xc5,
+ 0xa1, 0x91, 0x00, 0xa1, 0xa0, 0x8b, 0x00, 0xa1, 0x90, 0x9b, 0x00, 0xc5,
+ 0x91, 0x8b, 0x00, 0xa0, 0x10, 0xc7, 0xc4, 0x11, 0x00, 0xad, 0x78, 0x95,
+ 0x00, 0xa8, 0x31, 0x8f, 0x00, 0xa5, 0xf0, 0x8b, 0x00, 0xb3, 0x79, 0x83,
+ 0x00, 0xac, 0x22, 0x02, 0x1c, 0xd4, 0x91, 0x00, 0xac, 0x10, 0x8b, 0x00,
+ 0xac, 0x00, 0x8b, 0x00, 0xab, 0x18, 0xc3, 0x14, 0x8f, 0x00, 0xa2, 0x41,
+ 0xc2, 0x02, 0x14, 0x00, 0xa1, 0xa8, 0x97, 0x08, 0x15, 0x22, 0x02, 0x1c,
+ 0xd8, 0x9f, 0x08, 0x16, 0x70, 0xa0, 0x08, 0x16, 0x61, 0xa1, 0x08, 0x16,
+ 0x69, 0x9f, 0x08, 0x16, 0x58, 0x9f, 0x08, 0x15, 0xb0, 0x9f, 0x08, 0x15,
+ 0x78, 0x9f, 0x08, 0x16, 0x18, 0xc2, 0x03, 0x12, 0x08, 0x29, 0x81, 0xc2,
+ 0x00, 0xff, 0x08, 0x2a, 0x40, 0xc2, 0x07, 0x6e, 0x08, 0x29, 0x91, 0xc4,
+ 0xe7, 0x6b, 0x08, 0x2a, 0xc0, 0xc2, 0x00, 0x97, 0x08, 0x29, 0x99, 0xc3,
+ 0x2c, 0x66, 0x08, 0x2a, 0x09, 0x1c, 0x42, 0x1c, 0xe4, 0x84, 0x08, 0x29,
+ 0xa1, 0xc2, 0x16, 0x0a, 0x08, 0x29, 0xb0, 0xc3, 0x3c, 0x74, 0x08, 0x29,
+ 0xa9, 0x0a, 0x42, 0x1c, 0xf0, 0xc2, 0x02, 0xe4, 0x08, 0x29, 0xc1, 0xc3,
+ 0x47, 0xf0, 0x08, 0x2a, 0x99, 0xc3, 0xec, 0x66, 0x08, 0x2a, 0xe0, 0x0a,
+ 0xc2, 0x1c, 0xfa, 0x03, 0xc2, 0x1d, 0x0b, 0x42, 0x1a, 0x36, 0x42, 0x1d,
+ 0x15, 0xc3, 0x05, 0x85, 0x08, 0x29, 0xd1, 0xc3, 0xeb, 0x5b, 0x08, 0x2b,
+ 0x08, 0xc2, 0x02, 0x1f, 0x08, 0x29, 0xe1, 0xc3, 0x2c, 0x54, 0x08, 0x29,
+ 0xf9, 0xc2, 0x00, 0x6a, 0x08, 0x2a, 0xf0, 0x0a, 0xc2, 0x1d, 0x1d, 0xc3,
+ 0xed, 0x1a, 0x08, 0x2a, 0xd0, 0xc2, 0x00, 0x3a, 0x08, 0x29, 0xf1, 0xc3,
+ 0xea, 0xdd, 0x08, 0x2a, 0x28, 0xc3, 0xc7, 0xf2, 0x08, 0x2a, 0x19, 0xc3,
+ 0x56, 0xc8, 0x08, 0x2a, 0x88, 0xc2, 0x00, 0x4d, 0x08, 0x2a, 0x21, 0xc2,
+ 0x33, 0x0f, 0x08, 0x2b, 0x18, 0x9b, 0x08, 0x2a, 0x39, 0x94, 0x08, 0x2a,
+ 0x68, 0xc2, 0x01, 0x04, 0x08, 0x2a, 0xb9, 0xc3, 0xec, 0x66, 0x08, 0x2b,
+ 0x10, 0x9d, 0x17, 0xcf, 0x01, 0x88, 0x17, 0xcf, 0x79, 0x87, 0x17, 0xcf,
+ 0x71, 0x86, 0x17, 0xcf, 0x69, 0x85, 0x17, 0xcf, 0x61, 0x84, 0x17, 0xcf,
+ 0x59, 0x83, 0x17, 0xcf, 0x51, 0xa6, 0x17, 0xcf, 0x49, 0xa5, 0x17, 0xcf,
+ 0x41, 0xa4, 0x17, 0xcf, 0x39, 0xa3, 0x17, 0xcf, 0x31, 0xa2, 0x17, 0xcf,
+ 0x29, 0xa1, 0x17, 0xcf, 0x21, 0xa0, 0x17, 0xcf, 0x19, 0x9f, 0x17, 0xcf,
+ 0x11, 0x9e, 0x17, 0xcf, 0x08, 0x88, 0x17, 0xce, 0xf9, 0x87, 0x17, 0xce,
+ 0xf1, 0xa6, 0x17, 0xce, 0xc9, 0x86, 0x17, 0xce, 0xe9, 0x85, 0x17, 0xce,
+ 0xe1, 0x84, 0x17, 0xce, 0xd9, 0x83, 0x17, 0xce, 0xd1, 0xa5, 0x17, 0xce,
+ 0xc1, 0xa4, 0x17, 0xce, 0xb9, 0xa3, 0x17, 0xce, 0xb1, 0xa2, 0x17, 0xce,
+ 0xa9, 0xa1, 0x17, 0xce, 0xa1, 0xa0, 0x17, 0xce, 0x99, 0x9f, 0x17, 0xce,
+ 0x91, 0x9e, 0x17, 0xce, 0x89, 0x9d, 0x17, 0xce, 0x80, 0x83, 0x17, 0xcd,
+ 0x51, 0xa6, 0x17, 0xcd, 0x49, 0xa4, 0x17, 0xcd, 0x39, 0xa3, 0x17, 0xcd,
+ 0x31, 0xa2, 0x17, 0xcd, 0x29, 0xa1, 0x17, 0xcd, 0x21, 0xa5, 0x17, 0xcd,
+ 0x41, 0x86, 0x17, 0xcd, 0x69, 0x85, 0x17, 0xcd, 0x61, 0x84, 0x17, 0xcd,
+ 0x59, 0xa0, 0x17, 0xcd, 0x19, 0x9f, 0x17, 0xcd, 0x11, 0x9e, 0x17, 0xcd,
+ 0x09, 0x9d, 0x17, 0xcd, 0x01, 0x87, 0x17, 0xcd, 0x71, 0x88, 0x17, 0xcd,
+ 0x78, 0x88, 0x17, 0xcf, 0xf9, 0x87, 0x17, 0xcf, 0xf1, 0x86, 0x17, 0xcf,
+ 0xe9, 0x85, 0x17, 0xcf, 0xe1, 0x84, 0x17, 0xcf, 0xd9, 0x83, 0x17, 0xcf,
+ 0xd1, 0xa6, 0x17, 0xcf, 0xc9, 0xa5, 0x17, 0xcf, 0xc1, 0xa4, 0x17, 0xcf,
+ 0xb9, 0xa3, 0x17, 0xcf, 0xb1, 0xa2, 0x17, 0xcf, 0xa9, 0xa1, 0x17, 0xcf,
+ 0xa1, 0xa0, 0x17, 0xcf, 0x99, 0x9f, 0x17, 0xcf, 0x91, 0x9e, 0x17, 0xcf,
+ 0x89, 0x9d, 0x17, 0xcf, 0x80, 0x9d, 0x17, 0xcb, 0x81, 0x88, 0x17, 0xcb,
+ 0xf9, 0x87, 0x17, 0xcb, 0xf1, 0x86, 0x17, 0xcb, 0xe9, 0x85, 0x17, 0xcb,
+ 0xe1, 0x84, 0x17, 0xcb, 0xd9, 0x83, 0x17, 0xcb, 0xd1, 0xa6, 0x17, 0xcb,
+ 0xc9, 0xa5, 0x17, 0xcb, 0xc1, 0xa4, 0x17, 0xcb, 0xb9, 0xa3, 0x17, 0xcb,
+ 0xb1, 0xa2, 0x17, 0xcb, 0xa9, 0xa1, 0x17, 0xcb, 0xa1, 0xa0, 0x17, 0xcb,
+ 0x99, 0x9f, 0x17, 0xcb, 0x91, 0x9e, 0x17, 0xcb, 0x88, 0x88, 0x17, 0xcb,
+ 0x79, 0x87, 0x17, 0xcb, 0x71, 0x86, 0x17, 0xcb, 0x69, 0x85, 0x17, 0xcb,
+ 0x61, 0x84, 0x17, 0xcb, 0x59, 0x83, 0x17, 0xcb, 0x51, 0xa6, 0x17, 0xcb,
+ 0x49, 0xa5, 0x17, 0xcb, 0x41, 0xa4, 0x17, 0xcb, 0x39, 0xa3, 0x17, 0xcb,
+ 0x31, 0xa2, 0x17, 0xcb, 0x29, 0xa1, 0x17, 0xcb, 0x21, 0x9d, 0x17, 0xcb,
+ 0x01, 0x9e, 0x17, 0xcb, 0x09, 0x9f, 0x17, 0xcb, 0x11, 0xa0, 0x17, 0xcb,
+ 0x18, 0x9d, 0x17, 0xc9, 0x81, 0x88, 0x17, 0xc9, 0xf9, 0x87, 0x17, 0xc9,
+ 0xf1, 0x86, 0x17, 0xc9, 0xe9, 0x85, 0x17, 0xc9, 0xe1, 0x84, 0x17, 0xc9,
+ 0xd9, 0x83, 0x17, 0xc9, 0xd1, 0xa6, 0x17, 0xc9, 0xc9, 0xa5, 0x17, 0xc9,
+ 0xc1, 0xa4, 0x17, 0xc9, 0xb9, 0xa3, 0x17, 0xc9, 0xb1, 0xa2, 0x17, 0xc9,
+ 0xa9, 0xa1, 0x17, 0xc9, 0xa1, 0xa0, 0x17, 0xc9, 0x99, 0x9f, 0x17, 0xc9,
+ 0x91, 0x9e, 0x17, 0xc9, 0x88, 0x88, 0x17, 0xc9, 0x79, 0x87, 0x17, 0xc9,
+ 0x71, 0x86, 0x17, 0xc9, 0x69, 0x85, 0x17, 0xc9, 0x61, 0x84, 0x17, 0xc9,
+ 0x59, 0x83, 0x17, 0xc9, 0x51, 0xa6, 0x17, 0xc9, 0x49, 0xa5, 0x17, 0xc9,
+ 0x41, 0xa4, 0x17, 0xc9, 0x39, 0xa3, 0x17, 0xc9, 0x31, 0xa2, 0x17, 0xc9,
+ 0x29, 0xa1, 0x17, 0xc9, 0x21, 0xa0, 0x17, 0xc9, 0x19, 0x9f, 0x17, 0xc9,
+ 0x11, 0x9e, 0x17, 0xc9, 0x09, 0x9d, 0x17, 0xc9, 0x00, 0x88, 0x17, 0xc8,
+ 0xf9, 0x87, 0x17, 0xc8, 0xf1, 0x86, 0x17, 0xc8, 0xe9, 0x85, 0x17, 0xc8,
+ 0xe1, 0x84, 0x17, 0xc8, 0xd9, 0x83, 0x17, 0xc8, 0xd1, 0xa6, 0x17, 0xc8,
+ 0xc9, 0xa5, 0x17, 0xc8, 0xc1, 0xa4, 0x17, 0xc8, 0xb9, 0xa3, 0x17, 0xc8,
+ 0xb1, 0xa2, 0x17, 0xc8, 0xa9, 0xa1, 0x17, 0xc8, 0xa1, 0xa0, 0x17, 0xc8,
+ 0x99, 0x9f, 0x17, 0xc8, 0x91, 0x9e, 0x17, 0xc8, 0x89, 0x9d, 0x17, 0xc8,
+ 0x80, 0x88, 0x17, 0xc8, 0x79, 0x87, 0x17, 0xc8, 0x71, 0x86, 0x17, 0xc8,
+ 0x69, 0x85, 0x17, 0xc8, 0x61, 0x84, 0x17, 0xc8, 0x59, 0x83, 0x17, 0xc8,
+ 0x51, 0xa6, 0x17, 0xc8, 0x49, 0xa5, 0x17, 0xc8, 0x41, 0xa4, 0x17, 0xc8,
+ 0x39, 0xa3, 0x17, 0xc8, 0x31, 0xa2, 0x17, 0xc8, 0x29, 0xa1, 0x17, 0xc8,
+ 0x21, 0xa0, 0x17, 0xc8, 0x19, 0x9f, 0x17, 0xc8, 0x11, 0x9e, 0x17, 0xc8,
+ 0x09, 0x9d, 0x17, 0xc8, 0x00, 0x88, 0x17, 0xce, 0x79, 0x87, 0x17, 0xce,
+ 0x71, 0x86, 0x17, 0xce, 0x69, 0x85, 0x17, 0xce, 0x61, 0x84, 0x17, 0xce,
+ 0x59, 0x83, 0x17, 0xce, 0x51, 0xa6, 0x17, 0xce, 0x49, 0xa5, 0x17, 0xce,
+ 0x41, 0xa4, 0x17, 0xce, 0x39, 0xa3, 0x17, 0xce, 0x31, 0xa2, 0x17, 0xce,
+ 0x29, 0xa1, 0x17, 0xce, 0x21, 0xa0, 0x17, 0xce, 0x19, 0x9f, 0x17, 0xce,
+ 0x11, 0x9d, 0x17, 0xce, 0x01, 0x9e, 0x17, 0xce, 0x08, 0x87, 0x17, 0xcd,
+ 0xf1, 0x86, 0x17, 0xcd, 0xe9, 0x85, 0x17, 0xcd, 0xe1, 0x84, 0x17, 0xcd,
+ 0xd9, 0x83, 0x17, 0xcd, 0xd1, 0xa6, 0x17, 0xcd, 0xc9, 0xa5, 0x17, 0xcd,
+ 0xc1, 0xa4, 0x17, 0xcd, 0xb9, 0xa3, 0x17, 0xcd, 0xb1, 0xa2, 0x17, 0xcd,
+ 0xa9, 0xa1, 0x17, 0xcd, 0xa1, 0x9d, 0x17, 0xcd, 0x81, 0x9e, 0x17, 0xcd,
+ 0x89, 0x9f, 0x17, 0xcd, 0x91, 0xa0, 0x17, 0xcd, 0x99, 0x88, 0x17, 0xcd,
+ 0xf8, 0x88, 0x17, 0xcc, 0xf9, 0x87, 0x17, 0xcc, 0xf1, 0x86, 0x17, 0xcc,
+ 0xe9, 0x85, 0x17, 0xcc, 0xe1, 0x84, 0x17, 0xcc, 0xd9, 0x83, 0x17, 0xcc,
+ 0xd1, 0xa6, 0x17, 0xcc, 0xc9, 0xa5, 0x17, 0xcc, 0xc1, 0xa4, 0x17, 0xcc,
+ 0xb9, 0xa3, 0x17, 0xcc, 0xb1, 0xa2, 0x17, 0xcc, 0xa9, 0xa1, 0x17, 0xcc,
+ 0xa1, 0x9d, 0x17, 0xcc, 0x81, 0x9e, 0x17, 0xcc, 0x89, 0x9f, 0x17, 0xcc,
+ 0x91, 0xa0, 0x17, 0xcc, 0x98, 0x88, 0x17, 0xcc, 0x79, 0x87, 0x17, 0xcc,
+ 0x71, 0x86, 0x17, 0xcc, 0x69, 0x85, 0x17, 0xcc, 0x61, 0x84, 0x17, 0xcc,
+ 0x59, 0x83, 0x17, 0xcc, 0x51, 0xa6, 0x17, 0xcc, 0x49, 0xa5, 0x17, 0xcc,
+ 0x41, 0xa4, 0x17, 0xcc, 0x39, 0xa3, 0x17, 0xcc, 0x31, 0xa2, 0x17, 0xcc,
+ 0x29, 0xa1, 0x17, 0xcc, 0x21, 0xa0, 0x17, 0xcc, 0x19, 0x9f, 0x17, 0xcc,
+ 0x11, 0x9e, 0x17, 0xcc, 0x09, 0x9d, 0x17, 0xcc, 0x00, 0xa5, 0x17, 0xca,
+ 0xc1, 0xa4, 0x17, 0xca, 0xb9, 0xa3, 0x17, 0xca, 0xb1, 0xa2, 0x17, 0xca,
+ 0xa9, 0xa1, 0x17, 0xca, 0xa1, 0x9e, 0x17, 0xca, 0x89, 0x9d, 0x17, 0xca,
+ 0x81, 0x9f, 0x17, 0xca, 0x91, 0xa0, 0x17, 0xca, 0x99, 0xa6, 0x17, 0xca,
+ 0xc9, 0x83, 0x17, 0xca, 0xd1, 0x84, 0x17, 0xca, 0xd9, 0x85, 0x17, 0xca,
+ 0xe1, 0x86, 0x17, 0xca, 0xe9, 0x87, 0x17, 0xca, 0xf1, 0x88, 0x17, 0xca,
+ 0xf8, 0x88, 0x17, 0xca, 0x79, 0x87, 0x17, 0xca, 0x71, 0x86, 0x17, 0xca,
+ 0x69, 0x85, 0x17, 0xca, 0x61, 0x84, 0x17, 0xca, 0x59, 0x83, 0x17, 0xca,
+ 0x51, 0xa6, 0x17, 0xca, 0x49, 0xa5, 0x17, 0xca, 0x41, 0xa4, 0x17, 0xca,
+ 0x39, 0xa3, 0x17, 0xca, 0x31, 0xa2, 0x17, 0xca, 0x29, 0xa1, 0x17, 0xca,
+ 0x21, 0xa0, 0x17, 0xca, 0x19, 0x9f, 0x17, 0xca, 0x11, 0x9e, 0x17, 0xca,
+ 0x09, 0x9d, 0x17, 0xca, 0x00, 0xa2, 0x17, 0xc3, 0xa9, 0x9f, 0x17, 0xc3,
+ 0x91, 0x88, 0x17, 0xc3, 0xf9, 0x87, 0x17, 0xc3, 0xf1, 0x86, 0x17, 0xc3,
+ 0xe9, 0x85, 0x17, 0xc3, 0xe1, 0x84, 0x17, 0xc3, 0xd9, 0x83, 0x17, 0xc3,
+ 0xd1, 0xa6, 0x17, 0xc3, 0xc9, 0xa5, 0x17, 0xc3, 0xc1, 0xa4, 0x17, 0xc3,
+ 0xb9, 0xa3, 0x17, 0xc3, 0xb1, 0xa1, 0x17, 0xc3, 0xa1, 0xa0, 0x17, 0xc3,
+ 0x99, 0x9e, 0x17, 0xc3, 0x89, 0x9d, 0x17, 0xc3, 0x80, 0x83, 0x17, 0xc3,
+ 0x51, 0xa2, 0x17, 0xc3, 0x29, 0xa1, 0x17, 0xc3, 0x21, 0xa0, 0x17, 0xc3,
+ 0x19, 0x9f, 0x17, 0xc3, 0x11, 0x9e, 0x17, 0xc3, 0x09, 0x88, 0x17, 0xc3,
+ 0x79, 0x87, 0x17, 0xc3, 0x71, 0x86, 0x17, 0xc3, 0x69, 0x85, 0x17, 0xc3,
+ 0x61, 0x84, 0x17, 0xc3, 0x59, 0xa6, 0x17, 0xc3, 0x49, 0xa5, 0x17, 0xc3,
+ 0x41, 0xa4, 0x17, 0xc3, 0x39, 0xa3, 0x17, 0xc3, 0x31, 0x9d, 0x17, 0xc3,
+ 0x00, 0xa6, 0x17, 0xc2, 0xc9, 0xa5, 0x17, 0xc2, 0xc1, 0xa4, 0x17, 0xc2,
+ 0xb9, 0xa3, 0x17, 0xc2, 0xb1, 0xa2, 0x17, 0xc2, 0xa9, 0xa1, 0x17, 0xc2,
+ 0xa1, 0xa0, 0x17, 0xc2, 0x99, 0x9f, 0x17, 0xc2, 0x91, 0x9e, 0x17, 0xc2,
+ 0x89, 0x9d, 0x17, 0xc2, 0x81, 0x85, 0x17, 0xc2, 0xe1, 0x84, 0x17, 0xc2,
+ 0xd9, 0x83, 0x17, 0xc2, 0xd1, 0x86, 0x17, 0xc2, 0xe9, 0x87, 0x17, 0xc2,
+ 0xf1, 0x88, 0x17, 0xc2, 0xf8, 0x88, 0x17, 0xc2, 0x79, 0x87, 0x17, 0xc2,
+ 0x71, 0xa6, 0x17, 0xc2, 0x49, 0xa5, 0x17, 0xc2, 0x41, 0xa4, 0x17, 0xc2,
+ 0x39, 0xa3, 0x17, 0xc2, 0x31, 0xa2, 0x17, 0xc2, 0x29, 0xa1, 0x17, 0xc2,
+ 0x21, 0xa0, 0x17, 0xc2, 0x19, 0x86, 0x17, 0xc2, 0x69, 0x85, 0x17, 0xc2,
+ 0x61, 0x84, 0x17, 0xc2, 0x59, 0x83, 0x17, 0xc2, 0x51, 0x9f, 0x17, 0xc2,
+ 0x11, 0x9e, 0x17, 0xc2, 0x09, 0x9d, 0x17, 0xc2, 0x00, 0xa5, 0x17, 0xc1,
+ 0x41, 0xa4, 0x17, 0xc1, 0x39, 0xa3, 0x17, 0xc1, 0x31, 0xa2, 0x17, 0xc1,
+ 0x29, 0xa1, 0x17, 0xc1, 0x21, 0x88, 0x17, 0xc1, 0x79, 0x87, 0x17, 0xc1,
+ 0x71, 0x86, 0x17, 0xc1, 0x69, 0x85, 0x17, 0xc1, 0x61, 0x84, 0x17, 0xc1,
+ 0x59, 0x83, 0x17, 0xc1, 0x51, 0xa6, 0x17, 0xc1, 0x49, 0xa0, 0x17, 0xc1,
+ 0x19, 0x9f, 0x17, 0xc1, 0x11, 0x9e, 0x17, 0xc1, 0x09, 0x9d, 0x17, 0xc1,
+ 0x00, 0xa5, 0x17, 0xc0, 0x41, 0xa4, 0x17, 0xc0, 0x39, 0x88, 0x17, 0xc0,
+ 0x79, 0x87, 0x17, 0xc0, 0x71, 0x86, 0x17, 0xc0, 0x69, 0x85, 0x17, 0xc0,
+ 0x61, 0x84, 0x17, 0xc0, 0x59, 0x83, 0x17, 0xc0, 0x51, 0xa6, 0x17, 0xc0,
+ 0x49, 0xa3, 0x17, 0xc0, 0x31, 0xa2, 0x17, 0xc0, 0x29, 0xa1, 0x17, 0xc0,
+ 0x21, 0x9d, 0x17, 0xc0, 0x01, 0x9e, 0x17, 0xc0, 0x09, 0x9f, 0x17, 0xc0,
+ 0x11, 0xa0, 0x17, 0xc0, 0x18, 0x88, 0x17, 0xc7, 0xf9, 0x87, 0x17, 0xc7,
+ 0xf1, 0x86, 0x17, 0xc7, 0xe9, 0x85, 0x17, 0xc7, 0xe1, 0x84, 0x17, 0xc7,
+ 0xd9, 0x83, 0x17, 0xc7, 0xd1, 0xa6, 0x17, 0xc7, 0xc9, 0xa5, 0x17, 0xc7,
+ 0xc1, 0xa4, 0x17, 0xc7, 0xb9, 0xa3, 0x17, 0xc7, 0xb1, 0xa2, 0x17, 0xc7,
+ 0xa9, 0xa1, 0x17, 0xc7, 0xa1, 0xa0, 0x17, 0xc7, 0x99, 0x9f, 0x17, 0xc7,
+ 0x91, 0x9e, 0x17, 0xc7, 0x89, 0x9d, 0x17, 0xc7, 0x80, 0x9d, 0x17, 0xc5,
+ 0x81, 0x88, 0x17, 0xc5, 0xf9, 0x87, 0x17, 0xc5, 0xf1, 0x86, 0x17, 0xc5,
+ 0xe9, 0x85, 0x17, 0xc5, 0xe1, 0x84, 0x17, 0xc5, 0xd9, 0x83, 0x17, 0xc5,
+ 0xd1, 0xa6, 0x17, 0xc5, 0xc9, 0xa5, 0x17, 0xc5, 0xc1, 0xa4, 0x17, 0xc5,
+ 0xb9, 0xa3, 0x17, 0xc5, 0xb1, 0xa2, 0x17, 0xc5, 0xa9, 0xa1, 0x17, 0xc5,
+ 0xa1, 0xa0, 0x17, 0xc5, 0x99, 0x9f, 0x17, 0xc5, 0x91, 0x9e, 0x17, 0xc5,
+ 0x88, 0x88, 0x17, 0xc5, 0x79, 0x87, 0x17, 0xc5, 0x71, 0x86, 0x17, 0xc5,
+ 0x69, 0x85, 0x17, 0xc5, 0x61, 0x84, 0x17, 0xc5, 0x59, 0x83, 0x17, 0xc5,
+ 0x51, 0xa6, 0x17, 0xc5, 0x49, 0xa5, 0x17, 0xc5, 0x41, 0xa4, 0x17, 0xc5,
+ 0x39, 0xa3, 0x17, 0xc5, 0x31, 0xa2, 0x17, 0xc5, 0x29, 0xa1, 0x17, 0xc5,
+ 0x21, 0xa0, 0x17, 0xc5, 0x19, 0x9f, 0x17, 0xc5, 0x11, 0x9e, 0x17, 0xc5,
+ 0x09, 0x9d, 0x17, 0xc5, 0x00, 0x88, 0x17, 0xc4, 0xf9, 0x87, 0x17, 0xc4,
+ 0xf1, 0x86, 0x17, 0xc4, 0xe9, 0x85, 0x17, 0xc4, 0xe1, 0x84, 0x17, 0xc4,
+ 0xd9, 0x83, 0x17, 0xc4, 0xd1, 0xa6, 0x17, 0xc4, 0xc9, 0xa5, 0x17, 0xc4,
+ 0xc1, 0xa4, 0x17, 0xc4, 0xb9, 0xa3, 0x17, 0xc4, 0xb1, 0xa2, 0x17, 0xc4,
+ 0xa9, 0xa1, 0x17, 0xc4, 0xa1, 0xa0, 0x17, 0xc4, 0x99, 0x9f, 0x17, 0xc4,
+ 0x91, 0x9e, 0x17, 0xc4, 0x89, 0x9d, 0x17, 0xc4, 0x80, 0x88, 0x17, 0xc4,
+ 0x79, 0x87, 0x17, 0xc4, 0x71, 0x86, 0x17, 0xc4, 0x69, 0x85, 0x17, 0xc4,
+ 0x61, 0x84, 0x17, 0xc4, 0x59, 0x83, 0x17, 0xc4, 0x51, 0xa6, 0x17, 0xc4,
+ 0x49, 0xa5, 0x17, 0xc4, 0x41, 0xa4, 0x17, 0xc4, 0x39, 0xa3, 0x17, 0xc4,
+ 0x31, 0xa2, 0x17, 0xc4, 0x29, 0xa1, 0x17, 0xc4, 0x21, 0xa0, 0x17, 0xc4,
+ 0x19, 0x9f, 0x17, 0xc4, 0x11, 0x9e, 0x17, 0xc4, 0x09, 0x9d, 0x17, 0xc4,
+ 0x00, 0x88, 0x17, 0xc7, 0x79, 0x87, 0x17, 0xc7, 0x71, 0x86, 0x17, 0xc7,
+ 0x69, 0x85, 0x17, 0xc7, 0x61, 0x84, 0x17, 0xc7, 0x59, 0x83, 0x17, 0xc7,
+ 0x51, 0xa6, 0x17, 0xc7, 0x49, 0xa5, 0x17, 0xc7, 0x41, 0xa4, 0x17, 0xc7,
+ 0x39, 0xa3, 0x17, 0xc7, 0x31, 0xa2, 0x17, 0xc7, 0x29, 0xa1, 0x17, 0xc7,
+ 0x21, 0x9d, 0x17, 0xc7, 0x01, 0x9e, 0x17, 0xc7, 0x09, 0x9f, 0x17, 0xc7,
+ 0x11, 0xa0, 0x17, 0xc7, 0x18, 0xa6, 0x17, 0xc6, 0xc9, 0xa5, 0x17, 0xc6,
+ 0xc1, 0xa4, 0x17, 0xc6, 0xb9, 0xa3, 0x17, 0xc6, 0xb1, 0xa2, 0x17, 0xc6,
+ 0xa9, 0xa1, 0x17, 0xc6, 0xa1, 0xa0, 0x17, 0xc6, 0x99, 0x9f, 0x17, 0xc6,
+ 0x91, 0x9e, 0x17, 0xc6, 0x89, 0x9d, 0x17, 0xc6, 0x81, 0x83, 0x17, 0xc6,
+ 0xd1, 0x84, 0x17, 0xc6, 0xd9, 0x85, 0x17, 0xc6, 0xe1, 0x86, 0x17, 0xc6,
+ 0xe9, 0x87, 0x17, 0xc6, 0xf1, 0x88, 0x17, 0xc6, 0xf8, 0x88, 0x17, 0xc6,
+ 0x79, 0x87, 0x17, 0xc6, 0x71, 0x86, 0x17, 0xc6, 0x69, 0x85, 0x17, 0xc6,
+ 0x61, 0x84, 0x17, 0xc6, 0x59, 0x83, 0x17, 0xc6, 0x51, 0xa6, 0x17, 0xc6,
+ 0x49, 0xa5, 0x17, 0xc6, 0x41, 0xa4, 0x17, 0xc6, 0x39, 0xa3, 0x17, 0xc6,
+ 0x31, 0xa2, 0x17, 0xc6, 0x29, 0xa1, 0x17, 0xc6, 0x21, 0xa0, 0x17, 0xc6,
+ 0x19, 0x9f, 0x17, 0xc6, 0x11, 0x9e, 0x17, 0xc6, 0x09, 0x9d, 0x17, 0xc6,
+ 0x00, 0x88, 0x17, 0xc1, 0xf9, 0x87, 0x17, 0xc1, 0xf1, 0x86, 0x17, 0xc1,
+ 0xe9, 0x85, 0x17, 0xc1, 0xe1, 0x84, 0x17, 0xc1, 0xd9, 0x83, 0x17, 0xc1,
+ 0xd1, 0xa6, 0x17, 0xc1, 0xc9, 0xa5, 0x17, 0xc1, 0xc1, 0xa4, 0x17, 0xc1,
+ 0xb9, 0xa3, 0x17, 0xc1, 0xb1, 0xa2, 0x17, 0xc1, 0xa9, 0xa1, 0x17, 0xc1,
+ 0xa1, 0xa0, 0x17, 0xc1, 0x99, 0x9f, 0x17, 0xc1, 0x91, 0x9e, 0x17, 0xc1,
+ 0x89, 0x9d, 0x17, 0xc1, 0x80, 0x88, 0x17, 0xc0, 0xf9, 0x87, 0x17, 0xc0,
+ 0xf1, 0x86, 0x17, 0xc0, 0xe9, 0x85, 0x17, 0xc0, 0xe1, 0x84, 0x17, 0xc0,
+ 0xd9, 0x83, 0x17, 0xc0, 0xd1, 0xa6, 0x17, 0xc0, 0xc9, 0xa5, 0x17, 0xc0,
+ 0xc1, 0xa4, 0x17, 0xc0, 0xb9, 0xa3, 0x17, 0xc0, 0xb1, 0xa2, 0x17, 0xc0,
+ 0xa9, 0xa1, 0x17, 0xc0, 0xa1, 0xa0, 0x17, 0xc0, 0x99, 0x9f, 0x17, 0xc0,
+ 0x91, 0x9e, 0x17, 0xc0, 0x89, 0x9d, 0x17, 0xc0, 0x80, 0x86, 0x17, 0xd0,
+ 0xe9, 0x85, 0x17, 0xd0, 0xe1, 0x84, 0x17, 0xd0, 0xd9, 0x83, 0x17, 0xd0,
+ 0xd1, 0xa6, 0x17, 0xd0, 0xc9, 0xa5, 0x17, 0xd0, 0xc1, 0xa4, 0x17, 0xd0,
+ 0xb9, 0xa3, 0x17, 0xd0, 0xb1, 0xa2, 0x17, 0xd0, 0xa9, 0xa1, 0x17, 0xd0,
+ 0xa1, 0xa0, 0x17, 0xd0, 0x99, 0x9f, 0x17, 0xd0, 0x91, 0x9e, 0x17, 0xd0,
+ 0x89, 0x9d, 0x17, 0xd0, 0x80, 0x88, 0x17, 0xd0, 0x79, 0x87, 0x17, 0xd0,
+ 0x71, 0x86, 0x17, 0xd0, 0x69, 0x85, 0x17, 0xd0, 0x61, 0x84, 0x17, 0xd0,
+ 0x59, 0x83, 0x17, 0xd0, 0x51, 0xa6, 0x17, 0xd0, 0x49, 0xa5, 0x17, 0xd0,
+ 0x41, 0xa4, 0x17, 0xd0, 0x39, 0xa3, 0x17, 0xd0, 0x31, 0xa2, 0x17, 0xd0,
+ 0x29, 0xa1, 0x17, 0xd0, 0x21, 0xa0, 0x17, 0xd0, 0x19, 0x9f, 0x17, 0xd0,
+ 0x11, 0x9e, 0x17, 0xd0, 0x09, 0x9d, 0x17, 0xd0, 0x00, 0xa6, 0x07, 0xd6,
+ 0xc9, 0xa5, 0x07, 0xd6, 0xc1, 0xa4, 0x07, 0xd6, 0xb9, 0xa3, 0x07, 0xd6,
+ 0xb1, 0xa2, 0x07, 0xd6, 0xa9, 0xa1, 0x07, 0xd6, 0xa1, 0xa0, 0x07, 0xd6,
+ 0x99, 0x9f, 0x07, 0xd6, 0x91, 0x9e, 0x07, 0xd6, 0x89, 0x9d, 0x07, 0xd6,
+ 0x80, 0x88, 0x07, 0xd6, 0x79, 0x87, 0x07, 0xd6, 0x71, 0x86, 0x07, 0xd6,
+ 0x69, 0x85, 0x07, 0xd6, 0x61, 0x84, 0x07, 0xd6, 0x59, 0x83, 0x07, 0xd6,
+ 0x51, 0xa6, 0x07, 0xd6, 0x49, 0xa5, 0x07, 0xd6, 0x41, 0xa4, 0x07, 0xd6,
+ 0x39, 0xa3, 0x07, 0xd6, 0x31, 0xa2, 0x07, 0xd6, 0x29, 0xa1, 0x07, 0xd6,
+ 0x21, 0xa0, 0x07, 0xd6, 0x19, 0x9f, 0x07, 0xd6, 0x11, 0x9e, 0x07, 0xd6,
+ 0x09, 0x9d, 0x07, 0xd6, 0x00, 0x88, 0x07, 0xd5, 0xf9, 0x87, 0x07, 0xd5,
+ 0xf1, 0x86, 0x07, 0xd5, 0xe9, 0x85, 0x07, 0xd5, 0xe1, 0x84, 0x07, 0xd5,
+ 0xd9, 0x83, 0x07, 0xd5, 0xd1, 0xa6, 0x07, 0xd5, 0xc9, 0xa5, 0x07, 0xd5,
+ 0xc1, 0xa4, 0x07, 0xd5, 0xb9, 0xa3, 0x07, 0xd5, 0xb1, 0xa2, 0x07, 0xd5,
+ 0xa9, 0xa1, 0x07, 0xd5, 0xa1, 0xa0, 0x07, 0xd5, 0x99, 0x9f, 0x07, 0xd5,
+ 0x91, 0x9e, 0x07, 0xd5, 0x89, 0x9d, 0x07, 0xd5, 0x80, 0x88, 0x07, 0xd5,
+ 0x79, 0x87, 0x07, 0xd5, 0x71, 0x86, 0x07, 0xd5, 0x69, 0x85, 0x07, 0xd5,
+ 0x61, 0x84, 0x07, 0xd5, 0x59, 0x83, 0x07, 0xd5, 0x51, 0xa6, 0x07, 0xd5,
+ 0x49, 0xa5, 0x07, 0xd5, 0x41, 0xa4, 0x07, 0xd5, 0x39, 0xa3, 0x07, 0xd5,
+ 0x31, 0xa2, 0x07, 0xd5, 0x29, 0xa1, 0x07, 0xd5, 0x21, 0xa0, 0x07, 0xd5,
+ 0x19, 0x9f, 0x07, 0xd5, 0x11, 0x9e, 0x07, 0xd5, 0x09, 0x9d, 0x07, 0xd5,
+ 0x00, 0x88, 0x07, 0xd4, 0xf9, 0x87, 0x07, 0xd4, 0xf1, 0x86, 0x07, 0xd4,
+ 0xe9, 0x85, 0x07, 0xd4, 0xe1, 0x84, 0x07, 0xd4, 0xd9, 0x83, 0x07, 0xd4,
+ 0xd1, 0xa6, 0x07, 0xd4, 0xc9, 0xa5, 0x07, 0xd4, 0xc1, 0xa4, 0x07, 0xd4,
+ 0xb9, 0xa3, 0x07, 0xd4, 0xb1, 0xa2, 0x07, 0xd4, 0xa9, 0xa1, 0x07, 0xd4,
+ 0xa1, 0xa0, 0x07, 0xd4, 0x99, 0x9f, 0x07, 0xd4, 0x91, 0x9e, 0x07, 0xd4,
+ 0x89, 0x9d, 0x07, 0xd4, 0x80, 0x88, 0x07, 0xd4, 0x79, 0x87, 0x07, 0xd4,
+ 0x71, 0x86, 0x07, 0xd4, 0x69, 0x85, 0x07, 0xd4, 0x61, 0x84, 0x07, 0xd4,
+ 0x59, 0x83, 0x07, 0xd4, 0x51, 0xa6, 0x07, 0xd4, 0x49, 0xa5, 0x07, 0xd4,
+ 0x41, 0xa4, 0x07, 0xd4, 0x39, 0xa3, 0x07, 0xd4, 0x31, 0xa2, 0x07, 0xd4,
+ 0x29, 0xa1, 0x07, 0xd4, 0x21, 0xa0, 0x07, 0xd4, 0x19, 0x9f, 0x07, 0xd4,
+ 0x11, 0x9e, 0x07, 0xd4, 0x09, 0x9d, 0x07, 0xd4, 0x00, 0x86, 0x07, 0xd3,
+ 0xe9, 0x85, 0x07, 0xd3, 0xe1, 0x84, 0x07, 0xd3, 0xd9, 0x83, 0x07, 0xd3,
+ 0xd1, 0xa6, 0x07, 0xd3, 0xc9, 0xa5, 0x07, 0xd3, 0xc1, 0xa4, 0x07, 0xd3,
+ 0xb9, 0xa3, 0x07, 0xd3, 0xb1, 0xa2, 0x07, 0xd3, 0xa9, 0xa1, 0x07, 0xd3,
+ 0xa1, 0xa0, 0x07, 0xd3, 0x99, 0x9f, 0x07, 0xd3, 0x91, 0x9e, 0x07, 0xd3,
+ 0x89, 0x9d, 0x07, 0xd3, 0x81, 0x87, 0x07, 0xd3, 0xf1, 0x88, 0x07, 0xd3,
+ 0xf8, 0x86, 0x07, 0xd3, 0x69, 0x85, 0x07, 0xd3, 0x61, 0x84, 0x07, 0xd3,
+ 0x59, 0x83, 0x07, 0xd3, 0x51, 0xa6, 0x07, 0xd3, 0x49, 0xa5, 0x07, 0xd3,
+ 0x41, 0xa4, 0x07, 0xd3, 0x39, 0xa3, 0x07, 0xd3, 0x31, 0xa2, 0x07, 0xd3,
+ 0x29, 0xa1, 0x07, 0xd3, 0x21, 0xa0, 0x07, 0xd3, 0x19, 0x9f, 0x07, 0xd3,
+ 0x11, 0x9e, 0x07, 0xd3, 0x09, 0x9d, 0x07, 0xd3, 0x00, 0x88, 0x07, 0xd2,
+ 0xf9, 0x87, 0x07, 0xd2, 0xf1, 0x86, 0x07, 0xd2, 0xe9, 0x85, 0x07, 0xd2,
+ 0xe1, 0x84, 0x07, 0xd2, 0xd9, 0x83, 0x07, 0xd2, 0xd1, 0xa6, 0x07, 0xd2,
+ 0xc9, 0xa5, 0x07, 0xd2, 0xc1, 0xa4, 0x07, 0xd2, 0xb9, 0xa3, 0x07, 0xd2,
+ 0xb1, 0xa2, 0x07, 0xd2, 0xa9, 0xa1, 0x07, 0xd2, 0xa1, 0xa0, 0x07, 0xd2,
+ 0x99, 0x9f, 0x07, 0xd2, 0x91, 0x9e, 0x07, 0xd2, 0x89, 0x9d, 0x07, 0xd2,
+ 0x80, 0x88, 0x07, 0xd2, 0x79, 0x87, 0x07, 0xd2, 0x71, 0x86, 0x07, 0xd2,
+ 0x69, 0x85, 0x07, 0xd2, 0x61, 0x84, 0x07, 0xd2, 0x59, 0x83, 0x07, 0xd2,
+ 0x51, 0xa6, 0x07, 0xd2, 0x49, 0xa5, 0x07, 0xd2, 0x41, 0xa4, 0x07, 0xd2,
+ 0x39, 0xa3, 0x07, 0xd2, 0x31, 0xa2, 0x07, 0xd2, 0x29, 0xa1, 0x07, 0xd2,
+ 0x21, 0xa0, 0x07, 0xd2, 0x19, 0x9f, 0x07, 0xd2, 0x11, 0x9d, 0x07, 0xd2,
+ 0x01, 0x9e, 0x07, 0xd2, 0x08, 0x88, 0x07, 0xd1, 0xf9, 0x87, 0x07, 0xd1,
+ 0xf1, 0x86, 0x07, 0xd1, 0xe9, 0x85, 0x07, 0xd1, 0xe1, 0x84, 0x07, 0xd1,
+ 0xd9, 0x83, 0x07, 0xd1, 0xd1, 0xa6, 0x07, 0xd1, 0xc9, 0xa5, 0x07, 0xd1,
+ 0xc1, 0xa4, 0x07, 0xd1, 0xb9, 0xa3, 0x07, 0xd1, 0xb1, 0xa2, 0x07, 0xd1,
+ 0xa9, 0xa1, 0x07, 0xd1, 0xa1, 0xa0, 0x07, 0xd1, 0x99, 0x9f, 0x07, 0xd1,
+ 0x91, 0x9e, 0x07, 0xd1, 0x89, 0x9d, 0x07, 0xd1, 0x80, 0x88, 0x07, 0xd1,
+ 0x79, 0x87, 0x07, 0xd1, 0x71, 0x86, 0x07, 0xd1, 0x69, 0x85, 0x07, 0xd1,
+ 0x61, 0x84, 0x07, 0xd1, 0x59, 0x83, 0x07, 0xd1, 0x51, 0xa6, 0x07, 0xd1,
+ 0x49, 0xa5, 0x07, 0xd1, 0x41, 0xa4, 0x07, 0xd1, 0x39, 0xa3, 0x07, 0xd1,
+ 0x31, 0xa2, 0x07, 0xd1, 0x29, 0xa1, 0x07, 0xd1, 0x21, 0xa0, 0x07, 0xd1,
+ 0x19, 0x9f, 0x07, 0xd1, 0x11, 0x9e, 0x07, 0xd1, 0x09, 0x9d, 0x07, 0xd1,
+ 0x00, 0x88, 0x07, 0xd0, 0xf9, 0x87, 0x07, 0xd0, 0xf1, 0x86, 0x07, 0xd0,
+ 0xe9, 0x85, 0x07, 0xd0, 0xe1, 0x84, 0x07, 0xd0, 0xd9, 0x83, 0x07, 0xd0,
+ 0xd1, 0xa6, 0x07, 0xd0, 0xc9, 0xa5, 0x07, 0xd0, 0xc1, 0xa4, 0x07, 0xd0,
+ 0xb9, 0xa3, 0x07, 0xd0, 0xb1, 0xa2, 0x07, 0xd0, 0xa9, 0xa1, 0x07, 0xd0,
+ 0xa1, 0xa0, 0x07, 0xd0, 0x99, 0x9f, 0x07, 0xd0, 0x91, 0x9e, 0x07, 0xd0,
+ 0x89, 0x9d, 0x07, 0xd0, 0x80, 0x88, 0x07, 0xd0, 0x79, 0x87, 0x07, 0xd0,
+ 0x71, 0x86, 0x07, 0xd0, 0x69, 0x85, 0x07, 0xd0, 0x61, 0x84, 0x07, 0xd0,
+ 0x59, 0x83, 0x07, 0xd0, 0x51, 0xa6, 0x07, 0xd0, 0x49, 0xa5, 0x07, 0xd0,
+ 0x41, 0xa4, 0x07, 0xd0, 0x39, 0xa3, 0x07, 0xd0, 0x31, 0xa2, 0x07, 0xd0,
+ 0x29, 0xa1, 0x07, 0xd0, 0x21, 0xa0, 0x07, 0xd0, 0x19, 0x9f, 0x07, 0xd0,
+ 0x11, 0x9e, 0x07, 0xd0, 0x09, 0x9d, 0x07, 0xd0, 0x00, 0x88, 0x07, 0xcf,
+ 0xf9, 0x87, 0x07, 0xcf, 0xf1, 0x86, 0x07, 0xcf, 0xe9, 0x85, 0x07, 0xcf,
+ 0xe1, 0x84, 0x07, 0xcf, 0xd9, 0x83, 0x07, 0xcf, 0xd1, 0xa6, 0x07, 0xcf,
+ 0xc9, 0xa5, 0x07, 0xcf, 0xc1, 0xa4, 0x07, 0xcf, 0xb9, 0xa3, 0x07, 0xcf,
+ 0xb1, 0xa2, 0x07, 0xcf, 0xa9, 0xa1, 0x07, 0xcf, 0xa1, 0xa0, 0x07, 0xcf,
+ 0x99, 0x9f, 0x07, 0xcf, 0x91, 0x9e, 0x07, 0xcf, 0x89, 0x9d, 0x07, 0xcf,
+ 0x80, 0x88, 0x07, 0xcf, 0x79, 0x87, 0x07, 0xcf, 0x71, 0x86, 0x07, 0xcf,
+ 0x69, 0x85, 0x07, 0xcf, 0x61, 0x84, 0x07, 0xcf, 0x59, 0x83, 0x07, 0xcf,
+ 0x51, 0xa6, 0x07, 0xcf, 0x49, 0xa5, 0x07, 0xcf, 0x41, 0xa4, 0x07, 0xcf,
+ 0x39, 0xa3, 0x07, 0xcf, 0x31, 0xa2, 0x07, 0xcf, 0x29, 0xa1, 0x07, 0xcf,
+ 0x21, 0xa0, 0x07, 0xcf, 0x19, 0x9f, 0x07, 0xcf, 0x11, 0x9e, 0x07, 0xcf,
+ 0x09, 0x9d, 0x07, 0xcf, 0x00, 0x88, 0x07, 0xce, 0xf9, 0x87, 0x07, 0xce,
+ 0xf1, 0x86, 0x07, 0xce, 0xe9, 0x85, 0x07, 0xce, 0xe1, 0x84, 0x07, 0xce,
+ 0xd9, 0x83, 0x07, 0xce, 0xd1, 0xa6, 0x07, 0xce, 0xc9, 0xa5, 0x07, 0xce,
+ 0xc1, 0xa4, 0x07, 0xce, 0xb9, 0xa3, 0x07, 0xce, 0xb1, 0xa2, 0x07, 0xce,
+ 0xa9, 0xa1, 0x07, 0xce, 0xa1, 0xa0, 0x07, 0xce, 0x99, 0x9f, 0x07, 0xce,
+ 0x91, 0x9e, 0x07, 0xce, 0x89, 0x9d, 0x07, 0xce, 0x80, 0x88, 0x07, 0xce,
+ 0x79, 0x87, 0x07, 0xce, 0x71, 0x86, 0x07, 0xce, 0x69, 0x85, 0x07, 0xce,
+ 0x61, 0x84, 0x07, 0xce, 0x59, 0x83, 0x07, 0xce, 0x51, 0xa6, 0x07, 0xce,
+ 0x49, 0xa5, 0x07, 0xce, 0x41, 0xa4, 0x07, 0xce, 0x39, 0xa3, 0x07, 0xce,
+ 0x31, 0xa2, 0x07, 0xce, 0x29, 0xa1, 0x07, 0xce, 0x21, 0xa0, 0x07, 0xce,
+ 0x19, 0x9f, 0x07, 0xce, 0x11, 0x9e, 0x07, 0xce, 0x09, 0x9d, 0x07, 0xce,
+ 0x00, 0x88, 0x07, 0xcd, 0xf9, 0x87, 0x07, 0xcd, 0xf1, 0x86, 0x07, 0xcd,
+ 0xe9, 0x85, 0x07, 0xcd, 0xe1, 0x84, 0x07, 0xcd, 0xd9, 0x83, 0x07, 0xcd,
+ 0xd1, 0xa6, 0x07, 0xcd, 0xc9, 0xa5, 0x07, 0xcd, 0xc1, 0xa4, 0x07, 0xcd,
+ 0xb9, 0xa3, 0x07, 0xcd, 0xb1, 0xa2, 0x07, 0xcd, 0xa9, 0xa1, 0x07, 0xcd,
+ 0xa1, 0xa0, 0x07, 0xcd, 0x99, 0x9f, 0x07, 0xcd, 0x91, 0x9e, 0x07, 0xcd,
+ 0x89, 0x9d, 0x07, 0xcd, 0x80, 0x88, 0x07, 0xcd, 0x79, 0x87, 0x07, 0xcd,
+ 0x71, 0x86, 0x07, 0xcd, 0x69, 0x85, 0x07, 0xcd, 0x61, 0x84, 0x07, 0xcd,
+ 0x59, 0x83, 0x07, 0xcd, 0x51, 0xa6, 0x07, 0xcd, 0x49, 0xa5, 0x07, 0xcd,
+ 0x41, 0xa4, 0x07, 0xcd, 0x39, 0xa3, 0x07, 0xcd, 0x31, 0xa2, 0x07, 0xcd,
+ 0x29, 0xa1, 0x07, 0xcd, 0x21, 0xa0, 0x07, 0xcd, 0x19, 0x9f, 0x07, 0xcd,
+ 0x11, 0x9e, 0x07, 0xcd, 0x09, 0x9d, 0x07, 0xcd, 0x00, 0x88, 0x07, 0xcc,
+ 0xf9, 0x87, 0x07, 0xcc, 0xf1, 0x86, 0x07, 0xcc, 0xe9, 0x85, 0x07, 0xcc,
+ 0xe1, 0x84, 0x07, 0xcc, 0xd9, 0x83, 0x07, 0xcc, 0xd1, 0xa6, 0x07, 0xcc,
+ 0xc9, 0xa5, 0x07, 0xcc, 0xc1, 0xa4, 0x07, 0xcc, 0xb9, 0xa3, 0x07, 0xcc,
+ 0xb1, 0xa2, 0x07, 0xcc, 0xa9, 0xa1, 0x07, 0xcc, 0xa1, 0xa0, 0x07, 0xcc,
+ 0x99, 0x9f, 0x07, 0xcc, 0x91, 0x9e, 0x07, 0xcc, 0x89, 0x9d, 0x07, 0xcc,
+ 0x80, 0x88, 0x07, 0xcc, 0x79, 0x87, 0x07, 0xcc, 0x71, 0x86, 0x07, 0xcc,
+ 0x69, 0x85, 0x07, 0xcc, 0x61, 0x84, 0x07, 0xcc, 0x59, 0x83, 0x07, 0xcc,
+ 0x51, 0xa6, 0x07, 0xcc, 0x49, 0xa5, 0x07, 0xcc, 0x41, 0xa4, 0x07, 0xcc,
+ 0x39, 0xa3, 0x07, 0xcc, 0x31, 0xa2, 0x07, 0xcc, 0x29, 0xa1, 0x07, 0xcc,
+ 0x21, 0xa0, 0x07, 0xcc, 0x19, 0x9f, 0x07, 0xcc, 0x11, 0x9e, 0x07, 0xcc,
+ 0x09, 0x9d, 0x07, 0xcc, 0x00, 0x88, 0x07, 0xcb, 0xf9, 0x87, 0x07, 0xcb,
+ 0xf1, 0x86, 0x07, 0xcb, 0xe9, 0x85, 0x07, 0xcb, 0xe1, 0x84, 0x07, 0xcb,
+ 0xd9, 0x83, 0x07, 0xcb, 0xd1, 0xa6, 0x07, 0xcb, 0xc9, 0xa5, 0x07, 0xcb,
+ 0xc1, 0xa4, 0x07, 0xcb, 0xb9, 0xa3, 0x07, 0xcb, 0xb1, 0xa2, 0x07, 0xcb,
+ 0xa9, 0xa1, 0x07, 0xcb, 0xa1, 0xa0, 0x07, 0xcb, 0x99, 0x9f, 0x07, 0xcb,
+ 0x91, 0x9e, 0x07, 0xcb, 0x89, 0x9d, 0x07, 0xcb, 0x80, 0x88, 0x07, 0xcb,
+ 0x79, 0x87, 0x07, 0xcb, 0x71, 0x86, 0x07, 0xcb, 0x69, 0x85, 0x07, 0xcb,
+ 0x61, 0x84, 0x07, 0xcb, 0x59, 0x83, 0x07, 0xcb, 0x51, 0xa6, 0x07, 0xcb,
+ 0x49, 0xa5, 0x07, 0xcb, 0x41, 0xa4, 0x07, 0xcb, 0x39, 0xa3, 0x07, 0xcb,
+ 0x31, 0xa2, 0x07, 0xcb, 0x29, 0xa1, 0x07, 0xcb, 0x21, 0xa0, 0x07, 0xcb,
+ 0x19, 0x9f, 0x07, 0xcb, 0x11, 0x9e, 0x07, 0xcb, 0x09, 0x9d, 0x07, 0xcb,
+ 0x00, 0x88, 0x07, 0xca, 0xf9, 0x87, 0x07, 0xca, 0xf1, 0x86, 0x07, 0xca,
+ 0xe9, 0x85, 0x07, 0xca, 0xe1, 0x84, 0x07, 0xca, 0xd9, 0x83, 0x07, 0xca,
+ 0xd1, 0xa6, 0x07, 0xca, 0xc9, 0xa5, 0x07, 0xca, 0xc1, 0xa4, 0x07, 0xca,
+ 0xb9, 0xa3, 0x07, 0xca, 0xb1, 0xa2, 0x07, 0xca, 0xa9, 0xa1, 0x07, 0xca,
+ 0xa1, 0xa0, 0x07, 0xca, 0x99, 0x9f, 0x07, 0xca, 0x91, 0x9e, 0x07, 0xca,
+ 0x89, 0x9d, 0x07, 0xca, 0x80, 0x88, 0x07, 0xca, 0x79, 0x87, 0x07, 0xca,
+ 0x71, 0x86, 0x07, 0xca, 0x69, 0x85, 0x07, 0xca, 0x61, 0x84, 0x07, 0xca,
+ 0x59, 0x83, 0x07, 0xca, 0x51, 0xa6, 0x07, 0xca, 0x49, 0xa5, 0x07, 0xca,
+ 0x41, 0xa4, 0x07, 0xca, 0x39, 0xa3, 0x07, 0xca, 0x31, 0xa2, 0x07, 0xca,
+ 0x29, 0xa1, 0x07, 0xca, 0x21, 0xa0, 0x07, 0xca, 0x19, 0x9f, 0x07, 0xca,
+ 0x11, 0x9e, 0x07, 0xca, 0x09, 0x9d, 0x07, 0xca, 0x00, 0x88, 0x07, 0xc9,
+ 0xf9, 0x87, 0x07, 0xc9, 0xf1, 0x86, 0x07, 0xc9, 0xe9, 0x85, 0x07, 0xc9,
+ 0xe1, 0x84, 0x07, 0xc9, 0xd9, 0x83, 0x07, 0xc9, 0xd1, 0xa6, 0x07, 0xc9,
+ 0xc9, 0xa5, 0x07, 0xc9, 0xc1, 0xa4, 0x07, 0xc9, 0xb9, 0xa3, 0x07, 0xc9,
+ 0xb1, 0xa2, 0x07, 0xc9, 0xa9, 0xa1, 0x07, 0xc9, 0xa1, 0xa0, 0x07, 0xc9,
+ 0x99, 0x9d, 0x07, 0xc9, 0x81, 0x9e, 0x07, 0xc9, 0x89, 0x9f, 0x07, 0xc9,
+ 0x90, 0xa4, 0x07, 0xc9, 0x39, 0xa3, 0x07, 0xc9, 0x31, 0xa2, 0x07, 0xc9,
+ 0x29, 0xa1, 0x07, 0xc9, 0x21, 0xa0, 0x07, 0xc9, 0x19, 0x9f, 0x07, 0xc9,
+ 0x11, 0x9d, 0x07, 0xc9, 0x01, 0x9e, 0x07, 0xc9, 0x09, 0xa5, 0x07, 0xc9,
+ 0x41, 0xa6, 0x07, 0xc9, 0x49, 0x83, 0x07, 0xc9, 0x51, 0x84, 0x07, 0xc9,
+ 0x59, 0x85, 0x07, 0xc9, 0x61, 0x86, 0x07, 0xc9, 0x69, 0x87, 0x07, 0xc9,
+ 0x71, 0x88, 0x07, 0xc9, 0x78, 0x86, 0x07, 0xc8, 0xe9, 0x85, 0x07, 0xc8,
+ 0xe1, 0x84, 0x07, 0xc8, 0xd9, 0x83, 0x07, 0xc8, 0xd1, 0xa6, 0x07, 0xc8,
+ 0xc9, 0xa5, 0x07, 0xc8, 0xc1, 0xa4, 0x07, 0xc8, 0xb9, 0xa3, 0x07, 0xc8,
+ 0xb1, 0xa2, 0x07, 0xc8, 0xa9, 0xa1, 0x07, 0xc8, 0xa1, 0xa0, 0x07, 0xc8,
+ 0x99, 0x9f, 0x07, 0xc8, 0x91, 0x9e, 0x07, 0xc8, 0x89, 0x9d, 0x07, 0xc8,
+ 0x81, 0x87, 0x07, 0xc8, 0xf1, 0x88, 0x07, 0xc8, 0xf8, 0x88, 0x07, 0xc8,
+ 0x79, 0x87, 0x07, 0xc8, 0x71, 0x86, 0x07, 0xc8, 0x69, 0x85, 0x07, 0xc8,
+ 0x61, 0x84, 0x07, 0xc8, 0x59, 0x83, 0x07, 0xc8, 0x51, 0xa6, 0x07, 0xc8,
+ 0x49, 0xa5, 0x07, 0xc8, 0x41, 0xa4, 0x07, 0xc8, 0x39, 0xa3, 0x07, 0xc8,
+ 0x31, 0xa2, 0x07, 0xc8, 0x29, 0xa1, 0x07, 0xc8, 0x21, 0xa0, 0x07, 0xc8,
+ 0x19, 0x9d, 0x07, 0xc8, 0x01, 0x9e, 0x07, 0xc8, 0x09, 0x9f, 0x07, 0xc8,
+ 0x10, 0xc3, 0x05, 0x17, 0x01, 0x74, 0x11, 0x16, 0x42, 0x1d, 0x2a, 0xc3,
+ 0x05, 0x17, 0x01, 0x74, 0xa1, 0xc3, 0x0a, 0x1f, 0x01, 0x74, 0xa8, 0x0a,
+ 0xc2, 0x1d, 0x36, 0x19, 0xc2, 0x1d, 0x42, 0xc6, 0xc5, 0xfb, 0x01, 0x77,
+ 0x48, 0xc2, 0x0a, 0x20, 0x01, 0x74, 0x79, 0xc4, 0x05, 0xde, 0x01, 0x74,
+ 0x80, 0xc3, 0x05, 0x17, 0x01, 0x74, 0xb1, 0xc3, 0x0a, 0x1f, 0x01, 0x74,
+ 0xb8, 0xc3, 0x05, 0x17, 0x01, 0x76, 0xa9, 0xc3, 0x0a, 0x1f, 0x01, 0x76,
+ 0xb0, 0xc3, 0x05, 0x17, 0x01, 0x75, 0x09, 0xc3, 0x0a, 0x1f, 0x01, 0x75,
+ 0x10, 0xc3, 0x05, 0x17, 0x01, 0x76, 0x69, 0xc3, 0x0a, 0x1f, 0x01, 0x76,
+ 0x70, 0xc2, 0x0a, 0x20, 0x01, 0x76, 0xf1, 0xc4, 0x05, 0xde, 0x01, 0x76,
+ 0xf8, 0xc2, 0x0a, 0x20, 0x01, 0x75, 0xf9, 0xc4, 0x05, 0xde, 0x01, 0x76,
+ 0x00, 0x92, 0x01, 0x8e, 0x59, 0x9c, 0x01, 0x8e, 0x72, 0x02, 0x1d, 0x4e,
+ 0x89, 0x01, 0x8e, 0x40, 0x09, 0xc2, 0x1d, 0x52, 0x98, 0x05, 0x5b, 0xa9,
+ 0x97, 0x05, 0x5b, 0xa1, 0x91, 0x05, 0x5b, 0x99, 0x8b, 0x05, 0x5b, 0x91,
+ 0x87, 0x05, 0x5b, 0x89, 0x83, 0x05, 0x5b, 0x81, 0x1b, 0xc2, 0x1d, 0x6a,
+ 0x19, 0xc2, 0x1d, 0x82, 0x16, 0xc2, 0x1d, 0x9a, 0x10, 0xc2, 0x1d, 0xae,
+ 0x0a, 0xc2, 0x1d, 0xc9, 0x0f, 0xc2, 0x1d, 0xe7, 0x0e, 0xc2, 0x1d, 0xff,
+ 0xc2, 0x06, 0x6b, 0x05, 0x5b, 0xb9, 0x42, 0x00, 0x93, 0xc2, 0x1e, 0x17,
+ 0x95, 0x05, 0x5c, 0xeb, 0x02, 0x1e, 0x2f, 0x06, 0x42, 0x1e, 0x47, 0x83,
+ 0x00, 0x9d, 0x01, 0x87, 0x00, 0x9d, 0x09, 0x8b, 0x00, 0x9d, 0x11, 0x91,
+ 0x00, 0x9d, 0x19, 0x97, 0x00, 0x9d, 0x21, 0x98, 0x00, 0x9d, 0x29, 0x09,
+ 0xc2, 0x1e, 0x65, 0xc2, 0x06, 0x6b, 0x00, 0x9d, 0x39, 0x0a, 0xc2, 0x1e,
+ 0x7d, 0x0e, 0xc2, 0x1e, 0x9b, 0x0f, 0xc2, 0x1e, 0xb3, 0x10, 0xc2, 0x1e,
+ 0xcb, 0x42, 0x00, 0x93, 0xc2, 0x1e, 0xe6, 0x95, 0x00, 0x9e, 0x6b, 0x02,
+ 0x1e, 0xfe, 0x06, 0xc2, 0x1f, 0x16, 0x16, 0xc2, 0x1f, 0x34, 0x19, 0xc2,
+ 0x1f, 0x48, 0x1b, 0x42, 0x1f, 0x60, 0x00, 0x42, 0x1f, 0x78, 0xcd, 0x79,
+ 0x1f, 0x0f, 0xa5, 0xc8, 0xc3, 0x3b, 0xb0, 0x08, 0x8a, 0x21, 0xc2, 0x0e,
+ 0x30, 0x08, 0x89, 0x18, 0xc2, 0x0e, 0x30, 0x08, 0x89, 0x09, 0xc3, 0x67,
+ 0x9c, 0x08, 0x89, 0x00, 0xc3, 0x3b, 0xb0, 0x08, 0x88, 0xf1, 0xc2, 0x0e,
+ 0x30, 0x08, 0x88, 0xe8, 0xc3, 0x3b, 0xb0, 0x08, 0x88, 0xe1, 0xc2, 0x0e,
+ 0x30, 0x08, 0x88, 0xd8, 0xc2, 0x0e, 0x30, 0x08, 0x88, 0xd1, 0xc3, 0x3d,
+ 0x50, 0x08, 0x88, 0xa9, 0xc3, 0x67, 0x9c, 0x08, 0x88, 0x81, 0xc3, 0x4a,
+ 0x36, 0x08, 0x88, 0x58, 0xc3, 0x3b, 0xb0, 0x08, 0x88, 0xc9, 0xc2, 0x0e,
+ 0x30, 0x08, 0x88, 0xc1, 0x06, 0x42, 0x1f, 0x84, 0xc3, 0x3b, 0xb0, 0x08,
+ 0x88, 0xb9, 0xc2, 0x0e, 0x30, 0x08, 0x88, 0xb1, 0x16, 0x42, 0x1f, 0x90,
+ 0xc3, 0x3b, 0xb0, 0x08, 0x88, 0x79, 0xc2, 0x0e, 0x30, 0x08, 0x88, 0x70,
+ 0xc3, 0x3b, 0xb0, 0x08, 0x88, 0x69, 0xc2, 0x0e, 0x30, 0x08, 0x88, 0x60,
+ 0xc3, 0x3b, 0xb0, 0x08, 0x88, 0x51, 0xc2, 0x0e, 0x30, 0x08, 0x88, 0x48,
+ 0xc3, 0x3b, 0xb0, 0x08, 0x88, 0x41, 0xc2, 0x0e, 0x30, 0x08, 0x88, 0x38,
+ 0x87, 0x08, 0x89, 0x63, 0x02, 0x1f, 0x9c, 0x83, 0x08, 0x89, 0x3b, 0x02,
+ 0x1f, 0xa0, 0x91, 0x08, 0x89, 0x73, 0x02, 0x1f, 0xac, 0x97, 0x08, 0x89,
+ 0x53, 0x02, 0x1f, 0xb0, 0x8b, 0x08, 0x89, 0x42, 0x02, 0x1f, 0xb4, 0xc4,
+ 0x24, 0x35, 0x08, 0x89, 0xf9, 0xc5, 0x05, 0x1b, 0x08, 0x89, 0xf1, 0x15,
+ 0xc2, 0x1f, 0xb8, 0x08, 0xc2, 0x1f, 0xc4, 0x16, 0xc2, 0x1f, 0xd0, 0xc3,
+ 0x05, 0x17, 0x08, 0x89, 0xb9, 0xc4, 0x16, 0x57, 0x08, 0x89, 0xb0, 0xc7,
+ 0x45, 0xcd, 0x08, 0x88, 0x11, 0xc8, 0x10, 0xab, 0x08, 0x88, 0x09, 0xcb,
+ 0x21, 0x1a, 0x08, 0x88, 0x00, 0x8a, 0x05, 0x52, 0x69, 0x8f, 0x05, 0x52,
+ 0x61, 0xc2, 0x00, 0x56, 0x05, 0x52, 0x18, 0x87, 0x05, 0x51, 0x90, 0x97,
+ 0x05, 0x51, 0x89, 0x8b, 0x05, 0x51, 0x81, 0x83, 0x05, 0x51, 0x48, 0x87,
+ 0x05, 0x51, 0x70, 0x8b, 0x05, 0x51, 0x58, 0x83, 0x05, 0x51, 0x39, 0xc2,
+ 0x0e, 0xe5, 0x05, 0x51, 0x30, 0x09, 0xc2, 0x1f, 0xdc, 0x83, 0x05, 0x50,
+ 0xc1, 0xc2, 0x0e, 0x78, 0x05, 0x50, 0xb9, 0x0a, 0x42, 0x1f, 0xe6, 0xc2,
+ 0x01, 0x0e, 0x05, 0x50, 0x49, 0x83, 0x05, 0x50, 0x40, 0xc2, 0x01, 0x0e,
+ 0x05, 0x50, 0x39, 0x83, 0x05, 0x50, 0x30, 0x8b, 0x05, 0x50, 0x20, 0xc2,
+ 0x03, 0x48, 0x05, 0x52, 0x59, 0x8e, 0x05, 0x52, 0x51, 0x94, 0x05, 0x52,
+ 0x49, 0x9b, 0x05, 0x52, 0x41, 0x92, 0x05, 0x52, 0x39, 0x90, 0x05, 0x52,
+ 0x33, 0x02, 0x1f, 0xf6, 0x96, 0x05, 0x52, 0x29, 0xc2, 0x11, 0x3f, 0x05,
+ 0x52, 0x21, 0x89, 0x05, 0x52, 0x09, 0x8d, 0x05, 0x52, 0x00, 0xc2, 0x02,
+ 0x1d, 0x05, 0x51, 0x09, 0x83, 0x05, 0x50, 0xe9, 0xc2, 0x01, 0x0e, 0x05,
+ 0x50, 0xf0, 0x83, 0x05, 0x51, 0x01, 0xc2, 0x0e, 0x78, 0x05, 0x50, 0xf8,
+ 0xc2, 0x01, 0x0e, 0x05, 0x50, 0xe1, 0xc2, 0x07, 0x44, 0x05, 0x50, 0xd9,
+ 0x83, 0x05, 0x50, 0xd0, 0xc2, 0x0c, 0x25, 0x05, 0x50, 0xc9, 0xc2, 0x01,
+ 0x0e, 0x05, 0x50, 0xb1, 0x83, 0x05, 0x50, 0xa8, 0xc2, 0x01, 0x0e, 0x05,
+ 0x50, 0xa1, 0x83, 0x05, 0x50, 0x98, 0xc2, 0x01, 0x0e, 0x05, 0x50, 0x79,
+ 0x83, 0x05, 0x50, 0x70, 0xc2, 0x01, 0x0e, 0x05, 0x50, 0x69, 0x83, 0x05,
+ 0x50, 0x60, 0xcb, 0x91, 0xff, 0x05, 0x52, 0xf1, 0xc4, 0x1c, 0xb3, 0x05,
+ 0x52, 0xe8, 0xc4, 0x15, 0xa7, 0x05, 0x52, 0xb9, 0xc2, 0x22, 0x45, 0x05,
+ 0x52, 0xb0, 0xc3, 0x0d, 0x8f, 0x05, 0x52, 0xa9, 0xc3, 0x08, 0xde, 0x05,
+ 0x52, 0xa0, 0xc4, 0x05, 0xde, 0x05, 0x52, 0x99, 0xc2, 0x0a, 0x20, 0x05,
+ 0x52, 0x90, 0xc8, 0x0d, 0x7e, 0x08, 0x7e, 0x58, 0x19, 0xc2, 0x1f, 0xfa,
+ 0xc2, 0x01, 0x04, 0x08, 0x7e, 0x49, 0xc4, 0x05, 0xde, 0x08, 0x7e, 0x38,
+ 0xc3, 0x11, 0x40, 0x08, 0x7e, 0x19, 0xca, 0xa6, 0x5a, 0x08, 0x7d, 0x89,
+ 0xc5, 0xdd, 0x33, 0x08, 0x7d, 0xf8, 0xc2, 0x01, 0x47, 0x08, 0x7d, 0xc8,
+ 0xc4, 0x32, 0xac, 0x08, 0x7d, 0x81, 0xc3, 0x18, 0x7a, 0x08, 0x7e, 0x00,
+ 0xc9, 0xb1, 0x05, 0x01, 0x31, 0x49, 0xc8, 0xbd, 0xc3, 0x01, 0x31, 0x40,
+ 0xc5, 0xd4, 0x97, 0x0f, 0xaa, 0x13, 0x02, 0x20, 0x04, 0x4a, 0x9d, 0x5e,
+ 0x42, 0x20, 0x0a, 0xe0, 0x01, 0xe7, 0x0f, 0x8c, 0x50, 0x45, 0x00, 0x3e,
+ 0xc2, 0x20, 0x16, 0xcd, 0x36, 0xde, 0x00, 0x24, 0x49, 0x48, 0x0d, 0x7f,
+ 0xc2, 0x20, 0x1c, 0x12, 0xc2, 0x20, 0x28, 0xce, 0x73, 0xb2, 0x00, 0x24,
+ 0x29, 0x16, 0xc2, 0x20, 0x38, 0x47, 0x01, 0xff, 0xc2, 0x20, 0x4d, 0xc5,
+ 0xe3, 0x64, 0x05, 0x33, 0x79, 0xc6, 0x4c, 0x56, 0x05, 0x33, 0xe0, 0xcc,
+ 0x8a, 0xe4, 0x01, 0x06, 0xc9, 0xcb, 0x09, 0x4c, 0x01, 0x06, 0xa8, 0xc6,
+ 0x00, 0x33, 0x00, 0x19, 0x68, 0xc3, 0x09, 0xe7, 0x00, 0x18, 0x63, 0x02,
+ 0x20, 0xbb, 0xc9, 0x21, 0x1c, 0x00, 0x18, 0x80, 0x44, 0x00, 0xec, 0xc2,
+ 0x20, 0xc1, 0xcf, 0x61, 0xb3, 0x07, 0xf1, 0x32, 0x02, 0x20, 0xd0, 0x08,
+ 0xc2, 0x20, 0xd6, 0x8b, 0x0f, 0x00, 0x5b, 0x02, 0x20, 0xe2, 0x04, 0xc2,
+ 0x20, 0xf4, 0x1b, 0xc2, 0x21, 0x00, 0x15, 0xc2, 0x21, 0x12, 0xc6, 0x82,
+ 0x3d, 0x0f, 0x00, 0xe9, 0x16, 0xc2, 0x21, 0x22, 0xc4, 0xe6, 0xf3, 0x0f,
+ 0x00, 0xc1, 0xc3, 0xca, 0xfd, 0x0f, 0x00, 0xb1, 0xc5, 0xda, 0x6d, 0x0f,
+ 0x00, 0x99, 0xc6, 0xd4, 0xf6, 0x0f, 0x00, 0x91, 0xc3, 0x96, 0x81, 0x0f,
+ 0x00, 0x89, 0xc5, 0xe0, 0x6c, 0x0f, 0x00, 0x81, 0xc7, 0x62, 0x51, 0x0f,
+ 0x00, 0x79, 0xc7, 0xc9, 0x51, 0x0f, 0x00, 0x71, 0xc4, 0xe5, 0x5b, 0x0f,
+ 0x00, 0x69, 0x06, 0xc2, 0x21, 0x2e, 0x1c, 0xc2, 0x21, 0x3a, 0xc7, 0xc9,
+ 0x90, 0x0f, 0x00, 0x19, 0xc4, 0xe4, 0xdb, 0x0f, 0x00, 0x11, 0xc3, 0xeb,
+ 0x64, 0x0f, 0x00, 0x00, 0x44, 0x27, 0x6d, 0xc2, 0x21, 0x46, 0x03, 0x42,
+ 0x21, 0x64, 0xc5, 0x03, 0x50, 0x01, 0x07, 0x81, 0xc5, 0x00, 0x34, 0x00,
+ 0x1a, 0xc8, 0xcc, 0x8a, 0x90, 0x01, 0x07, 0x39, 0x4c, 0x00, 0xb6, 0x42,
+ 0x21, 0x74, 0xc5, 0x00, 0x34, 0x00, 0xef, 0xe9, 0xc5, 0x03, 0x50, 0x00,
+ 0x1a, 0x60, 0xcd, 0x7e, 0x40, 0x00, 0xee, 0x49, 0xc8, 0xb9, 0x7b, 0x00,
+ 0xee, 0x39, 0x42, 0x00, 0xc0, 0x42, 0x21, 0x80, 0x43, 0x00, 0x49, 0xc2,
+ 0x21, 0x8f, 0x43, 0x01, 0x5f, 0x42, 0x21, 0x97, 0x45, 0x04, 0x15, 0xc2,
+ 0x21, 0xa9, 0xd2, 0x49, 0x02, 0x00, 0x19, 0x10, 0x00, 0xc2, 0x21, 0xb5,
+ 0x46, 0x00, 0x4c, 0x42, 0x21, 0xd1, 0x43, 0x01, 0x5f, 0xc2, 0x21, 0xdd,
+ 0xc6, 0x7c, 0x8d, 0x00, 0x19, 0x90, 0x4d, 0x27, 0x71, 0xc2, 0x21, 0xed,
+ 0x55, 0x31, 0xff, 0x42, 0x22, 0x70, 0xde, 0x0d, 0xf5, 0x00, 0xd5, 0xc9,
+ 0x46, 0x1b, 0x0f, 0x42, 0x22, 0x84, 0xcc, 0x8d, 0x54, 0x01, 0x07, 0x49,
+ 0xd5, 0x34, 0xb4, 0x00, 0xef, 0xc8, 0xc8, 0xbd, 0x43, 0x01, 0x07, 0x41,
+ 0xcc, 0x87, 0x84, 0x00, 0xd6, 0x59, 0xc3, 0x05, 0xe3, 0x00, 0xd5, 0xa0,
+ 0x00, 0x42, 0x22, 0x96, 0x44, 0x02, 0x93, 0xc2, 0x22, 0xae, 0x16, 0xc2,
+ 0x22, 0xb8, 0x42, 0x00, 0x6a, 0x42, 0x22, 0xc2, 0xcb, 0x94, 0xd5, 0x00,
+ 0xef, 0xd9, 0x49, 0xae, 0xc5, 0x42, 0x22, 0xce, 0xc6, 0xd1, 0xa2, 0x00,
+ 0xd5, 0x89, 0x95, 0x00, 0x18, 0x42, 0x02, 0x22, 0xe0, 0xd8, 0x25, 0xc4,
+ 0x01, 0x07, 0x21, 0xc6, 0xd9, 0x2e, 0x01, 0x07, 0x19, 0x15, 0xc2, 0x22,
+ 0xe6, 0xc6, 0x03, 0xfa, 0x01, 0x06, 0xeb, 0x02, 0x22, 0xf2, 0xc7, 0x3f,
+ 0x7b, 0x01, 0x06, 0xf8, 0xd5, 0x36, 0x6d, 0x01, 0x06, 0x99, 0x15, 0x42,
+ 0x22, 0xf8, 0xcd, 0x78, 0x83, 0x00, 0xd6, 0x29, 0xc4, 0x00, 0x35, 0x00,
+ 0x19, 0xd8, 0xe0, 0x0a, 0x67, 0x00, 0xd5, 0xd0, 0x42, 0x0f, 0xdb, 0xc2,
+ 0x23, 0x04, 0x45, 0x34, 0xbf, 0x42, 0x23, 0x11, 0xc4, 0x00, 0xcd, 0x00,
+ 0xef, 0xb9, 0xc5, 0x00, 0x47, 0x00, 0xef, 0xb0, 0xd1, 0x2f, 0x5a, 0x01,
+ 0x84, 0xc9, 0xd6, 0x2f, 0x81, 0x01, 0x84, 0xd0, 0x46, 0x9c, 0x23, 0xc2,
+ 0x23, 0x1d, 0xd1, 0x3f, 0x35, 0x00, 0x1a, 0x70, 0x47, 0x1d, 0xf5, 0xc2,
+ 0x23, 0x29, 0xc6, 0x6b, 0x94, 0x00, 0xd5, 0x90, 0xc6, 0x03, 0x4f, 0x00,
+ 0xee, 0x70, 0xc2, 0x00, 0x3a, 0x08, 0x1b, 0xb1, 0xc3, 0x60, 0xff, 0x08,
+ 0x1b, 0xb9, 0xc4, 0xe4, 0x27, 0x08, 0x1b, 0xc1, 0xc5, 0xe2, 0x79, 0x08,
+ 0x1b, 0xc9, 0xc3, 0xec, 0x42, 0x08, 0x1b, 0xd0, 0xc7, 0xcb, 0x03, 0x00,
+ 0xee, 0x61, 0xc7, 0xcb, 0x81, 0x00, 0xee, 0x31, 0xc7, 0xcf, 0x55, 0x00,
+ 0xee, 0x21, 0x90, 0x00, 0x18, 0x22, 0x02, 0x23, 0x35, 0xc5, 0x03, 0x50,
+ 0x00, 0xd6, 0x41, 0xc5, 0x00, 0x34, 0x00, 0x18, 0xf8, 0x4a, 0x5f, 0xc0,
+ 0xc2, 0x23, 0x39, 0xd4, 0x3c, 0x76, 0x00, 0x19, 0x08, 0xc5, 0x03, 0x50,
+ 0x00, 0x18, 0x69, 0xc5, 0x00, 0x34, 0x00, 0x19, 0x48, 0xc4, 0x24, 0x35,
+ 0x0e, 0x9b, 0x89, 0xc5, 0x05, 0x1b, 0x0e, 0x9b, 0x81, 0x15, 0xc2, 0x23,
+ 0x4b, 0x08, 0xc2, 0x23, 0x57, 0x16, 0xc2, 0x23, 0x63, 0xc3, 0x05, 0x17,
+ 0x0e, 0x9b, 0x48, 0xc4, 0x24, 0x35, 0x0e, 0x9b, 0x41, 0xc5, 0x05, 0x1b,
+ 0x0e, 0x9b, 0x39, 0x15, 0xc2, 0x23, 0x6f, 0x08, 0xc2, 0x23, 0x7b, 0x16,
+ 0xc2, 0x23, 0x87, 0xc3, 0x05, 0x17, 0x0e, 0x9b, 0x00, 0xc7, 0x79, 0xb4,
+ 0x01, 0x17, 0xe9, 0x48, 0x00, 0x29, 0xc2, 0x23, 0x93, 0xd6, 0x2c, 0x27,
+ 0x01, 0x17, 0xd0, 0xcf, 0x4c, 0xe0, 0x01, 0x15, 0x9b, 0x02, 0x23, 0x99,
+ 0xc6, 0x04, 0xae, 0x01, 0x10, 0x58, 0x0d, 0xc2, 0x23, 0x9f, 0x0a, 0xc2,
+ 0x23, 0xaf, 0x42, 0x07, 0x69, 0xc2, 0x23, 0xbb, 0x15, 0xc2, 0x23, 0xc7,
+ 0x06, 0xc2, 0x23, 0xdd, 0x03, 0xc2, 0x23, 0xef, 0xc4, 0xe4, 0x2f, 0x01,
+ 0x64, 0x19, 0xc3, 0xd3, 0x51, 0x01, 0x64, 0x49, 0xc4, 0xe4, 0x27, 0x01,
+ 0x64, 0x69, 0x16, 0xc2, 0x23, 0xfb, 0xc5, 0xdf, 0x27, 0x01, 0x64, 0x99,
+ 0xc5, 0xdc, 0xf2, 0x01, 0x64, 0xb9, 0xc2, 0x00, 0xa9, 0x01, 0x64, 0xc9,
+ 0xc2, 0x01, 0xce, 0x01, 0x64, 0xd9, 0x91, 0x01, 0x64, 0xfb, 0x02, 0x24,
+ 0x07, 0x12, 0xc2, 0x24, 0x13, 0xc2, 0x00, 0x64, 0x01, 0x65, 0x19, 0xc2,
+ 0x01, 0xeb, 0x01, 0x65, 0x49, 0x08, 0xc2, 0x24, 0x1d, 0x42, 0x08, 0x2f,
+ 0xc2, 0x24, 0x27, 0xcb, 0x94, 0x93, 0x01, 0x66, 0x89, 0xcd, 0x7f, 0xac,
+ 0x01, 0x67, 0x98, 0x0d, 0xc2, 0x24, 0x33, 0xc5, 0xe0, 0xda, 0x01, 0x67,
+ 0x29, 0xc5, 0xda, 0xdb, 0x01, 0x67, 0x31, 0x15, 0xc2, 0x24, 0x3f, 0xc6,
+ 0xd2, 0xfe, 0x01, 0x67, 0x40, 0x0a, 0xc2, 0x24, 0x4b, 0x42, 0x07, 0x69,
+ 0xc2, 0x24, 0x57, 0x15, 0xc2, 0x24, 0x63, 0x06, 0xc2, 0x24, 0x79, 0x03,
+ 0xc2, 0x24, 0x8b, 0xc4, 0xe4, 0x2f, 0x01, 0x64, 0x11, 0xc3, 0xd3, 0x51,
+ 0x01, 0x64, 0x41, 0xc4, 0xe4, 0x27, 0x01, 0x64, 0x61, 0x16, 0xc2, 0x24,
+ 0x97, 0xc5, 0xdf, 0x27, 0x01, 0x64, 0x91, 0x0d, 0xc2, 0x24, 0xa3, 0xc5,
+ 0xdc, 0xf2, 0x01, 0x64, 0xb1, 0xc2, 0x00, 0xa9, 0x01, 0x64, 0xc1, 0xc2,
+ 0x01, 0xce, 0x01, 0x64, 0xd1, 0x91, 0x01, 0x64, 0xf3, 0x02, 0x24, 0xb3,
+ 0x12, 0xc2, 0x24, 0xbf, 0xc2, 0x00, 0x64, 0x01, 0x65, 0x11, 0xc2, 0x01,
+ 0xeb, 0x01, 0x65, 0x41, 0x08, 0xc2, 0x24, 0xc9, 0x42, 0x08, 0x2f, 0xc2,
+ 0x24, 0xd3, 0xcb, 0x94, 0x93, 0x01, 0x66, 0x81, 0xcd, 0x7f, 0xac, 0x01,
+ 0x67, 0x90, 0xc8, 0xbf, 0xcb, 0x01, 0x67, 0x79, 0x49, 0xad, 0xf6, 0x42,
+ 0x24, 0xdf, 0xc3, 0x05, 0x17, 0x08, 0x17, 0x09, 0x16, 0xc2, 0x24, 0xeb,
+ 0x08, 0xc2, 0x24, 0xf7, 0x15, 0xc2, 0x25, 0x03, 0xc5, 0x05, 0x1b, 0x08,
+ 0x17, 0x41, 0xc4, 0x24, 0x35, 0x08, 0x17, 0x48, 0x16, 0xc2, 0x25, 0x0f,
+ 0x08, 0xc2, 0x25, 0x1d, 0x15, 0xc2, 0x25, 0x25, 0x45, 0x05, 0x1b, 0xc2,
+ 0x25, 0x31, 0x44, 0x24, 0x35, 0xc2, 0x25, 0x3b, 0xcb, 0x0d, 0x7b, 0x08,
+ 0x17, 0x98, 0xcb, 0x8f, 0xb8, 0x0f, 0xa7, 0x59, 0xcc, 0x87, 0xd8, 0x0f,
+ 0xa7, 0x50, 0xc7, 0x60, 0x98, 0x0f, 0x98, 0x11, 0xd0, 0x5b, 0xbf, 0x01,
+ 0x52, 0x62, 0x02, 0x25, 0x47, 0xc4, 0x0d, 0xd3, 0x01, 0x56, 0x7b, 0x02,
+ 0x25, 0x4d, 0xc6, 0x2d, 0xdf, 0x01, 0x56, 0x82, 0x02, 0x25, 0x53, 0xcf,
+ 0x68, 0xca, 0x01, 0x11, 0x91, 0xd2, 0x4b, 0x78, 0x01, 0x4a, 0x08, 0xd3,
+ 0x45, 0x3c, 0x01, 0x0d, 0xb9, 0xe0, 0x05, 0x67, 0x01, 0x5b, 0x70, 0xdb,
+ 0x16, 0xde, 0x0f, 0xae, 0xc1, 0x46, 0x00, 0x4c, 0x42, 0x25, 0x59, 0xe0,
+ 0x03, 0xe7, 0x0f, 0xa8, 0x18, 0x19, 0xc2, 0x25, 0x62, 0x07, 0xc2, 0x25,
+ 0x74, 0x43, 0x05, 0xde, 0x42, 0x25, 0x80, 0x44, 0x66, 0x72, 0xc2, 0x25,
+ 0x8c, 0x43, 0x00, 0x48, 0x42, 0x25, 0x98, 0xc8, 0xbc, 0xd3, 0x0f, 0xab,
+ 0x21, 0xc8, 0xc0, 0xe3, 0x0f, 0xaa, 0xc0, 0x43, 0x0f, 0xdb, 0xc2, 0x25,
+ 0xa4, 0x0b, 0x42, 0x25, 0xb0, 0x42, 0x02, 0x51, 0xc2, 0x25, 0xbc, 0x42,
+ 0x01, 0xc3, 0x42, 0x25, 0xc8, 0xc8, 0xbc, 0xd3, 0x0f, 0xaa, 0xe1, 0xc8,
+ 0xc0, 0xe3, 0x0f, 0xaa, 0x80, 0x44, 0x0c, 0xa4, 0xc2, 0x25, 0xd4, 0xd8,
+ 0x02, 0xcf, 0x0f, 0x8b, 0x71, 0x85, 0x0f, 0x8b, 0x69, 0x86, 0x0f, 0x89,
+ 0x68, 0xdb, 0x19, 0x15, 0x01, 0x3d, 0x91, 0xd8, 0x22, 0xdc, 0x01, 0x1c,
+ 0x49, 0xcb, 0x99, 0x0b, 0x0f, 0x8b, 0x79, 0x46, 0xc5, 0x1c, 0x42, 0x25,
+ 0xde, 0x45, 0x01, 0x32, 0xc2, 0x26, 0x24, 0x9c, 0x0f, 0x89, 0x70, 0x0b,
+ 0xc2, 0x26, 0x30, 0xc3, 0x02, 0xe4, 0x01, 0x14, 0xe9, 0x11, 0x42, 0x26,
+ 0x3c, 0x45, 0x01, 0xf2, 0xc2, 0x26, 0x46, 0xc8, 0x03, 0x47, 0x01, 0x4e,
+ 0x00, 0x16, 0xc2, 0x26, 0x52, 0xc8, 0x4c, 0xbc, 0x01, 0x23, 0x91, 0x07,
+ 0xc2, 0x26, 0x67, 0x15, 0xc2, 0x26, 0x73, 0x08, 0x42, 0x26, 0x7f, 0xc7,
+ 0x00, 0x53, 0x0f, 0xbe, 0xab, 0x02, 0x26, 0x89, 0xc4, 0x07, 0x6e, 0x01,
+ 0x14, 0xb8, 0xd0, 0x5c, 0xaf, 0x01, 0x14, 0xd9, 0x4b, 0x00, 0xb5, 0x42,
+ 0x26, 0x8f, 0xcc, 0x8c, 0x1c, 0x01, 0x14, 0xd1, 0xce, 0x6a, 0x15, 0x01,
+ 0x4d, 0xc0, 0xc4, 0x1c, 0xa2, 0x01, 0x14, 0xb1, 0x49, 0x20, 0x33, 0x42,
+ 0x26, 0x9b, 0xc3, 0x21, 0x5f, 0x01, 0x14, 0xa9, 0xcc, 0x82, 0x8c, 0x01,
+ 0x4d, 0xc9, 0xc7, 0x35, 0xa9, 0x01, 0x4d, 0xb9, 0xca, 0xa2, 0x4a, 0x01,
+ 0x81, 0xb0, 0x49, 0xa0, 0x93, 0xc2, 0x26, 0xa1, 0x5b, 0x15, 0xb5, 0xc2,
+ 0x26, 0xed, 0xd1, 0x56, 0xca, 0x0f, 0xb6, 0x40, 0xc5, 0x19, 0x7b, 0x01,
+ 0x4d, 0xf9, 0xc5, 0xd9, 0xf0, 0x01, 0x5d, 0xf8, 0x50, 0x48, 0x4f, 0xc2,
+ 0x26, 0xf5, 0x48, 0xbe, 0x7b, 0x42, 0x27, 0x01, 0x03, 0xc2, 0x27, 0x39,
+ 0x46, 0x05, 0x07, 0xc2, 0x27, 0x3f, 0x0e, 0xc2, 0x27, 0x4b, 0xd0, 0x58,
+ 0x4f, 0x01, 0x2e, 0x89, 0xcd, 0x81, 0x18, 0x01, 0x2e, 0x69, 0x43, 0x0a,
+ 0x1f, 0xc2, 0x27, 0x57, 0x15, 0xc2, 0x27, 0x5d, 0xce, 0x0f, 0x0e, 0x01,
+ 0x4d, 0xa8, 0xe0, 0x02, 0x47, 0x01, 0x4d, 0xd0, 0xa2, 0x09, 0x1b, 0x5b,
+ 0x02, 0x27, 0x69, 0xd1, 0x54, 0x88, 0x09, 0x2a, 0x11, 0x8f, 0x09, 0x1b,
+ 0x71, 0xc3, 0x30, 0x93, 0x09, 0x1b, 0x68, 0xa4, 0x09, 0x2a, 0x09, 0xc2,
+ 0xea, 0xe1, 0x09, 0x1b, 0x09, 0x89, 0x09, 0x1b, 0x01, 0x00, 0x42, 0x27,
+ 0x6f, 0xc2, 0xe4, 0x2d, 0x09, 0x1b, 0x49, 0x89, 0x09, 0x1b, 0x41, 0x84,
+ 0x09, 0x1b, 0x33, 0x02, 0x27, 0x7b, 0xa0, 0x09, 0x1b, 0x29, 0xc8, 0xbe,
+ 0x3b, 0x09, 0x1b, 0x20, 0x97, 0x09, 0x19, 0xbb, 0x02, 0x27, 0x81, 0x9f,
+ 0x09, 0x19, 0x5b, 0x02, 0x27, 0x90, 0x8b, 0x09, 0x19, 0xab, 0x02, 0x27,
+ 0x94, 0xa1, 0x09, 0x19, 0xa1, 0x00, 0x42, 0x27, 0x98, 0x97, 0x09, 0x1c,
+ 0xcb, 0x02, 0x27, 0xa4, 0x47, 0x1a, 0xdd, 0xc2, 0x27, 0xaa, 0xc3, 0x73,
+ 0x7f, 0x09, 0x18, 0x60, 0x47, 0x07, 0x6c, 0xc2, 0x27, 0xbc, 0xc2, 0x01,
+ 0x0a, 0x09, 0x19, 0x1b, 0x02, 0x27, 0xd5, 0xc3, 0x10, 0xa4, 0x09, 0x19,
+ 0x10, 0x97, 0x09, 0x1a, 0xe1, 0xa0, 0x09, 0x1a, 0xd2, 0x02, 0x27, 0xdb,
+ 0xc3, 0xea, 0xe0, 0x09, 0x1a, 0xc1, 0x9f, 0x09, 0x1a, 0xb9, 0x9a, 0x09,
+ 0x1a, 0xb1, 0x47, 0x07, 0x6c, 0x42, 0x27, 0xe1, 0xc5, 0x3a, 0xa5, 0x09,
+ 0x19, 0x38, 0xc2, 0x07, 0x27, 0x09, 0x18, 0xe1, 0x00, 0x42, 0x27, 0xf4,
+ 0x8f, 0x09, 0x18, 0x43, 0x02, 0x28, 0x0f, 0x94, 0x09, 0x18, 0x4b, 0x02,
+ 0x28, 0x15, 0x8d, 0x09, 0x18, 0x39, 0xc2, 0x07, 0x28, 0x09, 0x18, 0x30,
+ 0xc2, 0x3a, 0x6c, 0x09, 0x17, 0xd3, 0x02, 0x28, 0x1b, 0x94, 0x09, 0x17,
+ 0xd9, 0x89, 0x09, 0x17, 0x9b, 0x02, 0x28, 0x21, 0x84, 0x09, 0x17, 0x83,
+ 0x02, 0x28, 0x27, 0x00, 0x42, 0x28, 0x2b, 0x9f, 0x09, 0x1c, 0xb9, 0x94,
+ 0x09, 0x18, 0x0b, 0x02, 0x28, 0x3d, 0x8e, 0x09, 0x18, 0x01, 0xc5, 0x5b,
+ 0x6a, 0x09, 0x17, 0xf8, 0xc5, 0x3a, 0xa5, 0x09, 0x17, 0xe8, 0x00, 0xc2,
+ 0x28, 0x41, 0xc3, 0xe2, 0x18, 0x09, 0x17, 0x09, 0xc2, 0x9d, 0xea, 0x09,
+ 0x17, 0x01, 0x89, 0x09, 0x16, 0xea, 0x02, 0x28, 0x4d, 0x97, 0x09, 0x16,
+ 0xbb, 0x02, 0x28, 0x54, 0x87, 0x09, 0x15, 0xd3, 0x02, 0x28, 0x67, 0x83,
+ 0x09, 0x15, 0x6b, 0x02, 0x28, 0x7e, 0x0b, 0x42, 0x28, 0x98, 0x89, 0x09,
+ 0x14, 0xab, 0x02, 0x28, 0xb9, 0x94, 0x09, 0x15, 0x61, 0xc4, 0xe9, 0xcf,
+ 0x09, 0x15, 0x59, 0x8e, 0x09, 0x15, 0x4a, 0x02, 0x28, 0xbd, 0x94, 0x09,
+ 0x17, 0x4b, 0x02, 0x28, 0xc3, 0x8f, 0x09, 0x17, 0x3b, 0x02, 0x28, 0xc7,
+ 0xc3, 0x06, 0x67, 0x09, 0x17, 0x31, 0x86, 0x09, 0x17, 0x23, 0x02, 0x28,
+ 0xcd, 0xc8, 0x8f, 0x07, 0x09, 0x17, 0x18, 0x90, 0x09, 0x1c, 0x7b, 0x02,
+ 0x28, 0xd1, 0xc3, 0x7a, 0x99, 0x09, 0x13, 0x01, 0x8f, 0x09, 0x12, 0x7b,
+ 0x02, 0x28, 0xde, 0x9f, 0x09, 0x12, 0x71, 0xc8, 0x98, 0x7c, 0x09, 0x12,
+ 0x68, 0xc2, 0x30, 0xa6, 0x09, 0x13, 0x13, 0x02, 0x28, 0xe4, 0x90, 0x09,
+ 0x13, 0x1a, 0x02, 0x28, 0xe8, 0xa1, 0x09, 0x1c, 0x71, 0x8f, 0x09, 0x12,
+ 0x33, 0x02, 0x28, 0xf5, 0xc2, 0x07, 0x69, 0x09, 0x12, 0x03, 0x02, 0x28,
+ 0xff, 0x9f, 0x09, 0x11, 0xf8, 0x00, 0x42, 0x29, 0x07, 0xc2, 0x01, 0x0d,
+ 0x09, 0x11, 0x93, 0x02, 0x29, 0x13, 0xc4, 0xe4, 0xdf, 0x09, 0x11, 0x89,
+ 0xc4, 0xea, 0x73, 0x09, 0x11, 0x81, 0x89, 0x09, 0x11, 0x73, 0x02, 0x29,
+ 0x1e, 0xc8, 0xbd, 0x03, 0x09, 0x11, 0x68, 0xc9, 0xad, 0xdb, 0x09, 0x28,
+ 0xf9, 0x90, 0x09, 0x11, 0x58, 0x95, 0x09, 0x11, 0x4a, 0x02, 0x29, 0x24,
+ 0xc2, 0x01, 0x0d, 0x09, 0x11, 0x33, 0x02, 0x29, 0x28, 0x94, 0x09, 0x11,
+ 0x29, 0x8a, 0x09, 0x11, 0x21, 0x9f, 0x09, 0x11, 0x19, 0x00, 0x42, 0x29,
+ 0x2c, 0x9f, 0x09, 0x0f, 0xeb, 0x02, 0x29, 0x38, 0x8f, 0x09, 0x10, 0xeb,
+ 0x02, 0x29, 0x3c, 0x8e, 0x09, 0x10, 0xe1, 0x8a, 0x09, 0x10, 0xd9, 0xc3,
+ 0x3e, 0x11, 0x09, 0x10, 0xbb, 0x02, 0x29, 0x45, 0xa0, 0x09, 0x10, 0xb1,
+ 0xca, 0x90, 0xab, 0x09, 0x0f, 0xe0, 0x43, 0xec, 0x75, 0xc2, 0x29, 0x49,
+ 0x43, 0xd8, 0x15, 0xc2, 0x29, 0x55, 0xc5, 0xdd, 0x5b, 0x09, 0x27, 0xf9,
+ 0x43, 0xec, 0x72, 0xc2, 0x29, 0x73, 0x43, 0xec, 0x6f, 0xc2, 0x29, 0x8b,
+ 0x43, 0xec, 0x6c, 0xc2, 0x29, 0x99, 0x43, 0xd8, 0xbc, 0xc2, 0x29, 0xab,
+ 0x43, 0x3a, 0xa0, 0xc2, 0x29, 0xb7, 0x42, 0x07, 0x69, 0xc2, 0x29, 0xe1,
+ 0x8f, 0x09, 0x0f, 0xa3, 0x02, 0x29, 0xef, 0x8e, 0x09, 0x0f, 0x93, 0x02,
+ 0x29, 0xf8, 0xc4, 0xe7, 0x9f, 0x09, 0x0f, 0x88, 0xc2, 0x01, 0x0d, 0x09,
+ 0x0f, 0xd1, 0xc4, 0xe5, 0xf7, 0x09, 0x0f, 0xc9, 0x8e, 0x09, 0x0f, 0xc0,
+ 0x47, 0x07, 0x6c, 0xc2, 0x29, 0xfe, 0xc9, 0xb6, 0xf6, 0x09, 0x1b, 0x79,
+ 0xc4, 0x47, 0x66, 0x09, 0x0c, 0xe3, 0x02, 0x2a, 0x4a, 0x0f, 0xc2, 0x2a,
+ 0x4e, 0x8e, 0x09, 0x0c, 0xbb, 0x02, 0x2a, 0x56, 0x8d, 0x09, 0x0c, 0xab,
+ 0x02, 0x2a, 0x5a, 0x06, 0xc2, 0x2a, 0x60, 0x84, 0x09, 0x0c, 0x79, 0x9f,
+ 0x09, 0x0c, 0x6a, 0x02, 0x2a, 0x73, 0xc4, 0x5a, 0xcf, 0x09, 0x0d, 0xa9,
+ 0x94, 0x09, 0x0d, 0x9b, 0x02, 0x2a, 0x79, 0x90, 0x09, 0x0d, 0x91, 0x8e,
+ 0x09, 0x0d, 0x83, 0x02, 0x2a, 0x7f, 0xa4, 0x09, 0x0d, 0x79, 0xa1, 0x09,
+ 0x0d, 0x6b, 0x02, 0x2a, 0x85, 0xa0, 0x09, 0x0d, 0x61, 0x49, 0x07, 0xf4,
+ 0x42, 0x2a, 0x8b, 0x15, 0xc2, 0x2a, 0x91, 0x90, 0x09, 0x0d, 0x29, 0x86,
+ 0x09, 0x0d, 0x21, 0x47, 0x07, 0x6c, 0x42, 0x2a, 0xa4, 0x47, 0x07, 0x6c,
+ 0x42, 0x2a, 0xb1, 0x00, 0xc2, 0x2a, 0xe2, 0x8e, 0x09, 0x09, 0x60, 0xc2,
+ 0x01, 0x0d, 0x09, 0x1b, 0xe9, 0xc2, 0xd1, 0x05, 0x09, 0x09, 0xf1, 0xc2,
+ 0x5c, 0xff, 0x09, 0x09, 0xc2, 0x02, 0x2a, 0xf1, 0x86, 0x09, 0x08, 0xf3,
+ 0x02, 0x2a, 0xf7, 0x9f, 0x09, 0x08, 0xc3, 0x02, 0x2a, 0xfb, 0x94, 0x09,
+ 0x09, 0x2b, 0x02, 0x2a, 0xff, 0x8f, 0x09, 0x09, 0x1b, 0x02, 0x2b, 0x07,
+ 0x8e, 0x09, 0x09, 0x11, 0xcc, 0x89, 0x88, 0x09, 0x08, 0xb8, 0x15, 0xc2,
+ 0x2b, 0x0d, 0x89, 0x09, 0x1b, 0xe1, 0x14, 0xc2, 0x2b, 0x1a, 0xc3, 0x76,
+ 0xca, 0x09, 0x08, 0x39, 0xa1, 0x09, 0x08, 0x23, 0x02, 0x2b, 0x28, 0x00,
+ 0x42, 0x2b, 0x2c, 0xc5, 0xe3, 0x19, 0x09, 0x07, 0xf3, 0x02, 0x2b, 0x38,
+ 0xc2, 0xed, 0xb9, 0x09, 0x1b, 0xd8, 0xc2, 0x02, 0x1d, 0x09, 0x07, 0x73,
+ 0x02, 0x2b, 0x3e, 0x9f, 0x09, 0x05, 0xbb, 0x02, 0x2b, 0x42, 0xc4, 0x4b,
+ 0x8a, 0x09, 0x07, 0xe9, 0x94, 0x09, 0x07, 0xdb, 0x02, 0x2b, 0x46, 0x90,
+ 0x09, 0x07, 0xb3, 0x02, 0x2b, 0x4a, 0x8f, 0x09, 0x07, 0xa9, 0x8e, 0x09,
+ 0x07, 0x93, 0x02, 0x2b, 0x51, 0x86, 0x09, 0x07, 0x83, 0x02, 0x2b, 0x5d,
+ 0xc5, 0x3a, 0xa5, 0x09, 0x05, 0xb0, 0x00, 0x42, 0x2b, 0x63, 0xce, 0x76,
+ 0x28, 0x09, 0x25, 0x60, 0xc3, 0xa5, 0x49, 0x09, 0x04, 0xfb, 0x02, 0x2b,
+ 0x6f, 0xc2, 0x01, 0x04, 0x09, 0x04, 0xf0, 0x47, 0x07, 0x6c, 0x42, 0x2b,
+ 0x75, 0x00, 0x42, 0x2b, 0x9b, 0xd3, 0x46, 0x6c, 0x09, 0x04, 0x61, 0xc9,
+ 0xb7, 0xce, 0x09, 0x04, 0x58, 0x89, 0x09, 0x04, 0x0b, 0x02, 0x2b, 0xb3,
+ 0x84, 0x09, 0x03, 0xf3, 0x02, 0x2b, 0xbf, 0xc2, 0x3e, 0x08, 0x09, 0x04,
+ 0x49, 0x90, 0x09, 0x04, 0x23, 0x02, 0x2b, 0xc9, 0x8a, 0x09, 0x04, 0x19,
+ 0x00, 0x42, 0x2b, 0xd4, 0x8f, 0x09, 0x03, 0xa3, 0x02, 0x2b, 0xe6, 0xc2,
+ 0x01, 0x0d, 0x09, 0x03, 0xcb, 0x02, 0x2b, 0xf3, 0x90, 0x09, 0x03, 0xbb,
+ 0x02, 0x2b, 0xf9, 0x84, 0x09, 0x03, 0x98, 0x89, 0x09, 0x02, 0xb3, 0x02,
+ 0x2b, 0xff, 0xcb, 0x3a, 0x63, 0x09, 0x24, 0x41, 0x94, 0x09, 0x03, 0x7b,
+ 0x02, 0x2c, 0x07, 0x8f, 0x09, 0x03, 0x70, 0x00, 0xc2, 0x2c, 0x0b, 0x94,
+ 0x09, 0x02, 0x9b, 0x02, 0x2c, 0x17, 0xc3, 0x73, 0x83, 0x09, 0x02, 0x8a,
+ 0x02, 0x2c, 0x1b, 0xc4, 0x3e, 0x06, 0x09, 0x02, 0x1b, 0x02, 0x2c, 0x21,
+ 0x86, 0x09, 0x02, 0x0b, 0x02, 0x2c, 0x27, 0x94, 0x09, 0x02, 0x3b, 0x02,
+ 0x2c, 0x2d, 0x8e, 0x09, 0x02, 0x23, 0x02, 0x2c, 0x33, 0xc2, 0xeb, 0xba,
+ 0x09, 0x02, 0x10, 0x47, 0x07, 0x6c, 0x42, 0x2c, 0x3f, 0xcb, 0x98, 0x7c,
+ 0x09, 0x24, 0x10, 0x00, 0xc2, 0x2c, 0x4f, 0x9f, 0x09, 0x00, 0xb2, 0x02,
+ 0x2c, 0x5b, 0x47, 0x07, 0x6c, 0x42, 0x2c, 0x61, 0x8a, 0x09, 0x01, 0xc3,
+ 0x02, 0x2c, 0x6d, 0xc3, 0xeb, 0x73, 0x09, 0x01, 0xb8, 0xc3, 0x94, 0x64,
+ 0x09, 0x01, 0xb1, 0xc2, 0x00, 0x5d, 0x09, 0x01, 0xa2, 0x02, 0x2c, 0x7b,
+ 0xc3, 0x0a, 0xf1, 0x09, 0x01, 0x91, 0x00, 0x42, 0x2c, 0x81, 0xc3, 0x32,
+ 0xad, 0x09, 0x01, 0x51, 0xc2, 0x00, 0x3a, 0x09, 0x01, 0x49, 0x47, 0x07,
+ 0x6c, 0x42, 0x2c, 0x93, 0x47, 0x07, 0x6c, 0x42, 0x2c, 0xbb, 0xc3, 0x7a,
+ 0x99, 0x09, 0x00, 0x41, 0xc4, 0x76, 0x91, 0x09, 0x00, 0x39, 0xca, 0x3b,
+ 0x50, 0x09, 0x00, 0x31, 0xc3, 0x03, 0xaa, 0x09, 0x00, 0x29, 0xc2, 0x01,
+ 0x0e, 0x09, 0x00, 0x21, 0xc9, 0x5c, 0x76, 0x09, 0x00, 0x19, 0xc3, 0x64,
+ 0x5f, 0x09, 0x00, 0x11, 0x83, 0x09, 0x00, 0x08, 0x14, 0xc2, 0x2c, 0xc7,
+ 0x00, 0x42, 0x2c, 0xd4, 0xc9, 0x0a, 0x5e, 0x09, 0x1c, 0xa0, 0x92, 0x09,
+ 0x13, 0xe9, 0x90, 0x09, 0x13, 0xe1, 0x86, 0x09, 0x13, 0xd8, 0x84, 0x09,
+ 0x14, 0x80, 0xc2, 0x01, 0x5b, 0x09, 0x0a, 0x99, 0x00, 0x42, 0x2c, 0xe0,
+ 0x9f, 0x09, 0x0a, 0x69, 0xd0, 0x5c, 0x6f, 0x09, 0x0a, 0x60, 0x8b, 0x09,
+ 0x0a, 0x32, 0x02, 0x2c, 0xf8, 0x4b, 0x9b, 0x3c, 0x42, 0x2c, 0xfc, 0x97,
+ 0x09, 0x20, 0xa3, 0x02, 0x2d, 0x08, 0xd3, 0x44, 0x91, 0x09, 0x22, 0x33,
+ 0x02, 0x2d, 0x0e, 0xc5, 0xe3, 0xfa, 0x09, 0x21, 0x59, 0xc5, 0xde, 0x87,
+ 0x09, 0x20, 0xe9, 0xc4, 0x07, 0xd9, 0x09, 0x20, 0x71, 0xc3, 0x02, 0xe4,
+ 0x09, 0x20, 0x38, 0xc3, 0x30, 0xe0, 0x09, 0x22, 0xb9, 0xc3, 0x0e, 0x6d,
+ 0x09, 0x22, 0xb0, 0x97, 0x09, 0x20, 0x9b, 0x02, 0x2d, 0x1c, 0xd1, 0x55,
+ 0x98, 0x09, 0x22, 0x23, 0x02, 0x2d, 0x22, 0xc5, 0xe3, 0xfa, 0x09, 0x21,
+ 0x51, 0xc5, 0xde, 0x87, 0x09, 0x20, 0xe1, 0xc4, 0x07, 0xd9, 0x09, 0x20,
+ 0x69, 0xc3, 0x02, 0xe4, 0x09, 0x20, 0x30, 0x08, 0xc2, 0x2d, 0x26, 0xca,
+ 0xa9, 0xfc, 0x09, 0x23, 0x31, 0xc9, 0xab, 0x80, 0x09, 0x23, 0x28, 0x97,
+ 0x09, 0x20, 0x93, 0x02, 0x2d, 0x32, 0x51, 0x55, 0x76, 0xc2, 0x2d, 0x38,
+ 0xc5, 0xe3, 0xfa, 0x09, 0x21, 0x49, 0xc5, 0xde, 0x87, 0x09, 0x20, 0xd9,
+ 0xc4, 0x07, 0xd9, 0x09, 0x20, 0x61, 0xc3, 0x02, 0xe4, 0x09, 0x20, 0x28,
+ 0x97, 0x09, 0x20, 0x8b, 0x02, 0x2d, 0x40, 0xc3, 0x02, 0xe4, 0x09, 0x20,
+ 0x23, 0x02, 0x2d, 0x46, 0xd1, 0x55, 0x54, 0x09, 0x22, 0x01, 0xc5, 0xe3,
+ 0xfa, 0x09, 0x21, 0x41, 0xc5, 0xde, 0x87, 0x09, 0x20, 0xd1, 0xc4, 0x07,
+ 0xd9, 0x09, 0x20, 0x58, 0xc3, 0x0e, 0x6d, 0x09, 0x21, 0x99, 0xc4, 0x07,
+ 0xd9, 0x09, 0x21, 0x90, 0x97, 0x09, 0x20, 0x83, 0x02, 0x2d, 0x4c, 0x15,
+ 0xc2, 0x2d, 0x52, 0x04, 0xc2, 0x2d, 0x5e, 0xc3, 0x02, 0xe4, 0x09, 0x20,
+ 0x1b, 0x02, 0x2d, 0x6d, 0x44, 0x65, 0x84, 0xc2, 0x2d, 0x73, 0xc4, 0x07,
+ 0xd9, 0x09, 0x20, 0x50, 0x97, 0x09, 0x20, 0x7b, 0x02, 0x2d, 0x7b, 0x04,
+ 0xc2, 0x2d, 0x81, 0xc3, 0x02, 0xe4, 0x09, 0x20, 0x13, 0x02, 0x2d, 0x90,
+ 0xd2, 0x4d, 0xdc, 0x09, 0x21, 0xe3, 0x02, 0x2d, 0x96, 0x44, 0xa8, 0x6c,
+ 0xc2, 0x2d, 0x9e, 0x44, 0x65, 0x84, 0xc2, 0x2d, 0xa6, 0xc4, 0x07, 0xd9,
+ 0x09, 0x20, 0x48, 0xc8, 0xbf, 0x63, 0x09, 0x23, 0x21, 0x48, 0x15, 0x72,
+ 0xc2, 0x2d, 0xae, 0x07, 0xc2, 0x2d, 0xba, 0x46, 0x01, 0x47, 0xc2, 0x2d,
+ 0xc6, 0x04, 0xc2, 0x2d, 0xd2, 0xc5, 0xe3, 0xb4, 0x09, 0x21, 0x61, 0x44,
+ 0x65, 0x84, 0x42, 0x2d, 0xde, 0xc7, 0x08, 0x19, 0x09, 0x23, 0x11, 0xc5,
+ 0xd7, 0xd2, 0x09, 0x23, 0x08, 0x47, 0x91, 0x0d, 0xc2, 0x2d, 0xe6, 0xc5,
+ 0xdd, 0x4c, 0x09, 0x22, 0xc9, 0x04, 0xc2, 0x2d, 0xf2, 0xc3, 0x02, 0xe4,
+ 0x09, 0x20, 0x03, 0x02, 0x2d, 0xfe, 0x44, 0xa8, 0x6c, 0xc2, 0x2e, 0x04,
+ 0x44, 0x65, 0x84, 0x42, 0x2e, 0x0c, 0x04, 0xc2, 0x2e, 0x14, 0xc3, 0x02,
+ 0xe4, 0x09, 0x20, 0x0b, 0x02, 0x2e, 0x23, 0x50, 0x5a, 0xaf, 0xc2, 0x2e,
+ 0x29, 0x44, 0xa8, 0x6c, 0xc2, 0x2e, 0x35, 0x44, 0x65, 0x84, 0xc2, 0x2e,
+ 0x43, 0xc4, 0x07, 0xd9, 0x09, 0x20, 0x40, 0xc2, 0x00, 0x11, 0x01, 0x3d,
+ 0x81, 0x46, 0x1a, 0x91, 0x42, 0x2e, 0x4b, 0x1c, 0xc2, 0x2e, 0x57, 0x87,
+ 0x0f, 0x02, 0xa8, 0xd7, 0x04, 0x30, 0x0f, 0x03, 0x41, 0x87, 0x0f, 0x02,
+ 0x90, 0xcc, 0x8b, 0x38, 0x0f, 0x03, 0x38, 0xc7, 0x8a, 0x59, 0x0f, 0x03,
+ 0x28, 0x88, 0x0f, 0x03, 0x01, 0x95, 0x0f, 0x02, 0xf1, 0x8e, 0x0f, 0x02,
+ 0xd8, 0x87, 0x0f, 0x02, 0xb0, 0x87, 0x0f, 0x02, 0xc1, 0xc2, 0x00, 0x5b,
+ 0x0f, 0x02, 0xb8, 0x97, 0x00, 0x22, 0x1b, 0x02, 0x2e, 0x61, 0x16, 0xc2,
+ 0x2e, 0x74, 0x19, 0xc2, 0x2e, 0x97, 0x10, 0xc2, 0x2e, 0xa1, 0x0e, 0xc2,
+ 0x2e, 0xb3, 0x14, 0xc2, 0x2e, 0xcb, 0x87, 0x00, 0x22, 0x6b, 0x02, 0x2e,
+ 0xdd, 0x06, 0xc2, 0x2f, 0x0a, 0x15, 0xc2, 0x2f, 0x2d, 0x12, 0xc2, 0x2f,
+ 0x4f, 0x83, 0x00, 0x21, 0x83, 0x02, 0x2f, 0x62, 0xc2, 0x0e, 0x13, 0x00,
+ 0x28, 0xd9, 0x1b, 0xc2, 0x2f, 0x74, 0x0d, 0xc2, 0x2f, 0x90, 0x0a, 0xc2,
+ 0x2f, 0xad, 0x09, 0xc2, 0x2f, 0xba, 0x04, 0xc2, 0x2f, 0xc9, 0x91, 0x00,
+ 0x21, 0xf3, 0x02, 0x2f, 0xe7, 0x8b, 0x00, 0x21, 0xc3, 0x02, 0x2f, 0xfa,
+ 0x1c, 0xc2, 0x30, 0x17, 0x05, 0xc2, 0x30, 0x22, 0x44, 0x13, 0x1a, 0xc2,
+ 0x30, 0x3d, 0xc2, 0x00, 0x29, 0x00, 0x21, 0x91, 0xc2, 0x1c, 0x3e, 0x00,
+ 0x22, 0xc1, 0xc4, 0xe6, 0xa3, 0x00, 0x23, 0x98, 0xc4, 0xea, 0xa7, 0x00,
+ 0x26, 0xa9, 0xc6, 0xd3, 0x16, 0x00, 0x25, 0xa9, 0xc6, 0xd0, 0xdc, 0x00,
+ 0x25, 0x28, 0x8e, 0x00, 0x20, 0xdb, 0x02, 0x30, 0x49, 0x90, 0x00, 0x20,
+ 0xeb, 0x02, 0x30, 0x4f, 0xcf, 0x69, 0x9c, 0x00, 0x27, 0x61, 0x8f, 0x00,
+ 0x20, 0xe3, 0x02, 0x30, 0x55, 0x95, 0x00, 0x21, 0x0b, 0x02, 0x30, 0x5b,
+ 0x94, 0x00, 0x21, 0x03, 0x02, 0x30, 0x61, 0x88, 0x00, 0x21, 0x20, 0xc3,
+ 0x27, 0xc3, 0x00, 0x29, 0x61, 0x1c, 0xc2, 0x30, 0x67, 0x46, 0x02, 0x92,
+ 0xc2, 0x30, 0x7e, 0xc2, 0x1c, 0x3e, 0x00, 0x20, 0x13, 0x02, 0x30, 0x88,
+ 0x87, 0x00, 0x20, 0xa1, 0xc2, 0x00, 0xc9, 0x05, 0x34, 0x00, 0x0a, 0xc2,
+ 0x30, 0x8e, 0xc4, 0x6e, 0x0c, 0x00, 0x26, 0xc3, 0x02, 0x30, 0xad, 0xc9,
+ 0xb6, 0x03, 0x00, 0x25, 0x73, 0x02, 0x30, 0xb3, 0xcc, 0x84, 0x48, 0x00,
+ 0x24, 0x61, 0x44, 0x67, 0x3a, 0x42, 0x30, 0xb9, 0x87, 0x00, 0x20, 0xfb,
+ 0x02, 0x30, 0xc9, 0xc2, 0x02, 0x29, 0x00, 0x23, 0x80, 0xc7, 0xc8, 0x8d,
+ 0x00, 0x28, 0xf1, 0x49, 0xaf, 0xf7, 0xc2, 0x30, 0xcf, 0x46, 0x01, 0xab,
+ 0x42, 0x30, 0xe4, 0x83, 0x00, 0x21, 0x7b, 0x02, 0x30, 0xf0, 0xc3, 0x1a,
+ 0xd0, 0x00, 0x21, 0x5b, 0x02, 0x30, 0xf8, 0x8b, 0x00, 0x20, 0x39, 0x97,
+ 0x00, 0x21, 0x71, 0x90, 0x05, 0x32, 0xf0, 0xc2, 0x01, 0x0e, 0x00, 0x28,
+ 0xb1, 0x48, 0x11, 0xae, 0xc2, 0x30, 0xfe, 0xca, 0xa2, 0x7c, 0x00, 0x23,
+ 0xd0, 0xc2, 0x01, 0x0e, 0x00, 0x28, 0xa1, 0xc2, 0x1c, 0x3e, 0x00, 0x20,
+ 0x49, 0xc9, 0x56, 0x39, 0x00, 0x23, 0x30, 0x11, 0xc2, 0x31, 0x16, 0xcd,
+ 0x76, 0xd6, 0x00, 0x26, 0x59, 0x83, 0x00, 0x20, 0xd3, 0x02, 0x31, 0x22,
+ 0xc2, 0x1c, 0x3e, 0x00, 0x20, 0x61, 0xc2, 0x02, 0x29, 0x00, 0x23, 0x70,
+ 0x83, 0x00, 0x21, 0x2b, 0x02, 0x31, 0x28, 0xc2, 0x00, 0xc9, 0x05, 0x34,
+ 0xa0, 0xc2, 0x00, 0x5b, 0x00, 0x20, 0x9b, 0x02, 0x31, 0x34, 0xc2, 0x1c,
+ 0x3e, 0x00, 0x20, 0x18, 0xc2, 0x00, 0x5b, 0x00, 0x21, 0x3b, 0x02, 0x31,
+ 0x3a, 0xc8, 0xc1, 0x03, 0x05, 0x34, 0xd1, 0xd0, 0x56, 0x32, 0x05, 0x32,
+ 0xc1, 0xc3, 0x27, 0xc3, 0x05, 0x34, 0x30, 0x46, 0x01, 0xab, 0xc2, 0x31,
+ 0x40, 0x8d, 0x00, 0x23, 0xc2, 0x02, 0x31, 0x4a, 0x03, 0xc2, 0x31, 0x50,
+ 0xd7, 0x04, 0x30, 0x00, 0x20, 0x31, 0x87, 0x00, 0x20, 0x89, 0xca, 0xa3,
+ 0x80, 0x05, 0x32, 0x61, 0xca, 0xa6, 0xfa, 0x05, 0x32, 0xd1, 0x0b, 0x42,
+ 0x31, 0x5f, 0xcf, 0x69, 0x9c, 0x00, 0x27, 0x31, 0xc4, 0x73, 0xe1, 0x00,
+ 0x23, 0x03, 0x02, 0x31, 0x6b, 0x96, 0x00, 0x23, 0xf0, 0x46, 0x01, 0xab,
+ 0xc2, 0x31, 0x71, 0x87, 0x00, 0x20, 0xab, 0x02, 0x31, 0x83, 0xc6, 0xd3,
+ 0x28, 0x00, 0x23, 0xa3, 0x02, 0x31, 0x89, 0x91, 0x00, 0x20, 0x0a, 0x02,
+ 0x31, 0x8f, 0x87, 0x00, 0x20, 0xbb, 0x02, 0x31, 0x93, 0x0a, 0x42, 0x31,
+ 0x9f, 0x87, 0x00, 0x21, 0x13, 0x02, 0x31, 0xac, 0x15, 0xc2, 0x31, 0xb2,
+ 0xc2, 0x00, 0xc9, 0x05, 0x34, 0x61, 0xc3, 0x27, 0xc3, 0x05, 0x34, 0x90,
+ 0xc2, 0x1c, 0x3e, 0x00, 0x20, 0x51, 0xca, 0xa9, 0x98, 0x05, 0x32, 0x70,
+ 0xc8, 0x85, 0x54, 0x05, 0x32, 0x51, 0xc7, 0x80, 0xea, 0x05, 0x33, 0x40,
+ 0xa1, 0x09, 0x7f, 0x81, 0x9f, 0x09, 0x7f, 0x79, 0x9d, 0x09, 0x7f, 0x70,
+ 0xa6, 0x09, 0x7f, 0x69, 0xa5, 0x09, 0x7f, 0x61, 0xa4, 0x09, 0x7f, 0x59,
+ 0xa2, 0x09, 0x7f, 0x51, 0xa1, 0x09, 0x7f, 0x49, 0xa0, 0x09, 0x7f, 0x41,
+ 0x9f, 0x09, 0x7f, 0x39, 0x9e, 0x09, 0x7f, 0x31, 0x9d, 0x09, 0x7f, 0x28,
+ 0xa6, 0x09, 0x7f, 0x21, 0xa5, 0x09, 0x7f, 0x19, 0xa4, 0x09, 0x7f, 0x11,
+ 0xa3, 0x09, 0x7f, 0x09, 0xa2, 0x09, 0x7f, 0x01, 0xa1, 0x09, 0x7e, 0xf9,
+ 0x9f, 0x09, 0x7e, 0xf1, 0x9e, 0x09, 0x7e, 0xe9, 0x9d, 0x09, 0x7e, 0xe0,
+ 0xa6, 0x09, 0x7e, 0xd9, 0xa5, 0x09, 0x7e, 0xd1, 0xa4, 0x09, 0x7e, 0xc9,
+ 0xa3, 0x09, 0x7e, 0xc1, 0xa2, 0x09, 0x7e, 0xb9, 0xa1, 0x09, 0x7e, 0xb1,
+ 0xa0, 0x09, 0x7e, 0xa9, 0x9f, 0x09, 0x7e, 0xa1, 0x9e, 0x09, 0x7e, 0x99,
+ 0x9d, 0x09, 0x7e, 0x90, 0xa6, 0x09, 0x7e, 0x89, 0xa5, 0x09, 0x7e, 0x81,
+ 0xa3, 0x09, 0x7e, 0x79, 0xa2, 0x09, 0x7e, 0x6b, 0x02, 0x31, 0xc8, 0xa1,
+ 0x09, 0x7e, 0x61, 0xa0, 0x09, 0x7e, 0x59, 0x9f, 0x09, 0x7e, 0x51, 0x9e,
+ 0x09, 0x7e, 0x49, 0x9d, 0x09, 0x7e, 0x40, 0xa6, 0x09, 0x7e, 0x39, 0xa5,
+ 0x09, 0x7e, 0x31, 0xa4, 0x09, 0x7e, 0x29, 0xa3, 0x09, 0x7e, 0x21, 0xa1,
+ 0x09, 0x7e, 0x19, 0xa0, 0x09, 0x7e, 0x11, 0x9f, 0x09, 0x7e, 0x09, 0x9e,
+ 0x09, 0x7e, 0x01, 0x9d, 0x09, 0x7d, 0xf8, 0xa6, 0x09, 0x7d, 0xf1, 0xa5,
+ 0x09, 0x7d, 0xe9, 0xa3, 0x09, 0x7d, 0xe1, 0xa2, 0x09, 0x7d, 0xd9, 0xa1,
+ 0x09, 0x7d, 0xd1, 0xa0, 0x09, 0x7d, 0xc9, 0x9f, 0x09, 0x7d, 0xc1, 0x9e,
+ 0x09, 0x7d, 0xb9, 0x9d, 0x09, 0x7d, 0xb0, 0xa6, 0x09, 0x7d, 0xa9, 0xa4,
+ 0x09, 0x7d, 0xa1, 0xa3, 0x09, 0x7d, 0x99, 0xa1, 0x09, 0x7d, 0x91, 0x9e,
+ 0x09, 0x7d, 0x89, 0x9d, 0x09, 0x7d, 0x80, 0xa6, 0x09, 0x7d, 0x79, 0xa5,
+ 0x09, 0x7d, 0x71, 0xa4, 0x09, 0x7d, 0x69, 0xa3, 0x09, 0x7d, 0x61, 0xa2,
+ 0x09, 0x7d, 0x59, 0xa1, 0x09, 0x7d, 0x51, 0xa0, 0x09, 0x7d, 0x49, 0x9d,
+ 0x09, 0x7d, 0x40, 0xa6, 0x09, 0x7d, 0x39, 0xa5, 0x09, 0x7d, 0x31, 0xa4,
+ 0x09, 0x7d, 0x29, 0xa3, 0x09, 0x7d, 0x21, 0xa2, 0x09, 0x7d, 0x19, 0xa1,
+ 0x09, 0x7d, 0x11, 0xa0, 0x09, 0x7d, 0x09, 0x9e, 0x09, 0x7d, 0x00, 0xa6,
+ 0x09, 0x7c, 0xf9, 0xa4, 0x09, 0x7c, 0xf1, 0xa2, 0x09, 0x7c, 0xe9, 0xa0,
+ 0x09, 0x7c, 0xe1, 0x9f, 0x09, 0x7c, 0xd3, 0x02, 0x31, 0xcc, 0x9e, 0x09,
+ 0x7c, 0xc9, 0x9d, 0x09, 0x7c, 0xc0, 0xa6, 0x09, 0x7c, 0xb9, 0xa5, 0x09,
+ 0x7c, 0xb1, 0xa4, 0x09, 0x7c, 0xa9, 0xa3, 0x09, 0x7c, 0xa1, 0xa2, 0x09,
+ 0x7c, 0x99, 0xa1, 0x09, 0x7c, 0x91, 0x9f, 0x09, 0x7c, 0x89, 0x9e, 0x09,
+ 0x7c, 0x80, 0xc4, 0x05, 0xde, 0x00, 0x04, 0x79, 0xc2, 0x0a, 0x20, 0x00,
+ 0x04, 0x70, 0xe0, 0x06, 0xc7, 0x01, 0x01, 0xd0, 0x07, 0xc2, 0x31, 0xd0,
+ 0xd3, 0x46, 0x92, 0x01, 0x00, 0xd0, 0x44, 0x05, 0x17, 0xc2, 0x31, 0xd6,
+ 0xc6, 0x29, 0x29, 0x08, 0x8f, 0x91, 0xc6, 0xd1, 0xe4, 0x08, 0x8f, 0x89,
+ 0x15, 0xc2, 0x31, 0xe2, 0x08, 0xc2, 0x31, 0xee, 0x16, 0x42, 0x31, 0xfa,
+ 0xc4, 0x24, 0x35, 0x08, 0x8f, 0x49, 0xc5, 0x05, 0x1b, 0x08, 0x8f, 0x41,
+ 0x15, 0xc2, 0x32, 0x0c, 0x08, 0xc2, 0x32, 0x18, 0x16, 0xc2, 0x32, 0x24,
+ 0xc3, 0x05, 0x17, 0x08, 0x8f, 0x08, 0xc9, 0xad, 0x27, 0x00, 0x6c, 0x11,
+ 0xc8, 0xbb, 0x8b, 0x00, 0x6e, 0x50, 0x03, 0xc2, 0x32, 0x30, 0x0b, 0xc2,
+ 0x32, 0x58, 0x17, 0xc2, 0x32, 0x70, 0x07, 0xc2, 0x32, 0x7c, 0x11, 0xc2,
+ 0x32, 0x88, 0x0f, 0xc2, 0x32, 0x94, 0xd2, 0x47, 0xe2, 0x00, 0x6c, 0xf1,
+ 0x48, 0xba, 0xcb, 0xc2, 0x32, 0x9e, 0x48, 0xc2, 0x93, 0xc2, 0x32, 0xae,
+ 0x48, 0xc2, 0x8b, 0xc2, 0x32, 0xba, 0xc7, 0xc6, 0xaa, 0x00, 0x6d, 0xd1,
+ 0xc7, 0xc9, 0x3c, 0x00, 0x6d, 0xd9, 0xc7, 0xcc, 0xf4, 0x00, 0x6e, 0x01,
+ 0xc7, 0xc5, 0xe6, 0x00, 0x6e, 0x21, 0xc7, 0xcf, 0x16, 0x00, 0x6e, 0x30,
+ 0xc4, 0x16, 0x57, 0x00, 0x6f, 0x31, 0xc3, 0x05, 0x17, 0x00, 0x6f, 0x39,
+ 0x16, 0xc2, 0x32, 0xcc, 0x08, 0xc2, 0x32, 0xd8, 0x15, 0xc2, 0x32, 0xe4,
+ 0xc5, 0x05, 0x1b, 0x00, 0x6f, 0x71, 0xc4, 0x24, 0x35, 0x00, 0x6f, 0x78,
+ 0x45, 0xb0, 0xfc, 0xc2, 0x32, 0xf0, 0x44, 0xcd, 0xdc, 0x42, 0x33, 0x02,
+ 0xca, 0xa6, 0x28, 0x00, 0x6e, 0x89, 0xc8, 0xb9, 0x2b, 0x00, 0x6e, 0x99,
+ 0xc9, 0xad, 0x5d, 0x00, 0x6e, 0xb1, 0xc7, 0xcd, 0xdb, 0x00, 0x6e, 0xd1,
+ 0x42, 0x07, 0x69, 0x42, 0x33, 0x11, 0xca, 0xaa, 0x92, 0x00, 0x6e, 0xc1,
+ 0xc9, 0x8f, 0xd0, 0x00, 0x6e, 0xf8, 0x49, 0xb1, 0x71, 0xc2, 0x33, 0x1d,
+ 0x4d, 0x4e, 0x90, 0xc2, 0x33, 0x53, 0x4c, 0x56, 0x20, 0x42, 0x33, 0x6e,
+ 0xc7, 0x03, 0x28, 0x0e, 0xc8, 0x99, 0xc8, 0x3a, 0x32, 0x0e, 0xc8, 0x91,
+ 0xc6, 0x23, 0x24, 0x0e, 0xc8, 0x88, 0x4a, 0xa5, 0x9c, 0xc2, 0x33, 0x86,
+ 0xc4, 0x02, 0xfa, 0x0e, 0xd3, 0xf0, 0xda, 0x1c, 0x0a, 0x0e, 0xd3, 0x81,
+ 0x44, 0x04, 0x50, 0x42, 0x33, 0xa4, 0xc8, 0xc3, 0xb3, 0x0e, 0xd0, 0x99,
+ 0xc7, 0xc9, 0x5f, 0x0e, 0xd0, 0x91, 0xc7, 0x82, 0x79, 0x0e, 0xd0, 0x88,
+ 0xca, 0x9f, 0x84, 0x0e, 0xd0, 0x43, 0x02, 0x33, 0xae, 0xcf, 0x62, 0x58,
+ 0x0e, 0xd0, 0x38, 0xc3, 0x0c, 0x34, 0x0e, 0xd4, 0x51, 0xc3, 0x00, 0xec,
+ 0x0e, 0xd4, 0x38, 0xc6, 0xd8, 0xe0, 0x0e, 0xd1, 0x61, 0xc7, 0x82, 0x79,
+ 0x0e, 0xd1, 0x59, 0xc6, 0xd3, 0x2e, 0x0e, 0xd1, 0x50, 0xd2, 0x48, 0x18,
+ 0x0e, 0xd3, 0x89, 0x44, 0x03, 0x1d, 0x42, 0x33, 0xb4, 0xd1, 0x53, 0x78,
+ 0x0e, 0xc9, 0x01, 0x15, 0xc2, 0x33, 0xc0, 0x46, 0x15, 0x2e, 0x42, 0x33,
+ 0xcc, 0xc7, 0x03, 0x28, 0x0e, 0xc8, 0xa9, 0xc7, 0x01, 0xb1, 0x0e, 0xc8,
+ 0xa0, 0xc7, 0x03, 0x28, 0x0e, 0xc8, 0x69, 0xc8, 0x3a, 0x32, 0x0e, 0xc8,
+ 0x61, 0xc6, 0x23, 0x24, 0x0e, 0xc8, 0x58, 0x42, 0x00, 0xf8, 0xc2, 0x33,
+ 0xd8, 0xd2, 0x4e, 0x90, 0x0e, 0xd4, 0x01, 0xd1, 0x56, 0x20, 0x0e, 0xd3,
+ 0xf8, 0x43, 0x0e, 0xd0, 0xc2, 0x33, 0xf6, 0x12, 0x42, 0x34, 0x02, 0x42,
+ 0x00, 0x14, 0xc2, 0x34, 0x0c, 0x45, 0xe2, 0x42, 0xc2, 0x34, 0x18, 0x44,
+ 0xe0, 0xef, 0x42, 0x34, 0x3c, 0xc3, 0x1e, 0x54, 0x0e, 0xd3, 0x0b, 0x02,
+ 0x34, 0x4e, 0x4b, 0x95, 0x64, 0x42, 0x34, 0x52, 0x4d, 0x7b, 0x00, 0xc2,
+ 0x34, 0x64, 0x4c, 0x8e, 0xf8, 0x42, 0x34, 0x70, 0x43, 0x99, 0x2d, 0xc2,
+ 0x34, 0x82, 0x47, 0xc4, 0xab, 0xc2, 0x34, 0x8e, 0x00, 0xc2, 0x34, 0xa0,
+ 0x42, 0x00, 0x27, 0xc2, 0x34, 0xac, 0x4f, 0x67, 0x62, 0x42, 0x34, 0xca,
+ 0xc2, 0x00, 0xac, 0x0e, 0xd3, 0x59, 0x10, 0x42, 0x34, 0xdc, 0x00, 0x42,
+ 0x35, 0x00, 0x19, 0xc2, 0x35, 0x0c, 0xc7, 0xc9, 0x5f, 0x0e, 0xd1, 0x91,
+ 0xc7, 0x82, 0x79, 0x0e, 0xd1, 0x88, 0x4d, 0x7e, 0x19, 0xc2, 0x35, 0x18,
+ 0x4c, 0x8d, 0x48, 0xc2, 0x35, 0x5e, 0x4b, 0x96, 0xae, 0xc2, 0x35, 0xa4,
+ 0x48, 0xba, 0x9b, 0x42, 0x35, 0xb6, 0x49, 0xaf, 0x28, 0xc2, 0x35, 0xc8,
+ 0x05, 0xc2, 0x35, 0xd4, 0xc5, 0xdf, 0x18, 0x0e, 0xd2, 0x83, 0x02, 0x35,
+ 0xe0, 0xc4, 0x67, 0x8b, 0x0e, 0xd2, 0x6b, 0x02, 0x35, 0xe4, 0x45, 0x03,
+ 0xf5, 0xc2, 0x35, 0xe8, 0xc5, 0x7e, 0x0c, 0x0e, 0xd2, 0x0b, 0x02, 0x36,
+ 0x0c, 0xc5, 0xb2, 0x44, 0x0e, 0xd1, 0xf2, 0x02, 0x36, 0x10, 0xc6, 0xd4,
+ 0x8a, 0x0e, 0xd1, 0xd1, 0xc6, 0xd4, 0x0c, 0x0e, 0xd1, 0xc8, 0xc7, 0x03,
+ 0x28, 0x0e, 0xc8, 0x81, 0xc8, 0x3a, 0x32, 0x0e, 0xc8, 0x79, 0xc6, 0x23,
+ 0x24, 0x0e, 0xc8, 0x70, 0xd0, 0x60, 0xbf, 0x0e, 0xd1, 0xc1, 0xc6, 0x01,
+ 0x5e, 0x0e, 0xd1, 0xb0, 0xd0, 0x60, 0xbf, 0x0e, 0xd1, 0xb9, 0xc7, 0x5c,
+ 0x24, 0x0e, 0xd1, 0xa8, 0x48, 0xc2, 0xab, 0xc2, 0x36, 0x14, 0xca, 0xa9,
+ 0x2a, 0x0e, 0xd0, 0x79, 0xcc, 0x82, 0x74, 0x0e, 0xd0, 0x70, 0xc7, 0xc8,
+ 0x71, 0x0e, 0xcf, 0xf1, 0xd0, 0x59, 0xef, 0x0e, 0xcf, 0xe9, 0x15, 0xc2,
+ 0x36, 0x20, 0xc7, 0x38, 0xc3, 0x0e, 0xcf, 0xd1, 0xc5, 0xdf, 0x18, 0x0e,
+ 0xcf, 0xc9, 0xc4, 0xe6, 0xb7, 0x0e, 0xcf, 0xb9, 0x4a, 0x2f, 0xf6, 0x42,
+ 0x36, 0x2f, 0xca, 0xa6, 0x14, 0x08, 0xae, 0xe3, 0x02, 0x36, 0x3b, 0x97,
0x08, 0xad, 0xd9, 0x8b, 0x08, 0xad, 0xc9, 0x83, 0x08, 0xad, 0x78, 0x94,
0x08, 0xad, 0xa8, 0x97, 0x08, 0xad, 0x98, 0x8b, 0x08, 0xad, 0x88, 0xca,
- 0xa3, 0xce, 0x08, 0xae, 0xd9, 0x97, 0x08, 0xac, 0x69, 0x8b, 0x08, 0xac,
- 0x59, 0x83, 0x08, 0xac, 0x08, 0xd5, 0x34, 0xcc, 0x08, 0xae, 0xcb, 0x02,
- 0x36, 0xb4, 0x0a, 0xc2, 0x36, 0xb8, 0x83, 0x08, 0xac, 0xe9, 0x16, 0x42,
- 0x36, 0xc2, 0x83, 0x08, 0xad, 0x69, 0xc2, 0x0c, 0x65, 0x08, 0xad, 0x61,
- 0xc2, 0x00, 0xa4, 0x08, 0xad, 0x58, 0x83, 0x08, 0xad, 0x51, 0x47, 0xac,
- 0xc2, 0x42, 0x36, 0xcc, 0xc2, 0x00, 0xa4, 0x08, 0xad, 0x29, 0x83, 0x08,
- 0xad, 0x20, 0xc2, 0x00, 0xa4, 0x08, 0xad, 0x19, 0x83, 0x08, 0xad, 0x10,
- 0x83, 0x08, 0xad, 0x09, 0xc2, 0x00, 0xc1, 0x08, 0xac, 0xe1, 0xc2, 0x1d,
- 0x5f, 0x08, 0xac, 0xb9, 0xc2, 0x01, 0x29, 0x08, 0xac, 0x90, 0xc2, 0x00,
- 0xa4, 0x08, 0xad, 0x01, 0x83, 0x08, 0xac, 0xf9, 0x06, 0x42, 0x36, 0xda,
- 0xc2, 0x00, 0xa4, 0x08, 0xac, 0xb1, 0x83, 0x08, 0xac, 0xa8, 0xc2, 0x00,
- 0xa4, 0x08, 0xac, 0xa1, 0x83, 0x08, 0xac, 0x98, 0xc2, 0x00, 0xa4, 0x08,
- 0xac, 0x89, 0x83, 0x08, 0xac, 0x80, 0xc2, 0x00, 0xa4, 0x08, 0xac, 0x79,
+ 0xa6, 0x14, 0x08, 0xae, 0xd9, 0x97, 0x08, 0xac, 0x69, 0x8b, 0x08, 0xac,
+ 0x59, 0x83, 0x08, 0xac, 0x08, 0xd5, 0x32, 0xfb, 0x08, 0xae, 0xcb, 0x02,
+ 0x36, 0x3f, 0x0a, 0xc2, 0x36, 0x43, 0x83, 0x08, 0xac, 0xe9, 0x16, 0x42,
+ 0x36, 0x4d, 0x83, 0x08, 0xad, 0x69, 0xc2, 0x0e, 0xe5, 0x08, 0xad, 0x61,
+ 0xc2, 0x01, 0x0e, 0x08, 0xad, 0x58, 0x83, 0x08, 0xad, 0x51, 0x47, 0xb7,
+ 0xd8, 0x42, 0x36, 0x57, 0xc2, 0x01, 0x0e, 0x08, 0xad, 0x29, 0x83, 0x08,
+ 0xad, 0x20, 0xc2, 0x01, 0x0e, 0x08, 0xad, 0x19, 0x83, 0x08, 0xad, 0x10,
+ 0x83, 0x08, 0xad, 0x09, 0xc2, 0x01, 0x01, 0x08, 0xac, 0xe1, 0xc2, 0x1a,
+ 0x36, 0x08, 0xac, 0xb9, 0xc2, 0x07, 0x69, 0x08, 0xac, 0x90, 0xc2, 0x01,
+ 0x0e, 0x08, 0xad, 0x01, 0x83, 0x08, 0xac, 0xf9, 0x06, 0x42, 0x36, 0x65,
+ 0xc2, 0x01, 0x0e, 0x08, 0xac, 0xb1, 0x83, 0x08, 0xac, 0xa8, 0xc2, 0x01,
+ 0x0e, 0x08, 0xac, 0xa1, 0x83, 0x08, 0xac, 0x98, 0xc2, 0x01, 0x0e, 0x08,
+ 0xac, 0x89, 0x83, 0x08, 0xac, 0x80, 0xc2, 0x01, 0x0e, 0x08, 0xac, 0x79,
0x83, 0x08, 0xac, 0x70, 0x97, 0x08, 0xac, 0x28, 0x8b, 0x08, 0xac, 0x18,
- 0x4b, 0x99, 0x6f, 0xc2, 0x36, 0xe4, 0x48, 0x19, 0xf7, 0x42, 0x36, 0xf3,
- 0xc7, 0xc4, 0x35, 0x08, 0xae, 0x09, 0xc5, 0x35, 0x00, 0x08, 0xae, 0x01,
- 0x42, 0x03, 0x32, 0xc2, 0x36, 0xff, 0xc8, 0x11, 0x40, 0x08, 0xad, 0xe9,
- 0xcb, 0x1e, 0x17, 0x08, 0xad, 0xe0, 0xc7, 0xcb, 0xd6, 0x01, 0x39, 0x09,
- 0xc7, 0x17, 0x46, 0x01, 0x16, 0x30, 0xcf, 0x6b, 0x2d, 0x01, 0x5f, 0x51,
- 0xd0, 0x5c, 0x22, 0x01, 0x5f, 0x58, 0xcc, 0x7c, 0x6c, 0x00, 0x04, 0x31,
- 0xc5, 0x57, 0xf3, 0x00, 0x04, 0xc0, 0xc4, 0x0f, 0x7c, 0x05, 0x46, 0x21,
- 0xc5, 0x44, 0x7b, 0x05, 0x44, 0x08, 0x97, 0x05, 0x46, 0x19, 0x8b, 0x05,
+ 0x4b, 0x9b, 0xc0, 0xc2, 0x36, 0x6f, 0x48, 0x1b, 0xae, 0x42, 0x36, 0x7e,
+ 0xc7, 0xc6, 0xf7, 0x08, 0xae, 0x09, 0xc5, 0x33, 0x1a, 0x08, 0xae, 0x01,
+ 0x42, 0x02, 0x52, 0xc2, 0x36, 0x8a, 0xc8, 0x10, 0xab, 0x08, 0xad, 0xe9,
+ 0xcb, 0x21, 0x1a, 0x08, 0xad, 0xe0, 0xc7, 0xc9, 0xc8, 0x01, 0x39, 0x09,
+ 0xc7, 0x15, 0xe4, 0x01, 0x16, 0x30, 0xcf, 0x6c, 0x4e, 0x01, 0x5f, 0x51,
+ 0xd0, 0x58, 0x4f, 0x01, 0x5f, 0x58, 0xcc, 0x81, 0x0c, 0x00, 0x04, 0x31,
+ 0xc5, 0x4a, 0xd9, 0x00, 0x04, 0xc0, 0xc4, 0x21, 0x28, 0x05, 0x46, 0x21,
+ 0xc5, 0x45, 0xcf, 0x05, 0x44, 0x08, 0x97, 0x05, 0x46, 0x19, 0x8b, 0x05,
0x46, 0x01, 0x83, 0x05, 0x45, 0xa8, 0x91, 0x05, 0x46, 0x10, 0x87, 0x05,
- 0x45, 0xf8, 0x8e, 0x05, 0x45, 0xe3, 0x02, 0x37, 0x0b, 0x94, 0x05, 0x45,
- 0xd2, 0x02, 0x37, 0x0f, 0x8b, 0x05, 0x45, 0xb8, 0x97, 0x05, 0x45, 0xc8,
- 0xc2, 0x0c, 0x65, 0x05, 0x45, 0x81, 0x83, 0x05, 0x45, 0x89, 0xc2, 0x00,
- 0xa4, 0x05, 0x45, 0x78, 0xc2, 0x00, 0xc7, 0x05, 0x45, 0x99, 0x83, 0x05,
+ 0x45, 0xf8, 0x8e, 0x05, 0x45, 0xe3, 0x02, 0x36, 0x96, 0x94, 0x05, 0x45,
+ 0xd2, 0x02, 0x36, 0x9a, 0x8b, 0x05, 0x45, 0xb8, 0x97, 0x05, 0x45, 0xc8,
+ 0xc2, 0x0e, 0xe5, 0x05, 0x45, 0x81, 0x83, 0x05, 0x45, 0x89, 0xc2, 0x01,
+ 0x0e, 0x05, 0x45, 0x78, 0xc2, 0x00, 0x96, 0x05, 0x45, 0x99, 0x83, 0x05,
0x45, 0x68, 0x83, 0x05, 0x44, 0x19, 0x8b, 0x05, 0x44, 0x71, 0x97, 0x05,
- 0x44, 0x88, 0x8b, 0x05, 0x44, 0x28, 0x97, 0x05, 0x44, 0x38, 0x47, 0xac,
- 0xc2, 0xc2, 0x37, 0x13, 0x83, 0x05, 0x45, 0x70, 0x87, 0x05, 0x44, 0x68,
- 0x91, 0x05, 0x44, 0x80, 0x83, 0x05, 0x44, 0x91, 0xc2, 0x00, 0xa4, 0x05,
- 0x44, 0x98, 0x83, 0x05, 0x44, 0xa1, 0xc2, 0x00, 0xa4, 0x05, 0x44, 0xa8,
- 0xc2, 0x01, 0x29, 0x05, 0x44, 0xb1, 0xc2, 0x1d, 0x5f, 0x05, 0x44, 0xd9,
- 0xc2, 0x00, 0xc1, 0x05, 0x45, 0x01, 0x83, 0x05, 0x45, 0x28, 0x83, 0x05,
- 0x44, 0xb9, 0xc2, 0x00, 0xa4, 0x05, 0x44, 0xc0, 0x83, 0x05, 0x44, 0xc9,
- 0xc2, 0x00, 0xa4, 0x05, 0x44, 0xd0, 0x16, 0xc2, 0x37, 0x21, 0x83, 0x05,
- 0x45, 0x09, 0xc2, 0x00, 0xa4, 0x05, 0x45, 0x10, 0x06, 0xc2, 0x37, 0x2b,
- 0x83, 0x05, 0x45, 0x19, 0xc2, 0x00, 0xa4, 0x05, 0x45, 0x20, 0x83, 0x05,
- 0x45, 0x31, 0xc2, 0x00, 0xa4, 0x05, 0x45, 0x38, 0x83, 0x05, 0x45, 0x41,
- 0xc2, 0x00, 0xa4, 0x05, 0x45, 0x48, 0xc4, 0x15, 0xd3, 0x05, 0x46, 0x81,
- 0xc3, 0x01, 0xb4, 0x05, 0x46, 0x89, 0x16, 0xc2, 0x37, 0x35, 0x08, 0xc2,
- 0x37, 0x41, 0x15, 0xc2, 0x37, 0x4d, 0xc5, 0x01, 0xdb, 0x05, 0x46, 0xc1,
- 0xc4, 0x22, 0x71, 0x05, 0x46, 0xc8, 0xdd, 0x00, 0xaa, 0x0f, 0xb3, 0xb9,
- 0x44, 0x06, 0xfe, 0x42, 0x37, 0x59, 0xe0, 0x00, 0xa7, 0x0f, 0xb3, 0xc0,
- 0xc4, 0xe5, 0x27, 0x00, 0x41, 0xf1, 0xc3, 0x0e, 0x44, 0x00, 0x41, 0x90,
- 0xc5, 0xdf, 0x9d, 0x00, 0x40, 0xb8, 0x83, 0x00, 0x40, 0xf0, 0x83, 0x00,
- 0x40, 0xf8, 0xd0, 0x5a, 0x52, 0x01, 0x54, 0xb8, 0xd0, 0x5a, 0x52, 0x01,
- 0x54, 0xc0, 0x07, 0xc2, 0x37, 0x5f, 0x44, 0x00, 0xcc, 0xc2, 0x37, 0x6b,
- 0xc9, 0xaf, 0x1c, 0x08, 0x8e, 0x69, 0xca, 0xa7, 0x70, 0x08, 0x8e, 0x48,
- 0xc3, 0xaa, 0x6d, 0x08, 0x8e, 0xd1, 0xd5, 0x36, 0xee, 0x08, 0x8e, 0x60,
- 0x45, 0x06, 0x98, 0xc2, 0x37, 0x99, 0xcb, 0x95, 0x0d, 0x08, 0x8e, 0x31,
- 0xc4, 0x1c, 0xd0, 0x08, 0x8e, 0x28, 0x45, 0x00, 0xcb, 0xc2, 0x37, 0xbd,
- 0xcd, 0x76, 0xbb, 0x08, 0x8e, 0x58, 0xc2, 0x00, 0xa4, 0x08, 0x8d, 0x91,
- 0x15, 0xc2, 0x37, 0xe3, 0x18, 0xc2, 0x37, 0xf3, 0x0e, 0xc2, 0x37, 0xfd,
- 0xc2, 0x02, 0x59, 0x08, 0x8d, 0x59, 0xc2, 0x1d, 0x5f, 0x08, 0x8d, 0x51,
- 0xc2, 0x00, 0xad, 0x08, 0x8d, 0x49, 0x04, 0xc2, 0x38, 0x07, 0x12, 0xc2,
- 0x38, 0x11, 0x10, 0xc2, 0x38, 0x1b, 0x06, 0xc2, 0x38, 0x31, 0x16, 0xc2,
- 0x38, 0x3f, 0x0c, 0xc2, 0x38, 0x4d, 0x05, 0xc2, 0x38, 0x57, 0x09, 0xc2,
- 0x38, 0x61, 0x0d, 0xc2, 0x38, 0x6b, 0x83, 0x08, 0x8c, 0x1b, 0x02, 0x38,
- 0x75, 0x91, 0x08, 0x8c, 0x79, 0x87, 0x08, 0x8c, 0x69, 0x97, 0x08, 0x8c,
- 0x3b, 0x02, 0x38, 0x81, 0x8b, 0x08, 0x8c, 0x2a, 0x02, 0x38, 0x85, 0xc2,
- 0x00, 0xb2, 0x08, 0x22, 0x89, 0x0a, 0x42, 0x38, 0x89, 0x91, 0x08, 0x22,
- 0xa9, 0xc3, 0x13, 0xd1, 0x08, 0x22, 0xb0, 0x83, 0x08, 0x22, 0xc1, 0x99,
- 0x08, 0x23, 0xf8, 0xc3, 0x3f, 0x7b, 0x08, 0x22, 0xc9, 0xc4, 0xe3, 0xbb,
- 0x08, 0x23, 0x18, 0xc6, 0x13, 0xce, 0x08, 0x23, 0x01, 0xc3, 0x01, 0x2e,
- 0x08, 0x23, 0x28, 0x87, 0x08, 0x23, 0x11, 0xc2, 0x00, 0x75, 0x08, 0x23,
- 0x58, 0x88, 0x08, 0x23, 0x31, 0xc2, 0x00, 0x69, 0x08, 0x23, 0x91, 0xc2,
- 0x00, 0x57, 0x08, 0x23, 0xf0, 0xc2, 0x0e, 0x53, 0x08, 0x23, 0x39, 0x03,
- 0xc2, 0x38, 0x95, 0xc2, 0x01, 0x10, 0x08, 0x23, 0xd8, 0xc2, 0x00, 0xe5,
- 0x08, 0x23, 0x41, 0xc2, 0x01, 0x4a, 0x08, 0x23, 0x49, 0x8a, 0x08, 0x23,
- 0x69, 0xc2, 0x00, 0x63, 0x08, 0x23, 0x89, 0xc2, 0x00, 0x4b, 0x08, 0x23,
- 0xb9, 0x14, 0xc2, 0x38, 0x9d, 0xc2, 0x01, 0x7b, 0x08, 0x23, 0xd0, 0x90,
- 0x08, 0x23, 0x51, 0xc2, 0x00, 0x84, 0x08, 0x23, 0x61, 0xc2, 0x08, 0x0e,
- 0x08, 0x23, 0xa1, 0xc3, 0x08, 0xc1, 0x08, 0x23, 0xa9, 0xc2, 0x00, 0x31,
- 0x08, 0x23, 0xb1, 0x94, 0x08, 0x23, 0xc8, 0xe0, 0x0b, 0x87, 0x01, 0x4a,
- 0x20, 0xcd, 0x76, 0x60, 0x01, 0x57, 0x38, 0x00, 0x42, 0x38, 0xa7, 0xd6,
- 0x30, 0xfb, 0x01, 0x5a, 0x79, 0x4c, 0x81, 0x58, 0x42, 0x38, 0xb3, 0x00,
- 0x42, 0x38, 0xb9, 0xc3, 0xe6, 0xf7, 0x0f, 0xb3, 0x09, 0xc9, 0xac, 0x43,
- 0x0f, 0xb2, 0xc9, 0xc4, 0x48, 0xc8, 0x0f, 0xb2, 0x88, 0xc7, 0x11, 0xa4,
- 0x01, 0x5b, 0xc8, 0x00, 0x42, 0x38, 0xc5, 0xc3, 0xe6, 0xf7, 0x0f, 0xb3,
- 0x19, 0xc9, 0xac, 0x43, 0x0f, 0xb2, 0xd9, 0xc4, 0x48, 0xc8, 0x0f, 0xb2,
- 0x98, 0xc7, 0x11, 0xa4, 0x01, 0x5b, 0xc0, 0xc2, 0x00, 0xa4, 0x08, 0xd3,
- 0x49, 0x83, 0x08, 0xd3, 0x40, 0xc2, 0x00, 0xa4, 0x08, 0xd3, 0xb1, 0x83,
- 0x08, 0xd3, 0xa8, 0xc2, 0x00, 0xa4, 0x08, 0xd3, 0x39, 0x83, 0x08, 0xd3,
- 0x30, 0x8e, 0x08, 0xd2, 0xd1, 0x94, 0x08, 0xd2, 0xc8, 0x97, 0x08, 0xd2,
- 0xc1, 0x8b, 0x08, 0xd2, 0xb8, 0x87, 0x08, 0xd2, 0xb0, 0x87, 0x08, 0xd2,
- 0x90, 0xca, 0x52, 0x12, 0x08, 0x7a, 0xb0, 0xc3, 0xdf, 0x4a, 0x08, 0x79,
- 0xf9, 0xc4, 0xd9, 0x77, 0x08, 0x79, 0xe0, 0xc5, 0xd2, 0x86, 0x0f, 0xbc,
- 0xb1, 0xc2, 0x00, 0xb7, 0x01, 0x99, 0x39, 0xc2, 0xe8, 0x12, 0x01, 0x9c,
- 0xa0, 0x11, 0xc2, 0x38, 0xdd, 0x8f, 0x01, 0x9c, 0xc8, 0x44, 0x00, 0x6d,
- 0xc2, 0x38, 0xe9, 0xc4, 0x8c, 0xa4, 0x01, 0x9a, 0xb9, 0x84, 0x01, 0x9e,
- 0xe8, 0x11, 0xc2, 0x39, 0x23, 0xd5, 0x36, 0x31, 0x01, 0x56, 0x69, 0x8f,
- 0x01, 0x9e, 0x81, 0x90, 0x01, 0x9e, 0x89, 0x9a, 0x01, 0x9e, 0x98, 0xca,
- 0x28, 0x83, 0x01, 0x14, 0x83, 0x02, 0x39, 0x2d, 0xc3, 0x6a, 0x62, 0x01,
- 0x98, 0x49, 0xc3, 0x13, 0x50, 0x01, 0x98, 0x51, 0x98, 0x01, 0x9b, 0xa8,
- 0xc7, 0x3f, 0x4a, 0x01, 0x14, 0x7b, 0x02, 0x39, 0x33, 0x90, 0x01, 0x9e,
- 0x63, 0x02, 0x39, 0x39, 0x97, 0x01, 0x9b, 0xd0, 0xc2, 0x01, 0xc7, 0x01,
- 0x14, 0xa1, 0x03, 0xc2, 0x39, 0x45, 0x85, 0x01, 0x9e, 0x21, 0x86, 0x01,
- 0x9e, 0x29, 0xc8, 0xb6, 0xdd, 0x01, 0x9e, 0x31, 0x91, 0x01, 0x9e, 0x3b,
- 0x02, 0x39, 0x4d, 0x8f, 0x01, 0x9c, 0xea, 0x02, 0x39, 0x53, 0xc3, 0x63,
- 0x97, 0x01, 0x10, 0xd1, 0x0b, 0xc2, 0x39, 0x57, 0x17, 0xc2, 0x39, 0x69,
- 0x07, 0xc2, 0x39, 0x75, 0xc2, 0x01, 0xf2, 0x01, 0x9d, 0x6a, 0x02, 0x39,
- 0x81, 0xcc, 0x8b, 0x30, 0x0f, 0x90, 0x01, 0x89, 0x01, 0x96, 0x61, 0x83,
- 0x01, 0x9e, 0x53, 0x02, 0x39, 0x8a, 0x17, 0xc2, 0x39, 0x90, 0x07, 0xc2,
- 0x39, 0xa2, 0x11, 0xc2, 0x39, 0xae, 0x92, 0x01, 0x9e, 0x5b, 0x02, 0x39,
- 0xb6, 0x9c, 0x01, 0x9c, 0x80, 0x8c, 0x0f, 0x8c, 0x81, 0x83, 0x01, 0x9b,
- 0x93, 0x02, 0x39, 0xba, 0xc3, 0x13, 0x73, 0x01, 0x99, 0x29, 0xc3, 0x05,
- 0xd3, 0x01, 0x99, 0x31, 0x84, 0x01, 0x9e, 0x41, 0x8f, 0x01, 0x9b, 0xbb,
- 0x02, 0x39, 0xc0, 0x8e, 0x01, 0x9c, 0xb8, 0x11, 0xc2, 0x39, 0xc4, 0x83,
- 0x01, 0x9d, 0x4b, 0x02, 0x39, 0xd6, 0x0b, 0xc2, 0x39, 0xe0, 0x07, 0xc2,
- 0x39, 0xea, 0x8a, 0x01, 0x9e, 0xb9, 0x8f, 0x01, 0x9e, 0xc1, 0xc2, 0x7a,
- 0x96, 0x01, 0x9e, 0xc9, 0x94, 0x01, 0x9e, 0xd1, 0x85, 0x01, 0x9b, 0xb1,
- 0x88, 0x01, 0x9c, 0x51, 0x95, 0x01, 0x9d, 0x81, 0x98, 0x01, 0x9d, 0xa1,
- 0x99, 0x01, 0x9d, 0xd0, 0x14, 0xc2, 0x39, 0xfa, 0x98, 0x01, 0x96, 0x71,
- 0xc7, 0xca, 0x39, 0x01, 0x98, 0x39, 0xc4, 0xe4, 0x63, 0x01, 0x98, 0x40,
- 0xc5, 0xd5, 0x98, 0x01, 0x98, 0x01, 0xc5, 0xde, 0x26, 0x01, 0x98, 0x09,
- 0xc4, 0xe5, 0xaf, 0x01, 0x98, 0x11, 0xc3, 0x3d, 0x3e, 0x01, 0x98, 0x19,
- 0x97, 0x01, 0x9b, 0x99, 0x8f, 0x01, 0x9e, 0x11, 0xc7, 0x25, 0x2d, 0x01,
- 0x9e, 0xf8, 0x83, 0x01, 0x9c, 0x23, 0x02, 0x3a, 0x04, 0xc5, 0xdf, 0x20,
- 0x01, 0x98, 0x91, 0xc3, 0x1c, 0x64, 0x01, 0x98, 0xa3, 0x02, 0x3a, 0x0e,
- 0x42, 0x02, 0x53, 0xc2, 0x3a, 0x20, 0xc4, 0xe4, 0x1f, 0x01, 0x98, 0xe1,
- 0x11, 0xc2, 0x3a, 0x2c, 0x89, 0x01, 0x9c, 0x79, 0x8d, 0x01, 0x9e, 0x69,
- 0x8f, 0x01, 0x9c, 0xf3, 0x02, 0x3a, 0x38, 0x96, 0x01, 0x9e, 0x79, 0x84,
- 0x01, 0x9c, 0x29, 0xc3, 0x03, 0x40, 0x01, 0x9c, 0x49, 0xc2, 0xe8, 0x12,
- 0x01, 0x9c, 0x89, 0x8e, 0x01, 0x9c, 0xc1, 0xc2, 0x03, 0xa4, 0x01, 0x9d,
- 0x51, 0x98, 0x01, 0x9d, 0xc1, 0x99, 0x01, 0x9d, 0xf1, 0xc4, 0xe5, 0x83,
- 0x01, 0x9e, 0x00, 0x03, 0xc2, 0x3a, 0x3c, 0x0b, 0xc2, 0x3a, 0x4c, 0xc5,
- 0xd2, 0x50, 0x01, 0x98, 0xc3, 0x02, 0x3a, 0x5e, 0x9b, 0x01, 0x9e, 0x49,
- 0x84, 0x01, 0x9c, 0x39, 0xc2, 0xe8, 0x12, 0x01, 0x9c, 0x99, 0xc2, 0x03,
- 0xa4, 0x01, 0x9d, 0x60, 0x03, 0xc2, 0x3a, 0x64, 0xc6, 0xcf, 0xe5, 0x01,
- 0x99, 0x09, 0x43, 0x00, 0xc4, 0xc2, 0x3a, 0x70, 0x94, 0x01, 0x9e, 0xd9,
- 0x98, 0x01, 0x9e, 0xe0, 0x83, 0x01, 0x9c, 0x0b, 0x02, 0x3a, 0x78, 0xc4,
- 0x07, 0x3b, 0x01, 0x99, 0x49, 0x88, 0x01, 0x9c, 0x59, 0x8f, 0x01, 0x9c,
- 0xd1, 0x95, 0x01, 0x9d, 0x89, 0x98, 0x01, 0x9d, 0xa9, 0x99, 0x01, 0x9d,
- 0xd8, 0x03, 0xc2, 0x3a, 0x7e, 0xc3, 0xa9, 0xb0, 0x01, 0x99, 0x89, 0xc7,
- 0xc7, 0x68, 0x01, 0x99, 0xa1, 0xc4, 0xe4, 0x33, 0x01, 0x99, 0xe1, 0xc5,
- 0xde, 0xb7, 0x01, 0x99, 0xf1, 0x93, 0x01, 0x9e, 0x18, 0x83, 0x01, 0x9c,
- 0x1b, 0x02, 0x3a, 0x88, 0x0b, 0xc2, 0x3a, 0x9e, 0x07, 0xc2, 0x3a, 0xb1,
- 0x42, 0x01, 0x33, 0xc2, 0x3a, 0xc0, 0x89, 0x01, 0x9c, 0x71, 0x00, 0xc2,
- 0x3a, 0xe0, 0x84, 0x01, 0x9c, 0x33, 0x02, 0x3a, 0xf0, 0xc2, 0x00, 0x75,
- 0x01, 0x9e, 0xb1, 0xc2, 0xe8, 0x12, 0x01, 0x9c, 0x91, 0x8e, 0x01, 0x9c,
- 0xb1, 0x8f, 0x01, 0x9c, 0xe3, 0x02, 0x3a, 0xf6, 0xc2, 0x03, 0xa4, 0x01,
- 0x9d, 0x59, 0x95, 0x01, 0x9d, 0x99, 0x98, 0x01, 0x9d, 0xbb, 0x02, 0x3a,
- 0xfa, 0x99, 0x01, 0x9d, 0xea, 0x02, 0x3b, 0x00, 0x42, 0x14, 0x40, 0xc2,
- 0x3b, 0x06, 0xc3, 0x93, 0xe1, 0x01, 0x9a, 0x80, 0x11, 0xc2, 0x3b, 0x12,
- 0x45, 0x07, 0x12, 0x42, 0x3b, 0x1e, 0xc6, 0x13, 0x57, 0x01, 0x36, 0xe9,
- 0xc2, 0x26, 0x57, 0x0f, 0x8d, 0x51, 0xc6, 0xcf, 0xf7, 0x0f, 0x8d, 0x19,
- 0x07, 0xc2, 0x3b, 0x2a, 0xc2, 0x07, 0x43, 0x0f, 0x8c, 0xc1, 0xc5, 0x07,
- 0x0a, 0x01, 0x4e, 0x41, 0xcb, 0x11, 0x66, 0x01, 0x4e, 0x39, 0x86, 0x0f,
- 0x8a, 0x61, 0x95, 0x0f, 0x8a, 0x68, 0xc2, 0x19, 0x4b, 0x01, 0x35, 0xf9,
- 0x48, 0xbb, 0x1d, 0x42, 0x3b, 0x36, 0xc4, 0x01, 0x2e, 0x01, 0x15, 0x01,
- 0x19, 0xc2, 0x3b, 0x48, 0xc6, 0x04, 0x5e, 0x0f, 0x8c, 0xd8, 0xc4, 0x1b,
- 0x39, 0x01, 0x14, 0xf9, 0x98, 0x0f, 0x8a, 0x58, 0xc3, 0x26, 0x13, 0x01,
- 0x14, 0xf1, 0xc2, 0x7b, 0x3f, 0x0f, 0x8a, 0x70, 0x55, 0x2f, 0xb2, 0xc2,
- 0x3b, 0x54, 0xc3, 0x97, 0x1a, 0x0f, 0x8c, 0x91, 0x8e, 0x0f, 0x8c, 0x88,
- 0xc2, 0x00, 0x2c, 0x0f, 0x8d, 0x61, 0x95, 0x0f, 0x8c, 0xd0, 0xc2, 0x05,
- 0x26, 0x0f, 0x8d, 0x59, 0xd7, 0x2b, 0x79, 0x0f, 0x8c, 0xc8, 0xc5, 0xdd,
- 0xae, 0x0f, 0x8d, 0x41, 0xc2, 0x03, 0xdc, 0x0f, 0x8d, 0x39, 0x98, 0x0f,
- 0x8a, 0x51, 0x85, 0x0f, 0x8d, 0x30, 0xd3, 0x40, 0x58, 0x0f, 0x8d, 0x21,
- 0x8d, 0x0f, 0x8c, 0xb8, 0xcd, 0x81, 0x0a, 0x0f, 0x8d, 0x01, 0x44, 0x06,
- 0x9e, 0xc2, 0x3b, 0x6e, 0xc3, 0x04, 0x5f, 0x0f, 0x8c, 0x99, 0xd5, 0x33,
- 0x91, 0x01, 0x4e, 0x28, 0x89, 0x0f, 0x8c, 0xb1, 0xc2, 0x01, 0x06, 0x0f,
- 0x8c, 0xa8, 0xc9, 0x29, 0x48, 0x01, 0x21, 0x30, 0xc2, 0x00, 0x34, 0x01,
- 0x20, 0x79, 0xc3, 0x00, 0x83, 0x01, 0x20, 0x70, 0xc4, 0x26, 0xb2, 0x01,
- 0x20, 0x11, 0xc7, 0xc8, 0xbf, 0x01, 0x20, 0x08, 0xc4, 0x32, 0x15, 0x01,
- 0x21, 0x0b, 0x02, 0x3b, 0x78, 0x4d, 0x7b, 0x40, 0x42, 0x3b, 0x7e, 0xc5,
- 0xdc, 0xeb, 0x01, 0x21, 0x21, 0xd2, 0x4d, 0x82, 0x01, 0x20, 0xa8, 0x45,
- 0x00, 0xf1, 0xc2, 0x3b, 0x8e, 0xc5, 0xd7, 0xeb, 0x01, 0x20, 0x28, 0x49,
- 0xb1, 0x5c, 0xc2, 0x3b, 0x98, 0xc2, 0x01, 0x2e, 0x00, 0x39, 0x08, 0x46,
- 0xce, 0x29, 0x42, 0x3b, 0xc0, 0xc2, 0x26, 0xfa, 0x00, 0x39, 0x61, 0xc3,
- 0x1a, 0xba, 0x00, 0x38, 0xda, 0x02, 0x3b, 0xd2, 0xc3, 0x0d, 0xd9, 0x00,
- 0x39, 0x59, 0xc4, 0xdf, 0x49, 0x00, 0x39, 0x41, 0xc6, 0x79, 0x80, 0x00,
- 0x39, 0x19, 0xd0, 0x5f, 0x92, 0x00, 0x38, 0x89, 0x47, 0xcb, 0x04, 0x42,
- 0x3b, 0xd8, 0xc3, 0x05, 0x1a, 0x00, 0x39, 0x51, 0xca, 0x9d, 0x66, 0x00,
- 0x39, 0x38, 0xc3, 0x10, 0x36, 0x00, 0x38, 0xf0, 0xc2, 0x00, 0x6e, 0x00,
- 0x38, 0xd0, 0xd2, 0x4b, 0x78, 0x00, 0x38, 0xb1, 0xc5, 0x4b, 0x80, 0x00,
- 0x38, 0xa8, 0xc9, 0xb3, 0x1e, 0x00, 0x38, 0xa0, 0x00, 0xc2, 0x3b, 0xea,
- 0xcd, 0x80, 0x2d, 0x00, 0x39, 0xe0, 0xca, 0x9b, 0x9a, 0x00, 0x38, 0x69,
- 0xc9, 0xa9, 0x7c, 0x00, 0x38, 0x61, 0xc6, 0xa9, 0x7f, 0x00, 0x38, 0x58,
- 0xc5, 0x01, 0x62, 0x00, 0x39, 0xb9, 0xc5, 0x00, 0x95, 0x00, 0x39, 0xb0,
- 0xc5, 0x01, 0xf7, 0x00, 0x38, 0x39, 0xc4, 0x01, 0x1e, 0x00, 0x38, 0x30,
- 0xc5, 0x37, 0x91, 0x00, 0x38, 0x23, 0x02, 0x3b, 0xf6, 0xc9, 0x11, 0xdc,
- 0x00, 0x38, 0x10, 0xc5, 0x37, 0x91, 0x00, 0x38, 0x1b, 0x02, 0x3b, 0xfc,
- 0xc9, 0x11, 0xdc, 0x00, 0x38, 0x08, 0xc5, 0x00, 0x95, 0x00, 0x39, 0xe9,
- 0xc5, 0x01, 0x62, 0x00, 0x39, 0xf0, 0xc5, 0x00, 0x95, 0x00, 0x3a, 0x19,
- 0xc5, 0x01, 0x62, 0x00, 0x3a, 0x20, 0xc5, 0x00, 0x95, 0x00, 0x3a, 0x29,
- 0xc5, 0x01, 0x62, 0x00, 0x3a, 0x30, 0xc2, 0x00, 0xf6, 0x05, 0x40, 0x89,
- 0x91, 0x05, 0x40, 0x80, 0x91, 0x05, 0x40, 0x91, 0xc2, 0x00, 0xf6, 0x05,
- 0x40, 0x98, 0xd1, 0x53, 0x2c, 0x0f, 0xa8, 0x51, 0xce, 0x6e, 0x95, 0x0f,
- 0xa8, 0x49, 0xd3, 0x21, 0x95, 0x0f, 0xa8, 0x38, 0x00, 0x42, 0x3c, 0x02,
- 0xcf, 0x0b, 0x98, 0x01, 0x4b, 0xd9, 0x42, 0x08, 0x22, 0x42, 0x3c, 0x17,
- 0xc3, 0x01, 0x4a, 0x01, 0x55, 0xf1, 0xcf, 0x68, 0x21, 0x01, 0x56, 0x01,
- 0xd9, 0x1e, 0xa6, 0x01, 0x56, 0x10, 0xc6, 0x0f, 0x1e, 0x01, 0x56, 0xb9,
- 0xde, 0x0f, 0x06, 0x01, 0x56, 0xc0, 0x52, 0x4b, 0xf6, 0xc2, 0x3c, 0x23,
- 0xcf, 0x1e, 0xc0, 0x01, 0x03, 0xe8, 0xca, 0x0e, 0x84, 0x01, 0x03, 0xe1,
- 0xc4, 0x01, 0xdc, 0x01, 0x03, 0xc0, 0xc4, 0x18, 0x83, 0x01, 0x03, 0xb9,
- 0xc2, 0x26, 0x51, 0x01, 0x03, 0xb0, 0xc3, 0x0c, 0x5b, 0x01, 0x03, 0xa9,
- 0xc3, 0x06, 0x9e, 0x01, 0x03, 0xa0, 0xc2, 0x01, 0x47, 0x00, 0x05, 0x91,
- 0xc4, 0x04, 0x5e, 0x00, 0x05, 0x98, 0xc6, 0xd1, 0xad, 0x00, 0xe6, 0x11,
- 0xc7, 0xca, 0x1d, 0x00, 0xe6, 0x08, 0x45, 0x25, 0x5a, 0xc2, 0x3c, 0x2b,
- 0x83, 0x00, 0xdc, 0xb0, 0xc8, 0xb4, 0xaa, 0x00, 0xdd, 0xe9, 0x87, 0x00,
- 0xdc, 0x30, 0xc2, 0x00, 0xa4, 0x00, 0xdd, 0xe9, 0x83, 0x00, 0xdc, 0xc0,
- 0xc2, 0x11, 0x70, 0x00, 0xdd, 0xe1, 0x83, 0x00, 0xdc, 0xe0, 0xc2, 0x11,
- 0x70, 0x00, 0xdd, 0xd9, 0x83, 0x00, 0xdc, 0xd8, 0xc2, 0x1d, 0x5f, 0x00,
- 0xdd, 0x79, 0x83, 0x00, 0xdc, 0xf0, 0xc2, 0x00, 0xa4, 0x00, 0xdd, 0x71,
- 0x83, 0x00, 0xdc, 0x50, 0x83, 0x00, 0xdc, 0xa1, 0xc2, 0x1d, 0x5f, 0x00,
- 0xdc, 0x89, 0xc2, 0x01, 0x29, 0x00, 0xdc, 0x68, 0x97, 0x00, 0xdc, 0x48,
- 0xc4, 0x18, 0x83, 0x00, 0xdd, 0xb9, 0xc2, 0x26, 0x51, 0x00, 0xdd, 0xb0,
- 0xc3, 0x0c, 0x5b, 0x00, 0xdd, 0xa9, 0xc3, 0x06, 0x9e, 0x00, 0xdd, 0xa0,
- 0xc4, 0x04, 0x5e, 0x00, 0xdd, 0x99, 0xc2, 0x01, 0x47, 0x00, 0xdd, 0x90,
- 0xc2, 0x01, 0x09, 0x00, 0xdd, 0x69, 0xc2, 0x00, 0xad, 0x00, 0xdd, 0x60,
- 0xc3, 0x87, 0x7a, 0x00, 0xdd, 0x19, 0xc4, 0x88, 0xf1, 0x00, 0xdd, 0x10,
- 0xc5, 0xdc, 0xf5, 0x00, 0xdd, 0x51, 0x10, 0x42, 0x3c, 0x33, 0xc7, 0xc9,
- 0x0c, 0x00, 0xdd, 0x49, 0xc5, 0x0e, 0x40, 0x00, 0xdd, 0x39, 0xc7, 0xc3,
- 0x08, 0x00, 0xdd, 0x31, 0xc4, 0xe0, 0x2f, 0x00, 0xdd, 0x29, 0xc5, 0xd8,
- 0xea, 0x00, 0xdd, 0x20, 0xcb, 0x0e, 0x83, 0x01, 0x55, 0x81, 0xcc, 0x24,
- 0x24, 0x01, 0x55, 0x90, 0xc8, 0x07, 0x5f, 0x01, 0x55, 0xb1, 0xcf, 0x69,
- 0x89, 0x01, 0x55, 0xd0, 0xd1, 0x55, 0x19, 0x01, 0x14, 0x51, 0xcb, 0x23,
- 0x35, 0x01, 0x14, 0x33, 0x02, 0x3c, 0x3d, 0x46, 0x00, 0x95, 0x42, 0x3c,
- 0x43, 0xc6, 0x31, 0x53, 0x01, 0x56, 0x99, 0xc4, 0x0f, 0x20, 0x01, 0x56,
- 0xa8, 0xca, 0x24, 0x6e, 0x0f, 0xb0, 0x1b, 0x02, 0x3c, 0x5b, 0x0a, 0xc2,
- 0x3c, 0x61, 0x15, 0xc2, 0x3c, 0x73, 0xc4, 0x22, 0xe0, 0x0f, 0xcb, 0x90,
- 0xca, 0x24, 0x6e, 0x0f, 0xb1, 0xd1, 0xd1, 0x54, 0x3c, 0x0f, 0xb1, 0xd8,
- 0x47, 0xca, 0x7f, 0xc2, 0x3c, 0x82, 0x42, 0x00, 0xac, 0xc2, 0x3c, 0x8e,
- 0xc3, 0x0e, 0x41, 0x07, 0xf2, 0xa8, 0xc9, 0x87, 0xeb, 0x01, 0x10, 0x53,
- 0x02, 0x3c, 0x98, 0xcf, 0x0f, 0xfc, 0x07, 0xf2, 0xb9, 0xc6, 0xbd, 0x2f,
- 0x07, 0xf2, 0xc1, 0xca, 0x0e, 0x84, 0x07, 0xf3, 0x30, 0x4d, 0x7c, 0x10,
- 0xc2, 0x3c, 0x9e, 0x45, 0x02, 0x13, 0xc2, 0x3c, 0xbd, 0xce, 0x65, 0x34,
- 0x07, 0xf3, 0x40, 0xe0, 0x01, 0xa7, 0x08, 0x59, 0xd9, 0xc4, 0x21, 0x13,
- 0x00, 0x16, 0xe0, 0xc7, 0x2e, 0x34, 0x0f, 0xb7, 0x49, 0xc8, 0x37, 0x31,
- 0x07, 0xf3, 0x01, 0xc7, 0x0b, 0x80, 0x07, 0xf3, 0x08, 0x43, 0x02, 0x4f,
- 0xc2, 0x3c, 0xcf, 0xcc, 0x86, 0xd4, 0x07, 0xf3, 0x20, 0xc8, 0x68, 0x22,
- 0x07, 0xf3, 0x11, 0xcb, 0x93, 0xb8, 0x07, 0xf3, 0x50, 0x9f, 0x00, 0x04,
- 0x91, 0x9e, 0x00, 0x04, 0x88, 0xc3, 0x01, 0x59, 0x00, 0x04, 0x91, 0xc3,
- 0x01, 0xb4, 0x00, 0x04, 0x88, 0xc5, 0xd7, 0x5f, 0x0f, 0xad, 0xb0, 0xca,
- 0x37, 0x0e, 0x01, 0x13, 0xf1, 0xc5, 0x07, 0x62, 0x01, 0x13, 0xe0, 0x4c,
- 0x24, 0x18, 0xc2, 0x3c, 0xe1, 0xcb, 0x0e, 0x83, 0x01, 0x55, 0x99, 0x44,
- 0x1e, 0x2d, 0xc2, 0x3c, 0xed, 0xcf, 0x69, 0x89, 0x01, 0x55, 0xb8, 0xc3,
- 0x0e, 0x41, 0x07, 0xf0, 0x99, 0xc3, 0x00, 0xac, 0x07, 0xf0, 0x80, 0xcf,
- 0x0f, 0xfc, 0x07, 0xf0, 0xa9, 0xc6, 0xbd, 0x2f, 0x07, 0xf1, 0x89, 0xc6,
- 0xce, 0x05, 0x07, 0xf1, 0x90, 0x44, 0x02, 0x4e, 0xc2, 0x3c, 0xf9, 0xc7,
- 0x80, 0x81, 0x07, 0xf1, 0x98, 0xcb, 0x1a, 0x3f, 0x07, 0xf1, 0xb1, 0x05,
- 0xc2, 0x3d, 0x27, 0xd6, 0x0a, 0x88, 0x07, 0xf1, 0xd1, 0xd8, 0x23, 0xb8,
- 0x07, 0xf1, 0xe1, 0xd4, 0x3c, 0x79, 0x07, 0xf1, 0xf1, 0xce, 0x24, 0xb2,
- 0x07, 0xf2, 0x41, 0x46, 0x02, 0x31, 0xc2, 0x3d, 0x33, 0xcd, 0x09, 0x51,
- 0x07, 0xf2, 0x00, 0xc5, 0x00, 0xaa, 0x07, 0xf0, 0x89, 0xc9, 0x11, 0xdc,
- 0x07, 0xf0, 0x90, 0xc3, 0x00, 0xdf, 0x0f, 0x85, 0x01, 0xca, 0xa8, 0x7e,
- 0x0f, 0x86, 0x78, 0xc6, 0xd4, 0xbf, 0x0f, 0x85, 0x09, 0xc6, 0x7b, 0xe3,
- 0x0f, 0x85, 0x89, 0xc8, 0x4a, 0x99, 0x0f, 0x86, 0x09, 0xc5, 0xd8, 0x31,
- 0x0f, 0x86, 0x88, 0x46, 0xcd, 0xed, 0xc2, 0x3d, 0x3f, 0x48, 0xbc, 0x55,
- 0xc2, 0x3d, 0x57, 0x46, 0x77, 0x2e, 0xc2, 0x3d, 0x6f, 0x45, 0xde, 0x99,
- 0x42, 0x3d, 0x87, 0x11, 0xc2, 0x3d, 0xb1, 0x47, 0xcb, 0x5f, 0x42, 0x3d,
- 0xbd, 0x46, 0xd0, 0x09, 0xc2, 0x3d, 0xd5, 0x48, 0xb9, 0x15, 0x42, 0x3d,
- 0xed, 0xc6, 0xd4, 0xbf, 0x0f, 0x85, 0x41, 0xc6, 0x7b, 0xe3, 0x0f, 0x85,
- 0xc1, 0xc8, 0x4a, 0x99, 0x0f, 0x86, 0x41, 0xc5, 0xd8, 0x31, 0x0f, 0x86,
- 0xc0, 0xc6, 0xd4, 0xbf, 0x0f, 0x85, 0x49, 0xc6, 0x7b, 0xe3, 0x0f, 0x85,
- 0xc9, 0xc8, 0x4a, 0x99, 0x0f, 0x86, 0x49, 0xc5, 0xd8, 0x31, 0x0f, 0x86,
- 0xc8, 0xc6, 0xd4, 0xbf, 0x0f, 0x85, 0x59, 0xc6, 0x7b, 0xe3, 0x0f, 0x85,
- 0xd9, 0xc8, 0x4a, 0x99, 0x0f, 0x86, 0x59, 0xc5, 0xd8, 0x31, 0x0f, 0x86,
- 0xd8, 0x49, 0xac, 0xb8, 0xc2, 0x3e, 0x05, 0x47, 0x37, 0x71, 0x42, 0x3e,
- 0x1d, 0xc6, 0xd4, 0xbf, 0x0f, 0x85, 0x69, 0xc6, 0x7b, 0xe3, 0x0f, 0x85,
- 0xe9, 0xc8, 0x4a, 0x99, 0x0f, 0x86, 0x69, 0xc5, 0xd8, 0x31, 0x0f, 0x86,
- 0xe8, 0xc2, 0x01, 0x47, 0x01, 0x5e, 0x99, 0xc4, 0x04, 0x5e, 0x01, 0x5e,
- 0xa0, 0xc3, 0x06, 0x9e, 0x01, 0x5e, 0xa9, 0xc3, 0x0c, 0x5b, 0x01, 0x5e,
- 0xb0, 0x43, 0x03, 0x59, 0xc2, 0x3e, 0x35, 0x45, 0x00, 0x6c, 0xc2, 0x3e,
- 0x47, 0xd1, 0x0e, 0x7d, 0x01, 0x53, 0x90, 0xcb, 0x91, 0x0e, 0x0f, 0xae,
- 0xf9, 0xc3, 0x02, 0x53, 0x0f, 0xa6, 0x18, 0x45, 0x00, 0x8a, 0xc2, 0x3e,
- 0x63, 0xcc, 0x45, 0x45, 0x01, 0x10, 0x10, 0x9c, 0x01, 0x25, 0xa9, 0x9b,
- 0x01, 0x25, 0xa1, 0x9a, 0x01, 0x25, 0x99, 0x99, 0x01, 0x25, 0x91, 0x98,
- 0x01, 0x25, 0x89, 0x97, 0x01, 0x25, 0x81, 0x96, 0x01, 0x25, 0x79, 0x95,
- 0x01, 0x25, 0x71, 0x94, 0x01, 0x25, 0x69, 0x93, 0x01, 0x25, 0x61, 0x92,
- 0x01, 0x25, 0x59, 0x91, 0x01, 0x25, 0x51, 0x90, 0x01, 0x25, 0x49, 0x8f,
- 0x01, 0x25, 0x41, 0x8e, 0x01, 0x25, 0x39, 0x8d, 0x01, 0x25, 0x31, 0x8c,
- 0x01, 0x25, 0x29, 0x8b, 0x01, 0x25, 0x21, 0x8a, 0x01, 0x25, 0x19, 0x89,
- 0x01, 0x25, 0x11, 0x88, 0x01, 0x25, 0x09, 0x87, 0x01, 0x25, 0x01, 0x86,
- 0x01, 0x24, 0xf9, 0x85, 0x01, 0x24, 0xf1, 0x84, 0x01, 0x24, 0xe9, 0x83,
- 0x01, 0x24, 0xe0, 0x99, 0x0f, 0x89, 0x31, 0x9a, 0x0f, 0x89, 0x39, 0x9b,
- 0x0f, 0x89, 0x41, 0x9c, 0x0f, 0x89, 0x49, 0x83, 0x0f, 0x88, 0x81, 0x84,
- 0x0f, 0x88, 0x89, 0x85, 0x0f, 0x88, 0x91, 0x86, 0x0f, 0x88, 0x99, 0x87,
- 0x0f, 0x88, 0xa1, 0x88, 0x0f, 0x88, 0xa9, 0x89, 0x0f, 0x88, 0xb1, 0x8a,
- 0x0f, 0x88, 0xb9, 0x8b, 0x0f, 0x88, 0xc1, 0x8c, 0x0f, 0x88, 0xc9, 0x8d,
- 0x0f, 0x88, 0xd1, 0x8e, 0x0f, 0x88, 0xd9, 0x8f, 0x0f, 0x88, 0xe1, 0x90,
- 0x0f, 0x88, 0xe9, 0x91, 0x0f, 0x88, 0xf1, 0x92, 0x0f, 0x88, 0xf9, 0x93,
- 0x0f, 0x89, 0x01, 0x94, 0x0f, 0x89, 0x09, 0x95, 0x0f, 0x89, 0x11, 0x96,
- 0x0f, 0x89, 0x19, 0x97, 0x0f, 0x89, 0x21, 0x98, 0x0f, 0x89, 0x28, 0x42,
- 0x00, 0x4b, 0xc2, 0x3e, 0x7b, 0xc7, 0x53, 0x70, 0x01, 0x24, 0x01, 0xc2,
- 0x00, 0x4d, 0x01, 0x23, 0xe8, 0xc7, 0x20, 0xbe, 0x01, 0x24, 0x29, 0xc5,
- 0x6a, 0x79, 0x01, 0x23, 0xf0, 0xc8, 0x48, 0x4e, 0x01, 0x24, 0x21, 0xc6,
- 0x45, 0x38, 0x01, 0x24, 0x18, 0xc6, 0x13, 0xf0, 0x01, 0x24, 0x11, 0xc7,
- 0x56, 0x07, 0x01, 0x24, 0x08, 0xc4, 0x18, 0x83, 0x01, 0x23, 0xd1, 0xc2,
- 0x26, 0x51, 0x01, 0x23, 0xc8, 0xc3, 0x0c, 0x5b, 0x01, 0x23, 0xc1, 0xc3,
- 0x06, 0x9e, 0x01, 0x23, 0xb8, 0xc4, 0x04, 0x5e, 0x01, 0x23, 0xb1, 0xc2,
- 0x01, 0x47, 0x01, 0x23, 0xa8, 0xc5, 0x92, 0x32, 0x01, 0x90, 0x03, 0x02,
- 0x3e, 0x87, 0xc6, 0xc0, 0x37, 0x01, 0x90, 0x52, 0x02, 0x3e, 0x8d, 0xc2,
- 0x00, 0x56, 0x01, 0x90, 0x78, 0xc5, 0xc8, 0x2e, 0x01, 0x90, 0x13, 0x02,
- 0x3e, 0x93, 0xc6, 0xc6, 0xf2, 0x01, 0x90, 0x5a, 0x02, 0x3e, 0x99, 0xc2,
- 0x00, 0x56, 0x01, 0x90, 0x88, 0xc2, 0x00, 0x56, 0x01, 0x90, 0x90, 0xc4,
- 0x7a, 0x93, 0x01, 0x90, 0x2b, 0x02, 0x3e, 0x9f, 0xc6, 0xc1, 0x07, 0x01,
- 0x90, 0x62, 0x02, 0x3e, 0xa5, 0xc2, 0x00, 0x56, 0x01, 0x90, 0xa0, 0xc2,
- 0x00, 0x56, 0x01, 0x90, 0xa8, 0xc4, 0xc7, 0x2b, 0x01, 0x90, 0x43, 0x02,
- 0x3e, 0xab, 0xc6, 0xc7, 0x2a, 0x01, 0x90, 0x4a, 0x02, 0x3e, 0xaf, 0xc2,
- 0x00, 0x56, 0x01, 0x90, 0xd8, 0xc2, 0x01, 0x47, 0x01, 0x91, 0x09, 0xc4,
- 0x04, 0x5e, 0x01, 0x91, 0x11, 0xc2, 0x00, 0x4d, 0x01, 0x91, 0x48, 0xc3,
- 0x06, 0x9e, 0x01, 0x91, 0x19, 0x0b, 0xc2, 0x3e, 0xb5, 0xc7, 0xc9, 0xd0,
- 0x01, 0x92, 0x00, 0xc2, 0x26, 0x51, 0x01, 0x91, 0x29, 0x07, 0xc2, 0x3e,
- 0xc7, 0x17, 0xc2, 0x3e, 0xd3, 0x16, 0xc2, 0x3e, 0xdd, 0xc6, 0xcc, 0xc7,
- 0x01, 0x91, 0x99, 0xc6, 0xcc, 0x0d, 0x01, 0x91, 0xa8, 0xc4, 0x01, 0xdc,
- 0x01, 0x91, 0x39, 0xc4, 0x65, 0xf2, 0x01, 0x91, 0x79, 0xc9, 0xad, 0xd8,
- 0x01, 0x91, 0xe8, 0xc3, 0x00, 0x8b, 0x01, 0x91, 0x41, 0xc3, 0x00, 0xc2,
- 0x01, 0x91, 0xa0, 0xc3, 0x02, 0xd4, 0x01, 0x91, 0x51, 0xc4, 0x05, 0xe7,
- 0x01, 0x91, 0x70, 0xc4, 0x00, 0x9e, 0x01, 0x91, 0x61, 0xc3, 0x28, 0x7f,
- 0x01, 0x91, 0x68, 0xcd, 0x7e, 0x9a, 0x01, 0x91, 0xb9, 0xc3, 0x04, 0x75,
- 0x01, 0x91, 0xd0, 0xc7, 0x7a, 0x69, 0x01, 0x91, 0xc9, 0x15, 0xc2, 0x3e,
- 0xe9, 0xc3, 0x2b, 0x37, 0x01, 0x92, 0x18, 0xd1, 0x00, 0xf6, 0x01, 0x57,
- 0x91, 0xce, 0x38, 0x53, 0x01, 0x57, 0x98, 0xc5, 0x2b, 0x13, 0x08, 0xd7,
- 0xb9, 0xc4, 0x0e, 0x41, 0x08, 0xd7, 0x9a, 0x02, 0x3e, 0xf3, 0x45, 0x25,
- 0x5a, 0xc2, 0x3e, 0xf9, 0x83, 0x08, 0xd6, 0x98, 0x83, 0x08, 0xd6, 0xd8,
- 0x83, 0x08, 0xd6, 0xd0, 0xc2, 0x00, 0xa4, 0x08, 0xd6, 0xa1, 0x83, 0x08,
- 0xd6, 0x68, 0xc2, 0x00, 0xa4, 0x08, 0xd6, 0x89, 0x83, 0x08, 0xd6, 0x00,
- 0x83, 0x08, 0xd6, 0x81, 0xc2, 0x01, 0x29, 0x08, 0xd6, 0x38, 0xc2, 0x00,
- 0xa4, 0x08, 0xd6, 0x79, 0x83, 0x08, 0xd6, 0x70, 0xc2, 0x00, 0xa4, 0x08,
- 0xd6, 0x51, 0x83, 0x08, 0xd6, 0x08, 0xc5, 0x2b, 0x13, 0x08, 0xd7, 0x71,
- 0xc4, 0x0e, 0x41, 0x08, 0xd7, 0x5a, 0x02, 0x3f, 0x1c, 0xc6, 0x2b, 0x12,
- 0x08, 0xd7, 0x40, 0x16, 0xc2, 0x3f, 0x22, 0x08, 0xc2, 0x3f, 0x32, 0xc3,
- 0x01, 0xb4, 0x08, 0x43, 0xc8, 0xd3, 0x42, 0xde, 0x08, 0x43, 0xb9, 0x45,
- 0x02, 0x92, 0x42, 0x3f, 0x3e, 0xc2, 0x1b, 0xa4, 0x0b, 0x5c, 0x79, 0xc2,
- 0x1d, 0x60, 0x0b, 0x5c, 0x50, 0xc2, 0x27, 0xb2, 0x0b, 0x5c, 0x71, 0xc3,
- 0x50, 0x3f, 0x0b, 0x5c, 0x41, 0xc2, 0x02, 0x84, 0x0b, 0x5c, 0x10, 0x15,
- 0xc2, 0x3f, 0xa7, 0xc3, 0xe7, 0x36, 0x0b, 0x5c, 0x28, 0xc2, 0x1d, 0x60,
- 0x0b, 0x5c, 0x61, 0xc3, 0xe2, 0x01, 0x0b, 0x5b, 0xf0, 0x8f, 0x0b, 0x5c,
- 0x49, 0xc2, 0x1b, 0xa4, 0x0b, 0x5c, 0x18, 0xc3, 0xe7, 0x1e, 0x0b, 0x5c,
- 0x01, 0xc2, 0x03, 0x48, 0x0b, 0x5b, 0xf8, 0xc2, 0x13, 0xa9, 0x0b, 0x59,
- 0x79, 0xc3, 0xa1, 0xd8, 0x0b, 0x59, 0x38, 0xc2, 0x13, 0xa9, 0x0b, 0x59,
- 0x61, 0x16, 0xc2, 0x3f, 0xb9, 0xc4, 0xe4, 0x83, 0x0b, 0x59, 0x41, 0xc3,
- 0xc8, 0x15, 0x0b, 0x59, 0x11, 0xc3, 0x20, 0x92, 0x0b, 0x59, 0x00, 0xc3,
- 0x56, 0x7e, 0x0b, 0x59, 0x49, 0xc3, 0x20, 0x92, 0x0b, 0x59, 0x29, 0xc2,
- 0x13, 0xa9, 0x0b, 0x58, 0xf8, 0xc3, 0xe7, 0xe4, 0x0b, 0x5b, 0xa3, 0x02,
- 0x3f, 0xc5, 0xc7, 0xc3, 0xd3, 0x0b, 0x5a, 0x28, 0xca, 0xa7, 0x3e, 0x0b,
- 0x5b, 0x99, 0xc4, 0x14, 0xa2, 0x0b, 0x59, 0xc8, 0xc5, 0xd7, 0xc8, 0x0b,
- 0x5b, 0x71, 0xc4, 0xe0, 0x8f, 0x0b, 0x5a, 0x08, 0xc2, 0x02, 0x84, 0x0b,
- 0x5b, 0x21, 0x44, 0x19, 0x9e, 0x42, 0x3f, 0xcb, 0x0a, 0xc2, 0x3f, 0xe3,
- 0xc9, 0xaf, 0x25, 0x0b, 0x59, 0xc0, 0x00, 0xc2, 0x3f, 0xef, 0x95, 0x0b,
- 0x5a, 0xd8, 0x98, 0x0b, 0x58, 0xd9, 0x84, 0x0b, 0x58, 0xd0, 0x98, 0x0b,
- 0x58, 0x79, 0x84, 0x0b, 0x58, 0x70, 0x98, 0x0b, 0x58, 0x59, 0x84, 0x0b,
- 0x58, 0x50, 0x98, 0x0b, 0x58, 0x29, 0x84, 0x0b, 0x58, 0x20, 0x98, 0x0b,
- 0x58, 0xa9, 0x84, 0x0b, 0x58, 0xa0, 0x98, 0x0b, 0x58, 0x69, 0x84, 0x0b,
- 0x58, 0x60, 0x98, 0x0b, 0x58, 0x89, 0x84, 0x0b, 0x58, 0x80, 0x98, 0x0b,
- 0x58, 0x09, 0x84, 0x0b, 0x58, 0x00, 0xc5, 0x11, 0x01, 0x01, 0x81, 0x00,
- 0x45, 0x00, 0x6c, 0xc2, 0x3f, 0xfb, 0xc8, 0x7d, 0x21, 0x0f, 0xb2, 0x69,
- 0x14, 0xc2, 0x40, 0x17, 0xcd, 0x7f, 0x84, 0x0f, 0xb2, 0x39, 0xcf, 0x64,
- 0x07, 0x0f, 0xc9, 0xc1, 0x43, 0x03, 0x59, 0xc2, 0x40, 0x1d, 0xc8, 0xbf,
- 0x5d, 0x0f, 0xce, 0xb8, 0xc4, 0x04, 0x5e, 0x08, 0x48, 0xd9, 0x19, 0xc2,
- 0x40, 0x29, 0xc2, 0x00, 0x4d, 0x08, 0x48, 0xb8, 0xc8, 0x0c, 0x4a, 0x08,
- 0x48, 0xc8, 0xc2, 0x13, 0xa9, 0x08, 0x48, 0xa9, 0xc2, 0x00, 0x49, 0x08,
- 0x48, 0x40, 0xc3, 0x0d, 0xd9, 0x08, 0x48, 0xa1, 0xc3, 0x01, 0xf2, 0x08,
- 0x48, 0x89, 0xc3, 0x79, 0x80, 0x08, 0x48, 0x70, 0xc2, 0x00, 0x34, 0x08,
- 0x48, 0x79, 0xc2, 0x03, 0x2d, 0x08, 0x48, 0x00, 0x96, 0x08, 0x48, 0x38,
- 0x83, 0x05, 0x42, 0x01, 0xc2, 0x00, 0xa4, 0x05, 0x42, 0x08, 0x83, 0x05,
- 0x42, 0x11, 0xc2, 0x01, 0x29, 0x05, 0x43, 0x28, 0xc2, 0x01, 0x29, 0x05,
- 0x42, 0x19, 0xc2, 0x1d, 0x5f, 0x05, 0x42, 0x39, 0x83, 0x05, 0x42, 0x59,
- 0xc2, 0x00, 0xc1, 0x05, 0x43, 0x60, 0x83, 0x05, 0x42, 0x23, 0x02, 0x40,
- 0x33, 0xc2, 0x00, 0xa4, 0x05, 0x42, 0x28, 0x83, 0x05, 0x42, 0x41, 0xc2,
- 0x00, 0xa4, 0x05, 0x42, 0x49, 0x15, 0xc2, 0x40, 0x39, 0x16, 0x42, 0x40,
- 0x43, 0x83, 0x05, 0x42, 0x51, 0xc2, 0x02, 0xb4, 0x05, 0x42, 0x91, 0xc2,
- 0x0b, 0xc6, 0x05, 0x43, 0x58, 0x83, 0x05, 0x42, 0x61, 0xc2, 0x00, 0xa4,
- 0x05, 0x42, 0x68, 0xc2, 0x00, 0xa4, 0x05, 0x42, 0xa1, 0x83, 0x05, 0x42,
- 0xa8, 0xc6, 0x22, 0x81, 0x05, 0x42, 0xb1, 0xc2, 0x00, 0xa4, 0x05, 0x42,
- 0xd1, 0x83, 0x05, 0x42, 0xd8, 0xcb, 0x8e, 0xbc, 0x05, 0x43, 0x69, 0xcb,
- 0x93, 0x13, 0x05, 0x43, 0x80, 0x87, 0x05, 0x43, 0x30, 0xc8, 0xb7, 0x25,
- 0x05, 0x43, 0x71, 0xc4, 0x0c, 0x6a, 0x05, 0x43, 0x78, 0x4f, 0x5f, 0xd3,
- 0xc2, 0x40, 0x4d, 0xd2, 0x4d, 0xdc, 0x05, 0x43, 0x90, 0xc9, 0xb2, 0xa0,
- 0x08, 0x0e, 0x89, 0xc8, 0xb6, 0xe5, 0x08, 0x0f, 0x90, 0xc5, 0x65, 0xeb,
- 0x08, 0x0e, 0x99, 0xcd, 0x81, 0x17, 0x08, 0x0f, 0x11, 0x96, 0x08, 0x0f,
- 0x60, 0xc2, 0x00, 0x73, 0x08, 0x0f, 0x23, 0x02, 0x40, 0x5f, 0xc4, 0xe5,
- 0xff, 0x08, 0x0f, 0x30, 0x99, 0x08, 0x0e, 0xd1, 0xc7, 0xc5, 0x9a, 0x08,
- 0x0f, 0x08, 0xc4, 0x5d, 0xf1, 0x08, 0x0f, 0x38, 0xc3, 0x1e, 0x74, 0x08,
- 0x0e, 0xd9, 0x92, 0x08, 0x0f, 0x40, 0xc5, 0xdb, 0x24, 0x08, 0xd8, 0x49,
- 0xd8, 0x22, 0x68, 0x08, 0xd8, 0x41, 0x48, 0x26, 0xfb, 0xc2, 0x40, 0x65,
- 0xce, 0x70, 0xc5, 0x08, 0xd8, 0x23, 0x02, 0x40, 0x71, 0xc5, 0x22, 0x7b,
- 0x08, 0xd8, 0x13, 0x02, 0x40, 0x77, 0x42, 0x03, 0x32, 0xc2, 0x40, 0x7d,
- 0x03, 0xc2, 0x40, 0x89, 0xc5, 0x35, 0x00, 0x00, 0x49, 0xe1, 0xcb, 0x1e,
- 0x17, 0x00, 0x48, 0x0b, 0x02, 0x40, 0x95, 0xd4, 0x39, 0x6d, 0x00, 0x48,
- 0x01, 0x15, 0xc2, 0x40, 0x99, 0xc8, 0xb9, 0x55, 0x05, 0x47, 0xc1, 0xd9,
- 0x1e, 0x10, 0x05, 0x47, 0xa1, 0xd0, 0x5a, 0x82, 0x00, 0x4b, 0x88, 0xc8,
- 0x6d, 0xbb, 0x00, 0x4a, 0x91, 0xc6, 0x6d, 0xbd, 0x00, 0x4a, 0x88, 0xd0,
- 0x5a, 0x72, 0x08, 0xd8, 0x08, 0x99, 0x00, 0x4a, 0x79, 0x97, 0x00, 0x4a,
- 0x61, 0x8b, 0x00, 0x4a, 0x41, 0x83, 0x00, 0x49, 0xf1, 0x9b, 0x05, 0x47,
- 0xf8, 0xc2, 0x1b, 0xd8, 0x00, 0x49, 0xd9, 0x87, 0x00, 0x49, 0xd0, 0x91,
- 0x00, 0x4a, 0x51, 0x87, 0x00, 0x4a, 0x30, 0x91, 0x00, 0x4a, 0x49, 0x87,
- 0x00, 0x4a, 0x29, 0xc6, 0xd3, 0xff, 0x00, 0x4a, 0xa8, 0x94, 0x00, 0x4a,
- 0x1b, 0x02, 0x40, 0xa5, 0x8e, 0x00, 0x4b, 0x12, 0x02, 0x40, 0xa9, 0x97,
- 0x00, 0x4a, 0x13, 0x02, 0x40, 0xad, 0x87, 0x00, 0x4a, 0xb0, 0x8b, 0x00,
- 0x4a, 0x00, 0x83, 0x00, 0x49, 0xc9, 0xc7, 0xc1, 0x72, 0x00, 0x4b, 0xd0,
- 0x83, 0x00, 0x49, 0xc1, 0xc2, 0x0c, 0x65, 0x00, 0x49, 0xb9, 0x0a, 0x42,
- 0x40, 0xb1, 0x83, 0x00, 0x49, 0xa9, 0x47, 0xac, 0xc2, 0x42, 0x40, 0xbb,
- 0x0e, 0xc2, 0x40, 0xc9, 0x83, 0x00, 0x49, 0x90, 0xc2, 0x02, 0x59, 0x00,
- 0x49, 0x89, 0x83, 0x00, 0x49, 0x81, 0xc2, 0x00, 0xa4, 0x00, 0x4a, 0xe8,
- 0x83, 0x00, 0x49, 0x79, 0xc2, 0x1d, 0x5f, 0x00, 0x4a, 0xf8, 0xc9, 0xb0,
- 0x60, 0x00, 0x4b, 0xc0, 0xc2, 0x00, 0xa4, 0x00, 0x49, 0x69, 0x83, 0x00,
- 0x49, 0x61, 0xc2, 0x00, 0xde, 0x00, 0x4b, 0xf8, 0xc2, 0x00, 0xa4, 0x00,
- 0x49, 0x59, 0x83, 0x00, 0x49, 0x50, 0x10, 0xc2, 0x40, 0xd3, 0x83, 0x00,
- 0x49, 0x41, 0xc2, 0x1d, 0x5f, 0x00, 0x48, 0xf1, 0xc2, 0x01, 0x29, 0x00,
- 0x48, 0xc8, 0xc2, 0x00, 0xa4, 0x00, 0x49, 0x39, 0x83, 0x00, 0x49, 0x31,
- 0x06, 0x42, 0x40, 0xdd, 0xc2, 0x00, 0xa4, 0x00, 0x49, 0x29, 0x83, 0x00,
- 0x49, 0x21, 0x16, 0x42, 0x40, 0xeb, 0xc2, 0x00, 0xa4, 0x00, 0x48, 0xe9,
- 0x83, 0x00, 0x48, 0xe1, 0xc2, 0x24, 0x58, 0x00, 0x4b, 0xe0, 0xc2, 0x00,
- 0xa4, 0x00, 0x48, 0xd9, 0x83, 0x00, 0x48, 0xd2, 0x02, 0x40, 0xf5, 0x0a,
- 0xc2, 0x40, 0xfb, 0x83, 0x00, 0x48, 0xb9, 0xc2, 0x01, 0x29, 0x00, 0x4b,
- 0xd9, 0xcb, 0x21, 0x79, 0x00, 0x4b, 0xe8, 0x0a, 0xc2, 0x41, 0x05, 0x83,
- 0x00, 0x48, 0xa8, 0x97, 0x00, 0x48, 0xa1, 0x8b, 0x00, 0x48, 0x81, 0x83,
- 0x00, 0x48, 0x31, 0x9b, 0x05, 0x47, 0xf1, 0x99, 0x00, 0x4b, 0xa8, 0x87,
- 0x00, 0x4b, 0x99, 0xc2, 0x1b, 0xd8, 0x00, 0x4b, 0xa0, 0x97, 0x00, 0x48,
- 0x53, 0x02, 0x41, 0x0f, 0x87, 0x00, 0x4b, 0xb0, 0x8b, 0x00, 0x48, 0x40,
- 0x83, 0x00, 0x4a, 0xd9, 0xc2, 0x00, 0xa4, 0x00, 0x4b, 0xc8, 0xc4, 0x22,
- 0x71, 0x00, 0x4b, 0x79, 0xc5, 0x01, 0xdb, 0x00, 0x4b, 0x71, 0x15, 0xc2,
- 0x41, 0x13, 0x08, 0xc2, 0x41, 0x1f, 0x16, 0xc2, 0x41, 0x2b, 0xc3, 0x01,
- 0xb4, 0x00, 0x4b, 0x39, 0xc4, 0x15, 0xd3, 0x00, 0x4b, 0x30, 0x45, 0x2b,
- 0xd3, 0xc2, 0x41, 0x37, 0x46, 0x2b, 0xff, 0xc2, 0x41, 0x4d, 0xc2, 0x0c,
- 0x81, 0x08, 0x20, 0x61, 0x11, 0xc2, 0x41, 0x63, 0xc2, 0x13, 0x1d, 0x08,
- 0x20, 0x71, 0xc3, 0x19, 0x4c, 0x08, 0x20, 0x79, 0x8a, 0x08, 0x20, 0x81,
- 0xc3, 0x70, 0xfe, 0x08, 0x20, 0x89, 0xc3, 0xaf, 0x0a, 0x08, 0x20, 0x91,
- 0x16, 0xc2, 0x41, 0x6b, 0xc3, 0x7f, 0x8b, 0x08, 0x20, 0xa1, 0xc4, 0x42,
- 0x8d, 0x08, 0x20, 0xa9, 0xc3, 0x2d, 0xfe, 0x08, 0x20, 0xb1, 0xc3, 0x73,
- 0xbb, 0x08, 0x20, 0xb9, 0xc3, 0x95, 0x7b, 0x08, 0x20, 0xc1, 0x07, 0xc2,
- 0x41, 0x77, 0xc3, 0x0e, 0xc8, 0x08, 0x20, 0xd1, 0x1c, 0x42, 0x41, 0x9f,
- 0x45, 0x2b, 0xd3, 0xc2, 0x41, 0xab, 0x46, 0x2b, 0xff, 0xc2, 0x41, 0xc1,
- 0xc2, 0x0c, 0x81, 0x08, 0x21, 0xa1, 0x11, 0xc2, 0x41, 0xd7, 0xc2, 0x13,
- 0x1d, 0x08, 0x21, 0xb1, 0xc3, 0x19, 0x4c, 0x08, 0x21, 0xb9, 0x8a, 0x08,
- 0x21, 0xc1, 0xc3, 0x70, 0xfe, 0x08, 0x21, 0xc9, 0xc3, 0xaf, 0x0a, 0x08,
- 0x21, 0xd1, 0x16, 0xc2, 0x41, 0xdf, 0xc3, 0x7f, 0x8b, 0x08, 0x21, 0xe1,
- 0xc4, 0x42, 0x8d, 0x08, 0x21, 0xe9, 0xc3, 0x2d, 0xfe, 0x08, 0x21, 0xf1,
- 0xc3, 0x73, 0xbb, 0x08, 0x21, 0xf9, 0xc3, 0x95, 0x7b, 0x08, 0x22, 0x01,
- 0x07, 0xc2, 0x41, 0xeb, 0xc3, 0x0e, 0xc8, 0x08, 0x22, 0x11, 0x1c, 0x42,
- 0x42, 0x13, 0xc4, 0x01, 0x1e, 0x01, 0x1e, 0x61, 0xc5, 0x01, 0xf7, 0x01,
- 0x1d, 0xf8, 0xc4, 0x01, 0x1e, 0x01, 0x1e, 0x59, 0xc5, 0x01, 0xf7, 0x01,
- 0x1d, 0xf0, 0xc4, 0x9a, 0xfb, 0x0e, 0x98, 0x21, 0xc5, 0x6d, 0x5c, 0x0e,
- 0x98, 0x18, 0xc9, 0x11, 0xdc, 0x01, 0x24, 0x81, 0xc5, 0x00, 0xaa, 0x0f,
- 0x88, 0x50, 0xc9, 0x11, 0xdc, 0x01, 0x24, 0x79, 0xc5, 0x00, 0xaa, 0x0f,
- 0x88, 0x48, 0x00, 0x42, 0x42, 0x1f, 0x00, 0x42, 0x42, 0x2b, 0x00, 0x42,
- 0x42, 0x37, 0x00, 0x42, 0x42, 0x43, 0x00, 0x42, 0x42, 0x4f, 0x00, 0x42,
- 0x42, 0x5b, 0xc9, 0x11, 0xdc, 0x01, 0x24, 0x41, 0xc5, 0x00, 0xaa, 0x0f,
- 0x88, 0x10, 0xc9, 0x11, 0xdc, 0x0f, 0x88, 0x01, 0xc5, 0x00, 0xaa, 0x0f,
- 0x88, 0x08, 0xc4, 0x22, 0x71, 0x08, 0xca, 0xc9, 0xc5, 0x01, 0xdb, 0x08,
- 0xca, 0xc1, 0x15, 0xc2, 0x42, 0x67, 0x08, 0xc2, 0x42, 0x73, 0x16, 0xc2,
- 0x42, 0x7f, 0xc3, 0x01, 0xb4, 0x08, 0xca, 0x89, 0xc4, 0x15, 0xd3, 0x08,
- 0xca, 0x80, 0x91, 0x08, 0xc9, 0xc1, 0x03, 0xc2, 0x42, 0x8b, 0x87, 0x08,
- 0xc9, 0xa9, 0x97, 0x08, 0xc9, 0x9b, 0x02, 0x42, 0x93, 0x8b, 0x08, 0xc9,
- 0x8a, 0x02, 0x42, 0x97, 0xc2, 0x00, 0xc7, 0x08, 0xc9, 0x71, 0x83, 0x08,
- 0xc9, 0x40, 0x83, 0x08, 0xc9, 0x61, 0xc2, 0x0c, 0x65, 0x08, 0xc9, 0x59,
- 0xc2, 0x00, 0xa4, 0x08, 0xc9, 0x50, 0xc2, 0x1d, 0x5f, 0x08, 0xc9, 0x31,
- 0x83, 0x08, 0xc9, 0x28, 0xc2, 0x00, 0xa4, 0x08, 0xc9, 0x19, 0x83, 0x08,
- 0xc9, 0x10, 0xc2, 0x00, 0xa4, 0x08, 0xc9, 0x09, 0x83, 0x08, 0xc9, 0x00,
- 0x83, 0x08, 0xc8, 0xf9, 0xc2, 0x00, 0xc1, 0x08, 0xc8, 0xd1, 0xc2, 0x1d,
- 0x5f, 0x08, 0xc8, 0xa9, 0xc2, 0x01, 0x29, 0x08, 0xc8, 0x80, 0xc2, 0x00,
- 0xa4, 0x08, 0xc8, 0xf1, 0x83, 0x08, 0xc8, 0xe9, 0x06, 0x42, 0x42, 0x9b,
- 0xc2, 0x00, 0xa4, 0x08, 0xc8, 0xe1, 0x83, 0x08, 0xc8, 0xd9, 0xc2, 0x00,
- 0xb3, 0x08, 0xc8, 0xb0, 0xc2, 0x00, 0xa4, 0x08, 0xc8, 0x91, 0x83, 0x08,
- 0xc8, 0x88, 0xc2, 0x00, 0xa4, 0x08, 0xc8, 0x79, 0x83, 0x08, 0xc8, 0x70,
- 0xc2, 0x00, 0xa4, 0x08, 0xc8, 0x69, 0x83, 0x08, 0xc8, 0x60, 0x97, 0x08,
- 0xc8, 0x28, 0x8b, 0x08, 0xc8, 0x18, 0x83, 0x08, 0xc8, 0x08, 0xc4, 0x00,
- 0xe3, 0x01, 0x10, 0xa9, 0xc3, 0x00, 0xcc, 0x00, 0x07, 0xb8, 0xc4, 0x22,
- 0x71, 0x01, 0x3c, 0x91, 0xc5, 0x01, 0xdb, 0x01, 0x3c, 0x89, 0x15, 0xc2,
- 0x42, 0xa5, 0x08, 0xc2, 0x42, 0xb1, 0x16, 0xc2, 0x42, 0xbd, 0xc3, 0x01,
- 0xb4, 0x01, 0x3c, 0x51, 0xc4, 0x15, 0xd3, 0x0f, 0x88, 0x60, 0xc4, 0x18,
- 0x83, 0x01, 0x3b, 0xe1, 0xc2, 0x26, 0x51, 0x01, 0x3b, 0xd8, 0xc3, 0x0c,
- 0x5b, 0x01, 0x3b, 0xd1, 0xc3, 0x06, 0x9e, 0x01, 0x3b, 0xc8, 0xc4, 0x04,
- 0x5e, 0x01, 0x3b, 0xc1, 0xc2, 0x01, 0x47, 0x01, 0x3b, 0xb8, 0xc4, 0x18,
- 0x83, 0x01, 0x3c, 0x31, 0xc2, 0x26, 0x51, 0x01, 0x3c, 0x28, 0xc3, 0x0c,
- 0x5b, 0x01, 0x3c, 0x21, 0xc3, 0x06, 0x9e, 0x01, 0x3c, 0x18, 0xc4, 0x04,
- 0x5e, 0x01, 0x3c, 0x11, 0xc2, 0x01, 0x47, 0x01, 0x3c, 0x08, 0xcf, 0x6b,
- 0x2d, 0x01, 0x58, 0xb1, 0xd0, 0x5c, 0x22, 0x01, 0x58, 0xb9, 0xce, 0x72,
- 0x31, 0x01, 0x58, 0xc1, 0xd1, 0x50, 0xea, 0x01, 0x58, 0xc8, 0xc9, 0x37,
- 0x1e, 0x0f, 0xc8, 0x50, 0xc9, 0x37, 0x1e, 0x0f, 0xc8, 0x58, 0x42, 0x00,
- 0x5b, 0xc2, 0x42, 0xc9, 0x42, 0x01, 0x47, 0x42, 0x42, 0xd5, 0xcf, 0x5c,
- 0xe3, 0x0f, 0xc2, 0x99, 0xcc, 0x84, 0xdc, 0x0f, 0xc1, 0xd8, 0x45, 0x10,
- 0x38, 0xc2, 0x42, 0xe1, 0x51, 0x02, 0xd1, 0x42, 0x42, 0xed, 0xc4, 0x01,
- 0x10, 0x01, 0x0c, 0x9b, 0x02, 0x42, 0xf9, 0xc5, 0xd9, 0xcb, 0x01, 0x70,
- 0xa0, 0xda, 0x1d, 0x79, 0x0f, 0xc4, 0xb8, 0xcb, 0x82, 0xb5, 0x01, 0x0f,
- 0x19, 0xcb, 0x8c, 0x21, 0x01, 0x0e, 0x98, 0xc5, 0x01, 0x0f, 0x01, 0x58,
- 0x39, 0xd3, 0x42, 0x6c, 0x01, 0x5c, 0x58, 0xa3, 0x0f, 0x82, 0x99, 0x9d,
- 0x0f, 0x82, 0x69, 0x9e, 0x0f, 0x82, 0x71, 0x9f, 0x0f, 0x82, 0x79, 0xa0,
- 0x0f, 0x82, 0x81, 0xa1, 0x0f, 0x82, 0x89, 0xa2, 0x0f, 0x82, 0x90, 0xa3,
- 0x0f, 0x81, 0xf1, 0xa1, 0x0f, 0x81, 0xe1, 0x9d, 0x0f, 0x81, 0xc1, 0x9e,
- 0x0f, 0x81, 0xc9, 0x9f, 0x0f, 0x81, 0xd1, 0xa0, 0x0f, 0x81, 0xd9, 0xa2,
- 0x0f, 0x81, 0xe8, 0xa0, 0x0f, 0x81, 0xa1, 0x9f, 0x0f, 0x81, 0x99, 0x9e,
- 0x0f, 0x81, 0x91, 0x9d, 0x0f, 0x81, 0x89, 0xa1, 0x0f, 0x81, 0xa9, 0xa2,
- 0x0f, 0x81, 0xb1, 0xa3, 0x0f, 0x81, 0xb8, 0x9d, 0x0f, 0x81, 0xf9, 0x9e,
- 0x0f, 0x82, 0x01, 0x9f, 0x0f, 0x82, 0x09, 0xa0, 0x0f, 0x82, 0x11, 0xa1,
- 0x0f, 0x82, 0x19, 0xa2, 0x0f, 0x82, 0x21, 0xa3, 0x0f, 0x82, 0x28, 0x9d,
- 0x0f, 0x82, 0x31, 0x9e, 0x0f, 0x82, 0x39, 0x9f, 0x0f, 0x82, 0x41, 0xa0,
- 0x0f, 0x82, 0x49, 0xa1, 0x0f, 0x82, 0x51, 0xa2, 0x0f, 0x82, 0x59, 0xa3,
- 0x0f, 0x82, 0x60, 0x9d, 0x0f, 0x82, 0xa1, 0x9e, 0x0f, 0x82, 0xa9, 0x9f,
- 0x0f, 0x82, 0xb1, 0xa0, 0x0f, 0x82, 0xb9, 0xa1, 0x0f, 0x82, 0xc1, 0xa2,
- 0x0f, 0x82, 0xc9, 0xa3, 0x0f, 0x82, 0xd0, 0x9d, 0x0f, 0x82, 0xd9, 0x9e,
- 0x0f, 0x82, 0xe1, 0x9f, 0x0f, 0x82, 0xe9, 0xa0, 0x0f, 0x82, 0xf1, 0xa1,
- 0x0f, 0x82, 0xf9, 0xa2, 0x0f, 0x83, 0x01, 0xa3, 0x0f, 0x83, 0x08, 0x9d,
- 0x0f, 0x83, 0x19, 0x9e, 0x0f, 0x83, 0x21, 0x9f, 0x0f, 0x83, 0x29, 0xa0,
- 0x0f, 0x83, 0x31, 0xa1, 0x0f, 0x83, 0x39, 0xa2, 0x0f, 0x83, 0x41, 0xa3,
- 0x0f, 0x83, 0x48, 0x9d, 0x0f, 0x83, 0x51, 0x9e, 0x0f, 0x83, 0x59, 0x9f,
- 0x0f, 0x83, 0x61, 0xa0, 0x0f, 0x83, 0x69, 0xa1, 0x0f, 0x83, 0x71, 0xa2,
- 0x0f, 0x83, 0x79, 0xa3, 0x0f, 0x83, 0x80, 0x9d, 0x0f, 0x83, 0x89, 0x9e,
- 0x0f, 0x83, 0x91, 0x9f, 0x0f, 0x83, 0x99, 0xa0, 0x0f, 0x83, 0xa1, 0xa1,
- 0x0f, 0x83, 0xa9, 0xa2, 0x0f, 0x83, 0xb1, 0xa3, 0x0f, 0x83, 0xb8, 0x9d,
- 0x0f, 0x83, 0xc1, 0x9e, 0x0f, 0x83, 0xc9, 0x9f, 0x0f, 0x83, 0xd1, 0xa0,
- 0x0f, 0x83, 0xd9, 0xa1, 0x0f, 0x83, 0xe1, 0xa2, 0x0f, 0x83, 0xe9, 0xa3,
- 0x0f, 0x83, 0xf0, 0x9d, 0x0f, 0x83, 0xf9, 0x9e, 0x0f, 0x84, 0x01, 0x9f,
- 0x0f, 0x84, 0x09, 0xa0, 0x0f, 0x84, 0x11, 0xa1, 0x0f, 0x84, 0x19, 0xa2,
- 0x0f, 0x84, 0x21, 0xa3, 0x0f, 0x84, 0x28, 0x9e, 0x0f, 0x84, 0x39, 0x9f,
- 0x0f, 0x84, 0x41, 0xa0, 0x0f, 0x84, 0x49, 0xa1, 0x0f, 0x84, 0x51, 0xa2,
- 0x0f, 0x84, 0x59, 0xa3, 0x0f, 0x84, 0x61, 0x9d, 0x0f, 0x84, 0x30, 0x9d,
- 0x0f, 0x84, 0x69, 0x9e, 0x0f, 0x84, 0x71, 0x9f, 0x0f, 0x84, 0x79, 0xa0,
- 0x0f, 0x84, 0x81, 0xa1, 0x0f, 0x84, 0x89, 0xa2, 0x0f, 0x84, 0x91, 0xa3,
- 0x0f, 0x84, 0x98, 0xc9, 0xab, 0x35, 0x01, 0x3d, 0xf9, 0x47, 0x20, 0x56,
- 0xc2, 0x42, 0xfd, 0xca, 0x9c, 0x12, 0x01, 0x53, 0xa0, 0xc3, 0x00, 0xe8,
- 0x01, 0x1f, 0xc3, 0x02, 0x43, 0x09, 0xc4, 0x00, 0x8a, 0x01, 0x00, 0xb0,
- 0xc4, 0x13, 0x8a, 0x01, 0x16, 0x99, 0xc6, 0xc9, 0x3e, 0x01, 0x57, 0x58,
- 0xc8, 0x08, 0x9f, 0x01, 0x16, 0x91, 0xc4, 0x1e, 0xe4, 0x01, 0x11, 0x60,
- 0x17, 0xc2, 0x43, 0x0d, 0x46, 0x1e, 0xfc, 0xc2, 0x43, 0x25, 0x16, 0xc2,
- 0x43, 0x31, 0xcf, 0x61, 0xbe, 0x01, 0x57, 0xe8, 0x14, 0xc2, 0x43, 0x3d,
- 0xc3, 0x26, 0x13, 0x01, 0x4f, 0xd0, 0xc5, 0xd5, 0x20, 0x01, 0x01, 0x09,
- 0xc8, 0x36, 0xa7, 0x01, 0x57, 0x50, 0xdd, 0x0f, 0xbb, 0x01, 0x00, 0xf9,
- 0xc5, 0x5a, 0xa3, 0x01, 0x72, 0x00, 0x11, 0xc2, 0x43, 0x4c, 0xdc, 0x12,
- 0x76, 0x01, 0x4c, 0xa8, 0xc9, 0x0a, 0x4a, 0x01, 0x55, 0x0b, 0x02, 0x43,
- 0x56, 0xcc, 0x00, 0x9b, 0x01, 0x55, 0x10, 0x47, 0xca, 0xe1, 0xc2, 0x43,
- 0x5c, 0xcf, 0x6a, 0xc4, 0x01, 0x0a, 0x01, 0x48, 0x07, 0x17, 0xc2, 0x43,
- 0x68, 0x46, 0x04, 0x73, 0x42, 0x43, 0x8d, 0x4c, 0x21, 0xc0, 0xc2, 0x43,
- 0x99, 0x48, 0x03, 0x1a, 0x42, 0x43, 0xa5, 0xc4, 0x0f, 0x7c, 0x08, 0xc1,
- 0xc9, 0xc5, 0x44, 0x7b, 0x08, 0xc1, 0xc0, 0x97, 0x08, 0xc1, 0xb1, 0x8b,
- 0x08, 0xc1, 0xa1, 0x83, 0x08, 0xc1, 0x60, 0x94, 0x08, 0xc1, 0x90, 0x97,
- 0x08, 0xc1, 0x80, 0x8b, 0x08, 0xc1, 0x70, 0xc2, 0x02, 0x59, 0x08, 0xc1,
- 0x59, 0x83, 0x08, 0xc1, 0x20, 0x83, 0x08, 0xc1, 0x49, 0xc2, 0x0c, 0x65,
- 0x08, 0xc1, 0x41, 0xc2, 0x00, 0xa4, 0x08, 0xc1, 0x38, 0xc2, 0x00, 0xa4,
- 0x08, 0xc1, 0x09, 0x83, 0x08, 0xc1, 0x00, 0xc2, 0x00, 0xa4, 0x08, 0xc0,
- 0xf9, 0x83, 0x08, 0xc0, 0xf0, 0x83, 0x08, 0xc0, 0xe9, 0xc2, 0x00, 0xc1,
- 0x08, 0xc0, 0xc1, 0xc2, 0x1d, 0x5f, 0x08, 0xc0, 0x99, 0xc2, 0x01, 0x29,
- 0x08, 0xc0, 0x70, 0xc2, 0x00, 0xa4, 0x08, 0xc0, 0xe1, 0x83, 0x08, 0xc0,
- 0xd9, 0x06, 0x42, 0x43, 0xb7, 0xc2, 0x00, 0xa4, 0x08, 0xc0, 0xd1, 0x83,
- 0x08, 0xc0, 0xc9, 0x16, 0x42, 0x43, 0xc1, 0xc2, 0x00, 0xa4, 0x08, 0xc0,
- 0x91, 0x83, 0x08, 0xc0, 0x88, 0xc2, 0x00, 0xa4, 0x08, 0xc0, 0x81, 0x83,
- 0x08, 0xc0, 0x78, 0xc2, 0x00, 0xa4, 0x08, 0xc0, 0x69, 0x83, 0x08, 0xc0,
- 0x60, 0xc2, 0x00, 0xa4, 0x08, 0xc0, 0x59, 0x83, 0x08, 0xc0, 0x50, 0x97,
- 0x08, 0xc0, 0x49, 0x8b, 0x08, 0xc0, 0x39, 0x83, 0x08, 0xc0, 0x08, 0x97,
- 0x08, 0xc0, 0x28, 0x8b, 0x08, 0xc0, 0x18, 0x03, 0xc2, 0x43, 0xcb, 0xc8,
- 0x03, 0x3b, 0x0d, 0xe4, 0xc3, 0x02, 0x43, 0xd7, 0xc4, 0x53, 0x38, 0x0d,
- 0xe4, 0xb9, 0x0e, 0xc2, 0x43, 0xdd, 0xc6, 0x01, 0x7a, 0x0d, 0xe4, 0xa9,
- 0xc3, 0x01, 0x4a, 0x0d, 0xe4, 0xa1, 0xc5, 0x1f, 0x94, 0x0d, 0xe4, 0x91,
- 0xcb, 0x95, 0x2e, 0x0d, 0xe4, 0x88, 0xc7, 0x27, 0x22, 0x0d, 0xe3, 0xa8,
- 0xc3, 0x00, 0x8b, 0x0d, 0xe4, 0x31, 0xc9, 0xb1, 0x1d, 0x0d, 0xe4, 0x18,
- 0xc5, 0xd9, 0x58, 0x0d, 0xe3, 0xc3, 0x02, 0x43, 0xe9, 0xc2, 0x00, 0x31,
- 0x0d, 0xe3, 0xc8, 0x99, 0x0d, 0xe3, 0x00, 0xc3, 0x00, 0xc9, 0x0d, 0xe1,
- 0xb9, 0x95, 0x0d, 0xe1, 0xb0, 0x92, 0x0d, 0xe1, 0xa3, 0x02, 0x43, 0xef,
- 0x96, 0x0d, 0xe1, 0x93, 0x02, 0x43, 0xf5, 0x8c, 0x0d, 0xe1, 0x03, 0x02,
- 0x43, 0xfb, 0x95, 0x0d, 0xe1, 0x51, 0xc8, 0x37, 0x1f, 0x0d, 0xe1, 0x2b,
- 0x02, 0x44, 0x01, 0x8d, 0x0d, 0xe1, 0xfb, 0x02, 0x44, 0x07, 0x8f, 0x0d,
- 0xe1, 0xe1, 0x90, 0x0d, 0xe1, 0xd8, 0x8c, 0x0d, 0xe0, 0xa9, 0xc2, 0x44,
- 0x76, 0x0d, 0xe0, 0x91, 0x11, 0xc2, 0x44, 0x0d, 0xc2, 0x00, 0x57, 0x0d,
- 0xe3, 0x41, 0x07, 0xc2, 0x44, 0x15, 0x97, 0x0d, 0xe2, 0xc0, 0x90, 0x0d,
- 0xe1, 0x83, 0x02, 0x44, 0x21, 0x95, 0x0d, 0xe1, 0x4b, 0x02, 0x44, 0x27,
- 0x8f, 0x0d, 0xe0, 0xfb, 0x02, 0x44, 0x2d, 0xc8, 0x37, 0x1f, 0x0d, 0xe1,
- 0x1a, 0x02, 0x44, 0x33, 0x8f, 0x0d, 0xe0, 0xf3, 0x02, 0x44, 0x39, 0x95,
- 0x0d, 0xe1, 0x41, 0xc8, 0x37, 0x1f, 0x0d, 0xe1, 0x10, 0x83, 0x0d, 0xe3,
- 0x21, 0x8b, 0x0d, 0xe3, 0x19, 0x91, 0x0d, 0xe3, 0x11, 0x97, 0x0d, 0xe3,
- 0x08, 0x90, 0x0d, 0xe0, 0xeb, 0x02, 0x44, 0x3f, 0x95, 0x0d, 0xe1, 0x39,
- 0xc8, 0x37, 0x1f, 0x0d, 0xe1, 0x08, 0x97, 0x0d, 0xe2, 0xb1, 0x8b, 0x0d,
- 0xe2, 0x68, 0x97, 0x0d, 0xe2, 0xa9, 0x8b, 0x0d, 0xe2, 0x78, 0x8f, 0x0d,
- 0xe0, 0x79, 0xc3, 0x00, 0xc9, 0x0d, 0xe1, 0xe8, 0x8f, 0x0d, 0xe3, 0x31,
- 0x90, 0x0d, 0xe3, 0x28, 0xc7, 0x19, 0xed, 0x00, 0x04, 0x69, 0xde, 0x0e,
- 0x8e, 0x0f, 0xbe, 0x40, 0x00, 0x42, 0x44, 0x45, 0xcf, 0x0b, 0x98, 0x01,
- 0x5a, 0x09, 0xd0, 0x01, 0xf7, 0x01, 0x5a, 0x38, 0xda, 0x1d, 0x45, 0x01,
- 0x30, 0xc9, 0xdf, 0x0c, 0x66, 0x0f, 0xac, 0x89, 0xca, 0x3e, 0xd2, 0x01,
- 0x5f, 0xf0, 0xc4, 0x21, 0x13, 0x01, 0x11, 0xeb, 0x02, 0x44, 0x57, 0xcb,
- 0x92, 0xf2, 0x01, 0x01, 0xb9, 0x46, 0xd1, 0xc5, 0x42, 0x44, 0x5d, 0xd3,
- 0x43, 0x89, 0x01, 0x0a, 0x19, 0xc8, 0x4f, 0x30, 0x01, 0x02, 0x78, 0xcb,
- 0x8f, 0xae, 0x01, 0x02, 0x59, 0xc4, 0x18, 0x48, 0x01, 0x01, 0xa8, 0xc5,
- 0x18, 0x47, 0x01, 0x01, 0xb3, 0x02, 0x44, 0x69, 0xcf, 0x69, 0x7a, 0x01,
- 0x57, 0x68, 0xce, 0x50, 0x87, 0x01, 0x4d, 0x28, 0xca, 0xa8, 0x4c, 0x01,
- 0x33, 0xc9, 0xca, 0xa9, 0x14, 0x01, 0x33, 0xc1, 0xca, 0xa3, 0x2e, 0x01,
- 0x33, 0xb9, 0xca, 0xa5, 0x36, 0x01, 0x33, 0xb1, 0xca, 0xa4, 0x8c, 0x01,
- 0x33, 0xa9, 0xca, 0xa7, 0xd4, 0x01, 0x33, 0xa1, 0xca, 0x9b, 0xcc, 0x01,
- 0x33, 0x98, 0x83, 0x05, 0x4a, 0x71, 0x97, 0x05, 0x4a, 0x68, 0x97, 0x05,
- 0x4a, 0x61, 0x8b, 0x05, 0x4a, 0x50, 0xc2, 0x24, 0x58, 0x05, 0x4a, 0x29,
- 0x83, 0x05, 0x49, 0xd8, 0xc2, 0x01, 0x29, 0x05, 0x4a, 0x19, 0x83, 0x05,
- 0x49, 0x90, 0xd1, 0x40, 0x0e, 0x0f, 0xdc, 0x59, 0xd0, 0x06, 0xd7, 0x01,
- 0x16, 0x60, 0x00, 0x42, 0x44, 0x6f, 0xd3, 0x01, 0x94, 0x01, 0x00, 0xc9,
- 0xd0, 0x5f, 0xf2, 0x01, 0x71, 0x38, 0xca, 0x71, 0x00, 0x0f, 0xaf, 0x49,
- 0xc4, 0x22, 0xdc, 0x0f, 0xab, 0x42, 0x02, 0x44, 0x87, 0x49, 0x01, 0x11,
- 0xc2, 0x44, 0x8d, 0xd6, 0x12, 0x7c, 0x01, 0x4c, 0xa0, 0x09, 0xc2, 0x44,
- 0x99, 0x42, 0x00, 0x59, 0x42, 0x44, 0xa8, 0xcc, 0x01, 0xdb, 0x01, 0x2c,
- 0xa9, 0xcd, 0x19, 0x0b, 0x0f, 0xdc, 0x38, 0x42, 0x01, 0xe2, 0xc2, 0x44,
- 0xb4, 0xcc, 0x06, 0x1b, 0x0f, 0xdc, 0x69, 0xcb, 0x8f, 0xf0, 0x0f, 0xdd,
- 0x99, 0xc6, 0xa1, 0x0c, 0x0f, 0xdd, 0xd0, 0x00, 0x42, 0x44, 0xc0, 0xca,
- 0xa3, 0x6a, 0x01, 0x1d, 0x01, 0xc9, 0x4f, 0x27, 0x01, 0x1c, 0xf9, 0xca,
- 0xa0, 0xf4, 0x01, 0x1c, 0xf0, 0xc7, 0xa9, 0xc6, 0x01, 0x4b, 0xe9, 0xd0,
- 0x49, 0x28, 0x0f, 0xdc, 0x48, 0x44, 0x01, 0xe9, 0xc2, 0x44, 0xd2, 0xd3,
- 0x43, 0x76, 0x01, 0x70, 0x50, 0xcc, 0x8c, 0x50, 0x0f, 0xaf, 0x69, 0x44,
- 0x04, 0x5f, 0xc2, 0x44, 0xe1, 0xde, 0x08, 0x29, 0x0f, 0xde, 0x18, 0xce,
- 0x00, 0xb0, 0x01, 0x00, 0xe9, 0xcc, 0x8a, 0xb8, 0x01, 0x4e, 0xd9, 0x03,
- 0xc2, 0x44, 0xed, 0xcb, 0x1a, 0x3f, 0x01, 0x71, 0x48, 0xcb, 0x1a, 0x3f,
- 0x01, 0x4c, 0x31, 0x05, 0xc2, 0x44, 0xf9, 0xd2, 0x23, 0xbe, 0x01, 0x80,
- 0xb9, 0xd6, 0x0a, 0x88, 0x01, 0x80, 0xc9, 0xce, 0x24, 0xb2, 0x01, 0x80,
- 0xd8, 0x00, 0x42, 0x45, 0x05, 0x45, 0x01, 0x93, 0xc2, 0x45, 0x11, 0x44,
- 0x17, 0x34, 0x42, 0x45, 0x1d, 0xcd, 0x7e, 0x0b, 0x01, 0x0d, 0x01, 0x48,
- 0x01, 0xef, 0x42, 0x45, 0x29, 0xcb, 0x6c, 0xe6, 0x01, 0x0e, 0xe9, 0xca,
- 0x84, 0xde, 0x0f, 0xc1, 0xd0, 0xd0, 0x58, 0x92, 0x0f, 0xc2, 0x11, 0xc5,
- 0x01, 0x0f, 0x0f, 0xc2, 0x30, 0x46, 0x02, 0xd2, 0xc2, 0x45, 0x35, 0xc2,
- 0x00, 0x58, 0x0f, 0xd7, 0x88, 0x45, 0x00, 0x6c, 0xc2, 0x45, 0x41, 0x16,
- 0xc2, 0x45, 0x7d, 0xd4, 0x3e, 0x31, 0x01, 0x0e, 0x21, 0xc8, 0xb2, 0xf2,
- 0x01, 0x0d, 0x33, 0x02, 0x45, 0x89, 0x03, 0x42, 0x45, 0x8f, 0xc5, 0x01,
- 0x0f, 0x01, 0x0e, 0x93, 0x02, 0x45, 0x9b, 0xca, 0x52, 0x78, 0x01, 0x48,
- 0x68, 0xd3, 0x42, 0x6c, 0x01, 0x5c, 0x51, 0xc5, 0x01, 0x0f, 0x01, 0x5c,
- 0xa8, 0xca, 0x52, 0x12, 0x00, 0x7e, 0xb8, 0xc7, 0x0c, 0x4b, 0x01, 0x0b,
- 0x6b, 0x02, 0x45, 0xa5, 0xc8, 0x50, 0x00, 0x01, 0x0b, 0x7a, 0x02, 0x45,
- 0xab, 0xc3, 0x43, 0xcd, 0x01, 0x0b, 0x63, 0x02, 0x45, 0xb1, 0xc2, 0x00,
- 0x7b, 0x01, 0x0b, 0x22, 0x02, 0x45, 0xb5, 0xca, 0xa8, 0x2e, 0x01, 0x0c,
- 0x28, 0xc9, 0x4f, 0xff, 0x01, 0x0c, 0x10, 0xc4, 0x21, 0x31, 0x01, 0x0b,
- 0x59, 0x91, 0x01, 0x0b, 0x08, 0xc8, 0xb8, 0x4d, 0x08, 0x0c, 0x81, 0xc8,
- 0x40, 0x2a, 0x08, 0x0c, 0x98, 0x44, 0x1a, 0x0d, 0xc2, 0x45, 0xb9, 0xcf,
- 0x0c, 0x76, 0x0f, 0xac, 0x80, 0xc8, 0x0c, 0x4a, 0x08, 0x73, 0xc1, 0xc2,
- 0x0c, 0x57, 0x08, 0x73, 0x78, 0xc8, 0x0c, 0x4a, 0x08, 0x73, 0xb9, 0xc2,
- 0x0c, 0x57, 0x08, 0x73, 0x70, 0xca, 0x37, 0x4d, 0x08, 0x73, 0xb1, 0xc3,
- 0x43, 0xcd, 0x08, 0x73, 0x68, 0xca, 0xa2, 0xac, 0x08, 0x73, 0xa9, 0xc3,
- 0x0c, 0x56, 0x08, 0x73, 0x60, 0xcb, 0x13, 0xe3, 0x08, 0x73, 0xa1, 0xc4,
- 0x0c, 0x55, 0x08, 0x73, 0x58, 0xc9, 0x18, 0x78, 0x08, 0x73, 0x99, 0xc4,
- 0x18, 0x85, 0x08, 0x73, 0x50, 0x4d, 0x7a, 0xf2, 0xc2, 0x45, 0xbf, 0xcd,
- 0x78, 0x0d, 0x00, 0xb5, 0x00, 0x91, 0x00, 0xb7, 0x99, 0xce, 0x6d, 0xa7,
- 0x00, 0xb6, 0xf9, 0xc5, 0xd9, 0x12, 0x00, 0xb6, 0xa9, 0x90, 0x00, 0xb5,
- 0x81, 0x87, 0x00, 0xb5, 0x79, 0xc3, 0x01, 0x73, 0x00, 0xb5, 0x48, 0x8a,
- 0x00, 0xb7, 0x93, 0x02, 0x45, 0xd5, 0xc3, 0x14, 0xa9, 0x00, 0xb7, 0x29,
- 0xd6, 0x30, 0x77, 0x00, 0xb6, 0x59, 0xc7, 0xc9, 0xf3, 0x00, 0xb6, 0x50,
- 0x43, 0x3f, 0x7a, 0x42, 0x45, 0xdb, 0xcb, 0x9a, 0x35, 0x00, 0xb7, 0x41,
- 0xc2, 0x00, 0xbf, 0x00, 0xb7, 0x09, 0xc2, 0x00, 0x35, 0x00, 0xb6, 0xeb,
- 0x02, 0x45, 0xe5, 0xc7, 0xc2, 0xe5, 0x00, 0xb6, 0x39, 0xcc, 0x82, 0x24,
- 0x00, 0xb6, 0x08, 0x4b, 0x30, 0x7d, 0xc2, 0x45, 0xeb, 0xd1, 0x50, 0xa6,
- 0x00, 0xb6, 0xd0, 0x07, 0xc2, 0x46, 0x09, 0xc3, 0x3d, 0x8f, 0x00, 0xb7,
- 0x19, 0xc6, 0xd3, 0x63, 0x00, 0xb7, 0x10, 0xc2, 0x00, 0xbb, 0x00, 0xb7,
- 0x01, 0xc9, 0xa9, 0x6a, 0x00, 0xb6, 0xb1, 0xc2, 0x00, 0x35, 0x00, 0xb5,
- 0xb1, 0xc2, 0x00, 0x6e, 0x00, 0xb5, 0x38, 0xcb, 0x96, 0x78, 0x00, 0xb6,
- 0xf1, 0x46, 0xcc, 0x37, 0x42, 0x46, 0x13, 0xce, 0x74, 0x99, 0x00, 0xb6,
- 0x79, 0xd3, 0x40, 0x45, 0x00, 0xb5, 0x30, 0xca, 0xa6, 0x58, 0x00, 0xb6,
- 0x49, 0xc3, 0x22, 0x39, 0x00, 0xb5, 0x59, 0xc3, 0x16, 0x96, 0x00, 0xb5,
- 0x51, 0xc6, 0xcd, 0x4b, 0x00, 0xb5, 0x40, 0x07, 0xc2, 0x46, 0x1f, 0xc2,
- 0x00, 0xbb, 0x00, 0xb5, 0xc0, 0xc5, 0xd5, 0x39, 0x00, 0xb5, 0xd9, 0xc6,
- 0xcf, 0xd3, 0x00, 0xb5, 0xd0, 0xcb, 0x99, 0x64, 0x00, 0xb5, 0xc8, 0x94,
- 0x00, 0xb5, 0x18, 0x87, 0x05, 0x28, 0x03, 0x02, 0x46, 0x29, 0x90, 0x05,
- 0x2f, 0x10, 0x87, 0x05, 0x2f, 0x23, 0x02, 0x46, 0x2d, 0x8b, 0x05, 0x29,
- 0x33, 0x02, 0x46, 0x35, 0x83, 0x05, 0x2a, 0x63, 0x02, 0x46, 0x39, 0x91,
- 0x05, 0x2d, 0xeb, 0x02, 0x46, 0x3d, 0x97, 0x05, 0x2c, 0xba, 0x02, 0x46,
- 0x45, 0x87, 0x05, 0x2f, 0x33, 0x02, 0x46, 0x49, 0x8b, 0x05, 0x29, 0x43,
- 0x02, 0x46, 0x54, 0x83, 0x05, 0x2a, 0x73, 0x02, 0x46, 0x58, 0x91, 0x05,
- 0x2d, 0xfb, 0x02, 0x46, 0x5c, 0x97, 0x05, 0x2c, 0xca, 0x02, 0x46, 0x67,
- 0x87, 0x05, 0x2f, 0x43, 0x02, 0x46, 0x6b, 0x8b, 0x05, 0x29, 0x51, 0x83,
- 0x05, 0x2a, 0x81, 0x91, 0x05, 0x2e, 0x0b, 0x02, 0x46, 0x6f, 0x97, 0x05,
- 0x2c, 0xd8, 0x0a, 0xc2, 0x46, 0x73, 0x87, 0x05, 0x2f, 0x53, 0x02, 0x46,
- 0x8d, 0x8b, 0x05, 0x29, 0x61, 0x83, 0x05, 0x2a, 0x91, 0x91, 0x05, 0x2e,
- 0x1b, 0x02, 0x46, 0x91, 0x97, 0x05, 0x2c, 0xe8, 0x04, 0xc2, 0x46, 0x95,
- 0x42, 0x8b, 0x90, 0xc2, 0x46, 0xaf, 0x87, 0x05, 0x30, 0x43, 0x02, 0x46,
- 0xc9, 0x8b, 0x05, 0x2a, 0x31, 0x83, 0x05, 0x2b, 0x71, 0x91, 0x05, 0x2e,
- 0xf3, 0x02, 0x46, 0xcd, 0x97, 0x05, 0x2d, 0xb8, 0x12, 0xc2, 0x46, 0xd1,
- 0x87, 0x05, 0x30, 0x1b, 0x02, 0x46, 0xee, 0x8b, 0x05, 0x2a, 0x19, 0x83,
- 0x05, 0x2b, 0x53, 0x02, 0x46, 0xf2, 0x91, 0x05, 0x2e, 0xdb, 0x02, 0x46,
- 0xf6, 0x97, 0x05, 0x2d, 0xa0, 0x04, 0xc2, 0x46, 0xfa, 0x87, 0x05, 0x30,
- 0x33, 0x02, 0x47, 0x14, 0x8b, 0x05, 0x2a, 0x29, 0x83, 0x05, 0x2b, 0x69,
- 0x91, 0x05, 0x2e, 0xeb, 0x02, 0x47, 0x1c, 0x97, 0x05, 0x2d, 0xb0, 0x87,
- 0x05, 0x2f, 0x8b, 0x02, 0x47, 0x20, 0x8b, 0x05, 0x29, 0x89, 0x83, 0x05,
- 0x2a, 0xc1, 0x91, 0x05, 0x2e, 0x4b, 0x02, 0x47, 0x24, 0x97, 0x05, 0x2d,
- 0x10, 0x87, 0x05, 0x2f, 0x93, 0x02, 0x47, 0x28, 0x8b, 0x05, 0x29, 0x91,
- 0x83, 0x05, 0x2a, 0xc9, 0x91, 0x05, 0x2e, 0x53, 0x02, 0x47, 0x2c, 0x97,
- 0x05, 0x2d, 0x18, 0x87, 0x05, 0x2f, 0x9b, 0x02, 0x47, 0x30, 0x0a, 0xc2,
- 0x47, 0x34, 0x8b, 0x05, 0x29, 0x99, 0x83, 0x05, 0x2a, 0xd1, 0x91, 0x05,
- 0x2e, 0x5b, 0x02, 0x47, 0x4e, 0x97, 0x05, 0x2d, 0x20, 0x0a, 0xc2, 0x47,
- 0x52, 0x87, 0x05, 0x2f, 0xcb, 0x02, 0x47, 0x70, 0x8b, 0x05, 0x29, 0xc9,
- 0x83, 0x05, 0x2b, 0x01, 0x91, 0x05, 0x2e, 0x8b, 0x02, 0x47, 0x74, 0x97,
- 0x05, 0x2d, 0x50, 0x87, 0x05, 0x2f, 0xbb, 0x02, 0x47, 0x78, 0x8b, 0x05,
- 0x29, 0xb9, 0x83, 0x05, 0x2a, 0xf1, 0x91, 0x05, 0x2e, 0x7b, 0x02, 0x47,
- 0x82, 0x97, 0x05, 0x2d, 0x40, 0x87, 0x05, 0x2f, 0xc3, 0x02, 0x47, 0x86,
- 0x8b, 0x05, 0x29, 0xc1, 0x83, 0x05, 0x2a, 0xf9, 0x91, 0x05, 0x2e, 0x83,
- 0x02, 0x47, 0x8a, 0x97, 0x05, 0x2d, 0x48, 0x06, 0xc2, 0x47, 0x8e, 0x0c,
- 0xc2, 0x47, 0xa8, 0x89, 0x05, 0x30, 0x5b, 0x02, 0x47, 0xc2, 0x87, 0x05,
- 0x30, 0x4b, 0x02, 0x47, 0xd8, 0x1b, 0xc2, 0x47, 0xdc, 0x8b, 0x05, 0x2a,
- 0x39, 0x83, 0x05, 0x2b, 0x79, 0x91, 0x05, 0x2e, 0xfb, 0x02, 0x47, 0xf6,
- 0x97, 0x05, 0x2d, 0xc0, 0x87, 0x05, 0x2f, 0xdb, 0x02, 0x47, 0xfa, 0x0a,
- 0xc2, 0x47, 0xfe, 0x8b, 0x05, 0x29, 0xd9, 0x83, 0x05, 0x2b, 0x11, 0x91,
- 0x05, 0x2e, 0x9b, 0x02, 0x48, 0x18, 0x97, 0x05, 0x2d, 0x60, 0x87, 0x05,
- 0x2f, 0xeb, 0x02, 0x48, 0x1c, 0x0a, 0xc2, 0x48, 0x20, 0x8b, 0x05, 0x29,
- 0xe9, 0x83, 0x05, 0x2b, 0x21, 0x91, 0x05, 0x2e, 0xab, 0x02, 0x48, 0x3a,
- 0x97, 0x05, 0x2d, 0x70, 0x87, 0x05, 0x2f, 0xfb, 0x02, 0x48, 0x3e, 0x8b,
- 0x05, 0x29, 0xf9, 0x83, 0x05, 0x2b, 0x31, 0x91, 0x05, 0x2e, 0xbb, 0x02,
- 0x48, 0x42, 0x97, 0x05, 0x2d, 0x80, 0x87, 0x05, 0x30, 0x03, 0x02, 0x48,
- 0x46, 0x8b, 0x05, 0x2a, 0x01, 0x83, 0x05, 0x2b, 0x39, 0x91, 0x05, 0x2e,
- 0xc3, 0x02, 0x48, 0x4a, 0x97, 0x05, 0x2d, 0x88, 0x87, 0x05, 0x30, 0x13,
- 0x02, 0x48, 0x4e, 0x8b, 0x05, 0x2a, 0x11, 0x83, 0x05, 0x2b, 0x49, 0x91,
- 0x05, 0x2e, 0xd3, 0x02, 0x48, 0x52, 0x97, 0x05, 0x2d, 0x98, 0x90, 0x05,
- 0x29, 0x28, 0x90, 0x05, 0x2a, 0x50, 0x91, 0x05, 0x2b, 0x8b, 0x02, 0x48,
- 0x56, 0x90, 0x05, 0x2d, 0xd8, 0x90, 0x05, 0x2c, 0xb0, 0xc4, 0xe4, 0x03,
- 0x05, 0x30, 0x99, 0xc2, 0x14, 0x40, 0x05, 0x30, 0xc0, 0xc4, 0xe4, 0x03,
- 0x05, 0x30, 0xa1, 0xc3, 0x3f, 0x7b, 0x05, 0x30, 0xe0, 0xc3, 0x00, 0x34,
- 0x05, 0x30, 0xa9, 0xc2, 0x14, 0x40, 0x05, 0x30, 0xc9, 0xc3, 0x02, 0xa8,
- 0x05, 0x30, 0xe8, 0xc3, 0x01, 0x93, 0x05, 0x30, 0xd1, 0x11, 0x42, 0x48,
- 0x5a, 0xc9, 0x4f, 0x27, 0x01, 0x1e, 0x81, 0x45, 0x00, 0x6c, 0x42, 0x48,
- 0x66, 0xc7, 0x33, 0x21, 0x00, 0x00, 0x5b, 0x02, 0x48, 0x72, 0xc4, 0x3b,
- 0x42, 0x01, 0x5b, 0xf8, 0x00, 0x42, 0x48, 0x78, 0xcb, 0x99, 0xfe, 0x01,
- 0x81, 0xa0, 0xcf, 0x15, 0x8e, 0x0f, 0xbd, 0xf9, 0xd2, 0x21, 0x36, 0x0f,
- 0xbe, 0x80, 0xc6, 0x01, 0x7a, 0x0f, 0xbc, 0x41, 0xc6, 0x07, 0x09, 0x0f,
- 0xbc, 0x90, 0xc6, 0x27, 0xf9, 0x0f, 0xb3, 0xe1, 0xc6, 0x13, 0x57, 0x0f,
- 0xbd, 0x69, 0xd2, 0x4c, 0xf2, 0x0f, 0xbd, 0xc8, 0xce, 0x6f, 0x05, 0x00,
- 0xe7, 0x89, 0xcb, 0x97, 0xa1, 0x00, 0xe7, 0x5b, 0x02, 0x48, 0x84, 0xcc,
- 0x89, 0x20, 0x00, 0xe7, 0x51, 0xcc, 0x13, 0x4a, 0x00, 0xe7, 0x48, 0xc8,
- 0x6d, 0xbb, 0x00, 0xe7, 0x31, 0xc6, 0x6d, 0xbd, 0x00, 0xe7, 0x20, 0xca,
- 0x9f, 0xb4, 0x00, 0xe7, 0x40, 0xca, 0x9f, 0xb4, 0x00, 0xe7, 0x38, 0xca,
- 0x9e, 0x60, 0x00, 0xe7, 0xc9, 0xc7, 0x04, 0x40, 0x00, 0xe6, 0xd0, 0xe0,
- 0x04, 0x27, 0x00, 0xe7, 0x00, 0xca, 0x9f, 0x14, 0x00, 0xe6, 0xc8, 0x43,
- 0x02, 0x4f, 0xc2, 0x48, 0x8a, 0xcc, 0x86, 0xd4, 0x70, 0x01, 0xe0, 0x4f,
- 0x07, 0x17, 0xc2, 0x48, 0x9c, 0x4d, 0x26, 0xea, 0x42, 0x49, 0x04, 0x42,
- 0x00, 0xac, 0xc2, 0x49, 0x6c, 0xc3, 0x0e, 0x41, 0x70, 0x01, 0xd0, 0xce,
- 0x24, 0xb2, 0x70, 0x02, 0xe9, 0xcb, 0x1a, 0x3f, 0x70, 0x01, 0x49, 0xcd,
- 0x02, 0x52, 0x70, 0x03, 0xe8, 0xc4, 0x22, 0x71, 0x70, 0x01, 0xc9, 0xc5,
- 0x01, 0xdb, 0x70, 0x01, 0xc1, 0x15, 0xc2, 0x49, 0x76, 0x08, 0xc2, 0x49,
- 0x82, 0x16, 0xc2, 0x49, 0x8e, 0xc3, 0x01, 0xb4, 0x70, 0x01, 0x89, 0xc4,
- 0x15, 0xd3, 0x70, 0x01, 0x80, 0x83, 0x00, 0xbb, 0x41, 0xc2, 0x01, 0x29,
- 0x00, 0xbb, 0x28, 0xc9, 0xb3, 0x39, 0x00, 0xb8, 0xf8, 0x83, 0x00, 0xb8,
- 0x41, 0xc2, 0x01, 0x29, 0x00, 0xb8, 0x28, 0x24, 0xc2, 0x49, 0x9a, 0x23,
- 0xc2, 0x49, 0xb6, 0x22, 0xc2, 0x49, 0xde, 0x21, 0xc2, 0x4a, 0x06, 0x20,
- 0xc2, 0x4a, 0x2e, 0x1f, 0xc2, 0x4a, 0x56, 0x1e, 0xc2, 0x4a, 0x7e, 0x1d,
- 0x42, 0x4a, 0xa6, 0xc4, 0x22, 0x71, 0x0b, 0x56, 0x49, 0xc5, 0x01, 0xdb,
- 0x0b, 0x56, 0x41, 0x15, 0xc2, 0x4a, 0xce, 0x08, 0xc2, 0x4a, 0xda, 0x16,
- 0xc2, 0x4a, 0xe6, 0xc3, 0x01, 0xb4, 0x0b, 0x56, 0x09, 0xc4, 0x15, 0xd3,
- 0x0b, 0x56, 0x00, 0xc2, 0x02, 0xb4, 0x0b, 0x55, 0xf1, 0x05, 0xc2, 0x4a,
- 0xf2, 0x06, 0xc2, 0x4a, 0xfc, 0x08, 0xc2, 0x4b, 0x06, 0xc2, 0x96, 0xd0,
- 0x0b, 0x55, 0xd1, 0x16, 0xc2, 0x4b, 0x10, 0x0a, 0xc2, 0x4b, 0x20, 0x09,
- 0xc2, 0x4b, 0x28, 0x15, 0xc2, 0x4b, 0x32, 0x10, 0xc2, 0x4b, 0x3a, 0xc2,
- 0x02, 0x59, 0x0b, 0x55, 0x91, 0x0e, 0xc2, 0x4b, 0x50, 0x0f, 0xc2, 0x4b,
- 0x5a, 0xc2, 0x00, 0xde, 0x0b, 0x55, 0x51, 0x12, 0xc2, 0x4b, 0x6e, 0xc2,
- 0x01, 0x09, 0x0b, 0x55, 0x31, 0xc2, 0x1d, 0x5f, 0x0b, 0x55, 0x29, 0x0d,
- 0xc2, 0x4b, 0x78, 0x17, 0xc2, 0x4b, 0x82, 0x03, 0xc2, 0x4b, 0x9a, 0x0b,
- 0xc2, 0x4b, 0xae, 0x07, 0xc2, 0x4b, 0xbe, 0x18, 0xc2, 0x4b, 0xce, 0x11,
- 0x42, 0x4b, 0xde, 0x18, 0xc2, 0x4b, 0xee, 0x42, 0x13, 0x51, 0xc2, 0x4b,
- 0xfc, 0x0d, 0xc2, 0x4c, 0x0e, 0x12, 0xc2, 0x4c, 0x18, 0xc7, 0xaf, 0x4b,
- 0x08, 0xfe, 0xc1, 0x03, 0xc2, 0x4c, 0x22, 0xc6, 0xd2, 0x25, 0x08, 0xfe,
- 0xb1, 0xc3, 0x71, 0x95, 0x08, 0xfe, 0xa8, 0xcb, 0x90, 0xed, 0x08, 0xff,
- 0x49, 0xcb, 0x98, 0xe0, 0x08, 0xff, 0x40, 0x83, 0x00, 0x5c, 0x2b, 0x02,
- 0x4c, 0x2e, 0x8b, 0x00, 0x5c, 0x3b, 0x02, 0x4c, 0x3a, 0x97, 0x00, 0x5c,
- 0x4b, 0x02, 0x4c, 0x3e, 0x87, 0x00, 0x5c, 0x73, 0x02, 0x4c, 0x42, 0x91,
- 0x00, 0x5c, 0x93, 0x02, 0x4c, 0x46, 0xc2, 0x04, 0x2b, 0x00, 0x5c, 0xa9,
- 0x10, 0xc2, 0x4c, 0x4a, 0xc2, 0x03, 0x40, 0x00, 0x5c, 0xd1, 0xc2, 0x24,
- 0x58, 0x00, 0x5c, 0xe1, 0x16, 0xc2, 0x4c, 0x5e, 0xc2, 0x03, 0xa4, 0x00,
- 0x5d, 0x51, 0xc2, 0x00, 0xad, 0x00, 0x5d, 0x71, 0xc2, 0x1d, 0x5f, 0x00,
- 0x5d, 0x79, 0x14, 0xc2, 0x4c, 0x68, 0x0e, 0xc2, 0x4c, 0x72, 0xc2, 0x04,
- 0x41, 0x00, 0x5d, 0xa9, 0x15, 0xc2, 0x4c, 0x7a, 0xc2, 0x00, 0xa4, 0x00,
- 0x5d, 0xc8, 0xc4, 0x15, 0xd3, 0x00, 0x5f, 0x31, 0xc3, 0x01, 0xb4, 0x00,
- 0x5f, 0x39, 0x16, 0xc2, 0x4c, 0x8a, 0x08, 0xc2, 0x4c, 0x96, 0x15, 0xc2,
- 0x4c, 0xa2, 0xc5, 0x01, 0xdb, 0x00, 0x5f, 0x71, 0xc4, 0x22, 0x71, 0x00,
- 0x5f, 0x78, 0xc8, 0x0a, 0xb9, 0x08, 0xfe, 0x99, 0x44, 0x26, 0x50, 0xc2,
- 0x4c, 0xae, 0xca, 0x9e, 0x9c, 0x08, 0xfe, 0x69, 0xca, 0xa5, 0x9a, 0x08,
- 0xfe, 0x30, 0x45, 0x27, 0x8b, 0xc2, 0x4c, 0xba, 0xc7, 0x0a, 0xb9, 0x08,
- 0xfe, 0x81, 0x08, 0xc2, 0x4c, 0xc2, 0x45, 0x01, 0xdb, 0xc2, 0x4c, 0xce,
- 0x16, 0xc2, 0x4c, 0xd8, 0x44, 0x26, 0x50, 0xc2, 0x4c, 0xe8, 0xd8, 0x26,
- 0x40, 0x08, 0xfe, 0x08, 0x83, 0x00, 0x5d, 0xf1, 0x8b, 0x00, 0x5e, 0x41,
- 0x97, 0x00, 0x5e, 0x60, 0x8b, 0x00, 0x5e, 0x00, 0x97, 0x00, 0x5e, 0x10,
- 0x87, 0x00, 0x5e, 0x38, 0x91, 0x00, 0x5e, 0x58, 0xc7, 0x0c, 0x4b, 0x00,
- 0x5f, 0x89, 0xc8, 0x50, 0x00, 0x00, 0x5f, 0x90, 0xc4, 0x18, 0x83, 0x08,
- 0xb6, 0x39, 0xc2, 0x26, 0x51, 0x08, 0xb6, 0x30, 0xc3, 0x0c, 0x5b, 0x08,
- 0xb6, 0x29, 0xc3, 0x06, 0x9e, 0x08, 0xb6, 0x20, 0xc4, 0x04, 0x5e, 0x08,
- 0xb6, 0x19, 0xc2, 0x01, 0x47, 0x08, 0xb6, 0x10, 0xca, 0xa6, 0x44, 0x08,
- 0xb5, 0xc1, 0x97, 0x08, 0xb4, 0x49, 0x8b, 0x08, 0xb4, 0x39, 0x83, 0x08,
- 0xb4, 0x08, 0xc2, 0x02, 0x59, 0x08, 0xb5, 0x51, 0x83, 0x08, 0xb5, 0x20,
- 0x83, 0x08, 0xb5, 0x41, 0xc2, 0x00, 0xa4, 0x08, 0xb5, 0x38, 0xc2, 0x00,
- 0xa4, 0x08, 0xb5, 0x09, 0x83, 0x08, 0xb5, 0x00, 0xc2, 0x00, 0xa4, 0x08,
- 0xb4, 0xf9, 0x83, 0x08, 0xb4, 0xf0, 0x83, 0x08, 0xb4, 0xe9, 0xc2, 0x00,
- 0xc1, 0x08, 0xb4, 0xc1, 0xc2, 0x1d, 0x5f, 0x08, 0xb4, 0x99, 0xc2, 0x01,
- 0x29, 0x08, 0xb4, 0x70, 0xc2, 0x00, 0xa4, 0x08, 0xb4, 0xe1, 0x83, 0x08,
- 0xb4, 0xd9, 0x06, 0x42, 0x4c, 0xf4, 0xc2, 0x00, 0xa4, 0x08, 0xb4, 0xd1,
- 0x83, 0x08, 0xb4, 0xc9, 0x16, 0x42, 0x4c, 0xfe, 0xc2, 0x00, 0xa4, 0x08,
- 0xb4, 0x91, 0x83, 0x08, 0xb4, 0x88, 0xc2, 0x00, 0xa4, 0x08, 0xb4, 0x81,
- 0x83, 0x08, 0xb4, 0x78, 0xc2, 0x00, 0xa4, 0x08, 0xb4, 0x69, 0x83, 0x08,
- 0xb4, 0x60, 0xc2, 0x00, 0xa4, 0x08, 0xb4, 0x59, 0x83, 0x08, 0xb4, 0x50,
- 0x97, 0x08, 0xb4, 0x28, 0x8b, 0x08, 0xb4, 0x18, 0xc4, 0x0f, 0x7c, 0x08,
- 0xb5, 0xb1, 0xc5, 0x44, 0x7b, 0x08, 0xb5, 0x60, 0x97, 0x08, 0xb5, 0xa9,
- 0x8b, 0x08, 0xb5, 0x99, 0x83, 0x08, 0xb5, 0x68, 0x97, 0x08, 0xb5, 0x88,
- 0x8b, 0x08, 0xb5, 0x78, 0xc3, 0x01, 0x93, 0x00, 0xd5, 0x61, 0xc2, 0x64,
- 0x77, 0x00, 0xd5, 0x20, 0xc5, 0xd5, 0xd4, 0x00, 0xd5, 0x53, 0x02, 0x4d,
- 0x08, 0xc3, 0x01, 0x66, 0x00, 0xd5, 0x11, 0xc3, 0x85, 0x74, 0x00, 0xd3,
- 0x00, 0xc3, 0x3b, 0xc8, 0x00, 0xd5, 0x43, 0x02, 0x4d, 0x0e, 0xc3, 0xe7,
- 0xb7, 0x00, 0xd5, 0x19, 0x44, 0xe1, 0x3b, 0x42, 0x4d, 0x14, 0xc5, 0xdf,
- 0x2f, 0x00, 0xd5, 0x39, 0xc3, 0x78, 0x27, 0x00, 0xd3, 0xd9, 0xc4, 0xa7,
- 0x29, 0x00, 0xd3, 0xa2, 0x02, 0x4d, 0x20, 0xd4, 0x3c, 0x3d, 0x00, 0xd5,
- 0x31, 0xc6, 0xd4, 0x7d, 0x00, 0xd3, 0xd0, 0xc4, 0xe0, 0xcb, 0x00, 0xd5,
- 0x08, 0x9f, 0x00, 0xd3, 0xb1, 0x9e, 0x00, 0xd3, 0xa8, 0xc4, 0x18, 0x83,
- 0x00, 0xd4, 0xb9, 0xc2, 0x26, 0x51, 0x00, 0xd4, 0xb0, 0xc3, 0x0c, 0x5b,
- 0x00, 0xd4, 0xa9, 0xc3, 0x06, 0x9e, 0x00, 0xd4, 0xa0, 0xc4, 0x04, 0x5e,
- 0x00, 0xd4, 0x99, 0xc2, 0x01, 0x47, 0x00, 0xd4, 0x90, 0xc4, 0x18, 0x83,
- 0x00, 0xd4, 0x39, 0xc2, 0x26, 0x51, 0x00, 0xd4, 0x30, 0xc3, 0x0c, 0x5b,
- 0x00, 0xd4, 0x29, 0xc3, 0x06, 0x9e, 0x00, 0xd4, 0x20, 0xc4, 0x04, 0x5e,
- 0x00, 0xd4, 0x19, 0xc2, 0x01, 0x47, 0x00, 0xd4, 0x10, 0xc2, 0x0c, 0x65,
- 0x00, 0xd2, 0xf1, 0xc2, 0x00, 0xde, 0x00, 0xd2, 0xe9, 0x0f, 0xc2, 0x4d,
- 0x26, 0xd4, 0x3b, 0x61, 0x00, 0xd2, 0xd9, 0x0e, 0xc2, 0x4d, 0x30, 0xc9,
- 0xb4, 0x3e, 0x00, 0xd2, 0xc8, 0x42, 0x00, 0xae, 0xc2, 0x4d, 0x3c, 0x91,
- 0x00, 0xd3, 0x81, 0x9b, 0x00, 0xd3, 0x68, 0xc6, 0xd3, 0x2d, 0x00, 0xd3,
- 0x91, 0xc6, 0xcb, 0x6e, 0x00, 0xd3, 0x20, 0x8b, 0x00, 0xd3, 0x89, 0x87,
+ 0x44, 0x88, 0x8b, 0x05, 0x44, 0x28, 0x97, 0x05, 0x44, 0x38, 0x47, 0xb7,
+ 0xd8, 0xc2, 0x36, 0x9e, 0x83, 0x05, 0x45, 0x70, 0x87, 0x05, 0x44, 0x68,
+ 0x91, 0x05, 0x44, 0x80, 0x83, 0x05, 0x44, 0x91, 0xc2, 0x01, 0x0e, 0x05,
+ 0x44, 0x98, 0x83, 0x05, 0x44, 0xa1, 0xc2, 0x01, 0x0e, 0x05, 0x44, 0xa8,
+ 0xc2, 0x07, 0x69, 0x05, 0x44, 0xb1, 0xc2, 0x1a, 0x36, 0x05, 0x44, 0xd9,
+ 0xc2, 0x01, 0x01, 0x05, 0x45, 0x01, 0x83, 0x05, 0x45, 0x28, 0x83, 0x05,
+ 0x44, 0xb9, 0xc2, 0x01, 0x0e, 0x05, 0x44, 0xc0, 0x83, 0x05, 0x44, 0xc9,
+ 0xc2, 0x01, 0x0e, 0x05, 0x44, 0xd0, 0x16, 0xc2, 0x36, 0xac, 0x83, 0x05,
+ 0x45, 0x09, 0xc2, 0x01, 0x0e, 0x05, 0x45, 0x10, 0x06, 0xc2, 0x36, 0xb6,
+ 0x83, 0x05, 0x45, 0x19, 0xc2, 0x01, 0x0e, 0x05, 0x45, 0x20, 0x83, 0x05,
+ 0x45, 0x31, 0xc2, 0x01, 0x0e, 0x05, 0x45, 0x38, 0x83, 0x05, 0x45, 0x41,
+ 0xc2, 0x01, 0x0e, 0x05, 0x45, 0x48, 0xc4, 0x16, 0x57, 0x05, 0x46, 0x81,
+ 0xc3, 0x05, 0x17, 0x05, 0x46, 0x89, 0x16, 0xc2, 0x36, 0xc0, 0x08, 0xc2,
+ 0x36, 0xcc, 0x15, 0xc2, 0x36, 0xd8, 0xc5, 0x05, 0x1b, 0x05, 0x46, 0xc1,
+ 0xc4, 0x24, 0x35, 0x05, 0x46, 0xc8, 0xdd, 0x00, 0xea, 0x0f, 0xb3, 0xb9,
+ 0x44, 0x02, 0x9e, 0x42, 0x36, 0xe4, 0xe0, 0x00, 0xe7, 0x0f, 0xb3, 0xc0,
+ 0xc4, 0xe9, 0xfb, 0x00, 0x41, 0xf1, 0xc3, 0x0c, 0x37, 0x00, 0x41, 0x90,
+ 0xc6, 0xd0, 0xe8, 0x00, 0x40, 0xb9, 0x90, 0x00, 0x40, 0xb0, 0x83, 0x00,
+ 0x40, 0xf0, 0x83, 0x00, 0x40, 0xf8, 0xd0, 0x59, 0xff, 0x01, 0x54, 0xb8,
+ 0xd0, 0x59, 0xff, 0x01, 0x54, 0xc0, 0x07, 0xc2, 0x36, 0xea, 0x44, 0x02,
+ 0xcc, 0xc2, 0x36, 0xf6, 0xc9, 0xaf, 0xaf, 0x08, 0x8e, 0x69, 0xca, 0xa6,
+ 0xb4, 0x08, 0x8e, 0x48, 0xc3, 0xb1, 0x83, 0x08, 0x8e, 0xd1, 0xd5, 0x33,
+ 0x3a, 0x08, 0x8e, 0x60, 0x45, 0x08, 0xd8, 0xc2, 0x37, 0x24, 0xcb, 0x91,
+ 0xff, 0x08, 0x8e, 0x31, 0xc4, 0x1c, 0xb3, 0x08, 0x8e, 0x28, 0x45, 0x02,
+ 0xcb, 0xc2, 0x37, 0x48, 0xcd, 0x7a, 0x64, 0x08, 0x8e, 0x58, 0xc2, 0x01,
+ 0x0e, 0x08, 0x8d, 0x91, 0x15, 0xc2, 0x37, 0x6e, 0x18, 0xc2, 0x37, 0x7e,
+ 0x0e, 0xc2, 0x37, 0x88, 0xc2, 0x00, 0x9a, 0x08, 0x8d, 0x59, 0xc2, 0x1a,
+ 0x36, 0x08, 0x8d, 0x51, 0xc2, 0x00, 0x3f, 0x08, 0x8d, 0x49, 0x04, 0xc2,
+ 0x37, 0x92, 0x12, 0xc2, 0x37, 0x9c, 0x10, 0xc2, 0x37, 0xa6, 0x06, 0xc2,
+ 0x37, 0xbc, 0x16, 0xc2, 0x37, 0xca, 0x0c, 0xc2, 0x37, 0xd8, 0x05, 0xc2,
+ 0x37, 0xe2, 0x09, 0xc2, 0x37, 0xec, 0x0d, 0xc2, 0x37, 0xf6, 0x83, 0x08,
+ 0x8c, 0x1b, 0x02, 0x38, 0x00, 0x91, 0x08, 0x8c, 0x79, 0x87, 0x08, 0x8c,
+ 0x69, 0x97, 0x08, 0x8c, 0x3b, 0x02, 0x38, 0x0c, 0x8b, 0x08, 0x8c, 0x2a,
+ 0x02, 0x38, 0x10, 0xc2, 0x00, 0xf2, 0x08, 0x22, 0x89, 0x0a, 0x42, 0x38,
+ 0x14, 0x91, 0x08, 0x22, 0xa9, 0xc3, 0x15, 0x01, 0x08, 0x22, 0xb0, 0x83,
+ 0x08, 0x22, 0xc1, 0x99, 0x08, 0x23, 0xf8, 0xc3, 0x3b, 0x04, 0x08, 0x22,
+ 0xc9, 0xc4, 0xe9, 0xcb, 0x08, 0x23, 0x18, 0xc6, 0x14, 0xfe, 0x08, 0x23,
+ 0x01, 0xc3, 0x07, 0x6e, 0x08, 0x23, 0x28, 0x87, 0x08, 0x23, 0x11, 0xc2,
+ 0x01, 0xb5, 0x08, 0x23, 0x58, 0x88, 0x08, 0x23, 0x31, 0xc2, 0x01, 0xa1,
+ 0x08, 0x23, 0x91, 0xc2, 0x00, 0x3a, 0x08, 0x23, 0xf0, 0xc2, 0x0e, 0x32,
+ 0x08, 0x23, 0x39, 0x03, 0xc2, 0x38, 0x20, 0xc2, 0x00, 0x63, 0x08, 0x23,
+ 0xd8, 0xc2, 0x01, 0x0d, 0x08, 0x23, 0x41, 0xc2, 0x00, 0x68, 0x08, 0x23,
+ 0x49, 0x8a, 0x08, 0x23, 0x69, 0xc2, 0x02, 0xfb, 0x08, 0x23, 0x89, 0xc2,
+ 0x00, 0xc9, 0x08, 0x23, 0xb9, 0x14, 0xc2, 0x38, 0x28, 0xc2, 0x00, 0xac,
+ 0x08, 0x23, 0xd0, 0x90, 0x08, 0x23, 0x51, 0xc2, 0x01, 0xc4, 0x08, 0x23,
+ 0x61, 0xc2, 0x06, 0x6e, 0x08, 0x23, 0xa1, 0xc3, 0x0b, 0xa1, 0x08, 0x23,
+ 0xa9, 0xc2, 0x00, 0x95, 0x08, 0x23, 0xb1, 0x94, 0x08, 0x23, 0xc8, 0xe0,
+ 0x09, 0x47, 0x01, 0x4a, 0x20, 0xcd, 0x79, 0xae, 0x01, 0x57, 0x38, 0x00,
+ 0x42, 0x38, 0x32, 0xd6, 0x30, 0x05, 0x01, 0x5a, 0x79, 0x4c, 0x84, 0x54,
+ 0x42, 0x38, 0x3e, 0x00, 0x42, 0x38, 0x44, 0xc3, 0xeb, 0xeb, 0x0f, 0xb3,
+ 0x09, 0xc9, 0xac, 0x58, 0x0f, 0xb2, 0xc9, 0xc4, 0x4a, 0x1e, 0x0f, 0xb2,
+ 0x88, 0xc7, 0x12, 0x4e, 0x01, 0x5b, 0xc8, 0x00, 0x42, 0x38, 0x50, 0xc3,
+ 0xeb, 0xeb, 0x0f, 0xb3, 0x19, 0xc9, 0xac, 0x58, 0x0f, 0xb2, 0xd9, 0xc4,
+ 0x4a, 0x1e, 0x0f, 0xb2, 0x98, 0xc7, 0x12, 0x4e, 0x01, 0x5b, 0xc0, 0xc2,
+ 0x01, 0x0e, 0x08, 0xd3, 0x49, 0x83, 0x08, 0xd3, 0x40, 0xc2, 0x01, 0x0e,
+ 0x08, 0xd3, 0xb1, 0x83, 0x08, 0xd3, 0xa8, 0xc2, 0x01, 0x0e, 0x08, 0xd3,
+ 0x39, 0x83, 0x08, 0xd3, 0x30, 0x8e, 0x08, 0xd2, 0xd1, 0x94, 0x08, 0xd2,
+ 0xc8, 0x97, 0x08, 0xd2, 0xc1, 0x8b, 0x08, 0xd2, 0xb8, 0x87, 0x08, 0xd2,
+ 0xb0, 0x87, 0x08, 0xd2, 0x90, 0xca, 0x50, 0xa4, 0x08, 0x7a, 0xb0, 0xc3,
+ 0xe2, 0x62, 0x08, 0x79, 0xf9, 0xc4, 0xdd, 0x34, 0x08, 0x79, 0xe0, 0xc5,
+ 0xd3, 0x59, 0x0f, 0xbc, 0xb1, 0xc2, 0x00, 0xe5, 0x01, 0x99, 0x39, 0xc2,
+ 0xae, 0x85, 0x01, 0x9c, 0xa0, 0x11, 0xc2, 0x38, 0x68, 0x8f, 0x01, 0x9c,
+ 0xc8, 0x44, 0x01, 0xad, 0xc2, 0x38, 0x74, 0xc4, 0x8c, 0xc4, 0x01, 0x9a,
+ 0xb9, 0x84, 0x01, 0x9e, 0xe8, 0x11, 0xc2, 0x38, 0xae, 0xd5, 0x35, 0x32,
+ 0x01, 0x56, 0x69, 0x8f, 0x01, 0x9e, 0x81, 0x90, 0x01, 0x9e, 0x89, 0x9a,
+ 0x01, 0x9e, 0x98, 0xca, 0x28, 0x24, 0x01, 0x14, 0x83, 0x02, 0x38, 0xb8,
+ 0xc3, 0x68, 0x4a, 0x01, 0x98, 0x49, 0xc3, 0x13, 0xf4, 0x01, 0x98, 0x51,
+ 0x98, 0x01, 0x9b, 0xa8, 0xc7, 0x3a, 0x6f, 0x01, 0x14, 0x7b, 0x02, 0x38,
+ 0xbe, 0x90, 0x01, 0x9e, 0x63, 0x02, 0x38, 0xc4, 0x97, 0x01, 0x9b, 0xd0,
+ 0xc2, 0x02, 0x6a, 0x01, 0x14, 0xa1, 0x03, 0xc2, 0x38, 0xd0, 0x85, 0x01,
+ 0x9e, 0x21, 0x86, 0x01, 0x9e, 0x29, 0xc8, 0xbc, 0xf3, 0x01, 0x9e, 0x31,
+ 0x91, 0x01, 0x9e, 0x3b, 0x02, 0x38, 0xd8, 0x8f, 0x01, 0x9c, 0xea, 0x02,
+ 0x38, 0xde, 0xc3, 0x65, 0x6c, 0x01, 0x10, 0xd1, 0x0b, 0xc2, 0x38, 0xe2,
+ 0x17, 0xc2, 0x38, 0xf4, 0x07, 0xc2, 0x39, 0x00, 0xc2, 0x00, 0x5d, 0x01,
+ 0x9d, 0x6a, 0x02, 0x39, 0x0c, 0xcc, 0x8c, 0x64, 0x0f, 0x90, 0x01, 0x89,
+ 0x01, 0x96, 0x61, 0x83, 0x01, 0x9e, 0x53, 0x02, 0x39, 0x15, 0x17, 0xc2,
+ 0x39, 0x1b, 0x07, 0xc2, 0x39, 0x2d, 0x11, 0xc2, 0x39, 0x39, 0x92, 0x01,
+ 0x9e, 0x5b, 0x02, 0x39, 0x41, 0x9c, 0x01, 0x9c, 0x80, 0x8c, 0x0f, 0x8c,
+ 0x81, 0x83, 0x01, 0x9b, 0x93, 0x02, 0x39, 0x45, 0xc3, 0x14, 0x17, 0x01,
+ 0x99, 0x29, 0xc3, 0x04, 0x33, 0x01, 0x99, 0x31, 0x84, 0x01, 0x9e, 0x41,
+ 0x8f, 0x01, 0x9b, 0xbb, 0x02, 0x39, 0x4b, 0x8e, 0x01, 0x9c, 0xb8, 0x11,
+ 0xc2, 0x39, 0x4f, 0x83, 0x01, 0x9d, 0x4b, 0x02, 0x39, 0x61, 0x0b, 0xc2,
+ 0x39, 0x6b, 0x07, 0xc2, 0x39, 0x75, 0x8a, 0x01, 0x9e, 0xb9, 0x8f, 0x01,
+ 0x9e, 0xc1, 0xc2, 0x7a, 0x63, 0x01, 0x9e, 0xc9, 0x94, 0x01, 0x9e, 0xd1,
+ 0x85, 0x01, 0x9b, 0xb1, 0x88, 0x01, 0x9c, 0x51, 0x95, 0x01, 0x9d, 0x81,
+ 0x98, 0x01, 0x9d, 0xa1, 0x99, 0x01, 0x9d, 0xd0, 0x14, 0xc2, 0x39, 0x85,
+ 0x98, 0x01, 0x96, 0x71, 0xc7, 0xca, 0xfc, 0x01, 0x98, 0x39, 0xc4, 0x94,
+ 0xea, 0x01, 0x98, 0x40, 0xc5, 0xdb, 0xb2, 0x01, 0x98, 0x01, 0xc5, 0xe1,
+ 0x66, 0x01, 0x98, 0x09, 0xc4, 0xea, 0x8b, 0x01, 0x98, 0x11, 0xc3, 0x3d,
+ 0xb3, 0x01, 0x98, 0x19, 0x97, 0x01, 0x9b, 0x99, 0x8f, 0x01, 0x9e, 0x11,
+ 0xc7, 0x24, 0x21, 0x01, 0x9e, 0xf8, 0x83, 0x01, 0x9c, 0x23, 0x02, 0x39,
+ 0x8f, 0xc5, 0xd9, 0x9b, 0x01, 0x98, 0x91, 0xc3, 0x04, 0x46, 0x01, 0x98,
+ 0xa3, 0x02, 0x39, 0x99, 0x42, 0x00, 0xd3, 0xc2, 0x39, 0xab, 0xc4, 0xe6,
+ 0x6b, 0x01, 0x98, 0xe1, 0x11, 0xc2, 0x39, 0xb7, 0x89, 0x01, 0x9c, 0x79,
+ 0x8d, 0x01, 0x9e, 0x69, 0x8f, 0x01, 0x9c, 0xf3, 0x02, 0x39, 0xc3, 0x96,
+ 0x01, 0x9e, 0x79, 0x84, 0x01, 0x9c, 0x29, 0xc3, 0x00, 0x2e, 0x01, 0x9c,
+ 0x49, 0xc2, 0xae, 0x85, 0x01, 0x9c, 0x89, 0x8e, 0x01, 0x9c, 0xc1, 0xc2,
+ 0x07, 0x44, 0x01, 0x9d, 0x51, 0x98, 0x01, 0x9d, 0xc1, 0x99, 0x01, 0x9d,
+ 0xf1, 0xc4, 0xea, 0x83, 0x01, 0x9e, 0x00, 0x03, 0xc2, 0x39, 0xc7, 0x0b,
+ 0xc2, 0x39, 0xd7, 0xc5, 0xd5, 0x4b, 0x01, 0x98, 0xc3, 0x02, 0x39, 0xe9,
+ 0x9b, 0x01, 0x9e, 0x49, 0x84, 0x01, 0x9c, 0x39, 0xc2, 0xae, 0x85, 0x01,
+ 0x9c, 0x99, 0xc2, 0x07, 0x44, 0x01, 0x9d, 0x60, 0x03, 0xc2, 0x39, 0xef,
+ 0xc6, 0xd4, 0x4e, 0x01, 0x99, 0x09, 0x43, 0x01, 0x04, 0xc2, 0x39, 0xfb,
+ 0x94, 0x01, 0x9e, 0xd9, 0x98, 0x01, 0x9e, 0xe0, 0x83, 0x01, 0x9c, 0x0b,
+ 0x02, 0x3a, 0x03, 0xc4, 0x07, 0xbb, 0x01, 0x99, 0x49, 0x88, 0x01, 0x9c,
+ 0x59, 0x8f, 0x01, 0x9c, 0xd1, 0x95, 0x01, 0x9d, 0x89, 0x98, 0x01, 0x9d,
+ 0xa9, 0x99, 0x01, 0x9d, 0xd8, 0x03, 0xc2, 0x3a, 0x09, 0xc3, 0xd2, 0xd5,
+ 0x01, 0x99, 0x89, 0xc7, 0xca, 0x9a, 0x01, 0x99, 0xa1, 0xc4, 0x0b, 0x46,
+ 0x01, 0x99, 0xe1, 0xc5, 0xdb, 0x62, 0x01, 0x99, 0xf1, 0x93, 0x01, 0x9e,
+ 0x18, 0x83, 0x01, 0x9c, 0x1b, 0x02, 0x3a, 0x13, 0x0b, 0xc2, 0x3a, 0x29,
+ 0x07, 0xc2, 0x3a, 0x3c, 0x42, 0x07, 0x73, 0xc2, 0x3a, 0x4b, 0x89, 0x01,
+ 0x9c, 0x71, 0x00, 0xc2, 0x3a, 0x6b, 0x84, 0x01, 0x9c, 0x33, 0x02, 0x3a,
+ 0x7b, 0xc2, 0x01, 0xb5, 0x01, 0x9e, 0xb1, 0xc2, 0xae, 0x85, 0x01, 0x9c,
+ 0x91, 0x8e, 0x01, 0x9c, 0xb1, 0x8f, 0x01, 0x9c, 0xe3, 0x02, 0x3a, 0x81,
+ 0xc2, 0x07, 0x44, 0x01, 0x9d, 0x59, 0x95, 0x01, 0x9d, 0x99, 0x98, 0x01,
+ 0x9d, 0xbb, 0x02, 0x3a, 0x85, 0x99, 0x01, 0x9d, 0xea, 0x02, 0x3a, 0x8b,
+ 0x42, 0x0e, 0x30, 0xc2, 0x3a, 0x91, 0xc3, 0x91, 0x57, 0x01, 0x9a, 0x80,
+ 0x11, 0xc2, 0x3a, 0x9d, 0x45, 0x01, 0xf2, 0x42, 0x3a, 0xa9, 0xc6, 0x12,
+ 0x73, 0x01, 0x36, 0xe9, 0xc2, 0x4d, 0xdb, 0x0f, 0x8d, 0x51, 0xc6, 0xd8,
+ 0xce, 0x0f, 0x8d, 0x19, 0x07, 0xc2, 0x3a, 0xb5, 0xc2, 0x07, 0xc3, 0x0f,
+ 0x8c, 0xc1, 0xc5, 0x01, 0xea, 0x01, 0x4e, 0x41, 0xcb, 0x11, 0x62, 0x01,
+ 0x4e, 0x39, 0x86, 0x0f, 0x8a, 0x61, 0x95, 0x0f, 0x8a, 0x68, 0xc2, 0x18,
+ 0x55, 0x01, 0x35, 0xf9, 0x48, 0xbe, 0x7b, 0x42, 0x3a, 0xc1, 0xc4, 0x07,
+ 0x6e, 0x01, 0x15, 0x01, 0x19, 0xc2, 0x3a, 0xd3, 0xc6, 0x05, 0xde, 0x0f,
+ 0x8c, 0xd8, 0xc4, 0x1c, 0xa2, 0x01, 0x14, 0xf9, 0x98, 0x0f, 0x8a, 0x58,
+ 0xc3, 0x21, 0x5f, 0x01, 0x14, 0xf1, 0xc2, 0x55, 0xca, 0x0f, 0x8a, 0x70,
+ 0x53, 0x3f, 0x83, 0xc2, 0x3a, 0xdf, 0xc3, 0x97, 0xa8, 0x0f, 0x8c, 0x91,
+ 0x8e, 0x0f, 0x8c, 0x88, 0xc2, 0x03, 0x0c, 0x0f, 0x8d, 0x61, 0x95, 0x0f,
+ 0x8c, 0xd0, 0xc2, 0x23, 0x0b, 0x0f, 0x8d, 0x59, 0xd7, 0x28, 0x68, 0x0f,
+ 0x8c, 0xc8, 0xc5, 0xdf, 0x36, 0x0f, 0x8d, 0x41, 0xc2, 0x0a, 0x49, 0x0f,
+ 0x8d, 0x39, 0x98, 0x0f, 0x8a, 0x51, 0x85, 0x0f, 0x8d, 0x30, 0xd3, 0x40,
+ 0x1d, 0x0f, 0x8d, 0x21, 0x8d, 0x0f, 0x8c, 0xb8, 0xcd, 0x82, 0x29, 0x0f,
+ 0x8d, 0x01, 0x44, 0x08, 0xde, 0xc2, 0x3b, 0x03, 0xc3, 0x05, 0xdf, 0x0f,
+ 0x8c, 0x99, 0xd5, 0x34, 0x4b, 0x01, 0x4e, 0x28, 0x89, 0x0f, 0x8c, 0xb1,
+ 0xc2, 0x03, 0x86, 0x0f, 0x8c, 0xa8, 0xc9, 0x26, 0xef, 0x01, 0x21, 0x30,
+ 0xc2, 0x01, 0x5b, 0x01, 0x20, 0x79, 0xc3, 0x01, 0xc3, 0x01, 0x20, 0x70,
+ 0xc4, 0x27, 0x22, 0x01, 0x20, 0x11, 0xc7, 0xc5, 0x68, 0x01, 0x20, 0x08,
+ 0xc4, 0x6d, 0xc8, 0x01, 0x21, 0x0b, 0x02, 0x3b, 0x0d, 0x4d, 0x7a, 0xb2,
+ 0x42, 0x3b, 0x13, 0xc5, 0xdf, 0xf9, 0x01, 0x21, 0x21, 0xd2, 0x4e, 0xfc,
+ 0x01, 0x20, 0xa8, 0x45, 0x03, 0x71, 0xc2, 0x3b, 0x23, 0xc5, 0xdb, 0x2b,
+ 0x01, 0x20, 0x28, 0x49, 0xb4, 0x02, 0xc2, 0x3b, 0x2d, 0xc2, 0x07, 0x6e,
+ 0x00, 0x39, 0x08, 0x46, 0xd1, 0x36, 0x42, 0x3b, 0x55, 0xc2, 0x3c, 0xd1,
+ 0x00, 0x39, 0x61, 0xc3, 0x1e, 0x54, 0x00, 0x38, 0xda, 0x02, 0x3b, 0x67,
+ 0xc3, 0x11, 0x40, 0x00, 0x39, 0x59, 0xc4, 0xe2, 0x61, 0x00, 0x39, 0x41,
+ 0xc6, 0x7a, 0xe0, 0x00, 0x39, 0x19, 0xd0, 0x5d, 0xdf, 0x00, 0x38, 0x89,
+ 0x47, 0xc7, 0x9f, 0x42, 0x3b, 0x6d, 0xc3, 0x07, 0xda, 0x00, 0x39, 0x51,
+ 0xca, 0xaa, 0x60, 0x00, 0x39, 0x38, 0xc3, 0x10, 0x29, 0x00, 0x38, 0xf0,
+ 0xc2, 0x01, 0x47, 0x00, 0x38, 0xd0, 0xd2, 0x4c, 0x08, 0x00, 0x38, 0xb1,
+ 0xc5, 0x4c, 0x10, 0x00, 0x38, 0xa8, 0xc9, 0xab, 0xb6, 0x00, 0x38, 0xa0,
+ 0x42, 0x00, 0xee, 0xc2, 0x3b, 0x7f, 0xce, 0x75, 0x56, 0x00, 0x39, 0xe0,
+ 0xca, 0x9d, 0xcc, 0x00, 0x38, 0x69, 0xc9, 0xb4, 0xbf, 0x00, 0x38, 0x61,
+ 0xc6, 0xb4, 0xc2, 0x00, 0x38, 0x58, 0xc5, 0x00, 0x34, 0x00, 0x39, 0xb9,
+ 0xc5, 0x03, 0x50, 0x00, 0x39, 0xb0, 0xc5, 0x00, 0x47, 0x00, 0x38, 0x39,
+ 0xc4, 0x00, 0xcd, 0x00, 0x38, 0x30, 0xc5, 0x34, 0x9a, 0x00, 0x38, 0x23,
+ 0x02, 0x3b, 0x8b, 0xc9, 0x11, 0x47, 0x00, 0x38, 0x10, 0xc5, 0x34, 0x9a,
+ 0x00, 0x38, 0x1b, 0x02, 0x3b, 0x91, 0xc9, 0x11, 0x47, 0x00, 0x38, 0x08,
+ 0xc5, 0x03, 0x50, 0x00, 0x39, 0xe9, 0xc5, 0x00, 0x34, 0x00, 0x39, 0xf0,
+ 0xc5, 0x03, 0x50, 0x00, 0x3a, 0x19, 0xc5, 0x00, 0x34, 0x00, 0x3a, 0x20,
+ 0xc5, 0x03, 0x50, 0x00, 0x3a, 0x29, 0xc5, 0x00, 0x34, 0x00, 0x3a, 0x30,
+ 0xc2, 0x03, 0x76, 0x05, 0x40, 0x89, 0x91, 0x05, 0x40, 0x80, 0x91, 0x05,
+ 0x40, 0x91, 0xc2, 0x03, 0x76, 0x05, 0x40, 0x98, 0x46, 0x01, 0xab, 0x42,
+ 0x3b, 0x97, 0x00, 0x42, 0x3b, 0xa9, 0xcf, 0x09, 0x58, 0x01, 0x4b, 0xd9,
+ 0x42, 0x06, 0x82, 0x42, 0x3b, 0xbe, 0xc3, 0x05, 0xe3, 0x01, 0x55, 0xf1,
+ 0xcf, 0x65, 0xfa, 0x01, 0x56, 0x01, 0xd9, 0x20, 0x32, 0x01, 0x56, 0x10,
+ 0xc6, 0x0d, 0xd1, 0x01, 0x56, 0xb9, 0xde, 0x0d, 0xb9, 0x01, 0x56, 0xc0,
+ 0x52, 0x48, 0xde, 0xc2, 0x3b, 0xca, 0xcf, 0x1f, 0x20, 0x01, 0x03, 0xe8,
+ 0xca, 0x0e, 0xbd, 0x01, 0x03, 0xe1, 0xc4, 0x00, 0x48, 0x01, 0x03, 0xc0,
+ 0xc4, 0x15, 0xa7, 0x01, 0x03, 0xb9, 0xc2, 0x22, 0x45, 0x01, 0x03, 0xb0,
+ 0xc3, 0x0d, 0x8f, 0x01, 0x03, 0xa9, 0xc3, 0x08, 0xde, 0x01, 0x03, 0xa0,
+ 0xc2, 0x0a, 0x20, 0x00, 0x05, 0x91, 0xc4, 0x05, 0xde, 0x00, 0x05, 0x98,
+ 0xc6, 0xd3, 0xb2, 0x00, 0xe6, 0x11, 0xc7, 0xc4, 0xa4, 0x00, 0xe6, 0x08,
+ 0x45, 0x24, 0x06, 0xc2, 0x3b, 0xd2, 0x83, 0x00, 0xdc, 0xb0, 0xc8, 0xb3,
+ 0xb1, 0x00, 0xdd, 0xe9, 0x87, 0x00, 0xdc, 0x30, 0xc2, 0x01, 0x0e, 0x00,
+ 0xdd, 0xe9, 0x83, 0x00, 0xdc, 0xc0, 0xc2, 0x0c, 0xfe, 0x00, 0xdd, 0xe1,
+ 0x83, 0x00, 0xdc, 0xe0, 0xc2, 0x0c, 0xfe, 0x00, 0xdd, 0xd9, 0x83, 0x00,
+ 0xdc, 0xd8, 0xc2, 0x1a, 0x36, 0x00, 0xdd, 0x79, 0x83, 0x00, 0xdc, 0xf0,
+ 0xc2, 0x01, 0x0e, 0x00, 0xdd, 0x71, 0x83, 0x00, 0xdc, 0x50, 0x83, 0x00,
+ 0xdc, 0xa1, 0xc2, 0x1a, 0x36, 0x00, 0xdc, 0x89, 0xc2, 0x07, 0x69, 0x00,
+ 0xdc, 0x68, 0x97, 0x00, 0xdc, 0x48, 0xc4, 0x15, 0xa7, 0x00, 0xdd, 0xb9,
+ 0xc2, 0x22, 0x45, 0x00, 0xdd, 0xb0, 0xc3, 0x0d, 0x8f, 0x00, 0xdd, 0xa9,
+ 0xc3, 0x08, 0xde, 0x00, 0xdd, 0xa0, 0xc4, 0x05, 0xde, 0x00, 0xdd, 0x99,
+ 0xc2, 0x0a, 0x20, 0x00, 0xdd, 0x90, 0xc2, 0x00, 0x4c, 0x00, 0xdd, 0x69,
+ 0xc2, 0x00, 0x3f, 0x00, 0xdd, 0x60, 0xc3, 0xcf, 0x23, 0x00, 0xdd, 0x19,
+ 0xc4, 0x8b, 0xf9, 0x00, 0xdd, 0x10, 0xc5, 0xe2, 0xf6, 0x00, 0xdd, 0x51,
+ 0x10, 0x42, 0x3b, 0xda, 0xc7, 0xca, 0x23, 0x00, 0xdd, 0x49, 0xc5, 0x0c,
+ 0x33, 0x00, 0xdd, 0x39, 0xc7, 0xc6, 0x10, 0x00, 0xdd, 0x31, 0xc4, 0xe4,
+ 0xbb, 0x00, 0xdd, 0x29, 0xc5, 0xde, 0x1e, 0x00, 0xdd, 0x20, 0xcb, 0x0e,
+ 0xbc, 0x01, 0x55, 0x81, 0xcc, 0x23, 0x30, 0x01, 0x55, 0x90, 0xc8, 0x08,
+ 0xff, 0x01, 0x55, 0xb1, 0xcf, 0x66, 0x45, 0x01, 0x55, 0xd0, 0xd1, 0x57,
+ 0x30, 0x01, 0x14, 0x51, 0xcb, 0x25, 0x71, 0x01, 0x14, 0x33, 0x02, 0x3b,
+ 0xe4, 0x46, 0x03, 0x50, 0x42, 0x3b, 0xea, 0xc6, 0x2d, 0xdf, 0x01, 0x56,
+ 0x99, 0xc4, 0x0d, 0xd3, 0x01, 0x56, 0xa8, 0xca, 0x23, 0x02, 0x0f, 0xb0,
+ 0x1b, 0x02, 0x3c, 0x02, 0x0a, 0xc2, 0x3c, 0x08, 0x15, 0xc2, 0x3c, 0x1a,
+ 0xc4, 0x24, 0xec, 0x0f, 0xcb, 0x90, 0xca, 0x23, 0x02, 0x0f, 0xb1, 0xd1,
+ 0xd1, 0x4f, 0xd1, 0x0f, 0xb1, 0xd8, 0x47, 0xce, 0xa6, 0xc2, 0x3c, 0x29,
+ 0x42, 0x00, 0xec, 0xc2, 0x3c, 0x35, 0xc3, 0x0c, 0x34, 0x07, 0xf2, 0xa8,
+ 0xc9, 0x86, 0xa3, 0x01, 0x10, 0x53, 0x02, 0x3c, 0x3f, 0xcf, 0x0f, 0x63,
+ 0x07, 0xf2, 0xb9, 0xc6, 0xb5, 0x2e, 0x07, 0xf2, 0xc1, 0xca, 0x0e, 0xbd,
+ 0x07, 0xf3, 0x30, 0x4d, 0x7c, 0x1e, 0xc2, 0x3c, 0x45, 0x45, 0x01, 0x18,
+ 0xc2, 0x3c, 0x64, 0xce, 0x6a, 0x15, 0x07, 0xf3, 0x40, 0xe0, 0x0b, 0xe7,
+ 0x08, 0x59, 0xd9, 0xc4, 0x1f, 0x02, 0x00, 0x16, 0xe0, 0xc7, 0x2f, 0x38,
+ 0x0f, 0xb7, 0x49, 0xc8, 0x32, 0x18, 0x07, 0xf3, 0x01, 0xc7, 0x0a, 0x60,
+ 0x07, 0xf3, 0x08, 0x43, 0x00, 0xcf, 0xc2, 0x3c, 0x76, 0xcc, 0x8e, 0xe0,
+ 0x07, 0xf3, 0x20, 0xc8, 0x65, 0xfb, 0x07, 0xf3, 0x11, 0xcb, 0x97, 0x11,
+ 0x07, 0xf3, 0x50, 0xc3, 0x0a, 0x1f, 0x00, 0x04, 0x91, 0xc3, 0x05, 0x17,
+ 0x00, 0x04, 0x88, 0xc5, 0xdb, 0x80, 0x0f, 0xad, 0xb0, 0xca, 0x37, 0x20,
+ 0x01, 0x13, 0xf1, 0xc5, 0x09, 0x02, 0x01, 0x13, 0xe0, 0x4c, 0x23, 0x24,
+ 0xc2, 0x3c, 0x88, 0xcb, 0x0e, 0xbc, 0x01, 0x55, 0x99, 0x44, 0x1f, 0xeb,
+ 0xc2, 0x3c, 0x94, 0xcf, 0x66, 0x45, 0x01, 0x55, 0xb8, 0xc3, 0x0c, 0x34,
+ 0x07, 0xf0, 0x99, 0xc3, 0x00, 0xec, 0x07, 0xf0, 0x80, 0xcf, 0x0f, 0x63,
+ 0x07, 0xf0, 0xa9, 0xc6, 0xb5, 0x2e, 0x07, 0xf1, 0x89, 0xc6, 0xd1, 0x7e,
+ 0x07, 0xf1, 0x90, 0x44, 0x00, 0xce, 0xc2, 0x3c, 0xa0, 0xc7, 0x7c, 0x8c,
+ 0x07, 0xf1, 0x98, 0xcb, 0x1c, 0xe0, 0x07, 0xf1, 0xb1, 0x05, 0xc2, 0x3c,
+ 0xce, 0xd6, 0x0a, 0xe8, 0x07, 0xf1, 0xd1, 0xd8, 0x23, 0x3c, 0x07, 0xf1,
+ 0xe1, 0xd4, 0x3a, 0x82, 0x07, 0xf1, 0xf1, 0xce, 0x26, 0x2e, 0x07, 0xf2,
+ 0x41, 0x46, 0x01, 0x31, 0xc2, 0x3c, 0xda, 0xcd, 0x0f, 0x83, 0x07, 0xf2,
+ 0x00, 0xc5, 0x00, 0xea, 0x07, 0xf0, 0x89, 0xc9, 0x11, 0x47, 0x07, 0xf0,
+ 0x90, 0xc3, 0x00, 0xda, 0x0f, 0x85, 0x01, 0xca, 0x9d, 0x2c, 0x0f, 0x86,
+ 0x78, 0xc6, 0xd7, 0xa2, 0x0f, 0x85, 0x09, 0xc6, 0x7c, 0x59, 0x0f, 0x85,
+ 0x89, 0xc8, 0xba, 0x2b, 0x0f, 0x86, 0x09, 0xc5, 0xdd, 0xe2, 0x0f, 0x86,
+ 0x88, 0x46, 0xd1, 0x48, 0xc2, 0x3c, 0xe6, 0x48, 0xc0, 0x13, 0xc2, 0x3c,
+ 0xfe, 0x46, 0xad, 0xb1, 0xc2, 0x3d, 0x16, 0x43, 0x1f, 0x85, 0x42, 0x3d,
+ 0x2e, 0x11, 0xc2, 0x3d, 0x6a, 0x47, 0x9a, 0x75, 0x42, 0x3d, 0x76, 0x46,
+ 0xd8, 0x86, 0xc2, 0x3d, 0x8e, 0x48, 0xba, 0xd3, 0x42, 0x3d, 0xa6, 0xc6,
+ 0xd7, 0xa2, 0x0f, 0x85, 0x41, 0xc6, 0x7c, 0x59, 0x0f, 0x85, 0xc1, 0xc8,
+ 0xba, 0x2b, 0x0f, 0x86, 0x41, 0xc5, 0xdd, 0xe2, 0x0f, 0x86, 0xc0, 0xc6,
+ 0xd7, 0xa2, 0x0f, 0x85, 0x49, 0xc6, 0x7c, 0x59, 0x0f, 0x85, 0xc9, 0xc8,
+ 0xba, 0x2b, 0x0f, 0x86, 0x49, 0xc5, 0xdd, 0xe2, 0x0f, 0x86, 0xc8, 0xc6,
+ 0xd7, 0xa2, 0x0f, 0x85, 0x59, 0xc6, 0x7c, 0x59, 0x0f, 0x85, 0xd9, 0xc8,
+ 0xba, 0x2b, 0x0f, 0x86, 0x59, 0xc5, 0xdd, 0xe2, 0x0f, 0x86, 0xd8, 0x49,
+ 0xb2, 0x1c, 0xc2, 0x3d, 0xbe, 0x47, 0x36, 0x09, 0x42, 0x3d, 0xd6, 0xc6,
+ 0xd7, 0xa2, 0x0f, 0x85, 0x69, 0xc6, 0x7c, 0x59, 0x0f, 0x85, 0xe9, 0xc8,
+ 0xba, 0x2b, 0x0f, 0x86, 0x69, 0xc5, 0xdd, 0xe2, 0x0f, 0x86, 0xe8, 0xc2,
+ 0x0a, 0x20, 0x01, 0x5e, 0x99, 0xc4, 0x05, 0xde, 0x01, 0x5e, 0xa0, 0xc3,
+ 0x08, 0xde, 0x01, 0x5e, 0xa9, 0xc3, 0x0d, 0x8f, 0x01, 0x5e, 0xb0, 0xc8,
+ 0x00, 0x52, 0x01, 0x51, 0xc9, 0xd1, 0x55, 0x21, 0x01, 0x51, 0x71, 0xd0,
+ 0x5c, 0xcf, 0x01, 0x51, 0x68, 0xce, 0x6d, 0xbc, 0x01, 0x51, 0x41, 0x15,
+ 0xc2, 0x3d, 0xee, 0x46, 0x35, 0xda, 0xc2, 0x3d, 0xfa, 0xc9, 0x0d, 0xd7,
+ 0x01, 0x51, 0x29, 0xd7, 0x27, 0xde, 0x01, 0x51, 0x18, 0xcb, 0x93, 0xb7,
+ 0x0f, 0xae, 0xf9, 0xc3, 0x00, 0xd3, 0x0f, 0xa6, 0x18, 0x45, 0x03, 0x5d,
+ 0xc2, 0x3e, 0x06, 0xcc, 0x41, 0x54, 0x01, 0x10, 0x10, 0x9c, 0x01, 0x25,
+ 0xa9, 0x9b, 0x01, 0x25, 0xa1, 0x9a, 0x01, 0x25, 0x99, 0x99, 0x01, 0x25,
+ 0x91, 0x98, 0x01, 0x25, 0x89, 0x97, 0x01, 0x25, 0x81, 0x96, 0x01, 0x25,
+ 0x79, 0x95, 0x01, 0x25, 0x71, 0x94, 0x01, 0x25, 0x69, 0x93, 0x01, 0x25,
+ 0x61, 0x92, 0x01, 0x25, 0x59, 0x91, 0x01, 0x25, 0x51, 0x90, 0x01, 0x25,
+ 0x49, 0x8f, 0x01, 0x25, 0x41, 0x8e, 0x01, 0x25, 0x39, 0x8d, 0x01, 0x25,
+ 0x31, 0x8c, 0x01, 0x25, 0x29, 0x8b, 0x01, 0x25, 0x21, 0x8a, 0x01, 0x25,
+ 0x19, 0x89, 0x01, 0x25, 0x11, 0x88, 0x01, 0x25, 0x09, 0x87, 0x01, 0x25,
+ 0x01, 0x86, 0x01, 0x24, 0xf9, 0x85, 0x01, 0x24, 0xf1, 0x84, 0x01, 0x24,
+ 0xe9, 0x83, 0x01, 0x24, 0xe0, 0x99, 0x0f, 0x89, 0x31, 0x9a, 0x0f, 0x89,
+ 0x39, 0x9b, 0x0f, 0x89, 0x41, 0x9c, 0x0f, 0x89, 0x49, 0x83, 0x0f, 0x88,
+ 0x81, 0x84, 0x0f, 0x88, 0x89, 0x85, 0x0f, 0x88, 0x91, 0x86, 0x0f, 0x88,
+ 0x99, 0x87, 0x0f, 0x88, 0xa1, 0x88, 0x0f, 0x88, 0xa9, 0x89, 0x0f, 0x88,
+ 0xb1, 0x8a, 0x0f, 0x88, 0xb9, 0x8b, 0x0f, 0x88, 0xc1, 0x8c, 0x0f, 0x88,
+ 0xc9, 0x8d, 0x0f, 0x88, 0xd1, 0x8e, 0x0f, 0x88, 0xd9, 0x8f, 0x0f, 0x88,
+ 0xe1, 0x90, 0x0f, 0x88, 0xe9, 0x91, 0x0f, 0x88, 0xf1, 0x92, 0x0f, 0x88,
+ 0xf9, 0x93, 0x0f, 0x89, 0x01, 0x94, 0x0f, 0x89, 0x09, 0x95, 0x0f, 0x89,
+ 0x11, 0x96, 0x0f, 0x89, 0x19, 0x97, 0x0f, 0x89, 0x21, 0x98, 0x0f, 0x89,
+ 0x28, 0x42, 0x00, 0xc9, 0xc2, 0x3e, 0x1e, 0xc7, 0x55, 0xba, 0x01, 0x24,
+ 0x01, 0xc2, 0x01, 0x04, 0x01, 0x23, 0xe8, 0xc7, 0x1f, 0xd9, 0x01, 0x24,
+ 0x29, 0xc5, 0x66, 0x81, 0x01, 0x23, 0xf0, 0xc8, 0x4c, 0x2c, 0x01, 0x24,
+ 0x21, 0xc6, 0x42, 0xe9, 0x01, 0x24, 0x18, 0xc6, 0x12, 0xf0, 0x01, 0x24,
+ 0x11, 0xc7, 0x52, 0xbd, 0x01, 0x24, 0x08, 0xc4, 0x15, 0xa7, 0x01, 0x23,
+ 0xd1, 0xc2, 0x22, 0x45, 0x01, 0x23, 0xc8, 0xc3, 0x0d, 0x8f, 0x01, 0x23,
+ 0xc1, 0xc3, 0x08, 0xde, 0x01, 0x23, 0xb8, 0xc4, 0x05, 0xde, 0x01, 0x23,
+ 0xb1, 0xc2, 0x0a, 0x20, 0x01, 0x23, 0xa8, 0xc5, 0x7f, 0x3f, 0x01, 0x90,
+ 0x03, 0x02, 0x3e, 0x2a, 0xc6, 0xae, 0x92, 0x01, 0x90, 0x52, 0x02, 0x3e,
+ 0x30, 0xc2, 0x00, 0x39, 0x01, 0x90, 0x78, 0xc5, 0xba, 0x7e, 0x01, 0x90,
+ 0x13, 0x02, 0x3e, 0x36, 0xc6, 0xae, 0x6e, 0x01, 0x90, 0x5a, 0x02, 0x3e,
+ 0x3c, 0xc2, 0x00, 0x39, 0x01, 0x90, 0x88, 0xc2, 0x00, 0x39, 0x01, 0x90,
+ 0x90, 0xc4, 0x68, 0xc6, 0x01, 0x90, 0x2b, 0x02, 0x3e, 0x42, 0xc6, 0xae,
+ 0x80, 0x01, 0x90, 0x62, 0x02, 0x3e, 0x48, 0xc2, 0x00, 0x39, 0x01, 0x90,
+ 0xa0, 0xc2, 0x00, 0x39, 0x01, 0x90, 0xa8, 0xc4, 0xb1, 0xd8, 0x01, 0x90,
+ 0x43, 0x02, 0x3e, 0x4e, 0xc6, 0xb1, 0xd7, 0x01, 0x90, 0x4a, 0x02, 0x3e,
+ 0x52, 0xc2, 0x00, 0x39, 0x01, 0x90, 0xd8, 0xc2, 0x0a, 0x20, 0x01, 0x91,
+ 0x09, 0xc4, 0x05, 0xde, 0x01, 0x91, 0x11, 0xc2, 0x01, 0x04, 0x01, 0x91,
+ 0x48, 0xc3, 0x08, 0xde, 0x01, 0x91, 0x19, 0x0b, 0xc2, 0x3e, 0x58, 0xc7,
+ 0xce, 0xad, 0x01, 0x92, 0x00, 0xc2, 0x22, 0x45, 0x01, 0x91, 0x29, 0x07,
+ 0xc2, 0x3e, 0x6a, 0x17, 0xc2, 0x3e, 0x76, 0x16, 0xc2, 0x3e, 0x80, 0xc6,
+ 0xd2, 0x26, 0x01, 0x91, 0x99, 0xc6, 0xd9, 0x6a, 0x01, 0x91, 0xa8, 0xc4,
+ 0x00, 0x48, 0x01, 0x91, 0x39, 0xc4, 0x67, 0xe5, 0x01, 0x91, 0x79, 0xc9,
+ 0xad, 0x8a, 0x01, 0x91, 0xe8, 0xc3, 0x03, 0x5e, 0x01, 0x91, 0x41, 0xc3,
+ 0x01, 0x02, 0x01, 0x91, 0xa0, 0xc3, 0x02, 0x14, 0x01, 0x91, 0x51, 0xc4,
+ 0x0b, 0x47, 0x01, 0x91, 0x70, 0xc4, 0x00, 0x97, 0x01, 0x91, 0x61, 0xc3,
+ 0x29, 0xec, 0x01, 0x91, 0x68, 0xcd, 0x7c, 0xd4, 0x01, 0x91, 0xb9, 0xc3,
+ 0x00, 0x2f, 0x01, 0x91, 0xd0, 0xc7, 0x7b, 0x61, 0x01, 0x91, 0xc9, 0x15,
+ 0xc2, 0x3e, 0x8c, 0xc3, 0x28, 0xde, 0x01, 0x92, 0x18, 0xd1, 0x03, 0x76,
+ 0x01, 0x57, 0x91, 0xce, 0x35, 0xda, 0x01, 0x57, 0x98, 0xc5, 0x2a, 0xb4,
+ 0x08, 0xd7, 0xb9, 0xc4, 0x0c, 0x34, 0x08, 0xd7, 0x9a, 0x02, 0x3e, 0x96,
+ 0x45, 0x24, 0x06, 0xc2, 0x3e, 0x9c, 0x83, 0x08, 0xd6, 0x98, 0x83, 0x08,
+ 0xd6, 0xd8, 0x83, 0x08, 0xd6, 0xd0, 0xc2, 0x01, 0x0e, 0x08, 0xd6, 0xa1,
+ 0x83, 0x08, 0xd6, 0x68, 0xc2, 0x01, 0x0e, 0x08, 0xd6, 0x89, 0x83, 0x08,
+ 0xd6, 0x00, 0x83, 0x08, 0xd6, 0x81, 0xc2, 0x07, 0x69, 0x08, 0xd6, 0x38,
+ 0xc2, 0x01, 0x0e, 0x08, 0xd6, 0x79, 0x83, 0x08, 0xd6, 0x70, 0xc2, 0x01,
+ 0x0e, 0x08, 0xd6, 0x51, 0x83, 0x08, 0xd6, 0x08, 0xc5, 0x2a, 0xb4, 0x08,
+ 0xd7, 0x71, 0xc4, 0x0c, 0x34, 0x08, 0xd7, 0x5a, 0x02, 0x3e, 0xbf, 0xc6,
+ 0x2a, 0xb3, 0x08, 0xd7, 0x40, 0x16, 0xc2, 0x3e, 0xc5, 0x08, 0xc2, 0x3e,
+ 0xd5, 0xc3, 0x05, 0x17, 0x08, 0x43, 0xc8, 0xd3, 0x43, 0x15, 0x08, 0x43,
+ 0xb9, 0x45, 0x02, 0x01, 0x42, 0x3e, 0xe1, 0xc2, 0x3e, 0xa5, 0x0b, 0x5c,
+ 0x79, 0xc2, 0x1a, 0x37, 0x0b, 0x5c, 0x50, 0xc2, 0x04, 0xa6, 0x0b, 0x5c,
+ 0x71, 0xc3, 0x99, 0x98, 0x0b, 0x5c, 0x41, 0xc2, 0x02, 0x14, 0x0b, 0x5c,
+ 0x10, 0x15, 0xc2, 0x3f, 0x4a, 0xc3, 0xed, 0x0b, 0x0b, 0x5c, 0x28, 0xc2,
+ 0x1a, 0x37, 0x0b, 0x5c, 0x61, 0xc3, 0xe6, 0x55, 0x0b, 0x5b, 0xf0, 0x8f,
+ 0x0b, 0x5c, 0x49, 0xc2, 0x3e, 0xa5, 0x0b, 0x5c, 0x18, 0xc3, 0xeb, 0x70,
+ 0x0b, 0x5c, 0x01, 0xc2, 0x00, 0xbe, 0x0b, 0x5b, 0xf8, 0xc2, 0x20, 0xa8,
+ 0x0b, 0x59, 0x79, 0xc3, 0x53, 0x44, 0x0b, 0x59, 0x38, 0xc2, 0x20, 0xa8,
+ 0x0b, 0x59, 0x61, 0x16, 0xc2, 0x3f, 0x5c, 0xc4, 0xe7, 0x27, 0x0b, 0x59,
+ 0x41, 0xc3, 0xcb, 0x56, 0x0b, 0x59, 0x11, 0xc3, 0x20, 0xa7, 0x0b, 0x59,
+ 0x00, 0xc3, 0x56, 0x86, 0x0b, 0x59, 0x49, 0xc3, 0x20, 0xa7, 0x0b, 0x59,
+ 0x29, 0xc2, 0x20, 0xa8, 0x0b, 0x58, 0xf8, 0xc3, 0xeb, 0xca, 0x0b, 0x5b,
+ 0xa3, 0x02, 0x3f, 0x68, 0xc7, 0xce, 0x91, 0x0b, 0x5a, 0x28, 0xca, 0xaa,
+ 0x1a, 0x0b, 0x5b, 0x99, 0xc4, 0x14, 0xf2, 0x0b, 0x59, 0xc8, 0xc5, 0xde,
+ 0x41, 0x0b, 0x5b, 0x71, 0xc4, 0xe4, 0xab, 0x0b, 0x5a, 0x08, 0xc2, 0x02,
+ 0x14, 0x0b, 0x5b, 0x21, 0x44, 0x1a, 0x6b, 0x42, 0x3f, 0x6e, 0x0a, 0xc2,
+ 0x3f, 0x86, 0xc9, 0xb7, 0x35, 0x0b, 0x59, 0xc0, 0x00, 0xc2, 0x3f, 0x92,
+ 0x95, 0x0b, 0x5a, 0xd8, 0x98, 0x0b, 0x58, 0xd9, 0x84, 0x0b, 0x58, 0xd0,
+ 0x98, 0x0b, 0x58, 0x79, 0x84, 0x0b, 0x58, 0x70, 0x98, 0x0b, 0x58, 0x59,
+ 0x84, 0x0b, 0x58, 0x50, 0x98, 0x0b, 0x58, 0x29, 0x84, 0x0b, 0x58, 0x20,
+ 0x98, 0x0b, 0x58, 0xa9, 0x84, 0x0b, 0x58, 0xa0, 0x98, 0x0b, 0x58, 0x69,
+ 0x84, 0x0b, 0x58, 0x60, 0x98, 0x0b, 0x58, 0x89, 0x84, 0x0b, 0x58, 0x80,
+ 0x98, 0x0b, 0x58, 0x09, 0x84, 0x0b, 0x58, 0x00, 0xc5, 0x10, 0x15, 0x01,
+ 0x81, 0x00, 0x45, 0x01, 0xac, 0xc2, 0x3f, 0x9e, 0xc8, 0x7e, 0x0c, 0x0f,
+ 0xb2, 0x69, 0x14, 0xc2, 0x3f, 0xba, 0xcd, 0x7f, 0x6b, 0x0f, 0xb2, 0x39,
+ 0xcf, 0x60, 0xff, 0x0f, 0xc9, 0xc1, 0x43, 0x01, 0xf4, 0xc2, 0x3f, 0xc0,
+ 0xc8, 0xc0, 0x93, 0x0f, 0xce, 0xb8, 0xc4, 0x05, 0xde, 0x08, 0x48, 0xd9,
+ 0x19, 0xc2, 0x3f, 0xcc, 0xc2, 0x01, 0x04, 0x08, 0x48, 0xb8, 0xc8, 0x0d,
+ 0x7e, 0x08, 0x48, 0xc8, 0xc2, 0x20, 0xa8, 0x08, 0x48, 0xa9, 0xc2, 0x00,
+ 0xdd, 0x08, 0x48, 0x40, 0xc3, 0x11, 0x40, 0x08, 0x48, 0xa1, 0xc3, 0x00,
+ 0x5d, 0x08, 0x48, 0x89, 0xc3, 0x7a, 0xe0, 0x08, 0x48, 0x70, 0xc2, 0x01,
+ 0x5b, 0x08, 0x48, 0x79, 0xc2, 0x01, 0x1f, 0x08, 0x48, 0x00, 0x96, 0x08,
+ 0x48, 0x38, 0x83, 0x05, 0x42, 0x01, 0xc2, 0x01, 0x0e, 0x05, 0x42, 0x08,
+ 0x83, 0x05, 0x42, 0x11, 0xc2, 0x07, 0x69, 0x05, 0x43, 0x28, 0xc2, 0x07,
+ 0x69, 0x05, 0x42, 0x19, 0xc2, 0x1a, 0x36, 0x05, 0x42, 0x39, 0x83, 0x05,
+ 0x42, 0x59, 0xc2, 0x01, 0x01, 0x05, 0x43, 0x60, 0x83, 0x05, 0x42, 0x23,
+ 0x02, 0x3f, 0xd6, 0xc2, 0x01, 0x0e, 0x05, 0x42, 0x28, 0x83, 0x05, 0x42,
+ 0x41, 0xc2, 0x01, 0x0e, 0x05, 0x42, 0x49, 0x15, 0xc2, 0x3f, 0xdc, 0x16,
+ 0x42, 0x3f, 0xe6, 0x83, 0x05, 0x42, 0x51, 0xc2, 0x05, 0x5c, 0x05, 0x42,
+ 0x91, 0xc2, 0x0c, 0x25, 0x05, 0x43, 0x58, 0x83, 0x05, 0x42, 0x61, 0xc2,
+ 0x01, 0x0e, 0x05, 0x42, 0x68, 0xc2, 0x01, 0x0e, 0x05, 0x42, 0xa1, 0x83,
+ 0x05, 0x42, 0xa8, 0xc6, 0x21, 0x2d, 0x05, 0x42, 0xb1, 0xc2, 0x01, 0x0e,
+ 0x05, 0x42, 0xd1, 0x83, 0x05, 0x42, 0xd8, 0xcb, 0x92, 0xd0, 0x05, 0x43,
+ 0x69, 0xcb, 0x94, 0x67, 0x05, 0x43, 0x80, 0x87, 0x05, 0x43, 0x30, 0xc8,
+ 0xbd, 0xab, 0x05, 0x43, 0x71, 0xc4, 0x0c, 0x0b, 0x05, 0x43, 0x78, 0x4f,
+ 0x5d, 0xd0, 0xc2, 0x3f, 0xf0, 0xd2, 0x4d, 0x82, 0x05, 0x43, 0x90, 0xc9,
+ 0xaf, 0xee, 0x08, 0x0e, 0x89, 0xc8, 0xba, 0x23, 0x08, 0x0f, 0x90, 0xc5,
+ 0x67, 0xde, 0x08, 0x0e, 0x99, 0xcd, 0x7c, 0x04, 0x08, 0x0f, 0x11, 0x96,
+ 0x08, 0x0f, 0x60, 0xc2, 0x01, 0xb3, 0x08, 0x0f, 0x23, 0x02, 0x40, 0x02,
+ 0xc4, 0xe6, 0x2f, 0x08, 0x0f, 0x30, 0x99, 0x08, 0x0e, 0xd1, 0xc7, 0xc5,
+ 0x76, 0x08, 0x0f, 0x08, 0xc4, 0x93, 0x06, 0x08, 0x0f, 0x38, 0xc3, 0x1e,
+ 0x70, 0x08, 0x0e, 0xd9, 0x92, 0x08, 0x0f, 0x40, 0xc5, 0xe0, 0xb7, 0x08,
+ 0xd8, 0x49, 0xd8, 0x24, 0x2c, 0x08, 0xd8, 0x41, 0x48, 0x32, 0xe6, 0xc2,
+ 0x40, 0x08, 0xce, 0x6f, 0x0c, 0x08, 0xd8, 0x23, 0x02, 0x40, 0x14, 0xc5,
+ 0x24, 0x3f, 0x08, 0xd8, 0x13, 0x02, 0x40, 0x1a, 0x42, 0x02, 0x52, 0xc2,
+ 0x40, 0x20, 0x03, 0xc2, 0x40, 0x2c, 0xc5, 0x33, 0x1a, 0x00, 0x49, 0xe1,
+ 0xcb, 0x21, 0x1a, 0x00, 0x48, 0x0b, 0x02, 0x40, 0x38, 0xd4, 0x39, 0x56,
+ 0x00, 0x48, 0x01, 0x15, 0xc2, 0x40, 0x3c, 0xc8, 0xc0, 0xbb, 0x05, 0x47,
+ 0xc1, 0xd9, 0x21, 0x13, 0x05, 0x47, 0xa1, 0xd0, 0x5e, 0x9f, 0x00, 0x4b,
+ 0x88, 0xc8, 0x73, 0x9c, 0x00, 0x4a, 0x91, 0xc6, 0x73, 0x9e, 0x00, 0x4a,
+ 0x88, 0xd0, 0x58, 0xcf, 0x08, 0xd8, 0x08, 0x99, 0x00, 0x4a, 0x79, 0x97,
+ 0x00, 0x4a, 0x61, 0x8b, 0x00, 0x4a, 0x41, 0x83, 0x00, 0x49, 0xf1, 0x9b,
+ 0x05, 0x47, 0xf8, 0xc2, 0x09, 0x06, 0x00, 0x49, 0xd9, 0x87, 0x00, 0x49,
+ 0xd0, 0x91, 0x00, 0x4a, 0x51, 0x87, 0x00, 0x4a, 0x30, 0x91, 0x00, 0x4a,
+ 0x49, 0x87, 0x00, 0x4a, 0x29, 0xc6, 0xd7, 0x06, 0x00, 0x4a, 0xa8, 0x94,
+ 0x00, 0x4a, 0x1b, 0x02, 0x40, 0x48, 0x8e, 0x00, 0x4b, 0x12, 0x02, 0x40,
+ 0x4c, 0x97, 0x00, 0x4a, 0x13, 0x02, 0x40, 0x50, 0x87, 0x00, 0x4a, 0xb0,
+ 0x8b, 0x00, 0x4a, 0x00, 0x83, 0x00, 0x49, 0xc9, 0xc7, 0xc6, 0xbf, 0x00,
+ 0x4b, 0xd0, 0x83, 0x00, 0x49, 0xc1, 0xc2, 0x0e, 0xe5, 0x00, 0x49, 0xb9,
+ 0x0a, 0x42, 0x40, 0x54, 0x83, 0x00, 0x49, 0xa9, 0x47, 0xb7, 0xd8, 0x42,
+ 0x40, 0x5e, 0x0e, 0xc2, 0x40, 0x6c, 0x83, 0x00, 0x49, 0x90, 0xc2, 0x00,
+ 0x9a, 0x00, 0x49, 0x89, 0x83, 0x00, 0x49, 0x81, 0xc2, 0x01, 0x0e, 0x00,
+ 0x4a, 0xe8, 0x83, 0x00, 0x49, 0x79, 0xc2, 0x1a, 0x36, 0x00, 0x4a, 0xf8,
+ 0xc9, 0xad, 0x15, 0x00, 0x4b, 0xc0, 0xc2, 0x01, 0x0e, 0x00, 0x49, 0x69,
+ 0x83, 0x00, 0x49, 0x61, 0xc2, 0x02, 0x1d, 0x00, 0x4b, 0xf8, 0xc2, 0x01,
+ 0x0e, 0x00, 0x49, 0x59, 0x83, 0x00, 0x49, 0x50, 0x10, 0xc2, 0x40, 0x76,
+ 0x83, 0x00, 0x49, 0x41, 0xc2, 0x1a, 0x36, 0x00, 0x48, 0xf1, 0xc2, 0x07,
+ 0x69, 0x00, 0x48, 0xc8, 0xc2, 0x01, 0x0e, 0x00, 0x49, 0x39, 0x83, 0x00,
+ 0x49, 0x31, 0x06, 0x42, 0x40, 0x80, 0xc2, 0x01, 0x0e, 0x00, 0x49, 0x29,
+ 0x83, 0x00, 0x49, 0x21, 0x16, 0x42, 0x40, 0x8e, 0xc2, 0x01, 0x0e, 0x00,
+ 0x48, 0xe9, 0x83, 0x00, 0x48, 0xe1, 0xc2, 0x26, 0x94, 0x00, 0x4b, 0xe0,
+ 0xc2, 0x01, 0x0e, 0x00, 0x48, 0xd9, 0x83, 0x00, 0x48, 0xd2, 0x02, 0x40,
+ 0x98, 0x0a, 0xc2, 0x40, 0x9e, 0x83, 0x00, 0x48, 0xb9, 0xc2, 0x07, 0x69,
+ 0x00, 0x4b, 0xd9, 0xcb, 0x26, 0x0d, 0x00, 0x4b, 0xe8, 0x0a, 0xc2, 0x40,
+ 0xa8, 0x83, 0x00, 0x48, 0xa8, 0x97, 0x00, 0x48, 0xa1, 0x8b, 0x00, 0x48,
+ 0x81, 0x83, 0x00, 0x48, 0x31, 0x9b, 0x05, 0x47, 0xf1, 0x99, 0x00, 0x4b,
+ 0xa8, 0x87, 0x00, 0x4b, 0x99, 0xc2, 0x09, 0x06, 0x00, 0x4b, 0xa0, 0x97,
+ 0x00, 0x48, 0x53, 0x02, 0x40, 0xb2, 0x87, 0x00, 0x4b, 0xb0, 0x8b, 0x00,
+ 0x48, 0x40, 0x83, 0x00, 0x4a, 0xd9, 0xc2, 0x01, 0x0e, 0x00, 0x4b, 0xc8,
+ 0xc4, 0x24, 0x35, 0x00, 0x4b, 0x79, 0xc5, 0x05, 0x1b, 0x00, 0x4b, 0x71,
+ 0x15, 0xc2, 0x40, 0xb6, 0x08, 0xc2, 0x40, 0xc2, 0x16, 0xc2, 0x40, 0xce,
+ 0xc3, 0x05, 0x17, 0x00, 0x4b, 0x39, 0xc4, 0x16, 0x57, 0x00, 0x4b, 0x30,
+ 0x45, 0x2c, 0x27, 0xc2, 0x40, 0xda, 0x46, 0x2f, 0xd9, 0xc2, 0x40, 0xf0,
+ 0xc2, 0x0c, 0x22, 0x08, 0x20, 0x61, 0x11, 0xc2, 0x41, 0x06, 0xc2, 0x05,
+ 0x06, 0x08, 0x20, 0x71, 0xc3, 0xe0, 0xa5, 0x08, 0x20, 0x79, 0x8a, 0x08,
+ 0x20, 0x81, 0xc3, 0x75, 0x8f, 0x08, 0x20, 0x89, 0xc3, 0xae, 0xd7, 0x08,
+ 0x20, 0x91, 0x16, 0xc2, 0x41, 0x0e, 0xc3, 0x7d, 0x6e, 0x08, 0x20, 0xa1,
+ 0xc4, 0x44, 0x07, 0x08, 0x20, 0xa9, 0xc3, 0x2f, 0xc8, 0x08, 0x20, 0xb1,
+ 0xc3, 0x73, 0x52, 0x08, 0x20, 0xb9, 0xc3, 0x8f, 0xce, 0x08, 0x20, 0xc1,
+ 0x07, 0xc2, 0x41, 0x1a, 0xc3, 0x35, 0x30, 0x08, 0x20, 0xd1, 0x1c, 0x42,
+ 0x41, 0x42, 0x45, 0x2c, 0x27, 0xc2, 0x41, 0x4e, 0x46, 0x2f, 0xd9, 0xc2,
+ 0x41, 0x64, 0xc2, 0x0c, 0x22, 0x08, 0x21, 0xa1, 0x11, 0xc2, 0x41, 0x7a,
+ 0xc2, 0x05, 0x06, 0x08, 0x21, 0xb1, 0xc3, 0xe0, 0xa5, 0x08, 0x21, 0xb9,
+ 0x8a, 0x08, 0x21, 0xc1, 0xc3, 0x75, 0x8f, 0x08, 0x21, 0xc9, 0xc3, 0xae,
+ 0xd7, 0x08, 0x21, 0xd1, 0x16, 0xc2, 0x41, 0x82, 0xc3, 0x7d, 0x6e, 0x08,
+ 0x21, 0xe1, 0xc4, 0x44, 0x07, 0x08, 0x21, 0xe9, 0xc3, 0x2f, 0xc8, 0x08,
+ 0x21, 0xf1, 0xc3, 0x73, 0x52, 0x08, 0x21, 0xf9, 0xc3, 0x8f, 0xce, 0x08,
+ 0x22, 0x01, 0x07, 0xc2, 0x41, 0x8e, 0xc3, 0x35, 0x30, 0x08, 0x22, 0x11,
+ 0x1c, 0x42, 0x41, 0xb6, 0xc4, 0x00, 0xcd, 0x01, 0x1e, 0x61, 0xc5, 0x00,
+ 0x47, 0x01, 0x1d, 0xf8, 0xc4, 0x00, 0xcd, 0x01, 0x1e, 0x59, 0xc5, 0x00,
+ 0x47, 0x01, 0x1d, 0xf0, 0xc4, 0x99, 0x0b, 0x0e, 0x98, 0x21, 0xc5, 0x6e,
+ 0x27, 0x0e, 0x98, 0x18, 0xc9, 0x11, 0x47, 0x01, 0x24, 0x81, 0xc5, 0x00,
+ 0xea, 0x0f, 0x88, 0x50, 0xc9, 0x11, 0x47, 0x01, 0x24, 0x79, 0xc5, 0x00,
+ 0xea, 0x0f, 0x88, 0x48, 0x00, 0x42, 0x41, 0xc2, 0x00, 0x42, 0x41, 0xce,
+ 0x00, 0x42, 0x41, 0xda, 0x00, 0x42, 0x41, 0xe6, 0x00, 0x42, 0x41, 0xf2,
+ 0x00, 0x42, 0x41, 0xfe, 0xc9, 0x11, 0x47, 0x01, 0x24, 0x41, 0xc5, 0x00,
+ 0xea, 0x0f, 0x88, 0x10, 0xc9, 0x11, 0x47, 0x0f, 0x88, 0x01, 0xc5, 0x00,
+ 0xea, 0x0f, 0x88, 0x08, 0xc4, 0x24, 0x35, 0x08, 0xca, 0xc9, 0xc5, 0x05,
+ 0x1b, 0x08, 0xca, 0xc1, 0x15, 0xc2, 0x42, 0x0a, 0x08, 0xc2, 0x42, 0x16,
+ 0x16, 0xc2, 0x42, 0x22, 0xc3, 0x05, 0x17, 0x08, 0xca, 0x89, 0xc4, 0x16,
+ 0x57, 0x08, 0xca, 0x80, 0x91, 0x08, 0xc9, 0xc1, 0x03, 0xc2, 0x42, 0x2e,
+ 0x87, 0x08, 0xc9, 0xa9, 0x97, 0x08, 0xc9, 0x9b, 0x02, 0x42, 0x36, 0x8b,
+ 0x08, 0xc9, 0x8a, 0x02, 0x42, 0x3a, 0xc2, 0x00, 0x96, 0x08, 0xc9, 0x71,
+ 0x83, 0x08, 0xc9, 0x40, 0x83, 0x08, 0xc9, 0x61, 0xc2, 0x0e, 0xe5, 0x08,
+ 0xc9, 0x59, 0xc2, 0x01, 0x0e, 0x08, 0xc9, 0x50, 0xc2, 0x1a, 0x36, 0x08,
+ 0xc9, 0x31, 0x83, 0x08, 0xc9, 0x28, 0xc2, 0x01, 0x0e, 0x08, 0xc9, 0x19,
+ 0x83, 0x08, 0xc9, 0x10, 0xc2, 0x01, 0x0e, 0x08, 0xc9, 0x09, 0x83, 0x08,
+ 0xc9, 0x00, 0x83, 0x08, 0xc8, 0xf9, 0xc2, 0x01, 0x01, 0x08, 0xc8, 0xd1,
+ 0xc2, 0x1a, 0x36, 0x08, 0xc8, 0xa9, 0xc2, 0x07, 0x69, 0x08, 0xc8, 0x80,
+ 0xc2, 0x01, 0x0e, 0x08, 0xc8, 0xf1, 0x83, 0x08, 0xc8, 0xe9, 0x06, 0x42,
+ 0x42, 0x3e, 0xc2, 0x01, 0x0e, 0x08, 0xc8, 0xe1, 0x83, 0x08, 0xc8, 0xd9,
+ 0xc2, 0x00, 0x44, 0x08, 0xc8, 0xb0, 0xc2, 0x01, 0x0e, 0x08, 0xc8, 0x91,
+ 0x83, 0x08, 0xc8, 0x88, 0xc2, 0x01, 0x0e, 0x08, 0xc8, 0x79, 0x83, 0x08,
+ 0xc8, 0x70, 0xc2, 0x01, 0x0e, 0x08, 0xc8, 0x69, 0x83, 0x08, 0xc8, 0x60,
+ 0x97, 0x08, 0xc8, 0x28, 0x8b, 0x08, 0xc8, 0x18, 0x83, 0x08, 0xc8, 0x08,
+ 0xc4, 0x02, 0xe3, 0x01, 0x10, 0xa9, 0xc3, 0x02, 0xcc, 0x00, 0x07, 0xb8,
+ 0xc4, 0x24, 0x35, 0x01, 0x3c, 0x91, 0xc5, 0x05, 0x1b, 0x01, 0x3c, 0x89,
+ 0x15, 0xc2, 0x42, 0x48, 0x08, 0xc2, 0x42, 0x54, 0x16, 0xc2, 0x42, 0x60,
+ 0xc3, 0x05, 0x17, 0x01, 0x3c, 0x51, 0xc4, 0x16, 0x57, 0x0f, 0x88, 0x60,
+ 0xc4, 0x15, 0xa7, 0x01, 0x3b, 0xe1, 0xc2, 0x22, 0x45, 0x01, 0x3b, 0xd8,
+ 0xc3, 0x0d, 0x8f, 0x01, 0x3b, 0xd1, 0xc3, 0x08, 0xde, 0x01, 0x3b, 0xc8,
+ 0xc4, 0x05, 0xde, 0x01, 0x3b, 0xc1, 0xc2, 0x0a, 0x20, 0x01, 0x3b, 0xb8,
+ 0xc4, 0x15, 0xa7, 0x01, 0x3c, 0x31, 0xc2, 0x22, 0x45, 0x01, 0x3c, 0x28,
+ 0xc3, 0x0d, 0x8f, 0x01, 0x3c, 0x21, 0xc3, 0x08, 0xde, 0x01, 0x3c, 0x18,
+ 0xc4, 0x05, 0xde, 0x01, 0x3c, 0x11, 0xc2, 0x0a, 0x20, 0x01, 0x3c, 0x08,
+ 0xcf, 0x6c, 0x4e, 0x01, 0x58, 0xb1, 0xd0, 0x58, 0x4f, 0x01, 0x58, 0xb9,
+ 0xce, 0x72, 0x70, 0x01, 0x58, 0xc1, 0xd1, 0x52, 0x02, 0x01, 0x58, 0xc8,
+ 0xc4, 0x04, 0x63, 0x0f, 0xc8, 0x43, 0x02, 0x42, 0x6c, 0xcc, 0x8a, 0xc0,
+ 0x0f, 0xc8, 0x4a, 0x02, 0x42, 0x72, 0x16, 0xc2, 0x42, 0x78, 0x15, 0xc2,
+ 0x42, 0x82, 0x0a, 0xc2, 0x42, 0x8e, 0x03, 0xc2, 0x42, 0x9a, 0xcf, 0x62,
+ 0x1c, 0x01, 0x3f, 0x89, 0xcb, 0x09, 0xfc, 0x01, 0x0f, 0x4b, 0x02, 0x42,
+ 0xa9, 0x06, 0xc2, 0x42, 0xaf, 0xcd, 0x81, 0xce, 0x01, 0x0e, 0x51, 0xcc,
+ 0x31, 0xdd, 0x01, 0x0d, 0x79, 0xc6, 0xd1, 0x60, 0x0f, 0xb3, 0x79, 0x46,
+ 0x04, 0x91, 0xc2, 0x42, 0xbb, 0xd1, 0x55, 0xa9, 0x0f, 0xc1, 0xb9, 0xd0,
+ 0x5c, 0xef, 0x0f, 0xc1, 0xf8, 0xd2, 0x4b, 0xc0, 0x01, 0x57, 0x88, 0xa3,
+ 0x0f, 0x82, 0x99, 0x9d, 0x0f, 0x82, 0x69, 0x9e, 0x0f, 0x82, 0x71, 0x9f,
+ 0x0f, 0x82, 0x79, 0xa0, 0x0f, 0x82, 0x81, 0xa1, 0x0f, 0x82, 0x89, 0xa2,
+ 0x0f, 0x82, 0x90, 0xa3, 0x0f, 0x81, 0xf1, 0xa1, 0x0f, 0x81, 0xe1, 0x9d,
+ 0x0f, 0x81, 0xc1, 0x9e, 0x0f, 0x81, 0xc9, 0x9f, 0x0f, 0x81, 0xd1, 0xa0,
+ 0x0f, 0x81, 0xd9, 0xa2, 0x0f, 0x81, 0xe8, 0xa0, 0x0f, 0x81, 0xa1, 0x9f,
+ 0x0f, 0x81, 0x99, 0x9e, 0x0f, 0x81, 0x91, 0x9d, 0x0f, 0x81, 0x89, 0xa1,
+ 0x0f, 0x81, 0xa9, 0xa2, 0x0f, 0x81, 0xb1, 0xa3, 0x0f, 0x81, 0xb8, 0x9d,
+ 0x0f, 0x81, 0xf9, 0x9e, 0x0f, 0x82, 0x01, 0x9f, 0x0f, 0x82, 0x09, 0xa0,
+ 0x0f, 0x82, 0x11, 0xa1, 0x0f, 0x82, 0x19, 0xa2, 0x0f, 0x82, 0x21, 0xa3,
+ 0x0f, 0x82, 0x28, 0x9d, 0x0f, 0x82, 0x31, 0x9e, 0x0f, 0x82, 0x39, 0x9f,
+ 0x0f, 0x82, 0x41, 0xa0, 0x0f, 0x82, 0x49, 0xa1, 0x0f, 0x82, 0x51, 0xa2,
+ 0x0f, 0x82, 0x59, 0xa3, 0x0f, 0x82, 0x60, 0x9d, 0x0f, 0x82, 0xa1, 0x9e,
+ 0x0f, 0x82, 0xa9, 0x9f, 0x0f, 0x82, 0xb1, 0xa0, 0x0f, 0x82, 0xb9, 0xa1,
+ 0x0f, 0x82, 0xc1, 0xa2, 0x0f, 0x82, 0xc9, 0xa3, 0x0f, 0x82, 0xd0, 0x9d,
+ 0x0f, 0x82, 0xd9, 0x9e, 0x0f, 0x82, 0xe1, 0x9f, 0x0f, 0x82, 0xe9, 0xa0,
+ 0x0f, 0x82, 0xf1, 0xa1, 0x0f, 0x82, 0xf9, 0xa2, 0x0f, 0x83, 0x01, 0xa3,
+ 0x0f, 0x83, 0x08, 0x9d, 0x0f, 0x83, 0x19, 0x9e, 0x0f, 0x83, 0x21, 0x9f,
+ 0x0f, 0x83, 0x29, 0xa0, 0x0f, 0x83, 0x31, 0xa1, 0x0f, 0x83, 0x39, 0xa2,
+ 0x0f, 0x83, 0x41, 0xa3, 0x0f, 0x83, 0x48, 0x9d, 0x0f, 0x83, 0x51, 0x9e,
+ 0x0f, 0x83, 0x59, 0x9f, 0x0f, 0x83, 0x61, 0xa0, 0x0f, 0x83, 0x69, 0xa1,
+ 0x0f, 0x83, 0x71, 0xa2, 0x0f, 0x83, 0x79, 0xa3, 0x0f, 0x83, 0x80, 0x9d,
+ 0x0f, 0x83, 0x89, 0x9e, 0x0f, 0x83, 0x91, 0x9f, 0x0f, 0x83, 0x99, 0xa0,
+ 0x0f, 0x83, 0xa1, 0xa1, 0x0f, 0x83, 0xa9, 0xa2, 0x0f, 0x83, 0xb1, 0xa3,
+ 0x0f, 0x83, 0xb8, 0x9d, 0x0f, 0x83, 0xc1, 0x9e, 0x0f, 0x83, 0xc9, 0x9f,
+ 0x0f, 0x83, 0xd1, 0xa0, 0x0f, 0x83, 0xd9, 0xa1, 0x0f, 0x83, 0xe1, 0xa2,
+ 0x0f, 0x83, 0xe9, 0xa3, 0x0f, 0x83, 0xf0, 0x9d, 0x0f, 0x83, 0xf9, 0x9e,
+ 0x0f, 0x84, 0x01, 0x9f, 0x0f, 0x84, 0x09, 0xa0, 0x0f, 0x84, 0x11, 0xa1,
+ 0x0f, 0x84, 0x19, 0xa2, 0x0f, 0x84, 0x21, 0xa3, 0x0f, 0x84, 0x28, 0x9e,
+ 0x0f, 0x84, 0x39, 0x9f, 0x0f, 0x84, 0x41, 0xa0, 0x0f, 0x84, 0x49, 0xa1,
+ 0x0f, 0x84, 0x51, 0xa2, 0x0f, 0x84, 0x59, 0xa3, 0x0f, 0x84, 0x61, 0x9d,
+ 0x0f, 0x84, 0x30, 0x9d, 0x0f, 0x84, 0x69, 0x9e, 0x0f, 0x84, 0x71, 0x9f,
+ 0x0f, 0x84, 0x79, 0xa0, 0x0f, 0x84, 0x81, 0xa1, 0x0f, 0x84, 0x89, 0xa2,
+ 0x0f, 0x84, 0x91, 0xa3, 0x0f, 0x84, 0x98, 0x05, 0xc2, 0x42, 0xc7, 0x49,
+ 0x02, 0x5b, 0xc2, 0x42, 0xd9, 0x17, 0xc2, 0x42, 0xe8, 0x44, 0x04, 0x8b,
+ 0xc2, 0x42, 0xf4, 0x15, 0xc2, 0x43, 0x00, 0xcd, 0x2d, 0xa6, 0x01, 0x02,
+ 0x39, 0xd0, 0x0f, 0x62, 0x01, 0x01, 0xe1, 0x12, 0xc2, 0x43, 0x14, 0x06,
+ 0xc2, 0x43, 0x1e, 0x0a, 0xc2, 0x43, 0x2a, 0x0e, 0xc2, 0x43, 0x36, 0xdb,
+ 0x16, 0x3c, 0x01, 0x4c, 0xb1, 0x47, 0xcc, 0xa7, 0xc2, 0x43, 0x40, 0xcc,
+ 0x87, 0x84, 0x00, 0x16, 0xe9, 0xcd, 0x7c, 0x93, 0x07, 0xf2, 0x61, 0xce,
+ 0x70, 0x6a, 0x01, 0x70, 0xb8, 0xc9, 0x19, 0xc5, 0x01, 0x35, 0x19, 0xcb,
+ 0x23, 0xf1, 0x01, 0x35, 0x11, 0xc6, 0x01, 0xb1, 0x01, 0x5f, 0xe0, 0x46,
+ 0x00, 0x3e, 0xc2, 0x43, 0x4f, 0xc9, 0xab, 0xf5, 0x01, 0x0a, 0x28, 0x92,
+ 0x01, 0x08, 0xcb, 0x02, 0x43, 0x5f, 0xc5, 0x4f, 0xcc, 0x01, 0x09, 0xf1,
+ 0x9c, 0x01, 0x09, 0x21, 0x94, 0x01, 0x08, 0xe9, 0x93, 0x01, 0x08, 0xd1,
+ 0x90, 0x01, 0x08, 0xa9, 0x8a, 0x01, 0x08, 0x69, 0x85, 0x01, 0x08, 0x10,
+ 0xc5, 0x4f, 0xcc, 0x01, 0x09, 0xe9, 0xc2, 0x01, 0xf9, 0x01, 0x09, 0xe0,
+ 0xc9, 0x09, 0xde, 0x01, 0x54, 0xc9, 0xcc, 0x06, 0xfb, 0x01, 0x54, 0xd0,
+ 0x4c, 0x23, 0xb4, 0xc2, 0x43, 0x63, 0xd5, 0x32, 0x3e, 0x01, 0x57, 0xc9,
+ 0xd8, 0x25, 0x64, 0x01, 0x57, 0xd0, 0xc4, 0x21, 0x28, 0x08, 0xc1, 0xc9,
+ 0xc5, 0x45, 0xcf, 0x08, 0xc1, 0xc0, 0x97, 0x08, 0xc1, 0xb1, 0x8b, 0x08,
+ 0xc1, 0xa1, 0x83, 0x08, 0xc1, 0x60, 0x94, 0x08, 0xc1, 0x90, 0x97, 0x08,
+ 0xc1, 0x80, 0x8b, 0x08, 0xc1, 0x70, 0xc2, 0x00, 0x9a, 0x08, 0xc1, 0x59,
+ 0x83, 0x08, 0xc1, 0x20, 0x83, 0x08, 0xc1, 0x49, 0xc2, 0x0e, 0xe5, 0x08,
+ 0xc1, 0x41, 0xc2, 0x01, 0x0e, 0x08, 0xc1, 0x38, 0xc2, 0x01, 0x0e, 0x08,
+ 0xc1, 0x09, 0x83, 0x08, 0xc1, 0x00, 0xc2, 0x01, 0x0e, 0x08, 0xc0, 0xf9,
+ 0x83, 0x08, 0xc0, 0xf0, 0x83, 0x08, 0xc0, 0xe9, 0xc2, 0x01, 0x01, 0x08,
+ 0xc0, 0xc1, 0xc2, 0x1a, 0x36, 0x08, 0xc0, 0x99, 0xc2, 0x07, 0x69, 0x08,
+ 0xc0, 0x70, 0xc2, 0x01, 0x0e, 0x08, 0xc0, 0xe1, 0x83, 0x08, 0xc0, 0xd9,
+ 0x06, 0x42, 0x43, 0x6f, 0xc2, 0x01, 0x0e, 0x08, 0xc0, 0xd1, 0x83, 0x08,
+ 0xc0, 0xc9, 0x16, 0x42, 0x43, 0x79, 0xc2, 0x01, 0x0e, 0x08, 0xc0, 0x91,
+ 0x83, 0x08, 0xc0, 0x88, 0xc2, 0x01, 0x0e, 0x08, 0xc0, 0x81, 0x83, 0x08,
+ 0xc0, 0x78, 0xc2, 0x01, 0x0e, 0x08, 0xc0, 0x69, 0x83, 0x08, 0xc0, 0x60,
+ 0xc2, 0x01, 0x0e, 0x08, 0xc0, 0x59, 0x83, 0x08, 0xc0, 0x50, 0x97, 0x08,
+ 0xc0, 0x49, 0x8b, 0x08, 0xc0, 0x39, 0x83, 0x08, 0xc0, 0x08, 0x97, 0x08,
+ 0xc0, 0x28, 0x8b, 0x08, 0xc0, 0x18, 0x03, 0xc2, 0x43, 0x83, 0xc8, 0x00,
+ 0x29, 0x0d, 0xe4, 0xc3, 0x02, 0x43, 0x8f, 0xc4, 0x52, 0xda, 0x0d, 0xe4,
+ 0xb9, 0x0e, 0xc2, 0x43, 0x95, 0xc6, 0x03, 0xfa, 0x0d, 0xe4, 0xa9, 0xc3,
+ 0x05, 0xe3, 0x0d, 0xe4, 0xa1, 0xc5, 0x1e, 0x64, 0x0d, 0xe4, 0x91, 0xcb,
+ 0x91, 0x39, 0x0d, 0xe4, 0x88, 0xc7, 0x29, 0x5e, 0x0d, 0xe3, 0xa8, 0xc3,
+ 0x03, 0x5e, 0x0d, 0xe4, 0x31, 0xc9, 0xb2, 0xc7, 0x0d, 0xe4, 0x18, 0xc5,
+ 0xe2, 0x56, 0x0d, 0xe3, 0xc3, 0x02, 0x43, 0xa1, 0xc2, 0x00, 0x95, 0x0d,
+ 0xe3, 0xc8, 0x99, 0x0d, 0xe3, 0x00, 0xc3, 0x02, 0xc9, 0x0d, 0xe1, 0xb9,
+ 0x95, 0x0d, 0xe1, 0xb0, 0x92, 0x0d, 0xe1, 0xa3, 0x02, 0x43, 0xa7, 0x96,
+ 0x0d, 0xe1, 0x93, 0x02, 0x43, 0xad, 0x8c, 0x0d, 0xe1, 0x03, 0x02, 0x43,
+ 0xb3, 0x95, 0x0d, 0xe1, 0x51, 0xc8, 0x35, 0x24, 0x0d, 0xe1, 0x2b, 0x02,
+ 0x43, 0xb9, 0x8d, 0x0d, 0xe1, 0xfb, 0x02, 0x43, 0xbf, 0x8f, 0x0d, 0xe1,
+ 0xe1, 0x90, 0x0d, 0xe1, 0xd8, 0x8c, 0x0d, 0xe0, 0xa9, 0xc2, 0x45, 0xca,
+ 0x0d, 0xe0, 0x91, 0x11, 0xc2, 0x43, 0xc5, 0xc2, 0x00, 0x3a, 0x0d, 0xe3,
+ 0x41, 0x07, 0xc2, 0x43, 0xcd, 0x97, 0x0d, 0xe2, 0xc0, 0x90, 0x0d, 0xe1,
+ 0x83, 0x02, 0x43, 0xd9, 0x95, 0x0d, 0xe1, 0x4b, 0x02, 0x43, 0xdf, 0x8f,
+ 0x0d, 0xe0, 0xfb, 0x02, 0x43, 0xe5, 0xc8, 0x35, 0x24, 0x0d, 0xe1, 0x1a,
+ 0x02, 0x43, 0xeb, 0x8f, 0x0d, 0xe0, 0xf3, 0x02, 0x43, 0xf1, 0x95, 0x0d,
+ 0xe1, 0x41, 0xc8, 0x35, 0x24, 0x0d, 0xe1, 0x10, 0x83, 0x0d, 0xe3, 0x21,
+ 0x8b, 0x0d, 0xe3, 0x19, 0x91, 0x0d, 0xe3, 0x11, 0x97, 0x0d, 0xe3, 0x08,
+ 0x90, 0x0d, 0xe0, 0xeb, 0x02, 0x43, 0xf7, 0x95, 0x0d, 0xe1, 0x39, 0xc8,
+ 0x35, 0x24, 0x0d, 0xe1, 0x08, 0x97, 0x0d, 0xe2, 0xb1, 0x8b, 0x0d, 0xe2,
+ 0x68, 0x97, 0x0d, 0xe2, 0xa9, 0x8b, 0x0d, 0xe2, 0x78, 0x8f, 0x0d, 0xe0,
+ 0x79, 0xc3, 0x02, 0xc9, 0x0d, 0xe1, 0xe8, 0x8f, 0x0d, 0xe3, 0x31, 0x90,
+ 0x0d, 0xe3, 0x28, 0xc7, 0x1b, 0xa4, 0x00, 0x04, 0x69, 0xde, 0x0f, 0x3f,
+ 0x0f, 0xbe, 0x40, 0x00, 0x42, 0x43, 0xfd, 0xcf, 0x09, 0x58, 0x01, 0x5a,
+ 0x09, 0xd0, 0x01, 0x37, 0x01, 0x5a, 0x38, 0xda, 0x19, 0x66, 0x01, 0x30,
+ 0xc9, 0xdf, 0x0c, 0x07, 0x0f, 0xac, 0x89, 0xca, 0x3a, 0xe7, 0x01, 0x5f,
+ 0xf0, 0xc4, 0x1f, 0x02, 0x01, 0x11, 0xeb, 0x02, 0x44, 0x0f, 0xcb, 0x8f,
+ 0x81, 0x01, 0x01, 0xb9, 0x46, 0xd3, 0xe2, 0x42, 0x44, 0x15, 0xd3, 0x42,
+ 0xa3, 0x01, 0x0a, 0x19, 0xc8, 0x50, 0x04, 0x01, 0x02, 0x78, 0xcb, 0x95,
+ 0xc7, 0x01, 0x02, 0x59, 0xc4, 0x16, 0x95, 0x01, 0x01, 0xa8, 0xc5, 0x16,
+ 0x94, 0x01, 0x01, 0xb3, 0x02, 0x44, 0x21, 0xcf, 0x65, 0x28, 0x01, 0x57,
+ 0x68, 0xce, 0x52, 0xaf, 0x01, 0x4d, 0x28, 0xcc, 0x83, 0xe8, 0x01, 0x33,
+ 0xc9, 0xcc, 0x82, 0xb0, 0x01, 0x33, 0xc1, 0xcc, 0x82, 0xbc, 0x01, 0x33,
+ 0xb9, 0xcc, 0x82, 0xe0, 0x01, 0x33, 0xb1, 0xcc, 0x82, 0xec, 0x01, 0x33,
+ 0xa9, 0xcc, 0x82, 0xa4, 0x01, 0x33, 0xa1, 0xcc, 0x83, 0x28, 0x01, 0x33,
+ 0x98, 0x83, 0x05, 0x4a, 0x71, 0x97, 0x05, 0x4a, 0x68, 0x97, 0x05, 0x4a,
+ 0x61, 0x8b, 0x05, 0x4a, 0x50, 0xc2, 0x26, 0x94, 0x05, 0x4a, 0x29, 0x83,
+ 0x05, 0x49, 0xd8, 0xc2, 0x07, 0x69, 0x05, 0x4a, 0x19, 0x83, 0x05, 0x49,
+ 0x90, 0x07, 0xc2, 0x44, 0x27, 0xcd, 0x26, 0x2f, 0x00, 0x02, 0xeb, 0x02,
+ 0x44, 0x33, 0x0b, 0xc2, 0x44, 0x37, 0x42, 0x03, 0x07, 0xc2, 0x44, 0x43,
+ 0xd3, 0x1f, 0x0c, 0x01, 0x70, 0x18, 0x14, 0xc2, 0x44, 0x52, 0x10, 0x42,
+ 0x44, 0x5e, 0xc9, 0xa2, 0xaf, 0x01, 0x3e, 0xb1, 0x43, 0x03, 0x5f, 0xc2,
+ 0x44, 0x68, 0xcf, 0x61, 0xef, 0x0f, 0xdd, 0xe0, 0x43, 0x01, 0x1f, 0xc2,
+ 0x44, 0x74, 0xd5, 0x32, 0xa7, 0x0f, 0xab, 0xe8, 0xc7, 0xc7, 0xf3, 0x01,
+ 0x1d, 0xc9, 0xcd, 0x7b, 0x27, 0x01, 0x71, 0x08, 0xcc, 0x00, 0xd3, 0x00,
+ 0x03, 0xeb, 0x02, 0x44, 0x8c, 0xc6, 0xbf, 0x8c, 0x01, 0x18, 0x49, 0xcd,
+ 0x6a, 0x7f, 0x01, 0x80, 0x68, 0x00, 0x42, 0x44, 0x90, 0xc4, 0x20, 0xa2,
+ 0x01, 0x18, 0x59, 0x0b, 0x42, 0x44, 0xa2, 0x14, 0xc2, 0x44, 0xae, 0xc3,
+ 0x00, 0xda, 0x01, 0x15, 0x11, 0x0a, 0xc2, 0x44, 0xba, 0xd5, 0x0a, 0xe9,
+ 0x01, 0x80, 0xa8, 0x45, 0x02, 0x93, 0xc2, 0x44, 0xcc, 0xd9, 0x1f, 0x06,
+ 0x01, 0x70, 0x28, 0xcb, 0x85, 0x81, 0x01, 0x4e, 0xc9, 0x45, 0x01, 0x32,
+ 0x42, 0x44, 0xe2, 0xd6, 0x0a, 0xe8, 0x01, 0x4c, 0xc1, 0xd2, 0x23, 0x42,
+ 0x01, 0x80, 0x88, 0xca, 0x09, 0xfd, 0x01, 0x0f, 0x43, 0x02, 0x44, 0xfe,
+ 0xc9, 0xb2, 0xf4, 0x01, 0x0c, 0xe8, 0x42, 0x00, 0x47, 0xc2, 0x45, 0x02,
+ 0x19, 0xc2, 0x45, 0x0e, 0xd5, 0x33, 0x8e, 0x0f, 0xc5, 0x18, 0xcf, 0x60,
+ 0xb0, 0x0f, 0xc2, 0x91, 0x42, 0x00, 0x93, 0x42, 0x45, 0x1a, 0x45, 0x11,
+ 0x8e, 0xc2, 0x45, 0x26, 0x03, 0x42, 0x45, 0x32, 0x99, 0x01, 0x0c, 0x93,
+ 0x02, 0x45, 0x3e, 0xc6, 0xd7, 0xf0, 0x01, 0x48, 0xd0, 0xcb, 0x83, 0x05,
+ 0x01, 0x0f, 0x11, 0x46, 0x02, 0x92, 0x42, 0x45, 0x42, 0xcc, 0x81, 0xcf,
+ 0x01, 0x0e, 0x49, 0xcb, 0x9a, 0x60, 0x0f, 0xd7, 0xc0, 0xc5, 0xd1, 0x61,
+ 0x0f, 0xb3, 0x71, 0xd7, 0x2b, 0x31, 0x0f, 0xc5, 0x28, 0x45, 0x04, 0x92,
+ 0xc2, 0x45, 0x4e, 0xd8, 0x21, 0xa4, 0x0f, 0xc5, 0x09, 0xdf, 0x0d, 0x5c,
+ 0x0f, 0xc5, 0x48, 0xd0, 0x55, 0xaa, 0x0f, 0xc1, 0xb1, 0xe0, 0x09, 0xe7,
+ 0x0f, 0xc5, 0x58, 0xc7, 0x0d, 0x7f, 0x01, 0x0b, 0x6b, 0x02, 0x45, 0x5a,
+ 0xc8, 0x4f, 0xa2, 0x01, 0x0b, 0x7a, 0x02, 0x45, 0x60, 0xc3, 0x41, 0xca,
+ 0x01, 0x0b, 0x63, 0x02, 0x45, 0x66, 0xc2, 0x00, 0x29, 0x01, 0x0b, 0x22,
+ 0x02, 0x45, 0x6a, 0xca, 0xa3, 0xe4, 0x01, 0x0c, 0x28, 0xc9, 0x4f, 0xa1,
+ 0x01, 0x0c, 0x10, 0xc4, 0x25, 0x4d, 0x01, 0x0b, 0x59, 0x91, 0x01, 0x0b,
+ 0x08, 0xc8, 0xba, 0x93, 0x08, 0x0c, 0x81, 0xc8, 0x43, 0x59, 0x08, 0x0c,
+ 0x98, 0x44, 0x1a, 0xa6, 0xc2, 0x45, 0x6e, 0xcf, 0x0c, 0x17, 0x0f, 0xac,
+ 0x80, 0xc8, 0x0d, 0x7e, 0x08, 0x73, 0xc1, 0xc2, 0x0d, 0x8b, 0x08, 0x73,
+ 0x78, 0xc8, 0x0d, 0x7e, 0x08, 0x73, 0xb9, 0xc2, 0x0d, 0x8b, 0x08, 0x73,
+ 0x70, 0xca, 0x36, 0x8d, 0x08, 0x73, 0xb1, 0xc3, 0x41, 0xca, 0x08, 0x73,
+ 0x68, 0xca, 0x9f, 0x48, 0x08, 0x73, 0xa9, 0xc3, 0x0d, 0x8a, 0x08, 0x73,
+ 0x60, 0xcb, 0x12, 0xe3, 0x08, 0x73, 0xa1, 0xc4, 0x0d, 0x89, 0x08, 0x73,
+ 0x58, 0xc9, 0x15, 0x9c, 0x08, 0x73, 0x99, 0xc4, 0x15, 0xa9, 0x08, 0x73,
+ 0x50, 0x4d, 0x77, 0xc0, 0xc2, 0x45, 0x74, 0xcd, 0x82, 0x43, 0x00, 0xb5,
+ 0x00, 0x91, 0x00, 0xb7, 0x99, 0xce, 0x73, 0xa4, 0x00, 0xb6, 0xf9, 0xc5,
+ 0xda, 0xe5, 0x00, 0xb6, 0xa9, 0x90, 0x00, 0xb5, 0x81, 0x87, 0x00, 0xb5,
+ 0x79, 0xc3, 0x03, 0xf3, 0x00, 0xb5, 0x48, 0x8a, 0x00, 0xb7, 0x93, 0x02,
+ 0x45, 0x8a, 0xc3, 0x06, 0x26, 0x00, 0xb7, 0x29, 0xd6, 0x2d, 0x45, 0x00,
+ 0xb6, 0x59, 0xc7, 0xc8, 0x24, 0x00, 0xb6, 0x50, 0x43, 0x3b, 0x03, 0x42,
+ 0x45, 0x90, 0xcb, 0x97, 0xed, 0x00, 0xb7, 0x41, 0xc2, 0x00, 0xff, 0x00,
+ 0xb7, 0x09, 0xc2, 0x00, 0x56, 0x00, 0xb6, 0xeb, 0x02, 0x45, 0x9a, 0xc7,
+ 0xc6, 0x64, 0x00, 0xb6, 0x39, 0xcc, 0x86, 0x7c, 0x00, 0xb6, 0x08, 0x49,
+ 0x2d, 0x4b, 0xc2, 0x45, 0xa0, 0xd1, 0x53, 0x45, 0x00, 0xb6, 0xd0, 0x07,
+ 0xc2, 0x45, 0xbe, 0xc3, 0x66, 0xa5, 0x00, 0xb7, 0x19, 0xc6, 0xd4, 0xd8,
+ 0x00, 0xb7, 0x10, 0xc2, 0x00, 0x4d, 0x00, 0xb7, 0x01, 0xc9, 0xb0, 0xf3,
+ 0x00, 0xb6, 0xb1, 0xc2, 0x00, 0x56, 0x00, 0xb5, 0xb1, 0xc2, 0x01, 0x47,
+ 0x00, 0xb5, 0x38, 0xcb, 0x99, 0x2c, 0x00, 0xb6, 0xf1, 0x44, 0x19, 0x81,
+ 0x42, 0x45, 0xc8, 0xce, 0x75, 0x1e, 0x00, 0xb6, 0x79, 0xd3, 0x44, 0x1f,
+ 0x00, 0xb5, 0x30, 0xca, 0xa7, 0x18, 0x00, 0xb6, 0x49, 0xc3, 0x21, 0xcc,
+ 0x00, 0xb5, 0x59, 0xc3, 0x17, 0x1a, 0x00, 0xb5, 0x51, 0xc6, 0xd1, 0x24,
+ 0x00, 0xb5, 0x40, 0x07, 0xc2, 0x45, 0xda, 0xc2, 0x00, 0x4d, 0x00, 0xb5,
+ 0xc0, 0xc5, 0xd9, 0xb4, 0x00, 0xb5, 0xd9, 0xc6, 0xd5, 0x14, 0x00, 0xb5,
+ 0xd0, 0xcb, 0x99, 0xd1, 0x00, 0xb5, 0xc8, 0x94, 0x00, 0xb5, 0x18, 0x87,
+ 0x05, 0x28, 0x03, 0x02, 0x45, 0xe4, 0x90, 0x05, 0x2f, 0x10, 0x87, 0x05,
+ 0x2f, 0x23, 0x02, 0x45, 0xe8, 0x8b, 0x05, 0x29, 0x33, 0x02, 0x45, 0xf0,
+ 0x83, 0x05, 0x2a, 0x63, 0x02, 0x45, 0xf4, 0x91, 0x05, 0x2d, 0xeb, 0x02,
+ 0x45, 0xf8, 0x97, 0x05, 0x2c, 0xba, 0x02, 0x46, 0x00, 0x87, 0x05, 0x2f,
+ 0x33, 0x02, 0x46, 0x04, 0x8b, 0x05, 0x29, 0x43, 0x02, 0x46, 0x0f, 0x83,
+ 0x05, 0x2a, 0x73, 0x02, 0x46, 0x13, 0x91, 0x05, 0x2d, 0xfb, 0x02, 0x46,
+ 0x17, 0x97, 0x05, 0x2c, 0xca, 0x02, 0x46, 0x22, 0x87, 0x05, 0x2f, 0x43,
+ 0x02, 0x46, 0x26, 0x8b, 0x05, 0x29, 0x51, 0x83, 0x05, 0x2a, 0x81, 0x91,
+ 0x05, 0x2e, 0x0b, 0x02, 0x46, 0x2a, 0x97, 0x05, 0x2c, 0xd8, 0x0a, 0xc2,
+ 0x46, 0x2e, 0x87, 0x05, 0x2f, 0x53, 0x02, 0x46, 0x48, 0x8b, 0x05, 0x29,
+ 0x61, 0x83, 0x05, 0x2a, 0x91, 0x91, 0x05, 0x2e, 0x1b, 0x02, 0x46, 0x4c,
+ 0x97, 0x05, 0x2c, 0xe8, 0x04, 0xc2, 0x46, 0x50, 0x42, 0x83, 0xdc, 0xc2,
+ 0x46, 0x6a, 0x87, 0x05, 0x30, 0x43, 0x02, 0x46, 0x84, 0x8b, 0x05, 0x2a,
+ 0x31, 0x83, 0x05, 0x2b, 0x71, 0x91, 0x05, 0x2e, 0xf3, 0x02, 0x46, 0x88,
+ 0x97, 0x05, 0x2d, 0xb8, 0x12, 0xc2, 0x46, 0x8c, 0x87, 0x05, 0x30, 0x1b,
+ 0x02, 0x46, 0xa9, 0x8b, 0x05, 0x2a, 0x19, 0x83, 0x05, 0x2b, 0x53, 0x02,
+ 0x46, 0xad, 0x91, 0x05, 0x2e, 0xdb, 0x02, 0x46, 0xb1, 0x97, 0x05, 0x2d,
+ 0xa0, 0x04, 0xc2, 0x46, 0xb5, 0x87, 0x05, 0x30, 0x33, 0x02, 0x46, 0xcf,
+ 0x8b, 0x05, 0x2a, 0x29, 0x83, 0x05, 0x2b, 0x69, 0x91, 0x05, 0x2e, 0xeb,
+ 0x02, 0x46, 0xd7, 0x97, 0x05, 0x2d, 0xb0, 0x87, 0x05, 0x2f, 0x8b, 0x02,
+ 0x46, 0xdb, 0x8b, 0x05, 0x29, 0x89, 0x83, 0x05, 0x2a, 0xc1, 0x91, 0x05,
+ 0x2e, 0x4b, 0x02, 0x46, 0xdf, 0x97, 0x05, 0x2d, 0x10, 0x87, 0x05, 0x2f,
+ 0x93, 0x02, 0x46, 0xe3, 0x8b, 0x05, 0x29, 0x91, 0x83, 0x05, 0x2a, 0xc9,
+ 0x91, 0x05, 0x2e, 0x53, 0x02, 0x46, 0xe7, 0x97, 0x05, 0x2d, 0x18, 0x87,
+ 0x05, 0x2f, 0x9b, 0x02, 0x46, 0xeb, 0x0a, 0xc2, 0x46, 0xef, 0x8b, 0x05,
+ 0x29, 0x99, 0x83, 0x05, 0x2a, 0xd1, 0x91, 0x05, 0x2e, 0x5b, 0x02, 0x47,
+ 0x09, 0x97, 0x05, 0x2d, 0x20, 0x0a, 0xc2, 0x47, 0x0d, 0x87, 0x05, 0x2f,
+ 0xcb, 0x02, 0x47, 0x2b, 0x8b, 0x05, 0x29, 0xc9, 0x83, 0x05, 0x2b, 0x01,
+ 0x91, 0x05, 0x2e, 0x8b, 0x02, 0x47, 0x2f, 0x97, 0x05, 0x2d, 0x50, 0x87,
+ 0x05, 0x2f, 0xbb, 0x02, 0x47, 0x33, 0x8b, 0x05, 0x29, 0xb9, 0x83, 0x05,
+ 0x2a, 0xf1, 0x91, 0x05, 0x2e, 0x7b, 0x02, 0x47, 0x3d, 0x97, 0x05, 0x2d,
+ 0x40, 0x87, 0x05, 0x2f, 0xc3, 0x02, 0x47, 0x41, 0x8b, 0x05, 0x29, 0xc1,
+ 0x83, 0x05, 0x2a, 0xf9, 0x91, 0x05, 0x2e, 0x83, 0x02, 0x47, 0x45, 0x97,
+ 0x05, 0x2d, 0x48, 0x06, 0xc2, 0x47, 0x49, 0x0c, 0xc2, 0x47, 0x63, 0x89,
+ 0x05, 0x30, 0x5b, 0x02, 0x47, 0x7d, 0x87, 0x05, 0x30, 0x4b, 0x02, 0x47,
+ 0x93, 0x1b, 0xc2, 0x47, 0x97, 0x8b, 0x05, 0x2a, 0x39, 0x83, 0x05, 0x2b,
+ 0x79, 0x91, 0x05, 0x2e, 0xfb, 0x02, 0x47, 0xb1, 0x97, 0x05, 0x2d, 0xc0,
+ 0x87, 0x05, 0x2f, 0xdb, 0x02, 0x47, 0xb5, 0x0a, 0xc2, 0x47, 0xb9, 0x8b,
+ 0x05, 0x29, 0xd9, 0x83, 0x05, 0x2b, 0x11, 0x91, 0x05, 0x2e, 0x9b, 0x02,
+ 0x47, 0xd3, 0x97, 0x05, 0x2d, 0x60, 0x87, 0x05, 0x2f, 0xeb, 0x02, 0x47,
+ 0xd7, 0x0a, 0xc2, 0x47, 0xdb, 0x8b, 0x05, 0x29, 0xe9, 0x83, 0x05, 0x2b,
+ 0x21, 0x91, 0x05, 0x2e, 0xab, 0x02, 0x47, 0xf5, 0x97, 0x05, 0x2d, 0x70,
+ 0x87, 0x05, 0x2f, 0xfb, 0x02, 0x47, 0xf9, 0x8b, 0x05, 0x29, 0xf9, 0x83,
+ 0x05, 0x2b, 0x31, 0x91, 0x05, 0x2e, 0xbb, 0x02, 0x47, 0xfd, 0x97, 0x05,
+ 0x2d, 0x80, 0x87, 0x05, 0x30, 0x03, 0x02, 0x48, 0x01, 0x8b, 0x05, 0x2a,
+ 0x01, 0x83, 0x05, 0x2b, 0x39, 0x91, 0x05, 0x2e, 0xc3, 0x02, 0x48, 0x05,
+ 0x97, 0x05, 0x2d, 0x88, 0x87, 0x05, 0x30, 0x13, 0x02, 0x48, 0x09, 0x8b,
+ 0x05, 0x2a, 0x11, 0x83, 0x05, 0x2b, 0x49, 0x91, 0x05, 0x2e, 0xd3, 0x02,
+ 0x48, 0x0d, 0x97, 0x05, 0x2d, 0x98, 0x90, 0x05, 0x29, 0x28, 0x90, 0x05,
+ 0x2a, 0x50, 0x91, 0x05, 0x2b, 0x8b, 0x02, 0x48, 0x11, 0x90, 0x05, 0x2d,
+ 0xd8, 0x90, 0x05, 0x2c, 0xb0, 0xc4, 0xd0, 0x50, 0x05, 0x30, 0x99, 0xc2,
+ 0x0e, 0x30, 0x05, 0x30, 0xc0, 0xc4, 0xd0, 0x50, 0x05, 0x30, 0xa1, 0xc3,
+ 0x3b, 0x04, 0x05, 0x30, 0xe0, 0xc3, 0x01, 0x5e, 0x05, 0x30, 0xa9, 0xc2,
+ 0x0e, 0x30, 0x05, 0x30, 0xc9, 0xc3, 0x0a, 0x68, 0x05, 0x30, 0xe8, 0xc3,
+ 0x00, 0x55, 0x05, 0x30, 0xd1, 0xc4, 0xe9, 0x33, 0x05, 0x30, 0xd9, 0xc3,
+ 0xec, 0xba, 0x05, 0x30, 0xf0, 0xc9, 0x50, 0xc7, 0x01, 0x1e, 0x81, 0x45,
+ 0x01, 0xac, 0x42, 0x48, 0x15, 0xc7, 0x37, 0xb6, 0x00, 0x00, 0x5b, 0x02,
+ 0x48, 0x21, 0xc4, 0x39, 0xd7, 0x01, 0x5b, 0xf8, 0x00, 0x42, 0x48, 0x27,
+ 0xcb, 0x98, 0xa8, 0x01, 0x81, 0xa0, 0xcf, 0x18, 0x2e, 0x0f, 0xbd, 0xf9,
+ 0xd2, 0x25, 0x52, 0x0f, 0xbe, 0x80, 0xc6, 0x03, 0xfa, 0x0f, 0xbc, 0x41,
+ 0xc6, 0x01, 0xe9, 0x0f, 0xbc, 0x90, 0xc6, 0x2a, 0x4c, 0x0f, 0xb3, 0xe1,
+ 0xc6, 0x12, 0x73, 0x0f, 0xbd, 0x69, 0xd2, 0x48, 0xf0, 0x0f, 0xbd, 0xc8,
+ 0xce, 0x70, 0x16, 0x00, 0xe7, 0x89, 0xcb, 0x99, 0x8f, 0x00, 0xe7, 0x5b,
+ 0x02, 0x48, 0x33, 0xcc, 0x8b, 0xd4, 0x00, 0xe7, 0x51, 0xcc, 0x13, 0xee,
+ 0x00, 0xe7, 0x48, 0xc8, 0x73, 0x9c, 0x00, 0xe7, 0x31, 0xc6, 0x73, 0x9e,
+ 0x00, 0xe7, 0x20, 0xca, 0xa3, 0xda, 0x00, 0xe7, 0x40, 0xca, 0xa3, 0xda,
+ 0x00, 0xe7, 0x38, 0xca, 0xa2, 0xc2, 0x00, 0xe7, 0xc9, 0xc7, 0x06, 0xa0,
+ 0x00, 0xe6, 0xd0, 0xe0, 0x08, 0xa7, 0x00, 0xe7, 0x00, 0xca, 0xa3, 0x4e,
+ 0x00, 0xe6, 0xc8, 0x43, 0x00, 0xcf, 0xc2, 0x48, 0x39, 0xcc, 0x8e, 0xe0,
+ 0x70, 0x01, 0xe0, 0x4f, 0x01, 0xf7, 0xc2, 0x48, 0x4b, 0x4d, 0x27, 0x71,
+ 0x42, 0x48, 0xb3, 0x42, 0x00, 0xec, 0xc2, 0x49, 0x1b, 0xc3, 0x0c, 0x34,
+ 0x70, 0x01, 0xd0, 0xce, 0x26, 0x2e, 0x70, 0x02, 0xe9, 0xcb, 0x1c, 0xe0,
+ 0x70, 0x01, 0x49, 0xcd, 0x00, 0xd2, 0x70, 0x03, 0xe8, 0xc4, 0x24, 0x35,
+ 0x70, 0x01, 0xc9, 0xc5, 0x05, 0x1b, 0x70, 0x01, 0xc1, 0x15, 0xc2, 0x49,
+ 0x25, 0x08, 0xc2, 0x49, 0x31, 0x16, 0xc2, 0x49, 0x3d, 0xc3, 0x05, 0x17,
+ 0x70, 0x01, 0x89, 0xc4, 0x16, 0x57, 0x70, 0x01, 0x80, 0x83, 0x00, 0xbb,
+ 0x41, 0xc2, 0x07, 0x69, 0x00, 0xbb, 0x28, 0xc9, 0xb6, 0x1e, 0x00, 0xb8,
+ 0xf8, 0x83, 0x00, 0xb8, 0x41, 0xc2, 0x07, 0x69, 0x00, 0xb8, 0x28, 0x43,
+ 0xed, 0x83, 0xc2, 0x49, 0x49, 0x43, 0xed, 0x71, 0xc2, 0x49, 0x65, 0x43,
+ 0xed, 0x5f, 0xc2, 0x49, 0x8d, 0x43, 0xed, 0x62, 0xc2, 0x49, 0xb5, 0x43,
+ 0xed, 0x65, 0xc2, 0x49, 0xdd, 0x43, 0xed, 0x68, 0xc2, 0x4a, 0x05, 0x43,
+ 0xed, 0x6b, 0xc2, 0x4a, 0x2d, 0x43, 0xed, 0x6e, 0x42, 0x4a, 0x55, 0xc4,
+ 0x24, 0x35, 0x0b, 0x56, 0x49, 0xc5, 0x05, 0x1b, 0x0b, 0x56, 0x41, 0x15,
+ 0xc2, 0x4a, 0x7d, 0x08, 0xc2, 0x4a, 0x89, 0x16, 0xc2, 0x4a, 0x95, 0xc3,
+ 0x05, 0x17, 0x0b, 0x56, 0x09, 0xc4, 0x16, 0x57, 0x0b, 0x56, 0x00, 0xc2,
+ 0x05, 0x5c, 0x0b, 0x55, 0xf1, 0x05, 0xc2, 0x4a, 0xa1, 0x06, 0xc2, 0x4a,
+ 0xab, 0x08, 0xc2, 0x4a, 0xb5, 0xc2, 0x23, 0xe3, 0x0b, 0x55, 0xd1, 0x16,
+ 0xc2, 0x4a, 0xbf, 0x0a, 0xc2, 0x4a, 0xcf, 0x09, 0xc2, 0x4a, 0xd7, 0x15,
+ 0xc2, 0x4a, 0xe1, 0x10, 0xc2, 0x4a, 0xe9, 0xc2, 0x00, 0x9a, 0x0b, 0x55,
+ 0x91, 0x0e, 0xc2, 0x4a, 0xff, 0x0f, 0xc2, 0x4b, 0x09, 0xc2, 0x02, 0x1d,
+ 0x0b, 0x55, 0x51, 0x12, 0xc2, 0x4b, 0x1d, 0xc2, 0x00, 0x4c, 0x0b, 0x55,
+ 0x31, 0xc2, 0x1a, 0x36, 0x0b, 0x55, 0x29, 0x0d, 0xc2, 0x4b, 0x27, 0x17,
+ 0xc2, 0x4b, 0x31, 0x03, 0xc2, 0x4b, 0x49, 0x0b, 0xc2, 0x4b, 0x5d, 0x07,
+ 0xc2, 0x4b, 0x6d, 0x18, 0xc2, 0x4b, 0x7d, 0x11, 0x42, 0x4b, 0x8d, 0x18,
+ 0xc2, 0x4b, 0x9d, 0x42, 0x11, 0x89, 0xc2, 0x4b, 0xab, 0x0d, 0xc2, 0x4b,
+ 0xbd, 0x12, 0xc2, 0x4b, 0xc7, 0xc7, 0xb0, 0x38, 0x08, 0xfe, 0xc1, 0x03,
+ 0xc2, 0x4b, 0xd1, 0xc6, 0xd6, 0x10, 0x08, 0xfe, 0xb1, 0xc3, 0x42, 0x8e,
+ 0x08, 0xfe, 0xa8, 0xcb, 0x96, 0x82, 0x08, 0xff, 0x49, 0xcb, 0x9c, 0x02,
+ 0x08, 0xff, 0x40, 0x83, 0x00, 0x5c, 0x2b, 0x02, 0x4b, 0xdd, 0x8b, 0x00,
+ 0x5c, 0x3b, 0x02, 0x4b, 0xe9, 0x97, 0x00, 0x5c, 0x4b, 0x02, 0x4b, 0xed,
+ 0x87, 0x00, 0x5c, 0x73, 0x02, 0x4b, 0xf1, 0x91, 0x00, 0x5c, 0x93, 0x02,
+ 0x4b, 0xf5, 0xc2, 0x06, 0x6b, 0x00, 0x5c, 0xa9, 0x10, 0xc2, 0x4b, 0xf9,
+ 0xc2, 0x00, 0x2e, 0x00, 0x5c, 0xd1, 0xc2, 0x26, 0x94, 0x00, 0x5c, 0xe1,
+ 0x16, 0xc2, 0x4c, 0x0d, 0xc2, 0x07, 0x44, 0x00, 0x5d, 0x51, 0xc2, 0x00,
+ 0x3f, 0x00, 0x5d, 0x71, 0xc2, 0x1a, 0x36, 0x00, 0x5d, 0x79, 0x14, 0xc2,
+ 0x4c, 0x17, 0x0e, 0xc2, 0x4c, 0x21, 0xc2, 0x06, 0x8c, 0x00, 0x5d, 0xa9,
+ 0x15, 0xc2, 0x4c, 0x29, 0xc2, 0x01, 0x0e, 0x00, 0x5d, 0xc8, 0xc4, 0x16,
+ 0x57, 0x00, 0x5f, 0x31, 0xc3, 0x05, 0x17, 0x00, 0x5f, 0x39, 0x16, 0xc2,
+ 0x4c, 0x39, 0x08, 0xc2, 0x4c, 0x45, 0x15, 0xc2, 0x4c, 0x51, 0xc5, 0x05,
+ 0x1b, 0x00, 0x5f, 0x71, 0xc4, 0x24, 0x35, 0x00, 0x5f, 0x78, 0xc8, 0x08,
+ 0x19, 0x08, 0xfe, 0x99, 0x44, 0x22, 0x44, 0xc2, 0x4c, 0x5d, 0xca, 0xa9,
+ 0xb6, 0x08, 0xfe, 0x69, 0xca, 0xa9, 0x66, 0x08, 0xfe, 0x30, 0x43, 0x01,
+ 0x1d, 0xc2, 0x4c, 0x69, 0xc7, 0x08, 0x19, 0x08, 0xfe, 0x81, 0x08, 0xc2,
+ 0x4c, 0x75, 0x45, 0x05, 0x1b, 0xc2, 0x4c, 0x81, 0x16, 0xc2, 0x4c, 0x8b,
+ 0x44, 0x22, 0x44, 0xc2, 0x4c, 0x9b, 0xd8, 0x22, 0x34, 0x08, 0xfe, 0x08,
+ 0x83, 0x00, 0x5d, 0xf1, 0x8b, 0x00, 0x5e, 0x41, 0x97, 0x00, 0x5e, 0x60,
+ 0x8b, 0x00, 0x5e, 0x00, 0x97, 0x00, 0x5e, 0x10, 0x87, 0x00, 0x5e, 0x38,
+ 0x91, 0x00, 0x5e, 0x58, 0xc7, 0x0d, 0x7f, 0x00, 0x5f, 0x89, 0xc8, 0x4f,
+ 0xa2, 0x00, 0x5f, 0x90, 0xc4, 0x15, 0xa7, 0x08, 0xb6, 0x39, 0xc2, 0x22,
+ 0x45, 0x08, 0xb6, 0x30, 0xc3, 0x0d, 0x8f, 0x08, 0xb6, 0x29, 0xc3, 0x08,
+ 0xde, 0x08, 0xb6, 0x20, 0xc4, 0x05, 0xde, 0x08, 0xb6, 0x19, 0xc2, 0x0a,
+ 0x20, 0x08, 0xb6, 0x10, 0xca, 0xa4, 0x7a, 0x08, 0xb5, 0xc1, 0x97, 0x08,
+ 0xb4, 0x49, 0x8b, 0x08, 0xb4, 0x39, 0x83, 0x08, 0xb4, 0x08, 0xc2, 0x00,
+ 0x9a, 0x08, 0xb5, 0x51, 0x83, 0x08, 0xb5, 0x20, 0x83, 0x08, 0xb5, 0x41,
+ 0xc2, 0x01, 0x0e, 0x08, 0xb5, 0x38, 0xc2, 0x01, 0x0e, 0x08, 0xb5, 0x09,
+ 0x83, 0x08, 0xb5, 0x00, 0xc2, 0x01, 0x0e, 0x08, 0xb4, 0xf9, 0x83, 0x08,
+ 0xb4, 0xf0, 0x83, 0x08, 0xb4, 0xe9, 0xc2, 0x01, 0x01, 0x08, 0xb4, 0xc1,
+ 0xc2, 0x1a, 0x36, 0x08, 0xb4, 0x99, 0xc2, 0x07, 0x69, 0x08, 0xb4, 0x70,
+ 0xc2, 0x01, 0x0e, 0x08, 0xb4, 0xe1, 0x83, 0x08, 0xb4, 0xd9, 0x06, 0x42,
+ 0x4c, 0xa7, 0xc2, 0x01, 0x0e, 0x08, 0xb4, 0xd1, 0x83, 0x08, 0xb4, 0xc9,
+ 0x16, 0x42, 0x4c, 0xb1, 0xc2, 0x01, 0x0e, 0x08, 0xb4, 0x91, 0x83, 0x08,
+ 0xb4, 0x88, 0xc2, 0x01, 0x0e, 0x08, 0xb4, 0x81, 0x83, 0x08, 0xb4, 0x78,
+ 0xc2, 0x01, 0x0e, 0x08, 0xb4, 0x69, 0x83, 0x08, 0xb4, 0x60, 0xc2, 0x01,
+ 0x0e, 0x08, 0xb4, 0x59, 0x83, 0x08, 0xb4, 0x50, 0x97, 0x08, 0xb4, 0x28,
+ 0x8b, 0x08, 0xb4, 0x18, 0xc4, 0x21, 0x28, 0x08, 0xb5, 0xb1, 0xc5, 0x45,
+ 0xcf, 0x08, 0xb5, 0x60, 0x97, 0x08, 0xb5, 0xa9, 0x8b, 0x08, 0xb5, 0x99,
+ 0x83, 0x08, 0xb5, 0x68, 0x97, 0x08, 0xb5, 0x88, 0x8b, 0x08, 0xb5, 0x78,
+ 0xc3, 0x00, 0x55, 0x00, 0xd5, 0x61, 0xc2, 0x67, 0x96, 0x00, 0xd5, 0x20,
+ 0xc5, 0x4a, 0x67, 0x00, 0xd5, 0x53, 0x02, 0x4c, 0xbb, 0xc3, 0x3d, 0x15,
+ 0x00, 0xd5, 0x11, 0xc3, 0x3c, 0xb1, 0x00, 0xd3, 0x00, 0xc3, 0x3b, 0xb1,
+ 0x00, 0xd5, 0x43, 0x02, 0x4c, 0xc1, 0xc3, 0x88, 0xf7, 0x00, 0xd5, 0x19,
+ 0x43, 0x2d, 0x67, 0x42, 0x4c, 0xc7, 0xc5, 0xe1, 0xf2, 0x00, 0xd5, 0x39,
+ 0xc3, 0x7f, 0xed, 0x00, 0xd3, 0xd9, 0xc4, 0xe5, 0x57, 0x00, 0xd3, 0xa2,
+ 0x02, 0x4c, 0xd3, 0xd4, 0x3d, 0x3e, 0x00, 0xd5, 0x31, 0xc6, 0xd0, 0xc4,
+ 0x00, 0xd3, 0xd0, 0xc4, 0xe4, 0x03, 0x00, 0xd5, 0x08, 0xc3, 0x82, 0xa4,
+ 0x00, 0xd3, 0xb1, 0xc3, 0x83, 0x28, 0x00, 0xd3, 0xa8, 0xc4, 0x15, 0xa7,
+ 0x00, 0xd4, 0xb9, 0xc2, 0x22, 0x45, 0x00, 0xd4, 0xb0, 0xc3, 0x0d, 0x8f,
+ 0x00, 0xd4, 0xa9, 0xc3, 0x08, 0xde, 0x00, 0xd4, 0xa0, 0xc4, 0x05, 0xde,
+ 0x00, 0xd4, 0x99, 0xc2, 0x0a, 0x20, 0x00, 0xd4, 0x90, 0xc4, 0x15, 0xa7,
+ 0x00, 0xd4, 0x39, 0xc2, 0x22, 0x45, 0x00, 0xd4, 0x30, 0xc3, 0x0d, 0x8f,
+ 0x00, 0xd4, 0x29, 0xc3, 0x08, 0xde, 0x00, 0xd4, 0x20, 0xc4, 0x05, 0xde,
+ 0x00, 0xd4, 0x19, 0xc2, 0x0a, 0x20, 0x00, 0xd4, 0x10, 0xc2, 0x0e, 0xe5,
+ 0x00, 0xd2, 0xf1, 0xc2, 0x02, 0x1d, 0x00, 0xd2, 0xe9, 0x0f, 0xc2, 0x4c,
+ 0xd9, 0xd4, 0x3b, 0x5e, 0x00, 0xd2, 0xd9, 0x0e, 0xc2, 0x4c, 0xe3, 0xc9,
+ 0xb3, 0x69, 0x00, 0xd2, 0xc8, 0x42, 0x00, 0xee, 0xc2, 0x4c, 0xef, 0x91,
+ 0x00, 0xd3, 0x81, 0x9b, 0x00, 0xd3, 0x68, 0xc6, 0xd6, 0xbe, 0x00, 0xd3,
+ 0x91, 0xc6, 0xcc, 0xaf, 0x00, 0xd3, 0x20, 0x8b, 0x00, 0xd3, 0x89, 0x87,
0x00, 0xd3, 0x79, 0x83, 0x00, 0xd3, 0x18, 0x97, 0x00, 0xd3, 0x53, 0x02,
- 0x4d, 0x48, 0x87, 0x00, 0xd3, 0x38, 0x8b, 0x00, 0xd3, 0x30, 0x83, 0x00,
- 0xd2, 0x1b, 0x02, 0x4d, 0x4c, 0x43, 0x01, 0x12, 0xc2, 0x4d, 0x50, 0xc2,
- 0x00, 0xc7, 0x00, 0xd2, 0x51, 0xc2, 0x0f, 0x4d, 0x00, 0xd2, 0x20, 0x97,
- 0x00, 0xd2, 0x80, 0x8b, 0x00, 0xd2, 0x70, 0xc2, 0x00, 0xa4, 0x00, 0xd2,
- 0x49, 0x15, 0xc2, 0x4d, 0x7e, 0xc2, 0x1d, 0x5f, 0x00, 0xd2, 0x01, 0xc2,
- 0x00, 0x67, 0x00, 0xd1, 0xd1, 0x12, 0xc2, 0x4d, 0x8e, 0x16, 0xc2, 0x4d,
- 0x98, 0xc5, 0x3b, 0x66, 0x00, 0xd1, 0x71, 0x05, 0xc2, 0x4d, 0xa2, 0x0d,
- 0x42, 0x4d, 0xac, 0xc2, 0x0f, 0x4d, 0x00, 0xd2, 0x11, 0x83, 0x00, 0xd2,
- 0x0a, 0x02, 0x4d, 0xbc, 0x83, 0x00, 0xd1, 0xb1, 0xc2, 0x1d, 0x5f, 0x00,
- 0xd1, 0x61, 0xc2, 0x01, 0x29, 0x00, 0xd1, 0x30, 0xa3, 0x00, 0xcb, 0xa1,
- 0xa2, 0x00, 0xcb, 0x99, 0xa1, 0x00, 0xcb, 0x91, 0xa0, 0x00, 0xcb, 0x89,
- 0x9f, 0x00, 0xcb, 0x80, 0xc2, 0x00, 0xa4, 0x00, 0xcb, 0x09, 0x83, 0x00,
- 0xca, 0x98, 0xc5, 0xdd, 0x90, 0x05, 0x56, 0xf9, 0x90, 0x05, 0x56, 0xd8,
- 0x8f, 0x05, 0x55, 0xf1, 0x90, 0x05, 0x55, 0xe9, 0x9b, 0x05, 0x55, 0xe1,
- 0xc2, 0x0f, 0x4d, 0x05, 0x55, 0xd9, 0x83, 0x05, 0x55, 0x88, 0x83, 0x05,
- 0x55, 0xd1, 0x87, 0x05, 0x55, 0x9a, 0x02, 0x4d, 0xc8, 0x83, 0x05, 0x55,
- 0xc0, 0x91, 0x05, 0x55, 0x79, 0xc2, 0x00, 0xf6, 0x05, 0x55, 0x69, 0xc2,
- 0x17, 0x58, 0x05, 0x55, 0x59, 0xc2, 0x01, 0x64, 0x05, 0x55, 0x49, 0xc2,
- 0x00, 0x39, 0x05, 0x55, 0x39, 0xc2, 0x20, 0x67, 0x05, 0x55, 0x29, 0xc2,
- 0x00, 0x82, 0x05, 0x55, 0x19, 0xc2, 0x05, 0x83, 0x05, 0x55, 0x09, 0x12,
- 0xc2, 0x4d, 0xcc, 0xc2, 0x00, 0x2b, 0x05, 0x54, 0xd9, 0x10, 0xc2, 0x4d,
- 0xd6, 0x16, 0xc2, 0x4d, 0xe6, 0xc2, 0x01, 0x4a, 0x05, 0x54, 0x99, 0x05,
- 0xc2, 0x4d, 0xf0, 0xc2, 0x13, 0x4f, 0x05, 0x54, 0x39, 0x0d, 0xc2, 0x4d,
- 0xfa, 0xc2, 0x05, 0x7b, 0x05, 0x54, 0x78, 0x91, 0x05, 0x55, 0x71, 0xc2,
- 0x00, 0xf6, 0x05, 0x55, 0x61, 0xc2, 0x17, 0x58, 0x05, 0x55, 0x51, 0xc2,
- 0x01, 0x64, 0x05, 0x55, 0x41, 0xc2, 0x00, 0x39, 0x05, 0x55, 0x31, 0xc2,
- 0x20, 0x67, 0x05, 0x55, 0x21, 0xc2, 0x00, 0x82, 0x05, 0x55, 0x11, 0xc2,
- 0x05, 0x83, 0x05, 0x55, 0x01, 0x12, 0xc2, 0x4e, 0x02, 0xc2, 0x00, 0x2b,
- 0x05, 0x54, 0xd1, 0x10, 0xc2, 0x4e, 0x0c, 0x16, 0xc2, 0x4e, 0x1c, 0xc2,
- 0x01, 0x4a, 0x05, 0x54, 0x91, 0x05, 0xc2, 0x4e, 0x26, 0xc2, 0x13, 0x4f,
- 0x05, 0x54, 0x31, 0x0d, 0xc2, 0x4e, 0x30, 0xc2, 0x05, 0x7b, 0x05, 0x54,
- 0x70, 0xd2, 0x48, 0xba, 0x0f, 0xb2, 0xb1, 0xd2, 0x49, 0x38, 0x0f, 0xb2,
- 0xa0, 0xc4, 0x04, 0x5e, 0x01, 0x0c, 0x59, 0xc2, 0x01, 0x47, 0x01, 0x0c,
- 0x50, 0x9b, 0x01, 0x0a, 0x21, 0x8e, 0x01, 0x0a, 0x11, 0x89, 0x01, 0x0a,
- 0x08, 0xd2, 0x48, 0xba, 0x0f, 0xb2, 0xb9, 0xd2, 0x49, 0x38, 0x0f, 0xb2,
- 0xa8, 0xc4, 0x01, 0x1e, 0x01, 0x34, 0xf9, 0xc5, 0x01, 0xf7, 0x01, 0x34,
- 0xf0, 0xc5, 0x01, 0xf7, 0x0f, 0xaf, 0x39, 0xc4, 0x01, 0x1e, 0x0f, 0xaf,
- 0x31, 0xc5, 0x01, 0x62, 0x0f, 0xaf, 0x29, 0xc5, 0x00, 0x95, 0x0f, 0xaf,
- 0x20, 0x4b, 0x01, 0xf7, 0xc2, 0x4e, 0x38, 0xdf, 0x0c, 0x85, 0x01, 0x5c,
- 0xc0, 0xe0, 0x0b, 0xa7, 0x01, 0x5c, 0xc8, 0xe0, 0x0a, 0x27, 0x01, 0x3d,
- 0x18, 0xe0, 0x03, 0x67, 0x01, 0x5c, 0xd8, 0xc6, 0x13, 0x57, 0x0f, 0xbd,
- 0x41, 0xc4, 0x40, 0xc6, 0x01, 0x00, 0x48, 0xc5, 0xd6, 0x10, 0x00, 0x3d,
- 0x19, 0xc8, 0xb7, 0x8d, 0x00, 0x3c, 0x79, 0xc4, 0xd5, 0x67, 0x00, 0x3c,
- 0x70, 0x91, 0x00, 0x3d, 0x01, 0xc7, 0xb2, 0xcf, 0x00, 0x3c, 0x99, 0xc3,
- 0x3b, 0xc7, 0x00, 0x3c, 0x63, 0x02, 0x4e, 0x44, 0xc3, 0x3b, 0xd2, 0x00,
- 0x3c, 0xc0, 0x03, 0xc2, 0x4e, 0x4a, 0xc5, 0xd5, 0xb1, 0x00, 0x3c, 0x58,
- 0xc5, 0xdd, 0x40, 0x00, 0x3c, 0xf1, 0x0a, 0xc2, 0x4e, 0x56, 0xc4, 0x58,
- 0x1f, 0x00, 0x3c, 0x80, 0xc3, 0x3b, 0xc7, 0x00, 0x3c, 0xc9, 0xc2, 0x14,
- 0x40, 0x00, 0x3c, 0x00, 0x03, 0xc2, 0x4e, 0x62, 0x91, 0x00, 0x3d, 0x08,
- 0xc4, 0xe3, 0x63, 0x00, 0x3c, 0x69, 0xc8, 0xb2, 0xce, 0x00, 0x3c, 0x28,
- 0xc4, 0xe2, 0x77, 0x00, 0x3c, 0x39, 0xc3, 0x15, 0xf2, 0x00, 0x3d, 0x10,
- 0xc4, 0xd5, 0x67, 0x00, 0x3c, 0x31, 0xc3, 0x3b, 0xc7, 0x00, 0x3c, 0xd0,
- 0xc4, 0xe5, 0x37, 0x00, 0x3c, 0x11, 0xc2, 0x14, 0x40, 0x00, 0x3d, 0x88,
- 0x0d, 0xc2, 0x4e, 0x6c, 0x10, 0xc2, 0x4e, 0x78, 0x46, 0xd0, 0x27, 0xc2,
- 0x4e, 0x8a, 0x15, 0xc2, 0x4e, 0x9f, 0x1b, 0xc2, 0x4e, 0xab, 0x43, 0x5d,
- 0x05, 0xc2, 0x4e, 0xb7, 0x16, 0xc2, 0x4e, 0xc3, 0xc9, 0xab, 0x8f, 0x00,
- 0x70, 0xd1, 0x12, 0xc2, 0x4e, 0xcd, 0x42, 0x05, 0x83, 0xc2, 0x4e, 0xdd,
- 0x0f, 0xc2, 0x4e, 0xec, 0x14, 0xc2, 0x4e, 0xf8, 0x0e, 0xc2, 0x4f, 0x02,
- 0xc7, 0xc2, 0xd7, 0x00, 0x71, 0x39, 0x43, 0x68, 0x16, 0xc2, 0x4f, 0x12,
- 0xc5, 0xdf, 0x16, 0x00, 0x71, 0x69, 0xca, 0xa7, 0x2a, 0x00, 0x72, 0xd0,
- 0xc2, 0x01, 0x47, 0x00, 0x72, 0x91, 0xc4, 0x04, 0x5e, 0x00, 0x72, 0x98,
- 0xc3, 0x06, 0x9e, 0x00, 0x72, 0xa1, 0xc3, 0x0c, 0x5b, 0x00, 0x72, 0xa8,
- 0xc2, 0x26, 0x51, 0x00, 0x72, 0xb1, 0xc4, 0x18, 0x83, 0x00, 0x72, 0xb8,
- 0x87, 0x0f, 0x15, 0x58, 0x47, 0xc3, 0x4e, 0xc2, 0x4f, 0x1e, 0x83, 0x0f,
- 0x14, 0x88, 0x91, 0x0f, 0x15, 0x40, 0x97, 0x0f, 0x15, 0x18, 0xc2, 0x01,
- 0x29, 0x0f, 0x14, 0xc1, 0x83, 0x0f, 0x14, 0xb8, 0xd0, 0x5f, 0x22, 0x01,
- 0x4e, 0x69, 0xc8, 0x4f, 0x39, 0x01, 0x4e, 0x59, 0xc9, 0x17, 0x7a, 0x01,
- 0x4e, 0x51, 0xcf, 0x13, 0x63, 0x0f, 0xb6, 0x30, 0xc4, 0x58, 0xd6, 0x0e,
- 0x9a, 0x49, 0xc9, 0xb0, 0xde, 0x0e, 0x99, 0xe0, 0xc5, 0xb9, 0x70, 0x0e,
- 0x9a, 0x91, 0xc5, 0x5d, 0x6d, 0x0e, 0x9a, 0x70, 0xc6, 0xd1, 0x17, 0x0e,
- 0x99, 0xc1, 0x16, 0x42, 0x4f, 0x32, 0xc7, 0xcb, 0x89, 0x0e, 0x99, 0xe9,
- 0xc4, 0x1f, 0x5c, 0x0e, 0x99, 0x30, 0xc5, 0xd7, 0x50, 0x0e, 0x9a, 0x61,
- 0xc2, 0x00, 0x7b, 0x0e, 0x99, 0x88, 0xc5, 0xd7, 0xe1, 0x0e, 0x99, 0x71,
- 0x0b, 0x42, 0x4f, 0x44, 0xc5, 0x7f, 0xa1, 0x01, 0x18, 0xa9, 0xc5, 0x37,
- 0x67, 0x0f, 0xa6, 0xf2, 0x02, 0x4f, 0x50, 0x49, 0x11, 0x37, 0xc2, 0x4f,
- 0x56, 0xca, 0x1e, 0x18, 0x00, 0x60, 0x08, 0xc7, 0x11, 0x41, 0x00, 0x60,
- 0x11, 0xc7, 0x76, 0x59, 0x00, 0x61, 0xe8, 0xc5, 0x44, 0x7b, 0x00, 0x60,
- 0x19, 0xc4, 0x0f, 0x7c, 0x00, 0x62, 0x68, 0x83, 0x00, 0x60, 0x2b, 0x02,
- 0x4f, 0x62, 0x8b, 0x00, 0x60, 0x3b, 0x02, 0x4f, 0x6e, 0x97, 0x00, 0x60,
- 0x4b, 0x02, 0x4f, 0x72, 0x18, 0xc2, 0x4f, 0x76, 0x87, 0x00, 0x60, 0x73,
- 0x02, 0x4f, 0x80, 0x91, 0x00, 0x60, 0x93, 0x02, 0x4f, 0x84, 0x0d, 0xc2,
- 0x4f, 0x88, 0x09, 0xc2, 0x4f, 0x92, 0x10, 0xc2, 0x4f, 0x9c, 0x05, 0xc2,
- 0x4f, 0xb5, 0x0c, 0xc2, 0x4f, 0xbf, 0x16, 0xc2, 0x4f, 0xc9, 0x06, 0xc2,
- 0x4f, 0xdd, 0x12, 0xc2, 0x4f, 0xf1, 0x04, 0xc2, 0x4f, 0xfb, 0xc2, 0x00,
- 0xad, 0x00, 0x61, 0x71, 0xc2, 0x1d, 0x5f, 0x00, 0x61, 0x79, 0x14, 0xc2,
- 0x50, 0x05, 0x0e, 0xc2, 0x50, 0x0d, 0x15, 0xc2, 0x50, 0x15, 0xc2, 0x00,
- 0xa4, 0x00, 0x61, 0xc8, 0x83, 0x00, 0x61, 0xf1, 0x8b, 0x00, 0x62, 0x41,
- 0x97, 0x00, 0x62, 0x60, 0x8b, 0x00, 0x62, 0x00, 0x97, 0x00, 0x62, 0x10,
- 0x94, 0x00, 0x62, 0x1b, 0x02, 0x50, 0x25, 0x8e, 0x00, 0x63, 0x12, 0x02,
- 0x50, 0x29, 0x87, 0x00, 0x62, 0x38, 0x91, 0x00, 0x62, 0x58, 0xc2, 0x01,
- 0x47, 0x00, 0x63, 0x41, 0xc4, 0x04, 0x5e, 0x00, 0x63, 0x48, 0xc3, 0x06,
- 0x9e, 0x00, 0x63, 0x51, 0xc3, 0x0c, 0x5b, 0x00, 0x63, 0x58, 0xc2, 0x26,
- 0x51, 0x00, 0x63, 0x61, 0xc4, 0x18, 0x83, 0x00, 0x63, 0x68, 0xd2, 0x15,
- 0xdc, 0x00, 0x63, 0xc9, 0xd3, 0x40, 0xdd, 0x00, 0x63, 0xe0, 0x47, 0xc2,
- 0xec, 0xc2, 0x50, 0x2d, 0x49, 0xab, 0xbc, 0x42, 0x50, 0x39, 0x46, 0x00,
- 0x95, 0xc2, 0x50, 0x45, 0x45, 0x00, 0x6c, 0x42, 0x50, 0x51, 0xc5, 0x00,
- 0x95, 0x01, 0x70, 0xf1, 0xc5, 0x01, 0x62, 0x01, 0x70, 0xf8, 0xc4, 0x18,
- 0x83, 0x08, 0xa6, 0xb9, 0xc2, 0x26, 0x51, 0x08, 0xa6, 0xb0, 0xc3, 0x0c,
- 0x5b, 0x08, 0xa6, 0xa9, 0xc3, 0x06, 0x9e, 0x08, 0xa6, 0xa0, 0xc4, 0x04,
- 0x5e, 0x08, 0xa6, 0x99, 0xc2, 0x01, 0x47, 0x08, 0xa6, 0x90, 0xc7, 0x76,
- 0x59, 0x08, 0xa6, 0x21, 0xc7, 0x11, 0x41, 0x08, 0xa6, 0x00, 0xc5, 0x44,
- 0x7b, 0x08, 0xa6, 0x09, 0xc4, 0x0f, 0x7c, 0x08, 0xa6, 0x10, 0x97, 0x08,
- 0xa5, 0xf1, 0x8b, 0x08, 0xa5, 0xd9, 0x83, 0x08, 0xa5, 0x80, 0x91, 0x08,
- 0xa5, 0xe9, 0x87, 0x08, 0xa5, 0xd0, 0x8e, 0x08, 0xa5, 0xbb, 0x02, 0x50,
- 0x5d, 0x94, 0x08, 0xa5, 0xaa, 0x02, 0x50, 0x61, 0x97, 0x08, 0xa5, 0xa0,
- 0x8b, 0x08, 0xa5, 0x90, 0x83, 0x08, 0xa5, 0x71, 0xc2, 0x0c, 0x65, 0x08,
- 0xa5, 0x69, 0xc2, 0x00, 0xa4, 0x08, 0xa5, 0x60, 0x83, 0x08, 0xa5, 0x59,
- 0x47, 0xac, 0xc2, 0x42, 0x50, 0x65, 0xc2, 0x00, 0xa4, 0x08, 0xa5, 0x31,
- 0x83, 0x08, 0xa5, 0x28, 0xc2, 0x00, 0xa4, 0x08, 0xa5, 0x21, 0x83, 0x08,
- 0xa5, 0x18, 0x83, 0x08, 0xa5, 0x11, 0xc2, 0x00, 0xc1, 0x08, 0xa4, 0xe9,
- 0xc2, 0x1d, 0x5f, 0x08, 0xa4, 0xc1, 0xc2, 0x01, 0x29, 0x08, 0xa4, 0x98,
- 0xc2, 0x00, 0xa4, 0x08, 0xa5, 0x09, 0x83, 0x08, 0xa5, 0x01, 0x06, 0x42,
- 0x50, 0x73, 0xc2, 0x00, 0xa4, 0x08, 0xa4, 0xf9, 0x83, 0x08, 0xa4, 0xf1,
- 0x16, 0x42, 0x50, 0x7d, 0xc2, 0x00, 0xa4, 0x08, 0xa4, 0xb9, 0x83, 0x08,
- 0xa4, 0xb0, 0xc2, 0x00, 0xa4, 0x08, 0xa4, 0xa9, 0x83, 0x08, 0xa4, 0xa0,
- 0xc2, 0x00, 0xa4, 0x08, 0xa4, 0x91, 0x83, 0x08, 0xa4, 0x88, 0xc2, 0x00,
- 0xa4, 0x08, 0xa4, 0x81, 0x83, 0x08, 0xa4, 0x78, 0x97, 0x08, 0xa4, 0x71,
- 0x8b, 0x08, 0xa4, 0x61, 0x83, 0x08, 0xa4, 0x10, 0x97, 0x08, 0xa4, 0x30,
- 0x8b, 0x08, 0xa4, 0x20, 0xc7, 0xc4, 0x20, 0x00, 0x7e, 0x21, 0xc7, 0xc1,
- 0x9c, 0x00, 0x7e, 0x2b, 0x02, 0x50, 0x87, 0x12, 0xc2, 0x50, 0x8d, 0xc6,
- 0xcd, 0x6f, 0x00, 0x7e, 0x4a, 0x02, 0x50, 0x99, 0x44, 0xb3, 0xe5, 0xc2,
- 0x50, 0x9d, 0xcd, 0x81, 0x31, 0x00, 0x7b, 0xf1, 0xc8, 0x8b, 0x61, 0x00,
- 0x7b, 0xf8, 0xc7, 0xba, 0x06, 0x00, 0x79, 0xf1, 0xc8, 0xbb, 0xbd, 0x00,
- 0x7c, 0x38, 0xc8, 0xba, 0x05, 0x00, 0x79, 0xf9, 0xc7, 0x53, 0xe0, 0x00,
- 0x7c, 0x48, 0xc7, 0xc7, 0xb5, 0x00, 0x7c, 0x31, 0xc9, 0x95, 0x04, 0x00,
- 0x7c, 0x40, 0xcb, 0x9a, 0x4b, 0x00, 0x7c, 0x51, 0xcb, 0x8d, 0xeb, 0x00,
- 0x7c, 0x58, 0xcb, 0x95, 0x02, 0x00, 0x7c, 0x69, 0xc8, 0x53, 0xdf, 0x00,
- 0x7c, 0x71, 0xd1, 0x53, 0xd6, 0x00, 0x7c, 0x78, 0x0d, 0xc2, 0x50, 0xa9,
- 0x09, 0xc2, 0x50, 0xb9, 0x10, 0xc2, 0x50, 0xc3, 0x05, 0xc2, 0x50, 0xd9,
- 0xc2, 0x24, 0x58, 0x00, 0x7c, 0xb9, 0x16, 0xc2, 0x50, 0xe3, 0x06, 0xc2,
- 0x50, 0xf5, 0x12, 0xc2, 0x51, 0x07, 0x04, 0xc2, 0x51, 0x11, 0xc2, 0x00,
- 0xad, 0x00, 0x7d, 0x41, 0xc2, 0x01, 0x09, 0x00, 0x7d, 0x69, 0x1c, 0xc2,
- 0x51, 0x1b, 0xc2, 0x00, 0x02, 0x00, 0x7d, 0x81, 0xc2, 0x1d, 0x5f, 0x00,
- 0x7d, 0x89, 0xc2, 0x02, 0x59, 0x00, 0x7d, 0x91, 0xc2, 0x00, 0xc7, 0x00,
- 0x7d, 0x99, 0x15, 0xc2, 0x51, 0x25, 0xc2, 0x00, 0xa4, 0x00, 0x7d, 0xb9,
- 0x83, 0x00, 0x7d, 0xc1, 0x4b, 0x7a, 0xcb, 0x42, 0x51, 0x35, 0x48, 0x15,
- 0x22, 0xc2, 0x51, 0x47, 0xc5, 0x33, 0x45, 0x00, 0x78, 0xa0, 0xc2, 0x00,
- 0xb7, 0x00, 0x79, 0xd1, 0xc2, 0x00, 0xe4, 0x00, 0x79, 0xd8, 0xcf, 0x15,
- 0x22, 0x00, 0x78, 0x21, 0xdb, 0x15, 0x16, 0x00, 0x7e, 0x98, 0xcf, 0x15,
- 0x58, 0x00, 0x78, 0x29, 0xdb, 0x15, 0x4c, 0x00, 0x7e, 0xa0, 0xd4, 0x39,
- 0x09, 0x00, 0x78, 0x31, 0x4c, 0x86, 0x2c, 0x42, 0x51, 0x53, 0x0d, 0xc2,
- 0x51, 0x5f, 0xc9, 0xb2, 0xdf, 0x00, 0x79, 0xa0, 0xc7, 0x15, 0x22, 0x00,
- 0x78, 0x51, 0xcc, 0x30, 0x55, 0x00, 0x7e, 0x80, 0xc4, 0x02, 0x28, 0x00,
- 0x78, 0x71, 0xc5, 0x33, 0x45, 0x00, 0x7e, 0x92, 0x02, 0x51, 0x6b, 0xc7,
- 0x75, 0xf7, 0x00, 0x79, 0xa9, 0xca, 0xa5, 0xf4, 0x00, 0x79, 0xb8, 0xc8,
- 0x33, 0x47, 0x00, 0x78, 0x79, 0xc7, 0xc4, 0x3c, 0x00, 0x79, 0xc8, 0x83,
- 0x00, 0x7a, 0x01, 0xc2, 0x00, 0xa4, 0x00, 0x7a, 0x09, 0xc3, 0x1a, 0x80,
- 0x00, 0x7b, 0x49, 0xc2, 0x04, 0x2b, 0x00, 0x7b, 0x58, 0x83, 0x00, 0x7a,
- 0x11, 0xc2, 0x00, 0xa4, 0x00, 0x7a, 0x18, 0xc2, 0x01, 0x29, 0x00, 0x7a,
- 0x21, 0xc2, 0x1d, 0x5f, 0x00, 0x7a, 0x49, 0xc2, 0x00, 0xc1, 0x00, 0x7a,
- 0x71, 0x83, 0x00, 0x7a, 0x98, 0x83, 0x00, 0x7a, 0x29, 0xc2, 0x00, 0xa4,
- 0x00, 0x7a, 0x30, 0x16, 0xc2, 0x51, 0x71, 0x83, 0x00, 0x7a, 0x79, 0xc2,
- 0x00, 0xa4, 0x00, 0x7a, 0x81, 0x15, 0x42, 0x51, 0x7b, 0x06, 0xc2, 0x51,
- 0x85, 0x83, 0x00, 0x7a, 0x89, 0xc2, 0x00, 0xa4, 0x00, 0x7a, 0x91, 0x1c,
- 0x42, 0x51, 0x8f, 0x83, 0x00, 0x7a, 0xa1, 0xc2, 0x00, 0xa4, 0x00, 0x7a,
- 0xa8, 0x83, 0x00, 0x7a, 0xb1, 0xc2, 0x00, 0xa4, 0x00, 0x7a, 0xb8, 0xc2,
- 0x00, 0xa4, 0x00, 0x7a, 0xf1, 0x83, 0x00, 0x7a, 0xf8, 0x83, 0x00, 0x7b,
- 0x11, 0xc2, 0x02, 0x59, 0x00, 0x7b, 0x60, 0xc2, 0x00, 0xa4, 0x00, 0x7b,
- 0x21, 0xc2, 0x0c, 0x65, 0x00, 0x7b, 0x29, 0x83, 0x00, 0x7b, 0x30, 0xc2,
- 0x01, 0x47, 0x00, 0x79, 0x59, 0xc4, 0x04, 0x5e, 0x00, 0x79, 0x60, 0xc3,
- 0x06, 0x9e, 0x00, 0x79, 0x69, 0xc3, 0x0c, 0x5b, 0x00, 0x79, 0x70, 0xc2,
- 0x26, 0x51, 0x00, 0x79, 0x79, 0xc4, 0x18, 0x83, 0x00, 0x79, 0x80, 0x94,
- 0x00, 0x7b, 0xb8, 0x8e, 0x00, 0x7b, 0xc8, 0x84, 0x01, 0x69, 0x8b, 0x02,
- 0x51, 0x99, 0x89, 0x01, 0x69, 0x9b, 0x02, 0x51, 0x9d, 0x8c, 0x01, 0x69,
- 0xb1, 0x86, 0x01, 0x69, 0xbb, 0x02, 0x51, 0xa4, 0x88, 0x01, 0x69, 0xe1,
- 0x8d, 0x01, 0x69, 0xeb, 0x02, 0x51, 0xaf, 0x8a, 0x01, 0x6a, 0x03, 0x02,
- 0x51, 0xb6, 0x83, 0x01, 0x6a, 0x21, 0x93, 0x01, 0x6a, 0x39, 0x9c, 0x01,
- 0x6b, 0x1b, 0x02, 0x51, 0xba, 0x8e, 0x01, 0x6a, 0x69, 0x8f, 0x01, 0x6a,
- 0x71, 0x90, 0x01, 0x6a, 0x79, 0x92, 0x01, 0x6a, 0x91, 0x94, 0x01, 0x6a,
- 0xa3, 0x02, 0x51, 0xc2, 0x95, 0x01, 0x6a, 0xcb, 0x02, 0x51, 0xc6, 0x96,
- 0x01, 0x6a, 0xe3, 0x02, 0x51, 0xce, 0xc2, 0x11, 0xd4, 0x01, 0x6a, 0xf1,
- 0x98, 0x01, 0x6b, 0x01, 0x99, 0x01, 0x6b, 0x09, 0x9b, 0x01, 0x6b, 0x10,
- 0x9b, 0x01, 0x69, 0xd8, 0x8d, 0x01, 0x69, 0xf3, 0x02, 0x51, 0xd6, 0x8a,
- 0x01, 0x6a, 0x11, 0x93, 0x01, 0x6a, 0x41, 0xc2, 0x23, 0xb6, 0x01, 0x6a,
- 0x61, 0x09, 0xc2, 0x51, 0xda, 0xc2, 0x00, 0x35, 0x01, 0x6a, 0x88, 0xcb,
- 0x01, 0xbc, 0x01, 0x02, 0xd1, 0xc6, 0x70, 0xe9, 0x01, 0x01, 0x28, 0x44,
- 0x08, 0x76, 0xc2, 0x51, 0xe2, 0x45, 0x05, 0x2b, 0xc2, 0x51, 0xf8, 0x42,
- 0x00, 0x67, 0xc2, 0x52, 0x4c, 0xc3, 0x2e, 0x60, 0x00, 0x37, 0x31, 0xc3,
- 0x78, 0xa9, 0x00, 0x37, 0x29, 0xc5, 0x4b, 0xf1, 0x00, 0x30, 0xd1, 0xc5,
- 0x52, 0xee, 0x00, 0x30, 0xc8, 0xc3, 0x2d, 0x55, 0x00, 0x32, 0x93, 0x02,
- 0x52, 0x58, 0xd8, 0x24, 0x30, 0x00, 0x44, 0xe9, 0xcc, 0x87, 0x88, 0x00,
- 0x32, 0xb0, 0x4a, 0xa6, 0x08, 0xc2, 0x52, 0x5c, 0xc4, 0x00, 0x5b, 0x07,
- 0xdd, 0xf9, 0x16, 0xc2, 0x52, 0x68, 0x42, 0x01, 0x4a, 0xc2, 0x52, 0x74,
- 0x4a, 0x38, 0x82, 0xc2, 0x52, 0x80, 0xcb, 0x97, 0xac, 0x07, 0xde, 0x10,
- 0x15, 0xc2, 0x52, 0x8c, 0xc9, 0xac, 0x0d, 0x00, 0x30, 0xa1, 0x42, 0x02,
- 0x59, 0xc2, 0x52, 0x96, 0xcf, 0x6a, 0x01, 0x00, 0x30, 0x89, 0xc5, 0xdb,
- 0x97, 0x00, 0x30, 0x78, 0x00, 0x42, 0x52, 0xa2, 0x0c, 0xc2, 0x52, 0xae,
- 0x0a, 0xc2, 0x52, 0xba, 0x15, 0xc2, 0x52, 0xc6, 0x4b, 0x95, 0xd3, 0xc2,
- 0x52, 0xda, 0x03, 0xc2, 0x52, 0xf2, 0x16, 0xc2, 0x53, 0x08, 0x49, 0xac,
- 0x67, 0xc2, 0x53, 0x16, 0x4a, 0x62, 0xae, 0xc2, 0x53, 0x4a, 0x0d, 0xc2,
- 0x53, 0x7e, 0x49, 0x0f, 0x69, 0xc2, 0x53, 0x8a, 0x13, 0xc2, 0x53, 0xac,
- 0x49, 0xb2, 0x7c, 0xc2, 0x53, 0xb6, 0x04, 0xc2, 0x53, 0xda, 0x14, 0xc2,
- 0x53, 0xe6, 0x0f, 0xc2, 0x53, 0xf0, 0x4e, 0x73, 0x49, 0xc2, 0x53, 0xfc,
- 0x49, 0xb5, 0x3a, 0xc2, 0x54, 0x06, 0x56, 0x2e, 0x51, 0xc2, 0x54, 0x30,
- 0xd6, 0x2d, 0xa1, 0x07, 0xef, 0xc0, 0x4d, 0x7c, 0x1d, 0xc2, 0x54, 0x36,
- 0x45, 0x02, 0x92, 0x42, 0x54, 0x42, 0x4a, 0x75, 0x38, 0xc2, 0x54, 0xc3,
- 0xcc, 0x27, 0x90, 0x00, 0x46, 0x88, 0xd4, 0x3a, 0x85, 0x00, 0x47, 0xf9,
- 0xcb, 0x3a, 0x8e, 0x00, 0x32, 0xc0, 0xc7, 0xc6, 0x73, 0x00, 0x44, 0xe1,
- 0xc7, 0x29, 0x1c, 0x00, 0x32, 0x98, 0x06, 0xc2, 0x54, 0xd5, 0x03, 0xc2,
- 0x54, 0xdd, 0xc3, 0x88, 0x60, 0x0f, 0x70, 0x09, 0xc4, 0x2d, 0xfe, 0x0f,
- 0x70, 0x11, 0xc3, 0x78, 0xa9, 0x0f, 0x70, 0x29, 0x42, 0x02, 0xb4, 0xc2,
- 0x54, 0xe9, 0xc3, 0x19, 0xa7, 0x0f, 0x70, 0x39, 0x16, 0xc2, 0x54, 0xf3,
- 0xc3, 0x2e, 0x60, 0x0f, 0x70, 0x49, 0x0d, 0xc2, 0x55, 0x01, 0x0e, 0xc2,
- 0x55, 0x0d, 0xc4, 0x19, 0x9d, 0x0f, 0x70, 0x61, 0xc4, 0x3a, 0x8e, 0x0f,
- 0x70, 0x69, 0x15, 0xc2, 0x55, 0x19, 0xc3, 0x0f, 0x60, 0x0f, 0x70, 0x91,
- 0xc3, 0x6f, 0x91, 0x0f, 0x70, 0x99, 0x48, 0x12, 0x30, 0xc2, 0x55, 0x31,
- 0x49, 0x2d, 0x6d, 0xc2, 0x55, 0x83, 0xc3, 0xb2, 0x7c, 0x0f, 0x70, 0x81,
- 0xc5, 0x95, 0xd3, 0x0f, 0x70, 0xd8, 0xc3, 0x00, 0xac, 0x00, 0x32, 0x7b,
- 0x02, 0x55, 0x8f, 0xcc, 0x89, 0x38, 0x00, 0x30, 0x68, 0xd6, 0x30, 0xe5,
- 0x00, 0x47, 0xdb, 0x02, 0x55, 0x9c, 0xc7, 0xcb, 0x82, 0x00, 0x44, 0xf0,
- 0xc5, 0x00, 0x95, 0x00, 0x47, 0xc3, 0x02, 0x55, 0xa2, 0xc5, 0x01, 0x62,
- 0x00, 0x47, 0xd0, 0xce, 0x75, 0x33, 0x00, 0x44, 0x41, 0x9b, 0x00, 0x30,
- 0x40, 0xe0, 0x05, 0x27, 0x00, 0x37, 0x60, 0xce, 0x70, 0x63, 0x00, 0x47,
- 0xb1, 0xcd, 0x05, 0x7a, 0x07, 0xf3, 0xd1, 0xcb, 0x66, 0x54, 0x07, 0xf3,
- 0xd8, 0xce, 0x05, 0x79, 0x07, 0xf3, 0xa0, 0x00, 0xc2, 0x55, 0xa8, 0xc3,
- 0x14, 0xa9, 0x00, 0x32, 0x5a, 0x02, 0x55, 0xba, 0x45, 0xb7, 0xd9, 0xc2,
- 0x55, 0xc0, 0x49, 0x01, 0x59, 0xc2, 0x55, 0xcc, 0x48, 0x01, 0xb4, 0x42,
- 0x55, 0xd8, 0xc5, 0x19, 0xb2, 0x00, 0x32, 0x03, 0x02, 0x55, 0xe4, 0xcb,
- 0x8f, 0xa3, 0x07, 0xf3, 0x98, 0xc5, 0x4b, 0xf1, 0x00, 0x47, 0x33, 0x02,
- 0x55, 0xea, 0xc5, 0x52, 0xee, 0x00, 0x47, 0x2b, 0x02, 0x55, 0xf0, 0xc5,
- 0x69, 0x57, 0x00, 0x47, 0x22, 0x02, 0x55, 0xf6, 0xc5, 0x00, 0x95, 0x00,
- 0x32, 0xa1, 0xc5, 0x01, 0x62, 0x00, 0x32, 0xa8, 0xce, 0x72, 0xd9, 0x00,
- 0x44, 0x81, 0xcf, 0x64, 0xd9, 0x00, 0x30, 0x70, 0xc9, 0x0e, 0xac, 0x00,
- 0x32, 0xe1, 0xd6, 0x2c, 0x15, 0x00, 0x32, 0xd9, 0xcd, 0x2c, 0x1e, 0x00,
- 0x32, 0xd0, 0xc9, 0x05, 0x2b, 0x00, 0x37, 0x59, 0xc8, 0xbc, 0xc5, 0x00,
- 0x37, 0x50, 0xc4, 0x46, 0x7d, 0x00, 0x36, 0xe9, 0xc9, 0x5c, 0xf9, 0x00,
- 0x30, 0xe8, 0xc4, 0x18, 0x83, 0x00, 0x33, 0x39, 0xc2, 0x26, 0x51, 0x00,
- 0x33, 0x30, 0xc3, 0x0c, 0x5b, 0x00, 0x33, 0x29, 0xc3, 0x06, 0x9e, 0x00,
- 0x33, 0x20, 0xc4, 0x04, 0x5e, 0x00, 0x33, 0x19, 0xc2, 0x01, 0x47, 0x00,
- 0x33, 0x10, 0xc3, 0xe7, 0xbd, 0x07, 0xd8, 0xb9, 0xc3, 0x04, 0x6d, 0x07,
- 0xd8, 0xa9, 0xc3, 0x58, 0xc4, 0x07, 0xd8, 0xa1, 0xc3, 0x28, 0xed, 0x07,
- 0xd8, 0x98, 0xcc, 0x21, 0x84, 0x00, 0x2c, 0x41, 0xc2, 0x01, 0x07, 0x00,
- 0x2c, 0x10, 0x8a, 0x00, 0x2c, 0x21, 0x90, 0x00, 0x2b, 0x78, 0xc3, 0xe7,
- 0x72, 0x00, 0x2c, 0x19, 0xc2, 0x0d, 0xf7, 0x00, 0x2b, 0xd0, 0x91, 0x00,
- 0x2c, 0x09, 0x0a, 0xc2, 0x55, 0xfc, 0x83, 0x00, 0x2b, 0x70, 0xc2, 0x0d,
- 0xf7, 0x00, 0x2c, 0x01, 0x83, 0x00, 0x2b, 0xe0, 0xc3, 0xbe, 0x02, 0x00,
- 0x2b, 0xf9, 0x91, 0x00, 0x2b, 0x49, 0xc9, 0xab, 0x23, 0x00, 0x2b, 0x00,
- 0xc2, 0x01, 0x06, 0x00, 0x2b, 0xf1, 0x91, 0x00, 0x2b, 0xc0, 0xc2, 0x0d,
- 0xf7, 0x00, 0x2b, 0xe9, 0xc2, 0x00, 0xa4, 0x00, 0x2b, 0xb8, 0xc3, 0x66,
- 0x50, 0x00, 0x2b, 0xd9, 0x83, 0x00, 0x2b, 0x88, 0xc3, 0x02, 0x29, 0x00,
- 0x2b, 0x91, 0xc2, 0x01, 0x2e, 0x00, 0x2b, 0x18, 0xc2, 0x01, 0xf0, 0x00,
- 0x2b, 0x51, 0x83, 0x00, 0x2b, 0x30, 0x96, 0x00, 0x2b, 0x41, 0x8a, 0x00,
- 0x2b, 0x39, 0xc2, 0x11, 0xd4, 0x00, 0x2b, 0x28, 0x8a, 0x00, 0x2a, 0xa1,
- 0x90, 0x00, 0x29, 0xf8, 0xc3, 0xe7, 0x72, 0x00, 0x2a, 0x99, 0xc2, 0x0d,
- 0xf7, 0x00, 0x2a, 0x50, 0xc2, 0x01, 0x07, 0x00, 0x2a, 0x90, 0x91, 0x00,
- 0x2a, 0x89, 0x0a, 0xc2, 0x56, 0x06, 0x83, 0x00, 0x29, 0xf0, 0xc2, 0x0d,
- 0xf7, 0x00, 0x2a, 0x81, 0x83, 0x00, 0x2a, 0x60, 0xc3, 0xbe, 0x02, 0x00,
- 0x2a, 0x79, 0x91, 0x00, 0x29, 0xc8, 0xc2, 0x01, 0x06, 0x00, 0x2a, 0x71,
- 0x91, 0x00, 0x2a, 0x40, 0xc2, 0x0d, 0xf7, 0x00, 0x2a, 0x69, 0xc2, 0x00,
- 0xa4, 0x00, 0x2a, 0x38, 0xc3, 0x66, 0x50, 0x00, 0x2a, 0x59, 0x83, 0x00,
- 0x2a, 0x08, 0xc3, 0x02, 0x29, 0x00, 0x2a, 0x11, 0xc2, 0x01, 0x2e, 0x00,
- 0x29, 0x98, 0xc2, 0x01, 0xf0, 0x00, 0x29, 0xd1, 0x83, 0x00, 0x29, 0xb0,
- 0x96, 0x00, 0x29, 0xc1, 0x8a, 0x00, 0x29, 0xb9, 0xc2, 0x11, 0xd4, 0x00,
- 0x29, 0xa8, 0xc4, 0x14, 0x41, 0x0f, 0x48, 0x09, 0xc2, 0x00, 0xa4, 0x0f,
- 0x48, 0x68, 0x83, 0x0f, 0x48, 0x21, 0xc2, 0x01, 0xf0, 0x0f, 0x48, 0x38,
- 0xc9, 0xac, 0xca, 0x0f, 0x48, 0x29, 0xc2, 0x00, 0xa4, 0x0f, 0x49, 0x08,
- 0xc2, 0x01, 0xf0, 0x0f, 0x48, 0x71, 0x83, 0x0f, 0x48, 0x90, 0xc2, 0x01,
- 0xbd, 0x0f, 0x48, 0x81, 0xc2, 0x1d, 0x5f, 0x0f, 0x48, 0xc9, 0xc2, 0x00,
- 0xa4, 0x0f, 0x48, 0xd8, 0xc2, 0x0f, 0x61, 0x0f, 0x48, 0x89, 0xc2, 0x00,
- 0xa4, 0x0f, 0x48, 0xe9, 0xc2, 0x01, 0x8a, 0x0f, 0x49, 0x00, 0x83, 0x0f,
- 0x48, 0xc1, 0xc2, 0x00, 0x48, 0x0f, 0x48, 0xf0, 0x9f, 0x0f, 0xba, 0x19,
- 0xa0, 0x0f, 0xba, 0x20, 0x02, 0x42, 0x56, 0x10, 0xc4, 0x1c, 0x64, 0x0f,
- 0xb8, 0xf1, 0xc6, 0x48, 0x2a, 0x0f, 0xb9, 0x1a, 0x02, 0x56, 0x20, 0xc2,
- 0xe7, 0x79, 0x0f, 0xbb, 0x10, 0xc8, 0xbd, 0x95, 0x0f, 0xba, 0xd0, 0x02,
- 0xc2, 0x56, 0x26, 0x44, 0x00, 0xf2, 0x42, 0x56, 0x32, 0xc2, 0xe7, 0x79,
- 0x0f, 0xb9, 0xe0, 0xcc, 0x85, 0x3c, 0x0f, 0xb9, 0x79, 0x02, 0x42, 0x56,
- 0x41, 0xc2, 0xe7, 0x79, 0x0f, 0xb8, 0xb8, 0x45, 0x3f, 0x4d, 0xc2, 0x56,
- 0x49, 0xc3, 0x00, 0xb6, 0x0f, 0xba, 0xf0, 0x44, 0x00, 0xf2, 0x42, 0x56,
- 0x5b, 0xc2, 0xe7, 0x79, 0x0f, 0xba, 0xe8, 0xc5, 0xda, 0xde, 0x0f, 0xb8,
- 0x43, 0x02, 0x56, 0x67, 0xc5, 0xd6, 0xdd, 0x0f, 0xb8, 0x32, 0x02, 0x56,
- 0x6d, 0xc2, 0xe7, 0x79, 0x0f, 0xb9, 0xb8, 0xa0, 0x0f, 0xb8, 0x91, 0x9f,
- 0x0f, 0xb8, 0x88, 0x9f, 0x0a, 0x21, 0xd1, 0x9e, 0x0a, 0x21, 0xc9, 0x9d,
- 0x0a, 0x21, 0xc1, 0xa0, 0x0a, 0x21, 0xd9, 0xa1, 0x0a, 0x21, 0xe1, 0xa2,
- 0x0a, 0x21, 0xe9, 0xa3, 0x0a, 0x21, 0xf1, 0xa4, 0x0a, 0x21, 0xf9, 0xa5,
- 0x0a, 0x22, 0x01, 0xa6, 0x0a, 0x22, 0x08, 0xa6, 0x0a, 0x21, 0xb9, 0xa5,
- 0x0a, 0x21, 0xb1, 0xa4, 0x0a, 0x21, 0xa9, 0xa3, 0x0a, 0x21, 0x93, 0x02,
- 0x56, 0x73, 0xa2, 0x0a, 0x21, 0x83, 0x02, 0x56, 0x7b, 0xa1, 0x0a, 0x21,
- 0x79, 0xa0, 0x0a, 0x21, 0x71, 0x9f, 0x0a, 0x21, 0x69, 0x9e, 0x0a, 0x21,
- 0x5b, 0x02, 0x56, 0x7f, 0x9d, 0x0a, 0x21, 0x50, 0xa6, 0x0a, 0x21, 0x43,
- 0x02, 0x56, 0x83, 0xa5, 0x0a, 0x21, 0x39, 0xa4, 0x0a, 0x21, 0x31, 0xa3,
- 0x0a, 0x21, 0x29, 0xa2, 0x0a, 0x21, 0x21, 0xa1, 0x0a, 0x21, 0x19, 0xa0,
- 0x0a, 0x21, 0x11, 0x9f, 0x0a, 0x21, 0x09, 0x9e, 0x0a, 0x21, 0x01, 0x9d,
- 0x0a, 0x20, 0xf8, 0xa6, 0x0a, 0x20, 0xf1, 0xa5, 0x0a, 0x20, 0xe9, 0xa4,
- 0x0a, 0x20, 0xe1, 0xa3, 0x0a, 0x20, 0xd3, 0x02, 0x56, 0x87, 0xa2, 0x0a,
- 0x20, 0xc9, 0xa1, 0x0a, 0x20, 0xc1, 0xa0, 0x0a, 0x20, 0xb9, 0x9f, 0x0a,
- 0x20, 0xb1, 0x9e, 0x0a, 0x20, 0xa9, 0x9d, 0x0a, 0x20, 0xa0, 0xa6, 0x0a,
- 0x20, 0x99, 0xa5, 0x0a, 0x20, 0x91, 0xa4, 0x0a, 0x20, 0x89, 0xa3, 0x0a,
- 0x20, 0x81, 0xa2, 0x0a, 0x20, 0x79, 0xa1, 0x0a, 0x20, 0x71, 0xa0, 0x0a,
- 0x20, 0x69, 0x9f, 0x0a, 0x20, 0x61, 0x9e, 0x0a, 0x20, 0x59, 0x9d, 0x0a,
- 0x20, 0x4a, 0x02, 0x56, 0x8b, 0xa6, 0x0a, 0x20, 0x41, 0xa5, 0x0a, 0x20,
- 0x39, 0xa4, 0x0a, 0x20, 0x31, 0xa3, 0x0a, 0x20, 0x29, 0xa2, 0x0a, 0x20,
- 0x21, 0xa1, 0x0a, 0x20, 0x19, 0xa0, 0x0a, 0x20, 0x11, 0x9f, 0x0a, 0x20,
- 0x09, 0x9e, 0x0a, 0x20, 0x00, 0x9d, 0x0a, 0x22, 0x11, 0x9e, 0x0a, 0x22,
- 0x19, 0x9f, 0x0a, 0x22, 0x21, 0xa0, 0x0a, 0x22, 0x29, 0xa1, 0x0a, 0x22,
- 0x31, 0xa2, 0x0a, 0x22, 0x39, 0xa3, 0x0a, 0x22, 0x43, 0x02, 0x56, 0x8f,
- 0xa4, 0x0a, 0x22, 0x61, 0xa5, 0x0a, 0x22, 0x69, 0xa6, 0x0a, 0x22, 0x70,
- 0x9d, 0x0a, 0x22, 0x79, 0x9e, 0x0a, 0x22, 0x81, 0x9f, 0x0a, 0x22, 0x89,
- 0xa0, 0x0a, 0x22, 0x91, 0xa1, 0x0a, 0x22, 0x99, 0xa2, 0x0a, 0x22, 0xa1,
- 0xa3, 0x0a, 0x22, 0xa9, 0xa4, 0x0a, 0x22, 0xb1, 0xa5, 0x0a, 0x22, 0xb9,
- 0xa6, 0x0a, 0x22, 0xc0, 0x9d, 0x0a, 0x22, 0xc9, 0x9e, 0x0a, 0x22, 0xd1,
- 0x9f, 0x0a, 0x22, 0xd9, 0xa0, 0x0a, 0x22, 0xe1, 0xa1, 0x0a, 0x22, 0xe9,
- 0xa2, 0x0a, 0x22, 0xf1, 0xa3, 0x0a, 0x22, 0xf9, 0xa4, 0x0a, 0x23, 0x01,
- 0xa5, 0x0a, 0x23, 0x09, 0xa6, 0x0a, 0x23, 0x10, 0x9d, 0x0a, 0x23, 0x19,
- 0x9e, 0x0a, 0x23, 0x21, 0x9f, 0x0a, 0x23, 0x29, 0xa0, 0x0a, 0x23, 0x31,
- 0xa1, 0x0a, 0x23, 0x39, 0xa2, 0x0a, 0x23, 0x41, 0xa3, 0x0a, 0x23, 0x49,
- 0xa4, 0x0a, 0x23, 0x53, 0x02, 0x56, 0x9b, 0xa5, 0x0a, 0x23, 0x63, 0x02,
- 0x56, 0x9f, 0xa6, 0x0a, 0x23, 0x70, 0x9d, 0x0a, 0x23, 0x7b, 0x02, 0x56,
- 0xa3, 0x9e, 0x0a, 0x23, 0x8b, 0x02, 0x56, 0xa7, 0x9f, 0x0a, 0x23, 0x9b,
- 0x02, 0x56, 0xab, 0xa0, 0x0a, 0x23, 0xa9, 0xa1, 0x0a, 0x23, 0xb3, 0x02,
- 0x56, 0xaf, 0xa2, 0x0a, 0x23, 0xd3, 0x02, 0x56, 0xbb, 0xa3, 0x0a, 0x23,
- 0xe9, 0xa4, 0x0a, 0x23, 0xf3, 0x02, 0x56, 0xc3, 0xa5, 0x0a, 0x24, 0x11,
- 0xa6, 0x0a, 0x24, 0x18, 0x9d, 0x0a, 0x24, 0x23, 0x02, 0x56, 0xcf, 0x9e,
- 0x0a, 0x24, 0x39, 0x9f, 0x0a, 0x24, 0x41, 0xa0, 0x0a, 0x24, 0x49, 0xa1,
- 0x0a, 0x24, 0x51, 0xa2, 0x0a, 0x24, 0x5b, 0x02, 0x56, 0xd7, 0xa3, 0x0a,
- 0x24, 0x69, 0xa4, 0x0a, 0x24, 0x71, 0xa5, 0x0a, 0x24, 0x79, 0xa6, 0x0a,
- 0x24, 0x80, 0x9d, 0x0a, 0x24, 0x89, 0x9e, 0x0a, 0x24, 0x91, 0x9f, 0x0a,
- 0x24, 0x99, 0xa0, 0x0a, 0x24, 0xa1, 0xa1, 0x0a, 0x24, 0xa9, 0xa2, 0x0a,
- 0x24, 0xb3, 0x02, 0x56, 0xdb, 0xa3, 0x0a, 0x24, 0xc1, 0xa4, 0x0a, 0x24,
- 0xc9, 0xa5, 0x0a, 0x24, 0xd1, 0xa6, 0x0a, 0x24, 0xd8, 0x9d, 0x0a, 0x24,
- 0xe1, 0x9e, 0x0a, 0x24, 0xe9, 0x9f, 0x0a, 0x24, 0xf1, 0xa0, 0x0a, 0x24,
- 0xf9, 0xa1, 0x0a, 0x25, 0x01, 0xa2, 0x0a, 0x25, 0x0b, 0x02, 0x56, 0xdf,
- 0xa3, 0x0a, 0x25, 0x19, 0xa4, 0x0a, 0x25, 0x21, 0xa5, 0x0a, 0x25, 0x29,
- 0xa6, 0x0a, 0x25, 0x30, 0x9d, 0x0a, 0x25, 0x39, 0x9e, 0x0a, 0x25, 0x41,
- 0x9f, 0x0a, 0x25, 0x49, 0xa0, 0x0a, 0x25, 0x51, 0xa1, 0x0a, 0x25, 0x59,
- 0xa2, 0x0a, 0x25, 0x61, 0xa3, 0x0a, 0x25, 0x69, 0xa4, 0x0a, 0x25, 0x71,
- 0xa5, 0x0a, 0x25, 0x79, 0xa6, 0x0a, 0x25, 0x80, 0x9d, 0x0a, 0x25, 0x89,
- 0x9e, 0x0a, 0x25, 0x91, 0x9f, 0x0a, 0x25, 0x99, 0xa0, 0x0a, 0x25, 0xa1,
- 0xa1, 0x0a, 0x25, 0xa9, 0xa2, 0x0a, 0x25, 0xb1, 0xa3, 0x0a, 0x25, 0xb9,
- 0xa4, 0x0a, 0x25, 0xc1, 0xa5, 0x0a, 0x25, 0xc9, 0xa6, 0x0a, 0x25, 0xd0,
- 0x9d, 0x0a, 0x25, 0xd9, 0x9e, 0x0a, 0x25, 0xe1, 0x9f, 0x0a, 0x25, 0xe9,
- 0xa0, 0x0a, 0x25, 0xf1, 0xa1, 0x0a, 0x25, 0xf9, 0xa2, 0x0a, 0x26, 0x01,
- 0xa3, 0x0a, 0x26, 0x09, 0xa4, 0x0a, 0x26, 0x11, 0xa5, 0x0a, 0x26, 0x19,
- 0xa6, 0x0a, 0x26, 0x20, 0x9d, 0x0a, 0x26, 0x29, 0x9e, 0x0a, 0x26, 0x31,
- 0x9f, 0x0a, 0x26, 0x39, 0xa0, 0x0a, 0x26, 0x41, 0xa1, 0x0a, 0x26, 0x49,
- 0xa2, 0x0a, 0x26, 0x51, 0xa3, 0x0a, 0x26, 0x59, 0xa4, 0x0a, 0x26, 0x61,
- 0xa5, 0x0a, 0x26, 0x69, 0xa6, 0x0a, 0x26, 0x70, 0x9d, 0x0a, 0x26, 0x79,
- 0x9e, 0x0a, 0x26, 0x81, 0x9f, 0x0a, 0x26, 0x89, 0xa0, 0x0a, 0x26, 0x91,
- 0xa1, 0x0a, 0x26, 0x99, 0xa2, 0x0a, 0x26, 0xa1, 0xa3, 0x0a, 0x26, 0xa9,
- 0xa4, 0x0a, 0x26, 0xb1, 0xa5, 0x0a, 0x26, 0xb9, 0xa6, 0x0a, 0x26, 0xc0,
- 0x9d, 0x0a, 0x26, 0xc9, 0x9e, 0x0a, 0x26, 0xd1, 0x9f, 0x0a, 0x26, 0xd9,
- 0xa0, 0x0a, 0x26, 0xe1, 0xa1, 0x0a, 0x26, 0xe9, 0xa2, 0x0a, 0x26, 0xf1,
- 0xa3, 0x0a, 0x26, 0xf9, 0xa4, 0x0a, 0x27, 0x01, 0xa5, 0x0a, 0x27, 0x09,
- 0xa6, 0x0a, 0x27, 0x10, 0x9d, 0x0a, 0x27, 0x19, 0x9e, 0x0a, 0x27, 0x21,
- 0x9f, 0x0a, 0x27, 0x2b, 0x02, 0x56, 0xe3, 0xa0, 0x0a, 0x27, 0x41, 0xa1,
- 0x0a, 0x27, 0x49, 0xa2, 0x0a, 0x27, 0x51, 0xa3, 0x0a, 0x27, 0x59, 0xa4,
- 0x0a, 0x27, 0x63, 0x02, 0x56, 0xeb, 0xa5, 0x0a, 0x27, 0x71, 0xa6, 0x0a,
- 0x27, 0x7a, 0x02, 0x56, 0xef, 0x9d, 0x0a, 0x27, 0x89, 0x9e, 0x0a, 0x27,
- 0x91, 0x9f, 0x0a, 0x27, 0x99, 0xa0, 0x0a, 0x27, 0xa1, 0xa1, 0x0a, 0x27,
- 0xa9, 0xa2, 0x0a, 0x27, 0xb3, 0x02, 0x56, 0xf3, 0xa3, 0x0a, 0x27, 0xc3,
- 0x02, 0x56, 0xf7, 0xa4, 0x0a, 0x27, 0xd1, 0xa5, 0x0a, 0x27, 0xd9, 0xa6,
- 0x0a, 0x27, 0xe0, 0x9d, 0x0a, 0x27, 0xe9, 0x9e, 0x0a, 0x27, 0xf1, 0x9f,
- 0x0a, 0x27, 0xf9, 0xa0, 0x0a, 0x28, 0x01, 0xa1, 0x0a, 0x28, 0x09, 0xa2,
- 0x0a, 0x28, 0x11, 0xa3, 0x0a, 0x28, 0x19, 0xa4, 0x0a, 0x28, 0x23, 0x02,
- 0x56, 0xfb, 0xa5, 0x0a, 0x28, 0x31, 0xa6, 0x0a, 0x28, 0x38, 0x9d, 0x0a,
- 0x28, 0x41, 0x9e, 0x0a, 0x28, 0x49, 0x9f, 0x0a, 0x28, 0x51, 0xa0, 0x0a,
- 0x28, 0x59, 0xa1, 0x0a, 0x28, 0x61, 0xa2, 0x0a, 0x28, 0x69, 0xa3, 0x0a,
- 0x28, 0x71, 0xa4, 0x0a, 0x28, 0x79, 0xa5, 0x0a, 0x28, 0x81, 0xa6, 0x0a,
- 0x28, 0x88, 0x9d, 0x0a, 0x28, 0x91, 0x9e, 0x0a, 0x28, 0x99, 0x9f, 0x0a,
- 0x28, 0xa1, 0xa0, 0x0a, 0x28, 0xa9, 0xa1, 0x0a, 0x28, 0xb1, 0xa2, 0x0a,
- 0x28, 0xb9, 0xa3, 0x0a, 0x28, 0xc1, 0xa4, 0x0a, 0x28, 0xc9, 0xa5, 0x0a,
- 0x28, 0xd1, 0xa6, 0x0a, 0x28, 0xd8, 0x9d, 0x0a, 0x28, 0xe1, 0x9e, 0x0a,
- 0x28, 0xe9, 0x9f, 0x0a, 0x28, 0xf1, 0xa0, 0x0a, 0x28, 0xf9, 0xa1, 0x0a,
- 0x29, 0x01, 0xa2, 0x0a, 0x29, 0x09, 0xa3, 0x0a, 0x29, 0x11, 0xa4, 0x0a,
- 0x29, 0x19, 0xa5, 0x0a, 0x29, 0x21, 0xa6, 0x0a, 0x29, 0x28, 0x9d, 0x0a,
- 0x29, 0x31, 0x9e, 0x0a, 0x29, 0x39, 0x9f, 0x0a, 0x29, 0x41, 0xa0, 0x0a,
- 0x29, 0x49, 0xa1, 0x0a, 0x29, 0x51, 0xa2, 0x0a, 0x29, 0x59, 0xa3, 0x0a,
- 0x29, 0x61, 0xa4, 0x0a, 0x29, 0x6b, 0x02, 0x56, 0xff, 0xa5, 0x0a, 0x29,
- 0x79, 0xa6, 0x0a, 0x29, 0x80, 0x9d, 0x0a, 0x29, 0x89, 0x9e, 0x0a, 0x29,
- 0x91, 0x9f, 0x0a, 0x29, 0x99, 0xa0, 0x0a, 0x29, 0xa1, 0xa1, 0x0a, 0x29,
- 0xa9, 0xa2, 0x0a, 0x29, 0xb1, 0xa3, 0x0a, 0x29, 0xb9, 0xa4, 0x0a, 0x29,
- 0xc1, 0xa5, 0x0a, 0x29, 0xc9, 0xa6, 0x0a, 0x29, 0xd0, 0x9d, 0x0a, 0x29,
- 0xd9, 0x9e, 0x0a, 0x29, 0xe1, 0x9f, 0x0a, 0x29, 0xe9, 0xa0, 0x0a, 0x29,
- 0xf1, 0xa1, 0x0a, 0x29, 0xf9, 0xa2, 0x0a, 0x2a, 0x01, 0xa3, 0x0a, 0x2a,
- 0x09, 0xa4, 0x0a, 0x2a, 0x11, 0xa5, 0x0a, 0x2a, 0x19, 0xa6, 0x0a, 0x2a,
- 0x22, 0x02, 0x57, 0x03, 0x9d, 0x0a, 0x2a, 0x31, 0x9e, 0x0a, 0x2a, 0x39,
- 0x9f, 0x0a, 0x2a, 0x41, 0xa0, 0x0a, 0x2a, 0x49, 0xa1, 0x0a, 0x2a, 0x53,
- 0x02, 0x57, 0x07, 0xa2, 0x0a, 0x2a, 0x61, 0xa3, 0x0a, 0x2a, 0x69, 0xa4,
- 0x0a, 0x2a, 0x71, 0xa5, 0x0a, 0x2a, 0x79, 0xa6, 0x0a, 0x2a, 0x82, 0x02,
- 0x57, 0x0b, 0x9d, 0x0a, 0x2a, 0x91, 0x9e, 0x0a, 0x2a, 0x99, 0x9f, 0x0a,
- 0x2a, 0xa1, 0xa0, 0x0a, 0x2a, 0xa9, 0xa1, 0x0a, 0x2a, 0xb1, 0xa2, 0x0a,
- 0x2a, 0xb9, 0xa3, 0x0a, 0x2a, 0xc1, 0xa4, 0x0a, 0x2a, 0xc9, 0xa5, 0x0a,
- 0x2a, 0xd1, 0xa6, 0x0a, 0x2a, 0xda, 0x02, 0x57, 0x0f, 0x9d, 0x0a, 0x2a,
- 0xe9, 0x9e, 0x0a, 0x2a, 0xf1, 0x9f, 0x0a, 0x2a, 0xf9, 0xa0, 0x0a, 0x2b,
- 0x01, 0xa1, 0x0a, 0x2b, 0x09, 0xa2, 0x0a, 0x2b, 0x11, 0xa3, 0x0a, 0x2b,
- 0x19, 0xa4, 0x0a, 0x2b, 0x21, 0xa5, 0x0a, 0x2b, 0x29, 0xa6, 0x0a, 0x2b,
- 0x30, 0x9d, 0x0a, 0x2b, 0x39, 0x9e, 0x0a, 0x2b, 0x41, 0x9f, 0x0a, 0x2b,
- 0x49, 0xa0, 0x0a, 0x2b, 0x51, 0xa1, 0x0a, 0x2b, 0x59, 0xa2, 0x0a, 0x2b,
- 0x61, 0xa3, 0x0a, 0x2b, 0x69, 0xa4, 0x0a, 0x2b, 0x71, 0xa5, 0x0a, 0x2b,
- 0x79, 0xa6, 0x0a, 0x2b, 0x82, 0x02, 0x57, 0x13, 0x9d, 0x0a, 0x2b, 0x91,
- 0x9e, 0x0a, 0x2b, 0x99, 0x1f, 0xc2, 0x57, 0x17, 0xa0, 0x0a, 0x2b, 0xb9,
- 0xa1, 0x0a, 0x2b, 0xc1, 0xa2, 0x0a, 0x2b, 0xc9, 0xa3, 0x0a, 0x2b, 0xd3,
- 0x02, 0x57, 0x23, 0xa4, 0x0a, 0x2b, 0xf1, 0xa5, 0x0a, 0x2b, 0xf9, 0xa6,
- 0x0a, 0x2c, 0x00, 0x9d, 0x0a, 0x2c, 0x09, 0x9e, 0x0a, 0x2c, 0x11, 0x9f,
- 0x0a, 0x2c, 0x19, 0xa0, 0x0a, 0x2c, 0x21, 0xa1, 0x0a, 0x2c, 0x29, 0xa2,
- 0x0a, 0x2c, 0x31, 0xa3, 0x0a, 0x2c, 0x39, 0xa4, 0x0a, 0x2c, 0x41, 0xa5,
- 0x0a, 0x2c, 0x49, 0xa6, 0x0a, 0x2c, 0x50, 0x9d, 0x0a, 0x2c, 0x59, 0x9e,
- 0x0a, 0x2c, 0x61, 0x9f, 0x0a, 0x2c, 0x69, 0xa0, 0x0a, 0x2c, 0x71, 0xa1,
- 0x0a, 0x2c, 0x79, 0xa2, 0x0a, 0x2c, 0x81, 0xa3, 0x0a, 0x2c, 0x89, 0xa4,
- 0x0a, 0x2c, 0x91, 0xa5, 0x0a, 0x2c, 0x99, 0xa6, 0x0a, 0x2c, 0xa2, 0x02,
- 0x57, 0x2f, 0x9d, 0x0a, 0x2c, 0xb1, 0x9e, 0x0a, 0x2c, 0xb9, 0x9f, 0x0a,
- 0x2c, 0xc1, 0xa0, 0x0a, 0x2c, 0xc9, 0xa1, 0x0a, 0x2c, 0xd3, 0x02, 0x57,
- 0x33, 0xa2, 0x0a, 0x2c, 0xe1, 0xa3, 0x0a, 0x2c, 0xe9, 0xa4, 0x0a, 0x2c,
- 0xf1, 0xa5, 0x0a, 0x2c, 0xfb, 0x02, 0x57, 0x37, 0xa6, 0x0a, 0x2d, 0x08,
- 0x9d, 0x0a, 0x2d, 0x11, 0x9e, 0x0a, 0x2d, 0x1b, 0x02, 0x57, 0x3b, 0x9f,
- 0x0a, 0x2d, 0x29, 0xa0, 0x0a, 0x2d, 0x31, 0xa1, 0x0a, 0x2d, 0x39, 0xa2,
- 0x0a, 0x2d, 0x41, 0xa3, 0x0a, 0x2d, 0x49, 0xa4, 0x0a, 0x2d, 0x51, 0xa5,
- 0x0a, 0x2d, 0x59, 0xa6, 0x0a, 0x2d, 0x60, 0x9d, 0x0a, 0x2d, 0x69, 0x9e,
- 0x0a, 0x2d, 0x73, 0x02, 0x57, 0x3f, 0x9f, 0x0a, 0x2d, 0x81, 0x20, 0xc2,
- 0x57, 0x43, 0xa1, 0x0a, 0x2d, 0x99, 0xa2, 0x0a, 0x2d, 0xa1, 0xa3, 0x0a,
- 0x2d, 0xab, 0x02, 0x57, 0x4d, 0xa4, 0x0a, 0x2d, 0xb9, 0xa5, 0x0a, 0x2d,
- 0xc1, 0xa6, 0x0a, 0x2d, 0xc8, 0x9d, 0x0a, 0x2d, 0xd1, 0x9e, 0x0a, 0x2d,
- 0xd9, 0x9f, 0x0a, 0x2d, 0xe1, 0xc7, 0xc1, 0xfe, 0x0a, 0x2d, 0xe9, 0xa1,
- 0x0a, 0x2d, 0xf1, 0xa2, 0x0a, 0x2d, 0xf9, 0xa3, 0x0a, 0x2e, 0x01, 0xa4,
- 0x0a, 0x2e, 0x09, 0xa5, 0x0a, 0x2e, 0x11, 0xa6, 0x0a, 0x2e, 0x18, 0x9d,
- 0x0a, 0x2e, 0x21, 0x9e, 0x0a, 0x2e, 0x29, 0x9f, 0x0a, 0x2e, 0x31, 0xa0,
- 0x0a, 0x2e, 0x39, 0xa1, 0x0a, 0x2e, 0x41, 0xa2, 0x0a, 0x2e, 0x49, 0xa3,
- 0x0a, 0x2e, 0x51, 0xa4, 0x0a, 0x2e, 0x59, 0xa5, 0x0a, 0x2e, 0x61, 0xa6,
- 0x0a, 0x2e, 0x68, 0x1d, 0xc2, 0x57, 0x51, 0x9e, 0x0a, 0x2e, 0x81, 0x9f,
- 0x0a, 0x2e, 0x89, 0xa0, 0x0a, 0x2e, 0x91, 0xa1, 0x0a, 0x2e, 0x99, 0xa2,
- 0x0a, 0x2e, 0xa1, 0xa3, 0x0a, 0x2e, 0xa9, 0xa4, 0x0a, 0x2e, 0xb1, 0xa5,
- 0x0a, 0x2e, 0xb9, 0xa6, 0x0a, 0x2e, 0xc0, 0x9d, 0x0a, 0x2e, 0xc9, 0x9e,
- 0x0a, 0x2e, 0xd1, 0x9f, 0x0a, 0x2e, 0xd9, 0xa0, 0x0a, 0x2e, 0xe1, 0xa1,
- 0x0a, 0x2e, 0xe9, 0xa2, 0x0a, 0x2e, 0xf1, 0xa3, 0x0a, 0x2e, 0xf9, 0xa4,
- 0x0a, 0x2f, 0x01, 0xa5, 0x0a, 0x2f, 0x09, 0xa6, 0x0a, 0x2f, 0x10, 0x9d,
- 0x0a, 0x2f, 0x19, 0x9e, 0x0a, 0x2f, 0x21, 0x9f, 0x0a, 0x2f, 0x29, 0xa0,
- 0x0a, 0x2f, 0x31, 0xa1, 0x0a, 0x2f, 0x39, 0xa2, 0x0a, 0x2f, 0x41, 0xa3,
- 0x0a, 0x2f, 0x49, 0xa4, 0x0a, 0x2f, 0x51, 0xa5, 0x0a, 0x2f, 0x59, 0xa6,
- 0x0a, 0x2f, 0x60, 0x9d, 0x0a, 0x2f, 0x69, 0x9e, 0x0a, 0x2f, 0x71, 0x9f,
- 0x0a, 0x2f, 0x79, 0xa0, 0x0a, 0x2f, 0x81, 0xa1, 0x0a, 0x2f, 0x89, 0xa2,
- 0x0a, 0x2f, 0x91, 0xa3, 0x0a, 0x2f, 0x99, 0xa4, 0x0a, 0x2f, 0xa1, 0xa5,
- 0x0a, 0x2f, 0xa9, 0xa6, 0x0a, 0x2f, 0xb0, 0x9d, 0x0a, 0x2f, 0xbb, 0x02,
- 0x57, 0x5d, 0x9e, 0x0a, 0x2f, 0xc9, 0x9f, 0x0a, 0x2f, 0xd1, 0xa0, 0x0a,
- 0x2f, 0xd9, 0xa1, 0x0a, 0x2f, 0xe1, 0xa2, 0x0a, 0x2f, 0xe9, 0xa3, 0x0a,
- 0x2f, 0xf1, 0xa4, 0x0a, 0x2f, 0xfb, 0x02, 0x57, 0x61, 0xa5, 0x0a, 0x30,
- 0x09, 0xa6, 0x0a, 0x30, 0x10, 0x9d, 0x0a, 0x30, 0x19, 0x9e, 0x0a, 0x30,
- 0x21, 0x9f, 0x0a, 0x30, 0x29, 0xa0, 0x0a, 0x30, 0x31, 0xa1, 0x0a, 0x30,
- 0x39, 0xa2, 0x0a, 0x30, 0x41, 0xa3, 0x0a, 0x30, 0x49, 0xa4, 0x0a, 0x30,
- 0x51, 0xa5, 0x0a, 0x30, 0x59, 0xa6, 0x0a, 0x30, 0x60, 0x9d, 0x0a, 0x30,
- 0x69, 0x9e, 0x0a, 0x30, 0x71, 0x9f, 0x0a, 0x30, 0x79, 0xa0, 0x0a, 0x30,
- 0x81, 0xa1, 0x0a, 0x30, 0x89, 0xa2, 0x0a, 0x30, 0x91, 0xa3, 0x0a, 0x30,
- 0x99, 0xa4, 0x0a, 0x30, 0xa1, 0xa5, 0x0a, 0x30, 0xa9, 0xa6, 0x0a, 0x30,
- 0xb0, 0x9d, 0x0a, 0x30, 0xb9, 0x9e, 0x0a, 0x30, 0xc1, 0x9f, 0x0a, 0x30,
- 0xc9, 0xa0, 0x0a, 0x30, 0xd1, 0xa1, 0x0a, 0x30, 0xd9, 0xa2, 0x0a, 0x30,
- 0xe1, 0xa3, 0x0a, 0x30, 0xe9, 0xa4, 0x0a, 0x30, 0xf1, 0xa5, 0x0a, 0x30,
- 0xf9, 0xa6, 0x0a, 0x31, 0x00, 0x9d, 0x0a, 0x31, 0x09, 0x9e, 0x0a, 0x31,
- 0x11, 0x9f, 0x0a, 0x31, 0x19, 0xa0, 0x0a, 0x31, 0x21, 0xa1, 0x0a, 0x31,
- 0x29, 0xa2, 0x0a, 0x31, 0x31, 0xa3, 0x0a, 0x31, 0x39, 0xa4, 0x0a, 0x31,
- 0x40, 0x9e, 0x0a, 0x31, 0x49, 0x9f, 0x0a, 0x31, 0x51, 0xa0, 0x0a, 0x31,
- 0x59, 0xa1, 0x0a, 0x31, 0x61, 0xa2, 0x0a, 0x31, 0x69, 0xa3, 0x0a, 0x31,
- 0x71, 0xa4, 0x0a, 0x31, 0x79, 0xa5, 0x0a, 0x31, 0x81, 0xa6, 0x0a, 0x31,
- 0x88, 0x9d, 0x0a, 0x31, 0x91, 0x9e, 0x0a, 0x31, 0x99, 0x9f, 0x0a, 0x31,
- 0xa1, 0xa0, 0x0a, 0x31, 0xa9, 0xa1, 0x0a, 0x31, 0xb1, 0xa2, 0x0a, 0x31,
- 0xb9, 0xa3, 0x0a, 0x31, 0xc1, 0xa4, 0x0a, 0x31, 0xc9, 0xa5, 0x0a, 0x31,
- 0xd1, 0xa6, 0x0a, 0x31, 0xd8, 0x9d, 0x0a, 0x31, 0xe1, 0x9e, 0x0a, 0x31,
- 0xe9, 0x9f, 0x0a, 0x31, 0xf1, 0xa0, 0x0a, 0x31, 0xf9, 0xa1, 0x0a, 0x32,
- 0x01, 0xa2, 0x0a, 0x32, 0x09, 0xa3, 0x0a, 0x32, 0x11, 0xa4, 0x0a, 0x32,
- 0x19, 0xa5, 0x0a, 0x32, 0x21, 0xa6, 0x0a, 0x32, 0x28, 0xd1, 0x01, 0x75,
- 0x01, 0x5b, 0x79, 0xd4, 0x3b, 0x9d, 0x01, 0x5c, 0x61, 0xd5, 0x32, 0xe9,
- 0x01, 0x5c, 0x69, 0xd3, 0x47, 0x2c, 0x01, 0x5c, 0x71, 0xd2, 0x4e, 0xea,
- 0x01, 0x5c, 0x78, 0xc8, 0x2c, 0x41, 0x01, 0x1b, 0x81, 0xc9, 0x24, 0x24,
- 0x01, 0x1b, 0x79, 0x05, 0xc2, 0x57, 0x65, 0x06, 0xc2, 0x57, 0x71, 0x42,
- 0x01, 0xc7, 0xc2, 0x57, 0x84, 0xd0, 0x01, 0xf7, 0x01, 0x1a, 0x41, 0x42,
- 0x00, 0x54, 0xc2, 0x57, 0x90, 0xcc, 0x00, 0x9b, 0x01, 0x1a, 0x21, 0xc9,
- 0x00, 0xde, 0x01, 0x1a, 0x11, 0xc5, 0x00, 0xe2, 0x01, 0x1a, 0x09, 0xc3,
- 0xaa, 0xf4, 0x01, 0x19, 0xd9, 0xc5, 0x00, 0x8f, 0x01, 0x19, 0xc0, 0xc9,
- 0x1e, 0x42, 0x01, 0x1b, 0x09, 0xc3, 0xaa, 0xf4, 0x01, 0x1a, 0xa9, 0xc7,
- 0x76, 0x66, 0x01, 0x1a, 0x88, 0xcb, 0x9a, 0x82, 0x01, 0x1b, 0x89, 0xca,
- 0x9b, 0x1d, 0x01, 0x1b, 0x31, 0x45, 0x36, 0xab, 0x42, 0x57, 0x9c, 0xc5,
- 0x21, 0x12, 0x01, 0x1b, 0x59, 0xc9, 0x1e, 0x42, 0x01, 0x1b, 0x11, 0xc5,
- 0x07, 0x02, 0x01, 0x1a, 0x90, 0xc8, 0x4f, 0x39, 0x01, 0x1a, 0xc9, 0xc5,
- 0x07, 0x02, 0x01, 0x1a, 0x58, 0xc2, 0x00, 0xbb, 0x01, 0x1a, 0xf9, 0xc3,
- 0x07, 0x04, 0x01, 0x19, 0xe8, 0xc2, 0x02, 0x18, 0x01, 0x12, 0x2b, 0x02,
- 0x57, 0xa8, 0xcb, 0x23, 0x35, 0x01, 0x53, 0x80, 0xc2, 0x05, 0xd0, 0x08,
- 0x59, 0x99, 0x87, 0x08, 0x59, 0x88, 0xc2, 0x00, 0x7b, 0x08, 0x59, 0x21,
- 0xc2, 0x05, 0xd0, 0x08, 0x59, 0x19, 0x87, 0x08, 0x59, 0x10, 0x87, 0x08,
- 0x58, 0x38, 0x90, 0x08, 0x58, 0x29, 0x91, 0x08, 0x58, 0x18, 0xc7, 0x0c,
- 0x4b, 0x08, 0x08, 0xc9, 0xc8, 0x50, 0x00, 0x08, 0x09, 0x10, 0xc3, 0x04,
- 0x5f, 0x08, 0x08, 0x4b, 0x02, 0x57, 0xae, 0xc4, 0x0c, 0x55, 0x08, 0x08,
- 0x92, 0x02, 0x57, 0xb2, 0xc9, 0x4f, 0xff, 0x08, 0x09, 0x58, 0xc4, 0x18,
- 0x85, 0x08, 0x08, 0x8b, 0x02, 0x57, 0xb8, 0x91, 0x08, 0x08, 0x42, 0x02,
- 0x57, 0xbe, 0xc2, 0x00, 0x7b, 0x08, 0x08, 0x5b, 0x02, 0x57, 0xc2, 0xc3,
- 0x43, 0xcd, 0x08, 0x08, 0xa2, 0x02, 0x57, 0xc6, 0xc2, 0x02, 0x53, 0x08,
- 0x08, 0x53, 0x02, 0x57, 0xcc, 0xc3, 0x0c, 0x56, 0x08, 0x08, 0x9a, 0x02,
- 0x57, 0xd0, 0x00, 0xc2, 0x57, 0xd6, 0xc2, 0x0c, 0x57, 0x08, 0x08, 0xaa,
- 0x02, 0x57, 0xe2, 0x00, 0xc2, 0x57, 0xe8, 0xc2, 0x0c, 0x57, 0x08, 0x08,
- 0xb2, 0x02, 0x57, 0xf4, 0xc7, 0x0c, 0x4b, 0x08, 0x09, 0x01, 0xc8, 0x50,
- 0x00, 0x08, 0x09, 0x48, 0xc9, 0x4f, 0xff, 0x08, 0x09, 0x90, 0xc7, 0x0c,
- 0x4b, 0x08, 0x09, 0x09, 0xc8, 0x50, 0x00, 0x08, 0x09, 0x50, 0xc9, 0x4f,
- 0xff, 0x08, 0x09, 0x98, 0xcc, 0x13, 0xd6, 0x08, 0x09, 0xc1, 0xcd, 0x7a,
- 0x3c, 0x08, 0x09, 0xd8, 0xca, 0x00, 0xf6, 0x01, 0x28, 0x03, 0x02, 0x57,
- 0xfa, 0x06, 0xc2, 0x58, 0x00, 0xc2, 0x01, 0xc7, 0x01, 0x2b, 0xab, 0x02,
- 0x58, 0x0a, 0xc4, 0x01, 0x1e, 0x01, 0x2b, 0xa3, 0x02, 0x58, 0x10, 0xc5,
- 0x01, 0xf7, 0x01, 0x2b, 0xb1, 0x44, 0x12, 0x7a, 0xc2, 0x58, 0x16, 0xc8,
- 0x03, 0x3b, 0x01, 0x28, 0x13, 0x02, 0x58, 0x22, 0x4f, 0x64, 0xac, 0xc2,
- 0x58, 0x28, 0x4c, 0x52, 0x71, 0x42, 0x58, 0x34, 0x50, 0x5f, 0x72, 0xc2,
- 0x58, 0x40, 0xdd, 0x10, 0x32, 0x01, 0x2a, 0x29, 0xdd, 0x11, 0x71, 0x01,
- 0x2a, 0x19, 0x50, 0x10, 0x37, 0x42, 0x58, 0x52, 0x45, 0x00, 0x56, 0x42,
- 0x58, 0x64, 0xd0, 0x5c, 0x82, 0x01, 0x2b, 0xf0, 0xc2, 0x01, 0x07, 0x01,
- 0x2b, 0xdb, 0x02, 0x58, 0x74, 0x4a, 0xa3, 0x10, 0x42, 0x58, 0x7a, 0x45,
- 0x00, 0x56, 0x42, 0x58, 0x86, 0xc8, 0x03, 0x3b, 0x01, 0x28, 0x59, 0xca,
- 0x00, 0xf6, 0x01, 0x28, 0x48, 0xc8, 0x03, 0x3b, 0x01, 0x28, 0x39, 0xca,
- 0x00, 0xf6, 0x01, 0x28, 0x28, 0xc8, 0x03, 0x3b, 0x01, 0x2a, 0x8b, 0x02,
- 0x58, 0x98, 0x47, 0x53, 0xb4, 0xc2, 0x58, 0x9e, 0x49, 0x41, 0x75, 0xc2,
- 0x58, 0xb0, 0xca, 0x00, 0xf6, 0x01, 0x2a, 0x80, 0x4b, 0x97, 0x28, 0xc2,
- 0x58, 0xc2, 0x4b, 0x8e, 0xd2, 0xc2, 0x58, 0xd4, 0x4a, 0x5f, 0x72, 0xc2,
- 0x58, 0xe6, 0x4a, 0x10, 0x37, 0x42, 0x58, 0xfe, 0xd1, 0x4f, 0x52, 0x01,
- 0x2b, 0x59, 0xcb, 0x95, 0x70, 0x01, 0x2b, 0x11, 0xcc, 0x83, 0x44, 0x01,
- 0x2a, 0xf8, 0xd1, 0x4f, 0x63, 0x01, 0x2b, 0x51, 0xcb, 0x96, 0x20, 0x01,
- 0x2b, 0x09, 0xcc, 0x87, 0xa0, 0x01, 0x2a, 0xf0, 0xd0, 0x34, 0xa7, 0x01,
- 0x2a, 0x11, 0xca, 0xa3, 0xc4, 0x01, 0x29, 0x41, 0xcb, 0x8e, 0x85, 0x01,
- 0x29, 0x00, 0xd0, 0x34, 0x92, 0x01, 0x29, 0xf9, 0xca, 0xa2, 0xe8, 0x01,
- 0x29, 0x29, 0xcb, 0x8d, 0xa9, 0x01, 0x28, 0xe8, 0xd1, 0x4f, 0x52, 0x01,
- 0x2b, 0x41, 0xcb, 0x95, 0x70, 0x01, 0x2a, 0xe1, 0xcc, 0x83, 0x44, 0x01,
- 0x2a, 0xc8, 0xd1, 0x4f, 0x63, 0x01, 0x2b, 0x39, 0xcb, 0x96, 0x20, 0x01,
- 0x2a, 0xd9, 0xcc, 0x87, 0xa0, 0x01, 0x2a, 0xc0, 0xd5, 0x34, 0x8d, 0x01,
- 0x2a, 0x41, 0xd0, 0x34, 0x92, 0x01, 0x29, 0xb9, 0x45, 0x02, 0x4d, 0xc2,
- 0x59, 0x16, 0x46, 0x02, 0x12, 0x42, 0x59, 0x22, 0xd5, 0x34, 0xa2, 0x01,
- 0x2a, 0x01, 0xd0, 0x34, 0xa7, 0x01, 0x29, 0xc1, 0x45, 0x02, 0x4d, 0xc2,
- 0x59, 0x2e, 0x46, 0x02, 0x12, 0x42, 0x59, 0x3a, 0xce, 0x74, 0xdf, 0x01,
- 0x2a, 0x49, 0xc8, 0x11, 0x71, 0x01, 0x29, 0xc9, 0xca, 0x10, 0x32, 0x01,
- 0x29, 0x88, 0xce, 0x74, 0xc3, 0x01, 0x29, 0xf1, 0xc8, 0x10, 0x47, 0x01,
- 0x29, 0xb1, 0xca, 0x11, 0x84, 0x01, 0x29, 0x70, 0xc5, 0x13, 0x6c, 0x01,
- 0x18, 0xf9, 0xc3, 0x0b, 0x6a, 0x01, 0x18, 0x60, 0xc5, 0x13, 0x6c, 0x01,
- 0x18, 0xf1, 0xc3, 0x0b, 0x6a, 0x01, 0x18, 0x68, 0x89, 0x01, 0x8d, 0x68,
- 0xc2, 0x1c, 0x13, 0x01, 0x8d, 0x70, 0xc2, 0x1c, 0x13, 0x01, 0x8d, 0x78,
- 0x89, 0x01, 0x89, 0x21, 0x90, 0x01, 0x8d, 0x48, 0x90, 0x01, 0x8d, 0x39,
- 0x89, 0x01, 0x8d, 0x90, 0x89, 0x01, 0x89, 0x29, 0x90, 0x01, 0x8d, 0x28,
- 0x90, 0x01, 0x8d, 0x98, 0xa2, 0x0f, 0xd8, 0xbb, 0x02, 0x59, 0x46, 0xa3,
- 0x0f, 0xd9, 0x38, 0xa0, 0x0f, 0xd8, 0x33, 0x02, 0x59, 0x4a, 0xa2, 0x0f,
- 0xd8, 0x93, 0x02, 0x59, 0x5c, 0xa1, 0x0f, 0xd8, 0x53, 0x02, 0x59, 0x60,
- 0xa3, 0x0f, 0xd9, 0x08, 0xa3, 0x0f, 0xd9, 0x70, 0xa1, 0x0f, 0xd8, 0x63,
- 0x02, 0x59, 0x6b, 0xa3, 0x0f, 0xd9, 0x19, 0xc2, 0x00, 0x22, 0x0f, 0xd9,
- 0x90, 0xa3, 0x0f, 0xd9, 0x88, 0xa3, 0x0f, 0xd9, 0x49, 0xa2, 0x0f, 0xd8,
- 0xd2, 0x02, 0x59, 0x76, 0xa3, 0x0f, 0xd9, 0x78, 0xa1, 0x0f, 0xd8, 0x6b,
- 0x02, 0x59, 0x7a, 0xa3, 0x0f, 0xd9, 0x21, 0xa2, 0x0f, 0xd8, 0xa2, 0x02,
- 0x59, 0x85, 0xa2, 0x0f, 0xd8, 0xc2, 0x02, 0x59, 0x89, 0xa3, 0x0f, 0xd9,
- 0xa8, 0x45, 0x78, 0x8c, 0xc2, 0x59, 0x8d, 0x46, 0x3e, 0x82, 0xc2, 0x59,
- 0xc4, 0xd0, 0x5e, 0xb2, 0x01, 0x39, 0x61, 0xce, 0x72, 0x77, 0x01, 0x37,
- 0x41, 0xc5, 0x01, 0x7b, 0x01, 0x2e, 0x7b, 0x02, 0x59, 0xdc, 0xc8, 0xbc,
- 0x9d, 0x01, 0x33, 0x18, 0x4e, 0x71, 0x6d, 0xc2, 0x59, 0xe0, 0xc7, 0x33,
- 0xb4, 0x01, 0x38, 0x11, 0xce, 0x70, 0x71, 0x01, 0x38, 0x01, 0xc6, 0xcd,
- 0x69, 0x01, 0x36, 0x39, 0xc9, 0xb6, 0x6c, 0x01, 0x33, 0x01, 0x0f, 0xc2,
- 0x59, 0xec, 0xca, 0x57, 0xb7, 0x01, 0x30, 0xb9, 0xc3, 0x04, 0x45, 0x01,
- 0x30, 0x29, 0xcc, 0x86, 0xbc, 0x01, 0x30, 0x01, 0xc5, 0x07, 0x0a, 0x01,
- 0x2d, 0x03, 0x02, 0x59, 0xf8, 0xd3, 0x44, 0x0e, 0x0f, 0xab, 0x88, 0x44,
- 0xe2, 0x9b, 0xc2, 0x59, 0xfc, 0xc4, 0x6f, 0x1c, 0x01, 0x36, 0xf9, 0xd7,
- 0x29, 0xad, 0x01, 0x36, 0xb1, 0xc8, 0x37, 0x5b, 0x01, 0x30, 0x71, 0xd2,
- 0x4b, 0xc0, 0x0f, 0xab, 0xf8, 0x43, 0x02, 0xc7, 0xc2, 0x5a, 0x0e, 0xc6,
- 0x3f, 0x2f, 0x01, 0x2e, 0x33, 0x02, 0x5a, 0x20, 0x14, 0x42, 0x5a, 0x24,
- 0x44, 0x01, 0xdc, 0xc2, 0x5a, 0x30, 0xc8, 0x45, 0x0c, 0x01, 0x2d, 0x61,
- 0xc6, 0xcb, 0xef, 0x0f, 0x9f, 0xb0, 0x43, 0x01, 0x1f, 0xc2, 0x5a, 0x42,
- 0x11, 0xc2, 0x5a, 0x52, 0x45, 0x17, 0xf0, 0x42, 0x5a, 0x5e, 0x0e, 0xc2,
- 0x5a, 0x6a, 0x11, 0x42, 0x5a, 0x76, 0xca, 0xa2, 0x98, 0x01, 0x35, 0xc1,
- 0x46, 0x06, 0x1c, 0x42, 0x5a, 0x82, 0xd9, 0x20, 0xcc, 0x01, 0x33, 0xd9,
- 0x12, 0x42, 0x5a, 0xa0, 0x07, 0xc2, 0x5a, 0xb8, 0xd5, 0x37, 0x96, 0x0f,
- 0xad, 0x51, 0x11, 0x42, 0x5a, 0xc7, 0xcc, 0x83, 0x2c, 0x01, 0x2d, 0x81,
- 0xc6, 0xca, 0xc6, 0x0f, 0xac, 0x41, 0x42, 0x00, 0x4d, 0x42, 0x5a, 0xd3,
- 0x46, 0x03, 0x3e, 0xc2, 0x5a, 0xdf, 0x48, 0x4a, 0x91, 0x42, 0x5a, 0xeb,
- 0xd0, 0x1f, 0xc2, 0x01, 0x3d, 0xb1, 0xd0, 0x01, 0xf7, 0x01, 0x3d, 0xa9,
- 0xd0, 0x3a, 0x25, 0x01, 0x3d, 0xa0, 0x85, 0x01, 0x09, 0x69, 0x9c, 0x01,
- 0x09, 0x41, 0x94, 0x01, 0x08, 0xe1, 0x8b, 0x01, 0x08, 0x89, 0x8a, 0x01,
- 0x08, 0x60, 0xd0, 0x15, 0x8d, 0x01, 0x3a, 0x48, 0x9a, 0x01, 0x38, 0xb9,
- 0x42, 0x00, 0x2b, 0xc2, 0x5a, 0xfd, 0xc8, 0x94, 0xce, 0x0f, 0xaf, 0xa0,
- 0xc3, 0x0b, 0xc5, 0x00, 0xda, 0xdb, 0x02, 0x5b, 0x0a, 0xc5, 0xd8, 0xd6,
- 0x00, 0xdb, 0x00, 0xc8, 0xb7, 0xcd, 0x00, 0xdb, 0xe8, 0x46, 0xcd, 0xe1,
- 0xc2, 0x5b, 0x10, 0x49, 0xb3, 0x93, 0x42, 0x5b, 0x22, 0x48, 0xbf, 0x8d,
- 0xc2, 0x5b, 0x2e, 0x46, 0xce, 0x11, 0x42, 0x5b, 0x3a, 0xc4, 0x8a, 0xe5,
- 0x00, 0xdb, 0x99, 0xc5, 0xde, 0x1c, 0x00, 0xdb, 0x91, 0x44, 0xad, 0xb4,
- 0xc2, 0x5b, 0x46, 0xc7, 0x76, 0x32, 0x00, 0xdb, 0x79, 0xc5, 0xd9, 0x44,
- 0x00, 0xdb, 0x61, 0xc5, 0xde, 0xa8, 0x00, 0xdb, 0x58, 0x03, 0xc2, 0x5b,
- 0x58, 0x07, 0xc2, 0x5b, 0x6d, 0xc3, 0x00, 0x34, 0x00, 0xdb, 0x31, 0xc3,
- 0x3f, 0x7b, 0x00, 0xdb, 0x19, 0xc3, 0x02, 0xa8, 0x00, 0xdb, 0x08, 0xc5,
- 0x63, 0x68, 0x00, 0xda, 0xf9, 0xc7, 0xc4, 0xb3, 0x00, 0xda, 0xe8, 0xc4,
- 0x18, 0x83, 0x00, 0xda, 0xb9, 0xc2, 0x26, 0x51, 0x00, 0xda, 0xb0, 0xc3,
- 0x0c, 0x5b, 0x00, 0xda, 0xa9, 0xc3, 0x06, 0x9e, 0x00, 0xda, 0xa0, 0xc4,
- 0x04, 0x5e, 0x00, 0xda, 0x99, 0xc2, 0x01, 0x47, 0x00, 0xda, 0x90, 0xcb,
- 0x8e, 0xdd, 0x00, 0xda, 0x61, 0xcb, 0x91, 0x9d, 0x00, 0xda, 0x59, 0xc5,
- 0xde, 0x17, 0x00, 0xd8, 0x81, 0xc4, 0x9e, 0xf1, 0x00, 0xd8, 0x2a, 0x02,
- 0x5b, 0x79, 0xc7, 0xcb, 0x20, 0x00, 0xda, 0x41, 0xc4, 0x9e, 0xf1, 0x00,
- 0xd8, 0x78, 0xc9, 0xb2, 0x46, 0x00, 0xda, 0x39, 0x83, 0x00, 0xd9, 0x12,
- 0x02, 0x5b, 0x7f, 0xc9, 0xaa, 0xc0, 0x00, 0xda, 0x31, 0x83, 0x00, 0xd8,
- 0x9a, 0x02, 0x5b, 0x83, 0x43, 0x1f, 0xe7, 0x42, 0x5b, 0x8f, 0xc6, 0xba,
- 0xbf, 0x00, 0xd8, 0x6a, 0x02, 0x5b, 0x9b, 0xc5, 0xc2, 0x2a, 0x00, 0xd8,
- 0x5a, 0x02, 0x5b, 0xa1, 0xc8, 0xb8, 0xbd, 0x00, 0xd9, 0x50, 0xc6, 0xcd,
- 0x5d, 0x00, 0xd9, 0x40, 0x83, 0x00, 0xd9, 0x33, 0x02, 0x5b, 0xa7, 0xc2,
- 0x1d, 0x5f, 0x00, 0xd8, 0xe1, 0xc2, 0x01, 0x29, 0x00, 0xd8, 0xb8, 0x42,
- 0x00, 0x4f, 0x42, 0x5b, 0xad, 0xc5, 0xc9, 0x97, 0x00, 0xd8, 0xd8, 0xc5,
- 0xd7, 0xf5, 0x00, 0xd8, 0xc8, 0xc5, 0xd7, 0x7d, 0x00, 0xd8, 0xb0, 0xc7,
- 0xc9, 0xd7, 0x00, 0xd8, 0x90, 0xc7, 0xc9, 0xd7, 0x00, 0xd8, 0x50, 0xc7,
- 0xc9, 0xd7, 0x00, 0xd8, 0x40, 0xc7, 0xc9, 0xd7, 0x00, 0xda, 0x18, 0xc5,
- 0x23, 0xa6, 0x00, 0xd9, 0xf3, 0x02, 0x5b, 0xb9, 0xc5, 0xc9, 0xd9, 0x00,
- 0xd9, 0xa8, 0xc7, 0xc9, 0xd7, 0x00, 0xd9, 0xe8, 0xc7, 0xc9, 0xd7, 0x00,
- 0xd9, 0xd8, 0xc5, 0xd7, 0xff, 0x00, 0xd9, 0xc8, 0xc5, 0xd9, 0x21, 0x00,
- 0xd9, 0xb8, 0xc6, 0x1e, 0x17, 0x00, 0xd8, 0x09, 0xc5, 0xd6, 0x92, 0x00,
- 0xd8, 0x00, 0xc9, 0xaf, 0xa3, 0x0b, 0x57, 0xa1, 0xc5, 0x27, 0x0c, 0x0b,
- 0x57, 0x80, 0xc9, 0xaf, 0xc7, 0x0b, 0x57, 0x99, 0xc5, 0x27, 0x0c, 0x0b,
- 0x57, 0x88, 0x87, 0x0b, 0x57, 0x59, 0xc3, 0x1c, 0x13, 0x0b, 0x56, 0x80,
- 0xc2, 0x13, 0x1d, 0x0b, 0x57, 0x00, 0x91, 0x0b, 0x57, 0x48, 0xc3, 0x2d,
- 0x58, 0x0b, 0x57, 0x30, 0xc3, 0x1c, 0xc2, 0x0b, 0x57, 0x21, 0xc2, 0x04,
- 0x0a, 0x0b, 0x56, 0xa8, 0x91, 0x0b, 0x56, 0xf1, 0xc3, 0xe1, 0x1f, 0x0b,
- 0x56, 0xb8, 0xc2, 0x03, 0xca, 0x0b, 0x56, 0xe9, 0xc2, 0x02, 0x98, 0x0b,
- 0x56, 0xb0, 0xc3, 0x3b, 0x73, 0x0b, 0x56, 0xc1, 0x83, 0x0b, 0x56, 0x88,
- 0x42, 0x00, 0xf4, 0xc2, 0x5b, 0xbd, 0x42, 0x00, 0x55, 0xc2, 0x5b, 0xfe,
- 0x42, 0x00, 0x59, 0xc2, 0x5c, 0x3e, 0x42, 0x02, 0x8e, 0xc2, 0x5c, 0x73,
- 0x42, 0x01, 0x2b, 0xc2, 0x5c, 0xb3, 0x42, 0x00, 0xae, 0x42, 0x5c, 0xeb,
- 0xc2, 0xd0, 0x6a, 0x05, 0x36, 0x29, 0x87, 0x05, 0x36, 0x50, 0x87, 0x05,
- 0x36, 0x41, 0xc2, 0x0f, 0xf5, 0x05, 0x36, 0xb8, 0x96, 0x05, 0x35, 0xd9,
- 0xc2, 0xd0, 0x6a, 0x05, 0x36, 0x21, 0x90, 0x05, 0x36, 0x90, 0xc3, 0xe6,
- 0xa6, 0x05, 0x37, 0x71, 0xc4, 0xe2, 0x13, 0x05, 0x37, 0x78, 0x87, 0x05,
- 0x35, 0x29, 0xc2, 0xd0, 0x6a, 0x05, 0x36, 0x81, 0x90, 0x05, 0x37, 0x08,
- 0x8b, 0x05, 0x35, 0x61, 0xc2, 0x00, 0x4c, 0x05, 0x35, 0x68, 0x87, 0x05,
- 0x35, 0x31, 0x83, 0x05, 0x35, 0x80, 0x96, 0x05, 0x37, 0x41, 0x90, 0x05,
- 0x37, 0x50, 0xc3, 0x3c, 0x50, 0x05, 0x35, 0x91, 0xc3, 0x85, 0x08, 0x05,
- 0x35, 0xf1, 0xc2, 0x00, 0x4c, 0x05, 0x36, 0x30, 0xc2, 0x0f, 0xf5, 0x05,
- 0x35, 0xe0, 0xc2, 0x00, 0x4c, 0x05, 0x36, 0x39, 0xc2, 0x5f, 0x91, 0x05,
- 0x37, 0x58, 0xc5, 0xdb, 0x06, 0x05, 0x36, 0x99, 0xc2, 0x01, 0x29, 0x05,
- 0x36, 0xa1, 0x83, 0x05, 0x36, 0xa8, 0xc3, 0xb1, 0xe2, 0x05, 0x35, 0x79,
- 0x90, 0x05, 0x37, 0x10, 0xc2, 0x00, 0x4d, 0x05, 0x37, 0x01, 0xc2, 0x14,
- 0x40, 0x05, 0x37, 0x38, 0xc2, 0x23, 0xb4, 0x05, 0x35, 0xb1, 0xc3, 0xdd,
- 0xec, 0x05, 0x35, 0xc1, 0x97, 0x05, 0x36, 0x01, 0x91, 0x05, 0x36, 0xb0,
- 0xc7, 0xc1, 0x79, 0x05, 0x37, 0x81, 0xc9, 0xb2, 0x2b, 0x05, 0x37, 0x88,
- 0xc9, 0xac, 0xf7, 0x01, 0x5a, 0xd9, 0xcd, 0x7e, 0x18, 0x01, 0x5a, 0xe8,
- 0x12, 0xc2, 0x5d, 0x21, 0xc5, 0xdf, 0x75, 0x00, 0xdf, 0xf1, 0xc8, 0xc0,
- 0x25, 0x00, 0xdf, 0xe0, 0xd2, 0x47, 0x88, 0x00, 0xdf, 0x78, 0x91, 0x00,
- 0xdf, 0x69, 0x8b, 0x00, 0xdf, 0x58, 0x87, 0x00, 0xdf, 0x48, 0xc2, 0x00,
- 0xde, 0x00, 0xdf, 0x19, 0x83, 0x00, 0xde, 0xa2, 0x02, 0x5d, 0x2d, 0xc2,
- 0x0b, 0xc6, 0x00, 0xdf, 0x11, 0xc2, 0x1d, 0x5f, 0x00, 0xdf, 0x01, 0xc2,
- 0x01, 0x29, 0x00, 0xde, 0xe9, 0xca, 0x9f, 0xf0, 0x00, 0xde, 0xb9, 0x83,
- 0x00, 0xde, 0x48, 0x4a, 0x47, 0x8e, 0xc2, 0x5d, 0x33, 0x83, 0x00, 0xde,
- 0xc1, 0xca, 0x9c, 0x8a, 0x00, 0xde, 0xb0, 0xc7, 0xc1, 0x33, 0x00, 0xde,
- 0x68, 0xc2, 0x00, 0xa4, 0x00, 0x4c, 0xb3, 0x02, 0x5d, 0x6d, 0x83, 0x00,
- 0x4c, 0xa8, 0x83, 0x00, 0x4d, 0xc1, 0xc2, 0x0c, 0x65, 0x00, 0x4d, 0xb9,
- 0xc2, 0x00, 0xa4, 0x00, 0x4d, 0xb0, 0x83, 0x00, 0x4d, 0x83, 0x02, 0x5d,
- 0x73, 0xc2, 0x02, 0x59, 0x00, 0x4e, 0xe1, 0xc2, 0x00, 0xa4, 0x00, 0x4e,
- 0xe8, 0x83, 0x00, 0x4d, 0x79, 0xc2, 0x1d, 0x5f, 0x00, 0x4e, 0xf8, 0xc2,
- 0x00, 0xa4, 0x00, 0x4d, 0x69, 0x83, 0x00, 0x4d, 0x60, 0xc2, 0x00, 0xa4,
- 0x00, 0x4d, 0x59, 0x83, 0x00, 0x4d, 0x50, 0x83, 0x00, 0x4d, 0x41, 0xc2,
- 0x00, 0xc1, 0x00, 0x4d, 0x19, 0xc2, 0x1d, 0x5f, 0x00, 0x4c, 0xf1, 0xc2,
- 0x01, 0x29, 0x00, 0x4c, 0xc8, 0xc2, 0x00, 0xa4, 0x00, 0x4d, 0x39, 0x83,
- 0x00, 0x4d, 0x31, 0x06, 0x42, 0x5d, 0x79, 0xc2, 0x00, 0xa4, 0x00, 0x4d,
- 0x29, 0x83, 0x00, 0x4d, 0x21, 0x16, 0x42, 0x5d, 0x83, 0xc2, 0x00, 0xa4,
- 0x00, 0x4c, 0xe9, 0x83, 0x00, 0x4c, 0xe0, 0xc2, 0x00, 0xa4, 0x00, 0x4c,
- 0xd9, 0x83, 0x00, 0x4c, 0xd0, 0xc2, 0x00, 0xa4, 0x00, 0x4c, 0xc1, 0x83,
- 0x00, 0x4c, 0xb8, 0x97, 0x00, 0x4c, 0xa1, 0x8b, 0x00, 0x4c, 0x81, 0x83,
- 0x00, 0x4c, 0x30, 0x8b, 0x00, 0x4c, 0x40, 0x97, 0x00, 0x4c, 0x50, 0x47,
- 0xac, 0xc2, 0xc2, 0x5d, 0x8d, 0xcd, 0x7d, 0x7c, 0x00, 0x4f, 0xe0, 0x42,
- 0x03, 0x32, 0xc2, 0x5d, 0x9b, 0x03, 0xc2, 0x5d, 0xa7, 0xc5, 0x35, 0x00,
- 0x00, 0x4d, 0xe1, 0xcb, 0x1e, 0x17, 0x00, 0x4c, 0x08, 0x97, 0x00, 0x4e,
- 0x61, 0x8b, 0x00, 0x4e, 0x41, 0x83, 0x00, 0x4d, 0xf0, 0x94, 0x00, 0x4e,
- 0x1b, 0x02, 0x5d, 0xb3, 0x8e, 0x00, 0x4f, 0x12, 0x02, 0x5d, 0xb7, 0x97,
- 0x00, 0x4e, 0x10, 0x8b, 0x00, 0x4e, 0x00, 0xc2, 0x01, 0x47, 0x00, 0x4f,
- 0x41, 0xc4, 0x04, 0x5e, 0x00, 0x4f, 0x48, 0xc3, 0x06, 0x9e, 0x00, 0x4f,
- 0x51, 0xc3, 0x0c, 0x5b, 0x00, 0x4f, 0x58, 0xc2, 0x26, 0x51, 0x00, 0x4f,
- 0x61, 0xc4, 0x18, 0x83, 0x00, 0x4f, 0x68, 0xc3, 0x01, 0xb4, 0x00, 0x4f,
- 0xa3, 0x02, 0x5d, 0xbb, 0x16, 0xc2, 0x5d, 0xc1, 0xc4, 0x06, 0x9d, 0x00,
- 0x4f, 0xb8, 0x1b, 0xc2, 0x5d, 0xcd, 0xc2, 0x02, 0x59, 0x00, 0xd0, 0x59,
- 0x83, 0x00, 0xd0, 0x51, 0x09, 0x42, 0x5d, 0xd7, 0xc2, 0x03, 0xa4, 0x00,
- 0xd0, 0x39, 0x83, 0x00, 0xd0, 0x30, 0xa4, 0x01, 0x42, 0x03, 0x02, 0x5d,
- 0xe1, 0x9e, 0x01, 0x40, 0x0b, 0x02, 0x5d, 0xe5, 0x9f, 0x01, 0x40, 0x13,
- 0x02, 0x5e, 0x13, 0xa0, 0x01, 0x40, 0x23, 0x02, 0x5e, 0x3a, 0xa1, 0x01,
- 0x40, 0x43, 0x02, 0x5e, 0x5a, 0xa2, 0x01, 0x40, 0x83, 0x02, 0x5e, 0x73,
- 0xa3, 0x01, 0x41, 0x03, 0x02, 0x5e, 0x85, 0xa5, 0x01, 0x44, 0x00, 0x00,
- 0x42, 0x5e, 0x90, 0xc2, 0x0c, 0x57, 0x08, 0x83, 0x18, 0x9b, 0x08, 0x83,
- 0x10, 0xc4, 0x18, 0x83, 0x08, 0x82, 0xc3, 0x02, 0x5e, 0x9c, 0xc2, 0x26,
- 0x51, 0x08, 0x82, 0xba, 0x02, 0x5e, 0xa2, 0x0b, 0xc2, 0x5e, 0xa8, 0x11,
- 0x42, 0x5e, 0xb4, 0x0a, 0xc2, 0x5e, 0xc0, 0x19, 0xc2, 0x5e, 0xcc, 0xc2,
- 0x00, 0x4d, 0x08, 0x82, 0xd8, 0x49, 0x59, 0x03, 0x42, 0x5e, 0xd6, 0xc2,
- 0x00, 0xc7, 0x08, 0x81, 0xa1, 0x83, 0x08, 0x81, 0x70, 0xc2, 0x00, 0xa4,
- 0x08, 0x81, 0x51, 0x83, 0x08, 0x81, 0x48, 0xc2, 0x00, 0xa4, 0x08, 0x81,
- 0x41, 0x83, 0x08, 0x81, 0x38, 0x83, 0x08, 0x81, 0x31, 0xc2, 0x00, 0xc1,
- 0x08, 0x81, 0x09, 0xc2, 0x1d, 0x5f, 0x08, 0x80, 0xe1, 0xc2, 0x01, 0x29,
- 0x08, 0x80, 0xb8, 0xc2, 0x00, 0xa4, 0x08, 0x81, 0x29, 0x83, 0x08, 0x81,
- 0x21, 0x06, 0x42, 0x5e, 0xee, 0xc2, 0x00, 0xa4, 0x08, 0x81, 0x19, 0x83,
- 0x08, 0x81, 0x11, 0x16, 0x42, 0x5e, 0xf8, 0xc2, 0x00, 0xa4, 0x08, 0x80,
- 0xd9, 0x83, 0x08, 0x80, 0xd0, 0xc2, 0x00, 0xa4, 0x08, 0x80, 0xc9, 0x83,
- 0x08, 0x80, 0xc0, 0xc2, 0x00, 0xa4, 0x08, 0x80, 0xb1, 0x83, 0x08, 0x80,
- 0xa8, 0xc2, 0x00, 0xa4, 0x08, 0x80, 0xa1, 0x83, 0x08, 0x80, 0x98, 0x97,
- 0x08, 0x80, 0x91, 0x8b, 0x08, 0x80, 0x81, 0x83, 0x08, 0x80, 0x30, 0x47,
- 0xac, 0xc2, 0xc2, 0x5f, 0x02, 0x83, 0x08, 0x81, 0x78, 0x97, 0x08, 0x80,
- 0x50, 0x8b, 0x08, 0x80, 0x40, 0xc2, 0x00, 0xa4, 0x08, 0x81, 0x81, 0xc2,
- 0x0c, 0x65, 0x08, 0x81, 0x89, 0x83, 0x08, 0x81, 0x90, 0x91, 0x08, 0x82,
- 0x23, 0x02, 0x5f, 0x10, 0x03, 0xc2, 0x5f, 0x16, 0x87, 0x08, 0x82, 0x11,
- 0x48, 0xac, 0xc1, 0xc2, 0x5f, 0x22, 0x97, 0x08, 0x81, 0xe3, 0x02, 0x5f,
- 0x30, 0x8b, 0x08, 0x81, 0xd3, 0x02, 0x5f, 0x34, 0xce, 0x6e, 0x33, 0x08,
- 0x81, 0xc8, 0xc4, 0x22, 0x71, 0x08, 0x83, 0x79, 0xc5, 0x01, 0xdb, 0x08,
- 0x83, 0x71, 0x15, 0xc2, 0x5f, 0x38, 0x08, 0xc2, 0x5f, 0x44, 0x16, 0xc2,
- 0x5f, 0x50, 0xc3, 0x01, 0xb4, 0x08, 0x83, 0x39, 0xc4, 0x15, 0xd3, 0x08,
- 0x83, 0x30, 0xc4, 0x73, 0x66, 0x08, 0x82, 0x69, 0xc3, 0x00, 0x8b, 0x08,
- 0x82, 0x58, 0xc8, 0x3c, 0xa3, 0x08, 0x82, 0x51, 0x96, 0x08, 0x82, 0x48,
- 0x42, 0x00, 0x4e, 0xc2, 0x5f, 0x5c, 0xc9, 0x7c, 0xd7, 0x0e, 0x83, 0x90,
- 0xc7, 0xc3, 0xa9, 0x0e, 0x85, 0xa9, 0xc6, 0xc4, 0xf3, 0x0e, 0x85, 0xa0,
- 0xc4, 0x8f, 0x7c, 0x0e, 0x87, 0xa1, 0xc3, 0x8f, 0x80, 0x0e, 0x83, 0xf8,
- 0x44, 0xe5, 0xa3, 0xc2, 0x5f, 0x6e, 0xc8, 0x9f, 0x0c, 0x0e, 0x80, 0xd8,
- 0x00, 0x42, 0x5f, 0x80, 0xc5, 0xd6, 0x79, 0x0e, 0x82, 0x10, 0x03, 0xc2,
- 0x5f, 0x8c, 0x11, 0x42, 0x5f, 0x96, 0xc3, 0x04, 0x73, 0x0e, 0x83, 0xd1,
- 0xc9, 0xaa, 0xae, 0x0e, 0x81, 0xb8, 0xc2, 0x04, 0x6e, 0x0e, 0x87, 0x79,
- 0xc2, 0x00, 0xfa, 0x0e, 0x87, 0x71, 0xc2, 0x01, 0x05, 0x0e, 0x87, 0x69,
- 0xc2, 0x03, 0x30, 0x0e, 0x87, 0x61, 0xc2, 0x01, 0x29, 0x0e, 0x87, 0x59,
- 0xc3, 0x28, 0x6c, 0x0e, 0x87, 0x51, 0xc2, 0x03, 0xa4, 0x0e, 0x87, 0x48,
- 0x90, 0x0e, 0x84, 0xb9, 0xc9, 0x7c, 0xd7, 0x0e, 0x83, 0x98, 0x46, 0xd0,
- 0xe1, 0xc2, 0x5f, 0xa2, 0x46, 0xd3, 0xe1, 0xc2, 0x5f, 0xaf, 0xc5, 0x4a,
- 0x12, 0x0e, 0x81, 0x18, 0xc6, 0xcd, 0x8d, 0x0e, 0x81, 0x99, 0xca, 0x74,
- 0x57, 0x0e, 0x80, 0x68, 0xc5, 0xd8, 0x68, 0x0e, 0x85, 0x09, 0xc4, 0xe2,
- 0xf3, 0x0e, 0x84, 0xd0, 0xc5, 0xd6, 0x74, 0x0e, 0x85, 0x01, 0x8b, 0x0e,
- 0x84, 0xf8, 0xc2, 0x00, 0xcb, 0x0e, 0x84, 0xf1, 0xc4, 0x01, 0xe7, 0x0e,
- 0x84, 0xe8, 0x8b, 0x0e, 0x84, 0xe1, 0xc5, 0xd6, 0x74, 0x0e, 0x84, 0xd8,
- 0xc7, 0xc1, 0xaa, 0x0e, 0x83, 0x11, 0xc2, 0x00, 0xad, 0x0e, 0x82, 0xe0,
- 0xc9, 0xb1, 0xc8, 0x0e, 0x80, 0xf8, 0x00, 0x42, 0x5f, 0xbb, 0x00, 0x42,
- 0x5f, 0xc5, 0xc4, 0xd3, 0xdd, 0x0e, 0x80, 0x40, 0x45, 0xd9, 0xb2, 0xc2,
- 0x5f, 0xcf, 0xc4, 0xc7, 0xd3, 0x0e, 0x80, 0x98, 0xc8, 0xbb, 0x5d, 0x0e,
- 0x87, 0x31, 0xc5, 0xd2, 0x7a, 0x0e, 0x84, 0x92, 0x02, 0x5f, 0xe1, 0x46,
- 0xd1, 0x3b, 0xc2, 0x5f, 0xe7, 0xc4, 0xc4, 0xaf, 0x0e, 0x84, 0xc8, 0x16,
- 0xc2, 0x5f, 0xf9, 0xd5, 0x35, 0x74, 0x0e, 0x86, 0x91, 0xdc, 0x13, 0x8e,
- 0x0e, 0x86, 0x89, 0xd1, 0x51, 0x2e, 0x0e, 0x86, 0x80, 0xc9, 0x9f, 0x0b,
- 0x0e, 0x84, 0x00, 0x43, 0x01, 0xe7, 0xc2, 0x60, 0x05, 0xd5, 0x35, 0x74,
- 0x0e, 0x86, 0xb1, 0xdc, 0x13, 0x8e, 0x0e, 0x86, 0xa9, 0xd1, 0x51, 0x2e,
- 0x0e, 0x86, 0xa0, 0xc3, 0x8f, 0x80, 0x0e, 0x83, 0xe9, 0xc4, 0x8f, 0x7c,
- 0x0e, 0x83, 0xe0, 0xc4, 0xdf, 0xf7, 0x0e, 0x82, 0x99, 0xc6, 0xd1, 0x35,
- 0x0e, 0x80, 0x52, 0x02, 0x60, 0x11, 0xc5, 0xd9, 0x17, 0x0e, 0x86, 0x39,
- 0xc9, 0xad, 0xbd, 0x0e, 0x85, 0xe0, 0x47, 0x1c, 0x69, 0xc2, 0x60, 0x17,
- 0xcb, 0x97, 0x07, 0x0e, 0x85, 0xf0, 0xca, 0xa4, 0x14, 0x0e, 0x86, 0x21,
- 0xc8, 0xba, 0xed, 0x0e, 0x86, 0x18, 0x10, 0xc2, 0x60, 0x23, 0xc2, 0x00,
- 0xfa, 0x0e, 0x86, 0x01, 0xc2, 0x01, 0x05, 0x0e, 0x85, 0xf9, 0xc2, 0x03,
- 0x30, 0x0e, 0x85, 0xe9, 0xc2, 0x03, 0xa4, 0x0e, 0x85, 0xd0, 0xcf, 0x64,
- 0x8e, 0x0e, 0x85, 0xc8, 0x44, 0x38, 0xc4, 0xc2, 0x60, 0x2f, 0xc4, 0x63,
- 0xd6, 0x0e, 0x85, 0xb8, 0xc3, 0x7e, 0xff, 0x0e, 0x82, 0x31, 0xc8, 0x9f,
- 0x0c, 0x0e, 0x80, 0xd0, 0x47, 0xc1, 0xe9, 0xc2, 0x60, 0x39, 0x44, 0x84,
- 0x05, 0x42, 0x60, 0x45, 0x48, 0x6d, 0xe0, 0xc2, 0x60, 0x51, 0x42, 0x00,
- 0x5b, 0x42, 0x60, 0x5d, 0xce, 0x6d, 0xed, 0x0e, 0x85, 0x29, 0xcc, 0x84,
- 0x04, 0x0e, 0x85, 0x18, 0xc6, 0xd2, 0x79, 0x0e, 0x84, 0xb1, 0xc3, 0x1e,
- 0xab, 0x0e, 0x84, 0x39, 0x83, 0x0e, 0x81, 0x80, 0xc7, 0xc9, 0x59, 0x0e,
- 0x83, 0x81, 0x12, 0xc2, 0x60, 0x69, 0xc7, 0xc4, 0x0b, 0x0e, 0x83, 0x69,
- 0x42, 0x00, 0x4e, 0x42, 0x60, 0x75, 0xcd, 0x78, 0x82, 0x0e, 0x83, 0xc9,
- 0xc2, 0x00, 0xad, 0x0e, 0x81, 0x6a, 0x02, 0x60, 0x7f, 0xcf, 0x63, 0x26,
- 0x0e, 0x84, 0x71, 0x16, 0xc2, 0x60, 0x8b, 0xcb, 0x9a, 0x1f, 0x0e, 0x84,
- 0x59, 0xcc, 0x83, 0x74, 0x0e, 0x84, 0x50, 0xc3, 0x7e, 0xff, 0x0e, 0x82,
- 0x41, 0xc5, 0xce, 0x96, 0x0e, 0x80, 0x21, 0xcb, 0x74, 0x56, 0x0e, 0x80,
- 0x18, 0xc7, 0xc9, 0x59, 0x0e, 0x83, 0x89, 0xcb, 0x95, 0xa7, 0x0e, 0x83,
- 0x79, 0xc7, 0xc4, 0x0b, 0x0e, 0x83, 0x61, 0x90, 0x0e, 0x81, 0xca, 0x02,
- 0x60, 0x97, 0xc2, 0x00, 0xb7, 0x0e, 0x80, 0xb9, 0x8b, 0x0e, 0x80, 0x00,
- 0x47, 0xc5, 0x07, 0xc2, 0x60, 0x9d, 0xc6, 0xd3, 0xdb, 0x0e, 0x80, 0x4a,
- 0x02, 0x60, 0xa9, 0xc4, 0x7e, 0xfe, 0x0e, 0x82, 0x68, 0x16, 0xc2, 0x60,
- 0xad, 0xc2, 0x00, 0xad, 0x0e, 0x82, 0x08, 0xc3, 0x7e, 0xff, 0x0e, 0x82,
- 0xc1, 0xc5, 0xce, 0x96, 0x0e, 0x80, 0x31, 0xcb, 0x74, 0x56, 0x0e, 0x80,
- 0x28, 0x94, 0x08, 0xe0, 0x38, 0xd1, 0x50, 0x73, 0x0f, 0xdc, 0xf9, 0xc2,
- 0x00, 0x54, 0x01, 0x2f, 0xd0, 0x4e, 0x61, 0x1a, 0xc2, 0x60, 0xb7, 0xcc,
- 0x8d, 0x4c, 0x0f, 0xac, 0x50, 0xc9, 0xae, 0xf8, 0x0f, 0xac, 0x61, 0xc5,
- 0xcc, 0x1a, 0x0f, 0xac, 0x48, 0xd1, 0x50, 0x73, 0x0f, 0xdc, 0xf1, 0xc2,
- 0x00, 0x54, 0x01, 0x2f, 0xf8, 0x4e, 0x02, 0x29, 0xc2, 0x60, 0xc3, 0xdb,
- 0x18, 0xc7, 0x01, 0x49, 0xf0, 0x5b, 0x18, 0x0a, 0xc2, 0x60, 0xcf, 0x46,
- 0x01, 0xd1, 0x42, 0x60, 0xdb, 0xce, 0x0a, 0xb9, 0x01, 0x2c, 0x31, 0xcd,
- 0x40, 0x12, 0x01, 0x2c, 0x18, 0xc9, 0xb2, 0xf1, 0x01, 0x3f, 0xf0, 0xc9,
- 0xb2, 0xf1, 0x01, 0x3f, 0xe0, 0xc9, 0xb2, 0xf1, 0x01, 0x3f, 0xe8, 0xc9,
- 0xb2, 0xf1, 0x01, 0x3f, 0xd8, 0xcc, 0x8c, 0x20, 0x01, 0x3f, 0xd1, 0xc5,
- 0x01, 0x0f, 0x01, 0x3f, 0xb8, 0xcf, 0x67, 0x5e, 0x01, 0x52, 0xe9, 0xcb,
- 0x95, 0x9c, 0x01, 0x52, 0xd9, 0x42, 0x01, 0x4a, 0x42, 0x60, 0xed, 0xc7,
- 0x17, 0x7c, 0x01, 0x52, 0x89, 0x45, 0x06, 0xf3, 0x42, 0x60, 0xf9, 0x42,
- 0x00, 0x59, 0xc2, 0x61, 0x05, 0x09, 0x42, 0x61, 0x17, 0xd3, 0x16, 0x7d,
- 0x01, 0x4c, 0x99, 0x49, 0x01, 0x11, 0x42, 0x61, 0x26, 0x49, 0x06, 0x13,
- 0xc2, 0x61, 0x32, 0xcc, 0x06, 0x1b, 0x0f, 0xdc, 0x61, 0xc6, 0x01, 0x7a,
- 0x0f, 0xc8, 0x3b, 0x02, 0x61, 0x38, 0x42, 0x01, 0xe2, 0xc2, 0x61, 0x3e,
- 0xcb, 0x8f, 0xf0, 0x0f, 0xdd, 0x91, 0xc6, 0xa1, 0x0c, 0x0f, 0xdd, 0xc8,
- 0xd0, 0x5c, 0xe2, 0x0f, 0xc2, 0xc1, 0xd1, 0x57, 0x4a, 0x01, 0x0f, 0xf9,
- 0xc5, 0x01, 0x0f, 0x01, 0x0c, 0xa3, 0x02, 0x61, 0x4a, 0xcc, 0x8c, 0x20,
- 0x01, 0x0e, 0xa3, 0x02, 0x61, 0x4e, 0x19, 0xc2, 0x61, 0x54, 0xcb, 0x97,
- 0xe3, 0x01, 0x58, 0x61, 0xd5, 0x03, 0xb2, 0x01, 0x5b, 0x20, 0xcc, 0x01,
- 0xdb, 0x01, 0x2c, 0x79, 0xcd, 0x19, 0x0b, 0x01, 0x2c, 0x70, 0xd1, 0x40,
- 0x0e, 0x01, 0x2c, 0x49, 0xd0, 0x06, 0xd7, 0x01, 0x16, 0x58, 0x00, 0x42,
- 0x61, 0x60, 0xd3, 0x01, 0x94, 0x01, 0x00, 0xc1, 0xd0, 0x5f, 0xf2, 0x01,
- 0x71, 0x30, 0x00, 0x42, 0x61, 0x78, 0x44, 0x04, 0x5f, 0xc2, 0x61, 0x8a,
- 0xcc, 0x8c, 0x50, 0x0f, 0xaf, 0x61, 0xde, 0x08, 0x29, 0x0f, 0xde, 0x08,
- 0x44, 0x01, 0xe9, 0xc2, 0x61, 0x96, 0xd3, 0x43, 0x76, 0x01, 0x70, 0x48,
- 0xd0, 0x49, 0x28, 0x01, 0x2c, 0x59, 0xc7, 0xa9, 0xc6, 0x01, 0x4b, 0xe0,
- 0xca, 0xa3, 0x6a, 0x01, 0x1c, 0xe9, 0xc9, 0x4f, 0x27, 0x01, 0x1c, 0xe1,
- 0xca, 0xa0, 0xf4, 0x01, 0x1c, 0xd8, 0xce, 0x00, 0xb0, 0x01, 0x00, 0xe1,
- 0xcc, 0x8a, 0xb8, 0x01, 0x4e, 0xd1, 0xcb, 0x1a, 0x3f, 0x01, 0x71, 0x41,
- 0xcd, 0x09, 0x51, 0x01, 0x80, 0x50, 0xcb, 0x1a, 0x3f, 0x01, 0x4c, 0x29,
- 0x05, 0xc2, 0x61, 0xa2, 0xd2, 0x23, 0xbe, 0x01, 0x80, 0xb1, 0xd6, 0x0a,
- 0x88, 0x01, 0x80, 0xc1, 0xce, 0x24, 0xb2, 0x01, 0x80, 0xd0, 0xd6, 0x0a,
- 0x88, 0x01, 0x4c, 0xb9, 0xd2, 0x23, 0xbe, 0x01, 0x80, 0x80, 0x50, 0x5d,
- 0x52, 0xc2, 0x61, 0xae, 0x4e, 0x70, 0x8d, 0x42, 0x61, 0xba, 0xda, 0x1d,
- 0x79, 0x0f, 0xc4, 0xa0, 0x45, 0x01, 0x93, 0xc2, 0x61, 0xc6, 0x44, 0x17,
- 0x34, 0x42, 0x61, 0xd2, 0xcd, 0x7e, 0x0b, 0x01, 0x0c, 0xf1, 0x48, 0x01,
- 0xef, 0x42, 0x61, 0xde, 0x45, 0x00, 0x6c, 0xc2, 0x61, 0xea, 0x16, 0xc2,
- 0x62, 0x20, 0xd5, 0x11, 0x8f, 0x01, 0x0e, 0x31, 0xc8, 0xb2, 0xf2, 0x01,
- 0x0d, 0x23, 0x02, 0x62, 0x2c, 0x03, 0x42, 0x62, 0x32, 0xc5, 0x01, 0x0f,
- 0x01, 0x0e, 0x83, 0x02, 0x62, 0x3e, 0xca, 0x52, 0x78, 0x01, 0x48, 0x60,
- 0xcb, 0x6c, 0xe6, 0x01, 0x0e, 0xe1, 0xca, 0x84, 0xde, 0x0f, 0xc1, 0xc0,
- 0x46, 0x02, 0xd2, 0xc2, 0x62, 0x48, 0xc2, 0x00, 0x58, 0x0f, 0xd7, 0x90,
- 0xd0, 0x58, 0x92, 0x0f, 0xc2, 0x01, 0xc5, 0x01, 0x0f, 0x0f, 0xc2, 0x20,
- 0xc5, 0x01, 0x0f, 0x01, 0x58, 0x29, 0xd3, 0x42, 0x6c, 0x01, 0x5c, 0x40,
- 0xca, 0x52, 0x12, 0x00, 0x7e, 0xc0, 0xca, 0x37, 0x0e, 0x01, 0x13, 0x91,
- 0xc5, 0x07, 0x62, 0x01, 0x13, 0x20, 0x4a, 0x37, 0x1e, 0x42, 0x62, 0x54,
- 0xe0, 0x00, 0x87, 0x01, 0x54, 0x58, 0x47, 0xc6, 0x0a, 0xc2, 0x62, 0x63,
- 0x53, 0x42, 0xb8, 0x42, 0x62, 0x6f, 0xe0, 0x06, 0xa7, 0x01, 0x54, 0x88,
- 0xc2, 0x00, 0xa4, 0x00, 0xe2, 0x71, 0x83, 0x00, 0xe2, 0x68, 0xc2, 0x00,
- 0xa4, 0x00, 0xe0, 0xc1, 0x83, 0x00, 0xe0, 0xb8, 0xc7, 0xc3, 0xe1, 0x00,
- 0xe1, 0xf0, 0xd2, 0x4c, 0xf2, 0x0f, 0xbd, 0xa9, 0xc6, 0x13, 0x57, 0x0f,
- 0xbd, 0x49, 0xc4, 0x06, 0x23, 0x01, 0x2c, 0x88, 0x44, 0x01, 0xdc, 0xc2,
- 0x62, 0x75, 0xc3, 0x12, 0xec, 0x0f, 0xb4, 0x40, 0xe0, 0x0a, 0x87, 0x01,
- 0x3b, 0x90, 0x52, 0x12, 0x60, 0xc2, 0x62, 0x7b, 0x44, 0x0c, 0x5b, 0x42,
- 0x62, 0x87, 0xd7, 0x27, 0xe1, 0x0f, 0xbe, 0x01, 0xd8, 0x21, 0x30, 0x0f,
- 0xbe, 0x90, 0xc7, 0x71, 0x03, 0x0f, 0xaf, 0x88, 0x83, 0x05, 0x26, 0xe9,
- 0xc2, 0x00, 0xa4, 0x05, 0x26, 0xf0, 0x44, 0x5d, 0x25, 0xc2, 0x62, 0x93,
- 0xc5, 0xdc, 0xb4, 0x05, 0x27, 0xc8, 0xc4, 0xb5, 0x00, 0x00, 0x04, 0x50,
- 0xd6, 0x2c, 0xaf, 0x01, 0x50, 0xa1, 0x45, 0x00, 0x6c, 0x42, 0x62, 0xb1,
- 0x24, 0xc2, 0x62, 0xbd, 0x23, 0xc2, 0x62, 0xd1, 0x42, 0xe6, 0x74, 0xc2,
- 0x62, 0xed, 0x04, 0xc2, 0x63, 0x0d, 0xc4, 0xe2, 0x83, 0x08, 0x30, 0xd9,
- 0x1e, 0xc2, 0x63, 0x15, 0x20, 0xc2, 0x63, 0x27, 0x21, 0xc2, 0x63, 0x47,
- 0x22, 0x42, 0x63, 0x4f, 0x42, 0x00, 0x71, 0xc2, 0x63, 0x77, 0x49, 0xaf,
- 0x88, 0xc2, 0x63, 0x83, 0x4a, 0xa4, 0x00, 0x42, 0x63, 0x8d, 0xc4, 0x18,
- 0x83, 0x00, 0xca, 0x69, 0xc2, 0x26, 0x51, 0x00, 0xca, 0x60, 0xc3, 0x0c,
- 0x5b, 0x00, 0xca, 0x59, 0xc3, 0x06, 0x9e, 0x00, 0xca, 0x50, 0xc4, 0x04,
- 0x5e, 0x00, 0xca, 0x49, 0xc2, 0x01, 0x47, 0x00, 0xca, 0x40, 0xc3, 0x15,
- 0x89, 0x00, 0xca, 0x01, 0xc4, 0xe0, 0x9b, 0x00, 0xc9, 0xd9, 0xc9, 0xad,
- 0xb4, 0x00, 0xc9, 0xd1, 0xc9, 0xaa, 0x1e, 0x00, 0xc9, 0xc8, 0xc2, 0x00,
- 0xc7, 0x00, 0xc9, 0xc1, 0xc2, 0x02, 0x59, 0x00, 0xc9, 0xb9, 0xc2, 0x00,
- 0xad, 0x00, 0xc9, 0xb1, 0xc2, 0x03, 0xa4, 0x00, 0xc9, 0xa9, 0x10, 0xc2,
- 0x63, 0x97, 0xc2, 0x00, 0xb3, 0x00, 0xc9, 0x99, 0xc8, 0x11, 0x40, 0x00,
- 0xc9, 0x91, 0xc2, 0x04, 0x2b, 0x00, 0xc9, 0x80, 0xc2, 0x01, 0x09, 0x00,
- 0xc9, 0x59, 0xc2, 0x02, 0x59, 0x00, 0xc9, 0x51, 0xc2, 0x1d, 0x5f, 0x00,
- 0xc9, 0x48, 0x91, 0x00, 0xc9, 0x43, 0x02, 0x63, 0xa1, 0x87, 0x00, 0xc9,
- 0x3b, 0x02, 0x63, 0xa5, 0x83, 0x00, 0xc9, 0x03, 0x02, 0x63, 0xa9, 0x97,
- 0x00, 0xc9, 0x11, 0x8b, 0x00, 0xc9, 0x08, 0xc2, 0x02, 0x59, 0x00, 0xc8,
- 0xf1, 0xc2, 0x00, 0xa4, 0x00, 0xc8, 0x61, 0x83, 0x00, 0xc8, 0x58, 0xc3,
- 0x30, 0x5e, 0x00, 0xc8, 0xe9, 0xc2, 0x00, 0xa4, 0x00, 0xc8, 0x21, 0x83,
- 0x00, 0xc8, 0x18, 0x83, 0x00, 0xc8, 0xd9, 0xc2, 0x0c, 0x65, 0x00, 0xc8,
- 0xd1, 0xc2, 0x00, 0xa4, 0x00, 0xc8, 0xc8, 0x90, 0x00, 0xc8, 0x50, 0xc2,
- 0x00, 0xa4, 0x00, 0xc8, 0x99, 0x83, 0x00, 0xc8, 0x90, 0xc2, 0x00, 0xa4,
+ 0x4c, 0xfb, 0x87, 0x00, 0xd3, 0x38, 0x8b, 0x00, 0xd3, 0x30, 0x83, 0x00,
+ 0xd2, 0x1b, 0x02, 0x4c, 0xff, 0x43, 0x00, 0x37, 0xc2, 0x4d, 0x03, 0xc2,
+ 0x00, 0x96, 0x00, 0xd2, 0x51, 0xc2, 0x0e, 0x78, 0x00, 0xd2, 0x20, 0x97,
+ 0x00, 0xd2, 0x80, 0x8b, 0x00, 0xd2, 0x70, 0xc2, 0x01, 0x0e, 0x00, 0xd2,
+ 0x49, 0x15, 0xc2, 0x4d, 0x31, 0xc2, 0x1a, 0x36, 0x00, 0xd2, 0x01, 0xc2,
+ 0x01, 0xa7, 0x00, 0xd1, 0xd1, 0x12, 0xc2, 0x4d, 0x41, 0x16, 0xc2, 0x4d,
+ 0x4b, 0xc5, 0x3b, 0x63, 0x00, 0xd1, 0x71, 0x05, 0xc2, 0x4d, 0x55, 0x0d,
+ 0x42, 0x4d, 0x5f, 0xc2, 0x0e, 0x78, 0x00, 0xd2, 0x11, 0x83, 0x00, 0xd2,
+ 0x0a, 0x02, 0x4d, 0x6f, 0x83, 0x00, 0xd1, 0xb1, 0xc2, 0x1a, 0x36, 0x00,
+ 0xd1, 0x61, 0xc2, 0x07, 0x69, 0x00, 0xd1, 0x30, 0xc3, 0x82, 0xb0, 0x00,
+ 0xcb, 0xa1, 0xc3, 0x3b, 0x0b, 0x00, 0xcb, 0x99, 0xc3, 0x82, 0xe0, 0x00,
+ 0xcb, 0x91, 0xc3, 0x82, 0xec, 0x00, 0xcb, 0x89, 0xc3, 0x82, 0xa4, 0x00,
+ 0xcb, 0x80, 0xc2, 0x01, 0x0e, 0x00, 0xcb, 0x09, 0x83, 0x00, 0xca, 0x98,
+ 0xc5, 0xdf, 0x45, 0x05, 0x56, 0xf9, 0x90, 0x05, 0x56, 0xd8, 0x8f, 0x05,
+ 0x55, 0xf1, 0x90, 0x05, 0x55, 0xe9, 0x9b, 0x05, 0x55, 0xe1, 0xc2, 0x0e,
+ 0x78, 0x05, 0x55, 0xd9, 0x83, 0x05, 0x55, 0x88, 0x83, 0x05, 0x55, 0xd1,
+ 0x87, 0x05, 0x55, 0x9a, 0x02, 0x4d, 0x7b, 0x83, 0x05, 0x55, 0xc0, 0x91,
+ 0x05, 0x55, 0x79, 0xc2, 0x03, 0x76, 0x05, 0x55, 0x69, 0xc2, 0x16, 0x11,
+ 0x05, 0x55, 0x59, 0xc2, 0x00, 0x36, 0x05, 0x55, 0x49, 0xc2, 0x00, 0x64,
+ 0x05, 0x55, 0x39, 0xc2, 0x47, 0x43, 0x05, 0x55, 0x29, 0xc2, 0x01, 0xc2,
+ 0x05, 0x55, 0x19, 0xc2, 0x05, 0x43, 0x05, 0x55, 0x09, 0x12, 0xc2, 0x4d,
+ 0x7f, 0xc2, 0x01, 0xb4, 0x05, 0x54, 0xd9, 0x10, 0xc2, 0x4d, 0x89, 0x16,
+ 0xc2, 0x4d, 0x99, 0xc2, 0x00, 0x68, 0x05, 0x54, 0x99, 0x05, 0xc2, 0x4d,
+ 0xa3, 0xc2, 0x13, 0xf3, 0x05, 0x54, 0x39, 0x0d, 0xc2, 0x4d, 0xad, 0xc2,
+ 0x05, 0x3b, 0x05, 0x54, 0x78, 0x91, 0x05, 0x55, 0x71, 0xc2, 0x03, 0x76,
+ 0x05, 0x55, 0x61, 0xc2, 0x16, 0x11, 0x05, 0x55, 0x51, 0xc2, 0x00, 0x36,
+ 0x05, 0x55, 0x41, 0xc2, 0x00, 0x64, 0x05, 0x55, 0x31, 0xc2, 0x47, 0x43,
+ 0x05, 0x55, 0x21, 0xc2, 0x01, 0xc2, 0x05, 0x55, 0x11, 0xc2, 0x05, 0x43,
+ 0x05, 0x55, 0x01, 0x12, 0xc2, 0x4d, 0xb5, 0xc2, 0x01, 0xb4, 0x05, 0x54,
+ 0xd1, 0x10, 0xc2, 0x4d, 0xbf, 0x16, 0xc2, 0x4d, 0xcf, 0xc2, 0x00, 0x68,
+ 0x05, 0x54, 0x91, 0x05, 0xc2, 0x4d, 0xd9, 0xc2, 0x13, 0xf3, 0x05, 0x54,
+ 0x31, 0x0d, 0xc2, 0x4d, 0xe3, 0xc2, 0x05, 0x3b, 0x05, 0x54, 0x70, 0xd2,
+ 0x4a, 0x22, 0x0f, 0xb2, 0xb1, 0xd2, 0x4a, 0x10, 0x0f, 0xb2, 0xa0, 0xc4,
+ 0x05, 0xde, 0x01, 0x0c, 0x59, 0xc2, 0x0a, 0x20, 0x01, 0x0c, 0x50, 0x9b,
+ 0x01, 0x0a, 0x21, 0x8e, 0x01, 0x0a, 0x11, 0x89, 0x01, 0x0a, 0x08, 0xd2,
+ 0x4a, 0x22, 0x0f, 0xb2, 0xb9, 0xd2, 0x4a, 0x10, 0x0f, 0xb2, 0xa8, 0xc4,
+ 0x00, 0xcd, 0x01, 0x34, 0xf9, 0xc5, 0x00, 0x47, 0x01, 0x34, 0xf0, 0xc5,
+ 0x00, 0x47, 0x0f, 0xaf, 0x39, 0xc4, 0x00, 0xcd, 0x0f, 0xaf, 0x31, 0xc5,
+ 0x00, 0x34, 0x0f, 0xaf, 0x29, 0xc5, 0x03, 0x50, 0x0f, 0xaf, 0x20, 0xd5,
+ 0x00, 0x92, 0x01, 0x5c, 0xd1, 0xc9, 0x00, 0x9e, 0x01, 0x3d, 0x10, 0xc6,
+ 0x12, 0x73, 0x0f, 0xbd, 0x41, 0xc4, 0x45, 0xaa, 0x01, 0x00, 0x48, 0xc5,
+ 0x8b, 0xde, 0x00, 0x3d, 0x19, 0xc8, 0xb8, 0x4b, 0x00, 0x3c, 0x79, 0xc4,
+ 0xdb, 0x77, 0x00, 0x3c, 0x70, 0x91, 0x00, 0x3d, 0x01, 0xc7, 0xae, 0x49,
+ 0x00, 0x3c, 0x99, 0xc3, 0x3b, 0xb0, 0x00, 0x3c, 0x63, 0x02, 0x4d, 0xeb,
+ 0xc3, 0x3b, 0xbb, 0x00, 0x3c, 0xc0, 0x03, 0xc2, 0x4d, 0xf1, 0xc5, 0xde,
+ 0x32, 0x00, 0x3c, 0x58, 0xc5, 0xe3, 0xe6, 0x00, 0x3c, 0xf1, 0x0a, 0xc2,
+ 0x4d, 0xfd, 0xc4, 0x9c, 0xaf, 0x00, 0x3c, 0x80, 0xc3, 0x3b, 0xb0, 0x00,
+ 0x3c, 0xc9, 0xc2, 0x0e, 0x30, 0x00, 0x3c, 0x00, 0x03, 0xc2, 0x4e, 0x09,
+ 0x91, 0x00, 0x3d, 0x08, 0xc4, 0xe9, 0xaf, 0x00, 0x3c, 0x69, 0xc8, 0xae,
+ 0x48, 0x00, 0x3c, 0x28, 0xc4, 0xe6, 0x13, 0x00, 0x3c, 0x39, 0xc3, 0x18,
+ 0xfe, 0x00, 0x3d, 0x10, 0xc4, 0xdb, 0x77, 0x00, 0x3c, 0x31, 0xc3, 0x3b,
+ 0xb0, 0x00, 0x3c, 0xd0, 0xc4, 0xe7, 0xf7, 0x00, 0x3c, 0x11, 0xc2, 0x0e,
+ 0x30, 0x00, 0x3d, 0x88, 0x0d, 0xc2, 0x4e, 0x13, 0x10, 0xc2, 0x4e, 0x1f,
+ 0x46, 0xd6, 0x82, 0xc2, 0x4e, 0x31, 0x15, 0xc2, 0x4e, 0x46, 0x1b, 0xc2,
+ 0x4e, 0x52, 0x43, 0x5e, 0x02, 0xc2, 0x4e, 0x5e, 0x16, 0xc2, 0x4e, 0x6a,
+ 0xc9, 0xb0, 0x63, 0x00, 0x70, 0xd1, 0x12, 0xc2, 0x4e, 0x74, 0x42, 0x05,
+ 0x43, 0xc2, 0x4e, 0x84, 0x0f, 0xc2, 0x4e, 0x93, 0x14, 0xc2, 0x4e, 0x9f,
+ 0x0e, 0xc2, 0x4e, 0xa9, 0xc7, 0xcc, 0x30, 0x00, 0x71, 0x39, 0x43, 0x69,
+ 0x91, 0xc2, 0x4e, 0xb9, 0xc5, 0xe1, 0x8e, 0x00, 0x71, 0x69, 0xca, 0xa6,
+ 0xaa, 0x00, 0x72, 0xd0, 0xc2, 0x0a, 0x20, 0x00, 0x72, 0x91, 0xc4, 0x05,
+ 0xde, 0x00, 0x72, 0x98, 0xc3, 0x08, 0xde, 0x00, 0x72, 0xa1, 0xc3, 0x0d,
+ 0x8f, 0x00, 0x72, 0xa8, 0xc2, 0x22, 0x45, 0x00, 0x72, 0xb1, 0xc4, 0x15,
+ 0xa7, 0x00, 0x72, 0xb8, 0x87, 0x0f, 0x15, 0x58, 0x47, 0xc7, 0x3d, 0xc2,
+ 0x4e, 0xc5, 0x83, 0x0f, 0x14, 0x88, 0x91, 0x0f, 0x15, 0x40, 0x97, 0x0f,
+ 0x15, 0x18, 0xc2, 0x07, 0x69, 0x0f, 0x14, 0xc1, 0x83, 0x0f, 0x14, 0xb8,
+ 0xd0, 0x5f, 0x9f, 0x01, 0x4e, 0x69, 0xc8, 0x50, 0x0d, 0x01, 0x4e, 0x59,
+ 0xc9, 0x18, 0x19, 0x01, 0x4e, 0x51, 0xcf, 0x12, 0x7f, 0x0f, 0xb6, 0x30,
+ 0xc4, 0x5a, 0x83, 0x0e, 0x9a, 0x49, 0xc9, 0xb0, 0x51, 0x0e, 0x99, 0xe0,
+ 0xc5, 0xbb, 0xbe, 0x0e, 0x9a, 0x91, 0xc5, 0x5b, 0x4a, 0x0e, 0x9a, 0x70,
+ 0xc6, 0xd9, 0x1c, 0x0e, 0x99, 0xc1, 0x16, 0x42, 0x4e, 0xd9, 0xc7, 0xc7,
+ 0xec, 0x0e, 0x99, 0xe9, 0xc4, 0x1e, 0xc2, 0x0e, 0x99, 0x30, 0xc5, 0xda,
+ 0x5e, 0x0e, 0x9a, 0x61, 0xc2, 0x00, 0x29, 0x0e, 0x99, 0x88, 0xc5, 0xda,
+ 0x77, 0x0e, 0x99, 0x71, 0x0b, 0x42, 0x4e, 0xeb, 0xc5, 0x7f, 0xbc, 0x01,
+ 0x18, 0xa9, 0xc5, 0x32, 0xb7, 0x0f, 0xa6, 0xf2, 0x02, 0x4e, 0xf7, 0x49,
+ 0x10, 0xa2, 0xc2, 0x4e, 0xfd, 0xca, 0x21, 0x1b, 0x00, 0x60, 0x08, 0xc7,
+ 0x10, 0xac, 0x00, 0x60, 0x11, 0xc7, 0x7d, 0xf8, 0x00, 0x61, 0xe8, 0xc5,
+ 0x45, 0xcf, 0x00, 0x60, 0x19, 0xc4, 0x21, 0x28, 0x00, 0x62, 0x68, 0x83,
+ 0x00, 0x60, 0x2b, 0x02, 0x4f, 0x09, 0x8b, 0x00, 0x60, 0x3b, 0x02, 0x4f,
+ 0x15, 0x97, 0x00, 0x60, 0x4b, 0x02, 0x4f, 0x19, 0x18, 0xc2, 0x4f, 0x1d,
+ 0x87, 0x00, 0x60, 0x73, 0x02, 0x4f, 0x27, 0x91, 0x00, 0x60, 0x93, 0x02,
+ 0x4f, 0x2b, 0x0d, 0xc2, 0x4f, 0x2f, 0x09, 0xc2, 0x4f, 0x39, 0x10, 0xc2,
+ 0x4f, 0x43, 0x05, 0xc2, 0x4f, 0x5c, 0x0c, 0xc2, 0x4f, 0x66, 0x16, 0xc2,
+ 0x4f, 0x70, 0x06, 0xc2, 0x4f, 0x84, 0x12, 0xc2, 0x4f, 0x98, 0x04, 0xc2,
+ 0x4f, 0xa2, 0xc2, 0x00, 0x3f, 0x00, 0x61, 0x71, 0xc2, 0x1a, 0x36, 0x00,
+ 0x61, 0x79, 0x14, 0xc2, 0x4f, 0xac, 0x0e, 0xc2, 0x4f, 0xb4, 0x15, 0xc2,
+ 0x4f, 0xbc, 0xc2, 0x01, 0x0e, 0x00, 0x61, 0xc8, 0x83, 0x00, 0x61, 0xf1,
+ 0x8b, 0x00, 0x62, 0x41, 0x97, 0x00, 0x62, 0x60, 0x8b, 0x00, 0x62, 0x00,
+ 0x97, 0x00, 0x62, 0x10, 0x94, 0x00, 0x62, 0x1b, 0x02, 0x4f, 0xcc, 0x8e,
+ 0x00, 0x63, 0x12, 0x02, 0x4f, 0xd0, 0x87, 0x00, 0x62, 0x38, 0x91, 0x00,
+ 0x62, 0x58, 0xc2, 0x0a, 0x20, 0x00, 0x63, 0x41, 0xc4, 0x05, 0xde, 0x00,
+ 0x63, 0x48, 0xc3, 0x08, 0xde, 0x00, 0x63, 0x51, 0xc3, 0x0d, 0x8f, 0x00,
+ 0x63, 0x58, 0xc2, 0x22, 0x45, 0x00, 0x63, 0x61, 0xc4, 0x15, 0xa7, 0x00,
+ 0x63, 0x68, 0xd2, 0x16, 0x60, 0x00, 0x63, 0xc9, 0xd3, 0x41, 0x86, 0x00,
+ 0x63, 0xe0, 0x47, 0xcb, 0xdc, 0xc2, 0x4f, 0xd4, 0x49, 0xb1, 0x95, 0x42,
+ 0x4f, 0xe0, 0x46, 0x03, 0x50, 0xc2, 0x4f, 0xec, 0x45, 0x01, 0xac, 0x42,
+ 0x4f, 0xf8, 0xc5, 0x03, 0x50, 0x01, 0x70, 0xf1, 0xc5, 0x00, 0x34, 0x01,
+ 0x70, 0xf8, 0xc4, 0x15, 0xa7, 0x08, 0xa6, 0xb9, 0xc2, 0x22, 0x45, 0x08,
+ 0xa6, 0xb0, 0xc3, 0x0d, 0x8f, 0x08, 0xa6, 0xa9, 0xc3, 0x08, 0xde, 0x08,
+ 0xa6, 0xa0, 0xc4, 0x05, 0xde, 0x08, 0xa6, 0x99, 0xc2, 0x0a, 0x20, 0x08,
+ 0xa6, 0x90, 0xc7, 0x7d, 0xf8, 0x08, 0xa6, 0x21, 0xc7, 0x10, 0xac, 0x08,
+ 0xa6, 0x00, 0xc5, 0x45, 0xcf, 0x08, 0xa6, 0x09, 0xc4, 0x21, 0x28, 0x08,
+ 0xa6, 0x10, 0x97, 0x08, 0xa5, 0xf1, 0x8b, 0x08, 0xa5, 0xd9, 0x83, 0x08,
+ 0xa5, 0x80, 0x91, 0x08, 0xa5, 0xe9, 0x87, 0x08, 0xa5, 0xd0, 0x8e, 0x08,
+ 0xa5, 0xbb, 0x02, 0x50, 0x04, 0x94, 0x08, 0xa5, 0xaa, 0x02, 0x50, 0x08,
+ 0x97, 0x08, 0xa5, 0xa0, 0x8b, 0x08, 0xa5, 0x90, 0x83, 0x08, 0xa5, 0x71,
+ 0xc2, 0x0e, 0xe5, 0x08, 0xa5, 0x69, 0xc2, 0x01, 0x0e, 0x08, 0xa5, 0x60,
+ 0x83, 0x08, 0xa5, 0x59, 0x47, 0xb7, 0xd8, 0x42, 0x50, 0x0c, 0xc2, 0x01,
+ 0x0e, 0x08, 0xa5, 0x31, 0x83, 0x08, 0xa5, 0x28, 0xc2, 0x01, 0x0e, 0x08,
+ 0xa5, 0x21, 0x83, 0x08, 0xa5, 0x18, 0x83, 0x08, 0xa5, 0x11, 0xc2, 0x01,
+ 0x01, 0x08, 0xa4, 0xe9, 0xc2, 0x1a, 0x36, 0x08, 0xa4, 0xc1, 0xc2, 0x07,
+ 0x69, 0x08, 0xa4, 0x98, 0xc2, 0x01, 0x0e, 0x08, 0xa5, 0x09, 0x83, 0x08,
+ 0xa5, 0x01, 0x06, 0x42, 0x50, 0x1a, 0xc2, 0x01, 0x0e, 0x08, 0xa4, 0xf9,
+ 0x83, 0x08, 0xa4, 0xf1, 0x16, 0x42, 0x50, 0x24, 0xc2, 0x01, 0x0e, 0x08,
+ 0xa4, 0xb9, 0x83, 0x08, 0xa4, 0xb0, 0xc2, 0x01, 0x0e, 0x08, 0xa4, 0xa9,
+ 0x83, 0x08, 0xa4, 0xa0, 0xc2, 0x01, 0x0e, 0x08, 0xa4, 0x91, 0x83, 0x08,
+ 0xa4, 0x88, 0xc2, 0x01, 0x0e, 0x08, 0xa4, 0x81, 0x83, 0x08, 0xa4, 0x78,
+ 0x97, 0x08, 0xa4, 0x71, 0x8b, 0x08, 0xa4, 0x61, 0x83, 0x08, 0xa4, 0x10,
+ 0x97, 0x08, 0xa4, 0x30, 0x8b, 0x08, 0xa4, 0x20, 0xc7, 0xc7, 0xc9, 0x00,
+ 0x7e, 0x21, 0xc7, 0xc6, 0x1e, 0x00, 0x7e, 0x2b, 0x02, 0x50, 0x2e, 0x12,
+ 0xc2, 0x50, 0x34, 0xc6, 0xd2, 0x20, 0x00, 0x7e, 0x4a, 0x02, 0x50, 0x40,
+ 0x44, 0xb5, 0xfb, 0xc2, 0x50, 0x44, 0xcd, 0x80, 0xb0, 0x00, 0x7b, 0xf1,
+ 0xc8, 0x89, 0xf5, 0x00, 0x7b, 0xf8, 0xc7, 0xbc, 0xfc, 0x00, 0x79, 0xf1,
+ 0xc8, 0xc1, 0xab, 0x00, 0x7c, 0x38, 0xc8, 0xbc, 0xfb, 0x00, 0x79, 0xf9,
+ 0xc7, 0x53, 0x0b, 0x00, 0x7c, 0x48, 0xc7, 0xca, 0xd2, 0x00, 0x7c, 0x31,
+ 0xc9, 0x94, 0x32, 0x00, 0x7c, 0x40, 0xcb, 0x98, 0x5b, 0x00, 0x7c, 0x51,
+ 0xcb, 0x93, 0x75, 0x00, 0x7c, 0x58, 0xcb, 0x94, 0x30, 0x00, 0x7c, 0x69,
+ 0xc8, 0x53, 0x0a, 0x00, 0x7c, 0x71, 0xd1, 0x53, 0x01, 0x00, 0x7c, 0x78,
+ 0x0d, 0xc2, 0x50, 0x50, 0x09, 0xc2, 0x50, 0x60, 0x10, 0xc2, 0x50, 0x6a,
+ 0x05, 0xc2, 0x50, 0x80, 0xc2, 0x26, 0x94, 0x00, 0x7c, 0xb9, 0x16, 0xc2,
+ 0x50, 0x8a, 0x06, 0xc2, 0x50, 0x9c, 0x12, 0xc2, 0x50, 0xae, 0x04, 0xc2,
+ 0x50, 0xb8, 0xc2, 0x00, 0x3f, 0x00, 0x7d, 0x41, 0xc2, 0x00, 0x4c, 0x00,
+ 0x7d, 0x69, 0x1c, 0xc2, 0x50, 0xc2, 0xc2, 0x00, 0x02, 0x00, 0x7d, 0x81,
+ 0xc2, 0x1a, 0x36, 0x00, 0x7d, 0x89, 0xc2, 0x00, 0x9a, 0x00, 0x7d, 0x91,
+ 0xc2, 0x00, 0x96, 0x00, 0x7d, 0x99, 0x15, 0xc2, 0x50, 0xcc, 0xc2, 0x01,
+ 0x0e, 0x00, 0x7d, 0xb9, 0x83, 0x00, 0x7d, 0xc1, 0x4b, 0x80, 0xbd, 0x42,
+ 0x50, 0xdc, 0x48, 0x18, 0x7f, 0xc2, 0x50, 0xee, 0xc5, 0x33, 0x81, 0x00,
+ 0x78, 0xa0, 0xc2, 0x00, 0xe5, 0x00, 0x79, 0xd1, 0xc2, 0x02, 0xe4, 0x00,
+ 0x79, 0xd8, 0xcf, 0x18, 0x7f, 0x00, 0x78, 0x21, 0xdb, 0x18, 0x73, 0x00,
+ 0x7e, 0x98, 0xcf, 0x18, 0x9a, 0x00, 0x78, 0x29, 0xdb, 0x18, 0x8e, 0x00,
+ 0x7e, 0xa0, 0xd4, 0x3c, 0xee, 0x00, 0x78, 0x31, 0x4c, 0x8b, 0x44, 0x42,
+ 0x50, 0xfa, 0x0d, 0xc2, 0x51, 0x06, 0xc9, 0xab, 0x6e, 0x00, 0x79, 0xa0,
+ 0xc7, 0x18, 0x7f, 0x00, 0x78, 0x51, 0xcc, 0x2c, 0x5d, 0x00, 0x7e, 0x80,
+ 0xc4, 0x01, 0x0d, 0x00, 0x78, 0x71, 0xc5, 0x33, 0x81, 0x00, 0x7e, 0x92,
+ 0x02, 0x51, 0x12, 0xc7, 0x73, 0xf8, 0x00, 0x79, 0xa9, 0xca, 0xa8, 0xe4,
+ 0x00, 0x79, 0xb8, 0xc8, 0x33, 0x83, 0x00, 0x78, 0x79, 0xc7, 0xc8, 0xd3,
+ 0x00, 0x79, 0xc8, 0x83, 0x00, 0x7a, 0x01, 0xc2, 0x01, 0x0e, 0x00, 0x7a,
+ 0x09, 0xc3, 0x1d, 0x55, 0x00, 0x7b, 0x49, 0xc2, 0x06, 0x6b, 0x00, 0x7b,
+ 0x58, 0x83, 0x00, 0x7a, 0x11, 0xc2, 0x01, 0x0e, 0x00, 0x7a, 0x18, 0xc2,
+ 0x07, 0x69, 0x00, 0x7a, 0x21, 0xc2, 0x1a, 0x36, 0x00, 0x7a, 0x49, 0xc2,
+ 0x01, 0x01, 0x00, 0x7a, 0x71, 0x83, 0x00, 0x7a, 0x98, 0x83, 0x00, 0x7a,
+ 0x29, 0xc2, 0x01, 0x0e, 0x00, 0x7a, 0x30, 0x16, 0xc2, 0x51, 0x18, 0x83,
+ 0x00, 0x7a, 0x79, 0xc2, 0x01, 0x0e, 0x00, 0x7a, 0x81, 0x15, 0x42, 0x51,
+ 0x22, 0x06, 0xc2, 0x51, 0x2c, 0x83, 0x00, 0x7a, 0x89, 0xc2, 0x01, 0x0e,
+ 0x00, 0x7a, 0x91, 0x1c, 0x42, 0x51, 0x36, 0x83, 0x00, 0x7a, 0xa1, 0xc2,
+ 0x01, 0x0e, 0x00, 0x7a, 0xa8, 0x83, 0x00, 0x7a, 0xb1, 0xc2, 0x01, 0x0e,
+ 0x00, 0x7a, 0xb8, 0xc2, 0x01, 0x0e, 0x00, 0x7a, 0xf1, 0x83, 0x00, 0x7a,
+ 0xf8, 0x83, 0x00, 0x7b, 0x11, 0xc2, 0x00, 0x9a, 0x00, 0x7b, 0x60, 0xc2,
+ 0x01, 0x0e, 0x00, 0x7b, 0x21, 0xc2, 0x0e, 0xe5, 0x00, 0x7b, 0x29, 0x83,
+ 0x00, 0x7b, 0x30, 0xc2, 0x0a, 0x20, 0x00, 0x79, 0x59, 0xc4, 0x05, 0xde,
+ 0x00, 0x79, 0x60, 0xc3, 0x08, 0xde, 0x00, 0x79, 0x69, 0xc3, 0x0d, 0x8f,
+ 0x00, 0x79, 0x70, 0xc2, 0x22, 0x45, 0x00, 0x79, 0x79, 0xc4, 0x15, 0xa7,
+ 0x00, 0x79, 0x80, 0x94, 0x00, 0x7b, 0xb8, 0x8e, 0x00, 0x7b, 0xc8, 0x84,
+ 0x01, 0x69, 0x8b, 0x02, 0x51, 0x40, 0x89, 0x01, 0x69, 0x9b, 0x02, 0x51,
+ 0x44, 0x8c, 0x01, 0x69, 0xb1, 0x86, 0x01, 0x69, 0xbb, 0x02, 0x51, 0x4b,
+ 0x88, 0x01, 0x69, 0xe1, 0x8d, 0x01, 0x69, 0xeb, 0x02, 0x51, 0x56, 0x8a,
+ 0x01, 0x6a, 0x03, 0x02, 0x51, 0x5d, 0x83, 0x01, 0x6a, 0x21, 0x93, 0x01,
+ 0x6a, 0x39, 0x9c, 0x01, 0x6b, 0x1b, 0x02, 0x51, 0x61, 0x8e, 0x01, 0x6a,
+ 0x69, 0x8f, 0x01, 0x6a, 0x71, 0x90, 0x01, 0x6a, 0x79, 0x92, 0x01, 0x6a,
+ 0x91, 0x94, 0x01, 0x6a, 0xa3, 0x02, 0x51, 0x69, 0x95, 0x01, 0x6a, 0xcb,
+ 0x02, 0x51, 0x6d, 0x96, 0x01, 0x6a, 0xe3, 0x02, 0x51, 0x75, 0xc2, 0x11,
+ 0x3f, 0x01, 0x6a, 0xf1, 0x98, 0x01, 0x6b, 0x01, 0x99, 0x01, 0x6b, 0x09,
+ 0x9b, 0x01, 0x6b, 0x10, 0x9b, 0x01, 0x69, 0xd8, 0x8d, 0x01, 0x69, 0xf3,
+ 0x02, 0x51, 0x7d, 0x8a, 0x01, 0x6a, 0x11, 0x93, 0x01, 0x6a, 0x41, 0xc2,
+ 0x23, 0x6a, 0x01, 0x6a, 0x61, 0x09, 0xc2, 0x51, 0x81, 0xc2, 0x00, 0x56,
+ 0x01, 0x6a, 0x88, 0x44, 0x00, 0x36, 0xc2, 0x51, 0x89, 0x45, 0x0b, 0x2b,
+ 0xc2, 0x51, 0x9f, 0x42, 0x01, 0xa7, 0xc2, 0x51, 0xf3, 0xc3, 0x2f, 0x22,
+ 0x00, 0x37, 0x31, 0xc3, 0x7c, 0xad, 0x00, 0x37, 0x29, 0xc5, 0x4c, 0x81,
+ 0x00, 0x30, 0xd1, 0xc5, 0x53, 0x6d, 0x00, 0x30, 0xc8, 0xc3, 0x2c, 0xe3,
+ 0x00, 0x32, 0x93, 0x02, 0x51, 0xff, 0xd8, 0x22, 0xac, 0x00, 0x44, 0xe9,
+ 0xcc, 0x8d, 0x6c, 0x00, 0x32, 0xb0, 0x4a, 0xa6, 0x82, 0xc2, 0x52, 0x03,
+ 0xc4, 0x01, 0xbd, 0x07, 0xdd, 0xf9, 0x16, 0xc2, 0x52, 0x0f, 0x42, 0x00,
+ 0x68, 0xc2, 0x52, 0x1b, 0x4a, 0x39, 0xbf, 0xc2, 0x52, 0x27, 0xcb, 0x98,
+ 0x2f, 0x07, 0xde, 0x10, 0x15, 0xc2, 0x52, 0x33, 0xc9, 0xaf, 0x55, 0x00,
+ 0x30, 0xa1, 0x42, 0x00, 0x9a, 0xc2, 0x52, 0x3d, 0xcf, 0x68, 0xd9, 0x00,
+ 0x30, 0x89, 0xc5, 0xe0, 0xa8, 0x00, 0x30, 0x78, 0x00, 0x42, 0x52, 0x49,
+ 0x0c, 0xc2, 0x52, 0x55, 0x0a, 0xc2, 0x52, 0x61, 0x15, 0xc2, 0x52, 0x6d,
+ 0x4b, 0x91, 0x7b, 0xc2, 0x52, 0x81, 0x03, 0xc2, 0x52, 0x99, 0x16, 0xc2,
+ 0x52, 0xaf, 0x49, 0xb7, 0x98, 0xc2, 0x52, 0xbd, 0x4a, 0x63, 0xde, 0xc2,
+ 0x52, 0xf1, 0x0d, 0xc2, 0x53, 0x25, 0x49, 0x0e, 0x1c, 0xc2, 0x53, 0x31,
+ 0x13, 0xc2, 0x53, 0x53, 0x49, 0xae, 0x23, 0xc2, 0x53, 0x5d, 0x04, 0xc2,
+ 0x53, 0x81, 0x14, 0xc2, 0x53, 0x8d, 0x0f, 0xc2, 0x53, 0x97, 0x4e, 0x75,
+ 0x64, 0xc2, 0x53, 0xa3, 0x49, 0xb5, 0x73, 0xc2, 0x53, 0xad, 0x56, 0x2f,
+ 0x13, 0xc2, 0x53, 0xd7, 0xd6, 0x30, 0xf7, 0x07, 0xef, 0xc0, 0x4d, 0x7e,
+ 0x74, 0xc2, 0x53, 0xdd, 0x45, 0x02, 0x01, 0x42, 0x53, 0xe9, 0x4a, 0xa3,
+ 0x26, 0xc2, 0x54, 0x6a, 0xcc, 0x2a, 0x11, 0x00, 0x46, 0x88, 0xd4, 0x3e,
+ 0xf6, 0x00, 0x47, 0xf9, 0xcb, 0x3e, 0xff, 0x00, 0x32, 0xc0, 0xc7, 0xc7,
+ 0x44, 0x00, 0x44, 0xe1, 0xc7, 0x27, 0x08, 0x00, 0x32, 0x98, 0x06, 0xc2,
+ 0x54, 0x7c, 0x03, 0xc2, 0x54, 0x84, 0xc3, 0x8c, 0x10, 0x0f, 0x70, 0x09,
+ 0xc4, 0x2f, 0xc8, 0x0f, 0x70, 0x11, 0xc3, 0x7c, 0xad, 0x0f, 0x70, 0x29,
+ 0x42, 0x05, 0x5c, 0xc2, 0x54, 0x90, 0xc3, 0x1a, 0x74, 0x0f, 0x70, 0x39,
+ 0x16, 0xc2, 0x54, 0x9a, 0xc3, 0x2f, 0x22, 0x0f, 0x70, 0x49, 0x0d, 0xc2,
+ 0x54, 0xa8, 0x0e, 0xc2, 0x54, 0xb4, 0xc4, 0x1a, 0x6a, 0x0f, 0x70, 0x61,
+ 0xc4, 0x3e, 0xff, 0x0f, 0x70, 0x69, 0x15, 0xc2, 0x54, 0xc0, 0xc3, 0x0e,
+ 0x13, 0x0f, 0x70, 0x91, 0xc3, 0x71, 0x66, 0x0f, 0x70, 0x99, 0x48, 0x10,
+ 0x79, 0xc2, 0x54, 0xd8, 0x49, 0x35, 0xe0, 0xc2, 0x55, 0x2a, 0xc3, 0xae,
+ 0x23, 0x0f, 0x70, 0x81, 0xc5, 0x91, 0x7b, 0x0f, 0x70, 0xd8, 0xc3, 0x00,
+ 0xec, 0x00, 0x32, 0x7b, 0x02, 0x55, 0x36, 0xcc, 0x8d, 0x84, 0x00, 0x30,
+ 0x68, 0xd6, 0x2f, 0xef, 0x00, 0x47, 0xdb, 0x02, 0x55, 0x43, 0xc7, 0xc7,
+ 0xde, 0x00, 0x44, 0xf0, 0xc5, 0x03, 0x50, 0x00, 0x47, 0xc3, 0x02, 0x55,
+ 0x49, 0xc5, 0x00, 0x34, 0x00, 0x47, 0xd0, 0xce, 0x74, 0x76, 0x00, 0x44,
+ 0x41, 0x9b, 0x00, 0x30, 0x40, 0xe0, 0x0b, 0x27, 0x00, 0x37, 0x60, 0xce,
+ 0x74, 0x4c, 0x00, 0x47, 0xb1, 0xcd, 0x05, 0x3a, 0x07, 0xf3, 0xd1, 0xcb,
+ 0x6a, 0x72, 0x07, 0xf3, 0xd8, 0xce, 0x05, 0x39, 0x07, 0xf3, 0xa0, 0x00,
+ 0xc2, 0x55, 0x4f, 0xc3, 0x06, 0x26, 0x00, 0x32, 0x5a, 0x02, 0x55, 0x61,
+ 0x45, 0xe3, 0x96, 0xc2, 0x55, 0x67, 0x49, 0x0b, 0x79, 0xc2, 0x55, 0x73,
+ 0x48, 0x0b, 0xf4, 0x42, 0x55, 0x7f, 0xc5, 0x1a, 0x7f, 0x00, 0x32, 0x03,
+ 0x02, 0x55, 0x8b, 0xcb, 0x95, 0xa6, 0x07, 0xf3, 0x98, 0xc5, 0x4c, 0x81,
+ 0x00, 0x47, 0x33, 0x02, 0x55, 0x91, 0xc5, 0x53, 0x6d, 0x00, 0x47, 0x2b,
+ 0x02, 0x55, 0x97, 0xc5, 0x6a, 0xd2, 0x00, 0x47, 0x22, 0x02, 0x55, 0x9d,
+ 0xc5, 0x03, 0x50, 0x00, 0x32, 0xa1, 0xc5, 0x00, 0x34, 0x00, 0x32, 0xa8,
+ 0xce, 0x76, 0x52, 0x00, 0x44, 0x81, 0xcf, 0x69, 0x15, 0x00, 0x30, 0x70,
+ 0xc9, 0x0d, 0xd7, 0x00, 0x32, 0xe1, 0xd6, 0x31, 0xbd, 0x00, 0x32, 0xd9,
+ 0xcd, 0x31, 0xc6, 0x00, 0x32, 0xd0, 0xc9, 0x0b, 0x2b, 0x00, 0x37, 0x59,
+ 0xc8, 0xbc, 0xeb, 0x00, 0x37, 0x50, 0xc4, 0x44, 0xff, 0x00, 0x36, 0xe9,
+ 0xc9, 0x5e, 0xd6, 0x00, 0x30, 0xe8, 0xc4, 0x15, 0xa7, 0x00, 0x33, 0x39,
+ 0xc2, 0x22, 0x45, 0x00, 0x33, 0x30, 0xc3, 0x0d, 0x8f, 0x00, 0x33, 0x29,
+ 0xc3, 0x08, 0xde, 0x00, 0x33, 0x20, 0xc4, 0x05, 0xde, 0x00, 0x33, 0x19,
+ 0xc2, 0x0a, 0x20, 0x00, 0x33, 0x10, 0xc3, 0xec, 0x00, 0x07, 0xd8, 0xb9,
+ 0xc3, 0x01, 0xcd, 0x07, 0xd8, 0xa9, 0xc3, 0x38, 0xf1, 0x07, 0xd8, 0xa1,
+ 0xc3, 0x27, 0x91, 0x07, 0xd8, 0x98, 0xcc, 0x26, 0x18, 0x00, 0x2c, 0x41,
+ 0xc2, 0x00, 0x6a, 0x00, 0x2c, 0x10, 0x8a, 0x00, 0x2c, 0x21, 0x90, 0x00,
+ 0x2b, 0x78, 0xc3, 0xec, 0x66, 0x00, 0x2c, 0x19, 0xc2, 0x08, 0x86, 0x00,
+ 0x2b, 0xd0, 0x91, 0x00, 0x2c, 0x09, 0x0a, 0xc2, 0x55, 0xa3, 0x83, 0x00,
+ 0x2b, 0x70, 0xc2, 0x08, 0x86, 0x00, 0x2c, 0x01, 0x83, 0x00, 0x2b, 0xe0,
+ 0xc3, 0xbf, 0x60, 0x00, 0x2b, 0xf9, 0x91, 0x00, 0x2b, 0x49, 0xc9, 0xaa,
+ 0xcc, 0x00, 0x2b, 0x00, 0xc2, 0x03, 0x86, 0x00, 0x2b, 0xf1, 0x91, 0x00,
+ 0x2b, 0xc0, 0xc2, 0x08, 0x86, 0x00, 0x2b, 0xe9, 0xc2, 0x01, 0x0e, 0x00,
+ 0x2b, 0xb8, 0xc3, 0x3b, 0x5c, 0x00, 0x2b, 0xd9, 0x83, 0x00, 0x2b, 0x88,
+ 0xc3, 0x01, 0x0e, 0x00, 0x2b, 0x91, 0xc2, 0x07, 0x6e, 0x00, 0x2b, 0x18,
+ 0xc2, 0x00, 0x5b, 0x00, 0x2b, 0x51, 0x83, 0x00, 0x2b, 0x30, 0x96, 0x00,
+ 0x2b, 0x41, 0x8a, 0x00, 0x2b, 0x39, 0xc2, 0x11, 0x3f, 0x00, 0x2b, 0x28,
+ 0x8a, 0x00, 0x2a, 0xa1, 0x90, 0x00, 0x29, 0xf8, 0xc3, 0xec, 0x66, 0x00,
+ 0x2a, 0x99, 0xc2, 0x08, 0x86, 0x00, 0x2a, 0x50, 0xc2, 0x00, 0x6a, 0x00,
+ 0x2a, 0x90, 0x91, 0x00, 0x2a, 0x89, 0x0a, 0xc2, 0x55, 0xad, 0x83, 0x00,
+ 0x29, 0xf0, 0xc2, 0x08, 0x86, 0x00, 0x2a, 0x81, 0x83, 0x00, 0x2a, 0x60,
+ 0xc3, 0xbf, 0x60, 0x00, 0x2a, 0x79, 0x91, 0x00, 0x29, 0xc8, 0xc2, 0x03,
+ 0x86, 0x00, 0x2a, 0x71, 0x91, 0x00, 0x2a, 0x40, 0xc2, 0x08, 0x86, 0x00,
+ 0x2a, 0x69, 0xc2, 0x01, 0x0e, 0x00, 0x2a, 0x38, 0xc3, 0x3b, 0x5c, 0x00,
+ 0x2a, 0x59, 0x83, 0x00, 0x2a, 0x08, 0xc3, 0x01, 0x0e, 0x00, 0x2a, 0x11,
+ 0xc2, 0x07, 0x6e, 0x00, 0x29, 0x98, 0xc2, 0x00, 0x5b, 0x00, 0x29, 0xd1,
+ 0x83, 0x00, 0x29, 0xb0, 0x96, 0x00, 0x29, 0xc1, 0x8a, 0x00, 0x29, 0xb9,
+ 0xc2, 0x11, 0x3f, 0x00, 0x29, 0xa8, 0xc4, 0x14, 0x91, 0x0f, 0x48, 0x09,
+ 0xc2, 0x01, 0x0e, 0x0f, 0x48, 0x68, 0x83, 0x0f, 0x48, 0x21, 0xc2, 0x00,
+ 0x5b, 0x0f, 0x48, 0x38, 0xc9, 0xb0, 0x3f, 0x0f, 0x48, 0x29, 0xc2, 0x01,
+ 0x0e, 0x0f, 0x49, 0x08, 0xc2, 0x00, 0x5b, 0x0f, 0x48, 0x71, 0x83, 0x0f,
+ 0x48, 0x90, 0xc2, 0x0b, 0xfd, 0x0f, 0x48, 0x81, 0xc2, 0x1a, 0x36, 0x0f,
+ 0x48, 0xc9, 0xc2, 0x01, 0x0e, 0x0f, 0x48, 0xd8, 0xc2, 0x0e, 0x14, 0x0f,
+ 0x48, 0x89, 0xc2, 0x01, 0x0e, 0x0f, 0x48, 0xe9, 0xc2, 0x01, 0x59, 0x0f,
+ 0x49, 0x00, 0x83, 0x0f, 0x48, 0xc1, 0xc2, 0x00, 0x34, 0x0f, 0x48, 0xf0,
+ 0x87, 0x0f, 0xbb, 0x29, 0xc3, 0x82, 0xa4, 0x0f, 0xbb, 0x31, 0xc3, 0x82,
+ 0xec, 0x0f, 0xbb, 0x39, 0xc3, 0x82, 0xe0, 0x0f, 0xbb, 0x41, 0xc3, 0x3b,
+ 0x0b, 0x0f, 0xbb, 0x48, 0xc4, 0x1d, 0x17, 0x0f, 0xb8, 0xf1, 0x45, 0x4e,
+ 0x6c, 0x42, 0x55, 0xb7, 0xc3, 0x82, 0xa4, 0x0f, 0xbb, 0x11, 0x87, 0x0f,
+ 0xbb, 0x08, 0xc8, 0xbf, 0xb3, 0x0f, 0xba, 0xd0, 0xc3, 0xec, 0x93, 0x0f,
+ 0xb9, 0xa9, 0x95, 0x0f, 0xb9, 0x93, 0x02, 0x55, 0xc1, 0xc3, 0xec, 0xb1,
+ 0x0f, 0xb9, 0x99, 0xc3, 0xec, 0x96, 0x0f, 0xb9, 0xa0, 0x94, 0x0f, 0xb9,
+ 0xd9, 0xc3, 0xed, 0x3e, 0x0f, 0xb9, 0xe0, 0x94, 0x0f, 0xb8, 0x53, 0x02,
+ 0x55, 0xc7, 0xc3, 0xed, 0x3e, 0x0f, 0xb8, 0x59, 0xc3, 0xed, 0x3b, 0x0f,
+ 0xb8, 0x60, 0x8e, 0x0f, 0xb8, 0xb1, 0xc3, 0xec, 0x33, 0x0f, 0xb8, 0xb8,
+ 0x45, 0x3a, 0x72, 0xc2, 0x55, 0xcd, 0xc3, 0x00, 0xe4, 0x0f, 0xba, 0xf0,
+ 0x44, 0x03, 0x72, 0x42, 0x55, 0xdf, 0x8f, 0x0f, 0xba, 0xe1, 0xc3, 0xec,
+ 0xc6, 0x0f, 0xba, 0xe8, 0x44, 0x25, 0x14, 0xc2, 0x55, 0xeb, 0x44, 0xe3,
+ 0xff, 0x42, 0x55, 0xf5, 0x8b, 0x0f, 0xb9, 0xb1, 0xc3, 0xed, 0xa7, 0x0f,
+ 0xb9, 0xb8, 0x9f, 0x0a, 0x21, 0xd1, 0x9e, 0x0a, 0x21, 0xc9, 0x9d, 0x0a,
+ 0x21, 0xc1, 0xa0, 0x0a, 0x21, 0xd9, 0xa1, 0x0a, 0x21, 0xe1, 0xa2, 0x0a,
+ 0x21, 0xe9, 0xa3, 0x0a, 0x21, 0xf1, 0xa4, 0x0a, 0x21, 0xf9, 0xa5, 0x0a,
+ 0x22, 0x01, 0xa6, 0x0a, 0x22, 0x08, 0xa6, 0x0a, 0x21, 0xb9, 0xa5, 0x0a,
+ 0x21, 0xb1, 0xa4, 0x0a, 0x21, 0xa9, 0xa3, 0x0a, 0x21, 0x93, 0x02, 0x55,
+ 0xff, 0xa2, 0x0a, 0x21, 0x83, 0x02, 0x56, 0x07, 0xa1, 0x0a, 0x21, 0x79,
+ 0xa0, 0x0a, 0x21, 0x71, 0x9f, 0x0a, 0x21, 0x69, 0x9e, 0x0a, 0x21, 0x5b,
+ 0x02, 0x56, 0x0b, 0x9d, 0x0a, 0x21, 0x50, 0xa6, 0x0a, 0x21, 0x43, 0x02,
+ 0x56, 0x0f, 0xa5, 0x0a, 0x21, 0x39, 0xa4, 0x0a, 0x21, 0x31, 0xa3, 0x0a,
+ 0x21, 0x29, 0xa2, 0x0a, 0x21, 0x21, 0xa1, 0x0a, 0x21, 0x19, 0xa0, 0x0a,
+ 0x21, 0x11, 0x9f, 0x0a, 0x21, 0x09, 0x9e, 0x0a, 0x21, 0x01, 0x9d, 0x0a,
+ 0x20, 0xf8, 0xa6, 0x0a, 0x20, 0xf1, 0xa5, 0x0a, 0x20, 0xe9, 0xa4, 0x0a,
+ 0x20, 0xe1, 0xa3, 0x0a, 0x20, 0xd3, 0x02, 0x56, 0x13, 0xa2, 0x0a, 0x20,
+ 0xc9, 0xa1, 0x0a, 0x20, 0xc1, 0xa0, 0x0a, 0x20, 0xb9, 0x9f, 0x0a, 0x20,
+ 0xb1, 0x9e, 0x0a, 0x20, 0xa9, 0x9d, 0x0a, 0x20, 0xa0, 0xa6, 0x0a, 0x20,
+ 0x99, 0xa5, 0x0a, 0x20, 0x91, 0xa4, 0x0a, 0x20, 0x89, 0xa3, 0x0a, 0x20,
+ 0x81, 0xa2, 0x0a, 0x20, 0x79, 0xa1, 0x0a, 0x20, 0x71, 0xa0, 0x0a, 0x20,
+ 0x69, 0x9f, 0x0a, 0x20, 0x61, 0x9e, 0x0a, 0x20, 0x59, 0x9d, 0x0a, 0x20,
+ 0x4a, 0x02, 0x56, 0x17, 0xa6, 0x0a, 0x20, 0x41, 0xa5, 0x0a, 0x20, 0x39,
+ 0xa4, 0x0a, 0x20, 0x31, 0xa3, 0x0a, 0x20, 0x29, 0xa2, 0x0a, 0x20, 0x21,
+ 0xa1, 0x0a, 0x20, 0x19, 0xa0, 0x0a, 0x20, 0x11, 0x9f, 0x0a, 0x20, 0x09,
+ 0x9e, 0x0a, 0x20, 0x00, 0x9d, 0x0a, 0x22, 0x11, 0x9e, 0x0a, 0x22, 0x19,
+ 0x9f, 0x0a, 0x22, 0x21, 0xa0, 0x0a, 0x22, 0x29, 0xa1, 0x0a, 0x22, 0x31,
+ 0xa2, 0x0a, 0x22, 0x39, 0xa3, 0x0a, 0x22, 0x43, 0x02, 0x56, 0x1b, 0xa4,
+ 0x0a, 0x22, 0x61, 0xa5, 0x0a, 0x22, 0x69, 0xa6, 0x0a, 0x22, 0x70, 0x9d,
+ 0x0a, 0x22, 0x79, 0x9e, 0x0a, 0x22, 0x81, 0x9f, 0x0a, 0x22, 0x89, 0xa0,
+ 0x0a, 0x22, 0x91, 0xa1, 0x0a, 0x22, 0x99, 0xa2, 0x0a, 0x22, 0xa1, 0xa3,
+ 0x0a, 0x22, 0xa9, 0xa4, 0x0a, 0x22, 0xb1, 0xa5, 0x0a, 0x22, 0xb9, 0xa6,
+ 0x0a, 0x22, 0xc0, 0x9d, 0x0a, 0x22, 0xc9, 0x9e, 0x0a, 0x22, 0xd1, 0x9f,
+ 0x0a, 0x22, 0xd9, 0xa0, 0x0a, 0x22, 0xe1, 0xa1, 0x0a, 0x22, 0xe9, 0xa2,
+ 0x0a, 0x22, 0xf1, 0xa3, 0x0a, 0x22, 0xf9, 0xa4, 0x0a, 0x23, 0x01, 0xa5,
+ 0x0a, 0x23, 0x09, 0xa6, 0x0a, 0x23, 0x10, 0x9d, 0x0a, 0x23, 0x19, 0x9e,
+ 0x0a, 0x23, 0x21, 0x9f, 0x0a, 0x23, 0x29, 0xa0, 0x0a, 0x23, 0x31, 0xa1,
+ 0x0a, 0x23, 0x39, 0xa2, 0x0a, 0x23, 0x41, 0xa3, 0x0a, 0x23, 0x49, 0xa4,
+ 0x0a, 0x23, 0x53, 0x02, 0x56, 0x27, 0xa5, 0x0a, 0x23, 0x63, 0x02, 0x56,
+ 0x2b, 0xa6, 0x0a, 0x23, 0x70, 0x9d, 0x0a, 0x23, 0x7b, 0x02, 0x56, 0x2f,
+ 0x9e, 0x0a, 0x23, 0x8b, 0x02, 0x56, 0x33, 0x9f, 0x0a, 0x23, 0x9b, 0x02,
+ 0x56, 0x37, 0xa0, 0x0a, 0x23, 0xa9, 0xa1, 0x0a, 0x23, 0xb3, 0x02, 0x56,
+ 0x3b, 0xa2, 0x0a, 0x23, 0xd3, 0x02, 0x56, 0x47, 0xa3, 0x0a, 0x23, 0xe9,
+ 0xa4, 0x0a, 0x23, 0xf3, 0x02, 0x56, 0x4f, 0xa5, 0x0a, 0x24, 0x11, 0xa6,
+ 0x0a, 0x24, 0x18, 0x9d, 0x0a, 0x24, 0x23, 0x02, 0x56, 0x5b, 0x9e, 0x0a,
+ 0x24, 0x39, 0x9f, 0x0a, 0x24, 0x41, 0xa0, 0x0a, 0x24, 0x49, 0xa1, 0x0a,
+ 0x24, 0x51, 0xa2, 0x0a, 0x24, 0x5b, 0x02, 0x56, 0x63, 0xa3, 0x0a, 0x24,
+ 0x69, 0xa4, 0x0a, 0x24, 0x71, 0xa5, 0x0a, 0x24, 0x79, 0xa6, 0x0a, 0x24,
+ 0x80, 0x9d, 0x0a, 0x24, 0x89, 0x9e, 0x0a, 0x24, 0x91, 0x9f, 0x0a, 0x24,
+ 0x99, 0xa0, 0x0a, 0x24, 0xa1, 0xa1, 0x0a, 0x24, 0xa9, 0xa2, 0x0a, 0x24,
+ 0xb3, 0x02, 0x56, 0x67, 0xa3, 0x0a, 0x24, 0xc1, 0xa4, 0x0a, 0x24, 0xc9,
+ 0xa5, 0x0a, 0x24, 0xd1, 0xa6, 0x0a, 0x24, 0xd8, 0x9d, 0x0a, 0x24, 0xe1,
+ 0x9e, 0x0a, 0x24, 0xe9, 0x9f, 0x0a, 0x24, 0xf1, 0xa0, 0x0a, 0x24, 0xf9,
+ 0xa1, 0x0a, 0x25, 0x01, 0xa2, 0x0a, 0x25, 0x0b, 0x02, 0x56, 0x6b, 0xa3,
+ 0x0a, 0x25, 0x19, 0xa4, 0x0a, 0x25, 0x21, 0xa5, 0x0a, 0x25, 0x29, 0xa6,
+ 0x0a, 0x25, 0x30, 0x9d, 0x0a, 0x25, 0x39, 0x9e, 0x0a, 0x25, 0x41, 0x9f,
+ 0x0a, 0x25, 0x49, 0xa0, 0x0a, 0x25, 0x51, 0xa1, 0x0a, 0x25, 0x59, 0xa2,
+ 0x0a, 0x25, 0x61, 0xa3, 0x0a, 0x25, 0x69, 0xa4, 0x0a, 0x25, 0x71, 0xa5,
+ 0x0a, 0x25, 0x79, 0xa6, 0x0a, 0x25, 0x80, 0x9d, 0x0a, 0x25, 0x89, 0x9e,
+ 0x0a, 0x25, 0x91, 0x9f, 0x0a, 0x25, 0x99, 0xa0, 0x0a, 0x25, 0xa1, 0xa1,
+ 0x0a, 0x25, 0xa9, 0xa2, 0x0a, 0x25, 0xb1, 0xa3, 0x0a, 0x25, 0xb9, 0xa4,
+ 0x0a, 0x25, 0xc1, 0xa5, 0x0a, 0x25, 0xc9, 0xa6, 0x0a, 0x25, 0xd0, 0x9d,
+ 0x0a, 0x25, 0xd9, 0x9e, 0x0a, 0x25, 0xe1, 0x9f, 0x0a, 0x25, 0xe9, 0xa0,
+ 0x0a, 0x25, 0xf1, 0xa1, 0x0a, 0x25, 0xf9, 0xa2, 0x0a, 0x26, 0x01, 0xa3,
+ 0x0a, 0x26, 0x09, 0xa4, 0x0a, 0x26, 0x11, 0xa5, 0x0a, 0x26, 0x19, 0xa6,
+ 0x0a, 0x26, 0x20, 0x9d, 0x0a, 0x26, 0x29, 0x9e, 0x0a, 0x26, 0x31, 0x9f,
+ 0x0a, 0x26, 0x39, 0xa0, 0x0a, 0x26, 0x41, 0xa1, 0x0a, 0x26, 0x49, 0xa2,
+ 0x0a, 0x26, 0x51, 0xa3, 0x0a, 0x26, 0x59, 0xa4, 0x0a, 0x26, 0x61, 0xa5,
+ 0x0a, 0x26, 0x69, 0xa6, 0x0a, 0x26, 0x70, 0x9d, 0x0a, 0x26, 0x79, 0x9e,
+ 0x0a, 0x26, 0x81, 0x9f, 0x0a, 0x26, 0x89, 0xa0, 0x0a, 0x26, 0x91, 0xa1,
+ 0x0a, 0x26, 0x99, 0xa2, 0x0a, 0x26, 0xa1, 0xa3, 0x0a, 0x26, 0xa9, 0xa4,
+ 0x0a, 0x26, 0xb1, 0xa5, 0x0a, 0x26, 0xb9, 0xa6, 0x0a, 0x26, 0xc0, 0x9d,
+ 0x0a, 0x26, 0xc9, 0x9e, 0x0a, 0x26, 0xd1, 0x9f, 0x0a, 0x26, 0xd9, 0xa0,
+ 0x0a, 0x26, 0xe1, 0xa1, 0x0a, 0x26, 0xe9, 0xa2, 0x0a, 0x26, 0xf1, 0xa3,
+ 0x0a, 0x26, 0xf9, 0xa4, 0x0a, 0x27, 0x01, 0xa5, 0x0a, 0x27, 0x09, 0xa6,
+ 0x0a, 0x27, 0x10, 0x9d, 0x0a, 0x27, 0x19, 0x9e, 0x0a, 0x27, 0x21, 0x9f,
+ 0x0a, 0x27, 0x2b, 0x02, 0x56, 0x6f, 0xa0, 0x0a, 0x27, 0x41, 0xa1, 0x0a,
+ 0x27, 0x49, 0xa2, 0x0a, 0x27, 0x51, 0xa3, 0x0a, 0x27, 0x59, 0xa4, 0x0a,
+ 0x27, 0x63, 0x02, 0x56, 0x77, 0xa5, 0x0a, 0x27, 0x71, 0xa6, 0x0a, 0x27,
+ 0x7a, 0x02, 0x56, 0x7b, 0x9d, 0x0a, 0x27, 0x89, 0x9e, 0x0a, 0x27, 0x91,
+ 0x9f, 0x0a, 0x27, 0x99, 0xa0, 0x0a, 0x27, 0xa1, 0xa1, 0x0a, 0x27, 0xa9,
+ 0xa2, 0x0a, 0x27, 0xb3, 0x02, 0x56, 0x7f, 0xa3, 0x0a, 0x27, 0xc3, 0x02,
+ 0x56, 0x83, 0xa4, 0x0a, 0x27, 0xd1, 0xa5, 0x0a, 0x27, 0xd9, 0xa6, 0x0a,
+ 0x27, 0xe0, 0x9d, 0x0a, 0x27, 0xe9, 0x9e, 0x0a, 0x27, 0xf1, 0x9f, 0x0a,
+ 0x27, 0xf9, 0xa0, 0x0a, 0x28, 0x01, 0xa1, 0x0a, 0x28, 0x09, 0xa2, 0x0a,
+ 0x28, 0x11, 0xa3, 0x0a, 0x28, 0x19, 0xa4, 0x0a, 0x28, 0x23, 0x02, 0x56,
+ 0x87, 0xa5, 0x0a, 0x28, 0x31, 0xa6, 0x0a, 0x28, 0x38, 0x9d, 0x0a, 0x28,
+ 0x41, 0x9e, 0x0a, 0x28, 0x49, 0x9f, 0x0a, 0x28, 0x51, 0xa0, 0x0a, 0x28,
+ 0x59, 0xa1, 0x0a, 0x28, 0x61, 0xa2, 0x0a, 0x28, 0x69, 0xa3, 0x0a, 0x28,
+ 0x71, 0xa4, 0x0a, 0x28, 0x79, 0xa5, 0x0a, 0x28, 0x81, 0xa6, 0x0a, 0x28,
+ 0x88, 0x9d, 0x0a, 0x28, 0x91, 0x9e, 0x0a, 0x28, 0x99, 0x9f, 0x0a, 0x28,
+ 0xa1, 0xa0, 0x0a, 0x28, 0xa9, 0xa1, 0x0a, 0x28, 0xb1, 0xa2, 0x0a, 0x28,
+ 0xb9, 0xa3, 0x0a, 0x28, 0xc1, 0xa4, 0x0a, 0x28, 0xc9, 0xa5, 0x0a, 0x28,
+ 0xd1, 0xa6, 0x0a, 0x28, 0xd8, 0x9d, 0x0a, 0x28, 0xe1, 0x9e, 0x0a, 0x28,
+ 0xe9, 0x9f, 0x0a, 0x28, 0xf1, 0xa0, 0x0a, 0x28, 0xf9, 0xa1, 0x0a, 0x29,
+ 0x01, 0xa2, 0x0a, 0x29, 0x09, 0xa3, 0x0a, 0x29, 0x11, 0xa4, 0x0a, 0x29,
+ 0x19, 0xa5, 0x0a, 0x29, 0x21, 0xa6, 0x0a, 0x29, 0x28, 0x9d, 0x0a, 0x29,
+ 0x31, 0x9e, 0x0a, 0x29, 0x39, 0x9f, 0x0a, 0x29, 0x41, 0xa0, 0x0a, 0x29,
+ 0x49, 0xa1, 0x0a, 0x29, 0x51, 0xa2, 0x0a, 0x29, 0x59, 0xa3, 0x0a, 0x29,
+ 0x61, 0xa4, 0x0a, 0x29, 0x6b, 0x02, 0x56, 0x8b, 0xa5, 0x0a, 0x29, 0x79,
+ 0xa6, 0x0a, 0x29, 0x80, 0x9d, 0x0a, 0x29, 0x89, 0x9e, 0x0a, 0x29, 0x91,
+ 0x9f, 0x0a, 0x29, 0x99, 0xa0, 0x0a, 0x29, 0xa1, 0xa1, 0x0a, 0x29, 0xa9,
+ 0xa2, 0x0a, 0x29, 0xb1, 0xa3, 0x0a, 0x29, 0xb9, 0xa4, 0x0a, 0x29, 0xc1,
+ 0xa5, 0x0a, 0x29, 0xc9, 0xa6, 0x0a, 0x29, 0xd0, 0x9d, 0x0a, 0x29, 0xd9,
+ 0x9e, 0x0a, 0x29, 0xe1, 0x9f, 0x0a, 0x29, 0xe9, 0xa0, 0x0a, 0x29, 0xf1,
+ 0xa1, 0x0a, 0x29, 0xf9, 0xa2, 0x0a, 0x2a, 0x01, 0xa3, 0x0a, 0x2a, 0x09,
+ 0xa4, 0x0a, 0x2a, 0x11, 0xa5, 0x0a, 0x2a, 0x19, 0xa6, 0x0a, 0x2a, 0x22,
+ 0x02, 0x56, 0x8f, 0x9d, 0x0a, 0x2a, 0x31, 0x9e, 0x0a, 0x2a, 0x39, 0x9f,
+ 0x0a, 0x2a, 0x41, 0xa0, 0x0a, 0x2a, 0x49, 0xa1, 0x0a, 0x2a, 0x53, 0x02,
+ 0x56, 0x93, 0xa2, 0x0a, 0x2a, 0x61, 0xa3, 0x0a, 0x2a, 0x69, 0xa4, 0x0a,
+ 0x2a, 0x71, 0xa5, 0x0a, 0x2a, 0x79, 0xa6, 0x0a, 0x2a, 0x82, 0x02, 0x56,
+ 0x97, 0x9d, 0x0a, 0x2a, 0x91, 0x9e, 0x0a, 0x2a, 0x99, 0x9f, 0x0a, 0x2a,
+ 0xa1, 0xa0, 0x0a, 0x2a, 0xa9, 0xa1, 0x0a, 0x2a, 0xb1, 0xa2, 0x0a, 0x2a,
+ 0xb9, 0xa3, 0x0a, 0x2a, 0xc1, 0xa4, 0x0a, 0x2a, 0xc9, 0xa5, 0x0a, 0x2a,
+ 0xd1, 0xa6, 0x0a, 0x2a, 0xda, 0x02, 0x56, 0x9b, 0x9d, 0x0a, 0x2a, 0xe9,
+ 0x9e, 0x0a, 0x2a, 0xf1, 0x9f, 0x0a, 0x2a, 0xf9, 0xa0, 0x0a, 0x2b, 0x01,
+ 0xa1, 0x0a, 0x2b, 0x09, 0xa2, 0x0a, 0x2b, 0x11, 0xa3, 0x0a, 0x2b, 0x19,
+ 0xa4, 0x0a, 0x2b, 0x21, 0xa5, 0x0a, 0x2b, 0x29, 0xa6, 0x0a, 0x2b, 0x30,
+ 0x9d, 0x0a, 0x2b, 0x39, 0x9e, 0x0a, 0x2b, 0x41, 0x9f, 0x0a, 0x2b, 0x49,
+ 0xa0, 0x0a, 0x2b, 0x51, 0xa1, 0x0a, 0x2b, 0x59, 0xa2, 0x0a, 0x2b, 0x61,
+ 0xa3, 0x0a, 0x2b, 0x69, 0xa4, 0x0a, 0x2b, 0x71, 0xa5, 0x0a, 0x2b, 0x79,
+ 0xa6, 0x0a, 0x2b, 0x82, 0x02, 0x56, 0x9f, 0x9d, 0x0a, 0x2b, 0x91, 0x9e,
+ 0x0a, 0x2b, 0x99, 0x1f, 0xc2, 0x56, 0xa3, 0xa0, 0x0a, 0x2b, 0xb9, 0xa1,
+ 0x0a, 0x2b, 0xc1, 0xa2, 0x0a, 0x2b, 0xc9, 0xa3, 0x0a, 0x2b, 0xd3, 0x02,
+ 0x56, 0xaf, 0xa4, 0x0a, 0x2b, 0xf1, 0xa5, 0x0a, 0x2b, 0xf9, 0xa6, 0x0a,
+ 0x2c, 0x00, 0x9d, 0x0a, 0x2c, 0x09, 0x9e, 0x0a, 0x2c, 0x11, 0x9f, 0x0a,
+ 0x2c, 0x19, 0xa0, 0x0a, 0x2c, 0x21, 0xa1, 0x0a, 0x2c, 0x29, 0xa2, 0x0a,
+ 0x2c, 0x31, 0xa3, 0x0a, 0x2c, 0x39, 0xa4, 0x0a, 0x2c, 0x41, 0xa5, 0x0a,
+ 0x2c, 0x49, 0xa6, 0x0a, 0x2c, 0x50, 0x9d, 0x0a, 0x2c, 0x59, 0x9e, 0x0a,
+ 0x2c, 0x61, 0x9f, 0x0a, 0x2c, 0x69, 0xa0, 0x0a, 0x2c, 0x71, 0xa1, 0x0a,
+ 0x2c, 0x79, 0xa2, 0x0a, 0x2c, 0x81, 0xa3, 0x0a, 0x2c, 0x89, 0xa4, 0x0a,
+ 0x2c, 0x91, 0xa5, 0x0a, 0x2c, 0x99, 0xa6, 0x0a, 0x2c, 0xa2, 0x02, 0x56,
+ 0xbb, 0x9d, 0x0a, 0x2c, 0xb1, 0x9e, 0x0a, 0x2c, 0xb9, 0x9f, 0x0a, 0x2c,
+ 0xc1, 0xa0, 0x0a, 0x2c, 0xc9, 0xa1, 0x0a, 0x2c, 0xd3, 0x02, 0x56, 0xbf,
+ 0xa2, 0x0a, 0x2c, 0xe1, 0xa3, 0x0a, 0x2c, 0xe9, 0xa4, 0x0a, 0x2c, 0xf1,
+ 0xa5, 0x0a, 0x2c, 0xfb, 0x02, 0x56, 0xc3, 0xa6, 0x0a, 0x2d, 0x08, 0x9d,
+ 0x0a, 0x2d, 0x11, 0x9e, 0x0a, 0x2d, 0x1b, 0x02, 0x56, 0xc7, 0x9f, 0x0a,
+ 0x2d, 0x29, 0xa0, 0x0a, 0x2d, 0x31, 0xa1, 0x0a, 0x2d, 0x39, 0xa2, 0x0a,
+ 0x2d, 0x41, 0xa3, 0x0a, 0x2d, 0x49, 0xa4, 0x0a, 0x2d, 0x51, 0xa5, 0x0a,
+ 0x2d, 0x59, 0xa6, 0x0a, 0x2d, 0x60, 0x9d, 0x0a, 0x2d, 0x69, 0x9e, 0x0a,
+ 0x2d, 0x73, 0x02, 0x56, 0xcb, 0x9f, 0x0a, 0x2d, 0x81, 0x20, 0xc2, 0x56,
+ 0xcf, 0xa1, 0x0a, 0x2d, 0x99, 0xa2, 0x0a, 0x2d, 0xa1, 0xa3, 0x0a, 0x2d,
+ 0xab, 0x02, 0x56, 0xd9, 0xa4, 0x0a, 0x2d, 0xb9, 0xa5, 0x0a, 0x2d, 0xc1,
+ 0xa6, 0x0a, 0x2d, 0xc8, 0x9d, 0x0a, 0x2d, 0xd1, 0x9e, 0x0a, 0x2d, 0xd9,
+ 0x9f, 0x0a, 0x2d, 0xe1, 0xc7, 0xc8, 0x9b, 0x0a, 0x2d, 0xe9, 0xa1, 0x0a,
+ 0x2d, 0xf1, 0xa2, 0x0a, 0x2d, 0xf9, 0xa3, 0x0a, 0x2e, 0x01, 0xa4, 0x0a,
+ 0x2e, 0x09, 0xa5, 0x0a, 0x2e, 0x11, 0xa6, 0x0a, 0x2e, 0x18, 0x9d, 0x0a,
+ 0x2e, 0x21, 0x9e, 0x0a, 0x2e, 0x29, 0x9f, 0x0a, 0x2e, 0x31, 0xa0, 0x0a,
+ 0x2e, 0x39, 0xa1, 0x0a, 0x2e, 0x41, 0xa2, 0x0a, 0x2e, 0x49, 0xa3, 0x0a,
+ 0x2e, 0x51, 0xa4, 0x0a, 0x2e, 0x59, 0xa5, 0x0a, 0x2e, 0x61, 0xa6, 0x0a,
+ 0x2e, 0x68, 0x1d, 0xc2, 0x56, 0xdd, 0x9e, 0x0a, 0x2e, 0x81, 0x9f, 0x0a,
+ 0x2e, 0x89, 0xa0, 0x0a, 0x2e, 0x91, 0xa1, 0x0a, 0x2e, 0x99, 0xa2, 0x0a,
+ 0x2e, 0xa1, 0xa3, 0x0a, 0x2e, 0xa9, 0xa4, 0x0a, 0x2e, 0xb1, 0xa5, 0x0a,
+ 0x2e, 0xb9, 0xa6, 0x0a, 0x2e, 0xc0, 0x9d, 0x0a, 0x2e, 0xc9, 0x9e, 0x0a,
+ 0x2e, 0xd1, 0x9f, 0x0a, 0x2e, 0xd9, 0xa0, 0x0a, 0x2e, 0xe1, 0xa1, 0x0a,
+ 0x2e, 0xe9, 0xa2, 0x0a, 0x2e, 0xf1, 0xa3, 0x0a, 0x2e, 0xf9, 0xa4, 0x0a,
+ 0x2f, 0x01, 0xa5, 0x0a, 0x2f, 0x09, 0xa6, 0x0a, 0x2f, 0x10, 0x9d, 0x0a,
+ 0x2f, 0x19, 0x9e, 0x0a, 0x2f, 0x21, 0x9f, 0x0a, 0x2f, 0x29, 0xa0, 0x0a,
+ 0x2f, 0x31, 0xa1, 0x0a, 0x2f, 0x39, 0xa2, 0x0a, 0x2f, 0x41, 0xa3, 0x0a,
+ 0x2f, 0x49, 0xa4, 0x0a, 0x2f, 0x51, 0xa5, 0x0a, 0x2f, 0x59, 0xa6, 0x0a,
+ 0x2f, 0x60, 0x9d, 0x0a, 0x2f, 0x69, 0x9e, 0x0a, 0x2f, 0x71, 0x9f, 0x0a,
+ 0x2f, 0x79, 0xa0, 0x0a, 0x2f, 0x81, 0xa1, 0x0a, 0x2f, 0x89, 0xa2, 0x0a,
+ 0x2f, 0x91, 0xa3, 0x0a, 0x2f, 0x99, 0xa4, 0x0a, 0x2f, 0xa1, 0xa5, 0x0a,
+ 0x2f, 0xa9, 0xa6, 0x0a, 0x2f, 0xb0, 0x9d, 0x0a, 0x2f, 0xbb, 0x02, 0x56,
+ 0xe9, 0x9e, 0x0a, 0x2f, 0xc9, 0x9f, 0x0a, 0x2f, 0xd1, 0xa0, 0x0a, 0x2f,
+ 0xd9, 0xa1, 0x0a, 0x2f, 0xe1, 0xa2, 0x0a, 0x2f, 0xe9, 0xa3, 0x0a, 0x2f,
+ 0xf1, 0xa4, 0x0a, 0x2f, 0xfb, 0x02, 0x56, 0xed, 0xa5, 0x0a, 0x30, 0x09,
+ 0xa6, 0x0a, 0x30, 0x10, 0x9d, 0x0a, 0x30, 0x19, 0x9e, 0x0a, 0x30, 0x21,
+ 0x9f, 0x0a, 0x30, 0x29, 0xa0, 0x0a, 0x30, 0x31, 0xa1, 0x0a, 0x30, 0x39,
+ 0xa2, 0x0a, 0x30, 0x41, 0xa3, 0x0a, 0x30, 0x49, 0xa4, 0x0a, 0x30, 0x51,
+ 0xa5, 0x0a, 0x30, 0x59, 0xa6, 0x0a, 0x30, 0x60, 0x9d, 0x0a, 0x30, 0x69,
+ 0x9e, 0x0a, 0x30, 0x71, 0x9f, 0x0a, 0x30, 0x79, 0xa0, 0x0a, 0x30, 0x81,
+ 0xa1, 0x0a, 0x30, 0x89, 0xa2, 0x0a, 0x30, 0x91, 0xa3, 0x0a, 0x30, 0x99,
+ 0xa4, 0x0a, 0x30, 0xa1, 0xa5, 0x0a, 0x30, 0xa9, 0xa6, 0x0a, 0x30, 0xb0,
+ 0x9d, 0x0a, 0x30, 0xb9, 0x9e, 0x0a, 0x30, 0xc1, 0x9f, 0x0a, 0x30, 0xc9,
+ 0xa0, 0x0a, 0x30, 0xd1, 0xa1, 0x0a, 0x30, 0xd9, 0xa2, 0x0a, 0x30, 0xe1,
+ 0xa3, 0x0a, 0x30, 0xe9, 0xa4, 0x0a, 0x30, 0xf1, 0xa5, 0x0a, 0x30, 0xf9,
+ 0xa6, 0x0a, 0x31, 0x00, 0x9d, 0x0a, 0x31, 0x09, 0x9e, 0x0a, 0x31, 0x11,
+ 0x9f, 0x0a, 0x31, 0x19, 0xa0, 0x0a, 0x31, 0x21, 0xa1, 0x0a, 0x31, 0x29,
+ 0xa2, 0x0a, 0x31, 0x31, 0xa3, 0x0a, 0x31, 0x39, 0xa4, 0x0a, 0x31, 0x40,
+ 0x9e, 0x0a, 0x31, 0x49, 0x9f, 0x0a, 0x31, 0x51, 0xa0, 0x0a, 0x31, 0x59,
+ 0xa1, 0x0a, 0x31, 0x61, 0xa2, 0x0a, 0x31, 0x69, 0xa3, 0x0a, 0x31, 0x71,
+ 0xa4, 0x0a, 0x31, 0x79, 0xa5, 0x0a, 0x31, 0x81, 0xa6, 0x0a, 0x31, 0x88,
+ 0x9d, 0x0a, 0x31, 0x91, 0x9e, 0x0a, 0x31, 0x99, 0x9f, 0x0a, 0x31, 0xa1,
+ 0xa0, 0x0a, 0x31, 0xa9, 0xa1, 0x0a, 0x31, 0xb1, 0xa2, 0x0a, 0x31, 0xb9,
+ 0xa3, 0x0a, 0x31, 0xc1, 0xa4, 0x0a, 0x31, 0xc9, 0xa5, 0x0a, 0x31, 0xd1,
+ 0xa6, 0x0a, 0x31, 0xd8, 0x9d, 0x0a, 0x31, 0xe1, 0x9e, 0x0a, 0x31, 0xe9,
+ 0x9f, 0x0a, 0x31, 0xf1, 0xa0, 0x0a, 0x31, 0xf9, 0xa1, 0x0a, 0x32, 0x01,
+ 0xa2, 0x0a, 0x32, 0x09, 0xa3, 0x0a, 0x32, 0x11, 0xa4, 0x0a, 0x32, 0x19,
+ 0xa5, 0x0a, 0x32, 0x21, 0xa6, 0x0a, 0x32, 0x28, 0xd1, 0x03, 0xf5, 0x01,
+ 0x5b, 0x79, 0xd4, 0x3e, 0x1a, 0x01, 0x5c, 0x61, 0xd5, 0x37, 0xe7, 0x01,
+ 0x5c, 0x69, 0xd3, 0x41, 0xf8, 0x01, 0x5c, 0x71, 0xd2, 0x4c, 0x3e, 0x01,
+ 0x5c, 0x78, 0xc8, 0x2d, 0xa6, 0x01, 0x1b, 0x81, 0xc9, 0x23, 0x30, 0x01,
+ 0x1b, 0x79, 0x05, 0xc2, 0x56, 0xf1, 0x06, 0xc2, 0x56, 0xfd, 0x42, 0x02,
+ 0x6a, 0xc2, 0x57, 0x10, 0xd0, 0x01, 0x37, 0x01, 0x1a, 0x41, 0x42, 0x00,
+ 0x58, 0xc2, 0x57, 0x1c, 0xcc, 0x06, 0xfb, 0x01, 0x1a, 0x21, 0xc9, 0x02,
+ 0xde, 0x01, 0x1a, 0x11, 0xc5, 0x02, 0xe2, 0x01, 0x1a, 0x09, 0xc3, 0xba,
+ 0x10, 0x01, 0x19, 0xd9, 0xc5, 0x03, 0x62, 0x01, 0x19, 0xc0, 0xc9, 0x1e,
+ 0x89, 0x01, 0x1b, 0x09, 0xc3, 0xba, 0x10, 0x01, 0x1a, 0xa9, 0xc7, 0x79,
+ 0xb4, 0x01, 0x1a, 0x88, 0xcb, 0x98, 0x45, 0x01, 0x1b, 0x89, 0xca, 0x9c,
+ 0x19, 0x01, 0x1b, 0x31, 0x45, 0x9c, 0x24, 0x42, 0x57, 0x28, 0xc5, 0x1f,
+ 0x01, 0x01, 0x1b, 0x59, 0xc9, 0x1e, 0x89, 0x01, 0x1b, 0x11, 0xc5, 0x02,
+ 0xa2, 0x01, 0x1a, 0x90, 0xc8, 0x50, 0x0d, 0x01, 0x1a, 0xc9, 0xc5, 0x02,
+ 0xa2, 0x01, 0x1a, 0x58, 0xc2, 0x00, 0x4d, 0x01, 0x1a, 0xf9, 0xc3, 0x02,
+ 0xa4, 0x01, 0x19, 0xe8, 0xc2, 0x01, 0x89, 0x01, 0x12, 0x2b, 0x02, 0x57,
+ 0x34, 0xcb, 0x25, 0x71, 0x01, 0x53, 0x80, 0xc2, 0x04, 0x30, 0x08, 0x59,
+ 0x99, 0x87, 0x08, 0x59, 0x88, 0xc2, 0x00, 0x29, 0x08, 0x59, 0x21, 0xc2,
+ 0x04, 0x30, 0x08, 0x59, 0x19, 0x87, 0x08, 0x59, 0x10, 0x87, 0x08, 0x58,
+ 0x38, 0x90, 0x08, 0x58, 0x29, 0x91, 0x08, 0x58, 0x18, 0xc7, 0x0d, 0x7f,
+ 0x08, 0x08, 0xc9, 0xc8, 0x4f, 0xa2, 0x08, 0x09, 0x10, 0xc3, 0x05, 0xdf,
+ 0x08, 0x08, 0x4b, 0x02, 0x57, 0x3a, 0xc4, 0x0d, 0x89, 0x08, 0x08, 0x92,
+ 0x02, 0x57, 0x3e, 0xc9, 0x4f, 0xa1, 0x08, 0x09, 0x58, 0xc4, 0x15, 0xa9,
+ 0x08, 0x08, 0x8b, 0x02, 0x57, 0x44, 0x91, 0x08, 0x08, 0x42, 0x02, 0x57,
+ 0x4a, 0xc2, 0x00, 0x29, 0x08, 0x08, 0x5b, 0x02, 0x57, 0x4e, 0xc3, 0x41,
+ 0xca, 0x08, 0x08, 0xa2, 0x02, 0x57, 0x52, 0xc2, 0x00, 0xd3, 0x08, 0x08,
+ 0x53, 0x02, 0x57, 0x58, 0xc3, 0x0d, 0x8a, 0x08, 0x08, 0x9a, 0x02, 0x57,
+ 0x5c, 0x00, 0xc2, 0x57, 0x62, 0xc2, 0x0d, 0x8b, 0x08, 0x08, 0xaa, 0x02,
+ 0x57, 0x6e, 0x00, 0xc2, 0x57, 0x74, 0xc2, 0x0d, 0x8b, 0x08, 0x08, 0xb2,
+ 0x02, 0x57, 0x80, 0xc7, 0x0d, 0x7f, 0x08, 0x09, 0x01, 0xc8, 0x4f, 0xa2,
+ 0x08, 0x09, 0x48, 0xc9, 0x4f, 0xa1, 0x08, 0x09, 0x90, 0xc7, 0x0d, 0x7f,
+ 0x08, 0x09, 0x09, 0xc8, 0x4f, 0xa2, 0x08, 0x09, 0x50, 0xc9, 0x4f, 0xa1,
+ 0x08, 0x09, 0x98, 0xcc, 0x15, 0x06, 0x08, 0x09, 0xc1, 0xcd, 0x81, 0x32,
+ 0x08, 0x09, 0xd8, 0xca, 0x03, 0x76, 0x01, 0x28, 0x03, 0x02, 0x57, 0x86,
+ 0x06, 0xc2, 0x57, 0x8c, 0xc2, 0x02, 0x6a, 0x01, 0x2b, 0xab, 0x02, 0x57,
+ 0x96, 0xc4, 0x00, 0xcd, 0x01, 0x2b, 0xa3, 0x02, 0x57, 0x9c, 0xc5, 0x00,
+ 0x47, 0x01, 0x2b, 0xb1, 0x44, 0x13, 0x02, 0xc2, 0x57, 0xa2, 0xc8, 0x00,
+ 0x29, 0x01, 0x28, 0x13, 0x02, 0x57, 0xae, 0x4f, 0x64, 0x74, 0xc2, 0x57,
+ 0xb4, 0x4c, 0x54, 0x00, 0x42, 0x57, 0xc0, 0x50, 0x60, 0x1f, 0xc2, 0x57,
+ 0xcc, 0xdd, 0x11, 0xfe, 0x01, 0x2a, 0x29, 0xdd, 0x11, 0x8a, 0x01, 0x2a,
+ 0x19, 0x50, 0x11, 0x8d, 0x42, 0x57, 0xde, 0x45, 0x00, 0x39, 0x42, 0x57,
+ 0xf0, 0xd0, 0x58, 0xbf, 0x01, 0x2b, 0xf0, 0xc2, 0x00, 0x6a, 0x01, 0x2b,
+ 0xdb, 0x02, 0x58, 0x00, 0x4a, 0xa9, 0x70, 0x42, 0x58, 0x06, 0x45, 0x00,
+ 0x39, 0x42, 0x58, 0x12, 0xc8, 0x00, 0x29, 0x01, 0x28, 0x59, 0xca, 0x03,
+ 0x76, 0x01, 0x28, 0x48, 0xc8, 0x00, 0x29, 0x01, 0x28, 0x39, 0xca, 0x03,
+ 0x76, 0x01, 0x28, 0x28, 0xc8, 0x00, 0x29, 0x01, 0x2a, 0x8b, 0x02, 0x58,
+ 0x24, 0x47, 0x54, 0x55, 0xc2, 0x58, 0x2a, 0x49, 0x45, 0xd4, 0xc2, 0x58,
+ 0x3c, 0xca, 0x03, 0x76, 0x01, 0x2a, 0x80, 0x4b, 0x9c, 0x2e, 0xc2, 0x58,
+ 0x4e, 0x4b, 0x93, 0x54, 0xc2, 0x58, 0x60, 0x4a, 0x60, 0x1f, 0xc2, 0x58,
+ 0x72, 0x4a, 0x11, 0x8d, 0x42, 0x58, 0x8a, 0xd1, 0x53, 0x56, 0x01, 0x2b,
+ 0x59, 0xcb, 0x95, 0x01, 0x01, 0x2b, 0x11, 0xcc, 0x88, 0xec, 0x01, 0x2a,
+ 0xf8, 0xd1, 0x53, 0xcd, 0x01, 0x2b, 0x51, 0xcb, 0x95, 0x22, 0x01, 0x2b,
+ 0x09, 0xcc, 0x88, 0xbc, 0x01, 0x2a, 0xf0, 0xd0, 0x33, 0xa8, 0x01, 0x2a,
+ 0x11, 0xca, 0xa6, 0xd2, 0x01, 0x29, 0x41, 0xcb, 0x90, 0xec, 0x01, 0x29,
+ 0x00, 0xd0, 0x33, 0xbd, 0x01, 0x29, 0xf9, 0xca, 0xa6, 0xc8, 0x01, 0x29,
+ 0x29, 0xcb, 0x90, 0xe1, 0x01, 0x28, 0xe8, 0xd1, 0x53, 0x56, 0x01, 0x2b,
+ 0x41, 0xcb, 0x95, 0x01, 0x01, 0x2a, 0xe1, 0xcc, 0x88, 0xec, 0x01, 0x2a,
+ 0xc8, 0xd1, 0x53, 0xcd, 0x01, 0x2b, 0x39, 0xcb, 0x95, 0x22, 0x01, 0x2a,
+ 0xd9, 0xcc, 0x88, 0xbc, 0x01, 0x2a, 0xc0, 0xd5, 0x33, 0xb8, 0x01, 0x2a,
+ 0x41, 0xd0, 0x33, 0xbd, 0x01, 0x29, 0xb9, 0x45, 0x00, 0xcd, 0xc2, 0x58,
+ 0xa2, 0x46, 0x01, 0x17, 0x42, 0x58, 0xae, 0xd5, 0x33, 0xa3, 0x01, 0x2a,
+ 0x01, 0xd0, 0x33, 0xa8, 0x01, 0x29, 0xc1, 0x45, 0x00, 0xcd, 0xc2, 0x58,
+ 0xba, 0x46, 0x01, 0x17, 0x42, 0x58, 0xc6, 0xce, 0x73, 0x34, 0x01, 0x2a,
+ 0x49, 0xc8, 0x11, 0x8a, 0x01, 0x29, 0xc9, 0xca, 0x11, 0xfe, 0x01, 0x29,
+ 0x88, 0xce, 0x74, 0xca, 0x01, 0x29, 0xf1, 0xc8, 0x12, 0x13, 0x01, 0x29,
+ 0xb1, 0xca, 0x11, 0x9d, 0x01, 0x29, 0x70, 0xc5, 0x12, 0x88, 0x01, 0x18,
+ 0xf9, 0xc3, 0x0a, 0x4a, 0x01, 0x18, 0x60, 0xc5, 0x12, 0x88, 0x01, 0x18,
+ 0xf1, 0xc3, 0x0a, 0x4a, 0x01, 0x18, 0x68, 0x89, 0x01, 0x8d, 0x68, 0xc2,
+ 0x1c, 0x5e, 0x01, 0x8d, 0x70, 0xc2, 0x1c, 0x5e, 0x01, 0x8d, 0x78, 0x89,
+ 0x01, 0x89, 0x21, 0x90, 0x01, 0x8d, 0x48, 0x90, 0x01, 0x8d, 0x39, 0x89,
+ 0x01, 0x8d, 0x90, 0x89, 0x01, 0x89, 0x29, 0x90, 0x01, 0x8d, 0x28, 0x90,
+ 0x01, 0x8d, 0x98, 0xa2, 0x0f, 0xd8, 0xbb, 0x02, 0x58, 0xd2, 0xa3, 0x0f,
+ 0xd9, 0x38, 0xa0, 0x0f, 0xd8, 0x33, 0x02, 0x58, 0xd6, 0xa2, 0x0f, 0xd8,
+ 0x93, 0x02, 0x58, 0xe8, 0xa1, 0x0f, 0xd8, 0x53, 0x02, 0x58, 0xec, 0xa3,
+ 0x0f, 0xd9, 0x08, 0xa3, 0x0f, 0xd9, 0x70, 0xa1, 0x0f, 0xd8, 0x63, 0x02,
+ 0x58, 0xf7, 0xa3, 0x0f, 0xd9, 0x19, 0xc2, 0x00, 0x22, 0x0f, 0xd9, 0x90,
+ 0xa3, 0x0f, 0xd9, 0x88, 0xa3, 0x0f, 0xd9, 0x49, 0xa2, 0x0f, 0xd8, 0xd2,
+ 0x02, 0x59, 0x02, 0xa3, 0x0f, 0xd9, 0x78, 0xa1, 0x0f, 0xd8, 0x6b, 0x02,
+ 0x59, 0x06, 0xa3, 0x0f, 0xd9, 0x21, 0xa2, 0x0f, 0xd8, 0xa2, 0x02, 0x59,
+ 0x11, 0xa2, 0x0f, 0xd8, 0xc2, 0x02, 0x59, 0x15, 0xa3, 0x0f, 0xd9, 0xa8,
+ 0x45, 0x80, 0x6c, 0xc2, 0x59, 0x19, 0x46, 0x3a, 0x1f, 0xc2, 0x59, 0x50,
+ 0xd0, 0x5d, 0xbf, 0x01, 0x39, 0x61, 0xce, 0x71, 0x9e, 0x01, 0x37, 0x41,
+ 0xc5, 0x03, 0xfb, 0x01, 0x2e, 0x7b, 0x02, 0x59, 0x68, 0xc8, 0xb9, 0x3b,
+ 0x01, 0x33, 0x18, 0x4e, 0x72, 0xa8, 0xc2, 0x59, 0x6c, 0xc7, 0x32, 0xca,
+ 0x01, 0x38, 0x11, 0xce, 0x71, 0xac, 0x01, 0x38, 0x01, 0xc6, 0xd2, 0xce,
+ 0x01, 0x36, 0x39, 0xc9, 0xac, 0xa9, 0x01, 0x33, 0x01, 0x0f, 0xc2, 0x59,
+ 0x78, 0xca, 0x57, 0xbf, 0x01, 0x30, 0xb9, 0xc3, 0x09, 0x46, 0x01, 0x30,
+ 0x29, 0xcc, 0x84, 0x60, 0x01, 0x30, 0x01, 0xc5, 0x01, 0xea, 0x01, 0x2d,
+ 0x03, 0x02, 0x59, 0x84, 0xd3, 0x41, 0x14, 0x0f, 0xab, 0x88, 0x44, 0xe7,
+ 0x23, 0xc2, 0x59, 0x88, 0xc4, 0x6d, 0x0f, 0x01, 0x36, 0xf9, 0xd7, 0x26,
+ 0x9c, 0x01, 0x36, 0xb1, 0xc8, 0x32, 0xab, 0x01, 0x30, 0x71, 0xd2, 0x48,
+ 0x60, 0x0f, 0xab, 0xf8, 0x42, 0x00, 0x37, 0xc2, 0x59, 0x9a, 0xc6, 0x3f,
+ 0x7c, 0x01, 0x2e, 0x33, 0x02, 0x59, 0xa4, 0x14, 0x42, 0x59, 0xa8, 0x43,
+ 0x00, 0x48, 0xc2, 0x59, 0xb4, 0xc8, 0x46, 0xf8, 0x01, 0x2d, 0x61, 0xc6,
+ 0xd0, 0xbe, 0x0f, 0x9f, 0xb0, 0x42, 0x00, 0x07, 0xc2, 0x59, 0xbe, 0x11,
+ 0xc2, 0x59, 0xc8, 0x45, 0x15, 0x2f, 0x42, 0x59, 0xd4, 0x0e, 0xc2, 0x59,
+ 0xe0, 0x11, 0x42, 0x59, 0xec, 0xca, 0xa3, 0x8a, 0x01, 0x35, 0xc1, 0x46,
+ 0x01, 0x07, 0x42, 0x59, 0xf8, 0xd9, 0x20, 0xe1, 0x01, 0x33, 0xd9, 0x12,
+ 0xc2, 0x5a, 0x16, 0x4b, 0x39, 0x07, 0x42, 0x5a, 0x28, 0x07, 0xc2, 0x5a,
+ 0x40, 0xd5, 0x35, 0xb0, 0x0f, 0xad, 0x51, 0x11, 0x42, 0x5a, 0x4f, 0xcc,
+ 0x8e, 0x8c, 0x01, 0x2d, 0x81, 0xc6, 0xcf, 0x79, 0x0f, 0xac, 0x41, 0x42,
+ 0x01, 0x04, 0x42, 0x5a, 0x5b, 0x46, 0x02, 0x5e, 0xc2, 0x5a, 0x67, 0x48,
+ 0x48, 0x3f, 0x42, 0x5a, 0x73, 0xd1, 0x18, 0x2c, 0x01, 0x3a, 0x49, 0x90,
+ 0x0f, 0x9c, 0x40, 0x9a, 0x01, 0x38, 0xb9, 0x42, 0x01, 0xb4, 0xc2, 0x5a,
+ 0x85, 0xc8, 0x94, 0x1d, 0x0f, 0xaf, 0xa0, 0xc3, 0x0c, 0xde, 0x00, 0xda,
+ 0xdb, 0x02, 0x5a, 0x92, 0xc5, 0xe1, 0x25, 0x00, 0xdb, 0x00, 0xc8, 0xbe,
+ 0xb3, 0x00, 0xdb, 0xe8, 0x46, 0xd1, 0x90, 0xc2, 0x5a, 0x98, 0x49, 0xb6,
+ 0xff, 0x42, 0x5a, 0xaa, 0x48, 0xc0, 0xb3, 0xc2, 0x5a, 0xb6, 0x46, 0xd2,
+ 0xaa, 0x42, 0x5a, 0xc2, 0xc4, 0xd2, 0x76, 0x00, 0xdb, 0x99, 0xc5, 0xdb,
+ 0xfd, 0x00, 0xdb, 0x91, 0x44, 0xab, 0xad, 0xc2, 0x5a, 0xce, 0xc7, 0x79,
+ 0x80, 0x00, 0xdb, 0x79, 0xc5, 0xe0, 0x30, 0x00, 0xdb, 0x61, 0xc5, 0xdf,
+ 0x40, 0x00, 0xdb, 0x58, 0x03, 0xc2, 0x5a, 0xe0, 0x07, 0xc2, 0x5a, 0xf5,
+ 0xc3, 0x01, 0x5e, 0x00, 0xdb, 0x31, 0xc3, 0x3b, 0x04, 0x00, 0xdb, 0x19,
+ 0xc3, 0x0a, 0x68, 0x00, 0xdb, 0x08, 0xc5, 0x63, 0x4e, 0x00, 0xda, 0xf9,
+ 0xc7, 0xce, 0x60, 0x00, 0xda, 0xe8, 0xc4, 0x15, 0xa7, 0x00, 0xda, 0xb9,
+ 0xc2, 0x22, 0x45, 0x00, 0xda, 0xb0, 0xc3, 0x0d, 0x8f, 0x00, 0xda, 0xa9,
+ 0xc3, 0x08, 0xde, 0x00, 0xda, 0xa0, 0xc4, 0x05, 0xde, 0x00, 0xda, 0x99,
+ 0xc2, 0x0a, 0x20, 0x00, 0xda, 0x90, 0xcb, 0x90, 0x94, 0x00, 0xda, 0x61,
+ 0xcb, 0x93, 0xcd, 0x00, 0xda, 0x59, 0xc5, 0xdc, 0xca, 0x00, 0xd8, 0x81,
+ 0xc4, 0x97, 0x59, 0x00, 0xd8, 0x2a, 0x02, 0x5b, 0x01, 0xc7, 0xcb, 0x50,
+ 0x00, 0xda, 0x41, 0xc4, 0x97, 0x59, 0x00, 0xd8, 0x78, 0xc9, 0xae, 0x86,
+ 0x00, 0xda, 0x39, 0x83, 0x00, 0xd9, 0x12, 0x02, 0x5b, 0x07, 0xc9, 0xab,
+ 0x38, 0x00, 0xda, 0x31, 0x83, 0x00, 0xd8, 0x9a, 0x02, 0x5b, 0x0b, 0x43,
+ 0x21, 0x0f, 0x42, 0x5b, 0x17, 0xc6, 0xbc, 0xdd, 0x00, 0xd8, 0x6a, 0x02,
+ 0x5b, 0x23, 0xc5, 0xc5, 0xbe, 0x00, 0xd8, 0x5a, 0x02, 0x5b, 0x29, 0xc8,
+ 0xc0, 0x5b, 0x00, 0xd9, 0x50, 0xc6, 0xd3, 0xd6, 0x00, 0xd9, 0x40, 0x83,
+ 0x00, 0xd9, 0x33, 0x02, 0x5b, 0x2f, 0xc2, 0x1a, 0x36, 0x00, 0xd8, 0xe1,
+ 0xc2, 0x07, 0x69, 0x00, 0xd8, 0xb8, 0x42, 0x00, 0xf9, 0x42, 0x5b, 0x35,
+ 0xc5, 0xdb, 0x8a, 0x00, 0xd8, 0xd8, 0xc5, 0xda, 0x8b, 0x00, 0xd8, 0xc8,
+ 0xc5, 0xdb, 0xa8, 0x00, 0xd8, 0xb0, 0xc7, 0xc6, 0x3a, 0x00, 0xd8, 0x90,
+ 0xc7, 0xc6, 0x3a, 0x00, 0xd8, 0x50, 0xc7, 0xc6, 0x3a, 0x00, 0xd8, 0x40,
+ 0xc7, 0xc6, 0x3a, 0x00, 0xda, 0x18, 0xc5, 0x23, 0x5a, 0x00, 0xd9, 0xf3,
+ 0x02, 0x5b, 0x41, 0xc5, 0xc6, 0x3c, 0x00, 0xd9, 0xa8, 0xc7, 0xc6, 0x3a,
+ 0x00, 0xd9, 0xe8, 0xc7, 0xc6, 0x3a, 0x00, 0xd9, 0xd8, 0xc5, 0xdb, 0x85,
+ 0x00, 0xd9, 0xc8, 0xc5, 0xdb, 0x21, 0x00, 0xd9, 0xb8, 0xc6, 0x21, 0x1a,
+ 0x00, 0xd8, 0x09, 0xc5, 0xa8, 0x4a, 0x00, 0xd8, 0x00, 0xca, 0x9f, 0x16,
+ 0x0b, 0x57, 0xa1, 0xc6, 0x3b, 0x25, 0x0b, 0x57, 0x80, 0xca, 0xa5, 0xa6,
+ 0x0b, 0x57, 0x99, 0xc6, 0x49, 0x6f, 0x0b, 0x57, 0x88, 0x87, 0x0b, 0x57,
+ 0x59, 0xc3, 0x1c, 0x5e, 0x0b, 0x56, 0x80, 0xc2, 0x05, 0x06, 0x0b, 0x57,
+ 0x00, 0x91, 0x0b, 0x57, 0x48, 0xc3, 0x2c, 0xe6, 0x0b, 0x57, 0x30, 0xc3,
+ 0x28, 0x95, 0x0b, 0x57, 0x21, 0xc2, 0x05, 0x4a, 0x0b, 0x56, 0xa8, 0x91,
+ 0x0b, 0x56, 0xf1, 0xc3, 0xe7, 0x0a, 0x0b, 0x56, 0xb8, 0xc2, 0x0b, 0x0a,
+ 0x0b, 0x56, 0xe9, 0xc2, 0x00, 0x45, 0x0b, 0x56, 0xb0, 0xc3, 0x65, 0x4d,
+ 0x0b, 0x56, 0xc1, 0x83, 0x0b, 0x56, 0x88, 0x44, 0xe6, 0x97, 0xc2, 0x5b,
+ 0x45, 0x44, 0xe6, 0x93, 0xc2, 0x5b, 0x86, 0x44, 0xe6, 0x6f, 0xc2, 0x5b,
+ 0xc6, 0x44, 0xe6, 0x83, 0xc2, 0x5b, 0xfb, 0x44, 0xe6, 0x7f, 0xc2, 0x5c,
+ 0x3b, 0x44, 0xe6, 0x73, 0x42, 0x5c, 0x73, 0xc2, 0xd4, 0x79, 0x05, 0x36,
+ 0x29, 0x87, 0x05, 0x36, 0x50, 0x87, 0x05, 0x36, 0x41, 0xc2, 0x08, 0xc6,
+ 0x05, 0x36, 0xb8, 0x96, 0x05, 0x35, 0xd9, 0xc2, 0xd4, 0x79, 0x05, 0x36,
+ 0x21, 0x90, 0x05, 0x36, 0x90, 0xc3, 0xeb, 0x91, 0x05, 0x37, 0x71, 0xc4,
+ 0xe6, 0xdb, 0x05, 0x37, 0x78, 0x87, 0x05, 0x35, 0x29, 0xc2, 0xd4, 0x79,
+ 0x05, 0x36, 0x81, 0x90, 0x05, 0x37, 0x08, 0x8b, 0x05, 0x35, 0x61, 0xc2,
+ 0x01, 0xe6, 0x05, 0x35, 0x68, 0x87, 0x05, 0x35, 0x31, 0x83, 0x05, 0x35,
+ 0x80, 0x96, 0x05, 0x37, 0x41, 0x90, 0x05, 0x37, 0x50, 0xc3, 0x7a, 0x15,
+ 0x05, 0x35, 0x91, 0xc3, 0x8c, 0x60, 0x05, 0x35, 0xf1, 0xc2, 0x01, 0xe6,
+ 0x05, 0x36, 0x30, 0xc2, 0x08, 0xc6, 0x05, 0x35, 0xe0, 0xc2, 0x01, 0xe6,
+ 0x05, 0x36, 0x39, 0xc2, 0x6f, 0x95, 0x05, 0x37, 0x58, 0xc5, 0xdd, 0x10,
+ 0x05, 0x36, 0x99, 0xc2, 0x07, 0x69, 0x05, 0x36, 0xa1, 0x83, 0x05, 0x36,
+ 0xa8, 0xc3, 0xaa, 0xdd, 0x05, 0x35, 0x79, 0x90, 0x05, 0x37, 0x10, 0xc2,
+ 0x01, 0x04, 0x05, 0x37, 0x01, 0xc2, 0x0e, 0x30, 0x05, 0x37, 0x38, 0xc2,
+ 0x23, 0x68, 0x05, 0x35, 0xb1, 0xc3, 0xdc, 0x59, 0x05, 0x35, 0xc1, 0x97,
+ 0x05, 0x36, 0x01, 0x91, 0x05, 0x36, 0xb0, 0xc7, 0xcd, 0xe2, 0x05, 0x37,
+ 0x81, 0xc9, 0xb1, 0x0e, 0x05, 0x37, 0x88, 0xc9, 0xb5, 0x97, 0x01, 0x5a,
+ 0xd9, 0xcd, 0x7a, 0x23, 0x01, 0x5a, 0xe8, 0x12, 0xc2, 0x5c, 0xa9, 0xc5,
+ 0xdd, 0x6f, 0x00, 0xdf, 0xf1, 0xc8, 0xbd, 0x5b, 0x00, 0xdf, 0xe0, 0xd2,
+ 0x4d, 0xee, 0x00, 0xdf, 0x78, 0x91, 0x00, 0xdf, 0x69, 0x8b, 0x00, 0xdf,
+ 0x58, 0x87, 0x00, 0xdf, 0x48, 0xc2, 0x02, 0x1d, 0x00, 0xdf, 0x19, 0x83,
+ 0x00, 0xde, 0xa2, 0x02, 0x5c, 0xb5, 0xc2, 0x0c, 0x25, 0x00, 0xdf, 0x11,
+ 0xc2, 0x1a, 0x36, 0x00, 0xdf, 0x01, 0xc2, 0x07, 0x69, 0x00, 0xde, 0xe9,
+ 0xca, 0xa2, 0x68, 0x00, 0xde, 0xb9, 0x83, 0x00, 0xde, 0x48, 0x4a, 0x4d,
+ 0xf4, 0xc2, 0x5c, 0xbb, 0x83, 0x00, 0xde, 0xc1, 0xca, 0xa2, 0xb8, 0x00,
+ 0xde, 0xb0, 0xc7, 0xcd, 0xf7, 0x00, 0xde, 0x68, 0xc2, 0x01, 0x0e, 0x00,
+ 0x4c, 0xb3, 0x02, 0x5c, 0xf5, 0x83, 0x00, 0x4c, 0xa8, 0x83, 0x00, 0x4d,
+ 0xc1, 0xc2, 0x0e, 0xe5, 0x00, 0x4d, 0xb9, 0xc2, 0x01, 0x0e, 0x00, 0x4d,
+ 0xb0, 0x83, 0x00, 0x4d, 0x83, 0x02, 0x5c, 0xfb, 0xc2, 0x00, 0x9a, 0x00,
+ 0x4e, 0xe1, 0xc2, 0x01, 0x0e, 0x00, 0x4e, 0xe8, 0x83, 0x00, 0x4d, 0x79,
+ 0xc2, 0x1a, 0x36, 0x00, 0x4e, 0xf8, 0xc2, 0x01, 0x0e, 0x00, 0x4d, 0x69,
+ 0x83, 0x00, 0x4d, 0x60, 0xc2, 0x01, 0x0e, 0x00, 0x4d, 0x59, 0x83, 0x00,
+ 0x4d, 0x50, 0x83, 0x00, 0x4d, 0x41, 0xc2, 0x01, 0x01, 0x00, 0x4d, 0x19,
+ 0xc2, 0x1a, 0x36, 0x00, 0x4c, 0xf1, 0xc2, 0x07, 0x69, 0x00, 0x4c, 0xc8,
+ 0xc2, 0x01, 0x0e, 0x00, 0x4d, 0x39, 0x83, 0x00, 0x4d, 0x31, 0x06, 0x42,
+ 0x5d, 0x01, 0xc2, 0x01, 0x0e, 0x00, 0x4d, 0x29, 0x83, 0x00, 0x4d, 0x21,
+ 0x16, 0x42, 0x5d, 0x0b, 0xc2, 0x01, 0x0e, 0x00, 0x4c, 0xe9, 0x83, 0x00,
+ 0x4c, 0xe0, 0xc2, 0x01, 0x0e, 0x00, 0x4c, 0xd9, 0x83, 0x00, 0x4c, 0xd0,
+ 0xc2, 0x01, 0x0e, 0x00, 0x4c, 0xc1, 0x83, 0x00, 0x4c, 0xb8, 0x97, 0x00,
+ 0x4c, 0xa1, 0x8b, 0x00, 0x4c, 0x81, 0x83, 0x00, 0x4c, 0x30, 0x8b, 0x00,
+ 0x4c, 0x40, 0x97, 0x00, 0x4c, 0x50, 0x47, 0xb7, 0xd8, 0xc2, 0x5d, 0x15,
+ 0xcd, 0x78, 0x90, 0x00, 0x4f, 0xe0, 0x42, 0x02, 0x52, 0xc2, 0x5d, 0x23,
+ 0x03, 0xc2, 0x5d, 0x2f, 0xc5, 0x33, 0x1a, 0x00, 0x4d, 0xe1, 0xcb, 0x21,
+ 0x1a, 0x00, 0x4c, 0x08, 0x97, 0x00, 0x4e, 0x61, 0x8b, 0x00, 0x4e, 0x41,
+ 0x83, 0x00, 0x4d, 0xf0, 0x94, 0x00, 0x4e, 0x1b, 0x02, 0x5d, 0x3b, 0x8e,
+ 0x00, 0x4f, 0x12, 0x02, 0x5d, 0x3f, 0x97, 0x00, 0x4e, 0x10, 0x8b, 0x00,
+ 0x4e, 0x00, 0xc2, 0x0a, 0x20, 0x00, 0x4f, 0x41, 0xc4, 0x05, 0xde, 0x00,
+ 0x4f, 0x48, 0xc3, 0x08, 0xde, 0x00, 0x4f, 0x51, 0xc3, 0x0d, 0x8f, 0x00,
+ 0x4f, 0x58, 0xc2, 0x22, 0x45, 0x00, 0x4f, 0x61, 0xc4, 0x15, 0xa7, 0x00,
+ 0x4f, 0x68, 0xc3, 0x05, 0x17, 0x00, 0x4f, 0xa3, 0x02, 0x5d, 0x43, 0x16,
+ 0xc2, 0x5d, 0x49, 0xc4, 0x08, 0xdd, 0x00, 0x4f, 0xb8, 0x1b, 0xc2, 0x5d,
+ 0x55, 0xc2, 0x00, 0x9a, 0x00, 0xd0, 0x59, 0x83, 0x00, 0xd0, 0x51, 0x09,
+ 0x42, 0x5d, 0x5f, 0xc2, 0x07, 0x44, 0x00, 0xd0, 0x39, 0x83, 0x00, 0xd0,
+ 0x30, 0xc3, 0xec, 0x8a, 0x01, 0x42, 0x03, 0x02, 0x5d, 0x69, 0xc3, 0xec,
+ 0xb4, 0x01, 0x40, 0x0b, 0x02, 0x5d, 0x6d, 0xc3, 0xec, 0xb1, 0x01, 0x40,
+ 0x13, 0x02, 0x5d, 0x9b, 0xc3, 0xec, 0x96, 0x01, 0x40, 0x23, 0x02, 0x5d,
+ 0xc2, 0xc3, 0xec, 0x93, 0x01, 0x40, 0x43, 0x02, 0x5d, 0xe2, 0xc3, 0xec,
+ 0x90, 0x01, 0x40, 0x83, 0x02, 0x5d, 0xfb, 0xc3, 0xec, 0x8d, 0x01, 0x41,
+ 0x03, 0x02, 0x5e, 0x0d, 0xc3, 0xec, 0x87, 0x01, 0x44, 0x00, 0x00, 0x42,
+ 0x5e, 0x18, 0xc2, 0x0d, 0x8b, 0x08, 0x83, 0x18, 0x9b, 0x08, 0x83, 0x10,
+ 0xc4, 0x15, 0xa7, 0x08, 0x82, 0xc3, 0x02, 0x5e, 0x24, 0xc2, 0x22, 0x45,
+ 0x08, 0x82, 0xba, 0x02, 0x5e, 0x2a, 0x0b, 0xc2, 0x5e, 0x30, 0x11, 0x42,
+ 0x5e, 0x3c, 0x0a, 0xc2, 0x5e, 0x48, 0x19, 0xc2, 0x5e, 0x54, 0xc2, 0x01,
+ 0x04, 0x08, 0x82, 0xd8, 0x49, 0x5a, 0x10, 0x42, 0x5e, 0x5e, 0xc2, 0x00,
+ 0x96, 0x08, 0x81, 0xa1, 0x83, 0x08, 0x81, 0x70, 0xc2, 0x01, 0x0e, 0x08,
+ 0x81, 0x51, 0x83, 0x08, 0x81, 0x48, 0xc2, 0x01, 0x0e, 0x08, 0x81, 0x41,
+ 0x83, 0x08, 0x81, 0x38, 0x83, 0x08, 0x81, 0x31, 0xc2, 0x01, 0x01, 0x08,
+ 0x81, 0x09, 0xc2, 0x1a, 0x36, 0x08, 0x80, 0xe1, 0xc2, 0x07, 0x69, 0x08,
+ 0x80, 0xb8, 0xc2, 0x01, 0x0e, 0x08, 0x81, 0x29, 0x83, 0x08, 0x81, 0x21,
+ 0x06, 0x42, 0x5e, 0x76, 0xc2, 0x01, 0x0e, 0x08, 0x81, 0x19, 0x83, 0x08,
+ 0x81, 0x11, 0x16, 0x42, 0x5e, 0x80, 0xc2, 0x01, 0x0e, 0x08, 0x80, 0xd9,
+ 0x83, 0x08, 0x80, 0xd0, 0xc2, 0x01, 0x0e, 0x08, 0x80, 0xc9, 0x83, 0x08,
+ 0x80, 0xc0, 0xc2, 0x01, 0x0e, 0x08, 0x80, 0xb1, 0x83, 0x08, 0x80, 0xa8,
+ 0xc2, 0x01, 0x0e, 0x08, 0x80, 0xa1, 0x83, 0x08, 0x80, 0x98, 0x97, 0x08,
+ 0x80, 0x91, 0x8b, 0x08, 0x80, 0x81, 0x83, 0x08, 0x80, 0x30, 0x47, 0xb7,
+ 0xd8, 0xc2, 0x5e, 0x8a, 0x83, 0x08, 0x81, 0x78, 0x97, 0x08, 0x80, 0x50,
+ 0x8b, 0x08, 0x80, 0x40, 0xc2, 0x01, 0x0e, 0x08, 0x81, 0x81, 0xc2, 0x0e,
+ 0xe5, 0x08, 0x81, 0x89, 0x83, 0x08, 0x81, 0x90, 0x91, 0x08, 0x82, 0x23,
+ 0x02, 0x5e, 0x98, 0x03, 0xc2, 0x5e, 0x9e, 0x87, 0x08, 0x82, 0x11, 0x48,
+ 0xb7, 0xd7, 0xc2, 0x5e, 0xaa, 0x97, 0x08, 0x81, 0xe3, 0x02, 0x5e, 0xb8,
+ 0x8b, 0x08, 0x81, 0xd3, 0x02, 0x5e, 0xbc, 0xce, 0x6e, 0x80, 0x08, 0x81,
+ 0xc8, 0xc4, 0x24, 0x35, 0x08, 0x83, 0x79, 0xc5, 0x05, 0x1b, 0x08, 0x83,
+ 0x71, 0x15, 0xc2, 0x5e, 0xc0, 0x08, 0xc2, 0x5e, 0xcc, 0x16, 0xc2, 0x5e,
+ 0xd8, 0xc3, 0x05, 0x17, 0x08, 0x83, 0x39, 0xc4, 0x16, 0x57, 0x08, 0x83,
+ 0x30, 0xc4, 0x71, 0x75, 0x08, 0x82, 0x69, 0xc3, 0x03, 0x5e, 0x08, 0x82,
+ 0x58, 0xc8, 0x3e, 0x80, 0x08, 0x82, 0x51, 0x96, 0x08, 0x82, 0x48, 0x42,
+ 0x00, 0xf8, 0xc2, 0x5e, 0xe4, 0xc9, 0x7a, 0xa9, 0x0e, 0x83, 0x90, 0xc7,
+ 0xce, 0x44, 0x0e, 0x85, 0xa9, 0xc6, 0xc9, 0x05, 0x0e, 0x85, 0xa0, 0xc4,
+ 0x97, 0x8f, 0x0e, 0x87, 0xa1, 0xc3, 0x8a, 0xb3, 0x0e, 0x83, 0xf8, 0x44,
+ 0xe9, 0xab, 0xc2, 0x5e, 0xf6, 0xc8, 0xa1, 0xd4, 0x0e, 0x80, 0xd8, 0x00,
+ 0x42, 0x5f, 0x08, 0xc5, 0xda, 0x27, 0x0e, 0x82, 0x10, 0x03, 0xc2, 0x5f,
+ 0x14, 0x11, 0x42, 0x5f, 0x1e, 0xc3, 0x00, 0x3e, 0x0e, 0x83, 0xd1, 0xc9,
+ 0xb1, 0xc2, 0x0e, 0x81, 0xb8, 0xc2, 0x01, 0xce, 0x0e, 0x87, 0x79, 0xc2,
+ 0x03, 0x7a, 0x0e, 0x87, 0x71, 0xc2, 0x00, 0xdc, 0x0e, 0x87, 0x69, 0xc2,
+ 0x01, 0x08, 0x0e, 0x87, 0x61, 0xc2, 0x07, 0x69, 0x0e, 0x87, 0x59, 0xc3,
+ 0x29, 0xd9, 0x0e, 0x87, 0x51, 0xc2, 0x07, 0x44, 0x0e, 0x87, 0x48, 0x90,
+ 0x0e, 0x84, 0xb9, 0xc9, 0x7a, 0xa9, 0x0e, 0x83, 0x98, 0x46, 0xd8, 0x68,
+ 0xc2, 0x5f, 0x2a, 0x46, 0xd3, 0x7c, 0xc2, 0x5f, 0x37, 0xc5, 0x4a, 0xc6,
+ 0x0e, 0x81, 0x18, 0xc6, 0xd0, 0x7c, 0x0e, 0x81, 0x99, 0xca, 0x74, 0xc0,
+ 0x0e, 0x80, 0x68, 0xc5, 0xdd, 0xec, 0x0e, 0x85, 0x09, 0xc4, 0xe6, 0x27,
+ 0x0e, 0x84, 0xd0, 0xc5, 0xca, 0xab, 0x0e, 0x85, 0x01, 0x8b, 0x0e, 0x84,
+ 0xf8, 0xc2, 0x02, 0x29, 0x0e, 0x84, 0xf1, 0xc4, 0x00, 0x52, 0x0e, 0x84,
+ 0xe8, 0x8b, 0x0e, 0x84, 0xe1, 0xc5, 0xca, 0xab, 0x0e, 0x84, 0xd8, 0xc7,
+ 0xcf, 0x2b, 0x0e, 0x83, 0x11, 0xc2, 0x00, 0x3f, 0x0e, 0x82, 0xe0, 0xc9,
+ 0xb3, 0x3c, 0x0e, 0x80, 0xf8, 0x00, 0x42, 0x5f, 0x43, 0x00, 0x42, 0x5f,
+ 0x4d, 0xc4, 0xd7, 0xaa, 0x0e, 0x80, 0x40, 0x45, 0xe3, 0x87, 0xc2, 0x5f,
+ 0x57, 0xc4, 0xce, 0x7e, 0x0e, 0x80, 0x98, 0xc8, 0xc0, 0xeb, 0x0e, 0x87,
+ 0x31, 0xc5, 0xd5, 0x6f, 0x0e, 0x84, 0x92, 0x02, 0x5f, 0x69, 0x46, 0xd9,
+ 0x58, 0xc2, 0x5f, 0x6f, 0xc4, 0xcb, 0xc3, 0x0e, 0x84, 0xc8, 0x16, 0xc2,
+ 0x5f, 0x81, 0xd5, 0x32, 0x7d, 0x0e, 0x86, 0x91, 0xdc, 0x13, 0x1a, 0x0e,
+ 0x86, 0x89, 0xd1, 0x54, 0x66, 0x0e, 0x86, 0x80, 0xc9, 0xa1, 0xd3, 0x0e,
+ 0x84, 0x00, 0x43, 0x00, 0x52, 0xc2, 0x5f, 0x8d, 0xd5, 0x32, 0x7d, 0x0e,
+ 0x86, 0xb1, 0xdc, 0x13, 0x1a, 0x0e, 0x86, 0xa9, 0xd1, 0x54, 0x66, 0x0e,
+ 0x86, 0xa0, 0xc3, 0x8a, 0xb3, 0x0e, 0x83, 0xe9, 0xc4, 0x97, 0x8f, 0x0e,
+ 0x83, 0xe0, 0xc4, 0xe4, 0x4f, 0x0e, 0x82, 0x99, 0xc6, 0xd7, 0x4e, 0x0e,
+ 0x80, 0x52, 0x02, 0x5f, 0x99, 0xc5, 0xe0, 0x67, 0x0e, 0x86, 0x39, 0xc9,
+ 0xac, 0x4f, 0x0e, 0x85, 0xe0, 0x47, 0x1d, 0x1c, 0xc2, 0x5f, 0x9f, 0xcb,
+ 0x99, 0x79, 0x0e, 0x85, 0xf0, 0xca, 0xa8, 0xee, 0x0e, 0x86, 0x21, 0xc8,
+ 0xb8, 0x6b, 0x0e, 0x86, 0x18, 0x10, 0xc2, 0x5f, 0xab, 0xc2, 0x03, 0x7a,
+ 0x0e, 0x86, 0x01, 0xc2, 0x00, 0xdc, 0x0e, 0x85, 0xf9, 0xc2, 0x01, 0x08,
+ 0x0e, 0x85, 0xe9, 0xc2, 0x07, 0x44, 0x0e, 0x85, 0xd0, 0xcf, 0x61, 0x86,
+ 0x0e, 0x85, 0xc8, 0x44, 0x2f, 0xac, 0xc2, 0x5f, 0xb7, 0xc4, 0x69, 0xe3,
+ 0x0e, 0x85, 0xb8, 0xc3, 0x73, 0xe8, 0x0e, 0x82, 0x31, 0xc8, 0xa1, 0xd4,
+ 0x0e, 0x80, 0xd0, 0x47, 0xc3, 0xee, 0xc2, 0x5f, 0xc1, 0x44, 0x88, 0x69,
+ 0x42, 0x5f, 0xcd, 0x48, 0x6e, 0x11, 0xc2, 0x5f, 0xd9, 0x42, 0x00, 0x47,
+ 0x42, 0x5f, 0xe5, 0xce, 0x6e, 0xfe, 0x0e, 0x85, 0x29, 0xcc, 0x88, 0x68,
+ 0x0e, 0x85, 0x18, 0xc6, 0xd5, 0x6e, 0x0e, 0x84, 0xb1, 0xc3, 0x20, 0x37,
+ 0x0e, 0x84, 0x39, 0x83, 0x0e, 0x81, 0x80, 0xc7, 0xca, 0x62, 0x0e, 0x83,
+ 0x81, 0x12, 0xc2, 0x5f, 0xf1, 0xc7, 0xc6, 0x41, 0x0e, 0x83, 0x69, 0x42,
+ 0x00, 0xf8, 0x42, 0x5f, 0xfd, 0xcd, 0x80, 0x62, 0x0e, 0x83, 0xc9, 0xc2,
+ 0x00, 0x3f, 0x0e, 0x81, 0x6a, 0x02, 0x60, 0x07, 0xcf, 0x67, 0x17, 0x0e,
+ 0x84, 0x71, 0x16, 0xc2, 0x60, 0x13, 0xcb, 0x99, 0x63, 0x0e, 0x84, 0x59,
+ 0xcc, 0x84, 0x3c, 0x0e, 0x84, 0x50, 0xc3, 0x73, 0xe8, 0x0e, 0x82, 0x41,
+ 0xc5, 0xd6, 0x8f, 0x0e, 0x80, 0x21, 0xcb, 0x74, 0xbf, 0x0e, 0x80, 0x18,
+ 0xc7, 0xca, 0x62, 0x0e, 0x83, 0x89, 0xcb, 0x91, 0x02, 0x0e, 0x83, 0x79,
+ 0xc7, 0xc6, 0x41, 0x0e, 0x83, 0x61, 0x90, 0x0e, 0x81, 0xca, 0x02, 0x60,
+ 0x1f, 0xc2, 0x00, 0xe5, 0x0e, 0x80, 0xb9, 0x8b, 0x0e, 0x80, 0x00, 0x47,
+ 0xc8, 0x1d, 0xc2, 0x60, 0x25, 0xc6, 0xd7, 0xa8, 0x0e, 0x80, 0x4a, 0x02,
+ 0x60, 0x31, 0xc4, 0x7f, 0xdc, 0x0e, 0x82, 0x68, 0x16, 0xc2, 0x60, 0x35,
+ 0xc2, 0x00, 0x3f, 0x0e, 0x82, 0x08, 0xc3, 0x73, 0xe8, 0x0e, 0x82, 0xc1,
+ 0xc5, 0xd6, 0x8f, 0x0e, 0x80, 0x31, 0xcb, 0x74, 0xbf, 0x0e, 0x80, 0x28,
+ 0x94, 0x08, 0xe0, 0x38, 0xd1, 0x51, 0xad, 0x0f, 0xdc, 0xf9, 0xc2, 0x00,
+ 0x58, 0x01, 0x2f, 0xd0, 0x4e, 0x65, 0xce, 0xc2, 0x60, 0x3f, 0xcc, 0x88,
+ 0x38, 0x0f, 0xac, 0x50, 0xc9, 0xb3, 0x21, 0x0f, 0xac, 0x61, 0xc5, 0xd8,
+ 0x5d, 0x0f, 0xac, 0x48, 0xd1, 0x51, 0xad, 0x0f, 0xdc, 0xf1, 0xc2, 0x00,
+ 0x58, 0x01, 0x2f, 0xf8, 0x4e, 0x01, 0x29, 0xc2, 0x60, 0x4b, 0xdb, 0x17,
+ 0x2f, 0x01, 0x49, 0xf0, 0x5b, 0x17, 0x80, 0xc2, 0x60, 0x57, 0x46, 0x00,
+ 0xc7, 0x42, 0x60, 0x63, 0xce, 0x08, 0x19, 0x01, 0x2c, 0x31, 0xcd, 0x43,
+ 0xa0, 0x01, 0x2c, 0x18, 0xc9, 0xab, 0xec, 0x01, 0x3f, 0xf0, 0xc9, 0xab,
+ 0xec, 0x01, 0x3f, 0xe0, 0xc9, 0xab, 0xec, 0x01, 0x3f, 0xe8, 0xc9, 0xab,
+ 0xec, 0x01, 0x3f, 0xd8, 0xcc, 0x82, 0x80, 0x01, 0x3f, 0xd1, 0xc5, 0x00,
+ 0x62, 0x01, 0x3f, 0xb8, 0xcf, 0x64, 0xec, 0x01, 0x52, 0xe9, 0xcb, 0x96,
+ 0xc4, 0x01, 0x52, 0xd9, 0x42, 0x00, 0x68, 0x42, 0x60, 0x75, 0xc7, 0x18,
+ 0x1b, 0x01, 0x52, 0x89, 0x45, 0x02, 0x93, 0x42, 0x60, 0x81, 0x10, 0xc2,
+ 0x60, 0x8d, 0x14, 0x42, 0x60, 0x97, 0x43, 0x01, 0x1f, 0xc2, 0x60, 0xa3,
+ 0xd5, 0x32, 0xa7, 0x0f, 0xab, 0xd8, 0x45, 0x01, 0x18, 0xc2, 0x60, 0xca,
+ 0xd6, 0x28, 0x0d, 0x01, 0x70, 0x60, 0xc9, 0xa2, 0xaf, 0x01, 0x3e, 0xa9,
+ 0x43, 0x03, 0x5f, 0x42, 0x60, 0xf8, 0xcb, 0x93, 0x28, 0x01, 0x36, 0xe1,
+ 0xcc, 0x00, 0xd3, 0x00, 0x03, 0xdb, 0x02, 0x61, 0x04, 0xc6, 0xbf, 0x8c,
+ 0x01, 0x18, 0x41, 0xcd, 0x6a, 0x7f, 0x01, 0x80, 0x60, 0x0a, 0xc2, 0x61,
+ 0x08, 0xc3, 0x00, 0xda, 0x01, 0x15, 0x19, 0x14, 0xc2, 0x61, 0x1a, 0xd5,
+ 0x0a, 0xe9, 0x01, 0x80, 0xa0, 0x0b, 0xc2, 0x61, 0x26, 0xc4, 0x20, 0xa2,
+ 0x01, 0x18, 0x50, 0x07, 0xc2, 0x61, 0x32, 0xcd, 0x26, 0x2f, 0x00, 0x02,
+ 0xdb, 0x02, 0x61, 0x3e, 0x0b, 0xc2, 0x61, 0x42, 0xcc, 0x75, 0x8f, 0x0f,
+ 0xaf, 0x41, 0xd3, 0x1f, 0x0c, 0x01, 0x70, 0x10, 0xc7, 0xc7, 0xf3, 0x01,
+ 0x1d, 0xc1, 0xcd, 0x7b, 0x27, 0x01, 0x71, 0x00, 0x00, 0x42, 0x61, 0x4e,
+ 0x45, 0x02, 0x93, 0xc2, 0x61, 0x60, 0xd9, 0x1f, 0x06, 0x01, 0x70, 0x20,
+ 0xcb, 0x8f, 0x55, 0x0f, 0xac, 0x71, 0xcb, 0x85, 0x81, 0x01, 0x4e, 0xc1,
+ 0x45, 0x01, 0x32, 0x42, 0x61, 0x78, 0x45, 0x04, 0x92, 0xc2, 0x61, 0x94,
+ 0x44, 0x02, 0x1e, 0x42, 0x61, 0xa0, 0xc6, 0xd4, 0x12, 0x0f, 0xb6, 0x29,
+ 0xd5, 0x31, 0x3a, 0x01, 0x70, 0xe0, 0xca, 0x09, 0xfd, 0x01, 0x0f, 0x33,
+ 0x02, 0x61, 0xac, 0xc9, 0xb2, 0xf4, 0x01, 0x0c, 0xe0, 0x42, 0x00, 0x47,
+ 0xc2, 0x61, 0xb2, 0x19, 0xc2, 0x61, 0xbe, 0xd5, 0x33, 0x8e, 0x0f, 0xc5,
+ 0x10, 0x99, 0x01, 0x0c, 0x83, 0x02, 0x61, 0xca, 0xc6, 0xd7, 0xf0, 0x01,
+ 0x48, 0xc8, 0xc5, 0xd1, 0x61, 0x0f, 0xb3, 0x61, 0xd7, 0x2b, 0x31, 0x0f,
+ 0xc5, 0x30, 0xcb, 0x83, 0x05, 0x01, 0x0f, 0x01, 0x46, 0x02, 0x92, 0x42,
+ 0x61, 0xce, 0x42, 0x00, 0x93, 0xc2, 0x61, 0xda, 0xcf, 0x60, 0xb0, 0x0f,
+ 0xc2, 0x80, 0x03, 0xc2, 0x61, 0xe6, 0x45, 0x11, 0x8e, 0x42, 0x61, 0xf2,
+ 0xcc, 0x81, 0xcf, 0x01, 0x0e, 0x39, 0xcb, 0x9a, 0x60, 0x0f, 0xd7, 0xb8,
+ 0x45, 0x04, 0x92, 0xc2, 0x61, 0xfe, 0xd8, 0x21, 0x8c, 0x0f, 0xc5, 0x01,
+ 0xdf, 0x0d, 0x5c, 0x0f, 0xc5, 0x40, 0xd0, 0x55, 0xaa, 0x0f, 0xc1, 0xa1,
+ 0xe0, 0x09, 0xe7, 0x0f, 0xc5, 0x50, 0xca, 0x37, 0x20, 0x01, 0x13, 0x91,
+ 0xc5, 0x09, 0x02, 0x01, 0x13, 0x20, 0x4a, 0x35, 0x23, 0x42, 0x62, 0x0a,
+ 0xe0, 0x06, 0xe7, 0x01, 0x54, 0x58, 0x47, 0xcc, 0x68, 0xc2, 0x62, 0x19,
+ 0x53, 0x42, 0xc9, 0x42, 0x62, 0x25, 0xe0, 0x06, 0x47, 0x01, 0x54, 0x88,
+ 0xc2, 0x01, 0x0e, 0x00, 0xe2, 0x71, 0x83, 0x00, 0xe2, 0x68, 0xc2, 0x01,
+ 0x0e, 0x00, 0xe0, 0xc1, 0x83, 0x00, 0xe0, 0xb8, 0xc7, 0xc5, 0x29, 0x00,
+ 0xe1, 0xf0, 0xd2, 0x48, 0xf0, 0x0f, 0xbd, 0xa9, 0xc6, 0x12, 0x73, 0x0f,
+ 0xbd, 0x49, 0xc4, 0x01, 0x0e, 0x01, 0x2c, 0x88, 0x44, 0x00, 0x48, 0xc2,
+ 0x62, 0x2b, 0xc3, 0x13, 0xc8, 0x0f, 0xb4, 0x40, 0xe0, 0x0a, 0xe7, 0x01,
+ 0x3b, 0x90, 0x52, 0x10, 0x52, 0xc2, 0x62, 0x31, 0x44, 0x0d, 0x8f, 0x42,
+ 0x62, 0x3d, 0xd7, 0x27, 0xf5, 0x0f, 0xbe, 0x01, 0xd8, 0x25, 0x4c, 0x0f,
+ 0xbe, 0x90, 0xc7, 0x75, 0x94, 0x0f, 0xaf, 0x88, 0x83, 0x05, 0x26, 0xe9,
+ 0xc2, 0x01, 0x0e, 0x05, 0x26, 0xf0, 0x44, 0x59, 0xe2, 0xc2, 0x62, 0x49,
+ 0xc5, 0xdf, 0xf4, 0x05, 0x27, 0xc8, 0xc4, 0xb2, 0x9f, 0x00, 0x04, 0x50,
+ 0xd6, 0x2c, 0x69, 0x01, 0x50, 0xa1, 0x45, 0x01, 0xac, 0x42, 0x62, 0x67,
+ 0x24, 0xc2, 0x62, 0x73, 0x23, 0xc2, 0x62, 0x87, 0x42, 0xeb, 0x2c, 0xc2,
+ 0x62, 0xa3, 0x04, 0xc2, 0x62, 0xc3, 0xc4, 0xe8, 0xaf, 0x08, 0x30, 0xd9,
+ 0x1e, 0xc2, 0x62, 0xcb, 0x20, 0xc2, 0x62, 0xdd, 0x21, 0xc2, 0x62, 0xfd,
+ 0x22, 0x42, 0x63, 0x05, 0x42, 0x01, 0xb1, 0xc2, 0x63, 0x2d, 0x49, 0xb2,
+ 0x76, 0xc2, 0x63, 0x39, 0x4a, 0xa8, 0xf8, 0x42, 0x63, 0x43, 0xc4, 0x15,
+ 0xa7, 0x00, 0xca, 0x69, 0xc2, 0x22, 0x45, 0x00, 0xca, 0x60, 0xc3, 0x0d,
+ 0x8f, 0x00, 0xca, 0x59, 0xc3, 0x08, 0xde, 0x00, 0xca, 0x50, 0xc4, 0x05,
+ 0xde, 0x00, 0xca, 0x49, 0xc2, 0x0a, 0x20, 0x00, 0xca, 0x40, 0xc3, 0x18,
+ 0x29, 0x00, 0xca, 0x01, 0xc4, 0xe4, 0xff, 0x00, 0xc9, 0xd9, 0xc9, 0xab,
+ 0xad, 0x00, 0xc9, 0xd1, 0xc9, 0xac, 0x10, 0x00, 0xc9, 0xc8, 0xc2, 0x00,
+ 0x96, 0x00, 0xc9, 0xc1, 0xc2, 0x00, 0x9a, 0x00, 0xc9, 0xb9, 0xc2, 0x00,
+ 0x3f, 0x00, 0xc9, 0xb1, 0xc2, 0x07, 0x44, 0x00, 0xc9, 0xa9, 0x10, 0xc2,
+ 0x63, 0x4d, 0xc2, 0x00, 0x44, 0x00, 0xc9, 0x99, 0xc8, 0x10, 0xab, 0x00,
+ 0xc9, 0x91, 0xc2, 0x06, 0x6b, 0x00, 0xc9, 0x80, 0xc2, 0x00, 0x4c, 0x00,
+ 0xc9, 0x59, 0xc2, 0x00, 0x9a, 0x00, 0xc9, 0x51, 0xc2, 0x1a, 0x36, 0x00,
+ 0xc9, 0x48, 0x91, 0x00, 0xc9, 0x43, 0x02, 0x63, 0x57, 0x87, 0x00, 0xc9,
+ 0x3b, 0x02, 0x63, 0x5b, 0x83, 0x00, 0xc9, 0x03, 0x02, 0x63, 0x5f, 0x97,
+ 0x00, 0xc9, 0x11, 0x8b, 0x00, 0xc9, 0x08, 0xc2, 0x00, 0x9a, 0x00, 0xc8,
+ 0xf1, 0xc2, 0x01, 0x0e, 0x00, 0xc8, 0x61, 0x83, 0x00, 0xc8, 0x58, 0xc3,
+ 0x2c, 0x66, 0x00, 0xc8, 0xe9, 0xc2, 0x01, 0x0e, 0x00, 0xc8, 0x21, 0x83,
+ 0x00, 0xc8, 0x18, 0x83, 0x00, 0xc8, 0xd9, 0xc2, 0x0e, 0xe5, 0x00, 0xc8,
+ 0xd1, 0xc2, 0x01, 0x0e, 0x00, 0xc8, 0xc8, 0x90, 0x00, 0xc8, 0x50, 0xc2,
+ 0x01, 0x0e, 0x00, 0xc8, 0x99, 0x83, 0x00, 0xc8, 0x90, 0xc2, 0x01, 0x0e,
0x00, 0xc8, 0x89, 0x83, 0x00, 0xc8, 0x80, 0x83, 0x00, 0xc8, 0x79, 0xc2,
- 0x01, 0x29, 0x00, 0xc8, 0x28, 0xc2, 0x00, 0xa4, 0x00, 0xc8, 0x71, 0x83,
- 0x00, 0xc8, 0x68, 0xc2, 0x00, 0xa4, 0x00, 0xc8, 0x49, 0x83, 0x00, 0xc8,
- 0x40, 0xc2, 0x00, 0xa4, 0x00, 0xc8, 0x39, 0x83, 0x00, 0xc8, 0x30, 0xc2,
- 0x00, 0xa4, 0x00, 0xc8, 0x11, 0x83, 0x00, 0xc8, 0x08, 0x45, 0xdf, 0x39,
- 0xc2, 0x63, 0xb1, 0x44, 0x86, 0x99, 0x42, 0x63, 0xbd, 0xc6, 0x07, 0x09,
- 0x0f, 0xbf, 0x29, 0xc6, 0x01, 0x7a, 0x0f, 0xa9, 0xa0, 0xc6, 0x01, 0x7a,
- 0x0f, 0xbf, 0x11, 0xc6, 0x07, 0x09, 0x0f, 0xbf, 0x48, 0x43, 0x00, 0x8c,
- 0xc2, 0x63, 0xcf, 0x46, 0x16, 0xed, 0x42, 0x63, 0xdb, 0x43, 0x01, 0x47,
- 0xc2, 0x63, 0xed, 0xdb, 0x17, 0x32, 0x01, 0x57, 0xe0, 0xc6, 0x01, 0x7a,
- 0x0f, 0xbf, 0x09, 0xc6, 0x07, 0x09, 0x0f, 0xbf, 0x40, 0xc6, 0x01, 0x7a,
- 0x0f, 0xbf, 0x19, 0xc6, 0x07, 0x09, 0x0f, 0xbf, 0x50, 0x46, 0x02, 0x91,
- 0xc2, 0x63, 0xf9, 0x48, 0x19, 0x70, 0x42, 0x64, 0xaf, 0xcd, 0x7c, 0xac,
- 0x00, 0xeb, 0xf1, 0xcd, 0x80, 0xaf, 0x00, 0xeb, 0xd8, 0xc4, 0x73, 0xed,
- 0x01, 0x04, 0xa0, 0x96, 0x00, 0xe8, 0xdb, 0x02, 0x64, 0xcb, 0x8e, 0x00,
- 0x14, 0xfb, 0x02, 0x64, 0xd1, 0x87, 0x00, 0xe8, 0x3b, 0x02, 0x64, 0xd7,
+ 0x07, 0x69, 0x00, 0xc8, 0x28, 0xc2, 0x01, 0x0e, 0x00, 0xc8, 0x71, 0x83,
+ 0x00, 0xc8, 0x68, 0xc2, 0x01, 0x0e, 0x00, 0xc8, 0x49, 0x83, 0x00, 0xc8,
+ 0x40, 0xc2, 0x01, 0x0e, 0x00, 0xc8, 0x39, 0x83, 0x00, 0xc8, 0x30, 0xc2,
+ 0x01, 0x0e, 0x00, 0xc8, 0x11, 0x83, 0x00, 0xc8, 0x08, 0x45, 0xe1, 0xde,
+ 0xc2, 0x63, 0x67, 0x44, 0x8d, 0xb5, 0x42, 0x63, 0x73, 0xc6, 0x01, 0xe9,
+ 0x0f, 0xbf, 0x29, 0xc6, 0x03, 0xfa, 0x0f, 0xa9, 0xa0, 0xc6, 0x03, 0xfa,
+ 0x0f, 0xbf, 0x11, 0xc6, 0x01, 0xe9, 0x0f, 0xbf, 0x48, 0x43, 0x03, 0x5f,
+ 0xc2, 0x63, 0x85, 0x46, 0x17, 0x56, 0x42, 0x63, 0x91, 0x43, 0x0a, 0x20,
+ 0xc2, 0x63, 0xa3, 0xdb, 0x15, 0xd0, 0x01, 0x57, 0xe0, 0xc6, 0x03, 0xfa,
+ 0x0f, 0xbf, 0x09, 0xc6, 0x01, 0xe9, 0x0f, 0xbf, 0x40, 0xc6, 0x03, 0xfa,
+ 0x0f, 0xbf, 0x19, 0xc6, 0x01, 0xe9, 0x0f, 0xbf, 0x50, 0x46, 0x02, 0x00,
+ 0xc2, 0x63, 0xaf, 0x48, 0x1b, 0x0d, 0x42, 0x64, 0x65, 0xcd, 0x7c, 0xc7,
+ 0x00, 0xeb, 0xf1, 0xcd, 0x78, 0x1b, 0x00, 0xeb, 0xd8, 0xc4, 0x6e, 0x0c,
+ 0x01, 0x04, 0xa0, 0x96, 0x00, 0xe8, 0xdb, 0x02, 0x64, 0x81, 0x8e, 0x00,
+ 0x14, 0xfb, 0x02, 0x64, 0x87, 0x87, 0x00, 0xe8, 0x3b, 0x02, 0x64, 0x8d,
0x9c, 0x00, 0xe9, 0x11, 0x99, 0x00, 0xe9, 0x09, 0x98, 0x00, 0xe9, 0x01,
- 0x97, 0x00, 0xe8, 0xe1, 0x94, 0x00, 0x14, 0x03, 0x02, 0x64, 0xe3, 0x92,
- 0x00, 0xe8, 0xc1, 0x91, 0x00, 0xe8, 0x7b, 0x02, 0x64, 0xf5, 0x8f, 0x00,
+ 0x97, 0x00, 0xe8, 0xe1, 0x94, 0x00, 0x14, 0x03, 0x02, 0x64, 0x99, 0x92,
+ 0x00, 0xe8, 0xc1, 0x91, 0x00, 0xe8, 0x7b, 0x02, 0x64, 0xab, 0x8f, 0x00,
0xe8, 0x69, 0x8d, 0x00, 0xe8, 0x59, 0x8c, 0x00, 0xe8, 0x51, 0x86, 0x00,
- 0xe8, 0x29, 0x85, 0x00, 0xe8, 0x21, 0x84, 0x00, 0x14, 0xcb, 0x02, 0x65,
- 0x03, 0x83, 0x00, 0xe8, 0x03, 0x02, 0x65, 0x09, 0x89, 0x00, 0x13, 0x13,
- 0x02, 0x65, 0x0d, 0x8b, 0x00, 0x13, 0x53, 0x02, 0x65, 0x13, 0x90, 0x00,
+ 0xe8, 0x29, 0x85, 0x00, 0xe8, 0x21, 0x84, 0x00, 0x14, 0xcb, 0x02, 0x64,
+ 0xb9, 0x83, 0x00, 0xe8, 0x03, 0x02, 0x64, 0xbf, 0x89, 0x00, 0x13, 0x13,
+ 0x02, 0x64, 0xc3, 0x8b, 0x00, 0x13, 0x53, 0x02, 0x64, 0xc9, 0x90, 0x00,
0x13, 0xa1, 0x9b, 0x00, 0x14, 0x79, 0x8a, 0x00, 0x14, 0xe1, 0x88, 0x05,
- 0x39, 0x81, 0x95, 0x05, 0x39, 0x89, 0x93, 0x05, 0x3d, 0x78, 0xca, 0x46,
- 0x9d, 0x0e, 0xf8, 0x78, 0xc4, 0x02, 0x52, 0x0e, 0xf8, 0x71, 0xc6, 0x01,
- 0x01, 0x00, 0x0d, 0xf0, 0xd4, 0x02, 0x73, 0x0e, 0xf8, 0x50, 0xd8, 0x21,
- 0x78, 0x00, 0x15, 0x11, 0xc8, 0xb7, 0xd5, 0x00, 0x0d, 0x50, 0xc5, 0x02,
- 0x6e, 0x00, 0x14, 0xc1, 0xca, 0x53, 0x88, 0x00, 0x15, 0x60, 0x9b, 0x00,
- 0x02, 0xcb, 0x02, 0x65, 0x19, 0x8f, 0x00, 0x02, 0x6b, 0x02, 0x65, 0x25,
- 0x97, 0x00, 0x02, 0xab, 0x02, 0x65, 0x31, 0x91, 0x00, 0x02, 0x7b, 0x02,
- 0x65, 0x3b, 0x8b, 0x00, 0x02, 0x4b, 0x02, 0x65, 0x5f, 0x87, 0x00, 0x02,
- 0x2b, 0x02, 0x65, 0x75, 0x83, 0x00, 0x02, 0x0b, 0x02, 0x65, 0x9d, 0x95,
- 0x00, 0x02, 0x9b, 0x02, 0x65, 0xd3, 0x9c, 0x00, 0x02, 0xd3, 0x02, 0x65,
- 0xf5, 0x9a, 0x00, 0x02, 0xc3, 0x02, 0x65, 0xfb, 0x99, 0x00, 0x02, 0xbb,
- 0x02, 0x66, 0x01, 0x98, 0x00, 0x02, 0xb3, 0x02, 0x66, 0x0d, 0x96, 0x00,
- 0x02, 0xa3, 0x02, 0x66, 0x29, 0x94, 0x00, 0x02, 0x93, 0x02, 0x66, 0x4e,
- 0x92, 0x00, 0x02, 0x83, 0x02, 0x66, 0x5e, 0x90, 0x00, 0x02, 0x73, 0x02,
- 0x66, 0x64, 0x8e, 0x00, 0x02, 0x63, 0x02, 0x66, 0x6e, 0x8d, 0x00, 0x02,
- 0x5b, 0x02, 0x66, 0x78, 0x8a, 0x00, 0x02, 0x43, 0x02, 0x66, 0x7e, 0x89,
- 0x00, 0x02, 0x3b, 0x02, 0x66, 0x96, 0x88, 0x00, 0x02, 0x33, 0x02, 0x66,
- 0xae, 0x86, 0x00, 0x02, 0x23, 0x02, 0x66, 0xb4, 0x85, 0x00, 0x02, 0x1b,
- 0x02, 0x66, 0xc1, 0x84, 0x00, 0x02, 0x13, 0x02, 0x66, 0xe2, 0x8c, 0x00,
- 0x02, 0x53, 0x02, 0x66, 0xf4, 0x93, 0x00, 0x02, 0x8a, 0x02, 0x66, 0xfa,
- 0xc2, 0x00, 0x0b, 0x00, 0x09, 0x91, 0xc2, 0x1b, 0xd8, 0x00, 0x0a, 0x90,
- 0x42, 0x01, 0x48, 0xc2, 0x67, 0x00, 0x43, 0xe7, 0x2a, 0x42, 0x67, 0x0c,
- 0xc3, 0x4a, 0xb1, 0x00, 0x74, 0x31, 0xc3, 0x1b, 0xb6, 0x00, 0x74, 0x49,
- 0xc3, 0xe7, 0x6c, 0x00, 0x74, 0x61, 0x10, 0xc2, 0x67, 0x18, 0x42, 0x02,
- 0x92, 0xc2, 0x67, 0x24, 0x06, 0xc2, 0x67, 0x2e, 0xc3, 0x22, 0x7b, 0x00,
- 0x75, 0x01, 0xc3, 0x14, 0x4e, 0x00, 0x75, 0x60, 0xc4, 0xb2, 0x72, 0x00,
- 0x74, 0xe1, 0xc3, 0x30, 0x10, 0x00, 0x74, 0xf0, 0xc3, 0x30, 0x10, 0x00,
- 0x74, 0x51, 0xc4, 0xb2, 0x72, 0x00, 0x75, 0x50, 0xc2, 0x00, 0xa4, 0x00,
- 0x75, 0x41, 0xc2, 0x0c, 0x65, 0x00, 0x75, 0x48, 0xc4, 0xb2, 0x72, 0x00,
- 0x74, 0xb1, 0xc3, 0x30, 0x10, 0x00, 0x74, 0xb8, 0xc2, 0x00, 0xb7, 0x00,
- 0x74, 0xe9, 0xc2, 0x0c, 0x81, 0x00, 0x74, 0xf8, 0xc3, 0x00, 0x34, 0x00,
- 0x75, 0x19, 0xc3, 0x63, 0x97, 0x00, 0x75, 0x28, 0xd1, 0x50, 0x73, 0x0f,
- 0xdc, 0xe9, 0xc2, 0x00, 0x54, 0x01, 0x2f, 0xc8, 0x55, 0x09, 0x0c, 0xc2,
- 0x67, 0x38, 0x48, 0x09, 0x13, 0xc2, 0x67, 0x4a, 0x4a, 0x13, 0x24, 0x42,
- 0x67, 0x56, 0xc6, 0x02, 0x21, 0x0f, 0xda, 0x91, 0xc5, 0x01, 0xf7, 0x0f,
- 0xda, 0x98, 0xd1, 0x50, 0x73, 0x0f, 0xdc, 0xe1, 0xc2, 0x00, 0x54, 0x01,
- 0x2f, 0xc0, 0xc6, 0x02, 0x21, 0x0f, 0xda, 0xb9, 0xc5, 0x01, 0xf7, 0x0f,
- 0xda, 0xc0, 0x55, 0x18, 0x10, 0xc2, 0x67, 0x62, 0x48, 0x09, 0x13, 0xc2,
- 0x67, 0x74, 0x4a, 0x13, 0x24, 0x42, 0x67, 0x80, 0xd5, 0x34, 0x78, 0x0f,
- 0xdc, 0xd1, 0xd0, 0x01, 0xd7, 0x0f, 0xdc, 0x00, 0xe0, 0x0a, 0xa7, 0x0f,
- 0xdb, 0x50, 0xe0, 0x0b, 0x47, 0x0f, 0xdc, 0x90, 0xe0, 0x06, 0x07, 0x0f,
- 0xdc, 0x88, 0xd9, 0x1d, 0x7a, 0x0f, 0xc4, 0xa9, 0xcb, 0x85, 0x85, 0x01,
- 0x0f, 0x5b, 0x02, 0x67, 0x8c, 0xc8, 0xb2, 0xf2, 0x01, 0x0f, 0x52, 0x02,
- 0x67, 0x92, 0xca, 0x03, 0x7d, 0x0f, 0xc4, 0x89, 0x48, 0x01, 0xef, 0x42,
- 0x67, 0x98, 0xd1, 0x53, 0xc5, 0x01, 0x4a, 0x49, 0xd8, 0x06, 0x4f, 0x01,
- 0x5f, 0x68, 0x45, 0x00, 0x6c, 0xc2, 0x67, 0xad, 0xdc, 0x13, 0x02, 0x01,
- 0x0e, 0x29, 0xc8, 0xb2, 0xf2, 0x01, 0x0d, 0x29, 0xc6, 0x11, 0xa5, 0x01,
- 0x48, 0x91, 0xda, 0x1b, 0x71, 0x0f, 0xdd, 0xc0, 0xc5, 0x01, 0x09, 0x01,
- 0x0d, 0xf9, 0x00, 0x42, 0x67, 0xdd, 0xc5, 0x01, 0x09, 0x01, 0x0d, 0xf1,
- 0x00, 0x42, 0x67, 0xef, 0xdb, 0x16, 0xfc, 0x01, 0x19, 0x21, 0xd2, 0x45,
- 0x06, 0x01, 0x5d, 0xc8, 0xd6, 0x31, 0x7f, 0x01, 0x52, 0x41, 0xcc, 0x08,
- 0x9b, 0x01, 0x52, 0x30, 0xca, 0xa5, 0x22, 0x01, 0x52, 0x29, 0xc7, 0x76,
- 0x66, 0x01, 0x52, 0x11, 0xca, 0x96, 0xbb, 0x01, 0x52, 0x08, 0xcf, 0x15,
- 0x8e, 0x0f, 0xbd, 0xf1, 0x42, 0x00, 0x9f, 0xc2, 0x67, 0xfb, 0x48, 0x08,
- 0xe9, 0x42, 0x68, 0x01, 0xc8, 0x00, 0xbf, 0x01, 0x3b, 0x11, 0xc6, 0x00,
- 0x71, 0x01, 0x3a, 0xb8, 0xc6, 0x01, 0x7a, 0x0f, 0xbc, 0x39, 0xd6, 0x2f,
- 0x17, 0x01, 0x36, 0xd9, 0xc6, 0x07, 0x09, 0x0f, 0xbc, 0x88, 0xdd, 0x10,
- 0xe0, 0x0f, 0xb3, 0xd9, 0xc5, 0x13, 0x58, 0x0f, 0xbd, 0x60, 0x4e, 0x49,
- 0x38, 0xc2, 0x68, 0x13, 0x45, 0x1f, 0xc8, 0x42, 0x68, 0x1f, 0x45, 0x01,
- 0x94, 0xc2, 0x68, 0x2b, 0x42, 0x02, 0x6c, 0x42, 0x68, 0x37, 0x49, 0x01,
- 0x8a, 0xc2, 0x68, 0x43, 0xc5, 0x01, 0x0f, 0x01, 0x3c, 0xd0, 0xc3, 0xe6,
- 0xf7, 0x0f, 0xb3, 0x21, 0xc9, 0xac, 0x43, 0x0f, 0xb2, 0xe0, 0xc9, 0x90,
- 0x34, 0x0f, 0xaa, 0x39, 0xca, 0xa7, 0x66, 0x01, 0x5a, 0xa8, 0x48, 0x02,
- 0xe9, 0xc2, 0x68, 0x4f, 0x00, 0x42, 0x68, 0x55, 0x50, 0x01, 0x89, 0xc2,
- 0x68, 0x61, 0x51, 0x01, 0x09, 0x42, 0x68, 0x6d, 0xd7, 0x28, 0x0f, 0x01,
- 0x3d, 0xd9, 0x46, 0x0b, 0x6f, 0x42, 0x68, 0x79, 0xca, 0x21, 0x3e, 0x0f,
- 0xbe, 0x99, 0xcd, 0x0e, 0x9f, 0x0f, 0xbe, 0xa0, 0x4b, 0x17, 0x9e, 0xc2,
- 0x68, 0x85, 0x00, 0x42, 0x68, 0x97, 0xe0, 0x09, 0x27, 0x01, 0x3d, 0x70,
- 0xd5, 0x03, 0x72, 0x0f, 0xc0, 0xc9, 0xdb, 0x17, 0xef, 0x0f, 0xc0, 0xe8,
- 0xe0, 0x08, 0xe7, 0x01, 0x3d, 0x40, 0xce, 0x70, 0x7f, 0x01, 0x3a, 0x31,
- 0xc7, 0xa2, 0x41, 0x01, 0x38, 0xa0, 0x46, 0x00, 0x6b, 0xc2, 0x68, 0xa3,
- 0xc9, 0xb2, 0x97, 0x01, 0x5a, 0xc8, 0xe0, 0x01, 0xe7, 0x01, 0x3d, 0x00,
- 0x45, 0x06, 0xf3, 0xc2, 0x68, 0xaf, 0xc9, 0x97, 0xf0, 0x0f, 0xa5, 0x91,
- 0x53, 0x01, 0x07, 0x42, 0x68, 0xbb, 0xcb, 0x01, 0x09, 0x01, 0x3c, 0xcb,
- 0x02, 0x68, 0xc7, 0x50, 0x01, 0x89, 0x42, 0x68, 0xcd, 0xc3, 0x01, 0xb4,
- 0x0f, 0xc4, 0xe3, 0x02, 0x68, 0xd9, 0xca, 0x9d, 0x16, 0x0f, 0xc4, 0xe8,
- 0xcf, 0x15, 0x8e, 0x0f, 0xbd, 0x91, 0xd2, 0x21, 0x36, 0x0f, 0xbe, 0x50,
- 0xc6, 0x79, 0x9a, 0x0f, 0xa4, 0xe9, 0xc5, 0x01, 0x0f, 0x0f, 0xa4, 0xc1,
- 0xcf, 0x66, 0x6e, 0x0f, 0x9c, 0xa0, 0x9e, 0x0d, 0x85, 0x41, 0x9d, 0x0d,
- 0x85, 0x38, 0x9e, 0x0d, 0x81, 0x09, 0x9d, 0x0d, 0x81, 0x00, 0xcd, 0x7d,
- 0xca, 0x07, 0xd8, 0xf9, 0x47, 0x06, 0xf1, 0xc2, 0x68, 0xdd, 0xc7, 0xc5,
- 0x93, 0x00, 0x2f, 0x88, 0x46, 0x00, 0x6b, 0x42, 0x68, 0xe9, 0x46, 0x00,
- 0x6b, 0x42, 0x68, 0xf5, 0x46, 0x00, 0x6b, 0x42, 0x69, 0x01, 0x46, 0x00,
- 0x6b, 0x42, 0x69, 0x0d, 0xc2, 0x07, 0x8d, 0x00, 0x2f, 0x53, 0x02, 0x69,
- 0x19, 0xc4, 0xd8, 0x4b, 0x00, 0x2f, 0x33, 0x02, 0x69, 0x1f, 0xc2, 0x00,
- 0x49, 0x00, 0x2e, 0xc2, 0x02, 0x69, 0x25, 0xc3, 0x0d, 0xd9, 0x00, 0x2f,
- 0x4b, 0x02, 0x69, 0x2b, 0xc5, 0xd9, 0x76, 0x00, 0x2f, 0x0a, 0x02, 0x69,
- 0x31, 0xcc, 0x8c, 0x14, 0x07, 0xda, 0x40, 0xcc, 0x8c, 0x14, 0x07, 0xda,
- 0x38, 0xc2, 0x00, 0x27, 0x00, 0x2f, 0x1b, 0x02, 0x69, 0x37, 0xc3, 0xbf,
- 0x5a, 0x00, 0x2e, 0xd3, 0x02, 0x69, 0x3d, 0xc5, 0xd8, 0x4a, 0x00, 0x2f,
- 0x29, 0xc3, 0x1f, 0xd8, 0x00, 0x2e, 0xf9, 0xc3, 0x0b, 0x0e, 0x00, 0x2e,
- 0xe8, 0xcc, 0x8c, 0x14, 0x07, 0xda, 0x00, 0xcc, 0x8c, 0x14, 0x07, 0xd9,
- 0xf0, 0xcc, 0x8c, 0x14, 0x07, 0xd9, 0xe0, 0x46, 0x00, 0x6b, 0x42, 0x69,
- 0x43, 0xcc, 0x8c, 0x14, 0x07, 0xd9, 0xb0, 0xcb, 0x9a, 0xae, 0x07, 0xd9,
- 0xa1, 0x96, 0x00, 0x2e, 0xb8, 0xcc, 0x8c, 0x14, 0x07, 0xd9, 0x98, 0xcc,
- 0x8c, 0x14, 0x07, 0xd9, 0x90, 0x0e, 0xc2, 0x69, 0x4f, 0xc3, 0x15, 0x1d,
- 0x00, 0x2f, 0x10, 0xc3, 0x26, 0x29, 0x07, 0xd9, 0x41, 0xc4, 0x5d, 0xe2,
- 0x07, 0xd9, 0x39, 0xc9, 0xb4, 0x50, 0x07, 0xd9, 0x31, 0xc5, 0xa3, 0xa1,
- 0x07, 0xd9, 0x29, 0xc3, 0xbf, 0x5a, 0x07, 0xd9, 0x21, 0xc2, 0x01, 0xf0,
- 0x07, 0xd9, 0x19, 0xc5, 0x45, 0xf6, 0x07, 0xd9, 0x11, 0xc4, 0x08, 0x1a,
- 0x07, 0xd9, 0x08, 0xc5, 0xcc, 0xda, 0x00, 0x2d, 0xc3, 0x02, 0x69, 0x5e,
- 0xc5, 0xd5, 0xca, 0x00, 0x2d, 0xd8, 0xc6, 0x44, 0x67, 0x00, 0x2e, 0x11,
- 0x0a, 0xc2, 0x69, 0x64, 0xc4, 0xda, 0xb2, 0x00, 0x2d, 0xb0, 0xc4, 0x6c,
- 0x9b, 0x00, 0x2d, 0xcb, 0x02, 0x69, 0x70, 0xc4, 0xd6, 0x2f, 0x00, 0x2d,
- 0xa1, 0x45, 0xdd, 0x9f, 0x42, 0x69, 0x76, 0xc6, 0xce, 0xe9, 0x00, 0x2f,
- 0xa1, 0xc3, 0x13, 0xfc, 0x00, 0x2f, 0x98, 0xc3, 0xe6, 0x7f, 0x00, 0x2c,
- 0xc1, 0x44, 0xe5, 0x43, 0x42, 0x69, 0x88, 0x46, 0xd3, 0xcf, 0xc2, 0x69,
- 0x94, 0xc3, 0x1e, 0x23, 0x00, 0x2c, 0xd8, 0xc7, 0xc8, 0xb1, 0x00, 0x2c,
- 0xe8, 0xc7, 0xc2, 0x44, 0x00, 0x2d, 0x30, 0xce, 0x6d, 0x53, 0x02, 0x6e,
- 0x01, 0xcc, 0x82, 0xf0, 0x02, 0x6e, 0xe9, 0xc7, 0xc6, 0x3b, 0x02, 0x6f,
- 0x88, 0x14, 0xc2, 0x69, 0xa0, 0xcc, 0x87, 0x10, 0x02, 0x6e, 0xe0, 0xc3,
- 0x0a, 0x93, 0x02, 0x6f, 0x79, 0xc7, 0xc2, 0x59, 0x02, 0x6f, 0xb8, 0x12,
- 0xc2, 0x69, 0xac, 0xc6, 0xd2, 0x55, 0x02, 0x6e, 0xc8, 0xc7, 0xca, 0xef,
- 0x01, 0x5e, 0x19, 0xc7, 0xcb, 0x19, 0x01, 0x59, 0x18, 0xc7, 0x33, 0x21,
- 0x00, 0x00, 0x4b, 0x02, 0x69, 0xb6, 0xc4, 0x3b, 0x42, 0x01, 0x5b, 0xf0,
- 0x95, 0x0f, 0x9e, 0xc0, 0xc4, 0x18, 0x83, 0x08, 0x69, 0xb9, 0xc2, 0x26,
- 0x51, 0x08, 0x69, 0xb0, 0xc3, 0x0c, 0x5b, 0x08, 0x69, 0xa9, 0xc3, 0x06,
- 0x9e, 0x08, 0x69, 0xa0, 0xc4, 0x04, 0x5e, 0x08, 0x69, 0x99, 0xc2, 0x01,
- 0x47, 0x08, 0x69, 0x90, 0xc3, 0x05, 0xd6, 0x08, 0x69, 0x39, 0xc2, 0x00,
- 0xc1, 0x08, 0x69, 0x31, 0xc4, 0x6d, 0xa8, 0x08, 0x69, 0x28, 0xc2, 0x1d,
- 0x5f, 0x08, 0x68, 0xd9, 0xc2, 0x01, 0x29, 0x08, 0x68, 0xd1, 0x83, 0x08,
- 0x68, 0xa8, 0x45, 0xdd, 0xd1, 0xc2, 0x69, 0xba, 0x83, 0x08, 0x68, 0x89,
- 0xc2, 0x00, 0xa4, 0x08, 0x68, 0x40, 0xc2, 0x02, 0x59, 0x08, 0x68, 0x69,
- 0x83, 0x08, 0x68, 0x60, 0xc2, 0x0b, 0xc6, 0x08, 0x68, 0x59, 0x83, 0x08,
- 0x68, 0x50, 0xc2, 0x00, 0xb3, 0x08, 0x68, 0x21, 0x83, 0x08, 0x68, 0x18,
- 0x83, 0x08, 0x68, 0x79, 0xc2, 0x00, 0xa4, 0x08, 0x68, 0x80, 0x83, 0x00,
- 0xb9, 0x41, 0xc2, 0x01, 0x29, 0x00, 0xb9, 0x28, 0xc5, 0xd6, 0x3d, 0x00,
- 0x88, 0x2b, 0x02, 0x69, 0xc6, 0x15, 0xc2, 0x69, 0xca, 0xc5, 0x98, 0x41,
- 0x00, 0x88, 0x93, 0x02, 0x69, 0xd9, 0x12, 0xc2, 0x69, 0xdf, 0xc5, 0xbb,
- 0xa0, 0x00, 0x88, 0x5b, 0x02, 0x69, 0xf7, 0xc5, 0xdb, 0x51, 0x00, 0x88,
- 0x33, 0x02, 0x69, 0xfb, 0x16, 0xc2, 0x69, 0xff, 0x0d, 0xc2, 0x6a, 0x0e,
- 0xc5, 0xd9, 0x80, 0x00, 0x88, 0x13, 0x02, 0x6a, 0x23, 0x05, 0xc2, 0x6a,
- 0x27, 0x42, 0x05, 0xd0, 0xc2, 0x6a, 0x3c, 0xc6, 0x94, 0x2b, 0x00, 0x8a,
- 0xf8, 0x49, 0xb2, 0x73, 0xc2, 0x6a, 0x48, 0x49, 0xb0, 0xf0, 0x42, 0x6a,
- 0x7f, 0x0d, 0xc2, 0x6a, 0xc6, 0x15, 0xc2, 0x6a, 0xdb, 0xc5, 0xd9, 0x80,
- 0x01, 0x89, 0xa3, 0x02, 0x6a, 0xea, 0x16, 0xc2, 0x6a, 0xee, 0xc5, 0xd6,
- 0x3d, 0x01, 0x89, 0xcb, 0x02, 0x6a, 0xfa, 0xc5, 0xdb, 0x51, 0x01, 0x8a,
- 0x0b, 0x02, 0x6a, 0xfe, 0x12, 0xc2, 0x6b, 0x02, 0x8b, 0x01, 0x8b, 0x1b,
- 0x02, 0x6b, 0x17, 0x05, 0xc2, 0x6b, 0x1d, 0xc5, 0x98, 0x41, 0x01, 0x8a,
- 0x71, 0x83, 0x01, 0x8a, 0x7b, 0x02, 0x6b, 0x29, 0x1b, 0xc2, 0x6b, 0x36,
- 0x87, 0x01, 0x8a, 0xa3, 0x02, 0x6b, 0x56, 0x91, 0x01, 0x8a, 0xbb, 0x02,
- 0x6b, 0x5e, 0x19, 0xc2, 0x6b, 0x62, 0x97, 0x01, 0x8a, 0xe0, 0x19, 0xc2,
- 0x6b, 0x74, 0x0a, 0xc2, 0x6b, 0x7e, 0xc2, 0x00, 0x4d, 0x01, 0x81, 0xc0,
- 0xc3, 0x06, 0x9e, 0x01, 0x81, 0x21, 0xc3, 0x0c, 0x5b, 0x01, 0x81, 0x28,
- 0xc2, 0x26, 0x51, 0x01, 0x81, 0x31, 0xc4, 0x18, 0x83, 0x01, 0x81, 0x38,
- 0xc8, 0x0c, 0x4a, 0x08, 0x47, 0xf8, 0xc5, 0x2a, 0x13, 0x08, 0x47, 0xf1,
- 0xc2, 0x00, 0x4d, 0x08, 0x47, 0xe8, 0xc2, 0x26, 0xfa, 0x08, 0x47, 0xa9,
- 0xc3, 0x1a, 0xba, 0x08, 0x47, 0x40, 0xc3, 0x0d, 0xd9, 0x08, 0x47, 0xa1,
- 0x03, 0x42, 0x6b, 0x8a, 0xc2, 0x17, 0x51, 0x08, 0x47, 0x79, 0xc4, 0x37,
- 0x5c, 0x08, 0x47, 0x00, 0xc2, 0x00, 0x6e, 0x08, 0x47, 0x38, 0x19, 0xc2,
- 0x6b, 0x96, 0x15, 0xc2, 0x6b, 0x9e, 0x83, 0x07, 0xfb, 0x89, 0x8b, 0x07,
- 0xfb, 0x91, 0x97, 0x07, 0xfb, 0x99, 0x87, 0x07, 0xfb, 0xa1, 0x91, 0x07,
- 0xfb, 0xa9, 0x0d, 0xc2, 0x6b, 0xb8, 0x16, 0xc2, 0x6b, 0xcc, 0x90, 0x07,
- 0xfc, 0xeb, 0x02, 0x6b, 0xe0, 0x0a, 0xc2, 0x6b, 0xf4, 0x0f, 0xc2, 0x6c,
- 0x08, 0x1b, 0xc2, 0x6c, 0x1c, 0x14, 0x42, 0x6c, 0x28, 0xc5, 0x92, 0x32,
- 0x07, 0xfd, 0x0b, 0x02, 0x6c, 0x3c, 0xc6, 0xc0, 0x37, 0x07, 0xfd, 0xd8,
- 0x44, 0x29, 0x95, 0xc2, 0x6c, 0x42, 0xc3, 0x3c, 0x08, 0x07, 0xfd, 0xa8,
- 0x02, 0x42, 0x6c, 0x60, 0xc4, 0x7a, 0x93, 0x07, 0xfd, 0x93, 0x02, 0x6c,
- 0x82, 0xc6, 0xc1, 0x07, 0x07, 0xfd, 0xe8, 0xc4, 0xbb, 0xa1, 0x07, 0xfd,
- 0xb8, 0xc4, 0xc7, 0x2b, 0x07, 0xfd, 0xc1, 0xc6, 0xc7, 0x2a, 0x07, 0xfd,
- 0xd0, 0xc6, 0xc6, 0xf2, 0x07, 0xfd, 0xe1, 0xc5, 0xc8, 0x2e, 0x07, 0xfd,
- 0x38, 0x87, 0x07, 0xfe, 0x18, 0x83, 0x07, 0xfe, 0x23, 0x02, 0x6c, 0x88,
- 0x87, 0x07, 0xfe, 0x5b, 0x02, 0x6c, 0x8c, 0x91, 0x07, 0xfe, 0x91, 0x97,
- 0x07, 0xfe, 0xb9, 0x8b, 0x07, 0xfe, 0xd8, 0x91, 0x07, 0xfe, 0x31, 0x97,
- 0x07, 0xfe, 0xd0, 0x87, 0x07, 0xfe, 0x78, 0x83, 0x07, 0xfe, 0x6b, 0x02,
- 0x6c, 0x90, 0x87, 0x07, 0xfe, 0xab, 0x02, 0x6c, 0x94, 0x8b, 0x07, 0xfe,
- 0xb0, 0x02, 0x42, 0x6c, 0x98, 0x0d, 0xc2, 0x6c, 0xa4, 0x19, 0xc2, 0x6c,
- 0xb0, 0x83, 0x01, 0x82, 0x09, 0x8b, 0x01, 0x82, 0x19, 0x97, 0x01, 0x82,
- 0x29, 0x87, 0x01, 0x82, 0x39, 0x91, 0x01, 0x82, 0x49, 0xc2, 0x00, 0x16,
- 0x01, 0x83, 0x19, 0x1b, 0x42, 0x6c, 0xc0, 0xc2, 0x05, 0xd0, 0x0d, 0x80,
- 0x09, 0xc2, 0x13, 0x1d, 0x0d, 0x88, 0xf8, 0xcd, 0x7e, 0x4c, 0x0f, 0xdc,
- 0xb1, 0xc5, 0x00, 0xb9, 0x0f, 0xdd, 0x88, 0xe0, 0x01, 0x07, 0x0f, 0xdd,
- 0xa0, 0xc5, 0x61, 0x8c, 0x01, 0x11, 0xf1, 0xc9, 0xaa, 0x66, 0x01, 0x72,
- 0x2a, 0x02, 0x6c, 0xcc, 0xc6, 0xd0, 0x8d, 0x07, 0xff, 0xc9, 0xc9, 0x19,
- 0xf5, 0x07, 0xff, 0xd1, 0xca, 0x7d, 0xa6, 0x07, 0xff, 0xd8, 0x43, 0x13,
- 0x72, 0xc2, 0x6c, 0xd2, 0x46, 0x00, 0x95, 0xc2, 0x6c, 0xd8, 0x45, 0x00,
- 0x6c, 0x42, 0x6c, 0xe4, 0x42, 0x01, 0xbd, 0xc2, 0x6c, 0xf6, 0xc7, 0x76,
- 0x66, 0x01, 0x50, 0xd9, 0xcc, 0x08, 0x9b, 0x01, 0x50, 0xc9, 0xca, 0x9b,
- 0xd6, 0x01, 0x50, 0xc1, 0xd9, 0x1d, 0xc5, 0x01, 0x50, 0xb9, 0xcd, 0x77,
- 0x57, 0x01, 0x50, 0x70, 0xd6, 0x2d, 0x5f, 0x01, 0x50, 0xa9, 0xd1, 0x4f,
- 0x41, 0x01, 0x50, 0x78, 0xc3, 0x01, 0xb4, 0x08, 0x5b, 0xc3, 0x02, 0x6d,
- 0x02, 0x16, 0xc2, 0x6d, 0x06, 0xc4, 0x06, 0x9d, 0x08, 0x5b, 0xd8, 0x16,
- 0xc2, 0x6d, 0x16, 0x15, 0xc2, 0x6d, 0x22, 0xc2, 0x00, 0x27, 0x08, 0x5b,
- 0x79, 0xc3, 0x1f, 0xd8, 0x08, 0x5b, 0x69, 0xc8, 0xbf, 0xb5, 0x08, 0x5b,
- 0x61, 0xc6, 0xd0, 0x5d, 0x08, 0x5b, 0x59, 0xc4, 0xe2, 0x57, 0x08, 0x5b,
- 0x51, 0xc4, 0x4b, 0x98, 0x08, 0x5b, 0x49, 0xc2, 0x01, 0xf0, 0x08, 0x5b,
- 0x23, 0x02, 0x6d, 0x2c, 0xc5, 0x4b, 0x92, 0x08, 0x5b, 0x31, 0xcd, 0x78,
- 0xa9, 0x08, 0x5b, 0x29, 0xc6, 0x45, 0xf6, 0x08, 0x5b, 0x19, 0xc5, 0xa1,
- 0x94, 0x08, 0x5b, 0x11, 0xc4, 0xe4, 0x8f, 0x08, 0x5b, 0x09, 0xc5, 0xa8,
- 0xf1, 0x08, 0x5b, 0x00, 0xc3, 0x01, 0xb4, 0x08, 0x5a, 0xc3, 0x02, 0x6d,
- 0x32, 0x16, 0xc2, 0x6d, 0x36, 0xc4, 0x06, 0x9d, 0x08, 0x5a, 0xd8, 0x16,
- 0xc2, 0x6d, 0x46, 0x15, 0xc2, 0x6d, 0x52, 0xc4, 0x5d, 0xe2, 0x08, 0x5a,
- 0x99, 0xc3, 0x0b, 0x0e, 0x08, 0x5a, 0x61, 0xc6, 0xd0, 0x5d, 0x08, 0x5a,
- 0x59, 0xc4, 0xe2, 0x57, 0x08, 0x5a, 0x51, 0xc4, 0x4b, 0x98, 0x08, 0x5a,
- 0x49, 0xc2, 0x01, 0xf0, 0x08, 0x5a, 0x23, 0x02, 0x6d, 0x5c, 0xc5, 0x4b,
- 0x92, 0x08, 0x5a, 0x31, 0xc3, 0x78, 0xa9, 0x08, 0x5a, 0x29, 0xc6, 0x45,
- 0xf6, 0x08, 0x5a, 0x19, 0xc5, 0xa1, 0x94, 0x08, 0x5a, 0x11, 0xc4, 0xe4,
- 0x8f, 0x08, 0x5a, 0x09, 0x03, 0xc2, 0x6d, 0x62, 0xc3, 0x1f, 0xd8, 0x08,
- 0x5a, 0x69, 0xc2, 0x00, 0x27, 0x08, 0x5a, 0x81, 0xc4, 0xbf, 0xb9, 0x08,
- 0x5a, 0x90, 0xc3, 0x01, 0xb4, 0x00, 0x00, 0xf9, 0x16, 0xc2, 0x6d, 0x6e,
- 0xc4, 0x06, 0x9d, 0x00, 0x00, 0xe0, 0x4a, 0x0d, 0x47, 0xc2, 0x6d, 0x7a,
- 0x49, 0x40, 0x32, 0xc2, 0x6d, 0x84, 0xc5, 0xda, 0x9d, 0x0f, 0x65, 0x0b,
- 0x02, 0x6d, 0xa2, 0xc4, 0x3f, 0xb7, 0x0f, 0x64, 0xf3, 0x02, 0x6d, 0xa8,
- 0xc4, 0x22, 0x71, 0x0f, 0x63, 0xcb, 0x02, 0x6d, 0xae, 0xc5, 0x01, 0xdb,
- 0x0f, 0x63, 0xc3, 0x02, 0x6d, 0xbb, 0x15, 0xc2, 0x6d, 0xc6, 0x08, 0xc2,
- 0x6d, 0xd8, 0x16, 0xc2, 0x6d, 0xe0, 0xc3, 0x01, 0xb4, 0x0f, 0x63, 0x8a,
- 0x02, 0x6d, 0xf1, 0xce, 0x0a, 0xb3, 0x0f, 0x65, 0x79, 0x44, 0x01, 0xb4,
- 0x42, 0x6d, 0xf5, 0xc3, 0x0c, 0x5b, 0x0e, 0x9b, 0xb1, 0xc3, 0x06, 0x9e,
- 0x0e, 0x9b, 0xa8, 0xc4, 0x04, 0x5e, 0x0e, 0x9b, 0xa1, 0xc2, 0x01, 0x47,
- 0x0e, 0x9b, 0x98, 0x0c, 0xc2, 0x6e, 0x01, 0xc8, 0xba, 0xad, 0x01, 0x96,
- 0x09, 0x42, 0x00, 0xad, 0xc2, 0x6e, 0x0b, 0x03, 0xc2, 0x6e, 0x15, 0xc9,
- 0xb0, 0x72, 0x01, 0x96, 0x41, 0xc7, 0xca, 0xe8, 0x01, 0x96, 0x49, 0xc8,
- 0xb8, 0xdd, 0x01, 0x96, 0x51, 0x06, 0xc2, 0x6e, 0x21, 0x45, 0xde, 0xdf,
- 0x42, 0x6e, 0x2d, 0xc5, 0x01, 0xf7, 0x01, 0x7f, 0x81, 0xd0, 0x58, 0x72,
- 0x01, 0x7f, 0x90, 0xc5, 0x01, 0x62, 0x01, 0x7f, 0x89, 0xd0, 0x58, 0x52,
- 0x01, 0x7f, 0x98, 0xc5, 0x00, 0x95, 0x01, 0x7f, 0xa9, 0xc5, 0x01, 0x62,
- 0x01, 0x7f, 0xb1, 0x0e, 0xc2, 0x6e, 0x52, 0x46, 0x01, 0xc7, 0x42, 0x6e,
- 0x5e, 0xc8, 0xc1, 0x15, 0x01, 0x8c, 0xa1, 0xc8, 0xbb, 0xdd, 0x01, 0x8c,
- 0xd8, 0xc5, 0x00, 0xb9, 0x01, 0x8c, 0xa9, 0xc7, 0x34, 0xc5, 0x01, 0x8c,
- 0xe0, 0xc2, 0x00, 0x4d, 0x08, 0x42, 0xdb, 0x02, 0x6e, 0x6a, 0x19, 0xc2,
- 0x6e, 0x70, 0xc4, 0x04, 0x5e, 0x08, 0x42, 0xd0, 0x00, 0x42, 0x6e, 0x7a,
- 0xc2, 0x26, 0xfa, 0x08, 0x42, 0xa9, 0xc3, 0x1a, 0xba, 0x08, 0x42, 0x40,
- 0xc3, 0x0d, 0xd9, 0x08, 0x42, 0xa1, 0x03, 0x42, 0x6e, 0x86, 0xc3, 0x15,
- 0x1d, 0x08, 0x42, 0x79, 0xc4, 0x37, 0x5c, 0x08, 0x42, 0x00, 0xc2, 0x00,
- 0x6e, 0x08, 0x42, 0x38, 0xca, 0xa8, 0x10, 0x0f, 0xd2, 0x43, 0x02, 0x6e,
- 0x92, 0xc4, 0xe0, 0xaf, 0x01, 0x32, 0xb3, 0x02, 0x6e, 0x98, 0xc4, 0xe5,
- 0xdf, 0x01, 0x32, 0xcb, 0x02, 0x6e, 0x9e, 0x0d, 0xc2, 0x6e, 0xa4, 0xc6,
- 0xd1, 0xf5, 0x01, 0x32, 0xbb, 0x02, 0x6e, 0xb6, 0xc5, 0xb5, 0xaf, 0x01,
- 0x32, 0xab, 0x02, 0x6e, 0xbc, 0x47, 0x41, 0x9b, 0x42, 0x6e, 0xc2, 0x00,
- 0x42, 0x6e, 0xde, 0x46, 0x00, 0x6b, 0x42, 0x6e, 0xea, 0x03, 0xc2, 0x6e,
- 0xf6, 0xc5, 0xcb, 0x1b, 0x01, 0x59, 0x08, 0xc7, 0xc9, 0xbb, 0x01, 0x4e,
- 0xb1, 0xd0, 0x58, 0x42, 0x01, 0x59, 0x68, 0x00, 0x42, 0x6f, 0x05, 0x00,
- 0x42, 0x6f, 0x17, 0xca, 0x83, 0xbe, 0x01, 0x31, 0xd1, 0x44, 0x04, 0x75,
- 0x42, 0x6f, 0x26, 0xc9, 0x90, 0x34, 0x0f, 0xaa, 0x31, 0xca, 0xa7, 0x48,
- 0x01, 0x58, 0xe0, 0x00, 0xc2, 0x6f, 0x30, 0x4a, 0x01, 0x89, 0x42, 0x6f,
- 0x3c, 0xe0, 0x08, 0xa7, 0x0f, 0xbd, 0x00, 0x00, 0x42, 0x6f, 0x4e, 0xc4,
- 0x58, 0x66, 0x01, 0x36, 0x09, 0xc3, 0x14, 0x99, 0x01, 0x36, 0x00, 0x4a,
- 0x03, 0xfd, 0xc2, 0x6f, 0x66, 0x4a, 0x01, 0x89, 0x42, 0x6f, 0x78, 0x46,
- 0x01, 0xe9, 0xc2, 0x6f, 0x84, 0xc7, 0xca, 0x4e, 0x01, 0x1f, 0x10, 0x11,
- 0xc2, 0x6f, 0x8a, 0xc2, 0x01, 0x28, 0x01, 0x34, 0x82, 0x02, 0x6f, 0x96,
- 0xc4, 0x0e, 0xa8, 0x01, 0x39, 0x39, 0xc4, 0x12, 0x72, 0x01, 0x5e, 0x70,
- 0x4a, 0x03, 0xfd, 0xc2, 0x6f, 0x9c, 0x4a, 0x01, 0x89, 0x42, 0x6f, 0xa8,
- 0xc5, 0x08, 0x42, 0x01, 0x30, 0xe9, 0xce, 0x25, 0x12, 0x0f, 0xa2, 0x30,
- 0xc8, 0x01, 0xe7, 0x01, 0x2d, 0x9b, 0x02, 0x6f, 0xb8, 0xce, 0x73, 0x9d,
- 0x01, 0x2d, 0xa9, 0xc7, 0xc8, 0x4f, 0x0f, 0xde, 0x50, 0x15, 0xc2, 0x6f,
- 0xbe, 0xc7, 0x3f, 0x2e, 0x01, 0x59, 0x31, 0xc7, 0x08, 0xc0, 0x01, 0x59,
- 0x40, 0xc4, 0x2e, 0xc4, 0x0f, 0x9f, 0x89, 0xc5, 0xb9, 0x50, 0x01, 0x59,
- 0x00, 0xc9, 0x45, 0x0b, 0x01, 0x2d, 0x79, 0xc3, 0x00, 0xe8, 0x01, 0x57,
- 0xf1, 0xc7, 0x58, 0x4b, 0x01, 0x59, 0x78, 0xc4, 0x18, 0x83, 0x0f, 0x17,
- 0xb9, 0xc2, 0x26, 0x51, 0x0f, 0x17, 0xb0, 0xc3, 0x0c, 0x5b, 0x0f, 0x17,
- 0xa9, 0xc3, 0x06, 0x9e, 0x0f, 0x17, 0xa0, 0xc4, 0x04, 0x5e, 0x0f, 0x17,
- 0x99, 0xc2, 0x01, 0x47, 0x0f, 0x17, 0x90, 0xc2, 0x04, 0x6e, 0x0f, 0x17,
- 0x78, 0xc2, 0x04, 0x6e, 0x0f, 0x17, 0x68, 0xc2, 0x14, 0x44, 0x0f, 0x17,
- 0x59, 0x83, 0x0f, 0x16, 0x30, 0xc2, 0x00, 0x4d, 0x0f, 0x17, 0x50, 0xc2,
- 0x1d, 0x5f, 0x0f, 0x17, 0x49, 0xc2, 0x01, 0x29, 0x0f, 0x16, 0xe9, 0x83,
- 0x0f, 0x16, 0x48, 0x83, 0x0f, 0x16, 0x03, 0x02, 0x6f, 0xd0, 0xc2, 0x00,
- 0x35, 0x0f, 0x17, 0x21, 0x97, 0x0f, 0x16, 0xb0, 0x90, 0x0f, 0x17, 0x38,
- 0x90, 0x0f, 0x17, 0x32, 0x02, 0x6f, 0xd7, 0xc2, 0x00, 0x35, 0x0f, 0x17,
- 0x28, 0xc2, 0x02, 0x59, 0x0f, 0x17, 0x09, 0xc2, 0x0c, 0x65, 0x0f, 0x17,
- 0x01, 0xc2, 0x00, 0xa4, 0x0f, 0x16, 0x61, 0x83, 0x0f, 0x16, 0x58, 0xc3,
- 0x8c, 0x67, 0x0f, 0x16, 0xf9, 0x83, 0x0f, 0x16, 0x40, 0xc2, 0x00, 0xa4,
- 0x0f, 0x16, 0xc9, 0x83, 0x0f, 0x16, 0xa0, 0xc2, 0x00, 0xa4, 0x0f, 0x16,
- 0x79, 0x83, 0x0f, 0x16, 0x70, 0x83, 0x0f, 0x16, 0x51, 0xc2, 0x00, 0xa4,
- 0x0f, 0x16, 0x38, 0xc6, 0x18, 0x83, 0x08, 0xc7, 0x81, 0xc4, 0xcf, 0xf7,
- 0x08, 0xc7, 0x78, 0xc4, 0x43, 0xcc, 0x08, 0xc7, 0x71, 0xc4, 0x47, 0x9b,
- 0x08, 0xc7, 0x68, 0xc5, 0x0c, 0x54, 0x08, 0xc7, 0x61, 0xc5, 0x2a, 0x13,
- 0x08, 0xc7, 0x59, 0xc2, 0x00, 0x4d, 0x08, 0xc7, 0x50, 0xc4, 0x18, 0x83,
- 0x08, 0xc7, 0x39, 0xc2, 0x26, 0x51, 0x08, 0xc7, 0x30, 0xc3, 0x0c, 0x5b,
- 0x08, 0xc7, 0x29, 0xc3, 0x06, 0x9e, 0x08, 0xc7, 0x20, 0xc4, 0x04, 0x5e,
- 0x08, 0xc7, 0x19, 0xc2, 0x01, 0x47, 0x08, 0xc7, 0x10, 0xc2, 0x23, 0xb4,
- 0x08, 0xc6, 0xf1, 0xc3, 0xe7, 0x69, 0x08, 0xc6, 0xe8, 0xc2, 0x00, 0xbb,
- 0x08, 0xc6, 0xe1, 0x11, 0xc2, 0x6f, 0xdb, 0xc3, 0xbd, 0x1e, 0x08, 0xc6,
- 0xc8, 0x8f, 0x08, 0xc6, 0xb1, 0x96, 0x08, 0xc6, 0xa9, 0xc2, 0x00, 0x35,
- 0x08, 0xc6, 0x50, 0xc3, 0x3f, 0x7b, 0x08, 0xc6, 0x99, 0xc3, 0x57, 0x5c,
- 0x08, 0xc6, 0x00, 0xc2, 0x00, 0x52, 0x08, 0xc6, 0x88, 0x10, 0x42, 0x6f,
- 0xe7, 0x85, 0x08, 0xc6, 0x79, 0x97, 0x08, 0xc6, 0x38, 0x97, 0x08, 0xc6,
- 0x1b, 0x02, 0x6f, 0xef, 0x91, 0x08, 0xc6, 0x29, 0x83, 0x08, 0xc6, 0x20,
- 0xc2, 0x23, 0xb4, 0x08, 0xc5, 0xf1, 0xc3, 0xe7, 0x69, 0x08, 0xc5, 0xe8,
- 0xc2, 0x00, 0xbb, 0x08, 0xc5, 0xe1, 0x11, 0xc2, 0x6f, 0xf3, 0xc3, 0xbd,
- 0x1e, 0x08, 0xc5, 0xc8, 0x8f, 0x08, 0xc5, 0xb1, 0x96, 0x08, 0xc5, 0xa9,
- 0xc2, 0x00, 0x35, 0x08, 0xc5, 0x50, 0xc3, 0x3f, 0x7b, 0x08, 0xc5, 0x99,
- 0xc3, 0x57, 0x5c, 0x08, 0xc5, 0x00, 0xc2, 0x00, 0x52, 0x08, 0xc5, 0x88,
- 0x10, 0x42, 0x6f, 0xff, 0x85, 0x08, 0xc5, 0x79, 0x97, 0x08, 0xc5, 0x38,
- 0x97, 0x08, 0xc5, 0x1b, 0x02, 0x70, 0x07, 0x91, 0x08, 0xc5, 0x29, 0x83,
- 0x08, 0xc5, 0x20, 0xd3, 0x44, 0xf2, 0x01, 0x39, 0x29, 0x43, 0x00, 0xbf,
- 0x42, 0x70, 0x0b, 0xc4, 0x00, 0xba, 0x01, 0x02, 0xd9, 0xcb, 0x01, 0xbc,
- 0x01, 0x02, 0xc0, 0x12, 0xc2, 0x70, 0x11, 0xcc, 0x8c, 0x98, 0x0f, 0xc8,
- 0xa9, 0x16, 0xc2, 0x70, 0x23, 0x11, 0xc2, 0x70, 0x2f, 0xcf, 0x62, 0xcc,
- 0x0f, 0xb2, 0x29, 0xcc, 0x8a, 0x7c, 0x0f, 0xb2, 0x21, 0xd0, 0x5d, 0x12,
- 0x0f, 0xb0, 0xdb, 0x02, 0x70, 0x41, 0x42, 0x00, 0x79, 0xc2, 0x70, 0x47,
- 0xcf, 0x6b, 0xd2, 0x0f, 0xb1, 0x21, 0x0f, 0xc2, 0x70, 0x53, 0xdb, 0x15,
- 0x67, 0x0f, 0xc9, 0x59, 0xda, 0x1a, 0x1f, 0x0f, 0xcb, 0xa1, 0xce, 0x6d,
- 0xfb, 0x0f, 0xd7, 0x20, 0xcf, 0x34, 0x63, 0x01, 0x49, 0x61, 0xd0, 0x1f,
- 0xc2, 0x01, 0x49, 0x78, 0xc4, 0x22, 0x71, 0x07, 0xf8, 0xc9, 0xc4, 0x15,
- 0xd3, 0x07, 0xf8, 0x81, 0xc3, 0x01, 0xb4, 0x07, 0xf8, 0x89, 0x16, 0xc2,
- 0x70, 0x5f, 0x08, 0xc2, 0x70, 0x6b, 0x15, 0xc2, 0x70, 0x77, 0xc5, 0x01,
- 0xdb, 0x07, 0xf8, 0xc0, 0xc3, 0x0e, 0x41, 0x07, 0xf8, 0xd1, 0x42, 0x00,
- 0xac, 0x42, 0x70, 0x83, 0xcc, 0x86, 0xd4, 0x07, 0xf8, 0xe1, 0x43, 0x02,
- 0x4f, 0x42, 0x70, 0x8d, 0x4f, 0x07, 0x17, 0xc2, 0x70, 0xa5, 0x4d, 0x26,
- 0xea, 0x42, 0x71, 0x0d, 0xce, 0x24, 0xb2, 0x07, 0xf9, 0xe9, 0xcd, 0x02,
- 0x52, 0x07, 0xfa, 0xe9, 0xd1, 0x57, 0x9f, 0x07, 0xfb, 0x01, 0xcb, 0x1a,
- 0x3f, 0x07, 0xf8, 0x48, 0xc9, 0xab, 0x98, 0x0f, 0x98, 0xd9, 0xc6, 0x00,
- 0x71, 0x0f, 0x98, 0x98, 0xc4, 0x23, 0x73, 0x08, 0x52, 0xc1, 0xc4, 0x73,
- 0x66, 0x08, 0x52, 0xa8, 0x11, 0xc2, 0x71, 0x75, 0xc4, 0x1c, 0xd0, 0x08,
- 0x52, 0xb0, 0xcb, 0x84, 0x95, 0x08, 0x52, 0x99, 0xc5, 0x01, 0x7b, 0x08,
- 0x52, 0x90, 0xc8, 0x50, 0x00, 0x08, 0x52, 0x39, 0xc7, 0x0c, 0x4b, 0x08,
- 0x52, 0x30, 0xc5, 0x2a, 0x13, 0x08, 0x52, 0x29, 0xc2, 0x00, 0x4d, 0x08,
- 0x52, 0x20, 0xc4, 0x04, 0x5e, 0x08, 0x52, 0x11, 0xc2, 0x01, 0x47, 0x08,
- 0x52, 0x08, 0xcb, 0x34, 0xc1, 0x08, 0x50, 0x61, 0x45, 0x00, 0xcb, 0x42,
- 0x71, 0x7f, 0xc7, 0x0e, 0xae, 0x08, 0x51, 0xd1, 0xcf, 0x63, 0xda, 0x08,
- 0x50, 0x68, 0xc2, 0x00, 0xa4, 0x08, 0x51, 0xa9, 0x83, 0x08, 0x51, 0x60,
- 0x16, 0xc2, 0x71, 0x95, 0xc2, 0x00, 0xa4, 0x08, 0x51, 0x01, 0x83, 0x08,
- 0x50, 0xf8, 0xc2, 0x00, 0xa4, 0x08, 0x51, 0x39, 0x83, 0x08, 0x51, 0x30,
- 0xc2, 0x00, 0xa4, 0x08, 0x51, 0x29, 0x83, 0x08, 0x51, 0x20, 0x83, 0x08,
- 0x51, 0x19, 0xc2, 0x00, 0xc1, 0x08, 0x50, 0xf1, 0xc2, 0x1d, 0x5f, 0x08,
- 0x50, 0xc8, 0xc2, 0x00, 0xa4, 0x08, 0x51, 0x11, 0x83, 0x08, 0x51, 0x09,
- 0x06, 0x42, 0x71, 0xa3, 0xc2, 0x00, 0xa4, 0x08, 0x50, 0xb1, 0x83, 0x08,
- 0x50, 0xa8, 0xc2, 0x00, 0xa4, 0x08, 0x50, 0x99, 0x83, 0x08, 0x50, 0x90,
- 0xc2, 0x00, 0xa4, 0x08, 0x50, 0x89, 0x83, 0x08, 0x50, 0x81, 0xc2, 0x04,
- 0x2b, 0x08, 0x51, 0x90, 0xc2, 0x00, 0xa4, 0x08, 0x51, 0x69, 0xc2, 0x0c,
- 0x65, 0x08, 0x51, 0x71, 0x83, 0x08, 0x51, 0x78, 0xa2, 0x0c, 0x66, 0xa9,
+ 0x39, 0x81, 0x95, 0x05, 0x39, 0x89, 0x93, 0x05, 0x3d, 0x78, 0xca, 0x43,
+ 0xef, 0x0e, 0xf8, 0x78, 0xc4, 0x00, 0xd2, 0x0e, 0xf8, 0x71, 0xc6, 0x03,
+ 0x81, 0x00, 0x0d, 0xf0, 0xd4, 0x04, 0x53, 0x0e, 0xf8, 0x50, 0xd8, 0x26,
+ 0x0c, 0x00, 0x15, 0x11, 0xc8, 0xb8, 0x63, 0x00, 0x0d, 0x50, 0xc5, 0x04,
+ 0x4e, 0x00, 0x14, 0xc1, 0xca, 0x54, 0xa0, 0x00, 0x15, 0x60, 0x9b, 0x00,
+ 0x02, 0xcb, 0x02, 0x64, 0xcf, 0x8f, 0x00, 0x02, 0x6b, 0x02, 0x64, 0xdb,
+ 0x97, 0x00, 0x02, 0xab, 0x02, 0x64, 0xe7, 0x91, 0x00, 0x02, 0x7b, 0x02,
+ 0x64, 0xf1, 0x8b, 0x00, 0x02, 0x4b, 0x02, 0x65, 0x15, 0x87, 0x00, 0x02,
+ 0x2b, 0x02, 0x65, 0x2b, 0x83, 0x00, 0x02, 0x0b, 0x02, 0x65, 0x53, 0x95,
+ 0x00, 0x02, 0x9b, 0x02, 0x65, 0x89, 0x9c, 0x00, 0x02, 0xd3, 0x02, 0x65,
+ 0xab, 0x9a, 0x00, 0x02, 0xc3, 0x02, 0x65, 0xb1, 0x99, 0x00, 0x02, 0xbb,
+ 0x02, 0x65, 0xb7, 0x98, 0x00, 0x02, 0xb3, 0x02, 0x65, 0xc3, 0x96, 0x00,
+ 0x02, 0xa3, 0x02, 0x65, 0xdf, 0x94, 0x00, 0x02, 0x93, 0x02, 0x66, 0x04,
+ 0x92, 0x00, 0x02, 0x83, 0x02, 0x66, 0x14, 0x90, 0x00, 0x02, 0x73, 0x02,
+ 0x66, 0x1a, 0x8e, 0x00, 0x02, 0x63, 0x02, 0x66, 0x24, 0x8d, 0x00, 0x02,
+ 0x5b, 0x02, 0x66, 0x2e, 0x8a, 0x00, 0x02, 0x43, 0x02, 0x66, 0x34, 0x89,
+ 0x00, 0x02, 0x3b, 0x02, 0x66, 0x4c, 0x88, 0x00, 0x02, 0x33, 0x02, 0x66,
+ 0x64, 0x86, 0x00, 0x02, 0x23, 0x02, 0x66, 0x6a, 0x85, 0x00, 0x02, 0x1b,
+ 0x02, 0x66, 0x77, 0x84, 0x00, 0x02, 0x13, 0x02, 0x66, 0x98, 0x8c, 0x00,
+ 0x02, 0x53, 0x02, 0x66, 0xaa, 0x93, 0x00, 0x02, 0x8a, 0x02, 0x66, 0xb0,
+ 0xc2, 0x00, 0x0b, 0x00, 0x09, 0x91, 0xc2, 0x09, 0x06, 0x00, 0x0a, 0x90,
+ 0x42, 0x01, 0x8a, 0xc2, 0x66, 0xb6, 0x43, 0x4d, 0xec, 0x42, 0x66, 0xc2,
+ 0xc3, 0x91, 0x7b, 0x00, 0x74, 0x31, 0xc3, 0x1c, 0x4f, 0x00, 0x74, 0x49,
+ 0xc3, 0xec, 0x54, 0x00, 0x74, 0x61, 0x10, 0xc2, 0x66, 0xce, 0x42, 0x02,
+ 0x01, 0xc2, 0x66, 0xda, 0x06, 0xc2, 0x66, 0xe4, 0xc3, 0x24, 0x3f, 0x00,
+ 0x75, 0x01, 0xc3, 0x13, 0x58, 0x00, 0x75, 0x60, 0xc4, 0xab, 0x64, 0x00,
+ 0x74, 0xe1, 0xc3, 0x2b, 0x94, 0x00, 0x74, 0xf0, 0xc3, 0x2b, 0x94, 0x00,
+ 0x74, 0x51, 0xc4, 0xab, 0x64, 0x00, 0x75, 0x50, 0xc2, 0x01, 0x0e, 0x00,
+ 0x75, 0x41, 0xc2, 0x0e, 0xe5, 0x00, 0x75, 0x48, 0xc4, 0xab, 0x64, 0x00,
+ 0x74, 0xb1, 0xc3, 0x2b, 0x94, 0x00, 0x74, 0xb8, 0xc2, 0x00, 0xe5, 0x00,
+ 0x74, 0xe9, 0xc2, 0x0c, 0x22, 0x00, 0x74, 0xf8, 0xc3, 0x01, 0x5e, 0x00,
+ 0x75, 0x19, 0xc3, 0x65, 0x6c, 0x00, 0x75, 0x28, 0xd1, 0x51, 0xad, 0x0f,
+ 0xdc, 0xe9, 0xc2, 0x00, 0x58, 0x01, 0x2f, 0xc8, 0x55, 0x01, 0x8c, 0xc2,
+ 0x66, 0xee, 0x48, 0x01, 0x93, 0xc2, 0x67, 0x00, 0x4a, 0x12, 0xcc, 0x42,
+ 0x67, 0x0c, 0xc6, 0x01, 0xa1, 0x0f, 0xda, 0x91, 0xc5, 0x00, 0x47, 0x0f,
+ 0xda, 0x98, 0xd1, 0x51, 0xad, 0x0f, 0xdc, 0xe1, 0xc2, 0x00, 0x58, 0x01,
+ 0x2f, 0xc0, 0xc6, 0x01, 0xa1, 0x0f, 0xda, 0xb9, 0xc5, 0x00, 0x47, 0x0f,
+ 0xda, 0xc0, 0x55, 0x17, 0x86, 0xc2, 0x67, 0x18, 0x48, 0x01, 0x93, 0xc2,
+ 0x67, 0x2a, 0x4a, 0x12, 0xcc, 0x42, 0x67, 0x36, 0xd5, 0x38, 0x3b, 0x0f,
+ 0xdc, 0xd1, 0xd0, 0x05, 0x17, 0x0f, 0xdc, 0x00, 0xe0, 0x08, 0x07, 0x0f,
+ 0xdb, 0x50, 0xe0, 0x01, 0x67, 0x0f, 0xdc, 0x90, 0xe0, 0x04, 0x07, 0x0f,
+ 0xdc, 0x88, 0xd9, 0x1d, 0x29, 0x0f, 0xc4, 0xa9, 0xcb, 0x88, 0xa5, 0x01,
+ 0x0f, 0x5b, 0x02, 0x67, 0x42, 0xc8, 0xab, 0xed, 0x01, 0x0f, 0x52, 0x02,
+ 0x67, 0x48, 0xcb, 0x05, 0x9b, 0x0f, 0xc4, 0x89, 0x49, 0x00, 0x59, 0x42,
+ 0x67, 0x4e, 0xd1, 0x54, 0xff, 0x01, 0x4a, 0x49, 0xd8, 0x05, 0x8f, 0x01,
+ 0x5f, 0x68, 0x45, 0x01, 0xac, 0xc2, 0x67, 0x63, 0xdc, 0x13, 0x36, 0x01,
+ 0x0e, 0x29, 0xc8, 0xab, 0xed, 0x01, 0x0d, 0x29, 0xc6, 0x12, 0x4f, 0x01,
+ 0x48, 0x91, 0xda, 0x1c, 0xf4, 0x0f, 0xdd, 0xc0, 0xc5, 0x00, 0x4c, 0x01,
+ 0x0d, 0xf9, 0x00, 0x42, 0x67, 0x93, 0xc5, 0x00, 0x4c, 0x01, 0x0d, 0xf1,
+ 0x00, 0x42, 0x67, 0xa5, 0xdb, 0x16, 0x21, 0x01, 0x19, 0x21, 0xd2, 0x46,
+ 0xf2, 0x01, 0x5d, 0xc8, 0xd6, 0x31, 0x7b, 0x01, 0x52, 0x41, 0xcc, 0x05,
+ 0xbb, 0x01, 0x52, 0x30, 0xca, 0xa5, 0xc4, 0x01, 0x52, 0x29, 0xc7, 0x79,
+ 0xb4, 0x01, 0x52, 0x11, 0xca, 0x86, 0x1e, 0x01, 0x52, 0x08, 0xcf, 0x18,
+ 0x2e, 0x0f, 0xbd, 0xf1, 0x42, 0x00, 0x98, 0xc2, 0x67, 0xb1, 0x48, 0x04,
+ 0x89, 0x42, 0x67, 0xb7, 0xc8, 0x00, 0xff, 0x01, 0x3b, 0x11, 0xc6, 0x01,
+ 0xb1, 0x01, 0x3a, 0xb8, 0xc6, 0x03, 0xfa, 0x0f, 0xbc, 0x39, 0xd6, 0x2d,
+ 0x71, 0x01, 0x36, 0xd9, 0xc6, 0x01, 0xe9, 0x0f, 0xbc, 0x88, 0xdd, 0x11,
+ 0x16, 0x0f, 0xb3, 0xd9, 0xc5, 0x12, 0x74, 0x0f, 0xbd, 0x60, 0x4e, 0x4a,
+ 0x10, 0xc2, 0x67, 0xc9, 0x45, 0x20, 0x8c, 0x42, 0x67, 0xd5, 0x45, 0x05,
+ 0x98, 0xc2, 0x67, 0xe1, 0x42, 0x04, 0x4c, 0x42, 0x67, 0xed, 0x49, 0x01,
+ 0x59, 0xc2, 0x67, 0xf9, 0xc5, 0x00, 0x62, 0x01, 0x3c, 0xd0, 0xc3, 0xeb,
+ 0xeb, 0x0f, 0xb3, 0x21, 0xc9, 0xac, 0x58, 0x0f, 0xb2, 0xe0, 0xc9, 0x94,
+ 0x48, 0x0f, 0xaa, 0x39, 0xca, 0xa4, 0x0c, 0x01, 0x5a, 0xa8, 0x4a, 0x78,
+ 0x11, 0xc2, 0x68, 0x05, 0x00, 0x42, 0x68, 0x0b, 0x51, 0x05, 0xe8, 0xc2,
+ 0x68, 0x17, 0x52, 0x1e, 0x0f, 0x42, 0x68, 0x23, 0xd7, 0x28, 0x3a, 0x01,
+ 0x3d, 0xd9, 0x46, 0x0a, 0x4f, 0x42, 0x68, 0x2f, 0xca, 0x25, 0x5a, 0x0f,
+ 0xbe, 0x99, 0xcd, 0x0f, 0x50, 0x0f, 0xbe, 0xa0, 0x4c, 0x8e, 0xd4, 0xc2,
+ 0x68, 0x3b, 0x42, 0x00, 0xfd, 0x42, 0x68, 0x4d, 0xde, 0x01, 0x29, 0x01,
+ 0x3d, 0x70, 0xd5, 0x00, 0x92, 0x0f, 0xc0, 0xc9, 0xdb, 0x15, 0x2e, 0x0f,
+ 0xc0, 0xe8, 0xe0, 0x04, 0x87, 0x01, 0x3d, 0x40, 0xce, 0x72, 0x54, 0x01,
+ 0x3a, 0x31, 0xc7, 0xa1, 0x3f, 0x01, 0x38, 0xa0, 0x46, 0x01, 0xab, 0xc2,
+ 0x68, 0x59, 0xc9, 0xb4, 0xf5, 0x01, 0x5a, 0xc8, 0xe0, 0x04, 0xe7, 0x01,
+ 0x3d, 0x00, 0x45, 0x02, 0x93, 0xc2, 0x68, 0x65, 0xc9, 0x9b, 0xa1, 0x0f,
+ 0xa5, 0x91, 0x53, 0x03, 0x87, 0x42, 0x68, 0x71, 0xcc, 0x01, 0x3b, 0x01,
+ 0x3c, 0xcb, 0x02, 0x68, 0x7d, 0x51, 0x05, 0xe8, 0x42, 0x68, 0x83, 0xc3,
+ 0x05, 0x17, 0x0f, 0xc4, 0xe3, 0x02, 0x68, 0x8f, 0xca, 0xa9, 0xfc, 0x0f,
+ 0xc4, 0xe8, 0xcf, 0x18, 0x2e, 0x0f, 0xbd, 0x91, 0xd2, 0x25, 0x52, 0x0f,
+ 0xbe, 0x50, 0xcd, 0x7d, 0x08, 0x07, 0xd8, 0xf9, 0x47, 0x02, 0x91, 0xc2,
+ 0x68, 0x93, 0xc7, 0xcc, 0xc3, 0x00, 0x2f, 0x88, 0x46, 0x01, 0xab, 0x42,
+ 0x68, 0x9f, 0x46, 0x01, 0xab, 0x42, 0x68, 0xab, 0x46, 0x01, 0xab, 0x42,
+ 0x68, 0xb7, 0x46, 0x01, 0xab, 0x42, 0x68, 0xc3, 0xc2, 0x06, 0x8d, 0x00,
+ 0x2f, 0x53, 0x02, 0x68, 0xcf, 0xc4, 0xde, 0x8d, 0x00, 0x2f, 0x33, 0x02,
+ 0x68, 0xd5, 0xc2, 0x00, 0xdd, 0x00, 0x2e, 0xc2, 0x02, 0x68, 0xdb, 0xc3,
+ 0x11, 0x40, 0x00, 0x2f, 0x4b, 0x02, 0x68, 0xe1, 0xc5, 0xdd, 0x33, 0x00,
+ 0x2f, 0x0a, 0x02, 0x68, 0xe7, 0xcc, 0x8b, 0x20, 0x07, 0xda, 0x40, 0xcc,
+ 0x8b, 0x20, 0x07, 0xda, 0x38, 0xc2, 0x03, 0x07, 0x00, 0x2f, 0x1b, 0x02,
+ 0x68, 0xed, 0xc3, 0xbd, 0xa8, 0x00, 0x2e, 0xd3, 0x02, 0x68, 0xf3, 0xc5,
+ 0xde, 0x8c, 0x00, 0x2f, 0x29, 0xc3, 0x21, 0x00, 0x00, 0x2e, 0xf9, 0xc3,
+ 0x04, 0xae, 0x00, 0x2e, 0xe8, 0xcc, 0x8b, 0x20, 0x07, 0xda, 0x00, 0xcc,
+ 0x8b, 0x20, 0x07, 0xd9, 0xf0, 0xcc, 0x8b, 0x20, 0x07, 0xd9, 0xe0, 0x46,
+ 0x01, 0xab, 0x42, 0x68, 0xf9, 0xcc, 0x8b, 0x20, 0x07, 0xd9, 0xb0, 0xcb,
+ 0x99, 0x16, 0x07, 0xd9, 0xa1, 0x96, 0x00, 0x2e, 0xb8, 0xcc, 0x8b, 0x20,
+ 0x07, 0xd9, 0x98, 0xcc, 0x8b, 0x20, 0x07, 0xd9, 0x90, 0x0e, 0xc2, 0x69,
+ 0x05, 0xc3, 0x18, 0x7a, 0x00, 0x2f, 0x10, 0xc3, 0x23, 0x6d, 0x07, 0xd9,
+ 0x41, 0xc4, 0x5d, 0xef, 0x07, 0xd9, 0x39, 0xc9, 0xb3, 0x72, 0x07, 0xd9,
+ 0x31, 0xc5, 0xa6, 0x05, 0x07, 0xd9, 0x29, 0xc3, 0xbd, 0xa8, 0x07, 0xd9,
+ 0x21, 0xc2, 0x00, 0x5b, 0x07, 0xd9, 0x19, 0xc5, 0x43, 0x0f, 0x07, 0xd9,
+ 0x11, 0xc4, 0x06, 0x7a, 0x07, 0xd9, 0x08, 0xc5, 0xd3, 0x89, 0x00, 0x2d,
+ 0xc3, 0x02, 0x69, 0x14, 0xc5, 0xdb, 0x1c, 0x00, 0x2d, 0xd8, 0xc6, 0x46,
+ 0x1a, 0x00, 0x2e, 0x11, 0x0a, 0xc2, 0x69, 0x1a, 0xc4, 0xc9, 0x57, 0x00,
+ 0x2d, 0xb0, 0xc4, 0x4f, 0xbe, 0x00, 0x2d, 0xcb, 0x02, 0x69, 0x26, 0xc4,
+ 0xdf, 0xaf, 0x00, 0x2d, 0xa1, 0x45, 0xdd, 0x7e, 0x42, 0x69, 0x2c, 0xc6,
+ 0xcf, 0xf8, 0x00, 0x2f, 0xa1, 0xc3, 0x30, 0xe0, 0x00, 0x2f, 0x98, 0xc3,
+ 0xdb, 0x75, 0x00, 0x2c, 0xc1, 0x44, 0xe8, 0x83, 0x42, 0x69, 0x3e, 0x46,
+ 0xd7, 0xae, 0xc2, 0x69, 0x4a, 0xc3, 0x21, 0x26, 0x00, 0x2c, 0xd8, 0xc7,
+ 0xcc, 0x1b, 0x00, 0x2c, 0xe8, 0xc7, 0xcb, 0x11, 0x00, 0x2d, 0x30, 0xce,
+ 0x6e, 0x1e, 0x02, 0x6e, 0x01, 0xcc, 0x88, 0x14, 0x02, 0x6e, 0xe9, 0xc7,
+ 0xca, 0x31, 0x02, 0x6f, 0x88, 0x14, 0xc2, 0x69, 0x56, 0xcc, 0x8d, 0x60,
+ 0x02, 0x6e, 0xe0, 0xc3, 0x0a, 0x59, 0x02, 0x6f, 0x79, 0xc7, 0xc3, 0xcb,
+ 0x02, 0x6f, 0xb8, 0x12, 0xc2, 0x69, 0x62, 0xc6, 0xd3, 0x3a, 0x02, 0x6e,
+ 0xc8, 0xc7, 0xc9, 0x27, 0x01, 0x5e, 0x19, 0xc7, 0xce, 0x75, 0x01, 0x59,
+ 0x18, 0xc7, 0x37, 0xb6, 0x00, 0x00, 0x4b, 0x02, 0x69, 0x6c, 0xc4, 0x39,
+ 0xd7, 0x01, 0x5b, 0xf0, 0x95, 0x0f, 0x9e, 0xc0, 0xc4, 0x15, 0xa7, 0x08,
+ 0x69, 0xb9, 0xc2, 0x22, 0x45, 0x08, 0x69, 0xb0, 0xc3, 0x0d, 0x8f, 0x08,
+ 0x69, 0xa9, 0xc3, 0x08, 0xde, 0x08, 0x69, 0xa0, 0xc4, 0x05, 0xde, 0x08,
+ 0x69, 0x99, 0xc2, 0x0a, 0x20, 0x08, 0x69, 0x90, 0xc3, 0x04, 0x36, 0x08,
+ 0x69, 0x39, 0xc2, 0x01, 0x01, 0x08, 0x69, 0x31, 0xc4, 0x73, 0xa5, 0x08,
+ 0x69, 0x28, 0xc2, 0x1a, 0x36, 0x08, 0x68, 0xd9, 0xc2, 0x07, 0x69, 0x08,
+ 0x68, 0xd1, 0x83, 0x08, 0x68, 0xa8, 0x45, 0xdb, 0xf3, 0xc2, 0x69, 0x70,
+ 0x83, 0x08, 0x68, 0x89, 0xc2, 0x01, 0x0e, 0x08, 0x68, 0x40, 0xc2, 0x00,
+ 0x9a, 0x08, 0x68, 0x69, 0x83, 0x08, 0x68, 0x60, 0xc2, 0x0c, 0x25, 0x08,
+ 0x68, 0x59, 0x83, 0x08, 0x68, 0x50, 0xc2, 0x00, 0x44, 0x08, 0x68, 0x21,
+ 0x83, 0x08, 0x68, 0x18, 0x83, 0x08, 0x68, 0x79, 0xc2, 0x01, 0x0e, 0x08,
+ 0x68, 0x80, 0x83, 0x00, 0xb9, 0x41, 0xc2, 0x07, 0x69, 0x00, 0xb9, 0x28,
+ 0x44, 0xc3, 0xd4, 0xc2, 0x69, 0x7c, 0x15, 0xc2, 0x69, 0xb0, 0x44, 0x7b,
+ 0x22, 0xc2, 0x69, 0xbc, 0x12, 0xc2, 0x69, 0xc6, 0x44, 0xc2, 0x9e, 0xc2,
+ 0x69, 0xd8, 0x44, 0xc4, 0x7c, 0xc2, 0x6a, 0x0c, 0x16, 0xc2, 0x6a, 0x28,
+ 0x0d, 0xc2, 0x6a, 0x34, 0x44, 0xc4, 0x44, 0xc2, 0x6a, 0x46, 0x05, 0xc2,
+ 0x6a, 0x6e, 0x42, 0x04, 0x30, 0xc2, 0x6a, 0x80, 0xc6, 0x94, 0xb9, 0x00,
+ 0x8a, 0xf8, 0x49, 0xb1, 0xb0, 0xc2, 0x6a, 0x8c, 0x49, 0xb1, 0x5f, 0x42,
+ 0x6b, 0x3b, 0x0d, 0xc2, 0x6b, 0x73, 0x15, 0xc2, 0x6b, 0x85, 0x44, 0xc4,
+ 0x44, 0xc2, 0x6b, 0x91, 0x16, 0xc2, 0x6b, 0xb3, 0x44, 0xc3, 0xd4, 0xc2,
+ 0x6b, 0xbf, 0x44, 0xc4, 0x7c, 0xc2, 0x6b, 0xed, 0x12, 0xc2, 0x6b, 0xfd,
+ 0x8b, 0x01, 0x8b, 0x1b, 0x02, 0x6c, 0x0f, 0x05, 0xc2, 0x6c, 0x15, 0xc5,
+ 0x7b, 0x22, 0x01, 0x8a, 0x71, 0x83, 0x01, 0x8a, 0x7b, 0x02, 0x6c, 0x21,
+ 0x1b, 0xc2, 0x6c, 0x2e, 0x87, 0x01, 0x8a, 0xa3, 0x02, 0x6c, 0x66, 0x91,
+ 0x01, 0x8a, 0xbb, 0x02, 0x6c, 0x6e, 0x19, 0xc2, 0x6c, 0x72, 0x97, 0x01,
+ 0x8a, 0xe0, 0x19, 0xc2, 0x6c, 0x84, 0x0a, 0xc2, 0x6c, 0x8e, 0xc2, 0x01,
+ 0x04, 0x01, 0x81, 0xc0, 0xc3, 0x08, 0xde, 0x01, 0x81, 0x21, 0xc3, 0x0d,
+ 0x8f, 0x01, 0x81, 0x28, 0xc2, 0x22, 0x45, 0x01, 0x81, 0x31, 0xc4, 0x15,
+ 0xa7, 0x01, 0x81, 0x38, 0xc8, 0x0d, 0x7e, 0x08, 0x47, 0xf8, 0xc5, 0x25,
+ 0x27, 0x08, 0x47, 0xf1, 0xc2, 0x01, 0x04, 0x08, 0x47, 0xe8, 0xc2, 0x3c,
+ 0xd1, 0x08, 0x47, 0xa9, 0xc3, 0x1e, 0x54, 0x08, 0x47, 0x40, 0xc3, 0x11,
+ 0x40, 0x08, 0x47, 0xa1, 0x03, 0x42, 0x6c, 0x9a, 0xc2, 0x16, 0x0a, 0x08,
+ 0x47, 0x79, 0xc4, 0x32, 0xac, 0x08, 0x47, 0x00, 0xc2, 0x01, 0x47, 0x08,
+ 0x47, 0x38, 0x19, 0xc2, 0x6c, 0xa6, 0x15, 0xc2, 0x6c, 0xae, 0x83, 0x07,
+ 0xfb, 0x89, 0x8b, 0x07, 0xfb, 0x91, 0x97, 0x07, 0xfb, 0x99, 0x87, 0x07,
+ 0xfb, 0xa1, 0x91, 0x07, 0xfb, 0xa9, 0x0d, 0xc2, 0x6c, 0xc8, 0x16, 0xc2,
+ 0x6c, 0xdc, 0x90, 0x07, 0xfc, 0xeb, 0x02, 0x6c, 0xf0, 0x0a, 0xc2, 0x6d,
+ 0x04, 0x0f, 0xc2, 0x6d, 0x18, 0x1b, 0xc2, 0x6d, 0x2c, 0x14, 0x42, 0x6d,
+ 0x38, 0x44, 0x7f, 0x3f, 0xc2, 0x6d, 0x4c, 0xc6, 0xae, 0x92, 0x07, 0xfd,
+ 0xd8, 0x44, 0x5d, 0x46, 0xc2, 0x6d, 0x56, 0xc3, 0x3b, 0xc9, 0x07, 0xfd,
+ 0xa8, 0xc8, 0xbb, 0x93, 0x07, 0xfd, 0x51, 0xc7, 0xc8, 0x6a, 0x07, 0xfd,
+ 0x59, 0x43, 0xae, 0x7d, 0xc2, 0x6d, 0x74, 0xc6, 0xd6, 0xd6, 0x07, 0xfd,
+ 0x69, 0xc9, 0xae, 0x6b, 0x07, 0xfd, 0x71, 0xc7, 0xce, 0xb4, 0x07, 0xfd,
+ 0x81, 0x8e, 0x07, 0xfd, 0x48, 0x43, 0x68, 0xc6, 0xc2, 0x6d, 0x80, 0xc6,
+ 0xae, 0x80, 0x07, 0xfd, 0xe8, 0xc4, 0xc2, 0x9f, 0x07, 0xfd, 0xb8, 0xc4,
+ 0xb1, 0xd8, 0x07, 0xfd, 0xc1, 0xc6, 0xb1, 0xd7, 0x07, 0xfd, 0xd0, 0xc6,
+ 0xae, 0x6e, 0x07, 0xfd, 0xe1, 0xc5, 0xba, 0x7e, 0x07, 0xfd, 0x38, 0x87,
+ 0x07, 0xfe, 0x18, 0x83, 0x07, 0xfe, 0x23, 0x02, 0x6d, 0x8a, 0x87, 0x07,
+ 0xfe, 0x5b, 0x02, 0x6d, 0x8e, 0x91, 0x07, 0xfe, 0x91, 0x97, 0x07, 0xfe,
+ 0xb9, 0x8b, 0x07, 0xfe, 0xd8, 0x91, 0x07, 0xfe, 0x31, 0x97, 0x07, 0xfe,
+ 0xd0, 0x87, 0x07, 0xfe, 0x78, 0x83, 0x07, 0xfe, 0x6b, 0x02, 0x6d, 0x92,
+ 0x87, 0x07, 0xfe, 0xab, 0x02, 0x6d, 0x96, 0x8b, 0x07, 0xfe, 0xb0, 0x90,
+ 0x07, 0xfd, 0x21, 0xc7, 0xca, 0xbd, 0x07, 0xfd, 0x29, 0xc7, 0xcb, 0x7a,
+ 0x07, 0xfd, 0x30, 0x0d, 0xc2, 0x6d, 0x9a, 0x19, 0xc2, 0x6d, 0xa6, 0x83,
+ 0x01, 0x82, 0x09, 0x8b, 0x01, 0x82, 0x19, 0x97, 0x01, 0x82, 0x29, 0x87,
+ 0x01, 0x82, 0x39, 0x91, 0x01, 0x82, 0x49, 0xc2, 0x00, 0x16, 0x01, 0x83,
+ 0x19, 0x1b, 0x42, 0x6d, 0xb6, 0xc2, 0x04, 0x30, 0x0d, 0x80, 0x09, 0xc2,
+ 0x05, 0x06, 0x0d, 0x88, 0xf8, 0xc6, 0x19, 0x7a, 0x01, 0x02, 0x19, 0xce,
+ 0x69, 0x9d, 0x01, 0x70, 0xd0, 0xcd, 0x81, 0xdb, 0x0f, 0xdc, 0xb1, 0xc5,
+ 0x00, 0xf9, 0x0f, 0xdd, 0x88, 0xe0, 0x03, 0x87, 0x0f, 0xdd, 0xa0, 0xc5,
+ 0x65, 0x41, 0x01, 0x11, 0xf1, 0xc9, 0xb4, 0x80, 0x01, 0x72, 0x2a, 0x02,
+ 0x6d, 0xc2, 0xc6, 0xd8, 0x2c, 0x07, 0xff, 0xc9, 0xc9, 0x1b, 0xac, 0x07,
+ 0xff, 0xd1, 0xca, 0x80, 0x58, 0x07, 0xff, 0xd8, 0x43, 0x14, 0x16, 0xc2,
+ 0x6d, 0xc8, 0x46, 0x03, 0x50, 0xc2, 0x6d, 0xce, 0x45, 0x01, 0xac, 0x42,
+ 0x6d, 0xda, 0x42, 0x0b, 0xfd, 0xc2, 0x6d, 0xec, 0xc7, 0x79, 0xb4, 0x01,
+ 0x50, 0xd9, 0xcc, 0x05, 0xbb, 0x01, 0x50, 0xc9, 0xca, 0x9f, 0x02, 0x01,
+ 0x50, 0xc1, 0xd9, 0x1e, 0x0c, 0x01, 0x50, 0xb9, 0xcd, 0x80, 0x14, 0x01,
+ 0x50, 0x70, 0xd6, 0x30, 0x73, 0x01, 0x50, 0xa9, 0xd1, 0x53, 0x34, 0x01,
+ 0x50, 0x78, 0xc3, 0x05, 0x17, 0x08, 0x5b, 0xc3, 0x02, 0x6d, 0xf8, 0x16,
+ 0xc2, 0x6d, 0xfc, 0xc4, 0x08, 0xdd, 0x08, 0x5b, 0xd8, 0x16, 0xc2, 0x6e,
+ 0x0c, 0x15, 0xc2, 0x6e, 0x18, 0xc2, 0x03, 0x07, 0x08, 0x5b, 0x79, 0xc3,
+ 0x21, 0x00, 0x08, 0x5b, 0x69, 0xc8, 0xbc, 0xb3, 0x08, 0x5b, 0x61, 0xc6,
+ 0xd7, 0x12, 0x08, 0x5b, 0x59, 0xc4, 0xe5, 0x53, 0x08, 0x5b, 0x51, 0xc4,
+ 0x4d, 0x48, 0x08, 0x5b, 0x49, 0xc2, 0x00, 0x5b, 0x08, 0x5b, 0x23, 0x02,
+ 0x6e, 0x22, 0xc5, 0x4d, 0x42, 0x08, 0x5b, 0x31, 0xcd, 0x7c, 0xad, 0x08,
+ 0x5b, 0x29, 0xc6, 0x43, 0x0f, 0x08, 0x5b, 0x19, 0xc5, 0x9e, 0xbc, 0x08,
+ 0x5b, 0x11, 0xc4, 0xe5, 0xaf, 0x08, 0x5b, 0x09, 0xc5, 0xa6, 0x5f, 0x08,
+ 0x5b, 0x00, 0xc3, 0x05, 0x17, 0x08, 0x5a, 0xc3, 0x02, 0x6e, 0x28, 0x16,
+ 0xc2, 0x6e, 0x2c, 0xc4, 0x08, 0xdd, 0x08, 0x5a, 0xd8, 0x16, 0xc2, 0x6e,
+ 0x3c, 0x15, 0xc2, 0x6e, 0x48, 0xc4, 0x5d, 0xef, 0x08, 0x5a, 0x99, 0xc3,
+ 0x04, 0xae, 0x08, 0x5a, 0x61, 0xc6, 0xd7, 0x12, 0x08, 0x5a, 0x59, 0xc4,
+ 0xe5, 0x53, 0x08, 0x5a, 0x51, 0xc4, 0x4d, 0x48, 0x08, 0x5a, 0x49, 0xc2,
+ 0x00, 0x5b, 0x08, 0x5a, 0x23, 0x02, 0x6e, 0x52, 0xc5, 0x4d, 0x42, 0x08,
+ 0x5a, 0x31, 0xc3, 0x7c, 0xad, 0x08, 0x5a, 0x29, 0xc6, 0x43, 0x0f, 0x08,
+ 0x5a, 0x19, 0xc5, 0x9e, 0xbc, 0x08, 0x5a, 0x11, 0xc4, 0xe5, 0xaf, 0x08,
+ 0x5a, 0x09, 0x03, 0xc2, 0x6e, 0x58, 0xc3, 0x21, 0x00, 0x08, 0x5a, 0x69,
+ 0xc2, 0x03, 0x07, 0x08, 0x5a, 0x81, 0xc4, 0xbc, 0xb7, 0x08, 0x5a, 0x90,
+ 0xc3, 0x05, 0x17, 0x00, 0x00, 0xf9, 0x16, 0xc2, 0x6e, 0x64, 0xc4, 0x08,
+ 0xdd, 0x00, 0x00, 0xe0, 0x4a, 0x0d, 0x26, 0xc2, 0x6e, 0x70, 0x49, 0x46,
+ 0xde, 0xc2, 0x6e, 0x7a, 0xc5, 0xdd, 0xce, 0x0f, 0x65, 0x0b, 0x02, 0x6e,
+ 0x98, 0xc4, 0x42, 0x15, 0x0f, 0x64, 0xf3, 0x02, 0x6e, 0x9e, 0xc4, 0x24,
+ 0x35, 0x0f, 0x63, 0xcb, 0x02, 0x6e, 0xa4, 0xc5, 0x05, 0x1b, 0x0f, 0x63,
+ 0xc3, 0x02, 0x6e, 0xb1, 0x15, 0xc2, 0x6e, 0xbc, 0x08, 0xc2, 0x6e, 0xce,
+ 0x16, 0xc2, 0x6e, 0xd6, 0xc3, 0x05, 0x17, 0x0f, 0x63, 0x8a, 0x02, 0x6e,
+ 0xe7, 0xce, 0x08, 0x13, 0x0f, 0x65, 0x79, 0x44, 0x05, 0x17, 0x42, 0x6e,
+ 0xeb, 0xc3, 0x0d, 0x8f, 0x0e, 0x9b, 0xb1, 0xc3, 0x08, 0xde, 0x0e, 0x9b,
+ 0xa8, 0xc4, 0x05, 0xde, 0x0e, 0x9b, 0xa1, 0xc2, 0x0a, 0x20, 0x0e, 0x9b,
+ 0x98, 0x0c, 0xc2, 0x6e, 0xf7, 0xc8, 0xc1, 0x8b, 0x01, 0x96, 0x09, 0x42,
+ 0x00, 0x3f, 0xc2, 0x6f, 0x01, 0x03, 0xc2, 0x6f, 0x0b, 0xc9, 0xad, 0xa5,
+ 0x01, 0x96, 0x41, 0xc7, 0xc9, 0x2e, 0x01, 0x96, 0x49, 0xc8, 0xbf, 0xe3,
+ 0x01, 0x96, 0x51, 0x06, 0xc2, 0x6f, 0x17, 0x45, 0xde, 0xdc, 0x42, 0x6f,
+ 0x23, 0xc5, 0x00, 0x47, 0x01, 0x7f, 0x81, 0xd0, 0x5c, 0x3f, 0x01, 0x7f,
+ 0x90, 0xc5, 0x00, 0x34, 0x01, 0x7f, 0x89, 0xd0, 0x58, 0x7f, 0x01, 0x7f,
+ 0x98, 0xc5, 0x03, 0x50, 0x01, 0x7f, 0xa9, 0xc5, 0x00, 0x34, 0x01, 0x7f,
+ 0xb1, 0x0e, 0xc2, 0x6f, 0x48, 0x46, 0x05, 0x07, 0x42, 0x6f, 0x54, 0xc8,
+ 0xbf, 0xf3, 0x01, 0x8c, 0xa1, 0xc8, 0xb9, 0x6b, 0x01, 0x8c, 0xd8, 0xc5,
+ 0x00, 0xf9, 0x01, 0x8c, 0xa9, 0xc7, 0x37, 0xa1, 0x01, 0x8c, 0xe0, 0xc2,
+ 0x01, 0x04, 0x08, 0x42, 0xdb, 0x02, 0x6f, 0x60, 0x19, 0xc2, 0x6f, 0x66,
+ 0xc4, 0x05, 0xde, 0x08, 0x42, 0xd0, 0x00, 0x42, 0x6f, 0x70, 0xc2, 0x3c,
+ 0xd1, 0x08, 0x42, 0xa9, 0xc3, 0x1e, 0x54, 0x08, 0x42, 0x40, 0xc3, 0x11,
+ 0x40, 0x08, 0x42, 0xa1, 0x03, 0x42, 0x6f, 0x7c, 0xc3, 0x18, 0x7a, 0x08,
+ 0x42, 0x79, 0xc4, 0x32, 0xac, 0x08, 0x42, 0x00, 0xc2, 0x01, 0x47, 0x08,
+ 0x42, 0x38, 0xca, 0xa9, 0x34, 0x0f, 0xd2, 0x43, 0x02, 0x6f, 0x88, 0xc4,
+ 0xd4, 0xf2, 0x01, 0x32, 0xb3, 0x02, 0x6f, 0x8e, 0xc4, 0xe8, 0x9b, 0x01,
+ 0x32, 0xcb, 0x02, 0x6f, 0x94, 0x0d, 0xc2, 0x6f, 0x9a, 0xc6, 0xba, 0xfd,
+ 0x01, 0x32, 0xbb, 0x02, 0x6f, 0xa9, 0xc5, 0xad, 0xae, 0x01, 0x32, 0xab,
+ 0x02, 0x6f, 0xaf, 0x47, 0x41, 0xe5, 0x42, 0x6f, 0xb5, 0x00, 0x42, 0x6f,
+ 0xd1, 0x46, 0x01, 0xab, 0x42, 0x6f, 0xdd, 0x03, 0xc2, 0x6f, 0xe9, 0xc5,
+ 0xce, 0x77, 0x01, 0x59, 0x08, 0xc7, 0xce, 0xbb, 0x01, 0x4e, 0xb1, 0xd0,
+ 0x58, 0x9f, 0x01, 0x59, 0x68, 0x00, 0x42, 0x6f, 0xf8, 0x00, 0x42, 0x70,
+ 0x0a, 0xca, 0x87, 0x6e, 0x01, 0x31, 0xd1, 0x44, 0x00, 0x40, 0x42, 0x70,
+ 0x19, 0xc9, 0x94, 0x48, 0x0f, 0xaa, 0x31, 0xca, 0xa4, 0x02, 0x01, 0x58,
+ 0xe0, 0x42, 0x00, 0xd0, 0xc2, 0x70, 0x23, 0x4b, 0x05, 0xe8, 0x42, 0x70,
+ 0x2f, 0xe0, 0x0b, 0x87, 0x0f, 0xbd, 0x00, 0x00, 0x42, 0x70, 0x41, 0xc4,
+ 0x57, 0xf1, 0x01, 0x36, 0x09, 0xc3, 0x14, 0xe9, 0x01, 0x36, 0x00, 0x4b,
+ 0x23, 0xcb, 0xc2, 0x70, 0x59, 0x4b, 0x2d, 0x74, 0x42, 0x70, 0x6b, 0x46,
+ 0x00, 0x54, 0xc2, 0x70, 0x77, 0xc7, 0xc4, 0xe3, 0x01, 0x1f, 0x10, 0x11,
+ 0xc2, 0x70, 0x7d, 0xc2, 0x07, 0x68, 0x01, 0x34, 0x82, 0x02, 0x70, 0x89,
+ 0xc4, 0x0e, 0xa5, 0x01, 0x39, 0x39, 0xc4, 0x10, 0x64, 0x01, 0x5e, 0x70,
+ 0x4b, 0x01, 0x57, 0xc2, 0x70, 0x8f, 0x4b, 0x05, 0xe8, 0x42, 0x70, 0x9b,
+ 0xc5, 0x01, 0x62, 0x01, 0x30, 0xe9, 0xce, 0x23, 0xd6, 0x0f, 0xa2, 0x30,
+ 0xc8, 0x00, 0x52, 0x01, 0x2d, 0x9b, 0x02, 0x70, 0xab, 0xce, 0x70, 0x32,
+ 0x01, 0x2d, 0xa9, 0xc7, 0xc5, 0x53, 0x0f, 0xde, 0x50, 0x15, 0xc2, 0x70,
+ 0xb1, 0xc7, 0x3f, 0x7b, 0x01, 0x59, 0x31, 0xc7, 0x0b, 0xa0, 0x01, 0x59,
+ 0x40, 0xc4, 0x2e, 0x3c, 0x0f, 0x9f, 0x89, 0xc5, 0xbc, 0x06, 0x01, 0x59,
+ 0x00, 0xc9, 0x46, 0xf7, 0x01, 0x2d, 0x79, 0xc3, 0x02, 0x1d, 0x01, 0x57,
+ 0xf1, 0xc7, 0x58, 0xa8, 0x01, 0x59, 0x78, 0xc4, 0x15, 0xa7, 0x0f, 0x17,
+ 0xb9, 0xc2, 0x22, 0x45, 0x0f, 0x17, 0xb0, 0xc3, 0x0d, 0x8f, 0x0f, 0x17,
+ 0xa9, 0xc3, 0x08, 0xde, 0x0f, 0x17, 0xa0, 0xc4, 0x05, 0xde, 0x0f, 0x17,
+ 0x99, 0xc2, 0x0a, 0x20, 0x0f, 0x17, 0x90, 0xc2, 0x01, 0xce, 0x0f, 0x17,
+ 0x78, 0xc2, 0x01, 0xce, 0x0f, 0x17, 0x68, 0xc2, 0x14, 0x94, 0x0f, 0x17,
+ 0x59, 0x83, 0x0f, 0x16, 0x30, 0xc2, 0x01, 0x04, 0x0f, 0x17, 0x50, 0xc2,
+ 0x1a, 0x36, 0x0f, 0x17, 0x49, 0xc2, 0x07, 0x69, 0x0f, 0x16, 0xe9, 0x83,
+ 0x0f, 0x16, 0x48, 0x83, 0x0f, 0x16, 0x03, 0x02, 0x70, 0xc3, 0xc2, 0x00,
+ 0x56, 0x0f, 0x17, 0x21, 0x97, 0x0f, 0x16, 0xb0, 0x90, 0x0f, 0x17, 0x38,
+ 0x90, 0x0f, 0x17, 0x32, 0x02, 0x70, 0xca, 0xc2, 0x00, 0x56, 0x0f, 0x17,
+ 0x28, 0xc2, 0x00, 0x9a, 0x0f, 0x17, 0x09, 0xc2, 0x0e, 0xe5, 0x0f, 0x17,
+ 0x01, 0xc2, 0x01, 0x0e, 0x0f, 0x16, 0x61, 0x83, 0x0f, 0x16, 0x58, 0xc3,
+ 0x3e, 0xcd, 0x0f, 0x16, 0xf9, 0x83, 0x0f, 0x16, 0x40, 0xc2, 0x01, 0x0e,
+ 0x0f, 0x16, 0xc9, 0x83, 0x0f, 0x16, 0xa0, 0xc2, 0x01, 0x0e, 0x0f, 0x16,
+ 0x79, 0x83, 0x0f, 0x16, 0x70, 0x83, 0x0f, 0x16, 0x51, 0xc2, 0x01, 0x0e,
+ 0x0f, 0x16, 0x38, 0xc6, 0x15, 0xa7, 0x08, 0xc7, 0x81, 0xc4, 0xd8, 0xce,
+ 0x08, 0xc7, 0x78, 0xc4, 0x41, 0xc9, 0x08, 0xc7, 0x71, 0xc4, 0x4d, 0x29,
+ 0x08, 0xc7, 0x68, 0xc5, 0x0d, 0x88, 0x08, 0xc7, 0x61, 0xc5, 0x25, 0x27,
+ 0x08, 0xc7, 0x59, 0xc2, 0x01, 0x04, 0x08, 0xc7, 0x50, 0xc4, 0x15, 0xa7,
+ 0x08, 0xc7, 0x39, 0xc2, 0x22, 0x45, 0x08, 0xc7, 0x30, 0xc3, 0x0d, 0x8f,
+ 0x08, 0xc7, 0x29, 0xc3, 0x08, 0xde, 0x08, 0xc7, 0x20, 0xc4, 0x05, 0xde,
+ 0x08, 0xc7, 0x19, 0xc2, 0x0a, 0x20, 0x08, 0xc7, 0x10, 0xc2, 0x23, 0x68,
+ 0x08, 0xc6, 0xf1, 0xc3, 0xec, 0x63, 0x08, 0xc6, 0xe8, 0xc2, 0x00, 0x4d,
+ 0x08, 0xc6, 0xe1, 0x11, 0xc2, 0x70, 0xce, 0xc3, 0xbb, 0x4c, 0x08, 0xc6,
+ 0xc8, 0x8f, 0x08, 0xc6, 0xb1, 0x96, 0x08, 0xc6, 0xa9, 0xc2, 0x00, 0x56,
+ 0x08, 0xc6, 0x50, 0xc3, 0x3b, 0x04, 0x08, 0xc6, 0x99, 0xc3, 0x4a, 0x36,
+ 0x08, 0xc6, 0x00, 0xc2, 0x01, 0x8e, 0x08, 0xc6, 0x88, 0x10, 0x42, 0x70,
+ 0xda, 0x85, 0x08, 0xc6, 0x79, 0x97, 0x08, 0xc6, 0x38, 0x97, 0x08, 0xc6,
+ 0x1b, 0x02, 0x70, 0xe2, 0x91, 0x08, 0xc6, 0x29, 0x83, 0x08, 0xc6, 0x20,
+ 0xc2, 0x23, 0x68, 0x08, 0xc5, 0xf1, 0xc3, 0xec, 0x63, 0x08, 0xc5, 0xe8,
+ 0xc2, 0x00, 0x4d, 0x08, 0xc5, 0xe1, 0x11, 0xc2, 0x70, 0xe6, 0xc3, 0xbb,
+ 0x4c, 0x08, 0xc5, 0xc8, 0x8f, 0x08, 0xc5, 0xb1, 0x96, 0x08, 0xc5, 0xa9,
+ 0xc2, 0x00, 0x56, 0x08, 0xc5, 0x50, 0xc3, 0x3b, 0x04, 0x08, 0xc5, 0x99,
+ 0xc3, 0x4a, 0x36, 0x08, 0xc5, 0x00, 0xc2, 0x01, 0x8e, 0x08, 0xc5, 0x88,
+ 0x10, 0x42, 0x70, 0xf2, 0x85, 0x08, 0xc5, 0x79, 0x97, 0x08, 0xc5, 0x38,
+ 0x97, 0x08, 0xc5, 0x1b, 0x02, 0x70, 0xfa, 0x91, 0x08, 0xc5, 0x29, 0x83,
+ 0x08, 0xc5, 0x20, 0xd3, 0x41, 0x99, 0x01, 0x39, 0x29, 0x43, 0x00, 0xff,
+ 0x42, 0x70, 0xfe, 0xc4, 0x00, 0xfa, 0x01, 0x02, 0xd9, 0xcb, 0x0b, 0xfc,
+ 0x01, 0x02, 0xc0, 0x12, 0xc2, 0x71, 0x04, 0xcc, 0x8c, 0xb8, 0x0f, 0xc8,
+ 0xa9, 0x16, 0xc2, 0x71, 0x16, 0x11, 0xc2, 0x71, 0x22, 0xcf, 0x65, 0x19,
+ 0x0f, 0xb2, 0x29, 0xcc, 0x8a, 0x0c, 0x0f, 0xb2, 0x21, 0xd0, 0x59, 0x8f,
+ 0x0f, 0xb0, 0xdb, 0x02, 0x71, 0x34, 0x42, 0x00, 0xea, 0xc2, 0x71, 0x3a,
+ 0xcf, 0x68, 0x07, 0x0f, 0xb1, 0x21, 0x0f, 0xc2, 0x71, 0x46, 0xdb, 0x18,
+ 0xdf, 0x0f, 0xc9, 0x59, 0xda, 0x19, 0x9a, 0x0f, 0xcb, 0xa1, 0xce, 0x72,
+ 0xd2, 0x0f, 0xd7, 0x20, 0xcf, 0x38, 0x11, 0x01, 0x49, 0x61, 0xd0, 0x20,
+ 0x86, 0x01, 0x49, 0x78, 0xc4, 0x24, 0x35, 0x07, 0xf8, 0xc9, 0xc4, 0x16,
+ 0x57, 0x07, 0xf8, 0x81, 0xc3, 0x05, 0x17, 0x07, 0xf8, 0x89, 0x16, 0xc2,
+ 0x71, 0x52, 0x08, 0xc2, 0x71, 0x5e, 0x15, 0xc2, 0x71, 0x6a, 0xc5, 0x05,
+ 0x1b, 0x07, 0xf8, 0xc0, 0xc3, 0x0c, 0x34, 0x07, 0xf8, 0xd1, 0x42, 0x00,
+ 0xec, 0x42, 0x71, 0x76, 0xcc, 0x8e, 0xe0, 0x07, 0xf8, 0xe1, 0x43, 0x00,
+ 0xcf, 0x42, 0x71, 0x80, 0x4f, 0x01, 0xf7, 0xc2, 0x71, 0x98, 0x4d, 0x27,
+ 0x71, 0x42, 0x72, 0x00, 0xce, 0x26, 0x2e, 0x07, 0xf9, 0xe9, 0xcd, 0x00,
+ 0xd2, 0x07, 0xfa, 0xe9, 0xd1, 0x57, 0xc9, 0x07, 0xfb, 0x01, 0xcb, 0x1c,
+ 0xe0, 0x07, 0xf8, 0x48, 0xc9, 0xad, 0xb7, 0x0f, 0x98, 0xd9, 0xc6, 0x01,
+ 0xb1, 0x0f, 0x98, 0x98, 0xc4, 0x25, 0x07, 0x08, 0x52, 0xc1, 0xc4, 0x71,
+ 0x75, 0x08, 0x52, 0xa8, 0x11, 0xc2, 0x72, 0x68, 0xc4, 0x1c, 0xb3, 0x08,
+ 0x52, 0xb0, 0xcb, 0x84, 0x19, 0x08, 0x52, 0x99, 0xc5, 0x03, 0xfb, 0x08,
+ 0x52, 0x90, 0xc8, 0x4f, 0xa2, 0x08, 0x52, 0x39, 0xc7, 0x0d, 0x7f, 0x08,
+ 0x52, 0x30, 0xc5, 0x25, 0x27, 0x08, 0x52, 0x29, 0xc2, 0x01, 0x04, 0x08,
+ 0x52, 0x20, 0xc4, 0x05, 0xde, 0x08, 0x52, 0x11, 0xc2, 0x0a, 0x20, 0x08,
+ 0x52, 0x08, 0xcb, 0x37, 0x9d, 0x08, 0x50, 0x61, 0x45, 0x02, 0xcb, 0x42,
+ 0x72, 0x72, 0xc7, 0x0d, 0xd9, 0x08, 0x51, 0xd1, 0xcf, 0x64, 0xb0, 0x08,
+ 0x50, 0x68, 0xc2, 0x01, 0x0e, 0x08, 0x51, 0xa9, 0x83, 0x08, 0x51, 0x60,
+ 0x16, 0xc2, 0x72, 0x88, 0xc2, 0x01, 0x0e, 0x08, 0x51, 0x01, 0x83, 0x08,
+ 0x50, 0xf8, 0xc2, 0x01, 0x0e, 0x08, 0x51, 0x39, 0x83, 0x08, 0x51, 0x30,
+ 0xc2, 0x01, 0x0e, 0x08, 0x51, 0x29, 0x83, 0x08, 0x51, 0x20, 0x83, 0x08,
+ 0x51, 0x19, 0xc2, 0x01, 0x01, 0x08, 0x50, 0xf1, 0xc2, 0x1a, 0x36, 0x08,
+ 0x50, 0xc8, 0xc2, 0x01, 0x0e, 0x08, 0x51, 0x11, 0x83, 0x08, 0x51, 0x09,
+ 0x06, 0x42, 0x72, 0x96, 0xc2, 0x01, 0x0e, 0x08, 0x50, 0xb1, 0x83, 0x08,
+ 0x50, 0xa8, 0xc2, 0x01, 0x0e, 0x08, 0x50, 0x99, 0x83, 0x08, 0x50, 0x90,
+ 0xc2, 0x01, 0x0e, 0x08, 0x50, 0x89, 0x83, 0x08, 0x50, 0x81, 0xc2, 0x06,
+ 0x6b, 0x08, 0x51, 0x90, 0xc2, 0x01, 0x0e, 0x08, 0x51, 0x69, 0xc2, 0x0e,
+ 0xe5, 0x08, 0x51, 0x71, 0x83, 0x08, 0x51, 0x78, 0xa2, 0x0c, 0x66, 0xa9,
0xa1, 0x0c, 0x66, 0xa1, 0xa0, 0x0c, 0x66, 0x99, 0x9f, 0x0c, 0x66, 0x91,
0x9e, 0x0c, 0x66, 0x89, 0x9d, 0x0c, 0x66, 0x80, 0x88, 0x0c, 0x66, 0x79,
0x87, 0x0c, 0x66, 0x71, 0x86, 0x0c, 0x66, 0x69, 0x85, 0x0c, 0x66, 0x61,
@@ -10883,5034 +10889,5077 @@ uint8_t UnicodeNameToCodepointIndex_[241561] = {
0xa6, 0x0c, 0x58, 0x49, 0xa5, 0x0c, 0x58, 0x41, 0xa4, 0x0c, 0x58, 0x39,
0xa3, 0x0c, 0x58, 0x31, 0xa2, 0x0c, 0x58, 0x29, 0xa1, 0x0c, 0x58, 0x21,
0xa0, 0x0c, 0x58, 0x19, 0x9f, 0x0c, 0x58, 0x11, 0x9e, 0x0c, 0x58, 0x09,
- 0x9d, 0x0c, 0x58, 0x00, 0xc2, 0x00, 0xa4, 0x08, 0x96, 0x59, 0xc2, 0x0b,
- 0xc6, 0x08, 0x96, 0x49, 0x83, 0x08, 0x96, 0x40, 0xc2, 0x00, 0xa4, 0x08,
- 0x96, 0x39, 0x83, 0x08, 0x96, 0x30, 0xc2, 0x0b, 0xc6, 0x08, 0x90, 0xe1,
- 0xc2, 0x00, 0xa4, 0x08, 0x90, 0xb9, 0x83, 0x08, 0x90, 0xb0, 0xc2, 0x00,
- 0xa4, 0x08, 0x90, 0xa9, 0x83, 0x08, 0x90, 0xa0, 0xc4, 0xda, 0x94, 0x08,
- 0x91, 0xf1, 0xc5, 0xd5, 0xa7, 0x08, 0x91, 0xb8, 0x02, 0xc2, 0x71, 0xad,
- 0x00, 0x42, 0x71, 0xbb, 0x43, 0x0f, 0x7c, 0xc2, 0x71, 0xc7, 0x43, 0x6c,
- 0xa4, 0xc2, 0x71, 0xcf, 0xc9, 0xad, 0x36, 0x00, 0xcf, 0x00, 0x44, 0xe0,
- 0xeb, 0xc2, 0x71, 0xdb, 0x43, 0x93, 0xfc, 0x42, 0x71, 0xe7, 0xc3, 0x32,
- 0xa9, 0x00, 0xcf, 0x89, 0xc4, 0xe2, 0x27, 0x00, 0xcf, 0x08, 0x12, 0xc2,
- 0x71, 0xf3, 0x04, 0xc2, 0x72, 0x02, 0xc4, 0xdc, 0x82, 0x00, 0xbf, 0x89,
- 0xc3, 0x15, 0x38, 0x00, 0xbf, 0x80, 0xc7, 0xc6, 0x88, 0x00, 0xbe, 0xe9,
- 0xcc, 0x88, 0xf0, 0x00, 0xbe, 0xe1, 0xc4, 0xe1, 0x7b, 0x00, 0xbe, 0x78,
- 0xc6, 0xce, 0xd7, 0x00, 0xbe, 0xd1, 0xc3, 0x00, 0xa4, 0x00, 0xbe, 0xa1,
- 0xc6, 0xd4, 0xf5, 0x00, 0xbe, 0x70, 0xc5, 0xd7, 0x00, 0x00, 0xbe, 0xc1,
- 0x03, 0x42, 0x72, 0x0e, 0xce, 0x6c, 0x9d, 0x00, 0xbe, 0xb1, 0xc4, 0xcc,
- 0xa1, 0x00, 0xbe, 0x90, 0xca, 0x9d, 0x52, 0x00, 0xbe, 0x69, 0xc6, 0xcd,
- 0x15, 0x00, 0xbe, 0x50, 0xc4, 0xe5, 0x7b, 0x00, 0xbe, 0x61, 0xc6, 0xd4,
- 0x4d, 0x00, 0xbe, 0x38, 0x97, 0x00, 0xbe, 0x29, 0x8b, 0x00, 0xbe, 0x19,
- 0x87, 0x00, 0xbe, 0x11, 0x83, 0x00, 0xbd, 0xb0, 0x91, 0x00, 0xbe, 0x21,
- 0x87, 0x00, 0xbd, 0xf0, 0x87, 0x00, 0xbe, 0x01, 0x8b, 0x00, 0xbd, 0xc0,
- 0x83, 0x00, 0xbd, 0xf9, 0x9b, 0x00, 0xbd, 0xd0, 0x83, 0x00, 0xbd, 0xe9,
- 0x97, 0x00, 0xbd, 0xe0, 0x97, 0x00, 0xbd, 0x99, 0x8b, 0x00, 0xbd, 0x81,
- 0x83, 0x00, 0xbd, 0x21, 0x93, 0x00, 0xbd, 0x18, 0xc3, 0x01, 0x59, 0x00,
- 0xbd, 0x91, 0xc3, 0x01, 0xb4, 0x00, 0xbd, 0x88, 0x97, 0x00, 0xbd, 0x4b,
- 0x02, 0x72, 0x20, 0x8d, 0x00, 0xbd, 0x40, 0x8b, 0x00, 0xbd, 0x30, 0x91,
- 0x00, 0xbc, 0xb9, 0x83, 0x00, 0xbc, 0xa8, 0x91, 0x00, 0xbc, 0x91, 0x83,
- 0x00, 0xbc, 0x80, 0x91, 0x00, 0xbc, 0x69, 0x83, 0x00, 0xbc, 0x58, 0x91,
- 0x00, 0xbc, 0x41, 0x83, 0x00, 0xbc, 0x30, 0x91, 0x00, 0xbc, 0x19, 0x83,
- 0x00, 0xbc, 0x08, 0x45, 0x04, 0x74, 0xc2, 0x72, 0x24, 0x83, 0x01, 0x85,
- 0xa9, 0x8b, 0x01, 0x85, 0xb9, 0x97, 0x01, 0x85, 0xc9, 0x87, 0x01, 0x85,
- 0xd9, 0x91, 0x01, 0x85, 0xe8, 0x47, 0x7e, 0xdc, 0x42, 0x72, 0x61, 0x8b,
- 0x01, 0x86, 0xfb, 0x02, 0x72, 0x6f, 0x83, 0x01, 0x86, 0xf1, 0x97, 0x01,
- 0x87, 0x01, 0x87, 0x01, 0x87, 0x09, 0x91, 0x01, 0x87, 0x10, 0x83, 0x01,
- 0x85, 0x59, 0x8b, 0x01, 0x85, 0x69, 0x97, 0x01, 0x85, 0x79, 0x87, 0x01,
- 0x85, 0x89, 0x91, 0x01, 0x85, 0x98, 0x83, 0x01, 0x85, 0x61, 0x8b, 0x01,
- 0x85, 0x71, 0x97, 0x01, 0x85, 0x81, 0x87, 0x01, 0x85, 0x91, 0x91, 0x01,
- 0x85, 0xa0, 0x83, 0x01, 0x85, 0xb1, 0x8b, 0x01, 0x85, 0xc1, 0x97, 0x01,
- 0x85, 0xd1, 0x87, 0x01, 0x85, 0xe1, 0x91, 0x01, 0x85, 0xf0, 0x83, 0x01,
- 0x85, 0xf9, 0x8b, 0x01, 0x86, 0x09, 0x97, 0x01, 0x86, 0x21, 0x87, 0x01,
- 0x86, 0x31, 0x91, 0x01, 0x86, 0x40, 0x83, 0x01, 0x86, 0x01, 0x8b, 0x01,
- 0x86, 0x11, 0x97, 0x01, 0x86, 0x29, 0x87, 0x01, 0x86, 0x39, 0x91, 0x01,
- 0x86, 0x48, 0x83, 0x01, 0x86, 0x51, 0x8b, 0x01, 0x86, 0x59, 0x97, 0x01,
- 0x86, 0x61, 0x87, 0x01, 0x86, 0x69, 0x91, 0x01, 0x86, 0x70, 0x83, 0x01,
- 0x86, 0x79, 0x8b, 0x01, 0x86, 0x91, 0x97, 0x01, 0x86, 0xa9, 0x87, 0x01,
- 0x86, 0xc1, 0x91, 0x01, 0x86, 0xd8, 0x83, 0x01, 0x86, 0x81, 0x8b, 0x01,
- 0x86, 0x99, 0x97, 0x01, 0x86, 0xb1, 0x87, 0x01, 0x86, 0xc9, 0x91, 0x01,
- 0x86, 0xe0, 0x83, 0x01, 0x86, 0x89, 0x8b, 0x01, 0x86, 0xa1, 0x97, 0x01,
- 0x86, 0xb9, 0x87, 0x01, 0x86, 0xd1, 0x91, 0x01, 0x86, 0xe8, 0x83, 0x01,
- 0x87, 0x21, 0x97, 0x01, 0x87, 0x31, 0x91, 0x01, 0x87, 0x40, 0x83, 0x01,
- 0x87, 0x49, 0x8b, 0x01, 0x87, 0x51, 0x97, 0x01, 0x87, 0x59, 0x87, 0x01,
- 0x87, 0x61, 0x91, 0x01, 0x87, 0x68, 0x83, 0x01, 0x87, 0x79, 0x8b, 0x01,
- 0x87, 0x81, 0x87, 0x01, 0x87, 0x89, 0x91, 0x01, 0x87, 0x90, 0x97, 0x01,
- 0x87, 0xa1, 0x83, 0x01, 0x87, 0xb9, 0x8b, 0x01, 0x87, 0xc1, 0x87, 0x01,
- 0x87, 0xc9, 0x91, 0x01, 0x87, 0xd0, 0xc4, 0x18, 0x83, 0x08, 0xfa, 0xb9,
- 0xc2, 0x26, 0x51, 0x08, 0xfa, 0xb0, 0xc3, 0x0c, 0x5b, 0x08, 0xfa, 0xa9,
- 0xc3, 0x06, 0x9e, 0x08, 0xfa, 0xa0, 0xc4, 0x04, 0x5e, 0x08, 0xfa, 0x99,
- 0xc2, 0x01, 0x47, 0x08, 0xfa, 0x90, 0xc4, 0x7d, 0xfa, 0x08, 0xfa, 0x71,
- 0xca, 0xa3, 0x56, 0x08, 0xfa, 0x40, 0xc2, 0x00, 0xa4, 0x08, 0xf8, 0xf9,
- 0x83, 0x08, 0xf8, 0xf0, 0xc2, 0x00, 0xa4, 0x08, 0xf8, 0xe9, 0x83, 0x08,
- 0xf8, 0xe0, 0x8e, 0x08, 0xf8, 0x68, 0x94, 0x08, 0xf8, 0x58, 0xc4, 0x0f,
- 0x7c, 0x08, 0x85, 0xc9, 0xc5, 0x44, 0x7b, 0x08, 0x84, 0x10, 0xc2, 0x00,
- 0xa4, 0x08, 0x84, 0xd9, 0xc3, 0x44, 0x76, 0x08, 0x84, 0xd1, 0x83, 0x08,
- 0x84, 0xc8, 0xc2, 0x00, 0xa4, 0x08, 0x84, 0xc1, 0x83, 0x08, 0x84, 0xb8,
- 0xd2, 0x4d, 0x28, 0x00, 0x64, 0x01, 0xc6, 0xc4, 0x36, 0x00, 0x64, 0x20,
- 0xca, 0x1e, 0x18, 0x00, 0x64, 0x09, 0xdd, 0x11, 0x37, 0x00, 0x67, 0x98,
- 0xc7, 0x11, 0x41, 0x00, 0x64, 0x11, 0xc7, 0x76, 0x59, 0x00, 0x65, 0xe8,
- 0xc5, 0x44, 0x7b, 0x00, 0x64, 0x19, 0xc4, 0x0f, 0x7c, 0x00, 0x66, 0x68,
- 0x83, 0x00, 0x64, 0x2b, 0x02, 0x72, 0x75, 0x8b, 0x00, 0x64, 0x3b, 0x02,
- 0x72, 0x81, 0x97, 0x00, 0x64, 0x4b, 0x02, 0x72, 0x85, 0x18, 0xc2, 0x72,
- 0x89, 0x87, 0x00, 0x64, 0x73, 0x02, 0x72, 0x93, 0x91, 0x00, 0x64, 0x93,
- 0x02, 0x72, 0x97, 0x0d, 0xc2, 0x72, 0x9b, 0x09, 0xc2, 0x72, 0xa5, 0x10,
- 0xc2, 0x72, 0xaf, 0x05, 0xc2, 0x72, 0xc8, 0x0c, 0xc2, 0x72, 0xd2, 0x16,
- 0xc2, 0x72, 0xdc, 0x06, 0xc2, 0x72, 0xea, 0x12, 0xc2, 0x72, 0xf8, 0x04,
- 0xc2, 0x73, 0x02, 0xc2, 0x00, 0xad, 0x00, 0x65, 0x71, 0xc2, 0x1d, 0x5f,
- 0x00, 0x65, 0x79, 0x14, 0xc2, 0x73, 0x0c, 0x0e, 0xc2, 0x73, 0x16, 0x15,
- 0xc2, 0x73, 0x1e, 0xc2, 0x00, 0xa4, 0x00, 0x65, 0xc9, 0xc2, 0x00, 0x67,
- 0x00, 0x66, 0xf0, 0x83, 0x00, 0x65, 0xf1, 0x8b, 0x00, 0x66, 0x41, 0x97,
- 0x00, 0x66, 0x60, 0x8b, 0x00, 0x66, 0x00, 0x97, 0x00, 0x66, 0x10, 0x94,
- 0x00, 0x66, 0x1b, 0x02, 0x73, 0x2e, 0x8e, 0x00, 0x67, 0x12, 0x02, 0x73,
- 0x32, 0x87, 0x00, 0x66, 0x38, 0x91, 0x00, 0x66, 0x58, 0xc2, 0x01, 0x47,
- 0x00, 0x67, 0x41, 0xc4, 0x04, 0x5e, 0x00, 0x67, 0x48, 0xc3, 0x06, 0x9e,
- 0x00, 0x67, 0x51, 0xc3, 0x0c, 0x5b, 0x00, 0x67, 0x58, 0xc2, 0x26, 0x51,
- 0x00, 0x67, 0x61, 0xc4, 0x18, 0x83, 0x00, 0x67, 0x68, 0xc2, 0x00, 0x8c,
- 0x01, 0x78, 0x03, 0x02, 0x73, 0x36, 0x12, 0xc2, 0x73, 0x3c, 0xc2, 0x18,
- 0x9f, 0x01, 0x7b, 0xe0, 0x0b, 0xc2, 0x73, 0x48, 0x07, 0xc2, 0x73, 0x58,
- 0x03, 0xc2, 0x73, 0x68, 0xc3, 0x02, 0xa8, 0x01, 0x7d, 0x3a, 0x02, 0x73,
- 0x74, 0x11, 0xc2, 0x73, 0x7a, 0x0b, 0xc2, 0x73, 0x9d, 0x14, 0xc2, 0x73,
- 0xad, 0x07, 0x42, 0x73, 0xbd, 0x0e, 0xc2, 0x73, 0xc9, 0x07, 0xc2, 0x73,
- 0xd3, 0x12, 0xc2, 0x73, 0xe9, 0x05, 0xc2, 0x73, 0xff, 0xc4, 0x04, 0x74,
- 0x01, 0x79, 0x49, 0x0a, 0xc2, 0x74, 0x0b, 0xc4, 0xb1, 0x10, 0x01, 0x79,
- 0xc9, 0x16, 0xc2, 0x74, 0x13, 0xc5, 0x07, 0x0a, 0x01, 0x7a, 0x29, 0xc2,
- 0x01, 0xbd, 0x01, 0x7a, 0x39, 0x03, 0xc2, 0x74, 0x21, 0xc4, 0x4d, 0x77,
- 0x01, 0x7b, 0x11, 0x0b, 0xc2, 0x74, 0x31, 0xc3, 0x32, 0x2b, 0x01, 0x7b,
- 0x51, 0xc4, 0x0e, 0x49, 0x01, 0x7d, 0x98, 0x11, 0xc2, 0x74, 0x3d, 0xcf,
- 0x60, 0xbf, 0x01, 0x78, 0xb1, 0x07, 0xc2, 0x74, 0x47, 0x03, 0x42, 0x74,
- 0x51, 0xc2, 0x01, 0x47, 0x01, 0x78, 0x33, 0x02, 0x74, 0x61, 0x03, 0xc2,
- 0x74, 0x67, 0xc2, 0x00, 0x4d, 0x01, 0x78, 0xb9, 0x42, 0x02, 0x53, 0xc2,
- 0x74, 0x79, 0x14, 0xc2, 0x74, 0x85, 0x0b, 0xc2, 0x74, 0x97, 0x11, 0x42,
- 0x74, 0xa3, 0xc2, 0x00, 0x57, 0x01, 0x78, 0x41, 0x11, 0xc2, 0x74, 0xaf,
- 0x07, 0xc2, 0x74, 0xbd, 0x0b, 0x42, 0x74, 0xc9, 0x10, 0xc2, 0x74, 0xd5,
- 0xc4, 0x01, 0xdc, 0x01, 0x78, 0x59, 0x03, 0xc2, 0x74, 0xe1, 0xc3, 0x18,
- 0x84, 0x01, 0x7e, 0x8b, 0x02, 0x74, 0xec, 0xc2, 0x05, 0xd0, 0x01, 0x7b,
- 0x61, 0xc9, 0xa9, 0x97, 0x01, 0x7e, 0x58, 0x11, 0xc2, 0x74, 0xf2, 0x0e,
- 0xc2, 0x75, 0x0e, 0xc4, 0xe1, 0x23, 0x01, 0x79, 0x31, 0x03, 0xc2, 0x75,
- 0x1e, 0xc3, 0x24, 0x9a, 0x01, 0x7d, 0x10, 0xc2, 0x00, 0x69, 0x01, 0x78,
- 0x71, 0x10, 0x42, 0x75, 0x30, 0xc4, 0x01, 0xd2, 0x01, 0x78, 0x91, 0x14,
- 0xc2, 0x75, 0x3c, 0xc3, 0x01, 0x64, 0x01, 0x7b, 0xf1, 0xc2, 0x00, 0xcc,
- 0x01, 0x7c, 0xb8, 0x14, 0xc2, 0x75, 0x48, 0x11, 0xc2, 0x75, 0x54, 0x07,
- 0xc2, 0x75, 0x60, 0x03, 0xc2, 0x75, 0x6c, 0x0a, 0xc2, 0x75, 0x7b, 0x42,
- 0x00, 0x34, 0x42, 0x75, 0x87, 0x0b, 0xc2, 0x75, 0x8f, 0xc3, 0xba, 0x3f,
- 0x01, 0x79, 0x39, 0x03, 0xc2, 0x75, 0xa1, 0xc2, 0x01, 0xf4, 0x01, 0x7c,
- 0xd1, 0xc2, 0x01, 0xbd, 0x01, 0x7c, 0xd8, 0xc4, 0x42, 0x86, 0x01, 0x78,
- 0xe1, 0xc2, 0x25, 0x1f, 0x01, 0x7a, 0x21, 0x42, 0x01, 0x10, 0xc2, 0x75,
- 0xaf, 0xc2, 0x00, 0x58, 0x01, 0x7b, 0xe8, 0x91, 0x01, 0x79, 0x0b, 0x02,
- 0x75, 0xbb, 0x42, 0x02, 0x59, 0xc2, 0x75, 0xc7, 0xc3, 0x00, 0x9e, 0x01,
- 0x7d, 0x41, 0xc4, 0xe1, 0x77, 0x01, 0x7e, 0x08, 0x0b, 0xc2, 0x75, 0xd3,
- 0x11, 0xc2, 0x75, 0xe3, 0x14, 0xc2, 0x75, 0xff, 0x03, 0xc2, 0x76, 0x11,
- 0x0e, 0xc2, 0x76, 0x1d, 0xc3, 0x24, 0xd7, 0x01, 0x7c, 0xb0, 0x11, 0xc2,
- 0x76, 0x2f, 0xc2, 0x00, 0x49, 0x01, 0x7b, 0xc8, 0xc2, 0x02, 0x53, 0x01,
- 0x7a, 0x89, 0x0b, 0xc2, 0x76, 0x39, 0x03, 0xc2, 0x76, 0x51, 0xc6, 0x17,
- 0xa0, 0x01, 0x7b, 0xd9, 0xc3, 0x63, 0x97, 0x01, 0x7c, 0xe1, 0x0e, 0xc2,
- 0x76, 0x63, 0x14, 0x42, 0x76, 0x6d, 0xc2, 0x00, 0x06, 0x01, 0x7a, 0xf9,
- 0x94, 0x01, 0x7b, 0xc0, 0xc5, 0xd9, 0x4e, 0x01, 0x7c, 0xa9, 0xc6, 0xd2,
- 0xaf, 0x01, 0x7d, 0x28, 0xcb, 0x20, 0x12, 0x0f, 0xb0, 0xd1, 0xcc, 0x19,
- 0xc5, 0x0f, 0xb0, 0xc8, 0x44, 0x1b, 0x3f, 0xc2, 0x76, 0x79, 0xc3, 0x02,
- 0x28, 0x0b, 0x79, 0x90, 0xa5, 0x0b, 0x7c, 0xc9, 0xa4, 0x0b, 0x7c, 0xc1,
- 0xa3, 0x0b, 0x7c, 0xb9, 0xa2, 0x0b, 0x7c, 0xb1, 0xa1, 0x0b, 0x7c, 0xa9,
- 0xa0, 0x0b, 0x7c, 0xa1, 0x9f, 0x0b, 0x7c, 0x98, 0x87, 0x0b, 0x7a, 0x49,
- 0x83, 0x0b, 0x79, 0xb9, 0xc2, 0x00, 0xa4, 0x0b, 0x79, 0x71, 0xc2, 0x0c,
- 0x65, 0x0b, 0x79, 0x50, 0xc2, 0x1d, 0x5f, 0x0b, 0x78, 0xe1, 0x83, 0x0b,
- 0x78, 0xd0, 0xca, 0x57, 0x90, 0x0b, 0x7a, 0x80, 0xc2, 0x00, 0xa4, 0x0b,
- 0x79, 0x69, 0x83, 0x0b, 0x79, 0x60, 0xc2, 0x00, 0xa4, 0x0b, 0x79, 0x21,
- 0x83, 0x0b, 0x79, 0x18, 0xc2, 0x00, 0xa4, 0x0b, 0x78, 0xa9, 0x83, 0x0b,
- 0x78, 0xa0, 0xc2, 0x15, 0x1d, 0x0b, 0x7a, 0x39, 0x83, 0x0b, 0x79, 0xc1,
- 0xc2, 0x00, 0xa4, 0x0b, 0x79, 0x79, 0xc2, 0x02, 0xb4, 0x0b, 0x79, 0x58,
- 0xc2, 0x1d, 0x5f, 0x0b, 0x78, 0xe9, 0x83, 0x0b, 0x78, 0xd8, 0xc3, 0x8d,
- 0xca, 0x0b, 0x79, 0xf9, 0x10, 0xc2, 0x76, 0x91, 0xc2, 0x00, 0xad, 0x0b,
- 0x78, 0x30, 0x15, 0xc2, 0x76, 0x9b, 0xc2, 0x1d, 0x5f, 0x0b, 0x7a, 0x01,
- 0x83, 0x0b, 0x79, 0xe8, 0x83, 0x0b, 0x79, 0xe1, 0xc2, 0x00, 0xa4, 0x0b,
- 0x79, 0xb0, 0x15, 0xc2, 0x76, 0xa5, 0x83, 0x0b, 0x78, 0x69, 0xc2, 0x00,
- 0xb3, 0x0b, 0x78, 0x60, 0xc2, 0x00, 0xa4, 0x0b, 0x79, 0x49, 0x83, 0x0b,
- 0x79, 0x40, 0xc2, 0x1d, 0x5f, 0x0b, 0x78, 0xc9, 0x83, 0x0b, 0x78, 0xc0,
- 0x90, 0x0b, 0x7b, 0x62, 0x02, 0x76, 0xaf, 0xc2, 0x00, 0x35, 0x0b, 0x7c,
- 0x30, 0x90, 0x0b, 0x7b, 0x1a, 0x02, 0x76, 0xb3, 0x94, 0x0b, 0x7b, 0xa8,
- 0x89, 0x0b, 0x7a, 0xf8, 0x94, 0x0b, 0x7c, 0x11, 0x9b, 0x0b, 0x7b, 0x00,
- 0x87, 0x0b, 0x7b, 0xa0, 0x89, 0x0b, 0x7a, 0xc0, 0x00, 0x42, 0x76, 0xb7,
- 0xcd, 0x0e, 0x9f, 0x0f, 0xbe, 0x19, 0xca, 0x21, 0x3e, 0x0f, 0xbe, 0x08,
- 0xc6, 0x07, 0x09, 0x0f, 0xbc, 0x79, 0xc6, 0x01, 0x7a, 0x01, 0x35, 0x50,
- 0xd0, 0x59, 0x82, 0x0f, 0xbc, 0x29, 0xcb, 0x89, 0x51, 0x01, 0x35, 0x58,
- 0x00, 0xc2, 0x76, 0xc3, 0xe0, 0x09, 0x47, 0x01, 0x3b, 0x68, 0x00, 0xc2,
- 0x76, 0xcf, 0xe0, 0x09, 0x47, 0x01, 0x3b, 0x60, 0x49, 0x34, 0x24, 0xc2,
- 0x76, 0xdb, 0xd3, 0x3e, 0x0a, 0x0f, 0xbd, 0x81, 0x4c, 0x0e, 0x93, 0x42,
- 0x76, 0xe7, 0xd1, 0x55, 0x7f, 0x01, 0x35, 0x61, 0xc4, 0x06, 0x23, 0x01,
- 0x2c, 0x91, 0xc6, 0x13, 0x57, 0x0f, 0xbd, 0x51, 0x43, 0x4c, 0xf2, 0x42,
- 0x76, 0xf3, 0xcf, 0x15, 0x8e, 0x0f, 0xbd, 0xe1, 0xd2, 0x21, 0x36, 0x0f,
- 0xbe, 0x70, 0x9b, 0x0b, 0x73, 0xfb, 0x02, 0x76, 0xff, 0x83, 0x0b, 0x73,
- 0x6b, 0x02, 0x77, 0x03, 0x91, 0x0b, 0x73, 0xeb, 0x02, 0x77, 0x0d, 0x94,
- 0x0b, 0x73, 0xe1, 0x90, 0x0b, 0x73, 0xdb, 0x02, 0x77, 0x11, 0x86, 0x0b,
- 0x73, 0xc9, 0x9a, 0x0b, 0x73, 0xc1, 0x8a, 0x0b, 0x73, 0xb3, 0x02, 0x77,
- 0x19, 0x93, 0x0b, 0x73, 0xa9, 0x8e, 0x0b, 0x73, 0xa1, 0x97, 0x0b, 0x73,
- 0x91, 0x85, 0x0b, 0x73, 0x89, 0x84, 0x0b, 0x73, 0x81, 0x87, 0x0b, 0x73,
- 0x79, 0x8c, 0x0b, 0x73, 0x71, 0x8d, 0x0b, 0x73, 0x63, 0x02, 0x77, 0x1d,
- 0x8b, 0x0b, 0x73, 0x59, 0x88, 0x0b, 0x73, 0x51, 0x89, 0x0b, 0x73, 0x49,
- 0x96, 0x0b, 0x73, 0x41, 0x92, 0x0b, 0x73, 0x39, 0x9c, 0x0b, 0x73, 0x29,
- 0x99, 0x0b, 0x73, 0x19, 0x98, 0x0b, 0x73, 0x11, 0x95, 0x0b, 0x73, 0x09,
- 0x8f, 0x0b, 0x73, 0x00, 0x9b, 0x0b, 0x72, 0xfb, 0x02, 0x77, 0x21, 0x83,
- 0x0b, 0x72, 0x6b, 0x02, 0x77, 0x25, 0x91, 0x0b, 0x72, 0xeb, 0x02, 0x77,
- 0x2f, 0x94, 0x0b, 0x72, 0xe1, 0x90, 0x0b, 0x72, 0xdb, 0x02, 0x77, 0x33,
- 0x86, 0x0b, 0x72, 0xc9, 0x9a, 0x0b, 0x72, 0xc1, 0x8a, 0x0b, 0x72, 0xb3,
- 0x02, 0x77, 0x3b, 0x93, 0x0b, 0x72, 0xa9, 0x8e, 0x0b, 0x72, 0xa1, 0x97,
- 0x0b, 0x72, 0x91, 0x85, 0x0b, 0x72, 0x89, 0x84, 0x0b, 0x72, 0x81, 0x87,
- 0x0b, 0x72, 0x79, 0x8c, 0x0b, 0x72, 0x71, 0x8d, 0x0b, 0x72, 0x63, 0x02,
- 0x77, 0x3f, 0x8b, 0x0b, 0x72, 0x59, 0x88, 0x0b, 0x72, 0x51, 0x89, 0x0b,
- 0x72, 0x49, 0x96, 0x0b, 0x72, 0x41, 0x92, 0x0b, 0x72, 0x39, 0x9c, 0x0b,
- 0x72, 0x29, 0x99, 0x0b, 0x72, 0x19, 0x98, 0x0b, 0x72, 0x11, 0x95, 0x0b,
- 0x72, 0x09, 0x8f, 0x0b, 0x72, 0x00, 0xc4, 0x04, 0x5e, 0x0b, 0x74, 0x1b,
- 0x02, 0x77, 0x43, 0xc2, 0x01, 0x47, 0x0b, 0x74, 0x12, 0x02, 0x77, 0x49,
- 0xcf, 0x65, 0x8d, 0x0b, 0x74, 0xa0, 0xc4, 0x18, 0x83, 0x0b, 0x74, 0x39,
- 0xc2, 0x26, 0x51, 0x0b, 0x74, 0x30, 0xc3, 0x0c, 0x5b, 0x0b, 0x74, 0x29,
- 0xc3, 0x06, 0x9e, 0x0b, 0x74, 0x20, 0xc7, 0x20, 0xbe, 0x0b, 0x74, 0x91,
- 0xc5, 0x6a, 0x79, 0x0b, 0x74, 0x58, 0xc8, 0x48, 0x4e, 0x0b, 0x74, 0x89,
- 0xc6, 0x45, 0x38, 0x0b, 0x74, 0x80, 0xc6, 0x13, 0xf0, 0x0b, 0x74, 0x79,
- 0xc7, 0x56, 0x07, 0x0b, 0x74, 0x70, 0xc7, 0x53, 0x70, 0x0b, 0x74, 0x69,
- 0xc5, 0x21, 0x30, 0x0b, 0x74, 0x61, 0xc2, 0x00, 0x4d, 0x0b, 0x74, 0x50,
- 0xc6, 0x08, 0x8f, 0x01, 0x1e, 0xb1, 0xc9, 0x6b, 0x42, 0x01, 0x1e, 0xa8,
- 0x24, 0xc2, 0x77, 0x4f, 0x25, 0xc2, 0x77, 0x8b, 0x1f, 0xc2, 0x77, 0xc7,
- 0x1e, 0xc2, 0x78, 0x03, 0x26, 0xc2, 0x78, 0x3f, 0x22, 0xc2, 0x78, 0x7b,
- 0x1d, 0xc2, 0x78, 0xb7, 0x21, 0xc2, 0x78, 0xed, 0x23, 0xc2, 0x79, 0x29,
- 0x20, 0x42, 0x79, 0x65, 0x26, 0xc2, 0x79, 0xa1, 0x20, 0xc2, 0x79, 0xd1,
- 0x1e, 0xc2, 0x7a, 0x0d, 0x23, 0xc2, 0x7a, 0x49, 0x24, 0xc2, 0x7a, 0x85,
- 0x21, 0xc2, 0x7a, 0xc1, 0x1d, 0xc2, 0x7a, 0xfd, 0x22, 0xc2, 0x7b, 0x39,
- 0x25, 0xc2, 0x7b, 0x75, 0x1f, 0x42, 0x7b, 0xb1, 0xc2, 0x01, 0x47, 0x0f,
- 0x46, 0x41, 0xc4, 0x04, 0x5e, 0x0f, 0x46, 0x48, 0xc3, 0x06, 0x9e, 0x0f,
- 0x46, 0x51, 0xc3, 0x0c, 0x5b, 0x0f, 0x46, 0x58, 0xc2, 0x26, 0x51, 0x0f,
- 0x46, 0x61, 0xc4, 0x18, 0x83, 0x0f, 0x46, 0x68, 0x07, 0xc2, 0x7b, 0xed,
- 0xc8, 0x4c, 0x98, 0x0f, 0x46, 0x98, 0x95, 0x0f, 0x46, 0x91, 0xca, 0x6f,
- 0x20, 0x0f, 0x46, 0xa8, 0x16, 0xc2, 0x7b, 0xf7, 0xcd, 0x7e, 0xe8, 0x08,
- 0x4f, 0xf1, 0x07, 0xc2, 0x7c, 0x09, 0x15, 0xc2, 0x7c, 0x15, 0x08, 0xc2,
- 0x7c, 0x21, 0x44, 0x01, 0xb4, 0x42, 0x7c, 0x2d, 0xc4, 0x22, 0x71, 0x08,
- 0x4e, 0x43, 0x02, 0x7c, 0x39, 0xc5, 0x01, 0xdb, 0x08, 0x4e, 0x3b, 0x02,
- 0x7c, 0x43, 0x15, 0xc2, 0x7c, 0x4d, 0x08, 0xc2, 0x7c, 0x5f, 0x16, 0xc2,
- 0x7c, 0x67, 0xc3, 0x01, 0xb4, 0x08, 0x4e, 0x02, 0x02, 0x7c, 0x78, 0x48,
- 0x3c, 0xbd, 0xc2, 0x7c, 0x7c, 0x46, 0x02, 0x91, 0x42, 0x7c, 0x88, 0xc2,
- 0xe7, 0x79, 0x08, 0x4c, 0xf8, 0xc2, 0x0b, 0xc6, 0x08, 0x4c, 0xe9, 0x16,
- 0xc2, 0x7c, 0xe7, 0xc2, 0x0f, 0x60, 0x08, 0x4c, 0xb9, 0x0d, 0xc2, 0x7c,
- 0xf9, 0x15, 0xc2, 0x7d, 0x03, 0xc3, 0xbf, 0xbc, 0x08, 0x4c, 0x91, 0xc2,
- 0x00, 0xc7, 0x08, 0x4c, 0x81, 0xc2, 0x02, 0x59, 0x08, 0x4c, 0x73, 0x02,
- 0x7d, 0x0e, 0x83, 0x08, 0x4c, 0x01, 0x87, 0x08, 0x4c, 0x09, 0x8b, 0x08,
- 0x4c, 0x11, 0x91, 0x08, 0x4c, 0x19, 0xc2, 0x1d, 0x5f, 0x08, 0x4c, 0x21,
- 0xc2, 0x01, 0x09, 0x08, 0x4c, 0x29, 0xc2, 0x00, 0xde, 0x08, 0x4c, 0x33,
- 0x02, 0x7d, 0x14, 0xc2, 0x03, 0xa4, 0x08, 0x4c, 0x41, 0xc2, 0x00, 0xad,
- 0x08, 0x4c, 0x49, 0x10, 0x42, 0x7d, 0x1a, 0x47, 0x24, 0x69, 0xc2, 0x7d,
- 0x28, 0xcc, 0x89, 0xec, 0x01, 0x4c, 0xd8, 0xc3, 0x51, 0x2d, 0x05, 0x5f,
- 0x29, 0x03, 0xc2, 0x7d, 0x2e, 0x97, 0x05, 0x57, 0x70, 0xc3, 0x51, 0x2d,
- 0x05, 0x5f, 0x21, 0x8b, 0x05, 0x57, 0x58, 0x97, 0x05, 0x57, 0x61, 0xc3,
- 0x51, 0x2d, 0x05, 0x5f, 0x40, 0xc7, 0xca, 0x32, 0x05, 0x5f, 0x10, 0xc3,
- 0x67, 0xa8, 0x05, 0x5e, 0x4b, 0x02, 0x7d, 0x36, 0x83, 0x05, 0x5e, 0x2b,
- 0x02, 0x7d, 0x3c, 0xc2, 0x00, 0xc1, 0x05, 0x57, 0x41, 0xc2, 0x1d, 0x5f,
- 0x05, 0x57, 0x18, 0xc2, 0x00, 0x31, 0x05, 0x5e, 0x3b, 0x02, 0x7d, 0x42,
- 0x16, 0xc2, 0x7d, 0x48, 0xc3, 0x18, 0x9c, 0x05, 0x5e, 0x50, 0x83, 0x05,
- 0x5e, 0x23, 0x02, 0x7d, 0x52, 0xc3, 0x09, 0x89, 0x05, 0x5e, 0x80, 0xc2,
- 0x01, 0x04, 0x05, 0x5e, 0x03, 0x02, 0x7d, 0x58, 0xc3, 0x18, 0x9c, 0x05,
- 0x5e, 0x40, 0xc3, 0x09, 0x89, 0x05, 0x5e, 0xd1, 0x83, 0x05, 0x5e, 0xa8,
- 0xc3, 0x18, 0x9c, 0x05, 0x5e, 0xc9, 0x06, 0xc2, 0x7d, 0x5e, 0xc2, 0x00,
- 0x31, 0x05, 0x5e, 0xb8, 0xc3, 0x18, 0x9c, 0x05, 0x5e, 0xc1, 0xc2, 0x01,
- 0x04, 0x05, 0x5e, 0x90, 0xc2, 0x0c, 0x65, 0x05, 0x57, 0x51, 0xc2, 0x00,
- 0xa4, 0x05, 0x57, 0x49, 0xc2, 0x00, 0xc2, 0x05, 0x5e, 0x08, 0x83, 0x05,
- 0x57, 0x11, 0xc2, 0x00, 0x31, 0x05, 0x5e, 0x30, 0xc7, 0xca, 0x32, 0x05,
- 0x5e, 0xe8, 0xc7, 0xca, 0x32, 0x05, 0x5e, 0xe0, 0xc3, 0x09, 0x89, 0x05,
- 0x5e, 0x99, 0xc2, 0x00, 0x31, 0x05, 0x5e, 0xb0, 0xc9, 0xaf, 0xeb, 0x0f,
- 0xb5, 0xa9, 0xc7, 0x68, 0xce, 0x0f, 0xb4, 0xf1, 0xc8, 0xbe, 0x75, 0x0f,
- 0xb5, 0x00, 0x05, 0xc2, 0x7d, 0x68, 0x15, 0xc2, 0x7d, 0x92, 0x14, 0xc2,
- 0x7d, 0xa8, 0x0e, 0xc2, 0x7d, 0xbe, 0x09, 0xc2, 0x7d, 0xd0, 0x04, 0xc2,
- 0x7d, 0xe5, 0x06, 0xc2, 0x7d, 0xf1, 0x03, 0xc2, 0x7d, 0xfb, 0x12, 0xc2,
- 0x7e, 0x0d, 0x16, 0xc2, 0x7e, 0x19, 0x17, 0xc2, 0x7e, 0x25, 0x18, 0xc2,
- 0x7e, 0x35, 0x0f, 0xc2, 0x7e, 0x41, 0x07, 0xc2, 0x7e, 0x4b, 0x0a, 0xc2,
- 0x7e, 0x57, 0x1b, 0xc2, 0x7e, 0x63, 0xca, 0x9f, 0x32, 0x00, 0x17, 0xf0,
- 0x45, 0x06, 0x98, 0xc2, 0x7e, 0x6f, 0xcb, 0x95, 0x0d, 0x08, 0xb2, 0x11,
- 0xc4, 0x1c, 0xd0, 0x08, 0xb2, 0x08, 0xc4, 0xe4, 0xdf, 0x08, 0xb2, 0x21,
- 0x03, 0xc2, 0x7e, 0x93, 0x42, 0x03, 0x32, 0x42, 0x7e, 0x9f, 0x03, 0xc2,
- 0x7e, 0xab, 0x91, 0x08, 0xb1, 0xd9, 0x87, 0x08, 0xb1, 0xc9, 0x48, 0xac,
- 0xc1, 0xc2, 0x7e, 0xb7, 0x97, 0x08, 0xb1, 0x9b, 0x02, 0x7e, 0xc5, 0x8b,
- 0x08, 0xb1, 0x8a, 0x02, 0x7e, 0xc9, 0x0e, 0xc2, 0x7e, 0xcd, 0xc2, 0x00,
- 0xa4, 0x08, 0xb1, 0x71, 0x15, 0xc2, 0x7e, 0xd7, 0x18, 0xc2, 0x7e, 0xe7,
- 0xc2, 0x02, 0x59, 0x08, 0xb1, 0x41, 0xc2, 0x1d, 0x5f, 0x08, 0xb1, 0x39,
- 0xc2, 0x00, 0xad, 0x08, 0xb1, 0x31, 0x04, 0xc2, 0x7e, 0xf1, 0x12, 0xc2,
- 0x7e, 0xfb, 0x10, 0xc2, 0x7f, 0x05, 0x06, 0xc2, 0x7f, 0x1b, 0x16, 0xc2,
- 0x7f, 0x29, 0x0c, 0xc2, 0x7f, 0x37, 0x05, 0xc2, 0x7f, 0x41, 0x09, 0xc2,
- 0x7f, 0x4b, 0x0d, 0xc2, 0x7f, 0x55, 0x83, 0x08, 0xb0, 0x03, 0x02, 0x7f,
- 0x5f, 0x91, 0x08, 0xb0, 0x61, 0x87, 0x08, 0xb0, 0x51, 0x97, 0x08, 0xb0,
- 0x23, 0x02, 0x7f, 0x6b, 0x8b, 0x08, 0xb0, 0x12, 0x02, 0x7f, 0x6f, 0xc2,
- 0x00, 0x34, 0x01, 0x34, 0x59, 0xc3, 0x01, 0x93, 0x01, 0x34, 0x50, 0xe0,
- 0x09, 0xe7, 0x08, 0xb3, 0x60, 0x46, 0x00, 0x6b, 0x42, 0x7f, 0x73, 0xcf,
- 0x09, 0xf8, 0x08, 0xb3, 0x31, 0xc8, 0x00, 0xbf, 0x08, 0xb3, 0x28, 0xcf,
- 0x09, 0xf8, 0x08, 0xb3, 0x21, 0xc8, 0x00, 0xbf, 0x08, 0xb3, 0x00, 0xc4,
- 0x22, 0x71, 0x00, 0xc0, 0xc9, 0xc5, 0x01, 0xdb, 0x00, 0xc0, 0xc1, 0x15,
- 0xc2, 0x7f, 0x7f, 0x08, 0xc2, 0x7f, 0x8b, 0x16, 0xc2, 0x7f, 0x97, 0xc3,
- 0x01, 0xb4, 0x00, 0xc0, 0x89, 0xc4, 0x15, 0xd3, 0x00, 0xc0, 0x80, 0x45,
- 0xc2, 0x85, 0x42, 0x7f, 0xa3, 0x48, 0xb0, 0x6a, 0xc2, 0x7f, 0xc5, 0xc2,
- 0x00, 0x35, 0x00, 0xc1, 0x48, 0x44, 0x62, 0x18, 0xc2, 0x80, 0x11, 0xc2,
- 0x0c, 0x65, 0x00, 0xc1, 0xe1, 0x83, 0x00, 0xc1, 0x90, 0x83, 0x00, 0xc1,
- 0xa3, 0x02, 0x80, 0x82, 0x8b, 0x00, 0xc2, 0x10, 0x44, 0x14, 0xc2, 0xc2,
- 0x80, 0x88, 0xc2, 0x00, 0xa4, 0x00, 0xc1, 0x89, 0x83, 0x00, 0xc1, 0x80,
- 0xc2, 0x00, 0x0a, 0x00, 0xc2, 0x09, 0xc2, 0x02, 0x59, 0x00, 0xc1, 0xf9,
- 0x83, 0x00, 0xc1, 0xe8, 0xc2, 0x00, 0xa4, 0x00, 0xc2, 0x01, 0x83, 0x00,
- 0xc1, 0x78, 0xc2, 0x00, 0xa4, 0x00, 0xc1, 0xd9, 0x83, 0x00, 0xc1, 0xd0,
- 0x87, 0x00, 0xc1, 0x38, 0x87, 0x00, 0xc1, 0x30, 0x87, 0x00, 0xc1, 0x28,
- 0xc4, 0x06, 0x9d, 0x00, 0xc0, 0x79, 0x16, 0xc2, 0x80, 0xea, 0xc3, 0x01,
- 0xb4, 0x00, 0xc0, 0x58, 0x89, 0x0e, 0xa1, 0xd3, 0x02, 0x80, 0xf6, 0x88,
- 0x0e, 0xa1, 0xc9, 0x87, 0x0e, 0xa1, 0xc3, 0x02, 0x80, 0xfc, 0x86, 0x0e,
- 0xa1, 0xbb, 0x02, 0x81, 0x08, 0x85, 0x0e, 0xa1, 0xb3, 0x02, 0x81, 0x0e,
- 0x84, 0x0e, 0xa1, 0xab, 0x02, 0x81, 0x14, 0x83, 0x0e, 0xa1, 0xa3, 0x02,
- 0x81, 0x1a, 0x91, 0x0e, 0xa2, 0x13, 0x02, 0x81, 0x20, 0x92, 0x0e, 0xa2,
- 0x1b, 0x02, 0x81, 0x24, 0x97, 0x0e, 0xa2, 0x43, 0x02, 0x81, 0x34, 0x96,
- 0x0e, 0xa2, 0x3b, 0x02, 0x81, 0x3a, 0x95, 0x0e, 0xa2, 0x33, 0x02, 0x81,
- 0x49, 0x94, 0x0e, 0xa2, 0x2b, 0x02, 0x81, 0x4f, 0x9a, 0x0e, 0xa2, 0x5b,
- 0x02, 0x81, 0x55, 0x90, 0x0e, 0xa2, 0x0b, 0x02, 0x81, 0x59, 0x8f, 0x0e,
- 0xa2, 0x03, 0x02, 0x81, 0x5d, 0x8e, 0x0e, 0xa1, 0xfb, 0x02, 0x81, 0x61,
- 0x8d, 0x0e, 0xa1, 0xf3, 0x02, 0x81, 0x67, 0x8b, 0x0e, 0xa1, 0xe3, 0x02,
- 0x81, 0x6d, 0x9c, 0x0e, 0xa2, 0x6b, 0x02, 0x81, 0x73, 0x9b, 0x0e, 0xa2,
- 0x61, 0x99, 0x0e, 0xa2, 0x51, 0x98, 0x0e, 0xa2, 0x49, 0x93, 0x0e, 0xa2,
- 0x21, 0x8c, 0x0e, 0xa1, 0xe9, 0x8a, 0x0e, 0xa1, 0xd8, 0xc8, 0x9d, 0xa4,
- 0x0e, 0xb8, 0xd9, 0xc9, 0xaa, 0xa5, 0x0e, 0xb8, 0xc9, 0xd3, 0x45, 0x3e,
- 0x0e, 0xb8, 0xa8, 0x91, 0x0e, 0xa2, 0xe3, 0x02, 0x81, 0x79, 0x92, 0x0e,
- 0xa2, 0xeb, 0x02, 0x81, 0x7d, 0x85, 0x0e, 0xa2, 0x83, 0x02, 0x81, 0x8d,
- 0x97, 0x0e, 0xa3, 0x13, 0x02, 0x81, 0x93, 0x96, 0x0e, 0xa3, 0x0b, 0x02,
- 0x81, 0x99, 0x95, 0x0e, 0xa3, 0x03, 0x02, 0x81, 0xa5, 0x88, 0x0e, 0xa2,
- 0x9b, 0x02, 0x81, 0xab, 0x94, 0x0e, 0xa2, 0xfb, 0x02, 0x81, 0xb1, 0x9a,
- 0x0e, 0xa3, 0x2b, 0x02, 0x81, 0xb7, 0x90, 0x0e, 0xa2, 0xdb, 0x02, 0x81,
- 0xbb, 0x8f, 0x0e, 0xa2, 0xd3, 0x02, 0x81, 0xbf, 0x8e, 0x0e, 0xa2, 0xcb,
- 0x02, 0x81, 0xc3, 0x8d, 0x0e, 0xa2, 0xc3, 0x02, 0x81, 0xc9, 0x8b, 0x0e,
- 0xa2, 0xb3, 0x02, 0x81, 0xcf, 0x87, 0x0e, 0xa2, 0x93, 0x02, 0x81, 0xd5,
- 0x9c, 0x0e, 0xa3, 0x3b, 0x02, 0x81, 0xe1, 0x86, 0x0e, 0xa2, 0x8b, 0x02,
- 0x81, 0xe7, 0x89, 0x0e, 0xa2, 0xa3, 0x02, 0x81, 0xf3, 0x84, 0x0e, 0xa2,
- 0x7b, 0x02, 0x81, 0xf9, 0x83, 0x0e, 0xa2, 0x73, 0x02, 0x81, 0xff, 0x9b,
- 0x0e, 0xa3, 0x31, 0x99, 0x0e, 0xa3, 0x21, 0x98, 0x0e, 0xa3, 0x19, 0x93,
- 0x0e, 0xa2, 0xf1, 0x8c, 0x0e, 0xa2, 0xb8, 0x45, 0x04, 0x74, 0xc2, 0x82,
- 0x05, 0x46, 0x0a, 0x0f, 0x42, 0x82, 0xa9, 0xc4, 0x22, 0x71, 0x0e, 0xbe,
- 0xb9, 0xc5, 0x01, 0xdb, 0x0e, 0xbe, 0xb1, 0x15, 0xc2, 0x82, 0xb5, 0x08,
- 0xc2, 0x82, 0xc1, 0x16, 0xc2, 0x82, 0xcd, 0xc3, 0x01, 0xb4, 0x0e, 0xbe,
- 0x79, 0xc4, 0x15, 0xd3, 0x0e, 0xbe, 0x70, 0x86, 0x0e, 0xa0, 0x1b, 0x02,
- 0x82, 0xd9, 0x91, 0x0e, 0xa0, 0x73, 0x02, 0x82, 0xe5, 0x92, 0x0e, 0xa0,
- 0x7b, 0x02, 0x82, 0xe9, 0x85, 0x0e, 0xa0, 0x13, 0x02, 0x82, 0xf9, 0x97,
- 0x0e, 0xa0, 0xa3, 0x02, 0x82, 0xff, 0x96, 0x0e, 0xa0, 0x9b, 0x02, 0x83,
- 0x05, 0x95, 0x0e, 0xa0, 0x93, 0x02, 0x83, 0x14, 0x94, 0x0e, 0xa0, 0x8b,
- 0x02, 0x83, 0x1a, 0x9a, 0x0e, 0xa0, 0xbb, 0x02, 0x83, 0x20, 0x90, 0x0e,
- 0xa0, 0x6b, 0x02, 0x83, 0x24, 0x8f, 0x0e, 0xa0, 0x63, 0x02, 0x83, 0x28,
- 0x8e, 0x0e, 0xa0, 0x5b, 0x02, 0x83, 0x2c, 0x8d, 0x0e, 0xa0, 0x53, 0x02,
- 0x83, 0x32, 0x8b, 0x0e, 0xa0, 0x43, 0x02, 0x83, 0x38, 0x87, 0x0e, 0xa0,
- 0x23, 0x02, 0x83, 0x3e, 0x9c, 0x0e, 0xa0, 0xcb, 0x02, 0x83, 0x4a, 0x89,
- 0x0e, 0xa0, 0x33, 0x02, 0x83, 0x50, 0x84, 0x0e, 0xa0, 0x0b, 0x02, 0x83,
- 0x56, 0x83, 0x0e, 0xa0, 0x03, 0x02, 0x83, 0x5c, 0x9b, 0x0e, 0xa0, 0xc1,
- 0x99, 0x0e, 0xa0, 0xb1, 0x98, 0x0e, 0xa0, 0xa9, 0x93, 0x0e, 0xa0, 0x81,
- 0x8c, 0x0e, 0xa0, 0x49, 0x8a, 0x0e, 0xa0, 0x39, 0x88, 0x0e, 0xa0, 0x28,
- 0x12, 0xc2, 0x83, 0x62, 0xca, 0xa6, 0xda, 0x0e, 0xba, 0xa1, 0xcc, 0x8b,
- 0x48, 0x0e, 0xba, 0x91, 0xcc, 0x89, 0x80, 0x0e, 0xba, 0x89, 0xce, 0x12,
- 0x11, 0x0e, 0xba, 0x81, 0x46, 0x04, 0x73, 0xc2, 0x83, 0x74, 0xc5, 0xdc,
- 0x87, 0x0e, 0xb9, 0xa9, 0x48, 0x07, 0x17, 0x42, 0x84, 0x18, 0xc8, 0x9d,
- 0xa4, 0x0e, 0xb7, 0x09, 0xc9, 0xaa, 0xa5, 0x0e, 0xb6, 0xf9, 0xd3, 0x45,
- 0x3e, 0x0e, 0xb6, 0xd8, 0x46, 0x04, 0x73, 0xc2, 0x84, 0xb9, 0x48, 0x07,
- 0x17, 0x42, 0x85, 0x21, 0xc4, 0x22, 0x71, 0x0e, 0xbf, 0xf9, 0xc5, 0x01,
- 0xdb, 0x0e, 0xbf, 0xf1, 0x15, 0xc2, 0x85, 0x89, 0x08, 0xc2, 0x85, 0x95,
- 0x16, 0xc2, 0x85, 0xa1, 0xc3, 0x01, 0xb4, 0x0e, 0xbf, 0xb9, 0xc4, 0x15,
- 0xd3, 0x0e, 0xbf, 0xb0, 0x9c, 0x0e, 0xb5, 0x19, 0x9b, 0x0e, 0xb5, 0x11,
- 0x9a, 0x0e, 0xb5, 0x09, 0x99, 0x0e, 0xb5, 0x01, 0x98, 0x0e, 0xb4, 0xf9,
- 0x97, 0x0e, 0xb4, 0xf1, 0x96, 0x0e, 0xb4, 0xe9, 0x95, 0x0e, 0xb4, 0xe1,
- 0x94, 0x0e, 0xb4, 0xd9, 0x93, 0x0e, 0xb4, 0xd1, 0x92, 0x0e, 0xb4, 0xc9,
- 0x91, 0x0e, 0xb4, 0xc1, 0x90, 0x0e, 0xb4, 0xb9, 0x8f, 0x0e, 0xb4, 0xb1,
- 0x8e, 0x0e, 0xb4, 0xa9, 0x8d, 0x0e, 0xb4, 0xa1, 0x8c, 0x0e, 0xb4, 0x99,
- 0x8b, 0x0e, 0xb4, 0x91, 0x8a, 0x0e, 0xb4, 0x89, 0x89, 0x0e, 0xb4, 0x81,
- 0x88, 0x0e, 0xb4, 0x79, 0x87, 0x0e, 0xb4, 0x71, 0x86, 0x0e, 0xb4, 0x69,
- 0x85, 0x0e, 0xb4, 0x61, 0x84, 0x0e, 0xb4, 0x59, 0x83, 0x0e, 0xb4, 0x50,
- 0x9c, 0x0e, 0xb4, 0x49, 0x9b, 0x0e, 0xb4, 0x41, 0x9a, 0x0e, 0xb4, 0x39,
- 0x99, 0x0e, 0xb4, 0x31, 0x98, 0x0e, 0xb4, 0x29, 0x97, 0x0e, 0xb4, 0x21,
- 0x96, 0x0e, 0xb4, 0x19, 0x95, 0x0e, 0xb4, 0x11, 0x94, 0x0e, 0xb4, 0x09,
- 0x93, 0x0e, 0xb4, 0x01, 0x92, 0x0e, 0xb3, 0xf9, 0x91, 0x0e, 0xb3, 0xf1,
- 0x90, 0x0e, 0xb3, 0xe9, 0x8f, 0x0e, 0xb3, 0xe1, 0x8e, 0x0e, 0xb3, 0xd9,
- 0x8d, 0x0e, 0xb3, 0xd1, 0x8c, 0x0e, 0xb3, 0xc9, 0x8b, 0x0e, 0xb3, 0xc1,
- 0x8a, 0x0e, 0xb3, 0xb9, 0x89, 0x0e, 0xb3, 0xb1, 0x88, 0x0e, 0xb3, 0xa9,
- 0x87, 0x0e, 0xb3, 0xa1, 0x86, 0x0e, 0xb3, 0x99, 0x85, 0x0e, 0xb3, 0x91,
- 0x84, 0x0e, 0xb3, 0x89, 0x83, 0x0e, 0xb3, 0x80, 0x45, 0x5a, 0xe2, 0xc2,
- 0x85, 0xad, 0x46, 0x06, 0x97, 0xc2, 0x85, 0xe7, 0x47, 0xca, 0xe1, 0xc2,
- 0x86, 0x0b, 0x46, 0x04, 0x73, 0xc2, 0x86, 0x17, 0x48, 0x07, 0x17, 0x42,
- 0x86, 0x7f, 0x46, 0x04, 0x73, 0xc2, 0x86, 0xe7, 0x48, 0x07, 0x17, 0x42,
- 0x87, 0x43, 0xc4, 0x22, 0x71, 0x0e, 0xbf, 0x09, 0xc5, 0x01, 0xdb, 0x0e,
- 0xbf, 0x01, 0x15, 0xc2, 0x87, 0x8b, 0x08, 0xc2, 0x87, 0x97, 0x16, 0xc2,
- 0x87, 0xa3, 0xc3, 0x01, 0xb4, 0x0e, 0xbe, 0xc9, 0xc4, 0x15, 0xd3, 0x0e,
- 0xbe, 0xc0, 0x9c, 0x0e, 0xab, 0x59, 0x9b, 0x0e, 0xab, 0x51, 0x9a, 0x0e,
- 0xab, 0x49, 0x99, 0x0e, 0xab, 0x41, 0x98, 0x0e, 0xab, 0x39, 0x97, 0x0e,
- 0xab, 0x31, 0x96, 0x0e, 0xab, 0x29, 0x95, 0x0e, 0xab, 0x21, 0x94, 0x0e,
- 0xab, 0x19, 0x93, 0x0e, 0xab, 0x11, 0x92, 0x0e, 0xab, 0x09, 0x91, 0x0e,
- 0xab, 0x01, 0x90, 0x0e, 0xaa, 0xf9, 0x8f, 0x0e, 0xaa, 0xf1, 0x8e, 0x0e,
- 0xaa, 0xe9, 0x8d, 0x0e, 0xaa, 0xe1, 0x8c, 0x0e, 0xaa, 0xd9, 0x8b, 0x0e,
- 0xaa, 0xd1, 0x8a, 0x0e, 0xaa, 0xc9, 0x89, 0x0e, 0xaa, 0xc1, 0x88, 0x0e,
- 0xaa, 0xb9, 0x87, 0x0e, 0xaa, 0xb1, 0x86, 0x0e, 0xaa, 0xa9, 0x85, 0x0e,
- 0xaa, 0xa1, 0x84, 0x0e, 0xaa, 0x99, 0x83, 0x0e, 0xaa, 0x90, 0x9b, 0x0e,
- 0xaa, 0x81, 0x9a, 0x0e, 0xaa, 0x79, 0x99, 0x0e, 0xaa, 0x71, 0x98, 0x0e,
- 0xaa, 0x69, 0x97, 0x0e, 0xaa, 0x61, 0x96, 0x0e, 0xaa, 0x59, 0x95, 0x0e,
- 0xaa, 0x51, 0x91, 0x0e, 0xaa, 0x31, 0x8f, 0x0e, 0xaa, 0x21, 0x8e, 0x0e,
- 0xaa, 0x19, 0x8d, 0x0e, 0xaa, 0x11, 0x8c, 0x0e, 0xaa, 0x09, 0x8b, 0x0e,
- 0xaa, 0x01, 0x89, 0x0e, 0xa9, 0xf1, 0x88, 0x0e, 0xa9, 0xe9, 0x87, 0x0e,
- 0xa9, 0xe1, 0x86, 0x0e, 0xa9, 0xd9, 0x84, 0x0e, 0xa9, 0xc9, 0x83, 0x0e,
- 0xa9, 0xc0, 0x46, 0x04, 0x73, 0xc2, 0x87, 0xaf, 0x48, 0x07, 0x17, 0x42,
- 0x88, 0x17, 0xd5, 0x34, 0x39, 0x01, 0x3f, 0x79, 0x46, 0x02, 0x31, 0xc2,
- 0x88, 0x6b, 0xd4, 0x3c, 0x79, 0x01, 0x3f, 0x59, 0xcd, 0x09, 0x51, 0x01,
- 0x3f, 0x48, 0xd6, 0x0a, 0x88, 0x01, 0x3f, 0x61, 0xce, 0x24, 0xb2, 0x01,
- 0x3f, 0x30, 0xc2, 0x1d, 0x5f, 0x08, 0xf7, 0x59, 0x83, 0x08, 0xf7, 0x41,
- 0xc2, 0x01, 0x29, 0x08, 0xf7, 0x10, 0xc4, 0x18, 0x83, 0x08, 0xea, 0xb9,
- 0xc2, 0x26, 0x51, 0x08, 0xea, 0xb0, 0xc3, 0x0c, 0x5b, 0x08, 0xea, 0xa9,
- 0xc3, 0x06, 0x9e, 0x08, 0xea, 0xa0, 0xc4, 0x04, 0x5e, 0x08, 0xea, 0x99,
- 0xc2, 0x01, 0x47, 0x08, 0xea, 0x90, 0x03, 0xc2, 0x88, 0x77, 0x91, 0x08,
- 0xe9, 0xe9, 0x87, 0x08, 0xe9, 0xd1, 0xc9, 0xac, 0xc1, 0x08, 0xe9, 0xb1,
- 0x97, 0x08, 0xe9, 0xa3, 0x02, 0x88, 0x83, 0x8b, 0x08, 0xe9, 0x92, 0x02,
- 0x88, 0x87, 0xc2, 0x02, 0x59, 0x08, 0xe9, 0x81, 0xc2, 0x00, 0xa4, 0x08,
- 0xe8, 0xe1, 0x83, 0x08, 0xe8, 0xd9, 0x16, 0x42, 0x88, 0x8b, 0xc3, 0x30,
- 0x4c, 0x08, 0xe9, 0x79, 0xc2, 0x00, 0xa4, 0x08, 0xe8, 0xa1, 0x83, 0x08,
- 0xe8, 0x98, 0xc3, 0x1a, 0x80, 0x08, 0xe9, 0x71, 0xc2, 0x00, 0xa4, 0x08,
- 0xe8, 0x69, 0x83, 0x08, 0xe8, 0x60, 0xc2, 0x00, 0xc7, 0x08, 0xe9, 0x69,
- 0x83, 0x08, 0xe9, 0x38, 0x83, 0x08, 0xe9, 0x59, 0xc2, 0x0c, 0x65, 0x08,
- 0xe9, 0x51, 0xc2, 0x00, 0xa4, 0x08, 0xe9, 0x48, 0xc2, 0x00, 0xa4, 0x08,
- 0xe9, 0x19, 0x83, 0x08, 0xe9, 0x10, 0xc2, 0x00, 0xa4, 0x08, 0xe9, 0x09,
- 0x83, 0x08, 0xe9, 0x00, 0x83, 0x08, 0xe8, 0xf9, 0xc2, 0x00, 0xc1, 0x08,
- 0xe8, 0xd1, 0xc2, 0x1d, 0x5f, 0x08, 0xe8, 0xa9, 0xc2, 0x01, 0x29, 0x08,
- 0xe8, 0x80, 0xc2, 0x00, 0xa4, 0x08, 0xe8, 0xf1, 0x83, 0x08, 0xe8, 0xe9,
- 0x06, 0x42, 0x88, 0x95, 0xc2, 0x00, 0xa4, 0x08, 0xe8, 0x91, 0x83, 0x08,
- 0xe8, 0x88, 0xc2, 0x00, 0xa4, 0x08, 0xe8, 0x79, 0x83, 0x08, 0xe8, 0x70,
- 0x97, 0x08, 0xe8, 0x59, 0x8b, 0x08, 0xe8, 0x41, 0x83, 0x08, 0xe8, 0x08,
- 0x97, 0x08, 0xe8, 0x28, 0x8b, 0x08, 0xe8, 0x18, 0xcb, 0x1e, 0x17, 0x08,
- 0xe5, 0xb1, 0xc8, 0x11, 0x40, 0x08, 0xe5, 0xa8, 0x83, 0x08, 0xe5, 0x79,
- 0xc2, 0x00, 0xa4, 0x08, 0xe5, 0x71, 0x15, 0xc2, 0x88, 0x9f, 0xc2, 0x00,
- 0xc7, 0x08, 0xe5, 0x59, 0xc2, 0x02, 0x59, 0x08, 0xe5, 0x51, 0xc2, 0x1d,
- 0x5f, 0x08, 0xe5, 0x49, 0x1c, 0xc2, 0x88, 0xa9, 0xc2, 0x01, 0x09, 0x08,
- 0xe5, 0x29, 0x06, 0xc2, 0x88, 0xb3, 0x16, 0xc2, 0x88, 0xbd, 0xc2, 0x00,
- 0xad, 0x08, 0xe5, 0x09, 0xc2, 0x00, 0xde, 0x08, 0xe5, 0x01, 0x12, 0xc2,
- 0x88, 0xcb, 0x10, 0xc2, 0x88, 0xd5, 0xc2, 0x24, 0x58, 0x08, 0xe4, 0xc1,
- 0x05, 0xc2, 0x88, 0xe5, 0xc2, 0x01, 0x29, 0x08, 0xe4, 0xa1, 0x0d, 0x42,
- 0x88, 0xef, 0x83, 0x08, 0xe4, 0x69, 0xc2, 0x00, 0xa4, 0x08, 0xe4, 0x60,
- 0x83, 0x08, 0xe4, 0x39, 0xc2, 0x00, 0xa4, 0x08, 0xe4, 0x30, 0xc2, 0x02,
- 0xb4, 0x08, 0xe4, 0x21, 0x83, 0x08, 0xe3, 0xe0, 0x15, 0xc2, 0x88, 0xf9,
- 0xc2, 0x00, 0xa4, 0x08, 0xe3, 0xd9, 0x83, 0x08, 0xe3, 0xd0, 0xc2, 0x00,
- 0xa4, 0x08, 0xe3, 0xf9, 0x83, 0x08, 0xe3, 0xf0, 0x83, 0x08, 0xe3, 0xe9,
- 0xc2, 0x1d, 0x5f, 0x08, 0xe3, 0xc9, 0xc2, 0x01, 0x29, 0x08, 0xe3, 0xa8,
- 0xc2, 0x00, 0xa4, 0x08, 0xe3, 0xb9, 0x83, 0x08, 0xe3, 0xb0, 0xc2, 0x00,
- 0xa4, 0x08, 0xe3, 0x99, 0x83, 0x08, 0xe3, 0x90, 0xd7, 0x11, 0x37, 0x00,
- 0x68, 0x01, 0xca, 0x1e, 0x18, 0x00, 0x68, 0x09, 0xce, 0x73, 0xf1, 0x00,
- 0x69, 0xe0, 0xc7, 0x11, 0x41, 0x00, 0x68, 0x11, 0xc7, 0x76, 0x59, 0x00,
- 0x69, 0xe8, 0x0b, 0xc2, 0x89, 0x03, 0xd2, 0x48, 0xf0, 0x00, 0x69, 0xd8,
- 0xcd, 0x7d, 0x7c, 0x00, 0x68, 0x21, 0x47, 0xac, 0xc2, 0xc2, 0x89, 0x0f,
- 0x83, 0x00, 0x69, 0xa8, 0x83, 0x00, 0x68, 0x31, 0x8b, 0x00, 0x68, 0x81,
- 0x97, 0x00, 0x68, 0xa1, 0xc9, 0xb4, 0xaa, 0x00, 0x6a, 0xf8, 0x8b, 0x00,
- 0x68, 0x40, 0x97, 0x00, 0x68, 0x50, 0x87, 0x00, 0x68, 0x78, 0x91, 0x00,
- 0x68, 0x98, 0x83, 0x00, 0x68, 0xa9, 0xc2, 0x00, 0xa4, 0x00, 0x68, 0xb0,
- 0x83, 0x00, 0x68, 0xb9, 0xc2, 0x00, 0xa4, 0x00, 0x68, 0xc0, 0xc2, 0x01,
- 0x29, 0x00, 0x68, 0xc9, 0xc2, 0x1d, 0x5f, 0x00, 0x68, 0xf1, 0x10, 0xc2,
- 0x89, 0x1d, 0x83, 0x00, 0x69, 0x40, 0x83, 0x00, 0x68, 0xd1, 0x0a, 0x42,
- 0x89, 0x27, 0x83, 0x00, 0x68, 0xe1, 0xc2, 0x00, 0xa4, 0x00, 0x68, 0xe8,
- 0x16, 0xc2, 0x89, 0x31, 0x83, 0x00, 0x69, 0x21, 0xc2, 0x00, 0xa4, 0x00,
- 0x69, 0x28, 0x06, 0xc2, 0x89, 0x41, 0x83, 0x00, 0x69, 0x31, 0xc2, 0x00,
- 0xa4, 0x00, 0x69, 0x39, 0xc7, 0xc8, 0x64, 0x00, 0x6a, 0x70, 0x83, 0x00,
- 0x69, 0x51, 0xc2, 0x00, 0xa4, 0x00, 0x69, 0x58, 0x83, 0x00, 0x69, 0x61,
- 0xc2, 0x00, 0xa4, 0x00, 0x69, 0x68, 0x83, 0x00, 0x69, 0x81, 0xc2, 0x02,
- 0x59, 0x00, 0x69, 0x88, 0x83, 0x00, 0x69, 0x91, 0x0e, 0x42, 0x89, 0x4b,
- 0xc2, 0x00, 0xa4, 0x00, 0x69, 0xb1, 0xc2, 0x0c, 0x65, 0x00, 0x69, 0xb9,
- 0x83, 0x00, 0x69, 0xc0, 0x83, 0x00, 0x69, 0xf1, 0x8b, 0x00, 0x6a, 0x41,
- 0x97, 0x00, 0x6a, 0x60, 0x8b, 0x00, 0x6a, 0x00, 0x97, 0x00, 0x6a, 0x10,
- 0x94, 0x00, 0x6a, 0x1b, 0x02, 0x89, 0x55, 0x8e, 0x00, 0x6b, 0x12, 0x02,
- 0x89, 0x59, 0x87, 0x00, 0x6a, 0x38, 0x91, 0x00, 0x6a, 0x58, 0xd8, 0x26,
- 0x40, 0x00, 0x6a, 0xc1, 0x08, 0xc2, 0x89, 0x5d, 0x16, 0xc2, 0x89, 0x69,
- 0xc7, 0x0a, 0xb9, 0x00, 0x6b, 0x99, 0xc4, 0x03, 0x2b, 0x00, 0x6b, 0xa1,
- 0xc9, 0x6b, 0x69, 0x00, 0x6b, 0xb1, 0xc6, 0x01, 0xdb, 0x00, 0x6b, 0xb8,
- 0xca, 0xa5, 0x9a, 0x00, 0x6a, 0xd1, 0xca, 0x9e, 0x9c, 0x00, 0x6a, 0xe9,
- 0xc8, 0x0a, 0xb9, 0x00, 0x6b, 0xa9, 0xca, 0xa7, 0x5c, 0x00, 0x6b, 0xc0,
- 0xc4, 0x15, 0xd3, 0x00, 0x6b, 0x31, 0xc3, 0x01, 0xb4, 0x00, 0x6b, 0x39,
- 0x16, 0xc2, 0x89, 0x75, 0x08, 0xc2, 0x89, 0x81, 0x15, 0xc2, 0x89, 0x8d,
- 0xc5, 0x01, 0xdb, 0x00, 0x6b, 0x71, 0xc4, 0x22, 0x71, 0x00, 0x6b, 0x78,
- 0xc7, 0x0c, 0x4b, 0x00, 0x6b, 0x89, 0xc8, 0x50, 0x00, 0x00, 0x6b, 0x90,
- 0x96, 0x08, 0x57, 0xa3, 0x02, 0x89, 0x99, 0xd3, 0x44, 0x5a, 0x08, 0x57,
- 0x90, 0xc8, 0x0c, 0x4a, 0x08, 0x57, 0x78, 0xc5, 0x2a, 0x13, 0x08, 0x57,
- 0x71, 0xc2, 0x00, 0x4d, 0x08, 0x57, 0x68, 0xc2, 0x26, 0xfa, 0x08, 0x57,
- 0x21, 0xc6, 0xd2, 0xc7, 0x08, 0x56, 0xa9, 0xc3, 0x1a, 0xba, 0x08, 0x56,
- 0x70, 0xc4, 0x0d, 0xd8, 0x08, 0x57, 0x19, 0xc3, 0x0d, 0xd9, 0x08, 0x57,
- 0x11, 0x03, 0x42, 0x89, 0x9f, 0xc4, 0xd1, 0xf8, 0x08, 0x57, 0x01, 0xc3,
- 0x30, 0x73, 0x08, 0x56, 0xf0, 0xc3, 0x30, 0x73, 0x08, 0x56, 0xf9, 0xc3,
- 0x08, 0x56, 0x08, 0x56, 0x88, 0xc4, 0x45, 0xc1, 0x08, 0x56, 0xd1, 0xc3,
- 0x15, 0x1d, 0x08, 0x56, 0xc9, 0xc4, 0x37, 0x5c, 0x08, 0x56, 0x00, 0xc6,
- 0xd2, 0xc7, 0x08, 0x56, 0xa1, 0xc5, 0x45, 0xf7, 0x08, 0x56, 0x28, 0xc4,
- 0xda, 0x5d, 0x08, 0x56, 0x91, 0xc3, 0x08, 0x56, 0x08, 0x56, 0x80, 0xc2,
- 0x00, 0x6e, 0x08, 0x56, 0x68, 0xc5, 0xd5, 0x52, 0x08, 0x56, 0x61, 0xc4,
- 0x45, 0xc1, 0x08, 0x56, 0x58, 0xc5, 0xd5, 0x52, 0x08, 0x56, 0x51, 0xc4,
- 0x45, 0xc1, 0x08, 0x56, 0x48, 0xc5, 0xd6, 0x51, 0x08, 0x56, 0x21, 0xc4,
- 0xa1, 0x95, 0x08, 0x56, 0x18, 0xc4, 0x9c, 0xcc, 0x08, 0x56, 0x11, 0xc3,
- 0x1a, 0xba, 0x08, 0x56, 0x08, 0xc2, 0x00, 0x34, 0x00, 0x42, 0xc1, 0x96,
- 0x00, 0x42, 0xab, 0x02, 0x89, 0xab, 0x95, 0x00, 0x42, 0x73, 0x02, 0x89,
- 0xaf, 0x94, 0x00, 0x42, 0x99, 0x93, 0x00, 0x42, 0x91, 0x92, 0x00, 0x42,
- 0x81, 0x90, 0x00, 0x42, 0x69, 0x8f, 0x00, 0x42, 0x61, 0x8e, 0x00, 0x42,
- 0x59, 0x8d, 0x00, 0x42, 0x53, 0x02, 0x89, 0xb7, 0x9c, 0x00, 0x42, 0x31,
- 0x8a, 0x00, 0x42, 0x21, 0x86, 0x00, 0x42, 0x19, 0x89, 0x00, 0x42, 0x11,
- 0x84, 0x00, 0x42, 0x08, 0x90, 0x00, 0x42, 0x79, 0x96, 0x00, 0x42, 0x38,
- 0x14, 0xc2, 0x89, 0xbd, 0xc2, 0x00, 0xa4, 0x08, 0x8b, 0x89, 0xc2, 0x0c,
- 0x65, 0x08, 0x8b, 0x81, 0xc2, 0x04, 0x41, 0x08, 0x8b, 0x79, 0xc2, 0x00,
- 0xc7, 0x08, 0x8b, 0x71, 0xc2, 0x00, 0xad, 0x08, 0x8b, 0x61, 0x04, 0xc2,
- 0x89, 0xc7, 0x12, 0xc2, 0x89, 0xd1, 0x10, 0xc2, 0x89, 0xdb, 0x06, 0xc2,
- 0x89, 0xeb, 0x16, 0xc2, 0x89, 0xf9, 0x0c, 0xc2, 0x8a, 0x07, 0x05, 0xc2,
- 0x8a, 0x11, 0x09, 0xc2, 0x8a, 0x1b, 0x0d, 0xc2, 0x8a, 0x25, 0x91, 0x08,
- 0x8a, 0xa1, 0x87, 0x08, 0x8a, 0x99, 0x97, 0x08, 0x8a, 0x91, 0x8b, 0x08,
- 0x8a, 0x89, 0x83, 0x08, 0x8a, 0x80, 0x05, 0xc2, 0x8a, 0x2f, 0xc7, 0xc6,
- 0x03, 0x0f, 0x80, 0xb8, 0x05, 0xc2, 0x8a, 0x3b, 0xc7, 0xc6, 0x03, 0x0f,
- 0x80, 0xa8, 0x05, 0xc2, 0x8a, 0x47, 0xc7, 0xc6, 0x03, 0x0f, 0x80, 0xb0,
- 0x05, 0xc2, 0x8a, 0x53, 0xc7, 0xc6, 0x03, 0x0f, 0x80, 0xc0, 0x05, 0xc2,
- 0x8a, 0x5f, 0xc7, 0xc6, 0x03, 0x0f, 0x80, 0x80, 0x05, 0xc2, 0x8a, 0x6b,
- 0xc7, 0xc6, 0x03, 0x0f, 0x80, 0x88, 0x05, 0xc2, 0x8a, 0x77, 0xc7, 0xc6,
- 0x03, 0x0f, 0x80, 0x90, 0x05, 0xc2, 0x8a, 0x83, 0xc7, 0xc6, 0x03, 0x0f,
- 0x80, 0x98, 0x05, 0xc2, 0x8a, 0x8f, 0xc7, 0xc6, 0x03, 0x0f, 0x80, 0xa0,
- 0x46, 0x10, 0x5f, 0xc2, 0x8a, 0x9b, 0xc4, 0xe4, 0xd3, 0x0f, 0x9d, 0xe0,
- 0xcb, 0x9a, 0x09, 0x0f, 0x9c, 0xc0, 0x9a, 0x01, 0x38, 0xa9, 0xc4, 0x00,
- 0xcb, 0x00, 0x06, 0xba, 0x02, 0x8b, 0x01, 0xc5, 0x13, 0x89, 0x01, 0x14,
- 0x71, 0xce, 0x1e, 0xa6, 0x01, 0x14, 0x68, 0xc2, 0x00, 0xa4, 0x08, 0x95,
- 0x41, 0xc2, 0x02, 0x59, 0x08, 0x95, 0x39, 0x83, 0x08, 0x95, 0x10, 0xc2,
- 0x00, 0xa4, 0x08, 0x94, 0xf9, 0x83, 0x08, 0x94, 0xe8, 0xc2, 0x00, 0xa4,
- 0x08, 0x94, 0xe1, 0x83, 0x08, 0x94, 0xd8, 0x83, 0x08, 0x94, 0xd1, 0xc2,
- 0x00, 0xc1, 0x08, 0x94, 0xa9, 0xc2, 0x1d, 0x5f, 0x08, 0x94, 0x78, 0xc2,
- 0x00, 0xa4, 0x08, 0x94, 0xc9, 0x83, 0x08, 0x94, 0xc1, 0x06, 0x42, 0x8b,
- 0x05, 0xc2, 0x00, 0xa4, 0x08, 0x94, 0xb9, 0x83, 0x08, 0x94, 0xb1, 0x16,
- 0x42, 0x8b, 0x15, 0x83, 0x08, 0x94, 0x61, 0xc2, 0x24, 0x58, 0x08, 0x94,
- 0x68, 0x83, 0x08, 0x94, 0x51, 0xc2, 0x00, 0xa4, 0x08, 0x94, 0x58, 0xc2,
- 0x00, 0xa4, 0x08, 0x94, 0x41, 0x83, 0x08, 0x94, 0x30, 0xc2, 0x00, 0xa4,
- 0x08, 0x94, 0x29, 0x83, 0x08, 0x94, 0x20, 0xc3, 0x4e, 0x26, 0x05, 0x4f,
- 0x29, 0x45, 0x27, 0x0d, 0xc2, 0x8b, 0x1f, 0x48, 0xb7, 0xe5, 0x42, 0x8b,
- 0x2f, 0xc3, 0x01, 0x59, 0x05, 0x53, 0xc9, 0xc3, 0x01, 0xb4, 0x05, 0x53,
- 0xc1, 0xcb, 0x0f, 0xfb, 0x05, 0x53, 0xb8, 0x44, 0x3a, 0x74, 0x42, 0x8b,
- 0x3b, 0x48, 0x62, 0xec, 0x42, 0x8b, 0x7f, 0x83, 0x00, 0x80, 0x59, 0xc2,
- 0x00, 0xa4, 0x00, 0x80, 0x60, 0x83, 0x00, 0x82, 0x83, 0x02, 0x8b, 0x9f,
- 0x4b, 0x9b, 0x53, 0x42, 0x8b, 0xa5, 0xc2, 0x1d, 0x5f, 0x00, 0x80, 0x51,
- 0x83, 0x00, 0x80, 0x78, 0x83, 0x00, 0x80, 0x69, 0xc2, 0x00, 0xa4, 0x00,
- 0x80, 0x70, 0x87, 0x00, 0x81, 0x41, 0xc3, 0x6b, 0x49, 0x00, 0x82, 0xd1,
- 0xc3, 0xe7, 0x6c, 0x00, 0x82, 0xd9, 0x42, 0x43, 0xfd, 0x42, 0x8b, 0xb1,
- 0xc3, 0x00, 0xa3, 0x00, 0x83, 0x29, 0xc3, 0x08, 0xcb, 0x00, 0x83, 0x30,
- 0xc3, 0x3a, 0x96, 0x00, 0x83, 0x71, 0xc3, 0xdf, 0xfb, 0x00, 0x83, 0x79,
- 0xc4, 0xe2, 0x17, 0x00, 0x83, 0x80, 0x94, 0x00, 0x82, 0x98, 0x8e, 0x00,
- 0x82, 0xa8, 0x8b, 0x00, 0x84, 0xe8, 0xc6, 0x00, 0x94, 0x00, 0x84, 0x28,
- 0xd7, 0x29, 0x7f, 0x0f, 0xd2, 0x68, 0x49, 0x29, 0x7f, 0x42, 0x8b, 0xb9,
- 0xc3, 0x00, 0x34, 0x0f, 0xd0, 0x03, 0x02, 0x8b, 0xc5, 0xc5, 0x7c, 0xf9,
- 0x0f, 0xd0, 0x22, 0x02, 0x8b, 0xcb, 0x49, 0x29, 0x7f, 0x42, 0x8b, 0xd1,
- 0x49, 0x29, 0x7f, 0x42, 0x8b, 0xdd, 0x49, 0x29, 0x7f, 0x42, 0x8b, 0xe9,
- 0x0d, 0xc2, 0x8b, 0xf5, 0xc5, 0xb5, 0xaf, 0x0f, 0xd1, 0x59, 0xc4, 0xe0,
- 0xaf, 0x0f, 0xd1, 0x61, 0xc6, 0xd1, 0xf5, 0x0f, 0xd1, 0x69, 0xc4, 0xe5,
- 0xdf, 0x0f, 0xd1, 0x78, 0x43, 0x00, 0xcd, 0xc2, 0x8c, 0x01, 0xc4, 0xe4,
- 0xcb, 0x08, 0xa2, 0x50, 0xcd, 0x7d, 0x7c, 0x08, 0xa2, 0xf9, 0x47, 0xac,
- 0xc2, 0x42, 0x8c, 0x29, 0x83, 0x08, 0xa1, 0x99, 0xc2, 0x00, 0xa4, 0x08,
- 0xa1, 0x89, 0xc2, 0x0c, 0x65, 0x08, 0xa1, 0x90, 0x83, 0x08, 0xa1, 0x19,
- 0xc2, 0x00, 0xc1, 0x08, 0xa0, 0xf1, 0x1b, 0xc2, 0x8c, 0x37, 0x09, 0xc2,
- 0x8c, 0x41, 0xc2, 0x00, 0xa4, 0x08, 0xa1, 0x20, 0xc2, 0x00, 0xa4, 0x08,
- 0xa1, 0x11, 0x83, 0x08, 0xa1, 0x09, 0x06, 0x42, 0x8c, 0x4b, 0xc2, 0x00,
- 0xa4, 0x08, 0xa1, 0x01, 0x83, 0x08, 0xa0, 0xf9, 0x16, 0x42, 0x8c, 0x55,
- 0xc2, 0x00, 0xa4, 0x08, 0xa0, 0xb9, 0x83, 0x08, 0xa0, 0xb0, 0xc2, 0x00,
- 0xa4, 0x08, 0xa0, 0xa9, 0x83, 0x08, 0xa0, 0xa0, 0xc2, 0x00, 0xa4, 0x08,
- 0xa0, 0x89, 0x83, 0x08, 0xa0, 0x80, 0xc2, 0x00, 0xa4, 0x08, 0xa0, 0x79,
- 0x83, 0x08, 0xa0, 0x70, 0x97, 0x08, 0xa0, 0x69, 0x8b, 0x08, 0xa0, 0x59,
- 0x83, 0x08, 0xa0, 0x08, 0x97, 0x08, 0xa0, 0x28, 0x8b, 0x08, 0xa0, 0x18,
- 0x83, 0x08, 0xa1, 0x29, 0xc2, 0x00, 0xa4, 0x08, 0xa1, 0x30, 0x83, 0x08,
- 0xa1, 0x39, 0xc2, 0x00, 0xa4, 0x08, 0xa1, 0x40, 0x83, 0x08, 0xa1, 0x49,
- 0xc2, 0x00, 0xa4, 0x08, 0xa1, 0x50, 0x83, 0x08, 0xa1, 0x61, 0xc2, 0x00,
- 0xa4, 0x08, 0xa1, 0x68, 0x83, 0x08, 0xa1, 0x71, 0xc2, 0x00, 0xa4, 0x08,
- 0xa1, 0x78, 0xc5, 0x00, 0xaa, 0x08, 0xa2, 0xd1, 0xc5, 0x8b, 0x1f, 0x08,
- 0xa2, 0x60, 0xc4, 0x22, 0x71, 0x08, 0xa2, 0xc9, 0xc5, 0x01, 0xdb, 0x08,
- 0xa2, 0xc1, 0x15, 0xc2, 0x8c, 0x5f, 0x08, 0xc2, 0x8c, 0x6b, 0x16, 0xc2,
- 0x8c, 0x77, 0xc3, 0x01, 0xb4, 0x08, 0xa2, 0x89, 0xc4, 0x15, 0xd3, 0x08,
- 0xa2, 0x80, 0x97, 0x08, 0xa2, 0x09, 0x8b, 0x08, 0xa1, 0xf9, 0x83, 0x08,
- 0xa1, 0xa8, 0x8e, 0x08, 0xa1, 0xe3, 0x02, 0x8c, 0x83, 0x94, 0x08, 0xa1,
- 0xd2, 0x02, 0x8c, 0x87, 0x97, 0x08, 0xa1, 0xc8, 0x8b, 0x08, 0xa1, 0xb8,
- 0x98, 0x00, 0xce, 0xf8, 0xcd, 0x78, 0xea, 0x00, 0xce, 0xd1, 0x49, 0xb1,
- 0x4a, 0x42, 0x8c, 0x8b, 0xc4, 0x22, 0x71, 0x00, 0xce, 0xc9, 0xc5, 0x01,
- 0xdb, 0x00, 0xce, 0xc1, 0x15, 0xc2, 0x8c, 0x93, 0x08, 0xc2, 0x8c, 0x9f,
- 0x16, 0xc2, 0x8c, 0xab, 0xc3, 0x01, 0xb4, 0x00, 0xce, 0x89, 0xc4, 0x15,
- 0xd3, 0x00, 0xce, 0x80, 0x46, 0x2b, 0x13, 0xc2, 0x8c, 0xb7, 0x44, 0x08,
- 0x76, 0xc2, 0x8c, 0xd2, 0x45, 0x05, 0x2b, 0x42, 0x8d, 0x20, 0x0b, 0xc2,
- 0x8d, 0x6e, 0x97, 0x00, 0xcd, 0x9b, 0x02, 0x8d, 0x76, 0x91, 0x00, 0xcd,
- 0xbb, 0x02, 0x8d, 0x85, 0x03, 0xc2, 0x8d, 0x90, 0x87, 0x00, 0xcd, 0xa9,
- 0xcf, 0x69, 0x6b, 0x00, 0xcd, 0x80, 0x9c, 0x0f, 0x8c, 0x49, 0x9b, 0x0f,
- 0x8c, 0x41, 0x9a, 0x0f, 0x8c, 0x39, 0x99, 0x0f, 0x8c, 0x31, 0x98, 0x0f,
- 0x8c, 0x29, 0x97, 0x0f, 0x8c, 0x21, 0x96, 0x0f, 0x8c, 0x19, 0x95, 0x0f,
- 0x8c, 0x11, 0x94, 0x0f, 0x8c, 0x09, 0x93, 0x0f, 0x8c, 0x01, 0x92, 0x0f,
- 0x8b, 0xf9, 0x91, 0x0f, 0x8b, 0xf1, 0x90, 0x0f, 0x8b, 0xe9, 0x8f, 0x0f,
- 0x8b, 0xe1, 0x8e, 0x0f, 0x8b, 0xd9, 0x8d, 0x0f, 0x8b, 0xd1, 0x8c, 0x0f,
- 0x8b, 0xc9, 0x8b, 0x0f, 0x8b, 0xc1, 0x8a, 0x0f, 0x8b, 0xb9, 0x89, 0x0f,
- 0x8b, 0xb1, 0x88, 0x0f, 0x8b, 0xa9, 0x87, 0x0f, 0x8b, 0xa1, 0x86, 0x0f,
- 0x8b, 0x99, 0x85, 0x0f, 0x8b, 0x91, 0x84, 0x0f, 0x8b, 0x89, 0x83, 0x0f,
- 0x8b, 0x80, 0x16, 0xc2, 0x8d, 0x9f, 0xc8, 0x4c, 0xe0, 0x01, 0x27, 0x99,
- 0x07, 0xc2, 0x8d, 0xab, 0x15, 0xc2, 0x8d, 0xb7, 0x08, 0x42, 0x8d, 0xc3,
- 0x9c, 0x0f, 0x8b, 0x49, 0x9b, 0x0f, 0x8b, 0x41, 0x9a, 0x0f, 0x8b, 0x39,
- 0x99, 0x0f, 0x8b, 0x31, 0x98, 0x0f, 0x8b, 0x29, 0x97, 0x0f, 0x8b, 0x21,
- 0x96, 0x0f, 0x8b, 0x19, 0x95, 0x0f, 0x8b, 0x11, 0x94, 0x0f, 0x8b, 0x09,
- 0x93, 0x0f, 0x8b, 0x01, 0x92, 0x0f, 0x8a, 0xf9, 0x91, 0x0f, 0x8a, 0xf1,
- 0x90, 0x0f, 0x8a, 0xe9, 0x8f, 0x0f, 0x8a, 0xe1, 0x8e, 0x0f, 0x8a, 0xd9,
- 0x8d, 0x0f, 0x8a, 0xd1, 0x8c, 0x0f, 0x8a, 0xc9, 0x8b, 0x0f, 0x8a, 0xc1,
- 0x8a, 0x0f, 0x8a, 0xb9, 0x89, 0x0f, 0x8a, 0xb1, 0x88, 0x0f, 0x8a, 0xa9,
- 0x87, 0x0f, 0x8a, 0xa1, 0x86, 0x0f, 0x8a, 0x99, 0x85, 0x0f, 0x8a, 0x91,
- 0x84, 0x0f, 0x8a, 0x89, 0x83, 0x0f, 0x8a, 0x80, 0x90, 0x0f, 0x27, 0x28,
- 0x97, 0x08, 0xce, 0xe9, 0x8b, 0x08, 0xce, 0xd9, 0x83, 0x08, 0xce, 0x88,
- 0x94, 0x08, 0xce, 0xb8, 0x97, 0x08, 0xce, 0xa8, 0x8b, 0x08, 0xce, 0x98,
- 0xc7, 0x76, 0x59, 0x08, 0xcf, 0x09, 0xc7, 0x11, 0x41, 0x08, 0xce, 0xf0,
- 0xc4, 0x0f, 0x7c, 0x08, 0xcf, 0x01, 0xc5, 0x44, 0x7b, 0x08, 0xce, 0xf8,
- 0xc2, 0x02, 0x59, 0x08, 0xce, 0x81, 0x83, 0x08, 0xce, 0x40, 0xc2, 0x00,
- 0xc7, 0x08, 0xce, 0x79, 0x83, 0x08, 0xce, 0x48, 0x83, 0x08, 0xce, 0x69,
- 0xc2, 0x0c, 0x65, 0x08, 0xce, 0x61, 0xc2, 0x00, 0xa4, 0x08, 0xce, 0x58,
- 0x83, 0x08, 0xce, 0x51, 0xc8, 0xac, 0xc2, 0x08, 0xcd, 0x32, 0x02, 0x8d,
- 0xcf, 0xc2, 0x00, 0xa4, 0x08, 0xce, 0x29, 0x83, 0x08, 0xce, 0x20, 0xc2,
- 0x00, 0xa4, 0x08, 0xce, 0x19, 0x83, 0x08, 0xce, 0x10, 0x83, 0x08, 0xce,
- 0x09, 0xc2, 0x00, 0xc1, 0x08, 0xcd, 0xe1, 0xc2, 0x1d, 0x5f, 0x08, 0xcd,
- 0xb9, 0xc2, 0x01, 0x29, 0x08, 0xcd, 0x90, 0xc2, 0x00, 0xa4, 0x08, 0xce,
- 0x01, 0x83, 0x08, 0xcd, 0xf9, 0x06, 0x42, 0x8d, 0xd3, 0xc2, 0x00, 0xa4,
- 0x08, 0xcd, 0xf1, 0x83, 0x08, 0xcd, 0xe9, 0x16, 0x42, 0x8d, 0xdd, 0xc2,
- 0x00, 0xa4, 0x08, 0xcd, 0xb1, 0x83, 0x08, 0xcd, 0xa8, 0xc2, 0x00, 0xa4,
- 0x08, 0xcd, 0xa1, 0x83, 0x08, 0xcd, 0x98, 0xc2, 0x00, 0xa4, 0x08, 0xcd,
- 0x89, 0x83, 0x08, 0xcd, 0x80, 0xc2, 0x00, 0xa4, 0x08, 0xcd, 0x79, 0x83,
- 0x08, 0xcd, 0x70, 0x97, 0x08, 0xcd, 0x69, 0x8b, 0x08, 0xcd, 0x59, 0x83,
- 0x08, 0xcd, 0x08, 0x97, 0x08, 0xcd, 0x28, 0x8b, 0x08, 0xcd, 0x18, 0xc8,
- 0x0c, 0x4a, 0x08, 0x45, 0x78, 0x19, 0xc2, 0x8d, 0xe7, 0xc2, 0x00, 0x4d,
- 0x08, 0x45, 0x69, 0xc4, 0x04, 0x5e, 0x08, 0x45, 0x48, 0xc3, 0x0c, 0x5b,
- 0x08, 0x45, 0x61, 0xc3, 0x06, 0x9e, 0x08, 0x45, 0x50, 0xc2, 0x26, 0xfa,
- 0x08, 0x44, 0xf1, 0xc3, 0x1a, 0xba, 0x08, 0x44, 0x58, 0xc3, 0x0d, 0xd9,
- 0x08, 0x44, 0xe9, 0x03, 0x42, 0x8d, 0xf1, 0xc4, 0x0d, 0xd8, 0x08, 0x44,
- 0xe1, 0xc3, 0x1f, 0xd8, 0x08, 0x44, 0xa1, 0xc3, 0x0b, 0x0e, 0x08, 0x44,
- 0x91, 0xc6, 0xd0, 0x5d, 0x08, 0x44, 0x81, 0xc4, 0xe2, 0x57, 0x08, 0x44,
- 0x71, 0xc4, 0x4b, 0x98, 0x08, 0x44, 0x61, 0xc2, 0x01, 0xf0, 0x08, 0x44,
- 0x31, 0xc4, 0xe4, 0x8f, 0x08, 0x44, 0x11, 0xc5, 0xa8, 0xf1, 0x08, 0x44,
- 0x00, 0xc3, 0x15, 0x1d, 0x08, 0x44, 0xb9, 0xc4, 0x37, 0x5c, 0x08, 0x44,
- 0x08, 0xc2, 0x00, 0x6e, 0x08, 0x44, 0x50, 0x49, 0x01, 0x8a, 0xc2, 0x8d,
- 0xfd, 0xcc, 0x8c, 0x20, 0x01, 0x0e, 0xb9, 0x03, 0xc2, 0x8e, 0x0f, 0xcb,
- 0x04, 0xfc, 0x01, 0x58, 0x01, 0xcb, 0x97, 0xe3, 0x01, 0x58, 0x41, 0xd5,
- 0x03, 0xb2, 0x01, 0x5b, 0x3b, 0x02, 0x8e, 0x1e, 0xd0, 0x5c, 0xe2, 0x0f,
- 0xc2, 0xa8, 0x03, 0xc2, 0x8e, 0x24, 0xcc, 0x8c, 0x20, 0x01, 0x0e, 0xb1,
- 0x49, 0x01, 0x8a, 0xc2, 0x8e, 0x33, 0xcb, 0x04, 0xfc, 0x01, 0x58, 0x09,
- 0xcb, 0x97, 0xe3, 0x01, 0x58, 0x49, 0xd5, 0x03, 0xb2, 0x01, 0x5b, 0x33,
- 0x02, 0x8e, 0x45, 0xd0, 0x5c, 0xe2, 0x0f, 0xc2, 0xa0, 0x49, 0x52, 0xd7,
- 0xc2, 0x8e, 0x4b, 0x43, 0x00, 0x90, 0xc2, 0x8e, 0x57, 0xd0, 0x57, 0xd2,
- 0x05, 0x41, 0xb9, 0xca, 0xa1, 0xb2, 0x05, 0x41, 0xc0, 0xe0, 0x09, 0x27,
- 0x01, 0x3d, 0x78, 0xd7, 0x28, 0x82, 0x01, 0x17, 0x19, 0xd4, 0x3f, 0x49,
- 0x01, 0x17, 0x10, 0xc9, 0x31, 0x53, 0x01, 0x14, 0x29, 0xc7, 0x3b, 0xd9,
- 0x01, 0x14, 0x20, 0xc2, 0x00, 0xc7, 0x0f, 0x08, 0xf1, 0x83, 0x0f, 0x08,
- 0xe0, 0xc2, 0x96, 0xd0, 0x0f, 0x08, 0x99, 0xc2, 0x0c, 0x65, 0x0f, 0x08,
- 0x69, 0x83, 0x0f, 0x08, 0x10, 0x84, 0x0d, 0x97, 0xd9, 0x83, 0x0d, 0x97,
- 0xd1, 0xa6, 0x0d, 0x97, 0xc9, 0xa5, 0x0d, 0x97, 0xc1, 0xa4, 0x0d, 0x97,
- 0xb9, 0xa3, 0x0d, 0x97, 0xb1, 0xa2, 0x0d, 0x97, 0xa9, 0xa1, 0x0d, 0x97,
- 0xa1, 0xa0, 0x0d, 0x97, 0x99, 0x9f, 0x0d, 0x97, 0x91, 0x9e, 0x0d, 0x97,
- 0x89, 0x9d, 0x0d, 0x97, 0x80, 0x88, 0x0d, 0x97, 0x79, 0x87, 0x0d, 0x97,
- 0x71, 0x86, 0x0d, 0x97, 0x69, 0x83, 0x0d, 0x97, 0x51, 0xa6, 0x0d, 0x97,
- 0x49, 0xa2, 0x0d, 0x97, 0x29, 0x85, 0x0d, 0x97, 0x61, 0x84, 0x0d, 0x97,
- 0x59, 0xa5, 0x0d, 0x97, 0x41, 0xa4, 0x0d, 0x97, 0x39, 0xa3, 0x0d, 0x97,
- 0x31, 0xa1, 0x0d, 0x97, 0x21, 0xa0, 0x0d, 0x97, 0x19, 0x9f, 0x0d, 0x97,
- 0x11, 0x9e, 0x0d, 0x97, 0x09, 0x9d, 0x0d, 0x97, 0x00, 0x83, 0x0d, 0x95,
- 0xd1, 0x88, 0x0d, 0x95, 0xf9, 0x87, 0x0d, 0x95, 0xf1, 0xa6, 0x0d, 0x95,
- 0xc9, 0xa5, 0x0d, 0x95, 0xc1, 0xa4, 0x0d, 0x95, 0xb9, 0xa3, 0x0d, 0x95,
- 0xb1, 0xa2, 0x0d, 0x95, 0xa9, 0xa1, 0x0d, 0x95, 0xa1, 0xa0, 0x0d, 0x95,
- 0x99, 0x9f, 0x0d, 0x95, 0x91, 0x9e, 0x0d, 0x95, 0x89, 0x9d, 0x0d, 0x95,
- 0x81, 0x84, 0x0d, 0x95, 0xd9, 0x85, 0x0d, 0x95, 0xe1, 0x86, 0x0d, 0x95,
- 0xe8, 0x83, 0x0d, 0x94, 0xd1, 0xa6, 0x0d, 0x94, 0xc9, 0xa5, 0x0d, 0x94,
- 0xc1, 0xa4, 0x0d, 0x94, 0xb9, 0xa3, 0x0d, 0x94, 0xb1, 0xa2, 0x0d, 0x94,
- 0xa9, 0xa1, 0x0d, 0x94, 0xa1, 0xa0, 0x0d, 0x94, 0x99, 0x9f, 0x0d, 0x94,
- 0x91, 0x9e, 0x0d, 0x94, 0x89, 0x9d, 0x0d, 0x94, 0x81, 0x88, 0x0d, 0x94,
- 0xf9, 0x87, 0x0d, 0x94, 0xf1, 0x86, 0x0d, 0x94, 0xe9, 0x85, 0x0d, 0x94,
- 0xe1, 0x84, 0x0d, 0x94, 0xd8, 0x88, 0x0d, 0x94, 0x79, 0x87, 0x0d, 0x94,
- 0x71, 0x86, 0x0d, 0x94, 0x69, 0x85, 0x0d, 0x94, 0x61, 0x84, 0x0d, 0x94,
- 0x59, 0x83, 0x0d, 0x94, 0x51, 0xa6, 0x0d, 0x94, 0x49, 0xa5, 0x0d, 0x94,
- 0x41, 0xa4, 0x0d, 0x94, 0x39, 0xa3, 0x0d, 0x94, 0x31, 0xa2, 0x0d, 0x94,
- 0x29, 0xa1, 0x0d, 0x94, 0x21, 0xa0, 0x0d, 0x94, 0x19, 0x9f, 0x0d, 0x94,
- 0x11, 0x9e, 0x0d, 0x94, 0x09, 0x9d, 0x0d, 0x94, 0x00, 0x88, 0x0d, 0x93,
- 0xf9, 0x87, 0x0d, 0x93, 0xf1, 0x86, 0x0d, 0x93, 0xe9, 0x85, 0x0d, 0x93,
- 0xe1, 0x84, 0x0d, 0x93, 0xd9, 0x83, 0x0d, 0x93, 0xd1, 0xa6, 0x0d, 0x93,
- 0xc9, 0xa5, 0x0d, 0x93, 0xc1, 0xa4, 0x0d, 0x93, 0xb9, 0xa3, 0x0d, 0x93,
- 0xb1, 0xa2, 0x0d, 0x93, 0xa9, 0xa1, 0x0d, 0x93, 0xa1, 0xa0, 0x0d, 0x93,
- 0x99, 0x9f, 0x0d, 0x93, 0x91, 0x9e, 0x0d, 0x93, 0x89, 0x9d, 0x0d, 0x93,
- 0x80, 0x88, 0x0d, 0x93, 0x79, 0x87, 0x0d, 0x93, 0x71, 0x86, 0x0d, 0x93,
- 0x69, 0x85, 0x0d, 0x93, 0x61, 0x84, 0x0d, 0x93, 0x59, 0x83, 0x0d, 0x93,
- 0x51, 0xa6, 0x0d, 0x93, 0x49, 0xa5, 0x0d, 0x93, 0x41, 0xa4, 0x0d, 0x93,
- 0x39, 0xa3, 0x0d, 0x93, 0x31, 0xa2, 0x0d, 0x93, 0x29, 0xa1, 0x0d, 0x93,
- 0x21, 0xa0, 0x0d, 0x93, 0x19, 0x9f, 0x0d, 0x93, 0x11, 0x9e, 0x0d, 0x93,
- 0x09, 0x9d, 0x0d, 0x93, 0x00, 0x88, 0x0d, 0x92, 0xf9, 0x87, 0x0d, 0x92,
- 0xf1, 0x86, 0x0d, 0x92, 0xe9, 0x85, 0x0d, 0x92, 0xe1, 0x84, 0x0d, 0x92,
- 0xd9, 0x83, 0x0d, 0x92, 0xd1, 0xa6, 0x0d, 0x92, 0xc9, 0xa5, 0x0d, 0x92,
- 0xc1, 0xa4, 0x0d, 0x92, 0xb9, 0xa3, 0x0d, 0x92, 0xb1, 0xa2, 0x0d, 0x92,
- 0xa9, 0xa1, 0x0d, 0x92, 0xa1, 0xa0, 0x0d, 0x92, 0x99, 0x9f, 0x0d, 0x92,
- 0x91, 0x9e, 0x0d, 0x92, 0x89, 0x9d, 0x0d, 0x92, 0x80, 0x88, 0x0d, 0x92,
- 0x79, 0x87, 0x0d, 0x92, 0x71, 0x86, 0x0d, 0x92, 0x69, 0x85, 0x0d, 0x92,
- 0x61, 0x84, 0x0d, 0x92, 0x59, 0x83, 0x0d, 0x92, 0x51, 0xa6, 0x0d, 0x92,
- 0x49, 0xa5, 0x0d, 0x92, 0x41, 0xa4, 0x0d, 0x92, 0x39, 0xa3, 0x0d, 0x92,
- 0x31, 0xa2, 0x0d, 0x92, 0x29, 0xa1, 0x0d, 0x92, 0x21, 0xa0, 0x0d, 0x92,
- 0x19, 0x9f, 0x0d, 0x92, 0x11, 0x9e, 0x0d, 0x92, 0x09, 0x9d, 0x0d, 0x92,
- 0x00, 0x88, 0x0d, 0x91, 0xf9, 0x87, 0x0d, 0x91, 0xf1, 0x86, 0x0d, 0x91,
- 0xe9, 0x85, 0x0d, 0x91, 0xe1, 0x84, 0x0d, 0x91, 0xd9, 0x83, 0x0d, 0x91,
- 0xd1, 0xa6, 0x0d, 0x91, 0xc9, 0xa5, 0x0d, 0x91, 0xc1, 0xa4, 0x0d, 0x91,
- 0xb9, 0xa3, 0x0d, 0x91, 0xb1, 0xa2, 0x0d, 0x91, 0xa9, 0xa1, 0x0d, 0x91,
- 0xa1, 0xa0, 0x0d, 0x91, 0x99, 0x9f, 0x0d, 0x91, 0x91, 0x9e, 0x0d, 0x91,
- 0x89, 0x9d, 0x0d, 0x91, 0x80, 0x88, 0x0d, 0x91, 0x79, 0x87, 0x0d, 0x91,
- 0x71, 0x86, 0x0d, 0x91, 0x69, 0x85, 0x0d, 0x91, 0x61, 0x84, 0x0d, 0x91,
- 0x59, 0x83, 0x0d, 0x91, 0x51, 0xa6, 0x0d, 0x91, 0x49, 0xa5, 0x0d, 0x91,
- 0x41, 0xa4, 0x0d, 0x91, 0x39, 0xa3, 0x0d, 0x91, 0x31, 0xa2, 0x0d, 0x91,
- 0x29, 0xa1, 0x0d, 0x91, 0x21, 0xa0, 0x0d, 0x91, 0x19, 0x9f, 0x0d, 0x91,
- 0x11, 0x9e, 0x0d, 0x91, 0x09, 0x9d, 0x0d, 0x91, 0x00, 0x88, 0x0d, 0x90,
- 0xf9, 0x87, 0x0d, 0x90, 0xf1, 0x86, 0x0d, 0x90, 0xe9, 0x85, 0x0d, 0x90,
- 0xe1, 0x84, 0x0d, 0x90, 0xd9, 0x83, 0x0d, 0x90, 0xd1, 0xa6, 0x0d, 0x90,
- 0xc9, 0xa5, 0x0d, 0x90, 0xc1, 0xa4, 0x0d, 0x90, 0xb9, 0xa3, 0x0d, 0x90,
- 0xb1, 0xa2, 0x0d, 0x90, 0xa9, 0xa1, 0x0d, 0x90, 0xa1, 0xa0, 0x0d, 0x90,
- 0x99, 0x9f, 0x0d, 0x90, 0x91, 0x9e, 0x0d, 0x90, 0x89, 0x9d, 0x0d, 0x90,
- 0x80, 0x88, 0x0d, 0x90, 0x79, 0x87, 0x0d, 0x90, 0x71, 0x86, 0x0d, 0x90,
- 0x69, 0x85, 0x0d, 0x90, 0x61, 0x84, 0x0d, 0x90, 0x59, 0x83, 0x0d, 0x90,
- 0x51, 0xa6, 0x0d, 0x90, 0x49, 0xa5, 0x0d, 0x90, 0x41, 0xa4, 0x0d, 0x90,
- 0x39, 0xa3, 0x0d, 0x90, 0x31, 0xa2, 0x0d, 0x90, 0x29, 0xa1, 0x0d, 0x90,
- 0x21, 0xa0, 0x0d, 0x90, 0x19, 0x9f, 0x0d, 0x90, 0x11, 0x9e, 0x0d, 0x90,
- 0x09, 0x9d, 0x0d, 0x90, 0x00, 0x88, 0x0d, 0x96, 0xf9, 0x87, 0x0d, 0x96,
- 0xf1, 0x86, 0x0d, 0x96, 0xe9, 0x85, 0x0d, 0x96, 0xe1, 0x84, 0x0d, 0x96,
- 0xd9, 0x83, 0x0d, 0x96, 0xd1, 0xa6, 0x0d, 0x96, 0xc9, 0xa5, 0x0d, 0x96,
- 0xc1, 0xa4, 0x0d, 0x96, 0xb9, 0xa3, 0x0d, 0x96, 0xb1, 0xa2, 0x0d, 0x96,
- 0xa9, 0xa1, 0x0d, 0x96, 0xa1, 0xa0, 0x0d, 0x96, 0x99, 0x9f, 0x0d, 0x96,
- 0x91, 0x9e, 0x0d, 0x96, 0x89, 0x9d, 0x0d, 0x96, 0x80, 0x88, 0x0d, 0x96,
- 0x79, 0x87, 0x0d, 0x96, 0x71, 0x86, 0x0d, 0x96, 0x69, 0x85, 0x0d, 0x96,
- 0x61, 0x84, 0x0d, 0x96, 0x59, 0x83, 0x0d, 0x96, 0x51, 0xa6, 0x0d, 0x96,
- 0x49, 0xa5, 0x0d, 0x96, 0x41, 0xa4, 0x0d, 0x96, 0x39, 0xa3, 0x0d, 0x96,
- 0x31, 0xa2, 0x0d, 0x96, 0x29, 0xa1, 0x0d, 0x96, 0x21, 0xa0, 0x0d, 0x96,
- 0x19, 0x9f, 0x0d, 0x96, 0x11, 0x9e, 0x0d, 0x96, 0x09, 0x9d, 0x0d, 0x96,
- 0x00, 0x88, 0x0d, 0x95, 0x79, 0x87, 0x0d, 0x95, 0x71, 0x86, 0x0d, 0x95,
- 0x69, 0x85, 0x0d, 0x95, 0x61, 0x84, 0x0d, 0x95, 0x59, 0x83, 0x0d, 0x95,
- 0x51, 0xa6, 0x0d, 0x95, 0x49, 0xa5, 0x0d, 0x95, 0x41, 0xa4, 0x0d, 0x95,
- 0x39, 0xa3, 0x0d, 0x95, 0x31, 0xa2, 0x0d, 0x95, 0x29, 0xa1, 0x0d, 0x95,
- 0x21, 0xa0, 0x0d, 0x95, 0x19, 0x9f, 0x0d, 0x95, 0x11, 0x9e, 0x0d, 0x95,
- 0x09, 0x9d, 0x0d, 0x95, 0x00, 0x88, 0x0d, 0x8f, 0xf9, 0x87, 0x0d, 0x8f,
- 0xf1, 0x86, 0x0d, 0x8f, 0xe9, 0x85, 0x0d, 0x8f, 0xe1, 0x84, 0x0d, 0x8f,
- 0xd9, 0x83, 0x0d, 0x8f, 0xd1, 0xa6, 0x0d, 0x8f, 0xc9, 0xa5, 0x0d, 0x8f,
- 0xc1, 0xa4, 0x0d, 0x8f, 0xb9, 0xa3, 0x0d, 0x8f, 0xb1, 0xa2, 0x0d, 0x8f,
- 0xa9, 0xa1, 0x0d, 0x8f, 0xa1, 0xa0, 0x0d, 0x8f, 0x99, 0x9f, 0x0d, 0x8f,
- 0x91, 0x9e, 0x0d, 0x8f, 0x89, 0x9d, 0x0d, 0x8f, 0x80, 0x88, 0x0d, 0x8f,
- 0x79, 0x87, 0x0d, 0x8f, 0x71, 0x86, 0x0d, 0x8f, 0x69, 0x85, 0x0d, 0x8f,
- 0x61, 0x84, 0x0d, 0x8f, 0x59, 0x83, 0x0d, 0x8f, 0x51, 0xa6, 0x0d, 0x8f,
- 0x49, 0xa5, 0x0d, 0x8f, 0x41, 0xa4, 0x0d, 0x8f, 0x39, 0xa3, 0x0d, 0x8f,
- 0x31, 0xa2, 0x0d, 0x8f, 0x29, 0xa1, 0x0d, 0x8f, 0x21, 0xa0, 0x0d, 0x8f,
- 0x19, 0x9f, 0x0d, 0x8f, 0x11, 0x9e, 0x0d, 0x8f, 0x09, 0x9d, 0x0d, 0x8f,
- 0x00, 0x88, 0x0d, 0x8e, 0xf9, 0x87, 0x0d, 0x8e, 0xf1, 0x86, 0x0d, 0x8e,
- 0xe9, 0x85, 0x0d, 0x8e, 0xe1, 0x84, 0x0d, 0x8e, 0xd9, 0x83, 0x0d, 0x8e,
- 0xd1, 0xa6, 0x0d, 0x8e, 0xc9, 0xa5, 0x0d, 0x8e, 0xc1, 0xa4, 0x0d, 0x8e,
- 0xb9, 0xa3, 0x0d, 0x8e, 0xb1, 0xa2, 0x0d, 0x8e, 0xa9, 0xa1, 0x0d, 0x8e,
- 0xa1, 0xa0, 0x0d, 0x8e, 0x99, 0x9f, 0x0d, 0x8e, 0x91, 0x9e, 0x0d, 0x8e,
- 0x89, 0x9d, 0x0d, 0x8e, 0x80, 0x88, 0x0d, 0x8e, 0x79, 0x87, 0x0d, 0x8e,
- 0x71, 0x86, 0x0d, 0x8e, 0x69, 0x85, 0x0d, 0x8e, 0x61, 0x84, 0x0d, 0x8e,
- 0x59, 0x83, 0x0d, 0x8e, 0x51, 0xa6, 0x0d, 0x8e, 0x49, 0xa5, 0x0d, 0x8e,
- 0x41, 0xa4, 0x0d, 0x8e, 0x39, 0xa3, 0x0d, 0x8e, 0x31, 0xa2, 0x0d, 0x8e,
- 0x29, 0xa1, 0x0d, 0x8e, 0x21, 0xa0, 0x0d, 0x8e, 0x19, 0x9f, 0x0d, 0x8e,
- 0x11, 0x9e, 0x0d, 0x8e, 0x09, 0x9d, 0x0d, 0x8e, 0x00, 0x88, 0x0d, 0x8d,
- 0xf9, 0x87, 0x0d, 0x8d, 0xf1, 0x86, 0x0d, 0x8d, 0xe9, 0x85, 0x0d, 0x8d,
- 0xe1, 0x84, 0x0d, 0x8d, 0xd9, 0x83, 0x0d, 0x8d, 0xd1, 0xa6, 0x0d, 0x8d,
- 0xc9, 0xa5, 0x0d, 0x8d, 0xc1, 0xa4, 0x0d, 0x8d, 0xb9, 0xa3, 0x0d, 0x8d,
- 0xb1, 0xa2, 0x0d, 0x8d, 0xa9, 0xa1, 0x0d, 0x8d, 0xa1, 0xa0, 0x0d, 0x8d,
- 0x99, 0x9f, 0x0d, 0x8d, 0x91, 0x9e, 0x0d, 0x8d, 0x89, 0x9d, 0x0d, 0x8d,
- 0x80, 0x88, 0x0d, 0x8d, 0x79, 0x87, 0x0d, 0x8d, 0x71, 0x86, 0x0d, 0x8d,
- 0x69, 0x85, 0x0d, 0x8d, 0x61, 0x84, 0x0d, 0x8d, 0x59, 0x83, 0x0d, 0x8d,
- 0x51, 0xa6, 0x0d, 0x8d, 0x49, 0xa5, 0x0d, 0x8d, 0x41, 0xa4, 0x0d, 0x8d,
- 0x39, 0xa3, 0x0d, 0x8d, 0x31, 0xa2, 0x0d, 0x8d, 0x29, 0xa1, 0x0d, 0x8d,
- 0x21, 0xa0, 0x0d, 0x8d, 0x19, 0x9f, 0x0d, 0x8d, 0x11, 0x9e, 0x0d, 0x8d,
- 0x09, 0x9d, 0x0d, 0x8d, 0x00, 0x88, 0x0d, 0x8c, 0xf9, 0x87, 0x0d, 0x8c,
- 0xf1, 0x86, 0x0d, 0x8c, 0xe9, 0x85, 0x0d, 0x8c, 0xe1, 0x84, 0x0d, 0x8c,
- 0xd9, 0x83, 0x0d, 0x8c, 0xd1, 0xa6, 0x0d, 0x8c, 0xc9, 0xa5, 0x0d, 0x8c,
- 0xc1, 0xa4, 0x0d, 0x8c, 0xb9, 0xa3, 0x0d, 0x8c, 0xb1, 0xa2, 0x0d, 0x8c,
- 0xa9, 0xa1, 0x0d, 0x8c, 0xa1, 0xa0, 0x0d, 0x8c, 0x99, 0x9f, 0x0d, 0x8c,
- 0x91, 0x9e, 0x0d, 0x8c, 0x89, 0x9d, 0x0d, 0x8c, 0x80, 0x88, 0x0d, 0x8c,
- 0x79, 0x87, 0x0d, 0x8c, 0x71, 0x86, 0x0d, 0x8c, 0x69, 0x85, 0x0d, 0x8c,
- 0x61, 0x84, 0x0d, 0x8c, 0x59, 0x83, 0x0d, 0x8c, 0x51, 0xa6, 0x0d, 0x8c,
- 0x49, 0xa5, 0x0d, 0x8c, 0x41, 0xa4, 0x0d, 0x8c, 0x39, 0xa3, 0x0d, 0x8c,
- 0x31, 0xa2, 0x0d, 0x8c, 0x29, 0xa1, 0x0d, 0x8c, 0x21, 0xa0, 0x0d, 0x8c,
- 0x19, 0x9f, 0x0d, 0x8c, 0x11, 0x9e, 0x0d, 0x8c, 0x09, 0x9d, 0x0d, 0x8c,
- 0x00, 0x88, 0x0d, 0x8b, 0xf9, 0x87, 0x0d, 0x8b, 0xf1, 0x86, 0x0d, 0x8b,
- 0xe9, 0x85, 0x0d, 0x8b, 0xe1, 0x84, 0x0d, 0x8b, 0xd9, 0x83, 0x0d, 0x8b,
- 0xd1, 0xa6, 0x0d, 0x8b, 0xc9, 0xa5, 0x0d, 0x8b, 0xc1, 0xa4, 0x0d, 0x8b,
- 0xb9, 0xa3, 0x0d, 0x8b, 0xb1, 0xa2, 0x0d, 0x8b, 0xa9, 0xa1, 0x0d, 0x8b,
- 0xa1, 0xa0, 0x0d, 0x8b, 0x99, 0x9f, 0x0d, 0x8b, 0x91, 0x9e, 0x0d, 0x8b,
- 0x89, 0x9d, 0x0d, 0x8b, 0x80, 0xcd, 0x7b, 0xf6, 0x01, 0x24, 0xd9, 0xcd,
- 0x7b, 0x33, 0x01, 0x24, 0x98, 0xcf, 0x61, 0x55, 0x01, 0x24, 0xb9, 0xc2,
- 0x00, 0xcd, 0x00, 0x01, 0x18, 0xc2, 0x02, 0x59, 0x00, 0x3f, 0x51, 0xc3,
- 0x1b, 0xb6, 0x00, 0x3f, 0x49, 0xc2, 0x24, 0x58, 0x00, 0x3f, 0x40, 0xc7,
- 0xc8, 0x6b, 0x00, 0x3f, 0x38, 0xc7, 0xc8, 0x6b, 0x00, 0x3f, 0x00, 0xd0,
- 0x5b, 0xb2, 0x01, 0x4d, 0xa1, 0xd1, 0x09, 0xd6, 0x01, 0x4d, 0x99, 0xd2,
- 0x49, 0x02, 0x01, 0x4d, 0x91, 0xc7, 0x76, 0x66, 0x01, 0x4d, 0x88, 0x43,
- 0x03, 0xa3, 0x42, 0x8e, 0x63, 0x03, 0xc2, 0x8e, 0x6d, 0xcd, 0x7f, 0x5d,
- 0x0f, 0x98, 0x68, 0xc6, 0x07, 0x3a, 0x09, 0xa2, 0x83, 0x02, 0x8e, 0x79,
- 0xc3, 0x03, 0x27, 0x09, 0xa2, 0x5b, 0x02, 0x8e, 0x7d, 0xc3, 0x00, 0xef,
- 0x09, 0xa2, 0x91, 0xc5, 0xa0, 0x46, 0x09, 0xa2, 0x4a, 0x02, 0x8e, 0x81,
- 0xa1, 0x09, 0x8f, 0x71, 0xa0, 0x09, 0x8f, 0x69, 0x9f, 0x09, 0x8f, 0x61,
- 0x9e, 0x09, 0x8f, 0x59, 0x9d, 0x09, 0x8f, 0x4a, 0x02, 0x8e, 0x87, 0xa6,
- 0x09, 0x8f, 0x41, 0xa5, 0x09, 0x8f, 0x39, 0xa4, 0x09, 0x8f, 0x31, 0xa3,
- 0x09, 0x8f, 0x29, 0xa2, 0x09, 0x8f, 0x21, 0xa1, 0x09, 0x8f, 0x19, 0xa0,
- 0x09, 0x8f, 0x03, 0x02, 0x8e, 0x8b, 0x9f, 0x09, 0x8e, 0xf9, 0x9e, 0x09,
- 0x8e, 0xeb, 0x02, 0x8e, 0x93, 0x9d, 0x09, 0x8e, 0xe0, 0xa6, 0x09, 0x8e,
- 0xd9, 0xa5, 0x09, 0x8e, 0xcb, 0x02, 0x8e, 0x97, 0xa4, 0x09, 0x8e, 0xc1,
- 0xa3, 0x09, 0x8e, 0xb9, 0xa2, 0x09, 0x8e, 0xb1, 0xa1, 0x09, 0x8e, 0xa3,
- 0x02, 0x8e, 0x9b, 0xa0, 0x09, 0x8e, 0x99, 0x9f, 0x09, 0x8e, 0x8b, 0x02,
- 0x8e, 0x9f, 0x9e, 0x09, 0x8e, 0x81, 0x9d, 0x09, 0x8e, 0x78, 0xa6, 0x09,
- 0x8e, 0x71, 0xa5, 0x09, 0x8e, 0x69, 0xa4, 0x09, 0x8e, 0x5b, 0x02, 0x8e,
- 0xa3, 0xa3, 0x09, 0x8e, 0x4b, 0x02, 0x8e, 0xa7, 0xa2, 0x09, 0x8e, 0x3b,
- 0x02, 0x8e, 0xab, 0xa1, 0x09, 0x8e, 0x31, 0xa0, 0x09, 0x8e, 0x29, 0x9f,
- 0x09, 0x8d, 0xe3, 0x02, 0x8e, 0xaf, 0x9e, 0x09, 0x8d, 0xd9, 0x9d, 0x09,
- 0x8d, 0xca, 0x02, 0x8e, 0xcf, 0xa6, 0x09, 0x8d, 0xc1, 0xa5, 0x09, 0x8d,
- 0xb9, 0xa4, 0x09, 0x8d, 0xb1, 0xa3, 0x09, 0x8d, 0xa9, 0xa2, 0x09, 0x8d,
- 0xa1, 0xa1, 0x09, 0x8d, 0x99, 0xa0, 0x09, 0x8d, 0x8b, 0x02, 0x8e, 0xd3,
- 0x9f, 0x09, 0x8d, 0x81, 0x9e, 0x09, 0x8d, 0x6a, 0x02, 0x8e, 0xd7, 0xa2,
- 0x09, 0x9e, 0x71, 0xa1, 0x09, 0x9e, 0x63, 0x02, 0x8e, 0xdf, 0xa0, 0x09,
- 0x9e, 0x59, 0x9f, 0x09, 0x9e, 0x51, 0x9e, 0x09, 0x9e, 0x49, 0x9d, 0x09,
- 0x9e, 0x40, 0xa6, 0x09, 0x9e, 0x39, 0xa5, 0x09, 0x9e, 0x2b, 0x02, 0x8e,
- 0xe3, 0xa4, 0x09, 0x9e, 0x1b, 0x02, 0x8e, 0xe7, 0xa3, 0x09, 0x9e, 0x11,
- 0xa2, 0x09, 0x9e, 0x09, 0xa1, 0x09, 0x9d, 0xfb, 0x02, 0x8e, 0xeb, 0xa0,
- 0x09, 0x9d, 0xf1, 0x9f, 0x09, 0x9d, 0xe9, 0x9e, 0x09, 0x9d, 0xe1, 0x9d,
- 0x09, 0x9d, 0xd2, 0x02, 0x8e, 0xef, 0xa6, 0x09, 0x9d, 0xc3, 0x02, 0x8e,
- 0xf3, 0xa5, 0x09, 0x9d, 0xb9, 0xa4, 0x09, 0x9d, 0xb1, 0xa3, 0x09, 0x9d,
- 0xa9, 0xa2, 0x09, 0x9d, 0xa1, 0xa1, 0x09, 0x9d, 0x99, 0xa0, 0x09, 0x9d,
- 0x8b, 0x02, 0x8e, 0xf7, 0x9f, 0x09, 0x9d, 0x81, 0x9e, 0x09, 0x9d, 0x78,
- 0xa3, 0x09, 0x99, 0x91, 0xa2, 0x09, 0x99, 0x89, 0xa1, 0x09, 0x99, 0x81,
- 0xa0, 0x09, 0x99, 0x73, 0x02, 0x8e, 0xfb, 0x9f, 0x09, 0x99, 0x63, 0x02,
- 0x8e, 0xff, 0x9e, 0x09, 0x99, 0x59, 0x9d, 0x09, 0x99, 0x50, 0xa6, 0x09,
- 0x99, 0x49, 0xa5, 0x09, 0x99, 0x41, 0xa4, 0x09, 0x99, 0x39, 0xa3, 0x09,
- 0x99, 0x31, 0xa2, 0x09, 0x99, 0x29, 0xa1, 0x09, 0x99, 0x21, 0xa0, 0x09,
- 0x99, 0x19, 0x9f, 0x09, 0x99, 0x11, 0x9e, 0x09, 0x99, 0x09, 0x9d, 0x09,
- 0x99, 0x00, 0xa6, 0x09, 0x98, 0xf9, 0xa5, 0x09, 0x98, 0xf1, 0xa4, 0x09,
- 0x98, 0xe9, 0xa3, 0x09, 0x98, 0xdb, 0x02, 0x8f, 0x03, 0xa2, 0x09, 0x98,
- 0xd1, 0xa1, 0x09, 0x98, 0xc9, 0xa0, 0x09, 0x98, 0xc1, 0x9f, 0x09, 0x98,
- 0xb9, 0x9e, 0x09, 0x98, 0xab, 0x02, 0x8f, 0x07, 0x9d, 0x09, 0x98, 0xa0,
- 0xa6, 0x09, 0x98, 0x93, 0x02, 0x8f, 0x0b, 0xa5, 0x09, 0x98, 0x83, 0x02,
- 0x8f, 0x0f, 0xa4, 0x09, 0x98, 0x73, 0x02, 0x8f, 0x13, 0xa3, 0x09, 0x98,
- 0x69, 0xa2, 0x09, 0x98, 0x61, 0xa1, 0x09, 0x98, 0x59, 0xa0, 0x09, 0x98,
- 0x4b, 0x02, 0x8f, 0x17, 0x9f, 0x09, 0x98, 0x41, 0x9e, 0x09, 0x98, 0x38,
- 0x83, 0x09, 0x8c, 0x28, 0x83, 0x09, 0x8d, 0x50, 0x83, 0x09, 0x8d, 0x28,
- 0xa0, 0x09, 0x89, 0xf1, 0x9f, 0x09, 0x89, 0xe9, 0x9e, 0x09, 0x89, 0xcb,
- 0x02, 0x8f, 0x1b, 0x9d, 0x09, 0x89, 0xc0, 0xa6, 0x09, 0x89, 0xb9, 0xa5,
- 0x09, 0x89, 0xb1, 0xa4, 0x09, 0x89, 0xa3, 0x02, 0x8f, 0x27, 0xa3, 0x09,
- 0x89, 0x93, 0x02, 0x8f, 0x2b, 0xa2, 0x09, 0x89, 0x83, 0x02, 0x8f, 0x2f,
- 0xa1, 0x09, 0x89, 0x79, 0xa0, 0x09, 0x89, 0x71, 0x9f, 0x09, 0x89, 0x69,
- 0x9e, 0x09, 0x89, 0x61, 0x9d, 0x09, 0x89, 0x58, 0xa6, 0x09, 0x89, 0x51,
- 0xa5, 0x09, 0x89, 0x43, 0x02, 0x8f, 0x33, 0xa4, 0x09, 0x89, 0x33, 0x02,
- 0x8f, 0x37, 0xa3, 0x09, 0x89, 0x29, 0xa2, 0x09, 0x89, 0x21, 0xa1, 0x09,
- 0x89, 0x19, 0xa0, 0x09, 0x89, 0x11, 0x9f, 0x09, 0x89, 0x09, 0x9e, 0x09,
- 0x88, 0xfb, 0x02, 0x8f, 0x3b, 0x9d, 0x09, 0x88, 0xf0, 0xa6, 0x09, 0x88,
- 0xe9, 0xa5, 0x09, 0x88, 0xe1, 0xa4, 0x09, 0x88, 0xd9, 0xa3, 0x09, 0x88,
- 0xd1, 0xa2, 0x09, 0x88, 0xc9, 0xa1, 0x09, 0x88, 0xc1, 0xa0, 0x09, 0x88,
- 0xb9, 0x9f, 0x09, 0x88, 0xb1, 0x9e, 0x09, 0x88, 0xa3, 0x02, 0x8f, 0x3f,
- 0x9d, 0x09, 0x88, 0x98, 0xa6, 0x09, 0x88, 0x91, 0xa5, 0x09, 0x88, 0x89,
- 0xa4, 0x09, 0x88, 0x81, 0xa3, 0x09, 0x88, 0x79, 0xa2, 0x09, 0x88, 0x71,
- 0xa1, 0x09, 0x88, 0x69, 0xa0, 0x09, 0x88, 0x5b, 0x02, 0x8f, 0x43, 0x9f,
- 0x09, 0x88, 0x51, 0x9e, 0x09, 0x88, 0x49, 0x9d, 0x09, 0x88, 0x40, 0xa6,
- 0x09, 0x88, 0x39, 0xa5, 0x09, 0x88, 0x31, 0xa4, 0x09, 0x88, 0x29, 0xa3,
- 0x09, 0x88, 0x21, 0xa2, 0x09, 0x88, 0x19, 0xa1, 0x09, 0x88, 0x11, 0xa0,
- 0x09, 0x88, 0x09, 0x9f, 0x09, 0x88, 0x01, 0x9e, 0x09, 0x87, 0xf2, 0x02,
- 0x8f, 0x47, 0xa5, 0x09, 0x87, 0xe9, 0xa4, 0x09, 0x87, 0xe1, 0xa3, 0x09,
- 0x87, 0xd9, 0xa1, 0x09, 0x87, 0xcb, 0x02, 0x8f, 0x4b, 0xa0, 0x09, 0x87,
- 0xc1, 0x9f, 0x09, 0x87, 0xb9, 0x9e, 0x09, 0x87, 0xb1, 0x9d, 0x09, 0x87,
- 0xa8, 0xa6, 0x09, 0x87, 0xa1, 0xa5, 0x09, 0x87, 0x93, 0x02, 0x8f, 0x4f,
- 0xa4, 0x09, 0x87, 0x89, 0xa3, 0x09, 0x87, 0x81, 0xa2, 0x09, 0x87, 0x79,
- 0xa1, 0x09, 0x87, 0x71, 0xa0, 0x09, 0x87, 0x69, 0x9f, 0x09, 0x87, 0x61,
- 0x9e, 0x09, 0x87, 0x59, 0x9d, 0x09, 0x87, 0x4a, 0x02, 0x8f, 0x53, 0xa6,
- 0x09, 0x87, 0x41, 0xa5, 0x09, 0x87, 0x39, 0xa4, 0x09, 0x87, 0x2b, 0x02,
- 0x8f, 0x57, 0xa3, 0x09, 0x87, 0x1b, 0x02, 0x8f, 0x5b, 0xa2, 0x09, 0x87,
- 0x11, 0xa1, 0x09, 0x87, 0x09, 0xa0, 0x09, 0x87, 0x01, 0x9f, 0x09, 0x86,
- 0xf9, 0x9e, 0x09, 0x86, 0xf1, 0x9d, 0x09, 0x86, 0xe8, 0xa6, 0x09, 0x86,
- 0xdb, 0x02, 0x8f, 0x5f, 0xa5, 0x09, 0x86, 0xcb, 0x02, 0x8f, 0x63, 0xa4,
- 0x09, 0x86, 0xc1, 0xa3, 0x09, 0x86, 0xb9, 0xa2, 0x09, 0x86, 0xb1, 0xa1,
- 0x09, 0x86, 0xa9, 0xa0, 0x09, 0x86, 0xa1, 0x9f, 0x09, 0x86, 0x99, 0x9e,
- 0x09, 0x86, 0x90, 0x83, 0x09, 0x82, 0xa8, 0x00, 0x42, 0x8f, 0x67, 0x00,
- 0x42, 0x8f, 0x73, 0xa2, 0x09, 0x8c, 0xd1, 0xa1, 0x09, 0x8c, 0xc9, 0xa0,
- 0x09, 0x8c, 0xc1, 0x9f, 0x09, 0x8c, 0xb9, 0x9e, 0x09, 0x8c, 0xab, 0x02,
- 0x8f, 0x7f, 0x9d, 0x09, 0x8c, 0x9a, 0x02, 0x8f, 0x83, 0xa6, 0x09, 0x8c,
- 0x8b, 0x02, 0x8f, 0x87, 0xa5, 0x09, 0x8c, 0x81, 0xa4, 0x09, 0x8c, 0x79,
- 0xa3, 0x09, 0x8c, 0x71, 0xa2, 0x09, 0x8c, 0x63, 0x02, 0x8f, 0x8b, 0xa1,
- 0x09, 0x8c, 0x59, 0xa0, 0x09, 0x8c, 0x51, 0x9f, 0x09, 0x8c, 0x49, 0x9e,
- 0x09, 0x8c, 0x40, 0x9e, 0x09, 0x94, 0xd1, 0x9d, 0x09, 0x94, 0xba, 0x02,
- 0x8f, 0x8f, 0xa6, 0x09, 0x94, 0xb1, 0xa5, 0x09, 0x94, 0xa9, 0xa4, 0x09,
- 0x94, 0xa1, 0xa3, 0x09, 0x94, 0x99, 0xa2, 0x09, 0x94, 0x91, 0xa1, 0x09,
- 0x94, 0x89, 0xa0, 0x09, 0x94, 0x81, 0x9f, 0x09, 0x94, 0x79, 0x9e, 0x09,
- 0x94, 0x71, 0x9d, 0x09, 0x94, 0x68, 0xa6, 0x09, 0x94, 0x61, 0xa5, 0x09,
- 0x94, 0x59, 0xa4, 0x09, 0x94, 0x51, 0xa3, 0x09, 0x94, 0x2b, 0x02, 0x8f,
- 0x97, 0xa2, 0x09, 0x94, 0x21, 0xa1, 0x09, 0x94, 0x19, 0xa0, 0x09, 0x94,
- 0x0b, 0x02, 0x8f, 0xa7, 0x9f, 0x09, 0x94, 0x01, 0x9e, 0x09, 0x93, 0xf9,
- 0x9d, 0x09, 0x93, 0xea, 0x02, 0x8f, 0xab, 0xa6, 0x09, 0x93, 0xdb, 0x02,
- 0x8f, 0xaf, 0xa5, 0x09, 0x93, 0xd1, 0xa4, 0x09, 0x93, 0xc9, 0xa3, 0x09,
- 0x93, 0xc1, 0xa2, 0x09, 0x93, 0xb3, 0x02, 0x8f, 0xb3, 0xa1, 0x09, 0x93,
- 0xa3, 0x02, 0x8f, 0xb7, 0xa0, 0x09, 0x93, 0x99, 0x9f, 0x09, 0x93, 0x91,
- 0x9e, 0x09, 0x93, 0x89, 0x9d, 0x09, 0x93, 0x7a, 0x02, 0x8f, 0xbb, 0xa6,
- 0x09, 0x93, 0x6b, 0x02, 0x8f, 0xbf, 0xa5, 0x09, 0x93, 0x61, 0xa4, 0x09,
- 0x93, 0x59, 0xa3, 0x09, 0x93, 0x51, 0xa2, 0x09, 0x93, 0x49, 0xa1, 0x09,
- 0x93, 0x41, 0xa0, 0x09, 0x93, 0x39, 0x9f, 0x09, 0x93, 0x31, 0x9e, 0x09,
- 0x93, 0x29, 0x9d, 0x09, 0x93, 0x0a, 0x02, 0x8f, 0xc3, 0xa6, 0x09, 0x93,
- 0x01, 0xa5, 0x09, 0x92, 0xf9, 0xa4, 0x09, 0x92, 0xf1, 0xa3, 0x09, 0x92,
- 0xbb, 0x02, 0x8f, 0xcf, 0xa2, 0x09, 0x92, 0xab, 0x02, 0x8f, 0xe7, 0xa1,
- 0x09, 0x92, 0xa1, 0xa0, 0x09, 0x92, 0x99, 0x9f, 0x09, 0x92, 0x91, 0x9e,
- 0x09, 0x92, 0x82, 0x02, 0x8f, 0xeb, 0x9e, 0x09, 0x9b, 0xc3, 0x02, 0x8f,
- 0xef, 0xa6, 0x09, 0x9c, 0x29, 0xa5, 0x09, 0x9c, 0x21, 0xa4, 0x09, 0x9c,
- 0x19, 0xa3, 0x09, 0x9c, 0x11, 0xa2, 0x09, 0x9c, 0x09, 0xa1, 0x09, 0x9c,
- 0x01, 0xa0, 0x09, 0x9b, 0xf9, 0x9f, 0x09, 0x9b, 0xe3, 0x02, 0x8f, 0xff,
- 0x9d, 0x09, 0x9b, 0xb8, 0x83, 0x09, 0x9d, 0x70, 0xa6, 0x09, 0x9d, 0x61,
- 0xa5, 0x09, 0x9d, 0x59, 0xa4, 0x09, 0x9d, 0x4b, 0x02, 0x90, 0x07, 0xa3,
- 0x09, 0x9d, 0x41, 0xa2, 0x09, 0x9d, 0x39, 0xa1, 0x09, 0x9d, 0x31, 0xa0,
- 0x09, 0x9d, 0x23, 0x02, 0x90, 0x0b, 0x9f, 0x09, 0x9d, 0x19, 0x9e, 0x09,
- 0x9d, 0x0b, 0x02, 0x90, 0x0f, 0x9d, 0x09, 0x9c, 0xfa, 0x02, 0x90, 0x13,
- 0xa6, 0x09, 0x9c, 0xeb, 0x02, 0x90, 0x17, 0xa5, 0x09, 0x9c, 0xdb, 0x02,
- 0x90, 0x1b, 0xa4, 0x09, 0x9c, 0xd1, 0xa3, 0x09, 0x9c, 0xc9, 0xa2, 0x09,
- 0x9c, 0xc1, 0xa1, 0x09, 0x9c, 0xb9, 0xa0, 0x09, 0x9c, 0xab, 0x02, 0x90,
- 0x1f, 0x9f, 0x09, 0x9c, 0xa1, 0x9e, 0x09, 0x9c, 0x99, 0x9d, 0x09, 0x9c,
- 0x32, 0x02, 0x90, 0x23, 0xa6, 0x09, 0x9b, 0xb1, 0xa5, 0x09, 0x9b, 0xa9,
- 0xa4, 0x09, 0x9b, 0x93, 0x02, 0x90, 0x53, 0xa3, 0x09, 0x9b, 0x89, 0xa2,
- 0x09, 0x9b, 0x81, 0xa1, 0x09, 0x9b, 0x79, 0xa0, 0x09, 0x9b, 0x71, 0x9f,
- 0x09, 0x9b, 0x63, 0x02, 0x90, 0x5b, 0x9e, 0x09, 0x9b, 0x12, 0x02, 0x90,
- 0x5f, 0x9f, 0x09, 0xa1, 0x71, 0x9e, 0x09, 0xa1, 0x69, 0x9d, 0x09, 0xa1,
- 0x60, 0xa6, 0x09, 0xa1, 0x59, 0xa5, 0x09, 0xa1, 0x51, 0xa4, 0x09, 0xa1,
- 0x49, 0xa3, 0x09, 0xa1, 0x41, 0xa2, 0x09, 0xa1, 0x39, 0xa1, 0x09, 0xa1,
- 0x31, 0xa0, 0x09, 0xa1, 0x29, 0x9f, 0x09, 0xa1, 0x21, 0x9e, 0x09, 0xa1,
- 0x19, 0x9d, 0x09, 0xa1, 0x10, 0xa6, 0x09, 0xa1, 0x09, 0xa5, 0x09, 0xa1,
- 0x01, 0xa4, 0x09, 0xa0, 0xf9, 0xa3, 0x09, 0xa0, 0xf1, 0xa2, 0x09, 0xa0,
- 0xe9, 0xa1, 0x09, 0xa0, 0xe1, 0xa0, 0x09, 0xa0, 0xd9, 0x9f, 0x09, 0xa0,
- 0xd1, 0x9e, 0x09, 0xa0, 0xc9, 0x9d, 0x09, 0xa0, 0xc0, 0xa6, 0x09, 0xa0,
- 0xb9, 0xa5, 0x09, 0xa0, 0xb1, 0xa4, 0x09, 0xa0, 0x9b, 0x02, 0x90, 0x83,
- 0xa3, 0x09, 0xa0, 0x91, 0xa2, 0x09, 0xa0, 0x89, 0xa1, 0x09, 0xa0, 0x81,
- 0xa0, 0x09, 0xa0, 0x79, 0x9f, 0x09, 0xa0, 0x71, 0x9e, 0x09, 0xa0, 0x68,
- 0xa6, 0x09, 0x82, 0x71, 0xa5, 0x09, 0x82, 0x69, 0xa4, 0x09, 0x82, 0x61,
- 0xa3, 0x09, 0x82, 0x59, 0xa2, 0x09, 0x82, 0x51, 0xa1, 0x09, 0x82, 0x49,
- 0xa0, 0x09, 0x82, 0x41, 0x9f, 0x09, 0x82, 0x39, 0x9e, 0x09, 0x82, 0x31,
- 0x9d, 0x09, 0x82, 0x28, 0xa6, 0x09, 0x82, 0x21, 0xa5, 0x09, 0x82, 0x19,
- 0xa4, 0x09, 0x82, 0x11, 0xa3, 0x09, 0x82, 0x09, 0xa2, 0x09, 0x82, 0x01,
- 0xa1, 0x09, 0x81, 0xf9, 0xa0, 0x09, 0x81, 0xf1, 0x9f, 0x09, 0x81, 0xe9,
- 0x9e, 0x09, 0x81, 0xe1, 0x9d, 0x09, 0x81, 0xd8, 0xa6, 0x09, 0x81, 0xd1,
- 0xa5, 0x09, 0x81, 0xc9, 0xa4, 0x09, 0x81, 0xc1, 0xa3, 0x09, 0x81, 0xb9,
- 0xa2, 0x09, 0x81, 0xab, 0x02, 0x90, 0x8b, 0xa1, 0x09, 0x81, 0xa1, 0xa0,
- 0x09, 0x81, 0x93, 0x02, 0x90, 0x8f, 0x9f, 0x09, 0x81, 0x83, 0x02, 0x90,
- 0x93, 0x9e, 0x09, 0x81, 0x79, 0x9d, 0x09, 0x81, 0x6a, 0x02, 0x90, 0x97,
- 0xa6, 0x09, 0x81, 0x61, 0xa5, 0x09, 0x81, 0x59, 0xa4, 0x09, 0x81, 0x51,
- 0xa3, 0x09, 0x81, 0x49, 0xa2, 0x09, 0x81, 0x41, 0xa1, 0x09, 0x81, 0x39,
- 0xa0, 0x09, 0x81, 0x31, 0x9f, 0x09, 0x81, 0x23, 0x02, 0x90, 0x9b, 0x9e,
- 0x09, 0x81, 0x19, 0x9d, 0x09, 0x81, 0x10, 0xa6, 0x09, 0x81, 0x09, 0xa5,
- 0x09, 0x81, 0x01, 0xa4, 0x09, 0x80, 0xf9, 0xa3, 0x09, 0x80, 0xf1, 0xa2,
- 0x09, 0x80, 0xe9, 0xa1, 0x09, 0x80, 0xe1, 0xa0, 0x09, 0x80, 0xd9, 0x9f,
- 0x09, 0x80, 0xd1, 0x9e, 0x09, 0x80, 0xc9, 0x9d, 0x09, 0x80, 0xc0, 0xa6,
- 0x09, 0x80, 0xb9, 0xa5, 0x09, 0x80, 0xb1, 0xa4, 0x09, 0x80, 0xa3, 0x02,
- 0x90, 0x9f, 0xa3, 0x09, 0x80, 0x99, 0xa2, 0x09, 0x80, 0x91, 0xa1, 0x09,
- 0x80, 0x83, 0x02, 0x90, 0xa3, 0xa0, 0x09, 0x80, 0x79, 0x9f, 0x09, 0x80,
- 0x71, 0x9e, 0x09, 0x80, 0x69, 0x9d, 0x09, 0x80, 0x60, 0xa6, 0x09, 0x80,
- 0x59, 0xa5, 0x09, 0x80, 0x51, 0xa4, 0x09, 0x80, 0x49, 0xa3, 0x09, 0x80,
- 0x33, 0x02, 0x90, 0xa7, 0xa2, 0x09, 0x80, 0x23, 0x02, 0x90, 0xaf, 0xa1,
- 0x09, 0x80, 0x19, 0xa0, 0x09, 0x80, 0x11, 0x9f, 0x09, 0x80, 0x09, 0x9e,
- 0x09, 0x80, 0x00, 0x8a, 0x09, 0xa0, 0x61, 0x89, 0x09, 0xa0, 0x59, 0x88,
- 0x09, 0xa0, 0x51, 0x87, 0x09, 0xa0, 0x49, 0x86, 0x09, 0xa0, 0x41, 0x85,
- 0x09, 0xa0, 0x39, 0x84, 0x09, 0xa0, 0x31, 0x83, 0x09, 0xa0, 0x28, 0x8b,
- 0x09, 0xa0, 0x19, 0x8a, 0x09, 0xa0, 0x11, 0x89, 0x09, 0xa0, 0x09, 0x88,
- 0x09, 0xa0, 0x01, 0x87, 0x09, 0x9f, 0xf9, 0x86, 0x09, 0x9f, 0xf1, 0x85,
- 0x09, 0x9f, 0xe9, 0x84, 0x09, 0x9f, 0xe1, 0x83, 0x09, 0x9f, 0xd8, 0x83,
- 0x09, 0x9f, 0x80, 0x83, 0x09, 0x9f, 0x70, 0x84, 0x09, 0x9f, 0x61, 0x83,
- 0x09, 0x9f, 0x58, 0x86, 0x09, 0x9f, 0x49, 0x85, 0x09, 0x9f, 0x41, 0x84,
- 0x09, 0x9f, 0x39, 0x83, 0x09, 0x9f, 0x30, 0x83, 0x09, 0x9a, 0xb8, 0x83,
- 0x09, 0x9a, 0x98, 0x83, 0x09, 0x9a, 0x60, 0x84, 0x09, 0x99, 0xd1, 0x83,
- 0x09, 0x99, 0xc8, 0x83, 0x09, 0x97, 0xd8, 0x84, 0x09, 0x97, 0x89, 0x83,
- 0x09, 0x97, 0x80, 0x83, 0x09, 0x97, 0x30, 0x84, 0x09, 0x97, 0x11, 0x83,
- 0x09, 0x97, 0x08, 0x83, 0x09, 0x96, 0xc0, 0x83, 0x09, 0x96, 0x98, 0x83,
- 0x09, 0x96, 0x18, 0x83, 0x09, 0x95, 0xe0, 0x84, 0x09, 0x95, 0xa1, 0x83,
- 0x09, 0x95, 0x98, 0x83, 0x09, 0x95, 0x88, 0x83, 0x09, 0x94, 0xf8, 0x83,
- 0x09, 0x94, 0xe0, 0x9f, 0x09, 0x92, 0x73, 0x02, 0x90, 0xb3, 0x9e, 0x09,
- 0x92, 0x69, 0x9d, 0x09, 0x92, 0x60, 0xa6, 0x09, 0x92, 0x59, 0xa5, 0x09,
- 0x92, 0x4b, 0x02, 0x90, 0xb7, 0xa4, 0x09, 0x92, 0x41, 0xa3, 0x09, 0x92,
- 0x39, 0xa2, 0x09, 0x92, 0x31, 0xa1, 0x09, 0x92, 0x29, 0xa0, 0x09, 0x92,
- 0x21, 0x9f, 0x09, 0x92, 0x19, 0x9e, 0x09, 0x92, 0x0b, 0x02, 0x90, 0xbb,
- 0x9d, 0x09, 0x91, 0xfa, 0x02, 0x90, 0xbf, 0xa6, 0x09, 0x91, 0xf1, 0xa5,
- 0x09, 0x91, 0xe9, 0xa4, 0x09, 0x91, 0xe1, 0xa3, 0x09, 0x91, 0xd9, 0xa2,
- 0x09, 0x91, 0xd1, 0xa1, 0x09, 0x91, 0xc9, 0xa0, 0x09, 0x91, 0xc1, 0x9f,
- 0x09, 0x91, 0xb9, 0x9e, 0x09, 0x91, 0xb0, 0xa6, 0x09, 0x91, 0xa1, 0xa5,
- 0x09, 0x91, 0x99, 0xa4, 0x09, 0x91, 0x8b, 0x02, 0x90, 0xc3, 0xa3, 0x09,
- 0x91, 0x81, 0xa2, 0x09, 0x91, 0x79, 0xa1, 0x09, 0x91, 0x71, 0xa0, 0x09,
- 0x91, 0x69, 0x9f, 0x09, 0x91, 0x61, 0x9e, 0x09, 0x91, 0x59, 0x9d, 0x09,
- 0x91, 0x50, 0xa6, 0x09, 0x91, 0x49, 0xa5, 0x09, 0x91, 0x41, 0xa4, 0x09,
- 0x91, 0x39, 0xa3, 0x09, 0x91, 0x31, 0xa2, 0x09, 0x91, 0x23, 0x02, 0x90,
- 0xc7, 0xa1, 0x09, 0x91, 0x19, 0xa0, 0x09, 0x91, 0x11, 0x9f, 0x09, 0x91,
- 0x09, 0x9e, 0x09, 0x91, 0x00, 0x9f, 0x09, 0x90, 0xf9, 0x9e, 0x09, 0x90,
- 0xf1, 0x9d, 0x09, 0x90, 0xe8, 0xa6, 0x09, 0x90, 0xe1, 0xa5, 0x09, 0x90,
- 0xd9, 0xa4, 0x09, 0x90, 0xcb, 0x02, 0x90, 0xcb, 0xa3, 0x09, 0x90, 0xc1,
- 0xa2, 0x09, 0x90, 0xb3, 0x02, 0x90, 0xcf, 0xa1, 0x09, 0x90, 0xa3, 0x02,
- 0x90, 0xd3, 0xa0, 0x09, 0x90, 0x93, 0x02, 0x90, 0xd7, 0x9f, 0x09, 0x90,
- 0x89, 0x9e, 0x09, 0x90, 0x81, 0x9d, 0x09, 0x90, 0x78, 0xa6, 0x09, 0x90,
- 0x71, 0xa5, 0x09, 0x90, 0x69, 0xa4, 0x09, 0x90, 0x61, 0xa3, 0x09, 0x90,
- 0x59, 0xa2, 0x09, 0x90, 0x4b, 0x02, 0x90, 0xdb, 0xa1, 0x09, 0x90, 0x41,
- 0xa0, 0x09, 0x90, 0x39, 0x9f, 0x09, 0x90, 0x31, 0x9e, 0x09, 0x90, 0x29,
- 0x9d, 0x09, 0x90, 0x20, 0xa6, 0x09, 0x90, 0x19, 0xa5, 0x09, 0x90, 0x03,
- 0x02, 0x90, 0xdf, 0xa4, 0x09, 0x8f, 0xf9, 0xa3, 0x09, 0x8f, 0xf1, 0xa2,
- 0x09, 0x8f, 0xe9, 0xa1, 0x09, 0x8f, 0xe1, 0xa0, 0x09, 0x8f, 0xd9, 0x9f,
- 0x09, 0x8f, 0xd1, 0x9e, 0x09, 0x8f, 0xc9, 0x9d, 0x09, 0x8f, 0xc0, 0xa6,
- 0x09, 0x8f, 0xb9, 0xa5, 0x09, 0x8f, 0xb1, 0xa4, 0x09, 0x8f, 0xa9, 0xa3,
- 0x09, 0x8f, 0xa1, 0xa2, 0x09, 0x8f, 0x99, 0xa1, 0x09, 0x8f, 0x91, 0xa0,
- 0x09, 0x8f, 0x89, 0x9f, 0x09, 0x8f, 0x81, 0x9e, 0x09, 0x8f, 0x78, 0x83,
- 0x09, 0x8b, 0xa8, 0x83, 0x09, 0x8b, 0x90, 0x83, 0x09, 0x8b, 0x58, 0x83,
- 0x09, 0x8b, 0x48, 0x83, 0x09, 0x8a, 0xf0, 0x83, 0x09, 0x8a, 0xb8, 0x83,
- 0x09, 0x8a, 0x68, 0x84, 0x09, 0x8a, 0x41, 0x83, 0x09, 0x8a, 0x38, 0x83,
- 0x09, 0x8a, 0x28, 0x8a, 0x09, 0x86, 0x89, 0x89, 0x09, 0x86, 0x81, 0x88,
- 0x09, 0x86, 0x79, 0x87, 0x09, 0x86, 0x71, 0x86, 0x09, 0x86, 0x69, 0x85,
- 0x09, 0x86, 0x61, 0x84, 0x09, 0x86, 0x59, 0x83, 0x09, 0x86, 0x50, 0x83,
- 0x09, 0x85, 0xe0, 0x83, 0x09, 0x85, 0xc8, 0x8b, 0x09, 0x85, 0xb1, 0x8a,
- 0x09, 0x85, 0xa9, 0x89, 0x09, 0x85, 0xa1, 0x88, 0x09, 0x85, 0x99, 0x87,
- 0x09, 0x85, 0x91, 0x86, 0x09, 0x85, 0x89, 0x85, 0x09, 0x85, 0x81, 0x84,
- 0x09, 0x85, 0x79, 0x83, 0x09, 0x85, 0x70, 0x83, 0x09, 0x85, 0x58, 0x83,
- 0x09, 0x85, 0x40, 0x83, 0x09, 0x84, 0xd8, 0x83, 0x09, 0x84, 0xb8, 0x83,
- 0x09, 0x84, 0x90, 0x83, 0x09, 0x83, 0xf0, 0x83, 0x09, 0x83, 0x38, 0x85,
- 0x09, 0x82, 0xf1, 0x84, 0x09, 0x82, 0xe9, 0x83, 0x09, 0x82, 0xe0, 0xc6,
- 0x01, 0x7a, 0x0f, 0xbc, 0x49, 0xc6, 0x07, 0x09, 0x0f, 0xbc, 0x98, 0xc6,
- 0x13, 0x57, 0x0f, 0xbd, 0x71, 0xd2, 0x4c, 0xf2, 0x0f, 0xbd, 0xd0, 0x45,
- 0x4f, 0x43, 0x42, 0x90, 0xe7, 0x83, 0x00, 0x95, 0x03, 0x02, 0x91, 0x17,
- 0x97, 0x00, 0x95, 0x09, 0x8b, 0x00, 0x95, 0x11, 0x87, 0x00, 0x95, 0x2b,
- 0x02, 0x91, 0x1b, 0x91, 0x00, 0x95, 0x33, 0x02, 0x91, 0x1f, 0xc2, 0x01,
- 0x09, 0x00, 0x95, 0x38, 0x83, 0x00, 0x98, 0x58, 0x87, 0x00, 0x98, 0x60,
- 0x83, 0x00, 0x98, 0x78, 0x83, 0x00, 0x98, 0x83, 0x02, 0x91, 0x23, 0x8b,
- 0x00, 0x98, 0x91, 0x87, 0x00, 0x98, 0xaa, 0x02, 0x91, 0x27, 0x83, 0x00,
- 0x98, 0xc3, 0x02, 0x91, 0x2b, 0x97, 0x00, 0x98, 0xc9, 0x8b, 0x00, 0x98,
- 0xd1, 0x87, 0x00, 0x98, 0xeb, 0x02, 0x91, 0x2f, 0x91, 0x00, 0x98, 0xf1,
- 0x19, 0x42, 0x91, 0x33, 0x83, 0x01, 0x6e, 0xc3, 0x02, 0x91, 0x45, 0x97,
- 0x01, 0x6e, 0xc9, 0x8b, 0x01, 0x6e, 0xd1, 0x87, 0x01, 0x6e, 0xeb, 0x02,
- 0x91, 0x49, 0x91, 0x01, 0x6e, 0xf0, 0x19, 0xc2, 0x91, 0x4d, 0x1b, 0xc2,
- 0x91, 0x5c, 0x83, 0x00, 0x90, 0x83, 0x02, 0x91, 0x76, 0x97, 0x00, 0x90,
- 0x89, 0x8b, 0x00, 0x90, 0x91, 0x87, 0x00, 0x90, 0xab, 0x02, 0x91, 0x7a,
- 0x91, 0x00, 0x90, 0xb0, 0x83, 0x00, 0x90, 0x18, 0x87, 0x00, 0x90, 0x20,
- 0x83, 0x00, 0x90, 0x38, 0x91, 0x05, 0x59, 0x71, 0x87, 0x05, 0x59, 0x6b,
- 0x02, 0x91, 0x7e, 0x83, 0x05, 0x59, 0x43, 0x02, 0x91, 0x82, 0x8b, 0x05,
- 0x59, 0x51, 0x97, 0x05, 0x59, 0x48, 0x83, 0x00, 0x93, 0x18, 0x87, 0x00,
- 0x93, 0x20, 0x83, 0x01, 0x6c, 0x28, 0x83, 0x00, 0x93, 0x39, 0x8b, 0x00,
- 0x9c, 0x29, 0x87, 0x00, 0x9c, 0x3a, 0x02, 0x91, 0x86, 0x0a, 0xc2, 0x91,
- 0x8a, 0x83, 0x01, 0x6d, 0x43, 0x02, 0x91, 0xa8, 0x97, 0x01, 0x6d, 0x49,
- 0x8b, 0x01, 0x6d, 0x51, 0x87, 0x01, 0x6d, 0x6b, 0x02, 0x91, 0xac, 0x91,
- 0x01, 0x6d, 0x70, 0x83, 0x00, 0x93, 0xd8, 0x87, 0x00, 0x93, 0xe0, 0x83,
- 0x01, 0x6c, 0x38, 0x83, 0x00, 0x99, 0x43, 0x02, 0x91, 0xb0, 0x97, 0x00,
- 0x99, 0x49, 0x8b, 0x00, 0x99, 0x51, 0x87, 0x00, 0x99, 0x6b, 0x02, 0x91,
- 0xb4, 0x91, 0x00, 0x99, 0x73, 0x02, 0x91, 0xb8, 0xc2, 0x01, 0x09, 0x00,
- 0x99, 0x78, 0x91, 0x05, 0x58, 0xb1, 0x87, 0x05, 0x58, 0xab, 0x02, 0x91,
- 0xbc, 0xc2, 0x14, 0x40, 0x05, 0x58, 0x99, 0x8b, 0x05, 0x58, 0x91, 0x97,
- 0x05, 0x58, 0x88, 0x0a, 0xc2, 0x91, 0xc0, 0x83, 0x00, 0x97, 0xc3, 0x02,
- 0x91, 0xd9, 0x97, 0x00, 0x97, 0xc9, 0x8b, 0x00, 0x97, 0xd1, 0x87, 0x00,
- 0x97, 0xeb, 0x02, 0x91, 0xdd, 0x91, 0x00, 0x97, 0xf3, 0x02, 0x91, 0xe1,
- 0xc2, 0x01, 0x09, 0x00, 0x97, 0xf8, 0x83, 0x00, 0x97, 0x98, 0x87, 0x00,
- 0x97, 0xa0, 0x83, 0x01, 0x6c, 0x60, 0x91, 0x05, 0x58, 0x31, 0x87, 0x05,
- 0x58, 0x2b, 0x02, 0x91, 0xe5, 0xc2, 0x14, 0x40, 0x05, 0x58, 0x19, 0x8b,
- 0x05, 0x58, 0x11, 0x97, 0x05, 0x58, 0x08, 0x83, 0x00, 0x93, 0x98, 0x87,
- 0x00, 0x93, 0xa0, 0x83, 0x01, 0x6c, 0x30, 0x83, 0x00, 0x99, 0x03, 0x02,
- 0x91, 0xe9, 0x97, 0x00, 0x99, 0x09, 0x8b, 0x00, 0x99, 0x11, 0x87, 0x00,
- 0x99, 0x2b, 0x02, 0x91, 0xed, 0x91, 0x00, 0x99, 0x33, 0x02, 0x91, 0xf1,
- 0xc2, 0x01, 0x09, 0x00, 0x99, 0x38, 0x83, 0x00, 0x99, 0xc3, 0x02, 0x91,
- 0xf5, 0x97, 0x00, 0x99, 0xc9, 0x8b, 0x00, 0x99, 0xd1, 0x87, 0x00, 0x99,
- 0xeb, 0x02, 0x91, 0xf9, 0x91, 0x00, 0x99, 0xf1, 0xc2, 0x01, 0x09, 0x00,
- 0x99, 0xf8, 0x83, 0x00, 0x9a, 0x03, 0x02, 0x91, 0xfd, 0x97, 0x00, 0x9a,
- 0x09, 0x8b, 0x00, 0x9a, 0x11, 0x87, 0x00, 0x9a, 0x2b, 0x02, 0x92, 0x01,
- 0x91, 0x00, 0x9a, 0x32, 0x02, 0x92, 0x05, 0x83, 0x00, 0x90, 0x58, 0x87,
- 0x00, 0x90, 0x60, 0x83, 0x01, 0x6c, 0x00, 0x83, 0x00, 0x90, 0xd8, 0x87,
- 0x00, 0x90, 0xe0, 0x83, 0x01, 0x6c, 0x08, 0x83, 0x00, 0x90, 0xf9, 0x8b,
- 0x00, 0x9c, 0x09, 0x87, 0x00, 0x9c, 0x1a, 0x02, 0x92, 0x09, 0x83, 0x00,
- 0x91, 0x03, 0x02, 0x92, 0x0d, 0x97, 0x00, 0x91, 0x09, 0x8b, 0x00, 0x91,
- 0x11, 0x87, 0x00, 0x91, 0x2b, 0x02, 0x92, 0x11, 0x91, 0x00, 0x91, 0x31,
- 0xc2, 0x01, 0x09, 0x00, 0x91, 0x38, 0x83, 0x00, 0x91, 0x98, 0x87, 0x00,
- 0x91, 0xa1, 0x48, 0xbf, 0xa5, 0x42, 0x92, 0x15, 0x83, 0x01, 0x6c, 0x18,
- 0x83, 0x00, 0x91, 0xc3, 0x02, 0x92, 0x2d, 0x97, 0x00, 0x91, 0xc9, 0x8b,
- 0x00, 0x91, 0xd1, 0x87, 0x00, 0x91, 0xeb, 0x02, 0x92, 0x31, 0x91, 0x00,
- 0x91, 0xf3, 0x02, 0x92, 0x35, 0xc2, 0x01, 0x09, 0x00, 0x91, 0xf8, 0x83,
- 0x01, 0x6d, 0x03, 0x02, 0x92, 0x39, 0x97, 0x01, 0x6d, 0x09, 0x8b, 0x01,
- 0x6d, 0x11, 0x87, 0x01, 0x6d, 0x2b, 0x02, 0x92, 0x3d, 0x91, 0x01, 0x6d,
- 0x30, 0x83, 0x00, 0x91, 0x58, 0x87, 0x00, 0x91, 0x60, 0x83, 0x01, 0x6c,
- 0x10, 0x83, 0x00, 0x92, 0x18, 0x87, 0x00, 0x92, 0x20, 0x83, 0x00, 0x92,
- 0x38, 0x83, 0x00, 0x92, 0x43, 0x02, 0x92, 0x41, 0x8b, 0x00, 0x92, 0x51,
- 0x87, 0x00, 0x92, 0x6a, 0x02, 0x92, 0x45, 0x83, 0x00, 0x92, 0x83, 0x02,
- 0x92, 0x49, 0x97, 0x00, 0x92, 0x89, 0x8b, 0x00, 0x92, 0x91, 0x87, 0x00,
- 0x92, 0xab, 0x02, 0x92, 0x4d, 0x91, 0x00, 0x92, 0xb1, 0x19, 0x42, 0x92,
- 0x51, 0x83, 0x01, 0x6e, 0x03, 0x02, 0x92, 0x63, 0x97, 0x01, 0x6e, 0x09,
- 0x8b, 0x01, 0x6e, 0x11, 0x87, 0x01, 0x6e, 0x2b, 0x02, 0x92, 0x67, 0x91,
- 0x01, 0x6e, 0x30, 0x83, 0x00, 0x93, 0x58, 0x87, 0x00, 0x93, 0x60, 0x83,
- 0x00, 0x94, 0x18, 0x87, 0x00, 0x94, 0x20, 0x83, 0x00, 0x94, 0x38, 0x83,
- 0x00, 0x94, 0x43, 0x02, 0x92, 0x6b, 0x8b, 0x00, 0x94, 0x51, 0x87, 0x00,
- 0x94, 0x6a, 0x02, 0x92, 0x6f, 0x83, 0x01, 0x6e, 0x83, 0x02, 0x92, 0x73,
- 0x97, 0x01, 0x6e, 0x89, 0x8b, 0x01, 0x6e, 0x91, 0x87, 0x01, 0x6e, 0xab,
- 0x02, 0x92, 0x77, 0x91, 0x01, 0x6e, 0xb0, 0x83, 0x00, 0x94, 0x98, 0x87,
- 0x00, 0x94, 0xa0, 0x83, 0x01, 0x6c, 0x40, 0x83, 0x00, 0x94, 0xc3, 0x02,
- 0x92, 0x7b, 0x97, 0x00, 0x94, 0xc9, 0x8b, 0x00, 0x94, 0xd1, 0x87, 0x00,
- 0x94, 0xeb, 0x02, 0x92, 0x7f, 0x91, 0x00, 0x94, 0xf3, 0x02, 0x92, 0x83,
- 0xc2, 0x01, 0x09, 0x00, 0x94, 0xf8, 0x83, 0x00, 0x95, 0x58, 0x87, 0x00,
- 0x95, 0x60, 0x83, 0x00, 0x95, 0x78, 0x83, 0x00, 0x95, 0x83, 0x02, 0x92,
- 0x87, 0x8b, 0x00, 0x95, 0x91, 0x87, 0x00, 0x95, 0xaa, 0x02, 0x92, 0x8b,
- 0x83, 0x00, 0x95, 0xc3, 0x02, 0x92, 0x8f, 0x97, 0x00, 0x95, 0xc9, 0x8b,
- 0x00, 0x95, 0xd1, 0x87, 0x00, 0x95, 0xeb, 0x02, 0x92, 0x93, 0x91, 0x00,
- 0x95, 0xf1, 0x19, 0x42, 0x92, 0x97, 0x83, 0x01, 0x6e, 0x43, 0x02, 0x92,
- 0xa9, 0x97, 0x01, 0x6e, 0x49, 0x8b, 0x01, 0x6e, 0x51, 0x87, 0x01, 0x6e,
- 0x6b, 0x02, 0x92, 0xad, 0x91, 0x01, 0x6e, 0x70, 0x83, 0x00, 0x96, 0x58,
- 0x87, 0x00, 0x96, 0x60, 0x83, 0x00, 0x96, 0x78, 0x83, 0x00, 0x99, 0x83,
- 0x02, 0x92, 0xb1, 0x97, 0x00, 0x99, 0x89, 0x8b, 0x00, 0x99, 0x91, 0x87,
- 0x00, 0x99, 0xab, 0x02, 0x92, 0xbb, 0x91, 0x00, 0x99, 0xb3, 0x02, 0x92,
- 0xbf, 0xc2, 0x01, 0x09, 0x00, 0x99, 0xb8, 0x83, 0x00, 0x9a, 0x98, 0x87,
- 0x00, 0x9a, 0xa0, 0x83, 0x01, 0x6c, 0x90, 0x83, 0x00, 0x9a, 0xb9, 0x8b,
- 0x00, 0x9c, 0x69, 0x87, 0x00, 0x9c, 0x7a, 0x02, 0x92, 0xc3, 0x83, 0x00,
- 0x96, 0xd8, 0x87, 0x00, 0x96, 0xe0, 0x83, 0x01, 0x6c, 0x58, 0x83, 0x00,
- 0x97, 0x03, 0x02, 0x92, 0xc7, 0x97, 0x00, 0x97, 0x09, 0x8b, 0x00, 0x97,
- 0x11, 0x87, 0x00, 0x97, 0x2b, 0x02, 0x92, 0xcb, 0x91, 0x00, 0x97, 0x31,
- 0xc2, 0x01, 0x09, 0x00, 0x97, 0x38, 0x83, 0x01, 0x6d, 0x83, 0x02, 0x92,
- 0xcf, 0x97, 0x01, 0x6d, 0x89, 0x8b, 0x01, 0x6d, 0x91, 0x87, 0x01, 0x6d,
- 0xab, 0x02, 0x92, 0xd3, 0x91, 0x01, 0x6d, 0xb0, 0x83, 0x00, 0x97, 0x58,
- 0x87, 0x00, 0x97, 0x60, 0x83, 0x00, 0x97, 0x78, 0x83, 0x00, 0x98, 0x18,
- 0x87, 0x00, 0x98, 0x20, 0x83, 0x01, 0x6c, 0x70, 0x83, 0x00, 0x9a, 0x58,
- 0x87, 0x00, 0x9a, 0x60, 0x83, 0x00, 0x9a, 0x79, 0x8b, 0x00, 0x9c, 0x49,
- 0x87, 0x00, 0x9c, 0x5a, 0x02, 0x92, 0xd7, 0xd5, 0x34, 0xb7, 0x00, 0x9a,
- 0xe9, 0xc4, 0x00, 0xba, 0x00, 0x9a, 0xf8, 0xc7, 0x08, 0xca, 0x01, 0x3e,
- 0x91, 0xc9, 0x03, 0x68, 0x01, 0x56, 0xc8, 0xd6, 0x2c, 0x83, 0x01, 0x17,
- 0xc9, 0xc8, 0x4f, 0x39, 0x01, 0x17, 0xc1, 0xc7, 0x76, 0x66, 0x01, 0x17,
- 0xb1, 0xc9, 0x17, 0x7a, 0x01, 0x17, 0xa9, 0x48, 0x03, 0x3b, 0xc2, 0x92,
- 0xdb, 0xd6, 0x2b, 0xd3, 0x01, 0x17, 0x90, 0xc3, 0xdf, 0x4a, 0x08, 0x7f,
- 0x89, 0xc4, 0xd9, 0x77, 0x08, 0x7f, 0x70, 0xc6, 0x01, 0xe1, 0x00, 0x00,
- 0xb8, 0xc8, 0xb7, 0x45, 0x01, 0x16, 0xf9, 0xc8, 0xb7, 0x15, 0x01, 0x16,
- 0xf1, 0xcc, 0x00, 0x9b, 0x01, 0x16, 0xe9, 0xc9, 0x0a, 0x4a, 0x01, 0x16,
- 0xe0, 0x03, 0xc2, 0x92, 0xe1, 0x45, 0x00, 0x6c, 0x42, 0x92, 0xf0, 0x97,
- 0x08, 0xec, 0xa1, 0x8b, 0x08, 0xec, 0x89, 0x83, 0x08, 0xec, 0x50, 0x97,
- 0x08, 0xec, 0x70, 0x8b, 0x08, 0xec, 0x60, 0xc2, 0x00, 0xa4, 0x08, 0xec,
- 0x19, 0x83, 0x08, 0xec, 0x10, 0xc2, 0x00, 0xa4, 0x08, 0xeb, 0xf1, 0x83,
- 0x08, 0xeb, 0xe8, 0x83, 0x00, 0x50, 0xb1, 0xc2, 0x00, 0xa4, 0x00, 0x52,
- 0xc8, 0x83, 0x00, 0x50, 0xc1, 0xc2, 0x00, 0xa4, 0x00, 0x52, 0xd0, 0x83,
- 0x00, 0x50, 0xf9, 0xc2, 0x00, 0xa4, 0x00, 0x51, 0x00, 0x83, 0x00, 0x51,
- 0x09, 0xc2, 0x00, 0xa4, 0x00, 0x51, 0x10, 0x94, 0x00, 0x54, 0x5b, 0x02,
- 0x93, 0x06, 0x8e, 0x00, 0x54, 0x62, 0x02, 0x93, 0x0a, 0x83, 0x00, 0x54,
- 0xf9, 0xc2, 0x00, 0xa4, 0x00, 0x55, 0x00, 0x83, 0x00, 0x55, 0x09, 0xc2,
- 0x00, 0xa4, 0x00, 0x55, 0x10, 0x83, 0x00, 0x55, 0xf1, 0x8b, 0x00, 0x56,
- 0x41, 0x97, 0x00, 0x56, 0x60, 0x8b, 0x00, 0x56, 0x00, 0x97, 0x00, 0x56,
- 0x10, 0x94, 0x00, 0x56, 0x1b, 0x02, 0x93, 0x0e, 0x8e, 0x00, 0x57, 0x12,
- 0x02, 0x93, 0x12, 0x87, 0x00, 0x56, 0x29, 0x91, 0x00, 0x56, 0x48, 0xcd,
- 0x80, 0x06, 0x0e, 0x92, 0x29, 0xcc, 0x82, 0xcc, 0x08, 0x0c, 0x08, 0x5b,
- 0x16, 0x3f, 0xc2, 0x93, 0x16, 0xcc, 0x82, 0xc0, 0x08, 0x0c, 0x68, 0x55,
- 0x32, 0xaa, 0xc2, 0x93, 0x3e, 0xc4, 0x07, 0xa6, 0x00, 0xff, 0x78, 0xc4,
- 0x5a, 0x03, 0x00, 0xff, 0xf3, 0x02, 0x93, 0x6b, 0x49, 0x63, 0xbd, 0xc2,
- 0x93, 0x71, 0xcb, 0x93, 0xa2, 0x08, 0x0b, 0xd8, 0xc3, 0x43, 0x19, 0x00,
- 0xff, 0xe9, 0x43, 0x00, 0x8c, 0xc2, 0x93, 0x7d, 0xc8, 0xbc, 0x8d, 0x08,
- 0x0b, 0xe1, 0xca, 0xa4, 0x50, 0x08, 0x0c, 0x20, 0x0e, 0xc2, 0x93, 0x8c,
- 0xca, 0x9d, 0xa2, 0x00, 0x1e, 0x79, 0xcc, 0x89, 0x80, 0x00, 0x1f, 0xa1,
- 0x49, 0x10, 0x8f, 0xc2, 0x93, 0x98, 0xda, 0x1c, 0x75, 0x00, 0x1f, 0xf0,
- 0x45, 0x04, 0x74, 0xc2, 0x93, 0xa4, 0x56, 0x2c, 0xc5, 0xc2, 0x93, 0xb6,
- 0xcc, 0x8c, 0xc8, 0x08, 0x0c, 0x61, 0xcd, 0x7e, 0x80, 0x08, 0x0d, 0x00,
- 0xc4, 0x7f, 0x43, 0x00, 0xfd, 0xfb, 0x02, 0x93, 0xd4, 0xca, 0x95, 0x50,
- 0x00, 0xfe, 0x01, 0xcd, 0x44, 0xd2, 0x00, 0xfd, 0xf1, 0xc8, 0x9d, 0xa4,
- 0x00, 0x1e, 0xb1, 0xc9, 0xaa, 0xa5, 0x00, 0x1e, 0xa8, 0xc6, 0x5c, 0x7c,
- 0x00, 0xfd, 0xe9, 0x03, 0xc2, 0x93, 0xda, 0xd0, 0x60, 0x22, 0x08, 0x0c,
- 0x10, 0x46, 0x02, 0x91, 0xc2, 0x93, 0xe6, 0xd1, 0x56, 0x5c, 0x00, 0x1b,
- 0xa9, 0x46, 0x12, 0x0b, 0xc2, 0x94, 0x02, 0xc9, 0xae, 0x44, 0x08, 0x0c,
- 0x18, 0xcc, 0x48, 0x1e, 0x00, 0x1b, 0xd1, 0xc8, 0xae, 0x84, 0x08, 0x0b,
- 0xc8, 0xc4, 0x63, 0xc7, 0x00, 0x1c, 0x21, 0x0a, 0xc2, 0x94, 0x0e, 0x43,
- 0x01, 0x47, 0xc2, 0x94, 0x1a, 0xca, 0xa3, 0xba, 0x08, 0x0b, 0xd1, 0xd1,
- 0x55, 0xe5, 0x08, 0x0c, 0x48, 0xc9, 0xad, 0x00, 0x00, 0x1c, 0x39, 0x4a,
- 0x9c, 0x62, 0xc2, 0x94, 0x26, 0x14, 0x42, 0x94, 0x58, 0x43, 0x68, 0x16,
- 0xc2, 0x94, 0x64, 0xdd, 0x12, 0x02, 0x00, 0x1f, 0xb0, 0xce, 0x6c, 0xc7,
- 0x08, 0x0b, 0xf9, 0xce, 0x74, 0x37, 0x08, 0x0c, 0x00, 0xcb, 0x1e, 0x50,
- 0x00, 0x1e, 0x91, 0xd5, 0x35, 0x4a, 0x00, 0x1e, 0x99, 0xd9, 0x1e, 0x42,
- 0x00, 0x1e, 0xa0, 0xca, 0x37, 0x0e, 0x01, 0x17, 0x39, 0xc5, 0x07, 0x62,
- 0x01, 0x13, 0x48, 0xc9, 0x0a, 0x4a, 0x01, 0x13, 0xb9, 0x43, 0x00, 0x8f,
- 0xc2, 0x94, 0x70, 0xd0, 0x60, 0x72, 0x01, 0x53, 0xf3, 0x02, 0x94, 0x7c,
- 0xcb, 0x1a, 0xd9, 0x01, 0x54, 0x30, 0xc9, 0x07, 0x5e, 0x01, 0x13, 0x39,
- 0xd1, 0x52, 0x3e, 0x01, 0x55, 0x20, 0xd0, 0x01, 0xf7, 0x01, 0x4b, 0xc1,
- 0x06, 0xc2, 0x94, 0x82, 0x15, 0xc2, 0x94, 0x88, 0x0e, 0x42, 0x94, 0x94,
- 0xd8, 0x24, 0x18, 0x01, 0x54, 0x41, 0xcf, 0x62, 0x54, 0x01, 0x54, 0x50,
- 0x8e, 0x08, 0x9b, 0x13, 0x02, 0x94, 0x9a, 0x94, 0x08, 0x9a, 0x1a, 0x02,
- 0x94, 0x9e, 0x97, 0x08, 0x9a, 0x61, 0x8b, 0x08, 0x9a, 0x41, 0x83, 0x08,
- 0x99, 0xf0, 0x97, 0x08, 0x9a, 0x10, 0x8b, 0x08, 0x9a, 0x00, 0x47, 0xac,
- 0xc2, 0xc2, 0x94, 0xa2, 0x45, 0x07, 0x8f, 0xc2, 0x94, 0xb0, 0x83, 0x08,
- 0x99, 0xa8, 0x83, 0x08, 0x99, 0xc1, 0xc2, 0x0c, 0x65, 0x08, 0x99, 0xb9,
- 0xc2, 0x00, 0xa4, 0x08, 0x99, 0xb0, 0xc2, 0x00, 0xc7, 0x08, 0x99, 0x99,
- 0x83, 0x08, 0x99, 0x90, 0xc2, 0x00, 0xa4, 0x08, 0x99, 0x69, 0x83, 0x08,
- 0x99, 0x60, 0xc2, 0x00, 0xa4, 0x08, 0x99, 0x59, 0x83, 0x08, 0x99, 0x50,
- 0xc2, 0x00, 0xa4, 0x08, 0x99, 0x39, 0x83, 0x08, 0x99, 0x31, 0x06, 0x42,
- 0x94, 0xbc, 0xc2, 0x00, 0xa4, 0x08, 0x99, 0x29, 0x16, 0xc2, 0x94, 0xc6,
- 0x83, 0x08, 0x99, 0x20, 0xc2, 0x1d, 0x5f, 0x08, 0x98, 0xf1, 0xc2, 0x01,
- 0x29, 0x08, 0x98, 0xc9, 0xc2, 0x00, 0xc1, 0x08, 0x99, 0x19, 0x83, 0x08,
- 0x99, 0x40, 0xc2, 0x00, 0xa4, 0x08, 0x98, 0xe9, 0x83, 0x08, 0x98, 0xe0,
- 0xc2, 0x00, 0xa4, 0x08, 0x98, 0xd9, 0x83, 0x08, 0x98, 0xd0, 0xc2, 0x00,
- 0xa4, 0x08, 0x98, 0xc1, 0x83, 0x08, 0x98, 0xb8, 0xc2, 0x00, 0xa4, 0x08,
- 0x98, 0xb1, 0x83, 0x08, 0x98, 0xa8, 0x97, 0x08, 0x98, 0xa1, 0x8b, 0x08,
- 0x98, 0x81, 0x83, 0x08, 0x98, 0x30, 0x97, 0x08, 0x98, 0x50, 0x8b, 0x08,
- 0x98, 0x40, 0xc4, 0x0f, 0x7c, 0x08, 0x9a, 0x69, 0xc5, 0x44, 0x7b, 0x08,
- 0x98, 0x18, 0xc7, 0x76, 0x59, 0x08, 0x99, 0xe9, 0xc7, 0x11, 0x41, 0x08,
- 0x98, 0x10, 0xca, 0x1e, 0x18, 0x08, 0x98, 0x09, 0xd7, 0x11, 0x37, 0x08,
- 0x98, 0x00, 0x15, 0xc2, 0x94, 0xd0, 0xdb, 0x19, 0x33, 0x0f, 0xc9, 0x50,
- 0xc9, 0xaf, 0xf4, 0x00, 0xe5, 0xf9, 0x95, 0x00, 0xe4, 0xd0, 0x03, 0xc2,
- 0x94, 0xdc, 0xc2, 0x1b, 0xd8, 0x00, 0xe5, 0xa9, 0xc2, 0x04, 0x0a, 0x00,
- 0xe5, 0x91, 0x87, 0x00, 0xe5, 0x88, 0xc2, 0x00, 0x4d, 0x00, 0xe5, 0xe9,
- 0xc2, 0x00, 0x34, 0x00, 0xe5, 0xd1, 0x90, 0x00, 0xe4, 0x80, 0xc9, 0xa9,
- 0xb2, 0x00, 0xe5, 0xc9, 0x03, 0x42, 0x94, 0xe7, 0xc4, 0x87, 0xf9, 0x00,
- 0xe5, 0xc1, 0x90, 0x00, 0xe4, 0xa0, 0xc3, 0x00, 0xa4, 0x00, 0xe5, 0x79,
- 0xc2, 0x00, 0x31, 0x00, 0xe5, 0x58, 0x0a, 0xc2, 0x94, 0xef, 0xc2, 0x00,
- 0x31, 0x00, 0xe5, 0x61, 0xc2, 0x00, 0x57, 0x00, 0xe5, 0x50, 0xc3, 0x0d,
- 0xd9, 0x00, 0xe5, 0x41, 0xc2, 0x00, 0x57, 0x00, 0xe5, 0x08, 0xc3, 0x00,
- 0xa4, 0x00, 0xe5, 0x31, 0xc2, 0x00, 0x57, 0x00, 0xe4, 0x90, 0xc3, 0x02,
- 0xd0, 0x00, 0xe5, 0x29, 0xc2, 0x00, 0x57, 0x00, 0xe4, 0xc8, 0xc3, 0x02,
- 0xd0, 0x00, 0xe5, 0x21, 0xc2, 0x00, 0xbb, 0x00, 0xe4, 0xf0, 0xc3, 0x02,
- 0xd0, 0x00, 0xe4, 0xf9, 0xc2, 0x00, 0x4d, 0x00, 0xe4, 0xb0, 0x90, 0x00,
- 0x85, 0x01, 0xc2, 0x00, 0x4d, 0x00, 0x86, 0x68, 0xc2, 0x00, 0x57, 0x00,
- 0x85, 0x11, 0xc3, 0x00, 0xa4, 0x00, 0x85, 0xb0, 0xc2, 0x00, 0x4d, 0x00,
- 0x85, 0x31, 0xc3, 0x02, 0xd0, 0x00, 0x85, 0x78, 0x90, 0x00, 0x85, 0x39,
- 0x94, 0x00, 0x85, 0x90, 0xc2, 0x00, 0x57, 0x00, 0x85, 0x49, 0xc3, 0x02,
- 0xd0, 0x00, 0x85, 0xa8, 0xc2, 0x00, 0xbb, 0x00, 0x85, 0x71, 0xc3, 0x02,
- 0xd0, 0x00, 0x85, 0xa0, 0xc2, 0x00, 0x57, 0x00, 0x85, 0x89, 0xc3, 0x0d,
- 0xd9, 0x00, 0x85, 0xc0, 0x0a, 0xc2, 0x94, 0xfb, 0xc2, 0x00, 0x57, 0x00,
- 0x85, 0xd1, 0xc2, 0x00, 0x31, 0x00, 0x85, 0xe0, 0xc2, 0x00, 0x31, 0x00,
- 0x85, 0xd9, 0xc3, 0x00, 0xa4, 0x00, 0x85, 0xf8, 0x03, 0xc2, 0x95, 0x07,
- 0x87, 0x00, 0x86, 0x09, 0xc2, 0x04, 0x0a, 0x00, 0x86, 0x11, 0xc2, 0x1b,
- 0xd8, 0x00, 0x86, 0x28, 0x90, 0x00, 0x86, 0x81, 0xc2, 0x00, 0x34, 0x00,
- 0x87, 0xd1, 0xc2, 0x00, 0x4d, 0x00, 0x87, 0xe8, 0xc2, 0x00, 0x57, 0x00,
- 0x86, 0x91, 0xc3, 0x00, 0xa4, 0x00, 0x87, 0x30, 0x90, 0x00, 0x86, 0xa1,
- 0xc4, 0x87, 0xf9, 0x00, 0x87, 0xc0, 0xc2, 0x00, 0x4d, 0x00, 0x86, 0xb1,
- 0xc3, 0x02, 0xd0, 0x00, 0x86, 0xf8, 0x03, 0xc2, 0x95, 0x0f, 0xc9, 0xa9,
- 0xb2, 0x00, 0x87, 0xc8, 0xc2, 0x00, 0x57, 0x00, 0x86, 0xc9, 0xc3, 0x02,
- 0xd0, 0x00, 0x87, 0x28, 0x95, 0x00, 0x86, 0xd1, 0xc9, 0xaf, 0xf4, 0x00,
- 0x87, 0xf8, 0xc2, 0x00, 0xbb, 0x00, 0x86, 0xf1, 0xc3, 0x02, 0xd0, 0x00,
- 0x87, 0x20, 0xc2, 0x00, 0x57, 0x00, 0x87, 0x09, 0xc3, 0x0d, 0xd9, 0x00,
- 0x87, 0x40, 0x0a, 0xc2, 0x95, 0x17, 0xc2, 0x00, 0x57, 0x00, 0x87, 0x51,
- 0xc2, 0x00, 0x31, 0x00, 0x87, 0x60, 0xc2, 0x00, 0x31, 0x00, 0x87, 0x59,
- 0xc3, 0x00, 0xa4, 0x00, 0x87, 0x78, 0x03, 0xc2, 0x95, 0x23, 0x87, 0x00,
- 0x87, 0x89, 0xc2, 0x04, 0x0a, 0x00, 0x87, 0x91, 0xc2, 0x1b, 0xd8, 0x00,
- 0x87, 0xa8, 0x90, 0x01, 0x68, 0x01, 0xc2, 0x00, 0x4d, 0x01, 0x69, 0x68,
- 0xc2, 0x00, 0x57, 0x01, 0x68, 0x11, 0xc3, 0x00, 0xa4, 0x01, 0x68, 0xb0,
- 0xc2, 0x00, 0x4d, 0x01, 0x68, 0x31, 0xc3, 0x02, 0xd0, 0x01, 0x68, 0x78,
- 0x90, 0x01, 0x68, 0x39, 0x94, 0x01, 0x68, 0x90, 0xc2, 0x00, 0x57, 0x01,
- 0x68, 0x49, 0xc3, 0x02, 0xd0, 0x01, 0x68, 0xa8, 0xc2, 0x00, 0xbb, 0x01,
- 0x68, 0x71, 0xc3, 0x02, 0xd0, 0x01, 0x68, 0xa0, 0xc2, 0x00, 0x57, 0x01,
- 0x68, 0x89, 0xc3, 0x0d, 0xd9, 0x01, 0x68, 0xc0, 0x0a, 0xc2, 0x95, 0x2e,
- 0xc2, 0x00, 0x57, 0x01, 0x68, 0xd1, 0xc2, 0x00, 0x31, 0x01, 0x68, 0xe0,
- 0xc2, 0x00, 0x31, 0x01, 0x68, 0xd9, 0xc3, 0x00, 0xa4, 0x01, 0x68, 0xf8,
- 0x03, 0xc2, 0x95, 0x3a, 0x87, 0x01, 0x69, 0x09, 0xc2, 0x04, 0x0a, 0x01,
- 0x69, 0x11, 0xc2, 0x1b, 0xd8, 0x01, 0x69, 0x28, 0xc3, 0xe6, 0x8e, 0x01,
- 0x60, 0x09, 0xc6, 0xc7, 0x4d, 0x01, 0x61, 0x40, 0xc4, 0xe5, 0x7f, 0x01,
- 0x60, 0x21, 0xc4, 0xe0, 0x53, 0x01, 0x60, 0x39, 0xc5, 0xde, 0xcb, 0x01,
- 0x60, 0x60, 0x07, 0xc2, 0x95, 0x42, 0xc3, 0x00, 0xb4, 0x01, 0x61, 0x09,
- 0x97, 0x01, 0x61, 0x19, 0x91, 0x01, 0x61, 0x30, 0xc6, 0xd1, 0xd1, 0x01,
- 0x60, 0x31, 0xc5, 0xd8, 0x59, 0x01, 0x60, 0x40, 0x42, 0x23, 0xb6, 0xc2,
- 0x95, 0x4c, 0xcb, 0x91, 0x71, 0x01, 0x60, 0x51, 0x47, 0x1d, 0x45, 0x42,
- 0x95, 0x56, 0xc6, 0xc3, 0xdb, 0x01, 0x60, 0x71, 0xcf, 0x61, 0xdc, 0x01,
- 0x61, 0x70, 0xc2, 0x01, 0xc6, 0x01, 0x60, 0x89, 0xc2, 0x00, 0x16, 0x01,
- 0x60, 0xc8, 0xc5, 0xcf, 0x14, 0x01, 0x60, 0x91, 0x87, 0x01, 0x60, 0xd0,
- 0xc4, 0xe6, 0x1b, 0x01, 0x60, 0xa1, 0x0a, 0xc2, 0x95, 0x62, 0xc9, 0xb4,
- 0x8f, 0x01, 0x61, 0x11, 0xc8, 0xa9, 0x8f, 0x01, 0x61, 0x22, 0x02, 0x95,
- 0x6f, 0xc5, 0xd9, 0x8a, 0x01, 0x60, 0xa9, 0xc2, 0x00, 0xcb, 0x01, 0x60,
- 0xe1, 0xcb, 0x95, 0xe9, 0x01, 0x61, 0x68, 0xc4, 0xaf, 0x43, 0x01, 0x60,
- 0xb9, 0xc3, 0x04, 0x44, 0x01, 0x61, 0x50, 0xc5, 0x76, 0x1a, 0x01, 0x60,
- 0xe9, 0xcd, 0x76, 0x12, 0x01, 0x61, 0x78, 0xc3, 0xe6, 0x8e, 0x01, 0x61,
- 0x89, 0xc6, 0xc7, 0x4d, 0x01, 0x62, 0xc0, 0xc4, 0xe5, 0x7f, 0x01, 0x61,
- 0xa1, 0xc4, 0xe0, 0x53, 0x01, 0x61, 0xb9, 0xc5, 0xde, 0xcb, 0x01, 0x61,
- 0xe0, 0x07, 0xc2, 0x95, 0x75, 0xc3, 0x00, 0xb4, 0x01, 0x62, 0x89, 0x97,
- 0x01, 0x62, 0x99, 0x91, 0x01, 0x62, 0xb0, 0xc6, 0xd1, 0xd1, 0x01, 0x61,
- 0xb1, 0xc5, 0xd8, 0x59, 0x01, 0x61, 0xc0, 0x42, 0x23, 0xb6, 0xc2, 0x95,
- 0x7f, 0xcb, 0x91, 0x71, 0x01, 0x61, 0xd1, 0x47, 0x1d, 0x45, 0x42, 0x95,
- 0x89, 0xc6, 0xc3, 0xdb, 0x01, 0x61, 0xf1, 0xcf, 0x61, 0xdc, 0x01, 0x62,
- 0xf0, 0xc2, 0x01, 0xc6, 0x01, 0x62, 0x09, 0xc2, 0x00, 0x16, 0x01, 0x62,
- 0x48, 0xc5, 0xcf, 0x14, 0x01, 0x62, 0x11, 0x87, 0x01, 0x62, 0x50, 0xc4,
- 0xe6, 0x1b, 0x01, 0x62, 0x21, 0x0a, 0xc2, 0x95, 0x95, 0xc9, 0xb4, 0x8f,
- 0x01, 0x62, 0x91, 0xc8, 0xa9, 0x8f, 0x01, 0x62, 0xa2, 0x02, 0x95, 0xa2,
- 0xc5, 0xd9, 0x8a, 0x01, 0x62, 0x29, 0xc2, 0x00, 0xcb, 0x01, 0x62, 0x61,
- 0xcb, 0x95, 0xe9, 0x01, 0x62, 0xe8, 0xc4, 0xaf, 0x43, 0x01, 0x62, 0x39,
- 0xc3, 0x04, 0x44, 0x01, 0x62, 0xd0, 0xc5, 0x76, 0x1a, 0x01, 0x62, 0x69,
- 0xcd, 0x76, 0x12, 0x01, 0x62, 0xf8, 0xc7, 0x11, 0x41, 0x00, 0x58, 0x11,
- 0xc7, 0x76, 0x59, 0x00, 0x59, 0xe8, 0xc5, 0x44, 0x7b, 0x00, 0x58, 0x19,
- 0xc4, 0x0f, 0x7c, 0x00, 0x5a, 0x68, 0x83, 0x00, 0x58, 0x31, 0x8b, 0x00,
- 0x58, 0x81, 0x97, 0x00, 0x58, 0xa0, 0x8b, 0x00, 0x58, 0x40, 0x97, 0x00,
- 0x58, 0x50, 0x47, 0xac, 0xc2, 0xc2, 0x95, 0xa8, 0x83, 0x00, 0x59, 0xa8,
- 0x83, 0x00, 0x58, 0xa9, 0xc2, 0x00, 0xa4, 0x00, 0x58, 0xb0, 0x83, 0x00,
- 0x58, 0xb9, 0xc2, 0x00, 0xa4, 0x00, 0x58, 0xc0, 0xc2, 0x01, 0x29, 0x00,
- 0x58, 0xc9, 0xc2, 0x1d, 0x5f, 0x00, 0x58, 0xf1, 0xc2, 0x00, 0xc1, 0x00,
- 0x59, 0x19, 0x83, 0x00, 0x59, 0x40, 0x83, 0x00, 0x58, 0xd1, 0xc2, 0x00,
- 0xa4, 0x00, 0x58, 0xd8, 0x83, 0x00, 0x58, 0xe1, 0xc2, 0x00, 0xa4, 0x00,
- 0x58, 0xe8, 0x16, 0xc2, 0x95, 0xb6, 0x83, 0x00, 0x59, 0x21, 0xc2, 0x00,
- 0xa4, 0x00, 0x59, 0x28, 0x06, 0xc2, 0x95, 0xc0, 0x83, 0x00, 0x59, 0x31,
- 0xc2, 0x00, 0xa4, 0x00, 0x59, 0x38, 0x83, 0x00, 0x59, 0x51, 0xc2, 0x00,
- 0xa4, 0x00, 0x59, 0x58, 0x83, 0x00, 0x59, 0x61, 0xc2, 0x00, 0xa4, 0x00,
- 0x59, 0x68, 0x83, 0x00, 0x59, 0x79, 0xc2, 0x1d, 0x5f, 0x00, 0x5a, 0xf8,
- 0x83, 0x00, 0x59, 0x81, 0xc2, 0x02, 0x59, 0x00, 0x5a, 0xe1, 0xc2, 0x00,
- 0xa4, 0x00, 0x5a, 0xe8, 0x83, 0x00, 0x59, 0x91, 0xc2, 0x00, 0xc7, 0x00,
- 0x59, 0x98, 0xc2, 0x00, 0xa4, 0x00, 0x59, 0xb1, 0xc2, 0x0c, 0x65, 0x00,
- 0x59, 0xb9, 0x83, 0x00, 0x59, 0xc0, 0x83, 0x00, 0x59, 0xf1, 0x8b, 0x00,
- 0x5a, 0x41, 0x97, 0x00, 0x5a, 0x60, 0x8b, 0x00, 0x5a, 0x00, 0x97, 0x00,
- 0x5a, 0x10, 0x94, 0x00, 0x5a, 0x1b, 0x02, 0x95, 0xca, 0x8e, 0x00, 0x5b,
- 0x12, 0x02, 0x95, 0xce, 0xc2, 0x01, 0x47, 0x00, 0x5b, 0x41, 0xc4, 0x04,
- 0x5e, 0x00, 0x5b, 0x48, 0xc3, 0x06, 0x9e, 0x00, 0x5b, 0x51, 0xc3, 0x0c,
- 0x5b, 0x00, 0x5b, 0x58, 0xc2, 0x26, 0x51, 0x00, 0x5b, 0x61, 0xc4, 0x18,
- 0x83, 0x00, 0x5b, 0x68, 0xc7, 0x0a, 0xb9, 0x00, 0x5b, 0x91, 0xc4, 0x03,
- 0x2b, 0x00, 0x5b, 0x99, 0xc9, 0x6b, 0x69, 0x00, 0x5b, 0xa9, 0xc6, 0x01,
- 0xdb, 0x00, 0x5b, 0xb0, 0xc8, 0x0a, 0xb9, 0x00, 0x5b, 0xa1, 0xca, 0xa7,
- 0x5c, 0x00, 0x5b, 0xb8, 0xc3, 0x04, 0x5f, 0x0f, 0x68, 0x1b, 0x02, 0x95,
- 0xd2, 0xc4, 0x0c, 0x55, 0x0f, 0x68, 0x62, 0x02, 0x95, 0xd6, 0x91, 0x0f,
- 0x68, 0x13, 0x02, 0x95, 0xdc, 0xc4, 0x18, 0x85, 0x0f, 0x68, 0x5a, 0x02,
- 0x95, 0xe0, 0xc9, 0x4f, 0xff, 0x0f, 0x69, 0x28, 0xc2, 0x02, 0x53, 0x0f,
- 0x68, 0x23, 0x02, 0x95, 0xe6, 0xc3, 0x0c, 0x56, 0x0f, 0x68, 0x6a, 0x02,
- 0x95, 0xea, 0xc2, 0x00, 0x7b, 0x0f, 0x68, 0x2b, 0x02, 0x95, 0xf0, 0xc3,
- 0x43, 0xcd, 0x0f, 0x68, 0x72, 0x02, 0x95, 0xf4, 0xc7, 0x0c, 0x4b, 0x0f,
- 0x68, 0x99, 0xc8, 0x50, 0x00, 0x0f, 0x68, 0xe0, 0xc2, 0x0c, 0x57, 0x0f,
- 0x68, 0x7b, 0x02, 0x95, 0xfa, 0x00, 0x42, 0x96, 0x00, 0xc2, 0x0c, 0x57,
- 0x0f, 0x68, 0x83, 0x02, 0x96, 0x0c, 0x00, 0x42, 0x96, 0x12, 0xc9, 0x4f,
- 0xff, 0x0f, 0x69, 0x60, 0xc7, 0x0c, 0x4b, 0x0f, 0x68, 0xd1, 0xc8, 0x50,
- 0x00, 0x0f, 0x69, 0x18, 0xc9, 0x4f, 0xff, 0x0f, 0x69, 0x68, 0xc7, 0x0c,
- 0x4b, 0x0f, 0x68, 0xd9, 0xc8, 0x50, 0x00, 0x0f, 0x69, 0x20, 0xc9, 0x4f,
- 0xff, 0x0f, 0x69, 0xd0, 0xc9, 0x4f, 0xff, 0x0f, 0x69, 0xd8, 0xc8, 0x0c,
- 0x4a, 0x0f, 0x69, 0xc0, 0xc8, 0x0c, 0x4a, 0x0f, 0x69, 0xc8, 0xc6, 0x31,
- 0x53, 0x01, 0x3e, 0x21, 0xc4, 0x0f, 0x20, 0x01, 0x3e, 0x18, 0xd8, 0x22,
- 0xe0, 0x01, 0x39, 0xe1, 0xc8, 0x0b, 0x7f, 0x01, 0x39, 0x91, 0xca, 0x21,
- 0x3e, 0x01, 0x39, 0x59, 0xc5, 0x0d, 0xbc, 0x01, 0x38, 0xd8, 0x9a, 0x01,
- 0x21, 0x19, 0xc2, 0x01, 0x04, 0x0f, 0xa6, 0xb0, 0xc5, 0x57, 0xd8, 0x0f,
- 0xae, 0x09, 0xca, 0xa1, 0x08, 0x0f, 0xa6, 0x10, 0xcc, 0x87, 0x4c, 0x0f,
- 0xa7, 0x69, 0xcb, 0x8e, 0xa6, 0x0f, 0xa7, 0x60, 0xcd, 0x7d, 0x89, 0x01,
- 0x1c, 0x81, 0xcd, 0x79, 0xba, 0x01, 0x1c, 0x78, 0xc9, 0x38, 0x82, 0x08,
- 0x7c, 0x49, 0x44, 0x01, 0x59, 0xc2, 0x96, 0x1e, 0xc3, 0x00, 0xe8, 0x08,
- 0x7c, 0x30, 0x49, 0x01, 0x59, 0xc2, 0x96, 0x2a, 0x44, 0x01, 0xb8, 0x42,
- 0x96, 0x36, 0x0e, 0xc2, 0x96, 0x42, 0xc3, 0x26, 0xf9, 0x08, 0x7c, 0x01,
- 0xc2, 0x00, 0x27, 0x08, 0x7b, 0xe1, 0x15, 0xc2, 0x96, 0x4e, 0xc3, 0x1f,
- 0xd8, 0x08, 0x7b, 0xd1, 0xc3, 0x0b, 0x0e, 0x08, 0x7b, 0xc9, 0xc4, 0xe2,
- 0x57, 0x08, 0x7b, 0xb9, 0xc4, 0x4b, 0x98, 0x08, 0x7b, 0xb1, 0xca, 0x9c,
- 0xc6, 0x08, 0x7b, 0xa9, 0xc5, 0x4b, 0x92, 0x08, 0x7b, 0xa1, 0xc3, 0x78,
- 0xa9, 0x08, 0x7b, 0x99, 0xca, 0xa1, 0x94, 0x08, 0x7b, 0x91, 0xc4, 0xe4,
- 0x8f, 0x08, 0x7b, 0x89, 0xc5, 0xa8, 0xf1, 0x08, 0x7b, 0x81, 0xc4, 0x5d,
- 0xe2, 0x08, 0x7b, 0xf0, 0xd1, 0x52, 0xd7, 0x08, 0x79, 0x31, 0x47, 0x33,
- 0xef, 0xc2, 0x96, 0x58, 0x0e, 0x42, 0x96, 0x69, 0x43, 0x2f, 0x53, 0xc2,
- 0x96, 0x75, 0x47, 0x02, 0x90, 0x42, 0x96, 0x81, 0xc3, 0x0a, 0xe1, 0x08,
- 0x67, 0xe1, 0x42, 0x00, 0x60, 0xc2, 0x96, 0xde, 0xc3, 0x01, 0xb4, 0x08,
- 0x67, 0xd2, 0x02, 0x96, 0xea, 0x97, 0x08, 0x67, 0x53, 0x02, 0x96, 0xee,
- 0x87, 0x08, 0x66, 0x4b, 0x02, 0x96, 0xfc, 0x4a, 0xa2, 0x66, 0xc2, 0x97,
- 0x5c, 0x4b, 0x91, 0xa8, 0xc2, 0x97, 0x68, 0xc8, 0xbe, 0x65, 0x08, 0x67,
- 0x19, 0x91, 0x08, 0x66, 0xdb, 0x02, 0x97, 0x74, 0x83, 0x08, 0x66, 0x03,
- 0x02, 0x97, 0x7e, 0x8b, 0x08, 0x66, 0x83, 0x02, 0x97, 0x92, 0xc7, 0xc8,
- 0xaa, 0x08, 0x66, 0x50, 0x87, 0x08, 0x64, 0x4b, 0x02, 0x97, 0x96, 0xc8,
- 0xbe, 0x65, 0x08, 0x65, 0x19, 0x91, 0x08, 0x64, 0xdb, 0x02, 0x97, 0xf6,
- 0x4a, 0xa2, 0x66, 0xc2, 0x98, 0x00, 0x4b, 0x91, 0xa8, 0xc2, 0x98, 0x0c,
- 0x97, 0x08, 0x65, 0x53, 0x02, 0x98, 0x18, 0x83, 0x08, 0x64, 0x03, 0x02,
- 0x98, 0x26, 0x8b, 0x08, 0x64, 0x83, 0x02, 0x98, 0x3a, 0xc7, 0xc8, 0xaa,
- 0x08, 0x64, 0x50, 0xc4, 0xe3, 0x27, 0x08, 0x62, 0x41, 0x91, 0x08, 0x60,
- 0x33, 0x02, 0x98, 0x3e, 0x83, 0x08, 0x60, 0x03, 0x02, 0x98, 0x51, 0x07,
- 0xc2, 0x98, 0x80, 0x8b, 0x08, 0x60, 0x1a, 0x02, 0x98, 0xa0, 0x83, 0x08,
- 0x60, 0x0b, 0x02, 0x98, 0xa8, 0x87, 0x08, 0x60, 0x2b, 0x02, 0x98, 0xdb,
- 0x11, 0xc2, 0x98, 0xed, 0x8b, 0x08, 0x60, 0x22, 0x02, 0x98, 0xf8, 0x16,
- 0xc2, 0x98, 0xfc, 0xc3, 0x01, 0xb4, 0x08, 0x54, 0xe8, 0x42, 0x02, 0xb4,
- 0xc2, 0x99, 0x08, 0x16, 0xc2, 0x99, 0x12, 0xc3, 0x2e, 0x60, 0x08, 0x54,
- 0xd1, 0x09, 0xc2, 0x99, 0x22, 0x42, 0x0b, 0xc6, 0xc2, 0x99, 0x2e, 0x43,
- 0xe7, 0xab, 0xc2, 0x99, 0x36, 0xc3, 0x78, 0xa9, 0x08, 0x54, 0x29, 0xc3,
- 0x0f, 0x60, 0x08, 0x54, 0x21, 0xc4, 0x19, 0x9d, 0x08, 0x54, 0x19, 0x0a,
- 0xc2, 0x99, 0x42, 0xc3, 0x0f, 0x69, 0x08, 0x54, 0x09, 0xc3, 0x6f, 0x91,
- 0x08, 0x54, 0x39, 0xc3, 0x88, 0x60, 0x08, 0x54, 0x41, 0x0d, 0xc2, 0x99,
- 0x4e, 0xc4, 0x3a, 0x8e, 0x08, 0x54, 0x61, 0xc3, 0x0f, 0xd8, 0x08, 0x54,
- 0x71, 0xc3, 0xb2, 0x7c, 0x08, 0x54, 0x81, 0x03, 0x42, 0x99, 0x5a, 0xcd,
- 0x7d, 0xfe, 0x0f, 0xad, 0x99, 0x44, 0x05, 0xe2, 0x42, 0x99, 0x66, 0xc2,
- 0x00, 0x57, 0x08, 0x1a, 0x81, 0xc3, 0x31, 0xb5, 0x08, 0x1a, 0x89, 0xc3,
- 0x42, 0x86, 0x08, 0x1a, 0x91, 0x06, 0xc2, 0x99, 0x78, 0x87, 0x08, 0x1a,
- 0xa3, 0x02, 0x99, 0x82, 0x1c, 0xc2, 0x99, 0x86, 0x8b, 0x08, 0x1a, 0xcb,
- 0x02, 0x99, 0x92, 0xc4, 0xe2, 0x2b, 0x08, 0x1a, 0xd1, 0xc3, 0x14, 0xc0,
- 0x08, 0x1a, 0xd9, 0xc5, 0xdb, 0xe2, 0x08, 0x1a, 0xe1, 0xc5, 0xda, 0x07,
- 0x08, 0x1a, 0xe9, 0x18, 0xc2, 0x99, 0x9a, 0xc4, 0xe5, 0xeb, 0x08, 0x1a,
- 0xf9, 0xc3, 0x28, 0x41, 0x08, 0x1b, 0x01, 0x15, 0xc2, 0x99, 0xa6, 0x16,
- 0xc2, 0x99, 0xb0, 0x97, 0x08, 0x1b, 0x19, 0xc5, 0xd9, 0x35, 0x08, 0x1b,
- 0x21, 0x1b, 0xc2, 0x99, 0xbc, 0x91, 0x08, 0x1b, 0x4b, 0x02, 0x99, 0xd6,
- 0xc2, 0x00, 0xa4, 0x08, 0x1b, 0x60, 0xc2, 0x00, 0x48, 0x08, 0x18, 0x09,
- 0x0d, 0xc2, 0x99, 0xda, 0xc2, 0x00, 0x06, 0x08, 0x18, 0x19, 0x87, 0x08,
- 0x18, 0x23, 0x02, 0x99, 0xec, 0xc2, 0x00, 0x7b, 0x08, 0x18, 0x29, 0xc2,
- 0x08, 0xc2, 0x08, 0x18, 0x31, 0xc2, 0x01, 0xf0, 0x08, 0x18, 0x39, 0x16,
- 0xc2, 0x9a, 0x10, 0x8b, 0x08, 0x18, 0x4b, 0x02, 0x9a, 0x1a, 0x83, 0x08,
- 0x18, 0x01, 0x91, 0x08, 0x18, 0x79, 0x12, 0xc2, 0x9a, 0x1e, 0x15, 0xc2,
- 0x9a, 0x28, 0x97, 0x08, 0x18, 0xb3, 0x02, 0x9a, 0x34, 0xc3, 0x26, 0x9b,
- 0x08, 0x18, 0xe1, 0xc2, 0x05, 0xd0, 0x08, 0x19, 0x69, 0xcc, 0x84, 0xac,
- 0x08, 0x19, 0x70, 0xc3, 0x01, 0xb4, 0x08, 0x19, 0x01, 0x42, 0x00, 0x60,
- 0xc2, 0x9a, 0x38, 0xc3, 0x0a, 0xe1, 0x08, 0x19, 0x10, 0x83, 0x00, 0xe2,
- 0xf8, 0x99, 0x00, 0xe3, 0x19, 0x8f, 0x00, 0xe3, 0x11, 0x8c, 0x00, 0xe3,
- 0x09, 0x8d, 0x00, 0xe3, 0x00, 0xc7, 0x43, 0x5d, 0x01, 0x5d, 0xd1, 0xd1,
- 0x52, 0xc6, 0x01, 0x5d, 0xd8, 0x90, 0x08, 0x25, 0x90, 0xc3, 0x1b, 0xb6,
- 0x08, 0x25, 0xb1, 0xc2, 0x04, 0x2b, 0x08, 0x25, 0xe9, 0xc2, 0x03, 0xa4,
- 0x08, 0x26, 0x29, 0x16, 0x42, 0x9a, 0x44, 0x83, 0x08, 0x26, 0x51, 0xc2,
- 0x00, 0xa4, 0x08, 0x26, 0x60, 0x90, 0x08, 0x26, 0xd0, 0xc3, 0x1b, 0xb6,
- 0x08, 0x26, 0xf1, 0xc2, 0x04, 0x2b, 0x08, 0x27, 0x29, 0xc2, 0x03, 0xa4,
- 0x08, 0x27, 0x69, 0x16, 0x42, 0x9a, 0x4e, 0x83, 0x08, 0x27, 0x91, 0xc2,
- 0x00, 0xa4, 0x08, 0x27, 0xa0, 0x0d, 0xc2, 0x9a, 0x58, 0xcb, 0x95, 0xbd,
- 0x0e, 0x7d, 0x89, 0xc8, 0x4d, 0xc2, 0x0e, 0x7d, 0x80, 0xc6, 0xcc, 0x91,
- 0x0e, 0x7a, 0x88, 0x0d, 0xc2, 0x9a, 0x64, 0x16, 0xc2, 0x9a, 0x70, 0x44,
- 0xe1, 0xeb, 0xc2, 0x9a, 0x7c, 0x49, 0x7f, 0xd2, 0xc2, 0x9a, 0x89, 0xce,
- 0x64, 0x71, 0x0e, 0x7c, 0xb9, 0x12, 0xc2, 0x9a, 0x96, 0xce, 0x6e, 0xdb,
- 0x0e, 0x7c, 0x98, 0x00, 0x42, 0x9a, 0xa0, 0x00, 0x42, 0x9a, 0xb5, 0x42,
- 0x00, 0x46, 0xc2, 0x9a, 0xc1, 0xc8, 0xbd, 0xf5, 0x0e, 0x7b, 0xf8, 0xcb,
- 0x83, 0xe1, 0x0e, 0x7b, 0xe1, 0xce, 0x64, 0x71, 0x0e, 0x7b, 0xd9, 0xc8,
- 0x4d, 0xc2, 0x0e, 0x7b, 0xd1, 0xc8, 0xbb, 0x05, 0x0e, 0x7b, 0xc8, 0x45,
- 0x4d, 0xbd, 0xc2, 0x9a, 0xcd, 0xce, 0x64, 0x71, 0x0e, 0x7b, 0xb8, 0xc6,
- 0x6e, 0x6b, 0x0e, 0x7b, 0xa1, 0xca, 0x95, 0xbe, 0x0e, 0x7b, 0x98, 0xcc,
- 0x83, 0x14, 0x0e, 0x7d, 0x59, 0xc7, 0xc5, 0x85, 0x0e, 0x7d, 0x51, 0xc3,
- 0xe6, 0xa0, 0x0e, 0x7d, 0x48, 0xc8, 0xbb, 0x6d, 0x0e, 0x79, 0x68, 0xc8,
- 0xc0, 0xf5, 0x0e, 0x79, 0xc8, 0xc9, 0x78, 0x8f, 0x0e, 0x78, 0xc1, 0x43,
- 0x00, 0xb7, 0x42, 0x9a, 0xd9, 0xc5, 0x01, 0xf7, 0x0e, 0x78, 0x89, 0xc4,
- 0x01, 0x1e, 0x0e, 0x78, 0x28, 0xc7, 0x93, 0x2c, 0x0e, 0x79, 0xb3, 0x02,
- 0x9a, 0xe5, 0xc6, 0xd0, 0x15, 0x0e, 0x79, 0x30, 0x15, 0xc2, 0x9a, 0xeb,
- 0x43, 0x00, 0xb7, 0x42, 0x9a, 0xf7, 0xc3, 0xe6, 0x9d, 0x0e, 0x79, 0x51,
- 0xc2, 0x01, 0x64, 0x0e, 0x79, 0x00, 0x43, 0x00, 0xb7, 0xc2, 0x9b, 0x03,
- 0x4d, 0x78, 0x8f, 0x42, 0x9b, 0x0f, 0xc6, 0x41, 0xfa, 0x0e, 0x78, 0xf1,
- 0x42, 0x04, 0x2a, 0x42, 0x9b, 0x1b, 0xc5, 0x01, 0xf7, 0x0e, 0x78, 0x91,
- 0xc4, 0x01, 0x1e, 0x0e, 0x78, 0x30, 0xc6, 0x78, 0x92, 0x0e, 0x78, 0xe9,
- 0x4b, 0x90, 0x27, 0x42, 0x9b, 0x27, 0xc5, 0x01, 0xf7, 0x0e, 0x78, 0xa1,
- 0xc4, 0x01, 0x1e, 0x0e, 0x78, 0x40, 0xc5, 0x01, 0xf7, 0x0e, 0x78, 0x81,
- 0xc4, 0x01, 0x1e, 0x0e, 0x78, 0x20, 0xc5, 0x01, 0xf7, 0x0e, 0x78, 0x69,
- 0xc4, 0x01, 0x1e, 0x0e, 0x78, 0x08, 0xce, 0x20, 0x73, 0x08, 0xd1, 0xb0,
- 0xc3, 0x0c, 0x5f, 0x05, 0x4e, 0x53, 0x02, 0x9b, 0x33, 0xc4, 0xe5, 0xe7,
- 0x05, 0x4e, 0x18, 0xc6, 0xd1, 0x11, 0x05, 0x4e, 0x39, 0xc6, 0x41, 0xce,
- 0x05, 0x4e, 0x60, 0x17, 0xc2, 0x9b, 0x39, 0xc5, 0x3f, 0xa9, 0x05, 0x4e,
- 0x40, 0xc6, 0xcc, 0xd3, 0x05, 0x4c, 0x98, 0x42, 0x00, 0x4f, 0x42, 0x9b,
- 0x45, 0xc6, 0xcc, 0x6d, 0x05, 0x4d, 0x60, 0xc6, 0xcc, 0xd3, 0x05, 0x4d,
- 0x40, 0x00, 0x42, 0x9b, 0x51, 0x83, 0x05, 0x4d, 0x23, 0x02, 0x9b, 0x5d,
- 0xc2, 0x1d, 0x5f, 0x05, 0x4c, 0xd3, 0x02, 0x9b, 0x63, 0xc2, 0x01, 0x29,
- 0x05, 0x4c, 0xa2, 0x02, 0x9b, 0x69, 0x83, 0x05, 0x4d, 0x13, 0x02, 0x9b,
- 0x72, 0xc2, 0x0b, 0xc6, 0x05, 0x4c, 0xea, 0x02, 0x9b, 0x78, 0x83, 0x05,
- 0x4d, 0x03, 0x02, 0x9b, 0x7e, 0xc2, 0x00, 0xb3, 0x05, 0x4c, 0xda, 0x02,
- 0x9b, 0x84, 0xca, 0x6a, 0xf6, 0x05, 0x4c, 0xc8, 0xc6, 0xcc, 0xd3, 0x05,
- 0x4c, 0xb0, 0x00, 0x42, 0x9b, 0x8a, 0x8b, 0x05, 0x4c, 0x68, 0x8b, 0x05,
- 0x4c, 0x39, 0xc5, 0xd7, 0xbe, 0x05, 0x4c, 0x28, 0xc4, 0x04, 0xb5, 0x05,
- 0x4d, 0xd1, 0xc4, 0xe0, 0xbb, 0x05, 0x4d, 0xa0, 0xcf, 0x65, 0x24, 0x01,
- 0x2c, 0xf2, 0x02, 0x9b, 0x96, 0x45, 0x00, 0x56, 0x42, 0x9b, 0x9c, 0x97,
- 0x05, 0x22, 0xdb, 0x02, 0x9b, 0xa8, 0x91, 0x05, 0x22, 0xbb, 0x02, 0x9b,
- 0xbb, 0x8b, 0x05, 0x22, 0x62, 0x02, 0x9b, 0xc7, 0x9b, 0x05, 0x22, 0x33,
- 0x02, 0x9b, 0xda, 0x97, 0x05, 0x22, 0x03, 0x02, 0x9b, 0xed, 0x91, 0x05,
- 0x21, 0xeb, 0x02, 0x9c, 0x03, 0x8b, 0x05, 0x21, 0x9a, 0x02, 0x9c, 0x0f,
- 0x9b, 0x05, 0x1d, 0x3b, 0x02, 0x9c, 0x22, 0x97, 0x05, 0x1d, 0x0b, 0x02,
- 0x9c, 0x35, 0x87, 0x05, 0x1c, 0xeb, 0x02, 0x9c, 0x48, 0x91, 0x05, 0x1c,
- 0xcb, 0x02, 0x9c, 0x54, 0x83, 0x05, 0x1c, 0xb2, 0x02, 0x9c, 0x5c, 0xc2,
- 0x04, 0x0a, 0x05, 0x12, 0xf3, 0x02, 0x9c, 0x68, 0x83, 0x05, 0x13, 0x13,
- 0x02, 0x9c, 0x70, 0xc2, 0x00, 0xb1, 0x05, 0x13, 0x33, 0x02, 0x9c, 0x7c,
- 0x91, 0x05, 0x13, 0x4b, 0x02, 0x9c, 0x84, 0x87, 0x05, 0x13, 0x62, 0x02,
- 0x9c, 0x90, 0x8b, 0x05, 0x17, 0x7b, 0x02, 0x9c, 0x98, 0x83, 0x05, 0x17,
- 0xb3, 0x02, 0x9c, 0xab, 0x97, 0x05, 0x17, 0xfb, 0x02, 0x9c, 0xb7, 0x11,
- 0xc2, 0x9c, 0xcd, 0x87, 0x05, 0x17, 0xeb, 0x02, 0x9c, 0xd5, 0x9b, 0x05,
- 0x18, 0x2a, 0x02, 0x9c, 0xd9, 0x8b, 0x05, 0x03, 0xc3, 0x02, 0x9c, 0xec,
- 0x83, 0x05, 0x03, 0xfb, 0x02, 0x9c, 0xff, 0x91, 0x05, 0x04, 0x1b, 0x02,
- 0x9d, 0x0b, 0x97, 0x05, 0x04, 0x3b, 0x02, 0x9d, 0x17, 0x9b, 0x05, 0x04,
- 0x6a, 0x02, 0x9d, 0x2a, 0x8b, 0x05, 0x0a, 0x9b, 0x02, 0x9d, 0x3d, 0x83,
- 0x05, 0x0a, 0xcb, 0x02, 0x9d, 0x50, 0x91, 0x05, 0x0a, 0xeb, 0x02, 0x9d,
- 0x5c, 0x87, 0x05, 0x0b, 0x03, 0x02, 0x9d, 0x68, 0x97, 0x05, 0x0b, 0x22,
- 0x02, 0x9d, 0x70, 0x96, 0x05, 0x0b, 0xe9, 0x9a, 0x05, 0x0b, 0xf1, 0x92,
- 0x05, 0x0c, 0x01, 0x87, 0x05, 0x0c, 0x12, 0x02, 0x9d, 0x83, 0x9a, 0x05,
- 0x0c, 0x21, 0x92, 0x05, 0x0c, 0x30, 0x91, 0x05, 0x0c, 0x43, 0x02, 0x9d,
- 0x8b, 0x96, 0x05, 0x0c, 0x89, 0x9a, 0x05, 0x0c, 0x91, 0x92, 0x05, 0x0c,
- 0xa1, 0x94, 0x05, 0x0c, 0xb2, 0x02, 0x9d, 0x93, 0x96, 0x05, 0x0c, 0x51,
- 0x9a, 0x05, 0x0c, 0x59, 0x92, 0x05, 0x0c, 0x68, 0x9a, 0x05, 0x0c, 0x71,
- 0x92, 0x05, 0x0c, 0x80, 0x9b, 0x05, 0x21, 0x7b, 0x02, 0x9d, 0x97, 0x97,
- 0x05, 0x21, 0x4b, 0x02, 0x9d, 0xa3, 0x91, 0x05, 0x21, 0x2b, 0x02, 0x9d,
- 0xbd, 0x8b, 0x05, 0x20, 0xd2, 0x02, 0x9d, 0xc9, 0x94, 0x05, 0x1f, 0xdb,
- 0x02, 0x9d, 0xdc, 0x92, 0x05, 0x1f, 0xc9, 0x9a, 0x05, 0x1f, 0xb9, 0x96,
- 0x05, 0x1f, 0xb0, 0x94, 0x05, 0x1f, 0xab, 0x02, 0x9d, 0xe0, 0x92, 0x05,
- 0x1f, 0x99, 0x9a, 0x05, 0x1f, 0x89, 0x96, 0x05, 0x1f, 0x81, 0x91, 0x05,
- 0x1f, 0x52, 0x02, 0x9d, 0xe4, 0x92, 0x05, 0x1f, 0x79, 0x9a, 0x05, 0x1f,
- 0x69, 0x96, 0x05, 0x1f, 0x60, 0x87, 0x05, 0x1f, 0x33, 0x02, 0x9d, 0xf0,
- 0x92, 0x05, 0x1f, 0x19, 0x9a, 0x05, 0x1f, 0x09, 0x96, 0x05, 0x1f, 0x00,
- 0x94, 0x05, 0x20, 0xbb, 0x02, 0x9d, 0xfc, 0x92, 0x05, 0x20, 0xa9, 0x9a,
- 0x05, 0x20, 0x99, 0x96, 0x05, 0x20, 0x90, 0x94, 0x05, 0x20, 0x8b, 0x02,
- 0x9e, 0x00, 0x92, 0x05, 0x20, 0x79, 0x9a, 0x05, 0x20, 0x69, 0x96, 0x05,
- 0x20, 0x61, 0x91, 0x05, 0x20, 0x32, 0x02, 0x9e, 0x04, 0x92, 0x05, 0x20,
- 0x59, 0x9a, 0x05, 0x20, 0x49, 0x96, 0x05, 0x20, 0x40, 0x87, 0x05, 0x20,
- 0x13, 0x02, 0x9e, 0x10, 0x92, 0x05, 0x1f, 0xf9, 0x9a, 0x05, 0x1f, 0xe9,
- 0x96, 0x05, 0x1f, 0xe0, 0x94, 0x05, 0x1e, 0xfb, 0x02, 0x9e, 0x1c, 0x92,
- 0x05, 0x1e, 0xe9, 0x9a, 0x05, 0x1e, 0xd9, 0x96, 0x05, 0x1e, 0xd0, 0x94,
- 0x05, 0x1e, 0xcb, 0x02, 0x9e, 0x20, 0x92, 0x05, 0x1e, 0xb9, 0x9a, 0x05,
- 0x1e, 0xa9, 0x96, 0x05, 0x1e, 0xa1, 0x91, 0x05, 0x1e, 0x5a, 0x02, 0x9e,
- 0x24, 0x92, 0x05, 0x1e, 0x99, 0x9a, 0x05, 0x1e, 0x88, 0x92, 0x05, 0x1e,
- 0x81, 0x9a, 0x05, 0x1e, 0x71, 0x96, 0x05, 0x1e, 0x68, 0x92, 0x05, 0x1e,
- 0x49, 0x9a, 0x05, 0x1e, 0x39, 0x96, 0x05, 0x1e, 0x30, 0x9b, 0x05, 0x1c,
- 0x83, 0x02, 0x9e, 0x2c, 0x97, 0x05, 0x1c, 0x53, 0x02, 0x9e, 0x3f, 0x87,
- 0x05, 0x1c, 0x33, 0x02, 0x9e, 0x59, 0x91, 0x05, 0x1c, 0x13, 0x02, 0x9e,
- 0x65, 0x83, 0x05, 0x1b, 0xea, 0x02, 0x9e, 0x71, 0x9b, 0x05, 0x1e, 0x13,
- 0x02, 0x9e, 0x75, 0x97, 0x05, 0x1d, 0xe3, 0x02, 0x9e, 0x88, 0x87, 0x05,
- 0x1d, 0xc3, 0x02, 0x9e, 0xa2, 0x91, 0x05, 0x1d, 0xa3, 0x02, 0x9e, 0xae,
- 0x83, 0x05, 0x1d, 0x6a, 0x02, 0x9e, 0xba, 0x9b, 0x05, 0x1a, 0x13, 0x02,
- 0x9e, 0xc6, 0x8b, 0x05, 0x19, 0x63, 0x02, 0x9e, 0xd9, 0x83, 0x05, 0x19,
- 0x9b, 0x02, 0x9e, 0xec, 0x91, 0x05, 0x19, 0xbb, 0x02, 0x9e, 0xf8, 0x87,
- 0x05, 0x19, 0xd3, 0x02, 0x9f, 0x04, 0x97, 0x05, 0x19, 0xf2, 0x02, 0x9f,
- 0x0c, 0x96, 0x05, 0x18, 0x49, 0x9a, 0x05, 0x18, 0x51, 0x92, 0x05, 0x18,
- 0x61, 0x87, 0x05, 0x18, 0x72, 0x02, 0x9f, 0x18, 0x96, 0x05, 0x18, 0x81,
- 0x9a, 0x05, 0x18, 0x89, 0x92, 0x05, 0x18, 0x98, 0x91, 0x05, 0x18, 0xab,
- 0x02, 0x9f, 0x20, 0x96, 0x05, 0x18, 0xf1, 0x9a, 0x05, 0x18, 0xf9, 0x92,
- 0x05, 0x19, 0x09, 0x94, 0x05, 0x19, 0x1a, 0x02, 0x9f, 0x28, 0x96, 0x05,
- 0x18, 0xb9, 0x9a, 0x05, 0x18, 0xc1, 0x92, 0x05, 0x18, 0xd0, 0x9a, 0x05,
- 0x18, 0xd9, 0x92, 0x05, 0x18, 0xe8, 0x96, 0x05, 0x19, 0x21, 0x9a, 0x05,
- 0x19, 0x29, 0x92, 0x05, 0x19, 0x39, 0x94, 0x05, 0x19, 0x4a, 0x02, 0x9f,
- 0x2c, 0x9b, 0x05, 0x1b, 0xc3, 0x02, 0x9f, 0x30, 0x97, 0x05, 0x1b, 0x93,
- 0x02, 0x9f, 0x43, 0x87, 0x05, 0x1b, 0x7b, 0x02, 0x9f, 0x59, 0x91, 0x05,
- 0x1b, 0x5b, 0x02, 0x9f, 0x65, 0x83, 0x05, 0x1b, 0x1a, 0x02, 0x9f, 0x71,
- 0x94, 0x05, 0x16, 0x7b, 0x02, 0x9f, 0x7d, 0x96, 0x05, 0x16, 0x51, 0x9a,
- 0x05, 0x16, 0x59, 0x92, 0x05, 0x16, 0x68, 0x92, 0x05, 0x16, 0x19, 0x9a,
- 0x05, 0x16, 0x08, 0x96, 0x05, 0x16, 0x21, 0x9a, 0x05, 0x16, 0x29, 0x92,
- 0x05, 0x16, 0x39, 0x94, 0x05, 0x16, 0x4b, 0x02, 0x9f, 0x81, 0x91, 0x05,
- 0x15, 0xda, 0x02, 0x9f, 0x85, 0x96, 0x05, 0x15, 0x71, 0x9a, 0x05, 0x15,
- 0x79, 0x92, 0x05, 0x15, 0x89, 0x87, 0x05, 0x15, 0xa2, 0x02, 0x9f, 0x8d,
- 0x96, 0x05, 0x15, 0xb1, 0x9a, 0x05, 0x15, 0xb9, 0x92, 0x05, 0x15, 0xc8,
- 0x96, 0x05, 0x15, 0xe9, 0x9a, 0x05, 0x15, 0xf1, 0x92, 0x05, 0x16, 0x00,
- 0x9a, 0x05, 0x14, 0xf9, 0x92, 0x05, 0x15, 0x08, 0x92, 0x05, 0x14, 0xf1,
- 0x9a, 0x05, 0x14, 0xe1, 0x96, 0x05, 0x14, 0xd8, 0x91, 0x05, 0x14, 0xcb,
- 0x02, 0x9f, 0x99, 0x96, 0x05, 0x15, 0x11, 0x9a, 0x05, 0x15, 0x19, 0x92,
- 0x05, 0x15, 0x29, 0x94, 0x05, 0x15, 0x3a, 0x02, 0x9f, 0xa1, 0x92, 0x05,
- 0x14, 0xb9, 0x9a, 0x05, 0x14, 0xa9, 0x96, 0x05, 0x14, 0xa0, 0x87, 0x05,
- 0x14, 0x93, 0x02, 0x9f, 0xa5, 0x92, 0x05, 0x14, 0x81, 0x9a, 0x05, 0x14,
- 0x71, 0x96, 0x05, 0x14, 0x68, 0x91, 0x05, 0x16, 0xeb, 0x02, 0x9f, 0xad,
- 0x83, 0x05, 0x16, 0xd3, 0x02, 0x9f, 0xb5, 0x8b, 0x05, 0x16, 0x93, 0x02,
- 0x9f, 0xc1, 0x87, 0x05, 0x17, 0x03, 0x02, 0x9f, 0xd4, 0x97, 0x05, 0x17,
- 0x1b, 0x02, 0x9f, 0xdc, 0x9b, 0x05, 0x17, 0x4a, 0x02, 0x9f, 0xeb, 0x9b,
- 0x05, 0x1a, 0xeb, 0x02, 0x9f, 0xfe, 0x97, 0x05, 0x1a, 0xbb, 0x02, 0xa0,
- 0x11, 0x87, 0x05, 0x1a, 0x9b, 0x02, 0xa0, 0x2b, 0x91, 0x05, 0x1a, 0x7b,
- 0x02, 0xa0, 0x37, 0x83, 0x05, 0x1a, 0x42, 0x02, 0xa0, 0x43, 0x96, 0x05,
- 0x15, 0x41, 0x9a, 0x05, 0x15, 0x49, 0x92, 0x05, 0x15, 0x59, 0x94, 0x05,
- 0x15, 0x6a, 0x02, 0xa0, 0x4f, 0x92, 0x05, 0x14, 0x61, 0x9a, 0x05, 0x14,
- 0x50, 0x92, 0x05, 0x14, 0x49, 0x9a, 0x05, 0x14, 0x38, 0x91, 0x05, 0x14,
- 0x2a, 0x02, 0xa0, 0x53, 0x92, 0x05, 0x14, 0x19, 0x9a, 0x05, 0x14, 0x09,
- 0x96, 0x05, 0x14, 0x00, 0x92, 0x05, 0x13, 0xf9, 0x9a, 0x05, 0x13, 0xe8,
- 0x87, 0x05, 0x12, 0xdb, 0x02, 0xa0, 0x5b, 0x91, 0x05, 0x12, 0xc3, 0x02,
- 0xa0, 0x63, 0xc2, 0x00, 0xb1, 0x05, 0x12, 0xa3, 0x02, 0xa0, 0x6f, 0x83,
- 0x05, 0x12, 0x83, 0x02, 0xa0, 0x7b, 0x8b, 0x05, 0x12, 0x42, 0x02, 0xa0,
- 0x87, 0x96, 0x05, 0x13, 0x71, 0x87, 0x05, 0x13, 0x82, 0x02, 0xa0, 0x9a,
- 0x96, 0x05, 0x13, 0x89, 0x9a, 0x05, 0x13, 0x91, 0x92, 0x05, 0x13, 0xa0,
- 0x96, 0x05, 0x13, 0xa9, 0x9a, 0x05, 0x13, 0xb1, 0x92, 0x05, 0x13, 0xc0,
- 0x96, 0x05, 0x13, 0xc9, 0x9a, 0x05, 0x13, 0xd1, 0x92, 0x05, 0x13, 0xe0,
- 0x8b, 0x05, 0x04, 0x9b, 0x02, 0xa0, 0x9e, 0x83, 0x05, 0x04, 0xd3, 0x02,
- 0xa0, 0xb1, 0x97, 0x05, 0x05, 0x2b, 0x02, 0xa0, 0xbd, 0x91, 0x05, 0x05,
- 0x0b, 0x02, 0xa0, 0xd7, 0x9b, 0x05, 0x05, 0x52, 0x02, 0xa0, 0xe3, 0x8b,
- 0x05, 0x0b, 0x53, 0x02, 0xa0, 0xf2, 0x83, 0x05, 0x0b, 0x93, 0x02, 0xa1,
- 0x05, 0x17, 0xc2, 0xa1, 0x11, 0x11, 0xc2, 0xa1, 0x1c, 0x87, 0x05, 0x0b,
- 0xd2, 0x02, 0xa1, 0x28, 0x8b, 0x05, 0x0c, 0xcb, 0x02, 0xa1, 0x30, 0x83,
- 0x05, 0x0d, 0x03, 0x02, 0xa1, 0x43, 0x97, 0x05, 0x0d, 0x6b, 0x02, 0xa1,
- 0x4f, 0x91, 0x05, 0x0d, 0x33, 0x02, 0xa1, 0x69, 0x87, 0x05, 0x0d, 0x4b,
- 0x02, 0xa1, 0x71, 0x9b, 0x05, 0x0d, 0x9a, 0x02, 0xa1, 0x79, 0x87, 0x05,
- 0x23, 0xbb, 0x02, 0xa1, 0x8c, 0x92, 0x05, 0x23, 0xa1, 0x9a, 0x05, 0x23,
- 0x91, 0x96, 0x05, 0x23, 0x88, 0x91, 0x05, 0x23, 0xdb, 0x02, 0xa1, 0x98,
- 0x96, 0x05, 0x24, 0x09, 0x9a, 0x05, 0x24, 0x11, 0x92, 0x05, 0x24, 0x21,
- 0x94, 0x05, 0x24, 0x32, 0x02, 0xa1, 0xa4, 0x96, 0x05, 0x23, 0xe9, 0x9a,
- 0x05, 0x23, 0xf1, 0x92, 0x05, 0x24, 0x00, 0x96, 0x05, 0x24, 0x39, 0x9a,
- 0x05, 0x24, 0x41, 0x92, 0x05, 0x24, 0x51, 0x94, 0x05, 0x24, 0x62, 0x02,
- 0xa1, 0xa8, 0x94, 0x05, 0x23, 0x83, 0x02, 0xa1, 0xac, 0x92, 0x05, 0x23,
- 0x71, 0x9a, 0x05, 0x23, 0x61, 0x96, 0x05, 0x23, 0x58, 0x96, 0x05, 0x22,
- 0xe9, 0x9a, 0x05, 0x22, 0xf1, 0x92, 0x05, 0x23, 0x01, 0x87, 0x05, 0x23,
- 0x1a, 0x02, 0xa1, 0xb0, 0x9a, 0x05, 0x23, 0x41, 0x92, 0x05, 0x23, 0x51,
- 0x96, 0x05, 0x23, 0x38, 0x9a, 0x05, 0x23, 0x28, 0x97, 0x05, 0x12, 0x13,
- 0x02, 0xa1, 0xbc, 0xc2, 0x04, 0x0a, 0x05, 0x11, 0x8b, 0x02, 0xa1, 0xd6,
- 0x83, 0x05, 0x11, 0xa3, 0x02, 0xa1, 0xda, 0x91, 0x05, 0x11, 0xdb, 0x02,
- 0xa1, 0xe6, 0x87, 0x05, 0x11, 0xf2, 0x02, 0xa1, 0xf2, 0x96, 0x05, 0x05,
- 0x71, 0x9a, 0x05, 0x05, 0x79, 0x92, 0x05, 0x05, 0x89, 0x87, 0x05, 0x05,
- 0x9a, 0x02, 0xa1, 0xfa, 0x96, 0x05, 0x05, 0xa9, 0x9a, 0x05, 0x05, 0xb1,
- 0x92, 0x05, 0x05, 0xc0, 0x91, 0x05, 0x05, 0xdb, 0x02, 0xa2, 0x02, 0x96,
- 0x05, 0x06, 0x19, 0x9a, 0x05, 0x06, 0x21, 0x92, 0x05, 0x06, 0x31, 0x94,
- 0x05, 0x06, 0x42, 0x02, 0xa2, 0x0e, 0x96, 0x05, 0x05, 0xe9, 0x9a, 0x05,
- 0x05, 0xf1, 0x92, 0x05, 0x06, 0x00, 0x9a, 0x05, 0x06, 0x08, 0x96, 0x05,
- 0x06, 0x49, 0x9a, 0x05, 0x06, 0x51, 0x92, 0x05, 0x06, 0x60, 0xcc, 0x1b,
- 0x65, 0x05, 0x00, 0xa8, 0x96, 0x05, 0x00, 0x21, 0x9a, 0x05, 0x00, 0x29,
- 0x92, 0x05, 0x00, 0x38, 0x96, 0x05, 0x00, 0xb1, 0x9a, 0x05, 0x00, 0xb9,
- 0x92, 0x05, 0x00, 0xc9, 0x87, 0x05, 0x00, 0xe2, 0x02, 0xa2, 0x12, 0x96,
- 0x05, 0x00, 0xf1, 0x9a, 0x05, 0x00, 0xf9, 0x92, 0x05, 0x01, 0x08, 0x91,
- 0x05, 0x01, 0x1b, 0x02, 0xa2, 0x1e, 0x96, 0x05, 0x01, 0x61, 0x9a, 0x05,
- 0x01, 0x69, 0x92, 0x05, 0x01, 0x79, 0x94, 0x05, 0x01, 0x8a, 0x02, 0xa2,
- 0x26, 0x96, 0x05, 0x01, 0x29, 0x9a, 0x05, 0x01, 0x31, 0x92, 0x05, 0x01,
- 0x40, 0x9a, 0x05, 0x01, 0x49, 0x92, 0x05, 0x01, 0x58, 0x96, 0x05, 0x01,
- 0x91, 0x9a, 0x05, 0x01, 0x99, 0x92, 0x05, 0x01, 0xa9, 0x94, 0x05, 0x01,
- 0xba, 0x02, 0xa2, 0x2a, 0x8b, 0x05, 0x02, 0xc3, 0x02, 0xa2, 0x2e, 0x83,
- 0x05, 0x03, 0x03, 0x02, 0xa2, 0x41, 0x97, 0x05, 0x03, 0x73, 0x02, 0xa2,
- 0x4d, 0x91, 0x05, 0x03, 0x3b, 0x02, 0xa2, 0x67, 0x87, 0x05, 0x03, 0x53,
- 0x02, 0xa2, 0x73, 0x9b, 0x05, 0x03, 0xa2, 0x02, 0xa2, 0x7b, 0x96, 0x05,
- 0x01, 0xc1, 0x9a, 0x05, 0x01, 0xc9, 0x92, 0x05, 0x01, 0xd9, 0x87, 0x05,
- 0x01, 0xea, 0x02, 0xa2, 0x87, 0x96, 0x05, 0x01, 0xf9, 0x9a, 0x05, 0x02,
- 0x01, 0x92, 0x05, 0x02, 0x10, 0x91, 0x05, 0x02, 0x23, 0x02, 0xa2, 0x8f,
- 0x96, 0x05, 0x02, 0x51, 0x9a, 0x05, 0x02, 0x59, 0x92, 0x05, 0x02, 0x69,
- 0x94, 0x05, 0x02, 0x7a, 0x02, 0xa2, 0x97, 0x96, 0x05, 0x02, 0x31, 0x9a,
- 0x05, 0x02, 0x39, 0x92, 0x05, 0x02, 0x48, 0x96, 0x05, 0x02, 0x81, 0x9a,
- 0x05, 0x02, 0x89, 0x92, 0x05, 0x02, 0x99, 0x94, 0x05, 0x02, 0xaa, 0x02,
- 0xa2, 0x9b, 0x96, 0x05, 0x06, 0x69, 0x9a, 0x05, 0x06, 0x71, 0x92, 0x05,
- 0x06, 0x80, 0x96, 0x05, 0x06, 0x89, 0x9a, 0x05, 0x06, 0x91, 0x92, 0x05,
- 0x06, 0xa0, 0x9a, 0x05, 0x06, 0xa9, 0x92, 0x05, 0x06, 0xb8, 0x96, 0x05,
- 0x06, 0xc1, 0x9a, 0x05, 0x06, 0xc9, 0x92, 0x05, 0x06, 0xd9, 0x94, 0x05,
- 0x06, 0xea, 0x02, 0xa2, 0x9f, 0x96, 0x05, 0x06, 0xf1, 0x9a, 0x05, 0x06,
- 0xf9, 0x92, 0x05, 0x07, 0x08, 0x96, 0x05, 0x07, 0x11, 0x9a, 0x05, 0x07,
- 0x19, 0x92, 0x05, 0x07, 0x29, 0x87, 0x05, 0x07, 0x42, 0x02, 0xa2, 0xa3,
- 0x96, 0x05, 0x07, 0x51, 0x9a, 0x05, 0x07, 0x59, 0x92, 0x05, 0x07, 0x68,
- 0x96, 0x05, 0x07, 0x71, 0x9a, 0x05, 0x07, 0x79, 0x92, 0x05, 0x07, 0x88,
- 0x9a, 0x05, 0x07, 0x91, 0x92, 0x05, 0x07, 0x98, 0x96, 0x05, 0x07, 0xa1,
- 0x9a, 0x05, 0x07, 0xa9, 0x92, 0x05, 0x07, 0xb9, 0x94, 0x05, 0x07, 0xca,
- 0x02, 0xa2, 0xaf, 0x96, 0x05, 0x07, 0xd1, 0x9a, 0x05, 0x07, 0xd9, 0x92,
- 0x05, 0x07, 0xe9, 0x94, 0x05, 0x07, 0xfa, 0x02, 0xa2, 0xb3, 0x96, 0x05,
- 0x08, 0x01, 0x9a, 0x05, 0x08, 0x09, 0x92, 0x05, 0x08, 0x19, 0x87, 0x05,
- 0x08, 0x2a, 0x02, 0xa2, 0xb7, 0x96, 0x05, 0x08, 0x39, 0x9a, 0x05, 0x08,
- 0x41, 0x92, 0x05, 0x08, 0x50, 0x91, 0x05, 0x08, 0x63, 0x02, 0xa2, 0xbf,
- 0x96, 0x05, 0x08, 0xa1, 0x9a, 0x05, 0x08, 0xa9, 0x92, 0x05, 0x08, 0xb9,
- 0x94, 0x05, 0x08, 0xca, 0x02, 0xa2, 0xc3, 0x96, 0x05, 0x08, 0x69, 0x9a,
- 0x05, 0x08, 0x71, 0x92, 0x05, 0x08, 0x80, 0x9a, 0x05, 0x08, 0x89, 0x92,
- 0x05, 0x08, 0x98, 0x8b, 0x05, 0x09, 0xc3, 0x02, 0xa2, 0xc7, 0x83, 0x05,
- 0x09, 0xfb, 0x02, 0xa2, 0xda, 0x97, 0x05, 0x0a, 0x6b, 0x02, 0xa2, 0xe6,
- 0x91, 0x05, 0x0a, 0x33, 0x02, 0xa3, 0x00, 0x87, 0x05, 0x0a, 0x4a, 0x02,
- 0xa3, 0x0c, 0x96, 0x05, 0x08, 0xd1, 0x9a, 0x05, 0x08, 0xd9, 0x92, 0x05,
- 0x08, 0xe9, 0x87, 0x05, 0x08, 0xfa, 0x02, 0xa3, 0x14, 0x96, 0x05, 0x09,
- 0x09, 0x9a, 0x05, 0x09, 0x11, 0x92, 0x05, 0x09, 0x20, 0x91, 0x05, 0x09,
- 0x3b, 0x02, 0xa3, 0x1c, 0x96, 0x05, 0x09, 0x81, 0x9a, 0x05, 0x09, 0x89,
- 0x92, 0x05, 0x09, 0x99, 0x94, 0x05, 0x09, 0xaa, 0x02, 0xa3, 0x28, 0x96,
- 0x05, 0x09, 0x49, 0x9a, 0x05, 0x09, 0x51, 0x92, 0x05, 0x09, 0x60, 0x9a,
- 0x05, 0x09, 0x69, 0x92, 0x05, 0x09, 0x78, 0x96, 0x05, 0x0d, 0xb9, 0x9a,
- 0x05, 0x0d, 0xc1, 0x92, 0x05, 0x0d, 0xd1, 0x87, 0x05, 0x0d, 0xea, 0x02,
- 0xa3, 0x2c, 0x96, 0x05, 0x0d, 0xf9, 0x9a, 0x05, 0x0e, 0x01, 0x92, 0x05,
- 0x0e, 0x10, 0x91, 0x05, 0x0e, 0x2b, 0x02, 0xa3, 0x38, 0x96, 0x05, 0x0e,
- 0x71, 0x9a, 0x05, 0x0e, 0x79, 0x92, 0x05, 0x0e, 0x89, 0x94, 0x05, 0x0e,
- 0x9a, 0x02, 0xa3, 0x44, 0x96, 0x05, 0x0e, 0x39, 0x9a, 0x05, 0x0e, 0x41,
- 0x92, 0x05, 0x0e, 0x50, 0x9a, 0x05, 0x0e, 0x59, 0x92, 0x05, 0x0e, 0x68,
- 0x96, 0x05, 0x0e, 0xa1, 0x9a, 0x05, 0x0e, 0xa9, 0x92, 0x05, 0x0e, 0xb9,
- 0x94, 0x05, 0x0e, 0xca, 0x02, 0xa3, 0x48, 0x96, 0x05, 0x0e, 0xd1, 0x9a,
- 0x05, 0x0e, 0xd9, 0x92, 0x05, 0x0e, 0xe9, 0x87, 0x05, 0x0f, 0x02, 0x02,
- 0xa3, 0x4c, 0x96, 0x05, 0x0f, 0x11, 0x9a, 0x05, 0x0f, 0x19, 0x92, 0x05,
- 0x0f, 0x28, 0x91, 0x05, 0x0f, 0x43, 0x02, 0xa3, 0x58, 0x96, 0x05, 0x0f,
- 0x91, 0x9a, 0x05, 0x0f, 0x99, 0x92, 0x05, 0x0f, 0xa9, 0x94, 0x05, 0x0f,
- 0xba, 0x02, 0xa3, 0x64, 0x96, 0x05, 0x0f, 0x51, 0x9a, 0x05, 0x0f, 0x59,
- 0x92, 0x05, 0x0f, 0x68, 0x96, 0x05, 0x0f, 0x71, 0x9a, 0x05, 0x0f, 0x79,
- 0x92, 0x05, 0x0f, 0x88, 0x8b, 0x05, 0x10, 0xb3, 0x02, 0xa3, 0x68, 0x83,
- 0x05, 0x10, 0xe3, 0x02, 0xa3, 0x77, 0x97, 0x05, 0x11, 0x63, 0x02, 0xa3,
- 0x83, 0x91, 0x05, 0x11, 0x23, 0x02, 0xa3, 0x9d, 0x87, 0x05, 0x11, 0x42,
- 0x02, 0xa3, 0xa9, 0x96, 0x05, 0x0f, 0xc1, 0x9a, 0x05, 0x0f, 0xc9, 0x92,
- 0x05, 0x0f, 0xd9, 0x87, 0x05, 0x0f, 0xea, 0x02, 0xa3, 0xb5, 0x96, 0x05,
- 0x0f, 0xf9, 0x9a, 0x05, 0x10, 0x01, 0x92, 0x05, 0x10, 0x10, 0x91, 0x05,
- 0x10, 0x23, 0x02, 0xa3, 0xbd, 0x96, 0x05, 0x10, 0x71, 0x9a, 0x05, 0x10,
- 0x79, 0x92, 0x05, 0x10, 0x89, 0x94, 0x05, 0x10, 0x9a, 0x02, 0xa3, 0xc5,
- 0x96, 0x05, 0x10, 0x31, 0x9a, 0x05, 0x10, 0x39, 0x92, 0x05, 0x10, 0x48,
- 0x96, 0x05, 0x10, 0x51, 0x9a, 0x05, 0x10, 0x59, 0x92, 0x05, 0x10, 0x68,
- 0x87, 0x05, 0x25, 0xd8, 0xc2, 0x00, 0x3e, 0x05, 0x24, 0x99, 0xc2, 0x00,
- 0x11, 0x05, 0x25, 0x38, 0x92, 0x05, 0x24, 0xa1, 0x96, 0x05, 0x25, 0x18,
- 0x9b, 0x05, 0x25, 0x81, 0xc2, 0x02, 0x53, 0x05, 0x25, 0xd1, 0xc2, 0x00,
- 0x9e, 0x05, 0x26, 0x01, 0xc2, 0x00, 0x11, 0x05, 0x26, 0x10, 0xc2, 0x00,
- 0x11, 0x05, 0x24, 0xb1, 0xc2, 0x00, 0xb1, 0x05, 0x25, 0x30, 0xc2, 0x00,
- 0x6d, 0x05, 0x24, 0xc9, 0xc2, 0x00, 0xb1, 0x05, 0x24, 0xf9, 0xc2, 0x00,
- 0x11, 0x05, 0x25, 0xf8, 0x92, 0x05, 0x25, 0x11, 0x94, 0x05, 0x26, 0x08,
- 0xc2, 0x00, 0x84, 0x05, 0x25, 0x51, 0x9b, 0x05, 0x25, 0xa9, 0xc2, 0x04,
- 0x0a, 0x05, 0x25, 0xb8, 0x8e, 0x08, 0x74, 0x60, 0xc3, 0x36, 0x15, 0x08,
- 0x74, 0x41, 0xc2, 0x01, 0x2e, 0x08, 0x74, 0x38, 0x44, 0xe2, 0xeb, 0x42,
- 0xa3, 0xc9, 0x8b, 0x00, 0xa7, 0x70, 0x91, 0x00, 0xa8, 0xeb, 0x02, 0xa3,
- 0xe7, 0x83, 0x00, 0xa9, 0x0b, 0x02, 0xa3, 0xef, 0x8b, 0x00, 0xa8, 0xcb,
- 0x02, 0xa3, 0xf3, 0x87, 0x00, 0xa8, 0xb8, 0x9b, 0x00, 0xc6, 0x09, 0x83,
- 0x00, 0xa8, 0xb0, 0x9b, 0x00, 0xc6, 0x01, 0x91, 0x00, 0xa8, 0xa0, 0x8b,
- 0x00, 0xa8, 0x90, 0xc2, 0x0d, 0xf7, 0x00, 0xa4, 0x29, 0xc2, 0x14, 0x44,
- 0x00, 0xa4, 0x31, 0xc2, 0x22, 0x1f, 0x00, 0xa4, 0x39, 0xc2, 0x02, 0x98,
- 0x00, 0xa4, 0x40, 0x83, 0x00, 0xa8, 0x10, 0x8b, 0x00, 0xa7, 0xd0, 0x91,
- 0x00, 0xa7, 0xf0, 0x43, 0x58, 0x40, 0xc2, 0xa3, 0xf7, 0x0a, 0x42, 0xa4,
- 0x0c, 0xc4, 0xdf, 0xfb, 0x00, 0xa9, 0xe9, 0x19, 0xc2, 0xa4, 0x21, 0x15,
- 0xc2, 0xa4, 0x2d, 0xc4, 0xe2, 0x3b, 0x00, 0xa4, 0x11, 0xc4, 0xe4, 0xab,
- 0x00, 0xa5, 0x01, 0xc4, 0xe4, 0xef, 0x00, 0xa5, 0xd1, 0xc4, 0xe5, 0x23,
- 0x00, 0xa6, 0x79, 0xc4, 0xe0, 0x77, 0x00, 0xa3, 0x28, 0x8b, 0x00, 0xa6,
- 0x08, 0x91, 0x00, 0xc6, 0x60, 0x8b, 0x00, 0xc6, 0x40, 0x83, 0x00, 0xa6,
- 0x68, 0x83, 0x00, 0xb3, 0xb0, 0x91, 0x00, 0xb3, 0xa0, 0x8b, 0x00, 0xb3,
- 0x90, 0x8b, 0x00, 0xb3, 0x81, 0x83, 0x00, 0xac, 0xa2, 0x02, 0xa4, 0x54,
- 0x91, 0x00, 0xac, 0x90, 0x8b, 0x00, 0xac, 0x80, 0x83, 0x00, 0xab, 0xcb,
- 0x02, 0xa4, 0x58, 0x91, 0x00, 0xab, 0xbb, 0x02, 0xa4, 0x5c, 0x8b, 0x00,
- 0xab, 0xab, 0x02, 0xa4, 0x60, 0x87, 0x00, 0xab, 0xa0, 0x8b, 0x00, 0xab,
- 0x18, 0x06, 0xc2, 0xa4, 0x64, 0x0c, 0xc2, 0xa4, 0x74, 0x09, 0xc2, 0xa4,
- 0x95, 0x16, 0xc2, 0xa4, 0xb7, 0x42, 0x11, 0xd4, 0xc2, 0xa4, 0xc7, 0x1b,
- 0xc2, 0xa4, 0xde, 0x0f, 0xc2, 0xa4, 0xf5, 0x10, 0xc2, 0xa5, 0x0c, 0x0d,
- 0xc2, 0xa5, 0x27, 0x92, 0x00, 0xaf, 0x73, 0x02, 0xa5, 0x32, 0x8a, 0x00,
- 0xa2, 0x5b, 0x02, 0xa5, 0x49, 0x19, 0xc2, 0xa5, 0x57, 0x14, 0xc2, 0xa5,
- 0x6e, 0x0e, 0xc2, 0xa5, 0x85, 0xc2, 0x00, 0x4c, 0x00, 0xa0, 0x41, 0x8b,
- 0x00, 0xa0, 0x4b, 0x02, 0xa5, 0xa0, 0x9c, 0x00, 0xb2, 0x33, 0x02, 0xa5,
- 0xa6, 0x15, 0x42, 0xa5, 0xbd, 0x8b, 0x00, 0xa4, 0x50, 0x91, 0x00, 0xa4,
- 0xd0, 0x8b, 0x00, 0xa4, 0xb0, 0x83, 0x00, 0xa4, 0xf0, 0x83, 0x00, 0xad,
- 0xb9, 0x91, 0x00, 0xad, 0xb1, 0x8b, 0x00, 0xad, 0xa9, 0x87, 0x00, 0xad,
- 0xa0, 0x83, 0x00, 0xad, 0xf9, 0x91, 0x00, 0xad, 0xf1, 0x8b, 0x00, 0xad,
- 0xe9, 0x87, 0x00, 0xad, 0xe0, 0x83, 0x00, 0xad, 0xd9, 0x91, 0x00, 0xad,
- 0xd1, 0x8b, 0x00, 0xad, 0xc9, 0x87, 0x00, 0xad, 0xc0, 0x91, 0x00, 0xc7,
- 0x48, 0x83, 0x00, 0xab, 0x73, 0x02, 0xa5, 0xdb, 0x91, 0x00, 0xab, 0x6b,
- 0x02, 0xa5, 0xdf, 0xc2, 0x00, 0x4b, 0x00, 0xc7, 0x29, 0x8b, 0x00, 0xab,
- 0x61, 0x87, 0x00, 0xab, 0x58, 0x83, 0x00, 0xc7, 0x23, 0x02, 0xa5, 0xe3,
- 0x87, 0x00, 0xc7, 0x18, 0x83, 0x00, 0xad, 0x63, 0x02, 0xa5, 0xe7, 0x91,
- 0x00, 0xad, 0x53, 0x02, 0xa5, 0xeb, 0x8b, 0x00, 0xad, 0x43, 0x02, 0xa5,
- 0xef, 0x87, 0x00, 0xad, 0x38, 0x83, 0x00, 0xab, 0x38, 0x91, 0x00, 0xab,
- 0x28, 0x8b, 0x00, 0xab, 0x10, 0x8b, 0x00, 0xa2, 0x68, 0x91, 0x00, 0xa2,
- 0xf8, 0x8b, 0x00, 0xa2, 0xd8, 0x83, 0x00, 0xa3, 0x18, 0x46, 0x90, 0xd0,
- 0xc2, 0xa5, 0xf3, 0xc5, 0xba, 0x68, 0x00, 0xc6, 0xe8, 0x48, 0xbd, 0xd5,
- 0x42, 0xa6, 0x3a, 0x83, 0x00, 0xaa, 0x70, 0x91, 0x00, 0xc6, 0x90, 0x8b,
- 0x00, 0xc6, 0x80, 0x8b, 0x00, 0xaa, 0x28, 0x14, 0xc2, 0xa6, 0x49, 0x15,
- 0xc2, 0xa6, 0x53, 0xc5, 0x35, 0x4a, 0x00, 0xa0, 0xf9, 0xc5, 0x1f, 0x94,
- 0x00, 0xa1, 0x01, 0xd0, 0x5a, 0xc2, 0x00, 0xa1, 0x09, 0xcd, 0x76, 0xd5,
- 0x00, 0xa1, 0x11, 0x42, 0x01, 0x4a, 0xc2, 0xa6, 0x5f, 0xca, 0x39, 0xef,
- 0x00, 0xa1, 0x39, 0xc4, 0x26, 0x12, 0x00, 0xa1, 0x48, 0x8b, 0x00, 0xaa,
- 0xa0, 0x8a, 0x00, 0xc6, 0xd8, 0x19, 0x42, 0xa6, 0x6b, 0x8b, 0x00, 0xa9,
- 0x38, 0x83, 0x00, 0xa9, 0xd8, 0x91, 0x00, 0xa9, 0xb8, 0x8b, 0x00, 0xa9,
- 0x98, 0xc3, 0x14, 0x3f, 0x00, 0xa2, 0x41, 0xc2, 0x02, 0x84, 0x00, 0xa1,
- 0xa8, 0x8b, 0x00, 0xa6, 0xa0, 0x83, 0x00, 0xad, 0x28, 0x91, 0x00, 0xad,
- 0x18, 0x8b, 0x00, 0xad, 0x08, 0x8b, 0x00, 0xa7, 0x00, 0x91, 0x00, 0xa7,
- 0x20, 0x83, 0x00, 0xa7, 0x40, 0x8b, 0x00, 0xa5, 0x20, 0x94, 0x00, 0xaa,
- 0x91, 0x8e, 0x00, 0xa7, 0x60, 0xca, 0xa7, 0x02, 0x00, 0xa8, 0x48, 0x8b,
- 0x00, 0xa5, 0x80, 0x91, 0x00, 0xa5, 0xa0, 0x83, 0x00, 0xa5, 0xc0, 0x9b,
- 0x00, 0xc5, 0xc9, 0x83, 0x00, 0xa4, 0x00, 0x8b, 0x00, 0xa3, 0xc0, 0x91,
- 0x00, 0xa3, 0xe0, 0x8b, 0x00, 0xa3, 0x60, 0x9b, 0x00, 0xc5, 0xb1, 0x91,
- 0x00, 0xa2, 0x10, 0x83, 0x00, 0xa2, 0x30, 0x8b, 0x00, 0xa1, 0xf0, 0x8b,
- 0x00, 0xa1, 0x80, 0x8b, 0x00, 0xab, 0xf0, 0x97, 0x08, 0x15, 0xd9, 0x9f,
- 0x08, 0x16, 0x41, 0xa0, 0x08, 0x16, 0x80, 0xc3, 0x4b, 0xbc, 0x08, 0x2a,
- 0x79, 0xc2, 0x0c, 0x81, 0x08, 0x2a, 0xa8, 0xc2, 0x00, 0x31, 0x08, 0x29,
- 0xb9, 0x83, 0x08, 0x29, 0xd8, 0x83, 0x08, 0x29, 0xcb, 0x02, 0xa6, 0x79,
- 0xc2, 0x64, 0x77, 0x08, 0x2a, 0x49, 0x8b, 0x08, 0x2a, 0x50, 0x94, 0x08,
- 0x2a, 0x11, 0xc2, 0x17, 0x51, 0x08, 0x2b, 0x00, 0x9b, 0x08, 0x2a, 0x59,
- 0x99, 0x08, 0x2a, 0xf8, 0x83, 0x08, 0x29, 0xeb, 0x02, 0xa6, 0x7d, 0xc2,
- 0x64, 0x77, 0x08, 0x2a, 0xe8, 0xc2, 0x01, 0x47, 0x01, 0x74, 0x19, 0xc4,
- 0x04, 0x5e, 0x01, 0x74, 0x20, 0xce, 0x73, 0xd5, 0x01, 0x75, 0x31, 0xc3,
- 0x00, 0xbf, 0x01, 0x76, 0x30, 0xc3, 0xc1, 0x2a, 0x01, 0x76, 0x61, 0xc4,
- 0x94, 0xd6, 0x01, 0x77, 0x40, 0x89, 0x01, 0x8f, 0x08, 0x83, 0x05, 0x5b,
- 0xb1, 0x87, 0x05, 0x5b, 0xc1, 0x8b, 0x05, 0x5b, 0xc9, 0x91, 0x05, 0x5b,
- 0xd1, 0x97, 0x05, 0x5b, 0xd9, 0x98, 0x05, 0x5b, 0xe0, 0x83, 0x05, 0x5d,
- 0xf9, 0x87, 0x00, 0x9f, 0xc1, 0x8b, 0x00, 0x9f, 0xc9, 0x91, 0x00, 0x9f,
- 0xd1, 0x97, 0x00, 0x9f, 0xd9, 0x98, 0x00, 0x9f, 0xe0, 0x98, 0x05, 0x5d,
- 0xf1, 0x97, 0x05, 0x5d, 0xe9, 0x91, 0x05, 0x5d, 0xe1, 0x8b, 0x05, 0x5d,
- 0xd9, 0x87, 0x05, 0x5d, 0xd1, 0x83, 0x05, 0x5d, 0xc8, 0x15, 0xc2, 0xa6,
- 0x81, 0x0e, 0xc2, 0xa6, 0x99, 0x83, 0x05, 0x5d, 0x21, 0x8b, 0x05, 0x5d,
- 0x41, 0x87, 0x05, 0x5d, 0x30, 0x91, 0x05, 0x5c, 0x99, 0x8b, 0x05, 0x5c,
- 0x91, 0x87, 0x05, 0x5c, 0x89, 0x83, 0x05, 0x5c, 0x73, 0x02, 0xa6, 0xb1,
- 0x97, 0x05, 0x5c, 0xa1, 0x98, 0x05, 0x5c, 0xa8, 0xc2, 0x00, 0xc1, 0x05,
- 0x5c, 0x79, 0x83, 0x05, 0x5b, 0xe9, 0x87, 0x05, 0x5b, 0xf1, 0x8b, 0x05,
- 0x5b, 0xf9, 0x91, 0x05, 0x5c, 0x01, 0x97, 0x05, 0x5c, 0x09, 0x98, 0x05,
- 0x5c, 0x10, 0x97, 0x05, 0x5c, 0x69, 0x91, 0x05, 0x5c, 0x61, 0x8b, 0x05,
- 0x5c, 0x59, 0x87, 0x05, 0x5c, 0x51, 0x83, 0x05, 0x5c, 0x49, 0x98, 0x00,
- 0x9f, 0xe8, 0x98, 0x05, 0x5c, 0x41, 0x97, 0x05, 0x5c, 0x39, 0x91, 0x05,
- 0x5c, 0x31, 0x8b, 0x05, 0x5c, 0x29, 0x87, 0x05, 0x5c, 0x21, 0x83, 0x05,
- 0x5c, 0x18, 0x83, 0x05, 0x5c, 0xb1, 0x87, 0x05, 0x5c, 0xb9, 0x8b, 0x05,
- 0x5c, 0xc1, 0x91, 0x05, 0x5c, 0xc9, 0x97, 0x05, 0x5c, 0xd1, 0x98, 0x05,
- 0x5c, 0xd8, 0x83, 0x05, 0x5c, 0xe1, 0x87, 0x05, 0x5c, 0xf1, 0x8b, 0x05,
- 0x5c, 0xf9, 0x91, 0x05, 0x5d, 0x01, 0x97, 0x05, 0x5d, 0x09, 0x98, 0x05,
- 0x5d, 0x10, 0x83, 0x05, 0x5d, 0x19, 0x87, 0x05, 0x5d, 0x29, 0x8b, 0x05,
- 0x5d, 0x39, 0x91, 0x05, 0x5d, 0x49, 0x97, 0x05, 0x5d, 0x51, 0x98, 0x05,
- 0x5d, 0x59, 0xc2, 0x00, 0xc7, 0x05, 0x5d, 0x60, 0x83, 0x00, 0x9d, 0x31,
- 0x87, 0x00, 0x9d, 0x41, 0x8b, 0x00, 0x9d, 0x49, 0x91, 0x00, 0x9d, 0x51,
- 0x97, 0x00, 0x9d, 0x59, 0x98, 0x00, 0x9d, 0x60, 0x83, 0x00, 0x9d, 0x69,
- 0x87, 0x00, 0x9d, 0x71, 0x8b, 0x00, 0x9d, 0x79, 0x91, 0x00, 0x9d, 0x81,
- 0x97, 0x00, 0x9d, 0x89, 0x98, 0x00, 0x9d, 0x91, 0xc2, 0x00, 0xc1, 0x00,
- 0x9d, 0xf8, 0x83, 0x00, 0x9d, 0x99, 0x87, 0x00, 0x9d, 0xa1, 0x8b, 0x00,
- 0x9d, 0xa9, 0x91, 0x00, 0x9d, 0xb1, 0x97, 0x00, 0x9d, 0xb9, 0x98, 0x00,
- 0x9d, 0xc0, 0x83, 0x00, 0x9d, 0xc9, 0x87, 0x00, 0x9d, 0xd1, 0x8b, 0x00,
- 0x9d, 0xd9, 0x91, 0x00, 0x9d, 0xe1, 0x97, 0x00, 0x9d, 0xe9, 0x98, 0x00,
- 0x9f, 0xa8, 0x83, 0x00, 0x9d, 0xf3, 0x02, 0xa6, 0xb5, 0x87, 0x00, 0x9e,
- 0x09, 0x8b, 0x00, 0x9e, 0x11, 0x91, 0x00, 0x9e, 0x19, 0x97, 0x00, 0x9e,
- 0x21, 0x98, 0x00, 0x9e, 0x28, 0x83, 0x00, 0x9e, 0x31, 0x87, 0x00, 0x9e,
- 0x39, 0x8b, 0x00, 0x9e, 0x41, 0x91, 0x00, 0x9e, 0x49, 0x97, 0x00, 0x9e,
- 0x51, 0x98, 0x00, 0x9e, 0x58, 0x83, 0x00, 0x9e, 0x61, 0x87, 0x00, 0x9e,
- 0x71, 0x8b, 0x00, 0x9e, 0x79, 0x91, 0x00, 0x9e, 0x81, 0x97, 0x00, 0x9e,
- 0x89, 0x98, 0x00, 0x9e, 0x90, 0x83, 0x00, 0x9e, 0x99, 0x87, 0x00, 0x9e,
- 0xa9, 0x8b, 0x00, 0x9e, 0xb9, 0x91, 0x00, 0x9e, 0xc9, 0x97, 0x00, 0x9e,
- 0xd1, 0x98, 0x00, 0x9e, 0xd9, 0xc2, 0x00, 0xc7, 0x00, 0x9e, 0xe0, 0x83,
- 0x00, 0x9e, 0xa1, 0x87, 0x00, 0x9e, 0xb1, 0x8b, 0x00, 0x9e, 0xc1, 0x0e,
- 0xc2, 0xa6, 0xb9, 0x15, 0x42, 0xa6, 0xd1, 0x83, 0x00, 0x9f, 0x49, 0x87,
- 0x00, 0x9f, 0x51, 0x8b, 0x00, 0x9f, 0x59, 0x91, 0x00, 0x9f, 0x61, 0x97,
- 0x00, 0x9f, 0x69, 0x98, 0x00, 0x9f, 0x70, 0x83, 0x00, 0x9f, 0x79, 0x87,
- 0x00, 0x9f, 0x81, 0x8b, 0x00, 0x9f, 0x89, 0x91, 0x00, 0x9f, 0x91, 0x97,
- 0x00, 0x9f, 0x99, 0x98, 0x00, 0x9f, 0xa0, 0xc3, 0x0f, 0x21, 0x00, 0x04,
- 0x41, 0xd2, 0x4a, 0x58, 0x00, 0x04, 0x48, 0xc3, 0x3b, 0xc7, 0x08, 0x88,
- 0xa1, 0xc2, 0x14, 0x40, 0x08, 0x88, 0x98, 0xc3, 0x3b, 0xc7, 0x08, 0x88,
- 0x91, 0xc2, 0x14, 0x40, 0x08, 0x88, 0x88, 0x8b, 0x08, 0x8a, 0x30, 0x83,
- 0x08, 0x8a, 0x29, 0x97, 0x08, 0x89, 0x79, 0x8b, 0x08, 0x89, 0x68, 0x8b,
- 0x08, 0x89, 0x80, 0x97, 0x08, 0x89, 0x58, 0x8b, 0x08, 0x89, 0x48, 0xc4,
- 0x18, 0x83, 0x08, 0x89, 0xe9, 0xc2, 0x26, 0x51, 0x08, 0x89, 0xe0, 0xc3,
- 0x0c, 0x5b, 0x08, 0x89, 0xd9, 0xc3, 0x06, 0x9e, 0x08, 0x89, 0xd0, 0xc4,
- 0x04, 0x5e, 0x08, 0x89, 0xc9, 0xc2, 0x01, 0x47, 0x08, 0x89, 0xc0, 0xc2,
- 0x0f, 0x4d, 0x05, 0x50, 0x51, 0x83, 0x05, 0x50, 0x58, 0xc2, 0x24, 0x58,
- 0x05, 0x50, 0x91, 0x83, 0x05, 0x50, 0x89, 0xc2, 0x0f, 0x4d, 0x05, 0x50,
- 0x80, 0x89, 0x05, 0x52, 0x10, 0xc4, 0x18, 0x85, 0x08, 0x7e, 0x51, 0x91,
- 0x08, 0x7e, 0x30, 0xd7, 0x27, 0x85, 0x0f, 0xaa, 0x08, 0xce, 0x6f, 0x83,
- 0x01, 0x72, 0x81, 0xcd, 0x76, 0x7a, 0x01, 0x72, 0x88, 0xe0, 0x05, 0xc7,
- 0x0f, 0x04, 0x78, 0xce, 0x6f, 0x21, 0x00, 0x24, 0x41, 0xcd, 0x32, 0x88,
- 0x05, 0x33, 0x88, 0xc7, 0xc6, 0x34, 0x00, 0x24, 0x39, 0xcd, 0x7f, 0x43,
- 0x00, 0x24, 0x31, 0x03, 0x42, 0xa6, 0xe9, 0xc4, 0x9a, 0xe1, 0x00, 0x24,
- 0x1b, 0x02, 0xa6, 0xf5, 0xd0, 0x5c, 0x92, 0x05, 0x33, 0x81, 0xd5, 0x32,
- 0x80, 0x05, 0x33, 0x90, 0x07, 0xc2, 0xa6, 0xf9, 0x8b, 0x05, 0x33, 0xab,
- 0x02, 0xa7, 0x14, 0x97, 0x05, 0x33, 0xbb, 0x02, 0xa7, 0x1e, 0x1b, 0xc2,
- 0xa7, 0x24, 0xc2, 0x00, 0xa4, 0x01, 0x6f, 0x7b, 0x02, 0xa7, 0x38, 0x15,
- 0xc2, 0xa7, 0x3e, 0x91, 0x01, 0x6f, 0x53, 0x02, 0xa7, 0x48, 0x04, 0xc2,
- 0xa7, 0x4e, 0xc2, 0x00, 0x7b, 0x01, 0x6f, 0x09, 0xc3, 0x8f, 0x12, 0x01,
- 0x6f, 0x11, 0x06, 0xc2, 0xa7, 0x58, 0x1c, 0xc2, 0xa7, 0x62, 0xc2, 0x04,
- 0x2b, 0x01, 0x6f, 0x31, 0xc2, 0x00, 0x27, 0x01, 0x6f, 0x59, 0x16, 0xc2,
- 0xa7, 0x6c, 0xc3, 0x26, 0x9b, 0x01, 0x6f, 0x89, 0xc4, 0xe1, 0x87, 0x01,
- 0x6f, 0xa1, 0x83, 0x01, 0x6f, 0xb1, 0xcc, 0x87, 0xac, 0x01, 0x6f, 0xc9,
- 0xca, 0x52, 0x67, 0x01, 0x6f, 0xe8, 0xc6, 0x01, 0x61, 0x00, 0x19, 0x60,
- 0xc5, 0x00, 0x95, 0x00, 0x18, 0x9b, 0x02, 0xa7, 0x76, 0xc5, 0x01, 0x62,
- 0x00, 0x19, 0x30, 0xc6, 0x01, 0x61, 0x07, 0xf1, 0x68, 0xc3, 0x04, 0x44,
- 0x0f, 0x01, 0x51, 0xc4, 0xaf, 0x43, 0x0f, 0x00, 0xb8, 0x47, 0x1d, 0x45,
- 0xc2, 0xa7, 0x7c, 0xcb, 0x91, 0x71, 0x0f, 0x00, 0x51, 0xc3, 0x78, 0x94,
- 0x0f, 0x00, 0x48, 0xc6, 0xc7, 0x4d, 0x0f, 0x01, 0x41, 0xc3, 0xe6, 0x8e,
- 0x0f, 0x00, 0x08, 0x91, 0x0f, 0x01, 0x31, 0x97, 0x0f, 0x01, 0x19, 0xc3,
- 0x00, 0xb4, 0x0f, 0x01, 0x09, 0x07, 0x42, 0xa7, 0x88, 0xc8, 0xa9, 0x8f,
- 0x0f, 0x01, 0x21, 0x0a, 0xc2, 0xa7, 0x92, 0xc4, 0xe6, 0x1b, 0x0f, 0x00,
- 0xa0, 0xc2, 0x00, 0xcb, 0x0f, 0x00, 0xe1, 0xc5, 0xd9, 0x8a, 0x0f, 0x00,
- 0xa8, 0xc5, 0xde, 0xcb, 0x0f, 0x00, 0x61, 0xc4, 0xe5, 0x7f, 0x0f, 0x00,
- 0x20, 0xc5, 0xd8, 0x59, 0x0f, 0x00, 0x41, 0xc6, 0xd1, 0xd1, 0x0f, 0x00,
- 0x30, 0x48, 0x22, 0x43, 0xc2, 0xa7, 0x9c, 0xcb, 0x95, 0x4f, 0x00, 0x1a,
- 0x11, 0xc7, 0xc1, 0xc6, 0x00, 0x1a, 0x19, 0xcf, 0x63, 0xbc, 0x00, 0x1a,
- 0x21, 0xcd, 0x48, 0x1d, 0x00, 0x1a, 0x28, 0x45, 0xda, 0x98, 0xc2, 0xa7,
- 0xa6, 0x42, 0x00, 0x7b, 0xc2, 0xa7, 0xb2, 0xcc, 0x8b, 0x00, 0x00, 0x1a,
- 0x78, 0xcc, 0x85, 0x30, 0x01, 0x06, 0xd1, 0xcb, 0x09, 0xdc, 0x01, 0x06,
- 0xa0, 0xcb, 0x96, 0x15, 0x00, 0xee, 0x49, 0xc6, 0x60, 0xe6, 0x00, 0xee,
- 0x38, 0xc6, 0x0b, 0x41, 0x00, 0x18, 0x0b, 0x02, 0xa7, 0xba, 0xc9, 0x29,
- 0xa4, 0x00, 0x1a, 0x08, 0x00, 0xc2, 0xa7, 0xc0, 0x19, 0x42, 0xa7, 0xd8,
- 0xc7, 0x20, 0x61, 0x01, 0x06, 0xc1, 0xc5, 0x00, 0x95, 0x00, 0x18, 0x51,
- 0xc5, 0x01, 0x62, 0x00, 0x19, 0x28, 0xd0, 0x2d, 0xbd, 0x01, 0x07, 0x29,
- 0xcd, 0x53, 0xfc, 0x00, 0x18, 0xa0, 0x03, 0xc2, 0xa7, 0xde, 0x4c, 0x09,
- 0xd6, 0xc2, 0xa7, 0xea, 0x42, 0x00, 0xa4, 0xc2, 0xa7, 0xf6, 0x4c, 0x1a,
- 0x3f, 0xc2, 0xa8, 0x02, 0xca, 0x9b, 0x33, 0x00, 0x18, 0xc0, 0xdb, 0x0b,
- 0xcc, 0x01, 0x07, 0x69, 0xcd, 0x7b, 0x9b, 0x01, 0x07, 0x50, 0xd6, 0x2d,
- 0xb7, 0x01, 0x07, 0x59, 0xd5, 0x36, 0xd9, 0x01, 0x06, 0x91, 0x15, 0x42,
- 0xa8, 0x0e, 0x97, 0x00, 0x1b, 0x3b, 0x02, 0xa8, 0x1a, 0x91, 0x00, 0x1b,
- 0x33, 0x02, 0xa8, 0x20, 0x83, 0x00, 0x1b, 0x1b, 0x02, 0xa8, 0x26, 0x99,
- 0x00, 0xef, 0x8b, 0x02, 0xa8, 0x3e, 0x87, 0x00, 0x1b, 0x23, 0x02, 0xa8,
- 0x44, 0x92, 0x00, 0xef, 0x71, 0x8e, 0x00, 0xee, 0xeb, 0x02, 0xa8, 0x50,
- 0x88, 0x00, 0xef, 0x5b, 0x02, 0xa8, 0x5c, 0x95, 0x00, 0xef, 0x23, 0x02,
- 0xa8, 0x62, 0x84, 0x00, 0xef, 0x43, 0x02, 0xa8, 0x68, 0x9c, 0x00, 0xef,
- 0x31, 0x94, 0x00, 0x1b, 0x63, 0x02, 0xa8, 0x6e, 0x90, 0x00, 0xef, 0x01,
- 0x8d, 0x00, 0xee, 0xe1, 0x89, 0x00, 0xee, 0xd1, 0x8b, 0x00, 0x1b, 0x2b,
- 0x02, 0xa8, 0x72, 0x85, 0x00, 0x1b, 0x43, 0x02, 0xa8, 0x78, 0x96, 0x00,
- 0x1b, 0x6b, 0x02, 0xa8, 0x7e, 0x86, 0x00, 0x1b, 0x49, 0x8a, 0x00, 0x1b,
- 0x51, 0x8f, 0x00, 0x1b, 0x59, 0x98, 0x00, 0x1b, 0x71, 0x9a, 0x00, 0x1b,
- 0x78, 0x94, 0x00, 0xef, 0x11, 0x90, 0x00, 0xef, 0x09, 0x8f, 0x00, 0xee,
- 0xf9, 0x8e, 0x00, 0xee, 0xf1, 0x89, 0x00, 0xee, 0xd8, 0xc9, 0x0f, 0x34,
- 0x07, 0xf1, 0x03, 0x02, 0xa8, 0x84, 0xca, 0x0a, 0xf7, 0x07, 0xf1, 0x0a,
- 0x02, 0xa8, 0x8a, 0xc5, 0x01, 0x62, 0x00, 0x19, 0x81, 0xc7, 0x20, 0x61,
- 0x00, 0x19, 0xa1, 0xcf, 0x69, 0xd4, 0x07, 0xf1, 0x49, 0xd0, 0x5e, 0x62,
- 0x07, 0xf1, 0x50, 0x00, 0xc2, 0xa8, 0x90, 0xd3, 0x40, 0xca, 0x00, 0xd5,
- 0x80, 0x00, 0xc2, 0xa8, 0xe0, 0x44, 0x03, 0x1e, 0x42, 0xa8, 0xf2, 0xcb,
- 0x01, 0x09, 0x00, 0xd5, 0x99, 0xcb, 0x9b, 0x32, 0x00, 0x18, 0xf0, 0xcd,
- 0x76, 0x53, 0x05, 0x47, 0x89, 0x47, 0x02, 0x90, 0xc2, 0xa8, 0xfe, 0x46,
- 0x06, 0x97, 0x42, 0xa9, 0x24, 0xc5, 0x4f, 0xdf, 0x01, 0x07, 0x11, 0xc5,
- 0x07, 0x0a, 0x01, 0x06, 0xf0, 0xca, 0x00, 0xdd, 0x01, 0x07, 0x00, 0xcd,
- 0x45, 0xb6, 0x00, 0x19, 0xa9, 0xce, 0x2d, 0xbf, 0x00, 0x19, 0xb8, 0xc7,
- 0xc7, 0x22, 0x00, 0xee, 0x59, 0xc6, 0x01, 0x61, 0x00, 0x19, 0x70, 0xc5,
- 0x01, 0x62, 0x00, 0x19, 0x51, 0xc5, 0x00, 0x95, 0x00, 0x1a, 0x30, 0xc5,
- 0x00, 0x95, 0x00, 0xef, 0xa9, 0xc5, 0x01, 0x62, 0x00, 0x18, 0xe8, 0x4c,
- 0x8c, 0x74, 0xc2, 0xa9, 0x48, 0x42, 0x02, 0x58, 0x42, 0xa9, 0x54, 0xc5,
- 0x1f, 0x0a, 0x00, 0xee, 0x61, 0xc5, 0x1f, 0x94, 0x00, 0xee, 0x31, 0xc5,
- 0x35, 0x4a, 0x00, 0xee, 0x20, 0xc5, 0x01, 0x62, 0x00, 0x19, 0x89, 0xc9,
- 0x0f, 0x34, 0x07, 0xf1, 0x23, 0x02, 0xa9, 0x63, 0xca, 0x0a, 0xf7, 0x07,
- 0xf1, 0x2a, 0x02, 0xa9, 0x69, 0xc7, 0x20, 0x61, 0x00, 0xd5, 0xf1, 0xc5,
- 0x01, 0x62, 0x00, 0xd5, 0xe9, 0xc5, 0x00, 0x95, 0x00, 0xd5, 0xd8, 0xc4,
- 0x18, 0x83, 0x0e, 0x9b, 0x79, 0xc2, 0x26, 0x51, 0x0e, 0x9b, 0x70, 0xc3,
- 0x0c, 0x5b, 0x0e, 0x9b, 0x69, 0xc3, 0x06, 0x9e, 0x0e, 0x9b, 0x60, 0xc4,
- 0x04, 0x5e, 0x0e, 0x9b, 0x59, 0xc2, 0x01, 0x47, 0x0e, 0x9b, 0x50, 0xc4,
- 0x18, 0x83, 0x0e, 0x9b, 0x31, 0xc2, 0x26, 0x51, 0x0e, 0x9b, 0x28, 0xc3,
- 0x0c, 0x5b, 0x0e, 0x9b, 0x21, 0xc3, 0x06, 0x9e, 0x0e, 0x9b, 0x18, 0xc4,
- 0x04, 0x5e, 0x0e, 0x9b, 0x11, 0xc2, 0x01, 0x47, 0x0e, 0x9b, 0x08, 0xe0,
- 0x00, 0xe7, 0x01, 0x17, 0xd8, 0xcc, 0x23, 0x34, 0x01, 0x15, 0xa8, 0x0a,
- 0xc2, 0xa9, 0x6f, 0xc3, 0x42, 0x32, 0x01, 0x64, 0xa9, 0xc2, 0x00, 0xcb,
- 0x01, 0x64, 0xe8, 0xc3, 0x00, 0xf7, 0x00, 0x1f, 0x49, 0xc3, 0x00, 0x9e,
- 0x01, 0x64, 0x78, 0xc4, 0xcf, 0xab, 0x00, 0x1f, 0x59, 0xc3, 0x00, 0xac,
- 0x01, 0x64, 0x28, 0x0a, 0xc2, 0xa9, 0x79, 0xc2, 0x00, 0x83, 0x01, 0x64,
- 0x59, 0xc3, 0x07, 0x4a, 0x01, 0x65, 0x29, 0xc4, 0x8a, 0x54, 0x01, 0x66,
- 0x08, 0xc2, 0x01, 0xdb, 0x00, 0x1f, 0x79, 0xc4, 0xe3, 0xc3, 0x01, 0x64,
- 0x39, 0x49, 0xaa, 0x15, 0x42, 0xa9, 0x85, 0xc3, 0xe7, 0x5a, 0x01, 0x64,
- 0x09, 0xcc, 0x85, 0xc0, 0x01, 0x66, 0x48, 0xc5, 0xd7, 0x14, 0x01, 0x64,
- 0x89, 0xc2, 0x13, 0xa9, 0x01, 0x65, 0x38, 0xc4, 0x50, 0x72, 0x01, 0x64,
- 0xb9, 0xca, 0xa1, 0xd0, 0x01, 0x66, 0x88, 0xc2, 0x00, 0x83, 0x01, 0x65,
- 0x89, 0x43, 0x1e, 0x5c, 0x42, 0xa9, 0x9d, 0x8b, 0x01, 0x65, 0x09, 0xc2,
- 0x00, 0xcb, 0x01, 0x65, 0x78, 0x8b, 0x01, 0x65, 0x59, 0xc2, 0x01, 0xdb,
- 0x00, 0x1f, 0x28, 0x4c, 0x1f, 0xaa, 0xc2, 0xa9, 0xa9, 0xca, 0x9f, 0xfa,
- 0x01, 0x66, 0x18, 0xc2, 0x00, 0xda, 0x01, 0x67, 0x21, 0xc5, 0xd7, 0x05,
- 0x01, 0x67, 0x48, 0xc6, 0xd2, 0x37, 0x01, 0x67, 0x39, 0xc9, 0xaa, 0x30,
- 0x01, 0x67, 0x50, 0xc3, 0x00, 0xf7, 0x00, 0x1f, 0x41, 0xc3, 0x00, 0x9e,
- 0x01, 0x64, 0x70, 0xc4, 0xcf, 0xab, 0x00, 0x1f, 0x51, 0xc3, 0x00, 0xac,
- 0x01, 0x64, 0x20, 0x0a, 0xc2, 0xa9, 0xc1, 0xc2, 0x00, 0x83, 0x01, 0x64,
- 0x51, 0xc3, 0x07, 0x4a, 0x01, 0x65, 0x21, 0xc4, 0x8a, 0x54, 0x01, 0x66,
- 0x00, 0xc2, 0x01, 0xdb, 0x00, 0x1f, 0x71, 0xc4, 0xe3, 0xc3, 0x01, 0x64,
- 0x31, 0x49, 0xaa, 0x15, 0x42, 0xa9, 0xcd, 0xc3, 0xe7, 0x5a, 0x01, 0x64,
- 0x01, 0xcc, 0x85, 0xc0, 0x01, 0x66, 0x40, 0xc5, 0xd7, 0x14, 0x01, 0x64,
- 0x81, 0xc2, 0x13, 0xa9, 0x01, 0x65, 0x30, 0xc3, 0x42, 0x32, 0x01, 0x64,
- 0xa1, 0xc2, 0x00, 0xcb, 0x01, 0x64, 0xe1, 0x0a, 0x42, 0xa9, 0xe5, 0xc4,
- 0x50, 0x72, 0x01, 0x64, 0xb1, 0xca, 0xa1, 0xd0, 0x01, 0x66, 0x80, 0xc2,
- 0x00, 0x83, 0x01, 0x65, 0x81, 0x43, 0x1e, 0x5c, 0x42, 0xa9, 0xef, 0x8b,
- 0x01, 0x65, 0x01, 0xc2, 0x00, 0xcb, 0x01, 0x65, 0x70, 0x8b, 0x01, 0x65,
- 0x51, 0xc2, 0x01, 0xdb, 0x00, 0x1f, 0x20, 0x4c, 0x1f, 0xaa, 0xc2, 0xa9,
- 0xfb, 0xca, 0x9f, 0xfa, 0x01, 0x66, 0x10, 0xc5, 0xdf, 0x98, 0x01, 0x67,
- 0x81, 0xc5, 0xd9, 0xfd, 0x01, 0x67, 0x88, 0xc2, 0x01, 0x47, 0x08, 0x17,
- 0x11, 0xc4, 0x04, 0x5e, 0x08, 0x17, 0x18, 0xc3, 0x06, 0x9e, 0x08, 0x17,
- 0x21, 0xc3, 0x0c, 0x5b, 0x08, 0x17, 0x28, 0xc2, 0x26, 0x51, 0x08, 0x17,
- 0x31, 0xc4, 0x18, 0x83, 0x08, 0x17, 0x38, 0xc2, 0x00, 0x4d, 0x08, 0x17,
- 0x51, 0x19, 0xc2, 0xaa, 0x13, 0x0a, 0x42, 0xaa, 0x1f, 0x11, 0xc2, 0xaa,
- 0x2b, 0x0b, 0x42, 0xaa, 0x37, 0x42, 0x26, 0x51, 0xc2, 0xaa, 0x43, 0x44,
- 0x18, 0x83, 0x42, 0xaa, 0x4f, 0x9b, 0x08, 0x17, 0x89, 0xc8, 0x0c, 0x4a,
- 0x08, 0x17, 0xd0, 0xc2, 0x0c, 0x57, 0x08, 0x17, 0x91, 0xc8, 0x0c, 0x4a,
- 0x08, 0x17, 0xd8, 0xd2, 0x47, 0xd0, 0x01, 0x52, 0x80, 0xcc, 0x23, 0x34,
- 0x01, 0x56, 0x88, 0xcc, 0x23, 0x34, 0x01, 0x56, 0x90, 0xe0, 0x01, 0x67,
- 0x0f, 0xa8, 0x0a, 0x02, 0xaa, 0x5b, 0x44, 0x21, 0x31, 0xc2, 0xaa, 0x61,
- 0x11, 0x42, 0xaa, 0x6d, 0xc7, 0xc8, 0x1e, 0x0f, 0xab, 0x29, 0xc7, 0xcb,
- 0xb3, 0x0f, 0xaa, 0xc8, 0xc7, 0xc8, 0x1e, 0x0f, 0xaa, 0xf1, 0xc7, 0xcb,
- 0xb3, 0x0f, 0xaa, 0x90, 0xc7, 0xc8, 0x1e, 0x0f, 0xab, 0x31, 0xc7, 0xcb,
- 0xb3, 0x0f, 0xaa, 0xd0, 0xc7, 0xc8, 0x1e, 0x0f, 0xab, 0x19, 0xc7, 0xcb,
- 0xb3, 0x0f, 0xaa, 0xb8, 0xc7, 0xc8, 0x1e, 0x0f, 0xab, 0x11, 0xc7, 0xcb,
- 0xb3, 0x0f, 0xaa, 0xb0, 0xc7, 0xc8, 0x1e, 0x0f, 0xab, 0x09, 0xc7, 0xcb,
- 0xb3, 0x0f, 0xaa, 0xa8, 0xc7, 0xc8, 0x1e, 0x0f, 0xab, 0x01, 0xc7, 0xcb,
- 0xb3, 0x0f, 0xaa, 0xa0, 0xc7, 0xc8, 0x1e, 0x0f, 0xaa, 0xf9, 0xc7, 0xcb,
- 0xb3, 0x0f, 0xaa, 0x98, 0x00, 0xc2, 0xaa, 0x79, 0xc9, 0xb0, 0xba, 0x01,
- 0x36, 0x90, 0x0d, 0xc2, 0xaa, 0x88, 0xc5, 0xd9, 0x80, 0x01, 0x93, 0x0b,
- 0x02, 0xaa, 0x9a, 0x16, 0xc2, 0xaa, 0xa0, 0xc5, 0xd6, 0x3d, 0x01, 0x93,
- 0x1b, 0x02, 0xaa, 0xb2, 0xc5, 0xdb, 0x51, 0x01, 0x93, 0x23, 0x02, 0xaa,
- 0xb8, 0x12, 0xc2, 0xaa, 0xbe, 0xc4, 0xac, 0xd8, 0x01, 0x93, 0x33, 0x02,
- 0xaa, 0xd0, 0xc5, 0xbb, 0xa0, 0x01, 0x93, 0x3b, 0x02, 0xaa, 0xd6, 0x05,
- 0xc2, 0xaa, 0xda, 0xc5, 0x98, 0x41, 0x01, 0x93, 0x6a, 0x02, 0xaa, 0xec,
- 0xc4, 0x0e, 0xa8, 0x01, 0x39, 0x51, 0xc6, 0x1d, 0x59, 0x01, 0x4d, 0xf0,
- 0x44, 0x06, 0x99, 0xc2, 0xaa, 0xf2, 0x48, 0x2f, 0x38, 0x42, 0xab, 0x16,
- 0xca, 0x2d, 0x6b, 0x01, 0x14, 0xc9, 0x0e, 0x42, 0xab, 0x22, 0x4d, 0x26,
- 0xea, 0xc2, 0xab, 0x28, 0x4f, 0x07, 0x17, 0x42, 0xab, 0x90, 0x42, 0x00,
- 0x4b, 0xc2, 0xab, 0xf8, 0x44, 0x0c, 0x54, 0xc2, 0xac, 0x07, 0xc2, 0x00,
- 0x4d, 0x01, 0x23, 0x4a, 0x02, 0xac, 0x14, 0x44, 0x01, 0xdc, 0xc2, 0xac,
- 0x1a, 0xc5, 0x6a, 0x79, 0x01, 0x23, 0x50, 0x45, 0x18, 0x83, 0xc2, 0xac,
- 0x26, 0x43, 0x26, 0x51, 0x42, 0xac, 0x32, 0x43, 0x13, 0xf0, 0xc2, 0xac,
- 0x3e, 0x11, 0x42, 0xac, 0x4b, 0xc5, 0x03, 0x67, 0x01, 0x1c, 0x50, 0xd6,
- 0x2f, 0x2d, 0x01, 0x4d, 0xe1, 0xc6, 0x01, 0x0e, 0x0f, 0x88, 0x70, 0xe0,
- 0x0b, 0x27, 0x01, 0x51, 0xb0, 0x03, 0xc2, 0xac, 0x5a, 0xc8, 0x2c, 0x41,
- 0x01, 0x92, 0x21, 0x0d, 0xc2, 0xac, 0x72, 0x15, 0xc2, 0xac, 0x7e, 0xc3,
- 0x01, 0xb4, 0x01, 0x94, 0x01, 0x16, 0xc2, 0xac, 0xa2, 0x08, 0xc2, 0xac,
- 0xb4, 0x07, 0xc2, 0xac, 0xc4, 0x10, 0xc2, 0xac, 0xdc, 0x0f, 0xc2, 0xac,
- 0xe6, 0x19, 0xc2, 0xac, 0xf6, 0x0a, 0xc2, 0xad, 0x02, 0x05, 0xc2, 0xad,
- 0x0e, 0x0e, 0xc2, 0xad, 0x18, 0xc5, 0xb8, 0x87, 0x01, 0x94, 0xf1, 0xc4,
- 0xaf, 0xff, 0x01, 0x95, 0x01, 0x14, 0x42, 0xad, 0x2a, 0x85, 0x0f, 0x89,
- 0x59, 0x94, 0x0f, 0x89, 0x60, 0xc6, 0xd2, 0x9d, 0x01, 0x93, 0xe1, 0xc5,
- 0xdf, 0x4d, 0x01, 0x93, 0xe8, 0x83, 0x01, 0x96, 0x81, 0x8b, 0x01, 0x96,
- 0x89, 0x97, 0x01, 0x96, 0x91, 0x87, 0x01, 0x96, 0x99, 0x91, 0x01, 0x96,
- 0xa1, 0x0d, 0xc2, 0xad, 0x34, 0x15, 0xc2, 0xad, 0x48, 0x16, 0xc2, 0xad,
- 0x5c, 0x10, 0xc2, 0xad, 0x70, 0x0a, 0xc2, 0xad, 0x84, 0x0f, 0xc2, 0xad,
- 0x98, 0x1b, 0xc2, 0xad, 0xac, 0x14, 0xc2, 0xad, 0xb8, 0x19, 0x42, 0xad,
- 0xcc, 0xe0, 0x03, 0xc7, 0x01, 0x2e, 0xa8, 0xd4, 0x3c, 0x29, 0x01, 0x2e,
- 0xa1, 0xca, 0x1d, 0xed, 0x01, 0x2e, 0x98, 0xcf, 0x69, 0x3e, 0x01, 0x2e,
- 0x91, 0xce, 0x6b, 0x2e, 0x01, 0x2e, 0x80, 0xe0, 0x04, 0x87, 0x01, 0x4e,
- 0x18, 0xd8, 0x21, 0xf0, 0x01, 0x4e, 0x11, 0xcd, 0x77, 0x98, 0x01, 0x4d,
- 0xd8, 0x47, 0x01, 0x2c, 0x42, 0xad, 0xdc, 0xd1, 0x54, 0x80, 0x09, 0x1a,
- 0xf9, 0xc4, 0x59, 0x55, 0x09, 0x1a, 0xf0, 0xca, 0xa7, 0x7a, 0x09, 0x1b,
- 0x38, 0x47, 0x01, 0x2c, 0xc2, 0xad, 0xe6, 0xc2, 0x0b, 0xc6, 0x09, 0x1a,
- 0x7a, 0x02, 0xae, 0x29, 0x00, 0x42, 0xae, 0x2f, 0xa0, 0x09, 0x19, 0xb0,
- 0xc7, 0x6e, 0x09, 0x09, 0x19, 0x51, 0xcb, 0x98, 0xca, 0x09, 0x19, 0x48,
- 0xc2, 0x01, 0x32, 0x09, 0x18, 0x68, 0xda, 0x19, 0x83, 0x09, 0x18, 0x81,
- 0xcc, 0x82, 0x3c, 0x09, 0x18, 0x79, 0xd7, 0x29, 0x68, 0x09, 0x18, 0x70,
- 0xc2, 0x00, 0xc3, 0x09, 0x1c, 0xc3, 0x02, 0xae, 0x3b, 0x97, 0x09, 0x19,
- 0x09, 0xc4, 0x56, 0x51, 0x09, 0x19, 0x01, 0xc5, 0x01, 0x27, 0x09, 0x18,
- 0xf0, 0x47, 0x01, 0x2c, 0x42, 0xae, 0x41, 0xcd, 0x78, 0xd0, 0x09, 0x1a,
- 0xd8, 0xc4, 0x3d, 0x51, 0x09, 0x1a, 0xa9, 0xc2, 0x0b, 0xf2, 0x09, 0x1a,
- 0x9b, 0x02, 0xae, 0x4d, 0x83, 0x09, 0x1a, 0x90, 0xc7, 0x6e, 0x09, 0x09,
- 0x18, 0xd3, 0x02, 0xae, 0x51, 0xc4, 0x39, 0x41, 0x09, 0x18, 0xc9, 0x46,
- 0x01, 0x2d, 0xc2, 0xae, 0x57, 0xc6, 0xd1, 0x23, 0x09, 0x18, 0xa0, 0x47,
- 0x01, 0x2c, 0x42, 0xae, 0x6c, 0xd4, 0x3d, 0xb9, 0x09, 0x18, 0x50, 0xc9,
- 0xb4, 0xf2, 0x09, 0x29, 0xc8, 0x47, 0x01, 0x2c, 0x42, 0xae, 0x78, 0x00,
- 0x42, 0xae, 0x96, 0xc4, 0x39, 0x41, 0x09, 0x17, 0x79, 0x46, 0x01, 0x2d,
- 0xc2, 0xae, 0xa2, 0xc8, 0x0b, 0x7f, 0x09, 0x17, 0x60, 0x00, 0x42, 0xae,
- 0xae, 0xca, 0x3d, 0x4b, 0x09, 0x29, 0xc1, 0xc4, 0x39, 0x41, 0x09, 0x16,
- 0xe0, 0xa1, 0x09, 0x16, 0xf2, 0x02, 0xae, 0xbd, 0x9f, 0x09, 0x16, 0xcb,
- 0x02, 0xae, 0xc3, 0xc3, 0x31, 0xb5, 0x09, 0x16, 0xd1, 0xd2, 0x49, 0x14,
- 0x09, 0x16, 0xc0, 0x00, 0xc2, 0xae, 0xc9, 0xc2, 0x00, 0xe5, 0x09, 0x16,
- 0x03, 0x02, 0xae, 0xde, 0x90, 0x09, 0x15, 0xf9, 0xc2, 0xdb, 0x4b, 0x09,
- 0x15, 0xf0, 0xa3, 0x09, 0x15, 0xbb, 0x02, 0xae, 0xe8, 0xc2, 0x3e, 0xab,
- 0x09, 0x15, 0xc9, 0xc2, 0xe7, 0x04, 0x09, 0x15, 0xc1, 0xa0, 0x09, 0x15,
- 0x72, 0x02, 0xae, 0xee, 0xc2, 0x00, 0xb3, 0x09, 0x16, 0xb1, 0x94, 0x09,
- 0x16, 0x9b, 0x02, 0xae, 0xf4, 0xc3, 0x7c, 0xf9, 0x09, 0x16, 0x91, 0x8f,
- 0x09, 0x16, 0x33, 0x02, 0xae, 0xf8, 0x86, 0x09, 0x16, 0x1a, 0x02, 0xae,
- 0xfe, 0x00, 0x42, 0xaf, 0x04, 0xd1, 0x54, 0xe6, 0x09, 0x15, 0x50, 0xa6,
- 0x09, 0x17, 0x50, 0xc3, 0x00, 0xe4, 0x09, 0x17, 0x40, 0x9f, 0x09, 0x17,
- 0x28, 0xc3, 0xe6, 0x76, 0x09, 0x12, 0x93, 0x02, 0xaf, 0x1f, 0xa6, 0x09,
- 0x1c, 0x80, 0x49, 0x3e, 0xad, 0x42, 0xaf, 0x25, 0x00, 0x42, 0xaf, 0x31,
- 0xc2, 0x4e, 0x2b, 0x09, 0x13, 0x6b, 0x02, 0xaf, 0x43, 0x00, 0x42, 0xaf,
- 0x47, 0x9f, 0x09, 0x12, 0x39, 0xc8, 0xb9, 0x2d, 0x09, 0x12, 0x28, 0x94,
- 0x09, 0x12, 0x21, 0x00, 0x42, 0xaf, 0x62, 0xc7, 0x6e, 0x09, 0x09, 0x12,
- 0x59, 0x46, 0x01, 0x2d, 0x42, 0xaf, 0x74, 0x00, 0xc2, 0xaf, 0x7e, 0xa0,
- 0x09, 0x11, 0xca, 0x02, 0xaf, 0x93, 0xc5, 0x39, 0x40, 0x09, 0x11, 0x78,
- 0x8a, 0x09, 0x1c, 0x60, 0x9f, 0x09, 0x11, 0x38, 0xc4, 0x39, 0x41, 0x09,
- 0x11, 0x11, 0xca, 0x3d, 0x4b, 0x09, 0x11, 0x08, 0x00, 0x42, 0xaf, 0x97,
- 0xc9, 0xb4, 0xe0, 0x09, 0x10, 0xf2, 0x02, 0xaf, 0xb1, 0x00, 0x42, 0xaf,
- 0xb7, 0x24, 0xc2, 0xaf, 0xc1, 0x23, 0xc2, 0xaf, 0xcd, 0xc3, 0xe6, 0xfa,
- 0x09, 0x27, 0xf9, 0x21, 0xc2, 0xaf, 0xeb, 0x20, 0xc2, 0xb0, 0x03, 0x1f,
- 0xc2, 0xb0, 0x11, 0x1e, 0xc2, 0xb0, 0x23, 0x1d, 0x42, 0xb0, 0x2f, 0x84,
- 0x09, 0x0d, 0xc3, 0x02, 0xb0, 0x59, 0x94, 0x09, 0x0f, 0x62, 0x02, 0xb0,
- 0x5d, 0xca, 0x54, 0x87, 0x09, 0x0f, 0xaa, 0x02, 0xb0, 0x61, 0xca, 0x8f,
- 0xc5, 0x09, 0x0f, 0x98, 0x97, 0x09, 0x0c, 0x3b, 0x02, 0xb0, 0x67, 0x0d,
- 0xc2, 0xb0, 0x88, 0x04, 0xc2, 0xb0, 0x96, 0x16, 0xc2, 0xb0, 0xa2, 0x15,
- 0xc2, 0xb0, 0xac, 0x12, 0xc2, 0xb0, 0xc3, 0x0e, 0xc2, 0xb0, 0xcb, 0xcd,
- 0x0b, 0xfa, 0x09, 0x1c, 0x11, 0x09, 0xc2, 0xb0, 0xd6, 0x83, 0x09, 0x0a,
- 0xc3, 0x02, 0xb0, 0xeb, 0xc2, 0x30, 0xd9, 0x09, 0x0c, 0x61, 0xc2, 0x05,
- 0xd4, 0x09, 0x0b, 0xe9, 0x10, 0xc2, 0xb0, 0xfe, 0x0f, 0xc2, 0xb1, 0x08,
- 0x0b, 0xc2, 0xb1, 0x16, 0x07, 0x42, 0xb1, 0x20, 0x00, 0x42, 0xb1, 0x2c,
- 0xa1, 0x09, 0x0c, 0xd9, 0x9f, 0x09, 0x0c, 0xd0, 0x00, 0x42, 0xb1, 0x38,
- 0xcf, 0x66, 0xb9, 0x09, 0x0c, 0xb0, 0xa2, 0x09, 0x0c, 0x9b, 0x02, 0xb1,
- 0x44, 0xa1, 0x09, 0x0c, 0x91, 0xa0, 0x09, 0x0c, 0x89, 0x9f, 0x09, 0x0c,
- 0x80, 0xcd, 0x7a, 0x15, 0x09, 0x0c, 0x70, 0xcd, 0x7b, 0x67, 0x09, 0x0d,
- 0xa0, 0xc5, 0x39, 0x40, 0x09, 0x0d, 0x88, 0xcd, 0x79, 0x38, 0x09, 0x0d,
- 0x70, 0xe0, 0x0b, 0xe7, 0x09, 0x0d, 0x58, 0xc3, 0x6b, 0x5d, 0x09, 0x0d,
- 0x43, 0x02, 0xb1, 0x4a, 0x8a, 0x09, 0x0d, 0x39, 0xc2, 0x00, 0x92, 0x09,
- 0x0d, 0x30, 0x97, 0x09, 0x0d, 0x13, 0x02, 0xb1, 0x50, 0xc3, 0x61, 0x9a,
- 0x09, 0x0d, 0x08, 0xc3, 0x00, 0xe4, 0x09, 0x09, 0x73, 0x02, 0xb1, 0x54,
- 0x97, 0x09, 0x09, 0xb1, 0xc3, 0x0a, 0x91, 0x09, 0x09, 0xa9, 0xc3, 0x1f,
- 0xd8, 0x09, 0x09, 0xa1, 0xc3, 0x32, 0x2b, 0x09, 0x09, 0x99, 0xc3, 0x1c,
- 0x42, 0x09, 0x09, 0x91, 0xc4, 0x05, 0x19, 0x09, 0x09, 0x89, 0xc3, 0x61,
- 0x9a, 0x09, 0x09, 0x80, 0xc4, 0x59, 0x55, 0x09, 0x09, 0x53, 0x02, 0xb1,
- 0x5e, 0xc4, 0x39, 0x41, 0x09, 0x09, 0x58, 0x47, 0x01, 0x2c, 0x42, 0xb1,
- 0x64, 0x00, 0x42, 0xb1, 0x82, 0x00, 0x42, 0xb1, 0x94, 0x17, 0xc2, 0xb1,
- 0xa0, 0xa4, 0x09, 0x09, 0x30, 0xca, 0x9b, 0x68, 0x09, 0x09, 0x20, 0x8a,
- 0x09, 0x08, 0x8b, 0x02, 0xb1, 0xaa, 0xc2, 0x00, 0x92, 0x09, 0x08, 0x80,
- 0xa0, 0x09, 0x08, 0x53, 0x02, 0xb1, 0xae, 0x9f, 0x09, 0x08, 0x42, 0x02,
- 0xb1, 0xb4, 0x00, 0x42, 0xb1, 0xba, 0xcb, 0x4d, 0x99, 0x09, 0x08, 0x19,
- 0x46, 0x01, 0x2d, 0x42, 0xb1, 0xc6, 0x47, 0x01, 0x2c, 0x42, 0xb1, 0xce,
- 0x00, 0x42, 0xb1, 0xd8, 0x00, 0x42, 0xb1, 0xe4, 0xa0, 0x09, 0x07, 0xe0,
- 0x9f, 0x09, 0x07, 0xba, 0x02, 0xb1, 0xf0, 0xc2, 0x00, 0xc2, 0x09, 0x07,
- 0xa1, 0xda, 0x1c, 0x41, 0x09, 0x07, 0x98, 0xd6, 0x1c, 0x45, 0x09, 0x07,
- 0x88, 0x46, 0x01, 0x2d, 0xc2, 0xb1, 0xf4, 0x4e, 0x6e, 0x09, 0x42, 0xb2,
- 0x2f, 0xc2, 0x5c, 0x57, 0x09, 0x25, 0x58, 0xc3, 0x03, 0xa4, 0x09, 0x25,
- 0x51, 0xc3, 0x54, 0x8e, 0x09, 0x25, 0x49, 0x97, 0x09, 0x04, 0x99, 0x15,
- 0xc2, 0xb2, 0x59, 0xc2, 0x00, 0x50, 0x09, 0x04, 0x81, 0xc3, 0x1c, 0x4f,
- 0x09, 0x04, 0x79, 0xd1, 0x4f, 0xa7, 0x09, 0x04, 0x70, 0xc7, 0x07, 0x09,
- 0x09, 0x04, 0xe9, 0xcb, 0x8f, 0x35, 0x09, 0x04, 0xe1, 0xcb, 0x99, 0x22,
- 0x09, 0x04, 0xd9, 0x46, 0x01, 0x2d, 0x42, 0xb2, 0x65, 0x47, 0x01, 0x2c,
- 0xc2, 0xb2, 0x74, 0xc2, 0x04, 0xdd, 0x09, 0x04, 0x10, 0x47, 0x01, 0x2c,
- 0xc2, 0xb2, 0xac, 0x9f, 0x09, 0x04, 0x00, 0xa1, 0x09, 0x04, 0x41, 0xa0,
- 0x09, 0x04, 0x2a, 0x02, 0xb2, 0xb8, 0xc7, 0x6e, 0x09, 0x09, 0x03, 0xe9,
- 0xc4, 0x39, 0x41, 0x09, 0x03, 0xe1, 0xc7, 0xcb, 0xac, 0x09, 0x03, 0xd8,
- 0x9f, 0x09, 0x03, 0xb3, 0x02, 0xb2, 0xc1, 0x47, 0x01, 0x2c, 0x42, 0xb2,
- 0xc7, 0xc9, 0x9f, 0x5a, 0x09, 0x1b, 0xa8, 0xd3, 0x47, 0x3f, 0x09, 0x03,
- 0xc0, 0x00, 0xc2, 0xb2, 0xd9, 0xa0, 0x09, 0x1b, 0xa0, 0x03, 0x42, 0xb2,
- 0xe5, 0x48, 0xbc, 0x2d, 0xc2, 0xb2, 0xed, 0xcb, 0x99, 0x0c, 0x09, 0x02,
- 0x80, 0x9f, 0x09, 0x02, 0xa0, 0xcb, 0x99, 0x90, 0x09, 0x02, 0x90, 0x47,
- 0x01, 0x2c, 0x42, 0xb2, 0xff, 0xd0, 0x5b, 0x92, 0x09, 0x24, 0x18, 0xc2,
- 0x61, 0x9f, 0x09, 0x02, 0x40, 0xc2, 0x01, 0x28, 0x09, 0x02, 0x31, 0xc9,
- 0xb4, 0x7d, 0x09, 0x02, 0x28, 0xc8, 0x66, 0xc0, 0x09, 0x02, 0x61, 0xc3,
- 0x1c, 0x4f, 0x09, 0x02, 0x59, 0x83, 0x09, 0x02, 0x50, 0x46, 0x01, 0x2d,
- 0xc2, 0xb3, 0x11, 0xc4, 0x39, 0x41, 0x09, 0x00, 0xa8, 0x47, 0x01, 0x2c,
- 0x42, 0xb3, 0x48, 0xc3, 0xe7, 0x8a, 0x09, 0x1b, 0x91, 0xc3, 0x0a, 0x91,
- 0x09, 0x01, 0x60, 0xc3, 0x01, 0x29, 0x09, 0x01, 0xf9, 0x9f, 0x09, 0x01,
- 0xf1, 0x00, 0x42, 0xb3, 0x6a, 0xca, 0x54, 0x87, 0x09, 0x01, 0xa8, 0x4a,
- 0xa2, 0xc0, 0xc2, 0xb3, 0x7c, 0xcb, 0x90, 0x48, 0x09, 0x01, 0x79, 0xc7,
- 0xca, 0xd3, 0x09, 0x01, 0x70, 0xc3, 0x79, 0x0e, 0x09, 0x01, 0x41, 0xc3,
- 0x0a, 0x91, 0x09, 0x01, 0x39, 0x0d, 0xc2, 0xb3, 0x88, 0xc2, 0x00, 0xa4,
- 0x09, 0x01, 0x21, 0xc4, 0x3d, 0x46, 0x09, 0x01, 0x19, 0xc4, 0xe4, 0x1b,
- 0x09, 0x01, 0x11, 0xc2, 0x00, 0x92, 0x09, 0x01, 0x08, 0xcf, 0x6a, 0xe2,
- 0x09, 0x00, 0xf9, 0xc5, 0xa1, 0xe9, 0x09, 0x00, 0xf0, 0x9f, 0x09, 0x1c,
- 0xa9, 0xc2, 0x00, 0xcc, 0x09, 0x14, 0x52, 0x02, 0xb3, 0x92, 0xcb, 0x99,
- 0x43, 0x09, 0x14, 0x49, 0x46, 0x01, 0x2d, 0x42, 0xb3, 0x96, 0xc7, 0x07,
- 0x09, 0x09, 0x0a, 0x91, 0xcb, 0x8f, 0x2a, 0x09, 0x0a, 0x89, 0xcb, 0x99,
- 0x2d, 0x09, 0x0a, 0x81, 0xca, 0x3d, 0x4b, 0x09, 0x0a, 0x78, 0x00, 0x42,
- 0xb3, 0xb3, 0xc7, 0x07, 0x09, 0x09, 0x0a, 0x21, 0xc3, 0x31, 0xb5, 0x09,
- 0x0a, 0x18, 0xcd, 0x79, 0x38, 0x09, 0x23, 0x70, 0xc2, 0x00, 0x56, 0x09,
- 0x22, 0x49, 0xa1, 0x09, 0x22, 0x41, 0xa0, 0x09, 0x22, 0x38, 0xcd, 0x79,
- 0x38, 0x09, 0x23, 0x68, 0xa0, 0x09, 0x22, 0x28, 0xc4, 0x43, 0xcc, 0x09,
- 0x23, 0x41, 0xc4, 0x47, 0x9b, 0x09, 0x23, 0x38, 0xcd, 0x79, 0x38, 0x09,
- 0x23, 0x60, 0x00, 0xc2, 0xb3, 0xcb, 0xa0, 0x09, 0x22, 0x08, 0xcd, 0x79,
- 0x38, 0x09, 0x23, 0x58, 0xc5, 0x59, 0x54, 0x09, 0x22, 0x70, 0xcd, 0x79,
- 0x38, 0x09, 0x23, 0x50, 0xca, 0x9d, 0x5c, 0x09, 0x22, 0xe1, 0x43, 0x02,
- 0xd0, 0x42, 0xb3, 0xd3, 0xc3, 0x5b, 0xda, 0x09, 0x22, 0xa3, 0x02, 0xb3,
- 0xdb, 0xc3, 0x31, 0xd5, 0x09, 0x21, 0xc8, 0xc5, 0x59, 0x54, 0x09, 0x22,
- 0x68, 0x97, 0x09, 0x21, 0x11, 0x9f, 0x09, 0x20, 0xc8, 0xcd, 0x79, 0x38,
- 0x09, 0x23, 0x48, 0xc3, 0x5b, 0xda, 0x09, 0x22, 0x93, 0x02, 0xb3, 0xe1,
- 0xc3, 0x31, 0xd5, 0x09, 0x21, 0xc0, 0xc5, 0x59, 0x54, 0x09, 0x22, 0x60,
- 0x00, 0xc2, 0xb3, 0xe7, 0xa1, 0x09, 0x21, 0xe8, 0x97, 0x09, 0x21, 0x81,
- 0x9f, 0x09, 0x21, 0x30, 0x97, 0x09, 0x21, 0x09, 0x9f, 0x09, 0x20, 0xc0,
- 0xc3, 0x9b, 0x02, 0x09, 0x23, 0x19, 0xc3, 0x00, 0xe4, 0x09, 0x23, 0x00,
- 0xc9, 0xae, 0xef, 0x09, 0x22, 0xf9, 0xc4, 0xd7, 0xf1, 0x09, 0x22, 0xc0,
- 0xce, 0x4f, 0x96, 0x09, 0x22, 0xe9, 0xc4, 0x05, 0x19, 0x09, 0x22, 0xd0,
- 0xc3, 0x5b, 0xda, 0x09, 0x22, 0x79, 0xc3, 0x31, 0xd5, 0x09, 0x21, 0xa0,
- 0x97, 0x09, 0x20, 0xf1, 0x9f, 0x09, 0x20, 0xa8, 0xce, 0x4f, 0x96, 0x09,
- 0x22, 0xf1, 0xc4, 0x05, 0x19, 0x09, 0x22, 0xd8, 0xc3, 0x5b, 0xda, 0x09,
- 0x22, 0x81, 0xc3, 0x31, 0xd5, 0x09, 0x21, 0xa8, 0xc5, 0x59, 0x54, 0x09,
- 0x22, 0x50, 0x97, 0x09, 0x21, 0x69, 0x9f, 0x09, 0x21, 0x18, 0x97, 0x09,
- 0x20, 0xf9, 0x9f, 0x09, 0x20, 0xb0, 0xc3, 0x5b, 0xda, 0x09, 0x22, 0x89,
- 0xc3, 0x31, 0xd5, 0x09, 0x21, 0xb2, 0x02, 0xb3, 0xef, 0xc5, 0x59, 0x54,
- 0x09, 0x22, 0x58, 0xc2, 0xe7, 0x76, 0x09, 0x21, 0xd9, 0xc2, 0xe7, 0xfe,
- 0x09, 0x21, 0xd0, 0x97, 0x09, 0x21, 0x73, 0x02, 0xb3, 0xf5, 0x9f, 0x09,
- 0x21, 0x22, 0x02, 0xb3, 0xfb, 0x97, 0x09, 0x21, 0x01, 0x9f, 0x09, 0x20,
- 0xb8, 0xc3, 0x00, 0x57, 0x01, 0x16, 0x79, 0xc2, 0x00, 0xbf, 0x01, 0x16,
- 0x70, 0xc2, 0x01, 0xf0, 0x0f, 0x03, 0x51, 0x87, 0x0f, 0x03, 0x48, 0xcb,
- 0x92, 0x58, 0x00, 0xe4, 0x41, 0x46, 0x00, 0x6b, 0xc2, 0xb4, 0x01, 0x8d,
- 0x00, 0x23, 0xca, 0x02, 0xb4, 0x0b, 0x44, 0x04, 0x75, 0xc2, 0xb4, 0x11,
- 0xce, 0x6e, 0xf7, 0x00, 0xe4, 0x29, 0x87, 0x00, 0x22, 0x13, 0x02, 0xb4,
- 0x23, 0x15, 0xc2, 0xb4, 0x29, 0xc2, 0x00, 0x4b, 0x05, 0x34, 0x69, 0xc3,
- 0x26, 0x9b, 0x05, 0x34, 0x98, 0xc6, 0xd1, 0x5f, 0x00, 0xe4, 0x19, 0x87,
- 0x00, 0x28, 0xe8, 0xc7, 0x1f, 0xcd, 0x00, 0xe4, 0x11, 0xca, 0x9d, 0x0c,
- 0x05, 0x32, 0x79, 0xc2, 0x1b, 0xa5, 0x00, 0x22, 0xd0, 0xcd, 0x7e, 0xc1,
- 0x00, 0xe4, 0x09, 0xc2, 0x00, 0xa4, 0x00, 0x28, 0xa9, 0xc2, 0x1b, 0xa5,
- 0x00, 0x22, 0xc9, 0xc9, 0x52, 0x68, 0x00, 0x23, 0x38, 0x44, 0x0e, 0x49,
- 0xc2, 0xb4, 0x3f, 0xc2, 0x00, 0xa4, 0x00, 0x28, 0xb9, 0x48, 0x10, 0x90,
- 0x42, 0xb4, 0x4b, 0x8e, 0x00, 0x21, 0xdb, 0x02, 0xb4, 0x63, 0x90, 0x00,
- 0x21, 0xeb, 0x02, 0xb4, 0x69, 0xcf, 0x66, 0xaa, 0x00, 0x27, 0x69, 0x8f,
- 0x00, 0x21, 0xe3, 0x02, 0xb4, 0x6f, 0x95, 0x00, 0x22, 0x0b, 0x02, 0xb4,
- 0x75, 0x94, 0x00, 0x22, 0x03, 0x02, 0xb4, 0x7b, 0x88, 0x00, 0x22, 0x20,
- 0xc3, 0x26, 0x9b, 0x00, 0x29, 0x69, 0x1c, 0xc2, 0xb4, 0x81, 0x46, 0x06,
- 0xf2, 0xc2, 0xb4, 0x98, 0xc2, 0x1b, 0xa5, 0x00, 0x22, 0x93, 0x02, 0xb4,
- 0xa2, 0x87, 0x00, 0x21, 0xa1, 0xc2, 0x00, 0x4b, 0x05, 0x34, 0x08, 0x0a,
- 0xc2, 0xb4, 0xa8, 0xc4, 0x73, 0xed, 0x00, 0x26, 0xcb, 0x02, 0xb4, 0xc7,
- 0xc9, 0xb2, 0xfa, 0x00, 0x25, 0x7b, 0x02, 0xb4, 0xcd, 0xcc, 0x81, 0x88,
- 0x00, 0x24, 0x69, 0x44, 0x62, 0x1d, 0x42, 0xb4, 0xd3, 0x87, 0x00, 0x21,
- 0xfb, 0x02, 0xb4, 0xe3, 0xc7, 0xbc, 0x96, 0x00, 0x26, 0x79, 0xc2, 0x00,
- 0xcb, 0x00, 0x23, 0x88, 0xc7, 0xc2, 0xc2, 0x00, 0x28, 0xf9, 0x49, 0xaa,
- 0x42, 0xc2, 0xb4, 0xe9, 0x46, 0x00, 0x6b, 0x42, 0xb4, 0xfe, 0x83, 0x00,
- 0x22, 0x7b, 0x02, 0xb5, 0x0a, 0xc3, 0x70, 0xed, 0x00, 0x22, 0x5b, 0x02,
- 0xb5, 0x12, 0x90, 0x05, 0x32, 0xf9, 0x97, 0x00, 0x22, 0x71, 0x8b, 0x00,
- 0x22, 0xb8, 0x11, 0xc2, 0xb5, 0x18, 0xcd, 0x7a, 0x22, 0x00, 0x26, 0x61,
- 0x83, 0x00, 0x21, 0xd3, 0x02, 0xb5, 0x24, 0xc2, 0x1b, 0xa5, 0x00, 0x22,
- 0xe1, 0xc2, 0x00, 0xcb, 0x00, 0x23, 0x78, 0x83, 0x00, 0x22, 0x2b, 0x02,
- 0xb5, 0x2a, 0xc2, 0x00, 0x4b, 0x05, 0x34, 0xa8, 0xc2, 0x01, 0xf0, 0x00,
- 0x21, 0x9b, 0x02, 0xb5, 0x36, 0xc2, 0x1b, 0xa5, 0x00, 0x22, 0x98, 0x03,
- 0xc2, 0xb5, 0x3c, 0xca, 0xa1, 0x1c, 0x05, 0x32, 0x69, 0x87, 0x00, 0x21,
- 0x89, 0xca, 0xa6, 0x12, 0x05, 0x32, 0xd9, 0x0b, 0xc2, 0xb5, 0x4b, 0xd7,
- 0x05, 0xd0, 0x00, 0x22, 0xb0, 0xcf, 0x66, 0xaa, 0x00, 0x27, 0x39, 0xc4,
- 0x70, 0xd8, 0x00, 0x23, 0x0b, 0x02, 0xb5, 0x57, 0x96, 0x00, 0x23, 0xf8,
- 0x46, 0x00, 0x6b, 0xc2, 0xb5, 0x5d, 0x87, 0x00, 0x21, 0xab, 0x02, 0xb5,
- 0x6f, 0xc6, 0xcd, 0x75, 0x00, 0x23, 0xab, 0x02, 0xb5, 0x75, 0x91, 0x00,
- 0x22, 0x8a, 0x02, 0xb5, 0x7b, 0x87, 0x00, 0x21, 0xbb, 0x02, 0xb5, 0x7f,
- 0x0a, 0x42, 0xb5, 0x8b, 0xc2, 0x01, 0xf0, 0x00, 0x22, 0x3b, 0x02, 0xb5,
- 0x98, 0xc8, 0xbe, 0xbd, 0x05, 0x34, 0xd9, 0xd0, 0x52, 0x61, 0x05, 0x32,
- 0xc9, 0xc3, 0x26, 0x9b, 0x05, 0x34, 0x38, 0xc8, 0x87, 0xb0, 0x05, 0x32,
- 0x59, 0xc7, 0x80, 0xcf, 0x05, 0x33, 0x48, 0x46, 0x00, 0x6b, 0x42, 0xb5,
- 0x9e, 0x46, 0x00, 0x6b, 0x42, 0xb5, 0xb6, 0xca, 0x9f, 0xb4, 0x00, 0x26,
- 0x68, 0xcf, 0x66, 0x5f, 0x00, 0x25, 0x50, 0xca, 0xa0, 0x04, 0x00, 0x24,
- 0x70, 0x1c, 0xc2, 0xb5, 0xd4, 0x87, 0x00, 0x20, 0x2b, 0x02, 0xb5, 0xde,
- 0xc2, 0x01, 0xf0, 0x00, 0x20, 0x79, 0xc2, 0x00, 0x4b, 0x05, 0x34, 0x10,
- 0x91, 0x05, 0x34, 0xc1, 0xcb, 0x90, 0x3d, 0x05, 0x33, 0x60, 0xc2, 0x07,
- 0x8b, 0x05, 0x32, 0x40, 0xc2, 0x00, 0xa4, 0x00, 0x25, 0xd3, 0x02, 0xb5,
- 0xe4, 0x44, 0x2c, 0x01, 0xc2, 0xb5, 0xea, 0x83, 0x00, 0x21, 0x41, 0xc3,
- 0x1b, 0xb6, 0x00, 0x21, 0x49, 0xc2, 0x00, 0x4b, 0x05, 0x34, 0xb0, 0xcf,
- 0x66, 0xaa, 0x00, 0x26, 0xd0, 0xcc, 0x21, 0x84, 0x00, 0x25, 0x80, 0xc4,
- 0x00, 0xcb, 0x00, 0x21, 0x61, 0xc2, 0x00, 0x06, 0x05, 0x33, 0x11, 0x07,
- 0x42, 0xb5, 0xf5, 0x46, 0x00, 0x6b, 0x42, 0xb5, 0xfd, 0xc3, 0x98, 0x92,
- 0x00, 0x27, 0x01, 0xc3, 0x26, 0x9b, 0x00, 0x25, 0xe3, 0x02, 0xb6, 0x09,
- 0xc2, 0x00, 0xa4, 0x00, 0x25, 0x40, 0xc9, 0x1e, 0x42, 0x00, 0x26, 0x91,
- 0xc5, 0x1f, 0x0a, 0x00, 0x26, 0x80, 0x87, 0x00, 0x28, 0xc1, 0x96, 0x00,
- 0x23, 0x10, 0x46, 0x00, 0x6b, 0x42, 0xb6, 0x0f, 0xc2, 0x08, 0xc2, 0x00,
- 0x28, 0x81, 0xc3, 0x98, 0x92, 0x05, 0x32, 0x21, 0xc2, 0x48, 0x12, 0x05,
- 0x32, 0xa1, 0xc3, 0x09, 0x66, 0x05, 0x33, 0x00, 0x43, 0xd2, 0x0c, 0xc2,
- 0xb6, 0x1b, 0xc3, 0x7e, 0xe5, 0x00, 0x24, 0x00, 0x46, 0x00, 0x6b, 0x42,
- 0xb6, 0x3d, 0x46, 0x00, 0x6b, 0xc2, 0xb6, 0x55, 0xc7, 0x86, 0x25, 0x00,
- 0x21, 0x50, 0x46, 0x00, 0x6b, 0x42, 0xb6, 0x67, 0x46, 0x00, 0x6b, 0x42,
- 0xb6, 0x82, 0x06, 0xc2, 0xb6, 0x8c, 0xc6, 0x60, 0xe6, 0x00, 0x27, 0x70,
- 0xca, 0x05, 0xde, 0x00, 0x20, 0x20, 0xc6, 0xcb, 0xbb, 0x00, 0x27, 0x43,
- 0x02, 0xb6, 0x98, 0xc8, 0xbd, 0x65, 0x00, 0x25, 0x00, 0xc9, 0x90, 0x3f,
- 0x05, 0x33, 0x51, 0xc5, 0xc7, 0x4e, 0x00, 0x23, 0x50, 0xcb, 0x9a, 0xda,
- 0x00, 0x23, 0xe0, 0xc9, 0x1e, 0x42, 0x00, 0x27, 0x21, 0xc6, 0x60, 0xe6,
- 0x00, 0x27, 0x11, 0xc5, 0x1f, 0x94, 0x00, 0x20, 0x68, 0x46, 0x00, 0x6b,
- 0x42, 0xb6, 0x9e, 0xd9, 0x1f, 0x87, 0x00, 0x23, 0xb0, 0x16, 0x42, 0xb6,
- 0xaa, 0x47, 0x09, 0xf2, 0xc2, 0xb6, 0xb4, 0xc4, 0xe5, 0x53, 0x05, 0x32,
- 0x00, 0x87, 0x00, 0x20, 0xb3, 0x02, 0xb6, 0xc0, 0xc2, 0x00, 0x4b, 0x05,
- 0x34, 0x20, 0x46, 0x00, 0x6b, 0x42, 0xb6, 0xc6, 0xc2, 0x01, 0xf0, 0x00,
- 0x20, 0x59, 0x87, 0x00, 0x21, 0x31, 0xc2, 0x00, 0x4b, 0x05, 0x34, 0x71,
- 0xc2, 0x02, 0x60, 0x05, 0x34, 0x80, 0x84, 0x09, 0x7e, 0x70, 0x84, 0x09,
- 0x7c, 0xd8, 0xe0, 0x01, 0x87, 0x01, 0x01, 0xc8, 0xc8, 0x50, 0x00, 0x08,
- 0x8f, 0xa1, 0xc7, 0x0c, 0x4b, 0x08, 0x8f, 0x98, 0xc6, 0x18, 0x83, 0x08,
- 0x8f, 0x81, 0xc4, 0xcf, 0xf7, 0x08, 0x8f, 0x78, 0xc4, 0x43, 0xcc, 0x08,
- 0x8f, 0x71, 0xc4, 0x47, 0x9b, 0x08, 0x8f, 0x68, 0xc5, 0x0c, 0x54, 0x08,
- 0x8f, 0x61, 0xc5, 0x2a, 0x13, 0x08, 0x8f, 0x59, 0xc2, 0x00, 0x4d, 0x08,
- 0x8f, 0x50, 0xc4, 0x18, 0x83, 0x08, 0x8f, 0x39, 0xc2, 0x26, 0x51, 0x08,
- 0x8f, 0x30, 0xc3, 0x0c, 0x5b, 0x08, 0x8f, 0x29, 0xc3, 0x06, 0x9e, 0x08,
- 0x8f, 0x20, 0xc4, 0x04, 0x5e, 0x08, 0x8f, 0x19, 0xc2, 0x01, 0x47, 0x08,
- 0x8f, 0x10, 0xc5, 0x64, 0x78, 0x00, 0x6c, 0x29, 0xc6, 0x8e, 0xa0, 0x00,
- 0x6c, 0x31, 0x07, 0xc2, 0xb6, 0xd2, 0xc6, 0xcf, 0x85, 0x00, 0x6c, 0x99,
- 0xc6, 0xd2, 0xd9, 0x00, 0x6c, 0xb1, 0x4a, 0xa3, 0x7e, 0xc2, 0xb6, 0xde,
- 0xcb, 0x8e, 0x9b, 0x00, 0x6d, 0xc8, 0xc5, 0x64, 0x78, 0x00, 0x6c, 0x49,
- 0xc6, 0xcf, 0x85, 0x00, 0x6c, 0x51, 0x42, 0x05, 0xd4, 0xc2, 0xb7, 0x0a,
- 0x42, 0x12, 0x0a, 0x42, 0xb7, 0x16, 0xc5, 0x64, 0x78, 0x00, 0x6c, 0x59,
- 0xc6, 0xd2, 0xd9, 0x00, 0x6c, 0x60, 0xc5, 0x64, 0x78, 0x00, 0x6c, 0x89,
- 0xc6, 0xd0, 0xb1, 0x00, 0x6c, 0x90, 0xc5, 0x64, 0x78, 0x00, 0x6c, 0xa1,
- 0xc6, 0x64, 0x77, 0x00, 0x6c, 0xa8, 0x03, 0xc2, 0xb7, 0x22, 0x49, 0xaf,
- 0x37, 0x42, 0xb7, 0x2e, 0xc7, 0xc7, 0x30, 0x00, 0x6c, 0xf9, 0xc7, 0xc9,
- 0x6e, 0x00, 0x6d, 0x31, 0x06, 0x42, 0xb7, 0x40, 0xca, 0x4b, 0xb6, 0x00,
- 0x6d, 0x21, 0x42, 0x0c, 0x65, 0x42, 0xb7, 0x4c, 0xc7, 0xc3, 0x71, 0x00,
- 0x6d, 0x89, 0xc7, 0xc3, 0xef, 0x00, 0x6d, 0xe9, 0xc7, 0xca, 0x94, 0x00,
- 0x6e, 0x18, 0xc2, 0x01, 0x47, 0x00, 0x6f, 0x41, 0xc4, 0x04, 0x5e, 0x00,
- 0x6f, 0x48, 0xc3, 0x06, 0x9e, 0x00, 0x6f, 0x51, 0xc3, 0x0c, 0x5b, 0x00,
- 0x6f, 0x58, 0xc2, 0x26, 0x51, 0x00, 0x6f, 0x61, 0xc4, 0x18, 0x83, 0x00,
- 0x6f, 0x68, 0xca, 0xa5, 0xa4, 0x00, 0x6e, 0x81, 0xc8, 0xbb, 0x95, 0x00,
- 0x6e, 0x91, 0xc9, 0xb6, 0x24, 0x00, 0x6e, 0xa0, 0xc2, 0x04, 0x41, 0x00,
- 0x6e, 0xcb, 0x02, 0xb7, 0x58, 0xc5, 0xd8, 0xa9, 0x00, 0x6e, 0xd8, 0xca,
- 0xa7, 0x8e, 0x00, 0x6f, 0x91, 0xc9, 0x95, 0x7d, 0x00, 0x6f, 0x98, 0x1e,
- 0xc2, 0xb7, 0x5e, 0xa6, 0x0e, 0xd5, 0x41, 0xa5, 0x0e, 0xd5, 0x39, 0xa4,
- 0x0e, 0xd5, 0x31, 0xa3, 0x0e, 0xd5, 0x29, 0xa2, 0x0e, 0xd5, 0x21, 0xa1,
- 0x0e, 0xd5, 0x19, 0xa0, 0x0e, 0xd5, 0x11, 0x9f, 0x0e, 0xd5, 0x08, 0x4b,
- 0x43, 0xaf, 0xc2, 0xb7, 0x7a, 0x4a, 0x18, 0x91, 0x42, 0xb7, 0x95, 0xa3,
- 0x0e, 0xd4, 0xf9, 0xa2, 0x0e, 0xd4, 0xf1, 0xa1, 0x0e, 0xd4, 0xe9, 0xa0,
- 0x0e, 0xd4, 0xe1, 0x9f, 0x0e, 0xd4, 0xd8, 0x15, 0xc2, 0xb7, 0xad, 0x46,
- 0x17, 0xef, 0x42, 0xb7, 0xb9, 0xc8, 0x00, 0x2f, 0x0e, 0xd0, 0x48, 0xc9,
- 0xb4, 0x08, 0x0e, 0xd3, 0x71, 0xc5, 0xd9, 0xf3, 0x0e, 0xd3, 0x68, 0xc9,
- 0x65, 0xb1, 0x0e, 0xc8, 0xd1, 0x45, 0x04, 0x74, 0x42, 0xb7, 0xc5, 0xc8,
- 0x39, 0x95, 0x0e, 0xc8, 0xc1, 0xc6, 0x24, 0x18, 0x0e, 0xc8, 0xb0, 0xcc,
- 0x85, 0x0c, 0x0e, 0xd4, 0x31, 0xc5, 0xd8, 0x7c, 0x0e, 0xd4, 0x29, 0x42,
- 0x01, 0xf0, 0xc2, 0xb7, 0xd1, 0xc5, 0xd7, 0x2d, 0x0e, 0xd4, 0x19, 0xc5,
- 0x4b, 0x06, 0x0e, 0xd4, 0x10, 0xd0, 0x57, 0xe2, 0x0e, 0xd4, 0x01, 0xcf,
- 0x6b, 0xe1, 0x0e, 0xd3, 0xf8, 0x47, 0xc3, 0x32, 0xc2, 0xb7, 0xdd, 0xcb,
- 0x96, 0xc5, 0x0e, 0xd3, 0xb0, 0x00, 0xc2, 0xb7, 0xf9, 0xd2, 0x4a, 0xc4,
- 0x0e, 0xd2, 0x98, 0xd3, 0x43, 0xaf, 0x0e, 0xd3, 0xa1, 0x4a, 0x18, 0x91,
- 0x42, 0xb8, 0x05, 0x47, 0x02, 0x21, 0xc2, 0xb8, 0x11, 0xd3, 0x42, 0x7f,
- 0x0e, 0xd2, 0xf1, 0xd4, 0x3a, 0x99, 0x0e, 0xd2, 0xe9, 0x44, 0x01, 0x1a,
- 0xc2, 0xb8, 0x1d, 0xcc, 0x85, 0x18, 0x0e, 0xd2, 0xd1, 0xd0, 0x58, 0x62,
- 0x0e, 0xd2, 0xc8, 0xc7, 0x00, 0x48, 0x0e, 0xc8, 0x39, 0xc8, 0x39, 0x95,
- 0x0e, 0xc8, 0x31, 0xc6, 0x24, 0x18, 0x0e, 0xc8, 0x28, 0x00, 0x42, 0xb8,
- 0x29, 0xc3, 0x01, 0x64, 0x0e, 0xd1, 0x79, 0xc6, 0x00, 0x50, 0x0e, 0xd1,
- 0x71, 0xc4, 0x05, 0x2b, 0x0e, 0xd1, 0x68, 0xc7, 0xc3, 0x5c, 0x0e, 0xcc,
- 0x39, 0x49, 0xa9, 0x4f, 0x42, 0xb8, 0x3b, 0x4b, 0x98, 0x0f, 0xc2, 0xb8,
- 0x47, 0xc7, 0xc3, 0x5c, 0x0e, 0xca, 0x89, 0x49, 0xa9, 0x4f, 0x42, 0xb8,
- 0x59, 0x4a, 0x18, 0x91, 0xc2, 0xb8, 0x65, 0x4b, 0x43, 0xaf, 0x42, 0xb8,
- 0x72, 0xca, 0x46, 0x23, 0x0e, 0xd1, 0x01, 0xc4, 0x03, 0x68, 0x0e, 0xd0,
- 0xf9, 0xc2, 0x01, 0xc7, 0x0e, 0xd0, 0xf0, 0xc4, 0x9a, 0x40, 0x0e, 0xd0,
- 0xe9, 0x46, 0xd1, 0xcb, 0x42, 0xb8, 0x81, 0x44, 0x16, 0xc5, 0xc2, 0xb8,
- 0x8d, 0x45, 0x03, 0x2b, 0xc2, 0xb8, 0x99, 0xc6, 0x07, 0x41, 0x0e, 0xd0,
- 0xb1, 0xc8, 0xb9, 0x9d, 0x0e, 0xd0, 0xa9, 0xc4, 0x01, 0x75, 0x0e, 0xd0,
- 0xa0, 0xc4, 0x03, 0x68, 0x0e, 0xd0, 0x61, 0xc7, 0x89, 0xd9, 0x0e, 0xd0,
- 0x59, 0xc2, 0x01, 0xc7, 0x0e, 0xd0, 0x50, 0x08, 0xc2, 0xb8, 0xa5, 0xc5,
- 0x01, 0x93, 0x0e, 0xc4, 0x2b, 0x02, 0xb8, 0xb7, 0x0a, 0xc2, 0xb8, 0xbb,
- 0x05, 0xc2, 0xb8, 0xcd, 0xc4, 0x3f, 0x3e, 0x0e, 0xc3, 0xba, 0x02, 0xb8,
- 0xe3, 0x48, 0x52, 0x03, 0xc2, 0xb8, 0xe7, 0xc3, 0x18, 0x48, 0x0e, 0xd0,
- 0x00, 0xc6, 0xd1, 0x9b, 0x0e, 0xd1, 0xa1, 0xc7, 0xae, 0xcc, 0x0e, 0xd1,
- 0x98, 0xc3, 0xe7, 0x81, 0x0e, 0xd3, 0x49, 0x48, 0x15, 0x67, 0xc2, 0xb8,
- 0xf1, 0x19, 0xc2, 0xb8, 0xfd, 0x58, 0x22, 0x50, 0xc2, 0xb9, 0x09, 0x15,
- 0xc2, 0xb9, 0x1b, 0x45, 0xb7, 0xd9, 0xc2, 0xb9, 0x27, 0x45, 0xdf, 0x5c,
- 0xc2, 0xb9, 0x33, 0x05, 0xc2, 0xb9, 0x3f, 0x46, 0xcc, 0x31, 0xc2, 0xb9,
- 0x57, 0x47, 0x30, 0xd9, 0xc2, 0xb9, 0x69, 0x04, 0xc2, 0xb9, 0x7b, 0x47,
- 0x2f, 0x01, 0xc2, 0xb9, 0x87, 0x47, 0x06, 0xf1, 0x42, 0xb9, 0x99, 0xc3,
- 0xe7, 0x81, 0x0e, 0xd3, 0x41, 0x48, 0x15, 0x67, 0xc2, 0xb9, 0xae, 0x19,
- 0xc2, 0xb9, 0xba, 0x4b, 0x22, 0x50, 0xc2, 0xb9, 0xc6, 0x45, 0xb7, 0xd9,
- 0xc2, 0xb9, 0xd2, 0x45, 0xdf, 0x5c, 0xc2, 0xb9, 0xed, 0x05, 0xc2, 0xba,
- 0x05, 0x15, 0xc2, 0xba, 0x1d, 0x46, 0xcc, 0x31, 0xc2, 0xba, 0x29, 0x47,
- 0x30, 0xd9, 0xc2, 0xba, 0x3b, 0x04, 0xc2, 0xba, 0x4d, 0x47, 0x2f, 0x01,
- 0xc2, 0xba, 0x59, 0x47, 0x06, 0xf1, 0x42, 0xba, 0x6e, 0x48, 0x00, 0x48,
- 0xc2, 0xba, 0x83, 0x48, 0xbe, 0x8d, 0xc2, 0xba, 0x8f, 0x45, 0xdd, 0x45,
- 0x42, 0xba, 0xa4, 0xd5, 0x33, 0xa6, 0x0e, 0xc9, 0x39, 0x43, 0x10, 0x47,
- 0xc2, 0xba, 0xb9, 0xcf, 0x65, 0xab, 0x0e, 0xc9, 0x20, 0xc6, 0x05, 0x96,
- 0x0e, 0xd2, 0xc1, 0xc6, 0x24, 0x18, 0x0e, 0xd2, 0xb8, 0xc6, 0x13, 0x6c,
- 0x0e, 0xd2, 0xb1, 0x46, 0x15, 0x78, 0x42, 0xba, 0xc5, 0x00, 0x42, 0xba,
- 0xd7, 0x00, 0x42, 0xba, 0xe3, 0xc9, 0x45, 0x0b, 0x0e, 0xd2, 0x53, 0x02,
- 0xba, 0xef, 0xc4, 0x3f, 0x3e, 0x0e, 0xd2, 0x3b, 0x02, 0xba, 0xf3, 0xc8,
- 0xb7, 0xbd, 0x0e, 0xd2, 0x31, 0xc7, 0x29, 0xd4, 0x0e, 0xd2, 0x29, 0xc6,
- 0x01, 0x7a, 0x0e, 0xd2, 0x20, 0x00, 0x42, 0xba, 0xf7, 0x00, 0x42, 0xbb,
- 0x03, 0xc2, 0x01, 0xc7, 0x0e, 0xd0, 0x81, 0xc4, 0x03, 0x68, 0x0e, 0xd0,
- 0x68, 0xcb, 0x91, 0xc9, 0x0e, 0xcf, 0xdb, 0x02, 0xbb, 0x0f, 0xc3, 0x01,
- 0x64, 0x0e, 0xcf, 0xc0, 0xc5, 0x17, 0xef, 0x0e, 0xcf, 0xb1, 0xc5, 0x04,
- 0x73, 0x0e, 0xcf, 0xa8, 0x97, 0x08, 0xae, 0xe8, 0x8b, 0x08, 0xae, 0xd0,
- 0xd6, 0x2f, 0x9b, 0x08, 0xae, 0xc1, 0x83, 0x08, 0xac, 0xf0, 0xc2, 0x00,
- 0xa4, 0x08, 0xac, 0xc9, 0x83, 0x08, 0xac, 0xc0, 0x8e, 0x08, 0xac, 0x43,
- 0x02, 0xbb, 0x15, 0x94, 0x08, 0xac, 0x32, 0x02, 0xbb, 0x19, 0xc2, 0x00,
- 0xa4, 0x08, 0xac, 0xd9, 0x83, 0x08, 0xac, 0xd0, 0x45, 0x00, 0x6c, 0xc2,
- 0xbb, 0x1d, 0xcb, 0x97, 0x8b, 0x08, 0xae, 0x7a, 0x02, 0xbb, 0x41, 0xc3,
- 0x00, 0xe8, 0x08, 0xae, 0x29, 0xc3, 0x01, 0x4a, 0x08, 0xae, 0x20, 0xc4,
- 0x0f, 0x7c, 0x08, 0xad, 0xf9, 0xc5, 0x44, 0x7b, 0x08, 0xad, 0xf0, 0x8e,
- 0x05, 0x45, 0xe8, 0x94, 0x05, 0x45, 0xd8, 0x94, 0x05, 0x44, 0x43, 0x02,
- 0xbb, 0x47, 0x8e, 0x05, 0x44, 0x52, 0x02, 0xbb, 0x4b, 0x83, 0x05, 0x44,
- 0xe1, 0xc2, 0x00, 0xa4, 0x05, 0x44, 0xe8, 0x83, 0x05, 0x44, 0xf1, 0xc2,
- 0x00, 0xa4, 0x05, 0x44, 0xf8, 0xc2, 0x01, 0x47, 0x05, 0x46, 0x91, 0xc4,
- 0x04, 0x5e, 0x05, 0x46, 0x98, 0xc3, 0x06, 0x9e, 0x05, 0x46, 0xa1, 0xc3,
- 0x0c, 0x5b, 0x05, 0x46, 0xa8, 0xc2, 0x26, 0x51, 0x05, 0x46, 0xb1, 0xc4,
- 0x18, 0x83, 0x05, 0x46, 0xb8, 0xe0, 0x0c, 0x27, 0x0f, 0xb3, 0xb0, 0x4b,
- 0x99, 0x7a, 0xc2, 0xbb, 0x4f, 0xc7, 0x19, 0xf7, 0x08, 0x8e, 0x40, 0xc7,
- 0xc4, 0x35, 0x08, 0x8e, 0xd9, 0xd4, 0x39, 0x6d, 0x08, 0x8e, 0x79, 0xc5,
- 0x35, 0x00, 0x08, 0x8e, 0x51, 0xcb, 0x98, 0x9e, 0x08, 0x8e, 0x19, 0xcb,
- 0x91, 0x66, 0x08, 0x8e, 0x11, 0x03, 0xc2, 0xbb, 0x57, 0x42, 0x03, 0x32,
- 0xc2, 0xbb, 0x63, 0xcb, 0x1e, 0x17, 0x08, 0x8c, 0x00, 0xc4, 0x22, 0x71,
- 0x08, 0x8e, 0xc9, 0xc5, 0x01, 0xdb, 0x08, 0x8e, 0xc1, 0x15, 0xc2, 0xbb,
- 0x6f, 0x08, 0xc2, 0xbb, 0x7b, 0x16, 0xc2, 0xbb, 0x87, 0xc3, 0x01, 0xb4,
- 0x08, 0x8e, 0x89, 0xc4, 0x15, 0xd3, 0x08, 0x8e, 0x80, 0xcf, 0x64, 0x34,
- 0x08, 0x8e, 0x71, 0x03, 0xc2, 0xbb, 0x93, 0x91, 0x08, 0x8d, 0xf1, 0x87,
- 0x08, 0x8d, 0xe1, 0x48, 0xac, 0xc1, 0xc2, 0xbb, 0x9f, 0x97, 0x08, 0x8d,
- 0xb3, 0x02, 0xbb, 0xad, 0x8b, 0x08, 0x8d, 0xa2, 0x02, 0xbb, 0xb1, 0x83,
- 0x08, 0x8d, 0x89, 0xc2, 0x0c, 0x65, 0x08, 0x8d, 0x81, 0xc2, 0x00, 0xa4,
- 0x08, 0x8d, 0x78, 0x83, 0x08, 0x8d, 0x71, 0x47, 0xac, 0xc2, 0x42, 0xbb,
- 0xb5, 0xc2, 0x00, 0xc7, 0x08, 0x8d, 0x69, 0x83, 0x08, 0x8d, 0x60, 0xc2,
- 0x00, 0xa4, 0x08, 0x8d, 0x41, 0x83, 0x08, 0x8d, 0x38, 0xc2, 0x00, 0xa4,
- 0x08, 0x8d, 0x31, 0x83, 0x08, 0x8d, 0x28, 0x83, 0x08, 0x8d, 0x21, 0xc2,
- 0x00, 0xc1, 0x08, 0x8c, 0xf9, 0xc2, 0x1d, 0x5f, 0x08, 0x8c, 0xd1, 0xc2,
- 0x01, 0x29, 0x08, 0x8c, 0xa8, 0xc2, 0x00, 0xa4, 0x08, 0x8d, 0x19, 0x83,
- 0x08, 0x8d, 0x11, 0x06, 0x42, 0xbb, 0xc3, 0xc2, 0x00, 0xa4, 0x08, 0x8d,
- 0x09, 0x83, 0x08, 0x8d, 0x01, 0x16, 0x42, 0xbb, 0xcd, 0xc2, 0x00, 0xa4,
- 0x08, 0x8c, 0xc9, 0x83, 0x08, 0x8c, 0xc0, 0xc2, 0x00, 0xa4, 0x08, 0x8c,
- 0xb9, 0x83, 0x08, 0x8c, 0xb0, 0xc2, 0x00, 0xa4, 0x08, 0x8c, 0xa1, 0x83,
- 0x08, 0x8c, 0x98, 0xc2, 0x00, 0xa4, 0x08, 0x8c, 0x91, 0x83, 0x08, 0x8c,
- 0x88, 0x97, 0x08, 0x8c, 0x81, 0x8b, 0x08, 0x8c, 0x71, 0x83, 0x08, 0x8c,
- 0x20, 0x97, 0x08, 0x8c, 0x40, 0x8b, 0x08, 0x8c, 0x30, 0xc3, 0x01, 0xdc,
- 0x08, 0x22, 0xa1, 0xc2, 0x19, 0x4b, 0x08, 0x22, 0xf0, 0x96, 0x08, 0x23,
- 0x81, 0x94, 0x08, 0x23, 0xe8, 0x87, 0x08, 0x23, 0xc1, 0xc3, 0x5c, 0x62,
- 0x08, 0x23, 0xe0, 0xcd, 0x50, 0x88, 0x01, 0x57, 0x41, 0xd5, 0x36, 0x9a,
- 0x01, 0x57, 0x48, 0xe0, 0x07, 0xc7, 0x01, 0x5a, 0xf8, 0xc9, 0x1d, 0xd5,
- 0x01, 0x49, 0x31, 0xd4, 0x3a, 0x35, 0x01, 0x49, 0x50, 0xc9, 0xb5, 0x5e,
- 0x01, 0x0f, 0x91, 0xc9, 0x1d, 0xd5, 0x01, 0x49, 0x29, 0xd4, 0x3a, 0x0d,
- 0x01, 0x49, 0x49, 0xd9, 0x1f, 0xb9, 0x01, 0x49, 0x68, 0xca, 0xa1, 0xda,
- 0x01, 0x37, 0xb1, 0xc2, 0x00, 0xb2, 0x01, 0x1e, 0x68, 0x0e, 0xc2, 0xbb,
- 0xd7, 0x46, 0x01, 0xc7, 0xc2, 0xbb, 0xe3, 0xd0, 0x5c, 0x22, 0x01, 0x2f,
- 0x41, 0xd8, 0x23, 0x88, 0x01, 0x2d, 0x49, 0xda, 0x1d, 0x11, 0x01, 0x2d,
- 0x31, 0xcd, 0x77, 0x64, 0x01, 0x2d, 0x29, 0xcf, 0x67, 0x40, 0x01, 0x2d,
- 0x21, 0xd1, 0x52, 0x1c, 0x01, 0x4f, 0x01, 0xce, 0x72, 0x31, 0x01, 0x58,
- 0x91, 0xd1, 0x50, 0xea, 0x01, 0x58, 0x98, 0xc5, 0x08, 0xc2, 0x01, 0x18,
- 0x89, 0x89, 0x01, 0x9e, 0x90, 0x44, 0x1a, 0xd5, 0x42, 0xbb, 0xef, 0x44,
- 0x1a, 0xd5, 0x42, 0xbb, 0xfb, 0xc4, 0x98, 0x5e, 0x01, 0x98, 0x21, 0xc2,
- 0x00, 0x30, 0x01, 0x98, 0x28, 0x92, 0x01, 0x14, 0x99, 0x8e, 0x01, 0x9c,
- 0x40, 0xc9, 0xab, 0x3e, 0x01, 0x9b, 0xf8, 0x00, 0x42, 0xbc, 0x07, 0xd5,
- 0x37, 0xc0, 0x01, 0x56, 0x71, 0xc5, 0xd9, 0x03, 0x01, 0x9a, 0x89, 0xc2,
- 0x02, 0x59, 0x01, 0x9a, 0x90, 0xc3, 0x6c, 0xa3, 0x01, 0x9a, 0x99, 0xc5,
- 0xd8, 0x95, 0x01, 0x9a, 0xa0, 0xc2, 0x13, 0x51, 0x01, 0x9a, 0xa9, 0xc6,
- 0xcd, 0xdb, 0x01, 0x9a, 0xb0, 0xc7, 0x04, 0xd2, 0x01, 0x9d, 0x72, 0x02,
- 0xbc, 0x13, 0xc3, 0x1b, 0xff, 0x01, 0x99, 0x50, 0xc6, 0xcf, 0x55, 0x01,
- 0x99, 0x91, 0xc4, 0xe2, 0xaf, 0x01, 0x99, 0x99, 0xc3, 0x03, 0xdd, 0x01,
- 0x99, 0xa8, 0xc7, 0xc2, 0x60, 0x01, 0x99, 0xb1, 0xc4, 0xe0, 0xdb, 0x01,
- 0x99, 0xc8, 0x90, 0x01, 0x99, 0xf9, 0x11, 0x42, 0xbc, 0x19, 0x83, 0x01,
- 0x9b, 0x88, 0xc3, 0x13, 0xcf, 0x01, 0x99, 0x20, 0x00, 0x42, 0xbc, 0x23,
- 0xd0, 0x5e, 0x42, 0x01, 0x5e, 0x81, 0xc4, 0x0f, 0x43, 0x01, 0x99, 0xe9,
- 0xc3, 0x1b, 0xf2, 0x01, 0x9a, 0x00, 0x03, 0xc2, 0xbc, 0x2f, 0xc5, 0xd6,
- 0x88, 0x01, 0x9c, 0x00, 0xc7, 0xc6, 0x81, 0x01, 0x99, 0x71, 0x0d, 0x42,
- 0xbc, 0x3b, 0xc2, 0x05, 0x7b, 0x01, 0x99, 0xb9, 0x10, 0xc2, 0xbc, 0x45,
- 0xc3, 0x8e, 0x2e, 0x01, 0x99, 0xd8, 0x89, 0x01, 0x96, 0x69, 0x47, 0xc4,
- 0x5f, 0x42, 0xbc, 0x51, 0xc3, 0x04, 0x30, 0x01, 0x98, 0x59, 0x14, 0x42,
- 0xbc, 0x6f, 0xc6, 0xd2, 0x4f, 0x01, 0x98, 0xa9, 0xc7, 0xc1, 0x6b, 0x01,
- 0x98, 0xb1, 0xc5, 0xdc, 0x6e, 0x01, 0x98, 0xb8, 0xc6, 0xd2, 0xd3, 0x01,
- 0x98, 0xd1, 0xc4, 0x3f, 0x5c, 0x01, 0x98, 0xd8, 0xc4, 0xdc, 0xa3, 0x01,
- 0x98, 0xe9, 0xc3, 0x31, 0x68, 0x01, 0x98, 0xf0, 0x00, 0x42, 0xbc, 0x7b,
- 0xc3, 0x02, 0xa7, 0x01, 0x98, 0x71, 0xc3, 0x76, 0x38, 0x01, 0x98, 0x79,
- 0x8e, 0x01, 0x9f, 0xf8, 0xc2, 0x01, 0x29, 0x01, 0x98, 0x81, 0xc3, 0xe7,
- 0xd2, 0x01, 0x98, 0x89, 0xc5, 0xde, 0x30, 0x01, 0x98, 0x98, 0xc3, 0x0c,
- 0x46, 0x01, 0x98, 0xc8, 0xc5, 0xde, 0x03, 0x01, 0x98, 0xf9, 0xc6, 0xcd,
- 0x39, 0x01, 0x99, 0x00, 0x8b, 0x01, 0x99, 0x11, 0x91, 0x01, 0x99, 0x18,
- 0xc2, 0x00, 0x10, 0x01, 0x99, 0x40, 0xc5, 0xde, 0xbc, 0x01, 0x99, 0x69,
- 0x94, 0x01, 0x9b, 0xa0, 0x0b, 0xc2, 0xbc, 0x85, 0xc3, 0xbf, 0xbc, 0x01,
- 0x9a, 0x29, 0xc4, 0xe0, 0xc7, 0x01, 0x9a, 0x31, 0xc5, 0xda, 0x39, 0x01,
- 0x9a, 0x38, 0xc5, 0xda, 0xf7, 0x01, 0x9a, 0x41, 0xc2, 0x00, 0x5b, 0x01,
- 0x9a, 0x4b, 0x02, 0xbc, 0x91, 0x8e, 0x01, 0x9e, 0xa8, 0xc2, 0x01, 0x29,
- 0x01, 0x9a, 0x5b, 0x02, 0xbc, 0x97, 0xc5, 0xc1, 0x6d, 0x01, 0x9a, 0x68,
- 0x88, 0x01, 0x9c, 0x61, 0x89, 0x01, 0x9c, 0x69, 0x83, 0x01, 0x9c, 0x11,
- 0x8e, 0x01, 0x9c, 0xa9, 0x8f, 0x01, 0x9c, 0xd9, 0x95, 0x01, 0x9d, 0x91,
- 0x98, 0x01, 0x9d, 0xb1, 0x99, 0x01, 0x9d, 0xe0, 0x11, 0xc2, 0xbc, 0x9d,
- 0xc7, 0x07, 0x09, 0x01, 0x9d, 0x09, 0xc5, 0xd8, 0xc2, 0x01, 0x9d, 0x28,
- 0xc6, 0x04, 0x72, 0x01, 0x9e, 0xa0, 0x00, 0x42, 0xbc, 0xac, 0xc5, 0x70,
- 0xd7, 0x01, 0x9d, 0xc8, 0xc5, 0x70, 0xd7, 0x01, 0x9d, 0xf8, 0xc2, 0x01,
- 0x4a, 0x01, 0x9a, 0x71, 0xc2, 0x05, 0xd4, 0x01, 0x9a, 0x78, 0x46, 0x1a,
- 0xfc, 0xc2, 0xbc, 0xb8, 0xc6, 0xd0, 0x6f, 0x0f, 0x8d, 0x48, 0xce, 0x6c,
- 0x57, 0x0f, 0x8d, 0x29, 0x4f, 0x07, 0x17, 0x42, 0xbc, 0xc4, 0xcd, 0x76,
- 0xae, 0x0f, 0x8d, 0x09, 0xcb, 0x96, 0xf1, 0x0f, 0x8c, 0xe0, 0xc2, 0x00,
- 0x06, 0x0f, 0x90, 0x99, 0xc2, 0x0c, 0x65, 0x0f, 0x90, 0x11, 0xc4, 0xe2,
- 0x2f, 0x0f, 0x90, 0x08, 0xd2, 0x49, 0xc8, 0x0f, 0x8d, 0x11, 0xc3, 0x27,
- 0x05, 0x0f, 0x8c, 0xe8, 0x26, 0xc2, 0xbd, 0x2c, 0x22, 0xc2, 0xbd, 0x38,
- 0x24, 0xc2, 0xbd, 0x6c, 0x23, 0xc2, 0xbd, 0x88, 0x25, 0xc2, 0xbd, 0xac,
- 0x42, 0xe8, 0x0c, 0x42, 0xbd, 0xbe, 0x8d, 0x0f, 0x8c, 0xf1, 0xcf, 0x01,
- 0xb8, 0x01, 0x71, 0x60, 0xc9, 0x29, 0x48, 0x01, 0x21, 0x28, 0xc4, 0x06,
- 0x9d, 0x01, 0x20, 0xa1, 0x16, 0xc2, 0xbd, 0xd4, 0xc3, 0x01, 0xb4, 0x01,
- 0x20, 0x88, 0xc6, 0x06, 0x1b, 0x01, 0x20, 0xc9, 0x16, 0x42, 0xbd, 0xe0,
- 0xc3, 0x1a, 0x80, 0x00, 0x43, 0x51, 0x42, 0x03, 0xc7, 0xc2, 0xbd, 0xef,
- 0xc2, 0x02, 0x59, 0x00, 0x43, 0x39, 0xc3, 0x22, 0x7b, 0x00, 0x43, 0x31,
- 0x10, 0xc2, 0xbd, 0xf9, 0xc3, 0x21, 0x7b, 0x00, 0x43, 0x19, 0xc2, 0x24,
- 0x58, 0x00, 0x43, 0x08, 0xc7, 0xc4, 0xc8, 0x00, 0x39, 0x79, 0xc6, 0xd0,
- 0x03, 0x00, 0x39, 0x71, 0xc5, 0xde, 0x6c, 0x00, 0x39, 0x68, 0xc9, 0xb3,
- 0x1e, 0x00, 0x38, 0xe0, 0xc2, 0x17, 0x9f, 0x00, 0x3a, 0x79, 0xc5, 0xda,
- 0x5c, 0x00, 0x3a, 0x71, 0xc5, 0xd8, 0xb8, 0x00, 0x3a, 0x68, 0xc5, 0x01,
- 0x62, 0x00, 0x39, 0xd9, 0xc5, 0x00, 0x95, 0x00, 0x39, 0xd0, 0x48, 0x8a,
- 0x58, 0x42, 0xbe, 0x09, 0xcc, 0x8a, 0x58, 0x00, 0x38, 0x40, 0xd1, 0x55,
- 0x19, 0x01, 0x14, 0x59, 0xcb, 0x23, 0x35, 0x01, 0x14, 0x3b, 0x02, 0xbe,
- 0x15, 0x46, 0x00, 0x95, 0x42, 0xbe, 0x1b, 0xc4, 0x0f, 0x20, 0x01, 0x56,
- 0xa1, 0xc6, 0x31, 0x53, 0x01, 0x56, 0xb0, 0x90, 0x01, 0x03, 0xf9, 0x8b,
- 0x01, 0x03, 0x88, 0x8f, 0x00, 0xdd, 0xf9, 0x8d, 0x00, 0xdd, 0xf0, 0x09,
- 0xc2, 0xbe, 0x33, 0xc5, 0xde, 0xc6, 0x00, 0xdc, 0x00, 0xcf, 0x37, 0x1e,
- 0x01, 0x56, 0x18, 0xcb, 0x0e, 0x83, 0x01, 0x56, 0x29, 0xce, 0x38, 0x53,
- 0x01, 0x56, 0x39, 0xcf, 0x69, 0x89, 0x01, 0x56, 0x49, 0xcc, 0x24, 0x24,
- 0x01, 0x56, 0x58, 0x45, 0x00, 0x56, 0x42, 0xbe, 0x3f, 0xc3, 0x42, 0x02,
- 0x0f, 0xb0, 0x39, 0xc4, 0x77, 0x39, 0x0f, 0xb0, 0x41, 0xd0, 0x54, 0x3d,
- 0x0f, 0xb0, 0x68, 0xcb, 0x19, 0xc6, 0x0f, 0xb0, 0x53, 0x02, 0xbe, 0x51,
- 0xc9, 0xb2, 0x6a, 0x0f, 0xb0, 0x70, 0x45, 0x00, 0x6c, 0xc2, 0xbe, 0x57,
- 0xc9, 0xac, 0x3a, 0x01, 0x10, 0x68, 0x83, 0x07, 0xf2, 0x81, 0xc9, 0xb5,
- 0xe5, 0x07, 0xf3, 0x58, 0x46, 0x00, 0x6b, 0x42, 0xbe, 0x63, 0xc3, 0x01,
- 0xb4, 0x01, 0x0b, 0x83, 0x02, 0xbe, 0x6f, 0x08, 0xc2, 0xbe, 0x73, 0x16,
- 0xc2, 0xbe, 0x7d, 0x07, 0xc2, 0xbe, 0x8d, 0xc4, 0x22, 0x71, 0x01, 0x0b,
- 0xc1, 0x15, 0x42, 0xbe, 0x99, 0xcb, 0x1a, 0x3f, 0x07, 0xf2, 0xd1, 0xd6,
- 0x0a, 0x88, 0x07, 0xf2, 0xf1, 0xcd, 0x02, 0x52, 0x07, 0xf2, 0xe0, 0xcb,
- 0x1a, 0x3f, 0x07, 0xf2, 0xc9, 0xcd, 0x02, 0x52, 0x07, 0xf2, 0xd9, 0xd6,
- 0x0a, 0x88, 0x07, 0xf2, 0xe8, 0xcb, 0x0e, 0x83, 0x01, 0x55, 0x79, 0xcc,
- 0x24, 0x24, 0x01, 0x55, 0x88, 0xc8, 0x07, 0x5f, 0x01, 0x55, 0xa9, 0xcf,
- 0x69, 0x89, 0x01, 0x55, 0xc8, 0xcb, 0x1a, 0x3f, 0x07, 0xf1, 0xa9, 0xd6,
- 0x0a, 0x88, 0x07, 0xf1, 0xc9, 0xd8, 0x23, 0xb8, 0x07, 0xf1, 0xd9, 0xd4,
- 0x3c, 0x79, 0x07, 0xf1, 0xe9, 0xcd, 0x09, 0x51, 0x07, 0xf1, 0xf9, 0x46,
- 0x02, 0x31, 0xc2, 0xbe, 0xa5, 0xce, 0x24, 0xb2, 0x07, 0xf2, 0x39, 0x05,
- 0x42, 0xbe, 0xb1, 0xcc, 0x02, 0x53, 0x07, 0xf1, 0xc1, 0xcd, 0x66, 0x34,
- 0x07, 0xf2, 0x10, 0x4e, 0x23, 0xbe, 0xc2, 0xbe, 0xbd, 0xce, 0x66, 0x33,
- 0x07, 0xf2, 0x20, 0xc6, 0xd4, 0xbf, 0x0f, 0x85, 0x11, 0xc6, 0x7b, 0xe3,
- 0x0f, 0x85, 0x91, 0xc8, 0x4a, 0x99, 0x0f, 0x86, 0x11, 0xc5, 0xd8, 0x31,
- 0x0f, 0x86, 0x90, 0xc6, 0xd4, 0xbf, 0x0f, 0x85, 0x19, 0xc6, 0x7b, 0xe3,
- 0x0f, 0x85, 0x99, 0xc8, 0x4a, 0x99, 0x0f, 0x86, 0x19, 0xc5, 0xd8, 0x31,
- 0x0f, 0x86, 0x98, 0xc6, 0xd4, 0xbf, 0x0f, 0x85, 0x51, 0xc6, 0x7b, 0xe3,
- 0x0f, 0x85, 0xd1, 0xc8, 0x4a, 0x99, 0x0f, 0x86, 0x51, 0xc5, 0xd8, 0x31,
- 0x0f, 0x86, 0xd0, 0x9e, 0x0f, 0x87, 0x0b, 0x02, 0xbe, 0xc9, 0x9f, 0x0f,
- 0x87, 0x13, 0x02, 0xbe, 0xf1, 0xa0, 0x0f, 0x87, 0x19, 0xa1, 0x0f, 0x87,
- 0x21, 0xa2, 0x0f, 0x87, 0x29, 0xa3, 0x0f, 0x87, 0x31, 0xa4, 0x0f, 0x87,
- 0x39, 0xa5, 0x0f, 0x87, 0x41, 0xa6, 0x0f, 0x87, 0x48, 0x46, 0xcb, 0xa6,
- 0xc2, 0xbe, 0xf9, 0xc2, 0x00, 0x75, 0x0f, 0x87, 0x00, 0xc6, 0xd4, 0xbf,
- 0x0f, 0x85, 0x29, 0xc6, 0x7b, 0xe3, 0x0f, 0x85, 0xa9, 0xc8, 0x4a, 0x99,
- 0x0f, 0x86, 0x29, 0xc5, 0xd8, 0x31, 0x0f, 0x86, 0xa8, 0xc6, 0xd4, 0xbf,
- 0x0f, 0x85, 0x31, 0xc6, 0x7b, 0xe3, 0x0f, 0x85, 0xb1, 0xc8, 0x4a, 0x99,
- 0x0f, 0x86, 0x31, 0xc5, 0xd8, 0x31, 0x0f, 0x86, 0xb0, 0xc6, 0xd4, 0xbf,
- 0x0f, 0x85, 0x39, 0xc6, 0x7b, 0xe3, 0x0f, 0x85, 0xb9, 0xc8, 0x4a, 0x99,
- 0x0f, 0x86, 0x39, 0xc5, 0xd8, 0x31, 0x0f, 0x86, 0xb8, 0xc6, 0xd4, 0xbf,
- 0x0f, 0x85, 0x61, 0xc6, 0x7b, 0xe3, 0x0f, 0x85, 0xe1, 0xc8, 0x4a, 0x99,
- 0x0f, 0x86, 0x61, 0xc5, 0xd8, 0x31, 0x0f, 0x86, 0xe0, 0xc6, 0xd4, 0xbf,
- 0x0f, 0x85, 0x71, 0xc6, 0x7b, 0xe3, 0x0f, 0x85, 0xf1, 0xc8, 0x4a, 0x99,
- 0x0f, 0x86, 0x71, 0xc5, 0xd8, 0x31, 0x0f, 0x86, 0xf0, 0xc8, 0x01, 0xe7,
- 0x01, 0x51, 0xc9, 0xd1, 0x55, 0xf6, 0x01, 0x51, 0x71, 0xd0, 0x5b, 0x72,
- 0x01, 0x51, 0x68, 0xce, 0x6f, 0x4b, 0x01, 0x51, 0x41, 0x15, 0xc2, 0xbf,
- 0x11, 0x46, 0x38, 0x53, 0xc2, 0xbf, 0x1d, 0xc9, 0x0e, 0xac, 0x01, 0x51,
- 0x29, 0xd7, 0x2a, 0x93, 0x01, 0x51, 0x18, 0xc2, 0x01, 0xc7, 0x00, 0x04,
- 0x61, 0xc8, 0xb7, 0xa5, 0x00, 0x04, 0x61, 0xc4, 0x03, 0x68, 0x00, 0x04,
- 0x59, 0xc7, 0x29, 0xd4, 0x00, 0x04, 0x58, 0xc3, 0x18, 0x86, 0x01, 0x24,
- 0x39, 0xc3, 0x21, 0x32, 0x01, 0x23, 0xf8, 0xc2, 0x00, 0x56, 0x01, 0x90,
- 0x70, 0xc2, 0x00, 0x56, 0x01, 0x90, 0xc0, 0xc2, 0x00, 0x56, 0x01, 0x90,
- 0x80, 0xc2, 0x00, 0x56, 0x01, 0x90, 0xc8, 0xc2, 0x00, 0x56, 0x01, 0x90,
- 0x98, 0xc2, 0x00, 0x56, 0x01, 0x90, 0xd0, 0x00, 0x42, 0xbf, 0x29, 0xc2,
- 0x00, 0x56, 0x01, 0x90, 0xb8, 0xc2, 0x00, 0x7b, 0x01, 0x91, 0x21, 0xc2,
- 0x00, 0x9c, 0x01, 0x91, 0x59, 0xc7, 0xc3, 0x94, 0x01, 0x91, 0xb0, 0xc3,
- 0x18, 0x84, 0x01, 0x91, 0x31, 0xc2, 0x03, 0x2d, 0x01, 0x92, 0x10, 0x90,
- 0x01, 0x91, 0x81, 0xc7, 0xc7, 0x84, 0x01, 0x91, 0xe0, 0xc3, 0x01, 0xe4,
- 0x01, 0x91, 0x89, 0xc3, 0xe6, 0x64, 0x01, 0x91, 0xd8, 0xc5, 0x56, 0xbd,
- 0x01, 0x91, 0xf1, 0x96, 0x01, 0x92, 0x08, 0xc6, 0x2b, 0x12, 0x08, 0xd7,
- 0xb0, 0x9b, 0x08, 0xd7, 0x21, 0x90, 0x08, 0xd7, 0x03, 0x02, 0xbf, 0x31,
- 0x99, 0x08, 0xd7, 0x11, 0x8e, 0x08, 0xd7, 0x09, 0x8f, 0x08, 0xd6, 0xf9,
- 0x96, 0x08, 0xd6, 0xf1, 0x8d, 0x08, 0xd6, 0xe9, 0x92, 0x08, 0xd6, 0xe0,
- 0xc6, 0x2b, 0x12, 0x08, 0xd7, 0x68, 0x19, 0xc2, 0xbf, 0x35, 0xc2, 0x00,
- 0x4d, 0x08, 0x43, 0xf1, 0xc4, 0x04, 0x5e, 0x08, 0x43, 0xd8, 0xc3, 0x0c,
- 0x5b, 0x08, 0x43, 0xe9, 0xc3, 0x06, 0x9e, 0x08, 0x43, 0xe0, 0x16, 0xc2,
- 0xbf, 0x3f, 0x15, 0xc2, 0xbf, 0x4b, 0xc4, 0x5d, 0xe2, 0x08, 0x43, 0xa1,
- 0xc4, 0xbf, 0xb9, 0x08, 0x43, 0x99, 0xc2, 0x00, 0x27, 0x08, 0x43, 0x89,
- 0x03, 0xc2, 0xbf, 0x55, 0xc3, 0x1f, 0xd8, 0x08, 0x43, 0x71, 0xc9, 0xb4,
- 0x35, 0x08, 0x43, 0x69, 0xc3, 0x0b, 0x0e, 0x08, 0x43, 0x61, 0xc6, 0xd0,
- 0x5d, 0x08, 0x43, 0x59, 0xc4, 0xe2, 0x57, 0x08, 0x43, 0x51, 0xc4, 0x4b,
- 0x98, 0x08, 0x43, 0x49, 0xc2, 0x01, 0xf0, 0x08, 0x43, 0x23, 0x02, 0xbf,
- 0x61, 0xc5, 0x4b, 0x92, 0x08, 0x43, 0x31, 0xc3, 0x78, 0xa9, 0x08, 0x43,
- 0x29, 0xc6, 0x45, 0xf6, 0x08, 0x43, 0x19, 0xc5, 0xa1, 0x94, 0x08, 0x43,
- 0x11, 0xc4, 0xe4, 0x8f, 0x08, 0x43, 0x08, 0xc2, 0x15, 0x15, 0x0b, 0x5c,
- 0x69, 0xc2, 0x00, 0x03, 0x0b, 0x5c, 0x31, 0xc4, 0xa9, 0x0b, 0x0b, 0x5b,
- 0xe8, 0xc3, 0xa1, 0xd8, 0x0b, 0x59, 0x59, 0xc3, 0xcd, 0x3f, 0x0b, 0x58,
- 0xe8, 0xc5, 0xd6, 0x15, 0x0b, 0x5b, 0xa8, 0xc4, 0xe1, 0xbb, 0x0b, 0x59,
- 0xf9, 0xc3, 0x6b, 0x01, 0x0b, 0x59, 0xf1, 0xc3, 0x33, 0x12, 0x0b, 0x59,
- 0xe9, 0xc5, 0xda, 0x70, 0x0b, 0x59, 0xe0, 0xc3, 0x46, 0xe6, 0x0b, 0x59,
- 0xd1, 0xc2, 0x00, 0x3a, 0x0b, 0x59, 0xb8, 0xc8, 0xc0, 0xd5, 0x0b, 0x5b,
- 0x01, 0xc9, 0x6e, 0xbf, 0x0b, 0x5a, 0xe8, 0x04, 0xc2, 0xbf, 0x67, 0xcc,
- 0x81, 0x7c, 0x0f, 0xb2, 0x79, 0xcc, 0x84, 0xa0, 0x0f, 0xb2, 0x71, 0xc9,
- 0xab, 0x1a, 0x0f, 0xce, 0xa9, 0xc5, 0xd8, 0xef, 0x0f, 0xd6, 0x28, 0xe0,
- 0x03, 0x47, 0x0f, 0xb2, 0x60, 0xcb, 0x8f, 0xda, 0x0f, 0xce, 0xb1, 0xce,
- 0x73, 0x65, 0x0f, 0xce, 0xc0, 0x91, 0x08, 0x48, 0xd1, 0xc4, 0x18, 0x85,
- 0x08, 0x48, 0xc0, 0xc9, 0x1e, 0x19, 0x05, 0x43, 0x98, 0x83, 0x05, 0x42,
- 0x81, 0xc2, 0x00, 0xa4, 0x05, 0x42, 0x88, 0x83, 0x05, 0x43, 0x49, 0xc2,
- 0x00, 0xa4, 0x05, 0x43, 0x50, 0xc2, 0x01, 0x09, 0x05, 0x43, 0x39, 0xc2,
- 0x1d, 0x5f, 0x05, 0x43, 0x41, 0xc2, 0x02, 0x59, 0x05, 0x43, 0x88, 0xd4,
- 0x3a, 0xd5, 0x08, 0x0f, 0xe8, 0xcf, 0x22, 0x71, 0x08, 0xd8, 0x39, 0xcf,
- 0x68, 0x6c, 0x08, 0xd8, 0x30, 0xca, 0x1d, 0xd4, 0x08, 0xd8, 0x28, 0xca,
- 0x1d, 0xd4, 0x08, 0xd8, 0x18, 0xc4, 0x0f, 0x7c, 0x00, 0x4a, 0x69, 0xc5,
- 0x44, 0x7b, 0x00, 0x48, 0x18, 0xc7, 0x76, 0x59, 0x00, 0x49, 0xe9, 0xc7,
- 0x11, 0x41, 0x00, 0x48, 0x10, 0x00, 0x42, 0xbf, 0x73, 0xc6, 0xc4, 0x36,
- 0x05, 0x47, 0xe1, 0xd2, 0x4d, 0x28, 0x05, 0x47, 0x90, 0x94, 0x00, 0x4a,
- 0x20, 0x8e, 0x00, 0x4b, 0x18, 0x87, 0x00, 0x4a, 0xb8, 0x83, 0x00, 0x49,
- 0xb1, 0x44, 0x2c, 0x01, 0x42, 0xbf, 0x83, 0x8e, 0x00, 0x48, 0x63, 0x02,
- 0xbf, 0x8f, 0x94, 0x00, 0x48, 0x5a, 0x02, 0xbf, 0x93, 0xc2, 0x00, 0xc7,
- 0x00, 0x49, 0xa1, 0x83, 0x00, 0x49, 0x98, 0xc2, 0x00, 0xc1, 0x00, 0x49,
- 0x49, 0x83, 0x00, 0x49, 0x18, 0xc2, 0x00, 0xa4, 0x00, 0x49, 0x11, 0x83,
- 0x00, 0x49, 0x09, 0x06, 0x42, 0xbf, 0x97, 0xc2, 0x00, 0xa4, 0x00, 0x49,
- 0x01, 0x83, 0x00, 0x48, 0xf8, 0x45, 0xc9, 0x93, 0x42, 0xbf, 0xa1, 0x83,
- 0x00, 0x48, 0xc1, 0xc2, 0x00, 0xa4, 0x00, 0x4a, 0xd0, 0x83, 0x00, 0x48,
- 0xb1, 0xc2, 0x00, 0xa4, 0x00, 0x4a, 0xc8, 0x87, 0x00, 0x4b, 0xb8, 0xc4,
- 0x18, 0x83, 0x00, 0x4b, 0x69, 0xc2, 0x26, 0x51, 0x00, 0x4b, 0x60, 0xc3,
- 0x0c, 0x5b, 0x00, 0x4b, 0x59, 0xc3, 0x06, 0x9e, 0x00, 0x4b, 0x50, 0xc4,
- 0x04, 0x5e, 0x00, 0x4b, 0x49, 0xc2, 0x01, 0x47, 0x00, 0x4b, 0x40, 0x8b,
- 0x08, 0x20, 0x01, 0x83, 0x08, 0x20, 0x13, 0x02, 0xbf, 0xad, 0x91, 0x08,
- 0x20, 0x23, 0x02, 0xbf, 0xb1, 0x87, 0x08, 0x20, 0x08, 0x8b, 0x08, 0x20,
- 0x31, 0x87, 0x08, 0x20, 0x39, 0x83, 0x08, 0x20, 0x43, 0x02, 0xbf, 0xb5,
- 0x91, 0x08, 0x20, 0x52, 0x02, 0xbf, 0xb9, 0x99, 0x08, 0x20, 0x69, 0x8b,
- 0x08, 0x21, 0x30, 0xc2, 0x00, 0x4c, 0x08, 0x20, 0x99, 0xc3, 0x0e, 0xa3,
- 0x08, 0x20, 0xe0, 0x88, 0x08, 0x20, 0xc9, 0xc2, 0x00, 0x6e, 0x08, 0x20,
- 0xd9, 0x95, 0x08, 0x20, 0xeb, 0x02, 0xbf, 0xbd, 0x94, 0x08, 0x21, 0x09,
- 0x8e, 0x08, 0x21, 0x11, 0x8f, 0x08, 0x21, 0x19, 0x90, 0x08, 0x21, 0x23,
- 0x02, 0xbf, 0xc1, 0x99, 0x08, 0x21, 0x38, 0xc2, 0x00, 0x4c, 0x08, 0x20,
- 0xf1, 0xc3, 0x0e, 0xa3, 0x08, 0x21, 0x00, 0x8b, 0x08, 0x21, 0x41, 0x87,
- 0x08, 0x21, 0x49, 0x83, 0x08, 0x21, 0x53, 0x02, 0xbf, 0xc5, 0x91, 0x08,
- 0x21, 0x62, 0x02, 0xbf, 0xc9, 0x8b, 0x08, 0x21, 0x71, 0x87, 0x08, 0x21,
- 0x79, 0x83, 0x08, 0x21, 0x83, 0x02, 0xbf, 0xcd, 0x91, 0x08, 0x21, 0x92,
- 0x02, 0xbf, 0xd1, 0x99, 0x08, 0x21, 0xa9, 0x8b, 0x08, 0x22, 0x70, 0xc2,
- 0x00, 0x4c, 0x08, 0x21, 0xd9, 0xc3, 0x0e, 0xa3, 0x08, 0x22, 0x20, 0x88,
- 0x08, 0x22, 0x09, 0xc2, 0x00, 0x6e, 0x08, 0x22, 0x19, 0x95, 0x08, 0x22,
- 0x2b, 0x02, 0xbf, 0xd5, 0x94, 0x08, 0x22, 0x49, 0x8e, 0x08, 0x22, 0x51,
- 0x8f, 0x08, 0x22, 0x59, 0x90, 0x08, 0x22, 0x63, 0x02, 0xbf, 0xd9, 0x99,
- 0x08, 0x22, 0x78, 0xc2, 0x00, 0x4c, 0x08, 0x22, 0x31, 0xc3, 0x0e, 0xa3,
- 0x08, 0x22, 0x40, 0xc9, 0x11, 0xdc, 0x01, 0x24, 0x71, 0xc5, 0x00, 0xaa,
- 0x0f, 0x88, 0x40, 0xc9, 0x11, 0xdc, 0x01, 0x24, 0x69, 0xc5, 0x00, 0xaa,
- 0x0f, 0x88, 0x38, 0xc9, 0x11, 0xdc, 0x01, 0x24, 0x61, 0xc5, 0x00, 0xaa,
- 0x0f, 0x88, 0x30, 0xc9, 0x11, 0xdc, 0x01, 0x24, 0x59, 0xc5, 0x00, 0xaa,
- 0x0f, 0x88, 0x28, 0xc9, 0x11, 0xdc, 0x01, 0x24, 0x51, 0xc5, 0x00, 0xaa,
- 0x0f, 0x88, 0x20, 0xc9, 0x11, 0xdc, 0x01, 0x24, 0x49, 0xc5, 0x00, 0xaa,
- 0x0f, 0x88, 0x18, 0xc4, 0x18, 0x83, 0x08, 0xca, 0xb9, 0xc2, 0x26, 0x51,
- 0x08, 0xca, 0xb0, 0xc3, 0x0c, 0x5b, 0x08, 0xca, 0xa9, 0xc3, 0x06, 0x9e,
- 0x08, 0xca, 0xa0, 0xc4, 0x04, 0x5e, 0x08, 0xca, 0x99, 0xc2, 0x01, 0x47,
- 0x08, 0xca, 0x90, 0x8b, 0x08, 0xc9, 0xb9, 0x83, 0x08, 0xc9, 0x80, 0x97,
- 0x08, 0xc9, 0xa0, 0x8b, 0x08, 0xc9, 0x90, 0xc2, 0x00, 0xa4, 0x08, 0xc8,
- 0xc9, 0x83, 0x08, 0xc8, 0xc0, 0xc4, 0x18, 0x83, 0x01, 0x3c, 0x81, 0xc2,
- 0x26, 0x51, 0x01, 0x3c, 0x78, 0xc3, 0x0c, 0x5b, 0x01, 0x3c, 0x71, 0xc3,
- 0x06, 0x9e, 0x01, 0x3c, 0x68, 0xc4, 0x04, 0x5e, 0x01, 0x3c, 0x61, 0xc2,
- 0x01, 0x47, 0x01, 0x3c, 0x58, 0x45, 0x01, 0x93, 0xc2, 0xbf, 0xdd, 0xc9,
- 0x64, 0xa3, 0x01, 0x48, 0x58, 0xcd, 0x7e, 0x0b, 0x01, 0x0d, 0x09, 0x46,
- 0x01, 0xef, 0x42, 0xbf, 0xe9, 0xc5, 0x01, 0x0f, 0x0f, 0xc2, 0x39, 0xd0,
- 0x58, 0x92, 0x0f, 0xc2, 0x18, 0x44, 0x01, 0x1e, 0xc2, 0xbf, 0xef, 0x45,
- 0x01, 0xf7, 0x42, 0xbf, 0xf9, 0x00, 0x42, 0xc0, 0x03, 0xca, 0x9d, 0xc0,
- 0x01, 0x27, 0xf1, 0x46, 0x06, 0x97, 0x42, 0xc0, 0x21, 0x00, 0x42, 0xc0,
- 0x3f, 0xc6, 0x31, 0x53, 0x01, 0x16, 0x89, 0xc4, 0x0f, 0x20, 0x01, 0x16,
- 0x81, 0xc6, 0xb7, 0x47, 0x01, 0x55, 0xe1, 0xcd, 0x6e, 0xea, 0x01, 0x72,
- 0x20, 0xc5, 0x13, 0x89, 0x01, 0x52, 0x79, 0xcc, 0x08, 0x9b, 0x01, 0x52,
- 0x70, 0xcd, 0x69, 0x7c, 0x01, 0x57, 0x61, 0xcb, 0x97, 0x49, 0x01, 0x72,
- 0x48, 0xc3, 0x01, 0x2e, 0x01, 0x01, 0x9b, 0x02, 0xc0, 0x4b, 0xc6, 0xb7,
- 0x17, 0x01, 0x55, 0xd8, 0x19, 0xc2, 0xc0, 0x51, 0x46, 0x1a, 0xfc, 0x42,
- 0xc0, 0x5b, 0xce, 0x50, 0x87, 0x01, 0x55, 0x18, 0x46, 0x04, 0x73, 0xc2,
- 0xc0, 0x67, 0xc9, 0xa9, 0xa0, 0x01, 0x0a, 0x28, 0x92, 0x01, 0x08, 0xcb,
- 0x02, 0xc0, 0x77, 0xc5, 0x52, 0x39, 0x01, 0x09, 0xf1, 0x9c, 0x01, 0x09,
- 0x21, 0x94, 0x01, 0x08, 0xe9, 0x93, 0x01, 0x08, 0xd1, 0x90, 0x01, 0x08,
- 0xa9, 0x8a, 0x01, 0x08, 0x69, 0x85, 0x01, 0x08, 0x10, 0xc5, 0x52, 0x39,
- 0x01, 0x09, 0xe9, 0xc2, 0x07, 0x19, 0x01, 0x09, 0xe0, 0xc9, 0x0a, 0x4a,
- 0x01, 0x54, 0xc9, 0xcc, 0x00, 0x9b, 0x01, 0x54, 0xd0, 0x4c, 0x21, 0xc0,
- 0xc2, 0xc0, 0x7b, 0xd5, 0x35, 0x0b, 0x01, 0x57, 0xc9, 0xd8, 0x23, 0x28,
- 0x01, 0x57, 0xd0, 0xc2, 0x00, 0xa4, 0x08, 0xc0, 0xb9, 0x83, 0x08, 0xc0,
- 0xb0, 0xc2, 0x00, 0xa4, 0x08, 0xc0, 0xa9, 0x83, 0x08, 0xc0, 0xa0, 0xc4,
- 0x01, 0x10, 0x0d, 0xe4, 0xc9, 0xc4, 0x35, 0x4b, 0x0d, 0xe4, 0x80, 0xc7,
- 0x27, 0x22, 0x0d, 0xe3, 0x98, 0xc3, 0x00, 0x8b, 0x0d, 0xe4, 0xb1, 0xc9,
- 0xb1, 0x1d, 0x0d, 0xe4, 0x98, 0xc5, 0x02, 0x82, 0x0d, 0xe3, 0xe0, 0xc2,
- 0x00, 0x5a, 0x0d, 0xe1, 0xa8, 0xc2, 0x00, 0x5a, 0x0d, 0xe1, 0x98, 0xc2,
- 0x00, 0x70, 0x0d, 0xe1, 0x70, 0xc6, 0x01, 0x61, 0x0d, 0xe1, 0x30, 0xc2,
- 0x00, 0x5a, 0x0d, 0xe2, 0x00, 0x90, 0x0d, 0xe3, 0x49, 0x99, 0x0d, 0xe2,
+ 0x9d, 0x0c, 0x58, 0x00, 0xc2, 0x01, 0x0e, 0x08, 0x96, 0x59, 0xc2, 0x0c,
+ 0x25, 0x08, 0x96, 0x49, 0x83, 0x08, 0x96, 0x40, 0xc2, 0x01, 0x0e, 0x08,
+ 0x96, 0x39, 0x83, 0x08, 0x96, 0x30, 0xc2, 0x0c, 0x25, 0x08, 0x90, 0xe1,
+ 0xc2, 0x01, 0x0e, 0x08, 0x90, 0xb9, 0x83, 0x08, 0x90, 0xb0, 0xc2, 0x01,
+ 0x0e, 0x08, 0x90, 0xa9, 0x83, 0x08, 0x90, 0xa0, 0xc4, 0xdd, 0x8e, 0x08,
+ 0x91, 0xf1, 0xc5, 0xde, 0x2d, 0x08, 0x91, 0xb8, 0x43, 0x39, 0x07, 0xc2,
+ 0x72, 0xa0, 0x43, 0x1d, 0xc1, 0xc2, 0x72, 0xac, 0x47, 0xc5, 0xb5, 0xc2,
+ 0x72, 0xb8, 0x42, 0x02, 0x49, 0x42, 0x72, 0xc4, 0x42, 0x00, 0x9a, 0xc2,
+ 0x72, 0xd0, 0x43, 0x6f, 0x91, 0xc2, 0x72, 0xe8, 0xc9, 0xb1, 0x3b, 0x00,
+ 0xcf, 0x00, 0x44, 0xe4, 0xb3, 0xc2, 0x72, 0xf4, 0x43, 0x2c, 0x7e, 0x42,
+ 0x73, 0x00, 0xc3, 0x3a, 0xc5, 0x00, 0xcf, 0x89, 0xc4, 0xbc, 0x79, 0x00,
+ 0xcf, 0x08, 0x12, 0xc2, 0x73, 0x0c, 0x04, 0xc2, 0x73, 0x18, 0xc4, 0xc5,
+ 0xb7, 0x00, 0xbf, 0x89, 0xc3, 0x15, 0x86, 0x00, 0xbf, 0x80, 0xc7, 0xcd,
+ 0xb8, 0x00, 0xbe, 0xe9, 0xcc, 0x8b, 0xf8, 0x00, 0xbe, 0xe1, 0xc4, 0xe7,
+ 0xa3, 0x00, 0xbe, 0x78, 0xc6, 0xd0, 0xee, 0x00, 0xbe, 0xd1, 0xc3, 0x02,
+ 0x33, 0x00, 0xbe, 0xa1, 0xc6, 0xd8, 0x08, 0x00, 0xbe, 0x70, 0xc5, 0xdc,
+ 0x6b, 0x00, 0xbe, 0xc1, 0x03, 0x42, 0x73, 0x24, 0xce, 0x6f, 0x8a, 0x00,
+ 0xbe, 0xb1, 0xc4, 0xe9, 0x27, 0x00, 0xbe, 0x90, 0xca, 0xa2, 0xfe, 0x00,
+ 0xbe, 0x69, 0xc6, 0xd7, 0xc6, 0x00, 0xbe, 0x50, 0xc4, 0xe9, 0x5f, 0x00,
+ 0xbe, 0x61, 0xc6, 0xd4, 0x42, 0x00, 0xbe, 0x38, 0x97, 0x00, 0xbe, 0x29,
+ 0x8b, 0x00, 0xbe, 0x19, 0x87, 0x00, 0xbe, 0x11, 0x83, 0x00, 0xbd, 0xb0,
+ 0x91, 0x00, 0xbe, 0x21, 0x87, 0x00, 0xbd, 0xf0, 0x87, 0x00, 0xbe, 0x01,
+ 0x8b, 0x00, 0xbd, 0xc0, 0x83, 0x00, 0xbd, 0xf9, 0x9b, 0x00, 0xbd, 0xd0,
+ 0x83, 0x00, 0xbd, 0xe9, 0x97, 0x00, 0xbd, 0xe0, 0x97, 0x00, 0xbd, 0x99,
+ 0x8b, 0x00, 0xbd, 0x81, 0x83, 0x00, 0xbd, 0x21, 0x93, 0x00, 0xbd, 0x18,
+ 0xc3, 0x0a, 0x1f, 0x00, 0xbd, 0x91, 0xc3, 0x05, 0x17, 0x00, 0xbd, 0x88,
+ 0x97, 0x00, 0xbd, 0x4b, 0x02, 0x73, 0x36, 0x8d, 0x00, 0xbd, 0x40, 0x8b,
+ 0x00, 0xbd, 0x30, 0x91, 0x00, 0xbc, 0xb9, 0x83, 0x00, 0xbc, 0xa8, 0x91,
+ 0x00, 0xbc, 0x91, 0x83, 0x00, 0xbc, 0x80, 0x91, 0x00, 0xbc, 0x69, 0x83,
+ 0x00, 0xbc, 0x58, 0x91, 0x00, 0xbc, 0x41, 0x83, 0x00, 0xbc, 0x30, 0x91,
+ 0x00, 0xbc, 0x19, 0x83, 0x00, 0xbc, 0x08, 0x45, 0x00, 0x3f, 0xc2, 0x73,
+ 0x3a, 0x83, 0x01, 0x85, 0xa9, 0x8b, 0x01, 0x85, 0xb9, 0x97, 0x01, 0x85,
+ 0xc9, 0x87, 0x01, 0x85, 0xd9, 0x91, 0x01, 0x85, 0xe8, 0x47, 0x7a, 0xe7,
+ 0x42, 0x73, 0x77, 0x8b, 0x01, 0x86, 0xfb, 0x02, 0x73, 0x85, 0x83, 0x01,
+ 0x86, 0xf1, 0x97, 0x01, 0x87, 0x01, 0x87, 0x01, 0x87, 0x09, 0x91, 0x01,
+ 0x87, 0x10, 0x83, 0x01, 0x85, 0x59, 0x8b, 0x01, 0x85, 0x69, 0x97, 0x01,
+ 0x85, 0x79, 0x87, 0x01, 0x85, 0x89, 0x91, 0x01, 0x85, 0x98, 0x83, 0x01,
+ 0x85, 0x61, 0x8b, 0x01, 0x85, 0x71, 0x97, 0x01, 0x85, 0x81, 0x87, 0x01,
+ 0x85, 0x91, 0x91, 0x01, 0x85, 0xa0, 0x83, 0x01, 0x85, 0xb1, 0x8b, 0x01,
+ 0x85, 0xc1, 0x97, 0x01, 0x85, 0xd1, 0x87, 0x01, 0x85, 0xe1, 0x91, 0x01,
+ 0x85, 0xf0, 0x83, 0x01, 0x85, 0xf9, 0x8b, 0x01, 0x86, 0x09, 0x97, 0x01,
+ 0x86, 0x21, 0x87, 0x01, 0x86, 0x31, 0x91, 0x01, 0x86, 0x40, 0x83, 0x01,
+ 0x86, 0x01, 0x8b, 0x01, 0x86, 0x11, 0x97, 0x01, 0x86, 0x29, 0x87, 0x01,
+ 0x86, 0x39, 0x91, 0x01, 0x86, 0x48, 0x83, 0x01, 0x86, 0x51, 0x8b, 0x01,
+ 0x86, 0x59, 0x97, 0x01, 0x86, 0x61, 0x87, 0x01, 0x86, 0x69, 0x91, 0x01,
+ 0x86, 0x70, 0x83, 0x01, 0x86, 0x79, 0x8b, 0x01, 0x86, 0x91, 0x97, 0x01,
+ 0x86, 0xa9, 0x87, 0x01, 0x86, 0xc1, 0x91, 0x01, 0x86, 0xd8, 0x83, 0x01,
+ 0x86, 0x81, 0x8b, 0x01, 0x86, 0x99, 0x97, 0x01, 0x86, 0xb1, 0x87, 0x01,
+ 0x86, 0xc9, 0x91, 0x01, 0x86, 0xe0, 0x83, 0x01, 0x86, 0x89, 0x8b, 0x01,
+ 0x86, 0xa1, 0x97, 0x01, 0x86, 0xb9, 0x87, 0x01, 0x86, 0xd1, 0x91, 0x01,
+ 0x86, 0xe8, 0x83, 0x01, 0x87, 0x21, 0x97, 0x01, 0x87, 0x31, 0x91, 0x01,
+ 0x87, 0x40, 0x83, 0x01, 0x87, 0x49, 0x8b, 0x01, 0x87, 0x51, 0x97, 0x01,
+ 0x87, 0x59, 0x87, 0x01, 0x87, 0x61, 0x91, 0x01, 0x87, 0x68, 0x83, 0x01,
+ 0x87, 0x79, 0x8b, 0x01, 0x87, 0x81, 0x87, 0x01, 0x87, 0x89, 0x91, 0x01,
+ 0x87, 0x90, 0x97, 0x01, 0x87, 0xa1, 0x83, 0x01, 0x87, 0xb9, 0x8b, 0x01,
+ 0x87, 0xc1, 0x87, 0x01, 0x87, 0xc9, 0x91, 0x01, 0x87, 0xd0, 0xc4, 0x15,
+ 0xa7, 0x08, 0xfa, 0xb9, 0xc2, 0x22, 0x45, 0x08, 0xfa, 0xb0, 0xc3, 0x0d,
+ 0x8f, 0x08, 0xfa, 0xa9, 0xc3, 0x08, 0xde, 0x08, 0xfa, 0xa0, 0xc4, 0x05,
+ 0xde, 0x08, 0xfa, 0x99, 0xc2, 0x0a, 0x20, 0x08, 0xfa, 0x90, 0xc4, 0x7c,
+ 0x4e, 0x08, 0xfa, 0x71, 0xca, 0xa6, 0x64, 0x08, 0xfa, 0x40, 0xc2, 0x01,
+ 0x0e, 0x08, 0xf8, 0xf9, 0x83, 0x08, 0xf8, 0xf0, 0xc2, 0x01, 0x0e, 0x08,
+ 0xf8, 0xe9, 0x83, 0x08, 0xf8, 0xe0, 0x8e, 0x08, 0xf8, 0x68, 0x94, 0x08,
+ 0xf8, 0x58, 0xc4, 0x21, 0x28, 0x08, 0x85, 0xc9, 0xc5, 0x45, 0xcf, 0x08,
+ 0x84, 0x10, 0xc2, 0x01, 0x0e, 0x08, 0x84, 0xd9, 0xc3, 0x45, 0xca, 0x08,
+ 0x84, 0xd1, 0x83, 0x08, 0x84, 0xc8, 0xc2, 0x01, 0x0e, 0x08, 0x84, 0xc1,
+ 0x83, 0x08, 0x84, 0xb8, 0xd2, 0x4e, 0xd8, 0x00, 0x64, 0x01, 0xc6, 0xc6,
+ 0xf8, 0x00, 0x64, 0x20, 0xca, 0x21, 0x1b, 0x00, 0x64, 0x09, 0xdd, 0x10,
+ 0xa2, 0x00, 0x67, 0x98, 0xc7, 0x10, 0xac, 0x00, 0x64, 0x11, 0xc7, 0x7d,
+ 0xf8, 0x00, 0x65, 0xe8, 0xc5, 0x45, 0xcf, 0x00, 0x64, 0x19, 0xc4, 0x21,
+ 0x28, 0x00, 0x66, 0x68, 0x83, 0x00, 0x64, 0x2b, 0x02, 0x73, 0x8b, 0x8b,
+ 0x00, 0x64, 0x3b, 0x02, 0x73, 0x97, 0x97, 0x00, 0x64, 0x4b, 0x02, 0x73,
+ 0x9b, 0x18, 0xc2, 0x73, 0x9f, 0x87, 0x00, 0x64, 0x73, 0x02, 0x73, 0xa9,
+ 0x91, 0x00, 0x64, 0x93, 0x02, 0x73, 0xad, 0x0d, 0xc2, 0x73, 0xb1, 0x09,
+ 0xc2, 0x73, 0xbb, 0x10, 0xc2, 0x73, 0xc5, 0x05, 0xc2, 0x73, 0xde, 0x0c,
+ 0xc2, 0x73, 0xe8, 0x16, 0xc2, 0x73, 0xf2, 0x06, 0xc2, 0x74, 0x00, 0x12,
+ 0xc2, 0x74, 0x0e, 0x04, 0xc2, 0x74, 0x18, 0xc2, 0x00, 0x3f, 0x00, 0x65,
+ 0x71, 0xc2, 0x1a, 0x36, 0x00, 0x65, 0x79, 0x14, 0xc2, 0x74, 0x22, 0x0e,
+ 0xc2, 0x74, 0x2c, 0x15, 0xc2, 0x74, 0x34, 0xc2, 0x01, 0x0e, 0x00, 0x65,
+ 0xc9, 0xc2, 0x01, 0xa7, 0x00, 0x66, 0xf0, 0x83, 0x00, 0x65, 0xf1, 0x8b,
+ 0x00, 0x66, 0x41, 0x97, 0x00, 0x66, 0x60, 0x8b, 0x00, 0x66, 0x00, 0x97,
+ 0x00, 0x66, 0x10, 0x94, 0x00, 0x66, 0x1b, 0x02, 0x74, 0x44, 0x8e, 0x00,
+ 0x67, 0x12, 0x02, 0x74, 0x48, 0x87, 0x00, 0x66, 0x38, 0x91, 0x00, 0x66,
+ 0x58, 0xc2, 0x0a, 0x20, 0x00, 0x67, 0x41, 0xc4, 0x05, 0xde, 0x00, 0x67,
+ 0x48, 0xc3, 0x08, 0xde, 0x00, 0x67, 0x51, 0xc3, 0x0d, 0x8f, 0x00, 0x67,
+ 0x58, 0xc2, 0x22, 0x45, 0x00, 0x67, 0x61, 0xc4, 0x15, 0xa7, 0x00, 0x67,
+ 0x68, 0xc2, 0x03, 0x5f, 0x01, 0x78, 0x03, 0x02, 0x74, 0x4c, 0x12, 0xc2,
+ 0x74, 0x52, 0xc2, 0x19, 0x3e, 0x01, 0x7b, 0xe0, 0x0b, 0xc2, 0x74, 0x5e,
+ 0x07, 0xc2, 0x74, 0x6e, 0x03, 0xc2, 0x74, 0x7e, 0xc3, 0x0a, 0x68, 0x01,
+ 0x7d, 0x3a, 0x02, 0x74, 0x8a, 0x11, 0xc2, 0x74, 0x90, 0x0b, 0xc2, 0x74,
+ 0xb3, 0x14, 0xc2, 0x74, 0xc3, 0x07, 0x42, 0x74, 0xd3, 0x0e, 0xc2, 0x74,
+ 0xdf, 0x07, 0xc2, 0x74, 0xe9, 0x12, 0xc2, 0x74, 0xff, 0x05, 0xc2, 0x75,
+ 0x15, 0xc4, 0x00, 0x3f, 0x01, 0x79, 0x49, 0x0a, 0xc2, 0x75, 0x21, 0xc4,
+ 0xa2, 0xb7, 0x01, 0x79, 0xc9, 0x16, 0xc2, 0x75, 0x29, 0xc5, 0x01, 0xea,
+ 0x01, 0x7a, 0x29, 0xc2, 0x0b, 0xfd, 0x01, 0x7a, 0x39, 0x03, 0xc2, 0x75,
+ 0x37, 0xc4, 0x49, 0xab, 0x01, 0x7b, 0x11, 0x0b, 0xc2, 0x75, 0x47, 0xc3,
+ 0x2d, 0x9c, 0x01, 0x7b, 0x51, 0xc4, 0x0c, 0x3c, 0x01, 0x7d, 0x98, 0x11,
+ 0xc2, 0x75, 0x53, 0xcf, 0x67, 0xe9, 0x01, 0x78, 0xb1, 0x07, 0xc2, 0x75,
+ 0x5d, 0x03, 0x42, 0x75, 0x67, 0xc2, 0x0a, 0x20, 0x01, 0x78, 0x33, 0x02,
+ 0x75, 0x77, 0x03, 0xc2, 0x75, 0x7d, 0xc2, 0x01, 0x04, 0x01, 0x78, 0xb9,
+ 0x42, 0x00, 0xd3, 0xc2, 0x75, 0x8f, 0x14, 0xc2, 0x75, 0x9b, 0x0b, 0xc2,
+ 0x75, 0xad, 0x11, 0x42, 0x75, 0xb9, 0xc2, 0x00, 0x3a, 0x01, 0x78, 0x41,
+ 0x11, 0xc2, 0x75, 0xc5, 0x07, 0xc2, 0x75, 0xd3, 0x0b, 0x42, 0x75, 0xdf,
+ 0x10, 0xc2, 0x75, 0xeb, 0xc4, 0x00, 0x48, 0x01, 0x78, 0x59, 0x03, 0xc2,
+ 0x75, 0xf7, 0xc3, 0x15, 0xa8, 0x01, 0x7e, 0x8b, 0x02, 0x76, 0x02, 0xc2,
+ 0x04, 0x30, 0x01, 0x7b, 0x61, 0xc9, 0xb5, 0x19, 0x01, 0x7e, 0x58, 0x11,
+ 0xc2, 0x76, 0x08, 0x0e, 0xc2, 0x76, 0x24, 0xc4, 0xe7, 0x57, 0x01, 0x79,
+ 0x31, 0x03, 0xc2, 0x76, 0x34, 0xc3, 0x2e, 0x99, 0x01, 0x7d, 0x10, 0xc2,
+ 0x01, 0xa1, 0x01, 0x78, 0x71, 0x10, 0x42, 0x76, 0x46, 0xc4, 0x00, 0xc8,
+ 0x01, 0x78, 0x91, 0x14, 0xc2, 0x76, 0x52, 0xc3, 0x00, 0x36, 0x01, 0x7b,
+ 0xf1, 0xc2, 0x00, 0x48, 0x01, 0x7c, 0xb8, 0x14, 0xc2, 0x76, 0x5e, 0x11,
+ 0xc2, 0x76, 0x6a, 0x07, 0xc2, 0x76, 0x76, 0x03, 0xc2, 0x76, 0x82, 0x0a,
+ 0xc2, 0x76, 0x91, 0x42, 0x01, 0x5b, 0x42, 0x76, 0x9d, 0x0b, 0xc2, 0x76,
+ 0xa5, 0xc3, 0xbe, 0x95, 0x01, 0x79, 0x39, 0x03, 0xc2, 0x76, 0xb7, 0xc2,
+ 0x00, 0x5f, 0x01, 0x7c, 0xd1, 0xc2, 0x0b, 0xfd, 0x01, 0x7c, 0xd8, 0xc4,
+ 0x44, 0x00, 0x01, 0x78, 0xe1, 0xc2, 0x01, 0x66, 0x01, 0x7a, 0x21, 0x42,
+ 0x00, 0x63, 0xc2, 0x76, 0xc5, 0xc2, 0x00, 0x3b, 0x01, 0x7b, 0xe8, 0x91,
+ 0x01, 0x79, 0x0b, 0x02, 0x76, 0xd1, 0x42, 0x00, 0x9a, 0xc2, 0x76, 0xdd,
+ 0xc3, 0x00, 0x97, 0x01, 0x7d, 0x41, 0xc4, 0xd9, 0x1a, 0x01, 0x7e, 0x08,
+ 0x0b, 0xc2, 0x76, 0xe9, 0x11, 0xc2, 0x76, 0xf9, 0x14, 0xc2, 0x77, 0x15,
+ 0x03, 0xc2, 0x77, 0x27, 0x0e, 0xc2, 0x77, 0x33, 0xc3, 0x0d, 0xb8, 0x01,
+ 0x7c, 0xb0, 0x11, 0xc2, 0x77, 0x45, 0xc2, 0x00, 0xdd, 0x01, 0x7b, 0xc8,
+ 0xc2, 0x00, 0xd3, 0x01, 0x7a, 0x89, 0x0b, 0xc2, 0x77, 0x4f, 0x03, 0xc2,
+ 0x77, 0x67, 0xc6, 0x13, 0xfd, 0x01, 0x7b, 0xd9, 0xc3, 0x65, 0x6c, 0x01,
+ 0x7c, 0xe1, 0x0e, 0xc2, 0x77, 0x79, 0x14, 0x42, 0x77, 0x83, 0xc2, 0x00,
+ 0x06, 0x01, 0x7a, 0xf9, 0x94, 0x01, 0x7b, 0xc0, 0xc5, 0xe3, 0x7d, 0x01,
+ 0x7c, 0xa9, 0xc6, 0xd5, 0x02, 0x01, 0x7d, 0x28, 0xcb, 0x20, 0x59, 0x0f,
+ 0xb0, 0xd1, 0xcc, 0x1a, 0x5e, 0x0f, 0xb0, 0xc8, 0x42, 0x00, 0xe5, 0xc2,
+ 0x77, 0x8f, 0xc3, 0x01, 0x0d, 0x0b, 0x79, 0x90, 0xc3, 0xea, 0xce, 0x0b,
+ 0x7c, 0xc9, 0xc3, 0x83, 0xe8, 0x0b, 0x7c, 0xc1, 0xc3, 0x82, 0xb0, 0x0b,
+ 0x7c, 0xb9, 0xc3, 0x3b, 0x0b, 0x0b, 0x7c, 0xb1, 0xc3, 0x82, 0xe0, 0x0b,
+ 0x7c, 0xa9, 0xc3, 0x82, 0xec, 0x0b, 0x7c, 0xa1, 0xc3, 0x82, 0xa4, 0x0b,
+ 0x7c, 0x98, 0x87, 0x0b, 0x7a, 0x49, 0x83, 0x0b, 0x79, 0xb9, 0xc2, 0x01,
+ 0x0e, 0x0b, 0x79, 0x71, 0xc2, 0x0e, 0xe5, 0x0b, 0x79, 0x50, 0xc2, 0x1a,
+ 0x36, 0x0b, 0x78, 0xe1, 0x83, 0x0b, 0x78, 0xd0, 0xca, 0x56, 0x66, 0x0b,
+ 0x7a, 0x80, 0xc2, 0x01, 0x0e, 0x0b, 0x79, 0x69, 0x83, 0x0b, 0x79, 0x60,
+ 0xc2, 0x01, 0x0e, 0x0b, 0x79, 0x21, 0x83, 0x0b, 0x79, 0x18, 0xc2, 0x01,
+ 0x0e, 0x0b, 0x78, 0xa9, 0x83, 0x0b, 0x78, 0xa0, 0xc2, 0x18, 0x7a, 0x0b,
+ 0x7a, 0x39, 0x83, 0x0b, 0x79, 0xc1, 0xc2, 0x01, 0x0e, 0x0b, 0x79, 0x79,
+ 0xc2, 0x05, 0x5c, 0x0b, 0x79, 0x58, 0xc2, 0x1a, 0x36, 0x0b, 0x78, 0xe9,
+ 0x83, 0x0b, 0x78, 0xd8, 0xc3, 0x8f, 0x34, 0x0b, 0x79, 0xf9, 0x10, 0xc2,
+ 0x77, 0xb3, 0xc2, 0x00, 0x3f, 0x0b, 0x78, 0x30, 0x15, 0xc2, 0x77, 0xbd,
+ 0xc2, 0x1a, 0x36, 0x0b, 0x7a, 0x01, 0x83, 0x0b, 0x79, 0xe8, 0x83, 0x0b,
+ 0x79, 0xe1, 0xc2, 0x01, 0x0e, 0x0b, 0x79, 0xb0, 0x15, 0xc2, 0x77, 0xc7,
+ 0x83, 0x0b, 0x78, 0x69, 0xc2, 0x00, 0x44, 0x0b, 0x78, 0x60, 0xc2, 0x01,
+ 0x0e, 0x0b, 0x79, 0x49, 0x83, 0x0b, 0x79, 0x40, 0xc2, 0x1a, 0x36, 0x0b,
+ 0x78, 0xc9, 0x83, 0x0b, 0x78, 0xc0, 0x90, 0x0b, 0x7b, 0x62, 0x02, 0x77,
+ 0xd1, 0xc2, 0x00, 0x56, 0x0b, 0x7c, 0x30, 0x90, 0x0b, 0x7b, 0x1a, 0x02,
+ 0x77, 0xd5, 0x94, 0x0b, 0x7b, 0xa8, 0x89, 0x0b, 0x7a, 0xf8, 0x94, 0x0b,
+ 0x7c, 0x11, 0x9b, 0x0b, 0x7b, 0x00, 0x87, 0x0b, 0x7b, 0xa0, 0x89, 0x0b,
+ 0x7a, 0xc0, 0x00, 0x42, 0x77, 0xd9, 0xcd, 0x0f, 0x50, 0x0f, 0xbe, 0x19,
+ 0xca, 0x25, 0x5a, 0x0f, 0xbe, 0x08, 0xc6, 0x01, 0xe9, 0x0f, 0xbc, 0x79,
+ 0xc6, 0x03, 0xfa, 0x01, 0x35, 0x50, 0xd0, 0x60, 0x0f, 0x0f, 0xbc, 0x29,
+ 0xcb, 0x8d, 0xf1, 0x01, 0x35, 0x58, 0x42, 0x00, 0xd0, 0xc2, 0x77, 0xe5,
+ 0x43, 0x05, 0xe8, 0x42, 0x77, 0xf1, 0x42, 0x00, 0xd0, 0xc2, 0x77, 0xf7,
+ 0x43, 0x05, 0xe8, 0x42, 0x78, 0x03, 0x49, 0x34, 0x9f, 0xc2, 0x78, 0x09,
+ 0xd3, 0x3e, 0xe3, 0x0f, 0xbd, 0x81, 0x4c, 0x0f, 0x44, 0x42, 0x78, 0x15,
+ 0xd1, 0x56, 0x53, 0x01, 0x35, 0x61, 0xc4, 0x01, 0x0e, 0x01, 0x2c, 0x91,
+ 0xc6, 0x12, 0x73, 0x0f, 0xbd, 0x51, 0x43, 0x48, 0xf0, 0x42, 0x78, 0x21,
+ 0xcf, 0x18, 0x2e, 0x0f, 0xbd, 0xe1, 0xd2, 0x25, 0x52, 0x0f, 0xbe, 0x70,
+ 0x9b, 0x0b, 0x73, 0xfb, 0x02, 0x78, 0x2d, 0x83, 0x0b, 0x73, 0x6b, 0x02,
+ 0x78, 0x31, 0x91, 0x0b, 0x73, 0xeb, 0x02, 0x78, 0x3b, 0x94, 0x0b, 0x73,
+ 0xe1, 0x90, 0x0b, 0x73, 0xdb, 0x02, 0x78, 0x3f, 0x86, 0x0b, 0x73, 0xc9,
+ 0x9a, 0x0b, 0x73, 0xc1, 0x8a, 0x0b, 0x73, 0xb3, 0x02, 0x78, 0x47, 0x93,
+ 0x0b, 0x73, 0xa9, 0x8e, 0x0b, 0x73, 0xa1, 0x97, 0x0b, 0x73, 0x91, 0x85,
+ 0x0b, 0x73, 0x89, 0x84, 0x0b, 0x73, 0x81, 0x87, 0x0b, 0x73, 0x79, 0x8c,
+ 0x0b, 0x73, 0x71, 0x8d, 0x0b, 0x73, 0x63, 0x02, 0x78, 0x4b, 0x8b, 0x0b,
+ 0x73, 0x59, 0x88, 0x0b, 0x73, 0x51, 0x89, 0x0b, 0x73, 0x49, 0x96, 0x0b,
+ 0x73, 0x41, 0x92, 0x0b, 0x73, 0x39, 0x9c, 0x0b, 0x73, 0x29, 0x99, 0x0b,
+ 0x73, 0x19, 0x98, 0x0b, 0x73, 0x11, 0x95, 0x0b, 0x73, 0x09, 0x8f, 0x0b,
+ 0x73, 0x00, 0x9b, 0x0b, 0x72, 0xfb, 0x02, 0x78, 0x4f, 0x83, 0x0b, 0x72,
+ 0x6b, 0x02, 0x78, 0x53, 0x91, 0x0b, 0x72, 0xeb, 0x02, 0x78, 0x5d, 0x94,
+ 0x0b, 0x72, 0xe1, 0x90, 0x0b, 0x72, 0xdb, 0x02, 0x78, 0x61, 0x86, 0x0b,
+ 0x72, 0xc9, 0x9a, 0x0b, 0x72, 0xc1, 0x8a, 0x0b, 0x72, 0xb3, 0x02, 0x78,
+ 0x69, 0x93, 0x0b, 0x72, 0xa9, 0x8e, 0x0b, 0x72, 0xa1, 0x97, 0x0b, 0x72,
+ 0x91, 0x85, 0x0b, 0x72, 0x89, 0x84, 0x0b, 0x72, 0x81, 0x87, 0x0b, 0x72,
+ 0x79, 0x8c, 0x0b, 0x72, 0x71, 0x8d, 0x0b, 0x72, 0x63, 0x02, 0x78, 0x6d,
+ 0x8b, 0x0b, 0x72, 0x59, 0x88, 0x0b, 0x72, 0x51, 0x89, 0x0b, 0x72, 0x49,
+ 0x96, 0x0b, 0x72, 0x41, 0x92, 0x0b, 0x72, 0x39, 0x9c, 0x0b, 0x72, 0x29,
+ 0x99, 0x0b, 0x72, 0x19, 0x98, 0x0b, 0x72, 0x11, 0x95, 0x0b, 0x72, 0x09,
+ 0x8f, 0x0b, 0x72, 0x00, 0xc4, 0x05, 0xde, 0x0b, 0x74, 0x1b, 0x02, 0x78,
+ 0x71, 0xc2, 0x0a, 0x20, 0x0b, 0x74, 0x12, 0x02, 0x78, 0x77, 0xcf, 0x65,
+ 0xdc, 0x0b, 0x74, 0xa0, 0xc4, 0x15, 0xa7, 0x0b, 0x74, 0x39, 0xc2, 0x22,
+ 0x45, 0x0b, 0x74, 0x30, 0xc3, 0x0d, 0x8f, 0x0b, 0x74, 0x29, 0xc3, 0x08,
+ 0xde, 0x0b, 0x74, 0x20, 0xc7, 0x1f, 0xd9, 0x0b, 0x74, 0x91, 0xc5, 0x66,
+ 0x81, 0x0b, 0x74, 0x58, 0xc8, 0x4c, 0x2c, 0x0b, 0x74, 0x89, 0xc6, 0x42,
+ 0xe9, 0x0b, 0x74, 0x80, 0xc6, 0x12, 0xf0, 0x0b, 0x74, 0x79, 0xc7, 0x52,
+ 0xbd, 0x0b, 0x74, 0x70, 0xc7, 0x55, 0xba, 0x0b, 0x74, 0x69, 0xc5, 0x25,
+ 0x4c, 0x0b, 0x74, 0x61, 0xc2, 0x01, 0x04, 0x0b, 0x74, 0x50, 0xc6, 0x05,
+ 0xaf, 0x01, 0x1e, 0xb1, 0xc9, 0x6c, 0x63, 0x01, 0x1e, 0xa8, 0x24, 0xc2,
+ 0x78, 0x7d, 0x25, 0xc2, 0x78, 0xb9, 0x1f, 0xc2, 0x78, 0xf5, 0x1e, 0xc2,
+ 0x79, 0x31, 0x26, 0xc2, 0x79, 0x6d, 0x22, 0xc2, 0x79, 0xa9, 0x1d, 0xc2,
+ 0x79, 0xe5, 0x21, 0xc2, 0x7a, 0x1b, 0x23, 0xc2, 0x7a, 0x57, 0x20, 0x42,
+ 0x7a, 0x93, 0x26, 0xc2, 0x7a, 0xcf, 0x20, 0xc2, 0x7a, 0xff, 0x1e, 0xc2,
+ 0x7b, 0x3b, 0x23, 0xc2, 0x7b, 0x77, 0x24, 0xc2, 0x7b, 0xb3, 0x21, 0xc2,
+ 0x7b, 0xef, 0x1d, 0xc2, 0x7c, 0x2b, 0x22, 0xc2, 0x7c, 0x67, 0x25, 0xc2,
+ 0x7c, 0xa3, 0x1f, 0x42, 0x7c, 0xdf, 0xc2, 0x0a, 0x20, 0x0f, 0x46, 0x41,
+ 0xc4, 0x05, 0xde, 0x0f, 0x46, 0x48, 0xc3, 0x08, 0xde, 0x0f, 0x46, 0x51,
+ 0xc3, 0x0d, 0x8f, 0x0f, 0x46, 0x58, 0xc2, 0x22, 0x45, 0x0f, 0x46, 0x61,
+ 0xc4, 0x15, 0xa7, 0x0f, 0x46, 0x68, 0x07, 0xc2, 0x7d, 0x1b, 0xc8, 0x4f,
+ 0x8c, 0x0f, 0x46, 0x98, 0x95, 0x0f, 0x46, 0x91, 0xca, 0xa2, 0x9a, 0x0f,
+ 0x46, 0xa8, 0x16, 0xc2, 0x7d, 0x25, 0xcd, 0x81, 0x80, 0x08, 0x4f, 0xf1,
+ 0x07, 0xc2, 0x7d, 0x37, 0x15, 0xc2, 0x7d, 0x43, 0x08, 0xc2, 0x7d, 0x4f,
+ 0x44, 0x05, 0x17, 0x42, 0x7d, 0x5b, 0xc4, 0x24, 0x35, 0x08, 0x4e, 0x43,
+ 0x02, 0x7d, 0x67, 0xc5, 0x05, 0x1b, 0x08, 0x4e, 0x3b, 0x02, 0x7d, 0x71,
+ 0x15, 0xc2, 0x7d, 0x7b, 0x08, 0xc2, 0x7d, 0x8d, 0x16, 0xc2, 0x7d, 0x95,
+ 0xc3, 0x05, 0x17, 0x08, 0x4e, 0x02, 0x02, 0x7d, 0xa6, 0x48, 0x3e, 0x5e,
+ 0xc2, 0x7d, 0xaa, 0x46, 0x02, 0x00, 0x42, 0x7d, 0xb6, 0xc3, 0xec, 0x48,
+ 0x08, 0x4c, 0xf9, 0x8c, 0x08, 0x4c, 0xf0, 0xc2, 0x0c, 0x25, 0x08, 0x4c,
+ 0xe9, 0x16, 0xc2, 0x7e, 0x15, 0xc2, 0x0e, 0x13, 0x08, 0x4c, 0xb9, 0x0d,
+ 0xc2, 0x7e, 0x2d, 0x15, 0xc2, 0x7e, 0x37, 0xc3, 0x02, 0xe6, 0x08, 0x4c,
+ 0x91, 0xc2, 0x00, 0x96, 0x08, 0x4c, 0x81, 0x14, 0xc2, 0x7e, 0x45, 0x83,
+ 0x08, 0x4c, 0x01, 0x87, 0x08, 0x4c, 0x09, 0x8b, 0x08, 0x4c, 0x11, 0x91,
+ 0x08, 0x4c, 0x19, 0xc2, 0x1a, 0x36, 0x08, 0x4c, 0x21, 0xc2, 0x00, 0x4c,
+ 0x08, 0x4c, 0x29, 0x04, 0xc2, 0x7e, 0x4f, 0xc2, 0x07, 0x44, 0x08, 0x4c,
+ 0x41, 0xc2, 0x00, 0x3f, 0x08, 0x4c, 0x49, 0x10, 0x42, 0x7e, 0x59, 0x47,
+ 0x22, 0xfd, 0xc2, 0x7e, 0x6d, 0xcc, 0x8b, 0x80, 0x01, 0x4c, 0xd8, 0xc3,
+ 0x81, 0xa7, 0x05, 0x5f, 0x29, 0x03, 0xc2, 0x7e, 0x73, 0x97, 0x05, 0x57,
+ 0x70, 0xc3, 0x81, 0xa7, 0x05, 0x5f, 0x21, 0x8b, 0x05, 0x57, 0x58, 0x97,
+ 0x05, 0x57, 0x61, 0xc3, 0x81, 0xa7, 0x05, 0x5f, 0x40, 0xc7, 0xcd, 0x17,
+ 0x05, 0x5f, 0x10, 0xc3, 0x9a, 0x5e, 0x05, 0x5e, 0x4b, 0x02, 0x7e, 0x7b,
+ 0x83, 0x05, 0x5e, 0x2b, 0x02, 0x7e, 0x81, 0xc2, 0x01, 0x01, 0x05, 0x57,
+ 0x41, 0xc2, 0x1a, 0x36, 0x05, 0x57, 0x18, 0xc2, 0x00, 0x95, 0x05, 0x5e,
+ 0x3b, 0x02, 0x7e, 0x87, 0x16, 0xc2, 0x7e, 0x8d, 0xc3, 0x19, 0x3b, 0x05,
+ 0x5e, 0x50, 0x83, 0x05, 0x5e, 0x23, 0x02, 0x7e, 0x97, 0xc3, 0x08, 0x89,
+ 0x05, 0x5e, 0x80, 0xc2, 0x03, 0x84, 0x05, 0x5e, 0x03, 0x02, 0x7e, 0x9d,
+ 0xc3, 0x19, 0x3b, 0x05, 0x5e, 0x40, 0xc3, 0x08, 0x89, 0x05, 0x5e, 0xd1,
+ 0x83, 0x05, 0x5e, 0xa8, 0xc3, 0x19, 0x3b, 0x05, 0x5e, 0xc9, 0x06, 0xc2,
+ 0x7e, 0xa3, 0xc2, 0x00, 0x95, 0x05, 0x5e, 0xb8, 0xc3, 0x19, 0x3b, 0x05,
+ 0x5e, 0xc1, 0xc2, 0x03, 0x84, 0x05, 0x5e, 0x90, 0xc2, 0x0e, 0xe5, 0x05,
+ 0x57, 0x51, 0xc2, 0x01, 0x0e, 0x05, 0x57, 0x49, 0xc2, 0x01, 0x02, 0x05,
+ 0x5e, 0x08, 0x83, 0x05, 0x57, 0x11, 0xc2, 0x00, 0x95, 0x05, 0x5e, 0x30,
+ 0xc7, 0xcd, 0x17, 0x05, 0x5e, 0xe8, 0xc7, 0xcd, 0x17, 0x05, 0x5e, 0xe0,
+ 0xc3, 0x08, 0x89, 0x05, 0x5e, 0x99, 0xc2, 0x00, 0x95, 0x05, 0x5e, 0xb0,
+ 0xc9, 0xaf, 0xd3, 0x0f, 0xb5, 0xa9, 0xc7, 0x62, 0x06, 0x0f, 0xb4, 0xf1,
+ 0xc8, 0xc0, 0x83, 0x0f, 0xb5, 0x00, 0x05, 0xc2, 0x7e, 0xad, 0x15, 0xc2,
+ 0x7e, 0xd7, 0x14, 0xc2, 0x7e, 0xed, 0x0e, 0xc2, 0x7f, 0x03, 0x09, 0xc2,
+ 0x7f, 0x15, 0x04, 0xc2, 0x7f, 0x2a, 0x06, 0xc2, 0x7f, 0x36, 0x03, 0xc2,
+ 0x7f, 0x40, 0x12, 0xc2, 0x7f, 0x52, 0x16, 0xc2, 0x7f, 0x5e, 0x17, 0xc2,
+ 0x7f, 0x6a, 0x18, 0xc2, 0x7f, 0x7a, 0x0f, 0xc2, 0x7f, 0x86, 0x07, 0xc2,
+ 0x7f, 0x90, 0x0a, 0xc2, 0x7f, 0x9c, 0x1b, 0xc2, 0x7f, 0xa8, 0xca, 0xa3,
+ 0xb2, 0x00, 0x17, 0xf0, 0x45, 0x08, 0xd8, 0xc2, 0x7f, 0xb4, 0xcb, 0x91,
+ 0xff, 0x08, 0xb2, 0x11, 0xc4, 0x1c, 0xb3, 0x08, 0xb2, 0x08, 0xc4, 0xe7,
+ 0x97, 0x08, 0xb2, 0x21, 0x03, 0xc2, 0x7f, 0xd8, 0x42, 0x02, 0x52, 0x42,
+ 0x7f, 0xe4, 0x03, 0xc2, 0x7f, 0xf0, 0x91, 0x08, 0xb1, 0xd9, 0x87, 0x08,
+ 0xb1, 0xc9, 0x48, 0xb7, 0xd7, 0xc2, 0x7f, 0xfc, 0x97, 0x08, 0xb1, 0x9b,
+ 0x02, 0x80, 0x0a, 0x8b, 0x08, 0xb1, 0x8a, 0x02, 0x80, 0x0e, 0x0e, 0xc2,
+ 0x80, 0x12, 0xc2, 0x01, 0x0e, 0x08, 0xb1, 0x71, 0x15, 0xc2, 0x80, 0x1c,
+ 0x18, 0xc2, 0x80, 0x2c, 0xc2, 0x00, 0x9a, 0x08, 0xb1, 0x41, 0xc2, 0x1a,
+ 0x36, 0x08, 0xb1, 0x39, 0xc2, 0x00, 0x3f, 0x08, 0xb1, 0x31, 0x04, 0xc2,
+ 0x80, 0x36, 0x12, 0xc2, 0x80, 0x40, 0x10, 0xc2, 0x80, 0x4a, 0x06, 0xc2,
+ 0x80, 0x60, 0x16, 0xc2, 0x80, 0x6e, 0x0c, 0xc2, 0x80, 0x7c, 0x05, 0xc2,
+ 0x80, 0x86, 0x09, 0xc2, 0x80, 0x90, 0x0d, 0xc2, 0x80, 0x9a, 0x83, 0x08,
+ 0xb0, 0x03, 0x02, 0x80, 0xa4, 0x91, 0x08, 0xb0, 0x61, 0x87, 0x08, 0xb0,
+ 0x51, 0x97, 0x08, 0xb0, 0x23, 0x02, 0x80, 0xb0, 0x8b, 0x08, 0xb0, 0x12,
+ 0x02, 0x80, 0xb4, 0xc2, 0x01, 0x5b, 0x01, 0x34, 0x59, 0xc3, 0x00, 0x55,
+ 0x01, 0x34, 0x50, 0xe0, 0x09, 0x67, 0x08, 0xb3, 0x60, 0x46, 0x01, 0xab,
+ 0x42, 0x80, 0xb8, 0xcf, 0x09, 0x78, 0x08, 0xb3, 0x31, 0xc8, 0x00, 0xff,
+ 0x08, 0xb3, 0x28, 0xcf, 0x09, 0x78, 0x08, 0xb3, 0x21, 0xc8, 0x00, 0xff,
+ 0x08, 0xb3, 0x00, 0xc4, 0x24, 0x35, 0x00, 0xc0, 0xc9, 0xc5, 0x05, 0x1b,
+ 0x00, 0xc0, 0xc1, 0x15, 0xc2, 0x80, 0xc4, 0x08, 0xc2, 0x80, 0xd0, 0x16,
+ 0xc2, 0x80, 0xdc, 0xc3, 0x05, 0x17, 0x00, 0xc0, 0x89, 0xc4, 0x16, 0x57,
+ 0x00, 0xc0, 0x80, 0x45, 0xc7, 0x38, 0x42, 0x80, 0xe8, 0x48, 0xb7, 0xf3,
+ 0xc2, 0x81, 0x0a, 0xc2, 0x00, 0x56, 0x00, 0xc1, 0x48, 0x44, 0x67, 0x35,
+ 0xc2, 0x81, 0x56, 0xc2, 0x0e, 0xe5, 0x00, 0xc1, 0xe1, 0x83, 0x00, 0xc1,
+ 0x90, 0x83, 0x00, 0xc1, 0xa3, 0x02, 0x81, 0xc7, 0x8b, 0x00, 0xc2, 0x10,
+ 0x44, 0x13, 0x52, 0xc2, 0x81, 0xcd, 0xc2, 0x01, 0x0e, 0x00, 0xc1, 0x89,
+ 0x83, 0x00, 0xc1, 0x80, 0xc2, 0x00, 0x0a, 0x00, 0xc2, 0x09, 0xc2, 0x00,
+ 0x9a, 0x00, 0xc1, 0xf9, 0x83, 0x00, 0xc1, 0xe8, 0xc2, 0x01, 0x0e, 0x00,
+ 0xc2, 0x01, 0x83, 0x00, 0xc1, 0x78, 0xc2, 0x01, 0x0e, 0x00, 0xc1, 0xd9,
+ 0x83, 0x00, 0xc1, 0xd0, 0x87, 0x00, 0xc1, 0x38, 0x87, 0x00, 0xc1, 0x30,
+ 0x87, 0x00, 0xc1, 0x28, 0xc4, 0x08, 0xdd, 0x00, 0xc0, 0x79, 0x16, 0xc2,
+ 0x82, 0x2f, 0xc3, 0x05, 0x17, 0x00, 0xc0, 0x58, 0x89, 0x0e, 0xa1, 0xd3,
+ 0x02, 0x82, 0x3b, 0x88, 0x0e, 0xa1, 0xc9, 0x87, 0x0e, 0xa1, 0xc3, 0x02,
+ 0x82, 0x41, 0x86, 0x0e, 0xa1, 0xbb, 0x02, 0x82, 0x4d, 0x85, 0x0e, 0xa1,
+ 0xb3, 0x02, 0x82, 0x53, 0x84, 0x0e, 0xa1, 0xab, 0x02, 0x82, 0x59, 0x83,
+ 0x0e, 0xa1, 0xa3, 0x02, 0x82, 0x5f, 0x91, 0x0e, 0xa2, 0x13, 0x02, 0x82,
+ 0x65, 0x92, 0x0e, 0xa2, 0x1b, 0x02, 0x82, 0x69, 0x97, 0x0e, 0xa2, 0x43,
+ 0x02, 0x82, 0x79, 0x96, 0x0e, 0xa2, 0x3b, 0x02, 0x82, 0x7f, 0x95, 0x0e,
+ 0xa2, 0x33, 0x02, 0x82, 0x8e, 0x94, 0x0e, 0xa2, 0x2b, 0x02, 0x82, 0x94,
+ 0x9a, 0x0e, 0xa2, 0x5b, 0x02, 0x82, 0x9a, 0x90, 0x0e, 0xa2, 0x0b, 0x02,
+ 0x82, 0x9e, 0x8f, 0x0e, 0xa2, 0x03, 0x02, 0x82, 0xa2, 0x8e, 0x0e, 0xa1,
+ 0xfb, 0x02, 0x82, 0xa6, 0x8d, 0x0e, 0xa1, 0xf3, 0x02, 0x82, 0xac, 0x8b,
+ 0x0e, 0xa1, 0xe3, 0x02, 0x82, 0xb2, 0x9c, 0x0e, 0xa2, 0x6b, 0x02, 0x82,
+ 0xb8, 0x9b, 0x0e, 0xa2, 0x61, 0x99, 0x0e, 0xa2, 0x51, 0x98, 0x0e, 0xa2,
+ 0x49, 0x93, 0x0e, 0xa2, 0x21, 0x8c, 0x0e, 0xa1, 0xe9, 0x8a, 0x0e, 0xa1,
+ 0xd8, 0xc8, 0x9d, 0xb0, 0x0e, 0xb8, 0xd9, 0xc9, 0xad, 0x9c, 0x0e, 0xb8,
+ 0xc9, 0xd3, 0x41, 0x4d, 0x0e, 0xb8, 0xa8, 0x91, 0x0e, 0xa2, 0xe3, 0x02,
+ 0x82, 0xbe, 0x92, 0x0e, 0xa2, 0xeb, 0x02, 0x82, 0xc2, 0x85, 0x0e, 0xa2,
+ 0x83, 0x02, 0x82, 0xd2, 0x97, 0x0e, 0xa3, 0x13, 0x02, 0x82, 0xd8, 0x96,
+ 0x0e, 0xa3, 0x0b, 0x02, 0x82, 0xde, 0x95, 0x0e, 0xa3, 0x03, 0x02, 0x82,
+ 0xea, 0x88, 0x0e, 0xa2, 0x9b, 0x02, 0x82, 0xf0, 0x94, 0x0e, 0xa2, 0xfb,
+ 0x02, 0x82, 0xf6, 0x9a, 0x0e, 0xa3, 0x2b, 0x02, 0x82, 0xfc, 0x90, 0x0e,
+ 0xa2, 0xdb, 0x02, 0x83, 0x00, 0x8f, 0x0e, 0xa2, 0xd3, 0x02, 0x83, 0x04,
+ 0x8e, 0x0e, 0xa2, 0xcb, 0x02, 0x83, 0x08, 0x8d, 0x0e, 0xa2, 0xc3, 0x02,
+ 0x83, 0x0e, 0x8b, 0x0e, 0xa2, 0xb3, 0x02, 0x83, 0x14, 0x87, 0x0e, 0xa2,
+ 0x93, 0x02, 0x83, 0x1a, 0x9c, 0x0e, 0xa3, 0x3b, 0x02, 0x83, 0x26, 0x86,
+ 0x0e, 0xa2, 0x8b, 0x02, 0x83, 0x2c, 0x89, 0x0e, 0xa2, 0xa3, 0x02, 0x83,
+ 0x38, 0x84, 0x0e, 0xa2, 0x7b, 0x02, 0x83, 0x3e, 0x83, 0x0e, 0xa2, 0x73,
+ 0x02, 0x83, 0x44, 0x9b, 0x0e, 0xa3, 0x31, 0x99, 0x0e, 0xa3, 0x21, 0x98,
+ 0x0e, 0xa3, 0x19, 0x93, 0x0e, 0xa2, 0xf1, 0x8c, 0x0e, 0xa2, 0xb8, 0x45,
+ 0x00, 0x3f, 0xc2, 0x83, 0x4a, 0x46, 0x08, 0x2f, 0x42, 0x83, 0xee, 0xc4,
+ 0x24, 0x35, 0x0e, 0xbe, 0xb9, 0xc5, 0x05, 0x1b, 0x0e, 0xbe, 0xb1, 0x15,
+ 0xc2, 0x83, 0xfa, 0x08, 0xc2, 0x84, 0x06, 0x16, 0xc2, 0x84, 0x12, 0xc3,
+ 0x05, 0x17, 0x0e, 0xbe, 0x79, 0xc4, 0x16, 0x57, 0x0e, 0xbe, 0x70, 0x86,
+ 0x0e, 0xa0, 0x1b, 0x02, 0x84, 0x1e, 0x91, 0x0e, 0xa0, 0x73, 0x02, 0x84,
+ 0x2a, 0x92, 0x0e, 0xa0, 0x7b, 0x02, 0x84, 0x2e, 0x85, 0x0e, 0xa0, 0x13,
+ 0x02, 0x84, 0x3e, 0x97, 0x0e, 0xa0, 0xa3, 0x02, 0x84, 0x44, 0x96, 0x0e,
+ 0xa0, 0x9b, 0x02, 0x84, 0x4a, 0x95, 0x0e, 0xa0, 0x93, 0x02, 0x84, 0x59,
+ 0x94, 0x0e, 0xa0, 0x8b, 0x02, 0x84, 0x5f, 0x9a, 0x0e, 0xa0, 0xbb, 0x02,
+ 0x84, 0x65, 0x90, 0x0e, 0xa0, 0x6b, 0x02, 0x84, 0x69, 0x8f, 0x0e, 0xa0,
+ 0x63, 0x02, 0x84, 0x6d, 0x8e, 0x0e, 0xa0, 0x5b, 0x02, 0x84, 0x71, 0x8d,
+ 0x0e, 0xa0, 0x53, 0x02, 0x84, 0x77, 0x8b, 0x0e, 0xa0, 0x43, 0x02, 0x84,
+ 0x7d, 0x87, 0x0e, 0xa0, 0x23, 0x02, 0x84, 0x83, 0x9c, 0x0e, 0xa0, 0xcb,
+ 0x02, 0x84, 0x8f, 0x89, 0x0e, 0xa0, 0x33, 0x02, 0x84, 0x95, 0x84, 0x0e,
+ 0xa0, 0x0b, 0x02, 0x84, 0x9b, 0x83, 0x0e, 0xa0, 0x03, 0x02, 0x84, 0xa1,
+ 0x9b, 0x0e, 0xa0, 0xc1, 0x99, 0x0e, 0xa0, 0xb1, 0x98, 0x0e, 0xa0, 0xa9,
+ 0x93, 0x0e, 0xa0, 0x81, 0x8c, 0x0e, 0xa0, 0x49, 0x8a, 0x0e, 0xa0, 0x39,
+ 0x88, 0x0e, 0xa0, 0x28, 0x12, 0xc2, 0x84, 0xa7, 0xca, 0xa3, 0xf8, 0x0e,
+ 0xba, 0xa1, 0xcc, 0x8f, 0x28, 0x0e, 0xba, 0x91, 0xcc, 0x8b, 0xec, 0x0e,
+ 0xba, 0x89, 0xce, 0x12, 0x64, 0x0e, 0xba, 0x81, 0x46, 0x00, 0x3e, 0xc2,
+ 0x84, 0xb9, 0xc5, 0xdf, 0xef, 0x0e, 0xb9, 0xa9, 0x48, 0x01, 0xf7, 0x42,
+ 0x85, 0x5d, 0xc8, 0x9d, 0xb0, 0x0e, 0xb7, 0x09, 0xc9, 0xad, 0x9c, 0x0e,
+ 0xb6, 0xf9, 0xd3, 0x41, 0x4d, 0x0e, 0xb6, 0xd8, 0x46, 0x00, 0x3e, 0xc2,
+ 0x85, 0xfe, 0x48, 0x01, 0xf7, 0x42, 0x86, 0x66, 0xc4, 0x24, 0x35, 0x0e,
+ 0xbf, 0xf9, 0xc5, 0x05, 0x1b, 0x0e, 0xbf, 0xf1, 0x15, 0xc2, 0x86, 0xce,
+ 0x08, 0xc2, 0x86, 0xda, 0x16, 0xc2, 0x86, 0xe6, 0xc3, 0x05, 0x17, 0x0e,
+ 0xbf, 0xb9, 0xc4, 0x16, 0x57, 0x0e, 0xbf, 0xb0, 0x9c, 0x0e, 0xb5, 0x19,
+ 0x9b, 0x0e, 0xb5, 0x11, 0x9a, 0x0e, 0xb5, 0x09, 0x99, 0x0e, 0xb5, 0x01,
+ 0x98, 0x0e, 0xb4, 0xf9, 0x97, 0x0e, 0xb4, 0xf1, 0x96, 0x0e, 0xb4, 0xe9,
+ 0x95, 0x0e, 0xb4, 0xe1, 0x94, 0x0e, 0xb4, 0xd9, 0x93, 0x0e, 0xb4, 0xd1,
+ 0x92, 0x0e, 0xb4, 0xc9, 0x91, 0x0e, 0xb4, 0xc1, 0x90, 0x0e, 0xb4, 0xb9,
+ 0x8f, 0x0e, 0xb4, 0xb1, 0x8e, 0x0e, 0xb4, 0xa9, 0x8d, 0x0e, 0xb4, 0xa1,
+ 0x8c, 0x0e, 0xb4, 0x99, 0x8b, 0x0e, 0xb4, 0x91, 0x8a, 0x0e, 0xb4, 0x89,
+ 0x89, 0x0e, 0xb4, 0x81, 0x88, 0x0e, 0xb4, 0x79, 0x87, 0x0e, 0xb4, 0x71,
+ 0x86, 0x0e, 0xb4, 0x69, 0x85, 0x0e, 0xb4, 0x61, 0x84, 0x0e, 0xb4, 0x59,
+ 0x83, 0x0e, 0xb4, 0x50, 0x9c, 0x0e, 0xb4, 0x49, 0x9b, 0x0e, 0xb4, 0x41,
+ 0x9a, 0x0e, 0xb4, 0x39, 0x99, 0x0e, 0xb4, 0x31, 0x98, 0x0e, 0xb4, 0x29,
+ 0x97, 0x0e, 0xb4, 0x21, 0x96, 0x0e, 0xb4, 0x19, 0x95, 0x0e, 0xb4, 0x11,
+ 0x94, 0x0e, 0xb4, 0x09, 0x93, 0x0e, 0xb4, 0x01, 0x92, 0x0e, 0xb3, 0xf9,
+ 0x91, 0x0e, 0xb3, 0xf1, 0x90, 0x0e, 0xb3, 0xe9, 0x8f, 0x0e, 0xb3, 0xe1,
+ 0x8e, 0x0e, 0xb3, 0xd9, 0x8d, 0x0e, 0xb3, 0xd1, 0x8c, 0x0e, 0xb3, 0xc9,
+ 0x8b, 0x0e, 0xb3, 0xc1, 0x8a, 0x0e, 0xb3, 0xb9, 0x89, 0x0e, 0xb3, 0xb1,
+ 0x88, 0x0e, 0xb3, 0xa9, 0x87, 0x0e, 0xb3, 0xa1, 0x86, 0x0e, 0xb3, 0x99,
+ 0x85, 0x0e, 0xb3, 0x91, 0x84, 0x0e, 0xb3, 0x89, 0x83, 0x0e, 0xb3, 0x80,
+ 0x45, 0x60, 0x2f, 0xc2, 0x86, 0xf2, 0x46, 0x08, 0xd7, 0xc2, 0x87, 0x2c,
+ 0x47, 0xca, 0x00, 0xc2, 0x87, 0x50, 0x46, 0x00, 0x3e, 0xc2, 0x87, 0x5c,
+ 0x48, 0x01, 0xf7, 0x42, 0x87, 0xc4, 0x46, 0x00, 0x3e, 0xc2, 0x88, 0x2c,
+ 0x48, 0x01, 0xf7, 0x42, 0x88, 0x88, 0xc4, 0x24, 0x35, 0x0e, 0xbf, 0x09,
+ 0xc5, 0x05, 0x1b, 0x0e, 0xbf, 0x01, 0x15, 0xc2, 0x88, 0xd0, 0x08, 0xc2,
+ 0x88, 0xdc, 0x16, 0xc2, 0x88, 0xe8, 0xc3, 0x05, 0x17, 0x0e, 0xbe, 0xc9,
+ 0xc4, 0x16, 0x57, 0x0e, 0xbe, 0xc0, 0x9c, 0x0e, 0xab, 0x59, 0x9b, 0x0e,
+ 0xab, 0x51, 0x9a, 0x0e, 0xab, 0x49, 0x99, 0x0e, 0xab, 0x41, 0x98, 0x0e,
+ 0xab, 0x39, 0x97, 0x0e, 0xab, 0x31, 0x96, 0x0e, 0xab, 0x29, 0x95, 0x0e,
+ 0xab, 0x21, 0x94, 0x0e, 0xab, 0x19, 0x93, 0x0e, 0xab, 0x11, 0x92, 0x0e,
+ 0xab, 0x09, 0x91, 0x0e, 0xab, 0x01, 0x90, 0x0e, 0xaa, 0xf9, 0x8f, 0x0e,
+ 0xaa, 0xf1, 0x8e, 0x0e, 0xaa, 0xe9, 0x8d, 0x0e, 0xaa, 0xe1, 0x8c, 0x0e,
+ 0xaa, 0xd9, 0x8b, 0x0e, 0xaa, 0xd1, 0x8a, 0x0e, 0xaa, 0xc9, 0x89, 0x0e,
+ 0xaa, 0xc1, 0x88, 0x0e, 0xaa, 0xb9, 0x87, 0x0e, 0xaa, 0xb1, 0x86, 0x0e,
+ 0xaa, 0xa9, 0x85, 0x0e, 0xaa, 0xa1, 0x84, 0x0e, 0xaa, 0x99, 0x83, 0x0e,
+ 0xaa, 0x90, 0x9b, 0x0e, 0xaa, 0x81, 0x9a, 0x0e, 0xaa, 0x79, 0x99, 0x0e,
+ 0xaa, 0x71, 0x98, 0x0e, 0xaa, 0x69, 0x97, 0x0e, 0xaa, 0x61, 0x96, 0x0e,
+ 0xaa, 0x59, 0x95, 0x0e, 0xaa, 0x51, 0x91, 0x0e, 0xaa, 0x31, 0x8f, 0x0e,
+ 0xaa, 0x21, 0x8e, 0x0e, 0xaa, 0x19, 0x8d, 0x0e, 0xaa, 0x11, 0x8c, 0x0e,
+ 0xaa, 0x09, 0x8b, 0x0e, 0xaa, 0x01, 0x89, 0x0e, 0xa9, 0xf1, 0x88, 0x0e,
+ 0xa9, 0xe9, 0x87, 0x0e, 0xa9, 0xe1, 0x86, 0x0e, 0xa9, 0xd9, 0x84, 0x0e,
+ 0xa9, 0xc9, 0x83, 0x0e, 0xa9, 0xc0, 0x46, 0x00, 0x3e, 0xc2, 0x88, 0xf4,
+ 0x48, 0x01, 0xf7, 0x42, 0x89, 0x5c, 0xd5, 0x37, 0xd2, 0x01, 0x3f, 0x79,
+ 0x46, 0x01, 0x31, 0xc2, 0x89, 0xb0, 0xd4, 0x3a, 0x82, 0x01, 0x3f, 0x59,
+ 0xcd, 0x0f, 0x83, 0x01, 0x3f, 0x48, 0xd6, 0x0a, 0xe8, 0x01, 0x3f, 0x61,
+ 0xce, 0x26, 0x2e, 0x01, 0x3f, 0x30, 0xc2, 0x1a, 0x36, 0x08, 0xf7, 0x59,
+ 0x83, 0x08, 0xf7, 0x41, 0xc2, 0x07, 0x69, 0x08, 0xf7, 0x10, 0xc4, 0x15,
+ 0xa7, 0x08, 0xea, 0xb9, 0xc2, 0x22, 0x45, 0x08, 0xea, 0xb0, 0xc3, 0x0d,
+ 0x8f, 0x08, 0xea, 0xa9, 0xc3, 0x08, 0xde, 0x08, 0xea, 0xa0, 0xc4, 0x05,
+ 0xde, 0x08, 0xea, 0x99, 0xc2, 0x0a, 0x20, 0x08, 0xea, 0x90, 0x03, 0xc2,
+ 0x89, 0xbc, 0x91, 0x08, 0xe9, 0xe9, 0x87, 0x08, 0xe9, 0xd1, 0xc9, 0xb7,
+ 0xd7, 0x08, 0xe9, 0xb1, 0x97, 0x08, 0xe9, 0xa3, 0x02, 0x89, 0xc8, 0x8b,
+ 0x08, 0xe9, 0x92, 0x02, 0x89, 0xcc, 0xc2, 0x00, 0x9a, 0x08, 0xe9, 0x81,
+ 0xc2, 0x01, 0x0e, 0x08, 0xe8, 0xe1, 0x83, 0x08, 0xe8, 0xd9, 0x16, 0x42,
+ 0x89, 0xd0, 0xc3, 0x2c, 0x54, 0x08, 0xe9, 0x79, 0xc2, 0x01, 0x0e, 0x08,
+ 0xe8, 0xa1, 0x83, 0x08, 0xe8, 0x98, 0xc3, 0x1d, 0x55, 0x08, 0xe9, 0x71,
+ 0xc2, 0x01, 0x0e, 0x08, 0xe8, 0x69, 0x83, 0x08, 0xe8, 0x60, 0xc2, 0x00,
+ 0x96, 0x08, 0xe9, 0x69, 0x83, 0x08, 0xe9, 0x38, 0x83, 0x08, 0xe9, 0x59,
+ 0xc2, 0x0e, 0xe5, 0x08, 0xe9, 0x51, 0xc2, 0x01, 0x0e, 0x08, 0xe9, 0x48,
+ 0xc2, 0x01, 0x0e, 0x08, 0xe9, 0x19, 0x83, 0x08, 0xe9, 0x10, 0xc2, 0x01,
+ 0x0e, 0x08, 0xe9, 0x09, 0x83, 0x08, 0xe9, 0x00, 0x83, 0x08, 0xe8, 0xf9,
+ 0xc2, 0x01, 0x01, 0x08, 0xe8, 0xd1, 0xc2, 0x1a, 0x36, 0x08, 0xe8, 0xa9,
+ 0xc2, 0x07, 0x69, 0x08, 0xe8, 0x80, 0xc2, 0x01, 0x0e, 0x08, 0xe8, 0xf1,
+ 0x83, 0x08, 0xe8, 0xe9, 0x06, 0x42, 0x89, 0xda, 0xc2, 0x01, 0x0e, 0x08,
+ 0xe8, 0x91, 0x83, 0x08, 0xe8, 0x88, 0xc2, 0x01, 0x0e, 0x08, 0xe8, 0x79,
+ 0x83, 0x08, 0xe8, 0x70, 0x97, 0x08, 0xe8, 0x59, 0x8b, 0x08, 0xe8, 0x41,
+ 0x83, 0x08, 0xe8, 0x08, 0x97, 0x08, 0xe8, 0x28, 0x8b, 0x08, 0xe8, 0x18,
+ 0xcb, 0x21, 0x1a, 0x08, 0xe5, 0xb1, 0xc8, 0x10, 0xab, 0x08, 0xe5, 0xa8,
+ 0x83, 0x08, 0xe5, 0x79, 0xc2, 0x01, 0x0e, 0x08, 0xe5, 0x71, 0x15, 0xc2,
+ 0x89, 0xe4, 0xc2, 0x00, 0x96, 0x08, 0xe5, 0x59, 0xc2, 0x00, 0x9a, 0x08,
+ 0xe5, 0x51, 0xc2, 0x1a, 0x36, 0x08, 0xe5, 0x49, 0x1c, 0xc2, 0x89, 0xee,
+ 0xc2, 0x00, 0x4c, 0x08, 0xe5, 0x29, 0x06, 0xc2, 0x89, 0xf8, 0x16, 0xc2,
+ 0x8a, 0x02, 0xc2, 0x00, 0x3f, 0x08, 0xe5, 0x09, 0xc2, 0x02, 0x1d, 0x08,
+ 0xe5, 0x01, 0x12, 0xc2, 0x8a, 0x10, 0x10, 0xc2, 0x8a, 0x1a, 0xc2, 0x26,
+ 0x94, 0x08, 0xe4, 0xc1, 0x05, 0xc2, 0x8a, 0x2a, 0xc2, 0x07, 0x69, 0x08,
+ 0xe4, 0xa1, 0x0d, 0x42, 0x8a, 0x34, 0x83, 0x08, 0xe4, 0x69, 0xc2, 0x01,
+ 0x0e, 0x08, 0xe4, 0x60, 0x83, 0x08, 0xe4, 0x39, 0xc2, 0x01, 0x0e, 0x08,
+ 0xe4, 0x30, 0xc2, 0x05, 0x5c, 0x08, 0xe4, 0x21, 0x83, 0x08, 0xe3, 0xe0,
+ 0x15, 0xc2, 0x8a, 0x3e, 0xc2, 0x01, 0x0e, 0x08, 0xe3, 0xd9, 0x83, 0x08,
+ 0xe3, 0xd0, 0xc2, 0x01, 0x0e, 0x08, 0xe3, 0xf9, 0x83, 0x08, 0xe3, 0xf0,
+ 0x83, 0x08, 0xe3, 0xe9, 0xc2, 0x1a, 0x36, 0x08, 0xe3, 0xc9, 0xc2, 0x07,
+ 0x69, 0x08, 0xe3, 0xa8, 0xc2, 0x01, 0x0e, 0x08, 0xe3, 0xb9, 0x83, 0x08,
+ 0xe3, 0xb0, 0xc2, 0x01, 0x0e, 0x08, 0xe3, 0x99, 0x83, 0x08, 0xe3, 0x90,
+ 0xd7, 0x10, 0xa2, 0x00, 0x68, 0x01, 0xca, 0x21, 0x1b, 0x00, 0x68, 0x09,
+ 0xce, 0x6c, 0x6c, 0x00, 0x69, 0xe0, 0xc7, 0x10, 0xac, 0x00, 0x68, 0x11,
+ 0xc7, 0x7d, 0xf8, 0x00, 0x69, 0xe8, 0x0b, 0xc2, 0x8a, 0x48, 0xd2, 0x47,
+ 0x88, 0x00, 0x69, 0xd8, 0xcd, 0x78, 0x90, 0x00, 0x68, 0x21, 0x47, 0xb7,
+ 0xd8, 0xc2, 0x8a, 0x54, 0x83, 0x00, 0x69, 0xa8, 0x83, 0x00, 0x68, 0x31,
+ 0x8b, 0x00, 0x68, 0x81, 0x97, 0x00, 0x68, 0xa1, 0xc9, 0xb3, 0xb1, 0x00,
+ 0x6a, 0xf8, 0x8b, 0x00, 0x68, 0x40, 0x97, 0x00, 0x68, 0x50, 0x87, 0x00,
+ 0x68, 0x78, 0x91, 0x00, 0x68, 0x98, 0x83, 0x00, 0x68, 0xa9, 0xc2, 0x01,
+ 0x0e, 0x00, 0x68, 0xb0, 0x83, 0x00, 0x68, 0xb9, 0xc2, 0x01, 0x0e, 0x00,
+ 0x68, 0xc0, 0xc2, 0x07, 0x69, 0x00, 0x68, 0xc9, 0xc2, 0x1a, 0x36, 0x00,
+ 0x68, 0xf1, 0x10, 0xc2, 0x8a, 0x62, 0x83, 0x00, 0x69, 0x40, 0x83, 0x00,
+ 0x68, 0xd1, 0x0a, 0x42, 0x8a, 0x6c, 0x83, 0x00, 0x68, 0xe1, 0xc2, 0x01,
+ 0x0e, 0x00, 0x68, 0xe8, 0x16, 0xc2, 0x8a, 0x76, 0x83, 0x00, 0x69, 0x21,
+ 0xc2, 0x01, 0x0e, 0x00, 0x69, 0x28, 0x06, 0xc2, 0x8a, 0x86, 0x83, 0x00,
+ 0x69, 0x31, 0xc2, 0x01, 0x0e, 0x00, 0x69, 0x39, 0xc7, 0xc9, 0xac, 0x00,
+ 0x6a, 0x70, 0x83, 0x00, 0x69, 0x51, 0xc2, 0x01, 0x0e, 0x00, 0x69, 0x58,
+ 0x83, 0x00, 0x69, 0x61, 0xc2, 0x01, 0x0e, 0x00, 0x69, 0x68, 0x83, 0x00,
+ 0x69, 0x81, 0xc2, 0x00, 0x9a, 0x00, 0x69, 0x88, 0x83, 0x00, 0x69, 0x91,
+ 0x0e, 0x42, 0x8a, 0x90, 0xc2, 0x01, 0x0e, 0x00, 0x69, 0xb1, 0xc2, 0x0e,
+ 0xe5, 0x00, 0x69, 0xb9, 0x83, 0x00, 0x69, 0xc0, 0x83, 0x00, 0x69, 0xf1,
+ 0x8b, 0x00, 0x6a, 0x41, 0x97, 0x00, 0x6a, 0x60, 0x8b, 0x00, 0x6a, 0x00,
+ 0x97, 0x00, 0x6a, 0x10, 0x94, 0x00, 0x6a, 0x1b, 0x02, 0x8a, 0x9a, 0x8e,
+ 0x00, 0x6b, 0x12, 0x02, 0x8a, 0x9e, 0x87, 0x00, 0x6a, 0x38, 0x91, 0x00,
+ 0x6a, 0x58, 0xd8, 0x22, 0x34, 0x00, 0x6a, 0xc1, 0x08, 0xc2, 0x8a, 0xa2,
+ 0x16, 0xc2, 0x8a, 0xae, 0xc7, 0x08, 0x19, 0x00, 0x6b, 0x99, 0xc4, 0x01,
+ 0x1d, 0x00, 0x6b, 0xa1, 0xc9, 0x66, 0x90, 0x00, 0x6b, 0xb1, 0xc6, 0x05,
+ 0x1b, 0x00, 0x6b, 0xb8, 0xca, 0xa9, 0x66, 0x00, 0x6a, 0xd1, 0xca, 0xa9,
+ 0xb6, 0x00, 0x6a, 0xe9, 0xc8, 0x08, 0x19, 0x00, 0x6b, 0xa9, 0xca, 0xa7,
+ 0x22, 0x00, 0x6b, 0xc0, 0xc4, 0x16, 0x57, 0x00, 0x6b, 0x31, 0xc3, 0x05,
+ 0x17, 0x00, 0x6b, 0x39, 0x16, 0xc2, 0x8a, 0xba, 0x08, 0xc2, 0x8a, 0xc6,
+ 0x15, 0xc2, 0x8a, 0xd2, 0xc5, 0x05, 0x1b, 0x00, 0x6b, 0x71, 0xc4, 0x24,
+ 0x35, 0x00, 0x6b, 0x78, 0xc7, 0x0d, 0x7f, 0x00, 0x6b, 0x89, 0xc8, 0x4f,
+ 0xa2, 0x00, 0x6b, 0x90, 0x96, 0x08, 0x57, 0xa3, 0x02, 0x8a, 0xde, 0xd3,
+ 0x46, 0x0d, 0x08, 0x57, 0x90, 0xc8, 0x0d, 0x7e, 0x08, 0x57, 0x78, 0xc5,
+ 0x25, 0x27, 0x08, 0x57, 0x71, 0xc2, 0x01, 0x04, 0x08, 0x57, 0x68, 0xc2,
+ 0x3c, 0xd1, 0x08, 0x57, 0x21, 0xc6, 0xd6, 0xb8, 0x08, 0x56, 0xa9, 0xc3,
+ 0x1e, 0x54, 0x08, 0x56, 0x70, 0xc4, 0x3c, 0x8c, 0x08, 0x57, 0x19, 0xc3,
+ 0x11, 0x40, 0x08, 0x57, 0x11, 0x03, 0x42, 0x8a, 0xe4, 0xc4, 0xe7, 0x47,
+ 0x08, 0x57, 0x01, 0xc3, 0x2d, 0x41, 0x08, 0x56, 0xf0, 0xc3, 0x2d, 0x41,
+ 0x08, 0x56, 0xf9, 0xc3, 0x09, 0x36, 0x08, 0x56, 0x88, 0xc4, 0x43, 0x0a,
+ 0x08, 0x56, 0xd1, 0xc3, 0x18, 0x7a, 0x08, 0x56, 0xc9, 0xc4, 0x32, 0xac,
+ 0x08, 0x56, 0x00, 0xc6, 0xd6, 0xb8, 0x08, 0x56, 0xa1, 0xc5, 0x43, 0x10,
+ 0x08, 0x56, 0x28, 0xc4, 0xdd, 0xbb, 0x08, 0x56, 0x91, 0xc3, 0x09, 0x36,
+ 0x08, 0x56, 0x80, 0xc2, 0x01, 0x47, 0x08, 0x56, 0x68, 0xc5, 0xdf, 0x8b,
+ 0x08, 0x56, 0x61, 0xc4, 0x43, 0x0a, 0x08, 0x56, 0x58, 0xc5, 0xdf, 0x8b,
+ 0x08, 0x56, 0x51, 0xc4, 0x43, 0x0a, 0x08, 0x56, 0x48, 0xc5, 0xdf, 0xb8,
+ 0x08, 0x56, 0x21, 0xc4, 0x9e, 0xbd, 0x08, 0x56, 0x18, 0xc4, 0x9e, 0xc2,
+ 0x08, 0x56, 0x11, 0xc3, 0x1e, 0x54, 0x08, 0x56, 0x08, 0xc2, 0x01, 0x5b,
+ 0x00, 0x42, 0xc1, 0x96, 0x00, 0x42, 0xab, 0x02, 0x8a, 0xf0, 0x95, 0x00,
+ 0x42, 0x73, 0x02, 0x8a, 0xf4, 0x94, 0x00, 0x42, 0x99, 0x93, 0x00, 0x42,
+ 0x91, 0x92, 0x00, 0x42, 0x81, 0x90, 0x00, 0x42, 0x69, 0x8f, 0x00, 0x42,
+ 0x61, 0x8e, 0x00, 0x42, 0x59, 0x8d, 0x00, 0x42, 0x53, 0x02, 0x8a, 0xfc,
+ 0x9c, 0x00, 0x42, 0x31, 0x8a, 0x00, 0x42, 0x21, 0x86, 0x00, 0x42, 0x19,
+ 0x89, 0x00, 0x42, 0x11, 0x84, 0x00, 0x42, 0x08, 0x90, 0x00, 0x42, 0x79,
+ 0x96, 0x00, 0x42, 0x38, 0x14, 0xc2, 0x8b, 0x02, 0xc2, 0x01, 0x0e, 0x08,
+ 0x8b, 0x89, 0xc2, 0x0e, 0xe5, 0x08, 0x8b, 0x81, 0xc2, 0x06, 0x8c, 0x08,
+ 0x8b, 0x79, 0xc2, 0x00, 0x96, 0x08, 0x8b, 0x71, 0xc2, 0x00, 0x3f, 0x08,
+ 0x8b, 0x61, 0x04, 0xc2, 0x8b, 0x0c, 0x12, 0xc2, 0x8b, 0x16, 0x10, 0xc2,
+ 0x8b, 0x20, 0x06, 0xc2, 0x8b, 0x30, 0x16, 0xc2, 0x8b, 0x3e, 0x0c, 0xc2,
+ 0x8b, 0x4c, 0x05, 0xc2, 0x8b, 0x56, 0x09, 0xc2, 0x8b, 0x60, 0x0d, 0xc2,
+ 0x8b, 0x6a, 0x91, 0x08, 0x8a, 0xa1, 0x87, 0x08, 0x8a, 0x99, 0x97, 0x08,
+ 0x8a, 0x91, 0x8b, 0x08, 0x8a, 0x89, 0x83, 0x08, 0x8a, 0x80, 0x05, 0xc2,
+ 0x8b, 0x74, 0xc7, 0xcd, 0x80, 0x0f, 0x80, 0xb8, 0x05, 0xc2, 0x8b, 0x80,
+ 0xc7, 0xcd, 0x80, 0x0f, 0x80, 0xa8, 0x05, 0xc2, 0x8b, 0x8c, 0xc7, 0xcd,
+ 0x80, 0x0f, 0x80, 0xb0, 0x05, 0xc2, 0x8b, 0x98, 0xc7, 0xcd, 0x80, 0x0f,
+ 0x80, 0xc0, 0x05, 0xc2, 0x8b, 0xa4, 0xc7, 0xcd, 0x80, 0x0f, 0x80, 0x80,
+ 0x05, 0xc2, 0x8b, 0xb0, 0xc7, 0xcd, 0x80, 0x0f, 0x80, 0x88, 0x05, 0xc2,
+ 0x8b, 0xbc, 0xc7, 0xcd, 0x80, 0x0f, 0x80, 0x90, 0x05, 0xc2, 0x8b, 0xc8,
+ 0xc7, 0xcd, 0x80, 0x0f, 0x80, 0x98, 0x05, 0xc2, 0x8b, 0xd4, 0xc7, 0xcd,
+ 0x80, 0x0f, 0x80, 0xa0, 0x46, 0x11, 0xf1, 0xc2, 0x8b, 0xe0, 0xc4, 0xe7,
+ 0x3f, 0x0f, 0x9d, 0xe0, 0xcb, 0x99, 0x00, 0x0f, 0x9c, 0xc0, 0x9a, 0x01,
+ 0x38, 0xa9, 0xc4, 0x02, 0xcb, 0x00, 0x06, 0xba, 0x02, 0x8c, 0x46, 0xc5,
+ 0x14, 0x2d, 0x01, 0x14, 0x71, 0xce, 0x20, 0x32, 0x01, 0x14, 0x68, 0xc2,
+ 0x01, 0x0e, 0x08, 0x95, 0x41, 0xc2, 0x00, 0x9a, 0x08, 0x95, 0x39, 0x83,
+ 0x08, 0x95, 0x10, 0xc2, 0x01, 0x0e, 0x08, 0x94, 0xf9, 0x83, 0x08, 0x94,
+ 0xe8, 0xc2, 0x01, 0x0e, 0x08, 0x94, 0xe1, 0x83, 0x08, 0x94, 0xd8, 0x83,
+ 0x08, 0x94, 0xd1, 0xc2, 0x01, 0x01, 0x08, 0x94, 0xa9, 0xc2, 0x1a, 0x36,
+ 0x08, 0x94, 0x78, 0xc2, 0x01, 0x0e, 0x08, 0x94, 0xc9, 0x83, 0x08, 0x94,
+ 0xc1, 0x06, 0x42, 0x8c, 0x4a, 0xc2, 0x01, 0x0e, 0x08, 0x94, 0xb9, 0x83,
+ 0x08, 0x94, 0xb1, 0x16, 0x42, 0x8c, 0x5a, 0x83, 0x08, 0x94, 0x61, 0xc2,
+ 0x26, 0x94, 0x08, 0x94, 0x68, 0x83, 0x08, 0x94, 0x51, 0xc2, 0x01, 0x0e,
+ 0x08, 0x94, 0x58, 0xc2, 0x01, 0x0e, 0x08, 0x94, 0x41, 0x83, 0x08, 0x94,
+ 0x30, 0xc2, 0x01, 0x0e, 0x08, 0x94, 0x29, 0x83, 0x08, 0x94, 0x20, 0xc3,
+ 0x4d, 0xfe, 0x05, 0x4f, 0x29, 0x43, 0x0e, 0x70, 0xc2, 0x8c, 0x64, 0x48,
+ 0xbe, 0xf3, 0x42, 0x8c, 0x7c, 0xc3, 0x0a, 0x1f, 0x05, 0x53, 0xc9, 0xc3,
+ 0x05, 0x17, 0x05, 0x53, 0xc1, 0xcb, 0x0f, 0x62, 0x05, 0x53, 0xb8, 0x44,
+ 0x3f, 0x99, 0x42, 0x8c, 0x88, 0x48, 0x68, 0x62, 0x42, 0x8c, 0xcc, 0x83,
+ 0x00, 0x80, 0x59, 0xc2, 0x01, 0x0e, 0x00, 0x80, 0x60, 0x83, 0x00, 0x82,
+ 0x83, 0x02, 0x8c, 0xec, 0x4b, 0x98, 0xd4, 0x42, 0x8c, 0xf2, 0xc2, 0x1a,
+ 0x36, 0x00, 0x80, 0x51, 0x83, 0x00, 0x80, 0x78, 0x83, 0x00, 0x80, 0x69,
+ 0xc2, 0x01, 0x0e, 0x00, 0x80, 0x70, 0x87, 0x00, 0x81, 0x41, 0xc3, 0x7a,
+ 0xa3, 0x00, 0x82, 0xd1, 0xc3, 0xec, 0x54, 0x00, 0x82, 0xd9, 0x42, 0x40,
+ 0x58, 0x42, 0x8c, 0xfe, 0xc3, 0x03, 0x4b, 0x00, 0x83, 0x29, 0xc3, 0x02,
+ 0x6b, 0x00, 0x83, 0x30, 0xc3, 0x3f, 0x07, 0x00, 0x83, 0x71, 0xc3, 0xe4,
+ 0xcf, 0x00, 0x83, 0x79, 0xc4, 0xe6, 0xd7, 0x00, 0x83, 0x80, 0x94, 0x00,
+ 0x82, 0x98, 0x8e, 0x00, 0x82, 0xa8, 0x8b, 0x00, 0x84, 0xe8, 0xc6, 0x03,
+ 0x4f, 0x00, 0x84, 0x28, 0xd7, 0x29, 0x20, 0x0f, 0xd2, 0x68, 0x49, 0x29,
+ 0x20, 0x42, 0x8d, 0x06, 0xc3, 0x01, 0x5e, 0x0f, 0xd0, 0x03, 0x02, 0x8d,
+ 0x12, 0xc5, 0x8f, 0xc9, 0x0f, 0xd0, 0x22, 0x02, 0x8d, 0x18, 0x49, 0x29,
+ 0x20, 0x42, 0x8d, 0x1e, 0x49, 0x29, 0x20, 0x42, 0x8d, 0x2a, 0x49, 0x29,
+ 0x20, 0x42, 0x8d, 0x36, 0x0d, 0xc2, 0x8d, 0x42, 0xc5, 0xad, 0xae, 0x0f,
+ 0xd1, 0x59, 0xc4, 0xd4, 0xf2, 0x0f, 0xd1, 0x61, 0xc6, 0xba, 0xfd, 0x0f,
+ 0xd1, 0x69, 0xc4, 0xe8, 0x9b, 0x0f, 0xd1, 0x78, 0x43, 0x02, 0xcd, 0xc2,
+ 0x8d, 0x4e, 0xc4, 0xe7, 0x77, 0x08, 0xa2, 0x50, 0xcd, 0x78, 0x90, 0x08,
+ 0xa2, 0xf9, 0x47, 0xb7, 0xd8, 0x42, 0x8d, 0x76, 0x83, 0x08, 0xa1, 0x99,
+ 0xc2, 0x01, 0x0e, 0x08, 0xa1, 0x89, 0xc2, 0x0e, 0xe5, 0x08, 0xa1, 0x90,
+ 0x83, 0x08, 0xa1, 0x19, 0xc2, 0x01, 0x01, 0x08, 0xa0, 0xf1, 0x1b, 0xc2,
+ 0x8d, 0x84, 0x09, 0xc2, 0x8d, 0x8e, 0xc2, 0x01, 0x0e, 0x08, 0xa1, 0x20,
+ 0xc2, 0x01, 0x0e, 0x08, 0xa1, 0x11, 0x83, 0x08, 0xa1, 0x09, 0x06, 0x42,
+ 0x8d, 0x98, 0xc2, 0x01, 0x0e, 0x08, 0xa1, 0x01, 0x83, 0x08, 0xa0, 0xf9,
+ 0x16, 0x42, 0x8d, 0xa2, 0xc2, 0x01, 0x0e, 0x08, 0xa0, 0xb9, 0x83, 0x08,
+ 0xa0, 0xb0, 0xc2, 0x01, 0x0e, 0x08, 0xa0, 0xa9, 0x83, 0x08, 0xa0, 0xa0,
+ 0xc2, 0x01, 0x0e, 0x08, 0xa0, 0x89, 0x83, 0x08, 0xa0, 0x80, 0xc2, 0x01,
+ 0x0e, 0x08, 0xa0, 0x79, 0x83, 0x08, 0xa0, 0x70, 0x97, 0x08, 0xa0, 0x69,
+ 0x8b, 0x08, 0xa0, 0x59, 0x83, 0x08, 0xa0, 0x08, 0x97, 0x08, 0xa0, 0x28,
+ 0x8b, 0x08, 0xa0, 0x18, 0x83, 0x08, 0xa1, 0x29, 0xc2, 0x01, 0x0e, 0x08,
+ 0xa1, 0x30, 0x83, 0x08, 0xa1, 0x39, 0xc2, 0x01, 0x0e, 0x08, 0xa1, 0x40,
+ 0x83, 0x08, 0xa1, 0x49, 0xc2, 0x01, 0x0e, 0x08, 0xa1, 0x50, 0x83, 0x08,
+ 0xa1, 0x61, 0xc2, 0x01, 0x0e, 0x08, 0xa1, 0x68, 0x83, 0x08, 0xa1, 0x71,
+ 0xc2, 0x01, 0x0e, 0x08, 0xa1, 0x78, 0xc5, 0x00, 0xea, 0x08, 0xa2, 0xd1,
+ 0xc5, 0x83, 0xd7, 0x08, 0xa2, 0x60, 0xc4, 0x24, 0x35, 0x08, 0xa2, 0xc9,
+ 0xc5, 0x05, 0x1b, 0x08, 0xa2, 0xc1, 0x15, 0xc2, 0x8d, 0xac, 0x08, 0xc2,
+ 0x8d, 0xb8, 0x16, 0xc2, 0x8d, 0xc4, 0xc3, 0x05, 0x17, 0x08, 0xa2, 0x89,
+ 0xc4, 0x16, 0x57, 0x08, 0xa2, 0x80, 0x97, 0x08, 0xa2, 0x09, 0x8b, 0x08,
+ 0xa1, 0xf9, 0x83, 0x08, 0xa1, 0xa8, 0x8e, 0x08, 0xa1, 0xe3, 0x02, 0x8d,
+ 0xd0, 0x94, 0x08, 0xa1, 0xd2, 0x02, 0x8d, 0xd4, 0x97, 0x08, 0xa1, 0xc8,
+ 0x8b, 0x08, 0xa1, 0xb8, 0x98, 0x00, 0xce, 0xf8, 0xcd, 0x78, 0xc4, 0x00,
+ 0xce, 0xd1, 0x47, 0x2a, 0xe4, 0x42, 0x8d, 0xd8, 0xc4, 0x24, 0x35, 0x00,
+ 0xce, 0xc9, 0xc5, 0x05, 0x1b, 0x00, 0xce, 0xc1, 0x15, 0xc2, 0x8d, 0xe4,
+ 0x08, 0xc2, 0x8d, 0xf0, 0x16, 0xc2, 0x8d, 0xfc, 0xc3, 0x05, 0x17, 0x00,
+ 0xce, 0x89, 0xc4, 0x16, 0x57, 0x00, 0xce, 0x80, 0x46, 0x2a, 0xb4, 0xc2,
+ 0x8e, 0x08, 0x44, 0x00, 0x36, 0xc2, 0x8e, 0x23, 0x45, 0x0b, 0x2b, 0x42,
+ 0x8e, 0x71, 0x0b, 0xc2, 0x8e, 0xbf, 0x97, 0x00, 0xcd, 0x9b, 0x02, 0x8e,
+ 0xc7, 0x91, 0x00, 0xcd, 0xbb, 0x02, 0x8e, 0xd6, 0x03, 0xc2, 0x8e, 0xe1,
+ 0x87, 0x00, 0xcd, 0xa9, 0xcf, 0x66, 0x27, 0x00, 0xcd, 0x80, 0x9c, 0x0f,
+ 0x8c, 0x49, 0x9b, 0x0f, 0x8c, 0x41, 0x9a, 0x0f, 0x8c, 0x39, 0x99, 0x0f,
+ 0x8c, 0x31, 0x98, 0x0f, 0x8c, 0x29, 0x97, 0x0f, 0x8c, 0x21, 0x96, 0x0f,
+ 0x8c, 0x19, 0x95, 0x0f, 0x8c, 0x11, 0x94, 0x0f, 0x8c, 0x09, 0x93, 0x0f,
+ 0x8c, 0x01, 0x92, 0x0f, 0x8b, 0xf9, 0x91, 0x0f, 0x8b, 0xf1, 0x90, 0x0f,
+ 0x8b, 0xe9, 0x8f, 0x0f, 0x8b, 0xe1, 0x8e, 0x0f, 0x8b, 0xd9, 0x8d, 0x0f,
+ 0x8b, 0xd1, 0x8c, 0x0f, 0x8b, 0xc9, 0x8b, 0x0f, 0x8b, 0xc1, 0x8a, 0x0f,
+ 0x8b, 0xb9, 0x89, 0x0f, 0x8b, 0xb1, 0x88, 0x0f, 0x8b, 0xa9, 0x87, 0x0f,
+ 0x8b, 0xa1, 0x86, 0x0f, 0x8b, 0x99, 0x85, 0x0f, 0x8b, 0x91, 0x84, 0x0f,
+ 0x8b, 0x89, 0x83, 0x0f, 0x8b, 0x80, 0x16, 0xc2, 0x8e, 0xf0, 0xc8, 0x4c,
+ 0xbc, 0x01, 0x27, 0x99, 0x07, 0xc2, 0x8e, 0xfc, 0x15, 0xc2, 0x8f, 0x08,
+ 0x08, 0x42, 0x8f, 0x14, 0x9c, 0x0f, 0x8b, 0x49, 0x9b, 0x0f, 0x8b, 0x41,
+ 0x9a, 0x0f, 0x8b, 0x39, 0x99, 0x0f, 0x8b, 0x31, 0x98, 0x0f, 0x8b, 0x29,
+ 0x97, 0x0f, 0x8b, 0x21, 0x96, 0x0f, 0x8b, 0x19, 0x95, 0x0f, 0x8b, 0x11,
+ 0x94, 0x0f, 0x8b, 0x09, 0x93, 0x0f, 0x8b, 0x01, 0x92, 0x0f, 0x8a, 0xf9,
+ 0x91, 0x0f, 0x8a, 0xf1, 0x90, 0x0f, 0x8a, 0xe9, 0x8f, 0x0f, 0x8a, 0xe1,
+ 0x8e, 0x0f, 0x8a, 0xd9, 0x8d, 0x0f, 0x8a, 0xd1, 0x8c, 0x0f, 0x8a, 0xc9,
+ 0x8b, 0x0f, 0x8a, 0xc1, 0x8a, 0x0f, 0x8a, 0xb9, 0x89, 0x0f, 0x8a, 0xb1,
+ 0x88, 0x0f, 0x8a, 0xa9, 0x87, 0x0f, 0x8a, 0xa1, 0x86, 0x0f, 0x8a, 0x99,
+ 0x85, 0x0f, 0x8a, 0x91, 0x84, 0x0f, 0x8a, 0x89, 0x83, 0x0f, 0x8a, 0x80,
+ 0x90, 0x0f, 0x27, 0x28, 0x97, 0x08, 0xce, 0xe9, 0x8b, 0x08, 0xce, 0xd9,
+ 0x83, 0x08, 0xce, 0x88, 0x94, 0x08, 0xce, 0xb8, 0x97, 0x08, 0xce, 0xa8,
+ 0x8b, 0x08, 0xce, 0x98, 0xc7, 0x7d, 0xf8, 0x08, 0xcf, 0x09, 0xc7, 0x10,
+ 0xac, 0x08, 0xce, 0xf0, 0xc4, 0x21, 0x28, 0x08, 0xcf, 0x01, 0xc5, 0x45,
+ 0xcf, 0x08, 0xce, 0xf8, 0xc2, 0x00, 0x9a, 0x08, 0xce, 0x81, 0x83, 0x08,
+ 0xce, 0x40, 0xc2, 0x00, 0x96, 0x08, 0xce, 0x79, 0x83, 0x08, 0xce, 0x48,
+ 0x83, 0x08, 0xce, 0x69, 0xc2, 0x0e, 0xe5, 0x08, 0xce, 0x61, 0xc2, 0x01,
+ 0x0e, 0x08, 0xce, 0x58, 0x83, 0x08, 0xce, 0x51, 0xc8, 0xb7, 0xd8, 0x08,
+ 0xcd, 0x32, 0x02, 0x8f, 0x20, 0xc2, 0x01, 0x0e, 0x08, 0xce, 0x29, 0x83,
+ 0x08, 0xce, 0x20, 0xc2, 0x01, 0x0e, 0x08, 0xce, 0x19, 0x83, 0x08, 0xce,
+ 0x10, 0x83, 0x08, 0xce, 0x09, 0xc2, 0x01, 0x01, 0x08, 0xcd, 0xe1, 0xc2,
+ 0x1a, 0x36, 0x08, 0xcd, 0xb9, 0xc2, 0x07, 0x69, 0x08, 0xcd, 0x90, 0xc2,
+ 0x01, 0x0e, 0x08, 0xce, 0x01, 0x83, 0x08, 0xcd, 0xf9, 0x06, 0x42, 0x8f,
+ 0x24, 0xc2, 0x01, 0x0e, 0x08, 0xcd, 0xf1, 0x83, 0x08, 0xcd, 0xe9, 0x16,
+ 0x42, 0x8f, 0x2e, 0xc2, 0x01, 0x0e, 0x08, 0xcd, 0xb1, 0x83, 0x08, 0xcd,
+ 0xa8, 0xc2, 0x01, 0x0e, 0x08, 0xcd, 0xa1, 0x83, 0x08, 0xcd, 0x98, 0xc2,
+ 0x01, 0x0e, 0x08, 0xcd, 0x89, 0x83, 0x08, 0xcd, 0x80, 0xc2, 0x01, 0x0e,
+ 0x08, 0xcd, 0x79, 0x83, 0x08, 0xcd, 0x70, 0x97, 0x08, 0xcd, 0x69, 0x8b,
+ 0x08, 0xcd, 0x59, 0x83, 0x08, 0xcd, 0x08, 0x97, 0x08, 0xcd, 0x28, 0x8b,
+ 0x08, 0xcd, 0x18, 0xc8, 0x0d, 0x7e, 0x08, 0x45, 0x78, 0x19, 0xc2, 0x8f,
+ 0x38, 0xc2, 0x01, 0x04, 0x08, 0x45, 0x69, 0xc4, 0x05, 0xde, 0x08, 0x45,
+ 0x48, 0xc3, 0x0d, 0x8f, 0x08, 0x45, 0x61, 0xc3, 0x08, 0xde, 0x08, 0x45,
+ 0x50, 0xc2, 0x3c, 0xd1, 0x08, 0x44, 0xf1, 0xc3, 0x1e, 0x54, 0x08, 0x44,
+ 0x58, 0xc3, 0x11, 0x40, 0x08, 0x44, 0xe9, 0x03, 0x42, 0x8f, 0x42, 0xc4,
+ 0x3c, 0x8c, 0x08, 0x44, 0xe1, 0xc3, 0x21, 0x00, 0x08, 0x44, 0xa1, 0xc3,
+ 0x04, 0xae, 0x08, 0x44, 0x91, 0xc6, 0xd7, 0x12, 0x08, 0x44, 0x81, 0xc4,
+ 0xe5, 0x53, 0x08, 0x44, 0x71, 0xc4, 0x4d, 0x48, 0x08, 0x44, 0x61, 0xc2,
+ 0x00, 0x5b, 0x08, 0x44, 0x31, 0xc4, 0xe5, 0xaf, 0x08, 0x44, 0x11, 0xc5,
+ 0xa6, 0x5f, 0x08, 0x44, 0x00, 0xc3, 0x18, 0x7a, 0x08, 0x44, 0xb9, 0xc4,
+ 0x32, 0xac, 0x08, 0x44, 0x08, 0xc2, 0x01, 0x47, 0x08, 0x44, 0x50, 0x49,
+ 0x01, 0x59, 0xc2, 0x8f, 0x4e, 0xcc, 0x82, 0x80, 0x01, 0x0e, 0xb9, 0x03,
+ 0xc2, 0x8f, 0x60, 0xcb, 0x09, 0xfc, 0x01, 0x58, 0x01, 0xcb, 0x99, 0xe7,
+ 0x01, 0x58, 0x41, 0xd5, 0x00, 0x52, 0x01, 0x5b, 0x3b, 0x02, 0x8f, 0x6f,
+ 0xd0, 0x60, 0xaf, 0x0f, 0xc2, 0xa8, 0x03, 0xc2, 0x8f, 0x75, 0xcc, 0x82,
+ 0x80, 0x01, 0x0e, 0xb1, 0x49, 0x01, 0x59, 0xc2, 0x8f, 0x84, 0xcb, 0x09,
+ 0xfc, 0x01, 0x58, 0x09, 0xcb, 0x99, 0xe7, 0x01, 0x58, 0x49, 0xd5, 0x00,
+ 0x52, 0x01, 0x5b, 0x33, 0x02, 0x8f, 0x96, 0xd0, 0x60, 0xaf, 0x0f, 0xc2,
+ 0xa0, 0x49, 0x54, 0xdd, 0xc2, 0x8f, 0x9c, 0x43, 0x01, 0xea, 0xc2, 0x8f,
+ 0xa8, 0xd0, 0x5a, 0x8f, 0x05, 0x41, 0xb9, 0xca, 0xa2, 0xd6, 0x05, 0x41,
+ 0xc0, 0xde, 0x01, 0x29, 0x01, 0x3d, 0x78, 0xd7, 0x28, 0x23, 0x01, 0x17,
+ 0x19, 0xd4, 0x3a, 0x6e, 0x01, 0x17, 0x10, 0xc9, 0x2d, 0xdf, 0x01, 0x14,
+ 0x29, 0xc7, 0x3a, 0x0a, 0x01, 0x14, 0x20, 0xc2, 0x00, 0x96, 0x0f, 0x08,
+ 0xf1, 0x83, 0x0f, 0x08, 0xe0, 0xc2, 0x23, 0xe3, 0x0f, 0x08, 0x99, 0xc2,
+ 0x0e, 0xe5, 0x0f, 0x08, 0x69, 0x83, 0x0f, 0x08, 0x10, 0x84, 0x0d, 0x97,
+ 0xd9, 0x83, 0x0d, 0x97, 0xd1, 0xa6, 0x0d, 0x97, 0xc9, 0xa5, 0x0d, 0x97,
+ 0xc1, 0xa4, 0x0d, 0x97, 0xb9, 0xa3, 0x0d, 0x97, 0xb1, 0xa2, 0x0d, 0x97,
+ 0xa9, 0xa1, 0x0d, 0x97, 0xa1, 0xa0, 0x0d, 0x97, 0x99, 0x9f, 0x0d, 0x97,
+ 0x91, 0x9e, 0x0d, 0x97, 0x89, 0x9d, 0x0d, 0x97, 0x80, 0x88, 0x0d, 0x97,
+ 0x79, 0x87, 0x0d, 0x97, 0x71, 0x86, 0x0d, 0x97, 0x69, 0x83, 0x0d, 0x97,
+ 0x51, 0xa6, 0x0d, 0x97, 0x49, 0xa2, 0x0d, 0x97, 0x29, 0x85, 0x0d, 0x97,
+ 0x61, 0x84, 0x0d, 0x97, 0x59, 0xa5, 0x0d, 0x97, 0x41, 0xa4, 0x0d, 0x97,
+ 0x39, 0xa3, 0x0d, 0x97, 0x31, 0xa1, 0x0d, 0x97, 0x21, 0xa0, 0x0d, 0x97,
+ 0x19, 0x9f, 0x0d, 0x97, 0x11, 0x9e, 0x0d, 0x97, 0x09, 0x9d, 0x0d, 0x97,
+ 0x00, 0x83, 0x0d, 0x95, 0xd1, 0x88, 0x0d, 0x95, 0xf9, 0x87, 0x0d, 0x95,
+ 0xf1, 0xa6, 0x0d, 0x95, 0xc9, 0xa5, 0x0d, 0x95, 0xc1, 0xa4, 0x0d, 0x95,
+ 0xb9, 0xa3, 0x0d, 0x95, 0xb1, 0xa2, 0x0d, 0x95, 0xa9, 0xa1, 0x0d, 0x95,
+ 0xa1, 0xa0, 0x0d, 0x95, 0x99, 0x9f, 0x0d, 0x95, 0x91, 0x9e, 0x0d, 0x95,
+ 0x89, 0x9d, 0x0d, 0x95, 0x81, 0x84, 0x0d, 0x95, 0xd9, 0x85, 0x0d, 0x95,
+ 0xe1, 0x86, 0x0d, 0x95, 0xe8, 0x83, 0x0d, 0x94, 0xd1, 0xa6, 0x0d, 0x94,
+ 0xc9, 0xa5, 0x0d, 0x94, 0xc1, 0xa4, 0x0d, 0x94, 0xb9, 0xa3, 0x0d, 0x94,
+ 0xb1, 0xa2, 0x0d, 0x94, 0xa9, 0xa1, 0x0d, 0x94, 0xa1, 0xa0, 0x0d, 0x94,
+ 0x99, 0x9f, 0x0d, 0x94, 0x91, 0x9e, 0x0d, 0x94, 0x89, 0x9d, 0x0d, 0x94,
+ 0x81, 0x88, 0x0d, 0x94, 0xf9, 0x87, 0x0d, 0x94, 0xf1, 0x86, 0x0d, 0x94,
+ 0xe9, 0x85, 0x0d, 0x94, 0xe1, 0x84, 0x0d, 0x94, 0xd8, 0x88, 0x0d, 0x94,
+ 0x79, 0x87, 0x0d, 0x94, 0x71, 0x86, 0x0d, 0x94, 0x69, 0x85, 0x0d, 0x94,
+ 0x61, 0x84, 0x0d, 0x94, 0x59, 0x83, 0x0d, 0x94, 0x51, 0xa6, 0x0d, 0x94,
+ 0x49, 0xa5, 0x0d, 0x94, 0x41, 0xa4, 0x0d, 0x94, 0x39, 0xa3, 0x0d, 0x94,
+ 0x31, 0xa2, 0x0d, 0x94, 0x29, 0xa1, 0x0d, 0x94, 0x21, 0xa0, 0x0d, 0x94,
+ 0x19, 0x9f, 0x0d, 0x94, 0x11, 0x9e, 0x0d, 0x94, 0x09, 0x9d, 0x0d, 0x94,
+ 0x00, 0x88, 0x0d, 0x93, 0xf9, 0x87, 0x0d, 0x93, 0xf1, 0x86, 0x0d, 0x93,
+ 0xe9, 0x85, 0x0d, 0x93, 0xe1, 0x84, 0x0d, 0x93, 0xd9, 0x83, 0x0d, 0x93,
+ 0xd1, 0xa6, 0x0d, 0x93, 0xc9, 0xa5, 0x0d, 0x93, 0xc1, 0xa4, 0x0d, 0x93,
+ 0xb9, 0xa3, 0x0d, 0x93, 0xb1, 0xa2, 0x0d, 0x93, 0xa9, 0xa1, 0x0d, 0x93,
+ 0xa1, 0xa0, 0x0d, 0x93, 0x99, 0x9f, 0x0d, 0x93, 0x91, 0x9e, 0x0d, 0x93,
+ 0x89, 0x9d, 0x0d, 0x93, 0x80, 0x88, 0x0d, 0x93, 0x79, 0x87, 0x0d, 0x93,
+ 0x71, 0x86, 0x0d, 0x93, 0x69, 0x85, 0x0d, 0x93, 0x61, 0x84, 0x0d, 0x93,
+ 0x59, 0x83, 0x0d, 0x93, 0x51, 0xa6, 0x0d, 0x93, 0x49, 0xa5, 0x0d, 0x93,
+ 0x41, 0xa4, 0x0d, 0x93, 0x39, 0xa3, 0x0d, 0x93, 0x31, 0xa2, 0x0d, 0x93,
+ 0x29, 0xa1, 0x0d, 0x93, 0x21, 0xa0, 0x0d, 0x93, 0x19, 0x9f, 0x0d, 0x93,
+ 0x11, 0x9e, 0x0d, 0x93, 0x09, 0x9d, 0x0d, 0x93, 0x00, 0x88, 0x0d, 0x92,
+ 0xf9, 0x87, 0x0d, 0x92, 0xf1, 0x86, 0x0d, 0x92, 0xe9, 0x85, 0x0d, 0x92,
+ 0xe1, 0x84, 0x0d, 0x92, 0xd9, 0x83, 0x0d, 0x92, 0xd1, 0xa6, 0x0d, 0x92,
+ 0xc9, 0xa5, 0x0d, 0x92, 0xc1, 0xa4, 0x0d, 0x92, 0xb9, 0xa3, 0x0d, 0x92,
+ 0xb1, 0xa2, 0x0d, 0x92, 0xa9, 0xa1, 0x0d, 0x92, 0xa1, 0xa0, 0x0d, 0x92,
+ 0x99, 0x9f, 0x0d, 0x92, 0x91, 0x9e, 0x0d, 0x92, 0x89, 0x9d, 0x0d, 0x92,
+ 0x80, 0x88, 0x0d, 0x92, 0x79, 0x87, 0x0d, 0x92, 0x71, 0x86, 0x0d, 0x92,
+ 0x69, 0x85, 0x0d, 0x92, 0x61, 0x84, 0x0d, 0x92, 0x59, 0x83, 0x0d, 0x92,
+ 0x51, 0xa6, 0x0d, 0x92, 0x49, 0xa5, 0x0d, 0x92, 0x41, 0xa4, 0x0d, 0x92,
+ 0x39, 0xa3, 0x0d, 0x92, 0x31, 0xa2, 0x0d, 0x92, 0x29, 0xa1, 0x0d, 0x92,
+ 0x21, 0xa0, 0x0d, 0x92, 0x19, 0x9f, 0x0d, 0x92, 0x11, 0x9e, 0x0d, 0x92,
+ 0x09, 0x9d, 0x0d, 0x92, 0x00, 0x88, 0x0d, 0x91, 0xf9, 0x87, 0x0d, 0x91,
+ 0xf1, 0x86, 0x0d, 0x91, 0xe9, 0x85, 0x0d, 0x91, 0xe1, 0x84, 0x0d, 0x91,
+ 0xd9, 0x83, 0x0d, 0x91, 0xd1, 0xa6, 0x0d, 0x91, 0xc9, 0xa5, 0x0d, 0x91,
+ 0xc1, 0xa4, 0x0d, 0x91, 0xb9, 0xa3, 0x0d, 0x91, 0xb1, 0xa2, 0x0d, 0x91,
+ 0xa9, 0xa1, 0x0d, 0x91, 0xa1, 0xa0, 0x0d, 0x91, 0x99, 0x9f, 0x0d, 0x91,
+ 0x91, 0x9e, 0x0d, 0x91, 0x89, 0x9d, 0x0d, 0x91, 0x80, 0x88, 0x0d, 0x91,
+ 0x79, 0x87, 0x0d, 0x91, 0x71, 0x86, 0x0d, 0x91, 0x69, 0x85, 0x0d, 0x91,
+ 0x61, 0x84, 0x0d, 0x91, 0x59, 0x83, 0x0d, 0x91, 0x51, 0xa6, 0x0d, 0x91,
+ 0x49, 0xa5, 0x0d, 0x91, 0x41, 0xa4, 0x0d, 0x91, 0x39, 0xa3, 0x0d, 0x91,
+ 0x31, 0xa2, 0x0d, 0x91, 0x29, 0xa1, 0x0d, 0x91, 0x21, 0xa0, 0x0d, 0x91,
+ 0x19, 0x9f, 0x0d, 0x91, 0x11, 0x9e, 0x0d, 0x91, 0x09, 0x9d, 0x0d, 0x91,
+ 0x00, 0x88, 0x0d, 0x90, 0xf9, 0x87, 0x0d, 0x90, 0xf1, 0x86, 0x0d, 0x90,
+ 0xe9, 0x85, 0x0d, 0x90, 0xe1, 0x84, 0x0d, 0x90, 0xd9, 0x83, 0x0d, 0x90,
+ 0xd1, 0xa6, 0x0d, 0x90, 0xc9, 0xa5, 0x0d, 0x90, 0xc1, 0xa4, 0x0d, 0x90,
+ 0xb9, 0xa3, 0x0d, 0x90, 0xb1, 0xa2, 0x0d, 0x90, 0xa9, 0xa1, 0x0d, 0x90,
+ 0xa1, 0xa0, 0x0d, 0x90, 0x99, 0x9f, 0x0d, 0x90, 0x91, 0x9e, 0x0d, 0x90,
+ 0x89, 0x9d, 0x0d, 0x90, 0x80, 0x88, 0x0d, 0x90, 0x79, 0x87, 0x0d, 0x90,
+ 0x71, 0x86, 0x0d, 0x90, 0x69, 0x85, 0x0d, 0x90, 0x61, 0x84, 0x0d, 0x90,
+ 0x59, 0x83, 0x0d, 0x90, 0x51, 0xa6, 0x0d, 0x90, 0x49, 0xa5, 0x0d, 0x90,
+ 0x41, 0xa4, 0x0d, 0x90, 0x39, 0xa3, 0x0d, 0x90, 0x31, 0xa2, 0x0d, 0x90,
+ 0x29, 0xa1, 0x0d, 0x90, 0x21, 0xa0, 0x0d, 0x90, 0x19, 0x9f, 0x0d, 0x90,
+ 0x11, 0x9e, 0x0d, 0x90, 0x09, 0x9d, 0x0d, 0x90, 0x00, 0x88, 0x0d, 0x96,
+ 0xf9, 0x87, 0x0d, 0x96, 0xf1, 0x86, 0x0d, 0x96, 0xe9, 0x85, 0x0d, 0x96,
+ 0xe1, 0x84, 0x0d, 0x96, 0xd9, 0x83, 0x0d, 0x96, 0xd1, 0xa6, 0x0d, 0x96,
+ 0xc9, 0xa5, 0x0d, 0x96, 0xc1, 0xa4, 0x0d, 0x96, 0xb9, 0xa3, 0x0d, 0x96,
+ 0xb1, 0xa2, 0x0d, 0x96, 0xa9, 0xa1, 0x0d, 0x96, 0xa1, 0xa0, 0x0d, 0x96,
+ 0x99, 0x9f, 0x0d, 0x96, 0x91, 0x9e, 0x0d, 0x96, 0x89, 0x9d, 0x0d, 0x96,
+ 0x80, 0x88, 0x0d, 0x96, 0x79, 0x87, 0x0d, 0x96, 0x71, 0x86, 0x0d, 0x96,
+ 0x69, 0x85, 0x0d, 0x96, 0x61, 0x84, 0x0d, 0x96, 0x59, 0x83, 0x0d, 0x96,
+ 0x51, 0xa6, 0x0d, 0x96, 0x49, 0xa5, 0x0d, 0x96, 0x41, 0xa4, 0x0d, 0x96,
+ 0x39, 0xa3, 0x0d, 0x96, 0x31, 0xa2, 0x0d, 0x96, 0x29, 0xa1, 0x0d, 0x96,
+ 0x21, 0xa0, 0x0d, 0x96, 0x19, 0x9f, 0x0d, 0x96, 0x11, 0x9e, 0x0d, 0x96,
+ 0x09, 0x9d, 0x0d, 0x96, 0x00, 0x88, 0x0d, 0x95, 0x79, 0x87, 0x0d, 0x95,
+ 0x71, 0x86, 0x0d, 0x95, 0x69, 0x85, 0x0d, 0x95, 0x61, 0x84, 0x0d, 0x95,
+ 0x59, 0x83, 0x0d, 0x95, 0x51, 0xa6, 0x0d, 0x95, 0x49, 0xa5, 0x0d, 0x95,
+ 0x41, 0xa4, 0x0d, 0x95, 0x39, 0xa3, 0x0d, 0x95, 0x31, 0xa2, 0x0d, 0x95,
+ 0x29, 0xa1, 0x0d, 0x95, 0x21, 0xa0, 0x0d, 0x95, 0x19, 0x9f, 0x0d, 0x95,
+ 0x11, 0x9e, 0x0d, 0x95, 0x09, 0x9d, 0x0d, 0x95, 0x00, 0x88, 0x0d, 0x8f,
+ 0xf9, 0x87, 0x0d, 0x8f, 0xf1, 0x86, 0x0d, 0x8f, 0xe9, 0x85, 0x0d, 0x8f,
+ 0xe1, 0x84, 0x0d, 0x8f, 0xd9, 0x83, 0x0d, 0x8f, 0xd1, 0xa6, 0x0d, 0x8f,
+ 0xc9, 0xa5, 0x0d, 0x8f, 0xc1, 0xa4, 0x0d, 0x8f, 0xb9, 0xa3, 0x0d, 0x8f,
+ 0xb1, 0xa2, 0x0d, 0x8f, 0xa9, 0xa1, 0x0d, 0x8f, 0xa1, 0xa0, 0x0d, 0x8f,
+ 0x99, 0x9f, 0x0d, 0x8f, 0x91, 0x9e, 0x0d, 0x8f, 0x89, 0x9d, 0x0d, 0x8f,
+ 0x80, 0x88, 0x0d, 0x8f, 0x79, 0x87, 0x0d, 0x8f, 0x71, 0x86, 0x0d, 0x8f,
+ 0x69, 0x85, 0x0d, 0x8f, 0x61, 0x84, 0x0d, 0x8f, 0x59, 0x83, 0x0d, 0x8f,
+ 0x51, 0xa6, 0x0d, 0x8f, 0x49, 0xa5, 0x0d, 0x8f, 0x41, 0xa4, 0x0d, 0x8f,
+ 0x39, 0xa3, 0x0d, 0x8f, 0x31, 0xa2, 0x0d, 0x8f, 0x29, 0xa1, 0x0d, 0x8f,
+ 0x21, 0xa0, 0x0d, 0x8f, 0x19, 0x9f, 0x0d, 0x8f, 0x11, 0x9e, 0x0d, 0x8f,
+ 0x09, 0x9d, 0x0d, 0x8f, 0x00, 0x88, 0x0d, 0x8e, 0xf9, 0x87, 0x0d, 0x8e,
+ 0xf1, 0x86, 0x0d, 0x8e, 0xe9, 0x85, 0x0d, 0x8e, 0xe1, 0x84, 0x0d, 0x8e,
+ 0xd9, 0x83, 0x0d, 0x8e, 0xd1, 0xa6, 0x0d, 0x8e, 0xc9, 0xa5, 0x0d, 0x8e,
+ 0xc1, 0xa4, 0x0d, 0x8e, 0xb9, 0xa3, 0x0d, 0x8e, 0xb1, 0xa2, 0x0d, 0x8e,
+ 0xa9, 0xa1, 0x0d, 0x8e, 0xa1, 0xa0, 0x0d, 0x8e, 0x99, 0x9f, 0x0d, 0x8e,
+ 0x91, 0x9e, 0x0d, 0x8e, 0x89, 0x9d, 0x0d, 0x8e, 0x80, 0x88, 0x0d, 0x8e,
+ 0x79, 0x87, 0x0d, 0x8e, 0x71, 0x86, 0x0d, 0x8e, 0x69, 0x85, 0x0d, 0x8e,
+ 0x61, 0x84, 0x0d, 0x8e, 0x59, 0x83, 0x0d, 0x8e, 0x51, 0xa6, 0x0d, 0x8e,
+ 0x49, 0xa5, 0x0d, 0x8e, 0x41, 0xa4, 0x0d, 0x8e, 0x39, 0xa3, 0x0d, 0x8e,
+ 0x31, 0xa2, 0x0d, 0x8e, 0x29, 0xa1, 0x0d, 0x8e, 0x21, 0xa0, 0x0d, 0x8e,
+ 0x19, 0x9f, 0x0d, 0x8e, 0x11, 0x9e, 0x0d, 0x8e, 0x09, 0x9d, 0x0d, 0x8e,
+ 0x00, 0x88, 0x0d, 0x8d, 0xf9, 0x87, 0x0d, 0x8d, 0xf1, 0x86, 0x0d, 0x8d,
+ 0xe9, 0x85, 0x0d, 0x8d, 0xe1, 0x84, 0x0d, 0x8d, 0xd9, 0x83, 0x0d, 0x8d,
+ 0xd1, 0xa6, 0x0d, 0x8d, 0xc9, 0xa5, 0x0d, 0x8d, 0xc1, 0xa4, 0x0d, 0x8d,
+ 0xb9, 0xa3, 0x0d, 0x8d, 0xb1, 0xa2, 0x0d, 0x8d, 0xa9, 0xa1, 0x0d, 0x8d,
+ 0xa1, 0xa0, 0x0d, 0x8d, 0x99, 0x9f, 0x0d, 0x8d, 0x91, 0x9e, 0x0d, 0x8d,
+ 0x89, 0x9d, 0x0d, 0x8d, 0x80, 0x88, 0x0d, 0x8d, 0x79, 0x87, 0x0d, 0x8d,
+ 0x71, 0x86, 0x0d, 0x8d, 0x69, 0x85, 0x0d, 0x8d, 0x61, 0x84, 0x0d, 0x8d,
+ 0x59, 0x83, 0x0d, 0x8d, 0x51, 0xa6, 0x0d, 0x8d, 0x49, 0xa5, 0x0d, 0x8d,
+ 0x41, 0xa4, 0x0d, 0x8d, 0x39, 0xa3, 0x0d, 0x8d, 0x31, 0xa2, 0x0d, 0x8d,
+ 0x29, 0xa1, 0x0d, 0x8d, 0x21, 0xa0, 0x0d, 0x8d, 0x19, 0x9f, 0x0d, 0x8d,
+ 0x11, 0x9e, 0x0d, 0x8d, 0x09, 0x9d, 0x0d, 0x8d, 0x00, 0x88, 0x0d, 0x8c,
+ 0xf9, 0x87, 0x0d, 0x8c, 0xf1, 0x86, 0x0d, 0x8c, 0xe9, 0x85, 0x0d, 0x8c,
+ 0xe1, 0x84, 0x0d, 0x8c, 0xd9, 0x83, 0x0d, 0x8c, 0xd1, 0xa6, 0x0d, 0x8c,
+ 0xc9, 0xa5, 0x0d, 0x8c, 0xc1, 0xa4, 0x0d, 0x8c, 0xb9, 0xa3, 0x0d, 0x8c,
+ 0xb1, 0xa2, 0x0d, 0x8c, 0xa9, 0xa1, 0x0d, 0x8c, 0xa1, 0xa0, 0x0d, 0x8c,
+ 0x99, 0x9f, 0x0d, 0x8c, 0x91, 0x9e, 0x0d, 0x8c, 0x89, 0x9d, 0x0d, 0x8c,
+ 0x80, 0x88, 0x0d, 0x8c, 0x79, 0x87, 0x0d, 0x8c, 0x71, 0x86, 0x0d, 0x8c,
+ 0x69, 0x85, 0x0d, 0x8c, 0x61, 0x84, 0x0d, 0x8c, 0x59, 0x83, 0x0d, 0x8c,
+ 0x51, 0xa6, 0x0d, 0x8c, 0x49, 0xa5, 0x0d, 0x8c, 0x41, 0xa4, 0x0d, 0x8c,
+ 0x39, 0xa3, 0x0d, 0x8c, 0x31, 0xa2, 0x0d, 0x8c, 0x29, 0xa1, 0x0d, 0x8c,
+ 0x21, 0xa0, 0x0d, 0x8c, 0x19, 0x9f, 0x0d, 0x8c, 0x11, 0x9e, 0x0d, 0x8c,
+ 0x09, 0x9d, 0x0d, 0x8c, 0x00, 0x88, 0x0d, 0x8b, 0xf9, 0x87, 0x0d, 0x8b,
+ 0xf1, 0x86, 0x0d, 0x8b, 0xe9, 0x85, 0x0d, 0x8b, 0xe1, 0x84, 0x0d, 0x8b,
+ 0xd9, 0x83, 0x0d, 0x8b, 0xd1, 0xa6, 0x0d, 0x8b, 0xc9, 0xa5, 0x0d, 0x8b,
+ 0xc1, 0xa4, 0x0d, 0x8b, 0xb9, 0xa3, 0x0d, 0x8b, 0xb1, 0xa2, 0x0d, 0x8b,
+ 0xa9, 0xa1, 0x0d, 0x8b, 0xa1, 0xa0, 0x0d, 0x8b, 0x99, 0x9f, 0x0d, 0x8b,
+ 0x91, 0x9e, 0x0d, 0x8b, 0x89, 0x9d, 0x0d, 0x8b, 0x80, 0xcd, 0x7a, 0x57,
+ 0x01, 0x24, 0xd9, 0xcd, 0x7c, 0x6c, 0x01, 0x24, 0x98, 0xcf, 0x62, 0x85,
+ 0x01, 0x24, 0xb9, 0xc2, 0x02, 0xcd, 0x00, 0x01, 0x18, 0xc2, 0x00, 0x9a,
+ 0x00, 0x3f, 0x51, 0xc3, 0x1c, 0x4f, 0x00, 0x3f, 0x49, 0xc2, 0x26, 0x94,
+ 0x00, 0x3f, 0x40, 0xc7, 0xc5, 0xd1, 0x00, 0x3f, 0x38, 0xc7, 0xc5, 0xd1,
+ 0x00, 0x3f, 0x00, 0xd0, 0x5a, 0xff, 0x01, 0x4d, 0xa1, 0xd1, 0x09, 0xb6,
+ 0x01, 0x4d, 0x99, 0xd2, 0x49, 0xfe, 0x01, 0x4d, 0x91, 0xc7, 0x79, 0xb4,
+ 0x01, 0x4d, 0x88, 0x43, 0x07, 0x43, 0x42, 0x8f, 0xb4, 0x03, 0xc2, 0x8f,
+ 0xbe, 0xcd, 0x7c, 0x2b, 0x0f, 0x98, 0x68, 0xc6, 0x07, 0xba, 0x09, 0xa2,
+ 0x83, 0x02, 0x8f, 0xca, 0xc3, 0x02, 0x47, 0x09, 0xa2, 0x5b, 0x02, 0x8f,
+ 0xce, 0xc3, 0x03, 0x02, 0x09, 0xa2, 0x91, 0xc5, 0xdd, 0xa1, 0x09, 0xa2,
+ 0x4a, 0x02, 0x8f, 0xd2, 0xa1, 0x09, 0x8f, 0x71, 0xa0, 0x09, 0x8f, 0x69,
+ 0x9f, 0x09, 0x8f, 0x61, 0x9e, 0x09, 0x8f, 0x59, 0x9d, 0x09, 0x8f, 0x4a,
+ 0x02, 0x8f, 0xd8, 0xa6, 0x09, 0x8f, 0x41, 0xa5, 0x09, 0x8f, 0x39, 0xa4,
+ 0x09, 0x8f, 0x31, 0xa3, 0x09, 0x8f, 0x29, 0xa2, 0x09, 0x8f, 0x21, 0xa1,
+ 0x09, 0x8f, 0x19, 0xa0, 0x09, 0x8f, 0x03, 0x02, 0x8f, 0xdc, 0x9f, 0x09,
+ 0x8e, 0xf9, 0x9e, 0x09, 0x8e, 0xeb, 0x02, 0x8f, 0xe4, 0x9d, 0x09, 0x8e,
+ 0xe0, 0xa6, 0x09, 0x8e, 0xd9, 0xa5, 0x09, 0x8e, 0xcb, 0x02, 0x8f, 0xe8,
+ 0xa4, 0x09, 0x8e, 0xc1, 0xa3, 0x09, 0x8e, 0xb9, 0xa2, 0x09, 0x8e, 0xb1,
+ 0xa1, 0x09, 0x8e, 0xa3, 0x02, 0x8f, 0xec, 0xa0, 0x09, 0x8e, 0x99, 0x9f,
+ 0x09, 0x8e, 0x8b, 0x02, 0x8f, 0xf0, 0x9e, 0x09, 0x8e, 0x81, 0x9d, 0x09,
+ 0x8e, 0x78, 0xa6, 0x09, 0x8e, 0x71, 0xa5, 0x09, 0x8e, 0x69, 0xa4, 0x09,
+ 0x8e, 0x5b, 0x02, 0x8f, 0xf4, 0xa3, 0x09, 0x8e, 0x4b, 0x02, 0x8f, 0xf8,
+ 0xa2, 0x09, 0x8e, 0x3b, 0x02, 0x8f, 0xfc, 0xa1, 0x09, 0x8e, 0x31, 0xa0,
+ 0x09, 0x8e, 0x29, 0x9f, 0x09, 0x8d, 0xe3, 0x02, 0x90, 0x00, 0x9e, 0x09,
+ 0x8d, 0xd9, 0x9d, 0x09, 0x8d, 0xca, 0x02, 0x90, 0x20, 0xa6, 0x09, 0x8d,
+ 0xc1, 0xa5, 0x09, 0x8d, 0xb9, 0xa4, 0x09, 0x8d, 0xb1, 0xa3, 0x09, 0x8d,
+ 0xa9, 0xa2, 0x09, 0x8d, 0xa1, 0xa1, 0x09, 0x8d, 0x99, 0xa0, 0x09, 0x8d,
+ 0x8b, 0x02, 0x90, 0x24, 0x9f, 0x09, 0x8d, 0x81, 0x9e, 0x09, 0x8d, 0x6a,
+ 0x02, 0x90, 0x28, 0xa2, 0x09, 0x9e, 0x71, 0xa1, 0x09, 0x9e, 0x63, 0x02,
+ 0x90, 0x30, 0xa0, 0x09, 0x9e, 0x59, 0x9f, 0x09, 0x9e, 0x51, 0x9e, 0x09,
+ 0x9e, 0x49, 0x9d, 0x09, 0x9e, 0x40, 0xa6, 0x09, 0x9e, 0x39, 0xa5, 0x09,
+ 0x9e, 0x2b, 0x02, 0x90, 0x34, 0xa4, 0x09, 0x9e, 0x1b, 0x02, 0x90, 0x38,
+ 0xa3, 0x09, 0x9e, 0x11, 0xa2, 0x09, 0x9e, 0x09, 0xa1, 0x09, 0x9d, 0xfb,
+ 0x02, 0x90, 0x3c, 0xa0, 0x09, 0x9d, 0xf1, 0x9f, 0x09, 0x9d, 0xe9, 0x9e,
+ 0x09, 0x9d, 0xe1, 0x9d, 0x09, 0x9d, 0xd2, 0x02, 0x90, 0x40, 0xa6, 0x09,
+ 0x9d, 0xc3, 0x02, 0x90, 0x44, 0xa5, 0x09, 0x9d, 0xb9, 0xa4, 0x09, 0x9d,
+ 0xb1, 0xa3, 0x09, 0x9d, 0xa9, 0xa2, 0x09, 0x9d, 0xa1, 0xa1, 0x09, 0x9d,
+ 0x99, 0xa0, 0x09, 0x9d, 0x8b, 0x02, 0x90, 0x48, 0x9f, 0x09, 0x9d, 0x81,
+ 0x9e, 0x09, 0x9d, 0x78, 0xa3, 0x09, 0x99, 0x91, 0xa2, 0x09, 0x99, 0x89,
+ 0xa1, 0x09, 0x99, 0x81, 0xa0, 0x09, 0x99, 0x73, 0x02, 0x90, 0x4c, 0x9f,
+ 0x09, 0x99, 0x63, 0x02, 0x90, 0x50, 0x9e, 0x09, 0x99, 0x59, 0x9d, 0x09,
+ 0x99, 0x50, 0xa6, 0x09, 0x99, 0x49, 0xa5, 0x09, 0x99, 0x41, 0xa4, 0x09,
+ 0x99, 0x39, 0xa3, 0x09, 0x99, 0x31, 0xa2, 0x09, 0x99, 0x29, 0xa1, 0x09,
+ 0x99, 0x21, 0xa0, 0x09, 0x99, 0x19, 0x9f, 0x09, 0x99, 0x11, 0x9e, 0x09,
+ 0x99, 0x09, 0x9d, 0x09, 0x99, 0x00, 0xa6, 0x09, 0x98, 0xf9, 0xa5, 0x09,
+ 0x98, 0xf1, 0xa4, 0x09, 0x98, 0xe9, 0xa3, 0x09, 0x98, 0xdb, 0x02, 0x90,
+ 0x54, 0xa2, 0x09, 0x98, 0xd1, 0xa1, 0x09, 0x98, 0xc9, 0xa0, 0x09, 0x98,
+ 0xc1, 0x9f, 0x09, 0x98, 0xb9, 0x9e, 0x09, 0x98, 0xab, 0x02, 0x90, 0x58,
+ 0x9d, 0x09, 0x98, 0xa0, 0xa6, 0x09, 0x98, 0x93, 0x02, 0x90, 0x5c, 0xa5,
+ 0x09, 0x98, 0x83, 0x02, 0x90, 0x60, 0xa4, 0x09, 0x98, 0x73, 0x02, 0x90,
+ 0x64, 0xa3, 0x09, 0x98, 0x69, 0xa2, 0x09, 0x98, 0x61, 0xa1, 0x09, 0x98,
+ 0x59, 0xa0, 0x09, 0x98, 0x4b, 0x02, 0x90, 0x68, 0x9f, 0x09, 0x98, 0x41,
+ 0x9e, 0x09, 0x98, 0x38, 0x83, 0x09, 0x8c, 0x28, 0x83, 0x09, 0x8d, 0x50,
+ 0x83, 0x09, 0x8d, 0x28, 0xa0, 0x09, 0x89, 0xf1, 0x9f, 0x09, 0x89, 0xe9,
+ 0x9e, 0x09, 0x89, 0xcb, 0x02, 0x90, 0x6c, 0x9d, 0x09, 0x89, 0xc0, 0xa6,
+ 0x09, 0x89, 0xb9, 0xa5, 0x09, 0x89, 0xb1, 0xa4, 0x09, 0x89, 0xa3, 0x02,
+ 0x90, 0x78, 0xa3, 0x09, 0x89, 0x93, 0x02, 0x90, 0x7c, 0xa2, 0x09, 0x89,
+ 0x83, 0x02, 0x90, 0x80, 0xa1, 0x09, 0x89, 0x79, 0xa0, 0x09, 0x89, 0x71,
+ 0x9f, 0x09, 0x89, 0x69, 0x9e, 0x09, 0x89, 0x61, 0x9d, 0x09, 0x89, 0x58,
+ 0xa6, 0x09, 0x89, 0x51, 0xa5, 0x09, 0x89, 0x43, 0x02, 0x90, 0x84, 0xa4,
+ 0x09, 0x89, 0x33, 0x02, 0x90, 0x88, 0xa3, 0x09, 0x89, 0x29, 0xa2, 0x09,
+ 0x89, 0x21, 0xa1, 0x09, 0x89, 0x19, 0xa0, 0x09, 0x89, 0x11, 0x9f, 0x09,
+ 0x89, 0x09, 0x9e, 0x09, 0x88, 0xfb, 0x02, 0x90, 0x8c, 0x9d, 0x09, 0x88,
+ 0xf0, 0xa6, 0x09, 0x88, 0xe9, 0xa5, 0x09, 0x88, 0xe1, 0xa4, 0x09, 0x88,
+ 0xd9, 0xa3, 0x09, 0x88, 0xd1, 0xa2, 0x09, 0x88, 0xc9, 0xa1, 0x09, 0x88,
+ 0xc1, 0xa0, 0x09, 0x88, 0xb9, 0x9f, 0x09, 0x88, 0xb1, 0x9e, 0x09, 0x88,
+ 0xa3, 0x02, 0x90, 0x90, 0x9d, 0x09, 0x88, 0x98, 0xa6, 0x09, 0x88, 0x91,
+ 0xa5, 0x09, 0x88, 0x89, 0xa4, 0x09, 0x88, 0x81, 0xa3, 0x09, 0x88, 0x79,
+ 0xa2, 0x09, 0x88, 0x71, 0xa1, 0x09, 0x88, 0x69, 0xa0, 0x09, 0x88, 0x5b,
+ 0x02, 0x90, 0x94, 0x9f, 0x09, 0x88, 0x51, 0x9e, 0x09, 0x88, 0x49, 0x9d,
+ 0x09, 0x88, 0x40, 0xa6, 0x09, 0x88, 0x39, 0xa5, 0x09, 0x88, 0x31, 0xa4,
+ 0x09, 0x88, 0x29, 0xa3, 0x09, 0x88, 0x21, 0xa2, 0x09, 0x88, 0x19, 0xa1,
+ 0x09, 0x88, 0x11, 0xa0, 0x09, 0x88, 0x09, 0x9f, 0x09, 0x88, 0x01, 0x9e,
+ 0x09, 0x87, 0xf2, 0x02, 0x90, 0x98, 0xa5, 0x09, 0x87, 0xe9, 0xa4, 0x09,
+ 0x87, 0xe1, 0xa3, 0x09, 0x87, 0xd9, 0xa1, 0x09, 0x87, 0xcb, 0x02, 0x90,
+ 0x9c, 0xa0, 0x09, 0x87, 0xc1, 0x9f, 0x09, 0x87, 0xb9, 0x9e, 0x09, 0x87,
+ 0xb1, 0x9d, 0x09, 0x87, 0xa8, 0xa6, 0x09, 0x87, 0xa1, 0xa5, 0x09, 0x87,
+ 0x93, 0x02, 0x90, 0xa0, 0xa4, 0x09, 0x87, 0x89, 0xa3, 0x09, 0x87, 0x81,
+ 0xa2, 0x09, 0x87, 0x79, 0xa1, 0x09, 0x87, 0x71, 0xa0, 0x09, 0x87, 0x69,
+ 0x9f, 0x09, 0x87, 0x61, 0x9e, 0x09, 0x87, 0x59, 0x9d, 0x09, 0x87, 0x4a,
+ 0x02, 0x90, 0xa4, 0xa6, 0x09, 0x87, 0x41, 0xa5, 0x09, 0x87, 0x39, 0xa4,
+ 0x09, 0x87, 0x2b, 0x02, 0x90, 0xa8, 0xa3, 0x09, 0x87, 0x1b, 0x02, 0x90,
+ 0xac, 0xa2, 0x09, 0x87, 0x11, 0xa1, 0x09, 0x87, 0x09, 0xa0, 0x09, 0x87,
+ 0x01, 0x9f, 0x09, 0x86, 0xf9, 0x9e, 0x09, 0x86, 0xf1, 0x9d, 0x09, 0x86,
+ 0xe8, 0xa6, 0x09, 0x86, 0xdb, 0x02, 0x90, 0xb0, 0xa5, 0x09, 0x86, 0xcb,
+ 0x02, 0x90, 0xb4, 0xa4, 0x09, 0x86, 0xc1, 0xa3, 0x09, 0x86, 0xb9, 0xa2,
+ 0x09, 0x86, 0xb1, 0xa1, 0x09, 0x86, 0xa9, 0xa0, 0x09, 0x86, 0xa1, 0x9f,
+ 0x09, 0x86, 0x99, 0x9e, 0x09, 0x86, 0x90, 0x83, 0x09, 0x82, 0xa8, 0x00,
+ 0x42, 0x90, 0xb8, 0x00, 0x42, 0x90, 0xc4, 0xa2, 0x09, 0x8c, 0xd1, 0xa1,
+ 0x09, 0x8c, 0xc9, 0xa0, 0x09, 0x8c, 0xc1, 0x9f, 0x09, 0x8c, 0xb9, 0x9e,
+ 0x09, 0x8c, 0xab, 0x02, 0x90, 0xd0, 0x9d, 0x09, 0x8c, 0x9a, 0x02, 0x90,
+ 0xd4, 0xa6, 0x09, 0x8c, 0x8b, 0x02, 0x90, 0xd8, 0xa5, 0x09, 0x8c, 0x81,
+ 0xa4, 0x09, 0x8c, 0x79, 0xa3, 0x09, 0x8c, 0x71, 0xa2, 0x09, 0x8c, 0x63,
+ 0x02, 0x90, 0xdc, 0xa1, 0x09, 0x8c, 0x59, 0xa0, 0x09, 0x8c, 0x51, 0x9f,
+ 0x09, 0x8c, 0x49, 0x9e, 0x09, 0x8c, 0x40, 0x9e, 0x09, 0x94, 0xd1, 0x9d,
+ 0x09, 0x94, 0xba, 0x02, 0x90, 0xe0, 0xa6, 0x09, 0x94, 0xb1, 0xa5, 0x09,
+ 0x94, 0xa9, 0xa4, 0x09, 0x94, 0xa1, 0xa3, 0x09, 0x94, 0x99, 0xa2, 0x09,
+ 0x94, 0x91, 0xa1, 0x09, 0x94, 0x89, 0xa0, 0x09, 0x94, 0x81, 0x9f, 0x09,
+ 0x94, 0x79, 0x9e, 0x09, 0x94, 0x71, 0x9d, 0x09, 0x94, 0x68, 0xa6, 0x09,
+ 0x94, 0x61, 0xa5, 0x09, 0x94, 0x59, 0xa4, 0x09, 0x94, 0x51, 0xa3, 0x09,
+ 0x94, 0x2b, 0x02, 0x90, 0xe8, 0xa2, 0x09, 0x94, 0x21, 0xa1, 0x09, 0x94,
+ 0x19, 0xa0, 0x09, 0x94, 0x0b, 0x02, 0x90, 0xf8, 0x9f, 0x09, 0x94, 0x01,
+ 0x9e, 0x09, 0x93, 0xf9, 0x9d, 0x09, 0x93, 0xea, 0x02, 0x90, 0xfc, 0xa6,
+ 0x09, 0x93, 0xdb, 0x02, 0x91, 0x00, 0xa5, 0x09, 0x93, 0xd1, 0xa4, 0x09,
+ 0x93, 0xc9, 0xa3, 0x09, 0x93, 0xc1, 0xa2, 0x09, 0x93, 0xb3, 0x02, 0x91,
+ 0x04, 0xa1, 0x09, 0x93, 0xa3, 0x02, 0x91, 0x08, 0xa0, 0x09, 0x93, 0x99,
+ 0x9f, 0x09, 0x93, 0x91, 0x9e, 0x09, 0x93, 0x89, 0x9d, 0x09, 0x93, 0x7a,
+ 0x02, 0x91, 0x0c, 0xa6, 0x09, 0x93, 0x6b, 0x02, 0x91, 0x10, 0xa5, 0x09,
+ 0x93, 0x61, 0xa4, 0x09, 0x93, 0x59, 0xa3, 0x09, 0x93, 0x51, 0xa2, 0x09,
+ 0x93, 0x49, 0xa1, 0x09, 0x93, 0x41, 0xa0, 0x09, 0x93, 0x39, 0x9f, 0x09,
+ 0x93, 0x31, 0x9e, 0x09, 0x93, 0x29, 0x9d, 0x09, 0x93, 0x0a, 0x02, 0x91,
+ 0x14, 0xa6, 0x09, 0x93, 0x01, 0xa5, 0x09, 0x92, 0xf9, 0xa4, 0x09, 0x92,
+ 0xf1, 0xa3, 0x09, 0x92, 0xbb, 0x02, 0x91, 0x20, 0xa2, 0x09, 0x92, 0xab,
+ 0x02, 0x91, 0x38, 0xa1, 0x09, 0x92, 0xa1, 0xa0, 0x09, 0x92, 0x99, 0x9f,
+ 0x09, 0x92, 0x91, 0x9e, 0x09, 0x92, 0x82, 0x02, 0x91, 0x3c, 0x9e, 0x09,
+ 0x9b, 0xc3, 0x02, 0x91, 0x40, 0xa6, 0x09, 0x9c, 0x29, 0xa5, 0x09, 0x9c,
+ 0x21, 0xa4, 0x09, 0x9c, 0x19, 0xa3, 0x09, 0x9c, 0x11, 0xa2, 0x09, 0x9c,
+ 0x09, 0xa1, 0x09, 0x9c, 0x01, 0xa0, 0x09, 0x9b, 0xf9, 0x9f, 0x09, 0x9b,
+ 0xe3, 0x02, 0x91, 0x50, 0x9d, 0x09, 0x9b, 0xb8, 0x83, 0x09, 0x9d, 0x70,
+ 0xa6, 0x09, 0x9d, 0x61, 0xa5, 0x09, 0x9d, 0x59, 0xa4, 0x09, 0x9d, 0x4b,
+ 0x02, 0x91, 0x58, 0xa3, 0x09, 0x9d, 0x41, 0xa2, 0x09, 0x9d, 0x39, 0xa1,
+ 0x09, 0x9d, 0x31, 0xa0, 0x09, 0x9d, 0x23, 0x02, 0x91, 0x5c, 0x9f, 0x09,
+ 0x9d, 0x19, 0x9e, 0x09, 0x9d, 0x0b, 0x02, 0x91, 0x60, 0x9d, 0x09, 0x9c,
+ 0xfa, 0x02, 0x91, 0x64, 0xa6, 0x09, 0x9c, 0xeb, 0x02, 0x91, 0x68, 0xa5,
+ 0x09, 0x9c, 0xdb, 0x02, 0x91, 0x6c, 0xa4, 0x09, 0x9c, 0xd1, 0xa3, 0x09,
+ 0x9c, 0xc9, 0xa2, 0x09, 0x9c, 0xc1, 0xa1, 0x09, 0x9c, 0xb9, 0xa0, 0x09,
+ 0x9c, 0xab, 0x02, 0x91, 0x70, 0x9f, 0x09, 0x9c, 0xa1, 0x9e, 0x09, 0x9c,
+ 0x99, 0x9d, 0x09, 0x9c, 0x32, 0x02, 0x91, 0x74, 0xa6, 0x09, 0x9b, 0xb1,
+ 0xa5, 0x09, 0x9b, 0xa9, 0xa4, 0x09, 0x9b, 0x93, 0x02, 0x91, 0xa4, 0xa3,
+ 0x09, 0x9b, 0x89, 0xa2, 0x09, 0x9b, 0x81, 0xa1, 0x09, 0x9b, 0x79, 0xa0,
+ 0x09, 0x9b, 0x71, 0x9f, 0x09, 0x9b, 0x63, 0x02, 0x91, 0xac, 0x9e, 0x09,
+ 0x9b, 0x12, 0x02, 0x91, 0xb0, 0x9f, 0x09, 0xa1, 0x71, 0x9e, 0x09, 0xa1,
+ 0x69, 0x9d, 0x09, 0xa1, 0x60, 0xa6, 0x09, 0xa1, 0x59, 0xa5, 0x09, 0xa1,
+ 0x51, 0xa4, 0x09, 0xa1, 0x49, 0xa3, 0x09, 0xa1, 0x41, 0xa2, 0x09, 0xa1,
+ 0x39, 0xa1, 0x09, 0xa1, 0x31, 0xa0, 0x09, 0xa1, 0x29, 0x9f, 0x09, 0xa1,
+ 0x21, 0x9e, 0x09, 0xa1, 0x19, 0x9d, 0x09, 0xa1, 0x10, 0xa6, 0x09, 0xa1,
+ 0x09, 0xa5, 0x09, 0xa1, 0x01, 0xa4, 0x09, 0xa0, 0xf9, 0xa3, 0x09, 0xa0,
+ 0xf1, 0xa2, 0x09, 0xa0, 0xe9, 0xa1, 0x09, 0xa0, 0xe1, 0xa0, 0x09, 0xa0,
+ 0xd9, 0x9f, 0x09, 0xa0, 0xd1, 0x9e, 0x09, 0xa0, 0xc9, 0x9d, 0x09, 0xa0,
+ 0xc0, 0xa6, 0x09, 0xa0, 0xb9, 0xa5, 0x09, 0xa0, 0xb1, 0xa4, 0x09, 0xa0,
+ 0x9b, 0x02, 0x91, 0xd4, 0xa3, 0x09, 0xa0, 0x91, 0xa2, 0x09, 0xa0, 0x89,
+ 0xa1, 0x09, 0xa0, 0x81, 0xa0, 0x09, 0xa0, 0x79, 0x9f, 0x09, 0xa0, 0x71,
+ 0x9e, 0x09, 0xa0, 0x68, 0xa6, 0x09, 0x82, 0x71, 0xa5, 0x09, 0x82, 0x69,
+ 0xa4, 0x09, 0x82, 0x61, 0xa3, 0x09, 0x82, 0x59, 0xa2, 0x09, 0x82, 0x51,
+ 0xa1, 0x09, 0x82, 0x49, 0xa0, 0x09, 0x82, 0x41, 0x9f, 0x09, 0x82, 0x39,
+ 0x9e, 0x09, 0x82, 0x31, 0x9d, 0x09, 0x82, 0x28, 0xa6, 0x09, 0x82, 0x21,
+ 0xa5, 0x09, 0x82, 0x19, 0xa4, 0x09, 0x82, 0x11, 0xa3, 0x09, 0x82, 0x09,
+ 0xa2, 0x09, 0x82, 0x01, 0xa1, 0x09, 0x81, 0xf9, 0xa0, 0x09, 0x81, 0xf1,
+ 0x9f, 0x09, 0x81, 0xe9, 0x9e, 0x09, 0x81, 0xe1, 0x9d, 0x09, 0x81, 0xd8,
+ 0xa6, 0x09, 0x81, 0xd1, 0xa5, 0x09, 0x81, 0xc9, 0xa4, 0x09, 0x81, 0xc1,
+ 0xa3, 0x09, 0x81, 0xb9, 0xa2, 0x09, 0x81, 0xab, 0x02, 0x91, 0xdc, 0xa1,
+ 0x09, 0x81, 0xa1, 0xa0, 0x09, 0x81, 0x93, 0x02, 0x91, 0xe0, 0x9f, 0x09,
+ 0x81, 0x83, 0x02, 0x91, 0xe4, 0x9e, 0x09, 0x81, 0x79, 0x9d, 0x09, 0x81,
+ 0x6a, 0x02, 0x91, 0xe8, 0xa6, 0x09, 0x81, 0x61, 0xa5, 0x09, 0x81, 0x59,
+ 0xa4, 0x09, 0x81, 0x51, 0xa3, 0x09, 0x81, 0x49, 0xa2, 0x09, 0x81, 0x41,
+ 0xa1, 0x09, 0x81, 0x39, 0xa0, 0x09, 0x81, 0x31, 0x9f, 0x09, 0x81, 0x23,
+ 0x02, 0x91, 0xec, 0x9e, 0x09, 0x81, 0x19, 0x9d, 0x09, 0x81, 0x10, 0xa6,
+ 0x09, 0x81, 0x09, 0xa5, 0x09, 0x81, 0x01, 0xa4, 0x09, 0x80, 0xf9, 0xa3,
+ 0x09, 0x80, 0xf1, 0xa2, 0x09, 0x80, 0xe9, 0xa1, 0x09, 0x80, 0xe1, 0xa0,
+ 0x09, 0x80, 0xd9, 0x9f, 0x09, 0x80, 0xd1, 0x9e, 0x09, 0x80, 0xc9, 0x9d,
+ 0x09, 0x80, 0xc0, 0xa6, 0x09, 0x80, 0xb9, 0xa5, 0x09, 0x80, 0xb1, 0xa4,
+ 0x09, 0x80, 0xa3, 0x02, 0x91, 0xf0, 0xa3, 0x09, 0x80, 0x99, 0xa2, 0x09,
+ 0x80, 0x91, 0xa1, 0x09, 0x80, 0x83, 0x02, 0x91, 0xf4, 0xa0, 0x09, 0x80,
+ 0x79, 0x9f, 0x09, 0x80, 0x71, 0x9e, 0x09, 0x80, 0x69, 0x9d, 0x09, 0x80,
+ 0x60, 0xa6, 0x09, 0x80, 0x59, 0xa5, 0x09, 0x80, 0x51, 0xa4, 0x09, 0x80,
+ 0x49, 0xa3, 0x09, 0x80, 0x33, 0x02, 0x91, 0xf8, 0xa2, 0x09, 0x80, 0x23,
+ 0x02, 0x92, 0x00, 0xa1, 0x09, 0x80, 0x19, 0xa0, 0x09, 0x80, 0x11, 0x9f,
+ 0x09, 0x80, 0x09, 0x9e, 0x09, 0x80, 0x00, 0x8a, 0x09, 0xa0, 0x61, 0x89,
+ 0x09, 0xa0, 0x59, 0x88, 0x09, 0xa0, 0x51, 0x87, 0x09, 0xa0, 0x49, 0x86,
+ 0x09, 0xa0, 0x41, 0x85, 0x09, 0xa0, 0x39, 0x84, 0x09, 0xa0, 0x31, 0x83,
+ 0x09, 0xa0, 0x28, 0x8b, 0x09, 0xa0, 0x19, 0x8a, 0x09, 0xa0, 0x11, 0x89,
+ 0x09, 0xa0, 0x09, 0x88, 0x09, 0xa0, 0x01, 0x87, 0x09, 0x9f, 0xf9, 0x86,
+ 0x09, 0x9f, 0xf1, 0x85, 0x09, 0x9f, 0xe9, 0x84, 0x09, 0x9f, 0xe1, 0x83,
+ 0x09, 0x9f, 0xd8, 0x83, 0x09, 0x9f, 0x80, 0x83, 0x09, 0x9f, 0x70, 0x84,
+ 0x09, 0x9f, 0x61, 0x83, 0x09, 0x9f, 0x58, 0x86, 0x09, 0x9f, 0x49, 0x85,
+ 0x09, 0x9f, 0x41, 0x84, 0x09, 0x9f, 0x39, 0x83, 0x09, 0x9f, 0x30, 0x83,
+ 0x09, 0x9a, 0xb8, 0x83, 0x09, 0x9a, 0x98, 0x83, 0x09, 0x9a, 0x60, 0x84,
+ 0x09, 0x99, 0xd1, 0x83, 0x09, 0x99, 0xc8, 0x83, 0x09, 0x97, 0xd8, 0x84,
+ 0x09, 0x97, 0x89, 0x83, 0x09, 0x97, 0x80, 0x83, 0x09, 0x97, 0x30, 0x84,
+ 0x09, 0x97, 0x11, 0x83, 0x09, 0x97, 0x08, 0x83, 0x09, 0x96, 0xc0, 0x83,
+ 0x09, 0x96, 0x98, 0x83, 0x09, 0x96, 0x18, 0x83, 0x09, 0x95, 0xe0, 0x84,
+ 0x09, 0x95, 0xa1, 0x83, 0x09, 0x95, 0x98, 0x83, 0x09, 0x95, 0x88, 0x83,
+ 0x09, 0x94, 0xf8, 0x83, 0x09, 0x94, 0xe0, 0x9f, 0x09, 0x92, 0x73, 0x02,
+ 0x92, 0x04, 0x9e, 0x09, 0x92, 0x69, 0x9d, 0x09, 0x92, 0x60, 0xa6, 0x09,
+ 0x92, 0x59, 0xa5, 0x09, 0x92, 0x4b, 0x02, 0x92, 0x08, 0xa4, 0x09, 0x92,
+ 0x41, 0xa3, 0x09, 0x92, 0x39, 0xa2, 0x09, 0x92, 0x31, 0xa1, 0x09, 0x92,
+ 0x29, 0xa0, 0x09, 0x92, 0x21, 0x9f, 0x09, 0x92, 0x19, 0x9e, 0x09, 0x92,
+ 0x0b, 0x02, 0x92, 0x0c, 0x9d, 0x09, 0x91, 0xfa, 0x02, 0x92, 0x10, 0xa6,
+ 0x09, 0x91, 0xf1, 0xa5, 0x09, 0x91, 0xe9, 0xa4, 0x09, 0x91, 0xe1, 0xa3,
+ 0x09, 0x91, 0xd9, 0xa2, 0x09, 0x91, 0xd1, 0xa1, 0x09, 0x91, 0xc9, 0xa0,
+ 0x09, 0x91, 0xc1, 0x9f, 0x09, 0x91, 0xb9, 0x9e, 0x09, 0x91, 0xb0, 0xa6,
+ 0x09, 0x91, 0xa1, 0xa5, 0x09, 0x91, 0x99, 0xa4, 0x09, 0x91, 0x8b, 0x02,
+ 0x92, 0x14, 0xa3, 0x09, 0x91, 0x81, 0xa2, 0x09, 0x91, 0x79, 0xa1, 0x09,
+ 0x91, 0x71, 0xa0, 0x09, 0x91, 0x69, 0x9f, 0x09, 0x91, 0x61, 0x9e, 0x09,
+ 0x91, 0x59, 0x9d, 0x09, 0x91, 0x50, 0xa6, 0x09, 0x91, 0x49, 0xa5, 0x09,
+ 0x91, 0x41, 0xa4, 0x09, 0x91, 0x39, 0xa3, 0x09, 0x91, 0x31, 0xa2, 0x09,
+ 0x91, 0x23, 0x02, 0x92, 0x18, 0xa1, 0x09, 0x91, 0x19, 0xa0, 0x09, 0x91,
+ 0x11, 0x9f, 0x09, 0x91, 0x09, 0x9e, 0x09, 0x91, 0x00, 0x9f, 0x09, 0x90,
+ 0xf9, 0x9e, 0x09, 0x90, 0xf1, 0x9d, 0x09, 0x90, 0xe8, 0xa6, 0x09, 0x90,
+ 0xe1, 0xa5, 0x09, 0x90, 0xd9, 0xa4, 0x09, 0x90, 0xcb, 0x02, 0x92, 0x1c,
+ 0xa3, 0x09, 0x90, 0xc1, 0xa2, 0x09, 0x90, 0xb3, 0x02, 0x92, 0x20, 0xa1,
+ 0x09, 0x90, 0xa3, 0x02, 0x92, 0x24, 0xa0, 0x09, 0x90, 0x93, 0x02, 0x92,
+ 0x28, 0x9f, 0x09, 0x90, 0x89, 0x9e, 0x09, 0x90, 0x81, 0x9d, 0x09, 0x90,
+ 0x78, 0xa6, 0x09, 0x90, 0x71, 0xa5, 0x09, 0x90, 0x69, 0xa4, 0x09, 0x90,
+ 0x61, 0xa3, 0x09, 0x90, 0x59, 0xa2, 0x09, 0x90, 0x4b, 0x02, 0x92, 0x2c,
+ 0xa1, 0x09, 0x90, 0x41, 0xa0, 0x09, 0x90, 0x39, 0x9f, 0x09, 0x90, 0x31,
+ 0x9e, 0x09, 0x90, 0x29, 0x9d, 0x09, 0x90, 0x20, 0xa6, 0x09, 0x90, 0x19,
+ 0xa5, 0x09, 0x90, 0x03, 0x02, 0x92, 0x30, 0xa4, 0x09, 0x8f, 0xf9, 0xa3,
+ 0x09, 0x8f, 0xf1, 0xa2, 0x09, 0x8f, 0xe9, 0xa1, 0x09, 0x8f, 0xe1, 0xa0,
+ 0x09, 0x8f, 0xd9, 0x9f, 0x09, 0x8f, 0xd1, 0x9e, 0x09, 0x8f, 0xc9, 0x9d,
+ 0x09, 0x8f, 0xc0, 0xa6, 0x09, 0x8f, 0xb9, 0xa5, 0x09, 0x8f, 0xb1, 0xa4,
+ 0x09, 0x8f, 0xa9, 0xa3, 0x09, 0x8f, 0xa1, 0xa2, 0x09, 0x8f, 0x99, 0xa1,
+ 0x09, 0x8f, 0x91, 0xa0, 0x09, 0x8f, 0x89, 0x9f, 0x09, 0x8f, 0x81, 0x9e,
+ 0x09, 0x8f, 0x78, 0x83, 0x09, 0x8b, 0xa8, 0x83, 0x09, 0x8b, 0x90, 0x83,
+ 0x09, 0x8b, 0x58, 0x83, 0x09, 0x8b, 0x48, 0x83, 0x09, 0x8a, 0xf0, 0x83,
+ 0x09, 0x8a, 0xb8, 0x83, 0x09, 0x8a, 0x68, 0x84, 0x09, 0x8a, 0x41, 0x83,
+ 0x09, 0x8a, 0x38, 0x83, 0x09, 0x8a, 0x28, 0x8a, 0x09, 0x86, 0x89, 0x89,
+ 0x09, 0x86, 0x81, 0x88, 0x09, 0x86, 0x79, 0x87, 0x09, 0x86, 0x71, 0x86,
+ 0x09, 0x86, 0x69, 0x85, 0x09, 0x86, 0x61, 0x84, 0x09, 0x86, 0x59, 0x83,
+ 0x09, 0x86, 0x50, 0x83, 0x09, 0x85, 0xe0, 0x83, 0x09, 0x85, 0xc8, 0x8b,
+ 0x09, 0x85, 0xb1, 0x8a, 0x09, 0x85, 0xa9, 0x89, 0x09, 0x85, 0xa1, 0x88,
+ 0x09, 0x85, 0x99, 0x87, 0x09, 0x85, 0x91, 0x86, 0x09, 0x85, 0x89, 0x85,
+ 0x09, 0x85, 0x81, 0x84, 0x09, 0x85, 0x79, 0x83, 0x09, 0x85, 0x70, 0x83,
+ 0x09, 0x85, 0x58, 0x83, 0x09, 0x85, 0x40, 0x83, 0x09, 0x84, 0xd8, 0x83,
+ 0x09, 0x84, 0xb8, 0x83, 0x09, 0x84, 0x90, 0x83, 0x09, 0x83, 0xf0, 0x83,
+ 0x09, 0x83, 0x38, 0x85, 0x09, 0x82, 0xf1, 0x84, 0x09, 0x82, 0xe9, 0x83,
+ 0x09, 0x82, 0xe0, 0xc6, 0x03, 0xfa, 0x0f, 0xbc, 0x49, 0xc6, 0x01, 0xe9,
+ 0x0f, 0xbc, 0x98, 0xc6, 0x12, 0x73, 0x0f, 0xbd, 0x71, 0xd2, 0x48, 0xf0,
+ 0x0f, 0xbd, 0xd0, 0x45, 0x53, 0x36, 0x42, 0x92, 0x38, 0x83, 0x00, 0x95,
+ 0x03, 0x02, 0x92, 0x68, 0x97, 0x00, 0x95, 0x09, 0x8b, 0x00, 0x95, 0x11,
+ 0x87, 0x00, 0x95, 0x2b, 0x02, 0x92, 0x6c, 0x91, 0x00, 0x95, 0x33, 0x02,
+ 0x92, 0x70, 0xc2, 0x00, 0x4c, 0x00, 0x95, 0x38, 0x83, 0x00, 0x98, 0x58,
+ 0x87, 0x00, 0x98, 0x60, 0x83, 0x00, 0x98, 0x78, 0x83, 0x00, 0x98, 0x83,
+ 0x02, 0x92, 0x74, 0x8b, 0x00, 0x98, 0x91, 0x87, 0x00, 0x98, 0xaa, 0x02,
+ 0x92, 0x78, 0x83, 0x00, 0x98, 0xc3, 0x02, 0x92, 0x7c, 0x97, 0x00, 0x98,
+ 0xc9, 0x8b, 0x00, 0x98, 0xd1, 0x87, 0x00, 0x98, 0xeb, 0x02, 0x92, 0x80,
+ 0x91, 0x00, 0x98, 0xf1, 0x19, 0x42, 0x92, 0x84, 0x83, 0x01, 0x6e, 0xc3,
+ 0x02, 0x92, 0x96, 0x97, 0x01, 0x6e, 0xc9, 0x8b, 0x01, 0x6e, 0xd1, 0x87,
+ 0x01, 0x6e, 0xeb, 0x02, 0x92, 0x9a, 0x91, 0x01, 0x6e, 0xf0, 0x19, 0xc2,
+ 0x92, 0x9e, 0x1b, 0xc2, 0x92, 0xad, 0x83, 0x00, 0x90, 0x83, 0x02, 0x92,
+ 0xc7, 0x97, 0x00, 0x90, 0x89, 0x8b, 0x00, 0x90, 0x91, 0x87, 0x00, 0x90,
+ 0xab, 0x02, 0x92, 0xcb, 0x91, 0x00, 0x90, 0xb0, 0x83, 0x00, 0x90, 0x18,
+ 0x87, 0x00, 0x90, 0x20, 0x83, 0x00, 0x90, 0x38, 0x91, 0x05, 0x59, 0x71,
+ 0x87, 0x05, 0x59, 0x6b, 0x02, 0x92, 0xcf, 0x83, 0x05, 0x59, 0x43, 0x02,
+ 0x92, 0xd3, 0x8b, 0x05, 0x59, 0x51, 0x97, 0x05, 0x59, 0x48, 0x83, 0x00,
+ 0x93, 0x18, 0x87, 0x00, 0x93, 0x20, 0x83, 0x01, 0x6c, 0x28, 0x83, 0x00,
+ 0x93, 0x39, 0x8b, 0x00, 0x9c, 0x29, 0x87, 0x00, 0x9c, 0x3a, 0x02, 0x92,
+ 0xd7, 0x0a, 0xc2, 0x92, 0xdb, 0x83, 0x01, 0x6d, 0x43, 0x02, 0x92, 0xf9,
+ 0x97, 0x01, 0x6d, 0x49, 0x8b, 0x01, 0x6d, 0x51, 0x87, 0x01, 0x6d, 0x6b,
+ 0x02, 0x92, 0xfd, 0x91, 0x01, 0x6d, 0x70, 0x83, 0x00, 0x93, 0xd8, 0x87,
+ 0x00, 0x93, 0xe0, 0x83, 0x01, 0x6c, 0x38, 0x83, 0x00, 0x99, 0x43, 0x02,
+ 0x93, 0x01, 0x97, 0x00, 0x99, 0x49, 0x8b, 0x00, 0x99, 0x51, 0x87, 0x00,
+ 0x99, 0x6b, 0x02, 0x93, 0x05, 0x91, 0x00, 0x99, 0x73, 0x02, 0x93, 0x09,
+ 0xc2, 0x00, 0x4c, 0x00, 0x99, 0x78, 0x91, 0x05, 0x58, 0xb1, 0x87, 0x05,
+ 0x58, 0xab, 0x02, 0x93, 0x0d, 0xc2, 0x0e, 0x30, 0x05, 0x58, 0x99, 0x8b,
+ 0x05, 0x58, 0x91, 0x97, 0x05, 0x58, 0x88, 0x0a, 0xc2, 0x93, 0x11, 0x83,
+ 0x00, 0x97, 0xc3, 0x02, 0x93, 0x2a, 0x97, 0x00, 0x97, 0xc9, 0x8b, 0x00,
+ 0x97, 0xd1, 0x87, 0x00, 0x97, 0xeb, 0x02, 0x93, 0x2e, 0x91, 0x00, 0x97,
+ 0xf3, 0x02, 0x93, 0x32, 0xc2, 0x00, 0x4c, 0x00, 0x97, 0xf8, 0x83, 0x00,
+ 0x97, 0x98, 0x87, 0x00, 0x97, 0xa0, 0x83, 0x01, 0x6c, 0x60, 0x91, 0x05,
+ 0x58, 0x31, 0x87, 0x05, 0x58, 0x2b, 0x02, 0x93, 0x36, 0xc2, 0x0e, 0x30,
+ 0x05, 0x58, 0x19, 0x8b, 0x05, 0x58, 0x11, 0x97, 0x05, 0x58, 0x08, 0x83,
+ 0x00, 0x93, 0x98, 0x87, 0x00, 0x93, 0xa0, 0x83, 0x01, 0x6c, 0x30, 0x83,
+ 0x00, 0x99, 0x03, 0x02, 0x93, 0x3a, 0x97, 0x00, 0x99, 0x09, 0x8b, 0x00,
+ 0x99, 0x11, 0x87, 0x00, 0x99, 0x2b, 0x02, 0x93, 0x3e, 0x91, 0x00, 0x99,
+ 0x33, 0x02, 0x93, 0x42, 0xc2, 0x00, 0x4c, 0x00, 0x99, 0x38, 0x83, 0x00,
+ 0x99, 0xc3, 0x02, 0x93, 0x46, 0x97, 0x00, 0x99, 0xc9, 0x8b, 0x00, 0x99,
+ 0xd1, 0x87, 0x00, 0x99, 0xeb, 0x02, 0x93, 0x4a, 0x91, 0x00, 0x99, 0xf1,
+ 0xc2, 0x00, 0x4c, 0x00, 0x99, 0xf8, 0x83, 0x00, 0x9a, 0x03, 0x02, 0x93,
+ 0x4e, 0x97, 0x00, 0x9a, 0x09, 0x8b, 0x00, 0x9a, 0x11, 0x87, 0x00, 0x9a,
+ 0x2b, 0x02, 0x93, 0x52, 0x91, 0x00, 0x9a, 0x32, 0x02, 0x93, 0x56, 0x83,
+ 0x00, 0x90, 0x58, 0x87, 0x00, 0x90, 0x60, 0x83, 0x01, 0x6c, 0x00, 0x83,
+ 0x00, 0x90, 0xd8, 0x87, 0x00, 0x90, 0xe0, 0x83, 0x01, 0x6c, 0x08, 0x83,
+ 0x00, 0x90, 0xf9, 0x8b, 0x00, 0x9c, 0x09, 0x87, 0x00, 0x9c, 0x1a, 0x02,
+ 0x93, 0x5a, 0x83, 0x00, 0x91, 0x03, 0x02, 0x93, 0x5e, 0x97, 0x00, 0x91,
+ 0x09, 0x8b, 0x00, 0x91, 0x11, 0x87, 0x00, 0x91, 0x2b, 0x02, 0x93, 0x62,
+ 0x91, 0x00, 0x91, 0x31, 0xc2, 0x00, 0x4c, 0x00, 0x91, 0x38, 0x83, 0x00,
+ 0x91, 0x98, 0x87, 0x00, 0x91, 0xa1, 0x48, 0xbb, 0x03, 0x42, 0x93, 0x66,
+ 0x83, 0x01, 0x6c, 0x18, 0x83, 0x00, 0x91, 0xc3, 0x02, 0x93, 0x7e, 0x97,
+ 0x00, 0x91, 0xc9, 0x8b, 0x00, 0x91, 0xd1, 0x87, 0x00, 0x91, 0xeb, 0x02,
+ 0x93, 0x82, 0x91, 0x00, 0x91, 0xf3, 0x02, 0x93, 0x86, 0xc2, 0x00, 0x4c,
+ 0x00, 0x91, 0xf8, 0x83, 0x01, 0x6d, 0x03, 0x02, 0x93, 0x8a, 0x97, 0x01,
+ 0x6d, 0x09, 0x8b, 0x01, 0x6d, 0x11, 0x87, 0x01, 0x6d, 0x2b, 0x02, 0x93,
+ 0x8e, 0x91, 0x01, 0x6d, 0x30, 0x83, 0x00, 0x91, 0x58, 0x87, 0x00, 0x91,
+ 0x60, 0x83, 0x01, 0x6c, 0x10, 0x83, 0x00, 0x92, 0x18, 0x87, 0x00, 0x92,
+ 0x20, 0x83, 0x00, 0x92, 0x38, 0x83, 0x00, 0x92, 0x43, 0x02, 0x93, 0x92,
+ 0x8b, 0x00, 0x92, 0x51, 0x87, 0x00, 0x92, 0x6a, 0x02, 0x93, 0x96, 0x83,
+ 0x00, 0x92, 0x83, 0x02, 0x93, 0x9a, 0x97, 0x00, 0x92, 0x89, 0x8b, 0x00,
+ 0x92, 0x91, 0x87, 0x00, 0x92, 0xab, 0x02, 0x93, 0x9e, 0x91, 0x00, 0x92,
+ 0xb1, 0x19, 0x42, 0x93, 0xa2, 0x83, 0x01, 0x6e, 0x03, 0x02, 0x93, 0xb4,
+ 0x97, 0x01, 0x6e, 0x09, 0x8b, 0x01, 0x6e, 0x11, 0x87, 0x01, 0x6e, 0x2b,
+ 0x02, 0x93, 0xb8, 0x91, 0x01, 0x6e, 0x30, 0x83, 0x00, 0x93, 0x58, 0x87,
+ 0x00, 0x93, 0x60, 0x83, 0x00, 0x94, 0x18, 0x87, 0x00, 0x94, 0x20, 0x83,
+ 0x00, 0x94, 0x38, 0x83, 0x00, 0x94, 0x43, 0x02, 0x93, 0xbc, 0x8b, 0x00,
+ 0x94, 0x51, 0x87, 0x00, 0x94, 0x6a, 0x02, 0x93, 0xc0, 0x83, 0x01, 0x6e,
+ 0x83, 0x02, 0x93, 0xc4, 0x97, 0x01, 0x6e, 0x89, 0x8b, 0x01, 0x6e, 0x91,
+ 0x87, 0x01, 0x6e, 0xab, 0x02, 0x93, 0xc8, 0x91, 0x01, 0x6e, 0xb0, 0x83,
+ 0x00, 0x94, 0x98, 0x87, 0x00, 0x94, 0xa0, 0x83, 0x01, 0x6c, 0x40, 0x83,
+ 0x00, 0x94, 0xc3, 0x02, 0x93, 0xcc, 0x97, 0x00, 0x94, 0xc9, 0x8b, 0x00,
+ 0x94, 0xd1, 0x87, 0x00, 0x94, 0xeb, 0x02, 0x93, 0xd0, 0x91, 0x00, 0x94,
+ 0xf3, 0x02, 0x93, 0xd4, 0xc2, 0x00, 0x4c, 0x00, 0x94, 0xf8, 0x83, 0x00,
+ 0x95, 0x58, 0x87, 0x00, 0x95, 0x60, 0x83, 0x00, 0x95, 0x78, 0x83, 0x00,
+ 0x95, 0x83, 0x02, 0x93, 0xd8, 0x8b, 0x00, 0x95, 0x91, 0x87, 0x00, 0x95,
+ 0xaa, 0x02, 0x93, 0xdc, 0x83, 0x00, 0x95, 0xc3, 0x02, 0x93, 0xe0, 0x97,
+ 0x00, 0x95, 0xc9, 0x8b, 0x00, 0x95, 0xd1, 0x87, 0x00, 0x95, 0xeb, 0x02,
+ 0x93, 0xe4, 0x91, 0x00, 0x95, 0xf1, 0x19, 0x42, 0x93, 0xe8, 0x83, 0x01,
+ 0x6e, 0x43, 0x02, 0x93, 0xfa, 0x97, 0x01, 0x6e, 0x49, 0x8b, 0x01, 0x6e,
+ 0x51, 0x87, 0x01, 0x6e, 0x6b, 0x02, 0x93, 0xfe, 0x91, 0x01, 0x6e, 0x70,
+ 0x83, 0x00, 0x96, 0x58, 0x87, 0x00, 0x96, 0x60, 0x83, 0x00, 0x96, 0x78,
+ 0x83, 0x00, 0x99, 0x83, 0x02, 0x94, 0x02, 0x97, 0x00, 0x99, 0x89, 0x8b,
+ 0x00, 0x99, 0x91, 0x87, 0x00, 0x99, 0xab, 0x02, 0x94, 0x0c, 0x91, 0x00,
+ 0x99, 0xb3, 0x02, 0x94, 0x10, 0xc2, 0x00, 0x4c, 0x00, 0x99, 0xb8, 0x83,
+ 0x00, 0x9a, 0x98, 0x87, 0x00, 0x9a, 0xa0, 0x83, 0x01, 0x6c, 0x90, 0x83,
+ 0x00, 0x9a, 0xb9, 0x8b, 0x00, 0x9c, 0x69, 0x87, 0x00, 0x9c, 0x7a, 0x02,
+ 0x94, 0x14, 0x83, 0x00, 0x96, 0xd8, 0x87, 0x00, 0x96, 0xe0, 0x83, 0x01,
+ 0x6c, 0x58, 0x83, 0x00, 0x97, 0x03, 0x02, 0x94, 0x18, 0x97, 0x00, 0x97,
+ 0x09, 0x8b, 0x00, 0x97, 0x11, 0x87, 0x00, 0x97, 0x2b, 0x02, 0x94, 0x1c,
+ 0x91, 0x00, 0x97, 0x31, 0xc2, 0x00, 0x4c, 0x00, 0x97, 0x38, 0x83, 0x01,
+ 0x6d, 0x83, 0x02, 0x94, 0x20, 0x97, 0x01, 0x6d, 0x89, 0x8b, 0x01, 0x6d,
+ 0x91, 0x87, 0x01, 0x6d, 0xab, 0x02, 0x94, 0x24, 0x91, 0x01, 0x6d, 0xb0,
+ 0x83, 0x00, 0x97, 0x58, 0x87, 0x00, 0x97, 0x60, 0x83, 0x00, 0x97, 0x78,
+ 0x83, 0x00, 0x98, 0x18, 0x87, 0x00, 0x98, 0x20, 0x83, 0x01, 0x6c, 0x70,
+ 0x83, 0x00, 0x9a, 0x58, 0x87, 0x00, 0x9a, 0x60, 0x83, 0x00, 0x9a, 0x79,
+ 0x8b, 0x00, 0x9c, 0x49, 0x87, 0x00, 0x9c, 0x5a, 0x02, 0x94, 0x28, 0xd5,
+ 0x37, 0x93, 0x00, 0x9a, 0xe9, 0xc4, 0x00, 0xfa, 0x00, 0x9a, 0xf8, 0xc7,
+ 0x02, 0x6a, 0x01, 0x3e, 0x91, 0xc9, 0x00, 0x68, 0x01, 0x56, 0xc8, 0xd6,
+ 0x31, 0x65, 0x01, 0x17, 0xc9, 0xc8, 0x50, 0x0d, 0x01, 0x17, 0xc1, 0xc7,
+ 0x79, 0xb4, 0x01, 0x17, 0xb1, 0xc9, 0x18, 0x19, 0x01, 0x17, 0xa9, 0x48,
+ 0x00, 0x29, 0xc2, 0x94, 0x2c, 0xd6, 0x2c, 0x27, 0x01, 0x17, 0x90, 0xc3,
+ 0xe2, 0x62, 0x08, 0x7f, 0x89, 0xc4, 0xdd, 0x34, 0x08, 0x7f, 0x70, 0xc6,
+ 0x01, 0x21, 0x00, 0x00, 0xb8, 0xc8, 0xbe, 0x43, 0x01, 0x16, 0xf9, 0xc8,
+ 0xbf, 0x4b, 0x01, 0x16, 0xf1, 0xcc, 0x06, 0xfb, 0x01, 0x16, 0xe9, 0xc9,
+ 0x09, 0xde, 0x01, 0x16, 0xe0, 0x03, 0xc2, 0x94, 0x32, 0x45, 0x01, 0xac,
+ 0x42, 0x94, 0x41, 0x97, 0x08, 0xec, 0xa1, 0x8b, 0x08, 0xec, 0x89, 0x83,
+ 0x08, 0xec, 0x50, 0x97, 0x08, 0xec, 0x70, 0x8b, 0x08, 0xec, 0x60, 0xc2,
+ 0x01, 0x0e, 0x08, 0xec, 0x19, 0x83, 0x08, 0xec, 0x10, 0xc2, 0x01, 0x0e,
+ 0x08, 0xeb, 0xf1, 0x83, 0x08, 0xeb, 0xe8, 0x83, 0x00, 0x50, 0xb1, 0xc2,
+ 0x01, 0x0e, 0x00, 0x52, 0xc8, 0x83, 0x00, 0x50, 0xc1, 0xc2, 0x01, 0x0e,
+ 0x00, 0x52, 0xd0, 0x83, 0x00, 0x50, 0xf9, 0xc2, 0x01, 0x0e, 0x00, 0x51,
+ 0x00, 0x83, 0x00, 0x51, 0x09, 0xc2, 0x01, 0x0e, 0x00, 0x51, 0x10, 0x94,
+ 0x00, 0x54, 0x5b, 0x02, 0x94, 0x57, 0x8e, 0x00, 0x54, 0x62, 0x02, 0x94,
+ 0x5b, 0x83, 0x00, 0x54, 0xf9, 0xc2, 0x01, 0x0e, 0x00, 0x55, 0x00, 0x83,
+ 0x00, 0x55, 0x09, 0xc2, 0x01, 0x0e, 0x00, 0x55, 0x10, 0x83, 0x00, 0x55,
+ 0xf1, 0x8b, 0x00, 0x56, 0x41, 0x97, 0x00, 0x56, 0x60, 0x8b, 0x00, 0x56,
+ 0x00, 0x97, 0x00, 0x56, 0x10, 0x94, 0x00, 0x56, 0x1b, 0x02, 0x94, 0x5f,
+ 0x8e, 0x00, 0x57, 0x12, 0x02, 0x94, 0x63, 0x87, 0x00, 0x56, 0x29, 0x91,
+ 0x00, 0x56, 0x48, 0xcd, 0x77, 0x3e, 0x0e, 0x92, 0x29, 0xcc, 0x84, 0xd8,
+ 0x08, 0x0c, 0x08, 0x59, 0x1f, 0x83, 0xc2, 0x94, 0x67, 0xcc, 0x83, 0xf4,
+ 0x08, 0x0c, 0x68, 0x53, 0x45, 0x75, 0xc2, 0x94, 0x9d, 0xc4, 0x27, 0x59,
+ 0x00, 0xff, 0x78, 0xc4, 0x5b, 0xe0, 0x00, 0xff, 0xf3, 0x02, 0x94, 0xdc,
+ 0x49, 0x68, 0x8f, 0xc2, 0x94, 0xe2, 0xcb, 0x8f, 0x4a, 0x08, 0x0b, 0xd8,
+ 0xc3, 0x40, 0xb7, 0x00, 0xff, 0xe9, 0x43, 0x03, 0x5f, 0xc2, 0x94, 0xee,
+ 0xc8, 0xc1, 0xbb, 0x08, 0x0b, 0xe1, 0xca, 0xaa, 0x7e, 0x08, 0x0c, 0x20,
+ 0x0e, 0xc2, 0x94, 0xfd, 0xca, 0x9d, 0xae, 0x00, 0x1e, 0x79, 0xcc, 0x8b,
+ 0xec, 0x00, 0x1f, 0xa1, 0x49, 0x11, 0xad, 0xc2, 0x95, 0x09, 0xda, 0x19,
+ 0x80, 0x00, 0x1f, 0xf0, 0x45, 0x00, 0x3f, 0xc2, 0x95, 0x15, 0x56, 0x31,
+ 0xa7, 0xc2, 0x95, 0x27, 0xcc, 0x8b, 0xe0, 0x08, 0x0c, 0x61, 0xcd, 0x7d,
+ 0x2f, 0x08, 0x0d, 0x00, 0xc4, 0x7d, 0xa4, 0x00, 0xfd, 0xfb, 0x02, 0x95,
+ 0x45, 0xca, 0x90, 0xcc, 0x00, 0xfe, 0x01, 0xcd, 0x45, 0xed, 0x00, 0xfd,
+ 0xf1, 0xc8, 0x9d, 0xb0, 0x00, 0x1e, 0xb1, 0xc9, 0xad, 0x9c, 0x00, 0x1e,
+ 0xa8, 0xc6, 0x5c, 0x89, 0x00, 0xfd, 0xe9, 0x03, 0xc2, 0x95, 0x4b, 0xd0,
+ 0x5d, 0xaf, 0x08, 0x0c, 0x10, 0x46, 0x02, 0x00, 0xc2, 0x95, 0x57, 0xd1,
+ 0x53, 0xab, 0x00, 0x1b, 0xa9, 0x46, 0x12, 0x5e, 0xc2, 0x95, 0x73, 0xc9,
+ 0xb2, 0xb5, 0x08, 0x0c, 0x18, 0xcc, 0x49, 0x1a, 0x00, 0x1b, 0xd1, 0xc8,
+ 0xb8, 0x0e, 0x08, 0x0b, 0xc8, 0xc4, 0x68, 0x99, 0x00, 0x1c, 0x21, 0x0a,
+ 0xc2, 0x95, 0x7f, 0x43, 0x0a, 0x20, 0xc2, 0x95, 0x8b, 0xca, 0xa7, 0x4a,
+ 0x08, 0x0b, 0xd1, 0xd1, 0x57, 0x41, 0x08, 0x0c, 0x48, 0xc9, 0xb1, 0x9e,
+ 0x00, 0x1c, 0x39, 0x4a, 0x9e, 0x9e, 0xc2, 0x95, 0x97, 0x14, 0x42, 0x95,
+ 0xc9, 0x43, 0x69, 0x91, 0xc2, 0x95, 0xd5, 0xdd, 0x12, 0x55, 0x00, 0x1f,
+ 0xb0, 0xce, 0x74, 0x14, 0x08, 0x0b, 0xf9, 0xce, 0x75, 0x10, 0x08, 0x0c,
+ 0x00, 0xcb, 0x1e, 0x97, 0x00, 0x1e, 0x91, 0xd5, 0x34, 0x21, 0x00, 0x1e,
+ 0x99, 0xd9, 0x1e, 0x89, 0x00, 0x1e, 0xa0, 0xca, 0x37, 0x20, 0x01, 0x17,
+ 0x39, 0xc5, 0x09, 0x02, 0x01, 0x13, 0x48, 0xc9, 0x09, 0xde, 0x01, 0x13,
+ 0xb9, 0x43, 0x00, 0x92, 0xc2, 0x95, 0xe1, 0xd0, 0x59, 0xbf, 0x01, 0x53,
+ 0xf3, 0x02, 0x95, 0xed, 0xcb, 0x19, 0xd2, 0x01, 0x54, 0x30, 0xc9, 0x08,
+ 0xfe, 0x01, 0x13, 0x39, 0xd1, 0x51, 0xf1, 0x01, 0x55, 0x20, 0xd0, 0x01,
+ 0x37, 0x01, 0x4b, 0xc1, 0x06, 0xc2, 0x95, 0xf3, 0x15, 0xc2, 0x95, 0xf9,
+ 0x0e, 0x42, 0x96, 0x05, 0xd8, 0x23, 0x24, 0x01, 0x54, 0x41, 0xcf, 0x63,
+ 0x75, 0x01, 0x54, 0x50, 0x8e, 0x08, 0x9b, 0x13, 0x02, 0x96, 0x0b, 0x94,
+ 0x08, 0x9a, 0x1a, 0x02, 0x96, 0x0f, 0x97, 0x08, 0x9a, 0x61, 0x8b, 0x08,
+ 0x9a, 0x41, 0x83, 0x08, 0x99, 0xf0, 0x97, 0x08, 0x9a, 0x10, 0x8b, 0x08,
+ 0x9a, 0x00, 0x47, 0xb7, 0xd8, 0xc2, 0x96, 0x13, 0x45, 0x06, 0x8f, 0xc2,
+ 0x96, 0x21, 0x83, 0x08, 0x99, 0xa8, 0x83, 0x08, 0x99, 0xc1, 0xc2, 0x0e,
+ 0xe5, 0x08, 0x99, 0xb9, 0xc2, 0x01, 0x0e, 0x08, 0x99, 0xb0, 0xc2, 0x00,
+ 0x96, 0x08, 0x99, 0x99, 0x83, 0x08, 0x99, 0x90, 0xc2, 0x01, 0x0e, 0x08,
+ 0x99, 0x69, 0x83, 0x08, 0x99, 0x60, 0xc2, 0x01, 0x0e, 0x08, 0x99, 0x59,
+ 0x83, 0x08, 0x99, 0x50, 0xc2, 0x01, 0x0e, 0x08, 0x99, 0x39, 0x83, 0x08,
+ 0x99, 0x31, 0x06, 0x42, 0x96, 0x2d, 0xc2, 0x01, 0x0e, 0x08, 0x99, 0x29,
+ 0x16, 0xc2, 0x96, 0x37, 0x83, 0x08, 0x99, 0x20, 0xc2, 0x1a, 0x36, 0x08,
+ 0x98, 0xf1, 0xc2, 0x07, 0x69, 0x08, 0x98, 0xc9, 0xc2, 0x01, 0x01, 0x08,
+ 0x99, 0x19, 0x83, 0x08, 0x99, 0x40, 0xc2, 0x01, 0x0e, 0x08, 0x98, 0xe9,
+ 0x83, 0x08, 0x98, 0xe0, 0xc2, 0x01, 0x0e, 0x08, 0x98, 0xd9, 0x83, 0x08,
+ 0x98, 0xd0, 0xc2, 0x01, 0x0e, 0x08, 0x98, 0xc1, 0x83, 0x08, 0x98, 0xb8,
+ 0xc2, 0x01, 0x0e, 0x08, 0x98, 0xb1, 0x83, 0x08, 0x98, 0xa8, 0x97, 0x08,
+ 0x98, 0xa1, 0x8b, 0x08, 0x98, 0x81, 0x83, 0x08, 0x98, 0x30, 0x97, 0x08,
+ 0x98, 0x50, 0x8b, 0x08, 0x98, 0x40, 0xc4, 0x21, 0x28, 0x08, 0x9a, 0x69,
+ 0xc5, 0x45, 0xcf, 0x08, 0x98, 0x18, 0xc7, 0x7d, 0xf8, 0x08, 0x99, 0xe9,
+ 0xc7, 0x10, 0xac, 0x08, 0x98, 0x10, 0xca, 0x21, 0x1b, 0x08, 0x98, 0x09,
+ 0xd7, 0x10, 0xa2, 0x08, 0x98, 0x00, 0x15, 0xc2, 0x96, 0x41, 0xdb, 0x18,
+ 0x3d, 0x0f, 0xc9, 0x50, 0xc9, 0xaf, 0xb8, 0x00, 0xe5, 0xf9, 0x95, 0x00,
+ 0xe4, 0xd0, 0x03, 0xc2, 0x96, 0x4d, 0xc2, 0x09, 0x06, 0x00, 0xe5, 0xa9,
+ 0xc2, 0x05, 0x4a, 0x00, 0xe5, 0x91, 0x87, 0x00, 0xe5, 0x88, 0xc2, 0x01,
+ 0x04, 0x00, 0xe5, 0xe9, 0xc2, 0x01, 0x5b, 0x00, 0xe5, 0xd1, 0x90, 0x00,
+ 0xe4, 0x80, 0xc9, 0xac, 0x2b, 0x00, 0xe5, 0xc9, 0x03, 0x42, 0x96, 0x58,
+ 0xc4, 0x77, 0xb9, 0x00, 0xe5, 0xc1, 0x90, 0x00, 0xe4, 0xa0, 0xc3, 0x02,
+ 0x33, 0x00, 0xe5, 0x79, 0xc2, 0x00, 0x95, 0x00, 0xe5, 0x58, 0x0a, 0xc2,
+ 0x96, 0x60, 0xc2, 0x00, 0x95, 0x00, 0xe5, 0x61, 0xc2, 0x00, 0x3a, 0x00,
+ 0xe5, 0x50, 0xc3, 0x11, 0x40, 0x00, 0xe5, 0x41, 0xc2, 0x00, 0x3a, 0x00,
+ 0xe5, 0x08, 0xc3, 0x02, 0x33, 0x00, 0xe5, 0x31, 0xc2, 0x00, 0x3a, 0x00,
+ 0xe4, 0x90, 0xc3, 0x02, 0x10, 0x00, 0xe5, 0x29, 0xc2, 0x00, 0x3a, 0x00,
+ 0xe4, 0xc8, 0xc3, 0x02, 0x10, 0x00, 0xe5, 0x21, 0xc2, 0x00, 0x4d, 0x00,
+ 0xe4, 0xf0, 0xc3, 0x02, 0x10, 0x00, 0xe4, 0xf9, 0xc2, 0x01, 0x04, 0x00,
+ 0xe4, 0xb0, 0x90, 0x00, 0x85, 0x01, 0xc2, 0x01, 0x04, 0x00, 0x86, 0x68,
+ 0xc2, 0x00, 0x3a, 0x00, 0x85, 0x11, 0xc3, 0x02, 0x33, 0x00, 0x85, 0xb0,
+ 0xc2, 0x01, 0x04, 0x00, 0x85, 0x31, 0xc3, 0x02, 0x10, 0x00, 0x85, 0x78,
+ 0x90, 0x00, 0x85, 0x39, 0x94, 0x00, 0x85, 0x90, 0xc2, 0x00, 0x3a, 0x00,
+ 0x85, 0x49, 0xc3, 0x02, 0x10, 0x00, 0x85, 0xa8, 0xc2, 0x00, 0x4d, 0x00,
+ 0x85, 0x71, 0xc3, 0x02, 0x10, 0x00, 0x85, 0xa0, 0xc2, 0x00, 0x3a, 0x00,
+ 0x85, 0x89, 0xc3, 0x11, 0x40, 0x00, 0x85, 0xc0, 0x0a, 0xc2, 0x96, 0x6c,
+ 0xc2, 0x00, 0x3a, 0x00, 0x85, 0xd1, 0xc2, 0x00, 0x95, 0x00, 0x85, 0xe0,
+ 0xc2, 0x00, 0x95, 0x00, 0x85, 0xd9, 0xc3, 0x02, 0x33, 0x00, 0x85, 0xf8,
+ 0x03, 0xc2, 0x96, 0x78, 0x87, 0x00, 0x86, 0x09, 0xc2, 0x05, 0x4a, 0x00,
+ 0x86, 0x11, 0xc2, 0x09, 0x06, 0x00, 0x86, 0x28, 0x90, 0x00, 0x86, 0x81,
+ 0xc2, 0x01, 0x5b, 0x00, 0x87, 0xd1, 0xc2, 0x01, 0x04, 0x00, 0x87, 0xe8,
+ 0xc2, 0x00, 0x3a, 0x00, 0x86, 0x91, 0xc3, 0x02, 0x33, 0x00, 0x87, 0x30,
+ 0x90, 0x00, 0x86, 0xa1, 0xc4, 0x77, 0xb9, 0x00, 0x87, 0xc0, 0xc2, 0x01,
+ 0x04, 0x00, 0x86, 0xb1, 0xc3, 0x02, 0x10, 0x00, 0x86, 0xf8, 0x03, 0xc2,
+ 0x96, 0x80, 0xc9, 0xac, 0x2b, 0x00, 0x87, 0xc8, 0xc2, 0x00, 0x3a, 0x00,
+ 0x86, 0xc9, 0xc3, 0x02, 0x10, 0x00, 0x87, 0x28, 0x95, 0x00, 0x86, 0xd1,
+ 0xc9, 0xaf, 0xb8, 0x00, 0x87, 0xf8, 0xc2, 0x00, 0x4d, 0x00, 0x86, 0xf1,
+ 0xc3, 0x02, 0x10, 0x00, 0x87, 0x20, 0xc2, 0x00, 0x3a, 0x00, 0x87, 0x09,
+ 0xc3, 0x11, 0x40, 0x00, 0x87, 0x40, 0x0a, 0xc2, 0x96, 0x88, 0xc2, 0x00,
+ 0x3a, 0x00, 0x87, 0x51, 0xc2, 0x00, 0x95, 0x00, 0x87, 0x60, 0xc2, 0x00,
+ 0x95, 0x00, 0x87, 0x59, 0xc3, 0x02, 0x33, 0x00, 0x87, 0x78, 0x03, 0xc2,
+ 0x96, 0x94, 0x87, 0x00, 0x87, 0x89, 0xc2, 0x05, 0x4a, 0x00, 0x87, 0x91,
+ 0xc2, 0x09, 0x06, 0x00, 0x87, 0xa8, 0x90, 0x01, 0x68, 0x01, 0xc2, 0x01,
+ 0x04, 0x01, 0x69, 0x68, 0xc2, 0x00, 0x3a, 0x01, 0x68, 0x11, 0xc3, 0x02,
+ 0x33, 0x01, 0x68, 0xb0, 0xc2, 0x01, 0x04, 0x01, 0x68, 0x31, 0xc3, 0x02,
+ 0x10, 0x01, 0x68, 0x78, 0x90, 0x01, 0x68, 0x39, 0x94, 0x01, 0x68, 0x90,
+ 0xc2, 0x00, 0x3a, 0x01, 0x68, 0x49, 0xc3, 0x02, 0x10, 0x01, 0x68, 0xa8,
+ 0xc2, 0x00, 0x4d, 0x01, 0x68, 0x71, 0xc3, 0x02, 0x10, 0x01, 0x68, 0xa0,
+ 0xc2, 0x00, 0x3a, 0x01, 0x68, 0x89, 0xc3, 0x11, 0x40, 0x01, 0x68, 0xc0,
+ 0x0a, 0xc2, 0x96, 0x9f, 0xc2, 0x00, 0x3a, 0x01, 0x68, 0xd1, 0xc2, 0x00,
+ 0x95, 0x01, 0x68, 0xe0, 0xc2, 0x00, 0x95, 0x01, 0x68, 0xd9, 0xc3, 0x02,
+ 0x33, 0x01, 0x68, 0xf8, 0x03, 0xc2, 0x96, 0xab, 0x87, 0x01, 0x69, 0x09,
+ 0xc2, 0x05, 0x4a, 0x01, 0x69, 0x11, 0xc2, 0x09, 0x06, 0x01, 0x69, 0x28,
+ 0xc3, 0xcf, 0x2a, 0x01, 0x60, 0x09, 0xc6, 0xcc, 0x54, 0x01, 0x61, 0x40,
+ 0xc4, 0xe9, 0x5b, 0x01, 0x60, 0x21, 0xc4, 0xe4, 0xbf, 0x01, 0x60, 0x39,
+ 0xc5, 0xdd, 0x83, 0x01, 0x60, 0x60, 0x07, 0xc2, 0x96, 0xb3, 0xc3, 0x00,
+ 0xf4, 0x01, 0x61, 0x09, 0x97, 0x01, 0x61, 0x19, 0x91, 0x01, 0x61, 0x30,
+ 0xc6, 0xd6, 0x34, 0x01, 0x60, 0x31, 0xc5, 0xe2, 0xd3, 0x01, 0x60, 0x40,
+ 0x42, 0x23, 0x6a, 0xc2, 0x96, 0xbd, 0xcb, 0x95, 0x90, 0x01, 0x60, 0x51,
+ 0x47, 0x19, 0x66, 0x42, 0x96, 0xc7, 0xc6, 0xc9, 0x52, 0x01, 0x60, 0x71,
+ 0xcf, 0x62, 0x49, 0x01, 0x61, 0x70, 0xc2, 0x10, 0xac, 0x01, 0x60, 0x89,
+ 0xc2, 0x00, 0x16, 0x01, 0x60, 0xc8, 0xc5, 0xd4, 0xf7, 0x01, 0x60, 0x91,
+ 0x87, 0x01, 0x60, 0xd0, 0xc4, 0xe6, 0x3b, 0x01, 0x60, 0xa1, 0x0a, 0xc2,
+ 0x96, 0xd3, 0xc9, 0xaf, 0x43, 0x01, 0x61, 0x11, 0xc8, 0xac, 0x8f, 0x01,
+ 0x61, 0x22, 0x02, 0x96, 0xe0, 0xc5, 0xe3, 0x73, 0x01, 0x60, 0xa9, 0xc2,
+ 0x02, 0x29, 0x01, 0x60, 0xe1, 0xcb, 0x90, 0x5d, 0x01, 0x61, 0x68, 0xc4,
+ 0xad, 0xf9, 0x01, 0x60, 0xb9, 0xc3, 0x01, 0xfa, 0x01, 0x61, 0x50, 0xc5,
+ 0x82, 0x3e, 0x01, 0x60, 0xe9, 0xcd, 0x82, 0x36, 0x01, 0x61, 0x78, 0xc3,
+ 0xcf, 0x2a, 0x01, 0x61, 0x89, 0xc6, 0xcc, 0x54, 0x01, 0x62, 0xc0, 0xc4,
+ 0xe9, 0x5b, 0x01, 0x61, 0xa1, 0xc4, 0xe4, 0xbf, 0x01, 0x61, 0xb9, 0xc5,
+ 0xdd, 0x83, 0x01, 0x61, 0xe0, 0x07, 0xc2, 0x96, 0xe6, 0xc3, 0x00, 0xf4,
+ 0x01, 0x62, 0x89, 0x97, 0x01, 0x62, 0x99, 0x91, 0x01, 0x62, 0xb0, 0xc6,
+ 0xd6, 0x34, 0x01, 0x61, 0xb1, 0xc5, 0xe2, 0xd3, 0x01, 0x61, 0xc0, 0x42,
+ 0x23, 0x6a, 0xc2, 0x96, 0xf0, 0xcb, 0x95, 0x90, 0x01, 0x61, 0xd1, 0x47,
+ 0x19, 0x66, 0x42, 0x96, 0xfa, 0xc6, 0xc9, 0x52, 0x01, 0x61, 0xf1, 0xcf,
+ 0x62, 0x49, 0x01, 0x62, 0xf0, 0xc2, 0x10, 0xac, 0x01, 0x62, 0x09, 0xc2,
+ 0x00, 0x16, 0x01, 0x62, 0x48, 0xc5, 0xd4, 0xf7, 0x01, 0x62, 0x11, 0x87,
+ 0x01, 0x62, 0x50, 0xc4, 0xe6, 0x3b, 0x01, 0x62, 0x21, 0x0a, 0xc2, 0x97,
+ 0x06, 0xc9, 0xaf, 0x43, 0x01, 0x62, 0x91, 0xc8, 0xac, 0x8f, 0x01, 0x62,
+ 0xa2, 0x02, 0x97, 0x13, 0xc5, 0xe3, 0x73, 0x01, 0x62, 0x29, 0xc2, 0x02,
+ 0x29, 0x01, 0x62, 0x61, 0xcb, 0x90, 0x5d, 0x01, 0x62, 0xe8, 0xc4, 0xad,
+ 0xf9, 0x01, 0x62, 0x39, 0xc3, 0x01, 0xfa, 0x01, 0x62, 0xd0, 0xc5, 0x82,
+ 0x3e, 0x01, 0x62, 0x69, 0xcd, 0x82, 0x36, 0x01, 0x62, 0xf8, 0xc7, 0x10,
+ 0xac, 0x00, 0x58, 0x11, 0xc7, 0x7d, 0xf8, 0x00, 0x59, 0xe8, 0xc5, 0x45,
+ 0xcf, 0x00, 0x58, 0x19, 0xc4, 0x21, 0x28, 0x00, 0x5a, 0x68, 0x83, 0x00,
+ 0x58, 0x31, 0x8b, 0x00, 0x58, 0x81, 0x97, 0x00, 0x58, 0xa0, 0x8b, 0x00,
+ 0x58, 0x40, 0x97, 0x00, 0x58, 0x50, 0x47, 0xb7, 0xd8, 0xc2, 0x97, 0x19,
+ 0x83, 0x00, 0x59, 0xa8, 0x83, 0x00, 0x58, 0xa9, 0xc2, 0x01, 0x0e, 0x00,
+ 0x58, 0xb0, 0x83, 0x00, 0x58, 0xb9, 0xc2, 0x01, 0x0e, 0x00, 0x58, 0xc0,
+ 0xc2, 0x07, 0x69, 0x00, 0x58, 0xc9, 0xc2, 0x1a, 0x36, 0x00, 0x58, 0xf1,
+ 0xc2, 0x01, 0x01, 0x00, 0x59, 0x19, 0x83, 0x00, 0x59, 0x40, 0x83, 0x00,
+ 0x58, 0xd1, 0xc2, 0x01, 0x0e, 0x00, 0x58, 0xd8, 0x83, 0x00, 0x58, 0xe1,
+ 0xc2, 0x01, 0x0e, 0x00, 0x58, 0xe8, 0x16, 0xc2, 0x97, 0x27, 0x83, 0x00,
+ 0x59, 0x21, 0xc2, 0x01, 0x0e, 0x00, 0x59, 0x28, 0x06, 0xc2, 0x97, 0x31,
+ 0x83, 0x00, 0x59, 0x31, 0xc2, 0x01, 0x0e, 0x00, 0x59, 0x38, 0x83, 0x00,
+ 0x59, 0x51, 0xc2, 0x01, 0x0e, 0x00, 0x59, 0x58, 0x83, 0x00, 0x59, 0x61,
+ 0xc2, 0x01, 0x0e, 0x00, 0x59, 0x68, 0x83, 0x00, 0x59, 0x79, 0xc2, 0x1a,
+ 0x36, 0x00, 0x5a, 0xf8, 0x83, 0x00, 0x59, 0x81, 0xc2, 0x00, 0x9a, 0x00,
+ 0x5a, 0xe1, 0xc2, 0x01, 0x0e, 0x00, 0x5a, 0xe8, 0x83, 0x00, 0x59, 0x91,
+ 0xc2, 0x00, 0x96, 0x00, 0x59, 0x98, 0xc2, 0x01, 0x0e, 0x00, 0x59, 0xb1,
+ 0xc2, 0x0e, 0xe5, 0x00, 0x59, 0xb9, 0x83, 0x00, 0x59, 0xc0, 0x83, 0x00,
+ 0x59, 0xf1, 0x8b, 0x00, 0x5a, 0x41, 0x97, 0x00, 0x5a, 0x60, 0x8b, 0x00,
+ 0x5a, 0x00, 0x97, 0x00, 0x5a, 0x10, 0x94, 0x00, 0x5a, 0x1b, 0x02, 0x97,
+ 0x3b, 0x8e, 0x00, 0x5b, 0x12, 0x02, 0x97, 0x3f, 0xc2, 0x0a, 0x20, 0x00,
+ 0x5b, 0x41, 0xc4, 0x05, 0xde, 0x00, 0x5b, 0x48, 0xc3, 0x08, 0xde, 0x00,
+ 0x5b, 0x51, 0xc3, 0x0d, 0x8f, 0x00, 0x5b, 0x58, 0xc2, 0x22, 0x45, 0x00,
+ 0x5b, 0x61, 0xc4, 0x15, 0xa7, 0x00, 0x5b, 0x68, 0xc7, 0x08, 0x19, 0x00,
+ 0x5b, 0x91, 0xc4, 0x01, 0x1d, 0x00, 0x5b, 0x99, 0xc9, 0x66, 0x90, 0x00,
+ 0x5b, 0xa9, 0xc6, 0x05, 0x1b, 0x00, 0x5b, 0xb0, 0xc8, 0x08, 0x19, 0x00,
+ 0x5b, 0xa1, 0xca, 0xa7, 0x22, 0x00, 0x5b, 0xb8, 0xc3, 0x05, 0xdf, 0x0f,
+ 0x68, 0x1b, 0x02, 0x97, 0x43, 0xc4, 0x0d, 0x89, 0x0f, 0x68, 0x62, 0x02,
+ 0x97, 0x47, 0x91, 0x0f, 0x68, 0x13, 0x02, 0x97, 0x4d, 0xc4, 0x15, 0xa9,
+ 0x0f, 0x68, 0x5a, 0x02, 0x97, 0x51, 0xc9, 0x4f, 0xa1, 0x0f, 0x69, 0x28,
+ 0xc2, 0x00, 0xd3, 0x0f, 0x68, 0x23, 0x02, 0x97, 0x57, 0xc3, 0x0d, 0x8a,
+ 0x0f, 0x68, 0x6a, 0x02, 0x97, 0x5b, 0xc2, 0x00, 0x29, 0x0f, 0x68, 0x2b,
+ 0x02, 0x97, 0x61, 0xc3, 0x41, 0xca, 0x0f, 0x68, 0x72, 0x02, 0x97, 0x65,
+ 0xc7, 0x0d, 0x7f, 0x0f, 0x68, 0x99, 0xc8, 0x4f, 0xa2, 0x0f, 0x68, 0xe0,
+ 0xc2, 0x0d, 0x8b, 0x0f, 0x68, 0x7b, 0x02, 0x97, 0x6b, 0x00, 0x42, 0x97,
+ 0x71, 0xc2, 0x0d, 0x8b, 0x0f, 0x68, 0x83, 0x02, 0x97, 0x7d, 0x00, 0x42,
+ 0x97, 0x83, 0xc9, 0x4f, 0xa1, 0x0f, 0x69, 0x60, 0xc7, 0x0d, 0x7f, 0x0f,
+ 0x68, 0xd1, 0xc8, 0x4f, 0xa2, 0x0f, 0x69, 0x18, 0xc9, 0x4f, 0xa1, 0x0f,
+ 0x69, 0x68, 0xc7, 0x0d, 0x7f, 0x0f, 0x68, 0xd9, 0xc8, 0x4f, 0xa2, 0x0f,
+ 0x69, 0x20, 0xc9, 0x4f, 0xa1, 0x0f, 0x69, 0xd0, 0xc9, 0x4f, 0xa1, 0x0f,
+ 0x69, 0xd8, 0xc8, 0x0d, 0x7e, 0x0f, 0x69, 0xc0, 0xc8, 0x0d, 0x7e, 0x0f,
+ 0x69, 0xc8, 0xc9, 0x39, 0xbf, 0x08, 0x7c, 0x49, 0x44, 0x0a, 0x1f, 0xc2,
+ 0x97, 0x8f, 0xc3, 0x02, 0x1d, 0x08, 0x7c, 0x30, 0x49, 0x0b, 0x79, 0xc2,
+ 0x97, 0x9b, 0x44, 0x0b, 0xf8, 0x42, 0x97, 0xa7, 0x0e, 0xc2, 0x97, 0xb3,
+ 0xc3, 0x5f, 0x3d, 0x08, 0x7c, 0x01, 0xc2, 0x03, 0x07, 0x08, 0x7b, 0xe1,
+ 0x15, 0xc2, 0x97, 0xbf, 0xc3, 0x21, 0x00, 0x08, 0x7b, 0xd1, 0xc3, 0x04,
+ 0xae, 0x08, 0x7b, 0xc9, 0xc4, 0xe5, 0x53, 0x08, 0x7b, 0xb9, 0xc4, 0x4d,
+ 0x48, 0x08, 0x7b, 0xb1, 0xca, 0xaa, 0xb0, 0x08, 0x7b, 0xa9, 0xc5, 0x4d,
+ 0x42, 0x08, 0x7b, 0xa1, 0xc3, 0x7c, 0xad, 0x08, 0x7b, 0x99, 0xca, 0x9e,
+ 0xbc, 0x08, 0x7b, 0x91, 0xc4, 0xe5, 0xaf, 0x08, 0x7b, 0x89, 0xc5, 0xa6,
+ 0x5f, 0x08, 0x7b, 0x81, 0xc4, 0x5d, 0xef, 0x08, 0x7b, 0xf0, 0xd1, 0x54,
+ 0xdd, 0x08, 0x79, 0x31, 0x47, 0x37, 0x49, 0xc2, 0x97, 0xc9, 0x0e, 0x42,
+ 0x97, 0xda, 0x43, 0x2d, 0x13, 0xc2, 0x97, 0xe6, 0x47, 0x01, 0xff, 0x42,
+ 0x97, 0xf2, 0xc3, 0x03, 0x01, 0x08, 0x67, 0xe1, 0x42, 0x02, 0xf8, 0xc2,
+ 0x98, 0x4f, 0xc3, 0x05, 0x17, 0x08, 0x67, 0xd2, 0x02, 0x98, 0x5b, 0x97,
+ 0x08, 0x67, 0x53, 0x02, 0x98, 0x5f, 0x87, 0x08, 0x66, 0x4b, 0x02, 0x98,
+ 0x6d, 0x4a, 0xa2, 0xe0, 0xc2, 0x98, 0xcd, 0x4b, 0x95, 0x9b, 0xc2, 0x98,
+ 0xd9, 0xc8, 0xba, 0xc3, 0x08, 0x67, 0x19, 0x91, 0x08, 0x66, 0xdb, 0x02,
+ 0x98, 0xe5, 0x83, 0x08, 0x66, 0x03, 0x02, 0x98, 0xef, 0x8b, 0x08, 0x66,
+ 0x83, 0x02, 0x99, 0x03, 0xc7, 0xc8, 0x7f, 0x08, 0x66, 0x50, 0x87, 0x08,
+ 0x64, 0x4b, 0x02, 0x99, 0x07, 0xc8, 0xba, 0xc3, 0x08, 0x65, 0x19, 0x91,
+ 0x08, 0x64, 0xdb, 0x02, 0x99, 0x67, 0x4a, 0xa2, 0xe0, 0xc2, 0x99, 0x71,
+ 0x4b, 0x95, 0x9b, 0xc2, 0x99, 0x7d, 0x97, 0x08, 0x65, 0x53, 0x02, 0x99,
+ 0x89, 0x83, 0x08, 0x64, 0x03, 0x02, 0x99, 0x97, 0x8b, 0x08, 0x64, 0x83,
+ 0x02, 0x99, 0xab, 0xc7, 0xc8, 0x7f, 0x08, 0x64, 0x50, 0xc4, 0xe5, 0x93,
+ 0x08, 0x62, 0x41, 0x91, 0x08, 0x60, 0x33, 0x02, 0x99, 0xaf, 0x83, 0x08,
+ 0x60, 0x03, 0x02, 0x99, 0xc2, 0x07, 0xc2, 0x99, 0xf1, 0x8b, 0x08, 0x60,
+ 0x1a, 0x02, 0x9a, 0x11, 0x83, 0x08, 0x60, 0x0b, 0x02, 0x9a, 0x19, 0x87,
+ 0x08, 0x60, 0x2b, 0x02, 0x9a, 0x4c, 0x11, 0xc2, 0x9a, 0x5e, 0x8b, 0x08,
+ 0x60, 0x22, 0x02, 0x9a, 0x69, 0x16, 0xc2, 0x9a, 0x6d, 0xc3, 0x05, 0x17,
+ 0x08, 0x54, 0xe8, 0x42, 0x05, 0x5c, 0xc2, 0x9a, 0x79, 0x16, 0xc2, 0x9a,
+ 0x83, 0xc3, 0x2f, 0x22, 0x08, 0x54, 0xd1, 0x09, 0xc2, 0x9a, 0x93, 0x42,
+ 0x0c, 0x25, 0xc2, 0x9a, 0x9f, 0x07, 0xc2, 0x9a, 0xa7, 0xc3, 0x7c, 0xad,
+ 0x08, 0x54, 0x29, 0xc3, 0x0e, 0x13, 0x08, 0x54, 0x21, 0xc4, 0x1a, 0x6a,
+ 0x08, 0x54, 0x19, 0x0a, 0xc2, 0x9a, 0xb9, 0xc3, 0x0e, 0x1c, 0x08, 0x54,
+ 0x09, 0xc3, 0x71, 0x66, 0x08, 0x54, 0x39, 0xc3, 0x8c, 0x10, 0x08, 0x54,
+ 0x41, 0x0d, 0xc2, 0x9a, 0xc5, 0xc4, 0x3e, 0xff, 0x08, 0x54, 0x61, 0xc3,
+ 0x0e, 0xe5, 0x08, 0x54, 0x71, 0xc3, 0xae, 0x23, 0x08, 0x54, 0x81, 0x03,
+ 0x42, 0x9a, 0xd1, 0xcd, 0x7b, 0xea, 0x0f, 0xad, 0x99, 0x44, 0x04, 0x42,
+ 0x42, 0x9a, 0xdd, 0xc2, 0x00, 0x3a, 0x08, 0x1a, 0x81, 0xc3, 0x30, 0x93,
+ 0x08, 0x1a, 0x89, 0xc3, 0x44, 0x00, 0x08, 0x1a, 0x91, 0x06, 0xc2, 0x9a,
+ 0xef, 0x87, 0x08, 0x1a, 0xa3, 0x02, 0x9a, 0xf9, 0x1c, 0xc2, 0x9a, 0xfd,
+ 0x8b, 0x08, 0x1a, 0xcb, 0x02, 0x9b, 0x09, 0xc4, 0xb5, 0x96, 0x08, 0x1a,
+ 0xd1, 0xc3, 0x2c, 0x7d, 0x08, 0x1a, 0xd9, 0xc5, 0xe3, 0xd7, 0x08, 0x1a,
+ 0xe1, 0xc5, 0xe2, 0x79, 0x08, 0x1a, 0xe9, 0x18, 0xc2, 0x9b, 0x11, 0xc4,
+ 0xe9, 0x3b, 0x08, 0x1a, 0xf9, 0xc3, 0x2a, 0x38, 0x08, 0x1b, 0x01, 0x15,
+ 0xc2, 0x9b, 0x1d, 0x16, 0xc2, 0x9b, 0x27, 0x97, 0x08, 0x1b, 0x19, 0xc5,
+ 0xdf, 0x81, 0x08, 0x1b, 0x21, 0x1b, 0xc2, 0x9b, 0x33, 0x91, 0x08, 0x1b,
+ 0x4b, 0x02, 0x9b, 0x4d, 0xc2, 0x01, 0x0e, 0x08, 0x1b, 0x60, 0xc2, 0x00,
+ 0x34, 0x08, 0x18, 0x09, 0x0d, 0xc2, 0x9b, 0x51, 0xc2, 0x00, 0x06, 0x08,
+ 0x18, 0x19, 0x87, 0x08, 0x18, 0x23, 0x02, 0x9b, 0x63, 0xc2, 0x00, 0x29,
+ 0x08, 0x18, 0x29, 0xc2, 0x0b, 0xa2, 0x08, 0x18, 0x31, 0xc2, 0x00, 0x5b,
+ 0x08, 0x18, 0x39, 0x16, 0xc2, 0x9b, 0x87, 0x8b, 0x08, 0x18, 0x4b, 0x02,
+ 0x9b, 0x91, 0x83, 0x08, 0x18, 0x01, 0x91, 0x08, 0x18, 0x79, 0x12, 0xc2,
+ 0x9b, 0x95, 0x15, 0xc2, 0x9b, 0x9f, 0x97, 0x08, 0x18, 0xb3, 0x02, 0x9b,
+ 0xab, 0xc3, 0x27, 0xc3, 0x08, 0x18, 0xe1, 0xc2, 0x04, 0x30, 0x08, 0x19,
+ 0x69, 0xcc, 0x8a, 0xcc, 0x08, 0x19, 0x70, 0xc3, 0x05, 0x17, 0x08, 0x19,
+ 0x01, 0x42, 0x02, 0xf8, 0xc2, 0x9b, 0xaf, 0xc3, 0x03, 0x01, 0x08, 0x19,
+ 0x10, 0x83, 0x00, 0xe2, 0xf8, 0x99, 0x00, 0xe3, 0x19, 0x8f, 0x00, 0xe3,
+ 0x11, 0x8c, 0x00, 0xe3, 0x09, 0x8d, 0x00, 0xe3, 0x00, 0xc7, 0x50, 0x41,
+ 0x01, 0x5d, 0xd1, 0xd1, 0x50, 0x37, 0x01, 0x5d, 0xd8, 0x90, 0x08, 0x25,
+ 0x90, 0xc3, 0x1c, 0x4f, 0x08, 0x25, 0xb1, 0xc2, 0x06, 0x6b, 0x08, 0x25,
+ 0xe9, 0xc2, 0x07, 0x44, 0x08, 0x26, 0x29, 0x16, 0x42, 0x9b, 0xbb, 0x83,
+ 0x08, 0x26, 0x51, 0xc2, 0x01, 0x0e, 0x08, 0x26, 0x60, 0x90, 0x08, 0x26,
+ 0xd0, 0xc3, 0x1c, 0x4f, 0x08, 0x26, 0xf1, 0xc2, 0x06, 0x6b, 0x08, 0x27,
+ 0x29, 0xc2, 0x07, 0x44, 0x08, 0x27, 0x69, 0x16, 0x42, 0x9b, 0xc5, 0x83,
+ 0x08, 0x27, 0x91, 0xc2, 0x01, 0x0e, 0x08, 0x27, 0xa0, 0x0d, 0xc2, 0x9b,
+ 0xcf, 0xcb, 0x96, 0x35, 0x0e, 0x7d, 0x89, 0xc8, 0x49, 0x54, 0x0e, 0x7d,
+ 0x80, 0xc6, 0xd8, 0x62, 0x0e, 0x7a, 0x88, 0x0d, 0xc2, 0x9b, 0xdb, 0x16,
+ 0xc2, 0x9b, 0xe7, 0x44, 0xe6, 0x63, 0xc2, 0x9b, 0xf3, 0x49, 0x7a, 0xf3,
+ 0xc2, 0x9c, 0x00, 0xce, 0x67, 0x90, 0x0e, 0x7c, 0xb9, 0x12, 0xc2, 0x9c,
+ 0x0d, 0xce, 0x74, 0x68, 0x0e, 0x7c, 0x98, 0x00, 0x42, 0x9c, 0x17, 0x00,
+ 0x42, 0x9c, 0x2c, 0x42, 0x00, 0x27, 0xc2, 0x9c, 0x38, 0xc8, 0xb9, 0x33,
+ 0x0e, 0x7b, 0xf8, 0xcb, 0x88, 0xd5, 0x0e, 0x7b, 0xe1, 0xce, 0x67, 0x90,
+ 0x0e, 0x7b, 0xd9, 0xc8, 0x49, 0x54, 0x0e, 0x7b, 0xd1, 0xc8, 0xbf, 0x23,
+ 0x0e, 0x7b, 0xc8, 0x45, 0x49, 0x4f, 0xc2, 0x9c, 0x44, 0xce, 0x67, 0x90,
+ 0x0e, 0x7b, 0xb8, 0xc6, 0x6e, 0xd4, 0x0e, 0x7b, 0xa1, 0xca, 0x96, 0x36,
+ 0x0e, 0x7b, 0x98, 0xcc, 0x86, 0x40, 0x0e, 0x7d, 0x59, 0xc7, 0xcd, 0x41,
+ 0x0e, 0x7d, 0x51, 0xc3, 0xeb, 0x94, 0x0e, 0x7d, 0x48, 0xc8, 0xb8, 0xdb,
+ 0x0e, 0x79, 0x68, 0xc8, 0xbb, 0x2b, 0x0e, 0x79, 0xc8, 0xc9, 0x80, 0xca,
+ 0x0e, 0x78, 0xc1, 0x43, 0x00, 0xf7, 0x42, 0x9c, 0x50, 0xc5, 0x00, 0x47,
+ 0x0e, 0x78, 0x89, 0xc4, 0x00, 0xcd, 0x0e, 0x78, 0x28, 0xc7, 0x91, 0x73,
+ 0x0e, 0x79, 0xb3, 0x02, 0x9c, 0x5c, 0xc6, 0xd9, 0x76, 0x0e, 0x79, 0x30,
+ 0x15, 0xc2, 0x9c, 0x62, 0x43, 0x00, 0xf7, 0x42, 0x9c, 0x6e, 0xc3, 0xeb,
+ 0x7f, 0x0e, 0x79, 0x51, 0xc2, 0x00, 0x36, 0x0e, 0x79, 0x00, 0x43, 0x00,
+ 0xf7, 0xc2, 0x9c, 0x7a, 0x4d, 0x80, 0xca, 0x42, 0x9c, 0x86, 0xc6, 0x43,
+ 0xc0, 0x0e, 0x78, 0xf1, 0x42, 0x05, 0x27, 0x42, 0x9c, 0x92, 0xc5, 0x00,
+ 0x47, 0x0e, 0x78, 0x91, 0xc4, 0x00, 0xcd, 0x0e, 0x78, 0x30, 0xc6, 0x80,
+ 0xcd, 0x0e, 0x78, 0xe9, 0x4b, 0x91, 0xc8, 0x42, 0x9c, 0x9e, 0xc5, 0x00,
+ 0x47, 0x0e, 0x78, 0xa1, 0xc4, 0x00, 0xcd, 0x0e, 0x78, 0x40, 0xc5, 0x00,
+ 0x47, 0x0e, 0x78, 0x81, 0xc4, 0x00, 0xcd, 0x0e, 0x78, 0x20, 0xc5, 0x00,
+ 0x47, 0x0e, 0x78, 0x69, 0xc4, 0x00, 0xcd, 0x0e, 0x78, 0x08, 0xce, 0x1e,
+ 0x30, 0x08, 0xd1, 0xb0, 0xc3, 0x0d, 0x93, 0x05, 0x4e, 0x53, 0x02, 0x9c,
+ 0xaa, 0xc4, 0xe9, 0xbb, 0x05, 0x4e, 0x18, 0xc6, 0xd8, 0x26, 0x05, 0x4e,
+ 0x39, 0xc6, 0x41, 0xf2, 0x05, 0x4e, 0x60, 0x17, 0xc2, 0x9c, 0xb0, 0xc5,
+ 0xdc, 0x02, 0x05, 0x4e, 0x40, 0xc6, 0xd4, 0xea, 0x05, 0x4c, 0x98, 0x42,
+ 0x00, 0xf9, 0x42, 0x9c, 0xbc, 0xc6, 0xd5, 0x92, 0x05, 0x4d, 0x60, 0xc6,
+ 0xd4, 0xea, 0x05, 0x4d, 0x40, 0x00, 0x42, 0x9c, 0xc8, 0x83, 0x05, 0x4d,
+ 0x23, 0x02, 0x9c, 0xd4, 0xc2, 0x1a, 0x36, 0x05, 0x4c, 0xd3, 0x02, 0x9c,
+ 0xda, 0xc2, 0x07, 0x69, 0x05, 0x4c, 0xa2, 0x02, 0x9c, 0xe0, 0x83, 0x05,
+ 0x4d, 0x13, 0x02, 0x9c, 0xe9, 0xc2, 0x0c, 0x25, 0x05, 0x4c, 0xea, 0x02,
+ 0x9c, 0xef, 0x83, 0x05, 0x4d, 0x03, 0x02, 0x9c, 0xf5, 0xc2, 0x00, 0x44,
+ 0x05, 0x4c, 0xda, 0x02, 0x9c, 0xfb, 0xca, 0x65, 0x78, 0x05, 0x4c, 0xc8,
+ 0xc6, 0xd4, 0xea, 0x05, 0x4c, 0xb0, 0x00, 0x42, 0x9d, 0x01, 0x8b, 0x05,
+ 0x4c, 0x68, 0x8b, 0x05, 0x4c, 0x39, 0xc5, 0xdb, 0x8f, 0x05, 0x4c, 0x28,
+ 0xc4, 0x02, 0xb5, 0x05, 0x4d, 0xd1, 0xc4, 0xd7, 0x77, 0x05, 0x4d, 0xa0,
+ 0xcf, 0x64, 0x65, 0x01, 0x2c, 0xf2, 0x02, 0x9d, 0x0d, 0x45, 0x00, 0x39,
+ 0x42, 0x9d, 0x13, 0x97, 0x05, 0x22, 0xdb, 0x02, 0x9d, 0x1f, 0x91, 0x05,
+ 0x22, 0xbb, 0x02, 0x9d, 0x32, 0x8b, 0x05, 0x22, 0x62, 0x02, 0x9d, 0x3e,
+ 0x9b, 0x05, 0x22, 0x33, 0x02, 0x9d, 0x51, 0x97, 0x05, 0x22, 0x03, 0x02,
+ 0x9d, 0x64, 0x91, 0x05, 0x21, 0xeb, 0x02, 0x9d, 0x7a, 0x8b, 0x05, 0x21,
+ 0x9a, 0x02, 0x9d, 0x86, 0x9b, 0x05, 0x1d, 0x3b, 0x02, 0x9d, 0x99, 0x97,
+ 0x05, 0x1d, 0x0b, 0x02, 0x9d, 0xac, 0x87, 0x05, 0x1c, 0xeb, 0x02, 0x9d,
+ 0xbf, 0x91, 0x05, 0x1c, 0xcb, 0x02, 0x9d, 0xcb, 0x83, 0x05, 0x1c, 0xb2,
+ 0x02, 0x9d, 0xd3, 0xc2, 0x05, 0x4a, 0x05, 0x12, 0xf3, 0x02, 0x9d, 0xdf,
+ 0x83, 0x05, 0x13, 0x13, 0x02, 0x9d, 0xe7, 0xc2, 0x00, 0xf1, 0x05, 0x13,
+ 0x33, 0x02, 0x9d, 0xf3, 0x91, 0x05, 0x13, 0x4b, 0x02, 0x9d, 0xfb, 0x87,
+ 0x05, 0x13, 0x62, 0x02, 0x9e, 0x07, 0x8b, 0x05, 0x17, 0x7b, 0x02, 0x9e,
+ 0x0f, 0x83, 0x05, 0x17, 0xb3, 0x02, 0x9e, 0x22, 0x97, 0x05, 0x17, 0xfb,
+ 0x02, 0x9e, 0x2e, 0x11, 0xc2, 0x9e, 0x44, 0x87, 0x05, 0x17, 0xeb, 0x02,
+ 0x9e, 0x4c, 0x9b, 0x05, 0x18, 0x2a, 0x02, 0x9e, 0x50, 0x8b, 0x05, 0x03,
+ 0xc3, 0x02, 0x9e, 0x63, 0x83, 0x05, 0x03, 0xfb, 0x02, 0x9e, 0x76, 0x91,
+ 0x05, 0x04, 0x1b, 0x02, 0x9e, 0x82, 0x97, 0x05, 0x04, 0x3b, 0x02, 0x9e,
+ 0x8e, 0x9b, 0x05, 0x04, 0x6a, 0x02, 0x9e, 0xa1, 0x8b, 0x05, 0x0a, 0x9b,
+ 0x02, 0x9e, 0xb4, 0x83, 0x05, 0x0a, 0xcb, 0x02, 0x9e, 0xc7, 0x91, 0x05,
+ 0x0a, 0xeb, 0x02, 0x9e, 0xd3, 0x87, 0x05, 0x0b, 0x03, 0x02, 0x9e, 0xdf,
+ 0x97, 0x05, 0x0b, 0x22, 0x02, 0x9e, 0xe7, 0x96, 0x05, 0x0b, 0xe9, 0x9a,
+ 0x05, 0x0b, 0xf1, 0x92, 0x05, 0x0c, 0x01, 0x87, 0x05, 0x0c, 0x12, 0x02,
+ 0x9e, 0xfa, 0x9a, 0x05, 0x0c, 0x21, 0x92, 0x05, 0x0c, 0x30, 0x91, 0x05,
+ 0x0c, 0x43, 0x02, 0x9f, 0x02, 0x96, 0x05, 0x0c, 0x89, 0x9a, 0x05, 0x0c,
+ 0x91, 0x92, 0x05, 0x0c, 0xa1, 0x94, 0x05, 0x0c, 0xb2, 0x02, 0x9f, 0x0a,
+ 0x96, 0x05, 0x0c, 0x51, 0x9a, 0x05, 0x0c, 0x59, 0x92, 0x05, 0x0c, 0x68,
+ 0x9a, 0x05, 0x0c, 0x71, 0x92, 0x05, 0x0c, 0x80, 0x9b, 0x05, 0x21, 0x7b,
+ 0x02, 0x9f, 0x0e, 0x97, 0x05, 0x21, 0x4b, 0x02, 0x9f, 0x1a, 0x91, 0x05,
+ 0x21, 0x2b, 0x02, 0x9f, 0x34, 0x8b, 0x05, 0x20, 0xd2, 0x02, 0x9f, 0x40,
+ 0x94, 0x05, 0x1f, 0xdb, 0x02, 0x9f, 0x53, 0x92, 0x05, 0x1f, 0xc9, 0x9a,
+ 0x05, 0x1f, 0xb9, 0x96, 0x05, 0x1f, 0xb0, 0x94, 0x05, 0x1f, 0xab, 0x02,
+ 0x9f, 0x57, 0x92, 0x05, 0x1f, 0x99, 0x9a, 0x05, 0x1f, 0x89, 0x96, 0x05,
+ 0x1f, 0x81, 0x91, 0x05, 0x1f, 0x52, 0x02, 0x9f, 0x5b, 0x92, 0x05, 0x1f,
+ 0x79, 0x9a, 0x05, 0x1f, 0x69, 0x96, 0x05, 0x1f, 0x60, 0x87, 0x05, 0x1f,
+ 0x33, 0x02, 0x9f, 0x67, 0x92, 0x05, 0x1f, 0x19, 0x9a, 0x05, 0x1f, 0x09,
+ 0x96, 0x05, 0x1f, 0x00, 0x94, 0x05, 0x20, 0xbb, 0x02, 0x9f, 0x73, 0x92,
+ 0x05, 0x20, 0xa9, 0x9a, 0x05, 0x20, 0x99, 0x96, 0x05, 0x20, 0x90, 0x94,
+ 0x05, 0x20, 0x8b, 0x02, 0x9f, 0x77, 0x92, 0x05, 0x20, 0x79, 0x9a, 0x05,
+ 0x20, 0x69, 0x96, 0x05, 0x20, 0x61, 0x91, 0x05, 0x20, 0x32, 0x02, 0x9f,
+ 0x7b, 0x92, 0x05, 0x20, 0x59, 0x9a, 0x05, 0x20, 0x49, 0x96, 0x05, 0x20,
+ 0x40, 0x87, 0x05, 0x20, 0x13, 0x02, 0x9f, 0x87, 0x92, 0x05, 0x1f, 0xf9,
+ 0x9a, 0x05, 0x1f, 0xe9, 0x96, 0x05, 0x1f, 0xe0, 0x94, 0x05, 0x1e, 0xfb,
+ 0x02, 0x9f, 0x93, 0x92, 0x05, 0x1e, 0xe9, 0x9a, 0x05, 0x1e, 0xd9, 0x96,
+ 0x05, 0x1e, 0xd0, 0x94, 0x05, 0x1e, 0xcb, 0x02, 0x9f, 0x97, 0x92, 0x05,
+ 0x1e, 0xb9, 0x9a, 0x05, 0x1e, 0xa9, 0x96, 0x05, 0x1e, 0xa1, 0x91, 0x05,
+ 0x1e, 0x5a, 0x02, 0x9f, 0x9b, 0x92, 0x05, 0x1e, 0x99, 0x9a, 0x05, 0x1e,
+ 0x88, 0x92, 0x05, 0x1e, 0x81, 0x9a, 0x05, 0x1e, 0x71, 0x96, 0x05, 0x1e,
+ 0x68, 0x92, 0x05, 0x1e, 0x49, 0x9a, 0x05, 0x1e, 0x39, 0x96, 0x05, 0x1e,
+ 0x30, 0x9b, 0x05, 0x1c, 0x83, 0x02, 0x9f, 0xa3, 0x97, 0x05, 0x1c, 0x53,
+ 0x02, 0x9f, 0xb6, 0x87, 0x05, 0x1c, 0x33, 0x02, 0x9f, 0xd0, 0x91, 0x05,
+ 0x1c, 0x13, 0x02, 0x9f, 0xdc, 0x83, 0x05, 0x1b, 0xea, 0x02, 0x9f, 0xe8,
+ 0x9b, 0x05, 0x1e, 0x13, 0x02, 0x9f, 0xec, 0x97, 0x05, 0x1d, 0xe3, 0x02,
+ 0x9f, 0xff, 0x87, 0x05, 0x1d, 0xc3, 0x02, 0xa0, 0x19, 0x91, 0x05, 0x1d,
+ 0xa3, 0x02, 0xa0, 0x25, 0x83, 0x05, 0x1d, 0x6a, 0x02, 0xa0, 0x31, 0x9b,
+ 0x05, 0x1a, 0x13, 0x02, 0xa0, 0x3d, 0x8b, 0x05, 0x19, 0x63, 0x02, 0xa0,
+ 0x50, 0x83, 0x05, 0x19, 0x9b, 0x02, 0xa0, 0x63, 0x91, 0x05, 0x19, 0xbb,
+ 0x02, 0xa0, 0x6f, 0x87, 0x05, 0x19, 0xd3, 0x02, 0xa0, 0x7b, 0x97, 0x05,
+ 0x19, 0xf2, 0x02, 0xa0, 0x83, 0x96, 0x05, 0x18, 0x49, 0x9a, 0x05, 0x18,
+ 0x51, 0x92, 0x05, 0x18, 0x61, 0x87, 0x05, 0x18, 0x72, 0x02, 0xa0, 0x8f,
+ 0x96, 0x05, 0x18, 0x81, 0x9a, 0x05, 0x18, 0x89, 0x92, 0x05, 0x18, 0x98,
+ 0x91, 0x05, 0x18, 0xab, 0x02, 0xa0, 0x97, 0x96, 0x05, 0x18, 0xf1, 0x9a,
+ 0x05, 0x18, 0xf9, 0x92, 0x05, 0x19, 0x09, 0x94, 0x05, 0x19, 0x1a, 0x02,
+ 0xa0, 0x9f, 0x96, 0x05, 0x18, 0xb9, 0x9a, 0x05, 0x18, 0xc1, 0x92, 0x05,
+ 0x18, 0xd0, 0x9a, 0x05, 0x18, 0xd9, 0x92, 0x05, 0x18, 0xe8, 0x96, 0x05,
+ 0x19, 0x21, 0x9a, 0x05, 0x19, 0x29, 0x92, 0x05, 0x19, 0x39, 0x94, 0x05,
+ 0x19, 0x4a, 0x02, 0xa0, 0xa3, 0x9b, 0x05, 0x1b, 0xc3, 0x02, 0xa0, 0xa7,
+ 0x97, 0x05, 0x1b, 0x93, 0x02, 0xa0, 0xba, 0x87, 0x05, 0x1b, 0x7b, 0x02,
+ 0xa0, 0xd0, 0x91, 0x05, 0x1b, 0x5b, 0x02, 0xa0, 0xdc, 0x83, 0x05, 0x1b,
+ 0x1a, 0x02, 0xa0, 0xe8, 0x94, 0x05, 0x16, 0x7b, 0x02, 0xa0, 0xf4, 0x96,
+ 0x05, 0x16, 0x51, 0x9a, 0x05, 0x16, 0x59, 0x92, 0x05, 0x16, 0x68, 0x92,
+ 0x05, 0x16, 0x19, 0x9a, 0x05, 0x16, 0x08, 0x96, 0x05, 0x16, 0x21, 0x9a,
+ 0x05, 0x16, 0x29, 0x92, 0x05, 0x16, 0x39, 0x94, 0x05, 0x16, 0x4b, 0x02,
+ 0xa0, 0xf8, 0x91, 0x05, 0x15, 0xda, 0x02, 0xa0, 0xfc, 0x96, 0x05, 0x15,
+ 0x71, 0x9a, 0x05, 0x15, 0x79, 0x92, 0x05, 0x15, 0x89, 0x87, 0x05, 0x15,
+ 0xa2, 0x02, 0xa1, 0x04, 0x96, 0x05, 0x15, 0xb1, 0x9a, 0x05, 0x15, 0xb9,
+ 0x92, 0x05, 0x15, 0xc8, 0x96, 0x05, 0x15, 0xe9, 0x9a, 0x05, 0x15, 0xf1,
+ 0x92, 0x05, 0x16, 0x00, 0x9a, 0x05, 0x14, 0xf9, 0x92, 0x05, 0x15, 0x08,
+ 0x92, 0x05, 0x14, 0xf1, 0x9a, 0x05, 0x14, 0xe1, 0x96, 0x05, 0x14, 0xd8,
+ 0x91, 0x05, 0x14, 0xcb, 0x02, 0xa1, 0x10, 0x96, 0x05, 0x15, 0x11, 0x9a,
+ 0x05, 0x15, 0x19, 0x92, 0x05, 0x15, 0x29, 0x94, 0x05, 0x15, 0x3a, 0x02,
+ 0xa1, 0x18, 0x92, 0x05, 0x14, 0xb9, 0x9a, 0x05, 0x14, 0xa9, 0x96, 0x05,
+ 0x14, 0xa0, 0x87, 0x05, 0x14, 0x93, 0x02, 0xa1, 0x1c, 0x92, 0x05, 0x14,
+ 0x81, 0x9a, 0x05, 0x14, 0x71, 0x96, 0x05, 0x14, 0x68, 0x91, 0x05, 0x16,
+ 0xeb, 0x02, 0xa1, 0x24, 0x83, 0x05, 0x16, 0xd3, 0x02, 0xa1, 0x2c, 0x8b,
+ 0x05, 0x16, 0x93, 0x02, 0xa1, 0x38, 0x87, 0x05, 0x17, 0x03, 0x02, 0xa1,
+ 0x4b, 0x97, 0x05, 0x17, 0x1b, 0x02, 0xa1, 0x53, 0x9b, 0x05, 0x17, 0x4a,
+ 0x02, 0xa1, 0x62, 0x9b, 0x05, 0x1a, 0xeb, 0x02, 0xa1, 0x75, 0x97, 0x05,
+ 0x1a, 0xbb, 0x02, 0xa1, 0x88, 0x87, 0x05, 0x1a, 0x9b, 0x02, 0xa1, 0xa2,
+ 0x91, 0x05, 0x1a, 0x7b, 0x02, 0xa1, 0xae, 0x83, 0x05, 0x1a, 0x42, 0x02,
+ 0xa1, 0xba, 0x96, 0x05, 0x15, 0x41, 0x9a, 0x05, 0x15, 0x49, 0x92, 0x05,
+ 0x15, 0x59, 0x94, 0x05, 0x15, 0x6a, 0x02, 0xa1, 0xc6, 0x92, 0x05, 0x14,
+ 0x61, 0x9a, 0x05, 0x14, 0x50, 0x92, 0x05, 0x14, 0x49, 0x9a, 0x05, 0x14,
+ 0x38, 0x91, 0x05, 0x14, 0x2a, 0x02, 0xa1, 0xca, 0x92, 0x05, 0x14, 0x19,
+ 0x9a, 0x05, 0x14, 0x09, 0x96, 0x05, 0x14, 0x00, 0x92, 0x05, 0x13, 0xf9,
+ 0x9a, 0x05, 0x13, 0xe8, 0x87, 0x05, 0x12, 0xdb, 0x02, 0xa1, 0xd2, 0x91,
+ 0x05, 0x12, 0xc3, 0x02, 0xa1, 0xda, 0xc2, 0x00, 0xf1, 0x05, 0x12, 0xa3,
+ 0x02, 0xa1, 0xe6, 0x83, 0x05, 0x12, 0x83, 0x02, 0xa1, 0xf2, 0x8b, 0x05,
+ 0x12, 0x42, 0x02, 0xa1, 0xfe, 0x96, 0x05, 0x13, 0x71, 0x87, 0x05, 0x13,
+ 0x82, 0x02, 0xa2, 0x11, 0x96, 0x05, 0x13, 0x89, 0x9a, 0x05, 0x13, 0x91,
+ 0x92, 0x05, 0x13, 0xa0, 0x96, 0x05, 0x13, 0xa9, 0x9a, 0x05, 0x13, 0xb1,
+ 0x92, 0x05, 0x13, 0xc0, 0x96, 0x05, 0x13, 0xc9, 0x9a, 0x05, 0x13, 0xd1,
+ 0x92, 0x05, 0x13, 0xe0, 0x8b, 0x05, 0x04, 0x9b, 0x02, 0xa2, 0x15, 0x83,
+ 0x05, 0x04, 0xd3, 0x02, 0xa2, 0x28, 0x97, 0x05, 0x05, 0x2b, 0x02, 0xa2,
+ 0x34, 0x91, 0x05, 0x05, 0x0b, 0x02, 0xa2, 0x4e, 0x9b, 0x05, 0x05, 0x52,
+ 0x02, 0xa2, 0x5a, 0x8b, 0x05, 0x0b, 0x53, 0x02, 0xa2, 0x69, 0x83, 0x05,
+ 0x0b, 0x93, 0x02, 0xa2, 0x7c, 0x17, 0xc2, 0xa2, 0x88, 0x11, 0xc2, 0xa2,
+ 0x93, 0x87, 0x05, 0x0b, 0xd2, 0x02, 0xa2, 0x9f, 0x8b, 0x05, 0x0c, 0xcb,
+ 0x02, 0xa2, 0xa7, 0x83, 0x05, 0x0d, 0x03, 0x02, 0xa2, 0xba, 0x97, 0x05,
+ 0x0d, 0x6b, 0x02, 0xa2, 0xc6, 0x91, 0x05, 0x0d, 0x33, 0x02, 0xa2, 0xe0,
+ 0x87, 0x05, 0x0d, 0x4b, 0x02, 0xa2, 0xe8, 0x9b, 0x05, 0x0d, 0x9a, 0x02,
+ 0xa2, 0xf0, 0x87, 0x05, 0x23, 0xbb, 0x02, 0xa3, 0x03, 0x92, 0x05, 0x23,
+ 0xa1, 0x9a, 0x05, 0x23, 0x91, 0x96, 0x05, 0x23, 0x88, 0x91, 0x05, 0x23,
+ 0xdb, 0x02, 0xa3, 0x0f, 0x96, 0x05, 0x24, 0x09, 0x9a, 0x05, 0x24, 0x11,
+ 0x92, 0x05, 0x24, 0x21, 0x94, 0x05, 0x24, 0x32, 0x02, 0xa3, 0x1b, 0x96,
+ 0x05, 0x23, 0xe9, 0x9a, 0x05, 0x23, 0xf1, 0x92, 0x05, 0x24, 0x00, 0x96,
+ 0x05, 0x24, 0x39, 0x9a, 0x05, 0x24, 0x41, 0x92, 0x05, 0x24, 0x51, 0x94,
+ 0x05, 0x24, 0x62, 0x02, 0xa3, 0x1f, 0x94, 0x05, 0x23, 0x83, 0x02, 0xa3,
+ 0x23, 0x92, 0x05, 0x23, 0x71, 0x9a, 0x05, 0x23, 0x61, 0x96, 0x05, 0x23,
+ 0x58, 0x96, 0x05, 0x22, 0xe9, 0x9a, 0x05, 0x22, 0xf1, 0x92, 0x05, 0x23,
+ 0x01, 0x87, 0x05, 0x23, 0x1a, 0x02, 0xa3, 0x27, 0x9a, 0x05, 0x23, 0x41,
+ 0x92, 0x05, 0x23, 0x51, 0x96, 0x05, 0x23, 0x38, 0x9a, 0x05, 0x23, 0x28,
+ 0x97, 0x05, 0x12, 0x13, 0x02, 0xa3, 0x33, 0xc2, 0x05, 0x4a, 0x05, 0x11,
+ 0x8b, 0x02, 0xa3, 0x4d, 0x83, 0x05, 0x11, 0xa3, 0x02, 0xa3, 0x51, 0x91,
+ 0x05, 0x11, 0xdb, 0x02, 0xa3, 0x5d, 0x87, 0x05, 0x11, 0xf2, 0x02, 0xa3,
+ 0x69, 0x96, 0x05, 0x05, 0x71, 0x9a, 0x05, 0x05, 0x79, 0x92, 0x05, 0x05,
+ 0x89, 0x87, 0x05, 0x05, 0x9a, 0x02, 0xa3, 0x71, 0x96, 0x05, 0x05, 0xa9,
+ 0x9a, 0x05, 0x05, 0xb1, 0x92, 0x05, 0x05, 0xc0, 0x91, 0x05, 0x05, 0xdb,
+ 0x02, 0xa3, 0x79, 0x96, 0x05, 0x06, 0x19, 0x9a, 0x05, 0x06, 0x21, 0x92,
+ 0x05, 0x06, 0x31, 0x94, 0x05, 0x06, 0x42, 0x02, 0xa3, 0x85, 0x96, 0x05,
+ 0x05, 0xe9, 0x9a, 0x05, 0x05, 0xf1, 0x92, 0x05, 0x06, 0x00, 0x9a, 0x05,
+ 0x06, 0x08, 0x96, 0x05, 0x06, 0x49, 0x9a, 0x05, 0x06, 0x51, 0x92, 0x05,
+ 0x06, 0x60, 0xcc, 0x1b, 0x7c, 0x05, 0x00, 0xa8, 0x96, 0x05, 0x00, 0x21,
+ 0x9a, 0x05, 0x00, 0x29, 0x92, 0x05, 0x00, 0x38, 0x96, 0x05, 0x00, 0xb1,
+ 0x9a, 0x05, 0x00, 0xb9, 0x92, 0x05, 0x00, 0xc9, 0x87, 0x05, 0x00, 0xe2,
+ 0x02, 0xa3, 0x89, 0x96, 0x05, 0x00, 0xf1, 0x9a, 0x05, 0x00, 0xf9, 0x92,
+ 0x05, 0x01, 0x08, 0x91, 0x05, 0x01, 0x1b, 0x02, 0xa3, 0x95, 0x96, 0x05,
+ 0x01, 0x61, 0x9a, 0x05, 0x01, 0x69, 0x92, 0x05, 0x01, 0x79, 0x94, 0x05,
+ 0x01, 0x8a, 0x02, 0xa3, 0x9d, 0x96, 0x05, 0x01, 0x29, 0x9a, 0x05, 0x01,
+ 0x31, 0x92, 0x05, 0x01, 0x40, 0x9a, 0x05, 0x01, 0x49, 0x92, 0x05, 0x01,
+ 0x58, 0x96, 0x05, 0x01, 0x91, 0x9a, 0x05, 0x01, 0x99, 0x92, 0x05, 0x01,
+ 0xa9, 0x94, 0x05, 0x01, 0xba, 0x02, 0xa3, 0xa1, 0x8b, 0x05, 0x02, 0xc3,
+ 0x02, 0xa3, 0xa5, 0x83, 0x05, 0x03, 0x03, 0x02, 0xa3, 0xb8, 0x97, 0x05,
+ 0x03, 0x73, 0x02, 0xa3, 0xc4, 0x91, 0x05, 0x03, 0x3b, 0x02, 0xa3, 0xde,
+ 0x87, 0x05, 0x03, 0x53, 0x02, 0xa3, 0xea, 0x9b, 0x05, 0x03, 0xa2, 0x02,
+ 0xa3, 0xf2, 0x96, 0x05, 0x01, 0xc1, 0x9a, 0x05, 0x01, 0xc9, 0x92, 0x05,
+ 0x01, 0xd9, 0x87, 0x05, 0x01, 0xea, 0x02, 0xa3, 0xfe, 0x96, 0x05, 0x01,
+ 0xf9, 0x9a, 0x05, 0x02, 0x01, 0x92, 0x05, 0x02, 0x10, 0x91, 0x05, 0x02,
+ 0x23, 0x02, 0xa4, 0x06, 0x96, 0x05, 0x02, 0x51, 0x9a, 0x05, 0x02, 0x59,
+ 0x92, 0x05, 0x02, 0x69, 0x94, 0x05, 0x02, 0x7a, 0x02, 0xa4, 0x0e, 0x96,
+ 0x05, 0x02, 0x31, 0x9a, 0x05, 0x02, 0x39, 0x92, 0x05, 0x02, 0x48, 0x96,
+ 0x05, 0x02, 0x81, 0x9a, 0x05, 0x02, 0x89, 0x92, 0x05, 0x02, 0x99, 0x94,
+ 0x05, 0x02, 0xaa, 0x02, 0xa4, 0x12, 0x96, 0x05, 0x06, 0x69, 0x9a, 0x05,
+ 0x06, 0x71, 0x92, 0x05, 0x06, 0x80, 0x96, 0x05, 0x06, 0x89, 0x9a, 0x05,
+ 0x06, 0x91, 0x92, 0x05, 0x06, 0xa0, 0x9a, 0x05, 0x06, 0xa9, 0x92, 0x05,
+ 0x06, 0xb8, 0x96, 0x05, 0x06, 0xc1, 0x9a, 0x05, 0x06, 0xc9, 0x92, 0x05,
+ 0x06, 0xd9, 0x94, 0x05, 0x06, 0xea, 0x02, 0xa4, 0x16, 0x96, 0x05, 0x06,
+ 0xf1, 0x9a, 0x05, 0x06, 0xf9, 0x92, 0x05, 0x07, 0x08, 0x96, 0x05, 0x07,
+ 0x11, 0x9a, 0x05, 0x07, 0x19, 0x92, 0x05, 0x07, 0x29, 0x87, 0x05, 0x07,
+ 0x42, 0x02, 0xa4, 0x1a, 0x96, 0x05, 0x07, 0x51, 0x9a, 0x05, 0x07, 0x59,
+ 0x92, 0x05, 0x07, 0x68, 0x96, 0x05, 0x07, 0x71, 0x9a, 0x05, 0x07, 0x79,
+ 0x92, 0x05, 0x07, 0x88, 0x9a, 0x05, 0x07, 0x91, 0x92, 0x05, 0x07, 0x98,
+ 0x96, 0x05, 0x07, 0xa1, 0x9a, 0x05, 0x07, 0xa9, 0x92, 0x05, 0x07, 0xb9,
+ 0x94, 0x05, 0x07, 0xca, 0x02, 0xa4, 0x26, 0x96, 0x05, 0x07, 0xd1, 0x9a,
+ 0x05, 0x07, 0xd9, 0x92, 0x05, 0x07, 0xe9, 0x94, 0x05, 0x07, 0xfa, 0x02,
+ 0xa4, 0x2a, 0x96, 0x05, 0x08, 0x01, 0x9a, 0x05, 0x08, 0x09, 0x92, 0x05,
+ 0x08, 0x19, 0x87, 0x05, 0x08, 0x2a, 0x02, 0xa4, 0x2e, 0x96, 0x05, 0x08,
+ 0x39, 0x9a, 0x05, 0x08, 0x41, 0x92, 0x05, 0x08, 0x50, 0x91, 0x05, 0x08,
+ 0x63, 0x02, 0xa4, 0x36, 0x96, 0x05, 0x08, 0xa1, 0x9a, 0x05, 0x08, 0xa9,
+ 0x92, 0x05, 0x08, 0xb9, 0x94, 0x05, 0x08, 0xca, 0x02, 0xa4, 0x3a, 0x96,
+ 0x05, 0x08, 0x69, 0x9a, 0x05, 0x08, 0x71, 0x92, 0x05, 0x08, 0x80, 0x9a,
+ 0x05, 0x08, 0x89, 0x92, 0x05, 0x08, 0x98, 0x8b, 0x05, 0x09, 0xc3, 0x02,
+ 0xa4, 0x3e, 0x83, 0x05, 0x09, 0xfb, 0x02, 0xa4, 0x51, 0x97, 0x05, 0x0a,
+ 0x6b, 0x02, 0xa4, 0x5d, 0x91, 0x05, 0x0a, 0x33, 0x02, 0xa4, 0x77, 0x87,
+ 0x05, 0x0a, 0x4a, 0x02, 0xa4, 0x83, 0x96, 0x05, 0x08, 0xd1, 0x9a, 0x05,
+ 0x08, 0xd9, 0x92, 0x05, 0x08, 0xe9, 0x87, 0x05, 0x08, 0xfa, 0x02, 0xa4,
+ 0x8b, 0x96, 0x05, 0x09, 0x09, 0x9a, 0x05, 0x09, 0x11, 0x92, 0x05, 0x09,
+ 0x20, 0x91, 0x05, 0x09, 0x3b, 0x02, 0xa4, 0x93, 0x96, 0x05, 0x09, 0x81,
+ 0x9a, 0x05, 0x09, 0x89, 0x92, 0x05, 0x09, 0x99, 0x94, 0x05, 0x09, 0xaa,
+ 0x02, 0xa4, 0x9f, 0x96, 0x05, 0x09, 0x49, 0x9a, 0x05, 0x09, 0x51, 0x92,
+ 0x05, 0x09, 0x60, 0x9a, 0x05, 0x09, 0x69, 0x92, 0x05, 0x09, 0x78, 0x96,
+ 0x05, 0x0d, 0xb9, 0x9a, 0x05, 0x0d, 0xc1, 0x92, 0x05, 0x0d, 0xd1, 0x87,
+ 0x05, 0x0d, 0xea, 0x02, 0xa4, 0xa3, 0x96, 0x05, 0x0d, 0xf9, 0x9a, 0x05,
+ 0x0e, 0x01, 0x92, 0x05, 0x0e, 0x10, 0x91, 0x05, 0x0e, 0x2b, 0x02, 0xa4,
+ 0xaf, 0x96, 0x05, 0x0e, 0x71, 0x9a, 0x05, 0x0e, 0x79, 0x92, 0x05, 0x0e,
+ 0x89, 0x94, 0x05, 0x0e, 0x9a, 0x02, 0xa4, 0xbb, 0x96, 0x05, 0x0e, 0x39,
+ 0x9a, 0x05, 0x0e, 0x41, 0x92, 0x05, 0x0e, 0x50, 0x9a, 0x05, 0x0e, 0x59,
+ 0x92, 0x05, 0x0e, 0x68, 0x96, 0x05, 0x0e, 0xa1, 0x9a, 0x05, 0x0e, 0xa9,
+ 0x92, 0x05, 0x0e, 0xb9, 0x94, 0x05, 0x0e, 0xca, 0x02, 0xa4, 0xbf, 0x96,
+ 0x05, 0x0e, 0xd1, 0x9a, 0x05, 0x0e, 0xd9, 0x92, 0x05, 0x0e, 0xe9, 0x87,
+ 0x05, 0x0f, 0x02, 0x02, 0xa4, 0xc3, 0x96, 0x05, 0x0f, 0x11, 0x9a, 0x05,
+ 0x0f, 0x19, 0x92, 0x05, 0x0f, 0x28, 0x91, 0x05, 0x0f, 0x43, 0x02, 0xa4,
+ 0xcf, 0x96, 0x05, 0x0f, 0x91, 0x9a, 0x05, 0x0f, 0x99, 0x92, 0x05, 0x0f,
+ 0xa9, 0x94, 0x05, 0x0f, 0xba, 0x02, 0xa4, 0xdb, 0x96, 0x05, 0x0f, 0x51,
+ 0x9a, 0x05, 0x0f, 0x59, 0x92, 0x05, 0x0f, 0x68, 0x96, 0x05, 0x0f, 0x71,
+ 0x9a, 0x05, 0x0f, 0x79, 0x92, 0x05, 0x0f, 0x88, 0x8b, 0x05, 0x10, 0xb3,
+ 0x02, 0xa4, 0xdf, 0x83, 0x05, 0x10, 0xe3, 0x02, 0xa4, 0xee, 0x97, 0x05,
+ 0x11, 0x63, 0x02, 0xa4, 0xfa, 0x91, 0x05, 0x11, 0x23, 0x02, 0xa5, 0x14,
+ 0x87, 0x05, 0x11, 0x42, 0x02, 0xa5, 0x20, 0x96, 0x05, 0x0f, 0xc1, 0x9a,
+ 0x05, 0x0f, 0xc9, 0x92, 0x05, 0x0f, 0xd9, 0x87, 0x05, 0x0f, 0xea, 0x02,
+ 0xa5, 0x2c, 0x96, 0x05, 0x0f, 0xf9, 0x9a, 0x05, 0x10, 0x01, 0x92, 0x05,
+ 0x10, 0x10, 0x91, 0x05, 0x10, 0x23, 0x02, 0xa5, 0x34, 0x96, 0x05, 0x10,
+ 0x71, 0x9a, 0x05, 0x10, 0x79, 0x92, 0x05, 0x10, 0x89, 0x94, 0x05, 0x10,
+ 0x9a, 0x02, 0xa5, 0x3c, 0x96, 0x05, 0x10, 0x31, 0x9a, 0x05, 0x10, 0x39,
+ 0x92, 0x05, 0x10, 0x48, 0x96, 0x05, 0x10, 0x51, 0x9a, 0x05, 0x10, 0x59,
+ 0x92, 0x05, 0x10, 0x68, 0x87, 0x05, 0x25, 0xd8, 0xc2, 0x03, 0x1e, 0x05,
+ 0x24, 0x99, 0xc2, 0x00, 0x11, 0x05, 0x25, 0x38, 0x92, 0x05, 0x24, 0xa1,
+ 0x96, 0x05, 0x25, 0x18, 0x9b, 0x05, 0x25, 0x81, 0xc2, 0x00, 0xd3, 0x05,
+ 0x25, 0xd1, 0xc2, 0x00, 0x97, 0x05, 0x26, 0x01, 0xc2, 0x00, 0x11, 0x05,
+ 0x26, 0x10, 0xc2, 0x00, 0x11, 0x05, 0x24, 0xb1, 0xc2, 0x00, 0xf1, 0x05,
+ 0x25, 0x30, 0xc2, 0x01, 0x33, 0x05, 0x24, 0xc9, 0xc2, 0x00, 0xf1, 0x05,
+ 0x24, 0xf9, 0xc2, 0x00, 0x11, 0x05, 0x25, 0xf8, 0x92, 0x05, 0x25, 0x11,
+ 0x94, 0x05, 0x26, 0x08, 0xc2, 0x01, 0xc4, 0x05, 0x25, 0x51, 0x9b, 0x05,
+ 0x25, 0xa9, 0xc2, 0x05, 0x4a, 0x05, 0x25, 0xb8, 0x8e, 0x08, 0x74, 0x60,
+ 0xc3, 0x34, 0x6e, 0x08, 0x74, 0x41, 0xc2, 0x07, 0x6e, 0x08, 0x74, 0x38,
+ 0x44, 0xe5, 0x7f, 0x42, 0xa5, 0x40, 0x8b, 0x00, 0xa7, 0x70, 0x91, 0x00,
+ 0xa8, 0xeb, 0x02, 0xa5, 0x5e, 0x83, 0x00, 0xa9, 0x0b, 0x02, 0xa5, 0x66,
+ 0x8b, 0x00, 0xa8, 0xcb, 0x02, 0xa5, 0x6a, 0x87, 0x00, 0xa8, 0xb8, 0x9b,
+ 0x00, 0xc6, 0x09, 0x83, 0x00, 0xa8, 0xb0, 0x9b, 0x00, 0xc6, 0x01, 0x91,
+ 0x00, 0xa8, 0xa0, 0x8b, 0x00, 0xa8, 0x90, 0xc2, 0x08, 0x86, 0x00, 0xa4,
+ 0x29, 0xc2, 0x14, 0x94, 0x00, 0xa4, 0x31, 0xc2, 0x28, 0x39, 0x00, 0xa4,
+ 0x39, 0xc2, 0x00, 0x45, 0x00, 0xa4, 0x40, 0x83, 0x00, 0xa8, 0x10, 0x8b,
+ 0x00, 0xa7, 0xd0, 0x91, 0x00, 0xa7, 0xf0, 0x43, 0x69, 0x04, 0xc2, 0xa5,
+ 0x6e, 0x0a, 0x42, 0xa5, 0x83, 0xc4, 0xe4, 0xcf, 0x00, 0xa9, 0xe9, 0x19,
+ 0xc2, 0xa5, 0x98, 0x15, 0xc2, 0xa5, 0xa4, 0xc4, 0xe5, 0x43, 0x00, 0xa4,
+ 0x11, 0xc3, 0xec, 0x7b, 0x00, 0xa8, 0x59, 0xc4, 0xe7, 0x0f, 0x00, 0xa5,
+ 0x01, 0xc4, 0xea, 0x07, 0x00, 0xa5, 0xd1, 0xc4, 0xe8, 0x57, 0x00, 0xa6,
+ 0x79, 0xc4, 0xe5, 0x0f, 0x00, 0xa3, 0x28, 0x8b, 0x00, 0xa6, 0x08, 0x91,
+ 0x00, 0xc6, 0x60, 0x8b, 0x00, 0xc6, 0x40, 0x83, 0x00, 0xa6, 0x68, 0x83,
+ 0x00, 0xb3, 0xb0, 0x91, 0x00, 0xb3, 0xa0, 0x8b, 0x00, 0xb3, 0x90, 0x8b,
+ 0x00, 0xb3, 0x81, 0x83, 0x00, 0xac, 0xa2, 0x02, 0xa5, 0xc5, 0x91, 0x00,
+ 0xac, 0x90, 0x8b, 0x00, 0xac, 0x80, 0x83, 0x00, 0xab, 0xcb, 0x02, 0xa5,
+ 0xc9, 0x91, 0x00, 0xab, 0xbb, 0x02, 0xa5, 0xcd, 0x8b, 0x00, 0xab, 0xab,
+ 0x02, 0xa5, 0xd1, 0x87, 0x00, 0xab, 0xa0, 0x06, 0xc2, 0xa5, 0xd5, 0x0c,
+ 0xc2, 0xa5, 0xe5, 0x09, 0xc2, 0xa6, 0x06, 0x16, 0xc2, 0xa6, 0x28, 0x42,
+ 0x11, 0x3f, 0xc2, 0xa6, 0x38, 0x1b, 0xc2, 0xa6, 0x4f, 0x0f, 0xc2, 0xa6,
+ 0x66, 0x10, 0xc2, 0xa6, 0x7d, 0x0d, 0xc2, 0xa6, 0x98, 0x92, 0x00, 0xaf,
+ 0x73, 0x02, 0xa6, 0xa3, 0x8a, 0x00, 0xa2, 0x5b, 0x02, 0xa6, 0xba, 0x19,
+ 0xc2, 0xa6, 0xc8, 0x14, 0xc2, 0xa6, 0xdf, 0x0e, 0xc2, 0xa6, 0xf6, 0xc2,
+ 0x01, 0xe6, 0x00, 0xa0, 0x41, 0x8b, 0x00, 0xa0, 0x4b, 0x02, 0xa7, 0x11,
+ 0x9c, 0x00, 0xb2, 0x33, 0x02, 0xa7, 0x17, 0x15, 0x42, 0xa7, 0x2e, 0x8b,
+ 0x00, 0xa4, 0x50, 0x91, 0x00, 0xa4, 0xd0, 0x8b, 0x00, 0xa4, 0xb0, 0x83,
+ 0x00, 0xa4, 0xf0, 0x83, 0x00, 0xad, 0xb9, 0x91, 0x00, 0xad, 0xb1, 0x8b,
+ 0x00, 0xad, 0xa9, 0x87, 0x00, 0xad, 0xa0, 0x83, 0x00, 0xad, 0xf9, 0x91,
+ 0x00, 0xad, 0xf1, 0x8b, 0x00, 0xad, 0xe9, 0x87, 0x00, 0xad, 0xe0, 0x83,
+ 0x00, 0xad, 0xd9, 0x91, 0x00, 0xad, 0xd1, 0x8b, 0x00, 0xad, 0xc9, 0x87,
+ 0x00, 0xad, 0xc0, 0x91, 0x00, 0xc7, 0x48, 0x83, 0x00, 0xab, 0x73, 0x02,
+ 0xa7, 0x4c, 0x91, 0x00, 0xab, 0x6b, 0x02, 0xa7, 0x50, 0xc2, 0x00, 0xc9,
+ 0x00, 0xc7, 0x29, 0x8b, 0x00, 0xab, 0x61, 0x87, 0x00, 0xab, 0x58, 0x83,
+ 0x00, 0xc7, 0x23, 0x02, 0xa7, 0x54, 0x87, 0x00, 0xc7, 0x18, 0x83, 0x00,
+ 0xab, 0x38, 0x91, 0x00, 0xab, 0x28, 0x8b, 0x00, 0xab, 0x10, 0x8b, 0x00,
+ 0xa2, 0x68, 0x83, 0x00, 0xad, 0x68, 0x91, 0x00, 0xad, 0x58, 0x8b, 0x00,
+ 0xad, 0x48, 0x91, 0x00, 0xa2, 0xf8, 0x8b, 0x00, 0xa2, 0xd8, 0x83, 0x00,
+ 0xa3, 0x18, 0x47, 0xcc, 0x06, 0xc2, 0xa7, 0x58, 0xc6, 0xbd, 0xbd, 0x00,
+ 0xc6, 0xe8, 0x48, 0xbc, 0xe3, 0x42, 0xa7, 0x9f, 0x14, 0xc2, 0xa7, 0xae,
+ 0x15, 0xc2, 0xa7, 0xb8, 0xc5, 0x34, 0x21, 0x00, 0xa0, 0xf9, 0xc5, 0x1e,
+ 0x64, 0x00, 0xa1, 0x01, 0xd0, 0x60, 0xdf, 0x00, 0xa1, 0x09, 0xcd, 0x77,
+ 0x4b, 0x00, 0xa1, 0x11, 0x42, 0x00, 0x68, 0xc2, 0xa7, 0xc4, 0xca, 0x39,
+ 0x9c, 0x00, 0xa1, 0x39, 0xc4, 0x21, 0x5e, 0x00, 0xa1, 0x48, 0x8b, 0x00,
+ 0xaa, 0xa0, 0x8a, 0x00, 0xc6, 0xd8, 0x19, 0x42, 0xa7, 0xd0, 0x83, 0x00,
+ 0xaa, 0x70, 0x91, 0x00, 0xc6, 0x90, 0x8b, 0x00, 0xc6, 0x80, 0x8b, 0x00,
+ 0xaa, 0x28, 0x8b, 0x00, 0xa9, 0x38, 0x83, 0x00, 0xa9, 0xd8, 0x91, 0x00,
+ 0xa9, 0xb8, 0x8b, 0x00, 0xa9, 0x98, 0x8b, 0x00, 0xa6, 0xa0, 0x83, 0x00,
+ 0xad, 0x28, 0x91, 0x00, 0xad, 0x18, 0x8b, 0x00, 0xad, 0x08, 0x8b, 0x00,
+ 0xa7, 0x00, 0x91, 0x00, 0xa7, 0x20, 0x83, 0x00, 0xa7, 0x40, 0x8b, 0x00,
+ 0xa5, 0x20, 0x94, 0x00, 0xaa, 0x91, 0x8e, 0x00, 0xa7, 0x60, 0xca, 0xa7,
+ 0x9a, 0x00, 0xa8, 0x48, 0x8b, 0x00, 0xa5, 0x80, 0x91, 0x00, 0xa5, 0xa0,
+ 0x83, 0x00, 0xa5, 0xc0, 0x9b, 0x00, 0xc5, 0xc9, 0x83, 0x00, 0xa4, 0x00,
+ 0x8b, 0x00, 0xa3, 0xc0, 0x91, 0x00, 0xa3, 0xe0, 0x8b, 0x00, 0xa3, 0x60,
+ 0x9b, 0x00, 0xc5, 0xb1, 0x91, 0x00, 0xa2, 0x10, 0x83, 0x00, 0xa2, 0x30,
+ 0x8b, 0x00, 0xa1, 0xf0, 0x8b, 0x00, 0xa1, 0x80, 0x8b, 0x00, 0xab, 0xf0,
+ 0x97, 0x08, 0x15, 0xd9, 0x9f, 0x08, 0x16, 0x41, 0xa0, 0x08, 0x16, 0x80,
+ 0xc3, 0x47, 0xf0, 0x08, 0x2a, 0x79, 0xc2, 0x0c, 0x22, 0x08, 0x2a, 0xa8,
+ 0xc2, 0x00, 0x95, 0x08, 0x29, 0xb9, 0x83, 0x08, 0x29, 0xd8, 0x83, 0x08,
+ 0x29, 0xcb, 0x02, 0xa7, 0xde, 0xc2, 0x67, 0x96, 0x08, 0x2a, 0x49, 0x8b,
+ 0x08, 0x2a, 0x50, 0x94, 0x08, 0x2a, 0x11, 0xc2, 0x16, 0x0a, 0x08, 0x2b,
+ 0x00, 0x9b, 0x08, 0x2a, 0x59, 0x99, 0x08, 0x2a, 0xf8, 0x83, 0x08, 0x29,
+ 0xeb, 0x02, 0xa7, 0xe2, 0xc2, 0x67, 0x96, 0x08, 0x2a, 0xe8, 0xc2, 0x0a,
+ 0x20, 0x01, 0x74, 0x19, 0xc4, 0x05, 0xde, 0x01, 0x74, 0x20, 0xce, 0x72,
+ 0x7e, 0x01, 0x75, 0x31, 0xc3, 0x00, 0xff, 0x01, 0x76, 0x30, 0xc3, 0x80,
+ 0xfc, 0x01, 0x76, 0x61, 0xc4, 0x91, 0x5a, 0x01, 0x77, 0x40, 0x89, 0x01,
+ 0x8f, 0x08, 0x83, 0x05, 0x5b, 0xb1, 0x87, 0x05, 0x5b, 0xc1, 0x8b, 0x05,
+ 0x5b, 0xc9, 0x91, 0x05, 0x5b, 0xd1, 0x97, 0x05, 0x5b, 0xd9, 0x98, 0x05,
+ 0x5b, 0xe0, 0x83, 0x05, 0x5d, 0xf9, 0x87, 0x00, 0x9f, 0xc1, 0x8b, 0x00,
+ 0x9f, 0xc9, 0x91, 0x00, 0x9f, 0xd1, 0x97, 0x00, 0x9f, 0xd9, 0x98, 0x00,
+ 0x9f, 0xe0, 0x98, 0x05, 0x5d, 0xf1, 0x97, 0x05, 0x5d, 0xe9, 0x91, 0x05,
+ 0x5d, 0xe1, 0x8b, 0x05, 0x5d, 0xd9, 0x87, 0x05, 0x5d, 0xd1, 0x83, 0x05,
+ 0x5d, 0xc8, 0x15, 0xc2, 0xa7, 0xe6, 0x0e, 0xc2, 0xa7, 0xfe, 0x83, 0x05,
+ 0x5d, 0x21, 0x8b, 0x05, 0x5d, 0x41, 0x87, 0x05, 0x5d, 0x30, 0x91, 0x05,
+ 0x5c, 0x99, 0x8b, 0x05, 0x5c, 0x91, 0x87, 0x05, 0x5c, 0x89, 0x83, 0x05,
+ 0x5c, 0x73, 0x02, 0xa8, 0x16, 0x97, 0x05, 0x5c, 0xa1, 0x98, 0x05, 0x5c,
+ 0xa8, 0xc2, 0x01, 0x01, 0x05, 0x5c, 0x79, 0x83, 0x05, 0x5b, 0xe9, 0x87,
+ 0x05, 0x5b, 0xf1, 0x8b, 0x05, 0x5b, 0xf9, 0x91, 0x05, 0x5c, 0x01, 0x97,
+ 0x05, 0x5c, 0x09, 0x98, 0x05, 0x5c, 0x10, 0x97, 0x05, 0x5c, 0x69, 0x91,
+ 0x05, 0x5c, 0x61, 0x8b, 0x05, 0x5c, 0x59, 0x87, 0x05, 0x5c, 0x51, 0x83,
+ 0x05, 0x5c, 0x49, 0x98, 0x00, 0x9f, 0xe8, 0x98, 0x05, 0x5c, 0x41, 0x97,
+ 0x05, 0x5c, 0x39, 0x91, 0x05, 0x5c, 0x31, 0x8b, 0x05, 0x5c, 0x29, 0x87,
+ 0x05, 0x5c, 0x21, 0x83, 0x05, 0x5c, 0x18, 0x83, 0x05, 0x5c, 0xb1, 0x87,
+ 0x05, 0x5c, 0xb9, 0x8b, 0x05, 0x5c, 0xc1, 0x91, 0x05, 0x5c, 0xc9, 0x97,
+ 0x05, 0x5c, 0xd1, 0x98, 0x05, 0x5c, 0xd8, 0x83, 0x05, 0x5c, 0xe1, 0x87,
+ 0x05, 0x5c, 0xf1, 0x8b, 0x05, 0x5c, 0xf9, 0x91, 0x05, 0x5d, 0x01, 0x97,
+ 0x05, 0x5d, 0x09, 0x98, 0x05, 0x5d, 0x10, 0x83, 0x05, 0x5d, 0x19, 0x87,
+ 0x05, 0x5d, 0x29, 0x8b, 0x05, 0x5d, 0x39, 0x91, 0x05, 0x5d, 0x49, 0x97,
+ 0x05, 0x5d, 0x51, 0x98, 0x05, 0x5d, 0x59, 0xc2, 0x00, 0x96, 0x05, 0x5d,
+ 0x60, 0x83, 0x00, 0x9d, 0x31, 0x87, 0x00, 0x9d, 0x41, 0x8b, 0x00, 0x9d,
+ 0x49, 0x91, 0x00, 0x9d, 0x51, 0x97, 0x00, 0x9d, 0x59, 0x98, 0x00, 0x9d,
+ 0x60, 0x83, 0x00, 0x9d, 0x69, 0x87, 0x00, 0x9d, 0x71, 0x8b, 0x00, 0x9d,
+ 0x79, 0x91, 0x00, 0x9d, 0x81, 0x97, 0x00, 0x9d, 0x89, 0x98, 0x00, 0x9d,
+ 0x91, 0xc2, 0x01, 0x01, 0x00, 0x9d, 0xf8, 0x83, 0x00, 0x9d, 0x99, 0x87,
+ 0x00, 0x9d, 0xa1, 0x8b, 0x00, 0x9d, 0xa9, 0x91, 0x00, 0x9d, 0xb1, 0x97,
+ 0x00, 0x9d, 0xb9, 0x98, 0x00, 0x9d, 0xc0, 0x83, 0x00, 0x9d, 0xc9, 0x87,
+ 0x00, 0x9d, 0xd1, 0x8b, 0x00, 0x9d, 0xd9, 0x91, 0x00, 0x9d, 0xe1, 0x97,
+ 0x00, 0x9d, 0xe9, 0x98, 0x00, 0x9f, 0xa8, 0x83, 0x00, 0x9d, 0xf3, 0x02,
+ 0xa8, 0x1a, 0x87, 0x00, 0x9e, 0x09, 0x8b, 0x00, 0x9e, 0x11, 0x91, 0x00,
+ 0x9e, 0x19, 0x97, 0x00, 0x9e, 0x21, 0x98, 0x00, 0x9e, 0x28, 0x83, 0x00,
+ 0x9e, 0x31, 0x87, 0x00, 0x9e, 0x39, 0x8b, 0x00, 0x9e, 0x41, 0x91, 0x00,
+ 0x9e, 0x49, 0x97, 0x00, 0x9e, 0x51, 0x98, 0x00, 0x9e, 0x58, 0x83, 0x00,
+ 0x9e, 0x61, 0x87, 0x00, 0x9e, 0x71, 0x8b, 0x00, 0x9e, 0x79, 0x91, 0x00,
+ 0x9e, 0x81, 0x97, 0x00, 0x9e, 0x89, 0x98, 0x00, 0x9e, 0x90, 0x83, 0x00,
+ 0x9e, 0x99, 0x87, 0x00, 0x9e, 0xa9, 0x8b, 0x00, 0x9e, 0xb9, 0x91, 0x00,
+ 0x9e, 0xc9, 0x97, 0x00, 0x9e, 0xd1, 0x98, 0x00, 0x9e, 0xd9, 0xc2, 0x00,
+ 0x96, 0x00, 0x9e, 0xe0, 0x83, 0x00, 0x9e, 0xa1, 0x87, 0x00, 0x9e, 0xb1,
+ 0x8b, 0x00, 0x9e, 0xc1, 0x0e, 0xc2, 0xa8, 0x1e, 0x15, 0x42, 0xa8, 0x36,
+ 0x83, 0x00, 0x9f, 0x49, 0x87, 0x00, 0x9f, 0x51, 0x8b, 0x00, 0x9f, 0x59,
+ 0x91, 0x00, 0x9f, 0x61, 0x97, 0x00, 0x9f, 0x69, 0x98, 0x00, 0x9f, 0x70,
+ 0x83, 0x00, 0x9f, 0x79, 0x87, 0x00, 0x9f, 0x81, 0x8b, 0x00, 0x9f, 0x89,
+ 0x91, 0x00, 0x9f, 0x91, 0x97, 0x00, 0x9f, 0x99, 0x98, 0x00, 0x9f, 0xa0,
+ 0xc3, 0x0d, 0x99, 0x00, 0x04, 0x41, 0xd2, 0x4d, 0x04, 0x00, 0x04, 0x48,
+ 0xc3, 0x3b, 0xb0, 0x08, 0x88, 0xa1, 0xc2, 0x0e, 0x30, 0x08, 0x88, 0x98,
+ 0xc3, 0x3b, 0xb0, 0x08, 0x88, 0x91, 0xc2, 0x0e, 0x30, 0x08, 0x88, 0x88,
+ 0x8b, 0x08, 0x8a, 0x30, 0x83, 0x08, 0x8a, 0x29, 0x97, 0x08, 0x89, 0x79,
+ 0x8b, 0x08, 0x89, 0x68, 0x8b, 0x08, 0x89, 0x80, 0x97, 0x08, 0x89, 0x58,
+ 0x8b, 0x08, 0x89, 0x48, 0xc4, 0x15, 0xa7, 0x08, 0x89, 0xe9, 0xc2, 0x22,
+ 0x45, 0x08, 0x89, 0xe0, 0xc3, 0x0d, 0x8f, 0x08, 0x89, 0xd9, 0xc3, 0x08,
+ 0xde, 0x08, 0x89, 0xd0, 0xc4, 0x05, 0xde, 0x08, 0x89, 0xc9, 0xc2, 0x0a,
+ 0x20, 0x08, 0x89, 0xc0, 0xc2, 0x0e, 0x78, 0x05, 0x50, 0x51, 0x83, 0x05,
+ 0x50, 0x58, 0xc2, 0x26, 0x94, 0x05, 0x50, 0x91, 0x83, 0x05, 0x50, 0x89,
+ 0xc2, 0x0e, 0x78, 0x05, 0x50, 0x80, 0x89, 0x05, 0x52, 0x10, 0xc4, 0x15,
+ 0xa9, 0x08, 0x7e, 0x51, 0x91, 0x08, 0x7e, 0x30, 0xd7, 0x2a, 0x06, 0x0f,
+ 0xaa, 0x08, 0xce, 0x6c, 0xb2, 0x01, 0x72, 0x81, 0xcd, 0x79, 0xbb, 0x01,
+ 0x72, 0x88, 0xe0, 0x04, 0x27, 0x0f, 0x04, 0x78, 0xce, 0x70, 0x24, 0x00,
+ 0x24, 0x41, 0xcd, 0x36, 0xde, 0x05, 0x33, 0x88, 0xc7, 0xc7, 0x1a, 0x00,
+ 0x24, 0x39, 0xcd, 0x7d, 0xa4, 0x00, 0x24, 0x31, 0x03, 0x42, 0xa8, 0x4e,
+ 0xc4, 0x97, 0xff, 0x00, 0x24, 0x1b, 0x02, 0xa8, 0x5a, 0xd0, 0x5a, 0x2f,
+ 0x05, 0x33, 0x81, 0xd5, 0x36, 0xd6, 0x05, 0x33, 0x90, 0x07, 0xc2, 0xa8,
+ 0x5e, 0x8b, 0x05, 0x33, 0xab, 0x02, 0xa8, 0x7c, 0x97, 0x05, 0x33, 0xbb,
+ 0x02, 0xa8, 0x86, 0x1b, 0xc2, 0xa8, 0x8c, 0xc2, 0x01, 0x0e, 0x01, 0x6f,
+ 0x7b, 0x02, 0xa8, 0xa0, 0x15, 0xc2, 0xa8, 0xa6, 0x91, 0x01, 0x6f, 0x53,
+ 0x02, 0xa8, 0xb0, 0x04, 0xc2, 0xa8, 0xb6, 0xc2, 0x00, 0x29, 0x01, 0x6f,
+ 0x09, 0xc3, 0xc8, 0xfd, 0x01, 0x6f, 0x11, 0x06, 0xc2, 0xa8, 0xc0, 0x1c,
+ 0xc2, 0xa8, 0xca, 0xc2, 0x06, 0x6b, 0x01, 0x6f, 0x31, 0xc2, 0x03, 0x07,
+ 0x01, 0x6f, 0x59, 0x16, 0xc2, 0xa8, 0xd4, 0xc3, 0x27, 0xc3, 0x01, 0x6f,
+ 0x89, 0xc4, 0xe6, 0xa3, 0x01, 0x6f, 0xa1, 0x83, 0x01, 0x6f, 0xb1, 0xcc,
+ 0x85, 0x50, 0x01, 0x6f, 0xc9, 0xca, 0x56, 0x38, 0x01, 0x6f, 0xe8, 0xc6,
+ 0x00, 0x33, 0x00, 0x19, 0x60, 0xc5, 0x03, 0x50, 0x00, 0x18, 0x9b, 0x02,
+ 0xa8, 0xde, 0xc5, 0x00, 0x34, 0x00, 0x19, 0x30, 0xc6, 0x00, 0x33, 0x07,
+ 0xf1, 0x68, 0xc3, 0x01, 0xfa, 0x0f, 0x01, 0x51, 0xc4, 0xad, 0xf9, 0x0f,
+ 0x00, 0xb8, 0x47, 0x19, 0x66, 0xc2, 0xa8, 0xe4, 0xcb, 0x95, 0x90, 0x0f,
+ 0x00, 0x51, 0xc3, 0x80, 0xcf, 0x0f, 0x00, 0x48, 0xc6, 0xcc, 0x54, 0x0f,
+ 0x01, 0x41, 0xc3, 0xcf, 0x2a, 0x0f, 0x00, 0x08, 0x91, 0x0f, 0x01, 0x31,
+ 0x97, 0x0f, 0x01, 0x19, 0xc3, 0x00, 0xf4, 0x0f, 0x01, 0x09, 0x07, 0x42,
+ 0xa8, 0xf0, 0xc8, 0xac, 0x8f, 0x0f, 0x01, 0x21, 0x0a, 0xc2, 0xa8, 0xfa,
+ 0xc4, 0xe6, 0x3b, 0x0f, 0x00, 0xa0, 0xc2, 0x02, 0x29, 0x0f, 0x00, 0xe1,
+ 0xc5, 0xe3, 0x73, 0x0f, 0x00, 0xa8, 0xc5, 0xdd, 0x83, 0x0f, 0x00, 0x61,
+ 0xc4, 0xe9, 0x5b, 0x0f, 0x00, 0x20, 0xc5, 0xe2, 0xd3, 0x0f, 0x00, 0x41,
+ 0xc6, 0xd6, 0x34, 0x0f, 0x00, 0x30, 0x48, 0x23, 0x17, 0xc2, 0xa9, 0x04,
+ 0xcb, 0x90, 0xcb, 0x00, 0x1a, 0x11, 0xc7, 0xcd, 0xe9, 0x00, 0x1a, 0x19,
+ 0xcf, 0x68, 0x8e, 0x00, 0x1a, 0x21, 0xcd, 0x49, 0x19, 0x00, 0x1a, 0x28,
+ 0x45, 0xe2, 0x88, 0xc2, 0xa9, 0x0e, 0x18, 0xc2, 0xa9, 0x1a, 0xcc, 0x88,
+ 0xb0, 0x00, 0x1a, 0x78, 0xcc, 0x8a, 0xe4, 0x01, 0x06, 0xd1, 0xcb, 0x09,
+ 0x4c, 0x01, 0x06, 0xa0, 0xc6, 0x07, 0xa1, 0x00, 0x18, 0x0b, 0x02, 0xa9,
+ 0x2c, 0xc9, 0x2a, 0xe3, 0x00, 0x1a, 0x08, 0x00, 0xc2, 0xa9, 0x32, 0x19,
+ 0x42, 0xa9, 0x4a, 0xc7, 0x1f, 0x7c, 0x01, 0x06, 0xc1, 0xc5, 0x03, 0x50,
+ 0x00, 0x18, 0x51, 0xc5, 0x00, 0x34, 0x00, 0x19, 0x28, 0xd0, 0x2e, 0x53,
+ 0x01, 0x07, 0x29, 0xcd, 0x53, 0xc0, 0x00, 0x18, 0xa0, 0x03, 0xc2, 0xa9,
+ 0x50, 0x4c, 0x09, 0xb6, 0xc2, 0xa9, 0x5c, 0x42, 0x01, 0x0e, 0xc2, 0xa9,
+ 0x68, 0x4c, 0x1c, 0xe0, 0xc2, 0xa9, 0x74, 0xca, 0x9c, 0x24, 0x00, 0x18,
+ 0xc0, 0xdb, 0x0b, 0xac, 0x01, 0x07, 0x69, 0xcd, 0x81, 0x4c, 0x01, 0x07,
+ 0x50, 0xd6, 0x2e, 0x4d, 0x01, 0x07, 0x59, 0xd5, 0x36, 0x6d, 0x01, 0x06,
+ 0x91, 0x15, 0x42, 0xa9, 0x80, 0x97, 0x00, 0x1b, 0x3b, 0x02, 0xa9, 0x8c,
+ 0x91, 0x00, 0x1b, 0x33, 0x02, 0xa9, 0x92, 0x83, 0x00, 0x1b, 0x1b, 0x02,
+ 0xa9, 0x98, 0x99, 0x00, 0xef, 0x8b, 0x02, 0xa9, 0xb0, 0x87, 0x00, 0x1b,
+ 0x23, 0x02, 0xa9, 0xb6, 0x92, 0x00, 0xef, 0x71, 0x8e, 0x00, 0xee, 0xeb,
+ 0x02, 0xa9, 0xc2, 0x88, 0x00, 0xef, 0x5b, 0x02, 0xa9, 0xce, 0x95, 0x00,
+ 0xef, 0x23, 0x02, 0xa9, 0xd4, 0x84, 0x00, 0xef, 0x43, 0x02, 0xa9, 0xda,
+ 0x9c, 0x00, 0xef, 0x31, 0x94, 0x00, 0x1b, 0x63, 0x02, 0xa9, 0xe0, 0x90,
+ 0x00, 0xef, 0x01, 0x8d, 0x00, 0xee, 0xe1, 0x89, 0x00, 0xee, 0xd1, 0x8b,
+ 0x00, 0x1b, 0x2b, 0x02, 0xa9, 0xe4, 0x85, 0x00, 0x1b, 0x43, 0x02, 0xa9,
+ 0xea, 0x96, 0x00, 0x1b, 0x6b, 0x02, 0xa9, 0xf0, 0x86, 0x00, 0x1b, 0x49,
+ 0x8a, 0x00, 0x1b, 0x51, 0x8f, 0x00, 0x1b, 0x59, 0x98, 0x00, 0x1b, 0x71,
+ 0x9a, 0x00, 0x1b, 0x78, 0x94, 0x00, 0xef, 0x11, 0x90, 0x00, 0xef, 0x09,
+ 0x8f, 0x00, 0xee, 0xf9, 0x8e, 0x00, 0xee, 0xf1, 0x89, 0x00, 0xee, 0xd8,
+ 0xc9, 0x0f, 0xa9, 0x07, 0xf1, 0x03, 0x02, 0xa9, 0xf6, 0xca, 0x01, 0x17,
+ 0x07, 0xf1, 0x0a, 0x02, 0xa9, 0xfc, 0xc5, 0x00, 0x34, 0x00, 0x19, 0x81,
+ 0xc7, 0x1f, 0x7c, 0x00, 0x19, 0xa1, 0xcf, 0x6c, 0x30, 0x07, 0xf1, 0x49,
+ 0xd0, 0x5d, 0x6f, 0x07, 0xf1, 0x50, 0x00, 0xc2, 0xaa, 0x02, 0xd3, 0x43,
+ 0x61, 0x00, 0xd5, 0x80, 0x00, 0xc2, 0xaa, 0x52, 0x44, 0x04, 0x8d, 0x42,
+ 0xaa, 0x64, 0xcb, 0x01, 0x3c, 0x00, 0xd5, 0x99, 0xcb, 0x9c, 0x23, 0x00,
+ 0x18, 0xf0, 0xcd, 0x7d, 0xf2, 0x05, 0x47, 0x89, 0x47, 0x01, 0xff, 0xc2,
+ 0xaa, 0x70, 0x46, 0x08, 0xd7, 0x42, 0xaa, 0x96, 0xc6, 0x00, 0x33, 0x00,
+ 0x19, 0x20, 0xc5, 0x54, 0x16, 0x01, 0x07, 0x11, 0xc5, 0x01, 0xea, 0x01,
+ 0x06, 0xf0, 0xca, 0x02, 0xdd, 0x01, 0x07, 0x00, 0xcd, 0x3f, 0xd7, 0x00,
+ 0x19, 0xa9, 0xce, 0x2e, 0x55, 0x00, 0x19, 0xb8, 0xc8, 0xb9, 0x7b, 0x00,
+ 0xee, 0x59, 0x87, 0x00, 0x18, 0x32, 0x02, 0xaa, 0xba, 0xc5, 0x00, 0x34,
+ 0x00, 0x19, 0x51, 0xc5, 0x03, 0x50, 0x00, 0x1a, 0x30, 0xc5, 0x03, 0x50,
+ 0x00, 0xef, 0xa9, 0xc5, 0x00, 0x34, 0x00, 0x18, 0xe8, 0x4c, 0x82, 0x50,
+ 0xc2, 0xaa, 0xc0, 0x42, 0x00, 0xd8, 0x42, 0xaa, 0xcc, 0x00, 0x42, 0xaa,
+ 0xdb, 0xc7, 0x1f, 0x7c, 0x00, 0xd5, 0xf1, 0xc5, 0x00, 0x34, 0x00, 0xd5,
+ 0xe9, 0xc5, 0x03, 0x50, 0x00, 0xd5, 0xd8, 0xc4, 0x15, 0xa7, 0x0e, 0x9b,
+ 0x79, 0xc2, 0x22, 0x45, 0x0e, 0x9b, 0x70, 0xc3, 0x0d, 0x8f, 0x0e, 0x9b,
+ 0x69, 0xc3, 0x08, 0xde, 0x0e, 0x9b, 0x60, 0xc4, 0x05, 0xde, 0x0e, 0x9b,
+ 0x59, 0xc2, 0x0a, 0x20, 0x0e, 0x9b, 0x50, 0xc4, 0x15, 0xa7, 0x0e, 0x9b,
+ 0x31, 0xc2, 0x22, 0x45, 0x0e, 0x9b, 0x28, 0xc3, 0x0d, 0x8f, 0x0e, 0x9b,
+ 0x21, 0xc3, 0x08, 0xde, 0x0e, 0x9b, 0x18, 0xc4, 0x05, 0xde, 0x0e, 0x9b,
+ 0x11, 0xc2, 0x0a, 0x20, 0x0e, 0x9b, 0x08, 0xe0, 0x03, 0x67, 0x01, 0x17,
+ 0xd8, 0xcc, 0x25, 0x70, 0x01, 0x15, 0xa8, 0x0a, 0xc2, 0xaa, 0xf3, 0xc3,
+ 0x4f, 0xef, 0x01, 0x64, 0xa9, 0xc2, 0x02, 0x29, 0x01, 0x64, 0xe8, 0xc3,
+ 0x03, 0x77, 0x00, 0x1f, 0x49, 0xc3, 0x00, 0x97, 0x01, 0x64, 0x78, 0xc4,
+ 0xd5, 0xca, 0x00, 0x1f, 0x59, 0xc3, 0x00, 0xec, 0x01, 0x64, 0x28, 0x0a,
+ 0xc2, 0xaa, 0xfd, 0xc2, 0x01, 0xc3, 0x01, 0x64, 0x59, 0xc3, 0x08, 0xea,
+ 0x01, 0x65, 0x29, 0xc4, 0x89, 0x24, 0x01, 0x66, 0x08, 0xc2, 0x05, 0x1b,
+ 0x00, 0x1f, 0x79, 0xc4, 0xe5, 0xd3, 0x01, 0x64, 0x39, 0x49, 0xb2, 0xac,
+ 0x42, 0xab, 0x09, 0xc3, 0xde, 0xea, 0x01, 0x64, 0x09, 0xcc, 0x8e, 0x5c,
+ 0x01, 0x66, 0x48, 0xc5, 0xdf, 0xe0, 0x01, 0x64, 0x89, 0xc2, 0x20, 0xa8,
+ 0x01, 0x65, 0x38, 0xc2, 0x01, 0xc3, 0x01, 0x65, 0x89, 0x43, 0x1e, 0x71,
+ 0x42, 0xab, 0x21, 0x8b, 0x01, 0x65, 0x09, 0xc2, 0x02, 0x29, 0x01, 0x65,
+ 0x78, 0x8b, 0x01, 0x65, 0x59, 0xc2, 0x05, 0x1b, 0x00, 0x1f, 0x28, 0x4c,
+ 0x1d, 0xb2, 0xc2, 0xab, 0x2d, 0xca, 0xa2, 0x54, 0x01, 0x66, 0x18, 0xc2,
+ 0x01, 0xcc, 0x01, 0x67, 0x21, 0xc5, 0xdf, 0xd6, 0x01, 0x67, 0x48, 0xc6,
+ 0xd3, 0x04, 0x01, 0x67, 0x39, 0xc9, 0xab, 0x1d, 0x01, 0x67, 0x50, 0xc3,
+ 0x03, 0x77, 0x00, 0x1f, 0x41, 0xc3, 0x00, 0x97, 0x01, 0x64, 0x70, 0xc4,
+ 0xd5, 0xca, 0x00, 0x1f, 0x51, 0xc3, 0x00, 0xec, 0x01, 0x64, 0x20, 0x0a,
+ 0xc2, 0xab, 0x45, 0xc2, 0x01, 0xc3, 0x01, 0x64, 0x51, 0xc3, 0x08, 0xea,
+ 0x01, 0x65, 0x21, 0xc4, 0x89, 0x24, 0x01, 0x66, 0x00, 0xc2, 0x05, 0x1b,
+ 0x00, 0x1f, 0x71, 0xc4, 0xe5, 0xd3, 0x01, 0x64, 0x31, 0x49, 0xb2, 0xac,
+ 0x42, 0xab, 0x51, 0xc3, 0xde, 0xea, 0x01, 0x64, 0x01, 0xcc, 0x8e, 0x5c,
+ 0x01, 0x66, 0x40, 0xc5, 0xdf, 0xe0, 0x01, 0x64, 0x81, 0xc2, 0x20, 0xa8,
+ 0x01, 0x65, 0x30, 0xc3, 0x4f, 0xef, 0x01, 0x64, 0xa1, 0xc2, 0x02, 0x29,
+ 0x01, 0x64, 0xe1, 0x0a, 0x42, 0xab, 0x69, 0xc2, 0x01, 0xc3, 0x01, 0x65,
+ 0x81, 0x43, 0x1e, 0x71, 0x42, 0xab, 0x73, 0x8b, 0x01, 0x65, 0x01, 0xc2,
+ 0x02, 0x29, 0x01, 0x65, 0x70, 0x8b, 0x01, 0x65, 0x51, 0xc2, 0x05, 0x1b,
+ 0x00, 0x1f, 0x20, 0x4c, 0x1d, 0xb2, 0xc2, 0xab, 0x7f, 0xca, 0xa2, 0x54,
+ 0x01, 0x66, 0x10, 0xc5, 0xdd, 0x65, 0x01, 0x67, 0x81, 0xc5, 0x3e, 0x40,
+ 0x01, 0x67, 0x88, 0xc2, 0x0a, 0x20, 0x08, 0x17, 0x11, 0xc4, 0x05, 0xde,
+ 0x08, 0x17, 0x18, 0xc3, 0x08, 0xde, 0x08, 0x17, 0x21, 0xc3, 0x0d, 0x8f,
+ 0x08, 0x17, 0x28, 0xc2, 0x22, 0x45, 0x08, 0x17, 0x31, 0xc4, 0x15, 0xa7,
+ 0x08, 0x17, 0x38, 0xc2, 0x01, 0x04, 0x08, 0x17, 0x51, 0x19, 0xc2, 0xab,
+ 0x97, 0x0a, 0x42, 0xab, 0xa3, 0x11, 0xc2, 0xab, 0xaf, 0x0b, 0x42, 0xab,
+ 0xbb, 0x42, 0x22, 0x45, 0xc2, 0xab, 0xc7, 0x44, 0x15, 0xa7, 0x42, 0xab,
+ 0xd3, 0x9b, 0x08, 0x17, 0x89, 0xc8, 0x0d, 0x7e, 0x08, 0x17, 0xd0, 0xc2,
+ 0x0d, 0x8b, 0x08, 0x17, 0x91, 0xc8, 0x0d, 0x7e, 0x08, 0x17, 0xd8, 0xd2,
+ 0x4b, 0x0c, 0x01, 0x52, 0x80, 0xcc, 0x25, 0x70, 0x01, 0x56, 0x88, 0xcc,
+ 0x25, 0x70, 0x01, 0x56, 0x90, 0xe0, 0x07, 0x07, 0x0f, 0xa8, 0x0a, 0x02,
+ 0xab, 0xdf, 0x43, 0x25, 0x4d, 0xc2, 0xab, 0xe5, 0xc8, 0xc1, 0x4b, 0x0f,
+ 0xaa, 0xe9, 0xc8, 0xc1, 0x5b, 0x0f, 0xaa, 0x88, 0xc8, 0xbd, 0x93, 0x0f,
+ 0xab, 0x29, 0xc8, 0xbd, 0xfb, 0x0f, 0xaa, 0xc8, 0xc8, 0xbc, 0xd3, 0x0f,
+ 0xaa, 0xf1, 0xc8, 0xc0, 0xe3, 0x0f, 0xaa, 0x90, 0xc8, 0xbd, 0x93, 0x0f,
+ 0xab, 0x31, 0xc8, 0xbd, 0xfb, 0x0f, 0xaa, 0xd0, 0xc8, 0xc3, 0xa3, 0x0f,
+ 0xab, 0x19, 0xc8, 0xb8, 0xcb, 0x0f, 0xaa, 0xb8, 0xc8, 0xbd, 0x93, 0x0f,
+ 0xab, 0x11, 0xc8, 0xbd, 0xfb, 0x0f, 0xaa, 0xb0, 0xc8, 0xc1, 0x6b, 0x0f,
+ 0xab, 0x09, 0xc8, 0xc0, 0xf3, 0x0f, 0xaa, 0xa8, 0xc8, 0xbc, 0xd3, 0x0f,
+ 0xab, 0x01, 0xc8, 0xc0, 0xe3, 0x0f, 0xaa, 0xa0, 0xc8, 0xbf, 0x53, 0x0f,
+ 0xaa, 0xf9, 0xc8, 0xc3, 0x53, 0x0f, 0xaa, 0x98, 0x00, 0xc2, 0xab, 0xf1,
+ 0xc9, 0xb0, 0x87, 0x01, 0x36, 0x90, 0x0d, 0xc2, 0xac, 0x00, 0xc5, 0xc4,
+ 0x44, 0x01, 0x93, 0x0b, 0x02, 0xac, 0x12, 0x16, 0xc2, 0xac, 0x18, 0xc5,
+ 0xc3, 0xd4, 0x01, 0x93, 0x1b, 0x02, 0xac, 0x2a, 0xc5, 0xc4, 0x7c, 0x01,
+ 0x93, 0x23, 0x02, 0xac, 0x30, 0x12, 0xc2, 0xac, 0x36, 0xc4, 0x95, 0xb8,
+ 0x01, 0x93, 0x33, 0x02, 0xac, 0x48, 0xc5, 0xc2, 0x9e, 0x01, 0x93, 0x3b,
+ 0x02, 0xac, 0x4e, 0x05, 0xc2, 0xac, 0x52, 0xc5, 0x7b, 0x22, 0x01, 0x93,
+ 0x6a, 0x02, 0xac, 0x64, 0xc4, 0x0e, 0xa5, 0x01, 0x39, 0x51, 0xc6, 0x19,
+ 0x7a, 0x01, 0x4d, 0xf0, 0x44, 0x08, 0xd9, 0xc2, 0xac, 0x6a, 0x48, 0x2a,
+ 0xca, 0x42, 0xac, 0x8e, 0xca, 0x30, 0x7f, 0x01, 0x14, 0xc9, 0x0e, 0x42,
+ 0xac, 0x9a, 0x4d, 0x27, 0x71, 0xc2, 0xac, 0xa0, 0x4f, 0x01, 0xf7, 0x42,
+ 0xad, 0x08, 0x42, 0x00, 0xc9, 0xc2, 0xad, 0x70, 0x44, 0x0d, 0x88, 0xc2,
+ 0xad, 0x7f, 0xc2, 0x01, 0x04, 0x01, 0x23, 0x4a, 0x02, 0xad, 0x8c, 0x44,
+ 0x00, 0x48, 0xc2, 0xad, 0x92, 0xc5, 0x66, 0x81, 0x01, 0x23, 0x50, 0x45,
+ 0x15, 0xa7, 0xc2, 0xad, 0x9e, 0x43, 0x22, 0x45, 0x42, 0xad, 0xaa, 0x43,
+ 0x12, 0xf0, 0xc2, 0xad, 0xb6, 0x11, 0x42, 0xad, 0xc3, 0xc5, 0x00, 0x67,
+ 0x01, 0x1c, 0x50, 0xd7, 0x2a, 0xbe, 0x01, 0x4d, 0xe1, 0xc7, 0x00, 0xc0,
+ 0x0f, 0x88, 0x70, 0xe0, 0x07, 0x87, 0x01, 0x51, 0xb0, 0x03, 0xc2, 0xad,
+ 0xd2, 0xc8, 0x2d, 0xa6, 0x01, 0x92, 0x21, 0x0d, 0xc2, 0xad, 0xea, 0x15,
+ 0xc2, 0xad, 0xf6, 0xc3, 0x05, 0x17, 0x01, 0x94, 0x01, 0x16, 0xc2, 0xae,
+ 0x1a, 0x08, 0xc2, 0xae, 0x2c, 0x07, 0xc2, 0xae, 0x3c, 0x10, 0xc2, 0xae,
+ 0x54, 0x0f, 0xc2, 0xae, 0x5e, 0x19, 0xc2, 0xae, 0x6e, 0x0a, 0xc2, 0xae,
+ 0x7a, 0x05, 0xc2, 0xae, 0x86, 0x0e, 0xc2, 0xae, 0x90, 0xc5, 0xbc, 0x4d,
+ 0x01, 0x94, 0xf1, 0xc4, 0xb1, 0x19, 0x01, 0x95, 0x01, 0x14, 0x42, 0xae,
+ 0xa2, 0x85, 0x0f, 0x89, 0x59, 0x94, 0x0f, 0x89, 0x60, 0xc6, 0xd6, 0x0a,
+ 0x01, 0x93, 0xe1, 0xc5, 0xdd, 0x6a, 0x01, 0x93, 0xe8, 0x83, 0x01, 0x96,
+ 0x81, 0x8b, 0x01, 0x96, 0x89, 0x97, 0x01, 0x96, 0x91, 0x87, 0x01, 0x96,
+ 0x99, 0x91, 0x01, 0x96, 0xa1, 0x0d, 0xc2, 0xae, 0xac, 0x15, 0xc2, 0xae,
+ 0xc0, 0x16, 0xc2, 0xae, 0xd4, 0x10, 0xc2, 0xae, 0xe8, 0x0a, 0xc2, 0xae,
+ 0xfc, 0x0f, 0xc2, 0xaf, 0x10, 0x1b, 0xc2, 0xaf, 0x24, 0x14, 0xc2, 0xaf,
+ 0x30, 0x19, 0x42, 0xaf, 0x44, 0xe0, 0x0b, 0x07, 0x01, 0x2e, 0xa8, 0xd4,
+ 0x3c, 0x9e, 0x01, 0x2e, 0xa1, 0xca, 0x20, 0x73, 0x01, 0x2e, 0x98, 0xcf,
+ 0x62, 0xee, 0x01, 0x2e, 0x91, 0xce, 0x6c, 0x4f, 0x01, 0x2e, 0x80, 0xe0,
+ 0x06, 0x07, 0x01, 0x4e, 0x18, 0xd8, 0x22, 0x1c, 0x01, 0x4e, 0x11, 0xcd,
+ 0x7d, 0xff, 0x01, 0x4d, 0xd8, 0x47, 0x07, 0x6c, 0x42, 0xaf, 0x54, 0xd1,
+ 0x55, 0x87, 0x09, 0x1a, 0xf9, 0xc4, 0x5b, 0x6b, 0x09, 0x1a, 0xf0, 0xca,
+ 0xa3, 0xee, 0x09, 0x1b, 0x38, 0x47, 0x07, 0x6c, 0xc2, 0xaf, 0x5e, 0xc2,
+ 0x0c, 0x25, 0x09, 0x1a, 0x7a, 0x02, 0xaf, 0xa1, 0x00, 0x42, 0xaf, 0xa7,
+ 0xa0, 0x09, 0x19, 0xb0, 0xc7, 0x6f, 0x36, 0x09, 0x19, 0x51, 0xcb, 0x9b,
+ 0xd6, 0x09, 0x19, 0x48, 0xc2, 0x02, 0x69, 0x09, 0x18, 0x68, 0xda, 0x1a,
+ 0xd2, 0x09, 0x18, 0x81, 0xcc, 0x8f, 0x04, 0x09, 0x18, 0x79, 0xd7, 0x29,
+ 0xef, 0x09, 0x18, 0x70, 0xc2, 0x01, 0x03, 0x09, 0x1c, 0xc3, 0x02, 0xaf,
+ 0xb3, 0x97, 0x09, 0x19, 0x09, 0xc4, 0x52, 0xe5, 0x09, 0x19, 0x01, 0xc5,
+ 0x07, 0x67, 0x09, 0x18, 0xf0, 0x47, 0x07, 0x6c, 0x42, 0xaf, 0xb9, 0xcd,
+ 0x7a, 0x8b, 0x09, 0x1a, 0xd8, 0xc4, 0x3a, 0x6a, 0x09, 0x1a, 0xa9, 0xc2,
+ 0x07, 0xf2, 0x09, 0x1a, 0x9b, 0x02, 0xaf, 0xc5, 0x83, 0x09, 0x1a, 0x90,
+ 0xc7, 0x6f, 0x36, 0x09, 0x18, 0xd3, 0x02, 0xaf, 0xc9, 0xc4, 0x3a, 0xa6,
+ 0x09, 0x18, 0xc9, 0x46, 0x07, 0x6d, 0xc2, 0xaf, 0xcf, 0xc6, 0xd7, 0x8a,
+ 0x09, 0x18, 0xa0, 0x47, 0x07, 0x6c, 0x42, 0xaf, 0xe4, 0xd4, 0x3b, 0x9a,
+ 0x09, 0x18, 0x50, 0xc9, 0xab, 0x26, 0x09, 0x29, 0xc8, 0x47, 0x07, 0x6c,
+ 0x42, 0xaf, 0xf0, 0x00, 0x42, 0xb0, 0x0e, 0xc4, 0x3a, 0xa6, 0x09, 0x17,
+ 0x79, 0x46, 0x07, 0x6d, 0xc2, 0xb0, 0x1a, 0xc8, 0x0a, 0x5f, 0x09, 0x17,
+ 0x60, 0x00, 0x42, 0xb0, 0x26, 0xca, 0x3a, 0x64, 0x09, 0x29, 0xc1, 0xc4,
+ 0x3a, 0xa6, 0x09, 0x16, 0xe0, 0xa1, 0x09, 0x16, 0xf2, 0x02, 0xb0, 0x35,
+ 0x9f, 0x09, 0x16, 0xcb, 0x02, 0xb0, 0x3b, 0xc3, 0x30, 0x93, 0x09, 0x16,
+ 0xd1, 0xd2, 0x4f, 0x44, 0x09, 0x16, 0xc0, 0x00, 0xc2, 0xb0, 0x41, 0xc2,
+ 0x01, 0x0d, 0x09, 0x16, 0x03, 0x02, 0xb0, 0x56, 0x90, 0x09, 0x15, 0xf9,
+ 0xc2, 0xed, 0xcd, 0x09, 0x15, 0xf0, 0xa3, 0x09, 0x15, 0xbb, 0x02, 0xb0,
+ 0x60, 0xc2, 0x3e, 0x08, 0x09, 0x15, 0xc9, 0xc2, 0xe7, 0xaa, 0x09, 0x15,
+ 0xc1, 0xa0, 0x09, 0x15, 0x72, 0x02, 0xb0, 0x66, 0xc2, 0x00, 0x44, 0x09,
+ 0x16, 0xb1, 0x94, 0x09, 0x16, 0x9b, 0x02, 0xb0, 0x6c, 0xc3, 0x8f, 0xc9,
+ 0x09, 0x16, 0x91, 0x8f, 0x09, 0x16, 0x33, 0x02, 0xb0, 0x70, 0x86, 0x09,
+ 0x16, 0x1a, 0x02, 0xb0, 0x76, 0x00, 0x42, 0xb0, 0x7c, 0xd1, 0x51, 0x03,
+ 0x09, 0x15, 0x50, 0xa6, 0x09, 0x17, 0x50, 0xc3, 0x02, 0xe4, 0x09, 0x17,
+ 0x40, 0x9f, 0x09, 0x17, 0x28, 0xc3, 0xeb, 0xc7, 0x09, 0x12, 0x93, 0x02,
+ 0xb0, 0x97, 0xa6, 0x09, 0x1c, 0x80, 0x49, 0x3e, 0x0a, 0x42, 0xb0, 0x9d,
+ 0x00, 0x42, 0xb0, 0xa9, 0xc2, 0x4e, 0x2b, 0x09, 0x13, 0x6b, 0x02, 0xb0,
+ 0xbb, 0x00, 0x42, 0xb0, 0xbf, 0x9f, 0x09, 0x12, 0x39, 0xc8, 0xb9, 0xf3,
+ 0x09, 0x12, 0x28, 0x94, 0x09, 0x12, 0x21, 0x00, 0x42, 0xb0, 0xda, 0xc7,
+ 0x6f, 0x36, 0x09, 0x12, 0x59, 0x46, 0x07, 0x6d, 0x42, 0xb0, 0xec, 0x00,
+ 0xc2, 0xb0, 0xf6, 0xa0, 0x09, 0x11, 0xca, 0x02, 0xb1, 0x0b, 0xc5, 0x3a,
+ 0xa5, 0x09, 0x11, 0x78, 0x8a, 0x09, 0x1c, 0x60, 0x9f, 0x09, 0x11, 0x38,
+ 0xc4, 0x3a, 0xa6, 0x09, 0x11, 0x11, 0xca, 0x3a, 0x64, 0x09, 0x11, 0x08,
+ 0x00, 0x42, 0xb1, 0x0f, 0xc9, 0xb1, 0x4d, 0x09, 0x10, 0xf2, 0x02, 0xb1,
+ 0x29, 0x00, 0x42, 0xb1, 0x2f, 0xc2, 0xd2, 0x91, 0x09, 0x28, 0xc1, 0xc2,
+ 0xb3, 0x7b, 0x09, 0x28, 0xb8, 0xc2, 0xed, 0xb7, 0x09, 0x28, 0x6b, 0x02,
+ 0xb1, 0x39, 0xc2, 0xed, 0xac, 0x09, 0x28, 0x61, 0xc2, 0xeb, 0x45, 0x09,
+ 0x28, 0x0b, 0x02, 0xb1, 0x3f, 0xc2, 0x71, 0x2b, 0x09, 0x28, 0x00, 0x26,
+ 0xc2, 0xb1, 0x45, 0xc2, 0xed, 0xd3, 0x09, 0x27, 0xd1, 0xc2, 0xeb, 0xb3,
+ 0x09, 0x27, 0xc9, 0x22, 0xc2, 0xb1, 0x55, 0x21, 0x42, 0xb1, 0x5d, 0xc2,
+ 0xeb, 0x24, 0x09, 0x27, 0x79, 0x25, 0xc2, 0xb1, 0x68, 0x21, 0x42, 0xb1,
+ 0x70, 0x23, 0xc2, 0xb1, 0x7c, 0xc2, 0xed, 0xc5, 0x09, 0x27, 0x39, 0x1f,
+ 0xc2, 0xb1, 0x84, 0x1e, 0x42, 0xb1, 0x90, 0xc2, 0xeb, 0xb0, 0x09, 0x27,
+ 0x09, 0xc2, 0xeb, 0xbe, 0x09, 0x27, 0x00, 0xc2, 0xed, 0xdb, 0x09, 0x26,
+ 0xf9, 0x25, 0xc2, 0xb1, 0x98, 0xd4, 0x3a, 0x96, 0x09, 0x26, 0xe1, 0xc2,
+ 0xeb, 0x5e, 0x09, 0x26, 0xd9, 0x22, 0xc2, 0xb1, 0xa2, 0xc2, 0xeb, 0xbe,
+ 0x09, 0x26, 0xc1, 0x1f, 0xc2, 0xb1, 0xaa, 0xc2, 0xea, 0xd3, 0x09, 0x26,
+ 0xa8, 0x84, 0x09, 0x0d, 0xc3, 0x02, 0xb1, 0xb2, 0x94, 0x09, 0x0f, 0x62,
+ 0x02, 0xb1, 0xb6, 0xca, 0x55, 0x8e, 0x09, 0x0f, 0xaa, 0x02, 0xb1, 0xba,
+ 0xca, 0x94, 0x89, 0x09, 0x0f, 0x98, 0x97, 0x09, 0x0c, 0x3b, 0x02, 0xb1,
+ 0xc0, 0x0d, 0xc2, 0xb1, 0xe1, 0x04, 0xc2, 0xb1, 0xef, 0x16, 0xc2, 0xb1,
+ 0xfb, 0x15, 0xc2, 0xb2, 0x05, 0x12, 0xc2, 0xb2, 0x1c, 0x0e, 0xc2, 0xb2,
+ 0x24, 0xcd, 0x07, 0xfa, 0x09, 0x1c, 0x11, 0x09, 0xc2, 0xb2, 0x2f, 0x83,
+ 0x09, 0x0a, 0xc3, 0x02, 0xb2, 0x44, 0xc2, 0x31, 0xdd, 0x09, 0x0c, 0x61,
+ 0xc2, 0x04, 0x34, 0x09, 0x0b, 0xe9, 0x10, 0xc2, 0xb2, 0x57, 0x0f, 0xc2,
+ 0xb2, 0x61, 0x0b, 0xc2, 0xb2, 0x6f, 0x07, 0x42, 0xb2, 0x79, 0x00, 0x42,
+ 0xb2, 0x85, 0xa1, 0x09, 0x0c, 0xd9, 0x9f, 0x09, 0x0c, 0xd0, 0x00, 0x42,
+ 0xb2, 0x91, 0xcf, 0x61, 0x77, 0x09, 0x0c, 0xb0, 0xa2, 0x09, 0x0c, 0x9b,
+ 0x02, 0xb2, 0x9d, 0xa1, 0x09, 0x0c, 0x91, 0xa0, 0x09, 0x0c, 0x89, 0x9f,
+ 0x09, 0x0c, 0x80, 0xcd, 0x78, 0x4f, 0x09, 0x0c, 0x70, 0xcd, 0x7e, 0x26,
+ 0x09, 0x0d, 0xa0, 0xc5, 0x3a, 0xa5, 0x09, 0x0d, 0x88, 0xcd, 0x7a, 0xbf,
+ 0x09, 0x0d, 0x70, 0xe0, 0x07, 0xe7, 0x09, 0x0d, 0x58, 0xc3, 0x61, 0xe3,
+ 0x09, 0x0d, 0x43, 0x02, 0xb2, 0xa3, 0x8a, 0x09, 0x0d, 0x39, 0xc2, 0x00,
+ 0x2f, 0x09, 0x0d, 0x30, 0x97, 0x09, 0x0d, 0x13, 0x02, 0xb2, 0xa9, 0xc3,
+ 0x64, 0x5f, 0x09, 0x0d, 0x08, 0xc3, 0x02, 0xe4, 0x09, 0x09, 0x73, 0x02,
+ 0xb2, 0xad, 0x97, 0x09, 0x09, 0xb1, 0xc3, 0x0a, 0xf1, 0x09, 0x09, 0xa9,
+ 0xc3, 0x21, 0x00, 0x09, 0x09, 0xa1, 0xc3, 0x2d, 0x9c, 0x09, 0x09, 0x99,
+ 0xc3, 0x1d, 0x5d, 0x09, 0x09, 0x91, 0xc4, 0x07, 0xd9, 0x09, 0x09, 0x89,
+ 0xc3, 0x64, 0x5f, 0x09, 0x09, 0x80, 0xc4, 0x5b, 0x6b, 0x09, 0x09, 0x53,
+ 0x02, 0xb2, 0xb7, 0xc4, 0x3a, 0xa6, 0x09, 0x09, 0x58, 0x47, 0x07, 0x6c,
+ 0x42, 0xb2, 0xbd, 0x00, 0x42, 0xb2, 0xdb, 0x00, 0x42, 0xb2, 0xed, 0x17,
+ 0xc2, 0xb2, 0xf9, 0xa4, 0x09, 0x09, 0x30, 0xca, 0xa2, 0x86, 0x09, 0x09,
+ 0x20, 0x8a, 0x09, 0x08, 0x8b, 0x02, 0xb3, 0x03, 0xc2, 0x00, 0x2f, 0x09,
+ 0x08, 0x80, 0xa0, 0x09, 0x08, 0x53, 0x02, 0xb3, 0x07, 0x9f, 0x09, 0x08,
+ 0x42, 0x02, 0xb3, 0x0d, 0x00, 0x42, 0xb3, 0x13, 0xcb, 0x4d, 0xcf, 0x09,
+ 0x08, 0x19, 0x46, 0x07, 0x6d, 0x42, 0xb3, 0x1f, 0x47, 0x07, 0x6c, 0x42,
+ 0xb3, 0x27, 0x00, 0x42, 0xb3, 0x31, 0x00, 0x42, 0xb3, 0x3d, 0xa0, 0x09,
+ 0x07, 0xe0, 0x9f, 0x09, 0x07, 0xba, 0x02, 0xb3, 0x49, 0xc2, 0x01, 0x02,
+ 0x09, 0x07, 0xa1, 0xda, 0x1d, 0x5c, 0x09, 0x07, 0x98, 0xd6, 0x1d, 0x60,
+ 0x09, 0x07, 0x88, 0x46, 0x07, 0x6d, 0xc2, 0xb3, 0x4d, 0x4e, 0x6f, 0x36,
+ 0x42, 0xb3, 0x88, 0xc2, 0x4e, 0xc5, 0x09, 0x25, 0x58, 0xc3, 0x07, 0x44,
+ 0x09, 0x25, 0x51, 0xc3, 0x55, 0x95, 0x09, 0x25, 0x49, 0x97, 0x09, 0x04,
+ 0x99, 0x15, 0xc2, 0xb3, 0xb2, 0xc2, 0x00, 0xa9, 0x09, 0x04, 0x81, 0xc3,
+ 0x1d, 0x6a, 0x09, 0x04, 0x79, 0xd1, 0x57, 0x96, 0x09, 0x04, 0x70, 0xc7,
+ 0x01, 0xe9, 0x09, 0x04, 0xe9, 0xcb, 0x90, 0xb5, 0x09, 0x04, 0xe1, 0xcb,
+ 0x9a, 0xef, 0x09, 0x04, 0xd9, 0x46, 0x07, 0x6d, 0x42, 0xb3, 0xbe, 0x47,
+ 0x07, 0x6c, 0xc2, 0xb3, 0xcd, 0xc2, 0x03, 0xbd, 0x09, 0x04, 0x10, 0x47,
+ 0x07, 0x6c, 0xc2, 0xb4, 0x05, 0x9f, 0x09, 0x04, 0x00, 0xa1, 0x09, 0x04,
+ 0x41, 0xa0, 0x09, 0x04, 0x2a, 0x02, 0xb4, 0x11, 0xc7, 0x6f, 0x36, 0x09,
+ 0x03, 0xe9, 0xc4, 0x3a, 0xa6, 0x09, 0x03, 0xe1, 0xc7, 0xc6, 0xf0, 0x09,
+ 0x03, 0xd8, 0x9f, 0x09, 0x03, 0xb3, 0x02, 0xb4, 0x1a, 0x47, 0x07, 0x6c,
+ 0x42, 0xb4, 0x20, 0xc9, 0xa4, 0xfc, 0x09, 0x1b, 0xa8, 0xd3, 0x47, 0x63,
+ 0x09, 0x03, 0xc0, 0x00, 0xc2, 0xb4, 0x32, 0xa0, 0x09, 0x1b, 0xa0, 0x03,
+ 0x42, 0xb4, 0x3e, 0x48, 0xb8, 0xf3, 0xc2, 0xb4, 0x46, 0xcb, 0x9a, 0xfa,
+ 0x09, 0x02, 0x80, 0x9f, 0x09, 0x02, 0xa0, 0xcb, 0x9b, 0x31, 0x09, 0x02,
+ 0x90, 0x47, 0x07, 0x6c, 0x42, 0xb4, 0x58, 0xd0, 0x5b, 0x5f, 0x09, 0x24,
+ 0x18, 0xc2, 0x78, 0x5a, 0x09, 0x02, 0x40, 0xc2, 0x07, 0x68, 0x09, 0x02,
+ 0x31, 0xc9, 0xb2, 0xd0, 0x09, 0x02, 0x28, 0xc8, 0x61, 0x7e, 0x09, 0x02,
+ 0x61, 0xc3, 0x1d, 0x6a, 0x09, 0x02, 0x59, 0x83, 0x09, 0x02, 0x50, 0x46,
+ 0x07, 0x6d, 0xc2, 0xb4, 0x6a, 0xc4, 0x3a, 0xa6, 0x09, 0x00, 0xa8, 0x47,
+ 0x07, 0x6c, 0x42, 0xb4, 0xa1, 0xc3, 0xec, 0x39, 0x09, 0x1b, 0x91, 0xc3,
+ 0x0a, 0xf1, 0x09, 0x01, 0x60, 0xc3, 0x07, 0x69, 0x09, 0x01, 0xf9, 0x9f,
+ 0x09, 0x01, 0xf1, 0x00, 0x42, 0xb4, 0xc3, 0xca, 0x55, 0x8e, 0x09, 0x01,
+ 0xa8, 0x4a, 0xa5, 0x4c, 0xc2, 0xb4, 0xd5, 0xcb, 0x91, 0x65, 0x09, 0x01,
+ 0x79, 0xc7, 0xc7, 0x13, 0x09, 0x01, 0x70, 0xc3, 0x76, 0x92, 0x09, 0x01,
+ 0x41, 0xc3, 0x0a, 0xf1, 0x09, 0x01, 0x39, 0x0d, 0xc2, 0xb4, 0xe1, 0xc2,
+ 0x01, 0x0e, 0x09, 0x01, 0x21, 0xc4, 0x3a, 0x5f, 0x09, 0x01, 0x19, 0xc4,
+ 0xe7, 0x73, 0x09, 0x01, 0x11, 0xc2, 0x00, 0x2f, 0x09, 0x01, 0x08, 0xcf,
+ 0x68, 0xf7, 0x09, 0x00, 0xf9, 0xc5, 0xa5, 0x47, 0x09, 0x00, 0xf0, 0x9f,
+ 0x09, 0x1c, 0xa9, 0xc2, 0x00, 0x48, 0x09, 0x14, 0x52, 0x02, 0xb4, 0xeb,
+ 0xcb, 0x9a, 0xd9, 0x09, 0x14, 0x49, 0x46, 0x07, 0x6d, 0x42, 0xb4, 0xef,
+ 0xc7, 0x01, 0xe9, 0x09, 0x0a, 0x91, 0xcb, 0x90, 0x47, 0x09, 0x0a, 0x89,
+ 0xcb, 0x9a, 0xe4, 0x09, 0x0a, 0x81, 0xca, 0x3a, 0x64, 0x09, 0x0a, 0x78,
+ 0x00, 0x42, 0xb5, 0x0c, 0xc7, 0x01, 0xe9, 0x09, 0x0a, 0x21, 0xc3, 0x30,
+ 0x93, 0x09, 0x0a, 0x18, 0xcd, 0x7a, 0xbf, 0x09, 0x23, 0x70, 0xc2, 0x00,
+ 0x39, 0x09, 0x22, 0x49, 0xa1, 0x09, 0x22, 0x41, 0xa0, 0x09, 0x22, 0x38,
+ 0xcd, 0x7a, 0xbf, 0x09, 0x23, 0x68, 0xa0, 0x09, 0x22, 0x28, 0xc4, 0x41,
+ 0xc9, 0x09, 0x23, 0x41, 0xc4, 0x4d, 0x29, 0x09, 0x23, 0x38, 0xcd, 0x7a,
+ 0xbf, 0x09, 0x23, 0x60, 0x00, 0xc2, 0xb5, 0x24, 0xa0, 0x09, 0x22, 0x08,
+ 0xcd, 0x7a, 0xbf, 0x09, 0x23, 0x58, 0xc5, 0x5b, 0x6a, 0x09, 0x22, 0x70,
+ 0xcd, 0x7a, 0xbf, 0x09, 0x23, 0x50, 0xca, 0x9d, 0x72, 0x09, 0x22, 0xe1,
+ 0x43, 0x02, 0x10, 0x42, 0xb5, 0x2c, 0xc3, 0x5c, 0x77, 0x09, 0x22, 0xa3,
+ 0x02, 0xb5, 0x34, 0xc3, 0x7e, 0x31, 0x09, 0x21, 0xc8, 0xc5, 0x5b, 0x6a,
+ 0x09, 0x22, 0x68, 0x97, 0x09, 0x21, 0x11, 0x9f, 0x09, 0x20, 0xc8, 0xcd,
+ 0x7a, 0xbf, 0x09, 0x23, 0x48, 0xc3, 0x5c, 0x77, 0x09, 0x22, 0x93, 0x02,
+ 0xb5, 0x3a, 0xc3, 0x7e, 0x31, 0x09, 0x21, 0xc0, 0xc5, 0x5b, 0x6a, 0x09,
+ 0x22, 0x60, 0x00, 0xc2, 0xb5, 0x40, 0xa1, 0x09, 0x21, 0xe8, 0x97, 0x09,
+ 0x21, 0x81, 0x9f, 0x09, 0x21, 0x30, 0x97, 0x09, 0x21, 0x09, 0x9f, 0x09,
+ 0x20, 0xc0, 0xc3, 0x99, 0x12, 0x09, 0x23, 0x19, 0xc3, 0x02, 0xe4, 0x09,
+ 0x23, 0x00, 0xc9, 0xae, 0xb3, 0x09, 0x22, 0xf9, 0xc4, 0xdd, 0x4d, 0x09,
+ 0x22, 0xc0, 0xce, 0x55, 0x54, 0x09, 0x22, 0xe9, 0xc4, 0x07, 0xd9, 0x09,
+ 0x22, 0xd0, 0xc3, 0x5c, 0x77, 0x09, 0x22, 0x79, 0xc3, 0x7e, 0x31, 0x09,
+ 0x21, 0xa0, 0x97, 0x09, 0x20, 0xf1, 0x9f, 0x09, 0x20, 0xa8, 0xce, 0x55,
+ 0x54, 0x09, 0x22, 0xf1, 0xc4, 0x07, 0xd9, 0x09, 0x22, 0xd8, 0xc3, 0x5c,
+ 0x77, 0x09, 0x22, 0x81, 0xc3, 0x7e, 0x31, 0x09, 0x21, 0xa8, 0xc5, 0x5b,
+ 0x6a, 0x09, 0x22, 0x50, 0x97, 0x09, 0x21, 0x69, 0x9f, 0x09, 0x21, 0x18,
+ 0x97, 0x09, 0x20, 0xf9, 0x9f, 0x09, 0x20, 0xb0, 0xc3, 0x5c, 0x77, 0x09,
+ 0x22, 0x89, 0xc3, 0x7e, 0x31, 0x09, 0x21, 0xb2, 0x02, 0xb5, 0x48, 0xc5,
+ 0x5b, 0x6a, 0x09, 0x22, 0x58, 0xc2, 0xea, 0xcc, 0x09, 0x21, 0xd9, 0xc2,
+ 0xed, 0xb5, 0x09, 0x21, 0xd0, 0x97, 0x09, 0x21, 0x73, 0x02, 0xb5, 0x4e,
+ 0x9f, 0x09, 0x21, 0x22, 0x02, 0xb5, 0x54, 0x97, 0x09, 0x21, 0x01, 0x9f,
+ 0x09, 0x20, 0xb8, 0xc3, 0x00, 0x3a, 0x01, 0x16, 0x79, 0xc2, 0x00, 0xff,
+ 0x01, 0x16, 0x70, 0xc2, 0x00, 0x5b, 0x0f, 0x03, 0x51, 0x87, 0x0f, 0x03,
+ 0x48, 0xcb, 0x97, 0x32, 0x00, 0xe4, 0x41, 0x46, 0x01, 0xab, 0xc2, 0xb5,
+ 0x5a, 0x8d, 0x00, 0x23, 0xca, 0x02, 0xb5, 0x64, 0x44, 0x00, 0x40, 0xc2,
+ 0xb5, 0x6a, 0xce, 0x6e, 0x3a, 0x00, 0xe4, 0x29, 0x87, 0x00, 0x22, 0x13,
+ 0x02, 0xb5, 0x7c, 0x15, 0xc2, 0xb5, 0x82, 0xc2, 0x00, 0xc9, 0x05, 0x34,
+ 0x69, 0xc3, 0x27, 0xc3, 0x05, 0x34, 0x98, 0xc6, 0xd3, 0x64, 0x00, 0xe4,
+ 0x19, 0x87, 0x00, 0x28, 0xe8, 0xc7, 0x81, 0x4c, 0x00, 0xe4, 0x11, 0xca,
+ 0xa9, 0x98, 0x05, 0x32, 0x79, 0xc2, 0x1c, 0x3e, 0x00, 0x22, 0xd0, 0xcd,
+ 0x7d, 0x8a, 0x00, 0xe4, 0x09, 0xc2, 0x01, 0x0e, 0x00, 0x28, 0xa9, 0xc2,
+ 0x1c, 0x3e, 0x00, 0x22, 0xc9, 0xc9, 0x56, 0x39, 0x00, 0x23, 0x38, 0x44,
+ 0x0c, 0x3c, 0xc2, 0xb5, 0x98, 0xc2, 0x01, 0x0e, 0x00, 0x28, 0xb9, 0x48,
+ 0x11, 0xae, 0x42, 0xb5, 0xa4, 0x8e, 0x00, 0x21, 0xdb, 0x02, 0xb5, 0xbc,
+ 0x90, 0x00, 0x21, 0xeb, 0x02, 0xb5, 0xc2, 0xcf, 0x69, 0x9c, 0x00, 0x27,
+ 0x69, 0x8f, 0x00, 0x21, 0xe3, 0x02, 0xb5, 0xc8, 0x95, 0x00, 0x22, 0x0b,
+ 0x02, 0xb5, 0xce, 0x94, 0x00, 0x22, 0x03, 0x02, 0xb5, 0xd4, 0x88, 0x00,
+ 0x22, 0x20, 0xc3, 0x27, 0xc3, 0x00, 0x29, 0x69, 0x1c, 0xc2, 0xb5, 0xda,
+ 0x46, 0x02, 0x92, 0xc2, 0xb5, 0xf1, 0xc2, 0x1c, 0x3e, 0x00, 0x22, 0x93,
+ 0x02, 0xb5, 0xfb, 0x87, 0x00, 0x21, 0xa1, 0xc2, 0x00, 0xc9, 0x05, 0x34,
+ 0x08, 0x0a, 0xc2, 0xb6, 0x01, 0xc4, 0x6e, 0x0c, 0x00, 0x26, 0xcb, 0x02,
+ 0xb6, 0x20, 0xc9, 0xb6, 0x03, 0x00, 0x25, 0x7b, 0x02, 0xb6, 0x26, 0xcc,
+ 0x84, 0x48, 0x00, 0x24, 0x69, 0x44, 0x67, 0x3a, 0x42, 0xb6, 0x2c, 0x87,
+ 0x00, 0x21, 0xfb, 0x02, 0xb6, 0x3c, 0xc7, 0xbc, 0xcc, 0x00, 0x26, 0x79,
+ 0xc2, 0x02, 0x29, 0x00, 0x23, 0x88, 0xc7, 0xc8, 0x8d, 0x00, 0x28, 0xf9,
+ 0x49, 0xaf, 0xf7, 0xc2, 0xb6, 0x42, 0x46, 0x01, 0xab, 0x42, 0xb6, 0x57,
+ 0x83, 0x00, 0x22, 0x7b, 0x02, 0xb6, 0x63, 0xc3, 0x1a, 0xd0, 0x00, 0x22,
+ 0x5b, 0x02, 0xb6, 0x6b, 0x90, 0x05, 0x32, 0xf9, 0x97, 0x00, 0x22, 0x71,
+ 0x8b, 0x00, 0x22, 0xb8, 0x11, 0xc2, 0xb6, 0x71, 0xcd, 0x76, 0xd6, 0x00,
+ 0x26, 0x61, 0x83, 0x00, 0x21, 0xd3, 0x02, 0xb6, 0x7d, 0xc2, 0x1c, 0x3e,
+ 0x00, 0x22, 0xe1, 0xc2, 0x02, 0x29, 0x00, 0x23, 0x78, 0x83, 0x00, 0x22,
+ 0x2b, 0x02, 0xb6, 0x83, 0xc2, 0x00, 0xc9, 0x05, 0x34, 0xa8, 0xc2, 0x00,
+ 0x5b, 0x00, 0x21, 0x9b, 0x02, 0xb6, 0x8f, 0xc2, 0x1c, 0x3e, 0x00, 0x22,
+ 0x98, 0x03, 0xc2, 0xb6, 0x95, 0xca, 0xa3, 0x80, 0x05, 0x32, 0x69, 0x87,
+ 0x00, 0x21, 0x89, 0xca, 0xa6, 0xfa, 0x05, 0x32, 0xd9, 0x0b, 0xc2, 0xb6,
+ 0xa4, 0xd7, 0x04, 0x30, 0x00, 0x22, 0xb0, 0xcf, 0x69, 0x9c, 0x00, 0x27,
+ 0x39, 0xc4, 0x73, 0xe1, 0x00, 0x23, 0x0b, 0x02, 0xb6, 0xb0, 0x96, 0x00,
+ 0x23, 0xf8, 0x46, 0x01, 0xab, 0xc2, 0xb6, 0xb6, 0x87, 0x00, 0x21, 0xab,
+ 0x02, 0xb6, 0xc8, 0xc6, 0xd3, 0x28, 0x00, 0x23, 0xab, 0x02, 0xb6, 0xce,
+ 0x91, 0x00, 0x22, 0x8a, 0x02, 0xb6, 0xd4, 0x87, 0x00, 0x21, 0xbb, 0x02,
+ 0xb6, 0xd8, 0x0a, 0x42, 0xb6, 0xe4, 0xc2, 0x00, 0x5b, 0x00, 0x22, 0x3b,
+ 0x02, 0xb6, 0xf1, 0xc8, 0xc1, 0x03, 0x05, 0x34, 0xd9, 0xd0, 0x56, 0x32,
+ 0x05, 0x32, 0xc9, 0xc3, 0x27, 0xc3, 0x05, 0x34, 0x38, 0xc8, 0x85, 0x54,
+ 0x05, 0x32, 0x59, 0xc7, 0x80, 0xea, 0x05, 0x33, 0x48, 0x46, 0x01, 0xab,
+ 0x42, 0xb6, 0xf7, 0x46, 0x01, 0xab, 0x42, 0xb7, 0x0f, 0xca, 0xa3, 0xda,
+ 0x00, 0x26, 0x68, 0xcf, 0x6b, 0x6d, 0x00, 0x25, 0x50, 0xca, 0xa3, 0xd0,
+ 0x00, 0x24, 0x70, 0x1c, 0xc2, 0xb7, 0x2d, 0x87, 0x00, 0x20, 0x2b, 0x02,
+ 0xb7, 0x37, 0xc2, 0x00, 0x5b, 0x00, 0x20, 0x79, 0xc2, 0x00, 0xc9, 0x05,
+ 0x34, 0x10, 0x91, 0x05, 0x34, 0xc1, 0xcb, 0x96, 0xa3, 0x05, 0x33, 0x60,
+ 0xc2, 0x06, 0x8b, 0x05, 0x32, 0x40, 0xc2, 0x01, 0x0e, 0x00, 0x25, 0xd3,
+ 0x02, 0xb7, 0x3d, 0x44, 0x2f, 0xae, 0xc2, 0xb7, 0x43, 0x83, 0x00, 0x21,
+ 0x41, 0xc3, 0x1c, 0x4f, 0x00, 0x21, 0x49, 0xc2, 0x00, 0xc9, 0x05, 0x34,
+ 0xb0, 0xcf, 0x69, 0x9c, 0x00, 0x26, 0xd0, 0xcc, 0x26, 0x18, 0x00, 0x25,
+ 0x80, 0xc4, 0x02, 0xcb, 0x00, 0x21, 0x61, 0xc2, 0x00, 0x06, 0x05, 0x33,
+ 0x11, 0x07, 0x42, 0xb7, 0x4e, 0x46, 0x01, 0xab, 0x42, 0xb7, 0x56, 0xc3,
+ 0xea, 0xd7, 0x00, 0x27, 0x01, 0xc3, 0x27, 0xc3, 0x00, 0x25, 0xe3, 0x02,
+ 0xb7, 0x62, 0xc2, 0x01, 0x0e, 0x00, 0x25, 0x40, 0xc9, 0x1e, 0x89, 0x00,
+ 0x26, 0x91, 0xc5, 0x1f, 0x9c, 0x00, 0x26, 0x80, 0x87, 0x00, 0x28, 0xc1,
+ 0x96, 0x00, 0x23, 0x10, 0x46, 0x01, 0xab, 0x42, 0xb7, 0x68, 0xc2, 0x0b,
+ 0xa2, 0x00, 0x28, 0x81, 0xc3, 0xea, 0xd7, 0x05, 0x32, 0x21, 0xc2, 0x12,
+ 0xc5, 0x05, 0x32, 0xa1, 0xc3, 0x0a, 0x25, 0x05, 0x33, 0x00, 0x43, 0xca,
+ 0xd1, 0xc2, 0xb7, 0x74, 0xc3, 0x7a, 0xf0, 0x00, 0x24, 0x00, 0x46, 0x01,
+ 0xab, 0x42, 0xb7, 0x96, 0x46, 0x01, 0xab, 0xc2, 0xb7, 0xae, 0xc7, 0x8a,
+ 0x59, 0x00, 0x21, 0x50, 0x46, 0x01, 0xab, 0x42, 0xb7, 0xc0, 0x46, 0x01,
+ 0xab, 0x42, 0xb7, 0xdb, 0x06, 0xc2, 0xb7, 0xe5, 0xc6, 0x61, 0xbc, 0x00,
+ 0x27, 0x70, 0xca, 0x94, 0x3c, 0x00, 0x20, 0x20, 0xc6, 0xce, 0x68, 0x00,
+ 0x27, 0x43, 0x02, 0xb7, 0xf1, 0xc8, 0xba, 0xf3, 0x00, 0x25, 0x00, 0xc9,
+ 0x96, 0xa5, 0x05, 0x33, 0x51, 0xc5, 0xcc, 0x55, 0x00, 0x23, 0x50, 0xcb,
+ 0x97, 0xf8, 0x00, 0x23, 0xe0, 0xc9, 0x1e, 0x89, 0x00, 0x27, 0x21, 0xc6,
+ 0x61, 0xbc, 0x00, 0x27, 0x11, 0xc5, 0x1e, 0x64, 0x00, 0x20, 0x68, 0x46,
+ 0x01, 0xab, 0x42, 0xb7, 0xf7, 0xd9, 0x1e, 0x57, 0x00, 0x23, 0xb0, 0x16,
+ 0x42, 0xb8, 0x03, 0x47, 0x09, 0x72, 0xc2, 0xb8, 0x0d, 0xc4, 0xe5, 0xfb,
+ 0x05, 0x32, 0x00, 0x87, 0x00, 0x20, 0xb3, 0x02, 0xb8, 0x19, 0xc2, 0x00,
+ 0xc9, 0x05, 0x34, 0x20, 0x46, 0x01, 0xab, 0x42, 0xb8, 0x1f, 0xc2, 0x00,
+ 0x5b, 0x00, 0x20, 0x59, 0x87, 0x00, 0x21, 0x31, 0xc2, 0x00, 0xc9, 0x05,
+ 0x34, 0x71, 0xc2, 0x00, 0xa7, 0x05, 0x34, 0x80, 0x84, 0x09, 0x7e, 0x70,
+ 0x84, 0x09, 0x7c, 0xd8, 0xe0, 0x05, 0xe7, 0x01, 0x01, 0xc8, 0xc8, 0x4f,
+ 0xa2, 0x08, 0x8f, 0xa1, 0xc7, 0x0d, 0x7f, 0x08, 0x8f, 0x98, 0xc6, 0x15,
+ 0xa7, 0x08, 0x8f, 0x81, 0xc4, 0xd8, 0xce, 0x08, 0x8f, 0x78, 0xc4, 0x41,
+ 0xc9, 0x08, 0x8f, 0x71, 0xc4, 0x4d, 0x29, 0x08, 0x8f, 0x68, 0xc5, 0x0d,
+ 0x88, 0x08, 0x8f, 0x61, 0xc5, 0x25, 0x27, 0x08, 0x8f, 0x59, 0xc2, 0x01,
+ 0x04, 0x08, 0x8f, 0x50, 0xc4, 0x15, 0xa7, 0x08, 0x8f, 0x39, 0xc2, 0x22,
+ 0x45, 0x08, 0x8f, 0x30, 0xc3, 0x0d, 0x8f, 0x08, 0x8f, 0x29, 0xc3, 0x08,
+ 0xde, 0x08, 0x8f, 0x20, 0xc4, 0x05, 0xde, 0x08, 0x8f, 0x19, 0xc2, 0x0a,
+ 0x20, 0x08, 0x8f, 0x10, 0xc5, 0x67, 0x97, 0x00, 0x6c, 0x29, 0xc6, 0x93,
+ 0xa6, 0x00, 0x6c, 0x31, 0x07, 0xc2, 0xb8, 0x2b, 0xc6, 0xd8, 0xd4, 0x00,
+ 0x6c, 0x99, 0xc6, 0xd4, 0x24, 0x00, 0x6c, 0xb1, 0x4a, 0xa5, 0x88, 0xc2,
+ 0xb8, 0x37, 0xcb, 0x93, 0xa1, 0x00, 0x6d, 0xc8, 0xc5, 0x67, 0x97, 0x00,
+ 0x6c, 0x49, 0xc6, 0xd8, 0xd4, 0x00, 0x6c, 0x51, 0x42, 0x04, 0x34, 0xc2,
+ 0xb8, 0x63, 0x42, 0x12, 0x5d, 0x42, 0xb8, 0x6f, 0xc5, 0x67, 0x97, 0x00,
+ 0x6c, 0x59, 0xc6, 0xd4, 0x24, 0x00, 0x6c, 0x60, 0xc5, 0x67, 0x97, 0x00,
+ 0x6c, 0x89, 0xc6, 0xd3, 0x46, 0x00, 0x6c, 0x90, 0xc5, 0x67, 0x97, 0x00,
+ 0x6c, 0xa1, 0xc6, 0x67, 0x96, 0x00, 0x6c, 0xa8, 0x03, 0xc2, 0xb8, 0x7b,
+ 0x49, 0xb0, 0xa2, 0x42, 0xb8, 0x87, 0xc7, 0xc6, 0xb8, 0x00, 0x6c, 0xf9,
+ 0xc7, 0xca, 0xe7, 0x00, 0x6d, 0x31, 0x06, 0x42, 0xb8, 0x99, 0xca, 0x47,
+ 0xea, 0x00, 0x6d, 0x21, 0x42, 0x0e, 0xe5, 0x42, 0xb8, 0xa5, 0xc7, 0xcd,
+ 0x8e, 0x00, 0x6d, 0x89, 0xc7, 0xc4, 0xf8, 0x00, 0x6d, 0xe9, 0xc7, 0xcc,
+ 0xfb, 0x00, 0x6e, 0x18, 0xc2, 0x0a, 0x20, 0x00, 0x6f, 0x41, 0xc4, 0x05,
+ 0xde, 0x00, 0x6f, 0x48, 0xc3, 0x08, 0xde, 0x00, 0x6f, 0x51, 0xc3, 0x0d,
+ 0x8f, 0x00, 0x6f, 0x58, 0xc2, 0x22, 0x45, 0x00, 0x6f, 0x61, 0xc4, 0x15,
+ 0xa7, 0x00, 0x6f, 0x68, 0xca, 0xa6, 0x28, 0x00, 0x6e, 0x81, 0xc8, 0xb9,
+ 0x2b, 0x00, 0x6e, 0x91, 0xc9, 0xad, 0x5d, 0x00, 0x6e, 0xa0, 0xc2, 0x06,
+ 0x8c, 0x00, 0x6e, 0xcb, 0x02, 0xb8, 0xb1, 0xc5, 0xdb, 0x3f, 0x00, 0x6e,
+ 0xd8, 0xca, 0xaa, 0x92, 0x00, 0x6f, 0x91, 0xc9, 0x8f, 0xd0, 0x00, 0x6f,
+ 0x98, 0x43, 0x8c, 0x84, 0xc2, 0xb8, 0xb7, 0xc3, 0xed, 0x59, 0x0e, 0xd5,
+ 0x41, 0xc3, 0xed, 0x5c, 0x0e, 0xd5, 0x39, 0xc3, 0xed, 0x2f, 0x0e, 0xd5,
+ 0x31, 0xc3, 0xed, 0x32, 0x0e, 0xd5, 0x29, 0xc3, 0xed, 0x35, 0x0e, 0xd5,
+ 0x21, 0xc3, 0xed, 0x38, 0x0e, 0xd5, 0x19, 0xc3, 0xed, 0x3b, 0x0e, 0xd5,
+ 0x11, 0xc3, 0xed, 0x3e, 0x0e, 0xd5, 0x08, 0xcb, 0x53, 0x7e, 0x0e, 0xcf,
+ 0x0b, 0x02, 0xb8, 0xd3, 0xc6, 0x02, 0x91, 0x0e, 0xcf, 0x03, 0x02, 0xb8,
+ 0xd9, 0xc6, 0x23, 0x24, 0x0e, 0xce, 0xfa, 0x02, 0xb8, 0xdf, 0x48, 0x0d,
+ 0x26, 0xc2, 0xb8, 0xe5, 0xc6, 0x02, 0x91, 0x0e, 0xcd, 0x1b, 0x02, 0xb8,
+ 0xef, 0xc6, 0x23, 0x24, 0x0e, 0xcd, 0x12, 0x02, 0xb8, 0xf5, 0xc3, 0xed,
+ 0x32, 0x0e, 0xd4, 0xf9, 0xc3, 0xed, 0x35, 0x0e, 0xd4, 0xf1, 0xc3, 0xed,
+ 0x38, 0x0e, 0xd4, 0xe9, 0xc3, 0xed, 0x3b, 0x0e, 0xd4, 0xe1, 0xc3, 0xed,
+ 0x3e, 0x0e, 0xd4, 0xd8, 0x15, 0xc2, 0xb8, 0xfb, 0x46, 0x15, 0x2e, 0x42,
+ 0xb9, 0x07, 0xc8, 0x03, 0x0f, 0x0e, 0xd0, 0x48, 0xc9, 0xaf, 0x67, 0x0e,
+ 0xd3, 0x71, 0xc5, 0xdd, 0x0b, 0x0e, 0xd3, 0x68, 0xc9, 0x63, 0x21, 0x0e,
+ 0xc8, 0xd1, 0x45, 0x00, 0x3f, 0x42, 0xb9, 0x13, 0xc8, 0x3a, 0x32, 0x0e,
+ 0xc8, 0xc1, 0xc6, 0x23, 0x24, 0x0e, 0xc8, 0xb0, 0xcc, 0x88, 0x20, 0x0e,
+ 0xd4, 0x31, 0xc5, 0xde, 0x9b, 0x0e, 0xd4, 0x29, 0x42, 0x00, 0x5b, 0xc2,
+ 0xb9, 0x1f, 0xc5, 0xdf, 0xdb, 0x0e, 0xd4, 0x19, 0xc5, 0x47, 0xb8, 0x0e,
+ 0xd4, 0x10, 0x45, 0x37, 0x9d, 0xc2, 0xb9, 0x2b, 0xcb, 0x9b, 0xe1, 0x0e,
+ 0xd3, 0xb0, 0x00, 0xc2, 0xb9, 0x55, 0xd2, 0x4d, 0xb8, 0x0e, 0xd2, 0x98,
+ 0xd5, 0x33, 0x25, 0x0e, 0xd3, 0xa1, 0x4c, 0x8a, 0x84, 0x42, 0xb9, 0x61,
+ 0x47, 0x0e, 0x90, 0xc2, 0xb9, 0x6d, 0xd3, 0x43, 0xf9, 0x0e, 0xd2, 0xf1,
+ 0xd4, 0x38, 0xb6, 0x0e, 0xd2, 0xe9, 0x44, 0x03, 0x9a, 0xc2, 0xb9, 0x79,
+ 0xcc, 0x88, 0x2c, 0x0e, 0xd2, 0xd1, 0xd0, 0x5c, 0x1f, 0x0e, 0xd2, 0xc8,
+ 0xc7, 0x03, 0x28, 0x0e, 0xc8, 0x39, 0xc8, 0x3a, 0x32, 0x0e, 0xc8, 0x31,
+ 0xc6, 0x23, 0x24, 0x0e, 0xc8, 0x28, 0x00, 0x42, 0xb9, 0x85, 0xc3, 0x00,
+ 0x36, 0x0e, 0xd1, 0x79, 0xc6, 0x01, 0x8c, 0x0e, 0xd1, 0x71, 0xc4, 0x0b,
+ 0x2b, 0x0e, 0xd1, 0x68, 0xc7, 0xc9, 0x4a, 0x0e, 0xcc, 0x39, 0x47, 0x60,
+ 0xc6, 0x42, 0xb9, 0x97, 0x4b, 0x9b, 0x52, 0xc2, 0xb9, 0xa3, 0xc7, 0xc9,
+ 0x4a, 0x0e, 0xca, 0x89, 0x47, 0x60, 0xc6, 0x42, 0xb9, 0xb5, 0x4c, 0x83,
+ 0x58, 0xc2, 0xb9, 0xc1, 0x4d, 0x7e, 0x5a, 0x42, 0xb9, 0xce, 0xca, 0x46,
+ 0x34, 0x0e, 0xd1, 0x01, 0xc4, 0x00, 0x68, 0x0e, 0xd0, 0xf9, 0xc2, 0x02,
+ 0x6a, 0x0e, 0xd0, 0xf0, 0xc4, 0x99, 0xc6, 0x0e, 0xd0, 0xe9, 0x46, 0xd4,
+ 0x90, 0x42, 0xb9, 0xdd, 0x44, 0xe0, 0xdf, 0xc2, 0xb9, 0xe9, 0x45, 0x01,
+ 0x1d, 0xc2, 0xb9, 0xf5, 0xc6, 0x07, 0xc1, 0x0e, 0xd0, 0xb1, 0xc8, 0xbc,
+ 0x1b, 0x0e, 0xd0, 0xa9, 0xc4, 0x03, 0xf5, 0x0e, 0xd0, 0xa0, 0xc4, 0x00,
+ 0x68, 0x0e, 0xd0, 0x61, 0xc7, 0x82, 0x79, 0x0e, 0xd0, 0x59, 0xc2, 0x02,
+ 0x6a, 0x0e, 0xd0, 0x50, 0x43, 0x80, 0xc1, 0xc2, 0xba, 0x01, 0xc7, 0xc7,
+ 0x21, 0x0e, 0xc4, 0x2b, 0x02, 0xba, 0x13, 0x43, 0x4d, 0x94, 0xc2, 0xba,
+ 0x17, 0x43, 0xeb, 0xa3, 0xc2, 0xba, 0x29, 0xc6, 0xd5, 0x98, 0x0e, 0xc3,
+ 0xba, 0x02, 0xba, 0x3f, 0x47, 0x52, 0x93, 0xc2, 0xba, 0x43, 0xc3, 0x16,
+ 0x71, 0x0e, 0xd0, 0x00, 0xc6, 0xd3, 0x40, 0x0e, 0xd1, 0xa1, 0xc7, 0xaf,
+ 0x29, 0x0e, 0xd1, 0x98, 0xc3, 0xec, 0x4e, 0x0e, 0xd3, 0x49, 0x48, 0x18,
+ 0xdf, 0xc2, 0xba, 0x55, 0x19, 0xc2, 0xba, 0x61, 0x58, 0x22, 0x04, 0xc2,
+ 0xba, 0x6d, 0x15, 0xc2, 0xba, 0x7f, 0x45, 0xe3, 0x96, 0xc2, 0xba, 0x8b,
+ 0x45, 0xdf, 0x59, 0xc2, 0xba, 0x97, 0x05, 0xc2, 0xba, 0xa3, 0x46, 0xd8,
+ 0xf8, 0xc2, 0xba, 0xbb, 0x47, 0x31, 0xdd, 0xc2, 0xba, 0xcd, 0x04, 0xc2,
+ 0xba, 0xdf, 0x47, 0x30, 0x47, 0xc2, 0xba, 0xeb, 0x47, 0x02, 0x91, 0x42,
+ 0xba, 0xfd, 0xc3, 0xec, 0x4e, 0x0e, 0xd3, 0x41, 0x48, 0x18, 0xdf, 0xc2,
+ 0xbb, 0x12, 0x19, 0xc2, 0xbb, 0x1e, 0x4b, 0x22, 0x04, 0xc2, 0xbb, 0x2a,
+ 0x45, 0xe3, 0x96, 0xc2, 0xbb, 0x36, 0x45, 0xdf, 0x59, 0xc2, 0xbb, 0x51,
+ 0x05, 0xc2, 0xbb, 0x69, 0x15, 0xc2, 0xbb, 0x81, 0x46, 0xd8, 0xf8, 0xc2,
+ 0xbb, 0x8d, 0x47, 0x31, 0xdd, 0xc2, 0xbb, 0x9f, 0x04, 0xc2, 0xbb, 0xb1,
+ 0x47, 0x30, 0x47, 0xc2, 0xbb, 0xbd, 0x47, 0x02, 0x91, 0x42, 0xbb, 0xd2,
+ 0x48, 0x03, 0x28, 0xc2, 0xbb, 0xe7, 0x48, 0xbf, 0x73, 0xc2, 0xbb, 0xf3,
+ 0x45, 0xdc, 0x34, 0x42, 0xbc, 0x08, 0xd5, 0x32, 0xbc, 0x0e, 0xc9, 0x39,
+ 0x43, 0x11, 0x8a, 0xc2, 0xbc, 0x1d, 0xcf, 0x63, 0x1b, 0x0e, 0xc9, 0x20,
+ 0xc6, 0x02, 0x91, 0x0e, 0xd2, 0xc1, 0xc6, 0x23, 0x24, 0x0e, 0xd2, 0xb8,
+ 0xc6, 0x12, 0x88, 0x0e, 0xd2, 0xb1, 0x46, 0x18, 0xf0, 0x42, 0xbc, 0x29,
+ 0x00, 0x42, 0xbc, 0x3b, 0x00, 0x42, 0xbc, 0x47, 0xc9, 0x46, 0xf7, 0x0e,
+ 0xd2, 0x53, 0x02, 0xbc, 0x53, 0xc4, 0x38, 0x83, 0x0e, 0xd2, 0x3b, 0x02,
+ 0xbc, 0x57, 0xc8, 0xc3, 0xb3, 0x0e, 0xd2, 0x31, 0xc7, 0x29, 0xba, 0x0e,
+ 0xd2, 0x29, 0xc6, 0x03, 0xfa, 0x0e, 0xd2, 0x20, 0x00, 0x42, 0xbc, 0x5b,
+ 0x00, 0x42, 0xbc, 0x67, 0xc2, 0x02, 0x6a, 0x0e, 0xd0, 0x81, 0xc4, 0x00,
+ 0x68, 0x0e, 0xd0, 0x68, 0xcb, 0x97, 0x3d, 0x0e, 0xcf, 0xdb, 0x02, 0xbc,
+ 0x73, 0xc3, 0x00, 0x36, 0x0e, 0xcf, 0xc0, 0xc5, 0x15, 0x2e, 0x0e, 0xcf,
+ 0xb1, 0xc5, 0x00, 0x3e, 0x0e, 0xcf, 0xa8, 0x97, 0x08, 0xae, 0xe8, 0x8b,
+ 0x08, 0xae, 0xd0, 0xd6, 0x2e, 0xbb, 0x08, 0xae, 0xc1, 0x83, 0x08, 0xac,
+ 0xf0, 0xc2, 0x01, 0x0e, 0x08, 0xac, 0xc9, 0x83, 0x08, 0xac, 0xc0, 0x8e,
+ 0x08, 0xac, 0x43, 0x02, 0xbc, 0x79, 0x94, 0x08, 0xac, 0x32, 0x02, 0xbc,
+ 0x7d, 0xc2, 0x01, 0x0e, 0x08, 0xac, 0xd9, 0x83, 0x08, 0xac, 0xd0, 0x45,
+ 0x01, 0xac, 0xc2, 0xbc, 0x81, 0xcb, 0x98, 0x87, 0x08, 0xae, 0x7a, 0x02,
+ 0xbc, 0xa5, 0xc3, 0x02, 0x1d, 0x08, 0xae, 0x29, 0xc3, 0x05, 0xe3, 0x08,
+ 0xae, 0x20, 0xc4, 0x21, 0x28, 0x08, 0xad, 0xf9, 0xc5, 0x45, 0xcf, 0x08,
+ 0xad, 0xf0, 0x8e, 0x05, 0x45, 0xe8, 0x94, 0x05, 0x45, 0xd8, 0x94, 0x05,
+ 0x44, 0x43, 0x02, 0xbc, 0xab, 0x8e, 0x05, 0x44, 0x52, 0x02, 0xbc, 0xaf,
+ 0x83, 0x05, 0x44, 0xe1, 0xc2, 0x01, 0x0e, 0x05, 0x44, 0xe8, 0x83, 0x05,
+ 0x44, 0xf1, 0xc2, 0x01, 0x0e, 0x05, 0x44, 0xf8, 0xc2, 0x0a, 0x20, 0x05,
+ 0x46, 0x91, 0xc4, 0x05, 0xde, 0x05, 0x46, 0x98, 0xc3, 0x08, 0xde, 0x05,
+ 0x46, 0xa1, 0xc3, 0x0d, 0x8f, 0x05, 0x46, 0xa8, 0xc2, 0x22, 0x45, 0x05,
+ 0x46, 0xb1, 0xc4, 0x15, 0xa7, 0x05, 0x46, 0xb8, 0xe0, 0x09, 0x07, 0x0f,
+ 0xb3, 0xb0, 0x49, 0x25, 0x88, 0xc2, 0xbc, 0xb3, 0xc7, 0x1b, 0xae, 0x08,
+ 0x8e, 0x40, 0xc7, 0xc6, 0xf7, 0x08, 0x8e, 0xd9, 0xd4, 0x39, 0x56, 0x08,
+ 0x8e, 0x79, 0xc5, 0x33, 0x1a, 0x08, 0x8e, 0x51, 0xcb, 0x9c, 0xb2, 0x08,
+ 0x8e, 0x19, 0xcb, 0x93, 0xc2, 0x08, 0x8e, 0x11, 0x03, 0xc2, 0xbc, 0xbf,
+ 0x42, 0x02, 0x52, 0xc2, 0xbc, 0xcb, 0xcb, 0x21, 0x1a, 0x08, 0x8c, 0x00,
+ 0xc4, 0x24, 0x35, 0x08, 0x8e, 0xc9, 0xc5, 0x05, 0x1b, 0x08, 0x8e, 0xc1,
+ 0x15, 0xc2, 0xbc, 0xd7, 0x08, 0xc2, 0xbc, 0xe3, 0x16, 0xc2, 0xbc, 0xef,
+ 0xc3, 0x05, 0x17, 0x08, 0x8e, 0x89, 0xc4, 0x16, 0x57, 0x08, 0x8e, 0x80,
+ 0xcf, 0x68, 0x34, 0x08, 0x8e, 0x71, 0x03, 0xc2, 0xbc, 0xfb, 0x91, 0x08,
+ 0x8d, 0xf1, 0x87, 0x08, 0x8d, 0xe1, 0x48, 0xb7, 0xd7, 0xc2, 0xbd, 0x07,
+ 0x97, 0x08, 0x8d, 0xb3, 0x02, 0xbd, 0x15, 0x8b, 0x08, 0x8d, 0xa2, 0x02,
+ 0xbd, 0x19, 0x83, 0x08, 0x8d, 0x89, 0xc2, 0x0e, 0xe5, 0x08, 0x8d, 0x81,
+ 0xc2, 0x01, 0x0e, 0x08, 0x8d, 0x78, 0x83, 0x08, 0x8d, 0x71, 0x47, 0xb7,
+ 0xd8, 0x42, 0xbd, 0x1d, 0xc2, 0x00, 0x96, 0x08, 0x8d, 0x69, 0x83, 0x08,
+ 0x8d, 0x60, 0xc2, 0x01, 0x0e, 0x08, 0x8d, 0x41, 0x83, 0x08, 0x8d, 0x38,
+ 0xc2, 0x01, 0x0e, 0x08, 0x8d, 0x31, 0x83, 0x08, 0x8d, 0x28, 0x83, 0x08,
+ 0x8d, 0x21, 0xc2, 0x01, 0x01, 0x08, 0x8c, 0xf9, 0xc2, 0x1a, 0x36, 0x08,
+ 0x8c, 0xd1, 0xc2, 0x07, 0x69, 0x08, 0x8c, 0xa8, 0xc2, 0x01, 0x0e, 0x08,
+ 0x8d, 0x19, 0x83, 0x08, 0x8d, 0x11, 0x06, 0x42, 0xbd, 0x2b, 0xc2, 0x01,
+ 0x0e, 0x08, 0x8d, 0x09, 0x83, 0x08, 0x8d, 0x01, 0x16, 0x42, 0xbd, 0x35,
+ 0xc2, 0x01, 0x0e, 0x08, 0x8c, 0xc9, 0x83, 0x08, 0x8c, 0xc0, 0xc2, 0x01,
+ 0x0e, 0x08, 0x8c, 0xb9, 0x83, 0x08, 0x8c, 0xb0, 0xc2, 0x01, 0x0e, 0x08,
+ 0x8c, 0xa1, 0x83, 0x08, 0x8c, 0x98, 0xc2, 0x01, 0x0e, 0x08, 0x8c, 0x91,
+ 0x83, 0x08, 0x8c, 0x88, 0x97, 0x08, 0x8c, 0x81, 0x8b, 0x08, 0x8c, 0x71,
+ 0x83, 0x08, 0x8c, 0x20, 0x97, 0x08, 0x8c, 0x40, 0x8b, 0x08, 0x8c, 0x30,
+ 0xc3, 0x00, 0x48, 0x08, 0x22, 0xa1, 0xc2, 0x18, 0x55, 0x08, 0x22, 0xf0,
+ 0x96, 0x08, 0x23, 0x81, 0x94, 0x08, 0x23, 0xe8, 0x87, 0x08, 0x23, 0xc1,
+ 0xc3, 0x5a, 0x3f, 0x08, 0x23, 0xe0, 0xcd, 0x52, 0xb0, 0x01, 0x57, 0x41,
+ 0xd5, 0x32, 0x53, 0x01, 0x57, 0x48, 0xe0, 0x03, 0xc7, 0x01, 0x5a, 0xf8,
+ 0xc9, 0x1e, 0x1c, 0x01, 0x49, 0x31, 0xd4, 0x3d, 0x16, 0x01, 0x49, 0x50,
+ 0xc9, 0xb4, 0xda, 0x01, 0x0f, 0x91, 0xc9, 0x1e, 0x1c, 0x01, 0x49, 0x29,
+ 0xd4, 0x3c, 0x4e, 0x01, 0x49, 0x49, 0xd9, 0x20, 0x7d, 0x01, 0x49, 0x68,
+ 0xca, 0x9f, 0x3e, 0x01, 0x37, 0xb1, 0xc2, 0x00, 0xf2, 0x01, 0x1e, 0x68,
+ 0x0e, 0xc2, 0xbd, 0x3f, 0x46, 0x05, 0x07, 0xc2, 0xbd, 0x4b, 0xd0, 0x58,
+ 0x4f, 0x01, 0x2f, 0x41, 0xd8, 0x24, 0x74, 0x01, 0x2d, 0x49, 0xda, 0x1c,
+ 0xc0, 0x01, 0x2d, 0x31, 0xcd, 0x81, 0x18, 0x01, 0x2d, 0x29, 0xcf, 0x62,
+ 0xdf, 0x01, 0x2d, 0x21, 0xd1, 0x56, 0xa8, 0x01, 0x4f, 0x01, 0xce, 0x72,
+ 0x70, 0x01, 0x58, 0x91, 0xd1, 0x52, 0x02, 0x01, 0x58, 0x98, 0xc5, 0x0b,
+ 0xa2, 0x01, 0x18, 0x89, 0x89, 0x01, 0x9e, 0x90, 0x44, 0x19, 0xce, 0x42,
+ 0xbd, 0x57, 0x44, 0x19, 0xce, 0x42, 0xbd, 0x63, 0xc4, 0x64, 0xad, 0x01,
+ 0x98, 0x21, 0xc2, 0x00, 0x2c, 0x01, 0x98, 0x28, 0x92, 0x01, 0x14, 0x99,
+ 0x8e, 0x01, 0x9c, 0x40, 0xc9, 0xb6, 0x9c, 0x01, 0x9b, 0xf8, 0x00, 0x42,
+ 0xbd, 0x6f, 0xd5, 0x36, 0x58, 0x01, 0x56, 0x71, 0xc5, 0xdf, 0x72, 0x01,
+ 0x9a, 0x89, 0xc2, 0x00, 0x9a, 0x01, 0x9a, 0x90, 0xc3, 0x6f, 0x90, 0x01,
+ 0x9a, 0x99, 0xc5, 0xdb, 0x53, 0x01, 0x9a, 0xa0, 0xc2, 0x11, 0x89, 0x01,
+ 0x9a, 0xa9, 0xc6, 0xd1, 0x4e, 0x01, 0x9a, 0xb0, 0xc7, 0x03, 0xb2, 0x01,
+ 0x9d, 0x72, 0x02, 0xbd, 0x7b, 0xc3, 0x1a, 0xc4, 0x01, 0x99, 0x50, 0xc6,
+ 0xd6, 0xd0, 0x01, 0x99, 0x91, 0xc4, 0xe7, 0x0b, 0x01, 0x99, 0x99, 0xc3,
+ 0x05, 0x2a, 0x01, 0x99, 0xa8, 0xc7, 0xcf, 0x24, 0x01, 0x99, 0xb1, 0xc4,
+ 0xe4, 0x07, 0x01, 0x99, 0xc8, 0x90, 0x01, 0x99, 0xf9, 0x11, 0x42, 0xbd,
+ 0x81, 0x83, 0x01, 0x9b, 0x88, 0xc3, 0x14, 0xff, 0x01, 0x99, 0x20, 0x00,
+ 0x42, 0xbd, 0x8b, 0xd0, 0x60, 0x3f, 0x01, 0x5e, 0x81, 0xc4, 0x0e, 0x6e,
+ 0x01, 0x99, 0xe9, 0xc3, 0x16, 0xc2, 0x01, 0x9a, 0x00, 0x03, 0xc2, 0xbd,
+ 0x97, 0xc5, 0xda, 0x2c, 0x01, 0x9c, 0x00, 0xc7, 0xcb, 0x34, 0x01, 0x99,
+ 0x71, 0x0d, 0x42, 0xbd, 0xa3, 0xc2, 0x05, 0x3b, 0x01, 0x99, 0xb9, 0x10,
+ 0xc2, 0xbd, 0xad, 0xc3, 0x90, 0xd7, 0x01, 0x99, 0xd8, 0x89, 0x01, 0x96,
+ 0x69, 0x47, 0xc8, 0x01, 0x42, 0xbd, 0xb9, 0xc3, 0x08, 0xb0, 0x01, 0x98,
+ 0x59, 0x14, 0x42, 0xbd, 0xd7, 0xc6, 0xd5, 0x4a, 0x01, 0x98, 0xa9, 0xc7,
+ 0xc6, 0xdb, 0x01, 0x98, 0xb1, 0xc5, 0xe3, 0x2d, 0x01, 0x98, 0xb8, 0xc6,
+ 0xd4, 0x18, 0x01, 0x98, 0xd1, 0xc4, 0x82, 0xa1, 0x01, 0x98, 0xd8, 0xc4,
+ 0xe4, 0x7b, 0x01, 0x98, 0xe9, 0xc3, 0x9c, 0xc0, 0x01, 0x98, 0xf0, 0x00,
+ 0x42, 0xbd, 0xe3, 0xc3, 0x09, 0xe7, 0x01, 0x98, 0x71, 0xc3, 0x7b, 0xc2,
+ 0x01, 0x98, 0x79, 0x8e, 0x01, 0x9f, 0xf8, 0xc2, 0x07, 0x69, 0x01, 0x98,
+ 0x81, 0xc3, 0xec, 0x81, 0x01, 0x98, 0x89, 0xc5, 0xe1, 0x61, 0x01, 0x98,
+ 0x98, 0xc3, 0x0e, 0x70, 0x01, 0x98, 0xc8, 0xc5, 0xdc, 0xc5, 0x01, 0x98,
+ 0xf9, 0xc6, 0xd0, 0xb8, 0x01, 0x99, 0x00, 0x8b, 0x01, 0x99, 0x11, 0x91,
+ 0x01, 0x99, 0x18, 0xc2, 0x00, 0x10, 0x01, 0x99, 0x40, 0xc5, 0xdd, 0x1f,
+ 0x01, 0x99, 0x69, 0x94, 0x01, 0x9b, 0xa0, 0x0b, 0xc2, 0xbd, 0xed, 0xc3,
+ 0x02, 0xe6, 0x01, 0x9a, 0x29, 0xc4, 0xe4, 0x87, 0x01, 0x9a, 0x31, 0xc5,
+ 0xe2, 0x92, 0x01, 0x9a, 0x38, 0xc5, 0xdd, 0x1a, 0x01, 0x9a, 0x41, 0xc2,
+ 0x00, 0x47, 0x01, 0x9a, 0x4b, 0x02, 0xbd, 0xf9, 0x8e, 0x01, 0x9e, 0xa8,
+ 0xc2, 0x07, 0x69, 0x01, 0x9a, 0x5b, 0x02, 0xbd, 0xff, 0xc5, 0xc6, 0xdd,
+ 0x01, 0x9a, 0x68, 0x88, 0x01, 0x9c, 0x61, 0x89, 0x01, 0x9c, 0x69, 0x83,
+ 0x01, 0x9c, 0x11, 0x8e, 0x01, 0x9c, 0xa9, 0x8f, 0x01, 0x9c, 0xd9, 0x95,
+ 0x01, 0x9d, 0x91, 0x98, 0x01, 0x9d, 0xb1, 0x99, 0x01, 0x9d, 0xe0, 0x11,
+ 0xc2, 0xbe, 0x05, 0xc7, 0x01, 0xe9, 0x01, 0x9d, 0x09, 0xc5, 0xde, 0xaf,
+ 0x01, 0x9d, 0x28, 0xc6, 0x00, 0x3d, 0x01, 0x9e, 0xa0, 0x00, 0x42, 0xbe,
+ 0x14, 0xc5, 0x73, 0xe0, 0x01, 0x9d, 0xc8, 0xc5, 0x73, 0xe0, 0x01, 0x9d,
+ 0xf8, 0xc2, 0x00, 0x68, 0x01, 0x9a, 0x71, 0xc2, 0x04, 0x34, 0x01, 0x9a,
+ 0x78, 0x46, 0x1a, 0x91, 0xc2, 0xbe, 0x20, 0xc6, 0xd0, 0x58, 0x0f, 0x8d,
+ 0x48, 0xce, 0x70, 0x94, 0x0f, 0x8d, 0x29, 0x4f, 0x01, 0xf7, 0x42, 0xbe,
+ 0x2c, 0xcd, 0x81, 0x8d, 0x0f, 0x8d, 0x09, 0xcb, 0x9c, 0xa7, 0x0f, 0x8c,
+ 0xe0, 0xc2, 0x00, 0x06, 0x0f, 0x90, 0x99, 0xc2, 0x0e, 0xe5, 0x0f, 0x90,
+ 0x11, 0xc4, 0xe5, 0x2f, 0x0f, 0x90, 0x08, 0xd2, 0x47, 0x76, 0x0f, 0x8d,
+ 0x11, 0xc3, 0x32, 0xf0, 0x0f, 0x8c, 0xe8, 0x43, 0xec, 0xfc, 0xc2, 0xbe,
+ 0x94, 0x43, 0xec, 0xf0, 0xc2, 0xbe, 0xa0, 0x43, 0xec, 0xf6, 0xc2, 0xbe,
+ 0xd4, 0x43, 0xec, 0xf3, 0xc2, 0xbe, 0xf0, 0x43, 0xec, 0xf9, 0xc2, 0xbf,
+ 0x14, 0x44, 0xe6, 0x4b, 0x42, 0xbf, 0x26, 0x8d, 0x0f, 0x8c, 0xf1, 0xcf,
+ 0x0b, 0xf8, 0x01, 0x71, 0x60, 0xc9, 0x26, 0xef, 0x01, 0x21, 0x28, 0xc4,
+ 0x08, 0xdd, 0x01, 0x20, 0xa1, 0x16, 0xc2, 0xbf, 0x3c, 0xc3, 0x05, 0x17,
+ 0x01, 0x20, 0x88, 0xc6, 0x04, 0x1b, 0x01, 0x20, 0xc9, 0x16, 0x42, 0xbf,
+ 0x48, 0xc3, 0x1d, 0x55, 0x00, 0x43, 0x51, 0x42, 0x00, 0x30, 0xc2, 0xbf,
+ 0x57, 0xc2, 0x00, 0x9a, 0x00, 0x43, 0x39, 0xc3, 0x24, 0x3f, 0x00, 0x43,
+ 0x31, 0x10, 0xc2, 0xbf, 0x61, 0xc3, 0x1f, 0x50, 0x00, 0x43, 0x19, 0xc2,
+ 0x26, 0x94, 0x00, 0x43, 0x08, 0xc7, 0xcb, 0x96, 0x00, 0x39, 0x79, 0xc6,
+ 0xd5, 0x2c, 0x00, 0x39, 0x71, 0xc5, 0xdc, 0x0c, 0x00, 0x39, 0x68, 0xc9,
+ 0xab, 0xb6, 0x00, 0x38, 0xe0, 0xc2, 0x13, 0xfc, 0x00, 0x3a, 0x79, 0xc5,
+ 0xdd, 0xba, 0x00, 0x3a, 0x71, 0xc5, 0xda, 0x90, 0x00, 0x3a, 0x68, 0xc5,
+ 0x00, 0x34, 0x00, 0x39, 0xd9, 0xc5, 0x03, 0x50, 0x00, 0x39, 0xd0, 0x48,
+ 0x89, 0x04, 0x42, 0xbf, 0x71, 0xcc, 0x89, 0x04, 0x00, 0x38, 0x40, 0xd1,
+ 0x52, 0xce, 0x0f, 0xa8, 0x51, 0xce, 0x71, 0x12, 0x0f, 0xa8, 0x49, 0xd3,
+ 0x24, 0x91, 0x0f, 0xa8, 0x38, 0xd1, 0x57, 0x30, 0x01, 0x14, 0x59, 0xcb,
+ 0x25, 0x71, 0x01, 0x14, 0x3b, 0x02, 0xbf, 0x7d, 0x46, 0x03, 0x50, 0x42,
+ 0xbf, 0x83, 0xc4, 0x0d, 0xd3, 0x01, 0x56, 0xa1, 0xc6, 0x2d, 0xdf, 0x01,
+ 0x56, 0xb0, 0x90, 0x01, 0x03, 0xf9, 0x8b, 0x01, 0x03, 0x88, 0x8f, 0x00,
+ 0xdd, 0xf9, 0x8d, 0x00, 0xdd, 0xf0, 0x09, 0xc2, 0xbf, 0x9b, 0xc5, 0xda,
+ 0x31, 0x00, 0xdc, 0x00, 0xcf, 0x35, 0x23, 0x01, 0x56, 0x18, 0xcb, 0x0e,
+ 0xbc, 0x01, 0x56, 0x29, 0xce, 0x35, 0xda, 0x01, 0x56, 0x39, 0xcf, 0x66,
+ 0x45, 0x01, 0x56, 0x49, 0xcc, 0x23, 0x30, 0x01, 0x56, 0x58, 0x45, 0x00,
+ 0x39, 0x42, 0xbf, 0xa7, 0xc3, 0x0a, 0xa5, 0x0f, 0xb0, 0x39, 0xc4, 0x80,
+ 0x51, 0x0f, 0xb0, 0x41, 0xd0, 0x4f, 0xd2, 0x0f, 0xb0, 0x68, 0xcb, 0x1a,
+ 0x5f, 0x0f, 0xb0, 0x53, 0x02, 0xbf, 0xb9, 0xc9, 0xab, 0x65, 0x0f, 0xb0,
+ 0x70, 0x45, 0x01, 0xac, 0xc2, 0xbf, 0xbf, 0xc9, 0xb1, 0x8c, 0x01, 0x10,
+ 0x68, 0x83, 0x07, 0xf2, 0x81, 0xc9, 0xb4, 0x26, 0x07, 0xf3, 0x58, 0x46,
+ 0x01, 0xab, 0x42, 0xbf, 0xcb, 0xc3, 0x05, 0x17, 0x01, 0x0b, 0x83, 0x02,
+ 0xbf, 0xd7, 0x08, 0xc2, 0xbf, 0xdb, 0x16, 0xc2, 0xbf, 0xe5, 0x07, 0xc2,
+ 0xbf, 0xf5, 0xc4, 0x24, 0x35, 0x01, 0x0b, 0xc1, 0x15, 0x42, 0xc0, 0x01,
+ 0xcb, 0x1c, 0xe0, 0x07, 0xf2, 0xd1, 0xd6, 0x0a, 0xe8, 0x07, 0xf2, 0xf1,
+ 0xcd, 0x00, 0xd2, 0x07, 0xf2, 0xe0, 0xcb, 0x1c, 0xe0, 0x07, 0xf2, 0xc9,
+ 0xcd, 0x00, 0xd2, 0x07, 0xf2, 0xd9, 0xd6, 0x0a, 0xe8, 0x07, 0xf2, 0xe8,
+ 0xcb, 0x0e, 0xbc, 0x01, 0x55, 0x79, 0xcc, 0x23, 0x30, 0x01, 0x55, 0x88,
+ 0xc8, 0x08, 0xff, 0x01, 0x55, 0xa9, 0xcf, 0x66, 0x45, 0x01, 0x55, 0xc8,
+ 0xcb, 0x1c, 0xe0, 0x07, 0xf1, 0xa9, 0xd6, 0x0a, 0xe8, 0x07, 0xf1, 0xc9,
+ 0xd8, 0x23, 0x3c, 0x07, 0xf1, 0xd9, 0xd4, 0x3a, 0x82, 0x07, 0xf1, 0xe9,
+ 0xcd, 0x0f, 0x83, 0x07, 0xf1, 0xf9, 0x46, 0x01, 0x31, 0xc2, 0xc0, 0x0d,
+ 0xce, 0x26, 0x2e, 0x07, 0xf2, 0x39, 0x05, 0x42, 0xc0, 0x19, 0xcc, 0x00,
+ 0xd3, 0x07, 0xf1, 0xc1, 0xcd, 0x6a, 0x7f, 0x07, 0xf2, 0x10, 0x4e, 0x23,
+ 0x42, 0xc2, 0xc0, 0x25, 0xce, 0x6a, 0x7e, 0x07, 0xf2, 0x20, 0xc6, 0xd7,
+ 0xa2, 0x0f, 0x85, 0x11, 0xc6, 0x7c, 0x59, 0x0f, 0x85, 0x91, 0xc8, 0xba,
+ 0x2b, 0x0f, 0x86, 0x11, 0xc5, 0xdd, 0xe2, 0x0f, 0x86, 0x90, 0xc6, 0xd7,
+ 0xa2, 0x0f, 0x85, 0x19, 0xc6, 0x7c, 0x59, 0x0f, 0x85, 0x99, 0xc8, 0xba,
+ 0x2b, 0x0f, 0x86, 0x19, 0xc5, 0xdd, 0xe2, 0x0f, 0x86, 0x98, 0xc6, 0xd7,
+ 0xa2, 0x0f, 0x85, 0x51, 0xc6, 0x7c, 0x59, 0x0f, 0x85, 0xd1, 0xc8, 0xba,
+ 0x2b, 0x0f, 0x86, 0x51, 0xc5, 0xdd, 0xe2, 0x0f, 0x86, 0xd0, 0xc3, 0xec,
+ 0x60, 0x0f, 0x87, 0x0b, 0x02, 0xc0, 0x31, 0xc3, 0xec, 0x5d, 0x0f, 0x87,
+ 0x13, 0x02, 0xc0, 0x59, 0xc3, 0xec, 0x57, 0x0f, 0x87, 0x19, 0xc3, 0xec,
+ 0x5a, 0x0f, 0x87, 0x21, 0xc3, 0xed, 0x56, 0x0f, 0x87, 0x29, 0xc3, 0xed,
+ 0x7d, 0x0f, 0x87, 0x31, 0xc3, 0xed, 0x7a, 0x0f, 0x87, 0x39, 0xc3, 0xed,
+ 0x77, 0x0f, 0x87, 0x41, 0xc3, 0xed, 0x74, 0x0f, 0x87, 0x48, 0x46, 0xc7,
+ 0xb5, 0xc2, 0xc0, 0x61, 0xc2, 0x01, 0xb5, 0x0f, 0x87, 0x00, 0xc6, 0xd7,
+ 0xa2, 0x0f, 0x85, 0x29, 0xc6, 0x7c, 0x59, 0x0f, 0x85, 0xa9, 0xc8, 0xba,
+ 0x2b, 0x0f, 0x86, 0x29, 0xc5, 0xdd, 0xe2, 0x0f, 0x86, 0xa8, 0xc6, 0xd7,
+ 0xa2, 0x0f, 0x85, 0x31, 0xc6, 0x7c, 0x59, 0x0f, 0x85, 0xb1, 0xc8, 0xba,
+ 0x2b, 0x0f, 0x86, 0x31, 0xc5, 0xdd, 0xe2, 0x0f, 0x86, 0xb0, 0xc6, 0xd7,
+ 0xa2, 0x0f, 0x85, 0x39, 0xc6, 0x7c, 0x59, 0x0f, 0x85, 0xb9, 0xc8, 0xba,
+ 0x2b, 0x0f, 0x86, 0x39, 0xc5, 0xdd, 0xe2, 0x0f, 0x86, 0xb8, 0xc6, 0xd7,
+ 0xa2, 0x0f, 0x85, 0x61, 0xc6, 0x7c, 0x59, 0x0f, 0x85, 0xe1, 0xc8, 0xba,
+ 0x2b, 0x0f, 0x86, 0x61, 0xc5, 0xdd, 0xe2, 0x0f, 0x86, 0xe0, 0xc6, 0xd7,
+ 0xa2, 0x0f, 0x85, 0x71, 0xc6, 0x7c, 0x59, 0x0f, 0x85, 0xf1, 0xc8, 0xba,
+ 0x2b, 0x0f, 0x86, 0x71, 0xc5, 0xdd, 0xe2, 0x0f, 0x86, 0xf0, 0xcc, 0x83,
+ 0xb8, 0x01, 0x51, 0x39, 0xd1, 0x49, 0xff, 0x01, 0x51, 0x10, 0xc5, 0x00,
+ 0x34, 0x01, 0x51, 0x31, 0xc5, 0x03, 0x50, 0x01, 0x51, 0x20, 0xc2, 0x02,
+ 0x6a, 0x00, 0x04, 0x61, 0xc8, 0xb8, 0xb3, 0x00, 0x04, 0x61, 0xc4, 0x00,
+ 0x68, 0x00, 0x04, 0x59, 0xc7, 0x29, 0xba, 0x00, 0x04, 0x58, 0xc3, 0x15,
+ 0xaa, 0x01, 0x24, 0x39, 0xc3, 0x25, 0x4e, 0x01, 0x23, 0xf8, 0xc2, 0x00,
+ 0x39, 0x01, 0x90, 0x70, 0xc2, 0x00, 0x39, 0x01, 0x90, 0xc0, 0xc2, 0x00,
+ 0x39, 0x01, 0x90, 0x80, 0xc2, 0x00, 0x39, 0x01, 0x90, 0xc8, 0xc2, 0x00,
+ 0x39, 0x01, 0x90, 0x98, 0xc2, 0x00, 0x39, 0x01, 0x90, 0xd0, 0x00, 0x42,
+ 0xc0, 0x79, 0xc2, 0x00, 0x39, 0x01, 0x90, 0xb8, 0xc2, 0x00, 0x29, 0x01,
+ 0x91, 0x21, 0xc2, 0x01, 0xa5, 0x01, 0x91, 0x59, 0xc7, 0xcd, 0x10, 0x01,
+ 0x91, 0xb0, 0xc3, 0x15, 0xa8, 0x01, 0x91, 0x31, 0xc2, 0x01, 0x1f, 0x01,
+ 0x92, 0x10, 0x90, 0x01, 0x91, 0x81, 0xc7, 0xcb, 0x42, 0x01, 0x91, 0xe0,
+ 0xc3, 0x00, 0xba, 0x01, 0x91, 0x89, 0xc3, 0xeb, 0x31, 0x01, 0x91, 0xd8,
+ 0xc5, 0x56, 0xd6, 0x01, 0x91, 0xf1, 0x96, 0x01, 0x92, 0x08, 0xc6, 0x2a,
+ 0xb3, 0x08, 0xd7, 0xb0, 0x9b, 0x08, 0xd7, 0x21, 0x90, 0x08, 0xd7, 0x03,
+ 0x02, 0xc0, 0x81, 0x99, 0x08, 0xd7, 0x11, 0x8e, 0x08, 0xd7, 0x09, 0x8f,
+ 0x08, 0xd6, 0xf9, 0x96, 0x08, 0xd6, 0xf1, 0x8d, 0x08, 0xd6, 0xe9, 0x92,
+ 0x08, 0xd6, 0xe0, 0xc6, 0x2a, 0xb3, 0x08, 0xd7, 0x68, 0x19, 0xc2, 0xc0,
+ 0x85, 0xc2, 0x01, 0x04, 0x08, 0x43, 0xf1, 0xc4, 0x05, 0xde, 0x08, 0x43,
+ 0xd8, 0xc3, 0x0d, 0x8f, 0x08, 0x43, 0xe9, 0xc3, 0x08, 0xde, 0x08, 0x43,
+ 0xe0, 0x16, 0xc2, 0xc0, 0x8f, 0x15, 0xc2, 0xc0, 0x9b, 0xc4, 0x5d, 0xef,
+ 0x08, 0x43, 0xa1, 0xc4, 0xbc, 0xb7, 0x08, 0x43, 0x99, 0xc2, 0x03, 0x07,
+ 0x08, 0x43, 0x89, 0x03, 0xc2, 0xc0, 0xa5, 0xc3, 0x21, 0x00, 0x08, 0x43,
+ 0x71, 0xc9, 0xb3, 0x45, 0x08, 0x43, 0x69, 0xc3, 0x04, 0xae, 0x08, 0x43,
+ 0x61, 0xc6, 0xd7, 0x12, 0x08, 0x43, 0x59, 0xc4, 0xe5, 0x53, 0x08, 0x43,
+ 0x51, 0xc4, 0x4d, 0x48, 0x08, 0x43, 0x49, 0xc2, 0x00, 0x5b, 0x08, 0x43,
+ 0x23, 0x02, 0xc0, 0xb1, 0xc5, 0x4d, 0x42, 0x08, 0x43, 0x31, 0xc3, 0x7c,
+ 0xad, 0x08, 0x43, 0x29, 0xc6, 0x43, 0x0f, 0x08, 0x43, 0x19, 0xc5, 0x9e,
+ 0xbc, 0x08, 0x43, 0x11, 0xc4, 0xe5, 0xaf, 0x08, 0x43, 0x08, 0xc2, 0x14,
+ 0x15, 0x0b, 0x5c, 0x69, 0xc2, 0x00, 0x03, 0x0b, 0x5c, 0x31, 0xc4, 0xa8,
+ 0x13, 0x0b, 0x5b, 0xe8, 0xc3, 0x53, 0x44, 0x0b, 0x59, 0x59, 0xc3, 0xd2,
+ 0xda, 0x0b, 0x58, 0xe8, 0xc5, 0xdb, 0x44, 0x0b, 0x5b, 0xa8, 0xc4, 0xe6,
+ 0xbb, 0x0b, 0x59, 0xf9, 0xc3, 0x62, 0x0e, 0x0b, 0x59, 0xf1, 0xc3, 0x7d,
+ 0x39, 0x0b, 0x59, 0xe9, 0xc5, 0xe1, 0xc0, 0x0b, 0x59, 0xe0, 0xc3, 0x45,
+ 0x09, 0x0b, 0x59, 0xd1, 0xc2, 0x00, 0xeb, 0x0b, 0x59, 0xb8, 0xc8, 0xb9,
+ 0x73, 0x0b, 0x5b, 0x01, 0xc9, 0x70, 0x08, 0x0b, 0x5a, 0xe8, 0x04, 0xc2,
+ 0xc0, 0xb7, 0xcc, 0x82, 0xc8, 0x0f, 0xb2, 0x79, 0xcc, 0x88, 0x80, 0x0f,
+ 0xb2, 0x71, 0xc9, 0xac, 0xbb, 0x0f, 0xce, 0xa9, 0xc5, 0xde, 0x96, 0x0f,
+ 0xd6, 0x28, 0xe0, 0x02, 0x27, 0x0f, 0xb2, 0x60, 0xcb, 0x95, 0x2d, 0x0f,
+ 0xce, 0xb1, 0xce, 0x71, 0x74, 0x0f, 0xce, 0xc0, 0x91, 0x08, 0x48, 0xd1,
+ 0xc4, 0x15, 0xa9, 0x08, 0x48, 0xc0, 0xc9, 0x21, 0x1c, 0x05, 0x43, 0x98,
+ 0x83, 0x05, 0x42, 0x81, 0xc2, 0x01, 0x0e, 0x05, 0x42, 0x88, 0x83, 0x05,
+ 0x43, 0x49, 0xc2, 0x01, 0x0e, 0x05, 0x43, 0x50, 0xc2, 0x00, 0x4c, 0x05,
+ 0x43, 0x39, 0xc2, 0x1a, 0x36, 0x05, 0x43, 0x41, 0xc2, 0x00, 0x9a, 0x05,
+ 0x43, 0x88, 0xd4, 0x3b, 0x86, 0x08, 0x0f, 0xe8, 0xcf, 0x24, 0x35, 0x08,
+ 0xd8, 0x39, 0xcf, 0x64, 0xdd, 0x08, 0xd8, 0x30, 0xca, 0x1e, 0x1b, 0x08,
+ 0xd8, 0x28, 0xca, 0x1e, 0x1b, 0x08, 0xd8, 0x18, 0xc4, 0x21, 0x28, 0x00,
+ 0x4a, 0x69, 0xc5, 0x45, 0xcf, 0x00, 0x48, 0x18, 0xc7, 0x7d, 0xf8, 0x00,
+ 0x49, 0xe9, 0xc7, 0x10, 0xac, 0x00, 0x48, 0x10, 0x00, 0x42, 0xc0, 0xc3,
+ 0xc6, 0xc6, 0xf8, 0x05, 0x47, 0xe1, 0xd2, 0x4e, 0xd8, 0x05, 0x47, 0x90,
+ 0x94, 0x00, 0x4a, 0x20, 0x8e, 0x00, 0x4b, 0x18, 0x87, 0x00, 0x4a, 0xb8,
+ 0x83, 0x00, 0x49, 0xb1, 0x44, 0x2f, 0xae, 0x42, 0xc0, 0xd3, 0x8e, 0x00,
+ 0x48, 0x63, 0x02, 0xc0, 0xdf, 0x94, 0x00, 0x48, 0x5a, 0x02, 0xc0, 0xe3,
+ 0xc2, 0x00, 0x96, 0x00, 0x49, 0xa1, 0x83, 0x00, 0x49, 0x98, 0xc2, 0x01,
+ 0x01, 0x00, 0x49, 0x49, 0x83, 0x00, 0x49, 0x18, 0xc2, 0x01, 0x0e, 0x00,
+ 0x49, 0x11, 0x83, 0x00, 0x49, 0x09, 0x06, 0x42, 0xc0, 0xe7, 0xc2, 0x01,
+ 0x0e, 0x00, 0x49, 0x01, 0x83, 0x00, 0x48, 0xf8, 0x45, 0xa4, 0x94, 0x42,
+ 0xc0, 0xf1, 0x83, 0x00, 0x48, 0xc1, 0xc2, 0x01, 0x0e, 0x00, 0x4a, 0xd0,
+ 0x83, 0x00, 0x48, 0xb1, 0xc2, 0x01, 0x0e, 0x00, 0x4a, 0xc8, 0x87, 0x00,
+ 0x4b, 0xb8, 0xc4, 0x15, 0xa7, 0x00, 0x4b, 0x69, 0xc2, 0x22, 0x45, 0x00,
+ 0x4b, 0x60, 0xc3, 0x0d, 0x8f, 0x00, 0x4b, 0x59, 0xc3, 0x08, 0xde, 0x00,
+ 0x4b, 0x50, 0xc4, 0x05, 0xde, 0x00, 0x4b, 0x49, 0xc2, 0x0a, 0x20, 0x00,
+ 0x4b, 0x40, 0x8b, 0x08, 0x20, 0x01, 0x83, 0x08, 0x20, 0x13, 0x02, 0xc0,
+ 0xfd, 0x91, 0x08, 0x20, 0x23, 0x02, 0xc1, 0x01, 0x87, 0x08, 0x20, 0x08,
+ 0x8b, 0x08, 0x20, 0x31, 0x87, 0x08, 0x20, 0x39, 0x83, 0x08, 0x20, 0x43,
+ 0x02, 0xc1, 0x05, 0x91, 0x08, 0x20, 0x52, 0x02, 0xc1, 0x09, 0x99, 0x08,
+ 0x20, 0x69, 0x8b, 0x08, 0x21, 0x30, 0xc2, 0x01, 0xe6, 0x08, 0x20, 0x99,
+ 0xc3, 0x0f, 0x54, 0x08, 0x20, 0xe0, 0x88, 0x08, 0x20, 0xc9, 0xc2, 0x01,
+ 0x47, 0x08, 0x20, 0xd9, 0x95, 0x08, 0x20, 0xeb, 0x02, 0xc1, 0x0d, 0x94,
+ 0x08, 0x21, 0x09, 0x8e, 0x08, 0x21, 0x11, 0x8f, 0x08, 0x21, 0x19, 0x90,
+ 0x08, 0x21, 0x23, 0x02, 0xc1, 0x11, 0x99, 0x08, 0x21, 0x38, 0xc2, 0x01,
+ 0xe6, 0x08, 0x20, 0xf1, 0xc3, 0x0f, 0x54, 0x08, 0x21, 0x00, 0x8b, 0x08,
+ 0x21, 0x41, 0x87, 0x08, 0x21, 0x49, 0x83, 0x08, 0x21, 0x53, 0x02, 0xc1,
+ 0x15, 0x91, 0x08, 0x21, 0x62, 0x02, 0xc1, 0x19, 0x8b, 0x08, 0x21, 0x71,
+ 0x87, 0x08, 0x21, 0x79, 0x83, 0x08, 0x21, 0x83, 0x02, 0xc1, 0x1d, 0x91,
+ 0x08, 0x21, 0x92, 0x02, 0xc1, 0x21, 0x99, 0x08, 0x21, 0xa9, 0x8b, 0x08,
+ 0x22, 0x70, 0xc2, 0x01, 0xe6, 0x08, 0x21, 0xd9, 0xc3, 0x0f, 0x54, 0x08,
+ 0x22, 0x20, 0x88, 0x08, 0x22, 0x09, 0xc2, 0x01, 0x47, 0x08, 0x22, 0x19,
+ 0x95, 0x08, 0x22, 0x2b, 0x02, 0xc1, 0x25, 0x94, 0x08, 0x22, 0x49, 0x8e,
+ 0x08, 0x22, 0x51, 0x8f, 0x08, 0x22, 0x59, 0x90, 0x08, 0x22, 0x63, 0x02,
+ 0xc1, 0x29, 0x99, 0x08, 0x22, 0x78, 0xc2, 0x01, 0xe6, 0x08, 0x22, 0x31,
+ 0xc3, 0x0f, 0x54, 0x08, 0x22, 0x40, 0xc9, 0x11, 0x47, 0x01, 0x24, 0x71,
+ 0xc5, 0x00, 0xea, 0x0f, 0x88, 0x40, 0xc9, 0x11, 0x47, 0x01, 0x24, 0x69,
+ 0xc5, 0x00, 0xea, 0x0f, 0x88, 0x38, 0xc9, 0x11, 0x47, 0x01, 0x24, 0x61,
+ 0xc5, 0x00, 0xea, 0x0f, 0x88, 0x30, 0xc9, 0x11, 0x47, 0x01, 0x24, 0x59,
+ 0xc5, 0x00, 0xea, 0x0f, 0x88, 0x28, 0xc9, 0x11, 0x47, 0x01, 0x24, 0x51,
+ 0xc5, 0x00, 0xea, 0x0f, 0x88, 0x20, 0xc9, 0x11, 0x47, 0x01, 0x24, 0x49,
+ 0xc5, 0x00, 0xea, 0x0f, 0x88, 0x18, 0xc4, 0x15, 0xa7, 0x08, 0xca, 0xb9,
+ 0xc2, 0x22, 0x45, 0x08, 0xca, 0xb0, 0xc3, 0x0d, 0x8f, 0x08, 0xca, 0xa9,
+ 0xc3, 0x08, 0xde, 0x08, 0xca, 0xa0, 0xc4, 0x05, 0xde, 0x08, 0xca, 0x99,
+ 0xc2, 0x0a, 0x20, 0x08, 0xca, 0x90, 0x8b, 0x08, 0xc9, 0xb9, 0x83, 0x08,
+ 0xc9, 0x80, 0x97, 0x08, 0xc9, 0xa0, 0x8b, 0x08, 0xc9, 0x90, 0xc2, 0x01,
+ 0x0e, 0x08, 0xc8, 0xc9, 0x83, 0x08, 0xc8, 0xc0, 0xc4, 0x15, 0xa7, 0x01,
+ 0x3c, 0x81, 0xc2, 0x22, 0x45, 0x01, 0x3c, 0x78, 0xc3, 0x0d, 0x8f, 0x01,
+ 0x3c, 0x71, 0xc3, 0x08, 0xde, 0x01, 0x3c, 0x68, 0xc4, 0x05, 0xde, 0x01,
+ 0x3c, 0x61, 0xc2, 0x0a, 0x20, 0x01, 0x3c, 0x58, 0xc9, 0x35, 0x23, 0x0f,
+ 0xc8, 0x50, 0xc9, 0x35, 0x23, 0x0f, 0xc8, 0x58, 0x42, 0x00, 0x47, 0xc2,
+ 0xc1, 0x2d, 0x19, 0x42, 0xc1, 0x39, 0xcf, 0x60, 0xb0, 0x0f, 0xc2, 0x99,
+ 0xcc, 0x89, 0xac, 0x0f, 0xc1, 0xd8, 0x45, 0x11, 0x8e, 0xc2, 0xc1, 0x45,
+ 0x51, 0x02, 0x11, 0x42, 0xc1, 0x51, 0xc4, 0x00, 0x63, 0x01, 0x0c, 0x9b,
+ 0x02, 0xc1, 0x5d, 0xc5, 0xe0, 0x58, 0x01, 0x70, 0xa0, 0xda, 0x1d, 0x28,
+ 0x0f, 0xc4, 0xb8, 0xcb, 0x83, 0x05, 0x01, 0x0f, 0x19, 0xcb, 0x82, 0x81,
+ 0x01, 0x0e, 0x98, 0xc5, 0x00, 0x62, 0x01, 0x58, 0x39, 0xd3, 0x40, 0xc8,
+ 0x01, 0x5c, 0x58, 0xc9, 0xac, 0x07, 0x01, 0x3d, 0xf9, 0x47, 0x1f, 0x71,
+ 0xc2, 0xc1, 0x61, 0xca, 0xa3, 0x44, 0x01, 0x53, 0xa0, 0xc3, 0x02, 0x1d,
+ 0x01, 0x1f, 0xc3, 0x02, 0xc1, 0x6d, 0xc4, 0x03, 0x5d, 0x01, 0x00, 0xb0,
+ 0xc4, 0x14, 0x2e, 0x01, 0x16, 0x99, 0xc6, 0xc7, 0x53, 0x01, 0x57, 0x58,
+ 0xc8, 0x05, 0xbf, 0x01, 0x16, 0x91, 0xc4, 0x20, 0x0c, 0x01, 0x11, 0x60,
+ 0x17, 0xc2, 0xc1, 0x71, 0x46, 0x0c, 0x9d, 0xc2, 0xc1, 0x89, 0x16, 0xc2,
+ 0xc1, 0x95, 0xcf, 0x63, 0x2a, 0x01, 0x57, 0xe8, 0x14, 0xc2, 0xc1, 0xa1,
+ 0xc3, 0x21, 0x5f, 0x01, 0x4f, 0xd0, 0xc5, 0xd5, 0xa5, 0x01, 0x01, 0x09,
+ 0xc8, 0x32, 0x60, 0x01, 0x57, 0x50, 0xdd, 0x0f, 0xd6, 0x01, 0x00, 0xf9,
+ 0xc5, 0x57, 0xf7, 0x01, 0x72, 0x00, 0x11, 0xc2, 0xc1, 0xb0, 0xdc, 0x12,
+ 0xfe, 0x01, 0x4c, 0xa8, 0xc9, 0x09, 0xde, 0x01, 0x55, 0x0b, 0x02, 0xc1,
+ 0xc8, 0xcc, 0x06, 0xfb, 0x01, 0x55, 0x10, 0x8c, 0x01, 0x0a, 0x49, 0x8b,
+ 0x01, 0x0a, 0x41, 0x87, 0x01, 0x0a, 0x39, 0x86, 0x01, 0x0a, 0x30, 0x8b,
+ 0x01, 0x09, 0xf8, 0xc9, 0x09, 0xde, 0x01, 0x54, 0xd9, 0xcc, 0x06, 0xfb,
+ 0x01, 0x54, 0xe0, 0xc2, 0x01, 0x0e, 0x08, 0xc0, 0xb9, 0x83, 0x08, 0xc0,
+ 0xb0, 0xc2, 0x01, 0x0e, 0x08, 0xc0, 0xa9, 0x83, 0x08, 0xc0, 0xa0, 0xc4,
+ 0x00, 0x63, 0x0d, 0xe4, 0xc9, 0xc4, 0x34, 0x22, 0x0d, 0xe4, 0x80, 0xc7,
+ 0x29, 0x5e, 0x0d, 0xe3, 0x98, 0xc3, 0x03, 0x5e, 0x0d, 0xe4, 0xb1, 0xc9,
+ 0xb2, 0xc7, 0x0d, 0xe4, 0x98, 0xc5, 0x04, 0x62, 0x0d, 0xe3, 0xe0, 0xc2,
+ 0x01, 0x16, 0x0d, 0xe1, 0xa8, 0xc2, 0x01, 0x16, 0x0d, 0xe1, 0x98, 0xc2,
+ 0x00, 0x3d, 0x0d, 0xe1, 0x70, 0xc6, 0x00, 0x33, 0x0d, 0xe1, 0x30, 0xc2,
+ 0x01, 0x16, 0x0d, 0xe2, 0x00, 0x90, 0x0d, 0xe3, 0x49, 0x99, 0x0d, 0xe2,
0x10, 0x90, 0x0d, 0xe3, 0x39, 0x87, 0x0d, 0xe2, 0x71, 0x8a, 0x0d, 0xe2,
- 0x60, 0xc2, 0x00, 0x70, 0x0d, 0xe1, 0x88, 0xc9, 0x37, 0x1e, 0x0d, 0xe1,
- 0x78, 0xc2, 0x00, 0x70, 0x0d, 0xe1, 0x68, 0xd2, 0x4b, 0x66, 0x0d, 0xe1,
- 0x20, 0xc2, 0x00, 0x70, 0x0d, 0xe1, 0x60, 0xc2, 0x00, 0x70, 0x0d, 0xe1,
- 0x58, 0xd0, 0x59, 0x22, 0x01, 0x3e, 0x41, 0xd6, 0x2b, 0xa7, 0x01, 0x4f,
- 0xb9, 0xc8, 0x17, 0x45, 0x01, 0x4f, 0xa8, 0xc7, 0x0a, 0x61, 0x01, 0x16,
- 0x68, 0xc9, 0xb2, 0x3d, 0x0f, 0xac, 0x99, 0xc7, 0xc9, 0x8a, 0x0f, 0xac,
- 0x90, 0xcf, 0x00, 0xaf, 0x01, 0x80, 0xe8, 0xcc, 0x8c, 0x80, 0x01, 0x1d,
- 0x31, 0xc9, 0x4f, 0x27, 0x01, 0x1d, 0x29, 0xcc, 0x84, 0x70, 0x01, 0x1d,
- 0x21, 0x45, 0x00, 0x6c, 0x42, 0xc0, 0x87, 0x46, 0x00, 0x6b, 0x42, 0xc0,
- 0xa5, 0xcc, 0x8b, 0xc0, 0x01, 0x3f, 0xa1, 0xcc, 0x11, 0x65, 0x01, 0x0f,
- 0xa0, 0xc2, 0x00, 0x54, 0x01, 0x10, 0xfb, 0x02, 0xc0, 0xb1, 0xc9, 0xb5,
- 0xb8, 0x0f, 0xaf, 0x78, 0xd6, 0x31, 0xd7, 0x0f, 0xdb, 0xf1, 0xd6, 0x01,
- 0xd1, 0x0f, 0xdb, 0xf8, 0x44, 0x05, 0xf1, 0xc2, 0xc0, 0xb5, 0xc3, 0x01,
- 0xe4, 0x01, 0x2c, 0x80, 0xca, 0xa3, 0x6a, 0x01, 0x1d, 0x69, 0xcc, 0x81,
- 0xa0, 0x01, 0x1d, 0x61, 0xca, 0xa0, 0xf4, 0x01, 0x1d, 0x58, 0xc2, 0x00,
- 0x54, 0x01, 0x15, 0xfb, 0x02, 0xc0, 0xc1, 0xd6, 0x19, 0x02, 0x0f, 0xdb,
- 0x70, 0xcd, 0x40, 0x12, 0x0f, 0xdc, 0x41, 0xce, 0x0a, 0xb9, 0x0f, 0xdc,
- 0x50, 0xd6, 0x2f, 0xc7, 0x01, 0x4b, 0x81, 0xcc, 0x09, 0x52, 0x01, 0x80,
- 0x58, 0xcc, 0x02, 0x53, 0x01, 0x4c, 0x21, 0xcd, 0x66, 0x34, 0x01, 0x80,
- 0x78, 0xd9, 0x1d, 0x7a, 0x0f, 0xc4, 0xb1, 0xc9, 0xb4, 0x1a, 0x01, 0x0f,
- 0x80, 0xca, 0x03, 0x7d, 0x0f, 0xc4, 0x91, 0x48, 0x01, 0xef, 0x42, 0xc0,
- 0xc7, 0xc5, 0x01, 0x0f, 0x01, 0x0e, 0xd9, 0xca, 0x52, 0x78, 0x01, 0x48,
- 0x78, 0x46, 0x01, 0x0f, 0xc2, 0xc0, 0xdc, 0xd1, 0x52, 0x71, 0x01, 0x48,
- 0x80, 0xd6, 0x31, 0x95, 0x01, 0x0e, 0x61, 0x4a, 0x02, 0xd8, 0x42, 0xc0,
- 0xe8, 0xd5, 0x03, 0x72, 0x0f, 0xc0, 0xb1, 0x0e, 0xc2, 0xc0, 0xf4, 0x15,
- 0xc2, 0xc1, 0x00, 0x42, 0x01, 0x4a, 0xc2, 0xc1, 0x0c, 0xcf, 0x2c, 0x05,
- 0x01, 0x0f, 0xc1, 0xd0, 0x59, 0x42, 0x01, 0x0d, 0xa1, 0xc4, 0x02, 0x83,
- 0x01, 0x0d, 0x51, 0x16, 0xc2, 0xc1, 0x18, 0xca, 0xa2, 0x16, 0x01, 0x4a,
- 0x29, 0xd9, 0x1f, 0xeb, 0x0f, 0xc0, 0x31, 0xcc, 0x82, 0x48, 0x0f, 0xc4,
- 0xd0, 0x43, 0x11, 0xa6, 0xc2, 0xc1, 0x27, 0x47, 0x23, 0xd8, 0x42, 0xc1,
- 0x33, 0xd1, 0x55, 0x08, 0x01, 0x49, 0x00, 0x45, 0x00, 0x96, 0xc2, 0xc1,
- 0x43, 0x43, 0x00, 0x58, 0x42, 0xc1, 0x5b, 0x00, 0xc2, 0xc1, 0x61, 0xc5,
- 0x12, 0xea, 0x01, 0x48, 0xe0, 0xc9, 0x4f, 0xff, 0x01, 0x0c, 0x40, 0xc4,
- 0xe6, 0x47, 0x01, 0x0c, 0x00, 0x00, 0x42, 0xc1, 0x6d, 0x00, 0x42, 0xc1,
- 0x79, 0xe0, 0x00, 0x47, 0x0f, 0xac, 0xb0, 0x03, 0xc2, 0xc1, 0x85, 0xc2,
- 0x0d, 0xf7, 0x00, 0xb7, 0xb1, 0xc2, 0x03, 0x48, 0x00, 0xb7, 0xa9, 0xc2,
- 0x07, 0x43, 0x00, 0xb7, 0xa0, 0x49, 0xab, 0x62, 0x42, 0xc1, 0x8f, 0xc2,
- 0x04, 0x2a, 0x00, 0xb5, 0xa1, 0x83, 0x00, 0xb5, 0x90, 0xc3, 0x74, 0x9a,
- 0x00, 0xb6, 0xe0, 0xc2, 0x1e, 0x62, 0x00, 0xb7, 0x31, 0xc6, 0xd4, 0x6b,
- 0x00, 0xb6, 0xc1, 0xc5, 0xdf, 0x43, 0x00, 0xb6, 0x29, 0xc8, 0xbc, 0xcd,
- 0x00, 0xb5, 0xe1, 0xc5, 0x72, 0x78, 0x00, 0xb5, 0x60, 0xc3, 0x3d, 0x8f,
- 0x00, 0xb7, 0x21, 0x90, 0x00, 0xb5, 0x98, 0x8e, 0x00, 0xb6, 0xd9, 0x92,
- 0x00, 0xb6, 0xa1, 0x90, 0x00, 0xb6, 0x00, 0x94, 0x00, 0xb6, 0x21, 0xc9,
- 0xab, 0xaa, 0x00, 0xb5, 0xb8, 0x90, 0x05, 0x28, 0x08, 0x87, 0x05, 0x28,
- 0x11, 0x90, 0x05, 0x2f, 0x28, 0x90, 0x05, 0x29, 0x38, 0x90, 0x05, 0x2a,
- 0x68, 0x91, 0x05, 0x2b, 0x99, 0x90, 0x05, 0x2d, 0xf0, 0x90, 0x05, 0x2c,
- 0xc0, 0x87, 0x05, 0x28, 0x1b, 0x02, 0xc1, 0xc5, 0x90, 0x05, 0x2f, 0x38,
- 0x90, 0x05, 0x29, 0x48, 0x90, 0x05, 0x2a, 0x78, 0x91, 0x05, 0x2b, 0xa3,
- 0x02, 0xc1, 0xc9, 0x90, 0x05, 0x2e, 0x00, 0x90, 0x05, 0x2c, 0xd0, 0x87,
- 0x05, 0x28, 0x28, 0x91, 0x05, 0x2b, 0xb0, 0x87, 0x05, 0x2f, 0x4b, 0x02,
- 0xc1, 0xcd, 0x8b, 0x05, 0x29, 0x59, 0x83, 0x05, 0x2a, 0x89, 0x91, 0x05,
- 0x2e, 0x13, 0x02, 0xc1, 0xd1, 0x97, 0x05, 0x2c, 0xe0, 0x87, 0x05, 0x28,
- 0x38, 0x91, 0x05, 0x2b, 0xc0, 0x87, 0x05, 0x2f, 0x5b, 0x02, 0xc1, 0xd5,
- 0x8b, 0x05, 0x29, 0x69, 0x83, 0x05, 0x2a, 0x99, 0x91, 0x05, 0x2e, 0x23,
- 0x02, 0xc1, 0xd9, 0x97, 0x05, 0x2c, 0xf0, 0x87, 0x05, 0x2f, 0x73, 0x02,
- 0xc1, 0xdd, 0x8b, 0x05, 0x29, 0x79, 0x83, 0x05, 0x2a, 0xb1, 0x91, 0x05,
- 0x2e, 0x33, 0x02, 0xc1, 0xe1, 0x97, 0x05, 0x2d, 0x00, 0x87, 0x05, 0x29,
- 0x08, 0x91, 0x05, 0x2c, 0x90, 0x87, 0x05, 0x2f, 0x63, 0x02, 0xc1, 0xe5,
- 0x8b, 0x05, 0x29, 0x71, 0x83, 0x05, 0x2a, 0xa3, 0x02, 0xc1, 0xed, 0x91,
- 0x05, 0x2e, 0x2b, 0x02, 0xc1, 0xf1, 0x97, 0x05, 0x2c, 0xf8, 0x87, 0x05,
- 0x28, 0xf0, 0x90, 0x05, 0x2b, 0x58, 0x91, 0x05, 0x2c, 0x78, 0x87, 0x05,
- 0x2f, 0x7b, 0x02, 0xc1, 0xf5, 0x8b, 0x05, 0x29, 0x81, 0x83, 0x05, 0x2a,
- 0xb9, 0x91, 0x05, 0x2e, 0x3b, 0x02, 0xc1, 0xfd, 0x97, 0x05, 0x2d, 0x08,
- 0x87, 0x05, 0x29, 0x01, 0x90, 0x05, 0x30, 0x38, 0x91, 0x05, 0x2c, 0x88,
- 0x87, 0x05, 0x28, 0x60, 0x91, 0x05, 0x2b, 0xe8, 0x87, 0x05, 0x28, 0x68,
- 0x91, 0x05, 0x2b, 0xf0, 0x87, 0x05, 0x28, 0x70, 0x87, 0x05, 0x2f, 0xa3,
- 0x02, 0xc2, 0x05, 0x8b, 0x05, 0x29, 0xa1, 0x83, 0x05, 0x2a, 0xd9, 0x91,
- 0x05, 0x2e, 0x63, 0x02, 0xc2, 0x09, 0x97, 0x05, 0x2d, 0x28, 0x91, 0x05,
- 0x2b, 0xf8, 0x87, 0x05, 0x2f, 0xab, 0x02, 0xc2, 0x0d, 0x0a, 0xc2, 0xc2,
- 0x11, 0x8b, 0x05, 0x29, 0xa9, 0x83, 0x05, 0x2a, 0xe1, 0x91, 0x05, 0x2e,
- 0x6b, 0x02, 0xc2, 0x2b, 0x97, 0x05, 0x2d, 0x30, 0x87, 0x05, 0x28, 0xa0,
- 0x91, 0x05, 0x2c, 0x28, 0x87, 0x05, 0x28, 0x91, 0xc8, 0x4e, 0x0a, 0x05,
- 0x30, 0x60, 0x91, 0x05, 0x2c, 0x18, 0x87, 0x05, 0x28, 0x98, 0x91, 0x05,
- 0x2c, 0x20, 0x87, 0x05, 0x2f, 0xd3, 0x02, 0xc2, 0x2f, 0x8b, 0x05, 0x29,
- 0xd1, 0x83, 0x05, 0x2b, 0x09, 0x91, 0x05, 0x2e, 0x93, 0x02, 0xc2, 0x33,
- 0x97, 0x05, 0x2d, 0x58, 0x87, 0x05, 0x30, 0x0b, 0x02, 0xc2, 0x3d, 0x8b,
- 0x05, 0x2a, 0x09, 0x83, 0x05, 0x2b, 0x41, 0x91, 0x05, 0x2e, 0xcb, 0x02,
- 0xc2, 0x41, 0x97, 0x05, 0x2d, 0x90, 0x09, 0xc2, 0xc2, 0x45, 0xc2, 0x00,
- 0x57, 0x05, 0x2a, 0x59, 0xc2, 0x00, 0xb7, 0x05, 0x2d, 0xe1, 0xc2, 0x00,
- 0x4d, 0x05, 0x2f, 0x18, 0x87, 0x05, 0x29, 0x10, 0x87, 0x05, 0x30, 0x53,
- 0x02, 0xc2, 0x5f, 0x8b, 0x05, 0x2a, 0x41, 0x83, 0x05, 0x2b, 0x81, 0x91,
- 0x05, 0x2f, 0x03, 0x02, 0xc2, 0x63, 0x97, 0x05, 0x2d, 0xc8, 0x91, 0x05,
- 0x2c, 0x98, 0x87, 0x05, 0x28, 0xb0, 0x87, 0x05, 0x2f, 0xe3, 0x02, 0xc2,
- 0x67, 0x8b, 0x05, 0x29, 0xe1, 0x83, 0x05, 0x2b, 0x19, 0x91, 0x05, 0x2e,
- 0xa3, 0x02, 0xc2, 0x6b, 0x97, 0x05, 0x2d, 0x68, 0x91, 0x05, 0x2c, 0x38,
- 0x87, 0x05, 0x28, 0xc0, 0x87, 0x05, 0x2f, 0xf3, 0x02, 0xc2, 0x6f, 0x8b,
- 0x05, 0x29, 0xf1, 0x83, 0x05, 0x2b, 0x29, 0x91, 0x05, 0x2e, 0xb3, 0x02,
- 0xc2, 0x73, 0x97, 0x05, 0x2d, 0x78, 0x91, 0x05, 0x2c, 0x48, 0x87, 0x05,
- 0x28, 0xd0, 0x91, 0x05, 0x2c, 0x58, 0x87, 0x05, 0x28, 0xd8, 0x91, 0x05,
- 0x2c, 0x60, 0x87, 0x05, 0x28, 0xe8, 0x91, 0x05, 0x2c, 0x70, 0x90, 0x05,
- 0x2b, 0x90, 0xc3, 0x02, 0xa8, 0x05, 0x30, 0xd9, 0xc2, 0x49, 0xba, 0x05,
- 0x30, 0xf0, 0xca, 0x39, 0xef, 0x01, 0x1b, 0xf9, 0x47, 0x01, 0x7a, 0x42,
- 0xc2, 0x77, 0xc4, 0xb5, 0x00, 0x00, 0x04, 0x50, 0xca, 0x99, 0xff, 0x01,
- 0x81, 0x99, 0xca, 0x06, 0x08, 0x01, 0x81, 0xa8, 0xca, 0x9f, 0xb4, 0x00,
- 0xe7, 0x60, 0xce, 0x24, 0xb2, 0x70, 0x02, 0xd9, 0xcb, 0x1a, 0x3f, 0x70,
- 0x01, 0x41, 0xcd, 0x02, 0x52, 0x70, 0x03, 0xd8, 0x9c, 0x70, 0x02, 0xd1,
- 0x9b, 0x70, 0x02, 0xc9, 0x9a, 0x70, 0x02, 0xc1, 0x99, 0x70, 0x02, 0xb9,
- 0x98, 0x70, 0x02, 0xb1, 0x97, 0x70, 0x02, 0xa9, 0x96, 0x70, 0x02, 0xa1,
- 0x95, 0x70, 0x02, 0x99, 0x94, 0x70, 0x02, 0x91, 0x93, 0x70, 0x02, 0x89,
- 0x92, 0x70, 0x02, 0x81, 0x91, 0x70, 0x02, 0x79, 0x90, 0x70, 0x02, 0x71,
- 0x8f, 0x70, 0x02, 0x69, 0x8e, 0x70, 0x02, 0x61, 0x8d, 0x70, 0x02, 0x59,
- 0x8c, 0x70, 0x02, 0x51, 0x8b, 0x70, 0x02, 0x49, 0x8a, 0x70, 0x02, 0x41,
- 0x89, 0x70, 0x02, 0x39, 0x88, 0x70, 0x02, 0x31, 0x87, 0x70, 0x02, 0x29,
- 0x86, 0x70, 0x02, 0x21, 0x85, 0x70, 0x02, 0x19, 0x84, 0x70, 0x02, 0x11,
- 0x83, 0x70, 0x02, 0x08, 0x9c, 0x70, 0x03, 0xd1, 0x9b, 0x70, 0x03, 0xc9,
- 0x9a, 0x70, 0x03, 0xc1, 0x99, 0x70, 0x03, 0xb9, 0x98, 0x70, 0x03, 0xb1,
- 0x97, 0x70, 0x03, 0xa9, 0x96, 0x70, 0x03, 0xa1, 0x95, 0x70, 0x03, 0x99,
- 0x94, 0x70, 0x03, 0x91, 0x93, 0x70, 0x03, 0x89, 0x92, 0x70, 0x03, 0x81,
- 0x91, 0x70, 0x03, 0x79, 0x90, 0x70, 0x03, 0x71, 0x8f, 0x70, 0x03, 0x69,
- 0x8e, 0x70, 0x03, 0x61, 0x8d, 0x70, 0x03, 0x59, 0x8c, 0x70, 0x03, 0x51,
- 0x8b, 0x70, 0x03, 0x49, 0x8a, 0x70, 0x03, 0x41, 0x89, 0x70, 0x03, 0x39,
- 0x88, 0x70, 0x03, 0x31, 0x87, 0x70, 0x03, 0x29, 0x86, 0x70, 0x03, 0x21,
- 0x85, 0x70, 0x03, 0x19, 0x84, 0x70, 0x03, 0x11, 0x83, 0x70, 0x03, 0x08,
- 0xc9, 0xb5, 0xe5, 0x70, 0x02, 0x01, 0x83, 0x70, 0x01, 0x60, 0xc4, 0x18,
- 0x83, 0x70, 0x01, 0xb9, 0xc2, 0x26, 0x51, 0x70, 0x01, 0xb0, 0xc3, 0x0c,
- 0x5b, 0x70, 0x01, 0xa9, 0xc3, 0x06, 0x9e, 0x70, 0x01, 0xa0, 0xc4, 0x04,
- 0x5e, 0x70, 0x01, 0x99, 0xc2, 0x01, 0x47, 0x70, 0x01, 0x90, 0x23, 0xc2,
- 0xc2, 0x83, 0x22, 0xc2, 0xc2, 0xa7, 0x21, 0xc2, 0xc2, 0xcf, 0x20, 0xc2,
- 0xc2, 0xf7, 0x1f, 0xc2, 0xc3, 0x1f, 0x1e, 0xc2, 0xc3, 0x47, 0x1d, 0x42,
- 0xc3, 0x6f, 0x26, 0xc2, 0xc3, 0x97, 0x25, 0xc2, 0xc3, 0xbf, 0x24, 0xc2,
- 0xc3, 0xe7, 0x23, 0xc2, 0xc4, 0x0f, 0x22, 0xc2, 0xc4, 0x37, 0x21, 0xc2,
- 0xc4, 0x5f, 0x20, 0xc2, 0xc4, 0x87, 0x1f, 0xc2, 0xc4, 0xaf, 0x1e, 0xc2,
- 0xc4, 0xd7, 0x1d, 0x42, 0xc4, 0xff, 0x26, 0xc2, 0xc5, 0x27, 0x25, 0xc2,
- 0xc5, 0x4f, 0x24, 0xc2, 0xc5, 0x77, 0x23, 0xc2, 0xc5, 0x9f, 0x22, 0xc2,
- 0xc5, 0xc7, 0x21, 0xc2, 0xc5, 0xef, 0x20, 0xc2, 0xc6, 0x17, 0x1f, 0xc2,
- 0xc6, 0x3f, 0x1e, 0xc2, 0xc6, 0x67, 0x1d, 0x42, 0xc6, 0x8f, 0x26, 0xc2,
- 0xc6, 0xb7, 0x25, 0xc2, 0xc6, 0xdf, 0x24, 0xc2, 0xc7, 0x07, 0x23, 0xc2,
- 0xc7, 0x2f, 0x22, 0xc2, 0xc7, 0x57, 0x21, 0xc2, 0xc7, 0x7f, 0x20, 0xc2,
- 0xc7, 0xa7, 0x1f, 0xc2, 0xc7, 0xcf, 0x1e, 0xc2, 0xc7, 0xf7, 0x1d, 0x42,
- 0xc8, 0x1f, 0x26, 0xc2, 0xc8, 0x47, 0x25, 0xc2, 0xc8, 0x6f, 0x24, 0xc2,
- 0xc8, 0x97, 0x23, 0xc2, 0xc8, 0xbf, 0x22, 0xc2, 0xc8, 0xe7, 0x21, 0xc2,
- 0xc9, 0x0f, 0x20, 0xc2, 0xc9, 0x37, 0x1f, 0xc2, 0xc9, 0x5f, 0x1e, 0xc2,
- 0xc9, 0x87, 0x1d, 0x42, 0xc9, 0xaf, 0x26, 0xc2, 0xc9, 0xd7, 0x25, 0xc2,
- 0xc9, 0xff, 0x24, 0xc2, 0xca, 0x27, 0x23, 0xc2, 0xca, 0x4f, 0x22, 0xc2,
- 0xca, 0x77, 0x21, 0xc2, 0xca, 0x9f, 0x20, 0xc2, 0xca, 0xc7, 0x1f, 0xc2,
- 0xca, 0xef, 0x1e, 0xc2, 0xcb, 0x17, 0x1d, 0x42, 0xcb, 0x3f, 0x26, 0xc2,
- 0xcb, 0x67, 0x25, 0xc2, 0xcb, 0x8f, 0x24, 0xc2, 0xcb, 0xb7, 0x23, 0xc2,
- 0xcb, 0xdf, 0x22, 0xc2, 0xcc, 0x07, 0x21, 0xc2, 0xcc, 0x2f, 0x20, 0xc2,
- 0xcc, 0x57, 0x1f, 0xc2, 0xcc, 0x7f, 0x1e, 0xc2, 0xcc, 0xa7, 0x1d, 0x42,
- 0xcc, 0xcf, 0x26, 0xc2, 0xcc, 0xf7, 0x25, 0xc2, 0xcd, 0x1f, 0x24, 0xc2,
- 0xcd, 0x47, 0x23, 0xc2, 0xcd, 0x6f, 0x22, 0xc2, 0xcd, 0x97, 0x21, 0xc2,
- 0xcd, 0xbf, 0x20, 0xc2, 0xcd, 0xe7, 0x1f, 0xc2, 0xce, 0x0f, 0x1e, 0xc2,
- 0xce, 0x37, 0x1d, 0x42, 0xce, 0x5f, 0xc4, 0x18, 0x83, 0x0b, 0x56, 0x39,
- 0xc2, 0x26, 0x51, 0x0b, 0x56, 0x30, 0xc3, 0x0c, 0x5b, 0x0b, 0x56, 0x29,
- 0xc3, 0x06, 0x9e, 0x0b, 0x56, 0x20, 0xc4, 0x04, 0x5e, 0x0b, 0x56, 0x19,
- 0xc2, 0x01, 0x47, 0x0b, 0x56, 0x10, 0xc2, 0x00, 0xa4, 0x0b, 0x55, 0xe9,
- 0x83, 0x0b, 0x55, 0xa8, 0xc2, 0x00, 0xa4, 0x0b, 0x55, 0xe1, 0x83, 0x0b,
- 0x55, 0x88, 0x83, 0x0b, 0x55, 0xd9, 0xc7, 0xb4, 0x3f, 0x0b, 0x54, 0x80,
- 0xc2, 0x00, 0xa4, 0x0b, 0x55, 0xc9, 0xc2, 0x0c, 0x65, 0x0b, 0x55, 0xb1,
- 0x83, 0x0b, 0x55, 0x80, 0x16, 0xc2, 0xce, 0x83, 0x83, 0x0b, 0x55, 0x68,
- 0xc2, 0x00, 0xa4, 0x0b, 0x55, 0xb9, 0x83, 0x0b, 0x55, 0x10, 0x0a, 0xc2,
- 0xce, 0x8d, 0x83, 0x0b, 0x55, 0x20, 0xc2, 0x00, 0xa4, 0x0b, 0x55, 0x99,
- 0x83, 0x0b, 0x55, 0x61, 0xc2, 0x1d, 0x5f, 0x0b, 0x55, 0x41, 0xc2, 0x01,
- 0x29, 0x0b, 0x55, 0x18, 0x83, 0x0b, 0x55, 0x71, 0xc7, 0xc5, 0xc4, 0x0b,
- 0x54, 0x88, 0x83, 0x0b, 0x55, 0x59, 0x9a, 0x0b, 0x54, 0xf9, 0x93, 0x0b,
- 0x54, 0xf1, 0x85, 0x0b, 0x54, 0xe9, 0x9c, 0x0b, 0x54, 0xe0, 0xc2, 0x00,
- 0xa4, 0x0b, 0x55, 0x49, 0x83, 0x0b, 0x55, 0x38, 0xc2, 0x00, 0xa4, 0x0b,
- 0x55, 0x09, 0x83, 0x0b, 0x55, 0x00, 0x0b, 0xc2, 0xce, 0x97, 0x07, 0xc2,
- 0xce, 0xab, 0x9a, 0x0b, 0x54, 0x39, 0x93, 0x0b, 0x54, 0x31, 0x85, 0x0b,
- 0x54, 0x29, 0x9c, 0x0b, 0x54, 0x20, 0x19, 0xc2, 0xce, 0xbb, 0x9a, 0x0b,
- 0x53, 0xb9, 0x93, 0x0b, 0x53, 0xb1, 0x85, 0x0b, 0x53, 0xa9, 0x9c, 0x0b,
- 0x53, 0xa0, 0x9a, 0x0b, 0x54, 0x19, 0x93, 0x0b, 0x54, 0x11, 0x85, 0x0b,
- 0x54, 0x09, 0x9c, 0x0b, 0x54, 0x00, 0x9a, 0x0b, 0x53, 0xf9, 0x93, 0x0b,
- 0x53, 0xf1, 0x85, 0x0b, 0x53, 0xe9, 0x9c, 0x0b, 0x53, 0xe0, 0x9a, 0x0b,
- 0x53, 0xd9, 0x93, 0x0b, 0x53, 0xd1, 0x85, 0x0b, 0x53, 0xc9, 0x9c, 0x0b,
- 0x53, 0xc0, 0x9a, 0x0b, 0x53, 0x99, 0x93, 0x0b, 0x53, 0x91, 0x85, 0x0b,
- 0x53, 0x89, 0x9c, 0x0b, 0x53, 0x80, 0x03, 0xc2, 0xce, 0xcb, 0xc3, 0x12,
- 0x58, 0x08, 0xff, 0x19, 0x0b, 0x42, 0xce, 0xd7, 0xc7, 0xc8, 0x56, 0x08,
- 0xff, 0x81, 0xc7, 0xca, 0x01, 0x08, 0xfe, 0xe1, 0xc9, 0xaf, 0x49, 0x08,
- 0xfe, 0xc8, 0x17, 0xc2, 0xce, 0xe3, 0xc4, 0x48, 0x94, 0x08, 0xfe, 0xe8,
- 0x03, 0xc2, 0xce, 0xef, 0xc2, 0x00, 0xb7, 0x08, 0xfe, 0xf8, 0xc8, 0xb8,
- 0xa5, 0x08, 0xfe, 0xb9, 0xc7, 0x11, 0x41, 0x00, 0x5c, 0x10, 0x83, 0x00,
- 0x5c, 0x31, 0x8b, 0x00, 0x5c, 0x81, 0x97, 0x00, 0x5c, 0xa0, 0x8b, 0x00,
- 0x5c, 0x40, 0x97, 0x00, 0x5c, 0x50, 0x87, 0x00, 0x5c, 0x78, 0x91, 0x00,
- 0x5c, 0x98, 0xc2, 0x01, 0x29, 0x00, 0x5c, 0xc9, 0xc2, 0x1d, 0x5f, 0x00,
- 0x5c, 0xf1, 0x10, 0xc2, 0xcf, 0x01, 0x83, 0x00, 0x5d, 0x40, 0xc2, 0x00,
- 0xb3, 0x00, 0x5c, 0xf9, 0x83, 0x00, 0x5d, 0x20, 0x83, 0x00, 0x5d, 0x81,
- 0xc2, 0x02, 0x59, 0x00, 0x5d, 0x88, 0x83, 0x00, 0x5d, 0x91, 0x0e, 0x42,
- 0xcf, 0x0b, 0xc2, 0x00, 0xa4, 0x00, 0x5d, 0xb1, 0xc2, 0x0c, 0x65, 0x00,
- 0x5d, 0xb9, 0x83, 0x00, 0x5d, 0xc0, 0xc2, 0x01, 0x47, 0x00, 0x5f, 0x41,
- 0xc4, 0x04, 0x5e, 0x00, 0x5f, 0x48, 0xc3, 0x06, 0x9e, 0x00, 0x5f, 0x51,
- 0xc3, 0x0c, 0x5b, 0x00, 0x5f, 0x58, 0xc2, 0x26, 0x51, 0x00, 0x5f, 0x61,
- 0xc4, 0x18, 0x83, 0x00, 0x5f, 0x68, 0xc6, 0xa7, 0x60, 0x08, 0xfe, 0x71,
- 0xc9, 0xae, 0xb0, 0x08, 0xfe, 0x38, 0x9f, 0x08, 0xfe, 0x91, 0x9e, 0x08,
- 0xfe, 0x88, 0xc4, 0x9c, 0x2b, 0x08, 0xfe, 0x79, 0xc7, 0xc7, 0x76, 0x08,
- 0xfe, 0x20, 0x8a, 0x08, 0xfe, 0x61, 0xc4, 0x1f, 0x51, 0x08, 0xfe, 0x10,
- 0xc4, 0x0a, 0x04, 0x08, 0xfe, 0x59, 0xc8, 0x1f, 0x4d, 0x08, 0xfe, 0x41,
- 0x0a, 0x42, 0xcf, 0x15, 0x46, 0xd4, 0x05, 0xc2, 0xcf, 0x21, 0xc8, 0xae,
- 0x9e, 0x08, 0xfe, 0x18, 0xc2, 0x00, 0xa4, 0x08, 0xb4, 0xb9, 0x83, 0x08,
- 0xb4, 0xb0, 0xc2, 0x00, 0xa4, 0x08, 0xb4, 0xa9, 0x83, 0x08, 0xb4, 0xa0,
- 0xc3, 0x6c, 0xa7, 0x00, 0xd5, 0x58, 0xc3, 0x6c, 0xa7, 0x00, 0xd5, 0x48,
- 0xca, 0x9e, 0xec, 0x00, 0xd3, 0xe1, 0x46, 0x27, 0x0c, 0x42, 0xcf, 0x29,
- 0xc4, 0x62, 0xed, 0x00, 0xd2, 0xc0, 0x83, 0x00, 0xd2, 0xe1, 0x46, 0x2e,
- 0xcf, 0x42, 0xcf, 0x35, 0xc5, 0x2e, 0x7e, 0x00, 0xd2, 0xd1, 0xca, 0xa4,
- 0xbe, 0x00, 0xd2, 0xb8, 0xc5, 0x00, 0x95, 0x00, 0xd3, 0x99, 0xc5, 0x01,
- 0x62, 0x00, 0xd3, 0x60, 0x87, 0x00, 0xd3, 0x40, 0x87, 0x00, 0xd2, 0x98,
- 0xc2, 0x00, 0xa4, 0x00, 0xd2, 0x61, 0xc2, 0x1d, 0x5f, 0x00, 0xd1, 0xf9,
- 0x12, 0xc2, 0xcf, 0x41, 0xc2, 0x00, 0x67, 0x00, 0xd1, 0xe1, 0x16, 0xc2,
- 0xcf, 0x4b, 0xc5, 0x3b, 0x66, 0x00, 0xd1, 0x81, 0x05, 0xc2, 0xcf, 0x55,
- 0xc2, 0x0c, 0x65, 0x00, 0xd1, 0x51, 0x0d, 0x42, 0xcf, 0x5f, 0x83, 0x00,
- 0xd2, 0x41, 0xc2, 0x0c, 0x65, 0x00, 0xd2, 0x39, 0xc2, 0x00, 0xa4, 0x00,
- 0xd2, 0x30, 0xc2, 0x00, 0xa4, 0x00, 0xd1, 0xc9, 0x83, 0x00, 0xd1, 0xc0,
- 0xc2, 0x00, 0xa4, 0x00, 0xd1, 0x99, 0x83, 0x00, 0xd1, 0x90, 0xc2, 0x00,
- 0xa4, 0x00, 0xd1, 0x41, 0x83, 0x00, 0xd1, 0x38, 0xc2, 0x96, 0xd0, 0x00,
- 0xd1, 0x11, 0xc2, 0x00, 0xa4, 0x00, 0xd1, 0x09, 0x83, 0x00, 0xd1, 0x00,
- 0xc2, 0x00, 0xc1, 0x00, 0xd1, 0x89, 0xc2, 0x00, 0xb3, 0x00, 0xd1, 0x68,
- 0x83, 0x05, 0x55, 0xc8, 0xc2, 0x00, 0xf6, 0x05, 0x54, 0xf9, 0x91, 0x05,
- 0x54, 0xe8, 0x91, 0x05, 0x54, 0xc9, 0xc2, 0x13, 0x4f, 0x05, 0x54, 0x49,
- 0xc2, 0x20, 0x67, 0x05, 0x54, 0x88, 0xc2, 0x00, 0xf6, 0x05, 0x54, 0xb9,
- 0x91, 0x05, 0x54, 0xa8, 0x91, 0x05, 0x54, 0x59, 0xc2, 0x00, 0xf6, 0x05,
- 0x54, 0x68, 0x0a, 0xc2, 0xcf, 0x6f, 0x91, 0x05, 0x54, 0x08, 0xc2, 0x00,
- 0xf6, 0x05, 0x54, 0xf1, 0x91, 0x05, 0x54, 0xe0, 0x91, 0x05, 0x54, 0xc1,
- 0xc2, 0x13, 0x4f, 0x05, 0x54, 0x41, 0xc2, 0x20, 0x67, 0x05, 0x54, 0x80,
- 0xc2, 0x00, 0xf6, 0x05, 0x54, 0xb1, 0x91, 0x05, 0x54, 0xa0, 0xc2, 0x00,
- 0xf6, 0x05, 0x54, 0x61, 0x91, 0x05, 0x54, 0x50, 0x0a, 0xc2, 0xcf, 0x79,
- 0x91, 0x05, 0x54, 0x00, 0xd5, 0x03, 0x72, 0x01, 0x5c, 0xd1, 0xc9, 0x03,
- 0x7e, 0x01, 0x3d, 0x10, 0xc2, 0x12, 0x0a, 0x00, 0x3c, 0xd8, 0xc4, 0xdd,
- 0x41, 0x00, 0x3c, 0xf9, 0xc6, 0xb2, 0xd0, 0x00, 0x3c, 0x88, 0xc4, 0x58,
- 0x1f, 0x00, 0x3c, 0xe9, 0xc7, 0xb2, 0xcf, 0x00, 0x3c, 0x08, 0xc6, 0xb2,
- 0xd0, 0x00, 0x3c, 0x91, 0x83, 0x00, 0x3c, 0xe0, 0xc5, 0xde, 0xf3, 0x00,
- 0x70, 0x09, 0x42, 0x00, 0xf6, 0x42, 0xcf, 0x83, 0xc6, 0xd2, 0x67, 0x00,
- 0x70, 0x39, 0x43, 0xcd, 0x8e, 0xc2, 0xcf, 0x8d, 0xc7, 0xc8, 0x33, 0x00,
- 0x72, 0x68, 0xc2, 0x00, 0x57, 0x00, 0x70, 0x43, 0x02, 0xcf, 0x97, 0xc3,
- 0x00, 0x34, 0x00, 0x70, 0x49, 0xc2, 0x1b, 0xd8, 0x00, 0x70, 0x60, 0x42,
- 0x01, 0x48, 0xc2, 0xcf, 0x9b, 0x44, 0x11, 0x45, 0x42, 0xcf, 0xa5, 0x43,
- 0xe7, 0xa8, 0xc2, 0xcf, 0xc2, 0xc7, 0xb9, 0x36, 0x00, 0x72, 0x70, 0xc5,
- 0xd8, 0xbd, 0x00, 0x70, 0x71, 0xc3, 0x13, 0xa4, 0x00, 0x70, 0xa0, 0x42,
- 0x01, 0x48, 0xc2, 0xcf, 0xce, 0x0a, 0x42, 0xcf, 0xda, 0xc5, 0xdf, 0x07,
- 0x00, 0x70, 0xd9, 0x0a, 0xc2, 0xcf, 0xe6, 0xc8, 0xb7, 0x05, 0x00, 0x71,
- 0x78, 0xc3, 0x06, 0xcd, 0x00, 0x70, 0xeb, 0x02, 0xcf, 0xf2, 0xc5, 0xd9,
- 0xa8, 0x00, 0x72, 0x78, 0xc4, 0x41, 0xff, 0x00, 0x71, 0x09, 0x42, 0x00,
- 0xda, 0x42, 0xcf, 0xf6, 0xc5, 0xdf, 0x0c, 0x00, 0x71, 0x19, 0x97, 0x00,
- 0x71, 0x20, 0x42, 0x01, 0x48, 0xc2, 0xd0, 0x06, 0x97, 0x00, 0x71, 0x31,
- 0xca, 0xa6, 0x30, 0x00, 0x72, 0x28, 0xc3, 0x00, 0x3d, 0x00, 0x71, 0x59,
- 0xc6, 0xcd, 0x51, 0x00, 0x71, 0x70, 0xc2, 0x0f, 0xf5, 0x0f, 0x15, 0x61,
- 0x87, 0x0f, 0x15, 0x3b, 0x02, 0xd0, 0x12, 0x8b, 0x0f, 0x15, 0x12, 0x02,
- 0xd0, 0x16, 0xc6, 0x77, 0xb7, 0x0e, 0x98, 0xf1, 0xc3, 0x06, 0xcf, 0x0e,
- 0x98, 0xa9, 0xc7, 0xc4, 0xa5, 0x0e, 0x98, 0x58, 0xc5, 0xd7, 0x37, 0x0e,
- 0x99, 0x61, 0xc6, 0xd0, 0xab, 0x0e, 0x98, 0xd8, 0xca, 0x9f, 0x78, 0x0f,
- 0xab, 0xe0, 0xd1, 0x54, 0x4d, 0x00, 0x60, 0x01, 0xce, 0x11, 0x40, 0x00,
- 0x60, 0x20, 0x83, 0x00, 0x60, 0x31, 0x8b, 0x00, 0x60, 0x81, 0x97, 0x00,
- 0x60, 0xa0, 0x8b, 0x00, 0x60, 0x40, 0x97, 0x00, 0x60, 0x50, 0x47, 0xac,
- 0xc2, 0xc2, 0xd0, 0x1a, 0x83, 0x00, 0x61, 0xa8, 0x87, 0x00, 0x60, 0x78,
- 0x91, 0x00, 0x60, 0x98, 0x83, 0x00, 0x60, 0xa9, 0xc2, 0x00, 0xa4, 0x00,
- 0x60, 0xb0, 0x83, 0x00, 0x60, 0xb9, 0xc2, 0x00, 0xa4, 0x00, 0x60, 0xc0,
- 0xc2, 0x01, 0x29, 0x00, 0x60, 0xc9, 0xc2, 0x1d, 0x5f, 0x00, 0x60, 0xf1,
- 0xc2, 0x00, 0xc1, 0x00, 0x61, 0x19, 0x83, 0x00, 0x61, 0x42, 0x02, 0xd0,
- 0x28, 0x83, 0x00, 0x60, 0xd1, 0xc2, 0x00, 0xa4, 0x00, 0x60, 0xd8, 0x83,
- 0x00, 0x60, 0xe1, 0xc2, 0x00, 0xa4, 0x00, 0x60, 0xe8, 0x16, 0xc2, 0xd0,
- 0x2e, 0x83, 0x00, 0x61, 0x21, 0xc2, 0x00, 0xa4, 0x00, 0x61, 0x29, 0xc2,
- 0x0c, 0x65, 0x00, 0x62, 0xc0, 0x06, 0xc2, 0xd0, 0x38, 0x83, 0x00, 0x61,
- 0x31, 0xc2, 0x00, 0xa4, 0x00, 0x61, 0x39, 0xc2, 0x02, 0xb4, 0x00, 0x62,
- 0xc8, 0x83, 0x00, 0x61, 0x51, 0xc2, 0x00, 0xa4, 0x00, 0x61, 0x58, 0x83,
- 0x00, 0x61, 0x61, 0xc2, 0x00, 0xa4, 0x00, 0x61, 0x68, 0x83, 0x00, 0x61,
- 0x81, 0x14, 0x42, 0xd0, 0x42, 0x83, 0x00, 0x61, 0x91, 0x0e, 0x42, 0xd0,
- 0x4c, 0xc2, 0x00, 0xa4, 0x00, 0x61, 0xb1, 0xc2, 0x0c, 0x65, 0x00, 0x61,
- 0xb9, 0x83, 0x00, 0x61, 0xc0, 0x94, 0x00, 0x62, 0x20, 0x8e, 0x00, 0x63,
- 0x18, 0xd2, 0x15, 0xdc, 0x00, 0x63, 0xd1, 0xd3, 0x40, 0xdd, 0x00, 0x63,
- 0xe8, 0xd2, 0x15, 0xdc, 0x00, 0x63, 0xd9, 0xd3, 0x40, 0xdd, 0x00, 0x63,
- 0xf0, 0xd0, 0x01, 0xf7, 0x01, 0x4b, 0x91, 0xcf, 0x0b, 0x98, 0x01, 0x5a,
- 0x48, 0xcb, 0x93, 0x34, 0x01, 0x53, 0x59, 0xc9, 0x17, 0x7a, 0x01, 0x53,
- 0x50, 0x8e, 0x08, 0xa5, 0xc0, 0x94, 0x08, 0xa5, 0xb0, 0x8e, 0x08, 0xa4,
- 0x4b, 0x02, 0xd0, 0x56, 0x94, 0x08, 0xa4, 0x3a, 0x02, 0xd0, 0x5a, 0xc2,
- 0x00, 0xa4, 0x08, 0xa4, 0xe1, 0x83, 0x08, 0xa4, 0xd8, 0xc2, 0x00, 0xa4,
- 0x08, 0xa4, 0xd1, 0x83, 0x08, 0xa4, 0xc8, 0xca, 0xa0, 0xc2, 0x00, 0x7e,
- 0x38, 0xc9, 0xa9, 0x61, 0x00, 0x7e, 0x31, 0xc6, 0xd1, 0xdd, 0x00, 0x7e,
- 0x40, 0x00, 0x42, 0xd0, 0x5e, 0x45, 0xd8, 0x9f, 0xc2, 0xd0, 0x70, 0x44,
- 0xe5, 0x1b, 0x42, 0xd0, 0x7a, 0x83, 0x00, 0x7c, 0x81, 0xc2, 0x00, 0xa4,
- 0x00, 0x7c, 0x89, 0xc3, 0x1a, 0x80, 0x00, 0x7d, 0xc8, 0x83, 0x00, 0x7c,
- 0x91, 0xc2, 0x00, 0xa4, 0x00, 0x7c, 0x98, 0xc2, 0x01, 0x29, 0x00, 0x7c,
- 0xa1, 0xc2, 0x1d, 0x5f, 0x00, 0x7c, 0xc9, 0xc2, 0x00, 0xc1, 0x00, 0x7c,
- 0xf1, 0x83, 0x00, 0x7d, 0x18, 0x83, 0x00, 0x7c, 0xa9, 0xc2, 0x00, 0xa4,
- 0x00, 0x7c, 0xb0, 0x16, 0xc2, 0xd0, 0x84, 0x83, 0x00, 0x7c, 0xf9, 0xc2,
- 0x00, 0xa4, 0x00, 0x7d, 0x01, 0x15, 0x42, 0xd0, 0x8e, 0x06, 0xc2, 0xd0,
- 0x98, 0x83, 0x00, 0x7d, 0x09, 0xc2, 0x00, 0xa4, 0x00, 0x7d, 0x11, 0x1c,
- 0x42, 0xd0, 0xa2, 0x83, 0x00, 0x7d, 0x21, 0xc2, 0x00, 0xa4, 0x00, 0x7d,
- 0x28, 0x83, 0x00, 0x7d, 0x31, 0xc2, 0x00, 0xa4, 0x00, 0x7d, 0x38, 0xc2,
- 0x00, 0xa4, 0x00, 0x7d, 0x71, 0x83, 0x00, 0x7d, 0x78, 0xc2, 0x00, 0xa4,
- 0x00, 0x7d, 0xa1, 0xc2, 0x0c, 0x65, 0x00, 0x7d, 0xa9, 0x83, 0x00, 0x7d,
- 0xb0, 0xc2, 0x01, 0x09, 0x00, 0x7d, 0xd1, 0xc2, 0x1d, 0x5f, 0x00, 0x7d,
- 0xd9, 0xc2, 0x02, 0x59, 0x00, 0x7d, 0xe0, 0xcb, 0x95, 0x39, 0x00, 0x78,
- 0x09, 0x44, 0xe5, 0xbb, 0x42, 0xd0, 0xac, 0xcb, 0x8d, 0xb4, 0x00, 0x78,
- 0x99, 0xcc, 0x78, 0xc4, 0x00, 0x79, 0xb0, 0xca, 0x9c, 0x3a, 0x00, 0x78,
- 0x49, 0xd4, 0x3a, 0x49, 0x00, 0x7e, 0x80, 0xc5, 0x02, 0x27, 0x00, 0x78,
- 0x80, 0x83, 0x00, 0x7a, 0x51, 0xc2, 0x00, 0xa4, 0x00, 0x7a, 0x58, 0x83,
- 0x00, 0x7a, 0xc9, 0xc2, 0x00, 0xa4, 0x00, 0x7a, 0xd0, 0x83, 0x00, 0x7a,
- 0x61, 0xc2, 0x00, 0xa4, 0x00, 0x7a, 0x68, 0x83, 0x00, 0x7a, 0xd9, 0xc2,
- 0x00, 0xa4, 0x00, 0x7a, 0xe0, 0x8a, 0x01, 0x69, 0x90, 0x8a, 0x01, 0x6a,
- 0xb2, 0x02, 0xd0, 0xb8, 0x8a, 0x01, 0x69, 0xc1, 0x86, 0x01, 0x69, 0xca,
- 0x02, 0xd0, 0xbc, 0x8a, 0x01, 0x6a, 0x2a, 0x02, 0xd0, 0xc0, 0x8a, 0x01,
- 0x6a, 0x18, 0x8a, 0x01, 0x6a, 0x51, 0x9c, 0x01, 0x6b, 0x28, 0x94, 0x01,
- 0x6a, 0xa8, 0x95, 0x01, 0x6a, 0xd1, 0x8a, 0x01, 0x6a, 0xd8, 0x8a, 0x01,
- 0x6a, 0xe9, 0x96, 0x01, 0x6a, 0xf8, 0x8a, 0x01, 0x6a, 0x30, 0x90, 0x01,
- 0x6a, 0x81, 0x8a, 0x01, 0x6a, 0xb8, 0x19, 0xc2, 0xd0, 0xc4, 0xcf, 0x62,
- 0xae, 0x00, 0x46, 0xc9, 0xc4, 0x19, 0x9d, 0x00, 0x37, 0x69, 0xc4, 0xe0,
- 0x57, 0x00, 0x37, 0x18, 0x19, 0xc2, 0xd0, 0xd0, 0x15, 0xc2, 0xd0, 0xdc,
- 0x08, 0xc2, 0xd0, 0xee, 0xc4, 0x3a, 0x8e, 0x00, 0x37, 0x43, 0x02, 0xd0,
- 0xfa, 0xc3, 0x0f, 0x60, 0x00, 0x46, 0xb9, 0xc3, 0x04, 0x6c, 0x00, 0x46,
- 0xb1, 0x42, 0x02, 0xb4, 0xc2, 0xd1, 0x00, 0xc3, 0x2e, 0x60, 0x00, 0x37,
- 0x3b, 0x02, 0xd1, 0x0a, 0x0f, 0xc2, 0xd1, 0x10, 0xd4, 0x3c, 0x65, 0x00,
- 0x37, 0x09, 0xd8, 0x22, 0x08, 0x00, 0x37, 0x01, 0xcc, 0x87, 0xd0, 0x00,
- 0x36, 0xf9, 0x16, 0xc2, 0xd1, 0x1c, 0xc4, 0x2d, 0xfe, 0x00, 0x36, 0xd1,
- 0x0e, 0x42, 0xd1, 0x28, 0xc7, 0xb9, 0x7e, 0x00, 0x46, 0x49, 0xc3, 0x00,
- 0xa3, 0x00, 0x30, 0xc0, 0x00, 0x42, 0xd1, 0x34, 0xc5, 0x01, 0x62, 0x07,
- 0xde, 0x09, 0xc5, 0x00, 0x95, 0x07, 0xde, 0x00, 0x48, 0x01, 0x47, 0xc2,
- 0xd1, 0x46, 0x4a, 0x04, 0x5e, 0x42, 0xd1, 0x58, 0xd7, 0x28, 0x54, 0x07,
- 0xdd, 0xe1, 0x42, 0x00, 0xed, 0x42, 0xd1, 0x6a, 0xc5, 0x01, 0x62, 0x07,
- 0xdd, 0xd9, 0xc5, 0x00, 0x95, 0x07, 0xdd, 0xd0, 0x46, 0xd2, 0xeb, 0xc2,
- 0xd1, 0x76, 0x03, 0x42, 0xd1, 0x82, 0xcf, 0x63, 0xad, 0x00, 0x30, 0x99,
- 0xd0, 0x59, 0xf2, 0x00, 0x30, 0x90, 0xcd, 0x05, 0x7a, 0x07, 0xf3, 0xe1,
- 0xcb, 0x66, 0x54, 0x07, 0xf3, 0xe8, 0x49, 0x19, 0x9e, 0xc2, 0xd1, 0x9a,
- 0xce, 0x72, 0xa1, 0x07, 0xef, 0xd8, 0x48, 0x19, 0xa8, 0xc2, 0xd1, 0xb2,
- 0x48, 0xac, 0x68, 0x42, 0xd1, 0xca, 0x0a, 0xc2, 0xd1, 0xe8, 0x49, 0xb1,
- 0xe3, 0xc2, 0xd1, 0xf4, 0x03, 0xc2, 0xd2, 0x1c, 0xd4, 0x3b, 0xc5, 0x07,
- 0xef, 0xf0, 0x44, 0x2e, 0x60, 0xc2, 0xd2, 0x26, 0x45, 0x19, 0x9d, 0xc2,
- 0xd2, 0x32, 0x46, 0x2d, 0xfe, 0xc2, 0xd2, 0x3c, 0x4d, 0x08, 0x1a, 0x42,
- 0xd2, 0x48, 0x48, 0x95, 0xd6, 0xc2, 0xd2, 0x54, 0x0e, 0xc2, 0xd2, 0x6c,
- 0xd2, 0x4e, 0x6c, 0x07, 0xef, 0x99, 0xcb, 0x8d, 0xca, 0x07, 0xef, 0xf8,
- 0x03, 0xc2, 0xd2, 0x7e, 0x0a, 0xc2, 0xd2, 0x8a, 0x48, 0xac, 0x68, 0x42,
- 0xd2, 0x96, 0x0a, 0xc2, 0xd2, 0xca, 0x45, 0x19, 0x9d, 0xc2, 0xd2, 0xd4,
- 0x44, 0x2e, 0x60, 0xc2, 0xd2, 0xea, 0x4d, 0x08, 0x1a, 0xc2, 0xd2, 0xf6,
- 0x46, 0x51, 0xe9, 0xc2, 0xd3, 0x02, 0x45, 0x2d, 0xfe, 0xc2, 0xd3, 0x0e,
- 0xce, 0x6f, 0x91, 0x07, 0xe4, 0x89, 0xcf, 0x69, 0x5c, 0x07, 0xe4, 0x91,
- 0xcf, 0x62, 0x81, 0x07, 0xe4, 0xa0, 0x0a, 0xc2, 0xd3, 0x18, 0x44, 0x2e,
- 0x60, 0xc2, 0xd3, 0x24, 0x4d, 0x08, 0x1a, 0xc2, 0xd3, 0x30, 0x45, 0x19,
- 0x9d, 0xc2, 0xd3, 0x3c, 0x46, 0x51, 0xe9, 0xc2, 0xd3, 0x52, 0x45, 0x2d,
- 0xfe, 0xc2, 0xd3, 0x5e, 0xce, 0x6f, 0x91, 0x07, 0xe4, 0x51, 0xcf, 0x69,
- 0x5c, 0x07, 0xe4, 0x59, 0xcf, 0x62, 0x81, 0x07, 0xe4, 0x68, 0x48, 0x0f,
- 0x61, 0xc2, 0xd3, 0x68, 0x49, 0x19, 0xa7, 0x42, 0xd3, 0x92, 0x44, 0x2e,
- 0x60, 0xc2, 0xd3, 0xb0, 0x45, 0x08, 0x1a, 0xc2, 0xd3, 0xbc, 0x45, 0x19,
- 0x9d, 0xc2, 0xd3, 0xd4, 0x45, 0x51, 0xe9, 0xc2, 0xd3, 0xea, 0x0a, 0xc2,
- 0xd3, 0xf4, 0x45, 0x2d, 0xfe, 0x42, 0xd4, 0x00, 0x03, 0xc2, 0xd4, 0x0a,
- 0xcd, 0x7b, 0xe9, 0x07, 0xea, 0x58, 0x44, 0x2e, 0x60, 0xc2, 0xd4, 0x16,
- 0x4d, 0x08, 0x1a, 0xc2, 0xd4, 0x22, 0x45, 0x19, 0x9d, 0xc2, 0xd4, 0x2e,
- 0x45, 0x51, 0xe9, 0xc2, 0xd4, 0x38, 0x45, 0x51, 0xea, 0xc2, 0xd4, 0x42,
- 0x46, 0x2d, 0xfe, 0x42, 0xd4, 0x4e, 0x48, 0xac, 0x68, 0xc2, 0xd4, 0x5a,
- 0xdc, 0x14, 0x8a, 0x07, 0xef, 0xe8, 0x46, 0x2e, 0x61, 0xc2, 0xd4, 0x8e,
- 0x03, 0x42, 0xd4, 0x94, 0x49, 0x19, 0x9e, 0xc2, 0xd4, 0xa9, 0xd5, 0x32,
- 0x6b, 0x07, 0xef, 0xa0, 0x0b, 0xc2, 0xd4, 0xcd, 0xcb, 0x66, 0x54, 0x07,
- 0xe9, 0xd8, 0x46, 0x55, 0x4c, 0xc2, 0xd4, 0xd9, 0x45, 0x51, 0xe9, 0xc2,
- 0xd4, 0xe5, 0x44, 0x19, 0xa7, 0xc2, 0xd4, 0xef, 0x46, 0x2d, 0xfe, 0xc2,
- 0xd4, 0xf9, 0x44, 0x6f, 0x91, 0xc2, 0xd5, 0x05, 0x4d, 0x08, 0x1a, 0xc2,
- 0xd5, 0x11, 0x44, 0x2e, 0x60, 0x42, 0xd5, 0x1d, 0x60, 0x09, 0x87, 0x42,
- 0xd5, 0x29, 0xc5, 0x01, 0x62, 0x00, 0x47, 0xc9, 0xc5, 0x00, 0x95, 0x00,
- 0x47, 0xb8, 0x08, 0xc2, 0xd5, 0x33, 0x09, 0xc2, 0xd5, 0x45, 0x0e, 0xc2,
- 0xd5, 0x66, 0x42, 0x1b, 0xa5, 0xc2, 0xd5, 0x75, 0x03, 0xc2, 0xd5, 0x85,
- 0x0d, 0xc2, 0xd5, 0xa1, 0x16, 0xc2, 0xd5, 0xbd, 0xc3, 0xdd, 0x9b, 0x00,
- 0x33, 0xf3, 0x02, 0xd5, 0xe5, 0x1b, 0xc2, 0xd5, 0xf2, 0x14, 0xc2, 0xd6,
- 0x02, 0x42, 0x00, 0x48, 0xc2, 0xd6, 0x23, 0x97, 0x00, 0x36, 0x3b, 0x02,
- 0xd6, 0x33, 0xc3, 0x0f, 0x60, 0x00, 0x32, 0x13, 0x02, 0xd6, 0x3d, 0x87,
- 0x00, 0x36, 0x83, 0x02, 0xd6, 0x41, 0x42, 0x02, 0xb4, 0xc2, 0xd6, 0x45,
- 0x15, 0xc2, 0xd6, 0x55, 0x06, 0xc2, 0xd6, 0x82, 0xc2, 0x00, 0x7b, 0x00,
- 0x36, 0x5b, 0x02, 0xd6, 0xa4, 0xc3, 0x78, 0xa9, 0x00, 0x32, 0x43, 0x02,
- 0xd6, 0xaf, 0x0f, 0xc2, 0xd6, 0xb3, 0xc2, 0x1b, 0xd8, 0x00, 0x36, 0x33,
- 0x02, 0xd6, 0xc2, 0x10, 0xc2, 0xd6, 0xc6, 0x0a, 0x42, 0xd6, 0xdf, 0xd3,
- 0x45, 0xc3, 0x00, 0x46, 0x91, 0xc5, 0x01, 0x62, 0x00, 0x46, 0x79, 0xc5,
- 0x00, 0x95, 0x00, 0x46, 0x70, 0x11, 0xc2, 0xd6, 0xf5, 0x03, 0x42, 0xd7,
- 0x01, 0xc3, 0x01, 0x1e, 0x0f, 0x70, 0x01, 0xc2, 0x00, 0x34, 0x0f, 0x70,
- 0x78, 0xc2, 0x00, 0x34, 0x0f, 0x70, 0x31, 0x8a, 0x0f, 0x70, 0xd0, 0x03,
- 0xc2, 0xd7, 0x09, 0xc2, 0x0d, 0xf7, 0x0f, 0x70, 0xa9, 0x0a, 0x42, 0xd7,
- 0x13, 0xc2, 0x0f, 0x61, 0x0f, 0x70, 0x51, 0xc3, 0x19, 0xa7, 0x0f, 0x70,
- 0xb8, 0xc2, 0x00, 0xc2, 0x0f, 0x70, 0x59, 0x46, 0xce, 0x35, 0x42, 0xd7,
- 0x1f, 0xc3, 0x00, 0x4c, 0x0f, 0x70, 0x71, 0xc4, 0xe0, 0xfb, 0x0f, 0x70,
- 0xa1, 0x49, 0x9e, 0xf6, 0xc2, 0xd7, 0x83, 0xc2, 0x01, 0xf2, 0x0f, 0x70,
- 0x88, 0xc3, 0x88, 0x60, 0x0f, 0x71, 0x09, 0xc4, 0x2d, 0xfe, 0x0f, 0x71,
- 0x11, 0x0a, 0xc2, 0xd7, 0xd3, 0xc3, 0x2e, 0x60, 0x0f, 0x71, 0x49, 0x0d,
- 0xc2, 0xd7, 0xdf, 0xc3, 0x0f, 0x69, 0x0f, 0x71, 0x59, 0xc4, 0x19, 0x9d,
- 0x0f, 0x71, 0x61, 0xc4, 0x3a, 0x8e, 0x0f, 0x71, 0x69, 0x15, 0xc2, 0xd7,
- 0xeb, 0xc3, 0x04, 0x6c, 0x0f, 0x71, 0x79, 0xc3, 0xb2, 0x7c, 0x0f, 0x71,
- 0x81, 0xc3, 0x0f, 0x60, 0x0f, 0x71, 0x91, 0x16, 0xc2, 0xd7, 0xfd, 0xc3,
- 0xb5, 0x3a, 0x0f, 0x71, 0xc9, 0xc5, 0x95, 0xd3, 0x0f, 0x71, 0xd8, 0xda,
- 0x19, 0x9d, 0x0f, 0x77, 0x81, 0xcc, 0x8d, 0x34, 0x0f, 0x77, 0x88, 0x00,
- 0xc2, 0xd8, 0x09, 0xc3, 0x14, 0xa9, 0x00, 0x32, 0x62, 0x02, 0xd8, 0x1b,
- 0xc9, 0x37, 0x1e, 0x00, 0x47, 0xe0, 0xc9, 0x37, 0x1e, 0x00, 0x47, 0xe8,
- 0x45, 0x00, 0x6c, 0xc2, 0xd8, 0x21, 0xcd, 0x05, 0x7a, 0x07, 0xf3, 0xb1,
- 0xcb, 0x66, 0x54, 0x07, 0xf3, 0xb8, 0xce, 0x05, 0x79, 0x07, 0xf3, 0x80,
- 0xc5, 0x01, 0x62, 0x00, 0x47, 0x79, 0xc5, 0x00, 0x95, 0x00, 0x47, 0x60,
- 0xc5, 0x01, 0x62, 0x00, 0x47, 0x71, 0xc5, 0x00, 0x95, 0x00, 0x47, 0x58,
- 0xc5, 0x01, 0x62, 0x00, 0x47, 0x69, 0xc5, 0x00, 0x95, 0x00, 0x47, 0x50,
- 0x46, 0x00, 0x6b, 0x42, 0xd8, 0x33, 0xc3, 0x14, 0xa9, 0x00, 0x47, 0x48,
- 0xc3, 0x14, 0xa9, 0x00, 0x47, 0x40, 0xc3, 0x14, 0xa9, 0x00, 0x47, 0x38,
- 0x83, 0x00, 0x2b, 0xc9, 0xc2, 0x0d, 0xf7, 0x00, 0x2b, 0x98, 0x83, 0x00,
- 0x2a, 0x49, 0xc2, 0x0d, 0xf7, 0x00, 0x2a, 0x18, 0x9f, 0x0f, 0xbb, 0x31,
- 0xa0, 0x0f, 0xbb, 0x39, 0xa1, 0x0f, 0xbb, 0x41, 0xa2, 0x0f, 0xbb, 0x48,
- 0xc2, 0xe7, 0x79, 0x0f, 0xb9, 0x20, 0xa1, 0x0f, 0xb9, 0xa9, 0x9f, 0x0f,
- 0xb9, 0x99, 0xa0, 0x0f, 0xb9, 0xa0, 0xc8, 0x85, 0x40, 0x0f, 0xb9, 0x83,
- 0x02, 0xd8, 0x45, 0xc4, 0x1c, 0x64, 0x0f, 0xb8, 0xf8, 0x9f, 0x0f, 0xb8,
- 0x59, 0xa0, 0x0f, 0xb8, 0x60, 0x48, 0xba, 0x55, 0xc2, 0xd8, 0x4b, 0xc8,
- 0x85, 0x40, 0x0f, 0xb9, 0x61, 0xc6, 0x48, 0x2a, 0x0f, 0xb9, 0x10, 0xc8,
- 0x85, 0x40, 0x0f, 0xb9, 0x69, 0xd2, 0x48, 0x2a, 0x0f, 0xb9, 0x30, 0xc2,
- 0xe7, 0x79, 0x0f, 0xb8, 0x48, 0xc2, 0xe7, 0x79, 0x0f, 0xb8, 0x38, 0x84,
- 0x0a, 0x21, 0xa1, 0x83, 0x0a, 0x21, 0x98, 0x83, 0x0a, 0x21, 0x88, 0x83,
- 0x0a, 0x21, 0x60, 0x83, 0x0a, 0x21, 0x48, 0x83, 0x0a, 0x20, 0xd8, 0x83,
- 0x0a, 0x20, 0x50, 0x83, 0x0a, 0x22, 0x49, 0x84, 0x0a, 0x22, 0x51, 0x85,
- 0x0a, 0x22, 0x58, 0x83, 0x0a, 0x23, 0x58, 0x83, 0x0a, 0x23, 0x68, 0x83,
- 0x0a, 0x23, 0x80, 0x83, 0x0a, 0x23, 0x90, 0x83, 0x0a, 0x23, 0xa0, 0x83,
- 0x0a, 0x23, 0xb9, 0x84, 0x0a, 0x23, 0xc1, 0x85, 0x0a, 0x23, 0xc8, 0x83,
- 0x0a, 0x23, 0xd9, 0x84, 0x0a, 0x23, 0xe0, 0x83, 0x0a, 0x23, 0xf9, 0x84,
- 0x0a, 0x24, 0x01, 0x85, 0x0a, 0x24, 0x08, 0x83, 0x0a, 0x24, 0x29, 0x84,
- 0x0a, 0x24, 0x30, 0x83, 0x0a, 0x24, 0x60, 0x83, 0x0a, 0x24, 0xb8, 0x83,
- 0x0a, 0x25, 0x10, 0x83, 0x0a, 0x27, 0x31, 0x84, 0x0a, 0x27, 0x38, 0x83,
- 0x0a, 0x27, 0x68, 0x83, 0x0a, 0x27, 0x80, 0x83, 0x0a, 0x27, 0xb8, 0x83,
- 0x0a, 0x27, 0xc8, 0x83, 0x0a, 0x28, 0x28, 0x83, 0x0a, 0x29, 0x70, 0x83,
- 0x0a, 0x2a, 0x28, 0x83, 0x0a, 0x2a, 0x58, 0x83, 0x0a, 0x2a, 0x88, 0x83,
- 0x0a, 0x2a, 0xe0, 0x83, 0x0a, 0x2b, 0x88, 0x83, 0x0a, 0x2b, 0xa1, 0x84,
- 0x0a, 0x2b, 0xa9, 0x85, 0x0a, 0x2b, 0xb0, 0x83, 0x0a, 0x2b, 0xd9, 0x84,
- 0x0a, 0x2b, 0xe1, 0x85, 0x0a, 0x2b, 0xe8, 0x83, 0x0a, 0x2c, 0xa8, 0x83,
- 0x0a, 0x2c, 0xd8, 0x83, 0x0a, 0x2d, 0x00, 0x83, 0x0a, 0x2d, 0x20, 0x83,
- 0x0a, 0x2d, 0x78, 0xc9, 0xb1, 0xd1, 0x0a, 0x2d, 0x89, 0x83, 0x0a, 0x2d,
- 0x90, 0x83, 0x0a, 0x2d, 0xb0, 0xd4, 0x3c, 0xb5, 0x0a, 0x2e, 0x71, 0xd3,
- 0x42, 0xf1, 0x0a, 0x2e, 0x78, 0x83, 0x0a, 0x2f, 0xc0, 0x83, 0x0a, 0x30,
- 0x00, 0xc4, 0x0e, 0x40, 0x01, 0x1b, 0x01, 0xc5, 0x01, 0x7b, 0x01, 0x19,
- 0xe0, 0x43, 0x02, 0xc7, 0xc2, 0xd8, 0x57, 0xc2, 0x01, 0x63, 0x01, 0x1a,
- 0xa3, 0x02, 0xd8, 0x63, 0x0b, 0x42, 0xd8, 0x69, 0xc6, 0xce, 0xdd, 0x01,
- 0x1a, 0x99, 0xcb, 0x01, 0x09, 0x01, 0x1a, 0x80, 0xcd, 0x0b, 0x9a, 0x01,
- 0x1a, 0x39, 0xc7, 0x05, 0x88, 0x01, 0x1a, 0x18, 0xc3, 0xaa, 0xf4, 0x01,
- 0x1a, 0x71, 0xc8, 0x4f, 0x39, 0x01, 0x1a, 0x50, 0xd0, 0x59, 0x92, 0x01,
- 0x12, 0x90, 0x00, 0x42, 0xd8, 0x75, 0xc9, 0x4f, 0xff, 0x08, 0x09, 0x68,
- 0xc9, 0x4f, 0xff, 0x08, 0x09, 0x60, 0x00, 0x42, 0xd8, 0x81, 0x00, 0x42,
- 0xd8, 0x8d, 0xc9, 0x4f, 0xff, 0x08, 0x09, 0x78, 0x00, 0x42, 0xd8, 0x99,
- 0xc9, 0x4f, 0xff, 0x08, 0x09, 0x70, 0xc7, 0x0c, 0x4b, 0x08, 0x08, 0xf1,
- 0xc8, 0x50, 0x00, 0x08, 0x09, 0x38, 0xc9, 0x4f, 0xff, 0x08, 0x09, 0x80,
- 0xc7, 0x0c, 0x4b, 0x08, 0x08, 0xf9, 0xc8, 0x50, 0x00, 0x08, 0x09, 0x40,
- 0xc9, 0x4f, 0xff, 0x08, 0x09, 0x88, 0xd5, 0x37, 0xd5, 0x0f, 0xdd, 0x78,
- 0x48, 0x1d, 0xe5, 0xc2, 0xd8, 0xa5, 0x11, 0x42, 0xd8, 0xbd, 0x45, 0x00,
- 0x56, 0x42, 0xd8, 0xcc, 0xd0, 0x5c, 0xb2, 0x01, 0x2b, 0xe0, 0x47, 0x53,
- 0xb4, 0xc2, 0xd8, 0xdc, 0x49, 0x41, 0x75, 0x42, 0xd8, 0xe8, 0x45, 0x00,
- 0x56, 0x42, 0xd8, 0xf4, 0xc8, 0x03, 0x3b, 0x01, 0x28, 0x51, 0xca, 0x00,
- 0xf6, 0x01, 0x28, 0x40, 0xc8, 0x03, 0x3b, 0x01, 0x28, 0x31, 0xca, 0x00,
- 0xf6, 0x01, 0x28, 0x20, 0xce, 0x74, 0xdf, 0x01, 0x2a, 0x51, 0xc8, 0x11,
- 0x71, 0x01, 0x29, 0xd1, 0xca, 0x10, 0x32, 0x01, 0x29, 0x90, 0xce, 0x74,
- 0xc3, 0x01, 0x29, 0xe9, 0xc8, 0x10, 0x47, 0x01, 0x29, 0xa9, 0xca, 0x11,
- 0x84, 0x01, 0x29, 0x68, 0x0e, 0xc2, 0xd9, 0x06, 0xca, 0x00, 0xf6, 0x01,
- 0x29, 0xd9, 0xc5, 0x01, 0xf7, 0x01, 0x28, 0xb8, 0x45, 0x00, 0x56, 0x42,
- 0xd9, 0x12, 0xc8, 0x03, 0x3b, 0x01, 0x2a, 0x79, 0xca, 0x00, 0xf6, 0x01,
- 0x2a, 0x68, 0xca, 0x00, 0xf6, 0x01, 0x2a, 0x59, 0xc4, 0x01, 0x1e, 0x01,
- 0x29, 0x59, 0xc5, 0x01, 0xf7, 0x01, 0x29, 0x18, 0x45, 0x00, 0x56, 0x42,
- 0xd9, 0x24, 0xca, 0x00, 0xf6, 0x01, 0x2b, 0x49, 0xc4, 0x01, 0x1e, 0x01,
- 0x2a, 0xe9, 0xc5, 0x01, 0xf7, 0x01, 0x2a, 0xd0, 0xca, 0x00, 0xf6, 0x01,
- 0x2b, 0x31, 0xc4, 0x01, 0x1e, 0x01, 0x2a, 0xb9, 0xc5, 0x01, 0xf7, 0x01,
- 0x2a, 0xa0, 0xd1, 0x4f, 0x52, 0x01, 0x2b, 0x29, 0xcb, 0x95, 0x70, 0x01,
- 0x2a, 0xb1, 0xcc, 0x83, 0x44, 0x01, 0x2a, 0x98, 0xd1, 0x4f, 0x63, 0x01,
- 0x2b, 0x21, 0xcb, 0x96, 0x20, 0x01, 0x2a, 0xa9, 0xcc, 0x87, 0xa0, 0x01,
- 0x2a, 0x90, 0xd3, 0x46, 0x81, 0x01, 0x2a, 0x39, 0xd0, 0x34, 0x92, 0x01,
- 0x29, 0x79, 0x45, 0x02, 0x4d, 0xc2, 0xd9, 0x36, 0x46, 0x02, 0x12, 0x42,
- 0xd9, 0x42, 0xd3, 0x46, 0xba, 0x01, 0x2a, 0x09, 0xd0, 0x34, 0xa7, 0x01,
- 0x29, 0x81, 0x45, 0x02, 0x4d, 0xc2, 0xd9, 0x4e, 0x46, 0x02, 0x12, 0x42,
- 0xd9, 0x5a, 0xca, 0x10, 0x32, 0x01, 0x29, 0x51, 0xc5, 0x10, 0x37, 0x01,
- 0x28, 0xc8, 0xca, 0x10, 0x32, 0x01, 0x29, 0x11, 0xc5, 0x10, 0x37, 0x01,
- 0x28, 0xa8, 0xca, 0x11, 0x84, 0x01, 0x29, 0x31, 0xc5, 0x0a, 0x2b, 0x01,
- 0x28, 0xd0, 0xca, 0x11, 0x84, 0x01, 0x28, 0xf1, 0xc5, 0x0a, 0x2b, 0x01,
- 0x28, 0xb0, 0xa3, 0x0f, 0xd9, 0xb0, 0xa2, 0x0f, 0xd8, 0xab, 0x02, 0xd9,
- 0x66, 0xa1, 0x0f, 0xd8, 0x73, 0x02, 0xd9, 0x6a, 0xa3, 0x0f, 0xd9, 0x28,
- 0xa3, 0x0f, 0xd9, 0x80, 0xa3, 0x0f, 0xd9, 0x41, 0xa2, 0x0f, 0xd8, 0xca,
- 0x02, 0xd9, 0x72, 0xa3, 0x0f, 0xd9, 0x51, 0xa2, 0x0f, 0xd8, 0xda, 0x02,
- 0xd9, 0x76, 0xa3, 0x0f, 0xd9, 0xc8, 0xa3, 0x0f, 0xd9, 0x59, 0xa2, 0x0f,
- 0xd8, 0xe2, 0x02, 0xd9, 0x7a, 0xa3, 0x0f, 0xd9, 0x98, 0xa3, 0x0f, 0xd9,
- 0xb8, 0xca, 0xa8, 0x10, 0x0f, 0xd2, 0x4b, 0x02, 0xd9, 0x7e, 0x0d, 0xc2,
- 0xd9, 0x84, 0xc4, 0xe5, 0xdf, 0x01, 0x32, 0xfb, 0x02, 0xd9, 0x96, 0xc6,
- 0xd1, 0xf5, 0x01, 0x32, 0xeb, 0x02, 0xd9, 0x9c, 0xc4, 0xe0, 0xaf, 0x01,
- 0x32, 0xe3, 0x02, 0xd9, 0xa2, 0xc5, 0xb5, 0xaf, 0x01, 0x32, 0xdb, 0x02,
- 0xd9, 0xa8, 0x47, 0x41, 0x9b, 0x42, 0xd9, 0xae, 0x4e, 0x70, 0x9b, 0xc2,
- 0xd9, 0xca, 0x4e, 0x0e, 0x16, 0xc2, 0xd9, 0xd6, 0x4c, 0x14, 0xfa, 0xc2,
- 0xd9, 0xe2, 0x4f, 0x64, 0xe8, 0x42, 0xd9, 0xee, 0x00, 0x42, 0xd9, 0xfa,
- 0xc6, 0x07, 0x09, 0x0f, 0xbc, 0x69, 0xc6, 0x01, 0x7a, 0x0f, 0xbc, 0x20,
- 0xca, 0x83, 0xbe, 0x01, 0x31, 0xd9, 0x44, 0x04, 0x75, 0x42, 0xda, 0x06,
- 0x00, 0x42, 0xda, 0x16, 0xc6, 0x07, 0x09, 0x0f, 0xbc, 0x61, 0xc7, 0x3f,
- 0x2e, 0x0f, 0xbc, 0xb9, 0xc7, 0x08, 0xc0, 0x0f, 0xbc, 0xe8, 0x4a, 0x01,
- 0x89, 0xc2, 0xda, 0x28, 0xd8, 0x25, 0x08, 0x0f, 0xad, 0x19, 0xdb, 0x03,
- 0x6c, 0x01, 0x5c, 0xf8, 0x00, 0x42, 0xda, 0x40, 0x47, 0xba, 0xc6, 0xc2,
- 0xda, 0x5e, 0xc5, 0xde, 0x3f, 0x0f, 0x99, 0x10, 0x4a, 0x01, 0x89, 0xc2,
- 0xda, 0x6a, 0x46, 0x01, 0x09, 0xc2, 0xda, 0x8c, 0x4a, 0x03, 0xfd, 0x42,
- 0xda, 0xa1, 0x4a, 0x01, 0x89, 0xc2, 0xda, 0xad, 0x00, 0xc2, 0xda, 0xce,
- 0x46, 0x01, 0x09, 0x42, 0xda, 0xda, 0x44, 0x01, 0xd3, 0xc2, 0xda, 0xe6,
- 0xc5, 0x08, 0xc2, 0x01, 0x4f, 0x58, 0xc6, 0x07, 0x09, 0x01, 0x58, 0xd9,
- 0xc6, 0x01, 0x7a, 0x01, 0x59, 0x20, 0xc6, 0x06, 0x01, 0x01, 0x39, 0xf9,
- 0xc2, 0x01, 0x28, 0x01, 0x34, 0x88, 0xcf, 0x66, 0xd7, 0x01, 0x39, 0x31,
- 0xc4, 0x18, 0x9f, 0x0f, 0xad, 0xf8, 0x15, 0xc2, 0xda, 0xf2, 0x06, 0xc2,
- 0xda, 0xfe, 0xd4, 0x3e, 0xbd, 0x01, 0x1f, 0xb3, 0x02, 0xdb, 0x0d, 0xd7,
- 0x27, 0x29, 0x01, 0x1f, 0xab, 0x02, 0xdb, 0x13, 0x0e, 0x42, 0xdb, 0x19,
- 0x44, 0x00, 0x27, 0xc2, 0xdb, 0x28, 0x4a, 0x01, 0x89, 0xc2, 0xdb, 0x34,
- 0xd8, 0x25, 0x08, 0x0f, 0xad, 0x11, 0xdb, 0x03, 0x6c, 0x01, 0x5c, 0xe8,
- 0xc3, 0x0a, 0xbb, 0x0f, 0xad, 0x23, 0x02, 0xdb, 0x4c, 0xc5, 0xcb, 0x1b,
- 0x01, 0x59, 0x10, 0xc7, 0xc9, 0xbb, 0x01, 0x4e, 0xb9, 0xd0, 0x58, 0x42,
- 0x01, 0x59, 0x60, 0xc4, 0x2e, 0xc4, 0x0f, 0x9f, 0x91, 0xc5, 0xb9, 0x50,
- 0x01, 0x58, 0xf8, 0xc9, 0x45, 0x0b, 0x01, 0x2d, 0x71, 0xc7, 0x58, 0x4b,
- 0x01, 0x59, 0x70, 0xc6, 0x07, 0x09, 0x01, 0x58, 0xe9, 0xc7, 0x3f, 0x2e,
- 0x0f, 0xbc, 0xc1, 0xc7, 0x08, 0xc0, 0x0f, 0xbc, 0xf0, 0x9a, 0x01, 0x30,
- 0x83, 0x02, 0xdb, 0x52, 0xcb, 0x94, 0xcb, 0x0f, 0xaf, 0xb0, 0xc8, 0xb7,
- 0xcd, 0x00, 0xdb, 0xf0, 0xc3, 0x00, 0x34, 0x00, 0xdb, 0xe1, 0xc3, 0x3f,
- 0x7b, 0x00, 0xdb, 0xc9, 0xc3, 0x01, 0x93, 0x00, 0xdb, 0xc0, 0xc2, 0x05,
- 0xdc, 0x00, 0xdb, 0xd9, 0xc2, 0x08, 0x0e, 0x00, 0xdb, 0xd0, 0xc2, 0x08,
- 0x0e, 0x00, 0xdb, 0xb9, 0xc2, 0x05, 0xdc, 0x00, 0xdb, 0xb0, 0xc2, 0x01,
- 0x28, 0x00, 0xdb, 0xa9, 0xc2, 0x03, 0x87, 0x00, 0xdb, 0xa0, 0xc2, 0x00,
- 0x8a, 0x00, 0xdb, 0x73, 0x02, 0xdb, 0x58, 0xc2, 0x09, 0x6f, 0x00, 0xdb,
- 0x6a, 0x02, 0xdb, 0x5e, 0xc2, 0x00, 0x35, 0x00, 0xdb, 0x23, 0x02, 0xdb,
- 0x64, 0xc3, 0x00, 0x34, 0x00, 0xdb, 0x49, 0xc3, 0x08, 0xc3, 0x00, 0xdb,
- 0x38, 0xc3, 0x3f, 0x7b, 0x00, 0xdb, 0x41, 0xc2, 0x00, 0x35, 0x00, 0xdb,
- 0x10, 0xc7, 0xc9, 0xd7, 0x00, 0xd8, 0x30, 0x00, 0x42, 0xdb, 0x68, 0xc7,
- 0xcb, 0x20, 0x00, 0xda, 0x29, 0xca, 0x6a, 0xf6, 0x00, 0xd8, 0xa0, 0xc2,
- 0x03, 0xa4, 0x00, 0xd9, 0x89, 0xc2, 0x01, 0x29, 0x00, 0xd9, 0x80, 0xc7,
- 0xc9, 0xd7, 0x00, 0xd8, 0x70, 0xc7, 0xc9, 0xd7, 0x00, 0xd8, 0x60, 0xc7,
- 0xc7, 0x7d, 0x00, 0xd9, 0x08, 0xc3, 0x03, 0x85, 0x00, 0xd9, 0x29, 0x45,
- 0x6a, 0xf2, 0x42, 0xdb, 0x7a, 0x00, 0x42, 0xdb, 0x86, 0x0d, 0xc2, 0xdb,
- 0x95, 0x97, 0x0b, 0x50, 0x21, 0xc4, 0xe0, 0x9f, 0x0b, 0x51, 0xc1, 0x15,
- 0xc2, 0xdb, 0xb1, 0x16, 0xc2, 0xdb, 0xcb, 0x8f, 0x0b, 0x50, 0x8b, 0x02,
- 0xdb, 0xd5, 0x14, 0xc2, 0xdb, 0xe7, 0x0e, 0xc2, 0xdb, 0xf3, 0x19, 0xc2,
- 0xdc, 0x01, 0xc3, 0xe7, 0x93, 0x0b, 0x51, 0x59, 0x12, 0xc2, 0xdc, 0x0b,
- 0x10, 0xc2, 0xdc, 0x15, 0x1b, 0xc2, 0xdc, 0x40, 0xc2, 0x00, 0x4c, 0x0b,
- 0x50, 0x30, 0x09, 0xc2, 0xdc, 0x4a, 0x19, 0xc2, 0xdc, 0x54, 0x0d, 0xc2,
- 0xdc, 0x5e, 0x10, 0xc2, 0xdc, 0x74, 0x16, 0xc2, 0xdc, 0xa1, 0x12, 0xc2,
- 0xdc, 0xb1, 0x14, 0xc2, 0xdc, 0xce, 0x15, 0xc2, 0xdc, 0xde, 0x0e, 0xc2,
- 0xdc, 0xf8, 0x18, 0xc2, 0xdd, 0x0a, 0x0f, 0xc2, 0xdd, 0x14, 0x08, 0xc2,
- 0xdd, 0x4c, 0x1b, 0xc2, 0xdd, 0x63, 0x8b, 0x0b, 0x4e, 0xc1, 0x91, 0x0b,
- 0x4e, 0xb9, 0x83, 0x0b, 0x4e, 0xa8, 0x10, 0xc2, 0xdd, 0x7d, 0x0e, 0xc2,
- 0xdd, 0x9d, 0x8f, 0x0b, 0x4a, 0x8b, 0x02, 0xdd, 0xb3, 0x16, 0xc2, 0xdd,
- 0xd9, 0x0d, 0xc2, 0xdd, 0xf4, 0x15, 0xc2, 0xde, 0x0b, 0x08, 0xc2, 0xde,
- 0x23, 0x1b, 0xc2, 0xde, 0x2f, 0x14, 0xc2, 0xde, 0x3f, 0x12, 0xc2, 0xde,
- 0x51, 0x42, 0x00, 0x09, 0xc2, 0xde, 0x65, 0x19, 0x42, 0xde, 0x71, 0x0d,
- 0xc2, 0xde, 0x7d, 0x15, 0xc2, 0xde, 0x91, 0x16, 0xc2, 0xde, 0x9f, 0x12,
- 0xc2, 0xde, 0xaf, 0x0e, 0xc2, 0xde, 0xb9, 0x10, 0xc2, 0xde, 0xc7, 0x0f,
- 0xc2, 0xde, 0xe9, 0x1b, 0xc2, 0xdf, 0x03, 0x19, 0xc2, 0xdf, 0x13, 0xc2,
- 0x05, 0xd4, 0x0b, 0x46, 0x19, 0x43, 0x46, 0xb8, 0xc2, 0xdf, 0x1f, 0xc4,
- 0xe4, 0x6f, 0x0b, 0x46, 0x01, 0xc3, 0xd6, 0xef, 0x0b, 0x45, 0xe1, 0x09,
- 0x42, 0xdf, 0x29, 0x10, 0xc2, 0xdf, 0x35, 0x0f, 0xc2, 0xdf, 0x4d, 0x12,
- 0xc2, 0xdf, 0x68, 0x47, 0xc7, 0x6f, 0xc2, 0xdf, 0x80, 0x0d, 0xc2, 0xdf,
- 0x8a, 0x0e, 0xc2, 0xdf, 0x9a, 0x42, 0x17, 0x9f, 0xc2, 0xdf, 0xaa, 0x15,
- 0xc2, 0xdf, 0xb4, 0x16, 0xc2, 0xdf, 0xd2, 0xc5, 0xda, 0x11, 0x0b, 0x43,
- 0xb1, 0xc4, 0xe0, 0x8b, 0x0b, 0x43, 0x99, 0x1b, 0x42, 0xdf, 0xde, 0xc3,
- 0x0e, 0xc8, 0x0b, 0x42, 0x91, 0x15, 0xc2, 0xdf, 0xea, 0x16, 0xc2, 0xe0,
- 0x04, 0x0d, 0xc2, 0xe0, 0x14, 0x0f, 0xc2, 0xe0, 0x28, 0x10, 0xc2, 0xe0,
- 0x48, 0x0e, 0xc2, 0xe0, 0x7e, 0x12, 0xc2, 0xe0, 0x97, 0x17, 0xc2, 0xe0,
- 0xad, 0xc3, 0x00, 0x39, 0x0b, 0x41, 0xd1, 0xc4, 0xe1, 0x83, 0x0b, 0x41,
- 0xc9, 0x09, 0x42, 0xe0, 0xb9, 0xc7, 0xca, 0x24, 0x00, 0xdf, 0xf9, 0xc9,
- 0xb1, 0xad, 0x00, 0xdf, 0xe8, 0x49, 0xae, 0xc2, 0x42, 0xe0, 0xc5, 0xc2,
- 0x00, 0xc7, 0x00, 0xde, 0xf9, 0xc2, 0x1d, 0x5f, 0x00, 0xde, 0xe1, 0xc2,
- 0x0c, 0x65, 0x00, 0xde, 0xc9, 0xc2, 0x00, 0xad, 0x00, 0xde, 0xa9, 0xc2,
- 0x02, 0x59, 0x00, 0xde, 0x99, 0xc2, 0x01, 0x29, 0x00, 0xde, 0x79, 0xc2,
- 0x01, 0x09, 0x00, 0xde, 0x61, 0xc2, 0x03, 0xa4, 0x00, 0xde, 0x41, 0xc2,
- 0x00, 0xa4, 0x00, 0xde, 0x19, 0x83, 0x00, 0xde, 0x08, 0xc6, 0xce, 0x9b,
- 0x00, 0x4e, 0x70, 0x46, 0x00, 0x6b, 0x42, 0xe0, 0xd7, 0xc2, 0x00, 0xa4,
- 0x00, 0x4d, 0x11, 0x83, 0x00, 0x4d, 0x08, 0xc2, 0x00, 0xa4, 0x00, 0x4d,
- 0x01, 0x83, 0x00, 0x4c, 0xf8, 0x94, 0x00, 0x4c, 0x5b, 0x02, 0xe0, 0xe3,
- 0x8e, 0x00, 0x4c, 0x62, 0x02, 0xe0, 0xe7, 0xc4, 0x0f, 0x7c, 0x00, 0x4e,
- 0x69, 0xc5, 0x44, 0x7b, 0x00, 0x4c, 0x18, 0xc7, 0x76, 0x59, 0x00, 0x4d,
- 0xe9, 0xc7, 0x11, 0x41, 0x00, 0x4c, 0x10, 0x94, 0x00, 0x4e, 0x20, 0x8e,
- 0x00, 0x4f, 0x18, 0xda, 0x1c, 0x8f, 0x00, 0x4f, 0xc0, 0xc2, 0x01, 0x47,
- 0x00, 0x4f, 0xa9, 0xc4, 0x04, 0x5e, 0x00, 0x4f, 0xb0, 0xc2, 0x03, 0x40,
- 0x00, 0xd0, 0x79, 0x83, 0x00, 0xd0, 0x70, 0xc2, 0x04, 0x2b, 0x00, 0xd0,
+ 0x60, 0xc2, 0x00, 0x3d, 0x0d, 0xe1, 0x88, 0xc9, 0x35, 0x23, 0x0d, 0xe1,
+ 0x78, 0xc2, 0x00, 0x3d, 0x0d, 0xe1, 0x68, 0xd2, 0x4f, 0x56, 0x0d, 0xe1,
+ 0x20, 0xc2, 0x00, 0x3d, 0x0d, 0xe1, 0x60, 0xc2, 0x00, 0x3d, 0x0d, 0xe1,
+ 0x58, 0xd0, 0x5d, 0x7f, 0x01, 0x3e, 0x41, 0xd6, 0x2c, 0x95, 0x01, 0x4f,
+ 0xb9, 0xc8, 0x15, 0xe3, 0x01, 0x4f, 0xa8, 0xc7, 0x0e, 0xbb, 0x01, 0x16,
+ 0x68, 0xc9, 0xb6, 0x30, 0x0f, 0xac, 0x99, 0xc7, 0xc7, 0x67, 0x0f, 0xac,
+ 0x90, 0xcf, 0x00, 0xef, 0x01, 0x80, 0xe8, 0xd1, 0x43, 0x9c, 0x0f, 0xdc,
+ 0x59, 0xd0, 0x07, 0x57, 0x01, 0x16, 0x60, 0x00, 0x42, 0xc1, 0xce, 0xd3,
+ 0x05, 0xf4, 0x01, 0x00, 0xc9, 0xd0, 0x5b, 0x1f, 0x01, 0x71, 0x38, 0xca,
+ 0x75, 0x91, 0x0f, 0xaf, 0x49, 0xc4, 0x23, 0x98, 0x0f, 0xab, 0x42, 0x02,
+ 0xc1, 0xe6, 0x49, 0x03, 0x91, 0xc2, 0xc1, 0xec, 0xd6, 0x13, 0x04, 0x01,
+ 0x4c, 0xa0, 0x09, 0xc2, 0xc1, 0xf8, 0x42, 0x00, 0x3c, 0x42, 0xc2, 0x07,
+ 0xcc, 0x05, 0x1b, 0x01, 0x2c, 0xa9, 0xcd, 0x15, 0x72, 0x0f, 0xdc, 0x38,
+ 0x42, 0x01, 0x22, 0xc2, 0xc2, 0x13, 0xcc, 0x04, 0x1b, 0x0f, 0xdc, 0x69,
+ 0xcb, 0x96, 0x4b, 0x0f, 0xdd, 0x99, 0xc6, 0xa1, 0x04, 0x0f, 0xdd, 0xd0,
+ 0x00, 0x42, 0xc2, 0x1f, 0xca, 0xa5, 0xf6, 0x01, 0x1d, 0x01, 0xc9, 0x50,
+ 0xc7, 0x01, 0x1c, 0xf9, 0xca, 0x9f, 0x70, 0x01, 0x1c, 0xf0, 0xc7, 0xb3,
+ 0x98, 0x01, 0x4b, 0xe9, 0xd0, 0x49, 0x5e, 0x0f, 0xdc, 0x48, 0x44, 0x00,
+ 0x54, 0xc2, 0xc2, 0x31, 0xd3, 0x42, 0xb6, 0x01, 0x70, 0x50, 0xcc, 0x8a,
+ 0xf0, 0x0f, 0xaf, 0x69, 0x44, 0x05, 0xdf, 0xc2, 0xc2, 0x40, 0xde, 0x01,
+ 0x49, 0x0f, 0xde, 0x18, 0xce, 0x00, 0xf0, 0x01, 0x00, 0xe9, 0xcc, 0x85,
+ 0x80, 0x01, 0x4e, 0xd9, 0x03, 0xc2, 0xc2, 0x4c, 0xcb, 0x1c, 0xe0, 0x01,
+ 0x71, 0x48, 0xcb, 0x1c, 0xe0, 0x01, 0x4c, 0x31, 0x05, 0xc2, 0xc2, 0x58,
+ 0xd2, 0x23, 0x42, 0x01, 0x80, 0xb9, 0xd6, 0x0a, 0xe8, 0x01, 0x80, 0xc9,
+ 0xce, 0x26, 0x2e, 0x01, 0x80, 0xd8, 0x00, 0x42, 0xc2, 0x64, 0x44, 0x00,
+ 0x55, 0xc2, 0xc2, 0x70, 0x44, 0x15, 0xd2, 0x42, 0xc2, 0x7c, 0xce, 0x74,
+ 0x3e, 0x01, 0x0d, 0x01, 0x49, 0x6b, 0xf4, 0x42, 0xc2, 0x88, 0xcb, 0x76,
+ 0x63, 0x01, 0x0e, 0xe9, 0xca, 0x89, 0xae, 0x0f, 0xc1, 0xd0, 0xd0, 0x5c,
+ 0xef, 0x0f, 0xc2, 0x11, 0xc5, 0x00, 0x62, 0x0f, 0xc2, 0x30, 0x46, 0x02,
+ 0x12, 0xc2, 0xc2, 0x94, 0xc2, 0x00, 0x3b, 0x0f, 0xd7, 0x88, 0x00, 0x42,
+ 0xc2, 0xa0, 0x44, 0x00, 0x62, 0xc2, 0xc2, 0xbd, 0xca, 0x54, 0x07, 0x01,
+ 0x48, 0x68, 0xd3, 0x40, 0xc8, 0x01, 0x5c, 0x51, 0xc5, 0x00, 0x62, 0x01,
+ 0x5c, 0xa8, 0xc9, 0x4f, 0xa1, 0x01, 0x0c, 0x40, 0xc4, 0xea, 0x13, 0x01,
+ 0x0c, 0x00, 0x00, 0x42, 0xc2, 0xca, 0x00, 0x42, 0xc2, 0xd6, 0xe0, 0x03,
+ 0x27, 0x0f, 0xac, 0xb0, 0x03, 0xc2, 0xc2, 0xe2, 0xc2, 0x08, 0x86, 0x00,
+ 0xb7, 0xb1, 0xc2, 0x00, 0xbe, 0x00, 0xb7, 0xa9, 0xc2, 0x07, 0xc3, 0x00,
+ 0xb7, 0xa0, 0x47, 0xc4, 0x50, 0x42, 0xc2, 0xec, 0xc2, 0x05, 0x27, 0x00,
+ 0xb5, 0xa1, 0x83, 0x00, 0xb5, 0x90, 0xc3, 0x75, 0x1f, 0x00, 0xb6, 0xe0,
+ 0xc4, 0xe6, 0x47, 0x00, 0xb7, 0x31, 0xc8, 0xc0, 0xc3, 0x00, 0xb6, 0xc1,
+ 0xc7, 0xc4, 0x18, 0x00, 0xb6, 0x29, 0xca, 0x9e, 0x8a, 0x00, 0xb5, 0xe1,
+ 0xc7, 0xc4, 0x49, 0x00, 0xb5, 0x60, 0xc3, 0x66, 0xa5, 0x00, 0xb7, 0x21,
+ 0x90, 0x00, 0xb5, 0x98, 0xc3, 0x76, 0x46, 0x00, 0xb6, 0xd9, 0xc3, 0x74,
+ 0x35, 0x00, 0xb6, 0xa1, 0xc3, 0xeb, 0xac, 0x00, 0xb6, 0x00, 0x94, 0x00,
+ 0xb6, 0x21, 0xc9, 0xb0, 0x5a, 0x00, 0xb5, 0xb8, 0x90, 0x05, 0x28, 0x08,
+ 0x87, 0x05, 0x28, 0x11, 0x90, 0x05, 0x2f, 0x28, 0x90, 0x05, 0x29, 0x38,
+ 0x90, 0x05, 0x2a, 0x68, 0x91, 0x05, 0x2b, 0x99, 0x90, 0x05, 0x2d, 0xf0,
+ 0x90, 0x05, 0x2c, 0xc0, 0x87, 0x05, 0x28, 0x1b, 0x02, 0xc3, 0x22, 0x90,
+ 0x05, 0x2f, 0x38, 0x90, 0x05, 0x29, 0x48, 0x90, 0x05, 0x2a, 0x78, 0x91,
+ 0x05, 0x2b, 0xa3, 0x02, 0xc3, 0x26, 0x90, 0x05, 0x2e, 0x00, 0x90, 0x05,
+ 0x2c, 0xd0, 0x87, 0x05, 0x28, 0x28, 0x91, 0x05, 0x2b, 0xb0, 0x87, 0x05,
+ 0x2f, 0x4b, 0x02, 0xc3, 0x2a, 0x8b, 0x05, 0x29, 0x59, 0x83, 0x05, 0x2a,
+ 0x89, 0x91, 0x05, 0x2e, 0x13, 0x02, 0xc3, 0x2e, 0x97, 0x05, 0x2c, 0xe0,
+ 0x87, 0x05, 0x28, 0x38, 0x91, 0x05, 0x2b, 0xc0, 0x87, 0x05, 0x2f, 0x5b,
+ 0x02, 0xc3, 0x32, 0x8b, 0x05, 0x29, 0x69, 0x83, 0x05, 0x2a, 0x99, 0x91,
+ 0x05, 0x2e, 0x23, 0x02, 0xc3, 0x36, 0x97, 0x05, 0x2c, 0xf0, 0x87, 0x05,
+ 0x2f, 0x73, 0x02, 0xc3, 0x3a, 0x8b, 0x05, 0x29, 0x79, 0x83, 0x05, 0x2a,
+ 0xb1, 0x91, 0x05, 0x2e, 0x33, 0x02, 0xc3, 0x3e, 0x97, 0x05, 0x2d, 0x00,
+ 0x87, 0x05, 0x29, 0x08, 0x91, 0x05, 0x2c, 0x90, 0x87, 0x05, 0x2f, 0x63,
+ 0x02, 0xc3, 0x42, 0x8b, 0x05, 0x29, 0x71, 0x83, 0x05, 0x2a, 0xa3, 0x02,
+ 0xc3, 0x4a, 0x91, 0x05, 0x2e, 0x2b, 0x02, 0xc3, 0x4e, 0x97, 0x05, 0x2c,
+ 0xf8, 0x87, 0x05, 0x28, 0xf0, 0x90, 0x05, 0x2b, 0x58, 0x91, 0x05, 0x2c,
+ 0x78, 0x87, 0x05, 0x2f, 0x7b, 0x02, 0xc3, 0x52, 0x8b, 0x05, 0x29, 0x81,
+ 0x83, 0x05, 0x2a, 0xb9, 0x91, 0x05, 0x2e, 0x3b, 0x02, 0xc3, 0x5a, 0x97,
+ 0x05, 0x2d, 0x08, 0x87, 0x05, 0x29, 0x01, 0x90, 0x05, 0x30, 0x38, 0x91,
+ 0x05, 0x2c, 0x88, 0x87, 0x05, 0x28, 0x60, 0x91, 0x05, 0x2b, 0xe8, 0x87,
+ 0x05, 0x28, 0x68, 0x91, 0x05, 0x2b, 0xf0, 0x87, 0x05, 0x28, 0x70, 0x87,
+ 0x05, 0x2f, 0xa3, 0x02, 0xc3, 0x62, 0x8b, 0x05, 0x29, 0xa1, 0x83, 0x05,
+ 0x2a, 0xd9, 0x91, 0x05, 0x2e, 0x63, 0x02, 0xc3, 0x66, 0x97, 0x05, 0x2d,
+ 0x28, 0x91, 0x05, 0x2b, 0xf8, 0x87, 0x05, 0x2f, 0xab, 0x02, 0xc3, 0x6a,
+ 0x0a, 0xc2, 0xc3, 0x6e, 0x8b, 0x05, 0x29, 0xa9, 0x83, 0x05, 0x2a, 0xe1,
+ 0x91, 0x05, 0x2e, 0x6b, 0x02, 0xc3, 0x88, 0x97, 0x05, 0x2d, 0x30, 0x87,
+ 0x05, 0x28, 0xa0, 0x91, 0x05, 0x2c, 0x28, 0x87, 0x05, 0x28, 0x91, 0xc8,
+ 0x4b, 0x3a, 0x05, 0x30, 0x60, 0x91, 0x05, 0x2c, 0x18, 0x87, 0x05, 0x28,
+ 0x98, 0x91, 0x05, 0x2c, 0x20, 0x87, 0x05, 0x2f, 0xd3, 0x02, 0xc3, 0x8c,
+ 0x8b, 0x05, 0x29, 0xd1, 0x83, 0x05, 0x2b, 0x09, 0x91, 0x05, 0x2e, 0x93,
+ 0x02, 0xc3, 0x90, 0x97, 0x05, 0x2d, 0x58, 0x87, 0x05, 0x30, 0x0b, 0x02,
+ 0xc3, 0x9a, 0x8b, 0x05, 0x2a, 0x09, 0x83, 0x05, 0x2b, 0x41, 0x91, 0x05,
+ 0x2e, 0xcb, 0x02, 0xc3, 0x9e, 0x97, 0x05, 0x2d, 0x90, 0x09, 0xc2, 0xc3,
+ 0xa2, 0xc2, 0x00, 0x3a, 0x05, 0x2a, 0x59, 0xc2, 0x00, 0xe5, 0x05, 0x2d,
+ 0xe1, 0xc2, 0x01, 0x04, 0x05, 0x2f, 0x18, 0x87, 0x05, 0x29, 0x10, 0x87,
+ 0x05, 0x30, 0x53, 0x02, 0xc3, 0xbc, 0x8b, 0x05, 0x2a, 0x41, 0x83, 0x05,
+ 0x2b, 0x81, 0x91, 0x05, 0x2f, 0x03, 0x02, 0xc3, 0xc0, 0x97, 0x05, 0x2d,
+ 0xc8, 0x91, 0x05, 0x2c, 0x98, 0x87, 0x05, 0x28, 0xb0, 0x87, 0x05, 0x2f,
+ 0xe3, 0x02, 0xc3, 0xc4, 0x8b, 0x05, 0x29, 0xe1, 0x83, 0x05, 0x2b, 0x19,
+ 0x91, 0x05, 0x2e, 0xa3, 0x02, 0xc3, 0xc8, 0x97, 0x05, 0x2d, 0x68, 0x91,
+ 0x05, 0x2c, 0x38, 0x87, 0x05, 0x28, 0xc0, 0x87, 0x05, 0x2f, 0xf3, 0x02,
+ 0xc3, 0xcc, 0x8b, 0x05, 0x29, 0xf1, 0x83, 0x05, 0x2b, 0x29, 0x91, 0x05,
+ 0x2e, 0xb3, 0x02, 0xc3, 0xd0, 0x97, 0x05, 0x2d, 0x78, 0x91, 0x05, 0x2c,
+ 0x48, 0x87, 0x05, 0x28, 0xd0, 0x91, 0x05, 0x2c, 0x58, 0x87, 0x05, 0x28,
+ 0xd8, 0x91, 0x05, 0x2c, 0x60, 0x87, 0x05, 0x28, 0xe8, 0x91, 0x05, 0x2c,
+ 0x70, 0x90, 0x05, 0x2b, 0x90, 0xca, 0x39, 0x9c, 0x01, 0x1b, 0xf9, 0x47,
+ 0x03, 0xfa, 0x42, 0xc3, 0xd4, 0xc4, 0xb2, 0x9f, 0x00, 0x04, 0x50, 0xca,
+ 0x98, 0xa9, 0x01, 0x81, 0x99, 0xca, 0x01, 0x77, 0x01, 0x81, 0xa8, 0xca,
+ 0xa3, 0xda, 0x00, 0xe7, 0x60, 0xce, 0x26, 0x2e, 0x70, 0x02, 0xd9, 0xcb,
+ 0x1c, 0xe0, 0x70, 0x01, 0x41, 0xcd, 0x00, 0xd2, 0x70, 0x03, 0xd8, 0x9c,
+ 0x70, 0x02, 0xd1, 0x9b, 0x70, 0x02, 0xc9, 0x9a, 0x70, 0x02, 0xc1, 0x99,
+ 0x70, 0x02, 0xb9, 0x98, 0x70, 0x02, 0xb1, 0x97, 0x70, 0x02, 0xa9, 0x96,
+ 0x70, 0x02, 0xa1, 0x95, 0x70, 0x02, 0x99, 0x94, 0x70, 0x02, 0x91, 0x93,
+ 0x70, 0x02, 0x89, 0x92, 0x70, 0x02, 0x81, 0x91, 0x70, 0x02, 0x79, 0x90,
+ 0x70, 0x02, 0x71, 0x8f, 0x70, 0x02, 0x69, 0x8e, 0x70, 0x02, 0x61, 0x8d,
+ 0x70, 0x02, 0x59, 0x8c, 0x70, 0x02, 0x51, 0x8b, 0x70, 0x02, 0x49, 0x8a,
+ 0x70, 0x02, 0x41, 0x89, 0x70, 0x02, 0x39, 0x88, 0x70, 0x02, 0x31, 0x87,
+ 0x70, 0x02, 0x29, 0x86, 0x70, 0x02, 0x21, 0x85, 0x70, 0x02, 0x19, 0x84,
+ 0x70, 0x02, 0x11, 0x83, 0x70, 0x02, 0x08, 0x9c, 0x70, 0x03, 0xd1, 0x9b,
+ 0x70, 0x03, 0xc9, 0x9a, 0x70, 0x03, 0xc1, 0x99, 0x70, 0x03, 0xb9, 0x98,
+ 0x70, 0x03, 0xb1, 0x97, 0x70, 0x03, 0xa9, 0x96, 0x70, 0x03, 0xa1, 0x95,
+ 0x70, 0x03, 0x99, 0x94, 0x70, 0x03, 0x91, 0x93, 0x70, 0x03, 0x89, 0x92,
+ 0x70, 0x03, 0x81, 0x91, 0x70, 0x03, 0x79, 0x90, 0x70, 0x03, 0x71, 0x8f,
+ 0x70, 0x03, 0x69, 0x8e, 0x70, 0x03, 0x61, 0x8d, 0x70, 0x03, 0x59, 0x8c,
+ 0x70, 0x03, 0x51, 0x8b, 0x70, 0x03, 0x49, 0x8a, 0x70, 0x03, 0x41, 0x89,
+ 0x70, 0x03, 0x39, 0x88, 0x70, 0x03, 0x31, 0x87, 0x70, 0x03, 0x29, 0x86,
+ 0x70, 0x03, 0x21, 0x85, 0x70, 0x03, 0x19, 0x84, 0x70, 0x03, 0x11, 0x83,
+ 0x70, 0x03, 0x08, 0xc9, 0xb4, 0x26, 0x70, 0x02, 0x01, 0x83, 0x70, 0x01,
+ 0x60, 0xc4, 0x15, 0xa7, 0x70, 0x01, 0xb9, 0xc2, 0x22, 0x45, 0x70, 0x01,
+ 0xb0, 0xc3, 0x0d, 0x8f, 0x70, 0x01, 0xa9, 0xc3, 0x08, 0xde, 0x70, 0x01,
+ 0xa0, 0xc4, 0x05, 0xde, 0x70, 0x01, 0x99, 0xc2, 0x0a, 0x20, 0x70, 0x01,
+ 0x90, 0x23, 0xc2, 0xc3, 0xe0, 0x22, 0xc2, 0xc4, 0x04, 0x21, 0xc2, 0xc4,
+ 0x2c, 0x20, 0xc2, 0xc4, 0x54, 0x1f, 0xc2, 0xc4, 0x7c, 0x1e, 0xc2, 0xc4,
+ 0xa4, 0x1d, 0x42, 0xc4, 0xcc, 0x26, 0xc2, 0xc4, 0xf4, 0x25, 0xc2, 0xc5,
+ 0x1c, 0x24, 0xc2, 0xc5, 0x44, 0x23, 0xc2, 0xc5, 0x6c, 0x22, 0xc2, 0xc5,
+ 0x94, 0x21, 0xc2, 0xc5, 0xbc, 0x20, 0xc2, 0xc5, 0xe4, 0x1f, 0xc2, 0xc6,
+ 0x0c, 0x1e, 0xc2, 0xc6, 0x34, 0x1d, 0x42, 0xc6, 0x5c, 0x26, 0xc2, 0xc6,
+ 0x84, 0x25, 0xc2, 0xc6, 0xac, 0x24, 0xc2, 0xc6, 0xd4, 0x23, 0xc2, 0xc6,
+ 0xfc, 0x22, 0xc2, 0xc7, 0x24, 0x21, 0xc2, 0xc7, 0x4c, 0x20, 0xc2, 0xc7,
+ 0x74, 0x1f, 0xc2, 0xc7, 0x9c, 0x1e, 0xc2, 0xc7, 0xc4, 0x1d, 0x42, 0xc7,
+ 0xec, 0x26, 0xc2, 0xc8, 0x14, 0x25, 0xc2, 0xc8, 0x3c, 0x24, 0xc2, 0xc8,
+ 0x64, 0x23, 0xc2, 0xc8, 0x8c, 0x22, 0xc2, 0xc8, 0xb4, 0x21, 0xc2, 0xc8,
+ 0xdc, 0x20, 0xc2, 0xc9, 0x04, 0x1f, 0xc2, 0xc9, 0x2c, 0x1e, 0xc2, 0xc9,
+ 0x54, 0x1d, 0x42, 0xc9, 0x7c, 0x26, 0xc2, 0xc9, 0xa4, 0x25, 0xc2, 0xc9,
+ 0xcc, 0x24, 0xc2, 0xc9, 0xf4, 0x23, 0xc2, 0xca, 0x1c, 0x22, 0xc2, 0xca,
+ 0x44, 0x21, 0xc2, 0xca, 0x6c, 0x20, 0xc2, 0xca, 0x94, 0x1f, 0xc2, 0xca,
+ 0xbc, 0x1e, 0xc2, 0xca, 0xe4, 0x1d, 0x42, 0xcb, 0x0c, 0x26, 0xc2, 0xcb,
+ 0x34, 0x25, 0xc2, 0xcb, 0x5c, 0x24, 0xc2, 0xcb, 0x84, 0x23, 0xc2, 0xcb,
+ 0xac, 0x22, 0xc2, 0xcb, 0xd4, 0x21, 0xc2, 0xcb, 0xfc, 0x20, 0xc2, 0xcc,
+ 0x24, 0x1f, 0xc2, 0xcc, 0x4c, 0x1e, 0xc2, 0xcc, 0x74, 0x1d, 0x42, 0xcc,
+ 0x9c, 0x26, 0xc2, 0xcc, 0xc4, 0x25, 0xc2, 0xcc, 0xec, 0x24, 0xc2, 0xcd,
+ 0x14, 0x23, 0xc2, 0xcd, 0x3c, 0x22, 0xc2, 0xcd, 0x64, 0x21, 0xc2, 0xcd,
+ 0x8c, 0x20, 0xc2, 0xcd, 0xb4, 0x1f, 0xc2, 0xcd, 0xdc, 0x1e, 0xc2, 0xce,
+ 0x04, 0x1d, 0x42, 0xce, 0x2c, 0x26, 0xc2, 0xce, 0x54, 0x25, 0xc2, 0xce,
+ 0x7c, 0x24, 0xc2, 0xce, 0xa4, 0x23, 0xc2, 0xce, 0xcc, 0x22, 0xc2, 0xce,
+ 0xf4, 0x21, 0xc2, 0xcf, 0x1c, 0x20, 0xc2, 0xcf, 0x44, 0x1f, 0xc2, 0xcf,
+ 0x6c, 0x1e, 0xc2, 0xcf, 0x94, 0x1d, 0x42, 0xcf, 0xbc, 0xc4, 0x15, 0xa7,
+ 0x0b, 0x56, 0x39, 0xc2, 0x22, 0x45, 0x0b, 0x56, 0x30, 0xc3, 0x0d, 0x8f,
+ 0x0b, 0x56, 0x29, 0xc3, 0x08, 0xde, 0x0b, 0x56, 0x20, 0xc4, 0x05, 0xde,
+ 0x0b, 0x56, 0x19, 0xc2, 0x0a, 0x20, 0x0b, 0x56, 0x10, 0xc2, 0x01, 0x0e,
+ 0x0b, 0x55, 0xe9, 0x83, 0x0b, 0x55, 0xa8, 0xc2, 0x01, 0x0e, 0x0b, 0x55,
+ 0xe1, 0x83, 0x0b, 0x55, 0x88, 0x83, 0x0b, 0x55, 0xd9, 0xc7, 0xb3, 0x6a,
+ 0x0b, 0x54, 0x80, 0xc2, 0x01, 0x0e, 0x0b, 0x55, 0xc9, 0xc2, 0x0e, 0xe5,
+ 0x0b, 0x55, 0xb1, 0x83, 0x0b, 0x55, 0x80, 0x16, 0xc2, 0xcf, 0xe0, 0x83,
+ 0x0b, 0x55, 0x68, 0xc2, 0x01, 0x0e, 0x0b, 0x55, 0xb9, 0x83, 0x0b, 0x55,
+ 0x10, 0x0a, 0xc2, 0xcf, 0xea, 0x83, 0x0b, 0x55, 0x20, 0xc2, 0x01, 0x0e,
+ 0x0b, 0x55, 0x99, 0x83, 0x0b, 0x55, 0x61, 0xc2, 0x1a, 0x36, 0x0b, 0x55,
+ 0x41, 0xc2, 0x07, 0x69, 0x0b, 0x55, 0x18, 0x83, 0x0b, 0x55, 0x71, 0xc7,
+ 0xcf, 0xb7, 0x0b, 0x54, 0x88, 0x83, 0x0b, 0x55, 0x59, 0x9a, 0x0b, 0x54,
+ 0xf9, 0x93, 0x0b, 0x54, 0xf1, 0x85, 0x0b, 0x54, 0xe9, 0x9c, 0x0b, 0x54,
+ 0xe0, 0xc2, 0x01, 0x0e, 0x0b, 0x55, 0x49, 0x83, 0x0b, 0x55, 0x38, 0xc2,
+ 0x01, 0x0e, 0x0b, 0x55, 0x09, 0x83, 0x0b, 0x55, 0x00, 0x0b, 0xc2, 0xcf,
+ 0xf4, 0x07, 0xc2, 0xd0, 0x08, 0x9a, 0x0b, 0x54, 0x39, 0x93, 0x0b, 0x54,
+ 0x31, 0x85, 0x0b, 0x54, 0x29, 0x9c, 0x0b, 0x54, 0x20, 0x19, 0xc2, 0xd0,
+ 0x18, 0x9a, 0x0b, 0x53, 0xb9, 0x93, 0x0b, 0x53, 0xb1, 0x85, 0x0b, 0x53,
+ 0xa9, 0x9c, 0x0b, 0x53, 0xa0, 0x9a, 0x0b, 0x54, 0x19, 0x93, 0x0b, 0x54,
+ 0x11, 0x85, 0x0b, 0x54, 0x09, 0x9c, 0x0b, 0x54, 0x00, 0x9a, 0x0b, 0x53,
+ 0xf9, 0x93, 0x0b, 0x53, 0xf1, 0x85, 0x0b, 0x53, 0xe9, 0x9c, 0x0b, 0x53,
+ 0xe0, 0x9a, 0x0b, 0x53, 0xd9, 0x93, 0x0b, 0x53, 0xd1, 0x85, 0x0b, 0x53,
+ 0xc9, 0x9c, 0x0b, 0x53, 0xc0, 0x9a, 0x0b, 0x53, 0x99, 0x93, 0x0b, 0x53,
+ 0x91, 0x85, 0x0b, 0x53, 0x89, 0x9c, 0x0b, 0x53, 0x80, 0x03, 0xc2, 0xd0,
+ 0x28, 0xc3, 0x24, 0x43, 0x08, 0xff, 0x19, 0x0b, 0x42, 0xd0, 0x34, 0xc7,
+ 0xcd, 0xa3, 0x08, 0xff, 0x81, 0xc7, 0xcf, 0x1d, 0x08, 0xfe, 0xe1, 0xc9,
+ 0xb0, 0x36, 0x08, 0xfe, 0xc8, 0x17, 0xc2, 0xd0, 0x40, 0xc4, 0xe9, 0xeb,
+ 0x08, 0xfe, 0xe8, 0x03, 0xc2, 0xd0, 0x4c, 0xc2, 0x00, 0xe5, 0x08, 0xfe,
+ 0xf8, 0xc8, 0xb9, 0x8b, 0x08, 0xfe, 0xb9, 0xc7, 0x10, 0xac, 0x00, 0x5c,
+ 0x10, 0x83, 0x00, 0x5c, 0x31, 0x8b, 0x00, 0x5c, 0x81, 0x97, 0x00, 0x5c,
+ 0xa0, 0x8b, 0x00, 0x5c, 0x40, 0x97, 0x00, 0x5c, 0x50, 0x87, 0x00, 0x5c,
+ 0x78, 0x91, 0x00, 0x5c, 0x98, 0xc2, 0x07, 0x69, 0x00, 0x5c, 0xc9, 0xc2,
+ 0x1a, 0x36, 0x00, 0x5c, 0xf1, 0x10, 0xc2, 0xd0, 0x5e, 0x83, 0x00, 0x5d,
+ 0x40, 0xc2, 0x00, 0x44, 0x00, 0x5c, 0xf9, 0x83, 0x00, 0x5d, 0x20, 0x83,
+ 0x00, 0x5d, 0x81, 0xc2, 0x00, 0x9a, 0x00, 0x5d, 0x88, 0x83, 0x00, 0x5d,
+ 0x91, 0x0e, 0x42, 0xd0, 0x68, 0xc2, 0x01, 0x0e, 0x00, 0x5d, 0xb1, 0xc2,
+ 0x0e, 0xe5, 0x00, 0x5d, 0xb9, 0x83, 0x00, 0x5d, 0xc0, 0xc2, 0x0a, 0x20,
+ 0x00, 0x5f, 0x41, 0xc4, 0x05, 0xde, 0x00, 0x5f, 0x48, 0xc3, 0x08, 0xde,
+ 0x00, 0x5f, 0x51, 0xc3, 0x0d, 0x8f, 0x00, 0x5f, 0x58, 0xc2, 0x22, 0x45,
+ 0x00, 0x5f, 0x61, 0xc4, 0x15, 0xa7, 0x00, 0x5f, 0x68, 0xc6, 0xa7, 0x26,
+ 0x08, 0xfe, 0x71, 0xc9, 0xb4, 0x92, 0x08, 0xfe, 0x38, 0xc3, 0xed, 0x23,
+ 0x08, 0xfe, 0x91, 0xc3, 0xed, 0x20, 0x08, 0xfe, 0x88, 0xc4, 0xa2, 0x95,
+ 0x08, 0xfe, 0x79, 0xc7, 0xc4, 0x1f, 0x08, 0xfe, 0x20, 0x8a, 0x08, 0xfe,
+ 0x61, 0xc4, 0x1e, 0x53, 0x08, 0xfe, 0x10, 0xc4, 0x1c, 0xe3, 0x08, 0xfe,
+ 0x59, 0xc8, 0x1e, 0x4f, 0x08, 0xfe, 0x41, 0x0a, 0x42, 0xd0, 0x72, 0x44,
+ 0x66, 0x94, 0xc2, 0xd0, 0x7e, 0xc8, 0xb4, 0x92, 0x08, 0xfe, 0x18, 0xc2,
+ 0x01, 0x0e, 0x08, 0xb4, 0xb9, 0x83, 0x08, 0xb4, 0xb0, 0xc2, 0x01, 0x0e,
+ 0x08, 0xb4, 0xa9, 0x83, 0x08, 0xb4, 0xa0, 0xc3, 0x6f, 0x94, 0x00, 0xd5,
+ 0x58, 0xc3, 0x6f, 0x94, 0x00, 0xd5, 0x48, 0xcb, 0x97, 0x53, 0x00, 0xd3,
+ 0xe1, 0x45, 0x32, 0xf6, 0x42, 0xd0, 0x8a, 0xc4, 0x68, 0x63, 0x00, 0xd2,
+ 0xc0, 0x83, 0x00, 0xd2, 0xe1, 0x46, 0x2e, 0x47, 0x42, 0xd0, 0x9c, 0xc5,
+ 0x31, 0x3a, 0x00, 0xd2, 0xd1, 0xca, 0xa8, 0xb2, 0x00, 0xd2, 0xb8, 0xc5,
+ 0x03, 0x50, 0x00, 0xd3, 0x99, 0xc5, 0x00, 0x34, 0x00, 0xd3, 0x60, 0x87,
+ 0x00, 0xd3, 0x40, 0x87, 0x00, 0xd2, 0x98, 0xc2, 0x01, 0x0e, 0x00, 0xd2,
+ 0x61, 0xc2, 0x1a, 0x36, 0x00, 0xd1, 0xf9, 0x12, 0xc2, 0xd0, 0xa8, 0xc2,
+ 0x01, 0xa7, 0x00, 0xd1, 0xe1, 0x16, 0xc2, 0xd0, 0xb2, 0xc5, 0x3b, 0x63,
+ 0x00, 0xd1, 0x81, 0x05, 0xc2, 0xd0, 0xbc, 0xc2, 0x0e, 0xe5, 0x00, 0xd1,
+ 0x51, 0x0d, 0x42, 0xd0, 0xc6, 0x83, 0x00, 0xd2, 0x41, 0xc2, 0x0e, 0xe5,
+ 0x00, 0xd2, 0x39, 0xc2, 0x01, 0x0e, 0x00, 0xd2, 0x30, 0xc2, 0x01, 0x0e,
+ 0x00, 0xd1, 0xc9, 0x83, 0x00, 0xd1, 0xc0, 0xc2, 0x01, 0x0e, 0x00, 0xd1,
+ 0x99, 0x83, 0x00, 0xd1, 0x90, 0xc2, 0x01, 0x0e, 0x00, 0xd1, 0x41, 0x83,
+ 0x00, 0xd1, 0x38, 0xc2, 0x23, 0xe3, 0x00, 0xd1, 0x11, 0xc2, 0x01, 0x0e,
+ 0x00, 0xd1, 0x09, 0x83, 0x00, 0xd1, 0x00, 0xc2, 0x01, 0x01, 0x00, 0xd1,
+ 0x89, 0xc2, 0x00, 0x44, 0x00, 0xd1, 0x68, 0x83, 0x05, 0x55, 0xc8, 0xc2,
+ 0x03, 0x76, 0x05, 0x54, 0xf9, 0x91, 0x05, 0x54, 0xe8, 0x91, 0x05, 0x54,
+ 0xc9, 0xc2, 0x13, 0xf3, 0x05, 0x54, 0x49, 0xc2, 0x47, 0x43, 0x05, 0x54,
+ 0x88, 0xc2, 0x03, 0x76, 0x05, 0x54, 0xb9, 0x91, 0x05, 0x54, 0xa8, 0x91,
+ 0x05, 0x54, 0x59, 0xc2, 0x03, 0x76, 0x05, 0x54, 0x68, 0x0a, 0xc2, 0xd0,
+ 0xd6, 0x91, 0x05, 0x54, 0x08, 0xc2, 0x03, 0x76, 0x05, 0x54, 0xf1, 0x91,
+ 0x05, 0x54, 0xe0, 0x91, 0x05, 0x54, 0xc1, 0xc2, 0x13, 0xf3, 0x05, 0x54,
+ 0x41, 0xc2, 0x47, 0x43, 0x05, 0x54, 0x80, 0xc2, 0x03, 0x76, 0x05, 0x54,
+ 0xb1, 0x91, 0x05, 0x54, 0xa0, 0xc2, 0x03, 0x76, 0x05, 0x54, 0x61, 0x91,
+ 0x05, 0x54, 0x50, 0x0a, 0xc2, 0xd0, 0xe0, 0x91, 0x05, 0x54, 0x00, 0xc2,
+ 0x12, 0x5d, 0x00, 0x3c, 0xd8, 0xc4, 0xe3, 0xe7, 0x00, 0x3c, 0xf9, 0xc6,
+ 0xae, 0x4a, 0x00, 0x3c, 0x88, 0xc4, 0x9c, 0xaf, 0x00, 0x3c, 0xe9, 0xc7,
+ 0xae, 0x49, 0x00, 0x3c, 0x08, 0xc6, 0xae, 0x4a, 0x00, 0x3c, 0x91, 0x83,
+ 0x00, 0x3c, 0xe0, 0xc5, 0xe1, 0x7a, 0x00, 0x70, 0x09, 0x42, 0x03, 0x76,
+ 0x42, 0xd0, 0xea, 0xc6, 0xd5, 0x7a, 0x00, 0x70, 0x39, 0x43, 0xd0, 0x7d,
+ 0xc2, 0xd0, 0xf4, 0xc7, 0xcb, 0xf1, 0x00, 0x72, 0x68, 0xc2, 0x00, 0x3a,
+ 0x00, 0x70, 0x43, 0x02, 0xd0, 0xfe, 0xc3, 0x01, 0x5e, 0x00, 0x70, 0x49,
+ 0xc2, 0x09, 0x06, 0x00, 0x70, 0x60, 0x42, 0x01, 0x8a, 0xc2, 0xd1, 0x02,
+ 0x44, 0x10, 0xb0, 0x42, 0xd1, 0x0c, 0x43, 0xec, 0xae, 0xc2, 0xd1, 0x29,
+ 0xc7, 0xc2, 0xdc, 0x00, 0x72, 0x70, 0xc5, 0xdf, 0xe5, 0x00, 0x70, 0x71,
+ 0xc3, 0x13, 0x30, 0x00, 0x70, 0xa0, 0x42, 0x01, 0x8a, 0xc2, 0xd1, 0x35,
+ 0x0a, 0x42, 0xd1, 0x41, 0xc5, 0xe1, 0x70, 0x00, 0x70, 0xd9, 0x0a, 0xc2,
+ 0xd1, 0x4d, 0xc8, 0xbb, 0x9b, 0x00, 0x71, 0x78, 0xc3, 0x07, 0x4d, 0x00,
+ 0x70, 0xeb, 0x02, 0xd1, 0x59, 0xc5, 0xe2, 0x47, 0x00, 0x72, 0x78, 0xc4,
+ 0x43, 0xc5, 0x00, 0x71, 0x09, 0x42, 0x01, 0xcc, 0x42, 0xd1, 0x5d, 0xc5,
+ 0xe1, 0x6b, 0x00, 0x71, 0x19, 0x97, 0x00, 0x71, 0x20, 0x42, 0x01, 0x8a,
+ 0xc2, 0xd1, 0x6d, 0x97, 0x00, 0x71, 0x31, 0xca, 0xa5, 0xb0, 0x00, 0x72,
+ 0x28, 0xc3, 0x03, 0x1d, 0x00, 0x71, 0x59, 0xc6, 0xd1, 0xd8, 0x00, 0x71,
+ 0x70, 0xc2, 0x08, 0xc6, 0x0f, 0x15, 0x61, 0x87, 0x0f, 0x15, 0x3b, 0x02,
+ 0xd1, 0x79, 0x8b, 0x0f, 0x15, 0x12, 0x02, 0xd1, 0x7d, 0xc6, 0x80, 0x81,
+ 0x0e, 0x98, 0xf1, 0xc3, 0x07, 0x4f, 0x0e, 0x98, 0xa9, 0xc7, 0xc9, 0xa5,
+ 0x0e, 0x98, 0x58, 0xc5, 0xde, 0x00, 0x0e, 0x99, 0x61, 0xc6, 0xd8, 0xf2,
+ 0x0e, 0x98, 0xd8, 0xca, 0xa2, 0x40, 0x0f, 0xab, 0xe0, 0xd1, 0x53, 0xef,
+ 0x00, 0x60, 0x01, 0xce, 0x10, 0xab, 0x00, 0x60, 0x20, 0x83, 0x00, 0x60,
+ 0x31, 0x8b, 0x00, 0x60, 0x81, 0x97, 0x00, 0x60, 0xa0, 0x8b, 0x00, 0x60,
+ 0x40, 0x97, 0x00, 0x60, 0x50, 0x47, 0xb7, 0xd8, 0xc2, 0xd1, 0x81, 0x83,
+ 0x00, 0x61, 0xa8, 0x87, 0x00, 0x60, 0x78, 0x91, 0x00, 0x60, 0x98, 0x83,
+ 0x00, 0x60, 0xa9, 0xc2, 0x01, 0x0e, 0x00, 0x60, 0xb0, 0x83, 0x00, 0x60,
+ 0xb9, 0xc2, 0x01, 0x0e, 0x00, 0x60, 0xc0, 0xc2, 0x07, 0x69, 0x00, 0x60,
+ 0xc9, 0xc2, 0x1a, 0x36, 0x00, 0x60, 0xf1, 0xc2, 0x01, 0x01, 0x00, 0x61,
+ 0x19, 0x83, 0x00, 0x61, 0x42, 0x02, 0xd1, 0x8f, 0x83, 0x00, 0x60, 0xd1,
+ 0xc2, 0x01, 0x0e, 0x00, 0x60, 0xd8, 0x83, 0x00, 0x60, 0xe1, 0xc2, 0x01,
+ 0x0e, 0x00, 0x60, 0xe8, 0x16, 0xc2, 0xd1, 0x95, 0x83, 0x00, 0x61, 0x21,
+ 0xc2, 0x01, 0x0e, 0x00, 0x61, 0x29, 0xc2, 0x0e, 0xe5, 0x00, 0x62, 0xc0,
+ 0x06, 0xc2, 0xd1, 0x9f, 0x83, 0x00, 0x61, 0x31, 0xc2, 0x01, 0x0e, 0x00,
+ 0x61, 0x39, 0xc2, 0x05, 0x5c, 0x00, 0x62, 0xc8, 0x83, 0x00, 0x61, 0x51,
+ 0xc2, 0x01, 0x0e, 0x00, 0x61, 0x58, 0x83, 0x00, 0x61, 0x61, 0xc2, 0x01,
+ 0x0e, 0x00, 0x61, 0x68, 0x83, 0x00, 0x61, 0x81, 0x14, 0x42, 0xd1, 0xa9,
+ 0x83, 0x00, 0x61, 0x91, 0x0e, 0x42, 0xd1, 0xb3, 0xc2, 0x01, 0x0e, 0x00,
+ 0x61, 0xb1, 0xc2, 0x0e, 0xe5, 0x00, 0x61, 0xb9, 0x83, 0x00, 0x61, 0xc0,
+ 0x94, 0x00, 0x62, 0x20, 0x8e, 0x00, 0x63, 0x18, 0xd2, 0x16, 0x60, 0x00,
+ 0x63, 0xd1, 0xd3, 0x41, 0x86, 0x00, 0x63, 0xe8, 0xd2, 0x16, 0x60, 0x00,
+ 0x63, 0xd9, 0xd3, 0x41, 0x86, 0x00, 0x63, 0xf0, 0xd0, 0x01, 0x37, 0x01,
+ 0x4b, 0x91, 0xcf, 0x09, 0x58, 0x01, 0x5a, 0x48, 0xcb, 0x8f, 0x97, 0x01,
+ 0x53, 0x59, 0xc9, 0x18, 0x19, 0x01, 0x53, 0x50, 0x8e, 0x08, 0xa5, 0xc0,
+ 0x94, 0x08, 0xa5, 0xb0, 0x8e, 0x08, 0xa4, 0x4b, 0x02, 0xd1, 0xbd, 0x94,
+ 0x08, 0xa4, 0x3a, 0x02, 0xd1, 0xc1, 0xc2, 0x01, 0x0e, 0x08, 0xa4, 0xe1,
+ 0x83, 0x08, 0xa4, 0xd8, 0xc2, 0x01, 0x0e, 0x08, 0xa4, 0xd1, 0x83, 0x08,
+ 0xa4, 0xc8, 0xca, 0xa4, 0xca, 0x00, 0x7e, 0x38, 0xc9, 0xb6, 0x93, 0x00,
+ 0x7e, 0x31, 0xc6, 0xd3, 0xc4, 0x00, 0x7e, 0x40, 0x00, 0x42, 0xd1, 0xc5,
+ 0x45, 0xdc, 0xfc, 0xc2, 0xd1, 0xd7, 0x44, 0xea, 0x27, 0x42, 0xd1, 0xe1,
+ 0x83, 0x00, 0x7c, 0x81, 0xc2, 0x01, 0x0e, 0x00, 0x7c, 0x89, 0xc3, 0x1d,
+ 0x55, 0x00, 0x7d, 0xc8, 0x83, 0x00, 0x7c, 0x91, 0xc2, 0x01, 0x0e, 0x00,
+ 0x7c, 0x98, 0xc2, 0x07, 0x69, 0x00, 0x7c, 0xa1, 0xc2, 0x1a, 0x36, 0x00,
+ 0x7c, 0xc9, 0xc2, 0x01, 0x01, 0x00, 0x7c, 0xf1, 0x83, 0x00, 0x7d, 0x18,
+ 0x83, 0x00, 0x7c, 0xa9, 0xc2, 0x01, 0x0e, 0x00, 0x7c, 0xb0, 0x16, 0xc2,
+ 0xd1, 0xeb, 0x83, 0x00, 0x7c, 0xf9, 0xc2, 0x01, 0x0e, 0x00, 0x7d, 0x01,
+ 0x15, 0x42, 0xd1, 0xf5, 0x06, 0xc2, 0xd1, 0xff, 0x83, 0x00, 0x7d, 0x09,
+ 0xc2, 0x01, 0x0e, 0x00, 0x7d, 0x11, 0x1c, 0x42, 0xd2, 0x09, 0x83, 0x00,
+ 0x7d, 0x21, 0xc2, 0x01, 0x0e, 0x00, 0x7d, 0x28, 0x83, 0x00, 0x7d, 0x31,
+ 0xc2, 0x01, 0x0e, 0x00, 0x7d, 0x38, 0xc2, 0x01, 0x0e, 0x00, 0x7d, 0x71,
+ 0x83, 0x00, 0x7d, 0x78, 0xc2, 0x01, 0x0e, 0x00, 0x7d, 0xa1, 0xc2, 0x0e,
+ 0xe5, 0x00, 0x7d, 0xa9, 0x83, 0x00, 0x7d, 0xb0, 0xc2, 0x00, 0x4c, 0x00,
+ 0x7d, 0xd1, 0xc2, 0x1a, 0x36, 0x00, 0x7d, 0xd9, 0xc2, 0x00, 0x9a, 0x00,
+ 0x7d, 0xe0, 0xcb, 0x95, 0x0c, 0x00, 0x78, 0x09, 0x44, 0xe7, 0xb7, 0x42,
+ 0xd2, 0x13, 0xcb, 0x93, 0x80, 0x00, 0x78, 0x99, 0xcc, 0x78, 0x29, 0x00,
+ 0x79, 0xb0, 0xca, 0x9f, 0x66, 0x00, 0x78, 0x49, 0xd4, 0x3b, 0x0e, 0x00,
+ 0x7e, 0x80, 0xc5, 0x01, 0x0c, 0x00, 0x78, 0x80, 0x83, 0x00, 0x7a, 0x51,
+ 0xc2, 0x01, 0x0e, 0x00, 0x7a, 0x58, 0x83, 0x00, 0x7a, 0xc9, 0xc2, 0x01,
+ 0x0e, 0x00, 0x7a, 0xd0, 0x83, 0x00, 0x7a, 0x61, 0xc2, 0x01, 0x0e, 0x00,
+ 0x7a, 0x68, 0x83, 0x00, 0x7a, 0xd9, 0xc2, 0x01, 0x0e, 0x00, 0x7a, 0xe0,
+ 0x8a, 0x01, 0x69, 0x90, 0x8a, 0x01, 0x6a, 0xb2, 0x02, 0xd2, 0x1f, 0x8a,
+ 0x01, 0x69, 0xc1, 0x86, 0x01, 0x69, 0xca, 0x02, 0xd2, 0x23, 0x8a, 0x01,
+ 0x6a, 0x2a, 0x02, 0xd2, 0x27, 0x8a, 0x01, 0x6a, 0x18, 0x8a, 0x01, 0x6a,
+ 0x51, 0x9c, 0x01, 0x6b, 0x28, 0x94, 0x01, 0x6a, 0xa8, 0x95, 0x01, 0x6a,
+ 0xd1, 0x8a, 0x01, 0x6a, 0xd8, 0x8a, 0x01, 0x6a, 0xe9, 0x96, 0x01, 0x6a,
+ 0xf8, 0x8a, 0x01, 0x6a, 0x30, 0x90, 0x01, 0x6a, 0x81, 0x8a, 0x01, 0x6a,
+ 0xb8, 0x19, 0xc2, 0xd2, 0x2b, 0xcf, 0x63, 0xde, 0x00, 0x46, 0xc9, 0xc4,
+ 0x1a, 0x6a, 0x00, 0x37, 0x69, 0xc4, 0xe4, 0x9b, 0x00, 0x37, 0x18, 0x19,
+ 0xc2, 0xd2, 0x37, 0x15, 0xc2, 0xd2, 0x43, 0x08, 0xc2, 0xd2, 0x55, 0xc4,
+ 0x3e, 0xff, 0x00, 0x37, 0x43, 0x02, 0xd2, 0x61, 0xc3, 0x0e, 0x13, 0x00,
+ 0x46, 0xb9, 0xc3, 0x01, 0xcc, 0x00, 0x46, 0xb1, 0x42, 0x05, 0x5c, 0xc2,
+ 0xd2, 0x67, 0xc3, 0x2f, 0x22, 0x00, 0x37, 0x3b, 0x02, 0xd2, 0x71, 0x0f,
+ 0xc2, 0xd2, 0x77, 0xd4, 0x39, 0x6a, 0x00, 0x37, 0x09, 0xd8, 0x26, 0x6c,
+ 0x00, 0x37, 0x01, 0xcc, 0x8a, 0x30, 0x00, 0x36, 0xf9, 0x16, 0xc2, 0xd2,
+ 0x83, 0xc4, 0x2f, 0xc8, 0x00, 0x36, 0xd1, 0x0e, 0x42, 0xd2, 0x8f, 0xc7,
+ 0xbb, 0xc4, 0x00, 0x46, 0x49, 0xc3, 0x03, 0x4b, 0x00, 0x30, 0xc0, 0x00,
+ 0x42, 0xd2, 0x9b, 0xc5, 0x00, 0x34, 0x07, 0xde, 0x09, 0xc5, 0x03, 0x50,
+ 0x07, 0xde, 0x00, 0x48, 0x0b, 0x67, 0xc2, 0xd2, 0xad, 0x4a, 0x0d, 0xe6,
+ 0x42, 0xd2, 0xbf, 0xd7, 0x2b, 0x1a, 0x07, 0xdd, 0xe1, 0x42, 0x00, 0xd0,
+ 0x42, 0xd2, 0xd1, 0xc5, 0x00, 0x34, 0x07, 0xdd, 0xd9, 0xc5, 0x03, 0x50,
+ 0x07, 0xdd, 0xd0, 0x46, 0xd4, 0xc0, 0xc2, 0xd2, 0xdd, 0x03, 0x42, 0xd2,
+ 0xe9, 0xcf, 0x66, 0x09, 0x00, 0x30, 0x99, 0xd0, 0x5a, 0xdf, 0x00, 0x30,
+ 0x90, 0xcd, 0x05, 0x3a, 0x07, 0xf3, 0xe1, 0xcb, 0x6a, 0x72, 0x07, 0xf3,
+ 0xe8, 0x49, 0x1a, 0x6b, 0xc2, 0xd3, 0x01, 0xce, 0x72, 0x38, 0x07, 0xef,
+ 0xd8, 0x48, 0x1a, 0x75, 0xc2, 0xd3, 0x19, 0x48, 0xae, 0x24, 0x42, 0xd3,
+ 0x31, 0x0a, 0xc2, 0xd3, 0x4f, 0x49, 0xaa, 0xde, 0xc2, 0xd3, 0x5b, 0x03,
+ 0xc2, 0xd3, 0x83, 0xd4, 0x3b, 0xae, 0x07, 0xef, 0xf0, 0x44, 0x2f, 0x22,
+ 0xc2, 0xd3, 0x8d, 0x45, 0x1a, 0x6a, 0xc2, 0xd3, 0x99, 0x46, 0x2f, 0xc8,
+ 0xc2, 0xd3, 0xa3, 0x4d, 0x06, 0x7a, 0x42, 0xd3, 0xaf, 0x48, 0x91, 0x7e,
+ 0xc2, 0xd3, 0xbb, 0x0e, 0xc2, 0xd3, 0xd3, 0xd2, 0x47, 0x9a, 0x07, 0xef,
+ 0x99, 0xcb, 0x8f, 0x34, 0x07, 0xef, 0xf8, 0x03, 0xc2, 0xd3, 0xe5, 0x0a,
+ 0xc2, 0xd3, 0xf1, 0x48, 0xae, 0x24, 0x42, 0xd3, 0xfd, 0x0a, 0xc2, 0xd4,
+ 0x31, 0x45, 0x1a, 0x6a, 0xc2, 0xd4, 0x3b, 0x44, 0x2f, 0x22, 0xc2, 0xd4,
+ 0x51, 0x4d, 0x06, 0x7a, 0xc2, 0xd4, 0x5d, 0x46, 0x53, 0x23, 0xc2, 0xd4,
+ 0x69, 0x45, 0x2f, 0xc8, 0xc2, 0xd4, 0x75, 0xce, 0x71, 0x66, 0x07, 0xe4,
+ 0x89, 0xcf, 0x65, 0x55, 0x07, 0xe4, 0x91, 0xcf, 0x62, 0xb2, 0x07, 0xe4,
+ 0xa0, 0x0a, 0xc2, 0xd4, 0x7f, 0x44, 0x2f, 0x22, 0xc2, 0xd4, 0x8b, 0x4d,
+ 0x06, 0x7a, 0xc2, 0xd4, 0x97, 0x45, 0x1a, 0x6a, 0xc2, 0xd4, 0xa3, 0x46,
+ 0x53, 0x23, 0xc2, 0xd4, 0xb9, 0x45, 0x2f, 0xc8, 0xc2, 0xd4, 0xc5, 0xce,
+ 0x71, 0x66, 0x07, 0xe4, 0x51, 0xcf, 0x65, 0x55, 0x07, 0xe4, 0x59, 0xcf,
+ 0x62, 0xb2, 0x07, 0xe4, 0x68, 0x48, 0x0e, 0x14, 0xc2, 0xd4, 0xcf, 0x49,
+ 0x1a, 0x74, 0x42, 0xd4, 0xf9, 0x44, 0x2f, 0x22, 0xc2, 0xd5, 0x17, 0x45,
+ 0x06, 0x7a, 0xc2, 0xd5, 0x23, 0x45, 0x1a, 0x6a, 0xc2, 0xd5, 0x3b, 0x45,
+ 0x53, 0x23, 0xc2, 0xd5, 0x51, 0x0a, 0xc2, 0xd5, 0x5b, 0x45, 0x2f, 0xc8,
+ 0x42, 0xd5, 0x67, 0x03, 0xc2, 0xd5, 0x71, 0xcd, 0x77, 0x72, 0x07, 0xea,
+ 0x58, 0x44, 0x2f, 0x22, 0xc2, 0xd5, 0x7d, 0x4d, 0x06, 0x7a, 0xc2, 0xd5,
+ 0x89, 0x45, 0x1a, 0x6a, 0xc2, 0xd5, 0x95, 0x45, 0x53, 0x23, 0xc2, 0xd5,
+ 0x9f, 0x45, 0x50, 0xae, 0xc2, 0xd5, 0xa9, 0x46, 0x2f, 0xc8, 0x42, 0xd5,
+ 0xb5, 0x48, 0xae, 0x24, 0xc2, 0xd5, 0xc1, 0xdc, 0x14, 0xda, 0x07, 0xef,
+ 0xe8, 0x46, 0x2f, 0x23, 0xc2, 0xd5, 0xf5, 0x03, 0x42, 0xd5, 0xfb, 0x49,
+ 0x1a, 0x6b, 0xc2, 0xd6, 0x10, 0xd5, 0x33, 0x4f, 0x07, 0xef, 0xa0, 0x0b,
+ 0xc2, 0xd6, 0x34, 0xcb, 0x6a, 0x72, 0x07, 0xe9, 0xd8, 0x46, 0x57, 0xda,
+ 0xc2, 0xd6, 0x40, 0x45, 0x53, 0x23, 0xc2, 0xd6, 0x4c, 0x44, 0x1a, 0x74,
+ 0xc2, 0xd6, 0x56, 0x46, 0x2f, 0xc8, 0xc2, 0xd6, 0x60, 0x44, 0x71, 0x66,
+ 0xc2, 0xd6, 0x6c, 0x4d, 0x06, 0x7a, 0xc2, 0xd6, 0x78, 0x44, 0x2f, 0x22,
+ 0x42, 0xd6, 0x84, 0x60, 0x08, 0x87, 0x42, 0xd6, 0x90, 0xc5, 0x00, 0x34,
+ 0x00, 0x47, 0xc9, 0xc5, 0x03, 0x50, 0x00, 0x47, 0xb8, 0x08, 0xc2, 0xd6,
+ 0x9a, 0x09, 0xc2, 0xd6, 0xac, 0x0e, 0xc2, 0xd6, 0xcd, 0x42, 0x1c, 0x3e,
+ 0xc2, 0xd6, 0xdc, 0x03, 0xc2, 0xd6, 0xec, 0x0d, 0xc2, 0xd7, 0x08, 0x16,
+ 0xc2, 0xd7, 0x24, 0xc3, 0x08, 0x85, 0x00, 0x33, 0xf3, 0x02, 0xd7, 0x4c,
+ 0x1b, 0xc2, 0xd7, 0x59, 0x14, 0xc2, 0xd7, 0x69, 0x42, 0x00, 0x34, 0xc2,
+ 0xd7, 0x8a, 0x97, 0x00, 0x36, 0x3b, 0x02, 0xd7, 0x9a, 0xc3, 0x0e, 0x13,
+ 0x00, 0x32, 0x13, 0x02, 0xd7, 0xa4, 0x87, 0x00, 0x36, 0x83, 0x02, 0xd7,
+ 0xa8, 0x42, 0x05, 0x5c, 0xc2, 0xd7, 0xac, 0x15, 0xc2, 0xd7, 0xbc, 0x06,
+ 0xc2, 0xd7, 0xe9, 0xc2, 0x00, 0x29, 0x00, 0x36, 0x5b, 0x02, 0xd8, 0x0b,
+ 0xc3, 0x7c, 0xad, 0x00, 0x32, 0x43, 0x02, 0xd8, 0x16, 0x0f, 0xc2, 0xd8,
+ 0x1a, 0xc2, 0x09, 0x06, 0x00, 0x36, 0x33, 0x02, 0xd8, 0x29, 0x10, 0xc2,
+ 0xd8, 0x2d, 0x0a, 0x42, 0xd8, 0x46, 0xd3, 0x44, 0x6b, 0x00, 0x46, 0x91,
+ 0xc5, 0x00, 0x34, 0x00, 0x46, 0x79, 0xc5, 0x03, 0x50, 0x00, 0x46, 0x70,
+ 0x11, 0xc2, 0xd8, 0x5c, 0x03, 0x42, 0xd8, 0x68, 0xc3, 0x00, 0xcd, 0x0f,
+ 0x70, 0x01, 0xc2, 0x01, 0x5b, 0x0f, 0x70, 0x78, 0xc2, 0x01, 0x5b, 0x0f,
+ 0x70, 0x31, 0x8a, 0x0f, 0x70, 0xd0, 0x03, 0xc2, 0xd8, 0x70, 0xc2, 0x08,
+ 0x86, 0x0f, 0x70, 0xa9, 0x0a, 0x42, 0xd8, 0x7a, 0xc2, 0x0e, 0x14, 0x0f,
+ 0x70, 0x51, 0xc3, 0x1a, 0x74, 0x0f, 0x70, 0xb8, 0xc2, 0x01, 0x02, 0x0f,
+ 0x70, 0x59, 0x46, 0xd0, 0x28, 0x42, 0xd8, 0x86, 0xc3, 0x03, 0x2c, 0x0f,
+ 0x70, 0x71, 0xc4, 0xe6, 0x7b, 0x0f, 0x70, 0xa1, 0x49, 0x9f, 0xfc, 0xc2,
+ 0xd8, 0xea, 0xc2, 0x00, 0x5d, 0x0f, 0x70, 0x88, 0xc3, 0x8c, 0x10, 0x0f,
+ 0x71, 0x09, 0xc4, 0x2f, 0xc8, 0x0f, 0x71, 0x11, 0x0a, 0xc2, 0xd9, 0x3a,
+ 0xc3, 0x2f, 0x22, 0x0f, 0x71, 0x49, 0x0d, 0xc2, 0xd9, 0x46, 0xc3, 0x0e,
+ 0x1c, 0x0f, 0x71, 0x59, 0xc4, 0x1a, 0x6a, 0x0f, 0x71, 0x61, 0xc4, 0x3e,
+ 0xff, 0x0f, 0x71, 0x69, 0x15, 0xc2, 0xd9, 0x52, 0xc3, 0x01, 0xcc, 0x0f,
+ 0x71, 0x79, 0xc3, 0xae, 0x23, 0x0f, 0x71, 0x81, 0xc3, 0x0e, 0x13, 0x0f,
+ 0x71, 0x91, 0x16, 0xc2, 0xd9, 0x64, 0xc3, 0xa9, 0xe6, 0x0f, 0x71, 0xc9,
+ 0xc5, 0x91, 0x7b, 0x0f, 0x71, 0xd8, 0xda, 0x1a, 0x6a, 0x0f, 0x77, 0x81,
+ 0xcc, 0x8e, 0xb0, 0x0f, 0x77, 0x88, 0x00, 0xc2, 0xd9, 0x70, 0xc3, 0x06,
+ 0x26, 0x00, 0x32, 0x62, 0x02, 0xd9, 0x82, 0xc9, 0x35, 0x23, 0x00, 0x47,
+ 0xe0, 0xc9, 0x35, 0x23, 0x00, 0x47, 0xe8, 0x45, 0x01, 0xac, 0xc2, 0xd9,
+ 0x88, 0xcd, 0x05, 0x3a, 0x07, 0xf3, 0xb1, 0xcb, 0x6a, 0x72, 0x07, 0xf3,
+ 0xb8, 0xce, 0x05, 0x39, 0x07, 0xf3, 0x80, 0xc5, 0x00, 0x34, 0x00, 0x47,
+ 0x79, 0xc5, 0x03, 0x50, 0x00, 0x47, 0x60, 0xc5, 0x00, 0x34, 0x00, 0x47,
+ 0x71, 0xc5, 0x03, 0x50, 0x00, 0x47, 0x58, 0xc5, 0x00, 0x34, 0x00, 0x47,
+ 0x69, 0xc5, 0x03, 0x50, 0x00, 0x47, 0x50, 0x46, 0x01, 0xab, 0x42, 0xd9,
+ 0x9a, 0xc3, 0x06, 0x26, 0x00, 0x47, 0x48, 0xc3, 0x06, 0x26, 0x00, 0x47,
+ 0x40, 0xc3, 0x06, 0x26, 0x00, 0x47, 0x38, 0x83, 0x00, 0x2b, 0xc9, 0xc2,
+ 0x08, 0x86, 0x00, 0x2b, 0x98, 0x83, 0x00, 0x2a, 0x49, 0xc2, 0x08, 0x86,
+ 0x00, 0x2a, 0x18, 0x94, 0x0f, 0xb9, 0x19, 0xc3, 0xed, 0x3e, 0x0f, 0xb9,
+ 0x20, 0x44, 0x03, 0x72, 0x42, 0xd9, 0xac, 0xcc, 0x88, 0x98, 0x0f, 0xb9,
+ 0x78, 0x48, 0xc3, 0x7b, 0xc2, 0xd9, 0xb8, 0xc8, 0x88, 0x9c, 0x0f, 0xb9,
+ 0x61, 0xc6, 0x4e, 0x6c, 0x0f, 0xb9, 0x10, 0xc8, 0x88, 0x9c, 0x0f, 0xb9,
+ 0x69, 0xd2, 0x4e, 0x6c, 0x0f, 0xb9, 0x30, 0xc3, 0x82, 0xa4, 0x0f, 0xb8,
+ 0x49, 0x87, 0x0f, 0xb8, 0x40, 0xc3, 0xeb, 0x40, 0x0f, 0xb8, 0x39, 0x83,
+ 0x0f, 0xb8, 0x30, 0x84, 0x0a, 0x21, 0xa1, 0x83, 0x0a, 0x21, 0x98, 0x83,
+ 0x0a, 0x21, 0x88, 0x83, 0x0a, 0x21, 0x60, 0x83, 0x0a, 0x21, 0x48, 0x83,
+ 0x0a, 0x20, 0xd8, 0x83, 0x0a, 0x20, 0x50, 0x83, 0x0a, 0x22, 0x49, 0x84,
+ 0x0a, 0x22, 0x51, 0x85, 0x0a, 0x22, 0x58, 0x83, 0x0a, 0x23, 0x58, 0x83,
+ 0x0a, 0x23, 0x68, 0x83, 0x0a, 0x23, 0x80, 0x83, 0x0a, 0x23, 0x90, 0x83,
+ 0x0a, 0x23, 0xa0, 0x83, 0x0a, 0x23, 0xb9, 0x84, 0x0a, 0x23, 0xc1, 0x85,
+ 0x0a, 0x23, 0xc8, 0x83, 0x0a, 0x23, 0xd9, 0x84, 0x0a, 0x23, 0xe0, 0x83,
+ 0x0a, 0x23, 0xf9, 0x84, 0x0a, 0x24, 0x01, 0x85, 0x0a, 0x24, 0x08, 0x83,
+ 0x0a, 0x24, 0x29, 0x84, 0x0a, 0x24, 0x30, 0x83, 0x0a, 0x24, 0x60, 0x83,
+ 0x0a, 0x24, 0xb8, 0x83, 0x0a, 0x25, 0x10, 0x83, 0x0a, 0x27, 0x31, 0x84,
+ 0x0a, 0x27, 0x38, 0x83, 0x0a, 0x27, 0x68, 0x83, 0x0a, 0x27, 0x80, 0x83,
+ 0x0a, 0x27, 0xb8, 0x83, 0x0a, 0x27, 0xc8, 0x83, 0x0a, 0x28, 0x28, 0x83,
+ 0x0a, 0x29, 0x70, 0x83, 0x0a, 0x2a, 0x28, 0x83, 0x0a, 0x2a, 0x58, 0x83,
+ 0x0a, 0x2a, 0x88, 0x83, 0x0a, 0x2a, 0xe0, 0x83, 0x0a, 0x2b, 0x88, 0x83,
+ 0x0a, 0x2b, 0xa1, 0x84, 0x0a, 0x2b, 0xa9, 0x85, 0x0a, 0x2b, 0xb0, 0x83,
+ 0x0a, 0x2b, 0xd9, 0x84, 0x0a, 0x2b, 0xe1, 0x85, 0x0a, 0x2b, 0xe8, 0x83,
+ 0x0a, 0x2c, 0xa8, 0x83, 0x0a, 0x2c, 0xd8, 0x83, 0x0a, 0x2d, 0x00, 0x83,
+ 0x0a, 0x2d, 0x20, 0x83, 0x0a, 0x2d, 0x78, 0xc9, 0xac, 0x6a, 0x0a, 0x2d,
+ 0x89, 0x83, 0x0a, 0x2d, 0x90, 0x83, 0x0a, 0x2d, 0xb0, 0xd4, 0x3e, 0x56,
+ 0x0a, 0x2e, 0x71, 0xd3, 0x43, 0x3b, 0x0a, 0x2e, 0x78, 0x83, 0x0a, 0x2f,
+ 0xc0, 0x83, 0x0a, 0x30, 0x00, 0xc4, 0x0c, 0x33, 0x01, 0x1b, 0x01, 0xc5,
+ 0x03, 0xfb, 0x01, 0x19, 0xe0, 0x43, 0x00, 0x69, 0xc2, 0xd9, 0xc4, 0xc2,
+ 0x00, 0x35, 0x01, 0x1a, 0xa3, 0x02, 0xd9, 0xd0, 0x0b, 0x42, 0xd9, 0xd6,
+ 0xc6, 0xd6, 0x40, 0x01, 0x1a, 0x99, 0xcb, 0x01, 0x3c, 0x01, 0x1a, 0x80,
+ 0xcd, 0x09, 0x5a, 0x01, 0x1a, 0x39, 0xc7, 0x03, 0x48, 0x01, 0x1a, 0x18,
+ 0xc3, 0xba, 0x10, 0x01, 0x1a, 0x71, 0xc8, 0x50, 0x0d, 0x01, 0x1a, 0x50,
+ 0xd0, 0x5a, 0x5f, 0x01, 0x12, 0x90, 0x00, 0x42, 0xd9, 0xe2, 0xc9, 0x4f,
+ 0xa1, 0x08, 0x09, 0x68, 0xc9, 0x4f, 0xa1, 0x08, 0x09, 0x60, 0x00, 0x42,
+ 0xd9, 0xee, 0x00, 0x42, 0xd9, 0xfa, 0xc9, 0x4f, 0xa1, 0x08, 0x09, 0x78,
+ 0x00, 0x42, 0xda, 0x06, 0xc9, 0x4f, 0xa1, 0x08, 0x09, 0x70, 0xc7, 0x0d,
+ 0x7f, 0x08, 0x08, 0xf1, 0xc8, 0x4f, 0xa2, 0x08, 0x09, 0x38, 0xc9, 0x4f,
+ 0xa1, 0x08, 0x09, 0x80, 0xc7, 0x0d, 0x7f, 0x08, 0x08, 0xf9, 0xc8, 0x4f,
+ 0xa2, 0x08, 0x09, 0x40, 0xc9, 0x4f, 0xa1, 0x08, 0x09, 0x88, 0xd5, 0x36,
+ 0xc1, 0x0f, 0xdd, 0x78, 0x48, 0x20, 0x6b, 0xc2, 0xda, 0x12, 0x11, 0x42,
+ 0xda, 0x2a, 0x45, 0x00, 0x39, 0x42, 0xda, 0x39, 0xd0, 0x60, 0x6f, 0x01,
+ 0x2b, 0xe0, 0x47, 0x54, 0x55, 0xc2, 0xda, 0x49, 0x49, 0x45, 0xd4, 0x42,
+ 0xda, 0x55, 0x45, 0x00, 0x39, 0x42, 0xda, 0x61, 0xc8, 0x00, 0x29, 0x01,
+ 0x28, 0x51, 0xca, 0x03, 0x76, 0x01, 0x28, 0x40, 0xc8, 0x00, 0x29, 0x01,
+ 0x28, 0x31, 0xca, 0x03, 0x76, 0x01, 0x28, 0x20, 0xce, 0x73, 0x34, 0x01,
+ 0x2a, 0x51, 0xc8, 0x11, 0x8a, 0x01, 0x29, 0xd1, 0xca, 0x11, 0xfe, 0x01,
+ 0x29, 0x90, 0xce, 0x74, 0xca, 0x01, 0x29, 0xe9, 0xc8, 0x12, 0x13, 0x01,
+ 0x29, 0xa9, 0xca, 0x11, 0x9d, 0x01, 0x29, 0x68, 0x0e, 0xc2, 0xda, 0x73,
+ 0xca, 0x03, 0x76, 0x01, 0x29, 0xd9, 0xc5, 0x00, 0x47, 0x01, 0x28, 0xb8,
+ 0x45, 0x00, 0x39, 0x42, 0xda, 0x7f, 0xc8, 0x00, 0x29, 0x01, 0x2a, 0x79,
+ 0xca, 0x03, 0x76, 0x01, 0x2a, 0x68, 0xca, 0x03, 0x76, 0x01, 0x2a, 0x59,
+ 0xc4, 0x00, 0xcd, 0x01, 0x29, 0x59, 0xc5, 0x00, 0x47, 0x01, 0x29, 0x18,
+ 0x45, 0x00, 0x39, 0x42, 0xda, 0x91, 0xca, 0x03, 0x76, 0x01, 0x2b, 0x49,
+ 0xc4, 0x00, 0xcd, 0x01, 0x2a, 0xe9, 0xc5, 0x00, 0x47, 0x01, 0x2a, 0xd0,
+ 0xca, 0x03, 0x76, 0x01, 0x2b, 0x31, 0xc4, 0x00, 0xcd, 0x01, 0x2a, 0xb9,
+ 0xc5, 0x00, 0x47, 0x01, 0x2a, 0xa0, 0xd1, 0x53, 0x56, 0x01, 0x2b, 0x29,
+ 0xcb, 0x95, 0x01, 0x01, 0x2a, 0xb1, 0xcc, 0x88, 0xec, 0x01, 0x2a, 0x98,
+ 0xd1, 0x53, 0xcd, 0x01, 0x2b, 0x21, 0xcb, 0x95, 0x22, 0x01, 0x2a, 0xa9,
+ 0xcc, 0x88, 0xbc, 0x01, 0x2a, 0x90, 0xd3, 0x41, 0x27, 0x01, 0x2a, 0x39,
+ 0xd0, 0x33, 0xbd, 0x01, 0x29, 0x79, 0x45, 0x00, 0xcd, 0xc2, 0xda, 0xa3,
+ 0x46, 0x01, 0x17, 0x42, 0xda, 0xaf, 0xd3, 0x41, 0x3a, 0x01, 0x2a, 0x09,
+ 0xd0, 0x33, 0xa8, 0x01, 0x29, 0x81, 0x45, 0x00, 0xcd, 0xc2, 0xda, 0xbb,
+ 0x46, 0x01, 0x17, 0x42, 0xda, 0xc7, 0xca, 0x11, 0xfe, 0x01, 0x29, 0x51,
+ 0xc5, 0x11, 0x8d, 0x01, 0x28, 0xc8, 0xca, 0x11, 0xfe, 0x01, 0x29, 0x11,
+ 0xc5, 0x11, 0x8d, 0x01, 0x28, 0xa8, 0xca, 0x11, 0x9d, 0x01, 0x29, 0x31,
+ 0xc5, 0x04, 0xc6, 0x01, 0x28, 0xd0, 0xca, 0x11, 0x9d, 0x01, 0x28, 0xf1,
+ 0xc5, 0x04, 0xc6, 0x01, 0x28, 0xb0, 0xa3, 0x0f, 0xd9, 0xb0, 0xa2, 0x0f,
+ 0xd8, 0xab, 0x02, 0xda, 0xd3, 0xa1, 0x0f, 0xd8, 0x73, 0x02, 0xda, 0xd7,
+ 0xa3, 0x0f, 0xd9, 0x28, 0xa3, 0x0f, 0xd9, 0x80, 0xa3, 0x0f, 0xd9, 0x41,
+ 0xa2, 0x0f, 0xd8, 0xca, 0x02, 0xda, 0xdf, 0xa3, 0x0f, 0xd9, 0x51, 0xa2,
+ 0x0f, 0xd8, 0xda, 0x02, 0xda, 0xe3, 0xa3, 0x0f, 0xd9, 0xc8, 0xa3, 0x0f,
+ 0xd9, 0x59, 0xa2, 0x0f, 0xd8, 0xe2, 0x02, 0xda, 0xe7, 0xa3, 0x0f, 0xd9,
+ 0x98, 0xa3, 0x0f, 0xd9, 0xb8, 0xca, 0xa9, 0x34, 0x0f, 0xd2, 0x4b, 0x02,
+ 0xda, 0xeb, 0x0d, 0xc2, 0xda, 0xf1, 0xc4, 0xe8, 0x9b, 0x01, 0x32, 0xfb,
+ 0x02, 0xdb, 0x00, 0xc6, 0xba, 0xfd, 0x01, 0x32, 0xeb, 0x02, 0xdb, 0x06,
+ 0xc4, 0xd4, 0xf2, 0x01, 0x32, 0xe3, 0x02, 0xdb, 0x0c, 0xc5, 0xad, 0xae,
+ 0x01, 0x32, 0xdb, 0x02, 0xdb, 0x12, 0x47, 0x41, 0xe5, 0x42, 0xdb, 0x18,
+ 0x4e, 0x6f, 0xa6, 0xc2, 0xdb, 0x34, 0x4e, 0x0f, 0x21, 0xc2, 0xdb, 0x40,
+ 0x4c, 0x14, 0x4e, 0xc2, 0xdb, 0x4c, 0x4f, 0x62, 0xa3, 0x42, 0xdb, 0x58,
+ 0x00, 0x42, 0xdb, 0x64, 0xc6, 0x01, 0xe9, 0x0f, 0xbc, 0x69, 0xc6, 0x03,
+ 0xfa, 0x0f, 0xbc, 0x20, 0xca, 0x87, 0x6e, 0x01, 0x31, 0xd9, 0x44, 0x00,
+ 0x40, 0x42, 0xdb, 0x70, 0x00, 0x42, 0xdb, 0x80, 0xc6, 0x01, 0xe9, 0x0f,
+ 0xbc, 0x61, 0xc7, 0x3f, 0x7b, 0x0f, 0xbc, 0xb9, 0xc7, 0x0b, 0xa0, 0x0f,
+ 0xbc, 0xe8, 0x4b, 0x2d, 0x74, 0xc2, 0xdb, 0x92, 0x10, 0x42, 0xdb, 0xaa,
+ 0x00, 0x42, 0xdb, 0xb6, 0x47, 0xbe, 0xd4, 0xc2, 0xdb, 0xd4, 0xc5, 0xdb,
+ 0x0d, 0x0f, 0x99, 0x10, 0x4b, 0x05, 0xe8, 0xc2, 0xdb, 0xe0, 0x16, 0x42,
+ 0xdc, 0x02, 0x4b, 0x05, 0xe8, 0xc2, 0xdc, 0x0e, 0x16, 0x42, 0xdc, 0x2f,
+ 0x44, 0x00, 0xc9, 0xc2, 0xdc, 0x39, 0xc5, 0x0b, 0xa2, 0x01, 0x4f, 0x58,
+ 0xc6, 0x01, 0xe9, 0x01, 0x58, 0xd9, 0xc6, 0x03, 0xfa, 0x01, 0x59, 0x20,
+ 0xc6, 0x0b, 0x61, 0x01, 0x39, 0xf9, 0xc2, 0x07, 0x68, 0x01, 0x34, 0x88,
+ 0xcf, 0x69, 0x24, 0x01, 0x39, 0x31, 0xc4, 0x19, 0x3e, 0x0f, 0xad, 0xf8,
+ 0x15, 0xc2, 0xdc, 0x45, 0x06, 0xc2, 0xdc, 0x51, 0xd4, 0x39, 0x06, 0x01,
+ 0x1f, 0xb3, 0x02, 0xdc, 0x60, 0xd7, 0x29, 0x09, 0x01, 0x1f, 0xab, 0x02,
+ 0xdc, 0x66, 0x0e, 0x42, 0xdc, 0x6c, 0x44, 0x03, 0x07, 0xc2, 0xdc, 0x7b,
+ 0xd8, 0x23, 0xcc, 0x0f, 0xad, 0x11, 0xdb, 0x00, 0x8c, 0x01, 0x5c, 0xe8,
+ 0xce, 0x70, 0x32, 0x01, 0x2d, 0xa1, 0xc8, 0x00, 0x52, 0x01, 0x2d, 0x91,
+ 0xcf, 0x68, 0x52, 0x01, 0x1f, 0x59, 0xd8, 0x21, 0x44, 0x0f, 0xbc, 0x08,
+ 0xc3, 0x08, 0x1b, 0x0f, 0xad, 0x23, 0x02, 0xdc, 0x87, 0xc5, 0xce, 0x77,
+ 0x01, 0x59, 0x10, 0xc7, 0xce, 0xbb, 0x01, 0x4e, 0xb9, 0xd0, 0x58, 0x9f,
+ 0x01, 0x59, 0x60, 0xc4, 0x2e, 0x3c, 0x0f, 0x9f, 0x91, 0xc5, 0xbc, 0x06,
+ 0x01, 0x58, 0xf8, 0xc9, 0x46, 0xf7, 0x01, 0x2d, 0x71, 0xc7, 0x58, 0xa8,
+ 0x01, 0x59, 0x70, 0xc6, 0x01, 0xe9, 0x01, 0x58, 0xe9, 0xc7, 0x3f, 0x7b,
+ 0x0f, 0xbc, 0xc1, 0xc7, 0x0b, 0xa0, 0x0f, 0xbc, 0xf0, 0x9a, 0x01, 0x30,
+ 0x83, 0x02, 0xdc, 0x8d, 0xcb, 0x94, 0x1a, 0x0f, 0xaf, 0xb0, 0xc8, 0xbe,
+ 0xb3, 0x00, 0xdb, 0xf0, 0xc3, 0x01, 0x5e, 0x00, 0xdb, 0xe1, 0xc3, 0x3b,
+ 0x04, 0x00, 0xdb, 0xc9, 0xc3, 0x00, 0x55, 0x00, 0xdb, 0xc0, 0xc2, 0x04,
+ 0x3c, 0x00, 0xdb, 0xd9, 0xc2, 0x06, 0x6e, 0x00, 0xdb, 0xd0, 0xc2, 0x06,
+ 0x6e, 0x00, 0xdb, 0xb9, 0xc2, 0x04, 0x3c, 0x00, 0xdb, 0xb0, 0xc2, 0x07,
+ 0x68, 0x00, 0xdb, 0xa9, 0xc2, 0x07, 0x27, 0x00, 0xdb, 0xa0, 0xc2, 0x02,
+ 0xb5, 0x00, 0xdb, 0x73, 0x02, 0xdc, 0x93, 0xc2, 0x00, 0xaf, 0x00, 0xdb,
+ 0x6a, 0x02, 0xdc, 0x99, 0xc2, 0x00, 0x56, 0x00, 0xdb, 0x23, 0x02, 0xdc,
+ 0x9f, 0xc3, 0x01, 0x5e, 0x00, 0xdb, 0x49, 0xc3, 0x0b, 0xa3, 0x00, 0xdb,
+ 0x38, 0xc3, 0x3b, 0x04, 0x00, 0xdb, 0x41, 0xc2, 0x00, 0x56, 0x00, 0xdb,
+ 0x10, 0xc7, 0xc6, 0x3a, 0x00, 0xd8, 0x30, 0x00, 0x42, 0xdc, 0xa3, 0xc7,
+ 0xcb, 0x50, 0x00, 0xda, 0x29, 0xca, 0x65, 0x78, 0x00, 0xd8, 0xa0, 0xc2,
+ 0x07, 0x44, 0x00, 0xd9, 0x89, 0xc2, 0x07, 0x69, 0x00, 0xd9, 0x80, 0xc7,
+ 0xc6, 0x3a, 0x00, 0xd8, 0x70, 0xc7, 0xc6, 0x3a, 0x00, 0xd8, 0x60, 0xc7,
+ 0xc5, 0x3e, 0x00, 0xd9, 0x08, 0xc3, 0x68, 0x74, 0x00, 0xd9, 0x29, 0x45,
+ 0x65, 0x74, 0x42, 0xdc, 0xb5, 0x00, 0x42, 0xdc, 0xc1, 0x0d, 0xc2, 0xdc,
+ 0xd0, 0x97, 0x0b, 0x50, 0x21, 0xc4, 0xe4, 0x57, 0x0b, 0x51, 0xc1, 0x15,
+ 0xc2, 0xdc, 0xec, 0x16, 0xc2, 0xdd, 0x06, 0x8f, 0x0b, 0x50, 0x8b, 0x02,
+ 0xdd, 0x10, 0x14, 0xc2, 0xdd, 0x22, 0x0e, 0xc2, 0xdd, 0x2e, 0x19, 0xc2,
+ 0xdd, 0x3c, 0xc3, 0xd8, 0x6c, 0x0b, 0x51, 0x59, 0x12, 0xc2, 0xdd, 0x46,
+ 0x10, 0xc2, 0xdd, 0x50, 0x1b, 0xc2, 0xdd, 0x7b, 0xc2, 0x01, 0xe6, 0x0b,
+ 0x50, 0x30, 0x09, 0xc2, 0xdd, 0x85, 0x19, 0xc2, 0xdd, 0x8f, 0x0d, 0xc2,
+ 0xdd, 0x99, 0x10, 0xc2, 0xdd, 0xaf, 0x16, 0xc2, 0xdd, 0xdc, 0x12, 0xc2,
+ 0xdd, 0xec, 0x14, 0xc2, 0xde, 0x09, 0x15, 0xc2, 0xde, 0x19, 0x0e, 0xc2,
+ 0xde, 0x33, 0x18, 0xc2, 0xde, 0x45, 0x0f, 0xc2, 0xde, 0x4f, 0x08, 0xc2,
+ 0xde, 0x87, 0x1b, 0xc2, 0xde, 0x9e, 0x8b, 0x0b, 0x4e, 0xc1, 0x91, 0x0b,
+ 0x4e, 0xb9, 0x83, 0x0b, 0x4e, 0xa8, 0x10, 0xc2, 0xde, 0xb8, 0x0e, 0xc2,
+ 0xde, 0xd8, 0x8f, 0x0b, 0x4a, 0x8b, 0x02, 0xde, 0xee, 0x16, 0xc2, 0xdf,
+ 0x14, 0x0d, 0xc2, 0xdf, 0x2f, 0x15, 0xc2, 0xdf, 0x46, 0x08, 0xc2, 0xdf,
+ 0x5e, 0x1b, 0xc2, 0xdf, 0x6a, 0x14, 0xc2, 0xdf, 0x7a, 0x12, 0xc2, 0xdf,
+ 0x8c, 0x42, 0x00, 0x09, 0xc2, 0xdf, 0xa0, 0x19, 0x42, 0xdf, 0xac, 0x0d,
+ 0xc2, 0xdf, 0xb8, 0x15, 0xc2, 0xdf, 0xcc, 0x16, 0xc2, 0xdf, 0xda, 0x12,
+ 0xc2, 0xdf, 0xea, 0x0e, 0xc2, 0xdf, 0xf4, 0x10, 0xc2, 0xe0, 0x02, 0x0f,
+ 0xc2, 0xe0, 0x24, 0x1b, 0xc2, 0xe0, 0x3e, 0x19, 0xc2, 0xe0, 0x4e, 0xc2,
+ 0x04, 0x34, 0x0b, 0x46, 0x19, 0x43, 0x25, 0xc2, 0xc2, 0xe0, 0x5a, 0xc4,
+ 0xe5, 0x6f, 0x0b, 0x46, 0x01, 0xc3, 0xed, 0x41, 0x0b, 0x45, 0xe1, 0x09,
+ 0x42, 0xe0, 0x64, 0x10, 0xc2, 0xe0, 0x70, 0x0f, 0xc2, 0xe0, 0x88, 0x12,
+ 0xc2, 0xe0, 0xa3, 0x47, 0xc8, 0xfd, 0xc2, 0xe0, 0xbb, 0x0d, 0xc2, 0xe0,
+ 0xc5, 0x0e, 0xc2, 0xe0, 0xd5, 0x42, 0x13, 0xfc, 0xc2, 0xe0, 0xe5, 0x15,
+ 0xc2, 0xe0, 0xef, 0x16, 0xc2, 0xe1, 0x0d, 0xc5, 0xe2, 0x38, 0x0b, 0x43,
+ 0xb1, 0xc4, 0xe5, 0x03, 0x0b, 0x43, 0x99, 0x1b, 0x42, 0xe1, 0x19, 0xc3,
+ 0x35, 0x30, 0x0b, 0x42, 0x91, 0x15, 0xc2, 0xe1, 0x25, 0x16, 0xc2, 0xe1,
+ 0x3f, 0x0d, 0xc2, 0xe1, 0x4f, 0x0f, 0xc2, 0xe1, 0x63, 0x10, 0xc2, 0xe1,
+ 0x83, 0x0e, 0xc2, 0xe1, 0xb9, 0x12, 0xc2, 0xe1, 0xd2, 0x17, 0xc2, 0xe1,
+ 0xe8, 0xc3, 0x03, 0x19, 0x0b, 0x41, 0xd1, 0xc4, 0xe6, 0xa7, 0x0b, 0x41,
+ 0xc9, 0x09, 0x42, 0xe1, 0xf4, 0xc7, 0xc4, 0x9d, 0x00, 0xdf, 0xf9, 0xc9,
+ 0xb0, 0x12, 0x00, 0xdf, 0xe8, 0x49, 0xb0, 0x7e, 0x42, 0xe2, 0x00, 0xc2,
+ 0x00, 0x96, 0x00, 0xde, 0xf9, 0xc2, 0x1a, 0x36, 0x00, 0xde, 0xe1, 0xc2,
+ 0x0e, 0xe5, 0x00, 0xde, 0xc9, 0xc2, 0x00, 0x3f, 0x00, 0xde, 0xa9, 0xc2,
+ 0x00, 0x9a, 0x00, 0xde, 0x99, 0xc2, 0x07, 0x69, 0x00, 0xde, 0x79, 0xc2,
+ 0x00, 0x4c, 0x00, 0xde, 0x61, 0xc2, 0x07, 0x44, 0x00, 0xde, 0x41, 0xc2,
+ 0x01, 0x0e, 0x00, 0xde, 0x19, 0x83, 0x00, 0xde, 0x08, 0xc6, 0xd1, 0x30,
+ 0x00, 0x4e, 0x70, 0x46, 0x01, 0xab, 0x42, 0xe2, 0x12, 0xc2, 0x01, 0x0e,
+ 0x00, 0x4d, 0x11, 0x83, 0x00, 0x4d, 0x08, 0xc2, 0x01, 0x0e, 0x00, 0x4d,
+ 0x01, 0x83, 0x00, 0x4c, 0xf8, 0x94, 0x00, 0x4c, 0x5b, 0x02, 0xe2, 0x1e,
+ 0x8e, 0x00, 0x4c, 0x62, 0x02, 0xe2, 0x22, 0xc4, 0x21, 0x28, 0x00, 0x4e,
+ 0x69, 0xc5, 0x45, 0xcf, 0x00, 0x4c, 0x18, 0xc7, 0x7d, 0xf8, 0x00, 0x4d,
+ 0xe9, 0xc7, 0x10, 0xac, 0x00, 0x4c, 0x10, 0x94, 0x00, 0x4e, 0x20, 0x8e,
+ 0x00, 0x4f, 0x18, 0xda, 0x1b, 0x54, 0x00, 0x4f, 0xc0, 0xc2, 0x0a, 0x20,
+ 0x00, 0x4f, 0xa9, 0xc4, 0x05, 0xde, 0x00, 0x4f, 0xb0, 0xc2, 0x00, 0x2e,
+ 0x00, 0xd0, 0x79, 0x83, 0x00, 0xd0, 0x70, 0xc2, 0x06, 0x6b, 0x00, 0xd0,
0x19, 0x83, 0x00, 0xd0, 0x10, 0xa5, 0x01, 0x46, 0x00, 0x9f, 0x01, 0x40,
- 0x1b, 0x02, 0xe0, 0xeb, 0xa0, 0x01, 0x40, 0x2b, 0x02, 0xe1, 0x12, 0xa1,
- 0x01, 0x40, 0x4b, 0x02, 0xe1, 0x32, 0xa2, 0x01, 0x40, 0x8b, 0x02, 0xe1,
- 0x4b, 0xa3, 0x01, 0x41, 0x0b, 0x02, 0xe1, 0x5d, 0xa5, 0x01, 0x44, 0x09,
- 0xa4, 0x01, 0x42, 0x0a, 0x02, 0xe1, 0x68, 0xa0, 0x01, 0x40, 0x33, 0x02,
- 0xe1, 0x6c, 0xa1, 0x01, 0x40, 0x53, 0x02, 0xe1, 0x8c, 0xa2, 0x01, 0x40,
- 0x93, 0x02, 0xe1, 0xa5, 0xa3, 0x01, 0x41, 0x13, 0x02, 0xe1, 0xb7, 0xa5,
- 0x01, 0x44, 0x11, 0xa4, 0x01, 0x42, 0x12, 0x02, 0xe1, 0xc2, 0xa1, 0x01,
- 0x40, 0x63, 0x02, 0xe1, 0xc6, 0xa2, 0x01, 0x40, 0xa3, 0x02, 0xe1, 0xdf,
- 0xa3, 0x01, 0x41, 0x23, 0x02, 0xe1, 0xf1, 0xa5, 0x01, 0x44, 0x21, 0xa4,
- 0x01, 0x42, 0x22, 0x02, 0xe1, 0xfc, 0xa2, 0x01, 0x40, 0xc3, 0x02, 0xe2,
- 0x00, 0xa3, 0x01, 0x41, 0x43, 0x02, 0xe2, 0x12, 0xa5, 0x01, 0x44, 0x41,
- 0xa4, 0x01, 0x42, 0x42, 0x02, 0xe2, 0x1d, 0xa3, 0x01, 0x41, 0x83, 0x02,
- 0xe2, 0x21, 0xa5, 0x01, 0x44, 0x81, 0xa4, 0x01, 0x42, 0x82, 0x02, 0xe2,
- 0x2c, 0xa5, 0x01, 0x45, 0x01, 0xa4, 0x01, 0x43, 0x02, 0x02, 0xe2, 0x30,
- 0xc8, 0x50, 0x00, 0x08, 0x83, 0x29, 0xc7, 0x0c, 0x4b, 0x08, 0x83, 0x20,
- 0xc2, 0x0c, 0x57, 0x08, 0x83, 0x08, 0xc2, 0x0c, 0x57, 0x08, 0x83, 0x00,
- 0xc3, 0x43, 0xcd, 0x08, 0x82, 0xf9, 0xc2, 0x00, 0x7b, 0x08, 0x82, 0xb0,
- 0xc3, 0x0c, 0x56, 0x08, 0x82, 0xf1, 0xc2, 0x02, 0x53, 0x08, 0x82, 0xa8,
- 0xc4, 0x0c, 0x55, 0x08, 0x82, 0xe9, 0xc3, 0x04, 0x5f, 0x08, 0x82, 0xa0,
- 0xc4, 0x18, 0x85, 0x08, 0x82, 0xe1, 0x91, 0x08, 0x82, 0x98, 0x42, 0x03,
- 0xc7, 0xc2, 0xe2, 0x34, 0x46, 0x2b, 0xff, 0xc2, 0xe2, 0x3e, 0xc4, 0xe5,
- 0x97, 0x08, 0x81, 0xb9, 0xc3, 0x7b, 0xf2, 0x08, 0x81, 0xb0, 0xc2, 0x00,
- 0xa4, 0x08, 0x81, 0x01, 0x83, 0x08, 0x80, 0xf8, 0xc2, 0x00, 0xa4, 0x08,
- 0x80, 0xf1, 0x83, 0x08, 0x80, 0xe8, 0x8e, 0x08, 0x80, 0x6b, 0x02, 0xe2,
- 0x46, 0x94, 0x08, 0x80, 0x5a, 0x02, 0xe2, 0x4a, 0x4f, 0x6b, 0xb4, 0x42,
- 0xe2, 0x4e, 0x97, 0x08, 0x82, 0x29, 0x8b, 0x08, 0x82, 0x19, 0x83, 0x08,
- 0x81, 0xc0, 0x8e, 0x08, 0x82, 0x03, 0x02, 0xe2, 0x56, 0x94, 0x08, 0x81,
- 0xf2, 0x02, 0xe2, 0x5a, 0x97, 0x08, 0x81, 0xe8, 0x8b, 0x08, 0x81, 0xd8,
- 0xc4, 0x18, 0x83, 0x08, 0x83, 0x69, 0xc2, 0x26, 0x51, 0x08, 0x83, 0x60,
- 0xc3, 0x0c, 0x5b, 0x08, 0x83, 0x59, 0xc3, 0x06, 0x9e, 0x08, 0x83, 0x50,
- 0xc4, 0x04, 0x5e, 0x08, 0x83, 0x49, 0xc2, 0x01, 0x47, 0x08, 0x83, 0x40,
- 0x44, 0xe5, 0xa3, 0xc2, 0xe2, 0x5e, 0x4e, 0x68, 0x40, 0xc2, 0xe2, 0x6a,
- 0xc8, 0x9f, 0x0c, 0x0e, 0x80, 0xb0, 0xc4, 0x8f, 0x7c, 0x0e, 0x87, 0x99,
- 0xc4, 0xe6, 0x17, 0x0e, 0x87, 0x89, 0xc3, 0x8f, 0x80, 0x0e, 0x82, 0x78,
- 0x44, 0xe5, 0xa3, 0xc2, 0xe2, 0x76, 0xc8, 0x9f, 0x0c, 0x0e, 0x80, 0xe0,
- 0x00, 0xc2, 0xe2, 0x88, 0xc2, 0x00, 0xb3, 0x0e, 0x81, 0x90, 0xc8, 0xbf,
- 0x9d, 0x0e, 0x82, 0xa1, 0xc8, 0xac, 0xef, 0x0e, 0x82, 0x60, 0x42, 0x04,
- 0x32, 0xc2, 0xe2, 0x92, 0x95, 0x0e, 0x80, 0x8a, 0x02, 0xe2, 0x9e, 0xc3,
- 0x7e, 0xff, 0x0e, 0x84, 0x21, 0xc8, 0x9f, 0x0c, 0x0e, 0x81, 0x10, 0x16,
- 0xc2, 0xe2, 0xa2, 0xc7, 0xc3, 0xa2, 0x0e, 0x87, 0x18, 0x16, 0xc2, 0xe2,
- 0xae, 0xc7, 0xc3, 0xa2, 0x0e, 0x86, 0xf8, 0xc3, 0x7e, 0xff, 0x0e, 0x83,
- 0x29, 0xcc, 0x84, 0xc4, 0x0e, 0x81, 0x59, 0xc8, 0x9f, 0x0c, 0x0e, 0x81,
- 0x50, 0x4f, 0x68, 0x3f, 0x42, 0xe2, 0xba, 0xc7, 0xc7, 0x06, 0x0e, 0x86,
- 0xe9, 0xc5, 0xce, 0x96, 0x0e, 0x86, 0xe1, 0x46, 0xcf, 0xa3, 0x42, 0xe2,
- 0xc6, 0x42, 0x00, 0x5b, 0xc2, 0xe2, 0xd2, 0xcc, 0x2f, 0x63, 0x0e, 0x86,
- 0x78, 0xd5, 0x38, 0x29, 0x0e, 0x86, 0xb9, 0xc8, 0x2f, 0x67, 0x0e, 0x86,
- 0x68, 0xc6, 0xce, 0x95, 0x0e, 0x80, 0x58, 0xc6, 0xcf, 0x5b, 0x0e, 0x86,
- 0x31, 0xc5, 0x1c, 0x70, 0x0e, 0x86, 0x28, 0x42, 0x04, 0x32, 0xc2, 0xe2,
- 0xde, 0xc3, 0x0e, 0x3a, 0x0e, 0x85, 0xd8, 0xc2, 0x00, 0xb7, 0x0e, 0x85,
- 0xc1, 0x83, 0x0e, 0x81, 0xa8, 0xce, 0x6d, 0xdf, 0x0e, 0x85, 0x99, 0xc5,
- 0x6d, 0xe8, 0x0e, 0x85, 0x58, 0xcb, 0x98, 0xd5, 0x0e, 0x85, 0x91, 0xc7,
- 0x6d, 0xe6, 0x0e, 0x85, 0x10, 0xcd, 0x7f, 0xb8, 0x0e, 0x85, 0x49, 0xc5,
- 0x6d, 0xe8, 0x0e, 0x85, 0x40, 0xc6, 0x8f, 0x71, 0x0e, 0x85, 0x39, 0xc9,
- 0x6d, 0xe4, 0x0e, 0x85, 0x30, 0xca, 0x95, 0xa8, 0x0e, 0x83, 0x71, 0xc8,
- 0xc1, 0x0d, 0x0e, 0x83, 0x58, 0xc3, 0x7e, 0xff, 0x0e, 0x83, 0x19, 0x03,
- 0x42, 0xe2, 0xea, 0xc7, 0xc4, 0xf2, 0x0e, 0x83, 0xc1, 0x48, 0xbf, 0xc5,
- 0x42, 0xe2, 0xf6, 0xcf, 0x63, 0xcb, 0x0e, 0x84, 0x69, 0xcc, 0x85, 0x60,
- 0x0e, 0x84, 0x60, 0xc4, 0x7e, 0xfe, 0x0e, 0x82, 0xd0, 0xc3, 0x7e, 0xff,
- 0x0e, 0x82, 0xf9, 0xc8, 0x9f, 0x0c, 0x0e, 0x81, 0xe8, 0x00, 0x42, 0xe3,
- 0x02, 0xc9, 0xac, 0xee, 0x0e, 0x82, 0x59, 0x8b, 0x0e, 0x82, 0x48, 0x5b,
- 0x16, 0x24, 0xc2, 0xe3, 0x0e, 0x46, 0x01, 0xc7, 0x42, 0xe3, 0x1a, 0xc6,
- 0x07, 0x09, 0x01, 0x3a, 0x89, 0xc6, 0x01, 0x7a, 0x0f, 0xa9, 0xf0, 0xc6,
- 0x02, 0x21, 0x0f, 0xda, 0x09, 0xc5, 0x01, 0xf7, 0x0f, 0xda, 0x10, 0x55,
- 0x18, 0x10, 0xc2, 0xe3, 0x2c, 0x48, 0x09, 0x13, 0xc2, 0xe3, 0x3e, 0x4a,
- 0x13, 0x24, 0x42, 0xe3, 0x4a, 0xc7, 0x17, 0x7c, 0x01, 0x52, 0x91, 0x45,
- 0x06, 0xf3, 0x42, 0xe3, 0x56, 0xc7, 0x76, 0x66, 0x01, 0x52, 0xf1, 0xc8,
- 0x4f, 0x39, 0x01, 0x53, 0x00, 0x46, 0x02, 0x12, 0xc2, 0xe3, 0x62, 0x46,
- 0x01, 0xd1, 0xc2, 0xe3, 0x6c, 0x46, 0x01, 0xc7, 0x42, 0xe3, 0x78, 0xc9,
- 0xb5, 0xb8, 0x0f, 0xaf, 0x71, 0xca, 0x09, 0x54, 0x01, 0x80, 0x42, 0x02,
- 0xe3, 0x84, 0xcc, 0x11, 0x65, 0x01, 0x59, 0x81, 0xcc, 0x8b, 0xc0, 0x01,
- 0x59, 0x90, 0xe0, 0x0a, 0xe7, 0x0f, 0xdc, 0xa0, 0x46, 0x00, 0x6b, 0x42,
- 0xe3, 0x8a, 0x44, 0x05, 0xf1, 0xc2, 0xe3, 0x9a, 0xc3, 0x01, 0xe4, 0x01,
- 0x2c, 0x60, 0x00, 0x42, 0xe3, 0xa6, 0x46, 0x00, 0x6b, 0x42, 0xe3, 0xb2,
- 0xc9, 0xb0, 0xcc, 0x01, 0x0d, 0x69, 0xca, 0x04, 0xfd, 0x01, 0x58, 0x20,
- 0xcc, 0x8c, 0x80, 0x01, 0x1d, 0x19, 0xc9, 0x4f, 0x27, 0x01, 0x1d, 0x11,
- 0xcc, 0x84, 0x70, 0x01, 0x1d, 0x09, 0x45, 0x00, 0x6c, 0x42, 0xe3, 0xbe,
- 0xca, 0xa3, 0x6a, 0x01, 0x1d, 0x49, 0xcc, 0x81, 0xa0, 0x01, 0x1d, 0x41,
- 0xca, 0xa0, 0xf4, 0x01, 0x1d, 0x38, 0xcd, 0x40, 0x12, 0x01, 0x2c, 0x69,
- 0xce, 0x0a, 0xb9, 0x01, 0x2c, 0x50, 0xd6, 0x2c, 0x57, 0x01, 0x4e, 0x79,
- 0xd6, 0x19, 0x02, 0x0f, 0xdb, 0x60, 0xcc, 0x02, 0x53, 0x01, 0x4c, 0x19,
- 0xcd, 0x66, 0x34, 0x01, 0x80, 0x70, 0xcc, 0x89, 0x68, 0x01, 0x4a, 0x81,
- 0xca, 0xa8, 0x56, 0x01, 0x4a, 0x58, 0xcc, 0x89, 0x68, 0x01, 0x4a, 0x51,
- 0xca, 0xa8, 0x56, 0x01, 0x4a, 0x70, 0xca, 0x03, 0x7d, 0x0f, 0xc4, 0x81,
- 0x48, 0x01, 0xef, 0x42, 0xe3, 0xdc, 0xc5, 0x01, 0x0f, 0x01, 0x0e, 0xd1,
- 0xca, 0x52, 0x78, 0x01, 0x48, 0x70, 0x46, 0x01, 0x0f, 0xc2, 0xe3, 0xf1,
- 0xd1, 0x52, 0x71, 0x01, 0x59, 0xb8, 0xd9, 0x1f, 0xeb, 0x0f, 0xc0, 0x21,
- 0x15, 0xc2, 0xe3, 0xfd, 0x42, 0x01, 0x4a, 0xc2, 0xe4, 0x09, 0xcf, 0x2c,
- 0x05, 0x01, 0x0f, 0xb9, 0x0e, 0xc2, 0xe4, 0x15, 0xc4, 0x02, 0x83, 0x01,
- 0x0d, 0x49, 0x16, 0xc2, 0xe4, 0x21, 0xca, 0xa2, 0x16, 0x01, 0x4a, 0x31,
- 0xd5, 0x03, 0x72, 0x0f, 0xc0, 0xa1, 0xcc, 0x82, 0x48, 0x0f, 0xc4, 0xc0,
- 0x43, 0x11, 0xa6, 0xc2, 0xe4, 0x30, 0x47, 0x23, 0xd8, 0x42, 0xe4, 0x3f,
- 0xd1, 0x55, 0x08, 0x01, 0x48, 0xf8, 0x45, 0x00, 0x96, 0xc2, 0xe4, 0x4f,
- 0x43, 0x00, 0x58, 0x42, 0xe4, 0x67, 0x00, 0xc2, 0xe4, 0x6d, 0xc5, 0x12,
- 0xea, 0x01, 0x48, 0xd8, 0xd7, 0x27, 0x6e, 0x01, 0x0e, 0x59, 0x4a, 0x02,
- 0xd8, 0x42, 0xe4, 0x79, 0xc6, 0x0d, 0xf2, 0x01, 0x53, 0xf9, 0xc5, 0x00,
- 0x95, 0x01, 0x54, 0x0a, 0x02, 0xe4, 0x85, 0xc8, 0x23, 0x35, 0x01, 0x54,
- 0x69, 0xd2, 0x00, 0x95, 0x01, 0x54, 0x78, 0xe0, 0x03, 0x07, 0x01, 0x54,
- 0x98, 0xe0, 0x0a, 0x87, 0x01, 0x3b, 0x98, 0xc4, 0x12, 0x72, 0x01, 0x5e,
- 0x61, 0xc4, 0x0e, 0xa8, 0x0f, 0xbe, 0x20, 0xcf, 0x15, 0x8e, 0x0f, 0xbd,
- 0x79, 0xd2, 0x21, 0x36, 0x0f, 0xbe, 0x48, 0xc2, 0x00, 0x30, 0x05, 0x27,
- 0xc1, 0xc3, 0xdf, 0x96, 0x05, 0x27, 0xd1, 0xc2, 0x00, 0x2b, 0x05, 0x27,
- 0xd9, 0xc2, 0x00, 0xc1, 0x05, 0x27, 0xe1, 0xc3, 0xe7, 0x84, 0x05, 0x27,
- 0xe8, 0xdd, 0x12, 0x3c, 0x01, 0x50, 0x99, 0xdc, 0x14, 0xa6, 0x01, 0x50,
- 0x90, 0x1e, 0xc2, 0xe4, 0x8b, 0x1d, 0xc2, 0xe4, 0xb5, 0xc7, 0xc2, 0x36,
- 0x08, 0x3a, 0xa1, 0xc5, 0xd9, 0xf8, 0x08, 0x3a, 0xa8, 0x23, 0xc2, 0xe4,
- 0xe9, 0x1d, 0xc2, 0xe4, 0xfd, 0x1e, 0xc2, 0xe5, 0x1d, 0x1f, 0xc2, 0xe5,
- 0x45, 0x20, 0xc2, 0xe5, 0x69, 0x21, 0xc2, 0xe5, 0x75, 0x22, 0x42, 0xe5,
- 0x95, 0x9d, 0x08, 0x3b, 0x01, 0x9e, 0x08, 0x3b, 0x09, 0x9f, 0x08, 0x3b,
- 0x11, 0xa0, 0x08, 0x3b, 0x19, 0xa1, 0x08, 0x3b, 0x21, 0xa2, 0x08, 0x3b,
- 0x29, 0xa3, 0x08, 0x3b, 0x31, 0xa4, 0x08, 0x3b, 0x38, 0x1d, 0xc2, 0xe5,
- 0xb9, 0x1e, 0x42, 0xe5, 0xdd, 0xc6, 0xce, 0x77, 0x08, 0x32, 0x39, 0xc3,
- 0xe7, 0xb1, 0x08, 0x32, 0x79, 0xc3, 0xe7, 0xd8, 0x08, 0x32, 0x50, 0x1d,
- 0xc2, 0xe6, 0x03, 0x1e, 0xc2, 0xe6, 0x27, 0x1f, 0xc2, 0xe6, 0x4f, 0x20,
- 0xc2, 0xe6, 0x77, 0x21, 0xc2, 0xe6, 0x9f, 0x22, 0xc2, 0xe6, 0xc7, 0x23,
- 0xc2, 0xe6, 0xef, 0x24, 0x42, 0xe7, 0x17, 0x1d, 0xc2, 0xe7, 0x1f, 0x1e,
- 0x42, 0xe7, 0x5b, 0x1d, 0xc2, 0xe7, 0x91, 0x1e, 0xc2, 0xe7, 0xb1, 0x1f,
- 0xc2, 0xe7, 0xc9, 0x20, 0xc2, 0xe7, 0xed, 0x21, 0xc2, 0xe8, 0x11, 0x22,
- 0xc2, 0xe8, 0x2d, 0x23, 0xc2, 0xe8, 0x51, 0x24, 0xc2, 0xe8, 0x69, 0x25,
- 0xc2, 0xe8, 0x91, 0x26, 0x42, 0xe8, 0xb9, 0x49, 0xaf, 0xb5, 0xc2, 0xe8,
- 0xd1, 0x47, 0xc1, 0x3a, 0x42, 0xe8, 0xf9, 0x04, 0xc2, 0xe9, 0x21, 0x48,
- 0xb9, 0xc5, 0x42, 0xe9, 0x29, 0x1e, 0xc2, 0xe9, 0x39, 0xc9, 0xae, 0x32,
- 0x08, 0x06, 0x90, 0x83, 0x00, 0xc9, 0xa1, 0xc2, 0x01, 0x29, 0x00, 0xc9,
- 0x88, 0x91, 0x00, 0xc9, 0x28, 0x87, 0x00, 0xc9, 0x18, 0x97, 0x00, 0xc9,
- 0x31, 0x8b, 0x00, 0xc9, 0x20, 0xc6, 0x07, 0x09, 0x0f, 0xbf, 0x59, 0xc6,
- 0x01, 0x7a, 0x0f, 0xbf, 0x20, 0xc7, 0x3f, 0x2e, 0x0f, 0xa9, 0xb9, 0xc6,
- 0x01, 0x7a, 0x0f, 0xa9, 0xa9, 0xc6, 0x07, 0x09, 0x0f, 0xbf, 0x30, 0xdf,
- 0x0c, 0xe2, 0x08, 0x59, 0xf9, 0xdd, 0x10, 0x6c, 0x08, 0x59, 0xe8, 0xc7,
- 0x3f, 0x2e, 0x0f, 0xa9, 0xb1, 0xc6, 0x01, 0x7a, 0x0f, 0xbf, 0x01, 0xc6,
- 0x07, 0x09, 0x0f, 0xbf, 0x38, 0xdf, 0x0d, 0x01, 0x08, 0x59, 0xf1, 0xdd,
- 0x01, 0xaa, 0x08, 0x59, 0xe0, 0x96, 0x00, 0x03, 0xa3, 0x02, 0xe9, 0x47,
- 0x95, 0x00, 0x03, 0x9b, 0x02, 0xe9, 0x81, 0x94, 0x00, 0x03, 0x93, 0x02,
- 0xe9, 0xa5, 0x90, 0x00, 0x03, 0x73, 0x02, 0xe9, 0xbe, 0x8e, 0x00, 0x03,
- 0x63, 0x02, 0xe9, 0xcc, 0x86, 0x00, 0x03, 0x23, 0x02, 0xe9, 0xfb, 0x85,
- 0x00, 0x03, 0x1b, 0x02, 0xea, 0x1c, 0x91, 0x00, 0x03, 0x7b, 0x02, 0xea,
- 0x40, 0x8b, 0x00, 0x03, 0x4b, 0x02, 0xea, 0x64, 0x87, 0x00, 0x03, 0x2b,
- 0x02, 0xea, 0x78, 0x88, 0x00, 0x03, 0x33, 0x02, 0xea, 0xa6, 0x9b, 0x00,
- 0x03, 0xcb, 0x02, 0xea, 0xb5, 0x8f, 0x00, 0x03, 0x6b, 0x02, 0xea, 0xc1,
- 0x97, 0x00, 0x03, 0xab, 0x02, 0xea, 0xd3, 0x83, 0x00, 0x03, 0x0b, 0x02,
- 0xea, 0xf0, 0x99, 0x00, 0x03, 0xbb, 0x02, 0xeb, 0x21, 0x8a, 0x00, 0x03,
- 0x43, 0x02, 0xeb, 0x27, 0x9c, 0x00, 0x03, 0xd3, 0x02, 0xeb, 0x40, 0x9a,
- 0x00, 0x03, 0xc3, 0x02, 0xeb, 0x46, 0x98, 0x00, 0x03, 0xb3, 0x02, 0xeb,
- 0x4c, 0x92, 0x00, 0x03, 0x83, 0x02, 0xeb, 0x68, 0x8d, 0x00, 0x03, 0x5b,
- 0x02, 0xeb, 0x74, 0x89, 0x00, 0x03, 0x3b, 0x02, 0xeb, 0x80, 0x84, 0x00,
- 0x03, 0x13, 0x02, 0xeb, 0x98, 0x8c, 0x00, 0x03, 0x53, 0x02, 0xeb, 0xba,
- 0x93, 0x00, 0x03, 0x8a, 0x02, 0xeb, 0xc0, 0xc2, 0x00, 0x15, 0x07, 0xd8,
- 0x31, 0xc8, 0xc0, 0x95, 0x07, 0xd8, 0x29, 0x08, 0xc2, 0xeb, 0xcc, 0xc2,
- 0x00, 0x0b, 0x00, 0x09, 0x99, 0xc2, 0x1b, 0xd8, 0x00, 0x0a, 0x98, 0x46,
- 0x41, 0x9c, 0x42, 0xeb, 0xdb, 0x46, 0x00, 0x6b, 0x42, 0xeb, 0xef, 0xc2,
- 0x23, 0xb6, 0x00, 0xe9, 0x19, 0xc2, 0x00, 0x6e, 0x00, 0xe8, 0x30, 0x48,
- 0x10, 0x90, 0xc2, 0xeb, 0xfb, 0xcf, 0x67, 0x9a, 0x05, 0x5a, 0x31, 0xc2,
- 0x06, 0x1f, 0x05, 0x3b, 0xb0, 0x97, 0x00, 0xe8, 0xa9, 0xc5, 0x77, 0x2d,
- 0x00, 0xe8, 0x81, 0x87, 0x00, 0x13, 0xb0, 0xc7, 0xcb, 0x7b, 0x00, 0xe8,
- 0x18, 0x87, 0x00, 0xe8, 0x08, 0xca, 0x1d, 0xd4, 0x00, 0x14, 0xd8, 0xc9,
- 0xad, 0x87, 0x00, 0x14, 0x08, 0x46, 0x00, 0x6b, 0xc2, 0xec, 0x03, 0xc3,
- 0xe7, 0xba, 0x00, 0x10, 0xe0, 0x45, 0x00, 0x51, 0xc2, 0xec, 0x3a, 0x46,
- 0x00, 0x6b, 0x42, 0xec, 0x46, 0x00, 0xc2, 0xec, 0x58, 0xc6, 0x12, 0x12,
- 0x00, 0x0d, 0x88, 0x46, 0x00, 0x6b, 0xc2, 0xec, 0x64, 0x91, 0x05, 0x3a,
- 0x71, 0xc4, 0x70, 0xd8, 0x05, 0x3d, 0xb1, 0xcb, 0x94, 0xec, 0x05, 0x3e,
- 0x01, 0x44, 0x01, 0x76, 0xc2, 0xec, 0xaf, 0x8b, 0x00, 0x0d, 0x11, 0x97,
- 0x00, 0x11, 0x10, 0x46, 0x00, 0x6b, 0xc2, 0xec, 0xb7, 0x95, 0x05, 0x3b,
- 0x61, 0x47, 0x6a, 0x62, 0xc2, 0xec, 0xf8, 0xc3, 0x00, 0xb2, 0x00, 0x0c,
- 0xb0, 0x46, 0x00, 0x6b, 0xc2, 0xed, 0x10, 0x4e, 0x72, 0x69, 0xc2, 0xed,
- 0x54, 0x96, 0x05, 0x3b, 0x53, 0x02, 0xed, 0x60, 0xc2, 0x00, 0x35, 0x00,
- 0x0a, 0x51, 0xc2, 0x00, 0xe5, 0x00, 0x0d, 0x49, 0xc2, 0x23, 0xb6, 0x00,
- 0x0d, 0xba, 0x02, 0xed, 0x64, 0x46, 0x00, 0x6b, 0xc2, 0xed, 0x68, 0x87,
- 0x00, 0x06, 0x33, 0x02, 0xed, 0xaf, 0x83, 0x05, 0x39, 0x91, 0x91, 0x05,
- 0x39, 0xa1, 0x97, 0x05, 0x39, 0xb1, 0x98, 0x05, 0x39, 0xc3, 0x02, 0xed,
- 0xb5, 0x9b, 0x05, 0x39, 0xe1, 0xca, 0xa4, 0x96, 0x05, 0x3e, 0x11, 0xc4,
- 0xd9, 0x9f, 0x01, 0x63, 0x69, 0xc8, 0xc0, 0x2d, 0x00, 0x0c, 0x48, 0xc6,
- 0xa5, 0xb9, 0x00, 0xf4, 0xf1, 0x46, 0x00, 0x6b, 0xc2, 0xed, 0xbb, 0xc7,
- 0xc4, 0x04, 0x05, 0x3c, 0x59, 0x05, 0xc2, 0xed, 0xde, 0xc8, 0xbf, 0x75,
- 0x05, 0x3e, 0xc1, 0x45, 0x04, 0x74, 0x42, 0xed, 0xea, 0x46, 0x00, 0x6b,
- 0x42, 0xed, 0xf6, 0x47, 0x09, 0xf2, 0x42, 0xee, 0x1a, 0x46, 0x00, 0x6b,
- 0xc2, 0xee, 0x26, 0xc3, 0x99, 0x6a, 0x00, 0x0f, 0xb8, 0x46, 0x00, 0x6b,
- 0xc2, 0xee, 0x42, 0x9b, 0x05, 0x3b, 0x01, 0xcb, 0x91, 0x2f, 0x05, 0x3b,
- 0x11, 0xc3, 0x00, 0xef, 0x05, 0x3b, 0x41, 0x47, 0xc6, 0x2d, 0x42, 0xee,
- 0x52, 0x46, 0x00, 0x6b, 0xc2, 0xee, 0x64, 0x9c, 0x05, 0x39, 0x41, 0xc7,
- 0xc3, 0xfd, 0x05, 0x39, 0x51, 0xc4, 0x2a, 0xc6, 0x00, 0x06, 0xf3, 0x02,
- 0xee, 0x84, 0x46, 0x41, 0x9c, 0xc2, 0xee, 0x8d, 0x44, 0x01, 0xb4, 0x42,
- 0xee, 0xb2, 0x00, 0xc2, 0xee, 0xc4, 0x48, 0x10, 0x90, 0xc2, 0xee, 0xd0,
- 0xca, 0xa8, 0x1a, 0x05, 0x3a, 0xe0, 0x46, 0x00, 0x6b, 0x42, 0xee, 0xe6,
- 0x46, 0x00, 0x6b, 0xc2, 0xef, 0x02, 0x8c, 0x00, 0x0e, 0x50, 0x46, 0x00,
- 0x6b, 0xc2, 0xef, 0x2c, 0x8c, 0x00, 0x0e, 0x38, 0x46, 0x00, 0x6b, 0x42,
- 0xef, 0x56, 0x46, 0x00, 0x6b, 0xc2, 0xef, 0x7f, 0xc4, 0x73, 0xef, 0x00,
- 0x0f, 0xb1, 0xc3, 0x08, 0xc3, 0x05, 0x39, 0x31, 0xc5, 0xd3, 0x22, 0x01,
- 0x63, 0xa8, 0x46, 0x00, 0x6b, 0xc2, 0xef, 0x99, 0x47, 0x21, 0x79, 0xc2,
- 0xef, 0xc7, 0xc4, 0x32, 0x6d, 0x00, 0x0c, 0xa1, 0xc2, 0x00, 0xa4, 0x00,
- 0x0d, 0x10, 0x46, 0x00, 0x6b, 0x42, 0xef, 0xd9, 0x46, 0x00, 0x6b, 0xc2,
- 0xef, 0xeb, 0x9c, 0x00, 0x0f, 0x8a, 0x02, 0xf0, 0x0b, 0x46, 0x00, 0x6b,
- 0xc2, 0xf0, 0x11, 0xc2, 0x00, 0x0a, 0x05, 0x3d, 0x99, 0xc8, 0xb9, 0xe5,
- 0x05, 0x39, 0x63, 0x02, 0xf0, 0x39, 0xc2, 0x00, 0xb7, 0x05, 0x3b, 0x71,
- 0xcf, 0x6a, 0x5b, 0x05, 0x3e, 0x80, 0x46, 0x00, 0x6b, 0xc2, 0xf0, 0x3f,
- 0xc3, 0x05, 0xe7, 0x05, 0x3d, 0xa1, 0xc7, 0xc9, 0x9f, 0x05, 0x3a, 0x30,
- 0x46, 0x00, 0x6b, 0x42, 0xf0, 0x63, 0x46, 0x00, 0x6b, 0x42, 0xf0, 0x6d,
- 0xc4, 0xb2, 0x72, 0x00, 0x74, 0x11, 0xc3, 0x30, 0x10, 0x00, 0x74, 0x20,
- 0xc2, 0x13, 0x4f, 0x00, 0x76, 0xf1, 0xc3, 0x4e, 0xfc, 0x00, 0x76, 0xf8,
- 0xc2, 0x1d, 0x5f, 0x00, 0x74, 0x71, 0xc2, 0x00, 0xc1, 0x00, 0x74, 0x98,
- 0x83, 0x00, 0x74, 0x79, 0xc2, 0x00, 0xa4, 0x00, 0x74, 0x80, 0x06, 0xc2,
- 0xf0, 0x79, 0xc2, 0x00, 0xa4, 0x00, 0x74, 0xc0, 0xc5, 0x01, 0xf7, 0x0f,
- 0xda, 0xa9, 0xc6, 0x02, 0x21, 0x0f, 0xda, 0xa1, 0xcc, 0x02, 0x0b, 0x0f,
- 0xdb, 0x38, 0x46, 0x01, 0xd1, 0xc2, 0xf0, 0x83, 0xd2, 0x48, 0x3c, 0x0f,
- 0xdb, 0x18, 0xd2, 0x48, 0x3c, 0x0f, 0xdb, 0x11, 0x46, 0x01, 0xd1, 0x42,
- 0xf0, 0x8f, 0xc6, 0x02, 0x21, 0x0f, 0xda, 0xc9, 0xc5, 0x01, 0xf7, 0x0f,
- 0xda, 0xd1, 0xcc, 0x02, 0x0b, 0x0f, 0xda, 0xe0, 0x46, 0x01, 0xc7, 0xc2,
- 0xf0, 0x9b, 0xd2, 0x47, 0x52, 0x0f, 0xda, 0xf0, 0xd2, 0x47, 0x52, 0x0f,
- 0xda, 0xe9, 0x46, 0x01, 0xc7, 0x42, 0xf0, 0xa7, 0x46, 0x00, 0x6b, 0x42,
- 0xf0, 0xb3, 0xd4, 0x3d, 0xf5, 0x01, 0x5d, 0xc0, 0xc5, 0x01, 0x0f, 0x01,
- 0x5b, 0x0b, 0x02, 0xf0, 0xbf, 0xcc, 0x82, 0xb4, 0x01, 0x5b, 0x59, 0xcd,
- 0x79, 0x2b, 0x01, 0x5c, 0x28, 0xd5, 0x03, 0x72, 0x0f, 0xc0, 0xa9, 0xd8,
- 0x25, 0xc8, 0x0f, 0xc0, 0x49, 0xd9, 0x1f, 0xeb, 0x0f, 0xc0, 0x29, 0x46,
- 0x04, 0x73, 0xc2, 0xf0, 0xc3, 0xcd, 0x77, 0x57, 0x01, 0x0e, 0xf1, 0x44,
- 0x01, 0x1a, 0xc2, 0xf0, 0xcf, 0xd1, 0x00, 0xf6, 0x01, 0x48, 0x49, 0xcc,
- 0x82, 0x48, 0x0f, 0xc4, 0xc8, 0x47, 0x13, 0x72, 0xc2, 0xf0, 0xdb, 0xc6,
- 0x11, 0xa5, 0x01, 0x4a, 0xc1, 0xc8, 0xb2, 0xf2, 0x01, 0x4b, 0x00, 0xc8,
- 0xb2, 0xf2, 0x01, 0x4a, 0xe1, 0xc6, 0x11, 0xa5, 0x01, 0x4a, 0xa0, 0xe0,
- 0x0b, 0x67, 0x01, 0x3a, 0x58, 0xd6, 0x2b, 0xe9, 0x01, 0x39, 0xc1, 0xca,
- 0x21, 0x3e, 0x0f, 0xbe, 0x79, 0xcd, 0x0e, 0x9f, 0x0f, 0xbe, 0x88, 0xc3,
- 0xe6, 0xf7, 0x0f, 0xb3, 0x29, 0xc9, 0xac, 0x43, 0x0f, 0xb2, 0xe8, 0xc5,
- 0x01, 0x0f, 0x01, 0x3c, 0xc1, 0x49, 0x01, 0x8a, 0x42, 0xf0, 0xe5, 0xdd,
- 0x00, 0xaa, 0x01, 0x3a, 0xe1, 0x44, 0x06, 0xfe, 0x42, 0xf0, 0xf1, 0xcf,
- 0x15, 0x8e, 0x0f, 0xbd, 0xc1, 0xd2, 0x21, 0x36, 0x0f, 0xbe, 0x60, 0xc3,
- 0xe6, 0xf7, 0x0f, 0xb3, 0x31, 0xc9, 0xac, 0x43, 0x0f, 0xb2, 0xf0, 0xe0,
- 0x09, 0x27, 0x01, 0x3d, 0x68, 0x44, 0x05, 0x96, 0xc2, 0xf0, 0xf7, 0x44,
- 0x03, 0x49, 0x42, 0xf0, 0xfd, 0xd0, 0x09, 0x57, 0x01, 0x3b, 0x81, 0xd7,
- 0x00, 0xb0, 0x01, 0x3b, 0x70, 0xd5, 0x03, 0x72, 0x0f, 0xc0, 0xc1, 0xdb,
- 0x17, 0xef, 0x0f, 0xc0, 0xe0, 0xd1, 0x55, 0xd4, 0x01, 0x3a, 0x19, 0xc8,
- 0x0b, 0x7f, 0x01, 0x39, 0xe8, 0xd0, 0x1f, 0xc2, 0x01, 0x3d, 0xc9, 0xd0,
- 0x01, 0xf7, 0x01, 0x3d, 0xc1, 0xd0, 0x3a, 0x25, 0x01, 0x3d, 0xb8, 0x47,
- 0x3e, 0x81, 0xc2, 0xf1, 0x03, 0xc5, 0x1d, 0x53, 0x01, 0x3b, 0x20, 0xd9,
- 0x1d, 0x93, 0x01, 0x37, 0x19, 0xcd, 0x80, 0x54, 0x01, 0x5a, 0xb8, 0xdd,
- 0x00, 0xaa, 0x01, 0x3a, 0xf1, 0x44, 0x06, 0xfe, 0x42, 0xf1, 0x0f, 0xd5,
- 0x03, 0x72, 0x0f, 0xc0, 0xd9, 0xdb, 0x17, 0xef, 0x0f, 0xc0, 0xf8, 0x46,
- 0x00, 0x6b, 0x42, 0xf1, 0x15, 0xd0, 0x09, 0x57, 0x01, 0x3b, 0x89, 0xd7,
- 0x00, 0xb0, 0x01, 0x3b, 0x78, 0x00, 0x42, 0xf1, 0x21, 0xc3, 0x4b, 0x98,
- 0x00, 0x2f, 0x91, 0xc3, 0x07, 0x8c, 0x00, 0x2f, 0x80, 0xc4, 0xe5, 0xfb,
- 0x07, 0xda, 0x71, 0xc6, 0x64, 0xbb, 0x07, 0xda, 0x20, 0xc4, 0xe5, 0xfb,
- 0x07, 0xda, 0x69, 0xc6, 0x64, 0xbb, 0x07, 0xd9, 0xd8, 0xc4, 0xe5, 0xfb,
- 0x07, 0xda, 0x61, 0xc6, 0x64, 0xbb, 0x07, 0xd9, 0x88, 0xc5, 0xd6, 0x83,
- 0x07, 0xda, 0x59, 0xc6, 0x64, 0xbb, 0x07, 0xd9, 0xa8, 0xcc, 0x8c, 0x14,
- 0x07, 0xda, 0x50, 0xcc, 0x8c, 0x14, 0x07, 0xda, 0x30, 0xcc, 0x8c, 0x14,
- 0x07, 0xd9, 0xc0, 0x46, 0x00, 0x6b, 0x42, 0xf1, 0x2d, 0xcc, 0x8c, 0x14,
- 0x07, 0xda, 0x08, 0xcc, 0x8c, 0x14, 0x07, 0xda, 0x18, 0xcc, 0x8c, 0x14,
- 0x07, 0xd9, 0xd0, 0xc6, 0x64, 0xbb, 0x07, 0xd9, 0xc9, 0xc5, 0xd6, 0x2e,
- 0x07, 0xd8, 0xe8, 0xc2, 0x00, 0x07, 0x00, 0x2e, 0x83, 0x02, 0xf1, 0x3a,
- 0x4a, 0xa2, 0x7a, 0x42, 0xf1, 0x40, 0xc6, 0xce, 0x41, 0x00, 0x2e, 0x38,
- 0xc6, 0x44, 0x67, 0x00, 0x2e, 0x09, 0xc3, 0x42, 0x0c, 0x00, 0x2d, 0x80,
- 0xce, 0x6e, 0xb1, 0x00, 0x2d, 0xd0, 0xc6, 0xcc, 0xd9, 0x00, 0x2d, 0x99,
- 0xc5, 0x7d, 0xd2, 0x00, 0x2d, 0x91, 0xc5, 0xda, 0xb1, 0x00, 0x2d, 0x88,
- 0xc5, 0xd8, 0x13, 0x00, 0x2c, 0xa9, 0xc5, 0xce, 0x42, 0x00, 0x2c, 0xa0,
- 0xc6, 0xcc, 0xdf, 0x00, 0x2d, 0x49, 0xc6, 0xcf, 0x4f, 0x00, 0x2d, 0x00,
- 0xc2, 0x50, 0x6e, 0x02, 0x6e, 0x31, 0xce, 0x6e, 0xcd, 0x02, 0x6f, 0x90,
- 0x11, 0xc2, 0xf1, 0x4c, 0xcc, 0x79, 0x53, 0x02, 0x6e, 0xd8, 0x00, 0x42,
- 0xf1, 0x58, 0xc2, 0x1d, 0x5f, 0x08, 0x68, 0xc9, 0xc2, 0x01, 0x09, 0x08,
- 0x68, 0xb8, 0x02, 0x42, 0xf1, 0x64, 0x44, 0x29, 0x95, 0xc2, 0xf1, 0x90,
- 0xc3, 0x3c, 0x08, 0x00, 0x88, 0x4a, 0x02, 0xf1, 0xd0, 0xc5, 0xde, 0x35,
- 0x05, 0x4b, 0xd8, 0xc6, 0xc1, 0x07, 0x00, 0x88, 0x8b, 0x02, 0xf1, 0xd4,
- 0xc4, 0x7a, 0x93, 0x00, 0x88, 0x3b, 0x02, 0xf1, 0xd8, 0xc6, 0xcb, 0x4b,
- 0x00, 0x8a, 0x00, 0x02, 0x42, 0xf1, 0xdc, 0x02, 0x42, 0xf2, 0x06, 0xc5,
- 0xc8, 0x2e, 0x00, 0x88, 0x1b, 0x02, 0xf2, 0x1e, 0xc6, 0xc6, 0xf2, 0x00,
- 0x88, 0x80, 0xc5, 0x92, 0x32, 0x00, 0x88, 0x03, 0x02, 0xf2, 0x22, 0xc6,
- 0xc0, 0x37, 0x00, 0x88, 0x79, 0x47, 0x7a, 0x8b, 0x42, 0xf2, 0x28, 0x02,
- 0x42, 0xf2, 0x3e, 0xc4, 0xc7, 0x2b, 0x00, 0x88, 0x63, 0x02, 0xf2, 0x62,
- 0x42, 0x00, 0x0a, 0xc2, 0xf2, 0x68, 0x4a, 0xa4, 0xfa, 0x42, 0xf2, 0x77,
- 0xc6, 0xbb, 0x9f, 0x00, 0x8a, 0x61, 0xc9, 0x98, 0x3d, 0x00, 0x8a, 0xc8,
- 0xc6, 0x94, 0x2b, 0x00, 0x8b, 0x01, 0x83, 0x00, 0x8b, 0x0b, 0x02, 0xf2,
- 0x7f, 0x1b, 0xc2, 0xf2, 0x90, 0x87, 0x00, 0x8b, 0x33, 0x02, 0xf2, 0xb3,
- 0x91, 0x00, 0x8b, 0x4b, 0x02, 0xf2, 0xc1, 0x19, 0xc2, 0xf2, 0xc9, 0x97,
- 0x00, 0x8b, 0x73, 0x02, 0xf2, 0xdb, 0x8b, 0x00, 0x8b, 0xab, 0x02, 0xf2,
- 0xdf, 0xca, 0xa3, 0xe2, 0x00, 0x8d, 0x10, 0x0d, 0xc2, 0xf2, 0xe3, 0x15,
- 0xc2, 0xf2, 0xf8, 0xc5, 0xd9, 0x80, 0x00, 0x8d, 0x5b, 0x02, 0xf3, 0x07,
- 0x16, 0xc2, 0xf3, 0x0b, 0xc5, 0xd6, 0x3d, 0x00, 0x8d, 0x7b, 0x02, 0xf3,
- 0x1a, 0xc5, 0xdb, 0x51, 0x00, 0x8d, 0xbb, 0x02, 0xf3, 0x1e, 0x12, 0xc2,
- 0xf3, 0x22, 0xc5, 0xbb, 0xa0, 0x00, 0x8d, 0xe3, 0x02, 0xf3, 0x3d, 0x05,
- 0xc2, 0xf3, 0x41, 0xc5, 0x98, 0x41, 0x00, 0x8e, 0x13, 0x02, 0xf3, 0x50,
- 0x42, 0x05, 0xd0, 0x42, 0xf3, 0x54, 0xc5, 0x92, 0x32, 0x01, 0x89, 0x8b,
- 0x02, 0xf3, 0x63, 0xc6, 0xc0, 0x37, 0x01, 0x8a, 0x59, 0x47, 0x7a, 0x8b,
- 0x42, 0xf3, 0x69, 0x44, 0x29, 0x95, 0xc2, 0xf3, 0x79, 0xc3, 0x3c, 0x08,
- 0x01, 0x8a, 0x2a, 0x02, 0xf3, 0xa9, 0x02, 0x42, 0xf3, 0xad, 0xc5, 0xc8,
- 0x2e, 0x01, 0x89, 0xb9, 0xc6, 0xc6, 0xf2, 0x01, 0x8a, 0x60, 0x02, 0x42,
- 0xf3, 0xcb, 0x02, 0x42, 0xf3, 0xf4, 0xc4, 0x7a, 0x93, 0x01, 0x8a, 0x13,
- 0x02, 0xf3, 0xfe, 0xc6, 0xc1, 0x07, 0x01, 0x8a, 0x69, 0xc6, 0xcb, 0x4b,
- 0x01, 0x8b, 0xf8, 0xc4, 0xbb, 0xa1, 0x01, 0x8a, 0x38, 0xc4, 0xc7, 0x2b,
- 0x01, 0x8a, 0x41, 0xc6, 0xc7, 0x2a, 0x01, 0x8a, 0x50, 0x87, 0x01, 0x8a,
- 0x81, 0xc4, 0x0f, 0xf4, 0x01, 0x8c, 0x6a, 0x02, 0xf4, 0x02, 0x83, 0x01,
- 0x8a, 0x8b, 0x02, 0xf4, 0x06, 0x87, 0x01, 0x8a, 0xb3, 0x02, 0xf4, 0x0a,
- 0x91, 0x01, 0x8a, 0xdb, 0x02, 0xf4, 0x1a, 0x97, 0x01, 0x8b, 0x03, 0x02,
- 0xf4, 0x1e, 0x8b, 0x01, 0x8b, 0x10, 0x91, 0x01, 0x8a, 0x99, 0x97, 0x01,
- 0x8b, 0x08, 0x87, 0x01, 0x8a, 0xd0, 0x83, 0x01, 0x8a, 0xc3, 0x02, 0xf4,
- 0x22, 0x87, 0x01, 0x8a, 0xf3, 0x02, 0xf4, 0x26, 0x8b, 0x01, 0x8a, 0xf8,
- 0x91, 0x01, 0x81, 0x11, 0xc4, 0x18, 0x85, 0x01, 0x81, 0xc8, 0xc3, 0x04,
- 0x5f, 0x01, 0x81, 0x19, 0xc4, 0x0c, 0x55, 0x01, 0x81, 0xd0, 0xc3, 0xdf,
- 0x4a, 0x08, 0x47, 0x89, 0xc4, 0xd9, 0x77, 0x08, 0x47, 0x70, 0x91, 0x07,
- 0xfb, 0x31, 0x83, 0x07, 0xfc, 0xe0, 0x45, 0x04, 0x74, 0xc2, 0xf4, 0x2a,
- 0x83, 0x07, 0xfb, 0xd9, 0x97, 0x07, 0xfb, 0xe9, 0x87, 0x07, 0xfb, 0xf1,
- 0x91, 0x07, 0xfb, 0xf9, 0x8b, 0x07, 0xfb, 0xe0, 0x83, 0x07, 0xfb, 0xb1,
- 0x8b, 0x07, 0xfb, 0xb9, 0x87, 0x07, 0xfb, 0xc9, 0x91, 0x07, 0xfb, 0xd1,
- 0x97, 0x07, 0xfb, 0xc0, 0x83, 0x07, 0xfc, 0x01, 0x8b, 0x07, 0xfc, 0x09,
- 0x97, 0x07, 0xfc, 0x11, 0x87, 0x07, 0xfc, 0x19, 0x91, 0x07, 0xfc, 0x20,
- 0x87, 0x07, 0xfc, 0x41, 0x91, 0x07, 0xfc, 0x49, 0x83, 0x07, 0xfc, 0x29,
- 0x8b, 0x07, 0xfc, 0x31, 0x97, 0x07, 0xfc, 0x38, 0x8b, 0x07, 0xfc, 0x59,
- 0x97, 0x07, 0xfc, 0x61, 0x87, 0x07, 0xfc, 0x69, 0x83, 0x07, 0xfc, 0x51,
- 0x91, 0x07, 0xfc, 0x70, 0x8b, 0x07, 0xfc, 0x81, 0x91, 0x07, 0xfc, 0x99,
- 0x83, 0x07, 0xfc, 0x79, 0x97, 0x07, 0xfc, 0x89, 0x87, 0x07, 0xfc, 0x90,
- 0x83, 0x07, 0xfc, 0xa1, 0x97, 0x07, 0xfc, 0xa9, 0x91, 0x07, 0xfc, 0xb0,
- 0x97, 0x07, 0xfc, 0xc9, 0x87, 0x07, 0xfc, 0xd1, 0x91, 0x07, 0xfc, 0xd9,
- 0x83, 0x07, 0xfc, 0xb9, 0x8b, 0x07, 0xfc, 0xc0, 0xc5, 0xde, 0x35, 0x07,
- 0xfd, 0x18, 0xc6, 0x92, 0x31, 0x07, 0xfd, 0x11, 0xc5, 0x7a, 0x92, 0x07,
- 0xfd, 0x99, 0xc4, 0xac, 0xd8, 0x07, 0xfd, 0xb1, 0xc5, 0xd7, 0x8c, 0x07,
- 0xfd, 0xc9, 0xc6, 0xc8, 0x2d, 0x07, 0xfd, 0x40, 0xc6, 0x92, 0x31, 0x07,
- 0xfd, 0x51, 0xc5, 0xdb, 0x51, 0x07, 0xfd, 0x59, 0x12, 0xc2, 0xf4, 0x48,
- 0xc4, 0xac, 0xd8, 0x07, 0xfd, 0x69, 0xc7, 0xc6, 0xf1, 0x07, 0xfd, 0x71,
- 0xc5, 0x98, 0x41, 0x07, 0xfd, 0x80, 0xc5, 0xde, 0x35, 0x07, 0xfd, 0xa0,
- 0x87, 0x07, 0xfe, 0x28, 0x91, 0x07, 0xfe, 0x50, 0x87, 0x07, 0xfe, 0x70,
- 0x91, 0x07, 0xfe, 0xa0, 0xc5, 0xd7, 0x8c, 0x07, 0xfd, 0x29, 0xc5, 0x98,
- 0x41, 0x07, 0xfd, 0x30, 0x91, 0x0d, 0x89, 0x91, 0x83, 0x01, 0x84, 0xa9,
- 0x87, 0x01, 0x84, 0xb0, 0x91, 0x0d, 0x8a, 0x91, 0x87, 0x0d, 0x8a, 0x89,
- 0x8b, 0x0d, 0x8a, 0x81, 0x83, 0x01, 0x84, 0x70, 0x83, 0x01, 0x84, 0x19,
- 0x97, 0x01, 0x84, 0x29, 0x91, 0x01, 0x84, 0x38, 0xd2, 0x4b, 0x1e, 0x01,
- 0x72, 0x30, 0xe0, 0x08, 0x87, 0x01, 0x52, 0x58, 0xcf, 0x60, 0xb0, 0x01,
- 0x52, 0x49, 0xc5, 0x13, 0x89, 0x01, 0x52, 0x38, 0xcb, 0x27, 0x63, 0x01,
- 0x52, 0x21, 0xc7, 0x76, 0x66, 0x01, 0x52, 0x19, 0xc3, 0x01, 0x4a, 0x01,
- 0x52, 0x00, 0xc6, 0x4f, 0x3b, 0x01, 0x50, 0xe1, 0xc3, 0x00, 0xb6, 0x01,
- 0x50, 0xd0, 0x00, 0x42, 0xf4, 0x54, 0x19, 0xc2, 0xf4, 0x60, 0xc2, 0x00,
- 0x4d, 0x08, 0x5b, 0xe1, 0xc4, 0x04, 0x5e, 0x08, 0x5b, 0xd0, 0xc2, 0x26,
- 0xfa, 0x08, 0x5b, 0x91, 0xc3, 0x1a, 0xba, 0x08, 0x5b, 0x40, 0xc3, 0x0d,
- 0xd9, 0x08, 0x5b, 0x89, 0x03, 0x42, 0xf4, 0x6a, 0xc2, 0x00, 0x6e, 0x08,
- 0x5b, 0x38, 0x00, 0x42, 0xf4, 0x76, 0x19, 0xc2, 0xf4, 0x82, 0xc2, 0x00,
- 0x4d, 0x08, 0x5a, 0xe1, 0xc4, 0x04, 0x5e, 0x08, 0x5a, 0xd0, 0xc2, 0x26,
- 0xfa, 0x08, 0x5a, 0xa9, 0xc3, 0x1a, 0xba, 0x08, 0x5a, 0x40, 0xc3, 0x0d,
- 0xd9, 0x08, 0x5a, 0xa1, 0x03, 0x42, 0xf4, 0x8c, 0xc2, 0x00, 0x6e, 0x08,
- 0x5a, 0x38, 0xc4, 0x37, 0x5c, 0x08, 0x5a, 0x01, 0xc3, 0x15, 0x1d, 0x08,
- 0x5a, 0x78, 0xc2, 0x01, 0x47, 0x00, 0x00, 0xf1, 0xc4, 0x04, 0x5e, 0x00,
- 0x00, 0xe8, 0x16, 0xc2, 0xf4, 0x98, 0xc3, 0x01, 0xb4, 0x0f, 0x65, 0x88,
- 0xc4, 0x22, 0x71, 0x0f, 0x65, 0x59, 0xc5, 0x01, 0xdb, 0x0f, 0x65, 0x51,
- 0x15, 0xc2, 0xf4, 0xa4, 0x08, 0xc2, 0xf4, 0xb0, 0x16, 0xc2, 0xf4, 0xbc,
- 0xc3, 0x01, 0xb4, 0x0f, 0x65, 0x18, 0xc2, 0x00, 0x57, 0x0f, 0x65, 0x10,
- 0xc2, 0x00, 0x57, 0x0f, 0x64, 0xf8, 0xc2, 0x0c, 0x57, 0x0f, 0x64, 0x13,
- 0x02, 0xf4, 0xc8, 0x00, 0x42, 0xf4, 0xce, 0x9b, 0x0f, 0x64, 0x0b, 0x02,
- 0xf4, 0xda, 0x00, 0x42, 0xf4, 0xe0, 0xc4, 0x18, 0x83, 0x0f, 0x63, 0xbb,
- 0x02, 0xf4, 0xec, 0xc2, 0x26, 0x51, 0x0f, 0x63, 0xb2, 0x02, 0xf4, 0xf9,
- 0x0b, 0xc2, 0xf5, 0x06, 0x11, 0x42, 0xf5, 0x18, 0x0a, 0xc2, 0xf5, 0x2a,
- 0x19, 0xc2, 0xf5, 0x3c, 0xc2, 0x00, 0x4d, 0x0f, 0x63, 0xd2, 0x02, 0xf5,
- 0x4c, 0x00, 0x42, 0xf5, 0x52, 0xc4, 0x03, 0x2b, 0x0f, 0x65, 0x71, 0xc7,
- 0x0a, 0xb9, 0x0f, 0x65, 0x68, 0xc6, 0xd1, 0x0b, 0x01, 0x96, 0x01, 0x17,
- 0x42, 0xf5, 0x5e, 0xc3, 0x76, 0xfb, 0x01, 0x96, 0x11, 0x9b, 0x01, 0x96,
- 0x20, 0xc4, 0xe5, 0x6f, 0x01, 0x96, 0x19, 0xc5, 0xd6, 0xd8, 0x01, 0x96,
- 0x38, 0xc7, 0xc1, 0x80, 0x01, 0x96, 0x59, 0x43, 0x1b, 0x44, 0x42, 0xf5,
- 0x6a, 0xc4, 0x15, 0xd3, 0x01, 0x9a, 0xc1, 0xc3, 0x01, 0xb4, 0x01, 0x9a,
- 0xc9, 0x16, 0xc2, 0xf5, 0x89, 0x08, 0xc2, 0xf5, 0x97, 0x15, 0xc2, 0xf5,
- 0xa4, 0x07, 0xc2, 0xf5, 0xb6, 0xc4, 0x22, 0x71, 0x01, 0x9b, 0x0a, 0x02,
- 0xf5, 0xc5, 0xc3, 0x01, 0x1f, 0x01, 0x7f, 0xb9, 0xc9, 0x02, 0x48, 0x01,
- 0x7f, 0xd0, 0xc4, 0x01, 0x1e, 0x01, 0x7f, 0xc1, 0xc5, 0x01, 0xf7, 0x01,
- 0x7f, 0xc8, 0xc9, 0x4f, 0xff, 0x08, 0x42, 0xf8, 0xc4, 0x18, 0x85, 0x08,
- 0x42, 0xe1, 0x91, 0x08, 0x42, 0xc8, 0xc8, 0x50, 0x00, 0x08, 0x42, 0xf1,
- 0xc7, 0x0c, 0x4b, 0x08, 0x42, 0xe8, 0xc4, 0xd9, 0x77, 0x08, 0x42, 0x71,
- 0xc3, 0xdf, 0x4a, 0x08, 0x42, 0x88, 0xd7, 0x29, 0x7f, 0x0f, 0xd2, 0x58,
- 0x49, 0x29, 0x7f, 0x42, 0xf5, 0xcb, 0x49, 0x29, 0x7f, 0x42, 0xf5, 0xd7,
- 0xc5, 0x7c, 0xf9, 0x01, 0x32, 0xc3, 0x02, 0xf5, 0xe3, 0xc3, 0x00, 0x34,
- 0x01, 0x32, 0xa2, 0x02, 0xf5, 0xed, 0x49, 0x29, 0x7f, 0x42, 0xf5, 0xf3,
- 0x49, 0x29, 0x7f, 0x42, 0xf5, 0xff, 0x0d, 0xc2, 0xf6, 0x0b, 0xc5, 0xb5,
- 0xaf, 0x0f, 0xd0, 0xf9, 0xc4, 0xe0, 0xaf, 0x0f, 0xd1, 0x01, 0xc6, 0xd1,
- 0xf5, 0x0f, 0xd1, 0x09, 0xc4, 0xe5, 0xdf, 0x0f, 0xd1, 0x18, 0xdd, 0x11,
- 0x54, 0x0f, 0xbc, 0x51, 0x45, 0x00, 0x6c, 0x42, 0xf6, 0x17, 0xcf, 0x64,
- 0x25, 0x01, 0x3f, 0x19, 0xce, 0x70, 0x47, 0x01, 0x3f, 0x10, 0xc2, 0x03,
- 0x3d, 0x0f, 0xc8, 0x6b, 0x02, 0xf6, 0x2f, 0x43, 0x10, 0x3a, 0x42, 0xf6,
- 0x35, 0x51, 0x08, 0xa9, 0xc2, 0xf6, 0x41, 0x45, 0x00, 0x6c, 0xc2, 0xf6,
- 0x53, 0xc6, 0x87, 0x28, 0x0f, 0xa9, 0x98, 0x45, 0x00, 0x6c, 0xc2, 0xf6,
- 0x6d, 0xcc, 0x88, 0x60, 0x0f, 0x99, 0x2a, 0x02, 0xf6, 0x79, 0x15, 0xc2,
- 0xf6, 0x7f, 0xc7, 0x08, 0xc0, 0x01, 0x59, 0x58, 0xca, 0xa8, 0xf6, 0x01,
- 0x36, 0xc9, 0x49, 0x01, 0x8a, 0x42, 0xf6, 0x8b, 0xc7, 0x41, 0x48, 0x01,
- 0x2e, 0x29, 0xce, 0x73, 0x9d, 0x01, 0x2e, 0x19, 0xc8, 0x01, 0xe7, 0x01,
- 0x2e, 0x08, 0xd0, 0x59, 0x12, 0x01, 0x3e, 0x81, 0xc9, 0xb1, 0x89, 0x01,
- 0x36, 0x59, 0xc4, 0x23, 0x79, 0x01, 0x33, 0x11, 0x51, 0x08, 0xa9, 0x42,
- 0xf6, 0x97, 0xc5, 0x08, 0x42, 0x01, 0x30, 0xf9, 0xcf, 0x69, 0xe3, 0x0f,
- 0xac, 0xb9, 0xce, 0x25, 0x12, 0x0f, 0xa2, 0x38, 0xce, 0x73, 0x9d, 0x01,
- 0x2d, 0xf9, 0xc8, 0x01, 0xe7, 0x01, 0x2d, 0xe8, 0xe0, 0x04, 0x67, 0x01,
- 0x3e, 0x08, 0xc5, 0x06, 0x02, 0x01, 0x3a, 0x01, 0xc3, 0x01, 0xd3, 0x0f,
- 0xa5, 0x70, 0x44, 0x00, 0x6b, 0x42, 0xf6, 0xa9, 0xc5, 0x08, 0x42, 0x01,
- 0x30, 0xf1, 0xce, 0x25, 0x12, 0x0f, 0xa2, 0x48, 0x12, 0xc2, 0xf6, 0xaf,
- 0xce, 0x73, 0x9d, 0x01, 0x2d, 0xc9, 0xc8, 0x01, 0xe7, 0x01, 0x2d, 0xb8,
- 0xc9, 0x37, 0x1e, 0x01, 0x2f, 0x60, 0xcb, 0x54, 0x64, 0x01, 0x2f, 0xe9,
- 0xc5, 0x07, 0x0a, 0x01, 0x2f, 0xd9, 0xc3, 0x04, 0x45, 0x01, 0x5a, 0x80,
- 0x90, 0x0f, 0x17, 0x42, 0x02, 0xf6, 0xbb, 0x89, 0x0f, 0x17, 0x10, 0xc2,
- 0x01, 0x10, 0x08, 0xc6, 0xd9, 0xc2, 0x01, 0x64, 0x08, 0xc6, 0xd0, 0x90,
- 0x08, 0xc6, 0x81, 0x9b, 0x08, 0xc6, 0x68, 0x8c, 0x08, 0xc6, 0x70, 0xc2,
- 0x01, 0x10, 0x08, 0xc5, 0xd9, 0xc2, 0x01, 0x64, 0x08, 0xc5, 0xd0, 0x90,
- 0x08, 0xc5, 0x81, 0x9b, 0x08, 0xc5, 0x68, 0x8c, 0x08, 0xc5, 0x70, 0xe0,
- 0x04, 0xa7, 0x01, 0x5c, 0xa0, 0xcc, 0x8d, 0x40, 0x0f, 0xcb, 0xd1, 0xd7,
- 0x2a, 0xc1, 0x0f, 0xcb, 0x99, 0xca, 0xa3, 0xb0, 0x0f, 0xd7, 0x18, 0xcb,
- 0x89, 0x15, 0x0f, 0xb0, 0x11, 0xca, 0x9c, 0x6c, 0x0f, 0xc8, 0x90, 0xc9,
- 0xad, 0xc6, 0x0f, 0xb2, 0x31, 0x44, 0x01, 0x76, 0xc2, 0xf6, 0xbf, 0xd1,
- 0x56, 0x18, 0x0f, 0xc9, 0x40, 0x45, 0x00, 0x56, 0x42, 0xf6, 0xce, 0xc8,
- 0x6f, 0xa5, 0x0f, 0xb0, 0x99, 0xc8, 0xbc, 0xf5, 0x0f, 0xc9, 0x00, 0xcb,
- 0x8d, 0x93, 0x0f, 0xb1, 0xb9, 0xc6, 0xce, 0xef, 0x0f, 0xce, 0x80, 0xc2,
- 0x01, 0x47, 0x07, 0xf8, 0x91, 0xc4, 0x04, 0x5e, 0x07, 0xf8, 0x98, 0xc3,
- 0x06, 0x9e, 0x07, 0xf8, 0xa1, 0xc3, 0x0c, 0x5b, 0x07, 0xf8, 0xa8, 0xc2,
- 0x26, 0x51, 0x07, 0xf8, 0xb1, 0xc4, 0x18, 0x83, 0x07, 0xf8, 0xb8, 0xc9,
- 0xb5, 0xe5, 0x07, 0xf9, 0x01, 0x83, 0x07, 0xf8, 0x60, 0xce, 0x24, 0xb2,
- 0x07, 0xf9, 0xd9, 0xcd, 0x02, 0x52, 0x07, 0xfa, 0xd9, 0xd1, 0x57, 0x9f,
- 0x07, 0xfa, 0xf9, 0xcb, 0x1a, 0x3f, 0x07, 0xf8, 0x40, 0x83, 0x07, 0xf9,
- 0x09, 0x84, 0x07, 0xf9, 0x11, 0x85, 0x07, 0xf9, 0x19, 0x86, 0x07, 0xf9,
- 0x21, 0x87, 0x07, 0xf9, 0x29, 0x88, 0x07, 0xf9, 0x31, 0x89, 0x07, 0xf9,
- 0x39, 0x8a, 0x07, 0xf9, 0x41, 0x8b, 0x07, 0xf9, 0x49, 0x8c, 0x07, 0xf9,
- 0x51, 0x8d, 0x07, 0xf9, 0x59, 0x8e, 0x07, 0xf9, 0x61, 0x8f, 0x07, 0xf9,
- 0x69, 0x95, 0x07, 0xf9, 0x99, 0x96, 0x07, 0xf9, 0xa1, 0x97, 0x07, 0xf9,
- 0xa9, 0x98, 0x07, 0xf9, 0xb1, 0x99, 0x07, 0xf9, 0xb9, 0x9a, 0x07, 0xf9,
- 0xc1, 0x9b, 0x07, 0xf9, 0xc9, 0x9c, 0x07, 0xf9, 0xd1, 0x90, 0x07, 0xf9,
- 0x71, 0x91, 0x07, 0xf9, 0x79, 0x92, 0x07, 0xf9, 0x81, 0x93, 0x07, 0xf9,
- 0x89, 0x94, 0x07, 0xf9, 0x90, 0x83, 0x07, 0xfa, 0x09, 0x84, 0x07, 0xfa,
- 0x11, 0x85, 0x07, 0xfa, 0x19, 0x87, 0x07, 0xfa, 0x29, 0x88, 0x07, 0xfa,
- 0x31, 0x89, 0x07, 0xfa, 0x39, 0x8a, 0x07, 0xfa, 0x41, 0x8b, 0x07, 0xfa,
- 0x49, 0x8c, 0x07, 0xfa, 0x51, 0x8d, 0x07, 0xfa, 0x59, 0x8e, 0x07, 0xfa,
- 0x61, 0x8f, 0x07, 0xfa, 0x69, 0x90, 0x07, 0xfa, 0x71, 0x91, 0x07, 0xfa,
- 0x79, 0x92, 0x07, 0xfa, 0x81, 0x93, 0x07, 0xfa, 0x89, 0x94, 0x07, 0xfa,
- 0x91, 0x95, 0x07, 0xfa, 0x99, 0x96, 0x07, 0xfa, 0xa1, 0x97, 0x07, 0xfa,
- 0xa9, 0x98, 0x07, 0xfa, 0xb1, 0x99, 0x07, 0xfa, 0xb9, 0x9a, 0x07, 0xfa,
- 0xc1, 0x9b, 0x07, 0xfa, 0xc9, 0x9c, 0x07, 0xfa, 0xd1, 0x86, 0x07, 0xfa,
- 0x20, 0xca, 0x95, 0x0e, 0x08, 0x52, 0xb9, 0x96, 0x08, 0x52, 0x80, 0x91,
- 0x08, 0x50, 0x31, 0x87, 0x08, 0x50, 0x29, 0xc9, 0xac, 0xc1, 0x08, 0x50,
- 0x19, 0x97, 0x08, 0x50, 0x11, 0x8b, 0x08, 0x50, 0x08, 0x16, 0xc2, 0xf6,
- 0xda, 0xc2, 0x00, 0xa4, 0x08, 0x50, 0xd9, 0x83, 0x08, 0x50, 0xd0, 0xc2,
- 0x00, 0xa4, 0x08, 0x50, 0xe9, 0x83, 0x08, 0x50, 0xe0, 0x12, 0xc2, 0xf6,
- 0xe4, 0x04, 0xc2, 0xf6, 0xf0, 0x45, 0xdc, 0x82, 0x42, 0xf6, 0xfc, 0xc3,
- 0x32, 0xa9, 0x00, 0xcf, 0xd1, 0xc4, 0xe2, 0x27, 0x00, 0xcf, 0x50, 0x02,
- 0xc2, 0xf7, 0x08, 0x00, 0x42, 0xf7, 0x18, 0xc3, 0x32, 0xa9, 0x00, 0xcf,
- 0x91, 0xc4, 0xe2, 0x27, 0x00, 0xcf, 0x10, 0xc3, 0x32, 0xa9, 0x00, 0xcf,
- 0xa1, 0xc4, 0xe2, 0x27, 0x00, 0xcf, 0x20, 0xc3, 0x32, 0xa9, 0x00, 0xcf,
- 0x99, 0xc4, 0xe2, 0x27, 0x00, 0xcf, 0x18, 0xc3, 0x0f, 0x7c, 0x00, 0xbf,
- 0xab, 0x02, 0xf7, 0x24, 0xc2, 0x23, 0xb4, 0x00, 0xbf, 0x90, 0xc3, 0xe0,
- 0xeb, 0x00, 0xbf, 0xa1, 0xc2, 0x01, 0xdb, 0x00, 0xbf, 0x98, 0xc8, 0xbd,
- 0x1d, 0x00, 0xbe, 0xa9, 0xc8, 0xad, 0x37, 0x00, 0xbe, 0x99, 0xc4, 0xe5,
- 0xc3, 0x00, 0xbe, 0x58, 0x98, 0x00, 0xbd, 0x50, 0x90, 0x0d, 0x8b, 0x3b,
- 0x02, 0xf7, 0x28, 0x19, 0xc2, 0xf7, 0x2c, 0x0d, 0xc2, 0xf7, 0x3c, 0x83,
- 0x01, 0x85, 0x09, 0x8b, 0x01, 0x85, 0x19, 0x97, 0x01, 0x85, 0x29, 0x87,
- 0x01, 0x85, 0x39, 0x91, 0x01, 0x85, 0x49, 0x16, 0xc2, 0xf7, 0x4c, 0x1b,
- 0xc2, 0xf7, 0x54, 0x15, 0xc2, 0xf7, 0x60, 0x0a, 0xc2, 0xf7, 0x68, 0xc2,
- 0x13, 0x51, 0x01, 0x8f, 0xd1, 0x14, 0x42, 0xf7, 0x7c, 0x87, 0x0d, 0x80,
- 0x01, 0xc2, 0x13, 0x1d, 0x0d, 0x89, 0x11, 0x1b, 0x42, 0xf7, 0x90, 0x45,
- 0xda, 0xc0, 0x42, 0xf7, 0x98, 0x83, 0x00, 0x64, 0x31, 0x8b, 0x00, 0x64,
- 0x81, 0x97, 0x00, 0x64, 0xa0, 0x8b, 0x00, 0x64, 0x40, 0x97, 0x00, 0x64,
- 0x50, 0x47, 0xac, 0xc2, 0xc2, 0xf7, 0xa4, 0x83, 0x00, 0x65, 0xa8, 0x87,
- 0x00, 0x64, 0x78, 0x91, 0x00, 0x64, 0x98, 0x83, 0x00, 0x64, 0xa9, 0xc2,
- 0x00, 0xa4, 0x00, 0x64, 0xb0, 0x83, 0x00, 0x64, 0xb9, 0xc2, 0x00, 0xa4,
- 0x00, 0x64, 0xc0, 0xc2, 0x01, 0x29, 0x00, 0x64, 0xc9, 0xc2, 0x1d, 0x5f,
- 0x00, 0x64, 0xf1, 0xc2, 0x00, 0xc1, 0x00, 0x65, 0x19, 0x83, 0x00, 0x65,
- 0x42, 0x02, 0xf7, 0xb2, 0x83, 0x00, 0x64, 0xd1, 0xc2, 0x00, 0xa4, 0x00,
- 0x64, 0xd8, 0x83, 0x00, 0x64, 0xe1, 0xc2, 0x00, 0xa4, 0x00, 0x64, 0xe8,
- 0x16, 0xc2, 0xf7, 0xb8, 0x83, 0x00, 0x65, 0x21, 0xc2, 0x00, 0xa4, 0x00,
- 0x65, 0x28, 0x06, 0xc2, 0xf7, 0xc2, 0x83, 0x00, 0x65, 0x31, 0xc2, 0x00,
- 0xa4, 0x00, 0x65, 0x38, 0x83, 0x00, 0x65, 0x51, 0xc2, 0x00, 0xa4, 0x00,
- 0x65, 0x58, 0x83, 0x00, 0x65, 0x61, 0xc2, 0x00, 0xa4, 0x00, 0x65, 0x68,
- 0x83, 0x00, 0x65, 0x81, 0xc2, 0x02, 0x59, 0x00, 0x65, 0x88, 0x83, 0x00,
- 0x65, 0x91, 0x0e, 0x42, 0xf7, 0xcc, 0xc2, 0x00, 0xa4, 0x00, 0x65, 0xb1,
- 0xc2, 0x0c, 0x65, 0x00, 0x65, 0xb9, 0x83, 0x00, 0x65, 0xc0, 0x94, 0x00,
- 0x66, 0x20, 0x8e, 0x00, 0x67, 0x18, 0xc4, 0xe0, 0x47, 0x01, 0x79, 0x80,
- 0xc6, 0x36, 0x37, 0x01, 0x78, 0x81, 0xc4, 0x77, 0x9e, 0x01, 0x7c, 0x38,
- 0xc2, 0x00, 0x8c, 0x01, 0x78, 0x09, 0x86, 0x01, 0x78, 0x39, 0xc2, 0x17,
- 0x9f, 0x01, 0x7b, 0x18, 0xc2, 0x0c, 0xf3, 0x01, 0x78, 0x49, 0x03, 0xc2,
- 0xf7, 0xd6, 0xc2, 0x13, 0xa5, 0x01, 0x7d, 0x90, 0xc2, 0x00, 0xc3, 0x01,
- 0x79, 0x51, 0xc2, 0x00, 0xe0, 0x01, 0x7a, 0x58, 0xc7, 0xca, 0x55, 0x01,
- 0x79, 0xa8, 0x96, 0x01, 0x78, 0x13, 0x02, 0xf7, 0xe0, 0xc6, 0xcc, 0x79,
- 0x01, 0x78, 0x61, 0xc2, 0x00, 0xbf, 0x01, 0x79, 0xf1, 0xc4, 0x16, 0x49,
- 0x01, 0x7a, 0x79, 0xc6, 0xd3, 0x7b, 0x01, 0x7a, 0xc1, 0x89, 0x01, 0x7a,
- 0xe8, 0xc8, 0xae, 0x96, 0x01, 0x78, 0xc1, 0xc4, 0x01, 0xc8, 0x01, 0x7a,
- 0x19, 0x15, 0x42, 0xf7, 0xe6, 0x9b, 0x01, 0x79, 0x91, 0xc2, 0x06, 0x1f,
- 0x01, 0x7e, 0x71, 0xc4, 0x1d, 0xe6, 0x01, 0x7e, 0x98, 0xc3, 0x07, 0x87,
- 0x01, 0x7a, 0x69, 0xc2, 0x00, 0x28, 0x01, 0x7e, 0x28, 0x03, 0xc2, 0xf7,
- 0xf0, 0xc3, 0x17, 0x5a, 0x01, 0x7a, 0xd0, 0xc4, 0x74, 0xa9, 0x01, 0x78,
- 0x21, 0xc2, 0x00, 0x92, 0x01, 0x78, 0xc9, 0xc2, 0x03, 0x2d, 0x01, 0x7c,
- 0x19, 0x87, 0x01, 0x7c, 0x90, 0xc3, 0x02, 0xd4, 0x01, 0x78, 0xa1, 0xc4,
- 0xdf, 0xe3, 0x01, 0x79, 0x61, 0x07, 0xc2, 0xf7, 0xfc, 0xc4, 0xaf, 0x42,
- 0x01, 0x7b, 0x80, 0xc5, 0xd6, 0x7e, 0x01, 0x79, 0x01, 0xc4, 0x0a, 0x10,
- 0x01, 0x7a, 0x10, 0x11, 0xc2, 0xf8, 0x08, 0x07, 0x42, 0xf8, 0x14, 0x07,
- 0xc2, 0xf8, 0x20, 0x11, 0xc2, 0xf8, 0x2a, 0xc3, 0x00, 0x57, 0x01, 0x7b,
- 0xa0, 0x9b, 0x01, 0x7a, 0x41, 0xce, 0x71, 0xa5, 0x01, 0x7d, 0xf9, 0xc2,
- 0x00, 0x32, 0x01, 0x7e, 0x20, 0xc6, 0xcf, 0x43, 0x01, 0x7b, 0x39, 0xc2,
- 0x44, 0x0d, 0x01, 0x7b, 0xb8, 0xc2, 0x01, 0x04, 0x01, 0x78, 0x29, 0x14,
- 0x42, 0xf8, 0x37, 0x03, 0xc2, 0xf8, 0x41, 0xc2, 0x19, 0x19, 0x01, 0x7e,
- 0x38, 0x0e, 0xc2, 0xf8, 0x4b, 0xc2, 0x00, 0x58, 0x01, 0x79, 0xf9, 0xc2,
- 0x01, 0x7b, 0x01, 0x7d, 0xe8, 0xc6, 0x03, 0x52, 0x01, 0x79, 0xb0, 0xc3,
- 0x05, 0x99, 0x01, 0x78, 0x79, 0xcc, 0x7f, 0xac, 0x01, 0x7d, 0x89, 0xc2,
- 0x03, 0xc7, 0x01, 0x7d, 0xe0, 0xc3, 0x10, 0xf0, 0x01, 0x79, 0x89, 0xc3,
- 0x0f, 0x59, 0x01, 0x7e, 0xa0, 0xc2, 0x00, 0x4c, 0x01, 0x7a, 0x51, 0xc3,
- 0x00, 0xdf, 0x01, 0x7b, 0x89, 0xc4, 0xe2, 0x9f, 0x01, 0x7e, 0x68, 0xc2,
- 0x00, 0x54, 0x01, 0x7b, 0x09, 0xc3, 0x00, 0x63, 0x01, 0x7c, 0x60, 0xc4,
- 0x5d, 0x1e, 0x01, 0x7c, 0x31, 0xc3, 0x03, 0x4f, 0x01, 0x7e, 0x90, 0x17,
- 0xc2, 0xf8, 0x57, 0xc2, 0x00, 0xb7, 0x01, 0x7a, 0x49, 0x14, 0x42, 0xf8,
- 0x61, 0xc3, 0x0e, 0x41, 0x01, 0x7b, 0x01, 0xc2, 0x00, 0x9e, 0x01, 0x7c,
- 0x08, 0xc6, 0xcd, 0xb1, 0x01, 0x7c, 0x11, 0xc4, 0x1d, 0x5b, 0x01, 0x7e,
- 0x48, 0xc3, 0x00, 0x9f, 0x01, 0x78, 0x51, 0xc7, 0x5f, 0x5b, 0x01, 0x78,
- 0xf0, 0x94, 0x01, 0x7b, 0xfb, 0x02, 0xf8, 0x6d, 0x96, 0x01, 0x7d, 0xb8,
- 0xc3, 0x00, 0x34, 0x01, 0x79, 0x18, 0xc3, 0x00, 0x7b, 0x01, 0x78, 0x69,
- 0xc4, 0xe0, 0x0f, 0x01, 0x79, 0x59, 0xc5, 0xdc, 0xd2, 0x01, 0x7a, 0x81,
- 0x99, 0x01, 0x7a, 0xe1, 0xc3, 0x05, 0xd2, 0x01, 0x7c, 0x50, 0xc3, 0x45,
- 0x46, 0x01, 0x78, 0xd1, 0x03, 0xc2, 0xf8, 0x73, 0xc5, 0x78, 0x8a, 0x01,
- 0x7c, 0x80, 0xc2, 0x00, 0x7b, 0x01, 0x7b, 0x99, 0xc2, 0x03, 0x3d, 0x01,
- 0x7c, 0xf1, 0xc6, 0xc6, 0x3c, 0x01, 0x7e, 0x00, 0xc2, 0x11, 0xd4, 0x01,
- 0x79, 0x41, 0xc4, 0x02, 0x60, 0x01, 0x7c, 0x68, 0xc5, 0xc1, 0x57, 0x01,
- 0x78, 0xd9, 0xc6, 0xcd, 0x27, 0x01, 0x7a, 0xf0, 0xc2, 0x03, 0xa5, 0x01,
- 0x78, 0x99, 0xc3, 0x02, 0xa8, 0x01, 0x7d, 0x70, 0xc3, 0x14, 0x99, 0x01,
- 0x79, 0x29, 0xc2, 0x00, 0xbc, 0x01, 0x79, 0x78, 0xc4, 0xe3, 0x07, 0x01,
- 0x7a, 0x71, 0xc2, 0x00, 0x15, 0x01, 0x7c, 0x88, 0xc3, 0x00, 0x9f, 0x01,
- 0x7a, 0xa1, 0xc2, 0x44, 0x0d, 0x01, 0x7d, 0x0a, 0x02, 0xf8, 0x7b, 0xc3,
- 0x02, 0x33, 0x01, 0x7b, 0x49, 0xc3, 0x00, 0x9d, 0x01, 0x7e, 0x30, 0x87,
- 0x01, 0x7d, 0x19, 0x86, 0x01, 0x7d, 0xa8, 0xcc, 0x37, 0xc0, 0x01, 0x78,
- 0xa9, 0xc3, 0x00, 0x7b, 0x01, 0x79, 0x71, 0xc2, 0x00, 0x69, 0x01, 0x7b,
- 0xb0, 0x92, 0x01, 0x7a, 0x09, 0xc2, 0x00, 0x34, 0x01, 0x7d, 0x61, 0x96,
- 0x01, 0x7e, 0x78, 0xc2, 0x01, 0x12, 0x01, 0x7b, 0x71, 0xc3, 0x0c, 0x5b,
- 0x01, 0x7c, 0x20, 0xc7, 0xcb, 0xcf, 0x01, 0x79, 0x11, 0xc2, 0x18, 0x9f,
- 0x01, 0x7d, 0x30, 0xc2, 0x00, 0x34, 0x01, 0x7b, 0x91, 0xc2, 0x05, 0x88,
- 0x01, 0x7c, 0x58, 0x89, 0x01, 0x79, 0x21, 0xc4, 0x02, 0x92, 0x01, 0x7c,
- 0xf9, 0xc2, 0x01, 0x0b, 0x01, 0x7e, 0x18, 0x99, 0x01, 0x79, 0xc1, 0xcb,
- 0x8e, 0x4e, 0x01, 0x7b, 0x31, 0xc2, 0x00, 0x9e, 0x01, 0x7c, 0x41, 0xc2,
- 0x00, 0x2d, 0x01, 0x7c, 0xe9, 0xc2, 0x00, 0x8c, 0x01, 0x7d, 0xd8, 0xc5,
- 0xdf, 0xa2, 0x01, 0x79, 0xd1, 0xc4, 0x20, 0x37, 0x01, 0x7a, 0x01, 0xc3,
- 0x79, 0x0e, 0x01, 0x7c, 0x00, 0xc4, 0x9a, 0xec, 0x01, 0x7b, 0xa9, 0xc4,
- 0xe4, 0x13, 0x01, 0x7c, 0xc0, 0xc3, 0x28, 0x7f, 0x01, 0x7c, 0x71, 0xc2,
- 0x0f, 0x4d, 0x01, 0x7d, 0x69, 0xc3, 0x00, 0xdf, 0x01, 0x7e, 0x50, 0x96,
- 0x01, 0x7a, 0x31, 0xc2, 0x02, 0x60, 0x01, 0x7e, 0x80, 0xc2, 0x00, 0x9c,
- 0x01, 0x7a, 0xa9, 0xc3, 0x1e, 0x74, 0x01, 0x7b, 0x29, 0xc3, 0x01, 0xdd,
- 0x01, 0x7d, 0xf1, 0xc2, 0x00, 0xe5, 0x01, 0x7e, 0x10, 0xc4, 0x17, 0xa2,
- 0x01, 0x7a, 0xb9, 0xc2, 0x00, 0x35, 0x01, 0x7a, 0xd9, 0xc2, 0x00, 0x69,
- 0x01, 0x7d, 0x78, 0x9b, 0x01, 0x7d, 0xb1, 0xc3, 0x35, 0x4c, 0x01, 0x7e,
- 0xa8, 0xc6, 0xd5, 0x19, 0x01, 0x7d, 0xc9, 0xc2, 0x13, 0x91, 0x01, 0x7e,
- 0x60, 0xa5, 0x0b, 0x7c, 0xf9, 0xa3, 0x0b, 0x7c, 0xf1, 0xa2, 0x0b, 0x7c,
- 0xe9, 0xa1, 0x0b, 0x7c, 0xe1, 0x9f, 0x0b, 0x7c, 0xd9, 0x9e, 0x0b, 0x7c,
- 0xd0, 0xc2, 0x01, 0x29, 0x0b, 0x79, 0x29, 0x83, 0x0b, 0x78, 0x98, 0xc2,
- 0x1d, 0x5f, 0x0b, 0x7a, 0x09, 0x83, 0x0b, 0x79, 0xf0, 0x83, 0x0b, 0x79,
- 0xc9, 0xc2, 0x00, 0xa4, 0x0b, 0x79, 0x80, 0x89, 0x0b, 0x7b, 0x68, 0x89,
- 0x0b, 0x7b, 0x20, 0xcb, 0x1d, 0x7e, 0x01, 0x51, 0xd1, 0x45, 0x00, 0x6c,
- 0x42, 0xf8, 0x81, 0xd6, 0x2d, 0xcd, 0x01, 0x3b, 0xa9, 0xd4, 0x1a, 0x3f,
- 0x01, 0x3b, 0x48, 0xd6, 0x2d, 0xcd, 0x01, 0x3b, 0xa1, 0xd4, 0x1a, 0x3f,
- 0x01, 0x3b, 0x40, 0xda, 0x1a, 0x39, 0x01, 0x3b, 0x59, 0xd9, 0x1e, 0xbf,
- 0x01, 0x3b, 0x50, 0xca, 0x21, 0x3e, 0x0f, 0xbe, 0x29, 0xcd, 0x0e, 0x9f,
- 0x0f, 0xbe, 0x38, 0xcf, 0x15, 0x8e, 0x0f, 0xbd, 0xb1, 0xd2, 0x21, 0x36,
- 0x0f, 0xbe, 0x58, 0x97, 0x0b, 0x73, 0x98, 0x8b, 0x0b, 0x73, 0xf1, 0xc3,
- 0x7c, 0x3b, 0x0b, 0x73, 0x20, 0x87, 0x0b, 0x73, 0xd0, 0x89, 0x0b, 0x73,
- 0xb9, 0x9b, 0x0b, 0x73, 0xb8, 0x92, 0x0b, 0x73, 0xb0, 0x92, 0x0b, 0x73,
- 0x30, 0x97, 0x0b, 0x72, 0x98, 0x8b, 0x0b, 0x72, 0xf1, 0xc3, 0x7c, 0x3b,
- 0x0b, 0x72, 0x20, 0x87, 0x0b, 0x72, 0xd0, 0x89, 0x0b, 0x72, 0xb9, 0x9b,
- 0x0b, 0x72, 0xb8, 0x92, 0x0b, 0x72, 0xb0, 0x92, 0x0b, 0x72, 0x30, 0xcf,
- 0x65, 0x8d, 0x0b, 0x74, 0xb0, 0xcf, 0x65, 0x8d, 0x0b, 0x74, 0xa8, 0xc4,
- 0xe1, 0x8f, 0x0f, 0x41, 0xd1, 0xc4, 0xe3, 0x93, 0x0f, 0x41, 0xa1, 0xc5,
- 0xdb, 0x74, 0x0f, 0x40, 0x29, 0xc4, 0xe4, 0x0f, 0x0f, 0x42, 0xf1, 0xc5,
- 0xdb, 0xce, 0x0f, 0x42, 0xe9, 0xc5, 0xdf, 0x89, 0x0f, 0x44, 0xc1, 0xc5,
- 0xd9, 0xd5, 0x0f, 0x45, 0x09, 0xc6, 0xce, 0xc5, 0x0f, 0x45, 0x59, 0xc5,
- 0xde, 0x94, 0x0f, 0x45, 0x61, 0xc4, 0xe3, 0xcf, 0x0f, 0x45, 0xf8, 0xc5,
- 0xd9, 0xb7, 0x0f, 0x41, 0xc9, 0xc5, 0xdb, 0x5b, 0x0f, 0x43, 0x99, 0xc6,
- 0xcd, 0x99, 0x0f, 0x43, 0x79, 0xc4, 0xe3, 0x67, 0x0f, 0x43, 0x01, 0xc4,
- 0xe6, 0x4b, 0x0f, 0x42, 0xb9, 0xc5, 0xdc, 0x19, 0x0f, 0x42, 0x09, 0xc6,
- 0xcd, 0xcf, 0x0f, 0x43, 0xc9, 0xcb, 0x9a, 0x61, 0x0f, 0x44, 0x01, 0xc5,
- 0xdf, 0x7a, 0x0f, 0x44, 0x79, 0xc4, 0xe4, 0x7f, 0x0f, 0x45, 0xe8, 0xc4,
- 0xe3, 0xe3, 0x0f, 0x41, 0xc1, 0xc4, 0xe3, 0x23, 0x0f, 0x41, 0xb9, 0xc4,
- 0xe4, 0xb7, 0x0f, 0x41, 0xb1, 0xc4, 0xe2, 0xc7, 0x0f, 0x41, 0x81, 0xc4,
- 0xe6, 0x33, 0x0f, 0x41, 0x79, 0xc4, 0xe3, 0x87, 0x0f, 0x42, 0x61, 0xc4,
- 0xe3, 0xbf, 0x0f, 0x42, 0x59, 0xc4, 0xe4, 0x3b, 0x0f, 0x42, 0x31, 0xc4,
- 0xe1, 0xa7, 0x0f, 0x42, 0x29, 0xc4, 0x3e, 0xac, 0x0f, 0x42, 0x20, 0xc4,
- 0xe3, 0x8f, 0x0f, 0x41, 0x71, 0xc3, 0xe6, 0x52, 0x0f, 0x41, 0x21, 0xc3,
- 0xda, 0x20, 0x0f, 0x41, 0x19, 0xc3, 0xe7, 0xa5, 0x0f, 0x41, 0x11, 0xc4,
- 0xe1, 0x5f, 0x0f, 0x40, 0xe9, 0xc4, 0xb6, 0xcd, 0x0f, 0x40, 0xe1, 0xc4,
- 0xe5, 0x13, 0x0f, 0x40, 0xd9, 0xc4, 0xe3, 0x73, 0x0f, 0x42, 0x01, 0xc4,
- 0xe3, 0x0f, 0x0f, 0x41, 0xf9, 0xc4, 0xe4, 0x9b, 0x0f, 0x41, 0xf0, 0xc4,
- 0xe2, 0xfb, 0x0f, 0x40, 0xf9, 0xc5, 0xd9, 0xc6, 0x0f, 0x40, 0xc1, 0xc4,
- 0xdc, 0x7d, 0x0f, 0x40, 0x21, 0xc4, 0xe3, 0x3b, 0x0f, 0x43, 0x61, 0xc5,
- 0xdf, 0xbb, 0x0f, 0x42, 0x39, 0xc6, 0xcf, 0x01, 0x0f, 0x43, 0xb9, 0xc4,
- 0xe3, 0x7f, 0x0f, 0x44, 0x69, 0xc5, 0xdf, 0xac, 0x0f, 0x45, 0x01, 0xc6,
- 0xcd, 0x81, 0x0f, 0x45, 0x49, 0xc6, 0xd4, 0x41, 0x0f, 0x46, 0x18, 0xc5,
- 0xdc, 0x32, 0x0f, 0x40, 0xb9, 0xc5, 0xde, 0xb2, 0x0f, 0x43, 0xa1, 0xc5,
- 0xda, 0xc5, 0x0f, 0x43, 0x89, 0xc4, 0xe4, 0xe3, 0x0f, 0x42, 0x41, 0xc5,
- 0xd9, 0xbc, 0x0f, 0x41, 0xd9, 0xc6, 0xcc, 0x5b, 0x0f, 0x44, 0x51, 0xc4,
- 0xe6, 0x13, 0x0f, 0x44, 0x71, 0xc4, 0xdf, 0x7a, 0x0f, 0x44, 0x81, 0xc5,
- 0xdc, 0xff, 0x0f, 0x45, 0x39, 0xc6, 0xd4, 0x47, 0x0f, 0x46, 0x08, 0xc5,
- 0xdc, 0x28, 0x0f, 0x40, 0xb1, 0xc5, 0xdb, 0xfb, 0x0f, 0x40, 0xa9, 0xc5,
- 0xdb, 0x6f, 0x0f, 0x40, 0xa1, 0xc4, 0xe3, 0x4b, 0x0f, 0x40, 0x51, 0xc4,
- 0xe4, 0x7b, 0x0f, 0x40, 0x49, 0xc4, 0xe4, 0x2f, 0x0f, 0x40, 0x41, 0xc4,
- 0xe1, 0x97, 0x0f, 0x40, 0x11, 0xc4, 0xe1, 0x53, 0x0f, 0x40, 0x09, 0xc4,
- 0xe2, 0x63, 0x0f, 0x40, 0x00, 0xc5, 0xdb, 0x33, 0x0f, 0x40, 0x91, 0xc4,
- 0xcb, 0xdd, 0x0f, 0x40, 0x71, 0xc4, 0xe3, 0xb7, 0x0f, 0x40, 0x31, 0xc5,
- 0xde, 0xc1, 0x0f, 0x43, 0x69, 0xc5, 0xde, 0x7b, 0x0f, 0x43, 0x59, 0xc4,
- 0xe2, 0x37, 0x0f, 0x43, 0x49, 0xc6, 0xcc, 0x43, 0x0f, 0x43, 0xb1, 0xc6,
- 0xcc, 0xc1, 0x0f, 0x43, 0xc1, 0xc6, 0xcc, 0x2b, 0x0f, 0x44, 0xb1, 0xc6,
- 0xce, 0x8f, 0x0f, 0x45, 0x10, 0xc5, 0xdf, 0xb1, 0x0f, 0x40, 0x89, 0xc5,
- 0xdc, 0x7d, 0x0f, 0x40, 0x19, 0xc4, 0xe1, 0xb7, 0x0f, 0x42, 0x89, 0xc4,
- 0xe3, 0x9b, 0x0f, 0x42, 0x51, 0xc4, 0xe3, 0xb3, 0x0f, 0x44, 0x61, 0xc4,
- 0xe3, 0x5b, 0x0f, 0x44, 0x91, 0xc5, 0xdf, 0x84, 0x0f, 0x44, 0xa1, 0xc6,
- 0xcc, 0x4f, 0x0f, 0x45, 0x99, 0xc5, 0xd9, 0xd0, 0x0f, 0x45, 0xa1, 0xc6,
- 0xcf, 0x25, 0x0f, 0x46, 0x20, 0xc5, 0xdb, 0x4c, 0x0f, 0x43, 0x29, 0xc5,
- 0xdb, 0xec, 0x0f, 0x43, 0x21, 0xc5, 0xdc, 0x2d, 0x0f, 0x43, 0x19, 0xc4,
- 0xe4, 0x93, 0x0f, 0x42, 0xe1, 0xc4, 0xe4, 0x17, 0x0f, 0x42, 0xd9, 0xc4,
- 0xe4, 0x5f, 0x0f, 0x42, 0xd1, 0xc4, 0xe1, 0xa3, 0x0f, 0x42, 0xa9, 0xc4,
- 0xe1, 0x27, 0x0f, 0x42, 0xa1, 0xc4, 0xe2, 0xb7, 0x0f, 0x42, 0x99, 0xc4,
- 0xe5, 0x63, 0x0f, 0x42, 0x68, 0xc5, 0xdb, 0xa1, 0x0f, 0x41, 0xa9, 0xc4,
- 0x3f, 0x84, 0x0f, 0x41, 0x61, 0xc5, 0xdc, 0x9b, 0x0f, 0x40, 0x79, 0xc5,
- 0xda, 0xca, 0x0f, 0x43, 0xa9, 0xc5, 0xdb, 0x1f, 0x0f, 0x43, 0x09, 0xc5,
- 0xde, 0x8a, 0x0f, 0x44, 0x31, 0xc6, 0xcd, 0x1b, 0x0f, 0x45, 0x89, 0xc5,
- 0xda, 0x89, 0x0f, 0x45, 0xb0, 0xc5, 0xdb, 0xa6, 0x0f, 0x41, 0x99, 0xc4,
- 0xe4, 0xcf, 0x0f, 0x41, 0x59, 0xc4, 0xe2, 0x1f, 0x0f, 0x41, 0x51, 0xc4,
- 0xe2, 0xff, 0x0f, 0x41, 0x49, 0xc4, 0xe3, 0x77, 0x0f, 0x41, 0x09, 0xc5,
- 0xdb, 0x29, 0x0f, 0x40, 0x99, 0xc5, 0xde, 0x80, 0x0f, 0x43, 0x91, 0xc5,
- 0xda, 0xbb, 0x0f, 0x42, 0xf9, 0xc5, 0xdf, 0x52, 0x0f, 0x44, 0xf9, 0xc6,
- 0xcf, 0x0d, 0x0f, 0x45, 0xc0, 0xc4, 0xe3, 0x4f, 0x0f, 0x41, 0x91, 0xc5,
- 0xdc, 0x8c, 0x0f, 0x40, 0x69, 0xc4, 0xe3, 0xa3, 0x0f, 0x40, 0x61, 0xc5,
- 0xdc, 0x1e, 0x0f, 0x43, 0x31, 0xc4, 0xe1, 0xfb, 0x0f, 0x42, 0x79, 0xc9,
- 0xb1, 0x6e, 0x0f, 0x41, 0xe9, 0xc7, 0xc4, 0xe4, 0x0f, 0x43, 0xd1, 0xc4,
- 0xe2, 0x3f, 0x0f, 0x44, 0x21, 0xc6, 0xcd, 0xb7, 0x0f, 0x45, 0x21, 0xc5,
- 0xdb, 0x92, 0x0f, 0x45, 0x90, 0xc5, 0xda, 0x66, 0x0f, 0x41, 0x89, 0xc4,
- 0xe4, 0xa7, 0x0f, 0x41, 0x39, 0xc4, 0xe1, 0xff, 0x0f, 0x41, 0x29, 0xc5,
- 0xdb, 0x6a, 0x0f, 0x43, 0x39, 0xc5, 0xde, 0x76, 0x0f, 0x42, 0x81, 0xc4,
- 0xe3, 0x6f, 0x0f, 0x44, 0x29, 0xc6, 0xcc, 0x55, 0x0f, 0x44, 0x39, 0xc6,
- 0xcc, 0x73, 0x0f, 0x44, 0x41, 0xca, 0x9b, 0xc2, 0x0f, 0x44, 0xe1, 0xc6,
- 0xce, 0xd1, 0x0f, 0x46, 0x00, 0xc4, 0xe3, 0x6b, 0x0f, 0x41, 0x69, 0xc5,
- 0xdc, 0x3c, 0x0f, 0x40, 0x39, 0xc4, 0xe3, 0x33, 0x0f, 0x43, 0x41, 0xc9,
- 0xaa, 0xed, 0x0f, 0x42, 0x91, 0xc7, 0xc2, 0xa6, 0x0f, 0x44, 0x59, 0xc6,
- 0xcd, 0xc9, 0x0f, 0x44, 0xc9, 0xc5, 0xd9, 0xc1, 0x0f, 0x44, 0xd1, 0xc4,
- 0xe2, 0xd3, 0x0f, 0x45, 0x69, 0xc5, 0xd9, 0x67, 0x0f, 0x45, 0xe1, 0xc6,
- 0xcf, 0x2b, 0x0f, 0x46, 0x10, 0xc3, 0xb4, 0x7c, 0x0f, 0x41, 0x41, 0xc5,
- 0xd9, 0x6c, 0x0f, 0x40, 0x81, 0xc4, 0xe4, 0xf3, 0x0f, 0x43, 0x71, 0xc5,
- 0xdf, 0xa7, 0x0f, 0x42, 0xc1, 0xc6, 0xcc, 0x1f, 0x0f, 0x43, 0xd9, 0xc5,
- 0xdd, 0x13, 0x0f, 0x44, 0x99, 0xca, 0x9d, 0xd4, 0x0f, 0x44, 0xf1, 0xc5,
- 0xdb, 0xdd, 0x0f, 0x45, 0x41, 0xc6, 0xd4, 0x0b, 0x0f, 0x45, 0xb9, 0xc5,
- 0xd9, 0xe9, 0x0f, 0x45, 0xf0, 0xc3, 0xe7, 0x42, 0x0f, 0x41, 0x31, 0xc5,
- 0xda, 0xb6, 0x0f, 0x41, 0x01, 0xc5, 0xdd, 0xc7, 0x0f, 0x43, 0x11, 0xc5,
- 0xdb, 0xb5, 0x0f, 0x42, 0xb1, 0xc5, 0xd9, 0xe4, 0x0f, 0x42, 0x49, 0xcc,
- 0x8d, 0x58, 0x0f, 0x44, 0x09, 0xc5, 0xda, 0x57, 0x0f, 0x44, 0x89, 0xcb,
- 0x9a, 0xe5, 0x0f, 0x44, 0xe9, 0xc5, 0xdc, 0x14, 0x0f, 0x45, 0x19, 0xc5,
- 0xdb, 0x56, 0x0f, 0x45, 0x50, 0xc5, 0xdd, 0xf4, 0x0f, 0x40, 0xf1, 0xc6,
- 0xce, 0x7d, 0x0f, 0x40, 0xc9, 0xc5, 0xdb, 0x60, 0x0f, 0x42, 0x71, 0xc4,
- 0x99, 0xb1, 0x0f, 0x41, 0xe1, 0xc7, 0xc4, 0x51, 0x0f, 0x43, 0xe1, 0xc7,
- 0xc2, 0x1a, 0x0f, 0x43, 0xf1, 0xc4, 0xe4, 0x0b, 0x0f, 0x44, 0x19, 0xc5,
- 0xdc, 0x0a, 0x0f, 0x45, 0x29, 0xc5, 0xda, 0x7f, 0x0f, 0x45, 0xa9, 0xc4,
- 0xe2, 0xc3, 0x0f, 0x45, 0xd8, 0xc6, 0xd4, 0x11, 0x0f, 0x40, 0xd1, 0xc4,
- 0xcc, 0x43, 0x0f, 0x43, 0x51, 0xc4, 0xe2, 0x47, 0x0f, 0x42, 0x19, 0xc5,
- 0xdd, 0xe5, 0x0f, 0x42, 0x11, 0xcb, 0x99, 0xb1, 0x0f, 0x44, 0x11, 0xc6,
- 0xcf, 0x1f, 0x0f, 0x44, 0x49, 0xc6, 0xd4, 0x53, 0x0f, 0x44, 0xb9, 0xc6,
- 0xcc, 0x3d, 0x0f, 0x44, 0xd9, 0xc4, 0xe1, 0x13, 0x0f, 0x45, 0xc9, 0xc4,
- 0xe4, 0x87, 0x0f, 0x45, 0xd0, 0xc5, 0xdb, 0x2e, 0x0f, 0x40, 0x59, 0xc6,
- 0xcc, 0x07, 0x0f, 0x43, 0x81, 0xc4, 0xdf, 0xa7, 0x0f, 0x42, 0xc9, 0xc6,
- 0xcf, 0x07, 0x0f, 0x43, 0xe9, 0xc7, 0xc1, 0xdb, 0x0f, 0x43, 0xf9, 0xc5,
- 0xda, 0x75, 0x0f, 0x44, 0xa9, 0xc5, 0xda, 0x2a, 0x0f, 0x45, 0x31, 0xc5,
- 0xd9, 0x85, 0x0f, 0x45, 0x71, 0xc5, 0xdc, 0x23, 0x0f, 0x45, 0x79, 0xc5,
- 0xdd, 0x18, 0x0f, 0x45, 0x80, 0xc3, 0x10, 0xdf, 0x0f, 0x46, 0x81, 0x10,
- 0x42, 0xf8, 0x99, 0xcb, 0x71, 0x62, 0x08, 0x4f, 0xf9, 0xcd, 0x80, 0x3a,
- 0x08, 0x4f, 0xc1, 0xcb, 0x97, 0xc2, 0x08, 0x4f, 0xb8, 0xcd, 0x7c, 0xfa,
- 0x08, 0x4f, 0xe9, 0xce, 0x71, 0x5f, 0x08, 0x4d, 0xe0, 0xcd, 0x71, 0x60,
- 0x08, 0x4f, 0xe1, 0xcb, 0x91, 0x92, 0x08, 0x4f, 0xd8, 0xcc, 0x83, 0xd4,
- 0x08, 0x4f, 0xd1, 0xcc, 0x8b, 0xe4, 0x08, 0x4f, 0xc8, 0xc7, 0x71, 0x65,
- 0x08, 0x4f, 0xb1, 0xc4, 0x03, 0x2b, 0x08, 0x4d, 0xe8, 0x00, 0xc2, 0xf8,
- 0xa3, 0xcb, 0x94, 0x1b, 0x08, 0x4f, 0x60, 0x00, 0xc2, 0xf8, 0xb2, 0xca,
- 0x94, 0x1c, 0x08, 0x4f, 0x58, 0xc4, 0x18, 0x83, 0x08, 0x4e, 0x33, 0x02,
- 0xf8, 0xc1, 0xc2, 0x26, 0x51, 0x08, 0x4e, 0x2a, 0x02, 0xf8, 0xce, 0x0b,
- 0xc2, 0xf8, 0xdb, 0x11, 0x42, 0xf8, 0xed, 0x0a, 0xc2, 0xf8, 0xff, 0x19,
- 0xc2, 0xf9, 0x11, 0xc2, 0x00, 0x4d, 0x08, 0x4e, 0x4a, 0x02, 0xf9, 0x21,
- 0x00, 0x42, 0xf9, 0x27, 0xc3, 0xe7, 0x27, 0x08, 0x4d, 0xf9, 0xc3, 0x0a,
- 0x25, 0x08, 0x4d, 0xf0, 0xc2, 0x0b, 0xc6, 0x08, 0x4d, 0xb9, 0x16, 0xc2,
- 0xf9, 0x36, 0xc2, 0x0f, 0x60, 0x08, 0x4d, 0x99, 0x0d, 0xc2, 0xf9, 0x42,
- 0x15, 0xc2, 0xf9, 0x4c, 0x83, 0x08, 0x4d, 0x03, 0x02, 0xf9, 0x54, 0xc3,
- 0xbf, 0xbc, 0x08, 0x4d, 0x71, 0xc2, 0x00, 0xc7, 0x08, 0x4d, 0x61, 0xc2,
- 0x02, 0x59, 0x08, 0x4d, 0x59, 0x10, 0xc2, 0xf9, 0x5a, 0xc2, 0x00, 0xad,
- 0x08, 0x4d, 0x41, 0xc2, 0x03, 0xa4, 0x08, 0x4d, 0x39, 0xc2, 0x00, 0xde,
- 0x08, 0x4d, 0x31, 0xc2, 0x01, 0x09, 0x08, 0x4d, 0x29, 0xc2, 0x1d, 0x5f,
- 0x08, 0x4d, 0x21, 0x91, 0x08, 0x4d, 0x19, 0x8b, 0x08, 0x4d, 0x11, 0x87,
- 0x08, 0x4d, 0x08, 0x91, 0x08, 0x4c, 0xe1, 0x87, 0x08, 0x4c, 0xd3, 0x02,
- 0xf9, 0x62, 0x83, 0x08, 0x4c, 0xc2, 0x02, 0xf9, 0x68, 0x83, 0x08, 0x4c,
- 0xb1, 0xc2, 0x00, 0xa4, 0x08, 0x4c, 0x88, 0x87, 0x08, 0x4c, 0xa9, 0x83,
- 0x08, 0x4c, 0x9a, 0x02, 0xf9, 0x6e, 0xc2, 0xe7, 0x79, 0x08, 0x4c, 0x78,
- 0xc2, 0xe7, 0x79, 0x08, 0x4c, 0x38, 0x83, 0x08, 0x4c, 0x53, 0x02, 0xf9,
- 0x74, 0x87, 0x08, 0x4c, 0x62, 0x02, 0xf9, 0x7a, 0x60, 0x03, 0xe7, 0x42,
- 0xf9, 0x80, 0x97, 0x05, 0x57, 0x79, 0x8b, 0x05, 0x57, 0x68, 0xc7, 0xca,
- 0x32, 0x05, 0x5f, 0x08, 0xc7, 0xca, 0x32, 0x05, 0x5e, 0xf8, 0xc7, 0xca,
- 0x32, 0x05, 0x5f, 0x00, 0xc2, 0x00, 0xa4, 0x05, 0x57, 0x29, 0x83, 0x05,
- 0x57, 0x20, 0xc7, 0xca, 0x32, 0x05, 0x5e, 0xf0, 0xc7, 0xca, 0x32, 0x05,
- 0x5e, 0xd8, 0xc2, 0x00, 0xa4, 0x05, 0x57, 0x39, 0x83, 0x05, 0x57, 0x30,
- 0x48, 0xbc, 0xe5, 0xc2, 0xf9, 0x98, 0x47, 0x07, 0x18, 0xc2, 0xf9, 0xa8,
- 0x4d, 0x76, 0x05, 0xc2, 0xfa, 0x0f, 0xd0, 0x0b, 0x37, 0x00, 0x16, 0x31,
- 0x47, 0x59, 0x18, 0xc2, 0xfa, 0x1b, 0xcb, 0x94, 0x68, 0x00, 0x16, 0xf9,
- 0xc4, 0x0e, 0x40, 0x05, 0x3c, 0x48, 0x45, 0x04, 0x74, 0xc2, 0xfa, 0x27,
- 0x4b, 0x0a, 0x0a, 0xc2, 0xfa, 0xca, 0x4a, 0xa0, 0x4a, 0xc2, 0xfa, 0xd6,
- 0x0a, 0x42, 0xfa, 0xe2, 0x45, 0x02, 0x13, 0xc2, 0xfa, 0xee, 0x07, 0xc2,
- 0xfb, 0x00, 0xca, 0xa0, 0x22, 0x00, 0x16, 0xf1, 0x46, 0x0c, 0x66, 0x42,
- 0xfb, 0x0a, 0x44, 0x02, 0x4e, 0xc2, 0xfb, 0x28, 0xcc, 0x77, 0x8c, 0x08,
- 0x3d, 0xb9, 0x42, 0x01, 0x12, 0x42, 0xfb, 0x3a, 0xcb, 0x21, 0x79, 0x00,
- 0x16, 0x03, 0x02, 0xfb, 0x44, 0xcb, 0x1f, 0x95, 0x00, 0x16, 0x59, 0xcb,
- 0x93, 0x76, 0x00, 0x87, 0xe0, 0xcd, 0x7f, 0x0f, 0x08, 0x3d, 0xa9, 0x45,
- 0x3c, 0xb7, 0x42, 0xfb, 0x4a, 0xcb, 0x83, 0x99, 0x08, 0x3d, 0xb1, 0x11,
- 0x42, 0xfb, 0x56, 0xcd, 0x7c, 0x78, 0x08, 0x3d, 0xc1, 0xc9, 0x30, 0x6e,
- 0x00, 0x15, 0xe1, 0xcb, 0x84, 0x41, 0x00, 0x16, 0x50, 0xc4, 0x18, 0x48,
- 0x00, 0x15, 0xc9, 0xc8, 0x68, 0x22, 0x00, 0x16, 0xb0, 0xcb, 0x53, 0xf8,
- 0x00, 0x15, 0xd9, 0xcf, 0x37, 0x87, 0x00, 0x16, 0x80, 0x42, 0x00, 0x3f,
- 0xc2, 0xfb, 0x68, 0xca, 0xa5, 0xd6, 0x00, 0x17, 0x69, 0x95, 0x05, 0x3b,
- 0x80, 0xcc, 0x36, 0xda, 0x00, 0x16, 0x41, 0xc6, 0xc4, 0x28, 0x00, 0x17,
- 0x60, 0xc5, 0x60, 0xe7, 0x00, 0x16, 0x49, 0x0b, 0x42, 0xfb, 0x74, 0x45,
- 0xdb, 0x10, 0xc2, 0xfb, 0x7e, 0x43, 0x00, 0x58, 0x42, 0xfb, 0x8a, 0x44,
- 0x05, 0x2c, 0xc2, 0xfb, 0x96, 0xd4, 0x37, 0x82, 0x00, 0x16, 0x88, 0xd6,
- 0x2e, 0x3b, 0x00, 0x17, 0x51, 0xd7, 0x29, 0x96, 0x00, 0x17, 0x58, 0xc4,
- 0x22, 0x71, 0x08, 0xb2, 0xc9, 0xc5, 0x01, 0xdb, 0x08, 0xb2, 0xc1, 0x15,
- 0xc2, 0xfb, 0xa8, 0x08, 0xc2, 0xfb, 0xb4, 0x16, 0xc2, 0xfb, 0xc0, 0xc3,
- 0x01, 0xb4, 0x08, 0xb2, 0x89, 0xc4, 0x15, 0xd3, 0x08, 0xb2, 0x80, 0xca,
- 0xa5, 0xe0, 0x08, 0xb2, 0x01, 0xc7, 0x11, 0x41, 0x08, 0xb1, 0xe8, 0xc4,
- 0x0f, 0x7c, 0x08, 0xb1, 0xf9, 0xc5, 0x44, 0x7b, 0x08, 0xb1, 0xf0, 0x97,
- 0x08, 0xb1, 0xe1, 0x8b, 0x08, 0xb1, 0xd1, 0x83, 0x08, 0xb1, 0x80, 0x8e,
- 0x08, 0xb1, 0xbb, 0x02, 0xfb, 0xcc, 0x94, 0x08, 0xb1, 0xaa, 0x02, 0xfb,
- 0xd0, 0x97, 0x08, 0xb1, 0xa0, 0x8b, 0x08, 0xb1, 0x90, 0xc2, 0x00, 0xc7,
- 0x08, 0xb1, 0x79, 0x83, 0x08, 0xb1, 0x48, 0x83, 0x08, 0xb1, 0x69, 0xc2,
- 0x0c, 0x65, 0x08, 0xb1, 0x61, 0xc2, 0x00, 0xa4, 0x08, 0xb1, 0x58, 0x83,
- 0x08, 0xb1, 0x51, 0x47, 0xac, 0xc2, 0x42, 0xfb, 0xd4, 0xc2, 0x00, 0xa4,
- 0x08, 0xb1, 0x29, 0x83, 0x08, 0xb1, 0x20, 0xc2, 0x00, 0xa4, 0x08, 0xb1,
- 0x19, 0x83, 0x08, 0xb1, 0x10, 0x83, 0x08, 0xb1, 0x09, 0xc2, 0x00, 0xc1,
- 0x08, 0xb0, 0xe1, 0xc2, 0x1d, 0x5f, 0x08, 0xb0, 0xb9, 0xc2, 0x01, 0x29,
- 0x08, 0xb0, 0x90, 0xc2, 0x00, 0xa4, 0x08, 0xb1, 0x01, 0x83, 0x08, 0xb0,
- 0xf9, 0x06, 0x42, 0xfb, 0xe2, 0xc2, 0x00, 0xa4, 0x08, 0xb0, 0xf1, 0x83,
- 0x08, 0xb0, 0xe9, 0x16, 0x42, 0xfb, 0xec, 0xc2, 0x00, 0xa4, 0x08, 0xb0,
- 0xb1, 0x83, 0x08, 0xb0, 0xa8, 0xc2, 0x00, 0xa4, 0x08, 0xb0, 0xa1, 0x83,
- 0x08, 0xb0, 0x98, 0xc2, 0x00, 0xa4, 0x08, 0xb0, 0x89, 0x83, 0x08, 0xb0,
- 0x80, 0xc2, 0x00, 0xa4, 0x08, 0xb0, 0x79, 0x83, 0x08, 0xb0, 0x70, 0x97,
- 0x08, 0xb0, 0x69, 0x8b, 0x08, 0xb0, 0x59, 0x83, 0x08, 0xb0, 0x08, 0x97,
- 0x08, 0xb0, 0x28, 0x8b, 0x08, 0xb0, 0x18, 0xcf, 0x09, 0xf8, 0x08, 0xb3,
- 0x59, 0xc8, 0x00, 0xbf, 0x08, 0xb3, 0x50, 0xc4, 0x18, 0x83, 0x00, 0xc0,
- 0xb9, 0xc2, 0x26, 0x51, 0x00, 0xc0, 0xb0, 0xc3, 0x0c, 0x5b, 0x00, 0xc0,
- 0xa9, 0xc3, 0x06, 0x9e, 0x00, 0xc0, 0xa0, 0xc4, 0x04, 0x5e, 0x00, 0xc0,
- 0x99, 0xc2, 0x01, 0x47, 0x00, 0xc0, 0x90, 0x49, 0xb0, 0x69, 0xc2, 0xfb,
- 0xf6, 0xc3, 0xaf, 0x4c, 0x00, 0xc3, 0xb9, 0xc2, 0x00, 0x67, 0x00, 0xc3,
- 0xb1, 0xc2, 0x02, 0x59, 0x00, 0xc3, 0xa9, 0xc2, 0x04, 0x2b, 0x00, 0xc3,
- 0xa1, 0x8b, 0x00, 0xc3, 0x98, 0x06, 0xc2, 0xfc, 0x2a, 0x45, 0x03, 0x2b,
- 0xc2, 0xfc, 0x37, 0x83, 0x00, 0xc4, 0x3b, 0x02, 0xfc, 0x41, 0x1c, 0xc2,
- 0xfc, 0x4b, 0xc3, 0x1a, 0x80, 0x00, 0xc4, 0xa1, 0x12, 0xc2, 0xfc, 0x55,
- 0x16, 0xc2, 0xfc, 0x5f, 0x10, 0xc2, 0xfc, 0x6d, 0xc2, 0x03, 0x40, 0x00,
- 0xc4, 0x59, 0xc2, 0x04, 0x2b, 0x00, 0xc4, 0x49, 0x8b, 0x00, 0xc4, 0x43,
- 0x02, 0xfc, 0x79, 0xc6, 0x90, 0x00, 0x00, 0xc4, 0x29, 0xc7, 0x61, 0x99,
- 0x00, 0xc4, 0x19, 0xcb, 0x98, 0x25, 0x00, 0xc4, 0x08, 0x03, 0xc2, 0xfc,
- 0x7f, 0x06, 0xc2, 0xfc, 0x8b, 0xc3, 0x05, 0xe1, 0x00, 0xc2, 0xd9, 0x0c,
- 0xc2, 0xfc, 0x95, 0xc3, 0x3b, 0xc7, 0x00, 0xc2, 0xc9, 0xc2, 0x01, 0x29,
- 0x00, 0xc2, 0x73, 0x02, 0xfc, 0x9f, 0xc2, 0x04, 0x2b, 0x00, 0xc2, 0xb9,
- 0xc2, 0x01, 0x09, 0x00, 0xc2, 0xb1, 0xc2, 0x1d, 0x5f, 0x00, 0xc2, 0xa9,
- 0x16, 0xc2, 0xfc, 0xa3, 0xc3, 0x1b, 0xb6, 0x00, 0xc2, 0x91, 0xc2, 0x00,
- 0xad, 0x00, 0xc2, 0x79, 0xc2, 0x0f, 0x60, 0x00, 0xc2, 0x69, 0xc2, 0x03,
- 0xa4, 0x00, 0xc2, 0x61, 0xc2, 0x00, 0xde, 0x00, 0xc2, 0x59, 0x97, 0x00,
- 0xc2, 0x3b, 0x02, 0xfc, 0xad, 0x91, 0x00, 0xc2, 0x33, 0x02, 0xfc, 0xb1,
- 0x8b, 0x00, 0xc2, 0x29, 0x87, 0x00, 0xc2, 0x21, 0xcf, 0x6b, 0xc3, 0x00,
- 0xc2, 0x18, 0xce, 0x17, 0xc0, 0x00, 0xc3, 0xc0, 0x1c, 0xc2, 0xfc, 0xb5,
- 0xc3, 0x1b, 0xb6, 0x00, 0xc3, 0x89, 0xc3, 0x4b, 0xf4, 0x00, 0xc3, 0x81,
- 0x16, 0xc2, 0xfc, 0xbf, 0xc2, 0x00, 0xa4, 0x00, 0xc3, 0x2b, 0x02, 0xfc,
- 0xc9, 0xc2, 0x01, 0x29, 0x00, 0xc3, 0x23, 0x02, 0xfc, 0xcd, 0xc2, 0x00,
- 0x67, 0x00, 0xc3, 0x59, 0xc2, 0x24, 0x58, 0x00, 0xc3, 0x51, 0xc2, 0x0b,
- 0xc6, 0x00, 0xc3, 0x49, 0xc3, 0x02, 0x28, 0x00, 0xc3, 0x39, 0xc2, 0x03,
- 0xa4, 0x00, 0xc3, 0x31, 0xc2, 0x04, 0x2b, 0x00, 0xc3, 0x19, 0xc3, 0x01,
- 0x93, 0x00, 0xc3, 0x11, 0x97, 0x00, 0xc3, 0x0b, 0x02, 0xfc, 0xd1, 0x8b,
- 0x00, 0xc2, 0xf3, 0x02, 0xfc, 0xd5, 0x87, 0x00, 0xc2, 0xe8, 0xc4, 0x04,
- 0x5e, 0x00, 0xc0, 0x69, 0xc2, 0x01, 0x47, 0x00, 0xc0, 0x60, 0xc4, 0x32,
- 0x6d, 0x0e, 0xb7, 0x20, 0xc2, 0x00, 0xb3, 0x0e, 0xb7, 0x41, 0xc6, 0x12,
- 0x12, 0x0e, 0xb7, 0x30, 0xc4, 0xd7, 0x88, 0x0e, 0xb7, 0x28, 0xc2, 0x00,
- 0x0a, 0x0e, 0xb7, 0xc0, 0xc3, 0x05, 0xe7, 0x0e, 0xb7, 0x18, 0xc4, 0xd9,
- 0x9f, 0x0e, 0xb7, 0x10, 0x0f, 0x42, 0xfc, 0xd9, 0xc2, 0x00, 0xcb, 0x0e,
- 0xb7, 0xc9, 0xc2, 0x00, 0x0a, 0x0e, 0xb7, 0xb9, 0x8b, 0x0e, 0xb7, 0x88,
- 0xc6, 0x12, 0x12, 0x0e, 0xb7, 0xb0, 0xc2, 0x13, 0xa9, 0x0e, 0xb7, 0xa9,
- 0xc4, 0x89, 0x81, 0x0e, 0xb7, 0x4a, 0x02, 0xfc, 0xe5, 0xc4, 0x1c, 0x84,
- 0x0e, 0xb7, 0xa0, 0xc2, 0x00, 0xf6, 0x0e, 0xb7, 0x90, 0x8b, 0x0e, 0xb7,
- 0x78, 0x97, 0x0e, 0xb7, 0x70, 0x97, 0x0e, 0xb7, 0x68, 0xc4, 0xda, 0xda,
- 0x0e, 0xb7, 0x60, 0xc4, 0x8b, 0x49, 0x0e, 0xb7, 0x58, 0xc3, 0x00, 0xb2,
- 0x0e, 0xb7, 0x50, 0xc3, 0x05, 0xe7, 0x0e, 0xb7, 0x38, 0x0f, 0x42, 0xfc,
- 0xeb, 0xc2, 0x00, 0xcb, 0x0e, 0xb8, 0x99, 0xc2, 0x00, 0x0a, 0x0e, 0xb8,
- 0x89, 0x8b, 0x0e, 0xb8, 0x58, 0xc2, 0x00, 0x0a, 0x0e, 0xb8, 0x90, 0xc6,
- 0x12, 0x12, 0x0e, 0xb8, 0x80, 0xc2, 0x13, 0xa9, 0x0e, 0xb8, 0x79, 0xc4,
- 0x89, 0x81, 0x0e, 0xb8, 0x18, 0xc4, 0x1c, 0x84, 0x0e, 0xb8, 0x70, 0xca,
- 0x92, 0xd2, 0x0e, 0xb8, 0x68, 0xc2, 0x00, 0xf6, 0x0e, 0xb8, 0x60, 0x8b,
- 0x0e, 0xb8, 0x48, 0x97, 0x0e, 0xb8, 0x40, 0x97, 0x0e, 0xb8, 0x38, 0xc4,
- 0xda, 0xda, 0x0e, 0xb8, 0x30, 0xc4, 0x8b, 0x49, 0x0e, 0xb8, 0x28, 0xc3,
- 0x00, 0xb2, 0x0e, 0xb8, 0x20, 0xc2, 0x00, 0xb3, 0x0e, 0xb8, 0x11, 0xc6,
- 0x12, 0x12, 0x0e, 0xb8, 0x00, 0xc3, 0x05, 0xe7, 0x0e, 0xb8, 0x08, 0xc4,
- 0xd7, 0x88, 0x0e, 0xb7, 0xf9, 0x47, 0x3b, 0xb1, 0x42, 0xfc, 0xf7, 0xc4,
- 0x32, 0x6d, 0x0e, 0xb7, 0xf0, 0xc3, 0x05, 0xe7, 0x0e, 0xb7, 0xe8, 0xc4,
- 0xd9, 0x9f, 0x0e, 0xb7, 0xe0, 0x9c, 0x0e, 0xa1, 0x9b, 0x02, 0xfc, 0xff,
- 0x9b, 0x0e, 0xa1, 0x91, 0x9a, 0x0e, 0xa1, 0x8b, 0x02, 0xfd, 0x05, 0x99,
- 0x0e, 0xa1, 0x81, 0x98, 0x0e, 0xa1, 0x79, 0x97, 0x0e, 0xa1, 0x73, 0x02,
- 0xfd, 0x09, 0x86, 0x0e, 0xa0, 0xeb, 0x02, 0xfd, 0x0f, 0x91, 0x0e, 0xa1,
- 0x43, 0x02, 0xfd, 0x1b, 0x92, 0x0e, 0xa1, 0x4b, 0x02, 0xfd, 0x1f, 0x85,
- 0x0e, 0xa0, 0xe3, 0x02, 0xfd, 0x2f, 0x96, 0x0e, 0xa1, 0x6b, 0x02, 0xfd,
- 0x35, 0x95, 0x0e, 0xa1, 0x63, 0x02, 0xfd, 0x41, 0x88, 0x0e, 0xa0, 0xfb,
- 0x02, 0xfd, 0x47, 0x94, 0x0e, 0xa1, 0x5b, 0x02, 0xfd, 0x4d, 0x90, 0x0e,
- 0xa1, 0x3b, 0x02, 0xfd, 0x53, 0x8f, 0x0e, 0xa1, 0x33, 0x02, 0xfd, 0x57,
- 0x8e, 0x0e, 0xa1, 0x2b, 0x02, 0xfd, 0x5b, 0x8d, 0x0e, 0xa1, 0x23, 0x02,
- 0xfd, 0x61, 0x8b, 0x0e, 0xa1, 0x13, 0x02, 0xfd, 0x67, 0x87, 0x0e, 0xa0,
- 0xf3, 0x02, 0xfd, 0x6d, 0x89, 0x0e, 0xa1, 0x03, 0x02, 0xfd, 0x79, 0x84,
- 0x0e, 0xa0, 0xdb, 0x02, 0xfd, 0x7f, 0x83, 0x0e, 0xa0, 0xd3, 0x02, 0xfd,
- 0x85, 0x93, 0x0e, 0xa1, 0x51, 0x8c, 0x0e, 0xa1, 0x19, 0x8a, 0x0e, 0xa1,
- 0x08, 0x46, 0x04, 0x73, 0xc2, 0xfd, 0x8b, 0x48, 0x07, 0x17, 0x42, 0xfd,
- 0xf3, 0xc4, 0x18, 0x83, 0x0e, 0xbe, 0xa9, 0xc2, 0x26, 0x51, 0x0e, 0xbe,
- 0xa0, 0xc3, 0x0c, 0x5b, 0x0e, 0xbe, 0x99, 0xc3, 0x06, 0x9e, 0x0e, 0xbe,
- 0x90, 0xc4, 0x04, 0x5e, 0x0e, 0xbe, 0x89, 0xc2, 0x01, 0x47, 0x0e, 0xbe,
- 0x80, 0xc6, 0x52, 0x38, 0x0e, 0xbe, 0x51, 0xc4, 0xd7, 0x88, 0x0e, 0xb5,
- 0x58, 0x0f, 0x42, 0xfe, 0x5b, 0xc2, 0x00, 0xcb, 0x0e, 0xb5, 0xf9, 0xc2,
- 0x00, 0x0a, 0x0e, 0xb5, 0xe9, 0x8b, 0x0e, 0xb5, 0xb8, 0xc2, 0x00, 0x0a,
- 0x0e, 0xb5, 0xf0, 0xc6, 0x12, 0x12, 0x0e, 0xb5, 0xe0, 0xc2, 0x13, 0xa9,
- 0x0e, 0xb5, 0xd9, 0xc4, 0x89, 0x81, 0x0e, 0xb5, 0x7a, 0x02, 0xfe, 0x67,
- 0xc4, 0x1c, 0x84, 0x0e, 0xb5, 0xd0, 0xc2, 0x00, 0xf6, 0x0e, 0xb5, 0xc0,
- 0x8b, 0x0e, 0xb5, 0xa8, 0x97, 0x0e, 0xb5, 0xa0, 0x97, 0x0e, 0xb5, 0x98,
- 0xc4, 0xda, 0xda, 0x0e, 0xb5, 0x90, 0xc4, 0x8b, 0x49, 0x0e, 0xb5, 0x88,
- 0xc3, 0x00, 0xb2, 0x0e, 0xb5, 0x80, 0xc2, 0x00, 0xb3, 0x0e, 0xb5, 0x71,
- 0xc6, 0x12, 0x12, 0x0e, 0xb5, 0x60, 0xc3, 0x05, 0xe7, 0x0e, 0xb5, 0x68,
- 0xc4, 0x32, 0x6d, 0x0e, 0xb5, 0x50, 0xc3, 0x05, 0xe7, 0x0e, 0xb5, 0x48,
- 0xc4, 0xd9, 0x9f, 0x0e, 0xb5, 0x40, 0xc8, 0x9d, 0xa4, 0x0e, 0xba, 0xa9,
- 0xc9, 0xaa, 0xa5, 0x0e, 0xba, 0x99, 0xd3, 0x45, 0x3e, 0x0e, 0xba, 0x78,
- 0x91, 0x0e, 0xa4, 0x83, 0x02, 0xfe, 0x6d, 0x92, 0x0e, 0xa4, 0x8b, 0x02,
- 0xfe, 0x71, 0x85, 0x0e, 0xa4, 0x23, 0x02, 0xfe, 0x81, 0x97, 0x0e, 0xa4,
- 0xb3, 0x02, 0xfe, 0x87, 0x96, 0x0e, 0xa4, 0xab, 0x02, 0xfe, 0x8d, 0x95,
- 0x0e, 0xa4, 0xa3, 0x02, 0xfe, 0x99, 0x88, 0x0e, 0xa4, 0x3b, 0x02, 0xfe,
- 0x9f, 0x94, 0x0e, 0xa4, 0x9b, 0x02, 0xfe, 0xa5, 0x9a, 0x0e, 0xa4, 0xcb,
- 0x02, 0xfe, 0xab, 0x90, 0x0e, 0xa4, 0x7b, 0x02, 0xfe, 0xaf, 0x8f, 0x0e,
- 0xa4, 0x73, 0x02, 0xfe, 0xb3, 0x8e, 0x0e, 0xa4, 0x6b, 0x02, 0xfe, 0xb7,
- 0x8d, 0x0e, 0xa4, 0x63, 0x02, 0xfe, 0xbd, 0x8b, 0x0e, 0xa4, 0x53, 0x02,
- 0xfe, 0xc3, 0x87, 0x0e, 0xa4, 0x33, 0x02, 0xfe, 0xc9, 0x9c, 0x0e, 0xa4,
- 0xdb, 0x02, 0xfe, 0xd5, 0x86, 0x0e, 0xa4, 0x2b, 0x02, 0xfe, 0xdb, 0x89,
- 0x0e, 0xa4, 0x43, 0x02, 0xfe, 0xe1, 0x84, 0x0e, 0xa4, 0x1b, 0x02, 0xfe,
- 0xe7, 0x83, 0x0e, 0xa4, 0x13, 0x02, 0xfe, 0xed, 0x9b, 0x0e, 0xa4, 0xd1,
- 0x99, 0x0e, 0xa4, 0xc1, 0x98, 0x0e, 0xa4, 0xb9, 0x93, 0x0e, 0xa4, 0x91,
- 0x8c, 0x0e, 0xa4, 0x59, 0x8a, 0x0e, 0xa4, 0x48, 0x91, 0x0e, 0xa3, 0xb3,
- 0x02, 0xfe, 0xf3, 0x92, 0x0e, 0xa3, 0xbb, 0x02, 0xfe, 0xf7, 0x85, 0x0e,
- 0xa3, 0x53, 0x02, 0xff, 0x07, 0x97, 0x0e, 0xa3, 0xe3, 0x02, 0xff, 0x0d,
- 0x96, 0x0e, 0xa3, 0xdb, 0x02, 0xff, 0x13, 0x95, 0x0e, 0xa3, 0xd3, 0x02,
- 0xff, 0x22, 0x94, 0x0e, 0xa3, 0xcb, 0x02, 0xff, 0x28, 0x9a, 0x0e, 0xa3,
- 0xfb, 0x02, 0xff, 0x2e, 0x90, 0x0e, 0xa3, 0xab, 0x02, 0xff, 0x32, 0x8f,
- 0x0e, 0xa3, 0xa3, 0x02, 0xff, 0x36, 0x8e, 0x0e, 0xa3, 0x9b, 0x02, 0xff,
- 0x3a, 0x8d, 0x0e, 0xa3, 0x93, 0x02, 0xff, 0x40, 0x8b, 0x0e, 0xa3, 0x83,
- 0x02, 0xff, 0x46, 0x87, 0x0e, 0xa3, 0x63, 0x02, 0xff, 0x4c, 0x9c, 0x0e,
- 0xa4, 0x0b, 0x02, 0xff, 0x58, 0x86, 0x0e, 0xa3, 0x5b, 0x02, 0xff, 0x5e,
- 0x89, 0x0e, 0xa3, 0x73, 0x02, 0xff, 0x64, 0x84, 0x0e, 0xa3, 0x4b, 0x02,
- 0xff, 0x6a, 0x83, 0x0e, 0xa3, 0x43, 0x02, 0xff, 0x70, 0x9b, 0x0e, 0xa4,
- 0x01, 0x99, 0x0e, 0xa3, 0xf1, 0x98, 0x0e, 0xa3, 0xe9, 0x93, 0x0e, 0xa3,
- 0xc1, 0x8c, 0x0e, 0xa3, 0x89, 0x8a, 0x0e, 0xa3, 0x79, 0x88, 0x0e, 0xa3,
- 0x68, 0x9c, 0x0e, 0xac, 0xf9, 0x9b, 0x0e, 0xac, 0xf1, 0x9a, 0x0e, 0xac,
- 0xe9, 0x99, 0x0e, 0xac, 0xe1, 0x98, 0x0e, 0xac, 0xd9, 0x97, 0x0e, 0xac,
- 0xd1, 0x96, 0x0e, 0xac, 0xc9, 0x95, 0x0e, 0xac, 0xc1, 0x94, 0x0e, 0xac,
- 0xb9, 0x93, 0x0e, 0xac, 0xb1, 0x92, 0x0e, 0xac, 0xa9, 0x91, 0x0e, 0xac,
- 0xa1, 0x90, 0x0e, 0xac, 0x99, 0x8f, 0x0e, 0xac, 0x91, 0x8e, 0x0e, 0xac,
- 0x89, 0x8d, 0x0e, 0xac, 0x81, 0x8c, 0x0e, 0xac, 0x79, 0x8b, 0x0e, 0xac,
- 0x71, 0x8a, 0x0e, 0xac, 0x69, 0x89, 0x0e, 0xac, 0x61, 0x88, 0x0e, 0xac,
- 0x59, 0x87, 0x0e, 0xac, 0x51, 0x86, 0x0e, 0xac, 0x49, 0x85, 0x0e, 0xac,
- 0x41, 0x84, 0x0e, 0xac, 0x39, 0x83, 0x0e, 0xac, 0x30, 0x9c, 0x0e, 0xac,
- 0x29, 0x9b, 0x0e, 0xac, 0x21, 0x9a, 0x0e, 0xac, 0x19, 0x99, 0x0e, 0xac,
- 0x11, 0x98, 0x0e, 0xac, 0x09, 0x97, 0x0e, 0xac, 0x01, 0x96, 0x0e, 0xab,
- 0xf9, 0x95, 0x0e, 0xab, 0xf1, 0x94, 0x0e, 0xab, 0xe9, 0x93, 0x0e, 0xab,
- 0xe1, 0x92, 0x0e, 0xab, 0xd9, 0x91, 0x0e, 0xab, 0xd1, 0x90, 0x0e, 0xab,
- 0xc9, 0x8f, 0x0e, 0xab, 0xc1, 0x8e, 0x0e, 0xab, 0xb9, 0x8d, 0x0e, 0xab,
- 0xb1, 0x8c, 0x0e, 0xab, 0xa9, 0x8b, 0x0e, 0xab, 0xa1, 0x8a, 0x0e, 0xab,
- 0x99, 0x89, 0x0e, 0xab, 0x91, 0x88, 0x0e, 0xab, 0x89, 0x87, 0x0e, 0xab,
- 0x81, 0x86, 0x0e, 0xab, 0x79, 0x85, 0x0e, 0xab, 0x71, 0x84, 0x0e, 0xab,
- 0x69, 0x83, 0x0e, 0xab, 0x60, 0xc4, 0x18, 0x83, 0x0e, 0xbf, 0xe9, 0xc2,
- 0x26, 0x51, 0x0e, 0xbf, 0xe0, 0xc3, 0x0c, 0x5b, 0x0e, 0xbf, 0xd9, 0xc3,
- 0x06, 0x9e, 0x0e, 0xbf, 0xd0, 0xc4, 0x04, 0x5e, 0x0e, 0xbf, 0xc9, 0xc2,
- 0x01, 0x47, 0x0e, 0xbf, 0xc0, 0x46, 0x06, 0x97, 0xc2, 0xff, 0x76, 0x47,
- 0xca, 0xe1, 0xc2, 0xff, 0x9a, 0x12, 0xc2, 0xff, 0xc8, 0xca, 0xa6, 0xda,
- 0x0e, 0xbc, 0x71, 0xcc, 0x8b, 0x48, 0x0e, 0xbc, 0x61, 0xcc, 0x89, 0x80,
- 0x0e, 0xbc, 0x59, 0xce, 0x12, 0x11, 0x0e, 0xbc, 0x51, 0x46, 0x04, 0x73,
- 0xc2, 0xff, 0xda, 0xc5, 0xdc, 0x87, 0x0e, 0xbb, 0x79, 0x48, 0x07, 0x17,
- 0x43, 0x00, 0x7e, 0xc4, 0x22, 0x71, 0x0e, 0xbf, 0x59, 0xc5, 0x01, 0xdb,
- 0x0e, 0xbf, 0x51, 0x15, 0xc3, 0x01, 0x1f, 0x08, 0xc3, 0x01, 0x2b, 0x16,
- 0xc3, 0x01, 0x37, 0xc3, 0x01, 0xb4, 0x0e, 0xbf, 0x19, 0xc4, 0x15, 0xd3,
- 0x0e, 0xbf, 0x10, 0x46, 0x04, 0x73, 0xc3, 0x01, 0x43, 0x48, 0x07, 0x17,
- 0x43, 0x01, 0xab, 0x9c, 0x0e, 0xae, 0x99, 0x9b, 0x0e, 0xae, 0x91, 0x9a,
- 0x0e, 0xae, 0x89, 0x99, 0x0e, 0xae, 0x81, 0x98, 0x0e, 0xae, 0x79, 0x97,
- 0x0e, 0xae, 0x71, 0x96, 0x0e, 0xae, 0x69, 0x95, 0x0e, 0xae, 0x61, 0x94,
- 0x0e, 0xae, 0x59, 0x93, 0x0e, 0xae, 0x51, 0x92, 0x0e, 0xae, 0x49, 0x91,
- 0x0e, 0xae, 0x41, 0x90, 0x0e, 0xae, 0x39, 0x8f, 0x0e, 0xae, 0x31, 0x8e,
- 0x0e, 0xae, 0x29, 0x8d, 0x0e, 0xae, 0x21, 0x8c, 0x0e, 0xae, 0x19, 0x8b,
- 0x0e, 0xae, 0x11, 0x8a, 0x0e, 0xae, 0x09, 0x89, 0x0e, 0xae, 0x01, 0x88,
- 0x0e, 0xad, 0xf9, 0x87, 0x0e, 0xad, 0xf1, 0x86, 0x0e, 0xad, 0xe9, 0x85,
- 0x0e, 0xad, 0xe1, 0x84, 0x0e, 0xad, 0xd9, 0x83, 0x0e, 0xad, 0xd0, 0x9c,
- 0x0e, 0xad, 0xc9, 0x9b, 0x0e, 0xad, 0xc1, 0x9a, 0x0e, 0xad, 0xb9, 0x99,
- 0x0e, 0xad, 0xb1, 0x98, 0x0e, 0xad, 0xa9, 0x97, 0x0e, 0xad, 0xa1, 0x96,
- 0x0e, 0xad, 0x99, 0x95, 0x0e, 0xad, 0x91, 0x94, 0x0e, 0xad, 0x89, 0x93,
- 0x0e, 0xad, 0x81, 0x92, 0x0e, 0xad, 0x79, 0x91, 0x0e, 0xad, 0x71, 0x90,
- 0x0e, 0xad, 0x69, 0x8f, 0x0e, 0xad, 0x61, 0x8e, 0x0e, 0xad, 0x59, 0x8d,
- 0x0e, 0xad, 0x51, 0x8c, 0x0e, 0xad, 0x49, 0x8b, 0x0e, 0xad, 0x41, 0x8a,
- 0x0e, 0xad, 0x39, 0x89, 0x0e, 0xad, 0x31, 0x88, 0x0e, 0xad, 0x29, 0x87,
- 0x0e, 0xad, 0x21, 0x86, 0x0e, 0xad, 0x19, 0x85, 0x0e, 0xad, 0x11, 0x84,
- 0x0e, 0xad, 0x09, 0x83, 0x0e, 0xad, 0x00, 0x9c, 0x0e, 0xa6, 0x79, 0x9b,
- 0x0e, 0xa6, 0x71, 0x9a, 0x0e, 0xa6, 0x69, 0x99, 0x0e, 0xa6, 0x61, 0x98,
- 0x0e, 0xa6, 0x59, 0x97, 0x0e, 0xa6, 0x51, 0x96, 0x0e, 0xa6, 0x49, 0x95,
- 0x0e, 0xa6, 0x41, 0x94, 0x0e, 0xa6, 0x39, 0x93, 0x0e, 0xa6, 0x31, 0x92,
- 0x0e, 0xa6, 0x29, 0x90, 0x0e, 0xa6, 0x19, 0x8f, 0x0e, 0xa6, 0x11, 0x8e,
- 0x0e, 0xa6, 0x09, 0x8d, 0x0e, 0xa6, 0x01, 0x8c, 0x0e, 0xa5, 0xf9, 0x8b,
- 0x0e, 0xa5, 0xf1, 0x8a, 0x0e, 0xa5, 0xe9, 0x88, 0x0e, 0xa5, 0xd9, 0x86,
- 0x0e, 0xa5, 0xc9, 0x85, 0x0e, 0xa5, 0xc1, 0x84, 0x0e, 0xa5, 0xb9, 0x83,
- 0x0e, 0xa5, 0xb0, 0x9c, 0x0e, 0xa5, 0xa9, 0x9b, 0x0e, 0xa5, 0xa1, 0x9a,
- 0x0e, 0xa5, 0x99, 0x99, 0x0e, 0xa5, 0x91, 0x98, 0x0e, 0xa5, 0x89, 0x97,
- 0x0e, 0xa5, 0x81, 0x96, 0x0e, 0xa5, 0x79, 0x95, 0x0e, 0xa5, 0x71, 0x93,
- 0x0e, 0xa5, 0x61, 0x92, 0x0e, 0xa5, 0x59, 0x91, 0x0e, 0xa5, 0x51, 0x90,
- 0x0e, 0xa5, 0x49, 0x8d, 0x0e, 0xa5, 0x31, 0x8c, 0x0e, 0xa5, 0x29, 0x89,
- 0x0e, 0xa5, 0x11, 0x86, 0x0e, 0xa4, 0xf9, 0x85, 0x0e, 0xa4, 0xf1, 0x83,
- 0x0e, 0xa4, 0xe0, 0xc4, 0x18, 0x83, 0x0e, 0xbe, 0xf9, 0xc2, 0x26, 0x51,
- 0x0e, 0xbe, 0xf0, 0xc3, 0x0c, 0x5b, 0x0e, 0xbe, 0xe9, 0xc3, 0x06, 0x9e,
- 0x0e, 0xbe, 0xe0, 0xc4, 0x04, 0x5e, 0x0e, 0xbe, 0xd9, 0xc2, 0x01, 0x47,
- 0x0e, 0xbe, 0xd0, 0x9c, 0x0e, 0xa9, 0xb9, 0x9b, 0x0e, 0xa9, 0xb1, 0x9a,
- 0x0e, 0xa9, 0xa9, 0x99, 0x0e, 0xa9, 0xa1, 0x98, 0x0e, 0xa9, 0x99, 0x97,
- 0x0e, 0xa9, 0x91, 0x96, 0x0e, 0xa9, 0x89, 0x95, 0x0e, 0xa9, 0x81, 0x94,
- 0x0e, 0xa9, 0x79, 0x93, 0x0e, 0xa9, 0x71, 0x92, 0x0e, 0xa9, 0x69, 0x91,
- 0x0e, 0xa9, 0x61, 0x90, 0x0e, 0xa9, 0x59, 0x8f, 0x0e, 0xa9, 0x51, 0x8e,
- 0x0e, 0xa9, 0x49, 0x8d, 0x0e, 0xa9, 0x41, 0x8c, 0x0e, 0xa9, 0x39, 0x8b,
- 0x0e, 0xa9, 0x31, 0x8a, 0x0e, 0xa9, 0x29, 0x89, 0x0e, 0xa9, 0x21, 0x88,
- 0x0e, 0xa9, 0x19, 0x87, 0x0e, 0xa9, 0x11, 0x86, 0x0e, 0xa9, 0x09, 0x85,
- 0x0e, 0xa9, 0x01, 0x84, 0x0e, 0xa8, 0xf9, 0x83, 0x0e, 0xa8, 0xf0, 0x9b,
- 0x0e, 0xa8, 0xe1, 0x9a, 0x0e, 0xa8, 0xd9, 0x99, 0x0e, 0xa8, 0xd1, 0x98,
- 0x0e, 0xa8, 0xc9, 0x97, 0x0e, 0xa8, 0xc1, 0x96, 0x0e, 0xa8, 0xb9, 0x95,
- 0x0e, 0xa8, 0xb1, 0x93, 0x0e, 0xa8, 0xa1, 0x92, 0x0e, 0xa8, 0x99, 0x91,
- 0x0e, 0xa8, 0x91, 0x90, 0x0e, 0xa8, 0x89, 0x8f, 0x0e, 0xa8, 0x81, 0x8e,
- 0x0e, 0xa8, 0x79, 0x8d, 0x0e, 0xa8, 0x71, 0x8c, 0x0e, 0xa8, 0x69, 0x89,
- 0x0e, 0xa8, 0x51, 0x88, 0x0e, 0xa8, 0x49, 0x87, 0x0e, 0xa8, 0x41, 0x86,
- 0x0e, 0xa8, 0x39, 0x84, 0x0e, 0xa8, 0x29, 0x83, 0x0e, 0xa8, 0x20, 0xd6,
- 0x0a, 0x88, 0x01, 0x3f, 0x69, 0xce, 0x24, 0xb2, 0x01, 0x3f, 0x38, 0x97,
- 0x08, 0xe9, 0xf9, 0x8b, 0x08, 0xe9, 0xe1, 0x83, 0x08, 0xe9, 0x88, 0x97,
- 0x08, 0xe9, 0xa8, 0x8b, 0x08, 0xe9, 0x98, 0xc2, 0x00, 0xa4, 0x08, 0xe8,
- 0xb9, 0x83, 0x08, 0xe8, 0xb0, 0xc2, 0x00, 0xa4, 0x08, 0xe8, 0xc9, 0x83,
- 0x08, 0xe8, 0xc0, 0x83, 0x08, 0xe5, 0x69, 0xc2, 0x00, 0xa4, 0x08, 0xe5,
- 0x60, 0x83, 0x08, 0xe5, 0x39, 0xc2, 0x00, 0xa4, 0x08, 0xe5, 0x30, 0xc2,
- 0x02, 0xb4, 0x08, 0xe5, 0x21, 0x83, 0x08, 0xe4, 0xe0, 0x15, 0xc3, 0x02,
- 0x13, 0xc2, 0x00, 0xa4, 0x08, 0xe4, 0xd9, 0x83, 0x08, 0xe4, 0xd0, 0xc2,
- 0x00, 0xa4, 0x08, 0xe4, 0xf9, 0x83, 0x08, 0xe4, 0xf0, 0x83, 0x08, 0xe4,
- 0xe9, 0xc2, 0x1d, 0x5f, 0x08, 0xe4, 0xc9, 0xc2, 0x01, 0x29, 0x08, 0xe4,
- 0xa8, 0xc2, 0x00, 0xa4, 0x08, 0xe4, 0xb9, 0x83, 0x08, 0xe4, 0xb0, 0xc2,
- 0x00, 0xa4, 0x08, 0xe4, 0x99, 0x83, 0x08, 0xe4, 0x90, 0xc2, 0x00, 0xa4,
- 0x08, 0xe4, 0x19, 0x83, 0x08, 0xe4, 0x10, 0xc5, 0x44, 0x7b, 0x00, 0x68,
- 0x19, 0xc4, 0x0f, 0x7c, 0x00, 0x6a, 0x68, 0x94, 0x00, 0x68, 0x5b, 0x03,
- 0x02, 0x1d, 0x8e, 0x00, 0x68, 0x62, 0x03, 0x02, 0x21, 0x83, 0x00, 0x69,
- 0x19, 0xc2, 0x00, 0xc1, 0x00, 0x69, 0x48, 0x83, 0x00, 0x68, 0xd9, 0x45,
- 0xde, 0xa3, 0x43, 0x02, 0x25, 0x83, 0x00, 0x68, 0xf9, 0xc2, 0x00, 0xa4,
- 0x00, 0x69, 0x01, 0xc2, 0x00, 0xb3, 0x00, 0x69, 0xd0, 0x83, 0x00, 0x69,
- 0x09, 0xc2, 0x00, 0xa4, 0x00, 0x69, 0x10, 0x83, 0x00, 0x69, 0x99, 0xc2,
- 0x00, 0xc7, 0x00, 0x69, 0xa0, 0x94, 0x00, 0x6a, 0x20, 0x8e, 0x00, 0x6b,
- 0x18, 0xc7, 0xc7, 0x76, 0x00, 0x6a, 0xc9, 0xc4, 0x9c, 0x2b, 0x00, 0x6a,
- 0xf0, 0xc8, 0x1f, 0x4d, 0x00, 0x6a, 0xd9, 0xc4, 0x0a, 0x04, 0x00, 0x6a,
- 0xe0, 0xc2, 0x01, 0x47, 0x00, 0x6b, 0x41, 0xc4, 0x04, 0x5e, 0x00, 0x6b,
- 0x48, 0xc3, 0x06, 0x9e, 0x00, 0x6b, 0x51, 0xc3, 0x0c, 0x5b, 0x00, 0x6b,
- 0x58, 0xc2, 0x26, 0x51, 0x00, 0x6b, 0x61, 0xc4, 0x18, 0x83, 0x00, 0x6b,
- 0x68, 0xcb, 0x44, 0x62, 0x08, 0x57, 0x98, 0xc3, 0xdf, 0x4a, 0x08, 0x56,
- 0xe9, 0xc4, 0xd9, 0x77, 0x08, 0x56, 0xc0, 0x96, 0x00, 0x42, 0x40, 0x8a,
- 0x00, 0x42, 0xa1, 0x9c, 0x00, 0x42, 0x88, 0xc2, 0x0c, 0x65, 0x00, 0x42,
- 0x48, 0xc2, 0x02, 0x59, 0x08, 0x8b, 0x91, 0x83, 0x08, 0x8b, 0x68, 0xc2,
- 0x00, 0xa4, 0x08, 0x8b, 0x59, 0x83, 0x08, 0x8b, 0x50, 0xc2, 0x00, 0xa4,
- 0x08, 0x8b, 0x49, 0x83, 0x08, 0x8b, 0x40, 0x83, 0x08, 0x8b, 0x39, 0xc2,
- 0x00, 0xc1, 0x08, 0x8b, 0x11, 0xc2, 0x1d, 0x5f, 0x08, 0x8a, 0xe8, 0xc2,
- 0x00, 0xa4, 0x08, 0x8b, 0x31, 0x83, 0x08, 0x8b, 0x29, 0x06, 0x43, 0x02,
- 0x45, 0xc2, 0x00, 0xa4, 0x08, 0x8b, 0x21, 0x83, 0x08, 0x8b, 0x19, 0x16,
- 0x43, 0x02, 0x4f, 0xc2, 0x00, 0xa4, 0x08, 0x8a, 0xe1, 0x83, 0x08, 0x8a,
- 0xd8, 0xc2, 0x00, 0xa4, 0x08, 0x8a, 0xd1, 0x83, 0x08, 0x8a, 0xc8, 0xc2,
- 0x00, 0xa4, 0x08, 0x8a, 0xc1, 0x83, 0x08, 0x8a, 0xb8, 0xc2, 0x00, 0xa4,
- 0x08, 0x8a, 0xb1, 0x83, 0x08, 0x8a, 0xa8, 0xc9, 0xa9, 0xcd, 0x0f, 0x80,
- 0x71, 0xc6, 0x39, 0x45, 0x0f, 0x81, 0x00, 0xc9, 0xa9, 0xcd, 0x0f, 0x80,
- 0x61, 0xc6, 0x39, 0x45, 0x0f, 0x80, 0xf0, 0xc9, 0xa9, 0xcd, 0x0f, 0x80,
- 0x69, 0xc6, 0x39, 0x45, 0x0f, 0x80, 0xf8, 0xc9, 0xa9, 0xcd, 0x0f, 0x80,
- 0x79, 0xc6, 0x39, 0x45, 0x0f, 0x81, 0x08, 0xc9, 0xa9, 0xcd, 0x0f, 0x80,
- 0x39, 0xc6, 0x39, 0x45, 0x0f, 0x80, 0xc8, 0xc9, 0xa9, 0xcd, 0x0f, 0x80,
- 0x41, 0xc6, 0x39, 0x45, 0x0f, 0x80, 0xd0, 0xc9, 0xa9, 0xcd, 0x0f, 0x80,
- 0x49, 0xc6, 0x39, 0x45, 0x0f, 0x80, 0xd8, 0xc9, 0xa9, 0xcd, 0x0f, 0x80,
- 0x51, 0xc6, 0x39, 0x45, 0x0f, 0x80, 0xe0, 0xc9, 0xa9, 0xcd, 0x0f, 0x80,
- 0x59, 0xc6, 0x39, 0x45, 0x0f, 0x80, 0xe8, 0x0d, 0xc3, 0x02, 0x59, 0x15,
- 0xc3, 0x02, 0x65, 0x12, 0xc3, 0x02, 0x8c, 0x16, 0xc3, 0x02, 0xa6, 0x05,
- 0xc3, 0x02, 0xcf, 0x18, 0xc3, 0x02, 0xf3, 0x09, 0xc3, 0x02, 0xff, 0x0f,
- 0xc3, 0x03, 0x12, 0x04, 0xc3, 0x03, 0x33, 0x0e, 0xc3, 0x03, 0x3d, 0x08,
- 0xc3, 0x03, 0x4c, 0x06, 0xc3, 0x03, 0x72, 0x19, 0xc3, 0x03, 0x86, 0x42,
- 0x00, 0xa4, 0xc3, 0x03, 0x92, 0x07, 0xc3, 0x03, 0x9e, 0x10, 0xc3, 0x03,
- 0xaa, 0x11, 0xc3, 0x03, 0xc2, 0xcd, 0x77, 0xf3, 0x0e, 0x8c, 0xc1, 0x9c,
- 0x0e, 0x8c, 0x71, 0x14, 0xc3, 0x03, 0xd4, 0x4b, 0x98, 0x72, 0xc3, 0x03,
- 0xdc, 0x42, 0x00, 0x90, 0xc3, 0x03, 0xe8, 0xca, 0x5c, 0x58, 0x0e, 0x8a,
- 0x18, 0x00, 0x43, 0x03, 0xf4, 0xc2, 0x00, 0xa4, 0x08, 0x94, 0xa1, 0xc2,
- 0x0b, 0xc6, 0x08, 0x94, 0x99, 0x83, 0x08, 0x94, 0x90, 0x83, 0x08, 0x94,
- 0x81, 0xc2, 0x00, 0xa4, 0x08, 0x94, 0x88, 0x9f, 0x00, 0x84, 0x39, 0xa0,
- 0x00, 0x84, 0x41, 0xa2, 0x00, 0x84, 0x49, 0xa3, 0x00, 0x84, 0x50, 0x45,
- 0x27, 0x0d, 0xc3, 0x04, 0x00, 0xcd, 0x79, 0xe1, 0x00, 0x84, 0x68, 0xc2,
- 0x02, 0x59, 0x05, 0x53, 0x99, 0xc2, 0x02, 0xb4, 0x05, 0x53, 0x91, 0xc2,
- 0x96, 0xd0, 0x05, 0x53, 0x89, 0xc2, 0x00, 0x67, 0x05, 0x53, 0x79, 0xc3,
- 0x14, 0x4e, 0x05, 0x53, 0x71, 0x0a, 0xc3, 0x04, 0x08, 0xc2, 0x0c, 0x65,
- 0x05, 0x53, 0x61, 0x10, 0xc3, 0x04, 0x12, 0x06, 0xc3, 0x04, 0x1c, 0x42,
- 0x02, 0x92, 0xc3, 0x04, 0x26, 0x0c, 0xc3, 0x04, 0x30, 0x05, 0xc3, 0x04,
- 0x3a, 0xc2, 0x01, 0x29, 0x05, 0x53, 0x00, 0x04, 0xc3, 0x04, 0x44, 0x06,
- 0xc3, 0x04, 0x4e, 0xc3, 0x14, 0x4e, 0x05, 0x4f, 0xd1, 0x10, 0xc3, 0x04,
- 0x5c, 0x0c, 0xc3, 0x04, 0x68, 0x09, 0xc3, 0x04, 0x72, 0xc2, 0x00, 0x67,
- 0x05, 0x4f, 0x40, 0x42, 0x00, 0x4e, 0x43, 0x04, 0x7c, 0xc3, 0x1b, 0xb6,
- 0x05, 0x53, 0xf1, 0xc3, 0x02, 0x28, 0x05, 0x53, 0xf8, 0x83, 0x00, 0x82,
- 0xe1, 0x87, 0x00, 0x82, 0xe8, 0xce, 0x29, 0x88, 0x0f, 0xd0, 0xb1, 0xdb,
- 0x18, 0x76, 0x0f, 0xd2, 0x00, 0x49, 0x29, 0x7f, 0x43, 0x04, 0xbe, 0x49,
- 0x29, 0x7f, 0x43, 0x04, 0xca, 0xce, 0x29, 0x88, 0x0f, 0xd0, 0xc1, 0xdb,
- 0x18, 0x76, 0x0f, 0xd2, 0x10, 0xce, 0x29, 0x88, 0x0f, 0xd0, 0xb9, 0xdb,
- 0x18, 0x76, 0x0f, 0xd2, 0x08, 0xce, 0x29, 0x88, 0x0f, 0xd0, 0xd1, 0xdb,
- 0x18, 0x76, 0x0f, 0xd2, 0x20, 0xc3, 0x00, 0x34, 0x0f, 0xd1, 0x51, 0xc5,
- 0x7c, 0xf9, 0x0f, 0xd1, 0x70, 0xcb, 0x98, 0x9e, 0x08, 0xa3, 0x09, 0xcb,
- 0x91, 0x66, 0x08, 0xa3, 0x01, 0xce, 0x71, 0x51, 0x08, 0xa2, 0x41, 0x03,
- 0xc3, 0x04, 0xe0, 0xc5, 0x35, 0x00, 0x08, 0xa2, 0x31, 0x42, 0x03, 0x32,
- 0xc3, 0x04, 0xec, 0xcb, 0x1e, 0x17, 0x08, 0xa2, 0x18, 0x8e, 0x08, 0xa0,
- 0x43, 0x03, 0x04, 0xf8, 0x94, 0x08, 0xa0, 0x32, 0x03, 0x04, 0xfc, 0xc2,
- 0x00, 0xa4, 0x08, 0xa0, 0xc9, 0x83, 0x08, 0xa0, 0xc0, 0xc2, 0x00, 0xa4,
- 0x08, 0xa0, 0x99, 0x83, 0x08, 0xa0, 0x90, 0xc2, 0x00, 0xa4, 0x08, 0xa0,
- 0xe9, 0x83, 0x08, 0xa0, 0xe0, 0xc2, 0x00, 0xa4, 0x08, 0xa0, 0xd9, 0x83,
- 0x08, 0xa0, 0xd0, 0xc4, 0x18, 0x83, 0x08, 0xa2, 0xb9, 0xc2, 0x26, 0x51,
- 0x08, 0xa2, 0xb0, 0xc3, 0x0c, 0x5b, 0x08, 0xa2, 0xa9, 0xc3, 0x06, 0x9e,
- 0x08, 0xa2, 0xa0, 0xc4, 0x04, 0x5e, 0x08, 0xa2, 0x99, 0xc2, 0x01, 0x47,
- 0x08, 0xa2, 0x90, 0x8e, 0x08, 0xa1, 0xe8, 0x94, 0x08, 0xa1, 0xd8, 0x9f,
- 0x00, 0xce, 0x49, 0x9e, 0x00, 0xce, 0x40, 0xc4, 0x18, 0x83, 0x00, 0xce,
- 0xb9, 0xc2, 0x26, 0x51, 0x00, 0xce, 0xb0, 0xc3, 0x0c, 0x5b, 0x00, 0xce,
- 0xa9, 0xc3, 0x06, 0x9e, 0x00, 0xce, 0xa0, 0xc4, 0x04, 0x5e, 0x00, 0xce,
- 0x99, 0xc2, 0x01, 0x47, 0x00, 0xce, 0x90, 0x84, 0x00, 0xce, 0x39, 0x86,
- 0x00, 0xce, 0x31, 0x8d, 0x00, 0xce, 0x29, 0x8f, 0x00, 0xce, 0x21, 0x90,
- 0x00, 0xce, 0x1b, 0x03, 0x05, 0x00, 0x98, 0x00, 0xce, 0x08, 0x15, 0xc3,
- 0x05, 0x04, 0x1a, 0xc3, 0x05, 0x0e, 0x0d, 0xc3, 0x05, 0x18, 0xc2, 0x00,
- 0xde, 0x00, 0xcd, 0x29, 0xc2, 0x0b, 0xc6, 0x00, 0xcd, 0x21, 0xc2, 0x00,
- 0xa4, 0x00, 0xcd, 0x19, 0xc2, 0x00, 0xc7, 0x00, 0xcc, 0xf9, 0xc2, 0x04,
- 0x41, 0x00, 0xcc, 0xf1, 0xc2, 0x00, 0x67, 0x00, 0xcc, 0xe9, 0xc2, 0x00,
- 0xad, 0x00, 0xcc, 0xc9, 0x12, 0xc3, 0x05, 0x22, 0x10, 0xc3, 0x05, 0x2c,
- 0x16, 0xc3, 0x05, 0x36, 0xc2, 0x1d, 0x5f, 0x00, 0xcc, 0x69, 0xc2, 0x0f,
- 0x60, 0x00, 0xcc, 0x08, 0x15, 0xc3, 0x05, 0x46, 0x1a, 0xc3, 0x05, 0x50,
- 0x0d, 0xc3, 0x05, 0x5a, 0xc2, 0x00, 0xde, 0x00, 0xcd, 0x11, 0xc2, 0x0b,
- 0xc6, 0x00, 0xcd, 0x09, 0xc2, 0x00, 0xa4, 0x00, 0xcd, 0x01, 0xc2, 0x00,
- 0xc7, 0x00, 0xcc, 0xe1, 0xc2, 0x04, 0x41, 0x00, 0xcc, 0xd9, 0xc2, 0x00,
- 0x67, 0x00, 0xcc, 0xd1, 0xc2, 0x00, 0xad, 0x00, 0xcc, 0xb1, 0x12, 0xc3,
- 0x05, 0x64, 0x10, 0xc3, 0x05, 0x6e, 0x16, 0xc3, 0x05, 0x78, 0xc2, 0x1d,
- 0x5f, 0x00, 0xcc, 0x51, 0xc2, 0x0f, 0x60, 0x00, 0xcc, 0x00, 0x9b, 0x00,
- 0xce, 0x01, 0x8b, 0x00, 0xcd, 0x90, 0x87, 0x00, 0xcd, 0xcb, 0x03, 0x05,
- 0x88, 0x9b, 0x00, 0xcd, 0xe1, 0x97, 0x00, 0xcd, 0xa0, 0x83, 0x00, 0xcd,
- 0xc3, 0x03, 0x05, 0x8c, 0x9b, 0x00, 0xcd, 0xe8, 0x83, 0x00, 0xcd, 0x8b,
- 0x03, 0x05, 0x90, 0x9b, 0x00, 0xcd, 0xd1, 0x87, 0x00, 0xcd, 0xb0, 0x42,
- 0x00, 0x4b, 0xc3, 0x05, 0x94, 0xc7, 0x53, 0x70, 0x01, 0x27, 0x68, 0xc7,
- 0x20, 0xbe, 0x01, 0x27, 0x91, 0xc5, 0x6a, 0x79, 0x01, 0x27, 0x58, 0xc8,
- 0x48, 0x4e, 0x01, 0x27, 0x89, 0xc6, 0x45, 0x38, 0x01, 0x27, 0x80, 0xc6,
- 0x13, 0xf0, 0x01, 0x27, 0x79, 0xc7, 0x56, 0x07, 0x01, 0x27, 0x70, 0x94,
- 0x08, 0xcd, 0x38, 0xc2, 0x00, 0xa4, 0x08, 0xcd, 0xd9, 0x83, 0x08, 0xcd,
- 0xd0, 0xc2, 0x00, 0xa4, 0x08, 0xcd, 0xc9, 0x83, 0x08, 0xcd, 0xc0, 0xc4,
- 0x18, 0x85, 0x08, 0x45, 0x71, 0x91, 0x08, 0x45, 0x40, 0xc3, 0xdf, 0x4a,
- 0x08, 0x44, 0xc9, 0xc4, 0xd9, 0x77, 0x08, 0x44, 0xb0, 0xc3, 0xe6, 0xf7,
- 0x0f, 0xb3, 0x11, 0xc9, 0xac, 0x43, 0x0f, 0xb2, 0xd1, 0xc4, 0x48, 0xc8,
- 0x0f, 0xb2, 0x90, 0xc4, 0x01, 0x10, 0x01, 0x0c, 0xbb, 0x03, 0x05, 0xa0,
- 0xd3, 0x3a, 0x0e, 0x01, 0x49, 0x10, 0xc7, 0x11, 0xa4, 0x01, 0x5b, 0xb8,
- 0xc4, 0x01, 0x10, 0x01, 0x0c, 0xb3, 0x03, 0x05, 0xa4, 0xd3, 0x3a, 0x22,
- 0x01, 0x49, 0x08, 0xc3, 0xe6, 0xf7, 0x0f, 0xb3, 0x01, 0xc9, 0xac, 0x43,
- 0x0f, 0xb2, 0xc1, 0xc4, 0x48, 0xc8, 0x0f, 0xb2, 0x80, 0xc7, 0x11, 0xa4,
- 0x01, 0x5b, 0xb0, 0x44, 0x01, 0xb4, 0xc3, 0x05, 0xa8, 0x46, 0x04, 0x5d,
- 0x43, 0x05, 0xc0, 0xc9, 0xae, 0xe6, 0x05, 0x41, 0xb1, 0xca, 0x9d, 0x8e,
- 0x05, 0x41, 0xc8, 0x86, 0x0f, 0xae, 0x39, 0xc2, 0x00, 0x63, 0x0f, 0xae,
- 0x30, 0xcd, 0x7b, 0xa8, 0x0f, 0x98, 0x79, 0xc7, 0xc2, 0x3d, 0x0f, 0x98,
- 0x70, 0x00, 0x43, 0x05, 0xcc, 0x00, 0x43, 0x05, 0xe1, 0x45, 0x00, 0x56,
- 0x43, 0x05, 0xf6, 0x83, 0x09, 0x8f, 0x50, 0x84, 0x09, 0x8f, 0x11, 0x83,
- 0x09, 0x8f, 0x08, 0x83, 0x09, 0x8e, 0xf0, 0x83, 0x09, 0x8e, 0xd0, 0x83,
- 0x09, 0x8e, 0xa8, 0x83, 0x09, 0x8e, 0x90, 0x83, 0x09, 0x8e, 0x60, 0x83,
- 0x09, 0x8e, 0x50, 0x83, 0x09, 0x8e, 0x40, 0x8a, 0x09, 0x8e, 0x21, 0x89,
- 0x09, 0x8e, 0x19, 0x88, 0x09, 0x8e, 0x11, 0x87, 0x09, 0x8e, 0x09, 0x86,
- 0x09, 0x8e, 0x01, 0x85, 0x09, 0x8d, 0xf9, 0x84, 0x09, 0x8d, 0xf1, 0x83,
- 0x09, 0x8d, 0xe8, 0x83, 0x09, 0x8d, 0xd0, 0x83, 0x09, 0x8d, 0x90, 0x84,
- 0x09, 0x8d, 0x79, 0x83, 0x09, 0x8d, 0x70, 0x83, 0x09, 0x9e, 0x68, 0x83,
- 0x09, 0x9e, 0x30, 0x83, 0x09, 0x9e, 0x20, 0x83, 0x09, 0x9e, 0x00, 0x83,
- 0x09, 0x9d, 0xd8, 0x83, 0x09, 0x9d, 0xc8, 0x83, 0x09, 0x9d, 0x90, 0x83,
- 0x09, 0x99, 0x78, 0x83, 0x09, 0x99, 0x68, 0x83, 0x09, 0x98, 0xe0, 0x83,
- 0x09, 0x98, 0xb0, 0x83, 0x09, 0x98, 0x98, 0x83, 0x09, 0x98, 0x88, 0x83,
- 0x09, 0x98, 0x78, 0x83, 0x09, 0x98, 0x50, 0x85, 0x09, 0x89, 0xe1, 0x84,
- 0x09, 0x89, 0xd9, 0x83, 0x09, 0x89, 0xd0, 0x83, 0x09, 0x89, 0xa8, 0x83,
- 0x09, 0x89, 0x98, 0x83, 0x09, 0x89, 0x88, 0x83, 0x09, 0x89, 0x48, 0x83,
- 0x09, 0x89, 0x38, 0x83, 0x09, 0x89, 0x00, 0x83, 0x09, 0x88, 0xa8, 0x83,
- 0x09, 0x88, 0x60, 0x83, 0x09, 0x87, 0xf8, 0x83, 0x09, 0x87, 0xd0, 0x83,
- 0x09, 0x87, 0x98, 0x83, 0x09, 0x87, 0x50, 0x83, 0x09, 0x87, 0x30, 0x83,
- 0x09, 0x87, 0x20, 0x83, 0x09, 0x86, 0xe0, 0x83, 0x09, 0x86, 0xd0, 0xc3,
- 0x00, 0xef, 0x09, 0xa1, 0xa9, 0xc5, 0xa0, 0x46, 0x09, 0xa1, 0x98, 0xc3,
- 0x00, 0xef, 0x09, 0xa1, 0xa1, 0xc5, 0xa0, 0x46, 0x09, 0xa1, 0x90, 0x83,
- 0x09, 0x8c, 0xb0, 0x83, 0x09, 0x8c, 0xa0, 0x83, 0x09, 0x8c, 0x90, 0x83,
- 0x09, 0x8c, 0x68, 0x84, 0x09, 0x94, 0xc9, 0x83, 0x09, 0x94, 0xc0, 0x86,
- 0x09, 0x94, 0x49, 0x85, 0x09, 0x94, 0x41, 0x84, 0x09, 0x94, 0x39, 0x83,
- 0x09, 0x94, 0x30, 0x83, 0x09, 0x94, 0x10, 0x83, 0x09, 0x93, 0xf0, 0x83,
- 0x09, 0x93, 0xe0, 0x83, 0x09, 0x93, 0xb8, 0x83, 0x09, 0x93, 0xa8, 0x83,
- 0x09, 0x93, 0x80, 0x83, 0x09, 0x93, 0x70, 0x85, 0x09, 0x93, 0x21, 0x84,
- 0x09, 0x93, 0x19, 0x83, 0x09, 0x93, 0x10, 0x88, 0x09, 0x92, 0xe9, 0x87,
- 0x09, 0x92, 0xe1, 0x86, 0x09, 0x92, 0xd9, 0x85, 0x09, 0x92, 0xd1, 0x84,
- 0x09, 0x92, 0xc9, 0x83, 0x09, 0x92, 0xc0, 0x83, 0x09, 0x92, 0xb0, 0x83,
- 0x09, 0x92, 0x88, 0x86, 0x09, 0xa1, 0x79, 0x85, 0x09, 0x9b, 0xd9, 0x84,
- 0x09, 0x9b, 0xd1, 0x83, 0x09, 0x9b, 0xc8, 0x84, 0x09, 0x9b, 0xf1, 0x83,
- 0x09, 0x9b, 0xe8, 0x83, 0x09, 0x9d, 0x50, 0x83, 0x09, 0x9d, 0x28, 0x83,
- 0x09, 0x9d, 0x10, 0x83, 0x09, 0x9d, 0x00, 0x83, 0x09, 0x9c, 0xf0, 0x83,
- 0x09, 0x9c, 0xe0, 0x83, 0x09, 0x9c, 0xb0, 0x8e, 0x09, 0x9c, 0x91, 0x8d,
- 0x09, 0x9c, 0x89, 0x8c, 0x09, 0x9c, 0x81, 0x8b, 0x09, 0x9c, 0x79, 0x8a,
- 0x09, 0x9c, 0x71, 0x89, 0x09, 0x9c, 0x69, 0x88, 0x09, 0x9c, 0x61, 0x87,
- 0x09, 0x9c, 0x59, 0x86, 0x09, 0x9c, 0x51, 0x85, 0x09, 0x9c, 0x49, 0x84,
- 0x09, 0x9c, 0x41, 0x83, 0x09, 0x9c, 0x38, 0x84, 0x09, 0x9b, 0xa1, 0x83,
- 0x09, 0x9b, 0x98, 0x83, 0x09, 0x9b, 0x68, 0x8b, 0x09, 0x9b, 0x59, 0x8a,
- 0x09, 0x9b, 0x51, 0x89, 0x09, 0x9b, 0x49, 0x88, 0x09, 0x9b, 0x41, 0x87,
- 0x09, 0x9b, 0x39, 0x86, 0x09, 0x9b, 0x31, 0x85, 0x09, 0x9b, 0x29, 0x84,
- 0x09, 0x9b, 0x21, 0x83, 0x09, 0x9b, 0x18, 0x84, 0x09, 0xa0, 0xa9, 0x83,
- 0x09, 0xa0, 0xa0, 0x83, 0x09, 0x81, 0xb0, 0x83, 0x09, 0x81, 0x98, 0x83,
- 0x09, 0x81, 0x88, 0x83, 0x09, 0x81, 0x70, 0x83, 0x09, 0x81, 0x28, 0x83,
- 0x09, 0x80, 0xa8, 0x83, 0x09, 0x80, 0x88, 0x84, 0x09, 0x80, 0x41, 0x83,
- 0x09, 0x80, 0x38, 0x83, 0x09, 0x80, 0x28, 0x83, 0x09, 0x92, 0x78, 0x83,
- 0x09, 0x92, 0x50, 0x83, 0x09, 0x92, 0x10, 0x83, 0x09, 0x92, 0x00, 0x83,
- 0x09, 0x91, 0x90, 0x83, 0x09, 0x91, 0x28, 0x83, 0x09, 0x90, 0xd0, 0x83,
- 0x09, 0x90, 0xb8, 0x83, 0x09, 0x90, 0xa8, 0x83, 0x09, 0x90, 0x98, 0x83,
- 0x09, 0x90, 0x50, 0x84, 0x09, 0x90, 0x11, 0x83, 0x09, 0x90, 0x08, 0x42,
- 0x08, 0xcb, 0xc3, 0x06, 0x02, 0x42, 0x49, 0x49, 0xc3, 0x06, 0x0c, 0x42,
- 0xc2, 0x51, 0xc3, 0x06, 0x16, 0x42, 0x01, 0xa6, 0xc3, 0x06, 0x21, 0x42,
- 0xc8, 0xba, 0xc3, 0x06, 0x2c, 0x42, 0xe8, 0x1e, 0xc3, 0x06, 0x36, 0x42,
- 0x6a, 0x0f, 0xc3, 0x06, 0x41, 0xc4, 0xe1, 0x17, 0x0f, 0x3f, 0x40, 0x83,
- 0x00, 0x95, 0x18, 0x87, 0x00, 0x95, 0x20, 0x83, 0x01, 0x6c, 0x50, 0x83,
- 0x00, 0x98, 0x98, 0x87, 0x00, 0x98, 0xa0, 0x83, 0x00, 0x98, 0xd8, 0x87,
- 0x00, 0x98, 0xe0, 0x83, 0x01, 0x6c, 0x9b, 0x03, 0x06, 0x4b, 0x8b, 0x01,
- 0x6c, 0xa1, 0x87, 0x01, 0x6c, 0xb2, 0x03, 0x06, 0x4f, 0x83, 0x01, 0x6e,
- 0xd8, 0x87, 0x01, 0x6e, 0xe0, 0x87, 0x0f, 0x3f, 0x5b, 0x03, 0x06, 0x53,
- 0x8b, 0x0f, 0x3f, 0x49, 0x83, 0x00, 0x90, 0xb8, 0x91, 0x0f, 0x3f, 0x31,
- 0x87, 0x0f, 0x3f, 0x2b, 0x03, 0x06, 0x57, 0x83, 0x0f, 0x3f, 0x03, 0x03,
- 0x06, 0x5b, 0x8b, 0x0f, 0x3f, 0x11, 0x97, 0x0f, 0x3f, 0x08, 0x83, 0x00,
- 0x90, 0x98, 0x87, 0x00, 0x90, 0xa0, 0x87, 0x05, 0x59, 0x60, 0x83, 0x05,
- 0x59, 0x58, 0x87, 0x00, 0x9c, 0x30, 0x0a, 0xc3, 0x06, 0x5f, 0x83, 0x01,
- 0x6d, 0xc3, 0x03, 0x06, 0x79, 0x97, 0x01, 0x6d, 0xc9, 0x8b, 0x01, 0x6d,
- 0xd1, 0x87, 0x01, 0x6d, 0xeb, 0x03, 0x06, 0x7d, 0x91, 0x01, 0x6d, 0xf0,
- 0x83, 0x01, 0x6d, 0x58, 0x87, 0x01, 0x6d, 0x60, 0x83, 0x00, 0x99, 0x58,
- 0x87, 0x00, 0x99, 0x60, 0x83, 0x01, 0x6c, 0x80, 0x87, 0x05, 0x58, 0xa0,
- 0x91, 0x05, 0x58, 0x71, 0x87, 0x05, 0x58, 0x6b, 0x03, 0x06, 0x81, 0xc2,
- 0x14, 0x40, 0x05, 0x58, 0x59, 0x8b, 0x05, 0x58, 0x51, 0x97, 0x05, 0x58,
- 0x48, 0x83, 0x00, 0x97, 0xd8, 0x87, 0x00, 0x97, 0xe0, 0x83, 0x01, 0x6c,
- 0x68, 0x87, 0x05, 0x58, 0x20, 0x83, 0x00, 0x99, 0x18, 0x87, 0x00, 0x99,
- 0x20, 0x83, 0x01, 0x6c, 0x78, 0x83, 0x00, 0x99, 0xd8, 0x87, 0x00, 0x99,
- 0xe0, 0x83, 0x00, 0x9a, 0x18, 0x87, 0x00, 0x9a, 0x20, 0x83, 0x00, 0x9a,
- 0x38, 0x87, 0x00, 0x9c, 0x10, 0x83, 0x00, 0x91, 0x18, 0x87, 0x00, 0x91,
- 0x20, 0xc3, 0x85, 0x50, 0x00, 0x9c, 0x01, 0xc3, 0xc8, 0xba, 0x00, 0x9c,
- 0x21, 0xc3, 0xe1, 0xf7, 0x00, 0x9c, 0x41, 0xc3, 0x08, 0xcb, 0x00, 0x9c,
- 0x60, 0x83, 0x00, 0x91, 0xd8, 0x87, 0x00, 0x91, 0xe0, 0x83, 0x01, 0x6c,
- 0x20, 0x83, 0x01, 0x6d, 0x18, 0x87, 0x01, 0x6d, 0x20, 0x83, 0x00, 0x92,
- 0x58, 0x87, 0x00, 0x92, 0x60, 0x83, 0x00, 0x92, 0x98, 0x87, 0x00, 0x92,
- 0xa0, 0x83, 0x00, 0x92, 0xc3, 0x03, 0x06, 0x85, 0x8b, 0x00, 0x92, 0xd1,
- 0x87, 0x00, 0x92, 0xea, 0x03, 0x06, 0x89, 0x83, 0x01, 0x6e, 0x18, 0x87,
- 0x01, 0x6e, 0x20, 0x83, 0x00, 0x94, 0x58, 0x87, 0x00, 0x94, 0x60, 0x83,
- 0x01, 0x6e, 0x98, 0x87, 0x01, 0x6e, 0xa0, 0x83, 0x00, 0x94, 0xd8, 0x87,
- 0x00, 0x94, 0xe0, 0x83, 0x01, 0x6c, 0x48, 0x83, 0x00, 0x95, 0x98, 0x87,
- 0x00, 0x95, 0xa0, 0x83, 0x00, 0x95, 0xd8, 0x87, 0x00, 0x95, 0xe0, 0x83,
- 0x00, 0x96, 0x03, 0x03, 0x06, 0x8d, 0x8b, 0x00, 0x96, 0x11, 0x87, 0x00,
- 0x96, 0x2a, 0x03, 0x06, 0x91, 0x83, 0x01, 0x6e, 0x58, 0x87, 0x01, 0x6e,
- 0x60, 0x48, 0x17, 0x50, 0xc3, 0x06, 0x95, 0x83, 0x00, 0x99, 0x98, 0x87,
- 0x00, 0x99, 0xa0, 0x83, 0x01, 0x6c, 0x88, 0x87, 0x00, 0x9c, 0x70, 0x83,
- 0x00, 0x97, 0x18, 0x87, 0x00, 0x97, 0x20, 0x83, 0x01, 0x6d, 0x98, 0x87,
- 0x01, 0x6d, 0xa0, 0x87, 0x00, 0x9c, 0x50, 0xe0, 0x00, 0xe7, 0x01, 0x17,
- 0x98, 0xd3, 0x36, 0xb1, 0x01, 0x4f, 0x1b, 0x03, 0x06, 0xaf, 0x45, 0x00,
- 0x96, 0x43, 0x06, 0xb5, 0x16, 0xc3, 0x06, 0xcd, 0xc9, 0x0e, 0xac, 0x01,
- 0x53, 0x31, 0xcb, 0x9b, 0x3d, 0x01, 0x55, 0x71, 0xce, 0x75, 0x5d, 0x01,
- 0x5f, 0xc8, 0x94, 0x00, 0x57, 0x00, 0x8e, 0x00, 0x57, 0x08, 0x94, 0x00,
- 0x56, 0x20, 0x8e, 0x00, 0x57, 0x18, 0xa2, 0x0e, 0x91, 0x03, 0x03, 0x06,
- 0xd3, 0xa1, 0x0e, 0x90, 0xfb, 0x03, 0x06, 0xe7, 0x20, 0xc3, 0x07, 0x03,
- 0x9f, 0x0e, 0x90, 0xf3, 0x03, 0x07, 0x1b, 0x9e, 0x0e, 0x90, 0xeb, 0x03,
- 0x07, 0x33, 0xa5, 0x0e, 0x91, 0x11, 0xa4, 0x0e, 0x91, 0x08, 0xa2, 0x0e,
- 0x90, 0x23, 0x03, 0x07, 0x4f, 0x9f, 0x0e, 0x90, 0x0b, 0x03, 0x07, 0x63,
- 0x9e, 0x0e, 0x90, 0x03, 0x03, 0x07, 0x77, 0xa6, 0x0e, 0x90, 0x41, 0xa5,
- 0x0e, 0x90, 0x39, 0xa4, 0x0e, 0x90, 0x31, 0xa3, 0x0e, 0x90, 0x29, 0xa1,
- 0x0e, 0x90, 0x19, 0xa0, 0x0e, 0x90, 0x10, 0x45, 0x00, 0x56, 0x43, 0x07,
- 0x9f, 0x44, 0x00, 0x57, 0xc3, 0x07, 0xb1, 0xc5, 0x63, 0xc6, 0x00, 0x1c,
- 0x28, 0xc9, 0xb6, 0x36, 0x08, 0x0b, 0xab, 0x03, 0x07, 0xc3, 0xcc, 0x83,
- 0x08, 0x08, 0x0c, 0x58, 0x46, 0x02, 0x91, 0xc3, 0x07, 0xc9, 0xd2, 0x1c,
- 0x7d, 0x00, 0x1f, 0xc8, 0xd3, 0x1c, 0x7c, 0x00, 0x1f, 0xe9, 0xda, 0x1c,
- 0x75, 0x00, 0x1f, 0xf8, 0x47, 0x02, 0x90, 0xc3, 0x08, 0x46, 0x49, 0x10,
- 0x8f, 0xc3, 0x08, 0xbf, 0xda, 0x1c, 0x75, 0x00, 0x1b, 0xe0, 0xc3, 0x11,
- 0xd4, 0x00, 0xeb, 0x51, 0xc3, 0x1b, 0x5e, 0x00, 0xeb, 0x49, 0xc3, 0x33,
- 0x12, 0x00, 0xeb, 0x41, 0xc5, 0x52, 0x39, 0x00, 0xeb, 0x39, 0xc4, 0x96,
- 0x57, 0x00, 0xeb, 0x30, 0x45, 0x00, 0x56, 0x43, 0x08, 0xcb, 0xc8, 0x9d,
- 0xa4, 0x00, 0x1e, 0xb9, 0xca, 0x8b, 0x4a, 0x00, 0x1f, 0x80, 0x15, 0xc3,
- 0x08, 0xdd, 0xcd, 0x7e, 0xdb, 0x00, 0x1e, 0xc1, 0xc3, 0xe7, 0x24, 0x00,
- 0x1f, 0x99, 0xc7, 0x52, 0x37, 0x00, 0x1e, 0xe1, 0xc5, 0x7e, 0xe3, 0x00,
- 0x1e, 0xf0, 0xcc, 0x1c, 0x83, 0x00, 0x1f, 0x91, 0xce, 0x12, 0x11, 0x00,
- 0x1f, 0xa8, 0xca, 0x89, 0x82, 0x00, 0x1e, 0x89, 0x44, 0x04, 0x5f, 0x43,
- 0x08, 0xef, 0xcb, 0x90, 0x95, 0x08, 0x0b, 0xb9, 0xca, 0x6c, 0xcb, 0x08,
- 0x0b, 0xe8, 0x46, 0xd2, 0x97, 0xc3, 0x08, 0xfb, 0x43, 0x17, 0xa3, 0xc3,
- 0x09, 0x0d, 0x16, 0xc3, 0x09, 0x19, 0x4b, 0x93, 0x8c, 0xc3, 0x09, 0x25,
- 0x05, 0xc3, 0x09, 0x34, 0xcd, 0x77, 0x7e, 0x08, 0x0b, 0x19, 0xd1, 0x53,
- 0xa3, 0x08, 0x0b, 0x99, 0xd3, 0x43, 0xc2, 0x08, 0x0b, 0xa1, 0xd3, 0x43,
- 0x17, 0x08, 0x0b, 0x80, 0xc9, 0xac, 0x55, 0x08, 0x0c, 0x31, 0xc9, 0xaa,
- 0x9c, 0x08, 0x0c, 0x38, 0xc6, 0x00, 0x71, 0x00, 0x1f, 0x89, 0xd2, 0x49,
- 0xfe, 0x00, 0x1f, 0xe0, 0xca, 0x37, 0x0e, 0x01, 0x13, 0x99, 0xc5, 0x07,
- 0x62, 0x01, 0x13, 0x28, 0x4a, 0x37, 0x1e, 0x43, 0x09, 0x40, 0xe0, 0x0c,
- 0x07, 0x01, 0x54, 0x60, 0x47, 0xc6, 0x0a, 0xc3, 0x09, 0x4f, 0x50, 0x42,
- 0xb8, 0x43, 0x09, 0x5b, 0xe0, 0x05, 0x87, 0x01, 0x54, 0x90, 0x8e, 0x08,
- 0x9b, 0x18, 0x94, 0x08, 0x9a, 0x20, 0x8e, 0x08, 0x98, 0x63, 0x03, 0x09,
- 0x61, 0x94, 0x08, 0x98, 0x5a, 0x03, 0x09, 0x65, 0xcf, 0x13, 0x3a, 0x08,
- 0x9a, 0xf9, 0xc8, 0x11, 0x40, 0x08, 0x9a, 0xf0, 0xc2, 0x00, 0xa4, 0x08,
- 0x99, 0x11, 0x83, 0x08, 0x99, 0x08, 0xc2, 0x00, 0xa4, 0x08, 0x99, 0x01,
- 0x83, 0x08, 0x98, 0xf8, 0xcb, 0x19, 0xc6, 0x0f, 0xb0, 0x09, 0xc8, 0xba,
- 0x6d, 0x0f, 0xc9, 0x48, 0x94, 0x00, 0xe5, 0xa3, 0x03, 0x09, 0x69, 0x87,
- 0x00, 0xe5, 0x80, 0x94, 0x00, 0xe5, 0x11, 0x90, 0x00, 0xe4, 0xb8, 0xc2,
- 0x00, 0xbb, 0x00, 0xe5, 0x69, 0xc2, 0x00, 0x34, 0x00, 0xe5, 0x48, 0xc2,
- 0x00, 0x34, 0x00, 0x85, 0xc9, 0xc2, 0x00, 0xbb, 0x00, 0x85, 0xe8, 0x87,
- 0x00, 0x86, 0x01, 0x94, 0x00, 0x86, 0x20, 0x90, 0x00, 0x86, 0xb9, 0x94,
- 0x00, 0x87, 0x10, 0xc2, 0x00, 0x34, 0x00, 0x87, 0x49, 0xc2, 0x00, 0xbb,
- 0x00, 0x87, 0x68, 0x87, 0x00, 0x87, 0x81, 0x94, 0x00, 0x87, 0xa2, 0x03,
- 0x09, 0x6f, 0xc2, 0x00, 0x34, 0x01, 0x68, 0xc9, 0xc2, 0x00, 0xbb, 0x01,
- 0x68, 0xe8, 0x87, 0x01, 0x69, 0x01, 0x94, 0x01, 0x69, 0x20, 0xc3, 0x00,
- 0x15, 0x01, 0x60, 0x29, 0x14, 0x43, 0x09, 0x75, 0x87, 0x01, 0x60, 0x49,
- 0xc4, 0x7f, 0x32, 0x01, 0x61, 0x58, 0xc9, 0xa9, 0x8e, 0x01, 0x61, 0x39,
- 0xc7, 0xc7, 0x4c, 0x01, 0x61, 0x48, 0xc2, 0x00, 0xb3, 0x01, 0x60, 0xdb,
- 0x03, 0x09, 0x7d, 0x83, 0x01, 0x60, 0xf0, 0xca, 0x9f, 0xb4, 0x01, 0x61,
- 0x28, 0xc3, 0x00, 0x15, 0x01, 0x61, 0xa9, 0x14, 0x43, 0x09, 0x83, 0x87,
- 0x01, 0x61, 0xc9, 0xc4, 0x7f, 0x32, 0x01, 0x62, 0xd8, 0xc9, 0xa9, 0x8e,
- 0x01, 0x62, 0xb9, 0xc7, 0xc7, 0x4c, 0x01, 0x62, 0xc8, 0xc2, 0x00, 0xb3,
- 0x01, 0x62, 0x5b, 0x03, 0x09, 0x8b, 0x83, 0x01, 0x62, 0x70, 0xca, 0x9f,
- 0xb4, 0x01, 0x62, 0xa8, 0x94, 0x00, 0x58, 0x5b, 0x03, 0x09, 0x91, 0x8e,
- 0x00, 0x58, 0x62, 0x03, 0x09, 0x95, 0x83, 0x00, 0x58, 0xf9, 0xc2, 0x00,
- 0xa4, 0x00, 0x59, 0x00, 0x83, 0x00, 0x59, 0x09, 0xc2, 0x00, 0xa4, 0x00,
- 0x59, 0x10, 0x94, 0x00, 0x5a, 0x20, 0x8e, 0x00, 0x5b, 0x18, 0x00, 0x43,
- 0x09, 0x99, 0xc9, 0x4f, 0xff, 0x0f, 0x69, 0x38, 0x00, 0x43, 0x09, 0xa5,
- 0xc9, 0x4f, 0xff, 0x0f, 0x69, 0x30, 0x00, 0x43, 0x09, 0xb1, 0xc9, 0x4f,
- 0xff, 0x0f, 0x69, 0x40, 0x00, 0x43, 0x09, 0xbd, 0xc9, 0x4f, 0xff, 0x0f,
- 0x69, 0x48, 0xc9, 0x4f, 0xff, 0x0f, 0x69, 0x50, 0xc7, 0x0c, 0x4b, 0x0f,
- 0x68, 0xc1, 0xc8, 0x50, 0x00, 0x0f, 0x69, 0x08, 0xc9, 0x4f, 0xff, 0x0f,
- 0x69, 0x58, 0xc7, 0x0c, 0x4b, 0x0f, 0x68, 0xc9, 0xc8, 0x50, 0x00, 0x0f,
- 0x69, 0x10, 0xc4, 0x01, 0x4a, 0x08, 0x7c, 0x41, 0xc4, 0x17, 0x13, 0x08,
- 0x7c, 0x38, 0xc5, 0x01, 0x62, 0x08, 0x7c, 0x29, 0xc5, 0x00, 0x95, 0x08,
- 0x7c, 0x20, 0xc5, 0x01, 0x62, 0x08, 0x7c, 0x19, 0xc5, 0x00, 0x95, 0x08,
- 0x7c, 0x10, 0xc3, 0x13, 0xfc, 0x08, 0x7c, 0x09, 0xc5, 0xd0, 0x5e, 0x08,
- 0x7b, 0xc0, 0x03, 0xc3, 0x09, 0xc9, 0xc3, 0x0d, 0xd9, 0x08, 0x7b, 0xf8,
- 0xc3, 0x01, 0xb4, 0x08, 0x78, 0xeb, 0x03, 0x09, 0xd5, 0x16, 0xc3, 0x09,
- 0xdb, 0x08, 0x43, 0x09, 0xe9, 0x46, 0x02, 0x91, 0xc3, 0x09, 0xf5, 0xd3,
- 0x45, 0xe9, 0x08, 0x79, 0x38, 0xce, 0x6d, 0x45, 0x08, 0x53, 0xf9, 0x44,
- 0x0b, 0x11, 0x43, 0x0a, 0x54, 0x16, 0xc3, 0x0a, 0x60, 0xc4, 0x4b, 0x98,
- 0x08, 0x53, 0xd1, 0x06, 0xc3, 0x0a, 0x70, 0xc4, 0xe0, 0xa3, 0x08, 0x53,
- 0xc1, 0x09, 0xc3, 0x0a, 0x7c, 0xc4, 0xe4, 0x8f, 0x08, 0x53, 0x41, 0xc4,
- 0x5d, 0xe2, 0x08, 0x53, 0x39, 0x15, 0xc3, 0x0a, 0x88, 0xc3, 0x78, 0xa9,
- 0x08, 0x53, 0x29, 0xc4, 0xbf, 0xb9, 0x08, 0x53, 0x21, 0xc3, 0x0b, 0x0e,
- 0x08, 0x53, 0x19, 0xc2, 0x01, 0xf0, 0x08, 0x53, 0x03, 0x03, 0x0a, 0x92,
- 0xc6, 0xd0, 0x5d, 0x08, 0x53, 0x09, 0x0d, 0xc3, 0x0a, 0x98, 0xc3, 0x1f,
- 0xd8, 0x08, 0x53, 0x61, 0xc2, 0x17, 0x9f, 0x08, 0x53, 0x81, 0x03, 0x43,
- 0x0a, 0xa4, 0xc2, 0x00, 0x7b, 0x08, 0x67, 0xd9, 0xc3, 0x43, 0xcd, 0x08,
- 0x67, 0xe8, 0x00, 0x43, 0x0a, 0xb0, 0x95, 0x08, 0x67, 0x91, 0x97, 0x08,
- 0x67, 0x59, 0xc2, 0x20, 0x3e, 0x08, 0x66, 0xa8, 0x90, 0x08, 0x66, 0xcb,
- 0x03, 0x0a, 0xbc, 0x9c, 0x08, 0x67, 0x7b, 0x03, 0x0a, 0xcb, 0x98, 0x08,
- 0x67, 0x71, 0x85, 0x08, 0x66, 0x23, 0x03, 0x0a, 0xcf, 0x96, 0x08, 0x67,
- 0x33, 0x03, 0x0a, 0xd7, 0x95, 0x08, 0x67, 0x23, 0x03, 0x0a, 0xdb, 0x8f,
- 0x08, 0x66, 0xc3, 0x03, 0x0a, 0xdf, 0x8e, 0x08, 0x66, 0xb3, 0x03, 0x0a,
- 0xe3, 0x8d, 0x08, 0x66, 0x99, 0x8c, 0x08, 0x66, 0x91, 0x8a, 0x08, 0x66,
- 0x79, 0x89, 0x08, 0x66, 0x6b, 0x03, 0x0a, 0xe7, 0x88, 0x08, 0x66, 0x61,
- 0x87, 0x08, 0x66, 0x59, 0x86, 0x08, 0x66, 0x39, 0x84, 0x08, 0x66, 0x11,
- 0x92, 0x08, 0x67, 0x01, 0x94, 0x08, 0x67, 0x10, 0xc2, 0x0f, 0x4d, 0x08,
- 0x67, 0x69, 0xc2, 0x1b, 0xd8, 0x08, 0x66, 0xf0, 0xc2, 0x0f, 0x4d, 0x08,
- 0x67, 0x61, 0xc2, 0x1b, 0xd8, 0x08, 0x66, 0xe8, 0x91, 0x08, 0x66, 0xe1,
- 0xc2, 0x00, 0x4c, 0x08, 0x66, 0xf8, 0x8d, 0x08, 0x66, 0xa1, 0xc2, 0x00,
- 0x58, 0x08, 0x66, 0x41, 0xc2, 0x00, 0x73, 0x08, 0x66, 0x19, 0x83, 0x08,
- 0x66, 0x08, 0x8b, 0x08, 0x66, 0x88, 0x90, 0x08, 0x64, 0xcb, 0x03, 0x0a,
- 0xeb, 0x96, 0x08, 0x65, 0x33, 0x03, 0x0a, 0xfa, 0x95, 0x08, 0x65, 0x23,
- 0x03, 0x0a, 0xfe, 0x92, 0x08, 0x65, 0x01, 0x8f, 0x08, 0x64, 0xc3, 0x03,
- 0x0b, 0x02, 0x8e, 0x08, 0x64, 0xb3, 0x03, 0x0b, 0x06, 0x8d, 0x08, 0x64,
- 0x99, 0x8c, 0x08, 0x64, 0x91, 0x8a, 0x08, 0x64, 0x79, 0x89, 0x08, 0x64,
- 0x6b, 0x03, 0x0b, 0x0a, 0x88, 0x08, 0x64, 0x61, 0x87, 0x08, 0x64, 0x59,
- 0x86, 0x08, 0x64, 0x39, 0x85, 0x08, 0x64, 0x23, 0x03, 0x0b, 0x0e, 0x84,
- 0x08, 0x64, 0x11, 0x94, 0x08, 0x65, 0x11, 0x98, 0x08, 0x65, 0x71, 0x9c,
- 0x08, 0x65, 0x7a, 0x03, 0x0b, 0x16, 0xc2, 0x00, 0x4c, 0x08, 0x64, 0xf9,
- 0x91, 0x08, 0x64, 0xe0, 0xc2, 0x1b, 0xd8, 0x08, 0x64, 0xf1, 0xc2, 0x0f,
- 0x4d, 0x08, 0x65, 0x68, 0xc2, 0x1b, 0xd8, 0x08, 0x64, 0xe9, 0xc2, 0x0f,
- 0x4d, 0x08, 0x65, 0x60, 0xc2, 0x20, 0x3e, 0x08, 0x64, 0xa9, 0x97, 0x08,
- 0x65, 0x59, 0x95, 0x08, 0x65, 0x90, 0x8d, 0x08, 0x64, 0xa1, 0xc2, 0x00,
- 0x58, 0x08, 0x64, 0x41, 0xc2, 0x00, 0x73, 0x08, 0x64, 0x19, 0x83, 0x08,
- 0x64, 0x08, 0x8b, 0x08, 0x64, 0x88, 0x96, 0x08, 0x62, 0x39, 0x93, 0x08,
- 0x61, 0xc1, 0x87, 0x08, 0x60, 0x3b, 0x03, 0x0b, 0x1a, 0x92, 0x08, 0x61,
- 0x80, 0x07, 0xc3, 0x0b, 0x1e, 0x96, 0x08, 0x62, 0x19, 0x95, 0x08, 0x61,
- 0xeb, 0x03, 0x0b, 0x46, 0x94, 0x08, 0x61, 0xd1, 0x93, 0x08, 0x61, 0xa1,
- 0x90, 0x08, 0x61, 0x19, 0x8e, 0x08, 0x60, 0xf1, 0x9b, 0x08, 0x60, 0xb1,
- 0x86, 0x08, 0x60, 0x89, 0x89, 0x08, 0x60, 0x69, 0x84, 0x08, 0x60, 0x48,
- 0xc2, 0x00, 0xe5, 0x08, 0x62, 0x09, 0x10, 0xc3, 0x0b, 0x4a, 0x8f, 0x08,
- 0x61, 0x11, 0xc2, 0x00, 0x32, 0x08, 0x61, 0x09, 0x9c, 0x08, 0x60, 0xa1,
- 0x92, 0x08, 0x61, 0x79, 0x85, 0x08, 0x61, 0x90, 0x93, 0x08, 0x61, 0xb1,
- 0x85, 0x08, 0x61, 0x88, 0x87, 0x08, 0x60, 0x13, 0x03, 0x0b, 0x5a, 0x96,
- 0x08, 0x62, 0x21, 0xc2, 0x00, 0xe5, 0x08, 0x62, 0x01, 0x94, 0x08, 0x61,
- 0xd9, 0x93, 0x08, 0x61, 0xa9, 0x8e, 0x08, 0x60, 0xf9, 0x9b, 0x08, 0x60,
- 0xb9, 0x86, 0x08, 0x60, 0x91, 0x89, 0x08, 0x60, 0x71, 0x84, 0x08, 0x60,
- 0x51, 0xc2, 0x00, 0x35, 0x08, 0x61, 0x60, 0xc2, 0x00, 0xe5, 0x08, 0x62,
- 0x11, 0x85, 0x08, 0x61, 0x99, 0x10, 0xc3, 0x0b, 0x75, 0x9c, 0x08, 0x60,
- 0xa8, 0x93, 0x08, 0x61, 0xc9, 0x87, 0x08, 0x60, 0x42, 0x03, 0x0b, 0x81,
- 0x93, 0x08, 0x61, 0xb8, 0xc5, 0x2a, 0x13, 0x08, 0x54, 0xf9, 0xc2, 0x00,
- 0x4d, 0x08, 0x54, 0xf0, 0x8a, 0x08, 0x54, 0xe1, 0xc2, 0x00, 0x34, 0x08,
- 0x54, 0xc0, 0x0a, 0xc3, 0x0b, 0x85, 0xc2, 0x02, 0x98, 0x08, 0x54, 0xb9,
- 0xc2, 0x0d, 0xf7, 0x08, 0x54, 0x48, 0xc4, 0x95, 0xd4, 0x08, 0x54, 0xb1,
- 0xc3, 0x14, 0xa3, 0x08, 0x54, 0xa0, 0x8e, 0x08, 0x54, 0xa9, 0x86, 0x08,
- 0x54, 0x98, 0x9f, 0x08, 0x54, 0x31, 0x9e, 0x08, 0x54, 0x51, 0xa0, 0x08,
- 0x54, 0x78, 0xc2, 0x02, 0x98, 0x08, 0x54, 0x11, 0xc2, 0x0d, 0xf7, 0x08,
- 0x54, 0x00, 0xc2, 0x0f, 0x61, 0x08, 0x54, 0x59, 0xc3, 0x19, 0xa7, 0x08,
- 0x54, 0x68, 0xc3, 0x01, 0x1e, 0x08, 0x54, 0x89, 0xc2, 0x00, 0x34, 0x08,
- 0x54, 0x90, 0x45, 0x00, 0xcb, 0xc3, 0x0b, 0x91, 0xcc, 0x1e, 0x68, 0x08,
- 0x1e, 0x81, 0x47, 0x33, 0xef, 0x43, 0x0b, 0xfa, 0xc2, 0x00, 0x42, 0x08,
- 0x1a, 0x99, 0x1c, 0x43, 0x0c, 0x0a, 0x88, 0x08, 0x1b, 0x58, 0xc3, 0xcd,
- 0x02, 0x08, 0x1a, 0xa9, 0xc3, 0x13, 0xa7, 0x08, 0x1a, 0xb8, 0x87, 0x08,
- 0x1b, 0x91, 0x83, 0x08, 0x1b, 0xa8, 0xc3, 0xd8, 0xf6, 0x08, 0x1a, 0xf1,
- 0xc2, 0x00, 0x28, 0x08, 0x1b, 0x70, 0xc2, 0x23, 0xb4, 0x08, 0x1b, 0x09,
- 0x0a, 0x43, 0x0c, 0x16, 0xc2, 0x00, 0xda, 0x08, 0x1b, 0x11, 0xc3, 0xca,
- 0xba, 0x08, 0x1b, 0x68, 0xc2, 0x14, 0xd5, 0x08, 0x1b, 0x39, 0xc2, 0x00,
- 0x28, 0x08, 0x1b, 0x7b, 0x03, 0x0c, 0x22, 0x83, 0x08, 0x1b, 0xa3, 0x03,
- 0x0c, 0x2a, 0x97, 0x08, 0x1b, 0x98, 0x91, 0x08, 0x1b, 0x50, 0x87, 0x08,
- 0x18, 0x11, 0x83, 0x08, 0x18, 0x51, 0x97, 0x08, 0x18, 0x91, 0xc2, 0x01,
- 0xf0, 0x08, 0x18, 0xc8, 0x8e, 0x08, 0x18, 0x59, 0x8f, 0x08, 0x18, 0x61,
- 0x90, 0x08, 0x18, 0x69, 0x95, 0x08, 0x18, 0xa3, 0x03, 0x0c, 0x2e, 0x94,
- 0x08, 0x18, 0x9b, 0x03, 0x0c, 0x36, 0xc2, 0x00, 0xe1, 0x08, 0x18, 0xb9,
- 0x88, 0x08, 0x18, 0xd0, 0xc2, 0x01, 0xf0, 0x08, 0x18, 0x41, 0x87, 0x08,
- 0x18, 0xa8, 0x8b, 0x08, 0x18, 0xe8, 0x87, 0x08, 0x18, 0x81, 0xc2, 0x01,
- 0xf0, 0x08, 0x18, 0xc0, 0xc2, 0x01, 0xf0, 0x08, 0x18, 0x89, 0xcb, 0x94,
- 0x73, 0x08, 0x19, 0x78, 0x97, 0x08, 0x18, 0xf0, 0xc2, 0x00, 0x7b, 0x08,
- 0x19, 0x09, 0xc3, 0x43, 0xcd, 0x08, 0x19, 0x18, 0x83, 0x08, 0x26, 0x49,
- 0xc2, 0x0c, 0x65, 0x08, 0x26, 0x58, 0x83, 0x08, 0x27, 0x89, 0xc2, 0x0c,
- 0x65, 0x08, 0x27, 0x98, 0x4b, 0x86, 0x8d, 0xc3, 0x0c, 0x3a, 0xd2, 0x4d,
- 0xb8, 0x0e, 0x7d, 0x90, 0x42, 0x14, 0xd5, 0xc3, 0x0c, 0x46, 0x46, 0x83,
- 0xe2, 0x43, 0x0c, 0x55, 0x45, 0xdd, 0x63, 0xc3, 0x0c, 0x61, 0xce, 0x6e,
- 0x6b, 0x0e, 0x7c, 0xd0, 0x11, 0xc3, 0x0c, 0x73, 0xc4, 0x64, 0x7b, 0x0e,
- 0x7d, 0x12, 0x03, 0x0c, 0x85, 0x11, 0xc3, 0x0c, 0x8b, 0xc3, 0x2d, 0xf6,
- 0x0e, 0x7c, 0xda, 0x03, 0x0c, 0x9a, 0x11, 0xc3, 0x0c, 0xa0, 0xc7, 0xbb,
- 0x06, 0x0e, 0x7c, 0x90, 0xce, 0x64, 0x71, 0x0e, 0x7c, 0x89, 0x42, 0x00,
- 0x46, 0xc3, 0x0c, 0xac, 0xc9, 0xb5, 0x0d, 0x0e, 0x7c, 0x5a, 0x03, 0x0c,
- 0xca, 0xd4, 0x3a, 0xfd, 0x0e, 0x7a, 0xd1, 0xc8, 0xbb, 0xe5, 0x0e, 0x7a,
- 0xb8, 0xc7, 0x78, 0x91, 0x0e, 0x7c, 0x01, 0xc8, 0x98, 0x1d, 0x0e, 0x7b,
- 0xf0, 0xc7, 0x6e, 0xe2, 0x0e, 0x7b, 0xc1, 0xc8, 0x4d, 0xc2, 0x0e, 0x7b,
- 0xb0, 0xc5, 0x01, 0xf7, 0x0e, 0x78, 0x71, 0xc4, 0x01, 0x1e, 0x0e, 0x78,
- 0x10, 0xd5, 0x37, 0xd5, 0x0e, 0x79, 0xb8, 0xc6, 0x41, 0xfa, 0x0e, 0x78,
- 0xe1, 0x42, 0x04, 0x2a, 0x43, 0x0c, 0xd0, 0xc5, 0x01, 0xf7, 0x0e, 0x78,
- 0x99, 0xc4, 0x01, 0x1e, 0x0e, 0x78, 0x38, 0xc5, 0x01, 0xf7, 0x0e, 0x78,
- 0x79, 0xc4, 0x01, 0x1e, 0x0e, 0x78, 0x18, 0xc5, 0x01, 0xf7, 0x0e, 0x78,
- 0x61, 0xc4, 0x01, 0x1e, 0x0e, 0x78, 0x00, 0xc6, 0x78, 0x92, 0x0e, 0x78,
- 0xc9, 0x4b, 0x90, 0x27, 0x43, 0x0c, 0xdc, 0xc5, 0x01, 0xf7, 0x0e, 0x78,
- 0xb9, 0xc4, 0x01, 0x1e, 0x0e, 0x78, 0x58, 0xc5, 0xab, 0x85, 0x05, 0x4e,
- 0x58, 0xc4, 0xac, 0xd6, 0x05, 0x4e, 0x49, 0xc3, 0x08, 0x07, 0x05, 0x4e,
- 0x28, 0xc8, 0x6a, 0xf8, 0x05, 0x4d, 0x81, 0xc4, 0x6a, 0xf2, 0x05, 0x4d,
- 0x78, 0xc5, 0x6a, 0xf1, 0x05, 0x4d, 0x31, 0xc5, 0xd8, 0xa4, 0x05, 0x4c,
- 0x48, 0xc6, 0xcc, 0xd3, 0x05, 0x4c, 0xf8, 0xc6, 0xcc, 0xd3, 0x05, 0x4c,
- 0xc0, 0xc6, 0xcd, 0x7b, 0x05, 0x4c, 0x52, 0x03, 0x0c, 0xe8, 0xca, 0x6a,
- 0xf6, 0x05, 0x4d, 0x18, 0xca, 0x6a, 0xf6, 0x05, 0x4c, 0xf0, 0xc6, 0xcc,
- 0xd3, 0x05, 0x4d, 0x08, 0xca, 0x6a, 0xf6, 0x05, 0x4c, 0xe0, 0xc5, 0x6a,
- 0xf1, 0x05, 0x4c, 0x89, 0xc5, 0x91, 0xa3, 0x05, 0x4c, 0x80, 0xd0, 0x3b,
- 0x51, 0x01, 0x2c, 0xf8, 0x56, 0x2f, 0x85, 0xc3, 0x0c, 0xee, 0x46, 0x01,
- 0xd1, 0x43, 0x0c, 0xfa, 0x9a, 0x05, 0x22, 0xd1, 0x96, 0x05, 0x22, 0xc9,
- 0x91, 0x05, 0x22, 0x9b, 0x03, 0x0d, 0x06, 0x92, 0x05, 0x22, 0xe0, 0x92,
- 0x05, 0x22, 0xc1, 0x9a, 0x05, 0x22, 0xb1, 0x96, 0x05, 0x22, 0xa8, 0x87,
- 0x05, 0x22, 0x83, 0x03, 0x0d, 0x0e, 0x92, 0x05, 0x22, 0x69, 0x9a, 0x05,
- 0x22, 0x59, 0x96, 0x05, 0x22, 0x50, 0x94, 0x05, 0x22, 0x4b, 0x03, 0x0d,
- 0x1a, 0x92, 0x05, 0x22, 0x39, 0x9a, 0x05, 0x22, 0x29, 0x96, 0x05, 0x22,
- 0x20, 0x94, 0x05, 0x22, 0x1b, 0x03, 0x0d, 0x1e, 0x92, 0x05, 0x22, 0x09,
- 0x9a, 0x05, 0x21, 0xf9, 0x91, 0x05, 0x21, 0xd2, 0x03, 0x0d, 0x22, 0x92,
- 0x05, 0x21, 0xf1, 0x9a, 0x05, 0x21, 0xe1, 0x96, 0x05, 0x21, 0xd8, 0x87,
- 0x05, 0x21, 0xbb, 0x03, 0x0d, 0x26, 0x92, 0x05, 0x21, 0xa1, 0x9a, 0x05,
- 0x21, 0x91, 0x96, 0x05, 0x21, 0x88, 0x94, 0x05, 0x1d, 0x53, 0x03, 0x0d,
- 0x32, 0x92, 0x05, 0x1d, 0x41, 0x9a, 0x05, 0x1d, 0x31, 0x96, 0x05, 0x1d,
- 0x28, 0x94, 0x05, 0x1d, 0x23, 0x03, 0x0d, 0x36, 0x92, 0x05, 0x1d, 0x11,
- 0x9a, 0x05, 0x1d, 0x01, 0x96, 0x05, 0x1c, 0xf8, 0x92, 0x05, 0x1c, 0xf1,
- 0x9a, 0x05, 0x1c, 0xe1, 0x96, 0x05, 0x1c, 0xd8, 0x92, 0x05, 0x1c, 0xd1,
- 0x9a, 0x05, 0x1c, 0xc0, 0x92, 0x05, 0x1c, 0xb9, 0x9a, 0x05, 0x1c, 0xa9,
- 0x96, 0x05, 0x1c, 0xa0, 0x9a, 0x05, 0x12, 0xe9, 0x92, 0x05, 0x12, 0xf8,
- 0x96, 0x05, 0x13, 0x01, 0x9a, 0x05, 0x13, 0x09, 0x92, 0x05, 0x13, 0x18,
- 0x96, 0x05, 0x13, 0x21, 0x9a, 0x05, 0x13, 0x28, 0x96, 0x05, 0x13, 0x39,
- 0x9a, 0x05, 0x13, 0x41, 0x92, 0x05, 0x13, 0x50, 0x9a, 0x05, 0x13, 0x59,
- 0x92, 0x05, 0x13, 0x68, 0x96, 0x05, 0x17, 0x69, 0x9a, 0x05, 0x17, 0x71,
- 0x92, 0x05, 0x17, 0x81, 0x87, 0x05, 0x17, 0x92, 0x03, 0x0d, 0x3a, 0x96,
- 0x05, 0x17, 0xa1, 0x9a, 0x05, 0x17, 0xa9, 0x92, 0x05, 0x17, 0xb8, 0x91,
- 0x05, 0x17, 0xcb, 0x03, 0x0d, 0x42, 0x9a, 0x05, 0x17, 0xf1, 0x92, 0x05,
- 0x18, 0x01, 0x94, 0x05, 0x18, 0x12, 0x03, 0x0d, 0x46, 0x9a, 0x05, 0x17,
- 0xd1, 0x92, 0x05, 0x17, 0xd8, 0x9a, 0x05, 0x17, 0xe0, 0x96, 0x05, 0x18,
- 0x19, 0x9a, 0x05, 0x18, 0x21, 0x92, 0x05, 0x18, 0x31, 0x94, 0x05, 0x18,
- 0x42, 0x03, 0x0d, 0x4a, 0x96, 0x05, 0x03, 0xb1, 0x9a, 0x05, 0x03, 0xb9,
- 0x92, 0x05, 0x03, 0xc9, 0x87, 0x05, 0x03, 0xda, 0x03, 0x0d, 0x4e, 0x96,
- 0x05, 0x03, 0xe9, 0x9a, 0x05, 0x03, 0xf1, 0x92, 0x05, 0x04, 0x00, 0x96,
- 0x05, 0x04, 0x09, 0x9a, 0x05, 0x04, 0x11, 0x92, 0x05, 0x04, 0x20, 0x96,
- 0x05, 0x04, 0x29, 0x9a, 0x05, 0x04, 0x31, 0x92, 0x05, 0x04, 0x41, 0x94,
- 0x05, 0x04, 0x52, 0x03, 0x0d, 0x56, 0x96, 0x05, 0x04, 0x59, 0x9a, 0x05,
- 0x04, 0x61, 0x92, 0x05, 0x04, 0x71, 0x94, 0x05, 0x04, 0x82, 0x03, 0x0d,
- 0x5a, 0x96, 0x05, 0x0a, 0x89, 0x9a, 0x05, 0x0a, 0x91, 0x92, 0x05, 0x0a,
- 0xa1, 0x87, 0x05, 0x0a, 0xb2, 0x03, 0x0d, 0x5e, 0x96, 0x05, 0x0a, 0xb9,
- 0x9a, 0x05, 0x0a, 0xc1, 0x92, 0x05, 0x0a, 0xd0, 0x96, 0x05, 0x0a, 0xd9,
- 0x9a, 0x05, 0x0a, 0xe1, 0x92, 0x05, 0x0a, 0xf0, 0x9a, 0x05, 0x0a, 0xf9,
- 0x92, 0x05, 0x0b, 0x08, 0x96, 0x05, 0x0b, 0x11, 0x9a, 0x05, 0x0b, 0x19,
- 0x92, 0x05, 0x0b, 0x29, 0x94, 0x05, 0x0b, 0x3a, 0x03, 0x0d, 0x62, 0x9a,
- 0x05, 0x0c, 0x09, 0x92, 0x05, 0x0c, 0x18, 0x9a, 0x05, 0x0c, 0x39, 0x92,
- 0x05, 0x0c, 0x48, 0x9a, 0x05, 0x0c, 0xa8, 0x92, 0x05, 0x21, 0x81, 0x9a,
- 0x05, 0x21, 0x71, 0x96, 0x05, 0x21, 0x68, 0x94, 0x05, 0x21, 0x63, 0x03,
- 0x0d, 0x66, 0x92, 0x05, 0x21, 0x51, 0x9a, 0x05, 0x21, 0x41, 0x96, 0x05,
- 0x21, 0x39, 0x91, 0x05, 0x21, 0x0a, 0x03, 0x0d, 0x6a, 0x92, 0x05, 0x21,
- 0x31, 0x9a, 0x05, 0x21, 0x21, 0x96, 0x05, 0x21, 0x18, 0x87, 0x05, 0x20,
- 0xf3, 0x03, 0x0d, 0x72, 0x92, 0x05, 0x20, 0xd9, 0x9a, 0x05, 0x20, 0xc9,
- 0x96, 0x05, 0x20, 0xc0, 0x9a, 0x05, 0x1f, 0xd0, 0x9a, 0x05, 0x1f, 0xa0,
- 0x92, 0x05, 0x1f, 0x59, 0x9a, 0x05, 0x1f, 0x49, 0x96, 0x05, 0x1f, 0x40,
- 0x92, 0x05, 0x1f, 0x39, 0x9a, 0x05, 0x1f, 0x29, 0x96, 0x05, 0x1f, 0x20,
- 0x9a, 0x05, 0x20, 0xb0, 0x9a, 0x05, 0x20, 0x80, 0x92, 0x05, 0x20, 0x39,
- 0x9a, 0x05, 0x20, 0x29, 0x96, 0x05, 0x20, 0x20, 0x92, 0x05, 0x20, 0x19,
- 0x9a, 0x05, 0x20, 0x09, 0x96, 0x05, 0x20, 0x00, 0x9a, 0x05, 0x1e, 0xf0,
- 0x9a, 0x05, 0x1e, 0xc0, 0x92, 0x05, 0x1e, 0x61, 0x9a, 0x05, 0x1e, 0x50,
- 0x94, 0x05, 0x1c, 0x9b, 0x03, 0x0d, 0x7e, 0x92, 0x05, 0x1c, 0x89, 0x9a,
- 0x05, 0x1c, 0x79, 0x96, 0x05, 0x1c, 0x70, 0x94, 0x05, 0x1c, 0x6b, 0x03,
- 0x0d, 0x82, 0x92, 0x05, 0x1c, 0x59, 0x9a, 0x05, 0x1c, 0x49, 0x96, 0x05,
- 0x1c, 0x41, 0x91, 0x05, 0x1b, 0xfa, 0x03, 0x0d, 0x86, 0x92, 0x05, 0x1c,
- 0x39, 0x9a, 0x05, 0x1c, 0x29, 0x96, 0x05, 0x1c, 0x20, 0x92, 0x05, 0x1c,
- 0x19, 0x9a, 0x05, 0x1c, 0x09, 0x96, 0x05, 0x1c, 0x00, 0x9a, 0x05, 0x1b,
- 0xe0, 0x94, 0x05, 0x1e, 0x2b, 0x03, 0x0d, 0x8a, 0x92, 0x05, 0x1e, 0x19,
- 0x9a, 0x05, 0x1e, 0x09, 0x96, 0x05, 0x1e, 0x00, 0x94, 0x05, 0x1d, 0xfb,
- 0x03, 0x0d, 0x8e, 0x92, 0x05, 0x1d, 0xe9, 0x9a, 0x05, 0x1d, 0xd9, 0x96,
- 0x05, 0x1d, 0xd1, 0x91, 0x05, 0x1d, 0x82, 0x03, 0x0d, 0x92, 0x92, 0x05,
- 0x1d, 0xc9, 0x9a, 0x05, 0x1d, 0xb9, 0x96, 0x05, 0x1d, 0xb0, 0x92, 0x05,
- 0x1d, 0xa9, 0x9a, 0x05, 0x1d, 0x99, 0x96, 0x05, 0x1d, 0x90, 0x92, 0x05,
- 0x1d, 0x71, 0x9a, 0x05, 0x1d, 0x61, 0x96, 0x05, 0x1d, 0x58, 0x92, 0x05,
- 0x1a, 0x19, 0x94, 0x05, 0x1a, 0x2b, 0x03, 0x0d, 0x9a, 0x96, 0x05, 0x1a,
- 0x01, 0x9a, 0x05, 0x1a, 0x08, 0x96, 0x05, 0x19, 0x51, 0x9a, 0x05, 0x19,
- 0x59, 0x92, 0x05, 0x19, 0x69, 0x87, 0x05, 0x19, 0x7a, 0x03, 0x0d, 0x9e,
- 0x96, 0x05, 0x19, 0x89, 0x9a, 0x05, 0x19, 0x91, 0x92, 0x05, 0x19, 0xa0,
- 0x96, 0x05, 0x19, 0xa9, 0x9a, 0x05, 0x19, 0xb1, 0x92, 0x05, 0x19, 0xc0,
- 0x9a, 0x05, 0x19, 0xc9, 0x92, 0x05, 0x19, 0xd8, 0x96, 0x05, 0x19, 0xe1,
- 0x9a, 0x05, 0x19, 0xe9, 0x92, 0x05, 0x19, 0xf8, 0x9a, 0x05, 0x18, 0x69,
- 0x92, 0x05, 0x18, 0x78, 0x9a, 0x05, 0x18, 0xa1, 0x92, 0x05, 0x18, 0xb0,
- 0x9a, 0x05, 0x19, 0x10, 0x9a, 0x05, 0x19, 0x40, 0x94, 0x05, 0x1b, 0xdb,
- 0x03, 0x0d, 0xa6, 0x92, 0x05, 0x1b, 0xc9, 0x9a, 0x05, 0x1b, 0xb9, 0x96,
- 0x05, 0x1b, 0xb0, 0x94, 0x05, 0x1b, 0xab, 0x03, 0x0d, 0xaa, 0x92, 0x05,
- 0x1b, 0x99, 0x9a, 0x05, 0x1b, 0x89, 0x91, 0x05, 0x1b, 0x3a, 0x03, 0x0d,
- 0xae, 0x92, 0x05, 0x1b, 0x81, 0x9a, 0x05, 0x1b, 0x71, 0x96, 0x05, 0x1b,
- 0x68, 0x92, 0x05, 0x1b, 0x61, 0x9a, 0x05, 0x1b, 0x51, 0x96, 0x05, 0x1b,
- 0x48, 0x92, 0x05, 0x1b, 0x21, 0x96, 0x05, 0x1b, 0x09, 0x9a, 0x05, 0x1b,
- 0x10, 0x9a, 0x05, 0x16, 0x70, 0x9a, 0x05, 0x16, 0x40, 0x9a, 0x05, 0x15,
- 0xd1, 0x92, 0x05, 0x15, 0xe0, 0x96, 0x05, 0x15, 0x91, 0x9a, 0x05, 0x15,
- 0x99, 0x92, 0x05, 0x15, 0xa8, 0x92, 0x05, 0x14, 0xd1, 0x9a, 0x05, 0x14,
- 0xc0, 0x9a, 0x05, 0x15, 0x30, 0x92, 0x05, 0x14, 0x99, 0x9a, 0x05, 0x14,
- 0x88, 0x9a, 0x05, 0x16, 0xe1, 0x92, 0x05, 0x16, 0xf0, 0x92, 0x05, 0x16,
- 0xd9, 0x9a, 0x05, 0x16, 0xc9, 0x96, 0x05, 0x16, 0xc0, 0x87, 0x05, 0x16,
- 0xb3, 0x03, 0x0d, 0xba, 0x92, 0x05, 0x16, 0x99, 0x9a, 0x05, 0x16, 0x89,
- 0x96, 0x05, 0x16, 0x80, 0x9a, 0x05, 0x16, 0xf9, 0x92, 0x05, 0x17, 0x08,
- 0x9a, 0x05, 0x17, 0x11, 0x92, 0x05, 0x17, 0x21, 0x94, 0x05, 0x17, 0x32,
- 0x03, 0x0d, 0xc6, 0x96, 0x05, 0x17, 0x39, 0x9a, 0x05, 0x17, 0x41, 0x92,
- 0x05, 0x17, 0x51, 0x94, 0x05, 0x17, 0x62, 0x03, 0x0d, 0xca, 0x94, 0x05,
- 0x1b, 0x03, 0x03, 0x0d, 0xce, 0x92, 0x05, 0x1a, 0xf1, 0x9a, 0x05, 0x1a,
- 0xe1, 0x96, 0x05, 0x1a, 0xd8, 0x94, 0x05, 0x1a, 0xd3, 0x03, 0x0d, 0xd2,
- 0x92, 0x05, 0x1a, 0xc1, 0x9a, 0x05, 0x1a, 0xb1, 0x96, 0x05, 0x1a, 0xa9,
- 0x91, 0x05, 0x1a, 0x5a, 0x03, 0x0d, 0xd6, 0x92, 0x05, 0x1a, 0xa1, 0x9a,
- 0x05, 0x1a, 0x91, 0x96, 0x05, 0x1a, 0x88, 0x92, 0x05, 0x1a, 0x81, 0x96,
- 0x05, 0x1a, 0x69, 0x9a, 0x05, 0x1a, 0x70, 0x96, 0x05, 0x1a, 0x31, 0x9a,
- 0x05, 0x1a, 0x39, 0x92, 0x05, 0x1a, 0x48, 0x9a, 0x05, 0x15, 0x60, 0x92,
- 0x05, 0x14, 0x31, 0x9a, 0x05, 0x14, 0x20, 0x92, 0x05, 0x12, 0xe1, 0x9a,
- 0x05, 0x12, 0xd0, 0x92, 0x05, 0x12, 0xc9, 0x9a, 0x05, 0x12, 0xb9, 0x96,
- 0x05, 0x12, 0xb0, 0x92, 0x05, 0x12, 0xa9, 0x9a, 0x05, 0x12, 0x99, 0x96,
- 0x05, 0x12, 0x90, 0x92, 0x05, 0x12, 0x89, 0x9a, 0x05, 0x12, 0x79, 0x96,
- 0x05, 0x12, 0x70, 0x96, 0x05, 0x12, 0x31, 0x9a, 0x05, 0x12, 0x39, 0x92,
- 0x05, 0x12, 0x49, 0x87, 0x05, 0x12, 0x62, 0x03, 0x0d, 0xde, 0x9a, 0x05,
- 0x13, 0x78, 0x96, 0x05, 0x04, 0x89, 0x9a, 0x05, 0x04, 0x91, 0x92, 0x05,
- 0x04, 0xa1, 0x87, 0x05, 0x04, 0xb2, 0x03, 0x0d, 0xea, 0x96, 0x05, 0x04,
- 0xc1, 0x9a, 0x05, 0x04, 0xc9, 0x92, 0x05, 0x04, 0xd8, 0x91, 0x05, 0x04,
- 0xeb, 0x03, 0x0d, 0xf2, 0x96, 0x05, 0x05, 0x19, 0x9a, 0x05, 0x05, 0x21,
- 0x92, 0x05, 0x05, 0x31, 0x94, 0x05, 0x05, 0x42, 0x03, 0x0d, 0xfa, 0x96,
- 0x05, 0x04, 0xf9, 0x9a, 0x05, 0x05, 0x01, 0x92, 0x05, 0x05, 0x10, 0x9a,
- 0x05, 0x05, 0x49, 0x92, 0x05, 0x05, 0x59, 0x94, 0x05, 0x05, 0x6a, 0x03,
- 0x0d, 0xfe, 0x96, 0x05, 0x0b, 0x41, 0x9a, 0x05, 0x0b, 0x49, 0x92, 0x05,
- 0x0b, 0x59, 0x87, 0x05, 0x0b, 0x72, 0x03, 0x0e, 0x02, 0x96, 0x05, 0x0b,
- 0x81, 0x9a, 0x05, 0x0b, 0x89, 0x92, 0x05, 0x0b, 0x98, 0x91, 0x05, 0x0b,
- 0xab, 0x03, 0x0e, 0x0e, 0x96, 0x05, 0x0b, 0xe0, 0x96, 0x05, 0x0b, 0xb1,
- 0x9a, 0x05, 0x0b, 0xb9, 0x92, 0x05, 0x0b, 0xc0, 0x9a, 0x05, 0x0b, 0xc9,
- 0x92, 0x05, 0x0b, 0xd8, 0x96, 0x05, 0x0c, 0xb9, 0x9a, 0x05, 0x0c, 0xc1,
- 0x92, 0x05, 0x0c, 0xd1, 0x87, 0x05, 0x0c, 0xe2, 0x03, 0x0e, 0x12, 0x96,
- 0x05, 0x0c, 0xf1, 0x9a, 0x05, 0x0c, 0xf9, 0x92, 0x05, 0x0d, 0x08, 0x91,
- 0x05, 0x0d, 0x1b, 0x03, 0x0e, 0x1a, 0x96, 0x05, 0x0d, 0x59, 0x9a, 0x05,
- 0x0d, 0x61, 0x92, 0x05, 0x0d, 0x71, 0x94, 0x05, 0x0d, 0x82, 0x03, 0x0e,
- 0x22, 0x9a, 0x05, 0x0d, 0x29, 0x92, 0x05, 0x0d, 0x38, 0x9a, 0x05, 0x0d,
- 0x41, 0x92, 0x05, 0x0d, 0x50, 0x96, 0x05, 0x0d, 0x89, 0x9a, 0x05, 0x0d,
- 0x91, 0x92, 0x05, 0x0d, 0xa1, 0x94, 0x05, 0x0d, 0xb2, 0x03, 0x0e, 0x26,
- 0x9a, 0x05, 0x23, 0xb1, 0x96, 0x05, 0x23, 0xa9, 0x92, 0x05, 0x23, 0xc0,
- 0x96, 0x05, 0x23, 0xc9, 0x9a, 0x05, 0x23, 0xd1, 0x92, 0x05, 0x23, 0xe0,
- 0x9a, 0x05, 0x24, 0x28, 0x9a, 0x05, 0x24, 0x58, 0x9a, 0x05, 0x23, 0x78,
- 0x96, 0x05, 0x23, 0x09, 0x9a, 0x05, 0x23, 0x11, 0x92, 0x05, 0x23, 0x20,
- 0x92, 0x05, 0x12, 0x19, 0x94, 0x05, 0x12, 0x2b, 0x03, 0x0e, 0x2a, 0x91,
- 0x05, 0x11, 0xbb, 0x03, 0x0e, 0x2e, 0x96, 0x05, 0x12, 0x01, 0x9a, 0x05,
- 0x12, 0x08, 0x9a, 0x05, 0x11, 0x80, 0x96, 0x05, 0x11, 0x91, 0x9a, 0x05,
- 0x11, 0x99, 0x92, 0x05, 0x11, 0xa8, 0x96, 0x05, 0x11, 0xc9, 0x9a, 0x05,
- 0x11, 0xd1, 0x92, 0x05, 0x11, 0xe0, 0x9a, 0x05, 0x11, 0xe9, 0x92, 0x05,
- 0x11, 0xf8, 0x9a, 0x05, 0x05, 0x91, 0x92, 0x05, 0x05, 0xa0, 0x96, 0x05,
- 0x05, 0xc9, 0x9a, 0x05, 0x05, 0xd1, 0x92, 0x05, 0x05, 0xe0, 0x9a, 0x05,
- 0x06, 0x38, 0x96, 0x05, 0x00, 0xd1, 0x9a, 0x05, 0x00, 0xd9, 0x92, 0x05,
- 0x00, 0xe8, 0x9a, 0x05, 0x01, 0x11, 0x92, 0x05, 0x01, 0x20, 0x9a, 0x05,
- 0x01, 0x80, 0x9a, 0x05, 0x01, 0xb0, 0x96, 0x05, 0x02, 0xb1, 0x9a, 0x05,
- 0x02, 0xb9, 0x92, 0x05, 0x02, 0xc9, 0x87, 0x05, 0x02, 0xe2, 0x03, 0x0e,
- 0x36, 0x96, 0x05, 0x02, 0xf1, 0x9a, 0x05, 0x02, 0xf9, 0x92, 0x05, 0x03,
- 0x08, 0x91, 0x05, 0x03, 0x1b, 0x03, 0x0e, 0x42, 0x96, 0x05, 0x03, 0x61,
- 0x9a, 0x05, 0x03, 0x69, 0x92, 0x05, 0x03, 0x79, 0x94, 0x05, 0x03, 0x8a,
- 0x03, 0x0e, 0x4a, 0x96, 0x05, 0x03, 0x29, 0x9a, 0x05, 0x03, 0x31, 0x92,
- 0x05, 0x03, 0x40, 0x9a, 0x05, 0x03, 0x49, 0x92, 0x05, 0x03, 0x58, 0x96,
- 0x05, 0x03, 0x91, 0x9a, 0x05, 0x03, 0x99, 0x92, 0x05, 0x03, 0xa8, 0x9a,
- 0x05, 0x01, 0xe1, 0x92, 0x05, 0x01, 0xf0, 0x9a, 0x05, 0x02, 0x19, 0x92,
- 0x05, 0x02, 0x28, 0x9a, 0x05, 0x02, 0x70, 0x9a, 0x05, 0x02, 0xa0, 0x9a,
- 0x05, 0x06, 0xe0, 0x96, 0x05, 0x07, 0x31, 0x9a, 0x05, 0x07, 0x39, 0x92,
- 0x05, 0x07, 0x48, 0x9a, 0x05, 0x07, 0xc0, 0x9a, 0x05, 0x07, 0xf0, 0x9a,
- 0x05, 0x08, 0x21, 0x92, 0x05, 0x08, 0x30, 0x9a, 0x05, 0x08, 0x58, 0x9a,
- 0x05, 0x08, 0xc0, 0x96, 0x05, 0x09, 0xb1, 0x9a, 0x05, 0x09, 0xb9, 0x92,
- 0x05, 0x09, 0xc9, 0x87, 0x05, 0x09, 0xda, 0x03, 0x0e, 0x4e, 0x96, 0x05,
- 0x09, 0xe9, 0x9a, 0x05, 0x09, 0xf1, 0x92, 0x05, 0x0a, 0x00, 0x91, 0x05,
- 0x0a, 0x13, 0x03, 0x0e, 0x56, 0x96, 0x05, 0x0a, 0x59, 0x9a, 0x05, 0x0a,
- 0x61, 0x92, 0x05, 0x0a, 0x71, 0x94, 0x05, 0x0a, 0x82, 0x03, 0x0e, 0x5e,
- 0x96, 0x05, 0x0a, 0x21, 0x9a, 0x05, 0x0a, 0x29, 0x92, 0x05, 0x0a, 0x38,
- 0x9a, 0x05, 0x0a, 0x41, 0x92, 0x05, 0x0a, 0x50, 0x9a, 0x05, 0x08, 0xf1,
- 0x92, 0x05, 0x09, 0x00, 0x96, 0x05, 0x09, 0x29, 0x9a, 0x05, 0x09, 0x31,
- 0x92, 0x05, 0x09, 0x40, 0x9a, 0x05, 0x09, 0xa0, 0x96, 0x05, 0x0d, 0xd9,
- 0x9a, 0x05, 0x0d, 0xe1, 0x92, 0x05, 0x0d, 0xf0, 0x96, 0x05, 0x0e, 0x19,
- 0x9a, 0x05, 0x0e, 0x21, 0x92, 0x05, 0x0e, 0x30, 0x9a, 0x05, 0x0e, 0x90,
- 0x9a, 0x05, 0x0e, 0xc0, 0x96, 0x05, 0x0e, 0xf1, 0x9a, 0x05, 0x0e, 0xf9,
- 0x92, 0x05, 0x0f, 0x08, 0x96, 0x05, 0x0f, 0x31, 0x9a, 0x05, 0x0f, 0x39,
- 0x92, 0x05, 0x0f, 0x48, 0x9a, 0x05, 0x0f, 0xb0, 0x96, 0x05, 0x10, 0xa1,
- 0x9a, 0x05, 0x10, 0xa9, 0x87, 0x05, 0x10, 0xc2, 0x03, 0x0e, 0x62, 0x96,
- 0x05, 0x10, 0xd1, 0x9a, 0x05, 0x10, 0xd9, 0x92, 0x05, 0x10, 0xe8, 0x91,
- 0x05, 0x11, 0x03, 0x03, 0x0e, 0x6a, 0x96, 0x05, 0x11, 0x51, 0x9a, 0x05,
- 0x11, 0x59, 0x92, 0x05, 0x11, 0x69, 0x94, 0x05, 0x11, 0x7a, 0x03, 0x0e,
- 0x76, 0x96, 0x05, 0x11, 0x11, 0x9a, 0x05, 0x11, 0x19, 0x92, 0x05, 0x11,
- 0x28, 0x96, 0x05, 0x11, 0x31, 0x9a, 0x05, 0x11, 0x39, 0x92, 0x05, 0x11,
- 0x48, 0x9a, 0x05, 0x0f, 0xe1, 0x92, 0x05, 0x0f, 0xf0, 0x9a, 0x05, 0x10,
- 0x19, 0x92, 0x05, 0x10, 0x28, 0x9a, 0x05, 0x10, 0x90, 0x0c, 0xc3, 0x0e,
- 0x7a, 0x0a, 0xc3, 0x0e, 0x85, 0x42, 0x00, 0xe5, 0xc3, 0x0e, 0x98, 0xc2,
- 0x15, 0x1d, 0x00, 0xaa, 0x09, 0xc2, 0x00, 0x6e, 0x00, 0xa5, 0x11, 0x8f,
- 0x00, 0xa5, 0xf8, 0x9b, 0x00, 0xc6, 0x11, 0x91, 0x00, 0xa8, 0xf8, 0x83,
- 0x00, 0xa9, 0x18, 0x8b, 0x00, 0xa8, 0xd8, 0x83, 0x08, 0xd5, 0xd3, 0x03,
- 0x0e, 0xb1, 0x91, 0x08, 0xd5, 0xc3, 0x03, 0x0e, 0xb5, 0x8b, 0x08, 0xd5,
- 0xb2, 0x03, 0x0e, 0xb9, 0x83, 0x08, 0xd5, 0xa3, 0x03, 0x0e, 0xbd, 0x91,
- 0x08, 0xd5, 0x93, 0x03, 0x0e, 0xc1, 0x8b, 0x08, 0xd5, 0x82, 0x03, 0x0e,
- 0xc5, 0xc2, 0x14, 0x40, 0x00, 0xa0, 0xd9, 0xc2, 0x02, 0x84, 0x00, 0xa0,
- 0xb0, 0xc3, 0xe4, 0xab, 0x00, 0xa8, 0x79, 0xc2, 0x01, 0xa6, 0x00, 0xa8,
- 0x53, 0x03, 0x0e, 0xc9, 0xc3, 0x01, 0x21, 0x00, 0xa8, 0x69, 0xc3, 0x14,
- 0x3f, 0x00, 0xa8, 0x21, 0xc2, 0x1b, 0x42, 0x00, 0xa8, 0x59, 0xc3, 0x08,
- 0xcb, 0x00, 0xa8, 0x60, 0x8b, 0x00, 0xac, 0x70, 0x83, 0x00, 0xab, 0xd0,
- 0x91, 0x00, 0xab, 0xc0, 0x8b, 0x00, 0xab, 0xb0, 0x07, 0xc3, 0x0e, 0xcd,
- 0x8b, 0x00, 0xa2, 0xa1, 0x0e, 0xc3, 0x0e, 0xd5, 0x1c, 0x43, 0x0e, 0xec,
- 0xc2, 0x01, 0x09, 0x00, 0xc7, 0x91, 0x83, 0x00, 0xb0, 0xd9, 0x8b, 0x00,
- 0xb0, 0xc9, 0x87, 0x00, 0xb0, 0xbb, 0x03, 0x0f, 0x03, 0x91, 0x00, 0xb0,
- 0xb1, 0x97, 0x00, 0xb0, 0xa1, 0x0c, 0x43, 0x0f, 0x07, 0x19, 0xc3, 0x0f,
- 0x1e, 0x83, 0x00, 0xaf, 0xa3, 0x03, 0x0f, 0x26, 0x8b, 0x00, 0xaf, 0x99,
- 0x87, 0x00, 0xaf, 0x8b, 0x03, 0x0f, 0x2a, 0x91, 0x00, 0xaf, 0x81, 0x97,
- 0x00, 0xaf, 0x79, 0x0a, 0x43, 0x0f, 0x2e, 0x16, 0xc3, 0x0f, 0x45, 0x15,
- 0xc3, 0x0f, 0x60, 0x0a, 0xc3, 0x0f, 0x77, 0x0e, 0x43, 0x0f, 0x8e, 0x83,
- 0x00, 0xb3, 0x31, 0x8b, 0x00, 0xb3, 0x29, 0x87, 0x00, 0xb3, 0x1b, 0x03,
- 0x0f, 0xa9, 0x91, 0x00, 0xb3, 0x11, 0x97, 0x00, 0xb3, 0x08, 0x83, 0x00,
- 0xb0, 0x99, 0x8b, 0x00, 0xb0, 0x91, 0x87, 0x00, 0xb0, 0x83, 0x03, 0x0f,
- 0xad, 0x91, 0x00, 0xb0, 0x79, 0x97, 0x00, 0xb0, 0x70, 0x83, 0x00, 0xb0,
- 0x69, 0x8b, 0x00, 0xb0, 0x61, 0x87, 0x00, 0xb0, 0x53, 0x03, 0x0f, 0xb1,
- 0x91, 0x00, 0xb0, 0x49, 0x97, 0x00, 0xb0, 0x40, 0x83, 0x00, 0xb0, 0x39,
- 0x8b, 0x00, 0xb0, 0x31, 0x87, 0x00, 0xb0, 0x23, 0x03, 0x0f, 0xb5, 0x91,
- 0x00, 0xb0, 0x19, 0x97, 0x00, 0xb0, 0x11, 0x89, 0x00, 0xa6, 0x88, 0x8d,
- 0x00, 0xb0, 0x0b, 0x03, 0x0f, 0xb9, 0x0a, 0x43, 0x0f, 0xd0, 0x83, 0x00,
- 0xaf, 0x69, 0x8b, 0x00, 0xaf, 0x61, 0x87, 0x00, 0xaf, 0x53, 0x03, 0x0f,
- 0xe7, 0x91, 0x00, 0xaf, 0x49, 0x97, 0x00, 0xaf, 0x40, 0x19, 0xc3, 0x0f,
- 0xeb, 0xc2, 0x00, 0x4c, 0x00, 0xa1, 0xb1, 0x8b, 0x00, 0xa1, 0xb8, 0x83,
- 0x00, 0xae, 0xa9, 0x8b, 0x00, 0xae, 0xa1, 0x87, 0x00, 0xae, 0x93, 0x03,
- 0x10, 0x02, 0x91, 0x00, 0xae, 0x89, 0x97, 0x00, 0xae, 0x80, 0x83, 0x00,
- 0xae, 0x79, 0x8b, 0x00, 0xae, 0x71, 0x87, 0x00, 0xae, 0x63, 0x03, 0x10,
- 0x06, 0x91, 0x00, 0xae, 0x59, 0x97, 0x00, 0xae, 0x50, 0x0a, 0xc3, 0x10,
- 0x0a, 0x97, 0x00, 0xb1, 0x11, 0x91, 0x00, 0xb1, 0x19, 0x87, 0x00, 0xb1,
- 0x23, 0x03, 0x10, 0x21, 0x8b, 0x00, 0xb1, 0x31, 0x83, 0x00, 0xb1, 0x38,
- 0xc8, 0xbb, 0xa5, 0x00, 0xb2, 0x38, 0x97, 0x00, 0xb2, 0x01, 0x91, 0x00,
- 0xb2, 0x09, 0x87, 0x00, 0xb2, 0x13, 0x03, 0x10, 0x25, 0x8b, 0x00, 0xb2,
- 0x21, 0x83, 0x00, 0xb2, 0x28, 0x97, 0x00, 0xb2, 0x71, 0x91, 0x00, 0xb2,
- 0x79, 0x87, 0x00, 0xb2, 0x83, 0x03, 0x10, 0x29, 0x8b, 0x00, 0xb2, 0x91,
- 0x83, 0x00, 0xb2, 0x99, 0x8a, 0x00, 0xb2, 0xd2, 0x03, 0x10, 0x2d, 0x83,
- 0x00, 0xc7, 0x38, 0x91, 0x00, 0xc7, 0x30, 0x83, 0x00, 0xab, 0x40, 0x83,
- 0x00, 0xad, 0x68, 0x91, 0x00, 0xad, 0x58, 0x8b, 0x00, 0xad, 0x48, 0x8e,
- 0x00, 0xa7, 0x5b, 0x03, 0x10, 0x44, 0x94, 0x00, 0xaa, 0x8b, 0x03, 0x10,
- 0x5a, 0x16, 0xc3, 0x10, 0x70, 0xc4, 0xe1, 0xf7, 0x00, 0xaa, 0xe1, 0x9b,
- 0x00, 0xaa, 0x03, 0x03, 0x10, 0x7a, 0x15, 0xc3, 0x10, 0x7e, 0x92, 0x00,
- 0xa2, 0x53, 0x03, 0x10, 0x88, 0x42, 0xe4, 0xab, 0xc3, 0x10, 0x8c, 0x19,
- 0xc3, 0x10, 0xa5, 0x42, 0x01, 0xa6, 0xc3, 0x10, 0xbe, 0x8f, 0x00, 0xa5,
- 0xe3, 0x03, 0x10, 0xd7, 0x42, 0x01, 0x08, 0x43, 0x10, 0xdb, 0xc8, 0xb7,
- 0x65, 0x00, 0xb3, 0xf1, 0xc2, 0x00, 0x6e, 0x00, 0xac, 0xfa, 0x03, 0x10,
- 0xe6, 0xc9, 0xaf, 0x01, 0x00, 0xc6, 0xf9, 0x0b, 0x43, 0x10, 0xfa, 0xc9,
- 0xaa, 0x8a, 0x00, 0xc6, 0xf1, 0xd6, 0x2d, 0x8b, 0x00, 0xa1, 0x40, 0x45,
- 0x06, 0xf3, 0xc3, 0x11, 0x06, 0xc7, 0x36, 0xa8, 0x00, 0xa1, 0x50, 0x91,
- 0x00, 0xc6, 0x5b, 0x03, 0x11, 0x12, 0x8b, 0x00, 0xc6, 0x3a, 0x03, 0x11,
- 0x16, 0x96, 0x08, 0x2a, 0xb0, 0x8d, 0x08, 0x2a, 0x80, 0x98, 0x05, 0x5d,
- 0xc1, 0x97, 0x05, 0x5d, 0xb9, 0x91, 0x05, 0x5d, 0xb1, 0x8b, 0x05, 0x5d,
- 0xa9, 0x83, 0x05, 0x5d, 0x99, 0x87, 0x05, 0x5d, 0xa0, 0x98, 0x05, 0x5d,
- 0x91, 0x83, 0x05, 0x5d, 0x69, 0x87, 0x05, 0x5d, 0x71, 0x97, 0x05, 0x5d,
- 0x89, 0x8b, 0x05, 0x5d, 0x79, 0x91, 0x05, 0x5d, 0x80, 0x8a, 0x05, 0x5c,
- 0x80, 0x8a, 0x00, 0x9e, 0x00, 0x83, 0x00, 0x9e, 0xe9, 0x87, 0x00, 0x9e,
- 0xf1, 0x8b, 0x00, 0x9e, 0xf9, 0x91, 0x00, 0x9f, 0x01, 0x97, 0x00, 0x9f,
- 0x09, 0x98, 0x00, 0x9f, 0x10, 0x83, 0x00, 0x9f, 0x19, 0x87, 0x00, 0x9f,
- 0x21, 0x8b, 0x00, 0x9f, 0x29, 0x91, 0x00, 0x9f, 0x31, 0x97, 0x00, 0x9f,
- 0x39, 0x98, 0x00, 0x9f, 0x40, 0xcc, 0x87, 0xdc, 0x00, 0x24, 0x21, 0xc5,
- 0xc5, 0x6b, 0x05, 0x33, 0xe8, 0x00, 0x43, 0x11, 0x1a, 0x88, 0x05, 0x34,
- 0xf1, 0x8e, 0x01, 0x6f, 0x39, 0x8f, 0x01, 0x6f, 0x41, 0x90, 0x01, 0x6f,
- 0x49, 0x94, 0x01, 0x6f, 0x61, 0x95, 0x01, 0x6f, 0x6a, 0x03, 0x11, 0x26,
- 0x48, 0xbd, 0xbd, 0xc3, 0x11, 0x2c, 0x87, 0x01, 0x6f, 0xb8, 0xcb, 0x05,
- 0xdd, 0x05, 0x33, 0xa0, 0x8b, 0x05, 0x33, 0xb1, 0xc3, 0x70, 0xed, 0x05,
- 0x33, 0xc9, 0xc2, 0x00, 0x9e, 0x01, 0x6f, 0xd1, 0x97, 0x01, 0x6f, 0xd8,
- 0xc7, 0x86, 0x25, 0x05, 0x33, 0xc0, 0xc8, 0x81, 0x8c, 0x05, 0x33, 0xd1,
- 0x0a, 0x43, 0x11, 0x3a, 0xc4, 0x70, 0xd8, 0x05, 0x33, 0xd8, 0x87, 0x01,
- 0x6f, 0x01, 0xc6, 0xc7, 0x4d, 0x01, 0x6f, 0xf0, 0x87, 0x01, 0x6f, 0x19,
- 0xc4, 0xde, 0xcb, 0x01, 0x6f, 0xc0, 0xc2, 0x01, 0xf0, 0x01, 0x6f, 0x21,
- 0x87, 0x01, 0x6f, 0x28, 0x87, 0x01, 0x6f, 0x71, 0xc2, 0x02, 0x60, 0x01,
- 0x6f, 0x80, 0xc6, 0x01, 0xf6, 0x00, 0x18, 0xa8, 0xc7, 0xc7, 0x4c, 0x0f,
- 0x01, 0x49, 0xc9, 0xa9, 0x8e, 0x0f, 0x01, 0x38, 0x14, 0xc3, 0x11, 0x44,
+ 0x1b, 0x02, 0xe2, 0x26, 0xa0, 0x01, 0x40, 0x2b, 0x02, 0xe2, 0x4d, 0xa1,
+ 0x01, 0x40, 0x4b, 0x02, 0xe2, 0x6d, 0xa2, 0x01, 0x40, 0x8b, 0x02, 0xe2,
+ 0x86, 0xa3, 0x01, 0x41, 0x0b, 0x02, 0xe2, 0x98, 0xa5, 0x01, 0x44, 0x09,
+ 0xa4, 0x01, 0x42, 0x0a, 0x02, 0xe2, 0xa3, 0xa0, 0x01, 0x40, 0x33, 0x02,
+ 0xe2, 0xa7, 0xa1, 0x01, 0x40, 0x53, 0x02, 0xe2, 0xc7, 0xa2, 0x01, 0x40,
+ 0x93, 0x02, 0xe2, 0xe0, 0xa3, 0x01, 0x41, 0x13, 0x02, 0xe2, 0xf2, 0xa5,
+ 0x01, 0x44, 0x11, 0xa4, 0x01, 0x42, 0x12, 0x02, 0xe2, 0xfd, 0xa1, 0x01,
+ 0x40, 0x63, 0x02, 0xe3, 0x01, 0xa2, 0x01, 0x40, 0xa3, 0x02, 0xe3, 0x1a,
+ 0xa3, 0x01, 0x41, 0x23, 0x02, 0xe3, 0x2c, 0xa5, 0x01, 0x44, 0x21, 0xa4,
+ 0x01, 0x42, 0x22, 0x02, 0xe3, 0x37, 0xa2, 0x01, 0x40, 0xc3, 0x02, 0xe3,
+ 0x3b, 0xa3, 0x01, 0x41, 0x43, 0x02, 0xe3, 0x4d, 0xa5, 0x01, 0x44, 0x41,
+ 0xa4, 0x01, 0x42, 0x42, 0x02, 0xe3, 0x58, 0xa3, 0x01, 0x41, 0x83, 0x02,
+ 0xe3, 0x5c, 0xa5, 0x01, 0x44, 0x81, 0xa4, 0x01, 0x42, 0x82, 0x02, 0xe3,
+ 0x67, 0xa5, 0x01, 0x45, 0x01, 0xa4, 0x01, 0x43, 0x02, 0x02, 0xe3, 0x6b,
+ 0xc8, 0x4f, 0xa2, 0x08, 0x83, 0x29, 0xc7, 0x0d, 0x7f, 0x08, 0x83, 0x20,
+ 0xc2, 0x0d, 0x8b, 0x08, 0x83, 0x08, 0xc2, 0x0d, 0x8b, 0x08, 0x83, 0x00,
+ 0xc3, 0x41, 0xca, 0x08, 0x82, 0xf9, 0xc2, 0x00, 0x29, 0x08, 0x82, 0xb0,
+ 0xc3, 0x0d, 0x8a, 0x08, 0x82, 0xf1, 0xc2, 0x00, 0xd3, 0x08, 0x82, 0xa8,
+ 0xc4, 0x0d, 0x89, 0x08, 0x82, 0xe9, 0xc3, 0x05, 0xdf, 0x08, 0x82, 0xa0,
+ 0xc4, 0x15, 0xa9, 0x08, 0x82, 0xe1, 0x91, 0x08, 0x82, 0x98, 0x42, 0x00,
+ 0x30, 0xc2, 0xe3, 0x6f, 0x46, 0x2f, 0xd9, 0xc2, 0xe3, 0x79, 0xc4, 0xea,
+ 0x3b, 0x08, 0x81, 0xb9, 0xc3, 0x28, 0x0b, 0x08, 0x81, 0xb0, 0xc2, 0x01,
+ 0x0e, 0x08, 0x81, 0x01, 0x83, 0x08, 0x80, 0xf8, 0xc2, 0x01, 0x0e, 0x08,
+ 0x80, 0xf1, 0x83, 0x08, 0x80, 0xe8, 0x8e, 0x08, 0x80, 0x6b, 0x02, 0xe3,
+ 0x81, 0x94, 0x08, 0x80, 0x5a, 0x02, 0xe3, 0x85, 0x4f, 0x69, 0x6f, 0x42,
+ 0xe3, 0x89, 0x97, 0x08, 0x82, 0x29, 0x8b, 0x08, 0x82, 0x19, 0x83, 0x08,
+ 0x81, 0xc0, 0x8e, 0x08, 0x82, 0x03, 0x02, 0xe3, 0x91, 0x94, 0x08, 0x81,
+ 0xf2, 0x02, 0xe3, 0x95, 0x97, 0x08, 0x81, 0xe8, 0x8b, 0x08, 0x81, 0xd8,
+ 0xc4, 0x15, 0xa7, 0x08, 0x83, 0x69, 0xc2, 0x22, 0x45, 0x08, 0x83, 0x60,
+ 0xc3, 0x0d, 0x8f, 0x08, 0x83, 0x59, 0xc3, 0x08, 0xde, 0x08, 0x83, 0x50,
+ 0xc4, 0x05, 0xde, 0x08, 0x83, 0x49, 0xc2, 0x0a, 0x20, 0x08, 0x83, 0x40,
+ 0x44, 0xe9, 0xab, 0xc2, 0xe3, 0x99, 0x4e, 0x61, 0x5a, 0xc2, 0xe3, 0xa5,
+ 0xc8, 0xa1, 0xd4, 0x0e, 0x80, 0xb0, 0xc4, 0x97, 0x8f, 0x0e, 0x87, 0x99,
+ 0xc4, 0xe5, 0xc7, 0x0e, 0x87, 0x89, 0xc3, 0x8a, 0xb3, 0x0e, 0x82, 0x78,
+ 0x44, 0xe9, 0xab, 0xc2, 0xe3, 0xb1, 0xc8, 0xa1, 0xd4, 0x0e, 0x80, 0xe0,
+ 0x00, 0xc2, 0xe3, 0xc3, 0xc2, 0x00, 0x44, 0x0e, 0x81, 0x90, 0xc8, 0xbb,
+ 0xcb, 0x0e, 0x82, 0xa1, 0xc8, 0xb5, 0x11, 0x0e, 0x82, 0x60, 0x42, 0x03,
+ 0xac, 0xc2, 0xe3, 0xcd, 0x95, 0x0e, 0x80, 0x8a, 0x02, 0xe3, 0xd9, 0xc3,
+ 0x73, 0xe8, 0x0e, 0x84, 0x21, 0xc8, 0xa1, 0xd4, 0x0e, 0x81, 0x10, 0x16,
+ 0xc2, 0xe3, 0xdd, 0xc7, 0xcd, 0xaa, 0x0e, 0x87, 0x18, 0x16, 0xc2, 0xe3,
+ 0xe9, 0xc7, 0xcd, 0xaa, 0x0e, 0x86, 0xf8, 0xc3, 0x73, 0xe8, 0x0e, 0x83,
+ 0x29, 0xcc, 0x85, 0x14, 0x0e, 0x81, 0x59, 0xc8, 0xa1, 0xd4, 0x0e, 0x81,
+ 0x50, 0x4f, 0x61, 0x59, 0x42, 0xe3, 0xf5, 0xc7, 0xcd, 0xd4, 0x0e, 0x86,
+ 0xe9, 0xc5, 0xd6, 0x8f, 0x0e, 0x86, 0xe1, 0x46, 0xd7, 0x1e, 0x42, 0xe4,
+ 0x01, 0x42, 0x00, 0x47, 0xc2, 0xe4, 0x0d, 0xcc, 0x2c, 0x89, 0x0e, 0x86,
+ 0x78, 0xd5, 0x38, 0x65, 0x0e, 0x86, 0xb9, 0xc8, 0x2c, 0x8d, 0x0e, 0x86,
+ 0x68, 0xc6, 0xd6, 0x8e, 0x0e, 0x80, 0x58, 0xc6, 0xd9, 0x34, 0x0e, 0x86,
+ 0x31, 0xc5, 0x1d, 0x23, 0x0e, 0x86, 0x28, 0x42, 0x03, 0xac, 0xc2, 0xe4,
+ 0x19, 0xc3, 0x07, 0x05, 0x0e, 0x85, 0xd8, 0xc2, 0x00, 0xe5, 0x0e, 0x85,
+ 0xc1, 0x83, 0x0e, 0x81, 0xa8, 0xce, 0x6e, 0x10, 0x0e, 0x85, 0x99, 0xc5,
+ 0x6e, 0x19, 0x0e, 0x85, 0x58, 0xcb, 0x9a, 0xb8, 0x0e, 0x85, 0x91, 0xc7,
+ 0x6e, 0x17, 0x0e, 0x85, 0x10, 0xcd, 0x7a, 0x4a, 0x0e, 0x85, 0x49, 0xc5,
+ 0x6e, 0x19, 0x0e, 0x85, 0x40, 0xc6, 0x8f, 0xde, 0x0e, 0x85, 0x39, 0xc9,
+ 0x6e, 0x15, 0x0e, 0x85, 0x30, 0xca, 0x91, 0x03, 0x0e, 0x83, 0x71, 0xc8,
+ 0xbe, 0x2b, 0x0e, 0x83, 0x58, 0xc3, 0x73, 0xe8, 0x0e, 0x83, 0x19, 0x03,
+ 0x42, 0xe4, 0x25, 0xc7, 0xc9, 0x04, 0x0e, 0x83, 0xc1, 0x48, 0xc1, 0x7b,
+ 0x42, 0xe4, 0x31, 0xcf, 0x69, 0xd8, 0x0e, 0x84, 0x69, 0xcc, 0x8b, 0x08,
+ 0x0e, 0x84, 0x60, 0xc4, 0x7f, 0xdc, 0x0e, 0x82, 0xd0, 0xc3, 0x73, 0xe8,
+ 0x0e, 0x82, 0xf9, 0xc8, 0xa1, 0xd4, 0x0e, 0x81, 0xe8, 0x00, 0x42, 0xe4,
+ 0x3d, 0xc9, 0xb5, 0x10, 0x0e, 0x82, 0x59, 0x8b, 0x0e, 0x82, 0x48, 0x5b,
+ 0x17, 0xec, 0xc2, 0xe4, 0x49, 0x46, 0x05, 0x07, 0x42, 0xe4, 0x55, 0xc6,
+ 0x01, 0xe9, 0x01, 0x3a, 0x89, 0xc6, 0x03, 0xfa, 0x0f, 0xa9, 0xf0, 0xc6,
+ 0x01, 0xa1, 0x0f, 0xda, 0x09, 0xc5, 0x00, 0x47, 0x0f, 0xda, 0x10, 0x55,
+ 0x17, 0x86, 0xc2, 0xe4, 0x67, 0x48, 0x01, 0x93, 0xc2, 0xe4, 0x79, 0x4a,
+ 0x12, 0xcc, 0x42, 0xe4, 0x85, 0xc7, 0x18, 0x1b, 0x01, 0x52, 0x91, 0x45,
+ 0x02, 0x93, 0x42, 0xe4, 0x91, 0xc7, 0x79, 0xb4, 0x01, 0x52, 0xf1, 0xc8,
+ 0x50, 0x0d, 0x01, 0x53, 0x00, 0x42, 0x00, 0x3c, 0xc2, 0xe4, 0x9d, 0x09,
+ 0x42, 0xe4, 0xaf, 0xd3, 0x16, 0x44, 0x01, 0x4c, 0x99, 0x49, 0x03, 0x91,
+ 0x42, 0xe4, 0xbe, 0x49, 0x04, 0x13, 0xc2, 0xe4, 0xca, 0xcc, 0x04, 0x1b,
+ 0x0f, 0xdc, 0x61, 0xc6, 0x03, 0xfa, 0x0f, 0xc8, 0x3b, 0x02, 0xe4, 0xd0,
+ 0x42, 0x01, 0x22, 0xc2, 0xe4, 0xd6, 0xcb, 0x96, 0x4b, 0x0f, 0xdd, 0x91,
+ 0xc6, 0xa1, 0x04, 0x0f, 0xdd, 0xc8, 0xd0, 0x60, 0xaf, 0x0f, 0xc2, 0xc1,
+ 0xd1, 0x51, 0xbe, 0x01, 0x0f, 0xf9, 0xc5, 0x00, 0x62, 0x01, 0x0c, 0xa3,
+ 0x02, 0xe4, 0xe2, 0xcc, 0x82, 0x80, 0x01, 0x0e, 0xa3, 0x02, 0xe4, 0xe6,
+ 0x19, 0xc2, 0xe4, 0xec, 0xcb, 0x99, 0xe7, 0x01, 0x58, 0x61, 0xd5, 0x00,
+ 0x52, 0x01, 0x5b, 0x20, 0xcc, 0x05, 0x1b, 0x01, 0x2c, 0x79, 0xcd, 0x15,
+ 0x72, 0x01, 0x2c, 0x70, 0x00, 0x42, 0xe4, 0xf8, 0x44, 0x05, 0xdf, 0xc2,
+ 0xe5, 0x0a, 0xcc, 0x8a, 0xf0, 0x0f, 0xaf, 0x61, 0xde, 0x01, 0x49, 0x0f,
+ 0xde, 0x08, 0x44, 0x00, 0x54, 0xc2, 0xe5, 0x16, 0xd3, 0x42, 0xb6, 0x01,
+ 0x70, 0x48, 0xd0, 0x49, 0x5e, 0x01, 0x2c, 0x59, 0xc7, 0xb3, 0x98, 0x01,
+ 0x4b, 0xe0, 0xd1, 0x43, 0x9c, 0x01, 0x2c, 0x49, 0xd0, 0x07, 0x57, 0x01,
+ 0x16, 0x58, 0x00, 0x42, 0xe5, 0x22, 0xd3, 0x05, 0xf4, 0x01, 0x00, 0xc1,
+ 0xd0, 0x5b, 0x1f, 0x01, 0x71, 0x30, 0xca, 0xa5, 0xf6, 0x01, 0x1c, 0xe9,
+ 0xc9, 0x50, 0xc7, 0x01, 0x1c, 0xe1, 0xca, 0x9f, 0x70, 0x01, 0x1c, 0xd8,
+ 0xce, 0x00, 0xf0, 0x01, 0x00, 0xe1, 0xcc, 0x85, 0x80, 0x01, 0x4e, 0xd1,
+ 0xcb, 0x1c, 0xe0, 0x01, 0x71, 0x41, 0xcd, 0x0f, 0x83, 0x01, 0x80, 0x50,
+ 0xcb, 0x1c, 0xe0, 0x01, 0x4c, 0x29, 0x05, 0xc2, 0xe5, 0x3a, 0xd2, 0x23,
+ 0x42, 0x01, 0x80, 0xb1, 0xd6, 0x0a, 0xe8, 0x01, 0x80, 0xc1, 0xce, 0x26,
+ 0x2e, 0x01, 0x80, 0xd0, 0xd6, 0x0a, 0xe8, 0x01, 0x4c, 0xb9, 0xd2, 0x23,
+ 0x42, 0x01, 0x80, 0x80, 0x50, 0x60, 0xcf, 0xc2, 0xe5, 0x46, 0x4e, 0x74,
+ 0x5a, 0x42, 0xe5, 0x52, 0xda, 0x1d, 0x28, 0x0f, 0xc4, 0xa0, 0x44, 0x00,
+ 0x55, 0xc2, 0xe5, 0x5e, 0x44, 0x15, 0xd2, 0x42, 0xe5, 0x6a, 0xce, 0x74,
+ 0x3e, 0x01, 0x0c, 0xf1, 0x49, 0x6b, 0xf4, 0x42, 0xe5, 0x76, 0x00, 0x42,
+ 0xe5, 0x82, 0x44, 0x00, 0x62, 0xc2, 0xe5, 0x9f, 0xca, 0x54, 0x07, 0x01,
+ 0x48, 0x60, 0xcb, 0x76, 0x63, 0x01, 0x0e, 0xe1, 0xca, 0x89, 0xae, 0x0f,
+ 0xc1, 0xc0, 0x46, 0x02, 0x12, 0xc2, 0xe5, 0xac, 0xc2, 0x00, 0x3b, 0x0f,
+ 0xd7, 0x90, 0xd0, 0x5c, 0xef, 0x0f, 0xc2, 0x01, 0xc5, 0x00, 0x62, 0x0f,
+ 0xc2, 0x20, 0xc5, 0x00, 0x62, 0x01, 0x58, 0x29, 0xd3, 0x40, 0xc8, 0x01,
+ 0x5c, 0x40, 0xc6, 0x0e, 0xdf, 0x01, 0x53, 0xf9, 0xc5, 0x03, 0x50, 0x01,
+ 0x54, 0x0a, 0x02, 0xe5, 0xb8, 0xc8, 0x25, 0x71, 0x01, 0x54, 0x69, 0xd2,
+ 0x06, 0xf5, 0x01, 0x54, 0x78, 0xe0, 0x08, 0x47, 0x01, 0x54, 0x98, 0xe0,
+ 0x0a, 0xe7, 0x01, 0x3b, 0x98, 0xc4, 0x10, 0x64, 0x01, 0x5e, 0x61, 0xc4,
+ 0x0e, 0xa5, 0x0f, 0xbe, 0x20, 0xcf, 0x18, 0x2e, 0x0f, 0xbd, 0x79, 0xd2,
+ 0x25, 0x52, 0x0f, 0xbe, 0x48, 0xc2, 0x00, 0x2c, 0x05, 0x27, 0xc1, 0xc3,
+ 0xeb, 0xa0, 0x05, 0x27, 0xd1, 0xc2, 0x01, 0xb4, 0x05, 0x27, 0xd9, 0xc2,
+ 0x01, 0x01, 0x05, 0x27, 0xe1, 0xc3, 0xec, 0x51, 0x05, 0x27, 0xe8, 0xdd,
+ 0x12, 0x1b, 0x01, 0x50, 0x99, 0xdc, 0x12, 0x8e, 0x01, 0x50, 0x90, 0x1e,
+ 0xc2, 0xe5, 0xbe, 0x1d, 0xc2, 0xe5, 0xe8, 0xc7, 0xc9, 0xb3, 0x08, 0x3a,
+ 0xa1, 0xc5, 0xe2, 0xa6, 0x08, 0x3a, 0xa8, 0x23, 0xc2, 0xe6, 0x36, 0x1d,
+ 0xc2, 0xe6, 0x4a, 0x1e, 0xc2, 0xe6, 0x6a, 0x1f, 0xc2, 0xe6, 0x92, 0x20,
+ 0xc2, 0xe6, 0xb6, 0x21, 0xc2, 0xe6, 0xc2, 0x22, 0x42, 0xe6, 0xe2, 0x9d,
+ 0x08, 0x3b, 0x01, 0x9e, 0x08, 0x3b, 0x09, 0x9f, 0x08, 0x3b, 0x11, 0xa0,
+ 0x08, 0x3b, 0x19, 0xa1, 0x08, 0x3b, 0x21, 0xa2, 0x08, 0x3b, 0x29, 0xa3,
+ 0x08, 0x3b, 0x31, 0xa4, 0x08, 0x3b, 0x38, 0x1d, 0xc2, 0xe7, 0x06, 0x1e,
+ 0x42, 0xe7, 0x2a, 0xc6, 0xd2, 0x56, 0x08, 0x32, 0x39, 0xc3, 0xeb, 0xbb,
+ 0x08, 0x32, 0x79, 0xc3, 0xeb, 0xdc, 0x08, 0x32, 0x50, 0x1d, 0xc2, 0xe7,
+ 0x50, 0x1e, 0xc2, 0xe7, 0x74, 0x1f, 0xc2, 0xe7, 0x9c, 0x20, 0xc2, 0xe7,
+ 0xc4, 0x21, 0xc2, 0xe7, 0xec, 0x22, 0xc2, 0xe8, 0x14, 0x23, 0xc2, 0xe8,
+ 0x3c, 0x24, 0x42, 0xe8, 0x64, 0x1d, 0xc2, 0xe8, 0x6c, 0x1e, 0x42, 0xe8,
+ 0xa8, 0x1d, 0xc2, 0xe8, 0xde, 0x1e, 0xc2, 0xe8, 0xfe, 0x1f, 0xc2, 0xe9,
+ 0x16, 0x20, 0xc2, 0xe9, 0x3a, 0x21, 0xc2, 0xe9, 0x5e, 0x22, 0xc2, 0xe9,
+ 0x7a, 0x23, 0xc2, 0xe9, 0x9e, 0x24, 0xc2, 0xe9, 0xb6, 0x25, 0xc2, 0xe9,
+ 0xde, 0x26, 0x42, 0xea, 0x06, 0x49, 0xb4, 0xd1, 0xc2, 0xea, 0x1e, 0x47,
+ 0xc7, 0x28, 0x42, 0xea, 0x46, 0x04, 0xc2, 0xea, 0x6e, 0x48, 0xbc, 0x83,
+ 0x42, 0xea, 0x76, 0x1e, 0xc2, 0xea, 0x86, 0xc9, 0xb3, 0x7b, 0x08, 0x06,
+ 0x90, 0x83, 0x00, 0xc9, 0xa1, 0xc2, 0x07, 0x69, 0x00, 0xc9, 0x88, 0x91,
+ 0x00, 0xc9, 0x28, 0x87, 0x00, 0xc9, 0x18, 0x97, 0x00, 0xc9, 0x31, 0x8b,
+ 0x00, 0xc9, 0x20, 0xc6, 0x01, 0xe9, 0x0f, 0xbf, 0x59, 0xc6, 0x03, 0xfa,
+ 0x0f, 0xbf, 0x20, 0xc7, 0x3f, 0x7b, 0x0f, 0xa9, 0xb9, 0xc6, 0x03, 0xfa,
+ 0x0f, 0xa9, 0xa9, 0xc6, 0x01, 0xe9, 0x0f, 0xbf, 0x30, 0xdf, 0x0c, 0x45,
+ 0x08, 0x59, 0xf9, 0xdd, 0x10, 0x2e, 0x08, 0x59, 0xe8, 0xc7, 0x3f, 0x7b,
+ 0x0f, 0xa9, 0xb1, 0xc6, 0x03, 0xfa, 0x0f, 0xbf, 0x01, 0xc6, 0x01, 0xe9,
+ 0x0f, 0xbf, 0x38, 0xdf, 0x0d, 0x3d, 0x08, 0x59, 0xf1, 0xdd, 0x0b, 0xea,
+ 0x08, 0x59, 0xe0, 0x96, 0x00, 0x03, 0xa3, 0x02, 0xea, 0x94, 0x95, 0x00,
+ 0x03, 0x9b, 0x02, 0xea, 0xce, 0x94, 0x00, 0x03, 0x93, 0x02, 0xea, 0xf2,
+ 0x90, 0x00, 0x03, 0x73, 0x02, 0xeb, 0x0b, 0x8e, 0x00, 0x03, 0x63, 0x02,
+ 0xeb, 0x19, 0x86, 0x00, 0x03, 0x23, 0x02, 0xeb, 0x48, 0x85, 0x00, 0x03,
+ 0x1b, 0x02, 0xeb, 0x69, 0x91, 0x00, 0x03, 0x7b, 0x02, 0xeb, 0x8d, 0x8b,
+ 0x00, 0x03, 0x4b, 0x02, 0xeb, 0xb1, 0x87, 0x00, 0x03, 0x2b, 0x02, 0xeb,
+ 0xc5, 0x88, 0x00, 0x03, 0x33, 0x02, 0xeb, 0xf3, 0x9b, 0x00, 0x03, 0xcb,
+ 0x02, 0xec, 0x02, 0x8f, 0x00, 0x03, 0x6b, 0x02, 0xec, 0x0e, 0x97, 0x00,
+ 0x03, 0xab, 0x02, 0xec, 0x20, 0x83, 0x00, 0x03, 0x0b, 0x02, 0xec, 0x3d,
+ 0x99, 0x00, 0x03, 0xbb, 0x02, 0xec, 0x6e, 0x8a, 0x00, 0x03, 0x43, 0x02,
+ 0xec, 0x74, 0x9c, 0x00, 0x03, 0xd3, 0x02, 0xec, 0x8d, 0x9a, 0x00, 0x03,
+ 0xc3, 0x02, 0xec, 0x93, 0x98, 0x00, 0x03, 0xb3, 0x02, 0xec, 0x99, 0x92,
+ 0x00, 0x03, 0x83, 0x02, 0xec, 0xb5, 0x8d, 0x00, 0x03, 0x5b, 0x02, 0xec,
+ 0xc1, 0x89, 0x00, 0x03, 0x3b, 0x02, 0xec, 0xcd, 0x84, 0x00, 0x03, 0x13,
+ 0x02, 0xec, 0xe5, 0x8c, 0x00, 0x03, 0x53, 0x02, 0xed, 0x07, 0x93, 0x00,
+ 0x03, 0x8a, 0x02, 0xed, 0x0d, 0xc2, 0x00, 0x15, 0x07, 0xd8, 0x31, 0xc8,
+ 0xbe, 0x0b, 0x07, 0xd8, 0x29, 0x08, 0xc2, 0xed, 0x19, 0xc2, 0x00, 0x0b,
+ 0x00, 0x09, 0x99, 0xc2, 0x09, 0x06, 0x00, 0x0a, 0x98, 0x46, 0x41, 0xe6,
+ 0x42, 0xed, 0x28, 0x46, 0x01, 0xab, 0x42, 0xed, 0x3c, 0xc2, 0x23, 0x6a,
+ 0x00, 0xe9, 0x19, 0xc2, 0x01, 0x47, 0x00, 0xe8, 0x30, 0x48, 0x11, 0xae,
+ 0xc2, 0xed, 0x48, 0xcf, 0x6a, 0x32, 0x05, 0x5a, 0x31, 0xc2, 0x01, 0x0a,
+ 0x05, 0x3b, 0xb0, 0x97, 0x00, 0xe8, 0xa9, 0xc5, 0xda, 0xa9, 0x00, 0xe8,
+ 0x81, 0x87, 0x00, 0x13, 0xb0, 0xc7, 0xce, 0x6e, 0x00, 0xe8, 0x18, 0x87,
+ 0x00, 0xe8, 0x08, 0xca, 0x1e, 0x1b, 0x00, 0x14, 0xd8, 0xc9, 0xac, 0xfa,
+ 0x00, 0x14, 0x08, 0x46, 0x01, 0xab, 0xc2, 0xed, 0x50, 0xc3, 0xce, 0x96,
+ 0x00, 0x10, 0xe0, 0x44, 0x01, 0x8d, 0xc2, 0xed, 0x87, 0x46, 0x01, 0xab,
+ 0x42, 0xed, 0x93, 0x00, 0xc2, 0xed, 0xa5, 0xc6, 0x12, 0x65, 0x00, 0x0d,
+ 0x88, 0x46, 0x01, 0xab, 0xc2, 0xed, 0xb1, 0x91, 0x05, 0x3a, 0x71, 0xc4,
+ 0x73, 0xe1, 0x05, 0x3d, 0xb1, 0xcb, 0x94, 0x25, 0x05, 0x3e, 0x01, 0x44,
+ 0x03, 0xf6, 0xc2, 0xed, 0xfc, 0x8b, 0x00, 0x0d, 0x11, 0x97, 0x00, 0x11,
+ 0x10, 0x46, 0x01, 0xab, 0xc2, 0xee, 0x04, 0x95, 0x05, 0x3b, 0x61, 0x47,
+ 0x68, 0x4a, 0xc2, 0xee, 0x45, 0xc3, 0x00, 0xf2, 0x00, 0x0c, 0xb0, 0x46,
+ 0x01, 0xab, 0xc2, 0xee, 0x5d, 0x4e, 0x70, 0xa2, 0xc2, 0xee, 0xa1, 0x96,
+ 0x05, 0x3b, 0x53, 0x02, 0xee, 0xad, 0xc2, 0x00, 0x56, 0x00, 0x0a, 0x51,
+ 0xc2, 0x01, 0x0d, 0x00, 0x0d, 0x49, 0xc2, 0x23, 0x6a, 0x00, 0x0d, 0xba,
+ 0x02, 0xee, 0xb1, 0x46, 0x01, 0xab, 0xc2, 0xee, 0xb5, 0x87, 0x00, 0x06,
+ 0x33, 0x02, 0xee, 0xfc, 0x83, 0x05, 0x39, 0x91, 0x91, 0x05, 0x39, 0xa1,
+ 0x97, 0x05, 0x39, 0xb1, 0x98, 0x05, 0x39, 0xc3, 0x02, 0xef, 0x02, 0x9b,
+ 0x05, 0x39, 0xe1, 0xca, 0xa7, 0xf4, 0x05, 0x3e, 0x11, 0xc4, 0xdd, 0x2f,
+ 0x01, 0x63, 0x69, 0xc8, 0xc1, 0xfb, 0x00, 0x0c, 0x48, 0xc6, 0xa8, 0x6d,
+ 0x00, 0xf4, 0xf1, 0x46, 0x01, 0xab, 0xc2, 0xef, 0x08, 0xc7, 0xc4, 0x0a,
+ 0x05, 0x3c, 0x59, 0x05, 0xc2, 0xef, 0x2b, 0xc8, 0xc1, 0xdb, 0x05, 0x3e,
+ 0xc1, 0x45, 0x00, 0x3f, 0x42, 0xef, 0x37, 0x46, 0x01, 0xab, 0x42, 0xef,
+ 0x43, 0x47, 0x09, 0x72, 0x42, 0xef, 0x67, 0x46, 0x01, 0xab, 0xc2, 0xef,
+ 0x73, 0xc3, 0x99, 0xd7, 0x00, 0x0f, 0xb8, 0x46, 0x01, 0xab, 0xc2, 0xef,
+ 0x8f, 0x9b, 0x05, 0x3b, 0x01, 0xcb, 0x93, 0xee, 0x05, 0x3b, 0x11, 0xc3,
+ 0x03, 0x02, 0x05, 0x3b, 0x41, 0x47, 0xc6, 0x80, 0x42, 0xef, 0x9f, 0x46,
+ 0x01, 0xab, 0xc2, 0xef, 0xb1, 0x9c, 0x05, 0x39, 0x41, 0xc7, 0xc5, 0xa7,
+ 0x05, 0x39, 0x51, 0xc4, 0x26, 0xcf, 0x00, 0x06, 0xf3, 0x02, 0xef, 0xd1,
+ 0x46, 0x41, 0xe6, 0xc2, 0xef, 0xda, 0x44, 0x05, 0x17, 0x42, 0xef, 0xff,
+ 0x00, 0xc2, 0xf0, 0x11, 0x48, 0x11, 0xae, 0xc2, 0xf0, 0x1d, 0xca, 0xa9,
+ 0xde, 0x05, 0x3a, 0xe0, 0x46, 0x01, 0xab, 0x42, 0xf0, 0x33, 0x46, 0x01,
+ 0xab, 0xc2, 0xf0, 0x4f, 0x8c, 0x00, 0x0e, 0x50, 0x46, 0x01, 0xab, 0xc2,
+ 0xf0, 0x79, 0x8c, 0x00, 0x0e, 0x38, 0x46, 0x01, 0xab, 0x42, 0xf0, 0xa3,
+ 0x46, 0x01, 0xab, 0xc2, 0xf0, 0xcc, 0xc4, 0xd4, 0x4c, 0x00, 0x0f, 0xb1,
+ 0xc3, 0x0b, 0xa3, 0x05, 0x39, 0x31, 0xc5, 0xd6, 0x47, 0x01, 0x63, 0xa8,
+ 0x46, 0x01, 0xab, 0xc2, 0xf0, 0xe6, 0x47, 0x26, 0x0d, 0xc2, 0xf1, 0x14,
+ 0xc4, 0x33, 0x51, 0x00, 0x0c, 0xa1, 0xc2, 0x01, 0x0e, 0x00, 0x0d, 0x10,
+ 0x46, 0x01, 0xab, 0x42, 0xf1, 0x26, 0x46, 0x01, 0xab, 0xc2, 0xf1, 0x38,
+ 0x9c, 0x00, 0x0f, 0x8a, 0x02, 0xf1, 0x58, 0x46, 0x01, 0xab, 0xc2, 0xf1,
+ 0x5e, 0xc2, 0x00, 0x0a, 0x05, 0x3d, 0x99, 0xc8, 0xbf, 0x33, 0x05, 0x39,
+ 0x63, 0x02, 0xf1, 0x86, 0xc2, 0x00, 0xe5, 0x05, 0x3b, 0x71, 0xcf, 0x68,
+ 0x43, 0x05, 0x3e, 0x80, 0x46, 0x01, 0xab, 0xc2, 0xf1, 0x8c, 0xc3, 0x0b,
+ 0x47, 0x05, 0x3d, 0xa1, 0xc7, 0xca, 0x85, 0x05, 0x3a, 0x30, 0x46, 0x01,
+ 0xab, 0x42, 0xf1, 0xb0, 0x46, 0x01, 0xab, 0x42, 0xf1, 0xba, 0xc4, 0xab,
+ 0x64, 0x00, 0x74, 0x11, 0xc3, 0x2b, 0x94, 0x00, 0x74, 0x20, 0xc2, 0x13,
+ 0xf3, 0x00, 0x76, 0xf1, 0xc3, 0x48, 0x2a, 0x00, 0x76, 0xf8, 0xc2, 0x1a,
+ 0x36, 0x00, 0x74, 0x71, 0xc2, 0x01, 0x01, 0x00, 0x74, 0x98, 0x83, 0x00,
+ 0x74, 0x79, 0xc2, 0x01, 0x0e, 0x00, 0x74, 0x80, 0x06, 0xc2, 0xf1, 0xc6,
+ 0xc2, 0x01, 0x0e, 0x00, 0x74, 0xc0, 0xc5, 0x00, 0x47, 0x0f, 0xda, 0xa9,
+ 0xc6, 0x01, 0xa1, 0x0f, 0xda, 0xa1, 0xcc, 0x06, 0x2b, 0x0f, 0xdb, 0x38,
+ 0x46, 0x00, 0xc7, 0xc2, 0xf1, 0xd0, 0xd2, 0x4f, 0x20, 0x0f, 0xdb, 0x18,
+ 0xd2, 0x4f, 0x20, 0x0f, 0xdb, 0x11, 0x46, 0x00, 0xc7, 0x42, 0xf1, 0xdc,
+ 0xc6, 0x01, 0xa1, 0x0f, 0xda, 0xc9, 0xc5, 0x00, 0x47, 0x0f, 0xda, 0xd1,
+ 0xcc, 0x06, 0x2b, 0x0f, 0xda, 0xe0, 0x46, 0x05, 0x07, 0xc2, 0xf1, 0xe8,
+ 0xd2, 0x47, 0xf4, 0x0f, 0xda, 0xf0, 0xd2, 0x47, 0xf4, 0x0f, 0xda, 0xe9,
+ 0x46, 0x05, 0x07, 0x42, 0xf1, 0xf4, 0x46, 0x01, 0xab, 0x42, 0xf2, 0x00,
+ 0xd4, 0x3c, 0x62, 0x01, 0x5d, 0xc0, 0xc5, 0x00, 0x62, 0x01, 0x5b, 0x0b,
+ 0x02, 0xf2, 0x0c, 0xcc, 0x83, 0x04, 0x01, 0x5b, 0x59, 0xcd, 0x81, 0xce,
+ 0x01, 0x5c, 0x28, 0xd5, 0x00, 0x92, 0x0f, 0xc0, 0xa9, 0xd8, 0x22, 0x64,
+ 0x0f, 0xc0, 0x49, 0xd9, 0x20, 0xc8, 0x0f, 0xc0, 0x29, 0x46, 0x00, 0x3e,
+ 0xc2, 0xf2, 0x10, 0xcd, 0x80, 0x14, 0x01, 0x0e, 0xf1, 0x44, 0x03, 0x9a,
+ 0xc2, 0xf2, 0x1c, 0xd1, 0x03, 0x76, 0x01, 0x48, 0x49, 0xcc, 0x8a, 0xb4,
+ 0x0f, 0xc4, 0xc8, 0x47, 0x14, 0x16, 0xc2, 0xf2, 0x28, 0xc6, 0x12, 0x4f,
+ 0x01, 0x4a, 0xc1, 0xc8, 0xab, 0xed, 0x01, 0x4b, 0x00, 0xc8, 0xab, 0xed,
+ 0x01, 0x4a, 0xe1, 0xc6, 0x12, 0x4f, 0x01, 0x4a, 0xa0, 0xe0, 0x0a, 0x47,
+ 0x01, 0x3a, 0x58, 0xd6, 0x2e, 0x0b, 0x01, 0x39, 0xc1, 0xca, 0x25, 0x5a,
+ 0x0f, 0xbe, 0x79, 0xcd, 0x0f, 0x50, 0x0f, 0xbe, 0x88, 0xc3, 0xeb, 0xeb,
+ 0x0f, 0xb3, 0x29, 0xc9, 0xac, 0x58, 0x0f, 0xb2, 0xe8, 0xc5, 0x00, 0x62,
+ 0x01, 0x3c, 0xc1, 0x49, 0x01, 0x59, 0x42, 0xf2, 0x32, 0xdd, 0x00, 0xea,
+ 0x01, 0x3a, 0xe1, 0x44, 0x02, 0x9e, 0x42, 0xf2, 0x3e, 0xcf, 0x18, 0x2e,
+ 0x0f, 0xbd, 0xc1, 0xd2, 0x25, 0x52, 0x0f, 0xbe, 0x60, 0xc3, 0xeb, 0xeb,
+ 0x0f, 0xb3, 0x31, 0xc9, 0xac, 0x58, 0x0f, 0xb2, 0xf0, 0xde, 0x01, 0x29,
+ 0x01, 0x3d, 0x68, 0x44, 0x02, 0x91, 0xc2, 0xf2, 0x44, 0x44, 0x02, 0x29,
+ 0x42, 0xf2, 0x4a, 0xd0, 0x0a, 0xf7, 0x01, 0x3b, 0x81, 0xd7, 0x00, 0xf0,
+ 0x01, 0x3b, 0x70, 0xd5, 0x00, 0x92, 0x0f, 0xc0, 0xc1, 0xdb, 0x15, 0x2e,
+ 0x0f, 0xc0, 0xe0, 0xd1, 0x50, 0x26, 0x01, 0x3a, 0x19, 0xc8, 0x0a, 0x5f,
+ 0x01, 0x39, 0xe8, 0xd0, 0x20, 0x86, 0x01, 0x3d, 0xc9, 0xd0, 0x01, 0x37,
+ 0x01, 0x3d, 0xc1, 0xd0, 0x3d, 0x06, 0x01, 0x3d, 0xb8, 0x47, 0x3a, 0x1e,
+ 0xc2, 0xf2, 0x50, 0xc5, 0x19, 0x74, 0x01, 0x3b, 0x20, 0xd9, 0x20, 0x19,
+ 0x01, 0x37, 0x19, 0xcd, 0x76, 0xbc, 0x01, 0x5a, 0xb8, 0xdd, 0x00, 0xea,
+ 0x01, 0x3a, 0xf1, 0x44, 0x02, 0x9e, 0x42, 0xf2, 0x5c, 0xd5, 0x00, 0x92,
+ 0x0f, 0xc0, 0xd9, 0xdb, 0x15, 0x2e, 0x0f, 0xc0, 0xf8, 0x46, 0x01, 0xab,
+ 0x42, 0xf2, 0x62, 0xd0, 0x0a, 0xf7, 0x01, 0x3b, 0x89, 0xd7, 0x00, 0xf0,
+ 0x01, 0x3b, 0x78, 0x00, 0x42, 0xf2, 0x6e, 0xc3, 0x4d, 0x48, 0x00, 0x2f,
+ 0x91, 0xc3, 0x06, 0x8c, 0x00, 0x2f, 0x80, 0xc4, 0xe9, 0xdf, 0x07, 0xda,
+ 0x71, 0xc6, 0x65, 0x82, 0x07, 0xda, 0x20, 0xc4, 0xe9, 0xdf, 0x07, 0xda,
+ 0x69, 0xc6, 0x65, 0x82, 0x07, 0xd9, 0xd8, 0xc4, 0xe9, 0xdf, 0x07, 0xda,
+ 0x61, 0xc6, 0x65, 0x82, 0x07, 0xd9, 0x88, 0xc5, 0xde, 0xfa, 0x07, 0xda,
+ 0x59, 0xc6, 0x65, 0x82, 0x07, 0xd9, 0xa8, 0xcc, 0x8b, 0x20, 0x07, 0xda,
+ 0x50, 0xcc, 0x8b, 0x20, 0x07, 0xda, 0x30, 0xcc, 0x8b, 0x20, 0x07, 0xd9,
+ 0xc0, 0x46, 0x01, 0xab, 0x42, 0xf2, 0x7a, 0xcc, 0x8b, 0x20, 0x07, 0xda,
+ 0x08, 0xcc, 0x8b, 0x20, 0x07, 0xda, 0x18, 0xcc, 0x8b, 0x20, 0x07, 0xd9,
+ 0xd0, 0xc6, 0x65, 0x82, 0x07, 0xd9, 0xc9, 0xc5, 0xdf, 0xae, 0x07, 0xd8,
+ 0xe8, 0xc2, 0x00, 0x07, 0x00, 0x2e, 0x83, 0x02, 0xf2, 0x87, 0x4a, 0xa1,
+ 0xbe, 0x42, 0xf2, 0x8d, 0xc6, 0xd7, 0x3c, 0x00, 0x2e, 0x38, 0xc6, 0x46,
+ 0x1a, 0x00, 0x2e, 0x09, 0xc3, 0x44, 0xb6, 0x00, 0x2d, 0x80, 0xce, 0x6d,
+ 0x5a, 0x00, 0x2d, 0xd0, 0xc6, 0xd3, 0x88, 0x00, 0x2d, 0x99, 0xc5, 0x7d,
+ 0x10, 0x00, 0x2d, 0x91, 0xc5, 0xe0, 0x21, 0x00, 0x2d, 0x88, 0xc5, 0xde,
+ 0x55, 0x00, 0x2c, 0xa9, 0xc5, 0xd7, 0x3d, 0x00, 0x2c, 0xa0, 0xc6, 0xd3,
+ 0x94, 0x00, 0x2d, 0x49, 0xc6, 0xd5, 0xaa, 0x00, 0x2d, 0x00, 0xc2, 0x54,
+ 0x83, 0x02, 0x6e, 0x31, 0xce, 0x73, 0xea, 0x02, 0x6f, 0x90, 0x11, 0xc2,
+ 0xf2, 0x99, 0xcc, 0x7f, 0x52, 0x02, 0x6e, 0xd8, 0x00, 0x42, 0xf2, 0xa5,
+ 0xc2, 0x1a, 0x36, 0x08, 0x68, 0xc9, 0xc2, 0x00, 0x4c, 0x08, 0x68, 0xb8,
+ 0x43, 0xae, 0x8f, 0xc2, 0xf2, 0xb1, 0x43, 0x94, 0x93, 0xc2, 0xf2, 0xc3,
+ 0xc7, 0xce, 0xd0, 0x05, 0x4b, 0x49, 0xc7, 0xc8, 0x6a, 0x05, 0x4b, 0x41,
+ 0xc8, 0xbd, 0xdb, 0x05, 0x4b, 0x31, 0x8e, 0x00, 0x88, 0x29, 0xc7, 0xce,
+ 0xc9, 0x00, 0x88, 0xc1, 0xc7, 0xce, 0xb4, 0x00, 0x88, 0xd1, 0xc7, 0xce,
+ 0x9f, 0x05, 0x4b, 0x68, 0x44, 0x5d, 0x46, 0xc2, 0xf2, 0xcf, 0x42, 0x00,
+ 0xe4, 0x42, 0xf3, 0x09, 0xc6, 0xd6, 0x7c, 0x05, 0x4b, 0xd9, 0x8a, 0x00,
+ 0x88, 0x90, 0x45, 0xae, 0x80, 0xc2, 0xf3, 0x49, 0x43, 0x68, 0xc6, 0xc2,
+ 0xf3, 0x59, 0xc6, 0xb2, 0x8b, 0x00, 0x8a, 0x00, 0xc7, 0xc6, 0x6b, 0x05,
+ 0x4b, 0xb9, 0xc7, 0xc6, 0x4f, 0x05, 0x4b, 0xb1, 0x89, 0x00, 0x88, 0x59,
+ 0xc8, 0xc2, 0x7b, 0x00, 0x8a, 0x09, 0x43, 0xc6, 0x2c, 0xc2, 0xf3, 0x87,
+ 0xc7, 0xc6, 0x5d, 0x00, 0x8a, 0x19, 0x43, 0xb6, 0x8a, 0xc2, 0xf3, 0x93,
+ 0xc6, 0xd5, 0xda, 0x00, 0x8a, 0x29, 0x43, 0xed, 0x53, 0x42, 0xf3, 0xa5,
+ 0xc6, 0xd9, 0x52, 0x05, 0x4b, 0x89, 0xc8, 0xba, 0x7b, 0x05, 0x4b, 0x81,
+ 0xc8, 0xb8, 0x9b, 0x05, 0x4b, 0x79, 0x8f, 0x00, 0x88, 0x31, 0xc7, 0xc7,
+ 0x83, 0x00, 0x88, 0xe0, 0x44, 0xba, 0x7e, 0xc2, 0xf3, 0xb1, 0xc6, 0xae,
+ 0x6e, 0x00, 0x88, 0x80, 0x44, 0x7f, 0x3f, 0xc2, 0xf3, 0xd9, 0xc6, 0xae,
+ 0x92, 0x00, 0x88, 0x79, 0x47, 0x68, 0xbe, 0x42, 0xf3, 0xe3, 0x90, 0x00,
+ 0x88, 0x11, 0xc8, 0xbd, 0x13, 0x00, 0x88, 0x99, 0xc8, 0xbd, 0x8b, 0x00,
+ 0x88, 0xa9, 0xc7, 0xcb, 0x65, 0x00, 0x88, 0xb1, 0xc6, 0xd1, 0x8a, 0x00,
+ 0x8a, 0xd9, 0xc7, 0xca, 0xbd, 0x00, 0x8a, 0xe1, 0xc7, 0xcb, 0x7a, 0x00,
+ 0x8a, 0xe8, 0x43, 0x68, 0xc6, 0xc2, 0xf3, 0xf9, 0x42, 0x00, 0x0a, 0xc2,
+ 0xf4, 0x03, 0x4a, 0xaa, 0x24, 0x42, 0xf4, 0x0f, 0xc6, 0xc2, 0x9d, 0x00,
+ 0x8a, 0x61, 0xc9, 0x7b, 0x1e, 0x00, 0x8a, 0xc8, 0xc6, 0x94, 0xb9, 0x00,
+ 0x8b, 0x01, 0x83, 0x00, 0x8b, 0x0b, 0x02, 0xf4, 0x17, 0x1b, 0xc2, 0xf4,
+ 0x21, 0x87, 0x00, 0x8b, 0x33, 0x02, 0xf4, 0xa1, 0x91, 0x00, 0x8b, 0x4b,
+ 0x02, 0xf4, 0xd6, 0x19, 0xc2, 0xf4, 0xda, 0x97, 0x00, 0x8b, 0x71, 0x8b,
+ 0x00, 0x8b, 0xa9, 0xc3, 0xeb, 0x9a, 0x00, 0x8b, 0xb1, 0xc3, 0xe7, 0xb6,
+ 0x00, 0x8b, 0xb9, 0xc3, 0x59, 0x32, 0x00, 0x8c, 0x03, 0x02, 0xf4, 0xec,
+ 0x43, 0xe7, 0xff, 0xc2, 0xf4, 0xf0, 0xc3, 0xec, 0xba, 0x00, 0x8c, 0x13,
+ 0x02, 0xf4, 0xfe, 0xc3, 0xec, 0xb7, 0x00, 0x8c, 0x19, 0xc3, 0xea, 0xe6,
+ 0x00, 0x8c, 0x4b, 0x02, 0xf5, 0x04, 0xc7, 0xc7, 0xd0, 0x00, 0x8c, 0x59,
+ 0xc4, 0xe4, 0xf3, 0x00, 0x8c, 0x63, 0x02, 0xf5, 0x08, 0xc3, 0xeb, 0x01,
+ 0x00, 0x8c, 0x69, 0xc3, 0xed, 0x8c, 0x00, 0x8c, 0xc3, 0x02, 0xf5, 0x0c,
+ 0x43, 0xed, 0x86, 0xc2, 0xf5, 0x12, 0xc3, 0xed, 0x92, 0x00, 0x8c, 0xd3,
+ 0x02, 0xf5, 0x2e, 0xc3, 0xed, 0x89, 0x00, 0x8c, 0xd9, 0xc4, 0xe7, 0x8b,
+ 0x00, 0x8c, 0xe1, 0xca, 0x9d, 0xfe, 0x00, 0x8d, 0x11, 0xc4, 0xe9, 0xc7,
+ 0x00, 0x8d, 0x19, 0xc5, 0xe3, 0x46, 0x06, 0xbd, 0xb1, 0xc3, 0xe3, 0x48,
+ 0x06, 0xbe, 0x20, 0x0d, 0xc2, 0xf5, 0x34, 0x15, 0xc2, 0xf5, 0x46, 0x44,
+ 0xc4, 0x44, 0xc2, 0xf5, 0x52, 0x16, 0xc2, 0xf5, 0x80, 0x44, 0xc3, 0xd4,
+ 0xc2, 0xf5, 0x8c, 0x44, 0xc4, 0x7c, 0xc2, 0xf5, 0xc0, 0x12, 0xc2, 0xf5,
+ 0xee, 0x44, 0xc2, 0x9e, 0xc2, 0xf6, 0x00, 0x05, 0xc2, 0xf6, 0x10, 0x44,
+ 0x7b, 0x22, 0xc2, 0xf6, 0x1c, 0x42, 0x04, 0x30, 0x42, 0xf6, 0x38, 0x44,
+ 0x7f, 0x3f, 0xc2, 0xf6, 0x44, 0xc6, 0xae, 0x92, 0x01, 0x8a, 0x59, 0x47,
+ 0x68, 0xbe, 0x42, 0xf6, 0x4e, 0x44, 0x5d, 0x46, 0xc2, 0xf6, 0x5e, 0x42,
+ 0x00, 0xe4, 0x42, 0xf6, 0x8e, 0x90, 0x01, 0x89, 0xa1, 0xc7, 0xca, 0xbd,
+ 0x01, 0x89, 0xa9, 0xc7, 0xcb, 0x7a, 0x01, 0x89, 0xb1, 0xc8, 0xbd, 0x8b,
+ 0x01, 0x8b, 0x31, 0xc6, 0xd1, 0x8a, 0x01, 0x8b, 0x39, 0xc9, 0xb2, 0x88,
+ 0x01, 0x8b, 0x40, 0xc5, 0xba, 0x7e, 0x01, 0x89, 0xb9, 0xc6, 0xae, 0x6e,
+ 0x01, 0x8a, 0x60, 0x8e, 0x01, 0x89, 0xc9, 0x47, 0xbb, 0x93, 0xc2, 0xf6,
+ 0xb0, 0xc7, 0xc8, 0x6a, 0x01, 0x89, 0xd9, 0x43, 0xae, 0x7d, 0xc2, 0xf6,
+ 0xba, 0xc6, 0xd6, 0xd6, 0x01, 0x89, 0xe9, 0x43, 0x7b, 0x0d, 0xc2, 0xf6,
+ 0xcc, 0xc7, 0xce, 0xb4, 0x01, 0x8a, 0x01, 0xcd, 0x7b, 0x1a, 0x01, 0x8b,
+ 0x68, 0x8f, 0x01, 0x8a, 0x09, 0x43, 0xc7, 0x83, 0xc2, 0xf6, 0xd8, 0xc6,
+ 0xd9, 0x52, 0x01, 0x8b, 0x78, 0x43, 0x68, 0xc6, 0xc2, 0xf6, 0xe4, 0xc6,
+ 0xae, 0x80, 0x01, 0x8a, 0x69, 0xc6, 0xb2, 0x8b, 0x01, 0x8b, 0xf8, 0xc4,
+ 0xc2, 0x9f, 0x01, 0x8a, 0x38, 0xc4, 0xb1, 0xd8, 0x01, 0x8a, 0x41, 0xc6,
+ 0xb1, 0xd7, 0x01, 0x8a, 0x50, 0x87, 0x01, 0x8a, 0x81, 0xc4, 0x9e, 0x04,
+ 0x01, 0x8c, 0x6a, 0x02, 0xf7, 0x00, 0x83, 0x01, 0x8a, 0x8b, 0x02, 0xf7,
+ 0x04, 0x87, 0x01, 0x8a, 0xb3, 0x02, 0xf7, 0x08, 0x91, 0x01, 0x8a, 0xd9,
+ 0x97, 0x01, 0x8b, 0x01, 0x8b, 0x01, 0x8b, 0x11, 0xc4, 0xe8, 0x03, 0x01,
+ 0x8c, 0x3b, 0x02, 0xf7, 0x15, 0xc3, 0xec, 0xbd, 0x01, 0x8c, 0x49, 0xc4,
+ 0xe4, 0xf3, 0x01, 0x8c, 0x5b, 0x02, 0xf7, 0x19, 0xc3, 0xe3, 0x46, 0x01,
+ 0x8c, 0x60, 0x91, 0x01, 0x8a, 0x99, 0x97, 0x01, 0x8b, 0x08, 0x87, 0x01,
+ 0x8a, 0xd0, 0x83, 0x01, 0x8a, 0xc3, 0x02, 0xf7, 0x1d, 0x87, 0x01, 0x8a,
+ 0xf3, 0x02, 0xf7, 0x21, 0x8b, 0x01, 0x8a, 0xf8, 0x91, 0x01, 0x81, 0x11,
+ 0xc4, 0x15, 0xa9, 0x01, 0x81, 0xc8, 0xc3, 0x05, 0xdf, 0x01, 0x81, 0x19,
+ 0xc4, 0x0d, 0x89, 0x01, 0x81, 0xd0, 0xc3, 0xe2, 0x62, 0x08, 0x47, 0x89,
+ 0xc4, 0xdd, 0x34, 0x08, 0x47, 0x70, 0x91, 0x07, 0xfb, 0x31, 0x83, 0x07,
+ 0xfc, 0xe0, 0x45, 0x00, 0x3f, 0xc2, 0xf7, 0x25, 0x83, 0x07, 0xfb, 0xd9,
+ 0x97, 0x07, 0xfb, 0xe9, 0x87, 0x07, 0xfb, 0xf1, 0x91, 0x07, 0xfb, 0xf9,
+ 0x8b, 0x07, 0xfb, 0xe0, 0x83, 0x07, 0xfb, 0xb1, 0x8b, 0x07, 0xfb, 0xb9,
+ 0x87, 0x07, 0xfb, 0xc9, 0x91, 0x07, 0xfb, 0xd1, 0x97, 0x07, 0xfb, 0xc0,
+ 0x83, 0x07, 0xfc, 0x01, 0x8b, 0x07, 0xfc, 0x09, 0x97, 0x07, 0xfc, 0x11,
+ 0x87, 0x07, 0xfc, 0x19, 0x91, 0x07, 0xfc, 0x20, 0x87, 0x07, 0xfc, 0x41,
+ 0x91, 0x07, 0xfc, 0x49, 0x83, 0x07, 0xfc, 0x29, 0x8b, 0x07, 0xfc, 0x31,
+ 0x97, 0x07, 0xfc, 0x38, 0x8b, 0x07, 0xfc, 0x59, 0x97, 0x07, 0xfc, 0x61,
+ 0x87, 0x07, 0xfc, 0x69, 0x83, 0x07, 0xfc, 0x51, 0x91, 0x07, 0xfc, 0x70,
+ 0x8b, 0x07, 0xfc, 0x81, 0x91, 0x07, 0xfc, 0x99, 0x83, 0x07, 0xfc, 0x79,
+ 0x97, 0x07, 0xfc, 0x89, 0x87, 0x07, 0xfc, 0x90, 0x83, 0x07, 0xfc, 0xa1,
+ 0x97, 0x07, 0xfc, 0xa9, 0x91, 0x07, 0xfc, 0xb0, 0x97, 0x07, 0xfc, 0xc9,
+ 0x87, 0x07, 0xfc, 0xd1, 0x91, 0x07, 0xfc, 0xd9, 0x83, 0x07, 0xfc, 0xb9,
+ 0x8b, 0x07, 0xfc, 0xc0, 0x8d, 0x07, 0xfd, 0x09, 0xc6, 0xd9, 0x4c, 0x07,
+ 0xfd, 0x18, 0xc6, 0x7f, 0x3e, 0x07, 0xfd, 0x11, 0xc5, 0x68, 0xc5, 0x07,
+ 0xfd, 0x99, 0xc4, 0x95, 0xb8, 0x07, 0xfd, 0xb1, 0xc5, 0xc3, 0xe9, 0x07,
+ 0xfd, 0xc9, 0xc6, 0xba, 0x7d, 0x07, 0xfd, 0x40, 0xc4, 0x68, 0xc6, 0x07,
+ 0xfd, 0x61, 0xc6, 0xae, 0x80, 0x07, 0xfd, 0x78, 0x92, 0x07, 0xfd, 0x91,
+ 0xc6, 0xd7, 0xf6, 0x07, 0xfd, 0xa0, 0x87, 0x07, 0xfe, 0x28, 0x91, 0x07,
+ 0xfe, 0x50, 0x87, 0x07, 0xfe, 0x70, 0x91, 0x07, 0xfe, 0xa0, 0x91, 0x0d,
+ 0x89, 0x91, 0x83, 0x01, 0x84, 0xa9, 0x87, 0x01, 0x84, 0xb0, 0x91, 0x0d,
+ 0x8a, 0x91, 0x87, 0x0d, 0x8a, 0x89, 0x8b, 0x0d, 0x8a, 0x81, 0x83, 0x01,
+ 0x84, 0x70, 0x83, 0x01, 0x84, 0x19, 0x97, 0x01, 0x84, 0x29, 0x91, 0x01,
+ 0x84, 0x38, 0xd2, 0x4c, 0x50, 0x01, 0x72, 0x30, 0xe0, 0x05, 0xa7, 0x01,
+ 0x52, 0x58, 0xcf, 0x65, 0x91, 0x01, 0x52, 0x49, 0xc5, 0x14, 0x2d, 0x01,
+ 0x52, 0x38, 0xcb, 0x2b, 0x0f, 0x01, 0x52, 0x21, 0xc7, 0x79, 0xb4, 0x01,
+ 0x52, 0x19, 0xc3, 0x05, 0xe3, 0x01, 0x52, 0x00, 0xc6, 0x50, 0x0f, 0x01,
+ 0x50, 0xe1, 0xc3, 0x00, 0xe4, 0x01, 0x50, 0xd0, 0x00, 0x42, 0xf7, 0x43,
+ 0x19, 0xc2, 0xf7, 0x4f, 0xc2, 0x01, 0x04, 0x08, 0x5b, 0xe1, 0xc4, 0x05,
+ 0xde, 0x08, 0x5b, 0xd0, 0xc2, 0x3c, 0xd1, 0x08, 0x5b, 0x91, 0xc3, 0x1e,
+ 0x54, 0x08, 0x5b, 0x40, 0xc3, 0x11, 0x40, 0x08, 0x5b, 0x89, 0x03, 0x42,
+ 0xf7, 0x59, 0xc2, 0x01, 0x47, 0x08, 0x5b, 0x38, 0x00, 0x42, 0xf7, 0x65,
+ 0x19, 0xc2, 0xf7, 0x71, 0xc2, 0x01, 0x04, 0x08, 0x5a, 0xe1, 0xc4, 0x05,
+ 0xde, 0x08, 0x5a, 0xd0, 0xc2, 0x3c, 0xd1, 0x08, 0x5a, 0xa9, 0xc3, 0x1e,
+ 0x54, 0x08, 0x5a, 0x40, 0xc3, 0x11, 0x40, 0x08, 0x5a, 0xa1, 0x03, 0x42,
+ 0xf7, 0x7b, 0xc2, 0x01, 0x47, 0x08, 0x5a, 0x38, 0xc4, 0x32, 0xac, 0x08,
+ 0x5a, 0x01, 0xc3, 0x18, 0x7a, 0x08, 0x5a, 0x78, 0xc2, 0x0a, 0x20, 0x00,
+ 0x00, 0xf1, 0xc4, 0x05, 0xde, 0x00, 0x00, 0xe8, 0x16, 0xc2, 0xf7, 0x87,
+ 0xc3, 0x05, 0x17, 0x0f, 0x65, 0x88, 0xc4, 0x24, 0x35, 0x0f, 0x65, 0x59,
+ 0xc5, 0x05, 0x1b, 0x0f, 0x65, 0x51, 0x15, 0xc2, 0xf7, 0x93, 0x08, 0xc2,
+ 0xf7, 0x9f, 0x16, 0xc2, 0xf7, 0xab, 0xc3, 0x05, 0x17, 0x0f, 0x65, 0x18,
+ 0xc2, 0x00, 0x3a, 0x0f, 0x65, 0x10, 0xc2, 0x00, 0x3a, 0x0f, 0x64, 0xf8,
+ 0xc2, 0x0d, 0x8b, 0x0f, 0x64, 0x13, 0x02, 0xf7, 0xb7, 0x00, 0x42, 0xf7,
+ 0xbd, 0x9b, 0x0f, 0x64, 0x0b, 0x02, 0xf7, 0xc9, 0x00, 0x42, 0xf7, 0xcf,
+ 0xc4, 0x15, 0xa7, 0x0f, 0x63, 0xbb, 0x02, 0xf7, 0xdb, 0xc2, 0x22, 0x45,
+ 0x0f, 0x63, 0xb2, 0x02, 0xf7, 0xe8, 0x0b, 0xc2, 0xf7, 0xf5, 0x11, 0x42,
+ 0xf8, 0x07, 0x0a, 0xc2, 0xf8, 0x19, 0x19, 0xc2, 0xf8, 0x2b, 0xc2, 0x01,
+ 0x04, 0x0f, 0x63, 0xd2, 0x02, 0xf8, 0x3b, 0x00, 0x42, 0xf8, 0x41, 0xc4,
+ 0x01, 0x1d, 0x0f, 0x65, 0x71, 0xc7, 0x08, 0x19, 0x0f, 0x65, 0x68, 0xc6,
+ 0xd7, 0xfc, 0x01, 0x96, 0x01, 0x17, 0x42, 0xf8, 0x4d, 0xc3, 0x7a, 0xe7,
+ 0x01, 0x96, 0x11, 0x9b, 0x01, 0x96, 0x20, 0xc4, 0xe9, 0x37, 0x01, 0x96,
+ 0x19, 0xc5, 0xda, 0x04, 0x01, 0x96, 0x38, 0xc7, 0xc6, 0x02, 0x01, 0x96,
+ 0x59, 0x43, 0x1b, 0x41, 0x42, 0xf8, 0x59, 0xc4, 0x16, 0x57, 0x01, 0x9a,
+ 0xc1, 0xc3, 0x05, 0x17, 0x01, 0x9a, 0xc9, 0x16, 0xc2, 0xf8, 0x78, 0x08,
+ 0xc2, 0xf8, 0x86, 0x15, 0xc2, 0xf8, 0x93, 0x07, 0xc2, 0xf8, 0xa5, 0xc4,
+ 0x24, 0x35, 0x01, 0x9b, 0x0a, 0x02, 0xf8, 0xb4, 0xc3, 0x00, 0xce, 0x01,
+ 0x7f, 0xb9, 0xc9, 0x00, 0xc8, 0x01, 0x7f, 0xd0, 0xc4, 0x00, 0xcd, 0x01,
+ 0x7f, 0xc1, 0xc5, 0x00, 0x47, 0x01, 0x7f, 0xc8, 0xc9, 0x4f, 0xa1, 0x08,
+ 0x42, 0xf8, 0xc4, 0x15, 0xa9, 0x08, 0x42, 0xe1, 0x91, 0x08, 0x42, 0xc8,
+ 0xc8, 0x4f, 0xa2, 0x08, 0x42, 0xf1, 0xc7, 0x0d, 0x7f, 0x08, 0x42, 0xe8,
+ 0xc4, 0xdd, 0x34, 0x08, 0x42, 0x71, 0xc3, 0xe2, 0x62, 0x08, 0x42, 0x88,
+ 0xd7, 0x29, 0x20, 0x0f, 0xd2, 0x58, 0x49, 0x29, 0x20, 0x42, 0xf8, 0xba,
+ 0x49, 0x29, 0x20, 0x42, 0xf8, 0xc6, 0x44, 0x8f, 0xc9, 0xc2, 0xf8, 0xd2,
+ 0xc3, 0x01, 0x5e, 0x01, 0x32, 0xa2, 0x02, 0xf8, 0xeb, 0x49, 0x29, 0x20,
+ 0x42, 0xf8, 0xf1, 0x49, 0x29, 0x20, 0x42, 0xf8, 0xfd, 0x0d, 0xc2, 0xf9,
+ 0x09, 0xc5, 0xad, 0xae, 0x0f, 0xd0, 0xf9, 0xc4, 0xd4, 0xf2, 0x0f, 0xd1,
+ 0x01, 0xc6, 0xba, 0xfd, 0x0f, 0xd1, 0x09, 0xc4, 0xe8, 0x9b, 0x0f, 0xd1,
+ 0x18, 0xdd, 0x11, 0x50, 0x0f, 0xbc, 0x51, 0x45, 0x01, 0xac, 0x42, 0xf9,
+ 0x15, 0xcf, 0x62, 0xd0, 0x01, 0x3f, 0x19, 0xce, 0x6f, 0xc2, 0x01, 0x3f,
+ 0x10, 0xc2, 0x00, 0x2b, 0x0f, 0xc8, 0x6b, 0x02, 0xf9, 0x2d, 0x43, 0x11,
+ 0x90, 0x42, 0xf9, 0x33, 0x51, 0x0b, 0x89, 0xc2, 0xf9, 0x3f, 0x45, 0x01,
+ 0xac, 0xc2, 0xf9, 0x51, 0xc6, 0x8d, 0x90, 0x0f, 0xa9, 0x98, 0x45, 0x01,
+ 0xac, 0xc2, 0xf9, 0x6b, 0xcc, 0x8c, 0x10, 0x0f, 0x99, 0x2a, 0x02, 0xf9,
+ 0x77, 0x15, 0xc2, 0xf9, 0x7d, 0xc7, 0x0b, 0xa0, 0x01, 0x59, 0x58, 0xca,
+ 0x9d, 0x9a, 0x01, 0x36, 0xc9, 0x49, 0x01, 0x59, 0x42, 0xf9, 0x89, 0xc7,
+ 0x40, 0x3c, 0x01, 0x2e, 0x29, 0xce, 0x70, 0x32, 0x01, 0x2e, 0x19, 0xc8,
+ 0x00, 0x52, 0x01, 0x2e, 0x08, 0xd0, 0x60, 0x9f, 0x01, 0x3e, 0x81, 0xc9,
+ 0xb0, 0x1b, 0x01, 0x36, 0x59, 0xc4, 0x25, 0x0d, 0x01, 0x33, 0x11, 0x51,
+ 0x0b, 0x89, 0x42, 0xf9, 0x95, 0xc5, 0x01, 0x62, 0x01, 0x30, 0xf9, 0xcf,
+ 0x6b, 0xb8, 0x0f, 0xac, 0xb9, 0xce, 0x23, 0xd6, 0x0f, 0xa2, 0x38, 0xce,
+ 0x70, 0x32, 0x01, 0x2d, 0xf9, 0xc8, 0x00, 0x52, 0x01, 0x2d, 0xe8, 0xe0,
+ 0x01, 0xc7, 0x01, 0x3e, 0x08, 0xc5, 0x0b, 0x62, 0x01, 0x3a, 0x01, 0xc3,
+ 0x00, 0xc9, 0x0f, 0xa5, 0x70, 0x44, 0x01, 0xab, 0x42, 0xf9, 0xa7, 0xc5,
+ 0x01, 0x62, 0x01, 0x30, 0xf1, 0xce, 0x23, 0xd6, 0x0f, 0xa2, 0x48, 0x12,
+ 0xc2, 0xf9, 0xad, 0xce, 0x70, 0x32, 0x01, 0x2d, 0xc9, 0xc8, 0x00, 0x52,
+ 0x01, 0x2d, 0xb8, 0xc9, 0x35, 0x23, 0x01, 0x2f, 0x60, 0xcb, 0x55, 0xd1,
+ 0x01, 0x2f, 0xe9, 0xc5, 0x01, 0xea, 0x01, 0x2f, 0xd9, 0xc3, 0x09, 0x46,
+ 0x01, 0x5a, 0x80, 0x90, 0x0f, 0x17, 0x42, 0x02, 0xf9, 0xb9, 0x89, 0x0f,
+ 0x17, 0x10, 0xc2, 0x00, 0x63, 0x08, 0xc6, 0xd9, 0xc2, 0x00, 0x36, 0x08,
+ 0xc6, 0xd0, 0x90, 0x08, 0xc6, 0x81, 0x9b, 0x08, 0xc6, 0x68, 0x8c, 0x08,
+ 0xc6, 0x70, 0xc2, 0x00, 0x63, 0x08, 0xc5, 0xd9, 0xc2, 0x00, 0x36, 0x08,
+ 0xc5, 0xd0, 0x90, 0x08, 0xc5, 0x81, 0x9b, 0x08, 0xc5, 0x68, 0x8c, 0x08,
+ 0xc5, 0x70, 0xe0, 0x02, 0xa7, 0x01, 0x5c, 0xa0, 0xcc, 0x87, 0x00, 0x0f,
+ 0xcb, 0xd1, 0xd7, 0x26, 0xca, 0x0f, 0xcb, 0x99, 0xca, 0xaa, 0x10, 0x0f,
+ 0xd7, 0x18, 0xcb, 0x8d, 0x79, 0x0f, 0xb0, 0x11, 0xca, 0xa1, 0x46, 0x0f,
+ 0xc8, 0x90, 0xc9, 0xb1, 0xef, 0x0f, 0xb2, 0x31, 0x44, 0x03, 0xf6, 0xc2,
+ 0xf9, 0xbd, 0xd1, 0x58, 0x1e, 0x0f, 0xc9, 0x40, 0x45, 0x00, 0x39, 0x42,
+ 0xf9, 0xcc, 0xc8, 0x6f, 0x20, 0x0f, 0xb0, 0x99, 0xc8, 0xba, 0x83, 0x0f,
+ 0xc9, 0x00, 0xcb, 0x91, 0xd3, 0x0f, 0xb1, 0xb9, 0xc6, 0xd1, 0xf0, 0x0f,
+ 0xce, 0x80, 0xc2, 0x0a, 0x20, 0x07, 0xf8, 0x91, 0xc4, 0x05, 0xde, 0x07,
+ 0xf8, 0x98, 0xc3, 0x08, 0xde, 0x07, 0xf8, 0xa1, 0xc3, 0x0d, 0x8f, 0x07,
+ 0xf8, 0xa8, 0xc2, 0x22, 0x45, 0x07, 0xf8, 0xb1, 0xc4, 0x15, 0xa7, 0x07,
+ 0xf8, 0xb8, 0xc9, 0xb4, 0x26, 0x07, 0xf9, 0x01, 0x83, 0x07, 0xf8, 0x60,
+ 0xce, 0x26, 0x2e, 0x07, 0xf9, 0xd9, 0xcd, 0x00, 0xd2, 0x07, 0xfa, 0xd9,
+ 0xd1, 0x57, 0xc9, 0x07, 0xfa, 0xf9, 0xcb, 0x1c, 0xe0, 0x07, 0xf8, 0x40,
+ 0x83, 0x07, 0xf9, 0x09, 0x84, 0x07, 0xf9, 0x11, 0x85, 0x07, 0xf9, 0x19,
+ 0x86, 0x07, 0xf9, 0x21, 0x87, 0x07, 0xf9, 0x29, 0x88, 0x07, 0xf9, 0x31,
+ 0x89, 0x07, 0xf9, 0x39, 0x8a, 0x07, 0xf9, 0x41, 0x8b, 0x07, 0xf9, 0x49,
+ 0x8c, 0x07, 0xf9, 0x51, 0x8d, 0x07, 0xf9, 0x59, 0x8e, 0x07, 0xf9, 0x61,
+ 0x8f, 0x07, 0xf9, 0x69, 0x95, 0x07, 0xf9, 0x99, 0x96, 0x07, 0xf9, 0xa1,
+ 0x97, 0x07, 0xf9, 0xa9, 0x98, 0x07, 0xf9, 0xb1, 0x99, 0x07, 0xf9, 0xb9,
+ 0x9a, 0x07, 0xf9, 0xc1, 0x9b, 0x07, 0xf9, 0xc9, 0x9c, 0x07, 0xf9, 0xd1,
+ 0x90, 0x07, 0xf9, 0x71, 0x91, 0x07, 0xf9, 0x79, 0x92, 0x07, 0xf9, 0x81,
+ 0x93, 0x07, 0xf9, 0x89, 0x94, 0x07, 0xf9, 0x90, 0x83, 0x07, 0xfa, 0x09,
+ 0x84, 0x07, 0xfa, 0x11, 0x85, 0x07, 0xfa, 0x19, 0x87, 0x07, 0xfa, 0x29,
+ 0x88, 0x07, 0xfa, 0x31, 0x89, 0x07, 0xfa, 0x39, 0x8a, 0x07, 0xfa, 0x41,
+ 0x8b, 0x07, 0xfa, 0x49, 0x8c, 0x07, 0xfa, 0x51, 0x8d, 0x07, 0xfa, 0x59,
+ 0x8e, 0x07, 0xfa, 0x61, 0x8f, 0x07, 0xfa, 0x69, 0x90, 0x07, 0xfa, 0x71,
+ 0x91, 0x07, 0xfa, 0x79, 0x92, 0x07, 0xfa, 0x81, 0x93, 0x07, 0xfa, 0x89,
+ 0x94, 0x07, 0xfa, 0x91, 0x95, 0x07, 0xfa, 0x99, 0x96, 0x07, 0xfa, 0xa1,
+ 0x97, 0x07, 0xfa, 0xa9, 0x98, 0x07, 0xfa, 0xb1, 0x99, 0x07, 0xfa, 0xb9,
+ 0x9a, 0x07, 0xfa, 0xc1, 0x9b, 0x07, 0xfa, 0xc9, 0x9c, 0x07, 0xfa, 0xd1,
+ 0x86, 0x07, 0xfa, 0x20, 0xca, 0x92, 0x00, 0x08, 0x52, 0xb9, 0x96, 0x08,
+ 0x52, 0x80, 0x91, 0x08, 0x50, 0x31, 0x87, 0x08, 0x50, 0x29, 0xc9, 0xb7,
+ 0xd7, 0x08, 0x50, 0x19, 0x97, 0x08, 0x50, 0x11, 0x8b, 0x08, 0x50, 0x08,
+ 0x16, 0xc2, 0xf9, 0xd8, 0xc2, 0x01, 0x0e, 0x08, 0x50, 0xd9, 0x83, 0x08,
+ 0x50, 0xd0, 0xc2, 0x01, 0x0e, 0x08, 0x50, 0xe9, 0x83, 0x08, 0x50, 0xe0,
+ 0x44, 0x24, 0x51, 0xc2, 0xf9, 0xe2, 0x43, 0x6f, 0x91, 0x42, 0xf9, 0xee,
+ 0x44, 0xe4, 0xb3, 0xc2, 0xf9, 0xfa, 0x43, 0x2c, 0x7e, 0x42, 0xfa, 0x06,
+ 0xc3, 0x3a, 0xc5, 0x00, 0xcf, 0xd9, 0xc4, 0xbc, 0x79, 0x00, 0xcf, 0x58,
+ 0xc3, 0x3a, 0xc5, 0x00, 0xcf, 0xd1, 0xc4, 0xbc, 0x79, 0x00, 0xcf, 0x50,
+ 0x43, 0xec, 0xc9, 0xc2, 0xfa, 0x12, 0x46, 0xd9, 0x70, 0xc2, 0xfa, 0x1e,
+ 0x47, 0xc6, 0x87, 0xc2, 0xfa, 0x2a, 0x42, 0x01, 0x0b, 0x42, 0xfa, 0x36,
+ 0xc3, 0x3a, 0xc5, 0x00, 0xcf, 0x91, 0xc4, 0xbc, 0x79, 0x00, 0xcf, 0x10,
+ 0xc3, 0x3a, 0xc5, 0x00, 0xcf, 0xa1, 0xc4, 0xbc, 0x79, 0x00, 0xcf, 0x20,
+ 0xc3, 0x3a, 0xc5, 0x00, 0xcf, 0x99, 0xc4, 0xbc, 0x79, 0x00, 0xcf, 0x18,
+ 0x42, 0x00, 0x9a, 0xc2, 0xfa, 0x42, 0xc2, 0x23, 0x68, 0x00, 0xbf, 0x90,
+ 0xc3, 0xe4, 0xb3, 0x00, 0xbf, 0xa1, 0xc2, 0x05, 0x1b, 0x00, 0xbf, 0x98,
+ 0xc8, 0xbb, 0x4b, 0x00, 0xbe, 0xa9, 0xc8, 0xb1, 0x3c, 0x00, 0xbe, 0x99,
+ 0xc4, 0xe8, 0x73, 0x00, 0xbe, 0x58, 0x98, 0x00, 0xbd, 0x50, 0x90, 0x0d,
+ 0x8b, 0x3b, 0x02, 0xfa, 0x58, 0x19, 0xc2, 0xfa, 0x5c, 0x0d, 0xc2, 0xfa,
+ 0x6c, 0x83, 0x01, 0x85, 0x09, 0x8b, 0x01, 0x85, 0x19, 0x97, 0x01, 0x85,
+ 0x29, 0x87, 0x01, 0x85, 0x39, 0x91, 0x01, 0x85, 0x49, 0x16, 0xc2, 0xfa,
+ 0x7c, 0x1b, 0xc2, 0xfa, 0x84, 0x15, 0xc2, 0xfa, 0x90, 0x0a, 0xc2, 0xfa,
+ 0x98, 0xc2, 0x11, 0x89, 0x01, 0x8f, 0xd1, 0x14, 0x42, 0xfa, 0xac, 0x87,
+ 0x0d, 0x80, 0x01, 0xc2, 0x05, 0x06, 0x0d, 0x89, 0x11, 0x1b, 0x42, 0xfa,
+ 0xc0, 0x45, 0xe2, 0xe7, 0x42, 0xfa, 0xc8, 0x83, 0x00, 0x64, 0x31, 0x8b,
+ 0x00, 0x64, 0x81, 0x97, 0x00, 0x64, 0xa0, 0x8b, 0x00, 0x64, 0x40, 0x97,
+ 0x00, 0x64, 0x50, 0x47, 0xb7, 0xd8, 0xc2, 0xfa, 0xd4, 0x83, 0x00, 0x65,
+ 0xa8, 0x87, 0x00, 0x64, 0x78, 0x91, 0x00, 0x64, 0x98, 0x83, 0x00, 0x64,
+ 0xa9, 0xc2, 0x01, 0x0e, 0x00, 0x64, 0xb0, 0x83, 0x00, 0x64, 0xb9, 0xc2,
+ 0x01, 0x0e, 0x00, 0x64, 0xc0, 0xc2, 0x07, 0x69, 0x00, 0x64, 0xc9, 0xc2,
+ 0x1a, 0x36, 0x00, 0x64, 0xf1, 0xc2, 0x01, 0x01, 0x00, 0x65, 0x19, 0x83,
+ 0x00, 0x65, 0x42, 0x02, 0xfa, 0xe2, 0x83, 0x00, 0x64, 0xd1, 0xc2, 0x01,
+ 0x0e, 0x00, 0x64, 0xd8, 0x83, 0x00, 0x64, 0xe1, 0xc2, 0x01, 0x0e, 0x00,
+ 0x64, 0xe8, 0x16, 0xc2, 0xfa, 0xe8, 0x83, 0x00, 0x65, 0x21, 0xc2, 0x01,
+ 0x0e, 0x00, 0x65, 0x28, 0x06, 0xc2, 0xfa, 0xf2, 0x83, 0x00, 0x65, 0x31,
+ 0xc2, 0x01, 0x0e, 0x00, 0x65, 0x38, 0x83, 0x00, 0x65, 0x51, 0xc2, 0x01,
+ 0x0e, 0x00, 0x65, 0x58, 0x83, 0x00, 0x65, 0x61, 0xc2, 0x01, 0x0e, 0x00,
+ 0x65, 0x68, 0x83, 0x00, 0x65, 0x81, 0xc2, 0x00, 0x9a, 0x00, 0x65, 0x88,
+ 0x83, 0x00, 0x65, 0x91, 0x0e, 0x42, 0xfa, 0xfc, 0xc2, 0x01, 0x0e, 0x00,
+ 0x65, 0xb1, 0xc2, 0x0e, 0xe5, 0x00, 0x65, 0xb9, 0x83, 0x00, 0x65, 0xc0,
+ 0x94, 0x00, 0x66, 0x20, 0x8e, 0x00, 0x67, 0x18, 0xc4, 0xe4, 0xd3, 0x01,
+ 0x79, 0x80, 0xc6, 0x35, 0x38, 0x01, 0x78, 0x81, 0xc4, 0x7e, 0x05, 0x01,
+ 0x7c, 0x38, 0xc2, 0x03, 0x5f, 0x01, 0x78, 0x09, 0x86, 0x01, 0x78, 0x39,
+ 0xc2, 0x13, 0xfc, 0x01, 0x7b, 0x18, 0xc2, 0x0c, 0x56, 0x01, 0x78, 0x49,
+ 0x03, 0xc2, 0xfb, 0x06, 0xc2, 0x13, 0x31, 0x01, 0x7d, 0x90, 0xc2, 0x01,
+ 0x03, 0x01, 0x79, 0x51, 0xc2, 0x00, 0xbb, 0x01, 0x7a, 0x58, 0xc7, 0xc9,
+ 0x12, 0x01, 0x79, 0xa8, 0x96, 0x01, 0x78, 0x13, 0x02, 0xfb, 0x10, 0xc6,
+ 0xd1, 0xea, 0x01, 0x78, 0x61, 0xc2, 0x00, 0xff, 0x01, 0x79, 0xf1, 0xc4,
+ 0x1f, 0x8d, 0x01, 0x7a, 0x79, 0xc6, 0xd2, 0xc2, 0x01, 0x7a, 0xc1, 0x89,
+ 0x01, 0x7a, 0xe8, 0xc8, 0xb1, 0x57, 0x01, 0x78, 0xc1, 0xc4, 0x05, 0x08,
+ 0x01, 0x7a, 0x19, 0x15, 0x42, 0xfb, 0x16, 0x9b, 0x01, 0x79, 0x91, 0xc2,
+ 0x01, 0x0a, 0x01, 0x7e, 0x71, 0xc4, 0x20, 0x6c, 0x01, 0x7e, 0x98, 0xc3,
+ 0x06, 0x87, 0x01, 0x7a, 0x69, 0xc2, 0x00, 0x2a, 0x01, 0x7e, 0x28, 0x03,
+ 0xc2, 0xfb, 0x20, 0xc3, 0x16, 0x13, 0x01, 0x7a, 0xd0, 0xc4, 0x6c, 0xa6,
+ 0x01, 0x78, 0x21, 0xc2, 0x00, 0x2f, 0x01, 0x78, 0xc9, 0xc2, 0x01, 0x1f,
+ 0x01, 0x7c, 0x19, 0x87, 0x01, 0x7c, 0x90, 0xc3, 0x02, 0x14, 0x01, 0x78,
+ 0xa1, 0xc4, 0xe4, 0x6f, 0x01, 0x79, 0x61, 0x07, 0xc2, 0xfb, 0x2c, 0xc4,
+ 0xad, 0xf8, 0x01, 0x7b, 0x80, 0xc5, 0xde, 0xf5, 0x01, 0x79, 0x01, 0xc4,
+ 0x08, 0x30, 0x01, 0x7a, 0x10, 0x11, 0xc2, 0xfb, 0x38, 0x07, 0x42, 0xfb,
+ 0x44, 0x07, 0xc2, 0xfb, 0x50, 0x11, 0xc2, 0xfb, 0x5a, 0xc3, 0x00, 0x3a,
+ 0x01, 0x7b, 0xa0, 0x9b, 0x01, 0x7a, 0x41, 0xce, 0x72, 0xe0, 0x01, 0x7d,
+ 0xf9, 0xc2, 0x03, 0x12, 0x01, 0x7e, 0x20, 0xc6, 0xd6, 0x6a, 0x01, 0x7b,
+ 0x39, 0xc2, 0x47, 0xe1, 0x01, 0x7b, 0xb8, 0xc2, 0x03, 0x84, 0x01, 0x78,
+ 0x29, 0x14, 0x42, 0xfb, 0x67, 0x03, 0xc2, 0xfb, 0x71, 0xc2, 0x16, 0x73,
+ 0x01, 0x7e, 0x38, 0x0e, 0xc2, 0xfb, 0x7b, 0xc2, 0x00, 0x3b, 0x01, 0x79,
+ 0xf9, 0xc2, 0x00, 0xac, 0x01, 0x7d, 0xe8, 0xc6, 0x02, 0x32, 0x01, 0x79,
+ 0xb0, 0xc3, 0x02, 0x94, 0x01, 0x78, 0x79, 0xcc, 0x79, 0x95, 0x01, 0x7d,
+ 0x89, 0xc2, 0x00, 0x30, 0x01, 0x7d, 0xe0, 0xc3, 0x11, 0x26, 0x01, 0x79,
+ 0x89, 0xc3, 0x0e, 0x84, 0x01, 0x7e, 0xa0, 0xc2, 0x01, 0xe6, 0x01, 0x7a,
+ 0x51, 0xc3, 0x00, 0xda, 0x01, 0x7b, 0x89, 0xc4, 0xe5, 0x8f, 0x01, 0x7e,
+ 0x68, 0xc2, 0x00, 0x58, 0x01, 0x7b, 0x09, 0xc3, 0x02, 0xfb, 0x01, 0x7c,
+ 0x60, 0xc4, 0x59, 0x9b, 0x01, 0x7c, 0x31, 0xc3, 0x02, 0x2f, 0x01, 0x7e,
+ 0x90, 0x17, 0xc2, 0xfb, 0x87, 0xc2, 0x00, 0xe5, 0x01, 0x7a, 0x49, 0x14,
+ 0x42, 0xfb, 0x91, 0xc3, 0x0c, 0x34, 0x01, 0x7b, 0x01, 0xc2, 0x00, 0x97,
+ 0x01, 0x7c, 0x08, 0xc6, 0xd2, 0x44, 0x01, 0x7c, 0x11, 0xc4, 0x19, 0x7c,
+ 0x01, 0x7e, 0x48, 0xc3, 0x00, 0x98, 0x01, 0x78, 0x51, 0xc7, 0x5e, 0xe8,
+ 0x01, 0x78, 0xf0, 0x94, 0x01, 0x7b, 0xfb, 0x02, 0xfb, 0x9d, 0x96, 0x01,
+ 0x7d, 0xb8, 0xc3, 0x01, 0x5e, 0x01, 0x79, 0x18, 0xc3, 0x00, 0x29, 0x01,
+ 0x78, 0x69, 0xc4, 0xe4, 0x5b, 0x01, 0x79, 0x59, 0xc5, 0xe2, 0xb0, 0x01,
+ 0x7a, 0x81, 0x99, 0x01, 0x7a, 0xe1, 0xc3, 0x04, 0x32, 0x01, 0x7c, 0x50,
+ 0xc3, 0x41, 0x55, 0x01, 0x78, 0xd1, 0x03, 0xc2, 0xfb, 0xa3, 0xc5, 0x80,
+ 0x6a, 0x01, 0x7c, 0x80, 0xc2, 0x00, 0x29, 0x01, 0x7b, 0x99, 0xc2, 0x00,
+ 0x2b, 0x01, 0x7c, 0xf1, 0xc6, 0xca, 0x32, 0x01, 0x7e, 0x00, 0xc2, 0x11,
+ 0x3f, 0x01, 0x79, 0x41, 0xc4, 0x00, 0xe0, 0x01, 0x7c, 0x68, 0xc5, 0xc8,
+ 0xa3, 0x01, 0x78, 0xd9, 0xc6, 0xd0, 0x40, 0x01, 0x7a, 0xf0, 0xc2, 0x01,
+ 0xf8, 0x01, 0x78, 0x99, 0xc3, 0x0a, 0x68, 0x01, 0x7d, 0x70, 0xc3, 0x14,
+ 0xe9, 0x01, 0x79, 0x29, 0xc2, 0x00, 0xfc, 0x01, 0x79, 0x78, 0xc4, 0xe6,
+ 0x17, 0x01, 0x7a, 0x71, 0xc2, 0x00, 0x15, 0x01, 0x7c, 0x88, 0xc3, 0x00,
+ 0x98, 0x01, 0x7a, 0xa1, 0xc2, 0x47, 0xe1, 0x01, 0x7d, 0x0a, 0x02, 0xfb,
+ 0xab, 0xc3, 0x01, 0x33, 0x01, 0x7b, 0x49, 0xc3, 0x06, 0xfd, 0x01, 0x7e,
+ 0x30, 0x87, 0x01, 0x7d, 0x19, 0x86, 0x01, 0x7d, 0xa8, 0xcc, 0x36, 0x58,
+ 0x01, 0x78, 0xa9, 0xc3, 0x00, 0x29, 0x01, 0x79, 0x71, 0xc2, 0x01, 0xa1,
+ 0x01, 0x7b, 0xb0, 0x92, 0x01, 0x7a, 0x09, 0xc2, 0x01, 0x5b, 0x01, 0x7d,
+ 0x61, 0x96, 0x01, 0x7e, 0x78, 0xc2, 0x00, 0x37, 0x01, 0x7b, 0x71, 0xc3,
+ 0x0d, 0x8f, 0x01, 0x7c, 0x20, 0xc7, 0xc8, 0x47, 0x01, 0x79, 0x11, 0xc2,
+ 0x19, 0x3e, 0x01, 0x7d, 0x30, 0xc2, 0x01, 0x5b, 0x01, 0x7b, 0x91, 0xc2,
+ 0x03, 0x48, 0x01, 0x7c, 0x58, 0x89, 0x01, 0x79, 0x21, 0xc4, 0x02, 0x01,
+ 0x01, 0x7c, 0xf9, 0xc2, 0x00, 0x4e, 0x01, 0x7e, 0x18, 0x99, 0x01, 0x79,
+ 0xc1, 0xcb, 0x92, 0xfc, 0x01, 0x7b, 0x31, 0xc2, 0x00, 0x97, 0x01, 0x7c,
+ 0x41, 0xc2, 0x03, 0x0d, 0x01, 0x7c, 0xe9, 0xc2, 0x03, 0x5f, 0x01, 0x7d,
+ 0xd8, 0xc5, 0xdc, 0x25, 0x01, 0x79, 0xd1, 0xc4, 0x1d, 0xdb, 0x01, 0x7a,
+ 0x01, 0xc3, 0x76, 0x92, 0x01, 0x7c, 0x00, 0xc4, 0x99, 0xac, 0x01, 0x7b,
+ 0xa9, 0xc4, 0xe7, 0x8f, 0x01, 0x7c, 0xc0, 0xc3, 0x29, 0xec, 0x01, 0x7c,
+ 0x71, 0xc2, 0x0e, 0x78, 0x01, 0x7d, 0x69, 0xc3, 0x00, 0xda, 0x01, 0x7e,
+ 0x50, 0x96, 0x01, 0x7a, 0x31, 0xc2, 0x00, 0xa7, 0x01, 0x7e, 0x80, 0xc2,
+ 0x01, 0xa5, 0x01, 0x7a, 0xa9, 0xc3, 0x1e, 0x70, 0x01, 0x7b, 0x29, 0xc3,
+ 0x00, 0x49, 0x01, 0x7d, 0xf1, 0xc2, 0x01, 0x0d, 0x01, 0x7e, 0x10, 0xc4,
+ 0x13, 0xff, 0x01, 0x7a, 0xb9, 0xc2, 0x00, 0x56, 0x01, 0x7a, 0xd9, 0xc2,
+ 0x01, 0xa1, 0x01, 0x7d, 0x78, 0x9b, 0x01, 0x7d, 0xb1, 0xc3, 0x34, 0x23,
+ 0x01, 0x7e, 0xa8, 0xc6, 0xd6, 0xe8, 0x01, 0x7d, 0xc9, 0xc2, 0x13, 0x1d,
+ 0x01, 0x7e, 0x60, 0xc3, 0xea, 0xce, 0x0b, 0x7c, 0xf9, 0xc3, 0x82, 0xb0,
+ 0x0b, 0x7c, 0xf1, 0xc3, 0x3b, 0x0b, 0x0b, 0x7c, 0xe9, 0xc3, 0x82, 0xe0,
+ 0x0b, 0x7c, 0xe1, 0xc3, 0x82, 0xa4, 0x0b, 0x7c, 0xd9, 0xc3, 0x83, 0x28,
+ 0x0b, 0x7c, 0xd0, 0xc2, 0x07, 0x69, 0x0b, 0x79, 0x29, 0x83, 0x0b, 0x78,
+ 0x98, 0xc2, 0x1a, 0x36, 0x0b, 0x7a, 0x09, 0x83, 0x0b, 0x79, 0xf0, 0x83,
+ 0x0b, 0x79, 0xc9, 0xc2, 0x01, 0x0e, 0x0b, 0x79, 0x80, 0x89, 0x0b, 0x7b,
+ 0x68, 0x89, 0x0b, 0x7b, 0x20, 0xcb, 0x1d, 0x2d, 0x01, 0x51, 0xd1, 0x45,
+ 0x01, 0xac, 0x42, 0xfb, 0xb1, 0xd6, 0x2c, 0xc1, 0x01, 0x3b, 0xa9, 0xd4,
+ 0x1c, 0xe0, 0x01, 0x3b, 0x48, 0xde, 0x0f, 0x7b, 0x01, 0x3b, 0x68, 0xd6,
+ 0x2c, 0xc1, 0x01, 0x3b, 0xa1, 0xd4, 0x1c, 0xe0, 0x01, 0x3b, 0x40, 0xde,
+ 0x0f, 0x7b, 0x01, 0x3b, 0x60, 0xda, 0x1c, 0xda, 0x01, 0x3b, 0x59, 0xd9,
+ 0x1f, 0x1f, 0x01, 0x3b, 0x50, 0xca, 0x25, 0x5a, 0x0f, 0xbe, 0x29, 0xcd,
+ 0x0f, 0x50, 0x0f, 0xbe, 0x38, 0xcf, 0x18, 0x2e, 0x0f, 0xbd, 0xb1, 0xd2,
+ 0x25, 0x52, 0x0f, 0xbe, 0x58, 0x97, 0x0b, 0x73, 0x98, 0x8b, 0x0b, 0x73,
+ 0xf1, 0xc3, 0x81, 0x43, 0x0b, 0x73, 0x20, 0x87, 0x0b, 0x73, 0xd0, 0x89,
+ 0x0b, 0x73, 0xb9, 0x9b, 0x0b, 0x73, 0xb8, 0x92, 0x0b, 0x73, 0xb0, 0x92,
+ 0x0b, 0x73, 0x30, 0x97, 0x0b, 0x72, 0x98, 0x8b, 0x0b, 0x72, 0xf1, 0xc3,
+ 0x81, 0x43, 0x0b, 0x72, 0x20, 0x87, 0x0b, 0x72, 0xd0, 0x89, 0x0b, 0x72,
+ 0xb9, 0x9b, 0x0b, 0x72, 0xb8, 0x92, 0x0b, 0x72, 0xb0, 0x92, 0x0b, 0x72,
+ 0x30, 0xcf, 0x65, 0xdc, 0x0b, 0x74, 0xb0, 0xcf, 0x65, 0xdc, 0x0b, 0x74,
+ 0xa8, 0xc4, 0xe7, 0xe3, 0x0f, 0x41, 0xd1, 0xc4, 0xea, 0x5b, 0x0f, 0x41,
+ 0xa1, 0xc5, 0xe0, 0x4e, 0x0f, 0x40, 0x29, 0xc4, 0xe9, 0x87, 0x0f, 0x42,
+ 0xf1, 0xc5, 0xe0, 0x3a, 0x0f, 0x42, 0xe9, 0xc5, 0xd9, 0x91, 0x0f, 0x44,
+ 0xc1, 0xc5, 0xe2, 0xba, 0x0f, 0x45, 0x09, 0xc6, 0xd0, 0x0a, 0x0f, 0x45,
+ 0x59, 0xc5, 0xda, 0xae, 0x0f, 0x45, 0x61, 0xc4, 0xe8, 0x6b, 0x0f, 0x45,
+ 0xf8, 0xc5, 0xe2, 0xbf, 0x0f, 0x41, 0xc9, 0xc5, 0xe0, 0x08, 0x0f, 0x43,
+ 0x99, 0xc6, 0xd2, 0x08, 0x0f, 0x43, 0x79, 0xc4, 0xea, 0x6f, 0x0f, 0x43,
+ 0x01, 0xc4, 0xe8, 0xd3, 0x0f, 0x42, 0xb9, 0xc5, 0xe1, 0x2f, 0x0f, 0x42,
+ 0x09, 0xc6, 0xd1, 0x06, 0x0f, 0x43, 0xc9, 0xcb, 0x99, 0x84, 0x0f, 0x44,
+ 0x01, 0xc5, 0xdb, 0x3a, 0x0f, 0x44, 0x79, 0xc4, 0xe9, 0x53, 0x0f, 0x45,
+ 0xe8, 0xc4, 0xe8, 0x53, 0x0f, 0x41, 0xc1, 0xc4, 0xe7, 0xab, 0x0f, 0x41,
+ 0xb9, 0xc4, 0xe9, 0xa7, 0x0f, 0x41, 0xb1, 0xc4, 0xea, 0xb7, 0x0f, 0x41,
+ 0x81, 0xc4, 0xe9, 0x0f, 0x0f, 0x41, 0x79, 0xc4, 0xea, 0x47, 0x0f, 0x42,
+ 0x61, 0xc4, 0xea, 0x93, 0x0f, 0x42, 0x59, 0xc4, 0xe8, 0xfb, 0x0f, 0x42,
+ 0x31, 0xc4, 0xe9, 0xa3, 0x0f, 0x42, 0x29, 0xc4, 0x3e, 0x09, 0x0f, 0x42,
+ 0x20, 0xc4, 0xea, 0x57, 0x0f, 0x41, 0x71, 0xc3, 0xeb, 0xe5, 0x0f, 0x41,
+ 0x21, 0xc3, 0xe2, 0x83, 0x0f, 0x41, 0x19, 0xc3, 0xeb, 0xcd, 0x0f, 0x41,
+ 0x11, 0xc4, 0xe9, 0xbf, 0x0f, 0x40, 0xe9, 0xc4, 0xc2, 0x3b, 0x0f, 0x40,
+ 0xe1, 0xc4, 0xe7, 0xaf, 0x0f, 0x40, 0xd9, 0xc4, 0xea, 0x17, 0x0f, 0x42,
+ 0x01, 0xc4, 0xe8, 0x7b, 0x0f, 0x41, 0xf9, 0xc4, 0xe8, 0x0b, 0x0f, 0x41,
+ 0xf0, 0xc4, 0xe7, 0xe7, 0x0f, 0x40, 0xf9, 0xc5, 0xe2, 0xce, 0x0f, 0x40,
+ 0xc1, 0xc4, 0xe2, 0x51, 0x0f, 0x40, 0x21, 0xc4, 0xe8, 0x1b, 0x0f, 0x43,
+ 0x61, 0xc5, 0xdb, 0x30, 0x0f, 0x42, 0x39, 0xc6, 0xd1, 0x78, 0x0f, 0x43,
+ 0xb9, 0xc4, 0xea, 0x2b, 0x0f, 0x44, 0x69, 0xc5, 0xd9, 0xeb, 0x0f, 0x45,
+ 0x01, 0xc6, 0xd2, 0x02, 0x0f, 0x45, 0x49, 0xc6, 0xd1, 0x12, 0x0f, 0x46,
+ 0x18, 0xc5, 0xe1, 0x39, 0x0f, 0x40, 0xb9, 0xc5, 0xda, 0x0e, 0x0f, 0x43,
+ 0xa1, 0xc5, 0xe1, 0x7f, 0x0f, 0x43, 0x89, 0xc4, 0xe9, 0x23, 0x0f, 0x42,
+ 0x41, 0xc5, 0xe2, 0xc4, 0x0f, 0x41, 0xd9, 0xc6, 0xd1, 0xde, 0x0f, 0x44,
+ 0x51, 0xc4, 0xe9, 0x4b, 0x0f, 0x44, 0x71, 0xc4, 0xdb, 0x3a, 0x0f, 0x44,
+ 0x81, 0xc5, 0xe0, 0x49, 0x0f, 0x45, 0x39, 0xc6, 0xcf, 0xf2, 0x0f, 0x46,
+ 0x08, 0xc5, 0xe1, 0x3e, 0x0f, 0x40, 0xb1, 0xc5, 0xe0, 0xfd, 0x0f, 0x40,
+ 0xa9, 0xc5, 0xe0, 0x12, 0x0f, 0x40, 0xa1, 0xc4, 0xe7, 0xd7, 0x0f, 0x40,
+ 0x51, 0xc4, 0xe9, 0x57, 0x0f, 0x40, 0x49, 0xc4, 0xe9, 0x07, 0x0f, 0x40,
+ 0x41, 0xc4, 0xe7, 0xeb, 0x0f, 0x40, 0x11, 0xc4, 0xea, 0x0f, 0x0f, 0x40,
+ 0x09, 0xc4, 0xe8, 0x2f, 0x0f, 0x40, 0x00, 0xc5, 0xe0, 0x94, 0x0f, 0x40,
+ 0x91, 0xc4, 0xd1, 0x00, 0x0f, 0x40, 0x71, 0xc4, 0xea, 0x2f, 0x0f, 0x40,
+ 0x31, 0xc5, 0xda, 0x09, 0x0f, 0x43, 0x69, 0xc5, 0xda, 0x4a, 0x0f, 0x43,
+ 0x59, 0xc4, 0xe9, 0x13, 0x0f, 0x43, 0x49, 0xc6, 0xcf, 0xec, 0x0f, 0x43,
+ 0xb1, 0xc6, 0xd0, 0x10, 0x0f, 0x43, 0xc1, 0xc6, 0xd0, 0x4c, 0x0f, 0x44,
+ 0xb1, 0xc6, 0xd1, 0x6c, 0x0f, 0x45, 0x10, 0xc5, 0xdb, 0x17, 0x0f, 0x40,
+ 0x89, 0xc5, 0xe2, 0x51, 0x0f, 0x40, 0x19, 0xc4, 0xe7, 0xef, 0x0f, 0x42,
+ 0x89, 0xc4, 0x55, 0x64, 0x0f, 0x42, 0x51, 0xc4, 0xea, 0x33, 0x0f, 0x44,
+ 0x61, 0xc4, 0xe7, 0xbb, 0x0f, 0x44, 0x91, 0xc5, 0xda, 0x13, 0x0f, 0x44,
+ 0xa1, 0xc6, 0xd0, 0x70, 0x0f, 0x45, 0x99, 0xc5, 0xe2, 0xd8, 0x0f, 0x45,
+ 0xa1, 0xc6, 0xd0, 0x88, 0x0f, 0x46, 0x20, 0xc5, 0xe1, 0x57, 0x0f, 0x43,
+ 0x29, 0xc5, 0xe1, 0x02, 0x0f, 0x43, 0x21, 0xc5, 0xdf, 0xea, 0x0f, 0x43,
+ 0x19, 0xc4, 0xe8, 0x13, 0x0f, 0x42, 0xe1, 0xc4, 0xe9, 0x83, 0x0f, 0x42,
+ 0xd9, 0xc4, 0xe8, 0xe7, 0x0f, 0x42, 0xd1, 0xc4, 0xe9, 0x9f, 0x0f, 0x42,
+ 0xa9, 0xc4, 0xe9, 0xc3, 0x0f, 0x42, 0xa1, 0xc4, 0xea, 0x87, 0x0f, 0x42,
+ 0x99, 0xc4, 0xe7, 0xf3, 0x0f, 0x42, 0x68, 0xc5, 0xe0, 0x71, 0x0f, 0x41,
+ 0xa9, 0xc4, 0xe8, 0xd7, 0x0f, 0x41, 0x61, 0xc5, 0xe2, 0x1f, 0x0f, 0x40,
+ 0x79, 0xc5, 0xe1, 0x89, 0x0f, 0x43, 0xa9, 0xc5, 0xdf, 0xfe, 0x0f, 0x43,
+ 0x09, 0xc5, 0xda, 0xb3, 0x0f, 0x44, 0x31, 0xc6, 0xd1, 0x72, 0x0f, 0x45,
+ 0x89, 0xc5, 0xe1, 0xd9, 0x0f, 0x45, 0xb0, 0xc5, 0xe0, 0x76, 0x0f, 0x41,
+ 0x99, 0xc4, 0xe9, 0x93, 0x0f, 0x41, 0x59, 0xc4, 0xe9, 0x43, 0x0f, 0x41,
+ 0x51, 0xc4, 0xe8, 0xa3, 0x0f, 0x41, 0x49, 0xc4, 0xe8, 0x6f, 0x0f, 0x41,
+ 0x09, 0xc5, 0xe0, 0x99, 0x0f, 0x40, 0x99, 0xc5, 0xda, 0xbd, 0x0f, 0x43,
+ 0x91, 0xc5, 0xe1, 0x98, 0x0f, 0x42, 0xf9, 0xc5, 0xd9, 0xe6, 0x0f, 0x44,
+ 0xf9, 0xc6, 0xd1, 0xb4, 0x0f, 0x45, 0xc0, 0xc4, 0xe7, 0xc7, 0x0f, 0x41,
+ 0x91, 0xc5, 0xe2, 0x4c, 0x0f, 0x40, 0x69, 0xc4, 0xea, 0x43, 0x0f, 0x40,
+ 0x61, 0xc5, 0xe0, 0x17, 0x0f, 0x43, 0x31, 0xc4, 0xe8, 0x17, 0x0f, 0x42,
+ 0x79, 0xc9, 0xaa, 0xd5, 0x0f, 0x41, 0xe9, 0xc7, 0xcc, 0xa0, 0x0f, 0x43,
+ 0xd1, 0xc4, 0xe9, 0x7b, 0x0f, 0x44, 0x21, 0xc6, 0xd2, 0xec, 0x0f, 0x45,
+ 0x21, 0xc5, 0xe1, 0x4d, 0x0f, 0x45, 0x90, 0xc5, 0xe1, 0xed, 0x0f, 0x41,
+ 0x89, 0xc4, 0xe9, 0x1f, 0x0f, 0x41, 0x39, 0xc4, 0xe8, 0x9f, 0x0f, 0x41,
+ 0x29, 0xc5, 0xe1, 0x52, 0x0f, 0x43, 0x39, 0xc5, 0xda, 0x59, 0x0f, 0x42,
+ 0x81, 0xc4, 0xea, 0x77, 0x0f, 0x44, 0x29, 0xc6, 0xd0, 0x16, 0x0f, 0x44,
+ 0x39, 0xc6, 0xd0, 0x5e, 0x0f, 0x44, 0x41, 0xca, 0xa3, 0x6c, 0x0f, 0x44,
+ 0xe1, 0xc6, 0xd1, 0xcc, 0x0f, 0x46, 0x00, 0xc4, 0xea, 0x6b, 0x0f, 0x41,
+ 0x69, 0xc5, 0xe1, 0x34, 0x0f, 0x40, 0x39, 0xc4, 0xe8, 0x27, 0x0f, 0x43,
+ 0x41, 0xc9, 0xaf, 0x04, 0x0f, 0x42, 0x91, 0xc7, 0xce, 0x0c, 0x0f, 0x44,
+ 0x59, 0xc6, 0xd0, 0x34, 0x0f, 0x44, 0xc9, 0xc5, 0xe2, 0xc9, 0x0f, 0x44,
+ 0xd1, 0xc4, 0xea, 0xa3, 0x0f, 0x45, 0x69, 0xc5, 0xe1, 0xca, 0x0f, 0x45,
+ 0xe1, 0xc6, 0xd0, 0x76, 0x0f, 0x46, 0x10, 0xc3, 0xeb, 0x46, 0x0f, 0x41,
+ 0x41, 0xc5, 0xe1, 0xf7, 0x0f, 0x40, 0x81, 0xc4, 0xe8, 0x1f, 0x0f, 0x43,
+ 0x71, 0xc5, 0xdb, 0x6c, 0x0f, 0x42, 0xc1, 0xc6, 0xd0, 0x8e, 0x0f, 0x43,
+ 0xd9, 0xc5, 0xe2, 0x5b, 0x0f, 0x44, 0x99, 0xca, 0xa3, 0x30, 0x0f, 0x44,
+ 0xf1, 0xc5, 0xe0, 0x3f, 0x0f, 0x45, 0x41, 0xc6, 0xcf, 0xda, 0x0f, 0x45,
+ 0xb9, 0xc5, 0xe2, 0xe2, 0x0f, 0x45, 0xf0, 0xc3, 0xeb, 0x2e, 0x0f, 0x41,
+ 0x31, 0xc5, 0xe2, 0x1a, 0x0f, 0x41, 0x01, 0xc5, 0xdb, 0x9e, 0x0f, 0x43,
+ 0x11, 0xc5, 0xe0, 0xa3, 0x0f, 0x42, 0xb1, 0xc5, 0xe2, 0xdd, 0x0f, 0x42,
+ 0x49, 0xcc, 0x85, 0xec, 0x0f, 0x44, 0x09, 0xc5, 0xe1, 0xe3, 0x0f, 0x44,
+ 0x89, 0xcb, 0x99, 0xa5, 0x0f, 0x44, 0xe9, 0xc5, 0xe0, 0x44, 0x0f, 0x45,
+ 0x19, 0xc5, 0xe0, 0x03, 0x0f, 0x45, 0x50, 0xc5, 0xdb, 0x99, 0x0f, 0x40,
+ 0xf1, 0xc6, 0xd2, 0x68, 0x0f, 0x40, 0xc9, 0xc5, 0xe0, 0x0d, 0x0f, 0x42,
+ 0x71, 0xc4, 0x9a, 0x55, 0x0f, 0x41, 0xe1, 0xc7, 0xcc, 0x61, 0x0f, 0x43,
+ 0xe1, 0xc7, 0xc4, 0xea, 0x0f, 0x43, 0xf1, 0xc4, 0xe9, 0x8f, 0x0f, 0x44,
+ 0x19, 0xc5, 0xe0, 0xf8, 0x0f, 0x45, 0x29, 0xc5, 0xe1, 0xd4, 0x0f, 0x45,
+ 0xa9, 0xc4, 0xea, 0xab, 0x0f, 0x45, 0xd8, 0xc6, 0xd0, 0x1c, 0x0f, 0x40,
+ 0xd1, 0xc4, 0xcf, 0xec, 0x0f, 0x43, 0x51, 0xc4, 0xe8, 0xcf, 0x0f, 0x42,
+ 0x19, 0xc5, 0xda, 0x95, 0x0f, 0x42, 0x11, 0xcb, 0x9a, 0x55, 0x0f, 0x44,
+ 0x11, 0xc6, 0xd1, 0xae, 0x0f, 0x44, 0x49, 0xc6, 0xd0, 0x64, 0x0f, 0x44,
+ 0xb9, 0xc6, 0xd1, 0x3c, 0x0f, 0x44, 0xd9, 0xc4, 0xe9, 0xb3, 0x0f, 0x45,
+ 0xc9, 0xc4, 0xe9, 0x4f, 0x0f, 0x45, 0xd0, 0xc5, 0xe0, 0x85, 0x0f, 0x40,
+ 0x59, 0xc6, 0xd1, 0xc0, 0x0f, 0x43, 0x81, 0xc4, 0xdb, 0x6c, 0x0f, 0x42,
+ 0xc9, 0xc6, 0xd0, 0x6a, 0x0f, 0x43, 0xe9, 0xc7, 0xcf, 0x5c, 0x0f, 0x43,
+ 0xf9, 0xc5, 0xe1, 0xcf, 0x0f, 0x44, 0xa9, 0xc5, 0xe2, 0xb5, 0x0f, 0x45,
+ 0x31, 0xc5, 0xe1, 0xa2, 0x0f, 0x45, 0x71, 0xc5, 0xe1, 0x43, 0x0f, 0x45,
+ 0x79, 0xc5, 0xe2, 0x65, 0x0f, 0x45, 0x80, 0xc3, 0x11, 0x15, 0x0f, 0x46,
+ 0x81, 0x10, 0x42, 0xfb, 0xc9, 0xcb, 0x72, 0x2d, 0x08, 0x4f, 0xf9, 0xcd,
+ 0x77, 0x99, 0x08, 0x4f, 0xc1, 0xcb, 0x99, 0x4d, 0x08, 0x4f, 0xb8, 0xcd,
+ 0x7f, 0x92, 0x08, 0x4f, 0xe9, 0xce, 0x72, 0x2a, 0x08, 0x4d, 0xe0, 0xcd,
+ 0x72, 0x2b, 0x08, 0x4f, 0xe1, 0xcb, 0x93, 0x49, 0x08, 0x4f, 0xd8, 0xcc,
+ 0x8c, 0xd0, 0x08, 0x4f, 0xd1, 0xcc, 0x8a, 0xd8, 0x08, 0x4f, 0xc8, 0xc7,
+ 0x72, 0x30, 0x08, 0x4f, 0xb1, 0xc4, 0x01, 0x1d, 0x08, 0x4d, 0xe8, 0x00,
+ 0xc2, 0xfb, 0xd3, 0xcb, 0x97, 0x06, 0x08, 0x4f, 0x60, 0x00, 0xc2, 0xfb,
+ 0xe2, 0xca, 0x97, 0x07, 0x08, 0x4f, 0x58, 0xc4, 0x15, 0xa7, 0x08, 0x4e,
+ 0x33, 0x02, 0xfb, 0xf1, 0xc2, 0x22, 0x45, 0x08, 0x4e, 0x2a, 0x02, 0xfb,
+ 0xfe, 0x0b, 0xc2, 0xfc, 0x0b, 0x11, 0x42, 0xfc, 0x1d, 0x0a, 0xc2, 0xfc,
+ 0x2f, 0x19, 0xc2, 0xfc, 0x41, 0xc2, 0x01, 0x04, 0x08, 0x4e, 0x4a, 0x02,
+ 0xfc, 0x51, 0x00, 0x42, 0xfc, 0x57, 0xc3, 0xed, 0x80, 0x08, 0x4d, 0xf9,
+ 0xc3, 0x65, 0xe9, 0x08, 0x4d, 0xf0, 0xc2, 0x0c, 0x25, 0x08, 0x4d, 0xb9,
+ 0x16, 0xc2, 0xfc, 0x66, 0xc2, 0x0e, 0x13, 0x08, 0x4d, 0x99, 0x0d, 0xc2,
+ 0xfc, 0x72, 0x15, 0xc2, 0xfc, 0x7c, 0x83, 0x08, 0x4d, 0x03, 0x02, 0xfc,
+ 0x84, 0xc3, 0x02, 0xe6, 0x08, 0x4d, 0x71, 0xc2, 0x00, 0x96, 0x08, 0x4d,
+ 0x61, 0xc2, 0x00, 0x9a, 0x08, 0x4d, 0x59, 0x10, 0xc2, 0xfc, 0x8a, 0xc2,
+ 0x00, 0x3f, 0x08, 0x4d, 0x41, 0xc2, 0x07, 0x44, 0x08, 0x4d, 0x39, 0xc2,
+ 0x02, 0x1d, 0x08, 0x4d, 0x31, 0xc2, 0x00, 0x4c, 0x08, 0x4d, 0x29, 0xc2,
+ 0x1a, 0x36, 0x08, 0x4d, 0x21, 0x91, 0x08, 0x4d, 0x19, 0x8b, 0x08, 0x4d,
+ 0x11, 0x87, 0x08, 0x4d, 0x08, 0x91, 0x08, 0x4c, 0xe1, 0xc3, 0x82, 0xa4,
+ 0x08, 0x4c, 0xd9, 0x87, 0x08, 0x4c, 0xd1, 0xc3, 0xeb, 0x40, 0x08, 0x4c,
+ 0xc9, 0x83, 0x08, 0x4c, 0xc0, 0x83, 0x08, 0x4c, 0xb1, 0xc2, 0x01, 0x0e,
+ 0x08, 0x4c, 0x88, 0x87, 0x08, 0x4c, 0xa9, 0xc3, 0xeb, 0x40, 0x08, 0x4c,
+ 0xa1, 0x83, 0x08, 0x4c, 0x98, 0xc3, 0xeb, 0x40, 0x08, 0x4c, 0x79, 0x83,
+ 0x08, 0x4c, 0x70, 0x83, 0x08, 0x4c, 0x31, 0xc3, 0xeb, 0x40, 0x08, 0x4c,
+ 0x38, 0x83, 0x08, 0x4c, 0x51, 0xc3, 0xeb, 0x40, 0x08, 0x4c, 0x59, 0x87,
+ 0x08, 0x4c, 0x61, 0xc3, 0x82, 0xa4, 0x08, 0x4c, 0x68, 0x60, 0x0a, 0x27,
+ 0x42, 0xfc, 0x92, 0x97, 0x05, 0x57, 0x79, 0x8b, 0x05, 0x57, 0x68, 0xc7,
+ 0xcd, 0x17, 0x05, 0x5f, 0x08, 0xc7, 0xcd, 0x17, 0x05, 0x5e, 0xf8, 0xc7,
+ 0xcd, 0x17, 0x05, 0x5f, 0x00, 0xc2, 0x01, 0x0e, 0x05, 0x57, 0x29, 0x83,
+ 0x05, 0x57, 0x20, 0xc7, 0xcd, 0x17, 0x05, 0x5e, 0xf0, 0xc7, 0xcd, 0x17,
+ 0x05, 0x5e, 0xd8, 0xc2, 0x01, 0x0e, 0x05, 0x57, 0x39, 0x83, 0x05, 0x57,
+ 0x30, 0x48, 0xbb, 0xe3, 0xc2, 0xfc, 0xaa, 0x47, 0x01, 0xf8, 0xc2, 0xfc,
+ 0xba, 0x4d, 0x79, 0x12, 0xc2, 0xfd, 0x21, 0xd0, 0x07, 0x97, 0x00, 0x16,
+ 0x31, 0x47, 0x60, 0xa5, 0xc2, 0xfd, 0x2d, 0xcb, 0x90, 0x10, 0x00, 0x16,
+ 0xf9, 0xc4, 0x0c, 0x33, 0x05, 0x3c, 0x48, 0x45, 0x00, 0x3f, 0xc2, 0xfd,
+ 0x39, 0x4b, 0x08, 0x2a, 0xc2, 0xfd, 0xdc, 0x4a, 0xa0, 0x06, 0xc2, 0xfd,
+ 0xe8, 0x0a, 0x42, 0xfd, 0xf4, 0x45, 0x01, 0x18, 0xc2, 0xfe, 0x00, 0x07,
+ 0xc2, 0xfe, 0x12, 0xca, 0xa1, 0xa0, 0x00, 0x16, 0xf1, 0x46, 0x0c, 0x07,
+ 0x42, 0xfe, 0x1c, 0x44, 0x00, 0xce, 0xc2, 0xfe, 0x3a, 0xcc, 0x79, 0x54,
+ 0x08, 0x3d, 0xb9, 0x42, 0x00, 0x37, 0x42, 0xfe, 0x4c, 0xcb, 0x26, 0x0d,
+ 0x00, 0x16, 0x03, 0x02, 0xfe, 0x56, 0xcb, 0x1e, 0x65, 0x00, 0x16, 0x59,
+ 0xcb, 0x95, 0xe8, 0x00, 0x87, 0xe0, 0xcd, 0x7f, 0x1d, 0x08, 0x3d, 0xa9,
+ 0x45, 0x3e, 0x58, 0x42, 0xfe, 0x5c, 0xcb, 0x84, 0xf1, 0x08, 0x3d, 0xb1,
+ 0x11, 0x42, 0xfe, 0x68, 0xcd, 0x7c, 0x79, 0x08, 0x3d, 0xc1, 0xc9, 0x2d,
+ 0x3c, 0x00, 0x15, 0xe1, 0xcb, 0x87, 0x85, 0x00, 0x16, 0x50, 0xc4, 0x16,
+ 0x95, 0x00, 0x15, 0xc9, 0xc8, 0x65, 0xfb, 0x00, 0x16, 0xb0, 0xcb, 0x53,
+ 0xbc, 0x00, 0x15, 0xd9, 0xcf, 0x34, 0x90, 0x00, 0x16, 0x80, 0x42, 0x02,
+ 0x49, 0xc2, 0xfe, 0x7a, 0xca, 0xa7, 0xcc, 0x00, 0x17, 0x69, 0x95, 0x05,
+ 0x3b, 0x80, 0xcc, 0x36, 0x6e, 0x00, 0x16, 0x41, 0xc6, 0xc5, 0xd9, 0x00,
+ 0x17, 0x60, 0xc5, 0x61, 0xbd, 0x00, 0x16, 0x49, 0x0b, 0x42, 0xfe, 0x86,
+ 0x43, 0x33, 0x3a, 0xc2, 0xfe, 0x90, 0x43, 0x00, 0x3b, 0x42, 0xfe, 0x9c,
+ 0x44, 0x0b, 0x2c, 0xc2, 0xfe, 0xa8, 0xd4, 0x34, 0x8b, 0x00, 0x16, 0x88,
+ 0xd6, 0x2d, 0x87, 0x00, 0x17, 0x51, 0xd7, 0x2a, 0xd5, 0x00, 0x17, 0x58,
+ 0xc4, 0x24, 0x35, 0x08, 0xb2, 0xc9, 0xc5, 0x05, 0x1b, 0x08, 0xb2, 0xc1,
+ 0x15, 0xc2, 0xfe, 0xba, 0x08, 0xc2, 0xfe, 0xc6, 0x16, 0xc2, 0xfe, 0xd2,
+ 0xc3, 0x05, 0x17, 0x08, 0xb2, 0x89, 0xc4, 0x16, 0x57, 0x08, 0xb2, 0x80,
+ 0xca, 0xa4, 0x8e, 0x08, 0xb2, 0x01, 0xc7, 0x10, 0xac, 0x08, 0xb1, 0xe8,
+ 0xc4, 0x21, 0x28, 0x08, 0xb1, 0xf9, 0xc5, 0x45, 0xcf, 0x08, 0xb1, 0xf0,
+ 0x97, 0x08, 0xb1, 0xe1, 0x8b, 0x08, 0xb1, 0xd1, 0x83, 0x08, 0xb1, 0x80,
+ 0x8e, 0x08, 0xb1, 0xbb, 0x02, 0xfe, 0xde, 0x94, 0x08, 0xb1, 0xaa, 0x02,
+ 0xfe, 0xe2, 0x97, 0x08, 0xb1, 0xa0, 0x8b, 0x08, 0xb1, 0x90, 0xc2, 0x00,
+ 0x96, 0x08, 0xb1, 0x79, 0x83, 0x08, 0xb1, 0x48, 0x83, 0x08, 0xb1, 0x69,
+ 0xc2, 0x0e, 0xe5, 0x08, 0xb1, 0x61, 0xc2, 0x01, 0x0e, 0x08, 0xb1, 0x58,
+ 0x83, 0x08, 0xb1, 0x51, 0x47, 0xb7, 0xd8, 0x42, 0xfe, 0xe6, 0xc2, 0x01,
+ 0x0e, 0x08, 0xb1, 0x29, 0x83, 0x08, 0xb1, 0x20, 0xc2, 0x01, 0x0e, 0x08,
+ 0xb1, 0x19, 0x83, 0x08, 0xb1, 0x10, 0x83, 0x08, 0xb1, 0x09, 0xc2, 0x01,
+ 0x01, 0x08, 0xb0, 0xe1, 0xc2, 0x1a, 0x36, 0x08, 0xb0, 0xb9, 0xc2, 0x07,
+ 0x69, 0x08, 0xb0, 0x90, 0xc2, 0x01, 0x0e, 0x08, 0xb1, 0x01, 0x83, 0x08,
+ 0xb0, 0xf9, 0x06, 0x42, 0xfe, 0xf4, 0xc2, 0x01, 0x0e, 0x08, 0xb0, 0xf1,
+ 0x83, 0x08, 0xb0, 0xe9, 0x16, 0x42, 0xfe, 0xfe, 0xc2, 0x01, 0x0e, 0x08,
+ 0xb0, 0xb1, 0x83, 0x08, 0xb0, 0xa8, 0xc2, 0x01, 0x0e, 0x08, 0xb0, 0xa1,
+ 0x83, 0x08, 0xb0, 0x98, 0xc2, 0x01, 0x0e, 0x08, 0xb0, 0x89, 0x83, 0x08,
+ 0xb0, 0x80, 0xc2, 0x01, 0x0e, 0x08, 0xb0, 0x79, 0x83, 0x08, 0xb0, 0x70,
+ 0x97, 0x08, 0xb0, 0x69, 0x8b, 0x08, 0xb0, 0x59, 0x83, 0x08, 0xb0, 0x08,
+ 0x97, 0x08, 0xb0, 0x28, 0x8b, 0x08, 0xb0, 0x18, 0xcf, 0x09, 0x78, 0x08,
+ 0xb3, 0x59, 0xc8, 0x00, 0xff, 0x08, 0xb3, 0x50, 0xc4, 0x15, 0xa7, 0x00,
+ 0xc0, 0xb9, 0xc2, 0x22, 0x45, 0x00, 0xc0, 0xb0, 0xc3, 0x0d, 0x8f, 0x00,
+ 0xc0, 0xa9, 0xc3, 0x08, 0xde, 0x00, 0xc0, 0xa0, 0xc4, 0x05, 0xde, 0x00,
+ 0xc0, 0x99, 0xc2, 0x0a, 0x20, 0x00, 0xc0, 0x90, 0x49, 0xb7, 0xf2, 0xc2,
+ 0xff, 0x08, 0xc3, 0xb0, 0x39, 0x00, 0xc3, 0xb9, 0xc2, 0x01, 0xa7, 0x00,
+ 0xc3, 0xb1, 0xc2, 0x00, 0x9a, 0x00, 0xc3, 0xa9, 0xc2, 0x06, 0x6b, 0x00,
+ 0xc3, 0xa1, 0x8b, 0x00, 0xc3, 0x98, 0x06, 0xc2, 0xff, 0x3c, 0x45, 0x01,
+ 0x1d, 0xc2, 0xff, 0x49, 0x83, 0x00, 0xc4, 0x3b, 0x02, 0xff, 0x53, 0x1c,
+ 0xc2, 0xff, 0x5d, 0xc3, 0x1d, 0x55, 0x00, 0xc4, 0xa1, 0x12, 0xc2, 0xff,
+ 0x67, 0x16, 0xc2, 0xff, 0x71, 0x10, 0xc2, 0xff, 0x7f, 0xc2, 0x00, 0x2e,
+ 0x00, 0xc4, 0x59, 0xc2, 0x06, 0x6b, 0x00, 0xc4, 0x49, 0x8b, 0x00, 0xc4,
+ 0x43, 0x02, 0xff, 0x8b, 0xc6, 0x8f, 0x65, 0x00, 0xc4, 0x29, 0xc7, 0x64,
+ 0x5e, 0x00, 0xc4, 0x19, 0xcb, 0x9c, 0x0d, 0x00, 0xc4, 0x08, 0x03, 0xc2,
+ 0xff, 0x91, 0x06, 0xc2, 0xff, 0x9d, 0xc3, 0x04, 0x41, 0x00, 0xc2, 0xd9,
+ 0x0c, 0xc2, 0xff, 0xa7, 0xc3, 0x3b, 0xb0, 0x00, 0xc2, 0xc9, 0xc2, 0x07,
+ 0x69, 0x00, 0xc2, 0x73, 0x02, 0xff, 0xb1, 0xc2, 0x06, 0x6b, 0x00, 0xc2,
+ 0xb9, 0xc2, 0x00, 0x4c, 0x00, 0xc2, 0xb1, 0xc2, 0x1a, 0x36, 0x00, 0xc2,
+ 0xa9, 0x16, 0xc2, 0xff, 0xb5, 0xc3, 0x1c, 0x4f, 0x00, 0xc2, 0x91, 0xc2,
+ 0x00, 0x3f, 0x00, 0xc2, 0x79, 0xc2, 0x0e, 0x13, 0x00, 0xc2, 0x69, 0xc2,
+ 0x07, 0x44, 0x00, 0xc2, 0x61, 0xc2, 0x02, 0x1d, 0x00, 0xc2, 0x59, 0x97,
+ 0x00, 0xc2, 0x3b, 0x02, 0xff, 0xbf, 0x91, 0x00, 0xc2, 0x33, 0x02, 0xff,
+ 0xc3, 0x8b, 0x00, 0xc2, 0x29, 0x87, 0x00, 0xc2, 0x21, 0xcf, 0x67, 0xf8,
+ 0x00, 0xc2, 0x18, 0xce, 0x18, 0x5f, 0x00, 0xc3, 0xc0, 0x1c, 0xc2, 0xff,
+ 0xc7, 0xc3, 0x1c, 0x4f, 0x00, 0xc3, 0x89, 0xc3, 0x0e, 0x2f, 0x00, 0xc3,
+ 0x81, 0x16, 0xc2, 0xff, 0xd1, 0xc2, 0x01, 0x0e, 0x00, 0xc3, 0x2b, 0x02,
+ 0xff, 0xdb, 0xc2, 0x07, 0x69, 0x00, 0xc3, 0x23, 0x02, 0xff, 0xdf, 0xc2,
+ 0x01, 0xa7, 0x00, 0xc3, 0x59, 0xc2, 0x26, 0x94, 0x00, 0xc3, 0x51, 0xc2,
+ 0x0c, 0x25, 0x00, 0xc3, 0x49, 0xc3, 0x01, 0x0d, 0x00, 0xc3, 0x39, 0xc2,
+ 0x07, 0x44, 0x00, 0xc3, 0x31, 0xc2, 0x06, 0x6b, 0x00, 0xc3, 0x19, 0xc3,
+ 0x00, 0x55, 0x00, 0xc3, 0x11, 0x97, 0x00, 0xc3, 0x0b, 0x02, 0xff, 0xe3,
+ 0x8b, 0x00, 0xc2, 0xf3, 0x02, 0xff, 0xe7, 0x87, 0x00, 0xc2, 0xe8, 0xc4,
+ 0x05, 0xde, 0x00, 0xc0, 0x69, 0xc2, 0x0a, 0x20, 0x00, 0xc0, 0x60, 0xc4,
+ 0x33, 0x51, 0x0e, 0xb7, 0x20, 0xc2, 0x00, 0x44, 0x0e, 0xb7, 0x41, 0xc6,
+ 0x12, 0x65, 0x0e, 0xb7, 0x30, 0xc4, 0xde, 0x10, 0x0e, 0xb7, 0x28, 0xc2,
+ 0x00, 0x0a, 0x0e, 0xb7, 0xc0, 0xc3, 0x0b, 0x47, 0x0e, 0xb7, 0x18, 0xc4,
+ 0xdd, 0x2f, 0x0e, 0xb7, 0x10, 0x0f, 0x42, 0xff, 0xeb, 0xc2, 0x02, 0x29,
+ 0x0e, 0xb7, 0xc9, 0xc2, 0x00, 0x0a, 0x0e, 0xb7, 0xb9, 0x8b, 0x0e, 0xb7,
+ 0x88, 0xc6, 0x12, 0x65, 0x0e, 0xb7, 0xb0, 0xc2, 0x20, 0xa8, 0x0e, 0xb7,
+ 0xa9, 0xc4, 0x8b, 0xed, 0x0e, 0xb7, 0x4a, 0x02, 0xff, 0xf7, 0xc4, 0x19,
+ 0x8f, 0x0e, 0xb7, 0xa0, 0xc2, 0x03, 0x76, 0x0e, 0xb7, 0x90, 0x8b, 0x0e,
+ 0xb7, 0x78, 0x97, 0x0e, 0xb7, 0x70, 0x97, 0x0e, 0xb7, 0x68, 0xc4, 0xdc,
+ 0xdf, 0x0e, 0xb7, 0x60, 0xc4, 0x8f, 0x29, 0x0e, 0xb7, 0x58, 0xc3, 0x00,
+ 0xf2, 0x0e, 0xb7, 0x50, 0xc3, 0x0b, 0x47, 0x0e, 0xb7, 0x38, 0x0f, 0x42,
+ 0xff, 0xfd, 0xc2, 0x02, 0x29, 0x0e, 0xb8, 0x99, 0xc2, 0x00, 0x0a, 0x0e,
+ 0xb8, 0x89, 0x8b, 0x0e, 0xb8, 0x58, 0xc2, 0x00, 0x0a, 0x0e, 0xb8, 0x90,
+ 0xc6, 0x12, 0x65, 0x0e, 0xb8, 0x80, 0xc2, 0x20, 0xa8, 0x0e, 0xb8, 0x79,
+ 0xc4, 0x8b, 0xed, 0x0e, 0xb8, 0x18, 0xc4, 0x19, 0x8f, 0x0e, 0xb8, 0x70,
+ 0xca, 0x94, 0x73, 0x0e, 0xb8, 0x68, 0xc2, 0x03, 0x76, 0x0e, 0xb8, 0x60,
+ 0x8b, 0x0e, 0xb8, 0x48, 0x97, 0x0e, 0xb8, 0x40, 0x97, 0x0e, 0xb8, 0x38,
+ 0xc4, 0xdc, 0xdf, 0x0e, 0xb8, 0x30, 0xc4, 0x8f, 0x29, 0x0e, 0xb8, 0x28,
+ 0xc3, 0x00, 0xf2, 0x0e, 0xb8, 0x20, 0xc2, 0x00, 0x44, 0x0e, 0xb8, 0x11,
+ 0xc6, 0x12, 0x65, 0x0e, 0xb8, 0x00, 0xc3, 0x0b, 0x47, 0x0e, 0xb8, 0x08,
+ 0xc4, 0xde, 0x10, 0x0e, 0xb7, 0xf9, 0x47, 0x39, 0x6b, 0x43, 0x00, 0x09,
+ 0xc4, 0x33, 0x51, 0x0e, 0xb7, 0xf0, 0xc3, 0x0b, 0x47, 0x0e, 0xb7, 0xe8,
+ 0xc4, 0xdd, 0x2f, 0x0e, 0xb7, 0xe0, 0x9c, 0x0e, 0xa1, 0x9b, 0x03, 0x00,
+ 0x11, 0x9b, 0x0e, 0xa1, 0x91, 0x9a, 0x0e, 0xa1, 0x8b, 0x03, 0x00, 0x17,
+ 0x99, 0x0e, 0xa1, 0x81, 0x98, 0x0e, 0xa1, 0x79, 0x97, 0x0e, 0xa1, 0x73,
+ 0x03, 0x00, 0x1b, 0x86, 0x0e, 0xa0, 0xeb, 0x03, 0x00, 0x21, 0x91, 0x0e,
+ 0xa1, 0x43, 0x03, 0x00, 0x2d, 0x92, 0x0e, 0xa1, 0x4b, 0x03, 0x00, 0x31,
+ 0x85, 0x0e, 0xa0, 0xe3, 0x03, 0x00, 0x41, 0x96, 0x0e, 0xa1, 0x6b, 0x03,
+ 0x00, 0x47, 0x95, 0x0e, 0xa1, 0x63, 0x03, 0x00, 0x53, 0x88, 0x0e, 0xa0,
+ 0xfb, 0x03, 0x00, 0x59, 0x94, 0x0e, 0xa1, 0x5b, 0x03, 0x00, 0x5f, 0x90,
+ 0x0e, 0xa1, 0x3b, 0x03, 0x00, 0x65, 0x8f, 0x0e, 0xa1, 0x33, 0x03, 0x00,
+ 0x69, 0x8e, 0x0e, 0xa1, 0x2b, 0x03, 0x00, 0x6d, 0x8d, 0x0e, 0xa1, 0x23,
+ 0x03, 0x00, 0x73, 0x8b, 0x0e, 0xa1, 0x13, 0x03, 0x00, 0x79, 0x87, 0x0e,
+ 0xa0, 0xf3, 0x03, 0x00, 0x7f, 0x89, 0x0e, 0xa1, 0x03, 0x03, 0x00, 0x8b,
+ 0x84, 0x0e, 0xa0, 0xdb, 0x03, 0x00, 0x91, 0x83, 0x0e, 0xa0, 0xd3, 0x03,
+ 0x00, 0x97, 0x93, 0x0e, 0xa1, 0x51, 0x8c, 0x0e, 0xa1, 0x19, 0x8a, 0x0e,
+ 0xa1, 0x08, 0x46, 0x00, 0x3e, 0xc3, 0x00, 0x9d, 0x48, 0x01, 0xf7, 0x43,
+ 0x01, 0x05, 0xc4, 0x15, 0xa7, 0x0e, 0xbe, 0xa9, 0xc2, 0x22, 0x45, 0x0e,
+ 0xbe, 0xa0, 0xc3, 0x0d, 0x8f, 0x0e, 0xbe, 0x99, 0xc3, 0x08, 0xde, 0x0e,
+ 0xbe, 0x90, 0xc4, 0x05, 0xde, 0x0e, 0xbe, 0x89, 0xc2, 0x0a, 0x20, 0x0e,
+ 0xbe, 0x80, 0xc6, 0x4f, 0xcb, 0x0e, 0xbe, 0x51, 0xc4, 0xde, 0x10, 0x0e,
+ 0xb5, 0x58, 0x0f, 0x43, 0x01, 0x6d, 0xc2, 0x02, 0x29, 0x0e, 0xb5, 0xf9,
+ 0xc2, 0x00, 0x0a, 0x0e, 0xb5, 0xe9, 0x8b, 0x0e, 0xb5, 0xb8, 0xc2, 0x00,
+ 0x0a, 0x0e, 0xb5, 0xf0, 0xc6, 0x12, 0x65, 0x0e, 0xb5, 0xe0, 0xc2, 0x20,
+ 0xa8, 0x0e, 0xb5, 0xd9, 0xc4, 0x8b, 0xed, 0x0e, 0xb5, 0x7a, 0x03, 0x01,
+ 0x79, 0xc4, 0x19, 0x8f, 0x0e, 0xb5, 0xd0, 0xc2, 0x03, 0x76, 0x0e, 0xb5,
+ 0xc0, 0x8b, 0x0e, 0xb5, 0xa8, 0x97, 0x0e, 0xb5, 0xa0, 0x97, 0x0e, 0xb5,
+ 0x98, 0xc4, 0xdc, 0xdf, 0x0e, 0xb5, 0x90, 0xc4, 0x8f, 0x29, 0x0e, 0xb5,
+ 0x88, 0xc3, 0x00, 0xf2, 0x0e, 0xb5, 0x80, 0xc2, 0x00, 0x44, 0x0e, 0xb5,
+ 0x71, 0xc6, 0x12, 0x65, 0x0e, 0xb5, 0x60, 0xc3, 0x0b, 0x47, 0x0e, 0xb5,
+ 0x68, 0xc4, 0x33, 0x51, 0x0e, 0xb5, 0x50, 0xc3, 0x0b, 0x47, 0x0e, 0xb5,
+ 0x48, 0xc4, 0xdd, 0x2f, 0x0e, 0xb5, 0x40, 0xc8, 0x9d, 0xb0, 0x0e, 0xba,
+ 0xa9, 0xc9, 0xad, 0x9c, 0x0e, 0xba, 0x99, 0xd3, 0x41, 0x4d, 0x0e, 0xba,
+ 0x78, 0x91, 0x0e, 0xa4, 0x83, 0x03, 0x01, 0x7f, 0x92, 0x0e, 0xa4, 0x8b,
+ 0x03, 0x01, 0x83, 0x85, 0x0e, 0xa4, 0x23, 0x03, 0x01, 0x93, 0x97, 0x0e,
+ 0xa4, 0xb3, 0x03, 0x01, 0x99, 0x96, 0x0e, 0xa4, 0xab, 0x03, 0x01, 0x9f,
+ 0x95, 0x0e, 0xa4, 0xa3, 0x03, 0x01, 0xab, 0x88, 0x0e, 0xa4, 0x3b, 0x03,
+ 0x01, 0xb1, 0x94, 0x0e, 0xa4, 0x9b, 0x03, 0x01, 0xb7, 0x9a, 0x0e, 0xa4,
+ 0xcb, 0x03, 0x01, 0xbd, 0x90, 0x0e, 0xa4, 0x7b, 0x03, 0x01, 0xc1, 0x8f,
+ 0x0e, 0xa4, 0x73, 0x03, 0x01, 0xc5, 0x8e, 0x0e, 0xa4, 0x6b, 0x03, 0x01,
+ 0xc9, 0x8d, 0x0e, 0xa4, 0x63, 0x03, 0x01, 0xcf, 0x8b, 0x0e, 0xa4, 0x53,
+ 0x03, 0x01, 0xd5, 0x87, 0x0e, 0xa4, 0x33, 0x03, 0x01, 0xdb, 0x9c, 0x0e,
+ 0xa4, 0xdb, 0x03, 0x01, 0xe7, 0x86, 0x0e, 0xa4, 0x2b, 0x03, 0x01, 0xed,
+ 0x89, 0x0e, 0xa4, 0x43, 0x03, 0x01, 0xf3, 0x84, 0x0e, 0xa4, 0x1b, 0x03,
+ 0x01, 0xf9, 0x83, 0x0e, 0xa4, 0x13, 0x03, 0x01, 0xff, 0x9b, 0x0e, 0xa4,
+ 0xd1, 0x99, 0x0e, 0xa4, 0xc1, 0x98, 0x0e, 0xa4, 0xb9, 0x93, 0x0e, 0xa4,
+ 0x91, 0x8c, 0x0e, 0xa4, 0x59, 0x8a, 0x0e, 0xa4, 0x48, 0x91, 0x0e, 0xa3,
+ 0xb3, 0x03, 0x02, 0x05, 0x92, 0x0e, 0xa3, 0xbb, 0x03, 0x02, 0x09, 0x85,
+ 0x0e, 0xa3, 0x53, 0x03, 0x02, 0x19, 0x97, 0x0e, 0xa3, 0xe3, 0x03, 0x02,
+ 0x1f, 0x96, 0x0e, 0xa3, 0xdb, 0x03, 0x02, 0x25, 0x95, 0x0e, 0xa3, 0xd3,
+ 0x03, 0x02, 0x34, 0x94, 0x0e, 0xa3, 0xcb, 0x03, 0x02, 0x3a, 0x9a, 0x0e,
+ 0xa3, 0xfb, 0x03, 0x02, 0x40, 0x90, 0x0e, 0xa3, 0xab, 0x03, 0x02, 0x44,
+ 0x8f, 0x0e, 0xa3, 0xa3, 0x03, 0x02, 0x48, 0x8e, 0x0e, 0xa3, 0x9b, 0x03,
+ 0x02, 0x4c, 0x8d, 0x0e, 0xa3, 0x93, 0x03, 0x02, 0x52, 0x8b, 0x0e, 0xa3,
+ 0x83, 0x03, 0x02, 0x58, 0x87, 0x0e, 0xa3, 0x63, 0x03, 0x02, 0x5e, 0x9c,
+ 0x0e, 0xa4, 0x0b, 0x03, 0x02, 0x6a, 0x86, 0x0e, 0xa3, 0x5b, 0x03, 0x02,
+ 0x70, 0x89, 0x0e, 0xa3, 0x73, 0x03, 0x02, 0x76, 0x84, 0x0e, 0xa3, 0x4b,
+ 0x03, 0x02, 0x7c, 0x83, 0x0e, 0xa3, 0x43, 0x03, 0x02, 0x82, 0x9b, 0x0e,
+ 0xa4, 0x01, 0x99, 0x0e, 0xa3, 0xf1, 0x98, 0x0e, 0xa3, 0xe9, 0x93, 0x0e,
+ 0xa3, 0xc1, 0x8c, 0x0e, 0xa3, 0x89, 0x8a, 0x0e, 0xa3, 0x79, 0x88, 0x0e,
+ 0xa3, 0x68, 0x9c, 0x0e, 0xac, 0xf9, 0x9b, 0x0e, 0xac, 0xf1, 0x9a, 0x0e,
+ 0xac, 0xe9, 0x99, 0x0e, 0xac, 0xe1, 0x98, 0x0e, 0xac, 0xd9, 0x97, 0x0e,
+ 0xac, 0xd1, 0x96, 0x0e, 0xac, 0xc9, 0x95, 0x0e, 0xac, 0xc1, 0x94, 0x0e,
+ 0xac, 0xb9, 0x93, 0x0e, 0xac, 0xb1, 0x92, 0x0e, 0xac, 0xa9, 0x91, 0x0e,
+ 0xac, 0xa1, 0x90, 0x0e, 0xac, 0x99, 0x8f, 0x0e, 0xac, 0x91, 0x8e, 0x0e,
+ 0xac, 0x89, 0x8d, 0x0e, 0xac, 0x81, 0x8c, 0x0e, 0xac, 0x79, 0x8b, 0x0e,
+ 0xac, 0x71, 0x8a, 0x0e, 0xac, 0x69, 0x89, 0x0e, 0xac, 0x61, 0x88, 0x0e,
+ 0xac, 0x59, 0x87, 0x0e, 0xac, 0x51, 0x86, 0x0e, 0xac, 0x49, 0x85, 0x0e,
+ 0xac, 0x41, 0x84, 0x0e, 0xac, 0x39, 0x83, 0x0e, 0xac, 0x30, 0x9c, 0x0e,
+ 0xac, 0x29, 0x9b, 0x0e, 0xac, 0x21, 0x9a, 0x0e, 0xac, 0x19, 0x99, 0x0e,
+ 0xac, 0x11, 0x98, 0x0e, 0xac, 0x09, 0x97, 0x0e, 0xac, 0x01, 0x96, 0x0e,
+ 0xab, 0xf9, 0x95, 0x0e, 0xab, 0xf1, 0x94, 0x0e, 0xab, 0xe9, 0x93, 0x0e,
+ 0xab, 0xe1, 0x92, 0x0e, 0xab, 0xd9, 0x91, 0x0e, 0xab, 0xd1, 0x90, 0x0e,
+ 0xab, 0xc9, 0x8f, 0x0e, 0xab, 0xc1, 0x8e, 0x0e, 0xab, 0xb9, 0x8d, 0x0e,
+ 0xab, 0xb1, 0x8c, 0x0e, 0xab, 0xa9, 0x8b, 0x0e, 0xab, 0xa1, 0x8a, 0x0e,
+ 0xab, 0x99, 0x89, 0x0e, 0xab, 0x91, 0x88, 0x0e, 0xab, 0x89, 0x87, 0x0e,
+ 0xab, 0x81, 0x86, 0x0e, 0xab, 0x79, 0x85, 0x0e, 0xab, 0x71, 0x84, 0x0e,
+ 0xab, 0x69, 0x83, 0x0e, 0xab, 0x60, 0xc4, 0x15, 0xa7, 0x0e, 0xbf, 0xe9,
+ 0xc2, 0x22, 0x45, 0x0e, 0xbf, 0xe0, 0xc3, 0x0d, 0x8f, 0x0e, 0xbf, 0xd9,
+ 0xc3, 0x08, 0xde, 0x0e, 0xbf, 0xd0, 0xc4, 0x05, 0xde, 0x0e, 0xbf, 0xc9,
+ 0xc2, 0x0a, 0x20, 0x0e, 0xbf, 0xc0, 0x46, 0x08, 0xd7, 0xc3, 0x02, 0x88,
+ 0x47, 0xca, 0x00, 0xc3, 0x02, 0xac, 0x12, 0xc3, 0x02, 0xda, 0xca, 0xa3,
+ 0xf8, 0x0e, 0xbc, 0x71, 0xcc, 0x8f, 0x28, 0x0e, 0xbc, 0x61, 0xcc, 0x8b,
+ 0xec, 0x0e, 0xbc, 0x59, 0xce, 0x12, 0x64, 0x0e, 0xbc, 0x51, 0x46, 0x00,
+ 0x3e, 0xc3, 0x02, 0xec, 0xc5, 0xdf, 0xef, 0x0e, 0xbb, 0x79, 0x48, 0x01,
+ 0xf7, 0x43, 0x03, 0x90, 0xc4, 0x24, 0x35, 0x0e, 0xbf, 0x59, 0xc5, 0x05,
+ 0x1b, 0x0e, 0xbf, 0x51, 0x15, 0xc3, 0x04, 0x31, 0x08, 0xc3, 0x04, 0x3d,
+ 0x16, 0xc3, 0x04, 0x49, 0xc3, 0x05, 0x17, 0x0e, 0xbf, 0x19, 0xc4, 0x16,
+ 0x57, 0x0e, 0xbf, 0x10, 0x46, 0x00, 0x3e, 0xc3, 0x04, 0x55, 0x48, 0x01,
+ 0xf7, 0x43, 0x04, 0xbd, 0x9c, 0x0e, 0xae, 0x99, 0x9b, 0x0e, 0xae, 0x91,
+ 0x9a, 0x0e, 0xae, 0x89, 0x99, 0x0e, 0xae, 0x81, 0x98, 0x0e, 0xae, 0x79,
+ 0x97, 0x0e, 0xae, 0x71, 0x96, 0x0e, 0xae, 0x69, 0x95, 0x0e, 0xae, 0x61,
+ 0x94, 0x0e, 0xae, 0x59, 0x93, 0x0e, 0xae, 0x51, 0x92, 0x0e, 0xae, 0x49,
+ 0x91, 0x0e, 0xae, 0x41, 0x90, 0x0e, 0xae, 0x39, 0x8f, 0x0e, 0xae, 0x31,
+ 0x8e, 0x0e, 0xae, 0x29, 0x8d, 0x0e, 0xae, 0x21, 0x8c, 0x0e, 0xae, 0x19,
+ 0x8b, 0x0e, 0xae, 0x11, 0x8a, 0x0e, 0xae, 0x09, 0x89, 0x0e, 0xae, 0x01,
+ 0x88, 0x0e, 0xad, 0xf9, 0x87, 0x0e, 0xad, 0xf1, 0x86, 0x0e, 0xad, 0xe9,
+ 0x85, 0x0e, 0xad, 0xe1, 0x84, 0x0e, 0xad, 0xd9, 0x83, 0x0e, 0xad, 0xd0,
+ 0x9c, 0x0e, 0xad, 0xc9, 0x9b, 0x0e, 0xad, 0xc1, 0x9a, 0x0e, 0xad, 0xb9,
+ 0x99, 0x0e, 0xad, 0xb1, 0x98, 0x0e, 0xad, 0xa9, 0x97, 0x0e, 0xad, 0xa1,
+ 0x96, 0x0e, 0xad, 0x99, 0x95, 0x0e, 0xad, 0x91, 0x94, 0x0e, 0xad, 0x89,
+ 0x93, 0x0e, 0xad, 0x81, 0x92, 0x0e, 0xad, 0x79, 0x91, 0x0e, 0xad, 0x71,
+ 0x90, 0x0e, 0xad, 0x69, 0x8f, 0x0e, 0xad, 0x61, 0x8e, 0x0e, 0xad, 0x59,
+ 0x8d, 0x0e, 0xad, 0x51, 0x8c, 0x0e, 0xad, 0x49, 0x8b, 0x0e, 0xad, 0x41,
+ 0x8a, 0x0e, 0xad, 0x39, 0x89, 0x0e, 0xad, 0x31, 0x88, 0x0e, 0xad, 0x29,
+ 0x87, 0x0e, 0xad, 0x21, 0x86, 0x0e, 0xad, 0x19, 0x85, 0x0e, 0xad, 0x11,
+ 0x84, 0x0e, 0xad, 0x09, 0x83, 0x0e, 0xad, 0x00, 0x9c, 0x0e, 0xa6, 0x79,
+ 0x9b, 0x0e, 0xa6, 0x71, 0x9a, 0x0e, 0xa6, 0x69, 0x99, 0x0e, 0xa6, 0x61,
+ 0x98, 0x0e, 0xa6, 0x59, 0x97, 0x0e, 0xa6, 0x51, 0x96, 0x0e, 0xa6, 0x49,
+ 0x95, 0x0e, 0xa6, 0x41, 0x94, 0x0e, 0xa6, 0x39, 0x93, 0x0e, 0xa6, 0x31,
+ 0x92, 0x0e, 0xa6, 0x29, 0x90, 0x0e, 0xa6, 0x19, 0x8f, 0x0e, 0xa6, 0x11,
+ 0x8e, 0x0e, 0xa6, 0x09, 0x8d, 0x0e, 0xa6, 0x01, 0x8c, 0x0e, 0xa5, 0xf9,
+ 0x8b, 0x0e, 0xa5, 0xf1, 0x8a, 0x0e, 0xa5, 0xe9, 0x88, 0x0e, 0xa5, 0xd9,
+ 0x86, 0x0e, 0xa5, 0xc9, 0x85, 0x0e, 0xa5, 0xc1, 0x84, 0x0e, 0xa5, 0xb9,
+ 0x83, 0x0e, 0xa5, 0xb0, 0x9c, 0x0e, 0xa5, 0xa9, 0x9b, 0x0e, 0xa5, 0xa1,
+ 0x9a, 0x0e, 0xa5, 0x99, 0x99, 0x0e, 0xa5, 0x91, 0x98, 0x0e, 0xa5, 0x89,
+ 0x97, 0x0e, 0xa5, 0x81, 0x96, 0x0e, 0xa5, 0x79, 0x95, 0x0e, 0xa5, 0x71,
+ 0x93, 0x0e, 0xa5, 0x61, 0x92, 0x0e, 0xa5, 0x59, 0x91, 0x0e, 0xa5, 0x51,
+ 0x90, 0x0e, 0xa5, 0x49, 0x8d, 0x0e, 0xa5, 0x31, 0x8c, 0x0e, 0xa5, 0x29,
+ 0x89, 0x0e, 0xa5, 0x11, 0x86, 0x0e, 0xa4, 0xf9, 0x85, 0x0e, 0xa4, 0xf1,
+ 0x83, 0x0e, 0xa4, 0xe0, 0xc4, 0x15, 0xa7, 0x0e, 0xbe, 0xf9, 0xc2, 0x22,
+ 0x45, 0x0e, 0xbe, 0xf0, 0xc3, 0x0d, 0x8f, 0x0e, 0xbe, 0xe9, 0xc3, 0x08,
+ 0xde, 0x0e, 0xbe, 0xe0, 0xc4, 0x05, 0xde, 0x0e, 0xbe, 0xd9, 0xc2, 0x0a,
+ 0x20, 0x0e, 0xbe, 0xd0, 0x9c, 0x0e, 0xa9, 0xb9, 0x9b, 0x0e, 0xa9, 0xb1,
+ 0x9a, 0x0e, 0xa9, 0xa9, 0x99, 0x0e, 0xa9, 0xa1, 0x98, 0x0e, 0xa9, 0x99,
+ 0x97, 0x0e, 0xa9, 0x91, 0x96, 0x0e, 0xa9, 0x89, 0x95, 0x0e, 0xa9, 0x81,
+ 0x94, 0x0e, 0xa9, 0x79, 0x93, 0x0e, 0xa9, 0x71, 0x92, 0x0e, 0xa9, 0x69,
+ 0x91, 0x0e, 0xa9, 0x61, 0x90, 0x0e, 0xa9, 0x59, 0x8f, 0x0e, 0xa9, 0x51,
+ 0x8e, 0x0e, 0xa9, 0x49, 0x8d, 0x0e, 0xa9, 0x41, 0x8c, 0x0e, 0xa9, 0x39,
+ 0x8b, 0x0e, 0xa9, 0x31, 0x8a, 0x0e, 0xa9, 0x29, 0x89, 0x0e, 0xa9, 0x21,
+ 0x88, 0x0e, 0xa9, 0x19, 0x87, 0x0e, 0xa9, 0x11, 0x86, 0x0e, 0xa9, 0x09,
+ 0x85, 0x0e, 0xa9, 0x01, 0x84, 0x0e, 0xa8, 0xf9, 0x83, 0x0e, 0xa8, 0xf0,
+ 0x9b, 0x0e, 0xa8, 0xe1, 0x9a, 0x0e, 0xa8, 0xd9, 0x99, 0x0e, 0xa8, 0xd1,
+ 0x98, 0x0e, 0xa8, 0xc9, 0x97, 0x0e, 0xa8, 0xc1, 0x96, 0x0e, 0xa8, 0xb9,
+ 0x95, 0x0e, 0xa8, 0xb1, 0x93, 0x0e, 0xa8, 0xa1, 0x92, 0x0e, 0xa8, 0x99,
+ 0x91, 0x0e, 0xa8, 0x91, 0x90, 0x0e, 0xa8, 0x89, 0x8f, 0x0e, 0xa8, 0x81,
+ 0x8e, 0x0e, 0xa8, 0x79, 0x8d, 0x0e, 0xa8, 0x71, 0x8c, 0x0e, 0xa8, 0x69,
+ 0x89, 0x0e, 0xa8, 0x51, 0x88, 0x0e, 0xa8, 0x49, 0x87, 0x0e, 0xa8, 0x41,
+ 0x86, 0x0e, 0xa8, 0x39, 0x84, 0x0e, 0xa8, 0x29, 0x83, 0x0e, 0xa8, 0x20,
+ 0xd6, 0x0a, 0xe8, 0x01, 0x3f, 0x69, 0xce, 0x26, 0x2e, 0x01, 0x3f, 0x38,
+ 0x97, 0x08, 0xe9, 0xf9, 0x8b, 0x08, 0xe9, 0xe1, 0x83, 0x08, 0xe9, 0x88,
+ 0x97, 0x08, 0xe9, 0xa8, 0x8b, 0x08, 0xe9, 0x98, 0xc2, 0x01, 0x0e, 0x08,
+ 0xe8, 0xb9, 0x83, 0x08, 0xe8, 0xb0, 0xc2, 0x01, 0x0e, 0x08, 0xe8, 0xc9,
+ 0x83, 0x08, 0xe8, 0xc0, 0x83, 0x08, 0xe5, 0x69, 0xc2, 0x01, 0x0e, 0x08,
+ 0xe5, 0x60, 0x83, 0x08, 0xe5, 0x39, 0xc2, 0x01, 0x0e, 0x08, 0xe5, 0x30,
+ 0xc2, 0x05, 0x5c, 0x08, 0xe5, 0x21, 0x83, 0x08, 0xe4, 0xe0, 0x15, 0xc3,
+ 0x05, 0x25, 0xc2, 0x01, 0x0e, 0x08, 0xe4, 0xd9, 0x83, 0x08, 0xe4, 0xd0,
+ 0xc2, 0x01, 0x0e, 0x08, 0xe4, 0xf9, 0x83, 0x08, 0xe4, 0xf0, 0x83, 0x08,
+ 0xe4, 0xe9, 0xc2, 0x1a, 0x36, 0x08, 0xe4, 0xc9, 0xc2, 0x07, 0x69, 0x08,
+ 0xe4, 0xa8, 0xc2, 0x01, 0x0e, 0x08, 0xe4, 0xb9, 0x83, 0x08, 0xe4, 0xb0,
+ 0xc2, 0x01, 0x0e, 0x08, 0xe4, 0x99, 0x83, 0x08, 0xe4, 0x90, 0xc2, 0x01,
+ 0x0e, 0x08, 0xe4, 0x19, 0x83, 0x08, 0xe4, 0x10, 0xc5, 0x45, 0xcf, 0x00,
+ 0x68, 0x19, 0xc4, 0x21, 0x28, 0x00, 0x6a, 0x68, 0x94, 0x00, 0x68, 0x5b,
+ 0x03, 0x05, 0x2f, 0x8e, 0x00, 0x68, 0x62, 0x03, 0x05, 0x33, 0x83, 0x00,
+ 0x69, 0x19, 0xc2, 0x01, 0x01, 0x00, 0x69, 0x48, 0x83, 0x00, 0x68, 0xd9,
+ 0x45, 0xdc, 0xbb, 0x43, 0x05, 0x37, 0x83, 0x00, 0x68, 0xf9, 0xc2, 0x01,
+ 0x0e, 0x00, 0x69, 0x01, 0xc2, 0x00, 0x44, 0x00, 0x69, 0xd0, 0x83, 0x00,
+ 0x69, 0x09, 0xc2, 0x01, 0x0e, 0x00, 0x69, 0x10, 0x83, 0x00, 0x69, 0x99,
+ 0xc2, 0x00, 0x96, 0x00, 0x69, 0xa0, 0x94, 0x00, 0x6a, 0x20, 0x8e, 0x00,
+ 0x6b, 0x18, 0xc7, 0xc4, 0x1f, 0x00, 0x6a, 0xc9, 0xc4, 0xa2, 0x95, 0x00,
+ 0x6a, 0xf0, 0xc8, 0x1e, 0x4f, 0x00, 0x6a, 0xd9, 0xc4, 0x1c, 0xe3, 0x00,
+ 0x6a, 0xe0, 0xc2, 0x0a, 0x20, 0x00, 0x6b, 0x41, 0xc4, 0x05, 0xde, 0x00,
+ 0x6b, 0x48, 0xc3, 0x08, 0xde, 0x00, 0x6b, 0x51, 0xc3, 0x0d, 0x8f, 0x00,
+ 0x6b, 0x58, 0xc2, 0x22, 0x45, 0x00, 0x6b, 0x61, 0xc4, 0x15, 0xa7, 0x00,
+ 0x6b, 0x68, 0xcb, 0x46, 0x15, 0x08, 0x57, 0x98, 0xc3, 0xe2, 0x62, 0x08,
+ 0x56, 0xe9, 0xc4, 0xdd, 0x34, 0x08, 0x56, 0xc0, 0x96, 0x00, 0x42, 0x40,
+ 0x8a, 0x00, 0x42, 0xa1, 0x9c, 0x00, 0x42, 0x88, 0xc2, 0x0e, 0xe5, 0x00,
+ 0x42, 0x48, 0xc2, 0x00, 0x9a, 0x08, 0x8b, 0x91, 0x83, 0x08, 0x8b, 0x68,
+ 0xc2, 0x01, 0x0e, 0x08, 0x8b, 0x59, 0x83, 0x08, 0x8b, 0x50, 0xc2, 0x01,
+ 0x0e, 0x08, 0x8b, 0x49, 0x83, 0x08, 0x8b, 0x40, 0x83, 0x08, 0x8b, 0x39,
+ 0xc2, 0x01, 0x01, 0x08, 0x8b, 0x11, 0xc2, 0x1a, 0x36, 0x08, 0x8a, 0xe8,
+ 0xc2, 0x01, 0x0e, 0x08, 0x8b, 0x31, 0x83, 0x08, 0x8b, 0x29, 0x06, 0x43,
+ 0x05, 0x57, 0xc2, 0x01, 0x0e, 0x08, 0x8b, 0x21, 0x83, 0x08, 0x8b, 0x19,
+ 0x16, 0x43, 0x05, 0x61, 0xc2, 0x01, 0x0e, 0x08, 0x8a, 0xe1, 0x83, 0x08,
+ 0x8a, 0xd8, 0xc2, 0x01, 0x0e, 0x08, 0x8a, 0xd1, 0x83, 0x08, 0x8a, 0xc8,
+ 0xc2, 0x01, 0x0e, 0x08, 0x8a, 0xc1, 0x83, 0x08, 0x8a, 0xb8, 0xc2, 0x01,
+ 0x0e, 0x08, 0x8a, 0xb1, 0x83, 0x08, 0x8a, 0xa8, 0xc9, 0xad, 0x66, 0x0f,
+ 0x80, 0x71, 0xc6, 0x38, 0x8e, 0x0f, 0x81, 0x00, 0xc9, 0xad, 0x66, 0x0f,
+ 0x80, 0x61, 0xc6, 0x38, 0x8e, 0x0f, 0x80, 0xf0, 0xc9, 0xad, 0x66, 0x0f,
+ 0x80, 0x69, 0xc6, 0x38, 0x8e, 0x0f, 0x80, 0xf8, 0xc9, 0xad, 0x66, 0x0f,
+ 0x80, 0x79, 0xc6, 0x38, 0x8e, 0x0f, 0x81, 0x08, 0xc9, 0xad, 0x66, 0x0f,
+ 0x80, 0x39, 0xc6, 0x38, 0x8e, 0x0f, 0x80, 0xc8, 0xc9, 0xad, 0x66, 0x0f,
+ 0x80, 0x41, 0xc6, 0x38, 0x8e, 0x0f, 0x80, 0xd0, 0xc9, 0xad, 0x66, 0x0f,
+ 0x80, 0x49, 0xc6, 0x38, 0x8e, 0x0f, 0x80, 0xd8, 0xc9, 0xad, 0x66, 0x0f,
+ 0x80, 0x51, 0xc6, 0x38, 0x8e, 0x0f, 0x80, 0xe0, 0xc9, 0xad, 0x66, 0x0f,
+ 0x80, 0x59, 0xc6, 0x38, 0x8e, 0x0f, 0x80, 0xe8, 0x0d, 0xc3, 0x05, 0x6b,
+ 0x15, 0xc3, 0x05, 0x77, 0x12, 0xc3, 0x05, 0x9e, 0x16, 0xc3, 0x05, 0xb8,
+ 0x05, 0xc3, 0x05, 0xe1, 0x18, 0xc3, 0x06, 0x05, 0x09, 0xc3, 0x06, 0x11,
+ 0x0f, 0xc3, 0x06, 0x24, 0x04, 0xc3, 0x06, 0x45, 0x0e, 0xc3, 0x06, 0x4f,
+ 0x08, 0xc3, 0x06, 0x5e, 0x06, 0xc3, 0x06, 0x84, 0x19, 0xc3, 0x06, 0x98,
+ 0x42, 0x01, 0x0e, 0xc3, 0x06, 0xa4, 0x07, 0xc3, 0x06, 0xb0, 0x10, 0xc3,
+ 0x06, 0xbc, 0x11, 0xc3, 0x06, 0xd4, 0xcd, 0x7e, 0xc2, 0x0e, 0x8c, 0xc1,
+ 0x9c, 0x0e, 0x8c, 0x71, 0x14, 0xc3, 0x06, 0xe4, 0x4b, 0x9c, 0x5a, 0xc3,
+ 0x06, 0xec, 0x42, 0x00, 0x93, 0xc3, 0x06, 0xf8, 0xca, 0x5d, 0x55, 0x0e,
+ 0x8a, 0x18, 0x00, 0x43, 0x07, 0x04, 0xc2, 0x01, 0x0e, 0x08, 0x94, 0xa1,
+ 0xc2, 0x0c, 0x25, 0x08, 0x94, 0x99, 0x83, 0x08, 0x94, 0x90, 0x83, 0x08,
+ 0x94, 0x81, 0xc2, 0x01, 0x0e, 0x08, 0x94, 0x88, 0xc3, 0x82, 0xa4, 0x00,
+ 0x84, 0x39, 0xc3, 0x82, 0xec, 0x00, 0x84, 0x41, 0xc3, 0x3b, 0x0b, 0x00,
+ 0x84, 0x49, 0xc3, 0x82, 0xb0, 0x00, 0x84, 0x50, 0x43, 0x0e, 0x70, 0xc3,
+ 0x07, 0x10, 0xcd, 0x7a, 0x16, 0x00, 0x84, 0x68, 0xc2, 0x00, 0x9a, 0x05,
+ 0x53, 0x99, 0xc2, 0x05, 0x5c, 0x05, 0x53, 0x91, 0xc2, 0x23, 0xe3, 0x05,
+ 0x53, 0x89, 0xc2, 0x01, 0xa7, 0x05, 0x53, 0x79, 0xc3, 0x13, 0x58, 0x05,
+ 0x53, 0x71, 0x0a, 0xc3, 0x07, 0x1c, 0xc2, 0x0e, 0xe5, 0x05, 0x53, 0x61,
+ 0x10, 0xc3, 0x07, 0x26, 0x06, 0xc3, 0x07, 0x30, 0x42, 0x02, 0x01, 0xc3,
+ 0x07, 0x3a, 0x0c, 0xc3, 0x07, 0x44, 0x05, 0xc3, 0x07, 0x4e, 0xc2, 0x07,
+ 0x69, 0x05, 0x53, 0x00, 0x04, 0xc3, 0x07, 0x58, 0x06, 0xc3, 0x07, 0x62,
+ 0xc3, 0x13, 0x58, 0x05, 0x4f, 0xd1, 0x10, 0xc3, 0x07, 0x70, 0x0c, 0xc3,
+ 0x07, 0x7c, 0x09, 0xc3, 0x07, 0x86, 0xc2, 0x01, 0xa7, 0x05, 0x4f, 0x40,
+ 0x42, 0x00, 0xf8, 0x43, 0x07, 0x90, 0xc3, 0x1c, 0x4f, 0x05, 0x53, 0xf1,
+ 0xc3, 0x01, 0x0d, 0x05, 0x53, 0xf8, 0x83, 0x00, 0x82, 0xe1, 0x87, 0x00,
+ 0x82, 0xe8, 0xce, 0x29, 0x29, 0x0f, 0xd0, 0xb1, 0xdb, 0x15, 0x9a, 0x0f,
+ 0xd2, 0x00, 0x49, 0x29, 0x20, 0x43, 0x07, 0xd2, 0x49, 0x29, 0x20, 0x43,
+ 0x07, 0xde, 0xce, 0x29, 0x29, 0x0f, 0xd0, 0xc1, 0xdb, 0x15, 0x9a, 0x0f,
+ 0xd2, 0x10, 0xce, 0x29, 0x29, 0x0f, 0xd0, 0xb9, 0xdb, 0x15, 0x9a, 0x0f,
+ 0xd2, 0x08, 0xce, 0x29, 0x29, 0x0f, 0xd0, 0xd1, 0xdb, 0x15, 0x9a, 0x0f,
+ 0xd2, 0x20, 0xc3, 0x01, 0x5e, 0x0f, 0xd1, 0x51, 0xc5, 0x8f, 0xc9, 0x0f,
+ 0xd1, 0x70, 0xcb, 0x9c, 0xb2, 0x08, 0xa3, 0x09, 0xcb, 0x93, 0xc2, 0x08,
+ 0xa3, 0x01, 0xce, 0x75, 0xc6, 0x08, 0xa2, 0x41, 0x03, 0xc3, 0x07, 0xf4,
+ 0xc5, 0x33, 0x1a, 0x08, 0xa2, 0x31, 0x42, 0x02, 0x52, 0xc3, 0x08, 0x00,
+ 0xcb, 0x21, 0x1a, 0x08, 0xa2, 0x18, 0x8e, 0x08, 0xa0, 0x43, 0x03, 0x08,
+ 0x0c, 0x94, 0x08, 0xa0, 0x32, 0x03, 0x08, 0x10, 0xc2, 0x01, 0x0e, 0x08,
+ 0xa0, 0xc9, 0x83, 0x08, 0xa0, 0xc0, 0xc2, 0x01, 0x0e, 0x08, 0xa0, 0x99,
+ 0x83, 0x08, 0xa0, 0x90, 0xc2, 0x01, 0x0e, 0x08, 0xa0, 0xe9, 0x83, 0x08,
+ 0xa0, 0xe0, 0xc2, 0x01, 0x0e, 0x08, 0xa0, 0xd9, 0x83, 0x08, 0xa0, 0xd0,
+ 0xc4, 0x15, 0xa7, 0x08, 0xa2, 0xb9, 0xc2, 0x22, 0x45, 0x08, 0xa2, 0xb0,
+ 0xc3, 0x0d, 0x8f, 0x08, 0xa2, 0xa9, 0xc3, 0x08, 0xde, 0x08, 0xa2, 0xa0,
+ 0xc4, 0x05, 0xde, 0x08, 0xa2, 0x99, 0xc2, 0x0a, 0x20, 0x08, 0xa2, 0x90,
+ 0x8e, 0x08, 0xa1, 0xe8, 0x94, 0x08, 0xa1, 0xd8, 0xc3, 0xec, 0x6c, 0x00,
+ 0xce, 0x49, 0xc3, 0xd8, 0xbc, 0x00, 0xce, 0x40, 0xc4, 0x15, 0xa7, 0x00,
+ 0xce, 0xb9, 0xc2, 0x22, 0x45, 0x00, 0xce, 0xb0, 0xc3, 0x0d, 0x8f, 0x00,
+ 0xce, 0xa9, 0xc3, 0x08, 0xde, 0x00, 0xce, 0xa0, 0xc4, 0x05, 0xde, 0x00,
+ 0xce, 0x99, 0xc2, 0x0a, 0x20, 0x00, 0xce, 0x90, 0x84, 0x00, 0xce, 0x39,
+ 0x86, 0x00, 0xce, 0x31, 0x8d, 0x00, 0xce, 0x29, 0x8f, 0x00, 0xce, 0x21,
+ 0x90, 0x00, 0xce, 0x1b, 0x03, 0x08, 0x14, 0x98, 0x00, 0xce, 0x08, 0x15,
+ 0xc3, 0x08, 0x18, 0x1a, 0xc3, 0x08, 0x22, 0x0d, 0xc3, 0x08, 0x2c, 0xc2,
+ 0x02, 0x1d, 0x00, 0xcd, 0x29, 0xc2, 0x0c, 0x25, 0x00, 0xcd, 0x21, 0xc2,
+ 0x01, 0x0e, 0x00, 0xcd, 0x19, 0xc2, 0x00, 0x96, 0x00, 0xcc, 0xf9, 0xc2,
+ 0x06, 0x8c, 0x00, 0xcc, 0xf1, 0xc2, 0x01, 0xa7, 0x00, 0xcc, 0xe9, 0xc2,
+ 0x00, 0x3f, 0x00, 0xcc, 0xc9, 0x12, 0xc3, 0x08, 0x36, 0x10, 0xc3, 0x08,
+ 0x40, 0x16, 0xc3, 0x08, 0x4a, 0xc2, 0x1a, 0x36, 0x00, 0xcc, 0x69, 0xc2,
+ 0x0e, 0x13, 0x00, 0xcc, 0x08, 0x15, 0xc3, 0x08, 0x5a, 0x1a, 0xc3, 0x08,
+ 0x64, 0x0d, 0xc3, 0x08, 0x6e, 0xc2, 0x02, 0x1d, 0x00, 0xcd, 0x11, 0xc2,
+ 0x0c, 0x25, 0x00, 0xcd, 0x09, 0xc2, 0x01, 0x0e, 0x00, 0xcd, 0x01, 0xc2,
+ 0x00, 0x96, 0x00, 0xcc, 0xe1, 0xc2, 0x06, 0x8c, 0x00, 0xcc, 0xd9, 0xc2,
+ 0x01, 0xa7, 0x00, 0xcc, 0xd1, 0xc2, 0x00, 0x3f, 0x00, 0xcc, 0xb1, 0x12,
+ 0xc3, 0x08, 0x78, 0x10, 0xc3, 0x08, 0x82, 0x16, 0xc3, 0x08, 0x8c, 0xc2,
+ 0x1a, 0x36, 0x00, 0xcc, 0x51, 0xc2, 0x0e, 0x13, 0x00, 0xcc, 0x00, 0x9b,
+ 0x00, 0xce, 0x01, 0x8b, 0x00, 0xcd, 0x90, 0x87, 0x00, 0xcd, 0xcb, 0x03,
+ 0x08, 0x9c, 0x9b, 0x00, 0xcd, 0xe1, 0x97, 0x00, 0xcd, 0xa0, 0x83, 0x00,
+ 0xcd, 0xc3, 0x03, 0x08, 0xa0, 0x9b, 0x00, 0xcd, 0xe8, 0x83, 0x00, 0xcd,
+ 0x8b, 0x03, 0x08, 0xa4, 0x9b, 0x00, 0xcd, 0xd1, 0x87, 0x00, 0xcd, 0xb0,
+ 0x42, 0x00, 0xc9, 0xc3, 0x08, 0xa8, 0xc7, 0x55, 0xba, 0x01, 0x27, 0x68,
+ 0xc7, 0x1f, 0xd9, 0x01, 0x27, 0x91, 0xc5, 0x66, 0x81, 0x01, 0x27, 0x58,
+ 0xc8, 0x4c, 0x2c, 0x01, 0x27, 0x89, 0xc6, 0x42, 0xe9, 0x01, 0x27, 0x80,
+ 0xc6, 0x12, 0xf0, 0x01, 0x27, 0x79, 0xc7, 0x52, 0xbd, 0x01, 0x27, 0x70,
+ 0x94, 0x08, 0xcd, 0x38, 0xc2, 0x01, 0x0e, 0x08, 0xcd, 0xd9, 0x83, 0x08,
+ 0xcd, 0xd0, 0xc2, 0x01, 0x0e, 0x08, 0xcd, 0xc9, 0x83, 0x08, 0xcd, 0xc0,
+ 0xc4, 0x15, 0xa9, 0x08, 0x45, 0x71, 0x91, 0x08, 0x45, 0x40, 0xc3, 0xe2,
+ 0x62, 0x08, 0x44, 0xc9, 0xc4, 0xdd, 0x34, 0x08, 0x44, 0xb0, 0xc3, 0xeb,
+ 0xeb, 0x0f, 0xb3, 0x11, 0xc9, 0xac, 0x58, 0x0f, 0xb2, 0xd1, 0xc4, 0x4a,
+ 0x1e, 0x0f, 0xb2, 0x90, 0xc4, 0x00, 0x63, 0x01, 0x0c, 0xbb, 0x03, 0x08,
+ 0xb4, 0xd3, 0x3c, 0x4f, 0x01, 0x49, 0x10, 0xc7, 0x12, 0x4e, 0x01, 0x5b,
+ 0xb8, 0xc4, 0x00, 0x63, 0x01, 0x0c, 0xb3, 0x03, 0x08, 0xb8, 0xd3, 0x3d,
+ 0x03, 0x01, 0x49, 0x08, 0xc3, 0xeb, 0xeb, 0x0f, 0xb3, 0x01, 0xc9, 0xac,
+ 0x58, 0x0f, 0xb2, 0xc1, 0xc4, 0x4a, 0x1e, 0x0f, 0xb2, 0x80, 0xc7, 0x12,
+ 0x4e, 0x01, 0x5b, 0xb0, 0x44, 0x05, 0x17, 0xc3, 0x08, 0xbc, 0x46, 0x05,
+ 0xdd, 0x43, 0x08, 0xd4, 0xc9, 0xb7, 0x1a, 0x05, 0x41, 0xb1, 0xca, 0xa8,
+ 0x26, 0x05, 0x41, 0xc8, 0x86, 0x0f, 0xae, 0x39, 0xc2, 0x02, 0xfb, 0x0f,
+ 0xae, 0x30, 0xcd, 0x7c, 0x5f, 0x0f, 0x98, 0x79, 0xc7, 0xc8, 0xf6, 0x0f,
+ 0x98, 0x70, 0x00, 0x43, 0x08, 0xe0, 0x00, 0x43, 0x08, 0xf5, 0x45, 0x00,
+ 0x39, 0x43, 0x09, 0x0a, 0x83, 0x09, 0x8f, 0x50, 0x84, 0x09, 0x8f, 0x11,
+ 0x83, 0x09, 0x8f, 0x08, 0x83, 0x09, 0x8e, 0xf0, 0x83, 0x09, 0x8e, 0xd0,
+ 0x83, 0x09, 0x8e, 0xa8, 0x83, 0x09, 0x8e, 0x90, 0x83, 0x09, 0x8e, 0x60,
+ 0x83, 0x09, 0x8e, 0x50, 0x83, 0x09, 0x8e, 0x40, 0x8a, 0x09, 0x8e, 0x21,
+ 0x89, 0x09, 0x8e, 0x19, 0x88, 0x09, 0x8e, 0x11, 0x87, 0x09, 0x8e, 0x09,
+ 0x86, 0x09, 0x8e, 0x01, 0x85, 0x09, 0x8d, 0xf9, 0x84, 0x09, 0x8d, 0xf1,
+ 0x83, 0x09, 0x8d, 0xe8, 0x83, 0x09, 0x8d, 0xd0, 0x83, 0x09, 0x8d, 0x90,
+ 0x84, 0x09, 0x8d, 0x79, 0x83, 0x09, 0x8d, 0x70, 0x83, 0x09, 0x9e, 0x68,
+ 0x83, 0x09, 0x9e, 0x30, 0x83, 0x09, 0x9e, 0x20, 0x83, 0x09, 0x9e, 0x00,
+ 0x83, 0x09, 0x9d, 0xd8, 0x83, 0x09, 0x9d, 0xc8, 0x83, 0x09, 0x9d, 0x90,
+ 0x83, 0x09, 0x99, 0x78, 0x83, 0x09, 0x99, 0x68, 0x83, 0x09, 0x98, 0xe0,
+ 0x83, 0x09, 0x98, 0xb0, 0x83, 0x09, 0x98, 0x98, 0x83, 0x09, 0x98, 0x88,
+ 0x83, 0x09, 0x98, 0x78, 0x83, 0x09, 0x98, 0x50, 0x85, 0x09, 0x89, 0xe1,
+ 0x84, 0x09, 0x89, 0xd9, 0x83, 0x09, 0x89, 0xd0, 0x83, 0x09, 0x89, 0xa8,
+ 0x83, 0x09, 0x89, 0x98, 0x83, 0x09, 0x89, 0x88, 0x83, 0x09, 0x89, 0x48,
+ 0x83, 0x09, 0x89, 0x38, 0x83, 0x09, 0x89, 0x00, 0x83, 0x09, 0x88, 0xa8,
+ 0x83, 0x09, 0x88, 0x60, 0x83, 0x09, 0x87, 0xf8, 0x83, 0x09, 0x87, 0xd0,
+ 0x83, 0x09, 0x87, 0x98, 0x83, 0x09, 0x87, 0x50, 0x83, 0x09, 0x87, 0x30,
+ 0x83, 0x09, 0x87, 0x20, 0x83, 0x09, 0x86, 0xe0, 0x83, 0x09, 0x86, 0xd0,
+ 0xc3, 0x03, 0x02, 0x09, 0xa1, 0xa9, 0xc5, 0xdd, 0xa1, 0x09, 0xa1, 0x98,
+ 0xc3, 0x03, 0x02, 0x09, 0xa1, 0xa1, 0xc5, 0xdd, 0xa1, 0x09, 0xa1, 0x90,
+ 0x83, 0x09, 0x8c, 0xb0, 0x83, 0x09, 0x8c, 0xa0, 0x83, 0x09, 0x8c, 0x90,
+ 0x83, 0x09, 0x8c, 0x68, 0x84, 0x09, 0x94, 0xc9, 0x83, 0x09, 0x94, 0xc0,
+ 0x86, 0x09, 0x94, 0x49, 0x85, 0x09, 0x94, 0x41, 0x84, 0x09, 0x94, 0x39,
+ 0x83, 0x09, 0x94, 0x30, 0x83, 0x09, 0x94, 0x10, 0x83, 0x09, 0x93, 0xf0,
+ 0x83, 0x09, 0x93, 0xe0, 0x83, 0x09, 0x93, 0xb8, 0x83, 0x09, 0x93, 0xa8,
+ 0x83, 0x09, 0x93, 0x80, 0x83, 0x09, 0x93, 0x70, 0x85, 0x09, 0x93, 0x21,
+ 0x84, 0x09, 0x93, 0x19, 0x83, 0x09, 0x93, 0x10, 0x88, 0x09, 0x92, 0xe9,
+ 0x87, 0x09, 0x92, 0xe1, 0x86, 0x09, 0x92, 0xd9, 0x85, 0x09, 0x92, 0xd1,
+ 0x84, 0x09, 0x92, 0xc9, 0x83, 0x09, 0x92, 0xc0, 0x83, 0x09, 0x92, 0xb0,
+ 0x83, 0x09, 0x92, 0x88, 0x86, 0x09, 0xa1, 0x79, 0x85, 0x09, 0x9b, 0xd9,
+ 0x84, 0x09, 0x9b, 0xd1, 0x83, 0x09, 0x9b, 0xc8, 0x84, 0x09, 0x9b, 0xf1,
+ 0x83, 0x09, 0x9b, 0xe8, 0x83, 0x09, 0x9d, 0x50, 0x83, 0x09, 0x9d, 0x28,
+ 0x83, 0x09, 0x9d, 0x10, 0x83, 0x09, 0x9d, 0x00, 0x83, 0x09, 0x9c, 0xf0,
+ 0x83, 0x09, 0x9c, 0xe0, 0x83, 0x09, 0x9c, 0xb0, 0x8e, 0x09, 0x9c, 0x91,
+ 0x8d, 0x09, 0x9c, 0x89, 0x8c, 0x09, 0x9c, 0x81, 0x8b, 0x09, 0x9c, 0x79,
+ 0x8a, 0x09, 0x9c, 0x71, 0x89, 0x09, 0x9c, 0x69, 0x88, 0x09, 0x9c, 0x61,
+ 0x87, 0x09, 0x9c, 0x59, 0x86, 0x09, 0x9c, 0x51, 0x85, 0x09, 0x9c, 0x49,
+ 0x84, 0x09, 0x9c, 0x41, 0x83, 0x09, 0x9c, 0x38, 0x84, 0x09, 0x9b, 0xa1,
+ 0x83, 0x09, 0x9b, 0x98, 0x83, 0x09, 0x9b, 0x68, 0x8b, 0x09, 0x9b, 0x59,
+ 0x8a, 0x09, 0x9b, 0x51, 0x89, 0x09, 0x9b, 0x49, 0x88, 0x09, 0x9b, 0x41,
+ 0x87, 0x09, 0x9b, 0x39, 0x86, 0x09, 0x9b, 0x31, 0x85, 0x09, 0x9b, 0x29,
+ 0x84, 0x09, 0x9b, 0x21, 0x83, 0x09, 0x9b, 0x18, 0x84, 0x09, 0xa0, 0xa9,
+ 0x83, 0x09, 0xa0, 0xa0, 0x83, 0x09, 0x81, 0xb0, 0x83, 0x09, 0x81, 0x98,
+ 0x83, 0x09, 0x81, 0x88, 0x83, 0x09, 0x81, 0x70, 0x83, 0x09, 0x81, 0x28,
+ 0x83, 0x09, 0x80, 0xa8, 0x83, 0x09, 0x80, 0x88, 0x84, 0x09, 0x80, 0x41,
+ 0x83, 0x09, 0x80, 0x38, 0x83, 0x09, 0x80, 0x28, 0x83, 0x09, 0x92, 0x78,
+ 0x83, 0x09, 0x92, 0x50, 0x83, 0x09, 0x92, 0x10, 0x83, 0x09, 0x92, 0x00,
+ 0x83, 0x09, 0x91, 0x90, 0x83, 0x09, 0x91, 0x28, 0x83, 0x09, 0x90, 0xd0,
+ 0x83, 0x09, 0x90, 0xb8, 0x83, 0x09, 0x90, 0xa8, 0x83, 0x09, 0x90, 0x98,
+ 0x83, 0x09, 0x90, 0x50, 0x84, 0x09, 0x90, 0x11, 0x83, 0x09, 0x90, 0x08,
+ 0x42, 0x02, 0x6b, 0xc3, 0x09, 0x16, 0x42, 0xbc, 0x5c, 0xc3, 0x09, 0x20,
+ 0x42, 0xa8, 0x89, 0xc3, 0x09, 0x2a, 0x42, 0x00, 0xbc, 0xc3, 0x09, 0x35,
+ 0x42, 0x26, 0x9b, 0xc3, 0x09, 0x40, 0x42, 0xed, 0xbd, 0xc3, 0x09, 0x4a,
+ 0x42, 0x4b, 0x65, 0xc3, 0x09, 0x55, 0xc4, 0xe7, 0x63, 0x0f, 0x3f, 0x40,
+ 0x83, 0x00, 0x95, 0x18, 0x87, 0x00, 0x95, 0x20, 0x83, 0x01, 0x6c, 0x50,
+ 0x83, 0x00, 0x98, 0x98, 0x87, 0x00, 0x98, 0xa0, 0x83, 0x00, 0x98, 0xd8,
+ 0x87, 0x00, 0x98, 0xe0, 0x83, 0x01, 0x6c, 0x9b, 0x03, 0x09, 0x5f, 0x8b,
+ 0x01, 0x6c, 0xa1, 0x87, 0x01, 0x6c, 0xb2, 0x03, 0x09, 0x63, 0x83, 0x01,
+ 0x6e, 0xd8, 0x87, 0x01, 0x6e, 0xe0, 0x87, 0x0f, 0x3f, 0x5b, 0x03, 0x09,
+ 0x67, 0x8b, 0x0f, 0x3f, 0x49, 0x83, 0x00, 0x90, 0xb8, 0x91, 0x0f, 0x3f,
+ 0x31, 0x87, 0x0f, 0x3f, 0x2b, 0x03, 0x09, 0x6b, 0x83, 0x0f, 0x3f, 0x03,
+ 0x03, 0x09, 0x6f, 0x8b, 0x0f, 0x3f, 0x11, 0x97, 0x0f, 0x3f, 0x08, 0x83,
+ 0x00, 0x90, 0x98, 0x87, 0x00, 0x90, 0xa0, 0x87, 0x05, 0x59, 0x60, 0x83,
+ 0x05, 0x59, 0x58, 0x87, 0x00, 0x9c, 0x30, 0x0a, 0xc3, 0x09, 0x73, 0x83,
+ 0x01, 0x6d, 0xc3, 0x03, 0x09, 0x8d, 0x97, 0x01, 0x6d, 0xc9, 0x8b, 0x01,
+ 0x6d, 0xd1, 0x87, 0x01, 0x6d, 0xeb, 0x03, 0x09, 0x91, 0x91, 0x01, 0x6d,
+ 0xf0, 0x83, 0x01, 0x6d, 0x58, 0x87, 0x01, 0x6d, 0x60, 0x83, 0x00, 0x99,
+ 0x58, 0x87, 0x00, 0x99, 0x60, 0x83, 0x01, 0x6c, 0x80, 0x87, 0x05, 0x58,
+ 0xa0, 0x91, 0x05, 0x58, 0x71, 0x87, 0x05, 0x58, 0x6b, 0x03, 0x09, 0x95,
+ 0xc2, 0x0e, 0x30, 0x05, 0x58, 0x59, 0x8b, 0x05, 0x58, 0x51, 0x97, 0x05,
+ 0x58, 0x48, 0x83, 0x00, 0x97, 0xd8, 0x87, 0x00, 0x97, 0xe0, 0x83, 0x01,
+ 0x6c, 0x68, 0x87, 0x05, 0x58, 0x20, 0x83, 0x00, 0x99, 0x18, 0x87, 0x00,
+ 0x99, 0x20, 0x83, 0x01, 0x6c, 0x78, 0x83, 0x00, 0x99, 0xd8, 0x87, 0x00,
+ 0x99, 0xe0, 0x83, 0x00, 0x9a, 0x18, 0x87, 0x00, 0x9a, 0x20, 0x83, 0x00,
+ 0x9a, 0x38, 0x87, 0x00, 0x9c, 0x10, 0x83, 0x00, 0x91, 0x18, 0x87, 0x00,
+ 0x91, 0x20, 0xc3, 0x89, 0xd8, 0x00, 0x9c, 0x01, 0xc3, 0xcb, 0x28, 0x00,
+ 0x9c, 0x21, 0xc3, 0xe6, 0xff, 0x00, 0x9c, 0x41, 0xc3, 0x02, 0x6b, 0x00,
+ 0x9c, 0x60, 0x83, 0x00, 0x91, 0xd8, 0x87, 0x00, 0x91, 0xe0, 0x83, 0x01,
+ 0x6c, 0x20, 0x83, 0x01, 0x6d, 0x18, 0x87, 0x01, 0x6d, 0x20, 0x83, 0x00,
+ 0x92, 0x58, 0x87, 0x00, 0x92, 0x60, 0x83, 0x00, 0x92, 0x98, 0x87, 0x00,
+ 0x92, 0xa0, 0x83, 0x00, 0x92, 0xc3, 0x03, 0x09, 0x99, 0x8b, 0x00, 0x92,
+ 0xd1, 0x87, 0x00, 0x92, 0xea, 0x03, 0x09, 0x9d, 0x83, 0x01, 0x6e, 0x18,
+ 0x87, 0x01, 0x6e, 0x20, 0x83, 0x00, 0x94, 0x58, 0x87, 0x00, 0x94, 0x60,
+ 0x83, 0x01, 0x6e, 0x98, 0x87, 0x01, 0x6e, 0xa0, 0x83, 0x00, 0x94, 0xd8,
+ 0x87, 0x00, 0x94, 0xe0, 0x83, 0x01, 0x6c, 0x48, 0x83, 0x00, 0x95, 0x98,
+ 0x87, 0x00, 0x95, 0xa0, 0x83, 0x00, 0x95, 0xd8, 0x87, 0x00, 0x95, 0xe0,
+ 0x83, 0x00, 0x96, 0x03, 0x03, 0x09, 0xa1, 0x8b, 0x00, 0x96, 0x11, 0x87,
+ 0x00, 0x96, 0x2a, 0x03, 0x09, 0xa5, 0x83, 0x01, 0x6e, 0x58, 0x87, 0x01,
+ 0x6e, 0x60, 0x48, 0x16, 0x09, 0xc3, 0x09, 0xa9, 0x83, 0x00, 0x99, 0x98,
+ 0x87, 0x00, 0x99, 0xa0, 0x83, 0x01, 0x6c, 0x88, 0x87, 0x00, 0x9c, 0x70,
+ 0x83, 0x00, 0x97, 0x18, 0x87, 0x00, 0x97, 0x20, 0x83, 0x01, 0x6d, 0x98,
+ 0x87, 0x01, 0x6d, 0xa0, 0x87, 0x00, 0x9c, 0x50, 0xe0, 0x03, 0x67, 0x01,
+ 0x17, 0x98, 0xd3, 0x35, 0x9d, 0x01, 0x4f, 0x1b, 0x03, 0x09, 0xc3, 0x45,
+ 0x03, 0x51, 0x43, 0x09, 0xc9, 0x16, 0xc3, 0x09, 0xe1, 0xc9, 0x0d, 0xd7,
+ 0x01, 0x53, 0x31, 0xcb, 0x9a, 0x13, 0x01, 0x55, 0x71, 0xce, 0x6e, 0x2c,
+ 0x01, 0x5f, 0xc8, 0x94, 0x00, 0x57, 0x00, 0x8e, 0x00, 0x57, 0x08, 0x94,
+ 0x00, 0x56, 0x20, 0x8e, 0x00, 0x57, 0x18, 0xc3, 0xec, 0x24, 0x0e, 0x91,
+ 0x03, 0x03, 0x09, 0xe7, 0xc3, 0xec, 0x21, 0x0e, 0x90, 0xfb, 0x03, 0x09,
+ 0xfb, 0x43, 0xec, 0x1e, 0xc3, 0x0a, 0x17, 0xc3, 0xec, 0x33, 0x0e, 0x90,
+ 0xf3, 0x03, 0x0a, 0x2f, 0xc3, 0xec, 0x3f, 0x0e, 0x90, 0xeb, 0x03, 0x0a,
+ 0x47, 0xc3, 0xec, 0x2d, 0x0e, 0x91, 0x11, 0xc3, 0xec, 0x2a, 0x0e, 0x91,
+ 0x08, 0xc3, 0xec, 0x24, 0x0e, 0x90, 0x23, 0x03, 0x0a, 0x63, 0xc3, 0xec,
+ 0x33, 0x0e, 0x90, 0x0b, 0x03, 0x0a, 0x77, 0xc3, 0xec, 0x3f, 0x0e, 0x90,
+ 0x03, 0x03, 0x0a, 0x8b, 0xc3, 0xec, 0x30, 0x0e, 0x90, 0x41, 0xc3, 0xec,
+ 0x2d, 0x0e, 0x90, 0x39, 0xc3, 0xec, 0x2a, 0x0e, 0x90, 0x31, 0xc3, 0xec,
+ 0x27, 0x0e, 0x90, 0x29, 0xc3, 0xec, 0x21, 0x0e, 0x90, 0x19, 0xc3, 0xec,
+ 0x1e, 0x0e, 0x90, 0x10, 0x45, 0x00, 0x39, 0x43, 0x0a, 0xb3, 0x44, 0x00,
+ 0x3a, 0xc3, 0x0a, 0xc5, 0xc5, 0x68, 0x98, 0x00, 0x1c, 0x28, 0xc9, 0xac,
+ 0xd6, 0x08, 0x0b, 0xab, 0x03, 0x0a, 0xd7, 0xcc, 0x89, 0xa0, 0x08, 0x0c,
+ 0x58, 0x46, 0x02, 0x00, 0xc3, 0x0a, 0xdd, 0xd2, 0x19, 0x88, 0x00, 0x1f,
+ 0xc8, 0xd3, 0x19, 0x87, 0x00, 0x1f, 0xe9, 0xda, 0x19, 0x80, 0x00, 0x1f,
+ 0xf8, 0x47, 0x01, 0xff, 0xc3, 0x0b, 0x5a, 0x49, 0x11, 0xad, 0xc3, 0x0b,
+ 0xd3, 0xda, 0x19, 0x80, 0x00, 0x1b, 0xe0, 0xc3, 0x11, 0x3f, 0x00, 0xeb,
+ 0x51, 0xc3, 0x1b, 0x75, 0x00, 0xeb, 0x49, 0xc3, 0x7d, 0x39, 0x00, 0xeb,
+ 0x41, 0xc5, 0x4f, 0xcc, 0x00, 0xeb, 0x39, 0xc4, 0x99, 0xfd, 0x00, 0xeb,
+ 0x30, 0x45, 0x00, 0x39, 0x43, 0x0b, 0xdf, 0xc8, 0x9d, 0xb0, 0x00, 0x1e,
+ 0xb9, 0xca, 0x8f, 0x2a, 0x00, 0x1f, 0x80, 0x15, 0xc3, 0x0b, 0xf1, 0xcd,
+ 0x7a, 0xe6, 0x00, 0x1e, 0xc1, 0xc3, 0xc9, 0xab, 0x00, 0x1f, 0x99, 0xc7,
+ 0x4f, 0xca, 0x00, 0x1e, 0xe1, 0xc5, 0x7a, 0xee, 0x00, 0x1e, 0xf0, 0xcc,
+ 0x19, 0x8e, 0x00, 0x1f, 0x91, 0xce, 0x12, 0x64, 0x00, 0x1f, 0xa8, 0xca,
+ 0x8b, 0xee, 0x00, 0x1e, 0x89, 0x44, 0x05, 0xdf, 0x43, 0x0c, 0x03, 0xcb,
+ 0x91, 0x0d, 0x08, 0x0b, 0xb9, 0xca, 0x74, 0x18, 0x08, 0x0b, 0xe8, 0x46,
+ 0xd8, 0x92, 0xc3, 0x0c, 0x0f, 0x43, 0x14, 0x00, 0xc3, 0x0c, 0x21, 0x16,
+ 0xc3, 0x0c, 0x2d, 0x4b, 0x96, 0x09, 0xc3, 0x0c, 0x39, 0x05, 0xc3, 0x0c,
+ 0x48, 0xcd, 0x7a, 0x3d, 0x08, 0x0b, 0x19, 0xd1, 0x54, 0x22, 0x08, 0x0b,
+ 0x99, 0xd3, 0x41, 0xbf, 0x08, 0x0b, 0xa1, 0xd3, 0x40, 0xb5, 0x08, 0x0b,
+ 0x80, 0xc9, 0xaf, 0x1f, 0x08, 0x0c, 0x31, 0xc9, 0xb6, 0x4b, 0x08, 0x0c,
+ 0x38, 0xc6, 0x01, 0xb1, 0x00, 0x1f, 0x89, 0xd2, 0x4e, 0x00, 0x00, 0x1f,
+ 0xe0, 0xca, 0x37, 0x20, 0x01, 0x13, 0x99, 0xc5, 0x09, 0x02, 0x01, 0x13,
+ 0x28, 0x4a, 0x35, 0x23, 0x43, 0x0c, 0x54, 0xe0, 0x09, 0xc7, 0x01, 0x54,
+ 0x60, 0x47, 0xcc, 0x68, 0xc3, 0x0c, 0x63, 0x50, 0x42, 0xc9, 0x43, 0x0c,
+ 0x6f, 0xe0, 0x03, 0x47, 0x01, 0x54, 0x90, 0x8e, 0x08, 0x9b, 0x18, 0x94,
+ 0x08, 0x9a, 0x20, 0x8e, 0x08, 0x98, 0x63, 0x03, 0x0c, 0x75, 0x94, 0x08,
+ 0x98, 0x5a, 0x03, 0x0c, 0x79, 0xcf, 0x13, 0xde, 0x08, 0x9a, 0xf9, 0xc8,
+ 0x10, 0xab, 0x08, 0x9a, 0xf0, 0xc2, 0x01, 0x0e, 0x08, 0x99, 0x11, 0x83,
+ 0x08, 0x99, 0x08, 0xc2, 0x01, 0x0e, 0x08, 0x99, 0x01, 0x83, 0x08, 0x98,
+ 0xf8, 0xcb, 0x1a, 0x5f, 0x0f, 0xb0, 0x09, 0xc8, 0xbf, 0x43, 0x0f, 0xc9,
+ 0x48, 0x94, 0x00, 0xe5, 0xa3, 0x03, 0x0c, 0x7d, 0x87, 0x00, 0xe5, 0x80,
+ 0x94, 0x00, 0xe5, 0x11, 0x90, 0x00, 0xe4, 0xb8, 0xc2, 0x00, 0x4d, 0x00,
+ 0xe5, 0x69, 0xc2, 0x01, 0x5b, 0x00, 0xe5, 0x48, 0xc2, 0x01, 0x5b, 0x00,
+ 0x85, 0xc9, 0xc2, 0x00, 0x4d, 0x00, 0x85, 0xe8, 0x87, 0x00, 0x86, 0x01,
+ 0x94, 0x00, 0x86, 0x20, 0x90, 0x00, 0x86, 0xb9, 0x94, 0x00, 0x87, 0x10,
+ 0xc2, 0x01, 0x5b, 0x00, 0x87, 0x49, 0xc2, 0x00, 0x4d, 0x00, 0x87, 0x68,
+ 0x87, 0x00, 0x87, 0x81, 0x94, 0x00, 0x87, 0xa2, 0x03, 0x0c, 0x83, 0xc2,
+ 0x01, 0x5b, 0x01, 0x68, 0xc9, 0xc2, 0x00, 0x4d, 0x01, 0x68, 0xe8, 0x87,
+ 0x01, 0x69, 0x01, 0x94, 0x01, 0x69, 0x20, 0xc3, 0x00, 0x15, 0x01, 0x60,
+ 0x29, 0x14, 0x43, 0x0c, 0x89, 0x87, 0x01, 0x60, 0x49, 0xc4, 0x7d, 0x1e,
+ 0x01, 0x61, 0x58, 0xc9, 0xac, 0x8e, 0x01, 0x61, 0x39, 0xc7, 0xcc, 0x53,
+ 0x01, 0x61, 0x48, 0xc2, 0x00, 0x44, 0x01, 0x60, 0xdb, 0x03, 0x0c, 0x91,
+ 0x83, 0x01, 0x60, 0xf0, 0xca, 0xa3, 0xda, 0x01, 0x61, 0x28, 0xc3, 0x00,
+ 0x15, 0x01, 0x61, 0xa9, 0x14, 0x43, 0x0c, 0x97, 0x87, 0x01, 0x61, 0xc9,
+ 0xc4, 0x7d, 0x1e, 0x01, 0x62, 0xd8, 0xc9, 0xac, 0x8e, 0x01, 0x62, 0xb9,
+ 0xc7, 0xcc, 0x53, 0x01, 0x62, 0xc8, 0xc2, 0x00, 0x44, 0x01, 0x62, 0x5b,
+ 0x03, 0x0c, 0x9f, 0x83, 0x01, 0x62, 0x70, 0xca, 0xa3, 0xda, 0x01, 0x62,
+ 0xa8, 0x94, 0x00, 0x58, 0x5b, 0x03, 0x0c, 0xa5, 0x8e, 0x00, 0x58, 0x62,
+ 0x03, 0x0c, 0xa9, 0x83, 0x00, 0x58, 0xf9, 0xc2, 0x01, 0x0e, 0x00, 0x59,
+ 0x00, 0x83, 0x00, 0x59, 0x09, 0xc2, 0x01, 0x0e, 0x00, 0x59, 0x10, 0x94,
+ 0x00, 0x5a, 0x20, 0x8e, 0x00, 0x5b, 0x18, 0x00, 0x43, 0x0c, 0xad, 0xc9,
+ 0x4f, 0xa1, 0x0f, 0x69, 0x38, 0x00, 0x43, 0x0c, 0xb9, 0xc9, 0x4f, 0xa1,
+ 0x0f, 0x69, 0x30, 0x00, 0x43, 0x0c, 0xc5, 0xc9, 0x4f, 0xa1, 0x0f, 0x69,
+ 0x40, 0x00, 0x43, 0x0c, 0xd1, 0xc9, 0x4f, 0xa1, 0x0f, 0x69, 0x48, 0xc9,
+ 0x4f, 0xa1, 0x0f, 0x69, 0x50, 0xc7, 0x0d, 0x7f, 0x0f, 0x68, 0xc1, 0xc8,
+ 0x4f, 0xa2, 0x0f, 0x69, 0x08, 0xc9, 0x4f, 0xa1, 0x0f, 0x69, 0x58, 0xc7,
+ 0x0d, 0x7f, 0x0f, 0x68, 0xc9, 0xc8, 0x4f, 0xa2, 0x0f, 0x69, 0x10, 0xc4,
+ 0x05, 0xe3, 0x08, 0x7c, 0x41, 0xc4, 0x16, 0x38, 0x08, 0x7c, 0x38, 0xc5,
+ 0x00, 0x34, 0x08, 0x7c, 0x29, 0xc5, 0x03, 0x50, 0x08, 0x7c, 0x20, 0xc5,
+ 0x00, 0x34, 0x08, 0x7c, 0x19, 0xc5, 0x03, 0x50, 0x08, 0x7c, 0x10, 0xc3,
+ 0x30, 0xe0, 0x08, 0x7c, 0x09, 0xc5, 0xd6, 0xb9, 0x08, 0x7b, 0xc0, 0x03,
+ 0xc3, 0x0c, 0xdd, 0xc3, 0x11, 0x40, 0x08, 0x7b, 0xf8, 0xc3, 0x05, 0x17,
+ 0x08, 0x78, 0xeb, 0x03, 0x0c, 0xe9, 0x16, 0xc3, 0x0c, 0xef, 0x08, 0x43,
+ 0x0c, 0xfd, 0x46, 0x02, 0x00, 0xc3, 0x0d, 0x09, 0xd3, 0x43, 0x02, 0x08,
+ 0x79, 0x38, 0xce, 0x6d, 0x84, 0x08, 0x53, 0xf9, 0x44, 0x04, 0xb1, 0x43,
+ 0x0d, 0x68, 0x16, 0xc3, 0x0d, 0x74, 0xc4, 0x4d, 0x48, 0x08, 0x53, 0xd1,
+ 0x06, 0xc3, 0x0d, 0x84, 0xc4, 0xe4, 0xa7, 0x08, 0x53, 0xc1, 0x09, 0xc3,
+ 0x0d, 0x90, 0xc4, 0xe5, 0xaf, 0x08, 0x53, 0x41, 0xc4, 0x5d, 0xef, 0x08,
+ 0x53, 0x39, 0x15, 0xc3, 0x0d, 0x9c, 0xc3, 0x7c, 0xad, 0x08, 0x53, 0x29,
+ 0xc4, 0xbc, 0xb7, 0x08, 0x53, 0x21, 0xc3, 0x04, 0xae, 0x08, 0x53, 0x19,
+ 0xc2, 0x00, 0x5b, 0x08, 0x53, 0x03, 0x03, 0x0d, 0xa6, 0xc6, 0xd7, 0x12,
+ 0x08, 0x53, 0x09, 0x0d, 0xc3, 0x0d, 0xac, 0xc3, 0x21, 0x00, 0x08, 0x53,
+ 0x61, 0xc2, 0x13, 0xfc, 0x08, 0x53, 0x81, 0x03, 0x43, 0x0d, 0xb8, 0xc2,
+ 0x00, 0x29, 0x08, 0x67, 0xd9, 0xc3, 0x41, 0xca, 0x08, 0x67, 0xe8, 0x00,
+ 0x43, 0x0d, 0xc4, 0x95, 0x08, 0x67, 0x91, 0x97, 0x08, 0x67, 0x59, 0xc2,
+ 0x1d, 0xe2, 0x08, 0x66, 0xa8, 0x90, 0x08, 0x66, 0xcb, 0x03, 0x0d, 0xd0,
+ 0x9c, 0x08, 0x67, 0x7b, 0x03, 0x0d, 0xe2, 0x98, 0x08, 0x67, 0x71, 0x85,
+ 0x08, 0x66, 0x23, 0x03, 0x0d, 0xe6, 0x96, 0x08, 0x67, 0x33, 0x03, 0x0d,
+ 0xee, 0x95, 0x08, 0x67, 0x23, 0x03, 0x0d, 0xf2, 0x8f, 0x08, 0x66, 0xc3,
+ 0x03, 0x0d, 0xf6, 0x8e, 0x08, 0x66, 0xb3, 0x03, 0x0d, 0xfa, 0x8d, 0x08,
+ 0x66, 0x99, 0x8c, 0x08, 0x66, 0x91, 0x8a, 0x08, 0x66, 0x79, 0x89, 0x08,
+ 0x66, 0x6b, 0x03, 0x0d, 0xfe, 0x88, 0x08, 0x66, 0x61, 0x87, 0x08, 0x66,
+ 0x59, 0x86, 0x08, 0x66, 0x39, 0x84, 0x08, 0x66, 0x11, 0x92, 0x08, 0x67,
+ 0x01, 0x94, 0x08, 0x67, 0x10, 0xc2, 0x0e, 0x78, 0x08, 0x67, 0x69, 0xc2,
+ 0x09, 0x06, 0x08, 0x66, 0xf0, 0xc2, 0x0e, 0x78, 0x08, 0x67, 0x61, 0xc2,
+ 0x09, 0x06, 0x08, 0x66, 0xe8, 0x91, 0x08, 0x66, 0xe1, 0xc2, 0x01, 0xe6,
+ 0x08, 0x66, 0xf8, 0x8d, 0x08, 0x66, 0xa1, 0xc2, 0x00, 0x3b, 0x08, 0x66,
+ 0x41, 0xc2, 0x01, 0xb3, 0x08, 0x66, 0x19, 0x83, 0x08, 0x66, 0x08, 0x8b,
+ 0x08, 0x66, 0x88, 0x90, 0x08, 0x64, 0xcb, 0x03, 0x0e, 0x02, 0x96, 0x08,
+ 0x65, 0x33, 0x03, 0x0e, 0x14, 0x95, 0x08, 0x65, 0x23, 0x03, 0x0e, 0x18,
+ 0x92, 0x08, 0x65, 0x01, 0x8f, 0x08, 0x64, 0xc3, 0x03, 0x0e, 0x1c, 0x8e,
+ 0x08, 0x64, 0xb3, 0x03, 0x0e, 0x20, 0x8d, 0x08, 0x64, 0x99, 0x8c, 0x08,
+ 0x64, 0x91, 0x8a, 0x08, 0x64, 0x79, 0x89, 0x08, 0x64, 0x6b, 0x03, 0x0e,
+ 0x24, 0x88, 0x08, 0x64, 0x61, 0x87, 0x08, 0x64, 0x59, 0x86, 0x08, 0x64,
+ 0x39, 0x85, 0x08, 0x64, 0x23, 0x03, 0x0e, 0x28, 0x84, 0x08, 0x64, 0x11,
+ 0x94, 0x08, 0x65, 0x11, 0x98, 0x08, 0x65, 0x71, 0x9c, 0x08, 0x65, 0x7a,
+ 0x03, 0x0e, 0x30, 0xc2, 0x01, 0xe6, 0x08, 0x64, 0xf9, 0x91, 0x08, 0x64,
+ 0xe0, 0xc2, 0x09, 0x06, 0x08, 0x64, 0xf1, 0xc2, 0x0e, 0x78, 0x08, 0x65,
+ 0x68, 0xc2, 0x09, 0x06, 0x08, 0x64, 0xe9, 0xc2, 0x0e, 0x78, 0x08, 0x65,
+ 0x60, 0xc2, 0x1d, 0xe2, 0x08, 0x64, 0xa9, 0x97, 0x08, 0x65, 0x59, 0x95,
+ 0x08, 0x65, 0x90, 0x8d, 0x08, 0x64, 0xa1, 0xc2, 0x00, 0x3b, 0x08, 0x64,
+ 0x41, 0xc2, 0x01, 0xb3, 0x08, 0x64, 0x19, 0x83, 0x08, 0x64, 0x08, 0x8b,
+ 0x08, 0x64, 0x88, 0x96, 0x08, 0x62, 0x39, 0x93, 0x08, 0x61, 0xc1, 0x87,
+ 0x08, 0x60, 0x3b, 0x03, 0x0e, 0x34, 0x92, 0x08, 0x61, 0x80, 0x07, 0xc3,
+ 0x0e, 0x38, 0x96, 0x08, 0x62, 0x19, 0x95, 0x08, 0x61, 0xeb, 0x03, 0x0e,
+ 0x60, 0x94, 0x08, 0x61, 0xd1, 0x93, 0x08, 0x61, 0xa1, 0x90, 0x08, 0x61,
+ 0x19, 0x8e, 0x08, 0x60, 0xf1, 0x9b, 0x08, 0x60, 0xb1, 0x86, 0x08, 0x60,
+ 0x89, 0x89, 0x08, 0x60, 0x69, 0x84, 0x08, 0x60, 0x48, 0xc2, 0x01, 0x0d,
+ 0x08, 0x62, 0x09, 0x10, 0xc3, 0x0e, 0x64, 0x8f, 0x08, 0x61, 0x11, 0xc2,
+ 0x03, 0x12, 0x08, 0x61, 0x09, 0x9c, 0x08, 0x60, 0xa1, 0x92, 0x08, 0x61,
+ 0x79, 0x85, 0x08, 0x61, 0x90, 0x93, 0x08, 0x61, 0xb1, 0x85, 0x08, 0x61,
+ 0x88, 0x87, 0x08, 0x60, 0x13, 0x03, 0x0e, 0x74, 0x96, 0x08, 0x62, 0x21,
+ 0xc2, 0x01, 0x0d, 0x08, 0x62, 0x01, 0x94, 0x08, 0x61, 0xd9, 0x93, 0x08,
+ 0x61, 0xa9, 0x8e, 0x08, 0x60, 0xf9, 0x9b, 0x08, 0x60, 0xb9, 0x86, 0x08,
+ 0x60, 0x91, 0x89, 0x08, 0x60, 0x71, 0x84, 0x08, 0x60, 0x51, 0xc2, 0x00,
+ 0x56, 0x08, 0x61, 0x60, 0xc2, 0x01, 0x0d, 0x08, 0x62, 0x11, 0x85, 0x08,
+ 0x61, 0x99, 0x10, 0xc3, 0x0e, 0x8f, 0x9c, 0x08, 0x60, 0xa8, 0x93, 0x08,
+ 0x61, 0xc9, 0x87, 0x08, 0x60, 0x42, 0x03, 0x0e, 0x9b, 0x93, 0x08, 0x61,
+ 0xb8, 0xc5, 0x25, 0x27, 0x08, 0x54, 0xf9, 0xc2, 0x01, 0x04, 0x08, 0x54,
+ 0xf0, 0x8a, 0x08, 0x54, 0xe1, 0xc2, 0x01, 0x5b, 0x08, 0x54, 0xc0, 0x0a,
+ 0xc3, 0x0e, 0x9f, 0xc2, 0x00, 0x45, 0x08, 0x54, 0xb9, 0xc2, 0x08, 0x86,
+ 0x08, 0x54, 0x48, 0xc4, 0x8b, 0xde, 0x08, 0x54, 0xb1, 0xc3, 0x14, 0xf3,
+ 0x08, 0x54, 0xa0, 0x8e, 0x08, 0x54, 0xa9, 0x86, 0x08, 0x54, 0x98, 0xc3,
+ 0xec, 0xb1, 0x08, 0x54, 0x31, 0xc3, 0xec, 0xb4, 0x08, 0x54, 0x51, 0xc3,
+ 0xec, 0x96, 0x08, 0x54, 0x78, 0xc2, 0x00, 0x45, 0x08, 0x54, 0x11, 0xc2,
+ 0x08, 0x86, 0x08, 0x54, 0x00, 0xc2, 0x0e, 0x14, 0x08, 0x54, 0x59, 0xc3,
+ 0x1a, 0x74, 0x08, 0x54, 0x68, 0xc3, 0x00, 0xcd, 0x08, 0x54, 0x89, 0xc2,
+ 0x01, 0x5b, 0x08, 0x54, 0x90, 0x45, 0x02, 0xcb, 0xc3, 0x0e, 0xab, 0xcc,
+ 0x1f, 0xc2, 0x08, 0x1e, 0x81, 0x47, 0x37, 0x49, 0x43, 0x0f, 0x14, 0xc2,
+ 0x01, 0x5a, 0x08, 0x1a, 0x99, 0x1c, 0x43, 0x0f, 0x24, 0x88, 0x08, 0x1b,
+ 0x58, 0xc3, 0xd3, 0xc1, 0x08, 0x1a, 0xa9, 0xc3, 0x13, 0x33, 0x08, 0x1a,
+ 0xb8, 0x87, 0x08, 0x1b, 0x91, 0x83, 0x08, 0x1b, 0xa8, 0xc3, 0x8a, 0x3b,
+ 0x08, 0x1a, 0xf1, 0xc2, 0x00, 0x2a, 0x08, 0x1b, 0x70, 0xc2, 0x23, 0x68,
+ 0x08, 0x1b, 0x09, 0x0a, 0x43, 0x0f, 0x30, 0xc2, 0x01, 0xcc, 0x08, 0x1b,
+ 0x11, 0xc3, 0xce, 0xfd, 0x08, 0x1b, 0x68, 0xc2, 0x13, 0x65, 0x08, 0x1b,
+ 0x39, 0xc2, 0x00, 0x2a, 0x08, 0x1b, 0x7b, 0x03, 0x0f, 0x3c, 0x83, 0x08,
+ 0x1b, 0xa3, 0x03, 0x0f, 0x44, 0x97, 0x08, 0x1b, 0x98, 0x91, 0x08, 0x1b,
+ 0x50, 0x87, 0x08, 0x18, 0x11, 0x83, 0x08, 0x18, 0x51, 0x97, 0x08, 0x18,
+ 0x91, 0xc2, 0x00, 0x5b, 0x08, 0x18, 0xc8, 0x8e, 0x08, 0x18, 0x59, 0x8f,
+ 0x08, 0x18, 0x61, 0x90, 0x08, 0x18, 0x69, 0x95, 0x08, 0x18, 0xa3, 0x03,
+ 0x0f, 0x48, 0x94, 0x08, 0x18, 0x9b, 0x03, 0x0f, 0x50, 0xc2, 0x02, 0xe1,
+ 0x08, 0x18, 0xb9, 0x88, 0x08, 0x18, 0xd0, 0xc2, 0x00, 0x5b, 0x08, 0x18,
+ 0x41, 0x87, 0x08, 0x18, 0xa8, 0x8b, 0x08, 0x18, 0xe8, 0x87, 0x08, 0x18,
+ 0x81, 0xc2, 0x00, 0x5b, 0x08, 0x18, 0xc0, 0xc2, 0x00, 0x5b, 0x08, 0x18,
+ 0x89, 0xcb, 0x91, 0xe9, 0x08, 0x19, 0x78, 0x97, 0x08, 0x18, 0xf0, 0xc2,
+ 0x00, 0x29, 0x08, 0x19, 0x09, 0xc3, 0x41, 0xca, 0x08, 0x19, 0x18, 0x83,
+ 0x08, 0x26, 0x49, 0xc2, 0x0e, 0xe5, 0x08, 0x26, 0x58, 0x83, 0x08, 0x27,
+ 0x89, 0xc2, 0x0e, 0xe5, 0x08, 0x27, 0x98, 0x4b, 0x88, 0xd6, 0xc3, 0x0f,
+ 0x54, 0xd2, 0x49, 0x4a, 0x0e, 0x7d, 0x90, 0x42, 0x13, 0x65, 0xc3, 0x0f,
+ 0x60, 0x46, 0x5e, 0x4a, 0x43, 0x0f, 0x6f, 0x45, 0xe1, 0xac, 0xc3, 0x0f,
+ 0x7b, 0xce, 0x6e, 0xd4, 0x0e, 0x7c, 0xd0, 0x11, 0xc3, 0x0f, 0x8d, 0xc4,
+ 0x67, 0x9a, 0x0e, 0x7d, 0x12, 0x03, 0x0f, 0x9f, 0x11, 0xc3, 0x0f, 0xa5,
+ 0xc3, 0x2c, 0x50, 0x0e, 0x7c, 0xda, 0x03, 0x0f, 0xb4, 0x11, 0xc3, 0x0f,
+ 0xba, 0xc7, 0xbf, 0x24, 0x0e, 0x7c, 0x90, 0xce, 0x67, 0x90, 0x0e, 0x7c,
+ 0x89, 0x42, 0x00, 0x27, 0xc3, 0x0f, 0xc6, 0xc9, 0xb7, 0x7d, 0x0e, 0x7c,
+ 0x5a, 0x03, 0x0f, 0xe4, 0xd4, 0x3a, 0xbe, 0x0e, 0x7a, 0xd1, 0xc8, 0xc0,
+ 0x63, 0x0e, 0x7a, 0xb8, 0xc7, 0x80, 0xcc, 0x0e, 0x7c, 0x01, 0xc8, 0x9b,
+ 0x8c, 0x0e, 0x7b, 0xf0, 0xc7, 0x74, 0x6f, 0x0e, 0x7b, 0xc1, 0xc8, 0x49,
+ 0x54, 0x0e, 0x7b, 0xb0, 0xc5, 0x00, 0x47, 0x0e, 0x78, 0x71, 0xc4, 0x00,
+ 0xcd, 0x0e, 0x78, 0x10, 0xd5, 0x36, 0xc1, 0x0e, 0x79, 0xb8, 0xc6, 0x43,
+ 0xc0, 0x0e, 0x78, 0xe1, 0x42, 0x05, 0x27, 0x43, 0x0f, 0xea, 0xc5, 0x00,
+ 0x47, 0x0e, 0x78, 0x99, 0xc4, 0x00, 0xcd, 0x0e, 0x78, 0x38, 0xc5, 0x00,
+ 0x47, 0x0e, 0x78, 0x79, 0xc4, 0x00, 0xcd, 0x0e, 0x78, 0x18, 0xc5, 0x00,
+ 0x47, 0x0e, 0x78, 0x61, 0xc4, 0x00, 0xcd, 0x0e, 0x78, 0x00, 0xc6, 0x80,
+ 0xcd, 0x0e, 0x78, 0xc9, 0x4b, 0x91, 0xc8, 0x43, 0x0f, 0xf6, 0xc5, 0x00,
+ 0x47, 0x0e, 0x78, 0xb9, 0xc4, 0x00, 0xcd, 0x0e, 0x78, 0x58, 0xc5, 0xb6,
+ 0x6a, 0x05, 0x4e, 0x58, 0xc4, 0x39, 0x3f, 0x05, 0x4e, 0x49, 0xc3, 0x06,
+ 0x67, 0x05, 0x4e, 0x28, 0xc8, 0x65, 0x7a, 0x05, 0x4d, 0x81, 0xc4, 0x65,
+ 0x74, 0x05, 0x4d, 0x78, 0xc5, 0x65, 0x73, 0x05, 0x4d, 0x31, 0xc5, 0xdf,
+ 0xbd, 0x05, 0x4c, 0x48, 0xc6, 0xd4, 0xea, 0x05, 0x4c, 0xf8, 0xc6, 0xd4,
+ 0xea, 0x05, 0x4c, 0xc0, 0xc6, 0xd3, 0x6a, 0x05, 0x4c, 0x52, 0x03, 0x10,
+ 0x02, 0xca, 0x65, 0x78, 0x05, 0x4d, 0x18, 0xca, 0x65, 0x78, 0x05, 0x4c,
+ 0xf0, 0xc6, 0xd4, 0xea, 0x05, 0x4d, 0x08, 0xca, 0x65, 0x78, 0x05, 0x4c,
+ 0xe0, 0xc5, 0x65, 0x73, 0x05, 0x4c, 0x89, 0xc5, 0x93, 0xd3, 0x05, 0x4c,
+ 0x80, 0xd0, 0x3b, 0xee, 0x01, 0x2c, 0xf8, 0x56, 0x2c, 0xab, 0xc3, 0x10,
+ 0x08, 0x46, 0x00, 0xc7, 0x43, 0x10, 0x14, 0x9a, 0x05, 0x22, 0xd1, 0x96,
+ 0x05, 0x22, 0xc9, 0x91, 0x05, 0x22, 0x9b, 0x03, 0x10, 0x20, 0x92, 0x05,
+ 0x22, 0xe0, 0x92, 0x05, 0x22, 0xc1, 0x9a, 0x05, 0x22, 0xb1, 0x96, 0x05,
+ 0x22, 0xa8, 0x87, 0x05, 0x22, 0x83, 0x03, 0x10, 0x28, 0x92, 0x05, 0x22,
+ 0x69, 0x9a, 0x05, 0x22, 0x59, 0x96, 0x05, 0x22, 0x50, 0x94, 0x05, 0x22,
+ 0x4b, 0x03, 0x10, 0x34, 0x92, 0x05, 0x22, 0x39, 0x9a, 0x05, 0x22, 0x29,
+ 0x96, 0x05, 0x22, 0x20, 0x94, 0x05, 0x22, 0x1b, 0x03, 0x10, 0x38, 0x92,
+ 0x05, 0x22, 0x09, 0x9a, 0x05, 0x21, 0xf9, 0x91, 0x05, 0x21, 0xd2, 0x03,
+ 0x10, 0x3c, 0x92, 0x05, 0x21, 0xf1, 0x9a, 0x05, 0x21, 0xe1, 0x96, 0x05,
+ 0x21, 0xd8, 0x87, 0x05, 0x21, 0xbb, 0x03, 0x10, 0x40, 0x92, 0x05, 0x21,
+ 0xa1, 0x9a, 0x05, 0x21, 0x91, 0x96, 0x05, 0x21, 0x88, 0x94, 0x05, 0x1d,
+ 0x53, 0x03, 0x10, 0x4c, 0x92, 0x05, 0x1d, 0x41, 0x9a, 0x05, 0x1d, 0x31,
+ 0x96, 0x05, 0x1d, 0x28, 0x94, 0x05, 0x1d, 0x23, 0x03, 0x10, 0x50, 0x92,
+ 0x05, 0x1d, 0x11, 0x9a, 0x05, 0x1d, 0x01, 0x96, 0x05, 0x1c, 0xf8, 0x92,
+ 0x05, 0x1c, 0xf1, 0x9a, 0x05, 0x1c, 0xe1, 0x96, 0x05, 0x1c, 0xd8, 0x92,
+ 0x05, 0x1c, 0xd1, 0x9a, 0x05, 0x1c, 0xc0, 0x92, 0x05, 0x1c, 0xb9, 0x9a,
+ 0x05, 0x1c, 0xa9, 0x96, 0x05, 0x1c, 0xa0, 0x9a, 0x05, 0x12, 0xe9, 0x92,
+ 0x05, 0x12, 0xf8, 0x96, 0x05, 0x13, 0x01, 0x9a, 0x05, 0x13, 0x09, 0x92,
+ 0x05, 0x13, 0x18, 0x96, 0x05, 0x13, 0x21, 0x9a, 0x05, 0x13, 0x28, 0x96,
+ 0x05, 0x13, 0x39, 0x9a, 0x05, 0x13, 0x41, 0x92, 0x05, 0x13, 0x50, 0x9a,
+ 0x05, 0x13, 0x59, 0x92, 0x05, 0x13, 0x68, 0x96, 0x05, 0x17, 0x69, 0x9a,
+ 0x05, 0x17, 0x71, 0x92, 0x05, 0x17, 0x81, 0x87, 0x05, 0x17, 0x92, 0x03,
+ 0x10, 0x54, 0x96, 0x05, 0x17, 0xa1, 0x9a, 0x05, 0x17, 0xa9, 0x92, 0x05,
+ 0x17, 0xb8, 0x91, 0x05, 0x17, 0xcb, 0x03, 0x10, 0x5c, 0x9a, 0x05, 0x17,
+ 0xf1, 0x92, 0x05, 0x18, 0x01, 0x94, 0x05, 0x18, 0x12, 0x03, 0x10, 0x60,
+ 0x9a, 0x05, 0x17, 0xd1, 0x92, 0x05, 0x17, 0xd8, 0x9a, 0x05, 0x17, 0xe0,
+ 0x96, 0x05, 0x18, 0x19, 0x9a, 0x05, 0x18, 0x21, 0x92, 0x05, 0x18, 0x31,
+ 0x94, 0x05, 0x18, 0x42, 0x03, 0x10, 0x64, 0x96, 0x05, 0x03, 0xb1, 0x9a,
+ 0x05, 0x03, 0xb9, 0x92, 0x05, 0x03, 0xc9, 0x87, 0x05, 0x03, 0xda, 0x03,
+ 0x10, 0x68, 0x96, 0x05, 0x03, 0xe9, 0x9a, 0x05, 0x03, 0xf1, 0x92, 0x05,
+ 0x04, 0x00, 0x96, 0x05, 0x04, 0x09, 0x9a, 0x05, 0x04, 0x11, 0x92, 0x05,
+ 0x04, 0x20, 0x96, 0x05, 0x04, 0x29, 0x9a, 0x05, 0x04, 0x31, 0x92, 0x05,
+ 0x04, 0x41, 0x94, 0x05, 0x04, 0x52, 0x03, 0x10, 0x70, 0x96, 0x05, 0x04,
+ 0x59, 0x9a, 0x05, 0x04, 0x61, 0x92, 0x05, 0x04, 0x71, 0x94, 0x05, 0x04,
+ 0x82, 0x03, 0x10, 0x74, 0x96, 0x05, 0x0a, 0x89, 0x9a, 0x05, 0x0a, 0x91,
+ 0x92, 0x05, 0x0a, 0xa1, 0x87, 0x05, 0x0a, 0xb2, 0x03, 0x10, 0x78, 0x96,
+ 0x05, 0x0a, 0xb9, 0x9a, 0x05, 0x0a, 0xc1, 0x92, 0x05, 0x0a, 0xd0, 0x96,
+ 0x05, 0x0a, 0xd9, 0x9a, 0x05, 0x0a, 0xe1, 0x92, 0x05, 0x0a, 0xf0, 0x9a,
+ 0x05, 0x0a, 0xf9, 0x92, 0x05, 0x0b, 0x08, 0x96, 0x05, 0x0b, 0x11, 0x9a,
+ 0x05, 0x0b, 0x19, 0x92, 0x05, 0x0b, 0x29, 0x94, 0x05, 0x0b, 0x3a, 0x03,
+ 0x10, 0x7c, 0x9a, 0x05, 0x0c, 0x09, 0x92, 0x05, 0x0c, 0x18, 0x9a, 0x05,
+ 0x0c, 0x39, 0x92, 0x05, 0x0c, 0x48, 0x9a, 0x05, 0x0c, 0xa8, 0x92, 0x05,
+ 0x21, 0x81, 0x9a, 0x05, 0x21, 0x71, 0x96, 0x05, 0x21, 0x68, 0x94, 0x05,
+ 0x21, 0x63, 0x03, 0x10, 0x80, 0x92, 0x05, 0x21, 0x51, 0x9a, 0x05, 0x21,
+ 0x41, 0x96, 0x05, 0x21, 0x39, 0x91, 0x05, 0x21, 0x0a, 0x03, 0x10, 0x84,
+ 0x92, 0x05, 0x21, 0x31, 0x9a, 0x05, 0x21, 0x21, 0x96, 0x05, 0x21, 0x18,
+ 0x87, 0x05, 0x20, 0xf3, 0x03, 0x10, 0x8c, 0x92, 0x05, 0x20, 0xd9, 0x9a,
+ 0x05, 0x20, 0xc9, 0x96, 0x05, 0x20, 0xc0, 0x9a, 0x05, 0x1f, 0xd0, 0x9a,
+ 0x05, 0x1f, 0xa0, 0x92, 0x05, 0x1f, 0x59, 0x9a, 0x05, 0x1f, 0x49, 0x96,
+ 0x05, 0x1f, 0x40, 0x92, 0x05, 0x1f, 0x39, 0x9a, 0x05, 0x1f, 0x29, 0x96,
+ 0x05, 0x1f, 0x20, 0x9a, 0x05, 0x20, 0xb0, 0x9a, 0x05, 0x20, 0x80, 0x92,
+ 0x05, 0x20, 0x39, 0x9a, 0x05, 0x20, 0x29, 0x96, 0x05, 0x20, 0x20, 0x92,
+ 0x05, 0x20, 0x19, 0x9a, 0x05, 0x20, 0x09, 0x96, 0x05, 0x20, 0x00, 0x9a,
+ 0x05, 0x1e, 0xf0, 0x9a, 0x05, 0x1e, 0xc0, 0x92, 0x05, 0x1e, 0x61, 0x9a,
+ 0x05, 0x1e, 0x50, 0x94, 0x05, 0x1c, 0x9b, 0x03, 0x10, 0x98, 0x92, 0x05,
+ 0x1c, 0x89, 0x9a, 0x05, 0x1c, 0x79, 0x96, 0x05, 0x1c, 0x70, 0x94, 0x05,
+ 0x1c, 0x6b, 0x03, 0x10, 0x9c, 0x92, 0x05, 0x1c, 0x59, 0x9a, 0x05, 0x1c,
+ 0x49, 0x96, 0x05, 0x1c, 0x41, 0x91, 0x05, 0x1b, 0xfa, 0x03, 0x10, 0xa0,
+ 0x92, 0x05, 0x1c, 0x39, 0x9a, 0x05, 0x1c, 0x29, 0x96, 0x05, 0x1c, 0x20,
+ 0x92, 0x05, 0x1c, 0x19, 0x9a, 0x05, 0x1c, 0x09, 0x96, 0x05, 0x1c, 0x00,
+ 0x9a, 0x05, 0x1b, 0xe0, 0x94, 0x05, 0x1e, 0x2b, 0x03, 0x10, 0xa4, 0x92,
+ 0x05, 0x1e, 0x19, 0x9a, 0x05, 0x1e, 0x09, 0x96, 0x05, 0x1e, 0x00, 0x94,
+ 0x05, 0x1d, 0xfb, 0x03, 0x10, 0xa8, 0x92, 0x05, 0x1d, 0xe9, 0x9a, 0x05,
+ 0x1d, 0xd9, 0x96, 0x05, 0x1d, 0xd1, 0x91, 0x05, 0x1d, 0x82, 0x03, 0x10,
+ 0xac, 0x92, 0x05, 0x1d, 0xc9, 0x9a, 0x05, 0x1d, 0xb9, 0x96, 0x05, 0x1d,
+ 0xb0, 0x92, 0x05, 0x1d, 0xa9, 0x9a, 0x05, 0x1d, 0x99, 0x96, 0x05, 0x1d,
+ 0x90, 0x92, 0x05, 0x1d, 0x71, 0x9a, 0x05, 0x1d, 0x61, 0x96, 0x05, 0x1d,
+ 0x58, 0x92, 0x05, 0x1a, 0x19, 0x94, 0x05, 0x1a, 0x2b, 0x03, 0x10, 0xb4,
+ 0x96, 0x05, 0x1a, 0x01, 0x9a, 0x05, 0x1a, 0x08, 0x96, 0x05, 0x19, 0x51,
+ 0x9a, 0x05, 0x19, 0x59, 0x92, 0x05, 0x19, 0x69, 0x87, 0x05, 0x19, 0x7a,
+ 0x03, 0x10, 0xb8, 0x96, 0x05, 0x19, 0x89, 0x9a, 0x05, 0x19, 0x91, 0x92,
+ 0x05, 0x19, 0xa0, 0x96, 0x05, 0x19, 0xa9, 0x9a, 0x05, 0x19, 0xb1, 0x92,
+ 0x05, 0x19, 0xc0, 0x9a, 0x05, 0x19, 0xc9, 0x92, 0x05, 0x19, 0xd8, 0x96,
+ 0x05, 0x19, 0xe1, 0x9a, 0x05, 0x19, 0xe9, 0x92, 0x05, 0x19, 0xf8, 0x9a,
+ 0x05, 0x18, 0x69, 0x92, 0x05, 0x18, 0x78, 0x9a, 0x05, 0x18, 0xa1, 0x92,
+ 0x05, 0x18, 0xb0, 0x9a, 0x05, 0x19, 0x10, 0x9a, 0x05, 0x19, 0x40, 0x94,
+ 0x05, 0x1b, 0xdb, 0x03, 0x10, 0xc0, 0x92, 0x05, 0x1b, 0xc9, 0x9a, 0x05,
+ 0x1b, 0xb9, 0x96, 0x05, 0x1b, 0xb0, 0x94, 0x05, 0x1b, 0xab, 0x03, 0x10,
+ 0xc4, 0x92, 0x05, 0x1b, 0x99, 0x9a, 0x05, 0x1b, 0x89, 0x91, 0x05, 0x1b,
+ 0x3a, 0x03, 0x10, 0xc8, 0x92, 0x05, 0x1b, 0x81, 0x9a, 0x05, 0x1b, 0x71,
+ 0x96, 0x05, 0x1b, 0x68, 0x92, 0x05, 0x1b, 0x61, 0x9a, 0x05, 0x1b, 0x51,
+ 0x96, 0x05, 0x1b, 0x48, 0x92, 0x05, 0x1b, 0x21, 0x96, 0x05, 0x1b, 0x09,
+ 0x9a, 0x05, 0x1b, 0x10, 0x9a, 0x05, 0x16, 0x70, 0x9a, 0x05, 0x16, 0x40,
+ 0x9a, 0x05, 0x15, 0xd1, 0x92, 0x05, 0x15, 0xe0, 0x96, 0x05, 0x15, 0x91,
+ 0x9a, 0x05, 0x15, 0x99, 0x92, 0x05, 0x15, 0xa8, 0x92, 0x05, 0x14, 0xd1,
+ 0x9a, 0x05, 0x14, 0xc0, 0x9a, 0x05, 0x15, 0x30, 0x92, 0x05, 0x14, 0x99,
+ 0x9a, 0x05, 0x14, 0x88, 0x9a, 0x05, 0x16, 0xe1, 0x92, 0x05, 0x16, 0xf0,
+ 0x92, 0x05, 0x16, 0xd9, 0x9a, 0x05, 0x16, 0xc9, 0x96, 0x05, 0x16, 0xc0,
+ 0x87, 0x05, 0x16, 0xb3, 0x03, 0x10, 0xd4, 0x92, 0x05, 0x16, 0x99, 0x9a,
+ 0x05, 0x16, 0x89, 0x96, 0x05, 0x16, 0x80, 0x9a, 0x05, 0x16, 0xf9, 0x92,
+ 0x05, 0x17, 0x08, 0x9a, 0x05, 0x17, 0x11, 0x92, 0x05, 0x17, 0x21, 0x94,
+ 0x05, 0x17, 0x32, 0x03, 0x10, 0xe0, 0x96, 0x05, 0x17, 0x39, 0x9a, 0x05,
+ 0x17, 0x41, 0x92, 0x05, 0x17, 0x51, 0x94, 0x05, 0x17, 0x62, 0x03, 0x10,
+ 0xe4, 0x94, 0x05, 0x1b, 0x03, 0x03, 0x10, 0xe8, 0x92, 0x05, 0x1a, 0xf1,
+ 0x9a, 0x05, 0x1a, 0xe1, 0x96, 0x05, 0x1a, 0xd8, 0x94, 0x05, 0x1a, 0xd3,
+ 0x03, 0x10, 0xec, 0x92, 0x05, 0x1a, 0xc1, 0x9a, 0x05, 0x1a, 0xb1, 0x96,
+ 0x05, 0x1a, 0xa9, 0x91, 0x05, 0x1a, 0x5a, 0x03, 0x10, 0xf0, 0x92, 0x05,
+ 0x1a, 0xa1, 0x9a, 0x05, 0x1a, 0x91, 0x96, 0x05, 0x1a, 0x88, 0x92, 0x05,
+ 0x1a, 0x81, 0x96, 0x05, 0x1a, 0x69, 0x9a, 0x05, 0x1a, 0x70, 0x96, 0x05,
+ 0x1a, 0x31, 0x9a, 0x05, 0x1a, 0x39, 0x92, 0x05, 0x1a, 0x48, 0x9a, 0x05,
+ 0x15, 0x60, 0x92, 0x05, 0x14, 0x31, 0x9a, 0x05, 0x14, 0x20, 0x92, 0x05,
+ 0x12, 0xe1, 0x9a, 0x05, 0x12, 0xd0, 0x92, 0x05, 0x12, 0xc9, 0x9a, 0x05,
+ 0x12, 0xb9, 0x96, 0x05, 0x12, 0xb0, 0x92, 0x05, 0x12, 0xa9, 0x9a, 0x05,
+ 0x12, 0x99, 0x96, 0x05, 0x12, 0x90, 0x92, 0x05, 0x12, 0x89, 0x9a, 0x05,
+ 0x12, 0x79, 0x96, 0x05, 0x12, 0x70, 0x96, 0x05, 0x12, 0x31, 0x9a, 0x05,
+ 0x12, 0x39, 0x92, 0x05, 0x12, 0x49, 0x87, 0x05, 0x12, 0x62, 0x03, 0x10,
+ 0xf8, 0x9a, 0x05, 0x13, 0x78, 0x96, 0x05, 0x04, 0x89, 0x9a, 0x05, 0x04,
+ 0x91, 0x92, 0x05, 0x04, 0xa1, 0x87, 0x05, 0x04, 0xb2, 0x03, 0x11, 0x04,
+ 0x96, 0x05, 0x04, 0xc1, 0x9a, 0x05, 0x04, 0xc9, 0x92, 0x05, 0x04, 0xd8,
+ 0x91, 0x05, 0x04, 0xeb, 0x03, 0x11, 0x0c, 0x96, 0x05, 0x05, 0x19, 0x9a,
+ 0x05, 0x05, 0x21, 0x92, 0x05, 0x05, 0x31, 0x94, 0x05, 0x05, 0x42, 0x03,
+ 0x11, 0x14, 0x96, 0x05, 0x04, 0xf9, 0x9a, 0x05, 0x05, 0x01, 0x92, 0x05,
+ 0x05, 0x10, 0x9a, 0x05, 0x05, 0x49, 0x92, 0x05, 0x05, 0x59, 0x94, 0x05,
+ 0x05, 0x6a, 0x03, 0x11, 0x18, 0x96, 0x05, 0x0b, 0x41, 0x9a, 0x05, 0x0b,
+ 0x49, 0x92, 0x05, 0x0b, 0x59, 0x87, 0x05, 0x0b, 0x72, 0x03, 0x11, 0x1c,
+ 0x96, 0x05, 0x0b, 0x81, 0x9a, 0x05, 0x0b, 0x89, 0x92, 0x05, 0x0b, 0x98,
+ 0x91, 0x05, 0x0b, 0xab, 0x03, 0x11, 0x28, 0x96, 0x05, 0x0b, 0xe0, 0x96,
+ 0x05, 0x0b, 0xb1, 0x9a, 0x05, 0x0b, 0xb9, 0x92, 0x05, 0x0b, 0xc0, 0x9a,
+ 0x05, 0x0b, 0xc9, 0x92, 0x05, 0x0b, 0xd8, 0x96, 0x05, 0x0c, 0xb9, 0x9a,
+ 0x05, 0x0c, 0xc1, 0x92, 0x05, 0x0c, 0xd1, 0x87, 0x05, 0x0c, 0xe2, 0x03,
+ 0x11, 0x2c, 0x96, 0x05, 0x0c, 0xf1, 0x9a, 0x05, 0x0c, 0xf9, 0x92, 0x05,
+ 0x0d, 0x08, 0x91, 0x05, 0x0d, 0x1b, 0x03, 0x11, 0x34, 0x96, 0x05, 0x0d,
+ 0x59, 0x9a, 0x05, 0x0d, 0x61, 0x92, 0x05, 0x0d, 0x71, 0x94, 0x05, 0x0d,
+ 0x82, 0x03, 0x11, 0x3c, 0x9a, 0x05, 0x0d, 0x29, 0x92, 0x05, 0x0d, 0x38,
+ 0x9a, 0x05, 0x0d, 0x41, 0x92, 0x05, 0x0d, 0x50, 0x96, 0x05, 0x0d, 0x89,
+ 0x9a, 0x05, 0x0d, 0x91, 0x92, 0x05, 0x0d, 0xa1, 0x94, 0x05, 0x0d, 0xb2,
+ 0x03, 0x11, 0x40, 0x9a, 0x05, 0x23, 0xb1, 0x96, 0x05, 0x23, 0xa9, 0x92,
+ 0x05, 0x23, 0xc0, 0x96, 0x05, 0x23, 0xc9, 0x9a, 0x05, 0x23, 0xd1, 0x92,
+ 0x05, 0x23, 0xe0, 0x9a, 0x05, 0x24, 0x28, 0x9a, 0x05, 0x24, 0x58, 0x9a,
+ 0x05, 0x23, 0x78, 0x96, 0x05, 0x23, 0x09, 0x9a, 0x05, 0x23, 0x11, 0x92,
+ 0x05, 0x23, 0x20, 0x92, 0x05, 0x12, 0x19, 0x94, 0x05, 0x12, 0x2b, 0x03,
+ 0x11, 0x44, 0x91, 0x05, 0x11, 0xbb, 0x03, 0x11, 0x48, 0x96, 0x05, 0x12,
+ 0x01, 0x9a, 0x05, 0x12, 0x08, 0x9a, 0x05, 0x11, 0x80, 0x96, 0x05, 0x11,
+ 0x91, 0x9a, 0x05, 0x11, 0x99, 0x92, 0x05, 0x11, 0xa8, 0x96, 0x05, 0x11,
+ 0xc9, 0x9a, 0x05, 0x11, 0xd1, 0x92, 0x05, 0x11, 0xe0, 0x9a, 0x05, 0x11,
+ 0xe9, 0x92, 0x05, 0x11, 0xf8, 0x9a, 0x05, 0x05, 0x91, 0x92, 0x05, 0x05,
+ 0xa0, 0x96, 0x05, 0x05, 0xc9, 0x9a, 0x05, 0x05, 0xd1, 0x92, 0x05, 0x05,
+ 0xe0, 0x9a, 0x05, 0x06, 0x38, 0x96, 0x05, 0x00, 0xd1, 0x9a, 0x05, 0x00,
+ 0xd9, 0x92, 0x05, 0x00, 0xe8, 0x9a, 0x05, 0x01, 0x11, 0x92, 0x05, 0x01,
+ 0x20, 0x9a, 0x05, 0x01, 0x80, 0x9a, 0x05, 0x01, 0xb0, 0x96, 0x05, 0x02,
+ 0xb1, 0x9a, 0x05, 0x02, 0xb9, 0x92, 0x05, 0x02, 0xc9, 0x87, 0x05, 0x02,
+ 0xe2, 0x03, 0x11, 0x50, 0x96, 0x05, 0x02, 0xf1, 0x9a, 0x05, 0x02, 0xf9,
+ 0x92, 0x05, 0x03, 0x08, 0x91, 0x05, 0x03, 0x1b, 0x03, 0x11, 0x5c, 0x96,
+ 0x05, 0x03, 0x61, 0x9a, 0x05, 0x03, 0x69, 0x92, 0x05, 0x03, 0x79, 0x94,
+ 0x05, 0x03, 0x8a, 0x03, 0x11, 0x64, 0x96, 0x05, 0x03, 0x29, 0x9a, 0x05,
+ 0x03, 0x31, 0x92, 0x05, 0x03, 0x40, 0x9a, 0x05, 0x03, 0x49, 0x92, 0x05,
+ 0x03, 0x58, 0x96, 0x05, 0x03, 0x91, 0x9a, 0x05, 0x03, 0x99, 0x92, 0x05,
+ 0x03, 0xa8, 0x9a, 0x05, 0x01, 0xe1, 0x92, 0x05, 0x01, 0xf0, 0x9a, 0x05,
+ 0x02, 0x19, 0x92, 0x05, 0x02, 0x28, 0x9a, 0x05, 0x02, 0x70, 0x9a, 0x05,
+ 0x02, 0xa0, 0x9a, 0x05, 0x06, 0xe0, 0x96, 0x05, 0x07, 0x31, 0x9a, 0x05,
+ 0x07, 0x39, 0x92, 0x05, 0x07, 0x48, 0x9a, 0x05, 0x07, 0xc0, 0x9a, 0x05,
+ 0x07, 0xf0, 0x9a, 0x05, 0x08, 0x21, 0x92, 0x05, 0x08, 0x30, 0x9a, 0x05,
+ 0x08, 0x58, 0x9a, 0x05, 0x08, 0xc0, 0x96, 0x05, 0x09, 0xb1, 0x9a, 0x05,
+ 0x09, 0xb9, 0x92, 0x05, 0x09, 0xc9, 0x87, 0x05, 0x09, 0xda, 0x03, 0x11,
+ 0x68, 0x96, 0x05, 0x09, 0xe9, 0x9a, 0x05, 0x09, 0xf1, 0x92, 0x05, 0x0a,
+ 0x00, 0x91, 0x05, 0x0a, 0x13, 0x03, 0x11, 0x70, 0x96, 0x05, 0x0a, 0x59,
+ 0x9a, 0x05, 0x0a, 0x61, 0x92, 0x05, 0x0a, 0x71, 0x94, 0x05, 0x0a, 0x82,
+ 0x03, 0x11, 0x78, 0x96, 0x05, 0x0a, 0x21, 0x9a, 0x05, 0x0a, 0x29, 0x92,
+ 0x05, 0x0a, 0x38, 0x9a, 0x05, 0x0a, 0x41, 0x92, 0x05, 0x0a, 0x50, 0x9a,
+ 0x05, 0x08, 0xf1, 0x92, 0x05, 0x09, 0x00, 0x96, 0x05, 0x09, 0x29, 0x9a,
+ 0x05, 0x09, 0x31, 0x92, 0x05, 0x09, 0x40, 0x9a, 0x05, 0x09, 0xa0, 0x96,
+ 0x05, 0x0d, 0xd9, 0x9a, 0x05, 0x0d, 0xe1, 0x92, 0x05, 0x0d, 0xf0, 0x96,
+ 0x05, 0x0e, 0x19, 0x9a, 0x05, 0x0e, 0x21, 0x92, 0x05, 0x0e, 0x30, 0x9a,
+ 0x05, 0x0e, 0x90, 0x9a, 0x05, 0x0e, 0xc0, 0x96, 0x05, 0x0e, 0xf1, 0x9a,
+ 0x05, 0x0e, 0xf9, 0x92, 0x05, 0x0f, 0x08, 0x96, 0x05, 0x0f, 0x31, 0x9a,
+ 0x05, 0x0f, 0x39, 0x92, 0x05, 0x0f, 0x48, 0x9a, 0x05, 0x0f, 0xb0, 0x96,
+ 0x05, 0x10, 0xa1, 0x9a, 0x05, 0x10, 0xa9, 0x87, 0x05, 0x10, 0xc2, 0x03,
+ 0x11, 0x7c, 0x96, 0x05, 0x10, 0xd1, 0x9a, 0x05, 0x10, 0xd9, 0x92, 0x05,
+ 0x10, 0xe8, 0x91, 0x05, 0x11, 0x03, 0x03, 0x11, 0x84, 0x96, 0x05, 0x11,
+ 0x51, 0x9a, 0x05, 0x11, 0x59, 0x92, 0x05, 0x11, 0x69, 0x94, 0x05, 0x11,
+ 0x7a, 0x03, 0x11, 0x90, 0x96, 0x05, 0x11, 0x11, 0x9a, 0x05, 0x11, 0x19,
+ 0x92, 0x05, 0x11, 0x28, 0x96, 0x05, 0x11, 0x31, 0x9a, 0x05, 0x11, 0x39,
+ 0x92, 0x05, 0x11, 0x48, 0x9a, 0x05, 0x0f, 0xe1, 0x92, 0x05, 0x0f, 0xf0,
+ 0x9a, 0x05, 0x10, 0x19, 0x92, 0x05, 0x10, 0x28, 0x9a, 0x05, 0x10, 0x90,
+ 0x0c, 0xc3, 0x11, 0x94, 0x0a, 0xc3, 0x11, 0x9f, 0x42, 0x01, 0x0d, 0xc3,
+ 0x11, 0xb2, 0xc2, 0x18, 0x7a, 0x00, 0xaa, 0x09, 0xc2, 0x01, 0x47, 0x00,
+ 0xa5, 0x11, 0x8f, 0x00, 0xa5, 0xf8, 0x9b, 0x00, 0xc6, 0x11, 0x91, 0x00,
+ 0xa8, 0xf8, 0x83, 0x00, 0xa9, 0x18, 0x8b, 0x00, 0xa8, 0xd8, 0x83, 0x08,
+ 0xd5, 0xd3, 0x03, 0x11, 0xcb, 0x91, 0x08, 0xd5, 0xc3, 0x03, 0x11, 0xcf,
+ 0x8b, 0x08, 0xd5, 0xb2, 0x03, 0x11, 0xd3, 0x83, 0x08, 0xd5, 0xa3, 0x03,
+ 0x11, 0xd7, 0x91, 0x08, 0xd5, 0x93, 0x03, 0x11, 0xdb, 0x8b, 0x08, 0xd5,
+ 0x82, 0x03, 0x11, 0xdf, 0xc2, 0x0e, 0x30, 0x00, 0xa0, 0xd9, 0xc2, 0x02,
+ 0x14, 0x00, 0xa0, 0xb0, 0xc3, 0xe7, 0x0f, 0x00, 0xa8, 0x79, 0xc2, 0x00,
+ 0xbc, 0x00, 0xa8, 0x53, 0x03, 0x11, 0xe3, 0xc3, 0x00, 0x4b, 0x00, 0xa8,
+ 0x69, 0xc3, 0x14, 0x8f, 0x00, 0xa8, 0x21, 0xc3, 0x02, 0x6b, 0x00, 0xa8,
+ 0x60, 0x8b, 0x00, 0xac, 0x70, 0x83, 0x00, 0xab, 0xd0, 0x91, 0x00, 0xab,
+ 0xc0, 0x8b, 0x00, 0xab, 0xb0, 0x07, 0xc3, 0x11, 0xe7, 0x8b, 0x00, 0xa2,
+ 0xa1, 0x0e, 0xc3, 0x11, 0xef, 0x1c, 0x43, 0x12, 0x06, 0xc2, 0x00, 0x4c,
+ 0x00, 0xc7, 0x91, 0x83, 0x00, 0xb0, 0xd9, 0x8b, 0x00, 0xb0, 0xc9, 0x87,
+ 0x00, 0xb0, 0xbb, 0x03, 0x12, 0x1d, 0x91, 0x00, 0xb0, 0xb1, 0x97, 0x00,
+ 0xb0, 0xa1, 0x0c, 0x43, 0x12, 0x21, 0x19, 0xc3, 0x12, 0x38, 0x83, 0x00,
+ 0xaf, 0xa3, 0x03, 0x12, 0x40, 0x8b, 0x00, 0xaf, 0x99, 0x87, 0x00, 0xaf,
+ 0x8b, 0x03, 0x12, 0x44, 0x91, 0x00, 0xaf, 0x81, 0x97, 0x00, 0xaf, 0x79,
+ 0x0a, 0x43, 0x12, 0x48, 0x16, 0xc3, 0x12, 0x5f, 0x15, 0xc3, 0x12, 0x7a,
+ 0x0a, 0xc3, 0x12, 0x91, 0x0e, 0x43, 0x12, 0xa8, 0x83, 0x00, 0xb3, 0x31,
+ 0x8b, 0x00, 0xb3, 0x29, 0x87, 0x00, 0xb3, 0x1b, 0x03, 0x12, 0xc3, 0x91,
+ 0x00, 0xb3, 0x11, 0x97, 0x00, 0xb3, 0x08, 0x83, 0x00, 0xb0, 0x99, 0x8b,
+ 0x00, 0xb0, 0x91, 0x87, 0x00, 0xb0, 0x83, 0x03, 0x12, 0xc7, 0x91, 0x00,
+ 0xb0, 0x79, 0x97, 0x00, 0xb0, 0x70, 0x83, 0x00, 0xb0, 0x69, 0x8b, 0x00,
+ 0xb0, 0x61, 0x87, 0x00, 0xb0, 0x53, 0x03, 0x12, 0xcb, 0x91, 0x00, 0xb0,
+ 0x49, 0x97, 0x00, 0xb0, 0x40, 0x83, 0x00, 0xb0, 0x39, 0x8b, 0x00, 0xb0,
+ 0x31, 0x87, 0x00, 0xb0, 0x23, 0x03, 0x12, 0xcf, 0x91, 0x00, 0xb0, 0x19,
+ 0x97, 0x00, 0xb0, 0x11, 0x89, 0x00, 0xa6, 0x88, 0x8d, 0x00, 0xb0, 0x0b,
+ 0x03, 0x12, 0xd3, 0x0a, 0x43, 0x12, 0xea, 0x83, 0x00, 0xaf, 0x69, 0x8b,
+ 0x00, 0xaf, 0x61, 0x87, 0x00, 0xaf, 0x53, 0x03, 0x13, 0x01, 0x91, 0x00,
+ 0xaf, 0x49, 0x97, 0x00, 0xaf, 0x40, 0x19, 0xc3, 0x13, 0x05, 0xc2, 0x01,
+ 0xe6, 0x00, 0xa1, 0xb1, 0x8b, 0x00, 0xa1, 0xb8, 0x83, 0x00, 0xae, 0xa9,
+ 0x8b, 0x00, 0xae, 0xa1, 0x87, 0x00, 0xae, 0x93, 0x03, 0x13, 0x1c, 0x91,
+ 0x00, 0xae, 0x89, 0x97, 0x00, 0xae, 0x80, 0x83, 0x00, 0xae, 0x79, 0x8b,
+ 0x00, 0xae, 0x71, 0x87, 0x00, 0xae, 0x63, 0x03, 0x13, 0x20, 0x91, 0x00,
+ 0xae, 0x59, 0x97, 0x00, 0xae, 0x50, 0x0a, 0xc3, 0x13, 0x24, 0x97, 0x00,
+ 0xb1, 0x11, 0x91, 0x00, 0xb1, 0x19, 0x87, 0x00, 0xb1, 0x23, 0x03, 0x13,
+ 0x3b, 0x8b, 0x00, 0xb1, 0x31, 0x83, 0x00, 0xb1, 0x38, 0xc8, 0xc1, 0x43,
+ 0x00, 0xb2, 0x38, 0x97, 0x00, 0xb2, 0x01, 0x91, 0x00, 0xb2, 0x09, 0x87,
+ 0x00, 0xb2, 0x13, 0x03, 0x13, 0x3f, 0x8b, 0x00, 0xb2, 0x21, 0x83, 0x00,
+ 0xb2, 0x28, 0x97, 0x00, 0xb2, 0x71, 0x91, 0x00, 0xb2, 0x79, 0x87, 0x00,
+ 0xb2, 0x83, 0x03, 0x13, 0x43, 0x8b, 0x00, 0xb2, 0x91, 0x83, 0x00, 0xb2,
+ 0x99, 0x8a, 0x00, 0xb2, 0xd2, 0x03, 0x13, 0x47, 0x83, 0x00, 0xc7, 0x38,
+ 0x91, 0x00, 0xc7, 0x30, 0x83, 0x00, 0xab, 0x40, 0x8e, 0x00, 0xa7, 0x5b,
+ 0x03, 0x13, 0x5e, 0x94, 0x00, 0xaa, 0x8b, 0x03, 0x13, 0x74, 0x16, 0xc3,
+ 0x13, 0x8a, 0xc4, 0xe6, 0xff, 0x00, 0xaa, 0xe1, 0x9b, 0x00, 0xaa, 0x03,
+ 0x03, 0x13, 0x94, 0x15, 0xc3, 0x13, 0x98, 0x92, 0x00, 0xa2, 0x53, 0x03,
+ 0x13, 0xa2, 0x42, 0xcb, 0xdb, 0xc3, 0x13, 0xa6, 0x19, 0xc3, 0x13, 0xbf,
+ 0x42, 0x00, 0xbc, 0xc3, 0x13, 0xd8, 0x8f, 0x00, 0xa5, 0xe3, 0x03, 0x13,
+ 0xf1, 0x42, 0x00, 0x6b, 0x43, 0x13, 0xf5, 0xc8, 0xbe, 0xcb, 0x00, 0xb3,
+ 0xf1, 0xc2, 0x01, 0x47, 0x00, 0xac, 0xfa, 0x03, 0x14, 0x00, 0xc9, 0xb3,
+ 0x33, 0x00, 0xc6, 0xf9, 0x0b, 0x43, 0x14, 0x14, 0xc9, 0xb5, 0x07, 0x00,
+ 0xc6, 0xf1, 0xd6, 0x2f, 0xad, 0x00, 0xa1, 0x40, 0x45, 0x02, 0x93, 0xc3,
+ 0x14, 0x20, 0xc7, 0x32, 0x61, 0x00, 0xa1, 0x50, 0x91, 0x00, 0xc6, 0x5b,
+ 0x03, 0x14, 0x2c, 0x8b, 0x00, 0xc6, 0x3a, 0x03, 0x14, 0x30, 0x96, 0x08,
+ 0x2a, 0xb0, 0x8d, 0x08, 0x2a, 0x80, 0x98, 0x05, 0x5d, 0xc1, 0x97, 0x05,
+ 0x5d, 0xb9, 0x91, 0x05, 0x5d, 0xb1, 0x8b, 0x05, 0x5d, 0xa9, 0x83, 0x05,
+ 0x5d, 0x99, 0x87, 0x05, 0x5d, 0xa0, 0x98, 0x05, 0x5d, 0x91, 0x83, 0x05,
+ 0x5d, 0x69, 0x87, 0x05, 0x5d, 0x71, 0x97, 0x05, 0x5d, 0x89, 0x8b, 0x05,
+ 0x5d, 0x79, 0x91, 0x05, 0x5d, 0x80, 0x8a, 0x05, 0x5c, 0x80, 0x8a, 0x00,
+ 0x9e, 0x00, 0x83, 0x00, 0x9e, 0xe9, 0x87, 0x00, 0x9e, 0xf1, 0x8b, 0x00,
+ 0x9e, 0xf9, 0x91, 0x00, 0x9f, 0x01, 0x97, 0x00, 0x9f, 0x09, 0x98, 0x00,
+ 0x9f, 0x10, 0x83, 0x00, 0x9f, 0x19, 0x87, 0x00, 0x9f, 0x21, 0x8b, 0x00,
+ 0x9f, 0x29, 0x91, 0x00, 0x9f, 0x31, 0x97, 0x00, 0x9f, 0x39, 0x98, 0x00,
+ 0x9f, 0x40, 0xcc, 0x87, 0x30, 0x00, 0x24, 0x21, 0xc5, 0xce, 0x31, 0x05,
+ 0x33, 0xe8, 0x00, 0x43, 0x14, 0x34, 0x88, 0x05, 0x34, 0xf1, 0x8e, 0x01,
+ 0x6f, 0x39, 0x8f, 0x01, 0x6f, 0x41, 0x90, 0x01, 0x6f, 0x49, 0x94, 0x01,
+ 0x6f, 0x61, 0x95, 0x01, 0x6f, 0x69, 0xc4, 0xe4, 0xeb, 0x01, 0x6f, 0xa8,
+ 0x48, 0xb9, 0xb3, 0xc3, 0x14, 0x40, 0x87, 0x01, 0x6f, 0xb8, 0xcb, 0x94,
+ 0x3b, 0x05, 0x33, 0xa0, 0x8b, 0x05, 0x33, 0xb1, 0xc3, 0x1a, 0xd0, 0x05,
+ 0x33, 0xc9, 0xc2, 0x00, 0x97, 0x01, 0x6f, 0xd1, 0x97, 0x01, 0x6f, 0xd8,
+ 0xc7, 0x8a, 0x59, 0x05, 0x33, 0xc0, 0xc8, 0x84, 0x4c, 0x05, 0x33, 0xd1,
+ 0x0a, 0x43, 0x14, 0x4e, 0xc4, 0x73, 0xe1, 0x05, 0x33, 0xd8, 0x87, 0x01,
+ 0x6f, 0x01, 0xc6, 0xcc, 0x54, 0x01, 0x6f, 0xf0, 0x87, 0x01, 0x6f, 0x19,
+ 0xc4, 0xdd, 0x83, 0x01, 0x6f, 0xc0, 0xc2, 0x00, 0x5b, 0x01, 0x6f, 0x21,
+ 0x87, 0x01, 0x6f, 0x28, 0x87, 0x01, 0x6f, 0x71, 0xc2, 0x00, 0xa7, 0x01,
+ 0x6f, 0x80, 0xc6, 0x01, 0x16, 0x00, 0x18, 0xa8, 0xc7, 0xcc, 0x53, 0x0f,
+ 0x01, 0x49, 0xc9, 0xac, 0x8e, 0x0f, 0x01, 0x38, 0x14, 0xc3, 0x14, 0x58,
0xc3, 0x00, 0x15, 0x0f, 0x00, 0x28, 0x83, 0x0f, 0x00, 0xf1, 0xc2, 0x00,
- 0xb3, 0x0f, 0x00, 0xd8, 0xc9, 0xb3, 0x03, 0x0e, 0x92, 0x21, 0x16, 0x43,
- 0x11, 0x4c, 0x47, 0x02, 0x90, 0xc3, 0x11, 0x58, 0x46, 0x06, 0x97, 0x43,
- 0x11, 0x74, 0x02, 0xc3, 0x11, 0x8e, 0x00, 0x43, 0x11, 0x9a, 0xc6, 0x01,
- 0x61, 0x00, 0x18, 0xb8, 0x45, 0x01, 0x0f, 0xc3, 0x11, 0xa9, 0x42, 0x00,
- 0xa4, 0xc3, 0x11, 0xb3, 0x4c, 0x1a, 0x3f, 0xc3, 0x11, 0xbf, 0xca, 0x9b,
- 0x33, 0x00, 0x18, 0xc8, 0xe0, 0x0b, 0xc7, 0x01, 0x07, 0x60, 0x44, 0x01,
- 0x10, 0xc3, 0x11, 0xcb, 0x45, 0x01, 0x94, 0x43, 0x11, 0xd5, 0xc5, 0x00,
- 0x95, 0x01, 0x07, 0x09, 0xc5, 0x01, 0x62, 0x00, 0x1a, 0x68, 0xcb, 0x90,
- 0x74, 0x01, 0x06, 0x81, 0x48, 0xb8, 0x35, 0x43, 0x11, 0xe1, 0xca, 0xa2,
- 0xb6, 0x00, 0xd6, 0x19, 0xca, 0x0b, 0x92, 0x00, 0xd6, 0x08, 0xcd, 0x45,
- 0xb6, 0x00, 0x19, 0xb1, 0xce, 0x2d, 0xbf, 0x00, 0x19, 0xc0, 0x46, 0x00,
- 0x6b, 0x43, 0x11, 0xed, 0x46, 0x00, 0x6b, 0x43, 0x11, 0xf9, 0xcf, 0x66,
- 0xaa, 0x00, 0xef, 0x91, 0xc4, 0xd9, 0x9f, 0x00, 0xef, 0x39, 0x98, 0x00,
- 0xee, 0xb1, 0x91, 0x00, 0xee, 0xa9, 0x87, 0x00, 0xee, 0xa0, 0xc6, 0x01,
- 0x61, 0x00, 0xd5, 0xf8, 0xc2, 0x00, 0xe5, 0x00, 0xef, 0x79, 0xc2, 0x00,
- 0x6e, 0x00, 0xee, 0xc8, 0xd9, 0x20, 0xfe, 0x00, 0xef, 0x61, 0xc5, 0xc0,
- 0x96, 0x00, 0xef, 0x28, 0xd5, 0x34, 0x24, 0x00, 0xee, 0x98, 0xc4, 0x73,
- 0xed, 0x00, 0xef, 0x50, 0xc3, 0x05, 0xe7, 0x00, 0xef, 0x48, 0x00, 0x43,
- 0x12, 0x05, 0x47, 0x6a, 0x62, 0x43, 0x12, 0x11, 0xc8, 0xbf, 0x3d, 0x00,
- 0xee, 0xb8, 0xcd, 0x77, 0x16, 0x00, 0xd6, 0x00, 0xc6, 0x01, 0x61, 0x07,
- 0xf1, 0x38, 0xc6, 0x01, 0x61, 0x07, 0xf1, 0x40, 0x49, 0x03, 0x3b, 0xc3,
- 0x12, 0x21, 0xce, 0x1f, 0x15, 0x00, 0x1b, 0x0b, 0x03, 0x12, 0x2d, 0xd0,
- 0x30, 0x93, 0x00, 0xee, 0x69, 0x12, 0xc3, 0x12, 0x33, 0x11, 0xc3, 0x12,
- 0x3f, 0xcc, 0x84, 0x40, 0x00, 0x18, 0x59, 0xcc, 0x1f, 0x94, 0x00, 0x18,
- 0x79, 0xc8, 0x80, 0x80, 0x00, 0x19, 0x99, 0x42, 0x00, 0x5b, 0xc3, 0x12,
- 0x4b, 0xc5, 0x1f, 0x0a, 0x00, 0x1a, 0xeb, 0x03, 0x12, 0x57, 0xc6, 0x60,
- 0xe6, 0x00, 0x1a, 0xf3, 0x03, 0x12, 0x5d, 0xc5, 0x21, 0x12, 0x00, 0x1b,
- 0x02, 0x03, 0x12, 0x63, 0xc5, 0x01, 0x62, 0x00, 0x19, 0x1b, 0x03, 0x12,
- 0x67, 0xc5, 0x00, 0x95, 0x00, 0x18, 0x3a, 0x03, 0x12, 0x6d, 0xcc, 0x84,
- 0x40, 0x00, 0xee, 0x09, 0xcc, 0x1f, 0x94, 0x00, 0xee, 0x00, 0xc2, 0x03,
- 0x32, 0x05, 0x47, 0x81, 0xc2, 0x02, 0x59, 0x05, 0x47, 0x79, 0xc2, 0x03,
- 0xa4, 0x05, 0x47, 0x71, 0xc2, 0x00, 0xc1, 0x05, 0x47, 0x69, 0xc2, 0x04,
- 0x2b, 0x05, 0x47, 0x61, 0x97, 0x05, 0x47, 0x59, 0x83, 0x05, 0x47, 0x50,
- 0xc4, 0x22, 0x71, 0x05, 0x47, 0x49, 0xc5, 0x01, 0xdb, 0x05, 0x47, 0x41,
- 0x15, 0xc3, 0x12, 0x71, 0x08, 0xc3, 0x12, 0x7d, 0x16, 0xc3, 0x12, 0x89,
- 0xc3, 0x01, 0xb4, 0x05, 0x47, 0x09, 0xc4, 0x15, 0xd3, 0x05, 0x47, 0x00,
- 0xc5, 0x00, 0x95, 0x00, 0xd6, 0x39, 0xc5, 0x01, 0x62, 0x00, 0x19, 0x58,
- 0xc3, 0x0f, 0xc0, 0x00, 0x18, 0x8b, 0x03, 0x12, 0x95, 0xca, 0x32, 0x37,
- 0x00, 0x19, 0xd0, 0xc6, 0x01, 0x61, 0x07, 0xf1, 0x58, 0xc6, 0x01, 0x61,
- 0x07, 0xf1, 0x60, 0xc2, 0x01, 0xdb, 0x00, 0x1f, 0x39, 0x8b, 0x01, 0x65,
- 0x68, 0xc3, 0x07, 0x4a, 0x00, 0x1f, 0x69, 0xc2, 0x01, 0xdb, 0x00, 0x1f,
- 0x18, 0xc4, 0x08, 0x1a, 0x01, 0x65, 0x99, 0xc4, 0xcd, 0x5e, 0x01, 0x65,
- 0xc9, 0xc2, 0x04, 0x6e, 0x01, 0x65, 0xd9, 0xc4, 0x00, 0xf6, 0x01, 0x66,
- 0x58, 0x47, 0xc5, 0xe0, 0xc3, 0x12, 0x9b, 0x47, 0x92, 0x25, 0x43, 0x12,
- 0xc3, 0xc3, 0xe6, 0x67, 0x01, 0x65, 0xb9, 0xc2, 0x04, 0x6e, 0x01, 0x65,
- 0xe9, 0xc4, 0xa0, 0x00, 0x01, 0x67, 0x61, 0xc6, 0xcf, 0xa9, 0x01, 0x67,
- 0x70, 0xc3, 0x07, 0x4a, 0x00, 0x1f, 0x61, 0xc2, 0x01, 0xdb, 0x00, 0x1f,
- 0x10, 0xc4, 0x08, 0x1a, 0x01, 0x65, 0x91, 0xc4, 0xcd, 0x5e, 0x01, 0x65,
- 0xc1, 0xc2, 0x04, 0x6e, 0x01, 0x65, 0xd1, 0xc4, 0x00, 0xf6, 0x01, 0x66,
- 0x50, 0x8b, 0x01, 0x65, 0x61, 0xc2, 0x01, 0xdb, 0x00, 0x1f, 0x30, 0x47,
- 0xc5, 0xe0, 0xc3, 0x12, 0xd3, 0x47, 0x92, 0x25, 0x43, 0x12, 0xfb, 0xc3,
- 0xe6, 0x67, 0x01, 0x65, 0xb1, 0xc2, 0x04, 0x6e, 0x01, 0x65, 0xe1, 0xc4,
- 0xa0, 0x00, 0x01, 0x67, 0x59, 0xc6, 0xcf, 0xa9, 0x01, 0x67, 0x68, 0xc4,
- 0x18, 0x85, 0x08, 0x17, 0x59, 0xc9, 0x18, 0x78, 0x08, 0x17, 0xa0, 0xc4,
- 0x0c, 0x55, 0x08, 0x17, 0x61, 0xcb, 0x13, 0xe3, 0x08, 0x17, 0xa8, 0xc3,
- 0x0c, 0x56, 0x08, 0x17, 0x69, 0xca, 0xa2, 0xac, 0x08, 0x17, 0xb0, 0xc3,
- 0x43, 0xcd, 0x08, 0x17, 0x71, 0xca, 0x37, 0x4d, 0x08, 0x17, 0xb8, 0xc2,
- 0x0c, 0x57, 0x08, 0x17, 0x79, 0xc8, 0x0c, 0x4a, 0x08, 0x17, 0xc0, 0xc8,
- 0x0c, 0x4a, 0x08, 0x17, 0xc9, 0xc2, 0x0c, 0x57, 0x08, 0x17, 0x80, 0xd9,
- 0x20, 0x4f, 0x0f, 0xa8, 0x10, 0xc7, 0xc8, 0x1e, 0x0f, 0xab, 0x39, 0xc7,
- 0xcb, 0xb3, 0x0f, 0xaa, 0xd8, 0xc7, 0xc8, 0x1e, 0x0f, 0xaa, 0xe9, 0xc7,
- 0xcb, 0xb3, 0x0f, 0xaa, 0x88, 0xc6, 0xd0, 0xf9, 0x0f, 0xc8, 0x13, 0x03,
- 0x13, 0x0b, 0xc6, 0xcf, 0x19, 0x0f, 0xaa, 0x00, 0xc5, 0x92, 0x32, 0x01,
- 0x93, 0x03, 0x03, 0x13, 0x11, 0xc6, 0xc0, 0x37, 0x01, 0x93, 0x52, 0x03,
- 0x13, 0x17, 0xc2, 0x00, 0x56, 0x01, 0x93, 0x78, 0xc5, 0xc8, 0x2e, 0x01,
- 0x93, 0x13, 0x03, 0x13, 0x1d, 0xc6, 0xc6, 0xf2, 0x01, 0x93, 0x5a, 0x03,
- 0x13, 0x23, 0xc2, 0x00, 0x56, 0x01, 0x93, 0x88, 0xc2, 0x00, 0x56, 0x01,
- 0x93, 0x90, 0xc4, 0x7a, 0x93, 0x01, 0x93, 0x2b, 0x03, 0x13, 0x29, 0xc6,
- 0xc1, 0x07, 0x01, 0x93, 0x62, 0x03, 0x13, 0x2f, 0xc2, 0x00, 0x56, 0x01,
- 0x93, 0xa0, 0x00, 0x43, 0x13, 0x35, 0xc4, 0xc7, 0x2b, 0x01, 0x93, 0x43,
- 0x03, 0x13, 0x3d, 0xc6, 0xc7, 0x2a, 0x01, 0x93, 0x4a, 0x03, 0x13, 0x43,
- 0xc2, 0x00, 0x56, 0x01, 0x93, 0xd8, 0xc4, 0x15, 0xd3, 0x01, 0x27, 0x51,
- 0xc4, 0x22, 0x71, 0x01, 0x23, 0x41, 0xc5, 0x01, 0xdb, 0x01, 0x23, 0x39,
- 0x15, 0xc3, 0x13, 0x49, 0x08, 0xc3, 0x13, 0x55, 0x16, 0xc3, 0x13, 0x61,
- 0xc3, 0x01, 0xb4, 0x01, 0x23, 0x00, 0xc4, 0x00, 0xe3, 0x01, 0x14, 0xc1,
- 0xc3, 0x00, 0xcc, 0x01, 0x51, 0xc0, 0xe0, 0x00, 0xc7, 0x0f, 0x88, 0x78,
- 0x9c, 0x01, 0x27, 0x49, 0x9b, 0x01, 0x27, 0x41, 0x9a, 0x01, 0x27, 0x39,
- 0x99, 0x01, 0x27, 0x31, 0x98, 0x01, 0x27, 0x29, 0x97, 0x01, 0x27, 0x21,
- 0x96, 0x01, 0x27, 0x19, 0x95, 0x01, 0x27, 0x11, 0x94, 0x01, 0x27, 0x09,
- 0x93, 0x01, 0x27, 0x01, 0x92, 0x01, 0x26, 0xf9, 0x91, 0x01, 0x26, 0xf1,
- 0x90, 0x01, 0x26, 0xe9, 0x8f, 0x01, 0x26, 0xe1, 0x8e, 0x01, 0x26, 0xd9,
- 0x8d, 0x01, 0x26, 0xd1, 0x8c, 0x01, 0x26, 0xc9, 0x8b, 0x01, 0x26, 0xc1,
- 0x8a, 0x01, 0x26, 0xb9, 0x89, 0x01, 0x26, 0xb1, 0x88, 0x01, 0x26, 0xa9,
- 0x87, 0x01, 0x26, 0xa1, 0x86, 0x01, 0x26, 0x99, 0x85, 0x01, 0x26, 0x91,
- 0x84, 0x01, 0x26, 0x89, 0x83, 0x01, 0x26, 0x80, 0x9c, 0x01, 0x26, 0x79,
- 0x9b, 0x01, 0x26, 0x71, 0x9a, 0x01, 0x26, 0x69, 0x99, 0x01, 0x26, 0x61,
- 0x98, 0x01, 0x26, 0x59, 0x97, 0x01, 0x26, 0x51, 0x96, 0x01, 0x26, 0x49,
- 0x95, 0x01, 0x26, 0x41, 0x94, 0x01, 0x26, 0x39, 0x93, 0x01, 0x26, 0x31,
- 0x92, 0x01, 0x26, 0x29, 0x91, 0x01, 0x26, 0x21, 0x90, 0x01, 0x26, 0x19,
- 0x8f, 0x01, 0x26, 0x11, 0x8e, 0x01, 0x26, 0x09, 0x8d, 0x01, 0x26, 0x01,
- 0x8c, 0x01, 0x25, 0xf9, 0x8b, 0x01, 0x25, 0xf1, 0x8a, 0x01, 0x25, 0xe9,
- 0x89, 0x01, 0x25, 0xe1, 0x88, 0x01, 0x25, 0xd9, 0x87, 0x01, 0x25, 0xd1,
- 0x86, 0x01, 0x25, 0xc9, 0x85, 0x01, 0x25, 0xc1, 0x84, 0x01, 0x25, 0xb9,
- 0x83, 0x01, 0x25, 0xb0, 0xc3, 0x18, 0x86, 0x01, 0x23, 0x9b, 0x03, 0x13,
- 0x6d, 0xc3, 0x21, 0x32, 0x01, 0x23, 0x58, 0xc3, 0x00, 0x4c, 0x01, 0x23,
- 0x61, 0x9b, 0x01, 0x92, 0xd2, 0x03, 0x13, 0x71, 0xd0, 0x54, 0xc5, 0x01,
- 0x92, 0x40, 0xc3, 0x00, 0x4c, 0x01, 0x23, 0x89, 0xd1, 0x54, 0xc4, 0x01,
- 0x92, 0x78, 0xc3, 0x00, 0x4c, 0x01, 0x23, 0x81, 0xd1, 0x54, 0xc4, 0x01,
- 0x92, 0x70, 0xc3, 0x00, 0x4c, 0x01, 0x23, 0x79, 0xd1, 0x54, 0xc4, 0x01,
- 0x92, 0x68, 0xc3, 0x00, 0x4c, 0x01, 0x23, 0x71, 0x9b, 0x01, 0x95, 0xfa,
- 0x03, 0x13, 0x75, 0xc6, 0x56, 0x08, 0x01, 0x23, 0x69, 0xc3, 0x0c, 0x56,
- 0x01, 0x95, 0xaa, 0x03, 0x13, 0x7b, 0xc5, 0xd5, 0x9d, 0x0f, 0x92, 0x89,
- 0xc8, 0xbd, 0xb5, 0x0f, 0x92, 0x81, 0xc8, 0xb8, 0xd5, 0x01, 0x94, 0xf9,
- 0xc7, 0xb9, 0x6e, 0x01, 0x95, 0x78, 0xcb, 0x92, 0x00, 0x01, 0x92, 0x29,
- 0xc3, 0x44, 0x6b, 0x01, 0x92, 0x38, 0xc5, 0xd8, 0xf9, 0x01, 0x92, 0x31,
- 0xc2, 0x26, 0x51, 0x01, 0x94, 0x29, 0x07, 0xc3, 0x13, 0x7f, 0x17, 0xc3,
- 0x13, 0x8b, 0x16, 0xc3, 0x13, 0x9b, 0xc6, 0xcc, 0xc7, 0x01, 0x94, 0x99,
- 0xc6, 0xcc, 0x0d, 0x01, 0x94, 0xa8, 0xc2, 0x01, 0x47, 0x01, 0x94, 0x09,
- 0xc4, 0x04, 0x5e, 0x01, 0x94, 0x11, 0xc2, 0x00, 0x4d, 0x01, 0x94, 0x48,
- 0xc3, 0x06, 0x9e, 0x01, 0x94, 0x19, 0x0b, 0xc3, 0x13, 0xa7, 0xc5, 0x1c,
- 0xe4, 0x01, 0x94, 0xd8, 0xc4, 0x01, 0xdc, 0x01, 0x94, 0x39, 0xc4, 0x65,
- 0xf2, 0x01, 0x94, 0x79, 0xc8, 0xb6, 0x9d, 0x01, 0x94, 0xe9, 0xc9, 0xad,
- 0xd8, 0x01, 0x95, 0x68, 0x0b, 0xc3, 0x13, 0xb9, 0xc3, 0x00, 0xc2, 0x01,
- 0x94, 0xa0, 0xc3, 0x02, 0xd4, 0x01, 0x94, 0x51, 0x07, 0xc3, 0x13, 0xc5,
- 0xc3, 0x08, 0x1a, 0x01, 0x94, 0xd0, 0xc4, 0x00, 0x9e, 0x01, 0x94, 0x61,
- 0xc3, 0x28, 0x7f, 0x01, 0x94, 0x68, 0xc3, 0x07, 0x8d, 0x01, 0x94, 0x91,
- 0xc3, 0x01, 0xdc, 0x01, 0x95, 0x20, 0x11, 0xc3, 0x13, 0xd1, 0xc5, 0x02,
- 0x22, 0x01, 0x95, 0x28, 0xc4, 0xda, 0xad, 0x01, 0x94, 0xc1, 0xc2, 0x01,
- 0x12, 0x01, 0x95, 0x31, 0xc3, 0x01, 0x1f, 0x01, 0x95, 0x38, 0x07, 0xc3,
- 0x13, 0xe3, 0xc4, 0x01, 0xdc, 0x01, 0x95, 0x40, 0x83, 0x01, 0x96, 0xa9,
- 0x8b, 0x01, 0x96, 0xb1, 0x97, 0x01, 0x96, 0xb9, 0x87, 0x01, 0x96, 0xc1,
- 0x91, 0x01, 0x96, 0xc8, 0x83, 0x01, 0x96, 0xd1, 0x8b, 0x01, 0x96, 0xd9,
- 0x97, 0x01, 0x96, 0xe1, 0x87, 0x01, 0x96, 0xe9, 0x91, 0x01, 0x96, 0xf0,
- 0x83, 0x01, 0x96, 0xf9, 0x8b, 0x01, 0x97, 0x01, 0x97, 0x01, 0x97, 0x09,
- 0x87, 0x01, 0x97, 0x11, 0x91, 0x01, 0x97, 0x18, 0x83, 0x01, 0x97, 0x21,
- 0x8b, 0x01, 0x97, 0x29, 0x97, 0x01, 0x97, 0x31, 0x87, 0x01, 0x97, 0x39,
- 0x91, 0x01, 0x97, 0x40, 0x83, 0x01, 0x97, 0x49, 0x8b, 0x01, 0x97, 0x51,
- 0x97, 0x01, 0x97, 0x59, 0x87, 0x01, 0x97, 0x61, 0x91, 0x01, 0x97, 0x68,
- 0x83, 0x01, 0x97, 0x71, 0x8b, 0x01, 0x97, 0x79, 0x97, 0x01, 0x97, 0x81,
- 0x87, 0x01, 0x97, 0x89, 0x91, 0x01, 0x97, 0x90, 0x83, 0x01, 0x97, 0x99,
- 0x97, 0x01, 0x97, 0xa1, 0x91, 0x01, 0x97, 0xa8, 0x83, 0x01, 0x97, 0xb1,
- 0x8b, 0x01, 0x97, 0xb9, 0x97, 0x01, 0x97, 0xc1, 0x87, 0x01, 0x97, 0xc9,
- 0x91, 0x01, 0x97, 0xd0, 0x83, 0x01, 0x97, 0xd9, 0x8b, 0x01, 0x97, 0xe1,
- 0x87, 0x01, 0x97, 0xe9, 0x91, 0x01, 0x97, 0xf0, 0xcf, 0x61, 0x37, 0x09,
- 0x2a, 0x19, 0x83, 0x09, 0x1b, 0x60, 0x0e, 0xc3, 0x13, 0xed, 0x06, 0xc3,
- 0x13, 0xf7, 0x17, 0xc3, 0x14, 0x03, 0xc2, 0x00, 0x16, 0x09, 0x1a, 0x59,
- 0x15, 0xc3, 0x14, 0x13, 0xc2, 0x03, 0xa4, 0x09, 0x1a, 0x41, 0xc3, 0x0f,
- 0x42, 0x09, 0x1a, 0x39, 0xc2, 0x08, 0x12, 0x09, 0x1a, 0x29, 0x0b, 0xc3,
- 0x14, 0x1f, 0xc2, 0x00, 0xa4, 0x09, 0x1a, 0x09, 0x09, 0xc3, 0x14, 0x2f,
- 0xc3, 0x00, 0xe8, 0x09, 0x19, 0xd1, 0x83, 0x09, 0x19, 0xc2, 0x03, 0x14,
- 0x3a, 0xc8, 0x01, 0x2c, 0x09, 0x1a, 0x80, 0x46, 0x01, 0x2d, 0xc3, 0x14,
- 0x40, 0xc8, 0x1e, 0x8d, 0x09, 0x29, 0xe0, 0xc8, 0x4f, 0xb0, 0x09, 0x18,
- 0xf8, 0xc2, 0x03, 0xa4, 0x09, 0x19, 0x29, 0xc6, 0x47, 0x40, 0x09, 0x19,
- 0x20, 0x94, 0x09, 0x1a, 0xa0, 0xca, 0x90, 0xa1, 0x09, 0x18, 0xd8, 0xcf,
- 0x64, 0xf7, 0x09, 0x18, 0xbb, 0x03, 0x14, 0x54, 0xc2, 0x00, 0x50, 0x09,
- 0x18, 0xb1, 0xc3, 0x61, 0x9a, 0x09, 0x18, 0xa8, 0xca, 0x61, 0x37, 0x09,
- 0x29, 0xd9, 0xc9, 0x5b, 0xd9, 0x09, 0x29, 0xd0, 0xc2, 0x04, 0xdd, 0x09,
- 0x17, 0xc9, 0xc4, 0x03, 0x90, 0x09, 0x17, 0xc1, 0x42, 0x00, 0xe5, 0xc3,
- 0x14, 0x5a, 0xc3, 0x72, 0x28, 0x09, 0x17, 0xa9, 0xc2, 0x02, 0x8c, 0x09,
- 0x17, 0xa0, 0xc7, 0x07, 0x09, 0x09, 0x17, 0x91, 0x42, 0x00, 0x7a, 0x43,
- 0x14, 0x62, 0xc2, 0x00, 0x50, 0x09, 0x17, 0x71, 0xc2, 0x00, 0x0a, 0x09,
- 0x17, 0x68, 0xc8, 0xc0, 0xad, 0x09, 0x18, 0x1b, 0x03, 0x14, 0x68, 0xca,
- 0x3d, 0x4b, 0x09, 0x18, 0x10, 0xcf, 0x68, 0x5d, 0x09, 0x16, 0xf8, 0x46,
- 0x26, 0x11, 0x43, 0x14, 0x6e, 0x45, 0x26, 0x12, 0xc3, 0x14, 0x7a, 0xc8,
- 0xc0, 0xc5, 0x09, 0x29, 0x93, 0x03, 0x14, 0x8c, 0xc2, 0x08, 0x07, 0x09,
- 0x15, 0xd8, 0xc3, 0x0f, 0x69, 0x09, 0x16, 0x11, 0x9f, 0x09, 0x16, 0x08,
- 0xc5, 0x59, 0x54, 0x09, 0x29, 0x88, 0x47, 0x01, 0x2c, 0x43, 0x14, 0x90,
- 0x00, 0x43, 0x14, 0xb9, 0x47, 0x01, 0x2c, 0x43, 0x14, 0xc5, 0x47, 0x01,
- 0x2c, 0x43, 0x14, 0xfa, 0x46, 0x01, 0x2d, 0xc3, 0x15, 0x04, 0xc4, 0x39,
- 0x41, 0x09, 0x15, 0x43, 0x03, 0x15, 0x47, 0xc8, 0xc0, 0xbd, 0x09, 0x15,
- 0x39, 0xc7, 0xb9, 0x2e, 0x09, 0x14, 0xa0, 0x47, 0x01, 0x2c, 0x43, 0x15,
- 0x4d, 0xd0, 0x60, 0x52, 0x09, 0x12, 0x89, 0xc7, 0x5b, 0xdb, 0x09, 0x12,
- 0x80, 0xd6, 0x29, 0x80, 0x09, 0x1c, 0x99, 0xd6, 0x31, 0xab, 0x09, 0x16,
- 0xa9, 0xc4, 0x59, 0x55, 0x09, 0x16, 0xa0, 0x00, 0x43, 0x15, 0x91, 0xcc,
- 0x84, 0x10, 0x09, 0x13, 0x5b, 0x03, 0x15, 0xa0, 0xc8, 0x1f, 0xd3, 0x09,
- 0x13, 0x51, 0xc4, 0x59, 0x55, 0x09, 0x13, 0x49, 0x4c, 0x1f, 0xdc, 0x43,
- 0x15, 0xa6, 0xcd, 0x80, 0x6e, 0x09, 0x12, 0x19, 0xce, 0x6d, 0x1b, 0x09,
- 0x12, 0x11, 0xc8, 0x1e, 0x8d, 0x09, 0x12, 0x08, 0xc2, 0x04, 0xdd, 0x09,
- 0x12, 0x51, 0x83, 0x09, 0x12, 0x48, 0xc9, 0xab, 0xb3, 0x09, 0x11, 0xb3,
- 0x03, 0x15, 0xc1, 0xcd, 0x79, 0x04, 0x09, 0x11, 0xc1, 0x46, 0x01, 0x2d,
- 0x43, 0x15, 0xc7, 0x00, 0x43, 0x15, 0xd7, 0x16, 0xc3, 0x15, 0xe3, 0xce,
- 0x72, 0x5b, 0x09, 0x28, 0xc9, 0x15, 0xc3, 0x15, 0xef, 0xcc, 0x89, 0xa4,
- 0x09, 0x10, 0x99, 0xcc, 0x84, 0x1c, 0x09, 0x10, 0x90, 0xcd, 0x1c, 0x4e,
- 0x09, 0x10, 0xf8, 0xc7, 0x6e, 0x09, 0x09, 0x10, 0xd1, 0x11, 0x43, 0x15,
- 0xfe, 0xc2, 0xe8, 0x08, 0x09, 0x28, 0xc1, 0xc2, 0xae, 0x32, 0x09, 0x28,
- 0xb8, 0xc2, 0xe8, 0x00, 0x09, 0x28, 0x6b, 0x03, 0x16, 0x0a, 0xc2, 0xe7,
- 0xf0, 0x09, 0x28, 0x61, 0xc2, 0xe7, 0xfc, 0x09, 0x28, 0x0b, 0x03, 0x16,
- 0x10, 0xc2, 0x74, 0xdc, 0x09, 0x28, 0x00, 0x26, 0xc3, 0x16, 0x16, 0xc2,
- 0xe7, 0xf6, 0x09, 0x27, 0xd1, 0xc2, 0xe6, 0x7a, 0x09, 0x27, 0xc9, 0x22,
- 0xc3, 0x16, 0x26, 0x21, 0x43, 0x16, 0x2e, 0xc2, 0xe8, 0x1c, 0x09, 0x27,
- 0x79, 0x25, 0xc3, 0x16, 0x39, 0x21, 0x43, 0x16, 0x41, 0x23, 0xc3, 0x16,
- 0x4d, 0xc2, 0xe7, 0xf2, 0x09, 0x27, 0x39, 0x1f, 0xc3, 0x16, 0x55, 0x1e,
- 0x43, 0x16, 0x61, 0xc2, 0xe6, 0x7d, 0x09, 0x27, 0x09, 0xc2, 0xe7, 0xcf,
- 0x09, 0x27, 0x00, 0xc2, 0xe7, 0xf9, 0x09, 0x26, 0xf9, 0x25, 0xc3, 0x16,
- 0x69, 0xd4, 0x39, 0x31, 0x09, 0x26, 0xe1, 0xc2, 0xe7, 0x1b, 0x09, 0x26,
- 0xd9, 0x22, 0xc3, 0x16, 0x73, 0xc2, 0xe7, 0xcf, 0x09, 0x26, 0xc1, 0x1f,
- 0xc3, 0x16, 0x7b, 0xc2, 0xe7, 0xb0, 0x09, 0x26, 0xa8, 0x00, 0x43, 0x16,
- 0x83, 0x00, 0x43, 0x16, 0x8f, 0xc8, 0xc0, 0xe5, 0x09, 0x0f, 0xb0, 0x94,
- 0x09, 0x26, 0x9b, 0x03, 0x16, 0xa1, 0xc4, 0xd7, 0xdd, 0x09, 0x26, 0x91,
- 0xc2, 0x00, 0xe5, 0x09, 0x0c, 0x59, 0xcc, 0x8a, 0x1c, 0x09, 0x0c, 0x51,
- 0x86, 0x09, 0x0c, 0x49, 0x9f, 0x09, 0x0c, 0x40, 0x83, 0x09, 0x26, 0x8b,
- 0x03, 0x16, 0xa5, 0x8b, 0x09, 0x0b, 0x82, 0x03, 0x16, 0xa9, 0x97, 0x09,
- 0x26, 0x81, 0x8b, 0x09, 0x0a, 0xf9, 0x03, 0x43, 0x16, 0xad, 0x97, 0x09,
- 0x1c, 0x31, 0xc2, 0x00, 0xbb, 0x09, 0x0c, 0x30, 0x0a, 0xc3, 0x16, 0xbb,
- 0xc4, 0xdf, 0xf3, 0x09, 0x0c, 0x29, 0xc2, 0x00, 0xcc, 0x09, 0x0c, 0x21,
- 0x83, 0x09, 0x0b, 0xf2, 0x03, 0x16, 0xd0, 0x83, 0x09, 0x1c, 0x21, 0x8b,
- 0x09, 0x0b, 0xe0, 0x97, 0x09, 0x0b, 0x9b, 0x03, 0x16, 0xd4, 0x8b, 0x09,
- 0x0b, 0x90, 0x97, 0x09, 0x0b, 0x5b, 0x03, 0x16, 0xd8, 0x8b, 0x09, 0x0b,
- 0x3b, 0x03, 0x16, 0xe2, 0x83, 0x09, 0x0b, 0x12, 0x03, 0x16, 0xf1, 0x42,
- 0x00, 0xe5, 0xc3, 0x17, 0x02, 0xc4, 0xe5, 0x87, 0x09, 0x1b, 0xf1, 0x86,
- 0x09, 0x0a, 0xca, 0x03, 0x17, 0x0a, 0xc2, 0x01, 0xbd, 0x09, 0x0b, 0xd9,
- 0x87, 0x09, 0x0b, 0xd0, 0x8b, 0x09, 0x0b, 0xc3, 0x03, 0x17, 0x10, 0x87,
- 0x09, 0x0b, 0xa2, 0x03, 0x17, 0x16, 0x8f, 0x09, 0x0b, 0x71, 0xc2, 0x04,
- 0xcb, 0x09, 0x0b, 0x68, 0xc3, 0x0b, 0xee, 0x09, 0x0b, 0x09, 0xc4, 0xa1,
- 0xea, 0x09, 0x0b, 0x00, 0x4c, 0x8a, 0xd0, 0xc3, 0x17, 0x1c, 0xe0, 0x01,
- 0x27, 0x09, 0x0c, 0xe8, 0xcc, 0x84, 0x28, 0x09, 0x0c, 0xc9, 0xc9, 0x90,
- 0xa2, 0x09, 0x0c, 0xc0, 0xca, 0x9c, 0xa8, 0x09, 0x0c, 0xa0, 0xcc, 0x84,
- 0x4c, 0x09, 0x0d, 0x48, 0x86, 0x09, 0x0d, 0x18, 0xd2, 0x0b, 0xf4, 0x09,
- 0x26, 0x79, 0x9f, 0x09, 0x09, 0x78, 0xc5, 0x39, 0x40, 0x09, 0x26, 0x70,
- 0xc2, 0x04, 0xdd, 0x09, 0x09, 0xe9, 0xc4, 0x8c, 0xfc, 0x09, 0x09, 0xe1,
- 0xc6, 0x47, 0x40, 0x09, 0x09, 0xd9, 0xc3, 0x03, 0x2b, 0x09, 0x09, 0xd1,
- 0xc2, 0x00, 0x57, 0x09, 0x09, 0xc8, 0xd4, 0x3d, 0x41, 0x09, 0x26, 0x69,
- 0xce, 0x72, 0x23, 0x09, 0x09, 0x09, 0x46, 0x01, 0x2d, 0x43, 0x17, 0x22,
- 0x46, 0x01, 0x2d, 0xc3, 0x17, 0x2e, 0xc4, 0x39, 0x41, 0x09, 0x08, 0xe8,
- 0xc2, 0x00, 0xe5, 0x09, 0x09, 0x41, 0x90, 0x09, 0x09, 0x38, 0x00, 0x43,
- 0x17, 0x49, 0x47, 0x01, 0x2c, 0x43, 0x17, 0x53, 0xc5, 0x39, 0x40, 0x09,
- 0x08, 0x48, 0xcc, 0x84, 0x34, 0x09, 0x08, 0x31, 0xc8, 0xbd, 0x55, 0x09,
- 0x08, 0x28, 0x97, 0x09, 0x08, 0x11, 0x87, 0x09, 0x08, 0x08, 0x97, 0x09,
- 0x26, 0x51, 0xc3, 0x54, 0x8e, 0x09, 0x07, 0xf8, 0xd6, 0x29, 0x80, 0x09,
- 0x26, 0x49, 0xcd, 0x79, 0xfb, 0x09, 0x07, 0x78, 0x46, 0x01, 0x2d, 0xc3,
- 0x17, 0x71, 0xc8, 0xbd, 0x35, 0x09, 0x07, 0x68, 0x00, 0x43, 0x17, 0xba,
- 0x15, 0xc3, 0x17, 0xcc, 0xc3, 0x72, 0x28, 0x09, 0x1b, 0xb9, 0x17, 0xc3,
- 0x17, 0xd6, 0x0e, 0xc3, 0x17, 0xde, 0x0d, 0xc3, 0x17, 0xed, 0xc8, 0x66,
- 0xc0, 0x09, 0x05, 0x59, 0xc2, 0x00, 0xa4, 0x09, 0x05, 0x4b, 0x03, 0x17,
- 0xfc, 0xc9, 0x74, 0x6f, 0x09, 0x05, 0x3b, 0x03, 0x18, 0x02, 0xc3, 0x61,
- 0x9a, 0x09, 0x05, 0x31, 0x83, 0x09, 0x05, 0x12, 0x03, 0x18, 0x08, 0xc2,
- 0x08, 0x22, 0x09, 0x25, 0xa1, 0xc2, 0x00, 0xc3, 0x09, 0x25, 0x93, 0x03,
- 0x18, 0x15, 0xc2, 0x00, 0xc7, 0x09, 0x25, 0x83, 0x03, 0x18, 0x19, 0xc8,
- 0x66, 0xc0, 0x09, 0x25, 0x79, 0xc2, 0x00, 0x0a, 0x09, 0x25, 0x71, 0xc3,
- 0x00, 0xe4, 0x09, 0x25, 0x68, 0xc2, 0x01, 0xf0, 0x09, 0x04, 0x91, 0xc2,
- 0x00, 0x92, 0x09, 0x04, 0x88, 0xc2, 0x00, 0xc3, 0x09, 0x04, 0xd1, 0xc4,
- 0x5b, 0xd9, 0x09, 0x04, 0xc2, 0x03, 0x18, 0x1d, 0x15, 0xc3, 0x18, 0x23,
- 0xc2, 0x07, 0x19, 0x09, 0x25, 0x31, 0xc2, 0x04, 0x6e, 0x09, 0x25, 0x29,
- 0x0f, 0xc3, 0x18, 0x2f, 0x0e, 0xc3, 0x18, 0x3f, 0x0d, 0xc3, 0x18, 0x49,
- 0xc8, 0x66, 0xc0, 0x09, 0x24, 0xc9, 0x0a, 0xc3, 0x18, 0x53, 0x09, 0xc3,
- 0x18, 0x5b, 0xc5, 0xa1, 0xe9, 0x09, 0x24, 0x91, 0x06, 0xc3, 0x18, 0x66,
- 0x03, 0x43, 0x18, 0x72, 0xc3, 0x0a, 0x91, 0x09, 0x1b, 0xb1, 0xc4, 0x72,
- 0x9d, 0x09, 0x03, 0xf8, 0xc5, 0x39, 0x40, 0x09, 0x04, 0x32, 0x03, 0x18,
- 0x81, 0xc9, 0xaa, 0x54, 0x09, 0x24, 0x60, 0xc5, 0xd7, 0xdc, 0x09, 0x24,
- 0x59, 0xc3, 0x04, 0xca, 0x09, 0x24, 0x51, 0xc3, 0x0a, 0x91, 0x09, 0x03,
- 0xa8, 0xc9, 0x54, 0x88, 0x09, 0x24, 0x49, 0x4d, 0x6b, 0x5a, 0x43, 0x18,
- 0x87, 0xa1, 0x09, 0x03, 0x89, 0xa0, 0x09, 0x03, 0x80, 0xc9, 0xab, 0x11,
- 0x09, 0x24, 0x39, 0xc2, 0x01, 0xbd, 0x09, 0x02, 0x79, 0xc2, 0x00, 0x03,
- 0x09, 0x02, 0x70, 0xc2, 0x02, 0xb4, 0x09, 0x24, 0x31, 0xc2, 0x04, 0x6e,
- 0x09, 0x24, 0x29, 0xc3, 0xa2, 0xca, 0x09, 0x24, 0x20, 0x42, 0x00, 0xe5,
- 0xc3, 0x18, 0xc8, 0xc3, 0x1f, 0xd8, 0x09, 0x1b, 0x83, 0x03, 0x18, 0xd4,
- 0xcf, 0x64, 0xf7, 0x09, 0x00, 0xa1, 0xc5, 0x01, 0x27, 0x09, 0x00, 0x91,
- 0x0b, 0xc3, 0x18, 0xda, 0xc2, 0x00, 0xa4, 0x09, 0x00, 0x79, 0x42, 0x01,
- 0x29, 0xc3, 0x18, 0xe6, 0xc9, 0x74, 0x6f, 0x09, 0x00, 0x61, 0xc4, 0x0b,
- 0xed, 0x09, 0x00, 0x58, 0x83, 0x09, 0x1b, 0x89, 0xc4, 0x3d, 0x51, 0x09,
- 0x00, 0xd9, 0xc4, 0x56, 0x51, 0x09, 0x00, 0xd1, 0xca, 0xa8, 0x06, 0x09,
- 0x00, 0xc9, 0xc9, 0x5b, 0xd9, 0x09, 0x00, 0xc1, 0xc5, 0xde, 0x4e, 0x09,
- 0x00, 0xb8, 0x49, 0x0d, 0xc9, 0xc3, 0x18, 0xf0, 0xc9, 0xa7, 0x7b, 0x09,
- 0x01, 0xd1, 0xc9, 0x82, 0x33, 0x09, 0x01, 0xc8, 0xc7, 0x07, 0x09, 0x09,
- 0x01, 0x89, 0xd5, 0x33, 0x67, 0x09, 0x01, 0x80, 0x8b, 0x09, 0x01, 0x31,
- 0xc3, 0xe2, 0xe4, 0x09, 0x01, 0x28, 0x00, 0x43, 0x18, 0xfd, 0x97, 0x09,
- 0x14, 0x3b, 0x03, 0x19, 0x09, 0x8b, 0x09, 0x14, 0x2b, 0x03, 0x19, 0x0d,
- 0x87, 0x09, 0x14, 0x21, 0x04, 0xc3, 0x19, 0x11, 0x83, 0x09, 0x14, 0x02,
- 0x03, 0x19, 0x19, 0xc4, 0x39, 0x41, 0x09, 0x0a, 0x51, 0x42, 0x00, 0x7a,
- 0xc3, 0x19, 0x1d, 0xc2, 0x00, 0x5b, 0x09, 0x0a, 0x41, 0xc3, 0xe4, 0x9d,
- 0x09, 0x0a, 0x38, 0x84, 0x09, 0x22, 0x19, 0x83, 0x09, 0x22, 0x10, 0x97,
- 0x09, 0x21, 0x89, 0x9f, 0x09, 0x21, 0x38, 0xcd, 0x79, 0x38, 0x09, 0x22,
- 0xa8, 0xcd, 0x79, 0x38, 0x09, 0x22, 0x98, 0x84, 0x09, 0x21, 0xf9, 0x83,
- 0x09, 0x21, 0xf0, 0xcd, 0x79, 0x38, 0x09, 0x21, 0xb8, 0xcd, 0x79, 0x38,
- 0x09, 0x21, 0x78, 0xcd, 0x79, 0x38, 0x09, 0x21, 0x28, 0x06, 0xc3, 0x19,
- 0x23, 0xc6, 0x60, 0xe6, 0x00, 0x27, 0x78, 0xca, 0x05, 0xde, 0x00, 0x22,
- 0xa0, 0xc3, 0x2d, 0xf3, 0x00, 0xe4, 0x39, 0xc9, 0xa9, 0xd6, 0x00, 0xe4,
- 0x31, 0xc2, 0x00, 0x9f, 0x00, 0xe4, 0x20, 0x46, 0x00, 0x6b, 0x43, 0x19,
- 0x2f, 0x87, 0x00, 0x22, 0x31, 0xc2, 0x01, 0xf0, 0x00, 0x22, 0xd9, 0xc2,
- 0x00, 0x4b, 0x05, 0x34, 0x79, 0xc2, 0x02, 0x60, 0x05, 0x34, 0x88, 0xc5,
- 0x14, 0x61, 0x00, 0xe4, 0x01, 0xc6, 0xa1, 0x16, 0x00, 0x23, 0xd8, 0xc2,
- 0x08, 0xc2, 0x00, 0x28, 0x89, 0xc3, 0x98, 0x92, 0x05, 0x32, 0x29, 0xc2,
- 0x48, 0x12, 0x05, 0x32, 0xa9, 0xc3, 0x09, 0x66, 0x05, 0x33, 0x08, 0x46,
- 0x00, 0x6b, 0x43, 0x19, 0x3b, 0x46, 0x00, 0x6b, 0x43, 0x19, 0x53, 0xca,
- 0x9f, 0xb4, 0x00, 0x26, 0x70, 0xcf, 0x66, 0x5f, 0x00, 0x25, 0x58, 0xca,
- 0xa0, 0x04, 0x00, 0x24, 0x78, 0x1c, 0xc3, 0x19, 0x71, 0x87, 0x00, 0x22,
- 0xab, 0x03, 0x19, 0x7b, 0xc2, 0x01, 0xf0, 0x00, 0x22, 0xf9, 0xc2, 0x00,
- 0x4b, 0x05, 0x34, 0x18, 0x91, 0x05, 0x34, 0xc9, 0xcb, 0x90, 0x3d, 0x05,
- 0x33, 0x68, 0xc2, 0x07, 0x8b, 0x05, 0x32, 0x48, 0xc2, 0x00, 0xa4, 0x00,
- 0x25, 0xdb, 0x03, 0x19, 0x81, 0x44, 0x2c, 0x01, 0xc3, 0x19, 0x87, 0xc2,
- 0x00, 0x4b, 0x05, 0x34, 0xb9, 0x83, 0x00, 0x22, 0x41, 0xc3, 0x1b, 0xb6,
- 0x00, 0x22, 0x48, 0xcf, 0x66, 0xaa, 0x00, 0x26, 0xd8, 0xcc, 0x21, 0x84,
- 0x00, 0x25, 0x88, 0xc2, 0x00, 0x06, 0x05, 0x33, 0x19, 0x07, 0xc3, 0x19,
- 0x92, 0xc4, 0x00, 0xcb, 0x00, 0x22, 0x60, 0x46, 0x00, 0x6b, 0x43, 0x19,
- 0x9a, 0xc3, 0x98, 0x92, 0x00, 0x27, 0x09, 0xc3, 0x26, 0x9b, 0x00, 0x25,
- 0xeb, 0x03, 0x19, 0xa6, 0xc2, 0x00, 0xa4, 0x00, 0x25, 0x48, 0xc9, 0x1e,
- 0x42, 0x00, 0x26, 0x99, 0xc5, 0x1f, 0x0a, 0x00, 0x26, 0x88, 0x87, 0x00,
- 0x28, 0xc9, 0x96, 0x00, 0x23, 0x18, 0x46, 0x00, 0x6b, 0x43, 0x19, 0xac,
- 0x43, 0xd2, 0x0c, 0xc3, 0x19, 0xb8, 0xc3, 0x7e, 0xe5, 0x00, 0x24, 0x08,
- 0x46, 0x00, 0x6b, 0x43, 0x19, 0xda, 0x46, 0x00, 0x6b, 0xc3, 0x19, 0xf2,
- 0xc7, 0x86, 0x25, 0x00, 0x22, 0x50, 0x46, 0x00, 0x6b, 0x43, 0x1a, 0x04,
- 0xc6, 0xcb, 0xbb, 0x00, 0x27, 0x4b, 0x03, 0x1a, 0x1f, 0xc8, 0xbd, 0x65,
- 0x00, 0x25, 0x08, 0xc9, 0x90, 0x3f, 0x05, 0x33, 0x59, 0xc5, 0xc7, 0x4e,
- 0x00, 0x23, 0x58, 0xcb, 0x9a, 0xda, 0x00, 0x23, 0xe8, 0xc9, 0x1e, 0x42,
- 0x00, 0x27, 0x29, 0xc6, 0x60, 0xe6, 0x00, 0x27, 0x19, 0xc5, 0x1f, 0x94,
- 0x00, 0x22, 0xe8, 0x46, 0x00, 0x6b, 0x43, 0x1a, 0x25, 0xd9, 0x1f, 0x87,
- 0x00, 0x23, 0xb8, 0x16, 0x43, 0x1a, 0x31, 0x47, 0x09, 0xf2, 0xc3, 0x1a,
- 0x3b, 0xc4, 0xe5, 0x53, 0x05, 0x32, 0x08, 0x87, 0x00, 0x21, 0xb3, 0x03,
- 0x1a, 0x47, 0xc2, 0x00, 0x4b, 0x05, 0x34, 0x28, 0x46, 0x00, 0x6b, 0x43,
- 0x1a, 0x4d, 0xc9, 0x23, 0xdf, 0x00, 0x29, 0x71, 0xcb, 0x95, 0x44, 0x00,
- 0x29, 0x01, 0xc4, 0x02, 0x83, 0x00, 0x28, 0x91, 0xc4, 0x12, 0xeb, 0x00,
- 0x26, 0x28, 0xc9, 0x56, 0xec, 0x00, 0x29, 0x41, 0xcb, 0x95, 0x44, 0x00,
- 0x29, 0x11, 0xc4, 0x12, 0xeb, 0x00, 0x26, 0x49, 0xc4, 0x02, 0x83, 0x00,
- 0x26, 0x39, 0xc9, 0x23, 0xdf, 0x00, 0x25, 0x10, 0xc2, 0x01, 0xf0, 0x00,
- 0x29, 0x51, 0x87, 0x05, 0x34, 0x40, 0xc2, 0x01, 0x64, 0x05, 0x32, 0x10,
- 0xcf, 0x66, 0x5f, 0x00, 0x29, 0x30, 0x8b, 0x00, 0x20, 0xcb, 0x03, 0x1a,
- 0x57, 0x97, 0x00, 0x20, 0x70, 0x8e, 0x05, 0x33, 0x21, 0x8f, 0x05, 0x33,
- 0x30, 0xc9, 0x23, 0xdf, 0x00, 0x29, 0x21, 0xcb, 0x95, 0x44, 0x00, 0x25,
- 0x30, 0xcf, 0x66, 0x5f, 0x00, 0x25, 0xf0, 0xc9, 0x1e, 0x42, 0x00, 0x27,
- 0xc1, 0xc8, 0x6c, 0xf7, 0x05, 0x32, 0x80, 0xc3, 0xad, 0x7d, 0x00, 0x28,
- 0x71, 0xc3, 0xe7, 0x4b, 0x00, 0x28, 0x61, 0xc3, 0xc9, 0x7b, 0x00, 0x28,
- 0x51, 0xc3, 0xe7, 0x51, 0x00, 0x28, 0x41, 0x06, 0xc3, 0x1a, 0x5d, 0xc3,
- 0xe6, 0x94, 0x00, 0x28, 0x20, 0xc4, 0x02, 0x83, 0x00, 0x26, 0x19, 0xc9,
- 0x23, 0xdf, 0x00, 0x24, 0xd1, 0xcf, 0x2c, 0x05, 0x00, 0x24, 0xe1, 0xc6,
- 0x01, 0x01, 0x00, 0x24, 0xf0, 0xc6, 0x01, 0x01, 0x00, 0x27, 0xf1, 0xc4,
- 0x02, 0x83, 0x00, 0x27, 0xe1, 0xc9, 0x23, 0xdf, 0x00, 0x25, 0x90, 0xc6,
- 0x01, 0x01, 0x00, 0x24, 0x93, 0x03, 0x1a, 0x6d, 0xc9, 0x23, 0xdf, 0x00,
- 0x27, 0xb1, 0xc6, 0x5b, 0x8c, 0x00, 0x24, 0x81, 0xcb, 0x95, 0x44, 0x00,
- 0x24, 0xa0, 0x06, 0xc3, 0x1a, 0x73, 0xcf, 0x2c, 0x05, 0x00, 0x25, 0xc0,
- 0xcb, 0x95, 0x23, 0x00, 0x27, 0x91, 0xc8, 0x1e, 0x43, 0x00, 0x27, 0x80,
- 0xcf, 0x66, 0xaa, 0x00, 0x27, 0x50, 0xc5, 0x1f, 0x0a, 0x00, 0x26, 0xb1,
- 0xc5, 0x1f, 0x94, 0x00, 0x20, 0x00, 0x83, 0x05, 0x32, 0x31, 0x46, 0x2f,
- 0xb7, 0x43, 0x1a, 0x7f, 0xc8, 0x1e, 0x43, 0x00, 0x26, 0xf1, 0xc8, 0x23,
- 0xe0, 0x00, 0x24, 0xc0, 0x46, 0x00, 0x6b, 0x43, 0x1a, 0x9f, 0xc9, 0x23,
- 0xdf, 0x00, 0x25, 0x61, 0xcb, 0x95, 0x44, 0x05, 0x34, 0x50, 0xc5, 0x64,
- 0x78, 0x00, 0x6c, 0x39, 0xc6, 0xd0, 0xb1, 0x00, 0x6c, 0x40, 0xc7, 0xca,
- 0x9b, 0x00, 0x6c, 0xd1, 0xc7, 0xc7, 0x30, 0x00, 0x6c, 0xe1, 0xc7, 0xc8,
- 0xc6, 0x00, 0x6d, 0x01, 0xc7, 0xc9, 0x6e, 0x00, 0x6d, 0x11, 0x16, 0xc3,
- 0x1a, 0xa9, 0x06, 0xc3, 0x1a, 0xb5, 0xc7, 0xc5, 0x77, 0x00, 0x6d, 0xa1,
- 0xc7, 0x8e, 0x9f, 0x00, 0x6d, 0xb0, 0xc5, 0x64, 0x78, 0x00, 0x6c, 0x69,
- 0xc6, 0xd2, 0xd9, 0x00, 0x6c, 0x70, 0xc5, 0x64, 0x78, 0x00, 0x6c, 0x79,
- 0xc6, 0xd2, 0xd9, 0x00, 0x6c, 0x80, 0x4a, 0x9b, 0x7c, 0xc3, 0x1a, 0xc1,
- 0xc5, 0x64, 0x78, 0x00, 0x6d, 0xc0, 0xc7, 0xc3, 0x71, 0x00, 0x6d, 0x59,
- 0xc7, 0xca, 0x94, 0x00, 0x6e, 0x11, 0xc7, 0xc3, 0xef, 0x00, 0x6e, 0x28,
- 0xc7, 0xc9, 0xfa, 0x00, 0x6d, 0x61, 0xc6, 0x8e, 0xa0, 0x00, 0x6d, 0x98,
- 0xd2, 0x4e, 0xfc, 0x00, 0x6d, 0x29, 0xc5, 0x64, 0x78, 0x00, 0x6e, 0x08,
- 0x45, 0xd7, 0x96, 0x43, 0x1a, 0xed, 0xa3, 0x0e, 0xd5, 0x79, 0xa2, 0x0e,
- 0xd5, 0x71, 0xa1, 0x0e, 0xd5, 0x69, 0xa0, 0x0e, 0xd5, 0x61, 0x9f, 0x0e,
- 0xd5, 0x59, 0x9e, 0x0e, 0xd5, 0x51, 0x9d, 0x0e, 0xd5, 0x48, 0xcb, 0x51,
- 0xcd, 0x0e, 0xcf, 0x0b, 0x03, 0x1a, 0xff, 0xc6, 0x05, 0x96, 0x0e, 0xcf,
- 0x03, 0x03, 0x1b, 0x05, 0xc6, 0x24, 0x18, 0x0e, 0xce, 0xfa, 0x03, 0x1b,
- 0x0b, 0x48, 0x0d, 0x47, 0xc3, 0x1b, 0x11, 0xc6, 0x05, 0x96, 0x0e, 0xcd,
- 0x1b, 0x03, 0x1b, 0x1b, 0xc6, 0x24, 0x18, 0x0e, 0xcd, 0x12, 0x03, 0x1b,
- 0x21, 0xc9, 0x65, 0xb1, 0x0e, 0xc8, 0xf9, 0x45, 0x04, 0x74, 0x43, 0x1b,
- 0x27, 0xc8, 0x39, 0x95, 0x0e, 0xc8, 0xe9, 0xc6, 0x24, 0x18, 0x0e, 0xc8,
- 0xd8, 0xc8, 0x39, 0x95, 0x0e, 0xc8, 0xc9, 0xc6, 0x24, 0x18, 0x0e, 0xc8,
- 0xb8, 0xc7, 0xc2, 0xbb, 0x0e, 0xd4, 0x21, 0xc4, 0x01, 0xdc, 0x0e, 0xd4,
- 0x08, 0xa4, 0x0e, 0xd3, 0xe9, 0xa3, 0x0e, 0xd3, 0xe1, 0xa2, 0x0e, 0xd3,
- 0xd9, 0xa1, 0x0e, 0xd3, 0xd1, 0xa0, 0x0e, 0xd3, 0xc9, 0x9f, 0x0e, 0xd3,
- 0xc1, 0x9e, 0x0e, 0xd3, 0xb8, 0xd0, 0x58, 0x22, 0x0e, 0xd2, 0xa9, 0xd0,
- 0x5e, 0x22, 0x0e, 0xd2, 0xa0, 0xcb, 0x98, 0x51, 0x0e, 0xd3, 0x99, 0xd0,
- 0x59, 0x72, 0x0e, 0xd3, 0x90, 0xcc, 0x38, 0x71, 0x0e, 0xd3, 0x01, 0xcc,
- 0x58, 0x62, 0x0e, 0xd2, 0xf8, 0xd5, 0x38, 0x68, 0x0e, 0xd2, 0xe1, 0xcc,
- 0x8a, 0x10, 0x0e, 0xd2, 0xd8, 0xc9, 0xaa, 0x27, 0x0e, 0xd3, 0x39, 0x43,
- 0x00, 0xb7, 0xc3, 0x1b, 0x33, 0xc8, 0x52, 0x03, 0x0e, 0xd3, 0x10, 0x4a,
- 0x18, 0x91, 0xc3, 0x1b, 0x45, 0x4b, 0x43, 0xaf, 0x43, 0x1b, 0x57, 0xc6,
- 0x2f, 0x01, 0x0e, 0xca, 0xa1, 0xc6, 0x05, 0x96, 0x0e, 0xca, 0x99, 0xc6,
- 0x24, 0x18, 0x0e, 0xca, 0x90, 0x4b, 0x43, 0xaf, 0xc3, 0x1b, 0x69, 0x4a,
- 0x18, 0x91, 0x43, 0x1b, 0x7b, 0x05, 0xc3, 0x1b, 0x8d, 0xc8, 0x43, 0xd5,
- 0x0e, 0xd1, 0x0a, 0x03, 0x1b, 0x99, 0xc6, 0x3e, 0x81, 0x0e, 0xd1, 0x41,
- 0xc8, 0x43, 0xd5, 0x0e, 0xd1, 0x22, 0x03, 0x1b, 0x9d, 0xc8, 0x39, 0x95,
- 0x0e, 0xd0, 0xc1, 0xc6, 0x24, 0x18, 0x0e, 0xd0, 0xb8, 0xcd, 0x7c, 0x51,
- 0x0e, 0xd0, 0xe1, 0xc5, 0x01, 0x74, 0x0e, 0xd0, 0xd0, 0xc6, 0x07, 0x41,
- 0x0e, 0xd0, 0xd9, 0xc4, 0x01, 0x75, 0x0e, 0xd0, 0xc8, 0xc3, 0x1f, 0x24,
- 0x0e, 0xc8, 0x1b, 0x03, 0x1b, 0xa1, 0xc3, 0x03, 0x76, 0x0e, 0xc2, 0xd2,
- 0x03, 0x1b, 0xa5, 0x00, 0x43, 0x1b, 0xa9, 0xc4, 0x00, 0x61, 0x0e, 0xc3,
- 0xeb, 0x03, 0x1b, 0xc7, 0xc3, 0x02, 0x84, 0x0e, 0xc3, 0x5a, 0x03, 0x1b,
- 0xcb, 0x17, 0xc3, 0x1b, 0xcf, 0xc3, 0xcb, 0x93, 0x0e, 0xc3, 0x33, 0x03,
- 0x1b, 0xdf, 0xc5, 0x01, 0x7b, 0x0e, 0xc3, 0xb2, 0x03, 0x1b, 0xe3, 0x00,
- 0x43, 0x1b, 0xe7, 0xc7, 0x01, 0x79, 0x0e, 0xd0, 0x31, 0x02, 0x43, 0x1c,
- 0x0b, 0x54, 0x39, 0x45, 0xc3, 0x1c, 0x17, 0xc6, 0xc5, 0xcc, 0x0e, 0xc9,
- 0x48, 0x59, 0x1f, 0x23, 0xc3, 0x1c, 0x23, 0x44, 0x1f, 0x96, 0x43, 0x1c,
- 0x2f, 0x46, 0x17, 0xef, 0xc3, 0x1c, 0x3f, 0x47, 0x06, 0x1b, 0xc3, 0x1c,
- 0x4b, 0x46, 0x04, 0x73, 0x43, 0x1c, 0x57, 0xcf, 0x66, 0x41, 0x0e, 0xcf,
- 0x11, 0x46, 0x2d, 0xea, 0x43, 0x1c, 0x63, 0xc5, 0x04, 0x73, 0x0e, 0xce,
- 0xd9, 0x48, 0x1f, 0x2f, 0x43, 0x1c, 0x6f, 0xc5, 0x04, 0x73, 0x0e, 0xce,
- 0xd1, 0x48, 0x1f, 0x2f, 0x43, 0x1c, 0x7b, 0x45, 0x0d, 0xe7, 0xc3, 0x1c,
- 0x87, 0xc4, 0x6b, 0x56, 0x0e, 0xcb, 0xb9, 0x46, 0x35, 0xd2, 0xc3, 0x1c,
- 0xa8, 0xc4, 0x0d, 0xbd, 0x0e, 0xcb, 0x70, 0xc5, 0x17, 0xef, 0x0e, 0xcc,
- 0x01, 0xc6, 0x06, 0x1b, 0x0e, 0xcb, 0xf9, 0xc5, 0x04, 0x73, 0x0e, 0xcb,
- 0xf0, 0xc5, 0x17, 0xef, 0x0e, 0xcb, 0xe9, 0xc6, 0x06, 0x1b, 0x0e, 0xcb,
- 0xe1, 0xc5, 0x04, 0x73, 0x0e, 0xcb, 0xd8, 0x43, 0x36, 0x3b, 0xc3, 0x1c,
- 0xba, 0xc3, 0x00, 0xef, 0x0e, 0xcb, 0x98, 0x4c, 0x86, 0xc8, 0xc3, 0x1c,
- 0xcc, 0xca, 0x97, 0x34, 0x0e, 0xcb, 0x81, 0xd1, 0x51, 0xfa, 0x0e, 0xcb,
- 0x78, 0xcb, 0x51, 0xcd, 0x0e, 0xcb, 0x63, 0x03, 0x1c, 0xd8, 0xca, 0x97,
- 0x34, 0x0e, 0xcb, 0x59, 0xc8, 0x43, 0xd5, 0x0e, 0xcb, 0x50, 0x47, 0x39,
- 0x45, 0xc3, 0x1c, 0xde, 0xc6, 0xc5, 0xcc, 0x0e, 0xc9, 0x40, 0x52, 0x4e,
- 0xa2, 0xc3, 0x1c, 0xea, 0x44, 0x1f, 0x96, 0x43, 0x1c, 0xf6, 0x47, 0x06,
- 0x1b, 0xc3, 0x1d, 0x08, 0x46, 0x04, 0x73, 0x43, 0x1d, 0x14, 0x48, 0x1f,
- 0x2f, 0xc3, 0x1d, 0x20, 0xc5, 0x04, 0x73, 0x0e, 0xcc, 0xab, 0x03, 0x1d,
- 0x2c, 0xc5, 0x17, 0xef, 0x0e, 0xcc, 0xb9, 0xc6, 0x06, 0x1b, 0x0e, 0xcc,
- 0xb0, 0x48, 0x1f, 0x2f, 0xc3, 0x1d, 0x32, 0xc5, 0x17, 0xef, 0x0e, 0xcc,
- 0xa1, 0xc6, 0x06, 0x1b, 0x0e, 0xcc, 0x99, 0xc5, 0x04, 0x73, 0x0e, 0xcc,
- 0x90, 0x44, 0x0d, 0xe7, 0xc3, 0x1d, 0x3e, 0x45, 0xa2, 0x3f, 0xc3, 0x1d,
- 0x48, 0x46, 0x35, 0xd2, 0xc3, 0x1d, 0x5a, 0xc4, 0x0d, 0xbd, 0x0e, 0xc9,
- 0x98, 0xc6, 0x66, 0x41, 0x0e, 0xcd, 0x29, 0x46, 0x2d, 0xea, 0x43, 0x1d,
- 0x72, 0xc5, 0x17, 0xef, 0x0e, 0xca, 0x51, 0xc6, 0x06, 0x1b, 0x0e, 0xca,
- 0x49, 0xc5, 0x04, 0x73, 0x0e, 0xca, 0x40, 0xc5, 0x17, 0xef, 0x0e, 0xca,
- 0x39, 0xc6, 0x06, 0x1b, 0x0e, 0xca, 0x31, 0xc5, 0x04, 0x73, 0x0e, 0xca,
- 0x28, 0x43, 0x36, 0x3b, 0xc3, 0x1d, 0x7e, 0x44, 0x00, 0xef, 0x43, 0x1d,
- 0x90, 0xcb, 0x51, 0xcd, 0x0e, 0xc9, 0xb3, 0x03, 0x1d, 0xa2, 0xca, 0x97,
- 0x34, 0x0e, 0xc9, 0xa9, 0xd1, 0x51, 0xfa, 0x0e, 0xc9, 0xa0, 0xcb, 0x51,
- 0xcd, 0x0e, 0xc9, 0x8b, 0x03, 0x1d, 0xa8, 0xca, 0x97, 0x34, 0x0e, 0xc9,
- 0x81, 0xc8, 0x43, 0xd5, 0x0e, 0xc9, 0x78, 0x48, 0xbe, 0x8d, 0xc3, 0x1d,
- 0xae, 0x45, 0xdd, 0x45, 0x43, 0x1d, 0xc3, 0xc5, 0x17, 0xef, 0x0e, 0xca,
- 0xdb, 0x03, 0x1d, 0xd8, 0xc6, 0x06, 0x1b, 0x0e, 0xca, 0xd1, 0xc5, 0x04,
- 0x73, 0x0e, 0xca, 0xc8, 0xc5, 0x17, 0xef, 0x0e, 0xca, 0xbb, 0x03, 0x1d,
- 0xde, 0xc6, 0x06, 0x1b, 0x0e, 0xca, 0xb1, 0xc5, 0x04, 0x73, 0x0e, 0xca,
- 0xa8, 0x45, 0x05, 0x63, 0xc3, 0x1d, 0xe4, 0xca, 0x65, 0xb0, 0x0e, 0xc9,
- 0x18, 0xc7, 0xc5, 0xcb, 0x0e, 0xd1, 0xe9, 0xc7, 0x29, 0xd4, 0x0e, 0xd1,
- 0xe1, 0xc7, 0x89, 0xd9, 0x0e, 0xd1, 0xd8, 0xc6, 0xcf, 0x79, 0x0e, 0xd2,
- 0x91, 0xc7, 0x29, 0xd4, 0x0e, 0xd2, 0x88, 0xc8, 0xb7, 0xbd, 0x0e, 0xd2,
- 0x79, 0xc7, 0x29, 0xd4, 0x0e, 0xd2, 0x70, 0x00, 0x43, 0x1d, 0xf6, 0x00,
- 0x43, 0x1e, 0x02, 0xc4, 0x01, 0x75, 0x0e, 0xd2, 0x19, 0xc8, 0xb7, 0xbd,
- 0x0e, 0xd2, 0x10, 0xc4, 0x01, 0x75, 0x0e, 0xd2, 0x01, 0xc8, 0xb7, 0xbd,
- 0x0e, 0xd1, 0xf8, 0xcc, 0x51, 0xcc, 0x0e, 0xcf, 0xe0, 0x8e, 0x08, 0xac,
- 0x48, 0x94, 0x08, 0xac, 0x38, 0x4c, 0x8b, 0xa8, 0xc3, 0x1e, 0x0e, 0xd2,
- 0x4a, 0xd6, 0x08, 0xae, 0xa1, 0xd3, 0x41, 0x29, 0x08, 0xae, 0x99, 0x43,
- 0x01, 0xe7, 0xc3, 0x1e, 0x20, 0xd0, 0x5e, 0x32, 0x08, 0xae, 0x89, 0x50,
- 0x5c, 0x62, 0x43, 0x1e, 0x2c, 0xca, 0x86, 0xbe, 0x08, 0xae, 0x80, 0x94,
- 0x05, 0x44, 0x48, 0x8e, 0x05, 0x44, 0x58, 0x9f, 0x08, 0x8e, 0xf9, 0x9e,
- 0x08, 0x8e, 0xf0, 0xc7, 0x76, 0x59, 0x08, 0x8e, 0x09, 0xc7, 0x11, 0x41,
- 0x08, 0x8c, 0x08, 0xc4, 0x0f, 0x7c, 0x08, 0x8e, 0x01, 0xc5, 0x44, 0x7b,
- 0x08, 0x8c, 0x10, 0xc4, 0x18, 0x83, 0x08, 0x8e, 0xb9, 0xc2, 0x26, 0x51,
- 0x08, 0x8e, 0xb0, 0xc3, 0x0c, 0x5b, 0x08, 0x8e, 0xa9, 0xc3, 0x06, 0x9e,
- 0x08, 0x8e, 0xa0, 0xc4, 0x04, 0x5e, 0x08, 0x8e, 0x99, 0xc2, 0x01, 0x47,
+ 0x44, 0x0f, 0x00, 0xd8, 0xc9, 0xb0, 0xd8, 0x0e, 0x92, 0x21, 0x16, 0x43,
+ 0x14, 0x60, 0x47, 0x01, 0xff, 0xc3, 0x14, 0x6c, 0x46, 0x08, 0xd7, 0x43,
+ 0x14, 0x88, 0xcd, 0x7f, 0x03, 0x00, 0xee, 0x41, 0xc8, 0xb9, 0x7b, 0x00,
+ 0xee, 0x29, 0x42, 0x00, 0xc0, 0x43, 0x14, 0xa2, 0xc6, 0x00, 0x33, 0x00,
+ 0x18, 0xb8, 0x45, 0x00, 0x62, 0xc3, 0x14, 0xb1, 0x42, 0x01, 0x0e, 0xc3,
+ 0x14, 0xbb, 0x4c, 0x1c, 0xe0, 0xc3, 0x14, 0xc7, 0xca, 0x9c, 0x24, 0x00,
+ 0x18, 0xc8, 0xe0, 0x0b, 0xa7, 0x01, 0x07, 0x60, 0x44, 0x00, 0x63, 0xc3,
+ 0x14, 0xd3, 0x45, 0x05, 0x98, 0x43, 0x14, 0xdd, 0xc5, 0x03, 0x50, 0x01,
+ 0x07, 0x09, 0xc5, 0x00, 0x34, 0x00, 0x1a, 0x68, 0xcb, 0x96, 0x14, 0x01,
+ 0x06, 0x81, 0x48, 0xbb, 0x33, 0x43, 0x14, 0xe9, 0xca, 0x9d, 0x04, 0x00,
+ 0xd6, 0x19, 0xca, 0x09, 0x52, 0x00, 0xd6, 0x08, 0xcd, 0x3f, 0xd7, 0x00,
+ 0x19, 0xb1, 0xce, 0x2e, 0x55, 0x00, 0x19, 0xc0, 0x46, 0x01, 0xab, 0x43,
+ 0x14, 0xf5, 0x46, 0x01, 0xab, 0x43, 0x15, 0x01, 0xcf, 0x69, 0x9c, 0x00,
+ 0xef, 0x91, 0xc4, 0xdd, 0x2f, 0x00, 0xef, 0x39, 0x98, 0x00, 0xee, 0xb1,
+ 0x91, 0x00, 0xee, 0xa9, 0x87, 0x00, 0xee, 0xa0, 0xc6, 0x00, 0x33, 0x00,
+ 0xd5, 0xf8, 0xc2, 0x01, 0x0d, 0x00, 0xef, 0x79, 0xc2, 0x01, 0x47, 0x00,
+ 0xee, 0xc8, 0xd9, 0x1e, 0xed, 0x00, 0xef, 0x61, 0xc5, 0xaf, 0x0c, 0x00,
+ 0xef, 0x28, 0xd5, 0x34, 0x9f, 0x00, 0xee, 0x98, 0xc4, 0x6e, 0x0c, 0x00,
+ 0xef, 0x50, 0xc3, 0x0b, 0x47, 0x00, 0xef, 0x48, 0x00, 0x43, 0x15, 0x0d,
+ 0x47, 0x68, 0x4a, 0x43, 0x15, 0x19, 0xc8, 0xbf, 0xd3, 0x00, 0xee, 0xb8,
+ 0xcd, 0x76, 0xfd, 0x00, 0xd6, 0x00, 0xc6, 0x00, 0x33, 0x07, 0xf1, 0x38,
+ 0xc6, 0x00, 0x33, 0x07, 0xf1, 0x40, 0x49, 0x02, 0x5b, 0xc3, 0x15, 0x29,
+ 0xce, 0x1f, 0xa7, 0x00, 0x1b, 0x0b, 0x03, 0x15, 0x35, 0xd0, 0x2c, 0x01,
+ 0x00, 0xee, 0x69, 0x12, 0xc3, 0x15, 0x3b, 0x11, 0xc3, 0x15, 0x47, 0xcc,
+ 0x87, 0x84, 0x00, 0x18, 0x59, 0xcc, 0x1e, 0x64, 0x00, 0x18, 0x79, 0xc8,
+ 0x7c, 0x8b, 0x00, 0x19, 0x99, 0x42, 0x00, 0x47, 0xc3, 0x15, 0x53, 0xc5,
+ 0x1f, 0x9c, 0x00, 0x1a, 0xeb, 0x03, 0x15, 0x5f, 0xc6, 0x61, 0xbc, 0x00,
+ 0x1a, 0xf3, 0x03, 0x15, 0x65, 0xc5, 0x1f, 0x01, 0x00, 0x1b, 0x02, 0x03,
+ 0x15, 0x6b, 0xc5, 0x00, 0x34, 0x00, 0x19, 0x1b, 0x03, 0x15, 0x6f, 0xc5,
+ 0x03, 0x50, 0x00, 0x18, 0x3a, 0x03, 0x15, 0x75, 0xcc, 0x87, 0x84, 0x00,
+ 0xee, 0x09, 0xcc, 0x1e, 0x64, 0x00, 0xee, 0x00, 0xc2, 0x02, 0x52, 0x05,
+ 0x47, 0x81, 0xc2, 0x00, 0x9a, 0x05, 0x47, 0x79, 0xc2, 0x07, 0x44, 0x05,
+ 0x47, 0x71, 0xc2, 0x01, 0x01, 0x05, 0x47, 0x69, 0xc2, 0x06, 0x6b, 0x05,
+ 0x47, 0x61, 0x97, 0x05, 0x47, 0x59, 0x83, 0x05, 0x47, 0x50, 0xc4, 0x24,
+ 0x35, 0x05, 0x47, 0x49, 0xc5, 0x05, 0x1b, 0x05, 0x47, 0x41, 0x15, 0xc3,
+ 0x15, 0x79, 0x08, 0xc3, 0x15, 0x85, 0x16, 0xc3, 0x15, 0x91, 0xc3, 0x05,
+ 0x17, 0x05, 0x47, 0x09, 0xc4, 0x16, 0x57, 0x05, 0x47, 0x00, 0xc6, 0x00,
+ 0x33, 0x00, 0x19, 0x70, 0xc5, 0x03, 0x50, 0x00, 0xd6, 0x39, 0xc5, 0x00,
+ 0x34, 0x00, 0x19, 0x58, 0xc3, 0x0f, 0xdb, 0x00, 0x18, 0x8b, 0x03, 0x15,
+ 0x9d, 0xca, 0x34, 0xbf, 0x00, 0x19, 0xd0, 0xc5, 0x00, 0x34, 0x00, 0x19,
+ 0x89, 0xc9, 0x0f, 0xa9, 0x07, 0xf1, 0x23, 0x03, 0x15, 0xa3, 0xca, 0x01,
+ 0x17, 0x07, 0xf1, 0x2a, 0x03, 0x15, 0xa9, 0xc2, 0x05, 0x1b, 0x00, 0x1f,
+ 0x39, 0x8b, 0x01, 0x65, 0x68, 0xc3, 0x08, 0xea, 0x00, 0x1f, 0x69, 0xc2,
+ 0x05, 0x1b, 0x00, 0x1f, 0x18, 0xc4, 0x06, 0x7a, 0x01, 0x65, 0x99, 0xc4,
+ 0xd3, 0xd7, 0x01, 0x65, 0xc9, 0xc2, 0x01, 0xce, 0x01, 0x65, 0xd9, 0xc4,
+ 0x03, 0x76, 0x01, 0x66, 0x58, 0x47, 0xc9, 0x6d, 0xc3, 0x15, 0xaf, 0x47,
+ 0x8f, 0xf3, 0x43, 0x15, 0xd7, 0xc3, 0xd3, 0x51, 0x01, 0x65, 0xb9, 0xc2,
+ 0x01, 0xce, 0x01, 0x65, 0xe9, 0xc4, 0xa2, 0x5a, 0x01, 0x67, 0x61, 0xc6,
+ 0xd5, 0xc8, 0x01, 0x67, 0x70, 0xc3, 0x08, 0xea, 0x00, 0x1f, 0x61, 0xc2,
+ 0x05, 0x1b, 0x00, 0x1f, 0x10, 0xc4, 0x06, 0x7a, 0x01, 0x65, 0x91, 0xc4,
+ 0xd3, 0xd7, 0x01, 0x65, 0xc1, 0xc2, 0x01, 0xce, 0x01, 0x65, 0xd1, 0xc4,
+ 0x03, 0x76, 0x01, 0x66, 0x50, 0x8b, 0x01, 0x65, 0x61, 0xc2, 0x05, 0x1b,
+ 0x00, 0x1f, 0x30, 0x47, 0xc9, 0x6d, 0xc3, 0x15, 0xe7, 0x47, 0x8f, 0xf3,
+ 0x43, 0x16, 0x0f, 0xc3, 0xd3, 0x51, 0x01, 0x65, 0xb1, 0xc2, 0x01, 0xce,
+ 0x01, 0x65, 0xe1, 0xc4, 0xa2, 0x5a, 0x01, 0x67, 0x59, 0xc6, 0xd5, 0xc8,
+ 0x01, 0x67, 0x68, 0xc4, 0x15, 0xa9, 0x08, 0x17, 0x59, 0xc9, 0x15, 0x9c,
+ 0x08, 0x17, 0xa0, 0xc4, 0x0d, 0x89, 0x08, 0x17, 0x61, 0xcb, 0x12, 0xe3,
+ 0x08, 0x17, 0xa8, 0xc3, 0x0d, 0x8a, 0x08, 0x17, 0x69, 0xca, 0x9f, 0x48,
+ 0x08, 0x17, 0xb0, 0xc3, 0x41, 0xca, 0x08, 0x17, 0x71, 0xca, 0x36, 0x8d,
+ 0x08, 0x17, 0xb8, 0xc2, 0x0d, 0x8b, 0x08, 0x17, 0x79, 0xc8, 0x0d, 0x7e,
+ 0x08, 0x17, 0xc0, 0xc8, 0x0d, 0x7e, 0x08, 0x17, 0xc9, 0xc2, 0x0d, 0x8b,
+ 0x08, 0x17, 0x80, 0xd9, 0x1f, 0x6a, 0x0f, 0xa8, 0x10, 0xc8, 0xbc, 0xd3,
+ 0x0f, 0xab, 0x39, 0xc8, 0xc0, 0xe3, 0x0f, 0xaa, 0xd8, 0xc6, 0xaa, 0x0c,
+ 0x0f, 0xc8, 0x13, 0x03, 0x16, 0x1f, 0xc6, 0xd4, 0x96, 0x0f, 0xaa, 0x00,
+ 0xc5, 0x7f, 0x3f, 0x01, 0x93, 0x03, 0x03, 0x16, 0x25, 0xc6, 0xae, 0x92,
+ 0x01, 0x93, 0x52, 0x03, 0x16, 0x2b, 0xc2, 0x00, 0x39, 0x01, 0x93, 0x78,
+ 0xc5, 0xba, 0x7e, 0x01, 0x93, 0x13, 0x03, 0x16, 0x31, 0xc6, 0xae, 0x6e,
+ 0x01, 0x93, 0x5a, 0x03, 0x16, 0x37, 0xc2, 0x00, 0x39, 0x01, 0x93, 0x88,
+ 0xc2, 0x00, 0x39, 0x01, 0x93, 0x90, 0xc4, 0x68, 0xc6, 0x01, 0x93, 0x2b,
+ 0x03, 0x16, 0x3d, 0xc6, 0xae, 0x80, 0x01, 0x93, 0x62, 0x03, 0x16, 0x43,
+ 0xc2, 0x00, 0x39, 0x01, 0x93, 0xa0, 0x00, 0x43, 0x16, 0x49, 0xc4, 0xb1,
+ 0xd8, 0x01, 0x93, 0x43, 0x03, 0x16, 0x51, 0xc6, 0xb1, 0xd7, 0x01, 0x93,
+ 0x4a, 0x03, 0x16, 0x57, 0xc2, 0x00, 0x39, 0x01, 0x93, 0xd8, 0xc4, 0x16,
+ 0x57, 0x01, 0x27, 0x51, 0xc4, 0x24, 0x35, 0x01, 0x23, 0x41, 0xc5, 0x05,
+ 0x1b, 0x01, 0x23, 0x39, 0x15, 0xc3, 0x16, 0x5d, 0x08, 0xc3, 0x16, 0x69,
+ 0x16, 0xc3, 0x16, 0x75, 0xc3, 0x05, 0x17, 0x01, 0x23, 0x00, 0xc4, 0x02,
+ 0xe3, 0x01, 0x14, 0xc1, 0xc3, 0x02, 0xcc, 0x01, 0x51, 0xc0, 0xe0, 0x02,
+ 0xc7, 0x0f, 0x88, 0x78, 0x9c, 0x01, 0x27, 0x49, 0x9b, 0x01, 0x27, 0x41,
+ 0x9a, 0x01, 0x27, 0x39, 0x99, 0x01, 0x27, 0x31, 0x98, 0x01, 0x27, 0x29,
+ 0x97, 0x01, 0x27, 0x21, 0x96, 0x01, 0x27, 0x19, 0x95, 0x01, 0x27, 0x11,
+ 0x94, 0x01, 0x27, 0x09, 0x93, 0x01, 0x27, 0x01, 0x92, 0x01, 0x26, 0xf9,
+ 0x91, 0x01, 0x26, 0xf1, 0x90, 0x01, 0x26, 0xe9, 0x8f, 0x01, 0x26, 0xe1,
+ 0x8e, 0x01, 0x26, 0xd9, 0x8d, 0x01, 0x26, 0xd1, 0x8c, 0x01, 0x26, 0xc9,
+ 0x8b, 0x01, 0x26, 0xc1, 0x8a, 0x01, 0x26, 0xb9, 0x89, 0x01, 0x26, 0xb1,
+ 0x88, 0x01, 0x26, 0xa9, 0x87, 0x01, 0x26, 0xa1, 0x86, 0x01, 0x26, 0x99,
+ 0x85, 0x01, 0x26, 0x91, 0x84, 0x01, 0x26, 0x89, 0x83, 0x01, 0x26, 0x80,
+ 0x9c, 0x01, 0x26, 0x79, 0x9b, 0x01, 0x26, 0x71, 0x9a, 0x01, 0x26, 0x69,
+ 0x99, 0x01, 0x26, 0x61, 0x98, 0x01, 0x26, 0x59, 0x97, 0x01, 0x26, 0x51,
+ 0x96, 0x01, 0x26, 0x49, 0x95, 0x01, 0x26, 0x41, 0x94, 0x01, 0x26, 0x39,
+ 0x93, 0x01, 0x26, 0x31, 0x92, 0x01, 0x26, 0x29, 0x91, 0x01, 0x26, 0x21,
+ 0x90, 0x01, 0x26, 0x19, 0x8f, 0x01, 0x26, 0x11, 0x8e, 0x01, 0x26, 0x09,
+ 0x8d, 0x01, 0x26, 0x01, 0x8c, 0x01, 0x25, 0xf9, 0x8b, 0x01, 0x25, 0xf1,
+ 0x8a, 0x01, 0x25, 0xe9, 0x89, 0x01, 0x25, 0xe1, 0x88, 0x01, 0x25, 0xd9,
+ 0x87, 0x01, 0x25, 0xd1, 0x86, 0x01, 0x25, 0xc9, 0x85, 0x01, 0x25, 0xc1,
+ 0x84, 0x01, 0x25, 0xb9, 0x83, 0x01, 0x25, 0xb0, 0xc3, 0x15, 0xaa, 0x01,
+ 0x23, 0x9b, 0x03, 0x16, 0x81, 0xc3, 0x25, 0x4e, 0x01, 0x23, 0x58, 0xc3,
+ 0x03, 0x2c, 0x01, 0x23, 0x61, 0x9b, 0x01, 0x92, 0xd2, 0x03, 0x16, 0x85,
+ 0xd0, 0x56, 0xdc, 0x01, 0x92, 0x40, 0xc3, 0x03, 0x2c, 0x01, 0x23, 0x89,
+ 0xd1, 0x56, 0xdb, 0x01, 0x92, 0x78, 0xc3, 0x03, 0x2c, 0x01, 0x23, 0x81,
+ 0xd1, 0x56, 0xdb, 0x01, 0x92, 0x70, 0xc3, 0x03, 0x2c, 0x01, 0x23, 0x79,
+ 0xd1, 0x56, 0xdb, 0x01, 0x92, 0x68, 0xc3, 0x03, 0x2c, 0x01, 0x23, 0x71,
+ 0x9b, 0x01, 0x95, 0xfa, 0x03, 0x16, 0x89, 0xc6, 0x52, 0xbe, 0x01, 0x23,
+ 0x69, 0xc3, 0x0d, 0x8a, 0x01, 0x95, 0xaa, 0x03, 0x16, 0x8f, 0xc5, 0xde,
+ 0xff, 0x0f, 0x92, 0x89, 0xc8, 0xb8, 0xfb, 0x0f, 0x92, 0x81, 0xc8, 0xc3,
+ 0x5b, 0x01, 0x94, 0xf9, 0xc7, 0xbb, 0xbc, 0x01, 0x95, 0x78, 0xcb, 0x97,
+ 0x74, 0x01, 0x92, 0x29, 0xc3, 0x84, 0x75, 0x01, 0x92, 0x38, 0xc5, 0xdf,
+ 0x9a, 0x01, 0x92, 0x31, 0xc2, 0x22, 0x45, 0x01, 0x94, 0x29, 0x07, 0xc3,
+ 0x16, 0x93, 0x17, 0xc3, 0x16, 0x9f, 0x16, 0xc3, 0x16, 0xaf, 0xc6, 0xd2,
+ 0x26, 0x01, 0x94, 0x99, 0xc6, 0xd9, 0x6a, 0x01, 0x94, 0xa8, 0xc2, 0x0a,
+ 0x20, 0x01, 0x94, 0x09, 0xc4, 0x05, 0xde, 0x01, 0x94, 0x11, 0xc2, 0x01,
+ 0x04, 0x01, 0x94, 0x48, 0xc3, 0x08, 0xde, 0x01, 0x94, 0x19, 0x0b, 0xc3,
+ 0x16, 0xbb, 0xc5, 0x19, 0xbb, 0x01, 0x94, 0xd8, 0xc4, 0x00, 0x48, 0x01,
+ 0x94, 0x39, 0xc4, 0x67, 0xe5, 0x01, 0x94, 0x79, 0xc8, 0xc0, 0x23, 0x01,
+ 0x94, 0xe9, 0xc9, 0xad, 0x8a, 0x01, 0x95, 0x68, 0x0b, 0xc3, 0x16, 0xcd,
+ 0xc3, 0x01, 0x02, 0x01, 0x94, 0xa0, 0xc3, 0x02, 0x14, 0x01, 0x94, 0x51,
+ 0x07, 0xc3, 0x16, 0xd9, 0xc3, 0x06, 0x7a, 0x01, 0x94, 0xd0, 0xc4, 0x00,
+ 0x97, 0x01, 0x94, 0x61, 0xc3, 0x29, 0xec, 0x01, 0x94, 0x68, 0xc3, 0x06,
+ 0x8d, 0x01, 0x94, 0x91, 0xc3, 0x00, 0x48, 0x01, 0x95, 0x20, 0x11, 0xc3,
+ 0x16, 0xe5, 0xc5, 0x01, 0xa2, 0x01, 0x95, 0x28, 0xc4, 0xcd, 0x24, 0x01,
+ 0x94, 0xc1, 0xc2, 0x00, 0x37, 0x01, 0x95, 0x31, 0xc3, 0x00, 0xce, 0x01,
+ 0x95, 0x38, 0x07, 0xc3, 0x16, 0xf7, 0xc4, 0x00, 0x48, 0x01, 0x95, 0x40,
+ 0x83, 0x01, 0x96, 0xa9, 0x8b, 0x01, 0x96, 0xb1, 0x97, 0x01, 0x96, 0xb9,
+ 0x87, 0x01, 0x96, 0xc1, 0x91, 0x01, 0x96, 0xc8, 0x83, 0x01, 0x96, 0xd1,
+ 0x8b, 0x01, 0x96, 0xd9, 0x97, 0x01, 0x96, 0xe1, 0x87, 0x01, 0x96, 0xe9,
+ 0x91, 0x01, 0x96, 0xf0, 0x83, 0x01, 0x96, 0xf9, 0x8b, 0x01, 0x97, 0x01,
+ 0x97, 0x01, 0x97, 0x09, 0x87, 0x01, 0x97, 0x11, 0x91, 0x01, 0x97, 0x18,
+ 0x83, 0x01, 0x97, 0x21, 0x8b, 0x01, 0x97, 0x29, 0x97, 0x01, 0x97, 0x31,
+ 0x87, 0x01, 0x97, 0x39, 0x91, 0x01, 0x97, 0x40, 0x83, 0x01, 0x97, 0x49,
+ 0x8b, 0x01, 0x97, 0x51, 0x97, 0x01, 0x97, 0x59, 0x87, 0x01, 0x97, 0x61,
+ 0x91, 0x01, 0x97, 0x68, 0x83, 0x01, 0x97, 0x71, 0x8b, 0x01, 0x97, 0x79,
+ 0x97, 0x01, 0x97, 0x81, 0x87, 0x01, 0x97, 0x89, 0x91, 0x01, 0x97, 0x90,
+ 0x83, 0x01, 0x97, 0x99, 0x97, 0x01, 0x97, 0xa1, 0x91, 0x01, 0x97, 0xa8,
+ 0x83, 0x01, 0x97, 0xb1, 0x8b, 0x01, 0x97, 0xb9, 0x97, 0x01, 0x97, 0xc1,
+ 0x87, 0x01, 0x97, 0xc9, 0x91, 0x01, 0x97, 0xd0, 0x83, 0x01, 0x97, 0xd9,
+ 0x8b, 0x01, 0x97, 0xe1, 0x87, 0x01, 0x97, 0xe9, 0x91, 0x01, 0x97, 0xf0,
+ 0xcf, 0x6a, 0xd7, 0x09, 0x2a, 0x19, 0x83, 0x09, 0x1b, 0x60, 0x0e, 0xc3,
+ 0x17, 0x01, 0x06, 0xc3, 0x17, 0x0b, 0x17, 0xc3, 0x17, 0x17, 0xc2, 0x00,
+ 0x16, 0x09, 0x1a, 0x59, 0x15, 0xc3, 0x17, 0x27, 0xc2, 0x07, 0x44, 0x09,
+ 0x1a, 0x41, 0xc3, 0x0e, 0x6d, 0x09, 0x1a, 0x39, 0xc2, 0x06, 0x72, 0x09,
+ 0x1a, 0x29, 0x0b, 0xc3, 0x17, 0x33, 0xc2, 0x01, 0x0e, 0x09, 0x1a, 0x09,
+ 0x09, 0xc3, 0x17, 0x43, 0xc3, 0x02, 0x1d, 0x09, 0x19, 0xd1, 0x83, 0x09,
+ 0x19, 0xc2, 0x03, 0x17, 0x4e, 0xc8, 0x07, 0x6c, 0x09, 0x1a, 0x80, 0x46,
+ 0x07, 0x6d, 0xc3, 0x17, 0x54, 0xc8, 0x1d, 0xf3, 0x09, 0x29, 0xe0, 0xc8,
+ 0x57, 0x9f, 0x09, 0x18, 0xf8, 0xc2, 0x07, 0x44, 0x09, 0x19, 0x29, 0xc6,
+ 0x47, 0x64, 0x09, 0x19, 0x20, 0x94, 0x09, 0x1a, 0xa0, 0xca, 0x90, 0xab,
+ 0x09, 0x18, 0xd8, 0xcf, 0x69, 0xf6, 0x09, 0x18, 0xbb, 0x03, 0x17, 0x68,
+ 0xc2, 0x00, 0xa9, 0x09, 0x18, 0xb1, 0xc3, 0x64, 0x5f, 0x09, 0x18, 0xa8,
+ 0xca, 0x6a, 0xd7, 0x09, 0x29, 0xd9, 0xc9, 0x5c, 0x76, 0x09, 0x29, 0xd0,
+ 0xc2, 0x03, 0xbd, 0x09, 0x17, 0xc9, 0xc4, 0x07, 0x30, 0x09, 0x17, 0xc1,
+ 0x42, 0x01, 0x0d, 0xc3, 0x17, 0x6e, 0xc3, 0x73, 0x7f, 0x09, 0x17, 0xa9,
+ 0xc2, 0x05, 0x57, 0x09, 0x17, 0xa0, 0xc7, 0x01, 0xe9, 0x09, 0x17, 0x91,
+ 0x42, 0x01, 0xba, 0x43, 0x17, 0x76, 0xc2, 0x00, 0xa9, 0x09, 0x17, 0x71,
+ 0xc2, 0x00, 0x0a, 0x09, 0x17, 0x68, 0xc8, 0xbd, 0xf3, 0x09, 0x18, 0x1b,
+ 0x03, 0x17, 0x7c, 0xca, 0x3a, 0x64, 0x09, 0x18, 0x10, 0xcf, 0x6b, 0x7c,
+ 0x09, 0x16, 0xf8, 0x46, 0x21, 0x5d, 0x43, 0x17, 0x82, 0x45, 0x21, 0x5e,
+ 0xc3, 0x17, 0x8e, 0xc8, 0xbd, 0xb3, 0x09, 0x29, 0x93, 0x03, 0x17, 0xa0,
+ 0xc2, 0x06, 0x67, 0x09, 0x15, 0xd8, 0xc3, 0x0e, 0x1c, 0x09, 0x16, 0x11,
+ 0x9f, 0x09, 0x16, 0x08, 0xc5, 0x5b, 0x6a, 0x09, 0x29, 0x88, 0x47, 0x07,
+ 0x6c, 0x43, 0x17, 0xa4, 0x00, 0x43, 0x17, 0xcd, 0x47, 0x07, 0x6c, 0x43,
+ 0x17, 0xd9, 0x47, 0x07, 0x6c, 0x43, 0x18, 0x0e, 0x46, 0x07, 0x6d, 0xc3,
+ 0x18, 0x18, 0xc4, 0x3a, 0xa6, 0x09, 0x15, 0x43, 0x03, 0x18, 0x5b, 0xc8,
+ 0xbe, 0x8b, 0x09, 0x15, 0x39, 0xc7, 0xb9, 0xf4, 0x09, 0x14, 0xa0, 0x47,
+ 0x07, 0x6c, 0x43, 0x18, 0x61, 0xd0, 0x5c, 0x9f, 0x09, 0x12, 0x89, 0xc7,
+ 0x5c, 0x78, 0x09, 0x12, 0x80, 0xd6, 0x29, 0x21, 0x09, 0x1c, 0x99, 0xd6,
+ 0x30, 0x9f, 0x09, 0x16, 0xa9, 0xc4, 0x5b, 0x6b, 0x09, 0x16, 0xa0, 0x00,
+ 0x43, 0x18, 0xa5, 0xcc, 0x83, 0xa0, 0x09, 0x13, 0x5b, 0x03, 0x18, 0xb4,
+ 0xc8, 0x20, 0xfb, 0x09, 0x13, 0x51, 0xc4, 0x5b, 0x6b, 0x09, 0x13, 0x49,
+ 0x4c, 0x21, 0x04, 0x43, 0x18, 0xba, 0xcd, 0x7a, 0x30, 0x09, 0x12, 0x19,
+ 0xce, 0x6f, 0xde, 0x09, 0x12, 0x11, 0xc8, 0x1d, 0xf3, 0x09, 0x12, 0x08,
+ 0xc2, 0x03, 0xbd, 0x09, 0x12, 0x51, 0x83, 0x09, 0x12, 0x48, 0xc9, 0xb3,
+ 0xf0, 0x09, 0x11, 0xb3, 0x03, 0x18, 0xd5, 0xcd, 0x76, 0x88, 0x09, 0x11,
+ 0xc1, 0x46, 0x07, 0x6d, 0x43, 0x18, 0xdb, 0x00, 0x43, 0x18, 0xeb, 0x16,
+ 0xc3, 0x18, 0xf7, 0xce, 0x75, 0xf0, 0x09, 0x28, 0xc9, 0x15, 0xc3, 0x19,
+ 0x03, 0xcc, 0x8b, 0x68, 0x09, 0x10, 0x99, 0xcc, 0x84, 0x90, 0x09, 0x10,
+ 0x90, 0xcd, 0x1d, 0x69, 0x09, 0x10, 0xf8, 0xc7, 0x6f, 0x36, 0x09, 0x10,
+ 0xd1, 0x11, 0x43, 0x19, 0x12, 0x47, 0x07, 0x6c, 0x43, 0x19, 0x1e, 0x47,
+ 0x07, 0x6c, 0x43, 0x19, 0x49, 0xa2, 0x09, 0x27, 0xf1, 0xa0, 0x09, 0x27,
+ 0xe9, 0x9f, 0x09, 0x27, 0xe1, 0x9d, 0x09, 0x27, 0xd8, 0xa4, 0x09, 0x27,
+ 0xc1, 0x9d, 0x09, 0x27, 0xb8, 0xa6, 0x09, 0x27, 0x8b, 0x03, 0x19, 0x6f,
+ 0x9e, 0x09, 0x27, 0x80, 0xa1, 0x09, 0x27, 0x71, 0xa0, 0x09, 0x27, 0x68,
+ 0xa5, 0x09, 0x27, 0x61, 0xa4, 0x09, 0x27, 0x59, 0xa0, 0x09, 0x27, 0x50,
+ 0xa3, 0x09, 0x27, 0x49, 0xa2, 0x09, 0x27, 0x40, 0xa5, 0x09, 0x27, 0x31,
+ 0xa2, 0x09, 0x27, 0x29, 0x9d, 0x09, 0x27, 0x20, 0xa6, 0x09, 0x27, 0x19,
+ 0x9d, 0x09, 0x27, 0x10, 0xce, 0x71, 0x20, 0x09, 0x26, 0xf1, 0x9d, 0x09,
+ 0x26, 0xe8, 0x9e, 0x09, 0x26, 0xd1, 0x9d, 0x09, 0x26, 0xc8, 0xa2, 0x09,
+ 0x26, 0xb9, 0x9e, 0x09, 0x26, 0xb0, 0x00, 0x43, 0x19, 0x75, 0x00, 0x43,
+ 0x19, 0x81, 0xc8, 0xbc, 0x33, 0x09, 0x0f, 0xb0, 0x94, 0x09, 0x26, 0x9b,
+ 0x03, 0x19, 0x93, 0xc4, 0xe3, 0x0b, 0x09, 0x26, 0x91, 0xc2, 0x01, 0x0d,
+ 0x09, 0x0c, 0x59, 0xcc, 0x85, 0x2c, 0x09, 0x0c, 0x51, 0x86, 0x09, 0x0c,
+ 0x49, 0x9f, 0x09, 0x0c, 0x40, 0x83, 0x09, 0x26, 0x8b, 0x03, 0x19, 0x97,
+ 0x8b, 0x09, 0x0b, 0x82, 0x03, 0x19, 0x9b, 0x97, 0x09, 0x26, 0x81, 0x8b,
+ 0x09, 0x0a, 0xf9, 0x03, 0x43, 0x19, 0x9f, 0x97, 0x09, 0x1c, 0x31, 0xc2,
+ 0x00, 0x4d, 0x09, 0x0c, 0x30, 0x0a, 0xc3, 0x19, 0xad, 0xc4, 0xce, 0xb9,
+ 0x09, 0x0c, 0x29, 0xc2, 0x00, 0x48, 0x09, 0x0c, 0x21, 0x83, 0x09, 0x0b,
+ 0xf2, 0x03, 0x19, 0xc2, 0x83, 0x09, 0x1c, 0x21, 0x8b, 0x09, 0x0b, 0xe0,
+ 0x97, 0x09, 0x0b, 0x9b, 0x03, 0x19, 0xc6, 0x8b, 0x09, 0x0b, 0x90, 0x97,
+ 0x09, 0x0b, 0x5b, 0x03, 0x19, 0xca, 0x8b, 0x09, 0x0b, 0x3b, 0x03, 0x19,
+ 0xd4, 0x83, 0x09, 0x0b, 0x12, 0x03, 0x19, 0xe3, 0x42, 0x01, 0x0d, 0xc3,
+ 0x19, 0xf4, 0xc4, 0xea, 0x7f, 0x09, 0x1b, 0xf1, 0x86, 0x09, 0x0a, 0xca,
+ 0x03, 0x19, 0xfc, 0xc2, 0x0b, 0xfd, 0x09, 0x0b, 0xd9, 0x87, 0x09, 0x0b,
+ 0xd0, 0x8b, 0x09, 0x0b, 0xc3, 0x03, 0x1a, 0x02, 0x87, 0x09, 0x0b, 0xa2,
+ 0x03, 0x1a, 0x08, 0x8f, 0x09, 0x0b, 0x71, 0xc2, 0x03, 0xab, 0x09, 0x0b,
+ 0x68, 0xc3, 0x07, 0xee, 0x09, 0x0b, 0x09, 0xc4, 0xa5, 0x48, 0x09, 0x0b,
+ 0x00, 0x4c, 0x89, 0x7c, 0xc3, 0x1a, 0x0e, 0xe0, 0x07, 0x67, 0x09, 0x0c,
+ 0xe8, 0xcc, 0x84, 0x9c, 0x09, 0x0c, 0xc9, 0xc9, 0x90, 0xac, 0x09, 0x0c,
+ 0xc0, 0xca, 0xa4, 0xc0, 0x09, 0x0c, 0xa0, 0xcc, 0x88, 0xc8, 0x09, 0x0d,
+ 0x48, 0x86, 0x09, 0x0d, 0x18, 0xd2, 0x07, 0xf4, 0x09, 0x26, 0x79, 0x9f,
+ 0x09, 0x09, 0x78, 0xc5, 0x3a, 0xa5, 0x09, 0x26, 0x70, 0xc2, 0x03, 0xbd,
+ 0x09, 0x09, 0xe9, 0xc4, 0x85, 0xc0, 0x09, 0x09, 0xe1, 0xc6, 0x47, 0x64,
+ 0x09, 0x09, 0xd9, 0xc3, 0x01, 0x1d, 0x09, 0x09, 0xd1, 0xc2, 0x00, 0x3a,
+ 0x09, 0x09, 0xc8, 0xd4, 0x3a, 0x5a, 0x09, 0x26, 0x69, 0xce, 0x73, 0x7a,
+ 0x09, 0x09, 0x09, 0x46, 0x07, 0x6d, 0x43, 0x1a, 0x14, 0x46, 0x07, 0x6d,
+ 0xc3, 0x1a, 0x20, 0xc4, 0x3a, 0xa6, 0x09, 0x08, 0xe8, 0xc2, 0x01, 0x0d,
+ 0x09, 0x09, 0x41, 0x90, 0x09, 0x09, 0x38, 0x00, 0x43, 0x1a, 0x3b, 0x47,
+ 0x07, 0x6c, 0x43, 0x1a, 0x45, 0xc5, 0x3a, 0xa5, 0x09, 0x08, 0x48, 0xcc,
+ 0x84, 0xc0, 0x09, 0x08, 0x31, 0xc8, 0xb9, 0x53, 0x09, 0x08, 0x28, 0x97,
+ 0x09, 0x08, 0x11, 0x87, 0x09, 0x08, 0x08, 0x97, 0x09, 0x26, 0x51, 0xc3,
+ 0x55, 0x95, 0x09, 0x07, 0xf8, 0xd6, 0x29, 0x21, 0x09, 0x26, 0x49, 0xcd,
+ 0x76, 0xaf, 0x09, 0x07, 0x78, 0x46, 0x07, 0x6d, 0xc3, 0x1a, 0x63, 0xc8,
+ 0xb9, 0x43, 0x09, 0x07, 0x68, 0x00, 0x43, 0x1a, 0xac, 0x15, 0xc3, 0x1a,
+ 0xbe, 0xc3, 0x73, 0x7f, 0x09, 0x1b, 0xb9, 0x17, 0xc3, 0x1a, 0xc8, 0x0e,
+ 0xc3, 0x1a, 0xd0, 0x0d, 0xc3, 0x1a, 0xdf, 0xc8, 0x61, 0x7e, 0x09, 0x05,
+ 0x59, 0xc2, 0x01, 0x0e, 0x09, 0x05, 0x4b, 0x03, 0x1a, 0xee, 0xc9, 0x73,
+ 0x18, 0x09, 0x05, 0x3b, 0x03, 0x1a, 0xf4, 0xc3, 0x64, 0x5f, 0x09, 0x05,
+ 0x31, 0x83, 0x09, 0x05, 0x12, 0x03, 0x1a, 0xfa, 0xc2, 0x06, 0x82, 0x09,
+ 0x25, 0xa1, 0xc2, 0x01, 0x03, 0x09, 0x25, 0x93, 0x03, 0x1b, 0x07, 0xc2,
+ 0x00, 0x96, 0x09, 0x25, 0x83, 0x03, 0x1b, 0x0b, 0xc8, 0x61, 0x7e, 0x09,
+ 0x25, 0x79, 0xc2, 0x00, 0x0a, 0x09, 0x25, 0x71, 0xc3, 0x02, 0xe4, 0x09,
+ 0x25, 0x68, 0xc2, 0x00, 0x5b, 0x09, 0x04, 0x91, 0xc2, 0x00, 0x2f, 0x09,
+ 0x04, 0x88, 0xc2, 0x01, 0x03, 0x09, 0x04, 0xd1, 0xc4, 0x5c, 0x76, 0x09,
+ 0x04, 0xc2, 0x03, 0x1b, 0x0f, 0x15, 0xc3, 0x1b, 0x15, 0xc2, 0x01, 0xf9,
+ 0x09, 0x25, 0x31, 0xc2, 0x01, 0xce, 0x09, 0x25, 0x29, 0x0f, 0xc3, 0x1b,
+ 0x21, 0x0e, 0xc3, 0x1b, 0x31, 0x0d, 0xc3, 0x1b, 0x3b, 0xc8, 0x61, 0x7e,
+ 0x09, 0x24, 0xc9, 0x0a, 0xc3, 0x1b, 0x45, 0x09, 0xc3, 0x1b, 0x4d, 0xc5,
+ 0xa5, 0x47, 0x09, 0x24, 0x91, 0x06, 0xc3, 0x1b, 0x58, 0x03, 0x43, 0x1b,
+ 0x64, 0xc3, 0x0a, 0xf1, 0x09, 0x1b, 0xb1, 0xc4, 0x76, 0x32, 0x09, 0x03,
+ 0xf8, 0xc5, 0x3a, 0xa5, 0x09, 0x04, 0x32, 0x03, 0x1b, 0x73, 0xc9, 0xb5,
+ 0x58, 0x09, 0x24, 0x60, 0xc5, 0xe3, 0x0a, 0x09, 0x24, 0x59, 0xc3, 0x03,
+ 0xaa, 0x09, 0x24, 0x51, 0xc3, 0x0a, 0xf1, 0x09, 0x03, 0xa8, 0xc9, 0x55,
+ 0x8f, 0x09, 0x24, 0x49, 0x4d, 0x61, 0xe0, 0x43, 0x1b, 0x79, 0xa1, 0x09,
+ 0x03, 0x89, 0xa0, 0x09, 0x03, 0x80, 0xc9, 0xb7, 0xc5, 0x09, 0x24, 0x39,
+ 0xc2, 0x0b, 0xfd, 0x09, 0x02, 0x79, 0xc2, 0x00, 0x03, 0x09, 0x02, 0x70,
+ 0xc2, 0x05, 0x5c, 0x09, 0x24, 0x31, 0xc2, 0x01, 0xce, 0x09, 0x24, 0x29,
+ 0xc3, 0xa8, 0x58, 0x09, 0x24, 0x20, 0x42, 0x01, 0x0d, 0xc3, 0x1b, 0xba,
+ 0xc3, 0x21, 0x00, 0x09, 0x1b, 0x83, 0x03, 0x1b, 0xc6, 0xcf, 0x69, 0xf6,
+ 0x09, 0x00, 0xa1, 0xc5, 0x07, 0x67, 0x09, 0x00, 0x91, 0x0b, 0xc3, 0x1b,
+ 0xcc, 0xc2, 0x01, 0x0e, 0x09, 0x00, 0x79, 0x42, 0x07, 0x69, 0xc3, 0x1b,
+ 0xd8, 0xc9, 0x73, 0x18, 0x09, 0x00, 0x61, 0xc4, 0x07, 0xed, 0x09, 0x00,
+ 0x58, 0x83, 0x09, 0x1b, 0x89, 0xc4, 0x3a, 0x6a, 0x09, 0x00, 0xd9, 0xc4,
+ 0x52, 0xe5, 0x09, 0x00, 0xd1, 0xca, 0xa8, 0x62, 0x09, 0x00, 0xc9, 0xc9,
+ 0x5c, 0x76, 0x09, 0x00, 0xc1, 0xc5, 0xdb, 0xd5, 0x09, 0x00, 0xb8, 0x49,
+ 0x0c, 0xb0, 0xc3, 0x1b, 0xe2, 0xc9, 0xa3, 0xef, 0x09, 0x01, 0xd1, 0xc9,
+ 0x85, 0x6b, 0x09, 0x01, 0xc8, 0xc7, 0x01, 0xe9, 0x09, 0x01, 0x89, 0xd5,
+ 0x32, 0x92, 0x09, 0x01, 0x80, 0x8b, 0x09, 0x01, 0x31, 0xc3, 0xe5, 0x98,
+ 0x09, 0x01, 0x28, 0x00, 0x43, 0x1b, 0xef, 0x97, 0x09, 0x14, 0x3b, 0x03,
+ 0x1b, 0xfb, 0x8b, 0x09, 0x14, 0x2b, 0x03, 0x1b, 0xff, 0x87, 0x09, 0x14,
+ 0x21, 0x04, 0xc3, 0x1c, 0x03, 0x83, 0x09, 0x14, 0x02, 0x03, 0x1c, 0x0b,
+ 0xc4, 0x3a, 0xa6, 0x09, 0x0a, 0x51, 0x42, 0x01, 0xba, 0xc3, 0x1c, 0x0f,
+ 0xc2, 0x00, 0x47, 0x09, 0x0a, 0x41, 0xc3, 0xeb, 0xd6, 0x09, 0x0a, 0x38,
+ 0x84, 0x09, 0x22, 0x19, 0x83, 0x09, 0x22, 0x10, 0x97, 0x09, 0x21, 0x89,
+ 0x9f, 0x09, 0x21, 0x38, 0xcd, 0x7a, 0xbf, 0x09, 0x22, 0xa8, 0xcd, 0x7a,
+ 0xbf, 0x09, 0x22, 0x98, 0x84, 0x09, 0x21, 0xf9, 0x83, 0x09, 0x21, 0xf0,
+ 0xcd, 0x7a, 0xbf, 0x09, 0x21, 0xb8, 0xcd, 0x7a, 0xbf, 0x09, 0x21, 0x78,
+ 0xcd, 0x7a, 0xbf, 0x09, 0x21, 0x28, 0x06, 0xc3, 0x1c, 0x15, 0xc6, 0x61,
+ 0xbc, 0x00, 0x27, 0x78, 0xca, 0x94, 0x3c, 0x00, 0x22, 0xa0, 0xc3, 0x2c,
+ 0x4d, 0x00, 0xe4, 0x39, 0xc9, 0xac, 0x7c, 0x00, 0xe4, 0x31, 0xc2, 0x00,
+ 0x98, 0x00, 0xe4, 0x20, 0x46, 0x01, 0xab, 0x43, 0x1c, 0x21, 0x87, 0x00,
+ 0x22, 0x31, 0xc2, 0x00, 0x5b, 0x00, 0x22, 0xd9, 0xc2, 0x00, 0xc9, 0x05,
+ 0x34, 0x79, 0xc2, 0x00, 0xa7, 0x05, 0x34, 0x88, 0xc5, 0x12, 0xb9, 0x00,
+ 0xe4, 0x01, 0xc6, 0xa2, 0x80, 0x00, 0x23, 0xd8, 0xc2, 0x0b, 0xa2, 0x00,
+ 0x28, 0x89, 0xc3, 0xea, 0xd7, 0x05, 0x32, 0x29, 0xc2, 0x12, 0xc5, 0x05,
+ 0x32, 0xa9, 0xc3, 0x0a, 0x25, 0x05, 0x33, 0x08, 0x46, 0x01, 0xab, 0x43,
+ 0x1c, 0x2d, 0x46, 0x01, 0xab, 0x43, 0x1c, 0x45, 0xca, 0xa3, 0xda, 0x00,
+ 0x26, 0x70, 0xcf, 0x6b, 0x6d, 0x00, 0x25, 0x58, 0xca, 0xa3, 0xd0, 0x00,
+ 0x24, 0x78, 0x1c, 0xc3, 0x1c, 0x63, 0x87, 0x00, 0x22, 0xab, 0x03, 0x1c,
+ 0x6d, 0xc2, 0x00, 0x5b, 0x00, 0x22, 0xf9, 0xc2, 0x00, 0xc9, 0x05, 0x34,
+ 0x18, 0x91, 0x05, 0x34, 0xc9, 0xcb, 0x96, 0xa3, 0x05, 0x33, 0x68, 0xc2,
+ 0x06, 0x8b, 0x05, 0x32, 0x48, 0xc2, 0x01, 0x0e, 0x00, 0x25, 0xdb, 0x03,
+ 0x1c, 0x73, 0x44, 0x2f, 0xae, 0xc3, 0x1c, 0x79, 0xc2, 0x00, 0xc9, 0x05,
+ 0x34, 0xb9, 0x83, 0x00, 0x22, 0x41, 0xc3, 0x1c, 0x4f, 0x00, 0x22, 0x48,
+ 0xcf, 0x69, 0x9c, 0x00, 0x26, 0xd8, 0xcc, 0x26, 0x18, 0x00, 0x25, 0x88,
+ 0xc2, 0x00, 0x06, 0x05, 0x33, 0x19, 0x07, 0xc3, 0x1c, 0x84, 0xc4, 0x02,
+ 0xcb, 0x00, 0x22, 0x60, 0x46, 0x01, 0xab, 0x43, 0x1c, 0x8c, 0xc3, 0xea,
+ 0xd7, 0x00, 0x27, 0x09, 0xc3, 0x27, 0xc3, 0x00, 0x25, 0xeb, 0x03, 0x1c,
+ 0x98, 0xc2, 0x01, 0x0e, 0x00, 0x25, 0x48, 0xc9, 0x1e, 0x89, 0x00, 0x26,
+ 0x99, 0xc5, 0x1f, 0x9c, 0x00, 0x26, 0x88, 0x87, 0x00, 0x28, 0xc9, 0x96,
+ 0x00, 0x23, 0x18, 0x46, 0x01, 0xab, 0x43, 0x1c, 0x9e, 0x43, 0xca, 0xd1,
+ 0xc3, 0x1c, 0xaa, 0xc3, 0x7a, 0xf0, 0x00, 0x24, 0x08, 0x46, 0x01, 0xab,
+ 0x43, 0x1c, 0xcc, 0x46, 0x01, 0xab, 0xc3, 0x1c, 0xe4, 0xc7, 0x8a, 0x59,
+ 0x00, 0x22, 0x50, 0x46, 0x01, 0xab, 0x43, 0x1c, 0xf6, 0xc6, 0xce, 0x68,
+ 0x00, 0x27, 0x4b, 0x03, 0x1d, 0x11, 0xc8, 0xba, 0xf3, 0x00, 0x25, 0x08,
+ 0xc9, 0x96, 0xa5, 0x05, 0x33, 0x59, 0xc5, 0xcc, 0x55, 0x00, 0x23, 0x58,
+ 0xcb, 0x97, 0xf8, 0x00, 0x23, 0xe8, 0xc9, 0x1e, 0x89, 0x00, 0x27, 0x29,
+ 0xc6, 0x61, 0xbc, 0x00, 0x27, 0x19, 0xc5, 0x1e, 0x64, 0x00, 0x22, 0xe8,
+ 0x46, 0x01, 0xab, 0x43, 0x1d, 0x17, 0xd9, 0x1e, 0x57, 0x00, 0x23, 0xb8,
+ 0x16, 0x43, 0x1d, 0x23, 0x47, 0x09, 0x72, 0xc3, 0x1d, 0x2d, 0xc4, 0xe5,
+ 0xfb, 0x05, 0x32, 0x08, 0x87, 0x00, 0x21, 0xb3, 0x03, 0x1d, 0x39, 0xc2,
+ 0x00, 0xc9, 0x05, 0x34, 0x28, 0x46, 0x01, 0xab, 0x43, 0x1d, 0x3f, 0xc9,
+ 0x21, 0xcb, 0x00, 0x29, 0x71, 0xcb, 0x96, 0x8d, 0x00, 0x29, 0x01, 0xc4,
+ 0x04, 0x63, 0x00, 0x28, 0x91, 0xc4, 0x13, 0xc7, 0x00, 0x26, 0x28, 0xc9,
+ 0x4d, 0x9d, 0x00, 0x29, 0x41, 0xcb, 0x96, 0x8d, 0x00, 0x29, 0x11, 0xc4,
+ 0x13, 0xc7, 0x00, 0x26, 0x49, 0xc4, 0x04, 0x63, 0x00, 0x26, 0x39, 0xc9,
+ 0x21, 0xcb, 0x00, 0x25, 0x10, 0xc2, 0x00, 0x5b, 0x00, 0x29, 0x51, 0x87,
+ 0x05, 0x34, 0x40, 0xc2, 0x00, 0x36, 0x05, 0x32, 0x10, 0xcf, 0x6b, 0x6d,
+ 0x00, 0x29, 0x30, 0x8b, 0x00, 0x20, 0xcb, 0x03, 0x1d, 0x49, 0x97, 0x00,
+ 0x20, 0x70, 0x8e, 0x05, 0x33, 0x21, 0x8f, 0x05, 0x33, 0x30, 0xc9, 0x21,
+ 0xcb, 0x00, 0x29, 0x21, 0xcb, 0x96, 0x8d, 0x00, 0x25, 0x30, 0xcf, 0x6b,
+ 0x6d, 0x00, 0x25, 0xf0, 0xc9, 0x1e, 0x89, 0x00, 0x27, 0xc1, 0xc8, 0x75,
+ 0xa2, 0x05, 0x32, 0x80, 0xc3, 0xec, 0xe7, 0x00, 0x28, 0x71, 0xc3, 0xec,
+ 0x45, 0x00, 0x28, 0x61, 0xc3, 0xcf, 0xdc, 0x00, 0x28, 0x51, 0xc3, 0xb1,
+ 0x04, 0x00, 0x28, 0x41, 0x06, 0xc3, 0x1d, 0x4f, 0xc3, 0xeb, 0x0a, 0x00,
+ 0x28, 0x20, 0xc4, 0x04, 0x63, 0x00, 0x26, 0x19, 0xc9, 0x21, 0xcb, 0x00,
+ 0x24, 0xd1, 0xcf, 0x2e, 0xd8, 0x00, 0x24, 0xe1, 0xc6, 0x03, 0x81, 0x00,
+ 0x24, 0xf0, 0xc6, 0x03, 0x81, 0x00, 0x27, 0xf1, 0xc4, 0x04, 0x63, 0x00,
+ 0x27, 0xe1, 0xc9, 0x21, 0xcb, 0x00, 0x25, 0x90, 0xc6, 0x03, 0x81, 0x00,
+ 0x24, 0x93, 0x03, 0x1d, 0x5f, 0xc9, 0x21, 0xcb, 0x00, 0x27, 0xb1, 0xc6,
+ 0x5e, 0x39, 0x00, 0x24, 0x81, 0xcb, 0x96, 0x8d, 0x00, 0x24, 0xa0, 0x06,
+ 0xc3, 0x1d, 0x65, 0xcf, 0x2e, 0xd8, 0x00, 0x25, 0xc0, 0xcb, 0x92, 0x0a,
+ 0x00, 0x27, 0x91, 0xc8, 0x1e, 0x8a, 0x00, 0x27, 0x80, 0xcf, 0x69, 0x9c,
+ 0x00, 0x27, 0x50, 0xc5, 0x1f, 0x9c, 0x00, 0x26, 0xb1, 0xc5, 0x1e, 0x64,
+ 0x00, 0x20, 0x00, 0x83, 0x05, 0x32, 0x31, 0x46, 0x3f, 0x88, 0x43, 0x1d,
+ 0x71, 0xc8, 0x1e, 0x8a, 0x00, 0x26, 0xf1, 0xc8, 0x21, 0xcc, 0x00, 0x24,
+ 0xc0, 0x46, 0x01, 0xab, 0x43, 0x1d, 0x91, 0xc9, 0x21, 0xcb, 0x00, 0x25,
+ 0x61, 0xcb, 0x96, 0x8d, 0x05, 0x34, 0x50, 0xc5, 0x67, 0x97, 0x00, 0x6c,
+ 0x39, 0xc6, 0xd3, 0x46, 0x00, 0x6c, 0x40, 0xc7, 0xcd, 0xfe, 0x00, 0x6c,
+ 0xd1, 0xc7, 0xc6, 0xb8, 0x00, 0x6c, 0xe1, 0xc7, 0xce, 0x59, 0x00, 0x6d,
+ 0x01, 0xc7, 0xca, 0xe7, 0x00, 0x6d, 0x11, 0x16, 0xc3, 0x1d, 0x9b, 0x06,
+ 0xc3, 0x1d, 0xa7, 0xc7, 0xce, 0x36, 0x00, 0x6d, 0xa1, 0xc7, 0x93, 0xa5,
+ 0x00, 0x6d, 0xb0, 0xc5, 0x67, 0x97, 0x00, 0x6c, 0x69, 0xc6, 0xd4, 0x24,
+ 0x00, 0x6c, 0x70, 0xc5, 0x67, 0x97, 0x00, 0x6c, 0x79, 0xc6, 0xd4, 0x24,
+ 0x00, 0x6c, 0x80, 0x4a, 0x9f, 0x20, 0xc3, 0x1d, 0xb3, 0xc5, 0x67, 0x97,
+ 0x00, 0x6d, 0xc0, 0xc7, 0xcd, 0x8e, 0x00, 0x6d, 0x59, 0xc7, 0xcc, 0xfb,
+ 0x00, 0x6e, 0x11, 0xc7, 0xc4, 0xf8, 0x00, 0x6e, 0x28, 0xc7, 0xc6, 0x17,
+ 0x00, 0x6d, 0x61, 0xc6, 0x93, 0xa6, 0x00, 0x6d, 0x98, 0xd2, 0x48, 0x2a,
+ 0x00, 0x6d, 0x29, 0xc5, 0x67, 0x97, 0x00, 0x6e, 0x08, 0x45, 0xdb, 0xad,
+ 0x43, 0x1d, 0xdf, 0xa3, 0x0e, 0xd5, 0x79, 0xa2, 0x0e, 0xd5, 0x71, 0xa1,
+ 0x0e, 0xd5, 0x69, 0xa0, 0x0e, 0xd5, 0x61, 0x9f, 0x0e, 0xd5, 0x59, 0x9e,
+ 0x0e, 0xd5, 0x51, 0x9d, 0x0e, 0xd5, 0x48, 0x49, 0x1d, 0x81, 0x43, 0x1d,
+ 0xf1, 0x49, 0x1d, 0x81, 0x43, 0x1d, 0xfd, 0x49, 0x1d, 0x81, 0x43, 0x1e,
+ 0x09, 0x4c, 0x8c, 0x28, 0xc3, 0x1e, 0x15, 0x87, 0x0e, 0xcd, 0x20, 0x49,
+ 0x1d, 0x81, 0x43, 0x1e, 0x21, 0x49, 0x1d, 0x81, 0x43, 0x1e, 0x2d, 0xc9,
+ 0x63, 0x21, 0x0e, 0xc8, 0xf9, 0x45, 0x00, 0x3f, 0x43, 0x1e, 0x39, 0xc8,
+ 0x3a, 0x32, 0x0e, 0xc8, 0xe9, 0xc6, 0x23, 0x24, 0x0e, 0xc8, 0xd8, 0xc8,
+ 0x3a, 0x32, 0x0e, 0xc8, 0xc9, 0xc6, 0x23, 0x24, 0x0e, 0xc8, 0xb8, 0xc7,
+ 0xc7, 0xad, 0x0e, 0xd4, 0x21, 0xc4, 0x00, 0x48, 0x0e, 0xd4, 0x08, 0xc3,
+ 0xec, 0xf6, 0x0e, 0xd3, 0xe9, 0xc3, 0xec, 0xf3, 0x0e, 0xd3, 0xe1, 0xc3,
+ 0xec, 0xf0, 0x0e, 0xd3, 0xd9, 0xc3, 0xe6, 0x4b, 0x0e, 0xd3, 0xd1, 0xc3,
+ 0xec, 0xea, 0x0e, 0xd3, 0xc9, 0xc3, 0xe6, 0x4f, 0x0e, 0xd3, 0xc1, 0xc3,
+ 0xec, 0xe4, 0x0e, 0xd3, 0xb8, 0xd0, 0x5e, 0x7f, 0x0e, 0xd2, 0xa9, 0xd0,
+ 0x58, 0x3f, 0x0e, 0xd2, 0xa0, 0xcb, 0x9b, 0x94, 0x0e, 0xd3, 0x99, 0xd0,
+ 0x58, 0xff, 0x0e, 0xd3, 0x90, 0xcc, 0x38, 0x59, 0x0e, 0xd3, 0x01, 0xcc,
+ 0x5c, 0x1f, 0x0e, 0xd2, 0xf8, 0xd5, 0x38, 0x50, 0x0e, 0xd2, 0xe1, 0xcc,
+ 0x8e, 0x44, 0x0e, 0xd2, 0xd8, 0xc9, 0xb0, 0x00, 0x0e, 0xd3, 0x39, 0x43,
+ 0x00, 0xf7, 0xc3, 0x1e, 0x45, 0xc8, 0x52, 0x93, 0x0e, 0xd3, 0x10, 0x4c,
+ 0x56, 0x20, 0xc3, 0x1e, 0x57, 0x4d, 0x4e, 0x90, 0x43, 0x1e, 0x69, 0xc6,
+ 0x30, 0x47, 0x0e, 0xca, 0xa1, 0xc6, 0x02, 0x91, 0x0e, 0xca, 0x99, 0xc6,
+ 0x23, 0x24, 0x0e, 0xca, 0x90, 0x4d, 0x4e, 0x90, 0xc3, 0x1e, 0x7b, 0x4c,
+ 0x56, 0x20, 0x43, 0x1e, 0x8d, 0x05, 0xc3, 0x1e, 0x9f, 0xc8, 0x41, 0xac,
+ 0x0e, 0xd1, 0x0a, 0x03, 0x1e, 0xab, 0xc6, 0x3a, 0x1e, 0x0e, 0xd1, 0x41,
+ 0xc8, 0x41, 0xac, 0x0e, 0xd1, 0x22, 0x03, 0x1e, 0xaf, 0xc8, 0x3a, 0x32,
+ 0x0e, 0xd0, 0xc1, 0xc6, 0x23, 0x24, 0x0e, 0xd0, 0xb8, 0xcd, 0x81, 0xb4,
+ 0x0e, 0xd0, 0xe1, 0xc5, 0x03, 0xf4, 0x0e, 0xd0, 0xd0, 0xc6, 0x07, 0xc1,
+ 0x0e, 0xd0, 0xd9, 0xc4, 0x03, 0xf5, 0x0e, 0xd0, 0xc8, 0xc3, 0x1d, 0x77,
+ 0x0e, 0xc8, 0x1b, 0x03, 0x1e, 0xb3, 0xc3, 0x00, 0x96, 0x0e, 0xc2, 0xd2,
+ 0x03, 0x1e, 0xb7, 0x00, 0x43, 0x1e, 0xbb, 0xc4, 0x02, 0xf9, 0x0e, 0xc3,
+ 0xeb, 0x03, 0x1e, 0xd9, 0xc3, 0x04, 0x64, 0x0e, 0xc3, 0x5a, 0x03, 0x1e,
+ 0xdd, 0x17, 0xc3, 0x1e, 0xe1, 0xc3, 0x60, 0x9d, 0x0e, 0xc3, 0x33, 0x03,
+ 0x1e, 0xf1, 0xc5, 0x03, 0xfb, 0x0e, 0xc3, 0xb2, 0x03, 0x1e, 0xf5, 0x00,
+ 0x43, 0x1e, 0xf9, 0xc8, 0x1d, 0x79, 0x0e, 0xd0, 0x31, 0x4d, 0x7e, 0x19,
+ 0xc3, 0x1f, 0x1d, 0x4c, 0x8d, 0x48, 0x43, 0x1f, 0x29, 0x54, 0x38, 0x8e,
+ 0xc3, 0x1f, 0x3b, 0xc6, 0xc9, 0x60, 0x0e, 0xc9, 0x48, 0x59, 0x1d, 0x76,
+ 0xc3, 0x1f, 0x47, 0x44, 0x1e, 0x66, 0x43, 0x1f, 0x53, 0x46, 0x15, 0x2e,
+ 0xc3, 0x1f, 0x63, 0x47, 0x04, 0x1b, 0xc3, 0x1f, 0x6f, 0x46, 0x00, 0x3e,
+ 0x43, 0x1f, 0x7b, 0xcf, 0x68, 0x16, 0x0e, 0xcf, 0x11, 0x46, 0x2c, 0x44,
+ 0x43, 0x1f, 0x87, 0xc5, 0x00, 0x3e, 0x0e, 0xce, 0xd9, 0x48, 0x1d, 0x82,
+ 0x43, 0x1f, 0x93, 0xc5, 0x00, 0x3e, 0x0e, 0xce, 0xd1, 0x48, 0x1d, 0x82,
+ 0x43, 0x1f, 0x9f, 0x45, 0x0e, 0xd4, 0xc3, 0x1f, 0xab, 0xc4, 0x66, 0xf5,
+ 0x0e, 0xcb, 0xb9, 0x46, 0x32, 0xdb, 0xc3, 0x1f, 0xcc, 0xc4, 0x0c, 0xa4,
+ 0x0e, 0xcb, 0x70, 0xc5, 0x15, 0x2e, 0x0e, 0xcc, 0x01, 0xc6, 0x04, 0x1b,
+ 0x0e, 0xcb, 0xf9, 0xc5, 0x00, 0x3e, 0x0e, 0xcb, 0xf0, 0xc5, 0x15, 0x2e,
+ 0x0e, 0xcb, 0xe9, 0xc6, 0x04, 0x1b, 0x0e, 0xcb, 0xe1, 0xc5, 0x00, 0x3e,
+ 0x0e, 0xcb, 0xd8, 0x43, 0x35, 0x3c, 0xc3, 0x1f, 0xde, 0xc3, 0x03, 0x02,
+ 0x0e, 0xcb, 0x98, 0x4c, 0x87, 0x24, 0xc3, 0x1f, 0xf0, 0xca, 0x98, 0x0f,
+ 0x0e, 0xcb, 0x81, 0xd1, 0x52, 0x8a, 0x0e, 0xcb, 0x78, 0xcb, 0x53, 0x7e,
+ 0x0e, 0xcb, 0x63, 0x03, 0x1f, 0xfc, 0xca, 0x98, 0x0f, 0x0e, 0xcb, 0x59,
+ 0xc8, 0x41, 0xac, 0x0e, 0xcb, 0x50, 0x47, 0x38, 0x8e, 0xc3, 0x20, 0x02,
+ 0xc6, 0xc9, 0x60, 0x0e, 0xc9, 0x40, 0x52, 0x4a, 0xb2, 0xc3, 0x20, 0x0e,
+ 0x44, 0x1e, 0x66, 0x43, 0x20, 0x1a, 0x47, 0x04, 0x1b, 0xc3, 0x20, 0x2c,
+ 0x46, 0x00, 0x3e, 0x43, 0x20, 0x38, 0x48, 0x1d, 0x82, 0xc3, 0x20, 0x44,
+ 0xc5, 0x00, 0x3e, 0x0e, 0xcc, 0xab, 0x03, 0x20, 0x50, 0xc5, 0x15, 0x2e,
+ 0x0e, 0xcc, 0xb9, 0xc6, 0x04, 0x1b, 0x0e, 0xcc, 0xb0, 0x48, 0x1d, 0x82,
+ 0xc3, 0x20, 0x56, 0xc5, 0x15, 0x2e, 0x0e, 0xcc, 0xa1, 0xc6, 0x04, 0x1b,
+ 0x0e, 0xcc, 0x99, 0xc5, 0x00, 0x3e, 0x0e, 0xcc, 0x90, 0x44, 0x0e, 0xd4,
+ 0xc3, 0x20, 0x62, 0x45, 0xa1, 0x3d, 0xc3, 0x20, 0x6c, 0x46, 0x32, 0xdb,
+ 0xc3, 0x20, 0x7e, 0xc4, 0x0c, 0xa4, 0x0e, 0xc9, 0x98, 0xc6, 0x68, 0x16,
+ 0x0e, 0xcd, 0x29, 0x46, 0x2c, 0x44, 0x43, 0x20, 0x96, 0xc5, 0x15, 0x2e,
+ 0x0e, 0xca, 0x51, 0xc6, 0x04, 0x1b, 0x0e, 0xca, 0x49, 0xc5, 0x00, 0x3e,
+ 0x0e, 0xca, 0x40, 0xc5, 0x15, 0x2e, 0x0e, 0xca, 0x39, 0xc6, 0x04, 0x1b,
+ 0x0e, 0xca, 0x31, 0xc5, 0x00, 0x3e, 0x0e, 0xca, 0x28, 0x43, 0x35, 0x3c,
+ 0xc3, 0x20, 0xa2, 0x44, 0x03, 0x6f, 0x43, 0x20, 0xb4, 0xcb, 0x53, 0x7e,
+ 0x0e, 0xc9, 0xb3, 0x03, 0x20, 0xc6, 0xca, 0x98, 0x0f, 0x0e, 0xc9, 0xa9,
+ 0xd1, 0x52, 0x8a, 0x0e, 0xc9, 0xa0, 0xcb, 0x53, 0x7e, 0x0e, 0xc9, 0x8b,
+ 0x03, 0x20, 0xcc, 0xca, 0x98, 0x0f, 0x0e, 0xc9, 0x81, 0xc8, 0x41, 0xac,
+ 0x0e, 0xc9, 0x78, 0x48, 0xbf, 0x73, 0xc3, 0x20, 0xd2, 0x45, 0xdc, 0x34,
+ 0x43, 0x20, 0xe7, 0xc5, 0x15, 0x2e, 0x0e, 0xca, 0xdb, 0x03, 0x20, 0xfc,
+ 0xc6, 0x04, 0x1b, 0x0e, 0xca, 0xd1, 0xc5, 0x00, 0x3e, 0x0e, 0xca, 0xc8,
+ 0xc5, 0x15, 0x2e, 0x0e, 0xca, 0xbb, 0x03, 0x21, 0x02, 0xc6, 0x04, 0x1b,
+ 0x0e, 0xca, 0xb1, 0xc5, 0x00, 0x3e, 0x0e, 0xca, 0xa8, 0x45, 0x0a, 0xe3,
+ 0xc3, 0x21, 0x08, 0xca, 0x63, 0x20, 0x0e, 0xc9, 0x18, 0xc7, 0xc9, 0x5f,
+ 0x0e, 0xd1, 0xe9, 0xc7, 0x29, 0xba, 0x0e, 0xd1, 0xe1, 0xc7, 0x82, 0x79,
+ 0x0e, 0xd1, 0xd8, 0xc6, 0xd8, 0xe0, 0x0e, 0xd2, 0x91, 0xc7, 0x29, 0xba,
+ 0x0e, 0xd2, 0x88, 0xc8, 0xc3, 0xb3, 0x0e, 0xd2, 0x79, 0xc7, 0x29, 0xba,
+ 0x0e, 0xd2, 0x70, 0x00, 0x43, 0x21, 0x1a, 0x00, 0x43, 0x21, 0x26, 0xc4,
+ 0x03, 0xf5, 0x0e, 0xd2, 0x19, 0xc8, 0xc3, 0xb3, 0x0e, 0xd2, 0x10, 0xc4,
+ 0x03, 0xf5, 0x0e, 0xd2, 0x01, 0xc8, 0xc3, 0xb3, 0x0e, 0xd1, 0xf8, 0xcc,
+ 0x53, 0x7d, 0x0e, 0xcf, 0xe0, 0x8e, 0x08, 0xac, 0x48, 0x94, 0x08, 0xac,
+ 0x38, 0x4c, 0x8e, 0xc8, 0xc3, 0x21, 0x32, 0xd2, 0x4f, 0x32, 0x08, 0xae,
+ 0xa1, 0xd3, 0x45, 0xfa, 0x08, 0xae, 0x99, 0x43, 0x00, 0x52, 0xc3, 0x21,
+ 0x44, 0xd0, 0x5b, 0x8f, 0x08, 0xae, 0x89, 0x50, 0x5a, 0x3f, 0x43, 0x21,
+ 0x50, 0xca, 0x84, 0x62, 0x08, 0xae, 0x80, 0x94, 0x05, 0x44, 0x48, 0x8e,
+ 0x05, 0x44, 0x58, 0xc3, 0xec, 0x6c, 0x08, 0x8e, 0xf9, 0xc3, 0xd8, 0xbc,
+ 0x08, 0x8e, 0xf0, 0xc7, 0x7d, 0xf8, 0x08, 0x8e, 0x09, 0xc7, 0x10, 0xac,
+ 0x08, 0x8c, 0x08, 0xc4, 0x21, 0x28, 0x08, 0x8e, 0x01, 0xc5, 0x45, 0xcf,
+ 0x08, 0x8c, 0x10, 0xc4, 0x15, 0xa7, 0x08, 0x8e, 0xb9, 0xc2, 0x22, 0x45,
+ 0x08, 0x8e, 0xb0, 0xc3, 0x0d, 0x8f, 0x08, 0x8e, 0xa9, 0xc3, 0x08, 0xde,
+ 0x08, 0x8e, 0xa0, 0xc4, 0x05, 0xde, 0x08, 0x8e, 0x99, 0xc2, 0x0a, 0x20,
0x08, 0x8e, 0x90, 0x97, 0x08, 0x8d, 0xf9, 0x8b, 0x08, 0x8d, 0xe9, 0x83,
- 0x08, 0x8d, 0x98, 0x8e, 0x08, 0x8d, 0xd3, 0x03, 0x1e, 0x3e, 0x94, 0x08,
- 0x8d, 0xc2, 0x03, 0x1e, 0x42, 0x97, 0x08, 0x8d, 0xb8, 0x8b, 0x08, 0x8d,
- 0xa8, 0x8e, 0x08, 0x8c, 0x5b, 0x03, 0x1e, 0x46, 0x94, 0x08, 0x8c, 0x4a,
- 0x03, 0x1e, 0x4a, 0xc2, 0x00, 0xa4, 0x08, 0x8c, 0xf1, 0x83, 0x08, 0x8c,
- 0xe8, 0xc2, 0x00, 0xa4, 0x08, 0x8c, 0xe1, 0x83, 0x08, 0x8c, 0xd8, 0x45,
- 0x01, 0xd2, 0xc3, 0x1e, 0x4e, 0xce, 0x6b, 0x2e, 0x01, 0x2f, 0x38, 0x45,
- 0x02, 0x4d, 0xc3, 0x1e, 0x5a, 0x46, 0x02, 0x12, 0x43, 0x1e, 0x66, 0xcc,
- 0x24, 0x24, 0x01, 0x17, 0x29, 0xc8, 0x07, 0x5f, 0x01, 0x14, 0x90, 0xcc,
- 0x24, 0x24, 0x01, 0x17, 0x21, 0xc8, 0x07, 0x5f, 0x01, 0x14, 0x88, 0xc7,
- 0x07, 0x09, 0x01, 0x9d, 0x01, 0xc5, 0xd8, 0xc2, 0x01, 0x9d, 0x20, 0xc8,
- 0x07, 0x08, 0x01, 0x9d, 0x78, 0xc2, 0x05, 0xd4, 0x01, 0x9a, 0x09, 0x90,
- 0x01, 0x9a, 0x10, 0xc7, 0x07, 0x09, 0x01, 0x9b, 0xc1, 0xc5, 0xd8, 0xc2,
- 0x01, 0x9b, 0xc8, 0xc5, 0xd9, 0x8f, 0x01, 0x99, 0x59, 0xc2, 0x00, 0x16,
- 0x01, 0x99, 0x60, 0xc3, 0x31, 0xd5, 0x01, 0x99, 0x79, 0x91, 0x01, 0x99,
- 0x80, 0xc3, 0xe7, 0x3c, 0x01, 0x99, 0xc1, 0xc2, 0x08, 0x22, 0x01, 0x99,
- 0xd0, 0xc5, 0xd6, 0xc4, 0x01, 0x97, 0xf9, 0xc6, 0xd1, 0xa7, 0x01, 0x9b,
- 0xd9, 0xc6, 0xcf, 0xfd, 0x01, 0x9b, 0xe1, 0xc7, 0xcb, 0x0b, 0x01, 0x9b,
- 0xe9, 0xc5, 0xdc, 0x00, 0x01, 0x9b, 0xf0, 0xc4, 0x8c, 0xa4, 0x01, 0x98,
- 0x61, 0xc4, 0xe5, 0x2f, 0x01, 0x98, 0x68, 0x05, 0xc3, 0x1e, 0x72, 0xc7,
- 0x07, 0x09, 0x01, 0x9d, 0x10, 0xc4, 0xda, 0xf7, 0x01, 0x9a, 0x19, 0xc2,
- 0x05, 0xd4, 0x01, 0x9a, 0x20, 0xc5, 0xdd, 0xb3, 0x01, 0x9a, 0x50, 0xc3,
- 0x0c, 0x46, 0x01, 0x9a, 0x60, 0xc2, 0x04, 0x2e, 0x01, 0x9e, 0x09, 0xc5,
- 0x04, 0xd4, 0x01, 0x9d, 0x3a, 0x03, 0x1e, 0x7e, 0xc7, 0x07, 0x09, 0x01,
- 0x9c, 0xf9, 0xc5, 0xd8, 0xc2, 0x01, 0x9d, 0x18, 0xc2, 0x00, 0xbf, 0x01,
- 0x3e, 0x79, 0xc3, 0x00, 0x57, 0x01, 0x3e, 0x70, 0x95, 0x0f, 0x8a, 0x11,
+ 0x08, 0x8d, 0x98, 0x8e, 0x08, 0x8d, 0xd3, 0x03, 0x21, 0x62, 0x94, 0x08,
+ 0x8d, 0xc2, 0x03, 0x21, 0x66, 0x97, 0x08, 0x8d, 0xb8, 0x8b, 0x08, 0x8d,
+ 0xa8, 0x8e, 0x08, 0x8c, 0x5b, 0x03, 0x21, 0x6a, 0x94, 0x08, 0x8c, 0x4a,
+ 0x03, 0x21, 0x6e, 0xc2, 0x01, 0x0e, 0x08, 0x8c, 0xf1, 0x83, 0x08, 0x8c,
+ 0xe8, 0xc2, 0x01, 0x0e, 0x08, 0x8c, 0xe1, 0x83, 0x08, 0x8c, 0xd8, 0x45,
+ 0x00, 0xc8, 0xc3, 0x21, 0x72, 0xce, 0x6c, 0x4f, 0x01, 0x2f, 0x38, 0x45,
+ 0x00, 0xcd, 0xc3, 0x21, 0x7e, 0x46, 0x01, 0x17, 0x43, 0x21, 0x8a, 0xcc,
+ 0x23, 0x30, 0x01, 0x17, 0x29, 0xc8, 0x08, 0xff, 0x01, 0x14, 0x90, 0xcc,
+ 0x23, 0x30, 0x01, 0x17, 0x21, 0xc8, 0x08, 0xff, 0x01, 0x14, 0x88, 0xc7,
+ 0x01, 0xe9, 0x01, 0x9d, 0x01, 0xc5, 0xde, 0xaf, 0x01, 0x9d, 0x20, 0xc8,
+ 0x01, 0xe8, 0x01, 0x9d, 0x78, 0xc2, 0x04, 0x34, 0x01, 0x9a, 0x09, 0x90,
+ 0x01, 0x9a, 0x10, 0xc7, 0x01, 0xe9, 0x01, 0x9b, 0xc1, 0xc5, 0xde, 0xaf,
+ 0x01, 0x9b, 0xc8, 0xc5, 0xe0, 0xf3, 0x01, 0x99, 0x59, 0xc2, 0x00, 0x16,
+ 0x01, 0x99, 0x60, 0xc3, 0x7e, 0x31, 0x01, 0x99, 0x79, 0x91, 0x01, 0x99,
+ 0x80, 0xc3, 0xdf, 0xad, 0x01, 0x99, 0xc1, 0xc2, 0x06, 0x82, 0x01, 0x99,
+ 0xd0, 0xc5, 0xda, 0x36, 0x01, 0x97, 0xf9, 0xc6, 0xd6, 0x16, 0x01, 0x9b,
+ 0xd9, 0xc6, 0xd4, 0x48, 0x01, 0x9b, 0xe1, 0xc7, 0xcd, 0x72, 0x01, 0x9b,
+ 0xe9, 0xc5, 0xe3, 0xf5, 0x01, 0x9b, 0xf0, 0xc4, 0x8c, 0xc4, 0x01, 0x98,
+ 0x61, 0xc4, 0xea, 0x7b, 0x01, 0x98, 0x68, 0x05, 0xc3, 0x21, 0x96, 0xc7,
+ 0x01, 0xe9, 0x01, 0x9d, 0x10, 0xc4, 0xdd, 0x1a, 0x01, 0x9a, 0x19, 0xc2,
+ 0x04, 0x34, 0x01, 0x9a, 0x20, 0xc5, 0xdc, 0x3e, 0x01, 0x9a, 0x50, 0xc3,
+ 0x0e, 0x70, 0x01, 0x9a, 0x60, 0xc2, 0x08, 0xae, 0x01, 0x9e, 0x09, 0xc5,
+ 0x03, 0xb4, 0x01, 0x9d, 0x3a, 0x03, 0x21, 0xa2, 0xc7, 0x01, 0xe9, 0x01,
+ 0x9c, 0xf9, 0xc5, 0xde, 0xaf, 0x01, 0x9d, 0x18, 0xc2, 0x00, 0xff, 0x01,
+ 0x3e, 0x79, 0xc3, 0x00, 0x3a, 0x01, 0x3e, 0x70, 0x95, 0x0f, 0x8a, 0x11,
0x94, 0x0f, 0x8a, 0x09, 0x93, 0x0f, 0x8a, 0x01, 0x92, 0x0f, 0x89, 0xf9,
0x91, 0x0f, 0x89, 0xf1, 0x90, 0x0f, 0x89, 0xe9, 0x8f, 0x0f, 0x89, 0xe1,
0x8e, 0x0f, 0x89, 0xd9, 0x8d, 0x0f, 0x89, 0xd1, 0x8c, 0x0f, 0x89, 0xc9,
@@ -15919,1244 +15968,1282 @@ uint8_t UnicodeNameToCodepointIndex_[241561] = {
0x83, 0x0f, 0x89, 0x81, 0x84, 0x0f, 0x89, 0x89, 0x85, 0x0f, 0x89, 0x91,
0x96, 0x0f, 0x8a, 0x19, 0x97, 0x0f, 0x8a, 0x21, 0x98, 0x0f, 0x8a, 0x29,
0x99, 0x0f, 0x8a, 0x31, 0x9a, 0x0f, 0x8a, 0x39, 0x9b, 0x0f, 0x8a, 0x41,
- 0x9c, 0x0f, 0x8a, 0x48, 0xc3, 0xe7, 0x2d, 0x0f, 0x91, 0xd9, 0xc3, 0xe7,
- 0x60, 0x0f, 0x91, 0x58, 0xc3, 0xe7, 0x03, 0x0f, 0x91, 0xd1, 0x1f, 0xc3,
- 0x1e, 0x84, 0x21, 0xc3, 0x1e, 0x96, 0x20, 0xc3, 0x1e, 0xa2, 0xc3, 0xe6,
- 0x88, 0x0f, 0x91, 0x61, 0xc3, 0xe6, 0xd0, 0x0f, 0x91, 0x21, 0xc3, 0xe7,
- 0x3f, 0x0f, 0x90, 0xf1, 0xc3, 0xe7, 0xc6, 0x0f, 0x90, 0xe9, 0x26, 0xc3,
- 0x1e, 0xae, 0xc3, 0xe6, 0xfd, 0x0f, 0x90, 0x88, 0x22, 0xc3, 0x1e, 0xba,
- 0xc3, 0xe6, 0xc1, 0x0f, 0x91, 0x99, 0xc3, 0xe6, 0xcd, 0x0f, 0x91, 0x91,
- 0xc3, 0xe6, 0x7c, 0x0f, 0x91, 0x09, 0xc3, 0xe7, 0x75, 0x0f, 0x90, 0xd0,
- 0x42, 0xe6, 0x7a, 0xc3, 0x1e, 0xc6, 0xc3, 0xe6, 0x73, 0x0f, 0x91, 0xa9,
- 0x1f, 0xc3, 0x1e, 0xce, 0x20, 0xc3, 0x1e, 0xe0, 0xc3, 0xe7, 0xc3, 0x0f,
- 0x91, 0x31, 0x22, 0xc3, 0x1e, 0xec, 0xc3, 0xe7, 0x1b, 0x0f, 0x90, 0xc8,
- 0xc3, 0xe6, 0x79, 0x0f, 0x91, 0x81, 0xc3, 0xe6, 0x61, 0x0f, 0x91, 0x19,
- 0xc3, 0xe6, 0xca, 0x0f, 0x90, 0xb0, 0xc2, 0xe3, 0xf6, 0x0f, 0x91, 0x69,
- 0x1d, 0xc3, 0x1e, 0xf8, 0xc2, 0xce, 0xa0, 0x0f, 0x90, 0xc1, 0xc2, 0xe7,
- 0xc7, 0x0f, 0x90, 0xa0, 0xc4, 0x04, 0x5e, 0x01, 0x20, 0x99, 0xc2, 0x01,
- 0x47, 0x01, 0x20, 0x90, 0xcb, 0x8e, 0x64, 0x01, 0x20, 0x23, 0x03, 0x1f,
- 0x00, 0xc3, 0x0a, 0xdf, 0x01, 0x20, 0x18, 0xc2, 0x00, 0xc7, 0x00, 0x43,
- 0x49, 0x83, 0x00, 0x43, 0x40, 0x10, 0xc3, 0x1f, 0x06, 0xc2, 0x1d, 0x5f,
- 0x00, 0x43, 0x11, 0xc2, 0x01, 0x29, 0x00, 0x43, 0x00, 0xc4, 0x01, 0x1e,
- 0x00, 0x38, 0x49, 0xc5, 0x01, 0xf7, 0x00, 0x38, 0x48, 0xcf, 0x37, 0x1e,
- 0x01, 0x56, 0x20, 0xcb, 0x0e, 0x83, 0x01, 0x56, 0x31, 0xce, 0x38, 0x53,
- 0x01, 0x56, 0x41, 0xcf, 0x69, 0x89, 0x01, 0x56, 0x51, 0xcc, 0x24, 0x24,
- 0x01, 0x56, 0x60, 0xc5, 0xdb, 0xc4, 0x00, 0xdc, 0x11, 0xc5, 0xd9, 0x99,
- 0x00, 0xdc, 0x08, 0xca, 0x6f, 0xa3, 0x0f, 0xb0, 0x29, 0xcc, 0x19, 0xc5,
- 0x0f, 0xb0, 0x21, 0xd3, 0x45, 0x18, 0x0f, 0xb0, 0x30, 0x45, 0x00, 0x56,
- 0x43, 0x1f, 0x10, 0xc7, 0x76, 0x66, 0x01, 0x17, 0xf1, 0x48, 0x03, 0x3b,
- 0x43, 0x1f, 0x1c, 0xc7, 0x76, 0x66, 0x01, 0x17, 0xb9, 0x48, 0x03, 0x3b,
- 0x43, 0x1f, 0x22, 0x00, 0x43, 0x1f, 0x28, 0x0b, 0xc3, 0x1f, 0x34, 0xc3,
- 0x06, 0x9e, 0x01, 0x0b, 0x98, 0x19, 0xc3, 0x1f, 0x43, 0xc2, 0x00, 0x4d,
- 0x01, 0x0b, 0xc9, 0xc4, 0x04, 0x5e, 0x01, 0x0b, 0x90, 0xc5, 0x6a, 0x79,
- 0x01, 0x0b, 0xd1, 0xc4, 0x01, 0xdc, 0x01, 0x0b, 0xb8, 0xc4, 0x18, 0x83,
- 0x01, 0x0b, 0xb1, 0xc2, 0x26, 0x51, 0x01, 0x0b, 0xa8, 0xce, 0x66, 0x33,
- 0x07, 0xf2, 0x19, 0xd2, 0x23, 0xbe, 0x07, 0xf0, 0xb8, 0xcc, 0x02, 0x53,
- 0x07, 0xf1, 0xb9, 0xcd, 0x66, 0x34, 0x07, 0xf2, 0x08, 0xc4, 0x02, 0x5b,
- 0x07, 0xf0, 0xc1, 0xc4, 0xe2, 0x5b, 0x07, 0xf0, 0xc0, 0x9d, 0x0f, 0x87,
+ 0x9c, 0x0f, 0x8a, 0x48, 0xc3, 0xea, 0xc2, 0x0f, 0x91, 0xd9, 0xc3, 0xeb,
+ 0x25, 0x0f, 0x91, 0x58, 0xc3, 0xeb, 0xf1, 0x0f, 0x91, 0xd1, 0x1f, 0xc3,
+ 0x21, 0xa8, 0x21, 0xc3, 0x21, 0xba, 0x20, 0xc3, 0x21, 0xc6, 0xc3, 0xeb,
+ 0xe2, 0x0f, 0x91, 0x61, 0xc3, 0xeb, 0x52, 0x0f, 0x91, 0x21, 0xc3, 0xea,
+ 0xc8, 0x0f, 0x90, 0xf1, 0xc3, 0xed, 0x29, 0x0f, 0x90, 0xe9, 0x26, 0xc3,
+ 0x21, 0xd2, 0xc3, 0xeb, 0x67, 0x0f, 0x90, 0x88, 0x22, 0xc3, 0x21, 0xde,
+ 0xc3, 0xeb, 0x97, 0x0f, 0x91, 0x99, 0xc3, 0xeb, 0x4c, 0x0f, 0x91, 0x91,
+ 0xc3, 0xeb, 0xaf, 0x0f, 0x91, 0x09, 0xc3, 0xea, 0xcb, 0x0f, 0x90, 0xd0,
+ 0x42, 0xeb, 0xb3, 0xc3, 0x21, 0xea, 0xc3, 0xeb, 0x2b, 0x0f, 0x91, 0xa9,
+ 0x1f, 0xc3, 0x21, 0xf2, 0x20, 0xc3, 0x22, 0x04, 0xc3, 0xed, 0x17, 0x0f,
+ 0x91, 0x31, 0x22, 0xc3, 0x22, 0x10, 0xc3, 0xeb, 0x5e, 0x0f, 0x90, 0xc8,
+ 0xc3, 0xeb, 0xb2, 0x0f, 0x91, 0x81, 0xc3, 0xec, 0x0f, 0x0f, 0x91, 0x19,
+ 0xc3, 0xeb, 0x49, 0x0f, 0x90, 0xb0, 0xc2, 0x98, 0x86, 0x0f, 0x91, 0x69,
+ 0x1d, 0xc3, 0x22, 0x1c, 0xc2, 0xe2, 0x82, 0x0f, 0x90, 0xc1, 0xc2, 0x8c,
+ 0x87, 0x0f, 0x90, 0xa0, 0xc4, 0x05, 0xde, 0x01, 0x20, 0x99, 0xc2, 0x0a,
+ 0x20, 0x01, 0x20, 0x90, 0xcb, 0x8f, 0x6b, 0x01, 0x20, 0x23, 0x03, 0x22,
+ 0x24, 0xc3, 0x02, 0xff, 0x01, 0x20, 0x18, 0xc2, 0x00, 0x96, 0x00, 0x43,
+ 0x49, 0x83, 0x00, 0x43, 0x40, 0x10, 0xc3, 0x22, 0x2a, 0xc2, 0x1a, 0x36,
+ 0x00, 0x43, 0x11, 0xc2, 0x07, 0x69, 0x00, 0x43, 0x00, 0xc4, 0x00, 0xcd,
+ 0x00, 0x38, 0x49, 0xc5, 0x00, 0x47, 0x00, 0x38, 0x48, 0xcf, 0x35, 0x23,
+ 0x01, 0x56, 0x20, 0xcb, 0x0e, 0xbc, 0x01, 0x56, 0x31, 0xce, 0x35, 0xda,
+ 0x01, 0x56, 0x41, 0xcf, 0x66, 0x45, 0x01, 0x56, 0x51, 0xcc, 0x23, 0x30,
+ 0x01, 0x56, 0x60, 0xc5, 0xe0, 0xbc, 0x00, 0xdc, 0x11, 0xc5, 0x36, 0x7f,
+ 0x00, 0xdc, 0x08, 0xca, 0x6f, 0x1e, 0x0f, 0xb0, 0x29, 0xcc, 0x1a, 0x5e,
+ 0x0f, 0xb0, 0x21, 0xd3, 0x3f, 0xbe, 0x0f, 0xb0, 0x30, 0x45, 0x00, 0x39,
+ 0x43, 0x22, 0x34, 0xc7, 0x79, 0xb4, 0x01, 0x17, 0xf1, 0x48, 0x00, 0x29,
+ 0x43, 0x22, 0x40, 0xc7, 0x79, 0xb4, 0x01, 0x17, 0xb9, 0x48, 0x00, 0x29,
+ 0x43, 0x22, 0x46, 0x00, 0x43, 0x22, 0x4c, 0x0b, 0xc3, 0x22, 0x58, 0xc3,
+ 0x08, 0xde, 0x01, 0x0b, 0x98, 0x19, 0xc3, 0x22, 0x67, 0xc2, 0x01, 0x04,
+ 0x01, 0x0b, 0xc9, 0xc4, 0x05, 0xde, 0x01, 0x0b, 0x90, 0xc5, 0x66, 0x81,
+ 0x01, 0x0b, 0xd1, 0xc4, 0x00, 0x48, 0x01, 0x0b, 0xb8, 0xc4, 0x15, 0xa7,
+ 0x01, 0x0b, 0xb1, 0xc2, 0x22, 0x45, 0x01, 0x0b, 0xa8, 0xce, 0x6a, 0x7e,
+ 0x07, 0xf2, 0x19, 0xd2, 0x23, 0x42, 0x07, 0xf0, 0xb8, 0xcc, 0x00, 0xd3,
+ 0x07, 0xf1, 0xb9, 0xcd, 0x6a, 0x7f, 0x07, 0xf2, 0x08, 0xc4, 0x00, 0xdb,
+ 0x07, 0xf0, 0xc1, 0xc4, 0xe5, 0x4f, 0x07, 0xf0, 0xc0, 0x9d, 0x0f, 0x87,
0x51, 0x9e, 0x0f, 0x87, 0x59, 0x9f, 0x0f, 0x87, 0x61, 0xa0, 0x0f, 0x87,
0x69, 0xa1, 0x0f, 0x87, 0x71, 0xa2, 0x0f, 0x87, 0x79, 0xa3, 0x0f, 0x87,
0x81, 0xa4, 0x0f, 0x87, 0x89, 0xa5, 0x0f, 0x87, 0x91, 0xa6, 0x0f, 0x87,
- 0x98, 0x9d, 0x0f, 0x87, 0xa1, 0x9e, 0x0f, 0x87, 0xa8, 0xc6, 0xd4, 0xbf,
- 0x0f, 0x85, 0x21, 0xc6, 0x7b, 0xe3, 0x0f, 0x85, 0xa1, 0xc8, 0x4a, 0x99,
- 0x0f, 0x86, 0x21, 0xc5, 0xd8, 0x31, 0x0f, 0x86, 0xa0, 0xcc, 0x81, 0xb8,
- 0x01, 0x51, 0x39, 0xd1, 0x49, 0x03, 0x01, 0x51, 0x10, 0xc5, 0x01, 0x62,
- 0x01, 0x51, 0x31, 0xc5, 0x00, 0x95, 0x01, 0x51, 0x20, 0x83, 0x01, 0x90,
- 0xb1, 0x97, 0x01, 0x90, 0xe0, 0x89, 0x08, 0xd7, 0x18, 0xc4, 0x18, 0x85,
- 0x08, 0x43, 0xf9, 0x91, 0x08, 0x43, 0xd0, 0xc2, 0x26, 0xfa, 0x08, 0x43,
- 0xb1, 0xc3, 0x1a, 0xba, 0x08, 0x43, 0x40, 0xc3, 0x0d, 0xd9, 0x08, 0x43,
- 0xa9, 0x03, 0x43, 0x1f, 0x4d, 0xc3, 0x15, 0x1d, 0x08, 0x43, 0x81, 0xc4,
- 0x37, 0x5c, 0x08, 0x43, 0x00, 0xc2, 0x00, 0x6e, 0x08, 0x43, 0x38, 0xc3,
- 0x04, 0x75, 0x01, 0x37, 0xc9, 0xc9, 0xa9, 0xa9, 0x0f, 0xa3, 0x88, 0xc8,
- 0x76, 0x58, 0x05, 0x47, 0xb9, 0x16, 0xc3, 0x1f, 0x59, 0xc6, 0x1e, 0x23,
+ 0x98, 0x9d, 0x0f, 0x87, 0xa1, 0x9e, 0x0f, 0x87, 0xa8, 0xc6, 0xd7, 0xa2,
+ 0x0f, 0x85, 0x21, 0xc6, 0x7c, 0x59, 0x0f, 0x85, 0xa1, 0xc8, 0xba, 0x2b,
+ 0x0f, 0x86, 0x21, 0xc5, 0xdd, 0xe2, 0x0f, 0x86, 0xa0, 0x83, 0x01, 0x90,
+ 0xb1, 0x97, 0x01, 0x90, 0xe0, 0x89, 0x08, 0xd7, 0x18, 0xc4, 0x15, 0xa9,
+ 0x08, 0x43, 0xf9, 0x91, 0x08, 0x43, 0xd0, 0xc2, 0x3c, 0xd1, 0x08, 0x43,
+ 0xb1, 0xc3, 0x1e, 0x54, 0x08, 0x43, 0x40, 0xc3, 0x11, 0x40, 0x08, 0x43,
+ 0xa9, 0x03, 0x43, 0x22, 0x71, 0xc3, 0x18, 0x7a, 0x08, 0x43, 0x81, 0xc4,
+ 0x32, 0xac, 0x08, 0x43, 0x00, 0xc2, 0x01, 0x47, 0x08, 0x43, 0x38, 0xc3,
+ 0x00, 0x2f, 0x01, 0x37, 0xc9, 0xc9, 0xb3, 0x84, 0x0f, 0xa3, 0x88, 0xc8,
+ 0x7d, 0xf7, 0x05, 0x47, 0xb9, 0x16, 0xc3, 0x22, 0x7d, 0xc6, 0x21, 0x26,
0x05, 0x47, 0x98, 0x91, 0x00, 0x48, 0x91, 0x87, 0x00, 0x48, 0x71, 0x83,
0x00, 0x48, 0x20, 0x8e, 0x00, 0x4b, 0x08, 0x94, 0x00, 0x4b, 0x00, 0xc2,
- 0x00, 0xa4, 0x00, 0x4a, 0xe1, 0x83, 0x00, 0x4b, 0xf0, 0x91, 0x00, 0x48,
+ 0x01, 0x0e, 0x00, 0x4a, 0xe1, 0x83, 0x00, 0x4b, 0xf0, 0x91, 0x00, 0x48,
0x89, 0x87, 0x00, 0x48, 0x69, 0x83, 0x00, 0x4b, 0x90, 0x8a, 0x08, 0x20,
0x18, 0x91, 0x08, 0x20, 0x28, 0x8a, 0x08, 0x20, 0x48, 0x91, 0x08, 0x20,
0x58, 0x8a, 0x08, 0x20, 0xf8, 0x89, 0x08, 0x21, 0x28, 0x8a, 0x08, 0x21,
0x58, 0x91, 0x08, 0x21, 0x68, 0x8a, 0x08, 0x21, 0x88, 0x91, 0x08, 0x21,
- 0x98, 0x8a, 0x08, 0x22, 0x38, 0x89, 0x08, 0x22, 0x68, 0xca, 0x03, 0x7d,
- 0x0f, 0xc4, 0x99, 0x48, 0x01, 0xef, 0x43, 0x1f, 0x65, 0xe0, 0x06, 0x47,
- 0x01, 0x5f, 0x78, 0xc5, 0x01, 0x09, 0x01, 0x0e, 0x19, 0x00, 0x43, 0x1f,
- 0x80, 0xc5, 0x01, 0x09, 0x01, 0x0e, 0x11, 0x00, 0x43, 0x1f, 0x92, 0x45,
- 0x00, 0x6c, 0xc3, 0x1f, 0x9e, 0xda, 0x1b, 0x8b, 0x01, 0x0f, 0xa9, 0xc8,
- 0xb2, 0xf2, 0x01, 0x0d, 0x39, 0xc6, 0x11, 0xa5, 0x01, 0x48, 0x99, 0xda,
- 0x1b, 0x71, 0x0f, 0xdd, 0xb8, 0xc4, 0x22, 0x71, 0x01, 0x27, 0xe9, 0xc5,
- 0x01, 0xdb, 0x01, 0x27, 0xe1, 0x15, 0xc3, 0x1f, 0xd4, 0x08, 0xc3, 0x1f,
- 0xe0, 0x16, 0xc3, 0x1f, 0xec, 0xc3, 0x01, 0xb4, 0x01, 0x27, 0xa8, 0x47,
- 0x06, 0xf1, 0xc3, 0x1f, 0xf8, 0xce, 0x36, 0x23, 0x01, 0x57, 0x18, 0xcf,
- 0x00, 0xaf, 0x01, 0x80, 0xf0, 0x02, 0xc3, 0x20, 0x04, 0xc5, 0x2b, 0x74,
- 0x01, 0x00, 0xb8, 0xc2, 0x00, 0xbf, 0x01, 0x52, 0xa1, 0xc3, 0x00, 0x57,
- 0x01, 0x52, 0x98, 0x8c, 0x01, 0x0a, 0x49, 0x8b, 0x01, 0x0a, 0x41, 0x87,
- 0x01, 0x0a, 0x39, 0x86, 0x01, 0x0a, 0x30, 0x8b, 0x01, 0x09, 0xf8, 0xc9,
- 0x0a, 0x4a, 0x01, 0x54, 0xd9, 0xcc, 0x00, 0x9b, 0x01, 0x54, 0xe0, 0xc5,
- 0x7c, 0x32, 0x01, 0x02, 0x31, 0x48, 0xbd, 0x75, 0xc3, 0x20, 0x10, 0xc8,
- 0x4f, 0x39, 0x01, 0x4c, 0x61, 0xc6, 0x01, 0x01, 0x01, 0x72, 0xb1, 0xcd,
- 0x77, 0x57, 0x01, 0x72, 0xc0, 0xd1, 0x53, 0x2c, 0x0f, 0xab, 0x51, 0xce,
- 0x6e, 0x95, 0x0f, 0xab, 0x48, 0x00, 0x43, 0x20, 0x1c, 0xc6, 0x01, 0x7a,
- 0x01, 0x2e, 0xb9, 0xc4, 0x0e, 0xa8, 0x01, 0x5f, 0x48, 0xd4, 0x3e, 0xe5,
- 0x01, 0x4e, 0x70, 0xc5, 0x01, 0x0f, 0x01, 0x5b, 0x13, 0x03, 0x20, 0x3d,
- 0xcc, 0x82, 0xb4, 0x01, 0x5b, 0x61, 0xcd, 0x79, 0x2b, 0x01, 0x5c, 0x30,
- 0x45, 0x00, 0x6c, 0xc3, 0x20, 0x41, 0xc8, 0xb2, 0xf2, 0x01, 0x48, 0x28,
- 0x44, 0x03, 0x68, 0xc3, 0x20, 0x51, 0x42, 0x01, 0xc7, 0x43, 0x20, 0x5b,
- 0xd7, 0x25, 0xc9, 0x0f, 0xc0, 0x51, 0xc3, 0x7e, 0x2f, 0x01, 0x0d, 0x60,
- 0x45, 0x04, 0x74, 0xc3, 0x20, 0x65, 0xc5, 0x01, 0x02, 0x01, 0x0c, 0xd8,
- 0xd4, 0x2d, 0x09, 0x01, 0x0f, 0xd9, 0xc9, 0xb4, 0x11, 0x01, 0x48, 0x88,
- 0xc3, 0x12, 0xec, 0x01, 0x0d, 0x1b, 0x03, 0x20, 0x71, 0x43, 0x00, 0x3e,
- 0x43, 0x20, 0x77, 0xc2, 0x00, 0xbb, 0x01, 0x0f, 0x29, 0xcc, 0x55, 0x0d,
- 0x01, 0x48, 0xf0, 0x9a, 0x01, 0x4a, 0x39, 0xcc, 0x00, 0x9b, 0x01, 0x5a,
- 0x19, 0xc8, 0xba, 0xdd, 0x01, 0x5a, 0x20, 0xcf, 0x69, 0x89, 0x01, 0x4b,
- 0xa9, 0xce, 0x38, 0x53, 0x01, 0x4b, 0xa1, 0xd5, 0x34, 0x4e, 0x01, 0x4a,
- 0x11, 0x48, 0x65, 0x33, 0x43, 0x20, 0x83, 0xe0, 0x01, 0xc7, 0x0f, 0xdd,
- 0xb0, 0x45, 0x00, 0x6c, 0xc3, 0x20, 0x8f, 0xc8, 0xb2, 0xf2, 0x01, 0x48,
- 0x38, 0xc8, 0x50, 0x00, 0x01, 0x0c, 0x39, 0xca, 0xa7, 0x16, 0x01, 0x0c,
- 0x30, 0xc8, 0x50, 0x00, 0x01, 0x0c, 0x09, 0xc7, 0x0c, 0x4b, 0x01, 0x0b,
- 0x70, 0xc3, 0x22, 0x39, 0x00, 0xb7, 0xc1, 0x85, 0x00, 0xb7, 0xb8, 0xc2,
- 0x1e, 0x62, 0x00, 0xb7, 0x39, 0xc6, 0xd4, 0x6b, 0x00, 0xb6, 0xc9, 0xc9,
- 0x24, 0x57, 0x00, 0xb6, 0x99, 0xc5, 0x74, 0xa2, 0x00, 0xb6, 0x81, 0xc5,
- 0x30, 0x88, 0x00, 0xb6, 0x61, 0xc4, 0x00, 0xe9, 0x00, 0xb6, 0x31, 0xc6,
- 0x56, 0x89, 0x00, 0xb5, 0xf9, 0xc8, 0xbc, 0xcd, 0x00, 0xb5, 0xe9, 0xc5,
- 0x72, 0x78, 0x00, 0xb5, 0x68, 0x90, 0x05, 0x28, 0x20, 0x90, 0x05, 0x2b,
- 0xa8, 0x87, 0x05, 0x28, 0x30, 0x91, 0x05, 0x2b, 0xb8, 0x87, 0x05, 0x28,
- 0x40, 0x91, 0x05, 0x2b, 0xc8, 0x87, 0x05, 0x28, 0x50, 0x91, 0x05, 0x2b,
- 0xd8, 0x87, 0x05, 0x28, 0x49, 0x90, 0x05, 0x2f, 0x68, 0x90, 0x05, 0x2a,
- 0xa8, 0x91, 0x05, 0x2b, 0xd0, 0x87, 0x05, 0x28, 0x59, 0x90, 0x05, 0x2f,
- 0x80, 0x91, 0x05, 0x2b, 0xe1, 0x90, 0x05, 0x2e, 0x40, 0x87, 0x05, 0x28,
- 0x78, 0x91, 0x05, 0x2c, 0x00, 0x87, 0x05, 0x28, 0x80, 0x87, 0x05, 0x2f,
- 0xb3, 0x03, 0x20, 0x9b, 0x8b, 0x05, 0x29, 0xb1, 0x83, 0x05, 0x2a, 0xe9,
- 0x91, 0x05, 0x2e, 0x73, 0x03, 0x20, 0x9f, 0x97, 0x05, 0x2d, 0x38, 0x91,
- 0x05, 0x2c, 0x08, 0x87, 0x05, 0x28, 0xa8, 0x91, 0x05, 0x2c, 0x31, 0x43,
- 0x00, 0x54, 0x43, 0x20, 0xa3, 0x87, 0x05, 0x28, 0xe0, 0x91, 0x05, 0x2c,
- 0x68, 0x87, 0x05, 0x30, 0x23, 0x03, 0x20, 0xc1, 0x8b, 0x05, 0x2a, 0x21,
- 0x83, 0x05, 0x2b, 0x61, 0x91, 0x05, 0x2e, 0xe3, 0x03, 0x20, 0xc9, 0x97,
- 0x05, 0x2d, 0xa8, 0x87, 0x05, 0x29, 0x18, 0x91, 0x05, 0x2c, 0xa0, 0x87,
- 0x05, 0x28, 0xb8, 0x91, 0x05, 0x2c, 0x40, 0x87, 0x05, 0x28, 0xc8, 0x91,
- 0x05, 0x2c, 0x50, 0xc5, 0x00, 0x95, 0x01, 0x57, 0x79, 0xc5, 0x01, 0x62,
- 0x01, 0x57, 0x80, 0xa5, 0x0c, 0x57, 0xf9, 0xa4, 0x0c, 0x57, 0xf1, 0xa3,
- 0x0c, 0x57, 0xe9, 0xa2, 0x0c, 0x57, 0xe1, 0xa1, 0x0c, 0x57, 0xd9, 0xa0,
- 0x0c, 0x57, 0xd1, 0x9f, 0x0c, 0x57, 0xc9, 0x9e, 0x0c, 0x57, 0xc1, 0x9d,
- 0x0c, 0x57, 0xb8, 0xa6, 0x0c, 0x57, 0xb1, 0xa5, 0x0c, 0x57, 0xa9, 0xa4,
- 0x0c, 0x57, 0xa1, 0xa3, 0x0c, 0x57, 0x99, 0xa2, 0x0c, 0x57, 0x91, 0xa1,
- 0x0c, 0x57, 0x89, 0xa0, 0x0c, 0x57, 0x81, 0x9f, 0x0c, 0x57, 0x79, 0x9e,
- 0x0c, 0x57, 0x71, 0x9d, 0x0c, 0x57, 0x68, 0xa6, 0x0c, 0x57, 0x61, 0xa5,
- 0x0c, 0x57, 0x59, 0xa4, 0x0c, 0x57, 0x51, 0xa3, 0x0c, 0x57, 0x49, 0xa2,
- 0x0c, 0x57, 0x41, 0xa1, 0x0c, 0x57, 0x39, 0xa0, 0x0c, 0x57, 0x31, 0x9f,
- 0x0c, 0x57, 0x29, 0x9e, 0x0c, 0x57, 0x21, 0x9d, 0x0c, 0x57, 0x18, 0xa6,
- 0x0c, 0x57, 0x11, 0xa5, 0x0c, 0x57, 0x09, 0xa4, 0x0c, 0x57, 0x01, 0xa3,
- 0x0c, 0x56, 0xf9, 0xa2, 0x0c, 0x56, 0xf1, 0xa1, 0x0c, 0x56, 0xe9, 0xa0,
- 0x0c, 0x56, 0xe1, 0x9f, 0x0c, 0x56, 0xd9, 0x9e, 0x0c, 0x56, 0xd1, 0x9d,
- 0x0c, 0x56, 0xc8, 0xa6, 0x0c, 0x56, 0xc1, 0xa5, 0x0c, 0x56, 0xb9, 0xa4,
- 0x0c, 0x56, 0xb1, 0xa3, 0x0c, 0x56, 0xa9, 0xa2, 0x0c, 0x56, 0xa1, 0xa1,
- 0x0c, 0x56, 0x99, 0xa0, 0x0c, 0x56, 0x91, 0x9f, 0x0c, 0x56, 0x89, 0x9e,
- 0x0c, 0x56, 0x81, 0x9d, 0x0c, 0x56, 0x78, 0xa6, 0x0c, 0x56, 0x71, 0xa5,
- 0x0c, 0x56, 0x69, 0xa4, 0x0c, 0x56, 0x61, 0xa3, 0x0c, 0x56, 0x59, 0xa2,
- 0x0c, 0x56, 0x51, 0xa1, 0x0c, 0x56, 0x49, 0xa0, 0x0c, 0x56, 0x41, 0x9f,
- 0x0c, 0x56, 0x39, 0x9e, 0x0c, 0x56, 0x31, 0x9d, 0x0c, 0x56, 0x28, 0xa6,
- 0x0c, 0x56, 0x21, 0xa5, 0x0c, 0x56, 0x19, 0xa4, 0x0c, 0x56, 0x11, 0xa3,
- 0x0c, 0x56, 0x09, 0xa2, 0x0c, 0x56, 0x01, 0xa1, 0x0c, 0x55, 0xf9, 0xa0,
- 0x0c, 0x55, 0xf1, 0x9f, 0x0c, 0x55, 0xe9, 0x9e, 0x0c, 0x55, 0xe1, 0x9d,
- 0x0c, 0x55, 0xd8, 0xa6, 0x0c, 0x55, 0xd1, 0xa5, 0x0c, 0x55, 0xc9, 0xa4,
- 0x0c, 0x55, 0xc1, 0xa3, 0x0c, 0x55, 0xb9, 0xa2, 0x0c, 0x55, 0xb1, 0xa1,
- 0x0c, 0x55, 0xa9, 0xa0, 0x0c, 0x55, 0xa1, 0x9f, 0x0c, 0x55, 0x99, 0x9e,
- 0x0c, 0x55, 0x91, 0x9d, 0x0c, 0x55, 0x88, 0xa6, 0x0c, 0x55, 0x81, 0xa5,
- 0x0c, 0x55, 0x79, 0xa4, 0x0c, 0x55, 0x71, 0xa3, 0x0c, 0x55, 0x69, 0xa2,
- 0x0c, 0x55, 0x61, 0xa1, 0x0c, 0x55, 0x59, 0xa0, 0x0c, 0x55, 0x51, 0x9f,
- 0x0c, 0x55, 0x49, 0x9e, 0x0c, 0x55, 0x41, 0x9d, 0x0c, 0x55, 0x38, 0xa6,
- 0x0c, 0x55, 0x31, 0xa5, 0x0c, 0x55, 0x29, 0xa4, 0x0c, 0x55, 0x21, 0xa3,
- 0x0c, 0x55, 0x19, 0xa2, 0x0c, 0x55, 0x11, 0xa1, 0x0c, 0x55, 0x09, 0xa0,
- 0x0c, 0x55, 0x01, 0x9f, 0x0c, 0x54, 0xf9, 0x9e, 0x0c, 0x54, 0xf1, 0x9d,
- 0x0c, 0x54, 0xe8, 0xa6, 0x0c, 0x54, 0xe1, 0xa5, 0x0c, 0x54, 0xd9, 0xa4,
- 0x0c, 0x54, 0xd1, 0xa3, 0x0c, 0x54, 0xc9, 0xa2, 0x0c, 0x54, 0xc1, 0xa1,
- 0x0c, 0x54, 0xb9, 0xa0, 0x0c, 0x54, 0xb1, 0x9f, 0x0c, 0x54, 0xa9, 0x9e,
- 0x0c, 0x54, 0xa1, 0x9d, 0x0c, 0x54, 0x98, 0xa6, 0x0c, 0x54, 0x91, 0xa5,
- 0x0c, 0x54, 0x89, 0xa4, 0x0c, 0x54, 0x81, 0xa3, 0x0c, 0x54, 0x79, 0xa2,
- 0x0c, 0x54, 0x71, 0xa1, 0x0c, 0x54, 0x69, 0xa0, 0x0c, 0x54, 0x61, 0x9f,
- 0x0c, 0x54, 0x59, 0x9e, 0x0c, 0x54, 0x51, 0x9d, 0x0c, 0x54, 0x48, 0xa6,
- 0x0c, 0x54, 0x41, 0xa5, 0x0c, 0x54, 0x39, 0xa4, 0x0c, 0x54, 0x31, 0xa3,
- 0x0c, 0x54, 0x29, 0xa2, 0x0c, 0x54, 0x21, 0xa1, 0x0c, 0x54, 0x19, 0xa0,
- 0x0c, 0x54, 0x11, 0x9f, 0x0c, 0x54, 0x09, 0x9e, 0x0c, 0x54, 0x01, 0x9d,
- 0x0c, 0x53, 0xf8, 0xa6, 0x0c, 0x53, 0xf1, 0xa5, 0x0c, 0x53, 0xe9, 0xa4,
- 0x0c, 0x53, 0xe1, 0xa3, 0x0c, 0x53, 0xd9, 0xa2, 0x0c, 0x53, 0xd1, 0xa1,
- 0x0c, 0x53, 0xc9, 0xa0, 0x0c, 0x53, 0xc1, 0x9f, 0x0c, 0x53, 0xb9, 0x9e,
- 0x0c, 0x53, 0xb1, 0x9d, 0x0c, 0x53, 0xa8, 0xa6, 0x0c, 0x53, 0xa1, 0xa5,
- 0x0c, 0x53, 0x99, 0xa4, 0x0c, 0x53, 0x91, 0xa3, 0x0c, 0x53, 0x89, 0xa2,
- 0x0c, 0x53, 0x81, 0xa1, 0x0c, 0x53, 0x79, 0xa0, 0x0c, 0x53, 0x71, 0x9f,
- 0x0c, 0x53, 0x69, 0x9e, 0x0c, 0x53, 0x61, 0x9d, 0x0c, 0x53, 0x58, 0xa6,
- 0x0c, 0x53, 0x51, 0xa5, 0x0c, 0x53, 0x49, 0xa4, 0x0c, 0x53, 0x41, 0xa3,
- 0x0c, 0x53, 0x39, 0xa2, 0x0c, 0x53, 0x31, 0xa1, 0x0c, 0x53, 0x29, 0xa0,
- 0x0c, 0x53, 0x21, 0x9f, 0x0c, 0x53, 0x19, 0x9e, 0x0c, 0x53, 0x11, 0x9d,
- 0x0c, 0x53, 0x08, 0xa6, 0x0c, 0x53, 0x01, 0xa5, 0x0c, 0x52, 0xf9, 0xa4,
- 0x0c, 0x52, 0xf1, 0xa3, 0x0c, 0x52, 0xe9, 0xa2, 0x0c, 0x52, 0xe1, 0xa1,
- 0x0c, 0x52, 0xd9, 0xa0, 0x0c, 0x52, 0xd1, 0x9f, 0x0c, 0x52, 0xc9, 0x9e,
- 0x0c, 0x52, 0xc1, 0x9d, 0x0c, 0x52, 0xb8, 0xa6, 0x0c, 0x52, 0xb1, 0xa5,
- 0x0c, 0x52, 0xa9, 0xa4, 0x0c, 0x52, 0xa1, 0xa3, 0x0c, 0x52, 0x99, 0xa2,
- 0x0c, 0x52, 0x91, 0xa1, 0x0c, 0x52, 0x89, 0xa0, 0x0c, 0x52, 0x81, 0x9f,
- 0x0c, 0x52, 0x79, 0x9e, 0x0c, 0x52, 0x71, 0x9d, 0x0c, 0x52, 0x68, 0xa6,
- 0x0c, 0x52, 0x61, 0xa5, 0x0c, 0x52, 0x59, 0xa4, 0x0c, 0x52, 0x51, 0xa3,
- 0x0c, 0x52, 0x49, 0xa2, 0x0c, 0x52, 0x41, 0xa1, 0x0c, 0x52, 0x39, 0xa0,
- 0x0c, 0x52, 0x31, 0x9f, 0x0c, 0x52, 0x29, 0x9e, 0x0c, 0x52, 0x21, 0x9d,
- 0x0c, 0x52, 0x18, 0xa6, 0x0c, 0x52, 0x11, 0xa5, 0x0c, 0x52, 0x09, 0xa4,
- 0x0c, 0x52, 0x01, 0xa3, 0x0c, 0x51, 0xf9, 0xa2, 0x0c, 0x51, 0xf1, 0xa1,
- 0x0c, 0x51, 0xe9, 0xa0, 0x0c, 0x51, 0xe1, 0x9f, 0x0c, 0x51, 0xd9, 0x9e,
- 0x0c, 0x51, 0xd1, 0x9d, 0x0c, 0x51, 0xc8, 0xa6, 0x0c, 0x51, 0xc1, 0xa5,
- 0x0c, 0x51, 0xb9, 0xa4, 0x0c, 0x51, 0xb1, 0xa3, 0x0c, 0x51, 0xa9, 0xa2,
- 0x0c, 0x51, 0xa1, 0xa1, 0x0c, 0x51, 0x99, 0xa0, 0x0c, 0x51, 0x91, 0x9f,
- 0x0c, 0x51, 0x89, 0x9e, 0x0c, 0x51, 0x81, 0x9d, 0x0c, 0x51, 0x78, 0xa6,
- 0x0c, 0x51, 0x71, 0xa5, 0x0c, 0x51, 0x69, 0xa4, 0x0c, 0x51, 0x61, 0xa3,
- 0x0c, 0x51, 0x59, 0xa2, 0x0c, 0x51, 0x51, 0xa1, 0x0c, 0x51, 0x49, 0xa0,
- 0x0c, 0x51, 0x41, 0x9f, 0x0c, 0x51, 0x39, 0x9e, 0x0c, 0x51, 0x31, 0x9d,
- 0x0c, 0x51, 0x28, 0xa6, 0x0c, 0x51, 0x21, 0xa5, 0x0c, 0x51, 0x19, 0xa4,
- 0x0c, 0x51, 0x11, 0xa3, 0x0c, 0x51, 0x09, 0xa2, 0x0c, 0x51, 0x01, 0xa1,
- 0x0c, 0x50, 0xf9, 0xa0, 0x0c, 0x50, 0xf1, 0x9f, 0x0c, 0x50, 0xe9, 0x9e,
- 0x0c, 0x50, 0xe1, 0x9d, 0x0c, 0x50, 0xd8, 0xa6, 0x0c, 0x50, 0xd1, 0xa5,
- 0x0c, 0x50, 0xc9, 0xa4, 0x0c, 0x50, 0xc1, 0xa3, 0x0c, 0x50, 0xb9, 0xa2,
- 0x0c, 0x50, 0xb1, 0xa1, 0x0c, 0x50, 0xa9, 0xa0, 0x0c, 0x50, 0xa1, 0x9f,
- 0x0c, 0x50, 0x99, 0x9e, 0x0c, 0x50, 0x91, 0x9d, 0x0c, 0x50, 0x88, 0xa6,
- 0x0c, 0x50, 0x81, 0xa5, 0x0c, 0x50, 0x79, 0xa4, 0x0c, 0x50, 0x71, 0xa3,
- 0x0c, 0x50, 0x69, 0xa2, 0x0c, 0x50, 0x61, 0xa1, 0x0c, 0x50, 0x59, 0xa0,
- 0x0c, 0x50, 0x51, 0x9f, 0x0c, 0x50, 0x49, 0x9e, 0x0c, 0x50, 0x41, 0x9d,
- 0x0c, 0x50, 0x38, 0xa6, 0x0c, 0x50, 0x31, 0xa5, 0x0c, 0x50, 0x29, 0xa4,
- 0x0c, 0x50, 0x21, 0xa3, 0x0c, 0x50, 0x19, 0xa2, 0x0c, 0x50, 0x11, 0xa1,
- 0x0c, 0x50, 0x09, 0xa0, 0x0c, 0x50, 0x01, 0x9f, 0x0c, 0x4f, 0xf9, 0x9e,
- 0x0c, 0x4f, 0xf1, 0x9d, 0x0c, 0x4f, 0xe8, 0xa6, 0x0c, 0x4f, 0xe1, 0xa5,
- 0x0c, 0x4f, 0xd9, 0xa4, 0x0c, 0x4f, 0xd1, 0xa3, 0x0c, 0x4f, 0xc9, 0xa2,
- 0x0c, 0x4f, 0xc1, 0xa1, 0x0c, 0x4f, 0xb9, 0xa0, 0x0c, 0x4f, 0xb1, 0x9f,
- 0x0c, 0x4f, 0xa9, 0x9e, 0x0c, 0x4f, 0xa1, 0x9d, 0x0c, 0x4f, 0x98, 0xa6,
- 0x0c, 0x4f, 0x91, 0xa5, 0x0c, 0x4f, 0x89, 0xa4, 0x0c, 0x4f, 0x81, 0xa3,
- 0x0c, 0x4f, 0x79, 0xa2, 0x0c, 0x4f, 0x71, 0xa1, 0x0c, 0x4f, 0x69, 0xa0,
- 0x0c, 0x4f, 0x61, 0x9f, 0x0c, 0x4f, 0x59, 0x9e, 0x0c, 0x4f, 0x51, 0x9d,
- 0x0c, 0x4f, 0x48, 0xa6, 0x0c, 0x4f, 0x41, 0xa5, 0x0c, 0x4f, 0x39, 0xa4,
- 0x0c, 0x4f, 0x31, 0xa3, 0x0c, 0x4f, 0x29, 0xa2, 0x0c, 0x4f, 0x21, 0xa1,
- 0x0c, 0x4f, 0x19, 0xa0, 0x0c, 0x4f, 0x11, 0x9f, 0x0c, 0x4f, 0x09, 0x9e,
- 0x0c, 0x4f, 0x01, 0x9d, 0x0c, 0x4e, 0xf8, 0xa6, 0x0c, 0x4e, 0xf1, 0xa5,
- 0x0c, 0x4e, 0xe9, 0xa4, 0x0c, 0x4e, 0xe1, 0xa3, 0x0c, 0x4e, 0xd9, 0xa2,
- 0x0c, 0x4e, 0xd1, 0xa1, 0x0c, 0x4e, 0xc9, 0xa0, 0x0c, 0x4e, 0xc1, 0x9f,
- 0x0c, 0x4e, 0xb9, 0x9e, 0x0c, 0x4e, 0xb1, 0x9d, 0x0c, 0x4e, 0xa8, 0xa6,
- 0x0c, 0x4e, 0xa1, 0xa5, 0x0c, 0x4e, 0x99, 0xa4, 0x0c, 0x4e, 0x91, 0xa3,
- 0x0c, 0x4e, 0x89, 0xa2, 0x0c, 0x4e, 0x81, 0xa1, 0x0c, 0x4e, 0x79, 0xa0,
- 0x0c, 0x4e, 0x71, 0x9f, 0x0c, 0x4e, 0x69, 0x9e, 0x0c, 0x4e, 0x61, 0x9d,
- 0x0c, 0x4e, 0x58, 0xa6, 0x0c, 0x4e, 0x51, 0xa5, 0x0c, 0x4e, 0x49, 0xa4,
- 0x0c, 0x4e, 0x41, 0xa3, 0x0c, 0x4e, 0x39, 0xa2, 0x0c, 0x4e, 0x31, 0xa1,
- 0x0c, 0x4e, 0x29, 0xa0, 0x0c, 0x4e, 0x21, 0x9f, 0x0c, 0x4e, 0x19, 0x9e,
- 0x0c, 0x4e, 0x11, 0x9d, 0x0c, 0x4e, 0x08, 0xa6, 0x0c, 0x4e, 0x01, 0xa5,
- 0x0c, 0x4d, 0xf9, 0xa4, 0x0c, 0x4d, 0xf1, 0xa3, 0x0c, 0x4d, 0xe9, 0xa2,
- 0x0c, 0x4d, 0xe1, 0xa1, 0x0c, 0x4d, 0xd9, 0xa0, 0x0c, 0x4d, 0xd1, 0x9f,
- 0x0c, 0x4d, 0xc9, 0x9e, 0x0c, 0x4d, 0xc1, 0x9d, 0x0c, 0x4d, 0xb8, 0xa6,
- 0x0c, 0x4d, 0xb1, 0xa5, 0x0c, 0x4d, 0xa9, 0xa4, 0x0c, 0x4d, 0xa1, 0xa3,
- 0x0c, 0x4d, 0x99, 0xa2, 0x0c, 0x4d, 0x91, 0xa1, 0x0c, 0x4d, 0x89, 0xa0,
- 0x0c, 0x4d, 0x81, 0x9f, 0x0c, 0x4d, 0x79, 0x9e, 0x0c, 0x4d, 0x71, 0x9d,
- 0x0c, 0x4d, 0x68, 0xa6, 0x0c, 0x4d, 0x61, 0xa5, 0x0c, 0x4d, 0x59, 0xa4,
- 0x0c, 0x4d, 0x51, 0xa3, 0x0c, 0x4d, 0x49, 0xa2, 0x0c, 0x4d, 0x41, 0xa1,
- 0x0c, 0x4d, 0x39, 0xa0, 0x0c, 0x4d, 0x31, 0x9f, 0x0c, 0x4d, 0x29, 0x9e,
- 0x0c, 0x4d, 0x21, 0x9d, 0x0c, 0x4d, 0x18, 0xa6, 0x0c, 0x4d, 0x11, 0xa5,
- 0x0c, 0x4d, 0x09, 0xa4, 0x0c, 0x4d, 0x01, 0xa3, 0x0c, 0x4c, 0xf9, 0xa2,
- 0x0c, 0x4c, 0xf1, 0xa1, 0x0c, 0x4c, 0xe9, 0xa0, 0x0c, 0x4c, 0xe1, 0x9f,
- 0x0c, 0x4c, 0xd9, 0x9e, 0x0c, 0x4c, 0xd1, 0x9d, 0x0c, 0x4c, 0xc8, 0xa6,
- 0x0c, 0x4c, 0xc1, 0xa5, 0x0c, 0x4c, 0xb9, 0xa4, 0x0c, 0x4c, 0xb1, 0xa3,
- 0x0c, 0x4c, 0xa9, 0xa2, 0x0c, 0x4c, 0xa1, 0xa1, 0x0c, 0x4c, 0x99, 0xa0,
- 0x0c, 0x4c, 0x91, 0x9f, 0x0c, 0x4c, 0x89, 0x9e, 0x0c, 0x4c, 0x81, 0x9d,
- 0x0c, 0x4c, 0x78, 0xa6, 0x0c, 0x4c, 0x71, 0xa5, 0x0c, 0x4c, 0x69, 0xa4,
- 0x0c, 0x4c, 0x61, 0xa3, 0x0c, 0x4c, 0x59, 0xa2, 0x0c, 0x4c, 0x51, 0xa1,
- 0x0c, 0x4c, 0x49, 0xa0, 0x0c, 0x4c, 0x41, 0x9f, 0x0c, 0x4c, 0x39, 0x9e,
- 0x0c, 0x4c, 0x31, 0x9d, 0x0c, 0x4c, 0x28, 0xa6, 0x0c, 0x4c, 0x21, 0xa5,
- 0x0c, 0x4c, 0x19, 0xa4, 0x0c, 0x4c, 0x11, 0xa3, 0x0c, 0x4c, 0x09, 0xa2,
- 0x0c, 0x4c, 0x01, 0xa1, 0x0c, 0x4b, 0xf9, 0xa0, 0x0c, 0x4b, 0xf1, 0x9f,
- 0x0c, 0x4b, 0xe9, 0x9e, 0x0c, 0x4b, 0xe1, 0x9d, 0x0c, 0x4b, 0xd8, 0xa6,
- 0x0c, 0x4b, 0xd1, 0xa5, 0x0c, 0x4b, 0xc9, 0xa4, 0x0c, 0x4b, 0xc1, 0xa3,
- 0x0c, 0x4b, 0xb9, 0xa2, 0x0c, 0x4b, 0xb1, 0xa1, 0x0c, 0x4b, 0xa9, 0xa0,
- 0x0c, 0x4b, 0xa1, 0x9f, 0x0c, 0x4b, 0x99, 0x9e, 0x0c, 0x4b, 0x91, 0x9d,
- 0x0c, 0x4b, 0x88, 0xa6, 0x0c, 0x4b, 0x81, 0xa5, 0x0c, 0x4b, 0x79, 0xa4,
- 0x0c, 0x4b, 0x71, 0xa3, 0x0c, 0x4b, 0x69, 0xa2, 0x0c, 0x4b, 0x61, 0xa1,
- 0x0c, 0x4b, 0x59, 0xa0, 0x0c, 0x4b, 0x51, 0x9f, 0x0c, 0x4b, 0x49, 0x9e,
- 0x0c, 0x4b, 0x41, 0x9d, 0x0c, 0x4b, 0x38, 0xa6, 0x0c, 0x4b, 0x31, 0xa5,
- 0x0c, 0x4b, 0x29, 0xa4, 0x0c, 0x4b, 0x21, 0xa3, 0x0c, 0x4b, 0x19, 0xa2,
- 0x0c, 0x4b, 0x11, 0xa1, 0x0c, 0x4b, 0x09, 0xa0, 0x0c, 0x4b, 0x01, 0x9f,
- 0x0c, 0x4a, 0xf9, 0x9e, 0x0c, 0x4a, 0xf1, 0x9d, 0x0c, 0x4a, 0xe8, 0xa6,
- 0x0c, 0x4a, 0xe1, 0xa5, 0x0c, 0x4a, 0xd9, 0xa4, 0x0c, 0x4a, 0xd1, 0xa3,
- 0x0c, 0x4a, 0xc9, 0xa2, 0x0c, 0x4a, 0xc1, 0xa1, 0x0c, 0x4a, 0xb9, 0xa0,
- 0x0c, 0x4a, 0xb1, 0x9f, 0x0c, 0x4a, 0xa9, 0x9e, 0x0c, 0x4a, 0xa1, 0x9d,
- 0x0c, 0x4a, 0x98, 0xa6, 0x0c, 0x4a, 0x91, 0xa5, 0x0c, 0x4a, 0x89, 0xa4,
- 0x0c, 0x4a, 0x81, 0xa3, 0x0c, 0x4a, 0x79, 0xa2, 0x0c, 0x4a, 0x71, 0xa1,
- 0x0c, 0x4a, 0x69, 0xa0, 0x0c, 0x4a, 0x61, 0x9f, 0x0c, 0x4a, 0x59, 0x9e,
- 0x0c, 0x4a, 0x51, 0x9d, 0x0c, 0x4a, 0x48, 0xa6, 0x0c, 0x4a, 0x41, 0xa5,
- 0x0c, 0x4a, 0x39, 0xa4, 0x0c, 0x4a, 0x31, 0xa3, 0x0c, 0x4a, 0x29, 0xa2,
- 0x0c, 0x4a, 0x21, 0xa1, 0x0c, 0x4a, 0x19, 0xa0, 0x0c, 0x4a, 0x11, 0x9f,
- 0x0c, 0x4a, 0x09, 0x9e, 0x0c, 0x4a, 0x01, 0x9d, 0x0c, 0x49, 0xf8, 0xa6,
- 0x0c, 0x49, 0xf1, 0xa5, 0x0c, 0x49, 0xe9, 0xa4, 0x0c, 0x49, 0xe1, 0xa3,
- 0x0c, 0x49, 0xd9, 0xa2, 0x0c, 0x49, 0xd1, 0xa1, 0x0c, 0x49, 0xc9, 0xa0,
- 0x0c, 0x49, 0xc1, 0x9f, 0x0c, 0x49, 0xb9, 0x9e, 0x0c, 0x49, 0xb1, 0x9d,
- 0x0c, 0x49, 0xa8, 0xa6, 0x0c, 0x49, 0xa1, 0xa5, 0x0c, 0x49, 0x99, 0xa4,
- 0x0c, 0x49, 0x91, 0xa3, 0x0c, 0x49, 0x89, 0xa2, 0x0c, 0x49, 0x81, 0xa1,
- 0x0c, 0x49, 0x79, 0xa0, 0x0c, 0x49, 0x71, 0x9f, 0x0c, 0x49, 0x69, 0x9e,
- 0x0c, 0x49, 0x61, 0x9d, 0x0c, 0x49, 0x58, 0xa6, 0x0c, 0x49, 0x51, 0xa5,
- 0x0c, 0x49, 0x49, 0xa4, 0x0c, 0x49, 0x41, 0xa3, 0x0c, 0x49, 0x39, 0xa2,
- 0x0c, 0x49, 0x31, 0xa1, 0x0c, 0x49, 0x29, 0xa0, 0x0c, 0x49, 0x21, 0x9f,
- 0x0c, 0x49, 0x19, 0x9e, 0x0c, 0x49, 0x11, 0x9d, 0x0c, 0x49, 0x08, 0xa6,
- 0x0c, 0x49, 0x01, 0xa5, 0x0c, 0x48, 0xf9, 0xa4, 0x0c, 0x48, 0xf1, 0xa3,
- 0x0c, 0x48, 0xe9, 0xa2, 0x0c, 0x48, 0xe1, 0xa1, 0x0c, 0x48, 0xd9, 0xa0,
- 0x0c, 0x48, 0xd1, 0x9f, 0x0c, 0x48, 0xc9, 0x9e, 0x0c, 0x48, 0xc1, 0x9d,
- 0x0c, 0x48, 0xb8, 0xa6, 0x0c, 0x48, 0xb1, 0xa5, 0x0c, 0x48, 0xa9, 0xa4,
- 0x0c, 0x48, 0xa1, 0xa3, 0x0c, 0x48, 0x99, 0xa2, 0x0c, 0x48, 0x91, 0xa1,
- 0x0c, 0x48, 0x89, 0xa0, 0x0c, 0x48, 0x81, 0x9f, 0x0c, 0x48, 0x79, 0x9e,
- 0x0c, 0x48, 0x71, 0x9d, 0x0c, 0x48, 0x68, 0xa6, 0x0c, 0x48, 0x61, 0xa5,
- 0x0c, 0x48, 0x59, 0xa4, 0x0c, 0x48, 0x51, 0xa3, 0x0c, 0x48, 0x49, 0xa2,
- 0x0c, 0x48, 0x41, 0xa1, 0x0c, 0x48, 0x39, 0xa0, 0x0c, 0x48, 0x31, 0x9f,
- 0x0c, 0x48, 0x29, 0x9e, 0x0c, 0x48, 0x21, 0x9d, 0x0c, 0x48, 0x18, 0xa6,
- 0x0c, 0x48, 0x11, 0xa5, 0x0c, 0x48, 0x09, 0xa4, 0x0c, 0x48, 0x01, 0xa3,
- 0x0c, 0x47, 0xf9, 0xa2, 0x0c, 0x47, 0xf1, 0xa1, 0x0c, 0x47, 0xe9, 0xa0,
- 0x0c, 0x47, 0xe1, 0x9f, 0x0c, 0x47, 0xd9, 0x9e, 0x0c, 0x47, 0xd1, 0x9d,
- 0x0c, 0x47, 0xc8, 0xa6, 0x0c, 0x47, 0xc1, 0xa5, 0x0c, 0x47, 0xb9, 0xa4,
- 0x0c, 0x47, 0xb1, 0xa3, 0x0c, 0x47, 0xa9, 0xa2, 0x0c, 0x47, 0xa1, 0xa1,
- 0x0c, 0x47, 0x99, 0xa0, 0x0c, 0x47, 0x91, 0x9f, 0x0c, 0x47, 0x89, 0x9e,
- 0x0c, 0x47, 0x81, 0x9d, 0x0c, 0x47, 0x78, 0xa6, 0x0c, 0x47, 0x71, 0xa5,
- 0x0c, 0x47, 0x69, 0xa4, 0x0c, 0x47, 0x61, 0xa3, 0x0c, 0x47, 0x59, 0xa2,
- 0x0c, 0x47, 0x51, 0xa1, 0x0c, 0x47, 0x49, 0xa0, 0x0c, 0x47, 0x41, 0x9f,
- 0x0c, 0x47, 0x39, 0x9e, 0x0c, 0x47, 0x31, 0x9d, 0x0c, 0x47, 0x28, 0xa6,
- 0x0c, 0x47, 0x21, 0xa5, 0x0c, 0x47, 0x19, 0xa4, 0x0c, 0x47, 0x11, 0xa3,
- 0x0c, 0x47, 0x09, 0xa2, 0x0c, 0x47, 0x01, 0xa1, 0x0c, 0x46, 0xf9, 0xa0,
- 0x0c, 0x46, 0xf1, 0x9f, 0x0c, 0x46, 0xe9, 0x9e, 0x0c, 0x46, 0xe1, 0x9d,
- 0x0c, 0x46, 0xd8, 0xa6, 0x0c, 0x46, 0xd1, 0xa5, 0x0c, 0x46, 0xc9, 0xa4,
- 0x0c, 0x46, 0xc1, 0xa3, 0x0c, 0x46, 0xb9, 0xa2, 0x0c, 0x46, 0xb1, 0xa1,
- 0x0c, 0x46, 0xa9, 0xa0, 0x0c, 0x46, 0xa1, 0x9f, 0x0c, 0x46, 0x99, 0x9e,
- 0x0c, 0x46, 0x91, 0x9d, 0x0c, 0x46, 0x88, 0xa6, 0x0c, 0x46, 0x81, 0xa5,
- 0x0c, 0x46, 0x79, 0xa4, 0x0c, 0x46, 0x71, 0xa3, 0x0c, 0x46, 0x69, 0xa2,
- 0x0c, 0x46, 0x61, 0xa1, 0x0c, 0x46, 0x59, 0xa0, 0x0c, 0x46, 0x51, 0x9f,
- 0x0c, 0x46, 0x49, 0x9e, 0x0c, 0x46, 0x41, 0x9d, 0x0c, 0x46, 0x38, 0xa6,
- 0x0c, 0x46, 0x31, 0xa5, 0x0c, 0x46, 0x29, 0xa4, 0x0c, 0x46, 0x21, 0xa3,
- 0x0c, 0x46, 0x19, 0xa2, 0x0c, 0x46, 0x11, 0xa1, 0x0c, 0x46, 0x09, 0xa0,
- 0x0c, 0x46, 0x01, 0x9f, 0x0c, 0x45, 0xf9, 0x9e, 0x0c, 0x45, 0xf1, 0x9d,
- 0x0c, 0x45, 0xe8, 0xa6, 0x0c, 0x45, 0xe1, 0xa5, 0x0c, 0x45, 0xd9, 0xa4,
- 0x0c, 0x45, 0xd1, 0xa3, 0x0c, 0x45, 0xc9, 0xa2, 0x0c, 0x45, 0xc1, 0xa1,
- 0x0c, 0x45, 0xb9, 0xa0, 0x0c, 0x45, 0xb1, 0x9f, 0x0c, 0x45, 0xa9, 0x9e,
- 0x0c, 0x45, 0xa1, 0x9d, 0x0c, 0x45, 0x98, 0xa6, 0x0c, 0x45, 0x91, 0xa5,
- 0x0c, 0x45, 0x89, 0xa4, 0x0c, 0x45, 0x81, 0xa3, 0x0c, 0x45, 0x79, 0xa2,
- 0x0c, 0x45, 0x71, 0xa1, 0x0c, 0x45, 0x69, 0xa0, 0x0c, 0x45, 0x61, 0x9f,
- 0x0c, 0x45, 0x59, 0x9e, 0x0c, 0x45, 0x51, 0x9d, 0x0c, 0x45, 0x48, 0xa6,
- 0x0c, 0x45, 0x41, 0xa5, 0x0c, 0x45, 0x39, 0xa4, 0x0c, 0x45, 0x31, 0xa3,
- 0x0c, 0x45, 0x29, 0xa2, 0x0c, 0x45, 0x21, 0xa1, 0x0c, 0x45, 0x19, 0xa0,
- 0x0c, 0x45, 0x11, 0x9f, 0x0c, 0x45, 0x09, 0x9e, 0x0c, 0x45, 0x01, 0x9d,
- 0x0c, 0x44, 0xf8, 0xa6, 0x0c, 0x44, 0xf1, 0xa5, 0x0c, 0x44, 0xe9, 0xa4,
- 0x0c, 0x44, 0xe1, 0xa3, 0x0c, 0x44, 0xd9, 0xa2, 0x0c, 0x44, 0xd1, 0xa1,
- 0x0c, 0x44, 0xc9, 0xa0, 0x0c, 0x44, 0xc1, 0x9f, 0x0c, 0x44, 0xb9, 0x9e,
- 0x0c, 0x44, 0xb1, 0x9d, 0x0c, 0x44, 0xa8, 0xa6, 0x0c, 0x44, 0xa1, 0xa5,
- 0x0c, 0x44, 0x99, 0xa4, 0x0c, 0x44, 0x91, 0xa3, 0x0c, 0x44, 0x89, 0xa2,
- 0x0c, 0x44, 0x81, 0xa1, 0x0c, 0x44, 0x79, 0xa0, 0x0c, 0x44, 0x71, 0x9f,
- 0x0c, 0x44, 0x69, 0x9e, 0x0c, 0x44, 0x61, 0x9d, 0x0c, 0x44, 0x58, 0xa6,
- 0x0c, 0x44, 0x51, 0xa5, 0x0c, 0x44, 0x49, 0xa4, 0x0c, 0x44, 0x41, 0xa3,
- 0x0c, 0x44, 0x39, 0xa2, 0x0c, 0x44, 0x31, 0xa1, 0x0c, 0x44, 0x29, 0xa0,
- 0x0c, 0x44, 0x21, 0x9f, 0x0c, 0x44, 0x19, 0x9e, 0x0c, 0x44, 0x11, 0x9d,
- 0x0c, 0x44, 0x08, 0xa6, 0x0c, 0x44, 0x01, 0xa5, 0x0c, 0x43, 0xf9, 0xa4,
- 0x0c, 0x43, 0xf1, 0xa3, 0x0c, 0x43, 0xe9, 0xa2, 0x0c, 0x43, 0xe1, 0xa1,
- 0x0c, 0x43, 0xd9, 0xa0, 0x0c, 0x43, 0xd1, 0x9f, 0x0c, 0x43, 0xc9, 0x9e,
- 0x0c, 0x43, 0xc1, 0x9d, 0x0c, 0x43, 0xb8, 0xa6, 0x0c, 0x43, 0xb1, 0xa5,
- 0x0c, 0x43, 0xa9, 0xa4, 0x0c, 0x43, 0xa1, 0xa3, 0x0c, 0x43, 0x99, 0xa2,
- 0x0c, 0x43, 0x91, 0xa1, 0x0c, 0x43, 0x89, 0xa0, 0x0c, 0x43, 0x81, 0x9f,
- 0x0c, 0x43, 0x79, 0x9e, 0x0c, 0x43, 0x71, 0x9d, 0x0c, 0x43, 0x68, 0xa6,
- 0x0c, 0x43, 0x61, 0xa5, 0x0c, 0x43, 0x59, 0xa4, 0x0c, 0x43, 0x51, 0xa3,
- 0x0c, 0x43, 0x49, 0xa2, 0x0c, 0x43, 0x41, 0xa1, 0x0c, 0x43, 0x39, 0xa0,
- 0x0c, 0x43, 0x31, 0x9f, 0x0c, 0x43, 0x29, 0x9e, 0x0c, 0x43, 0x21, 0x9d,
- 0x0c, 0x43, 0x18, 0xa6, 0x0c, 0x43, 0x11, 0xa5, 0x0c, 0x43, 0x09, 0xa4,
- 0x0c, 0x43, 0x01, 0xa3, 0x0c, 0x42, 0xf9, 0xa2, 0x0c, 0x42, 0xf1, 0xa1,
- 0x0c, 0x42, 0xe9, 0xa0, 0x0c, 0x42, 0xe1, 0x9f, 0x0c, 0x42, 0xd9, 0x9e,
- 0x0c, 0x42, 0xd1, 0x9d, 0x0c, 0x42, 0xc8, 0xa6, 0x0c, 0x42, 0xc1, 0xa5,
- 0x0c, 0x42, 0xb9, 0xa4, 0x0c, 0x42, 0xb1, 0xa3, 0x0c, 0x42, 0xa9, 0xa2,
- 0x0c, 0x42, 0xa1, 0xa1, 0x0c, 0x42, 0x99, 0xa0, 0x0c, 0x42, 0x91, 0x9f,
- 0x0c, 0x42, 0x89, 0x9e, 0x0c, 0x42, 0x81, 0x9d, 0x0c, 0x42, 0x78, 0xa6,
- 0x0c, 0x42, 0x71, 0xa5, 0x0c, 0x42, 0x69, 0xa4, 0x0c, 0x42, 0x61, 0xa3,
- 0x0c, 0x42, 0x59, 0xa2, 0x0c, 0x42, 0x51, 0xa1, 0x0c, 0x42, 0x49, 0xa0,
- 0x0c, 0x42, 0x41, 0x9f, 0x0c, 0x42, 0x39, 0x9e, 0x0c, 0x42, 0x31, 0x9d,
- 0x0c, 0x42, 0x28, 0xa6, 0x0c, 0x42, 0x21, 0xa5, 0x0c, 0x42, 0x19, 0xa4,
- 0x0c, 0x42, 0x11, 0xa3, 0x0c, 0x42, 0x09, 0xa2, 0x0c, 0x42, 0x01, 0xa1,
- 0x0c, 0x41, 0xf9, 0xa0, 0x0c, 0x41, 0xf1, 0x9f, 0x0c, 0x41, 0xe9, 0x9e,
- 0x0c, 0x41, 0xe1, 0x9d, 0x0c, 0x41, 0xd8, 0xa6, 0x0c, 0x41, 0xd1, 0xa5,
- 0x0c, 0x41, 0xc9, 0xa4, 0x0c, 0x41, 0xc1, 0xa3, 0x0c, 0x41, 0xb9, 0xa2,
- 0x0c, 0x41, 0xb1, 0xa1, 0x0c, 0x41, 0xa9, 0xa0, 0x0c, 0x41, 0xa1, 0x9f,
- 0x0c, 0x41, 0x99, 0x9e, 0x0c, 0x41, 0x91, 0x9d, 0x0c, 0x41, 0x88, 0xa6,
- 0x0c, 0x41, 0x81, 0xa5, 0x0c, 0x41, 0x79, 0xa4, 0x0c, 0x41, 0x71, 0xa3,
- 0x0c, 0x41, 0x69, 0xa2, 0x0c, 0x41, 0x61, 0xa1, 0x0c, 0x41, 0x59, 0xa0,
- 0x0c, 0x41, 0x51, 0x9f, 0x0c, 0x41, 0x49, 0x9e, 0x0c, 0x41, 0x41, 0x9d,
- 0x0c, 0x41, 0x38, 0xa6, 0x0c, 0x41, 0x31, 0xa5, 0x0c, 0x41, 0x29, 0xa4,
- 0x0c, 0x41, 0x21, 0xa3, 0x0c, 0x41, 0x19, 0xa2, 0x0c, 0x41, 0x11, 0xa1,
- 0x0c, 0x41, 0x09, 0xa0, 0x0c, 0x41, 0x01, 0x9f, 0x0c, 0x40, 0xf9, 0x9e,
- 0x0c, 0x40, 0xf1, 0x9d, 0x0c, 0x40, 0xe8, 0xa6, 0x0c, 0x40, 0xe1, 0xa5,
- 0x0c, 0x40, 0xd9, 0xa4, 0x0c, 0x40, 0xd1, 0xa3, 0x0c, 0x40, 0xc9, 0xa2,
- 0x0c, 0x40, 0xc1, 0xa1, 0x0c, 0x40, 0xb9, 0xa0, 0x0c, 0x40, 0xb1, 0x9f,
- 0x0c, 0x40, 0xa9, 0x9e, 0x0c, 0x40, 0xa1, 0x9d, 0x0c, 0x40, 0x98, 0xa6,
- 0x0c, 0x40, 0x91, 0xa5, 0x0c, 0x40, 0x89, 0xa4, 0x0c, 0x40, 0x81, 0xa3,
- 0x0c, 0x40, 0x79, 0xa2, 0x0c, 0x40, 0x71, 0xa1, 0x0c, 0x40, 0x69, 0xa0,
- 0x0c, 0x40, 0x61, 0x9f, 0x0c, 0x40, 0x59, 0x9e, 0x0c, 0x40, 0x51, 0x9d,
- 0x0c, 0x40, 0x48, 0xa6, 0x0c, 0x40, 0x41, 0xa5, 0x0c, 0x40, 0x39, 0xa4,
- 0x0c, 0x40, 0x31, 0xa3, 0x0c, 0x40, 0x29, 0xa2, 0x0c, 0x40, 0x21, 0xa1,
- 0x0c, 0x40, 0x19, 0xa0, 0x0c, 0x40, 0x11, 0x9f, 0x0c, 0x40, 0x09, 0x9e,
- 0x0c, 0x40, 0x00, 0xc2, 0x00, 0xb3, 0x0b, 0x55, 0xc1, 0x83, 0x0b, 0x55,
- 0x78, 0x83, 0x0b, 0x55, 0xa1, 0x44, 0x2c, 0x01, 0x43, 0x20, 0xcd, 0x17,
- 0xc3, 0x20, 0xd9, 0x9a, 0x0b, 0x54, 0x79, 0x93, 0x0b, 0x54, 0x71, 0x85,
- 0x0b, 0x54, 0x69, 0x9c, 0x0b, 0x54, 0x60, 0x9a, 0x0b, 0x54, 0xb9, 0x93,
- 0x0b, 0x54, 0xb1, 0x9c, 0x0b, 0x54, 0xa9, 0x85, 0x0b, 0x54, 0xa0, 0x9a,
- 0x0b, 0x54, 0x59, 0x93, 0x0b, 0x54, 0x51, 0x85, 0x0b, 0x54, 0x49, 0x9c,
- 0x0b, 0x54, 0x40, 0xc8, 0xbb, 0xc5, 0x08, 0xff, 0x89, 0xc6, 0xcd, 0xa5,
- 0x08, 0xff, 0x00, 0xc5, 0x44, 0x7b, 0x00, 0x5c, 0x19, 0xc4, 0x0f, 0x7c,
- 0x00, 0x5e, 0x68, 0xc3, 0x78, 0xc5, 0x08, 0xff, 0x11, 0xc4, 0xc4, 0x41,
- 0x08, 0xfe, 0xd0, 0xc4, 0x6d, 0x2f, 0x08, 0xff, 0x09, 0xc3, 0x00, 0xc1,
- 0x08, 0xfe, 0xf1, 0xc6, 0xd1, 0x71, 0x08, 0xfe, 0xd8, 0x83, 0x00, 0x5d,
- 0x19, 0xc2, 0x00, 0xc1, 0x00, 0x5d, 0x48, 0x83, 0x00, 0x5d, 0x99, 0xc2,
- 0x00, 0xc7, 0x00, 0x5d, 0xa0, 0xcb, 0x81, 0x64, 0x08, 0xfe, 0x29, 0xd9,
- 0x1f, 0x3c, 0x08, 0xfe, 0x00, 0x9f, 0x08, 0xfe, 0x51, 0x9e, 0x08, 0xfe,
- 0x48, 0xa2, 0x00, 0xd3, 0xc9, 0xa1, 0x00, 0xd3, 0xc1, 0xa0, 0x00, 0xd3,
- 0xb8, 0xc2, 0x00, 0xc7, 0x00, 0xd2, 0xb1, 0xc2, 0x02, 0x59, 0x00, 0xd2,
- 0xa8, 0xc2, 0x00, 0xa4, 0x00, 0xd1, 0xe9, 0x83, 0x00, 0xd1, 0xd8, 0xc2,
- 0x00, 0xa4, 0x00, 0xd1, 0xa9, 0x83, 0x00, 0xd1, 0xa0, 0xc2, 0x00, 0xa4,
- 0x00, 0xd1, 0x59, 0x83, 0x00, 0xd1, 0x48, 0xc2, 0x00, 0xa4, 0x00, 0xd1,
- 0x29, 0xc2, 0x96, 0xd0, 0x00, 0xd1, 0x21, 0x83, 0x00, 0xd1, 0x18, 0xc2,
- 0x00, 0xf6, 0x05, 0x54, 0x29, 0x91, 0x05, 0x54, 0x18, 0xc2, 0x00, 0xf6,
- 0x05, 0x54, 0x21, 0x91, 0x05, 0x54, 0x10, 0x00, 0xc3, 0x20, 0xe9, 0xc3,
- 0x42, 0x1e, 0x00, 0x72, 0xd8, 0xc2, 0x00, 0x4d, 0x00, 0x70, 0x99, 0x97,
- 0x00, 0x70, 0xc8, 0x89, 0x00, 0x70, 0x50, 0x15, 0xc3, 0x20, 0xf5, 0xc4,
- 0xe0, 0xf3, 0x00, 0x71, 0x48, 0x83, 0x00, 0x71, 0x83, 0x03, 0x21, 0x05,
- 0x8b, 0x00, 0x71, 0xa3, 0x03, 0x21, 0x17, 0x97, 0x00, 0x71, 0xc3, 0x03,
- 0x21, 0x1b, 0x87, 0x00, 0x72, 0x01, 0x91, 0x00, 0x72, 0x10, 0xc3, 0x00,
- 0x34, 0x00, 0x70, 0x69, 0xc2, 0x08, 0x0e, 0x00, 0x71, 0x10, 0xc5, 0xd8,
- 0x63, 0x00, 0x70, 0x79, 0xc3, 0x9c, 0x92, 0x00, 0x70, 0xa8, 0x42, 0x01,
- 0x48, 0xc3, 0x21, 0x26, 0xc9, 0xad, 0x51, 0x00, 0x72, 0x60, 0x42, 0x01,
- 0x48, 0xc3, 0x21, 0x38, 0xc5, 0xdd, 0x8b, 0x00, 0x71, 0xd0, 0x90, 0x00,
- 0x70, 0xf8, 0x00, 0xc3, 0x21, 0x44, 0xc5, 0xdf, 0x2f, 0x00, 0x72, 0x31,
- 0xc6, 0xcf, 0xdf, 0x00, 0x72, 0x38, 0xc4, 0x04, 0xb5, 0x00, 0x71, 0x29,
- 0xc5, 0xd7, 0x78, 0x00, 0x71, 0x60, 0x91, 0x0f, 0x15, 0x48, 0x97, 0x0f,
- 0x15, 0x20, 0x94, 0x00, 0x60, 0x5b, 0x03, 0x21, 0x5a, 0x8e, 0x00, 0x60,
- 0x62, 0x03, 0x21, 0x5e, 0xcb, 0x91, 0x19, 0x00, 0x62, 0xe8, 0x83, 0x00,
- 0x60, 0xf9, 0xc2, 0x00, 0xa4, 0x00, 0x61, 0x00, 0x83, 0x00, 0x61, 0x09,
- 0xc2, 0x00, 0xa4, 0x00, 0x61, 0x10, 0x83, 0x00, 0x61, 0x89, 0xc2, 0x02,
- 0x59, 0x00, 0x62, 0xd0, 0x83, 0x00, 0x61, 0x99, 0xc2, 0x00, 0xc7, 0x00,
- 0x61, 0xa0, 0x8e, 0x08, 0xa4, 0x50, 0x94, 0x08, 0xa4, 0x40, 0xcb, 0x92,
- 0x63, 0x00, 0x7e, 0x51, 0xcb, 0x94, 0x52, 0x00, 0x7e, 0x59, 0xcb, 0x9b,
- 0x06, 0x00, 0x7e, 0x60, 0x09, 0xc3, 0x21, 0x62, 0xc8, 0xb7, 0xed, 0x00,
- 0x78, 0xf8, 0x09, 0xc3, 0x21, 0x74, 0xc9, 0xb3, 0xe4, 0x00, 0x7e, 0x70,
- 0x83, 0x00, 0x7c, 0xd1, 0xc2, 0x00, 0xa4, 0x00, 0x7c, 0xd8, 0x83, 0x00,
- 0x7d, 0x49, 0xc2, 0x00, 0xa4, 0x00, 0x7d, 0x50, 0x83, 0x00, 0x7c, 0xe1,
- 0xc2, 0x00, 0xa4, 0x00, 0x7c, 0xe8, 0x83, 0x00, 0x7d, 0x59, 0xc2, 0x00,
- 0xa4, 0x00, 0x7d, 0x60, 0xcc, 0x8b, 0x60, 0x00, 0x78, 0x11, 0xcd, 0x79,
- 0x6c, 0x00, 0x78, 0x18, 0x8a, 0x01, 0x69, 0xa0, 0x8a, 0x01, 0x69, 0xd0,
- 0x8a, 0x01, 0x69, 0xf8, 0x44, 0x1e, 0x69, 0xc3, 0x21, 0x86, 0xc2, 0x26,
- 0xfa, 0x00, 0x46, 0x98, 0xc2, 0x26, 0xfa, 0x00, 0x47, 0x99, 0x44, 0x1e,
- 0x69, 0x43, 0x21, 0xa2, 0xc9, 0xaf, 0x5b, 0x00, 0x47, 0x09, 0xc2, 0x01,
- 0xf2, 0x00, 0x46, 0xa9, 0xc3, 0x00, 0x4c, 0x00, 0x36, 0xe0, 0xce, 0x6d,
- 0xd1, 0x00, 0x47, 0x01, 0xc8, 0xb9, 0x7d, 0x00, 0x46, 0x50, 0xcb, 0x62,
- 0xb2, 0x00, 0x46, 0xc0, 0x8a, 0x00, 0x46, 0x69, 0xc2, 0x00, 0x34, 0x00,
- 0x30, 0xb8, 0xdb, 0x17, 0xb9, 0x00, 0x46, 0x58, 0xc4, 0x44, 0xbd, 0x00,
- 0x37, 0x21, 0x45, 0x2d, 0xff, 0x43, 0x21, 0xb8, 0xc9, 0x04, 0x5e, 0x00,
- 0x36, 0xd9, 0xc2, 0x02, 0x98, 0x00, 0x30, 0xa8, 0xc7, 0xc3, 0xf6, 0x00,
- 0x36, 0xc9, 0x48, 0x19, 0x70, 0x43, 0x21, 0xc4, 0xc5, 0x01, 0x62, 0x00,
- 0x46, 0x81, 0xcd, 0x05, 0x7a, 0x07, 0xf3, 0xf1, 0xcb, 0x66, 0x54, 0x07,
- 0xf3, 0xf8, 0x4b, 0x08, 0x69, 0xc3, 0x21, 0xd6, 0xc5, 0x01, 0x62, 0x07,
- 0xdd, 0xa9, 0xc5, 0x00, 0x95, 0x07, 0xdd, 0xa0, 0x53, 0x24, 0xc0, 0xc3,
- 0x21, 0xe2, 0xc5, 0x01, 0x62, 0x07, 0xdd, 0xb9, 0xc5, 0x00, 0x95, 0x07,
- 0xdd, 0xb0, 0xc5, 0x01, 0x62, 0x07, 0xdd, 0x99, 0xc5, 0x00, 0x95, 0x07,
- 0xdd, 0x90, 0xd0, 0x58, 0xf2, 0x00, 0x37, 0xf1, 0xc9, 0x37, 0x30, 0x00,
- 0x37, 0xe8, 0xda, 0x1a, 0x6d, 0x00, 0x30, 0x81, 0xc4, 0xe4, 0xf7, 0x00,
- 0x30, 0x21, 0xc3, 0xaf, 0x61, 0x00, 0x30, 0x19, 0xc3, 0x3b, 0xca, 0x00,
- 0x30, 0x08, 0x4d, 0x08, 0x1a, 0xc3, 0x21, 0xee, 0x45, 0x19, 0x9d, 0xc3,
- 0x21, 0xfa, 0x44, 0x19, 0xa7, 0xc3, 0x22, 0x04, 0x44, 0x2e, 0x60, 0x43,
- 0x22, 0x0e, 0x44, 0x2e, 0x60, 0xc3, 0x22, 0x1a, 0x4d, 0x08, 0x1a, 0xc3,
- 0x22, 0x26, 0x45, 0x19, 0x9d, 0xc3, 0x22, 0x32, 0x45, 0x2d, 0xfe, 0x43,
- 0x22, 0x3c, 0xd1, 0x55, 0x90, 0x07, 0xe2, 0xa1, 0xda, 0x1c, 0x27, 0x07,
- 0xe2, 0x99, 0x45, 0x19, 0x9d, 0xc3, 0x22, 0x46, 0x46, 0x2d, 0xfe, 0xc3,
- 0x22, 0x50, 0xdd, 0x12, 0x1f, 0x07, 0xe6, 0xc8, 0x49, 0xb1, 0xe3, 0xc3,
- 0x22, 0x5c, 0x4a, 0xa6, 0xa8, 0x43, 0x22, 0x84, 0x4d, 0x08, 0x1a, 0xc3,
- 0x22, 0x9c, 0x45, 0x19, 0x9d, 0xc3, 0x22, 0xa8, 0x45, 0x51, 0xe9, 0xc3,
- 0x22, 0xb8, 0x0a, 0xc3, 0x22, 0xc8, 0x45, 0x2d, 0xfe, 0xc3, 0x22, 0xd4,
- 0x44, 0x6f, 0x91, 0xc3, 0x22, 0xe4, 0x44, 0x2e, 0x60, 0x43, 0x22, 0xf0,
- 0x47, 0x08, 0x94, 0xc3, 0x22, 0xfc, 0x0e, 0x43, 0x23, 0x20, 0xcd, 0x05,
- 0x7a, 0x07, 0xe7, 0xd1, 0xca, 0x2b, 0x13, 0x07, 0xe8, 0xb0, 0x0b, 0xc3,
- 0x23, 0x2a, 0x45, 0x00, 0x6c, 0x43, 0x23, 0x36, 0xcc, 0x05, 0x7b, 0x07,
- 0xe1, 0x59, 0xcb, 0x12, 0x31, 0x07, 0xe5, 0xe0, 0xca, 0x2b, 0x13, 0x07,
- 0xe8, 0xa9, 0xcd, 0x05, 0x7a, 0x07, 0xe7, 0xc8, 0x4d, 0x08, 0x1a, 0xc3,
- 0x23, 0x48, 0x45, 0x19, 0x9d, 0xc3, 0x23, 0x54, 0x45, 0x2d, 0xfe, 0xc3,
- 0x23, 0x5e, 0x44, 0x2e, 0x60, 0x43, 0x23, 0x68, 0x43, 0x08, 0x1c, 0xc3,
- 0x23, 0x74, 0x43, 0x14, 0x3a, 0xc3, 0x23, 0x80, 0xd1, 0x52, 0x93, 0x07,
- 0xef, 0x90, 0x47, 0x0f, 0x17, 0xc3, 0x23, 0x90, 0xd2, 0x48, 0x84, 0x07,
- 0xea, 0x70, 0x48, 0xac, 0x68, 0xc3, 0x23, 0xa8, 0x46, 0x3f, 0x36, 0x43,
- 0x23, 0xd8, 0x44, 0x2e, 0x60, 0xc3, 0x23, 0xde, 0x4d, 0x08, 0x1a, 0xc3,
- 0x23, 0xea, 0xcf, 0x62, 0x81, 0x07, 0xe3, 0x99, 0x45, 0x19, 0x9d, 0xc3,
- 0x23, 0xf6, 0xcf, 0x69, 0x5c, 0x07, 0xe3, 0x89, 0xce, 0x6f, 0x91, 0x07,
- 0xe3, 0x81, 0x45, 0x51, 0xe9, 0xc3, 0x24, 0x0c, 0x0a, 0xc3, 0x24, 0x16,
- 0x45, 0x2d, 0xfe, 0x43, 0x24, 0x22, 0x43, 0x2e, 0x61, 0xc3, 0x24, 0x2c,
- 0x03, 0x43, 0x24, 0x38, 0xcb, 0x66, 0x54, 0x07, 0xe7, 0x81, 0x0b, 0xc3,
- 0x24, 0x44, 0xca, 0x2b, 0x13, 0x07, 0xe4, 0x99, 0x45, 0x00, 0x6c, 0x43,
- 0x24, 0x50, 0xcd, 0x05, 0x7a, 0x07, 0xe2, 0xd1, 0xca, 0x2b, 0x13, 0x07,
- 0xe4, 0xb0, 0xcd, 0x05, 0x7a, 0x07, 0xe2, 0xc9, 0xca, 0x2b, 0x13, 0x07,
- 0xe4, 0xa8, 0xcc, 0x05, 0x7b, 0x07, 0xe2, 0xb9, 0xcb, 0x12, 0x31, 0x07,
- 0xe6, 0xe0, 0x0b, 0xc3, 0x24, 0x5c, 0xd3, 0x43, 0xe8, 0x07, 0xed, 0x78,
- 0x43, 0x2e, 0x61, 0xc3, 0x24, 0x68, 0x43, 0x02, 0x98, 0x43, 0x24, 0x74,
- 0xcd, 0x05, 0x7a, 0x07, 0xe2, 0x81, 0xca, 0x2b, 0x13, 0x07, 0xe4, 0x78,
- 0xcd, 0x05, 0x7a, 0x07, 0xe2, 0x79, 0xca, 0x2b, 0x13, 0x07, 0xe4, 0x70,
- 0x0b, 0xc3, 0x24, 0x7e, 0xca, 0x2b, 0x13, 0x07, 0xe4, 0x61, 0x45, 0x00,
- 0x6c, 0xc3, 0x24, 0x8a, 0xcb, 0x66, 0x54, 0x07, 0xe7, 0x70, 0xcc, 0x05,
- 0x7b, 0x07, 0xe2, 0x69, 0xcb, 0x12, 0x31, 0x07, 0xe6, 0xa0, 0x0b, 0xc3,
- 0x24, 0x96, 0x45, 0x00, 0x6c, 0x43, 0x24, 0xa2, 0x45, 0x19, 0x9d, 0xc3,
- 0x24, 0xba, 0x44, 0x0f, 0x69, 0xc3, 0x24, 0xd0, 0x44, 0x2e, 0x60, 0xc3,
- 0x24, 0xe0, 0x45, 0x08, 0x1a, 0xc3, 0x24, 0xec, 0x46, 0x51, 0xe9, 0xc3,
- 0x24, 0xfe, 0x45, 0x51, 0xea, 0xc3, 0x25, 0x0a, 0x46, 0x2d, 0xfe, 0x43,
- 0x25, 0x16, 0x46, 0x55, 0x4c, 0xc3, 0x25, 0x22, 0xd1, 0x54, 0x91, 0x07,
- 0xe0, 0xd1, 0x46, 0x2d, 0xfe, 0xc3, 0x25, 0x2e, 0x4d, 0x08, 0x1a, 0xc3,
- 0x25, 0x3a, 0x44, 0x2e, 0x60, 0x43, 0x25, 0x46, 0xca, 0x2b, 0x13, 0x07,
- 0xe4, 0x39, 0xcd, 0x05, 0x7a, 0x07, 0xe2, 0x20, 0x48, 0x08, 0x1f, 0xc3,
- 0x25, 0x52, 0x45, 0x00, 0x6c, 0xc3, 0x25, 0x5e, 0xcd, 0x05, 0x7a, 0x07,
- 0xf7, 0xd9, 0xca, 0x2b, 0x13, 0x07, 0xf7, 0xe0, 0xca, 0x2b, 0x13, 0x07,
- 0xe4, 0x29, 0x0b, 0xc3, 0x25, 0x6a, 0xcb, 0x66, 0x54, 0x07, 0xe7, 0x69,
- 0x45, 0x00, 0x6c, 0x43, 0x25, 0x76, 0x0b, 0xc3, 0x25, 0x82, 0x4a, 0x73,
- 0x4d, 0x43, 0x25, 0x8e, 0x43, 0x02, 0x98, 0xc3, 0x25, 0x9a, 0xcf, 0x64,
- 0x16, 0x07, 0xe6, 0x68, 0x0b, 0xc3, 0x25, 0xa4, 0x45, 0x00, 0x6c, 0x43,
- 0x25, 0xb0, 0x47, 0x0f, 0x62, 0xc3, 0x25, 0xc2, 0x4a, 0xa9, 0x1e, 0x43,
- 0x25, 0xda, 0xca, 0x2b, 0x13, 0x07, 0xe3, 0xe9, 0xcd, 0x05, 0x7a, 0x07,
- 0xe1, 0x90, 0xca, 0x2b, 0x13, 0x07, 0xe3, 0xe1, 0xcd, 0x05, 0x7a, 0x07,
- 0xe1, 0x88, 0x0b, 0xc3, 0x25, 0xe0, 0xd3, 0x43, 0xe8, 0x07, 0xee, 0x08,
- 0x0b, 0xc3, 0x25, 0xec, 0x4a, 0x73, 0x4d, 0x43, 0x25, 0xf8, 0xcc, 0x05,
- 0x7b, 0x07, 0xe1, 0x71, 0xcb, 0x12, 0x31, 0x07, 0xe5, 0xf8, 0xcc, 0x05,
- 0x7b, 0x07, 0xe1, 0x69, 0xcb, 0x12, 0x31, 0x07, 0xe5, 0xf0, 0x44, 0x2e,
- 0x60, 0xc3, 0x26, 0x04, 0x4d, 0x08, 0x1a, 0xc3, 0x26, 0x10, 0xcf, 0x62,
- 0x81, 0x07, 0xe3, 0x69, 0x45, 0x19, 0x9d, 0xc3, 0x26, 0x1c, 0xcf, 0x69,
- 0x5c, 0x07, 0xe3, 0x59, 0xce, 0x6f, 0x91, 0x07, 0xe3, 0x51, 0x45, 0x51,
- 0xe9, 0xc3, 0x26, 0x2c, 0x0a, 0xc3, 0x26, 0x36, 0x46, 0x2d, 0xfe, 0x43,
- 0x26, 0x42, 0xe0, 0x0a, 0x07, 0x07, 0xe2, 0xe0, 0xce, 0x6e, 0xa3, 0x07,
- 0xea, 0x0b, 0x03, 0x26, 0x4e, 0x46, 0xd1, 0xfb, 0xc3, 0x26, 0x58, 0xd2,
- 0x47, 0xac, 0x07, 0xef, 0xb0, 0xd1, 0x55, 0x90, 0x07, 0xe2, 0x51, 0x45,
- 0x08, 0x1a, 0xc3, 0x26, 0x64, 0x45, 0x19, 0x9d, 0xc3, 0x26, 0x70, 0x45,
- 0x51, 0xe9, 0xc3, 0x26, 0x80, 0x44, 0x19, 0xa7, 0xc3, 0x26, 0x8a, 0x45,
- 0x2d, 0xfe, 0x43, 0x26, 0x94, 0xcc, 0x05, 0x7b, 0x07, 0xe1, 0x41, 0xcb,
- 0x12, 0x31, 0x07, 0xe5, 0xc8, 0xcc, 0x05, 0x7b, 0x07, 0xe1, 0x29, 0xcb,
- 0x12, 0x31, 0x07, 0xe5, 0xb8, 0x0b, 0xc3, 0x26, 0x9e, 0x4a, 0x73, 0x4d,
- 0x43, 0x26, 0xaa, 0x0b, 0xc3, 0x26, 0xb6, 0x45, 0x00, 0x6c, 0x43, 0x26,
- 0xc2, 0xcc, 0x05, 0x7b, 0x07, 0xe1, 0x11, 0xcb, 0x12, 0x31, 0x07, 0xe5,
- 0xa0, 0xcd, 0x05, 0x7a, 0x07, 0xe8, 0x81, 0xca, 0x2b, 0x13, 0x07, 0xe9,
- 0x60, 0xca, 0x2b, 0x13, 0x07, 0xe9, 0x19, 0xcd, 0x05, 0x7a, 0x07, 0xe8,
- 0x38, 0xca, 0x2b, 0x13, 0x07, 0xe9, 0x21, 0xcd, 0x05, 0x7a, 0x07, 0xe8,
- 0x40, 0x0b, 0xc3, 0x26, 0xce, 0xca, 0x2b, 0x13, 0x07, 0xdf, 0xd0, 0xc8,
- 0xb9, 0x7d, 0x00, 0x36, 0x63, 0x03, 0x26, 0xda, 0xc2, 0x0d, 0xf7, 0x00,
- 0x32, 0x0a, 0x03, 0x26, 0xde, 0xc3, 0x1b, 0x51, 0x00, 0x46, 0x41, 0xc4,
- 0x95, 0xd4, 0x00, 0x31, 0xd3, 0x03, 0x26, 0xe2, 0xc2, 0x0f, 0x61, 0x00,
- 0x35, 0x7b, 0x03, 0x26, 0xe6, 0xc3, 0x78, 0x0c, 0x00, 0x35, 0x9a, 0x03,
- 0x26, 0xea, 0xc2, 0x00, 0xc2, 0x00, 0x32, 0x23, 0x03, 0x26, 0xee, 0xc7,
- 0xca, 0x8d, 0x00, 0x45, 0x68, 0xc2, 0x04, 0x4e, 0x00, 0x31, 0x63, 0x03,
- 0x26, 0xf2, 0x8a, 0x00, 0x34, 0xc2, 0x03, 0x26, 0xf6, 0x47, 0xc0, 0x2d,
- 0xc3, 0x26, 0xfa, 0xc2, 0x00, 0x34, 0x00, 0x31, 0xcb, 0x03, 0x27, 0x0f,
- 0xc3, 0x01, 0x1e, 0x00, 0x31, 0x3b, 0x03, 0x27, 0x13, 0x87, 0x00, 0x36,
- 0xa8, 0xc4, 0xe4, 0x2b, 0x00, 0x35, 0x4b, 0x03, 0x27, 0x17, 0x03, 0xc3,
- 0x27, 0x1b, 0x47, 0x08, 0x13, 0xc3, 0x27, 0x28, 0xc3, 0x19, 0xa7, 0x00,
- 0x31, 0x72, 0x03, 0x27, 0x3a, 0xc4, 0xe4, 0x8b, 0x00, 0x34, 0x33, 0x03,
- 0x27, 0x3e, 0xc3, 0x32, 0xdd, 0x00, 0x33, 0xcb, 0x03, 0x27, 0x4b, 0xc2,
- 0x0d, 0xf7, 0x00, 0x31, 0x53, 0x03, 0x27, 0x58, 0xc2, 0x02, 0x98, 0x00,
- 0x31, 0xbb, 0x03, 0x27, 0x65, 0x0a, 0x43, 0x27, 0x69, 0x00, 0xc3, 0x27,
- 0x81, 0xc2, 0x0d, 0xf7, 0x00, 0x35, 0x32, 0x03, 0x27, 0x97, 0xc2, 0x0d,
- 0xf7, 0x00, 0x32, 0x53, 0x03, 0x27, 0x9b, 0x97, 0x00, 0x36, 0x42, 0x03,
- 0x27, 0x9f, 0xc2, 0x0d, 0xf7, 0x00, 0x31, 0x8b, 0x03, 0x27, 0xa3, 0xcb,
- 0x92, 0x37, 0x00, 0x45, 0x61, 0xc4, 0x3a, 0x8e, 0x00, 0x35, 0xdb, 0x03,
- 0x27, 0xa7, 0xc3, 0x6f, 0x91, 0x00, 0x34, 0x8a, 0x03, 0x27, 0xab, 0x8a,
- 0x00, 0x31, 0x43, 0x03, 0x27, 0xaf, 0xc2, 0x0d, 0xf7, 0x00, 0x33, 0xda,
- 0x03, 0x27, 0xbc, 0x42, 0x00, 0xcc, 0xc3, 0x27, 0xc0, 0x00, 0x43, 0x27,
- 0xc6, 0x00, 0x43, 0x27, 0xdb, 0x00, 0x43, 0x27, 0xf1, 0xc2, 0x00, 0x34,
- 0x00, 0x31, 0x93, 0x03, 0x28, 0x01, 0x8a, 0x00, 0x31, 0xc2, 0x03, 0x28,
- 0x05, 0xcb, 0x95, 0xde, 0x00, 0x45, 0x89, 0xc2, 0x01, 0xf2, 0x00, 0x31,
- 0xab, 0x03, 0x28, 0x09, 0xc4, 0xe0, 0xfb, 0x00, 0x31, 0xa3, 0x03, 0x28,
- 0x0d, 0xc8, 0xbf, 0x55, 0x00, 0x35, 0x51, 0xc3, 0x00, 0x4c, 0x00, 0x31,
- 0x9b, 0x03, 0x28, 0x11, 0xcf, 0x0a, 0x0a, 0x00, 0x33, 0x80, 0x03, 0xc3,
- 0x28, 0x15, 0x42, 0x0b, 0xc6, 0xc3, 0x28, 0x2c, 0xc2, 0x09, 0x6f, 0x00,
- 0x34, 0x73, 0x03, 0x28, 0x3c, 0xc3, 0x2e, 0x60, 0x00, 0x34, 0x23, 0x03,
- 0x28, 0x40, 0x47, 0x3b, 0xb1, 0x43, 0x28, 0x44, 0x00, 0xc3, 0x28, 0x56,
- 0x8a, 0x00, 0x35, 0x22, 0x03, 0x28, 0x62, 0x00, 0x43, 0x28, 0x66, 0xc3,
- 0x14, 0xa3, 0x00, 0x32, 0x2b, 0x03, 0x28, 0x78, 0xc3, 0x00, 0xbb, 0x00,
- 0x30, 0xe0, 0x00, 0x43, 0x28, 0x7c, 0x89, 0x00, 0x35, 0x6b, 0x03, 0x28,
- 0x88, 0xc3, 0x02, 0xd4, 0x00, 0x32, 0x33, 0x03, 0x28, 0x95, 0xc3, 0x2e,
- 0x60, 0x00, 0x34, 0x1a, 0x03, 0x28, 0x99, 0x03, 0xc3, 0x28, 0x9d, 0xc2,
- 0x0d, 0xf7, 0x00, 0x32, 0x3b, 0x03, 0x28, 0xad, 0xc9, 0xae, 0xd4, 0x00,
- 0x33, 0xa2, 0x03, 0x28, 0xb1, 0x4c, 0x6f, 0x15, 0xc3, 0x28, 0xb5, 0x46,
- 0x3b, 0xb2, 0x43, 0x29, 0x1d, 0x8e, 0x0f, 0x70, 0x19, 0x86, 0x0f, 0x70,
- 0xc8, 0x8a, 0x0f, 0x70, 0x41, 0x45, 0x12, 0xed, 0x43, 0x29, 0x35, 0xc2,
- 0x0d, 0xf7, 0x0f, 0x70, 0xb1, 0xc2, 0x00, 0x92, 0x0f, 0x70, 0xc0, 0x03,
- 0xc3, 0x29, 0x73, 0xc3, 0x88, 0x60, 0x0f, 0x74, 0x09, 0xc4, 0x2d, 0xfe,
- 0x0f, 0x74, 0x11, 0x42, 0x0b, 0xc6, 0xc3, 0x29, 0x7f, 0x0a, 0xc3, 0x29,
- 0x87, 0xc3, 0x78, 0xa9, 0x0f, 0x74, 0x29, 0x42, 0x02, 0xb4, 0xc3, 0x29,
- 0x93, 0x16, 0xc3, 0x29, 0x9d, 0xc3, 0x2e, 0x60, 0x0f, 0x74, 0x49, 0xc3,
- 0x0f, 0x69, 0x0f, 0x74, 0x59, 0xc4, 0x19, 0x9d, 0x0f, 0x74, 0x61, 0xc4,
- 0x3a, 0x8e, 0x0f, 0x74, 0x69, 0x15, 0xc3, 0x29, 0xad, 0xc3, 0xb2, 0x7c,
- 0x0f, 0x74, 0x81, 0xc3, 0x0f, 0x60, 0x0f, 0x74, 0x91, 0xc3, 0x6f, 0x91,
- 0x0f, 0x74, 0x99, 0xc4, 0x3c, 0x75, 0x0f, 0x74, 0xb9, 0xc5, 0x95, 0xd3,
- 0x0f, 0x74, 0xd8, 0xc3, 0x88, 0x60, 0x0f, 0x73, 0x09, 0xc4, 0x2d, 0xfe,
- 0x0f, 0x73, 0x11, 0x0a, 0xc3, 0x29, 0xbf, 0x16, 0xc3, 0x29, 0xcb, 0xc3,
- 0x2e, 0x60, 0x0f, 0x73, 0x49, 0x0d, 0xc3, 0x29, 0xdd, 0xc4, 0x19, 0x9d,
- 0x0f, 0x73, 0x61, 0xc4, 0x3a, 0x8e, 0x0f, 0x73, 0x69, 0x15, 0xc3, 0x29,
- 0xe9, 0xc3, 0x04, 0x6c, 0x0f, 0x73, 0x79, 0xc3, 0xb2, 0x7c, 0x0f, 0x73,
- 0x81, 0xc3, 0x0f, 0x60, 0x0f, 0x73, 0x91, 0x06, 0xc3, 0x29, 0xfb, 0xc3,
- 0x73, 0x49, 0x0f, 0x73, 0xd1, 0xc5, 0x95, 0xd3, 0x0f, 0x73, 0xd8, 0xc2,
- 0x0d, 0xf7, 0x0f, 0x71, 0x21, 0xc2, 0x02, 0x98, 0x0f, 0x71, 0x38, 0xc2,
- 0x0f, 0x61, 0x0f, 0x71, 0x51, 0xc3, 0x19, 0xa7, 0x0f, 0x71, 0xb8, 0xc3,
- 0x00, 0x4c, 0x0f, 0x71, 0x71, 0xc2, 0x01, 0xf2, 0x0f, 0x71, 0x89, 0xc4,
- 0xe0, 0xfb, 0x0f, 0x71, 0xa0, 0xc2, 0x0d, 0xf7, 0x0f, 0x71, 0xa9, 0xc3,
- 0x66, 0x50, 0x0f, 0x71, 0xb0, 0xc8, 0x37, 0x1f, 0x00, 0x47, 0xf1, 0xcd,
- 0x05, 0x7a, 0x07, 0xf3, 0xc1, 0xcb, 0x66, 0x54, 0x07, 0xf3, 0xc8, 0xce,
- 0x05, 0x79, 0x07, 0xf3, 0x90, 0xc9, 0x17, 0x7a, 0x00, 0x47, 0xa9, 0xc4,
- 0x00, 0x5b, 0x00, 0x47, 0xa1, 0xc8, 0x01, 0x59, 0x00, 0x32, 0xf0, 0xce,
- 0x01, 0x59, 0x00, 0x44, 0x29, 0x4b, 0x94, 0xb5, 0xc3, 0x2a, 0x07, 0xce,
- 0x71, 0x0b, 0x07, 0xf3, 0x88, 0xc2, 0xe7, 0x79, 0x0f, 0xb9, 0x88, 0xc8,
- 0x85, 0x40, 0x0f, 0xb9, 0x71, 0xc6, 0x48, 0x2a, 0x0f, 0xb9, 0x38, 0xcb,
- 0x01, 0x09, 0x01, 0x1a, 0xb9, 0xc6, 0xce, 0xdd, 0x01, 0x1a, 0x60, 0xc2,
- 0x00, 0xb3, 0x01, 0x1a, 0x68, 0xc5, 0x3f, 0x30, 0x01, 0x19, 0xd1, 0xc4,
- 0x03, 0x32, 0x01, 0x19, 0xc8, 0xc7, 0x0c, 0x4b, 0x08, 0x08, 0xd9, 0xc8,
- 0x50, 0x00, 0x08, 0x09, 0x20, 0xc7, 0x0c, 0x4b, 0x08, 0x08, 0xd1, 0xc8,
- 0x50, 0x00, 0x08, 0x09, 0x18, 0xc7, 0x0c, 0x4b, 0x08, 0x08, 0xe9, 0xc8,
- 0x50, 0x00, 0x08, 0x09, 0x30, 0xc7, 0x0c, 0x4b, 0x08, 0x08, 0xe1, 0xc8,
- 0x50, 0x00, 0x08, 0x09, 0x28, 0xc7, 0x3f, 0x2e, 0x0f, 0xdd, 0x71, 0x47,
- 0x00, 0x50, 0xc3, 0x2a, 0x13, 0x46, 0x01, 0xc7, 0xc3, 0x2a, 0x1f, 0xc5,
- 0x0d, 0xbc, 0x01, 0x2b, 0x98, 0xc2, 0x01, 0x07, 0x01, 0x2b, 0xbb, 0x03,
- 0x2a, 0x31, 0x4a, 0xa3, 0x10, 0x43, 0x2a, 0x37, 0x0a, 0xc3, 0x2a, 0x43,
- 0xc4, 0x01, 0x1e, 0x01, 0x28, 0xc1, 0xc5, 0x01, 0xf7, 0x01, 0x28, 0xa0,
- 0xc5, 0x01, 0xf7, 0x01, 0x2b, 0x81, 0xc4, 0x01, 0x1e, 0x01, 0x2b, 0x78,
- 0xc4, 0x01, 0x1e, 0x01, 0x2b, 0x71, 0xc5, 0x01, 0xf7, 0x01, 0x2b, 0x68,
- 0xca, 0x00, 0xf6, 0x01, 0x29, 0xe1, 0xc4, 0x01, 0x1e, 0x01, 0x29, 0x21,
- 0xc5, 0x01, 0xf7, 0x01, 0x28, 0xe0, 0xc9, 0x11, 0x7f, 0x01, 0x2b, 0xf9,
- 0xc3, 0x01, 0x1f, 0x01, 0x28, 0xd8, 0xca, 0x00, 0xf6, 0x01, 0x29, 0x99,
- 0xc4, 0x01, 0x1e, 0x01, 0x28, 0x99, 0xc5, 0x01, 0xf7, 0x01, 0x28, 0x78,
- 0xca, 0x00, 0xf6, 0x01, 0x2b, 0x61, 0xc4, 0x01, 0x1e, 0x01, 0x2b, 0x19,
- 0xc5, 0x01, 0xf7, 0x01, 0x2b, 0x00, 0xc8, 0x11, 0x71, 0x01, 0x29, 0x49,
- 0xc5, 0x10, 0x37, 0x01, 0x28, 0x88, 0xc8, 0x11, 0x71, 0x01, 0x29, 0x09,
- 0xc5, 0x10, 0x37, 0x01, 0x28, 0x68, 0xc8, 0x10, 0x47, 0x01, 0x29, 0x39,
- 0xc5, 0x0a, 0x2b, 0x01, 0x28, 0x90, 0xc8, 0x10, 0x47, 0x01, 0x28, 0xf9,
- 0xc5, 0x0a, 0x2b, 0x01, 0x28, 0x70, 0xa3, 0x0f, 0xd9, 0xa0, 0xa3, 0x0f,
- 0xd9, 0x61, 0xa2, 0x0f, 0xd8, 0xe8, 0xa3, 0x0f, 0xd9, 0xc0, 0xa3, 0x0f,
- 0xd9, 0xd0, 0xa3, 0x0f, 0xd9, 0xd8, 0xd7, 0x29, 0x7f, 0x0f, 0xd2, 0x60,
- 0xc5, 0x7c, 0xf9, 0x01, 0x32, 0xf3, 0x03, 0x2a, 0x4f, 0xc3, 0x00, 0x34,
- 0x01, 0x32, 0xd2, 0x03, 0x2a, 0x59, 0x49, 0x29, 0x7f, 0x43, 0x2a, 0x5f,
- 0x49, 0x29, 0x7f, 0x43, 0x2a, 0x6b, 0x49, 0x29, 0x7f, 0x43, 0x2a, 0x77,
- 0x49, 0x29, 0x7f, 0x43, 0x2a, 0x83, 0x0d, 0xc3, 0x2a, 0x8f, 0xc5, 0xb5,
- 0xaf, 0x0f, 0xd1, 0x29, 0xc4, 0xe0, 0xaf, 0x0f, 0xd1, 0x31, 0xc6, 0xd1,
- 0xf5, 0x0f, 0xd1, 0x39, 0xc4, 0xe5, 0xdf, 0x0f, 0xd1, 0x48, 0xcf, 0x13,
- 0x0f, 0x01, 0x5d, 0x71, 0xcd, 0x1b, 0x98, 0x01, 0x5d, 0x60, 0xcf, 0x0b,
- 0x98, 0x01, 0x5d, 0x41, 0xd0, 0x01, 0xf7, 0x01, 0x5d, 0x48, 0xcf, 0x0b,
- 0x98, 0x01, 0x5d, 0x51, 0xd0, 0x01, 0xf7, 0x01, 0x5d, 0x58, 0xcd, 0x1b,
- 0x98, 0x01, 0x5d, 0x69, 0xcf, 0x13, 0x0f, 0x01, 0x5d, 0x78, 0x45, 0x00,
- 0x6c, 0xc3, 0x2a, 0x9b, 0xca, 0xa1, 0xa8, 0x01, 0x1f, 0xd0, 0x15, 0xc3,
- 0x2a, 0xad, 0xc7, 0x3f, 0x2e, 0x01, 0x59, 0x49, 0xc7, 0x08, 0xc0, 0x01,
- 0x59, 0x50, 0xc8, 0xb7, 0x9d, 0x01, 0x1f, 0xc9, 0xc6, 0x87, 0x28, 0x0f,
- 0xa9, 0x91, 0xc7, 0x59, 0x17, 0x01, 0x5e, 0x00, 0xd8, 0x22, 0x98, 0x0f,
- 0xbc, 0x19, 0xce, 0x73, 0x9d, 0x01, 0x2d, 0xf1, 0xc8, 0x01, 0xe7, 0x01,
- 0x2d, 0xe1, 0xcf, 0x65, 0xc9, 0x01, 0x1f, 0x60, 0xcd, 0x79, 0x5f, 0x01,
- 0x3a, 0xb1, 0xc4, 0x23, 0x79, 0x01, 0x33, 0x31, 0xcf, 0x6a, 0x2e, 0x01,
- 0x4f, 0x51, 0xc7, 0x59, 0x17, 0x01, 0x5e, 0x09, 0xc8, 0x72, 0x91, 0x01,
- 0x5e, 0xf0, 0xc4, 0x58, 0x66, 0x01, 0x36, 0x19, 0xc3, 0x14, 0x99, 0x01,
- 0x36, 0x10, 0xd8, 0x22, 0x98, 0x0f, 0xbc, 0x11, 0x12, 0xc3, 0x2a, 0xb9,
- 0xce, 0x73, 0x9d, 0x01, 0x2d, 0xc1, 0xc8, 0x01, 0xe7, 0x01, 0x2d, 0xb3,
- 0x03, 0x2a, 0xc5, 0xcf, 0x65, 0xc9, 0x01, 0x1f, 0x4a, 0x03, 0x2a, 0xcb,
- 0xc5, 0x01, 0x0f, 0x01, 0x3d, 0x0b, 0x03, 0x2a, 0xd1, 0xc6, 0x1d, 0x59,
- 0x01, 0x02, 0x69, 0xd5, 0x03, 0x72, 0x01, 0x5c, 0xf0, 0xc5, 0x08, 0x42,
- 0x01, 0x30, 0xd9, 0xce, 0x25, 0x12, 0x0f, 0xac, 0xe8, 0xd8, 0x22, 0x98,
- 0x0f, 0xbc, 0x01, 0xc7, 0x41, 0x48, 0x01, 0x2e, 0x21, 0xce, 0x73, 0x9d,
- 0x01, 0x2e, 0x11, 0xc8, 0x01, 0xe7, 0x01, 0x2e, 0x01, 0xcf, 0x65, 0xc9,
- 0x01, 0x1f, 0x52, 0x03, 0x2a, 0xd7, 0xca, 0xa8, 0xf6, 0x01, 0x36, 0xc1,
- 0x49, 0x01, 0x8a, 0x43, 0x2a, 0xdd, 0xc6, 0x1d, 0x59, 0x01, 0x02, 0x61,
- 0xd5, 0x03, 0x72, 0x01, 0x5c, 0xe0, 0xcd, 0x2d, 0x1d, 0x01, 0x2f, 0x19,
- 0xce, 0x22, 0xa2, 0x01, 0x2f, 0x10, 0x45, 0x04, 0x74, 0xc3, 0x2a, 0xe9,
- 0xc5, 0x07, 0x0a, 0x01, 0x2f, 0xe0, 0xd5, 0x2f, 0x18, 0x01, 0x1f, 0xbb,
- 0x03, 0x2a, 0xfb, 0xc6, 0x3f, 0x2f, 0x01, 0x59, 0x28, 0xc8, 0x59, 0x16,
- 0x01, 0x5e, 0x28, 0xc8, 0x59, 0x16, 0x01, 0x5e, 0x40, 0xd5, 0x35, 0x89,
- 0x01, 0x1f, 0xa3, 0x03, 0x2b, 0x01, 0xc6, 0x08, 0xc1, 0x01, 0x59, 0x38,
- 0xce, 0x22, 0xa2, 0x01, 0x2f, 0x29, 0xcd, 0x2d, 0x1d, 0x01, 0x2f, 0x20,
- 0xce, 0x73, 0x9d, 0x01, 0x2d, 0xa1, 0xc8, 0x01, 0xe7, 0x01, 0x2d, 0x91,
- 0xcf, 0x65, 0xc9, 0x01, 0x1f, 0x59, 0xd8, 0x22, 0x98, 0x0f, 0xbc, 0x08,
- 0xc5, 0x23, 0x78, 0x01, 0x33, 0x28, 0x46, 0x00, 0x6b, 0x43, 0x2b, 0x07,
- 0xcd, 0x76, 0x2c, 0x00, 0xdb, 0x88, 0xcd, 0x76, 0x2c, 0x00, 0xdb, 0x80,
- 0x00, 0x43, 0x2b, 0x21, 0xc4, 0xbe, 0x11, 0x00, 0xd9, 0x19, 0xcf, 0x6a,
- 0xf1, 0x00, 0xd8, 0xf1, 0xc5, 0xd9, 0x7b, 0x00, 0xd8, 0xe8, 0xc9, 0x6a,
- 0xf7, 0x00, 0xd9, 0x01, 0xc9, 0xb0, 0xd5, 0x00, 0xd8, 0xf8, 0xc4, 0xc2,
- 0x2b, 0x00, 0xd9, 0xfb, 0x03, 0x2b, 0x2d, 0xc6, 0xc9, 0xd8, 0x00, 0xda,
- 0x00, 0x97, 0x0b, 0x50, 0x29, 0x83, 0x0b, 0x50, 0x19, 0xc2, 0x03, 0xa4,
- 0x0b, 0x51, 0xb1, 0x91, 0x0b, 0x51, 0x79, 0x07, 0xc3, 0x2b, 0x33, 0xc3,
- 0x19, 0x4c, 0x0b, 0x50, 0xb0, 0xc4, 0xc7, 0x7f, 0x0b, 0x51, 0xb9, 0x0a,
- 0xc3, 0x2b, 0x3b, 0xc3, 0xdd, 0xec, 0x0b, 0x50, 0xa9, 0x8b, 0x0b, 0x50,
- 0xa1, 0xc2, 0x5f, 0x91, 0x0b, 0x50, 0x90, 0xc2, 0x00, 0x49, 0x0b, 0x51,
- 0xa9, 0x03, 0x43, 0x2b, 0x49, 0x04, 0xc3, 0x2b, 0x51, 0x91, 0x0b, 0x51,
- 0x99, 0x83, 0x0b, 0x51, 0x91, 0xc4, 0xe3, 0xfe, 0x0b, 0x50, 0x68, 0x07,
- 0xc3, 0x2b, 0x5d, 0x97, 0x0b, 0x51, 0x19, 0x0b, 0x43, 0x2b, 0x6b, 0xc2,
- 0x89, 0x44, 0x0b, 0x51, 0x71, 0x8b, 0x0b, 0x51, 0x69, 0x83, 0x0b, 0x50,
- 0x50, 0x83, 0x0b, 0x51, 0x61, 0xc2, 0x0f, 0x4d, 0x0b, 0x51, 0x08, 0xc3,
- 0x85, 0x08, 0x0b, 0x51, 0x51, 0x07, 0x43, 0x2b, 0x75, 0x09, 0xc3, 0x2b,
- 0x7f, 0x8b, 0x0b, 0x51, 0x21, 0xc3, 0x13, 0xf2, 0x0b, 0x51, 0x01, 0xc3,
- 0x02, 0x28, 0x0b, 0x50, 0xf1, 0x0c, 0xc3, 0x2b, 0x8b, 0x97, 0x0b, 0x50,
- 0xcb, 0x03, 0x2b, 0x97, 0xc3, 0x57, 0x68, 0x0b, 0x50, 0x79, 0xc2, 0x15,
- 0x1d, 0x0b, 0x50, 0x48, 0x83, 0x0b, 0x50, 0xe9, 0xc2, 0x89, 0x44, 0x0b,
- 0x50, 0xd8, 0x0a, 0xc3, 0x2b, 0x9d, 0x42, 0x00, 0x48, 0x43, 0x2b, 0xad,
- 0x17, 0xc3, 0x2b, 0xb7, 0xc3, 0xdd, 0xec, 0x0b, 0x4c, 0xf0, 0xc4, 0xe5,
- 0xd3, 0x0b, 0x4b, 0xa1, 0x8b, 0x0b, 0x4f, 0xf1, 0x91, 0x0b, 0x4f, 0xc9,
- 0x07, 0xc3, 0x2b, 0xbf, 0x17, 0x43, 0x2b, 0xc7, 0x09, 0xc3, 0x2b, 0xd7,
- 0x06, 0xc3, 0x2b, 0xf6, 0x42, 0x00, 0xe5, 0xc3, 0x2c, 0x04, 0x83, 0x0b,
- 0x4f, 0xb3, 0x03, 0x2c, 0x0e, 0x0c, 0xc3, 0x2c, 0x12, 0x16, 0xc3, 0x2c,
- 0x1c, 0x1c, 0xc3, 0x2c, 0x28, 0x43, 0x75, 0xf8, 0xc3, 0x2c, 0x34, 0xc3,
- 0xbb, 0xea, 0x0b, 0x4d, 0x40, 0x03, 0xc3, 0x2c, 0x40, 0x11, 0xc3, 0x2c,
- 0x55, 0x07, 0xc3, 0x2c, 0x60, 0x17, 0x43, 0x2c, 0x6b, 0x97, 0x0b, 0x4d,
- 0x03, 0x03, 0x2c, 0x78, 0x03, 0xc3, 0x2c, 0x84, 0x8b, 0x0b, 0x4f, 0xbb,
- 0x03, 0x2c, 0x91, 0x07, 0xc3, 0x2c, 0x95, 0x91, 0x0b, 0x4c, 0xc2, 0x03,
- 0x2c, 0x9f, 0x03, 0xc3, 0x2c, 0xa5, 0xc3, 0xdd, 0xec, 0x0b, 0x4f, 0x79,
- 0xc5, 0xde, 0x67, 0x0b, 0x4c, 0x10, 0xc2, 0x00, 0x3a, 0x0b, 0x4b, 0x69,
- 0x0a, 0xc3, 0x2c, 0xad, 0xc4, 0xad, 0x65, 0x0b, 0x4c, 0xd9, 0x07, 0xc3,
- 0x2c, 0xc0, 0xc2, 0x14, 0x40, 0x0b, 0x4c, 0x28, 0x11, 0xc3, 0x2c, 0xc8,
- 0x03, 0xc3, 0x2c, 0xd4, 0x97, 0x0b, 0x4f, 0x69, 0xc5, 0xd5, 0x84, 0x0b,
- 0x4d, 0x98, 0xc2, 0x00, 0x3a, 0x0b, 0x4b, 0x51, 0x07, 0x43, 0x2c, 0xe2,
- 0x42, 0x00, 0x48, 0xc3, 0x2c, 0xec, 0xc2, 0x00, 0xb7, 0x0b, 0x4f, 0xf9,
- 0x83, 0x0b, 0x4f, 0xdb, 0x03, 0x2c, 0xf6, 0xc2, 0x00, 0x4d, 0x0b, 0x4f,
- 0xd1, 0x8b, 0x0b, 0x4f, 0x73, 0x03, 0x2d, 0x05, 0xc2, 0x03, 0x32, 0x0b,
- 0x4e, 0x49, 0xc3, 0x85, 0x08, 0x0b, 0x4e, 0x31, 0xc4, 0xe1, 0xcf, 0x0b,
- 0x4d, 0x79, 0x42, 0x8b, 0x90, 0x43, 0x2d, 0x0b, 0x83, 0x0b, 0x4d, 0xdb,
- 0x03, 0x2d, 0x15, 0x17, 0xc3, 0x2d, 0x19, 0xc2, 0x00, 0x4c, 0x0b, 0x4f,
- 0x59, 0xc2, 0x00, 0x3a, 0x0b, 0x4e, 0x98, 0x17, 0xc3, 0x2d, 0x24, 0x43,
- 0x89, 0x44, 0xc3, 0x2d, 0x38, 0x42, 0x11, 0x70, 0xc3, 0x2d, 0x44, 0x0b,
- 0xc3, 0x2d, 0x55, 0xc2, 0x03, 0xa5, 0x0b, 0x4d, 0x60, 0x09, 0xc3, 0x2d,
- 0x5f, 0x15, 0xc3, 0x2d, 0x67, 0x16, 0xc3, 0x2d, 0x77, 0x06, 0xc3, 0x2d,
- 0x81, 0x8b, 0x0b, 0x4a, 0xd9, 0x97, 0x0b, 0x4a, 0xb9, 0x1b, 0xc3, 0x2d,
- 0x91, 0x0c, 0x43, 0x2d, 0xa7, 0x07, 0xc3, 0x2d, 0xc0, 0xc2, 0x89, 0x44,
- 0x0b, 0x4a, 0xf9, 0xc2, 0x06, 0x1f, 0x0b, 0x48, 0xf1, 0xc3, 0x8e, 0x2c,
- 0x0b, 0x47, 0xb0, 0x03, 0xc3, 0x2d, 0xce, 0x07, 0xc3, 0x2d, 0xda, 0x04,
- 0xc3, 0x2d, 0xe4, 0xc3, 0xa8, 0x2b, 0x0b, 0x4a, 0xf1, 0x97, 0x0b, 0x4a,
- 0x99, 0x08, 0xc3, 0x2d, 0xf3, 0x42, 0x8b, 0x90, 0xc3, 0x2e, 0x06, 0xc3,
- 0x33, 0x26, 0x0b, 0x48, 0xc8, 0x07, 0xc3, 0x2e, 0x18, 0x97, 0x0b, 0x48,
- 0x8b, 0x03, 0x2e, 0x22, 0x8b, 0x0b, 0x4b, 0x09, 0xc2, 0x89, 0x44, 0x0b,
- 0x4a, 0x61, 0xc2, 0x0f, 0xf5, 0x0b, 0x4a, 0x58, 0x97, 0x0b, 0x4a, 0x4b,
- 0x03, 0x2e, 0x28, 0xc3, 0x19, 0x4c, 0x0b, 0x4a, 0xb1, 0x07, 0xc3, 0x2e,
- 0x36, 0xc4, 0xe0, 0xd3, 0x0b, 0x49, 0x08, 0x17, 0xc3, 0x2e, 0x3e, 0x03,
- 0xc3, 0x2e, 0x4c, 0x0a, 0xc3, 0x2e, 0x54, 0xc2, 0x00, 0xb2, 0x0b, 0x49,
- 0x21, 0xc5, 0x85, 0x07, 0x0b, 0x48, 0x60, 0xc8, 0xba, 0x8d, 0x0b, 0x48,
- 0xa1, 0xc2, 0x14, 0x40, 0x0b, 0x4b, 0x28, 0xc6, 0xd3, 0xed, 0x0b, 0x48,
- 0x29, 0x17, 0xc3, 0x2e, 0x68, 0xc2, 0x00, 0x4d, 0x0b, 0x48, 0x68, 0x43,
- 0x00, 0x4d, 0xc3, 0x2e, 0x72, 0xc2, 0x23, 0xb4, 0x0b, 0x4a, 0x71, 0xc3,
- 0x3c, 0x50, 0x0b, 0x49, 0x38, 0x17, 0xc3, 0x2e, 0x7e, 0x07, 0xc3, 0x2e,
- 0x88, 0xc2, 0x03, 0xa5, 0x0b, 0x49, 0xa9, 0xc2, 0x00, 0x3e, 0x0b, 0x49,
- 0x68, 0xc4, 0x85, 0x07, 0x0b, 0x4a, 0x41, 0xc2, 0x14, 0x40, 0x0b, 0x48,
- 0x90, 0xc4, 0xad, 0x65, 0x0b, 0x47, 0xd9, 0xc2, 0x03, 0xa5, 0x0b, 0x47,
- 0x90, 0x07, 0xc3, 0x2e, 0x92, 0x17, 0xc3, 0x2e, 0xa0, 0xc2, 0x14, 0x40,
- 0x0b, 0x45, 0x49, 0xc5, 0x59, 0xc8, 0x0b, 0x45, 0x40, 0x0a, 0xc3, 0x2e,
- 0xaa, 0x07, 0xc3, 0x2e, 0xb6, 0xc4, 0xa5, 0x0a, 0x0b, 0x45, 0x78, 0x07,
- 0xc3, 0x2e, 0xc2, 0x42, 0x00, 0x6d, 0xc3, 0x2e, 0xcc, 0xc6, 0xd3, 0x51,
- 0x0b, 0x45, 0x60, 0xc2, 0x00, 0x4d, 0x0b, 0x47, 0x79, 0x0b, 0x43, 0x2e,
- 0xd8, 0xc2, 0x13, 0xc7, 0x0b, 0x47, 0x69, 0x97, 0x0b, 0x46, 0x69, 0x03,
- 0x43, 0x2e, 0xe2, 0x03, 0xc3, 0x2e, 0xea, 0x09, 0xc3, 0x2e, 0xf4, 0x0c,
- 0xc3, 0x2f, 0x08, 0x06, 0xc3, 0x2f, 0x16, 0x15, 0xc3, 0x2f, 0x2c, 0x16,
- 0xc3, 0x2f, 0x46, 0x1c, 0xc3, 0x2f, 0x56, 0xd0, 0x59, 0xc2, 0x0b, 0x44,
- 0xc8, 0xc3, 0x85, 0x08, 0x0b, 0x47, 0x39, 0xc3, 0x8e, 0x2c, 0x0b, 0x47,
- 0x31, 0x04, 0xc3, 0x2f, 0x60, 0x03, 0xc3, 0x2f, 0x73, 0xc6, 0xcf, 0x3d,
- 0x0b, 0x45, 0xc0, 0x17, 0xc3, 0x2f, 0x7b, 0xc2, 0x14, 0x40, 0x0b, 0x46,
- 0xc9, 0xc3, 0x94, 0x18, 0x0b, 0x45, 0x38, 0xc2, 0x01, 0xc7, 0x0b, 0x46,
- 0x89, 0xc7, 0xc1, 0xe2, 0x0b, 0x44, 0x90, 0xc5, 0x6d, 0xc2, 0x0b, 0x46,
- 0x09, 0x9a, 0x0b, 0x45, 0x88, 0x42, 0x00, 0xa4, 0xc3, 0x2f, 0x8b, 0xc4,
- 0xe3, 0x2f, 0x0b, 0x44, 0xc0, 0x09, 0xc3, 0x2f, 0x95, 0x15, 0xc3, 0x2f,
- 0xa5, 0x1b, 0xc3, 0x2f, 0xb1, 0xc7, 0xc4, 0x74, 0x0b, 0x43, 0x29, 0xcb,
- 0x90, 0x06, 0x0b, 0x43, 0x20, 0x08, 0xc3, 0x2f, 0xbd, 0x83, 0x0b, 0x44,
- 0x63, 0x03, 0x2f, 0xc9, 0x04, 0xc3, 0x2f, 0xcf, 0x42, 0x11, 0x70, 0xc3,
- 0x2f, 0xe5, 0xc7, 0xc8, 0x25, 0x0b, 0x43, 0xf8, 0xc2, 0x00, 0x6d, 0x0b,
- 0x43, 0x39, 0xc6, 0xd3, 0x5d, 0x0b, 0x44, 0x09, 0xc4, 0xe2, 0xf7, 0x0b,
- 0x43, 0x91, 0xc5, 0xd6, 0xd3, 0x0b, 0x43, 0x08, 0xc4, 0xd5, 0x85, 0x0b,
- 0x43, 0x31, 0x90, 0x0b, 0x43, 0x78, 0x0b, 0xc3, 0x2f, 0xef, 0x42, 0x11,
- 0x70, 0xc3, 0x2f, 0xf9, 0xc2, 0x00, 0xc2, 0x0b, 0x43, 0x00, 0xc2, 0x00,
- 0x49, 0x0b, 0x44, 0x49, 0x03, 0xc3, 0x30, 0x0b, 0xc8, 0xb6, 0xb5, 0x0b,
- 0x42, 0xd8, 0x87, 0x0b, 0x44, 0x29, 0xc2, 0xd0, 0x6a, 0x0b, 0x44, 0x18,
- 0xc2, 0x0f, 0x4d, 0x0b, 0x43, 0xe9, 0xc6, 0xd4, 0xd1, 0x0b, 0x43, 0xb9,
- 0x42, 0x01, 0xf0, 0xc3, 0x30, 0x17, 0xc5, 0xd7, 0x1e, 0x0b, 0x42, 0xd1,
- 0xc3, 0x8e, 0x2c, 0x0b, 0x42, 0xc8, 0xc3, 0x7b, 0x8b, 0x0b, 0x43, 0xc1,
- 0x42, 0x01, 0x33, 0x43, 0x30, 0x23, 0xcc, 0x84, 0xf4, 0x0b, 0x43, 0x11,
- 0xc5, 0xd6, 0x42, 0x0b, 0x42, 0xf0, 0x11, 0xc3, 0x30, 0x2f, 0x0a, 0xc3,
- 0x30, 0x3d, 0xc3, 0x40, 0x48, 0x0b, 0x41, 0x19, 0xc2, 0x5f, 0x91, 0x0b,
- 0x40, 0xa9, 0xc6, 0xd4, 0xef, 0x0b, 0x40, 0x88, 0x42, 0x11, 0x70, 0xc3,
- 0x30, 0x4b, 0x17, 0xc3, 0x30, 0x57, 0xc8, 0xb9, 0xa5, 0x0b, 0x40, 0x30,
- 0xc3, 0xe7, 0x09, 0x0b, 0x41, 0xd9, 0x03, 0xc3, 0x30, 0x63, 0xc3, 0x90,
- 0x0e, 0x0b, 0x41, 0xa9, 0x07, 0x43, 0x30, 0x6d, 0x03, 0xc3, 0x30, 0x77,
- 0x42, 0x00, 0xde, 0xc3, 0x30, 0x87, 0x11, 0xc3, 0x30, 0x91, 0xcb, 0x94,
- 0x10, 0x0b, 0x41, 0x29, 0xc5, 0xcf, 0x3d, 0x0b, 0x41, 0x21, 0xc9, 0xad,
- 0x63, 0x0b, 0x40, 0x80, 0x03, 0xc3, 0x30, 0x9d, 0xc2, 0x00, 0x4d, 0x0b,
- 0x42, 0xa1, 0x42, 0x00, 0xe5, 0xc3, 0x30, 0xa7, 0x1b, 0xc3, 0x30, 0xb1,
- 0xc3, 0xe5, 0xa8, 0x0b, 0x42, 0x39, 0x09, 0xc3, 0x30, 0xbe, 0x0d, 0xc3,
- 0x30, 0xd0, 0x16, 0xc3, 0x30, 0xdc, 0x42, 0x0b, 0xc6, 0xc3, 0x30, 0xeb,
- 0xc3, 0x3a, 0xfa, 0x0b, 0x41, 0x61, 0x1c, 0x43, 0x30, 0xf7, 0x97, 0x0b,
- 0x42, 0x9b, 0x03, 0x31, 0x03, 0xc5, 0xdd, 0x77, 0x0b, 0x41, 0xc1, 0xc6,
- 0xd4, 0xcb, 0x0b, 0x40, 0xc1, 0xc4, 0xe3, 0x03, 0x0b, 0x40, 0xb8, 0x03,
- 0xc3, 0x31, 0x09, 0xc2, 0x01, 0xc7, 0x0b, 0x41, 0x69, 0xc2, 0x00, 0x49,
- 0x0b, 0x41, 0x51, 0x43, 0x00, 0xb7, 0x43, 0x31, 0x1f, 0xc6, 0xcd, 0xd5,
- 0x0b, 0x42, 0x21, 0xc8, 0xbc, 0x85, 0x0b, 0x41, 0x00, 0x45, 0xd0, 0x22,
- 0xc3, 0x31, 0x2b, 0xc8, 0xc0, 0xa5, 0x0b, 0x40, 0x08, 0xc2, 0x0c, 0x65,
- 0x00, 0xde, 0xd1, 0xc2, 0x00, 0xc1, 0x00, 0xde, 0x51, 0xc2, 0x00, 0xa4,
- 0x00, 0xde, 0x20, 0xcf, 0x6a, 0x1f, 0x00, 0x4f, 0x81, 0xce, 0x6c, 0xff,
- 0x00, 0x4f, 0x88, 0x94, 0x00, 0x4f, 0x00, 0x8e, 0x00, 0x4f, 0x08, 0xa0,
- 0x01, 0x40, 0x3b, 0x03, 0x31, 0x37, 0xa1, 0x01, 0x40, 0x5b, 0x03, 0x31,
- 0x57, 0xa2, 0x01, 0x40, 0x9b, 0x03, 0x31, 0x70, 0xa3, 0x01, 0x41, 0x1b,
- 0x03, 0x31, 0x82, 0xa5, 0x01, 0x44, 0x19, 0xa4, 0x01, 0x42, 0x1a, 0x03,
- 0x31, 0x8d, 0xa1, 0x01, 0x40, 0x6b, 0x03, 0x31, 0x91, 0xa2, 0x01, 0x40,
- 0xab, 0x03, 0x31, 0xaa, 0xa3, 0x01, 0x41, 0x2b, 0x03, 0x31, 0xbc, 0xa5,
- 0x01, 0x44, 0x29, 0xa4, 0x01, 0x42, 0x2a, 0x03, 0x31, 0xc7, 0xa2, 0x01,
- 0x40, 0xcb, 0x03, 0x31, 0xcb, 0xa3, 0x01, 0x41, 0x4b, 0x03, 0x31, 0xdd,
- 0xa5, 0x01, 0x44, 0x49, 0xa4, 0x01, 0x42, 0x4a, 0x03, 0x31, 0xe8, 0xa3,
- 0x01, 0x41, 0x8b, 0x03, 0x31, 0xec, 0xa5, 0x01, 0x44, 0x89, 0xa4, 0x01,
- 0x42, 0x8a, 0x03, 0x31, 0xf7, 0xa5, 0x01, 0x45, 0x09, 0xa4, 0x01, 0x43,
- 0x0a, 0x03, 0x31, 0xfb, 0xa5, 0x01, 0x46, 0x08, 0xa1, 0x01, 0x40, 0x73,
- 0x03, 0x31, 0xff, 0xa2, 0x01, 0x40, 0xb3, 0x03, 0x32, 0x18, 0xa3, 0x01,
- 0x41, 0x33, 0x03, 0x32, 0x2a, 0xa5, 0x01, 0x44, 0x31, 0xa4, 0x01, 0x42,
- 0x32, 0x03, 0x32, 0x35, 0xa2, 0x01, 0x40, 0xd3, 0x03, 0x32, 0x39, 0xa3,
- 0x01, 0x41, 0x53, 0x03, 0x32, 0x4b, 0xa5, 0x01, 0x44, 0x51, 0xa4, 0x01,
- 0x42, 0x52, 0x03, 0x32, 0x56, 0xa3, 0x01, 0x41, 0x93, 0x03, 0x32, 0x5a,
- 0xa5, 0x01, 0x44, 0x91, 0xa4, 0x01, 0x42, 0x92, 0x03, 0x32, 0x65, 0xa5,
- 0x01, 0x45, 0x11, 0xa4, 0x01, 0x43, 0x12, 0x03, 0x32, 0x69, 0xa5, 0x01,
- 0x46, 0x10, 0xa2, 0x01, 0x40, 0xe3, 0x03, 0x32, 0x6d, 0xa3, 0x01, 0x41,
- 0x63, 0x03, 0x32, 0x7f, 0xa5, 0x01, 0x44, 0x61, 0xa4, 0x01, 0x42, 0x62,
- 0x03, 0x32, 0x8a, 0xa3, 0x01, 0x41, 0xa3, 0x03, 0x32, 0x8e, 0xa5, 0x01,
- 0x44, 0xa1, 0xa4, 0x01, 0x42, 0xa2, 0x03, 0x32, 0x99, 0xa5, 0x01, 0x45,
- 0x21, 0xa4, 0x01, 0x43, 0x22, 0x03, 0x32, 0x9d, 0xa5, 0x01, 0x46, 0x20,
- 0xa3, 0x01, 0x41, 0xc3, 0x03, 0x32, 0xa1, 0xa5, 0x01, 0x44, 0xc1, 0xa4,
- 0x01, 0x42, 0xc2, 0x03, 0x32, 0xac, 0xa5, 0x01, 0x45, 0x41, 0xa4, 0x01,
- 0x43, 0x42, 0x03, 0x32, 0xb0, 0xa5, 0x01, 0x46, 0x40, 0xa5, 0x01, 0x45,
- 0x81, 0xa4, 0x01, 0x43, 0x82, 0x03, 0x32, 0xb4, 0xa5, 0x01, 0x46, 0x80,
- 0xa5, 0x01, 0x47, 0x00, 0x83, 0x08, 0x83, 0xa9, 0xc2, 0x00, 0xc7, 0x08,
- 0x81, 0xa8, 0x91, 0x08, 0x83, 0x91, 0x87, 0x08, 0x83, 0x88, 0x8e, 0x08,
- 0x80, 0x70, 0x94, 0x08, 0x80, 0x60, 0x91, 0x08, 0x83, 0xa1, 0x87, 0x08,
- 0x83, 0x98, 0x8e, 0x08, 0x82, 0x08, 0x94, 0x08, 0x81, 0xf8, 0xc4, 0x8f,
- 0x7c, 0x0e, 0x87, 0xa9, 0xc3, 0x8f, 0x80, 0x0e, 0x84, 0x78, 0xc5, 0xb1,
- 0xcc, 0x0e, 0x84, 0x89, 0xc8, 0xae, 0x7a, 0x0e, 0x84, 0x80, 0xc4, 0x8f,
- 0x7c, 0x0e, 0x87, 0x91, 0xc4, 0xe6, 0x17, 0x0e, 0x87, 0x81, 0xc3, 0x8f,
- 0x80, 0x0e, 0x82, 0x70, 0xc3, 0x7e, 0xff, 0x0e, 0x84, 0x19, 0x03, 0x43,
- 0x32, 0xb8, 0xd0, 0x36, 0x0c, 0x0e, 0x85, 0x69, 0xcd, 0x7e, 0xf5, 0x0e,
- 0x82, 0x90, 0x00, 0x43, 0x32, 0xc4, 0xc9, 0xb6, 0x09, 0x0e, 0x87, 0x29,
- 0xc7, 0xc8, 0xa3, 0x0e, 0x87, 0x20, 0xc9, 0xb6, 0x09, 0x0e, 0x87, 0x09,
- 0xc7, 0xc8, 0xa3, 0x0e, 0x87, 0x00, 0xc5, 0xb1, 0xcc, 0x0e, 0x84, 0xa9,
- 0x49, 0xae, 0x7a, 0x43, 0x32, 0xd0, 0xc5, 0xd5, 0x43, 0x0e, 0x86, 0xd9,
- 0xc4, 0x87, 0x77, 0x0e, 0x86, 0xd0, 0xd5, 0x38, 0x29, 0x0e, 0x86, 0x99,
- 0xc8, 0x2f, 0x67, 0x0e, 0x86, 0x70, 0xc3, 0x8f, 0x80, 0x0e, 0x86, 0x11,
- 0xc4, 0x8f, 0x7c, 0x0e, 0x86, 0x08, 0xc3, 0x15, 0x88, 0x0e, 0x82, 0x19,
- 0xc7, 0x9f, 0x0d, 0x0e, 0x81, 0xb0, 0xc2, 0x74, 0x53, 0x0e, 0x83, 0xb9,
- 0xc2, 0x05, 0x7b, 0x0e, 0x83, 0xb0, 0xc3, 0x7e, 0xff, 0x0e, 0x82, 0xf1,
- 0xc8, 0x9f, 0x0c, 0x0e, 0x81, 0xf0, 0xc6, 0x02, 0x21, 0x0f, 0xd9, 0xe1,
- 0xc5, 0x01, 0xf7, 0x0f, 0xd9, 0xe8, 0x55, 0x09, 0x0c, 0xc3, 0x32, 0xdc,
- 0x48, 0x09, 0x13, 0xc3, 0x32, 0xee, 0x4a, 0x13, 0x24, 0x43, 0x32, 0xfa,
- 0xc6, 0x02, 0x21, 0x0f, 0xda, 0x19, 0xc5, 0x01, 0xf7, 0x0f, 0xda, 0x21,
- 0xcc, 0x02, 0x0b, 0x0f, 0xda, 0x30, 0x46, 0x01, 0xc7, 0xc3, 0x33, 0x06,
- 0xd2, 0x47, 0x52, 0x0f, 0xda, 0x40, 0xd2, 0x47, 0x52, 0x0f, 0xda, 0x39,
- 0x46, 0x01, 0xc7, 0x43, 0x33, 0x12, 0xc7, 0x76, 0x66, 0x01, 0x53, 0x11,
- 0xc8, 0x4f, 0x39, 0x01, 0x53, 0x18, 0x16, 0xc3, 0x33, 0x1e, 0xd0, 0x5d,
- 0x82, 0x01, 0x3e, 0xd0, 0x49, 0x0a, 0xf3, 0xc3, 0x33, 0x2a, 0xd0, 0x01,
- 0xd7, 0x0f, 0xdb, 0xe0, 0x49, 0x0a, 0xf3, 0xc3, 0x33, 0x30, 0xd0, 0x01,
- 0xd7, 0x0f, 0xdb, 0xe8, 0xc9, 0x37, 0x1e, 0x01, 0x4c, 0x88, 0x16, 0xc3,
- 0x33, 0x36, 0xc9, 0x38, 0x82, 0x0f, 0xc8, 0x19, 0xc3, 0x01, 0x4a, 0x0f,
- 0xc8, 0x30, 0xc6, 0x01, 0x7a, 0x01, 0x2e, 0xb1, 0xc4, 0x0e, 0xa8, 0x01,
- 0x5f, 0x40, 0x45, 0x00, 0x6c, 0xc3, 0x33, 0x42, 0xd4, 0x3b, 0xed, 0x01,
- 0x4a, 0x40, 0xc6, 0x01, 0x01, 0x01, 0x0e, 0x71, 0xcf, 0x2c, 0x05, 0x01,
- 0x48, 0x20, 0xc5, 0x7c, 0x32, 0x01, 0x02, 0x29, 0x48, 0xbd, 0x75, 0xc3,
- 0x33, 0x54, 0xc8, 0x4f, 0x39, 0x01, 0x4c, 0x59, 0xc6, 0x01, 0x01, 0x01,
- 0x72, 0xa9, 0xcd, 0x77, 0x57, 0x01, 0x72, 0xb8, 0xc5, 0x01, 0x0f, 0x01,
- 0x5b, 0x03, 0x03, 0x33, 0x60, 0xcc, 0x82, 0xb4, 0x01, 0x5b, 0x51, 0xcd,
- 0x79, 0x2b, 0x01, 0x5c, 0x20, 0x45, 0x00, 0x6c, 0xc3, 0x33, 0x64, 0xc8,
- 0xb2, 0xf2, 0x01, 0x59, 0xb0, 0x45, 0x04, 0x74, 0xc3, 0x33, 0x74, 0xc5,
- 0x01, 0x02, 0x01, 0x0c, 0xd0, 0xd4, 0x2d, 0x09, 0x01, 0x0f, 0xd1, 0xc9,
- 0xb4, 0x11, 0x01, 0x59, 0xc0, 0xc3, 0x7e, 0x2f, 0x01, 0x0d, 0x59, 0xd7,
- 0x25, 0xc9, 0x0f, 0xc0, 0x40, 0xc3, 0x12, 0xec, 0x01, 0x0d, 0x13, 0x03,
- 0x33, 0x80, 0x43, 0x00, 0x3e, 0x43, 0x33, 0x86, 0xc2, 0x00, 0xbb, 0x01,
- 0x0f, 0x23, 0x03, 0x33, 0x92, 0xcc, 0x55, 0x0d, 0x01, 0x48, 0xe8, 0xc6,
- 0x0f, 0x1e, 0x01, 0x4b, 0xd1, 0xc9, 0x0a, 0x4a, 0x01, 0x4b, 0xb9, 0x9a,
- 0x01, 0x59, 0xf0, 0xce, 0x38, 0x53, 0x01, 0x4b, 0x99, 0xd6, 0x31, 0x3d,
- 0x01, 0x4a, 0x19, 0x48, 0x65, 0x33, 0xc3, 0x33, 0x98, 0xcf, 0x69, 0x89,
- 0x01, 0x5a, 0x50, 0xe0, 0x01, 0xc7, 0x0f, 0xdd, 0xa8, 0x45, 0x00, 0x6c,
- 0xc3, 0x33, 0xa4, 0xc8, 0xb2, 0xf2, 0x01, 0x48, 0x30, 0x44, 0x03, 0x68,
- 0xc3, 0x33, 0xb0, 0x42, 0x01, 0xc7, 0x43, 0x33, 0xba, 0xc6, 0x01, 0xf6,
- 0x01, 0x54, 0x18, 0xc3, 0xe7, 0x66, 0x08, 0x3a, 0x71, 0xc3, 0x55, 0xb2,
- 0x08, 0x3a, 0x69, 0xc3, 0xe1, 0x27, 0x08, 0x3a, 0x79, 0xc7, 0xc4, 0x66,
- 0x08, 0x3a, 0x81, 0xc5, 0xda, 0x20, 0x08, 0x3a, 0x89, 0xc4, 0xe3, 0xeb,
- 0x08, 0x3a, 0x91, 0xc4, 0xe3, 0x0b, 0x08, 0x3a, 0x98, 0x26, 0xc3, 0x33,
- 0xc4, 0xc3, 0xc0, 0x05, 0x08, 0x3a, 0x39, 0xc3, 0xdb, 0x60, 0x08, 0x3a,
- 0x31, 0xc3, 0xcc, 0x43, 0x08, 0x3a, 0x29, 0xc3, 0xe4, 0x0f, 0x08, 0x3a,
- 0x21, 0xc3, 0xe6, 0x58, 0x08, 0x3a, 0x19, 0xc3, 0xe7, 0xa2, 0x08, 0x3a,
- 0x11, 0xc3, 0xe1, 0x5f, 0x08, 0x3a, 0x09, 0xc3, 0xc6, 0xe3, 0x08, 0x3a,
- 0x00, 0x9e, 0x08, 0x39, 0x99, 0x9f, 0x08, 0x39, 0xa1, 0xa0, 0x08, 0x39,
- 0xa9, 0xa1, 0x08, 0x39, 0xb1, 0x9d, 0x08, 0x39, 0x90, 0x9d, 0x08, 0x38,
- 0x19, 0x9e, 0x08, 0x38, 0x21, 0x9f, 0x08, 0x38, 0x29, 0xa0, 0x08, 0x38,
- 0x31, 0xa1, 0x08, 0x38, 0x39, 0xa3, 0x08, 0x38, 0x41, 0xa5, 0x08, 0x38,
- 0x49, 0xa6, 0x08, 0x38, 0x50, 0x9d, 0x08, 0x38, 0x59, 0x9e, 0x08, 0x38,
- 0x61, 0x9f, 0x08, 0x38, 0x69, 0xa0, 0x08, 0x38, 0x71, 0xa1, 0x08, 0x38,
- 0x79, 0xa2, 0x08, 0x38, 0x81, 0xa3, 0x08, 0x38, 0x89, 0xa4, 0x08, 0x38,
- 0x91, 0xa5, 0x08, 0x38, 0x99, 0xa6, 0x08, 0x38, 0xa0, 0x9d, 0x08, 0x38,
- 0xa9, 0x9e, 0x08, 0x38, 0xb1, 0x9f, 0x08, 0x38, 0xb9, 0xa0, 0x08, 0x38,
- 0xc1, 0xa1, 0x08, 0x38, 0xc9, 0xa3, 0x08, 0x38, 0xd1, 0xa4, 0x08, 0x38,
- 0xd9, 0xa5, 0x08, 0x38, 0xe1, 0xa6, 0x08, 0x38, 0xe8, 0xa1, 0x08, 0x38,
- 0xf1, 0xa4, 0x08, 0x38, 0xf9, 0xa5, 0x08, 0x39, 0x00, 0x9d, 0x08, 0x39,
- 0x09, 0x9f, 0x08, 0x39, 0x11, 0xa0, 0x08, 0x39, 0x19, 0xa1, 0x08, 0x39,
- 0x21, 0xa2, 0x08, 0x39, 0x29, 0xa3, 0x08, 0x39, 0x31, 0xa5, 0x08, 0x39,
- 0x39, 0xa6, 0x08, 0x39, 0x40, 0xa0, 0x08, 0x39, 0x59, 0xa1, 0x08, 0x39,
- 0x61, 0xa2, 0x08, 0x39, 0x69, 0xa3, 0x08, 0x39, 0x71, 0xa4, 0x08, 0x39,
- 0x79, 0xa5, 0x08, 0x39, 0x81, 0x9e, 0x08, 0x39, 0x49, 0x9f, 0x08, 0x39,
- 0x51, 0xa6, 0x08, 0x39, 0x88, 0x1d, 0xc3, 0x33, 0xce, 0x1e, 0xc3, 0x33,
- 0xf2, 0x1f, 0xc3, 0x34, 0x06, 0x20, 0xc3, 0x34, 0x33, 0x21, 0xc3, 0x34,
- 0x4b, 0x22, 0xc3, 0x34, 0x6b, 0x23, 0xc3, 0x34, 0x8f, 0x24, 0xc3, 0x34,
- 0xa7, 0x25, 0x43, 0x34, 0xc3, 0xc2, 0x8a, 0xb6, 0x08, 0x32, 0x41, 0x1f,
- 0xc3, 0x34, 0xdb, 0x42, 0xda, 0x1d, 0xc3, 0x34, 0xe7, 0xc2, 0xe6, 0x8a,
- 0x08, 0x32, 0x81, 0xc2, 0xe7, 0xfd, 0x08, 0x32, 0x89, 0x25, 0xc3, 0x34,
- 0xef, 0xc2, 0xe7, 0xfb, 0x08, 0x32, 0xa0, 0x9e, 0x08, 0x32, 0xa9, 0x9f,
- 0x08, 0x32, 0xb1, 0xa0, 0x08, 0x32, 0xb9, 0xa1, 0x08, 0x32, 0xc1, 0xa2,
- 0x08, 0x32, 0xc9, 0xa3, 0x08, 0x32, 0xd1, 0xa4, 0x08, 0x32, 0xd9, 0xa5,
- 0x08, 0x32, 0xe1, 0x26, 0x43, 0x34, 0xf7, 0x9d, 0x08, 0x33, 0x01, 0x9e,
- 0x08, 0x33, 0x09, 0x9f, 0x08, 0x33, 0x11, 0x20, 0xc3, 0x35, 0x03, 0xa1,
- 0x08, 0x33, 0x31, 0xa2, 0x08, 0x33, 0x39, 0xa3, 0x08, 0x33, 0x41, 0xa4,
- 0x08, 0x33, 0x49, 0xa5, 0x08, 0x33, 0x51, 0xa6, 0x08, 0x33, 0x58, 0x9d,
- 0x08, 0x33, 0x61, 0x9e, 0x08, 0x33, 0x69, 0x9f, 0x08, 0x33, 0x71, 0xa0,
- 0x08, 0x33, 0x79, 0xa1, 0x08, 0x33, 0x81, 0xa2, 0x08, 0x33, 0x89, 0xa3,
- 0x08, 0x33, 0x91, 0xa4, 0x08, 0x33, 0x99, 0xa5, 0x08, 0x33, 0xa1, 0xa6,
- 0x08, 0x33, 0xa8, 0x9d, 0x08, 0x33, 0xb1, 0x9e, 0x08, 0x33, 0xb9, 0x9f,
- 0x08, 0x33, 0xc1, 0xa0, 0x08, 0x33, 0xc9, 0xa1, 0x08, 0x33, 0xd1, 0xa2,
- 0x08, 0x33, 0xd9, 0xa3, 0x08, 0x33, 0xe1, 0xa4, 0x08, 0x33, 0xe9, 0xa5,
- 0x08, 0x33, 0xf1, 0xa6, 0x08, 0x33, 0xf8, 0x9d, 0x08, 0x34, 0x01, 0x9e,
- 0x08, 0x34, 0x09, 0x9f, 0x08, 0x34, 0x11, 0xa0, 0x08, 0x34, 0x19, 0xa1,
- 0x08, 0x34, 0x21, 0xa2, 0x08, 0x34, 0x29, 0xa3, 0x08, 0x34, 0x31, 0xa4,
- 0x08, 0x34, 0x39, 0xa5, 0x08, 0x34, 0x41, 0xa6, 0x08, 0x34, 0x48, 0x9d,
- 0x08, 0x34, 0x51, 0x9e, 0x08, 0x34, 0x59, 0x9f, 0x08, 0x34, 0x61, 0xa0,
- 0x08, 0x34, 0x69, 0xa3, 0x08, 0x34, 0x81, 0xa4, 0x08, 0x34, 0x89, 0xa5,
- 0x08, 0x34, 0x91, 0xa6, 0x08, 0x34, 0x99, 0xa1, 0x08, 0x34, 0x71, 0xa2,
- 0x08, 0x34, 0x78, 0x9d, 0x08, 0x34, 0xa1, 0x9e, 0x08, 0x34, 0xa9, 0x9f,
- 0x08, 0x34, 0xb1, 0xa0, 0x08, 0x34, 0xb9, 0xa1, 0x08, 0x34, 0xc1, 0xa2,
- 0x08, 0x34, 0xc9, 0xa3, 0x08, 0x34, 0xd1, 0xa4, 0x08, 0x34, 0xd9, 0xa5,
- 0x08, 0x34, 0xe1, 0xa6, 0x08, 0x34, 0xe8, 0x9d, 0x08, 0x34, 0xf1, 0x9e,
- 0x08, 0x34, 0xf8, 0xc5, 0xdd, 0x5e, 0x08, 0x35, 0x01, 0xc5, 0xdf, 0x7f,
- 0x08, 0x35, 0x09, 0xc5, 0xdd, 0x27, 0x08, 0x35, 0x11, 0xc5, 0xd9, 0x71,
- 0x08, 0x35, 0x19, 0xc5, 0xdc, 0xaa, 0x08, 0x35, 0x21, 0xc5, 0xdc, 0xdc,
- 0x08, 0x35, 0x29, 0xc5, 0xdb, 0x3d, 0x08, 0x35, 0x31, 0xc5, 0xdc, 0x4b,
- 0x08, 0x35, 0x39, 0xc5, 0xdc, 0x55, 0x08, 0x35, 0x41, 0xc5, 0xdb, 0xab,
- 0x08, 0x35, 0x48, 0xc5, 0xdd, 0x5e, 0x08, 0x35, 0x51, 0xc5, 0xdf, 0x7f,
- 0x08, 0x35, 0x59, 0xc5, 0xdd, 0x27, 0x08, 0x35, 0x61, 0xc5, 0xd9, 0x71,
- 0x08, 0x35, 0x69, 0xc5, 0xdc, 0xaa, 0x08, 0x35, 0x71, 0xc5, 0xdc, 0xdc,
- 0x08, 0x35, 0x79, 0xc5, 0xdb, 0x3d, 0x08, 0x35, 0x81, 0xc5, 0xdc, 0x4b,
- 0x08, 0x35, 0x89, 0xc5, 0xdc, 0x55, 0x08, 0x35, 0x90, 0x9e, 0x08, 0x35,
- 0x99, 0x9f, 0x08, 0x35, 0xa1, 0xa0, 0x08, 0x35, 0xa9, 0xa1, 0x08, 0x35,
- 0xb1, 0xa2, 0x08, 0x35, 0xb9, 0xa3, 0x08, 0x35, 0xc1, 0xa5, 0x08, 0x35,
- 0xc9, 0xa6, 0x08, 0x35, 0xd0, 0x9d, 0x08, 0x35, 0xd9, 0x9e, 0x08, 0x35,
- 0xe1, 0x9f, 0x08, 0x35, 0xe9, 0xa0, 0x08, 0x35, 0xf1, 0xa2, 0x08, 0x35,
- 0xf9, 0xa3, 0x08, 0x36, 0x00, 0x9d, 0x08, 0x36, 0x09, 0x9e, 0x08, 0x36,
- 0x11, 0xa0, 0x08, 0x36, 0x19, 0xa1, 0x08, 0x36, 0x21, 0xa2, 0x08, 0x36,
- 0x29, 0xa3, 0x08, 0x36, 0x31, 0xa4, 0x08, 0x36, 0x39, 0xa5, 0x08, 0x36,
- 0x41, 0xa6, 0x08, 0x36, 0x48, 0x9d, 0x08, 0x36, 0x51, 0x9e, 0x08, 0x36,
- 0x59, 0x9f, 0x08, 0x36, 0x61, 0xa1, 0x08, 0x36, 0x69, 0xa2, 0x08, 0x36,
- 0x71, 0xa3, 0x08, 0x36, 0x79, 0xa4, 0x08, 0x36, 0x81, 0xa5, 0x08, 0x36,
- 0x89, 0xa6, 0x08, 0x36, 0x90, 0x9d, 0x08, 0x36, 0x99, 0x9e, 0x08, 0x36,
- 0xa1, 0x9f, 0x08, 0x36, 0xa9, 0xa2, 0x08, 0x36, 0xb1, 0xa4, 0x08, 0x36,
- 0xb9, 0xa5, 0x08, 0x36, 0xc1, 0xa6, 0x08, 0x36, 0xc8, 0x9d, 0x08, 0x36,
- 0xd1, 0x9e, 0x08, 0x36, 0xd9, 0x9f, 0x08, 0x36, 0xe1, 0xa0, 0x08, 0x36,
- 0xe9, 0xa1, 0x08, 0x36, 0xf1, 0xa2, 0x08, 0x36, 0xf9, 0xa3, 0x08, 0x37,
- 0x01, 0xa4, 0x08, 0x37, 0x09, 0xa6, 0x08, 0x37, 0x10, 0xa0, 0x08, 0x37,
- 0x19, 0xa1, 0x08, 0x37, 0x21, 0xa2, 0x08, 0x37, 0x29, 0xa3, 0x08, 0x37,
- 0x31, 0xa5, 0x08, 0x37, 0x39, 0xa6, 0x08, 0x37, 0x40, 0x9d, 0x08, 0x37,
- 0x49, 0x9e, 0x08, 0x37, 0x51, 0x9f, 0x08, 0x37, 0x59, 0xa0, 0x08, 0x37,
- 0x61, 0xa1, 0x08, 0x37, 0x69, 0xa2, 0x08, 0x37, 0x71, 0xa3, 0x08, 0x37,
- 0x79, 0xa4, 0x08, 0x37, 0x81, 0xa5, 0x08, 0x37, 0x89, 0xa6, 0x08, 0x37,
- 0x90, 0x9d, 0x08, 0x37, 0x99, 0x9e, 0x08, 0x37, 0xa1, 0x9f, 0x08, 0x37,
- 0xa9, 0xa0, 0x08, 0x37, 0xb1, 0xa1, 0x08, 0x37, 0xb9, 0xa2, 0x08, 0x37,
- 0xc1, 0xa3, 0x08, 0x37, 0xc9, 0xa4, 0x08, 0x37, 0xd1, 0xa5, 0x08, 0x37,
- 0xd9, 0xa6, 0x08, 0x37, 0xe0, 0x9e, 0x08, 0x37, 0xe9, 0x9f, 0x08, 0x37,
- 0xf1, 0xa1, 0x08, 0x37, 0xf9, 0xa2, 0x08, 0x38, 0x01, 0xa3, 0x08, 0x38,
- 0x09, 0xa5, 0x08, 0x38, 0x10, 0x1d, 0xc3, 0x35, 0x0f, 0x1e, 0xc3, 0x35,
- 0x45, 0x22, 0xc3, 0x35, 0x75, 0x21, 0xc3, 0x35, 0xab, 0x23, 0xc3, 0x35,
- 0xdb, 0x25, 0xc3, 0x36, 0x0b, 0x24, 0xc3, 0x36, 0x23, 0x1f, 0xc3, 0x36,
- 0x59, 0x20, 0xc3, 0x36, 0x8f, 0x26, 0x43, 0x36, 0xbf, 0x1e, 0xc3, 0x36,
- 0xcb, 0xc2, 0xda, 0x56, 0x08, 0x02, 0x91, 0xc2, 0x00, 0x20, 0x08, 0x02,
- 0x99, 0x21, 0xc3, 0x36, 0xd3, 0xc2, 0x00, 0x22, 0x08, 0x02, 0xb1, 0x23,
- 0xc3, 0x36, 0xdb, 0xc2, 0x39, 0x31, 0x08, 0x02, 0xc9, 0x25, 0x43, 0x36,
- 0xe3, 0x1e, 0xc3, 0x36, 0xf3, 0x1f, 0x43, 0x37, 0x17, 0xc3, 0xe7, 0x21,
- 0x08, 0x06, 0xf1, 0x1f, 0xc3, 0x37, 0x27, 0xc3, 0xe7, 0xcf, 0x08, 0x07,
- 0xd0, 0x1f, 0xc3, 0x37, 0x39, 0x20, 0xc3, 0x37, 0x45, 0xc8, 0xbe, 0x95,
- 0x08, 0x05, 0x20, 0x46, 0x00, 0x6b, 0xc3, 0x37, 0x51, 0xcb, 0x93, 0x60,
- 0x00, 0x15, 0x3b, 0x03, 0x37, 0x80, 0x17, 0xc3, 0x37, 0x86, 0x0a, 0xc3,
- 0x37, 0x90, 0x11, 0xc3, 0x37, 0x9f, 0xc9, 0xad, 0x2d, 0x00, 0x15, 0x33,
- 0x03, 0x37, 0xab, 0xd3, 0x46, 0x94, 0x00, 0x15, 0x41, 0x9c, 0x05, 0x39,
- 0x49, 0xc7, 0xc3, 0xfd, 0x05, 0x39, 0x59, 0xcb, 0x96, 0x2b, 0x01, 0x63,
- 0xb8, 0x46, 0x00, 0x6b, 0xc3, 0x37, 0xb1, 0x05, 0xc3, 0x37, 0xe0, 0x0b,
- 0xc3, 0x37, 0xef, 0x03, 0xc3, 0x37, 0xfb, 0xc8, 0xbf, 0x45, 0x05, 0x5a,
- 0x29, 0xd1, 0x55, 0x3b, 0x00, 0x14, 0x29, 0xc6, 0xa5, 0xb9, 0x00, 0x06,
- 0xf8, 0x00, 0xc3, 0x38, 0x07, 0x48, 0x10, 0x90, 0xc3, 0x38, 0x13, 0xc8,
- 0xba, 0x4d, 0x00, 0x13, 0x21, 0xc2, 0x06, 0x1f, 0x05, 0x3b, 0xaa, 0x03,
- 0x38, 0x40, 0x00, 0xc3, 0x38, 0x46, 0xc2, 0x06, 0x1f, 0x05, 0x3b, 0xa1,
- 0x8c, 0x00, 0x0e, 0x60, 0x46, 0x00, 0x6b, 0xc3, 0x38, 0x52, 0x07, 0xc3,
- 0x38, 0x99, 0xc5, 0xc0, 0x96, 0x00, 0x0b, 0xfb, 0x03, 0x38, 0xa8, 0xc9,
- 0xad, 0x2d, 0x00, 0x15, 0x51, 0xc9, 0xab, 0x74, 0x00, 0x15, 0x59, 0xc2,
- 0x06, 0x1f, 0x05, 0x3b, 0x91, 0xd1, 0x51, 0xb6, 0x00, 0x0c, 0xd9, 0x8c,
- 0x00, 0x0e, 0x48, 0x46, 0x00, 0x6b, 0xc3, 0x38, 0xae, 0x07, 0xc3, 0x38,
- 0xdb, 0x9c, 0x00, 0x0f, 0x9b, 0x03, 0x38, 0xea, 0x11, 0xc3, 0x38, 0xee,
- 0xc2, 0x06, 0x1f, 0x05, 0x3b, 0x89, 0xc9, 0xb1, 0xb6, 0x00, 0x11, 0xc0,
- 0x46, 0x00, 0x6b, 0xc3, 0x38, 0xfa, 0xc2, 0x00, 0x0a, 0x05, 0x5a, 0x9b,
- 0x03, 0x39, 0x28, 0x46, 0x15, 0x78, 0xc3, 0x39, 0x2e, 0xc8, 0xb9, 0xe5,
- 0x05, 0x39, 0x6b, 0x03, 0x39, 0x3e, 0xc2, 0x00, 0xb7, 0x05, 0x3b, 0x78,
- 0x46, 0x00, 0x6b, 0xc3, 0x39, 0x44, 0x44, 0x01, 0x76, 0xc3, 0x39, 0x9a,
- 0x91, 0x05, 0x3a, 0x79, 0xc4, 0x70, 0xd8, 0x05, 0x3d, 0xb9, 0xcb, 0x94,
- 0xec, 0x05, 0x3e, 0x09, 0x8b, 0x00, 0x0d, 0x19, 0x97, 0x00, 0x11, 0x18,
- 0x46, 0x00, 0x6b, 0xc3, 0x39, 0xa8, 0x42, 0x00, 0xb2, 0xc3, 0x39, 0xf2,
- 0x10, 0xc3, 0x39, 0xff, 0x95, 0x05, 0x3b, 0x68, 0xc2, 0x23, 0xb6, 0x00,
- 0x14, 0x93, 0x03, 0x3a, 0x0b, 0xc2, 0x00, 0x35, 0x00, 0x0a, 0x5b, 0x03,
- 0x3a, 0x0f, 0xc2, 0x00, 0xe5, 0x00, 0x14, 0x1b, 0x03, 0x3a, 0x15, 0x46,
- 0x00, 0x6b, 0xc3, 0x3a, 0x1b, 0x4e, 0x72, 0x69, 0xc3, 0x3a, 0x71, 0x96,
- 0x05, 0x3b, 0x5a, 0x03, 0x3a, 0x7d, 0xcb, 0x8d, 0x88, 0x00, 0x15, 0x4b,
- 0x03, 0x3a, 0x81, 0x46, 0x00, 0x6b, 0x43, 0x3a, 0x87, 0x46, 0x00, 0x6b,
- 0xc3, 0x3a, 0xa5, 0xc3, 0xe7, 0xba, 0x00, 0x10, 0xe8, 0x45, 0x00, 0x51,
- 0xc3, 0x3a, 0xe0, 0x46, 0x00, 0x6b, 0xc3, 0x3a, 0xec, 0xc2, 0x06, 0x1f,
- 0x05, 0x3b, 0x98, 0x00, 0xc3, 0x3b, 0x10, 0xc6, 0x12, 0x12, 0x00, 0x14,
- 0x53, 0x03, 0x3b, 0x1f, 0x87, 0x00, 0xeb, 0x59, 0x91, 0x05, 0x5b, 0x19,
- 0x8b, 0x05, 0x5a, 0x81, 0x8f, 0x05, 0x3b, 0xc0, 0x00, 0xc3, 0x3b, 0x25,
- 0xc4, 0xd9, 0x9f, 0x00, 0x12, 0x8b, 0x03, 0x3b, 0x31, 0x87, 0x00, 0x07,
- 0x33, 0x03, 0x3b, 0x37, 0x83, 0x05, 0x39, 0x99, 0x91, 0x05, 0x39, 0xa9,
- 0x97, 0x05, 0x39, 0xb9, 0x98, 0x05, 0x39, 0xcb, 0x03, 0x3b, 0x3d, 0x9b,
- 0x05, 0x39, 0xe9, 0xca, 0xa4, 0x96, 0x05, 0x3e, 0x18, 0x46, 0x00, 0x6b,
- 0x43, 0x3b, 0x43, 0x46, 0x00, 0x6b, 0xc3, 0x3b, 0x65, 0xc3, 0x08, 0xc3,
- 0x05, 0x39, 0x3b, 0x03, 0x3b, 0x8b, 0x98, 0x00, 0x0c, 0xa9, 0xc5, 0xd3,
- 0x22, 0x01, 0x63, 0xb0, 0x46, 0x00, 0x6b, 0x43, 0x3b, 0x91, 0x46, 0x00,
- 0x6b, 0x43, 0x3b, 0xc1, 0x46, 0x00, 0x6b, 0xc3, 0x3b, 0xd1, 0x9b, 0x05,
- 0x3b, 0x09, 0xcb, 0x91, 0x2f, 0x05, 0x3b, 0x19, 0xc3, 0x00, 0xef, 0x05,
- 0x3b, 0x49, 0x47, 0xc6, 0x2d, 0x43, 0x3b, 0xf3, 0x46, 0x00, 0x6b, 0xc3,
- 0x3c, 0x05, 0xc2, 0x00, 0x0a, 0x00, 0x13, 0xc0, 0x46, 0x00, 0x6b, 0xc3,
- 0x3c, 0x2d, 0xc2, 0x02, 0x59, 0x00, 0x09, 0xc0, 0x46, 0x00, 0x6b, 0xc3,
- 0x3c, 0x5c, 0x47, 0x21, 0x79, 0xc3, 0x3c, 0x90, 0xc4, 0x32, 0x6d, 0x00,
- 0x13, 0x19, 0xc2, 0x00, 0xa4, 0x00, 0x0d, 0x18, 0x46, 0x00, 0x6b, 0xc3,
- 0x3c, 0xa2, 0xcc, 0x87, 0x40, 0x00, 0xe8, 0xb9, 0x03, 0xc3, 0x3c, 0xd2,
- 0x4b, 0x90, 0xb6, 0xc3, 0x3c, 0xde, 0xc7, 0xc9, 0x9f, 0x05, 0x3a, 0x39,
- 0xc3, 0x05, 0xe7, 0x05, 0x3d, 0xa8, 0x46, 0x00, 0x6b, 0x43, 0x3c, 0xe9,
- 0x46, 0x00, 0x6b, 0xc3, 0x3c, 0xf3, 0xc9, 0xb5, 0xd3, 0x00, 0x11, 0xc8,
- 0x88, 0x07, 0xd8, 0x03, 0x03, 0x3d, 0x08, 0x8e, 0x07, 0xd8, 0x11, 0x8b,
- 0x07, 0xd8, 0x08, 0x8d, 0x0e, 0xf8, 0x81, 0x89, 0x0e, 0xf8, 0x11, 0x94,
- 0x00, 0xe8, 0xd1, 0x8f, 0x05, 0x3f, 0xd1, 0x87, 0x01, 0x63, 0xd8, 0xc4,
- 0xa0, 0xe6, 0x0e, 0xf8, 0x21, 0xc6, 0x01, 0x01, 0x00, 0xe8, 0x60, 0x94,
- 0x00, 0xe8, 0xc9, 0x90, 0x00, 0xe8, 0x70, 0xc4, 0xab, 0x3a, 0x00, 0xf7,
- 0xf1, 0xc5, 0x21, 0x12, 0x00, 0xf7, 0xc1, 0xc4, 0x02, 0x83, 0x00, 0x0d,
- 0x9b, 0x03, 0x3d, 0x10, 0x06, 0xc3, 0x3d, 0x16, 0xc5, 0x1f, 0x94, 0x00,
- 0xf7, 0x91, 0xc5, 0x35, 0x4a, 0x00, 0x06, 0xe9, 0xca, 0x0b, 0x36, 0x00,
- 0x0b, 0xb1, 0xc6, 0x60, 0xe6, 0x00, 0x11, 0x91, 0xc6, 0x01, 0x01, 0x00,
- 0x12, 0x70, 0x47, 0xc5, 0xa8, 0xc3, 0x3d, 0x22, 0xc8, 0xbd, 0xa5, 0x05,
- 0x3e, 0xb0, 0x44, 0x01, 0xb8, 0xc3, 0x3d, 0x2c, 0xc5, 0x35, 0x4a, 0x00,
- 0xf1, 0xf1, 0xc4, 0x02, 0x83, 0x01, 0x63, 0x70, 0x45, 0x00, 0x6c, 0xc3,
- 0x3d, 0x38, 0xc3, 0x00, 0xe8, 0x00, 0x12, 0x20, 0x42, 0x00, 0xf6, 0xc3,
- 0x3d, 0x82, 0x05, 0xc3, 0x3d, 0x91, 0x06, 0xc3, 0x3d, 0xa0, 0x0f, 0xc3,
- 0x3d, 0xad, 0xc5, 0x21, 0x12, 0x00, 0x06, 0xab, 0x03, 0x3d, 0xbc, 0xc6,
- 0x01, 0x01, 0x00, 0x06, 0xc3, 0x03, 0x3d, 0xc2, 0xc5, 0x1f, 0x94, 0x00,
- 0x06, 0x91, 0xc5, 0x35, 0x4a, 0x00, 0x06, 0x99, 0x42, 0x01, 0x64, 0xc3,
- 0x3d, 0xc8, 0xc5, 0x1f, 0x0a, 0x00, 0x0a, 0x71, 0xc6, 0xcd, 0x21, 0x00,
- 0x0f, 0x53, 0x03, 0x3d, 0xd4, 0xce, 0x1f, 0x15, 0x00, 0x10, 0x70, 0x91,
- 0x00, 0x0c, 0x31, 0x87, 0x00, 0x0c, 0x80, 0x06, 0xc3, 0x3d, 0xda, 0xca,
- 0x9f, 0xc8, 0x00, 0xf6, 0x41, 0xc5, 0x21, 0x12, 0x00, 0x09, 0x43, 0x03,
- 0x3d, 0xe7, 0xc5, 0x1f, 0x94, 0x00, 0x06, 0x61, 0xc5, 0x35, 0x4a, 0x00,
- 0x06, 0x69, 0x05, 0xc3, 0x3d, 0xed, 0xc6, 0x60, 0xe6, 0x00, 0x09, 0x51,
- 0xc5, 0x1f, 0x0a, 0x00, 0x09, 0x61, 0xc6, 0xcd, 0x21, 0x00, 0x09, 0x71,
- 0xc6, 0x01, 0x01, 0x00, 0x0c, 0xb9, 0xce, 0x1f, 0x15, 0x00, 0x10, 0x50,
- 0x88, 0x05, 0x3b, 0xd9, 0x89, 0x05, 0x3b, 0xe9, 0x94, 0x05, 0x3c, 0x11,
- 0x95, 0x05, 0x3c, 0x21, 0x96, 0x05, 0x3c, 0x31, 0x86, 0x05, 0x3b, 0xc8,
- 0x05, 0xc3, 0x3d, 0xf9, 0xc5, 0x21, 0x12, 0x00, 0xf5, 0xe3, 0x03, 0x3e,
- 0x11, 0xca, 0x9f, 0xc8, 0x00, 0xf5, 0xd1, 0x06, 0xc3, 0x3e, 0x17, 0xc6,
- 0x60, 0xe6, 0x00, 0x08, 0x93, 0x03, 0x3e, 0x21, 0xc5, 0x1f, 0x94, 0x00,
- 0x06, 0x41, 0xc5, 0x35, 0x4a, 0x00, 0x06, 0x49, 0xc5, 0x1f, 0x0a, 0x00,
- 0x08, 0xa1, 0xc6, 0xcd, 0x21, 0x00, 0x08, 0xc1, 0xce, 0x1f, 0x15, 0x00,
- 0x10, 0x31, 0xc6, 0x01, 0x01, 0x00, 0x12, 0x30, 0xc3, 0x01, 0x1e, 0x05,
- 0x39, 0x11, 0xc2, 0x00, 0x34, 0x05, 0x39, 0x20, 0x8a, 0x00, 0x06, 0x80,
- 0x00, 0x43, 0x3e, 0x27, 0xc5, 0x1f, 0x0a, 0x00, 0x08, 0x13, 0x03, 0x3e,
- 0x33, 0x05, 0xc3, 0x3e, 0x39, 0xca, 0x9f, 0xc8, 0x00, 0xf5, 0x11, 0x06,
- 0xc3, 0x3e, 0x48, 0x45, 0x00, 0x5b, 0xc3, 0x3e, 0x55, 0xce, 0x1f, 0x15,
- 0x00, 0x10, 0x11, 0xc5, 0x1f, 0x94, 0x00, 0x06, 0x01, 0xc5, 0x35, 0x4a,
- 0x00, 0x06, 0x09, 0xc5, 0x21, 0x12, 0x00, 0x06, 0x19, 0xc6, 0x60, 0xe6,
- 0x00, 0x08, 0x01, 0xc6, 0xcd, 0x21, 0x00, 0x08, 0x21, 0xc6, 0x01, 0x01,
- 0x00, 0x11, 0xd0, 0x46, 0x00, 0x6b, 0x43, 0x3e, 0x64, 0xd4, 0x3d, 0xf5,
- 0x05, 0x39, 0xd0, 0x44, 0x01, 0xb8, 0xc3, 0x3e, 0x70, 0x05, 0xc3, 0x3e,
- 0x7f, 0xc5, 0x35, 0x4a, 0x00, 0x0a, 0xd3, 0x03, 0x3e, 0x9a, 0xce, 0x3a,
- 0xdb, 0x05, 0x3d, 0x41, 0xc4, 0x02, 0x83, 0x05, 0x3e, 0x29, 0x15, 0x43,
- 0x3e, 0xa0, 0xc6, 0xb8, 0xe7, 0x05, 0x3d, 0x61, 0xc3, 0x6b, 0xe0, 0x00,
- 0x0c, 0x78, 0xd0, 0x5f, 0xa2, 0x00, 0x12, 0x51, 0xc9, 0xa9, 0x85, 0x05,
- 0x3d, 0x70, 0xca, 0x69, 0x25, 0x00, 0xf4, 0xa1, 0x06, 0xc3, 0x3e, 0xac,
- 0x05, 0xc3, 0x3e, 0xb8, 0xcc, 0x57, 0x82, 0x05, 0x3e, 0x31, 0xc5, 0x35,
- 0x4a, 0x00, 0x0b, 0xc9, 0x15, 0xc3, 0x3e, 0xc4, 0xc4, 0x02, 0x83, 0x00,
- 0x11, 0x20, 0xc8, 0x1e, 0x43, 0x00, 0xf4, 0x61, 0xc8, 0x17, 0x7b, 0x00,
- 0xf4, 0x50, 0x06, 0xc3, 0x3e, 0xd0, 0xc5, 0x35, 0x4a, 0x00, 0xf4, 0x11,
- 0xc5, 0x1f, 0x94, 0x00, 0xf4, 0x01, 0xc4, 0x02, 0x83, 0x01, 0x63, 0x91,
- 0xca, 0x0b, 0x36, 0x00, 0x0b, 0xa0, 0x06, 0xc3, 0x3e, 0xdc, 0xc5, 0x21,
- 0x12, 0x00, 0xf3, 0xe1, 0xc4, 0x02, 0x83, 0x00, 0x0d, 0x90, 0xc2, 0x0f,
- 0xf5, 0x05, 0x3c, 0xd1, 0xc2, 0x1b, 0xd8, 0x05, 0x3c, 0xe1, 0xc2, 0x0f,
- 0x4d, 0x05, 0x3c, 0xf0, 0x05, 0xc3, 0x3e, 0xe8, 0xca, 0x69, 0x25, 0x00,
- 0xf3, 0x71, 0x06, 0xc3, 0x3f, 0x00, 0xc6, 0x01, 0x01, 0x00, 0x0b, 0x31,
- 0xc4, 0x02, 0x83, 0x00, 0x0d, 0x61, 0xce, 0x02, 0x79, 0x00, 0x0d, 0x70,
- 0xcc, 0x21, 0x84, 0x05, 0x3b, 0x22, 0x03, 0x3f, 0x0c, 0xc9, 0x6a, 0x61,
- 0x05, 0x3b, 0xf1, 0x8e, 0x05, 0x3c, 0x01, 0x8a, 0x05, 0x3c, 0x69, 0x8d,
- 0x05, 0x3d, 0x81, 0x96, 0x05, 0x3d, 0x89, 0x8f, 0x00, 0x0c, 0xe1, 0x98,
- 0x00, 0x12, 0x29, 0x83, 0x01, 0x63, 0x7a, 0x03, 0x3f, 0x12, 0xc3, 0x26,
- 0x50, 0x00, 0x0c, 0x21, 0xc3, 0x01, 0x59, 0x00, 0x0d, 0x39, 0xc4, 0x0c,
- 0x5a, 0x00, 0x0d, 0xe0, 0x45, 0x00, 0x6c, 0xc3, 0x3f, 0x18, 0xc7, 0xa8,
- 0x1d, 0x05, 0x3a, 0xd0, 0xca, 0x9b, 0xf4, 0x05, 0x39, 0xf1, 0xc6, 0x25,
- 0x70, 0x05, 0x3d, 0x59, 0x87, 0x00, 0x0c, 0x71, 0xc6, 0xd3, 0x21, 0x05,
- 0x3f, 0xa8, 0xc9, 0x17, 0x7a, 0x00, 0xf2, 0xb1, 0xc5, 0x35, 0x4a, 0x00,
- 0xf2, 0xa1, 0x15, 0xc3, 0x3f, 0x46, 0xc4, 0x02, 0x83, 0x00, 0x0d, 0x21,
- 0xc8, 0xb8, 0x05, 0x05, 0x3a, 0x90, 0x05, 0xc3, 0x3f, 0x55, 0x0e, 0xc3,
- 0x3f, 0x67, 0x06, 0xc3, 0x3f, 0x79, 0xc5, 0x1f, 0x94, 0x00, 0x0f, 0xc1,
- 0xc5, 0x21, 0x12, 0x00, 0x06, 0x89, 0xc5, 0x35, 0x4a, 0x00, 0x0a, 0x19,
- 0xce, 0x3a, 0xdb, 0x05, 0x3d, 0x21, 0xce, 0x6d, 0x8b, 0x00, 0x0e, 0x58,
- 0x05, 0xc3, 0x3f, 0x85, 0xca, 0x69, 0x25, 0x00, 0xf1, 0xd1, 0x42, 0x01,
- 0x4a, 0xc3, 0x3f, 0x97, 0xcb, 0x95, 0x18, 0x05, 0x3a, 0x41, 0xc5, 0x35,
- 0x4a, 0x00, 0x09, 0xc9, 0x47, 0x00, 0x50, 0xc3, 0x3f, 0xa6, 0x15, 0xc3,
- 0x3f, 0xb2, 0x04, 0x43, 0x3f, 0xbe, 0xca, 0x69, 0x25, 0x00, 0xf1, 0xa1,
- 0x06, 0xc3, 0x3f, 0xca, 0xc5, 0x35, 0x4a, 0x00, 0xf1, 0x81, 0xc6, 0x01,
- 0x01, 0x05, 0x3a, 0x03, 0x03, 0x3f, 0xdc, 0x05, 0xc3, 0x3f, 0xe2, 0xce,
- 0x3a, 0xdb, 0x05, 0x3d, 0x11, 0xc4, 0x02, 0x83, 0x00, 0x0c, 0xc0, 0xcb,
- 0x9a, 0xf0, 0x00, 0xf1, 0x51, 0x05, 0xc3, 0x3f, 0xee, 0x06, 0xc3, 0x40,
- 0x00, 0xc6, 0x01, 0x01, 0x00, 0x09, 0x31, 0xc4, 0x02, 0x83, 0x05, 0x3d,
- 0x50, 0xc6, 0x60, 0xe6, 0x00, 0xf1, 0x01, 0xc5, 0x35, 0x4a, 0x00, 0x0f,
- 0xa1, 0x05, 0xc3, 0x40, 0x12, 0xc5, 0x1f, 0x0a, 0x00, 0x08, 0xf1, 0xc9,
- 0x17, 0x7a, 0x00, 0x09, 0x01, 0xce, 0x3a, 0xdb, 0x05, 0x3d, 0x01, 0xc4,
- 0x02, 0x83, 0x00, 0x0c, 0x99, 0xc6, 0x01, 0x01, 0x00, 0x0f, 0x20, 0x97,
- 0x05, 0x3d, 0xf1, 0x8b, 0x05, 0x3d, 0xe1, 0x83, 0x05, 0x3d, 0xd1, 0xc4,
- 0x05, 0x30, 0x00, 0x12, 0x08, 0xc9, 0x17, 0x7a, 0x00, 0xf0, 0xf1, 0xc6,
- 0x01, 0x01, 0x05, 0x3c, 0xc1, 0xc4, 0x02, 0x83, 0x00, 0x0c, 0x88, 0x05,
- 0xc3, 0x40, 0x24, 0xca, 0x69, 0x25, 0x00, 0xf0, 0x71, 0x44, 0x01, 0xb8,
- 0xc3, 0x40, 0x36, 0x15, 0xc3, 0x40, 0x42, 0xc4, 0x02, 0x83, 0x00, 0x0c,
- 0x51, 0xc6, 0xd2, 0x7f, 0x00, 0x0c, 0x58, 0xcb, 0x9a, 0x98, 0x00, 0x0e,
- 0x20, 0x05, 0xc3, 0x40, 0x57, 0xc5, 0x35, 0x4a, 0x00, 0x08, 0x31, 0xc9,
- 0x17, 0x7a, 0x00, 0x08, 0x51, 0xc3, 0x00, 0xe8, 0x05, 0x3c, 0x91, 0xcc,
- 0x57, 0x82, 0x05, 0x3e, 0x21, 0xc4, 0x02, 0x83, 0x00, 0x0c, 0x39, 0xc6,
- 0x01, 0x01, 0x00, 0x11, 0xd8, 0xcb, 0x9a, 0xa3, 0x05, 0x39, 0x70, 0xca,
- 0x69, 0x25, 0x00, 0xf0, 0x31, 0x44, 0x01, 0xb8, 0xc3, 0x40, 0x6c, 0xc8,
- 0xb8, 0x05, 0x05, 0x3c, 0xb1, 0xc4, 0x02, 0x83, 0x00, 0x0c, 0x09, 0xc6,
- 0xd2, 0x7f, 0x00, 0x0c, 0x11, 0xc6, 0x01, 0x01, 0x00, 0x12, 0x18, 0x05,
- 0xc3, 0x40, 0x78, 0xc6, 0x01, 0x01, 0x00, 0x12, 0x40, 0xd8, 0x23, 0xd0,
- 0x05, 0x3a, 0xb1, 0xcf, 0x3f, 0x8a, 0x05, 0x3a, 0xc0, 0x83, 0x00, 0x74,
- 0x89, 0xc2, 0x00, 0xa4, 0x00, 0x74, 0x90, 0xc6, 0x02, 0x21, 0x0f, 0xda,
- 0xb1, 0xcc, 0x02, 0x0b, 0x0f, 0xdb, 0x28, 0xcc, 0x02, 0x0b, 0x0f, 0xdb,
- 0x21, 0xc5, 0x01, 0xf7, 0x0f, 0xdb, 0x30, 0xc6, 0x02, 0x21, 0x0f, 0xda,
- 0xd9, 0xcc, 0x02, 0x0b, 0x0f, 0xdb, 0x00, 0xcc, 0x02, 0x0b, 0x0f, 0xda,
- 0xf9, 0xc5, 0x01, 0xf7, 0x0f, 0xdb, 0x08, 0xcc, 0x03, 0x3b, 0x01, 0x0f,
- 0x69, 0xce, 0x0f, 0xa7, 0x01, 0x0f, 0x60, 0x00, 0x43, 0x40, 0x84, 0xd2,
- 0x06, 0x54, 0x0f, 0xc0, 0x09, 0xd5, 0x03, 0x72, 0x0f, 0xc0, 0x88, 0xca,
- 0x01, 0xf7, 0x01, 0x0d, 0x89, 0xc9, 0x01, 0x1e, 0x01, 0x0d, 0x80, 0x06,
- 0xc3, 0x40, 0x96, 0xdf, 0x0d, 0x5e, 0x01, 0x4b, 0x18, 0xc3, 0xe6, 0xf7,
- 0x0f, 0xb3, 0x39, 0xc9, 0xac, 0x43, 0x0f, 0xb2, 0xf8, 0xe0, 0x0c, 0x27,
- 0x01, 0x3a, 0xd8, 0xe0, 0x00, 0xa7, 0x01, 0x3b, 0x00, 0xe0, 0x00, 0xa7,
- 0x01, 0x3a, 0xf8, 0xdc, 0x14, 0xfa, 0x01, 0x3d, 0x31, 0xde, 0x0e, 0x16,
- 0x01, 0x3d, 0x28, 0xe0, 0x0c, 0x27, 0x01, 0x3a, 0xe8, 0xd5, 0x03, 0x72,
- 0x0f, 0xc0, 0xd1, 0xdb, 0x17, 0xef, 0x0f, 0xc0, 0xf0, 0xc4, 0x03, 0x2b,
- 0x0f, 0xc4, 0xf1, 0xc5, 0x08, 0x27, 0x0f, 0xc4, 0xf8, 0xc6, 0x64, 0xbb,
- 0x07, 0xda, 0x4b, 0x03, 0x40, 0x9c, 0x15, 0x43, 0x40, 0xa2, 0x46, 0x00,
- 0x6b, 0x43, 0x40, 0xae, 0xc9, 0x68, 0x21, 0x07, 0xd9, 0x49, 0xc4, 0x45,
- 0xc1, 0x07, 0xd9, 0x00, 0xc8, 0x4e, 0x3b, 0x02, 0x6e, 0x69, 0xc3, 0x01,
- 0xd3, 0x02, 0x6f, 0x08, 0xc3, 0x0f, 0x21, 0x00, 0x04, 0x41, 0xd2, 0x4a,
- 0x58, 0x00, 0x04, 0x48, 0x0d, 0xc3, 0x40, 0xc0, 0x15, 0xc3, 0x40, 0xd2,
- 0xc5, 0x7a, 0x92, 0x05, 0x4b, 0x49, 0xc5, 0xdb, 0x51, 0x05, 0x4b, 0x41,
- 0xc6, 0xc8, 0x2d, 0x05, 0x4b, 0x31, 0xc5, 0xd9, 0x80, 0x00, 0x88, 0xc1,
- 0xc5, 0x98, 0x41, 0x00, 0x88, 0xd1, 0xc5, 0xd7, 0x8c, 0x05, 0x4b, 0x68,
- 0xcb, 0x98, 0x3b, 0x05, 0x4b, 0xe1, 0x16, 0xc3, 0x40, 0xde, 0xc5, 0xd7,
- 0x8c, 0x00, 0x88, 0x6b, 0x03, 0x40, 0xea, 0xc4, 0xac, 0xd8, 0x00, 0x88,
- 0x53, 0x03, 0x40, 0xf0, 0xc6, 0x92, 0x31, 0x00, 0x88, 0x09, 0xc5, 0x7a,
- 0x92, 0x00, 0x88, 0x41, 0xc5, 0xd9, 0x80, 0x00, 0x88, 0xa1, 0xc5, 0xd6,
- 0x3d, 0x00, 0x88, 0xc9, 0xc5, 0xbb, 0xa0, 0x00, 0x8a, 0x39, 0xc5, 0x98,
- 0x41, 0x00, 0x8a, 0xc0, 0x02, 0x43, 0x40, 0xf6, 0x02, 0x43, 0x41, 0x2a,
- 0x02, 0x43, 0x41, 0x36, 0xc5, 0x98, 0x41, 0x05, 0x4b, 0xb9, 0xc5, 0xd6,
- 0x3d, 0x05, 0x4b, 0xb1, 0xc6, 0x92, 0x31, 0x00, 0x8a, 0x09, 0x16, 0xc3,
- 0x41, 0x58, 0xc5, 0xdb, 0x51, 0x00, 0x8a, 0x19, 0x12, 0xc3, 0x41, 0x64,
- 0xc4, 0xac, 0xd8, 0x00, 0x8a, 0x29, 0x05, 0x43, 0x41, 0x76, 0xc4, 0xac,
- 0xd8, 0x05, 0x4b, 0x89, 0xc6, 0xc8, 0x2d, 0x05, 0x4b, 0x81, 0xc6, 0x92,
- 0x31, 0x05, 0x4b, 0x79, 0xc5, 0x7a, 0x92, 0x00, 0x88, 0xe0, 0x02, 0x43,
- 0x41, 0x82, 0xc7, 0xc8, 0x2c, 0x00, 0x8a, 0xd0, 0xc5, 0xd6, 0x3d, 0x00,
- 0x88, 0xd9, 0xc5, 0xdb, 0x51, 0x00, 0x88, 0xe9, 0x12, 0xc3, 0x41, 0xa6,
- 0xca, 0xa4, 0x3c, 0x00, 0x89, 0x60, 0xc6, 0x92, 0x31, 0x00, 0x88, 0x99,
- 0xc6, 0xc8, 0x2d, 0x00, 0x88, 0xa9, 0xc5, 0x7a, 0x92, 0x00, 0x88, 0xb1,
- 0xc4, 0xac, 0xd8, 0x00, 0x8a, 0xd9, 0xc5, 0xd7, 0x8c, 0x00, 0x8a, 0xe1,
- 0xc5, 0x98, 0x41, 0x00, 0x8a, 0xe8, 0xc6, 0xce, 0x0b, 0x00, 0x8a, 0x68,
- 0xc4, 0xc7, 0x2c, 0x00, 0x88, 0x73, 0x03, 0x41, 0xb2, 0x45, 0xd8, 0x86,
- 0x43, 0x41, 0xb6, 0x15, 0xc3, 0x41, 0xbe, 0x05, 0x43, 0x41, 0xca, 0x87,
- 0x00, 0x8b, 0x11, 0x02, 0xc3, 0x41, 0xd6, 0xc4, 0x0f, 0xf4, 0x00, 0x8c,
- 0xf2, 0x03, 0x41, 0xe4, 0x83, 0x00, 0x8b, 0x1b, 0x03, 0x41, 0xe8, 0x87,
- 0x00, 0x8b, 0x43, 0x03, 0x41, 0xf0, 0x91, 0x00, 0x8b, 0x6b, 0x03, 0x41,
- 0xf7, 0x97, 0x00, 0x8b, 0x93, 0x03, 0x41, 0xfb, 0x8b, 0x00, 0x8b, 0xa2,
- 0x03, 0x41, 0xff, 0x91, 0x00, 0x8b, 0x2b, 0x03, 0x42, 0x05, 0x97, 0x00,
- 0x8b, 0x9a, 0x03, 0x42, 0x09, 0x87, 0x00, 0x8b, 0x61, 0x02, 0x43, 0x42,
- 0x0d, 0x83, 0x00, 0x8b, 0x53, 0x03, 0x42, 0x23, 0x87, 0x00, 0x8b, 0x83,
- 0x03, 0x42, 0x27, 0x8b, 0x00, 0x8b, 0x88, 0x02, 0x43, 0x42, 0x2b, 0x02,
- 0x43, 0x42, 0x4b, 0xc5, 0x92, 0x32, 0x00, 0x8d, 0x43, 0x03, 0x42, 0x6b,
- 0xc6, 0xc0, 0x37, 0x00, 0x8d, 0xf9, 0x47, 0x7a, 0x8b, 0x43, 0x42, 0x6f,
- 0x44, 0x29, 0x95, 0xc3, 0x42, 0x7f, 0xc3, 0x3c, 0x08, 0x00, 0x8d, 0xd2,
- 0x03, 0x42, 0xc4, 0x02, 0x43, 0x42, 0xc8, 0xc5, 0xc8, 0x2e, 0x00, 0x8d,
- 0x73, 0x03, 0x42, 0xee, 0xc6, 0xc6, 0xf2, 0x00, 0x8e, 0x00, 0x02, 0x43,
- 0x42, 0xf2, 0x02, 0x43, 0x43, 0x1d, 0xc4, 0x7a, 0x93, 0x00, 0x8d, 0xc3,
- 0x03, 0x43, 0x41, 0xc6, 0xc1, 0x07, 0x00, 0x8e, 0x0b, 0x03, 0x43, 0x45,
- 0xc6, 0xcb, 0x4b, 0x00, 0x8f, 0x5a, 0x03, 0x43, 0x49, 0x02, 0x43, 0x43,
- 0x4d, 0xc4, 0xc7, 0x2b, 0x00, 0x8d, 0xeb, 0x03, 0x43, 0x57, 0xc6, 0xc7,
- 0x2a, 0x00, 0x8d, 0xf0, 0x02, 0x43, 0x43, 0x5b, 0xc6, 0xbb, 0x9f, 0x00,
- 0x8f, 0x83, 0x03, 0x43, 0x73, 0xc9, 0x98, 0x3d, 0x00, 0x8f, 0xc8, 0xc5,
- 0xde, 0x35, 0x01, 0x89, 0x98, 0xc5, 0xdb, 0x51, 0x01, 0x8b, 0x89, 0x12,
- 0xc3, 0x43, 0x77, 0xca, 0xa4, 0x3c, 0x01, 0x8b, 0xc8, 0xc6, 0x92, 0x31,
- 0x01, 0x89, 0x91, 0xc6, 0xc8, 0x2d, 0x01, 0x89, 0xc1, 0xc5, 0x7a, 0x92,
- 0x01, 0x8a, 0x19, 0xc4, 0xac, 0xd8, 0x01, 0x8a, 0x31, 0xc5, 0xd7, 0x8c,
- 0x01, 0x8a, 0x49, 0xc5, 0xd9, 0x80, 0x01, 0x8b, 0x29, 0xc5, 0xbb, 0xa0,
- 0x01, 0x8c, 0x01, 0xc5, 0x98, 0x41, 0x01, 0x8c, 0x28, 0x02, 0x43, 0x43,
- 0x83, 0xc5, 0xd7, 0x8c, 0x01, 0x89, 0xa9, 0xc5, 0x98, 0x41, 0x01, 0x89,
- 0xb1, 0xc6, 0xc8, 0x2d, 0x01, 0x8b, 0x31, 0xc4, 0xac, 0xd8, 0x01, 0x8b,
- 0x39, 0xc7, 0xcb, 0x4a, 0x01, 0x8b, 0x40, 0xc6, 0x92, 0x31, 0x01, 0x89,
- 0xd3, 0x03, 0x43, 0xa1, 0xc5, 0xdb, 0x51, 0x01, 0x89, 0xd9, 0x12, 0xc3,
- 0x43, 0xa7, 0xc4, 0xac, 0xd8, 0x01, 0x89, 0xe9, 0x16, 0xc3, 0x43, 0xbc,
- 0xc5, 0x98, 0x41, 0x01, 0x8a, 0x01, 0xcb, 0x98, 0x3b, 0x01, 0x8b, 0x68,
- 0x12, 0xc3, 0x43, 0xc8, 0xc4, 0xac, 0xd8, 0x01, 0x8b, 0x78, 0x02, 0x43,
- 0x43, 0xd4, 0x87, 0x01, 0x8c, 0x70, 0x87, 0x01, 0x8a, 0x90, 0x91, 0x01,
- 0x8a, 0xab, 0x03, 0x43, 0xed, 0xc6, 0xbb, 0x9f, 0x01, 0x8c, 0x0a, 0x03,
- 0x43, 0xf3, 0x02, 0x43, 0x43, 0xf7, 0x02, 0x43, 0x44, 0x04, 0x87, 0x01,
- 0x8a, 0xc8, 0x91, 0x01, 0x8a, 0xe8, 0x83, 0x07, 0xfb, 0x39, 0x8b, 0x07,
- 0xfb, 0x41, 0x97, 0x07, 0xfb, 0x49, 0x87, 0x07, 0xfb, 0x51, 0x91, 0x07,
- 0xfb, 0x59, 0x1b, 0xc3, 0x44, 0x11, 0xc2, 0x00, 0x16, 0x07, 0xfb, 0x78,
- 0xc4, 0x7a, 0x93, 0x07, 0xfd, 0x61, 0xc6, 0xc1, 0x07, 0x07, 0xfd, 0x78,
- 0xc8, 0x50, 0x00, 0x08, 0x5b, 0xf9, 0xc7, 0x0c, 0x4b, 0x08, 0x5b, 0xf0,
- 0xc4, 0x18, 0x85, 0x08, 0x5b, 0xe9, 0x91, 0x08, 0x5b, 0xc8, 0xc3, 0xdf,
- 0x4a, 0x08, 0x5b, 0x81, 0xc4, 0xd9, 0x77, 0x08, 0x5b, 0x70, 0xc8, 0x50,
- 0x00, 0x08, 0x5a, 0xf9, 0xc7, 0x0c, 0x4b, 0x08, 0x5a, 0xf0, 0xc4, 0x18,
- 0x85, 0x08, 0x5a, 0xe9, 0x91, 0x08, 0x5a, 0xc8, 0xc4, 0xd9, 0x77, 0x08,
- 0x5a, 0x71, 0xc3, 0xdf, 0x4a, 0x08, 0x5a, 0x88, 0xcb, 0x4f, 0xfd, 0x0f,
- 0x65, 0x99, 0xc2, 0x01, 0x47, 0x0f, 0x65, 0x90, 0xc4, 0x18, 0x83, 0x0f,
- 0x65, 0x49, 0xc2, 0x26, 0x51, 0x0f, 0x65, 0x40, 0xc3, 0x0c, 0x5b, 0x0f,
- 0x65, 0x39, 0xc3, 0x06, 0x9e, 0x0f, 0x65, 0x30, 0xc4, 0x04, 0x5e, 0x0f,
- 0x65, 0x29, 0xc2, 0x01, 0x47, 0x0f, 0x65, 0x20, 0xc9, 0x4f, 0xff, 0x0f,
- 0x64, 0xe8, 0xc8, 0x50, 0x00, 0x0f, 0x64, 0xa1, 0xc7, 0x0c, 0x4b, 0x0f,
- 0x64, 0x58, 0xc9, 0x4f, 0xff, 0x0f, 0x64, 0xe0, 0xc8, 0x50, 0x00, 0x0f,
- 0x64, 0x99, 0xc7, 0x0c, 0x4b, 0x0f, 0x64, 0x50, 0xc2, 0x0c, 0x57, 0x0f,
- 0x64, 0x03, 0x03, 0x44, 0x1d, 0x00, 0x43, 0x44, 0x23, 0xc2, 0x0c, 0x57,
- 0x0f, 0x63, 0xfb, 0x03, 0x44, 0x2f, 0x00, 0x43, 0x44, 0x35, 0xc3, 0x43,
- 0xcd, 0x0f, 0x63, 0xf3, 0x03, 0x44, 0x41, 0xc2, 0x00, 0x7b, 0x0f, 0x63,
- 0xaa, 0x03, 0x44, 0x47, 0xc3, 0x0c, 0x56, 0x0f, 0x63, 0xeb, 0x03, 0x44,
- 0x4b, 0xc2, 0x02, 0x53, 0x0f, 0x63, 0xa2, 0x03, 0x44, 0x51, 0xc4, 0x0c,
- 0x55, 0x0f, 0x63, 0xe3, 0x03, 0x44, 0x55, 0xc3, 0x04, 0x5f, 0x0f, 0x63,
- 0x9a, 0x03, 0x44, 0x5b, 0xc4, 0x18, 0x85, 0x0f, 0x63, 0xdb, 0x03, 0x44,
- 0x5f, 0x91, 0x0f, 0x63, 0x92, 0x03, 0x44, 0x65, 0xc9, 0x4f, 0xff, 0x0f,
- 0x64, 0xa8, 0xc8, 0x50, 0x00, 0x0f, 0x64, 0x61, 0xc7, 0x0c, 0x4b, 0x0f,
- 0x64, 0x18, 0xc2, 0x00, 0x8c, 0x01, 0x96, 0x29, 0xc2, 0x02, 0x55, 0x01,
- 0x96, 0x30, 0xc3, 0x01, 0xb4, 0x01, 0x9f, 0x01, 0x16, 0xc3, 0x44, 0x69,
- 0x08, 0xc3, 0x44, 0x77, 0x15, 0xc3, 0x44, 0x84, 0x07, 0xc3, 0x44, 0x96,
- 0xc4, 0x22, 0x71, 0x01, 0x9f, 0x42, 0x03, 0x44, 0xa5, 0x19, 0xc3, 0x44,
- 0xab, 0x0a, 0xc3, 0x44, 0xb3, 0xc2, 0x00, 0x4d, 0x01, 0x9b, 0x10, 0xc3,
- 0x06, 0x9e, 0x01, 0x9a, 0xe3, 0x03, 0x44, 0xbf, 0x0b, 0x43, 0x44, 0xc5,
- 0xc2, 0x26, 0x51, 0x01, 0x9a, 0xf3, 0x03, 0x44, 0xd1, 0xc4, 0x18, 0x83,
- 0x01, 0x9a, 0xfa, 0x03, 0x44, 0xd7, 0xc4, 0x01, 0xdc, 0x01, 0x9b, 0x03,
- 0x03, 0x44, 0xdd, 0xc5, 0x6a, 0x79, 0x01, 0x9b, 0x18, 0xc4, 0x13, 0xf2,
- 0x01, 0x9b, 0x58, 0xdb, 0x18, 0x76, 0x0f, 0xd1, 0xa9, 0xce, 0x29, 0x88,
- 0x0f, 0xd0, 0x58, 0xce, 0x29, 0x88, 0x0f, 0xd0, 0x71, 0xdb, 0x18, 0x76,
- 0x0f, 0xd1, 0xc0, 0x49, 0x29, 0x7f, 0xc3, 0x44, 0xe3, 0x02, 0x43, 0x44,
- 0xf9, 0x49, 0x29, 0x7f, 0x43, 0x45, 0x0b, 0xce, 0x29, 0x88, 0x0f, 0xd0,
- 0x61, 0xdb, 0x18, 0x76, 0x0f, 0xd1, 0xb0, 0xce, 0x29, 0x88, 0x0f, 0xd0,
- 0x51, 0xdb, 0x18, 0x76, 0x0f, 0xd1, 0xa0, 0xc3, 0x00, 0x34, 0x0f, 0xd0,
- 0xf1, 0xc5, 0x7c, 0xf9, 0x0f, 0xd1, 0x10, 0xc8, 0x01, 0x59, 0x01, 0x34,
- 0x39, 0x42, 0x01, 0x4a, 0xc3, 0x45, 0x17, 0x46, 0x01, 0xc7, 0xc3, 0x45,
- 0x23, 0x46, 0x01, 0xd1, 0x43, 0x45, 0x2f, 0xc5, 0x23, 0x78, 0x01, 0x33,
- 0x08, 0xca, 0xa2, 0x3e, 0x01, 0x38, 0x29, 0xdc, 0x13, 0x56, 0x0f, 0xde,
- 0x00, 0xcd, 0x80, 0xd6, 0x0f, 0xbc, 0xa9, 0xcc, 0x54, 0x63, 0x01, 0x2d,
- 0x19, 0xd1, 0x54, 0x5e, 0x0f, 0xbc, 0xa0, 0x14, 0xc3, 0x45, 0x3b, 0x0e,
- 0xc3, 0x45, 0x47, 0x46, 0x01, 0xc7, 0xc3, 0x45, 0x53, 0xd7, 0x2b, 0x62,
- 0x01, 0x2f, 0x59, 0xd4, 0x3e, 0x45, 0x01, 0x1c, 0x28, 0xc4, 0x5c, 0x62,
- 0x01, 0x31, 0xe1, 0xcb, 0x8e, 0x0c, 0x0f, 0x99, 0x20, 0xca, 0x9e, 0xd8,
- 0x0f, 0x99, 0x30, 0xc5, 0x07, 0x0a, 0x01, 0x2d, 0x59, 0xc3, 0x04, 0x45,
- 0x01, 0x5a, 0x90, 0xc5, 0x08, 0x42, 0x01, 0x30, 0xe1, 0xce, 0x25, 0x12,
- 0x0f, 0xa2, 0x40, 0xcd, 0x4a, 0x93, 0x01, 0x2e, 0x41, 0xd2, 0x4a, 0x8e,
- 0x0f, 0xbc, 0xd1, 0xce, 0x6d, 0x0d, 0x0f, 0xbc, 0xd8, 0xe0, 0x09, 0xa7,
- 0x01, 0x37, 0xf8, 0xc6, 0x41, 0x49, 0x01, 0x2d, 0xd9, 0xc7, 0xb9, 0x4e,
- 0x01, 0x5a, 0xa0, 0x89, 0x0f, 0x17, 0x18, 0xc5, 0x00, 0x82, 0x0f, 0xb1,
- 0x73, 0x03, 0x45, 0x5f, 0xd8, 0x25, 0x20, 0x0f, 0xd7, 0x10, 0xd3, 0x45,
- 0x18, 0x0f, 0xb0, 0xe9, 0xcb, 0x9a, 0x40, 0x0f, 0xb0, 0xe0, 0x83, 0x08,
- 0x51, 0xa1, 0xc2, 0x00, 0xa4, 0x08, 0x51, 0x98, 0x44, 0x23, 0x4d, 0xc3,
- 0x45, 0x63, 0x43, 0x6c, 0xa4, 0x43, 0x45, 0x6f, 0x44, 0xe0, 0xeb, 0xc3,
- 0x45, 0x7b, 0x43, 0x93, 0xfc, 0x43, 0x45, 0x87, 0xc3, 0x32, 0xa9, 0x00,
- 0xcf, 0xd9, 0xc4, 0xe2, 0x27, 0x00, 0xcf, 0x58, 0x04, 0xc3, 0x45, 0x93,
- 0x44, 0x6c, 0xa3, 0xc3, 0x45, 0x9f, 0x45, 0xdc, 0x82, 0x43, 0x45, 0xab,
- 0xc3, 0x32, 0xa9, 0x00, 0xcf, 0xa9, 0xc4, 0xe2, 0x27, 0x00, 0xcf, 0x28,
- 0x02, 0x43, 0x45, 0xb7, 0x97, 0x01, 0x8f, 0xa0, 0x91, 0x0d, 0x8b, 0x31,
+ 0x98, 0x8a, 0x08, 0x22, 0x38, 0x89, 0x08, 0x22, 0x68, 0x44, 0x00, 0x55,
+ 0xc3, 0x22, 0x89, 0xc9, 0x62, 0x22, 0x01, 0x48, 0x58, 0xce, 0x74, 0x3e,
+ 0x01, 0x0d, 0x09, 0x47, 0x6b, 0xf4, 0x43, 0x22, 0x95, 0xc5, 0x00, 0x62,
+ 0x0f, 0xc2, 0x39, 0xd0, 0x5c, 0xef, 0x0f, 0xc2, 0x18, 0x44, 0x00, 0xcd,
+ 0xc3, 0x22, 0x9b, 0x45, 0x00, 0x47, 0x43, 0x22, 0xa5, 0x00, 0x43, 0x22,
+ 0xaf, 0xca, 0xa8, 0x3a, 0x01, 0x27, 0xf1, 0x46, 0x08, 0xd7, 0x43, 0x22,
+ 0xcd, 0x00, 0x43, 0x22, 0xeb, 0xc6, 0x2d, 0xdf, 0x01, 0x16, 0x89, 0xc4,
+ 0x0d, 0xd3, 0x01, 0x16, 0x81, 0xc6, 0xbe, 0x45, 0x01, 0x55, 0xe1, 0xcd,
+ 0x6c, 0x89, 0x01, 0x72, 0x20, 0xc5, 0x14, 0x2d, 0x01, 0x52, 0x79, 0xcc,
+ 0x05, 0xbb, 0x01, 0x52, 0x70, 0xcd, 0x65, 0x2a, 0x01, 0x57, 0x61, 0xcb,
+ 0x9a, 0x1e, 0x01, 0x72, 0x48, 0xc3, 0x07, 0x6e, 0x01, 0x01, 0x9b, 0x03,
+ 0x22, 0xf7, 0xc6, 0xbf, 0x4d, 0x01, 0x55, 0xd8, 0xd2, 0x46, 0x93, 0x01,
+ 0x00, 0xf1, 0xc6, 0x7c, 0x8d, 0x01, 0x00, 0xb9, 0x46, 0x1a, 0x91, 0xc3,
+ 0x22, 0xfd, 0xdb, 0x15, 0x49, 0x01, 0x72, 0x10, 0xce, 0x52, 0xaf, 0x01,
+ 0x55, 0x18, 0xcc, 0x87, 0xf0, 0x01, 0x1d, 0x31, 0xc9, 0x50, 0xc7, 0x01,
+ 0x1d, 0x29, 0xcc, 0x83, 0x4c, 0x01, 0x1d, 0x21, 0x45, 0x01, 0xac, 0x43,
+ 0x23, 0x09, 0x46, 0x01, 0xab, 0x43, 0x23, 0x27, 0xcc, 0x8e, 0xbc, 0x01,
+ 0x3f, 0xa1, 0xcc, 0x11, 0x61, 0x01, 0x0f, 0xa0, 0xc2, 0x00, 0x58, 0x01,
+ 0x10, 0xfb, 0x03, 0x23, 0x33, 0xc9, 0xb3, 0x06, 0x0f, 0xaf, 0x78, 0xd6,
+ 0x2d, 0x19, 0x0f, 0xdb, 0xf1, 0xd6, 0x05, 0x11, 0x0f, 0xdb, 0xf8, 0x44,
+ 0x04, 0x93, 0xc3, 0x23, 0x37, 0xc3, 0x00, 0xba, 0x01, 0x2c, 0x80, 0xca,
+ 0xa5, 0xf6, 0x01, 0x1d, 0x69, 0xcc, 0x85, 0x74, 0x01, 0x1d, 0x61, 0xca,
+ 0x9f, 0x70, 0x01, 0x1d, 0x58, 0xc2, 0x00, 0x58, 0x01, 0x15, 0xfb, 0x03,
+ 0x23, 0x43, 0xd6, 0x15, 0x69, 0x0f, 0xdb, 0x70, 0xcd, 0x43, 0xa0, 0x0f,
+ 0xdc, 0x41, 0xce, 0x08, 0x19, 0x0f, 0xdc, 0x50, 0xd6, 0x31, 0x4f, 0x01,
+ 0x4b, 0x81, 0xcc, 0x0f, 0x84, 0x01, 0x80, 0x58, 0xcc, 0x00, 0xd3, 0x01,
+ 0x4c, 0x21, 0xcd, 0x6a, 0x7f, 0x01, 0x80, 0x78, 0xd9, 0x1d, 0x29, 0x0f,
+ 0xc4, 0xb1, 0xc9, 0xb5, 0x22, 0x01, 0x0f, 0x80, 0xcb, 0x05, 0x9b, 0x0f,
+ 0xc4, 0x91, 0x49, 0x00, 0x59, 0x43, 0x23, 0x49, 0xc5, 0x00, 0x62, 0x01,
+ 0x0e, 0xd9, 0xca, 0x54, 0x07, 0x01, 0x48, 0x78, 0x46, 0x00, 0x62, 0xc3,
+ 0x23, 0x5e, 0xd1, 0x54, 0x00, 0x01, 0x48, 0x80, 0xd6, 0x30, 0xb5, 0x01,
+ 0x0e, 0x61, 0x4a, 0x02, 0x18, 0x43, 0x23, 0x6a, 0x45, 0x01, 0xac, 0xc3,
+ 0x23, 0x76, 0x16, 0xc3, 0x23, 0xb2, 0xd4, 0x3e, 0x92, 0x01, 0x0e, 0x21,
+ 0xc8, 0xab, 0xed, 0x01, 0x0d, 0x33, 0x03, 0x23, 0xbe, 0x03, 0x43, 0x23,
+ 0xc4, 0x99, 0x01, 0x0e, 0x93, 0x03, 0x23, 0xd0, 0xc6, 0xd7, 0xf0, 0x01,
+ 0x48, 0xe0, 0xc8, 0x4f, 0xa2, 0x01, 0x0c, 0x39, 0xca, 0xaa, 0x06, 0x01,
+ 0x0c, 0x30, 0xc8, 0x4f, 0xa2, 0x01, 0x0c, 0x09, 0xc7, 0x0d, 0x7f, 0x01,
+ 0x0b, 0x70, 0xc3, 0x21, 0xcc, 0x00, 0xb7, 0xc1, 0x85, 0x00, 0xb7, 0xb8,
+ 0xc4, 0xe6, 0xfb, 0x00, 0xb7, 0x39, 0xc8, 0xc2, 0x23, 0x00, 0xb6, 0xc9,
+ 0xcb, 0x96, 0x6c, 0x00, 0xb6, 0x99, 0xc7, 0xc6, 0x2c, 0x00, 0xb6, 0x81,
+ 0xc7, 0xc6, 0x33, 0x00, 0xb6, 0x61, 0xc6, 0xd5, 0xe0, 0x00, 0xb6, 0x31,
+ 0xc8, 0xc2, 0xfb, 0x00, 0xb5, 0xf9, 0xca, 0x9f, 0xf2, 0x00, 0xb5, 0xe9,
+ 0xc7, 0xc6, 0x56, 0x00, 0xb5, 0x68, 0x90, 0x05, 0x28, 0x20, 0x90, 0x05,
+ 0x2b, 0xa8, 0x87, 0x05, 0x28, 0x30, 0x91, 0x05, 0x2b, 0xb8, 0x87, 0x05,
+ 0x28, 0x40, 0x91, 0x05, 0x2b, 0xc8, 0x87, 0x05, 0x28, 0x50, 0x91, 0x05,
+ 0x2b, 0xd8, 0x87, 0x05, 0x28, 0x49, 0x90, 0x05, 0x2f, 0x68, 0x90, 0x05,
+ 0x2a, 0xa8, 0x91, 0x05, 0x2b, 0xd0, 0x87, 0x05, 0x28, 0x59, 0x90, 0x05,
+ 0x2f, 0x80, 0x91, 0x05, 0x2b, 0xe1, 0x90, 0x05, 0x2e, 0x40, 0x87, 0x05,
+ 0x28, 0x78, 0x91, 0x05, 0x2c, 0x00, 0x87, 0x05, 0x28, 0x80, 0x87, 0x05,
+ 0x2f, 0xb3, 0x03, 0x23, 0xd4, 0x8b, 0x05, 0x29, 0xb1, 0x83, 0x05, 0x2a,
+ 0xe9, 0x91, 0x05, 0x2e, 0x73, 0x03, 0x23, 0xd8, 0x97, 0x05, 0x2d, 0x38,
+ 0x91, 0x05, 0x2c, 0x08, 0x87, 0x05, 0x28, 0xa8, 0x91, 0x05, 0x2c, 0x31,
+ 0x43, 0x00, 0xe7, 0x43, 0x23, 0xdc, 0x87, 0x05, 0x28, 0xe0, 0x91, 0x05,
+ 0x2c, 0x68, 0x87, 0x05, 0x30, 0x23, 0x03, 0x23, 0xfa, 0x8b, 0x05, 0x2a,
+ 0x21, 0x83, 0x05, 0x2b, 0x61, 0x91, 0x05, 0x2e, 0xe3, 0x03, 0x24, 0x02,
+ 0x97, 0x05, 0x2d, 0xa8, 0x87, 0x05, 0x29, 0x18, 0x91, 0x05, 0x2c, 0xa0,
+ 0x87, 0x05, 0x28, 0xb8, 0x91, 0x05, 0x2c, 0x40, 0x87, 0x05, 0x28, 0xc8,
+ 0x91, 0x05, 0x2c, 0x50, 0xc5, 0x03, 0x50, 0x01, 0x57, 0x79, 0xc5, 0x00,
+ 0x34, 0x01, 0x57, 0x80, 0xa5, 0x0c, 0x57, 0xf9, 0xa4, 0x0c, 0x57, 0xf1,
+ 0xa3, 0x0c, 0x57, 0xe9, 0xa2, 0x0c, 0x57, 0xe1, 0xa1, 0x0c, 0x57, 0xd9,
+ 0xa0, 0x0c, 0x57, 0xd1, 0x9f, 0x0c, 0x57, 0xc9, 0x9e, 0x0c, 0x57, 0xc1,
+ 0x9d, 0x0c, 0x57, 0xb8, 0xa6, 0x0c, 0x57, 0xb1, 0xa5, 0x0c, 0x57, 0xa9,
+ 0xa4, 0x0c, 0x57, 0xa1, 0xa3, 0x0c, 0x57, 0x99, 0xa2, 0x0c, 0x57, 0x91,
+ 0xa1, 0x0c, 0x57, 0x89, 0xa0, 0x0c, 0x57, 0x81, 0x9f, 0x0c, 0x57, 0x79,
+ 0x9e, 0x0c, 0x57, 0x71, 0x9d, 0x0c, 0x57, 0x68, 0xa6, 0x0c, 0x57, 0x61,
+ 0xa5, 0x0c, 0x57, 0x59, 0xa4, 0x0c, 0x57, 0x51, 0xa3, 0x0c, 0x57, 0x49,
+ 0xa2, 0x0c, 0x57, 0x41, 0xa1, 0x0c, 0x57, 0x39, 0xa0, 0x0c, 0x57, 0x31,
+ 0x9f, 0x0c, 0x57, 0x29, 0x9e, 0x0c, 0x57, 0x21, 0x9d, 0x0c, 0x57, 0x18,
+ 0xa6, 0x0c, 0x57, 0x11, 0xa5, 0x0c, 0x57, 0x09, 0xa4, 0x0c, 0x57, 0x01,
+ 0xa3, 0x0c, 0x56, 0xf9, 0xa2, 0x0c, 0x56, 0xf1, 0xa1, 0x0c, 0x56, 0xe9,
+ 0xa0, 0x0c, 0x56, 0xe1, 0x9f, 0x0c, 0x56, 0xd9, 0x9e, 0x0c, 0x56, 0xd1,
+ 0x9d, 0x0c, 0x56, 0xc8, 0xa6, 0x0c, 0x56, 0xc1, 0xa5, 0x0c, 0x56, 0xb9,
+ 0xa4, 0x0c, 0x56, 0xb1, 0xa3, 0x0c, 0x56, 0xa9, 0xa2, 0x0c, 0x56, 0xa1,
+ 0xa1, 0x0c, 0x56, 0x99, 0xa0, 0x0c, 0x56, 0x91, 0x9f, 0x0c, 0x56, 0x89,
+ 0x9e, 0x0c, 0x56, 0x81, 0x9d, 0x0c, 0x56, 0x78, 0xa6, 0x0c, 0x56, 0x71,
+ 0xa5, 0x0c, 0x56, 0x69, 0xa4, 0x0c, 0x56, 0x61, 0xa3, 0x0c, 0x56, 0x59,
+ 0xa2, 0x0c, 0x56, 0x51, 0xa1, 0x0c, 0x56, 0x49, 0xa0, 0x0c, 0x56, 0x41,
+ 0x9f, 0x0c, 0x56, 0x39, 0x9e, 0x0c, 0x56, 0x31, 0x9d, 0x0c, 0x56, 0x28,
+ 0xa6, 0x0c, 0x56, 0x21, 0xa5, 0x0c, 0x56, 0x19, 0xa4, 0x0c, 0x56, 0x11,
+ 0xa3, 0x0c, 0x56, 0x09, 0xa2, 0x0c, 0x56, 0x01, 0xa1, 0x0c, 0x55, 0xf9,
+ 0xa0, 0x0c, 0x55, 0xf1, 0x9f, 0x0c, 0x55, 0xe9, 0x9e, 0x0c, 0x55, 0xe1,
+ 0x9d, 0x0c, 0x55, 0xd8, 0xa6, 0x0c, 0x55, 0xd1, 0xa5, 0x0c, 0x55, 0xc9,
+ 0xa4, 0x0c, 0x55, 0xc1, 0xa3, 0x0c, 0x55, 0xb9, 0xa2, 0x0c, 0x55, 0xb1,
+ 0xa1, 0x0c, 0x55, 0xa9, 0xa0, 0x0c, 0x55, 0xa1, 0x9f, 0x0c, 0x55, 0x99,
+ 0x9e, 0x0c, 0x55, 0x91, 0x9d, 0x0c, 0x55, 0x88, 0xa6, 0x0c, 0x55, 0x81,
+ 0xa5, 0x0c, 0x55, 0x79, 0xa4, 0x0c, 0x55, 0x71, 0xa3, 0x0c, 0x55, 0x69,
+ 0xa2, 0x0c, 0x55, 0x61, 0xa1, 0x0c, 0x55, 0x59, 0xa0, 0x0c, 0x55, 0x51,
+ 0x9f, 0x0c, 0x55, 0x49, 0x9e, 0x0c, 0x55, 0x41, 0x9d, 0x0c, 0x55, 0x38,
+ 0xa6, 0x0c, 0x55, 0x31, 0xa5, 0x0c, 0x55, 0x29, 0xa4, 0x0c, 0x55, 0x21,
+ 0xa3, 0x0c, 0x55, 0x19, 0xa2, 0x0c, 0x55, 0x11, 0xa1, 0x0c, 0x55, 0x09,
+ 0xa0, 0x0c, 0x55, 0x01, 0x9f, 0x0c, 0x54, 0xf9, 0x9e, 0x0c, 0x54, 0xf1,
+ 0x9d, 0x0c, 0x54, 0xe8, 0xa6, 0x0c, 0x54, 0xe1, 0xa5, 0x0c, 0x54, 0xd9,
+ 0xa4, 0x0c, 0x54, 0xd1, 0xa3, 0x0c, 0x54, 0xc9, 0xa2, 0x0c, 0x54, 0xc1,
+ 0xa1, 0x0c, 0x54, 0xb9, 0xa0, 0x0c, 0x54, 0xb1, 0x9f, 0x0c, 0x54, 0xa9,
+ 0x9e, 0x0c, 0x54, 0xa1, 0x9d, 0x0c, 0x54, 0x98, 0xa6, 0x0c, 0x54, 0x91,
+ 0xa5, 0x0c, 0x54, 0x89, 0xa4, 0x0c, 0x54, 0x81, 0xa3, 0x0c, 0x54, 0x79,
+ 0xa2, 0x0c, 0x54, 0x71, 0xa1, 0x0c, 0x54, 0x69, 0xa0, 0x0c, 0x54, 0x61,
+ 0x9f, 0x0c, 0x54, 0x59, 0x9e, 0x0c, 0x54, 0x51, 0x9d, 0x0c, 0x54, 0x48,
+ 0xa6, 0x0c, 0x54, 0x41, 0xa5, 0x0c, 0x54, 0x39, 0xa4, 0x0c, 0x54, 0x31,
+ 0xa3, 0x0c, 0x54, 0x29, 0xa2, 0x0c, 0x54, 0x21, 0xa1, 0x0c, 0x54, 0x19,
+ 0xa0, 0x0c, 0x54, 0x11, 0x9f, 0x0c, 0x54, 0x09, 0x9e, 0x0c, 0x54, 0x01,
+ 0x9d, 0x0c, 0x53, 0xf8, 0xa6, 0x0c, 0x53, 0xf1, 0xa5, 0x0c, 0x53, 0xe9,
+ 0xa4, 0x0c, 0x53, 0xe1, 0xa3, 0x0c, 0x53, 0xd9, 0xa2, 0x0c, 0x53, 0xd1,
+ 0xa1, 0x0c, 0x53, 0xc9, 0xa0, 0x0c, 0x53, 0xc1, 0x9f, 0x0c, 0x53, 0xb9,
+ 0x9e, 0x0c, 0x53, 0xb1, 0x9d, 0x0c, 0x53, 0xa8, 0xa6, 0x0c, 0x53, 0xa1,
+ 0xa5, 0x0c, 0x53, 0x99, 0xa4, 0x0c, 0x53, 0x91, 0xa3, 0x0c, 0x53, 0x89,
+ 0xa2, 0x0c, 0x53, 0x81, 0xa1, 0x0c, 0x53, 0x79, 0xa0, 0x0c, 0x53, 0x71,
+ 0x9f, 0x0c, 0x53, 0x69, 0x9e, 0x0c, 0x53, 0x61, 0x9d, 0x0c, 0x53, 0x58,
+ 0xa6, 0x0c, 0x53, 0x51, 0xa5, 0x0c, 0x53, 0x49, 0xa4, 0x0c, 0x53, 0x41,
+ 0xa3, 0x0c, 0x53, 0x39, 0xa2, 0x0c, 0x53, 0x31, 0xa1, 0x0c, 0x53, 0x29,
+ 0xa0, 0x0c, 0x53, 0x21, 0x9f, 0x0c, 0x53, 0x19, 0x9e, 0x0c, 0x53, 0x11,
+ 0x9d, 0x0c, 0x53, 0x08, 0xa6, 0x0c, 0x53, 0x01, 0xa5, 0x0c, 0x52, 0xf9,
+ 0xa4, 0x0c, 0x52, 0xf1, 0xa3, 0x0c, 0x52, 0xe9, 0xa2, 0x0c, 0x52, 0xe1,
+ 0xa1, 0x0c, 0x52, 0xd9, 0xa0, 0x0c, 0x52, 0xd1, 0x9f, 0x0c, 0x52, 0xc9,
+ 0x9e, 0x0c, 0x52, 0xc1, 0x9d, 0x0c, 0x52, 0xb8, 0xa6, 0x0c, 0x52, 0xb1,
+ 0xa5, 0x0c, 0x52, 0xa9, 0xa4, 0x0c, 0x52, 0xa1, 0xa3, 0x0c, 0x52, 0x99,
+ 0xa2, 0x0c, 0x52, 0x91, 0xa1, 0x0c, 0x52, 0x89, 0xa0, 0x0c, 0x52, 0x81,
+ 0x9f, 0x0c, 0x52, 0x79, 0x9e, 0x0c, 0x52, 0x71, 0x9d, 0x0c, 0x52, 0x68,
+ 0xa6, 0x0c, 0x52, 0x61, 0xa5, 0x0c, 0x52, 0x59, 0xa4, 0x0c, 0x52, 0x51,
+ 0xa3, 0x0c, 0x52, 0x49, 0xa2, 0x0c, 0x52, 0x41, 0xa1, 0x0c, 0x52, 0x39,
+ 0xa0, 0x0c, 0x52, 0x31, 0x9f, 0x0c, 0x52, 0x29, 0x9e, 0x0c, 0x52, 0x21,
+ 0x9d, 0x0c, 0x52, 0x18, 0xa6, 0x0c, 0x52, 0x11, 0xa5, 0x0c, 0x52, 0x09,
+ 0xa4, 0x0c, 0x52, 0x01, 0xa3, 0x0c, 0x51, 0xf9, 0xa2, 0x0c, 0x51, 0xf1,
+ 0xa1, 0x0c, 0x51, 0xe9, 0xa0, 0x0c, 0x51, 0xe1, 0x9f, 0x0c, 0x51, 0xd9,
+ 0x9e, 0x0c, 0x51, 0xd1, 0x9d, 0x0c, 0x51, 0xc8, 0xa6, 0x0c, 0x51, 0xc1,
+ 0xa5, 0x0c, 0x51, 0xb9, 0xa4, 0x0c, 0x51, 0xb1, 0xa3, 0x0c, 0x51, 0xa9,
+ 0xa2, 0x0c, 0x51, 0xa1, 0xa1, 0x0c, 0x51, 0x99, 0xa0, 0x0c, 0x51, 0x91,
+ 0x9f, 0x0c, 0x51, 0x89, 0x9e, 0x0c, 0x51, 0x81, 0x9d, 0x0c, 0x51, 0x78,
+ 0xa6, 0x0c, 0x51, 0x71, 0xa5, 0x0c, 0x51, 0x69, 0xa4, 0x0c, 0x51, 0x61,
+ 0xa3, 0x0c, 0x51, 0x59, 0xa2, 0x0c, 0x51, 0x51, 0xa1, 0x0c, 0x51, 0x49,
+ 0xa0, 0x0c, 0x51, 0x41, 0x9f, 0x0c, 0x51, 0x39, 0x9e, 0x0c, 0x51, 0x31,
+ 0x9d, 0x0c, 0x51, 0x28, 0xa6, 0x0c, 0x51, 0x21, 0xa5, 0x0c, 0x51, 0x19,
+ 0xa4, 0x0c, 0x51, 0x11, 0xa3, 0x0c, 0x51, 0x09, 0xa2, 0x0c, 0x51, 0x01,
+ 0xa1, 0x0c, 0x50, 0xf9, 0xa0, 0x0c, 0x50, 0xf1, 0x9f, 0x0c, 0x50, 0xe9,
+ 0x9e, 0x0c, 0x50, 0xe1, 0x9d, 0x0c, 0x50, 0xd8, 0xa6, 0x0c, 0x50, 0xd1,
+ 0xa5, 0x0c, 0x50, 0xc9, 0xa4, 0x0c, 0x50, 0xc1, 0xa3, 0x0c, 0x50, 0xb9,
+ 0xa2, 0x0c, 0x50, 0xb1, 0xa1, 0x0c, 0x50, 0xa9, 0xa0, 0x0c, 0x50, 0xa1,
+ 0x9f, 0x0c, 0x50, 0x99, 0x9e, 0x0c, 0x50, 0x91, 0x9d, 0x0c, 0x50, 0x88,
+ 0xa6, 0x0c, 0x50, 0x81, 0xa5, 0x0c, 0x50, 0x79, 0xa4, 0x0c, 0x50, 0x71,
+ 0xa3, 0x0c, 0x50, 0x69, 0xa2, 0x0c, 0x50, 0x61, 0xa1, 0x0c, 0x50, 0x59,
+ 0xa0, 0x0c, 0x50, 0x51, 0x9f, 0x0c, 0x50, 0x49, 0x9e, 0x0c, 0x50, 0x41,
+ 0x9d, 0x0c, 0x50, 0x38, 0xa6, 0x0c, 0x50, 0x31, 0xa5, 0x0c, 0x50, 0x29,
+ 0xa4, 0x0c, 0x50, 0x21, 0xa3, 0x0c, 0x50, 0x19, 0xa2, 0x0c, 0x50, 0x11,
+ 0xa1, 0x0c, 0x50, 0x09, 0xa0, 0x0c, 0x50, 0x01, 0x9f, 0x0c, 0x4f, 0xf9,
+ 0x9e, 0x0c, 0x4f, 0xf1, 0x9d, 0x0c, 0x4f, 0xe8, 0xa6, 0x0c, 0x4f, 0xe1,
+ 0xa5, 0x0c, 0x4f, 0xd9, 0xa4, 0x0c, 0x4f, 0xd1, 0xa3, 0x0c, 0x4f, 0xc9,
+ 0xa2, 0x0c, 0x4f, 0xc1, 0xa1, 0x0c, 0x4f, 0xb9, 0xa0, 0x0c, 0x4f, 0xb1,
+ 0x9f, 0x0c, 0x4f, 0xa9, 0x9e, 0x0c, 0x4f, 0xa1, 0x9d, 0x0c, 0x4f, 0x98,
+ 0xa6, 0x0c, 0x4f, 0x91, 0xa5, 0x0c, 0x4f, 0x89, 0xa4, 0x0c, 0x4f, 0x81,
+ 0xa3, 0x0c, 0x4f, 0x79, 0xa2, 0x0c, 0x4f, 0x71, 0xa1, 0x0c, 0x4f, 0x69,
+ 0xa0, 0x0c, 0x4f, 0x61, 0x9f, 0x0c, 0x4f, 0x59, 0x9e, 0x0c, 0x4f, 0x51,
+ 0x9d, 0x0c, 0x4f, 0x48, 0xa6, 0x0c, 0x4f, 0x41, 0xa5, 0x0c, 0x4f, 0x39,
+ 0xa4, 0x0c, 0x4f, 0x31, 0xa3, 0x0c, 0x4f, 0x29, 0xa2, 0x0c, 0x4f, 0x21,
+ 0xa1, 0x0c, 0x4f, 0x19, 0xa0, 0x0c, 0x4f, 0x11, 0x9f, 0x0c, 0x4f, 0x09,
+ 0x9e, 0x0c, 0x4f, 0x01, 0x9d, 0x0c, 0x4e, 0xf8, 0xa6, 0x0c, 0x4e, 0xf1,
+ 0xa5, 0x0c, 0x4e, 0xe9, 0xa4, 0x0c, 0x4e, 0xe1, 0xa3, 0x0c, 0x4e, 0xd9,
+ 0xa2, 0x0c, 0x4e, 0xd1, 0xa1, 0x0c, 0x4e, 0xc9, 0xa0, 0x0c, 0x4e, 0xc1,
+ 0x9f, 0x0c, 0x4e, 0xb9, 0x9e, 0x0c, 0x4e, 0xb1, 0x9d, 0x0c, 0x4e, 0xa8,
+ 0xa6, 0x0c, 0x4e, 0xa1, 0xa5, 0x0c, 0x4e, 0x99, 0xa4, 0x0c, 0x4e, 0x91,
+ 0xa3, 0x0c, 0x4e, 0x89, 0xa2, 0x0c, 0x4e, 0x81, 0xa1, 0x0c, 0x4e, 0x79,
+ 0xa0, 0x0c, 0x4e, 0x71, 0x9f, 0x0c, 0x4e, 0x69, 0x9e, 0x0c, 0x4e, 0x61,
+ 0x9d, 0x0c, 0x4e, 0x58, 0xa6, 0x0c, 0x4e, 0x51, 0xa5, 0x0c, 0x4e, 0x49,
+ 0xa4, 0x0c, 0x4e, 0x41, 0xa3, 0x0c, 0x4e, 0x39, 0xa2, 0x0c, 0x4e, 0x31,
+ 0xa1, 0x0c, 0x4e, 0x29, 0xa0, 0x0c, 0x4e, 0x21, 0x9f, 0x0c, 0x4e, 0x19,
+ 0x9e, 0x0c, 0x4e, 0x11, 0x9d, 0x0c, 0x4e, 0x08, 0xa6, 0x0c, 0x4e, 0x01,
+ 0xa5, 0x0c, 0x4d, 0xf9, 0xa4, 0x0c, 0x4d, 0xf1, 0xa3, 0x0c, 0x4d, 0xe9,
+ 0xa2, 0x0c, 0x4d, 0xe1, 0xa1, 0x0c, 0x4d, 0xd9, 0xa0, 0x0c, 0x4d, 0xd1,
+ 0x9f, 0x0c, 0x4d, 0xc9, 0x9e, 0x0c, 0x4d, 0xc1, 0x9d, 0x0c, 0x4d, 0xb8,
+ 0xa6, 0x0c, 0x4d, 0xb1, 0xa5, 0x0c, 0x4d, 0xa9, 0xa4, 0x0c, 0x4d, 0xa1,
+ 0xa3, 0x0c, 0x4d, 0x99, 0xa2, 0x0c, 0x4d, 0x91, 0xa1, 0x0c, 0x4d, 0x89,
+ 0xa0, 0x0c, 0x4d, 0x81, 0x9f, 0x0c, 0x4d, 0x79, 0x9e, 0x0c, 0x4d, 0x71,
+ 0x9d, 0x0c, 0x4d, 0x68, 0xa6, 0x0c, 0x4d, 0x61, 0xa5, 0x0c, 0x4d, 0x59,
+ 0xa4, 0x0c, 0x4d, 0x51, 0xa3, 0x0c, 0x4d, 0x49, 0xa2, 0x0c, 0x4d, 0x41,
+ 0xa1, 0x0c, 0x4d, 0x39, 0xa0, 0x0c, 0x4d, 0x31, 0x9f, 0x0c, 0x4d, 0x29,
+ 0x9e, 0x0c, 0x4d, 0x21, 0x9d, 0x0c, 0x4d, 0x18, 0xa6, 0x0c, 0x4d, 0x11,
+ 0xa5, 0x0c, 0x4d, 0x09, 0xa4, 0x0c, 0x4d, 0x01, 0xa3, 0x0c, 0x4c, 0xf9,
+ 0xa2, 0x0c, 0x4c, 0xf1, 0xa1, 0x0c, 0x4c, 0xe9, 0xa0, 0x0c, 0x4c, 0xe1,
+ 0x9f, 0x0c, 0x4c, 0xd9, 0x9e, 0x0c, 0x4c, 0xd1, 0x9d, 0x0c, 0x4c, 0xc8,
+ 0xa6, 0x0c, 0x4c, 0xc1, 0xa5, 0x0c, 0x4c, 0xb9, 0xa4, 0x0c, 0x4c, 0xb1,
+ 0xa3, 0x0c, 0x4c, 0xa9, 0xa2, 0x0c, 0x4c, 0xa1, 0xa1, 0x0c, 0x4c, 0x99,
+ 0xa0, 0x0c, 0x4c, 0x91, 0x9f, 0x0c, 0x4c, 0x89, 0x9e, 0x0c, 0x4c, 0x81,
+ 0x9d, 0x0c, 0x4c, 0x78, 0xa6, 0x0c, 0x4c, 0x71, 0xa5, 0x0c, 0x4c, 0x69,
+ 0xa4, 0x0c, 0x4c, 0x61, 0xa3, 0x0c, 0x4c, 0x59, 0xa2, 0x0c, 0x4c, 0x51,
+ 0xa1, 0x0c, 0x4c, 0x49, 0xa0, 0x0c, 0x4c, 0x41, 0x9f, 0x0c, 0x4c, 0x39,
+ 0x9e, 0x0c, 0x4c, 0x31, 0x9d, 0x0c, 0x4c, 0x28, 0xa6, 0x0c, 0x4c, 0x21,
+ 0xa5, 0x0c, 0x4c, 0x19, 0xa4, 0x0c, 0x4c, 0x11, 0xa3, 0x0c, 0x4c, 0x09,
+ 0xa2, 0x0c, 0x4c, 0x01, 0xa1, 0x0c, 0x4b, 0xf9, 0xa0, 0x0c, 0x4b, 0xf1,
+ 0x9f, 0x0c, 0x4b, 0xe9, 0x9e, 0x0c, 0x4b, 0xe1, 0x9d, 0x0c, 0x4b, 0xd8,
+ 0xa6, 0x0c, 0x4b, 0xd1, 0xa5, 0x0c, 0x4b, 0xc9, 0xa4, 0x0c, 0x4b, 0xc1,
+ 0xa3, 0x0c, 0x4b, 0xb9, 0xa2, 0x0c, 0x4b, 0xb1, 0xa1, 0x0c, 0x4b, 0xa9,
+ 0xa0, 0x0c, 0x4b, 0xa1, 0x9f, 0x0c, 0x4b, 0x99, 0x9e, 0x0c, 0x4b, 0x91,
+ 0x9d, 0x0c, 0x4b, 0x88, 0xa6, 0x0c, 0x4b, 0x81, 0xa5, 0x0c, 0x4b, 0x79,
+ 0xa4, 0x0c, 0x4b, 0x71, 0xa3, 0x0c, 0x4b, 0x69, 0xa2, 0x0c, 0x4b, 0x61,
+ 0xa1, 0x0c, 0x4b, 0x59, 0xa0, 0x0c, 0x4b, 0x51, 0x9f, 0x0c, 0x4b, 0x49,
+ 0x9e, 0x0c, 0x4b, 0x41, 0x9d, 0x0c, 0x4b, 0x38, 0xa6, 0x0c, 0x4b, 0x31,
+ 0xa5, 0x0c, 0x4b, 0x29, 0xa4, 0x0c, 0x4b, 0x21, 0xa3, 0x0c, 0x4b, 0x19,
+ 0xa2, 0x0c, 0x4b, 0x11, 0xa1, 0x0c, 0x4b, 0x09, 0xa0, 0x0c, 0x4b, 0x01,
+ 0x9f, 0x0c, 0x4a, 0xf9, 0x9e, 0x0c, 0x4a, 0xf1, 0x9d, 0x0c, 0x4a, 0xe8,
+ 0xa6, 0x0c, 0x4a, 0xe1, 0xa5, 0x0c, 0x4a, 0xd9, 0xa4, 0x0c, 0x4a, 0xd1,
+ 0xa3, 0x0c, 0x4a, 0xc9, 0xa2, 0x0c, 0x4a, 0xc1, 0xa1, 0x0c, 0x4a, 0xb9,
+ 0xa0, 0x0c, 0x4a, 0xb1, 0x9f, 0x0c, 0x4a, 0xa9, 0x9e, 0x0c, 0x4a, 0xa1,
+ 0x9d, 0x0c, 0x4a, 0x98, 0xa6, 0x0c, 0x4a, 0x91, 0xa5, 0x0c, 0x4a, 0x89,
+ 0xa4, 0x0c, 0x4a, 0x81, 0xa3, 0x0c, 0x4a, 0x79, 0xa2, 0x0c, 0x4a, 0x71,
+ 0xa1, 0x0c, 0x4a, 0x69, 0xa0, 0x0c, 0x4a, 0x61, 0x9f, 0x0c, 0x4a, 0x59,
+ 0x9e, 0x0c, 0x4a, 0x51, 0x9d, 0x0c, 0x4a, 0x48, 0xa6, 0x0c, 0x4a, 0x41,
+ 0xa5, 0x0c, 0x4a, 0x39, 0xa4, 0x0c, 0x4a, 0x31, 0xa3, 0x0c, 0x4a, 0x29,
+ 0xa2, 0x0c, 0x4a, 0x21, 0xa1, 0x0c, 0x4a, 0x19, 0xa0, 0x0c, 0x4a, 0x11,
+ 0x9f, 0x0c, 0x4a, 0x09, 0x9e, 0x0c, 0x4a, 0x01, 0x9d, 0x0c, 0x49, 0xf8,
+ 0xa6, 0x0c, 0x49, 0xf1, 0xa5, 0x0c, 0x49, 0xe9, 0xa4, 0x0c, 0x49, 0xe1,
+ 0xa3, 0x0c, 0x49, 0xd9, 0xa2, 0x0c, 0x49, 0xd1, 0xa1, 0x0c, 0x49, 0xc9,
+ 0xa0, 0x0c, 0x49, 0xc1, 0x9f, 0x0c, 0x49, 0xb9, 0x9e, 0x0c, 0x49, 0xb1,
+ 0x9d, 0x0c, 0x49, 0xa8, 0xa6, 0x0c, 0x49, 0xa1, 0xa5, 0x0c, 0x49, 0x99,
+ 0xa4, 0x0c, 0x49, 0x91, 0xa3, 0x0c, 0x49, 0x89, 0xa2, 0x0c, 0x49, 0x81,
+ 0xa1, 0x0c, 0x49, 0x79, 0xa0, 0x0c, 0x49, 0x71, 0x9f, 0x0c, 0x49, 0x69,
+ 0x9e, 0x0c, 0x49, 0x61, 0x9d, 0x0c, 0x49, 0x58, 0xa6, 0x0c, 0x49, 0x51,
+ 0xa5, 0x0c, 0x49, 0x49, 0xa4, 0x0c, 0x49, 0x41, 0xa3, 0x0c, 0x49, 0x39,
+ 0xa2, 0x0c, 0x49, 0x31, 0xa1, 0x0c, 0x49, 0x29, 0xa0, 0x0c, 0x49, 0x21,
+ 0x9f, 0x0c, 0x49, 0x19, 0x9e, 0x0c, 0x49, 0x11, 0x9d, 0x0c, 0x49, 0x08,
+ 0xa6, 0x0c, 0x49, 0x01, 0xa5, 0x0c, 0x48, 0xf9, 0xa4, 0x0c, 0x48, 0xf1,
+ 0xa3, 0x0c, 0x48, 0xe9, 0xa2, 0x0c, 0x48, 0xe1, 0xa1, 0x0c, 0x48, 0xd9,
+ 0xa0, 0x0c, 0x48, 0xd1, 0x9f, 0x0c, 0x48, 0xc9, 0x9e, 0x0c, 0x48, 0xc1,
+ 0x9d, 0x0c, 0x48, 0xb8, 0xa6, 0x0c, 0x48, 0xb1, 0xa5, 0x0c, 0x48, 0xa9,
+ 0xa4, 0x0c, 0x48, 0xa1, 0xa3, 0x0c, 0x48, 0x99, 0xa2, 0x0c, 0x48, 0x91,
+ 0xa1, 0x0c, 0x48, 0x89, 0xa0, 0x0c, 0x48, 0x81, 0x9f, 0x0c, 0x48, 0x79,
+ 0x9e, 0x0c, 0x48, 0x71, 0x9d, 0x0c, 0x48, 0x68, 0xa6, 0x0c, 0x48, 0x61,
+ 0xa5, 0x0c, 0x48, 0x59, 0xa4, 0x0c, 0x48, 0x51, 0xa3, 0x0c, 0x48, 0x49,
+ 0xa2, 0x0c, 0x48, 0x41, 0xa1, 0x0c, 0x48, 0x39, 0xa0, 0x0c, 0x48, 0x31,
+ 0x9f, 0x0c, 0x48, 0x29, 0x9e, 0x0c, 0x48, 0x21, 0x9d, 0x0c, 0x48, 0x18,
+ 0xa6, 0x0c, 0x48, 0x11, 0xa5, 0x0c, 0x48, 0x09, 0xa4, 0x0c, 0x48, 0x01,
+ 0xa3, 0x0c, 0x47, 0xf9, 0xa2, 0x0c, 0x47, 0xf1, 0xa1, 0x0c, 0x47, 0xe9,
+ 0xa0, 0x0c, 0x47, 0xe1, 0x9f, 0x0c, 0x47, 0xd9, 0x9e, 0x0c, 0x47, 0xd1,
+ 0x9d, 0x0c, 0x47, 0xc8, 0xa6, 0x0c, 0x47, 0xc1, 0xa5, 0x0c, 0x47, 0xb9,
+ 0xa4, 0x0c, 0x47, 0xb1, 0xa3, 0x0c, 0x47, 0xa9, 0xa2, 0x0c, 0x47, 0xa1,
+ 0xa1, 0x0c, 0x47, 0x99, 0xa0, 0x0c, 0x47, 0x91, 0x9f, 0x0c, 0x47, 0x89,
+ 0x9e, 0x0c, 0x47, 0x81, 0x9d, 0x0c, 0x47, 0x78, 0xa6, 0x0c, 0x47, 0x71,
+ 0xa5, 0x0c, 0x47, 0x69, 0xa4, 0x0c, 0x47, 0x61, 0xa3, 0x0c, 0x47, 0x59,
+ 0xa2, 0x0c, 0x47, 0x51, 0xa1, 0x0c, 0x47, 0x49, 0xa0, 0x0c, 0x47, 0x41,
+ 0x9f, 0x0c, 0x47, 0x39, 0x9e, 0x0c, 0x47, 0x31, 0x9d, 0x0c, 0x47, 0x28,
+ 0xa6, 0x0c, 0x47, 0x21, 0xa5, 0x0c, 0x47, 0x19, 0xa4, 0x0c, 0x47, 0x11,
+ 0xa3, 0x0c, 0x47, 0x09, 0xa2, 0x0c, 0x47, 0x01, 0xa1, 0x0c, 0x46, 0xf9,
+ 0xa0, 0x0c, 0x46, 0xf1, 0x9f, 0x0c, 0x46, 0xe9, 0x9e, 0x0c, 0x46, 0xe1,
+ 0x9d, 0x0c, 0x46, 0xd8, 0xa6, 0x0c, 0x46, 0xd1, 0xa5, 0x0c, 0x46, 0xc9,
+ 0xa4, 0x0c, 0x46, 0xc1, 0xa3, 0x0c, 0x46, 0xb9, 0xa2, 0x0c, 0x46, 0xb1,
+ 0xa1, 0x0c, 0x46, 0xa9, 0xa0, 0x0c, 0x46, 0xa1, 0x9f, 0x0c, 0x46, 0x99,
+ 0x9e, 0x0c, 0x46, 0x91, 0x9d, 0x0c, 0x46, 0x88, 0xa6, 0x0c, 0x46, 0x81,
+ 0xa5, 0x0c, 0x46, 0x79, 0xa4, 0x0c, 0x46, 0x71, 0xa3, 0x0c, 0x46, 0x69,
+ 0xa2, 0x0c, 0x46, 0x61, 0xa1, 0x0c, 0x46, 0x59, 0xa0, 0x0c, 0x46, 0x51,
+ 0x9f, 0x0c, 0x46, 0x49, 0x9e, 0x0c, 0x46, 0x41, 0x9d, 0x0c, 0x46, 0x38,
+ 0xa6, 0x0c, 0x46, 0x31, 0xa5, 0x0c, 0x46, 0x29, 0xa4, 0x0c, 0x46, 0x21,
+ 0xa3, 0x0c, 0x46, 0x19, 0xa2, 0x0c, 0x46, 0x11, 0xa1, 0x0c, 0x46, 0x09,
+ 0xa0, 0x0c, 0x46, 0x01, 0x9f, 0x0c, 0x45, 0xf9, 0x9e, 0x0c, 0x45, 0xf1,
+ 0x9d, 0x0c, 0x45, 0xe8, 0xa6, 0x0c, 0x45, 0xe1, 0xa5, 0x0c, 0x45, 0xd9,
+ 0xa4, 0x0c, 0x45, 0xd1, 0xa3, 0x0c, 0x45, 0xc9, 0xa2, 0x0c, 0x45, 0xc1,
+ 0xa1, 0x0c, 0x45, 0xb9, 0xa0, 0x0c, 0x45, 0xb1, 0x9f, 0x0c, 0x45, 0xa9,
+ 0x9e, 0x0c, 0x45, 0xa1, 0x9d, 0x0c, 0x45, 0x98, 0xa6, 0x0c, 0x45, 0x91,
+ 0xa5, 0x0c, 0x45, 0x89, 0xa4, 0x0c, 0x45, 0x81, 0xa3, 0x0c, 0x45, 0x79,
+ 0xa2, 0x0c, 0x45, 0x71, 0xa1, 0x0c, 0x45, 0x69, 0xa0, 0x0c, 0x45, 0x61,
+ 0x9f, 0x0c, 0x45, 0x59, 0x9e, 0x0c, 0x45, 0x51, 0x9d, 0x0c, 0x45, 0x48,
+ 0xa6, 0x0c, 0x45, 0x41, 0xa5, 0x0c, 0x45, 0x39, 0xa4, 0x0c, 0x45, 0x31,
+ 0xa3, 0x0c, 0x45, 0x29, 0xa2, 0x0c, 0x45, 0x21, 0xa1, 0x0c, 0x45, 0x19,
+ 0xa0, 0x0c, 0x45, 0x11, 0x9f, 0x0c, 0x45, 0x09, 0x9e, 0x0c, 0x45, 0x01,
+ 0x9d, 0x0c, 0x44, 0xf8, 0xa6, 0x0c, 0x44, 0xf1, 0xa5, 0x0c, 0x44, 0xe9,
+ 0xa4, 0x0c, 0x44, 0xe1, 0xa3, 0x0c, 0x44, 0xd9, 0xa2, 0x0c, 0x44, 0xd1,
+ 0xa1, 0x0c, 0x44, 0xc9, 0xa0, 0x0c, 0x44, 0xc1, 0x9f, 0x0c, 0x44, 0xb9,
+ 0x9e, 0x0c, 0x44, 0xb1, 0x9d, 0x0c, 0x44, 0xa8, 0xa6, 0x0c, 0x44, 0xa1,
+ 0xa5, 0x0c, 0x44, 0x99, 0xa4, 0x0c, 0x44, 0x91, 0xa3, 0x0c, 0x44, 0x89,
+ 0xa2, 0x0c, 0x44, 0x81, 0xa1, 0x0c, 0x44, 0x79, 0xa0, 0x0c, 0x44, 0x71,
+ 0x9f, 0x0c, 0x44, 0x69, 0x9e, 0x0c, 0x44, 0x61, 0x9d, 0x0c, 0x44, 0x58,
+ 0xa6, 0x0c, 0x44, 0x51, 0xa5, 0x0c, 0x44, 0x49, 0xa4, 0x0c, 0x44, 0x41,
+ 0xa3, 0x0c, 0x44, 0x39, 0xa2, 0x0c, 0x44, 0x31, 0xa1, 0x0c, 0x44, 0x29,
+ 0xa0, 0x0c, 0x44, 0x21, 0x9f, 0x0c, 0x44, 0x19, 0x9e, 0x0c, 0x44, 0x11,
+ 0x9d, 0x0c, 0x44, 0x08, 0xa6, 0x0c, 0x44, 0x01, 0xa5, 0x0c, 0x43, 0xf9,
+ 0xa4, 0x0c, 0x43, 0xf1, 0xa3, 0x0c, 0x43, 0xe9, 0xa2, 0x0c, 0x43, 0xe1,
+ 0xa1, 0x0c, 0x43, 0xd9, 0xa0, 0x0c, 0x43, 0xd1, 0x9f, 0x0c, 0x43, 0xc9,
+ 0x9e, 0x0c, 0x43, 0xc1, 0x9d, 0x0c, 0x43, 0xb8, 0xa6, 0x0c, 0x43, 0xb1,
+ 0xa5, 0x0c, 0x43, 0xa9, 0xa4, 0x0c, 0x43, 0xa1, 0xa3, 0x0c, 0x43, 0x99,
+ 0xa2, 0x0c, 0x43, 0x91, 0xa1, 0x0c, 0x43, 0x89, 0xa0, 0x0c, 0x43, 0x81,
+ 0x9f, 0x0c, 0x43, 0x79, 0x9e, 0x0c, 0x43, 0x71, 0x9d, 0x0c, 0x43, 0x68,
+ 0xa6, 0x0c, 0x43, 0x61, 0xa5, 0x0c, 0x43, 0x59, 0xa4, 0x0c, 0x43, 0x51,
+ 0xa3, 0x0c, 0x43, 0x49, 0xa2, 0x0c, 0x43, 0x41, 0xa1, 0x0c, 0x43, 0x39,
+ 0xa0, 0x0c, 0x43, 0x31, 0x9f, 0x0c, 0x43, 0x29, 0x9e, 0x0c, 0x43, 0x21,
+ 0x9d, 0x0c, 0x43, 0x18, 0xa6, 0x0c, 0x43, 0x11, 0xa5, 0x0c, 0x43, 0x09,
+ 0xa4, 0x0c, 0x43, 0x01, 0xa3, 0x0c, 0x42, 0xf9, 0xa2, 0x0c, 0x42, 0xf1,
+ 0xa1, 0x0c, 0x42, 0xe9, 0xa0, 0x0c, 0x42, 0xe1, 0x9f, 0x0c, 0x42, 0xd9,
+ 0x9e, 0x0c, 0x42, 0xd1, 0x9d, 0x0c, 0x42, 0xc8, 0xa6, 0x0c, 0x42, 0xc1,
+ 0xa5, 0x0c, 0x42, 0xb9, 0xa4, 0x0c, 0x42, 0xb1, 0xa3, 0x0c, 0x42, 0xa9,
+ 0xa2, 0x0c, 0x42, 0xa1, 0xa1, 0x0c, 0x42, 0x99, 0xa0, 0x0c, 0x42, 0x91,
+ 0x9f, 0x0c, 0x42, 0x89, 0x9e, 0x0c, 0x42, 0x81, 0x9d, 0x0c, 0x42, 0x78,
+ 0xa6, 0x0c, 0x42, 0x71, 0xa5, 0x0c, 0x42, 0x69, 0xa4, 0x0c, 0x42, 0x61,
+ 0xa3, 0x0c, 0x42, 0x59, 0xa2, 0x0c, 0x42, 0x51, 0xa1, 0x0c, 0x42, 0x49,
+ 0xa0, 0x0c, 0x42, 0x41, 0x9f, 0x0c, 0x42, 0x39, 0x9e, 0x0c, 0x42, 0x31,
+ 0x9d, 0x0c, 0x42, 0x28, 0xa6, 0x0c, 0x42, 0x21, 0xa5, 0x0c, 0x42, 0x19,
+ 0xa4, 0x0c, 0x42, 0x11, 0xa3, 0x0c, 0x42, 0x09, 0xa2, 0x0c, 0x42, 0x01,
+ 0xa1, 0x0c, 0x41, 0xf9, 0xa0, 0x0c, 0x41, 0xf1, 0x9f, 0x0c, 0x41, 0xe9,
+ 0x9e, 0x0c, 0x41, 0xe1, 0x9d, 0x0c, 0x41, 0xd8, 0xa6, 0x0c, 0x41, 0xd1,
+ 0xa5, 0x0c, 0x41, 0xc9, 0xa4, 0x0c, 0x41, 0xc1, 0xa3, 0x0c, 0x41, 0xb9,
+ 0xa2, 0x0c, 0x41, 0xb1, 0xa1, 0x0c, 0x41, 0xa9, 0xa0, 0x0c, 0x41, 0xa1,
+ 0x9f, 0x0c, 0x41, 0x99, 0x9e, 0x0c, 0x41, 0x91, 0x9d, 0x0c, 0x41, 0x88,
+ 0xa6, 0x0c, 0x41, 0x81, 0xa5, 0x0c, 0x41, 0x79, 0xa4, 0x0c, 0x41, 0x71,
+ 0xa3, 0x0c, 0x41, 0x69, 0xa2, 0x0c, 0x41, 0x61, 0xa1, 0x0c, 0x41, 0x59,
+ 0xa0, 0x0c, 0x41, 0x51, 0x9f, 0x0c, 0x41, 0x49, 0x9e, 0x0c, 0x41, 0x41,
+ 0x9d, 0x0c, 0x41, 0x38, 0xa6, 0x0c, 0x41, 0x31, 0xa5, 0x0c, 0x41, 0x29,
+ 0xa4, 0x0c, 0x41, 0x21, 0xa3, 0x0c, 0x41, 0x19, 0xa2, 0x0c, 0x41, 0x11,
+ 0xa1, 0x0c, 0x41, 0x09, 0xa0, 0x0c, 0x41, 0x01, 0x9f, 0x0c, 0x40, 0xf9,
+ 0x9e, 0x0c, 0x40, 0xf1, 0x9d, 0x0c, 0x40, 0xe8, 0xa6, 0x0c, 0x40, 0xe1,
+ 0xa5, 0x0c, 0x40, 0xd9, 0xa4, 0x0c, 0x40, 0xd1, 0xa3, 0x0c, 0x40, 0xc9,
+ 0xa2, 0x0c, 0x40, 0xc1, 0xa1, 0x0c, 0x40, 0xb9, 0xa0, 0x0c, 0x40, 0xb1,
+ 0x9f, 0x0c, 0x40, 0xa9, 0x9e, 0x0c, 0x40, 0xa1, 0x9d, 0x0c, 0x40, 0x98,
+ 0xa6, 0x0c, 0x40, 0x91, 0xa5, 0x0c, 0x40, 0x89, 0xa4, 0x0c, 0x40, 0x81,
+ 0xa3, 0x0c, 0x40, 0x79, 0xa2, 0x0c, 0x40, 0x71, 0xa1, 0x0c, 0x40, 0x69,
+ 0xa0, 0x0c, 0x40, 0x61, 0x9f, 0x0c, 0x40, 0x59, 0x9e, 0x0c, 0x40, 0x51,
+ 0x9d, 0x0c, 0x40, 0x48, 0xa6, 0x0c, 0x40, 0x41, 0xa5, 0x0c, 0x40, 0x39,
+ 0xa4, 0x0c, 0x40, 0x31, 0xa3, 0x0c, 0x40, 0x29, 0xa2, 0x0c, 0x40, 0x21,
+ 0xa1, 0x0c, 0x40, 0x19, 0xa0, 0x0c, 0x40, 0x11, 0x9f, 0x0c, 0x40, 0x09,
+ 0x9e, 0x0c, 0x40, 0x00, 0xc2, 0x00, 0x44, 0x0b, 0x55, 0xc1, 0x83, 0x0b,
+ 0x55, 0x78, 0x83, 0x0b, 0x55, 0xa1, 0x44, 0x2f, 0xae, 0x43, 0x24, 0x06,
+ 0x17, 0xc3, 0x24, 0x12, 0x9a, 0x0b, 0x54, 0x79, 0x93, 0x0b, 0x54, 0x71,
+ 0x85, 0x0b, 0x54, 0x69, 0x9c, 0x0b, 0x54, 0x60, 0x9a, 0x0b, 0x54, 0xb9,
+ 0x93, 0x0b, 0x54, 0xb1, 0x9c, 0x0b, 0x54, 0xa9, 0x85, 0x0b, 0x54, 0xa0,
+ 0x9a, 0x0b, 0x54, 0x59, 0x93, 0x0b, 0x54, 0x51, 0x85, 0x0b, 0x54, 0x49,
+ 0x9c, 0x0b, 0x54, 0x40, 0xc8, 0xbb, 0xab, 0x08, 0xff, 0x89, 0xc6, 0xd6,
+ 0x52, 0x08, 0xff, 0x00, 0xc5, 0x45, 0xcf, 0x00, 0x5c, 0x19, 0xc4, 0x21,
+ 0x28, 0x00, 0x5e, 0x68, 0xc3, 0x78, 0x2a, 0x08, 0xff, 0x11, 0xc4, 0xcf,
+ 0x20, 0x08, 0xfe, 0xd0, 0xc4, 0x6d, 0x44, 0x08, 0xff, 0x09, 0xc3, 0x01,
+ 0x01, 0x08, 0xfe, 0xf1, 0xc6, 0xd3, 0xac, 0x08, 0xfe, 0xd8, 0x83, 0x00,
+ 0x5d, 0x19, 0xc2, 0x01, 0x01, 0x00, 0x5d, 0x48, 0x83, 0x00, 0x5d, 0x99,
+ 0xc2, 0x00, 0x96, 0x00, 0x5d, 0xa0, 0xcb, 0x8c, 0x94, 0x08, 0xfe, 0x29,
+ 0xd9, 0x1e, 0x3e, 0x08, 0xfe, 0x00, 0xc3, 0xe6, 0x4f, 0x08, 0xfe, 0x51,
+ 0xc3, 0xec, 0xe4, 0x08, 0xfe, 0x48, 0xc3, 0x3b, 0x0b, 0x00, 0xd3, 0xc9,
+ 0xc3, 0x82, 0xe0, 0x00, 0xd3, 0xc1, 0xc3, 0x82, 0xec, 0x00, 0xd3, 0xb8,
+ 0xc2, 0x00, 0x96, 0x00, 0xd2, 0xb1, 0xc2, 0x00, 0x9a, 0x00, 0xd2, 0xa8,
+ 0xc2, 0x01, 0x0e, 0x00, 0xd1, 0xe9, 0x83, 0x00, 0xd1, 0xd8, 0xc2, 0x01,
+ 0x0e, 0x00, 0xd1, 0xa9, 0x83, 0x00, 0xd1, 0xa0, 0xc2, 0x01, 0x0e, 0x00,
+ 0xd1, 0x59, 0x83, 0x00, 0xd1, 0x48, 0xc2, 0x01, 0x0e, 0x00, 0xd1, 0x29,
+ 0xc2, 0x23, 0xe3, 0x00, 0xd1, 0x21, 0x83, 0x00, 0xd1, 0x18, 0xc2, 0x03,
+ 0x76, 0x05, 0x54, 0x29, 0x91, 0x05, 0x54, 0x18, 0xc2, 0x03, 0x76, 0x05,
+ 0x54, 0x21, 0x91, 0x05, 0x54, 0x10, 0x00, 0xc3, 0x24, 0x22, 0xc3, 0x9e,
+ 0x50, 0x00, 0x72, 0xd8, 0xc2, 0x01, 0x04, 0x00, 0x70, 0x99, 0x97, 0x00,
+ 0x70, 0xc8, 0x89, 0x00, 0x70, 0x50, 0x15, 0xc3, 0x24, 0x2e, 0xc4, 0xe4,
+ 0x0b, 0x00, 0x71, 0x48, 0x83, 0x00, 0x71, 0x83, 0x03, 0x24, 0x3e, 0x8b,
+ 0x00, 0x71, 0xa3, 0x03, 0x24, 0x50, 0x97, 0x00, 0x71, 0xc3, 0x03, 0x24,
+ 0x54, 0x87, 0x00, 0x72, 0x01, 0x91, 0x00, 0x72, 0x10, 0xc3, 0x01, 0x5e,
+ 0x00, 0x70, 0x69, 0xc2, 0x06, 0x6e, 0x00, 0x71, 0x10, 0xc5, 0xda, 0xb8,
+ 0x00, 0x70, 0x79, 0xc3, 0x06, 0xa5, 0x00, 0x70, 0xa8, 0x42, 0x01, 0x8a,
+ 0xc3, 0x24, 0x5f, 0xc9, 0xad, 0x39, 0x00, 0x72, 0x60, 0x42, 0x01, 0x8a,
+ 0xc3, 0x24, 0x71, 0xc5, 0xdb, 0xe4, 0x00, 0x71, 0xd0, 0x90, 0x00, 0x70,
+ 0xf8, 0x00, 0xc3, 0x24, 0x7d, 0xc5, 0xe1, 0xf2, 0x00, 0x72, 0x31, 0xc6,
+ 0xd4, 0xb4, 0x00, 0x72, 0x38, 0xc4, 0x02, 0xb5, 0x00, 0x71, 0x29, 0xc5,
+ 0xdf, 0xa4, 0x00, 0x71, 0x60, 0x91, 0x0f, 0x15, 0x48, 0x97, 0x0f, 0x15,
+ 0x20, 0x94, 0x00, 0x60, 0x5b, 0x03, 0x24, 0x93, 0x8e, 0x00, 0x60, 0x62,
+ 0x03, 0x24, 0x97, 0xcb, 0x94, 0xeb, 0x00, 0x62, 0xe8, 0x83, 0x00, 0x60,
+ 0xf9, 0xc2, 0x01, 0x0e, 0x00, 0x61, 0x00, 0x83, 0x00, 0x61, 0x09, 0xc2,
+ 0x01, 0x0e, 0x00, 0x61, 0x10, 0x83, 0x00, 0x61, 0x89, 0xc2, 0x00, 0x9a,
+ 0x00, 0x62, 0xd0, 0x83, 0x00, 0x61, 0x99, 0xc2, 0x00, 0x96, 0x00, 0x61,
+ 0xa0, 0x8e, 0x08, 0xa4, 0x50, 0x94, 0x08, 0xa4, 0x40, 0xcb, 0x96, 0x40,
+ 0x00, 0x7e, 0x51, 0xcb, 0x8f, 0xa2, 0x00, 0x7e, 0x59, 0xcb, 0x9a, 0x4a,
+ 0x00, 0x7e, 0x60, 0x09, 0xc3, 0x24, 0x9b, 0xc8, 0xc2, 0xeb, 0x00, 0x78,
+ 0xf8, 0x09, 0xc3, 0x24, 0xad, 0xc9, 0xb5, 0xfa, 0x00, 0x7e, 0x70, 0x83,
+ 0x00, 0x7c, 0xd1, 0xc2, 0x01, 0x0e, 0x00, 0x7c, 0xd8, 0x83, 0x00, 0x7d,
+ 0x49, 0xc2, 0x01, 0x0e, 0x00, 0x7d, 0x50, 0x83, 0x00, 0x7c, 0xe1, 0xc2,
+ 0x01, 0x0e, 0x00, 0x7c, 0xe8, 0x83, 0x00, 0x7d, 0x59, 0xc2, 0x01, 0x0e,
+ 0x00, 0x7d, 0x60, 0xcc, 0x89, 0xf4, 0x00, 0x78, 0x11, 0xcd, 0x77, 0x31,
+ 0x00, 0x78, 0x18, 0x8a, 0x01, 0x69, 0xa0, 0x8a, 0x01, 0x69, 0xd0, 0x8a,
+ 0x01, 0x69, 0xf8, 0x44, 0x1f, 0xc3, 0xc3, 0x24, 0xbf, 0xc2, 0x3c, 0xd1,
+ 0x00, 0x46, 0x98, 0xc2, 0x3c, 0xd1, 0x00, 0x47, 0x99, 0x44, 0x1f, 0xc3,
+ 0x43, 0x24, 0xdb, 0xc9, 0xae, 0xe9, 0x00, 0x47, 0x09, 0xc2, 0x00, 0x5d,
+ 0x00, 0x46, 0xa9, 0xc3, 0x03, 0x2c, 0x00, 0x36, 0xe0, 0xce, 0x72, 0x8c,
+ 0x00, 0x47, 0x01, 0xc8, 0xbb, 0xc3, 0x00, 0x46, 0x50, 0xcb, 0x63, 0xe2,
+ 0x00, 0x46, 0xc0, 0x8a, 0x00, 0x46, 0x69, 0xc2, 0x01, 0x5b, 0x00, 0x30,
+ 0xb8, 0xdb, 0x18, 0x58, 0x00, 0x46, 0x58, 0xc4, 0x42, 0x6e, 0x00, 0x37,
+ 0x21, 0x45, 0x2f, 0xc9, 0x43, 0x24, 0xf1, 0xc9, 0x05, 0xde, 0x00, 0x36,
+ 0xd9, 0xc2, 0x00, 0x45, 0x00, 0x30, 0xa8, 0xc7, 0xc4, 0x88, 0x00, 0x36,
+ 0xc9, 0x48, 0x1b, 0x0d, 0x43, 0x24, 0xfd, 0xc5, 0x00, 0x34, 0x00, 0x46,
+ 0x81, 0xcd, 0x05, 0x3a, 0x07, 0xf3, 0xf1, 0xcb, 0x6a, 0x72, 0x07, 0xf3,
+ 0xf8, 0x4b, 0x00, 0x29, 0xc3, 0x25, 0x0f, 0xc5, 0x00, 0x34, 0x07, 0xdd,
+ 0xa9, 0xc5, 0x03, 0x50, 0x07, 0xdd, 0xa0, 0x53, 0x22, 0x94, 0xc3, 0x25,
+ 0x1b, 0xc5, 0x00, 0x34, 0x07, 0xdd, 0xb9, 0xc5, 0x03, 0x50, 0x07, 0xdd,
+ 0xb0, 0xc5, 0x00, 0x34, 0x07, 0xdd, 0x99, 0xc5, 0x03, 0x50, 0x07, 0xdd,
+ 0x90, 0xd0, 0x5f, 0x8f, 0x00, 0x37, 0xf1, 0xc9, 0x32, 0x17, 0x00, 0x37,
+ 0xe8, 0xda, 0x1d, 0x42, 0x00, 0x30, 0x81, 0xc4, 0xe5, 0xdf, 0x00, 0x30,
+ 0x21, 0xc3, 0xae, 0xef, 0x00, 0x30, 0x19, 0xc3, 0x3b, 0xb3, 0x00, 0x30,
+ 0x08, 0x4d, 0x06, 0x7a, 0xc3, 0x25, 0x27, 0x45, 0x1a, 0x6a, 0xc3, 0x25,
+ 0x33, 0x44, 0x1a, 0x74, 0xc3, 0x25, 0x3d, 0x44, 0x2f, 0x22, 0x43, 0x25,
+ 0x47, 0x44, 0x2f, 0x22, 0xc3, 0x25, 0x53, 0x4d, 0x06, 0x7a, 0xc3, 0x25,
+ 0x5f, 0x45, 0x1a, 0x6a, 0xc3, 0x25, 0x6b, 0x45, 0x2f, 0xc8, 0x43, 0x25,
+ 0x75, 0xd1, 0x50, 0xf2, 0x07, 0xe2, 0xa1, 0xda, 0x1b, 0xf0, 0x07, 0xe2,
+ 0x99, 0x45, 0x1a, 0x6a, 0xc3, 0x25, 0x7f, 0x46, 0x2f, 0xc8, 0xc3, 0x25,
+ 0x89, 0xdd, 0x10, 0x68, 0x07, 0xe6, 0xc8, 0x49, 0xaa, 0xde, 0xc3, 0x25,
+ 0x95, 0x4a, 0xa6, 0x3c, 0x43, 0x25, 0xbd, 0x4d, 0x06, 0x7a, 0xc3, 0x25,
+ 0xd5, 0x45, 0x1a, 0x6a, 0xc3, 0x25, 0xe1, 0x45, 0x53, 0x23, 0xc3, 0x25,
+ 0xf1, 0x0a, 0xc3, 0x26, 0x01, 0x45, 0x2f, 0xc8, 0xc3, 0x26, 0x0d, 0x44,
+ 0x71, 0x66, 0xc3, 0x26, 0x1d, 0x44, 0x2f, 0x22, 0x43, 0x26, 0x29, 0x47,
+ 0x02, 0xea, 0xc3, 0x26, 0x35, 0x0e, 0x43, 0x26, 0x59, 0xcd, 0x05, 0x3a,
+ 0x07, 0xe7, 0xd1, 0xca, 0x2a, 0xb4, 0x07, 0xe8, 0xb0, 0x0b, 0xc3, 0x26,
+ 0x63, 0x45, 0x01, 0xac, 0x43, 0x26, 0x6f, 0xcc, 0x05, 0x3b, 0x07, 0xe1,
+ 0x59, 0xcb, 0x10, 0x7a, 0x07, 0xe5, 0xe0, 0xca, 0x2a, 0xb4, 0x07, 0xe8,
+ 0xa9, 0xcd, 0x05, 0x3a, 0x07, 0xe7, 0xc8, 0x4d, 0x06, 0x7a, 0xc3, 0x26,
+ 0x81, 0x45, 0x1a, 0x6a, 0xc3, 0x26, 0x8d, 0x45, 0x2f, 0xc8, 0xc3, 0x26,
+ 0x97, 0x44, 0x2f, 0x22, 0x43, 0x26, 0xa1, 0x43, 0x06, 0x7c, 0xc3, 0x26,
+ 0xad, 0x43, 0x14, 0x8a, 0xc3, 0x26, 0xb9, 0xd1, 0x54, 0x44, 0x07, 0xef,
+ 0x90, 0x47, 0x0d, 0xca, 0xc3, 0x26, 0xc9, 0xd2, 0x4d, 0x16, 0x07, 0xea,
+ 0x70, 0x48, 0xae, 0x24, 0xc3, 0x26, 0xe1, 0x46, 0x38, 0x7b, 0x43, 0x27,
+ 0x11, 0x44, 0x2f, 0x22, 0xc3, 0x27, 0x17, 0x4d, 0x06, 0x7a, 0xc3, 0x27,
+ 0x23, 0xcf, 0x62, 0xb2, 0x07, 0xe3, 0x99, 0x45, 0x1a, 0x6a, 0xc3, 0x27,
+ 0x2f, 0xcf, 0x65, 0x55, 0x07, 0xe3, 0x89, 0xce, 0x71, 0x66, 0x07, 0xe3,
+ 0x81, 0x45, 0x53, 0x23, 0xc3, 0x27, 0x45, 0x0a, 0xc3, 0x27, 0x4f, 0x45,
+ 0x2f, 0xc8, 0x43, 0x27, 0x5b, 0x43, 0x08, 0x86, 0xc3, 0x27, 0x65, 0x03,
+ 0x43, 0x27, 0x71, 0xcb, 0x6a, 0x72, 0x07, 0xe7, 0x81, 0x0b, 0xc3, 0x27,
+ 0x7d, 0xca, 0x2a, 0xb4, 0x07, 0xe4, 0x99, 0x45, 0x01, 0xac, 0x43, 0x27,
+ 0x89, 0xcd, 0x05, 0x3a, 0x07, 0xe2, 0xd1, 0xca, 0x2a, 0xb4, 0x07, 0xe4,
+ 0xb0, 0xcd, 0x05, 0x3a, 0x07, 0xe2, 0xc9, 0xca, 0x2a, 0xb4, 0x07, 0xe4,
+ 0xa8, 0xcc, 0x05, 0x3b, 0x07, 0xe2, 0xb9, 0xcb, 0x10, 0x7a, 0x07, 0xe6,
+ 0xe0, 0x0b, 0xc3, 0x27, 0x95, 0xd3, 0x40, 0x43, 0x07, 0xed, 0x78, 0x43,
+ 0x08, 0x86, 0xc3, 0x27, 0xa1, 0x43, 0x0a, 0x18, 0x43, 0x27, 0xad, 0xcd,
+ 0x05, 0x3a, 0x07, 0xe2, 0x81, 0xca, 0x2a, 0xb4, 0x07, 0xe4, 0x78, 0xcd,
+ 0x05, 0x3a, 0x07, 0xe2, 0x79, 0xca, 0x2a, 0xb4, 0x07, 0xe4, 0x70, 0x0b,
+ 0xc3, 0x27, 0xb7, 0xca, 0x2a, 0xb4, 0x07, 0xe4, 0x61, 0x45, 0x01, 0xac,
+ 0xc3, 0x27, 0xc3, 0xcb, 0x6a, 0x72, 0x07, 0xe7, 0x70, 0xcc, 0x05, 0x3b,
+ 0x07, 0xe2, 0x69, 0xcb, 0x10, 0x7a, 0x07, 0xe6, 0xa0, 0x0b, 0xc3, 0x27,
+ 0xcf, 0x45, 0x01, 0xac, 0x43, 0x27, 0xdb, 0x45, 0x1a, 0x6a, 0xc3, 0x27,
+ 0xf3, 0x44, 0x0e, 0x1c, 0xc3, 0x28, 0x09, 0x44, 0x2f, 0x22, 0xc3, 0x28,
+ 0x19, 0x45, 0x06, 0x7a, 0xc3, 0x28, 0x25, 0x46, 0x53, 0x23, 0xc3, 0x28,
+ 0x37, 0x45, 0x50, 0xae, 0xc3, 0x28, 0x43, 0x46, 0x2f, 0xc8, 0x43, 0x28,
+ 0x4f, 0x46, 0x57, 0xda, 0xc3, 0x28, 0x5b, 0xd1, 0x50, 0xae, 0x07, 0xe0,
+ 0xd1, 0x46, 0x2f, 0xc8, 0xc3, 0x28, 0x67, 0x4d, 0x06, 0x7a, 0xc3, 0x28,
+ 0x73, 0x44, 0x2f, 0x22, 0x43, 0x28, 0x7f, 0xca, 0x2a, 0xb4, 0x07, 0xe4,
+ 0x39, 0xcd, 0x05, 0x3a, 0x07, 0xe2, 0x20, 0x48, 0x06, 0x7f, 0xc3, 0x28,
+ 0x8b, 0x45, 0x01, 0xac, 0xc3, 0x28, 0x97, 0xcd, 0x05, 0x3a, 0x07, 0xf7,
+ 0xd9, 0xca, 0x2a, 0xb4, 0x07, 0xf7, 0xe0, 0xca, 0x2a, 0xb4, 0x07, 0xe4,
+ 0x29, 0x0b, 0xc3, 0x28, 0xa3, 0xcb, 0x6a, 0x72, 0x07, 0xe7, 0x69, 0x45,
+ 0x01, 0xac, 0x43, 0x28, 0xaf, 0x0b, 0xc3, 0x28, 0xbb, 0x4a, 0x75, 0x68,
+ 0x43, 0x28, 0xc7, 0x43, 0x0a, 0x18, 0xc3, 0x28, 0xd3, 0xcf, 0x63, 0x57,
+ 0x07, 0xe6, 0x68, 0x0b, 0xc3, 0x28, 0xdd, 0x45, 0x01, 0xac, 0x43, 0x28,
+ 0xe9, 0x47, 0x0e, 0x15, 0xc3, 0x28, 0xfb, 0x4a, 0x9d, 0x0e, 0x43, 0x29,
+ 0x13, 0xca, 0x2a, 0xb4, 0x07, 0xe3, 0xe9, 0xcd, 0x05, 0x3a, 0x07, 0xe1,
+ 0x90, 0xca, 0x2a, 0xb4, 0x07, 0xe3, 0xe1, 0xcd, 0x05, 0x3a, 0x07, 0xe1,
+ 0x88, 0x0b, 0xc3, 0x29, 0x19, 0xd3, 0x40, 0x43, 0x07, 0xee, 0x08, 0x0b,
+ 0xc3, 0x29, 0x25, 0x4a, 0x75, 0x68, 0x43, 0x29, 0x31, 0xcc, 0x05, 0x3b,
+ 0x07, 0xe1, 0x71, 0xcb, 0x10, 0x7a, 0x07, 0xe5, 0xf8, 0xcc, 0x05, 0x3b,
+ 0x07, 0xe1, 0x69, 0xcb, 0x10, 0x7a, 0x07, 0xe5, 0xf0, 0x44, 0x2f, 0x22,
+ 0xc3, 0x29, 0x3d, 0x4d, 0x06, 0x7a, 0xc3, 0x29, 0x49, 0xcf, 0x62, 0xb2,
+ 0x07, 0xe3, 0x69, 0x45, 0x1a, 0x6a, 0xc3, 0x29, 0x55, 0xcf, 0x65, 0x55,
+ 0x07, 0xe3, 0x59, 0xce, 0x71, 0x66, 0x07, 0xe3, 0x51, 0x45, 0x53, 0x23,
+ 0xc3, 0x29, 0x65, 0x0a, 0xc3, 0x29, 0x6f, 0x46, 0x2f, 0xc8, 0x43, 0x29,
+ 0x7b, 0xe0, 0x08, 0x27, 0x07, 0xe2, 0xe0, 0xce, 0x6f, 0xd0, 0x07, 0xea,
+ 0x0b, 0x03, 0x29, 0x87, 0x46, 0xd6, 0x2e, 0xc3, 0x29, 0x91, 0xd2, 0x49,
+ 0xda, 0x07, 0xef, 0xb0, 0xd1, 0x50, 0xf2, 0x07, 0xe2, 0x51, 0x45, 0x06,
+ 0x7a, 0xc3, 0x29, 0x9d, 0x45, 0x1a, 0x6a, 0xc3, 0x29, 0xa9, 0x45, 0x53,
+ 0x23, 0xc3, 0x29, 0xb9, 0x44, 0x1a, 0x74, 0xc3, 0x29, 0xc3, 0x45, 0x2f,
+ 0xc8, 0x43, 0x29, 0xcd, 0xcc, 0x05, 0x3b, 0x07, 0xe1, 0x41, 0xcb, 0x10,
+ 0x7a, 0x07, 0xe5, 0xc8, 0xcc, 0x05, 0x3b, 0x07, 0xe1, 0x29, 0xcb, 0x10,
+ 0x7a, 0x07, 0xe5, 0xb8, 0x0b, 0xc3, 0x29, 0xd7, 0x4a, 0x75, 0x68, 0x43,
+ 0x29, 0xe3, 0x0b, 0xc3, 0x29, 0xef, 0x45, 0x01, 0xac, 0x43, 0x29, 0xfb,
+ 0xcc, 0x05, 0x3b, 0x07, 0xe1, 0x11, 0xcb, 0x10, 0x7a, 0x07, 0xe5, 0xa0,
+ 0xcd, 0x05, 0x3a, 0x07, 0xe8, 0x81, 0xca, 0x2a, 0xb4, 0x07, 0xe9, 0x60,
+ 0xca, 0x2a, 0xb4, 0x07, 0xe9, 0x19, 0xcd, 0x05, 0x3a, 0x07, 0xe8, 0x38,
+ 0xca, 0x2a, 0xb4, 0x07, 0xe9, 0x21, 0xcd, 0x05, 0x3a, 0x07, 0xe8, 0x40,
+ 0x0b, 0xc3, 0x2a, 0x07, 0xca, 0x2a, 0xb4, 0x07, 0xdf, 0xd0, 0xc8, 0xbb,
+ 0xc3, 0x00, 0x36, 0x63, 0x03, 0x2a, 0x13, 0xc2, 0x08, 0x86, 0x00, 0x32,
+ 0x0a, 0x03, 0x2a, 0x17, 0xc3, 0x1b, 0x4e, 0x00, 0x46, 0x41, 0xc4, 0x8b,
+ 0xde, 0x00, 0x31, 0xd3, 0x03, 0x2a, 0x1b, 0xc2, 0x0e, 0x14, 0x00, 0x35,
+ 0x7b, 0x03, 0x2a, 0x1f, 0xc3, 0xeb, 0x19, 0x00, 0x35, 0x9a, 0x03, 0x2a,
+ 0x23, 0xc2, 0x01, 0x02, 0x00, 0x32, 0x23, 0x03, 0x2a, 0x27, 0xc7, 0xc9,
+ 0x9e, 0x00, 0x45, 0x68, 0xc2, 0x00, 0xa8, 0x00, 0x31, 0x63, 0x03, 0x2a,
+ 0x2b, 0x8a, 0x00, 0x34, 0xc2, 0x03, 0x2a, 0x2f, 0x47, 0xc1, 0xfb, 0xc3,
+ 0x2a, 0x33, 0xc2, 0x01, 0x5b, 0x00, 0x31, 0xcb, 0x03, 0x2a, 0x48, 0xc3,
+ 0x00, 0xcd, 0x00, 0x31, 0x3b, 0x03, 0x2a, 0x4c, 0x87, 0x00, 0x36, 0xa8,
+ 0xc4, 0xe6, 0xc7, 0x00, 0x35, 0x4b, 0x03, 0x2a, 0x50, 0x03, 0xc3, 0x2a,
+ 0x54, 0x47, 0x06, 0x73, 0xc3, 0x2a, 0x61, 0xc3, 0x1a, 0x74, 0x00, 0x31,
+ 0x72, 0x03, 0x2a, 0x73, 0xc4, 0xe7, 0x2b, 0x00, 0x34, 0x33, 0x03, 0x2a,
+ 0x77, 0xc3, 0x2c, 0xf7, 0x00, 0x33, 0xcb, 0x03, 0x2a, 0x84, 0xc2, 0x08,
+ 0x86, 0x00, 0x31, 0x53, 0x03, 0x2a, 0x91, 0xc2, 0x00, 0x45, 0x00, 0x31,
+ 0xbb, 0x03, 0x2a, 0x9e, 0x0a, 0x43, 0x2a, 0xa2, 0x00, 0xc3, 0x2a, 0xba,
+ 0xc2, 0x08, 0x86, 0x00, 0x35, 0x32, 0x03, 0x2a, 0xd0, 0xc2, 0x08, 0x86,
+ 0x00, 0x32, 0x53, 0x03, 0x2a, 0xd4, 0x97, 0x00, 0x36, 0x42, 0x03, 0x2a,
+ 0xd8, 0xc2, 0x08, 0x86, 0x00, 0x31, 0x8b, 0x03, 0x2a, 0xdc, 0xcb, 0x95,
+ 0xfe, 0x00, 0x45, 0x61, 0xc4, 0x3e, 0xff, 0x00, 0x35, 0xdb, 0x03, 0x2a,
+ 0xe0, 0xc3, 0x71, 0x66, 0x00, 0x34, 0x8a, 0x03, 0x2a, 0xe4, 0x8a, 0x00,
+ 0x31, 0x43, 0x03, 0x2a, 0xe8, 0xc2, 0x08, 0x86, 0x00, 0x33, 0xda, 0x03,
+ 0x2a, 0xf5, 0x42, 0x00, 0x48, 0xc3, 0x2a, 0xf9, 0x00, 0x43, 0x2a, 0xff,
+ 0x00, 0x43, 0x2b, 0x14, 0x00, 0x43, 0x2b, 0x2a, 0xc2, 0x01, 0x5b, 0x00,
+ 0x31, 0x93, 0x03, 0x2b, 0x3a, 0x8a, 0x00, 0x31, 0xc2, 0x03, 0x2b, 0x3e,
+ 0xcb, 0x90, 0x52, 0x00, 0x45, 0x89, 0xc2, 0x00, 0x5d, 0x00, 0x31, 0xab,
+ 0x03, 0x2b, 0x42, 0xc4, 0xe6, 0x7b, 0x00, 0x31, 0xa3, 0x03, 0x2b, 0x46,
+ 0xc8, 0xbd, 0xa3, 0x00, 0x35, 0x51, 0xc3, 0x03, 0x2c, 0x00, 0x31, 0x9b,
+ 0x03, 0x2b, 0x4a, 0xcf, 0x08, 0x2a, 0x00, 0x33, 0x80, 0x03, 0xc3, 0x2b,
+ 0x4e, 0x42, 0x0c, 0x25, 0xc3, 0x2b, 0x65, 0xc2, 0x00, 0xaf, 0x00, 0x34,
+ 0x73, 0x03, 0x2b, 0x75, 0xc3, 0x2f, 0x22, 0x00, 0x34, 0x23, 0x03, 0x2b,
+ 0x79, 0x47, 0x39, 0x6b, 0x43, 0x2b, 0x7d, 0x00, 0xc3, 0x2b, 0x8f, 0x8a,
+ 0x00, 0x35, 0x22, 0x03, 0x2b, 0x9b, 0x00, 0x43, 0x2b, 0x9f, 0xc3, 0x14,
+ 0xf3, 0x00, 0x32, 0x2b, 0x03, 0x2b, 0xb1, 0xc3, 0x00, 0xfb, 0x00, 0x30,
+ 0xe0, 0x00, 0x43, 0x2b, 0xb5, 0x89, 0x00, 0x35, 0x6b, 0x03, 0x2b, 0xc1,
+ 0xc3, 0x02, 0x14, 0x00, 0x32, 0x33, 0x03, 0x2b, 0xce, 0xc3, 0x2f, 0x22,
+ 0x00, 0x34, 0x1a, 0x03, 0x2b, 0xd2, 0x03, 0xc3, 0x2b, 0xd6, 0xc2, 0x08,
+ 0x86, 0x00, 0x32, 0x3b, 0x03, 0x2b, 0xe6, 0xc9, 0xaf, 0x94, 0x00, 0x33,
+ 0xa2, 0x03, 0x2b, 0xea, 0x4c, 0x6d, 0x08, 0xc3, 0x2b, 0xee, 0x46, 0x39,
+ 0x6c, 0x43, 0x2c, 0x56, 0x8e, 0x0f, 0x70, 0x19, 0x86, 0x0f, 0x70, 0xc8,
+ 0x8a, 0x0f, 0x70, 0x41, 0x45, 0x13, 0xc9, 0x43, 0x2c, 0x6e, 0xc2, 0x08,
+ 0x86, 0x0f, 0x70, 0xb1, 0xc2, 0x00, 0x2f, 0x0f, 0x70, 0xc0, 0x03, 0xc3,
+ 0x2c, 0xac, 0xc3, 0x8c, 0x10, 0x0f, 0x74, 0x09, 0xc4, 0x2f, 0xc8, 0x0f,
+ 0x74, 0x11, 0x42, 0x0c, 0x25, 0xc3, 0x2c, 0xb8, 0x0a, 0xc3, 0x2c, 0xc0,
+ 0xc3, 0x7c, 0xad, 0x0f, 0x74, 0x29, 0x42, 0x05, 0x5c, 0xc3, 0x2c, 0xcc,
+ 0x16, 0xc3, 0x2c, 0xd6, 0xc3, 0x2f, 0x22, 0x0f, 0x74, 0x49, 0xc3, 0x0e,
+ 0x1c, 0x0f, 0x74, 0x59, 0xc4, 0x1a, 0x6a, 0x0f, 0x74, 0x61, 0xc4, 0x3e,
+ 0xff, 0x0f, 0x74, 0x69, 0x15, 0xc3, 0x2c, 0xe6, 0xc3, 0xae, 0x23, 0x0f,
+ 0x74, 0x81, 0xc3, 0x0e, 0x13, 0x0f, 0x74, 0x91, 0xc3, 0x71, 0x66, 0x0f,
+ 0x74, 0x99, 0xc4, 0x39, 0x7a, 0x0f, 0x74, 0xb9, 0xc5, 0x91, 0x7b, 0x0f,
+ 0x74, 0xd8, 0xc3, 0x8c, 0x10, 0x0f, 0x73, 0x09, 0xc4, 0x2f, 0xc8, 0x0f,
+ 0x73, 0x11, 0x0a, 0xc3, 0x2c, 0xf8, 0x16, 0xc3, 0x2d, 0x04, 0xc3, 0x2f,
+ 0x22, 0x0f, 0x73, 0x49, 0x0d, 0xc3, 0x2d, 0x16, 0xc4, 0x1a, 0x6a, 0x0f,
+ 0x73, 0x61, 0xc4, 0x3e, 0xff, 0x0f, 0x73, 0x69, 0x15, 0xc3, 0x2d, 0x22,
+ 0xc3, 0x01, 0xcc, 0x0f, 0x73, 0x79, 0xc3, 0xae, 0x23, 0x0f, 0x73, 0x81,
+ 0xc3, 0x0e, 0x13, 0x0f, 0x73, 0x91, 0x06, 0xc3, 0x2d, 0x34, 0xc3, 0x75,
+ 0x64, 0x0f, 0x73, 0xd1, 0xc5, 0x91, 0x7b, 0x0f, 0x73, 0xd8, 0xc2, 0x08,
+ 0x86, 0x0f, 0x71, 0x21, 0xc2, 0x00, 0x45, 0x0f, 0x71, 0x38, 0xc2, 0x0e,
+ 0x14, 0x0f, 0x71, 0x51, 0xc3, 0x1a, 0x74, 0x0f, 0x71, 0xb8, 0xc3, 0x03,
+ 0x2c, 0x0f, 0x71, 0x71, 0xc2, 0x00, 0x5d, 0x0f, 0x71, 0x89, 0xc4, 0xe6,
+ 0x7b, 0x0f, 0x71, 0xa0, 0xc2, 0x08, 0x86, 0x0f, 0x71, 0xa9, 0xc3, 0x3b,
+ 0x5c, 0x0f, 0x71, 0xb0, 0xc8, 0x35, 0x24, 0x00, 0x47, 0xf1, 0xcd, 0x05,
+ 0x3a, 0x07, 0xf3, 0xc1, 0xcb, 0x6a, 0x72, 0x07, 0xf3, 0xc8, 0xce, 0x05,
+ 0x39, 0x07, 0xf3, 0x90, 0xc9, 0x18, 0x19, 0x00, 0x47, 0xa9, 0xc4, 0x01,
+ 0xbd, 0x00, 0x47, 0xa1, 0xc8, 0x0a, 0x1f, 0x00, 0x32, 0xf0, 0xce, 0x0b,
+ 0x79, 0x00, 0x44, 0x29, 0x4b, 0x92, 0x62, 0xc3, 0x2d, 0x40, 0xce, 0x70,
+ 0x78, 0x07, 0xf3, 0x88, 0x47, 0x4e, 0x73, 0xc3, 0x2d, 0x4c, 0xc4, 0x1d,
+ 0x17, 0x0f, 0xb8, 0xf8, 0xc8, 0x88, 0x9c, 0x0f, 0xb9, 0x71, 0xc6, 0x4e,
+ 0x6c, 0x0f, 0xb9, 0x38, 0xcb, 0x01, 0x3c, 0x01, 0x1a, 0xb9, 0xc6, 0xd6,
+ 0x40, 0x01, 0x1a, 0x60, 0xc2, 0x00, 0x44, 0x01, 0x1a, 0x68, 0xc5, 0x3f,
+ 0x7d, 0x01, 0x19, 0xd1, 0xc4, 0x02, 0x52, 0x01, 0x19, 0xc8, 0xc7, 0x0d,
+ 0x7f, 0x08, 0x08, 0xd9, 0xc8, 0x4f, 0xa2, 0x08, 0x09, 0x20, 0xc7, 0x0d,
+ 0x7f, 0x08, 0x08, 0xd1, 0xc8, 0x4f, 0xa2, 0x08, 0x09, 0x18, 0xc7, 0x0d,
+ 0x7f, 0x08, 0x08, 0xe9, 0xc8, 0x4f, 0xa2, 0x08, 0x09, 0x30, 0xc7, 0x0d,
+ 0x7f, 0x08, 0x08, 0xe1, 0xc8, 0x4f, 0xa2, 0x08, 0x09, 0x28, 0xc7, 0x3f,
+ 0x7b, 0x0f, 0xdd, 0x71, 0x47, 0x01, 0x8c, 0xc3, 0x2d, 0x56, 0x46, 0x05,
+ 0x07, 0xc3, 0x2d, 0x62, 0xc5, 0x0c, 0xa3, 0x01, 0x2b, 0x98, 0xc2, 0x00,
+ 0x6a, 0x01, 0x2b, 0xbb, 0x03, 0x2d, 0x74, 0x4a, 0xa9, 0x70, 0x43, 0x2d,
+ 0x7a, 0x0a, 0xc3, 0x2d, 0x86, 0xc4, 0x00, 0xcd, 0x01, 0x28, 0xc1, 0xc5,
+ 0x00, 0x47, 0x01, 0x28, 0xa0, 0xc5, 0x00, 0x47, 0x01, 0x2b, 0x81, 0xc4,
+ 0x00, 0xcd, 0x01, 0x2b, 0x78, 0xc4, 0x00, 0xcd, 0x01, 0x2b, 0x71, 0xc5,
+ 0x00, 0x47, 0x01, 0x2b, 0x68, 0xca, 0x03, 0x76, 0x01, 0x29, 0xe1, 0xc4,
+ 0x00, 0xcd, 0x01, 0x29, 0x21, 0xc5, 0x00, 0x47, 0x01, 0x28, 0xe0, 0xc9,
+ 0x11, 0x98, 0x01, 0x2b, 0xf9, 0xc3, 0x00, 0xce, 0x01, 0x28, 0xd8, 0xca,
+ 0x03, 0x76, 0x01, 0x29, 0x99, 0xc4, 0x00, 0xcd, 0x01, 0x28, 0x99, 0xc5,
+ 0x00, 0x47, 0x01, 0x28, 0x78, 0xca, 0x03, 0x76, 0x01, 0x2b, 0x61, 0xc4,
+ 0x00, 0xcd, 0x01, 0x2b, 0x19, 0xc5, 0x00, 0x47, 0x01, 0x2b, 0x00, 0xc8,
+ 0x11, 0x8a, 0x01, 0x29, 0x49, 0xc5, 0x11, 0x8d, 0x01, 0x28, 0x88, 0xc8,
+ 0x11, 0x8a, 0x01, 0x29, 0x09, 0xc5, 0x11, 0x8d, 0x01, 0x28, 0x68, 0xc8,
+ 0x12, 0x13, 0x01, 0x29, 0x39, 0xc5, 0x04, 0xc6, 0x01, 0x28, 0x90, 0xc8,
+ 0x12, 0x13, 0x01, 0x28, 0xf9, 0xc5, 0x04, 0xc6, 0x01, 0x28, 0x70, 0xa3,
+ 0x0f, 0xd9, 0xa0, 0xa3, 0x0f, 0xd9, 0x61, 0xa2, 0x0f, 0xd8, 0xe8, 0xa3,
+ 0x0f, 0xd9, 0xc0, 0xa3, 0x0f, 0xd9, 0xd0, 0xa3, 0x0f, 0xd9, 0xd8, 0xd7,
+ 0x29, 0x20, 0x0f, 0xd2, 0x60, 0x44, 0x8f, 0xc9, 0xc3, 0x2d, 0x92, 0xc3,
+ 0x01, 0x5e, 0x01, 0x32, 0xd2, 0x03, 0x2d, 0xab, 0x49, 0x29, 0x20, 0x43,
+ 0x2d, 0xb1, 0x49, 0x29, 0x20, 0x43, 0x2d, 0xbd, 0x49, 0x29, 0x20, 0x43,
+ 0x2d, 0xc9, 0x49, 0x29, 0x20, 0x43, 0x2d, 0xd5, 0x0d, 0xc3, 0x2d, 0xe1,
+ 0xc5, 0xad, 0xae, 0x0f, 0xd1, 0x29, 0xc4, 0xd4, 0xf2, 0x0f, 0xd1, 0x31,
+ 0xc6, 0xba, 0xfd, 0x0f, 0xd1, 0x39, 0xc4, 0xe8, 0x9b, 0x0f, 0xd1, 0x48,
+ 0xcf, 0x13, 0x43, 0x01, 0x5d, 0x71, 0xcd, 0x1b, 0xc9, 0x01, 0x5d, 0x60,
+ 0xcf, 0x09, 0x58, 0x01, 0x5d, 0x41, 0xd0, 0x01, 0x37, 0x01, 0x5d, 0x48,
+ 0xcf, 0x09, 0x58, 0x01, 0x5d, 0x51, 0xd0, 0x01, 0x37, 0x01, 0x5d, 0x58,
+ 0xcd, 0x1b, 0xc9, 0x01, 0x5d, 0x69, 0xcf, 0x13, 0x43, 0x01, 0x5d, 0x78,
+ 0x45, 0x01, 0xac, 0xc3, 0x2d, 0xed, 0xca, 0x9f, 0x8e, 0x01, 0x1f, 0xd0,
+ 0x15, 0xc3, 0x2d, 0xff, 0xc7, 0x3f, 0x7b, 0x01, 0x59, 0x49, 0xc7, 0x0b,
+ 0xa0, 0x01, 0x59, 0x50, 0xc8, 0xc3, 0xc3, 0x01, 0x1f, 0xc9, 0xc6, 0x8d,
+ 0x90, 0x0f, 0xa9, 0x91, 0xc7, 0x60, 0xa4, 0x01, 0x5e, 0x00, 0xd8, 0x21,
+ 0x44, 0x0f, 0xbc, 0x19, 0xce, 0x70, 0x32, 0x01, 0x2d, 0xf1, 0xc8, 0x00,
+ 0x52, 0x01, 0x2d, 0xe1, 0xcf, 0x68, 0x52, 0x01, 0x1f, 0x60, 0xd8, 0x23,
+ 0xcc, 0x0f, 0xad, 0x19, 0xdb, 0x00, 0x8c, 0x01, 0x5c, 0xf8, 0xcd, 0x7b,
+ 0x4e, 0x01, 0x3a, 0xb1, 0xc4, 0x25, 0x0d, 0x01, 0x33, 0x31, 0xcf, 0x63,
+ 0xed, 0x01, 0x4f, 0x51, 0xc7, 0x60, 0xa4, 0x01, 0x5e, 0x09, 0xc8, 0xbf,
+ 0x83, 0x01, 0x5e, 0xf0, 0xc4, 0x57, 0xf1, 0x01, 0x36, 0x19, 0xc3, 0x14,
+ 0xe9, 0x01, 0x36, 0x10, 0xd8, 0x21, 0x44, 0x0f, 0xbc, 0x11, 0x12, 0xc3,
+ 0x2e, 0x0b, 0xce, 0x70, 0x32, 0x01, 0x2d, 0xc1, 0xc8, 0x00, 0x52, 0x01,
+ 0x2d, 0xb3, 0x03, 0x2e, 0x17, 0xcf, 0x68, 0x52, 0x01, 0x1f, 0x4a, 0x03,
+ 0x2e, 0x1d, 0x46, 0x00, 0x4c, 0xc3, 0x2e, 0x23, 0x4a, 0x01, 0x58, 0x43,
+ 0x2e, 0x38, 0xd8, 0x21, 0x44, 0x0f, 0xbc, 0x01, 0xc7, 0x40, 0x3c, 0x01,
+ 0x2e, 0x21, 0xce, 0x70, 0x32, 0x01, 0x2e, 0x11, 0xc8, 0x00, 0x52, 0x01,
+ 0x2e, 0x01, 0xcf, 0x68, 0x52, 0x01, 0x1f, 0x52, 0x03, 0x2e, 0x44, 0x00,
+ 0xc3, 0x2e, 0x4a, 0x46, 0x00, 0x4c, 0x43, 0x2e, 0x56, 0xcd, 0x2c, 0x11,
+ 0x01, 0x2f, 0x19, 0xce, 0x21, 0x4e, 0x01, 0x2f, 0x10, 0x45, 0x00, 0x3f,
+ 0xc3, 0x2e, 0x62, 0xc5, 0x01, 0xea, 0x01, 0x2f, 0xe0, 0xd5, 0x2d, 0x72,
+ 0x01, 0x1f, 0xbb, 0x03, 0x2e, 0x74, 0xc6, 0x3f, 0x7c, 0x01, 0x59, 0x28,
+ 0xc8, 0x60, 0xa3, 0x01, 0x5e, 0x28, 0xc8, 0x60, 0xa3, 0x01, 0x5e, 0x40,
+ 0xd5, 0x36, 0x43, 0x01, 0x1f, 0xa3, 0x03, 0x2e, 0x7a, 0xc6, 0x0b, 0xa1,
+ 0x01, 0x59, 0x38, 0xce, 0x21, 0x4e, 0x01, 0x2f, 0x29, 0xcd, 0x2c, 0x11,
+ 0x01, 0x2f, 0x20, 0xc5, 0x25, 0x0c, 0x01, 0x33, 0x28, 0x46, 0x01, 0xab,
+ 0x43, 0x2e, 0x80, 0xcd, 0x79, 0x7a, 0x00, 0xdb, 0x88, 0xcd, 0x79, 0x7a,
+ 0x00, 0xdb, 0x80, 0x00, 0x43, 0x2e, 0x9a, 0xc4, 0xc1, 0x57, 0x00, 0xd9,
+ 0x19, 0xcf, 0x65, 0x73, 0x00, 0xd8, 0xf1, 0xc5, 0xdc, 0xd4, 0x00, 0xd8,
+ 0xe8, 0xc9, 0x65, 0x79, 0x00, 0xd9, 0x01, 0xc9, 0xb7, 0x08, 0x00, 0xd8,
+ 0xf8, 0xc4, 0xc5, 0xbf, 0x00, 0xd9, 0xfb, 0x03, 0x2e, 0xa6, 0xc6, 0xc6,
+ 0x3b, 0x00, 0xda, 0x00, 0x97, 0x0b, 0x50, 0x29, 0x83, 0x0b, 0x50, 0x19,
+ 0xc2, 0x07, 0x44, 0x0b, 0x51, 0xb1, 0x91, 0x0b, 0x51, 0x79, 0x07, 0xc3,
+ 0x2e, 0xac, 0xc3, 0xe0, 0xa5, 0x0b, 0x50, 0xb0, 0xc4, 0xc5, 0x40, 0x0b,
+ 0x51, 0xb9, 0x0a, 0xc3, 0x2e, 0xb4, 0xc3, 0xdc, 0x59, 0x0b, 0x50, 0xa9,
+ 0x8b, 0x0b, 0x50, 0xa1, 0xc2, 0x6f, 0x95, 0x0b, 0x50, 0x90, 0xc2, 0x00,
+ 0xdd, 0x0b, 0x51, 0xa9, 0x03, 0x43, 0x2e, 0xc2, 0x04, 0xc3, 0x2e, 0xca,
+ 0x91, 0x0b, 0x51, 0x99, 0x83, 0x0b, 0x51, 0x91, 0xc4, 0xe6, 0x53, 0x0b,
+ 0x50, 0x68, 0x07, 0xc3, 0x2e, 0xd6, 0x97, 0x0b, 0x51, 0x19, 0x0b, 0x43,
+ 0x2e, 0xe4, 0xc2, 0x8b, 0x5c, 0x0b, 0x51, 0x71, 0x8b, 0x0b, 0x51, 0x69,
+ 0x83, 0x0b, 0x50, 0x50, 0x83, 0x0b, 0x51, 0x61, 0xc2, 0x0e, 0x78, 0x0b,
+ 0x51, 0x08, 0xc3, 0x8c, 0x60, 0x0b, 0x51, 0x51, 0x07, 0x43, 0x2e, 0xee,
+ 0x09, 0xc3, 0x2e, 0xf8, 0x8b, 0x0b, 0x51, 0x21, 0xc3, 0x12, 0xf2, 0x0b,
+ 0x51, 0x01, 0xc3, 0x01, 0x0d, 0x0b, 0x50, 0xf1, 0x0c, 0xc3, 0x2f, 0x04,
+ 0x97, 0x0b, 0x50, 0xcb, 0x03, 0x2f, 0x10, 0xc3, 0x4a, 0x42, 0x0b, 0x50,
+ 0x79, 0xc2, 0x18, 0x7a, 0x0b, 0x50, 0x48, 0x83, 0x0b, 0x50, 0xe9, 0xc2,
+ 0x8b, 0x5c, 0x0b, 0x50, 0xd8, 0x0a, 0xc3, 0x2f, 0x16, 0x42, 0x00, 0x34,
+ 0x43, 0x2f, 0x26, 0x17, 0xc3, 0x2f, 0x30, 0xc3, 0xdc, 0x59, 0x0b, 0x4c,
+ 0xf0, 0xc4, 0xe8, 0xf7, 0x0b, 0x4b, 0xa1, 0x8b, 0x0b, 0x4f, 0xf1, 0x91,
+ 0x0b, 0x4f, 0xc9, 0x07, 0xc3, 0x2f, 0x38, 0x17, 0x43, 0x2f, 0x40, 0x09,
+ 0xc3, 0x2f, 0x50, 0x06, 0xc3, 0x2f, 0x6f, 0x42, 0x01, 0x0d, 0xc3, 0x2f,
+ 0x7d, 0x83, 0x0b, 0x4f, 0xb3, 0x03, 0x2f, 0x87, 0x0c, 0xc3, 0x2f, 0x8b,
+ 0x16, 0xc3, 0x2f, 0x95, 0x1c, 0xc3, 0x2f, 0xa1, 0x43, 0x73, 0xf9, 0xc3,
+ 0x2f, 0xad, 0xc3, 0xc0, 0x68, 0x0b, 0x4d, 0x40, 0x03, 0xc3, 0x2f, 0xb9,
+ 0x11, 0xc3, 0x2f, 0xce, 0x07, 0xc3, 0x2f, 0xd9, 0x17, 0x43, 0x2f, 0xe4,
+ 0x97, 0x0b, 0x4d, 0x03, 0x03, 0x2f, 0xf1, 0x03, 0xc3, 0x2f, 0xfd, 0x8b,
+ 0x0b, 0x4f, 0xbb, 0x03, 0x30, 0x0a, 0x07, 0xc3, 0x30, 0x0e, 0x91, 0x0b,
+ 0x4c, 0xc2, 0x03, 0x30, 0x18, 0x03, 0xc3, 0x30, 0x1e, 0xc3, 0xdc, 0x59,
+ 0x0b, 0x4f, 0x79, 0xc5, 0xdc, 0xa2, 0x0b, 0x4c, 0x10, 0xc2, 0x00, 0xeb,
+ 0x0b, 0x4b, 0x69, 0x0a, 0xc3, 0x30, 0x26, 0xc4, 0xaa, 0xbc, 0x0b, 0x4c,
+ 0xd9, 0x07, 0xc3, 0x30, 0x39, 0xc2, 0x0e, 0x30, 0x0b, 0x4c, 0x28, 0x11,
+ 0xc3, 0x30, 0x41, 0x03, 0xc3, 0x30, 0x4d, 0x97, 0x0b, 0x4f, 0x69, 0xc5,
+ 0xdd, 0x92, 0x0b, 0x4d, 0x98, 0xc2, 0x00, 0xeb, 0x0b, 0x4b, 0x51, 0x07,
+ 0x43, 0x30, 0x5b, 0x42, 0x00, 0x34, 0xc3, 0x30, 0x65, 0xc2, 0x00, 0xe5,
+ 0x0b, 0x4f, 0xf9, 0x83, 0x0b, 0x4f, 0xdb, 0x03, 0x30, 0x6f, 0xc2, 0x01,
+ 0x04, 0x0b, 0x4f, 0xd1, 0x8b, 0x0b, 0x4f, 0x73, 0x03, 0x30, 0x7e, 0xc2,
+ 0x02, 0x52, 0x0b, 0x4e, 0x49, 0xc3, 0x8c, 0x60, 0x0b, 0x4e, 0x31, 0xc4,
+ 0xe6, 0x9f, 0x0b, 0x4d, 0x79, 0x42, 0x83, 0xdc, 0x43, 0x30, 0x84, 0x83,
+ 0x0b, 0x4d, 0xdb, 0x03, 0x30, 0x8e, 0x17, 0xc3, 0x30, 0x92, 0xc2, 0x01,
+ 0xe6, 0x0b, 0x4f, 0x59, 0xc2, 0x00, 0xeb, 0x0b, 0x4e, 0x98, 0x17, 0xc3,
+ 0x30, 0x9d, 0x43, 0x8b, 0x5c, 0xc3, 0x30, 0xb1, 0x42, 0x0c, 0xfe, 0xc3,
+ 0x30, 0xbd, 0x0b, 0xc3, 0x30, 0xce, 0xc2, 0x01, 0xf8, 0x0b, 0x4d, 0x60,
+ 0x09, 0xc3, 0x30, 0xd8, 0x15, 0xc3, 0x30, 0xe0, 0x16, 0xc3, 0x30, 0xf0,
+ 0x06, 0xc3, 0x30, 0xfa, 0x8b, 0x0b, 0x4a, 0xd9, 0x97, 0x0b, 0x4a, 0xb9,
+ 0x1b, 0xc3, 0x31, 0x0a, 0x0c, 0x43, 0x31, 0x20, 0x07, 0xc3, 0x31, 0x39,
+ 0xc2, 0x8b, 0x5c, 0x0b, 0x4a, 0xf9, 0xc2, 0x01, 0x0a, 0x0b, 0x48, 0xf1,
+ 0xc3, 0x90, 0xd5, 0x0b, 0x47, 0xb0, 0x03, 0xc3, 0x31, 0x47, 0x07, 0xc3,
+ 0x31, 0x53, 0x04, 0xc3, 0x31, 0x5d, 0xc3, 0x9d, 0xab, 0x0b, 0x4a, 0xf1,
+ 0x97, 0x0b, 0x4a, 0x99, 0x08, 0xc3, 0x31, 0x6c, 0x42, 0x83, 0xdc, 0xc3,
+ 0x31, 0x7f, 0xc3, 0x1d, 0xf1, 0x0b, 0x48, 0xc8, 0x07, 0xc3, 0x31, 0x91,
+ 0x97, 0x0b, 0x48, 0x8b, 0x03, 0x31, 0x9b, 0x8b, 0x0b, 0x4b, 0x09, 0xc2,
+ 0x8b, 0x5c, 0x0b, 0x4a, 0x61, 0xc2, 0x08, 0xc6, 0x0b, 0x4a, 0x58, 0x97,
+ 0x0b, 0x4a, 0x4b, 0x03, 0x31, 0xa1, 0xc3, 0xe0, 0xa5, 0x0b, 0x4a, 0xb1,
+ 0x07, 0xc3, 0x31, 0xaf, 0xc4, 0xe4, 0x37, 0x0b, 0x49, 0x08, 0x17, 0xc3,
+ 0x31, 0xb7, 0x03, 0xc3, 0x31, 0xc5, 0x0a, 0xc3, 0x31, 0xcd, 0xc2, 0x00,
+ 0xf2, 0x0b, 0x49, 0x21, 0xc5, 0x8c, 0x5f, 0x0b, 0x48, 0x60, 0xc8, 0xbc,
+ 0xbb, 0x0b, 0x48, 0xa1, 0xc2, 0x0e, 0x30, 0x0b, 0x4b, 0x28, 0xc6, 0xd7,
+ 0x60, 0x0b, 0x48, 0x29, 0x17, 0xc3, 0x31, 0xe1, 0xc2, 0x01, 0x04, 0x0b,
+ 0x48, 0x68, 0x43, 0x03, 0x2d, 0xc3, 0x31, 0xeb, 0xc2, 0x23, 0x68, 0x0b,
+ 0x4a, 0x71, 0xc3, 0x7a, 0x15, 0x0b, 0x49, 0x38, 0x17, 0xc3, 0x31, 0xf7,
+ 0x07, 0xc3, 0x32, 0x01, 0xc2, 0x01, 0xf8, 0x0b, 0x49, 0xa9, 0xc2, 0x03,
+ 0x1e, 0x0b, 0x49, 0x68, 0xc4, 0x8c, 0x5f, 0x0b, 0x4a, 0x41, 0xc2, 0x0e,
+ 0x30, 0x0b, 0x48, 0x90, 0xc4, 0xaa, 0xbc, 0x0b, 0x47, 0xd9, 0xc2, 0x01,
+ 0xf8, 0x0b, 0x47, 0x90, 0x07, 0xc3, 0x32, 0x0b, 0x17, 0xc3, 0x32, 0x19,
+ 0xc2, 0x0e, 0x30, 0x0b, 0x45, 0x49, 0xc5, 0x5e, 0x15, 0x0b, 0x45, 0x40,
+ 0x0a, 0xc3, 0x32, 0x23, 0x07, 0xc3, 0x32, 0x2f, 0xc4, 0xa7, 0x96, 0x0b,
+ 0x45, 0x78, 0x07, 0xc3, 0x32, 0x3b, 0x42, 0x01, 0x33, 0xc3, 0x32, 0x45,
+ 0xc6, 0xd0, 0x94, 0x0b, 0x45, 0x60, 0xc2, 0x01, 0x04, 0x0b, 0x47, 0x79,
+ 0x0b, 0x43, 0x32, 0x51, 0xc2, 0x14, 0xf7, 0x0b, 0x47, 0x69, 0x97, 0x0b,
+ 0x46, 0x69, 0x03, 0x43, 0x32, 0x5b, 0x03, 0xc3, 0x32, 0x63, 0x09, 0xc3,
+ 0x32, 0x6d, 0x0c, 0xc3, 0x32, 0x81, 0x06, 0xc3, 0x32, 0x8f, 0x15, 0xc3,
+ 0x32, 0xa5, 0x16, 0xc3, 0x32, 0xbf, 0x1c, 0xc3, 0x32, 0xcf, 0xd0, 0x5e,
+ 0x0f, 0x0b, 0x44, 0xc8, 0xc3, 0x8c, 0x60, 0x0b, 0x47, 0x39, 0xc3, 0x90,
+ 0xd5, 0x0b, 0x47, 0x31, 0x04, 0xc3, 0x32, 0xd9, 0x03, 0xc3, 0x32, 0xec,
+ 0xc6, 0xd5, 0xbc, 0x0b, 0x45, 0xc0, 0x17, 0xc3, 0x32, 0xf4, 0xc2, 0x0e,
+ 0x30, 0x0b, 0x46, 0xc9, 0xc3, 0x92, 0xe3, 0x0b, 0x45, 0x38, 0xc2, 0x02,
+ 0x6a, 0x0b, 0x46, 0x89, 0xc7, 0xc4, 0xc7, 0x0b, 0x44, 0x90, 0xc5, 0xe3,
+ 0x32, 0x0b, 0x46, 0x09, 0x9a, 0x0b, 0x45, 0x88, 0x42, 0x01, 0x0e, 0xc3,
+ 0x33, 0x04, 0xc4, 0xe5, 0x9b, 0x0b, 0x44, 0xc0, 0x09, 0xc3, 0x33, 0x0e,
+ 0x15, 0xc3, 0x33, 0x1e, 0x1b, 0xc3, 0x33, 0x2a, 0xc7, 0xc8, 0x40, 0x0b,
+ 0x43, 0x29, 0xcb, 0x91, 0xa7, 0x0b, 0x43, 0x20, 0x08, 0xc3, 0x33, 0x36,
+ 0x83, 0x0b, 0x44, 0x63, 0x03, 0x33, 0x42, 0x04, 0xc3, 0x33, 0x48, 0x42,
+ 0x0c, 0xfe, 0xc3, 0x33, 0x5e, 0xc7, 0xcd, 0x64, 0x0b, 0x43, 0xf8, 0xc2,
+ 0x01, 0x33, 0x0b, 0x43, 0x39, 0xc6, 0xd7, 0x78, 0x0b, 0x44, 0x09, 0xc4,
+ 0xe3, 0xb6, 0x0b, 0x43, 0x91, 0xc5, 0xdb, 0xc6, 0x0b, 0x43, 0x08, 0xc4,
+ 0xbb, 0x18, 0x0b, 0x43, 0x31, 0x90, 0x0b, 0x43, 0x78, 0x0b, 0xc3, 0x33,
+ 0x68, 0x42, 0x0c, 0xfe, 0xc3, 0x33, 0x72, 0xc2, 0x01, 0x02, 0x0b, 0x43,
+ 0x00, 0xc2, 0x00, 0xdd, 0x0b, 0x44, 0x49, 0x03, 0xc3, 0x33, 0x84, 0xc8,
+ 0xc0, 0x8b, 0x0b, 0x42, 0xd8, 0x87, 0x0b, 0x44, 0x29, 0xc2, 0xd4, 0x79,
+ 0x0b, 0x44, 0x18, 0xc2, 0x0e, 0x78, 0x0b, 0x43, 0xe9, 0xc6, 0xce, 0x3c,
+ 0x0b, 0x43, 0xb9, 0x42, 0x00, 0x5b, 0xc3, 0x33, 0x90, 0xc5, 0xdd, 0xa6,
+ 0x0b, 0x42, 0xd1, 0xc3, 0x90, 0xd5, 0x0b, 0x42, 0xc8, 0xc3, 0x66, 0xe9,
+ 0x0b, 0x43, 0xc1, 0x42, 0x07, 0x73, 0x43, 0x33, 0x9c, 0xcc, 0x87, 0xe4,
+ 0x0b, 0x43, 0x11, 0xc5, 0xdc, 0x2f, 0x0b, 0x42, 0xf0, 0x11, 0xc3, 0x33,
+ 0xa8, 0x0a, 0xc3, 0x33, 0xb6, 0xc3, 0x44, 0x22, 0x0b, 0x41, 0x19, 0xc2,
+ 0x6f, 0x95, 0x0b, 0x40, 0xa9, 0xc6, 0xd8, 0x74, 0x0b, 0x40, 0x88, 0x42,
+ 0x0c, 0xfe, 0xc3, 0x33, 0xc4, 0x17, 0xc3, 0x33, 0xd0, 0xc8, 0xbd, 0x6b,
+ 0x0b, 0x40, 0x30, 0xc3, 0xed, 0x8f, 0x0b, 0x41, 0xd9, 0x03, 0xc3, 0x33,
+ 0xdc, 0xc3, 0x91, 0xaf, 0x0b, 0x41, 0xa9, 0x07, 0x43, 0x33, 0xe6, 0x03,
+ 0xc3, 0x33, 0xf0, 0x42, 0x02, 0x1d, 0xc3, 0x34, 0x00, 0x11, 0xc3, 0x34,
+ 0x0a, 0xcb, 0x92, 0xdb, 0x0b, 0x41, 0x29, 0xc5, 0xd5, 0xbc, 0x0b, 0x41,
+ 0x21, 0xc9, 0xaa, 0xba, 0x0b, 0x40, 0x80, 0x03, 0xc3, 0x34, 0x16, 0xc2,
+ 0x01, 0x04, 0x0b, 0x42, 0xa1, 0x42, 0x01, 0x0d, 0xc3, 0x34, 0x20, 0x1b,
+ 0xc3, 0x34, 0x2a, 0xc3, 0xea, 0x50, 0x0b, 0x42, 0x39, 0x09, 0xc3, 0x34,
+ 0x37, 0x0d, 0xc3, 0x34, 0x49, 0x16, 0xc3, 0x34, 0x55, 0x42, 0x0c, 0x25,
+ 0xc3, 0x34, 0x64, 0xc3, 0x3e, 0xb7, 0x0b, 0x41, 0x61, 0x1c, 0x43, 0x34,
+ 0x70, 0x97, 0x0b, 0x42, 0x9b, 0x03, 0x34, 0x7c, 0xc5, 0xd9, 0xcd, 0x0b,
+ 0x41, 0xc1, 0xc6, 0xd7, 0x00, 0x0b, 0x40, 0xc1, 0xc4, 0xe6, 0x0f, 0x0b,
+ 0x40, 0xb8, 0x03, 0xc3, 0x34, 0x82, 0xc2, 0x02, 0x6a, 0x0b, 0x41, 0x69,
+ 0xc2, 0x00, 0xdd, 0x0b, 0x41, 0x51, 0x43, 0x00, 0xf7, 0x43, 0x34, 0x98,
+ 0xc6, 0xd0, 0xac, 0x0b, 0x42, 0x21, 0xc8, 0xbc, 0x9b, 0x0b, 0x41, 0x00,
+ 0x45, 0xd5, 0x21, 0xc3, 0x34, 0xa4, 0xc8, 0xb8, 0xd3, 0x0b, 0x40, 0x08,
+ 0xc2, 0x0e, 0xe5, 0x00, 0xde, 0xd1, 0xc2, 0x01, 0x01, 0x00, 0xde, 0x51,
+ 0xc2, 0x01, 0x0e, 0x00, 0xde, 0x20, 0xcf, 0x61, 0x2c, 0x00, 0x4f, 0x81,
+ 0xce, 0x71, 0x90, 0x00, 0x4f, 0x88, 0x94, 0x00, 0x4f, 0x00, 0x8e, 0x00,
+ 0x4f, 0x08, 0xa0, 0x01, 0x40, 0x3b, 0x03, 0x34, 0xb0, 0xa1, 0x01, 0x40,
+ 0x5b, 0x03, 0x34, 0xd0, 0xa2, 0x01, 0x40, 0x9b, 0x03, 0x34, 0xe9, 0xa3,
+ 0x01, 0x41, 0x1b, 0x03, 0x34, 0xfb, 0xa5, 0x01, 0x44, 0x19, 0xa4, 0x01,
+ 0x42, 0x1a, 0x03, 0x35, 0x06, 0xa1, 0x01, 0x40, 0x6b, 0x03, 0x35, 0x0a,
+ 0xa2, 0x01, 0x40, 0xab, 0x03, 0x35, 0x23, 0xa3, 0x01, 0x41, 0x2b, 0x03,
+ 0x35, 0x35, 0xa5, 0x01, 0x44, 0x29, 0xa4, 0x01, 0x42, 0x2a, 0x03, 0x35,
+ 0x40, 0xa2, 0x01, 0x40, 0xcb, 0x03, 0x35, 0x44, 0xa3, 0x01, 0x41, 0x4b,
+ 0x03, 0x35, 0x56, 0xa5, 0x01, 0x44, 0x49, 0xa4, 0x01, 0x42, 0x4a, 0x03,
+ 0x35, 0x61, 0xa3, 0x01, 0x41, 0x8b, 0x03, 0x35, 0x65, 0xa5, 0x01, 0x44,
+ 0x89, 0xa4, 0x01, 0x42, 0x8a, 0x03, 0x35, 0x70, 0xa5, 0x01, 0x45, 0x09,
+ 0xa4, 0x01, 0x43, 0x0a, 0x03, 0x35, 0x74, 0xa5, 0x01, 0x46, 0x08, 0xa1,
+ 0x01, 0x40, 0x73, 0x03, 0x35, 0x78, 0xa2, 0x01, 0x40, 0xb3, 0x03, 0x35,
+ 0x91, 0xa3, 0x01, 0x41, 0x33, 0x03, 0x35, 0xa3, 0xa5, 0x01, 0x44, 0x31,
+ 0xa4, 0x01, 0x42, 0x32, 0x03, 0x35, 0xae, 0xa2, 0x01, 0x40, 0xd3, 0x03,
+ 0x35, 0xb2, 0xa3, 0x01, 0x41, 0x53, 0x03, 0x35, 0xc4, 0xa5, 0x01, 0x44,
+ 0x51, 0xa4, 0x01, 0x42, 0x52, 0x03, 0x35, 0xcf, 0xa3, 0x01, 0x41, 0x93,
+ 0x03, 0x35, 0xd3, 0xa5, 0x01, 0x44, 0x91, 0xa4, 0x01, 0x42, 0x92, 0x03,
+ 0x35, 0xde, 0xa5, 0x01, 0x45, 0x11, 0xa4, 0x01, 0x43, 0x12, 0x03, 0x35,
+ 0xe2, 0xa5, 0x01, 0x46, 0x10, 0xa2, 0x01, 0x40, 0xe3, 0x03, 0x35, 0xe6,
+ 0xa3, 0x01, 0x41, 0x63, 0x03, 0x35, 0xf8, 0xa5, 0x01, 0x44, 0x61, 0xa4,
+ 0x01, 0x42, 0x62, 0x03, 0x36, 0x03, 0xa3, 0x01, 0x41, 0xa3, 0x03, 0x36,
+ 0x07, 0xa5, 0x01, 0x44, 0xa1, 0xa4, 0x01, 0x42, 0xa2, 0x03, 0x36, 0x12,
+ 0xa5, 0x01, 0x45, 0x21, 0xa4, 0x01, 0x43, 0x22, 0x03, 0x36, 0x16, 0xa5,
+ 0x01, 0x46, 0x20, 0xa3, 0x01, 0x41, 0xc3, 0x03, 0x36, 0x1a, 0xa5, 0x01,
+ 0x44, 0xc1, 0xa4, 0x01, 0x42, 0xc2, 0x03, 0x36, 0x25, 0xa5, 0x01, 0x45,
+ 0x41, 0xa4, 0x01, 0x43, 0x42, 0x03, 0x36, 0x29, 0xa5, 0x01, 0x46, 0x40,
+ 0xa5, 0x01, 0x45, 0x81, 0xa4, 0x01, 0x43, 0x82, 0x03, 0x36, 0x2d, 0xa5,
+ 0x01, 0x46, 0x80, 0xa5, 0x01, 0x47, 0x00, 0x83, 0x08, 0x83, 0xa9, 0xc2,
+ 0x00, 0x96, 0x08, 0x81, 0xa8, 0x91, 0x08, 0x83, 0x91, 0x87, 0x08, 0x83,
+ 0x88, 0x8e, 0x08, 0x80, 0x70, 0x94, 0x08, 0x80, 0x60, 0x91, 0x08, 0x83,
+ 0xa1, 0x87, 0x08, 0x83, 0x98, 0x8e, 0x08, 0x82, 0x08, 0x94, 0x08, 0x81,
+ 0xf8, 0xc4, 0x97, 0x8f, 0x0e, 0x87, 0xa9, 0xc3, 0x8a, 0xb3, 0x0e, 0x84,
+ 0x78, 0xc5, 0xb3, 0x40, 0x0e, 0x84, 0x89, 0xc8, 0xaa, 0xc3, 0x0e, 0x84,
+ 0x80, 0xc4, 0x97, 0x8f, 0x0e, 0x87, 0x91, 0xc4, 0xe5, 0xc7, 0x0e, 0x87,
+ 0x81, 0xc3, 0x8a, 0xb3, 0x0e, 0x82, 0x70, 0xc3, 0x73, 0xe8, 0x0e, 0x84,
+ 0x19, 0x03, 0x43, 0x36, 0x31, 0xd0, 0x34, 0x65, 0x0e, 0x85, 0x69, 0xcd,
+ 0x7f, 0xd3, 0x0e, 0x82, 0x90, 0x00, 0x43, 0x36, 0x3d, 0xc9, 0xb1, 0xdd,
+ 0x0e, 0x87, 0x29, 0xc7, 0xc8, 0xb0, 0x0e, 0x87, 0x20, 0xc9, 0xb1, 0xdd,
+ 0x0e, 0x87, 0x09, 0xc7, 0xc8, 0xb0, 0x0e, 0x87, 0x00, 0xc5, 0xb3, 0x40,
+ 0x0e, 0x84, 0xa9, 0x49, 0xaa, 0xc3, 0x43, 0x36, 0x49, 0xc5, 0xd9, 0xb9,
+ 0x0e, 0x86, 0xd9, 0xc4, 0x87, 0xbb, 0x0e, 0x86, 0xd0, 0xd5, 0x38, 0x65,
+ 0x0e, 0x86, 0x99, 0xc8, 0x2c, 0x8d, 0x0e, 0x86, 0x70, 0xc3, 0x8a, 0xb3,
+ 0x0e, 0x86, 0x11, 0xc4, 0x97, 0x8f, 0x0e, 0x86, 0x08, 0xc3, 0x18, 0x28,
+ 0x0e, 0x82, 0x19, 0xc7, 0xa1, 0xd5, 0x0e, 0x81, 0xb0, 0xc2, 0x6b, 0xf3,
+ 0x0e, 0x83, 0xb9, 0xc2, 0x05, 0x3b, 0x0e, 0x83, 0xb0, 0xc3, 0x73, 0xe8,
+ 0x0e, 0x82, 0xf1, 0xc8, 0xa1, 0xd4, 0x0e, 0x81, 0xf0, 0xc6, 0x01, 0xa1,
+ 0x0f, 0xd9, 0xe1, 0xc5, 0x00, 0x47, 0x0f, 0xd9, 0xe8, 0x55, 0x01, 0x8c,
+ 0xc3, 0x36, 0x55, 0x48, 0x01, 0x93, 0xc3, 0x36, 0x67, 0x4a, 0x12, 0xcc,
+ 0x43, 0x36, 0x73, 0xc6, 0x01, 0xa1, 0x0f, 0xda, 0x19, 0xc5, 0x00, 0x47,
+ 0x0f, 0xda, 0x21, 0xcc, 0x06, 0x2b, 0x0f, 0xda, 0x30, 0x46, 0x05, 0x07,
+ 0xc3, 0x36, 0x7f, 0xd2, 0x47, 0xf4, 0x0f, 0xda, 0x40, 0xd2, 0x47, 0xf4,
+ 0x0f, 0xda, 0x39, 0x46, 0x05, 0x07, 0x43, 0x36, 0x8b, 0xc7, 0x79, 0xb4,
+ 0x01, 0x53, 0x11, 0xc8, 0x50, 0x0d, 0x01, 0x53, 0x18, 0x46, 0x01, 0x17,
+ 0xc3, 0x36, 0x97, 0x46, 0x00, 0xc7, 0xc3, 0x36, 0xa1, 0x46, 0x05, 0x07,
+ 0x43, 0x36, 0xad, 0xc9, 0xb3, 0x06, 0x0f, 0xaf, 0x71, 0xca, 0x0f, 0x86,
+ 0x01, 0x80, 0x42, 0x03, 0x36, 0xb9, 0xcc, 0x11, 0x61, 0x01, 0x59, 0x81,
+ 0xcc, 0x8e, 0xbc, 0x01, 0x59, 0x90, 0xe0, 0x01, 0x07, 0x0f, 0xdc, 0xa0,
+ 0x46, 0x01, 0xab, 0x43, 0x36, 0xbf, 0x44, 0x04, 0x93, 0xc3, 0x36, 0xcf,
+ 0xc3, 0x00, 0xba, 0x01, 0x2c, 0x60, 0x00, 0x43, 0x36, 0xdb, 0x46, 0x01,
+ 0xab, 0x43, 0x36, 0xe7, 0xc9, 0xb2, 0xf4, 0x01, 0x0d, 0x69, 0xca, 0x09,
+ 0xfd, 0x01, 0x58, 0x20, 0xca, 0xa5, 0xf6, 0x01, 0x1d, 0x49, 0xcc, 0x85,
+ 0x74, 0x01, 0x1d, 0x41, 0xca, 0x9f, 0x70, 0x01, 0x1d, 0x38, 0xcd, 0x43,
+ 0xa0, 0x01, 0x2c, 0x69, 0xce, 0x08, 0x19, 0x01, 0x2c, 0x50, 0xd6, 0x2b,
+ 0xcf, 0x01, 0x4e, 0x79, 0xd6, 0x15, 0x69, 0x0f, 0xdb, 0x60, 0xcc, 0x87,
+ 0xf0, 0x01, 0x1d, 0x19, 0xc9, 0x50, 0xc7, 0x01, 0x1d, 0x11, 0xcc, 0x83,
+ 0x4c, 0x01, 0x1d, 0x09, 0x45, 0x01, 0xac, 0x43, 0x36, 0xf3, 0xcc, 0x00,
+ 0xd3, 0x01, 0x4c, 0x19, 0xcd, 0x6a, 0x7f, 0x01, 0x80, 0x70, 0xcc, 0x82,
+ 0x98, 0x01, 0x4a, 0x81, 0xca, 0xaa, 0x56, 0x01, 0x4a, 0x58, 0xcc, 0x82,
+ 0x98, 0x01, 0x4a, 0x51, 0xca, 0xaa, 0x56, 0x01, 0x4a, 0x70, 0xcb, 0x05,
+ 0x9b, 0x0f, 0xc4, 0x81, 0x49, 0x00, 0x59, 0x43, 0x37, 0x11, 0xc5, 0x00,
+ 0x62, 0x01, 0x0e, 0xd1, 0xca, 0x54, 0x07, 0x01, 0x48, 0x70, 0x46, 0x00,
+ 0x62, 0xc3, 0x37, 0x26, 0xd1, 0x54, 0x00, 0x01, 0x59, 0xb8, 0x45, 0x01,
+ 0xac, 0xc3, 0x37, 0x32, 0x16, 0xc3, 0x37, 0x68, 0xd5, 0x12, 0x39, 0x01,
+ 0x0e, 0x31, 0xc8, 0xab, 0xed, 0x01, 0x0d, 0x23, 0x03, 0x37, 0x74, 0x03,
+ 0x43, 0x37, 0x7a, 0x99, 0x01, 0x0e, 0x83, 0x03, 0x37, 0x86, 0xc6, 0xd7,
+ 0xf0, 0x01, 0x48, 0xd8, 0xd7, 0x2a, 0xec, 0x01, 0x0e, 0x59, 0x4a, 0x02,
+ 0x18, 0x43, 0x37, 0x8a, 0xc6, 0x01, 0x16, 0x01, 0x54, 0x18, 0xc3, 0xea,
+ 0xbf, 0x08, 0x3a, 0x71, 0xc3, 0x51, 0x58, 0x08, 0x3a, 0x69, 0xc3, 0xe9,
+ 0xc3, 0x08, 0x3a, 0x79, 0xc7, 0xc5, 0xed, 0x08, 0x3a, 0x81, 0xc5, 0xe2,
+ 0x83, 0x08, 0x3a, 0x89, 0xc4, 0xe9, 0x73, 0x08, 0x3a, 0x91, 0xc4, 0xe8,
+ 0x93, 0x08, 0x3a, 0x98, 0xc6, 0xd1, 0xd2, 0x08, 0x3a, 0x61, 0xc6, 0xd2,
+ 0x8c, 0x08, 0x3a, 0x59, 0xc6, 0xd2, 0x92, 0x08, 0x3a, 0x51, 0xc6, 0xd2,
+ 0xc8, 0x08, 0x3a, 0x49, 0xc3, 0xeb, 0x4f, 0x08, 0x3a, 0x41, 0xc3, 0xbc,
+ 0x73, 0x08, 0x3a, 0x39, 0xc3, 0xe0, 0x0d, 0x08, 0x3a, 0x31, 0xc3, 0xcf,
+ 0xec, 0x08, 0x3a, 0x29, 0xc3, 0xe9, 0x83, 0x08, 0x3a, 0x21, 0xc3, 0xec,
+ 0x03, 0x08, 0x3a, 0x19, 0xc3, 0xeb, 0xa9, 0x08, 0x3a, 0x11, 0xc3, 0xe9,
+ 0xbf, 0x08, 0x3a, 0x09, 0xc3, 0xcd, 0x56, 0x08, 0x3a, 0x00, 0x9e, 0x08,
+ 0x39, 0x99, 0x9f, 0x08, 0x39, 0xa1, 0xa0, 0x08, 0x39, 0xa9, 0xa1, 0x08,
+ 0x39, 0xb1, 0x9d, 0x08, 0x39, 0x90, 0x9d, 0x08, 0x38, 0x19, 0x9e, 0x08,
+ 0x38, 0x21, 0x9f, 0x08, 0x38, 0x29, 0xa0, 0x08, 0x38, 0x31, 0xa1, 0x08,
+ 0x38, 0x39, 0xa3, 0x08, 0x38, 0x41, 0xa5, 0x08, 0x38, 0x49, 0xa6, 0x08,
+ 0x38, 0x50, 0x9d, 0x08, 0x38, 0x59, 0x9e, 0x08, 0x38, 0x61, 0x9f, 0x08,
+ 0x38, 0x69, 0xa0, 0x08, 0x38, 0x71, 0xa1, 0x08, 0x38, 0x79, 0xa2, 0x08,
+ 0x38, 0x81, 0xa3, 0x08, 0x38, 0x89, 0xa4, 0x08, 0x38, 0x91, 0xa5, 0x08,
+ 0x38, 0x99, 0xa6, 0x08, 0x38, 0xa0, 0x9d, 0x08, 0x38, 0xa9, 0x9e, 0x08,
+ 0x38, 0xb1, 0x9f, 0x08, 0x38, 0xb9, 0xa0, 0x08, 0x38, 0xc1, 0xa1, 0x08,
+ 0x38, 0xc9, 0xa3, 0x08, 0x38, 0xd1, 0xa4, 0x08, 0x38, 0xd9, 0xa5, 0x08,
+ 0x38, 0xe1, 0xa6, 0x08, 0x38, 0xe8, 0xa1, 0x08, 0x38, 0xf1, 0xa4, 0x08,
+ 0x38, 0xf9, 0xa5, 0x08, 0x39, 0x00, 0x9d, 0x08, 0x39, 0x09, 0x9f, 0x08,
+ 0x39, 0x11, 0xa0, 0x08, 0x39, 0x19, 0xa1, 0x08, 0x39, 0x21, 0xa2, 0x08,
+ 0x39, 0x29, 0xa3, 0x08, 0x39, 0x31, 0xa5, 0x08, 0x39, 0x39, 0xa6, 0x08,
+ 0x39, 0x40, 0xa0, 0x08, 0x39, 0x59, 0xa1, 0x08, 0x39, 0x61, 0xa2, 0x08,
+ 0x39, 0x69, 0xa3, 0x08, 0x39, 0x71, 0xa4, 0x08, 0x39, 0x79, 0xa5, 0x08,
+ 0x39, 0x81, 0x9e, 0x08, 0x39, 0x49, 0x9f, 0x08, 0x39, 0x51, 0xa6, 0x08,
+ 0x39, 0x88, 0x1d, 0xc3, 0x37, 0x96, 0x1e, 0xc3, 0x37, 0xba, 0x1f, 0xc3,
+ 0x37, 0xce, 0x20, 0xc3, 0x37, 0xfb, 0x21, 0xc3, 0x38, 0x13, 0x22, 0xc3,
+ 0x38, 0x33, 0x23, 0xc3, 0x38, 0x57, 0x24, 0xc3, 0x38, 0x6f, 0x25, 0x43,
+ 0x38, 0x8b, 0xc2, 0x8c, 0x86, 0x08, 0x32, 0x41, 0x1f, 0xc3, 0x38, 0xa3,
+ 0x42, 0xe2, 0x3f, 0xc3, 0x38, 0xaf, 0xc2, 0xed, 0xad, 0x08, 0x32, 0x81,
+ 0xc2, 0xed, 0xd1, 0x08, 0x32, 0x89, 0x25, 0xc3, 0x38, 0xb7, 0xc2, 0xed,
+ 0xdd, 0x08, 0x32, 0xa0, 0x9e, 0x08, 0x32, 0xa9, 0x9f, 0x08, 0x32, 0xb1,
+ 0xa0, 0x08, 0x32, 0xb9, 0xa1, 0x08, 0x32, 0xc1, 0xa2, 0x08, 0x32, 0xc9,
+ 0xa3, 0x08, 0x32, 0xd1, 0xa4, 0x08, 0x32, 0xd9, 0xa5, 0x08, 0x32, 0xe1,
+ 0x26, 0x43, 0x38, 0xbf, 0x9d, 0x08, 0x33, 0x01, 0x9e, 0x08, 0x33, 0x09,
+ 0x9f, 0x08, 0x33, 0x11, 0x20, 0xc3, 0x38, 0xcb, 0xa1, 0x08, 0x33, 0x31,
+ 0xa2, 0x08, 0x33, 0x39, 0xa3, 0x08, 0x33, 0x41, 0xa4, 0x08, 0x33, 0x49,
+ 0xa5, 0x08, 0x33, 0x51, 0xa6, 0x08, 0x33, 0x58, 0x9d, 0x08, 0x33, 0x61,
+ 0x9e, 0x08, 0x33, 0x69, 0x9f, 0x08, 0x33, 0x71, 0xa0, 0x08, 0x33, 0x79,
+ 0xa1, 0x08, 0x33, 0x81, 0xa2, 0x08, 0x33, 0x89, 0xa3, 0x08, 0x33, 0x91,
+ 0xa4, 0x08, 0x33, 0x99, 0xa5, 0x08, 0x33, 0xa1, 0xa6, 0x08, 0x33, 0xa8,
+ 0x9d, 0x08, 0x33, 0xb1, 0x9e, 0x08, 0x33, 0xb9, 0x9f, 0x08, 0x33, 0xc1,
+ 0xa0, 0x08, 0x33, 0xc9, 0xa1, 0x08, 0x33, 0xd1, 0xa2, 0x08, 0x33, 0xd9,
+ 0xa3, 0x08, 0x33, 0xe1, 0xa4, 0x08, 0x33, 0xe9, 0xa5, 0x08, 0x33, 0xf1,
+ 0xa6, 0x08, 0x33, 0xf8, 0x9d, 0x08, 0x34, 0x01, 0x9e, 0x08, 0x34, 0x09,
+ 0x9f, 0x08, 0x34, 0x11, 0xa0, 0x08, 0x34, 0x19, 0xa1, 0x08, 0x34, 0x21,
+ 0xa2, 0x08, 0x34, 0x29, 0xa3, 0x08, 0x34, 0x31, 0xa4, 0x08, 0x34, 0x39,
+ 0xa5, 0x08, 0x34, 0x41, 0xa6, 0x08, 0x34, 0x48, 0x9d, 0x08, 0x34, 0x51,
+ 0x9e, 0x08, 0x34, 0x59, 0x9f, 0x08, 0x34, 0x61, 0xa0, 0x08, 0x34, 0x69,
+ 0xa3, 0x08, 0x34, 0x81, 0xa4, 0x08, 0x34, 0x89, 0xa5, 0x08, 0x34, 0x91,
+ 0xa6, 0x08, 0x34, 0x99, 0xa1, 0x08, 0x34, 0x71, 0xa2, 0x08, 0x34, 0x78,
+ 0x9d, 0x08, 0x34, 0xa1, 0x9e, 0x08, 0x34, 0xa9, 0x9f, 0x08, 0x34, 0xb1,
+ 0xa0, 0x08, 0x34, 0xb9, 0xa1, 0x08, 0x34, 0xc1, 0xa2, 0x08, 0x34, 0xc9,
+ 0xa3, 0x08, 0x34, 0xd1, 0xa4, 0x08, 0x34, 0xd9, 0xa5, 0x08, 0x34, 0xe1,
+ 0xa6, 0x08, 0x34, 0xe8, 0x9d, 0x08, 0x34, 0xf1, 0x9e, 0x08, 0x34, 0xf8,
+ 0xc5, 0xda, 0xc2, 0x08, 0x35, 0x01, 0xc5, 0xdb, 0x67, 0x08, 0x35, 0x09,
+ 0xc5, 0xe1, 0xfc, 0x08, 0x35, 0x11, 0xc5, 0xe2, 0x97, 0x08, 0x35, 0x19,
+ 0xc5, 0xe2, 0x24, 0x08, 0x35, 0x21, 0xc5, 0xe1, 0x2a, 0x08, 0x35, 0x29,
+ 0xc5, 0xe0, 0x8a, 0x08, 0x35, 0x31, 0xc5, 0xe0, 0x2b, 0x08, 0x35, 0x39,
+ 0xc5, 0xe1, 0x20, 0x08, 0x35, 0x41, 0xc5, 0xe0, 0xad, 0x08, 0x35, 0x48,
+ 0xc5, 0xda, 0xc2, 0x08, 0x35, 0x51, 0xc5, 0xdb, 0x67, 0x08, 0x35, 0x59,
+ 0xc5, 0xe1, 0xfc, 0x08, 0x35, 0x61, 0xc5, 0xe2, 0x97, 0x08, 0x35, 0x69,
+ 0xc5, 0xe2, 0x24, 0x08, 0x35, 0x71, 0xc5, 0xe1, 0x2a, 0x08, 0x35, 0x79,
+ 0xc5, 0xe0, 0x8a, 0x08, 0x35, 0x81, 0xc5, 0xe0, 0x2b, 0x08, 0x35, 0x89,
+ 0xc5, 0xe1, 0x20, 0x08, 0x35, 0x90, 0x9e, 0x08, 0x35, 0x99, 0x9f, 0x08,
+ 0x35, 0xa1, 0xa0, 0x08, 0x35, 0xa9, 0xa1, 0x08, 0x35, 0xb1, 0xa2, 0x08,
+ 0x35, 0xb9, 0xa3, 0x08, 0x35, 0xc1, 0xa5, 0x08, 0x35, 0xc9, 0xa6, 0x08,
+ 0x35, 0xd0, 0x9d, 0x08, 0x35, 0xd9, 0x9e, 0x08, 0x35, 0xe1, 0x9f, 0x08,
+ 0x35, 0xe9, 0xa0, 0x08, 0x35, 0xf1, 0xa2, 0x08, 0x35, 0xf9, 0xa3, 0x08,
+ 0x36, 0x00, 0x9d, 0x08, 0x36, 0x09, 0x9e, 0x08, 0x36, 0x11, 0xa0, 0x08,
+ 0x36, 0x19, 0xa1, 0x08, 0x36, 0x21, 0xa2, 0x08, 0x36, 0x29, 0xa3, 0x08,
+ 0x36, 0x31, 0xa4, 0x08, 0x36, 0x39, 0xa5, 0x08, 0x36, 0x41, 0xa6, 0x08,
+ 0x36, 0x48, 0x9d, 0x08, 0x36, 0x51, 0x9e, 0x08, 0x36, 0x59, 0x9f, 0x08,
+ 0x36, 0x61, 0xa1, 0x08, 0x36, 0x69, 0xa2, 0x08, 0x36, 0x71, 0xa3, 0x08,
+ 0x36, 0x79, 0xa4, 0x08, 0x36, 0x81, 0xa5, 0x08, 0x36, 0x89, 0xa6, 0x08,
+ 0x36, 0x90, 0x9d, 0x08, 0x36, 0x99, 0x9e, 0x08, 0x36, 0xa1, 0x9f, 0x08,
+ 0x36, 0xa9, 0xa2, 0x08, 0x36, 0xb1, 0xa4, 0x08, 0x36, 0xb9, 0xa5, 0x08,
+ 0x36, 0xc1, 0xa6, 0x08, 0x36, 0xc8, 0x9d, 0x08, 0x36, 0xd1, 0x9e, 0x08,
+ 0x36, 0xd9, 0x9f, 0x08, 0x36, 0xe1, 0xa0, 0x08, 0x36, 0xe9, 0xa1, 0x08,
+ 0x36, 0xf1, 0xa2, 0x08, 0x36, 0xf9, 0xa3, 0x08, 0x37, 0x01, 0xa4, 0x08,
+ 0x37, 0x09, 0xa6, 0x08, 0x37, 0x10, 0xa0, 0x08, 0x37, 0x19, 0xa1, 0x08,
+ 0x37, 0x21, 0xa2, 0x08, 0x37, 0x29, 0xa3, 0x08, 0x37, 0x31, 0xa5, 0x08,
+ 0x37, 0x39, 0xa6, 0x08, 0x37, 0x40, 0x9d, 0x08, 0x37, 0x49, 0x9e, 0x08,
+ 0x37, 0x51, 0x9f, 0x08, 0x37, 0x59, 0xa0, 0x08, 0x37, 0x61, 0xa1, 0x08,
+ 0x37, 0x69, 0xa2, 0x08, 0x37, 0x71, 0xa3, 0x08, 0x37, 0x79, 0xa4, 0x08,
+ 0x37, 0x81, 0xa5, 0x08, 0x37, 0x89, 0xa6, 0x08, 0x37, 0x90, 0x9d, 0x08,
+ 0x37, 0x99, 0x9e, 0x08, 0x37, 0xa1, 0x9f, 0x08, 0x37, 0xa9, 0xa0, 0x08,
+ 0x37, 0xb1, 0xa1, 0x08, 0x37, 0xb9, 0xa2, 0x08, 0x37, 0xc1, 0xa3, 0x08,
+ 0x37, 0xc9, 0xa4, 0x08, 0x37, 0xd1, 0xa5, 0x08, 0x37, 0xd9, 0xa6, 0x08,
+ 0x37, 0xe0, 0x9e, 0x08, 0x37, 0xe9, 0x9f, 0x08, 0x37, 0xf1, 0xa1, 0x08,
+ 0x37, 0xf9, 0xa2, 0x08, 0x38, 0x01, 0xa3, 0x08, 0x38, 0x09, 0xa5, 0x08,
+ 0x38, 0x10, 0x1d, 0xc3, 0x38, 0xd7, 0x1e, 0xc3, 0x39, 0x0d, 0x22, 0xc3,
+ 0x39, 0x3d, 0x21, 0xc3, 0x39, 0x73, 0x23, 0xc3, 0x39, 0xa3, 0x25, 0xc3,
+ 0x39, 0xd3, 0x24, 0xc3, 0x39, 0xeb, 0x1f, 0xc3, 0x3a, 0x21, 0x20, 0xc3,
+ 0x3a, 0x57, 0x26, 0x43, 0x3a, 0x87, 0x1e, 0xc3, 0x3a, 0x93, 0xc2, 0xed,
+ 0xb9, 0x08, 0x02, 0x91, 0xc2, 0x00, 0x20, 0x08, 0x02, 0x99, 0x21, 0xc3,
+ 0x3a, 0x9b, 0xc2, 0x00, 0x22, 0x08, 0x02, 0xb1, 0x23, 0xc3, 0x3a, 0xa3,
+ 0xc2, 0x3a, 0x96, 0x08, 0x02, 0xc9, 0x25, 0x43, 0x3a, 0xab, 0x1e, 0xc3,
+ 0x3a, 0xbb, 0x1f, 0x43, 0x3a, 0xdf, 0xc3, 0xea, 0xc5, 0x08, 0x06, 0xf1,
+ 0x1f, 0xc3, 0x3a, 0xef, 0xc3, 0xeb, 0xbe, 0x08, 0x07, 0xd0, 0x1f, 0xc3,
+ 0x3b, 0x01, 0x20, 0xc3, 0x3b, 0x0d, 0xc8, 0xba, 0xdb, 0x08, 0x05, 0x20,
+ 0x46, 0x01, 0xab, 0xc3, 0x3b, 0x19, 0xcb, 0x96, 0xfb, 0x00, 0x15, 0x3b,
+ 0x03, 0x3b, 0x48, 0x17, 0xc3, 0x3b, 0x4e, 0x0a, 0xc3, 0x3b, 0x58, 0x11,
+ 0xc3, 0x3b, 0x67, 0xc9, 0xab, 0x2f, 0x00, 0x15, 0x33, 0x03, 0x3b, 0x73,
+ 0xd3, 0x43, 0xe6, 0x00, 0x15, 0x41, 0x9c, 0x05, 0x39, 0x49, 0xc7, 0xc5,
+ 0xa7, 0x05, 0x39, 0x59, 0xcb, 0x9a, 0x6b, 0x01, 0x63, 0xb8, 0x46, 0x01,
+ 0xab, 0xc3, 0x3b, 0x79, 0x05, 0xc3, 0x3b, 0xa8, 0x0b, 0xc3, 0x3b, 0xb7,
+ 0x03, 0xc3, 0x3b, 0xc3, 0xc8, 0xc0, 0x03, 0x05, 0x5a, 0x29, 0xd1, 0x53,
+ 0x89, 0x00, 0x14, 0x29, 0xc6, 0xa8, 0x6d, 0x00, 0x06, 0xf8, 0x00, 0xc3,
+ 0x3b, 0xcf, 0x48, 0x11, 0xae, 0xc3, 0x3b, 0xdb, 0xc8, 0xb8, 0x73, 0x00,
+ 0x13, 0x21, 0xc2, 0x01, 0x0a, 0x05, 0x3b, 0xaa, 0x03, 0x3c, 0x08, 0x00,
+ 0xc3, 0x3c, 0x0e, 0xc2, 0x01, 0x0a, 0x05, 0x3b, 0xa1, 0x8c, 0x00, 0x0e,
+ 0x60, 0x46, 0x01, 0xab, 0xc3, 0x3c, 0x1a, 0x07, 0xc3, 0x3c, 0x61, 0xc5,
+ 0xaf, 0x0c, 0x00, 0x0b, 0xfb, 0x03, 0x3c, 0x70, 0xc9, 0xab, 0x2f, 0x00,
+ 0x15, 0x51, 0xc9, 0xb5, 0x7c, 0x00, 0x15, 0x59, 0xc2, 0x01, 0x0a, 0x05,
+ 0x3b, 0x91, 0xd1, 0x51, 0x7a, 0x00, 0x0c, 0xd9, 0x8c, 0x00, 0x0e, 0x48,
+ 0x46, 0x01, 0xab, 0xc3, 0x3c, 0x76, 0x07, 0xc3, 0x3c, 0xa3, 0x9c, 0x00,
+ 0x0f, 0x9b, 0x03, 0x3c, 0xb2, 0x11, 0xc3, 0x3c, 0xb6, 0xc2, 0x01, 0x0a,
+ 0x05, 0x3b, 0x89, 0xc9, 0xaf, 0x31, 0x00, 0x11, 0xc0, 0x46, 0x01, 0xab,
+ 0xc3, 0x3c, 0xc2, 0xc2, 0x00, 0x0a, 0x05, 0x5a, 0x9b, 0x03, 0x3c, 0xf0,
+ 0x46, 0x18, 0xf0, 0xc3, 0x3c, 0xf6, 0xc8, 0xbf, 0x33, 0x05, 0x39, 0x6b,
+ 0x03, 0x3d, 0x06, 0xc2, 0x00, 0xe5, 0x05, 0x3b, 0x78, 0x46, 0x01, 0xab,
+ 0xc3, 0x3d, 0x0c, 0x44, 0x03, 0xf6, 0xc3, 0x3d, 0x62, 0x91, 0x05, 0x3a,
+ 0x79, 0xc4, 0x73, 0xe1, 0x05, 0x3d, 0xb9, 0xcb, 0x94, 0x25, 0x05, 0x3e,
+ 0x09, 0x8b, 0x00, 0x0d, 0x19, 0x97, 0x00, 0x11, 0x18, 0x46, 0x01, 0xab,
+ 0xc3, 0x3d, 0x70, 0x42, 0x00, 0xf2, 0xc3, 0x3d, 0xba, 0x10, 0xc3, 0x3d,
+ 0xc7, 0x95, 0x05, 0x3b, 0x68, 0xc2, 0x23, 0x6a, 0x00, 0x14, 0x93, 0x03,
+ 0x3d, 0xd3, 0xc2, 0x00, 0x56, 0x00, 0x0a, 0x5b, 0x03, 0x3d, 0xd7, 0xc2,
+ 0x01, 0x0d, 0x00, 0x14, 0x1b, 0x03, 0x3d, 0xdd, 0x46, 0x01, 0xab, 0xc3,
+ 0x3d, 0xe3, 0x4e, 0x70, 0xa2, 0xc3, 0x3e, 0x39, 0x96, 0x05, 0x3b, 0x5a,
+ 0x03, 0x3e, 0x45, 0xcb, 0x90, 0x3c, 0x00, 0x15, 0x4b, 0x03, 0x3e, 0x49,
+ 0x46, 0x01, 0xab, 0x43, 0x3e, 0x4f, 0x46, 0x01, 0xab, 0xc3, 0x3e, 0x6d,
+ 0xc3, 0xce, 0x96, 0x00, 0x10, 0xe8, 0x44, 0x01, 0x8d, 0xc3, 0x3e, 0xa8,
+ 0x46, 0x01, 0xab, 0xc3, 0x3e, 0xb4, 0xc2, 0x01, 0x0a, 0x05, 0x3b, 0x98,
+ 0x00, 0xc3, 0x3e, 0xd8, 0xc6, 0x12, 0x65, 0x00, 0x14, 0x53, 0x03, 0x3e,
+ 0xe7, 0x87, 0x00, 0xeb, 0x59, 0x91, 0x05, 0x5b, 0x19, 0x8b, 0x05, 0x5a,
+ 0x81, 0x8f, 0x05, 0x3b, 0xc0, 0x00, 0xc3, 0x3e, 0xed, 0xc4, 0xdd, 0x2f,
+ 0x00, 0x12, 0x8b, 0x03, 0x3e, 0xf9, 0x87, 0x00, 0x07, 0x33, 0x03, 0x3e,
+ 0xff, 0x83, 0x05, 0x39, 0x99, 0x91, 0x05, 0x39, 0xa9, 0x97, 0x05, 0x39,
+ 0xb9, 0x98, 0x05, 0x39, 0xcb, 0x03, 0x3f, 0x05, 0x9b, 0x05, 0x39, 0xe9,
+ 0xca, 0xa7, 0xf4, 0x05, 0x3e, 0x18, 0x46, 0x01, 0xab, 0x43, 0x3f, 0x0b,
+ 0x46, 0x01, 0xab, 0xc3, 0x3f, 0x2d, 0xc3, 0x0b, 0xa3, 0x05, 0x39, 0x3b,
+ 0x03, 0x3f, 0x53, 0x98, 0x00, 0x0c, 0xa9, 0xc5, 0xd6, 0x47, 0x01, 0x63,
+ 0xb0, 0x46, 0x01, 0xab, 0x43, 0x3f, 0x59, 0x46, 0x01, 0xab, 0x43, 0x3f,
+ 0x89, 0x46, 0x01, 0xab, 0xc3, 0x3f, 0x99, 0x9b, 0x05, 0x3b, 0x09, 0xcb,
+ 0x93, 0xee, 0x05, 0x3b, 0x19, 0xc3, 0x03, 0x02, 0x05, 0x3b, 0x49, 0x47,
+ 0xc6, 0x80, 0x43, 0x3f, 0xbb, 0x46, 0x01, 0xab, 0xc3, 0x3f, 0xcd, 0xc2,
+ 0x00, 0x0a, 0x00, 0x13, 0xc0, 0x46, 0x01, 0xab, 0xc3, 0x3f, 0xf5, 0xc2,
+ 0x00, 0x9a, 0x00, 0x09, 0xc0, 0x46, 0x01, 0xab, 0xc3, 0x40, 0x24, 0x47,
+ 0x26, 0x0d, 0xc3, 0x40, 0x58, 0xc4, 0x33, 0x51, 0x00, 0x13, 0x19, 0xc2,
+ 0x01, 0x0e, 0x00, 0x0d, 0x18, 0x46, 0x01, 0xab, 0xc3, 0x40, 0x6a, 0xcc,
+ 0x8b, 0x14, 0x00, 0xe8, 0xb9, 0x03, 0xc3, 0x40, 0x9a, 0x4b, 0x94, 0x9e,
+ 0xc3, 0x40, 0xa6, 0xc7, 0xca, 0x85, 0x05, 0x3a, 0x39, 0xc3, 0x0b, 0x47,
+ 0x05, 0x3d, 0xa8, 0x46, 0x01, 0xab, 0x43, 0x40, 0xb1, 0x46, 0x01, 0xab,
+ 0xc3, 0x40, 0xbb, 0xc9, 0xb6, 0xc9, 0x00, 0x11, 0xc8, 0x88, 0x07, 0xd8,
+ 0x03, 0x03, 0x40, 0xd0, 0x8e, 0x07, 0xd8, 0x11, 0x8b, 0x07, 0xd8, 0x08,
+ 0x8d, 0x0e, 0xf8, 0x81, 0x89, 0x0e, 0xf8, 0x11, 0x94, 0x00, 0xe8, 0xd1,
+ 0x8f, 0x05, 0x3f, 0xd1, 0x87, 0x01, 0x63, 0xd8, 0xc4, 0xa2, 0x1e, 0x0e,
+ 0xf8, 0x21, 0xc6, 0x03, 0x81, 0x00, 0xe8, 0x60, 0x94, 0x00, 0xe8, 0xc9,
+ 0x90, 0x00, 0xe8, 0x70, 0xc4, 0xac, 0x0c, 0x00, 0xf7, 0xf1, 0xc5, 0x1f,
+ 0x01, 0x00, 0xf7, 0xc1, 0xc4, 0x04, 0x63, 0x00, 0x0d, 0x9b, 0x03, 0x40,
+ 0xd8, 0x06, 0xc3, 0x40, 0xde, 0xc5, 0x1e, 0x64, 0x00, 0xf7, 0x91, 0xc5,
+ 0x34, 0x21, 0x00, 0x06, 0xe9, 0xca, 0x07, 0x96, 0x00, 0x0b, 0xb1, 0xc6,
+ 0x61, 0xbc, 0x00, 0x11, 0x91, 0xc6, 0x03, 0x81, 0x00, 0x12, 0x70, 0x48,
+ 0xba, 0x8b, 0xc3, 0x40, 0xea, 0xc9, 0xaf, 0xe5, 0x05, 0x3e, 0xb0, 0x44,
+ 0x0b, 0xf8, 0xc3, 0x40, 0xf4, 0xc5, 0x34, 0x21, 0x00, 0xf1, 0xf1, 0xc4,
+ 0x04, 0x63, 0x01, 0x63, 0x70, 0x45, 0x01, 0xac, 0xc3, 0x41, 0x00, 0xc3,
+ 0x02, 0x1d, 0x00, 0x12, 0x20, 0x42, 0x03, 0x76, 0xc3, 0x41, 0x4a, 0x05,
+ 0xc3, 0x41, 0x59, 0x06, 0xc3, 0x41, 0x68, 0x0f, 0xc3, 0x41, 0x75, 0xc5,
+ 0x1f, 0x01, 0x00, 0x06, 0xab, 0x03, 0x41, 0x84, 0xc6, 0x03, 0x81, 0x00,
+ 0x06, 0xc3, 0x03, 0x41, 0x8a, 0xc5, 0x1e, 0x64, 0x00, 0x06, 0x91, 0xc5,
+ 0x34, 0x21, 0x00, 0x06, 0x99, 0x42, 0x00, 0x36, 0xc3, 0x41, 0x90, 0xc5,
+ 0x1f, 0x9c, 0x00, 0x0a, 0x71, 0xc6, 0xd1, 0x0c, 0x00, 0x0f, 0x53, 0x03,
+ 0x41, 0x9c, 0xce, 0x1f, 0xa7, 0x00, 0x10, 0x70, 0x91, 0x00, 0x0c, 0x31,
+ 0x87, 0x00, 0x0c, 0x80, 0x06, 0xc3, 0x41, 0xa2, 0xca, 0xa1, 0x8c, 0x00,
+ 0xf6, 0x41, 0xc5, 0x1f, 0x01, 0x00, 0x09, 0x43, 0x03, 0x41, 0xaf, 0xc5,
+ 0x1e, 0x64, 0x00, 0x06, 0x61, 0xc5, 0x34, 0x21, 0x00, 0x06, 0x69, 0x05,
+ 0xc3, 0x41, 0xb5, 0xc6, 0x61, 0xbc, 0x00, 0x09, 0x51, 0xc5, 0x1f, 0x9c,
+ 0x00, 0x09, 0x61, 0xc6, 0xd1, 0x0c, 0x00, 0x09, 0x71, 0xc6, 0x03, 0x81,
+ 0x00, 0x0c, 0xb9, 0xce, 0x1f, 0xa7, 0x00, 0x10, 0x50, 0x88, 0x05, 0x3b,
+ 0xd9, 0x89, 0x05, 0x3b, 0xe9, 0x94, 0x05, 0x3c, 0x11, 0x95, 0x05, 0x3c,
+ 0x21, 0x96, 0x05, 0x3c, 0x31, 0x86, 0x05, 0x3b, 0xc8, 0x05, 0xc3, 0x41,
+ 0xc1, 0xc5, 0x1f, 0x01, 0x00, 0xf5, 0xe3, 0x03, 0x41, 0xd9, 0xca, 0xa1,
+ 0x8c, 0x00, 0xf5, 0xd1, 0x06, 0xc3, 0x41, 0xdf, 0xc6, 0x61, 0xbc, 0x00,
+ 0x08, 0x93, 0x03, 0x41, 0xe9, 0xc5, 0x1e, 0x64, 0x00, 0x06, 0x41, 0xc5,
+ 0x34, 0x21, 0x00, 0x06, 0x49, 0xc5, 0x1f, 0x9c, 0x00, 0x08, 0xa1, 0xc6,
+ 0xd1, 0x0c, 0x00, 0x08, 0xc1, 0xce, 0x1f, 0xa7, 0x00, 0x10, 0x31, 0xc6,
+ 0x03, 0x81, 0x00, 0x12, 0x30, 0xc3, 0x00, 0xcd, 0x05, 0x39, 0x11, 0xc2,
+ 0x01, 0x5b, 0x05, 0x39, 0x20, 0x8a, 0x00, 0x06, 0x80, 0x00, 0x43, 0x41,
+ 0xef, 0xc5, 0x1f, 0x9c, 0x00, 0x08, 0x13, 0x03, 0x41, 0xfb, 0x05, 0xc3,
+ 0x42, 0x01, 0xca, 0xa1, 0x8c, 0x00, 0xf5, 0x11, 0x06, 0xc3, 0x42, 0x10,
+ 0x45, 0x01, 0xbd, 0xc3, 0x42, 0x1d, 0xce, 0x1f, 0xa7, 0x00, 0x10, 0x11,
+ 0xc5, 0x1e, 0x64, 0x00, 0x06, 0x01, 0xc5, 0x34, 0x21, 0x00, 0x06, 0x09,
+ 0xc5, 0x1f, 0x01, 0x00, 0x06, 0x19, 0xc6, 0x61, 0xbc, 0x00, 0x08, 0x01,
+ 0xc6, 0xd1, 0x0c, 0x00, 0x08, 0x21, 0xc6, 0x03, 0x81, 0x00, 0x11, 0xd0,
+ 0x46, 0x01, 0xab, 0x43, 0x42, 0x2c, 0xd4, 0x3c, 0x62, 0x05, 0x39, 0xd0,
+ 0x44, 0x0b, 0xf8, 0xc3, 0x42, 0x38, 0x05, 0xc3, 0x42, 0x47, 0xc5, 0x34,
+ 0x21, 0x00, 0x0a, 0xd3, 0x03, 0x42, 0x62, 0xce, 0x3b, 0x8c, 0x05, 0x3d,
+ 0x41, 0xc4, 0x04, 0x63, 0x05, 0x3e, 0x29, 0x15, 0x43, 0x42, 0x68, 0xc6,
+ 0xbd, 0x35, 0x05, 0x3d, 0x61, 0xc3, 0x6e, 0x0d, 0x00, 0x0c, 0x78, 0xd0,
+ 0x58, 0x8f, 0x00, 0x12, 0x51, 0xc9, 0xab, 0xc8, 0x05, 0x3d, 0x70, 0xca,
+ 0x6a, 0xeb, 0x00, 0xf4, 0xa1, 0x06, 0xc3, 0x42, 0x74, 0x05, 0xc3, 0x42,
+ 0x80, 0xcc, 0x57, 0xac, 0x05, 0x3e, 0x31, 0xc5, 0x34, 0x21, 0x00, 0x0b,
+ 0xc9, 0x15, 0xc3, 0x42, 0x8c, 0xc4, 0x04, 0x63, 0x00, 0x11, 0x20, 0xc8,
+ 0x1e, 0x8a, 0x00, 0xf4, 0x61, 0xc8, 0x18, 0x1a, 0x00, 0xf4, 0x50, 0x06,
+ 0xc3, 0x42, 0x98, 0xc5, 0x34, 0x21, 0x00, 0xf4, 0x11, 0xc5, 0x1e, 0x64,
+ 0x00, 0xf4, 0x01, 0xc4, 0x04, 0x63, 0x01, 0x63, 0x91, 0xca, 0x07, 0x96,
+ 0x00, 0x0b, 0xa0, 0x06, 0xc3, 0x42, 0xa4, 0xc5, 0x1f, 0x01, 0x00, 0xf3,
+ 0xe1, 0xc4, 0x04, 0x63, 0x00, 0x0d, 0x90, 0xc2, 0x08, 0xc6, 0x05, 0x3c,
+ 0xd1, 0xc2, 0x09, 0x06, 0x05, 0x3c, 0xe1, 0xc2, 0x0e, 0x78, 0x05, 0x3c,
+ 0xf0, 0x05, 0xc3, 0x42, 0xb0, 0xca, 0x6a, 0xeb, 0x00, 0xf3, 0x71, 0x06,
+ 0xc3, 0x42, 0xc8, 0xc6, 0x03, 0x81, 0x00, 0x0b, 0x31, 0xc4, 0x04, 0x63,
+ 0x00, 0x0d, 0x61, 0xce, 0x04, 0x59, 0x00, 0x0d, 0x70, 0xcc, 0x26, 0x18,
+ 0x05, 0x3b, 0x22, 0x03, 0x42, 0xd4, 0xc9, 0x68, 0x49, 0x05, 0x3b, 0xf1,
+ 0x8e, 0x05, 0x3c, 0x01, 0x8a, 0x05, 0x3c, 0x69, 0x8d, 0x05, 0x3d, 0x81,
+ 0x96, 0x05, 0x3d, 0x89, 0x8f, 0x00, 0x0c, 0xe1, 0x98, 0x00, 0x12, 0x29,
+ 0x83, 0x01, 0x63, 0x7a, 0x03, 0x42, 0xda, 0xc3, 0x22, 0x44, 0x00, 0x0c,
+ 0x21, 0xc3, 0x0a, 0x1f, 0x00, 0x0d, 0x39, 0xc4, 0x0d, 0x8e, 0x00, 0x0d,
+ 0xe0, 0x45, 0x01, 0xac, 0xc3, 0x42, 0xe0, 0xc7, 0xa9, 0xe1, 0x05, 0x3a,
+ 0xd0, 0xca, 0xa1, 0xe6, 0x05, 0x39, 0xf1, 0xc6, 0x24, 0x64, 0x05, 0x3d,
+ 0x59, 0x87, 0x00, 0x0c, 0x71, 0xc6, 0xd6, 0x46, 0x05, 0x3f, 0xa8, 0xc9,
+ 0x18, 0x19, 0x00, 0xf2, 0xb1, 0xc5, 0x34, 0x21, 0x00, 0xf2, 0xa1, 0x15,
+ 0xc3, 0x43, 0x0e, 0xc4, 0x04, 0x63, 0x00, 0x0d, 0x21, 0xc8, 0xc1, 0xf3,
+ 0x05, 0x3a, 0x90, 0x05, 0xc3, 0x43, 0x1d, 0x0e, 0xc3, 0x43, 0x2f, 0x06,
+ 0xc3, 0x43, 0x41, 0xc5, 0x1e, 0x64, 0x00, 0x0f, 0xc1, 0xc5, 0x1f, 0x01,
+ 0x00, 0x06, 0x89, 0xc5, 0x34, 0x21, 0x00, 0x0a, 0x19, 0xce, 0x3b, 0x8c,
+ 0x05, 0x3d, 0x21, 0xce, 0x72, 0x9a, 0x00, 0x0e, 0x58, 0x05, 0xc3, 0x43,
+ 0x4d, 0xca, 0x6a, 0xeb, 0x00, 0xf1, 0xd1, 0x42, 0x00, 0x68, 0xc3, 0x43,
+ 0x5f, 0xcb, 0x91, 0x18, 0x05, 0x3a, 0x41, 0xc5, 0x34, 0x21, 0x00, 0x09,
+ 0xc9, 0x47, 0x01, 0x8c, 0xc3, 0x43, 0x6e, 0x15, 0xc3, 0x43, 0x7a, 0x04,
+ 0x43, 0x43, 0x86, 0xca, 0x6a, 0xeb, 0x00, 0xf1, 0xa1, 0x06, 0xc3, 0x43,
+ 0x92, 0xc5, 0x34, 0x21, 0x00, 0xf1, 0x81, 0xc6, 0x03, 0x81, 0x05, 0x3a,
+ 0x03, 0x03, 0x43, 0xa4, 0x05, 0xc3, 0x43, 0xaa, 0xce, 0x3b, 0x8c, 0x05,
+ 0x3d, 0x11, 0xc4, 0x04, 0x63, 0x00, 0x0c, 0xc0, 0xcb, 0x9a, 0xa2, 0x00,
+ 0xf1, 0x51, 0x05, 0xc3, 0x43, 0xb6, 0x06, 0xc3, 0x43, 0xc8, 0xc6, 0x03,
+ 0x81, 0x00, 0x09, 0x31, 0xc4, 0x04, 0x63, 0x05, 0x3d, 0x50, 0xc6, 0x61,
+ 0xbc, 0x00, 0xf1, 0x01, 0xc5, 0x34, 0x21, 0x00, 0x0f, 0xa1, 0x05, 0xc3,
+ 0x43, 0xda, 0xc5, 0x1f, 0x9c, 0x00, 0x08, 0xf1, 0xc9, 0x18, 0x19, 0x00,
+ 0x09, 0x01, 0xce, 0x3b, 0x8c, 0x05, 0x3d, 0x01, 0xc4, 0x04, 0x63, 0x00,
+ 0x0c, 0x99, 0xc6, 0x03, 0x81, 0x00, 0x0f, 0x20, 0x97, 0x05, 0x3d, 0xf1,
+ 0x8b, 0x05, 0x3d, 0xe1, 0x83, 0x05, 0x3d, 0xd1, 0xc4, 0x05, 0x30, 0x00,
+ 0x12, 0x08, 0xc9, 0x18, 0x19, 0x00, 0xf0, 0xf1, 0xc6, 0x03, 0x81, 0x05,
+ 0x3c, 0xc1, 0xc4, 0x04, 0x63, 0x00, 0x0c, 0x88, 0x05, 0xc3, 0x43, 0xec,
+ 0xca, 0x6a, 0xeb, 0x00, 0xf0, 0x71, 0x44, 0x0b, 0xf8, 0xc3, 0x43, 0xfe,
+ 0x15, 0xc3, 0x44, 0x0a, 0xc4, 0x04, 0x63, 0x00, 0x0c, 0x51, 0xc6, 0xd5,
+ 0x3e, 0x00, 0x0c, 0x58, 0xcb, 0x98, 0xb3, 0x00, 0x0e, 0x20, 0x05, 0xc3,
+ 0x44, 0x1f, 0xc5, 0x34, 0x21, 0x00, 0x08, 0x31, 0xc9, 0x18, 0x19, 0x00,
+ 0x08, 0x51, 0xc3, 0x02, 0x1d, 0x05, 0x3c, 0x91, 0xcc, 0x57, 0xac, 0x05,
+ 0x3e, 0x21, 0xc4, 0x04, 0x63, 0x00, 0x0c, 0x39, 0xc6, 0x03, 0x81, 0x00,
+ 0x11, 0xd8, 0xcb, 0x98, 0xbe, 0x05, 0x39, 0x70, 0xca, 0x6a, 0xeb, 0x00,
+ 0xf0, 0x31, 0x44, 0x0b, 0xf8, 0xc3, 0x44, 0x34, 0xc8, 0xc1, 0xf3, 0x05,
+ 0x3c, 0xb1, 0xc4, 0x04, 0x63, 0x00, 0x0c, 0x09, 0xc6, 0xd5, 0x3e, 0x00,
+ 0x0c, 0x11, 0xc6, 0x03, 0x81, 0x00, 0x12, 0x18, 0x05, 0xc3, 0x44, 0x40,
+ 0xc6, 0x03, 0x81, 0x00, 0x12, 0x40, 0xd8, 0x21, 0xbc, 0x05, 0x3a, 0xb1,
+ 0xcf, 0x3c, 0x2b, 0x05, 0x3a, 0xc0, 0x83, 0x00, 0x74, 0x89, 0xc2, 0x01,
+ 0x0e, 0x00, 0x74, 0x90, 0xc6, 0x01, 0xa1, 0x0f, 0xda, 0xb1, 0xcc, 0x06,
+ 0x2b, 0x0f, 0xdb, 0x28, 0xcc, 0x06, 0x2b, 0x0f, 0xdb, 0x21, 0xc5, 0x00,
+ 0x47, 0x0f, 0xdb, 0x30, 0xc6, 0x01, 0xa1, 0x0f, 0xda, 0xd9, 0xcc, 0x06,
+ 0x2b, 0x0f, 0xdb, 0x00, 0xcc, 0x06, 0x2b, 0x0f, 0xda, 0xf9, 0xc5, 0x00,
+ 0x47, 0x0f, 0xdb, 0x08, 0xcc, 0x02, 0x5b, 0x01, 0x0f, 0x69, 0xce, 0x0f,
+ 0x0e, 0x01, 0x0f, 0x60, 0x00, 0x43, 0x44, 0x4c, 0xd2, 0x05, 0x94, 0x0f,
+ 0xc0, 0x09, 0xd5, 0x00, 0x92, 0x0f, 0xc0, 0x88, 0xca, 0x00, 0x47, 0x01,
+ 0x0d, 0x89, 0xc9, 0x03, 0x9e, 0x01, 0x0d, 0x80, 0x06, 0xc3, 0x44, 0x5e,
+ 0xdf, 0x0c, 0xff, 0x01, 0x4b, 0x18, 0xc3, 0xeb, 0xeb, 0x0f, 0xb3, 0x39,
+ 0xc9, 0xac, 0x58, 0x0f, 0xb2, 0xf8, 0xe0, 0x09, 0x07, 0x01, 0x3a, 0xd8,
+ 0xe0, 0x00, 0xe7, 0x01, 0x3b, 0x00, 0xe0, 0x00, 0xe7, 0x01, 0x3a, 0xf8,
+ 0xdc, 0x14, 0x4e, 0x01, 0x3d, 0x31, 0xde, 0x0f, 0x21, 0x01, 0x3d, 0x28,
+ 0xe0, 0x09, 0x07, 0x01, 0x3a, 0xe8, 0xd5, 0x00, 0x92, 0x0f, 0xc0, 0xd1,
+ 0xdb, 0x15, 0x2e, 0x0f, 0xc0, 0xf0, 0xc4, 0x01, 0x1d, 0x0f, 0xc4, 0xf1,
+ 0xc5, 0x01, 0x47, 0x0f, 0xc4, 0xf8, 0xc6, 0x65, 0x82, 0x07, 0xda, 0x4b,
+ 0x03, 0x44, 0x64, 0x15, 0x43, 0x44, 0x6a, 0x46, 0x01, 0xab, 0x43, 0x44,
+ 0x76, 0xc9, 0x65, 0xfa, 0x07, 0xd9, 0x49, 0xc4, 0x43, 0x0a, 0x07, 0xd9,
+ 0x00, 0xc8, 0x4e, 0x3b, 0x02, 0x6e, 0x69, 0xc3, 0x00, 0xc9, 0x02, 0x6f,
+ 0x08, 0xc3, 0x0d, 0x99, 0x00, 0x04, 0x41, 0xd2, 0x4d, 0x04, 0x00, 0x04,
+ 0x48, 0xcc, 0x68, 0xbe, 0x05, 0x4b, 0x59, 0xc5, 0x7f, 0x3f, 0x05, 0x4b,
+ 0x21, 0xc6, 0xae, 0x92, 0x05, 0x4b, 0x70, 0xc3, 0x3b, 0xc9, 0x05, 0x4b,
+ 0x61, 0x44, 0x5d, 0x46, 0x43, 0x44, 0x88, 0xcb, 0x7b, 0x1c, 0x05, 0x4b,
+ 0xe1, 0x16, 0xc3, 0x44, 0x9a, 0x44, 0xc3, 0xe9, 0xc3, 0x44, 0xa6, 0x43,
+ 0x28, 0x17, 0xc3, 0x44, 0xb0, 0xc6, 0x7f, 0x3e, 0x00, 0x88, 0x09, 0xc5,
+ 0x68, 0xc5, 0x00, 0x88, 0x41, 0xc5, 0xc4, 0x44, 0x00, 0x88, 0xa1, 0xc5,
+ 0xc3, 0xd4, 0x00, 0x88, 0xc9, 0xc5, 0xc2, 0x9e, 0x00, 0x8a, 0x39, 0xc5,
+ 0x7b, 0x22, 0x00, 0x8a, 0xc0, 0x95, 0x00, 0x88, 0x49, 0x43, 0x68, 0xbb,
+ 0xc3, 0x44, 0xba, 0xc7, 0xca, 0x70, 0x00, 0x89, 0x71, 0x43, 0x03, 0x49,
+ 0xc3, 0x44, 0xc6, 0xc7, 0xca, 0x54, 0x00, 0x89, 0x81, 0xc7, 0xca, 0x8c,
+ 0x00, 0x89, 0x89, 0x43, 0xb9, 0x2c, 0xc3, 0x44, 0xd2, 0xcb, 0x95, 0xb1,
+ 0x00, 0x89, 0xa1, 0xc7, 0xc9, 0xf2, 0x00, 0x89, 0xa9, 0x43, 0xbc, 0xe4,
+ 0xc3, 0x44, 0xde, 0xc7, 0xca, 0x07, 0x00, 0x89, 0xd8, 0xc7, 0xc4, 0x03,
+ 0x05, 0x4b, 0xd1, 0x8a, 0x00, 0x88, 0x89, 0xc7, 0xc4, 0x65, 0x00, 0x8a,
+ 0xb0, 0xc7, 0xc5, 0x8b, 0x05, 0x4b, 0xa1, 0x43, 0xba, 0x3b, 0xc3, 0x44,
+ 0xea, 0x43, 0x0a, 0x4c, 0xc3, 0x44, 0xf6, 0x92, 0x00, 0x88, 0x39, 0xc7,
+ 0xc5, 0xae, 0x00, 0x88, 0xf9, 0x43, 0xba, 0xa3, 0xc3, 0x45, 0x02, 0x43,
+ 0xc3, 0xe7, 0xc3, 0x45, 0x0e, 0xc9, 0xb8, 0x04, 0x00, 0x89, 0x50, 0xc5,
+ 0xba, 0x7e, 0x00, 0x8a, 0x11, 0xc6, 0xae, 0x6e, 0x00, 0x8a, 0x50, 0xc4,
+ 0x68, 0xc6, 0x00, 0x8a, 0x21, 0xc6, 0xb2, 0x8b, 0x00, 0x8a, 0x31, 0xc6,
+ 0xae, 0x80, 0x00, 0x8a, 0x58, 0xc4, 0xb1, 0xd8, 0x00, 0x8a, 0x41, 0xc6,
+ 0xb1, 0xd7, 0x00, 0x8a, 0x48, 0xc7, 0xcd, 0x2c, 0x05, 0x4b, 0x19, 0xc6,
+ 0xd4, 0xfc, 0x05, 0x4b, 0x11, 0xc7, 0xcc, 0x3e, 0x05, 0x4b, 0x09, 0xc7,
+ 0xcc, 0x22, 0x05, 0x4b, 0x01, 0x96, 0x00, 0x88, 0x19, 0xc8, 0xb9, 0xab,
+ 0x00, 0x88, 0xb9, 0xc7, 0xcc, 0x84, 0x00, 0x8a, 0xf0, 0x8d, 0x00, 0x88,
+ 0x01, 0xc8, 0xbe, 0x1b, 0x00, 0x8a, 0xd0, 0xc5, 0xc3, 0xd4, 0x00, 0x88,
+ 0xd9, 0xc5, 0xc4, 0x7c, 0x00, 0x88, 0xe9, 0x12, 0xc3, 0x45, 0x1a, 0xca,
+ 0x8d, 0x02, 0x00, 0x89, 0x60, 0x85, 0x00, 0x88, 0x61, 0xc7, 0xcb, 0x1f,
+ 0x00, 0x8a, 0x68, 0x43, 0xb1, 0xd9, 0xc3, 0x45, 0x26, 0x45, 0xdf, 0x4f,
+ 0x43, 0x45, 0x36, 0x15, 0xc3, 0x45, 0x3e, 0x05, 0x43, 0x45, 0x4a, 0x87,
+ 0x00, 0x8b, 0x11, 0x43, 0x87, 0xeb, 0x43, 0x45, 0x56, 0x83, 0x00, 0x8b,
+ 0x1b, 0x03, 0x45, 0x75, 0x87, 0x00, 0x8b, 0x43, 0x03, 0x45, 0x79, 0x91,
+ 0x00, 0x8b, 0x69, 0x97, 0x00, 0x8b, 0x91, 0x8b, 0x00, 0x8b, 0xa1, 0xc3,
+ 0xeb, 0x9a, 0x00, 0x8b, 0xc1, 0xc4, 0xe9, 0xdb, 0x00, 0x8b, 0xc9, 0x43,
+ 0xe7, 0xff, 0xc3, 0x45, 0x8f, 0xc3, 0xec, 0xba, 0x00, 0x8c, 0x39, 0xc3,
+ 0xec, 0xbd, 0x00, 0x8c, 0x41, 0xc3, 0xea, 0xe6, 0x00, 0x8c, 0x73, 0x03,
+ 0x45, 0x9c, 0xc3, 0xc7, 0xd0, 0x00, 0x8c, 0x83, 0x03, 0x45, 0xa0, 0xc4,
+ 0xe4, 0xf3, 0x00, 0x8c, 0x93, 0x03, 0x45, 0xa4, 0xc3, 0xeb, 0x01, 0x00,
+ 0x8c, 0x99, 0xc3, 0xe3, 0x46, 0x00, 0x8c, 0xa1, 0xc3, 0xed, 0x89, 0x00,
+ 0x8c, 0xb9, 0xc3, 0xe7, 0xb6, 0x00, 0x8d, 0x21, 0xc3, 0xec, 0xc0, 0x06,
+ 0xbd, 0x93, 0x03, 0x45, 0xa8, 0xc4, 0xe8, 0x2b, 0x06, 0xbd, 0xa1, 0xc3,
+ 0xea, 0xf2, 0x06, 0xbd, 0xc0, 0x91, 0x00, 0x8b, 0x29, 0x97, 0x00, 0x8b,
+ 0x99, 0xc3, 0xec, 0xba, 0x00, 0x8b, 0xd1, 0xc3, 0xec, 0xb7, 0x00, 0x8b,
+ 0xd9, 0xc4, 0xc7, 0xd3, 0x00, 0x8b, 0xe1, 0xc3, 0xeb, 0x01, 0x00, 0x8c,
+ 0xa9, 0xc3, 0xc7, 0xd0, 0x06, 0xbd, 0xdb, 0x03, 0x45, 0xac, 0xc3, 0xea,
+ 0xe6, 0x06, 0xbd, 0xc9, 0xc3, 0xea, 0xf2, 0x06, 0xbd, 0xe0, 0x87, 0x00,
+ 0x8b, 0x60, 0x83, 0x00, 0x8b, 0x53, 0x03, 0x45, 0xb4, 0x87, 0x00, 0x8b,
+ 0x83, 0x03, 0x45, 0xb8, 0x8b, 0x00, 0x8b, 0x88, 0x91, 0x00, 0x8b, 0xf8,
+ 0x87, 0x00, 0x8c, 0x0b, 0x03, 0x45, 0xbc, 0x83, 0x00, 0x8d, 0x32, 0x03,
+ 0x45, 0xc0, 0xc2, 0x08, 0xd0, 0x06, 0xbd, 0x88, 0x87, 0x00, 0x8c, 0x50,
+ 0x91, 0x06, 0xbd, 0xa8, 0xc4, 0x9e, 0x04, 0x00, 0x8c, 0xe8, 0x83, 0x00,
+ 0x8c, 0xcb, 0x03, 0x45, 0xc4, 0xc3, 0xeb, 0x9a, 0x06, 0xbd, 0xe9, 0x87,
+ 0x06, 0xbe, 0x03, 0x03, 0x45, 0xc8, 0x91, 0x06, 0xbe, 0x11, 0x97, 0x06,
+ 0xbe, 0x18, 0xc2, 0x08, 0xd0, 0x06, 0xbe, 0x08, 0x44, 0x7f, 0x3f, 0xc3,
+ 0x45, 0xcc, 0xc6, 0xae, 0x92, 0x00, 0x8d, 0xf9, 0x47, 0x68, 0xbe, 0x43,
+ 0x45, 0xfa, 0x44, 0x5d, 0x46, 0xc3, 0x46, 0x0a, 0x42, 0x00, 0xe4, 0x43,
+ 0x46, 0x46, 0x90, 0x00, 0x8d, 0x59, 0x43, 0x57, 0xfc, 0xc3, 0x46, 0x74,
+ 0xc7, 0xcb, 0x7a, 0x00, 0x8d, 0x69, 0xc8, 0xbd, 0x13, 0x00, 0x8e, 0x29,
+ 0x43, 0xbd, 0x8b, 0xc3, 0x46, 0x80, 0xc6, 0xd1, 0x8a, 0x00, 0x8e, 0x39,
+ 0xc9, 0xb2, 0x88, 0x00, 0x8e, 0x41, 0xc7, 0xcb, 0x49, 0x06, 0xbe, 0x58,
+ 0x44, 0xba, 0x7e, 0xc3, 0x46, 0x8c, 0xc6, 0xae, 0x6e, 0x00, 0x8e, 0x00,
+ 0x8e, 0x00, 0x8d, 0x79, 0x43, 0xae, 0x8f, 0xc3, 0x46, 0xb4, 0x46, 0xc8,
+ 0x6a, 0xc3, 0x46, 0xc6, 0x43, 0xae, 0x7d, 0xc3, 0x46, 0xdc, 0x43, 0x94,
+ 0x93, 0xc3, 0x46, 0xee, 0x43, 0x7b, 0x0d, 0xc3, 0x46, 0xfa, 0xc7, 0xce,
+ 0xb4, 0x00, 0x8d, 0xb1, 0xc7, 0xce, 0xc9, 0x00, 0x8e, 0x69, 0x44, 0x7b,
+ 0x1a, 0x43, 0x47, 0x06, 0x8f, 0x00, 0x8d, 0xb9, 0xc8, 0xb8, 0x9b, 0x00,
+ 0x8e, 0xd1, 0xc7, 0xc7, 0x8a, 0x00, 0x8e, 0xd9, 0x43, 0xc7, 0x83, 0xc3,
+ 0x47, 0x12, 0x43, 0x21, 0x90, 0xc3, 0x47, 0x1e, 0x43, 0xec, 0xcc, 0xc3,
+ 0x47, 0x2a, 0xc7, 0xc6, 0x79, 0x00, 0x8f, 0x09, 0xc7, 0xc6, 0xcd, 0x06,
+ 0xbe, 0xf0, 0x43, 0x68, 0xc6, 0xc3, 0x47, 0x36, 0x45, 0xae, 0x80, 0xc3,
+ 0x47, 0x64, 0x45, 0xb2, 0x8b, 0x43, 0x47, 0x7a, 0x89, 0x00, 0x8d, 0xe1,
+ 0x43, 0xc2, 0x7b, 0xc3, 0x47, 0x8a, 0xcd, 0x7f, 0x37, 0x00, 0x8f, 0x68,
+ 0x43, 0x68, 0xc6, 0xc3, 0x47, 0x96, 0xc6, 0xb1, 0xd7, 0x00, 0x8d, 0xf0,
+ 0x8a, 0x00, 0x8e, 0x11, 0xc7, 0xc4, 0x42, 0x00, 0x8f, 0xa9, 0xc7, 0xc3,
+ 0xd2, 0x00, 0x8f, 0xb1, 0xc7, 0xc4, 0x7a, 0x00, 0x8f, 0xb9, 0xc7, 0xc4,
+ 0x65, 0x00, 0x8f, 0xc0, 0x45, 0xc2, 0x9d, 0xc3, 0x47, 0xa6, 0xc9, 0x7b,
+ 0x1e, 0x00, 0x8f, 0xc8, 0x8d, 0x01, 0x89, 0x89, 0xc6, 0xd9, 0x4c, 0x01,
+ 0x89, 0x98, 0xc5, 0xc4, 0x7c, 0x01, 0x8b, 0x89, 0x12, 0xc3, 0x47, 0xc8,
+ 0xca, 0x8d, 0x02, 0x01, 0x8b, 0xc8, 0xc6, 0x7f, 0x3e, 0x01, 0x89, 0x91,
+ 0xc6, 0xba, 0x7d, 0x01, 0x89, 0xc1, 0xc5, 0x68, 0xc5, 0x01, 0x8a, 0x19,
+ 0xc4, 0x95, 0xb8, 0x01, 0x8a, 0x31, 0xc5, 0xc3, 0xe9, 0x01, 0x8a, 0x49,
+ 0xc5, 0xc4, 0x44, 0x01, 0x8b, 0x29, 0xc5, 0xc2, 0x9e, 0x01, 0x8c, 0x01,
+ 0xc5, 0x7b, 0x22, 0x01, 0x8c, 0x28, 0x95, 0x01, 0x8a, 0x29, 0xc8, 0xbe,
+ 0xbb, 0x01, 0x8b, 0xd1, 0xc7, 0xca, 0x70, 0x01, 0x8b, 0xd9, 0xc8, 0xc2,
+ 0x1b, 0x01, 0x8b, 0xe1, 0xc7, 0xca, 0x5b, 0x01, 0x8b, 0xe9, 0xc7, 0xca,
+ 0x1c, 0x01, 0x8b, 0xf0, 0x8d, 0x01, 0x89, 0xd1, 0xc6, 0xd9, 0x4c, 0x01,
+ 0x8b, 0x48, 0x43, 0x68, 0xc6, 0xc3, 0x47, 0xd4, 0xc6, 0xae, 0x80, 0x01,
+ 0x89, 0xf9, 0xc6, 0xb2, 0x8b, 0x01, 0x8b, 0x60, 0xc6, 0xae, 0x6e, 0x01,
+ 0x89, 0xf1, 0xc5, 0xba, 0x7e, 0x01, 0x8b, 0x50, 0xc4, 0x68, 0xc6, 0x01,
+ 0x8b, 0x71, 0xc6, 0xb2, 0x8b, 0x01, 0x8b, 0x80, 0x92, 0x01, 0x8a, 0x11,
+ 0x45, 0xd7, 0xf6, 0xc3, 0x47, 0xde, 0xc8, 0xba, 0x3b, 0x01, 0x8b, 0x91,
+ 0x43, 0xba, 0xa3, 0xc3, 0x47, 0xee, 0xc7, 0xc3, 0xe7, 0x01, 0x8b, 0xb0,
+ 0x87, 0x01, 0x8c, 0x70, 0x87, 0x01, 0x8a, 0x90, 0x91, 0x01, 0x8a, 0xab,
+ 0x03, 0x47, 0xfa, 0x45, 0xc2, 0x9d, 0x43, 0x48, 0x00, 0x87, 0x01, 0x8c,
+ 0x40, 0x91, 0x01, 0x8c, 0x50, 0x87, 0x01, 0x8a, 0xc8, 0x91, 0x01, 0x8a,
+ 0xe8, 0x83, 0x07, 0xfb, 0x39, 0x8b, 0x07, 0xfb, 0x41, 0x97, 0x07, 0xfb,
+ 0x49, 0x87, 0x07, 0xfb, 0x51, 0x91, 0x07, 0xfb, 0x59, 0x1b, 0xc3, 0x48,
+ 0x10, 0xc2, 0x00, 0x16, 0x07, 0xfb, 0x78, 0xc8, 0x4f, 0xa2, 0x08, 0x5b,
+ 0xf9, 0xc7, 0x0d, 0x7f, 0x08, 0x5b, 0xf0, 0xc4, 0x15, 0xa9, 0x08, 0x5b,
+ 0xe9, 0x91, 0x08, 0x5b, 0xc8, 0xc3, 0xe2, 0x62, 0x08, 0x5b, 0x81, 0xc4,
+ 0xdd, 0x34, 0x08, 0x5b, 0x70, 0xc8, 0x4f, 0xa2, 0x08, 0x5a, 0xf9, 0xc7,
+ 0x0d, 0x7f, 0x08, 0x5a, 0xf0, 0xc4, 0x15, 0xa9, 0x08, 0x5a, 0xe9, 0x91,
+ 0x08, 0x5a, 0xc8, 0xc4, 0xdd, 0x34, 0x08, 0x5a, 0x71, 0xc3, 0xe2, 0x62,
+ 0x08, 0x5a, 0x88, 0xcb, 0x4f, 0x9f, 0x0f, 0x65, 0x99, 0xc2, 0x0a, 0x20,
+ 0x0f, 0x65, 0x90, 0xc4, 0x15, 0xa7, 0x0f, 0x65, 0x49, 0xc2, 0x22, 0x45,
+ 0x0f, 0x65, 0x40, 0xc3, 0x0d, 0x8f, 0x0f, 0x65, 0x39, 0xc3, 0x08, 0xde,
+ 0x0f, 0x65, 0x30, 0xc4, 0x05, 0xde, 0x0f, 0x65, 0x29, 0xc2, 0x0a, 0x20,
+ 0x0f, 0x65, 0x20, 0xc9, 0x4f, 0xa1, 0x0f, 0x64, 0xe8, 0xc8, 0x4f, 0xa2,
+ 0x0f, 0x64, 0xa1, 0xc7, 0x0d, 0x7f, 0x0f, 0x64, 0x58, 0xc9, 0x4f, 0xa1,
+ 0x0f, 0x64, 0xe0, 0xc8, 0x4f, 0xa2, 0x0f, 0x64, 0x99, 0xc7, 0x0d, 0x7f,
+ 0x0f, 0x64, 0x50, 0xc2, 0x0d, 0x8b, 0x0f, 0x64, 0x03, 0x03, 0x48, 0x1c,
+ 0x00, 0x43, 0x48, 0x22, 0xc2, 0x0d, 0x8b, 0x0f, 0x63, 0xfb, 0x03, 0x48,
+ 0x2e, 0x00, 0x43, 0x48, 0x34, 0xc3, 0x41, 0xca, 0x0f, 0x63, 0xf3, 0x03,
+ 0x48, 0x40, 0xc2, 0x00, 0x29, 0x0f, 0x63, 0xaa, 0x03, 0x48, 0x46, 0xc3,
+ 0x0d, 0x8a, 0x0f, 0x63, 0xeb, 0x03, 0x48, 0x4a, 0xc2, 0x00, 0xd3, 0x0f,
+ 0x63, 0xa2, 0x03, 0x48, 0x50, 0xc4, 0x0d, 0x89, 0x0f, 0x63, 0xe3, 0x03,
+ 0x48, 0x54, 0xc3, 0x05, 0xdf, 0x0f, 0x63, 0x9a, 0x03, 0x48, 0x5a, 0xc4,
+ 0x15, 0xa9, 0x0f, 0x63, 0xdb, 0x03, 0x48, 0x5e, 0x91, 0x0f, 0x63, 0x92,
+ 0x03, 0x48, 0x64, 0xc9, 0x4f, 0xa1, 0x0f, 0x64, 0xa8, 0xc8, 0x4f, 0xa2,
+ 0x0f, 0x64, 0x61, 0xc7, 0x0d, 0x7f, 0x0f, 0x64, 0x18, 0xc2, 0x03, 0x5f,
+ 0x01, 0x96, 0x29, 0xc2, 0x00, 0x31, 0x01, 0x96, 0x30, 0xc3, 0x05, 0x17,
+ 0x01, 0x9f, 0x01, 0x16, 0xc3, 0x48, 0x68, 0x08, 0xc3, 0x48, 0x76, 0x15,
+ 0xc3, 0x48, 0x83, 0x07, 0xc3, 0x48, 0x95, 0xc4, 0x24, 0x35, 0x01, 0x9f,
+ 0x42, 0x03, 0x48, 0xa4, 0x19, 0xc3, 0x48, 0xaa, 0x0a, 0xc3, 0x48, 0xb2,
+ 0xc2, 0x01, 0x04, 0x01, 0x9b, 0x10, 0xc3, 0x08, 0xde, 0x01, 0x9a, 0xe3,
+ 0x03, 0x48, 0xbe, 0x0b, 0x43, 0x48, 0xc4, 0xc2, 0x22, 0x45, 0x01, 0x9a,
+ 0xf3, 0x03, 0x48, 0xd0, 0xc4, 0x15, 0xa7, 0x01, 0x9a, 0xfa, 0x03, 0x48,
+ 0xd6, 0xc4, 0x00, 0x48, 0x01, 0x9b, 0x03, 0x03, 0x48, 0xdc, 0xc5, 0x66,
+ 0x81, 0x01, 0x9b, 0x18, 0xc4, 0x12, 0xf2, 0x01, 0x9b, 0x58, 0xdb, 0x15,
+ 0x9a, 0x0f, 0xd1, 0xa9, 0xce, 0x29, 0x29, 0x0f, 0xd0, 0x58, 0xce, 0x29,
+ 0x29, 0x0f, 0xd0, 0x71, 0xdb, 0x15, 0x9a, 0x0f, 0xd1, 0xc0, 0x96, 0x01,
+ 0x32, 0xc3, 0x03, 0x48, 0xe2, 0xc7, 0xcc, 0x5a, 0x0f, 0xd2, 0x71, 0xc6,
+ 0xd4, 0xf0, 0x0f, 0xd2, 0x79, 0xc8, 0xba, 0xfb, 0x0f, 0xd2, 0x80, 0x49,
+ 0x29, 0x20, 0x43, 0x48, 0xe8, 0xce, 0x29, 0x29, 0x0f, 0xd0, 0x61, 0xdb,
+ 0x15, 0x9a, 0x0f, 0xd1, 0xb0, 0xce, 0x29, 0x29, 0x0f, 0xd0, 0x51, 0xdb,
+ 0x15, 0x9a, 0x0f, 0xd1, 0xa0, 0xc3, 0x01, 0x5e, 0x0f, 0xd0, 0xf1, 0xc5,
+ 0x8f, 0xc9, 0x0f, 0xd1, 0x10, 0xc8, 0x0a, 0x1f, 0x01, 0x34, 0x39, 0x42,
+ 0x00, 0x68, 0xc3, 0x48, 0xf4, 0x46, 0x05, 0x07, 0xc3, 0x49, 0x00, 0x46,
+ 0x00, 0xc7, 0x43, 0x49, 0x0c, 0xc5, 0x25, 0x0c, 0x01, 0x33, 0x08, 0xca,
+ 0xa1, 0x3c, 0x01, 0x38, 0x29, 0xdc, 0x12, 0x72, 0x0f, 0xde, 0x00, 0xcd,
+ 0x7d, 0xd8, 0x0f, 0xbc, 0xa9, 0xcc, 0x55, 0xd0, 0x01, 0x2d, 0x19, 0xd1,
+ 0x55, 0xcb, 0x0f, 0xbc, 0xa0, 0x14, 0xc3, 0x49, 0x18, 0x0e, 0xc3, 0x49,
+ 0x24, 0x46, 0x05, 0x07, 0xc3, 0x49, 0x30, 0xd7, 0x2a, 0x90, 0x01, 0x2f,
+ 0x59, 0xd4, 0x3a, 0xaa, 0x01, 0x1c, 0x28, 0xc4, 0x5a, 0x3f, 0x01, 0x31,
+ 0xe1, 0xcb, 0x94, 0x0f, 0x0f, 0x99, 0x20, 0xca, 0xa2, 0x5e, 0x0f, 0x99,
+ 0x30, 0xc5, 0x01, 0xea, 0x01, 0x2d, 0x59, 0xc3, 0x09, 0x46, 0x01, 0x5a,
+ 0x90, 0xc5, 0x01, 0x62, 0x01, 0x30, 0xe1, 0xce, 0x23, 0xd6, 0x0f, 0xa2,
+ 0x40, 0xcd, 0x48, 0x41, 0x01, 0x2e, 0x41, 0xd2, 0x48, 0x3c, 0x0f, 0xbc,
+ 0xd1, 0xce, 0x70, 0x4e, 0x0f, 0xbc, 0xd8, 0xe0, 0x08, 0x67, 0x01, 0x37,
+ 0xf8, 0xc6, 0x40, 0x3d, 0x01, 0x2d, 0xd9, 0xc7, 0xbc, 0x04, 0x01, 0x5a,
+ 0xa0, 0x89, 0x0f, 0x17, 0x18, 0xc5, 0x01, 0xc2, 0x0f, 0xb1, 0x73, 0x03,
+ 0x49, 0x3c, 0xd8, 0x24, 0x14, 0x0f, 0xd7, 0x10, 0xd3, 0x3f, 0xbe, 0x0f,
+ 0xb0, 0xe9, 0xcb, 0x99, 0xc6, 0x0f, 0xb0, 0xe0, 0x83, 0x08, 0x51, 0xa1,
+ 0xc2, 0x01, 0x0e, 0x08, 0x51, 0x98, 0xc3, 0x3a, 0xc5, 0x00, 0xcf, 0xf9,
+ 0xc4, 0xbc, 0x79, 0x00, 0xcf, 0x78, 0xc3, 0x3a, 0xc5, 0x00, 0xcf, 0xe1,
+ 0xc4, 0xbc, 0x79, 0x00, 0xcf, 0x60, 0xc3, 0x3a, 0xc5, 0x00, 0xcf, 0xf1,
+ 0xc4, 0xbc, 0x79, 0x00, 0xcf, 0x70, 0xc3, 0x3a, 0xc5, 0x00, 0xcf, 0xe9,
+ 0xc4, 0xbc, 0x79, 0x00, 0xcf, 0x68, 0x44, 0xe4, 0xb3, 0xc3, 0x49, 0x40,
+ 0x43, 0x2c, 0x7e, 0x43, 0x49, 0x4c, 0xc3, 0x3a, 0xc5, 0x00, 0xcf, 0xb9,
+ 0xc4, 0xbc, 0x79, 0x00, 0xcf, 0x38, 0xc3, 0x3a, 0xc5, 0x00, 0xcf, 0xb1,
+ 0xc4, 0xbc, 0x79, 0x00, 0xcf, 0x30, 0xc3, 0x3a, 0xc5, 0x00, 0xcf, 0xa9,
+ 0xc4, 0xbc, 0x79, 0x00, 0xcf, 0x28, 0x43, 0xec, 0xc9, 0xc3, 0x49, 0x58,
+ 0xc5, 0xd9, 0x70, 0x00, 0xbf, 0xb9, 0xc6, 0xc6, 0x87, 0x00, 0xbf, 0xb1,
+ 0x8f, 0x00, 0xbf, 0xa8, 0x97, 0x01, 0x8f, 0xa0, 0x91, 0x0d, 0x8b, 0x31,
0x87, 0x0d, 0x8b, 0x29, 0x8b, 0x0d, 0x8b, 0x21, 0x83, 0x01, 0x87, 0x70,
0x91, 0x0d, 0x8a, 0xa9, 0x83, 0x01, 0x87, 0xa9, 0x87, 0x01, 0x87, 0xb1,
0x97, 0x01, 0x8f, 0x80, 0x97, 0x01, 0x86, 0x19, 0x91, 0x01, 0x8f, 0x98,
@@ -17165,3935 +17252,3921 @@ uint8_t UnicodeNameToCodepointIndex_[241561] = {
0x8b, 0x01, 0x8f, 0xb1, 0x97, 0x01, 0x8f, 0xb9, 0x87, 0x01, 0x8f, 0xc1,
0x91, 0x01, 0x8f, 0xc8, 0x83, 0x01, 0x8f, 0xd9, 0x8b, 0x01, 0x8f, 0xe1,
0x97, 0x01, 0x8f, 0xe9, 0x87, 0x01, 0x8f, 0xf1, 0x91, 0x01, 0x8f, 0xf8,
- 0x87, 0x0d, 0x89, 0x09, 0x8b, 0x0d, 0x89, 0x00, 0x4f, 0x6b, 0x78, 0xc3,
- 0x45, 0xc7, 0x45, 0x27, 0x0d, 0x43, 0x45, 0xe3, 0x94, 0x00, 0x64, 0x5b,
- 0x03, 0x45, 0xfb, 0x8e, 0x00, 0x64, 0x62, 0x03, 0x45, 0xff, 0xcb, 0x91,
- 0x19, 0x00, 0x66, 0xe8, 0x83, 0x00, 0x64, 0xf9, 0xc2, 0x00, 0xa4, 0x00,
- 0x65, 0x00, 0x83, 0x00, 0x65, 0x09, 0xc2, 0x00, 0xa4, 0x00, 0x65, 0x10,
- 0x83, 0x00, 0x65, 0x99, 0xc2, 0x00, 0xc7, 0x00, 0x66, 0xf0, 0xc4, 0x17,
- 0xa2, 0x01, 0x7d, 0x81, 0x88, 0x01, 0x7d, 0xa0, 0x44, 0x03, 0x1e, 0x43,
- 0x46, 0x03, 0x8a, 0x01, 0x7b, 0x59, 0xc8, 0x98, 0x5d, 0x01, 0x7d, 0x20,
- 0xc2, 0x00, 0xe5, 0x01, 0x78, 0x19, 0xc2, 0x00, 0x7b, 0x01, 0x7d, 0x50,
- 0xc2, 0x00, 0xbb, 0x01, 0x7b, 0x69, 0xc3, 0x58, 0xc4, 0x01, 0x7c, 0xa0,
- 0x44, 0xdf, 0xeb, 0xc3, 0x46, 0x0f, 0xc2, 0x00, 0xb2, 0x01, 0x79, 0xb8,
- 0xc2, 0x04, 0x37, 0x01, 0x7b, 0xd1, 0xc2, 0x03, 0xc7, 0x01, 0x7c, 0xc8,
- 0x92, 0x01, 0x79, 0xd9, 0xc2, 0x00, 0xc2, 0x01, 0x7a, 0x98, 0x92, 0x01,
- 0x7a, 0x63, 0x03, 0x46, 0x1b, 0xc2, 0x00, 0x8c, 0x01, 0x7b, 0x78, 0x90,
- 0x01, 0x7c, 0x99, 0xc2, 0x02, 0x60, 0x01, 0x7d, 0xd0, 0xc2, 0x03, 0x3d,
- 0x01, 0x79, 0xe1, 0x86, 0x01, 0x7d, 0xc0, 0xc4, 0xe4, 0x97, 0x01, 0x79,
- 0xe9, 0xcc, 0x73, 0xd7, 0x01, 0x7a, 0xc8, 0xc2, 0x00, 0x6e, 0x01, 0x78,
- 0xe9, 0x10, 0x43, 0x46, 0x21, 0xc3, 0x04, 0x45, 0x01, 0x7c, 0x29, 0xc4,
- 0x04, 0x6e, 0x01, 0x7d, 0x00, 0xc2, 0x00, 0x6e, 0x01, 0x78, 0xf8, 0x90,
- 0x01, 0x7a, 0x91, 0x99, 0x01, 0x7a, 0xb0, 0xca, 0x5f, 0x58, 0x01, 0x7c,
- 0x78, 0xcb, 0x93, 0x34, 0x01, 0x51, 0x61, 0xcc, 0x85, 0x54, 0x01, 0x51,
- 0x59, 0xc9, 0x0e, 0xac, 0x01, 0x51, 0x51, 0xcb, 0x53, 0xfe, 0x01, 0x51,
- 0x48, 0x95, 0x0f, 0x46, 0x89, 0xca, 0x6f, 0x20, 0x0f, 0x46, 0xa0, 0xc7,
- 0x0c, 0x4b, 0x08, 0x4e, 0xd3, 0x03, 0x46, 0x2b, 0xc8, 0x50, 0x00, 0x08,
- 0x4f, 0x18, 0xc7, 0x0c, 0x4b, 0x08, 0x4e, 0xcb, 0x03, 0x46, 0x31, 0xc8,
- 0x50, 0x00, 0x08, 0x4f, 0x10, 0x00, 0xc3, 0x46, 0x37, 0xc2, 0x0c, 0x57,
- 0x08, 0x4e, 0x7a, 0x03, 0x46, 0x46, 0x00, 0xc3, 0x46, 0x4c, 0xc2, 0x0c,
- 0x57, 0x08, 0x4e, 0x72, 0x03, 0x46, 0x5b, 0xc2, 0x00, 0x7b, 0x08, 0x4e,
- 0x23, 0x03, 0x46, 0x61, 0xc3, 0x43, 0xcd, 0x08, 0x4e, 0x6a, 0x03, 0x46,
- 0x65, 0xc2, 0x02, 0x53, 0x08, 0x4e, 0x1b, 0x03, 0x46, 0x6b, 0xc3, 0x0c,
- 0x56, 0x08, 0x4e, 0x62, 0x03, 0x46, 0x6f, 0xc3, 0x04, 0x5f, 0x08, 0x4e,
- 0x13, 0x03, 0x46, 0x75, 0xc4, 0x0c, 0x55, 0x08, 0x4e, 0x5a, 0x03, 0x46,
- 0x79, 0x91, 0x08, 0x4e, 0x0b, 0x03, 0x46, 0x7f, 0xc4, 0x18, 0x85, 0x08,
- 0x4e, 0x52, 0x03, 0x46, 0x83, 0xc9, 0x4f, 0xff, 0x08, 0x4f, 0x20, 0xc7,
- 0x0c, 0x4b, 0x08, 0x4e, 0x93, 0x03, 0x46, 0x89, 0xc8, 0x50, 0x00, 0x08,
+ 0x87, 0x0d, 0x89, 0x09, 0x8b, 0x0d, 0x89, 0x00, 0x4d, 0x7e, 0xe9, 0xc3,
+ 0x49, 0x64, 0x43, 0x0e, 0x70, 0x43, 0x49, 0x8e, 0x94, 0x00, 0x64, 0x5b,
+ 0x03, 0x49, 0xb2, 0x8e, 0x00, 0x64, 0x62, 0x03, 0x49, 0xb6, 0xcb, 0x94,
+ 0xeb, 0x00, 0x66, 0xe8, 0x83, 0x00, 0x64, 0xf9, 0xc2, 0x01, 0x0e, 0x00,
+ 0x65, 0x00, 0x83, 0x00, 0x65, 0x09, 0xc2, 0x01, 0x0e, 0x00, 0x65, 0x10,
+ 0x83, 0x00, 0x65, 0x99, 0xc2, 0x00, 0x96, 0x00, 0x66, 0xf0, 0xc4, 0x13,
+ 0xff, 0x01, 0x7d, 0x81, 0x88, 0x01, 0x7d, 0xa0, 0x44, 0x04, 0x8d, 0x43,
+ 0x49, 0xba, 0x8a, 0x01, 0x7b, 0x59, 0xc8, 0x9a, 0x82, 0x01, 0x7d, 0x20,
+ 0xc2, 0x01, 0x0d, 0x01, 0x78, 0x19, 0xc2, 0x00, 0x29, 0x01, 0x7d, 0x50,
+ 0xc2, 0x00, 0x4d, 0x01, 0x7b, 0x69, 0xc3, 0x38, 0xf1, 0x01, 0x7c, 0xa0,
+ 0x44, 0x69, 0x7b, 0xc3, 0x49, 0xc6, 0xc2, 0x00, 0xf2, 0x01, 0x79, 0xb8,
+ 0xc2, 0x06, 0x97, 0x01, 0x7b, 0xd1, 0xc2, 0x00, 0x30, 0x01, 0x7c, 0xc8,
+ 0x92, 0x01, 0x79, 0xd9, 0xc2, 0x01, 0x02, 0x01, 0x7a, 0x98, 0x92, 0x01,
+ 0x7a, 0x63, 0x03, 0x49, 0xd2, 0xc2, 0x03, 0x5f, 0x01, 0x7b, 0x78, 0x90,
+ 0x01, 0x7c, 0x99, 0xc2, 0x00, 0xa7, 0x01, 0x7d, 0xd0, 0xc2, 0x00, 0x2b,
+ 0x01, 0x79, 0xe1, 0x86, 0x01, 0x7d, 0xc0, 0xc4, 0xe5, 0xa7, 0x01, 0x79,
+ 0xe9, 0xcc, 0x72, 0x80, 0x01, 0x7a, 0xc8, 0xc2, 0x01, 0x47, 0x01, 0x78,
+ 0xe9, 0x10, 0x43, 0x49, 0xd8, 0xc3, 0x09, 0x46, 0x01, 0x7c, 0x29, 0xc4,
+ 0x01, 0xce, 0x01, 0x7d, 0x00, 0xc2, 0x01, 0x47, 0x01, 0x78, 0xf8, 0x90,
+ 0x01, 0x7a, 0x91, 0x99, 0x01, 0x7a, 0xb0, 0xca, 0x5e, 0xe5, 0x01, 0x7c,
+ 0x78, 0xcb, 0x8f, 0x97, 0x01, 0x51, 0x61, 0xcc, 0x84, 0xfc, 0x01, 0x51,
+ 0x59, 0xc9, 0x0d, 0xd7, 0x01, 0x51, 0x51, 0xcb, 0x53, 0xc2, 0x01, 0x51,
+ 0x48, 0x95, 0x0f, 0x46, 0x89, 0xca, 0xa2, 0x9a, 0x0f, 0x46, 0xa0, 0xc7,
+ 0x0d, 0x7f, 0x08, 0x4e, 0xd3, 0x03, 0x49, 0xe2, 0xc8, 0x4f, 0xa2, 0x08,
+ 0x4f, 0x18, 0xc7, 0x0d, 0x7f, 0x08, 0x4e, 0xcb, 0x03, 0x49, 0xe8, 0xc8,
+ 0x4f, 0xa2, 0x08, 0x4f, 0x10, 0x00, 0xc3, 0x49, 0xee, 0xc2, 0x0d, 0x8b,
+ 0x08, 0x4e, 0x7a, 0x03, 0x49, 0xfd, 0x00, 0xc3, 0x4a, 0x03, 0xc2, 0x0d,
+ 0x8b, 0x08, 0x4e, 0x72, 0x03, 0x4a, 0x12, 0xc2, 0x00, 0x29, 0x08, 0x4e,
+ 0x23, 0x03, 0x4a, 0x18, 0xc3, 0x41, 0xca, 0x08, 0x4e, 0x6a, 0x03, 0x4a,
+ 0x1c, 0xc2, 0x00, 0xd3, 0x08, 0x4e, 0x1b, 0x03, 0x4a, 0x22, 0xc3, 0x0d,
+ 0x8a, 0x08, 0x4e, 0x62, 0x03, 0x4a, 0x26, 0xc3, 0x05, 0xdf, 0x08, 0x4e,
+ 0x13, 0x03, 0x4a, 0x2c, 0xc4, 0x0d, 0x89, 0x08, 0x4e, 0x5a, 0x03, 0x4a,
+ 0x30, 0x91, 0x08, 0x4e, 0x0b, 0x03, 0x4a, 0x36, 0xc4, 0x15, 0xa9, 0x08,
+ 0x4e, 0x52, 0x03, 0x4a, 0x3a, 0xc9, 0x4f, 0xa1, 0x08, 0x4f, 0x20, 0xc7,
+ 0x0d, 0x7f, 0x08, 0x4e, 0x93, 0x03, 0x4a, 0x40, 0xc8, 0x4f, 0xa2, 0x08,
0x4e, 0xd8, 0x91, 0x08, 0x4d, 0xb1, 0x87, 0x08, 0x4d, 0xa9, 0x83, 0x08,
- 0x4d, 0xa0, 0x83, 0x08, 0x4d, 0x91, 0xc2, 0x00, 0xa4, 0x08, 0x4d, 0x68,
- 0x87, 0x08, 0x4d, 0x89, 0x83, 0x08, 0x4d, 0x78, 0xc9, 0x8a, 0x4c, 0x08,
- 0x4d, 0x80, 0x87, 0x08, 0x4d, 0x51, 0x83, 0x08, 0x4d, 0x48, 0xc2, 0xe7,
- 0x79, 0x08, 0x4c, 0xd8, 0xc2, 0xe7, 0x79, 0x08, 0x4c, 0xc8, 0xc2, 0xe7,
- 0x79, 0x08, 0x4c, 0xa0, 0xc2, 0xe7, 0x79, 0x08, 0x4c, 0x58, 0xc2, 0xe7,
- 0x79, 0x08, 0x4c, 0x68, 0x49, 0x3b, 0x4d, 0xc3, 0x46, 0x8f, 0x4a, 0x2f,
- 0x8b, 0xc3, 0x46, 0x9b, 0x49, 0x41, 0x75, 0xc3, 0x46, 0xa7, 0x47, 0x53,
- 0xb4, 0x43, 0x46, 0xb3, 0x15, 0xc3, 0x46, 0xbf, 0xc2, 0x00, 0x4d, 0x00,
- 0xeb, 0xc1, 0xc9, 0xa9, 0xd6, 0x05, 0x34, 0xe0, 0x99, 0x00, 0xea, 0x11,
- 0x97, 0x00, 0xea, 0x09, 0x96, 0x00, 0xea, 0x01, 0x94, 0x00, 0xe9, 0xfb,
- 0x03, 0x46, 0xcb, 0x92, 0x00, 0xe9, 0xf1, 0x91, 0x00, 0xe9, 0xe3, 0x03,
- 0x46, 0xd1, 0x90, 0x00, 0xe9, 0xd1, 0x8f, 0x00, 0xe9, 0xc9, 0x8e, 0x00,
- 0xe9, 0xc1, 0x8d, 0x00, 0xe9, 0xb9, 0x8c, 0x00, 0xe9, 0xb1, 0x8b, 0x00,
- 0xe9, 0xa9, 0x8a, 0x00, 0xe9, 0xa3, 0x03, 0x46, 0xd5, 0x89, 0x00, 0xe9,
- 0x99, 0x87, 0x00, 0xe9, 0x89, 0x86, 0x00, 0xe9, 0x81, 0x84, 0x00, 0xe9,
- 0x73, 0x03, 0x46, 0xdb, 0x83, 0x00, 0xe9, 0x63, 0x03, 0x46, 0xe1, 0x85,
- 0x05, 0x3f, 0x91, 0x88, 0x05, 0x3f, 0x99, 0x93, 0x05, 0x3f, 0xa1, 0x98,
- 0x01, 0x63, 0xe8, 0x43, 0x03, 0x59, 0xc3, 0x46, 0xe5, 0x44, 0x10, 0xf1,
- 0x43, 0x46, 0xfd, 0xcf, 0x63, 0xf8, 0x00, 0x16, 0x91, 0xce, 0x0f, 0x34,
- 0x00, 0x16, 0x98, 0x96, 0x00, 0xea, 0xbb, 0x03, 0x47, 0x15, 0x87, 0x00,
- 0xea, 0x4b, 0x03, 0x47, 0x42, 0x9c, 0x00, 0xed, 0xdb, 0x03, 0x47, 0x5a,
- 0x98, 0x00, 0xea, 0xdb, 0x03, 0x47, 0x60, 0x85, 0x00, 0xec, 0xe3, 0x03,
- 0x47, 0x66, 0x97, 0x00, 0xea, 0xc3, 0x03, 0x47, 0x7e, 0x95, 0x00, 0x17,
- 0x13, 0x03, 0x47, 0x88, 0x92, 0x00, 0xea, 0xb3, 0x03, 0x47, 0x98, 0x84,
- 0x00, 0xea, 0x3b, 0x03, 0x47, 0x9e, 0x47, 0x00, 0xce, 0xc3, 0x47, 0xb6,
- 0x8f, 0x00, 0xea, 0x83, 0x03, 0x47, 0xc2, 0x8e, 0x00, 0x17, 0x0b, 0x03,
- 0x47, 0xc8, 0x8c, 0x00, 0x15, 0x93, 0x03, 0x47, 0xe9, 0x0b, 0xc3, 0x47,
- 0xef, 0x86, 0x00, 0xea, 0x43, 0x03, 0x47, 0xfb, 0x88, 0x00, 0xed, 0x03,
- 0x03, 0x48, 0x17, 0x94, 0x00, 0x15, 0x9b, 0x03, 0x48, 0x1d, 0x89, 0x00,
- 0xea, 0x6b, 0x03, 0x48, 0x2f, 0x83, 0x00, 0xea, 0x1b, 0x03, 0x48, 0x41,
- 0x91, 0x00, 0xea, 0x93, 0x03, 0x48, 0x51, 0x8d, 0x00, 0xea, 0x79, 0x8a,
- 0x00, 0x15, 0x83, 0x03, 0x48, 0x5d, 0x99, 0x00, 0x15, 0xb9, 0x9b, 0x00,
- 0x15, 0xc1, 0x9a, 0x00, 0x17, 0x19, 0x93, 0x08, 0x3d, 0x28, 0xd5, 0x37,
- 0x81, 0x08, 0x3c, 0x11, 0xd0, 0x37, 0x86, 0x08, 0x3c, 0x08, 0xc9, 0x3b,
- 0x75, 0x05, 0x39, 0x01, 0xc8, 0xaf, 0xa4, 0x05, 0x39, 0x08, 0xc3, 0x71,
- 0x67, 0x00, 0x17, 0xe9, 0xcf, 0x67, 0x31, 0x05, 0x3c, 0x50, 0xc4, 0x36,
- 0xab, 0x05, 0x5b, 0x59, 0xc9, 0x0f, 0x39, 0x00, 0x15, 0xf1, 0xc9, 0x03,
- 0x7e, 0x00, 0x16, 0x18, 0x47, 0x10, 0x91, 0xc3, 0x48, 0x6c, 0x16, 0x43,
- 0x48, 0x7b, 0xc8, 0x49, 0x92, 0x05, 0x38, 0xd9, 0xca, 0x3c, 0xdd, 0x05,
- 0x38, 0xe1, 0xd0, 0x0f, 0xfb, 0x05, 0x38, 0xe9, 0xd9, 0x1e, 0x8d, 0x05,
- 0x38, 0xf1, 0xc5, 0x37, 0x91, 0x00, 0x17, 0xc0, 0xc4, 0x36, 0xab, 0x05,
- 0x5b, 0x51, 0xc9, 0x0f, 0x39, 0x00, 0x15, 0xf9, 0xc9, 0x03, 0x7e, 0x00,
- 0x16, 0x10, 0x00, 0xc3, 0x48, 0x81, 0xd5, 0x35, 0xc8, 0x05, 0x38, 0xd0,
- 0xcc, 0x21, 0x84, 0x08, 0x3d, 0x98, 0xc9, 0x3b, 0x75, 0x00, 0x17, 0xc9,
- 0xc8, 0xaf, 0xa4, 0x00, 0x17, 0xd8, 0x45, 0x06, 0xf3, 0xc3, 0x48, 0xc1,
- 0x43, 0x05, 0x65, 0xc3, 0x48, 0xcd, 0x42, 0x00, 0xed, 0x43, 0x48, 0xd9,
- 0xc9, 0x03, 0x7e, 0x00, 0x16, 0x21, 0xc4, 0x36, 0xab, 0x00, 0x16, 0xa0,
- 0x06, 0xc3, 0x48, 0xeb, 0xc8, 0x61, 0x65, 0x00, 0x16, 0xb8, 0x45, 0x05,
- 0x2b, 0xc3, 0x48, 0xf5, 0x44, 0x08, 0x76, 0x43, 0x49, 0x07, 0xc9, 0x3b,
- 0x75, 0x00, 0x17, 0xd1, 0xc8, 0xaf, 0xa4, 0x00, 0x17, 0xe0, 0x47, 0x1b,
- 0xf3, 0xc3, 0x49, 0x19, 0xd2, 0x4a, 0x46, 0x05, 0x38, 0x99, 0xc8, 0x4a,
- 0x50, 0x00, 0x17, 0x30, 0xc4, 0x18, 0x83, 0x08, 0xb2, 0xb9, 0xc2, 0x26,
- 0x51, 0x08, 0xb2, 0xb0, 0xc3, 0x0c, 0x5b, 0x08, 0xb2, 0xa9, 0xc3, 0x06,
- 0x9e, 0x08, 0xb2, 0xa0, 0xc4, 0x04, 0x5e, 0x08, 0xb2, 0x99, 0xc2, 0x01,
- 0x47, 0x08, 0xb2, 0x90, 0x8e, 0x08, 0xb1, 0xc0, 0x94, 0x08, 0xb1, 0xb0,
- 0x8e, 0x08, 0xb0, 0x43, 0x03, 0x49, 0x25, 0x94, 0x08, 0xb0, 0x32, 0x03,
- 0x49, 0x29, 0xc2, 0x00, 0xa4, 0x08, 0xb0, 0xd9, 0x83, 0x08, 0xb0, 0xd0,
- 0xc2, 0x00, 0xa4, 0x08, 0xb0, 0xc9, 0x83, 0x08, 0xb0, 0xc0, 0xc3, 0x8c,
- 0x67, 0x00, 0xc5, 0x51, 0xc3, 0x22, 0x7b, 0x00, 0xc5, 0x41, 0x1c, 0xc3,
- 0x49, 0x2d, 0x05, 0xc3, 0x49, 0x37, 0xc3, 0x1a, 0x80, 0x00, 0xc5, 0x11,
- 0x06, 0xc3, 0x49, 0x41, 0x16, 0xc3, 0x49, 0x4d, 0xc3, 0xe7, 0x6c, 0x00,
- 0xc4, 0xe9, 0xc3, 0x6b, 0x49, 0x00, 0xc4, 0xd9, 0xc3, 0x4a, 0xb1, 0x00,
- 0xc4, 0xd0, 0x83, 0x00, 0xc4, 0x8b, 0x03, 0x49, 0x57, 0xc2, 0x0b, 0xc6,
- 0x00, 0xc4, 0x70, 0xc2, 0x1d, 0x5f, 0x00, 0xc5, 0x39, 0x97, 0x00, 0xc5,
- 0x30, 0x8a, 0x00, 0xc4, 0xb9, 0xcb, 0x92, 0x84, 0x00, 0xc4, 0x00, 0x83,
- 0x00, 0xc4, 0xb1, 0xc2, 0x00, 0xa4, 0x00, 0xc4, 0xa8, 0xc2, 0x00, 0xa4,
- 0x00, 0xc4, 0x99, 0x83, 0x00, 0xc4, 0x90, 0x83, 0x00, 0xc4, 0x81, 0x16,
- 0xc3, 0x49, 0x63, 0xcb, 0x8f, 0xfb, 0x00, 0xc4, 0x30, 0xc2, 0x00, 0xc1,
- 0x00, 0xc4, 0x79, 0xc2, 0x01, 0x29, 0x00, 0xc4, 0x50, 0xcf, 0x61, 0x91,
- 0x00, 0xc4, 0x20, 0x48, 0xb0, 0x6a, 0xc3, 0x49, 0x6d, 0xc2, 0x00, 0x35,
- 0x00, 0xc2, 0x50, 0xc2, 0x02, 0xb4, 0x00, 0xc2, 0xe1, 0x83, 0x00, 0xc2,
- 0x88, 0xc2, 0x01, 0xe9, 0x00, 0xc2, 0xd1, 0x83, 0x00, 0xc2, 0x98, 0x83,
- 0x00, 0xc2, 0xc0, 0xc2, 0x0c, 0x65, 0x00, 0xc2, 0xa1, 0x83, 0x00, 0xc2,
- 0x80, 0x87, 0x00, 0xc2, 0x48, 0x87, 0x00, 0xc2, 0x40, 0xc2, 0x00, 0xa4,
- 0x00, 0xc3, 0x91, 0x83, 0x00, 0xc3, 0x78, 0xc2, 0x0c, 0x65, 0x00, 0xc3,
- 0x71, 0x83, 0x00, 0xc3, 0x40, 0x83, 0x00, 0xc3, 0x68, 0x83, 0x00, 0xc3,
- 0x60, 0x87, 0x00, 0xc3, 0x00, 0x9b, 0x00, 0xc2, 0xf8, 0xc3, 0x10, 0x99,
- 0x0e, 0xb7, 0xd1, 0xc5, 0xde, 0x58, 0x0e, 0xb7, 0x80, 0xc7, 0x00, 0x70,
- 0x0e, 0xb7, 0x98, 0xc3, 0x10, 0x99, 0x0e, 0xb8, 0xa1, 0xc5, 0xde, 0x58,
- 0x0e, 0xb8, 0x50, 0x8c, 0x0e, 0xb5, 0x29, 0x8b, 0x0e, 0xb5, 0x20, 0xc3,
- 0x05, 0xe7, 0x0e, 0xb6, 0x38, 0x8b, 0x0e, 0xb6, 0x78, 0xc6, 0x12, 0x12,
- 0x0e, 0xb6, 0xb0, 0xc6, 0x52, 0x38, 0x0e, 0xbe, 0x59, 0xc4, 0xd7, 0x88,
- 0x0e, 0xb6, 0x28, 0x0f, 0x43, 0x49, 0x79, 0xc2, 0x00, 0xcb, 0x0e, 0xb6,
- 0xc9, 0xc2, 0x00, 0x0a, 0x0e, 0xb6, 0xb9, 0x8b, 0x0e, 0xb6, 0x88, 0xc2,
- 0x00, 0x0a, 0x0e, 0xb6, 0xc0, 0xc2, 0x13, 0xa9, 0x0e, 0xb6, 0xa9, 0xc4,
- 0x89, 0x81, 0x0e, 0xb6, 0x48, 0xc4, 0x1c, 0x84, 0x0e, 0xb6, 0xa0, 0xca,
- 0x92, 0xd2, 0x0e, 0xb6, 0x98, 0xc2, 0x00, 0xf6, 0x0e, 0xb6, 0x90, 0x97,
- 0x0e, 0xb6, 0x70, 0x97, 0x0e, 0xb6, 0x68, 0xc4, 0xda, 0xda, 0x0e, 0xb6,
- 0x60, 0xc4, 0x8b, 0x49, 0x0e, 0xb6, 0x58, 0xc3, 0x00, 0xb2, 0x0e, 0xb6,
- 0x50, 0xc2, 0x00, 0xb3, 0x0e, 0xb6, 0x41, 0xc6, 0x12, 0x12, 0x0e, 0xb6,
- 0x30, 0xc4, 0x32, 0x6d, 0x0e, 0xb6, 0x20, 0xc3, 0x05, 0xe7, 0x0e, 0xb6,
- 0x18, 0xc4, 0xd9, 0x9f, 0x0e, 0xb6, 0x10, 0x9c, 0x0e, 0xa8, 0x19, 0x9b,
- 0x0e, 0xa8, 0x11, 0x9a, 0x0e, 0xa8, 0x09, 0x99, 0x0e, 0xa8, 0x01, 0x98,
- 0x0e, 0xa7, 0xf9, 0x97, 0x0e, 0xa7, 0xf1, 0x96, 0x0e, 0xa7, 0xe9, 0x95,
- 0x0e, 0xa7, 0xe1, 0x94, 0x0e, 0xa7, 0xd9, 0x93, 0x0e, 0xa7, 0xd1, 0x92,
- 0x0e, 0xa7, 0xc9, 0x91, 0x0e, 0xa7, 0xc1, 0x90, 0x0e, 0xa7, 0xb9, 0x8f,
- 0x0e, 0xa7, 0xb1, 0x8e, 0x0e, 0xa7, 0xa9, 0x8d, 0x0e, 0xa7, 0xa1, 0x8c,
- 0x0e, 0xa7, 0x99, 0x8b, 0x0e, 0xa7, 0x91, 0x8a, 0x0e, 0xa7, 0x89, 0x89,
- 0x0e, 0xa7, 0x81, 0x88, 0x0e, 0xa7, 0x79, 0x87, 0x0e, 0xa7, 0x71, 0x86,
- 0x0e, 0xa7, 0x69, 0x85, 0x0e, 0xa7, 0x61, 0x84, 0x0e, 0xa7, 0x59, 0x83,
- 0x0e, 0xa7, 0x50, 0x9c, 0x0e, 0xa7, 0x49, 0x9b, 0x0e, 0xa7, 0x41, 0x9a,
- 0x0e, 0xa7, 0x39, 0x99, 0x0e, 0xa7, 0x31, 0x98, 0x0e, 0xa7, 0x29, 0x97,
- 0x0e, 0xa7, 0x21, 0x96, 0x0e, 0xa7, 0x19, 0x95, 0x0e, 0xa7, 0x11, 0x94,
- 0x0e, 0xa7, 0x09, 0x93, 0x0e, 0xa7, 0x01, 0x92, 0x0e, 0xa6, 0xf9, 0x91,
- 0x0e, 0xa6, 0xf1, 0x90, 0x0e, 0xa6, 0xe9, 0x8f, 0x0e, 0xa6, 0xe1, 0x8e,
- 0x0e, 0xa6, 0xd9, 0x8d, 0x0e, 0xa6, 0xd1, 0x8c, 0x0e, 0xa6, 0xc9, 0x8b,
- 0x0e, 0xa6, 0xc1, 0x8a, 0x0e, 0xa6, 0xb9, 0x89, 0x0e, 0xa6, 0xb1, 0x88,
- 0x0e, 0xa6, 0xa9, 0x87, 0x0e, 0xa6, 0xa1, 0x86, 0x0e, 0xa6, 0x99, 0x85,
- 0x0e, 0xa6, 0x91, 0x84, 0x0e, 0xa6, 0x89, 0x83, 0x0e, 0xa6, 0x80, 0xc3,
- 0x10, 0x99, 0x0e, 0xb6, 0x01, 0xc5, 0xde, 0x58, 0x0e, 0xb5, 0xb0, 0xc7,
- 0x00, 0x70, 0x0e, 0xb5, 0xc8, 0x0f, 0x43, 0x49, 0x85, 0xc2, 0x00, 0xcb,
- 0x0e, 0xba, 0x69, 0xc2, 0x00, 0x0a, 0x0e, 0xba, 0x59, 0x8b, 0x0e, 0xba,
- 0x28, 0xc2, 0x00, 0x0a, 0x0e, 0xba, 0x60, 0xc6, 0x12, 0x12, 0x0e, 0xba,
- 0x50, 0xc2, 0x13, 0xa9, 0x0e, 0xba, 0x49, 0xc4, 0x89, 0x81, 0x0e, 0xb9,
- 0xe8, 0xc4, 0x1c, 0x84, 0x0e, 0xba, 0x40, 0xca, 0x92, 0xd2, 0x0e, 0xba,
- 0x38, 0xc2, 0x00, 0xf6, 0x0e, 0xba, 0x30, 0x8b, 0x0e, 0xba, 0x18, 0x97,
- 0x0e, 0xba, 0x10, 0x97, 0x0e, 0xba, 0x08, 0xc4, 0xda, 0xda, 0x0e, 0xba,
- 0x00, 0xc4, 0x8b, 0x49, 0x0e, 0xb9, 0xf8, 0xc3, 0x00, 0xb2, 0x0e, 0xb9,
- 0xf0, 0xc2, 0x00, 0xb3, 0x0e, 0xb9, 0xe1, 0xc6, 0x12, 0x12, 0x0e, 0xb9,
- 0xd0, 0xc3, 0x05, 0xe7, 0x0e, 0xb9, 0xd8, 0xc4, 0xd7, 0x88, 0x0e, 0xb9,
- 0xc8, 0xc4, 0x32, 0x6d, 0x0e, 0xb9, 0xc0, 0xc3, 0x05, 0xe7, 0x0e, 0xb9,
- 0xb8, 0xc4, 0xd9, 0x9f, 0x0e, 0xb9, 0xb0, 0x0f, 0x43, 0x49, 0x91, 0xc2,
- 0x00, 0xcb, 0x0e, 0xb9, 0x99, 0xc2, 0x00, 0x0a, 0x0e, 0xb9, 0x89, 0x8b,
- 0x0e, 0xb9, 0x58, 0xc2, 0x00, 0x0a, 0x0e, 0xb9, 0x90, 0xc6, 0x12, 0x12,
- 0x0e, 0xb9, 0x80, 0xc2, 0x13, 0xa9, 0x0e, 0xb9, 0x79, 0xc4, 0x89, 0x81,
- 0x0e, 0xb9, 0x1a, 0x03, 0x49, 0x9d, 0xc4, 0x1c, 0x84, 0x0e, 0xb9, 0x70,
- 0xc2, 0x00, 0xf6, 0x0e, 0xb9, 0x60, 0x8b, 0x0e, 0xb9, 0x48, 0x97, 0x0e,
- 0xb9, 0x40, 0x97, 0x0e, 0xb9, 0x38, 0xc4, 0xda, 0xda, 0x0e, 0xb9, 0x30,
- 0xc4, 0x8b, 0x49, 0x0e, 0xb9, 0x28, 0xc3, 0x00, 0xb2, 0x0e, 0xb9, 0x20,
- 0xc2, 0x00, 0xb3, 0x0e, 0xb9, 0x11, 0xc6, 0x12, 0x12, 0x0e, 0xb9, 0x00,
- 0xc3, 0x05, 0xe7, 0x0e, 0xb9, 0x08, 0xc4, 0xd7, 0x88, 0x0e, 0xb8, 0xf8,
- 0xc4, 0x32, 0x6d, 0x0e, 0xb8, 0xf0, 0xc3, 0x05, 0xe7, 0x0e, 0xb8, 0xe8,
- 0xc4, 0xd9, 0x9f, 0x0e, 0xb8, 0xe0, 0xc4, 0x22, 0x71, 0x0e, 0xbf, 0xa9,
- 0xc5, 0x01, 0xdb, 0x0e, 0xbf, 0xa1, 0x15, 0xc3, 0x49, 0xa3, 0x08, 0xc3,
- 0x49, 0xaf, 0x16, 0xc3, 0x49, 0xbb, 0xc3, 0x01, 0xb4, 0x0e, 0xbf, 0x69,
- 0xc4, 0x15, 0xd3, 0x0e, 0xbf, 0x60, 0x12, 0xc3, 0x49, 0xc7, 0xca, 0xa6,
- 0xda, 0x0e, 0xbe, 0x41, 0xcc, 0x8b, 0x48, 0x0e, 0xbe, 0x31, 0xcc, 0x89,
- 0x80, 0x0e, 0xbe, 0x29, 0xce, 0x12, 0x11, 0x0e, 0xbe, 0x21, 0x46, 0x04,
- 0x73, 0xc3, 0x49, 0xd9, 0xc5, 0xdc, 0x87, 0x0e, 0xbd, 0x49, 0x48, 0x07,
- 0x17, 0x43, 0x4a, 0x7d, 0xc8, 0x9d, 0xa4, 0x0e, 0xbc, 0x79, 0xc9, 0xaa,
- 0xa5, 0x0e, 0xbc, 0x69, 0xd3, 0x45, 0x3e, 0x0e, 0xbc, 0x48, 0x91, 0x0e,
- 0xaf, 0xe3, 0x03, 0x4b, 0x1e, 0x92, 0x0e, 0xaf, 0xeb, 0x03, 0x4b, 0x22,
- 0x85, 0x0e, 0xaf, 0x83, 0x03, 0x4b, 0x32, 0x97, 0x0e, 0xb0, 0x13, 0x03,
- 0x4b, 0x38, 0x96, 0x0e, 0xb0, 0x0b, 0x03, 0x4b, 0x3e, 0x95, 0x0e, 0xb0,
- 0x03, 0x03, 0x4b, 0x4a, 0x88, 0x0e, 0xaf, 0x9b, 0x03, 0x4b, 0x50, 0x94,
- 0x0e, 0xaf, 0xfb, 0x03, 0x4b, 0x56, 0x9a, 0x0e, 0xb0, 0x2b, 0x03, 0x4b,
- 0x5c, 0x90, 0x0e, 0xaf, 0xdb, 0x03, 0x4b, 0x60, 0x8f, 0x0e, 0xaf, 0xd3,
- 0x03, 0x4b, 0x64, 0x8e, 0x0e, 0xaf, 0xcb, 0x03, 0x4b, 0x68, 0x8d, 0x0e,
- 0xaf, 0xc3, 0x03, 0x4b, 0x6e, 0x8b, 0x0e, 0xaf, 0xb3, 0x03, 0x4b, 0x74,
- 0x87, 0x0e, 0xaf, 0x93, 0x03, 0x4b, 0x7a, 0x9c, 0x0e, 0xb0, 0x3b, 0x03,
- 0x4b, 0x86, 0x86, 0x0e, 0xaf, 0x8b, 0x03, 0x4b, 0x8c, 0x89, 0x0e, 0xaf,
- 0xa3, 0x03, 0x4b, 0x92, 0x84, 0x0e, 0xaf, 0x7b, 0x03, 0x4b, 0x98, 0x83,
- 0x0e, 0xaf, 0x73, 0x03, 0x4b, 0x9e, 0x9b, 0x0e, 0xb0, 0x31, 0x99, 0x0e,
- 0xb0, 0x21, 0x98, 0x0e, 0xb0, 0x19, 0x93, 0x0e, 0xaf, 0xf1, 0x8c, 0x0e,
- 0xaf, 0xb9, 0x8a, 0x0e, 0xaf, 0xa8, 0x91, 0x0e, 0xaf, 0x13, 0x03, 0x4b,
- 0xa4, 0x92, 0x0e, 0xaf, 0x1b, 0x03, 0x4b, 0xa8, 0x85, 0x0e, 0xae, 0xb3,
- 0x03, 0x4b, 0xb8, 0x97, 0x0e, 0xaf, 0x43, 0x03, 0x4b, 0xbe, 0x96, 0x0e,
- 0xaf, 0x3b, 0x03, 0x4b, 0xc4, 0x95, 0x0e, 0xaf, 0x33, 0x03, 0x4b, 0xd3,
- 0x94, 0x0e, 0xaf, 0x2b, 0x03, 0x4b, 0xd9, 0x9a, 0x0e, 0xaf, 0x5b, 0x03,
- 0x4b, 0xdf, 0x90, 0x0e, 0xaf, 0x0b, 0x03, 0x4b, 0xe3, 0x8f, 0x0e, 0xaf,
- 0x03, 0x03, 0x4b, 0xe7, 0x8e, 0x0e, 0xae, 0xfb, 0x03, 0x4b, 0xeb, 0x8d,
- 0x0e, 0xae, 0xf3, 0x03, 0x4b, 0xf1, 0x8b, 0x0e, 0xae, 0xe3, 0x03, 0x4b,
- 0xf7, 0x87, 0x0e, 0xae, 0xc3, 0x03, 0x4b, 0xfd, 0x9c, 0x0e, 0xaf, 0x6b,
- 0x03, 0x4c, 0x09, 0x86, 0x0e, 0xae, 0xbb, 0x03, 0x4c, 0x0f, 0x89, 0x0e,
- 0xae, 0xd3, 0x03, 0x4c, 0x15, 0x84, 0x0e, 0xae, 0xab, 0x03, 0x4c, 0x1b,
- 0x83, 0x0e, 0xae, 0xa3, 0x03, 0x4c, 0x21, 0x9b, 0x0e, 0xaf, 0x61, 0x99,
- 0x0e, 0xaf, 0x51, 0x98, 0x0e, 0xaf, 0x49, 0x93, 0x0e, 0xaf, 0x21, 0x8c,
- 0x0e, 0xae, 0xe9, 0x8a, 0x0e, 0xae, 0xd9, 0x88, 0x0e, 0xae, 0xc8, 0xc4,
- 0x18, 0x83, 0x0e, 0xbf, 0x49, 0xc2, 0x26, 0x51, 0x0e, 0xbf, 0x40, 0xc3,
- 0x0c, 0x5b, 0x0e, 0xbf, 0x39, 0xc3, 0x06, 0x9e, 0x0e, 0xbf, 0x30, 0xc4,
- 0x04, 0x5e, 0x0e, 0xbf, 0x29, 0xc2, 0x01, 0x47, 0x0e, 0xbf, 0x20, 0x9c,
- 0x0e, 0xb1, 0xd9, 0x9b, 0x0e, 0xb1, 0xd1, 0x9a, 0x0e, 0xb1, 0xc9, 0x99,
- 0x0e, 0xb1, 0xc1, 0x98, 0x0e, 0xb1, 0xb9, 0x97, 0x0e, 0xb1, 0xb1, 0x96,
- 0x0e, 0xb1, 0xa9, 0x95, 0x0e, 0xb1, 0xa1, 0x94, 0x0e, 0xb1, 0x99, 0x93,
- 0x0e, 0xb1, 0x91, 0x92, 0x0e, 0xb1, 0x89, 0x91, 0x0e, 0xb1, 0x81, 0x90,
- 0x0e, 0xb1, 0x79, 0x8f, 0x0e, 0xb1, 0x71, 0x8e, 0x0e, 0xb1, 0x69, 0x8d,
- 0x0e, 0xb1, 0x61, 0x8c, 0x0e, 0xb1, 0x59, 0x8b, 0x0e, 0xb1, 0x51, 0x8a,
- 0x0e, 0xb1, 0x49, 0x89, 0x0e, 0xb1, 0x41, 0x88, 0x0e, 0xb1, 0x39, 0x87,
- 0x0e, 0xb1, 0x31, 0x86, 0x0e, 0xb1, 0x29, 0x85, 0x0e, 0xb1, 0x21, 0x84,
- 0x0e, 0xb1, 0x19, 0x83, 0x0e, 0xb1, 0x10, 0x9c, 0x0e, 0xb1, 0x09, 0x9b,
- 0x0e, 0xb1, 0x01, 0x9a, 0x0e, 0xb0, 0xf9, 0x99, 0x0e, 0xb0, 0xf1, 0x98,
- 0x0e, 0xb0, 0xe9, 0x97, 0x0e, 0xb0, 0xe1, 0x96, 0x0e, 0xb0, 0xd9, 0x95,
- 0x0e, 0xb0, 0xd1, 0x94, 0x0e, 0xb0, 0xc9, 0x93, 0x0e, 0xb0, 0xc1, 0x92,
- 0x0e, 0xb0, 0xb9, 0x91, 0x0e, 0xb0, 0xb1, 0x90, 0x0e, 0xb0, 0xa9, 0x8f,
- 0x0e, 0xb0, 0xa1, 0x8e, 0x0e, 0xb0, 0x99, 0x8d, 0x0e, 0xb0, 0x91, 0x8c,
- 0x0e, 0xb0, 0x89, 0x8b, 0x0e, 0xb0, 0x81, 0x8a, 0x0e, 0xb0, 0x79, 0x89,
- 0x0e, 0xb0, 0x71, 0x88, 0x0e, 0xb0, 0x69, 0x87, 0x0e, 0xb0, 0x61, 0x86,
- 0x0e, 0xb0, 0x59, 0x85, 0x0e, 0xb0, 0x51, 0x84, 0x0e, 0xb0, 0x49, 0x83,
- 0x0e, 0xb0, 0x40, 0xc2, 0x00, 0xa4, 0x08, 0xe5, 0x19, 0x83, 0x08, 0xe5,
- 0x10, 0x94, 0x00, 0x6b, 0x00, 0x8e, 0x00, 0x6b, 0x08, 0x8f, 0x00, 0x6a,
- 0xa1, 0x9b, 0x00, 0x6a, 0xa9, 0x8e, 0x00, 0x6b, 0xeb, 0x03, 0x4c, 0x27,
- 0x90, 0x00, 0x6b, 0xdb, 0x03, 0x4c, 0x2e, 0xc2, 0x01, 0x10, 0x00, 0x6b,
- 0xe1, 0x8d, 0x00, 0x6b, 0xf8, 0xc2, 0x00, 0xa4, 0x08, 0x8b, 0x09, 0x83,
- 0x08, 0x8b, 0x00, 0xc2, 0x00, 0xa4, 0x08, 0x8a, 0xf9, 0x83, 0x08, 0x8a,
- 0xf0, 0xc4, 0x5c, 0x7c, 0x0e, 0x8f, 0x51, 0x46, 0xd1, 0xb3, 0x43, 0x4c,
- 0x32, 0xc3, 0x00, 0xf7, 0x0e, 0x8f, 0x49, 0xc8, 0xbc, 0xdd, 0x0e, 0x8e,
- 0xb3, 0x03, 0x4c, 0x58, 0x46, 0x1e, 0xfc, 0xc3, 0x4c, 0x5e, 0x07, 0xc3,
- 0x4c, 0x68, 0xc5, 0xd7, 0xaa, 0x0e, 0x8c, 0x69, 0x0b, 0xc3, 0x4c, 0x74,
- 0x0a, 0x43, 0x4c, 0x7e, 0x07, 0xc3, 0x4c, 0x8a, 0x11, 0xc3, 0x4c, 0x96,
- 0xc4, 0xe1, 0x57, 0x0e, 0x8c, 0x79, 0xd3, 0x40, 0xf0, 0x0e, 0x8a, 0xb1,
- 0xcc, 0x8a, 0x94, 0x0e, 0x8a, 0x20, 0xc7, 0xc6, 0xf8, 0x0e, 0x8e, 0xc3,
- 0x03, 0x4c, 0xa5, 0x46, 0xd3, 0x69, 0xc3, 0x4c, 0xab, 0xc3, 0x06, 0xff,
- 0x0e, 0x8c, 0xbb, 0x03, 0x4c, 0xb7, 0x94, 0x0e, 0x8c, 0xb3, 0x03, 0x4c,
- 0xbb, 0x0a, 0xc3, 0x4c, 0xc1, 0xcd, 0x7d, 0xd7, 0x0e, 0x88, 0xb8, 0x0e,
- 0xc3, 0x4c, 0xcd, 0x14, 0xc3, 0x4c, 0xd7, 0x11, 0xc3, 0x4c, 0xe3, 0xd0,
- 0x5c, 0x52, 0x0e, 0x8a, 0x29, 0xc7, 0xc8, 0x3a, 0x0e, 0x89, 0xa9, 0xc5,
- 0xb1, 0x32, 0x0e, 0x89, 0x09, 0xc6, 0xd3, 0xd5, 0x0e, 0x88, 0x98, 0xc4,
- 0x09, 0xee, 0x0e, 0x8e, 0x99, 0xcc, 0x85, 0xcc, 0x0e, 0x8a, 0xb8, 0x14,
- 0xc3, 0x4c, 0xed, 0x49, 0xaf, 0xe2, 0xc3, 0x4c, 0xf9, 0xc5, 0xb1, 0x32,
- 0x0e, 0x88, 0xf2, 0x03, 0x4d, 0x05, 0xc5, 0xc1, 0x96, 0x0e, 0x8d, 0xdb,
- 0x03, 0x4d, 0x0b, 0xc5, 0xc6, 0xb9, 0x0e, 0x8d, 0xb1, 0xc4, 0xe1, 0xab,
- 0x0e, 0x8c, 0x81, 0x4d, 0x7c, 0x44, 0xc3, 0x4d, 0x0f, 0x44, 0x1e, 0xa7,
- 0x43, 0x4d, 0x1b, 0x14, 0xc3, 0x4d, 0x27, 0x45, 0x3c, 0xb7, 0x43, 0x4d,
- 0x31, 0xc4, 0x6b, 0x48, 0x0e, 0x8d, 0xbb, 0x03, 0x4d, 0x49, 0xcf, 0x66,
- 0x14, 0x0e, 0x88, 0x30, 0x44, 0xe0, 0x1f, 0xc3, 0x4d, 0x4d, 0x11, 0xc3,
- 0x4d, 0x59, 0x0b, 0xc3, 0x4d, 0x65, 0x44, 0xb3, 0x5e, 0xc3, 0x4d, 0x6f,
- 0xc5, 0xb1, 0x32, 0x0e, 0x89, 0x13, 0x03, 0x4d, 0x7b, 0xc6, 0xd3, 0xbd,
- 0x0e, 0x88, 0x82, 0x03, 0x4d, 0x81, 0x03, 0xc3, 0x4d, 0x87, 0x07, 0xc3,
- 0x4d, 0xa2, 0x46, 0x06, 0xf2, 0xc3, 0x4d, 0xae, 0x49, 0xb1, 0x2f, 0x43,
- 0x4d, 0xc0, 0xcf, 0x60, 0xa1, 0x0e, 0x8d, 0x99, 0x45, 0xa8, 0x6b, 0x43,
- 0x4d, 0xc8, 0x43, 0x03, 0x2d, 0xc3, 0x4d, 0xd4, 0xc9, 0xb4, 0x59, 0x0e,
- 0x8d, 0x30, 0x43, 0x00, 0x58, 0xc3, 0x4d, 0xe6, 0x46, 0x01, 0xdc, 0x43,
- 0x4e, 0x04, 0xca, 0xa8, 0x60, 0x0e, 0x8d, 0x39, 0xcc, 0x8c, 0x38, 0x0e,
- 0x8a, 0xc9, 0xcd, 0x78, 0x5b, 0x0e, 0x8a, 0xc1, 0x47, 0x8d, 0x29, 0x43,
- 0x4e, 0x10, 0x4f, 0x68, 0xf3, 0xc3, 0x4e, 0x1c, 0x42, 0x00, 0x8c, 0xc3,
- 0x4e, 0x43, 0x46, 0xb6, 0x8f, 0x43, 0x4e, 0x4f, 0x0b, 0xc3, 0x4e, 0x5b,
- 0x07, 0x43, 0x4e, 0x67, 0xc4, 0x03, 0x68, 0x0e, 0x8c, 0x21, 0xc2, 0x01,
- 0xc7, 0x0e, 0x8c, 0x18, 0x46, 0x19, 0x0d, 0xc3, 0x4e, 0x73, 0x4b, 0x92,
- 0x16, 0x43, 0x4e, 0x85, 0x43, 0x03, 0x59, 0xc3, 0x4e, 0x91, 0x45, 0x00,
- 0x6c, 0x43, 0x4e, 0xa9, 0x9f, 0x00, 0x84, 0x59, 0xa0, 0x00, 0x84, 0x60,
- 0xc2, 0x00, 0xa4, 0x05, 0x53, 0x71, 0x83, 0x05, 0x53, 0x68, 0x83, 0x05,
- 0x53, 0x59, 0xc2, 0x1d, 0x5f, 0x05, 0x53, 0x28, 0xc2, 0x00, 0xa4, 0x05,
- 0x53, 0x51, 0x06, 0x43, 0x4e, 0xb5, 0xc2, 0x00, 0xa4, 0x05, 0x53, 0x39,
- 0x83, 0x05, 0x53, 0x30, 0xc2, 0x00, 0xa4, 0x05, 0x53, 0x21, 0x83, 0x05,
- 0x53, 0x18, 0xc2, 0x00, 0xa4, 0x05, 0x53, 0x11, 0x83, 0x05, 0x53, 0x08,
- 0xc2, 0x00, 0xa4, 0x05, 0x4f, 0xf1, 0x83, 0x05, 0x4f, 0xe8, 0xc2, 0x00,
- 0xa4, 0x05, 0x4f, 0xe1, 0x83, 0x05, 0x4f, 0xd9, 0x06, 0x43, 0x4e, 0xbf,
- 0xc2, 0x00, 0xc1, 0x05, 0x4f, 0x79, 0xc2, 0x1d, 0x5f, 0x05, 0x4f, 0x38,
- 0xc2, 0x00, 0xa4, 0x05, 0x4f, 0x61, 0x83, 0x05, 0x4f, 0x58, 0xc2, 0x00,
- 0xa4, 0x05, 0x4f, 0x51, 0x83, 0x05, 0x4f, 0x48, 0x04, 0xc3, 0x4e, 0xc9,
- 0x10, 0xc3, 0x4e, 0xd3, 0xc3, 0xe7, 0x6c, 0x05, 0x4f, 0x11, 0x83, 0x00,
- 0x81, 0x11, 0x0d, 0xc3, 0x4e, 0xe3, 0x09, 0xc3, 0x4e, 0xed, 0x05, 0xc3,
- 0x4e, 0xf7, 0xc2, 0x02, 0xb4, 0x00, 0x83, 0xc9, 0xc2, 0x0b, 0xc6, 0x00,
- 0x83, 0xd9, 0xc3, 0x17, 0x4d, 0x00, 0x83, 0xe9, 0xc2, 0x00, 0x67, 0x00,
- 0x83, 0xf1, 0xc3, 0x00, 0xa3, 0x00, 0x84, 0x01, 0xc2, 0x00, 0xa4, 0x00,
- 0x84, 0x08, 0xce, 0x29, 0x88, 0x0f, 0xd0, 0xa9, 0xdb, 0x18, 0x76, 0x0f,
- 0xd1, 0xf8, 0xd2, 0x47, 0x9a, 0x0f, 0xd0, 0x41, 0xce, 0x29, 0x88, 0x0f,
- 0xd0, 0xc9, 0xdf, 0x0c, 0x47, 0x0f, 0xd0, 0xe9, 0x16, 0x43, 0x4f, 0x01,
- 0xc7, 0x76, 0x59, 0x08, 0xa2, 0x39, 0xc7, 0x11, 0x41, 0x08, 0xa2, 0x20,
- 0xc5, 0x44, 0x7b, 0x08, 0xa2, 0x29, 0xc4, 0x0f, 0x7c, 0x08, 0xa2, 0x10,
- 0x8e, 0x08, 0xa0, 0x48, 0x94, 0x08, 0xa0, 0x38, 0x89, 0x00, 0xce, 0x10,
- 0xc2, 0x00, 0x91, 0x00, 0xcd, 0x59, 0x83, 0x00, 0xcc, 0x60, 0xc2, 0x04,
- 0x41, 0x00, 0xcd, 0x49, 0x83, 0x00, 0xcc, 0x30, 0xc2, 0x04, 0x41, 0x00,
- 0xcd, 0x41, 0x83, 0x00, 0xcc, 0x28, 0xc2, 0x00, 0xa4, 0x00, 0xcc, 0xc1,
- 0x83, 0x00, 0xcc, 0xb8, 0x83, 0x00, 0xcc, 0x99, 0xc2, 0x01, 0x29, 0x00,
- 0xcc, 0x38, 0xc2, 0x00, 0xa4, 0x00, 0xcc, 0x91, 0x83, 0x00, 0xcc, 0x89,
- 0xc2, 0x0c, 0x65, 0x00, 0xcc, 0x58, 0xc2, 0x00, 0x91, 0x00, 0xcd, 0x51,
- 0x83, 0x00, 0xcc, 0x48, 0xc2, 0x04, 0x41, 0x00, 0xcd, 0x39, 0x83, 0x00,
- 0xcc, 0x18, 0xc2, 0x04, 0x41, 0x00, 0xcd, 0x31, 0x83, 0x00, 0xcc, 0x10,
- 0xc2, 0x00, 0xa4, 0x00, 0xcc, 0xa9, 0x83, 0x00, 0xcc, 0xa0, 0x83, 0x00,
- 0xcc, 0x81, 0xc2, 0x01, 0x29, 0x00, 0xcc, 0x20, 0xc2, 0x00, 0xa4, 0x00,
- 0xcc, 0x79, 0x83, 0x00, 0xcc, 0x71, 0xc2, 0x0c, 0x65, 0x00, 0xcc, 0x40,
- 0x9b, 0x00, 0xcd, 0xf8, 0x9b, 0x00, 0xcd, 0xf0, 0x9b, 0x00, 0xcd, 0xd8,
- 0xc3, 0x18, 0x86, 0x01, 0x27, 0xa1, 0xc3, 0x21, 0x32, 0x01, 0x27, 0x60,
- 0x00, 0x43, 0x4f, 0x0d, 0x00, 0x43, 0x4f, 0x1f, 0xc7, 0x0a, 0xb9, 0x05,
- 0x41, 0x81, 0xc4, 0x03, 0x2b, 0x05, 0x41, 0x89, 0xc9, 0x6b, 0x69, 0x05,
- 0x41, 0x99, 0xc6, 0x01, 0xdb, 0x05, 0x41, 0xa0, 0xc8, 0x0a, 0xb9, 0x05,
- 0x41, 0x91, 0xca, 0xa7, 0x5c, 0x05, 0x41, 0xa8, 0xc7, 0xc2, 0x2f, 0x09,
- 0xa2, 0xa1, 0xc3, 0x00, 0xef, 0x09, 0xa2, 0x71, 0xc5, 0xa0, 0x46, 0x09,
- 0xa2, 0x42, 0x03, 0x4f, 0x37, 0xc7, 0xc2, 0x2f, 0x09, 0xa2, 0x99, 0xc5,
- 0xa0, 0x46, 0x09, 0xa2, 0x3b, 0x03, 0x4f, 0x3d, 0xc3, 0x00, 0xef, 0x09,
- 0xa2, 0x50, 0xc6, 0x07, 0x3a, 0x09, 0xa2, 0x89, 0xc3, 0x03, 0x27, 0x09,
- 0xa2, 0x68, 0xc2, 0x00, 0x4c, 0x0f, 0x3f, 0xf1, 0x8b, 0x0f, 0x3f, 0xe8,
- 0xc2, 0x00, 0x4c, 0x0f, 0x3f, 0xe1, 0x8b, 0x0f, 0x3f, 0xd8, 0x87, 0x0f,
- 0x3f, 0xd3, 0x03, 0x4f, 0x43, 0x8b, 0x0f, 0x3f, 0xc0, 0x87, 0x0f, 0x3f,
- 0xbb, 0x03, 0x4f, 0x47, 0x8b, 0x0f, 0x3f, 0xa8, 0xc2, 0x00, 0x4c, 0x0f,
- 0x3f, 0xa1, 0x8b, 0x0f, 0x3f, 0x98, 0x87, 0x0f, 0x3f, 0x93, 0x03, 0x4f,
- 0x4b, 0x8b, 0x0f, 0x3f, 0x80, 0xc2, 0x00, 0x4c, 0x0f, 0x3f, 0x71, 0x8b,
- 0x0f, 0x3f, 0x68, 0x83, 0x00, 0x98, 0xf8, 0x87, 0x01, 0x6c, 0xa8, 0x87,
- 0x0f, 0x3f, 0x50, 0x87, 0x0f, 0x3f, 0x20, 0x83, 0x0f, 0x3f, 0x18, 0x91,
- 0x05, 0x59, 0x31, 0x87, 0x05, 0x59, 0x2b, 0x03, 0x4f, 0x4f, 0x83, 0x05,
- 0x59, 0x03, 0x03, 0x4f, 0x53, 0x8b, 0x05, 0x59, 0x11, 0x97, 0x05, 0x59,
- 0x08, 0x83, 0x01, 0x6d, 0xd8, 0x87, 0x01, 0x6d, 0xe0, 0x87, 0x05, 0x58,
- 0x60, 0x83, 0x00, 0x92, 0xd8, 0x87, 0x00, 0x92, 0xe0, 0x83, 0x00, 0x96,
- 0x18, 0x87, 0x00, 0x96, 0x20, 0x83, 0x00, 0x96, 0x83, 0x03, 0x4f, 0x57,
- 0x97, 0x00, 0x96, 0x89, 0x8b, 0x00, 0x96, 0x91, 0x87, 0x00, 0x96, 0xab,
- 0x03, 0x4f, 0x5b, 0x91, 0x00, 0x96, 0xb0, 0xd1, 0x57, 0x06, 0x01, 0x4f,
- 0x20, 0xd0, 0x01, 0xf7, 0x01, 0x4b, 0x89, 0xce, 0x38, 0x53, 0x01, 0x53,
- 0x99, 0xc9, 0x68, 0x21, 0x01, 0x53, 0x89, 0xcf, 0x0b, 0x98, 0x01, 0x5a,
- 0x00, 0xe0, 0x01, 0x47, 0x01, 0x53, 0xb8, 0xa1, 0x0e, 0x92, 0x09, 0xa0,
- 0x0e, 0x92, 0x01, 0x9f, 0x0e, 0x91, 0xf9, 0x9e, 0x0e, 0x91, 0xf1, 0x9d,
- 0x0e, 0x91, 0xe8, 0xa6, 0x0e, 0x91, 0xe1, 0xa5, 0x0e, 0x91, 0xd9, 0xa4,
- 0x0e, 0x91, 0xd1, 0xa2, 0x0e, 0x91, 0xc9, 0xa0, 0x0e, 0x91, 0xc1, 0x9f,
- 0x0e, 0x91, 0xb9, 0x9d, 0x0e, 0x91, 0xb0, 0xa6, 0x0e, 0x91, 0xa9, 0xa5,
- 0x0e, 0x91, 0xa1, 0xa4, 0x0e, 0x91, 0x99, 0xa3, 0x0e, 0x91, 0x91, 0x9f,
- 0x0e, 0x91, 0x89, 0x9d, 0x0e, 0x91, 0x80, 0xa6, 0x0e, 0x91, 0x79, 0xa4,
- 0x0e, 0x91, 0x71, 0xa3, 0x0e, 0x91, 0x69, 0xa2, 0x0e, 0x91, 0x61, 0xa1,
- 0x0e, 0x91, 0x59, 0xa0, 0x0e, 0x91, 0x50, 0xa6, 0x0e, 0x91, 0x49, 0xa5,
- 0x0e, 0x91, 0x41, 0xa4, 0x0e, 0x91, 0x39, 0xa1, 0x0e, 0x91, 0x31, 0xa0,
- 0x0e, 0x91, 0x29, 0x9f, 0x0e, 0x91, 0x21, 0x9e, 0x0e, 0x91, 0x18, 0xa1,
- 0x0e, 0x90, 0xe1, 0xa0, 0x0e, 0x90, 0xd9, 0x9f, 0x0e, 0x90, 0xd1, 0x9e,
- 0x0e, 0x90, 0xc9, 0x9d, 0x0e, 0x90, 0xc0, 0xa1, 0x0e, 0x90, 0xb9, 0xa0,
- 0x0e, 0x90, 0xb1, 0x9f, 0x0e, 0x90, 0xa9, 0x9e, 0x0e, 0x90, 0xa1, 0x9d,
- 0x0e, 0x90, 0x98, 0xa6, 0x0e, 0x90, 0x91, 0xa5, 0x0e, 0x90, 0x89, 0xa4,
- 0x0e, 0x90, 0x81, 0xa3, 0x0e, 0x90, 0x79, 0xa2, 0x0e, 0x90, 0x71, 0xa1,
- 0x0e, 0x90, 0x69, 0xa0, 0x0e, 0x90, 0x61, 0x9f, 0x0e, 0x90, 0x59, 0x9e,
- 0x0e, 0x90, 0x51, 0x9d, 0x0e, 0x90, 0x48, 0xcb, 0x95, 0x4f, 0x00, 0xfe,
- 0xf9, 0xc4, 0xe5, 0x9f, 0x00, 0xfe, 0xf1, 0xc5, 0x2a, 0xae, 0x00, 0xfe,
- 0xe8, 0xc4, 0xe5, 0x9f, 0x00, 0xff, 0x71, 0xc5, 0x2a, 0xae, 0x00, 0xff,
- 0x69, 0xcb, 0x95, 0x4f, 0x00, 0xfe, 0x08, 0xcf, 0x65, 0x8d, 0x08, 0x0b,
- 0xb0, 0x42, 0x00, 0x3a, 0xc3, 0x4f, 0x5f, 0xc3, 0x33, 0x12, 0x00, 0x1d,
- 0x0b, 0x03, 0x4f, 0x71, 0xc7, 0x7d, 0x62, 0x00, 0x1d, 0x2b, 0x03, 0x4f,
- 0x77, 0xc4, 0x26, 0xf7, 0x00, 0x1c, 0xcb, 0x03, 0x4f, 0x7d, 0x07, 0xc3,
- 0x4f, 0x83, 0x03, 0xc3, 0x4f, 0x95, 0xc4, 0x89, 0x81, 0x00, 0x1b, 0x81,
- 0x12, 0xc3, 0x4f, 0xa4, 0xc3, 0xe7, 0x24, 0x00, 0x1b, 0xf9, 0xc4, 0x96,
- 0x57, 0x00, 0x1c, 0x91, 0xc5, 0x52, 0x39, 0x00, 0x1c, 0x99, 0xc5, 0xd7,
- 0x87, 0x00, 0x1c, 0xa1, 0xc4, 0xe0, 0x6b, 0x00, 0x1c, 0xb1, 0x16, 0xc3,
- 0x4f, 0xba, 0xc5, 0x8b, 0x48, 0x00, 0x1c, 0xd1, 0xc5, 0xda, 0xd9, 0x00,
- 0x1c, 0xd9, 0xc2, 0x13, 0x51, 0x00, 0x1c, 0xe1, 0xc2, 0x01, 0xc6, 0x00,
- 0x1c, 0xe9, 0xc2, 0x07, 0x49, 0x00, 0x1c, 0xf1, 0x15, 0xc3, 0x4f, 0xc6,
- 0xc3, 0x11, 0xd4, 0x00, 0x1d, 0x38, 0x42, 0x00, 0x3a, 0xc3, 0x4f, 0xd8,
- 0xc7, 0x7d, 0x62, 0x00, 0x1e, 0x2b, 0x03, 0x4f, 0xea, 0xc3, 0x33, 0x12,
- 0x00, 0x1e, 0x0b, 0x03, 0x4f, 0xf0, 0xc4, 0x26, 0xf7, 0x00, 0x1d, 0xcb,
- 0x03, 0x4f, 0xf6, 0x07, 0xc3, 0x4f, 0xfc, 0x03, 0xc3, 0x50, 0x0e, 0xc4,
- 0x89, 0x81, 0x00, 0x1b, 0x89, 0xc4, 0x96, 0x57, 0x00, 0x1d, 0x91, 0xc5,
- 0x52, 0x39, 0x00, 0x1d, 0x99, 0x06, 0xc3, 0x50, 0x1d, 0xc4, 0xe0, 0x6b,
- 0x00, 0x1d, 0xb1, 0x16, 0xc3, 0x50, 0x29, 0x0d, 0xc3, 0x50, 0x35, 0xc5,
- 0xda, 0xd9, 0x00, 0x1d, 0xd9, 0xc2, 0x13, 0x51, 0x00, 0x1d, 0xe1, 0xc2,
- 0x01, 0xc6, 0x00, 0x1d, 0xe9, 0xc2, 0x07, 0x49, 0x00, 0x1d, 0xf1, 0x12,
- 0xc3, 0x50, 0x41, 0xcb, 0x92, 0xd1, 0x00, 0x1e, 0x11, 0x15, 0xc3, 0x50,
- 0x57, 0xc3, 0x11, 0xd4, 0x00, 0x1e, 0x38, 0xd3, 0x1c, 0x7c, 0x00, 0x1b,
- 0xd9, 0xda, 0x1c, 0x75, 0x00, 0x1b, 0xe8, 0xcb, 0x95, 0x4f, 0x00, 0xfe,
- 0x79, 0xc4, 0xe5, 0x9f, 0x00, 0xfe, 0x71, 0xc5, 0x2a, 0xae, 0x00, 0xfe,
- 0x68, 0x4d, 0x35, 0xa6, 0xc3, 0x50, 0x6d, 0xc5, 0xd6, 0x33, 0x00, 0x1e,
- 0xd1, 0xc4, 0x8a, 0x54, 0x00, 0x1f, 0x00, 0xcd, 0x7f, 0x6a, 0x08, 0x0b,
- 0xc1, 0xca, 0x6c, 0xcb, 0x08, 0x0b, 0xf0, 0x44, 0x01, 0xb4, 0xc3, 0x50,
- 0x89, 0x42, 0x00, 0x60, 0xc3, 0x50, 0x9f, 0x44, 0x4f, 0xfc, 0x43, 0x50,
- 0xb1, 0xd1, 0x4f, 0x0e, 0x08, 0x0a, 0xc1, 0x48, 0xbd, 0xe5, 0x43, 0x50,
- 0xc1, 0x48, 0xbe, 0x3d, 0xc3, 0x50, 0xd3, 0x4a, 0x9c, 0xbc, 0x43, 0x50,
- 0xe6, 0xc3, 0x01, 0x59, 0x08, 0x0a, 0xdb, 0x03, 0x50, 0xf5, 0xcc, 0x37,
- 0x4b, 0x08, 0x0b, 0x60, 0xd4, 0x3c, 0xf1, 0x08, 0x0a, 0xe9, 0xd5, 0x37,
- 0x42, 0x08, 0x0b, 0x78, 0xc6, 0x0d, 0xf2, 0x01, 0x54, 0x01, 0xc5, 0x00,
- 0x95, 0x01, 0x54, 0x12, 0x03, 0x50, 0xfb, 0xc8, 0x23, 0x35, 0x01, 0x54,
- 0x71, 0xcf, 0x0c, 0x18, 0x01, 0x54, 0x80, 0xe0, 0x0a, 0x47, 0x01, 0x54,
- 0xa0, 0x8e, 0x08, 0x9b, 0x08, 0x94, 0x08, 0x9b, 0x00, 0xc6, 0x46, 0x09,
- 0x00, 0xe5, 0xf0, 0xc6, 0x46, 0x09, 0x00, 0x87, 0xf0, 0x97, 0x01, 0x60,
- 0xf9, 0x8b, 0x01, 0x61, 0x00, 0xc3, 0x89, 0x0d, 0x01, 0x61, 0x60, 0x97,
- 0x01, 0x62, 0x79, 0x8b, 0x01, 0x62, 0x80, 0xc3, 0x89, 0x0d, 0x01, 0x62,
- 0xe0, 0x94, 0x00, 0x5b, 0x00, 0x8e, 0x00, 0x5b, 0x08, 0xc7, 0x0c, 0x4b,
- 0x0f, 0x68, 0xa9, 0xc8, 0x50, 0x00, 0x0f, 0x68, 0xf0, 0xc7, 0x0c, 0x4b,
- 0x0f, 0x68, 0xa1, 0xc8, 0x50, 0x00, 0x0f, 0x68, 0xe8, 0xc7, 0x0c, 0x4b,
- 0x0f, 0x68, 0xb1, 0xc8, 0x50, 0x00, 0x0f, 0x68, 0xf8, 0xc7, 0x0c, 0x4b,
- 0x0f, 0x68, 0xb9, 0xc8, 0x50, 0x00, 0x0f, 0x69, 0x00, 0xc4, 0xd9, 0x77,
- 0x08, 0x7b, 0xd9, 0xc3, 0xdf, 0x4a, 0x08, 0x7b, 0xe8, 0xc8, 0x0c, 0x4a,
- 0x08, 0x79, 0x28, 0x0a, 0xc3, 0x51, 0x01, 0x19, 0xc3, 0x51, 0x0d, 0xc2,
- 0x00, 0x4d, 0x08, 0x79, 0x10, 0xc3, 0x0c, 0x5b, 0x08, 0x79, 0x09, 0xc3,
- 0x06, 0x9e, 0x08, 0x79, 0x00, 0x46, 0x2b, 0x13, 0xc3, 0x51, 0x17, 0xc3,
- 0x26, 0xf9, 0x08, 0x78, 0xd1, 0x15, 0xc3, 0x51, 0x44, 0xd0, 0x5d, 0xe2,
- 0x08, 0x78, 0xc1, 0xc2, 0x00, 0x27, 0x08, 0x78, 0xa1, 0x03, 0xc3, 0x51,
- 0x4e, 0xc3, 0x1f, 0xd8, 0x08, 0x78, 0x71, 0xc3, 0x0b, 0x0e, 0x08, 0x78,
- 0x69, 0xc6, 0xd0, 0x5d, 0x08, 0x78, 0x61, 0xc4, 0xe2, 0x57, 0x08, 0x78,
- 0x59, 0xc4, 0x4b, 0x98, 0x08, 0x78, 0x51, 0xc2, 0x01, 0xf0, 0x08, 0x78,
- 0x2b, 0x03, 0x51, 0x58, 0xc5, 0x4b, 0x92, 0x08, 0x78, 0x41, 0xc3, 0x78,
- 0xa9, 0x08, 0x78, 0x39, 0xc5, 0xa1, 0x94, 0x08, 0x78, 0x21, 0xc4, 0xe4,
- 0x8f, 0x08, 0x78, 0x10, 0xc5, 0x43, 0xcb, 0x08, 0x53, 0xf1, 0xc3, 0x01,
- 0xb4, 0x08, 0x53, 0xe8, 0x0a, 0xc3, 0x51, 0x5e, 0xc3, 0x1a, 0xba, 0x08,
- 0x53, 0xb9, 0xc2, 0x26, 0xfa, 0x08, 0x53, 0x48, 0x42, 0x00, 0xa4, 0xc3,
- 0x51, 0x6a, 0xc5, 0x45, 0xf7, 0x08, 0x53, 0xa8, 0xc4, 0xe1, 0x2f, 0x08,
- 0x53, 0xb1, 0xc4, 0xa1, 0x95, 0x08, 0x53, 0xa0, 0xc3, 0x0d, 0xd9, 0x08,
- 0x53, 0x31, 0x03, 0x43, 0x51, 0x76, 0xc2, 0x00, 0x6e, 0x08, 0x53, 0x10,
- 0xc3, 0x08, 0x56, 0x08, 0x53, 0x59, 0xc4, 0x9c, 0xcc, 0x08, 0x53, 0x68,
- 0xc3, 0x01, 0x1e, 0x08, 0x53, 0x89, 0xc2, 0x17, 0x51, 0x08, 0x53, 0x90,
- 0xc7, 0x0c, 0x4b, 0x08, 0x67, 0xf1, 0xc8, 0x50, 0x00, 0x08, 0x67, 0xf8,
- 0x96, 0x08, 0x67, 0x3b, 0x03, 0x51, 0x86, 0x9b, 0x08, 0x66, 0xd1, 0x85,
- 0x08, 0x66, 0x28, 0x95, 0x08, 0x67, 0x80, 0x8a, 0x08, 0x67, 0x49, 0x95,
- 0x08, 0x66, 0x30, 0x9b, 0x08, 0x67, 0x40, 0x9c, 0x08, 0x67, 0x28, 0x92,
- 0x08, 0x67, 0x08, 0x9b, 0x08, 0x66, 0xb8, 0x9b, 0x08, 0x66, 0x70, 0x96,
- 0x08, 0x65, 0x3b, 0x03, 0x51, 0x8c, 0x9b, 0x08, 0x64, 0xd1, 0x85, 0x08,
- 0x64, 0x28, 0x9b, 0x08, 0x65, 0x40, 0x9c, 0x08, 0x65, 0x28, 0x92, 0x08,
- 0x65, 0x08, 0x9b, 0x08, 0x64, 0xb8, 0x9b, 0x08, 0x64, 0x70, 0x95, 0x08,
- 0x64, 0x31, 0x8a, 0x08, 0x65, 0x48, 0x95, 0x08, 0x65, 0x80, 0x8d, 0x08,
- 0x60, 0xe0, 0x96, 0x08, 0x62, 0x29, 0x95, 0x08, 0x61, 0xf1, 0x94, 0x08,
- 0x61, 0xe1, 0x90, 0x08, 0x61, 0x21, 0x8e, 0x08, 0x61, 0x01, 0x8d, 0x08,
- 0x60, 0xd1, 0x9b, 0x08, 0x60, 0xc1, 0x86, 0x08, 0x60, 0x99, 0x89, 0x08,
- 0x60, 0x79, 0x84, 0x08, 0x60, 0x58, 0x8a, 0x08, 0x61, 0xf8, 0x85, 0x08,
- 0x61, 0x41, 0x96, 0x08, 0x61, 0x31, 0x9b, 0x08, 0x61, 0x51, 0x89, 0x08,
- 0x61, 0x68, 0x96, 0x08, 0x62, 0x31, 0x90, 0x08, 0x61, 0x2b, 0x03, 0x51,
- 0x92, 0x8d, 0x08, 0x60, 0xd9, 0x9b, 0x08, 0x60, 0xc9, 0x89, 0x08, 0x60,
- 0x81, 0x84, 0x08, 0x60, 0x60, 0x96, 0x08, 0x61, 0x39, 0x85, 0x08, 0x61,
- 0x49, 0x9b, 0x08, 0x61, 0x58, 0x8d, 0x08, 0x60, 0xe8, 0xc2, 0x0d, 0xf7,
- 0x08, 0x54, 0xd9, 0xc2, 0x00, 0x92, 0x08, 0x54, 0xc8, 0x83, 0x08, 0x1d,
- 0x03, 0x03, 0x51, 0x96, 0x8b, 0x08, 0x1d, 0x09, 0x97, 0x08, 0x1d, 0x11,
- 0x0d, 0xc3, 0x51, 0x9f, 0x09, 0xc3, 0x51, 0xa7, 0x1a, 0xc3, 0x51, 0xaf,
- 0xc2, 0x03, 0x40, 0x08, 0x1d, 0x41, 0x0c, 0xc3, 0x51, 0xb9, 0x16, 0xc3,
- 0x51, 0xc1, 0x06, 0xc3, 0x51, 0xcf, 0xc2, 0x03, 0xa4, 0x08, 0x1d, 0x89,
- 0x04, 0xc3, 0x51, 0xde, 0xc2, 0x00, 0x67, 0x08, 0x1d, 0x99, 0x10, 0xc3,
- 0x51, 0xeb, 0x0f, 0xc3, 0x51, 0xf3, 0xc2, 0x1d, 0x5f, 0x08, 0x1d, 0xc9,
- 0x18, 0xc3, 0x51, 0xff, 0x14, 0xc3, 0x52, 0x07, 0xc2, 0x00, 0xc7, 0x08,
- 0x1d, 0xf1, 0x15, 0xc3, 0x52, 0x0f, 0xc2, 0x02, 0xb4, 0x08, 0x1e, 0x01,
- 0xc2, 0x00, 0xa4, 0x08, 0x1e, 0x18, 0xc3, 0x01, 0xb4, 0x08, 0x1e, 0x89,
- 0x16, 0xc3, 0x52, 0x1f, 0xc7, 0x0c, 0x4b, 0x08, 0x1e, 0xa8, 0xc3, 0xcd,
- 0x02, 0x08, 0x1a, 0xb1, 0xc3, 0x04, 0x44, 0x08, 0x1a, 0xc0, 0xc3, 0xd8,
- 0xf6, 0x08, 0x1b, 0x29, 0xc5, 0xd8, 0xf4, 0x08, 0x1b, 0x30, 0x97, 0x08,
- 0x1b, 0x41, 0x8b, 0x08, 0x1b, 0x80, 0x96, 0x08, 0x1b, 0x88, 0x8a, 0x08,
- 0x18, 0x71, 0x95, 0x08, 0x18, 0xf8, 0x95, 0x08, 0x18, 0xd8, 0xce, 0x64,
- 0x71, 0x0e, 0x7d, 0xa1, 0xc8, 0x4d, 0xc2, 0x0e, 0x7d, 0x98, 0xc7, 0x4d,
- 0xba, 0x0e, 0x7d, 0xab, 0x03, 0x52, 0x29, 0xc7, 0xa7, 0x0f, 0x0e, 0x7c,
- 0xa0, 0xce, 0x64, 0x71, 0x0e, 0x7c, 0xc9, 0xc9, 0x8e, 0x71, 0x0e, 0x7c,
- 0xc0, 0xc9, 0xae, 0xb9, 0x0e, 0x7d, 0x71, 0xc9, 0x8e, 0x71, 0x0e, 0x7d,
- 0x69, 0xc8, 0xbe, 0xad, 0x0e, 0x7d, 0x60, 0xca, 0xa7, 0x0c, 0x0e, 0x7d,
- 0x2b, 0x03, 0x52, 0x2d, 0xc9, 0x8e, 0x71, 0x0e, 0x7d, 0x1a, 0x03, 0x52,
- 0x33, 0xd6, 0x2d, 0xe3, 0x0e, 0x7d, 0x00, 0xc9, 0x8e, 0x71, 0x0e, 0x7c,
- 0xeb, 0x03, 0x52, 0x39, 0xca, 0xa7, 0x0c, 0x0e, 0x7c, 0xe0, 0xcc, 0x83,
- 0xe0, 0x0e, 0x7c, 0xf0, 0xc7, 0x8e, 0x73, 0x0e, 0x7c, 0xb1, 0xcb, 0x8e,
- 0x6f, 0x0e, 0x7c, 0xa8, 0xc8, 0x98, 0x1d, 0x0e, 0x7c, 0x3b, 0x03, 0x52,
- 0x3f, 0xd0, 0x5d, 0x22, 0x0e, 0x7c, 0x71, 0xc5, 0xd5, 0xc0, 0x0e, 0x7c,
- 0x69, 0xc7, 0x78, 0x91, 0x0e, 0x7c, 0x42, 0x03, 0x52, 0x45, 0xcb, 0x9b,
- 0x11, 0x0e, 0x7c, 0x60, 0xc6, 0x78, 0x92, 0x0e, 0x78, 0xd9, 0x4b, 0x90,
- 0x27, 0x43, 0x52, 0x4b, 0xc5, 0x01, 0xf7, 0x0e, 0x78, 0xa9, 0xc4, 0x01,
- 0x1e, 0x0e, 0x78, 0x48, 0xc8, 0xbc, 0x15, 0x05, 0x4c, 0x58, 0xc5, 0x01,
- 0xf7, 0x01, 0x2c, 0xe1, 0xc4, 0x01, 0x1e, 0x01, 0x2c, 0xd8, 0xc5, 0x01,
- 0xf7, 0x01, 0x2c, 0xd1, 0xd4, 0x3b, 0x4d, 0x01, 0x2c, 0xc8, 0x92, 0x05,
- 0x22, 0xa1, 0x9a, 0x05, 0x22, 0x90, 0x92, 0x05, 0x22, 0x89, 0x9a, 0x05,
- 0x22, 0x79, 0x96, 0x05, 0x22, 0x70, 0x9a, 0x05, 0x22, 0x40, 0x9a, 0x05,
- 0x22, 0x10, 0x9a, 0x05, 0x21, 0xc8, 0x92, 0x05, 0x21, 0xc1, 0x9a, 0x05,
- 0x21, 0xb1, 0x96, 0x05, 0x21, 0xa8, 0x9a, 0x05, 0x1d, 0x48, 0x9a, 0x05,
- 0x1d, 0x18, 0x9a, 0x05, 0x17, 0x89, 0x92, 0x05, 0x17, 0x98, 0x9a, 0x05,
- 0x17, 0xc0, 0x9a, 0x05, 0x18, 0x08, 0x9a, 0x05, 0x18, 0x38, 0x9a, 0x05,
- 0x03, 0xd1, 0x92, 0x05, 0x03, 0xe0, 0x9a, 0x05, 0x04, 0x48, 0x9a, 0x05,
- 0x04, 0x78, 0x9a, 0x05, 0x0a, 0xa8, 0x9a, 0x05, 0x0b, 0x30, 0x9a, 0x05,
- 0x21, 0x58, 0x92, 0x05, 0x21, 0x11, 0x9a, 0x05, 0x21, 0x00, 0x92, 0x05,
- 0x20, 0xf9, 0x9a, 0x05, 0x20, 0xe9, 0x96, 0x05, 0x20, 0xe0, 0x9a, 0x05,
- 0x1c, 0x90, 0x9a, 0x05, 0x1c, 0x60, 0x9a, 0x05, 0x1b, 0xf0, 0x9a, 0x05,
- 0x1e, 0x20, 0x9a, 0x05, 0x1d, 0xf0, 0x92, 0x05, 0x1d, 0x89, 0x9a, 0x05,
- 0x1d, 0x78, 0x9a, 0x05, 0x1a, 0x20, 0x9a, 0x05, 0x19, 0x71, 0x92, 0x05,
- 0x19, 0x80, 0x9a, 0x05, 0x1b, 0xd0, 0x9a, 0x05, 0x1b, 0xa0, 0x92, 0x05,
- 0x1b, 0x41, 0x9a, 0x05, 0x1b, 0x31, 0x96, 0x05, 0x1b, 0x28, 0x92, 0x05,
- 0x16, 0xb9, 0x9a, 0x05, 0x16, 0xa9, 0x96, 0x05, 0x16, 0xa0, 0x9a, 0x05,
- 0x17, 0x28, 0x9a, 0x05, 0x17, 0x58, 0x9a, 0x05, 0x1a, 0xf8, 0x9a, 0x05,
- 0x1a, 0xc8, 0x9a, 0x05, 0x1a, 0x51, 0x92, 0x05, 0x1a, 0x60, 0x96, 0x05,
- 0x12, 0x51, 0x9a, 0x05, 0x12, 0x59, 0x92, 0x05, 0x12, 0x68, 0x9a, 0x05,
- 0x04, 0xa9, 0x92, 0x05, 0x04, 0xb8, 0x9a, 0x05, 0x04, 0xe1, 0x92, 0x05,
- 0x04, 0xf0, 0x9a, 0x05, 0x05, 0x38, 0x9a, 0x05, 0x05, 0x60, 0x96, 0x05,
- 0x0b, 0x61, 0x9a, 0x05, 0x0b, 0x69, 0x92, 0x05, 0x0b, 0x78, 0x9a, 0x05,
- 0x0b, 0xa0, 0x9a, 0x05, 0x0c, 0xd9, 0x92, 0x05, 0x0c, 0xe8, 0x9a, 0x05,
- 0x0d, 0x11, 0x92, 0x05, 0x0d, 0x20, 0x9a, 0x05, 0x0d, 0x78, 0x9a, 0x05,
- 0x0d, 0xa8, 0x9a, 0x05, 0x12, 0x20, 0x9a, 0x05, 0x11, 0xb1, 0x92, 0x05,
- 0x11, 0xc0, 0x96, 0x05, 0x02, 0xd1, 0x9a, 0x05, 0x02, 0xd9, 0x92, 0x05,
- 0x02, 0xe8, 0x9a, 0x05, 0x03, 0x11, 0x92, 0x05, 0x03, 0x20, 0x9a, 0x05,
- 0x03, 0x80, 0x9a, 0x05, 0x09, 0xd1, 0x92, 0x05, 0x09, 0xe0, 0x9a, 0x05,
- 0x0a, 0x09, 0x92, 0x05, 0x0a, 0x18, 0x9a, 0x05, 0x0a, 0x78, 0x9a, 0x05,
- 0x10, 0xb9, 0x92, 0x05, 0x10, 0xc8, 0x96, 0x05, 0x10, 0xf1, 0x9a, 0x05,
- 0x10, 0xf9, 0x92, 0x05, 0x11, 0x08, 0x9a, 0x05, 0x11, 0x70, 0x97, 0x00,
- 0xb0, 0xab, 0x03, 0x52, 0x57, 0x8b, 0x00, 0xb0, 0xd0, 0x91, 0x00, 0xae,
- 0x13, 0x03, 0x52, 0x5b, 0x83, 0x00, 0xae, 0x19, 0x8b, 0x00, 0xae, 0x09,
- 0x87, 0x00, 0xae, 0x00, 0x91, 0x00, 0xac, 0xcb, 0x03, 0x52, 0x5f, 0xc2,
- 0x00, 0x4b, 0x00, 0xc7, 0x51, 0x83, 0x00, 0xac, 0xd1, 0x8b, 0x00, 0xac,
- 0xc1, 0x87, 0x00, 0xac, 0xb8, 0x83, 0x08, 0xd5, 0xd8, 0x91, 0x08, 0xd5,
- 0xc8, 0x8b, 0x08, 0xd5, 0xb8, 0x83, 0x08, 0xd5, 0xa8, 0x91, 0x08, 0xd5,
- 0x98, 0x8b, 0x08, 0xd5, 0x88, 0x83, 0x00, 0xa8, 0x70, 0x10, 0xc3, 0x52,
- 0x63, 0x87, 0x00, 0xa2, 0x98, 0x83, 0x00, 0xb1, 0x69, 0x8b, 0x00, 0xb1,
- 0x61, 0x87, 0x00, 0xb1, 0x53, 0x03, 0x52, 0x6f, 0x91, 0x00, 0xb1, 0x49,
- 0x97, 0x00, 0xb1, 0x40, 0x97, 0x00, 0xb2, 0x41, 0x91, 0x00, 0xb2, 0x49,
- 0x87, 0x00, 0xb2, 0x53, 0x03, 0x52, 0x73, 0x8b, 0x00, 0xb2, 0x61, 0x83,
- 0x00, 0xb2, 0x68, 0x87, 0x00, 0xb0, 0xc0, 0x97, 0x00, 0xb0, 0xe1, 0x91,
- 0x00, 0xb0, 0xe9, 0x87, 0x00, 0xb0, 0xf3, 0x03, 0x52, 0x77, 0x8b, 0x00,
- 0xb1, 0x01, 0x83, 0x00, 0xb1, 0x08, 0x83, 0x00, 0xc7, 0x81, 0x97, 0x00,
- 0xc7, 0x68, 0x83, 0x00, 0xc7, 0x78, 0x87, 0x00, 0xaf, 0x90, 0x83, 0x00,
- 0xae, 0x49, 0x8b, 0x00, 0xae, 0x41, 0x87, 0x00, 0xae, 0x33, 0x03, 0x52,
- 0x7b, 0x91, 0x00, 0xae, 0x29, 0x97, 0x00, 0xae, 0x20, 0x15, 0xc3, 0x52,
- 0x7f, 0x83, 0x00, 0xaf, 0x39, 0x8b, 0x00, 0xaf, 0x31, 0x87, 0x00, 0xaf,
- 0x23, 0x03, 0x52, 0x96, 0x91, 0x00, 0xaf, 0x19, 0x97, 0x00, 0xaf, 0x10,
- 0x83, 0x00, 0xb3, 0x01, 0x8b, 0x00, 0xb2, 0xf9, 0x87, 0x00, 0xb2, 0xeb,
- 0x03, 0x52, 0x9a, 0x91, 0x00, 0xb2, 0xe1, 0x97, 0x00, 0xb2, 0xd8, 0x83,
- 0x00, 0xaf, 0x09, 0x8b, 0x00, 0xaf, 0x01, 0x87, 0x00, 0xae, 0xf3, 0x03,
- 0x52, 0x9e, 0x91, 0x00, 0xae, 0xe9, 0x97, 0x00, 0xae, 0xe0, 0x0a, 0xc3,
- 0x52, 0xa2, 0x97, 0x00, 0xb1, 0xd1, 0x91, 0x00, 0xb1, 0xd9, 0x87, 0x00,
- 0xb1, 0xe3, 0x03, 0x52, 0xb9, 0x8b, 0x00, 0xb1, 0xf1, 0x83, 0x00, 0xb1,
- 0xf8, 0x87, 0x00, 0xb3, 0x20, 0x87, 0x00, 0xb0, 0x88, 0x87, 0x00, 0xb0,
- 0x58, 0x87, 0x00, 0xb0, 0x28, 0x83, 0x00, 0xb0, 0x01, 0x8b, 0x00, 0xaf,
- 0xf9, 0x87, 0x00, 0xaf, 0xeb, 0x03, 0x52, 0xbd, 0x91, 0x00, 0xaf, 0xe1,
- 0x97, 0x00, 0xaf, 0xd8, 0x83, 0x00, 0xaf, 0xd1, 0x8b, 0x00, 0xaf, 0xc9,
- 0x87, 0x00, 0xaf, 0xbb, 0x03, 0x52, 0xc1, 0x91, 0x00, 0xaf, 0xb1, 0x97,
- 0x00, 0xaf, 0xa8, 0x87, 0x00, 0xaf, 0x58, 0x83, 0x00, 0xae, 0xd9, 0x8b,
- 0x00, 0xae, 0xd1, 0x87, 0x00, 0xae, 0xc3, 0x03, 0x52, 0xc5, 0x91, 0x00,
- 0xae, 0xb9, 0x97, 0x00, 0xae, 0xb0, 0x87, 0x00, 0xae, 0x98, 0x87, 0x00,
- 0xae, 0x68, 0x83, 0x00, 0xb1, 0x99, 0x8b, 0x00, 0xb1, 0x91, 0x87, 0x00,
- 0xb1, 0x83, 0x03, 0x52, 0xc9, 0x91, 0x00, 0xb1, 0x79, 0x97, 0x00, 0xb1,
- 0x70, 0x87, 0x00, 0xb1, 0x28, 0x87, 0x00, 0xb2, 0x18, 0x87, 0x00, 0xb2,
- 0x88, 0x97, 0x00, 0xb2, 0xa1, 0x91, 0x00, 0xb2, 0xa9, 0x87, 0x00, 0xb2,
- 0xb3, 0x03, 0x52, 0xcd, 0x8b, 0x00, 0xb2, 0xc1, 0x83, 0x00, 0xb2, 0xc8,
- 0x83, 0x00, 0xaa, 0x6b, 0x03, 0x52, 0xd1, 0x91, 0x00, 0xaa, 0x53, 0x03,
- 0x52, 0xd5, 0x87, 0x00, 0xaa, 0x21, 0x19, 0x43, 0x52, 0xd9, 0x83, 0x00,
- 0xac, 0x69, 0x91, 0x00, 0xac, 0x61, 0x8b, 0x00, 0xac, 0x59, 0x87, 0x00,
- 0xac, 0x51, 0xc3, 0x14, 0x3f, 0x00, 0xaa, 0x78, 0xc4, 0xe1, 0x43, 0x00,
- 0xab, 0x49, 0x19, 0x43, 0x52, 0xf2, 0x19, 0x43, 0x53, 0x0b, 0x42, 0x16,
- 0xbb, 0xc3, 0x53, 0x24, 0x19, 0x43, 0x53, 0x3d, 0x19, 0x43, 0x53, 0x56,
- 0x91, 0x00, 0xa4, 0xcb, 0x03, 0x53, 0x6f, 0x8b, 0x00, 0xa4, 0xab, 0x03,
- 0x53, 0x73, 0x87, 0x00, 0xa4, 0x99, 0x83, 0x00, 0xa4, 0xea, 0x03, 0x53,
- 0x77, 0x83, 0x00, 0xa0, 0xc3, 0x03, 0x53, 0x7b, 0x91, 0x00, 0xa0, 0x9b,
- 0x03, 0x53, 0x7f, 0x8b, 0x00, 0xa0, 0x7b, 0x03, 0x53, 0x83, 0x87, 0x00,
- 0xa0, 0x68, 0x83, 0x00, 0xa3, 0xfb, 0x03, 0x53, 0x87, 0x87, 0x00, 0xa3,
- 0xa9, 0x8b, 0x00, 0xa3, 0xbb, 0x03, 0x53, 0x8b, 0x91, 0x00, 0xa3, 0xda,
- 0x03, 0x53, 0x8f, 0x19, 0x43, 0x53, 0x93, 0x87, 0x00, 0xa6, 0x51, 0x83,
- 0x00, 0xa6, 0x62, 0x03, 0x53, 0xac, 0x19, 0xc3, 0x53, 0xb0, 0x83, 0x00,
- 0xac, 0xf1, 0x91, 0x00, 0xac, 0xe9, 0x8b, 0x00, 0xac, 0xe1, 0x87, 0x00,
- 0xac, 0xd8, 0xcd, 0x63, 0xfa, 0x00, 0xa1, 0x19, 0xc2, 0x00, 0x35, 0x00,
- 0xa1, 0x20, 0xc5, 0x35, 0x4a, 0x00, 0xa1, 0x29, 0xd6, 0x2b, 0xff, 0x00,
- 0xa1, 0x30, 0x91, 0x00, 0xc6, 0x68, 0x8b, 0x00, 0xc6, 0x48, 0xc9, 0x0f,
- 0x34, 0x07, 0xf1, 0x71, 0xca, 0x0a, 0xf7, 0x07, 0xf1, 0x78, 0xc3, 0xb1,
- 0x52, 0x01, 0x6f, 0xa8, 0x87, 0x05, 0x34, 0xf9, 0x83, 0x01, 0x6f, 0xe1,
- 0xc7, 0xc7, 0x4c, 0x01, 0x6f, 0xf8, 0x83, 0x01, 0x6f, 0x91, 0xc3, 0x1b,
- 0xb6, 0x01, 0x6f, 0x98, 0x8b, 0x0f, 0x01, 0x01, 0x97, 0x0f, 0x00, 0xf8,
- 0xc8, 0xb7, 0x75, 0x0e, 0x92, 0x19, 0xc6, 0xce, 0x59, 0x0e, 0x92, 0x10,
- 0xc2, 0x03, 0xa4, 0x08, 0x9b, 0xa1, 0xc2, 0x03, 0x32, 0x08, 0x9b, 0x99,
- 0xc2, 0x00, 0xc1, 0x08, 0x9b, 0x91, 0xc2, 0x04, 0x2b, 0x08, 0x9b, 0x89,
- 0x83, 0x08, 0x9b, 0x80, 0xc3, 0x26, 0x50, 0x08, 0x9b, 0x61, 0x08, 0xc3,
- 0x53, 0xcb, 0x16, 0xc3, 0x53, 0xd7, 0xc3, 0x01, 0xb4, 0x08, 0x9b, 0x39,
- 0xc4, 0x15, 0xd3, 0x08, 0x9b, 0x30, 0xcb, 0x97, 0x6a, 0x00, 0xee, 0x41,
- 0xc6, 0x60, 0xe6, 0x00, 0xee, 0x28, 0xc6, 0x0b, 0x41, 0x00, 0x18, 0x03,
- 0x03, 0x53, 0xe3, 0xc9, 0x29, 0xa4, 0x00, 0x1a, 0x00, 0x00, 0xc3, 0x53,
- 0xe9, 0x45, 0x15, 0x9d, 0x43, 0x53, 0xf5, 0xcb, 0x90, 0x74, 0x01, 0x06,
- 0x89, 0x48, 0xb8, 0x35, 0x43, 0x53, 0xff, 0xcb, 0x96, 0xaf, 0x00, 0xd6,
- 0x21, 0xcb, 0x11, 0x49, 0x00, 0xd6, 0x10, 0x00, 0xc3, 0x54, 0x0b, 0x45,
- 0x15, 0x9d, 0x43, 0x54, 0x17, 0xc5, 0x00, 0x95, 0x00, 0x18, 0xd1, 0xc5,
- 0x01, 0x62, 0x00, 0x1a, 0x48, 0xc5, 0x01, 0x62, 0x00, 0x18, 0xe1, 0xc5,
- 0x00, 0x95, 0x00, 0x1a, 0x88, 0xc9, 0x1e, 0x42, 0x00, 0xef, 0xa1, 0xdb,
- 0x18, 0x25, 0x00, 0xef, 0x80, 0xc9, 0x1e, 0x42, 0x00, 0xef, 0x99, 0xdb,
- 0x18, 0x25, 0x00, 0xef, 0x68, 0xc7, 0xa8, 0x1d, 0x00, 0xef, 0x19, 0xc5,
- 0x01, 0x62, 0x00, 0xee, 0x50, 0x86, 0x00, 0xee, 0xc1, 0x96, 0x00, 0xd6,
- 0x71, 0x94, 0x00, 0xd6, 0x69, 0x89, 0x00, 0xd6, 0x60, 0xce, 0x45, 0xb5,
- 0x01, 0x07, 0x31, 0x45, 0x00, 0x8a, 0x43, 0x54, 0x23, 0xc6, 0x01, 0x61,
- 0x00, 0xef, 0xe0, 0x49, 0x68, 0x22, 0xc3, 0x54, 0x2f, 0xd0, 0x5e, 0x82,
- 0x00, 0xd5, 0xe0, 0xce, 0x73, 0x03, 0x00, 0xd5, 0xc1, 0xc7, 0x81, 0x2a,
- 0x00, 0x19, 0xf8, 0xc8, 0x63, 0xe1, 0x00, 0x1a, 0xd1, 0xd4, 0x3d, 0x69,
- 0x00, 0x1b, 0x10, 0xc6, 0x01, 0x61, 0x00, 0x1a, 0xe0, 0xc6, 0x01, 0x61,
- 0x00, 0x1a, 0xf8, 0x00, 0x43, 0x54, 0x3b, 0xc5, 0x01, 0x1d, 0x00, 0xef,
- 0xd0, 0x00, 0x43, 0x54, 0x47, 0xc4, 0x18, 0x83, 0x05, 0x47, 0x39, 0xc2,
- 0x26, 0x51, 0x05, 0x47, 0x30, 0xc3, 0x0c, 0x5b, 0x05, 0x47, 0x29, 0xc3,
- 0x06, 0x9e, 0x05, 0x47, 0x20, 0xc4, 0x04, 0x5e, 0x05, 0x47, 0x19, 0xc2,
- 0x01, 0x47, 0x05, 0x47, 0x10, 0xc6, 0x01, 0x61, 0x00, 0x19, 0x78, 0xc3,
- 0x04, 0x6c, 0x01, 0x65, 0xa9, 0xc3, 0xcc, 0x0a, 0x01, 0x65, 0xf9, 0x42,
- 0x00, 0xe5, 0xc3, 0x54, 0x53, 0xc3, 0x13, 0xfc, 0x01, 0x66, 0x39, 0x0a,
- 0xc3, 0x54, 0x5f, 0xc6, 0xcf, 0xa9, 0x01, 0x66, 0xb9, 0xc3, 0xe6, 0x6a,
- 0x01, 0x66, 0xc8, 0xc5, 0xaa, 0x2f, 0x01, 0x66, 0xe9, 0x10, 0xc3, 0x54,
- 0x72, 0xc3, 0xe6, 0xe2, 0x01, 0x67, 0x18, 0xc3, 0x04, 0x6c, 0x01, 0x65,
- 0xa1, 0xc3, 0xcc, 0x0a, 0x01, 0x65, 0xf1, 0x42, 0x00, 0xe5, 0xc3, 0x54,
- 0x7e, 0xc3, 0x13, 0xfc, 0x01, 0x66, 0x31, 0x0a, 0xc3, 0x54, 0x8a, 0xc6,
- 0xcf, 0xa9, 0x01, 0x66, 0xb1, 0xc3, 0xe6, 0x6a, 0x01, 0x66, 0xc0, 0xc5,
- 0xaa, 0x2f, 0x01, 0x66, 0xe1, 0x10, 0xc3, 0x54, 0x9d, 0xc3, 0xe6, 0xe2,
- 0x01, 0x67, 0x10, 0x46, 0x00, 0x6b, 0x43, 0x54, 0xa9, 0xc2, 0x00, 0x56,
- 0x01, 0x93, 0x70, 0xc2, 0x00, 0x56, 0x01, 0x93, 0xc0, 0xc2, 0x00, 0x56,
- 0x01, 0x93, 0x80, 0xc2, 0x00, 0x56, 0x01, 0x93, 0xc8, 0xc2, 0x00, 0x56,
- 0x01, 0x93, 0x98, 0xc2, 0x00, 0x56, 0x01, 0x93, 0xd0, 0x83, 0x01, 0x93,
- 0xa9, 0x97, 0x01, 0x93, 0xf0, 0xc2, 0x00, 0x56, 0x01, 0x93, 0xb0, 0xc2,
- 0x00, 0x56, 0x01, 0x93, 0xb8, 0xc4, 0x18, 0x83, 0x01, 0x23, 0x31, 0xc2,
- 0x26, 0x51, 0x01, 0x23, 0x28, 0xc3, 0x0c, 0x5b, 0x01, 0x23, 0x21, 0xc3,
- 0x06, 0x9e, 0x01, 0x23, 0x18, 0xc4, 0x04, 0x5e, 0x01, 0x23, 0x11, 0xc2,
- 0x01, 0x47, 0x01, 0x23, 0x08, 0x00, 0x43, 0x54, 0xb5, 0x00, 0x43, 0x54,
- 0xd3, 0xd0, 0x54, 0xc5, 0x01, 0x92, 0x60, 0x00, 0x43, 0x54, 0xf1, 0xc3,
- 0x18, 0x84, 0x01, 0x94, 0x31, 0xc4, 0xe4, 0xe7, 0x01, 0x94, 0xc8, 0x90,
- 0x01, 0x94, 0x81, 0xc6, 0xcf, 0xaf, 0x01, 0x94, 0xe1, 0xc7, 0xc7, 0x84,
- 0x01, 0x95, 0x60, 0xc3, 0x01, 0xe4, 0x01, 0x94, 0x89, 0xc3, 0xe6, 0x64,
- 0x01, 0x95, 0x58, 0xc2, 0x00, 0x7b, 0x01, 0x94, 0x21, 0xc2, 0x00, 0x9c,
- 0x01, 0x94, 0x59, 0xc7, 0xc3, 0x94, 0x01, 0x94, 0xb0, 0xc2, 0x00, 0x8c,
- 0x01, 0x94, 0x41, 0xc3, 0x01, 0xdd, 0x01, 0x95, 0x80, 0xc3, 0x00, 0xfd,
- 0x01, 0x94, 0x71, 0xc6, 0xd3, 0x4b, 0x01, 0x95, 0x48, 0xcc, 0x7e, 0x9b,
- 0x01, 0x94, 0xb9, 0xc2, 0x15, 0x32, 0x01, 0x95, 0x11, 0xc5, 0xc7, 0xbc,
- 0x01, 0x95, 0x18, 0x15, 0xc3, 0x55, 0x0f, 0xc6, 0xd3, 0xa5, 0x01, 0x95,
- 0x50, 0x17, 0xc3, 0x55, 0x19, 0xc6, 0xd4, 0xdd, 0x09, 0x29, 0xf8, 0xc4,
- 0xe2, 0xef, 0x09, 0x29, 0xf1, 0xc2, 0x01, 0xbd, 0x09, 0x19, 0xd8, 0xc4,
- 0xde, 0x30, 0x09, 0x1a, 0x71, 0x86, 0x09, 0x1a, 0x69, 0xc9, 0xb3, 0x78,
- 0x09, 0x1a, 0x60, 0xc3, 0x68, 0x64, 0x09, 0x1a, 0x51, 0xc2, 0x01, 0xf0,
- 0x09, 0x1a, 0x48, 0xc2, 0x00, 0xe5, 0x09, 0x1a, 0x21, 0x8f, 0x09, 0x1a,
- 0x19, 0xc2, 0x04, 0xcb, 0x09, 0x1a, 0x10, 0x97, 0x09, 0x1a, 0x01, 0x83,
- 0x09, 0x19, 0xe2, 0x03, 0x55, 0x21, 0xc5, 0xd2, 0x5c, 0x09, 0x19, 0xc8,
- 0x17, 0xc3, 0x55, 0x2f, 0xc3, 0x1f, 0xd8, 0x09, 0x19, 0x81, 0xc2, 0x00,
- 0xa4, 0x09, 0x19, 0x79, 0x03, 0x43, 0x55, 0x3a, 0xc5, 0x39, 0x40, 0x09,
- 0x18, 0xc0, 0x97, 0x09, 0x17, 0xb9, 0x87, 0x09, 0x17, 0xb0, 0xe0, 0x05,
- 0x07, 0x09, 0x17, 0x88, 0xda, 0x1c, 0x41, 0x09, 0x18, 0x20, 0xcb, 0x8e,
- 0x17, 0x09, 0x29, 0xb9, 0xcc, 0x87, 0x34, 0x09, 0x29, 0xb0, 0xc3, 0x22,
- 0x4f, 0x09, 0x29, 0xa9, 0xc4, 0xe5, 0xcf, 0x09, 0x29, 0xa1, 0xc4, 0xe1,
- 0x37, 0x09, 0x29, 0x98, 0x00, 0x43, 0x55, 0x44, 0x97, 0x09, 0x15, 0xab,
- 0x03, 0x55, 0x50, 0xc3, 0x06, 0xfe, 0x09, 0x15, 0xa1, 0xc4, 0x5a, 0x32,
- 0x09, 0x15, 0x99, 0xc2, 0x00, 0x8c, 0x09, 0x15, 0x91, 0xc4, 0x3d, 0x46,
- 0x09, 0x15, 0x89, 0xc3, 0x61, 0x9a, 0x09, 0x15, 0x81, 0x83, 0x09, 0x15,
- 0x78, 0xd6, 0x31, 0xc1, 0x09, 0x16, 0xa9, 0xc4, 0x59, 0x55, 0x09, 0x16,
- 0xa0, 0xc3, 0x13, 0x56, 0x09, 0x16, 0x89, 0xc3, 0xaf, 0x0f, 0x09, 0x16,
- 0x81, 0xc3, 0xe7, 0x57, 0x09, 0x16, 0x79, 0xc6, 0xd1, 0x23, 0x09, 0x16,
- 0x71, 0xc3, 0x04, 0xca, 0x09, 0x16, 0x63, 0x03, 0x55, 0x56, 0xc3, 0x1c,
- 0x4f, 0x09, 0x16, 0x59, 0xc3, 0x03, 0xf0, 0x09, 0x16, 0x51, 0x04, 0xc3,
- 0x55, 0x5c, 0x83, 0x09, 0x16, 0x38, 0xc2, 0x01, 0x2e, 0x09, 0x16, 0x29,
- 0x83, 0x09, 0x16, 0x20, 0x42, 0x00, 0xb3, 0xc3, 0x55, 0x68, 0x15, 0xc3,
- 0x55, 0x72, 0xc2, 0x00, 0x4d, 0x09, 0x29, 0x71, 0xc8, 0x66, 0xc0, 0x09,
- 0x1c, 0xb1, 0x17, 0xc3, 0x55, 0x7c, 0xc3, 0x1f, 0xd8, 0x09, 0x14, 0xf1,
- 0xc2, 0x00, 0x50, 0x09, 0x14, 0xe9, 0xc3, 0x4a, 0x33, 0x09, 0x14, 0xe1,
- 0x0d, 0xc3, 0x55, 0x92, 0xc2, 0x00, 0xa4, 0x09, 0x14, 0xc9, 0xc2, 0x03,
- 0x86, 0x09, 0x14, 0xbb, 0x03, 0x55, 0x9e, 0x83, 0x09, 0x14, 0xb0, 0xc9,
- 0xaa, 0x81, 0x09, 0x29, 0x68, 0x97, 0x09, 0x29, 0x53, 0x03, 0x55, 0xa2,
- 0xcc, 0x34, 0xe1, 0x09, 0x29, 0x49, 0x0f, 0xc3, 0x55, 0xba, 0xc7, 0xc2,
- 0x6e, 0x09, 0x29, 0x39, 0xc5, 0xda, 0x52, 0x09, 0x29, 0x31, 0xc2, 0x00,
- 0x0a, 0x09, 0x29, 0x29, 0x09, 0xc3, 0x55, 0xc6, 0xc8, 0xb7, 0x3d, 0x09,
- 0x29, 0x11, 0xc3, 0x15, 0x86, 0x09, 0x1c, 0x89, 0xc3, 0x0a, 0x91, 0x09,
- 0x12, 0xd3, 0x03, 0x55, 0xd1, 0x10, 0xc3, 0x55, 0xd7, 0x03, 0x43, 0x55,
- 0xe1, 0xcf, 0x6b, 0x5a, 0x09, 0x13, 0xc3, 0x03, 0x55, 0xee, 0x4a, 0xa2,
- 0xca, 0x43, 0x55, 0xf4, 0xd1, 0x54, 0xf7, 0x09, 0x13, 0x60, 0xc3, 0x79,
- 0x0e, 0x09, 0x13, 0x41, 0xc3, 0x13, 0x56, 0x09, 0x13, 0x33, 0x03, 0x56,
- 0x30, 0xc4, 0x47, 0xd6, 0x09, 0x13, 0x29, 0xc3, 0x1c, 0x4f, 0x09, 0x13,
- 0x20, 0x47, 0x01, 0x2c, 0x43, 0x56, 0x36, 0xc2, 0x02, 0xb4, 0x09, 0x11,
- 0xa9, 0xc3, 0x54, 0x8e, 0x09, 0x11, 0xa1, 0x83, 0x09, 0x11, 0x98, 0x46,
- 0x01, 0x2d, 0xc3, 0x56, 0x48, 0xc4, 0x39, 0x41, 0x09, 0x11, 0xe8, 0x45,
- 0x01, 0x2e, 0xc3, 0x56, 0x5b, 0xc3, 0x58, 0x20, 0x09, 0x10, 0x88, 0xc6,
- 0x6e, 0x0a, 0x09, 0x10, 0xab, 0x03, 0x56, 0xab, 0xc6, 0x07, 0x0a, 0x09,
- 0x10, 0xa0, 0xcd, 0x79, 0xee, 0x09, 0x10, 0xc9, 0xc9, 0xad, 0x1b, 0x09,
- 0x10, 0xc0, 0x47, 0x01, 0x2c, 0x43, 0x56, 0xb1, 0x47, 0x01, 0x2c, 0x43,
- 0x56, 0xdc, 0xa2, 0x09, 0x27, 0xf1, 0xa0, 0x09, 0x27, 0xe9, 0x9f, 0x09,
- 0x27, 0xe1, 0x9d, 0x09, 0x27, 0xd8, 0xa4, 0x09, 0x27, 0xc1, 0x9d, 0x09,
- 0x27, 0xb8, 0xa6, 0x09, 0x27, 0x8b, 0x03, 0x57, 0x02, 0x9e, 0x09, 0x27,
- 0x80, 0xa1, 0x09, 0x27, 0x71, 0xa0, 0x09, 0x27, 0x68, 0xa5, 0x09, 0x27,
- 0x61, 0xa4, 0x09, 0x27, 0x59, 0xa0, 0x09, 0x27, 0x50, 0xa3, 0x09, 0x27,
- 0x49, 0xa2, 0x09, 0x27, 0x40, 0xa5, 0x09, 0x27, 0x31, 0xa2, 0x09, 0x27,
- 0x29, 0x9d, 0x09, 0x27, 0x20, 0xa6, 0x09, 0x27, 0x19, 0x9d, 0x09, 0x27,
- 0x10, 0xce, 0x74, 0xd1, 0x09, 0x26, 0xf1, 0x9d, 0x09, 0x26, 0xe8, 0x9e,
- 0x09, 0x26, 0xd1, 0x9d, 0x09, 0x26, 0xc8, 0xa2, 0x09, 0x26, 0xb9, 0x9e,
- 0x09, 0x26, 0xb0, 0x46, 0x01, 0x2d, 0xc3, 0x57, 0x08, 0xc7, 0x07, 0x09,
- 0x09, 0x0f, 0x58, 0xc4, 0x39, 0x41, 0x09, 0x0f, 0x7b, 0x03, 0x57, 0x52,
- 0xc9, 0x9b, 0x69, 0x09, 0x0f, 0x6a, 0x03, 0x57, 0x58, 0x9f, 0x09, 0x1c,
- 0x38, 0x8d, 0x09, 0x0b, 0x78, 0x86, 0x09, 0x0b, 0x88, 0x94, 0x09, 0x0a,
- 0xf1, 0xc3, 0x01, 0x27, 0x09, 0x0a, 0xe9, 0x86, 0x09, 0x0a, 0xe0, 0x97,
- 0x09, 0x0c, 0x1b, 0x03, 0x57, 0x5e, 0xc2, 0x00, 0x51, 0x09, 0x0c, 0x11,
- 0x87, 0x09, 0x0c, 0x09, 0x83, 0x09, 0x0c, 0x00, 0x94, 0x09, 0x0b, 0xf8,
- 0x8f, 0x09, 0x1c, 0x18, 0x86, 0x09, 0x1c, 0x09, 0xc2, 0xe8, 0x24, 0x09,
- 0x0b, 0x60, 0xc2, 0x00, 0xe5, 0x09, 0x1c, 0x03, 0x03, 0x57, 0x62, 0xc2,
- 0x3e, 0xab, 0x09, 0x0b, 0x40, 0x94, 0x09, 0x0b, 0x2b, 0x03, 0x57, 0x66,
- 0xc7, 0x5b, 0xdb, 0x09, 0x0b, 0x21, 0x8e, 0x09, 0x0b, 0x18, 0xa0, 0x09,
- 0x1b, 0xf9, 0x9f, 0x09, 0x0a, 0xd8, 0xc9, 0xb3, 0x8a, 0x09, 0x0a, 0xd0,
- 0xcb, 0x96, 0x62, 0x09, 0x0b, 0xc8, 0x46, 0x26, 0x11, 0x43, 0x57, 0x6c,
- 0xe0, 0x01, 0x27, 0x09, 0x0c, 0xf0, 0xc3, 0x54, 0x8e, 0x09, 0x09, 0x01,
- 0xca, 0xa4, 0x1e, 0x09, 0x08, 0xf8, 0xc8, 0x66, 0xc0, 0x09, 0x26, 0x61,
- 0xcd, 0x79, 0xa0, 0x09, 0x08, 0xe1, 0xc3, 0x1f, 0xd8, 0x09, 0x08, 0xd9,
- 0xc3, 0x59, 0xa8, 0x09, 0x08, 0xca, 0x03, 0x57, 0x7e, 0x16, 0xc3, 0x57,
- 0x84, 0xcd, 0x4d, 0x99, 0x09, 0x08, 0x90, 0xc2, 0x03, 0xa4, 0x09, 0x08,
- 0x79, 0xcb, 0x8e, 0x22, 0x09, 0x08, 0x71, 0xc3, 0x04, 0xca, 0x09, 0x08,
- 0x69, 0xc9, 0x5b, 0xd9, 0x09, 0x08, 0x61, 0xca, 0xa4, 0x32, 0x09, 0x08,
- 0x58, 0xc4, 0xe0, 0x33, 0x09, 0x26, 0x41, 0x15, 0xc3, 0x57, 0x90, 0x10,
- 0xc3, 0x57, 0x9e, 0x0f, 0xc3, 0x57, 0xae, 0x0e, 0xc3, 0x57, 0xbe, 0x0d,
- 0xc3, 0x57, 0xcb, 0x0a, 0xc3, 0x57, 0xdc, 0x09, 0xc3, 0x57, 0xec, 0x07,
- 0xc3, 0x57, 0xfa, 0x06, 0xc3, 0x58, 0x0e, 0x04, 0xc3, 0x58, 0x1d, 0x03,
- 0xc3, 0x58, 0x2a, 0x97, 0x09, 0x07, 0x53, 0x03, 0x58, 0x46, 0xc4, 0x3d,
- 0x51, 0x09, 0x07, 0x49, 0xc2, 0x03, 0xa4, 0x09, 0x07, 0x11, 0x0b, 0x43,
- 0x58, 0x4d, 0xcd, 0x7a, 0x2f, 0x09, 0x07, 0xd1, 0xc9, 0xaa, 0xb7, 0x09,
- 0x07, 0xc9, 0xc4, 0x59, 0x55, 0x09, 0x07, 0xc0, 0x97, 0x09, 0x25, 0xa9,
- 0xc2, 0x01, 0xf0, 0x09, 0x1b, 0xc0, 0x86, 0x09, 0x05, 0xa1, 0x9f, 0x09,
- 0x05, 0x98, 0x97, 0x09, 0x05, 0x91, 0x8b, 0x09, 0x05, 0x89, 0x83, 0x09,
- 0x05, 0x7a, 0x03, 0x58, 0x59, 0xc2, 0x34, 0xf4, 0x09, 0x05, 0x71, 0xc5,
- 0x47, 0x41, 0x09, 0x05, 0x62, 0x03, 0x58, 0x5f, 0xc5, 0x39, 0x40, 0x09,
- 0x05, 0x50, 0xc5, 0x39, 0x40, 0x09, 0x05, 0x40, 0x90, 0x09, 0x05, 0x29,
- 0xc9, 0xb3, 0xdb, 0x09, 0x05, 0x1a, 0x03, 0x58, 0x65, 0x95, 0x09, 0x25,
- 0x98, 0x8e, 0x09, 0x25, 0x88, 0xc5, 0x59, 0x54, 0x09, 0x04, 0xc8, 0xc6,
- 0x66, 0xc2, 0x09, 0x25, 0x41, 0xc2, 0x01, 0xf0, 0x09, 0x25, 0x38, 0x8b,
- 0x09, 0x25, 0x21, 0xc2, 0x01, 0x30, 0x09, 0x25, 0x19, 0xc3, 0x00, 0xe4,
- 0x09, 0x25, 0x10, 0xcc, 0x8a, 0x34, 0x09, 0x25, 0x09, 0x03, 0x43, 0x58,
- 0x6b, 0x17, 0xc3, 0x58, 0x78, 0xc5, 0x47, 0x41, 0x09, 0x24, 0xd0, 0x8b,
- 0x09, 0x24, 0xc1, 0x83, 0x09, 0x24, 0xb8, 0x8b, 0x09, 0x24, 0xa3, 0x03,
- 0x58, 0x85, 0x83, 0x09, 0x24, 0x98, 0xc2, 0x01, 0xbd, 0x09, 0x24, 0x89,
- 0xc2, 0x00, 0x34, 0x09, 0x24, 0x80, 0xc2, 0x00, 0xe5, 0x09, 0x24, 0x73,
- 0x03, 0x58, 0x91, 0xc4, 0xe5, 0x87, 0x09, 0x24, 0x68, 0xc5, 0x39, 0x40,
- 0x09, 0x04, 0x38, 0x17, 0xc3, 0x58, 0x97, 0xc4, 0x3d, 0x51, 0x09, 0x03,
- 0x59, 0xc2, 0x00, 0xcb, 0x09, 0x03, 0x51, 0xcc, 0x34, 0xe1, 0x09, 0x03,
- 0x49, 0xc2, 0x00, 0x8c, 0x09, 0x03, 0x41, 0x0e, 0xc3, 0x58, 0xa3, 0xc3,
- 0x59, 0xa8, 0x09, 0x03, 0x19, 0xc2, 0x01, 0x7b, 0x09, 0x03, 0x0b, 0x03,
- 0x58, 0xae, 0xc2, 0x00, 0xa4, 0x09, 0x03, 0x01, 0x09, 0xc3, 0x58, 0xb4,
- 0x04, 0xc3, 0x58, 0xc8, 0x03, 0x43, 0x58, 0xd2, 0xc2, 0x5a, 0x34, 0x09,
- 0x24, 0x09, 0xc3, 0x13, 0xfc, 0x09, 0x00, 0x98, 0xc5, 0x59, 0x54, 0x09,
- 0x24, 0x00, 0xc3, 0x0f, 0x42, 0x09, 0x00, 0x89, 0xc7, 0x66, 0xc1, 0x09,
- 0x00, 0x80, 0xc7, 0x5b, 0xdb, 0x09, 0x00, 0x71, 0x8e, 0x09, 0x00, 0x68,
- 0xc8, 0x0d, 0xc9, 0x09, 0x01, 0xe3, 0x03, 0x58, 0xde, 0x16, 0x43, 0x58,
- 0xe4, 0xce, 0x6f, 0x59, 0x09, 0x14, 0x71, 0x46, 0x01, 0x2d, 0x43, 0x58,
- 0xea, 0x9f, 0x09, 0x14, 0x40, 0x84, 0x09, 0x14, 0x30, 0x97, 0x09, 0x14,
- 0x19, 0x8b, 0x09, 0x14, 0x10, 0x84, 0x09, 0x14, 0x08, 0xe0, 0x04, 0xc7,
- 0x09, 0x0a, 0x48, 0xcb, 0x95, 0x23, 0x00, 0x27, 0x99, 0xc8, 0x1e, 0x43,
- 0x00, 0x27, 0x88, 0xc9, 0x23, 0xdf, 0x00, 0x25, 0x69, 0xcb, 0x95, 0x44,
- 0x05, 0x34, 0x58, 0xc9, 0x23, 0xdf, 0x00, 0x29, 0x79, 0xcb, 0x95, 0x44,
- 0x00, 0x29, 0x09, 0xc4, 0x02, 0x83, 0x00, 0x28, 0x99, 0xc4, 0x12, 0xeb,
- 0x00, 0x26, 0x30, 0xc9, 0x56, 0xec, 0x00, 0x29, 0x49, 0xcb, 0x95, 0x44,
- 0x00, 0x29, 0x19, 0xc4, 0x12, 0xeb, 0x00, 0x26, 0x51, 0xc4, 0x02, 0x83,
- 0x00, 0x26, 0x41, 0xc9, 0x23, 0xdf, 0x00, 0x25, 0x18, 0xc2, 0x01, 0xf0,
- 0x00, 0x29, 0x59, 0x87, 0x05, 0x34, 0x48, 0xc2, 0x01, 0x64, 0x05, 0x32,
- 0x18, 0xcf, 0x66, 0x5f, 0x00, 0x29, 0x38, 0x8b, 0x00, 0x21, 0xcb, 0x03,
- 0x58, 0xfc, 0x97, 0x00, 0x22, 0xf0, 0x8e, 0x05, 0x33, 0x29, 0x8f, 0x05,
- 0x33, 0x38, 0xc9, 0x23, 0xdf, 0x00, 0x29, 0x29, 0xcb, 0x95, 0x44, 0x00,
- 0x25, 0x38, 0xcf, 0x66, 0x5f, 0x00, 0x25, 0xf8, 0xc9, 0x1e, 0x42, 0x00,
- 0x27, 0xc9, 0xc8, 0x6c, 0xf7, 0x05, 0x32, 0x88, 0xc3, 0xad, 0x7d, 0x00,
- 0x28, 0x79, 0xc3, 0xe7, 0x4b, 0x00, 0x28, 0x69, 0xc3, 0xc9, 0x7b, 0x00,
- 0x28, 0x59, 0xc3, 0xe7, 0x51, 0x00, 0x28, 0x49, 0x06, 0xc3, 0x59, 0x02,
- 0xc3, 0xe6, 0x94, 0x00, 0x28, 0x28, 0xc4, 0x02, 0x83, 0x00, 0x26, 0x21,
- 0xc6, 0x01, 0x01, 0x00, 0x24, 0xf9, 0xc9, 0x23, 0xdf, 0x00, 0x24, 0xd9,
- 0xcf, 0x2c, 0x05, 0x00, 0x24, 0xe8, 0xc6, 0x01, 0x01, 0x00, 0x27, 0xf9,
- 0xc4, 0x02, 0x83, 0x00, 0x27, 0xe9, 0xc9, 0x23, 0xdf, 0x00, 0x25, 0x98,
- 0xc6, 0x01, 0x01, 0x00, 0x24, 0x9b, 0x03, 0x59, 0x12, 0xc9, 0x23, 0xdf,
- 0x00, 0x27, 0xb9, 0xc6, 0x5b, 0x8c, 0x00, 0x24, 0x89, 0xcb, 0x95, 0x44,
- 0x00, 0x24, 0xa8, 0xcf, 0x66, 0xaa, 0x00, 0x27, 0x58, 0xc5, 0x1f, 0x0a,
- 0x00, 0x26, 0xb9, 0xc5, 0x1f, 0x94, 0x00, 0x22, 0x80, 0x83, 0x05, 0x32,
- 0x39, 0x46, 0x2f, 0xb7, 0x43, 0x59, 0x18, 0xc8, 0x1e, 0x43, 0x00, 0x26,
- 0xf9, 0xc8, 0x23, 0xe0, 0x00, 0x24, 0xc8, 0x46, 0x00, 0x6b, 0x43, 0x59,
- 0x38, 0xcf, 0x2c, 0x05, 0x00, 0x25, 0xc9, 0x06, 0x43, 0x59, 0x42, 0xca,
- 0x9f, 0xb4, 0x00, 0x24, 0x50, 0xc3, 0xe6, 0x94, 0x00, 0x28, 0x31, 0xc2,
- 0x1b, 0xa5, 0x00, 0x28, 0x11, 0x87, 0x00, 0x28, 0x00, 0xc9, 0x1e, 0x4b,
- 0x00, 0x27, 0xd0, 0xc8, 0x1e, 0x43, 0x00, 0x27, 0xa1, 0xc8, 0x23, 0xe0,
- 0x00, 0x25, 0xb0, 0xc3, 0x2d, 0xf3, 0x05, 0x32, 0x91, 0x83, 0x05, 0x32,
- 0xb1, 0xd1, 0x52, 0x60, 0x05, 0x32, 0xe1, 0x87, 0x00, 0x23, 0x21, 0xca,
- 0x52, 0x67, 0x00, 0x23, 0x41, 0xc7, 0xc7, 0x4c, 0x00, 0x23, 0x60, 0x06,
- 0xc3, 0x59, 0x4e, 0xc5, 0x1f, 0x0a, 0x00, 0x26, 0x08, 0xc7, 0xc6, 0xa4,
- 0x00, 0x6d, 0x39, 0xc6, 0x8e, 0xa0, 0x00, 0x6d, 0x68, 0xc7, 0xc9, 0xfa,
- 0x00, 0x6d, 0x49, 0xc6, 0x8e, 0xa0, 0x00, 0x6d, 0x78, 0xc7, 0xca, 0x9b,
- 0x00, 0x6c, 0xd9, 0xc7, 0xc7, 0x30, 0x00, 0x6c, 0xe9, 0xc7, 0xc8, 0xc6,
- 0x00, 0x6d, 0x09, 0xc7, 0xc9, 0x6e, 0x00, 0x6d, 0x19, 0x16, 0xc3, 0x59,
- 0x5a, 0x06, 0xc3, 0x59, 0x66, 0xc7, 0xc5, 0x77, 0x00, 0x6d, 0xa9, 0xc7,
- 0x8e, 0x9f, 0x00, 0x6d, 0xb8, 0xca, 0x63, 0xee, 0x00, 0x6e, 0xe1, 0xcf,
- 0x63, 0xe9, 0x00, 0x6e, 0xe9, 0xcb, 0x95, 0x7b, 0x00, 0x6e, 0xf0, 0x49,
- 0x1f, 0x2e, 0x43, 0x59, 0x72, 0x49, 0x1f, 0x2e, 0x43, 0x59, 0x7e, 0x49,
- 0x1f, 0x2e, 0x43, 0x59, 0x8a, 0x4c, 0x85, 0xf0, 0xc3, 0x59, 0x96, 0x87,
- 0x0e, 0xcd, 0x20, 0x49, 0x1f, 0x2e, 0x43, 0x59, 0xa2, 0x49, 0x1f, 0x2e,
- 0x43, 0x59, 0xae, 0xc8, 0x39, 0x95, 0x0e, 0xc8, 0xf1, 0xc6, 0x24, 0x18,
- 0x0e, 0xc8, 0xe0, 0xc4, 0x15, 0x7e, 0x0e, 0xd3, 0x2b, 0x03, 0x59, 0xba,
- 0xc6, 0x5d, 0x1c, 0x0e, 0xd3, 0x1a, 0x03, 0x59, 0xc0, 0xcb, 0x51, 0xcd,
- 0x0e, 0xcc, 0x31, 0xc6, 0x05, 0x96, 0x0e, 0xcc, 0x29, 0xc6, 0x24, 0x18,
- 0x0e, 0xcc, 0x20, 0xcb, 0x51, 0xcd, 0x0e, 0xcc, 0x19, 0xc6, 0x05, 0x96,
- 0x0e, 0xcc, 0x11, 0xc6, 0x24, 0x18, 0x0e, 0xcc, 0x08, 0xcb, 0x51, 0xcd,
- 0x0e, 0xca, 0x81, 0xc6, 0x05, 0x96, 0x0e, 0xca, 0x79, 0xc6, 0x24, 0x18,
- 0x0e, 0xca, 0x70, 0xcb, 0x51, 0xcd, 0x0e, 0xca, 0x69, 0xc6, 0x05, 0x96,
- 0x0e, 0xca, 0x61, 0xc6, 0x24, 0x18, 0x0e, 0xca, 0x58, 0xc7, 0x04, 0xb2,
- 0x0e, 0xd1, 0x49, 0xc5, 0x1d, 0x62, 0x0e, 0xd1, 0x38, 0x00, 0x43, 0x59,
- 0xc6, 0x00, 0x43, 0x59, 0xd2, 0x00, 0x43, 0x59, 0xde, 0x00, 0x43, 0x5a,
- 0x0e, 0xc5, 0x08, 0x42, 0x0e, 0xc0, 0x2b, 0x03, 0x5a, 0x2d, 0xd2, 0x14,
- 0xde, 0x0e, 0xc6, 0xa3, 0x03, 0x5a, 0x31, 0x45, 0x00, 0x5b, 0xc3, 0x5a,
- 0x35, 0x47, 0x14, 0xea, 0x43, 0x5a, 0x41, 0x00, 0x43, 0x5a, 0x50, 0x00,
- 0x43, 0x5a, 0x93, 0x92, 0x0e, 0xc3, 0x6b, 0x03, 0x5a, 0xab, 0xc6, 0xba,
- 0x9f, 0x0e, 0xc3, 0xaa, 0x03, 0x5a, 0xaf, 0x00, 0x43, 0x5a, 0xb3, 0x00,
- 0x43, 0x5a, 0xd4, 0xcb, 0x14, 0xe5, 0x0e, 0xc5, 0x91, 0xc9, 0xaf, 0xd9,
- 0x0e, 0xc4, 0xa9, 0x46, 0x0d, 0xe0, 0xc3, 0x5a, 0xef, 0xc8, 0xb7, 0x55,
- 0x0e, 0xc3, 0xc9, 0xd3, 0x40, 0xa4, 0x0e, 0xc2, 0xb1, 0xc5, 0x08, 0x42,
- 0x0e, 0xc0, 0x18, 0x4b, 0x43, 0xaf, 0xc3, 0x5a, 0xfb, 0x4a, 0x18, 0x91,
- 0x43, 0x5b, 0x07, 0xc6, 0x05, 0x96, 0x0e, 0xcf, 0xa1, 0xc6, 0x24, 0x18,
- 0x0e, 0xcf, 0x98, 0xc6, 0x05, 0x96, 0x0e, 0xcf, 0x81, 0xc6, 0x24, 0x18,
- 0x0e, 0xcf, 0x78, 0xc5, 0x17, 0xef, 0x0e, 0xce, 0xf1, 0x15, 0xc3, 0x5b,
- 0x19, 0x48, 0x1f, 0x2f, 0x43, 0x5b, 0x25, 0xc6, 0x05, 0x96, 0x0e, 0xcf,
- 0x61, 0xc6, 0x24, 0x18, 0x0e, 0xcf, 0x48, 0xc6, 0x05, 0x96, 0x0e, 0xcf,
- 0x59, 0xc6, 0x24, 0x18, 0x0e, 0xcf, 0x40, 0xc6, 0x05, 0x96, 0x0e, 0xcf,
- 0x51, 0xc6, 0x24, 0x18, 0x0e, 0xcf, 0x38, 0xca, 0x97, 0x34, 0x0e, 0xcb,
- 0x49, 0x49, 0x43, 0xd5, 0x43, 0x5b, 0x31, 0x46, 0x20, 0x8c, 0xc3, 0x5b,
- 0x46, 0x48, 0xbc, 0x7d, 0x43, 0x5b, 0x52, 0x46, 0x20, 0x8c, 0xc3, 0x5b,
- 0x5e, 0x48, 0xbc, 0x7d, 0x43, 0x5b, 0x70, 0xc8, 0xbf, 0x2d, 0x0e, 0xce,
- 0xc9, 0xc5, 0x17, 0xef, 0x0e, 0xce, 0xbb, 0x03, 0x5b, 0x7c, 0xc6, 0x06,
- 0x1b, 0x0e, 0xce, 0xb1, 0xc5, 0x04, 0x73, 0x0e, 0xce, 0xa9, 0x48, 0x1f,
- 0x2f, 0x43, 0x5b, 0x82, 0xc5, 0x17, 0xef, 0x0e, 0xcb, 0xb1, 0xc6, 0x06,
- 0x1b, 0x0e, 0xcb, 0xa9, 0xc5, 0x04, 0x73, 0x0e, 0xcb, 0xa0, 0xc5, 0x17,
- 0xef, 0x0e, 0xcb, 0xd1, 0xc6, 0x06, 0x1b, 0x0e, 0xcb, 0xc9, 0xc5, 0x04,
- 0x73, 0x0e, 0xcb, 0xc0, 0xca, 0x97, 0x34, 0x0e, 0xcb, 0x91, 0xc8, 0x52,
- 0x03, 0x0e, 0xcb, 0x88, 0xcb, 0x97, 0x33, 0x0e, 0xcb, 0x68, 0xc6, 0x05,
- 0x96, 0x0e, 0xcf, 0x91, 0xc6, 0x24, 0x18, 0x0e, 0xcf, 0x88, 0xc6, 0x05,
- 0x96, 0x0e, 0xcf, 0x71, 0xc6, 0x24, 0x18, 0x0e, 0xcf, 0x68, 0x4e, 0x6d,
- 0x99, 0xc3, 0x5b, 0x8e, 0x48, 0x1f, 0x2f, 0xc3, 0x5b, 0xa0, 0x46, 0x0d,
- 0xe6, 0x43, 0x5b, 0xac, 0xc6, 0x05, 0x96, 0x0e, 0xcf, 0x31, 0xc6, 0x24,
- 0x18, 0x0e, 0xcf, 0x20, 0xc6, 0x05, 0x96, 0x0e, 0xcf, 0x29, 0xc6, 0x24,
- 0x18, 0x0e, 0xcf, 0x18, 0xc5, 0xd8, 0xcc, 0x0e, 0xcd, 0x79, 0xca, 0xa2,
- 0x70, 0x0e, 0xcd, 0x40, 0xc7, 0x05, 0x95, 0x0e, 0xcc, 0xc0, 0xc5, 0xd8,
- 0xcc, 0x0e, 0xcd, 0x71, 0xca, 0xa2, 0x70, 0x0e, 0xcd, 0x38, 0x00, 0xc3,
- 0x5b, 0xb8, 0x48, 0xba, 0x1d, 0x43, 0x5b, 0xc8, 0xc5, 0x17, 0xef, 0x0e,
- 0xca, 0x09, 0xc6, 0x06, 0x1b, 0x0e, 0xca, 0x01, 0xc5, 0x04, 0x73, 0x0e,
- 0xc9, 0xf8, 0xc8, 0x5b, 0x59, 0x0e, 0xc9, 0xf1, 0xc5, 0x17, 0xef, 0x0e,
- 0xc9, 0xe9, 0xc6, 0x06, 0x1b, 0x0e, 0xc9, 0xe1, 0xc5, 0x04, 0x73, 0x0e,
- 0xc9, 0xd8, 0xca, 0x97, 0x34, 0x0e, 0xc9, 0x71, 0x49, 0x43, 0xd5, 0x43,
- 0x5b, 0xd4, 0xc5, 0x17, 0xef, 0x0e, 0xca, 0x21, 0xc6, 0x06, 0x1b, 0x0e,
- 0xca, 0x19, 0xc5, 0x04, 0x73, 0x0e, 0xca, 0x10, 0xc5, 0x17, 0xef, 0x0e,
- 0xc9, 0xd1, 0xc6, 0x06, 0x1b, 0x0e, 0xc9, 0xc9, 0xc5, 0x04, 0x73, 0x0e,
- 0xc9, 0xc0, 0xcb, 0x97, 0x33, 0x0e, 0xc9, 0xb8, 0xcb, 0x97, 0x33, 0x0e,
- 0xc9, 0x90, 0xc5, 0x17, 0xef, 0x0e, 0xcb, 0x1b, 0x03, 0x5b, 0xe9, 0xc6,
- 0x06, 0x1b, 0x0e, 0xcb, 0x11, 0xc5, 0x04, 0x73, 0x0e, 0xcb, 0x08, 0xc5,
- 0x17, 0xef, 0x0e, 0xca, 0xfb, 0x03, 0x5b, 0xef, 0xc6, 0x06, 0x1b, 0x0e,
- 0xca, 0xf1, 0xc5, 0x04, 0x73, 0x0e, 0xca, 0xe8, 0xc2, 0x00, 0x15, 0x0e,
- 0xca, 0xe0, 0xc2, 0x00, 0x15, 0x0e, 0xca, 0xc0, 0x4c, 0x86, 0xc8, 0xc3,
- 0x5b, 0xf5, 0xc5, 0x04, 0x73, 0x0e, 0xc9, 0x11, 0xc5, 0x17, 0xef, 0x0e,
- 0xc9, 0x08, 0xc4, 0xe0, 0xa7, 0x0e, 0xd2, 0x61, 0xc8, 0xb7, 0xbd, 0x0e,
- 0xd2, 0x58, 0xc4, 0xe0, 0xa7, 0x0e, 0xd2, 0x49, 0xc8, 0xb7, 0xbd, 0x0e,
- 0xd2, 0x40, 0xcf, 0x67, 0xf4, 0x08, 0xae, 0xb9, 0xce, 0x6f, 0xd7, 0x08,
- 0xae, 0xb1, 0xc4, 0x5c, 0x62, 0x08, 0xae, 0xa8, 0xcd, 0x41, 0x2f, 0x08,
- 0xae, 0x91, 0x49, 0xb5, 0x43, 0x43, 0x5c, 0x01, 0xd0, 0x5b, 0x62, 0x08,
- 0xae, 0x71, 0xd0, 0x5e, 0x32, 0x08, 0xae, 0x69, 0xc9, 0x41, 0x33, 0x08,
+ 0x4d, 0xa0, 0x83, 0x08, 0x4d, 0x91, 0xc2, 0x01, 0x0e, 0x08, 0x4d, 0x68,
+ 0x87, 0x08, 0x4d, 0x89, 0x83, 0x08, 0x4d, 0x78, 0xc9, 0x89, 0x1c, 0x08,
+ 0x4d, 0x80, 0x87, 0x08, 0x4d, 0x51, 0x83, 0x08, 0x4d, 0x48, 0x49, 0x3b,
+ 0xea, 0xc3, 0x4a, 0x46, 0x4a, 0x2c, 0xb1, 0xc3, 0x4a, 0x52, 0x49, 0x45,
+ 0xd4, 0xc3, 0x4a, 0x5e, 0x47, 0x54, 0x55, 0x43, 0x4a, 0x6a, 0x15, 0xc3,
+ 0x4a, 0x76, 0xc2, 0x01, 0x04, 0x00, 0xeb, 0xc1, 0xc9, 0xac, 0x7c, 0x05,
+ 0x34, 0xe0, 0x99, 0x00, 0xea, 0x11, 0x97, 0x00, 0xea, 0x09, 0x96, 0x00,
+ 0xea, 0x01, 0x94, 0x00, 0xe9, 0xfb, 0x03, 0x4a, 0x82, 0x92, 0x00, 0xe9,
+ 0xf1, 0x91, 0x00, 0xe9, 0xe3, 0x03, 0x4a, 0x88, 0x90, 0x00, 0xe9, 0xd1,
+ 0x8f, 0x00, 0xe9, 0xc9, 0x8e, 0x00, 0xe9, 0xc1, 0x8d, 0x00, 0xe9, 0xb9,
+ 0x8c, 0x00, 0xe9, 0xb1, 0x8b, 0x00, 0xe9, 0xa9, 0x8a, 0x00, 0xe9, 0xa3,
+ 0x03, 0x4a, 0x8c, 0x89, 0x00, 0xe9, 0x99, 0x87, 0x00, 0xe9, 0x89, 0x86,
+ 0x00, 0xe9, 0x81, 0x84, 0x00, 0xe9, 0x73, 0x03, 0x4a, 0x92, 0x83, 0x00,
+ 0xe9, 0x63, 0x03, 0x4a, 0x98, 0x85, 0x05, 0x3f, 0x91, 0x88, 0x05, 0x3f,
+ 0x99, 0x93, 0x05, 0x3f, 0xa1, 0x98, 0x01, 0x63, 0xe8, 0x43, 0x01, 0xf4,
+ 0xc3, 0x4a, 0x9c, 0x44, 0x11, 0x27, 0x43, 0x4a, 0xb4, 0xcf, 0x64, 0xa1,
+ 0x00, 0x16, 0x91, 0xce, 0x0f, 0xa9, 0x00, 0x16, 0x98, 0x96, 0x00, 0xea,
+ 0xbb, 0x03, 0x4a, 0xcc, 0x87, 0x00, 0xea, 0x4b, 0x03, 0x4a, 0xf9, 0x9c,
+ 0x00, 0xed, 0xdb, 0x03, 0x4b, 0x11, 0x98, 0x00, 0xea, 0xdb, 0x03, 0x4b,
+ 0x17, 0x85, 0x00, 0xec, 0xe3, 0x03, 0x4b, 0x1d, 0x97, 0x00, 0xea, 0xc3,
+ 0x03, 0x4b, 0x35, 0x95, 0x00, 0x17, 0x13, 0x03, 0x4b, 0x3f, 0x92, 0x00,
+ 0xea, 0xb3, 0x03, 0x4b, 0x4f, 0x84, 0x00, 0xea, 0x3b, 0x03, 0x4b, 0x55,
+ 0x47, 0x02, 0x16, 0xc3, 0x4b, 0x6d, 0x8f, 0x00, 0xea, 0x83, 0x03, 0x4b,
+ 0x79, 0x8e, 0x00, 0x17, 0x0b, 0x03, 0x4b, 0x7f, 0x8c, 0x00, 0x15, 0x93,
+ 0x03, 0x4b, 0xa0, 0x0b, 0xc3, 0x4b, 0xa6, 0x86, 0x00, 0xea, 0x43, 0x03,
+ 0x4b, 0xb2, 0x88, 0x00, 0xed, 0x03, 0x03, 0x4b, 0xce, 0x94, 0x00, 0x15,
+ 0x9b, 0x03, 0x4b, 0xd4, 0x89, 0x00, 0xea, 0x6b, 0x03, 0x4b, 0xe6, 0x83,
+ 0x00, 0xea, 0x1b, 0x03, 0x4b, 0xf8, 0x91, 0x00, 0xea, 0x93, 0x03, 0x4c,
+ 0x08, 0x8d, 0x00, 0xea, 0x79, 0x8a, 0x00, 0x15, 0x83, 0x03, 0x4c, 0x14,
+ 0x99, 0x00, 0x15, 0xb9, 0x9b, 0x00, 0x15, 0xc1, 0x9a, 0x00, 0x17, 0x19,
+ 0x93, 0x08, 0x3d, 0x28, 0xd5, 0x34, 0x8a, 0x08, 0x3c, 0x11, 0xd0, 0x34,
+ 0x8f, 0x08, 0x3c, 0x08, 0xc9, 0x3b, 0x22, 0x05, 0x39, 0x01, 0xc8, 0x9f,
+ 0x18, 0x05, 0x39, 0x08, 0xc3, 0x72, 0x32, 0x00, 0x17, 0xe9, 0xcf, 0x6a,
+ 0x41, 0x05, 0x3c, 0x50, 0xc4, 0x32, 0x64, 0x05, 0x5b, 0x59, 0xc9, 0x0f,
+ 0xae, 0x00, 0x15, 0xf1, 0xc9, 0x00, 0x9e, 0x00, 0x16, 0x18, 0x47, 0x11,
+ 0xaf, 0xc3, 0x4c, 0x23, 0x16, 0x43, 0x4c, 0x32, 0xc8, 0x4b, 0xd2, 0x05,
+ 0x38, 0xd9, 0xca, 0x3b, 0x72, 0x05, 0x38, 0xe1, 0xd0, 0x0f, 0x62, 0x05,
+ 0x38, 0xe9, 0xd9, 0x1d, 0xf3, 0x05, 0x38, 0xf1, 0xc5, 0x34, 0x9a, 0x00,
+ 0x17, 0xc0, 0xc4, 0x32, 0x64, 0x05, 0x5b, 0x51, 0xc9, 0x0f, 0xae, 0x00,
+ 0x15, 0xf9, 0xc9, 0x00, 0x9e, 0x00, 0x16, 0x10, 0x00, 0xc3, 0x4c, 0x38,
+ 0xd5, 0x32, 0xd1, 0x05, 0x38, 0xd0, 0xcc, 0x26, 0x18, 0x08, 0x3d, 0x98,
+ 0xc9, 0x3b, 0x22, 0x00, 0x17, 0xc9, 0xc8, 0x9f, 0x18, 0x00, 0x17, 0xd8,
+ 0x45, 0x02, 0x93, 0xc3, 0x4c, 0x78, 0x43, 0x0a, 0xe5, 0xc3, 0x4c, 0x84,
+ 0x42, 0x00, 0xd0, 0x43, 0x4c, 0x90, 0xc9, 0x00, 0x9e, 0x00, 0x16, 0x21,
+ 0xc4, 0x32, 0x64, 0x00, 0x16, 0xa0, 0x06, 0xc3, 0x4c, 0xa2, 0xc8, 0x64,
+ 0xcf, 0x00, 0x16, 0xb8, 0x47, 0xc8, 0xef, 0xc3, 0x4c, 0xac, 0x46, 0xd1,
+ 0xba, 0x43, 0x4c, 0xbe, 0xc9, 0x3b, 0x22, 0x00, 0x17, 0xd1, 0xc8, 0x9f,
+ 0x18, 0x00, 0x17, 0xe0, 0x47, 0x19, 0x80, 0xc3, 0x4c, 0xd0, 0xd2, 0x4a,
+ 0xe8, 0x05, 0x38, 0x99, 0xc8, 0x4a, 0xf2, 0x00, 0x17, 0x30, 0xc4, 0x15,
+ 0xa7, 0x08, 0xb2, 0xb9, 0xc2, 0x22, 0x45, 0x08, 0xb2, 0xb0, 0xc3, 0x0d,
+ 0x8f, 0x08, 0xb2, 0xa9, 0xc3, 0x08, 0xde, 0x08, 0xb2, 0xa0, 0xc4, 0x05,
+ 0xde, 0x08, 0xb2, 0x99, 0xc2, 0x0a, 0x20, 0x08, 0xb2, 0x90, 0x8e, 0x08,
+ 0xb1, 0xc0, 0x94, 0x08, 0xb1, 0xb0, 0x8e, 0x08, 0xb0, 0x43, 0x03, 0x4c,
+ 0xdc, 0x94, 0x08, 0xb0, 0x32, 0x03, 0x4c, 0xe0, 0xc2, 0x01, 0x0e, 0x08,
+ 0xb0, 0xd9, 0x83, 0x08, 0xb0, 0xd0, 0xc2, 0x01, 0x0e, 0x08, 0xb0, 0xc9,
+ 0x83, 0x08, 0xb0, 0xc0, 0xc3, 0x3e, 0xcd, 0x00, 0xc5, 0x51, 0xc3, 0x24,
+ 0x3f, 0x00, 0xc5, 0x41, 0x1c, 0xc3, 0x4c, 0xe4, 0x05, 0xc3, 0x4c, 0xee,
+ 0xc3, 0x1d, 0x55, 0x00, 0xc5, 0x11, 0x06, 0xc3, 0x4c, 0xf8, 0x16, 0xc3,
+ 0x4d, 0x04, 0xc3, 0xec, 0x54, 0x00, 0xc4, 0xe9, 0xc3, 0x7a, 0xa3, 0x00,
+ 0xc4, 0xd9, 0xc3, 0x91, 0x7b, 0x00, 0xc4, 0xd0, 0x83, 0x00, 0xc4, 0x8b,
+ 0x03, 0x4d, 0x0e, 0xc2, 0x0c, 0x25, 0x00, 0xc4, 0x70, 0xc2, 0x1a, 0x36,
+ 0x00, 0xc5, 0x39, 0x97, 0x00, 0xc5, 0x30, 0x8a, 0x00, 0xc4, 0xb9, 0xcb,
+ 0x96, 0x56, 0x00, 0xc4, 0x00, 0x83, 0x00, 0xc4, 0xb1, 0xc2, 0x01, 0x0e,
+ 0x00, 0xc4, 0xa8, 0xc2, 0x01, 0x0e, 0x00, 0xc4, 0x99, 0x83, 0x00, 0xc4,
+ 0x90, 0x83, 0x00, 0xc4, 0x81, 0x16, 0xc3, 0x4d, 0x1a, 0xcb, 0x8f, 0x60,
+ 0x00, 0xc4, 0x30, 0xc2, 0x01, 0x01, 0x00, 0xc4, 0x79, 0xc2, 0x07, 0x69,
+ 0x00, 0xc4, 0x50, 0xcf, 0x64, 0x56, 0x00, 0xc4, 0x20, 0x48, 0xb7, 0xf3,
+ 0xc3, 0x4d, 0x24, 0xc2, 0x00, 0x56, 0x00, 0xc2, 0x50, 0xc2, 0x05, 0x5c,
+ 0x00, 0xc2, 0xe1, 0x83, 0x00, 0xc2, 0x88, 0xc2, 0x00, 0x54, 0x00, 0xc2,
+ 0xd1, 0x83, 0x00, 0xc2, 0x98, 0x83, 0x00, 0xc2, 0xc0, 0xc2, 0x0e, 0xe5,
+ 0x00, 0xc2, 0xa1, 0x83, 0x00, 0xc2, 0x80, 0x87, 0x00, 0xc2, 0x48, 0x87,
+ 0x00, 0xc2, 0x40, 0xc2, 0x01, 0x0e, 0x00, 0xc3, 0x91, 0x83, 0x00, 0xc3,
+ 0x78, 0xc2, 0x0e, 0xe5, 0x00, 0xc3, 0x71, 0x83, 0x00, 0xc3, 0x40, 0x83,
+ 0x00, 0xc3, 0x68, 0x83, 0x00, 0xc3, 0x60, 0x87, 0x00, 0xc3, 0x00, 0x9b,
+ 0x00, 0xc2, 0xf8, 0xc3, 0x11, 0xb7, 0x0e, 0xb7, 0xd1, 0xc5, 0xdc, 0x84,
+ 0x0e, 0xb7, 0x80, 0xc7, 0x01, 0xb0, 0x0e, 0xb7, 0x98, 0xc3, 0x11, 0xb7,
+ 0x0e, 0xb8, 0xa1, 0xc5, 0xdc, 0x84, 0x0e, 0xb8, 0x50, 0x8c, 0x0e, 0xb5,
+ 0x29, 0x8b, 0x0e, 0xb5, 0x20, 0xc3, 0x0b, 0x47, 0x0e, 0xb6, 0x38, 0x8b,
+ 0x0e, 0xb6, 0x78, 0xc6, 0x12, 0x65, 0x0e, 0xb6, 0xb0, 0xc6, 0x4f, 0xcb,
+ 0x0e, 0xbe, 0x59, 0xc4, 0xde, 0x10, 0x0e, 0xb6, 0x28, 0x0f, 0x43, 0x4d,
+ 0x30, 0xc2, 0x02, 0x29, 0x0e, 0xb6, 0xc9, 0xc2, 0x00, 0x0a, 0x0e, 0xb6,
+ 0xb9, 0x8b, 0x0e, 0xb6, 0x88, 0xc2, 0x00, 0x0a, 0x0e, 0xb6, 0xc0, 0xc2,
+ 0x20, 0xa8, 0x0e, 0xb6, 0xa9, 0xc4, 0x8b, 0xed, 0x0e, 0xb6, 0x48, 0xc4,
+ 0x19, 0x8f, 0x0e, 0xb6, 0xa0, 0xca, 0x94, 0x73, 0x0e, 0xb6, 0x98, 0xc2,
+ 0x03, 0x76, 0x0e, 0xb6, 0x90, 0x97, 0x0e, 0xb6, 0x70, 0x97, 0x0e, 0xb6,
+ 0x68, 0xc4, 0xdc, 0xdf, 0x0e, 0xb6, 0x60, 0xc4, 0x8f, 0x29, 0x0e, 0xb6,
+ 0x58, 0xc3, 0x00, 0xf2, 0x0e, 0xb6, 0x50, 0xc2, 0x00, 0x44, 0x0e, 0xb6,
+ 0x41, 0xc6, 0x12, 0x65, 0x0e, 0xb6, 0x30, 0xc4, 0x33, 0x51, 0x0e, 0xb6,
+ 0x20, 0xc3, 0x0b, 0x47, 0x0e, 0xb6, 0x18, 0xc4, 0xdd, 0x2f, 0x0e, 0xb6,
+ 0x10, 0x9c, 0x0e, 0xa8, 0x19, 0x9b, 0x0e, 0xa8, 0x11, 0x9a, 0x0e, 0xa8,
+ 0x09, 0x99, 0x0e, 0xa8, 0x01, 0x98, 0x0e, 0xa7, 0xf9, 0x97, 0x0e, 0xa7,
+ 0xf1, 0x96, 0x0e, 0xa7, 0xe9, 0x95, 0x0e, 0xa7, 0xe1, 0x94, 0x0e, 0xa7,
+ 0xd9, 0x93, 0x0e, 0xa7, 0xd1, 0x92, 0x0e, 0xa7, 0xc9, 0x91, 0x0e, 0xa7,
+ 0xc1, 0x90, 0x0e, 0xa7, 0xb9, 0x8f, 0x0e, 0xa7, 0xb1, 0x8e, 0x0e, 0xa7,
+ 0xa9, 0x8d, 0x0e, 0xa7, 0xa1, 0x8c, 0x0e, 0xa7, 0x99, 0x8b, 0x0e, 0xa7,
+ 0x91, 0x8a, 0x0e, 0xa7, 0x89, 0x89, 0x0e, 0xa7, 0x81, 0x88, 0x0e, 0xa7,
+ 0x79, 0x87, 0x0e, 0xa7, 0x71, 0x86, 0x0e, 0xa7, 0x69, 0x85, 0x0e, 0xa7,
+ 0x61, 0x84, 0x0e, 0xa7, 0x59, 0x83, 0x0e, 0xa7, 0x50, 0x9c, 0x0e, 0xa7,
+ 0x49, 0x9b, 0x0e, 0xa7, 0x41, 0x9a, 0x0e, 0xa7, 0x39, 0x99, 0x0e, 0xa7,
+ 0x31, 0x98, 0x0e, 0xa7, 0x29, 0x97, 0x0e, 0xa7, 0x21, 0x96, 0x0e, 0xa7,
+ 0x19, 0x95, 0x0e, 0xa7, 0x11, 0x94, 0x0e, 0xa7, 0x09, 0x93, 0x0e, 0xa7,
+ 0x01, 0x92, 0x0e, 0xa6, 0xf9, 0x91, 0x0e, 0xa6, 0xf1, 0x90, 0x0e, 0xa6,
+ 0xe9, 0x8f, 0x0e, 0xa6, 0xe1, 0x8e, 0x0e, 0xa6, 0xd9, 0x8d, 0x0e, 0xa6,
+ 0xd1, 0x8c, 0x0e, 0xa6, 0xc9, 0x8b, 0x0e, 0xa6, 0xc1, 0x8a, 0x0e, 0xa6,
+ 0xb9, 0x89, 0x0e, 0xa6, 0xb1, 0x88, 0x0e, 0xa6, 0xa9, 0x87, 0x0e, 0xa6,
+ 0xa1, 0x86, 0x0e, 0xa6, 0x99, 0x85, 0x0e, 0xa6, 0x91, 0x84, 0x0e, 0xa6,
+ 0x89, 0x83, 0x0e, 0xa6, 0x80, 0xc3, 0x11, 0xb7, 0x0e, 0xb6, 0x01, 0xc5,
+ 0xdc, 0x84, 0x0e, 0xb5, 0xb0, 0xc7, 0x01, 0xb0, 0x0e, 0xb5, 0xc8, 0x0f,
+ 0x43, 0x4d, 0x3c, 0xc2, 0x02, 0x29, 0x0e, 0xba, 0x69, 0xc2, 0x00, 0x0a,
+ 0x0e, 0xba, 0x59, 0x8b, 0x0e, 0xba, 0x28, 0xc2, 0x00, 0x0a, 0x0e, 0xba,
+ 0x60, 0xc6, 0x12, 0x65, 0x0e, 0xba, 0x50, 0xc2, 0x20, 0xa8, 0x0e, 0xba,
+ 0x49, 0xc4, 0x8b, 0xed, 0x0e, 0xb9, 0xe8, 0xc4, 0x19, 0x8f, 0x0e, 0xba,
+ 0x40, 0xca, 0x94, 0x73, 0x0e, 0xba, 0x38, 0xc2, 0x03, 0x76, 0x0e, 0xba,
+ 0x30, 0x8b, 0x0e, 0xba, 0x18, 0x97, 0x0e, 0xba, 0x10, 0x97, 0x0e, 0xba,
+ 0x08, 0xc4, 0xdc, 0xdf, 0x0e, 0xba, 0x00, 0xc4, 0x8f, 0x29, 0x0e, 0xb9,
+ 0xf8, 0xc3, 0x00, 0xf2, 0x0e, 0xb9, 0xf0, 0xc2, 0x00, 0x44, 0x0e, 0xb9,
+ 0xe1, 0xc6, 0x12, 0x65, 0x0e, 0xb9, 0xd0, 0xc3, 0x0b, 0x47, 0x0e, 0xb9,
+ 0xd8, 0xc4, 0xde, 0x10, 0x0e, 0xb9, 0xc8, 0xc4, 0x33, 0x51, 0x0e, 0xb9,
+ 0xc0, 0xc3, 0x0b, 0x47, 0x0e, 0xb9, 0xb8, 0xc4, 0xdd, 0x2f, 0x0e, 0xb9,
+ 0xb0, 0x0f, 0x43, 0x4d, 0x48, 0xc2, 0x02, 0x29, 0x0e, 0xb9, 0x99, 0xc2,
+ 0x00, 0x0a, 0x0e, 0xb9, 0x89, 0x8b, 0x0e, 0xb9, 0x58, 0xc2, 0x00, 0x0a,
+ 0x0e, 0xb9, 0x90, 0xc6, 0x12, 0x65, 0x0e, 0xb9, 0x80, 0xc2, 0x20, 0xa8,
+ 0x0e, 0xb9, 0x79, 0xc4, 0x8b, 0xed, 0x0e, 0xb9, 0x1a, 0x03, 0x4d, 0x54,
+ 0xc4, 0x19, 0x8f, 0x0e, 0xb9, 0x70, 0xc2, 0x03, 0x76, 0x0e, 0xb9, 0x60,
+ 0x8b, 0x0e, 0xb9, 0x48, 0x97, 0x0e, 0xb9, 0x40, 0x97, 0x0e, 0xb9, 0x38,
+ 0xc4, 0xdc, 0xdf, 0x0e, 0xb9, 0x30, 0xc4, 0x8f, 0x29, 0x0e, 0xb9, 0x28,
+ 0xc3, 0x00, 0xf2, 0x0e, 0xb9, 0x20, 0xc2, 0x00, 0x44, 0x0e, 0xb9, 0x11,
+ 0xc6, 0x12, 0x65, 0x0e, 0xb9, 0x00, 0xc3, 0x0b, 0x47, 0x0e, 0xb9, 0x08,
+ 0xc4, 0xde, 0x10, 0x0e, 0xb8, 0xf8, 0xc4, 0x33, 0x51, 0x0e, 0xb8, 0xf0,
+ 0xc3, 0x0b, 0x47, 0x0e, 0xb8, 0xe8, 0xc4, 0xdd, 0x2f, 0x0e, 0xb8, 0xe0,
+ 0xc4, 0x24, 0x35, 0x0e, 0xbf, 0xa9, 0xc5, 0x05, 0x1b, 0x0e, 0xbf, 0xa1,
+ 0x15, 0xc3, 0x4d, 0x5a, 0x08, 0xc3, 0x4d, 0x66, 0x16, 0xc3, 0x4d, 0x72,
+ 0xc3, 0x05, 0x17, 0x0e, 0xbf, 0x69, 0xc4, 0x16, 0x57, 0x0e, 0xbf, 0x60,
+ 0x12, 0xc3, 0x4d, 0x7e, 0xca, 0xa3, 0xf8, 0x0e, 0xbe, 0x41, 0xcc, 0x8f,
+ 0x28, 0x0e, 0xbe, 0x31, 0xcc, 0x8b, 0xec, 0x0e, 0xbe, 0x29, 0xce, 0x12,
+ 0x64, 0x0e, 0xbe, 0x21, 0x46, 0x00, 0x3e, 0xc3, 0x4d, 0x90, 0xc5, 0xdf,
+ 0xef, 0x0e, 0xbd, 0x49, 0x48, 0x01, 0xf7, 0x43, 0x4e, 0x34, 0xc8, 0x9d,
+ 0xb0, 0x0e, 0xbc, 0x79, 0xc9, 0xad, 0x9c, 0x0e, 0xbc, 0x69, 0xd3, 0x41,
+ 0x4d, 0x0e, 0xbc, 0x48, 0x91, 0x0e, 0xaf, 0xe3, 0x03, 0x4e, 0xd5, 0x92,
+ 0x0e, 0xaf, 0xeb, 0x03, 0x4e, 0xd9, 0x85, 0x0e, 0xaf, 0x83, 0x03, 0x4e,
+ 0xe9, 0x97, 0x0e, 0xb0, 0x13, 0x03, 0x4e, 0xef, 0x96, 0x0e, 0xb0, 0x0b,
+ 0x03, 0x4e, 0xf5, 0x95, 0x0e, 0xb0, 0x03, 0x03, 0x4f, 0x01, 0x88, 0x0e,
+ 0xaf, 0x9b, 0x03, 0x4f, 0x07, 0x94, 0x0e, 0xaf, 0xfb, 0x03, 0x4f, 0x0d,
+ 0x9a, 0x0e, 0xb0, 0x2b, 0x03, 0x4f, 0x13, 0x90, 0x0e, 0xaf, 0xdb, 0x03,
+ 0x4f, 0x17, 0x8f, 0x0e, 0xaf, 0xd3, 0x03, 0x4f, 0x1b, 0x8e, 0x0e, 0xaf,
+ 0xcb, 0x03, 0x4f, 0x1f, 0x8d, 0x0e, 0xaf, 0xc3, 0x03, 0x4f, 0x25, 0x8b,
+ 0x0e, 0xaf, 0xb3, 0x03, 0x4f, 0x2b, 0x87, 0x0e, 0xaf, 0x93, 0x03, 0x4f,
+ 0x31, 0x9c, 0x0e, 0xb0, 0x3b, 0x03, 0x4f, 0x3d, 0x86, 0x0e, 0xaf, 0x8b,
+ 0x03, 0x4f, 0x43, 0x89, 0x0e, 0xaf, 0xa3, 0x03, 0x4f, 0x49, 0x84, 0x0e,
+ 0xaf, 0x7b, 0x03, 0x4f, 0x4f, 0x83, 0x0e, 0xaf, 0x73, 0x03, 0x4f, 0x55,
+ 0x9b, 0x0e, 0xb0, 0x31, 0x99, 0x0e, 0xb0, 0x21, 0x98, 0x0e, 0xb0, 0x19,
+ 0x93, 0x0e, 0xaf, 0xf1, 0x8c, 0x0e, 0xaf, 0xb9, 0x8a, 0x0e, 0xaf, 0xa8,
+ 0x91, 0x0e, 0xaf, 0x13, 0x03, 0x4f, 0x5b, 0x92, 0x0e, 0xaf, 0x1b, 0x03,
+ 0x4f, 0x5f, 0x85, 0x0e, 0xae, 0xb3, 0x03, 0x4f, 0x6f, 0x97, 0x0e, 0xaf,
+ 0x43, 0x03, 0x4f, 0x75, 0x96, 0x0e, 0xaf, 0x3b, 0x03, 0x4f, 0x7b, 0x95,
+ 0x0e, 0xaf, 0x33, 0x03, 0x4f, 0x8a, 0x94, 0x0e, 0xaf, 0x2b, 0x03, 0x4f,
+ 0x90, 0x9a, 0x0e, 0xaf, 0x5b, 0x03, 0x4f, 0x96, 0x90, 0x0e, 0xaf, 0x0b,
+ 0x03, 0x4f, 0x9a, 0x8f, 0x0e, 0xaf, 0x03, 0x03, 0x4f, 0x9e, 0x8e, 0x0e,
+ 0xae, 0xfb, 0x03, 0x4f, 0xa2, 0x8d, 0x0e, 0xae, 0xf3, 0x03, 0x4f, 0xa8,
+ 0x8b, 0x0e, 0xae, 0xe3, 0x03, 0x4f, 0xae, 0x87, 0x0e, 0xae, 0xc3, 0x03,
+ 0x4f, 0xb4, 0x9c, 0x0e, 0xaf, 0x6b, 0x03, 0x4f, 0xc0, 0x86, 0x0e, 0xae,
+ 0xbb, 0x03, 0x4f, 0xc6, 0x89, 0x0e, 0xae, 0xd3, 0x03, 0x4f, 0xcc, 0x84,
+ 0x0e, 0xae, 0xab, 0x03, 0x4f, 0xd2, 0x83, 0x0e, 0xae, 0xa3, 0x03, 0x4f,
+ 0xd8, 0x9b, 0x0e, 0xaf, 0x61, 0x99, 0x0e, 0xaf, 0x51, 0x98, 0x0e, 0xaf,
+ 0x49, 0x93, 0x0e, 0xaf, 0x21, 0x8c, 0x0e, 0xae, 0xe9, 0x8a, 0x0e, 0xae,
+ 0xd9, 0x88, 0x0e, 0xae, 0xc8, 0xc4, 0x15, 0xa7, 0x0e, 0xbf, 0x49, 0xc2,
+ 0x22, 0x45, 0x0e, 0xbf, 0x40, 0xc3, 0x0d, 0x8f, 0x0e, 0xbf, 0x39, 0xc3,
+ 0x08, 0xde, 0x0e, 0xbf, 0x30, 0xc4, 0x05, 0xde, 0x0e, 0xbf, 0x29, 0xc2,
+ 0x0a, 0x20, 0x0e, 0xbf, 0x20, 0x9c, 0x0e, 0xb1, 0xd9, 0x9b, 0x0e, 0xb1,
+ 0xd1, 0x9a, 0x0e, 0xb1, 0xc9, 0x99, 0x0e, 0xb1, 0xc1, 0x98, 0x0e, 0xb1,
+ 0xb9, 0x97, 0x0e, 0xb1, 0xb1, 0x96, 0x0e, 0xb1, 0xa9, 0x95, 0x0e, 0xb1,
+ 0xa1, 0x94, 0x0e, 0xb1, 0x99, 0x93, 0x0e, 0xb1, 0x91, 0x92, 0x0e, 0xb1,
+ 0x89, 0x91, 0x0e, 0xb1, 0x81, 0x90, 0x0e, 0xb1, 0x79, 0x8f, 0x0e, 0xb1,
+ 0x71, 0x8e, 0x0e, 0xb1, 0x69, 0x8d, 0x0e, 0xb1, 0x61, 0x8c, 0x0e, 0xb1,
+ 0x59, 0x8b, 0x0e, 0xb1, 0x51, 0x8a, 0x0e, 0xb1, 0x49, 0x89, 0x0e, 0xb1,
+ 0x41, 0x88, 0x0e, 0xb1, 0x39, 0x87, 0x0e, 0xb1, 0x31, 0x86, 0x0e, 0xb1,
+ 0x29, 0x85, 0x0e, 0xb1, 0x21, 0x84, 0x0e, 0xb1, 0x19, 0x83, 0x0e, 0xb1,
+ 0x10, 0x9c, 0x0e, 0xb1, 0x09, 0x9b, 0x0e, 0xb1, 0x01, 0x9a, 0x0e, 0xb0,
+ 0xf9, 0x99, 0x0e, 0xb0, 0xf1, 0x98, 0x0e, 0xb0, 0xe9, 0x97, 0x0e, 0xb0,
+ 0xe1, 0x96, 0x0e, 0xb0, 0xd9, 0x95, 0x0e, 0xb0, 0xd1, 0x94, 0x0e, 0xb0,
+ 0xc9, 0x93, 0x0e, 0xb0, 0xc1, 0x92, 0x0e, 0xb0, 0xb9, 0x91, 0x0e, 0xb0,
+ 0xb1, 0x90, 0x0e, 0xb0, 0xa9, 0x8f, 0x0e, 0xb0, 0xa1, 0x8e, 0x0e, 0xb0,
+ 0x99, 0x8d, 0x0e, 0xb0, 0x91, 0x8c, 0x0e, 0xb0, 0x89, 0x8b, 0x0e, 0xb0,
+ 0x81, 0x8a, 0x0e, 0xb0, 0x79, 0x89, 0x0e, 0xb0, 0x71, 0x88, 0x0e, 0xb0,
+ 0x69, 0x87, 0x0e, 0xb0, 0x61, 0x86, 0x0e, 0xb0, 0x59, 0x85, 0x0e, 0xb0,
+ 0x51, 0x84, 0x0e, 0xb0, 0x49, 0x83, 0x0e, 0xb0, 0x40, 0xc2, 0x01, 0x0e,
+ 0x08, 0xe5, 0x19, 0x83, 0x08, 0xe5, 0x10, 0x94, 0x00, 0x6b, 0x00, 0x8e,
+ 0x00, 0x6b, 0x08, 0x8f, 0x00, 0x6a, 0xa1, 0x9b, 0x00, 0x6a, 0xa9, 0x8e,
+ 0x00, 0x6b, 0xeb, 0x03, 0x4f, 0xde, 0x90, 0x00, 0x6b, 0xdb, 0x03, 0x4f,
+ 0xe5, 0xc2, 0x00, 0x63, 0x00, 0x6b, 0xe1, 0x8d, 0x00, 0x6b, 0xf8, 0xc2,
+ 0x01, 0x0e, 0x08, 0x8b, 0x09, 0x83, 0x08, 0x8b, 0x00, 0xc2, 0x01, 0x0e,
+ 0x08, 0x8a, 0xf9, 0x83, 0x08, 0x8a, 0xf0, 0xc4, 0x5c, 0x89, 0x0e, 0x8f,
+ 0x51, 0x46, 0xd3, 0x52, 0x43, 0x4f, 0xe9, 0xc3, 0x03, 0x77, 0x0e, 0x8f,
+ 0x49, 0xc8, 0xc0, 0x3b, 0x0e, 0x8e, 0xb3, 0x03, 0x50, 0x0f, 0x46, 0x0c,
+ 0x9d, 0xc3, 0x50, 0x15, 0x07, 0xc3, 0x50, 0x1f, 0xc5, 0xe2, 0x06, 0x0e,
+ 0x8c, 0x69, 0x0b, 0xc3, 0x50, 0x2b, 0x0a, 0x43, 0x50, 0x43, 0x07, 0xc3,
+ 0x50, 0x4f, 0x11, 0xc3, 0x50, 0x5b, 0xc4, 0xe7, 0x83, 0x0e, 0x8c, 0x79,
+ 0xd3, 0x41, 0x01, 0x0e, 0x8a, 0xb1, 0xcc, 0x87, 0xc0, 0x0e, 0x8a, 0x20,
+ 0xc7, 0xcd, 0xbf, 0x0e, 0x8e, 0xc3, 0x03, 0x50, 0x6a, 0x46, 0xd2, 0x86,
+ 0xc3, 0x50, 0x70, 0xc3, 0x02, 0x9f, 0x0e, 0x8c, 0xbb, 0x03, 0x50, 0x7c,
+ 0x94, 0x0e, 0x8c, 0xb3, 0x03, 0x50, 0x80, 0x0a, 0xc3, 0x50, 0x86, 0xcd,
+ 0x77, 0xe7, 0x0e, 0x88, 0xb8, 0x0e, 0xc3, 0x50, 0x92, 0x14, 0xc3, 0x50,
+ 0x9c, 0x11, 0xc3, 0x50, 0xa8, 0xd0, 0x5d, 0x4f, 0x0e, 0x8a, 0x29, 0xc7,
+ 0xcc, 0xd8, 0x0e, 0x89, 0xa9, 0xc5, 0xd3, 0x35, 0x0e, 0x89, 0x09, 0xc6,
+ 0xd7, 0x54, 0x0e, 0x88, 0x98, 0xc4, 0x09, 0x6e, 0x0e, 0x8e, 0x99, 0xcc,
+ 0x8a, 0x48, 0x0e, 0x8a, 0xb8, 0x14, 0xc3, 0x50, 0xb2, 0x49, 0xb4, 0x4a,
+ 0xc3, 0x50, 0xbe, 0xc5, 0xd3, 0x35, 0x0e, 0x88, 0xf2, 0x03, 0x50, 0xca,
+ 0xc5, 0xc4, 0x5f, 0x0e, 0x8d, 0xdb, 0x03, 0x50, 0xd0, 0xc5, 0xca, 0xb6,
+ 0x0e, 0x8d, 0xb1, 0xc4, 0xe6, 0xc3, 0x0e, 0x8c, 0x81, 0x4d, 0x7d, 0x49,
+ 0xc3, 0x50, 0xd4, 0x44, 0x20, 0x33, 0x43, 0x50, 0xe0, 0x14, 0xc3, 0x50,
+ 0xec, 0x45, 0x3e, 0x58, 0x43, 0x50, 0xf6, 0xc4, 0xd3, 0x60, 0x0e, 0x8d,
+ 0xbb, 0x03, 0x51, 0x0e, 0xcf, 0x6a, 0x50, 0x0e, 0x88, 0x30, 0x44, 0xe4,
+ 0xc3, 0xc3, 0x51, 0x12, 0x11, 0xc3, 0x51, 0x1e, 0x0b, 0xc3, 0x51, 0x28,
+ 0x44, 0xb5, 0x3e, 0xc3, 0x51, 0x32, 0xc5, 0xd3, 0x35, 0x0e, 0x89, 0x13,
+ 0x03, 0x51, 0x3e, 0xc6, 0xd7, 0xcc, 0x0e, 0x88, 0x82, 0x03, 0x51, 0x44,
+ 0x03, 0xc3, 0x51, 0x4a, 0x07, 0xc3, 0x51, 0x65, 0x46, 0x02, 0x92, 0xc3,
+ 0x51, 0x71, 0x47, 0xc5, 0x6f, 0x43, 0x51, 0x83, 0xcf, 0x63, 0xcf, 0x0e,
+ 0x8d, 0x99, 0x45, 0xa8, 0x9f, 0x43, 0x51, 0x8f, 0x43, 0x01, 0x1f, 0xc3,
+ 0x51, 0x9b, 0xc9, 0xb4, 0xad, 0x0e, 0x8d, 0x30, 0x43, 0x00, 0x3b, 0xc3,
+ 0x51, 0xad, 0x46, 0x05, 0x1c, 0x43, 0x51, 0xcb, 0xca, 0xa5, 0x92, 0x0e,
+ 0x8d, 0x39, 0xcc, 0x83, 0x94, 0x0e, 0x8a, 0xc9, 0xcd, 0x79, 0x60, 0x0e,
+ 0x8a, 0xc1, 0x47, 0x86, 0x65, 0x43, 0x51, 0xd7, 0x4d, 0x7d, 0x3c, 0xc3,
+ 0x51, 0xe3, 0x10, 0xc3, 0x52, 0x1c, 0x46, 0xbd, 0x4d, 0x43, 0x52, 0x28,
+ 0x0b, 0xc3, 0x52, 0x34, 0x07, 0x43, 0x52, 0x40, 0xc4, 0x00, 0x68, 0x0e,
+ 0x8c, 0x21, 0xc2, 0x02, 0x6a, 0x0e, 0x8c, 0x18, 0x46, 0x15, 0x74, 0xc3,
+ 0x52, 0x4c, 0x4b, 0x97, 0x7f, 0x43, 0x52, 0x5e, 0x43, 0x01, 0xf4, 0xc3,
+ 0x52, 0x6a, 0x45, 0x01, 0xac, 0x43, 0x52, 0x82, 0xc3, 0x82, 0xa4, 0x00,
+ 0x84, 0x59, 0xc3, 0x82, 0xec, 0x00, 0x84, 0x60, 0xc2, 0x01, 0x0e, 0x05,
+ 0x53, 0x71, 0x83, 0x05, 0x53, 0x68, 0x83, 0x05, 0x53, 0x59, 0xc2, 0x1a,
+ 0x36, 0x05, 0x53, 0x28, 0xc2, 0x01, 0x0e, 0x05, 0x53, 0x51, 0x06, 0x43,
+ 0x52, 0x8e, 0xc2, 0x01, 0x0e, 0x05, 0x53, 0x39, 0x83, 0x05, 0x53, 0x30,
+ 0xc2, 0x01, 0x0e, 0x05, 0x53, 0x21, 0x83, 0x05, 0x53, 0x18, 0xc2, 0x01,
+ 0x0e, 0x05, 0x53, 0x11, 0x83, 0x05, 0x53, 0x08, 0xc2, 0x01, 0x0e, 0x05,
+ 0x4f, 0xf1, 0x83, 0x05, 0x4f, 0xe8, 0xc2, 0x01, 0x0e, 0x05, 0x4f, 0xe1,
+ 0x83, 0x05, 0x4f, 0xd9, 0x06, 0x43, 0x52, 0x98, 0xc2, 0x01, 0x01, 0x05,
+ 0x4f, 0x79, 0xc2, 0x1a, 0x36, 0x05, 0x4f, 0x38, 0xc2, 0x01, 0x0e, 0x05,
+ 0x4f, 0x61, 0x83, 0x05, 0x4f, 0x58, 0xc2, 0x01, 0x0e, 0x05, 0x4f, 0x51,
+ 0x83, 0x05, 0x4f, 0x48, 0x04, 0xc3, 0x52, 0xa2, 0x10, 0xc3, 0x52, 0xac,
+ 0xc3, 0xec, 0x54, 0x05, 0x4f, 0x11, 0x83, 0x00, 0x81, 0x11, 0x0d, 0xc3,
+ 0x52, 0xbc, 0x09, 0xc3, 0x52, 0xc6, 0x05, 0xc3, 0x52, 0xd0, 0xc2, 0x05,
+ 0x5c, 0x00, 0x83, 0xc9, 0xc2, 0x0c, 0x25, 0x00, 0x83, 0xd9, 0xc3, 0x16,
+ 0x06, 0x00, 0x83, 0xe9, 0xc2, 0x01, 0xa7, 0x00, 0x83, 0xf1, 0xc3, 0x03,
+ 0x4b, 0x00, 0x84, 0x01, 0xc2, 0x01, 0x0e, 0x00, 0x84, 0x08, 0xce, 0x29,
+ 0x29, 0x0f, 0xd0, 0xa9, 0xdb, 0x15, 0x9a, 0x0f, 0xd1, 0xf8, 0xd2, 0x4d,
+ 0x28, 0x0f, 0xd0, 0x41, 0xce, 0x29, 0x29, 0x0f, 0xd0, 0xc9, 0xdf, 0x0d,
+ 0x7b, 0x0f, 0xd0, 0xe9, 0x16, 0x43, 0x52, 0xda, 0xc7, 0x7d, 0xf8, 0x08,
+ 0xa2, 0x39, 0xc7, 0x10, 0xac, 0x08, 0xa2, 0x20, 0xc5, 0x45, 0xcf, 0x08,
+ 0xa2, 0x29, 0xc4, 0x21, 0x28, 0x08, 0xa2, 0x10, 0x8e, 0x08, 0xa0, 0x48,
+ 0x94, 0x08, 0xa0, 0x38, 0x89, 0x00, 0xce, 0x10, 0xc2, 0x01, 0xeb, 0x00,
+ 0xcd, 0x59, 0x83, 0x00, 0xcc, 0x60, 0xc2, 0x06, 0x8c, 0x00, 0xcd, 0x49,
+ 0x83, 0x00, 0xcc, 0x30, 0xc2, 0x06, 0x8c, 0x00, 0xcd, 0x41, 0x83, 0x00,
+ 0xcc, 0x28, 0xc2, 0x01, 0x0e, 0x00, 0xcc, 0xc1, 0x83, 0x00, 0xcc, 0xb8,
+ 0x83, 0x00, 0xcc, 0x99, 0xc2, 0x07, 0x69, 0x00, 0xcc, 0x38, 0xc2, 0x01,
+ 0x0e, 0x00, 0xcc, 0x91, 0x83, 0x00, 0xcc, 0x89, 0xc2, 0x0e, 0xe5, 0x00,
+ 0xcc, 0x58, 0xc2, 0x01, 0xeb, 0x00, 0xcd, 0x51, 0x83, 0x00, 0xcc, 0x48,
+ 0xc2, 0x06, 0x8c, 0x00, 0xcd, 0x39, 0x83, 0x00, 0xcc, 0x18, 0xc2, 0x06,
+ 0x8c, 0x00, 0xcd, 0x31, 0x83, 0x00, 0xcc, 0x10, 0xc2, 0x01, 0x0e, 0x00,
+ 0xcc, 0xa9, 0x83, 0x00, 0xcc, 0xa0, 0x83, 0x00, 0xcc, 0x81, 0xc2, 0x07,
+ 0x69, 0x00, 0xcc, 0x20, 0xc2, 0x01, 0x0e, 0x00, 0xcc, 0x79, 0x83, 0x00,
+ 0xcc, 0x71, 0xc2, 0x0e, 0xe5, 0x00, 0xcc, 0x40, 0x9b, 0x00, 0xcd, 0xf8,
+ 0x9b, 0x00, 0xcd, 0xf0, 0x9b, 0x00, 0xcd, 0xd8, 0xc3, 0x15, 0xaa, 0x01,
+ 0x27, 0xa1, 0xc3, 0x25, 0x4e, 0x01, 0x27, 0x60, 0x00, 0x43, 0x52, 0xe6,
+ 0x00, 0x43, 0x52, 0xf8, 0xc7, 0x08, 0x19, 0x05, 0x41, 0x81, 0xc4, 0x01,
+ 0x1d, 0x05, 0x41, 0x89, 0xc9, 0x66, 0x90, 0x05, 0x41, 0x99, 0xc6, 0x05,
+ 0x1b, 0x05, 0x41, 0xa0, 0xc8, 0x08, 0x19, 0x05, 0x41, 0x91, 0xca, 0xa7,
+ 0x22, 0x05, 0x41, 0xa8, 0xc7, 0xc5, 0x5a, 0x09, 0xa2, 0xa1, 0xc3, 0x03,
+ 0x02, 0x09, 0xa2, 0x71, 0xc5, 0xdd, 0xa1, 0x09, 0xa2, 0x42, 0x03, 0x53,
+ 0x10, 0xc7, 0xc5, 0x5a, 0x09, 0xa2, 0x99, 0xc5, 0xdd, 0xa1, 0x09, 0xa2,
+ 0x3b, 0x03, 0x53, 0x16, 0xc3, 0x03, 0x02, 0x09, 0xa2, 0x50, 0xc6, 0x07,
+ 0xba, 0x09, 0xa2, 0x89, 0xc3, 0x02, 0x47, 0x09, 0xa2, 0x68, 0xc2, 0x01,
+ 0xe6, 0x0f, 0x3f, 0xf1, 0x8b, 0x0f, 0x3f, 0xe8, 0xc2, 0x01, 0xe6, 0x0f,
+ 0x3f, 0xe1, 0x8b, 0x0f, 0x3f, 0xd8, 0x87, 0x0f, 0x3f, 0xd3, 0x03, 0x53,
+ 0x1c, 0x8b, 0x0f, 0x3f, 0xc0, 0x87, 0x0f, 0x3f, 0xbb, 0x03, 0x53, 0x20,
+ 0x8b, 0x0f, 0x3f, 0xa8, 0xc2, 0x01, 0xe6, 0x0f, 0x3f, 0xa1, 0x8b, 0x0f,
+ 0x3f, 0x98, 0x87, 0x0f, 0x3f, 0x93, 0x03, 0x53, 0x24, 0x8b, 0x0f, 0x3f,
+ 0x80, 0xc2, 0x01, 0xe6, 0x0f, 0x3f, 0x71, 0x8b, 0x0f, 0x3f, 0x68, 0x83,
+ 0x00, 0x98, 0xf8, 0x87, 0x01, 0x6c, 0xa8, 0x87, 0x0f, 0x3f, 0x50, 0x87,
+ 0x0f, 0x3f, 0x20, 0x83, 0x0f, 0x3f, 0x18, 0x91, 0x05, 0x59, 0x31, 0x87,
+ 0x05, 0x59, 0x2b, 0x03, 0x53, 0x28, 0x83, 0x05, 0x59, 0x03, 0x03, 0x53,
+ 0x2c, 0x8b, 0x05, 0x59, 0x11, 0x97, 0x05, 0x59, 0x08, 0x83, 0x01, 0x6d,
+ 0xd8, 0x87, 0x01, 0x6d, 0xe0, 0x87, 0x05, 0x58, 0x60, 0x83, 0x00, 0x92,
+ 0xd8, 0x87, 0x00, 0x92, 0xe0, 0x83, 0x00, 0x96, 0x18, 0x87, 0x00, 0x96,
+ 0x20, 0x83, 0x00, 0x96, 0x83, 0x03, 0x53, 0x30, 0x97, 0x00, 0x96, 0x89,
+ 0x8b, 0x00, 0x96, 0x91, 0x87, 0x00, 0x96, 0xab, 0x03, 0x53, 0x34, 0x91,
+ 0x00, 0x96, 0xb0, 0xd1, 0x4f, 0xf3, 0x01, 0x4f, 0x20, 0xd0, 0x01, 0x37,
+ 0x01, 0x4b, 0x89, 0xce, 0x35, 0xda, 0x01, 0x53, 0x99, 0xc9, 0x65, 0xfa,
+ 0x01, 0x53, 0x89, 0xcf, 0x09, 0x58, 0x01, 0x5a, 0x00, 0xe0, 0x0b, 0x67,
+ 0x01, 0x53, 0xb8, 0xa1, 0x0e, 0x92, 0x09, 0xa0, 0x0e, 0x92, 0x01, 0x9f,
+ 0x0e, 0x91, 0xf9, 0x9e, 0x0e, 0x91, 0xf1, 0x9d, 0x0e, 0x91, 0xe8, 0xa6,
+ 0x0e, 0x91, 0xe1, 0xa5, 0x0e, 0x91, 0xd9, 0xa4, 0x0e, 0x91, 0xd1, 0xa2,
+ 0x0e, 0x91, 0xc9, 0xa0, 0x0e, 0x91, 0xc1, 0x9f, 0x0e, 0x91, 0xb9, 0x9d,
+ 0x0e, 0x91, 0xb0, 0xa6, 0x0e, 0x91, 0xa9, 0xa5, 0x0e, 0x91, 0xa1, 0xa4,
+ 0x0e, 0x91, 0x99, 0xa3, 0x0e, 0x91, 0x91, 0x9f, 0x0e, 0x91, 0x89, 0x9d,
+ 0x0e, 0x91, 0x80, 0xa6, 0x0e, 0x91, 0x79, 0xa4, 0x0e, 0x91, 0x71, 0xa3,
+ 0x0e, 0x91, 0x69, 0xa2, 0x0e, 0x91, 0x61, 0xa1, 0x0e, 0x91, 0x59, 0xa0,
+ 0x0e, 0x91, 0x50, 0xa6, 0x0e, 0x91, 0x49, 0xa5, 0x0e, 0x91, 0x41, 0xa4,
+ 0x0e, 0x91, 0x39, 0xa1, 0x0e, 0x91, 0x31, 0xa0, 0x0e, 0x91, 0x29, 0x9f,
+ 0x0e, 0x91, 0x21, 0x9e, 0x0e, 0x91, 0x18, 0xa1, 0x0e, 0x90, 0xe1, 0xa0,
+ 0x0e, 0x90, 0xd9, 0x9f, 0x0e, 0x90, 0xd1, 0x9e, 0x0e, 0x90, 0xc9, 0x9d,
+ 0x0e, 0x90, 0xc0, 0xa1, 0x0e, 0x90, 0xb9, 0xa0, 0x0e, 0x90, 0xb1, 0x9f,
+ 0x0e, 0x90, 0xa9, 0x9e, 0x0e, 0x90, 0xa1, 0x9d, 0x0e, 0x90, 0x98, 0xa6,
+ 0x0e, 0x90, 0x91, 0xa5, 0x0e, 0x90, 0x89, 0xa4, 0x0e, 0x90, 0x81, 0xa3,
+ 0x0e, 0x90, 0x79, 0xa2, 0x0e, 0x90, 0x71, 0xa1, 0x0e, 0x90, 0x69, 0xa0,
+ 0x0e, 0x90, 0x61, 0x9f, 0x0e, 0x90, 0x59, 0x9e, 0x0e, 0x90, 0x51, 0x9d,
+ 0x0e, 0x90, 0x48, 0xcb, 0x90, 0xcb, 0x00, 0xfe, 0xf9, 0xc4, 0xea, 0x23,
+ 0x00, 0xfe, 0xf1, 0xc5, 0x27, 0x58, 0x00, 0xfe, 0xe8, 0xc4, 0xea, 0x23,
+ 0x00, 0xff, 0x71, 0xc5, 0x27, 0x58, 0x00, 0xff, 0x69, 0xcb, 0x90, 0xcb,
+ 0x00, 0xfe, 0x08, 0xcf, 0x65, 0xdc, 0x08, 0x0b, 0xb0, 0x42, 0x00, 0xeb,
+ 0xc3, 0x53, 0x38, 0xc3, 0x7d, 0x39, 0x00, 0x1d, 0x0b, 0x03, 0x53, 0x4a,
+ 0xc7, 0x78, 0x42, 0x00, 0x1d, 0x2b, 0x03, 0x53, 0x50, 0xc4, 0x27, 0x7e,
+ 0x00, 0x1c, 0xcb, 0x03, 0x53, 0x56, 0x07, 0xc3, 0x53, 0x5c, 0x03, 0xc3,
+ 0x53, 0x6e, 0xc4, 0x8b, 0xed, 0x00, 0x1b, 0x81, 0x12, 0xc3, 0x53, 0x7d,
+ 0xc3, 0xc9, 0xab, 0x00, 0x1b, 0xf9, 0xc4, 0x99, 0xfd, 0x00, 0x1c, 0x91,
+ 0xc5, 0x4f, 0xcc, 0x00, 0x1c, 0x99, 0xc5, 0xde, 0x0f, 0x00, 0x1c, 0xa1,
+ 0xc4, 0xe4, 0x67, 0x00, 0x1c, 0xb1, 0x16, 0xc3, 0x53, 0x93, 0xc5, 0x8f,
+ 0x28, 0x00, 0x1c, 0xd1, 0xc5, 0xdc, 0xde, 0x00, 0x1c, 0xd9, 0xc2, 0x11,
+ 0x89, 0x00, 0x1c, 0xe1, 0xc2, 0x10, 0xac, 0x00, 0x1c, 0xe9, 0xc2, 0x08,
+ 0xe9, 0x00, 0x1c, 0xf1, 0x15, 0xc3, 0x53, 0x9f, 0xc3, 0x11, 0x3f, 0x00,
+ 0x1d, 0x38, 0x42, 0x00, 0xeb, 0xc3, 0x53, 0xb1, 0xc7, 0x78, 0x42, 0x00,
+ 0x1e, 0x2b, 0x03, 0x53, 0xc3, 0xc3, 0x7d, 0x39, 0x00, 0x1e, 0x0b, 0x03,
+ 0x53, 0xc9, 0xc4, 0x27, 0x7e, 0x00, 0x1d, 0xcb, 0x03, 0x53, 0xcf, 0x07,
+ 0xc3, 0x53, 0xd5, 0x03, 0xc3, 0x53, 0xe7, 0xc4, 0x8b, 0xed, 0x00, 0x1b,
+ 0x89, 0xc4, 0x99, 0xfd, 0x00, 0x1d, 0x91, 0xc5, 0x4f, 0xcc, 0x00, 0x1d,
+ 0x99, 0x06, 0xc3, 0x53, 0xf6, 0xc4, 0xe4, 0x67, 0x00, 0x1d, 0xb1, 0x16,
+ 0xc3, 0x54, 0x02, 0x0d, 0xc3, 0x54, 0x0e, 0xc5, 0xdc, 0xde, 0x00, 0x1d,
+ 0xd9, 0xc2, 0x11, 0x89, 0x00, 0x1d, 0xe1, 0xc2, 0x10, 0xac, 0x00, 0x1d,
+ 0xe9, 0xc2, 0x08, 0xe9, 0x00, 0x1d, 0xf1, 0x12, 0xc3, 0x54, 0x1a, 0xcb,
+ 0x94, 0x72, 0x00, 0x1e, 0x11, 0x15, 0xc3, 0x54, 0x30, 0xc3, 0x11, 0x3f,
+ 0x00, 0x1e, 0x38, 0xd3, 0x19, 0x87, 0x00, 0x1b, 0xd9, 0xda, 0x19, 0x80,
+ 0x00, 0x1b, 0xe8, 0xcb, 0x90, 0xcb, 0x00, 0xfe, 0x79, 0xc4, 0xea, 0x23,
+ 0x00, 0xfe, 0x71, 0xc5, 0x27, 0x58, 0x00, 0xfe, 0x68, 0x4d, 0x32, 0x07,
+ 0xc3, 0x54, 0x46, 0xc5, 0xde, 0xa5, 0x00, 0x1e, 0xd1, 0xc4, 0x89, 0x24,
+ 0x00, 0x1f, 0x00, 0xcd, 0x76, 0x95, 0x08, 0x0b, 0xc1, 0xca, 0x74, 0x18,
+ 0x08, 0x0b, 0xf0, 0x44, 0x05, 0x17, 0xc3, 0x54, 0x62, 0x42, 0x02, 0xf8,
+ 0xc3, 0x54, 0x78, 0x44, 0x4f, 0x9e, 0x43, 0x54, 0x8a, 0xd1, 0x51, 0x14,
+ 0x08, 0x0a, 0xc1, 0x48, 0xb8, 0x7b, 0x43, 0x54, 0x9a, 0x48, 0xc2, 0x73,
+ 0xc3, 0x54, 0xac, 0x4a, 0xa3, 0x76, 0x43, 0x54, 0xbf, 0xc3, 0x0a, 0x1f,
+ 0x08, 0x0a, 0xdb, 0x03, 0x54, 0xce, 0xcc, 0x36, 0x8b, 0x08, 0x0b, 0x60,
+ 0xd4, 0x3d, 0x8e, 0x08, 0x0a, 0xe9, 0xd5, 0x36, 0x82, 0x08, 0x0b, 0x78,
+ 0xc6, 0x0e, 0xdf, 0x01, 0x54, 0x01, 0xc5, 0x03, 0x50, 0x01, 0x54, 0x12,
+ 0x03, 0x54, 0xd4, 0xc8, 0x25, 0x71, 0x01, 0x54, 0x71, 0xcf, 0x09, 0xd8,
+ 0x01, 0x54, 0x80, 0xe0, 0x0a, 0x87, 0x01, 0x54, 0xa0, 0x8e, 0x08, 0x9b,
+ 0x08, 0x94, 0x08, 0x9b, 0x00, 0xc6, 0x47, 0x4a, 0x00, 0xe5, 0xf0, 0xc6,
+ 0x47, 0x4a, 0x00, 0x87, 0xf0, 0x97, 0x01, 0x60, 0xf9, 0x8b, 0x01, 0x61,
+ 0x00, 0xc3, 0x89, 0x2d, 0x01, 0x61, 0x60, 0x97, 0x01, 0x62, 0x79, 0x8b,
+ 0x01, 0x62, 0x80, 0xc3, 0x89, 0x2d, 0x01, 0x62, 0xe0, 0x94, 0x00, 0x5b,
+ 0x00, 0x8e, 0x00, 0x5b, 0x08, 0xc7, 0x0d, 0x7f, 0x0f, 0x68, 0xa9, 0xc8,
+ 0x4f, 0xa2, 0x0f, 0x68, 0xf0, 0xc7, 0x0d, 0x7f, 0x0f, 0x68, 0xa1, 0xc8,
+ 0x4f, 0xa2, 0x0f, 0x68, 0xe8, 0xc7, 0x0d, 0x7f, 0x0f, 0x68, 0xb1, 0xc8,
+ 0x4f, 0xa2, 0x0f, 0x68, 0xf8, 0xc7, 0x0d, 0x7f, 0x0f, 0x68, 0xb9, 0xc8,
+ 0x4f, 0xa2, 0x0f, 0x69, 0x00, 0xc4, 0xdd, 0x34, 0x08, 0x7b, 0xd9, 0xc3,
+ 0xe2, 0x62, 0x08, 0x7b, 0xe8, 0xc8, 0x0d, 0x7e, 0x08, 0x79, 0x28, 0x0a,
+ 0xc3, 0x54, 0xda, 0x19, 0xc3, 0x54, 0xe6, 0xc2, 0x01, 0x04, 0x08, 0x79,
+ 0x10, 0xc3, 0x0d, 0x8f, 0x08, 0x79, 0x09, 0xc3, 0x08, 0xde, 0x08, 0x79,
+ 0x00, 0x46, 0x2a, 0xb4, 0xc3, 0x54, 0xf0, 0xc3, 0x5f, 0x3d, 0x08, 0x78,
+ 0xd1, 0x15, 0xc3, 0x55, 0x1d, 0xd0, 0x5d, 0xef, 0x08, 0x78, 0xc1, 0xc2,
+ 0x03, 0x07, 0x08, 0x78, 0xa1, 0x03, 0xc3, 0x55, 0x27, 0xc3, 0x21, 0x00,
+ 0x08, 0x78, 0x71, 0xc3, 0x04, 0xae, 0x08, 0x78, 0x69, 0xc6, 0xd7, 0x12,
+ 0x08, 0x78, 0x61, 0xc4, 0xe5, 0x53, 0x08, 0x78, 0x59, 0xc4, 0x4d, 0x48,
+ 0x08, 0x78, 0x51, 0xc2, 0x00, 0x5b, 0x08, 0x78, 0x2b, 0x03, 0x55, 0x31,
+ 0xc5, 0x4d, 0x42, 0x08, 0x78, 0x41, 0xc3, 0x7c, 0xad, 0x08, 0x78, 0x39,
+ 0xc5, 0x9e, 0xbc, 0x08, 0x78, 0x21, 0xc4, 0xe5, 0xaf, 0x08, 0x78, 0x10,
+ 0xc5, 0x41, 0xc8, 0x08, 0x53, 0xf1, 0xc3, 0x05, 0x17, 0x08, 0x53, 0xe8,
+ 0x0a, 0xc3, 0x55, 0x37, 0xc3, 0x1e, 0x54, 0x08, 0x53, 0xb9, 0xc2, 0x3c,
+ 0xd1, 0x08, 0x53, 0x48, 0x42, 0x01, 0x0e, 0xc3, 0x55, 0x43, 0xc5, 0x43,
+ 0x10, 0x08, 0x53, 0xa8, 0xc4, 0xe6, 0x87, 0x08, 0x53, 0xb1, 0xc4, 0x9e,
+ 0xbd, 0x08, 0x53, 0xa0, 0xc3, 0x11, 0x40, 0x08, 0x53, 0x31, 0x03, 0x43,
+ 0x55, 0x4f, 0xc2, 0x01, 0x47, 0x08, 0x53, 0x10, 0xc3, 0x09, 0x36, 0x08,
+ 0x53, 0x59, 0xc4, 0x9e, 0xc2, 0x08, 0x53, 0x68, 0xc3, 0x00, 0xcd, 0x08,
+ 0x53, 0x89, 0xc2, 0x16, 0x0a, 0x08, 0x53, 0x90, 0xc7, 0x0d, 0x7f, 0x08,
+ 0x67, 0xf1, 0xc8, 0x4f, 0xa2, 0x08, 0x67, 0xf8, 0xcd, 0x80, 0x2e, 0x08,
+ 0x67, 0x89, 0x96, 0x08, 0x67, 0x39, 0x9b, 0x08, 0x66, 0xd1, 0x85, 0x08,
+ 0x66, 0x28, 0x95, 0x08, 0x67, 0x80, 0x8a, 0x08, 0x67, 0x49, 0x95, 0x08,
+ 0x66, 0x30, 0x9b, 0x08, 0x67, 0x40, 0x9c, 0x08, 0x67, 0x28, 0x92, 0x08,
+ 0x67, 0x08, 0x9b, 0x08, 0x66, 0xb8, 0x9b, 0x08, 0x66, 0x70, 0x96, 0x08,
+ 0x65, 0x39, 0x9b, 0x08, 0x64, 0xd1, 0x85, 0x08, 0x64, 0x29, 0xcd, 0x80,
+ 0x2e, 0x08, 0x65, 0x88, 0x9b, 0x08, 0x65, 0x40, 0x9c, 0x08, 0x65, 0x28,
+ 0x92, 0x08, 0x65, 0x08, 0x9b, 0x08, 0x64, 0xb8, 0x9b, 0x08, 0x64, 0x70,
+ 0x95, 0x08, 0x64, 0x31, 0x8a, 0x08, 0x65, 0x48, 0x95, 0x08, 0x65, 0x80,
+ 0x8d, 0x08, 0x60, 0xe0, 0x96, 0x08, 0x62, 0x29, 0x95, 0x08, 0x61, 0xf1,
+ 0x94, 0x08, 0x61, 0xe1, 0x90, 0x08, 0x61, 0x21, 0x8e, 0x08, 0x61, 0x01,
+ 0x8d, 0x08, 0x60, 0xd1, 0x9b, 0x08, 0x60, 0xc1, 0x86, 0x08, 0x60, 0x99,
+ 0x89, 0x08, 0x60, 0x79, 0x84, 0x08, 0x60, 0x58, 0x8a, 0x08, 0x61, 0xf8,
+ 0x85, 0x08, 0x61, 0x41, 0x96, 0x08, 0x61, 0x31, 0x9b, 0x08, 0x61, 0x51,
+ 0x89, 0x08, 0x61, 0x68, 0x96, 0x08, 0x62, 0x31, 0x90, 0x08, 0x61, 0x2b,
+ 0x03, 0x55, 0x5f, 0x8d, 0x08, 0x60, 0xd9, 0x9b, 0x08, 0x60, 0xc9, 0x89,
+ 0x08, 0x60, 0x81, 0x84, 0x08, 0x60, 0x60, 0x96, 0x08, 0x61, 0x39, 0x85,
+ 0x08, 0x61, 0x49, 0x9b, 0x08, 0x61, 0x58, 0x8d, 0x08, 0x60, 0xe8, 0xc2,
+ 0x08, 0x86, 0x08, 0x54, 0xd9, 0xc2, 0x00, 0x2f, 0x08, 0x54, 0xc8, 0x83,
+ 0x08, 0x1d, 0x03, 0x03, 0x55, 0x63, 0x8b, 0x08, 0x1d, 0x09, 0x97, 0x08,
+ 0x1d, 0x11, 0x0d, 0xc3, 0x55, 0x69, 0x09, 0xc3, 0x55, 0x71, 0x1a, 0xc3,
+ 0x55, 0x79, 0xc2, 0x00, 0x2e, 0x08, 0x1d, 0x41, 0x0c, 0xc3, 0x55, 0x83,
+ 0x16, 0xc3, 0x55, 0x8b, 0x06, 0xc3, 0x55, 0x99, 0xc2, 0x07, 0x44, 0x08,
+ 0x1d, 0x89, 0x04, 0xc3, 0x55, 0xa8, 0xc2, 0x01, 0xa7, 0x08, 0x1d, 0x99,
+ 0x10, 0xc3, 0x55, 0xb5, 0x0f, 0xc3, 0x55, 0xbd, 0xc2, 0x1a, 0x36, 0x08,
+ 0x1d, 0xc9, 0x18, 0xc3, 0x55, 0xc9, 0x14, 0xc3, 0x55, 0xd1, 0xc2, 0x00,
+ 0x96, 0x08, 0x1d, 0xf1, 0x15, 0xc3, 0x55, 0xd9, 0xc2, 0x05, 0x5c, 0x08,
+ 0x1e, 0x01, 0xc2, 0x01, 0x0e, 0x08, 0x1e, 0x18, 0xc3, 0x05, 0x17, 0x08,
+ 0x1e, 0x89, 0x16, 0xc3, 0x55, 0xe9, 0xc7, 0x0d, 0x7f, 0x08, 0x1e, 0xa8,
+ 0xc3, 0xd3, 0xc1, 0x08, 0x1a, 0xb1, 0xc3, 0x01, 0xfa, 0x08, 0x1a, 0xc0,
+ 0xc3, 0x8a, 0x3b, 0x08, 0x1b, 0x29, 0xc5, 0xdf, 0x95, 0x08, 0x1b, 0x30,
+ 0x97, 0x08, 0x1b, 0x41, 0x8b, 0x08, 0x1b, 0x80, 0x96, 0x08, 0x1b, 0x88,
+ 0x8a, 0x08, 0x18, 0x71, 0x95, 0x08, 0x18, 0xf8, 0x95, 0x08, 0x18, 0xd8,
+ 0xce, 0x67, 0x90, 0x0e, 0x7d, 0xa1, 0xc8, 0x49, 0x54, 0x0e, 0x7d, 0x98,
+ 0xc7, 0x49, 0x4c, 0x0e, 0x7d, 0xab, 0x03, 0x55, 0xf3, 0xc7, 0x9e, 0x33,
+ 0x0e, 0x7c, 0xa0, 0xce, 0x67, 0x90, 0x0e, 0x7c, 0xc9, 0xc9, 0x92, 0xb1,
+ 0x0e, 0x7c, 0xc0, 0xc9, 0xb1, 0xe6, 0x0e, 0x7d, 0x71, 0xc9, 0x92, 0xb1,
+ 0x0e, 0x7d, 0x69, 0xc8, 0xc1, 0x33, 0x0e, 0x7d, 0x60, 0xca, 0x9e, 0x30,
+ 0x0e, 0x7d, 0x2b, 0x03, 0x55, 0xf7, 0xc9, 0x92, 0xb1, 0x0e, 0x7d, 0x1a,
+ 0x03, 0x55, 0xfd, 0xd6, 0x2c, 0x3d, 0x0e, 0x7d, 0x00, 0xc9, 0x92, 0xb1,
+ 0x0e, 0x7c, 0xeb, 0x03, 0x56, 0x03, 0xca, 0x9e, 0x30, 0x0e, 0x7c, 0xe0,
+ 0xcc, 0x88, 0xd4, 0x0e, 0x7c, 0xf0, 0xc7, 0x92, 0xb3, 0x0e, 0x7c, 0xb1,
+ 0xcb, 0x92, 0xaf, 0x0e, 0x7c, 0xa8, 0xc8, 0x9b, 0x8c, 0x0e, 0x7c, 0x3b,
+ 0x03, 0x56, 0x09, 0xd0, 0x59, 0xdf, 0x0e, 0x7c, 0x71, 0xc5, 0xda, 0x72,
+ 0x0e, 0x7c, 0x69, 0xc7, 0x80, 0xcc, 0x0e, 0x7c, 0x42, 0x03, 0x56, 0x0f,
+ 0xcb, 0x9b, 0xec, 0x0e, 0x7c, 0x60, 0xc6, 0x80, 0xcd, 0x0e, 0x78, 0xd9,
+ 0x4b, 0x91, 0xc8, 0x43, 0x56, 0x15, 0xc5, 0x00, 0x47, 0x0e, 0x78, 0xa9,
+ 0xc4, 0x00, 0xcd, 0x0e, 0x78, 0x48, 0xc8, 0xb9, 0xcb, 0x05, 0x4c, 0x58,
+ 0xc5, 0x00, 0x47, 0x01, 0x2c, 0xe1, 0xc4, 0x00, 0xcd, 0x01, 0x2c, 0xd8,
+ 0xc5, 0x00, 0x47, 0x01, 0x2c, 0xd1, 0xd4, 0x3b, 0xea, 0x01, 0x2c, 0xc8,
+ 0x92, 0x05, 0x22, 0xa1, 0x9a, 0x05, 0x22, 0x90, 0x92, 0x05, 0x22, 0x89,
+ 0x9a, 0x05, 0x22, 0x79, 0x96, 0x05, 0x22, 0x70, 0x9a, 0x05, 0x22, 0x40,
+ 0x9a, 0x05, 0x22, 0x10, 0x9a, 0x05, 0x21, 0xc8, 0x92, 0x05, 0x21, 0xc1,
+ 0x9a, 0x05, 0x21, 0xb1, 0x96, 0x05, 0x21, 0xa8, 0x9a, 0x05, 0x1d, 0x48,
+ 0x9a, 0x05, 0x1d, 0x18, 0x9a, 0x05, 0x17, 0x89, 0x92, 0x05, 0x17, 0x98,
+ 0x9a, 0x05, 0x17, 0xc0, 0x9a, 0x05, 0x18, 0x08, 0x9a, 0x05, 0x18, 0x38,
+ 0x9a, 0x05, 0x03, 0xd1, 0x92, 0x05, 0x03, 0xe0, 0x9a, 0x05, 0x04, 0x48,
+ 0x9a, 0x05, 0x04, 0x78, 0x9a, 0x05, 0x0a, 0xa8, 0x9a, 0x05, 0x0b, 0x30,
+ 0x9a, 0x05, 0x21, 0x58, 0x92, 0x05, 0x21, 0x11, 0x9a, 0x05, 0x21, 0x00,
+ 0x92, 0x05, 0x20, 0xf9, 0x9a, 0x05, 0x20, 0xe9, 0x96, 0x05, 0x20, 0xe0,
+ 0x9a, 0x05, 0x1c, 0x90, 0x9a, 0x05, 0x1c, 0x60, 0x9a, 0x05, 0x1b, 0xf0,
+ 0x9a, 0x05, 0x1e, 0x20, 0x9a, 0x05, 0x1d, 0xf0, 0x92, 0x05, 0x1d, 0x89,
+ 0x9a, 0x05, 0x1d, 0x78, 0x9a, 0x05, 0x1a, 0x20, 0x9a, 0x05, 0x19, 0x71,
+ 0x92, 0x05, 0x19, 0x80, 0x9a, 0x05, 0x1b, 0xd0, 0x9a, 0x05, 0x1b, 0xa0,
+ 0x92, 0x05, 0x1b, 0x41, 0x9a, 0x05, 0x1b, 0x31, 0x96, 0x05, 0x1b, 0x28,
+ 0x92, 0x05, 0x16, 0xb9, 0x9a, 0x05, 0x16, 0xa9, 0x96, 0x05, 0x16, 0xa0,
+ 0x9a, 0x05, 0x17, 0x28, 0x9a, 0x05, 0x17, 0x58, 0x9a, 0x05, 0x1a, 0xf8,
+ 0x9a, 0x05, 0x1a, 0xc8, 0x9a, 0x05, 0x1a, 0x51, 0x92, 0x05, 0x1a, 0x60,
+ 0x96, 0x05, 0x12, 0x51, 0x9a, 0x05, 0x12, 0x59, 0x92, 0x05, 0x12, 0x68,
+ 0x9a, 0x05, 0x04, 0xa9, 0x92, 0x05, 0x04, 0xb8, 0x9a, 0x05, 0x04, 0xe1,
+ 0x92, 0x05, 0x04, 0xf0, 0x9a, 0x05, 0x05, 0x38, 0x9a, 0x05, 0x05, 0x60,
+ 0x96, 0x05, 0x0b, 0x61, 0x9a, 0x05, 0x0b, 0x69, 0x92, 0x05, 0x0b, 0x78,
+ 0x9a, 0x05, 0x0b, 0xa0, 0x9a, 0x05, 0x0c, 0xd9, 0x92, 0x05, 0x0c, 0xe8,
+ 0x9a, 0x05, 0x0d, 0x11, 0x92, 0x05, 0x0d, 0x20, 0x9a, 0x05, 0x0d, 0x78,
+ 0x9a, 0x05, 0x0d, 0xa8, 0x9a, 0x05, 0x12, 0x20, 0x9a, 0x05, 0x11, 0xb1,
+ 0x92, 0x05, 0x11, 0xc0, 0x96, 0x05, 0x02, 0xd1, 0x9a, 0x05, 0x02, 0xd9,
+ 0x92, 0x05, 0x02, 0xe8, 0x9a, 0x05, 0x03, 0x11, 0x92, 0x05, 0x03, 0x20,
+ 0x9a, 0x05, 0x03, 0x80, 0x9a, 0x05, 0x09, 0xd1, 0x92, 0x05, 0x09, 0xe0,
+ 0x9a, 0x05, 0x0a, 0x09, 0x92, 0x05, 0x0a, 0x18, 0x9a, 0x05, 0x0a, 0x78,
+ 0x9a, 0x05, 0x10, 0xb9, 0x92, 0x05, 0x10, 0xc8, 0x96, 0x05, 0x10, 0xf1,
+ 0x9a, 0x05, 0x10, 0xf9, 0x92, 0x05, 0x11, 0x08, 0x9a, 0x05, 0x11, 0x70,
+ 0x97, 0x00, 0xb0, 0xab, 0x03, 0x56, 0x21, 0x8b, 0x00, 0xb0, 0xd0, 0x91,
+ 0x00, 0xae, 0x13, 0x03, 0x56, 0x25, 0x83, 0x00, 0xae, 0x19, 0x8b, 0x00,
+ 0xae, 0x09, 0x87, 0x00, 0xae, 0x00, 0x91, 0x00, 0xac, 0xcb, 0x03, 0x56,
+ 0x29, 0xc2, 0x00, 0xc9, 0x00, 0xc7, 0x51, 0x83, 0x00, 0xac, 0xd1, 0x8b,
+ 0x00, 0xac, 0xc1, 0x87, 0x00, 0xac, 0xb8, 0x83, 0x08, 0xd5, 0xd8, 0x91,
+ 0x08, 0xd5, 0xc8, 0x8b, 0x08, 0xd5, 0xb8, 0x83, 0x08, 0xd5, 0xa8, 0x91,
+ 0x08, 0xd5, 0x98, 0x8b, 0x08, 0xd5, 0x88, 0x83, 0x00, 0xa8, 0x70, 0x10,
+ 0xc3, 0x56, 0x2d, 0x87, 0x00, 0xa2, 0x98, 0x83, 0x00, 0xb1, 0x69, 0x8b,
+ 0x00, 0xb1, 0x61, 0x87, 0x00, 0xb1, 0x53, 0x03, 0x56, 0x39, 0x91, 0x00,
+ 0xb1, 0x49, 0x97, 0x00, 0xb1, 0x40, 0x97, 0x00, 0xb2, 0x41, 0x91, 0x00,
+ 0xb2, 0x49, 0x87, 0x00, 0xb2, 0x53, 0x03, 0x56, 0x3d, 0x8b, 0x00, 0xb2,
+ 0x61, 0x83, 0x00, 0xb2, 0x68, 0x87, 0x00, 0xb0, 0xc0, 0x97, 0x00, 0xb0,
+ 0xe1, 0x91, 0x00, 0xb0, 0xe9, 0x87, 0x00, 0xb0, 0xf3, 0x03, 0x56, 0x41,
+ 0x8b, 0x00, 0xb1, 0x01, 0x83, 0x00, 0xb1, 0x08, 0x83, 0x00, 0xc7, 0x81,
+ 0x97, 0x00, 0xc7, 0x68, 0x83, 0x00, 0xc7, 0x78, 0x87, 0x00, 0xaf, 0x90,
+ 0x83, 0x00, 0xae, 0x49, 0x8b, 0x00, 0xae, 0x41, 0x87, 0x00, 0xae, 0x33,
+ 0x03, 0x56, 0x45, 0x91, 0x00, 0xae, 0x29, 0x97, 0x00, 0xae, 0x20, 0x15,
+ 0xc3, 0x56, 0x49, 0x83, 0x00, 0xaf, 0x39, 0x8b, 0x00, 0xaf, 0x31, 0x87,
+ 0x00, 0xaf, 0x23, 0x03, 0x56, 0x60, 0x91, 0x00, 0xaf, 0x19, 0x97, 0x00,
+ 0xaf, 0x10, 0x83, 0x00, 0xb3, 0x01, 0x8b, 0x00, 0xb2, 0xf9, 0x87, 0x00,
+ 0xb2, 0xeb, 0x03, 0x56, 0x64, 0x91, 0x00, 0xb2, 0xe1, 0x97, 0x00, 0xb2,
+ 0xd8, 0x83, 0x00, 0xaf, 0x09, 0x8b, 0x00, 0xaf, 0x01, 0x87, 0x00, 0xae,
+ 0xf3, 0x03, 0x56, 0x68, 0x91, 0x00, 0xae, 0xe9, 0x97, 0x00, 0xae, 0xe0,
+ 0x0a, 0xc3, 0x56, 0x6c, 0x97, 0x00, 0xb1, 0xd1, 0x91, 0x00, 0xb1, 0xd9,
+ 0x87, 0x00, 0xb1, 0xe3, 0x03, 0x56, 0x83, 0x8b, 0x00, 0xb1, 0xf1, 0x83,
+ 0x00, 0xb1, 0xf8, 0x87, 0x00, 0xb3, 0x20, 0x87, 0x00, 0xb0, 0x88, 0x87,
+ 0x00, 0xb0, 0x58, 0x87, 0x00, 0xb0, 0x28, 0x83, 0x00, 0xb0, 0x01, 0x8b,
+ 0x00, 0xaf, 0xf9, 0x87, 0x00, 0xaf, 0xeb, 0x03, 0x56, 0x87, 0x91, 0x00,
+ 0xaf, 0xe1, 0x97, 0x00, 0xaf, 0xd8, 0x83, 0x00, 0xaf, 0xd1, 0x8b, 0x00,
+ 0xaf, 0xc9, 0x87, 0x00, 0xaf, 0xbb, 0x03, 0x56, 0x8b, 0x91, 0x00, 0xaf,
+ 0xb1, 0x97, 0x00, 0xaf, 0xa8, 0x87, 0x00, 0xaf, 0x58, 0x83, 0x00, 0xae,
+ 0xd9, 0x8b, 0x00, 0xae, 0xd1, 0x87, 0x00, 0xae, 0xc3, 0x03, 0x56, 0x8f,
+ 0x91, 0x00, 0xae, 0xb9, 0x97, 0x00, 0xae, 0xb0, 0x87, 0x00, 0xae, 0x98,
+ 0x87, 0x00, 0xae, 0x68, 0x83, 0x00, 0xb1, 0x99, 0x8b, 0x00, 0xb1, 0x91,
+ 0x87, 0x00, 0xb1, 0x83, 0x03, 0x56, 0x93, 0x91, 0x00, 0xb1, 0x79, 0x97,
+ 0x00, 0xb1, 0x70, 0x87, 0x00, 0xb1, 0x28, 0x87, 0x00, 0xb2, 0x18, 0x87,
+ 0x00, 0xb2, 0x88, 0x97, 0x00, 0xb2, 0xa1, 0x91, 0x00, 0xb2, 0xa9, 0x87,
+ 0x00, 0xb2, 0xb3, 0x03, 0x56, 0x97, 0x8b, 0x00, 0xb2, 0xc1, 0x83, 0x00,
+ 0xb2, 0xc8, 0x83, 0x00, 0xaa, 0x6b, 0x03, 0x56, 0x9b, 0x91, 0x00, 0xaa,
+ 0x53, 0x03, 0x56, 0x9f, 0x87, 0x00, 0xaa, 0x21, 0x19, 0x43, 0x56, 0xa3,
+ 0x83, 0x00, 0xac, 0x69, 0x91, 0x00, 0xac, 0x61, 0x8b, 0x00, 0xac, 0x59,
+ 0x87, 0x00, 0xac, 0x51, 0xc3, 0x14, 0x8f, 0x00, 0xaa, 0x78, 0xc4, 0xe7,
+ 0x93, 0x00, 0xab, 0x49, 0x19, 0x43, 0x56, 0xbc, 0x19, 0x43, 0x56, 0xd5,
+ 0x42, 0x11, 0xc3, 0xc3, 0x56, 0xee, 0x19, 0x43, 0x57, 0x07, 0x19, 0x43,
+ 0x57, 0x20, 0x91, 0x00, 0xa4, 0xcb, 0x03, 0x57, 0x39, 0x8b, 0x00, 0xa4,
+ 0xab, 0x03, 0x57, 0x3d, 0x87, 0x00, 0xa4, 0x99, 0x83, 0x00, 0xa4, 0xea,
+ 0x03, 0x57, 0x41, 0x83, 0x00, 0xa0, 0xc3, 0x03, 0x57, 0x45, 0x91, 0x00,
+ 0xa0, 0x9b, 0x03, 0x57, 0x49, 0x8b, 0x00, 0xa0, 0x7b, 0x03, 0x57, 0x4d,
+ 0x87, 0x00, 0xa0, 0x68, 0x83, 0x00, 0xa3, 0xfb, 0x03, 0x57, 0x51, 0x87,
+ 0x00, 0xa3, 0xa9, 0x8b, 0x00, 0xa3, 0xbb, 0x03, 0x57, 0x55, 0x91, 0x00,
+ 0xa3, 0xda, 0x03, 0x57, 0x59, 0x19, 0x43, 0x57, 0x5d, 0x87, 0x00, 0xa6,
+ 0x51, 0x83, 0x00, 0xa6, 0x62, 0x03, 0x57, 0x76, 0x19, 0xc3, 0x57, 0x7a,
+ 0x83, 0x00, 0xac, 0xf1, 0x91, 0x00, 0xac, 0xe9, 0x8b, 0x00, 0xac, 0xe1,
+ 0x87, 0x00, 0xac, 0xd8, 0xcd, 0x64, 0xa3, 0x00, 0xa1, 0x19, 0xc2, 0x00,
+ 0x56, 0x00, 0xa1, 0x20, 0xc5, 0x34, 0x21, 0x00, 0xa1, 0x29, 0xd6, 0x30,
+ 0xe1, 0x00, 0xa1, 0x30, 0x91, 0x00, 0xc6, 0x68, 0x8b, 0x00, 0xc6, 0x48,
+ 0xc9, 0x0f, 0xa9, 0x07, 0xf1, 0x71, 0xca, 0x01, 0x17, 0x07, 0xf1, 0x78,
+ 0x87, 0x05, 0x34, 0xf9, 0x83, 0x01, 0x6f, 0xe1, 0xc7, 0xcc, 0x53, 0x01,
+ 0x6f, 0xf8, 0x83, 0x01, 0x6f, 0x91, 0xc3, 0x1c, 0x4f, 0x01, 0x6f, 0x98,
+ 0x8b, 0x0f, 0x01, 0x01, 0x97, 0x0f, 0x00, 0xf8, 0xc8, 0xbe, 0x13, 0x0e,
+ 0x92, 0x19, 0xc6, 0xd0, 0xb2, 0x0e, 0x92, 0x10, 0xc2, 0x07, 0x44, 0x08,
+ 0x9b, 0xa1, 0xc2, 0x02, 0x52, 0x08, 0x9b, 0x99, 0xc2, 0x01, 0x01, 0x08,
+ 0x9b, 0x91, 0xc2, 0x06, 0x6b, 0x08, 0x9b, 0x89, 0x83, 0x08, 0x9b, 0x80,
+ 0xc3, 0x22, 0x44, 0x08, 0x9b, 0x61, 0x08, 0xc3, 0x57, 0x95, 0x16, 0xc3,
+ 0x57, 0xa1, 0xc3, 0x05, 0x17, 0x08, 0x9b, 0x39, 0xc4, 0x16, 0x57, 0x08,
+ 0x9b, 0x30, 0xc6, 0x07, 0xa1, 0x00, 0x18, 0x03, 0x03, 0x57, 0xad, 0xc9,
+ 0x2a, 0xe3, 0x00, 0x1a, 0x00, 0x00, 0xc3, 0x57, 0xb3, 0x45, 0x16, 0xa8,
+ 0x43, 0x57, 0xbf, 0xcb, 0x96, 0x14, 0x01, 0x06, 0x89, 0x48, 0xbb, 0x33,
+ 0x43, 0x57, 0xc9, 0xcb, 0x99, 0x9a, 0x00, 0xd6, 0x21, 0xcb, 0x10, 0xb4,
+ 0x00, 0xd6, 0x10, 0x00, 0xc3, 0x57, 0xd5, 0x45, 0x16, 0xa8, 0x43, 0x57,
+ 0xe1, 0xc5, 0x03, 0x50, 0x00, 0x18, 0xd1, 0xc5, 0x00, 0x34, 0x00, 0x1a,
+ 0x48, 0xc5, 0x00, 0x34, 0x00, 0x18, 0xe1, 0xc5, 0x03, 0x50, 0x00, 0x1a,
+ 0x88, 0xc9, 0x1e, 0x89, 0x00, 0xef, 0xa1, 0xdb, 0x17, 0xd1, 0x00, 0xef,
+ 0x80, 0xc9, 0x1e, 0x89, 0x00, 0xef, 0x99, 0xdb, 0x17, 0xd1, 0x00, 0xef,
+ 0x68, 0xc7, 0xa9, 0xe1, 0x00, 0xef, 0x19, 0xc5, 0x00, 0x34, 0x00, 0xee,
+ 0x50, 0x86, 0x00, 0xee, 0xc1, 0x96, 0x00, 0xd6, 0x71, 0x94, 0x00, 0xd6,
+ 0x69, 0x89, 0x00, 0xd6, 0x60, 0xce, 0x3f, 0xd6, 0x01, 0x07, 0x31, 0x45,
+ 0x03, 0x5d, 0x43, 0x57, 0xed, 0xc6, 0x00, 0x33, 0x00, 0xef, 0xe0, 0x49,
+ 0x65, 0xfb, 0xc3, 0x57, 0xf9, 0xd0, 0x5f, 0xbf, 0x00, 0xd5, 0xe0, 0xce,
+ 0x6e, 0xe2, 0x00, 0xd5, 0xc1, 0xc7, 0x7c, 0x99, 0x00, 0x19, 0xf8, 0xc8,
+ 0x64, 0xb7, 0x00, 0x1a, 0xd1, 0xd4, 0x3f, 0x32, 0x00, 0x1b, 0x10, 0xc6,
+ 0x00, 0x33, 0x00, 0x1a, 0xe0, 0xc6, 0x00, 0x33, 0x00, 0x1a, 0xf8, 0x00,
+ 0x43, 0x58, 0x05, 0xc5, 0x00, 0xcc, 0x00, 0xef, 0xd0, 0x00, 0x43, 0x58,
+ 0x11, 0xc4, 0x15, 0xa7, 0x05, 0x47, 0x39, 0xc2, 0x22, 0x45, 0x05, 0x47,
+ 0x30, 0xc3, 0x0d, 0x8f, 0x05, 0x47, 0x29, 0xc3, 0x08, 0xde, 0x05, 0x47,
+ 0x20, 0xc4, 0x05, 0xde, 0x05, 0x47, 0x19, 0xc2, 0x0a, 0x20, 0x05, 0x47,
+ 0x10, 0xc6, 0x00, 0x33, 0x00, 0x19, 0x78, 0xc6, 0x00, 0x33, 0x07, 0xf1,
+ 0x58, 0xc6, 0x00, 0x33, 0x07, 0xf1, 0x60, 0xc3, 0x01, 0xcc, 0x01, 0x65,
+ 0xa9, 0xc3, 0x48, 0x5f, 0x01, 0x65, 0xf9, 0x42, 0x01, 0x0d, 0xc3, 0x58,
+ 0x1d, 0xc3, 0x30, 0xe0, 0x01, 0x66, 0x39, 0x0a, 0xc3, 0x58, 0x29, 0xc6,
+ 0xd5, 0xc8, 0x01, 0x66, 0xb9, 0xc3, 0xeb, 0xd9, 0x01, 0x66, 0xc8, 0xc5,
+ 0xe3, 0x3c, 0x01, 0x66, 0xe9, 0x10, 0xc3, 0x58, 0x3c, 0xc3, 0xeb, 0x76,
+ 0x01, 0x67, 0x18, 0xc3, 0x01, 0xcc, 0x01, 0x65, 0xa1, 0xc3, 0x48, 0x5f,
+ 0x01, 0x65, 0xf1, 0x42, 0x01, 0x0d, 0xc3, 0x58, 0x48, 0xc3, 0x30, 0xe0,
+ 0x01, 0x66, 0x31, 0x0a, 0xc3, 0x58, 0x54, 0xc6, 0xd5, 0xc8, 0x01, 0x66,
+ 0xb1, 0xc3, 0xeb, 0xd9, 0x01, 0x66, 0xc0, 0xc5, 0xe3, 0x3c, 0x01, 0x66,
+ 0xe1, 0x10, 0xc3, 0x58, 0x67, 0xc3, 0xeb, 0x76, 0x01, 0x67, 0x10, 0x46,
+ 0x01, 0xab, 0x43, 0x58, 0x73, 0xc2, 0x00, 0x39, 0x01, 0x93, 0x70, 0xc2,
+ 0x00, 0x39, 0x01, 0x93, 0xc0, 0xc2, 0x00, 0x39, 0x01, 0x93, 0x80, 0xc2,
+ 0x00, 0x39, 0x01, 0x93, 0xc8, 0xc2, 0x00, 0x39, 0x01, 0x93, 0x98, 0xc2,
+ 0x00, 0x39, 0x01, 0x93, 0xd0, 0x83, 0x01, 0x93, 0xa9, 0x97, 0x01, 0x93,
+ 0xf0, 0xc2, 0x00, 0x39, 0x01, 0x93, 0xb0, 0xc2, 0x00, 0x39, 0x01, 0x93,
+ 0xb8, 0xc4, 0x15, 0xa7, 0x01, 0x23, 0x31, 0xc2, 0x22, 0x45, 0x01, 0x23,
+ 0x28, 0xc3, 0x0d, 0x8f, 0x01, 0x23, 0x21, 0xc3, 0x08, 0xde, 0x01, 0x23,
+ 0x18, 0xc4, 0x05, 0xde, 0x01, 0x23, 0x11, 0xc2, 0x0a, 0x20, 0x01, 0x23,
+ 0x08, 0x00, 0x43, 0x58, 0x7f, 0x00, 0x43, 0x58, 0x9d, 0xd0, 0x56, 0xdc,
+ 0x01, 0x92, 0x60, 0x00, 0x43, 0x58, 0xbb, 0xc3, 0x15, 0xa8, 0x01, 0x94,
+ 0x31, 0xc4, 0xe7, 0x43, 0x01, 0x94, 0xc8, 0x90, 0x01, 0x94, 0x81, 0xc6,
+ 0xd8, 0x44, 0x01, 0x94, 0xe1, 0xc7, 0xcb, 0x42, 0x01, 0x95, 0x60, 0xc3,
+ 0x00, 0xba, 0x01, 0x94, 0x89, 0xc3, 0xeb, 0x31, 0x01, 0x95, 0x58, 0xc2,
+ 0x00, 0x29, 0x01, 0x94, 0x21, 0xc2, 0x01, 0xa5, 0x01, 0x94, 0x59, 0xc7,
+ 0xcd, 0x10, 0x01, 0x94, 0xb0, 0xc2, 0x03, 0x5f, 0x01, 0x94, 0x41, 0xc3,
+ 0x00, 0x49, 0x01, 0x95, 0x80, 0xc3, 0x01, 0xfb, 0x01, 0x94, 0x71, 0xc6,
+ 0xd4, 0x66, 0x01, 0x95, 0x48, 0xcc, 0x7c, 0xd5, 0x01, 0x94, 0xb9, 0xc2,
+ 0x15, 0x80, 0x01, 0x95, 0x11, 0xc5, 0x2e, 0x0a, 0x01, 0x95, 0x18, 0x15,
+ 0xc3, 0x58, 0xd9, 0xc6, 0xd7, 0xb4, 0x01, 0x95, 0x50, 0x17, 0xc3, 0x58,
+ 0xe3, 0xc6, 0xd8, 0x14, 0x09, 0x29, 0xf8, 0xc4, 0xe6, 0x1b, 0x09, 0x29,
+ 0xf1, 0xc2, 0x0b, 0xfd, 0x09, 0x19, 0xd8, 0xc4, 0xe1, 0x61, 0x09, 0x1a,
+ 0x71, 0x86, 0x09, 0x1a, 0x69, 0xc9, 0xab, 0xfe, 0x09, 0x1a, 0x60, 0xc3,
+ 0x6b, 0x83, 0x09, 0x1a, 0x51, 0xc2, 0x00, 0x5b, 0x09, 0x1a, 0x48, 0xc2,
+ 0x01, 0x0d, 0x09, 0x1a, 0x21, 0x8f, 0x09, 0x1a, 0x19, 0xc2, 0x03, 0xab,
+ 0x09, 0x1a, 0x10, 0x97, 0x09, 0x1a, 0x01, 0x83, 0x09, 0x19, 0xe2, 0x03,
+ 0x58, 0xeb, 0xc5, 0xd8, 0x7b, 0x09, 0x19, 0xc8, 0x17, 0xc3, 0x58, 0xf9,
+ 0xc3, 0x21, 0x00, 0x09, 0x19, 0x81, 0xc2, 0x01, 0x0e, 0x09, 0x19, 0x79,
+ 0x03, 0x43, 0x59, 0x04, 0xc5, 0x3a, 0xa5, 0x09, 0x18, 0xc0, 0x97, 0x09,
+ 0x17, 0xb9, 0x87, 0x09, 0x17, 0xb0, 0xe0, 0x07, 0xc7, 0x09, 0x17, 0x88,
+ 0xda, 0x1d, 0x5c, 0x09, 0x18, 0x20, 0xcb, 0x9c, 0x7b, 0x09, 0x29, 0xb9,
+ 0xcc, 0x84, 0x30, 0x09, 0x29, 0xb0, 0xc3, 0x45, 0xcf, 0x09, 0x29, 0xa9,
+ 0xc4, 0xea, 0x1f, 0x09, 0x29, 0xa1, 0xc4, 0xe7, 0x9b, 0x09, 0x29, 0x98,
+ 0x00, 0x43, 0x59, 0x0e, 0x97, 0x09, 0x15, 0xab, 0x03, 0x59, 0x1a, 0xc3,
+ 0x02, 0x9e, 0x09, 0x15, 0xa1, 0xc4, 0x5a, 0xcf, 0x09, 0x15, 0x99, 0xc2,
+ 0x03, 0x5f, 0x09, 0x15, 0x91, 0xc4, 0x3a, 0x5f, 0x09, 0x15, 0x89, 0xc3,
+ 0x64, 0x5f, 0x09, 0x15, 0x81, 0x83, 0x09, 0x15, 0x78, 0xd6, 0x30, 0x89,
+ 0x09, 0x16, 0xa9, 0xc4, 0x5b, 0x6b, 0x09, 0x16, 0xa0, 0xc3, 0x12, 0x72,
+ 0x09, 0x16, 0x89, 0xc3, 0xae, 0xdc, 0x09, 0x16, 0x81, 0xc3, 0x94, 0xf4,
+ 0x09, 0x16, 0x79, 0xc6, 0xd7, 0x8a, 0x09, 0x16, 0x71, 0xc3, 0x03, 0xaa,
+ 0x09, 0x16, 0x63, 0x03, 0x59, 0x20, 0xc3, 0x1d, 0x6a, 0x09, 0x16, 0x59,
+ 0xc3, 0x03, 0xc6, 0x09, 0x16, 0x51, 0x04, 0xc3, 0x59, 0x26, 0x83, 0x09,
+ 0x16, 0x38, 0xc2, 0x07, 0x6e, 0x09, 0x16, 0x29, 0x83, 0x09, 0x16, 0x20,
+ 0x42, 0x00, 0x44, 0xc3, 0x59, 0x32, 0x15, 0xc3, 0x59, 0x3c, 0xc2, 0x01,
+ 0x04, 0x09, 0x29, 0x71, 0xc8, 0x61, 0x7e, 0x09, 0x1c, 0xb1, 0x17, 0xc3,
+ 0x59, 0x46, 0xc3, 0x21, 0x00, 0x09, 0x14, 0xf1, 0xc2, 0x00, 0xa9, 0x09,
+ 0x14, 0xe9, 0xc3, 0xb1, 0x53, 0x09, 0x14, 0xe1, 0x0d, 0xc3, 0x59, 0x5c,
+ 0xc2, 0x01, 0x0e, 0x09, 0x14, 0xc9, 0xc2, 0x07, 0x63, 0x09, 0x14, 0xbb,
+ 0x03, 0x59, 0x68, 0x83, 0x09, 0x14, 0xb0, 0xc9, 0xb4, 0xfe, 0x09, 0x29,
+ 0x68, 0x97, 0x09, 0x29, 0x53, 0x03, 0x59, 0x6c, 0xcc, 0x36, 0x97, 0x09,
+ 0x29, 0x49, 0x0f, 0xc3, 0x59, 0x84, 0xc7, 0xc5, 0x7d, 0x09, 0x29, 0x39,
+ 0xc5, 0xdd, 0xbf, 0x09, 0x29, 0x31, 0xc2, 0x00, 0x0a, 0x09, 0x29, 0x29,
+ 0x09, 0xc3, 0x59, 0x90, 0xc8, 0xbd, 0xd3, 0x09, 0x29, 0x11, 0xc3, 0x18,
+ 0x26, 0x09, 0x1c, 0x89, 0xc3, 0x0a, 0xf1, 0x09, 0x12, 0xd3, 0x03, 0x59,
+ 0x9b, 0x10, 0xc3, 0x59, 0xa1, 0x03, 0x43, 0x59, 0xab, 0xcf, 0x61, 0xe0,
+ 0x09, 0x13, 0xc3, 0x03, 0x59, 0xb8, 0x4a, 0xa8, 0x58, 0x43, 0x59, 0xbe,
+ 0xd1, 0x50, 0xe1, 0x09, 0x13, 0x60, 0xc3, 0x76, 0x92, 0x09, 0x13, 0x41,
+ 0xc3, 0x12, 0x72, 0x09, 0x13, 0x33, 0x03, 0x59, 0xfa, 0xc4, 0x4b, 0x12,
+ 0x09, 0x13, 0x29, 0xc3, 0x1d, 0x6a, 0x09, 0x13, 0x20, 0x47, 0x07, 0x6c,
+ 0x43, 0x5a, 0x00, 0xc2, 0x05, 0x5c, 0x09, 0x11, 0xa9, 0xc3, 0x55, 0x95,
+ 0x09, 0x11, 0xa1, 0x83, 0x09, 0x11, 0x98, 0x46, 0x07, 0x6d, 0xc3, 0x5a,
+ 0x12, 0xc4, 0x3a, 0xa6, 0x09, 0x11, 0xe8, 0x45, 0x07, 0x6e, 0xc3, 0x5a,
+ 0x25, 0xc3, 0x5b, 0x6c, 0x09, 0x10, 0x88, 0xc6, 0x6f, 0x37, 0x09, 0x10,
+ 0xab, 0x03, 0x5a, 0x75, 0xc6, 0x01, 0xea, 0x09, 0x10, 0xa0, 0xcd, 0x7f,
+ 0x78, 0x09, 0x10, 0xc9, 0xc9, 0xb2, 0x01, 0x09, 0x10, 0xc0, 0x17, 0xc3,
+ 0x5a, 0x7b, 0xcd, 0x7b, 0xa9, 0x09, 0x28, 0xa1, 0xd5, 0x36, 0x97, 0x09,
+ 0x28, 0x99, 0xc2, 0x01, 0xce, 0x09, 0x28, 0x91, 0xc3, 0x03, 0xaa, 0x09,
+ 0x28, 0x83, 0x03, 0x5a, 0x85, 0xc2, 0x07, 0x69, 0x09, 0x28, 0x79, 0xc3,
+ 0x2b, 0x19, 0x09, 0x28, 0x70, 0x17, 0xc3, 0x5a, 0x8b, 0x16, 0xc3, 0x5a,
+ 0x99, 0xc2, 0x00, 0x96, 0x09, 0x28, 0x31, 0xc3, 0xab, 0xe0, 0x09, 0x28,
+ 0x29, 0xce, 0x73, 0x18, 0x09, 0x28, 0x21, 0xc3, 0x64, 0x5f, 0x09, 0x28,
+ 0x19, 0xc3, 0x02, 0xe4, 0x09, 0x28, 0x10, 0x47, 0x07, 0x6c, 0x43, 0x5a,
+ 0xa3, 0x46, 0x07, 0x6d, 0xc3, 0x5a, 0xbb, 0xc7, 0x01, 0xe9, 0x09, 0x0f,
+ 0x58, 0xc4, 0x3a, 0xa6, 0x09, 0x0f, 0x7b, 0x03, 0x5b, 0x05, 0xc9, 0xa2,
+ 0x87, 0x09, 0x0f, 0x6a, 0x03, 0x5b, 0x0b, 0x9f, 0x09, 0x1c, 0x38, 0x8d,
+ 0x09, 0x0b, 0x78, 0x86, 0x09, 0x0b, 0x88, 0x94, 0x09, 0x0a, 0xf1, 0xc3,
+ 0x07, 0x67, 0x09, 0x0a, 0xe9, 0x86, 0x09, 0x0a, 0xe0, 0x97, 0x09, 0x0c,
+ 0x1b, 0x03, 0x5b, 0x11, 0xc2, 0x01, 0x8d, 0x09, 0x0c, 0x11, 0x87, 0x09,
+ 0x0c, 0x09, 0x83, 0x09, 0x0c, 0x00, 0x94, 0x09, 0x0b, 0xf8, 0x8f, 0x09,
+ 0x1c, 0x18, 0x86, 0x09, 0x1c, 0x09, 0xc2, 0xd2, 0x7f, 0x09, 0x0b, 0x60,
+ 0xc2, 0x01, 0x0d, 0x09, 0x1c, 0x03, 0x03, 0x5b, 0x15, 0xc2, 0x3e, 0x08,
+ 0x09, 0x0b, 0x40, 0x94, 0x09, 0x0b, 0x2b, 0x03, 0x5b, 0x19, 0xc7, 0x5c,
+ 0x78, 0x09, 0x0b, 0x21, 0x8e, 0x09, 0x0b, 0x18, 0xa0, 0x09, 0x1b, 0xf9,
+ 0x9f, 0x09, 0x0a, 0xd8, 0xc9, 0xab, 0xda, 0x09, 0x0a, 0xd0, 0xcb, 0x9c,
+ 0x91, 0x09, 0x0b, 0xc8, 0x46, 0x21, 0x5d, 0x43, 0x5b, 0x1f, 0xe0, 0x07,
+ 0x67, 0x09, 0x0c, 0xf0, 0xc3, 0x55, 0x95, 0x09, 0x09, 0x01, 0xca, 0xa8,
+ 0xbc, 0x09, 0x08, 0xf8, 0xc8, 0x61, 0x7e, 0x09, 0x26, 0x61, 0xcd, 0x80,
+ 0x21, 0x09, 0x08, 0xe1, 0xc3, 0x21, 0x00, 0x09, 0x08, 0xd9, 0xc3, 0x59,
+ 0x15, 0x09, 0x08, 0xca, 0x03, 0x5b, 0x31, 0x16, 0xc3, 0x5b, 0x37, 0xcd,
+ 0x4d, 0xcf, 0x09, 0x08, 0x90, 0xc2, 0x07, 0x44, 0x09, 0x08, 0x79, 0xcb,
+ 0x92, 0xba, 0x09, 0x08, 0x71, 0xc3, 0x03, 0xaa, 0x09, 0x08, 0x69, 0xc9,
+ 0x5c, 0x76, 0x09, 0x08, 0x61, 0xca, 0xa8, 0xc6, 0x09, 0x08, 0x58, 0xc4,
+ 0xe4, 0x2b, 0x09, 0x26, 0x41, 0x15, 0xc3, 0x5b, 0x43, 0x10, 0xc3, 0x5b,
+ 0x51, 0x0f, 0xc3, 0x5b, 0x61, 0x0e, 0xc3, 0x5b, 0x71, 0x0d, 0xc3, 0x5b,
+ 0x7e, 0x0a, 0xc3, 0x5b, 0x8f, 0x09, 0xc3, 0x5b, 0x9f, 0x07, 0xc3, 0x5b,
+ 0xad, 0x06, 0xc3, 0x5b, 0xc1, 0x04, 0xc3, 0x5b, 0xd0, 0x03, 0xc3, 0x5b,
+ 0xdd, 0x97, 0x09, 0x07, 0x53, 0x03, 0x5b, 0xf9, 0xc4, 0x3a, 0x6a, 0x09,
+ 0x07, 0x49, 0xc2, 0x07, 0x44, 0x09, 0x07, 0x11, 0x0b, 0x43, 0x5c, 0x00,
+ 0xcd, 0x77, 0xcd, 0x09, 0x07, 0xd1, 0xc9, 0xb5, 0xcd, 0x09, 0x07, 0xc9,
+ 0xc4, 0x5b, 0x6b, 0x09, 0x07, 0xc0, 0x97, 0x09, 0x25, 0xa9, 0xc2, 0x00,
+ 0x5b, 0x09, 0x1b, 0xc0, 0x86, 0x09, 0x05, 0xa1, 0x9f, 0x09, 0x05, 0x98,
+ 0x97, 0x09, 0x05, 0x91, 0x8b, 0x09, 0x05, 0x89, 0x83, 0x09, 0x05, 0x7a,
+ 0x03, 0x5c, 0x0c, 0xc2, 0x36, 0xaa, 0x09, 0x05, 0x71, 0xc5, 0x47, 0x65,
+ 0x09, 0x05, 0x62, 0x03, 0x5c, 0x12, 0xc5, 0x3a, 0xa5, 0x09, 0x05, 0x50,
+ 0xc5, 0x3a, 0xa5, 0x09, 0x05, 0x40, 0x90, 0x09, 0x05, 0x29, 0xc9, 0xac,
+ 0x3d, 0x09, 0x05, 0x1a, 0x03, 0x5c, 0x18, 0x95, 0x09, 0x25, 0x98, 0x8e,
+ 0x09, 0x25, 0x88, 0xc5, 0x5b, 0x6a, 0x09, 0x04, 0xc8, 0xc6, 0x61, 0x80,
+ 0x09, 0x25, 0x41, 0xc2, 0x00, 0x5b, 0x09, 0x25, 0x38, 0x8b, 0x09, 0x25,
+ 0x21, 0xc2, 0x03, 0x47, 0x09, 0x25, 0x19, 0xc3, 0x02, 0xe4, 0x09, 0x25,
+ 0x10, 0xcc, 0x82, 0x5c, 0x09, 0x25, 0x09, 0x03, 0x43, 0x5c, 0x1e, 0x17,
+ 0xc3, 0x5c, 0x2b, 0xc5, 0x47, 0x65, 0x09, 0x24, 0xd0, 0x8b, 0x09, 0x24,
+ 0xc1, 0x83, 0x09, 0x24, 0xb8, 0x8b, 0x09, 0x24, 0xa3, 0x03, 0x5c, 0x38,
+ 0x83, 0x09, 0x24, 0x98, 0xc2, 0x0b, 0xfd, 0x09, 0x24, 0x89, 0xc2, 0x01,
+ 0x5b, 0x09, 0x24, 0x80, 0xc2, 0x01, 0x0d, 0x09, 0x24, 0x73, 0x03, 0x5c,
+ 0x44, 0xc4, 0xea, 0x7f, 0x09, 0x24, 0x68, 0xc5, 0x3a, 0xa5, 0x09, 0x04,
+ 0x38, 0x17, 0xc3, 0x5c, 0x4a, 0xc4, 0x3a, 0x6a, 0x09, 0x03, 0x59, 0xc2,
+ 0x02, 0x29, 0x09, 0x03, 0x51, 0xcc, 0x36, 0x97, 0x09, 0x03, 0x49, 0xc2,
+ 0x03, 0x5f, 0x09, 0x03, 0x41, 0x0e, 0xc3, 0x5c, 0x56, 0xc3, 0x59, 0x15,
+ 0x09, 0x03, 0x19, 0xc2, 0x00, 0xac, 0x09, 0x03, 0x0b, 0x03, 0x5c, 0x61,
+ 0xc2, 0x01, 0x0e, 0x09, 0x03, 0x01, 0x09, 0xc3, 0x5c, 0x67, 0x04, 0xc3,
+ 0x5c, 0x7b, 0x03, 0x43, 0x5c, 0x85, 0xc2, 0x5a, 0xd1, 0x09, 0x24, 0x09,
+ 0xc3, 0x30, 0xe0, 0x09, 0x00, 0x98, 0xc5, 0x5b, 0x6a, 0x09, 0x24, 0x00,
+ 0xc3, 0x0e, 0x6d, 0x09, 0x00, 0x89, 0xc7, 0x61, 0x7f, 0x09, 0x00, 0x80,
+ 0xc7, 0x5c, 0x78, 0x09, 0x00, 0x71, 0x8e, 0x09, 0x00, 0x68, 0xc8, 0x0c,
+ 0xb0, 0x09, 0x01, 0xe3, 0x03, 0x5c, 0x91, 0x16, 0x43, 0x5c, 0x97, 0xce,
+ 0x6f, 0xb4, 0x09, 0x14, 0x71, 0x46, 0x07, 0x6d, 0x43, 0x5c, 0x9d, 0x9f,
+ 0x09, 0x14, 0x40, 0x84, 0x09, 0x14, 0x30, 0x97, 0x09, 0x14, 0x19, 0x8b,
+ 0x09, 0x14, 0x10, 0x84, 0x09, 0x14, 0x08, 0xe0, 0x03, 0xa7, 0x09, 0x0a,
+ 0x48, 0xcb, 0x92, 0x0a, 0x00, 0x27, 0x99, 0xc8, 0x1e, 0x8a, 0x00, 0x27,
+ 0x88, 0xc9, 0x21, 0xcb, 0x00, 0x25, 0x69, 0xcb, 0x96, 0x8d, 0x05, 0x34,
+ 0x58, 0xc9, 0x21, 0xcb, 0x00, 0x29, 0x79, 0xcb, 0x96, 0x8d, 0x00, 0x29,
+ 0x09, 0xc4, 0x04, 0x63, 0x00, 0x28, 0x99, 0xc4, 0x13, 0xc7, 0x00, 0x26,
+ 0x30, 0xc9, 0x4d, 0x9d, 0x00, 0x29, 0x49, 0xcb, 0x96, 0x8d, 0x00, 0x29,
+ 0x19, 0xc4, 0x13, 0xc7, 0x00, 0x26, 0x51, 0xc4, 0x04, 0x63, 0x00, 0x26,
+ 0x41, 0xc9, 0x21, 0xcb, 0x00, 0x25, 0x18, 0xc2, 0x00, 0x5b, 0x00, 0x29,
+ 0x59, 0x87, 0x05, 0x34, 0x48, 0xc2, 0x00, 0x36, 0x05, 0x32, 0x18, 0xcf,
+ 0x6b, 0x6d, 0x00, 0x29, 0x38, 0x8b, 0x00, 0x21, 0xcb, 0x03, 0x5c, 0xaf,
+ 0x97, 0x00, 0x22, 0xf0, 0x8e, 0x05, 0x33, 0x29, 0x8f, 0x05, 0x33, 0x38,
+ 0xc9, 0x21, 0xcb, 0x00, 0x29, 0x29, 0xcb, 0x96, 0x8d, 0x00, 0x25, 0x38,
+ 0xcf, 0x6b, 0x6d, 0x00, 0x25, 0xf8, 0xc9, 0x1e, 0x89, 0x00, 0x27, 0xc9,
+ 0xc8, 0x75, 0xa2, 0x05, 0x32, 0x88, 0xc3, 0xec, 0xe7, 0x00, 0x28, 0x79,
+ 0xc3, 0xec, 0x45, 0x00, 0x28, 0x69, 0xc3, 0xcf, 0xdc, 0x00, 0x28, 0x59,
+ 0xc3, 0xb1, 0x04, 0x00, 0x28, 0x49, 0x06, 0xc3, 0x5c, 0xb5, 0xc3, 0xeb,
+ 0x0a, 0x00, 0x28, 0x28, 0xc4, 0x04, 0x63, 0x00, 0x26, 0x21, 0xc6, 0x03,
+ 0x81, 0x00, 0x24, 0xf9, 0xc9, 0x21, 0xcb, 0x00, 0x24, 0xd9, 0xcf, 0x2e,
+ 0xd8, 0x00, 0x24, 0xe8, 0xc6, 0x03, 0x81, 0x00, 0x27, 0xf9, 0xc4, 0x04,
+ 0x63, 0x00, 0x27, 0xe9, 0xc9, 0x21, 0xcb, 0x00, 0x25, 0x98, 0xc6, 0x03,
+ 0x81, 0x00, 0x24, 0x9b, 0x03, 0x5c, 0xc5, 0xc9, 0x21, 0xcb, 0x00, 0x27,
+ 0xb9, 0xc6, 0x5e, 0x39, 0x00, 0x24, 0x89, 0xcb, 0x96, 0x8d, 0x00, 0x24,
+ 0xa8, 0xcf, 0x69, 0x9c, 0x00, 0x27, 0x58, 0xc5, 0x1f, 0x9c, 0x00, 0x26,
+ 0xb9, 0xc5, 0x1e, 0x64, 0x00, 0x22, 0x80, 0x83, 0x05, 0x32, 0x39, 0x46,
+ 0x3f, 0x88, 0x43, 0x5c, 0xcb, 0xc8, 0x1e, 0x8a, 0x00, 0x26, 0xf9, 0xc8,
+ 0x21, 0xcc, 0x00, 0x24, 0xc8, 0x46, 0x01, 0xab, 0x43, 0x5c, 0xeb, 0xcf,
+ 0x2e, 0xd8, 0x00, 0x25, 0xc9, 0x06, 0x43, 0x5c, 0xf5, 0xca, 0xa3, 0xda,
+ 0x00, 0x24, 0x50, 0xc3, 0xeb, 0x0a, 0x00, 0x28, 0x31, 0xc2, 0x1c, 0x3e,
+ 0x00, 0x28, 0x11, 0x87, 0x00, 0x28, 0x00, 0xc9, 0x1e, 0x92, 0x00, 0x27,
+ 0xd0, 0xc8, 0x1e, 0x8a, 0x00, 0x27, 0xa1, 0xc8, 0x21, 0xcc, 0x00, 0x25,
+ 0xb0, 0xc3, 0x2c, 0x4d, 0x05, 0x32, 0x91, 0x83, 0x05, 0x32, 0xb1, 0xd1,
+ 0x56, 0x31, 0x05, 0x32, 0xe1, 0x87, 0x00, 0x23, 0x21, 0xca, 0x56, 0x38,
+ 0x00, 0x23, 0x41, 0xc7, 0xcc, 0x53, 0x00, 0x23, 0x60, 0x06, 0xc3, 0x5d,
+ 0x01, 0xc5, 0x1f, 0x9c, 0x00, 0x26, 0x08, 0xc7, 0xc9, 0xd6, 0x00, 0x6d,
+ 0x39, 0xc6, 0x93, 0xa6, 0x00, 0x6d, 0x68, 0xc7, 0xc6, 0x17, 0x00, 0x6d,
+ 0x49, 0xc6, 0x93, 0xa6, 0x00, 0x6d, 0x78, 0xc7, 0xcd, 0xfe, 0x00, 0x6c,
+ 0xd9, 0xc7, 0xc6, 0xb8, 0x00, 0x6c, 0xe9, 0xc7, 0xce, 0x59, 0x00, 0x6d,
+ 0x09, 0xc7, 0xca, 0xe7, 0x00, 0x6d, 0x19, 0x16, 0xc3, 0x5d, 0x0d, 0x06,
+ 0xc3, 0x5d, 0x19, 0xc7, 0xce, 0x36, 0x00, 0x6d, 0xa9, 0xc7, 0x93, 0xa5,
+ 0x00, 0x6d, 0xb8, 0xca, 0x6b, 0x09, 0x00, 0x6e, 0xe1, 0xcf, 0x6b, 0x04,
+ 0x00, 0x6e, 0xe9, 0xcb, 0x8f, 0xce, 0x00, 0x6e, 0xf0, 0xc5, 0x20, 0xa1,
+ 0x0e, 0xce, 0xa1, 0xc7, 0xbf, 0x8b, 0x0e, 0xce, 0x28, 0xc5, 0x20, 0xa1,
+ 0x0e, 0xce, 0x99, 0xc7, 0xbf, 0x8b, 0x0e, 0xce, 0x20, 0xc5, 0x20, 0xa1,
+ 0x0e, 0xce, 0x91, 0xc7, 0xbf, 0x8b, 0x0e, 0xce, 0x18, 0xc5, 0xdf, 0x86,
+ 0x0e, 0xcd, 0x99, 0xca, 0x9f, 0xc0, 0x0e, 0xcd, 0x60, 0xc5, 0xdf, 0x86,
+ 0x0e, 0xcd, 0x91, 0xca, 0x9f, 0xc0, 0x0e, 0xcd, 0x58, 0xc5, 0xdf, 0x86,
+ 0x0e, 0xcd, 0x89, 0xca, 0x9f, 0xc0, 0x0e, 0xcd, 0x50, 0xc8, 0x3a, 0x32,
+ 0x0e, 0xc8, 0xf1, 0xc6, 0x23, 0x24, 0x0e, 0xc8, 0xe0, 0xc4, 0x18, 0xf6,
+ 0x0e, 0xd3, 0x2b, 0x03, 0x5d, 0x25, 0xc6, 0x59, 0x99, 0x0e, 0xd3, 0x1a,
+ 0x03, 0x5d, 0x2b, 0xcb, 0x53, 0x7e, 0x0e, 0xcc, 0x31, 0xc6, 0x02, 0x91,
+ 0x0e, 0xcc, 0x29, 0xc6, 0x23, 0x24, 0x0e, 0xcc, 0x20, 0xcb, 0x53, 0x7e,
+ 0x0e, 0xcc, 0x19, 0xc6, 0x02, 0x91, 0x0e, 0xcc, 0x11, 0xc6, 0x23, 0x24,
+ 0x0e, 0xcc, 0x08, 0xcb, 0x53, 0x7e, 0x0e, 0xca, 0x81, 0xc6, 0x02, 0x91,
+ 0x0e, 0xca, 0x79, 0xc6, 0x23, 0x24, 0x0e, 0xca, 0x70, 0xcb, 0x53, 0x7e,
+ 0x0e, 0xca, 0x69, 0xc6, 0x02, 0x91, 0x0e, 0xca, 0x61, 0xc6, 0x23, 0x24,
+ 0x0e, 0xca, 0x58, 0xc7, 0x02, 0xb2, 0x0e, 0xd1, 0x49, 0xc5, 0x1a, 0x39,
+ 0x0e, 0xd1, 0x38, 0x00, 0x43, 0x5d, 0x31, 0x00, 0x43, 0x5d, 0x3d, 0x00,
+ 0x43, 0x5d, 0x49, 0x00, 0x43, 0x5d, 0x79, 0xc5, 0x01, 0x62, 0x0e, 0xc0,
+ 0x2b, 0x03, 0x5d, 0x98, 0xd2, 0x14, 0xbe, 0x0e, 0xc6, 0xa3, 0x03, 0x5d,
+ 0x9c, 0x45, 0x01, 0xbd, 0xc3, 0x5d, 0xa0, 0x47, 0x14, 0xca, 0x43, 0x5d,
+ 0xac, 0x00, 0x43, 0x5d, 0xbb, 0x00, 0x43, 0x5d, 0xfe, 0x92, 0x0e, 0xc3,
+ 0x6b, 0x03, 0x5e, 0x16, 0xc6, 0xc2, 0xf5, 0x0e, 0xc3, 0xaa, 0x03, 0x5e,
+ 0x1a, 0x00, 0x43, 0x5e, 0x1e, 0x00, 0x43, 0x5e, 0x3f, 0xcb, 0x14, 0xc5,
+ 0x0e, 0xc5, 0x91, 0xc9, 0xb4, 0x38, 0x0e, 0xc4, 0xa9, 0x46, 0x0e, 0xcd,
+ 0xc3, 0x5e, 0x5a, 0xc8, 0xc0, 0x0b, 0x0e, 0xc3, 0xc9, 0xd3, 0x41, 0x73,
+ 0x0e, 0xc2, 0xb1, 0xc5, 0x01, 0x62, 0x0e, 0xc0, 0x18, 0xc5, 0x0e, 0xd3,
+ 0x0e, 0xd0, 0x29, 0xc8, 0x41, 0xac, 0x0e, 0xd0, 0x18, 0xc5, 0x0e, 0xd3,
+ 0x0e, 0xd0, 0x21, 0xc4, 0x03, 0x10, 0x0e, 0xd0, 0x11, 0xc8, 0x41, 0xac,
+ 0x0e, 0xd0, 0x08, 0xc6, 0x02, 0x91, 0x0e, 0xcf, 0xa1, 0xc6, 0x23, 0x24,
+ 0x0e, 0xcf, 0x98, 0xc6, 0x02, 0x91, 0x0e, 0xcf, 0x81, 0xc6, 0x23, 0x24,
+ 0x0e, 0xcf, 0x78, 0xc5, 0x15, 0x2e, 0x0e, 0xce, 0xf1, 0x15, 0xc3, 0x5e,
+ 0x66, 0x48, 0x1d, 0x82, 0x43, 0x5e, 0x72, 0xc6, 0x02, 0x91, 0x0e, 0xcf,
+ 0x61, 0xc6, 0x23, 0x24, 0x0e, 0xcf, 0x48, 0xc6, 0x02, 0x91, 0x0e, 0xcf,
+ 0x59, 0xc6, 0x23, 0x24, 0x0e, 0xcf, 0x40, 0xc6, 0x02, 0x91, 0x0e, 0xcf,
+ 0x51, 0xc6, 0x23, 0x24, 0x0e, 0xcf, 0x38, 0xca, 0x98, 0x0f, 0x0e, 0xcb,
+ 0x49, 0x49, 0x41, 0xac, 0x43, 0x5e, 0x7e, 0x46, 0x20, 0xa1, 0xc3, 0x5e,
+ 0x93, 0x48, 0xbf, 0x8b, 0x43, 0x5e, 0x9f, 0x46, 0x20, 0xa1, 0xc3, 0x5e,
+ 0xab, 0x48, 0xbf, 0x8b, 0x43, 0x5e, 0xbd, 0xc8, 0xc0, 0x53, 0x0e, 0xce,
+ 0xc9, 0xc5, 0x15, 0x2e, 0x0e, 0xce, 0xbb, 0x03, 0x5e, 0xc9, 0xc6, 0x04,
+ 0x1b, 0x0e, 0xce, 0xb1, 0xc5, 0x00, 0x3e, 0x0e, 0xce, 0xa9, 0x48, 0x1d,
+ 0x82, 0x43, 0x5e, 0xcf, 0xc5, 0x15, 0x2e, 0x0e, 0xcb, 0xb1, 0xc6, 0x04,
+ 0x1b, 0x0e, 0xcb, 0xa9, 0xc5, 0x00, 0x3e, 0x0e, 0xcb, 0xa0, 0xc5, 0x15,
+ 0x2e, 0x0e, 0xcb, 0xd1, 0xc6, 0x04, 0x1b, 0x0e, 0xcb, 0xc9, 0xc5, 0x00,
+ 0x3e, 0x0e, 0xcb, 0xc0, 0xca, 0x98, 0x0f, 0x0e, 0xcb, 0x91, 0xc8, 0x52,
+ 0x93, 0x0e, 0xcb, 0x88, 0xcb, 0x98, 0x0e, 0x0e, 0xcb, 0x68, 0xc6, 0x02,
+ 0x91, 0x0e, 0xcf, 0x91, 0xc6, 0x23, 0x24, 0x0e, 0xcf, 0x88, 0xc6, 0x02,
+ 0x91, 0x0e, 0xcf, 0x71, 0xc6, 0x23, 0x24, 0x0e, 0xcf, 0x68, 0x4e, 0x72,
+ 0xee, 0xc3, 0x5e, 0xdb, 0x48, 0x1d, 0x82, 0xc3, 0x5e, 0xed, 0x46, 0x0e,
+ 0xd3, 0x43, 0x5e, 0xf9, 0xc6, 0x02, 0x91, 0x0e, 0xcf, 0x31, 0xc6, 0x23,
+ 0x24, 0x0e, 0xcf, 0x20, 0xc6, 0x02, 0x91, 0x0e, 0xcf, 0x29, 0xc6, 0x23,
+ 0x24, 0x0e, 0xcf, 0x18, 0xc5, 0xdf, 0x86, 0x0e, 0xcd, 0x79, 0xca, 0x9f,
+ 0xc0, 0x0e, 0xcd, 0x40, 0xc7, 0x02, 0x90, 0x0e, 0xcc, 0xc0, 0xc5, 0xdf,
+ 0x86, 0x0e, 0xcd, 0x71, 0xca, 0x9f, 0xc0, 0x0e, 0xcd, 0x38, 0x00, 0xc3,
+ 0x5f, 0x05, 0x48, 0xc2, 0x6b, 0x43, 0x5f, 0x15, 0xc5, 0x15, 0x2e, 0x0e,
+ 0xca, 0x09, 0xc6, 0x04, 0x1b, 0x0e, 0xca, 0x01, 0xc5, 0x00, 0x3e, 0x0e,
+ 0xc9, 0xf8, 0xc8, 0x60, 0xc6, 0x0e, 0xc9, 0xf1, 0xc5, 0x15, 0x2e, 0x0e,
+ 0xc9, 0xe9, 0xc6, 0x04, 0x1b, 0x0e, 0xc9, 0xe1, 0xc5, 0x00, 0x3e, 0x0e,
+ 0xc9, 0xd8, 0xca, 0x98, 0x0f, 0x0e, 0xc9, 0x71, 0x49, 0x41, 0xac, 0x43,
+ 0x5f, 0x21, 0xc5, 0x15, 0x2e, 0x0e, 0xca, 0x21, 0xc6, 0x04, 0x1b, 0x0e,
+ 0xca, 0x19, 0xc5, 0x00, 0x3e, 0x0e, 0xca, 0x10, 0xc5, 0x15, 0x2e, 0x0e,
+ 0xc9, 0xd1, 0xc6, 0x04, 0x1b, 0x0e, 0xc9, 0xc9, 0xc5, 0x00, 0x3e, 0x0e,
+ 0xc9, 0xc0, 0xcb, 0x98, 0x0e, 0x0e, 0xc9, 0xb8, 0xcb, 0x98, 0x0e, 0x0e,
+ 0xc9, 0x90, 0xc5, 0x15, 0x2e, 0x0e, 0xcb, 0x1b, 0x03, 0x5f, 0x36, 0xc6,
+ 0x04, 0x1b, 0x0e, 0xcb, 0x11, 0xc5, 0x00, 0x3e, 0x0e, 0xcb, 0x08, 0xc5,
+ 0x15, 0x2e, 0x0e, 0xca, 0xfb, 0x03, 0x5f, 0x3c, 0xc6, 0x04, 0x1b, 0x0e,
+ 0xca, 0xf1, 0xc5, 0x00, 0x3e, 0x0e, 0xca, 0xe8, 0xc2, 0x00, 0x15, 0x0e,
+ 0xca, 0xe0, 0xc2, 0x00, 0x15, 0x0e, 0xca, 0xc0, 0x4c, 0x87, 0x24, 0xc3,
+ 0x5f, 0x42, 0xc5, 0x00, 0x3e, 0x0e, 0xc9, 0x11, 0xc5, 0x15, 0x2e, 0x0e,
+ 0xc9, 0x08, 0xc4, 0xe4, 0x6b, 0x0e, 0xd2, 0x61, 0xc8, 0xc3, 0xb3, 0x0e,
+ 0xd2, 0x58, 0xc4, 0xe4, 0x6b, 0x0e, 0xd2, 0x49, 0xc8, 0xc3, 0xb3, 0x0e,
+ 0xd2, 0x40, 0xcf, 0x61, 0xc2, 0x08, 0xae, 0xb9, 0xce, 0x71, 0x2e, 0x08,
+ 0xae, 0xb1, 0xc4, 0x5a, 0x3f, 0x08, 0xae, 0xa8, 0xcd, 0x46, 0x00, 0x08,
+ 0xae, 0x91, 0x49, 0xac, 0x85, 0x43, 0x5f, 0x4e, 0xd0, 0x5e, 0x5f, 0x08,
+ 0xae, 0x71, 0xd0, 0x5b, 0x8f, 0x08, 0xae, 0x69, 0xc9, 0x46, 0x04, 0x08,
0xae, 0x60, 0x8e, 0x08, 0x8d, 0xd8, 0x94, 0x08, 0x8d, 0xc8, 0x8e, 0x08,
- 0x8c, 0x60, 0x94, 0x08, 0x8c, 0x50, 0xd9, 0x1d, 0xde, 0x01, 0x2f, 0x51,
- 0xd8, 0x26, 0x70, 0x01, 0x58, 0xa8, 0xd3, 0x1d, 0xe4, 0x01, 0x2f, 0x49,
- 0xd3, 0x40, 0x6b, 0x01, 0x2d, 0x38, 0xd2, 0x4b, 0x0c, 0x01, 0x2d, 0x41,
- 0xd3, 0x1d, 0xe4, 0x01, 0x58, 0xa0, 0xc6, 0x07, 0x18, 0x01, 0x9e, 0x71,
- 0xc4, 0xd8, 0xc3, 0x01, 0x9d, 0x30, 0xc8, 0x07, 0x08, 0x01, 0x9d, 0x40,
- 0xc2, 0xe6, 0xdf, 0x0f, 0x91, 0xc9, 0xc2, 0xe8, 0x06, 0x0f, 0x91, 0x01,
- 0xc2, 0xe7, 0x2e, 0x0f, 0x90, 0xe0, 0xc2, 0x74, 0xdc, 0x0f, 0x91, 0xa1,
- 0xc2, 0xe6, 0xcc, 0x0f, 0x91, 0x28, 0xc2, 0xe8, 0x22, 0x0f, 0x91, 0x71,
- 0xc2, 0x0b, 0x42, 0x0f, 0x90, 0x90, 0xc2, 0xe7, 0xfa, 0x0f, 0x90, 0xb9,
- 0xc2, 0xe8, 0x0e, 0x0f, 0x90, 0xa8, 0xc2, 0xe1, 0x96, 0x0f, 0x91, 0xc1,
- 0xc2, 0xe8, 0x04, 0x0f, 0x91, 0x10, 0xa5, 0x0f, 0x91, 0xb9, 0xa6, 0x0f,
- 0x91, 0xb0, 0xc2, 0xe7, 0xeb, 0x0f, 0x91, 0x89, 0xc2, 0xe6, 0xf0, 0x0f,
- 0x91, 0x39, 0xc2, 0xe8, 0x0a, 0x0f, 0x90, 0x80, 0xc2, 0x39, 0x3d, 0x0f,
- 0x91, 0x79, 0xc2, 0xe6, 0xfa, 0x0f, 0x91, 0x40, 0xc2, 0xaf, 0xbc, 0x0f,
- 0x90, 0xf9, 0xc2, 0xe8, 0x1a, 0x0f, 0x90, 0xd8, 0xa6, 0x0f, 0x91, 0x51,
- 0x9d, 0x0f, 0x91, 0x48, 0xc6, 0x01, 0xe1, 0x01, 0x20, 0xb8, 0xc2, 0x00,
- 0xc1, 0x00, 0x43, 0x29, 0x83, 0x00, 0x43, 0x20, 0xd3, 0x41, 0x4f, 0x0f,
- 0xc9, 0x69, 0xcc, 0x86, 0x14, 0x0f, 0xcb, 0x80, 0xe0, 0x00, 0xe7, 0x01,
- 0x17, 0xe0, 0xe0, 0x00, 0xe7, 0x01, 0x17, 0xa0, 0xc8, 0x50, 0x00, 0x01,
- 0x0b, 0xf9, 0xc7, 0x0c, 0x4b, 0x01, 0x0b, 0xe8, 0xc2, 0x00, 0x7b, 0x01,
- 0x0b, 0xa3, 0x03, 0x5c, 0x0d, 0xc3, 0x43, 0xcd, 0x01, 0x0b, 0xe0, 0xc4,
- 0x21, 0x31, 0x01, 0x0b, 0xd9, 0x91, 0x01, 0x0b, 0x88, 0xc3, 0xdf, 0x4a,
- 0x08, 0x43, 0x91, 0xc4, 0xd9, 0x77, 0x08, 0x43, 0x78, 0xc4, 0x04, 0x5e,
- 0x05, 0x47, 0xb1, 0xc2, 0x01, 0x47, 0x05, 0x47, 0xa8, 0xc5, 0x01, 0x0f,
- 0x01, 0x5b, 0x1b, 0x03, 0x5c, 0x13, 0xcc, 0x30, 0xd9, 0x01, 0x5a, 0x69,
- 0xcc, 0x82, 0xb4, 0x01, 0x5b, 0x69, 0xcd, 0x79, 0x2b, 0x01, 0x5c, 0x38,
- 0x47, 0x13, 0x72, 0xc3, 0x5c, 0x17, 0xc6, 0x11, 0xa5, 0x01, 0x4a, 0xc9,
- 0xc8, 0xb2, 0xf2, 0x01, 0x4b, 0x08, 0xc8, 0xb2, 0xf2, 0x01, 0x4a, 0xe9,
- 0xc6, 0x11, 0xa5, 0x01, 0x4a, 0xa8, 0xd8, 0x25, 0xc8, 0x0f, 0xc0, 0x59,
- 0x46, 0x04, 0x73, 0xc3, 0x5c, 0x21, 0xcd, 0x77, 0x57, 0x01, 0x0e, 0xf9,
- 0xd0, 0x59, 0x32, 0x01, 0x0d, 0xa9, 0x44, 0x01, 0x1a, 0xc3, 0x5c, 0x2d,
- 0xd1, 0x00, 0xf6, 0x01, 0x48, 0x41, 0xd9, 0x1f, 0xeb, 0x0f, 0xc0, 0x39,
- 0xd5, 0x03, 0x72, 0x0f, 0xc0, 0xb9, 0xcc, 0x82, 0x48, 0x0f, 0xc4, 0xd8,
- 0xc4, 0x18, 0x83, 0x01, 0x27, 0xd9, 0xc2, 0x26, 0x51, 0x01, 0x27, 0xd0,
- 0xc3, 0x0c, 0x5b, 0x01, 0x27, 0xc9, 0xc3, 0x06, 0x9e, 0x01, 0x27, 0xc0,
- 0xc4, 0x04, 0x5e, 0x01, 0x27, 0xb9, 0xc2, 0x01, 0x47, 0x01, 0x27, 0xb0,
- 0xcf, 0x06, 0xf8, 0x01, 0x15, 0x59, 0xce, 0x36, 0x23, 0x01, 0x57, 0x28,
- 0xd0, 0x0f, 0xc8, 0x01, 0x00, 0xf1, 0xd9, 0x0f, 0xbf, 0x01, 0x72, 0x10,
- 0xca, 0x9c, 0x08, 0x01, 0x4c, 0x81, 0xcd, 0x7f, 0xc5, 0x01, 0x4c, 0x70,
- 0x45, 0x00, 0x6c, 0xc3, 0x5c, 0x39, 0xd3, 0x43, 0x50, 0x01, 0x4c, 0xe1,
- 0xc7, 0x02, 0x58, 0x01, 0x80, 0x4b, 0x03, 0x5c, 0x45, 0xd3, 0x1b, 0xfa,
- 0x01, 0x70, 0x01, 0xda, 0x1b, 0xf3, 0x01, 0x70, 0x08, 0x00, 0x43, 0x5c,
- 0x4b, 0xcf, 0x2c, 0x05, 0x01, 0x48, 0x01, 0xd6, 0x2d, 0x07, 0x01, 0x48,
- 0x09, 0x16, 0x43, 0x5c, 0x5d, 0xc5, 0x01, 0x09, 0x01, 0x0e, 0x09, 0x00,
- 0x43, 0x5c, 0x6c, 0xc5, 0x01, 0x09, 0x01, 0x0e, 0x01, 0x00, 0x43, 0x5c,
- 0x84, 0xd2, 0x06, 0x54, 0x0f, 0xc0, 0x11, 0xd5, 0x03, 0x72, 0x0f, 0xc0,
- 0x90, 0x46, 0x00, 0x6b, 0x43, 0x5c, 0x96, 0xc9, 0x03, 0x68, 0x01, 0x58,
- 0x71, 0xc7, 0x08, 0xca, 0x01, 0x58, 0x78, 0xcf, 0x69, 0x89, 0x01, 0x5a,
- 0x41, 0xce, 0x38, 0x53, 0x01, 0x5a, 0x60, 0xc6, 0x01, 0x01, 0x01, 0x0e,
- 0x79, 0xcf, 0x2c, 0x05, 0x01, 0x48, 0x18, 0x87, 0x05, 0x28, 0x88, 0x91,
- 0x05, 0x2c, 0x10, 0xc2, 0x00, 0x67, 0x05, 0x30, 0x81, 0xc2, 0x04, 0x2b,
- 0x05, 0x30, 0x89, 0xc3, 0x19, 0xd0, 0x05, 0x30, 0x91, 0xc2, 0x00, 0xad,
- 0x05, 0x31, 0x51, 0xc2, 0x01, 0x4a, 0x05, 0x31, 0x58, 0x87, 0x05, 0x28,
- 0xf9, 0x90, 0x05, 0x30, 0x28, 0x91, 0x05, 0x2c, 0x80, 0xc3, 0xe7, 0x18,
- 0x0b, 0x54, 0x99, 0xc3, 0xe6, 0x55, 0x0b, 0x54, 0x90, 0x9a, 0x0b, 0x54,
- 0xd9, 0x93, 0x0b, 0x54, 0xd1, 0x85, 0x0b, 0x54, 0xc9, 0x9c, 0x0b, 0x54,
- 0xc0, 0x42, 0x08, 0x0f, 0xc3, 0x5c, 0xa2, 0xc7, 0xc4, 0x19, 0x00, 0x70,
- 0x30, 0x91, 0x00, 0x70, 0x59, 0xc3, 0x14, 0x38, 0x00, 0x71, 0x41, 0xc2,
- 0x00, 0x91, 0x00, 0x71, 0x50, 0x83, 0x00, 0x71, 0x91, 0x8f, 0x00, 0x71,
- 0x99, 0x87, 0x00, 0x72, 0x09, 0x46, 0xd2, 0x07, 0x43, 0x5c, 0xba, 0x8b,
- 0x00, 0x71, 0xa8, 0x87, 0x00, 0x71, 0xb3, 0x03, 0x5c, 0xc6, 0x97, 0x00,
- 0x71, 0xc8, 0x42, 0x00, 0x6e, 0xc3, 0x5c, 0xca, 0xca, 0xa5, 0xc2, 0x00,
- 0x70, 0x89, 0xc7, 0xc7, 0xca, 0x00, 0x70, 0x90, 0x42, 0x08, 0x57, 0xc3,
- 0x5c, 0xda, 0xc7, 0xc9, 0x83, 0x00, 0x71, 0x00, 0xc8, 0xbf, 0xbd, 0x00,
- 0x71, 0x89, 0xc2, 0x13, 0xa5, 0x00, 0x72, 0x41, 0x16, 0xc3, 0x5c, 0xe6,
- 0xc8, 0xbe, 0x0d, 0x00, 0x72, 0x58, 0x94, 0x00, 0x63, 0x00, 0x8e, 0x00,
- 0x63, 0x08, 0xc3, 0xe6, 0x4f, 0x00, 0x78, 0xd1, 0xc4, 0x92, 0x63, 0x00,
- 0x78, 0xd9, 0xc3, 0x6a, 0xca, 0x00, 0x78, 0xe0, 0xc3, 0xe6, 0x4f, 0x00,
- 0x78, 0xe9, 0xc4, 0x92, 0x63, 0x00, 0x78, 0xf1, 0xc3, 0x6a, 0xca, 0x00,
- 0x7e, 0x78, 0xc5, 0x44, 0xbc, 0x08, 0x77, 0xf9, 0xc4, 0xe5, 0x3b, 0x08,
- 0x77, 0xf1, 0xc5, 0xdf, 0x11, 0x08, 0x77, 0xe9, 0xc7, 0xc6, 0x42, 0x00,
- 0x44, 0xd9, 0x0b, 0x43, 0x5c, 0xf2, 0xc5, 0xd9, 0xda, 0x00, 0x46, 0xf9,
- 0xc3, 0xe6, 0xb2, 0x00, 0x46, 0xf1, 0x42, 0x0c, 0x65, 0xc3, 0x5c, 0xfe,
- 0x03, 0x43, 0x5d, 0x08, 0xcc, 0x05, 0x7b, 0x00, 0x37, 0x11, 0xcb, 0x12,
- 0x31, 0x00, 0x36, 0xc0, 0xde, 0x0f, 0x60, 0x00, 0x36, 0xb9, 0xde, 0x0f,
- 0xd8, 0x00, 0x36, 0xb1, 0x4a, 0xa3, 0xa6, 0x43, 0x5d, 0x26, 0xc5, 0x01,
- 0x62, 0x07, 0xdd, 0xf1, 0xc5, 0x00, 0x95, 0x07, 0xdd, 0xe8, 0xc5, 0x01,
- 0x62, 0x07, 0xdd, 0xc9, 0xc5, 0x00, 0x95, 0x07, 0xdd, 0xc0, 0xcd, 0x05,
- 0x7a, 0x07, 0xe8, 0x09, 0xca, 0x2b, 0x13, 0x07, 0xe8, 0xe8, 0x0b, 0xc3,
- 0x5d, 0x32, 0x45, 0x00, 0x6c, 0x43, 0x5d, 0x3e, 0x0b, 0xc3, 0x5d, 0x50,
- 0x45, 0x00, 0x6c, 0x43, 0x5d, 0x5c, 0xca, 0x2b, 0x13, 0x07, 0xe8, 0xf1,
- 0xcd, 0x05, 0x7a, 0x07, 0xe8, 0x10, 0xcd, 0x05, 0x7a, 0x07, 0xe8, 0x01,
- 0xca, 0x2b, 0x13, 0x07, 0xe8, 0xe0, 0xcd, 0x05, 0x7a, 0x07, 0xe7, 0xf9,
- 0xca, 0x2b, 0x13, 0x07, 0xe8, 0xd8, 0x0b, 0xc3, 0x5d, 0x68, 0x45, 0x00,
- 0x6c, 0x43, 0x5d, 0x74, 0x0b, 0xc3, 0x5d, 0x80, 0xd3, 0x43, 0xe8, 0x07,
- 0xed, 0xf8, 0x0b, 0xc3, 0x5d, 0x8c, 0x45, 0x00, 0x6c, 0x43, 0x5d, 0x98,
- 0xcc, 0x05, 0x7b, 0x07, 0xe2, 0x89, 0xcb, 0x12, 0x31, 0x07, 0xe6, 0xb8,
- 0x44, 0x2e, 0x60, 0xc3, 0x5d, 0xa4, 0x0a, 0xc3, 0x5d, 0xb0, 0x45, 0x19,
- 0x9d, 0xc3, 0x5d, 0xbc, 0x4d, 0x08, 0x1a, 0xc3, 0x5d, 0xd2, 0x45, 0x2d,
- 0xfe, 0xc3, 0x5d, 0xde, 0x45, 0x51, 0xe9, 0xc3, 0x5d, 0xf4, 0x44, 0x6f,
- 0x91, 0x43, 0x5e, 0x04, 0x45, 0x4b, 0xf1, 0xc3, 0x5e, 0x10, 0x45, 0x52,
- 0xee, 0xc3, 0x5e, 0x1a, 0x46, 0xd0, 0x57, 0xc3, 0x5e, 0x24, 0xde, 0x0a,
- 0x09, 0x07, 0xe3, 0x18, 0xcd, 0x05, 0x7a, 0x07, 0xe7, 0xd9, 0xca, 0x2b,
- 0x13, 0x07, 0xe8, 0xb8, 0x0b, 0xc3, 0x5e, 0x30, 0x45, 0x00, 0x6c, 0xc3,
- 0x5e, 0x3c, 0xcb, 0x66, 0x54, 0x07, 0xe7, 0x38, 0x0b, 0xc3, 0x5e, 0x4e,
- 0xcb, 0x66, 0x54, 0x07, 0xe9, 0xb1, 0x45, 0x00, 0x6c, 0x43, 0x5e, 0x5a,
- 0x43, 0x02, 0x98, 0xc3, 0x5e, 0x66, 0x43, 0x2e, 0x61, 0x43, 0x5e, 0x76,
- 0x0b, 0xc3, 0x5e, 0x82, 0xcb, 0x66, 0x54, 0x07, 0xe9, 0xa1, 0x45, 0x00,
- 0x6c, 0x43, 0x5e, 0x8e, 0xca, 0x2b, 0x13, 0x07, 0xe9, 0x51, 0xcd, 0x05,
- 0x7a, 0x07, 0xe8, 0x70, 0xcd, 0x05, 0x7a, 0x07, 0xe7, 0xe1, 0xca, 0x2b,
- 0x13, 0x07, 0xe8, 0xc0, 0x45, 0x19, 0x9d, 0xc3, 0x5e, 0x9a, 0x44, 0x19,
- 0xa7, 0xc3, 0x5e, 0xa4, 0x44, 0x6f, 0x91, 0xc3, 0x5e, 0xae, 0xd1, 0x51,
- 0xe9, 0x07, 0xe5, 0x91, 0x4d, 0x08, 0x1a, 0xc3, 0x5e, 0xba, 0x44, 0x2e,
- 0x60, 0x43, 0x5e, 0xc6, 0x42, 0x00, 0xc7, 0xc3, 0x5e, 0xd2, 0x03, 0x43,
- 0x5e, 0xdc, 0xcc, 0x05, 0x7b, 0x07, 0xe1, 0x61, 0xcb, 0x12, 0x31, 0x07,
- 0xe5, 0xe8, 0xce, 0x43, 0xed, 0x07, 0xeb, 0xd1, 0xd7, 0x2b, 0x06, 0x07,
- 0xeb, 0xd9, 0xcf, 0x6b, 0xff, 0x07, 0xeb, 0xc8, 0xcd, 0x05, 0x7a, 0x07,
- 0xe7, 0xb9, 0xca, 0x2b, 0x13, 0x07, 0xe8, 0x98, 0x0b, 0xc3, 0x5e, 0xe8,
- 0x45, 0x00, 0x6c, 0x43, 0x5e, 0xf4, 0x0b, 0xc3, 0x5f, 0x06, 0x4a, 0x73,
- 0x4d, 0x43, 0x5f, 0x12, 0xca, 0x2b, 0x13, 0x07, 0xe8, 0xa1, 0xcd, 0x05,
- 0x7a, 0x07, 0xe7, 0xc0, 0x5e, 0x0f, 0x7e, 0xc3, 0x5f, 0x1e, 0x4e, 0x72,
- 0xf5, 0x43, 0x5f, 0x2a, 0x0b, 0xc3, 0x5f, 0x36, 0xcc, 0x8b, 0xb4, 0x07,
- 0xea, 0x69, 0xcf, 0x65, 0x6f, 0x07, 0xef, 0xb8, 0x44, 0x2e, 0x60, 0xc3,
- 0x5f, 0x40, 0x4d, 0x08, 0x1a, 0xc3, 0x5f, 0x4c, 0x45, 0x19, 0x9d, 0xc3,
- 0x5f, 0x58, 0x45, 0x51, 0xea, 0x43, 0x5f, 0x68, 0x44, 0x2e, 0x60, 0xc3,
- 0x5f, 0x74, 0x4d, 0x08, 0x1a, 0xc3, 0x5f, 0x80, 0xcf, 0x62, 0x81, 0x07,
- 0xe3, 0xc9, 0x45, 0x19, 0x9d, 0xc3, 0x5f, 0x8c, 0xcf, 0x69, 0x5c, 0x07,
- 0xe3, 0xb9, 0xce, 0x6f, 0x91, 0x07, 0xe3, 0xb1, 0xd2, 0x4b, 0x30, 0x07,
- 0xe0, 0x89, 0xcf, 0x66, 0x50, 0x07, 0xe7, 0x30, 0xe0, 0x0a, 0x07, 0x07,
- 0xe2, 0xd8, 0xca, 0x2b, 0x13, 0x07, 0xe3, 0xa9, 0xcd, 0x05, 0x7a, 0x07,
- 0xe0, 0x80, 0xca, 0x2b, 0x13, 0x07, 0xe3, 0xa1, 0xcd, 0x05, 0x7a, 0x07,
- 0xe0, 0x78, 0xca, 0x2b, 0x13, 0x07, 0xe3, 0x91, 0x0b, 0xc3, 0x5f, 0x9c,
- 0xcb, 0x66, 0x54, 0x07, 0xe7, 0x19, 0x45, 0x00, 0x6c, 0x43, 0x5f, 0xa8,
- 0x0b, 0xc3, 0x5f, 0xc6, 0x45, 0x00, 0x6c, 0x43, 0x5f, 0xd2, 0x43, 0x02,
- 0x98, 0xc3, 0x5f, 0xe4, 0x43, 0x2e, 0x61, 0x43, 0x5f, 0xee, 0x0b, 0xc3,
- 0x5f, 0xfa, 0x45, 0x00, 0x6c, 0x43, 0x60, 0x06, 0xcb, 0x66, 0x54, 0x07,
- 0xe7, 0x89, 0xcc, 0x12, 0x30, 0x07, 0xe6, 0xf0, 0x4f, 0x09, 0x8b, 0xc3,
- 0x60, 0x18, 0x42, 0x00, 0x6f, 0x43, 0x60, 0x60, 0xcc, 0x05, 0x7b, 0x07,
- 0xe2, 0xc1, 0xcb, 0x12, 0x31, 0x07, 0xe6, 0xe8, 0x45, 0x19, 0x9d, 0xc3,
- 0x60, 0x6a, 0xce, 0x43, 0xed, 0x07, 0xed, 0x80, 0xcc, 0x05, 0x7b, 0x07,
- 0xe2, 0xa9, 0xcb, 0x12, 0x31, 0x07, 0xe6, 0xd0, 0xcb, 0x66, 0x54, 0x07,
- 0xe7, 0x79, 0xcc, 0x12, 0x30, 0x07, 0xe6, 0xb0, 0x0b, 0xc3, 0x60, 0x76,
- 0x45, 0x00, 0x6c, 0x43, 0x60, 0x82, 0xcc, 0x05, 0x7b, 0x07, 0xe2, 0x71,
- 0xcb, 0x12, 0x31, 0x07, 0xe6, 0xa8, 0xce, 0x43, 0xed, 0x07, 0xec, 0xd1,
- 0xd7, 0x2b, 0x06, 0x07, 0xec, 0xd8, 0xcc, 0x05, 0x7b, 0x07, 0xe2, 0x59,
- 0xcb, 0x12, 0x31, 0x07, 0xe6, 0x90, 0xd7, 0x2b, 0x06, 0x07, 0xec, 0xc9,
- 0x44, 0x19, 0xa7, 0xc3, 0x60, 0x94, 0xce, 0x43, 0xed, 0x07, 0xee, 0x39,
- 0x45, 0x19, 0x9d, 0x43, 0x60, 0xa0, 0xcb, 0x66, 0x54, 0x07, 0xe7, 0x61,
- 0xca, 0x2b, 0x13, 0x07, 0xe4, 0x11, 0x0b, 0xc3, 0x60, 0xac, 0x45, 0x00,
- 0x6c, 0x43, 0x60, 0xb8, 0xcb, 0x66, 0x54, 0x07, 0xe7, 0x59, 0xca, 0x2b,
- 0x13, 0x07, 0xe4, 0x09, 0x0b, 0x43, 0x60, 0xc4, 0xca, 0x2b, 0x13, 0x07,
- 0xe4, 0x21, 0xcd, 0x05, 0x7a, 0x07, 0xe1, 0xf0, 0x48, 0x08, 0x1f, 0xc3,
- 0x60, 0xd0, 0xca, 0x2b, 0x13, 0x07, 0xe4, 0x01, 0xcd, 0x05, 0x7a, 0x07,
- 0xe1, 0xb8, 0xcc, 0x05, 0x7b, 0x07, 0xe1, 0xd1, 0xcb, 0x12, 0x31, 0x07,
- 0xe6, 0x30, 0xcc, 0x05, 0x7b, 0x07, 0xe1, 0xc9, 0xcb, 0x12, 0x31, 0x07,
- 0xe6, 0x28, 0xcc, 0x05, 0x7b, 0x07, 0xe1, 0xc1, 0xcb, 0x12, 0x31, 0x07,
- 0xe6, 0x20, 0xcc, 0x05, 0x7b, 0x07, 0xe0, 0xd9, 0xcb, 0x12, 0x31, 0x07,
- 0xe5, 0x60, 0xcc, 0x05, 0x7b, 0x07, 0xe0, 0xc9, 0xcb, 0x12, 0x31, 0x07,
- 0xe5, 0x58, 0xca, 0x2b, 0x13, 0x07, 0xe8, 0xf9, 0xcd, 0x05, 0x7a, 0x07,
- 0xe8, 0x18, 0xca, 0x2b, 0x13, 0x07, 0xe9, 0x01, 0xcd, 0x05, 0x7a, 0x07,
- 0xe8, 0x20, 0xca, 0x2b, 0x13, 0x07, 0xe4, 0x31, 0xcd, 0x05, 0x7a, 0x07,
- 0xe2, 0x18, 0x4c, 0x83, 0x5c, 0xc3, 0x60, 0xdc, 0x46, 0x09, 0x89, 0x43,
- 0x60, 0xe8, 0xcc, 0x05, 0x7b, 0x07, 0xe2, 0x11, 0xcb, 0x12, 0x31, 0x07,
- 0xe6, 0x60, 0x44, 0x19, 0xa7, 0xc3, 0x60, 0xf4, 0xce, 0x43, 0xed, 0x07,
- 0xed, 0x68, 0xcc, 0x05, 0x7b, 0x07, 0xe2, 0x09, 0xcb, 0x12, 0x31, 0x07,
- 0xe6, 0x58, 0xca, 0x2b, 0x13, 0x07, 0xec, 0x29, 0xcc, 0x12, 0x30, 0x07,
- 0xec, 0x30, 0x0b, 0xc3, 0x61, 0x00, 0x45, 0x00, 0x6c, 0x43, 0x61, 0x0c,
- 0xcc, 0x05, 0x7b, 0x07, 0xe1, 0xf9, 0xcb, 0x12, 0x31, 0x07, 0xe6, 0x48,
- 0x45, 0x2d, 0xfe, 0xc3, 0x61, 0x1e, 0x45, 0x19, 0x9d, 0xc3, 0x61, 0x2a,
- 0xce, 0x43, 0xed, 0x07, 0xed, 0x60, 0x44, 0x2e, 0x60, 0xc3, 0x61, 0x36,
- 0x4d, 0x08, 0x1a, 0xc3, 0x61, 0x42, 0x45, 0x19, 0x9d, 0xc3, 0x61, 0x4e,
- 0x45, 0x51, 0xea, 0x43, 0x61, 0x58, 0xe0, 0x05, 0x67, 0x07, 0xef, 0x88,
- 0xcc, 0x05, 0x7b, 0x07, 0xe1, 0x81, 0xcb, 0x12, 0x31, 0x07, 0xe6, 0x08,
- 0xcc, 0x05, 0x7b, 0x07, 0xe1, 0x79, 0xcb, 0x12, 0x31, 0x07, 0xe6, 0x00,
- 0xca, 0x2b, 0x13, 0x07, 0xeb, 0xe1, 0xcc, 0x12, 0x30, 0x07, 0xeb, 0xe8,
- 0xca, 0x2b, 0x13, 0x07, 0xe3, 0x79, 0xcd, 0x05, 0x7a, 0x07, 0xe0, 0x50,
- 0xca, 0x2b, 0x13, 0x07, 0xe3, 0x71, 0xcd, 0x05, 0x7a, 0x07, 0xe0, 0x48,
- 0xca, 0x2b, 0x13, 0x07, 0xe3, 0x61, 0x0b, 0xc3, 0x61, 0x64, 0xcb, 0x66,
- 0x54, 0x07, 0xe7, 0x08, 0x0b, 0xc3, 0x61, 0x70, 0xd3, 0x43, 0xe8, 0x07,
- 0xec, 0xf0, 0x43, 0x02, 0x98, 0xc3, 0x61, 0x7c, 0x43, 0x2e, 0x61, 0x43,
- 0x61, 0x86, 0xcc, 0x05, 0x7b, 0x07, 0xe0, 0x29, 0xcb, 0x12, 0x31, 0x07,
- 0xe4, 0xe0, 0xc2, 0x14, 0x40, 0x07, 0xea, 0x11, 0x17, 0x43, 0x61, 0x92,
- 0xc8, 0xbe, 0x6d, 0x07, 0xea, 0x79, 0xc7, 0x6e, 0xa5, 0x07, 0xea, 0x00,
- 0xd5, 0x1c, 0x2c, 0x07, 0xe2, 0x49, 0xca, 0x2b, 0x13, 0x07, 0xe4, 0x40,
- 0x0b, 0xc3, 0x61, 0x9f, 0xca, 0x2b, 0x13, 0x07, 0xe4, 0x49, 0xd3, 0x43,
- 0xe8, 0x07, 0xed, 0x88, 0x0b, 0xc3, 0x61, 0xab, 0x45, 0x00, 0x6c, 0x43,
- 0x61, 0xb7, 0x0b, 0xc3, 0x61, 0xc9, 0x45, 0x00, 0x6c, 0x43, 0x61, 0xd5,
- 0x0b, 0xc3, 0x61, 0xe7, 0x45, 0x00, 0x6c, 0x43, 0x61, 0xf3, 0xcc, 0x05,
- 0x7b, 0x07, 0xe1, 0x21, 0xcb, 0x12, 0x31, 0x07, 0xe5, 0xb0, 0xca, 0x2b,
- 0x13, 0x07, 0xeb, 0x79, 0xcc, 0x12, 0x30, 0x07, 0xeb, 0x80, 0xcc, 0x05,
- 0x7b, 0x07, 0xe1, 0x19, 0xcb, 0x12, 0x31, 0x07, 0xe5, 0xa8, 0xd7, 0x2b,
- 0x06, 0x07, 0xeb, 0x71, 0xce, 0x43, 0xed, 0x07, 0xed, 0x58, 0xcb, 0x12,
- 0x31, 0x07, 0xdf, 0xd9, 0xcc, 0x05, 0x7b, 0x07, 0xdf, 0xc8, 0x00, 0x43,
- 0x62, 0x0b, 0x00, 0x43, 0x62, 0x21, 0x00, 0x43, 0x62, 0x37, 0x00, 0x43,
- 0x62, 0x4d, 0x00, 0x43, 0x62, 0x63, 0x00, 0x43, 0x62, 0x73, 0x00, 0x43,
- 0x62, 0x89, 0x00, 0x43, 0x62, 0x9f, 0xc3, 0x0f, 0x60, 0x00, 0x45, 0xe3,
- 0x03, 0x62, 0xab, 0xc4, 0x3a, 0x8e, 0x00, 0x45, 0xe9, 0xc3, 0xb2, 0x7c,
- 0x00, 0x45, 0xd8, 0x00, 0x43, 0x62, 0xb1, 0x00, 0x43, 0x62, 0xc7, 0x00,
- 0x43, 0x62, 0xe0, 0x88, 0x00, 0x32, 0x1b, 0x03, 0x62, 0xf6, 0xca, 0xa3,
- 0x1a, 0x00, 0x31, 0x00, 0xc2, 0x48, 0x12, 0x00, 0x36, 0x4b, 0x03, 0x62,
- 0xfa, 0xc2, 0x1b, 0xd8, 0x00, 0x36, 0x2a, 0x03, 0x62, 0xfe, 0x00, 0x43,
- 0x63, 0x02, 0x00, 0xc3, 0x63, 0x12, 0xc2, 0x0d, 0xf7, 0x00, 0x34, 0x3a,
- 0x03, 0x63, 0x28, 0x00, 0xc3, 0x63, 0x2c, 0xc2, 0x0d, 0xf7, 0x00, 0x33,
- 0xd2, 0x03, 0x63, 0x42, 0x00, 0xc3, 0x63, 0x46, 0xc2, 0x0d, 0xf7, 0x00,
- 0x33, 0xfa, 0x03, 0x63, 0x5a, 0x00, 0x43, 0x63, 0x5e, 0xc6, 0xd3, 0x09,
- 0x00, 0x44, 0x31, 0xc2, 0x00, 0x92, 0x00, 0x31, 0x83, 0x03, 0x63, 0x74,
- 0xc2, 0x0d, 0xf7, 0x00, 0x31, 0x5a, 0x03, 0x63, 0x78, 0x4b, 0x8d, 0x64,
- 0xc3, 0x63, 0x7c, 0xcb, 0x66, 0x54, 0x07, 0xda, 0xc9, 0x0b, 0xc3, 0x63,
- 0x86, 0xca, 0x2b, 0x13, 0x07, 0xda, 0xb8, 0x00, 0x43, 0x63, 0x92, 0x00,
- 0x43, 0x63, 0xa2, 0x00, 0x43, 0x63, 0xc1, 0x00, 0x43, 0x63, 0xcd, 0x00,
- 0x43, 0x63, 0xdf, 0x00, 0x43, 0x63, 0xef, 0x00, 0xc3, 0x63, 0xfb, 0xc2,
- 0x0d, 0xf7, 0x00, 0x34, 0x02, 0x03, 0x64, 0x11, 0x00, 0x43, 0x64, 0x15,
- 0x60, 0x08, 0x07, 0x43, 0x64, 0x25, 0xd0, 0x5b, 0x22, 0x00, 0x33, 0xbb,
- 0x03, 0x64, 0x31, 0xca, 0x2b, 0x13, 0x07, 0xde, 0xc1, 0xcd, 0x05, 0x7a,
- 0x07, 0xde, 0xb8, 0x45, 0x00, 0x6c, 0xc3, 0x64, 0x37, 0xca, 0x2b, 0x13,
- 0x07, 0xf6, 0xb1, 0x0b, 0xc3, 0x64, 0x43, 0xcb, 0x66, 0x54, 0x07, 0xf6,
- 0xc0, 0xcb, 0x66, 0x54, 0x07, 0xdf, 0x39, 0x0b, 0xc3, 0x64, 0x4f, 0xca,
- 0x2b, 0x13, 0x07, 0xdf, 0x28, 0x00, 0x43, 0x64, 0x5b, 0x00, 0x43, 0x64,
- 0x6d, 0x00, 0x43, 0x64, 0x7d, 0x00, 0x43, 0x64, 0x93, 0x00, 0x43, 0x64,
- 0xa9, 0x8e, 0x00, 0x31, 0x7b, 0x03, 0x64, 0xbf, 0xc3, 0x03, 0x2b, 0x00,
- 0x34, 0x63, 0x03, 0x64, 0xc3, 0x86, 0x00, 0x31, 0xb2, 0x03, 0x64, 0xc7,
- 0x8e, 0x00, 0x34, 0x43, 0x03, 0x64, 0xcb, 0xc3, 0x03, 0x2b, 0x00, 0x34,
- 0x6a, 0x03, 0x64, 0xcf, 0x00, 0x43, 0x64, 0xd3, 0x00, 0x43, 0x64, 0xdf,
- 0xc3, 0xb2, 0x7c, 0x00, 0x35, 0x09, 0xc3, 0x0f, 0x60, 0x00, 0x33, 0x79,
- 0xc3, 0x88, 0x60, 0x00, 0x33, 0x70, 0xca, 0x2b, 0x13, 0x07, 0xde, 0xf9,
- 0xcd, 0x05, 0x7a, 0x07, 0xde, 0xf0, 0x00, 0x43, 0x64, 0xef, 0x45, 0x00,
- 0x6c, 0xc3, 0x64, 0xff, 0xcd, 0x05, 0x7a, 0x07, 0xf7, 0x69, 0xca, 0x2b,
- 0x13, 0x07, 0xf7, 0x70, 0x00, 0x43, 0x65, 0x20, 0xca, 0x2b, 0x13, 0x07,
- 0xde, 0xd1, 0xcd, 0x05, 0x7a, 0x07, 0xde, 0xc8, 0x00, 0xc3, 0x65, 0x36,
- 0xc3, 0x90, 0x25, 0x00, 0x35, 0x8a, 0x03, 0x65, 0x46, 0x00, 0x43, 0x65,
- 0x4a, 0x00, 0x43, 0x65, 0x69, 0x8a, 0x00, 0x31, 0x6b, 0x03, 0x65, 0x79,
- 0xc3, 0x09, 0x8b, 0x00, 0x31, 0x0a, 0x03, 0x65, 0x7d, 0x00, 0x43, 0x65,
- 0x83, 0x00, 0x43, 0x65, 0xab, 0x16, 0xc3, 0x65, 0xbd, 0x15, 0xc3, 0x65,
- 0xcd, 0xc3, 0x6f, 0x91, 0x0f, 0x75, 0x99, 0xc3, 0x0f, 0x60, 0x0f, 0x75,
- 0x91, 0xc3, 0xb2, 0x7c, 0x0f, 0x75, 0x81, 0xc3, 0x04, 0x6c, 0x0f, 0x75,
- 0x79, 0xc4, 0x3a, 0x8e, 0x0f, 0x75, 0x69, 0xc4, 0x19, 0x9d, 0x0f, 0x75,
- 0x61, 0xc3, 0x0f, 0x69, 0x0f, 0x75, 0x59, 0xc3, 0x2e, 0x60, 0x0f, 0x75,
- 0x49, 0xc3, 0x19, 0xa7, 0x0f, 0x75, 0x39, 0x42, 0x02, 0xb4, 0xc3, 0x65,
- 0xdf, 0xc3, 0x78, 0xa9, 0x0f, 0x75, 0x29, 0x42, 0x0b, 0xc6, 0xc3, 0x65,
- 0xe9, 0xc4, 0x2d, 0xfe, 0x0f, 0x75, 0x11, 0xc3, 0x88, 0x60, 0x0f, 0x75,
- 0x09, 0xc4, 0x3c, 0x75, 0x0f, 0x75, 0xb9, 0xc5, 0x95, 0xd3, 0x0f, 0x75,
- 0xd8, 0xc3, 0x88, 0x60, 0x0f, 0x70, 0xe1, 0xc4, 0x3a, 0x8e, 0x0f, 0x70,
- 0xe9, 0xc3, 0xb2, 0x7c, 0x0f, 0x70, 0xf1, 0xc3, 0x0f, 0x60, 0x0f, 0x70,
- 0xf8, 0xc4, 0x2d, 0xfe, 0x0f, 0x72, 0x11, 0xc3, 0x19, 0xa7, 0x0f, 0x72,
- 0x39, 0xc3, 0x2e, 0x60, 0x0f, 0x72, 0x49, 0xc3, 0x0f, 0x69, 0x0f, 0x72,
- 0x59, 0xc4, 0x3a, 0x8e, 0x0f, 0x72, 0x69, 0x15, 0xc3, 0x65, 0xf1, 0xc3,
- 0x04, 0x6c, 0x0f, 0x72, 0x79, 0xc3, 0x0f, 0x60, 0x0f, 0x72, 0x91, 0xc4,
- 0x3c, 0x75, 0x0f, 0x72, 0xb9, 0x06, 0xc3, 0x66, 0x03, 0xc5, 0x95, 0xd3,
- 0x0f, 0x72, 0xd8, 0xc3, 0x01, 0x1e, 0x0f, 0x74, 0x01, 0xc2, 0x00, 0x34,
- 0x0f, 0x74, 0x78, 0x8e, 0x0f, 0x74, 0x19, 0x86, 0x0f, 0x74, 0xc8, 0xc2,
- 0x0d, 0xf7, 0x0f, 0x74, 0x21, 0xc2, 0x02, 0x98, 0x0f, 0x74, 0x38, 0xc2,
- 0x00, 0x34, 0x0f, 0x74, 0x31, 0x8a, 0x0f, 0x74, 0xd0, 0xc2, 0x02, 0x98,
- 0x0f, 0x74, 0x41, 0xc2, 0x0d, 0xf7, 0x0f, 0x74, 0xa9, 0x0a, 0x43, 0x66,
- 0x0f, 0xc3, 0x00, 0x4c, 0x0f, 0x74, 0x71, 0xc2, 0x01, 0xf2, 0x0f, 0x74,
- 0x89, 0xc4, 0xe0, 0xfb, 0x0f, 0x74, 0xa0, 0xc2, 0x0d, 0xf7, 0x0f, 0x73,
- 0x21, 0xc2, 0x02, 0x98, 0x0f, 0x73, 0x38, 0xc2, 0x02, 0x98, 0x0f, 0x73,
- 0x41, 0xc2, 0x0d, 0xf7, 0x0f, 0x73, 0xa9, 0xc3, 0x66, 0x50, 0x0f, 0x73,
- 0xb0, 0xc2, 0x0f, 0x61, 0x0f, 0x73, 0x51, 0xc3, 0x19, 0xa7, 0x0f, 0x73,
- 0xb8, 0xc3, 0x00, 0x4c, 0x0f, 0x73, 0x71, 0xc2, 0x01, 0xf2, 0x0f, 0x73,
- 0x89, 0xc4, 0xe0, 0xfb, 0x0f, 0x73, 0xa0, 0xc2, 0x01, 0xf2, 0x0f, 0x73,
- 0xc9, 0x47, 0x3b, 0xb1, 0x43, 0x66, 0x1b, 0xc3, 0x78, 0xa9, 0x00, 0x44,
- 0x21, 0xc5, 0x09, 0x89, 0x00, 0x44, 0x18, 0x49, 0x02, 0x12, 0xc3, 0x66,
- 0x27, 0x48, 0x09, 0x13, 0x43, 0x66, 0x33, 0x51, 0x13, 0x24, 0xc3, 0x66,
- 0x45, 0xd3, 0x41, 0x03, 0x01, 0x2b, 0x91, 0xd3, 0x44, 0x34, 0x01, 0x2b,
- 0x88, 0x45, 0x00, 0x56, 0x43, 0x66, 0x57, 0xc8, 0x03, 0x3b, 0x01, 0x2a,
- 0x71, 0xca, 0x00, 0xf6, 0x01, 0x2a, 0x60, 0xc9, 0xb5, 0x55, 0x01, 0x2b,
- 0xe9, 0xc9, 0x00, 0xf7, 0x01, 0x29, 0xa0, 0x49, 0x29, 0x7f, 0xc3, 0x66,
- 0x69, 0x02, 0x43, 0x66, 0x7f, 0x49, 0x29, 0x7f, 0x43, 0x66, 0x91, 0xce,
- 0x29, 0x88, 0x0f, 0xd0, 0xa1, 0xdb, 0x18, 0x76, 0x0f, 0xd1, 0xf0, 0xce,
- 0x29, 0x88, 0x0f, 0xd0, 0x91, 0xdb, 0x18, 0x76, 0x0f, 0xd1, 0xe0, 0xce,
- 0x29, 0x88, 0x0f, 0xd0, 0x89, 0xdb, 0x18, 0x76, 0x0f, 0xd1, 0xd8, 0xce,
- 0x29, 0x88, 0x0f, 0xd0, 0x81, 0xdb, 0x18, 0x76, 0x0f, 0xd1, 0xd0, 0xc3,
- 0x00, 0x34, 0x0f, 0xd1, 0x21, 0xc5, 0x7c, 0xf9, 0x0f, 0xd1, 0x40, 0xce,
- 0x72, 0xe7, 0x01, 0x34, 0x49, 0xcf, 0x6a, 0x3d, 0x01, 0x34, 0x41, 0xca,
- 0x3c, 0xdd, 0x01, 0x4f, 0x68, 0xc5, 0x07, 0x0a, 0x01, 0x2d, 0x51, 0xc3,
- 0x04, 0x45, 0x01, 0x5a, 0x88, 0xc6, 0x41, 0x49, 0x01, 0x2d, 0xd1, 0xc7,
- 0xb9, 0x4e, 0x01, 0x5a, 0x98, 0xd9, 0x1f, 0x6e, 0x01, 0x1f, 0x78, 0xd2,
- 0x1c, 0xff, 0x01, 0x1f, 0x68, 0xc4, 0x01, 0xf0, 0x01, 0x3d, 0x20, 0xd2,
- 0x1c, 0xff, 0x01, 0x1f, 0x70, 0xc5, 0x08, 0x42, 0x01, 0x30, 0xd1, 0xce,
- 0x25, 0x12, 0x0f, 0xac, 0xe0, 0xc6, 0x07, 0x09, 0x01, 0x2f, 0xf1, 0xc7,
- 0x3f, 0x2e, 0x0f, 0xbc, 0xc9, 0xc7, 0x08, 0xc0, 0x0f, 0xbc, 0xf8, 0xc8,
- 0x59, 0x16, 0x01, 0x5e, 0x30, 0xc8, 0x59, 0x16, 0x01, 0x5e, 0x38, 0x9a,
- 0x01, 0x30, 0x91, 0xc5, 0x6b, 0x55, 0x01, 0x30, 0x89, 0x04, 0xc3, 0x66,
- 0x9d, 0xc8, 0x94, 0xce, 0x0f, 0xaf, 0xa9, 0xc7, 0xc3, 0x63, 0x01, 0x5d,
- 0xe8, 0xc4, 0xe2, 0x0b, 0x00, 0xdb, 0x51, 0xc6, 0xcf, 0x97, 0x00, 0xdb,
- 0x28, 0xc7, 0xc9, 0xd7, 0x00, 0xda, 0x08, 0x90, 0x0b, 0x51, 0x31, 0x96,
- 0x0b, 0x50, 0xb8, 0x91, 0x0b, 0x51, 0x49, 0x97, 0x0b, 0x50, 0xe1, 0xc2,
- 0x23, 0xb4, 0x0b, 0x50, 0x98, 0x83, 0x0b, 0x50, 0x71, 0x87, 0x0b, 0x50,
- 0x40, 0xc2, 0x14, 0x40, 0x0b, 0x51, 0xa1, 0xc2, 0x00, 0x4d, 0x0b, 0x51,
- 0x80, 0x90, 0x0b, 0x51, 0x89, 0xc2, 0xd0, 0x6a, 0x0b, 0x51, 0x29, 0x87,
- 0x0b, 0x50, 0x38, 0xc2, 0x00, 0x4c, 0x0b, 0x50, 0x61, 0x8b, 0x0b, 0x50,
- 0x58, 0x87, 0x0b, 0x51, 0x11, 0xc2, 0xd0, 0x6a, 0x0b, 0x50, 0xf8, 0xc2,
- 0x01, 0x29, 0x0b, 0x51, 0x41, 0xc5, 0xdb, 0x06, 0x0b, 0x51, 0x38, 0xc3,
- 0x85, 0x08, 0x0b, 0x50, 0xd1, 0xc3, 0x3c, 0x50, 0x0b, 0x50, 0x80, 0xc2,
- 0x0f, 0xf5, 0x0b, 0x50, 0xc0, 0xc2, 0x00, 0x3a, 0x0b, 0x50, 0x11, 0x07,
- 0xc3, 0x66, 0xa9, 0xc5, 0xdd, 0x09, 0x0b, 0x4d, 0x10, 0xc2, 0xd0, 0x6a,
- 0x0b, 0x4d, 0xa9, 0x96, 0x0b, 0x4d, 0x48, 0x91, 0x0b, 0x4b, 0xa9, 0x87,
- 0x0b, 0x4f, 0x50, 0x17, 0xc3, 0x66, 0xb1, 0x96, 0x0b, 0x4d, 0xb8, 0x96,
- 0x0b, 0x4e, 0x61, 0xc2, 0x00, 0x49, 0x0b, 0x4d, 0x59, 0xc2, 0x00, 0x11,
- 0x0b, 0x4b, 0xd0, 0x0d, 0xc3, 0x66, 0xbb, 0x83, 0x0b, 0x4f, 0x91, 0xc3,
- 0x85, 0x08, 0x0b, 0x4f, 0x03, 0x03, 0x66, 0xcc, 0x09, 0xc3, 0x66, 0xd0,
- 0xc6, 0xd4, 0xe9, 0x0b, 0x4d, 0x19, 0x11, 0x43, 0x66, 0xd8, 0xc2, 0x01,
- 0xbd, 0x0b, 0x4b, 0x81, 0x03, 0xc3, 0x66, 0xe0, 0x0b, 0x43, 0x66, 0xea,
- 0x17, 0xc3, 0x66, 0xf4, 0xc3, 0x8e, 0x2c, 0x0b, 0x4b, 0xe0, 0x87, 0x0b,
- 0x4e, 0x28, 0x07, 0xc3, 0x66, 0xfe, 0xc5, 0xc2, 0x15, 0x0b, 0x4c, 0x50,
- 0xc2, 0x03, 0xa5, 0x0b, 0x4e, 0x71, 0xc2, 0x06, 0x1f, 0x0b, 0x4d, 0xe0,
- 0xc2, 0x64, 0x9c, 0x0b, 0x4e, 0x09, 0xc2, 0x59, 0xcb, 0x0b, 0x4d, 0x38,
- 0xc7, 0x00, 0x48, 0x0b, 0x4e, 0x01, 0xc7, 0xc8, 0x5d, 0x0b, 0x4d, 0x68,
- 0x8f, 0x0b, 0x4b, 0x91, 0x93, 0x0b, 0x4e, 0xe1, 0x83, 0x0b, 0x4e, 0xdb,
- 0x03, 0x67, 0x0b, 0xc8, 0xb6, 0x85, 0x0b, 0x4c, 0x78, 0x91, 0x0b, 0x4b,
- 0xcb, 0x03, 0x67, 0x0f, 0x93, 0x0b, 0x4e, 0xb0, 0x90, 0x0b, 0x50, 0x01,
- 0x97, 0x0b, 0x4f, 0xea, 0x03, 0x67, 0x13, 0x8f, 0x0b, 0x4d, 0x53, 0x03,
- 0x67, 0x19, 0xc2, 0x0f, 0xf5, 0x0b, 0x4c, 0xb0, 0x03, 0xc3, 0x67, 0x1f,
- 0x87, 0x0b, 0x4f, 0x49, 0x8f, 0x0b, 0x4c, 0x88, 0x83, 0x0b, 0x4b, 0x63,
- 0x03, 0x67, 0x27, 0x42, 0x01, 0xbb, 0x43, 0x67, 0x2b, 0x07, 0x43, 0x67,
- 0x37, 0x17, 0xc3, 0x67, 0x41, 0xc2, 0x04, 0x4e, 0x0b, 0x4c, 0x20, 0xc2,
- 0x00, 0xb7, 0x0b, 0x4e, 0x10, 0x93, 0x0b, 0x4b, 0x71, 0x87, 0x0b, 0x4f,
- 0x80, 0x91, 0x0b, 0x4f, 0x9b, 0x03, 0x67, 0x49, 0xc2, 0x13, 0xc7, 0x0b,
- 0x4e, 0xf1, 0xc5, 0x85, 0x07, 0x0b, 0x4d, 0x20, 0x96, 0x0b, 0x4c, 0x81,
- 0x87, 0x0b, 0x4b, 0xb0, 0x11, 0xc3, 0x67, 0x4d, 0x93, 0x0b, 0x4f, 0xc1,
- 0x8f, 0x0b, 0x4b, 0xd8, 0x92, 0x0b, 0x4b, 0x49, 0x93, 0x0b, 0x4e, 0xc9,
- 0xc2, 0x00, 0xc2, 0x0b, 0x4c, 0xf8, 0x87, 0x0b, 0x4f, 0x61, 0xc3, 0x85,
- 0x08, 0x0b, 0x4c, 0xe8, 0xc2, 0x06, 0x1f, 0x0b, 0x4b, 0x41, 0x87, 0x0b,
- 0x4d, 0x30, 0x93, 0x0b, 0x4f, 0xe1, 0x87, 0x0b, 0x4d, 0xc3, 0x03, 0x67,
- 0x55, 0x92, 0x0b, 0x4c, 0x58, 0xc2, 0x00, 0x4c, 0x0b, 0x4e, 0x18, 0xc2,
- 0x00, 0x4d, 0x0b, 0x4d, 0x29, 0x83, 0x0b, 0x4c, 0x38, 0x93, 0x0b, 0x50,
- 0x08, 0x00, 0xc3, 0x67, 0x59, 0x87, 0x0b, 0x4d, 0xa2, 0x03, 0x67, 0x69,
- 0x90, 0x0b, 0x4f, 0x29, 0x93, 0x0b, 0x4f, 0x21, 0xc3, 0x5e, 0xb1, 0x0b,
- 0x4f, 0x09, 0xc2, 0x00, 0x8f, 0x0b, 0x4d, 0x90, 0xc5, 0x00, 0x79, 0x0b,
- 0x4f, 0x19, 0xc8, 0xb9, 0x75, 0x0b, 0x4f, 0x10, 0x9a, 0x0b, 0x4e, 0xf9,
- 0xc2, 0x0f, 0xf5, 0x0b, 0x4c, 0xbb, 0x03, 0x67, 0x6d, 0x8f, 0x0b, 0x4d,
- 0xf0, 0x96, 0x0b, 0x4d, 0x71, 0xc2, 0x00, 0x4c, 0x0b, 0x4c, 0xa0, 0x09,
- 0xc3, 0x67, 0x71, 0x0d, 0x43, 0x67, 0x87, 0xc2, 0x06, 0x1f, 0x0b, 0x4a,
- 0x01, 0x0a, 0xc3, 0x67, 0xa5, 0x43, 0x8e, 0x2c, 0x43, 0x67, 0xb1, 0x07,
- 0xc3, 0x67, 0xb9, 0xc2, 0x5f, 0x91, 0x0b, 0x4b, 0x10, 0xc2, 0x00, 0xc2,
- 0x0b, 0x49, 0xb9, 0x07, 0xc3, 0x67, 0xc3, 0xc2, 0x00, 0xb7, 0x0b, 0x48,
- 0xc0, 0x8b, 0x0b, 0x4a, 0x69, 0xc2, 0x0f, 0x4d, 0x0b, 0x49, 0x79, 0xc2,
- 0x00, 0x49, 0x0b, 0x49, 0x11, 0xc2, 0x00, 0xc2, 0x0b, 0x47, 0xd0, 0xc3,
- 0xe0, 0xa0, 0x0b, 0x4a, 0x39, 0x42, 0x11, 0x70, 0xc3, 0x67, 0xcd, 0xc2,
- 0x03, 0xa5, 0x0b, 0x48, 0x11, 0x8b, 0x0b, 0x47, 0x9a, 0x03, 0x67, 0xd7,
- 0x17, 0xc3, 0x67, 0xdd, 0xc3, 0xb1, 0xe2, 0x0b, 0x4a, 0x79, 0x96, 0x0b,
- 0x49, 0x80, 0xc5, 0xd8, 0xe0, 0x0b, 0x4a, 0x11, 0xc5, 0xd8, 0x2a, 0x0b,
- 0x48, 0x50, 0x17, 0xc3, 0x67, 0xe7, 0xc3, 0xb1, 0xe2, 0x0b, 0x4a, 0x80,
- 0xc2, 0x14, 0x40, 0x0b, 0x49, 0x03, 0x03, 0x67, 0xef, 0xc2, 0x00, 0xb1,
- 0x0b, 0x47, 0x88, 0xc3, 0x8e, 0x2c, 0x0b, 0x49, 0x91, 0x42, 0x11, 0x70,
- 0xc3, 0x67, 0xf5, 0x91, 0x0b, 0x48, 0xea, 0x03, 0x67, 0xff, 0xc3, 0x8e,
- 0x2c, 0x0b, 0x48, 0xe1, 0xc3, 0x59, 0xcf, 0x0b, 0x48, 0xd1, 0xc4, 0xe6,
- 0x23, 0x0b, 0x48, 0xb0, 0x17, 0xc3, 0x68, 0x03, 0xc3, 0xb1, 0xe2, 0x0b,
- 0x49, 0x40, 0xc2, 0x00, 0xb2, 0x0b, 0x49, 0xe8, 0x93, 0x0b, 0x49, 0xf9,
- 0x90, 0x0b, 0x49, 0xd1, 0xc2, 0x00, 0x3a, 0x0b, 0x48, 0x30, 0x17, 0xc3,
- 0x68, 0x11, 0x96, 0x0b, 0x48, 0x20, 0xc2, 0x0f, 0xf5, 0x0b, 0x49, 0xc9,
- 0x97, 0x0b, 0x4a, 0x91, 0x87, 0x0b, 0x48, 0x18, 0x93, 0x0b, 0x4b, 0x21,
- 0x92, 0x0b, 0x48, 0x38, 0xc2, 0x89, 0x44, 0x0b, 0x4a, 0xe1, 0x97, 0x0b,
- 0x4a, 0xc1, 0x07, 0xc3, 0x68, 0x25, 0xc2, 0x23, 0xb4, 0x0b, 0x4a, 0xa0,
- 0x11, 0xc3, 0x68, 0x2d, 0xc3, 0xe6, 0x85, 0x0b, 0x49, 0x28, 0xc4, 0xdd,
- 0x22, 0x0b, 0x4b, 0x01, 0xc3, 0x1e, 0x5b, 0x0b, 0x4a, 0x50, 0x93, 0x0b,
- 0x4a, 0xe9, 0xc2, 0x00, 0x84, 0x0b, 0x48, 0xd8, 0x87, 0x0b, 0x4a, 0xd1,
- 0xc4, 0xe0, 0x27, 0x0b, 0x49, 0x70, 0x42, 0x00, 0x4e, 0xc3, 0x68, 0x35,
- 0x17, 0xc3, 0x68, 0x41, 0x96, 0x0b, 0x46, 0x48, 0xca, 0xa2, 0x52, 0x0b,
- 0x46, 0xa9, 0x96, 0x0b, 0x46, 0x70, 0xc2, 0x13, 0xc7, 0x0b, 0x47, 0x41,
- 0xc3, 0xe0, 0xa0, 0x0b, 0x46, 0xd8, 0xc4, 0xdf, 0xcf, 0x0b, 0x46, 0xe1,
- 0xc2, 0xd0, 0x6a, 0x0b, 0x45, 0x50, 0x96, 0x0b, 0x47, 0x81, 0xc5, 0xdd,
- 0xea, 0x0b, 0x45, 0xd0, 0xc4, 0xd1, 0x67, 0x0b, 0x46, 0x31, 0xc5, 0xdc,
- 0xd7, 0x0b, 0x45, 0x70, 0x90, 0x0b, 0x47, 0x71, 0xc5, 0xd6, 0x1a, 0x0b,
- 0x44, 0xe0, 0x8f, 0x0b, 0x46, 0x29, 0x92, 0x0b, 0x45, 0xb0, 0x93, 0x0b,
- 0x47, 0x61, 0xc6, 0xcd, 0x57, 0x0b, 0x45, 0x90, 0xc2, 0x59, 0xcb, 0x0b,
- 0x47, 0x59, 0x09, 0xc3, 0x68, 0x4f, 0xc2, 0x00, 0x3a, 0x0b, 0x46, 0x81,
- 0x0d, 0x43, 0x68, 0x5c, 0x07, 0xc3, 0x68, 0x68, 0x03, 0xc3, 0x68, 0x74,
- 0xc3, 0xe0, 0xa0, 0x0b, 0x45, 0x68, 0x03, 0xc3, 0x68, 0x7e, 0x42, 0x11,
- 0x70, 0xc3, 0x68, 0x86, 0xc3, 0x82, 0x34, 0x0b, 0x45, 0x59, 0xc4, 0xc1,
- 0x2d, 0x0b, 0x44, 0xe8, 0x17, 0xc3, 0x68, 0x90, 0xc2, 0x00, 0x3a, 0x0b,
- 0x46, 0x99, 0xc3, 0xe7, 0xe1, 0x0b, 0x45, 0xf9, 0x83, 0x0b, 0x45, 0xf1,
- 0xc5, 0xad, 0x64, 0x0b, 0x45, 0x28, 0x07, 0xc3, 0x68, 0x9a, 0xc2, 0x14,
- 0x40, 0x0b, 0x45, 0xa1, 0xc6, 0xd4, 0xb9, 0x0b, 0x44, 0xd0, 0xc3, 0x4f,
- 0x0f, 0x0b, 0x45, 0x19, 0x83, 0x0b, 0x44, 0x80, 0x03, 0xc3, 0x68, 0xa4,
- 0x07, 0xc3, 0x68, 0xb0, 0x8b, 0x0b, 0x46, 0xeb, 0x03, 0x68, 0xc0, 0x17,
- 0x43, 0x68, 0xca, 0x07, 0xc3, 0x68, 0xd4, 0x00, 0x43, 0x68, 0xe0, 0xc3,
- 0xe6, 0x85, 0x0b, 0x47, 0x21, 0xc7, 0xc8, 0xdb, 0x0b, 0x45, 0x11, 0x8f,
- 0x0b, 0x44, 0x88, 0x92, 0x0b, 0x45, 0x01, 0xc3, 0x0f, 0xf4, 0x0b, 0x44,
- 0xb0, 0x09, 0xc3, 0x68, 0xec, 0xc2, 0x00, 0x3a, 0x0b, 0x44, 0x71, 0xca,
- 0xa8, 0x24, 0x0b, 0x43, 0xa0, 0xc2, 0x00, 0x4d, 0x0b, 0x44, 0x59, 0xc4,
- 0xc4, 0xa1, 0x0b, 0x42, 0xb8, 0xc5, 0xdc, 0xb9, 0x0b, 0x44, 0x01, 0xc7,
- 0xc7, 0xa7, 0x0b, 0x43, 0x68, 0xc9, 0xb1, 0x92, 0x0b, 0x43, 0x59, 0xc4,
- 0x91, 0xe5, 0x0b, 0x43, 0xe0, 0x43, 0x7a, 0x4b, 0x43, 0x69, 0x01, 0xc3,
- 0x90, 0x0e, 0x0b, 0x44, 0x21, 0xc4, 0xe1, 0x9f, 0x0b, 0x43, 0xf1, 0xca,
- 0x9e, 0x10, 0x0b, 0x43, 0x61, 0x03, 0x43, 0x69, 0x0d, 0xc8, 0xba, 0x15,
- 0x0b, 0x44, 0x11, 0x93, 0x0b, 0x43, 0xc8, 0x93, 0x0b, 0x44, 0x69, 0xc3,
- 0x14, 0xa3, 0x0b, 0x42, 0xe8, 0xc3, 0x3c, 0x50, 0x0b, 0x44, 0x31, 0xc4,
- 0xe0, 0x73, 0x0b, 0x43, 0x81, 0xc3, 0xe6, 0xac, 0x0b, 0x43, 0x70, 0xc4,
- 0x3c, 0x4e, 0x0b, 0x43, 0x89, 0xcc, 0x83, 0x80, 0x0b, 0x43, 0x18, 0xc6,
- 0xd3, 0x93, 0x0b, 0x43, 0x51, 0xc6, 0xd0, 0x33, 0x0b, 0x43, 0x48, 0xc5,
- 0xda, 0x4d, 0x0b, 0x43, 0x41, 0xc9, 0xa9, 0xdf, 0x0b, 0x42, 0xc0, 0x96,
- 0x0b, 0x42, 0x59, 0x93, 0x0b, 0x41, 0xe1, 0xc4, 0xe5, 0xa7, 0x0b, 0x41,
- 0x80, 0xcc, 0x89, 0x44, 0x0b, 0x42, 0x01, 0x0b, 0xc3, 0x69, 0x19, 0x17,
- 0x43, 0x69, 0x25, 0xc3, 0x5e, 0xb1, 0x0b, 0x42, 0x51, 0xc6, 0xd1, 0xa1,
- 0x0b, 0x41, 0x88, 0xc3, 0x6d, 0xc2, 0x0b, 0x41, 0x71, 0xc7, 0xb4, 0xb5,
- 0x0b, 0x40, 0x60, 0x93, 0x0b, 0x42, 0x81, 0xc2, 0x00, 0x67, 0x0b, 0x41,
- 0x38, 0x96, 0x0b, 0x41, 0x99, 0xc8, 0xbd, 0x4d, 0x0b, 0x40, 0x98, 0x07,
- 0xc3, 0x69, 0x2f, 0xc7, 0xc8, 0xf7, 0x0b, 0x41, 0xe9, 0xc5, 0xda, 0x43,
- 0x0b, 0x40, 0x78, 0x93, 0x0b, 0x42, 0xb1, 0xc3, 0x15, 0x1c, 0x0b, 0x42,
- 0x40, 0x42, 0x00, 0x3a, 0xc3, 0x69, 0x48, 0xca, 0xa5, 0x04, 0x0b, 0x40,
- 0xf0, 0x93, 0x0b, 0x42, 0xa9, 0xc6, 0xb9, 0xa7, 0x0b, 0x40, 0x20, 0x83,
- 0x0b, 0x42, 0x89, 0xc3, 0x8e, 0x2c, 0x0b, 0x42, 0x68, 0x8b, 0x0b, 0x42,
- 0x7b, 0x03, 0x69, 0x54, 0xc2, 0x00, 0x49, 0x0b, 0x42, 0x48, 0xc3, 0x51,
- 0x0c, 0x0b, 0x42, 0x29, 0x43, 0xb8, 0xab, 0xc3, 0x69, 0x5a, 0xc4, 0x0a,
- 0xab, 0x0b, 0x40, 0x68, 0xc5, 0xa2, 0x57, 0x0b, 0x42, 0x19, 0xc4, 0x06,
- 0x91, 0x0b, 0x40, 0xa0, 0xc2, 0x03, 0xa5, 0x0b, 0x41, 0xfb, 0x03, 0x69,
- 0x66, 0xc5, 0xdd, 0x7c, 0x0b, 0x40, 0x90, 0xc9, 0xae, 0x20, 0x0b, 0x41,
- 0xa1, 0xc9, 0x84, 0xf7, 0x0b, 0x41, 0x48, 0xc7, 0xc4, 0xeb, 0x0b, 0x40,
- 0xf9, 0xc6, 0xb9, 0xa7, 0x0b, 0x40, 0x38, 0xc3, 0x6d, 0xc2, 0x0b, 0x41,
- 0x78, 0x03, 0xc3, 0x69, 0x6a, 0xc9, 0x84, 0xf7, 0x0b, 0x41, 0x41, 0xc5,
- 0xdd, 0x31, 0x0b, 0x40, 0xe9, 0xc4, 0x96, 0x38, 0x0b, 0x40, 0xd8, 0x4d,
- 0x7a, 0x49, 0xc3, 0x69, 0x74, 0x4b, 0x92, 0xfd, 0x43, 0x69, 0x80, 0xc6,
- 0xd0, 0x21, 0x0b, 0x41, 0x09, 0xc3, 0x0f, 0xf4, 0x0b, 0x40, 0xe0, 0xa1,
- 0x01, 0x40, 0x7b, 0x03, 0x69, 0x8c, 0xa2, 0x01, 0x40, 0xbb, 0x03, 0x69,
- 0xa5, 0xa3, 0x01, 0x41, 0x3b, 0x03, 0x69, 0xb7, 0xa5, 0x01, 0x44, 0x39,
- 0xa4, 0x01, 0x42, 0x3a, 0x03, 0x69, 0xc2, 0xa2, 0x01, 0x40, 0xdb, 0x03,
- 0x69, 0xc6, 0xa3, 0x01, 0x41, 0x5b, 0x03, 0x69, 0xd8, 0xa5, 0x01, 0x44,
- 0x59, 0xa4, 0x01, 0x42, 0x5a, 0x03, 0x69, 0xe3, 0xa3, 0x01, 0x41, 0x9b,
- 0x03, 0x69, 0xe7, 0xa5, 0x01, 0x44, 0x99, 0xa4, 0x01, 0x42, 0x9a, 0x03,
- 0x69, 0xf2, 0xa5, 0x01, 0x45, 0x19, 0xa4, 0x01, 0x43, 0x1a, 0x03, 0x69,
- 0xf6, 0xa5, 0x01, 0x46, 0x18, 0xa2, 0x01, 0x40, 0xeb, 0x03, 0x69, 0xfa,
- 0xa3, 0x01, 0x41, 0x6b, 0x03, 0x6a, 0x0c, 0xa5, 0x01, 0x44, 0x69, 0xa4,
- 0x01, 0x42, 0x6a, 0x03, 0x6a, 0x17, 0xa3, 0x01, 0x41, 0xab, 0x03, 0x6a,
- 0x1b, 0xa5, 0x01, 0x44, 0xa9, 0xa4, 0x01, 0x42, 0xaa, 0x03, 0x6a, 0x26,
- 0xa5, 0x01, 0x45, 0x29, 0xa4, 0x01, 0x43, 0x2a, 0x03, 0x6a, 0x2a, 0xa5,
- 0x01, 0x46, 0x28, 0xa3, 0x01, 0x41, 0xcb, 0x03, 0x6a, 0x2e, 0xa5, 0x01,
- 0x44, 0xc9, 0xa4, 0x01, 0x42, 0xca, 0x03, 0x6a, 0x39, 0xa5, 0x01, 0x45,
- 0x49, 0xa4, 0x01, 0x43, 0x4a, 0x03, 0x6a, 0x3d, 0xa5, 0x01, 0x46, 0x48,
- 0xa5, 0x01, 0x45, 0x89, 0xa4, 0x01, 0x43, 0x8a, 0x03, 0x6a, 0x41, 0xa5,
- 0x01, 0x46, 0x88, 0xa5, 0x01, 0x47, 0x08, 0xa2, 0x01, 0x40, 0xf3, 0x03,
- 0x6a, 0x45, 0xa3, 0x01, 0x41, 0x73, 0x03, 0x6a, 0x57, 0xa5, 0x01, 0x44,
- 0x71, 0xa4, 0x01, 0x42, 0x72, 0x03, 0x6a, 0x62, 0xa3, 0x01, 0x41, 0xb3,
- 0x03, 0x6a, 0x66, 0xa5, 0x01, 0x44, 0xb1, 0xa4, 0x01, 0x42, 0xb2, 0x03,
- 0x6a, 0x71, 0xa5, 0x01, 0x45, 0x31, 0xa4, 0x01, 0x43, 0x32, 0x03, 0x6a,
- 0x75, 0xa5, 0x01, 0x46, 0x30, 0xa3, 0x01, 0x41, 0xd3, 0x03, 0x6a, 0x79,
- 0xa5, 0x01, 0x44, 0xd1, 0xa4, 0x01, 0x42, 0xd2, 0x03, 0x6a, 0x84, 0xa5,
- 0x01, 0x45, 0x51, 0xa4, 0x01, 0x43, 0x52, 0x03, 0x6a, 0x88, 0xa5, 0x01,
- 0x46, 0x50, 0xa5, 0x01, 0x45, 0x91, 0xa4, 0x01, 0x43, 0x92, 0x03, 0x6a,
- 0x8c, 0xa5, 0x01, 0x46, 0x90, 0xa5, 0x01, 0x47, 0x10, 0xa3, 0x01, 0x41,
- 0xe3, 0x03, 0x6a, 0x90, 0xa5, 0x01, 0x44, 0xe1, 0xa4, 0x01, 0x42, 0xe2,
- 0x03, 0x6a, 0x9b, 0xa5, 0x01, 0x45, 0x61, 0xa4, 0x01, 0x43, 0x62, 0x03,
- 0x6a, 0x9f, 0xa5, 0x01, 0x46, 0x60, 0xa5, 0x01, 0x45, 0xa1, 0xa4, 0x01,
- 0x43, 0xa2, 0x03, 0x6a, 0xa3, 0xa5, 0x01, 0x46, 0xa0, 0xa5, 0x01, 0x47,
- 0x20, 0xa5, 0x01, 0x45, 0xc1, 0xa4, 0x01, 0x43, 0xc2, 0x03, 0x6a, 0xa7,
- 0xa5, 0x01, 0x46, 0xc0, 0xa5, 0x01, 0x47, 0x40, 0xa5, 0x01, 0x47, 0x80,
- 0xc3, 0x15, 0x88, 0x0e, 0x84, 0x11, 0xc7, 0x9f, 0x0d, 0x0e, 0x84, 0x08,
- 0xc3, 0x7e, 0xff, 0x0e, 0x82, 0x89, 0xc5, 0xce, 0x96, 0x0e, 0x80, 0x90,
- 0xc3, 0x8f, 0x80, 0x0e, 0x84, 0xa1, 0xc4, 0x8f, 0x7c, 0x0e, 0x84, 0x98,
- 0xc6, 0x02, 0x21, 0x0f, 0xd9, 0xf1, 0xc5, 0x01, 0xf7, 0x0f, 0xd9, 0xf9,
- 0xcc, 0x02, 0x0b, 0x0f, 0xda, 0x88, 0x46, 0x01, 0xd1, 0xc3, 0x6a, 0xab,
- 0xd2, 0x48, 0x3c, 0x0f, 0xda, 0x68, 0xd2, 0x48, 0x3c, 0x0f, 0xda, 0x61,
- 0x46, 0x01, 0xd1, 0x43, 0x6a, 0xb7, 0xc6, 0x02, 0x21, 0x0f, 0xda, 0x29,
- 0xcc, 0x02, 0x0b, 0x0f, 0xda, 0x50, 0xcc, 0x02, 0x0b, 0x0f, 0xda, 0x49,
- 0xc5, 0x01, 0xf7, 0x0f, 0xda, 0x58, 0xd4, 0x34, 0x79, 0x0f, 0xdc, 0xd9,
- 0xc3, 0x00, 0xdf, 0x01, 0x3e, 0xd8, 0xe0, 0x0a, 0xa7, 0x0f, 0xdb, 0x48,
- 0xe0, 0x0a, 0xa7, 0x0f, 0xdb, 0x58, 0xc7, 0x01, 0x47, 0x0f, 0xc8, 0x29,
- 0xc9, 0x04, 0x5e, 0x0f, 0xc8, 0x20, 0xd6, 0x2d, 0x07, 0x01, 0x0f, 0xe1,
- 0xcf, 0x2c, 0x05, 0x01, 0x0f, 0xc9, 0xc6, 0x01, 0x01, 0x01, 0x0d, 0x70,
- 0xcd, 0x7f, 0xc5, 0x01, 0x4c, 0x79, 0xca, 0x9c, 0x08, 0x01, 0x4c, 0x68,
- 0x00, 0x43, 0x6a, 0xc3, 0xcf, 0x2c, 0x05, 0x01, 0x59, 0xa1, 0xd6, 0x2d,
- 0x07, 0x01, 0x59, 0xa9, 0x16, 0x43, 0x6a, 0xd5, 0xd2, 0x06, 0x54, 0x0f,
- 0xc0, 0x01, 0xd5, 0x03, 0x72, 0x0f, 0xc0, 0x80, 0x46, 0x00, 0x6b, 0x43,
- 0x6a, 0xe4, 0xc9, 0x03, 0x68, 0x01, 0x58, 0x81, 0xc7, 0x08, 0xca, 0x01,
- 0x58, 0x88, 0xdd, 0x11, 0x8e, 0x01, 0x0d, 0xc8, 0xcf, 0x69, 0x89, 0x01,
- 0x5a, 0x11, 0xce, 0x38, 0x53, 0x01, 0x5a, 0x58, 0xc6, 0x01, 0x01, 0x01,
- 0x0e, 0x69, 0xcf, 0x2c, 0x05, 0x01, 0x48, 0x10, 0xc5, 0x01, 0x09, 0x01,
- 0x0d, 0xe9, 0x00, 0x43, 0x6a, 0xf0, 0xc5, 0x01, 0x09, 0x01, 0x0d, 0xe1,
- 0x00, 0x43, 0x6b, 0x08, 0x02, 0xc3, 0x6b, 0x1a, 0xc2, 0x01, 0x1d, 0x08,
- 0x3a, 0x40, 0x9e, 0x08, 0x30, 0x01, 0x9f, 0x08, 0x30, 0x09, 0xa0, 0x08,
- 0x30, 0x11, 0xa1, 0x08, 0x30, 0x19, 0xa2, 0x08, 0x30, 0x21, 0xa3, 0x08,
- 0x30, 0x29, 0xa4, 0x08, 0x30, 0x31, 0xa5, 0x08, 0x30, 0x39, 0xa6, 0x08,
- 0x30, 0x40, 0x9d, 0x08, 0x30, 0x49, 0xa0, 0x08, 0x30, 0x59, 0xa3, 0x08,
- 0x30, 0x61, 0xa4, 0x08, 0x30, 0x69, 0x9e, 0x08, 0x30, 0x50, 0x9d, 0x08,
- 0x30, 0x71, 0x9e, 0x08, 0x30, 0x7b, 0x03, 0x6b, 0x32, 0x9f, 0x08, 0x30,
- 0x93, 0x03, 0x6b, 0x3a, 0xa0, 0x08, 0x30, 0xab, 0x03, 0x6b, 0x42, 0xa1,
- 0x08, 0x30, 0xb9, 0xa3, 0x08, 0x30, 0xc1, 0xa4, 0x08, 0x30, 0xc9, 0xa5,
- 0x08, 0x30, 0xd1, 0xa6, 0x08, 0x30, 0xe0, 0x9d, 0x08, 0x30, 0xe9, 0x9e,
- 0x08, 0x30, 0xf1, 0xa1, 0x08, 0x30, 0xf9, 0xa4, 0x08, 0x31, 0x01, 0xa5,
- 0x08, 0x31, 0x09, 0xa6, 0x08, 0x31, 0x10, 0x9d, 0x08, 0x31, 0x19, 0x9e,
- 0x08, 0x31, 0x21, 0xa1, 0x08, 0x31, 0x29, 0xa2, 0x08, 0x31, 0x31, 0xa3,
- 0x08, 0x31, 0x39, 0xa4, 0x08, 0x31, 0x41, 0xa5, 0x08, 0x31, 0x49, 0xa6,
- 0x08, 0x31, 0x50, 0x9d, 0x08, 0x31, 0x59, 0x9e, 0x08, 0x31, 0x61, 0xa0,
- 0x08, 0x31, 0x69, 0xa1, 0x08, 0x31, 0x71, 0xa2, 0x08, 0x31, 0x79, 0xa3,
- 0x08, 0x31, 0x81, 0xa4, 0x08, 0x31, 0x89, 0xa5, 0x08, 0x31, 0x91, 0xa6,
- 0x08, 0x31, 0x98, 0x9d, 0x08, 0x31, 0xa1, 0x9e, 0x08, 0x31, 0xa9, 0xa2,
- 0x08, 0x31, 0xb1, 0xa3, 0x08, 0x31, 0xb9, 0xa4, 0x08, 0x31, 0xc1, 0xa6,
- 0x08, 0x31, 0xc8, 0x9d, 0x08, 0x31, 0xd1, 0xa0, 0x08, 0x31, 0xd9, 0xa1,
- 0x08, 0x31, 0xe1, 0xa3, 0x08, 0x31, 0xe9, 0xa4, 0x08, 0x31, 0xf1, 0xa5,
- 0x08, 0x31, 0xf9, 0xa6, 0x08, 0x32, 0x00, 0x9d, 0x08, 0x32, 0x09, 0x9e,
- 0x08, 0x32, 0x11, 0x9f, 0x08, 0x32, 0x19, 0xa3, 0x08, 0x32, 0x29, 0xa4,
- 0x08, 0x32, 0x31, 0xa2, 0x08, 0x32, 0x20, 0x9f, 0x08, 0x32, 0x59, 0xa0,
- 0x08, 0x32, 0x61, 0x9d, 0x08, 0x32, 0x48, 0x83, 0x08, 0x32, 0x69, 0x84,
- 0x08, 0x32, 0x70, 0x9d, 0x08, 0x32, 0x91, 0xa5, 0x08, 0x32, 0x98, 0x83,
- 0x08, 0x32, 0xe9, 0x84, 0x08, 0x32, 0xf1, 0x85, 0x08, 0x32, 0xf8, 0x83,
- 0x08, 0x33, 0x19, 0x84, 0x08, 0x33, 0x21, 0x85, 0x08, 0x33, 0x28, 0xc3,
- 0xe6, 0xe5, 0x08, 0x00, 0x01, 0xc4, 0xe3, 0x5f, 0x08, 0x00, 0xc9, 0xc4,
- 0xe1, 0xb3, 0x08, 0x00, 0xf1, 0xc4, 0xe4, 0x3f, 0x08, 0x01, 0x99, 0xc4,
- 0xe4, 0xd7, 0x08, 0x01, 0xa9, 0xc4, 0xe2, 0x4b, 0x08, 0x00, 0x29, 0xc4,
- 0xae, 0x34, 0x08, 0x00, 0x39, 0xc4, 0xe1, 0x6b, 0x08, 0x01, 0x59, 0xc4,
- 0xe3, 0x8b, 0x08, 0x01, 0x70, 0xc4, 0xe4, 0x27, 0x08, 0x00, 0x41, 0xc4,
- 0xe1, 0x7f, 0x08, 0x00, 0xa9, 0xc4, 0xe2, 0xdb, 0x08, 0x01, 0x09, 0xc4,
- 0xe3, 0xd7, 0x08, 0x01, 0xe1, 0xc3, 0xe7, 0x45, 0x08, 0x00, 0x21, 0xc4,
- 0xe4, 0xb3, 0x08, 0x00, 0xb9, 0xc4, 0xe3, 0x57, 0x08, 0x01, 0x19, 0xc4,
- 0xe1, 0x63, 0x08, 0x01, 0x80, 0xc4, 0xe2, 0x43, 0x08, 0x00, 0x49, 0xc4,
- 0xe1, 0x33, 0x08, 0x00, 0xe1, 0xc4, 0xe4, 0xaf, 0x08, 0x00, 0xe9, 0xc4,
- 0xe6, 0x13, 0x08, 0x01, 0x11, 0xc4, 0xe4, 0x37, 0x08, 0x01, 0xb9, 0xc4,
- 0xe3, 0xcb, 0x08, 0x00, 0x51, 0xc4, 0xe1, 0xaf, 0x08, 0x01, 0x51, 0xc4,
- 0xe3, 0xa7, 0x08, 0x01, 0x89, 0xc4, 0xe3, 0x97, 0x08, 0x01, 0x90, 0xc4,
- 0xe4, 0x4f, 0x08, 0x00, 0x81, 0xc4, 0xe6, 0x2f, 0x08, 0x01, 0xc9, 0xc4,
- 0xc1, 0xa3, 0x08, 0x01, 0xd1, 0xc4, 0xe1, 0xf3, 0x08, 0x02, 0x09, 0xc5,
- 0xdb, 0xbf, 0x08, 0x02, 0x29, 0xc4, 0xe4, 0x23, 0x08, 0x00, 0x31, 0xc4,
- 0xe3, 0x2b, 0x08, 0x00, 0x59, 0xc4, 0xe2, 0xcb, 0x08, 0x01, 0x78, 0xc4,
- 0xe3, 0xe7, 0x08, 0x00, 0x89, 0xc4, 0xe3, 0x7b, 0x08, 0x01, 0xb1, 0xc5,
- 0xda, 0x2f, 0x08, 0x02, 0x39, 0xc5, 0xdb, 0xc9, 0x08, 0x02, 0x51, 0xc5,
- 0xdb, 0xba, 0x08, 0x02, 0x59, 0xc3, 0x74, 0xd1, 0x08, 0x00, 0x19, 0xc4,
- 0xe4, 0xc3, 0x08, 0x00, 0x71, 0xc4, 0xe6, 0x3f, 0x08, 0x01, 0x40, 0xc4,
- 0xe2, 0x5f, 0x08, 0x00, 0x99, 0xc4, 0xde, 0x62, 0x08, 0x00, 0xa1, 0xc4,
- 0xe3, 0xef, 0x08, 0x02, 0x11, 0xc5, 0xdc, 0x37, 0x08, 0x02, 0x60, 0xc4,
- 0xe1, 0xc3, 0x08, 0x00, 0xb1, 0xc4, 0xe1, 0x47, 0x08, 0x00, 0xf9, 0xc4,
- 0xe3, 0xab, 0x08, 0x01, 0x21, 0xc4, 0xe4, 0x77, 0x08, 0x01, 0xc1, 0xc4,
- 0xe4, 0x07, 0x08, 0x01, 0xe9, 0xc5, 0xdd, 0xcc, 0x08, 0x02, 0x19, 0xc5,
- 0xdb, 0x9c, 0x08, 0x02, 0x41, 0xc4, 0xcc, 0x8b, 0x08, 0x00, 0x79, 0xc4,
- 0xe5, 0x6b, 0x08, 0x00, 0x90, 0xc4, 0xe4, 0x43, 0x08, 0x00, 0xd1, 0xc4,
- 0xe2, 0xd7, 0x08, 0x01, 0x29, 0xc4, 0xe6, 0x43, 0x08, 0x01, 0xf9, 0xc5,
- 0xdb, 0x38, 0x08, 0x02, 0x31, 0xc3, 0xe6, 0xc4, 0x08, 0x00, 0x11, 0xc4,
- 0xe1, 0x9b, 0x08, 0x00, 0xc1, 0xc4, 0xe3, 0xdf, 0x08, 0x01, 0x49, 0xc4,
- 0xe3, 0x53, 0x08, 0x01, 0x61, 0xc4, 0xe3, 0xf3, 0x08, 0x02, 0x00, 0xc4,
- 0xe5, 0xf3, 0x08, 0x00, 0xd9, 0xc4, 0xe3, 0x83, 0x08, 0x01, 0x01, 0xc4,
- 0xe1, 0x3f, 0x08, 0x01, 0x31, 0xc4, 0xe3, 0xdb, 0x08, 0x01, 0xa1, 0xc5,
- 0xd9, 0x53, 0x08, 0x02, 0x49, 0xc3, 0xe3, 0x6b, 0x08, 0x00, 0x09, 0xc4,
- 0xe3, 0x13, 0x08, 0x00, 0x69, 0xc4, 0xe2, 0xcf, 0x08, 0x01, 0x68, 0xc5,
- 0xdf, 0xca, 0x08, 0x02, 0x69, 0xc5, 0xde, 0x9e, 0x08, 0x02, 0x20, 0xa5,
- 0x08, 0x02, 0x81, 0xa6, 0x08, 0x02, 0x88, 0xa4, 0x08, 0x02, 0xa1, 0xa6,
- 0x08, 0x02, 0xa8, 0xa0, 0x08, 0x02, 0xb9, 0xa1, 0x08, 0x02, 0xc0, 0x9f,
- 0x08, 0x02, 0xd1, 0xa0, 0x08, 0x02, 0xd9, 0xa3, 0x08, 0x02, 0xe1, 0xa6,
- 0x08, 0x02, 0xe8, 0x1d, 0xc3, 0x6b, 0x46, 0x1f, 0xc3, 0x6b, 0x6c, 0x20,
- 0xc3, 0x6b, 0x8a, 0x21, 0xc3, 0x6b, 0x9a, 0x22, 0xc3, 0x6b, 0xb4, 0x23,
- 0xc3, 0x6b, 0xd8, 0x24, 0xc3, 0x6c, 0x04, 0x25, 0xc3, 0x6c, 0x2c, 0x26,
- 0x43, 0x6c, 0x48, 0x1f, 0xc3, 0x6c, 0x52, 0x20, 0xc3, 0x6c, 0x5e, 0x21,
- 0xc3, 0x6c, 0x7c, 0x22, 0x43, 0x6c, 0xa4, 0x1d, 0xc3, 0x6c, 0xca, 0x1e,
- 0xc3, 0x6c, 0xf2, 0x1f, 0xc3, 0x6d, 0x1a, 0xc2, 0xc2, 0x73, 0x08, 0x07,
- 0xc8, 0xc6, 0xcc, 0x8b, 0x08, 0x04, 0x99, 0xc8, 0xc0, 0x05, 0x08, 0x04,
- 0xa0, 0xc6, 0xce, 0x47, 0x08, 0x04, 0xc9, 0xc7, 0xc2, 0x67, 0x08, 0x04,
- 0xc0, 0x43, 0x00, 0x50, 0xc3, 0x6d, 0x32, 0xc4, 0x02, 0x83, 0x00, 0x0d,
- 0x6b, 0x03, 0x6d, 0x3e, 0x06, 0xc3, 0x6d, 0x44, 0x05, 0xc3, 0x6d, 0x50,
- 0xca, 0x69, 0x25, 0x00, 0xf3, 0x79, 0xce, 0x02, 0x79, 0x00, 0x14, 0x41,
- 0xcc, 0x57, 0x82, 0x00, 0x0d, 0x59, 0xc6, 0x01, 0x01, 0x00, 0x0b, 0x38,
- 0x46, 0x00, 0x6b, 0x43, 0x6d, 0x6e, 0x45, 0x41, 0x9d, 0xc3, 0x6d, 0x7a,
- 0x8f, 0x05, 0x3b, 0xb8, 0xd3, 0x44, 0xdf, 0x00, 0xeb, 0xd1, 0xc3, 0x00,
- 0xbf, 0x00, 0x07, 0xf2, 0x03, 0x6d, 0xd3, 0xc8, 0xb0, 0x4f, 0x00, 0xe8,
- 0xb1, 0x43, 0x00, 0x8c, 0x43, 0x6d, 0xdc, 0xd4, 0x02, 0x73, 0x05, 0x5b,
- 0x38, 0x43, 0x00, 0x50, 0xc3, 0x6d, 0xee, 0x05, 0xc3, 0x6d, 0xfa, 0x44,
- 0x01, 0xb8, 0xc3, 0x6e, 0x1b, 0xc5, 0x35, 0x4a, 0x00, 0x0a, 0xdb, 0x03,
- 0x6e, 0x2a, 0xcc, 0x57, 0x82, 0x00, 0xec, 0x51, 0xc4, 0x02, 0x83, 0x00,
- 0x14, 0x11, 0xce, 0x3a, 0xdb, 0x05, 0x3d, 0x49, 0x15, 0x43, 0x6e, 0x30,
- 0xc3, 0x6b, 0xe0, 0x00, 0x12, 0xcb, 0x03, 0x6e, 0x3c, 0x45, 0x0a, 0x10,
- 0x43, 0x6e, 0x42, 0x47, 0x3a, 0x87, 0xc3, 0x6e, 0x50, 0xc7, 0xbf, 0x76,
- 0x05, 0x3e, 0xc8, 0xc7, 0xc9, 0xc2, 0x05, 0x5b, 0x01, 0xc6, 0xc4, 0x05,
- 0x05, 0x3c, 0x60, 0x44, 0x00, 0x6c, 0xc3, 0x6e, 0x67, 0xc7, 0xa8, 0x1d,
- 0x05, 0x3a, 0xd8, 0x87, 0x00, 0x12, 0xc3, 0x03, 0x6e, 0x71, 0x8d, 0x0e,
- 0xf8, 0x19, 0xc8, 0xb8, 0xe5, 0x0e, 0xf8, 0x09, 0x85, 0x01, 0x0c, 0x23,
- 0x03, 0x6e, 0x77, 0xc6, 0x25, 0x70, 0x00, 0x12, 0xe3, 0x03, 0x6e, 0x7d,
- 0xcf, 0x63, 0x80, 0x00, 0x13, 0xf9, 0xc6, 0xd3, 0x21, 0x05, 0x3f, 0xb0,
- 0xc8, 0xa8, 0x1c, 0x05, 0x3a, 0xe8, 0x45, 0x00, 0x6c, 0xc3, 0x6e, 0x83,
- 0xd6, 0x30, 0x61, 0x00, 0x0a, 0x48, 0x43, 0x00, 0x50, 0xc3, 0x6e, 0xb9,
- 0x04, 0xc3, 0x6e, 0xc5, 0xc8, 0x63, 0x87, 0x0e, 0xf8, 0x89, 0x05, 0xc3,
- 0x6e, 0xd4, 0xca, 0x69, 0x25, 0x00, 0xf1, 0xd9, 0x42, 0x01, 0x4a, 0xc3,
- 0x6e, 0xec, 0xcc, 0x57, 0x82, 0x00, 0xec, 0x29, 0xcf, 0x61, 0x82, 0x05,
- 0x59, 0xb9, 0xce, 0x02, 0x79, 0x00, 0x13, 0x6b, 0x03, 0x6e, 0xfb, 0xcb,
- 0x95, 0x18, 0x05, 0x3a, 0x49, 0xc5, 0x35, 0x4a, 0x00, 0x09, 0xd1, 0xc6,
- 0x01, 0x01, 0x00, 0x0a, 0x10, 0xc2, 0x23, 0xb6, 0x00, 0x13, 0x73, 0x03,
- 0x6f, 0x01, 0xc5, 0xbb, 0x84, 0x05, 0x59, 0xa8, 0x46, 0x00, 0x6b, 0x43,
- 0x6f, 0x07, 0x43, 0x00, 0x50, 0xc3, 0x6f, 0x11, 0x05, 0xc3, 0x6f, 0x1d,
- 0xca, 0x69, 0x25, 0x00, 0xf0, 0x79, 0x44, 0x01, 0xb8, 0xc3, 0x6f, 0x35,
- 0xc4, 0x02, 0x83, 0x00, 0x12, 0xbb, 0x03, 0x6f, 0x41, 0xcc, 0x57, 0x82,
- 0x00, 0xec, 0x09, 0x15, 0xc3, 0x6f, 0x47, 0x16, 0x43, 0x6f, 0x53, 0xca,
- 0x9d, 0x84, 0x00, 0x15, 0x23, 0x03, 0x6f, 0x5f, 0xc3, 0x8a, 0xf6, 0x00,
- 0xf4, 0xf8, 0x00, 0x43, 0x6f, 0x65, 0x45, 0x06, 0xf3, 0xc3, 0x6f, 0x74,
- 0x46, 0x3b, 0xb2, 0x43, 0x6f, 0x87, 0xce, 0x02, 0x79, 0x0e, 0xf8, 0xe9,
- 0x05, 0xc3, 0x6f, 0x92, 0xc5, 0x35, 0x4a, 0x00, 0x08, 0x39, 0xc9, 0x17,
- 0x7a, 0x00, 0x08, 0x59, 0xc3, 0x00, 0xe8, 0x05, 0x3c, 0x99, 0xcc, 0x57,
- 0x82, 0x05, 0x3c, 0xa1, 0xc4, 0x02, 0x83, 0x00, 0x0c, 0x41, 0xc6, 0x01,
- 0x01, 0x00, 0x11, 0xe0, 0x4a, 0x9f, 0x3c, 0x43, 0x6f, 0xad, 0xcf, 0x65,
- 0x51, 0x00, 0x12, 0xf1, 0x11, 0xc3, 0x6f, 0xb9, 0xc9, 0x6a, 0x61, 0x05,
- 0x3e, 0x88, 0xcb, 0x9a, 0xa3, 0x05, 0x39, 0x78, 0xce, 0x02, 0x79, 0x0e,
- 0xf8, 0xd9, 0x42, 0x00, 0xf6, 0xc3, 0x6f, 0xc5, 0x05, 0xc3, 0x6f, 0xd4,
- 0x06, 0xc3, 0x6f, 0xe3, 0xc6, 0x60, 0xe6, 0x00, 0x0a, 0x6b, 0x03, 0x6f,
- 0xf0, 0xc5, 0x21, 0x12, 0x00, 0x07, 0xab, 0x03, 0x6f, 0xf6, 0xc6, 0x01,
- 0x01, 0x00, 0x07, 0xc3, 0x03, 0x6f, 0xfc, 0xc5, 0x1f, 0x94, 0x00, 0x07,
- 0x91, 0xc5, 0x35, 0x4a, 0x00, 0x07, 0x99, 0x42, 0x01, 0x64, 0xc3, 0x70,
- 0x02, 0xc5, 0x1f, 0x0a, 0x00, 0x0a, 0x79, 0xc6, 0xcd, 0x21, 0x00, 0x0f,
- 0x5b, 0x03, 0x70, 0x14, 0xce, 0x1f, 0x15, 0x00, 0x10, 0x78, 0x91, 0x00,
- 0x12, 0xa3, 0x03, 0x70, 0x1a, 0x87, 0x00, 0x12, 0xda, 0x03, 0x70, 0x24,
- 0xc6, 0x01, 0x01, 0x00, 0x13, 0x43, 0x03, 0x70, 0x2a, 0x06, 0xc3, 0x70,
- 0x30, 0xca, 0x9f, 0xc8, 0x00, 0xf6, 0x49, 0xc5, 0x21, 0x12, 0x00, 0x09,
- 0x4b, 0x03, 0x70, 0x3d, 0xce, 0x02, 0x79, 0x00, 0xec, 0xb1, 0xc5, 0x1f,
- 0x94, 0x00, 0x07, 0x61, 0xc5, 0x35, 0x4a, 0x00, 0x07, 0x69, 0x05, 0xc3,
- 0x70, 0x43, 0xc6, 0x60, 0xe6, 0x00, 0x09, 0x59, 0xc5, 0x1f, 0x0a, 0x00,
- 0x09, 0x69, 0xc6, 0xcd, 0x21, 0x00, 0x09, 0x79, 0xce, 0x1f, 0x15, 0x00,
- 0x10, 0x58, 0x83, 0x00, 0x13, 0x4b, 0x03, 0x70, 0x4f, 0xc7, 0xc8, 0x79,
- 0x05, 0x5b, 0x08, 0x46, 0x6a, 0x63, 0xc3, 0x70, 0x55, 0x47, 0x1e, 0x8f,
- 0x43, 0x70, 0x6d, 0x00, 0x43, 0x70, 0x79, 0x46, 0x00, 0x6b, 0x43, 0x70,
- 0x85, 0x46, 0x00, 0x6b, 0x43, 0x70, 0x91, 0x05, 0xc3, 0x70, 0xac, 0xc5,
- 0x21, 0x12, 0x00, 0xf5, 0xeb, 0x03, 0x70, 0xc4, 0xca, 0x9f, 0xc8, 0x00,
- 0xf5, 0xd9, 0x06, 0xc3, 0x70, 0xca, 0xc6, 0x60, 0xe6, 0x00, 0x08, 0x9b,
- 0x03, 0x70, 0xd4, 0xce, 0x02, 0x79, 0x00, 0xec, 0x91, 0xc8, 0xb8, 0x05,
- 0x05, 0x59, 0xa1, 0xc5, 0x1f, 0x94, 0x00, 0x07, 0x41, 0xc5, 0x35, 0x4a,
- 0x00, 0x07, 0x49, 0xc5, 0x1f, 0x0a, 0x00, 0x08, 0xa9, 0xc6, 0xcd, 0x21,
- 0x00, 0x08, 0xc9, 0xce, 0x1f, 0x15, 0x00, 0x10, 0x39, 0xc6, 0x01, 0x01,
- 0x00, 0x12, 0x39, 0xc5, 0x21, 0x5b, 0x01, 0x63, 0xc0, 0xc3, 0x01, 0x1e,
- 0x05, 0x39, 0x19, 0xc2, 0x00, 0x34, 0x05, 0x39, 0x28, 0x8a, 0x00, 0x07,
- 0x80, 0xcb, 0x9a, 0xcf, 0x0e, 0xf8, 0x00, 0xc9, 0x17, 0x7a, 0x00, 0xf0,
- 0xf9, 0xcc, 0x57, 0x82, 0x00, 0xec, 0x11, 0xcc, 0x21, 0x0b, 0x00, 0xeb,
- 0x71, 0xc6, 0x01, 0x01, 0x05, 0x3c, 0xc9, 0xc4, 0x02, 0x83, 0x00, 0x0c,
- 0x90, 0xc4, 0xab, 0x3a, 0x00, 0xf7, 0xf9, 0xc5, 0x21, 0x12, 0x00, 0xf7,
- 0xc9, 0xc4, 0x02, 0x83, 0x00, 0x0d, 0xa3, 0x03, 0x70, 0xda, 0x06, 0xc3,
- 0x70, 0xe0, 0xc5, 0x1f, 0x94, 0x00, 0xf7, 0x99, 0xca, 0x9e, 0x60, 0x00,
- 0xf4, 0xc9, 0x15, 0xc3, 0x70, 0xec, 0xc5, 0x35, 0x4a, 0x00, 0x07, 0xe9,
- 0xca, 0x0b, 0x36, 0x00, 0x0b, 0xb9, 0xc6, 0x60, 0xe6, 0x00, 0x11, 0x98,
- 0x47, 0xc5, 0xa8, 0xc3, 0x70, 0xf8, 0xc8, 0xbd, 0xa5, 0x05, 0x3e, 0xb8,
- 0x44, 0x01, 0xb8, 0xc3, 0x71, 0x02, 0xc5, 0x35, 0x4a, 0x00, 0xf1, 0xf9,
- 0xcc, 0x57, 0x82, 0x00, 0xec, 0x31, 0xcc, 0x21, 0x0b, 0x00, 0xeb, 0x79,
- 0xcc, 0x4c, 0x8c, 0x05, 0x59, 0xd1, 0xc4, 0x02, 0x83, 0x00, 0x13, 0x88,
- 0x45, 0x00, 0x6c, 0xc3, 0x71, 0x0e, 0xc3, 0x00, 0xe8, 0x00, 0x14, 0x4a,
- 0x03, 0x71, 0x5a, 0xcc, 0x21, 0x84, 0x00, 0xeb, 0xf8, 0x45, 0x00, 0x6c,
- 0xc3, 0x71, 0x60, 0xce, 0x73, 0xe3, 0x05, 0x59, 0x88, 0xd4, 0x02, 0x73,
- 0x00, 0xec, 0x80, 0x46, 0x00, 0x6b, 0x43, 0x71, 0xa5, 0xd4, 0x3d, 0xf5,
- 0x05, 0x39, 0xd8, 0xca, 0x9e, 0x60, 0x00, 0xf4, 0xc1, 0x06, 0xc3, 0x71,
- 0xb1, 0xc5, 0x35, 0x4a, 0x00, 0xf4, 0x19, 0xc5, 0x1f, 0x94, 0x00, 0xf4,
- 0x09, 0xca, 0x0b, 0x36, 0x00, 0x0b, 0xa9, 0xc4, 0x02, 0x83, 0x01, 0x63,
- 0x98, 0xca, 0x69, 0x25, 0x00, 0xf4, 0xb1, 0xcb, 0x9a, 0xf0, 0x00, 0xf1,
- 0x59, 0x05, 0xc3, 0x71, 0xbd, 0x06, 0xc3, 0x71, 0xcf, 0xc4, 0x02, 0x83,
- 0x00, 0x13, 0x31, 0xc6, 0x01, 0x01, 0x00, 0x09, 0x39, 0xcc, 0x57, 0x82,
- 0x05, 0x3c, 0xa8, 0xca, 0x1d, 0xd4, 0x00, 0x13, 0x38, 0xca, 0x69, 0x25,
- 0x00, 0xf4, 0xa9, 0x06, 0xc3, 0x71, 0xe1, 0x05, 0xc3, 0x71, 0xed, 0xcc,
- 0x57, 0x82, 0x00, 0xec, 0x71, 0xcc, 0x21, 0x0b, 0x00, 0xeb, 0xb1, 0xce,
- 0x02, 0x79, 0x00, 0x14, 0x81, 0xc5, 0x35, 0x4a, 0x00, 0x0b, 0xd1, 0x15,
- 0xc3, 0x71, 0xff, 0xc4, 0x02, 0x83, 0x00, 0x11, 0x28, 0x06, 0xc3, 0x72,
- 0x0b, 0xcc, 0x57, 0x82, 0x00, 0xec, 0x69, 0x42, 0x01, 0x64, 0x43, 0x72,
- 0x17, 0x06, 0xc3, 0x72, 0x26, 0xc5, 0x21, 0x12, 0x00, 0xf3, 0xe9, 0xcc,
- 0x57, 0x82, 0x00, 0xec, 0x61, 0xc4, 0x02, 0x83, 0x00, 0x14, 0x59, 0xca,
- 0xa6, 0x26, 0x01, 0x63, 0x89, 0xc4, 0x02, 0x52, 0x01, 0x63, 0xa0, 0xc2,
- 0x0f, 0xf5, 0x05, 0x3c, 0xd9, 0xc2, 0x1b, 0xd8, 0x05, 0x3c, 0xe9, 0xc2,
- 0x0f, 0x4d, 0x05, 0x3c, 0xf8, 0xc9, 0x17, 0x7a, 0x00, 0xf2, 0xb9, 0xc5,
- 0x35, 0x4a, 0x00, 0xf2, 0xa9, 0xcc, 0x57, 0x82, 0x00, 0xec, 0x41, 0x15,
- 0xc3, 0x72, 0x32, 0xcc, 0x21, 0x0b, 0x00, 0xeb, 0x89, 0xc8, 0xb8, 0x05,
- 0x05, 0x3a, 0x99, 0xc4, 0x02, 0x83, 0x00, 0x0d, 0x28, 0xca, 0x69, 0x25,
- 0x00, 0xf1, 0xa9, 0x06, 0xc3, 0x72, 0x41, 0xc5, 0x35, 0x4a, 0x00, 0xf1,
- 0x89, 0xcc, 0x57, 0x82, 0x00, 0xec, 0x21, 0xc6, 0x01, 0x01, 0x05, 0x3a,
- 0x0b, 0x03, 0x72, 0x53, 0x05, 0xc3, 0x72, 0x59, 0xce, 0x3a, 0xdb, 0x05,
- 0x3d, 0x19, 0xc4, 0x02, 0x83, 0x00, 0x0c, 0xc8, 0xc6, 0x60, 0xe6, 0x00,
- 0xf1, 0x09, 0xcc, 0x57, 0x82, 0x00, 0xec, 0x19, 0xc5, 0x35, 0x4a, 0x00,
- 0x0f, 0xa9, 0xc4, 0x02, 0x83, 0x00, 0x13, 0x01, 0x05, 0xc3, 0x72, 0x65,
- 0xc5, 0x1f, 0x0a, 0x00, 0x08, 0xf9, 0xc9, 0x17, 0x7a, 0x00, 0x09, 0x09,
- 0xce, 0x3a, 0xdb, 0x05, 0x3d, 0x09, 0xc6, 0x01, 0x01, 0x00, 0x0f, 0x28,
- 0x8b, 0x05, 0x3d, 0xe9, 0x83, 0x05, 0x3d, 0xd9, 0x97, 0x05, 0x3d, 0xf9,
- 0xc4, 0x05, 0x30, 0x00, 0x12, 0x10, 0xca, 0x69, 0x25, 0x00, 0xf0, 0x39,
- 0x44, 0x01, 0xb8, 0xc3, 0x72, 0x77, 0xcc, 0x57, 0x82, 0x00, 0xec, 0x01,
- 0xcc, 0x21, 0x0b, 0x00, 0xeb, 0x61, 0xc8, 0xb8, 0x05, 0x05, 0x3c, 0xb9,
- 0xc6, 0x01, 0x01, 0x00, 0x0c, 0x01, 0xc6, 0xd2, 0x7f, 0x00, 0x0c, 0x19,
- 0xc4, 0x02, 0x83, 0x00, 0x12, 0x98, 0xca, 0xa4, 0x64, 0x05, 0x5a, 0x69,
- 0x45, 0x77, 0x0a, 0x43, 0x72, 0x83, 0x91, 0x05, 0x59, 0xeb, 0x03, 0x72,
- 0x91, 0x87, 0x05, 0x59, 0x90, 0x05, 0xc3, 0x72, 0x97, 0xc6, 0x01, 0x01,
- 0x00, 0x12, 0x48, 0xc4, 0x02, 0x83, 0x00, 0x15, 0x03, 0x03, 0x72, 0xa9,
- 0xd8, 0x23, 0xd0, 0x05, 0x3a, 0xb9, 0xcf, 0x3f, 0x8a, 0x05, 0x3a, 0xc8,
- 0x8e, 0x07, 0xd8, 0x21, 0x8b, 0x07, 0xd8, 0x18, 0xc6, 0x00, 0x94, 0x00,
- 0xf7, 0xb0, 0x43, 0x01, 0xb9, 0xc3, 0x72, 0xaf, 0xc8, 0x1e, 0x43, 0x00,
- 0x0b, 0xc0, 0x98, 0x00, 0xf7, 0xe1, 0xc2, 0x03, 0xc7, 0x00, 0xf7, 0xd0,
- 0xc5, 0x01, 0x62, 0x00, 0xf2, 0x11, 0xc5, 0x00, 0x95, 0x00, 0xf2, 0x00,
- 0x42, 0x00, 0xf6, 0xc3, 0x72, 0xbb, 0x06, 0xc3, 0x72, 0xca, 0xc6, 0x60,
- 0xe6, 0x00, 0x0b, 0x53, 0x03, 0x72, 0xd7, 0xc5, 0x21, 0x12, 0x00, 0x0b,
- 0x43, 0x03, 0x72, 0xdd, 0x05, 0xc3, 0x72, 0xe1, 0xc5, 0x1f, 0x94, 0x00,
- 0x06, 0xc9, 0xc5, 0x35, 0x4a, 0x00, 0x06, 0xd1, 0xc6, 0x01, 0x01, 0x05,
- 0x3d, 0xc1, 0xc5, 0x1f, 0x0a, 0x00, 0x0b, 0x61, 0xca, 0x9e, 0x60, 0x00,
- 0x0b, 0x71, 0xce, 0x1f, 0x15, 0x00, 0x10, 0xb1, 0xc6, 0xcd, 0x21, 0x00,
- 0x0b, 0x90, 0xc2, 0x00, 0xc0, 0x00, 0x0d, 0x03, 0x03, 0x72, 0xf0, 0xc8,
- 0x9f, 0xca, 0x00, 0xf6, 0x70, 0xc9, 0x0b, 0x37, 0x00, 0x06, 0xa3, 0x03,
- 0x72, 0xf6, 0xc4, 0x63, 0xce, 0x00, 0x0e, 0x88, 0x11, 0xc3, 0x72, 0xfc,
- 0xc8, 0x1e, 0x43, 0x00, 0x06, 0xb2, 0x03, 0x73, 0x08, 0xc5, 0x60, 0xe7,
- 0x00, 0x0a, 0x63, 0x03, 0x73, 0x0e, 0xcb, 0x21, 0x0c, 0x00, 0x0c, 0xf8,
- 0x45, 0x00, 0x56, 0x43, 0x73, 0x14, 0xca, 0xa9, 0x28, 0x00, 0x0f, 0xf0,
- 0xd1, 0x57, 0xc1, 0x05, 0x3a, 0x51, 0xc2, 0x00, 0x11, 0x05, 0x3a, 0x60,
- 0xcb, 0x9a, 0x2a, 0x00, 0x0f, 0x60, 0x11, 0xc3, 0x73, 0x26, 0xc8, 0x1e,
- 0x43, 0x00, 0x06, 0x7a, 0x03, 0x73, 0x32, 0xc6, 0x01, 0x61, 0x00, 0xf1,
- 0x60, 0xc9, 0x0b, 0x37, 0x00, 0x06, 0x71, 0xc4, 0x63, 0xce, 0x00, 0x0e,
- 0x78, 0xc9, 0x0b, 0x37, 0x00, 0x06, 0x53, 0x03, 0x73, 0x38, 0xc6, 0xbf,
- 0x3f, 0x00, 0x11, 0x43, 0x03, 0x73, 0x3c, 0xc4, 0x63, 0xce, 0x00, 0x08,
- 0xd0, 0xc6, 0x01, 0x61, 0x00, 0xf0, 0xd0, 0x11, 0xc3, 0x73, 0x42, 0xc8,
- 0x1e, 0x43, 0x00, 0x06, 0x58, 0x45, 0x00, 0x56, 0x43, 0x73, 0x4e, 0xc8,
- 0x0f, 0xbf, 0x00, 0x0d, 0xc1, 0xca, 0x9a, 0x99, 0x00, 0x0f, 0x70, 0x45,
- 0x00, 0x56, 0x43, 0x73, 0x5a, 0xc9, 0x0b, 0x37, 0x00, 0x06, 0x13, 0x03,
- 0x73, 0x78, 0xc4, 0x63, 0xce, 0x00, 0x0e, 0x68, 0x11, 0xc3, 0x73, 0x7e,
- 0xc8, 0x1e, 0x43, 0x00, 0x06, 0x22, 0x03, 0x73, 0x8a, 0xc5, 0x01, 0x62,
- 0x00, 0xf0, 0x01, 0xc5, 0x00, 0x95, 0x00, 0x06, 0x2a, 0x03, 0x73, 0x90,
- 0xc5, 0x35, 0x4a, 0x00, 0x0f, 0xe1, 0xc6, 0x60, 0xe6, 0x00, 0x0f, 0x10,
- 0xc5, 0x01, 0x62, 0x00, 0xf3, 0x13, 0x03, 0x73, 0x96, 0xc5, 0x00, 0x95,
- 0x00, 0xf3, 0x00, 0xc4, 0x63, 0xce, 0x00, 0x0b, 0x03, 0x03, 0x73, 0x9c,
- 0xc9, 0x0b, 0x37, 0x00, 0x0a, 0xe1, 0xc6, 0xbf, 0x3f, 0x00, 0x0a, 0xf1,
- 0xca, 0xa1, 0x80, 0x00, 0x10, 0xc0, 0xce, 0x17, 0x75, 0x00, 0xf3, 0x20,
- 0xd3, 0x45, 0xb0, 0x05, 0x3e, 0x49, 0xc9, 0xa9, 0xc4, 0x01, 0x63, 0xf0,
- 0x43, 0x01, 0xb9, 0xc3, 0x73, 0xa2, 0xc8, 0x23, 0xe0, 0x01, 0x63, 0x58,
- 0xc9, 0x0b, 0x37, 0x00, 0xf4, 0x81, 0xc4, 0x63, 0xce, 0x00, 0x0b, 0xe8,
- 0xc5, 0x01, 0x02, 0x00, 0x0d, 0xa9, 0xc9, 0xa9, 0xc4, 0x01, 0x63, 0xf8,
- 0x43, 0x01, 0xb9, 0xc3, 0x73, 0xae, 0xc8, 0x1e, 0x43, 0x00, 0xf4, 0x20,
- 0xc8, 0x0e, 0xad, 0x00, 0xf3, 0xf1, 0xce, 0x3f, 0x8b, 0x05, 0x3a, 0xf0,
- 0xcf, 0x63, 0x08, 0x00, 0xf3, 0x81, 0xc6, 0xbf, 0x3f, 0x00, 0x0b, 0x11,
- 0xc4, 0x63, 0xce, 0x00, 0x0b, 0x21, 0xca, 0xa1, 0x80, 0x00, 0x10, 0xd0,
- 0x43, 0x01, 0xb9, 0xc3, 0x73, 0xba, 0xce, 0x3f, 0x8b, 0x00, 0x11, 0xf0,
- 0xd2, 0x23, 0xd6, 0x05, 0x3b, 0x30, 0xc4, 0xd9, 0x9f, 0x01, 0x63, 0x80,
- 0xca, 0x69, 0x25, 0x00, 0xf2, 0xf1, 0x42, 0x01, 0x4a, 0xc3, 0x73, 0xc6,
- 0xce, 0x3a, 0xdb, 0x05, 0x3d, 0x31, 0xc5, 0x35, 0x4a, 0x00, 0x0a, 0xa1,
- 0x05, 0xc3, 0x73, 0xd2, 0xce, 0x1f, 0x15, 0x00, 0x10, 0x91, 0xc6, 0x01,
- 0x01, 0x00, 0x12, 0x61, 0xc4, 0x12, 0xeb, 0x01, 0x63, 0x20, 0xc5, 0x01,
- 0x02, 0x01, 0x63, 0x1b, 0x03, 0x73, 0xde, 0xcc, 0x82, 0x90, 0x05, 0x3a,
- 0xa0, 0xcf, 0x63, 0x08, 0x00, 0xf2, 0x51, 0xc6, 0xbf, 0x3f, 0x00, 0x0a,
- 0x29, 0xc4, 0x63, 0xce, 0x00, 0x0a, 0x38, 0xc9, 0x69, 0x26, 0x00, 0xf2,
- 0x41, 0xc8, 0x56, 0xed, 0x00, 0x0c, 0xe9, 0xcd, 0x7f, 0x02, 0x00, 0x11,
- 0x00, 0x43, 0x01, 0xb9, 0xc3, 0x73, 0xe4, 0xc8, 0x23, 0xe0, 0x05, 0x3c,
- 0x80, 0xcf, 0x63, 0x08, 0x00, 0xf1, 0xe1, 0xc6, 0xbf, 0x3f, 0x00, 0x09,
- 0xd9, 0xc4, 0x63, 0xce, 0x00, 0x09, 0xe8, 0xc7, 0x0e, 0xae, 0x00, 0xf1,
- 0xb3, 0x03, 0x73, 0xf0, 0xc8, 0xa6, 0xb4, 0x01, 0x63, 0x00, 0xc3, 0x01,
- 0x4a, 0x00, 0x09, 0xf9, 0xc5, 0x21, 0x12, 0x01, 0x63, 0x10, 0xc5, 0x01,
- 0x02, 0x00, 0x0a, 0x09, 0xcd, 0x6d, 0x8c, 0x00, 0x0e, 0x40, 0xc2, 0x00,
- 0xbb, 0x00, 0x11, 0xe9, 0xc3, 0x8e, 0x37, 0x05, 0x3d, 0x68, 0xc8, 0x0e,
- 0xad, 0x00, 0xf1, 0x91, 0xce, 0x3f, 0x8b, 0x05, 0x3a, 0x11, 0xc8, 0x23,
- 0xe0, 0x01, 0x63, 0x48, 0xd4, 0x3f, 0x85, 0x05, 0x3a, 0x20, 0xc6, 0xbf,
- 0x3f, 0x00, 0x09, 0xb1, 0xc4, 0x63, 0xce, 0x00, 0x0f, 0x40, 0xc6, 0xbf,
- 0x3f, 0x00, 0xf1, 0x41, 0xc9, 0x0b, 0x37, 0x00, 0x09, 0x21, 0xc4, 0x63,
- 0xce, 0x00, 0x10, 0xf0, 0xc8, 0x1e, 0x43, 0x00, 0xf1, 0x31, 0x43, 0x01,
- 0xb9, 0xc3, 0x73, 0xf6, 0xc8, 0x23, 0xe0, 0x01, 0x63, 0x38, 0xc9, 0x0b,
- 0x37, 0x00, 0x08, 0xe1, 0xc6, 0xbf, 0x3f, 0x00, 0x09, 0x11, 0xc4, 0x63,
- 0xce, 0x00, 0x0f, 0x30, 0xcf, 0x63, 0x08, 0x00, 0xf0, 0x91, 0xc6, 0xbf,
- 0x3f, 0x00, 0xf0, 0x81, 0xc4, 0x63, 0xce, 0x00, 0x08, 0x70, 0xc5, 0x01,
- 0x62, 0x00, 0xf0, 0x61, 0xc5, 0x00, 0x95, 0x00, 0xf0, 0x50, 0xcd, 0x80,
- 0x88, 0x00, 0x0f, 0x93, 0x03, 0x74, 0x02, 0xc5, 0x01, 0x02, 0x00, 0x08,
- 0x81, 0xd3, 0x45, 0xb0, 0x05, 0x3e, 0x38, 0xc6, 0xbf, 0x3f, 0x00, 0x06,
- 0x3b, 0x03, 0x74, 0x08, 0xc9, 0x0b, 0x37, 0x00, 0x08, 0x41, 0xc4, 0x63,
- 0xce, 0x00, 0x08, 0x60, 0xc5, 0x01, 0x62, 0x00, 0xf0, 0x21, 0xc5, 0x00,
- 0x95, 0x00, 0xf0, 0x10, 0xc9, 0x0b, 0x37, 0x00, 0x09, 0xa1, 0xcb, 0x4c,
- 0x8d, 0x05, 0x3d, 0x90, 0x45, 0x00, 0x6c, 0xc3, 0x74, 0x0e, 0xc6, 0x11,
- 0xa5, 0x01, 0x5b, 0x89, 0x4c, 0x13, 0x02, 0x43, 0x74, 0x38, 0xe0, 0x02,
- 0xc7, 0x01, 0x4b, 0x70, 0x46, 0x08, 0x79, 0x43, 0x74, 0x3e, 0xc6, 0x44,
- 0x67, 0x07, 0xd9, 0x59, 0xc7, 0x44, 0x66, 0x07, 0xd9, 0x50, 0xc5, 0x64,
- 0xc5, 0x07, 0xd9, 0x81, 0xc5, 0x7d, 0xd2, 0x07, 0xd9, 0x71, 0xc6, 0xcc,
- 0xd9, 0x07, 0xd9, 0x78, 0xcc, 0x7a, 0x8b, 0x05, 0x4b, 0x59, 0xc5, 0x92,
- 0x32, 0x05, 0x4b, 0x21, 0xc6, 0xc0, 0x37, 0x05, 0x4b, 0x70, 0xc3, 0x3c,
- 0x08, 0x05, 0x4b, 0x61, 0x44, 0x29, 0x95, 0x43, 0x74, 0x4a, 0xc6, 0xc6,
- 0xf2, 0x05, 0x4b, 0xc9, 0xc5, 0xc8, 0x2e, 0x00, 0x88, 0x20, 0xc6, 0xce,
- 0x6b, 0x05, 0x4b, 0xc0, 0xc6, 0xce, 0x17, 0x05, 0x4b, 0xa8, 0x0d, 0xc3,
- 0x74, 0x5c, 0xc5, 0xd9, 0x80, 0x00, 0x89, 0x71, 0x16, 0xc3, 0x74, 0x68,
- 0xc5, 0xd6, 0x3d, 0x00, 0x89, 0x81, 0xc5, 0xdb, 0x51, 0x00, 0x89, 0x89,
- 0x12, 0xc3, 0x74, 0x74, 0xc9, 0xac, 0xd3, 0x00, 0x89, 0xa1, 0xc5, 0xbb,
- 0xa0, 0x00, 0x89, 0xa9, 0x05, 0xc3, 0x74, 0x83, 0xc5, 0x98, 0x41, 0x00,
- 0x89, 0xd8, 0xc5, 0x98, 0x41, 0x05, 0x4b, 0xd1, 0xc5, 0x7a, 0x92, 0x00,
- 0x8a, 0xb0, 0xc5, 0x98, 0x41, 0x05, 0x4b, 0xa1, 0x0d, 0xc3, 0x74, 0x8f,
- 0x15, 0xc3, 0x74, 0x9b, 0xc5, 0xd9, 0x80, 0x00, 0x88, 0xf9, 0x16, 0xc3,
- 0x74, 0xaa, 0x05, 0xc3, 0x74, 0xb6, 0xc7, 0xc1, 0x06, 0x00, 0x89, 0x50,
- 0xc5, 0xc8, 0x2e, 0x00, 0x8a, 0x11, 0xc6, 0xc6, 0xf2, 0x00, 0x8a, 0x50,
- 0xc4, 0x7a, 0x93, 0x00, 0x8a, 0x21, 0xc6, 0xcb, 0x4b, 0x00, 0x8a, 0x31,
- 0xc6, 0xc1, 0x07, 0x00, 0x8a, 0x58, 0xc4, 0xc7, 0x2b, 0x00, 0x8a, 0x41,
- 0xc6, 0xc7, 0x2a, 0x00, 0x8a, 0x48, 0xc5, 0xd7, 0x8c, 0x05, 0x4b, 0x19,
- 0xc4, 0xac, 0xd8, 0x05, 0x4b, 0x11, 0xc5, 0x7a, 0x92, 0x05, 0x4b, 0x09,
- 0xc5, 0xdb, 0x51, 0x05, 0x4b, 0x01, 0xc6, 0x92, 0x31, 0x00, 0x88, 0xb9,
- 0xc5, 0xd6, 0x3d, 0x00, 0x8a, 0xf0, 0xc4, 0x7a, 0x93, 0x00, 0x89, 0x59,
- 0xc6, 0xc1, 0x07, 0x00, 0x8a, 0xb8, 0x02, 0x43, 0x74, 0xc2, 0x15, 0xc3,
- 0x74, 0xce, 0x05, 0x43, 0x74, 0xda, 0xc3, 0x3c, 0x08, 0x00, 0x89, 0xf1,
- 0x44, 0x29, 0x95, 0x43, 0x74, 0xe6, 0xc4, 0xc7, 0x2b, 0x00, 0x8a, 0x81,
- 0xc6, 0xc7, 0x2a, 0x00, 0x8a, 0xa8, 0x91, 0x00, 0x8b, 0xb1, 0x97, 0x00,
- 0x8b, 0xb9, 0xc2, 0x11, 0x70, 0x00, 0x8d, 0x18, 0x02, 0x43, 0x74, 0xf2,
- 0x87, 0x00, 0x8b, 0x21, 0x02, 0x43, 0x75, 0x05, 0x91, 0x00, 0x8b, 0x3a,
- 0x03, 0x75, 0x13, 0x02, 0x43, 0x75, 0x17, 0x02, 0x43, 0x75, 0x30, 0xc2,
- 0x05, 0xdb, 0x00, 0x8c, 0xb8, 0x02, 0x43, 0x75, 0x53, 0x02, 0x43, 0x75,
- 0x61, 0x87, 0x00, 0x8c, 0x03, 0x03, 0x75, 0x74, 0x1b, 0xc3, 0x75, 0x78,
- 0x91, 0x00, 0x8c, 0x13, 0x03, 0x75, 0x86, 0x97, 0x00, 0x8c, 0x18, 0x87,
- 0x00, 0x8b, 0x58, 0x91, 0x00, 0x8b, 0x78, 0x83, 0x00, 0x8c, 0x4b, 0x03,
- 0x75, 0x8c, 0xc5, 0xd9, 0x0d, 0x00, 0x8c, 0x59, 0xc2, 0x05, 0xd0, 0x00,
- 0x8c, 0x63, 0x03, 0x75, 0x90, 0x97, 0x00, 0x8c, 0x69, 0xc3, 0xe7, 0x30,
- 0x06, 0xbd, 0xb0, 0x83, 0x00, 0x8c, 0xc3, 0x03, 0x75, 0x94, 0x1b, 0xc3,
- 0x75, 0x9a, 0x91, 0x00, 0x8c, 0xd3, 0x03, 0x75, 0xb0, 0x97, 0x00, 0x8c,
- 0xd9, 0xc2, 0x11, 0x70, 0x00, 0x8c, 0xe1, 0x8b, 0x06, 0xbe, 0x20, 0x02,
- 0x43, 0x75, 0xb6, 0xc5, 0xdb, 0x51, 0x00, 0x8f, 0x11, 0x12, 0xc3, 0x75,
- 0xe3, 0xc5, 0xd6, 0x3d, 0x06, 0xbe, 0xe8, 0xc6, 0x92, 0x31, 0x00, 0x8d,
- 0x49, 0xc4, 0xac, 0xd8, 0x00, 0x8d, 0xdb, 0x03, 0x75, 0xef, 0xc5, 0xd6,
- 0x3d, 0x00, 0x8e, 0x83, 0x03, 0x75, 0xf3, 0xc8, 0xbb, 0x9d, 0x00, 0x8f,
- 0x71, 0xc5, 0xbb, 0xa0, 0x00, 0x8f, 0x71, 0xc5, 0xd9, 0x80, 0x00, 0x8f,
- 0xf9, 0xc6, 0xc8, 0x2d, 0x06, 0xbe, 0x6b, 0x03, 0x75, 0xf9, 0xc5, 0xdb,
- 0x51, 0x06, 0xbf, 0x01, 0xc5, 0x7a, 0x92, 0x06, 0xbf, 0x31, 0xc5, 0xd7,
- 0x8c, 0x06, 0xbf, 0xc8, 0x02, 0x43, 0x75, 0xff, 0x05, 0xc3, 0x76, 0x21,
- 0xc5, 0x98, 0x41, 0x00, 0x8d, 0x69, 0xc6, 0x92, 0x31, 0x00, 0x8e, 0x29,
- 0x16, 0xc3, 0x76, 0x2d, 0xc4, 0xac, 0xd8, 0x00, 0x8e, 0x39, 0xc7, 0xcb,
- 0x4a, 0x00, 0x8e, 0x41, 0xc5, 0xd6, 0x3d, 0x06, 0xbe, 0x58, 0x02, 0x43,
- 0x76, 0x39, 0x0d, 0xc3, 0x76, 0x5e, 0xc5, 0xdb, 0x51, 0x00, 0x8d, 0x8b,
- 0x03, 0x76, 0x73, 0x12, 0xc3, 0x76, 0x77, 0x15, 0xc3, 0x76, 0x8c, 0x16,
- 0xc3, 0x76, 0x98, 0xc5, 0x98, 0x41, 0x00, 0x8d, 0xb1, 0xc5, 0xd9, 0x80,
- 0x00, 0x8e, 0x69, 0x42, 0x05, 0xd0, 0x43, 0x76, 0xa7, 0xc6, 0x92, 0x31,
- 0x00, 0x8e, 0xd1, 0xc5, 0xd6, 0x3d, 0x00, 0x8e, 0xd9, 0x12, 0xc3, 0x76,
- 0xb6, 0x15, 0xc3, 0x76, 0xc5, 0x05, 0xc3, 0x76, 0xd1, 0xc5, 0x98, 0x41,
- 0x00, 0x8f, 0x09, 0xc5, 0xd9, 0x80, 0x06, 0xbe, 0xf0, 0x02, 0x43, 0x76,
- 0xdd, 0x02, 0x43, 0x77, 0x0b, 0x02, 0x43, 0x77, 0x1d, 0x0d, 0xc3, 0x77,
- 0x29, 0xcb, 0x92, 0x2c, 0x00, 0x8f, 0x68, 0x02, 0x43, 0x77, 0x35, 0xc5,
- 0xd9, 0x80, 0x00, 0x8f, 0xa9, 0xc5, 0xd6, 0x3d, 0x00, 0x8f, 0xb1, 0xc5,
- 0xdb, 0x51, 0x00, 0x8f, 0xb9, 0xc5, 0x7a, 0x92, 0x00, 0x8f, 0xc0, 0x02,
- 0x43, 0x77, 0x41, 0xc4, 0x7a, 0x93, 0x01, 0x8b, 0xc1, 0xc6, 0xc1, 0x07,
- 0x01, 0x8c, 0x20, 0xc6, 0x92, 0x31, 0x01, 0x8b, 0xd1, 0xc5, 0xd9, 0x80,
- 0x01, 0x8b, 0xd9, 0xc6, 0xc8, 0x2d, 0x01, 0x8b, 0xe1, 0xc5, 0x7a, 0x92,
- 0x01, 0x8b, 0xe9, 0xc5, 0xd7, 0x8c, 0x01, 0x8b, 0xf0, 0xc5, 0xde, 0x35,
- 0x01, 0x8b, 0x48, 0xc4, 0x7a, 0x93, 0x01, 0x89, 0xe3, 0x03, 0x77, 0x5b,
- 0xc6, 0xc1, 0x07, 0x01, 0x89, 0xf9, 0xc6, 0xcb, 0x4b, 0x01, 0x8b, 0x60,
- 0xc6, 0xc6, 0xf2, 0x01, 0x89, 0xf1, 0xc5, 0xc8, 0x2e, 0x01, 0x8b, 0x50,
- 0xc4, 0x7a, 0x93, 0x01, 0x8b, 0x71, 0xc6, 0xcb, 0x4b, 0x01, 0x8b, 0x80,
- 0xc4, 0xac, 0xd8, 0x01, 0x8a, 0x23, 0x03, 0x77, 0x61, 0xc6, 0x92, 0x31,
- 0x01, 0x8b, 0x91, 0x16, 0xc3, 0x77, 0x65, 0xc5, 0xd7, 0x8c, 0x01, 0x8b,
- 0xb0, 0xc8, 0x98, 0x3e, 0x01, 0x8c, 0x30, 0x02, 0x43, 0x77, 0x71, 0xc2,
- 0x1d, 0x5f, 0x01, 0x8c, 0x3b, 0x03, 0x77, 0x7d, 0x8b, 0x01, 0x8c, 0x48,
- 0xc2, 0x05, 0xd0, 0x01, 0x8c, 0x5b, 0x03, 0x77, 0x81, 0x8b, 0x01, 0x8c,
- 0x60, 0x83, 0x07, 0xfb, 0x61, 0x97, 0x07, 0xfb, 0x69, 0x91, 0x07, 0xfb,
- 0x70, 0xc9, 0x4f, 0xff, 0x0f, 0x64, 0xd8, 0xc8, 0x50, 0x00, 0x0f, 0x64,
- 0x91, 0xc7, 0x0c, 0x4b, 0x0f, 0x64, 0x48, 0xc9, 0x4f, 0xff, 0x0f, 0x64,
- 0xd0, 0xc8, 0x50, 0x00, 0x0f, 0x64, 0x89, 0xc7, 0x0c, 0x4b, 0x0f, 0x64,
- 0x40, 0xc9, 0x4f, 0xff, 0x0f, 0x64, 0xc8, 0x00, 0x43, 0x77, 0x85, 0xc9,
- 0x4f, 0xff, 0x0f, 0x64, 0xc0, 0x00, 0x43, 0x77, 0x91, 0xc9, 0x4f, 0xff,
- 0x0f, 0x64, 0xb8, 0x00, 0x43, 0x77, 0x9d, 0xc9, 0x4f, 0xff, 0x0f, 0x64,
- 0xb0, 0x00, 0x43, 0x77, 0xa9, 0x19, 0xc3, 0x77, 0xb5, 0x0a, 0xc3, 0x77,
- 0xbd, 0xc2, 0x00, 0x4d, 0x01, 0x9f, 0x48, 0xc3, 0x06, 0x9e, 0x01, 0x9f,
- 0x1b, 0x03, 0x77, 0xc9, 0x0b, 0x43, 0x77, 0xcf, 0xc2, 0x26, 0x51, 0x01,
- 0x9f, 0x2b, 0x03, 0x77, 0xdb, 0xc4, 0x18, 0x83, 0x01, 0x9f, 0x32, 0x03,
- 0x77, 0xe1, 0xc4, 0x01, 0xdc, 0x01, 0x9f, 0x3b, 0x03, 0x77, 0xe7, 0xc5,
- 0x6a, 0x79, 0x01, 0x9f, 0x50, 0xc4, 0x13, 0xf2, 0x01, 0x9f, 0x90, 0x91,
- 0x01, 0x9a, 0xd1, 0x07, 0x43, 0x77, 0xed, 0xc3, 0x04, 0x5f, 0x01, 0x9a,
- 0xd9, 0xc6, 0x53, 0x71, 0x01, 0x9b, 0x28, 0xc4, 0x13, 0xf2, 0x01, 0x9b,
- 0x30, 0xc2, 0x00, 0x7b, 0x01, 0x9a, 0xe9, 0xc5, 0x13, 0xf1, 0x01, 0x9b,
- 0x38, 0xc4, 0x13, 0xf2, 0x01, 0x9b, 0x40, 0xc4, 0x13, 0xf2, 0x01, 0x9b,
- 0x48, 0xc3, 0x00, 0x4c, 0x01, 0x9b, 0x50, 0xd2, 0x47, 0x9a, 0x0f, 0xd0,
- 0x31, 0xce, 0x29, 0x88, 0x0f, 0xd0, 0x69, 0xdf, 0x0c, 0x47, 0x0f, 0xd0,
- 0xd9, 0x16, 0x43, 0x77, 0xfc, 0xc5, 0xb5, 0xaf, 0x0f, 0xd2, 0x71, 0xc4,
- 0xe0, 0xaf, 0x0f, 0xd2, 0x79, 0xc6, 0xd1, 0xf5, 0x0f, 0xd2, 0x80, 0xce,
- 0x29, 0x88, 0x0f, 0xd0, 0x49, 0xdb, 0x18, 0x76, 0x0f, 0xd1, 0x98, 0xc7,
- 0x09, 0xd4, 0x01, 0x34, 0x31, 0xc8, 0x3c, 0xdf, 0x01, 0x4f, 0x60, 0xce,
- 0x3c, 0x29, 0x01, 0x2f, 0xb9, 0xcd, 0x03, 0xd4, 0x01, 0x2f, 0xa0, 0xce,
- 0x3c, 0x29, 0x01, 0x2f, 0xb1, 0xcd, 0x03, 0xd4, 0x01, 0x2f, 0xa8, 0xce,
- 0x64, 0x26, 0x01, 0x3f, 0x29, 0xce, 0x13, 0x64, 0x01, 0x2d, 0x10, 0xcd,
- 0x70, 0x48, 0x01, 0x3f, 0x21, 0x45, 0x01, 0xd2, 0x43, 0x78, 0x08, 0xce,
- 0x3c, 0x29, 0x01, 0x2f, 0x99, 0xcd, 0x03, 0xd4, 0x01, 0x2f, 0x80, 0x00,
- 0x43, 0x78, 0x14, 0xc3, 0x32, 0xa9, 0x00, 0xcf, 0xf9, 0xc4, 0xe2, 0x27,
- 0x00, 0xcf, 0x78, 0xc3, 0x32, 0xa9, 0x00, 0xcf, 0xe1, 0xc4, 0xe2, 0x27,
- 0x00, 0xcf, 0x60, 0xc3, 0x32, 0xa9, 0x00, 0xcf, 0xf1, 0xc4, 0xe2, 0x27,
- 0x00, 0xcf, 0x70, 0xc3, 0x32, 0xa9, 0x00, 0xcf, 0xe9, 0xc4, 0xe2, 0x27,
- 0x00, 0xcf, 0x68, 0x44, 0xe0, 0xeb, 0xc3, 0x78, 0x20, 0x43, 0x93, 0xfc,
- 0x43, 0x78, 0x2c, 0xc3, 0x32, 0xa9, 0x00, 0xcf, 0xb9, 0xc4, 0xe2, 0x27,
- 0x00, 0xcf, 0x38, 0xc3, 0x32, 0xa9, 0x00, 0xcf, 0xb1, 0xc4, 0xe2, 0x27,
- 0x00, 0xcf, 0x30, 0x04, 0xc3, 0x78, 0x38, 0xc3, 0x6c, 0xa3, 0x00, 0xbf,
- 0xb9, 0xc4, 0xdc, 0x82, 0x00, 0xbf, 0xb0, 0xa5, 0x0d, 0x7f, 0xf1, 0xa4,
- 0x0d, 0x7f, 0xe9, 0xa2, 0x0d, 0x7f, 0xd9, 0xa1, 0x0d, 0x7f, 0xd1, 0xa0,
- 0x0d, 0x7f, 0xc9, 0x9f, 0x0d, 0x7f, 0xc1, 0x9e, 0x0d, 0x7f, 0xb8, 0xa5,
- 0x0d, 0x7f, 0xb1, 0xa4, 0x0d, 0x7f, 0xa9, 0xa2, 0x0d, 0x7f, 0x99, 0xa1,
- 0x0d, 0x7f, 0x91, 0xa0, 0x0d, 0x7f, 0x89, 0x9f, 0x0d, 0x7f, 0x80, 0x94,
- 0x00, 0x67, 0x00, 0x8e, 0x00, 0x67, 0x08, 0xc5, 0xd8, 0x09, 0x01, 0x79,
- 0xa1, 0xc4, 0x9d, 0xc7, 0x01, 0x7b, 0x40, 0xc5, 0x8f, 0xd5, 0x01, 0x79,
- 0x99, 0xca, 0xa6, 0x6c, 0x01, 0x7d, 0x58, 0xc4, 0x27, 0x5e, 0x01, 0x7c,
- 0x48, 0xc4, 0x04, 0x6b, 0x01, 0x79, 0x69, 0x86, 0x01, 0x7d, 0x48, 0xc9,
- 0x4f, 0xff, 0x08, 0x4f, 0xa8, 0xc9, 0x4f, 0xff, 0x08, 0x4f, 0xa0, 0xc7,
- 0x0c, 0x4b, 0x08, 0x4e, 0xc3, 0x03, 0x78, 0x44, 0xc8, 0x50, 0x00, 0x08,
- 0x4f, 0x08, 0xc9, 0x4f, 0xff, 0x08, 0x4f, 0x50, 0xc7, 0x0c, 0x4b, 0x08,
- 0x4e, 0xbb, 0x03, 0x78, 0x4a, 0xc8, 0x50, 0x00, 0x08, 0x4f, 0x00, 0xc9,
- 0x4f, 0xff, 0x08, 0x4f, 0x48, 0x00, 0x43, 0x78, 0x50, 0xc9, 0x4f, 0xff,
- 0x08, 0x4f, 0x40, 0x00, 0x43, 0x78, 0x5f, 0xc9, 0x4f, 0xff, 0x08, 0x4f,
- 0x38, 0x00, 0x43, 0x78, 0x6e, 0xc9, 0x4f, 0xff, 0x08, 0x4f, 0x30, 0x00,
- 0x43, 0x78, 0x7d, 0xc9, 0x4f, 0xff, 0x08, 0x4f, 0x28, 0xc9, 0x4f, 0xff,
- 0x08, 0x4f, 0x68, 0xc4, 0x03, 0x68, 0x01, 0x4d, 0x79, 0xc2, 0x01, 0xc7,
- 0x01, 0x4d, 0x68, 0xc4, 0x03, 0x68, 0x01, 0x4d, 0x71, 0xc2, 0x01, 0xc7,
- 0x01, 0x4d, 0x60, 0xc4, 0x01, 0x1e, 0x01, 0x4d, 0x59, 0xc5, 0x01, 0xf7,
- 0x01, 0x4d, 0x50, 0xc4, 0x01, 0x1e, 0x01, 0x4d, 0x49, 0xc5, 0x01, 0xf7,
- 0x01, 0x4d, 0x40, 0x45, 0x04, 0x74, 0xc3, 0x78, 0x8c, 0xc8, 0x81, 0x8c,
- 0x05, 0x34, 0xe8, 0x48, 0x10, 0x90, 0x43, 0x78, 0xe6, 0x97, 0x00, 0xe9,
- 0xe8, 0xcc, 0x21, 0x84, 0x05, 0x3f, 0xc0, 0xc7, 0xcb, 0x7b, 0x00, 0xe9,
- 0x78, 0x87, 0x00, 0xe9, 0x68, 0xc4, 0x27, 0x5e, 0x05, 0x38, 0x01, 0xc5,
- 0xd9, 0xee, 0x05, 0x38, 0x11, 0xc2, 0x00, 0x90, 0x05, 0x38, 0x21, 0xc2,
- 0x05, 0xd4, 0x05, 0x38, 0x30, 0xc4, 0x27, 0x5e, 0x05, 0x38, 0x09, 0xc5,
- 0xd9, 0xee, 0x05, 0x38, 0x19, 0xc2, 0x00, 0x90, 0x05, 0x38, 0x29, 0xc2,
- 0x05, 0xd4, 0x05, 0x38, 0x38, 0xc4, 0x89, 0x81, 0x00, 0xed, 0xf9, 0x46,
- 0x41, 0x9c, 0xc3, 0x78, 0xee, 0x46, 0x00, 0x6b, 0xc3, 0x79, 0x20, 0xc9,
- 0xb0, 0x4e, 0x00, 0xea, 0xa1, 0xd3, 0x46, 0x94, 0x08, 0x3d, 0x59, 0xc9,
- 0xad, 0x2d, 0x08, 0x3d, 0x63, 0x03, 0x79, 0x2c, 0xcb, 0x93, 0x60, 0x08,
- 0x3d, 0x70, 0xc2, 0x23, 0xb6, 0x00, 0xed, 0xf1, 0xc2, 0x00, 0xe5, 0x00,
- 0xed, 0xa1, 0xc2, 0x00, 0x6e, 0x00, 0xec, 0xf1, 0xc2, 0x00, 0x35, 0x00,
- 0xea, 0x88, 0x46, 0x00, 0x6b, 0x43, 0x79, 0x32, 0x46, 0x00, 0x6b, 0x43,
- 0x79, 0x3e, 0x47, 0x07, 0x18, 0xc3, 0x79, 0x4a, 0xca, 0x46, 0x9d, 0x00,
- 0xec, 0xe9, 0xc2, 0x00, 0x0a, 0x00, 0xeb, 0x09, 0x46, 0x15, 0x78, 0x43,
- 0x79, 0x83, 0xc6, 0x12, 0x12, 0x00, 0xed, 0xb9, 0x00, 0x43, 0x79, 0x8f,
- 0x46, 0x00, 0x6b, 0xc3, 0x79, 0x9b, 0x05, 0xc3, 0x79, 0xa7, 0xc9, 0xb0,
- 0x0f, 0x00, 0xea, 0xc8, 0xc2, 0x00, 0x0a, 0x00, 0xed, 0x90, 0xc7, 0xcb,
- 0xba, 0x00, 0xed, 0x89, 0xc3, 0x05, 0xe7, 0x00, 0xea, 0xe9, 0xcc, 0x87,
- 0x40, 0x00, 0xea, 0xa9, 0xca, 0x1d, 0xd4, 0x08, 0x3c, 0x28, 0xce, 0x02,
- 0x79, 0x00, 0xed, 0x79, 0xc9, 0x56, 0xec, 0x00, 0xed, 0x70, 0xca, 0x1d,
- 0xd4, 0x00, 0xed, 0x60, 0x46, 0x00, 0x6b, 0xc3, 0x79, 0xb3, 0xca, 0xa2,
- 0x34, 0x05, 0x3f, 0xc9, 0xc9, 0xad, 0x2d, 0x08, 0x3c, 0xc9, 0xc9, 0xab,
- 0x74, 0x08, 0x3c, 0xd1, 0xc3, 0xd8, 0xb7, 0x08, 0x3c, 0xf2, 0x03, 0x79,
- 0xd4, 0xd2, 0x4c, 0x86, 0x00, 0xed, 0x40, 0xc3, 0x00, 0xb2, 0x00, 0xed,
- 0x29, 0xcc, 0x21, 0x84, 0x00, 0xed, 0x20, 0xd4, 0x3b, 0xb1, 0x00, 0xed,
- 0x0b, 0x03, 0x79, 0xda, 0x07, 0xc3, 0x79, 0xe0, 0x46, 0x00, 0x6b, 0xc3,
- 0x79, 0xec, 0xc9, 0xab, 0x74, 0x08, 0x3c, 0x3a, 0x03, 0x79, 0xfb, 0xcb,
- 0x8d, 0x88, 0x08, 0x3c, 0x80, 0x48, 0x10, 0x90, 0xc3, 0x7a, 0x01, 0xc8,
- 0xba, 0x4d, 0x08, 0x3c, 0x89, 0x46, 0x00, 0x6b, 0x43, 0x7a, 0x11, 0x45,
- 0x26, 0xe5, 0xc3, 0x7a, 0x1d, 0xc4, 0x32, 0x6d, 0x00, 0x17, 0x01, 0xca,
- 0x1d, 0xd4, 0x08, 0x3c, 0x98, 0xc2, 0x00, 0x34, 0x00, 0xea, 0xe1, 0xc4,
- 0xd9, 0x9f, 0x00, 0xea, 0x29, 0x87, 0x08, 0x3c, 0x18, 0x44, 0x01, 0x76,
- 0xc3, 0x7a, 0x29, 0xcc, 0x21, 0x84, 0x08, 0x3d, 0x10, 0xc3, 0x08, 0xc3,
- 0x05, 0x5a, 0xe3, 0x03, 0x7a, 0x31, 0x46, 0x00, 0x6b, 0x43, 0x7a, 0x37,
- 0xcc, 0x21, 0x78, 0x00, 0x16, 0x0b, 0x03, 0x7a, 0x43, 0xc5, 0x00, 0xaa,
- 0x00, 0x15, 0xe8, 0xe0, 0x02, 0x67, 0x08, 0x3d, 0xc8, 0xcd, 0x36, 0xd9,
- 0x00, 0x16, 0x61, 0xc6, 0x60, 0xe6, 0x00, 0x16, 0x69, 0xcc, 0x1f, 0x94,
- 0x00, 0x16, 0x71, 0xcc, 0x84, 0x40, 0x00, 0x16, 0x79, 0x42, 0x01, 0x4a,
- 0xc3, 0x7a, 0x49, 0x44, 0x01, 0x1e, 0xc3, 0x7a, 0x55, 0xd9, 0x1e, 0x8d,
- 0x05, 0x38, 0xf9, 0x16, 0xc3, 0x7a, 0x64, 0xcc, 0x49, 0x92, 0x00, 0x17,
- 0x81, 0x42, 0x00, 0x5b, 0xc3, 0x7a, 0x70, 0xd1, 0x0b, 0x36, 0x05, 0x3c,
- 0x40, 0xc5, 0x18, 0x47, 0x00, 0x15, 0xd1, 0xca, 0x30, 0x6d, 0x00, 0x17,
- 0x70, 0xc9, 0x03, 0x7e, 0x00, 0x16, 0x29, 0xc4, 0x36, 0xab, 0x00, 0x16,
- 0xa8, 0xcc, 0x03, 0x3b, 0x05, 0x38, 0xb9, 0xc5, 0x00, 0xe2, 0x05, 0x38,
- 0xc1, 0xce, 0x0f, 0xa7, 0x05, 0x38, 0xc8, 0x00, 0xc3, 0x7a, 0x7c, 0x44,
- 0x00, 0x53, 0x43, 0x7a, 0x8e, 0x47, 0x1b, 0xf3, 0xc3, 0x7a, 0x9a, 0xd2,
- 0x4a, 0x46, 0x05, 0x38, 0x91, 0xc8, 0x4a, 0x50, 0x00, 0x17, 0x28, 0x47,
- 0x1b, 0xf3, 0xc3, 0x7a, 0xa6, 0xd2, 0x4a, 0x46, 0x05, 0x38, 0xb1, 0xc8,
- 0x4a, 0x50, 0x00, 0x17, 0x48, 0xc8, 0x4a, 0x50, 0x05, 0x38, 0x49, 0xd2,
- 0x4a, 0x46, 0x05, 0x38, 0x70, 0x8e, 0x08, 0xb0, 0x48, 0x94, 0x08, 0xb0,
- 0x38, 0x83, 0x00, 0xc5, 0x29, 0xc2, 0x00, 0xa4, 0x00, 0xc5, 0x20, 0xc2,
- 0x1d, 0x5f, 0x00, 0xc5, 0x19, 0x83, 0x00, 0xc4, 0xe0, 0xc2, 0x00, 0xa4,
- 0x00, 0xc5, 0x09, 0xc3, 0x44, 0x76, 0x00, 0xc4, 0xf8, 0x83, 0x00, 0xc5,
- 0x01, 0xc2, 0x00, 0xb3, 0x00, 0xc4, 0xf0, 0xc5, 0xd7, 0x5a, 0x00, 0xc5,
- 0x49, 0xc4, 0xe6, 0x07, 0x00, 0xc4, 0x10, 0xc2, 0x00, 0xa4, 0x00, 0xc4,
- 0x69, 0x83, 0x00, 0xc4, 0x60, 0xc3, 0xaf, 0x4c, 0x00, 0xc4, 0xc9, 0xc2,
- 0x00, 0xb3, 0x00, 0xc4, 0xc0, 0xc3, 0x10, 0x99, 0x0e, 0xb6, 0xd1, 0xc5,
- 0xde, 0x58, 0x0e, 0xb6, 0x80, 0xc3, 0x10, 0x99, 0x0e, 0xba, 0x71, 0xc5,
- 0xde, 0x58, 0x0e, 0xba, 0x20, 0xc3, 0x10, 0x99, 0x0e, 0xb9, 0xa1, 0xc5,
- 0xde, 0x58, 0x0e, 0xb9, 0x50, 0xc7, 0x00, 0x70, 0x0e, 0xb9, 0x68, 0xc4,
- 0x18, 0x83, 0x0e, 0xbf, 0x99, 0xc2, 0x26, 0x51, 0x0e, 0xbf, 0x90, 0xc3,
- 0x0c, 0x5b, 0x0e, 0xbf, 0x89, 0xc3, 0x06, 0x9e, 0x0e, 0xbf, 0x80, 0xc4,
- 0x04, 0x5e, 0x0e, 0xbf, 0x79, 0xc2, 0x01, 0x47, 0x0e, 0xbf, 0x70, 0xc8,
- 0x9d, 0xa4, 0x0e, 0xbe, 0x49, 0xc9, 0xaa, 0xa5, 0x0e, 0xbe, 0x39, 0xd3,
- 0x45, 0x3e, 0x0e, 0xbe, 0x18, 0x91, 0x0e, 0xb3, 0x23, 0x03, 0x7a, 0xb2,
- 0x92, 0x0e, 0xb3, 0x2b, 0x03, 0x7a, 0xb6, 0x85, 0x0e, 0xb2, 0xc3, 0x03,
- 0x7a, 0xc6, 0x97, 0x0e, 0xb3, 0x53, 0x03, 0x7a, 0xcc, 0x96, 0x0e, 0xb3,
- 0x4b, 0x03, 0x7a, 0xd2, 0x95, 0x0e, 0xb3, 0x43, 0x03, 0x7a, 0xde, 0x88,
- 0x0e, 0xb2, 0xdb, 0x03, 0x7a, 0xe4, 0x94, 0x0e, 0xb3, 0x3b, 0x03, 0x7a,
- 0xea, 0x9a, 0x0e, 0xb3, 0x6b, 0x03, 0x7a, 0xf0, 0x90, 0x0e, 0xb3, 0x1b,
- 0x03, 0x7a, 0xf4, 0x8f, 0x0e, 0xb3, 0x13, 0x03, 0x7a, 0xf8, 0x8e, 0x0e,
- 0xb3, 0x0b, 0x03, 0x7a, 0xfc, 0x8d, 0x0e, 0xb3, 0x03, 0x03, 0x7b, 0x02,
- 0x8b, 0x0e, 0xb2, 0xf3, 0x03, 0x7b, 0x08, 0x87, 0x0e, 0xb2, 0xd3, 0x03,
- 0x7b, 0x0e, 0x9c, 0x0e, 0xb3, 0x7b, 0x03, 0x7b, 0x1a, 0x86, 0x0e, 0xb2,
- 0xcb, 0x03, 0x7b, 0x20, 0x89, 0x0e, 0xb2, 0xe3, 0x03, 0x7b, 0x26, 0x84,
- 0x0e, 0xb2, 0xbb, 0x03, 0x7b, 0x2c, 0x83, 0x0e, 0xb2, 0xb3, 0x03, 0x7b,
- 0x32, 0x9b, 0x0e, 0xb3, 0x71, 0x99, 0x0e, 0xb3, 0x61, 0x98, 0x0e, 0xb3,
- 0x59, 0x93, 0x0e, 0xb3, 0x31, 0x8c, 0x0e, 0xb2, 0xf9, 0x8a, 0x0e, 0xb2,
- 0xe8, 0x91, 0x0e, 0xb2, 0x53, 0x03, 0x7b, 0x38, 0x92, 0x0e, 0xb2, 0x5b,
- 0x03, 0x7b, 0x3c, 0x85, 0x0e, 0xb1, 0xf3, 0x03, 0x7b, 0x4c, 0x97, 0x0e,
- 0xb2, 0x83, 0x03, 0x7b, 0x52, 0x96, 0x0e, 0xb2, 0x7b, 0x03, 0x7b, 0x58,
- 0x95, 0x0e, 0xb2, 0x73, 0x03, 0x7b, 0x67, 0x94, 0x0e, 0xb2, 0x6b, 0x03,
- 0x7b, 0x6d, 0x9a, 0x0e, 0xb2, 0x9b, 0x03, 0x7b, 0x73, 0x90, 0x0e, 0xb2,
- 0x4b, 0x03, 0x7b, 0x77, 0x8f, 0x0e, 0xb2, 0x43, 0x03, 0x7b, 0x7b, 0x8e,
- 0x0e, 0xb2, 0x3b, 0x03, 0x7b, 0x7f, 0x8d, 0x0e, 0xb2, 0x33, 0x03, 0x7b,
- 0x85, 0x8b, 0x0e, 0xb2, 0x23, 0x03, 0x7b, 0x8b, 0x87, 0x0e, 0xb2, 0x03,
- 0x03, 0x7b, 0x91, 0x9c, 0x0e, 0xb2, 0xab, 0x03, 0x7b, 0x9d, 0x86, 0x0e,
- 0xb1, 0xfb, 0x03, 0x7b, 0xa3, 0x89, 0x0e, 0xb2, 0x13, 0x03, 0x7b, 0xa9,
- 0x84, 0x0e, 0xb1, 0xeb, 0x03, 0x7b, 0xaf, 0x83, 0x0e, 0xb1, 0xe3, 0x03,
- 0x7b, 0xb5, 0x9b, 0x0e, 0xb2, 0xa1, 0x99, 0x0e, 0xb2, 0x91, 0x98, 0x0e,
- 0xb2, 0x89, 0x93, 0x0e, 0xb2, 0x61, 0x8c, 0x0e, 0xb2, 0x29, 0x8a, 0x0e,
- 0xb2, 0x19, 0x88, 0x0e, 0xb2, 0x08, 0x0f, 0x43, 0x7b, 0xbb, 0xc2, 0x00,
- 0xcb, 0x0e, 0xbc, 0x39, 0xc2, 0x00, 0x0a, 0x0e, 0xbc, 0x29, 0x8b, 0x0e,
- 0xbb, 0xf8, 0xc2, 0x00, 0x0a, 0x0e, 0xbc, 0x30, 0xc6, 0x12, 0x12, 0x0e,
- 0xbc, 0x20, 0xc2, 0x13, 0xa9, 0x0e, 0xbc, 0x19, 0xc4, 0x89, 0x81, 0x0e,
- 0xbb, 0xb8, 0xc4, 0x1c, 0x84, 0x0e, 0xbc, 0x10, 0xca, 0x92, 0xd2, 0x0e,
- 0xbc, 0x08, 0xc2, 0x00, 0xf6, 0x0e, 0xbc, 0x00, 0x8b, 0x0e, 0xbb, 0xe8,
- 0x97, 0x0e, 0xbb, 0xe0, 0x97, 0x0e, 0xbb, 0xd8, 0xc4, 0xda, 0xda, 0x0e,
- 0xbb, 0xd0, 0xc4, 0x8b, 0x49, 0x0e, 0xbb, 0xc8, 0xc3, 0x00, 0xb2, 0x0e,
- 0xbb, 0xc0, 0xc2, 0x00, 0xb3, 0x0e, 0xbb, 0xb1, 0xc6, 0x12, 0x12, 0x0e,
- 0xbb, 0xa0, 0xc3, 0x05, 0xe7, 0x0e, 0xbb, 0xa8, 0xc4, 0xd7, 0x88, 0x0e,
- 0xbb, 0x98, 0xc4, 0x32, 0x6d, 0x0e, 0xbb, 0x90, 0xc3, 0x05, 0xe7, 0x0e,
- 0xbb, 0x88, 0xc4, 0xd9, 0x9f, 0x0e, 0xbb, 0x80, 0x0f, 0x43, 0x7b, 0xc7,
- 0xc2, 0x00, 0xcb, 0x0e, 0xbb, 0x69, 0xc2, 0x00, 0x0a, 0x0e, 0xbb, 0x59,
- 0x8b, 0x0e, 0xbb, 0x28, 0xc2, 0x00, 0x0a, 0x0e, 0xbb, 0x60, 0xc6, 0x12,
- 0x12, 0x0e, 0xbb, 0x50, 0xc2, 0x13, 0xa9, 0x0e, 0xbb, 0x49, 0xc4, 0x89,
- 0x81, 0x0e, 0xba, 0xea, 0x03, 0x7b, 0xd3, 0xc4, 0x1c, 0x84, 0x0e, 0xbb,
- 0x40, 0xc2, 0x00, 0xf6, 0x0e, 0xbb, 0x30, 0x8b, 0x0e, 0xbb, 0x18, 0x97,
- 0x0e, 0xbb, 0x10, 0x97, 0x0e, 0xbb, 0x08, 0xc4, 0xda, 0xda, 0x0e, 0xbb,
- 0x00, 0xc4, 0x8b, 0x49, 0x0e, 0xba, 0xf8, 0xc3, 0x00, 0xb2, 0x0e, 0xba,
- 0xf0, 0xc2, 0x00, 0xb3, 0x0e, 0xba, 0xe1, 0xc6, 0x12, 0x12, 0x0e, 0xba,
- 0xd0, 0xc3, 0x05, 0xe7, 0x0e, 0xba, 0xd8, 0xc4, 0xd7, 0x88, 0x0e, 0xba,
- 0xc8, 0xc4, 0x32, 0x6d, 0x0e, 0xba, 0xc0, 0xc3, 0x05, 0xe7, 0x0e, 0xba,
- 0xb8, 0xc4, 0xd9, 0x9f, 0x0e, 0xba, 0xb0, 0x8e, 0x00, 0x6b, 0xf2, 0x03,
- 0x7b, 0xd9, 0x90, 0x00, 0x6b, 0xd0, 0x08, 0xc3, 0x7b, 0xdd, 0x07, 0xc3,
- 0x7b, 0xe9, 0x52, 0x47, 0xe2, 0xc3, 0x7b, 0xf5, 0xc9, 0xb6, 0x51, 0x0e,
- 0x8f, 0x19, 0xca, 0xa8, 0x6a, 0x0e, 0x8f, 0x11, 0xcf, 0x65, 0xd8, 0x0e,
- 0x8f, 0x09, 0xc6, 0xd1, 0x77, 0x0e, 0x8e, 0xf0, 0xc7, 0x97, 0x39, 0x0e,
- 0x8e, 0xd8, 0x84, 0x0e, 0x8e, 0x91, 0x49, 0x32, 0xdb, 0x43, 0x7c, 0x01,
- 0x42, 0x00, 0x50, 0xc3, 0x7c, 0x0d, 0xc3, 0x4e, 0x46, 0x0e, 0x88, 0x58,
- 0x1a, 0xc3, 0x7c, 0x19, 0xcc, 0x83, 0x20, 0x0e, 0x88, 0x00, 0x44, 0xa5,
- 0xba, 0xc3, 0x7c, 0x21, 0xcb, 0x90, 0x69, 0x0e, 0x88, 0x28, 0xcc, 0x8b,
- 0xf0, 0x0e, 0x8e, 0xe9, 0x44, 0x9e, 0xe4, 0x43, 0x7c, 0x2d, 0xc7, 0xc7,
- 0xbc, 0x0e, 0x8e, 0xcb, 0x03, 0x7c, 0x39, 0xc5, 0xd7, 0xc3, 0x0e, 0x8e,
- 0xa0, 0xca, 0xa0, 0xb8, 0x0e, 0x8e, 0xe0, 0x5b, 0x19, 0x18, 0xc3, 0x7c,
- 0x3f, 0x59, 0x19, 0x1a, 0x43, 0x7c, 0x4e, 0x00, 0x43, 0x7c, 0x5d, 0x46,
- 0x01, 0xe9, 0x43, 0x7c, 0x69, 0x4c, 0x81, 0x64, 0xc3, 0x7c, 0x75, 0xce,
- 0x6f, 0x75, 0x0e, 0x88, 0xc0, 0x0b, 0xc3, 0x7c, 0x81, 0x4f, 0x61, 0x0a,
- 0x43, 0x7c, 0x8d, 0xc3, 0xe7, 0xc9, 0x0e, 0x8e, 0x79, 0xc7, 0xb3, 0x4d,
- 0x0e, 0x8c, 0x90, 0x0f, 0xc3, 0x7c, 0x99, 0xc2, 0x0b, 0xc6, 0x0e, 0x88,
- 0x60, 0x48, 0xc0, 0xdd, 0xc3, 0x7c, 0xa5, 0x49, 0xaa, 0xd2, 0x43, 0x7c,
- 0xb1, 0xc4, 0x03, 0x68, 0x0e, 0x8d, 0x91, 0xc2, 0x01, 0xc7, 0x0e, 0x8d,
- 0x88, 0x48, 0xb6, 0x8d, 0x43, 0x7c, 0xbd, 0x00, 0x43, 0x7c, 0xc9, 0xc5,
- 0x03, 0xe2, 0x0e, 0x8a, 0x99, 0xc5, 0x02, 0x31, 0x0e, 0x8a, 0x90, 0xc5,
- 0x59, 0xdd, 0x0e, 0x89, 0xd1, 0xd0, 0x59, 0xd2, 0x0e, 0x89, 0x48, 0x07,
- 0xc3, 0x7c, 0xd5, 0x42, 0x00, 0x68, 0x43, 0x7c, 0xdf, 0xc6, 0x2e, 0x85,
- 0x0e, 0x8b, 0xc9, 0xc4, 0xdf, 0xe7, 0x0e, 0x8b, 0xb9, 0xc3, 0x1f, 0x50,
- 0x0e, 0x8b, 0xa9, 0xc4, 0xdf, 0x62, 0x0e, 0x8b, 0x98, 0x00, 0x43, 0x7c,
- 0xe9, 0xc5, 0x03, 0xe2, 0x0e, 0x8e, 0x01, 0xc5, 0x02, 0x31, 0x0e, 0x8d,
- 0xf8, 0xc3, 0x0a, 0xbc, 0x0e, 0x8c, 0x89, 0x43, 0xb6, 0x48, 0x43, 0x7c,
- 0xf5, 0x10, 0xc3, 0x7d, 0x01, 0xcd, 0x7e, 0xb4, 0x0e, 0x88, 0xd0, 0xc4,
- 0x03, 0x68, 0x0e, 0x89, 0x69, 0xc2, 0x01, 0xc7, 0x0e, 0x89, 0x60, 0x48,
- 0xb6, 0x8d, 0x43, 0x7d, 0x0d, 0xc6, 0x01, 0x61, 0x0e, 0x88, 0x88, 0xc2,
- 0x19, 0x19, 0x0e, 0x8d, 0xa3, 0x03, 0x7d, 0x19, 0xc5, 0xd5, 0x34, 0x0e,
- 0x88, 0x51, 0xc7, 0xc1, 0x87, 0x0e, 0x88, 0x49, 0xcc, 0x88, 0x18, 0x0e,
- 0x88, 0x20, 0xca, 0xa0, 0xcc, 0x0e, 0x8d, 0x49, 0xc9, 0xb3, 0x4b, 0x0e,
- 0x8c, 0x98, 0xc4, 0x34, 0x39, 0x0e, 0x89, 0x59, 0xc5, 0xa5, 0xb8, 0x0e,
- 0x89, 0x51, 0xc7, 0x44, 0x9f, 0x0e, 0x88, 0x08, 0x9f, 0x0e, 0x89, 0x31,
- 0x9e, 0x0e, 0x89, 0x28, 0xc4, 0x22, 0x4b, 0x0e, 0x8a, 0xe9, 0xc4, 0x2b,
- 0xc8, 0x0e, 0x89, 0xd8, 0xca, 0x9e, 0xe2, 0x0e, 0x8d, 0x81, 0xc4, 0x22,
- 0x4b, 0x0e, 0x8a, 0xf1, 0xc4, 0x2b, 0xc8, 0x0e, 0x89, 0xe0, 0xc9, 0xac,
- 0x1f, 0x0e, 0x8d, 0x41, 0xc6, 0x2e, 0x85, 0x0e, 0x8b, 0xd1, 0xc4, 0xdf,
- 0xe7, 0x0e, 0x8b, 0xc1, 0xc3, 0x1f, 0x50, 0x0e, 0x8b, 0xb1, 0xc4, 0xdf,
- 0x62, 0x0e, 0x8b, 0xa0, 0xc4, 0x22, 0x4b, 0x0e, 0x8b, 0x01, 0xc4, 0x2b,
- 0xc8, 0x0e, 0x89, 0xf0, 0xc4, 0x03, 0x68, 0x0e, 0x89, 0x79, 0xc2, 0x01,
- 0xc7, 0x0e, 0x89, 0x70, 0x9e, 0x0e, 0x8c, 0xdb, 0x03, 0x7d, 0x1f, 0xa6,
- 0x0e, 0x8d, 0x19, 0xa5, 0x0e, 0x8d, 0x11, 0xa4, 0x0e, 0x8d, 0x09, 0xa3,
- 0x0e, 0x8d, 0x01, 0xa2, 0x0e, 0x8c, 0xf9, 0xa1, 0x0e, 0x8c, 0xf1, 0xa0,
- 0x0e, 0x8c, 0xe9, 0x9f, 0x0e, 0x8c, 0xe0, 0x57, 0x2a, 0x09, 0xc3, 0x7d,
- 0x27, 0xcb, 0x6f, 0x78, 0x0e, 0x88, 0xb0, 0xc5, 0xde, 0x08, 0x0e, 0x89,
- 0xb9, 0xc4, 0xe3, 0xc7, 0x0e, 0x89, 0xb0, 0xc9, 0xb1, 0x26, 0x0e, 0x8c,
- 0x61, 0xcf, 0x69, 0x2f, 0x0e, 0x88, 0x38, 0x44, 0x66, 0x1a, 0xc3, 0x7d,
- 0x33, 0xd3, 0x44, 0x93, 0x0e, 0x88, 0x18, 0xc4, 0x22, 0x4b, 0x0e, 0x8a,
- 0xf9, 0xc4, 0x2b, 0xc8, 0x0e, 0x89, 0xe9, 0x45, 0x0c, 0x46, 0x43, 0x7d,
- 0x3f, 0xc5, 0xde, 0x08, 0x0e, 0x89, 0xc9, 0xc4, 0xe3, 0xc7, 0x0e, 0x89,
- 0xc0, 0xc8, 0x01, 0xe7, 0x01, 0x51, 0xd9, 0xcd, 0x7a, 0xff, 0x01, 0x51,
- 0xb9, 0xd1, 0x55, 0xf6, 0x01, 0x51, 0xa9, 0xd0, 0x5b, 0x72, 0x01, 0x51,
- 0xa0, 0xc8, 0x4f, 0x39, 0x01, 0x51, 0x89, 0xc9, 0x17, 0x7a, 0x01, 0x51,
- 0x80, 0xc2, 0x00, 0xa4, 0x05, 0x53, 0x49, 0x83, 0x05, 0x53, 0x40, 0xc2,
- 0x00, 0xa4, 0x05, 0x4f, 0x71, 0x83, 0x05, 0x4f, 0x68, 0xc2, 0x00, 0xa4,
- 0x05, 0x4f, 0x21, 0x83, 0x00, 0x83, 0xf8, 0xc2, 0x00, 0xc1, 0x05, 0x4f,
- 0x19, 0xc2, 0x1d, 0x5f, 0x00, 0x83, 0xd1, 0x83, 0x00, 0x83, 0xe0, 0x83,
- 0x00, 0x83, 0xa9, 0xc2, 0x00, 0xa4, 0x00, 0x83, 0xb0, 0x83, 0x00, 0x83,
- 0xb9, 0xc2, 0x00, 0xa4, 0x05, 0x4f, 0x00, 0x83, 0x00, 0x83, 0xc1, 0xc2,
- 0x00, 0xa4, 0x05, 0x4f, 0x08, 0x4b, 0x18, 0x77, 0xc3, 0x7d, 0x4b, 0xdc,
- 0x13, 0xe2, 0x0f, 0xd2, 0x38, 0xc9, 0x1d, 0xd5, 0x01, 0x49, 0x21, 0xd4,
- 0x3a, 0x21, 0x01, 0x49, 0x41, 0x49, 0x0d, 0xbc, 0x43, 0x7d, 0x57, 0x43,
- 0x02, 0x18, 0xc3, 0x7d, 0x63, 0xc9, 0x1d, 0xd5, 0x01, 0x49, 0x19, 0xd4,
- 0x3a, 0x5d, 0x01, 0x49, 0x39, 0xd9, 0x1f, 0xb9, 0x01, 0x49, 0x90, 0xcc,
- 0x88, 0xb4, 0x09, 0xa2, 0x60, 0xcf, 0x65, 0x42, 0x09, 0xa2, 0x78, 0x87,
- 0x0f, 0x3f, 0xc8, 0x87, 0x0f, 0x3f, 0xb0, 0x87, 0x0f, 0x3f, 0x88, 0x87,
- 0x05, 0x59, 0x20, 0x83, 0x05, 0x59, 0x18, 0x83, 0x00, 0x96, 0x98, 0x87,
- 0x00, 0x96, 0xa0, 0xc3, 0x10, 0x99, 0x00, 0x1d, 0x4b, 0x03, 0x7d, 0x6f,
- 0xc5, 0xde, 0x58, 0x00, 0x1c, 0xfa, 0x03, 0x7d, 0x75, 0xcb, 0x9a, 0x77,
- 0x00, 0xff, 0x60, 0x46, 0x00, 0x6b, 0x43, 0x7d, 0x7b, 0x46, 0x00, 0x6b,
- 0x43, 0x7d, 0x95, 0xc2, 0x00, 0xb3, 0x00, 0x1c, 0xbb, 0x03, 0x7d, 0xb8,
- 0xc6, 0x12, 0x12, 0x00, 0x1c, 0xaa, 0x03, 0x7d, 0xbe, 0xc4, 0xd9, 0x9f,
- 0x00, 0x1c, 0x8b, 0x03, 0x7d, 0xc4, 0xcc, 0x8a, 0x4c, 0x00, 0x1b, 0x90,
- 0xd1, 0x52, 0x2d, 0x00, 0x1b, 0xb1, 0x8b, 0x00, 0x1d, 0x01, 0xc2, 0x00,
- 0x0a, 0x00, 0x1d, 0x31, 0xc2, 0x00, 0xcb, 0x00, 0x1d, 0x40, 0xc4, 0x89,
- 0x81, 0x00, 0x1c, 0xc1, 0xc2, 0x13, 0xa9, 0x00, 0x1d, 0x20, 0xc4, 0x1c,
- 0x84, 0x00, 0x1d, 0x19, 0xc2, 0x00, 0xf6, 0x00, 0x1f, 0xb9, 0xc2, 0x00,
- 0x57, 0x00, 0x1f, 0xd0, 0xc3, 0x10, 0x99, 0x00, 0x1e, 0x4b, 0x03, 0x7d,
- 0xca, 0xc5, 0xde, 0x58, 0x00, 0x1d, 0xfa, 0x03, 0x7d, 0xd0, 0x46, 0x00,
- 0x6b, 0x43, 0x7d, 0xd6, 0x46, 0x00, 0x6b, 0x43, 0x7d, 0xf4, 0x46, 0x00,
- 0x6b, 0x43, 0x7e, 0x00, 0xc2, 0x00, 0xb3, 0x00, 0x1d, 0xbb, 0x03, 0x7e,
- 0x1e, 0xc6, 0x12, 0x12, 0x00, 0x1d, 0xaa, 0x03, 0x7e, 0x24, 0xc4, 0xd9,
- 0x9f, 0x00, 0x1d, 0x8b, 0x03, 0x7e, 0x2a, 0x47, 0x7e, 0xdc, 0x43, 0x7e,
- 0x30, 0xc4, 0xd7, 0x88, 0x00, 0x1d, 0xa1, 0xc6, 0x52, 0x38, 0x00, 0x1e,
- 0xe8, 0xc4, 0x89, 0x81, 0x00, 0x1d, 0xc1, 0xc2, 0x13, 0xa9, 0x00, 0x1e,
- 0x20, 0xc4, 0x8b, 0x49, 0x00, 0x1d, 0xd1, 0xc4, 0x7e, 0xe4, 0x00, 0x1e,
- 0xf8, 0x8b, 0x00, 0x1e, 0x01, 0xc2, 0x00, 0x0a, 0x00, 0x1e, 0x31, 0xc2,
- 0x00, 0xcb, 0x00, 0x1e, 0x41, 0xd1, 0x52, 0x2d, 0x00, 0x1b, 0xb8, 0xc4,
- 0x1c, 0x84, 0x00, 0x1e, 0x19, 0xc5, 0xd6, 0x33, 0x00, 0x1e, 0xd9, 0xc2,
- 0x00, 0xf6, 0x00, 0x1f, 0xc1, 0x03, 0x43, 0x7e, 0x3c, 0x12, 0xc3, 0x7e,
- 0x46, 0xc3, 0x33, 0x12, 0x00, 0xe9, 0x49, 0xc5, 0xda, 0xd9, 0x00, 0xe9,
- 0x39, 0xc5, 0x52, 0x39, 0x00, 0xe9, 0x31, 0xc5, 0xa1, 0x17, 0x05, 0x5b,
- 0x28, 0xc7, 0x0a, 0xb9, 0x08, 0x0a, 0x01, 0x0a, 0xc3, 0x7e, 0x50, 0xc7,
- 0x3c, 0xfd, 0x08, 0x0a, 0x11, 0x49, 0x50, 0x00, 0x43, 0x7e, 0x5c, 0xc2,
- 0x00, 0x7b, 0x08, 0x0a, 0x1b, 0x03, 0x7e, 0x68, 0xc3, 0x43, 0xcd, 0x08,
- 0x0a, 0x22, 0x03, 0x7e, 0x6c, 0x16, 0xc3, 0x7e, 0x70, 0xc7, 0x67, 0x1b,
- 0x08, 0x0a, 0x81, 0xc4, 0x43, 0xd1, 0x08, 0x0a, 0xb8, 0xc3, 0x01, 0xb4,
- 0x08, 0x0a, 0xd1, 0xc3, 0x0a, 0xe1, 0x08, 0x0b, 0x11, 0xc5, 0x43, 0xcb,
- 0x08, 0x0b, 0x40, 0xc3, 0x01, 0xb4, 0x08, 0x0a, 0xcb, 0x03, 0x7e, 0x7c,
- 0x16, 0xc3, 0x7e, 0x80, 0x42, 0x00, 0x60, 0x43, 0x7e, 0x90, 0x42, 0x00,
- 0x60, 0xc3, 0x7e, 0x9c, 0xc3, 0x0a, 0xe1, 0x08, 0x0b, 0x02, 0x03, 0x7e,
- 0xae, 0xc9, 0x3c, 0xfc, 0x08, 0x0a, 0xf0, 0xc5, 0x01, 0x1d, 0x01, 0x54,
- 0x20, 0xc4, 0x0c, 0x55, 0x08, 0x79, 0x21, 0xc3, 0x04, 0x5f, 0x08, 0x78,
- 0xf8, 0xc4, 0x18, 0x85, 0x08, 0x79, 0x19, 0x91, 0x08, 0x78, 0xf0, 0xc3,
- 0x26, 0xf9, 0x08, 0x78, 0xdb, 0x03, 0x7e, 0xb4, 0xc5, 0xdf, 0x48, 0x08,
- 0x78, 0xb3, 0x03, 0x7e, 0xba, 0xc3, 0x1f, 0xd8, 0x08, 0x78, 0x7b, 0x03,
- 0x7e, 0xc0, 0xc2, 0x01, 0xf0, 0x08, 0x78, 0x31, 0xc4, 0xe4, 0x8f, 0x08,
- 0x78, 0x19, 0xc5, 0xa8, 0xf1, 0x08, 0x78, 0x08, 0xc3, 0x0d, 0xd9, 0x08,
- 0x78, 0xc9, 0x03, 0x43, 0x7e, 0xc6, 0x0e, 0xc3, 0x7e, 0xd2, 0xc3, 0x15,
- 0x1d, 0x08, 0x78, 0x90, 0xc2, 0x00, 0x6e, 0x08, 0x78, 0x48, 0xc3, 0x1a,
- 0xba, 0x08, 0x53, 0xe1, 0xc2, 0x26, 0xfa, 0x08, 0x53, 0xd8, 0xc4, 0x45,
- 0xf8, 0x08, 0x53, 0xc9, 0xc3, 0xdf, 0x4a, 0x08, 0x53, 0x98, 0x96, 0x08,
- 0x53, 0x51, 0xc3, 0xdf, 0x4a, 0x08, 0x53, 0x71, 0xc4, 0xd9, 0x77, 0x08,
- 0x53, 0x78, 0xcc, 0x88, 0x84, 0x08, 0x67, 0x88, 0xcc, 0x88, 0x84, 0x08,
- 0x65, 0x88, 0x89, 0x08, 0x61, 0x70, 0xc9, 0xb5, 0x4c, 0x08, 0x1e, 0x42,
- 0x03, 0x7e, 0xde, 0x83, 0x08, 0x1d, 0x19, 0x97, 0x08, 0x1d, 0x20, 0x83,
- 0x08, 0x1d, 0x29, 0x97, 0x08, 0x1d, 0x30, 0x83, 0x08, 0x1d, 0x39, 0xcb,
- 0x91, 0x45, 0x08, 0x1e, 0x58, 0x83, 0x08, 0x1d, 0x49, 0x8b, 0x08, 0x1d,
- 0x50, 0x83, 0x08, 0x1d, 0x59, 0x97, 0x08, 0x1d, 0x61, 0xc2, 0x00, 0xa4,
- 0x08, 0x1d, 0x80, 0x83, 0x08, 0x1d, 0x6b, 0x03, 0x7e, 0xea, 0x8b, 0x08,
- 0x1d, 0x71, 0x97, 0x08, 0x1d, 0x78, 0x83, 0x08, 0x1d, 0x93, 0x03, 0x7e,
- 0xf3, 0xc6, 0xd2, 0xe5, 0x08, 0x1e, 0x78, 0x83, 0x08, 0x1d, 0xa1, 0x97,
- 0x08, 0x1d, 0xa8, 0x83, 0x08, 0x1d, 0xb1, 0x8b, 0x08, 0x1d, 0xb9, 0x97,
- 0x08, 0x1d, 0xc0, 0x83, 0x08, 0x1d, 0xd1, 0x8b, 0x08, 0x1d, 0xd8, 0x83,
- 0x08, 0x1d, 0xe1, 0x97, 0x08, 0x1d, 0xe8, 0x83, 0x08, 0x1d, 0xf9, 0xc2,
- 0x00, 0xa4, 0x08, 0x1e, 0x09, 0xc2, 0x0c, 0x65, 0x08, 0x1e, 0x10, 0x19,
- 0xc3, 0x7e, 0xf9, 0xc2, 0x00, 0x4d, 0x08, 0x1e, 0x98, 0x00, 0x43, 0x7f,
- 0x03, 0xca, 0x9f, 0x00, 0x0e, 0x7d, 0x30, 0x46, 0x00, 0x6b, 0x43, 0x7f,
- 0x15, 0xcc, 0x83, 0xe0, 0x0e, 0x7c, 0xf8, 0x43, 0x98, 0x1a, 0x43, 0x7f,
- 0x21, 0xcb, 0x98, 0x1a, 0x0e, 0x7c, 0x50, 0xc5, 0x01, 0xf7, 0x0e, 0x78,
- 0xb1, 0xc4, 0x01, 0x1e, 0x0e, 0x78, 0x50, 0x97, 0x00, 0xc7, 0x88, 0x91,
- 0x00, 0xc7, 0x60, 0x91, 0x00, 0xc7, 0x58, 0xc5, 0x00, 0xfd, 0x00, 0xc7,
- 0xa9, 0xc5, 0xd6, 0x6f, 0x00, 0xc7, 0x70, 0x87, 0x00, 0xb1, 0x58, 0x87,
- 0x00, 0xb2, 0x58, 0x87, 0x00, 0xb0, 0xf8, 0x87, 0x00, 0xae, 0x38, 0x83,
- 0x00, 0xb3, 0x61, 0x8b, 0x00, 0xb3, 0x59, 0x87, 0x00, 0xb3, 0x4b, 0x03,
- 0x7f, 0x2d, 0x91, 0x00, 0xb3, 0x41, 0x97, 0x00, 0xb3, 0x38, 0x87, 0x00,
- 0xaf, 0x28, 0x87, 0x00, 0xb2, 0xf0, 0x87, 0x00, 0xae, 0xf8, 0x8b, 0x00,
- 0xb1, 0xc1, 0x87, 0x00, 0xb1, 0xb3, 0x03, 0x7f, 0x31, 0x91, 0x00, 0xb1,
- 0xa9, 0x97, 0x00, 0xb1, 0xa1, 0x83, 0x00, 0xb1, 0xc8, 0x87, 0x00, 0xb1,
- 0xe8, 0x87, 0x00, 0xaf, 0xf0, 0x87, 0x00, 0xaf, 0xc0, 0x87, 0x00, 0xae,
- 0xc8, 0x87, 0x00, 0xb1, 0x88, 0x87, 0x00, 0xb2, 0xb8, 0x83, 0x00, 0xc7,
- 0x10, 0x91, 0x00, 0xc7, 0x08, 0x87, 0x00, 0xa6, 0xe9, 0x8b, 0x00, 0xa6,
- 0xfb, 0x03, 0x7f, 0x35, 0x91, 0x00, 0xa7, 0x1b, 0x03, 0x7f, 0x39, 0x83,
- 0x00, 0xa7, 0x3a, 0x03, 0x7f, 0x3d, 0x8b, 0x00, 0xa2, 0xd3, 0x03, 0x7f,
- 0x41, 0x87, 0x00, 0xa2, 0xc1, 0x91, 0x00, 0xa2, 0xf3, 0x03, 0x7f, 0x45,
- 0x83, 0x00, 0xa3, 0x12, 0x03, 0x7f, 0x49, 0x83, 0x00, 0xa9, 0xd3, 0x03,
- 0x7f, 0x4d, 0x91, 0x00, 0xa9, 0xb3, 0x03, 0x7f, 0x51, 0x8b, 0x00, 0xa9,
- 0x93, 0x03, 0x7f, 0x55, 0x87, 0x00, 0xa9, 0x80, 0x83, 0x00, 0xa9, 0x13,
- 0x03, 0x7f, 0x59, 0x8b, 0x00, 0xa8, 0xd3, 0x03, 0x7f, 0x5d, 0x87, 0x00,
- 0xa8, 0xc1, 0x91, 0x00, 0xa8, 0xf2, 0x03, 0x7f, 0x61, 0x83, 0x00, 0xa8,
- 0x0b, 0x03, 0x7f, 0x65, 0x87, 0x00, 0xa7, 0xb9, 0x8b, 0x00, 0xa7, 0xcb,
- 0x03, 0x7f, 0x69, 0x91, 0x00, 0xa7, 0xea, 0x03, 0x7f, 0x6d, 0x83, 0x00,
- 0xa2, 0x2b, 0x03, 0x7f, 0x71, 0x91, 0x00, 0xa2, 0x0b, 0x03, 0x7f, 0x75,
- 0x8b, 0x00, 0xa1, 0xeb, 0x03, 0x7f, 0x79, 0x87, 0x00, 0xa1, 0xd8, 0x91,
- 0x00, 0xa4, 0xd8, 0x8b, 0x00, 0xa4, 0xb8, 0x83, 0x00, 0xa4, 0xf8, 0x83,
- 0x00, 0xa0, 0xd0, 0x91, 0x00, 0xa0, 0xa8, 0x8b, 0x00, 0xa0, 0x88, 0x83,
- 0x00, 0xa4, 0x08, 0x8b, 0x00, 0xa3, 0xc8, 0x91, 0x00, 0xa3, 0xe8, 0x87,
- 0x00, 0xa5, 0x69, 0x8b, 0x00, 0xa5, 0x7b, 0x03, 0x7f, 0x7d, 0x91, 0x00,
- 0xa5, 0x9b, 0x03, 0x7f, 0x81, 0x83, 0x00, 0xa5, 0xba, 0x03, 0x7f, 0x85,
- 0x83, 0x00, 0xa6, 0x70, 0x83, 0x00, 0xb3, 0xe3, 0x03, 0x7f, 0x89, 0x91,
- 0x00, 0xb3, 0xd3, 0x03, 0x7f, 0x8d, 0x8b, 0x00, 0xb3, 0xc3, 0x03, 0x7f,
- 0x91, 0xc2, 0x00, 0x4c, 0x00, 0xb3, 0xb8, 0xc3, 0x0c, 0x5b, 0x08, 0x9b,
- 0x59, 0xc3, 0x06, 0x9e, 0x08, 0x9b, 0x50, 0xc4, 0x04, 0x5e, 0x08, 0x9b,
- 0x49, 0xc2, 0x01, 0x47, 0x08, 0x9b, 0x40, 0xc6, 0x01, 0x61, 0x00, 0x18,
- 0xb0, 0xc5, 0x01, 0x62, 0x01, 0x07, 0x79, 0xc5, 0x00, 0x95, 0x01, 0x06,
- 0xb8, 0x03, 0xc3, 0x7f, 0x95, 0xc5, 0x01, 0x62, 0x00, 0x1a, 0xa8, 0xc5,
- 0x01, 0x62, 0x00, 0x19, 0xc9, 0xc5, 0x00, 0x95, 0x00, 0x1a, 0xb8, 0xc5,
- 0x01, 0x62, 0x01, 0x07, 0x71, 0xc5, 0x00, 0x95, 0x01, 0x06, 0xb0, 0xc5,
- 0x00, 0x95, 0x00, 0xef, 0xf1, 0xc5, 0x01, 0x62, 0x00, 0x1a, 0xa0, 0xc5,
- 0x00, 0x95, 0x00, 0x18, 0x71, 0xc5, 0x01, 0x62, 0x00, 0x1a, 0x40, 0xc5,
- 0x01, 0x62, 0x00, 0xd6, 0x51, 0xc5, 0x00, 0x95, 0x00, 0xd6, 0x48, 0xc9,
- 0x0f, 0x34, 0x07, 0xf1, 0x11, 0xca, 0x0a, 0xf7, 0x07, 0xf1, 0x18, 0xc4,
- 0x01, 0x1e, 0x00, 0xef, 0xc1, 0xc5, 0x01, 0xf7, 0x00, 0x1a, 0xc0, 0xc2,
- 0x01, 0xdb, 0x01, 0x66, 0x29, 0xc3, 0x07, 0x4a, 0x01, 0x66, 0xd8, 0xc3,
- 0x00, 0xf7, 0x01, 0x66, 0x69, 0x83, 0x01, 0x66, 0x7b, 0x03, 0x7f, 0xa1,
- 0xc2, 0x01, 0xdb, 0x01, 0x66, 0x98, 0xc2, 0x04, 0xcb, 0x01, 0x66, 0xf9,
- 0xc2, 0x15, 0x1d, 0x01, 0x67, 0x08, 0xc2, 0x01, 0xdb, 0x01, 0x66, 0x21,
- 0xc3, 0x07, 0x4a, 0x01, 0x66, 0xd0, 0xc3, 0x00, 0xf7, 0x01, 0x66, 0x61,
- 0x83, 0x01, 0x66, 0x73, 0x03, 0x7f, 0xa5, 0xc2, 0x01, 0xdb, 0x01, 0x66,
- 0x90, 0xc2, 0x04, 0xcb, 0x01, 0x66, 0xf1, 0xc2, 0x15, 0x1d, 0x01, 0x67,
- 0x00, 0xc8, 0x01, 0x59, 0x0f, 0xc8, 0x09, 0xc9, 0x38, 0x82, 0x0f, 0xc8,
- 0x00, 0x42, 0x00, 0xb7, 0xc3, 0x7f, 0xa9, 0x16, 0xc3, 0x7f, 0xb3, 0x08,
- 0xc3, 0x7f, 0xbf, 0x15, 0xc3, 0x7f, 0xcb, 0xc5, 0x01, 0xdb, 0x01, 0x92,
- 0xc1, 0xc4, 0x22, 0x71, 0x01, 0x92, 0xc8, 0x42, 0x00, 0xb7, 0xc3, 0x7f,
- 0xd7, 0x16, 0xc3, 0x7f, 0xe1, 0x08, 0xc3, 0x7f, 0xed, 0x15, 0xc3, 0x7f,
- 0xf9, 0xc5, 0x01, 0xdb, 0x01, 0x95, 0x99, 0xc4, 0x22, 0x71, 0x01, 0x95,
- 0xa0, 0x42, 0x00, 0xb7, 0xc3, 0x80, 0x05, 0x16, 0xc3, 0x80, 0x0f, 0x08,
- 0xc3, 0x80, 0x1b, 0x15, 0xc3, 0x80, 0x27, 0xc5, 0x01, 0xdb, 0x01, 0x95,
- 0xe9, 0xc4, 0x22, 0x71, 0x01, 0x95, 0xf0, 0x96, 0x01, 0x95, 0x09, 0xc5,
- 0x56, 0xbd, 0x01, 0x95, 0x70, 0xa0, 0x09, 0x2a, 0x01, 0x8f, 0x09, 0x1a,
- 0x30, 0x94, 0x09, 0x19, 0xf9, 0xc7, 0x5b, 0xdb, 0x09, 0x19, 0xf1, 0x8e,
- 0x09, 0x19, 0xe8, 0x86, 0x09, 0x29, 0xe9, 0x9f, 0x09, 0x19, 0x8a, 0x03,
- 0x80, 0x33, 0x8e, 0x09, 0x19, 0x71, 0x46, 0x26, 0x11, 0x43, 0x80, 0x39,
- 0xd9, 0x1d, 0xf7, 0x09, 0x15, 0xe9, 0xd9, 0x1c, 0x42, 0x09, 0x15, 0xe0,
- 0xc7, 0x26, 0x11, 0x09, 0x15, 0xb0, 0xc5, 0x39, 0x40, 0x09, 0x16, 0x68,
- 0xc4, 0x99, 0x97, 0x09, 0x16, 0x49, 0xc2, 0x00, 0x92, 0x09, 0x16, 0x40,
- 0xc2, 0x3d, 0x53, 0x09, 0x29, 0x81, 0x84, 0x09, 0x15, 0x08, 0x0a, 0xc3,
- 0x80, 0x45, 0xc2, 0x00, 0x92, 0x09, 0x14, 0xf8, 0xc2, 0x00, 0xe5, 0x09,
- 0x15, 0x31, 0x94, 0x09, 0x15, 0x29, 0x8f, 0x09, 0x15, 0x21, 0x84, 0x09,
- 0x15, 0x19, 0x9f, 0x09, 0x15, 0x10, 0xc2, 0x02, 0x53, 0x09, 0x14, 0xd9,
- 0xc2, 0x08, 0x0e, 0x09, 0x14, 0xd0, 0x84, 0x09, 0x14, 0xc0, 0xc4, 0xde,
- 0x30, 0x09, 0x29, 0x61, 0xc7, 0x3e, 0xb7, 0x09, 0x29, 0x59, 0xc2, 0x00,
- 0xe5, 0x09, 0x12, 0xf9, 0xca, 0xa7, 0x84, 0x09, 0x12, 0xf0, 0xc3, 0x00,
- 0xe4, 0x09, 0x29, 0x41, 0xd0, 0x5f, 0x82, 0x09, 0x12, 0xb8, 0x17, 0xc3,
- 0x80, 0x51, 0x8b, 0x09, 0x1c, 0x92, 0x03, 0x80, 0x59, 0x47, 0x26, 0x11,
- 0x43, 0x80, 0x5f, 0xc2, 0x01, 0xbd, 0x09, 0x12, 0xc9, 0x87, 0x09, 0x12,
- 0xc0, 0xc2, 0x00, 0xe5, 0x09, 0x12, 0xa3, 0x03, 0x80, 0x6e, 0x90, 0x09,
- 0x12, 0x98, 0xc2, 0x01, 0x32, 0x09, 0x13, 0xc8, 0xc2, 0x5a, 0x34, 0x09,
- 0x13, 0xb9, 0xc5, 0xdb, 0xf1, 0x09, 0x13, 0xb1, 0xc2, 0x00, 0x8c, 0x09,
- 0x13, 0xa9, 0xc2, 0x00, 0xc7, 0x09, 0x13, 0xa1, 0xc4, 0xe2, 0xe3, 0x09,
- 0x13, 0x99, 0xc8, 0x66, 0xc0, 0x09, 0x13, 0x91, 0xc3, 0x72, 0x28, 0x09,
- 0x13, 0x89, 0xc3, 0x03, 0x86, 0x09, 0x13, 0x81, 0xc2, 0x02, 0x8c, 0x09,
- 0x13, 0x79, 0xc6, 0xd2, 0x5b, 0x09, 0x13, 0x70, 0xd9, 0x1f, 0xd2, 0x09,
- 0x13, 0x38, 0xc3, 0x59, 0xa8, 0x09, 0x29, 0x09, 0xc2, 0x01, 0x29, 0x09,
- 0x29, 0x01, 0xc9, 0xab, 0x59, 0x09, 0x11, 0xb8, 0xc2, 0x02, 0xb4, 0x09,
- 0x1c, 0x69, 0xc2, 0x03, 0x30, 0x09, 0x11, 0xe1, 0x83, 0x09, 0x11, 0xd2,
- 0x03, 0x80, 0x74, 0x16, 0xc3, 0x80, 0x7a, 0xc3, 0x03, 0xa4, 0x09, 0x28,
- 0xe3, 0x03, 0x80, 0x86, 0x0a, 0xc3, 0x80, 0x8c, 0xc4, 0x05, 0x19, 0x09,
- 0x28, 0xd1, 0x15, 0xc3, 0x80, 0x98, 0xc4, 0x72, 0x9d, 0x09, 0x10, 0x03,
- 0x03, 0x80, 0xa2, 0x10, 0xc3, 0x80, 0xa6, 0xca, 0xa8, 0x06, 0x09, 0x10,
- 0x59, 0x42, 0x00, 0xc7, 0xc3, 0x80, 0xae, 0x0d, 0xc3, 0x80, 0xba, 0xc2,
- 0x01, 0x2e, 0x09, 0x10, 0x21, 0xc9, 0x5b, 0xd9, 0x09, 0x10, 0x11, 0xc3,
- 0x61, 0x9a, 0x09, 0x0f, 0xf9, 0xc2, 0x00, 0x92, 0x09, 0x0f, 0xf0, 0xca,
- 0x90, 0xa1, 0x09, 0x1c, 0x48, 0x17, 0xc3, 0x80, 0xc4, 0xcd, 0x7c, 0x85,
- 0x09, 0x28, 0xa1, 0xd5, 0x34, 0xe1, 0x09, 0x28, 0x99, 0xc2, 0x04, 0x6e,
- 0x09, 0x28, 0x91, 0xc3, 0x04, 0xca, 0x09, 0x28, 0x83, 0x03, 0x80, 0xce,
- 0xc2, 0x01, 0x29, 0x09, 0x28, 0x79, 0xc3, 0xa3, 0x0f, 0x09, 0x28, 0x70,
- 0x17, 0xc3, 0x80, 0xd4, 0x16, 0xc3, 0x80, 0xe2, 0xc2, 0x00, 0xc7, 0x09,
- 0x28, 0x31, 0xc3, 0xb3, 0x90, 0x09, 0x28, 0x29, 0xce, 0x74, 0x6f, 0x09,
- 0x28, 0x21, 0xc3, 0x61, 0x9a, 0x09, 0x28, 0x19, 0xc3, 0x00, 0xe4, 0x09,
- 0x28, 0x10, 0x47, 0x01, 0x2c, 0x43, 0x80, 0xec, 0xca, 0xa9, 0x32, 0x09,
- 0x26, 0xa1, 0x09, 0xc3, 0x81, 0x04, 0x97, 0x09, 0x0f, 0x2b, 0x03, 0x81,
- 0x18, 0x16, 0xc3, 0x81, 0x2e, 0x15, 0xc3, 0x81, 0x38, 0xc2, 0x00, 0x8c,
- 0x09, 0x0e, 0xd9, 0x0f, 0xc3, 0x81, 0x42, 0x0e, 0xc3, 0x81, 0x4f, 0x0d,
- 0xc3, 0x81, 0x62, 0x0b, 0xc3, 0x81, 0x6d, 0x0a, 0xc3, 0x81, 0x7a, 0xc2,
- 0x00, 0x4d, 0x09, 0x0e, 0x19, 0xc3, 0x0b, 0xc6, 0x09, 0x0e, 0x11, 0x04,
- 0xc3, 0x81, 0x87, 0x83, 0x09, 0x0d, 0xca, 0x03, 0x81, 0x91, 0xd4, 0x3e,
- 0x1d, 0x09, 0x0f, 0x80, 0xc9, 0xa7, 0xc1, 0x09, 0x0f, 0x70, 0x8e, 0x09,
- 0x1c, 0x28, 0x00, 0x43, 0x81, 0xa5, 0xd1, 0x56, 0x4b, 0x09, 0x0b, 0x30,
- 0xc2, 0x00, 0x9f, 0x09, 0x0b, 0xb9, 0xc2, 0x04, 0xcb, 0x09, 0x0b, 0xb1,
- 0xc2, 0x03, 0x86, 0x09, 0x0b, 0xa8, 0xcf, 0x66, 0xb9, 0x09, 0x08, 0xd0,
- 0x45, 0x01, 0x2e, 0xc3, 0x81, 0xb1, 0xc3, 0x58, 0x20, 0x09, 0x08, 0xa8,
- 0x0a, 0xc3, 0x81, 0xc3, 0xc2, 0x06, 0x1f, 0x09, 0x07, 0x41, 0x03, 0x43,
- 0x81, 0xce, 0x87, 0x09, 0x26, 0x23, 0x03, 0x81, 0xd6, 0xc2, 0x01, 0xbd,
- 0x09, 0x07, 0x02, 0x03, 0x81, 0xdc, 0xc3, 0x79, 0x0e, 0x09, 0x26, 0x19,
- 0x8b, 0x09, 0x06, 0xf9, 0xc9, 0xa8, 0x07, 0x09, 0x06, 0xf0, 0xc2, 0x6c,
- 0xa9, 0x09, 0x26, 0x11, 0x83, 0x09, 0x06, 0xea, 0x03, 0x81, 0xe2, 0x17,
- 0xc3, 0x81, 0xe9, 0xc2, 0x00, 0x51, 0x09, 0x06, 0xd3, 0x03, 0x81, 0xf5,
- 0x03, 0x43, 0x81, 0xfb, 0x03, 0xc3, 0x82, 0x05, 0xc3, 0xe1, 0x38, 0x09,
- 0x06, 0xa9, 0xc9, 0xaa, 0x4b, 0x09, 0x06, 0xa0, 0x83, 0x09, 0x25, 0xdb,
- 0x03, 0x82, 0x12, 0x8b, 0x09, 0x06, 0x6a, 0x03, 0x82, 0x1f, 0xc3, 0x1a,
- 0x41, 0x09, 0x25, 0xd1, 0x90, 0x09, 0x06, 0x4b, 0x03, 0x82, 0x2c, 0x8e,
- 0x09, 0x06, 0x3a, 0x03, 0x82, 0x32, 0x17, 0xc3, 0x82, 0x38, 0x8b, 0x09,
- 0x06, 0x23, 0x03, 0x82, 0x42, 0x83, 0x09, 0x06, 0x18, 0x03, 0xc3, 0x82,
- 0x48, 0xc2, 0x02, 0x53, 0x09, 0x06, 0x0a, 0x03, 0x82, 0x58, 0xc2, 0x00,
- 0xe5, 0x09, 0x05, 0xeb, 0x03, 0x82, 0x5e, 0x90, 0x09, 0x05, 0xe3, 0x03,
- 0x82, 0x65, 0xd0, 0x59, 0x52, 0x09, 0x05, 0xd9, 0x46, 0x26, 0x11, 0x43,
- 0x82, 0x6b, 0x86, 0x09, 0x07, 0x5a, 0x03, 0x82, 0x7d, 0xd3, 0x40, 0x7e,
- 0x09, 0x06, 0xb9, 0xc7, 0x66, 0xc1, 0x09, 0x06, 0xb0, 0xcb, 0x8f, 0xc4,
- 0x09, 0x05, 0x80, 0xc8, 0x07, 0x08, 0x09, 0x05, 0x68, 0xca, 0x8f, 0xc5,
- 0x09, 0x05, 0x20, 0x8f, 0x09, 0x24, 0xfb, 0x03, 0x82, 0x83, 0xc5, 0xda,
- 0x7a, 0x09, 0x24, 0xf0, 0xc4, 0x5a, 0x32, 0x09, 0x24, 0xe3, 0x03, 0x82,
- 0x89, 0x94, 0x09, 0x24, 0xd8, 0xc2, 0x00, 0xe5, 0x09, 0x24, 0xb1, 0xc7,
- 0xc2, 0x91, 0x09, 0x24, 0xa8, 0xc8, 0x11, 0xc0, 0x09, 0x24, 0x78, 0x47,
- 0x5a, 0x35, 0xc3, 0x82, 0x8f, 0xc2, 0x00, 0xe5, 0x09, 0x03, 0x68, 0x97,
- 0x09, 0x03, 0x2b, 0x03, 0x82, 0x9b, 0x83, 0x09, 0x03, 0x20, 0xc8, 0x34,
- 0xed, 0x09, 0x03, 0x10, 0xc2, 0x04, 0xdd, 0x09, 0x02, 0xf9, 0x8b, 0x09,
- 0x02, 0xeb, 0x03, 0x82, 0xa5, 0x83, 0x09, 0x02, 0xda, 0x03, 0x82, 0xab,
- 0x8b, 0x09, 0x02, 0xd1, 0xc4, 0xde, 0x4f, 0x09, 0x02, 0xc8, 0xc3, 0x00,
- 0xba, 0x09, 0x02, 0xc1, 0xca, 0x96, 0x6d, 0x09, 0x02, 0xb8, 0xdf, 0x0d,
- 0xbb, 0x09, 0x01, 0xe8, 0xe0, 0x03, 0x87, 0x09, 0x01, 0xd8, 0xc2, 0x02,
- 0xb4, 0x09, 0x14, 0x69, 0xc2, 0x04, 0xdd, 0x09, 0x14, 0x61, 0xc3, 0x47,
- 0x43, 0x09, 0x14, 0x58, 0xca, 0x9f, 0xb4, 0x00, 0x24, 0x58, 0xc3, 0xe6,
- 0x94, 0x00, 0x28, 0x39, 0xc2, 0x1b, 0xa5, 0x00, 0x28, 0x19, 0x87, 0x00,
- 0x28, 0x08, 0xc9, 0x1e, 0x4b, 0x00, 0x27, 0xd8, 0xc3, 0x2d, 0xf3, 0x05,
- 0x32, 0x99, 0x83, 0x05, 0x32, 0xb9, 0xd1, 0x52, 0x60, 0x05, 0x32, 0xe9,
- 0x87, 0x00, 0x23, 0x29, 0xca, 0x52, 0x67, 0x00, 0x23, 0x49, 0xc7, 0xc7,
- 0x4c, 0x00, 0x23, 0x68, 0x06, 0xc3, 0x82, 0xb1, 0xc5, 0x1f, 0x0a, 0x00,
- 0x26, 0x10, 0xc8, 0x23, 0xe0, 0x00, 0x25, 0xb9, 0xc8, 0x1e, 0x43, 0x00,
- 0x27, 0xa8, 0xc8, 0x1e, 0x43, 0x00, 0x26, 0xe1, 0xc8, 0x23, 0xe0, 0x00,
- 0x24, 0xb0, 0xc7, 0xc6, 0xa4, 0x00, 0x6d, 0x41, 0xc6, 0x8e, 0xa0, 0x00,
- 0x6d, 0x70, 0xc7, 0xc9, 0xfa, 0x00, 0x6d, 0x51, 0xc6, 0x8e, 0xa0, 0x00,
- 0x6d, 0x80, 0xc5, 0x20, 0x8c, 0x0e, 0xce, 0xa1, 0xc7, 0xbc, 0x7d, 0x0e,
- 0xce, 0x28, 0xc5, 0x20, 0x8c, 0x0e, 0xce, 0x99, 0xc7, 0xbc, 0x7d, 0x0e,
- 0xce, 0x20, 0xc5, 0x20, 0x8c, 0x0e, 0xce, 0x91, 0xc7, 0xbc, 0x7d, 0x0e,
- 0xce, 0x18, 0xc5, 0xd8, 0xcc, 0x0e, 0xcd, 0x99, 0xca, 0xa2, 0x70, 0x0e,
- 0xcd, 0x60, 0xc5, 0xd8, 0xcc, 0x0e, 0xcd, 0x91, 0xca, 0xa2, 0x70, 0x0e,
- 0xcd, 0x58, 0xc5, 0xd8, 0xcc, 0x0e, 0xcd, 0x89, 0xca, 0xa2, 0x70, 0x0e,
- 0xcd, 0x50, 0xc9, 0x52, 0x02, 0x0e, 0xd3, 0x30, 0xc9, 0x52, 0x02, 0x0e,
- 0xd3, 0x20, 0xcb, 0x51, 0xcd, 0x0e, 0xd1, 0x19, 0xc6, 0x05, 0x96, 0x0e,
- 0xd1, 0x10, 0xcb, 0x51, 0xcd, 0x0e, 0xd1, 0x31, 0xc6, 0x05, 0x96, 0x0e,
- 0xd1, 0x28, 0xc4, 0x0e, 0xa3, 0x0e, 0xc8, 0x21, 0xc5, 0x0d, 0xe0, 0x0e,
- 0xc7, 0xab, 0x03, 0x82, 0xbd, 0xc5, 0x08, 0x42, 0x0e, 0xc0, 0x03, 0x03,
- 0x82, 0xc1, 0x47, 0x00, 0x50, 0xc3, 0x82, 0xc5, 0x45, 0x00, 0x5b, 0xc3,
- 0x82, 0xea, 0x47, 0x14, 0xea, 0xc3, 0x83, 0x17, 0xdb, 0x17, 0x83, 0x0e,
- 0xc2, 0x50, 0x46, 0xd4, 0xb3, 0xc3, 0x83, 0x3f, 0x46, 0x0d, 0xe0, 0xc3,
- 0x83, 0x54, 0xc4, 0x0e, 0xa3, 0x0e, 0xc2, 0xe3, 0x03, 0x83, 0x66, 0xd4,
- 0x3f, 0x99, 0x0e, 0xc2, 0xd9, 0x08, 0x43, 0x83, 0x6a, 0x00, 0x43, 0x83,
- 0x7c, 0x00, 0x43, 0x83, 0x94, 0xc6, 0x14, 0xea, 0x0e, 0xc5, 0x99, 0xdd,
- 0x10, 0xa6, 0x0e, 0xc5, 0x68, 0xc5, 0x08, 0x42, 0x0e, 0xc5, 0x1b, 0x03,
- 0x83, 0xa0, 0xc2, 0x01, 0xc7, 0x0e, 0xc4, 0xb0, 0xc5, 0x08, 0x42, 0x0e,
- 0xc0, 0x23, 0x03, 0x83, 0xa9, 0xc6, 0x00, 0x50, 0x0e, 0xc6, 0x2b, 0x03,
- 0x83, 0xad, 0xc4, 0x00, 0x5b, 0x0e, 0xc5, 0x3b, 0x03, 0x83, 0xb3, 0xc6,
- 0x14, 0xea, 0x0e, 0xc4, 0x53, 0x03, 0x83, 0xb9, 0x46, 0x0d, 0xe0, 0xc3,
- 0x83, 0xbd, 0xc8, 0xb7, 0x55, 0x0e, 0xc4, 0x11, 0xc4, 0x01, 0x75, 0x0e,
- 0xc3, 0xdb, 0x03, 0x83, 0xcc, 0xc5, 0x04, 0x73, 0x0e, 0xc3, 0xf1, 0x08,
- 0x43, 0x83, 0xd0, 0x47, 0x00, 0x50, 0xc3, 0x83, 0xdc, 0x52, 0x38, 0xf5,
- 0xc3, 0x83, 0xeb, 0xca, 0x9d, 0xfc, 0x0e, 0xc5, 0xc9, 0xc8, 0xba, 0x9d,
- 0x0e, 0xc3, 0x50, 0x00, 0x43, 0x83, 0xfd, 0x00, 0x43, 0x84, 0x2a, 0xde,
- 0x0d, 0xda, 0x0e, 0xc7, 0x49, 0xdc, 0x14, 0xde, 0x0e, 0xc6, 0xb3, 0x03,
- 0x84, 0x3c, 0x46, 0x0d, 0xe0, 0xc3, 0x84, 0x42, 0xc8, 0xb7, 0x55, 0x0e,
- 0xc3, 0x41, 0xd6, 0x17, 0x83, 0x0e, 0xc2, 0x48, 0x47, 0x00, 0x50, 0xc3,
- 0x84, 0x4e, 0xc5, 0x08, 0x42, 0x0e, 0xc0, 0x0b, 0x03, 0x84, 0x5d, 0xcb,
- 0x14, 0xe5, 0x0e, 0xc5, 0x89, 0x47, 0x14, 0xea, 0x43, 0x84, 0x61, 0xc7,
- 0x29, 0xd4, 0x0e, 0xc3, 0xd1, 0xc4, 0x0d, 0xf4, 0x0e, 0xc3, 0xc0, 0xc5,
- 0x0d, 0xe6, 0x0e, 0xd0, 0x29, 0xc8, 0x43, 0xd5, 0x0e, 0xd0, 0x18, 0xc5,
- 0x0d, 0xe6, 0x0e, 0xd0, 0x21, 0xc4, 0x00, 0x30, 0x0e, 0xd0, 0x11, 0xc8,
- 0x43, 0xd5, 0x0e, 0xd0, 0x08, 0xc4, 0x04, 0x74, 0x0e, 0xce, 0xe9, 0xc4,
- 0xa5, 0x72, 0x0e, 0xce, 0xe0, 0x46, 0x20, 0x8c, 0xc3, 0x84, 0x6d, 0x48,
- 0xbc, 0x7d, 0x43, 0x84, 0x79, 0xc5, 0x17, 0xef, 0x0e, 0xcb, 0x3b, 0x03,
- 0x84, 0x85, 0xc6, 0x06, 0x1b, 0x0e, 0xcb, 0x31, 0xc5, 0x04, 0x73, 0x0e,
- 0xcb, 0x28, 0x46, 0x17, 0xef, 0xc3, 0x84, 0x8b, 0x46, 0x04, 0x73, 0x43,
- 0x84, 0x97, 0x46, 0x17, 0xef, 0xc3, 0x84, 0xa3, 0x46, 0x04, 0x73, 0x43,
- 0x84, 0xaf, 0x47, 0x2f, 0x01, 0xc3, 0x84, 0xbb, 0xcc, 0x86, 0x08, 0x0e,
- 0xce, 0x49, 0xcc, 0x88, 0x6c, 0x0e, 0xce, 0x40, 0x46, 0x17, 0xef, 0xc3,
- 0x84, 0xc7, 0x46, 0x04, 0x73, 0x43, 0x84, 0xd3, 0xc2, 0x00, 0x15, 0x0e,
- 0xce, 0xc0, 0x46, 0x20, 0x8c, 0xc3, 0x84, 0xdf, 0x48, 0xbc, 0x7d, 0x43,
- 0x84, 0xeb, 0xc5, 0x17, 0xef, 0x0e, 0xcd, 0xb1, 0xc6, 0x06, 0x1b, 0x0e,
- 0xcd, 0xa9, 0xc5, 0x04, 0x73, 0x0e, 0xcd, 0xa0, 0xc5, 0xd8, 0xcc, 0x0e,
- 0xcd, 0x81, 0xca, 0xa2, 0x70, 0x0e, 0xcd, 0x48, 0x47, 0x2f, 0x01, 0xc3,
- 0x84, 0xf7, 0x47, 0x06, 0xf1, 0x43, 0x85, 0x09, 0x0a, 0xc3, 0x85, 0x1b,
- 0x42, 0x00, 0x6e, 0xc3, 0x85, 0x27, 0x48, 0x19, 0x0b, 0x43, 0x85, 0x33,
- 0xc6, 0x06, 0x1b, 0x0e, 0xcd, 0x09, 0xc5, 0x04, 0x73, 0x0e, 0xcd, 0x00,
- 0xc5, 0x17, 0xef, 0x0e, 0xc9, 0x63, 0x03, 0x85, 0x48, 0xc6, 0x06, 0x1b,
- 0x0e, 0xc9, 0x59, 0xc5, 0x04, 0x73, 0x0e, 0xc9, 0x50, 0xc2, 0x00, 0x15,
- 0x0e, 0xcb, 0x20, 0xc2, 0x00, 0x15, 0x0e, 0xcb, 0x00, 0xc5, 0x04, 0x73,
- 0x0e, 0xc9, 0x31, 0xc5, 0x17, 0xef, 0x0e, 0xc9, 0x28, 0xd0, 0x5b, 0xf2,
- 0x08, 0xae, 0x59, 0xd2, 0x49, 0xec, 0x08, 0xae, 0x50, 0xc8, 0x0c, 0x4a,
- 0x01, 0x0b, 0xf0, 0x00, 0x43, 0x85, 0x4e, 0xdf, 0x0d, 0x5e, 0x01, 0x4b,
- 0x79, 0x06, 0x43, 0x85, 0x60, 0xd2, 0x06, 0x54, 0x0f, 0xc0, 0x19, 0xd5,
- 0x03, 0x72, 0x0f, 0xc0, 0x98, 0xca, 0x01, 0xf7, 0x01, 0x0d, 0x99, 0xc9,
- 0x01, 0x1e, 0x01, 0x0d, 0x90, 0xd6, 0x30, 0xcf, 0x01, 0x1b, 0xe1, 0xc3,
- 0x12, 0x7a, 0x01, 0x15, 0xf0, 0xc9, 0x37, 0x1e, 0x01, 0x4c, 0x90, 0x45,
- 0x00, 0x6c, 0xc3, 0x85, 0x66, 0xc6, 0x11, 0xa5, 0x01, 0x5b, 0x91, 0x44,
- 0x00, 0x7a, 0x43, 0x85, 0x90, 0xc3, 0x12, 0xec, 0x01, 0x48, 0xb3, 0x03,
- 0x85, 0x96, 0xd2, 0x06, 0x55, 0x01, 0x5f, 0x70, 0xcf, 0x62, 0x72, 0x01,
- 0x4b, 0x69, 0x46, 0x00, 0x95, 0xc3, 0x85, 0x9c, 0xc6, 0x11, 0xa5, 0x01,
- 0x4a, 0xb9, 0xc8, 0xb2, 0xf2, 0x01, 0x4a, 0xf8, 0x46, 0x00, 0x95, 0xc3,
- 0x85, 0xa2, 0xc8, 0xb2, 0xf2, 0x01, 0x4a, 0xd9, 0xc6, 0x11, 0xa5, 0x01,
- 0x4a, 0x98, 0xcf, 0x2c, 0x05, 0x01, 0x48, 0xa1, 0xd6, 0x2d, 0x07, 0x01,
- 0x48, 0xa8, 0xc2, 0x00, 0xda, 0x00, 0x70, 0x11, 0xc3, 0x01, 0xc1, 0x00,
- 0x70, 0x19, 0xc3, 0x4c, 0x27, 0x00, 0x70, 0x21, 0xc2, 0x00, 0xb7, 0x00,
- 0x70, 0x28, 0xc3, 0x93, 0xe1, 0x00, 0x72, 0x19, 0xc4, 0xa6, 0x6a, 0x00,
- 0x72, 0x20, 0x87, 0x00, 0x71, 0xb8, 0x03, 0xc3, 0x85, 0xaa, 0xc3, 0x3f,
- 0x7b, 0x00, 0x70, 0xb1, 0xc3, 0x02, 0xa8, 0x00, 0x70, 0xc0, 0xc3, 0x3f,
- 0x7b, 0x00, 0x70, 0xe1, 0xc2, 0x00, 0x57, 0x00, 0x70, 0xf0, 0xc2, 0x00,
- 0xf6, 0x00, 0x72, 0x49, 0xc2, 0x00, 0x5b, 0x00, 0x72, 0x50, 0xc5, 0xdc,
- 0xfa, 0x00, 0x44, 0xd1, 0xc6, 0xce, 0x4d, 0x00, 0x44, 0xc8, 0xc3, 0x1d,
- 0xf6, 0x00, 0x46, 0xe9, 0x8a, 0x00, 0x46, 0x60, 0xc6, 0xce, 0x2f, 0x00,
- 0x46, 0xe1, 0xc7, 0xca, 0xda, 0x00, 0x46, 0xd9, 0xcb, 0x8f, 0x61, 0x00,
- 0x46, 0xd1, 0xc5, 0xd7, 0xb9, 0x00, 0x46, 0xa1, 0xc5, 0xdb, 0x1a, 0x00,
- 0x44, 0xc0, 0xca, 0x9e, 0x24, 0x00, 0x30, 0xb1, 0xcc, 0x86, 0x74, 0x00,
- 0x30, 0xb0, 0xcc, 0x05, 0x7b, 0x07, 0xe0, 0xb1, 0xcb, 0x12, 0x31, 0x07,
- 0xe5, 0x40, 0x44, 0x19, 0xa7, 0xc3, 0x85, 0xb4, 0xce, 0x43, 0xed, 0x07,
- 0xed, 0x29, 0xd7, 0x2b, 0x06, 0x07, 0xed, 0x38, 0xcc, 0x05, 0x7b, 0x07,
- 0xe0, 0xa9, 0xcb, 0x12, 0x31, 0x07, 0xe5, 0x38, 0xd7, 0x2b, 0x06, 0x07,
- 0xed, 0x31, 0xce, 0x43, 0xed, 0x07, 0xed, 0xf0, 0xcc, 0x05, 0x7b, 0x07,
- 0xe0, 0xc1, 0xcb, 0x12, 0x31, 0x07, 0xe5, 0x50, 0xce, 0x43, 0xed, 0x07,
- 0xea, 0xd1, 0xd7, 0x2b, 0x06, 0x07, 0xea, 0xd8, 0xcc, 0x05, 0x7b, 0x07,
- 0xe0, 0xb9, 0xcb, 0x12, 0x31, 0x07, 0xe5, 0x48, 0xcc, 0x05, 0x7b, 0x07,
- 0xe2, 0x91, 0xcb, 0x12, 0x31, 0x07, 0xe6, 0xc0, 0xd1, 0x2d, 0xfe, 0x07,
- 0xec, 0x99, 0xd1, 0x55, 0x4c, 0x07, 0xec, 0xa0, 0xcd, 0x05, 0x7a, 0x07,
- 0xe7, 0xf1, 0xca, 0x2b, 0x13, 0x07, 0xe8, 0xd0, 0x43, 0x2e, 0x61, 0xc3,
- 0x85, 0xc0, 0x43, 0x02, 0x98, 0x43, 0x85, 0xcc, 0xcb, 0x66, 0x54, 0x07,
- 0xe7, 0x49, 0xca, 0x2b, 0x13, 0x07, 0xe9, 0x41, 0x0b, 0xc3, 0x85, 0xe2,
- 0x45, 0x00, 0x6c, 0x43, 0x85, 0xee, 0xca, 0x2b, 0x13, 0x07, 0xe8, 0xc9,
- 0xcd, 0x05, 0x7a, 0x07, 0xe7, 0xe8, 0xca, 0x2b, 0x13, 0x07, 0xe9, 0x29,
- 0x0b, 0xc3, 0x85, 0xfa, 0xd3, 0x43, 0xe8, 0x07, 0xeb, 0x49, 0xcb, 0x66,
- 0x54, 0x07, 0xe9, 0xb8, 0xca, 0x2b, 0x13, 0x07, 0xe9, 0x39, 0x0b, 0xc3,
- 0x86, 0x06, 0xcb, 0x66, 0x54, 0x07, 0xe9, 0xc8, 0xca, 0x2b, 0x13, 0x07,
- 0xe9, 0x49, 0xcd, 0x05, 0x7a, 0x07, 0xe8, 0x68, 0x00, 0xc3, 0x86, 0x12,
- 0xd1, 0x53, 0x1b, 0x07, 0xe2, 0xf8, 0x00, 0xc3, 0x86, 0x1e, 0xd1, 0x53,
- 0x1b, 0x07, 0xe2, 0xf0, 0xcb, 0x66, 0x54, 0x07, 0xe7, 0x91, 0xcd, 0x05,
- 0x7a, 0x07, 0xe3, 0x00, 0xcc, 0x05, 0x7b, 0x07, 0xe0, 0xf9, 0xcb, 0x12,
- 0x31, 0x07, 0xe5, 0x80, 0x44, 0x19, 0xa7, 0xc3, 0x86, 0x2a, 0xd1, 0x2d,
- 0xfe, 0x07, 0xeb, 0x09, 0x45, 0x19, 0x9d, 0x43, 0x86, 0x36, 0xcc, 0x05,
- 0x7b, 0x07, 0xe0, 0xf1, 0xcb, 0x12, 0x31, 0x07, 0xe5, 0x78, 0xd7, 0x2b,
- 0x06, 0x07, 0xed, 0x41, 0xce, 0x43, 0xed, 0x07, 0xee, 0x30, 0x0b, 0xc3,
- 0x86, 0x42, 0xcb, 0x66, 0x54, 0x07, 0xe9, 0xa9, 0xd6, 0x2d, 0xf9, 0x07,
- 0xea, 0xe0, 0xcc, 0x12, 0x30, 0x07, 0xe9, 0x89, 0xcb, 0x66, 0x54, 0x07,
- 0xe7, 0x40, 0xcc, 0x05, 0x7b, 0x07, 0xe0, 0xe1, 0xcb, 0x12, 0x31, 0x07,
- 0xe5, 0x68, 0xd0, 0x51, 0xea, 0x07, 0xea, 0xe9, 0xd7, 0x2b, 0x06, 0x07,
- 0xea, 0xf0, 0x0b, 0xc3, 0x86, 0x4e, 0x4a, 0x73, 0x4d, 0x43, 0x86, 0x5a,
- 0x0b, 0xc3, 0x86, 0x66, 0x45, 0x00, 0x6c, 0x43, 0x86, 0x72, 0xcd, 0x05,
- 0x7a, 0x07, 0xe8, 0x79, 0xca, 0x2b, 0x13, 0x07, 0xe9, 0x58, 0xca, 0x2b,
- 0x13, 0x07, 0xe9, 0x09, 0xcd, 0x05, 0x7a, 0x07, 0xe8, 0x28, 0xca, 0x2b,
- 0x13, 0x07, 0xe9, 0x11, 0xcd, 0x05, 0x7a, 0x07, 0xe8, 0x30, 0x43, 0x14,
- 0x4e, 0xc3, 0x86, 0x7e, 0x00, 0x43, 0x86, 0x88, 0xcd, 0x7f, 0xec, 0x07,
- 0xee, 0x79, 0xcf, 0x2d, 0xa8, 0x07, 0xef, 0xa8, 0xcc, 0x05, 0x7b, 0x07,
- 0xe1, 0x51, 0xcb, 0x12, 0x31, 0x07, 0xe5, 0xd8, 0xce, 0x43, 0xed, 0x07,
- 0xed, 0xb1, 0x45, 0x19, 0x9d, 0xc3, 0x86, 0x94, 0xd7, 0x2b, 0x06, 0x07,
- 0xeb, 0xc0, 0xcc, 0x05, 0x7b, 0x07, 0xe1, 0x49, 0xcb, 0x12, 0x31, 0x07,
- 0xe5, 0xd0, 0xca, 0x2b, 0x13, 0x07, 0xeb, 0xa9, 0xcc, 0x12, 0x30, 0x07,
- 0xee, 0x20, 0xcd, 0x05, 0x7a, 0x07, 0xe2, 0xe9, 0xca, 0x2b, 0x13, 0x07,
- 0xe4, 0x80, 0xca, 0x2b, 0x13, 0x07, 0xe9, 0xe1, 0xcd, 0x05, 0x7a, 0x07,
- 0xe9, 0xe8, 0x49, 0x8b, 0xb6, 0xc3, 0x86, 0xa0, 0x0f, 0x43, 0x86, 0xaa,
- 0xcd, 0x05, 0x7a, 0x07, 0xe7, 0xb1, 0xca, 0x2b, 0x13, 0x07, 0xe8, 0x90,
- 0xcd, 0x05, 0x7a, 0x07, 0xe7, 0xa9, 0xca, 0x2b, 0x13, 0x07, 0xe8, 0x88,
- 0x0b, 0xc3, 0x86, 0xb6, 0xcb, 0x66, 0x54, 0x07, 0xe9, 0xd1, 0x45, 0x00,
- 0x6c, 0x43, 0x86, 0xc2, 0xcc, 0x05, 0x7b, 0x07, 0xe1, 0x31, 0xcb, 0x12,
- 0x31, 0x07, 0xe5, 0xc0, 0xca, 0x2b, 0x13, 0x07, 0xe3, 0xd9, 0xcd, 0x05,
- 0x7a, 0x07, 0xe0, 0xa0, 0xca, 0x2b, 0x13, 0x07, 0xe3, 0xd1, 0xcd, 0x05,
- 0x7a, 0x07, 0xe0, 0x98, 0xca, 0x2b, 0x13, 0x07, 0xe3, 0xc1, 0x0b, 0xc3,
- 0x86, 0xd4, 0xcb, 0x66, 0x54, 0x07, 0xe7, 0x28, 0xcc, 0x05, 0x7b, 0x07,
- 0xe0, 0x71, 0xcb, 0x12, 0x31, 0x07, 0xe5, 0x20, 0xd1, 0x2d, 0xfe, 0x07,
- 0xea, 0xa9, 0xd0, 0x51, 0xea, 0x07, 0xea, 0xb1, 0xd1, 0x51, 0xe9, 0x07,
- 0xea, 0xb9, 0xce, 0x43, 0xed, 0x07, 0xed, 0x19, 0xd7, 0x2b, 0x06, 0x07,
- 0xed, 0x20, 0xcc, 0x05, 0x7b, 0x07, 0xe0, 0x69, 0xcb, 0x12, 0x31, 0x07,
- 0xe5, 0x18, 0xd1, 0x55, 0x4c, 0x07, 0xea, 0xa1, 0xce, 0x43, 0xed, 0x07,
- 0xed, 0x09, 0xd7, 0x2b, 0x06, 0x07, 0xed, 0x10, 0x0b, 0xc3, 0x86, 0xe0,
- 0x45, 0x00, 0x6c, 0x43, 0x86, 0xec, 0xcc, 0x12, 0x30, 0x07, 0xe5, 0x29,
- 0xcb, 0x66, 0x54, 0x07, 0xe7, 0x20, 0xcc, 0x05, 0x7b, 0x07, 0xe0, 0x59,
- 0xcb, 0x12, 0x31, 0x07, 0xe5, 0x08, 0xd1, 0x55, 0x4c, 0x07, 0xea, 0x81,
- 0xce, 0x43, 0xed, 0x07, 0xec, 0xf9, 0xd7, 0x2b, 0x06, 0x07, 0xed, 0x00,
- 0x1b, 0xc3, 0x86, 0xf8, 0x03, 0xc3, 0x87, 0x04, 0xcf, 0x62, 0x81, 0x07,
- 0xe3, 0x39, 0x45, 0x19, 0x9d, 0xc3, 0x87, 0x10, 0xcf, 0x69, 0x5c, 0x07,
- 0xe3, 0x29, 0xce, 0x6f, 0x91, 0x07, 0xe3, 0x21, 0x0a, 0xc3, 0x87, 0x20,
- 0x46, 0x2d, 0xfe, 0xc3, 0x87, 0x2c, 0x42, 0x00, 0x55, 0xc3, 0x87, 0x38,
- 0x43, 0x9b, 0x1f, 0xc3, 0x87, 0x42, 0x42, 0x01, 0x33, 0xc3, 0x87, 0x4e,
- 0x44, 0xe0, 0xab, 0xc3, 0x87, 0x5a, 0xd1, 0x51, 0xe9, 0x07, 0xe4, 0xc8,
- 0x0b, 0xc3, 0x87, 0x66, 0xd3, 0x43, 0xe8, 0x07, 0xed, 0x70, 0xca, 0x2b,
- 0x13, 0x07, 0xec, 0xe1, 0xcc, 0x12, 0x30, 0x07, 0xec, 0xe8, 0xcc, 0x05,
- 0x7b, 0x07, 0xe2, 0x61, 0xcb, 0x12, 0x31, 0x07, 0xe6, 0x98, 0xd1, 0x55,
- 0x4c, 0x07, 0xec, 0xa9, 0xd7, 0x2b, 0x06, 0x07, 0xec, 0xb1, 0xce, 0x43,
- 0xed, 0x07, 0xed, 0x98, 0xcc, 0x12, 0x30, 0x07, 0xed, 0xc1, 0xca, 0x2b,
- 0x13, 0x07, 0xed, 0xe8, 0xca, 0x2b, 0x13, 0x07, 0xec, 0xb9, 0xcc, 0x12,
- 0x30, 0x07, 0xec, 0xc0, 0xcc, 0x05, 0x7b, 0x07, 0xe1, 0xe1, 0xcb, 0x12,
- 0x31, 0x07, 0xe6, 0x40, 0x45, 0x19, 0x9d, 0xc3, 0x87, 0x72, 0xce, 0x43,
- 0xed, 0x07, 0xed, 0xb8, 0xcc, 0x05, 0x7b, 0x07, 0xe1, 0xd9, 0xcb, 0x12,
- 0x31, 0x07, 0xe6, 0x38, 0xca, 0x2b, 0x13, 0x07, 0xe4, 0x19, 0xcd, 0x05,
- 0x7a, 0x07, 0xe1, 0xe8, 0xcd, 0x05, 0x7a, 0x07, 0xf7, 0xa9, 0xca, 0x2b,
- 0x13, 0x07, 0xf7, 0xb0, 0x46, 0x08, 0x74, 0xc3, 0x87, 0x7e, 0x46, 0x00,
- 0x95, 0x43, 0x87, 0x8a, 0xca, 0x2b, 0x13, 0x07, 0xec, 0x39, 0xcc, 0x12,
- 0x30, 0x07, 0xec, 0x40, 0xcc, 0x05, 0x7b, 0x07, 0xe2, 0x01, 0xcb, 0x12,
- 0x31, 0x07, 0xe6, 0x50, 0x45, 0x19, 0x9d, 0xc3, 0x87, 0x96, 0xce, 0x43,
- 0xed, 0x07, 0xec, 0x09, 0xd7, 0x2b, 0x06, 0x07, 0xec, 0x10, 0xca, 0x2b,
- 0x13, 0x07, 0xec, 0x21, 0xcc, 0x12, 0x30, 0x07, 0xec, 0x18, 0xcc, 0x12,
- 0x30, 0x07, 0xed, 0xd1, 0xca, 0x2b, 0x13, 0x07, 0xed, 0xe0, 0xca, 0x2b,
- 0x13, 0x07, 0xe3, 0xf9, 0xcd, 0x05, 0x7a, 0x07, 0xe1, 0xb0, 0xca, 0x2b,
- 0x13, 0x07, 0xe3, 0xf1, 0xcd, 0x05, 0x7a, 0x07, 0xe1, 0xa8, 0x0b, 0xc3,
- 0x87, 0xa2, 0x45, 0x00, 0x6c, 0x43, 0x87, 0xae, 0xcc, 0x05, 0x7b, 0x07,
- 0xe1, 0x99, 0xcb, 0x12, 0x31, 0x07, 0xe6, 0x10, 0xcc, 0x05, 0x7b, 0x07,
- 0xe0, 0x41, 0xcb, 0x12, 0x31, 0x07, 0xe4, 0xf8, 0xcc, 0x05, 0x7b, 0x07,
- 0xe0, 0x39, 0xcb, 0x12, 0x31, 0x07, 0xe4, 0xf0, 0x0b, 0xc3, 0x87, 0xc0,
- 0xd3, 0x43, 0xe8, 0x07, 0xee, 0x10, 0xcb, 0x66, 0x54, 0x07, 0xe7, 0x11,
- 0xcc, 0x12, 0x30, 0x07, 0xe5, 0x00, 0x8f, 0x07, 0xea, 0x1b, 0x03, 0x87,
- 0xcc, 0xc3, 0x3a, 0x96, 0x07, 0xea, 0x28, 0xcc, 0x05, 0x7b, 0x07, 0xe2,
- 0x41, 0xcb, 0x12, 0x31, 0x07, 0xe6, 0x88, 0xcc, 0x05, 0x7b, 0x07, 0xe2,
- 0x39, 0xcb, 0x12, 0x31, 0x07, 0xe6, 0x80, 0xd1, 0x2d, 0xfe, 0x07, 0xec,
- 0x71, 0xd1, 0x55, 0x4c, 0x07, 0xec, 0x79, 0xce, 0x43, 0xed, 0x07, 0xed,
- 0xc8, 0xcc, 0x05, 0x7b, 0x07, 0xe2, 0x31, 0xcb, 0x12, 0x31, 0x07, 0xe6,
- 0x78, 0xd1, 0x2d, 0xfe, 0x07, 0xec, 0x49, 0xd1, 0x55, 0x4c, 0x07, 0xec,
- 0x51, 0xce, 0x43, 0xed, 0x07, 0xec, 0x58, 0xcc, 0x05, 0x7b, 0x07, 0xe2,
- 0x29, 0xcb, 0x12, 0x31, 0x07, 0xe6, 0x70, 0xd0, 0x51, 0xea, 0x07, 0xec,
- 0x61, 0xd1, 0x55, 0x4c, 0x07, 0xec, 0x69, 0xce, 0x43, 0xed, 0x07, 0xee,
- 0x01, 0xd1, 0x51, 0xe9, 0x07, 0xec, 0x90, 0xcb, 0x66, 0x54, 0x07, 0xdf,
- 0xf9, 0x0b, 0xc3, 0x87, 0xd2, 0xca, 0x2b, 0x13, 0x07, 0xdf, 0xe9, 0x45,
- 0x00, 0x6c, 0x43, 0x87, 0xde, 0x45, 0x00, 0x6c, 0xc3, 0x87, 0xee, 0x0b,
- 0xc3, 0x87, 0xf8, 0xca, 0x2b, 0x13, 0x07, 0xf6, 0x91, 0xcb, 0x66, 0x54,
- 0x07, 0xf6, 0xa0, 0x45, 0x00, 0x6c, 0xc3, 0x88, 0x04, 0x0b, 0xc3, 0x88,
- 0x10, 0xca, 0x2b, 0x13, 0x07, 0xf6, 0x71, 0xcb, 0x66, 0x54, 0x07, 0xf6,
- 0x80, 0x45, 0x00, 0x6c, 0xc3, 0x88, 0x1c, 0xcb, 0x66, 0x54, 0x07, 0xdc,
- 0xa9, 0x0b, 0xc3, 0x88, 0x2c, 0xca, 0x2b, 0x13, 0x07, 0xdc, 0x98, 0xcb,
- 0x66, 0x54, 0x07, 0xdc, 0xc9, 0x0b, 0xc3, 0x88, 0x38, 0xca, 0x2b, 0x13,
- 0x07, 0xdc, 0xb8, 0x45, 0x00, 0x6c, 0xc3, 0x88, 0x44, 0x0b, 0xc3, 0x88,
- 0x5c, 0xca, 0x2b, 0x13, 0x07, 0xf6, 0xf1, 0xcb, 0x66, 0x54, 0x07, 0xf7,
- 0x00, 0x46, 0x01, 0x15, 0xc3, 0x88, 0x68, 0x0b, 0xc3, 0x88, 0x74, 0xca,
- 0x2b, 0x13, 0x07, 0xf4, 0xf1, 0xcb, 0x66, 0x54, 0x07, 0xf5, 0x00, 0xca,
- 0x2b, 0x13, 0x07, 0xdc, 0x59, 0xcd, 0x05, 0x7a, 0x07, 0xdc, 0x50, 0xd6,
- 0x2d, 0x75, 0x00, 0x46, 0x20, 0x46, 0x01, 0x15, 0xc3, 0x88, 0x80, 0xcb,
- 0x66, 0x54, 0x07, 0xf6, 0x61, 0x0b, 0xc3, 0x88, 0x8c, 0xca, 0x2b, 0x13,
- 0x07, 0xf6, 0x50, 0x19, 0xc3, 0x88, 0x98, 0xc7, 0x08, 0x1f, 0x00, 0x32,
- 0x4b, 0x03, 0x88, 0xa7, 0xcd, 0x05, 0x7a, 0x07, 0xf4, 0x69, 0xca, 0x2b,
- 0x13, 0x07, 0xf4, 0x70, 0x45, 0x00, 0x6c, 0xc3, 0x88, 0xab, 0xcb, 0x66,
- 0x54, 0x07, 0xdc, 0x89, 0x0b, 0xc3, 0x88, 0xbb, 0xca, 0x2b, 0x13, 0x07,
- 0xdc, 0x78, 0x00, 0x43, 0x88, 0xc7, 0x00, 0x43, 0x88, 0xdd, 0x00, 0x43,
- 0x88, 0xe9, 0x0b, 0xc3, 0x88, 0xf5, 0xca, 0x2b, 0x13, 0x07, 0xf5, 0x31,
- 0xcb, 0x66, 0x54, 0x07, 0xf5, 0x40, 0x45, 0x00, 0x6c, 0xc3, 0x89, 0x01,
- 0xcb, 0x66, 0x54, 0x07, 0xdb, 0xe9, 0x0b, 0xc3, 0x89, 0x0d, 0xca, 0x2b,
- 0x13, 0x07, 0xdb, 0xd8, 0x00, 0x43, 0x89, 0x19, 0xcc, 0x8d, 0x64, 0x00,
- 0x46, 0x01, 0xcb, 0x66, 0x54, 0x07, 0xdb, 0x49, 0x0b, 0xc3, 0x89, 0x29,
- 0xca, 0x2b, 0x13, 0x07, 0xdb, 0x38, 0x00, 0x43, 0x89, 0x35, 0x45, 0x00,
- 0x6c, 0xc3, 0x89, 0x45, 0x0f, 0xc3, 0x89, 0x57, 0x0b, 0xc3, 0x89, 0x66,
- 0xca, 0x2b, 0x13, 0x07, 0xf4, 0xb0, 0x00, 0x43, 0x89, 0x72, 0x45, 0x00,
- 0x6c, 0xc3, 0x89, 0x82, 0x0b, 0xc3, 0x89, 0x8c, 0xca, 0x2b, 0x13, 0x07,
- 0xf6, 0x11, 0xcb, 0x66, 0x54, 0x07, 0xf6, 0x20, 0x00, 0x43, 0x89, 0x98,
- 0x00, 0x43, 0x89, 0xa4, 0x98, 0x00, 0x45, 0xf1, 0xca, 0xa7, 0x98, 0x00,
- 0x45, 0xb8, 0xcb, 0x12, 0x31, 0x07, 0xda, 0xc1, 0xcc, 0x05, 0x7b, 0x07,
- 0xda, 0xb0, 0xcb, 0x66, 0x54, 0x07, 0xdb, 0x89, 0x0b, 0xc3, 0x89, 0xb4,
- 0xca, 0x2b, 0x13, 0x07, 0xdb, 0x78, 0x45, 0x00, 0x6c, 0xc3, 0x89, 0xc0,
- 0xc6, 0x17, 0xba, 0x00, 0x36, 0x93, 0x03, 0x89, 0xd3, 0x0b, 0xc3, 0x89,
- 0xd7, 0xca, 0x2b, 0x13, 0x07, 0xf7, 0x91, 0xcb, 0x66, 0x54, 0x07, 0xf7,
- 0xa0, 0xca, 0x2b, 0x13, 0x07, 0xde, 0xe1, 0xcd, 0x05, 0x7a, 0x07, 0xde,
- 0xd8, 0x45, 0x00, 0x6c, 0xc3, 0x89, 0xe3, 0xcd, 0x05, 0x7a, 0x07, 0xf5,
- 0x69, 0xca, 0x2b, 0x13, 0x07, 0xf5, 0x70, 0xcb, 0x66, 0x54, 0x07, 0xdd,
- 0x19, 0x0b, 0xc3, 0x8a, 0x14, 0xca, 0x2b, 0x13, 0x07, 0xdd, 0x08, 0xca,
- 0x2b, 0x13, 0x07, 0xdc, 0x69, 0xcd, 0x05, 0x7a, 0x07, 0xdc, 0x60, 0x45,
- 0x00, 0x6c, 0xc3, 0x8a, 0x20, 0x0b, 0xc3, 0x8a, 0x3c, 0xca, 0x2b, 0x13,
- 0x07, 0xf4, 0x81, 0xcb, 0x66, 0x54, 0x07, 0xf4, 0x90, 0x00, 0x43, 0x8a,
- 0x48, 0xcb, 0x66, 0x54, 0x07, 0xda, 0xa9, 0x0b, 0xc3, 0x8a, 0x58, 0xca,
- 0x2b, 0x13, 0x07, 0xda, 0x98, 0xcb, 0x66, 0x54, 0x07, 0xdf, 0x49, 0xcc,
- 0x12, 0x30, 0x07, 0xdf, 0x40, 0xce, 0x05, 0x79, 0x07, 0xde, 0xe8, 0x44,
- 0x01, 0xb8, 0xc3, 0x8a, 0x64, 0xd0, 0x0e, 0xba, 0x00, 0x35, 0x40, 0xcb,
- 0x12, 0x31, 0x07, 0xf6, 0xb9, 0xcc, 0x05, 0x7b, 0x07, 0xf6, 0xa8, 0xcb,
- 0x12, 0x31, 0x07, 0xdf, 0x31, 0xcc, 0x05, 0x7b, 0x07, 0xdf, 0x20, 0xd5,
- 0x36, 0x85, 0x00, 0x45, 0x91, 0xcd, 0x05, 0x7a, 0x07, 0xf5, 0x79, 0xca,
- 0x2b, 0x13, 0x07, 0xf5, 0x80, 0x0b, 0xc3, 0x8a, 0x73, 0xca, 0x2b, 0x13,
- 0x07, 0xf6, 0x31, 0xcb, 0x66, 0x54, 0x07, 0xf6, 0x40, 0x46, 0x01, 0x15,
- 0xc3, 0x8a, 0x7f, 0x0b, 0xc3, 0x8a, 0x8b, 0xca, 0x2b, 0x13, 0x07, 0xf5,
- 0xd1, 0xcb, 0x66, 0x54, 0x07, 0xf5, 0xe0, 0xce, 0x70, 0x63, 0x00, 0x37,
- 0xd1, 0x0b, 0xc3, 0x8a, 0x97, 0xca, 0x2b, 0x13, 0x07, 0xf5, 0xb1, 0xcb,
- 0x66, 0x54, 0x07, 0xf5, 0xc0, 0x45, 0x00, 0x6c, 0xc3, 0x8a, 0xa3, 0x0b,
- 0xc3, 0x8a, 0xc5, 0xca, 0x2b, 0x13, 0x07, 0xf5, 0x91, 0xcb, 0x66, 0x54,
- 0x07, 0xf5, 0xa0, 0x00, 0x43, 0x8a, 0xd1, 0x00, 0x43, 0x8a, 0xe3, 0x00,
- 0x43, 0x8a, 0xef, 0x00, 0x43, 0x8b, 0x05, 0x00, 0x43, 0x8b, 0x11, 0xca,
- 0x2b, 0x13, 0x07, 0xdc, 0x39, 0xcd, 0x05, 0x7a, 0x07, 0xdc, 0x30, 0xcb,
- 0x66, 0x54, 0x07, 0xdb, 0xa9, 0x0b, 0xc3, 0x8b, 0x1d, 0xca, 0x2b, 0x13,
- 0x07, 0xdb, 0x98, 0xcb, 0x66, 0x54, 0x07, 0xdb, 0x69, 0x0b, 0xc3, 0x8b,
- 0x29, 0xca, 0x2b, 0x13, 0x07, 0xdb, 0x58, 0x44, 0x01, 0xb8, 0xc3, 0x8b,
- 0x35, 0xce, 0x1d, 0x9e, 0x00, 0x36, 0x51, 0xc4, 0x00, 0x5b, 0x00, 0x36,
- 0x21, 0xcb, 0x09, 0x89, 0x00, 0x31, 0x23, 0x03, 0x8b, 0x41, 0x5d, 0x11,
- 0xe5, 0x43, 0x8b, 0x45, 0x45, 0x00, 0x6c, 0xc3, 0x8b, 0x51, 0x0b, 0xc3,
- 0x8b, 0x5d, 0xca, 0x2b, 0x13, 0x07, 0xf7, 0x11, 0xcb, 0x66, 0x54, 0x07,
- 0xf7, 0x20, 0xcb, 0x66, 0x54, 0x07, 0xde, 0xb1, 0x0b, 0xc3, 0x8b, 0x69,
- 0xca, 0x2b, 0x13, 0x07, 0xde, 0xa0, 0x00, 0x43, 0x8b, 0x75, 0x45, 0x00,
- 0x6c, 0xc3, 0x8b, 0x85, 0xc6, 0x3a, 0x93, 0x00, 0x35, 0xd3, 0x03, 0x8b,
- 0xa1, 0x0b, 0xc3, 0x8b, 0xa5, 0xca, 0x2b, 0x13, 0x07, 0xf7, 0x31, 0xcb,
- 0x66, 0x54, 0x07, 0xf7, 0x40, 0xcb, 0x66, 0x54, 0x07, 0xdb, 0xc9, 0x0b,
- 0xc3, 0x8b, 0xb1, 0xca, 0x2b, 0x13, 0x07, 0xdb, 0xb8, 0x00, 0x43, 0x8b,
- 0xbd, 0xce, 0x05, 0x79, 0x07, 0xf4, 0x00, 0xcb, 0x97, 0xf9, 0x00, 0x35,
- 0xf3, 0x03, 0x8b, 0xd3, 0xc4, 0xe1, 0xe3, 0x00, 0x36, 0x0b, 0x03, 0x8b,
- 0xd7, 0x45, 0x00, 0x6c, 0xc3, 0x8b, 0xdb, 0x0b, 0xc3, 0x8b, 0xea, 0xca,
- 0x2b, 0x13, 0x07, 0xf7, 0x51, 0xcb, 0x66, 0x54, 0x07, 0xf7, 0x60, 0xc3,
- 0x2e, 0x60, 0x00, 0x33, 0xc1, 0xc4, 0x08, 0x1a, 0x00, 0x33, 0xa9, 0xc3,
- 0x78, 0xa9, 0x00, 0x33, 0xb0, 0xc2, 0x0d, 0xf7, 0x0f, 0x75, 0xa9, 0xc2,
- 0x02, 0x98, 0x0f, 0x75, 0x41, 0x0a, 0x43, 0x8b, 0xf6, 0xc4, 0xe0, 0xfb,
- 0x0f, 0x75, 0xa1, 0xc2, 0x01, 0xf2, 0x0f, 0x75, 0x89, 0xc3, 0x00, 0x4c,
- 0x0f, 0x75, 0x70, 0xc2, 0x00, 0x34, 0x0f, 0x75, 0x31, 0x8a, 0x0f, 0x75,
- 0xd0, 0x8e, 0x0f, 0x75, 0x19, 0x86, 0x0f, 0x75, 0xc8, 0xc3, 0x00, 0x4c,
- 0x0f, 0x72, 0x71, 0xc2, 0x01, 0xf2, 0x0f, 0x72, 0x89, 0xc4, 0xe0, 0xfb,
- 0x0f, 0x72, 0xa0, 0xc2, 0x01, 0xf2, 0x0f, 0x72, 0xc9, 0x47, 0x3b, 0xb1,
- 0x43, 0x8c, 0x02, 0xc2, 0x0d, 0xf7, 0x0f, 0x74, 0xb1, 0xc2, 0x00, 0x92,
- 0x0f, 0x74, 0xc0, 0xc3, 0x88, 0x60, 0x0f, 0x73, 0xe1, 0xc3, 0xb2, 0x7c,
- 0x0f, 0x73, 0xf0, 0x4b, 0x13, 0x1e, 0xc3, 0x8c, 0x0e, 0xcc, 0x02, 0x1b,
- 0x0f, 0xdd, 0x18, 0xdc, 0x13, 0x1e, 0x0f, 0xdd, 0x3b, 0x03, 0x8c, 0x14,
- 0xcc, 0x02, 0x1b, 0x0f, 0xdd, 0x12, 0x03, 0x8c, 0x1a, 0xc4, 0x01, 0x1e,
- 0x0f, 0xdd, 0x03, 0x03, 0x8c, 0x20, 0xc5, 0x01, 0xf7, 0x0f, 0xdd, 0x0a,
- 0x03, 0x8c, 0x24, 0xca, 0x00, 0xf6, 0x01, 0x29, 0x61, 0xc4, 0x01, 0x1e,
- 0x01, 0x28, 0x81, 0xc5, 0x01, 0xf7, 0x01, 0x28, 0x60, 0x16, 0xc3, 0x8c,
- 0x28, 0xd2, 0x47, 0x9a, 0x0f, 0xd0, 0x39, 0xce, 0x29, 0x88, 0x0f, 0xd0,
- 0x99, 0xdf, 0x0c, 0x47, 0x0f, 0xd0, 0xe0, 0xc5, 0xb5, 0xaf, 0x0f, 0xd2,
- 0x89, 0xc4, 0xe0, 0xaf, 0x0f, 0xd2, 0x91, 0xc6, 0xd1, 0xf5, 0x0f, 0xd2,
- 0x98, 0xce, 0x29, 0x88, 0x0f, 0xd0, 0x79, 0xdb, 0x18, 0x76, 0x0f, 0xd1,
- 0xc8, 0x44, 0x1e, 0x5b, 0xc3, 0x8c, 0x34, 0xc5, 0xc4, 0xc1, 0x0f, 0xaf,
- 0x98, 0x17, 0xc3, 0x8c, 0x40, 0x96, 0x0b, 0x4d, 0xd0, 0x9a, 0x0b, 0x4f,
- 0x31, 0xc2, 0x0f, 0xf5, 0x0b, 0x4c, 0xd0, 0x83, 0x0b, 0x4b, 0x9b, 0x03,
- 0x8c, 0x4e, 0x17, 0xc3, 0x8c, 0x54, 0x42, 0x11, 0x70, 0x43, 0x8c, 0x5c,
- 0x96, 0x0b, 0x4f, 0x88, 0x17, 0xc3, 0x8c, 0x66, 0x07, 0x43, 0x8c, 0x76,
- 0x93, 0x0b, 0x4c, 0x01, 0x92, 0x0b, 0x4b, 0xe8, 0x42, 0x00, 0xae, 0xc3,
- 0x8c, 0x85, 0x92, 0x0b, 0x4b, 0x30, 0xc2, 0x59, 0xcb, 0x0b, 0x4d, 0x81,
- 0x93, 0x0b, 0x4c, 0x70, 0xc2, 0x00, 0x11, 0x0b, 0x4b, 0x79, 0x87, 0x0b,
- 0x4c, 0x08, 0x87, 0x0b, 0x4e, 0xa3, 0x03, 0x8c, 0x91, 0xc2, 0xd0, 0x6a,
- 0x0b, 0x4c, 0x18, 0x93, 0x0b, 0x4d, 0x08, 0x90, 0x0b, 0x4b, 0x38, 0xc3,
- 0x85, 0x09, 0x0b, 0x4c, 0xe0, 0xc2, 0x0f, 0xf5, 0x0b, 0x4c, 0xc8, 0x87,
- 0x0b, 0x4b, 0x89, 0x93, 0x0b, 0x4e, 0x50, 0x8f, 0x0b, 0x4b, 0xc0, 0xc5,
- 0xdd, 0x3b, 0x0b, 0x4e, 0xd1, 0xc5, 0xd8, 0x27, 0x0b, 0x4e, 0x88, 0x96,
- 0x0b, 0x4e, 0x69, 0xc2, 0x00, 0x8f, 0x0b, 0x4d, 0x88, 0x9a, 0x0b, 0x4f,
- 0x39, 0x96, 0x0b, 0x4d, 0xe8, 0x93, 0x0b, 0x4f, 0xa0, 0x90, 0x0b, 0x4b,
- 0x59, 0x96, 0x0b, 0x4c, 0x60, 0x8f, 0x0b, 0x4b, 0xf0, 0xc6, 0xce, 0x5f,
- 0x0b, 0x4f, 0xa9, 0xc4, 0x08, 0x6e, 0x0b, 0x4e, 0x91, 0x8b, 0x0b, 0x4e,
- 0x40, 0x96, 0x0b, 0x4e, 0x20, 0x96, 0x0b, 0x4e, 0x78, 0xc3, 0xc8, 0xdd,
- 0x0b, 0x4a, 0x29, 0x03, 0xc3, 0x8c, 0x97, 0xc3, 0xdd, 0xec, 0x0b, 0x49,
- 0xd9, 0xc4, 0xc2, 0xda, 0x0b, 0x49, 0x98, 0xc3, 0x8e, 0x2c, 0x0b, 0x49,
- 0xe1, 0xc3, 0x19, 0x4c, 0x0b, 0x48, 0x99, 0x42, 0x11, 0x70, 0xc3, 0x8c,
- 0xa4, 0xc2, 0x03, 0xa5, 0x0b, 0x47, 0xf1, 0xc2, 0x01, 0xbd, 0x0b, 0x47,
- 0xe0, 0xc2, 0x00, 0x84, 0x0b, 0x4a, 0x31, 0xc2, 0x00, 0x4c, 0x0b, 0x47,
- 0xc0, 0x96, 0x0b, 0x49, 0x59, 0x92, 0x0b, 0x48, 0xf8, 0xc2, 0x06, 0x1f,
- 0x0b, 0x49, 0xc1, 0x87, 0x0b, 0x4a, 0xc8, 0x87, 0x0b, 0x48, 0xa9, 0xc2,
- 0xd0, 0x6a, 0x0b, 0x48, 0x48, 0xc3, 0x3c, 0x50, 0x0b, 0x48, 0x71, 0x96,
- 0x0b, 0x47, 0xb8, 0xc2, 0x00, 0x4c, 0x0b, 0x47, 0xa8, 0x8f, 0x0b, 0x4a,
- 0x21, 0xc3, 0x6d, 0xc2, 0x0b, 0x48, 0xb8, 0x90, 0x0b, 0x49, 0xf1, 0x96,
- 0x0b, 0x48, 0x58, 0xc6, 0x18, 0x81, 0x0b, 0x4b, 0x18, 0xc2, 0x0f, 0xf5,
- 0x0b, 0x49, 0x51, 0x96, 0x0b, 0x48, 0x40, 0x90, 0x0b, 0x47, 0xa0, 0x90,
- 0x0b, 0x4a, 0x09, 0xc3, 0x5e, 0xb1, 0x0b, 0x49, 0x19, 0x96, 0x0b, 0x48,
- 0x00, 0x92, 0x0b, 0x49, 0x61, 0x8f, 0x0b, 0x49, 0x31, 0xc8, 0xbe, 0x5d,
- 0x0b, 0x48, 0x79, 0xc7, 0xc3, 0x8d, 0x0b, 0x47, 0xf8, 0x17, 0xc3, 0x8c,
- 0xb0, 0x87, 0x0b, 0x47, 0xe8, 0x92, 0x0b, 0x49, 0xb1, 0x8f, 0x0b, 0x49,
- 0xa0, 0xc3, 0xcb, 0x93, 0x0b, 0x47, 0x49, 0xc7, 0xca, 0xf6, 0x0b, 0x47,
- 0x50, 0x8f, 0x0b, 0x47, 0x11, 0x15, 0xc3, 0x8c, 0xba, 0xc3, 0xe7, 0x8d,
- 0x0b, 0x45, 0x08, 0x97, 0x0b, 0x46, 0x53, 0x03, 0x8c, 0xc6, 0xc2, 0x00,
- 0x4d, 0x0b, 0x44, 0x98, 0xc2, 0x59, 0xcb, 0x0b, 0x44, 0xa9, 0xc9, 0xb4,
- 0xb3, 0x0b, 0x44, 0x78, 0xc2, 0xd0, 0x6a, 0x0b, 0x47, 0x29, 0xc3, 0xb1,
- 0xe2, 0x0b, 0x46, 0x40, 0x8f, 0x0b, 0x46, 0x79, 0xc2, 0x04, 0x4e, 0x0b,
- 0x46, 0x20, 0x92, 0x0b, 0x46, 0xd1, 0x8f, 0x0b, 0x46, 0xb8, 0x96, 0x0b,
- 0x45, 0xe9, 0xc5, 0xdc, 0xaf, 0x0b, 0x44, 0xa0, 0x90, 0x0b, 0x46, 0xb1,
- 0xc7, 0xc8, 0x48, 0x0b, 0x46, 0x38, 0x90, 0x0b, 0x46, 0xa1, 0xc5, 0xd8,
- 0x6d, 0x0b, 0x45, 0xc8, 0x42, 0x00, 0xae, 0xc3, 0x8c, 0xdc, 0xc3, 0x15,
- 0x1c, 0x0b, 0x46, 0xf8, 0x17, 0xc3, 0x8c, 0xe8, 0xc3, 0x0f, 0xf4, 0x0b,
- 0x46, 0x11, 0xc5, 0xd5, 0x93, 0x0b, 0x44, 0xb8, 0xc5, 0xd6, 0x1a, 0x0b,
- 0x45, 0xb9, 0x96, 0x0b, 0x45, 0x30, 0xc3, 0x3c, 0x50, 0x0b, 0x46, 0x61,
- 0x87, 0x0b, 0x45, 0x20, 0xc3, 0x8e, 0x9b, 0x0b, 0x46, 0xf1, 0xc2, 0x00,
- 0xcb, 0x0b, 0x46, 0x58, 0xc5, 0xda, 0x4d, 0x0b, 0x46, 0xc1, 0xc7, 0xc9,
- 0xb4, 0x0b, 0x45, 0x98, 0xc6, 0xd5, 0x0d, 0x0b, 0x43, 0xa9, 0xc3, 0x7b,
- 0x8b, 0x0b, 0x44, 0x51, 0xc3, 0x90, 0x0e, 0x0b, 0x43, 0xd2, 0x03, 0x8c,
- 0xf0, 0xc3, 0xe5, 0x3a, 0x0b, 0x44, 0x41, 0xc6, 0xd4, 0xfb, 0x0b, 0x44,
- 0x38, 0xc4, 0x2e, 0x22, 0x0b, 0x42, 0xf9, 0xc7, 0xca, 0x40, 0x0b, 0x42,
- 0xe0, 0xc3, 0x0f, 0xf4, 0x0b, 0x41, 0xf1, 0xca, 0xa5, 0x5e, 0x0b, 0x40,
- 0x40, 0x8f, 0x0b, 0x41, 0xb9, 0xc7, 0xc4, 0x9e, 0x0b, 0x40, 0x28, 0x8f,
- 0x0b, 0x42, 0x73, 0x03, 0x8c, 0xf6, 0xc2, 0x00, 0xcb, 0x0b, 0x42, 0x31,
- 0xc3, 0x15, 0x1c, 0x0b, 0x41, 0x91, 0xc4, 0x2f, 0x83, 0x0b, 0x40, 0xd0,
- 0xc3, 0x89, 0xda, 0x0b, 0x41, 0xb1, 0xc3, 0xe6, 0xac, 0x0b, 0x41, 0x30,
- 0xcc, 0x85, 0x00, 0x0b, 0x42, 0x08, 0xc5, 0xdf, 0x2a, 0x0b, 0x40, 0xb1,
- 0xc5, 0xb9, 0xa8, 0x0b, 0x40, 0x00, 0x00, 0x43, 0x8d, 0x08, 0x8f, 0x0b,
- 0x42, 0x61, 0xc3, 0x0f, 0xf4, 0x0b, 0x42, 0x10, 0xc2, 0x00, 0xde, 0x0b,
- 0x40, 0x51, 0xc5, 0xa9, 0xe3, 0x0b, 0x40, 0x48, 0xc2, 0x00, 0xde, 0x0b,
- 0x40, 0x19, 0xc5, 0xa9, 0xe3, 0x0b, 0x40, 0x10, 0xa2, 0x01, 0x40, 0xfb,
- 0x03, 0x8d, 0x14, 0xa3, 0x01, 0x41, 0x7b, 0x03, 0x8d, 0x26, 0xa5, 0x01,
- 0x44, 0x79, 0xa4, 0x01, 0x42, 0x7a, 0x03, 0x8d, 0x31, 0xa3, 0x01, 0x41,
- 0xbb, 0x03, 0x8d, 0x35, 0xa5, 0x01, 0x44, 0xb9, 0xa4, 0x01, 0x42, 0xba,
- 0x03, 0x8d, 0x40, 0xa5, 0x01, 0x45, 0x39, 0xa4, 0x01, 0x43, 0x3a, 0x03,
- 0x8d, 0x44, 0xa5, 0x01, 0x46, 0x38, 0xa3, 0x01, 0x41, 0xdb, 0x03, 0x8d,
- 0x48, 0xa5, 0x01, 0x44, 0xd9, 0xa4, 0x01, 0x42, 0xda, 0x03, 0x8d, 0x53,
- 0xa5, 0x01, 0x45, 0x59, 0xa4, 0x01, 0x43, 0x5a, 0x03, 0x8d, 0x57, 0xa5,
- 0x01, 0x46, 0x58, 0xa5, 0x01, 0x45, 0x99, 0xa4, 0x01, 0x43, 0x9a, 0x03,
- 0x8d, 0x5b, 0xa5, 0x01, 0x46, 0x98, 0xa5, 0x01, 0x47, 0x18, 0xa3, 0x01,
- 0x41, 0xeb, 0x03, 0x8d, 0x5f, 0xa5, 0x01, 0x44, 0xe9, 0xa4, 0x01, 0x42,
- 0xea, 0x03, 0x8d, 0x6a, 0xa5, 0x01, 0x45, 0x69, 0xa4, 0x01, 0x43, 0x6a,
- 0x03, 0x8d, 0x6e, 0xa5, 0x01, 0x46, 0x68, 0xa5, 0x01, 0x45, 0xa9, 0xa4,
- 0x01, 0x43, 0xaa, 0x03, 0x8d, 0x72, 0xa5, 0x01, 0x46, 0xa8, 0xa5, 0x01,
- 0x47, 0x28, 0xa5, 0x01, 0x45, 0xc9, 0xa4, 0x01, 0x43, 0xca, 0x03, 0x8d,
- 0x76, 0xa5, 0x01, 0x46, 0xc8, 0xa5, 0x01, 0x47, 0x48, 0xa5, 0x01, 0x47,
- 0x88, 0xa3, 0x01, 0x41, 0xf3, 0x03, 0x8d, 0x7a, 0xa5, 0x01, 0x44, 0xf1,
- 0xa4, 0x01, 0x42, 0xf2, 0x03, 0x8d, 0x85, 0xa5, 0x01, 0x45, 0x71, 0xa4,
- 0x01, 0x43, 0x72, 0x03, 0x8d, 0x89, 0xa5, 0x01, 0x46, 0x70, 0xa5, 0x01,
- 0x45, 0xb1, 0xa4, 0x01, 0x43, 0xb2, 0x03, 0x8d, 0x8d, 0xa5, 0x01, 0x46,
- 0xb0, 0xa5, 0x01, 0x47, 0x30, 0xa5, 0x01, 0x45, 0xd1, 0xa4, 0x01, 0x43,
- 0xd2, 0x03, 0x8d, 0x91, 0xa5, 0x01, 0x46, 0xd0, 0xa5, 0x01, 0x47, 0x50,
- 0xa5, 0x01, 0x47, 0x90, 0xa5, 0x01, 0x45, 0xe1, 0xa4, 0x01, 0x43, 0xe2,
- 0x03, 0x8d, 0x95, 0xa5, 0x01, 0x46, 0xe0, 0xa5, 0x01, 0x47, 0x60, 0xa5,
- 0x01, 0x47, 0xa0, 0xa5, 0x01, 0x47, 0xc0, 0xc6, 0x02, 0x21, 0x0f, 0xda,
- 0x01, 0xcc, 0x02, 0x0b, 0x0f, 0xda, 0x78, 0xcc, 0x02, 0x0b, 0x0f, 0xda,
- 0x71, 0xc5, 0x01, 0xf7, 0x0f, 0xda, 0x80, 0x45, 0x00, 0x6c, 0xc3, 0x8d,
- 0x99, 0xc6, 0x11, 0xa5, 0x01, 0x5b, 0x81, 0x45, 0x01, 0x35, 0x43, 0x8d,
- 0xc3, 0xc3, 0x12, 0xec, 0x01, 0x59, 0xdb, 0x03, 0x8d, 0xc9, 0xd2, 0x06,
- 0x55, 0x01, 0x5f, 0x60, 0xcf, 0x2c, 0x05, 0x01, 0x59, 0xc9, 0xd6, 0x2d,
- 0x07, 0x01, 0x59, 0xd0, 0xcf, 0x62, 0x72, 0x01, 0x4b, 0x59, 0x47, 0x11,
- 0x49, 0xc3, 0x8d, 0xcf, 0xc8, 0xb2, 0xf2, 0x01, 0x4a, 0xf1, 0xc6, 0x11,
- 0xa5, 0x01, 0x4a, 0xb0, 0x46, 0x00, 0x95, 0xc3, 0x8d, 0xd5, 0xc8, 0xb2,
- 0xf2, 0x01, 0x4a, 0xd1, 0xc6, 0x11, 0xa5, 0x01, 0x4a, 0x90, 0xc4, 0xe3,
- 0x17, 0x08, 0x3a, 0x61, 0xc4, 0xe4, 0x4b, 0x08, 0x3a, 0x59, 0xc4, 0xe1,
- 0x93, 0x08, 0x3a, 0x51, 0xc4, 0xe1, 0x4f, 0x08, 0x3a, 0x48, 0x88, 0x08,
- 0x30, 0x81, 0x8f, 0x08, 0x30, 0x88, 0x88, 0x08, 0x30, 0x99, 0x8f, 0x08,
- 0x30, 0xa0, 0x8f, 0x08, 0x30, 0xb0, 0xc5, 0xde, 0x62, 0x08, 0x04, 0x01,
- 0xc7, 0xc1, 0xa3, 0x08, 0x04, 0x09, 0xc6, 0xce, 0xa1, 0x08, 0x04, 0x11,
- 0x23, 0xc3, 0x8d, 0xdf, 0x24, 0xc3, 0x8d, 0xeb, 0x25, 0xc3, 0x8d, 0xf7,
- 0x26, 0xc3, 0x8e, 0x03, 0x22, 0x43, 0x8e, 0x0f, 0xc7, 0xc5, 0x7e, 0x08,
- 0x04, 0x71, 0xc8, 0xb6, 0xcd, 0x08, 0x04, 0x79, 0xc7, 0xca, 0x47, 0x08,
- 0x04, 0x81, 0xc7, 0xc4, 0xdd, 0x08, 0x04, 0x89, 0xc9, 0xaa, 0x78, 0x08,
- 0x04, 0x90, 0xc5, 0xde, 0x49, 0x08, 0x04, 0xa9, 0xc6, 0xcc, 0xaf, 0x08,
- 0x04, 0xb1, 0x9f, 0x08, 0x04, 0xb8, 0xc8, 0xbf, 0xed, 0x08, 0x04, 0xd1,
- 0xc6, 0xcc, 0xf1, 0x08, 0x04, 0xd9, 0x9f, 0x08, 0x04, 0xe1, 0xc6, 0xcb,
- 0xdd, 0x08, 0x04, 0xe9, 0xa3, 0x08, 0x04, 0xf0, 0x9d, 0x08, 0x04, 0xf9,
- 0xc6, 0xcd, 0x0f, 0x08, 0x05, 0x01, 0x9f, 0x08, 0x05, 0x09, 0xa0, 0x08,
- 0x05, 0x11, 0xa1, 0x08, 0x05, 0x19, 0xa4, 0x08, 0x05, 0x29, 0xa5, 0x08,
- 0x05, 0x31, 0xc7, 0xc1, 0x64, 0x08, 0x05, 0x38, 0x9d, 0x08, 0x05, 0x41,
- 0x9e, 0x08, 0x05, 0x49, 0xc9, 0xad, 0x75, 0x08, 0x05, 0x51, 0xc8, 0xba,
- 0xd5, 0x08, 0x05, 0x59, 0xa1, 0x08, 0x05, 0x61, 0xa2, 0x08, 0x05, 0x69,
- 0xa3, 0x08, 0x05, 0x71, 0xa4, 0x08, 0x05, 0x79, 0xa5, 0x08, 0x05, 0x81,
- 0xa6, 0x08, 0x05, 0x88, 0x9d, 0x08, 0x05, 0x91, 0x9f, 0x08, 0x05, 0xa1,
- 0xc7, 0xc4, 0x6d, 0x08, 0x05, 0xa9, 0xa1, 0x08, 0x05, 0xb1, 0xa4, 0x08,
- 0x05, 0xc1, 0xa5, 0x08, 0x05, 0xc9, 0xa6, 0x08, 0x05, 0xd1, 0x9e, 0x08,
- 0x05, 0x99, 0xc6, 0xcb, 0xf5, 0x08, 0x05, 0xb8, 0x9d, 0x08, 0x05, 0xd9,
- 0x9e, 0x08, 0x05, 0xe1, 0x9f, 0x08, 0x05, 0xe9, 0xa0, 0x08, 0x05, 0xf1,
- 0xa1, 0x08, 0x05, 0xf9, 0xa2, 0x08, 0x06, 0x01, 0xa6, 0x08, 0x06, 0x08,
- 0x9d, 0x08, 0x06, 0x11, 0xc8, 0xba, 0x95, 0x08, 0x06, 0x18, 0xcb, 0x97,
- 0x12, 0x08, 0x06, 0x21, 0xc9, 0xab, 0xa1, 0x08, 0x06, 0x28, 0xc7, 0xc5,
- 0x4d, 0x08, 0x06, 0x31, 0xc7, 0xc6, 0xe3, 0x08, 0x06, 0x39, 0x9f, 0x08,
- 0x06, 0x41, 0xc7, 0xc4, 0xf9, 0x08, 0x06, 0x49, 0xa1, 0x08, 0x06, 0x51,
- 0xa3, 0x08, 0x06, 0x58, 0xc9, 0xae, 0x68, 0x08, 0x06, 0x69, 0xcf, 0x61,
- 0xa0, 0x08, 0x06, 0x71, 0xc7, 0xc4, 0x82, 0x08, 0x06, 0x79, 0xa2, 0x08,
- 0x06, 0x81, 0xa3, 0x08, 0x06, 0x89, 0xa5, 0x08, 0x06, 0x99, 0xa6, 0x08,
- 0x06, 0xa1, 0xd1, 0x55, 0xb2, 0x08, 0x06, 0x60, 0x9e, 0x08, 0x06, 0xa9,
- 0x9f, 0x08, 0x06, 0xb1, 0xa0, 0x08, 0x06, 0xb9, 0xc6, 0xce, 0xad, 0x08,
- 0x06, 0xc1, 0xa2, 0x08, 0x06, 0xc9, 0xa3, 0x08, 0x06, 0xd1, 0xa4, 0x08,
- 0x06, 0xd9, 0xa5, 0x08, 0x06, 0xe1, 0xa6, 0x08, 0x06, 0xe8, 0x9d, 0x08,
- 0x06, 0xf9, 0x9e, 0x08, 0x07, 0x01, 0x9f, 0x08, 0x07, 0x09, 0xa0, 0x08,
- 0x07, 0x11, 0xa1, 0x08, 0x07, 0x19, 0xa2, 0x08, 0x07, 0x21, 0xa4, 0x08,
- 0x07, 0x31, 0xa5, 0x08, 0x07, 0x39, 0xa6, 0x08, 0x07, 0x41, 0xa3, 0x08,
- 0x07, 0x28, 0x9d, 0x08, 0x07, 0x49, 0x9e, 0x08, 0x07, 0x51, 0x9f, 0x08,
- 0x07, 0x59, 0xa0, 0x08, 0x07, 0x61, 0xa1, 0x08, 0x07, 0x69, 0xa2, 0x08,
- 0x07, 0x71, 0xa4, 0x08, 0x07, 0x81, 0xa3, 0x08, 0x07, 0x79, 0xa5, 0x08,
- 0x07, 0x89, 0xa6, 0x08, 0x07, 0x90, 0x9e, 0x08, 0x07, 0x99, 0x9f, 0x08,
- 0x07, 0xa1, 0xa3, 0x08, 0x07, 0xa9, 0xa4, 0x08, 0x07, 0xb1, 0xa5, 0x08,
- 0x07, 0xb9, 0xa6, 0x08, 0x07, 0xc0, 0xd1, 0x56, 0xe4, 0x0e, 0xf9, 0x51,
- 0xc9, 0x21, 0x0e, 0x00, 0xeb, 0xa8, 0xd3, 0x46, 0x48, 0x0e, 0xf8, 0x48,
- 0x42, 0x01, 0xe9, 0xc3, 0x8e, 0x21, 0x43, 0x01, 0xb9, 0x43, 0x8e, 0x2d,
- 0xcf, 0x63, 0x08, 0x00, 0xf3, 0x89, 0xc6, 0xbf, 0x3f, 0x00, 0x0b, 0x19,
- 0xc4, 0x63, 0xce, 0x00, 0x0b, 0x29, 0xca, 0xa1, 0x80, 0x00, 0x10, 0xd9,
- 0xc3, 0x02, 0x53, 0x00, 0x11, 0xb0, 0xce, 0x02, 0x79, 0x0e, 0xf8, 0xe1,
- 0xcc, 0x57, 0x82, 0x0e, 0xf8, 0xb8, 0x94, 0x00, 0x13, 0xcb, 0x03, 0x8e,
- 0x39, 0x96, 0x00, 0x14, 0x3b, 0x03, 0x8e, 0x3f, 0x9b, 0x00, 0x14, 0x73,
- 0x03, 0x8e, 0x45, 0x89, 0x00, 0xeb, 0xb9, 0x11, 0xc3, 0x8e, 0x4b, 0x8b,
- 0x00, 0xe8, 0x4b, 0x03, 0x8e, 0x61, 0x83, 0x00, 0x12, 0x83, 0x03, 0x8e,
- 0x67, 0xc2, 0x03, 0x74, 0x05, 0x5a, 0x89, 0x8a, 0x00, 0x13, 0x2b, 0x03,
- 0x8e, 0x71, 0x8f, 0x00, 0x13, 0x7b, 0x03, 0x8e, 0x7a, 0x98, 0x00, 0x14,
- 0x61, 0x99, 0x00, 0x14, 0x69, 0x8d, 0x00, 0x14, 0xf1, 0x8e, 0x05, 0x3c,
- 0x09, 0xc5, 0xd7, 0x87, 0x00, 0x0c, 0x69, 0x87, 0x00, 0x0e, 0xe8, 0xcc,
- 0x21, 0x84, 0x05, 0x3b, 0x2a, 0x03, 0x8e, 0x80, 0xc3, 0x26, 0x50, 0x00,
- 0x0c, 0x29, 0xc3, 0x01, 0x59, 0x00, 0x0d, 0x41, 0xc4, 0x0c, 0x5a, 0x00,
- 0x0d, 0xe8, 0xd1, 0x56, 0xe4, 0x0e, 0xf9, 0x49, 0xc9, 0x21, 0x0e, 0x00,
- 0xeb, 0xa0, 0xc3, 0x02, 0x53, 0x0e, 0xf8, 0xf1, 0xc4, 0x63, 0xce, 0x00,
- 0x0b, 0x0b, 0x03, 0x8e, 0x86, 0xc9, 0x0b, 0x37, 0x00, 0x0a, 0xe9, 0xca,
- 0xa1, 0x80, 0x00, 0x10, 0xc9, 0xc6, 0xbf, 0x3f, 0x00, 0x0a, 0xf8, 0xc5,
- 0x01, 0x62, 0x00, 0xf3, 0x1b, 0x03, 0x8e, 0x8c, 0xc5, 0x00, 0x95, 0x00,
- 0xf3, 0x08, 0xce, 0x17, 0x75, 0x00, 0xf3, 0x28, 0xd3, 0x45, 0xb0, 0x05,
- 0x3e, 0x51, 0xc9, 0xa9, 0xc4, 0x00, 0x11, 0xf8, 0x46, 0x00, 0x6b, 0x43,
- 0x8e, 0x92, 0x94, 0x05, 0x5a, 0x5b, 0x03, 0x8e, 0x9e, 0x89, 0x00, 0x13,
- 0x0a, 0x03, 0x8e, 0xa4, 0xc8, 0xbb, 0xad, 0x00, 0xe8, 0xf9, 0xcd, 0x78,
- 0x00, 0x00, 0xe8, 0xf1, 0x97, 0x00, 0xe8, 0xe9, 0x91, 0x00, 0xe8, 0x8a,
- 0x03, 0x8e, 0xaa, 0x00, 0xc3, 0x8e, 0xb6, 0xca, 0x48, 0x9e, 0x05, 0x5a,
- 0x38, 0xc2, 0x00, 0x35, 0x0e, 0xf8, 0x38, 0xc9, 0x37, 0x1e, 0x05, 0x39,
- 0xf8, 0x46, 0x00, 0x6b, 0x43, 0x8e, 0xf7, 0x43, 0x00, 0x50, 0xc3, 0x8f,
- 0x03, 0x05, 0xc3, 0x8f, 0x0f, 0x0e, 0xc3, 0x8f, 0x2d, 0x06, 0xc3, 0x8f,
- 0x3f, 0xcc, 0x57, 0x82, 0x00, 0xec, 0x39, 0xc5, 0x1f, 0x94, 0x00, 0x0f,
- 0xc9, 0xce, 0x02, 0x79, 0x00, 0x13, 0x99, 0xc5, 0x21, 0x12, 0x00, 0x07,
- 0x89, 0xc5, 0x35, 0x4a, 0x00, 0x0a, 0x21, 0xce, 0x3a, 0xdb, 0x05, 0x3d,
- 0x28, 0xd1, 0x56, 0xe4, 0x0e, 0xf9, 0x31, 0x44, 0x00, 0x53, 0x43, 0x8f,
- 0x4b, 0xc3, 0x8e, 0x37, 0x00, 0x13, 0x63, 0x03, 0x8f, 0x5d, 0xc2, 0x00,
- 0xbb, 0x00, 0x0c, 0xd0, 0xcf, 0x63, 0x08, 0x00, 0xf1, 0xe9, 0xc6, 0xbf,
- 0x3f, 0x00, 0x09, 0xe1, 0xc4, 0x63, 0xce, 0x00, 0x09, 0xf1, 0xc3, 0x02,
- 0x53, 0x00, 0x11, 0xa0, 0xc7, 0x0e, 0xae, 0x00, 0xf1, 0xbb, 0x03, 0x8f,
- 0x63, 0x45, 0x06, 0xf3, 0x43, 0x8f, 0x69, 0xc9, 0xb5, 0x04, 0x05, 0x3c,
- 0x70, 0xd4, 0x02, 0x73, 0x0e, 0xf8, 0x28, 0xcb, 0x95, 0x18, 0x00, 0xf4,
- 0xe9, 0x06, 0x43, 0x8f, 0x75, 0xd1, 0x56, 0xe4, 0x0e, 0xf9, 0x29, 0xc9,
- 0x21, 0x0e, 0x00, 0xeb, 0x68, 0xcf, 0x63, 0x08, 0x00, 0xf0, 0x99, 0xc6,
- 0xbf, 0x3f, 0x00, 0xf0, 0x89, 0xc4, 0x63, 0xce, 0x00, 0x08, 0x79, 0xc3,
- 0x02, 0x53, 0x00, 0x11, 0x08, 0xc5, 0x01, 0x62, 0x00, 0xf0, 0x69, 0xc5,
- 0x00, 0x95, 0x00, 0xf0, 0x58, 0xc9, 0xb1, 0xf5, 0x00, 0xec, 0x88, 0xd3,
- 0x45, 0xb0, 0x05, 0x3e, 0x41, 0xc5, 0x01, 0x02, 0x00, 0x08, 0x88, 0xc5,
- 0xd2, 0x80, 0x00, 0x0c, 0x61, 0xc3, 0x12, 0xec, 0x00, 0x12, 0xb0, 0x46,
- 0x00, 0x6b, 0x43, 0x8f, 0x81, 0xc7, 0x46, 0x96, 0x00, 0x15, 0x1b, 0x03,
- 0x8f, 0x8d, 0xca, 0x9a, 0x99, 0x00, 0x0e, 0x30, 0x94, 0x05, 0x5a, 0x43,
- 0x03, 0x8f, 0x93, 0xc5, 0x40, 0x53, 0x05, 0x3e, 0x99, 0xc4, 0x99, 0x69,
- 0x05, 0x3e, 0xa8, 0x8c, 0x00, 0x11, 0xbb, 0x03, 0x8f, 0x99, 0x8b, 0x00,
- 0x09, 0x88, 0xc6, 0xbf, 0x3f, 0x00, 0x07, 0x3b, 0x03, 0x8f, 0xa2, 0xc9,
- 0x0b, 0x37, 0x00, 0x08, 0x49, 0xc4, 0x63, 0xce, 0x00, 0x08, 0x69, 0xc3,
- 0x02, 0x53, 0x00, 0x12, 0xa8, 0xca, 0xa3, 0x4c, 0x05, 0x5a, 0xa9, 0xca,
- 0x4e, 0x98, 0x05, 0x5a, 0xa0, 0xc4, 0x70, 0xd8, 0x00, 0x13, 0xb9, 0xc5,
- 0x25, 0x71, 0x00, 0x14, 0xd0, 0xc2, 0x00, 0xc0, 0x00, 0x0d, 0x0b, 0x03,
- 0x8f, 0xa8, 0xc8, 0x9f, 0xca, 0x00, 0xf6, 0x78, 0xc9, 0x0b, 0x37, 0x00,
- 0x07, 0xa3, 0x03, 0x8f, 0xae, 0xc4, 0x63, 0xce, 0x00, 0x0e, 0x90, 0x11,
- 0xc3, 0x8f, 0xb4, 0xc8, 0x1e, 0x43, 0x00, 0x07, 0xb2, 0x03, 0x8f, 0xc0,
- 0x45, 0x00, 0x56, 0x43, 0x8f, 0xc6, 0x45, 0x00, 0x56, 0x43, 0x8f, 0xd2,
- 0xca, 0xa9, 0x28, 0x00, 0x0f, 0xf8, 0xd1, 0x57, 0xc1, 0x05, 0x3a, 0x59,
- 0xc2, 0x00, 0x11, 0x05, 0x3a, 0x69, 0xcd, 0x7b, 0x5a, 0x01, 0x63, 0xd0,
- 0xcb, 0x9a, 0x2a, 0x00, 0x0f, 0x68, 0x46, 0x00, 0x6b, 0xc3, 0x8f, 0xe4,
- 0x87, 0x05, 0x5b, 0x10, 0xd4, 0x02, 0x73, 0x00, 0xec, 0x98, 0xd3, 0x46,
- 0x48, 0x0e, 0xf8, 0xd0, 0x11, 0xc3, 0x8f, 0xf0, 0xc8, 0x1e, 0x43, 0x00,
- 0x07, 0x7a, 0x03, 0x8f, 0xfc, 0xc6, 0x01, 0x61, 0x00, 0xf1, 0x68, 0xc9,
- 0x0b, 0x37, 0x00, 0x07, 0x71, 0xc4, 0x63, 0xce, 0x00, 0x0e, 0x80, 0xcc,
- 0x21, 0x84, 0x00, 0xeb, 0xe0, 0x89, 0x00, 0xeb, 0xc9, 0x88, 0x05, 0x3b,
- 0xe1, 0x94, 0x05, 0x3c, 0x19, 0x95, 0x05, 0x3c, 0x29, 0x96, 0x05, 0x3c,
- 0x39, 0x86, 0x05, 0x3b, 0xd0, 0xc5, 0xd9, 0x9e, 0x05, 0x5b, 0x21, 0xc2,
- 0x1b, 0xd8, 0x05, 0x5a, 0x00, 0x45, 0x00, 0x6c, 0xc3, 0x90, 0x02, 0xc8,
- 0x0f, 0xbf, 0x00, 0x0d, 0xc8, 0xcc, 0x57, 0x82, 0x0e, 0xf8, 0xa1, 0xcc,
- 0x4c, 0x8c, 0x05, 0x59, 0xe0, 0xca, 0xa6, 0xb2, 0x0e, 0xf8, 0x5b, 0x03,
- 0x90, 0x18, 0xce, 0x02, 0x79, 0x00, 0xec, 0xc1, 0xcc, 0x57, 0x82, 0x00,
- 0xec, 0x59, 0xc4, 0x02, 0x52, 0x00, 0x14, 0x30, 0xc9, 0x0b, 0x37, 0x00,
- 0x07, 0x53, 0x03, 0x90, 0x1e, 0xc6, 0xbf, 0x3f, 0x00, 0x11, 0x4b, 0x03,
- 0x90, 0x22, 0xc4, 0x63, 0xce, 0x00, 0x08, 0xd8, 0xc6, 0x01, 0x61, 0x00,
- 0xf0, 0xd8, 0x11, 0xc3, 0x90, 0x28, 0xc8, 0x1e, 0x43, 0x00, 0x07, 0x58,
- 0x45, 0x00, 0x56, 0x43, 0x90, 0x34, 0xc6, 0x00, 0x94, 0x00, 0xf7, 0xb8,
- 0x43, 0x01, 0xb9, 0xc3, 0x90, 0x40, 0xc8, 0x1e, 0x43, 0x00, 0x07, 0xf8,
- 0xce, 0x38, 0x45, 0x05, 0x5a, 0xd1, 0xc5, 0x01, 0x02, 0x00, 0x12, 0x78,
- 0x98, 0x00, 0xf7, 0xe9, 0xc2, 0x03, 0xc7, 0x00, 0xf7, 0xd8, 0xc5, 0x01,
- 0x62, 0x00, 0xf2, 0x19, 0xc5, 0x00, 0x95, 0x00, 0xf2, 0x08, 0x42, 0x00,
- 0xf6, 0xc3, 0x90, 0x4c, 0x06, 0xc3, 0x90, 0x5b, 0xc6, 0x60, 0xe6, 0x00,
- 0x0b, 0x5b, 0x03, 0x90, 0x68, 0xc5, 0x21, 0x12, 0x00, 0x0b, 0x4b, 0x03,
- 0x90, 0x6e, 0x05, 0xc3, 0x90, 0x72, 0x14, 0xc3, 0x90, 0x81, 0xc9, 0x56,
- 0xec, 0x05, 0x5a, 0x91, 0x15, 0xc3, 0x90, 0x8d, 0xc5, 0x1f, 0x94, 0x00,
- 0x07, 0xc9, 0xc5, 0x35, 0x4a, 0x00, 0x07, 0xd1, 0xc5, 0x1f, 0x0a, 0x00,
- 0x0b, 0x69, 0xc6, 0xcd, 0x21, 0x00, 0x0b, 0x99, 0xce, 0x1f, 0x15, 0x00,
- 0x10, 0xb8, 0xd5, 0x38, 0x3e, 0x05, 0x5a, 0x78, 0xc5, 0x1f, 0x0a, 0x00,
- 0x08, 0x1b, 0x03, 0x90, 0x99, 0x05, 0xc3, 0x90, 0x9f, 0xca, 0x9f, 0xc8,
- 0x00, 0xf5, 0x19, 0x06, 0xc3, 0x90, 0xae, 0x14, 0xc3, 0x90, 0xbb, 0xce,
- 0x1f, 0x15, 0x00, 0x10, 0x19, 0xc5, 0x1f, 0x94, 0x00, 0x07, 0x01, 0xc5,
- 0x35, 0x4a, 0x00, 0x07, 0x09, 0xc5, 0x21, 0x12, 0x00, 0x07, 0x19, 0xc6,
- 0x60, 0xe6, 0x00, 0x08, 0x09, 0xc6, 0xcd, 0x21, 0x00, 0x08, 0x29, 0xc6,
- 0x01, 0x01, 0x01, 0x63, 0x28, 0xc5, 0x35, 0x4a, 0x00, 0x0f, 0xe9, 0xc6,
- 0x60, 0xe6, 0x00, 0x0f, 0x18, 0x43, 0x01, 0xb9, 0xc3, 0x90, 0xc5, 0xc8,
- 0x1e, 0x43, 0x00, 0xf4, 0x28, 0xc6, 0xbf, 0x3f, 0x00, 0xf1, 0x49, 0xc9,
- 0x0b, 0x37, 0x00, 0x09, 0x29, 0xc4, 0x63, 0xce, 0x00, 0x10, 0xf8, 0xc8,
- 0x1e, 0x43, 0x00, 0xf1, 0x39, 0x43, 0x01, 0xb9, 0xc3, 0x90, 0xd1, 0xc8,
- 0x23, 0xe0, 0x01, 0x63, 0x40, 0x43, 0x01, 0xb9, 0xc3, 0x90, 0xdd, 0xc8,
- 0x23, 0xe0, 0x01, 0x63, 0x60, 0xc9, 0x0b, 0x37, 0x00, 0xf4, 0x89, 0xc3,
- 0x02, 0x53, 0x00, 0x14, 0x89, 0xc4, 0x63, 0xce, 0x00, 0x0b, 0xf0, 0xc5,
- 0x01, 0x02, 0x00, 0x0d, 0xb1, 0xc9, 0xa9, 0xc4, 0x00, 0x12, 0x00, 0xc8,
- 0x1e, 0x43, 0x00, 0xf4, 0x69, 0xc8, 0x17, 0x7b, 0x00, 0xf4, 0x58, 0xcb,
- 0x91, 0xb3, 0x05, 0x5a, 0xbb, 0x03, 0x90, 0xe9, 0xcc, 0x4e, 0x96, 0x05,
- 0x5a, 0xb0, 0xc8, 0x0e, 0xad, 0x00, 0xf3, 0xf9, 0xce, 0x3f, 0x8b, 0x05,
- 0x3a, 0xf8, 0xc5, 0x01, 0x02, 0x00, 0xeb, 0xeb, 0x03, 0x90, 0xed, 0xcc,
- 0x82, 0x90, 0x05, 0x3a, 0xa8, 0xc8, 0x0e, 0xad, 0x00, 0xf1, 0x99, 0xce,
- 0x3f, 0x8b, 0x05, 0x3a, 0x19, 0xc8, 0x23, 0xe0, 0x01, 0x63, 0x50, 0xd4,
- 0x3f, 0x85, 0x05, 0x3a, 0x28, 0xc6, 0xbf, 0x3f, 0x00, 0x09, 0xb9, 0xc4,
- 0x63, 0xce, 0x00, 0x0f, 0x48, 0xc9, 0x0b, 0x37, 0x00, 0x08, 0xe9, 0xc6,
- 0xbf, 0x3f, 0x00, 0x09, 0x19, 0xc4, 0x63, 0xce, 0x00, 0x0f, 0x38, 0xc5,
- 0x01, 0x62, 0x00, 0xf0, 0x29, 0xc5, 0x00, 0x95, 0x00, 0xf0, 0x18, 0x87,
- 0x05, 0x59, 0x99, 0xc5, 0xd9, 0x9e, 0x05, 0x59, 0x81, 0x91, 0x00, 0x13,
- 0xa8, 0xcc, 0x21, 0x84, 0x05, 0x59, 0xf0, 0xcb, 0x4c, 0x8d, 0x00, 0x14,
- 0xe9, 0xc9, 0x0b, 0x37, 0x00, 0x09, 0xa9, 0xc4, 0x63, 0xce, 0x00, 0x0f,
- 0x80, 0xc5, 0x46, 0x43, 0x00, 0x12, 0x58, 0xc5, 0x01, 0x62, 0x00, 0xf7,
- 0xa1, 0xc5, 0x00, 0x95, 0x00, 0xf4, 0x70, 0xc2, 0x00, 0xc0, 0x00, 0x0d,
- 0x7b, 0x03, 0x90, 0xf3, 0xc8, 0x9f, 0xca, 0x00, 0xf7, 0x30, 0x11, 0xc3,
- 0x90, 0xf9, 0xc8, 0x1e, 0x43, 0x00, 0x06, 0xe2, 0x03, 0x91, 0x05, 0xce,
- 0x75, 0x41, 0x00, 0xf3, 0xd0, 0x00, 0x43, 0x91, 0x09, 0xc9, 0x0b, 0x37,
- 0x00, 0x06, 0xdb, 0x03, 0x91, 0x15, 0xc4, 0x63, 0xce, 0x00, 0x0e, 0x98,
- 0x45, 0x00, 0x56, 0x43, 0x91, 0x1b, 0x45, 0x00, 0x56, 0x43, 0x91, 0x39,
- 0x42, 0x00, 0xed, 0xc3, 0x91, 0x57, 0x45, 0x06, 0xf3, 0x43, 0x91, 0x66,
- 0xcb, 0x9a, 0x2a, 0x00, 0x11, 0x50, 0x45, 0x00, 0x56, 0x43, 0x91, 0x72,
- 0xc9, 0x1e, 0x42, 0x00, 0xf2, 0x71, 0xc5, 0x35, 0x4a, 0x00, 0xf2, 0x61,
- 0xc6, 0x60, 0xe6, 0x00, 0x11, 0x60, 0x42, 0x00, 0xed, 0xc3, 0x91, 0x7e,
- 0xca, 0x1f, 0x8f, 0x00, 0x10, 0x40, 0xca, 0xa9, 0x28, 0x00, 0xf1, 0x70,
- 0x00, 0x43, 0x91, 0x8a, 0xca, 0xa8, 0xd8, 0x00, 0xf0, 0xe0, 0x42, 0x00,
- 0xed, 0xc3, 0x91, 0x96, 0xca, 0x1f, 0x8f, 0x00, 0x10, 0x20, 0xc5, 0x35,
- 0x4a, 0x00, 0xf0, 0xb1, 0xc5, 0x1f, 0x94, 0x00, 0xf0, 0xa0, 0xc9, 0x0e,
- 0xac, 0x00, 0xf5, 0xb1, 0xc5, 0x21, 0x12, 0x00, 0xf5, 0xa1, 0xca, 0x9f,
- 0xc8, 0x00, 0xf5, 0x91, 0xc5, 0x1f, 0x94, 0x00, 0xf5, 0x81, 0xc5, 0x35,
- 0x4a, 0x00, 0xf5, 0x70, 0x45, 0x00, 0x56, 0x43, 0x91, 0xa2, 0x42, 0x00,
- 0xed, 0xc3, 0x91, 0xc0, 0xca, 0x1f, 0x8f, 0x00, 0x10, 0x00, 0xcb, 0x9a,
- 0x2a, 0x00, 0x0e, 0xf0, 0xca, 0xa9, 0x28, 0x00, 0x0f, 0xd0, 0xce, 0x17,
- 0x75, 0x00, 0xf3, 0x40, 0xce, 0x17, 0x75, 0x00, 0xf3, 0x30, 0xc5, 0x01,
- 0x62, 0x00, 0xf4, 0x91, 0xc5, 0x00, 0x95, 0x00, 0x0b, 0xd8, 0xc5, 0x01,
- 0x62, 0x00, 0xf4, 0x41, 0xc5, 0x00, 0x95, 0x00, 0xf4, 0x30, 0xc5, 0x01,
- 0x62, 0x00, 0xf3, 0x61, 0xc5, 0x00, 0x95, 0x00, 0xf3, 0x50, 0x42, 0x00,
- 0xed, 0xc3, 0x91, 0xcf, 0xca, 0x1f, 0x8f, 0x00, 0x10, 0x80, 0xc6, 0xbf,
- 0x3f, 0x00, 0x0a, 0xb1, 0xc4, 0x63, 0xce, 0x00, 0x0a, 0xc0, 0xd2, 0x23,
- 0xd6, 0x05, 0x3a, 0x80, 0xc5, 0x01, 0x62, 0x00, 0xf2, 0x31, 0xc5, 0x00,
- 0x95, 0x00, 0xf2, 0x20, 0xcb, 0x9a, 0x2a, 0x00, 0xf1, 0xc0, 0xc5, 0x01,
- 0x62, 0x00, 0xf1, 0x21, 0xc5, 0x00, 0x95, 0x00, 0xf1, 0x10, 0xcb, 0x9a,
- 0x98, 0x00, 0x0e, 0x28, 0xca, 0xa9, 0x28, 0x00, 0xf0, 0x40, 0xd0, 0x5e,
- 0x72, 0x0f, 0xc1, 0x89, 0xcb, 0x5e, 0x77, 0x0f, 0xc1, 0x69, 0xca, 0x9f,
- 0xd2, 0x0f, 0xc1, 0x49, 0x49, 0xab, 0x7d, 0xc3, 0x91, 0xde, 0xd8, 0x25,
- 0x98, 0x01, 0x5b, 0xd9, 0xcc, 0x86, 0xb0, 0x0f, 0xc1, 0x09, 0xcc, 0x82,
- 0x84, 0x0f, 0xc1, 0x28, 0xe0, 0x03, 0xa7, 0x01, 0x5c, 0x08, 0xc6, 0x44,
- 0x67, 0x07, 0xd9, 0x69, 0xc7, 0x44, 0x66, 0x07, 0xd9, 0x60, 0xc5, 0x7a,
- 0x92, 0x05, 0x4b, 0x51, 0xc6, 0xc8, 0x2d, 0x05, 0x4b, 0x39, 0xc6, 0x92,
- 0x31, 0x05, 0x4b, 0x28, 0xc5, 0x92, 0x32, 0x00, 0x89, 0x69, 0xc6, 0xc0,
- 0x37, 0x00, 0x89, 0xc0, 0xc5, 0xc8, 0x2e, 0x00, 0x89, 0x79, 0xc6, 0xc6,
- 0xf2, 0x00, 0x89, 0xc8, 0xc4, 0x7a, 0x93, 0x00, 0x89, 0x93, 0x03, 0x91,
- 0xea, 0xc6, 0xc1, 0x07, 0x00, 0x89, 0xd0, 0xc4, 0xc7, 0x2b, 0x00, 0x89,
- 0xb1, 0xc6, 0xc7, 0x2a, 0x00, 0x89, 0xb8, 0xc6, 0xc0, 0x37, 0x05, 0x4b,
- 0x99, 0xc5, 0x92, 0x32, 0x00, 0x88, 0xf0, 0xc3, 0x3c, 0x08, 0x00, 0x89,
- 0x0b, 0x03, 0x91, 0xf0, 0xc8, 0xac, 0xd4, 0x00, 0x89, 0x28, 0xc5, 0xc8,
- 0x2e, 0x00, 0x89, 0x01, 0xc6, 0xc6, 0xf2, 0x00, 0x89, 0x48, 0xc4, 0xc7,
- 0x2b, 0x00, 0x89, 0x39, 0xc6, 0xc7, 0x2a, 0x00, 0x89, 0x40, 0xc7, 0xc0,
- 0x36, 0x00, 0x8a, 0x91, 0xc5, 0x98, 0x41, 0x00, 0x8a, 0x98, 0xc3, 0x3c,
- 0x08, 0x00, 0x89, 0xe1, 0x44, 0x29, 0x95, 0x43, 0x91, 0xf4, 0xc4, 0xc7,
- 0x2b, 0x00, 0x8a, 0x71, 0xc6, 0xc7, 0x2a, 0x00, 0x8a, 0xa0, 0xc4, 0xac,
- 0xd8, 0x00, 0x89, 0xf9, 0xc5, 0xd7, 0x8c, 0x00, 0x8a, 0x88, 0x87, 0x06,
- 0xbe, 0x33, 0x03, 0x92, 0x00, 0x97, 0x00, 0x8d, 0x01, 0x8b, 0x00, 0x8d,
- 0x09, 0x83, 0x06, 0xbe, 0x28, 0x91, 0x00, 0x8b, 0xc1, 0xc2, 0x20, 0x67,
- 0x00, 0x8b, 0xc9, 0x97, 0x00, 0x8d, 0x20, 0x02, 0x43, 0x92, 0x04, 0x1b,
- 0xc3, 0x92, 0x12, 0x91, 0x00, 0x8c, 0x39, 0x8b, 0x00, 0x8c, 0x41, 0x83,
- 0x06, 0xbd, 0x93, 0x03, 0x92, 0x1f, 0xc2, 0x08, 0xc6, 0x06, 0xbd, 0xa0,
- 0x83, 0x00, 0x8c, 0x73, 0x03, 0x92, 0x23, 0x87, 0x00, 0x8c, 0x83, 0x03,
- 0x92, 0x27, 0xc2, 0x05, 0xd0, 0x00, 0x8c, 0x93, 0x03, 0x92, 0x2b, 0x97,
- 0x00, 0x8c, 0x99, 0x8b, 0x00, 0x8c, 0xa1, 0x91, 0x06, 0xbd, 0xc0, 0x91,
- 0x00, 0x8b, 0xd1, 0x97, 0x00, 0x8b, 0xd9, 0xc2, 0x11, 0x70, 0x00, 0x8b,
- 0xe0, 0x97, 0x00, 0x8c, 0xa9, 0x87, 0x06, 0xbd, 0xdb, 0x03, 0x92, 0x2f,
- 0x83, 0x06, 0xbd, 0xc9, 0x91, 0x06, 0xbd, 0xe0, 0x91, 0x00, 0x8b, 0xf8,
- 0x87, 0x00, 0x8c, 0x0b, 0x03, 0x92, 0x37, 0x83, 0x00, 0x8d, 0x32, 0x03,
- 0x92, 0x3b, 0xc2, 0x06, 0x90, 0x06, 0xbd, 0x88, 0x87, 0x00, 0x8c, 0x50,
- 0x91, 0x06, 0xbd, 0xa8, 0xc4, 0x0f, 0xf4, 0x00, 0x8c, 0xe8, 0x83, 0x00,
- 0x8c, 0xcb, 0x03, 0x92, 0x3f, 0x87, 0x06, 0xbe, 0x03, 0x03, 0x92, 0x49,
- 0x91, 0x06, 0xbe, 0x11, 0x97, 0x06, 0xbe, 0x18, 0xc2, 0x06, 0x90, 0x06,
- 0xbe, 0x08, 0xc4, 0xac, 0xd8, 0x00, 0x8d, 0x53, 0x03, 0x92, 0x4d, 0xc5,
- 0xd6, 0x3d, 0x00, 0x8e, 0x19, 0xc5, 0xd9, 0x80, 0x00, 0x8f, 0xd1, 0xc5,
- 0x7a, 0x92, 0x00, 0x8f, 0xd9, 0xc7, 0xc7, 0x29, 0x00, 0x8f, 0xe1, 0xc7,
- 0xc0, 0x36, 0x00, 0x8f, 0xe9, 0xc5, 0x98, 0x41, 0x00, 0x8f, 0xf0, 0xc4,
- 0x7a, 0x93, 0x00, 0x8f, 0x31, 0xc6, 0xc1, 0x07, 0x00, 0x8f, 0xa0, 0x02,
- 0x43, 0x92, 0x53, 0xc8, 0xc0, 0x35, 0x06, 0xbe, 0xb8, 0xc6, 0xce, 0x17,
- 0x06, 0xbe, 0x70, 0x0d, 0xc3, 0x92, 0x5f, 0x16, 0xc3, 0x92, 0x6b, 0xc5,
- 0xd6, 0x3d, 0x00, 0x8f, 0x49, 0x12, 0xc3, 0x92, 0x77, 0xc5, 0xdb, 0x51,
- 0x06, 0xbf, 0x51, 0x05, 0xc3, 0x92, 0x83, 0xc5, 0x98, 0x41, 0x06, 0xbf,
- 0x90, 0xc4, 0xc7, 0x2b, 0x00, 0x8d, 0x61, 0xc6, 0xc7, 0x2a, 0x06, 0xbe,
- 0x60, 0xc5, 0xc8, 0x2e, 0x00, 0x8e, 0x31, 0xc6, 0xc6, 0xf2, 0x00, 0x8e,
- 0x48, 0xc6, 0x92, 0x31, 0x00, 0x8e, 0x51, 0xc5, 0xd6, 0x3d, 0x00, 0x8e,
- 0x59, 0xc5, 0x7a, 0x92, 0x06, 0xbe, 0x79, 0xc4, 0xac, 0xd8, 0x06, 0xbe,
- 0x83, 0x03, 0x92, 0x8f, 0x05, 0xc3, 0x92, 0x95, 0xc7, 0xc6, 0xf1, 0x06,
- 0xbe, 0xa0, 0xc5, 0x92, 0x32, 0x00, 0x8d, 0x83, 0x03, 0x92, 0xa1, 0xcc,
- 0x7a, 0x8b, 0x00, 0x8e, 0xa9, 0xc6, 0xc0, 0x37, 0x00, 0x8e, 0xc0, 0x02,
- 0x43, 0x92, 0xa5, 0xc4, 0x7a, 0x93, 0x00, 0x8d, 0x93, 0x03, 0x92, 0xb7,
- 0xc6, 0xc1, 0x07, 0x00, 0x8d, 0xa9, 0xc6, 0xcb, 0x4b, 0x00, 0x8e, 0xb8,
- 0xc3, 0x3c, 0x08, 0x00, 0x8d, 0x99, 0x44, 0x29, 0x95, 0x43, 0x92, 0xbb,
- 0xc6, 0xc6, 0xf2, 0x00, 0x8d, 0xa1, 0xc5, 0xc8, 0x2e, 0x00, 0x8e, 0x72,
- 0x03, 0x92, 0xc7, 0xc9, 0x98, 0x3d, 0x00, 0x8e, 0xcb, 0x03, 0x92, 0xcd,
- 0xc6, 0xbb, 0x9f, 0x06, 0xbe, 0xd8, 0xc4, 0x7a, 0x93, 0x00, 0x8e, 0xe3,
- 0x03, 0x92, 0xd3, 0xc6, 0xcb, 0x4b, 0x00, 0x8e, 0xf8, 0xc3, 0x3c, 0x08,
- 0x00, 0x8e, 0xe9, 0x44, 0x29, 0x95, 0x43, 0x92, 0xd9, 0xc6, 0xc7, 0x2a,
- 0x00, 0x8f, 0x01, 0xc4, 0xc7, 0x2b, 0x06, 0xbf, 0x10, 0xc4, 0xac, 0xd8,
- 0x00, 0x8d, 0xcb, 0x03, 0x92, 0xe5, 0xc5, 0xd6, 0x3d, 0x00, 0x8f, 0x1b,
- 0x03, 0x92, 0xeb, 0xc7, 0xc1, 0x06, 0x00, 0x8f, 0x21, 0xc5, 0x98, 0x41,
- 0x00, 0x8f, 0x29, 0xc6, 0xc8, 0x2d, 0x06, 0xbf, 0x19, 0xc5, 0xdb, 0x51,
- 0x06, 0xbf, 0x29, 0x05, 0x43, 0x92, 0xf1, 0xc5, 0x7a, 0x92, 0x00, 0x8f,
- 0x99, 0xc4, 0xac, 0xd8, 0x06, 0xbf, 0xd1, 0xc7, 0xc6, 0xf1, 0x06, 0xbf,
- 0xd8, 0xc5, 0x7a, 0x92, 0x06, 0xbf, 0x99, 0xcd, 0x7a, 0x8a, 0x06, 0xbf,
- 0xa0, 0xc5, 0x92, 0x32, 0x00, 0x8f, 0x61, 0xc6, 0xc0, 0x37, 0x00, 0x8f,
- 0x78, 0xc5, 0x7a, 0x92, 0x06, 0xbf, 0xb9, 0xca, 0xa4, 0x3c, 0x06, 0xbf,
- 0xc0, 0x0d, 0xc3, 0x92, 0xfd, 0x15, 0xc3, 0x93, 0x09, 0xc7, 0xcb, 0x4a,
- 0x00, 0x8f, 0x91, 0xc5, 0xdb, 0x51, 0x06, 0xbf, 0xa9, 0xc5, 0x98, 0x41,
- 0x06, 0xbf, 0xb0, 0xc5, 0xde, 0x35, 0x01, 0x8b, 0x58, 0x02, 0x43, 0x93,
- 0x15, 0xc5, 0xc8, 0x2e, 0x01, 0x8b, 0x99, 0xc6, 0xc6, 0xf2, 0x01, 0x8b,
- 0xb8, 0xc4, 0xac, 0xd8, 0x01, 0x8c, 0x11, 0xc7, 0xcb, 0x4a, 0x01, 0x8c,
- 0x18, 0x87, 0x01, 0x8c, 0x40, 0x91, 0x01, 0x8c, 0x50, 0xc8, 0x50, 0x00,
- 0x0f, 0x64, 0x81, 0xc7, 0x0c, 0x4b, 0x0f, 0x64, 0x38, 0xc8, 0x50, 0x00,
- 0x0f, 0x64, 0x79, 0xc7, 0x0c, 0x4b, 0x0f, 0x64, 0x30, 0xc8, 0x50, 0x00,
- 0x0f, 0x64, 0x71, 0xc7, 0x0c, 0x4b, 0x0f, 0x64, 0x28, 0xc8, 0x50, 0x00,
- 0x0f, 0x64, 0x69, 0xc7, 0x0c, 0x4b, 0x0f, 0x64, 0x20, 0x91, 0x01, 0x9f,
- 0x09, 0x07, 0x43, 0x93, 0x21, 0xc3, 0x04, 0x5f, 0x01, 0x9f, 0x11, 0x43,
- 0x0c, 0x55, 0x43, 0x93, 0x30, 0xc4, 0x13, 0xf2, 0x01, 0x9f, 0x68, 0xc2,
- 0x00, 0x7b, 0x01, 0x9f, 0x21, 0xc5, 0x13, 0xf1, 0x01, 0x9f, 0x70, 0xc4,
- 0x13, 0xf2, 0x01, 0x9f, 0x78, 0xc4, 0x13, 0xf2, 0x01, 0x9f, 0x80, 0xc3,
- 0x00, 0x4c, 0x01, 0x9f, 0x88, 0xc3, 0x21, 0x32, 0x01, 0x9b, 0x21, 0xc3,
- 0x18, 0x86, 0x01, 0x9b, 0x62, 0x03, 0x93, 0x3d, 0x4b, 0x18, 0x77, 0xc3,
- 0x93, 0x41, 0xdc, 0x13, 0xe2, 0x0f, 0xd2, 0x28, 0xce, 0x3c, 0x29, 0x01,
- 0x2f, 0x91, 0xcd, 0x03, 0xd4, 0x01, 0x2f, 0x88, 0xce, 0x6f, 0x9f, 0x0f,
- 0xb1, 0x81, 0xc8, 0xbb, 0x65, 0x0f, 0xc9, 0x70, 0xc3, 0x32, 0xa9, 0x00,
- 0xcf, 0xc9, 0xc4, 0xe2, 0x27, 0x00, 0xcf, 0x48, 0xc3, 0x32, 0xa9, 0x00,
- 0xcf, 0xc1, 0xc4, 0xe2, 0x27, 0x00, 0xcf, 0x40, 0xc3, 0xe0, 0xeb, 0x00,
- 0xbf, 0xc9, 0xc2, 0x01, 0xdb, 0x00, 0xbf, 0xc0, 0xc9, 0x4f, 0xff, 0x08,
- 0x4f, 0x98, 0xc9, 0x4f, 0xff, 0x08, 0x4f, 0x90, 0xc7, 0x0c, 0x4b, 0x08,
- 0x4e, 0xb3, 0x03, 0x93, 0x4d, 0xc8, 0x50, 0x00, 0x08, 0x4e, 0xf8, 0xc7,
- 0x0c, 0x4b, 0x08, 0x4e, 0xab, 0x03, 0x93, 0x53, 0xc8, 0x50, 0x00, 0x08,
- 0x4e, 0xf0, 0xc7, 0x0c, 0x4b, 0x08, 0x4e, 0xa3, 0x03, 0x93, 0x59, 0xc8,
- 0x50, 0x00, 0x08, 0x4e, 0xe8, 0xc7, 0x0c, 0x4b, 0x08, 0x4e, 0x9b, 0x03,
- 0x93, 0x5f, 0xc8, 0x50, 0x00, 0x08, 0x4e, 0xe0, 0x15, 0xc3, 0x93, 0x65,
- 0x1b, 0xc3, 0x93, 0x7a, 0x87, 0x0f, 0x02, 0x43, 0x03, 0x93, 0x87, 0x12,
- 0xc3, 0x93, 0x9e, 0x04, 0xc3, 0x93, 0xa8, 0xc2, 0x1b, 0xa5, 0x0f, 0x02,
- 0x69, 0x06, 0xc3, 0x93, 0xb8, 0xc3, 0x26, 0x9b, 0x0f, 0x02, 0x29, 0x16,
- 0xc3, 0x93, 0xc2, 0xc2, 0x00, 0xa4, 0x0f, 0x02, 0x19, 0x97, 0x0f, 0x02,
- 0x09, 0x91, 0x0f, 0x01, 0xe1, 0xc2, 0x04, 0x2b, 0x0f, 0x01, 0xc9, 0x8b,
- 0x0f, 0x01, 0xc3, 0x03, 0x93, 0xcc, 0x1c, 0xc3, 0x93, 0xd0, 0xc3, 0x8f,
- 0x12, 0x0f, 0x01, 0x99, 0xc2, 0x00, 0x7b, 0x0f, 0x01, 0x91, 0x83, 0x0f,
- 0x01, 0x80, 0x90, 0x00, 0xe9, 0xd9, 0x87, 0x00, 0xe9, 0x90, 0x98, 0x00,
- 0xed, 0xd1, 0x8f, 0x00, 0xea, 0xd3, 0x03, 0x93, 0xda, 0x8a, 0x00, 0xed,
- 0x19, 0x83, 0x00, 0xea, 0x23, 0x03, 0x93, 0xe0, 0x8b, 0x00, 0xea, 0x71,
- 0xc6, 0x25, 0x70, 0x00, 0xea, 0x61, 0x99, 0x05, 0x5b, 0x49, 0x94, 0x00,
- 0x15, 0xa3, 0x03, 0x93, 0xea, 0x9b, 0x08, 0x3d, 0x02, 0x03, 0x93, 0xf0,
- 0xcc, 0x57, 0x82, 0x00, 0xed, 0xa9, 0xce, 0x02, 0x79, 0x08, 0x3d, 0x78,
- 0xd4, 0x02, 0x73, 0x08, 0x3d, 0x68, 0xc4, 0x02, 0x52, 0x00, 0xed, 0xe9,
- 0xce, 0x02, 0x79, 0x00, 0xed, 0xe0, 0xc4, 0x02, 0x83, 0x00, 0xed, 0xc9,
- 0xca, 0xa6, 0x26, 0x08, 0x3d, 0x80, 0x97, 0x00, 0xed, 0xc1, 0x90, 0x00,
- 0xed, 0x81, 0x8e, 0x00, 0xed, 0x5b, 0x03, 0x93, 0xf6, 0x8b, 0x00, 0xed,
- 0x33, 0x03, 0x93, 0xfc, 0x84, 0x08, 0x3c, 0x21, 0xc2, 0x14, 0x40, 0x08,
- 0x3c, 0x01, 0x9b, 0x08, 0x3d, 0x91, 0x89, 0x08, 0x3c, 0x93, 0x03, 0x94,
- 0x08, 0x8a, 0x08, 0x3c, 0xb1, 0xc2, 0x1b, 0xd8, 0x08, 0x3d, 0x19, 0x94,
- 0x08, 0x3d, 0x50, 0xcf, 0x65, 0x51, 0x08, 0x3c, 0x79, 0xc5, 0xa1, 0x17,
- 0x08, 0x3d, 0x20, 0xc3, 0x00, 0xe8, 0x00, 0xed, 0xb1, 0xce, 0x72, 0x3f,
- 0x05, 0x5a, 0xf8, 0xc4, 0x02, 0x83, 0x00, 0xed, 0x99, 0xc4, 0x02, 0x52,
- 0x08, 0x3d, 0xd0, 0xc6, 0xb8, 0xe7, 0x00, 0xed, 0x11, 0xc3, 0x6b, 0xe0,
- 0x00, 0xea, 0x50, 0xcc, 0x57, 0x82, 0x00, 0xed, 0x51, 0xce, 0x02, 0x79,
- 0x00, 0xed, 0x4b, 0x03, 0x94, 0x0e, 0xcc, 0x21, 0x0b, 0x05, 0x5a, 0xf1,
- 0xcf, 0x61, 0x82, 0x05, 0x5a, 0xe9, 0xc4, 0xa0, 0xe6, 0x08, 0x3c, 0xd8,
- 0xd4, 0x02, 0x73, 0x08, 0x3c, 0xf8, 0xc9, 0x1e, 0x4b, 0x08, 0x3c, 0xc0,
- 0xc3, 0x8a, 0xf6, 0x00, 0xea, 0xf9, 0xca, 0x9d, 0x84, 0x08, 0x3c, 0x50,
- 0xc4, 0x02, 0x83, 0x08, 0x3c, 0x63, 0x03, 0x94, 0x14, 0xc4, 0x12, 0xeb,
- 0x08, 0x3c, 0x58, 0x46, 0x00, 0x6b, 0x43, 0x94, 0x1a, 0xc6, 0x25, 0x70,
- 0x00, 0xec, 0xf9, 0x87, 0x08, 0x3c, 0x71, 0xcc, 0x21, 0x78, 0x00, 0x17,
- 0x20, 0xc4, 0x12, 0xeb, 0x08, 0x3d, 0x41, 0xc8, 0x63, 0x87, 0x08, 0x3d,
- 0x48, 0xc3, 0x1b, 0x5e, 0x00, 0xeb, 0x01, 0xc5, 0x52, 0x39, 0x00, 0xea,
- 0xf0, 0x91, 0x00, 0xea, 0x99, 0x87, 0x00, 0xea, 0x58, 0xca, 0x1d, 0xd4,
- 0x08, 0x3c, 0xb8, 0xc4, 0x02, 0x83, 0x00, 0x15, 0x89, 0xc6, 0x01, 0x01,
- 0x08, 0x3c, 0xa8, 0xcc, 0x21, 0x84, 0x08, 0x3d, 0xa0, 0x45, 0x1b, 0xf5,
- 0xc3, 0x94, 0x26, 0xcc, 0x3c, 0xdf, 0x00, 0x17, 0x78, 0xce, 0x4a, 0x4a,
- 0x05, 0x38, 0xa9, 0xc6, 0x01, 0x0e, 0x00, 0x17, 0xfa, 0x03, 0x94, 0x32,
- 0xc7, 0x4a, 0x51, 0x00, 0x17, 0x41, 0xc4, 0x21, 0x13, 0x00, 0x17, 0xb8,
- 0xcd, 0x30, 0xe8, 0x00, 0x17, 0x91, 0xc2, 0x00, 0x35, 0x00, 0x17, 0x98,
- 0x47, 0x1b, 0xf3, 0xc3, 0x94, 0x38, 0xd2, 0x4a, 0x46, 0x05, 0x38, 0xa1,
- 0xc8, 0x4a, 0x50, 0x00, 0x17, 0x38, 0xcc, 0x1f, 0x94, 0x00, 0x17, 0xa1,
- 0x47, 0x06, 0xf1, 0x43, 0x94, 0x44, 0xc8, 0x4a, 0x50, 0x05, 0x38, 0x41,
- 0xd2, 0x4a, 0x46, 0x05, 0x38, 0x68, 0xc8, 0x4a, 0x50, 0x05, 0x38, 0x61,
- 0xd2, 0x4a, 0x46, 0x05, 0x38, 0x88, 0x0f, 0x43, 0x94, 0x50, 0xc2, 0x00,
- 0xcb, 0x0e, 0xbe, 0x09, 0xc2, 0x00, 0x0a, 0x0e, 0xbd, 0xf9, 0x8b, 0x0e,
- 0xbd, 0xc8, 0xc2, 0x00, 0x0a, 0x0e, 0xbe, 0x00, 0xc6, 0x12, 0x12, 0x0e,
- 0xbd, 0xf0, 0xc2, 0x13, 0xa9, 0x0e, 0xbd, 0xe9, 0xc4, 0x89, 0x81, 0x0e,
- 0xbd, 0x88, 0xc4, 0x1c, 0x84, 0x0e, 0xbd, 0xe0, 0xca, 0x92, 0xd2, 0x0e,
- 0xbd, 0xd8, 0xc2, 0x00, 0xf6, 0x0e, 0xbd, 0xd0, 0x8b, 0x0e, 0xbd, 0xb8,
- 0x97, 0x0e, 0xbd, 0xb0, 0x97, 0x0e, 0xbd, 0xa8, 0xc4, 0xda, 0xda, 0x0e,
- 0xbd, 0xa0, 0xc4, 0x8b, 0x49, 0x0e, 0xbd, 0x98, 0xc3, 0x00, 0xb2, 0x0e,
- 0xbd, 0x90, 0xc2, 0x00, 0xb3, 0x0e, 0xbd, 0x81, 0xc6, 0x12, 0x12, 0x0e,
- 0xbd, 0x70, 0xc3, 0x05, 0xe7, 0x0e, 0xbd, 0x78, 0xc4, 0xd7, 0x88, 0x0e,
- 0xbd, 0x68, 0xc4, 0x32, 0x6d, 0x0e, 0xbd, 0x60, 0xc3, 0x05, 0xe7, 0x0e,
- 0xbd, 0x58, 0xc4, 0xd9, 0x9f, 0x0e, 0xbd, 0x50, 0x0f, 0x43, 0x94, 0x5c,
- 0xc2, 0x00, 0xcb, 0x0e, 0xbd, 0x39, 0xc2, 0x00, 0x0a, 0x0e, 0xbd, 0x29,
- 0x8b, 0x0e, 0xbc, 0xf8, 0xc2, 0x00, 0x0a, 0x0e, 0xbd, 0x30, 0xc6, 0x12,
- 0x12, 0x0e, 0xbd, 0x20, 0xc2, 0x13, 0xa9, 0x0e, 0xbd, 0x19, 0xc4, 0x89,
- 0x81, 0x0e, 0xbc, 0xba, 0x03, 0x94, 0x68, 0xc4, 0x1c, 0x84, 0x0e, 0xbd,
- 0x10, 0xc2, 0x00, 0xf6, 0x0e, 0xbd, 0x00, 0x8b, 0x0e, 0xbc, 0xe8, 0x97,
- 0x0e, 0xbc, 0xe0, 0x97, 0x0e, 0xbc, 0xd8, 0xc4, 0xda, 0xda, 0x0e, 0xbc,
- 0xd0, 0xc4, 0x8b, 0x49, 0x0e, 0xbc, 0xc8, 0xc3, 0x00, 0xb2, 0x0e, 0xbc,
- 0xc0, 0xc2, 0x00, 0xb3, 0x0e, 0xbc, 0xb1, 0xc6, 0x12, 0x12, 0x0e, 0xbc,
- 0xa0, 0xc3, 0x05, 0xe7, 0x0e, 0xbc, 0xa8, 0xc4, 0xd7, 0x88, 0x0e, 0xbc,
- 0x98, 0xc4, 0x32, 0x6d, 0x0e, 0xbc, 0x90, 0xc3, 0x05, 0xe7, 0x0e, 0xbc,
- 0x88, 0xc4, 0xd9, 0x9f, 0x0e, 0xbc, 0x80, 0xc3, 0x10, 0x99, 0x0e, 0xbc,
- 0x41, 0xc5, 0xde, 0x58, 0x0e, 0xbb, 0xf0, 0xc3, 0x10, 0x99, 0x0e, 0xbb,
- 0x71, 0xc5, 0xde, 0x58, 0x0e, 0xbb, 0x20, 0xc7, 0x00, 0x70, 0x0e, 0xbb,
- 0x38, 0x8e, 0x00, 0x6a, 0xb0, 0xc8, 0xb3, 0x5e, 0x0e, 0x8f, 0x41, 0xc9,
- 0xb1, 0xa4, 0x0e, 0x8f, 0x00, 0x50, 0x5b, 0xc2, 0xc3, 0x94, 0x6e, 0xcb,
- 0x92, 0x79, 0x0e, 0x8e, 0xf8, 0xc2, 0x01, 0xc7, 0x0e, 0x8f, 0x29, 0xc4,
- 0x03, 0x68, 0x0e, 0x8f, 0x20, 0xc5, 0x03, 0xe2, 0x0e, 0x8a, 0x39, 0xc5,
- 0x02, 0x31, 0x0e, 0x8a, 0x30, 0x47, 0xc1, 0x95, 0xc3, 0x94, 0x7a, 0x47,
- 0xc8, 0x80, 0x43, 0x94, 0x8c, 0x16, 0xc3, 0x94, 0x9e, 0x02, 0x43, 0x94,
- 0xaa, 0xc4, 0x03, 0x68, 0x0e, 0x89, 0x89, 0xc2, 0x01, 0xc7, 0x0e, 0x89,
- 0x80, 0xc7, 0xc8, 0xe9, 0x0e, 0x8d, 0x79, 0xc4, 0x00, 0xba, 0x0e, 0x8d,
- 0x70, 0xc7, 0x97, 0x39, 0x0e, 0x8e, 0xd0, 0xca, 0x63, 0x35, 0x0e, 0x8e,
- 0x5b, 0x03, 0x94, 0xb6, 0xc8, 0x63, 0x37, 0x0e, 0x8e, 0x50, 0xc8, 0x63,
- 0x37, 0x0e, 0x8e, 0x3b, 0x03, 0x94, 0xbc, 0xca, 0x63, 0x35, 0x0e, 0x8e,
- 0x40, 0xc2, 0x01, 0xc7, 0x0e, 0x8c, 0xd1, 0xc5, 0x00, 0xe2, 0x0e, 0x8c,
- 0xc8, 0x55, 0x32, 0xd4, 0xc3, 0x94, 0xc2, 0x4a, 0x32, 0xda, 0x43, 0x94,
- 0xce, 0xc4, 0x22, 0x4b, 0x0e, 0x8b, 0x11, 0xc4, 0x2b, 0xc8, 0x0e, 0x8a,
- 0x00, 0xc5, 0xdd, 0x04, 0x0e, 0x8e, 0xb9, 0xc3, 0x2f, 0x38, 0x0e, 0x8e,
- 0xa8, 0xc5, 0x03, 0xe2, 0x0e, 0x8a, 0xd9, 0xc5, 0x02, 0x31, 0x0e, 0x8a,
- 0xd0, 0x47, 0x11, 0x39, 0xc3, 0x94, 0xe6, 0xc8, 0xbd, 0x8d, 0x0e, 0x89,
- 0xa0, 0xc6, 0xd0, 0xbd, 0x0e, 0x8e, 0x89, 0xc6, 0xd1, 0x77, 0x0e, 0x8e,
- 0x80, 0xc8, 0xb7, 0x2d, 0x0e, 0x8c, 0xa9, 0xc5, 0x00, 0xe2, 0x0e, 0x8c,
- 0xa0, 0xc5, 0xde, 0x08, 0x0e, 0x89, 0x01, 0xc4, 0xe3, 0xc7, 0x0e, 0x88,
- 0xf8, 0xc4, 0x2b, 0xc8, 0x0e, 0x8e, 0x29, 0xc5, 0x03, 0xe2, 0x0e, 0x8d,
- 0xe0, 0x18, 0xc3, 0x95, 0x25, 0xc8, 0xba, 0xf5, 0x0e, 0x88, 0x90, 0xc3,
- 0x02, 0x5c, 0x0e, 0x88, 0xa9, 0x87, 0x0e, 0x88, 0xa0, 0xcf, 0x63, 0x35,
- 0x0e, 0x8e, 0x11, 0xcd, 0x63, 0x37, 0x0e, 0x8e, 0x08, 0xd0, 0x57, 0xf2,
- 0x0e, 0x88, 0xe9, 0xca, 0x6f, 0x79, 0x0e, 0x88, 0xc8, 0x4e, 0x73, 0x73,
- 0xc3, 0x95, 0x32, 0xca, 0x44, 0x9c, 0x0e, 0x88, 0x10, 0xc5, 0xde, 0x08,
- 0x0e, 0x89, 0x21, 0xc4, 0xe3, 0xc7, 0x0e, 0x89, 0x18, 0xc4, 0x63, 0xaf,
- 0x0e, 0x8d, 0xa8, 0x9e, 0x0e, 0x8d, 0x29, 0x9d, 0x0e, 0x8d, 0x20, 0xc4,
- 0x22, 0x4b, 0x0e, 0x8b, 0x21, 0xc4, 0x2b, 0xc8, 0x0e, 0x8a, 0x10, 0x4a,
- 0xa3, 0xf6, 0xc3, 0x95, 0x3e, 0xc5, 0x01, 0x49, 0x0e, 0x88, 0x40, 0xc4,
- 0x34, 0x39, 0x0e, 0x89, 0x99, 0xc5, 0xa5, 0xb8, 0x0e, 0x89, 0x90, 0xd3,
- 0x41, 0x88, 0x0f, 0xd1, 0x91, 0xcf, 0x18, 0x82, 0x0f, 0xd2, 0x18, 0xd0,
- 0x3a, 0x25, 0x01, 0x49, 0x71, 0xd0, 0x3a, 0x39, 0x01, 0x49, 0x88, 0xc6,
- 0x13, 0x6b, 0x01, 0x0f, 0x89, 0xc8, 0xc0, 0xfd, 0x01, 0x0d, 0xc0, 0x46,
- 0x00, 0x6b, 0x43, 0x95, 0x4a, 0x46, 0x00, 0x6b, 0x43, 0x95, 0x69, 0xc4,
- 0xe5, 0x9f, 0x00, 0xff, 0x59, 0x18, 0xc3, 0x95, 0x8d, 0xc6, 0x60, 0xe6,
- 0x00, 0xff, 0x49, 0x06, 0xc3, 0x95, 0x99, 0xc5, 0x63, 0xc6, 0x00, 0x1c,
- 0x70, 0xc4, 0xe5, 0x9f, 0x00, 0xfe, 0xd9, 0x18, 0xc3, 0x95, 0xa8, 0xc6,
- 0x60, 0xe6, 0x00, 0xfe, 0xc9, 0x06, 0xc3, 0x95, 0xb4, 0xc5, 0xd5, 0x8e,
- 0x00, 0xf9, 0xc3, 0x03, 0x95, 0xc3, 0xc5, 0x63, 0xc6, 0x00, 0x1c, 0x50,
- 0x46, 0x00, 0x6b, 0x43, 0x95, 0xc9, 0x46, 0x00, 0x6b, 0x43, 0x95, 0xe8,
- 0x46, 0x00, 0x6b, 0x43, 0x96, 0x0c, 0x46, 0x00, 0x6b, 0x43, 0x96, 0x2f,
- 0x46, 0x00, 0x6b, 0x43, 0x96, 0x5a, 0x06, 0xc3, 0x96, 0x7e, 0x12, 0xc3,
- 0x96, 0x90, 0xc6, 0x60, 0xe6, 0x00, 0xff, 0x09, 0x18, 0xc3, 0x96, 0x9f,
- 0xc4, 0xe5, 0x9f, 0x00, 0xfb, 0xd9, 0xc5, 0x63, 0xc6, 0x00, 0x1e, 0x68,
- 0xc5, 0x71, 0xcf, 0x00, 0xff, 0x29, 0xc5, 0xd5, 0x8e, 0x00, 0xff, 0x20,
- 0x06, 0xc3, 0x96, 0xab, 0x12, 0xc3, 0x96, 0xbd, 0xc6, 0x60, 0xe6, 0x00,
- 0xfe, 0x89, 0x18, 0xc3, 0x96, 0xcc, 0xc4, 0xe5, 0x9f, 0x00, 0xfb, 0xb9,
- 0xc5, 0x63, 0xc6, 0x00, 0x1d, 0x78, 0x46, 0x00, 0x6b, 0x43, 0x96, 0xd8,
- 0x46, 0x00, 0x6b, 0x43, 0x97, 0x03, 0x46, 0x00, 0x6b, 0x43, 0x97, 0x27,
- 0xc5, 0x7e, 0xe3, 0x00, 0x1e, 0xc9, 0xc5, 0x8a, 0x53, 0x00, 0x1b, 0x98,
- 0x90, 0x00, 0x1f, 0xd9, 0xc3, 0x8a, 0x55, 0x00, 0x1f, 0x08, 0xc2, 0x00,
- 0xcb, 0x00, 0xe9, 0x51, 0x8b, 0x00, 0xe9, 0x40, 0xc3, 0x03, 0x2c, 0x08,
- 0x0a, 0x09, 0x47, 0x0c, 0x4c, 0x43, 0x97, 0x53, 0xc7, 0xbf, 0xd6, 0x08,
- 0x0a, 0x69, 0xc7, 0x67, 0x1b, 0x08, 0x0a, 0xa0, 0x00, 0x43, 0x97, 0x5f,
- 0x00, 0x43, 0x97, 0x72, 0xc6, 0xbf, 0xd7, 0x08, 0x0a, 0x49, 0xcf, 0x67,
- 0x13, 0x08, 0x0a, 0xa8, 0x00, 0x43, 0x97, 0x7c, 0xc2, 0x01, 0x47, 0x08,
- 0x0a, 0xe1, 0xc2, 0x00, 0x4d, 0x08, 0x0b, 0x21, 0x0a, 0x43, 0x97, 0x88,
- 0xc3, 0x43, 0xcd, 0x08, 0x0b, 0x49, 0x43, 0x00, 0x98, 0x43, 0x97, 0x94,
- 0xc2, 0x00, 0x7b, 0x08, 0x0a, 0xfb, 0x03, 0x97, 0xa0, 0xc3, 0x43, 0xcd,
- 0x08, 0x0b, 0x32, 0x03, 0x97, 0xa6, 0xcf, 0x65, 0x8d, 0x08, 0x0b, 0x08,
- 0xd3, 0x46, 0x35, 0x08, 0x78, 0xe0, 0xd3, 0x46, 0x35, 0x08, 0x78, 0xb8,
- 0xd3, 0x46, 0x35, 0x08, 0x78, 0x80, 0xc3, 0xdf, 0x4a, 0x08, 0x78, 0xa9,
- 0xc4, 0xd9, 0x77, 0x08, 0x78, 0x88, 0xcc, 0x88, 0x90, 0x08, 0x78, 0x99,
- 0xc3, 0x37, 0x5d, 0x08, 0x78, 0x00, 0xc2, 0xe7, 0x79, 0x08, 0x1e, 0x49,
- 0xc2, 0x00, 0xa4, 0x08, 0x1e, 0x50, 0xc7, 0xc7, 0x61, 0x08, 0x1e, 0x62,
- 0x03, 0x97, 0xac, 0xc2, 0x01, 0x29, 0x08, 0x1e, 0x70, 0x91, 0x08, 0x1e,
- 0x91, 0xc4, 0x18, 0x85, 0x08, 0x1e, 0xa0, 0xc7, 0xc8, 0x10, 0x0e, 0x7d,
- 0xf1, 0x44, 0xe1, 0xeb, 0xc3, 0x97, 0xb2, 0xc9, 0x8e, 0x71, 0x0e, 0x7d,
- 0xb0, 0xd0, 0x5d, 0x72, 0x0e, 0x7d, 0x21, 0xd0, 0x2d, 0xe9, 0x0e, 0x7d,
- 0x08, 0xcb, 0x93, 0x29, 0x0e, 0x7c, 0x79, 0xc7, 0x78, 0x91, 0x0e, 0x7c,
- 0x48, 0x87, 0x00, 0xb3, 0x50, 0x87, 0x00, 0xb1, 0xb8, 0x8b, 0x00, 0xa7,
- 0x08, 0x91, 0x00, 0xa7, 0x28, 0x83, 0x00, 0xa7, 0x48, 0x8b, 0x00, 0xa2,
- 0xe0, 0x91, 0x00, 0xa3, 0x00, 0x83, 0x00, 0xa3, 0x20, 0x83, 0x00, 0xa9,
- 0xe0, 0x91, 0x00, 0xa9, 0xc0, 0x8b, 0x00, 0xa9, 0xa0, 0x83, 0x00, 0xa9,
- 0x20, 0x8b, 0x00, 0xa8, 0xe0, 0x91, 0x00, 0xa9, 0x00, 0x83, 0x00, 0xa8,
- 0x18, 0x8b, 0x00, 0xa7, 0xd8, 0x91, 0x00, 0xa7, 0xf8, 0x83, 0x00, 0xa2,
- 0x38, 0x91, 0x00, 0xa2, 0x18, 0x8b, 0x00, 0xa1, 0xf8, 0x8b, 0x00, 0xa5,
- 0x88, 0x91, 0x00, 0xa5, 0xa8, 0x83, 0x00, 0xa5, 0xc8, 0x83, 0x00, 0xb3,
- 0xe8, 0x91, 0x00, 0xb3, 0xd8, 0x8b, 0x00, 0xb3, 0xc8, 0x43, 0x00, 0x58,
- 0xc3, 0x97, 0xbf, 0xc4, 0x00, 0x96, 0x00, 0x1a, 0x80, 0x96, 0x01, 0x66,
- 0xa8, 0x96, 0x01, 0x66, 0xa0, 0xcd, 0x0d, 0x8f, 0x01, 0x92, 0x49, 0x87,
- 0x01, 0x92, 0x88, 0xc2, 0x01, 0x47, 0x01, 0x92, 0x91, 0xc4, 0x04, 0x5e,
- 0x01, 0x92, 0x98, 0xc3, 0x06, 0x9e, 0x01, 0x92, 0xa1, 0xc3, 0x0c, 0x5b,
- 0x01, 0x92, 0xa8, 0xc2, 0x26, 0x51, 0x01, 0x92, 0xb1, 0xc4, 0x18, 0x83,
- 0x01, 0x92, 0xb8, 0xcd, 0x0d, 0x8f, 0x01, 0x92, 0x51, 0x87, 0x01, 0x92,
- 0xd8, 0xc2, 0x01, 0x47, 0x01, 0x92, 0xe1, 0xc4, 0x04, 0x5e, 0x01, 0x92,
- 0xe8, 0xc3, 0x06, 0x9e, 0x01, 0x92, 0xf1, 0xc3, 0x0c, 0x5b, 0x01, 0x92,
- 0xf8, 0xc2, 0x26, 0x51, 0x01, 0x95, 0x89, 0xc4, 0x18, 0x83, 0x01, 0x95,
- 0x90, 0xcd, 0x0d, 0x8f, 0x01, 0x92, 0x59, 0x87, 0x01, 0x95, 0xb0, 0xc2,
- 0x01, 0x47, 0x01, 0x95, 0xb9, 0xc4, 0x04, 0x5e, 0x01, 0x95, 0xc0, 0xc3,
- 0x06, 0x9e, 0x01, 0x95, 0xc9, 0xc3, 0x0c, 0x5b, 0x01, 0x95, 0xd0, 0xc2,
- 0x26, 0x51, 0x01, 0x95, 0xd9, 0xc4, 0x18, 0x83, 0x01, 0x95, 0xe0, 0x46,
- 0x26, 0x11, 0x43, 0x97, 0xcb, 0xc2, 0x00, 0xc1, 0x09, 0x19, 0x69, 0xc2,
- 0x00, 0xa4, 0x09, 0x19, 0x60, 0xc9, 0xb5, 0x16, 0x09, 0x29, 0x79, 0xc2,
- 0x00, 0x51, 0x09, 0x15, 0x00, 0x8e, 0x09, 0x29, 0x21, 0x86, 0x09, 0x12,
- 0xb0, 0xc2, 0x00, 0xe5, 0x09, 0x29, 0x18, 0xc2, 0x00, 0xe5, 0x09, 0x12,
- 0xe3, 0x03, 0x97, 0xd7, 0xc3, 0x00, 0x56, 0x09, 0x12, 0xd8, 0xc9, 0x40,
- 0x88, 0x09, 0x12, 0xa8, 0xc8, 0xc0, 0x15, 0x09, 0x11, 0xd8, 0xc3, 0x3d,
- 0x52, 0x09, 0x28, 0xf1, 0xc3, 0x03, 0x87, 0x09, 0x10, 0x80, 0xd2, 0x34,
- 0xe4, 0x09, 0x28, 0xe8, 0xc2, 0x00, 0x92, 0x09, 0x28, 0xd9, 0xcb, 0x90,
- 0xa0, 0x09, 0x10, 0x18, 0xc2, 0x08, 0x07, 0x09, 0x1c, 0x59, 0x0b, 0x43,
- 0x97, 0xdd, 0x00, 0x43, 0x97, 0xe9, 0x97, 0x09, 0x10, 0x69, 0x87, 0x09,
- 0x10, 0x60, 0xc3, 0x01, 0x29, 0x09, 0x10, 0x51, 0xc9, 0x40, 0x88, 0x09,
- 0x10, 0x48, 0x8b, 0x09, 0x10, 0x41, 0x42, 0x01, 0xf2, 0x43, 0x97, 0xf5,
- 0xc4, 0xde, 0x30, 0x09, 0x28, 0xb1, 0x86, 0x09, 0x28, 0xa8, 0xc5, 0x39,
- 0x40, 0x09, 0x28, 0x88, 0xc4, 0xde, 0x30, 0x09, 0x28, 0x59, 0x86, 0x09,
- 0x28, 0x51, 0x9f, 0x09, 0x28, 0x48, 0x87, 0x09, 0x28, 0x41, 0xc2, 0x00,
- 0xbb, 0x09, 0x28, 0x38, 0xca, 0xa6, 0xee, 0x09, 0x27, 0xb1, 0x49, 0x34,
- 0xe1, 0xc3, 0x98, 0x00, 0xc3, 0x04, 0xca, 0x09, 0x27, 0x99, 0xc2, 0x0a,
- 0xad, 0x09, 0x27, 0x90, 0x8b, 0x09, 0x1c, 0x41, 0xc2, 0x04, 0xdd, 0x09,
- 0x0e, 0x33, 0x03, 0x98, 0x0c, 0x83, 0x09, 0x0e, 0x22, 0x03, 0x98, 0x12,
- 0xc2, 0x00, 0xe5, 0x09, 0x0f, 0x51, 0x86, 0x09, 0x0f, 0x49, 0xca, 0xa7,
- 0x84, 0x09, 0x0f, 0x41, 0x46, 0x26, 0x11, 0x43, 0x98, 0x16, 0xd8, 0x26,
- 0x10, 0x09, 0x0f, 0x21, 0x03, 0x43, 0x98, 0x20, 0xc2, 0x06, 0x1f, 0x09,
- 0x0f, 0x09, 0x0a, 0x43, 0x98, 0x2a, 0xc3, 0x79, 0x0e, 0x09, 0x0e, 0xd1,
- 0x87, 0x09, 0x0e, 0xc2, 0x03, 0x98, 0x3f, 0x97, 0x09, 0x0e, 0xb3, 0x03,
- 0x98, 0x45, 0xc3, 0x05, 0x1a, 0x09, 0x0e, 0xa9, 0xc4, 0x01, 0x28, 0x09,
- 0x0e, 0xa0, 0x17, 0xc3, 0x98, 0x49, 0x8b, 0x09, 0x0e, 0x7a, 0x03, 0x98,
- 0x54, 0x8f, 0x09, 0x0e, 0x63, 0x03, 0x98, 0x58, 0xc7, 0x66, 0xc1, 0x09,
- 0x0e, 0x58, 0xcb, 0x90, 0xab, 0x09, 0x0e, 0x51, 0x83, 0x09, 0x0e, 0x42,
- 0x03, 0x98, 0x5e, 0x8b, 0x09, 0x0e, 0x09, 0xc2, 0x01, 0xf2, 0x09, 0x0e,
- 0x00, 0xcc, 0x82, 0x30, 0x09, 0x0d, 0xf9, 0x90, 0x09, 0x0d, 0xf1, 0x8e,
- 0x09, 0x0d, 0xe9, 0x46, 0x26, 0x11, 0x43, 0x98, 0x62, 0xcd, 0x4d, 0x99,
- 0x09, 0x0b, 0x51, 0xc8, 0x55, 0x77, 0x09, 0x0b, 0x48, 0xd2, 0x4d, 0x94,
- 0x09, 0x26, 0x59, 0xc4, 0x3d, 0x51, 0x09, 0x08, 0xa1, 0xc3, 0x61, 0x9a,
- 0x09, 0x08, 0x98, 0x0b, 0xc3, 0x98, 0x74, 0x87, 0x09, 0x07, 0x2a, 0x03,
- 0x98, 0x7c, 0x94, 0x09, 0x07, 0x21, 0x8e, 0x09, 0x07, 0x18, 0x46, 0x26,
- 0x11, 0x43, 0x98, 0x82, 0xc9, 0x1f, 0xd2, 0x09, 0x07, 0x08, 0x8f, 0x09,
- 0x26, 0x02, 0x03, 0x98, 0x8e, 0xd0, 0x5a, 0x32, 0x09, 0x25, 0xf9, 0xc9,
- 0xb0, 0xa8, 0x09, 0x06, 0xe0, 0xc9, 0xb3, 0xdb, 0x09, 0x06, 0xd8, 0xc4,
- 0x47, 0x42, 0x09, 0x06, 0xc9, 0x8d, 0x09, 0x06, 0xc0, 0x46, 0x26, 0x11,
- 0xc3, 0x98, 0x94, 0x8e, 0x09, 0x06, 0x92, 0x03, 0x98, 0x9e, 0x94, 0x09,
- 0x06, 0x63, 0x03, 0x98, 0xa4, 0xc7, 0x5b, 0xdb, 0x09, 0x06, 0x58, 0xca,
- 0xa9, 0x3c, 0x09, 0x06, 0x81, 0xa1, 0x09, 0x06, 0x72, 0x03, 0x98, 0xaa,
- 0xd0, 0x5b, 0xd2, 0x09, 0x06, 0x50, 0xc8, 0xb3, 0xdb, 0x09, 0x06, 0x40,
- 0x48, 0x6e, 0x0f, 0xc3, 0x98, 0xb0, 0x84, 0x09, 0x06, 0x30, 0x42, 0x00,
- 0x3b, 0x43, 0x98, 0xbc, 0xc4, 0x3e, 0xa9, 0x09, 0x25, 0xb1, 0xc9, 0xb5,
- 0xdc, 0x09, 0x06, 0x01, 0x86, 0x09, 0x05, 0xf8, 0xc8, 0xb5, 0xdd, 0x09,
- 0x06, 0x10, 0x9f, 0x09, 0x1b, 0xd2, 0x03, 0x98, 0xc8, 0xd0, 0x59, 0xa2,
- 0x09, 0x1b, 0xc8, 0xc3, 0x04, 0xca, 0x09, 0x05, 0xd1, 0xc2, 0x00, 0xa4,
- 0x09, 0x05, 0xc9, 0xca, 0x9d, 0x48, 0x09, 0x05, 0xc0, 0xc8, 0xc0, 0x0d,
- 0x09, 0x07, 0x60, 0xca, 0x54, 0x87, 0x09, 0x25, 0x00, 0xcc, 0x5a, 0x36,
- 0x09, 0x24, 0xe8, 0xc4, 0x47, 0xd6, 0x09, 0x1b, 0x99, 0xc4, 0xe1, 0xdf,
- 0x09, 0x03, 0x60, 0x8f, 0x09, 0x03, 0x39, 0xcb, 0x96, 0x6d, 0x09, 0x03,
- 0x30, 0xc2, 0x3e, 0xab, 0x09, 0x02, 0xf0, 0xca, 0x96, 0x6d, 0x09, 0x02,
- 0xe0, 0xc8, 0x1e, 0x43, 0x00, 0x26, 0xe9, 0xc8, 0x23, 0xe0, 0x00, 0x24,
- 0xb8, 0x00, 0x43, 0x98, 0xce, 0x00, 0x43, 0x98, 0xf2, 0x14, 0xc3, 0x99,
- 0x26, 0xc6, 0x14, 0xea, 0x0e, 0xc6, 0x61, 0x46, 0x0d, 0xe0, 0xc3, 0x99,
- 0x32, 0xc2, 0x01, 0xc7, 0x0e, 0xc6, 0x33, 0x03, 0x99, 0x48, 0xc4, 0x03,
- 0x68, 0x0e, 0xc6, 0x21, 0xcf, 0x62, 0x63, 0x0e, 0xc0, 0xe0, 0xc5, 0x0d,
- 0xe0, 0x0e, 0xc5, 0xc1, 0xc5, 0x08, 0x42, 0x0e, 0xc5, 0xb9, 0xc6, 0x00,
- 0x50, 0x0e, 0xc5, 0xa3, 0x03, 0x99, 0x4e, 0xc6, 0x14, 0xea, 0x0e, 0xc5,
- 0x81, 0xce, 0x3d, 0x1e, 0x0e, 0xc5, 0x79, 0xc2, 0x01, 0xc7, 0x0e, 0xc5,
- 0x71, 0xc4, 0x03, 0x68, 0x0e, 0xc5, 0x58, 0xc5, 0x08, 0x42, 0x0e, 0xc5,
- 0x03, 0x03, 0x99, 0x52, 0x16, 0xc3, 0x99, 0x58, 0xc4, 0x17, 0x9a, 0x0e,
- 0xc4, 0xc1, 0xce, 0x3d, 0x1e, 0x0e, 0xc4, 0xb9, 0xc2, 0x01, 0xc7, 0x0e,
- 0xc4, 0x91, 0xc4, 0x03, 0x68, 0x0e, 0xc4, 0x72, 0x03, 0x99, 0x64, 0xc6,
- 0x14, 0xea, 0x0e, 0xc3, 0x29, 0xc6, 0x02, 0x21, 0x0e, 0xc3, 0x13, 0x03,
- 0x99, 0x68, 0xd0, 0x59, 0x62, 0x0e, 0xc3, 0x08, 0xc7, 0x29, 0xd4, 0x0e,
- 0xc3, 0x01, 0xc4, 0x17, 0x9a, 0x0e, 0xc2, 0xf9, 0xc4, 0x0d, 0xf4, 0x0e,
- 0xc2, 0xe8, 0x00, 0x43, 0x99, 0x71, 0xd2, 0x26, 0xcd, 0x0e, 0xc2, 0x63,
- 0x03, 0x99, 0x80, 0xcb, 0x17, 0x84, 0x0e, 0xc2, 0x22, 0x03, 0x99, 0x84,
- 0xc5, 0x0d, 0xe0, 0x0e, 0xc7, 0xa3, 0x03, 0x99, 0x88, 0xcb, 0x14, 0xe5,
- 0x0e, 0xc6, 0x1b, 0x03, 0x99, 0x8c, 0x47, 0x00, 0x50, 0x43, 0x99, 0x92,
- 0xc2, 0x00, 0x34, 0x0e, 0xc6, 0x99, 0xc3, 0x00, 0x83, 0x0e, 0xc6, 0x90,
- 0xd2, 0x4c, 0x62, 0x0e, 0xc4, 0xfa, 0x03, 0x99, 0x9e, 0x00, 0x43, 0x99,
- 0xa4, 0xcc, 0x14, 0xe4, 0x0e, 0xc6, 0x88, 0xdd, 0x10, 0xc3, 0x0e, 0xc5,
- 0x60, 0x00, 0x43, 0x99, 0xbf, 0xd3, 0x45, 0x77, 0x0e, 0xc4, 0x21, 0xc4,
- 0x0d, 0xf4, 0x0e, 0xc4, 0x02, 0x03, 0x99, 0xce, 0x00, 0x43, 0x99, 0xd4,
- 0xd7, 0x26, 0xcd, 0x0e, 0xc2, 0xa9, 0xd5, 0x17, 0x84, 0x0e, 0xc2, 0x58,
- 0xd5, 0x14, 0xe5, 0x0e, 0xc6, 0xd3, 0x03, 0x99, 0xe0, 0xc5, 0x0d, 0xe0,
- 0x0e, 0xc6, 0x50, 0xc5, 0x15, 0xf9, 0x0e, 0xc5, 0xf9, 0xc2, 0x00, 0x34,
- 0x0e, 0xc5, 0xf1, 0xc3, 0x00, 0x83, 0x0e, 0xc5, 0xe8, 0xc5, 0x08, 0x42,
- 0x0e, 0xc0, 0x13, 0x03, 0x99, 0xe4, 0xd2, 0x14, 0xde, 0x0e, 0xc6, 0x81,
- 0x46, 0x0d, 0xe0, 0xc3, 0x99, 0xe8, 0xc4, 0x01, 0x75, 0x0e, 0xc3, 0x63,
- 0x03, 0x99, 0xf4, 0xc8, 0xb7, 0x55, 0x0e, 0xc3, 0x89, 0xd3, 0x40, 0xa4,
- 0x0e, 0xc2, 0x9a, 0x03, 0x99, 0xf8, 0xd5, 0x36, 0x46, 0x0e, 0xc6, 0x79,
- 0xd4, 0x38, 0xf5, 0x0e, 0xc5, 0xe1, 0xc4, 0x01, 0x75, 0x0e, 0xc3, 0xa0,
- 0xc5, 0x33, 0xad, 0x0e, 0xc6, 0xb8, 0xc7, 0x29, 0xd4, 0x0e, 0xc3, 0x49,
- 0xc4, 0x0d, 0xf4, 0x0e, 0xc3, 0x38, 0xcb, 0x14, 0xe5, 0x0e, 0xc6, 0x73,
- 0x03, 0x99, 0xfe, 0xc2, 0x01, 0xc7, 0x0e, 0xc6, 0x38, 0x00, 0x43, 0x9a,
- 0x04, 0xc5, 0x08, 0x42, 0x0e, 0xc5, 0x09, 0xc2, 0x01, 0xc7, 0x0e, 0xc4,
- 0xa0, 0xc5, 0x17, 0xef, 0x0e, 0xce, 0x89, 0xc5, 0x04, 0x73, 0x0e, 0xce,
- 0x80, 0xc5, 0x17, 0xef, 0x0e, 0xce, 0x11, 0xc5, 0x04, 0x73, 0x0e, 0xce,
- 0x08, 0xc2, 0x00, 0x15, 0x0e, 0xcb, 0x40, 0xc6, 0x05, 0x96, 0x0e, 0xce,
- 0x79, 0xc6, 0x24, 0x18, 0x0e, 0xce, 0x68, 0xc6, 0x05, 0x96, 0x0e, 0xce,
- 0x71, 0xc6, 0x24, 0x18, 0x0e, 0xce, 0x60, 0xc6, 0x05, 0x96, 0x0e, 0xce,
- 0x01, 0xc6, 0x24, 0x18, 0x0e, 0xcd, 0xf0, 0xc6, 0x05, 0x96, 0x0e, 0xcd,
- 0xf9, 0xc6, 0x24, 0x18, 0x0e, 0xcd, 0xe8, 0xcc, 0x85, 0xb4, 0x0e, 0xce,
- 0x59, 0xcc, 0x82, 0x9c, 0x0e, 0xce, 0x50, 0xc6, 0x2f, 0x01, 0x0e, 0xcd,
- 0xe1, 0xc6, 0x05, 0x96, 0x0e, 0xcd, 0xd0, 0xc6, 0x2f, 0x01, 0x0e, 0xcd,
- 0xd9, 0xc6, 0x05, 0x96, 0x0e, 0xcd, 0xc8, 0xc5, 0x17, 0xef, 0x0e, 0xce,
- 0x39, 0xc5, 0x04, 0x73, 0x0e, 0xce, 0x30, 0xc5, 0x17, 0xef, 0x0e, 0xcd,
- 0xc1, 0xc5, 0x04, 0x73, 0x0e, 0xcd, 0xb8, 0xc5, 0x17, 0xef, 0x0e, 0xcc,
- 0xf1, 0xc6, 0x06, 0x1b, 0x0e, 0xcc, 0xe9, 0xc5, 0x04, 0x73, 0x0e, 0xcc,
- 0xe0, 0xc5, 0x17, 0xef, 0x0e, 0xcc, 0xd9, 0xc6, 0x06, 0x1b, 0x0e, 0xcc,
- 0xd1, 0xc5, 0x04, 0x73, 0x0e, 0xcc, 0xc8, 0x47, 0x1f, 0x30, 0xc3, 0x9a,
- 0x1f, 0x4b, 0x27, 0x8c, 0x43, 0x9a, 0x2b, 0xcb, 0x8f, 0x14, 0x0e, 0xcc,
- 0xf9, 0x53, 0x44, 0x21, 0x43, 0x9a, 0x40, 0xc5, 0x17, 0xef, 0x0e, 0xcc,
- 0x53, 0x03, 0x9a, 0x4c, 0xc6, 0x06, 0x1b, 0x0e, 0xcc, 0x49, 0xc5, 0x04,
- 0x73, 0x0e, 0xcc, 0x40, 0xc2, 0x00, 0x15, 0x0e, 0xc9, 0x68, 0x45, 0x00,
- 0x6c, 0xc3, 0x9a, 0x52, 0xc6, 0x11, 0xa5, 0x01, 0x5b, 0x99, 0x4a, 0x01,
- 0x6b, 0x43, 0x9a, 0x7c, 0xe0, 0x02, 0xc7, 0x01, 0x4b, 0x28, 0xd0, 0x5e,
- 0x72, 0x0f, 0xc1, 0x91, 0xcb, 0x5e, 0x77, 0x0f, 0xc1, 0x71, 0xca, 0x9f,
- 0xd2, 0x0f, 0xc1, 0x51, 0x47, 0x06, 0xf1, 0xc3, 0x9a, 0x82, 0x49, 0xab,
- 0x7d, 0xc3, 0x9a, 0x8e, 0xcc, 0x86, 0xb0, 0x0f, 0xc1, 0x11, 0xcc, 0x82,
- 0x84, 0x0f, 0xc1, 0x30, 0xe0, 0x05, 0xa7, 0x01, 0x5c, 0x10, 0x46, 0x00,
- 0x6b, 0x43, 0x9a, 0x9a, 0xe0, 0x05, 0x47, 0x01, 0x4b, 0x48, 0x0e, 0xc3,
- 0x9a, 0xa6, 0x14, 0x43, 0x9a, 0xb2, 0x90, 0x00, 0x70, 0x81, 0xc3, 0x00,
- 0xa4, 0x00, 0x70, 0xb8, 0xca, 0x2b, 0x13, 0x07, 0xea, 0xc1, 0xcc, 0x12,
- 0x30, 0x07, 0xea, 0xc8, 0xcb, 0x66, 0x54, 0x07, 0xe7, 0x51, 0xcc, 0x12,
- 0x30, 0x07, 0xe9, 0x90, 0x0b, 0xc3, 0x9a, 0xb8, 0xca, 0x2b, 0x13, 0x07,
- 0xe9, 0x31, 0xcb, 0x66, 0x54, 0x07, 0xe9, 0xc1, 0x45, 0x00, 0x6c, 0x43,
- 0x9a, 0xc4, 0xcb, 0x12, 0x31, 0x07, 0xe9, 0x81, 0xcc, 0x05, 0x7b, 0x07,
- 0xe8, 0x60, 0x45, 0x51, 0xe9, 0xc3, 0x9a, 0xd0, 0x45, 0x19, 0x9d, 0x43,
- 0x9a, 0xdc, 0xcb, 0x12, 0x31, 0x07, 0xe9, 0x69, 0xcc, 0x05, 0x7b, 0x07,
- 0xe8, 0x48, 0xcb, 0x12, 0x31, 0x07, 0xe9, 0x79, 0xcc, 0x05, 0x7b, 0x07,
- 0xe8, 0x58, 0xcb, 0x66, 0x54, 0x07, 0xe7, 0xa1, 0xcd, 0x05, 0x7a, 0x07,
- 0xe3, 0x10, 0xcb, 0x66, 0x54, 0x07, 0xe7, 0x99, 0xcd, 0x05, 0x7a, 0x07,
- 0xe3, 0x08, 0xca, 0x2b, 0x13, 0x07, 0xea, 0xf9, 0xcc, 0x12, 0x30, 0x07,
- 0xeb, 0x00, 0xca, 0x2b, 0x13, 0x07, 0xeb, 0x11, 0xcc, 0x12, 0x30, 0x07,
- 0xeb, 0x18, 0xcc, 0x05, 0x7b, 0x07, 0xe0, 0xe9, 0xcb, 0x12, 0x31, 0x07,
- 0xe5, 0x70, 0xcc, 0x05, 0x7b, 0x07, 0xe1, 0x09, 0xcb, 0x12, 0x31, 0x07,
- 0xe5, 0x98, 0xca, 0x2b, 0x13, 0x07, 0xeb, 0x31, 0xcc, 0x12, 0x30, 0x07,
- 0xee, 0x28, 0xcc, 0x05, 0x7b, 0x07, 0xe1, 0x01, 0xcb, 0x12, 0x31, 0x07,
- 0xe5, 0x88, 0x44, 0x19, 0xa7, 0xc3, 0x9a, 0xe8, 0xce, 0x43, 0xed, 0x07,
- 0xed, 0x48, 0xd3, 0x44, 0xa6, 0x07, 0xea, 0x31, 0x0a, 0x43, 0x9a, 0xf4,
- 0x47, 0xa9, 0x21, 0xc3, 0x9b, 0x00, 0xcd, 0x05, 0x7a, 0x07, 0xef, 0xc8,
- 0xca, 0x2b, 0x13, 0x07, 0xeb, 0xb1, 0xcc, 0x12, 0x30, 0x07, 0xeb, 0xb8,
- 0x8f, 0x07, 0xea, 0x39, 0xcd, 0x7b, 0x81, 0x07, 0xea, 0x50, 0xca, 0x8b,
- 0xb6, 0x07, 0xea, 0x41, 0xcc, 0x8b, 0xb4, 0x07, 0xea, 0x48, 0xcc, 0x05,
- 0x7b, 0x07, 0xe1, 0x39, 0xcb, 0x12, 0x31, 0x07, 0xe9, 0x98, 0x44, 0x19,
- 0xa7, 0xc3, 0x9b, 0x06, 0xd1, 0x55, 0x4c, 0x07, 0xeb, 0x99, 0xce, 0x43,
- 0xed, 0x07, 0xeb, 0xa0, 0xcc, 0x05, 0x7b, 0x07, 0xe0, 0x91, 0xcb, 0x12,
- 0x31, 0x07, 0xe5, 0x30, 0xcc, 0x05, 0x7b, 0x07, 0xe0, 0x61, 0xcb, 0x12,
- 0x31, 0x07, 0xe5, 0x10, 0x45, 0x2d, 0xfe, 0xc3, 0x9b, 0x12, 0xd1, 0x55,
- 0x4c, 0x07, 0xea, 0x98, 0x43, 0x2e, 0x61, 0xc3, 0x9b, 0x1e, 0x42, 0x01,
- 0x33, 0x43, 0x9b, 0x2a, 0x44, 0x08, 0x1b, 0xc3, 0x9b, 0x36, 0x42, 0x00,
- 0x55, 0x43, 0x9b, 0x48, 0xca, 0x2b, 0x13, 0x07, 0xe3, 0x31, 0x0b, 0xc3,
- 0x9b, 0x54, 0xcb, 0x66, 0x54, 0x07, 0xe6, 0xf8, 0x44, 0x22, 0x95, 0xc3,
- 0x9b, 0x60, 0x43, 0x2e, 0x61, 0x43, 0x9b, 0x6c, 0xcc, 0x05, 0x7b, 0x07,
- 0xe0, 0x01, 0xcb, 0x12, 0x31, 0x07, 0xe4, 0xb8, 0x0b, 0xc3, 0x9b, 0x78,
- 0xca, 0x2b, 0x13, 0x07, 0xdf, 0xb8, 0xca, 0x2b, 0x13, 0x07, 0xdf, 0x99,
- 0xcd, 0x05, 0x7a, 0x07, 0xdf, 0x90, 0xca, 0x2b, 0x13, 0x07, 0xdf, 0x89,
- 0xcd, 0x05, 0x7a, 0x07, 0xdf, 0x80, 0xca, 0x2b, 0x13, 0x07, 0xdf, 0x79,
- 0xcd, 0x05, 0x7a, 0x07, 0xdf, 0x70, 0xcc, 0x05, 0x7b, 0x07, 0xe2, 0xb1,
- 0xcb, 0x12, 0x31, 0x07, 0xe6, 0xd8, 0xca, 0x2b, 0x13, 0x07, 0xed, 0xd9,
- 0xcc, 0x12, 0x30, 0x07, 0xee, 0x18, 0xcd, 0x05, 0x7a, 0x07, 0xf7, 0xc9,
- 0xca, 0x2b, 0x13, 0x07, 0xf7, 0xd0, 0xcd, 0x05, 0x7a, 0x07, 0xf7, 0xb9,
- 0xca, 0x2b, 0x13, 0x07, 0xf7, 0xc0, 0xca, 0x2b, 0x13, 0x07, 0xec, 0x01,
- 0xcc, 0x12, 0x30, 0x07, 0xed, 0xa8, 0xcc, 0x05, 0x7b, 0x07, 0xe1, 0xa1,
- 0xcb, 0x12, 0x31, 0x07, 0xe6, 0x18, 0x44, 0x19, 0xa7, 0xc3, 0x9b, 0x84,
- 0xcf, 0x6b, 0xff, 0x07, 0xeb, 0xf9, 0xce, 0x43, 0xed, 0x07, 0xed, 0x90,
- 0xcc, 0x05, 0x7b, 0x07, 0xe0, 0x31, 0xcb, 0x12, 0x31, 0x07, 0xe4, 0xe8,
- 0xc2, 0x14, 0x40, 0x07, 0xea, 0x20, 0xcb, 0x12, 0x31, 0x07, 0xdf, 0xf1,
- 0xcc, 0x05, 0x7b, 0x07, 0xdf, 0xe0, 0x16, 0xc3, 0x9b, 0x90, 0xca, 0x36,
- 0x8a, 0x00, 0x31, 0xe9, 0x5c, 0x11, 0xe5, 0x43, 0x9b, 0x9c, 0x44, 0x01,
- 0xb8, 0xc3, 0x9b, 0xa6, 0x16, 0x43, 0x9b, 0xb5, 0xcc, 0x05, 0x7b, 0x07,
- 0xf6, 0x89, 0xcb, 0x12, 0x31, 0x07, 0xf6, 0x98, 0xd0, 0x0e, 0xba, 0x00,
- 0x46, 0x19, 0xc9, 0x0e, 0xac, 0x00, 0x37, 0xe0, 0xcc, 0x05, 0x7b, 0x07,
- 0xf6, 0x69, 0xcb, 0x12, 0x31, 0x07, 0xf6, 0x78, 0xcf, 0x63, 0x44, 0x00,
- 0x45, 0x81, 0x16, 0xc3, 0x9b, 0xc1, 0xc4, 0x00, 0x5b, 0x00, 0x35, 0x80,
- 0xcb, 0x12, 0x31, 0x07, 0xdc, 0xa1, 0xcc, 0x05, 0x7b, 0x07, 0xdc, 0x90,
- 0xcb, 0x12, 0x31, 0x07, 0xdc, 0xc1, 0xcc, 0x05, 0x7b, 0x07, 0xdc, 0xb0,
- 0x46, 0x04, 0x73, 0xc3, 0x9b, 0xcd, 0x42, 0x01, 0x4a, 0xc3, 0x9b, 0xd7,
- 0x4b, 0x04, 0x5d, 0xc3, 0x9b, 0xe3, 0xc3, 0x00, 0xe8, 0x00, 0x3b, 0x50,
- 0xcc, 0x05, 0x7b, 0x07, 0xf6, 0xe9, 0xcb, 0x12, 0x31, 0x07, 0xf6, 0xf8,
- 0x4a, 0x04, 0x5e, 0xc3, 0x9b, 0xef, 0xcd, 0x01, 0x47, 0x00, 0x45, 0x10,
- 0xcc, 0x05, 0x7b, 0x07, 0xf4, 0xe9, 0xcb, 0x12, 0x31, 0x07, 0xf4, 0xf8,
- 0x4a, 0x04, 0x5e, 0xc3, 0x9b, 0xfb, 0x48, 0x01, 0x47, 0x43, 0x9c, 0x0d,
- 0xcc, 0x05, 0x7b, 0x07, 0xf6, 0x49, 0xcb, 0x12, 0x31, 0x07, 0xf6, 0x58,
- 0x44, 0x00, 0x6d, 0xc3, 0x9c, 0x19, 0xc4, 0x97, 0xad, 0x00, 0x33, 0x8a,
- 0x03, 0x9c, 0x4f, 0x00, 0x43, 0x9c, 0x53, 0xc7, 0x2c, 0x1e, 0x00, 0x46,
- 0x11, 0x16, 0xc3, 0x9c, 0x5f, 0xc9, 0x17, 0x7a, 0x00, 0x3b, 0x10, 0xcc,
- 0x05, 0x7b, 0x07, 0xdc, 0x71, 0xcb, 0x12, 0x31, 0x07, 0xdc, 0x80, 0x45,
- 0x00, 0x6c, 0xc3, 0x9c, 0x6b, 0x0b, 0xc3, 0x9c, 0x7b, 0xcb, 0x66, 0x54,
- 0x07, 0xf6, 0xe1, 0xca, 0x2b, 0x13, 0x07, 0xf6, 0xd0, 0xca, 0x2b, 0x13,
- 0x07, 0xdf, 0x19, 0xcd, 0x05, 0x7a, 0x07, 0xdf, 0x10, 0xca, 0x2b, 0x13,
- 0x07, 0xdf, 0x09, 0xcd, 0x05, 0x7a, 0x07, 0xdf, 0x00, 0xcc, 0x05, 0x7b,
- 0x07, 0xf5, 0x29, 0xcb, 0x12, 0x31, 0x07, 0xf5, 0x38, 0xc7, 0x2c, 0x1e,
- 0x00, 0x46, 0x09, 0xc9, 0x17, 0x7a, 0x00, 0x35, 0xf8, 0xcb, 0x12, 0x31,
- 0x07, 0xdb, 0xe1, 0xcc, 0x05, 0x7b, 0x07, 0xdb, 0xd0, 0xcb, 0x66, 0x54,
- 0x07, 0xdc, 0x09, 0x0b, 0xc3, 0x9c, 0x87, 0xca, 0x2b, 0x13, 0x07, 0xdb,
- 0xf8, 0xcb, 0x12, 0x31, 0x07, 0xdb, 0x41, 0xcc, 0x05, 0x7b, 0x07, 0xdb,
- 0x30, 0x0b, 0xc3, 0x9c, 0x93, 0xca, 0x2b, 0x13, 0x07, 0xda, 0xf9, 0xcb,
- 0x66, 0x54, 0x07, 0xdb, 0x08, 0x46, 0x04, 0x73, 0xc3, 0x9c, 0x9f, 0xc4,
- 0x00, 0x5b, 0x00, 0x33, 0xe1, 0xda, 0x1a, 0xbb, 0x00, 0x33, 0xe8, 0xc6,
- 0xcf, 0x61, 0x00, 0x31, 0x4b, 0x03, 0x9c, 0xa9, 0xca, 0x66, 0x55, 0x07,
- 0xf4, 0xc0, 0xcc, 0x05, 0x7b, 0x07, 0xf4, 0xa9, 0xcb, 0x12, 0x31, 0x07,
- 0xf4, 0xb8, 0xcb, 0x66, 0x54, 0x07, 0xdb, 0x29, 0x0b, 0xc3, 0x9c, 0xad,
- 0xca, 0x2b, 0x13, 0x07, 0xdb, 0x18, 0x16, 0xc3, 0x9c, 0xb9, 0xc9, 0x0e,
- 0xac, 0x00, 0x44, 0x58, 0xcc, 0x05, 0x7b, 0x07, 0xf6, 0x09, 0xcb, 0x12,
- 0x31, 0x07, 0xf6, 0x18, 0xcd, 0x05, 0x7a, 0x07, 0xf5, 0x59, 0xca, 0x2b,
- 0x13, 0x07, 0xf5, 0x60, 0x0b, 0xc3, 0x9c, 0xc5, 0xca, 0x2b, 0x13, 0x07,
- 0xf4, 0xd1, 0xcb, 0x66, 0x54, 0x07, 0xf4, 0xe0, 0xcb, 0x12, 0x31, 0x07,
- 0xdb, 0x81, 0xcc, 0x05, 0x7b, 0x07, 0xdb, 0x70, 0x16, 0xc3, 0x9c, 0xd1,
- 0xc7, 0x2c, 0x1e, 0x00, 0x36, 0x71, 0xcb, 0x09, 0x89, 0x00, 0x31, 0x32,
- 0x03, 0x9c, 0xe3, 0x00, 0x43, 0x9c, 0xe7, 0xcc, 0x05, 0x7b, 0x07, 0xf7,
- 0x89, 0xcb, 0x12, 0x31, 0x07, 0xf7, 0x98, 0x15, 0xc3, 0x9c, 0xf9, 0xc4,
- 0xab, 0x3a, 0x00, 0x45, 0x51, 0xca, 0x36, 0x8a, 0x00, 0x37, 0x79, 0xcf,
- 0x38, 0x82, 0x00, 0x34, 0xc9, 0x49, 0x01, 0x59, 0xc3, 0x9d, 0x05, 0xc9,
- 0x0e, 0xac, 0x00, 0x34, 0xa3, 0x03, 0x9d, 0x11, 0xc4, 0x00, 0x5b, 0x00,
- 0x34, 0x99, 0xcb, 0x09, 0x89, 0x00, 0x3b, 0x60, 0xcc, 0x05, 0x7b, 0x07,
- 0xdd, 0x01, 0xcb, 0x12, 0x31, 0x07, 0xdd, 0x10, 0x46, 0x04, 0x73, 0xc3,
- 0x9d, 0x17, 0xcb, 0x09, 0x89, 0x00, 0x45, 0x09, 0xd6, 0x2c, 0x99, 0x00,
- 0x3a, 0xa9, 0x16, 0xc3, 0x9d, 0x24, 0xde, 0x0e, 0xac, 0x00, 0x3a, 0x88,
- 0xcc, 0x05, 0x7b, 0x07, 0xf4, 0x79, 0xcb, 0x12, 0x31, 0x07, 0xf4, 0x88,
- 0xcb, 0x66, 0x54, 0x07, 0xda, 0xe9, 0x0b, 0xc3, 0x9d, 0x30, 0xca, 0x2b,
- 0x13, 0x07, 0xda, 0xd8, 0xcb, 0x12, 0x31, 0x07, 0xda, 0xa1, 0xcc, 0x05,
- 0x7b, 0x07, 0xda, 0x90, 0xc5, 0x01, 0x62, 0x00, 0x45, 0x2b, 0x03, 0x9d,
- 0x3c, 0xc5, 0x00, 0x95, 0x00, 0x35, 0x38, 0xcc, 0x05, 0x7b, 0x07, 0xf6,
- 0x29, 0xcb, 0x12, 0x31, 0x07, 0xf6, 0x38, 0x4a, 0x04, 0x5e, 0xc3, 0x9d,
- 0x42, 0xcd, 0x01, 0x5a, 0x00, 0x34, 0xe8, 0xcc, 0x05, 0x7b, 0x07, 0xf5,
- 0xc9, 0xcb, 0x12, 0x31, 0x07, 0xf5, 0xd8, 0xcc, 0x05, 0x7b, 0x07, 0xf5,
- 0xa9, 0xcb, 0x12, 0x31, 0x07, 0xf5, 0xb8, 0x16, 0xc3, 0x9d, 0x4e, 0xd7,
- 0x2a, 0xd8, 0x00, 0x34, 0xd1, 0xca, 0x36, 0x8a, 0x00, 0x3b, 0xf1, 0x46,
- 0x0a, 0xdf, 0xc3, 0x9d, 0x5d, 0xcf, 0x38, 0x82, 0x00, 0x3a, 0xe1, 0x44,
- 0x04, 0x73, 0x43, 0x9d, 0x63, 0xcc, 0x05, 0x7b, 0x07, 0xf5, 0x89, 0xcb,
- 0x12, 0x31, 0x07, 0xf5, 0x98, 0x45, 0x00, 0x6c, 0xc3, 0x9d, 0x69, 0xcd,
- 0x05, 0x7a, 0x07, 0xf5, 0x49, 0xca, 0x2b, 0x13, 0x07, 0xf5, 0x50, 0xca,
- 0x2b, 0x13, 0x07, 0xdc, 0x29, 0xcd, 0x05, 0x7a, 0x07, 0xdc, 0x20, 0xce,
- 0x70, 0x63, 0x00, 0x37, 0xd9, 0x0b, 0xc3, 0x9d, 0x88, 0xca, 0x2b, 0x13,
- 0x07, 0xf5, 0xf1, 0xcb, 0x66, 0x54, 0x07, 0xf6, 0x00, 0xca, 0x2b, 0x13,
- 0x07, 0xdc, 0x49, 0xcd, 0x05, 0x7a, 0x07, 0xdc, 0x40, 0xca, 0x2b, 0x13,
- 0x07, 0xdc, 0x19, 0xcd, 0x05, 0x7a, 0x07, 0xdc, 0x10, 0xcb, 0x12, 0x31,
- 0x07, 0xdb, 0xa1, 0xcc, 0x05, 0x7b, 0x07, 0xdb, 0x90, 0xcb, 0x12, 0x31,
- 0x07, 0xdb, 0x61, 0xcc, 0x05, 0x7b, 0x07, 0xdb, 0x50, 0xc6, 0x1d, 0x7a,
- 0x00, 0x45, 0x59, 0xc5, 0x00, 0x95, 0x00, 0x36, 0x78, 0x00, 0x43, 0x9d,
- 0x94, 0xc8, 0xbc, 0x5d, 0x00, 0x3b, 0xc1, 0xca, 0x9f, 0xaa, 0x00, 0x3b,
- 0xc8, 0xd0, 0x0e, 0xba, 0x00, 0x45, 0x39, 0x44, 0x01, 0xb8, 0x43, 0x9d,
- 0xa0, 0xcc, 0x05, 0x7b, 0x07, 0xf7, 0x09, 0xcb, 0x12, 0x31, 0x07, 0xf7,
- 0x18, 0xcb, 0x12, 0x31, 0x07, 0xde, 0xa9, 0xcc, 0x05, 0x7b, 0x07, 0xde,
- 0x98, 0xcb, 0x66, 0x54, 0x07, 0xdc, 0xe9, 0x0b, 0xc3, 0x9d, 0xac, 0xca,
- 0x2b, 0x13, 0x07, 0xdc, 0xd8, 0xd0, 0x2c, 0x15, 0x00, 0x44, 0x49, 0x16,
- 0xc3, 0x9d, 0xb8, 0xc4, 0x00, 0x5b, 0x00, 0x35, 0xe1, 0xc9, 0x0e, 0xac,
- 0x00, 0x35, 0xc9, 0x46, 0x04, 0x73, 0x43, 0x9d, 0xc4, 0x00, 0x43, 0x9d,
- 0xce, 0xcc, 0x05, 0x7b, 0x07, 0xf7, 0x29, 0xcb, 0x12, 0x31, 0x07, 0xf7,
- 0x38, 0xcb, 0x12, 0x31, 0x07, 0xdb, 0xc1, 0xcc, 0x05, 0x7b, 0x07, 0xdb,
- 0xb0, 0x45, 0x00, 0x6c, 0xc3, 0x9d, 0xda, 0x0b, 0xc3, 0x9d, 0xf6, 0xca,
- 0x2b, 0x13, 0x07, 0xf5, 0x11, 0xcb, 0x66, 0x54, 0x07, 0xf5, 0x20, 0x00,
- 0x43, 0x9e, 0x02, 0x00, 0x43, 0x9e, 0x12, 0xc9, 0xae, 0x05, 0x00, 0x36,
- 0x03, 0x03, 0x9e, 0x28, 0xca, 0x36, 0x8a, 0x00, 0x37, 0xf8, 0xcc, 0x05,
- 0x7b, 0x07, 0xf7, 0x49, 0xcb, 0x12, 0x31, 0x07, 0xf7, 0x58, 0xc2, 0x0d,
- 0xf7, 0x0f, 0x75, 0xb1, 0xc2, 0x00, 0x92, 0x0f, 0x75, 0xc0, 0xc4, 0x3a,
- 0x8e, 0x0f, 0x72, 0xe9, 0xc3, 0x0f, 0x60, 0x0f, 0x72, 0xf8, 0xe0, 0x09,
- 0x07, 0x0f, 0xdd, 0x68, 0xd0, 0x02, 0x17, 0x0f, 0xdd, 0x60, 0xd0, 0x13,
- 0x2a, 0x0f, 0xdd, 0x30, 0x00, 0x43, 0x9e, 0x2c, 0x00, 0x43, 0x9e, 0x3b,
- 0x4b, 0x18, 0x77, 0xc3, 0x9e, 0x4a, 0xdc, 0x13, 0xe2, 0x0f, 0xd2, 0x30,
- 0xc5, 0x6b, 0x55, 0x0f, 0xaf, 0xc9, 0xc8, 0x94, 0xce, 0x0f, 0xaf, 0xb8,
- 0xc2, 0x0f, 0xf5, 0x0b, 0x4e, 0x39, 0x90, 0x0b, 0x4c, 0xa9, 0x9a, 0x0b,
- 0x4c, 0x40, 0xc3, 0x58, 0xd5, 0x0b, 0x4d, 0xc8, 0x8f, 0x0b, 0x4e, 0x59,
- 0x92, 0x0b, 0x4d, 0xb0, 0xc3, 0x3c, 0x50, 0x0b, 0x4c, 0x49, 0x9a, 0x0b,
- 0x4b, 0xf8, 0x92, 0x0b, 0x4e, 0x81, 0xcb, 0x96, 0x36, 0x0b, 0x4c, 0x99,
- 0xc3, 0x0f, 0xf4, 0x0b, 0x4c, 0x30, 0xc3, 0x85, 0x08, 0x0b, 0x4d, 0xfb,
- 0x03, 0x9e, 0x56, 0xc3, 0xb1, 0xe2, 0x0b, 0x4c, 0x68, 0xc8, 0xc0, 0x75,
- 0x0b, 0x4e, 0xe9, 0xc8, 0xc0, 0x5d, 0x0b, 0x4c, 0x90, 0xc6, 0xcc, 0xfd,
- 0x0b, 0x4f, 0x40, 0x92, 0x0b, 0x4a, 0x19, 0xc2, 0x00, 0xc2, 0x0b, 0x49,
- 0x8a, 0x03, 0x9e, 0x5a, 0xc3, 0x85, 0x09, 0x0b, 0x49, 0x49, 0xc2, 0x00,
- 0x5b, 0x0b, 0x48, 0x80, 0x9a, 0x0b, 0x4a, 0xa9, 0xc2, 0x0f, 0xf5, 0x0b,
- 0x48, 0x08, 0xc3, 0xdd, 0xec, 0x0b, 0x47, 0x01, 0xc6, 0xd1, 0x65, 0x0b,
- 0x44, 0xf8, 0xc3, 0x4d, 0x78, 0x0b, 0x46, 0x91, 0x8f, 0x0b, 0x45, 0xd9,
- 0xc2, 0x00, 0xb7, 0x0b, 0x45, 0xa9, 0xc8, 0xbf, 0x05, 0x0b, 0x45, 0x80,
- 0xc6, 0xce, 0x71, 0x0b, 0x47, 0x19, 0xcc, 0x8c, 0xe0, 0x0b, 0x44, 0xf0,
- 0x9a, 0x0b, 0x47, 0x09, 0x8f, 0x0b, 0x44, 0xd8, 0xc6, 0x19, 0x36, 0x0b,
- 0x43, 0xd8, 0xc4, 0xe2, 0x33, 0x0b, 0x41, 0x59, 0xc4, 0xe0, 0x7f, 0x0b,
- 0x40, 0x71, 0xc6, 0xd1, 0x53, 0x0b, 0x40, 0x58, 0xc4, 0xe5, 0xe3, 0x0b,
- 0x41, 0x11, 0xc4, 0xe6, 0x27, 0x0b, 0x40, 0xc8, 0xa3, 0x01, 0x41, 0xfb,
- 0x03, 0x9e, 0x60, 0xa5, 0x01, 0x44, 0xf9, 0xa4, 0x01, 0x42, 0xfa, 0x03,
- 0x9e, 0x6b, 0xa5, 0x01, 0x45, 0x79, 0xa4, 0x01, 0x43, 0x7a, 0x03, 0x9e,
- 0x6f, 0xa5, 0x01, 0x46, 0x78, 0xa5, 0x01, 0x45, 0xb9, 0xa4, 0x01, 0x43,
- 0xba, 0x03, 0x9e, 0x73, 0xa5, 0x01, 0x46, 0xb8, 0xa5, 0x01, 0x47, 0x38,
- 0xa5, 0x01, 0x45, 0xd9, 0xa4, 0x01, 0x43, 0xda, 0x03, 0x9e, 0x77, 0xa5,
- 0x01, 0x46, 0xd8, 0xa5, 0x01, 0x47, 0x58, 0xa5, 0x01, 0x47, 0x98, 0xa5,
- 0x01, 0x45, 0xe9, 0xa4, 0x01, 0x43, 0xea, 0x03, 0x9e, 0x7b, 0xa5, 0x01,
- 0x46, 0xe8, 0xa5, 0x01, 0x47, 0x68, 0xa5, 0x01, 0x47, 0xa8, 0xa5, 0x01,
- 0x47, 0xc8, 0xa5, 0x01, 0x45, 0xf1, 0xa4, 0x01, 0x43, 0xf2, 0x03, 0x9e,
- 0x7f, 0xa5, 0x01, 0x46, 0xf0, 0xa5, 0x01, 0x47, 0x70, 0xa5, 0x01, 0x47,
- 0xb0, 0xa5, 0x01, 0x47, 0xd0, 0xa5, 0x01, 0x47, 0xe0, 0xd0, 0x5e, 0x72,
- 0x0f, 0xc1, 0x81, 0xcb, 0x5e, 0x77, 0x0f, 0xc1, 0x61, 0x49, 0xab, 0x7d,
- 0xc3, 0x9e, 0x83, 0x47, 0x06, 0xf1, 0xc3, 0x9e, 0x8f, 0xcc, 0x86, 0xb0,
- 0x0f, 0xc1, 0x01, 0xcc, 0x82, 0x84, 0x0f, 0xc1, 0x21, 0xca, 0x9f, 0xd2,
- 0x0f, 0xc1, 0x40, 0xe0, 0x06, 0x27, 0x01, 0x5c, 0x00, 0x46, 0x00, 0x6b,
- 0x43, 0x9e, 0x9b, 0xe0, 0x07, 0x67, 0x01, 0x4b, 0x38, 0x0e, 0xc3, 0x9e,
- 0xa7, 0xdf, 0x0c, 0xc3, 0x01, 0x4b, 0x30, 0xc5, 0xdc, 0xe6, 0x08, 0x04,
- 0x39, 0xc5, 0xd6, 0xb0, 0x08, 0x04, 0x30, 0xca, 0x9e, 0x1a, 0x08, 0x04,
- 0x41, 0xc9, 0xaa, 0x6f, 0x08, 0x04, 0x48, 0xc5, 0xd6, 0xce, 0x08, 0x04,
- 0x51, 0xc6, 0xd3, 0x81, 0x08, 0x04, 0x58, 0xc5, 0xd6, 0xa6, 0x08, 0x04,
- 0x61, 0xc6, 0xd3, 0x87, 0x08, 0x04, 0x68, 0xc6, 0xcd, 0x09, 0x08, 0x04,
- 0x19, 0xc6, 0xd0, 0xc9, 0x08, 0x04, 0x21, 0xca, 0xa4, 0x82, 0x08, 0x04,
- 0x28, 0xc6, 0x1e, 0x45, 0x00, 0xf4, 0xb9, 0xcc, 0x3f, 0x8d, 0x01, 0x63,
- 0x30, 0xc5, 0x01, 0x62, 0x00, 0xf3, 0x69, 0xc5, 0x00, 0x95, 0x00, 0xf3,
- 0x58, 0x46, 0x00, 0x6b, 0x43, 0x9e, 0xb3, 0xca, 0x46, 0x9d, 0x0e, 0xf8,
- 0x68, 0xca, 0xa0, 0xe0, 0x0e, 0xf8, 0x30, 0x87, 0x00, 0xe8, 0xa3, 0x03,
- 0x9e, 0xd4, 0xc5, 0x25, 0x71, 0x00, 0xe8, 0x41, 0xc7, 0xc6, 0x7a, 0x05,
- 0x5a, 0x1a, 0x03, 0x9e, 0xda, 0xc8, 0x6a, 0x62, 0x05, 0x3b, 0xf8, 0x87,
- 0x00, 0xe8, 0x11, 0xc4, 0xd9, 0x9f, 0x00, 0x12, 0x90, 0xce, 0x63, 0x81,
- 0x00, 0x15, 0x72, 0x03, 0x9e, 0xe0, 0xce, 0x6d, 0x61, 0x00, 0x13, 0x80,
- 0xd2, 0x23, 0xd6, 0x05, 0x3b, 0x38, 0xce, 0x17, 0x75, 0x00, 0xf3, 0x38,
- 0xce, 0x17, 0x75, 0x00, 0xf3, 0x48, 0xce, 0x02, 0x79, 0x00, 0xec, 0xa9,
- 0xc4, 0x02, 0x83, 0x00, 0x12, 0xd0, 0xca, 0xa0, 0x7c, 0x05, 0x5a, 0x60,
- 0xd2, 0x4c, 0x86, 0x05, 0x59, 0xb0, 0xcc, 0x21, 0x84, 0x00, 0xe8, 0x99,
- 0xc5, 0x77, 0x2d, 0x00, 0xe8, 0x90, 0x43, 0x00, 0x50, 0xc3, 0x9e, 0xe6,
- 0xc8, 0x63, 0x87, 0x00, 0x13, 0xf3, 0x03, 0x9e, 0xf2, 0x0e, 0xc3, 0x9e,
- 0xf8, 0x42, 0x01, 0x4a, 0xc3, 0x9f, 0x04, 0xcc, 0x57, 0x82, 0x00, 0xec,
- 0x49, 0x05, 0xc3, 0x9f, 0x10, 0xc4, 0x12, 0xeb, 0x00, 0x13, 0xe9, 0xce,
- 0x3a, 0xdb, 0x05, 0x3d, 0x39, 0xc5, 0x35, 0x4a, 0x00, 0x0a, 0xa9, 0xce,
- 0x1f, 0x15, 0x00, 0x10, 0x99, 0xc6, 0x01, 0x01, 0x00, 0x12, 0x68, 0xce,
- 0x02, 0x79, 0x00, 0xec, 0xa1, 0xc4, 0x02, 0x83, 0x00, 0x12, 0xe8, 0xd1,
- 0x56, 0xe4, 0x0e, 0xf9, 0x39, 0xc9, 0x21, 0x0e, 0x00, 0xeb, 0x80, 0xcf,
- 0x63, 0x08, 0x00, 0xf2, 0x59, 0xcb, 0x4c, 0x8d, 0x05, 0x59, 0xd9, 0xc6,
- 0xbf, 0x3f, 0x00, 0x0a, 0x31, 0xc4, 0x63, 0xce, 0x00, 0x0a, 0x41, 0xc3,
- 0x02, 0x53, 0x00, 0x11, 0xa8, 0xc9, 0x69, 0x26, 0x00, 0xf2, 0x49, 0xc8,
- 0x56, 0xed, 0x00, 0x13, 0x91, 0xcd, 0x7f, 0x02, 0x00, 0x0c, 0xf0, 0x43,
- 0x01, 0xb9, 0xc3, 0x9f, 0x22, 0xc8, 0x23, 0xe0, 0x05, 0x3c, 0x88, 0xc4,
- 0x00, 0x5b, 0x05, 0x59, 0xc9, 0xc5, 0x21, 0x12, 0x00, 0x13, 0x59, 0xc3,
- 0x01, 0x4a, 0x00, 0x0a, 0x00, 0xd1, 0x57, 0x7d, 0x0e, 0xf8, 0x98, 0xcb,
- 0x9a, 0x2a, 0x00, 0xf1, 0xc8, 0xcc, 0x21, 0x0b, 0x05, 0x59, 0xc1, 0xc3,
- 0x00, 0xe8, 0x01, 0x63, 0x08, 0xce, 0x3f, 0x8b, 0x00, 0xf4, 0xe1, 0xc8,
- 0x17, 0x7b, 0x00, 0xf4, 0xd8, 0xce, 0x02, 0x79, 0x0e, 0xf8, 0xc9, 0xcc,
- 0x57, 0x82, 0x0e, 0xf8, 0x90, 0x46, 0x00, 0x6b, 0x43, 0x9f, 0x2e, 0xd2,
- 0x4c, 0x86, 0x05, 0x5a, 0x50, 0xcc, 0x21, 0x84, 0x00, 0x12, 0xfa, 0x03,
- 0x9f, 0x3a, 0xca, 0xa9, 0x28, 0x00, 0xf0, 0x48, 0x45, 0x00, 0x56, 0x43,
- 0x9f, 0x40, 0x45, 0x00, 0x56, 0x43, 0x9f, 0x5e, 0x42, 0x00, 0xed, 0xc3,
- 0x9f, 0x7c, 0x45, 0x06, 0xf3, 0x43, 0x9f, 0x8b, 0xcb, 0x9a, 0x2a, 0x00,
- 0x11, 0x58, 0xc5, 0x35, 0x4a, 0x00, 0xf2, 0x99, 0xc5, 0x1f, 0x94, 0x00,
- 0xf2, 0x88, 0xc9, 0x1e, 0x42, 0x00, 0xf2, 0x79, 0xc5, 0x35, 0x4a, 0x00,
- 0xf2, 0x69, 0xc6, 0x60, 0xe6, 0x00, 0x11, 0x68, 0xce, 0x02, 0x79, 0x00,
- 0xec, 0xb9, 0xc6, 0x01, 0x01, 0x05, 0x59, 0xf8, 0xc7, 0x0e, 0xae, 0x00,
- 0xf6, 0x59, 0xca, 0x1f, 0x8f, 0x00, 0x10, 0x48, 0xca, 0xa9, 0x28, 0x00,
- 0xf1, 0x78, 0xcc, 0x57, 0x82, 0x0e, 0xf8, 0xc1, 0xce, 0x02, 0x79, 0x00,
- 0xec, 0xd1, 0x05, 0xc3, 0x9f, 0x97, 0xc4, 0x12, 0xeb, 0x00, 0x0d, 0xd0,
- 0xc9, 0xb2, 0xbb, 0x0e, 0xf8, 0x60, 0x00, 0x43, 0x9f, 0xa3, 0xca, 0xa8,
- 0xd8, 0x00, 0xf0, 0xe8, 0x42, 0x00, 0xed, 0xc3, 0x9f, 0xaf, 0xca, 0x1f,
- 0x8f, 0x00, 0x10, 0x28, 0xc5, 0x35, 0x4a, 0x00, 0xf0, 0xb9, 0xc5, 0x1f,
- 0x94, 0x00, 0xf0, 0xa8, 0xc5, 0x01, 0x62, 0x00, 0xf7, 0xa9, 0xc5, 0x00,
- 0x95, 0x00, 0xf4, 0x78, 0xc2, 0x00, 0xc0, 0x00, 0x0d, 0x83, 0x03, 0x9f,
- 0xbb, 0xc8, 0x9f, 0xca, 0x00, 0xf7, 0x38, 0x11, 0xc3, 0x9f, 0xc1, 0xc8,
- 0x1e, 0x43, 0x00, 0x07, 0xe2, 0x03, 0x9f, 0xcd, 0xce, 0x75, 0x41, 0x00,
- 0xf3, 0xd8, 0x00, 0x43, 0x9f, 0xd1, 0xc9, 0x0b, 0x37, 0x00, 0x07, 0xdb,
- 0x03, 0x9f, 0xdd, 0xc4, 0x63, 0xce, 0x00, 0x0e, 0xa0, 0xcd, 0x02, 0x7a,
- 0x00, 0xec, 0xc9, 0xc9, 0x9e, 0x61, 0x00, 0x0b, 0x78, 0xce, 0x38, 0x45,
- 0x05, 0x5a, 0x71, 0xc5, 0x01, 0x02, 0x05, 0x3d, 0xc8, 0x45, 0x00, 0x56,
- 0x43, 0x9f, 0xe3, 0xc9, 0x0b, 0x37, 0x00, 0x07, 0x13, 0x03, 0xa0, 0x01,
- 0xc4, 0x63, 0xce, 0x00, 0x0e, 0x70, 0x11, 0xc3, 0xa0, 0x07, 0xc8, 0x1e,
- 0x43, 0x00, 0x07, 0x22, 0x03, 0xa0, 0x13, 0x0b, 0xc3, 0xa0, 0x19, 0xcd,
- 0x02, 0x7a, 0x00, 0xec, 0x78, 0xc5, 0x01, 0x62, 0x00, 0xf4, 0x49, 0xc5,
- 0x00, 0x95, 0x00, 0xf4, 0x38, 0xc5, 0x01, 0x62, 0x00, 0xf1, 0x29, 0xc5,
- 0x00, 0x95, 0x00, 0xf1, 0x18, 0xc5, 0x01, 0x62, 0x00, 0xf4, 0x99, 0xc5,
- 0x00, 0x95, 0x00, 0x0b, 0xe0, 0x00, 0x43, 0xa0, 0x25, 0xd2, 0x23, 0xd6,
- 0x05, 0x3a, 0x88, 0x45, 0x00, 0x56, 0x43, 0xa0, 0x31, 0xc7, 0x0e, 0xae,
- 0x00, 0xf7, 0x21, 0x45, 0x06, 0xf3, 0x43, 0xa0, 0x4f, 0x00, 0x43, 0xa0,
- 0x5b, 0xc9, 0xa9, 0x29, 0x00, 0xf3, 0xc1, 0xc5, 0x01, 0x62, 0x00, 0xf3,
- 0xa0, 0xc6, 0x01, 0x61, 0x00, 0xf3, 0xb0, 0xc9, 0x0e, 0xac, 0x00, 0xf7,
- 0x11, 0xc5, 0x21, 0x12, 0x00, 0xf7, 0x01, 0xca, 0x9f, 0xc8, 0x00, 0xf6,
- 0xf1, 0xc5, 0x1f, 0x94, 0x00, 0xf6, 0xe1, 0xc5, 0x35, 0x4a, 0x00, 0xf6,
- 0xd0, 0xc9, 0x0e, 0xac, 0x00, 0xf6, 0xc1, 0xc5, 0x21, 0x12, 0x00, 0xf6,
- 0xb1, 0xca, 0x9f, 0xc8, 0x00, 0xf6, 0xa1, 0xc5, 0x1f, 0x94, 0x00, 0xf6,
- 0x91, 0xc5, 0x35, 0x4a, 0x00, 0xf6, 0x80, 0xc5, 0x01, 0x62, 0x00, 0xf6,
- 0x61, 0xc5, 0x00, 0x95, 0x00, 0x11, 0x72, 0x03, 0xa0, 0x67, 0xc5, 0x35,
- 0x4a, 0x00, 0x0a, 0x81, 0xc5, 0x1f, 0x94, 0x00, 0x10, 0x60, 0xc5, 0x35,
- 0x4a, 0x00, 0xf2, 0x91, 0xc5, 0x1f, 0x94, 0x00, 0xf2, 0x80, 0xc5, 0x01,
- 0x62, 0x00, 0xf6, 0x51, 0xc5, 0x00, 0x95, 0x00, 0x09, 0x80, 0x44, 0x00,
- 0x57, 0xc3, 0xa0, 0x6d, 0xc5, 0x01, 0x62, 0x00, 0xf0, 0xc0, 0xc5, 0x01,
- 0x62, 0x00, 0xf5, 0xc1, 0xc5, 0x00, 0x95, 0x00, 0x08, 0xb0, 0xc9, 0x0e,
- 0xac, 0x00, 0xf5, 0x61, 0xc5, 0x21, 0x12, 0x00, 0xf5, 0x51, 0xca, 0x9f,
- 0xc8, 0x00, 0xf5, 0x41, 0xc5, 0x1f, 0x94, 0x00, 0xf5, 0x31, 0xc5, 0x35,
- 0x4a, 0x00, 0xf5, 0x20, 0xc5, 0x01, 0x62, 0x00, 0xf5, 0x01, 0xc5, 0x00,
- 0x95, 0x00, 0x11, 0x32, 0x03, 0xa0, 0x8b, 0xc5, 0x01, 0x62, 0x00, 0xf2,
- 0xd3, 0x03, 0xa0, 0x91, 0xc5, 0x00, 0x95, 0x00, 0xf2, 0xc0, 0xca, 0x01,
- 0xf7, 0x01, 0x5d, 0x19, 0xc9, 0x01, 0x1e, 0x01, 0x5d, 0x10, 0xc7, 0xc7,
- 0x14, 0x00, 0x89, 0x98, 0x02, 0x43, 0xa0, 0x97, 0xc4, 0xac, 0xd8, 0x00,
- 0x89, 0xe9, 0xc5, 0xd7, 0x8c, 0x00, 0x8a, 0x78, 0x91, 0x00, 0x8c, 0xf8,
- 0x91, 0x00, 0x8b, 0xe9, 0x97, 0x00, 0x8b, 0xf1, 0xc2, 0x1d, 0x5f, 0x00,
- 0x8d, 0x28, 0x83, 0x00, 0x8c, 0x23, 0x03, 0xa0, 0xad, 0xc2, 0x08, 0xc6,
- 0x00, 0x8c, 0x30, 0x87, 0x06, 0xbd, 0x98, 0x87, 0x06, 0xbd, 0xb8, 0x91,
- 0x00, 0x8c, 0x78, 0x91, 0x00, 0x8c, 0x88, 0x97, 0x00, 0x8c, 0xb1, 0x91,
- 0x06, 0xbd, 0xd0, 0x91, 0x06, 0xbd, 0x80, 0x87, 0x00, 0x8d, 0x38, 0xc2,
- 0x49, 0xba, 0x06, 0xbd, 0xe9, 0x87, 0x06, 0xbd, 0xf0, 0x91, 0x06, 0xbd,
- 0xf8, 0xc7, 0xc7, 0x14, 0x00, 0x8e, 0x20, 0xc6, 0x92, 0x31, 0x06, 0xbf,
- 0x61, 0xc6, 0xc8, 0x2d, 0x06, 0xbf, 0x68, 0xc5, 0x92, 0x32, 0x00, 0x8f,
- 0x39, 0xcc, 0x7a, 0x8b, 0x06, 0xbf, 0x58, 0xc5, 0xc8, 0x2e, 0x00, 0x8f,
- 0x41, 0xc6, 0xc6, 0xf2, 0x06, 0xbf, 0x88, 0xc4, 0x7a, 0x93, 0x00, 0x8f,
- 0x51, 0xc6, 0xcb, 0x4b, 0x06, 0xbf, 0x70, 0xc4, 0xc7, 0x2b, 0x06, 0xbf,
- 0x79, 0xc6, 0xc7, 0x2a, 0x06, 0xbf, 0x80, 0xc7, 0xc7, 0x14, 0x06, 0xbe,
- 0x88, 0xc4, 0xc7, 0x2b, 0x06, 0xbe, 0x91, 0xc6, 0xc7, 0x2a, 0x06, 0xbe,
- 0x98, 0x02, 0x43, 0xa0, 0xb1, 0xc6, 0x92, 0x31, 0x00, 0x8e, 0x89, 0xc4,
- 0xac, 0xd8, 0x00, 0x8e, 0x91, 0xc5, 0x98, 0x41, 0x06, 0xbe, 0xc0, 0x02,
- 0x43, 0xa0, 0xbd, 0xc4, 0xac, 0xd8, 0x00, 0x8e, 0xb1, 0xc6, 0x92, 0x31,
- 0x06, 0xbe, 0xa8, 0xc6, 0xce, 0x6b, 0x00, 0x8e, 0x78, 0xc6, 0xce, 0x6b,
- 0x06, 0xbe, 0xe0, 0xc5, 0xde, 0x35, 0x06, 0xbf, 0x08, 0xc4, 0xac, 0xd8,
- 0x00, 0x8e, 0xf1, 0xc5, 0xd9, 0x80, 0x06, 0xbe, 0xf8, 0xc7, 0xc8, 0x2c,
- 0x06, 0xbf, 0x38, 0xc8, 0xc1, 0x05, 0x06, 0xbf, 0x20, 0xc4, 0xc7, 0x2b,
- 0x06, 0xbf, 0x41, 0xc6, 0xc7, 0x2a, 0x06, 0xbf, 0x48, 0xc5, 0x92, 0x32,
- 0x00, 0x8f, 0x61, 0xc6, 0xc0, 0x37, 0x00, 0x8f, 0x78, 0xca, 0x92, 0x2d,
- 0x00, 0x8f, 0x69, 0xc3, 0x3c, 0x08, 0x00, 0x8f, 0x88, 0xc6, 0x92, 0x31,
- 0x01, 0x8b, 0xa1, 0xc6, 0xc8, 0x2d, 0x01, 0x8b, 0xa8, 0xc3, 0x21, 0x32,
- 0x01, 0x9f, 0x59, 0xc3, 0x18, 0x86, 0x01, 0x9f, 0x9a, 0x03, 0xa0, 0xd5,
- 0xc3, 0x00, 0x4c, 0x01, 0x9f, 0x61, 0x9b, 0x01, 0x9f, 0xea, 0x03, 0xa0,
- 0xd9, 0x02, 0x43, 0xa0, 0xdf, 0xd3, 0x41, 0x88, 0x0f, 0xd1, 0x81, 0xcf,
- 0x18, 0x82, 0x0f, 0xd1, 0xb8, 0xc9, 0x4f, 0xff, 0x08, 0x4f, 0x88, 0xc9,
- 0x4f, 0xff, 0x08, 0x4f, 0x80, 0xc9, 0x4f, 0xff, 0x08, 0x4f, 0x78, 0xc9,
- 0x4f, 0xff, 0x08, 0x4f, 0x70, 0xc9, 0xb2, 0xfa, 0x0f, 0x02, 0x7b, 0x03,
- 0xa0, 0xef, 0xc4, 0x73, 0xed, 0x0f, 0x02, 0x59, 0xc2, 0x00, 0xa4, 0x0f,
- 0x02, 0x30, 0xc3, 0x70, 0xed, 0x0f, 0x02, 0x3b, 0x03, 0xa0, 0xf5, 0x97,
- 0x0f, 0x02, 0x48, 0x95, 0x0f, 0x01, 0xfb, 0x03, 0xa0, 0xfb, 0x88, 0x0f,
- 0x02, 0x11, 0x94, 0x0f, 0x01, 0xf1, 0x8f, 0x0f, 0x01, 0xd9, 0x8e, 0x0f,
- 0x01, 0xd0, 0xc7, 0xbc, 0x96, 0x0f, 0x02, 0x81, 0x87, 0x0f, 0x01, 0xe8,
- 0xc7, 0xcb, 0xba, 0x0f, 0x02, 0x71, 0xd7, 0x05, 0xd0, 0x0f, 0x02, 0x61,
- 0x87, 0x0f, 0x01, 0x88, 0xc3, 0xe6, 0xd9, 0x0f, 0x02, 0x51, 0x87, 0x0f,
- 0x01, 0xa0, 0xc2, 0x02, 0x60, 0x0f, 0x02, 0x21, 0x87, 0x0f, 0x02, 0x00,
- 0x87, 0x0f, 0x01, 0xa8, 0x87, 0x0f, 0x01, 0xb9, 0xc2, 0x01, 0xf0, 0x0f,
- 0x01, 0xb0, 0xce, 0x6d, 0x61, 0x00, 0xed, 0x68, 0xc4, 0xd9, 0x9f, 0x00,
- 0xec, 0xd9, 0x87, 0x00, 0xea, 0x30, 0x46, 0x00, 0x6b, 0x43, 0xa1, 0x01,
- 0xca, 0xa0, 0xe0, 0x08, 0x3d, 0x08, 0xca, 0xa0, 0xe0, 0x08, 0x3c, 0xe0,
- 0xcc, 0x21, 0x84, 0x00, 0xed, 0x39, 0xc9, 0xad, 0x87, 0x00, 0x15, 0xb0,
- 0xca, 0x1d, 0xd4, 0x08, 0x3c, 0xa0, 0xc9, 0xb5, 0x04, 0x08, 0x3c, 0xe8,
- 0xc9, 0xb1, 0xf5, 0x08, 0x3c, 0x68, 0xc4, 0x02, 0x52, 0x08, 0x3c, 0x49,
- 0xce, 0x02, 0x79, 0x08, 0x3c, 0x40, 0xc8, 0x4a, 0x50, 0x05, 0x38, 0x59,
- 0xd2, 0x4a, 0x46, 0x05, 0x38, 0x80, 0xc4, 0x01, 0xf0, 0x00, 0x17, 0x88,
- 0xc8, 0x4a, 0x50, 0x05, 0x38, 0x51, 0xd2, 0x4a, 0x46, 0x05, 0x38, 0x78,
- 0xcc, 0x1f, 0x94, 0x00, 0x17, 0xa9, 0xcc, 0x84, 0x40, 0x00, 0x17, 0xb0,
- 0xc3, 0x10, 0x99, 0x0e, 0xbe, 0x11, 0xc5, 0xde, 0x58, 0x0e, 0xbd, 0xc0,
- 0xc3, 0x10, 0x99, 0x0e, 0xbd, 0x41, 0xc5, 0xde, 0x58, 0x0e, 0xbc, 0xf0,
- 0xc7, 0x00, 0x70, 0x0e, 0xbd, 0x08, 0xc2, 0x01, 0xc7, 0x0e, 0x8f, 0x39,
- 0xc4, 0x03, 0x68, 0x0e, 0x8f, 0x30, 0xc4, 0x2b, 0xc8, 0x0e, 0x8e, 0x31,
- 0xc5, 0x03, 0xe2, 0x0e, 0x8d, 0xf1, 0xc5, 0x02, 0x31, 0x0e, 0x8d, 0xe8,
- 0xc4, 0x2b, 0xc8, 0x0e, 0x8e, 0x21, 0xc5, 0x03, 0xe2, 0x0e, 0x8d, 0xd1,
- 0xc5, 0x02, 0x31, 0x0e, 0x8d, 0xc8, 0x49, 0xae, 0x9e, 0xc3, 0xa1, 0x10,
- 0x46, 0x6b, 0x6d, 0x43, 0xa1, 0x1c, 0xd0, 0x57, 0xf2, 0x0e, 0x88, 0xe1,
- 0xca, 0x6f, 0x79, 0x0e, 0x88, 0xd8, 0x4c, 0x7b, 0x74, 0x43, 0xa1, 0x28,
- 0xcd, 0x7b, 0x74, 0x0e, 0x8e, 0x48, 0xc5, 0x03, 0xe2, 0x0e, 0x8a, 0xa9,
- 0xc5, 0x02, 0x31, 0x0e, 0x8a, 0xa0, 0x43, 0x10, 0x47, 0xc3, 0xa1, 0x34,
- 0x45, 0x05, 0x63, 0xc3, 0xa1, 0x46, 0x46, 0x02, 0x12, 0xc3, 0xa1, 0x52,
- 0x45, 0x02, 0x4d, 0x43, 0xa1, 0x5e, 0x15, 0xc3, 0xa1, 0x6a, 0xc8, 0xbe,
- 0xe5, 0x0e, 0x8d, 0x61, 0xc6, 0xd3, 0x99, 0x0e, 0x8d, 0x59, 0x42, 0x01,
- 0x4a, 0xc3, 0xa1, 0x80, 0x16, 0xc3, 0xa1, 0x92, 0xc4, 0x98, 0x58, 0x0e,
- 0x8c, 0x49, 0x42, 0x02, 0x69, 0xc3, 0xa1, 0x9c, 0xc3, 0x09, 0xc3, 0x0e,
- 0x8c, 0x31, 0xc5, 0xdc, 0x5f, 0x0e, 0x8c, 0x11, 0x03, 0xc3, 0xa1, 0xa8,
- 0xc7, 0xc1, 0xf7, 0x0e, 0x8b, 0xfa, 0x03, 0xa1, 0xb7, 0xc2, 0x03, 0x48,
- 0x0e, 0x8d, 0xc3, 0x03, 0xa1, 0xbd, 0x87, 0x0e, 0x8a, 0xe0, 0xa0, 0x0e,
- 0x8b, 0x61, 0x9f, 0x0e, 0x8b, 0x59, 0x9e, 0x0e, 0x8b, 0x50, 0xa0, 0x0e,
- 0x88, 0x79, 0x9f, 0x0e, 0x88, 0x71, 0x9e, 0x0e, 0x88, 0x68, 0x12, 0xc3,
- 0xa1, 0xc3, 0xc4, 0xe5, 0x9f, 0x00, 0xff, 0xd9, 0xc5, 0x2a, 0xae, 0x00,
- 0xff, 0xd1, 0xc5, 0x71, 0xcf, 0x00, 0xfb, 0x4b, 0x03, 0xa1, 0xd2, 0xc5,
- 0x63, 0xc6, 0x00, 0x1c, 0x78, 0xc4, 0xe5, 0x9f, 0x00, 0xff, 0xc9, 0xc5,
- 0x2a, 0xae, 0x00, 0xff, 0xc1, 0xc5, 0x71, 0xcf, 0x00, 0xfa, 0x4b, 0x03,
- 0xa1, 0xd8, 0xc5, 0xd5, 0x8e, 0x00, 0xfa, 0x43, 0x03, 0xa1, 0xde, 0xc5,
- 0x63, 0xc6, 0x00, 0x1c, 0x60, 0xc4, 0x07, 0xa6, 0x00, 0xff, 0x51, 0xc5,
- 0xd7, 0x41, 0x00, 0xff, 0x40, 0xc4, 0x5a, 0x03, 0x00, 0xfa, 0xcb, 0x03,
- 0xa1, 0xe4, 0xc8, 0x63, 0xbd, 0x00, 0x1d, 0x58, 0xc4, 0x07, 0xa6, 0x00,
- 0xfe, 0xd1, 0xc5, 0xd7, 0x41, 0x00, 0xfe, 0xc0, 0xc4, 0x5a, 0x03, 0x00,
- 0xf9, 0xcb, 0x03, 0xa1, 0xea, 0xc8, 0x63, 0xbd, 0x00, 0x1d, 0x50, 0x45,
- 0x00, 0x56, 0x43, 0xa1, 0xf0, 0x12, 0xc3, 0xa2, 0x02, 0xc4, 0xe5, 0x9f,
- 0x00, 0xfe, 0x59, 0xc5, 0x2a, 0xae, 0x00, 0xfe, 0x51, 0xc5, 0x71, 0xcf,
- 0x00, 0xf9, 0x4b, 0x03, 0xa2, 0x11, 0xc5, 0x63, 0xc6, 0x00, 0x1c, 0x48,
- 0xc4, 0xe5, 0x9f, 0x00, 0xfe, 0x49, 0xc5, 0x2a, 0xae, 0x00, 0xfe, 0x41,
- 0xc5, 0x71, 0xcf, 0x00, 0xf8, 0xcb, 0x03, 0xa2, 0x17, 0xc5, 0xd5, 0x8e,
- 0x00, 0xf8, 0xc3, 0x03, 0xa2, 0x1d, 0xc5, 0x63, 0xc6, 0x00, 0x1c, 0x40,
- 0x12, 0xc3, 0xa2, 0x23, 0xc4, 0xe5, 0x9f, 0x00, 0xfd, 0xd9, 0x18, 0xc3,
- 0xa2, 0x32, 0xc6, 0x60, 0xe6, 0x00, 0xfd, 0xc9, 0xc5, 0x71, 0xcf, 0x00,
- 0xf8, 0x4b, 0x03, 0xa2, 0x3e, 0xc5, 0x63, 0xc6, 0x00, 0x1c, 0x30, 0x12,
- 0xc3, 0xa2, 0x44, 0xc4, 0xe5, 0x9f, 0x00, 0xfb, 0xeb, 0x03, 0xa2, 0x56,
- 0xcd, 0x48, 0x1d, 0x00, 0xff, 0x99, 0xc5, 0x2a, 0xae, 0x00, 0xfb, 0xe3,
- 0x03, 0xa2, 0x5c, 0xc5, 0x71, 0xcf, 0x00, 0xfb, 0x0b, 0x03, 0xa2, 0x62,
- 0xc5, 0x63, 0xc6, 0x00, 0x1e, 0x70, 0xc4, 0xe5, 0x9f, 0x00, 0xfb, 0xc9,
- 0xc5, 0x2a, 0xae, 0x00, 0xfb, 0xc1, 0xc5, 0x71, 0xcf, 0x00, 0xfa, 0x0b,
- 0x03, 0xa2, 0x68, 0xc5, 0xd5, 0x8e, 0x00, 0xfa, 0x03, 0x03, 0xa2, 0x6e,
- 0xc5, 0x63, 0xc6, 0x00, 0x1e, 0x60, 0xc8, 0x63, 0xbd, 0x00, 0x1e, 0x5b,
- 0x03, 0xa2, 0x74, 0xc4, 0x5a, 0x03, 0x00, 0xfa, 0x8a, 0x03, 0xa2, 0x7a,
- 0xca, 0x95, 0x50, 0x00, 0xff, 0x31, 0xc4, 0x7f, 0x43, 0x00, 0xfa, 0x82,
- 0x03, 0xa2, 0x80, 0xc5, 0xd7, 0x41, 0x00, 0xff, 0x01, 0xc4, 0x07, 0xa6,
- 0x00, 0xfb, 0xd0, 0xc8, 0x63, 0xbd, 0x00, 0x1e, 0x53, 0x03, 0xa2, 0x86,
- 0xc4, 0x5a, 0x03, 0x00, 0xf9, 0x8a, 0x03, 0xa2, 0x8c, 0xca, 0x95, 0x50,
- 0x00, 0xfe, 0xb1, 0xc4, 0x7f, 0x43, 0x00, 0xf9, 0x82, 0x03, 0xa2, 0x92,
- 0xc5, 0xd7, 0x41, 0x00, 0xfe, 0x81, 0xc4, 0x07, 0xa6, 0x00, 0xfb, 0xb0,
- 0x12, 0xc3, 0xa2, 0x98, 0xc4, 0xe5, 0x9f, 0x00, 0xfb, 0xab, 0x03, 0xa2,
- 0xaa, 0xcd, 0x48, 0x1d, 0x00, 0xfe, 0x19, 0xc5, 0x2a, 0xae, 0x00, 0xfb,
- 0xa3, 0x03, 0xa2, 0xb0, 0xc5, 0x71, 0xcf, 0x00, 0xf9, 0x0b, 0x03, 0xa2,
- 0xb6, 0xc5, 0x63, 0xc6, 0x00, 0x1d, 0x70, 0xc4, 0xe5, 0x9f, 0x00, 0xfb,
- 0x99, 0xc5, 0x2a, 0xae, 0x00, 0xfb, 0x91, 0xc5, 0x71, 0xcf, 0x00, 0xf8,
- 0x8b, 0x03, 0xa2, 0xbc, 0xc5, 0xd5, 0x8e, 0x00, 0xf8, 0x83, 0x03, 0xa2,
- 0xc2, 0xc5, 0x63, 0xc6, 0x00, 0x1d, 0x68, 0x12, 0xc3, 0xa2, 0xc8, 0xc4,
- 0xe5, 0x9f, 0x00, 0xfb, 0x8b, 0x03, 0xa2, 0xda, 0xcd, 0x48, 0x1d, 0x00,
- 0xfd, 0x99, 0x18, 0xc3, 0xa2, 0xe0, 0xc6, 0x60, 0xe6, 0x00, 0xfd, 0x89,
- 0xc5, 0x71, 0xcf, 0x00, 0xf8, 0x0b, 0x03, 0xa2, 0xef, 0xc5, 0x63, 0xc6,
- 0x00, 0x1d, 0x60, 0xc7, 0xbf, 0xd6, 0x08, 0x0a, 0x59, 0xc7, 0x67, 0x1b,
- 0x08, 0x0a, 0x90, 0xc7, 0x0c, 0x4b, 0x08, 0x0a, 0x2b, 0x03, 0xa2, 0xf5,
- 0x16, 0xc3, 0xa2, 0xf9, 0xc7, 0x67, 0x1b, 0x08, 0x0a, 0x78, 0x16, 0xc3,
- 0xa3, 0x08, 0xc7, 0x67, 0x1b, 0x08, 0x0a, 0x88, 0xc7, 0x0c, 0x4b, 0x08,
- 0x0b, 0x51, 0xc8, 0x50, 0x00, 0x08, 0x0b, 0x88, 0xc4, 0x0c, 0x55, 0x08,
- 0x0b, 0x29, 0xcb, 0x13, 0xe3, 0x08, 0x0b, 0x58, 0xc8, 0x50, 0x00, 0x08,
- 0x0b, 0x91, 0xc7, 0x0c, 0x4b, 0x08, 0x0b, 0x70, 0xc8, 0x0c, 0x4a, 0x08,
- 0x0b, 0x68, 0xcf, 0x65, 0x8d, 0x08, 0x0b, 0x38, 0xc2, 0xe7, 0x79, 0x08,
- 0x1e, 0x68, 0x11, 0xc3, 0xa3, 0x17, 0xc4, 0x64, 0x7b, 0x0e, 0x7d, 0xca,
- 0x03, 0xa3, 0x29, 0xd4, 0x3c, 0xdd, 0x00, 0xef, 0xf9, 0xd2, 0x49, 0x92,
- 0x00, 0x1a, 0xb0, 0xc2, 0x02, 0x8c, 0x09, 0x19, 0x99, 0xc3, 0x00, 0xe4,
- 0x09, 0x19, 0x90, 0xc9, 0x40, 0x88, 0x09, 0x12, 0xe8, 0xca, 0x9d, 0xb6,
- 0x09, 0x10, 0x79, 0xc9, 0x40, 0x88, 0x09, 0x10, 0x70, 0xc8, 0xb3, 0xdc,
- 0x09, 0x1c, 0x51, 0xc4, 0x59, 0x55, 0x09, 0x10, 0x08, 0xa0, 0x09, 0x10,
- 0x33, 0x03, 0xa3, 0x2f, 0x9f, 0x09, 0x10, 0x28, 0xcc, 0x34, 0xea, 0x09,
- 0x27, 0xa9, 0xc3, 0x34, 0xf3, 0x09, 0x27, 0xa0, 0xc9, 0xb3, 0x78, 0x09,
- 0x0e, 0x38, 0x94, 0x09, 0x0e, 0x28, 0xc8, 0x64, 0xf7, 0x09, 0x0f, 0x39,
- 0x83, 0x09, 0x0f, 0x30, 0xc2, 0x3d, 0x53, 0x09, 0x0f, 0x19, 0x89, 0x09,
- 0x0f, 0x10, 0xc2, 0x5a, 0x34, 0x09, 0x0e, 0xfb, 0x03, 0xa3, 0x35, 0x4e,
- 0x75, 0xa3, 0xc3, 0xa3, 0x3b, 0xca, 0xa7, 0xc0, 0x09, 0x0e, 0xe0, 0xc8,
- 0xa8, 0x08, 0x09, 0x0e, 0xc8, 0x8e, 0x09, 0x0e, 0xb8, 0x8e, 0x09, 0x0e,
- 0x93, 0x03, 0xa3, 0x47, 0xa0, 0x09, 0x0e, 0x88, 0x90, 0x09, 0x0e, 0x80,
- 0x46, 0x26, 0x11, 0x43, 0xa3, 0x4d, 0x8e, 0x09, 0x0e, 0x48, 0xc3, 0x1a,
- 0x6e, 0x09, 0x0d, 0xe1, 0xc3, 0x1c, 0x4f, 0x09, 0x0d, 0xd9, 0xca, 0x9d,
- 0x48, 0x09, 0x0d, 0xd0, 0x8f, 0x09, 0x26, 0x39, 0x86, 0x09, 0x07, 0x38,
- 0xc9, 0xb4, 0x2c, 0x09, 0x07, 0x30, 0xc2, 0x04, 0xcb, 0x09, 0x26, 0x31,
- 0xc2, 0x8e, 0x17, 0x09, 0x26, 0x28, 0xca, 0x54, 0x87, 0x09, 0x26, 0x08,
- 0x83, 0x09, 0x25, 0xf1, 0xcc, 0x88, 0xc0, 0x09, 0x06, 0x88, 0xc8, 0xb3,
- 0xdb, 0x09, 0x06, 0x98, 0x46, 0x26, 0x11, 0x43, 0xa3, 0x59, 0xc7, 0x26,
- 0x11, 0x09, 0x06, 0x78, 0xc6, 0x47, 0x40, 0x09, 0x25, 0xc9, 0xc8, 0x66,
- 0xc0, 0x09, 0x25, 0xc0, 0xc4, 0x39, 0x41, 0x09, 0x25, 0xb9, 0xc9, 0x9b,
- 0x69, 0x09, 0x06, 0x28, 0xc9, 0xb3, 0x54, 0x09, 0x05, 0xf0, 0x45, 0x01,
- 0x35, 0xc3, 0xa3, 0x65, 0x46, 0x20, 0xb7, 0xc3, 0xa3, 0x71, 0x48, 0x00,
- 0x48, 0xc3, 0xa3, 0x87, 0xc7, 0x29, 0xd4, 0x0e, 0xc7, 0xd1, 0x45, 0x13,
- 0x74, 0xc3, 0xa3, 0x9c, 0xc4, 0x0e, 0xa3, 0x0e, 0xc7, 0xb0, 0x46, 0x0d,
- 0xe0, 0xc3, 0xa3, 0xae, 0x14, 0xc3, 0xa3, 0xd0, 0xc6, 0x00, 0x50, 0x0e,
- 0xc0, 0x73, 0x03, 0xa3, 0xdc, 0xc6, 0x5e, 0xcc, 0x0e, 0xc0, 0x5b, 0x03,
- 0xa3, 0xe0, 0xd0, 0x5e, 0xc2, 0x0e, 0xc0, 0x9b, 0x03, 0xa3, 0xe4, 0xc4,
- 0x17, 0x9a, 0x0e, 0xc0, 0x33, 0x03, 0xa3, 0xea, 0xc6, 0xd0, 0x63, 0x0e,
- 0xc0, 0x50, 0xca, 0x14, 0xe6, 0x0e, 0xc6, 0x69, 0xcd, 0x3d, 0x1f, 0x0e,
- 0xc6, 0x40, 0xc6, 0x14, 0xea, 0x0e, 0xc6, 0x59, 0x47, 0xc1, 0xb1, 0xc3,
- 0xa3, 0xf0, 0x05, 0xc3, 0xa3, 0xfc, 0xcf, 0x67, 0x8b, 0x0e, 0xc1, 0x80,
- 0xcb, 0x4d, 0x41, 0x0e, 0xc6, 0x48, 0x00, 0x43, 0xa4, 0x08, 0xc6, 0x0d,
- 0xdf, 0x0e, 0xc4, 0xe0, 0xc4, 0x0d, 0xe1, 0x0e, 0xc4, 0xd1, 0xcc, 0x8b,
- 0x78, 0x0e, 0xc4, 0xc8, 0x00, 0x43, 0xa4, 0x14, 0xcb, 0x4d, 0x41, 0x0e,
- 0xc3, 0x1a, 0x03, 0xa4, 0x20, 0xca, 0x4d, 0x42, 0x0e, 0xc2, 0xf1, 0xd3,
- 0x40, 0xa4, 0x0e, 0xc2, 0x6a, 0x03, 0xa4, 0x26, 0x00, 0x43, 0xa4, 0x2a,
- 0x00, 0x43, 0xa4, 0x45, 0x00, 0x43, 0xa4, 0x5a, 0xc4, 0x0c, 0xab, 0x0e,
- 0xc6, 0x10, 0xc6, 0x14, 0xea, 0x0e, 0xc5, 0x41, 0xc4, 0x00, 0x5b, 0x0e,
- 0xc4, 0x48, 0xc4, 0x0c, 0xab, 0x0e, 0xc4, 0xf0, 0xc5, 0x0d, 0xe0, 0x0e,
- 0xc7, 0x83, 0x03, 0xa4, 0x66, 0xc6, 0x5e, 0xcc, 0x0e, 0xc6, 0xd9, 0xcb,
- 0x14, 0xe5, 0x0e, 0xc6, 0x09, 0x47, 0x00, 0x50, 0x43, 0xa4, 0x6a, 0xc5,
- 0x08, 0x42, 0x0e, 0xc5, 0x13, 0x03, 0xa4, 0x79, 0xc5, 0x0d, 0xe0, 0x0e,
- 0xc4, 0xd8, 0xcf, 0x66, 0x9b, 0x0e, 0xc4, 0x18, 0xc8, 0xb7, 0x55, 0x0e,
- 0xc4, 0x09, 0x46, 0x0d, 0xe0, 0x43, 0xa4, 0x7f, 0x00, 0x43, 0xa4, 0x8b,
- 0x00, 0x43, 0xa4, 0x97, 0xc7, 0x29, 0xd4, 0x0e, 0xc3, 0x99, 0xc4, 0x0d,
- 0xf4, 0x0e, 0xc3, 0x78, 0x00, 0x43, 0xa4, 0xa6, 0xc5, 0x01, 0x74, 0x0e,
- 0xc2, 0xa0, 0xc5, 0x17, 0x99, 0x0e, 0xc6, 0xa8, 0xcb, 0x14, 0xe5, 0x0e,
- 0xc5, 0xd9, 0xc6, 0x00, 0x50, 0x0e, 0xc0, 0x7b, 0x03, 0xa4, 0xb2, 0xc5,
- 0x5e, 0xcc, 0x0e, 0xc0, 0x69, 0xc4, 0x17, 0x9a, 0x0e, 0xc0, 0x38, 0xc5,
- 0xd8, 0xcc, 0x0e, 0xcd, 0x69, 0xca, 0xa2, 0x70, 0x0e, 0xcd, 0x30, 0xc5,
- 0x17, 0xef, 0x0e, 0xcc, 0x73, 0x03, 0xa4, 0xb6, 0xc6, 0x06, 0x1b, 0x0e,
- 0xcc, 0x69, 0xc5, 0x04, 0x73, 0x0e, 0xcc, 0x60, 0xc6, 0x06, 0x1b, 0x0e,
- 0xcc, 0x89, 0xc5, 0x04, 0x73, 0x0e, 0xcc, 0x80, 0xc2, 0x00, 0x15, 0x0e,
- 0xcc, 0x58, 0xcb, 0x5e, 0x77, 0x0f, 0xc1, 0x79, 0xca, 0x9f, 0xd2, 0x0f,
- 0xc1, 0x59, 0x49, 0xab, 0x7d, 0xc3, 0xa4, 0xbc, 0xd8, 0x25, 0x98, 0x01,
- 0x5b, 0xe9, 0xcc, 0x86, 0xb0, 0x0f, 0xc1, 0x19, 0xcc, 0x82, 0x84, 0x0f,
- 0xc1, 0x39, 0xd0, 0x5e, 0x72, 0x0f, 0xc1, 0x98, 0xe0, 0x08, 0xc7, 0x01,
- 0x5c, 0x18, 0xcf, 0x2c, 0x05, 0x01, 0x5b, 0xe1, 0xd1, 0x00, 0xf6, 0x01,
- 0x5b, 0xe0, 0xc7, 0x08, 0xca, 0x01, 0x5d, 0x29, 0xc9, 0x03, 0x68, 0x01,
- 0x5d, 0x38, 0xcf, 0x2c, 0x05, 0x01, 0x48, 0xb9, 0xd6, 0x2d, 0x07, 0x01,
- 0x48, 0xc0, 0xc8, 0x62, 0x79, 0x01, 0x4b, 0x61, 0xdd, 0x11, 0x1a, 0x01,
- 0x4b, 0x40, 0xe0, 0x07, 0x67, 0x01, 0x4b, 0x20, 0xcc, 0x05, 0x7b, 0x07,
- 0xe8, 0x51, 0xcb, 0x12, 0x31, 0x07, 0xe9, 0x70, 0x45, 0x19, 0x9d, 0xc3,
- 0xa4, 0xc8, 0xce, 0x43, 0xed, 0x07, 0xed, 0x50, 0xcc, 0x12, 0x30, 0x07,
- 0xeb, 0x59, 0xca, 0x2b, 0x13, 0x07, 0xeb, 0x50, 0xca, 0x2b, 0x13, 0x07,
- 0xeb, 0x61, 0xcc, 0x12, 0x30, 0x07, 0xeb, 0x68, 0xcc, 0x12, 0x30, 0x07,
- 0xeb, 0x29, 0xca, 0x2b, 0x13, 0x07, 0xeb, 0x20, 0xdc, 0x14, 0x36, 0x07,
- 0xea, 0x61, 0xd2, 0x4b, 0x54, 0x07, 0xef, 0xd0, 0xe0, 0x05, 0x67, 0x07,
- 0xef, 0x80, 0xca, 0x2b, 0x13, 0x07, 0xeb, 0x89, 0xcc, 0x12, 0x30, 0x07,
- 0xeb, 0x90, 0xca, 0x2b, 0x13, 0x07, 0xea, 0x89, 0xcc, 0x12, 0x30, 0x07,
- 0xea, 0x90, 0xca, 0x2b, 0x13, 0x07, 0xe3, 0x49, 0xcd, 0x05, 0x7a, 0x07,
- 0xe0, 0x20, 0xca, 0x2b, 0x13, 0x07, 0xdf, 0xa9, 0xcd, 0x05, 0x7a, 0x07,
- 0xdf, 0xa0, 0x48, 0x08, 0x1f, 0xc3, 0xa4, 0xd4, 0xca, 0x2b, 0x13, 0x07,
- 0xdf, 0x59, 0xcd, 0x05, 0x7a, 0x07, 0xdf, 0x50, 0xca, 0x2b, 0x13, 0x07,
- 0xdf, 0x69, 0xcd, 0x05, 0x7a, 0x07, 0xdf, 0x60, 0xcc, 0x05, 0x7b, 0x07,
- 0xe0, 0x11, 0xcb, 0x12, 0x31, 0x07, 0xe4, 0xd0, 0xcc, 0x05, 0x7b, 0x07,
- 0xe0, 0x09, 0xcb, 0x12, 0x31, 0x07, 0xe4, 0xc0, 0xcb, 0x66, 0x54, 0x07,
- 0xe7, 0x01, 0xcc, 0x12, 0x30, 0x07, 0xe4, 0xd8, 0xcb, 0x12, 0x31, 0x07,
- 0xdf, 0xc1, 0xcc, 0x05, 0x7b, 0x07, 0xdf, 0xb0, 0xca, 0x2b, 0x13, 0x07,
- 0xeb, 0xf1, 0xcc, 0x12, 0x30, 0x07, 0xed, 0xa0, 0xcf, 0x0e, 0xbb, 0x00,
- 0x31, 0xf9, 0xcd, 0x01, 0x47, 0x00, 0x31, 0xf0, 0xca, 0x07, 0xfd, 0x00,
- 0x3b, 0xb9, 0x16, 0x43, 0xa4, 0xe0, 0xc5, 0x01, 0x62, 0x00, 0x35, 0x1b,
- 0x03, 0xa4, 0xec, 0xcb, 0x90, 0x5e, 0x00, 0x35, 0x10, 0x4a, 0x04, 0x5e,
- 0xc3, 0xa4, 0xf2, 0xcd, 0x01, 0x5a, 0x00, 0x3b, 0x00, 0xcf, 0x0e, 0xbb,
- 0x00, 0x35, 0xa1, 0xcd, 0x01, 0x5a, 0x00, 0x35, 0x90, 0xd7, 0x29, 0x0c,
- 0x00, 0x46, 0x39, 0x98, 0x00, 0x35, 0xa8, 0xc8, 0xa6, 0xb4, 0x00, 0x45,
- 0x31, 0xc7, 0x17, 0x7c, 0x00, 0x35, 0xb0, 0xc5, 0x01, 0x62, 0x00, 0x35,
- 0xc1, 0xc5, 0x00, 0x95, 0x00, 0x35, 0xb8, 0xc5, 0x01, 0x62, 0x00, 0x46,
- 0x31, 0xc5, 0x00, 0x95, 0x00, 0x46, 0x28, 0xc5, 0x01, 0x62, 0x00, 0x45,
- 0x99, 0xc5, 0x00, 0x95, 0x00, 0x35, 0x01, 0xd8, 0x24, 0xc0, 0x00, 0x3a,
- 0xf0, 0xc5, 0x00, 0x95, 0x00, 0x3a, 0xe9, 0xd0, 0x24, 0xf8, 0x00, 0x3a,
- 0xf8, 0x49, 0xad, 0x09, 0xc3, 0xa4, 0xfe, 0xd3, 0x45, 0x64, 0x00, 0x43,
- 0x93, 0x03, 0xa5, 0x26, 0xc9, 0x17, 0x7a, 0x00, 0x43, 0xd1, 0xd2, 0x47,
- 0xbe, 0x00, 0x43, 0x99, 0x4b, 0x5c, 0xd2, 0xc3, 0xa5, 0x2c, 0x46, 0x09,
- 0x89, 0xc3, 0xa5, 0x38, 0xcb, 0x83, 0x5c, 0x00, 0x31, 0x13, 0x03, 0xa5,
- 0x4a, 0x5d, 0x11, 0xe5, 0x43, 0xa5, 0x4e, 0x00, 0x43, 0xa5, 0x5a, 0xcd,
- 0x05, 0x7a, 0x07, 0xf7, 0x79, 0xca, 0x2b, 0x13, 0x07, 0xf7, 0x80, 0x48,
- 0x01, 0x47, 0xc3, 0xa5, 0x66, 0x4a, 0x04, 0x5e, 0x43, 0xa5, 0x72, 0x44,
- 0x01, 0xb8, 0xc3, 0xa5, 0x84, 0x16, 0xc3, 0xa5, 0x90, 0xc4, 0x00, 0x5b,
- 0x00, 0x35, 0x58, 0xcb, 0x12, 0x31, 0x07, 0xf6, 0xd9, 0xcc, 0x05, 0x7b,
- 0x07, 0xf6, 0xc8, 0xcb, 0x12, 0x31, 0x07, 0xdc, 0x01, 0xcc, 0x05, 0x7b,
- 0x07, 0xdb, 0xf0, 0xcb, 0x12, 0x31, 0x07, 0xdb, 0x01, 0xcc, 0x05, 0x7b,
- 0x07, 0xda, 0xf0, 0x98, 0x00, 0x45, 0xf9, 0xc9, 0xb1, 0x02, 0x00, 0x45,
- 0xc0, 0x00, 0x43, 0xa5, 0x9c, 0xcb, 0x12, 0x31, 0x07, 0xdb, 0x21, 0xcc,
- 0x05, 0x7b, 0x07, 0xdb, 0x10, 0xcd, 0x01, 0x47, 0x00, 0x45, 0x19, 0x4a,
- 0x04, 0x5e, 0x43, 0xa5, 0xae, 0xcc, 0x05, 0x7b, 0x07, 0xf4, 0xc9, 0xcb,
- 0x12, 0x31, 0x07, 0xf4, 0xd8, 0x52, 0x17, 0x68, 0xc3, 0xa5, 0xba, 0xcf,
- 0x68, 0x8a, 0x00, 0x36, 0x89, 0xc3, 0x12, 0xec, 0x00, 0x36, 0x68, 0x00,
- 0x43, 0xa5, 0xcc, 0x45, 0x00, 0x6c, 0xc3, 0xa5, 0xdc, 0xca, 0x2b, 0x13,
- 0x07, 0xdd, 0x79, 0xcd, 0x05, 0x7a, 0x07, 0xdd, 0x70, 0x45, 0x04, 0x74,
- 0xc3, 0xa5, 0xeb, 0xc5, 0x01, 0x02, 0x00, 0x3a, 0xd8, 0xc5, 0x00, 0x95,
- 0x00, 0x34, 0xb9, 0xd0, 0x24, 0xf8, 0x00, 0x3b, 0x58, 0xce, 0x17, 0x75,
- 0x00, 0x34, 0xb0, 0xca, 0xa7, 0x98, 0x00, 0x45, 0xb1, 0x98, 0x00, 0x3a,
- 0xb2, 0x03, 0xa5, 0xfe, 0xdb, 0x17, 0x68, 0x00, 0x3a, 0xa1, 0x4a, 0x04,
- 0x5e, 0x43, 0xa6, 0x04, 0xcb, 0x12, 0x31, 0x07, 0xda, 0xe1, 0xcc, 0x05,
- 0x7b, 0x07, 0xda, 0xd0, 0xd2, 0x48, 0xcc, 0x00, 0x45, 0xa8, 0xc5, 0x01,
- 0x62, 0x00, 0x45, 0x79, 0xc5, 0x00, 0x95, 0x00, 0x34, 0xf0, 0xcf, 0x68,
- 0x8a, 0x00, 0x34, 0xdb, 0x03, 0xa6, 0x13, 0xd8, 0x24, 0xf0, 0x00, 0x3b,
- 0x68, 0xe0, 0x06, 0x87, 0x00, 0x3b, 0xe8, 0xe0, 0x02, 0x87, 0x00, 0x3b,
- 0x80, 0x16, 0xc3, 0xa6, 0x19, 0x49, 0x1e, 0x8d, 0xc3, 0xa6, 0x25, 0xcf,
- 0x38, 0x82, 0x00, 0x34, 0x81, 0xc9, 0x0e, 0xac, 0x00, 0x34, 0x53, 0x03,
- 0xa6, 0x2f, 0xc4, 0x00, 0x5b, 0x00, 0x34, 0x48, 0xcc, 0x05, 0x7b, 0x07,
- 0xf5, 0xe9, 0xcb, 0x12, 0x31, 0x07, 0xf5, 0xf8, 0xcd, 0x05, 0x7a, 0x07,
- 0xf4, 0x29, 0xca, 0x2b, 0x13, 0x07, 0xf4, 0x30, 0xc5, 0x00, 0x95, 0x00,
- 0x3b, 0x29, 0xc5, 0x01, 0x62, 0x00, 0x3b, 0x30, 0xcb, 0x12, 0x31, 0x07,
- 0xdc, 0xe1, 0xcc, 0x05, 0x7b, 0x07, 0xdc, 0xd0, 0xcf, 0x0e, 0xbb, 0x00,
- 0x35, 0xe9, 0xcd, 0x01, 0x5a, 0x00, 0x3b, 0x38, 0xc3, 0x02, 0x97, 0x00,
- 0x3b, 0x41, 0x98, 0x00, 0x3b, 0x48, 0xcd, 0x05, 0x7a, 0x07, 0xdc, 0xf1,
- 0xca, 0x2b, 0x13, 0x07, 0xdc, 0xf8, 0xd6, 0x2c, 0x99, 0x00, 0x44, 0x51,
- 0x16, 0xc3, 0xa6, 0x35, 0xcb, 0x09, 0x89, 0x00, 0x34, 0x09, 0x46, 0x0a,
- 0xdf, 0xc3, 0xa6, 0x41, 0x58, 0x22, 0x80, 0x43, 0xa6, 0x47, 0xcc, 0x05,
- 0x7b, 0x07, 0xf5, 0x09, 0xcb, 0x12, 0x31, 0x07, 0xf5, 0x18, 0xcb, 0x66,
- 0x54, 0x07, 0xdd, 0x69, 0x0b, 0xc3, 0xa6, 0x51, 0xca, 0x2b, 0x13, 0x07,
- 0xdd, 0x58, 0xcb, 0x66, 0x54, 0x07, 0xdd, 0x49, 0x0b, 0xc3, 0xa6, 0x5d,
- 0xca, 0x2b, 0x13, 0x07, 0xdd, 0x39, 0xd0, 0x5b, 0x22, 0x00, 0x36, 0x10,
- 0x00, 0x43, 0xa6, 0x69, 0xcf, 0x02, 0x18, 0x0f, 0xdd, 0x23, 0x03, 0xa6,
- 0x75, 0xe0, 0x02, 0x07, 0x0f, 0xdd, 0x40, 0xcf, 0x02, 0x18, 0x0f, 0xdd,
- 0x2b, 0x03, 0xa6, 0x7b, 0xdf, 0x0d, 0x20, 0x0f, 0xdd, 0x48, 0xd3, 0x41,
- 0x88, 0x0f, 0xd1, 0x89, 0xcf, 0x18, 0x82, 0x0f, 0xd1, 0xe8, 0x96, 0x0b,
- 0x4b, 0xb8, 0xc2, 0x0f, 0xf5, 0x0b, 0x47, 0xc8, 0xa5, 0x01, 0x45, 0xf9,
- 0xa4, 0x01, 0x43, 0xfa, 0x03, 0xa6, 0x81, 0xa5, 0x01, 0x46, 0xf8, 0xa5,
- 0x01, 0x47, 0x78, 0xa5, 0x01, 0x47, 0xb8, 0xa5, 0x01, 0x47, 0xd8, 0xa5,
- 0x01, 0x47, 0xe8, 0xa5, 0x01, 0x47, 0xf0, 0xc7, 0x08, 0xca, 0x01, 0x5d,
- 0x21, 0xc9, 0x03, 0x68, 0x01, 0x5d, 0x30, 0xcf, 0x2c, 0x05, 0x01, 0x5b,
- 0xd1, 0xd1, 0x00, 0xf6, 0x01, 0x5b, 0xd0, 0xcf, 0x2c, 0x05, 0x01, 0x59,
- 0xe1, 0xd6, 0x2d, 0x07, 0x01, 0x59, 0xe8, 0xc8, 0x62, 0x79, 0x01, 0x4b,
- 0x51, 0xdf, 0x05, 0x48, 0x01, 0x4b, 0x10, 0xcc, 0x57, 0x82, 0x0e, 0xf8,
- 0xa9, 0xc8, 0x6d, 0x67, 0x00, 0x13, 0xd3, 0x03, 0xa6, 0x85, 0xcc, 0x21,
- 0x0b, 0x05, 0x5b, 0x41, 0xc4, 0x02, 0x83, 0x00, 0x13, 0xd9, 0xc4, 0x12,
- 0xeb, 0x01, 0x63, 0xc8, 0x46, 0x00, 0x6b, 0x43, 0xa6, 0x8b, 0xcc, 0x21,
- 0x84, 0x05, 0x5a, 0x20, 0xc9, 0xb1, 0xf5, 0x00, 0x15, 0x78, 0xd1, 0x56,
- 0xe4, 0x0e, 0xf9, 0x41, 0xc9, 0x21, 0x0e, 0x00, 0xeb, 0x90, 0x45, 0x00,
- 0x56, 0x43, 0xa6, 0x97, 0xc9, 0x69, 0x26, 0x00, 0xf2, 0xf9, 0xc7, 0x6d,
- 0x68, 0x00, 0x13, 0xe0, 0x42, 0x00, 0xed, 0xc3, 0xa6, 0xa3, 0xca, 0x1f,
- 0x8f, 0x00, 0x10, 0x88, 0xcb, 0x4c, 0x8d, 0x05, 0x5a, 0x49, 0xc6, 0xbf,
- 0x3f, 0x00, 0x0a, 0xb9, 0xc4, 0x63, 0xce, 0x00, 0x0a, 0xc8, 0xc5, 0x01,
- 0x62, 0x00, 0xf2, 0x39, 0xc5, 0x00, 0x95, 0x00, 0xf2, 0x28, 0xce, 0x02,
- 0x79, 0x05, 0x5b, 0x31, 0xc4, 0x02, 0x52, 0x00, 0x15, 0x28, 0xc9, 0x1e,
- 0x4b, 0x00, 0x14, 0x20, 0xc9, 0x0e, 0xac, 0x00, 0xf7, 0x19, 0xc5, 0x21,
- 0x12, 0x00, 0xf7, 0x09, 0xca, 0x9f, 0xc8, 0x00, 0xf6, 0xf9, 0xc5, 0x1f,
- 0x94, 0x00, 0xf6, 0xe9, 0xc5, 0x35, 0x4a, 0x00, 0xf6, 0xd8, 0xc9, 0x0e,
- 0xac, 0x00, 0xf6, 0xc9, 0xc5, 0x21, 0x12, 0x00, 0xf6, 0xb9, 0xca, 0x9f,
- 0xc8, 0x00, 0xf6, 0xa9, 0xc5, 0x1f, 0x94, 0x00, 0xf6, 0x99, 0xc5, 0x35,
- 0x4a, 0x00, 0xf6, 0x88, 0xc5, 0x01, 0x62, 0x00, 0xf6, 0x69, 0xc5, 0x00,
- 0x95, 0x00, 0x11, 0x7a, 0x03, 0xa6, 0xb2, 0xc5, 0x35, 0x4a, 0x00, 0x0a,
- 0x89, 0xc5, 0x1f, 0x94, 0x00, 0x10, 0x68, 0xc3, 0x02, 0x53, 0x00, 0x14,
- 0x99, 0xc4, 0x63, 0xce, 0x00, 0x0f, 0x78, 0x44, 0x00, 0x57, 0xc3, 0xa6,
- 0xb8, 0xc5, 0x01, 0x62, 0x00, 0xf0, 0xc8, 0xc5, 0x01, 0x62, 0x00, 0xf5,
- 0xc9, 0xc5, 0x00, 0x95, 0x00, 0x08, 0xb8, 0x45, 0x00, 0x56, 0x43, 0xa6,
- 0xd6, 0xc7, 0x0e, 0xae, 0x00, 0xf7, 0x29, 0x45, 0x06, 0xf3, 0x43, 0xa6,
- 0xf4, 0x00, 0x43, 0xa7, 0x00, 0xc9, 0xa9, 0x29, 0x00, 0xf3, 0xc9, 0xc5,
- 0x01, 0x62, 0x00, 0xf3, 0xa8, 0xc6, 0x01, 0x61, 0x00, 0xf3, 0xb8, 0xc9,
- 0x0e, 0xac, 0x00, 0xf5, 0xb9, 0xc5, 0x21, 0x12, 0x00, 0xf5, 0xa9, 0xca,
- 0x9f, 0xc8, 0x00, 0xf5, 0x99, 0xc5, 0x1f, 0x94, 0x00, 0xf5, 0x89, 0xc5,
- 0x35, 0x4a, 0x00, 0xf5, 0x78, 0x45, 0x00, 0x56, 0x43, 0xa7, 0x0c, 0x42,
- 0x00, 0xed, 0xc3, 0xa7, 0x2a, 0xca, 0x1f, 0x8f, 0x00, 0x10, 0x08, 0xcb,
- 0x9a, 0x2a, 0x00, 0x0e, 0xf8, 0xcd, 0x63, 0xfa, 0x00, 0xf4, 0xd1, 0x43,
- 0x00, 0x35, 0x43, 0xa7, 0x39, 0xca, 0x23, 0xf5, 0x05, 0x5a, 0xc9, 0xd2,
- 0x4e, 0x90, 0x05, 0x5a, 0xc0, 0xc9, 0x0e, 0xac, 0x00, 0xf7, 0x81, 0xc5,
- 0x21, 0x12, 0x00, 0xf7, 0x71, 0xca, 0x9f, 0xc8, 0x00, 0xf7, 0x61, 0xc5,
- 0x1f, 0x94, 0x00, 0xf7, 0x51, 0xc5, 0x35, 0x4a, 0x00, 0xf7, 0x40, 0xc5,
- 0x35, 0x4a, 0x00, 0x0b, 0x81, 0xc5, 0x1f, 0x94, 0x00, 0x10, 0xa0, 0xc5,
- 0x01, 0x62, 0x00, 0xf3, 0x91, 0x44, 0x00, 0x57, 0x43, 0xa7, 0x48, 0xcb,
- 0x9a, 0x2a, 0x00, 0x11, 0x80, 0xc9, 0x0e, 0xac, 0x00, 0xf6, 0x31, 0xc5,
- 0x21, 0x12, 0x00, 0xf6, 0x21, 0xca, 0x9f, 0xc8, 0x00, 0xf6, 0x11, 0xc5,
- 0x1f, 0x94, 0x00, 0xf6, 0x01, 0xc5, 0x35, 0x4a, 0x00, 0xf5, 0xf0, 0xcb,
- 0x9a, 0x2a, 0x00, 0x0f, 0x00, 0xcb, 0x9a, 0x2a, 0x00, 0xf2, 0xe0, 0x16,
- 0xc3, 0xa7, 0x60, 0xc6, 0x92, 0x31, 0x00, 0x89, 0x11, 0xc5, 0x7a, 0x92,
- 0x00, 0x89, 0x21, 0xc5, 0xd7, 0x8c, 0x00, 0x89, 0x30, 0x87, 0x00, 0x8c,
- 0x28, 0xc4, 0xac, 0xd8, 0x00, 0x8e, 0x61, 0xc5, 0x98, 0x41, 0x06, 0xbe,
- 0xb0, 0xc4, 0xac, 0xd8, 0x00, 0x8e, 0x99, 0xc5, 0x98, 0x41, 0x00, 0x8e,
- 0xa1, 0xc6, 0xc8, 0x2d, 0x06, 0xbe, 0xc9, 0xc7, 0xc1, 0x06, 0x06, 0xbe,
- 0xd0, 0x02, 0x43, 0xa7, 0x6c, 0xc4, 0xe5, 0x9b, 0x01, 0x9f, 0xf0, 0xc3,
- 0x01, 0xb4, 0x01, 0x9b, 0x69, 0x16, 0xc3, 0xa7, 0x8a, 0xc4, 0x06, 0x9d,
- 0x01, 0x9b, 0x80, 0xcc, 0x21, 0x84, 0x0f, 0x03, 0x68, 0xce, 0x6c, 0xf1,
- 0x0f, 0x03, 0x60, 0xcf, 0x66, 0x5f, 0x0f, 0x03, 0x58, 0xc4, 0x02, 0x83,
- 0x00, 0x15, 0xa9, 0xc8, 0x6d, 0x67, 0x08, 0x3d, 0x32, 0x03, 0xa7, 0x96,
- 0xc4, 0x22, 0x4b, 0x0e, 0x8b, 0x19, 0xc4, 0x2b, 0xc8, 0x0e, 0x8a, 0x08,
- 0xc4, 0x22, 0x4b, 0x0e, 0x8b, 0x09, 0xc4, 0x2b, 0xc8, 0x0e, 0x89, 0xf8,
- 0xa0, 0x0e, 0x8e, 0x71, 0x9f, 0x0e, 0x8e, 0x69, 0x9e, 0x0e, 0x8e, 0x60,
- 0x46, 0x02, 0x12, 0xc3, 0xa7, 0x9c, 0xc5, 0x03, 0xe2, 0x0e, 0x8a, 0x49,
- 0xc5, 0x02, 0x31, 0x0e, 0x8a, 0x40, 0xc5, 0x03, 0xe2, 0x0e, 0x8a, 0x79,
- 0xc5, 0x02, 0x31, 0x0e, 0x8a, 0x70, 0xc5, 0x03, 0xe2, 0x0e, 0x8a, 0x69,
- 0xc5, 0x02, 0x31, 0x0e, 0x8a, 0x60, 0xc5, 0x03, 0xe2, 0x0e, 0x8a, 0x59,
- 0xc5, 0x02, 0x31, 0x0e, 0x8a, 0x50, 0xcd, 0x81, 0x4b, 0x0e, 0x8d, 0x69,
- 0xc4, 0xa7, 0x1f, 0x0e, 0x8c, 0x41, 0x16, 0xc3, 0xa7, 0xa8, 0xd0, 0x58,
- 0xc2, 0x0e, 0x8b, 0x30, 0xc6, 0xcc, 0x85, 0x0e, 0x8d, 0x51, 0xcb, 0x8e,
- 0xe8, 0x0e, 0x8c, 0x51, 0xc2, 0x00, 0x6d, 0x0e, 0x8c, 0x28, 0x14, 0xc3,
- 0xa7, 0xb4, 0xc5, 0xd9, 0x08, 0x0e, 0x8b, 0xe8, 0xc2, 0x00, 0x3e, 0x0e,
- 0x8c, 0x39, 0x43, 0xe6, 0xdc, 0x43, 0xa7, 0xc0, 0xc5, 0x0b, 0x42, 0x0e,
- 0x8b, 0xdb, 0x03, 0xa7, 0xd4, 0xcf, 0x65, 0xba, 0x0e, 0x8b, 0x68, 0xc9,
- 0xae, 0xdd, 0x0e, 0x8c, 0x00, 0xc5, 0x59, 0xdd, 0x0e, 0x8e, 0x18, 0xcd,
- 0x44, 0xd2, 0x00, 0xff, 0xe1, 0xc4, 0x7f, 0x43, 0x00, 0xfb, 0x42, 0x03,
- 0xa7, 0xda, 0x45, 0x00, 0x56, 0x43, 0xa7, 0xe0, 0x45, 0x00, 0x56, 0x43,
- 0xa7, 0xf6, 0x45, 0x00, 0x56, 0x43, 0xa8, 0x02, 0x45, 0x00, 0x56, 0x43,
- 0xa8, 0x0e, 0x45, 0x00, 0x56, 0x43, 0xa8, 0x20, 0xcb, 0x95, 0x4f, 0x00,
- 0xf9, 0xf1, 0xc4, 0xe5, 0x9f, 0x00, 0xf9, 0xe1, 0xc5, 0x2a, 0xae, 0x00,
- 0xf9, 0xd0, 0xcd, 0x44, 0xd2, 0x00, 0xfe, 0x61, 0xc4, 0x7f, 0x43, 0x00,
- 0xf9, 0x42, 0x03, 0xa8, 0x32, 0x45, 0x00, 0x56, 0x43, 0xa8, 0x38, 0x45,
- 0x00, 0x56, 0x43, 0xa8, 0x4e, 0x45, 0x00, 0x56, 0x43, 0xa8, 0x5a, 0xcd,
- 0x44, 0xd2, 0x00, 0xfd, 0xe1, 0xc4, 0x7f, 0x43, 0x00, 0xf8, 0x42, 0x03,
- 0xa8, 0x66, 0xc4, 0x07, 0xa6, 0x00, 0xfd, 0xd1, 0xc5, 0xd7, 0x41, 0x00,
- 0xfd, 0xc0, 0x45, 0x00, 0x56, 0x43, 0xa8, 0x6c, 0xca, 0x95, 0x50, 0x00,
- 0xff, 0xb3, 0x03, 0xa8, 0x82, 0xc4, 0x7f, 0x43, 0x00, 0xfb, 0x02, 0x03,
- 0xa8, 0x88, 0xd2, 0x48, 0x18, 0x00, 0xff, 0xa0, 0xd2, 0x48, 0x18, 0x00,
- 0xff, 0x90, 0x45, 0x00, 0x56, 0x43, 0xa8, 0x8e, 0x45, 0x00, 0x56, 0x43,
- 0xa8, 0xaf, 0x45, 0x00, 0x56, 0x43, 0xa8, 0xbb, 0x45, 0x00, 0x56, 0x43,
- 0xa8, 0xc7, 0x45, 0x00, 0x56, 0x43, 0xa8, 0xdf, 0x45, 0x00, 0x56, 0x43,
- 0xa8, 0xf1, 0x45, 0x00, 0x56, 0x43, 0xa9, 0x03, 0x45, 0x00, 0x56, 0x43,
- 0xa9, 0x1b, 0x45, 0x00, 0x56, 0x43, 0xa9, 0x2d, 0xca, 0x95, 0x50, 0x00,
- 0xfe, 0x33, 0x03, 0xa9, 0x3f, 0xc4, 0x7f, 0x43, 0x00, 0xf9, 0x02, 0x03,
- 0xa9, 0x45, 0xd2, 0x48, 0x18, 0x00, 0xfe, 0x20, 0xd2, 0x48, 0x18, 0x00,
- 0xfe, 0x10, 0x45, 0x00, 0x56, 0x43, 0xa9, 0x4b, 0x45, 0x00, 0x56, 0x43,
- 0xa9, 0x6c, 0x45, 0x00, 0x56, 0x43, 0xa9, 0x78, 0xca, 0x95, 0x50, 0x00,
- 0xfd, 0xb3, 0x03, 0xa9, 0x84, 0xc4, 0x7f, 0x43, 0x00, 0xf8, 0x02, 0x03,
- 0xa9, 0x8a, 0xd2, 0x48, 0x18, 0x00, 0xfd, 0xa0, 0xc4, 0x07, 0xa6, 0x00,
- 0xfb, 0x83, 0x03, 0xa9, 0x90, 0xc5, 0xd7, 0x41, 0x00, 0xfd, 0x80, 0x45,
- 0x00, 0x56, 0x43, 0xa9, 0x96, 0x00, 0x43, 0xa9, 0xb7, 0xc7, 0x32, 0x80,
- 0x08, 0x0a, 0x33, 0x03, 0xa9, 0xc3, 0xc6, 0xbf, 0xd7, 0x08, 0x0a, 0x40,
- 0xc7, 0x32, 0x80, 0x08, 0x0a, 0x3b, 0x03, 0xa9, 0xc9, 0xc6, 0xbf, 0xd7,
- 0x08, 0x0a, 0x50, 0xca, 0xa7, 0x0c, 0x0e, 0x7d, 0xe3, 0x03, 0xa9, 0xcf,
- 0xc9, 0x8e, 0x71, 0x0e, 0x7d, 0xd2, 0x03, 0xa9, 0xd5, 0xd6, 0x2d, 0xe3,
- 0x0e, 0x7d, 0xb8, 0xc9, 0x40, 0x88, 0x09, 0x10, 0x38, 0xca, 0x9f, 0x5a,
- 0x09, 0x0f, 0x00, 0xc4, 0x59, 0x55, 0x09, 0x0e, 0xf1, 0xca, 0xa1, 0xe4,
- 0x09, 0x0e, 0xe8, 0xcf, 0x67, 0xc7, 0x09, 0x0e, 0x98, 0xc2, 0x12, 0x0a,
- 0x09, 0x0e, 0x71, 0xc2, 0x00, 0xa4, 0x09, 0x0e, 0x68, 0xc2, 0x00, 0x8c,
- 0x09, 0x25, 0xe9, 0xc2, 0x03, 0x30, 0x09, 0x25, 0xe0, 0xd4, 0x3d, 0x19,
- 0x0e, 0xc8, 0x11, 0xcb, 0x94, 0x31, 0x0e, 0xc7, 0xf8, 0xcc, 0x17, 0x83,
- 0x0e, 0xc8, 0x09, 0x16, 0xc3, 0xa9, 0xdb, 0xc9, 0xaf, 0xd9, 0x0e, 0xc4,
- 0x99, 0xca, 0xa1, 0x76, 0x0e, 0xc0, 0x40, 0xcb, 0x14, 0xe5, 0x0e, 0xc7,
- 0xe9, 0xcb, 0x14, 0xde, 0x0e, 0xc7, 0xe1, 0xcc, 0x89, 0xb0, 0x0e, 0xc7,
- 0xda, 0x03, 0xa9, 0xe7, 0xc4, 0x17, 0x9a, 0x0e, 0xc7, 0xc9, 0xc9, 0x14,
- 0xf1, 0x0e, 0xc7, 0xc1, 0xc8, 0x1d, 0xe4, 0x0e, 0xc7, 0xb8, 0x05, 0xc3,
- 0xa9, 0xed, 0xc4, 0x02, 0x83, 0x0e, 0xc7, 0x33, 0x03, 0xa9, 0xfa, 0x4e,
- 0x6f, 0xf3, 0xc3, 0xaa, 0x00, 0xc4, 0x0d, 0xf4, 0x0e, 0xc6, 0xe3, 0x03,
- 0xaa, 0x0c, 0x47, 0xc1, 0xb1, 0x43, 0xaa, 0x10, 0xca, 0x14, 0xe6, 0x0e,
- 0xc5, 0xd1, 0xcd, 0x3d, 0x1f, 0x0e, 0xc0, 0x48, 0x00, 0x43, 0xaa, 0x1c,
- 0x00, 0x43, 0xaa, 0x51, 0x47, 0x0d, 0xdf, 0x43, 0xaa, 0x60, 0xcc, 0x84,
- 0x64, 0x0e, 0xc0, 0xe8, 0xc8, 0x67, 0x92, 0x0e, 0xc2, 0x11, 0x4a, 0x9b,
- 0x5e, 0x43, 0xaa, 0x6c, 0x4d, 0x7e, 0x73, 0xc3, 0xaa, 0x78, 0xce, 0x70,
- 0xef, 0x0e, 0xc1, 0xb0, 0xcf, 0x3d, 0x1e, 0x0e, 0xc5, 0xb1, 0xc9, 0x14,
- 0xf1, 0x0e, 0xc5, 0xa8, 0xce, 0x6d, 0x7d, 0x0e, 0xc4, 0x89, 0x47, 0xc5,
- 0x8c, 0x43, 0xaa, 0x84, 0xc5, 0x17, 0x99, 0x0e, 0xc3, 0x20, 0x00, 0x43,
- 0xaa, 0x90, 0xc6, 0x5e, 0xcc, 0x0e, 0xc2, 0xbb, 0x03, 0xaa, 0x9c, 0xcd,
- 0x29, 0xce, 0x0e, 0xc2, 0x91, 0xc4, 0x17, 0x9a, 0x0e, 0xc2, 0x81, 0xc9,
- 0xb3, 0xb7, 0x0e, 0xc2, 0x70, 0xc9, 0x14, 0xf1, 0x0e, 0xc2, 0x3b, 0x03,
- 0xaa, 0xa0, 0xc6, 0x5e, 0xcc, 0x0e, 0xc2, 0x31, 0xc4, 0x17, 0x9a, 0x0e,
- 0xc2, 0x28, 0xc2, 0x00, 0x34, 0x0e, 0xc7, 0x99, 0xc3, 0x00, 0x83, 0x0e,
- 0xc7, 0x90, 0x00, 0x43, 0xaa, 0xa6, 0xc6, 0x14, 0xea, 0x0e, 0xc5, 0x31,
- 0xc4, 0x00, 0x5b, 0x0e, 0xc4, 0x42, 0x03, 0xaa, 0xb6, 0xc6, 0x0d, 0xdf,
- 0x0e, 0xc4, 0xe8, 0xc4, 0x0d, 0xf4, 0x0e, 0xc3, 0xf9, 0xc7, 0x29, 0xd4,
- 0x0e, 0xc3, 0xe0, 0xc2, 0x00, 0x34, 0x0e, 0xc6, 0xc9, 0xc3, 0x00, 0x83,
- 0x0e, 0xc6, 0xc0, 0xc5, 0x0d, 0xe0, 0x0e, 0xc7, 0x63, 0x03, 0xaa, 0xbc,
- 0xcb, 0x14, 0xe5, 0x0e, 0xc6, 0x00, 0x46, 0x0d, 0xe0, 0xc3, 0xaa, 0xc2,
- 0xc8, 0xb7, 0x55, 0x0e, 0xc3, 0x80, 0x00, 0x43, 0xaa, 0xce, 0xc2, 0x00,
- 0x15, 0x0e, 0xcc, 0x78, 0xca, 0x01, 0xf7, 0x01, 0x5d, 0x09, 0xc9, 0x01,
- 0x1e, 0x01, 0x5d, 0x00, 0xcc, 0x12, 0x30, 0x07, 0xeb, 0x41, 0xca, 0x2b,
- 0x13, 0x07, 0xeb, 0x38, 0xca, 0x2b, 0x13, 0x07, 0xe3, 0x41, 0xcd, 0x05,
- 0x7a, 0x07, 0xe0, 0x18, 0xca, 0x9f, 0xaa, 0x00, 0x3b, 0xb1, 0xc8, 0xbc,
- 0x5d, 0x00, 0x3b, 0xa8, 0xd5, 0x0e, 0xb5, 0x00, 0x45, 0x20, 0xc5, 0x01,
- 0x62, 0x00, 0x35, 0x29, 0xd6, 0x31, 0x69, 0x00, 0x3b, 0x08, 0x45, 0x02,
- 0x4d, 0xc3, 0xaa, 0xe6, 0x14, 0xc3, 0xaa, 0xf2, 0xd2, 0x4b, 0xe4, 0x00,
- 0x43, 0xab, 0x03, 0xaa, 0xfe, 0xcf, 0x69, 0x4d, 0x00, 0x43, 0x8b, 0x03,
- 0xab, 0x04, 0xc5, 0x4b, 0xf1, 0x00, 0x43, 0xa1, 0xc5, 0x69, 0x57, 0x00,
- 0x43, 0x80, 0x45, 0x00, 0x56, 0x43, 0xab, 0x0a, 0xc5, 0x01, 0x62, 0x00,
- 0x33, 0x99, 0xc5, 0x00, 0x95, 0x00, 0x33, 0x90, 0xc5, 0x01, 0x62, 0x00,
- 0x31, 0x2b, 0x03, 0xab, 0x16, 0xc5, 0x00, 0x95, 0x00, 0x31, 0x1a, 0x03,
- 0xab, 0x1a, 0x00, 0x43, 0xab, 0x1e, 0xc8, 0xbc, 0x5d, 0x00, 0x3b, 0x99,
- 0xca, 0x9f, 0xaa, 0x00, 0x3b, 0xa0, 0xca, 0x2b, 0x13, 0x07, 0xda, 0x89,
- 0xcd, 0x05, 0x7a, 0x07, 0xda, 0x80, 0xd0, 0x08, 0x69, 0x00, 0x44, 0x69,
- 0xc5, 0x00, 0x95, 0x00, 0x31, 0xd8, 0xc5, 0x01, 0x62, 0x00, 0x31, 0xe1,
- 0xc5, 0x00, 0x95, 0x00, 0x3b, 0x19, 0xd6, 0x31, 0x69, 0x00, 0x3b, 0x20,
- 0xc5, 0x01, 0x62, 0x00, 0x45, 0xa1, 0xc5, 0x00, 0x95, 0x00, 0x35, 0x60,
- 0xcf, 0x68, 0x8a, 0x00, 0x35, 0x71, 0xcd, 0x01, 0x47, 0x00, 0x3b, 0xf8,
- 0xc4, 0xe1, 0xe3, 0x00, 0x36, 0x19, 0xcd, 0x05, 0x7a, 0x07, 0xf4, 0x99,
- 0xca, 0x2b, 0x13, 0x07, 0xf4, 0xa0, 0xc5, 0x01, 0x62, 0x00, 0x44, 0x61,
- 0xc5, 0x00, 0x95, 0x00, 0x34, 0xf8, 0xd0, 0x5c, 0x02, 0x00, 0x45, 0xd1,
- 0xc9, 0x17, 0x7a, 0x00, 0x45, 0x49, 0xcb, 0x09, 0x89, 0x00, 0x45, 0x40,
- 0x0b, 0xc3, 0xab, 0x2a, 0xca, 0x2b, 0x13, 0x07, 0xf4, 0x51, 0xcb, 0x66,
- 0x54, 0x07, 0xf4, 0x60, 0xcb, 0x09, 0x89, 0x00, 0x36, 0x9b, 0x03, 0xab,
- 0x36, 0x5d, 0x11, 0xe5, 0x43, 0xab, 0x3a, 0xca, 0x5c, 0x08, 0x00, 0x45,
- 0xc9, 0x98, 0x00, 0x34, 0x93, 0x03, 0xab, 0x46, 0xde, 0x02, 0x89, 0x00,
- 0x3b, 0x88, 0xc6, 0x01, 0x61, 0x00, 0x45, 0x00, 0xd6, 0x31, 0x69, 0x00,
- 0x3a, 0x93, 0x03, 0xab, 0x4c, 0xd2, 0x4c, 0x08, 0x00, 0x3a, 0x80, 0xd5,
- 0x0e, 0xb5, 0x00, 0x34, 0xe0, 0x4a, 0x04, 0x5e, 0xc3, 0xab, 0x52, 0x46,
- 0x01, 0x47, 0x43, 0xab, 0x5e, 0x98, 0x00, 0x37, 0x71, 0xcd, 0x2c, 0xa2,
- 0x00, 0x3a, 0xd0, 0xce, 0x08, 0x79, 0x00, 0x34, 0x58, 0x4a, 0x04, 0x5e,
- 0xc3, 0xab, 0x64, 0x48, 0x01, 0x47, 0x43, 0xab, 0x70, 0xe0, 0x07, 0xe7,
- 0x00, 0x3b, 0xe0, 0xc5, 0x01, 0x62, 0x00, 0x3b, 0x71, 0x03, 0x43, 0xab,
- 0x7c, 0xcb, 0x12, 0x31, 0x07, 0xdd, 0x61, 0xcc, 0x05, 0x7b, 0x07, 0xdd,
- 0x50, 0xcb, 0x12, 0x31, 0x07, 0xdd, 0x41, 0xcc, 0x05, 0x7b, 0x07, 0xdd,
- 0x30, 0xca, 0x2b, 0x13, 0x07, 0xdd, 0x29, 0xcd, 0x05, 0x7a, 0x07, 0xdd,
- 0x20, 0xd0, 0x13, 0x2a, 0x0f, 0xdd, 0x58, 0xcf, 0x09, 0x08, 0x0f, 0xdd,
- 0x50, 0xa5, 0x01, 0x47, 0xf8, 0xd3, 0x46, 0x48, 0x0e, 0xf8, 0x40, 0xd1,
- 0x00, 0xf6, 0x05, 0x5a, 0x11, 0xc6, 0x01, 0x01, 0x05, 0x5a, 0x08, 0xcc,
- 0x57, 0x82, 0x0e, 0xf8, 0xb1, 0xcc, 0x21, 0x0b, 0x00, 0xeb, 0x98, 0xc5,
- 0x01, 0x62, 0x00, 0xf2, 0xdb, 0x03, 0xab, 0x88, 0xc5, 0x00, 0x95, 0x00,
- 0xf2, 0xc8, 0xcb, 0x9a, 0x2a, 0x00, 0x11, 0x88, 0xc9, 0x0e, 0xac, 0x00,
- 0xf6, 0x39, 0xc5, 0x21, 0x12, 0x00, 0xf6, 0x29, 0xca, 0x9f, 0xc8, 0x00,
- 0xf6, 0x19, 0xc5, 0x1f, 0x94, 0x00, 0xf6, 0x09, 0xc5, 0x35, 0x4a, 0x00,
- 0xf5, 0xf8, 0xc9, 0x0e, 0xac, 0x00, 0xf7, 0x89, 0xc5, 0x21, 0x12, 0x00,
- 0xf7, 0x79, 0xca, 0x9f, 0xc8, 0x00, 0xf7, 0x69, 0xc5, 0x1f, 0x94, 0x00,
- 0xf7, 0x59, 0xc5, 0x35, 0x4a, 0x00, 0xf7, 0x48, 0xc5, 0x35, 0x4a, 0x00,
- 0x0b, 0x89, 0xc5, 0x1f, 0x94, 0x00, 0x10, 0xa8, 0xc5, 0x01, 0x62, 0x00,
- 0xf3, 0x99, 0x44, 0x00, 0x57, 0x43, 0xab, 0x8e, 0xc9, 0x0e, 0xac, 0x00,
- 0xf5, 0x69, 0xc5, 0x21, 0x12, 0x00, 0xf5, 0x59, 0xca, 0x9f, 0xc8, 0x00,
- 0xf5, 0x49, 0xc5, 0x1f, 0x94, 0x00, 0xf5, 0x39, 0xc5, 0x35, 0x4a, 0x00,
- 0xf5, 0x28, 0xc5, 0x01, 0x62, 0x00, 0xf5, 0x09, 0xc5, 0x00, 0x95, 0x00,
- 0x11, 0x3a, 0x03, 0xab, 0xa6, 0xc5, 0x01, 0x62, 0x00, 0xf0, 0x09, 0xc5,
- 0x00, 0x95, 0x00, 0x07, 0x2a, 0x03, 0xab, 0xac, 0xc6, 0x60, 0xe6, 0x00,
- 0x0e, 0xa9, 0xc5, 0x35, 0x4a, 0x00, 0x0e, 0xb9, 0xc5, 0x9a, 0x9e, 0x00,
- 0x0e, 0xc9, 0xc5, 0x1f, 0x94, 0x00, 0x0e, 0xd8, 0xc6, 0xc6, 0xf2, 0x05,
- 0x4b, 0x91, 0xc5, 0xc8, 0x2e, 0x00, 0x89, 0x18, 0xc3, 0x01, 0xb4, 0x01,
- 0x9f, 0xa1, 0x16, 0xc3, 0xab, 0xb2, 0x08, 0xc3, 0xab, 0xbe, 0x15, 0xc3,
- 0xab, 0xca, 0xc5, 0x01, 0xdb, 0x01, 0x9f, 0xd9, 0xc4, 0x22, 0x71, 0x01,
- 0x9f, 0xe0, 0xc2, 0x01, 0x47, 0x01, 0x9b, 0x71, 0xc4, 0x04, 0x5e, 0x01,
- 0x9b, 0x78, 0xd3, 0x46, 0x48, 0x08, 0x3d, 0x38, 0xc5, 0x03, 0xe2, 0x0e,
- 0x8a, 0x89, 0xc5, 0x02, 0x31, 0x0e, 0x8a, 0x80, 0x45, 0xae, 0xe0, 0xc3,
- 0xab, 0xd6, 0xc2, 0x04, 0x4e, 0x0e, 0x8b, 0x28, 0xcb, 0x90, 0xf8, 0x0e,
- 0x8c, 0x59, 0x46, 0x73, 0x7b, 0x43, 0xab, 0xe0, 0xa2, 0x0e, 0x8b, 0x91,
- 0xa1, 0x0e, 0x8b, 0x89, 0xa0, 0x0e, 0x8b, 0x81, 0x9f, 0x0e, 0x8b, 0x79,
- 0x9e, 0x0e, 0x8b, 0x70, 0xc9, 0xae, 0xdd, 0x0e, 0x8c, 0x08, 0x45, 0x00,
- 0x56, 0x43, 0xab, 0xec, 0x12, 0xc3, 0xac, 0x02, 0xc4, 0xe5, 0x9f, 0x00,
- 0xfb, 0x6b, 0x03, 0xac, 0x11, 0xc5, 0x2a, 0xae, 0x00, 0xfb, 0x5a, 0x03,
- 0xac, 0x17, 0xc4, 0xe5, 0x9f, 0x00, 0xfa, 0x69, 0xc5, 0x2a, 0xae, 0x00,
- 0xfa, 0x58, 0xc4, 0xe5, 0x9f, 0x00, 0xfa, 0x61, 0xc5, 0x2a, 0xae, 0x00,
- 0xfa, 0x50, 0xcb, 0x95, 0x4f, 0x00, 0xfa, 0xf9, 0xc4, 0xe5, 0x9f, 0x00,
- 0xfa, 0xe9, 0xc5, 0x2a, 0xae, 0x00, 0xfa, 0xd8, 0xcb, 0x95, 0x4f, 0x00,
- 0xf9, 0xf9, 0xc4, 0xe5, 0x9f, 0x00, 0xf9, 0xe9, 0xc5, 0x2a, 0xae, 0x00,
- 0xf9, 0xd8, 0x45, 0x00, 0x56, 0x43, 0xac, 0x1d, 0x12, 0xc3, 0xac, 0x33,
- 0xc4, 0xe5, 0x9f, 0x00, 0xf9, 0x6b, 0x03, 0xac, 0x42, 0xc5, 0x2a, 0xae,
- 0x00, 0xf9, 0x5a, 0x03, 0xac, 0x48, 0xc4, 0xe5, 0x9f, 0x00, 0xf8, 0xe9,
- 0xc5, 0x2a, 0xae, 0x00, 0xf8, 0xd8, 0xc4, 0xe5, 0x9f, 0x00, 0xf8, 0xe1,
- 0xc5, 0x2a, 0xae, 0x00, 0xf8, 0xd0, 0x45, 0x00, 0x56, 0x43, 0xac, 0x4e,
- 0x12, 0xc3, 0xac, 0x64, 0xc4, 0xe5, 0x9f, 0x00, 0xf8, 0x6b, 0x03, 0xac,
- 0x73, 0xc5, 0x2a, 0xae, 0x00, 0xf8, 0x5a, 0x03, 0xac, 0x79, 0xd2, 0x48,
- 0x18, 0x00, 0xff, 0xb8, 0x45, 0x00, 0x56, 0x43, 0xac, 0x7f, 0xcb, 0x95,
- 0x4f, 0x00, 0xfb, 0x3b, 0x03, 0xac, 0xa0, 0xc4, 0xe5, 0x9f, 0x00, 0xfb,
- 0x2b, 0x03, 0xac, 0xa6, 0xc5, 0x2a, 0xae, 0x00, 0xfb, 0x1b, 0x03, 0xac,
- 0xac, 0xcd, 0x48, 0x1d, 0x00, 0xfd, 0x08, 0xc4, 0xe5, 0x9f, 0x00, 0xfa,
- 0x29, 0xc5, 0x2a, 0xae, 0x00, 0xfa, 0x18, 0xc4, 0xe5, 0x9f, 0x00, 0xfa,
- 0x21, 0xc5, 0x2a, 0xae, 0x00, 0xfa, 0x10, 0xcb, 0x95, 0x4f, 0x00, 0xff,
- 0x39, 0xc4, 0xe5, 0x9f, 0x00, 0xff, 0x19, 0xc5, 0x2a, 0xae, 0x00, 0xff,
- 0x11, 0xc5, 0x63, 0xc6, 0x00, 0x1d, 0x80, 0xcb, 0x95, 0x4f, 0x00, 0xfa,
- 0xb9, 0xc4, 0xe5, 0x9f, 0x00, 0xfa, 0xa9, 0xc5, 0x2a, 0xae, 0x00, 0xfa,
- 0x98, 0xcb, 0x95, 0x4f, 0x00, 0xfa, 0xb1, 0xc4, 0xe5, 0x9f, 0x00, 0xfa,
- 0xa1, 0xc5, 0x2a, 0xae, 0x00, 0xfa, 0x90, 0xcb, 0x95, 0x4f, 0x00, 0xfe,
- 0xb9, 0xc4, 0xe5, 0x9f, 0x00, 0xfe, 0x99, 0xc5, 0x2a, 0xae, 0x00, 0xfe,
- 0x91, 0xc5, 0x63, 0xc6, 0x00, 0x1c, 0x80, 0xcb, 0x95, 0x4f, 0x00, 0xf9,
- 0xb9, 0xc4, 0xe5, 0x9f, 0x00, 0xf9, 0xa9, 0xc5, 0x2a, 0xae, 0x00, 0xf9,
- 0x98, 0xcb, 0x95, 0x4f, 0x00, 0xf9, 0xb1, 0xc4, 0xe5, 0x9f, 0x00, 0xf9,
- 0xa1, 0xc5, 0x2a, 0xae, 0x00, 0xf9, 0x90, 0xd2, 0x48, 0x18, 0x00, 0xfe,
- 0x38, 0x45, 0x00, 0x56, 0x43, 0xac, 0xb2, 0xcb, 0x95, 0x4f, 0x00, 0xf9,
- 0x3b, 0x03, 0xac, 0xd3, 0xc4, 0xe5, 0x9f, 0x00, 0xf9, 0x2b, 0x03, 0xac,
- 0xd9, 0xc5, 0x2a, 0xae, 0x00, 0xf9, 0x1b, 0x03, 0xac, 0xdf, 0xcd, 0x48,
- 0x1d, 0x00, 0xfc, 0x88, 0xc4, 0xe5, 0x9f, 0x00, 0xf8, 0xa9, 0xc5, 0x2a,
- 0xae, 0x00, 0xf8, 0x98, 0xc4, 0xe5, 0x9f, 0x00, 0xf8, 0xa1, 0xc5, 0x2a,
- 0xae, 0x00, 0xf8, 0x90, 0xd2, 0x48, 0x18, 0x00, 0xfd, 0xb8, 0x45, 0x00,
- 0x56, 0x43, 0xac, 0xe5, 0xd2, 0x48, 0x18, 0x00, 0xfd, 0x90, 0xcb, 0x95,
- 0x4f, 0x00, 0xf8, 0x3b, 0x03, 0xad, 0x06, 0xc4, 0xe5, 0x9f, 0x00, 0xf8,
- 0x2b, 0x03, 0xad, 0x0c, 0xc5, 0x2a, 0xae, 0x00, 0xf8, 0x1b, 0x03, 0xad,
- 0x12, 0xcd, 0x48, 0x1d, 0x00, 0xfc, 0x08, 0xc7, 0xbf, 0xd6, 0x08, 0x0a,
- 0x61, 0xc7, 0x67, 0x1b, 0x08, 0x0a, 0x98, 0xc8, 0xbf, 0xd5, 0x08, 0x0a,
- 0x70, 0xc8, 0x67, 0x1a, 0x08, 0x0a, 0xb0, 0xca, 0x9f, 0x00, 0x0e, 0x7d,
- 0xe8, 0x46, 0x00, 0x6b, 0x43, 0xad, 0x18, 0xcc, 0x86, 0x50, 0x0e, 0xc8,
- 0x01, 0xca, 0x94, 0x32, 0x0e, 0xc7, 0xf0, 0xc9, 0x66, 0xec, 0x0e, 0xc1,
- 0x60, 0xc5, 0x01, 0x7b, 0x0e, 0xc7, 0x5b, 0x03, 0xad, 0x24, 0x17, 0x43,
- 0xad, 0x2a, 0x4a, 0x70, 0x1f, 0x43, 0xad, 0x34, 0xc4, 0x17, 0x9a, 0x0e,
- 0xc7, 0x29, 0xc8, 0x43, 0xd5, 0x0e, 0xc7, 0x20, 0x00, 0x43, 0xad, 0x40,
- 0xcc, 0x88, 0x9c, 0x0e, 0xc1, 0xd9, 0xcd, 0x79, 0xad, 0x0e, 0xc1, 0xd0,
- 0x05, 0xc3, 0xad, 0x52, 0xc6, 0x14, 0xea, 0x0e, 0xc5, 0x21, 0x14, 0xc3,
- 0xad, 0x61, 0xc5, 0x0d, 0xe0, 0x0e, 0xc0, 0xf3, 0x03, 0xad, 0x70, 0xd7,
- 0x29, 0xc4, 0x0e, 0xc1, 0x39, 0xc6, 0x5e, 0xcc, 0x0e, 0xc0, 0x93, 0x03,
- 0xad, 0x74, 0xc4, 0x17, 0x9a, 0x0e, 0xc0, 0x83, 0x03, 0xad, 0x7a, 0xd3,
- 0x43, 0xd5, 0x0e, 0xc1, 0x00, 0xc9, 0x70, 0x22, 0x0e, 0xc0, 0xa3, 0x03,
- 0xad, 0x80, 0xc3, 0x01, 0x64, 0x0e, 0xc0, 0x60, 0xc9, 0x14, 0xf1, 0x0e,
- 0xc1, 0x29, 0xc4, 0x0d, 0xf4, 0x0e, 0xc1, 0x20, 0xc7, 0x1b, 0x1c, 0x0e,
- 0xc2, 0x09, 0xc2, 0x01, 0xc7, 0x0e, 0xc2, 0x00, 0xc6, 0x5e, 0xcc, 0x0e,
- 0xc1, 0xc9, 0xc2, 0x01, 0xc7, 0x0e, 0xc1, 0xc0, 0xc6, 0x3e, 0x81, 0x0e,
- 0xc4, 0x81, 0xc8, 0x43, 0xd5, 0x0e, 0xc4, 0x78, 0xc4, 0x17, 0x9a, 0x0e,
- 0xc2, 0x89, 0xc9, 0xb3, 0xb7, 0x0e, 0xc2, 0x78, 0x00, 0x43, 0xad, 0x86,
- 0xc6, 0xcc, 0x49, 0x0e, 0xc2, 0x40, 0x15, 0xc3, 0xad, 0x92, 0xc5, 0x17,
- 0xef, 0x0e, 0xc7, 0x79, 0xc4, 0x01, 0x75, 0x0e, 0xc7, 0x70, 0xca, 0x14,
- 0xf0, 0x0e, 0xc4, 0x68, 0xc5, 0x01, 0x74, 0x0e, 0xc7, 0x68, 0xc7, 0x29,
- 0xd4, 0x0e, 0xc3, 0x91, 0xc4, 0x0d, 0xf4, 0x0e, 0xc3, 0x70, 0x45, 0x0d,
- 0xbc, 0xc3, 0xad, 0x9e, 0xc6, 0x14, 0xea, 0x0e, 0xc5, 0x29, 0xc4, 0x00,
- 0x5b, 0x0e, 0xc4, 0x39, 0xc5, 0x0d, 0xe0, 0x0e, 0xc0, 0xf8, 0xc5, 0x09,
- 0x89, 0x00, 0x44, 0x11, 0xc9, 0x4d, 0x67, 0x00, 0x43, 0xc0, 0x45, 0x02,
- 0x13, 0xc3, 0xad, 0xaa, 0x49, 0x75, 0x39, 0x43, 0xad, 0xb6, 0x45, 0x00,
- 0x56, 0x43, 0xad, 0xc2, 0x45, 0x00, 0x56, 0x43, 0xad, 0xce, 0xc9, 0xb1,
- 0x80, 0x00, 0x43, 0xf9, 0xc9, 0x17, 0x7a, 0x00, 0x43, 0xe0, 0x00, 0x43,
- 0xad, 0xda, 0x00, 0x43, 0xad, 0xe6, 0xcd, 0x05, 0x7a, 0x07, 0xf4, 0x09,
- 0xca, 0x2b, 0x13, 0x07, 0xf4, 0x10, 0xcc, 0x05, 0x7b, 0x07, 0xf4, 0x49,
- 0xcb, 0x12, 0x31, 0x07, 0xf4, 0x58, 0x00, 0x43, 0xad, 0xf2, 0xca, 0x9f,
- 0xaa, 0x00, 0x3b, 0xd9, 0xc8, 0xbc, 0x5d, 0x00, 0x3b, 0xd0, 0xc6, 0x01,
- 0x61, 0x00, 0x34, 0xa8, 0xd3, 0x1d, 0x99, 0x00, 0x3a, 0x98, 0xc5, 0x01,
- 0x62, 0x00, 0x45, 0x71, 0xcf, 0x1a, 0xc6, 0x00, 0x34, 0x78, 0xe0, 0x08,
- 0x67, 0x00, 0x3a, 0xc8, 0xc5, 0x00, 0x95, 0x00, 0x34, 0x29, 0xd6, 0x31,
- 0x69, 0x00, 0x3a, 0xc0, 0xce, 0x75, 0x6b, 0x00, 0x34, 0x11, 0xc5, 0x00,
- 0x95, 0x00, 0x3a, 0xb8, 0xcb, 0x01, 0x56, 0x00, 0x3b, 0x79, 0xc4, 0x00,
- 0x96, 0x00, 0x3b, 0x90, 0xcb, 0x9a, 0x2a, 0x00, 0xf2, 0xe8, 0xc6, 0x60,
- 0xe6, 0x00, 0x0e, 0xb1, 0xc5, 0x35, 0x4a, 0x00, 0x0e, 0xc1, 0xc5, 0x9a,
- 0x9e, 0x00, 0x0e, 0xd1, 0xc5, 0x1f, 0x94, 0x00, 0x0e, 0xe0, 0xcb, 0x9a,
- 0x2a, 0x00, 0x0f, 0x08, 0xca, 0xa9, 0x28, 0x00, 0x0f, 0xd8, 0xc2, 0x01,
- 0x47, 0x01, 0x9f, 0xa9, 0xc4, 0x04, 0x5e, 0x01, 0x9f, 0xb0, 0xc3, 0x06,
- 0x9e, 0x01, 0x9f, 0xb9, 0xc3, 0x0c, 0x5b, 0x01, 0x9f, 0xc0, 0xc2, 0x26,
- 0x51, 0x01, 0x9f, 0xc9, 0xc4, 0x18, 0x83, 0x01, 0x9f, 0xd0, 0xc6, 0xcf,
- 0xb5, 0x0e, 0x8b, 0xf1, 0x91, 0x0e, 0x8b, 0xe0, 0xa0, 0x0e, 0x8b, 0x49,
- 0x9f, 0x0e, 0x8b, 0x41, 0x9e, 0x0e, 0x8b, 0x38, 0x12, 0xc3, 0xad, 0xfe,
- 0xc4, 0xe5, 0x9f, 0x00, 0xfb, 0x63, 0x03, 0xae, 0x0d, 0xc5, 0x2a, 0xae,
- 0x00, 0xfb, 0x52, 0x03, 0xae, 0x13, 0xca, 0x95, 0x50, 0x00, 0xfb, 0x7b,
- 0x03, 0xae, 0x19, 0xcd, 0x44, 0xd2, 0x00, 0xfd, 0x48, 0xd3, 0x44, 0xcc,
- 0x00, 0xfd, 0x68, 0xd3, 0x44, 0xcc, 0x00, 0xfd, 0x58, 0x12, 0xc3, 0xae,
- 0x1f, 0xc4, 0xe5, 0x9f, 0x00, 0xf9, 0x63, 0x03, 0xae, 0x2e, 0xc5, 0x2a,
- 0xae, 0x00, 0xf9, 0x52, 0x03, 0xae, 0x34, 0xca, 0x95, 0x50, 0x00, 0xf9,
- 0x7b, 0x03, 0xae, 0x3a, 0xcd, 0x44, 0xd2, 0x00, 0xfc, 0xc8, 0xd3, 0x44,
- 0xcc, 0x00, 0xfc, 0xe8, 0xd3, 0x44, 0xcc, 0x00, 0xfc, 0xd8, 0x12, 0xc3,
- 0xae, 0x40, 0xc4, 0xe5, 0x9f, 0x00, 0xf8, 0x63, 0x03, 0xae, 0x4f, 0xc5,
- 0x2a, 0xae, 0x00, 0xf8, 0x52, 0x03, 0xae, 0x55, 0xca, 0x95, 0x50, 0x00,
- 0xf8, 0x7b, 0x03, 0xae, 0x5b, 0xcd, 0x44, 0xd2, 0x00, 0xfc, 0x48, 0xd3,
- 0x44, 0xcc, 0x00, 0xfc, 0x68, 0xd3, 0x44, 0xcc, 0x00, 0xfc, 0x58, 0xcb,
- 0x95, 0x4f, 0x00, 0xfb, 0x33, 0x03, 0xae, 0x61, 0xc4, 0xe5, 0x9f, 0x00,
- 0xfb, 0x23, 0x03, 0xae, 0x67, 0xc5, 0x2a, 0xae, 0x00, 0xfb, 0x13, 0x03,
- 0xae, 0x6d, 0xcd, 0x48, 0x1d, 0x00, 0xfd, 0x00, 0xd2, 0x48, 0x18, 0x00,
- 0xfd, 0x38, 0xd2, 0x48, 0x18, 0x00, 0xfd, 0x28, 0xd2, 0x48, 0x18, 0x00,
- 0xfd, 0x18, 0xcb, 0x95, 0x4f, 0x00, 0xf9, 0x33, 0x03, 0xae, 0x73, 0xc4,
- 0xe5, 0x9f, 0x00, 0xf9, 0x23, 0x03, 0xae, 0x79, 0xc5, 0x2a, 0xae, 0x00,
- 0xf9, 0x13, 0x03, 0xae, 0x7f, 0xcd, 0x48, 0x1d, 0x00, 0xfc, 0x80, 0xd2,
- 0x48, 0x18, 0x00, 0xfc, 0xb8, 0xd2, 0x48, 0x18, 0x00, 0xfc, 0xa8, 0xd2,
- 0x48, 0x18, 0x00, 0xfc, 0x98, 0xcb, 0x95, 0x4f, 0x00, 0xf8, 0x33, 0x03,
- 0xae, 0x85, 0xc4, 0xe5, 0x9f, 0x00, 0xf8, 0x23, 0x03, 0xae, 0x8b, 0xc5,
- 0x2a, 0xae, 0x00, 0xf8, 0x13, 0x03, 0xae, 0x91, 0xcd, 0x48, 0x1d, 0x00,
- 0xfc, 0x00, 0xd2, 0x48, 0x18, 0x00, 0xfc, 0x38, 0xd2, 0x48, 0x18, 0x00,
- 0xfc, 0x28, 0xd2, 0x48, 0x18, 0x00, 0xfc, 0x18, 0xd0, 0x5d, 0x72, 0x0e,
- 0x7d, 0xd9, 0xd0, 0x2d, 0xe9, 0x0e, 0x7d, 0xc0, 0xcb, 0x70, 0x20, 0x0e,
- 0xc1, 0xe0, 0x14, 0xc3, 0xae, 0x97, 0xce, 0x70, 0x1d, 0x0e, 0xc1, 0xb8,
- 0xc6, 0x5e, 0xcc, 0x0e, 0xc2, 0x19, 0xc2, 0x01, 0xc7, 0x0e, 0xc1, 0x88,
- 0x46, 0x0d, 0xda, 0xc3, 0xae, 0xa3, 0xc9, 0xaa, 0x93, 0x0e, 0xc7, 0x11,
- 0x46, 0x0d, 0xe0, 0x43, 0xae, 0xaf, 0x44, 0x0d, 0xbd, 0xc3, 0xae, 0xc1,
- 0xc8, 0x14, 0xf2, 0x0e, 0xc0, 0xaa, 0x03, 0xae, 0xd0, 0xc3, 0x00, 0x34,
- 0x0e, 0xc4, 0x33, 0x03, 0xae, 0xd4, 0xce, 0x3d, 0x1f, 0x0e, 0xc0, 0x88,
- 0x00, 0x43, 0xae, 0xd8, 0xd2, 0x4d, 0x3a, 0x0e, 0xc1, 0x18, 0xcf, 0x66,
- 0xe6, 0x0e, 0xc1, 0x08, 0xcb, 0x4d, 0x41, 0x0e, 0xc1, 0x30, 0xc8, 0xb7,
- 0x55, 0x0e, 0xc2, 0xc9, 0xca, 0x4d, 0x42, 0x0e, 0xc2, 0xc0, 0xc4, 0x04,
- 0x74, 0x0e, 0xc7, 0x89, 0xc3, 0x03, 0x33, 0x0e, 0xc6, 0xe8, 0xc7, 0x14,
- 0xe9, 0x0e, 0xc5, 0x51, 0xc2, 0x01, 0xf4, 0x0e, 0xc0, 0xd8, 0xc5, 0x09,
- 0x89, 0x00, 0x44, 0x09, 0xc9, 0x4d, 0x67, 0x00, 0x43, 0xb8, 0xc5, 0x01,
- 0x62, 0x00, 0x43, 0xc9, 0xc5, 0x00, 0x95, 0x00, 0x43, 0xb0, 0xc9, 0xb1,
- 0x80, 0x00, 0x44, 0x01, 0xc9, 0x17, 0x7a, 0x00, 0x43, 0xe8, 0xc9, 0xb1,
- 0x80, 0x00, 0x43, 0xf1, 0xc9, 0x17, 0x7a, 0x00, 0x43, 0xd8, 0xca, 0x2b,
- 0x13, 0x07, 0xf4, 0x41, 0xcd, 0x05, 0x7a, 0x07, 0xf4, 0x38, 0xcd, 0x05,
- 0x7a, 0x07, 0xf4, 0x19, 0xca, 0x2b, 0x13, 0x07, 0xf4, 0x20, 0xca, 0x2b,
- 0x13, 0x07, 0xdd, 0x89, 0xcd, 0x05, 0x7a, 0x07, 0xdd, 0x80, 0xca, 0x95,
- 0x50, 0x00, 0xfb, 0x73, 0x03, 0xae, 0xef, 0xcd, 0x44, 0xd2, 0x00, 0xfd,
- 0x40, 0xd3, 0x44, 0xcc, 0x00, 0xfd, 0x60, 0xd3, 0x44, 0xcc, 0x00, 0xfd,
- 0x50, 0xd3, 0x44, 0xcc, 0x00, 0xfd, 0x78, 0xca, 0x95, 0x50, 0x00, 0xf9,
- 0x73, 0x03, 0xae, 0xf5, 0xcd, 0x44, 0xd2, 0x00, 0xfc, 0xc0, 0xd3, 0x44,
- 0xcc, 0x00, 0xfc, 0xe0, 0xd3, 0x44, 0xcc, 0x00, 0xfc, 0xd0, 0xd3, 0x44,
- 0xcc, 0x00, 0xfc, 0xf8, 0xca, 0x95, 0x50, 0x00, 0xf8, 0x73, 0x03, 0xae,
- 0xfb, 0xcd, 0x44, 0xd2, 0x00, 0xfc, 0x40, 0xd3, 0x44, 0xcc, 0x00, 0xfc,
- 0x60, 0xd3, 0x44, 0xcc, 0x00, 0xfc, 0x50, 0xd3, 0x44, 0xcc, 0x00, 0xfc,
- 0x78, 0xd2, 0x48, 0x18, 0x00, 0xfd, 0x30, 0xd2, 0x48, 0x18, 0x00, 0xfd,
- 0x20, 0xd2, 0x48, 0x18, 0x00, 0xfd, 0x10, 0xd2, 0x48, 0x18, 0x00, 0xfc,
- 0xb0, 0xd2, 0x48, 0x18, 0x00, 0xfc, 0xa0, 0xd2, 0x48, 0x18, 0x00, 0xfc,
- 0x90, 0xd2, 0x48, 0x18, 0x00, 0xfc, 0x30, 0xd2, 0x48, 0x18, 0x00, 0xfc,
- 0x20, 0xd2, 0x48, 0x18, 0x00, 0xfc, 0x10, 0x49, 0x0d, 0xe9, 0xc3, 0xaf,
- 0x01, 0xc5, 0xba, 0xa0, 0x0e, 0xc7, 0x38, 0xc5, 0x5e, 0xcc, 0x0e, 0xc7,
- 0x19, 0xc4, 0x17, 0x9a, 0x0e, 0xc7, 0x08, 0xc4, 0x17, 0x9a, 0x0e, 0xc7,
- 0x01, 0xc9, 0x14, 0xf1, 0x0e, 0xc6, 0xf9, 0xc8, 0x1d, 0xe4, 0x0e, 0xc6,
- 0xf0, 0xc7, 0x14, 0xe9, 0x0e, 0xc5, 0x49, 0xc2, 0x01, 0xf4, 0x0e, 0xc0,
- 0xd2, 0x03, 0xaf, 0x0d, 0x00, 0x43, 0xaf, 0x13, 0x00, 0x43, 0xaf, 0x37,
- 0xc6, 0xc1, 0xb1, 0x0e, 0xc1, 0xfb, 0x03, 0xaf, 0x43, 0x05, 0xc3, 0xaf,
- 0x49, 0x0a, 0xc3, 0xaf, 0x5b, 0xc4, 0x17, 0x9a, 0x0e, 0xc1, 0x10, 0xd3,
- 0x44, 0xcc, 0x00, 0xfd, 0x70, 0xd3, 0x44, 0xcc, 0x00, 0xfc, 0xf0, 0xd3,
- 0x44, 0xcc, 0x00, 0xfc, 0x70, 0xc5, 0x15, 0xf9, 0x0e, 0xc7, 0x51, 0xc6,
- 0x0d, 0xf2, 0x0e, 0xc7, 0x40, 0xcb, 0x4d, 0x41, 0x0e, 0xc1, 0x98, 0xc6,
- 0xd0, 0x63, 0x0e, 0xc0, 0xc3, 0x03, 0xaf, 0x67, 0x46, 0x0d, 0xe0, 0xc3,
- 0xaf, 0x6d, 0xc6, 0x5e, 0xcc, 0x0e, 0xc0, 0xcb, 0x03, 0xaf, 0x7c, 0xcb,
- 0x93, 0x3f, 0x0e, 0xc0, 0xb9, 0xca, 0xa1, 0x76, 0x0e, 0xc0, 0xb0, 0xc9,
- 0x14, 0xf1, 0x0e, 0xc4, 0x61, 0xc4, 0x17, 0x9a, 0x0e, 0xc4, 0x58, 0xc4,
- 0x0c, 0xab, 0x0e, 0xc1, 0xf0, 0xcf, 0x62, 0xbd, 0x0e, 0xc1, 0xe9, 0xc6,
- 0x20, 0x56, 0x0e, 0xc1, 0x49, 0xc5, 0x70, 0xef, 0x0e, 0xc1, 0x40, 0xc5,
- 0x5e, 0xcd, 0x0e, 0xc1, 0x59, 0xc5, 0x67, 0x8c, 0x0e, 0xc1, 0x50, 0xce,
- 0x29, 0xcd, 0x0e, 0xc1, 0xa8, 0xc7, 0x29, 0xd4, 0x0e, 0xc1, 0xa1, 0xc4,
- 0x0d, 0xf4, 0x0e, 0xc1, 0x6a, 0x03, 0xaf, 0x82, 0xcb, 0x4d, 0x41, 0x0e,
- 0xc1, 0x90, 0x00, 0x43, 0xaf, 0x86, 0xc4, 0x17, 0x9a, 0x0e, 0xc1, 0x79,
- 0xc9, 0x14, 0xf1, 0x0e, 0xc1, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0};
+ 0x8c, 0x60, 0x94, 0x08, 0x8c, 0x50, 0xd9, 0x20, 0x64, 0x01, 0x2f, 0x51,
+ 0xd8, 0x24, 0xbc, 0x01, 0x58, 0xa8, 0xd3, 0x20, 0x6a, 0x01, 0x2f, 0x49,
+ 0xd3, 0x40, 0x69, 0x01, 0x2d, 0x38, 0xd2, 0x4c, 0xaa, 0x01, 0x2d, 0x41,
+ 0xd3, 0x20, 0x6a, 0x01, 0x58, 0xa0, 0xc6, 0x01, 0xf8, 0x01, 0x9e, 0x71,
+ 0xc4, 0xde, 0xb0, 0x01, 0x9d, 0x30, 0xc8, 0x01, 0xe8, 0x01, 0x9d, 0x40,
+ 0xc2, 0xeb, 0x3a, 0x0f, 0x91, 0xc9, 0xc2, 0xed, 0xb3, 0x0f, 0x91, 0x01,
+ 0xc2, 0xea, 0xc3, 0x0f, 0x90, 0xe0, 0xc2, 0x71, 0x2b, 0x0f, 0x91, 0xa1,
+ 0xc2, 0xe5, 0x7d, 0x0f, 0x91, 0x28, 0xc2, 0xed, 0xcf, 0x0f, 0x91, 0x71,
+ 0xc2, 0x07, 0xa2, 0x0f, 0x90, 0x90, 0xc2, 0xed, 0xc3, 0x0f, 0x90, 0xb9,
+ 0xc2, 0xed, 0xb1, 0x0f, 0x90, 0xa8, 0xc2, 0xeb, 0xa8, 0x0f, 0x91, 0xc1,
+ 0xc2, 0xed, 0x22, 0x0f, 0x91, 0x10, 0xa5, 0x0f, 0x91, 0xb9, 0xa6, 0x0f,
+ 0x91, 0xb0, 0xc2, 0xeb, 0xa7, 0x0f, 0x91, 0x89, 0xc2, 0xed, 0xd9, 0x0f,
+ 0x91, 0x39, 0xc2, 0xed, 0xbb, 0x0f, 0x90, 0x80, 0xc2, 0x3a, 0xa2, 0x0f,
+ 0x91, 0x79, 0xc2, 0xdd, 0x5d, 0x0f, 0x91, 0x40, 0xc2, 0xaa, 0xd4, 0x0f,
+ 0x90, 0xf9, 0xc2, 0xed, 0xd7, 0x0f, 0x90, 0xd8, 0xa6, 0x0f, 0x91, 0x51,
+ 0x9d, 0x0f, 0x91, 0x48, 0xc6, 0x01, 0x21, 0x01, 0x20, 0xb8, 0xc2, 0x01,
+ 0x01, 0x00, 0x43, 0x29, 0x83, 0x00, 0x43, 0x20, 0xd3, 0x40, 0x0a, 0x0f,
+ 0xc9, 0x69, 0xcc, 0x8c, 0xa0, 0x0f, 0xcb, 0x80, 0xe0, 0x03, 0x67, 0x01,
+ 0x17, 0xe0, 0xe0, 0x03, 0x67, 0x01, 0x17, 0xa0, 0xc8, 0x4f, 0xa2, 0x01,
+ 0x0b, 0xf9, 0xc7, 0x0d, 0x7f, 0x01, 0x0b, 0xe8, 0xc2, 0x00, 0x29, 0x01,
+ 0x0b, 0xa3, 0x03, 0x5f, 0x5a, 0xc3, 0x41, 0xca, 0x01, 0x0b, 0xe0, 0xc4,
+ 0x25, 0x4d, 0x01, 0x0b, 0xd9, 0x91, 0x01, 0x0b, 0x88, 0xc3, 0xe2, 0x62,
+ 0x08, 0x43, 0x91, 0xc4, 0xdd, 0x34, 0x08, 0x43, 0x78, 0xc4, 0x05, 0xde,
+ 0x05, 0x47, 0xb1, 0xc2, 0x0a, 0x20, 0x05, 0x47, 0xa8, 0xcb, 0x05, 0x9b,
+ 0x0f, 0xc4, 0x99, 0x49, 0x00, 0x59, 0x43, 0x5f, 0x60, 0xe0, 0x05, 0x87,
+ 0x01, 0x5f, 0x78, 0xc5, 0x00, 0x4c, 0x01, 0x0e, 0x19, 0x00, 0x43, 0x5f,
+ 0x7b, 0xc5, 0x00, 0x4c, 0x01, 0x0e, 0x11, 0x00, 0x43, 0x5f, 0x8d, 0x45,
+ 0x01, 0xac, 0xc3, 0x5f, 0x99, 0xda, 0x1b, 0xbc, 0x01, 0x0f, 0xa9, 0xc8,
+ 0xab, 0xed, 0x01, 0x0d, 0x39, 0xc6, 0x12, 0x4f, 0x01, 0x48, 0x99, 0xda,
+ 0x1c, 0xf4, 0x0f, 0xdd, 0xb8, 0xc4, 0x24, 0x35, 0x01, 0x27, 0xe9, 0xc5,
+ 0x05, 0x1b, 0x01, 0x27, 0xe1, 0x15, 0xc3, 0x5f, 0xcf, 0x08, 0xc3, 0x5f,
+ 0xdb, 0x16, 0xc3, 0x5f, 0xe7, 0xc3, 0x05, 0x17, 0x01, 0x27, 0xa8, 0x47,
+ 0x02, 0x91, 0xc3, 0x5f, 0xf3, 0xce, 0x34, 0x7c, 0x01, 0x57, 0x18, 0xcf,
+ 0x00, 0xef, 0x01, 0x80, 0xf0, 0xc2, 0x00, 0xff, 0x01, 0x52, 0xa1, 0xc3,
+ 0x00, 0x3a, 0x01, 0x52, 0x98, 0xc5, 0x7b, 0x2f, 0x01, 0x02, 0x31, 0x48,
+ 0xc3, 0x9b, 0xc3, 0x5f, 0xff, 0xc8, 0x50, 0x0d, 0x01, 0x4c, 0x61, 0xc6,
+ 0x03, 0x81, 0x01, 0x72, 0xb1, 0xcd, 0x80, 0x14, 0x01, 0x72, 0xc0, 0xd1,
+ 0x52, 0xce, 0x0f, 0xab, 0x51, 0xce, 0x71, 0x12, 0x0f, 0xab, 0x48, 0x00,
+ 0x43, 0x60, 0x0b, 0xc6, 0x03, 0xfa, 0x01, 0x2e, 0xb9, 0xc4, 0x0e, 0xa5,
+ 0x01, 0x5f, 0x48, 0xd4, 0x3d, 0xca, 0x01, 0x4e, 0x70, 0xc5, 0x00, 0x62,
+ 0x01, 0x5b, 0x13, 0x03, 0x60, 0x2c, 0xcc, 0x83, 0x04, 0x01, 0x5b, 0x61,
+ 0xcd, 0x81, 0xce, 0x01, 0x5c, 0x30, 0x45, 0x01, 0xac, 0xc3, 0x60, 0x30,
+ 0xc8, 0xab, 0xed, 0x01, 0x48, 0x28, 0x44, 0x00, 0x68, 0xc3, 0x60, 0x40,
+ 0x42, 0x02, 0x6a, 0x43, 0x60, 0x4a, 0xd5, 0x00, 0x92, 0x0f, 0xc0, 0xb1,
+ 0x0e, 0xc3, 0x60, 0x54, 0x15, 0xc3, 0x60, 0x60, 0x42, 0x00, 0x68, 0xc3,
+ 0x60, 0x6c, 0xcf, 0x2e, 0xd8, 0x01, 0x0f, 0xc1, 0xd0, 0x5e, 0xbf, 0x01,
+ 0x0d, 0xa1, 0xc4, 0x04, 0x63, 0x01, 0x0d, 0x51, 0x16, 0xc3, 0x60, 0x78,
+ 0xca, 0xa5, 0x38, 0x01, 0x4a, 0x29, 0xd9, 0x20, 0xc8, 0x0f, 0xc0, 0x31,
+ 0xcc, 0x8a, 0xb4, 0x0f, 0xc4, 0xd0, 0x43, 0x12, 0x50, 0xc3, 0x60, 0x87,
+ 0x47, 0x21, 0xc4, 0x43, 0x60, 0x93, 0xd1, 0x55, 0x65, 0x01, 0x49, 0x00,
+ 0x45, 0x03, 0x51, 0xc3, 0x60, 0xa3, 0x43, 0x00, 0x3b, 0x43, 0x60, 0xbb,
+ 0x00, 0x43, 0x60, 0xc1, 0x87, 0x05, 0x28, 0x88, 0x91, 0x05, 0x2c, 0x10,
+ 0xc2, 0x01, 0xa7, 0x05, 0x30, 0x81, 0xc2, 0x06, 0x6b, 0x05, 0x30, 0x89,
+ 0xc3, 0xe0, 0x78, 0x05, 0x30, 0x91, 0xc2, 0x00, 0x3f, 0x05, 0x31, 0x51,
+ 0xc2, 0x00, 0x68, 0x05, 0x31, 0x58, 0x87, 0x05, 0x28, 0xf9, 0x90, 0x05,
+ 0x30, 0x28, 0x91, 0x05, 0x2c, 0x80, 0xc3, 0xeb, 0x58, 0x0b, 0x54, 0x99,
+ 0xc3, 0xeb, 0x16, 0x0b, 0x54, 0x90, 0x9a, 0x0b, 0x54, 0xd9, 0x93, 0x0b,
+ 0x54, 0xd1, 0x85, 0x0b, 0x54, 0xc9, 0x9c, 0x0b, 0x54, 0xc0, 0x42, 0x06,
+ 0x6f, 0xc3, 0x60, 0xcd, 0xc7, 0xc8, 0x2b, 0x00, 0x70, 0x30, 0x91, 0x00,
+ 0x70, 0x59, 0xc3, 0x14, 0x88, 0x00, 0x71, 0x41, 0xc2, 0x01, 0xeb, 0x00,
+ 0x71, 0x50, 0x83, 0x00, 0x71, 0x91, 0x8f, 0x00, 0x71, 0x99, 0x87, 0x00,
+ 0x72, 0x09, 0x46, 0xd3, 0xca, 0x43, 0x60, 0xe5, 0x8b, 0x00, 0x71, 0xa8,
+ 0x87, 0x00, 0x71, 0xb3, 0x03, 0x60, 0xf1, 0x97, 0x00, 0x71, 0xc8, 0x42,
+ 0x01, 0x47, 0xc3, 0x60, 0xf5, 0xca, 0xa7, 0xc2, 0x00, 0x70, 0x89, 0xc7,
+ 0xca, 0x46, 0x00, 0x70, 0x90, 0x42, 0x09, 0x37, 0xc3, 0x61, 0x05, 0xc7,
+ 0xcc, 0x29, 0x00, 0x71, 0x00, 0xc8, 0xbf, 0x9b, 0x00, 0x71, 0x89, 0xc2,
+ 0x13, 0x31, 0x00, 0x72, 0x41, 0x16, 0xc3, 0x61, 0x11, 0xc8, 0xc1, 0x53,
+ 0x00, 0x72, 0x58, 0x94, 0x00, 0x63, 0x00, 0x8e, 0x00, 0x63, 0x08, 0xc3,
+ 0xeb, 0xc1, 0x00, 0x78, 0xd1, 0xc4, 0x96, 0x40, 0x00, 0x78, 0xd9, 0xc3,
+ 0x69, 0xb1, 0x00, 0x78, 0xe0, 0xc3, 0xeb, 0xc1, 0x00, 0x78, 0xe9, 0xc4,
+ 0x96, 0x40, 0x00, 0x78, 0xf1, 0xc3, 0x69, 0xb1, 0x00, 0x7e, 0x78, 0xc5,
+ 0x42, 0x6d, 0x08, 0x77, 0xf9, 0xc4, 0xe7, 0xdf, 0x08, 0x77, 0xf1, 0xc5,
+ 0xe2, 0x6f, 0x08, 0x77, 0xe9, 0xc7, 0xca, 0x77, 0x00, 0x44, 0xd9, 0x0b,
+ 0x43, 0x61, 0x1d, 0xc5, 0xe3, 0x91, 0x00, 0x46, 0xf9, 0xc3, 0xec, 0xe1,
+ 0x00, 0x46, 0xf1, 0x42, 0x0e, 0xe5, 0xc3, 0x61, 0x29, 0x03, 0x43, 0x61,
+ 0x33, 0xcc, 0x05, 0x3b, 0x00, 0x37, 0x11, 0xcb, 0x10, 0x7a, 0x00, 0x36,
+ 0xc0, 0xde, 0x0e, 0x13, 0x00, 0x36, 0xb9, 0xde, 0x0e, 0xe5, 0x00, 0x36,
+ 0xb1, 0x4a, 0xa7, 0x54, 0x43, 0x61, 0x51, 0xc5, 0x00, 0x34, 0x07, 0xdd,
+ 0xf1, 0xc5, 0x03, 0x50, 0x07, 0xdd, 0xe8, 0xc5, 0x00, 0x34, 0x07, 0xdd,
+ 0xc9, 0xc5, 0x03, 0x50, 0x07, 0xdd, 0xc0, 0xcd, 0x05, 0x3a, 0x07, 0xe8,
+ 0x09, 0xca, 0x2a, 0xb4, 0x07, 0xe8, 0xe8, 0x0b, 0xc3, 0x61, 0x5d, 0x45,
+ 0x01, 0xac, 0x43, 0x61, 0x69, 0x0b, 0xc3, 0x61, 0x7b, 0x45, 0x01, 0xac,
+ 0x43, 0x61, 0x87, 0xca, 0x2a, 0xb4, 0x07, 0xe8, 0xf1, 0xcd, 0x05, 0x3a,
+ 0x07, 0xe8, 0x10, 0xcd, 0x05, 0x3a, 0x07, 0xe8, 0x01, 0xca, 0x2a, 0xb4,
+ 0x07, 0xe8, 0xe0, 0xcd, 0x05, 0x3a, 0x07, 0xe7, 0xf9, 0xca, 0x2a, 0xb4,
+ 0x07, 0xe8, 0xd8, 0x0b, 0xc3, 0x61, 0x93, 0x45, 0x01, 0xac, 0x43, 0x61,
+ 0x9f, 0x0b, 0xc3, 0x61, 0xab, 0xd3, 0x40, 0x43, 0x07, 0xed, 0xf8, 0x0b,
+ 0xc3, 0x61, 0xb7, 0x45, 0x01, 0xac, 0x43, 0x61, 0xc3, 0xcc, 0x05, 0x3b,
+ 0x07, 0xe2, 0x89, 0xcb, 0x10, 0x7a, 0x07, 0xe6, 0xb8, 0x44, 0x2f, 0x22,
+ 0xc3, 0x61, 0xcf, 0x0a, 0xc3, 0x61, 0xdb, 0x45, 0x1a, 0x6a, 0xc3, 0x61,
+ 0xe7, 0x4d, 0x06, 0x7a, 0xc3, 0x61, 0xfd, 0x45, 0x2f, 0xc8, 0xc3, 0x62,
+ 0x09, 0x45, 0x53, 0x23, 0xc3, 0x62, 0x1f, 0x44, 0x71, 0x66, 0x43, 0x62,
+ 0x2f, 0x45, 0x4c, 0x81, 0xc3, 0x62, 0x3b, 0x45, 0x53, 0x6d, 0xc3, 0x62,
+ 0x45, 0x46, 0xd4, 0xe4, 0xc3, 0x62, 0x4f, 0xde, 0x08, 0x29, 0x07, 0xe3,
+ 0x18, 0xcd, 0x05, 0x3a, 0x07, 0xe7, 0xd9, 0xca, 0x2a, 0xb4, 0x07, 0xe8,
+ 0xb8, 0x0b, 0xc3, 0x62, 0x5b, 0x45, 0x01, 0xac, 0xc3, 0x62, 0x67, 0xcb,
+ 0x6a, 0x72, 0x07, 0xe7, 0x38, 0x0b, 0xc3, 0x62, 0x79, 0xcb, 0x6a, 0x72,
+ 0x07, 0xe9, 0xb1, 0x45, 0x01, 0xac, 0x43, 0x62, 0x85, 0x43, 0x0a, 0x18,
+ 0xc3, 0x62, 0x91, 0x43, 0x08, 0x86, 0x43, 0x62, 0xa1, 0x0b, 0xc3, 0x62,
+ 0xad, 0xcb, 0x6a, 0x72, 0x07, 0xe9, 0xa1, 0x45, 0x01, 0xac, 0x43, 0x62,
+ 0xb9, 0xca, 0x2a, 0xb4, 0x07, 0xe9, 0x51, 0xcd, 0x05, 0x3a, 0x07, 0xe8,
+ 0x70, 0xcd, 0x05, 0x3a, 0x07, 0xe7, 0xe1, 0xca, 0x2a, 0xb4, 0x07, 0xe8,
+ 0xc0, 0x45, 0x1a, 0x6a, 0xc3, 0x62, 0xc5, 0x44, 0x1a, 0x74, 0xc3, 0x62,
+ 0xcf, 0x44, 0x71, 0x66, 0xc3, 0x62, 0xd9, 0xd1, 0x53, 0x23, 0x07, 0xe5,
+ 0x91, 0x4d, 0x06, 0x7a, 0xc3, 0x62, 0xe5, 0x44, 0x2f, 0x22, 0x43, 0x62,
+ 0xf1, 0x42, 0x00, 0x96, 0xc3, 0x62, 0xfd, 0x03, 0x43, 0x63, 0x07, 0xcc,
+ 0x05, 0x3b, 0x07, 0xe1, 0x61, 0xcb, 0x10, 0x7a, 0x07, 0xe5, 0xe8, 0xce,
+ 0x40, 0x48, 0x07, 0xeb, 0xd1, 0xd7, 0x2a, 0xa7, 0x07, 0xeb, 0xd9, 0xcf,
+ 0x6a, 0xf5, 0x07, 0xeb, 0xc8, 0xcd, 0x05, 0x3a, 0x07, 0xe7, 0xb9, 0xca,
+ 0x2a, 0xb4, 0x07, 0xe8, 0x98, 0x0b, 0xc3, 0x63, 0x13, 0x45, 0x01, 0xac,
+ 0x43, 0x63, 0x1f, 0x0b, 0xc3, 0x63, 0x31, 0x4a, 0x75, 0x68, 0x43, 0x63,
+ 0x3d, 0xca, 0x2a, 0xb4, 0x07, 0xe8, 0xa1, 0xcd, 0x05, 0x3a, 0x07, 0xe7,
+ 0xc0, 0x5e, 0x0f, 0xb7, 0xc3, 0x63, 0x49, 0x4e, 0x74, 0xae, 0x43, 0x63,
+ 0x55, 0x0b, 0xc3, 0x63, 0x61, 0xcc, 0x85, 0x8c, 0x07, 0xea, 0x69, 0xcf,
+ 0x6c, 0x12, 0x07, 0xef, 0xb8, 0x44, 0x2f, 0x22, 0xc3, 0x63, 0x6b, 0x4d,
+ 0x06, 0x7a, 0xc3, 0x63, 0x77, 0x45, 0x1a, 0x6a, 0xc3, 0x63, 0x83, 0x45,
+ 0x50, 0xae, 0x43, 0x63, 0x93, 0x44, 0x2f, 0x22, 0xc3, 0x63, 0x9f, 0x4d,
+ 0x06, 0x7a, 0xc3, 0x63, 0xab, 0xcf, 0x62, 0xb2, 0x07, 0xe3, 0xc9, 0x45,
+ 0x1a, 0x6a, 0xc3, 0x63, 0xb7, 0xcf, 0x65, 0x55, 0x07, 0xe3, 0xb9, 0xce,
+ 0x71, 0x66, 0x07, 0xe3, 0xb1, 0xd2, 0x48, 0xa8, 0x07, 0xe0, 0x89, 0xcf,
+ 0x6a, 0x6e, 0x07, 0xe7, 0x30, 0xe0, 0x08, 0x27, 0x07, 0xe2, 0xd8, 0xca,
+ 0x2a, 0xb4, 0x07, 0xe3, 0xa9, 0xcd, 0x05, 0x3a, 0x07, 0xe0, 0x80, 0xca,
+ 0x2a, 0xb4, 0x07, 0xe3, 0xa1, 0xcd, 0x05, 0x3a, 0x07, 0xe0, 0x78, 0xca,
+ 0x2a, 0xb4, 0x07, 0xe3, 0x91, 0x0b, 0xc3, 0x63, 0xc7, 0xcb, 0x6a, 0x72,
+ 0x07, 0xe7, 0x19, 0x45, 0x01, 0xac, 0x43, 0x63, 0xd3, 0x0b, 0xc3, 0x63,
+ 0xf1, 0x45, 0x01, 0xac, 0x43, 0x63, 0xfd, 0x43, 0x0a, 0x18, 0xc3, 0x64,
+ 0x0f, 0x43, 0x08, 0x86, 0x43, 0x64, 0x19, 0x0b, 0xc3, 0x64, 0x25, 0x45,
+ 0x01, 0xac, 0x43, 0x64, 0x31, 0xcb, 0x6a, 0x72, 0x07, 0xe7, 0x89, 0xcc,
+ 0x10, 0x79, 0x07, 0xe6, 0xf0, 0x4f, 0x08, 0x8b, 0xc3, 0x64, 0x43, 0x42,
+ 0x01, 0xaf, 0x43, 0x64, 0x8b, 0xcc, 0x05, 0x3b, 0x07, 0xe2, 0xc1, 0xcb,
+ 0x10, 0x7a, 0x07, 0xe6, 0xe8, 0x45, 0x1a, 0x6a, 0xc3, 0x64, 0x95, 0xce,
+ 0x40, 0x48, 0x07, 0xed, 0x80, 0xcc, 0x05, 0x3b, 0x07, 0xe2, 0xa9, 0xcb,
+ 0x10, 0x7a, 0x07, 0xe6, 0xd0, 0xcb, 0x6a, 0x72, 0x07, 0xe7, 0x79, 0xcc,
+ 0x10, 0x79, 0x07, 0xe6, 0xb0, 0x0b, 0xc3, 0x64, 0xa1, 0x45, 0x01, 0xac,
+ 0x43, 0x64, 0xad, 0xcc, 0x05, 0x3b, 0x07, 0xe2, 0x71, 0xcb, 0x10, 0x7a,
+ 0x07, 0xe6, 0xa8, 0xce, 0x40, 0x48, 0x07, 0xec, 0xd1, 0xd7, 0x2a, 0xa7,
+ 0x07, 0xec, 0xd8, 0xcc, 0x05, 0x3b, 0x07, 0xe2, 0x59, 0xcb, 0x10, 0x7a,
+ 0x07, 0xe6, 0x90, 0xd7, 0x2a, 0xa7, 0x07, 0xec, 0xc9, 0x44, 0x1a, 0x74,
+ 0xc3, 0x64, 0xbf, 0xce, 0x40, 0x48, 0x07, 0xee, 0x39, 0x45, 0x1a, 0x6a,
+ 0x43, 0x64, 0xcb, 0xcb, 0x6a, 0x72, 0x07, 0xe7, 0x61, 0xca, 0x2a, 0xb4,
+ 0x07, 0xe4, 0x11, 0x0b, 0xc3, 0x64, 0xd7, 0x45, 0x01, 0xac, 0x43, 0x64,
+ 0xe3, 0xcb, 0x6a, 0x72, 0x07, 0xe7, 0x59, 0xca, 0x2a, 0xb4, 0x07, 0xe4,
+ 0x09, 0x0b, 0x43, 0x64, 0xef, 0xca, 0x2a, 0xb4, 0x07, 0xe4, 0x21, 0xcd,
+ 0x05, 0x3a, 0x07, 0xe1, 0xf0, 0x48, 0x06, 0x7f, 0xc3, 0x64, 0xfb, 0xca,
+ 0x2a, 0xb4, 0x07, 0xe4, 0x01, 0xcd, 0x05, 0x3a, 0x07, 0xe1, 0xb8, 0xcc,
+ 0x05, 0x3b, 0x07, 0xe1, 0xd1, 0xcb, 0x10, 0x7a, 0x07, 0xe6, 0x30, 0xcc,
+ 0x05, 0x3b, 0x07, 0xe1, 0xc9, 0xcb, 0x10, 0x7a, 0x07, 0xe6, 0x28, 0xcc,
+ 0x05, 0x3b, 0x07, 0xe1, 0xc1, 0xcb, 0x10, 0x7a, 0x07, 0xe6, 0x20, 0xcc,
+ 0x05, 0x3b, 0x07, 0xe0, 0xd9, 0xcb, 0x10, 0x7a, 0x07, 0xe5, 0x60, 0xcc,
+ 0x05, 0x3b, 0x07, 0xe0, 0xc9, 0xcb, 0x10, 0x7a, 0x07, 0xe5, 0x58, 0xca,
+ 0x2a, 0xb4, 0x07, 0xe8, 0xf9, 0xcd, 0x05, 0x3a, 0x07, 0xe8, 0x18, 0xca,
+ 0x2a, 0xb4, 0x07, 0xe9, 0x01, 0xcd, 0x05, 0x3a, 0x07, 0xe8, 0x20, 0xca,
+ 0x2a, 0xb4, 0x07, 0xe4, 0x31, 0xcd, 0x05, 0x3a, 0x07, 0xe2, 0x18, 0x4c,
+ 0x87, 0x9c, 0xc3, 0x65, 0x07, 0x46, 0x08, 0x89, 0x43, 0x65, 0x13, 0xcc,
+ 0x05, 0x3b, 0x07, 0xe2, 0x11, 0xcb, 0x10, 0x7a, 0x07, 0xe6, 0x60, 0x44,
+ 0x1a, 0x74, 0xc3, 0x65, 0x1f, 0xce, 0x40, 0x48, 0x07, 0xed, 0x68, 0xcc,
+ 0x05, 0x3b, 0x07, 0xe2, 0x09, 0xcb, 0x10, 0x7a, 0x07, 0xe6, 0x58, 0xca,
+ 0x2a, 0xb4, 0x07, 0xec, 0x29, 0xcc, 0x10, 0x79, 0x07, 0xec, 0x30, 0x0b,
+ 0xc3, 0x65, 0x2b, 0x45, 0x01, 0xac, 0x43, 0x65, 0x37, 0xcc, 0x05, 0x3b,
+ 0x07, 0xe1, 0xf9, 0xcb, 0x10, 0x7a, 0x07, 0xe6, 0x48, 0x45, 0x2f, 0xc8,
+ 0xc3, 0x65, 0x49, 0x45, 0x1a, 0x6a, 0xc3, 0x65, 0x55, 0xce, 0x40, 0x48,
+ 0x07, 0xed, 0x60, 0x44, 0x2f, 0x22, 0xc3, 0x65, 0x61, 0x4d, 0x06, 0x7a,
+ 0xc3, 0x65, 0x6d, 0x45, 0x1a, 0x6a, 0xc3, 0x65, 0x79, 0x45, 0x50, 0xae,
+ 0x43, 0x65, 0x83, 0xe0, 0x05, 0x27, 0x07, 0xef, 0x88, 0xcc, 0x05, 0x3b,
+ 0x07, 0xe1, 0x81, 0xcb, 0x10, 0x7a, 0x07, 0xe6, 0x08, 0xcc, 0x05, 0x3b,
+ 0x07, 0xe1, 0x79, 0xcb, 0x10, 0x7a, 0x07, 0xe6, 0x00, 0xca, 0x2a, 0xb4,
+ 0x07, 0xeb, 0xe1, 0xcc, 0x10, 0x79, 0x07, 0xeb, 0xe8, 0xca, 0x2a, 0xb4,
+ 0x07, 0xe3, 0x79, 0xcd, 0x05, 0x3a, 0x07, 0xe0, 0x50, 0xca, 0x2a, 0xb4,
+ 0x07, 0xe3, 0x71, 0xcd, 0x05, 0x3a, 0x07, 0xe0, 0x48, 0xca, 0x2a, 0xb4,
+ 0x07, 0xe3, 0x61, 0x0b, 0xc3, 0x65, 0x8f, 0xcb, 0x6a, 0x72, 0x07, 0xe7,
+ 0x08, 0x0b, 0xc3, 0x65, 0x9b, 0xd3, 0x40, 0x43, 0x07, 0xec, 0xf0, 0x43,
+ 0x0a, 0x18, 0xc3, 0x65, 0xa7, 0x43, 0x08, 0x86, 0x43, 0x65, 0xb1, 0xcc,
+ 0x05, 0x3b, 0x07, 0xe0, 0x29, 0xcb, 0x10, 0x7a, 0x07, 0xe4, 0xe0, 0xc2,
+ 0x0e, 0x30, 0x07, 0xea, 0x11, 0x17, 0x43, 0x65, 0xbd, 0xc8, 0xb8, 0xbb,
+ 0x07, 0xea, 0x79, 0xc7, 0x6f, 0xd2, 0x07, 0xea, 0x00, 0xd5, 0x1b, 0xf5,
+ 0x07, 0xe2, 0x49, 0xca, 0x2a, 0xb4, 0x07, 0xe4, 0x40, 0x0b, 0xc3, 0x65,
+ 0xca, 0xca, 0x2a, 0xb4, 0x07, 0xe4, 0x49, 0xd3, 0x40, 0x43, 0x07, 0xed,
+ 0x88, 0x0b, 0xc3, 0x65, 0xd6, 0x45, 0x01, 0xac, 0x43, 0x65, 0xe2, 0x0b,
+ 0xc3, 0x65, 0xf4, 0x45, 0x01, 0xac, 0x43, 0x66, 0x00, 0x0b, 0xc3, 0x66,
+ 0x12, 0x45, 0x01, 0xac, 0x43, 0x66, 0x1e, 0xcc, 0x05, 0x3b, 0x07, 0xe1,
+ 0x21, 0xcb, 0x10, 0x7a, 0x07, 0xe5, 0xb0, 0xca, 0x2a, 0xb4, 0x07, 0xeb,
+ 0x79, 0xcc, 0x10, 0x79, 0x07, 0xeb, 0x80, 0xcc, 0x05, 0x3b, 0x07, 0xe1,
+ 0x19, 0xcb, 0x10, 0x7a, 0x07, 0xe5, 0xa8, 0xd7, 0x2a, 0xa7, 0x07, 0xeb,
+ 0x71, 0xce, 0x40, 0x48, 0x07, 0xed, 0x58, 0xcb, 0x10, 0x7a, 0x07, 0xdf,
+ 0xd9, 0xcc, 0x05, 0x3b, 0x07, 0xdf, 0xc8, 0x00, 0x43, 0x66, 0x36, 0x00,
+ 0x43, 0x66, 0x4c, 0x00, 0x43, 0x66, 0x62, 0x00, 0x43, 0x66, 0x78, 0x00,
+ 0x43, 0x66, 0x8e, 0x00, 0x43, 0x66, 0x9e, 0x00, 0x43, 0x66, 0xb4, 0x00,
+ 0x43, 0x66, 0xca, 0xc3, 0x0e, 0x13, 0x00, 0x45, 0xe3, 0x03, 0x66, 0xd6,
+ 0xc4, 0x3e, 0xff, 0x00, 0x45, 0xe9, 0xc3, 0xae, 0x23, 0x00, 0x45, 0xd8,
+ 0x00, 0x43, 0x66, 0xdc, 0x00, 0x43, 0x66, 0xf2, 0x00, 0x43, 0x67, 0x0b,
+ 0x88, 0x00, 0x32, 0x1b, 0x03, 0x67, 0x21, 0xca, 0xa6, 0x6e, 0x00, 0x31,
+ 0x00, 0xc2, 0x12, 0xc5, 0x00, 0x36, 0x4b, 0x03, 0x67, 0x25, 0xc2, 0x09,
+ 0x06, 0x00, 0x36, 0x2a, 0x03, 0x67, 0x29, 0x00, 0x43, 0x67, 0x2d, 0x00,
+ 0xc3, 0x67, 0x3d, 0xc2, 0x08, 0x86, 0x00, 0x34, 0x3a, 0x03, 0x67, 0x53,
+ 0x00, 0xc3, 0x67, 0x57, 0xc2, 0x08, 0x86, 0x00, 0x33, 0xd2, 0x03, 0x67,
+ 0x6d, 0x00, 0xc3, 0x67, 0x71, 0xc2, 0x08, 0x86, 0x00, 0x33, 0xfa, 0x03,
+ 0x67, 0x85, 0x00, 0x43, 0x67, 0x89, 0xc6, 0xd4, 0xd2, 0x00, 0x44, 0x31,
+ 0xc2, 0x00, 0x2f, 0x00, 0x31, 0x83, 0x03, 0x67, 0x9f, 0xc2, 0x08, 0x86,
+ 0x00, 0x31, 0x5a, 0x03, 0x67, 0xa3, 0x4b, 0x85, 0x20, 0xc3, 0x67, 0xa7,
+ 0xcb, 0x6a, 0x72, 0x07, 0xda, 0xc9, 0x0b, 0xc3, 0x67, 0xb1, 0xca, 0x2a,
+ 0xb4, 0x07, 0xda, 0xb8, 0x00, 0x43, 0x67, 0xbd, 0x00, 0x43, 0x67, 0xcd,
+ 0x00, 0x43, 0x67, 0xec, 0x00, 0x43, 0x67, 0xf8, 0x00, 0x43, 0x68, 0x0a,
+ 0x00, 0x43, 0x68, 0x1a, 0x00, 0xc3, 0x68, 0x26, 0xc2, 0x08, 0x86, 0x00,
+ 0x34, 0x02, 0x03, 0x68, 0x3c, 0x00, 0x43, 0x68, 0x40, 0x60, 0x06, 0x67,
+ 0x43, 0x68, 0x50, 0xd0, 0x5f, 0x5f, 0x00, 0x33, 0xbb, 0x03, 0x68, 0x5c,
+ 0xca, 0x2a, 0xb4, 0x07, 0xde, 0xc1, 0xcd, 0x05, 0x3a, 0x07, 0xde, 0xb8,
+ 0x45, 0x01, 0xac, 0xc3, 0x68, 0x62, 0xca, 0x2a, 0xb4, 0x07, 0xf6, 0xb1,
+ 0x0b, 0xc3, 0x68, 0x6e, 0xcb, 0x6a, 0x72, 0x07, 0xf6, 0xc0, 0xcb, 0x6a,
+ 0x72, 0x07, 0xdf, 0x39, 0x0b, 0xc3, 0x68, 0x7a, 0xca, 0x2a, 0xb4, 0x07,
+ 0xdf, 0x28, 0x00, 0x43, 0x68, 0x86, 0x00, 0x43, 0x68, 0x98, 0x00, 0x43,
+ 0x68, 0xa8, 0x00, 0x43, 0x68, 0xbe, 0x00, 0x43, 0x68, 0xd4, 0x8e, 0x00,
+ 0x31, 0x7b, 0x03, 0x68, 0xea, 0xc3, 0x01, 0x1d, 0x00, 0x34, 0x63, 0x03,
+ 0x68, 0xee, 0x86, 0x00, 0x31, 0xb2, 0x03, 0x68, 0xf2, 0x8e, 0x00, 0x34,
+ 0x43, 0x03, 0x68, 0xf6, 0xc3, 0x01, 0x1d, 0x00, 0x34, 0x6a, 0x03, 0x68,
+ 0xfa, 0x00, 0x43, 0x68, 0xfe, 0x00, 0x43, 0x69, 0x0a, 0xc3, 0xae, 0x23,
+ 0x00, 0x35, 0x09, 0xc3, 0x0e, 0x13, 0x00, 0x33, 0x79, 0xc3, 0x8c, 0x10,
+ 0x00, 0x33, 0x70, 0xca, 0x2a, 0xb4, 0x07, 0xde, 0xf9, 0xcd, 0x05, 0x3a,
+ 0x07, 0xde, 0xf0, 0x00, 0x43, 0x69, 0x1a, 0x45, 0x01, 0xac, 0xc3, 0x69,
+ 0x2a, 0xcd, 0x05, 0x3a, 0x07, 0xf7, 0x69, 0xca, 0x2a, 0xb4, 0x07, 0xf7,
+ 0x70, 0x00, 0x43, 0x69, 0x4b, 0xca, 0x2a, 0xb4, 0x07, 0xde, 0xd1, 0xcd,
+ 0x05, 0x3a, 0x07, 0xde, 0xc8, 0x00, 0xc3, 0x69, 0x61, 0xc3, 0x92, 0x76,
+ 0x00, 0x35, 0x8a, 0x03, 0x69, 0x71, 0x00, 0x43, 0x69, 0x75, 0x00, 0x43,
+ 0x69, 0x94, 0x8a, 0x00, 0x31, 0x6b, 0x03, 0x69, 0xa4, 0xc3, 0x08, 0x8b,
+ 0x00, 0x31, 0x0a, 0x03, 0x69, 0xa8, 0x00, 0x43, 0x69, 0xae, 0x00, 0x43,
+ 0x69, 0xd6, 0x16, 0xc3, 0x69, 0xe8, 0x15, 0xc3, 0x69, 0xf8, 0xc3, 0x71,
+ 0x66, 0x0f, 0x75, 0x99, 0xc3, 0x0e, 0x13, 0x0f, 0x75, 0x91, 0xc3, 0xae,
+ 0x23, 0x0f, 0x75, 0x81, 0xc3, 0x01, 0xcc, 0x0f, 0x75, 0x79, 0xc4, 0x3e,
+ 0xff, 0x0f, 0x75, 0x69, 0xc4, 0x1a, 0x6a, 0x0f, 0x75, 0x61, 0xc3, 0x0e,
+ 0x1c, 0x0f, 0x75, 0x59, 0xc3, 0x2f, 0x22, 0x0f, 0x75, 0x49, 0xc3, 0x1a,
+ 0x74, 0x0f, 0x75, 0x39, 0x42, 0x05, 0x5c, 0xc3, 0x6a, 0x0a, 0xc3, 0x7c,
+ 0xad, 0x0f, 0x75, 0x29, 0x42, 0x0c, 0x25, 0xc3, 0x6a, 0x14, 0xc4, 0x2f,
+ 0xc8, 0x0f, 0x75, 0x11, 0xc3, 0x8c, 0x10, 0x0f, 0x75, 0x09, 0xc4, 0x39,
+ 0x7a, 0x0f, 0x75, 0xb9, 0xc5, 0x91, 0x7b, 0x0f, 0x75, 0xd8, 0xc3, 0x8c,
+ 0x10, 0x0f, 0x70, 0xe1, 0xc4, 0x3e, 0xff, 0x0f, 0x70, 0xe9, 0xc3, 0xae,
+ 0x23, 0x0f, 0x70, 0xf1, 0xc3, 0x0e, 0x13, 0x0f, 0x70, 0xf8, 0xc4, 0x2f,
+ 0xc8, 0x0f, 0x72, 0x11, 0xc3, 0x1a, 0x74, 0x0f, 0x72, 0x39, 0xc3, 0x2f,
+ 0x22, 0x0f, 0x72, 0x49, 0xc3, 0x0e, 0x1c, 0x0f, 0x72, 0x59, 0xc4, 0x3e,
+ 0xff, 0x0f, 0x72, 0x69, 0x15, 0xc3, 0x6a, 0x1c, 0xc3, 0x01, 0xcc, 0x0f,
+ 0x72, 0x79, 0xc3, 0x0e, 0x13, 0x0f, 0x72, 0x91, 0xc4, 0x39, 0x7a, 0x0f,
+ 0x72, 0xb9, 0x06, 0xc3, 0x6a, 0x2e, 0xc5, 0x91, 0x7b, 0x0f, 0x72, 0xd8,
+ 0xc3, 0x00, 0xcd, 0x0f, 0x74, 0x01, 0xc2, 0x01, 0x5b, 0x0f, 0x74, 0x78,
+ 0x8e, 0x0f, 0x74, 0x19, 0x86, 0x0f, 0x74, 0xc8, 0xc2, 0x08, 0x86, 0x0f,
+ 0x74, 0x21, 0xc2, 0x00, 0x45, 0x0f, 0x74, 0x38, 0xc2, 0x01, 0x5b, 0x0f,
+ 0x74, 0x31, 0x8a, 0x0f, 0x74, 0xd0, 0xc2, 0x00, 0x45, 0x0f, 0x74, 0x41,
+ 0xc2, 0x08, 0x86, 0x0f, 0x74, 0xa9, 0x0a, 0x43, 0x6a, 0x3a, 0xc3, 0x03,
+ 0x2c, 0x0f, 0x74, 0x71, 0xc2, 0x00, 0x5d, 0x0f, 0x74, 0x89, 0xc4, 0xe6,
+ 0x7b, 0x0f, 0x74, 0xa0, 0xc2, 0x08, 0x86, 0x0f, 0x73, 0x21, 0xc2, 0x00,
+ 0x45, 0x0f, 0x73, 0x38, 0xc2, 0x00, 0x45, 0x0f, 0x73, 0x41, 0xc2, 0x08,
+ 0x86, 0x0f, 0x73, 0xa9, 0xc3, 0x3b, 0x5c, 0x0f, 0x73, 0xb0, 0xc2, 0x0e,
+ 0x14, 0x0f, 0x73, 0x51, 0xc3, 0x1a, 0x74, 0x0f, 0x73, 0xb8, 0xc3, 0x03,
+ 0x2c, 0x0f, 0x73, 0x71, 0xc2, 0x00, 0x5d, 0x0f, 0x73, 0x89, 0xc4, 0xe6,
+ 0x7b, 0x0f, 0x73, 0xa0, 0xc2, 0x00, 0x5d, 0x0f, 0x73, 0xc9, 0x47, 0x39,
+ 0x6b, 0x43, 0x6a, 0x46, 0xc3, 0x7c, 0xad, 0x00, 0x44, 0x21, 0xc5, 0x08,
+ 0x89, 0x00, 0x44, 0x18, 0xc3, 0xeb, 0xd3, 0x0f, 0xb9, 0x89, 0x9b, 0x0f,
+ 0xb9, 0x80, 0x49, 0x06, 0x32, 0xc3, 0x6a, 0x52, 0x48, 0x01, 0x93, 0x43,
+ 0x6a, 0x5e, 0x51, 0x12, 0xcc, 0xc3, 0x6a, 0x70, 0xd3, 0x42, 0x1e, 0x01,
+ 0x2b, 0x91, 0xd3, 0x46, 0xb8, 0x01, 0x2b, 0x88, 0x45, 0x00, 0x39, 0x43,
+ 0x6a, 0x82, 0xc8, 0x00, 0x29, 0x01, 0x2a, 0x71, 0xca, 0x03, 0x76, 0x01,
+ 0x2a, 0x60, 0xc9, 0xb4, 0x14, 0x01, 0x2b, 0xe9, 0xc9, 0x03, 0x77, 0x01,
+ 0x29, 0xa0, 0x96, 0x01, 0x32, 0xf3, 0x03, 0x6a, 0x94, 0xc7, 0xcc, 0x5a,
+ 0x0f, 0xd2, 0x89, 0xc6, 0xd4, 0xf0, 0x0f, 0xd2, 0x91, 0xc8, 0xba, 0xfb,
+ 0x0f, 0xd2, 0x98, 0x49, 0x29, 0x20, 0x43, 0x6a, 0x9a, 0xce, 0x29, 0x29,
+ 0x0f, 0xd0, 0xa1, 0xdb, 0x15, 0x9a, 0x0f, 0xd1, 0xf0, 0xce, 0x29, 0x29,
+ 0x0f, 0xd0, 0x91, 0xdb, 0x15, 0x9a, 0x0f, 0xd1, 0xe0, 0xce, 0x29, 0x29,
+ 0x0f, 0xd0, 0x89, 0xdb, 0x15, 0x9a, 0x0f, 0xd1, 0xd8, 0xce, 0x29, 0x29,
+ 0x0f, 0xd0, 0x81, 0xdb, 0x15, 0x9a, 0x0f, 0xd1, 0xd0, 0xc3, 0x01, 0x5e,
+ 0x0f, 0xd1, 0x21, 0xc5, 0x8f, 0xc9, 0x0f, 0xd1, 0x40, 0xce, 0x70, 0xe8,
+ 0x01, 0x34, 0x49, 0xcf, 0x69, 0xc9, 0x01, 0x34, 0x41, 0xca, 0x3b, 0x72,
+ 0x01, 0x4f, 0x68, 0xc5, 0x01, 0xea, 0x01, 0x2d, 0x51, 0xc3, 0x09, 0x46,
+ 0x01, 0x5a, 0x88, 0xc6, 0x40, 0x3d, 0x01, 0x2d, 0xd1, 0xc7, 0xbc, 0x04,
+ 0x01, 0x5a, 0x98, 0xd9, 0x1e, 0xd4, 0x01, 0x1f, 0x78, 0xd2, 0x1c, 0x2c,
+ 0x01, 0x1f, 0x68, 0xc5, 0x00, 0x62, 0x01, 0x3d, 0x0b, 0x03, 0x6a, 0xa6,
+ 0xc6, 0x19, 0x7a, 0x01, 0x02, 0x69, 0xd5, 0x00, 0x92, 0x01, 0x5c, 0xf0,
+ 0xc5, 0x01, 0x62, 0x01, 0x30, 0xd9, 0xce, 0x23, 0xd6, 0x0f, 0xac, 0xe8,
+ 0xd2, 0x1c, 0x2c, 0x01, 0x1f, 0x70, 0xca, 0x9d, 0x9a, 0x01, 0x36, 0xc1,
+ 0x49, 0x01, 0x59, 0x43, 0x6a, 0xac, 0xc6, 0x19, 0x7a, 0x01, 0x02, 0x61,
+ 0xd5, 0x00, 0x92, 0x01, 0x5c, 0xe0, 0xc6, 0x01, 0xe9, 0x01, 0x2f, 0xf1,
+ 0xc7, 0x3f, 0x7b, 0x0f, 0xbc, 0xc9, 0xc7, 0x0b, 0xa0, 0x0f, 0xbc, 0xf8,
+ 0xc8, 0x60, 0xa3, 0x01, 0x5e, 0x30, 0xc8, 0x60, 0xa3, 0x01, 0x5e, 0x38,
+ 0x9a, 0x01, 0x30, 0x91, 0xc5, 0x66, 0xf4, 0x01, 0x30, 0x89, 0x04, 0xc3,
+ 0x6a, 0xb8, 0xc8, 0x94, 0x1d, 0x0f, 0xaf, 0xa9, 0xc7, 0xc8, 0x32, 0x01,
+ 0x5d, 0xe8, 0xc4, 0xe6, 0xef, 0x00, 0xdb, 0x51, 0xc6, 0xd8, 0xb6, 0x00,
+ 0xdb, 0x28, 0xc7, 0xc6, 0x3a, 0x00, 0xda, 0x08, 0x90, 0x0b, 0x51, 0x31,
+ 0x96, 0x0b, 0x50, 0xb8, 0x91, 0x0b, 0x51, 0x49, 0x97, 0x0b, 0x50, 0xe1,
+ 0xc2, 0x23, 0x68, 0x0b, 0x50, 0x98, 0x83, 0x0b, 0x50, 0x71, 0x87, 0x0b,
+ 0x50, 0x40, 0xc2, 0x0e, 0x30, 0x0b, 0x51, 0xa1, 0xc2, 0x01, 0x04, 0x0b,
+ 0x51, 0x80, 0x90, 0x0b, 0x51, 0x89, 0xc2, 0xd4, 0x79, 0x0b, 0x51, 0x29,
+ 0x87, 0x0b, 0x50, 0x38, 0xc2, 0x01, 0xe6, 0x0b, 0x50, 0x61, 0x8b, 0x0b,
+ 0x50, 0x58, 0x87, 0x0b, 0x51, 0x11, 0xc2, 0xd4, 0x79, 0x0b, 0x50, 0xf8,
+ 0xc2, 0x07, 0x69, 0x0b, 0x51, 0x41, 0xc5, 0xdd, 0x10, 0x0b, 0x51, 0x38,
+ 0xc3, 0x8c, 0x60, 0x0b, 0x50, 0xd1, 0xc3, 0x7a, 0x15, 0x0b, 0x50, 0x80,
+ 0xc2, 0x08, 0xc6, 0x0b, 0x50, 0xc0, 0xc2, 0x00, 0xeb, 0x0b, 0x50, 0x11,
+ 0x07, 0xc3, 0x6a, 0xc4, 0xc5, 0xe3, 0xc8, 0x0b, 0x4d, 0x10, 0xc2, 0xd4,
+ 0x79, 0x0b, 0x4d, 0xa9, 0x96, 0x0b, 0x4d, 0x48, 0x91, 0x0b, 0x4b, 0xa9,
+ 0x87, 0x0b, 0x4f, 0x50, 0x17, 0xc3, 0x6a, 0xcc, 0x96, 0x0b, 0x4d, 0xb8,
+ 0x96, 0x0b, 0x4e, 0x61, 0xc2, 0x00, 0xdd, 0x0b, 0x4d, 0x59, 0xc2, 0x00,
+ 0x11, 0x0b, 0x4b, 0xd0, 0x0d, 0xc3, 0x6a, 0xd6, 0x83, 0x0b, 0x4f, 0x91,
+ 0xc3, 0x8c, 0x60, 0x0b, 0x4f, 0x03, 0x03, 0x6a, 0xe7, 0x09, 0xc3, 0x6a,
+ 0xeb, 0xc6, 0xd9, 0x22, 0x0b, 0x4d, 0x19, 0x11, 0x43, 0x6a, 0xf3, 0xc2,
+ 0x0b, 0xfd, 0x0b, 0x4b, 0x81, 0x03, 0xc3, 0x6a, 0xfb, 0x0b, 0x43, 0x6b,
+ 0x05, 0x17, 0xc3, 0x6b, 0x0f, 0xc3, 0x90, 0xd5, 0x0b, 0x4b, 0xe0, 0x87,
+ 0x0b, 0x4e, 0x28, 0x07, 0xc3, 0x6b, 0x19, 0xc5, 0xcb, 0x0c, 0x0b, 0x4c,
+ 0x50, 0xc2, 0x01, 0xf8, 0x0b, 0x4e, 0x71, 0xc2, 0x01, 0x0a, 0x0b, 0x4d,
+ 0xe0, 0xc2, 0x89, 0x93, 0x0b, 0x4e, 0x09, 0xc2, 0x4c, 0x85, 0x0b, 0x4d,
+ 0x38, 0xc7, 0x03, 0x28, 0x0b, 0x4e, 0x01, 0xc7, 0xc3, 0xe0, 0x0b, 0x4d,
+ 0x68, 0x8f, 0x0b, 0x4b, 0x91, 0x93, 0x0b, 0x4e, 0xe1, 0x83, 0x0b, 0x4e,
+ 0xdb, 0x03, 0x6b, 0x26, 0xc8, 0xba, 0x63, 0x0b, 0x4c, 0x78, 0x91, 0x0b,
+ 0x4b, 0xcb, 0x03, 0x6b, 0x2a, 0x93, 0x0b, 0x4e, 0xb0, 0x90, 0x0b, 0x50,
+ 0x01, 0x97, 0x0b, 0x4f, 0xea, 0x03, 0x6b, 0x2e, 0x8f, 0x0b, 0x4d, 0x53,
+ 0x03, 0x6b, 0x34, 0xc2, 0x08, 0xc6, 0x0b, 0x4c, 0xb0, 0x03, 0xc3, 0x6b,
+ 0x3a, 0x87, 0x0b, 0x4f, 0x49, 0x8f, 0x0b, 0x4c, 0x88, 0x83, 0x0b, 0x4b,
+ 0x63, 0x03, 0x6b, 0x42, 0x42, 0x01, 0x58, 0x43, 0x6b, 0x46, 0x07, 0x43,
+ 0x6b, 0x52, 0x17, 0xc3, 0x6b, 0x5c, 0xc2, 0x00, 0xa8, 0x0b, 0x4c, 0x20,
+ 0xc2, 0x00, 0xe5, 0x0b, 0x4e, 0x10, 0x93, 0x0b, 0x4b, 0x71, 0x87, 0x0b,
+ 0x4f, 0x80, 0x91, 0x0b, 0x4f, 0x9b, 0x03, 0x6b, 0x64, 0xc2, 0x14, 0xf7,
+ 0x0b, 0x4e, 0xf1, 0xc5, 0x8c, 0x5f, 0x0b, 0x4d, 0x20, 0x96, 0x0b, 0x4c,
+ 0x81, 0x87, 0x0b, 0x4b, 0xb0, 0x11, 0xc3, 0x6b, 0x68, 0x93, 0x0b, 0x4f,
+ 0xc1, 0x8f, 0x0b, 0x4b, 0xd8, 0x92, 0x0b, 0x4b, 0x49, 0x93, 0x0b, 0x4e,
+ 0xc9, 0xc2, 0x01, 0x02, 0x0b, 0x4c, 0xf8, 0x87, 0x0b, 0x4f, 0x61, 0xc3,
+ 0x8c, 0x60, 0x0b, 0x4c, 0xe8, 0xc2, 0x01, 0x0a, 0x0b, 0x4b, 0x41, 0x87,
+ 0x0b, 0x4d, 0x30, 0x93, 0x0b, 0x4f, 0xe1, 0x87, 0x0b, 0x4d, 0xc3, 0x03,
+ 0x6b, 0x70, 0x92, 0x0b, 0x4c, 0x58, 0xc2, 0x01, 0xe6, 0x0b, 0x4e, 0x18,
+ 0xc2, 0x01, 0x04, 0x0b, 0x4d, 0x29, 0x83, 0x0b, 0x4c, 0x38, 0x93, 0x0b,
+ 0x50, 0x08, 0x00, 0xc3, 0x6b, 0x74, 0x87, 0x0b, 0x4d, 0xa2, 0x03, 0x6b,
+ 0x84, 0x90, 0x0b, 0x4f, 0x29, 0x93, 0x0b, 0x4f, 0x21, 0xc3, 0x3b, 0xc1,
+ 0x0b, 0x4f, 0x09, 0xc2, 0x00, 0x92, 0x0b, 0x4d, 0x90, 0xc5, 0x01, 0xb9,
+ 0x0b, 0x4f, 0x19, 0xc8, 0xbd, 0x9b, 0x0b, 0x4f, 0x10, 0x9a, 0x0b, 0x4e,
+ 0xf9, 0xc2, 0x08, 0xc6, 0x0b, 0x4c, 0xbb, 0x03, 0x6b, 0x88, 0x8f, 0x0b,
+ 0x4d, 0xf0, 0x96, 0x0b, 0x4d, 0x71, 0xc2, 0x01, 0xe6, 0x0b, 0x4c, 0xa0,
+ 0x09, 0xc3, 0x6b, 0x8c, 0x0d, 0x43, 0x6b, 0xa2, 0xc2, 0x01, 0x0a, 0x0b,
+ 0x4a, 0x01, 0x0a, 0xc3, 0x6b, 0xc0, 0x43, 0x90, 0xd5, 0x43, 0x6b, 0xcc,
+ 0x07, 0xc3, 0x6b, 0xd4, 0xc2, 0x6f, 0x95, 0x0b, 0x4b, 0x10, 0xc2, 0x01,
+ 0x02, 0x0b, 0x49, 0xb9, 0x07, 0xc3, 0x6b, 0xde, 0xc2, 0x00, 0xe5, 0x0b,
+ 0x48, 0xc0, 0x8b, 0x0b, 0x4a, 0x69, 0xc2, 0x0e, 0x78, 0x0b, 0x49, 0x79,
+ 0xc2, 0x00, 0xdd, 0x0b, 0x49, 0x11, 0xc2, 0x01, 0x02, 0x0b, 0x47, 0xd0,
+ 0xc3, 0xe4, 0x58, 0x0b, 0x4a, 0x39, 0x42, 0x0c, 0xfe, 0xc3, 0x6b, 0xe8,
+ 0xc2, 0x01, 0xf8, 0x0b, 0x48, 0x11, 0x8b, 0x0b, 0x47, 0x9a, 0x03, 0x6b,
+ 0xf2, 0x17, 0xc3, 0x6b, 0xf8, 0xc3, 0xaa, 0xdd, 0x0b, 0x4a, 0x79, 0x96,
+ 0x0b, 0x49, 0x80, 0xc5, 0xe1, 0x48, 0x0b, 0x4a, 0x11, 0xc5, 0xdd, 0x3d,
+ 0x0b, 0x48, 0x50, 0x17, 0xc3, 0x6c, 0x02, 0xc3, 0xaa, 0xdd, 0x0b, 0x4a,
+ 0x80, 0xc2, 0x0e, 0x30, 0x0b, 0x49, 0x03, 0x03, 0x6c, 0x0a, 0xc2, 0x00,
+ 0xf1, 0x0b, 0x47, 0x88, 0xc3, 0x90, 0xd5, 0x0b, 0x49, 0x91, 0x42, 0x0c,
+ 0xfe, 0xc3, 0x6c, 0x10, 0x91, 0x0b, 0x48, 0xea, 0x03, 0x6c, 0x1a, 0xc3,
+ 0x90, 0xd5, 0x0b, 0x48, 0xe1, 0xc3, 0x5e, 0x1c, 0x0b, 0x48, 0xd1, 0xc4,
+ 0xe9, 0x9b, 0x0b, 0x48, 0xb0, 0x17, 0xc3, 0x6c, 0x1e, 0xc3, 0xaa, 0xdd,
+ 0x0b, 0x49, 0x40, 0xc2, 0x00, 0xf2, 0x0b, 0x49, 0xe8, 0x93, 0x0b, 0x49,
+ 0xf9, 0x90, 0x0b, 0x49, 0xd1, 0xc2, 0x00, 0xeb, 0x0b, 0x48, 0x30, 0x17,
+ 0xc3, 0x6c, 0x2c, 0x96, 0x0b, 0x48, 0x20, 0xc2, 0x08, 0xc6, 0x0b, 0x49,
+ 0xc9, 0x97, 0x0b, 0x4a, 0x91, 0x87, 0x0b, 0x48, 0x18, 0x93, 0x0b, 0x4b,
+ 0x21, 0x92, 0x0b, 0x48, 0x38, 0xc2, 0x8b, 0x5c, 0x0b, 0x4a, 0xe1, 0x97,
+ 0x0b, 0x4a, 0xc1, 0x07, 0xc3, 0x6c, 0x40, 0xc2, 0x23, 0x68, 0x0b, 0x4a,
+ 0xa0, 0x11, 0xc3, 0x6c, 0x48, 0xc3, 0xdb, 0x29, 0x0b, 0x49, 0x28, 0xc4,
+ 0xe0, 0xee, 0x0b, 0x4b, 0x01, 0xc3, 0x1a, 0xb6, 0x0b, 0x4a, 0x50, 0x93,
+ 0x0b, 0x4a, 0xe9, 0xc2, 0x01, 0xc4, 0x0b, 0x48, 0xd8, 0x87, 0x0b, 0x4a,
+ 0xd1, 0xc4, 0xd6, 0xfd, 0x0b, 0x49, 0x70, 0x42, 0x00, 0xf8, 0xc3, 0x6c,
+ 0x50, 0x17, 0xc3, 0x6c, 0x5c, 0x96, 0x0b, 0x46, 0x48, 0xca, 0xa3, 0xa8,
+ 0x0b, 0x46, 0xa9, 0x96, 0x0b, 0x46, 0x70, 0xc2, 0x14, 0xf7, 0x0b, 0x47,
+ 0x41, 0xc3, 0xe4, 0x58, 0x0b, 0x46, 0xd8, 0xc4, 0xe4, 0xa3, 0x0b, 0x46,
+ 0xe1, 0xc2, 0xd4, 0x79, 0x0b, 0x45, 0x50, 0x96, 0x0b, 0x47, 0x81, 0xc5,
+ 0xdc, 0x57, 0x0b, 0x45, 0xd0, 0xc4, 0xd5, 0xfa, 0x0b, 0x46, 0x31, 0xc5,
+ 0xe0, 0xd0, 0x0b, 0x45, 0x70, 0x90, 0x0b, 0x47, 0x71, 0xc5, 0xda, 0xf4,
+ 0x0b, 0x44, 0xe0, 0x8f, 0x0b, 0x46, 0x29, 0x92, 0x0b, 0x45, 0xb0, 0x93,
+ 0x0b, 0x47, 0x61, 0xc6, 0xd0, 0x46, 0x0b, 0x45, 0x90, 0xc2, 0x4c, 0x85,
+ 0x0b, 0x47, 0x59, 0x09, 0xc3, 0x6c, 0x6a, 0xc2, 0x00, 0xeb, 0x0b, 0x46,
+ 0x81, 0x0d, 0x43, 0x6c, 0x77, 0x07, 0xc3, 0x6c, 0x83, 0x03, 0xc3, 0x6c,
+ 0x8f, 0xc3, 0xe4, 0x58, 0x0b, 0x45, 0x68, 0x03, 0xc3, 0x6c, 0x99, 0x42,
+ 0x0c, 0xfe, 0xc3, 0x6c, 0xa1, 0xc3, 0x85, 0x6c, 0x0b, 0x45, 0x59, 0xc4,
+ 0xcd, 0xdc, 0x0b, 0x44, 0xe8, 0x17, 0xc3, 0x6c, 0xab, 0xc2, 0x00, 0xeb,
+ 0x0b, 0x46, 0x99, 0xc3, 0xed, 0x11, 0x0b, 0x45, 0xf9, 0x83, 0x0b, 0x45,
+ 0xf1, 0xc5, 0xaa, 0xbb, 0x0b, 0x45, 0x28, 0x07, 0xc3, 0x6c, 0xb5, 0xc2,
+ 0x0e, 0x30, 0x0b, 0x45, 0xa1, 0xc6, 0xd7, 0xc0, 0x0b, 0x44, 0xd0, 0xc3,
+ 0x51, 0x15, 0x0b, 0x45, 0x19, 0x83, 0x0b, 0x44, 0x80, 0x03, 0xc3, 0x6c,
+ 0xbf, 0x07, 0xc3, 0x6c, 0xcb, 0x8b, 0x0b, 0x46, 0xeb, 0x03, 0x6c, 0xdb,
+ 0x17, 0x43, 0x6c, 0xe5, 0x07, 0xc3, 0x6c, 0xef, 0x00, 0x43, 0x6c, 0xfb,
+ 0xc3, 0xdb, 0x29, 0x0b, 0x47, 0x21, 0xc7, 0xcc, 0x6f, 0x0b, 0x45, 0x11,
+ 0x8f, 0x0b, 0x44, 0x88, 0x92, 0x0b, 0x45, 0x01, 0xc3, 0x87, 0xeb, 0x0b,
+ 0x44, 0xb0, 0x09, 0xc3, 0x6d, 0x07, 0xc2, 0x00, 0xeb, 0x0b, 0x44, 0x71,
+ 0xca, 0x9d, 0xa4, 0x0b, 0x43, 0xa0, 0xc2, 0x01, 0x04, 0x0b, 0x44, 0x59,
+ 0xc4, 0xc5, 0x48, 0x0b, 0x42, 0xb8, 0xc5, 0xe3, 0x1e, 0x0b, 0x44, 0x01,
+ 0xc7, 0xca, 0xd9, 0x0b, 0x43, 0x68, 0xc9, 0xae, 0x1a, 0x0b, 0x43, 0x59,
+ 0xc4, 0x90, 0x0b, 0x0b, 0x43, 0xe0, 0x43, 0x61, 0xb2, 0x43, 0x6d, 0x1c,
+ 0xc3, 0x91, 0xaf, 0x0b, 0x44, 0x21, 0xc4, 0xe6, 0xbf, 0x0b, 0x43, 0xf1,
+ 0xca, 0x9e, 0x58, 0x0b, 0x43, 0x61, 0x03, 0x43, 0x6d, 0x28, 0xc8, 0xb8,
+ 0xeb, 0x0b, 0x44, 0x11, 0x93, 0x0b, 0x43, 0xc8, 0x93, 0x0b, 0x44, 0x69,
+ 0xc3, 0x14, 0xf3, 0x0b, 0x42, 0xe8, 0xc3, 0x7a, 0x15, 0x0b, 0x44, 0x31,
+ 0xc4, 0xe4, 0x0f, 0x0b, 0x43, 0x81, 0xc3, 0xed, 0x08, 0x0b, 0x43, 0x70,
+ 0xc4, 0xc6, 0x0e, 0x0b, 0x43, 0x89, 0xcc, 0x87, 0xa8, 0x0b, 0x43, 0x18,
+ 0xc6, 0xd2, 0x98, 0x0b, 0x43, 0x51, 0xc6, 0xd4, 0x60, 0x0b, 0x43, 0x48,
+ 0xc5, 0xe2, 0xa1, 0x0b, 0x43, 0x41, 0xc9, 0xb5, 0x46, 0x0b, 0x42, 0xc0,
+ 0x96, 0x0b, 0x42, 0x59, 0x93, 0x0b, 0x41, 0xe1, 0xc4, 0xea, 0x4f, 0x0b,
+ 0x41, 0x80, 0xcc, 0x8b, 0x5c, 0x0b, 0x42, 0x01, 0x0b, 0xc3, 0x6d, 0x34,
+ 0x17, 0x43, 0x6d, 0x40, 0xc3, 0x3b, 0xc1, 0x0b, 0x42, 0x51, 0xc6, 0xd3,
+ 0x82, 0x0b, 0x41, 0x88, 0xc3, 0x59, 0x1e, 0x0b, 0x41, 0x71, 0xc7, 0xb0,
+ 0xe3, 0x0b, 0x40, 0x60, 0x93, 0x0b, 0x42, 0x81, 0xc2, 0x01, 0xa7, 0x0b,
+ 0x41, 0x38, 0x96, 0x0b, 0x41, 0x99, 0xc8, 0xba, 0xab, 0x0b, 0x40, 0x98,
+ 0x07, 0xc3, 0x6d, 0x4a, 0xc7, 0xc9, 0x74, 0x0b, 0x41, 0xe9, 0xc5, 0xe2,
+ 0x9c, 0x0b, 0x40, 0x78, 0x93, 0x0b, 0x42, 0xb1, 0xc3, 0x18, 0x79, 0x0b,
+ 0x42, 0x40, 0x42, 0x00, 0xeb, 0xc3, 0x6d, 0x63, 0xca, 0xa7, 0x90, 0x0b,
+ 0x40, 0xf0, 0x93, 0x0b, 0x42, 0xa9, 0xc6, 0xbd, 0x6d, 0x0b, 0x40, 0x20,
+ 0x83, 0x0b, 0x42, 0x89, 0xc3, 0x90, 0xd5, 0x0b, 0x42, 0x68, 0x8b, 0x0b,
+ 0x42, 0x7b, 0x03, 0x6d, 0x6f, 0xc2, 0x00, 0xdd, 0x0b, 0x42, 0x48, 0xc3,
+ 0x4f, 0x0c, 0x0b, 0x42, 0x29, 0x43, 0xec, 0x36, 0xc3, 0x6d, 0x75, 0xc4,
+ 0x08, 0x0b, 0x0b, 0x40, 0x68, 0xc5, 0xa3, 0xad, 0x0b, 0x42, 0x19, 0xc4,
+ 0x08, 0xd1, 0x0b, 0x40, 0xa0, 0xc2, 0x01, 0xf8, 0x0b, 0x41, 0xfb, 0x03,
+ 0x6d, 0x81, 0xc5, 0xda, 0x4f, 0x0b, 0x40, 0x90, 0xc9, 0xb8, 0x16, 0x0b,
+ 0x41, 0xa1, 0xc9, 0x87, 0xe7, 0x0b, 0x41, 0x48, 0xc7, 0xc4, 0xf1, 0x0b,
+ 0x40, 0xf9, 0xc6, 0xbd, 0x6d, 0x0b, 0x40, 0x38, 0xc3, 0x59, 0x1e, 0x0b,
+ 0x41, 0x78, 0x03, 0xc3, 0x6d, 0x85, 0xc9, 0x87, 0xe7, 0x0b, 0x41, 0x41,
+ 0xc5, 0xe1, 0x07, 0x0b, 0x40, 0xe9, 0xc4, 0x97, 0xce, 0x0b, 0x40, 0xd8,
+ 0x4d, 0x81, 0xe8, 0xc3, 0x6d, 0x8f, 0x4b, 0x96, 0xf0, 0x43, 0x6d, 0x9b,
+ 0xc6, 0xd5, 0x20, 0x0b, 0x41, 0x09, 0xc3, 0x87, 0xeb, 0x0b, 0x40, 0xe0,
+ 0xa1, 0x01, 0x40, 0x7b, 0x03, 0x6d, 0xa7, 0xa2, 0x01, 0x40, 0xbb, 0x03,
+ 0x6d, 0xc0, 0xa3, 0x01, 0x41, 0x3b, 0x03, 0x6d, 0xd2, 0xa5, 0x01, 0x44,
+ 0x39, 0xa4, 0x01, 0x42, 0x3a, 0x03, 0x6d, 0xdd, 0xa2, 0x01, 0x40, 0xdb,
+ 0x03, 0x6d, 0xe1, 0xa3, 0x01, 0x41, 0x5b, 0x03, 0x6d, 0xf3, 0xa5, 0x01,
+ 0x44, 0x59, 0xa4, 0x01, 0x42, 0x5a, 0x03, 0x6d, 0xfe, 0xa3, 0x01, 0x41,
+ 0x9b, 0x03, 0x6e, 0x02, 0xa5, 0x01, 0x44, 0x99, 0xa4, 0x01, 0x42, 0x9a,
+ 0x03, 0x6e, 0x0d, 0xa5, 0x01, 0x45, 0x19, 0xa4, 0x01, 0x43, 0x1a, 0x03,
+ 0x6e, 0x11, 0xa5, 0x01, 0x46, 0x18, 0xa2, 0x01, 0x40, 0xeb, 0x03, 0x6e,
+ 0x15, 0xa3, 0x01, 0x41, 0x6b, 0x03, 0x6e, 0x27, 0xa5, 0x01, 0x44, 0x69,
+ 0xa4, 0x01, 0x42, 0x6a, 0x03, 0x6e, 0x32, 0xa3, 0x01, 0x41, 0xab, 0x03,
+ 0x6e, 0x36, 0xa5, 0x01, 0x44, 0xa9, 0xa4, 0x01, 0x42, 0xaa, 0x03, 0x6e,
+ 0x41, 0xa5, 0x01, 0x45, 0x29, 0xa4, 0x01, 0x43, 0x2a, 0x03, 0x6e, 0x45,
+ 0xa5, 0x01, 0x46, 0x28, 0xa3, 0x01, 0x41, 0xcb, 0x03, 0x6e, 0x49, 0xa5,
+ 0x01, 0x44, 0xc9, 0xa4, 0x01, 0x42, 0xca, 0x03, 0x6e, 0x54, 0xa5, 0x01,
+ 0x45, 0x49, 0xa4, 0x01, 0x43, 0x4a, 0x03, 0x6e, 0x58, 0xa5, 0x01, 0x46,
+ 0x48, 0xa5, 0x01, 0x45, 0x89, 0xa4, 0x01, 0x43, 0x8a, 0x03, 0x6e, 0x5c,
+ 0xa5, 0x01, 0x46, 0x88, 0xa5, 0x01, 0x47, 0x08, 0xa2, 0x01, 0x40, 0xf3,
+ 0x03, 0x6e, 0x60, 0xa3, 0x01, 0x41, 0x73, 0x03, 0x6e, 0x72, 0xa5, 0x01,
+ 0x44, 0x71, 0xa4, 0x01, 0x42, 0x72, 0x03, 0x6e, 0x7d, 0xa3, 0x01, 0x41,
+ 0xb3, 0x03, 0x6e, 0x81, 0xa5, 0x01, 0x44, 0xb1, 0xa4, 0x01, 0x42, 0xb2,
+ 0x03, 0x6e, 0x8c, 0xa5, 0x01, 0x45, 0x31, 0xa4, 0x01, 0x43, 0x32, 0x03,
+ 0x6e, 0x90, 0xa5, 0x01, 0x46, 0x30, 0xa3, 0x01, 0x41, 0xd3, 0x03, 0x6e,
+ 0x94, 0xa5, 0x01, 0x44, 0xd1, 0xa4, 0x01, 0x42, 0xd2, 0x03, 0x6e, 0x9f,
+ 0xa5, 0x01, 0x45, 0x51, 0xa4, 0x01, 0x43, 0x52, 0x03, 0x6e, 0xa3, 0xa5,
+ 0x01, 0x46, 0x50, 0xa5, 0x01, 0x45, 0x91, 0xa4, 0x01, 0x43, 0x92, 0x03,
+ 0x6e, 0xa7, 0xa5, 0x01, 0x46, 0x90, 0xa5, 0x01, 0x47, 0x10, 0xa3, 0x01,
+ 0x41, 0xe3, 0x03, 0x6e, 0xab, 0xa5, 0x01, 0x44, 0xe1, 0xa4, 0x01, 0x42,
+ 0xe2, 0x03, 0x6e, 0xb6, 0xa5, 0x01, 0x45, 0x61, 0xa4, 0x01, 0x43, 0x62,
+ 0x03, 0x6e, 0xba, 0xa5, 0x01, 0x46, 0x60, 0xa5, 0x01, 0x45, 0xa1, 0xa4,
+ 0x01, 0x43, 0xa2, 0x03, 0x6e, 0xbe, 0xa5, 0x01, 0x46, 0xa0, 0xa5, 0x01,
+ 0x47, 0x20, 0xa5, 0x01, 0x45, 0xc1, 0xa4, 0x01, 0x43, 0xc2, 0x03, 0x6e,
+ 0xc2, 0xa5, 0x01, 0x46, 0xc0, 0xa5, 0x01, 0x47, 0x40, 0xa5, 0x01, 0x47,
+ 0x80, 0xc3, 0x18, 0x28, 0x0e, 0x84, 0x11, 0xc7, 0xa1, 0xd5, 0x0e, 0x84,
+ 0x08, 0xc3, 0x73, 0xe8, 0x0e, 0x82, 0x89, 0xc5, 0xd6, 0x8f, 0x0e, 0x80,
+ 0x90, 0xc3, 0x8a, 0xb3, 0x0e, 0x84, 0xa1, 0xc4, 0x97, 0x8f, 0x0e, 0x84,
+ 0x98, 0xc6, 0x01, 0xa1, 0x0f, 0xd9, 0xf1, 0xc5, 0x00, 0x47, 0x0f, 0xd9,
+ 0xf9, 0xcc, 0x06, 0x2b, 0x0f, 0xda, 0x88, 0x46, 0x00, 0xc7, 0xc3, 0x6e,
+ 0xc6, 0xd2, 0x4f, 0x20, 0x0f, 0xda, 0x68, 0xd2, 0x4f, 0x20, 0x0f, 0xda,
+ 0x61, 0x46, 0x00, 0xc7, 0x43, 0x6e, 0xd2, 0xc6, 0x01, 0xa1, 0x0f, 0xda,
+ 0x29, 0xcc, 0x06, 0x2b, 0x0f, 0xda, 0x50, 0xcc, 0x06, 0x2b, 0x0f, 0xda,
+ 0x49, 0xc5, 0x00, 0x47, 0x0f, 0xda, 0x58, 0x16, 0xc3, 0x6e, 0xde, 0xd0,
+ 0x5e, 0xef, 0x01, 0x3e, 0xd0, 0x49, 0x01, 0x13, 0xc3, 0x6e, 0xea, 0xd0,
+ 0x05, 0x17, 0x0f, 0xdb, 0xe0, 0x49, 0x01, 0x13, 0xc3, 0x6e, 0xf0, 0xd0,
+ 0x05, 0x17, 0x0f, 0xdb, 0xe8, 0xc9, 0x35, 0x23, 0x01, 0x4c, 0x88, 0x16,
+ 0xc3, 0x6e, 0xf6, 0xc9, 0x39, 0xbf, 0x0f, 0xc8, 0x19, 0xc3, 0x05, 0xe3,
+ 0x0f, 0xc8, 0x30, 0xc6, 0x03, 0xfa, 0x01, 0x2e, 0xb1, 0xc4, 0x0e, 0xa5,
+ 0x01, 0x5f, 0x40, 0x45, 0x01, 0xac, 0xc3, 0x6f, 0x02, 0xd4, 0x38, 0xde,
+ 0x01, 0x4a, 0x40, 0xc6, 0x03, 0x81, 0x01, 0x0e, 0x71, 0xcf, 0x2e, 0xd8,
+ 0x01, 0x48, 0x20, 0xc5, 0x7b, 0x2f, 0x01, 0x02, 0x29, 0x48, 0xc3, 0x9b,
+ 0xc3, 0x6f, 0x14, 0xc8, 0x50, 0x0d, 0x01, 0x4c, 0x59, 0xc6, 0x03, 0x81,
+ 0x01, 0x72, 0xa9, 0xcd, 0x80, 0x14, 0x01, 0x72, 0xb8, 0xc5, 0x00, 0x62,
+ 0x01, 0x5b, 0x03, 0x03, 0x6f, 0x20, 0xcc, 0x83, 0x04, 0x01, 0x5b, 0x51,
+ 0xcd, 0x81, 0xce, 0x01, 0x5c, 0x20, 0x45, 0x01, 0xac, 0xc3, 0x6f, 0x24,
+ 0xc8, 0xab, 0xed, 0x01, 0x59, 0xb0, 0xd9, 0x20, 0xc8, 0x0f, 0xc0, 0x21,
+ 0x15, 0xc3, 0x6f, 0x34, 0x42, 0x00, 0x68, 0xc3, 0x6f, 0x40, 0xcf, 0x2e,
+ 0xd8, 0x01, 0x0f, 0xb9, 0x0e, 0xc3, 0x6f, 0x4c, 0xc4, 0x04, 0x63, 0x01,
+ 0x0d, 0x49, 0x16, 0xc3, 0x6f, 0x58, 0xca, 0xa5, 0x38, 0x01, 0x4a, 0x31,
+ 0xd5, 0x00, 0x92, 0x0f, 0xc0, 0xa1, 0xcc, 0x8a, 0xb4, 0x0f, 0xc4, 0xc0,
+ 0x43, 0x12, 0x50, 0xc3, 0x6f, 0x67, 0x47, 0x21, 0xc4, 0x43, 0x6f, 0x76,
+ 0xd1, 0x55, 0x65, 0x01, 0x48, 0xf8, 0x45, 0x03, 0x51, 0xc3, 0x6f, 0x86,
+ 0x43, 0x00, 0x3b, 0x43, 0x6f, 0x9e, 0x00, 0x43, 0x6f, 0xa4, 0x44, 0x00,
+ 0x68, 0xc3, 0x6f, 0xb0, 0x42, 0x02, 0x6a, 0x43, 0x6f, 0xba, 0x9e, 0x08,
+ 0x30, 0x01, 0x9f, 0x08, 0x30, 0x09, 0xa0, 0x08, 0x30, 0x11, 0xa1, 0x08,
+ 0x30, 0x19, 0xa2, 0x08, 0x30, 0x21, 0xa3, 0x08, 0x30, 0x29, 0xa4, 0x08,
+ 0x30, 0x31, 0xa5, 0x08, 0x30, 0x39, 0xa6, 0x08, 0x30, 0x40, 0x9d, 0x08,
+ 0x30, 0x49, 0xa0, 0x08, 0x30, 0x59, 0xa3, 0x08, 0x30, 0x61, 0xa4, 0x08,
+ 0x30, 0x69, 0x9e, 0x08, 0x30, 0x50, 0x9d, 0x08, 0x30, 0x71, 0x9e, 0x08,
+ 0x30, 0x7b, 0x03, 0x6f, 0xc4, 0x9f, 0x08, 0x30, 0x93, 0x03, 0x6f, 0xcc,
+ 0xa0, 0x08, 0x30, 0xab, 0x03, 0x6f, 0xd4, 0xa1, 0x08, 0x30, 0xb9, 0xa3,
+ 0x08, 0x30, 0xc1, 0xa4, 0x08, 0x30, 0xc9, 0xa5, 0x08, 0x30, 0xd1, 0xa6,
+ 0x08, 0x30, 0xe0, 0x9d, 0x08, 0x30, 0xe9, 0x9e, 0x08, 0x30, 0xf1, 0xa1,
+ 0x08, 0x30, 0xf9, 0xa4, 0x08, 0x31, 0x01, 0xa5, 0x08, 0x31, 0x09, 0xa6,
+ 0x08, 0x31, 0x10, 0x9d, 0x08, 0x31, 0x19, 0x9e, 0x08, 0x31, 0x21, 0xa1,
+ 0x08, 0x31, 0x29, 0xa2, 0x08, 0x31, 0x31, 0xa3, 0x08, 0x31, 0x39, 0xa4,
+ 0x08, 0x31, 0x41, 0xa5, 0x08, 0x31, 0x49, 0xa6, 0x08, 0x31, 0x50, 0x9d,
+ 0x08, 0x31, 0x59, 0x9e, 0x08, 0x31, 0x61, 0xa0, 0x08, 0x31, 0x69, 0xa1,
+ 0x08, 0x31, 0x71, 0xa2, 0x08, 0x31, 0x79, 0xa3, 0x08, 0x31, 0x81, 0xa4,
+ 0x08, 0x31, 0x89, 0xa5, 0x08, 0x31, 0x91, 0xa6, 0x08, 0x31, 0x98, 0x9d,
+ 0x08, 0x31, 0xa1, 0x9e, 0x08, 0x31, 0xa9, 0xa2, 0x08, 0x31, 0xb1, 0xa3,
+ 0x08, 0x31, 0xb9, 0xa4, 0x08, 0x31, 0xc1, 0xa6, 0x08, 0x31, 0xc8, 0x9d,
+ 0x08, 0x31, 0xd1, 0xa0, 0x08, 0x31, 0xd9, 0xa1, 0x08, 0x31, 0xe1, 0xa3,
+ 0x08, 0x31, 0xe9, 0xa4, 0x08, 0x31, 0xf1, 0xa5, 0x08, 0x31, 0xf9, 0xa6,
+ 0x08, 0x32, 0x00, 0x9d, 0x08, 0x32, 0x09, 0x9e, 0x08, 0x32, 0x11, 0x9f,
+ 0x08, 0x32, 0x19, 0xa3, 0x08, 0x32, 0x29, 0xa4, 0x08, 0x32, 0x31, 0xa2,
+ 0x08, 0x32, 0x20, 0x9f, 0x08, 0x32, 0x59, 0xa0, 0x08, 0x32, 0x61, 0x9d,
+ 0x08, 0x32, 0x48, 0x83, 0x08, 0x32, 0x69, 0x84, 0x08, 0x32, 0x70, 0x9d,
+ 0x08, 0x32, 0x91, 0xa5, 0x08, 0x32, 0x98, 0x83, 0x08, 0x32, 0xe9, 0x84,
+ 0x08, 0x32, 0xf1, 0x85, 0x08, 0x32, 0xf8, 0x83, 0x08, 0x33, 0x19, 0x84,
+ 0x08, 0x33, 0x21, 0x85, 0x08, 0x33, 0x28, 0xc3, 0xeb, 0x37, 0x08, 0x00,
+ 0x01, 0xc4, 0xe7, 0xb3, 0x08, 0x00, 0xc9, 0xc4, 0xe8, 0x37, 0x08, 0x00,
+ 0xf1, 0xc4, 0xe8, 0xf3, 0x08, 0x01, 0x99, 0xc4, 0xe9, 0x2b, 0x08, 0x01,
+ 0xa9, 0xc4, 0xe8, 0xeb, 0x08, 0x00, 0x29, 0xc4, 0xb3, 0x7d, 0x08, 0x00,
+ 0x39, 0xc4, 0xe9, 0xf7, 0x08, 0x01, 0x59, 0xc4, 0xea, 0x53, 0x08, 0x01,
+ 0x70, 0xc4, 0xe8, 0xcb, 0x08, 0x00, 0x41, 0xc4, 0xe7, 0xdb, 0x08, 0x00,
+ 0xa9, 0xc4, 0xea, 0x97, 0x08, 0x01, 0x09, 0xc4, 0xe9, 0x8b, 0x08, 0x01,
+ 0xe1, 0xc3, 0xeb, 0x13, 0x08, 0x00, 0x21, 0xc4, 0xe9, 0x17, 0x08, 0x00,
+ 0xb9, 0xc4, 0xe7, 0xbf, 0x08, 0x01, 0x19, 0xc4, 0xe9, 0xef, 0x08, 0x01,
+ 0x80, 0xc4, 0xe8, 0xdf, 0x08, 0x00, 0x49, 0xc4, 0xe9, 0xf3, 0x08, 0x00,
+ 0xe1, 0xc4, 0xe9, 0x1b, 0x08, 0x00, 0xe9, 0xc4, 0xe9, 0x4b, 0x08, 0x01,
+ 0x11, 0xc4, 0xe9, 0x03, 0x08, 0x01, 0xb9, 0xc4, 0xe8, 0xa7, 0x08, 0x00,
+ 0x51, 0xc4, 0xe7, 0xfb, 0x08, 0x01, 0x51, 0xc4, 0xea, 0x3f, 0x08, 0x01,
+ 0x89, 0xc4, 0xea, 0x5f, 0x08, 0x01, 0x90, 0xc4, 0xe8, 0xe3, 0x08, 0x00,
+ 0x81, 0xc4, 0xe9, 0x6b, 0x08, 0x01, 0xc9, 0xc4, 0xc6, 0x09, 0x08, 0x01,
+ 0xd1, 0xc4, 0xe8, 0x5b, 0x08, 0x02, 0x09, 0xc5, 0xe1, 0x11, 0x08, 0x02,
+ 0x29, 0xc4, 0xe9, 0x77, 0x08, 0x00, 0x31, 0xc4, 0xe8, 0x33, 0x08, 0x00,
+ 0x59, 0xc4, 0xea, 0xaf, 0x08, 0x01, 0x78, 0xc4, 0xe8, 0x4f, 0x08, 0x00,
+ 0x89, 0xc4, 0xea, 0xbb, 0x08, 0x01, 0xb1, 0xc5, 0xe1, 0xe8, 0x08, 0x02,
+ 0x39, 0xc5, 0xe1, 0x1b, 0x08, 0x02, 0x51, 0xc5, 0xe0, 0x7b, 0x08, 0x02,
+ 0x59, 0xc3, 0x71, 0x20, 0x08, 0x00, 0x19, 0xc4, 0xe9, 0x97, 0x08, 0x00,
+ 0x71, 0xc4, 0xe9, 0x47, 0x08, 0x01, 0x40, 0xc4, 0xe8, 0x8f, 0x08, 0x00,
+ 0x99, 0xc4, 0xda, 0x68, 0x08, 0x00, 0xa1, 0xc4, 0xe8, 0xdb, 0x08, 0x02,
+ 0x11, 0xc5, 0xe0, 0x26, 0x08, 0x02, 0x60, 0xc4, 0xe7, 0xd3, 0x08, 0x00,
+ 0xb1, 0xc4, 0xea, 0x03, 0x08, 0x00, 0xf9, 0xc4, 0xea, 0x37, 0x08, 0x01,
+ 0x21, 0xc4, 0xe9, 0x63, 0x08, 0x01, 0xc1, 0xc4, 0xe8, 0xbf, 0x08, 0x01,
+ 0xe9, 0xc5, 0xda, 0xf9, 0x08, 0x02, 0x19, 0xc5, 0xe0, 0x5d, 0x08, 0x02,
+ 0x41, 0xc4, 0xd2, 0x80, 0x08, 0x00, 0x79, 0xc4, 0xe8, 0x07, 0x08, 0x00,
+ 0x90, 0xc4, 0xe8, 0xef, 0x08, 0x00, 0xd1, 0xc4, 0xea, 0xb3, 0x08, 0x01,
+ 0x29, 0xc4, 0xe8, 0xbb, 0x08, 0x01, 0xf9, 0xc5, 0xe0, 0xc1, 0x08, 0x02,
+ 0x31, 0xc3, 0xeb, 0x34, 0x08, 0x00, 0x11, 0xc4, 0xe7, 0xcb, 0x08, 0x00,
+ 0xc1, 0xc4, 0xe8, 0x3f, 0x08, 0x01, 0x49, 0xc4, 0xe7, 0xc3, 0x08, 0x01,
+ 0x61, 0xc4, 0xe8, 0xc3, 0x08, 0x02, 0x00, 0xc4, 0xe8, 0x97, 0x08, 0x00,
+ 0xd9, 0xc4, 0xea, 0x4b, 0x08, 0x01, 0x01, 0xc4, 0xe9, 0xff, 0x08, 0x01,
+ 0x31, 0xc4, 0xe8, 0x67, 0x08, 0x01, 0xa1, 0xc5, 0xe1, 0xb1, 0x08, 0x02,
+ 0x49, 0xc3, 0xea, 0x6b, 0x08, 0x00, 0x09, 0xc4, 0xe8, 0x3b, 0x08, 0x00,
+ 0x69, 0xc4, 0xea, 0x9f, 0x08, 0x01, 0x68, 0xc5, 0xdb, 0x26, 0x08, 0x02,
+ 0x69, 0xc5, 0xda, 0xa4, 0x08, 0x02, 0x20, 0xa5, 0x08, 0x02, 0x81, 0xa6,
+ 0x08, 0x02, 0x88, 0xa4, 0x08, 0x02, 0xa1, 0xa6, 0x08, 0x02, 0xa8, 0xa0,
+ 0x08, 0x02, 0xb9, 0xa1, 0x08, 0x02, 0xc0, 0x9f, 0x08, 0x02, 0xd1, 0xa0,
+ 0x08, 0x02, 0xd9, 0xa3, 0x08, 0x02, 0xe1, 0xa6, 0x08, 0x02, 0xe8, 0x1d,
+ 0xc3, 0x6f, 0xd8, 0x1f, 0xc3, 0x6f, 0xfe, 0x20, 0xc3, 0x70, 0x1c, 0x21,
+ 0xc3, 0x70, 0x2c, 0x22, 0xc3, 0x70, 0x46, 0x23, 0xc3, 0x70, 0x6a, 0x24,
+ 0xc3, 0x70, 0x96, 0x25, 0xc3, 0x70, 0xbe, 0x26, 0x43, 0x70, 0xda, 0x1f,
+ 0xc3, 0x70, 0xe4, 0x20, 0xc3, 0x70, 0xf0, 0x21, 0xc3, 0x71, 0x0e, 0x22,
+ 0x43, 0x71, 0x36, 0x1d, 0xc3, 0x71, 0x5c, 0x1e, 0xc3, 0x71, 0x84, 0x1f,
+ 0xc3, 0x71, 0xac, 0xc2, 0xc5, 0x82, 0x08, 0x07, 0xc8, 0xc6, 0xd2, 0x80,
+ 0x08, 0x04, 0x99, 0xc8, 0xbc, 0x73, 0x08, 0x04, 0xa0, 0xc6, 0xd0, 0x2e,
+ 0x08, 0x04, 0xc9, 0xc7, 0xc8, 0x08, 0x08, 0x04, 0xc0, 0x42, 0x00, 0xa9,
+ 0xc3, 0x71, 0xc4, 0xc4, 0x04, 0x63, 0x00, 0x0d, 0x6b, 0x03, 0x71, 0xd0,
+ 0x06, 0xc3, 0x71, 0xd6, 0x05, 0xc3, 0x71, 0xe2, 0xca, 0x6a, 0xeb, 0x00,
+ 0xf3, 0x79, 0xce, 0x04, 0x59, 0x00, 0x14, 0x41, 0xcc, 0x57, 0xac, 0x00,
+ 0x0d, 0x59, 0xc6, 0x03, 0x81, 0x00, 0x0b, 0x38, 0x46, 0x01, 0xab, 0x43,
+ 0x72, 0x00, 0x45, 0x41, 0xe7, 0xc3, 0x72, 0x0c, 0x8f, 0x05, 0x3b, 0xb8,
+ 0xd3, 0x45, 0x88, 0x00, 0xeb, 0xd1, 0xc3, 0x00, 0xff, 0x00, 0x07, 0xf2,
+ 0x03, 0x72, 0x65, 0xc8, 0xaf, 0x7a, 0x00, 0xe8, 0xb1, 0x43, 0x03, 0x5f,
+ 0x43, 0x72, 0x6e, 0xd4, 0x04, 0x53, 0x05, 0x5b, 0x38, 0x42, 0x00, 0xa9,
+ 0xc3, 0x72, 0x80, 0x05, 0xc3, 0x72, 0x8c, 0x44, 0x0b, 0xf8, 0xc3, 0x72,
+ 0xad, 0xc5, 0x34, 0x21, 0x00, 0x0a, 0xdb, 0x03, 0x72, 0xbc, 0xcc, 0x57,
+ 0xac, 0x00, 0xec, 0x51, 0xc4, 0x04, 0x63, 0x00, 0x14, 0x11, 0xce, 0x3b,
+ 0x8c, 0x05, 0x3d, 0x49, 0x15, 0x43, 0x72, 0xc2, 0xc3, 0x6e, 0x0d, 0x00,
+ 0x12, 0xcb, 0x03, 0x72, 0xce, 0x45, 0x08, 0x30, 0x43, 0x72, 0xd4, 0x47,
+ 0x3e, 0xf8, 0xc3, 0x72, 0xe2, 0xc7, 0xc1, 0xdc, 0x05, 0x3e, 0xc8, 0xc7,
+ 0xce, 0x4b, 0x05, 0x5b, 0x01, 0xc6, 0xc4, 0x0b, 0x05, 0x3c, 0x60, 0x44,
+ 0x01, 0xac, 0xc3, 0x72, 0xf9, 0xc7, 0xa9, 0xe1, 0x05, 0x3a, 0xd8, 0x87,
+ 0x00, 0x12, 0xc3, 0x03, 0x73, 0x03, 0x8d, 0x0e, 0xf8, 0x19, 0xc8, 0xbd,
+ 0x33, 0x0e, 0xf8, 0x09, 0x85, 0x01, 0x0c, 0x23, 0x03, 0x73, 0x09, 0xc6,
+ 0x24, 0x64, 0x00, 0x12, 0xe3, 0x03, 0x73, 0x0f, 0xcf, 0x64, 0x92, 0x00,
+ 0x13, 0xf9, 0xc6, 0xd6, 0x46, 0x05, 0x3f, 0xb0, 0xc8, 0xa9, 0xe0, 0x05,
+ 0x3a, 0xe8, 0x45, 0x01, 0xac, 0xc3, 0x73, 0x15, 0xd6, 0x2d, 0x2f, 0x00,
+ 0x0a, 0x48, 0x42, 0x00, 0xa9, 0xc3, 0x73, 0x4b, 0x04, 0xc3, 0x73, 0x57,
+ 0xc8, 0x64, 0x99, 0x0e, 0xf8, 0x89, 0x05, 0xc3, 0x73, 0x66, 0xca, 0x6a,
+ 0xeb, 0x00, 0xf1, 0xd9, 0x42, 0x00, 0x68, 0xc3, 0x73, 0x7e, 0xcc, 0x57,
+ 0xac, 0x00, 0xec, 0x29, 0xcf, 0x65, 0x37, 0x05, 0x59, 0xb9, 0xce, 0x04,
+ 0x59, 0x00, 0x13, 0x6b, 0x03, 0x73, 0x8d, 0xcb, 0x91, 0x18, 0x05, 0x3a,
+ 0x49, 0xc5, 0x34, 0x21, 0x00, 0x09, 0xd1, 0xc6, 0x03, 0x81, 0x00, 0x0a,
+ 0x10, 0xc2, 0x23, 0x6a, 0x00, 0x13, 0x73, 0x03, 0x73, 0x93, 0xc5, 0xe2,
+ 0x2e, 0x05, 0x59, 0xa8, 0x46, 0x01, 0xab, 0x43, 0x73, 0x99, 0x42, 0x00,
+ 0xa9, 0xc3, 0x73, 0xa3, 0x05, 0xc3, 0x73, 0xaf, 0xca, 0x6a, 0xeb, 0x00,
+ 0xf0, 0x79, 0x44, 0x0b, 0xf8, 0xc3, 0x73, 0xc7, 0xc4, 0x04, 0x63, 0x00,
+ 0x12, 0xbb, 0x03, 0x73, 0xd3, 0xcc, 0x57, 0xac, 0x00, 0xec, 0x09, 0x15,
+ 0xc3, 0x73, 0xd9, 0x16, 0x43, 0x73, 0xe5, 0xca, 0xa4, 0x48, 0x00, 0x15,
+ 0x23, 0x03, 0x73, 0xf1, 0xc3, 0x86, 0xc6, 0x00, 0xf4, 0xf8, 0x00, 0x43,
+ 0x73, 0xf7, 0x45, 0x02, 0x93, 0xc3, 0x74, 0x06, 0x46, 0x39, 0x6c, 0x43,
+ 0x74, 0x19, 0xce, 0x04, 0x59, 0x0e, 0xf8, 0xe9, 0x05, 0xc3, 0x74, 0x24,
+ 0xc5, 0x34, 0x21, 0x00, 0x08, 0x39, 0xc9, 0x18, 0x19, 0x00, 0x08, 0x59,
+ 0xc3, 0x02, 0x1d, 0x05, 0x3c, 0x99, 0xcc, 0x57, 0xac, 0x05, 0x3c, 0xa1,
+ 0xc4, 0x04, 0x63, 0x00, 0x0c, 0x41, 0xc6, 0x03, 0x81, 0x00, 0x11, 0xe0,
+ 0x4a, 0xa2, 0x04, 0x43, 0x74, 0x3f, 0xcf, 0x6b, 0x5e, 0x00, 0x12, 0xf1,
+ 0x11, 0xc3, 0x74, 0x4b, 0xc9, 0x68, 0x49, 0x05, 0x3e, 0x88, 0xcb, 0x98,
+ 0xbe, 0x05, 0x39, 0x78, 0xce, 0x04, 0x59, 0x0e, 0xf8, 0xd9, 0x42, 0x03,
+ 0x76, 0xc3, 0x74, 0x57, 0x05, 0xc3, 0x74, 0x66, 0x06, 0xc3, 0x74, 0x75,
+ 0xc6, 0x61, 0xbc, 0x00, 0x0a, 0x6b, 0x03, 0x74, 0x82, 0xc5, 0x1f, 0x01,
+ 0x00, 0x07, 0xab, 0x03, 0x74, 0x88, 0xc6, 0x03, 0x81, 0x00, 0x07, 0xc3,
+ 0x03, 0x74, 0x8e, 0xc5, 0x1e, 0x64, 0x00, 0x07, 0x91, 0xc5, 0x34, 0x21,
+ 0x00, 0x07, 0x99, 0x42, 0x00, 0x36, 0xc3, 0x74, 0x94, 0xc5, 0x1f, 0x9c,
+ 0x00, 0x0a, 0x79, 0xc6, 0xd1, 0x0c, 0x00, 0x0f, 0x5b, 0x03, 0x74, 0xa6,
+ 0xce, 0x1f, 0xa7, 0x00, 0x10, 0x78, 0x91, 0x00, 0x12, 0xa3, 0x03, 0x74,
+ 0xac, 0x87, 0x00, 0x12, 0xda, 0x03, 0x74, 0xb6, 0xc6, 0x03, 0x81, 0x00,
+ 0x13, 0x43, 0x03, 0x74, 0xbc, 0x06, 0xc3, 0x74, 0xc2, 0xca, 0xa1, 0x8c,
+ 0x00, 0xf6, 0x49, 0xc5, 0x1f, 0x01, 0x00, 0x09, 0x4b, 0x03, 0x74, 0xcf,
+ 0xce, 0x04, 0x59, 0x00, 0xec, 0xb1, 0xc5, 0x1e, 0x64, 0x00, 0x07, 0x61,
+ 0xc5, 0x34, 0x21, 0x00, 0x07, 0x69, 0x05, 0xc3, 0x74, 0xd5, 0xc6, 0x61,
+ 0xbc, 0x00, 0x09, 0x59, 0xc5, 0x1f, 0x9c, 0x00, 0x09, 0x69, 0xc6, 0xd1,
+ 0x0c, 0x00, 0x09, 0x79, 0xce, 0x1f, 0xa7, 0x00, 0x10, 0x58, 0x83, 0x00,
+ 0x13, 0x4b, 0x03, 0x74, 0xe1, 0xc7, 0xcc, 0xed, 0x05, 0x5b, 0x08, 0x46,
+ 0x68, 0x4b, 0xc3, 0x74, 0xe7, 0x47, 0x1d, 0xf5, 0x43, 0x74, 0xff, 0x00,
+ 0x43, 0x75, 0x0b, 0x46, 0x01, 0xab, 0x43, 0x75, 0x17, 0x46, 0x01, 0xab,
+ 0x43, 0x75, 0x23, 0x05, 0xc3, 0x75, 0x3e, 0xc5, 0x1f, 0x01, 0x00, 0xf5,
+ 0xeb, 0x03, 0x75, 0x56, 0xca, 0xa1, 0x8c, 0x00, 0xf5, 0xd9, 0x06, 0xc3,
+ 0x75, 0x5c, 0xc6, 0x61, 0xbc, 0x00, 0x08, 0x9b, 0x03, 0x75, 0x66, 0xce,
+ 0x04, 0x59, 0x00, 0xec, 0x91, 0xc8, 0xc1, 0xf3, 0x05, 0x59, 0xa1, 0xc5,
+ 0x1e, 0x64, 0x00, 0x07, 0x41, 0xc5, 0x34, 0x21, 0x00, 0x07, 0x49, 0xc5,
+ 0x1f, 0x9c, 0x00, 0x08, 0xa9, 0xc6, 0xd1, 0x0c, 0x00, 0x08, 0xc9, 0xce,
+ 0x1f, 0xa7, 0x00, 0x10, 0x39, 0xc6, 0x03, 0x81, 0x00, 0x12, 0x39, 0xc5,
+ 0x22, 0xef, 0x01, 0x63, 0xc0, 0xc3, 0x00, 0xcd, 0x05, 0x39, 0x19, 0xc2,
+ 0x01, 0x5b, 0x05, 0x39, 0x28, 0x8a, 0x00, 0x07, 0x80, 0xcb, 0x98, 0x03,
+ 0x0e, 0xf8, 0x00, 0xc9, 0x18, 0x19, 0x00, 0xf0, 0xf9, 0xcc, 0x57, 0xac,
+ 0x00, 0xec, 0x11, 0xcc, 0x1e, 0xfa, 0x00, 0xeb, 0x71, 0xc6, 0x03, 0x81,
+ 0x05, 0x3c, 0xc9, 0xc4, 0x04, 0x63, 0x00, 0x0c, 0x90, 0xc4, 0xac, 0x0c,
+ 0x00, 0xf7, 0xf9, 0xc5, 0x1f, 0x01, 0x00, 0xf7, 0xc9, 0xc4, 0x04, 0x63,
+ 0x00, 0x0d, 0xa3, 0x03, 0x75, 0x6c, 0x06, 0xc3, 0x75, 0x72, 0xc5, 0x1e,
+ 0x64, 0x00, 0xf7, 0x99, 0xca, 0xa2, 0xc2, 0x00, 0xf4, 0xc9, 0x15, 0xc3,
+ 0x75, 0x7e, 0xc5, 0x34, 0x21, 0x00, 0x07, 0xe9, 0xca, 0x07, 0x96, 0x00,
+ 0x0b, 0xb9, 0xc6, 0x61, 0xbc, 0x00, 0x11, 0x98, 0x48, 0xba, 0x8b, 0xc3,
+ 0x75, 0x8a, 0xc9, 0xaf, 0xe5, 0x05, 0x3e, 0xb8, 0x44, 0x0b, 0xf8, 0xc3,
+ 0x75, 0x94, 0xc5, 0x34, 0x21, 0x00, 0xf1, 0xf9, 0xcc, 0x57, 0xac, 0x00,
+ 0xec, 0x31, 0xcc, 0x1e, 0xfa, 0x00, 0xeb, 0x79, 0xcc, 0x4b, 0x48, 0x05,
+ 0x59, 0xd1, 0xc4, 0x04, 0x63, 0x00, 0x13, 0x88, 0x45, 0x01, 0xac, 0xc3,
+ 0x75, 0xa0, 0xc3, 0x02, 0x1d, 0x00, 0x14, 0x4a, 0x03, 0x75, 0xec, 0xcc,
+ 0x26, 0x18, 0x00, 0xeb, 0xf8, 0x45, 0x01, 0xac, 0xc3, 0x75, 0xf2, 0xce,
+ 0x6e, 0x02, 0x05, 0x59, 0x88, 0xd4, 0x04, 0x53, 0x00, 0xec, 0x80, 0x46,
+ 0x01, 0xab, 0x43, 0x76, 0x37, 0xd4, 0x3c, 0x62, 0x05, 0x39, 0xd8, 0xca,
+ 0xa2, 0xc2, 0x00, 0xf4, 0xc1, 0x06, 0xc3, 0x76, 0x43, 0xc5, 0x34, 0x21,
+ 0x00, 0xf4, 0x19, 0xc5, 0x1e, 0x64, 0x00, 0xf4, 0x09, 0xca, 0x07, 0x96,
+ 0x00, 0x0b, 0xa9, 0xc4, 0x04, 0x63, 0x01, 0x63, 0x98, 0xca, 0x6a, 0xeb,
+ 0x00, 0xf4, 0xb1, 0xcb, 0x9a, 0xa2, 0x00, 0xf1, 0x59, 0x05, 0xc3, 0x76,
+ 0x4f, 0x06, 0xc3, 0x76, 0x61, 0xc4, 0x04, 0x63, 0x00, 0x13, 0x31, 0xc6,
+ 0x03, 0x81, 0x00, 0x09, 0x39, 0xcc, 0x57, 0xac, 0x05, 0x3c, 0xa8, 0xca,
+ 0x1e, 0x1b, 0x00, 0x13, 0x38, 0xca, 0x6a, 0xeb, 0x00, 0xf4, 0xa9, 0x06,
+ 0xc3, 0x76, 0x73, 0x05, 0xc3, 0x76, 0x7f, 0xcc, 0x57, 0xac, 0x00, 0xec,
+ 0x71, 0xcc, 0x1e, 0xfa, 0x00, 0xeb, 0xb1, 0xce, 0x04, 0x59, 0x00, 0x14,
+ 0x81, 0xc5, 0x34, 0x21, 0x00, 0x0b, 0xd1, 0x15, 0xc3, 0x76, 0x91, 0xc4,
+ 0x04, 0x63, 0x00, 0x11, 0x28, 0x06, 0xc3, 0x76, 0x9d, 0xcc, 0x57, 0xac,
+ 0x00, 0xec, 0x69, 0x42, 0x00, 0x36, 0x43, 0x76, 0xa9, 0x06, 0xc3, 0x76,
+ 0xb8, 0xc5, 0x1f, 0x01, 0x00, 0xf3, 0xe9, 0xcc, 0x57, 0xac, 0x00, 0xec,
+ 0x61, 0xc4, 0x04, 0x63, 0x00, 0x14, 0x59, 0xca, 0xa5, 0x1a, 0x01, 0x63,
+ 0x89, 0xc4, 0x00, 0xd2, 0x01, 0x63, 0xa0, 0xc2, 0x08, 0xc6, 0x05, 0x3c,
+ 0xd9, 0xc2, 0x09, 0x06, 0x05, 0x3c, 0xe9, 0xc2, 0x0e, 0x78, 0x05, 0x3c,
+ 0xf8, 0xc9, 0x18, 0x19, 0x00, 0xf2, 0xb9, 0xc5, 0x34, 0x21, 0x00, 0xf2,
+ 0xa9, 0xcc, 0x57, 0xac, 0x00, 0xec, 0x41, 0x15, 0xc3, 0x76, 0xc4, 0xcc,
+ 0x1e, 0xfa, 0x00, 0xeb, 0x89, 0xc8, 0xc1, 0xf3, 0x05, 0x3a, 0x99, 0xc4,
+ 0x04, 0x63, 0x00, 0x0d, 0x28, 0xca, 0x6a, 0xeb, 0x00, 0xf1, 0xa9, 0x06,
+ 0xc3, 0x76, 0xd3, 0xc5, 0x34, 0x21, 0x00, 0xf1, 0x89, 0xcc, 0x57, 0xac,
+ 0x00, 0xec, 0x21, 0xc6, 0x03, 0x81, 0x05, 0x3a, 0x0b, 0x03, 0x76, 0xe5,
+ 0x05, 0xc3, 0x76, 0xeb, 0xce, 0x3b, 0x8c, 0x05, 0x3d, 0x19, 0xc4, 0x04,
+ 0x63, 0x00, 0x0c, 0xc8, 0xc6, 0x61, 0xbc, 0x00, 0xf1, 0x09, 0xcc, 0x57,
+ 0xac, 0x00, 0xec, 0x19, 0xc5, 0x34, 0x21, 0x00, 0x0f, 0xa9, 0xc4, 0x04,
+ 0x63, 0x00, 0x13, 0x01, 0x05, 0xc3, 0x76, 0xf7, 0xc5, 0x1f, 0x9c, 0x00,
+ 0x08, 0xf9, 0xc9, 0x18, 0x19, 0x00, 0x09, 0x09, 0xce, 0x3b, 0x8c, 0x05,
+ 0x3d, 0x09, 0xc6, 0x03, 0x81, 0x00, 0x0f, 0x28, 0x8b, 0x05, 0x3d, 0xe9,
+ 0x83, 0x05, 0x3d, 0xd9, 0x97, 0x05, 0x3d, 0xf9, 0xc4, 0x05, 0x30, 0x00,
+ 0x12, 0x10, 0xca, 0x6a, 0xeb, 0x00, 0xf0, 0x39, 0x44, 0x0b, 0xf8, 0xc3,
+ 0x77, 0x09, 0xcc, 0x57, 0xac, 0x00, 0xec, 0x01, 0xcc, 0x1e, 0xfa, 0x00,
+ 0xeb, 0x61, 0xc8, 0xc1, 0xf3, 0x05, 0x3c, 0xb9, 0xc6, 0x03, 0x81, 0x00,
+ 0x0c, 0x01, 0xc6, 0xd5, 0x3e, 0x00, 0x0c, 0x19, 0xc4, 0x04, 0x63, 0x00,
+ 0x12, 0x98, 0xca, 0xa7, 0x7c, 0x05, 0x5a, 0x69, 0x45, 0x81, 0x67, 0x43,
+ 0x77, 0x15, 0x91, 0x05, 0x59, 0xeb, 0x03, 0x77, 0x23, 0x87, 0x05, 0x59,
+ 0x90, 0x05, 0xc3, 0x77, 0x29, 0xc6, 0x03, 0x81, 0x00, 0x12, 0x48, 0xc4,
+ 0x04, 0x63, 0x00, 0x15, 0x03, 0x03, 0x77, 0x3b, 0xd8, 0x21, 0xbc, 0x05,
+ 0x3a, 0xb9, 0xcf, 0x3c, 0x2b, 0x05, 0x3a, 0xc8, 0x8e, 0x07, 0xd8, 0x21,
+ 0x8b, 0x07, 0xd8, 0x18, 0xc6, 0x03, 0x4f, 0x00, 0xf7, 0xb0, 0x43, 0x0b,
+ 0xf9, 0xc3, 0x77, 0x41, 0xc8, 0x1e, 0x8a, 0x00, 0x0b, 0xc0, 0x98, 0x00,
+ 0xf7, 0xe1, 0xc2, 0x00, 0x30, 0x00, 0xf7, 0xd0, 0xc5, 0x00, 0x34, 0x00,
+ 0xf2, 0x11, 0xc5, 0x03, 0x50, 0x00, 0xf2, 0x00, 0x42, 0x03, 0x76, 0xc3,
+ 0x77, 0x4d, 0x06, 0xc3, 0x77, 0x5c, 0xc6, 0x61, 0xbc, 0x00, 0x0b, 0x53,
+ 0x03, 0x77, 0x69, 0xc5, 0x1f, 0x01, 0x00, 0x0b, 0x43, 0x03, 0x77, 0x6f,
+ 0x05, 0xc3, 0x77, 0x73, 0xc5, 0x1e, 0x64, 0x00, 0x06, 0xc9, 0xc5, 0x34,
+ 0x21, 0x00, 0x06, 0xd1, 0xc6, 0x03, 0x81, 0x05, 0x3d, 0xc1, 0xc5, 0x1f,
+ 0x9c, 0x00, 0x0b, 0x61, 0xca, 0xa2, 0xc2, 0x00, 0x0b, 0x71, 0xce, 0x1f,
+ 0xa7, 0x00, 0x10, 0xb1, 0xc6, 0xd1, 0x0c, 0x00, 0x0b, 0x90, 0xc2, 0x01,
+ 0x00, 0x00, 0x0d, 0x03, 0x03, 0x77, 0x82, 0xc8, 0xa1, 0x8e, 0x00, 0xf6,
+ 0x70, 0xc9, 0x07, 0x97, 0x00, 0x06, 0xa3, 0x03, 0x77, 0x88, 0xc4, 0x69,
+ 0xdb, 0x00, 0x0e, 0x88, 0x11, 0xc3, 0x77, 0x8e, 0xc8, 0x1e, 0x8a, 0x00,
+ 0x06, 0xb2, 0x03, 0x77, 0x9a, 0xc5, 0x61, 0xbd, 0x00, 0x0a, 0x63, 0x03,
+ 0x77, 0xa0, 0xcb, 0x1e, 0xfb, 0x00, 0x0c, 0xf8, 0x45, 0x00, 0x39, 0x43,
+ 0x77, 0xa6, 0xca, 0xa4, 0x98, 0x00, 0x0f, 0xf0, 0xd1, 0x56, 0xb9, 0x05,
+ 0x3a, 0x51, 0xc2, 0x00, 0x11, 0x05, 0x3a, 0x60, 0xcb, 0x9c, 0x9c, 0x00,
+ 0x0f, 0x60, 0x11, 0xc3, 0x77, 0xb8, 0xc8, 0x1e, 0x8a, 0x00, 0x06, 0x7a,
+ 0x03, 0x77, 0xc4, 0xc6, 0x00, 0x33, 0x00, 0xf1, 0x60, 0xc9, 0x07, 0x97,
+ 0x00, 0x06, 0x71, 0xc4, 0x69, 0xdb, 0x00, 0x0e, 0x78, 0xc9, 0x07, 0x97,
+ 0x00, 0x06, 0x53, 0x03, 0x77, 0xca, 0xc6, 0xbf, 0xd5, 0x00, 0x11, 0x43,
+ 0x03, 0x77, 0xce, 0xc4, 0x69, 0xdb, 0x00, 0x08, 0xd0, 0xc6, 0x00, 0x33,
+ 0x00, 0xf0, 0xd0, 0x11, 0xc3, 0x77, 0xd4, 0xc8, 0x1e, 0x8a, 0x00, 0x06,
+ 0x58, 0x45, 0x00, 0x39, 0x43, 0x77, 0xe0, 0xc8, 0x0f, 0xda, 0x00, 0x0d,
+ 0xc1, 0xca, 0x98, 0xb4, 0x00, 0x0f, 0x70, 0x45, 0x00, 0x39, 0x43, 0x77,
+ 0xec, 0xc9, 0x07, 0x97, 0x00, 0x06, 0x13, 0x03, 0x78, 0x0a, 0xc4, 0x69,
+ 0xdb, 0x00, 0x0e, 0x68, 0x11, 0xc3, 0x78, 0x10, 0xc8, 0x1e, 0x8a, 0x00,
+ 0x06, 0x22, 0x03, 0x78, 0x1c, 0xc5, 0x00, 0x34, 0x00, 0xf0, 0x01, 0xc5,
+ 0x03, 0x50, 0x00, 0x06, 0x2a, 0x03, 0x78, 0x22, 0xc5, 0x34, 0x21, 0x00,
+ 0x0f, 0xe1, 0xc6, 0x61, 0xbc, 0x00, 0x0f, 0x10, 0xc5, 0x00, 0x34, 0x00,
+ 0xf3, 0x13, 0x03, 0x78, 0x28, 0xc5, 0x03, 0x50, 0x00, 0xf3, 0x00, 0xc4,
+ 0x69, 0xdb, 0x00, 0x0b, 0x03, 0x03, 0x78, 0x2e, 0xc9, 0x07, 0x97, 0x00,
+ 0x0a, 0xe1, 0xc6, 0xbf, 0xd5, 0x00, 0x0a, 0xf1, 0xca, 0xa3, 0x58, 0x00,
+ 0x10, 0xc0, 0xce, 0x18, 0x14, 0x00, 0xf3, 0x20, 0xd3, 0x3f, 0xd1, 0x05,
+ 0x3e, 0x49, 0xc9, 0xb3, 0x96, 0x01, 0x63, 0xf0, 0x43, 0x0b, 0xf9, 0xc3,
+ 0x78, 0x34, 0xc8, 0x21, 0xcc, 0x01, 0x63, 0x58, 0xc9, 0x07, 0x97, 0x00,
+ 0xf4, 0x81, 0xc4, 0x69, 0xdb, 0x00, 0x0b, 0xe8, 0xc5, 0x03, 0x82, 0x00,
+ 0x0d, 0xa9, 0xc9, 0xb3, 0x96, 0x01, 0x63, 0xf8, 0x43, 0x0b, 0xf9, 0xc3,
+ 0x78, 0x40, 0xc8, 0x1e, 0x8a, 0x00, 0xf4, 0x20, 0xc8, 0x0d, 0xd8, 0x00,
+ 0xf3, 0xf1, 0xce, 0x3c, 0x2c, 0x05, 0x3a, 0xf0, 0xcf, 0x63, 0x66, 0x00,
+ 0xf3, 0x81, 0xc6, 0xbf, 0xd5, 0x00, 0x0b, 0x11, 0xc4, 0x69, 0xdb, 0x00,
+ 0x0b, 0x21, 0xca, 0xa3, 0x58, 0x00, 0x10, 0xd0, 0x43, 0x0b, 0xf9, 0xc3,
+ 0x78, 0x4c, 0xce, 0x3c, 0x2c, 0x00, 0x11, 0xf0, 0xd2, 0x21, 0xc2, 0x05,
+ 0x3b, 0x30, 0xc4, 0xdd, 0x2f, 0x01, 0x63, 0x80, 0xca, 0x6a, 0xeb, 0x00,
+ 0xf2, 0xf1, 0x42, 0x00, 0x68, 0xc3, 0x78, 0x58, 0xce, 0x3b, 0x8c, 0x05,
+ 0x3d, 0x31, 0xc5, 0x34, 0x21, 0x00, 0x0a, 0xa1, 0x05, 0xc3, 0x78, 0x64,
+ 0xce, 0x1f, 0xa7, 0x00, 0x10, 0x91, 0xc6, 0x03, 0x81, 0x00, 0x12, 0x61,
+ 0xc4, 0x13, 0xc7, 0x01, 0x63, 0x20, 0xc5, 0x03, 0x82, 0x01, 0x63, 0x1b,
+ 0x03, 0x78, 0x70, 0xcc, 0x89, 0x94, 0x05, 0x3a, 0xa0, 0xcf, 0x63, 0x66,
+ 0x00, 0xf2, 0x51, 0xc6, 0xbf, 0xd5, 0x00, 0x0a, 0x29, 0xc4, 0x69, 0xdb,
+ 0x00, 0x0a, 0x38, 0xc9, 0x6a, 0xec, 0x00, 0xf2, 0x41, 0xc8, 0x4d, 0x9e,
+ 0x00, 0x0c, 0xe9, 0xcd, 0x7d, 0x97, 0x00, 0x11, 0x00, 0x43, 0x0b, 0xf9,
+ 0xc3, 0x78, 0x76, 0xc8, 0x21, 0xcc, 0x05, 0x3c, 0x80, 0xcf, 0x63, 0x66,
+ 0x00, 0xf1, 0xe1, 0xc6, 0xbf, 0xd5, 0x00, 0x09, 0xd9, 0xc4, 0x69, 0xdb,
+ 0x00, 0x09, 0xe8, 0xc7, 0x0d, 0xd9, 0x00, 0xf1, 0xb3, 0x03, 0x78, 0x82,
+ 0xc8, 0xaa, 0xa8, 0x01, 0x63, 0x00, 0xc3, 0x05, 0xe3, 0x00, 0x09, 0xf9,
+ 0xc5, 0x1f, 0x01, 0x01, 0x63, 0x10, 0xc5, 0x03, 0x82, 0x00, 0x0a, 0x09,
+ 0xcd, 0x72, 0x9b, 0x00, 0x0e, 0x40, 0xc2, 0x00, 0x4d, 0x00, 0x11, 0xe9,
+ 0xc3, 0xa2, 0x1f, 0x05, 0x3d, 0x68, 0xc8, 0x0d, 0xd8, 0x00, 0xf1, 0x91,
+ 0xce, 0x3c, 0x2c, 0x05, 0x3a, 0x11, 0xc8, 0x21, 0xcc, 0x01, 0x63, 0x48,
+ 0xd4, 0x3c, 0x26, 0x05, 0x3a, 0x20, 0xc6, 0xbf, 0xd5, 0x00, 0x09, 0xb1,
+ 0xc4, 0x69, 0xdb, 0x00, 0x0f, 0x40, 0xc6, 0xbf, 0xd5, 0x00, 0xf1, 0x41,
+ 0xc9, 0x07, 0x97, 0x00, 0x09, 0x21, 0xc4, 0x69, 0xdb, 0x00, 0x10, 0xf0,
+ 0xc8, 0x1e, 0x8a, 0x00, 0xf1, 0x31, 0x43, 0x0b, 0xf9, 0xc3, 0x78, 0x88,
+ 0xc8, 0x21, 0xcc, 0x01, 0x63, 0x38, 0xc9, 0x07, 0x97, 0x00, 0x08, 0xe1,
+ 0xc6, 0xbf, 0xd5, 0x00, 0x09, 0x11, 0xc4, 0x69, 0xdb, 0x00, 0x0f, 0x30,
+ 0xcf, 0x63, 0x66, 0x00, 0xf0, 0x91, 0xc6, 0xbf, 0xd5, 0x00, 0xf0, 0x81,
+ 0xc4, 0x69, 0xdb, 0x00, 0x08, 0x70, 0xc5, 0x00, 0x34, 0x00, 0xf0, 0x61,
+ 0xc5, 0x03, 0x50, 0x00, 0xf0, 0x50, 0xcd, 0x7c, 0xa0, 0x00, 0x0f, 0x93,
+ 0x03, 0x78, 0x94, 0xc5, 0x03, 0x82, 0x00, 0x08, 0x81, 0xd3, 0x3f, 0xd1,
+ 0x05, 0x3e, 0x38, 0xc6, 0xbf, 0xd5, 0x00, 0x06, 0x3b, 0x03, 0x78, 0x9a,
+ 0xc9, 0x07, 0x97, 0x00, 0x08, 0x41, 0xc4, 0x69, 0xdb, 0x00, 0x08, 0x60,
+ 0xc5, 0x00, 0x34, 0x00, 0xf0, 0x21, 0xc5, 0x03, 0x50, 0x00, 0xf0, 0x10,
+ 0xc9, 0x07, 0x97, 0x00, 0x09, 0xa1, 0xcb, 0x4b, 0x49, 0x05, 0x3d, 0x90,
+ 0x45, 0x01, 0xac, 0xc3, 0x78, 0xa0, 0xc6, 0x12, 0x4f, 0x01, 0x5b, 0x89,
+ 0x4c, 0x13, 0x36, 0x43, 0x78, 0xca, 0xe0, 0x02, 0x07, 0x01, 0x4b, 0x70,
+ 0x46, 0x00, 0x39, 0x43, 0x78, 0xd0, 0xc6, 0x46, 0x1a, 0x07, 0xd9, 0x59,
+ 0xc7, 0x46, 0x19, 0x07, 0xd9, 0x50, 0xc5, 0x65, 0x8c, 0x07, 0xd9, 0x81,
+ 0xc5, 0x7d, 0x10, 0x07, 0xd9, 0x71, 0xc6, 0xd3, 0x88, 0x07, 0xd9, 0x78,
+ 0xc5, 0x68, 0xc5, 0x05, 0x4b, 0x51, 0xc6, 0xba, 0x7d, 0x05, 0x4b, 0x39,
+ 0xc6, 0x7f, 0x3e, 0x05, 0x4b, 0x28, 0xc6, 0xae, 0x6e, 0x05, 0x4b, 0xc9,
+ 0xc5, 0xba, 0x7e, 0x00, 0x88, 0x20, 0xc7, 0xca, 0xee, 0x05, 0x4b, 0xc1,
+ 0x85, 0x00, 0x88, 0x68, 0xc7, 0xca, 0x5b, 0x05, 0x4b, 0xa9, 0x95, 0x00,
+ 0x88, 0x50, 0xc5, 0x7f, 0x3f, 0x00, 0x89, 0x69, 0xc6, 0xae, 0x92, 0x00,
+ 0x89, 0xc0, 0xc5, 0xba, 0x7e, 0x00, 0x89, 0x79, 0xc6, 0xae, 0x6e, 0x00,
+ 0x89, 0xc8, 0x43, 0x68, 0xc6, 0xc3, 0x78, 0xdc, 0xc6, 0xae, 0x80, 0x00,
+ 0x89, 0xd0, 0xc4, 0xb1, 0xd8, 0x00, 0x89, 0xb1, 0xc6, 0xb1, 0xd7, 0x00,
+ 0x89, 0xb8, 0xc6, 0xae, 0x92, 0x05, 0x4b, 0x99, 0xc5, 0x7f, 0x3f, 0x00,
+ 0x88, 0xf0, 0x42, 0x00, 0xe4, 0xc3, 0x78, 0xe6, 0xc8, 0x95, 0xb4, 0x00,
+ 0x89, 0x28, 0xc5, 0xba, 0x7e, 0x00, 0x89, 0x01, 0xc6, 0xae, 0x6e, 0x00,
+ 0x89, 0x48, 0xc4, 0xb1, 0xd8, 0x00, 0x89, 0x39, 0xc6, 0xb1, 0xd7, 0x00,
+ 0x89, 0x40, 0xc4, 0x68, 0xc6, 0x00, 0x89, 0x59, 0xc6, 0xae, 0x80, 0x00,
+ 0x8a, 0xb8, 0x8a, 0x00, 0x88, 0x71, 0xc9, 0xb7, 0x6b, 0x00, 0x8a, 0x91,
+ 0xc7, 0xc4, 0x03, 0x00, 0x8a, 0x98, 0x15, 0xc3, 0x79, 0x02, 0x05, 0x43,
+ 0x79, 0x0e, 0xc3, 0x3b, 0xc9, 0x00, 0x89, 0xf1, 0x44, 0x5d, 0x46, 0x43,
+ 0x79, 0x1a, 0xc4, 0xb1, 0xd8, 0x00, 0x8a, 0x81, 0xc6, 0xb1, 0xd7, 0x00,
+ 0x8a, 0xa8, 0x83, 0x00, 0x8c, 0xf1, 0xc3, 0x75, 0x56, 0x06, 0xbe, 0x33,
+ 0x03, 0x79, 0x26, 0xc3, 0xe7, 0xb6, 0x00, 0x8d, 0x01, 0xc3, 0xe5, 0x00,
+ 0x00, 0x8d, 0x09, 0xc3, 0x40, 0xfa, 0x06, 0xbe, 0x28, 0x87, 0x00, 0x8b,
+ 0x20, 0x91, 0x00, 0x8b, 0x39, 0xc3, 0xec, 0xba, 0x00, 0x8b, 0xe9, 0xc3,
+ 0xec, 0xb7, 0x00, 0x8b, 0xf1, 0xc4, 0xe8, 0x03, 0x00, 0x8d, 0x28, 0x83,
+ 0x00, 0x8c, 0x23, 0x03, 0x79, 0x2a, 0xc2, 0x09, 0xc6, 0x00, 0x8c, 0x30,
+ 0x87, 0x06, 0xbd, 0xb8, 0x91, 0x00, 0x8c, 0x78, 0x91, 0x00, 0x8c, 0x88,
+ 0x87, 0x06, 0xbd, 0x98, 0x97, 0x00, 0x8c, 0xb1, 0x91, 0x06, 0xbd, 0xd0,
+ 0x87, 0x00, 0x8b, 0x58, 0x91, 0x00, 0x8b, 0x78, 0x91, 0x06, 0xbd, 0x80,
+ 0x87, 0x00, 0x8d, 0x38, 0x87, 0x06, 0xbd, 0xf0, 0x91, 0x06, 0xbd, 0xf8,
+ 0x8d, 0x00, 0x8d, 0x41, 0x45, 0xd9, 0x4c, 0xc3, 0x79, 0x2e, 0xc7, 0xcf,
+ 0x7f, 0x00, 0x8e, 0x19, 0xc7, 0xcf, 0x9b, 0x00, 0x8f, 0xd1, 0xc7, 0xcf,
+ 0x47, 0x00, 0x8f, 0xd9, 0xc9, 0xb1, 0xd4, 0x00, 0x8f, 0xe1, 0xc9, 0xb1,
+ 0xcb, 0x00, 0x8f, 0xe9, 0xc7, 0xcf, 0xa2, 0x00, 0x8f, 0xf0, 0xc5, 0xc4,
+ 0x7c, 0x00, 0x8f, 0x11, 0x12, 0xc3, 0x79, 0x38, 0xc5, 0xc3, 0xd4, 0x06,
+ 0xbe, 0xe8, 0xc6, 0x7f, 0x3e, 0x00, 0x8d, 0x49, 0x43, 0x28, 0x17, 0xc3,
+ 0x79, 0x44, 0x44, 0xc3, 0xd4, 0xc3, 0x79, 0x54, 0xc8, 0xc2, 0x9b, 0x00,
+ 0x8f, 0x71, 0xc5, 0xc2, 0x9e, 0x00, 0x8f, 0x71, 0xc5, 0xc4, 0x44, 0x00,
+ 0x8f, 0xf9, 0x45, 0xba, 0x7d, 0xc3, 0x79, 0x5e, 0xc5, 0xc4, 0x7c, 0x06,
+ 0xbf, 0x01, 0xc5, 0x68, 0xc5, 0x06, 0xbf, 0x31, 0xc5, 0xc3, 0xe9, 0x06,
+ 0xbf, 0xc8, 0x95, 0x00, 0x8d, 0xd1, 0x43, 0x68, 0xbb, 0xc3, 0x79, 0x68,
+ 0x43, 0x03, 0x49, 0xc3, 0x79, 0x74, 0xc7, 0xca, 0x54, 0x00, 0x8f, 0x49,
+ 0x43, 0xb9, 0x2c, 0xc3, 0x79, 0x80, 0xc7, 0xca, 0x8c, 0x06, 0xbf, 0x51,
+ 0x43, 0xbc, 0xe4, 0xc3, 0x79, 0x8c, 0xc7, 0xca, 0x07, 0x06, 0xbf, 0x90,
+ 0xc4, 0xb1, 0xd8, 0x00, 0x8d, 0x61, 0xc6, 0xb1, 0xd7, 0x06, 0xbe, 0x60,
+ 0xc5, 0xba, 0x7e, 0x00, 0x8e, 0x31, 0xc6, 0xae, 0x6e, 0x00, 0x8e, 0x48,
+ 0x96, 0x00, 0x8d, 0x71, 0xc8, 0xb9, 0xab, 0x00, 0x8e, 0x51, 0xc7, 0xcc,
+ 0x84, 0x00, 0x8e, 0x59, 0xc7, 0xcc, 0x3e, 0x06, 0xbe, 0x79, 0x45, 0xd4,
+ 0xfc, 0xc3, 0x79, 0x98, 0x43, 0xcc, 0x06, 0xc3, 0x79, 0xa2, 0xc9, 0xb0,
+ 0x90, 0x06, 0xbe, 0xa0, 0x44, 0x7f, 0x3f, 0xc3, 0x79, 0xae, 0xcc, 0x68,
+ 0xbe, 0x00, 0x8e, 0xa9, 0xc6, 0xae, 0x92, 0x00, 0x8e, 0xc0, 0x8f, 0x00,
+ 0x8d, 0x89, 0xc8, 0xb8, 0x9b, 0x00, 0x8e, 0x89, 0xc6, 0xd9, 0x52, 0x00,
+ 0x8e, 0x91, 0xc7, 0xc6, 0x79, 0x06, 0xbe, 0xc0, 0x43, 0x68, 0xc6, 0xc3,
+ 0x79, 0xbe, 0xc6, 0xae, 0x80, 0x00, 0x8d, 0xa9, 0xc6, 0xb2, 0x8b, 0x00,
+ 0x8e, 0xb8, 0xc3, 0x3b, 0xc9, 0x00, 0x8d, 0x99, 0x44, 0x5d, 0x46, 0x43,
+ 0x79, 0xda, 0xc6, 0xae, 0x6e, 0x00, 0x8d, 0xa1, 0x44, 0xba, 0x7e, 0x43,
+ 0x79, 0xe6, 0x48, 0x7b, 0x1e, 0xc3, 0x79, 0xf0, 0xc6, 0xc2, 0x9d, 0x06,
+ 0xbe, 0xd8, 0x43, 0x68, 0xc6, 0xc3, 0x79, 0xfa, 0xc6, 0xb2, 0x8b, 0x00,
+ 0x8e, 0xf8, 0xc3, 0x3b, 0xc9, 0x00, 0x8e, 0xe9, 0x44, 0x5d, 0x46, 0x43,
+ 0x7a, 0x04, 0xc6, 0xb1, 0xd7, 0x00, 0x8f, 0x01, 0xc4, 0xb1, 0xd8, 0x06,
+ 0xbf, 0x10, 0x92, 0x00, 0x8d, 0xc1, 0x45, 0xd7, 0xf6, 0xc3, 0x7a, 0x10,
+ 0x46, 0xd8, 0x20, 0xc3, 0x7a, 0x1a, 0xc9, 0xb8, 0x04, 0x00, 0x8f, 0x21,
+ 0xc7, 0xc5, 0x8b, 0x00, 0x8f, 0x29, 0xc8, 0xba, 0xa3, 0x06, 0xbf, 0x19,
+ 0xc7, 0xc5, 0xca, 0x06, 0xbf, 0x29, 0x43, 0xc3, 0xe7, 0x43, 0x7a, 0x24,
+ 0x8a, 0x00, 0x8e, 0x09, 0xc7, 0xc4, 0x65, 0x00, 0x8f, 0x99, 0xc6, 0xd6,
+ 0x7c, 0x06, 0xbf, 0xd1, 0xc9, 0xb7, 0x8f, 0x06, 0xbf, 0xd8, 0x95, 0x00,
+ 0x8f, 0x59, 0xc7, 0xca, 0x5b, 0x06, 0xbf, 0x99, 0xcf, 0x68, 0xbb, 0x06,
+ 0xbf, 0xa0, 0xc5, 0x7f, 0x3f, 0x00, 0x8f, 0x61, 0xc6, 0xae, 0x92, 0x00,
+ 0x8f, 0x78, 0x85, 0x00, 0x8d, 0xe9, 0xc7, 0xcf, 0x86, 0x06, 0xbf, 0xb9,
+ 0xcc, 0x8d, 0x00, 0x06, 0xbf, 0xc0, 0x43, 0xc2, 0x7b, 0xc3, 0x7a, 0x30,
+ 0x43, 0x7f, 0x37, 0xc3, 0x7a, 0x3c, 0x89, 0x00, 0x8f, 0x81, 0xc9, 0xb6,
+ 0x8a, 0x00, 0x8f, 0x91, 0xc7, 0xc6, 0x5d, 0x06, 0xbf, 0xa9, 0xc7, 0xc6,
+ 0x6b, 0x06, 0xbf, 0xb0, 0xc4, 0x68, 0xc6, 0x01, 0x8b, 0xc1, 0xc6, 0xae,
+ 0x80, 0x01, 0x8c, 0x20, 0x92, 0x01, 0x89, 0xe1, 0xc6, 0xd7, 0xf6, 0x01,
+ 0x8b, 0x58, 0x95, 0x01, 0x8a, 0x21, 0xc8, 0xbe, 0xbb, 0x01, 0x8b, 0xa1,
+ 0xc8, 0xc2, 0x1b, 0x01, 0x8b, 0xa8, 0xc5, 0xba, 0x7e, 0x01, 0x8b, 0x99,
+ 0xc6, 0xae, 0x6e, 0x01, 0x8b, 0xb8, 0xc8, 0x7b, 0x1f, 0x01, 0x8c, 0x30,
+ 0x89, 0x01, 0x8c, 0x09, 0xc6, 0xd5, 0xda, 0x01, 0x8c, 0x11, 0xc9, 0xb6,
+ 0x8a, 0x01, 0x8c, 0x18, 0x83, 0x07, 0xfb, 0x61, 0x97, 0x07, 0xfb, 0x69,
+ 0x91, 0x07, 0xfb, 0x70, 0xc9, 0x4f, 0xa1, 0x0f, 0x64, 0xd8, 0xc8, 0x4f,
+ 0xa2, 0x0f, 0x64, 0x91, 0xc7, 0x0d, 0x7f, 0x0f, 0x64, 0x48, 0xc9, 0x4f,
+ 0xa1, 0x0f, 0x64, 0xd0, 0xc8, 0x4f, 0xa2, 0x0f, 0x64, 0x89, 0xc7, 0x0d,
+ 0x7f, 0x0f, 0x64, 0x40, 0xc9, 0x4f, 0xa1, 0x0f, 0x64, 0xc8, 0x00, 0x43,
+ 0x7a, 0x48, 0xc9, 0x4f, 0xa1, 0x0f, 0x64, 0xc0, 0x00, 0x43, 0x7a, 0x54,
+ 0xc9, 0x4f, 0xa1, 0x0f, 0x64, 0xb8, 0x00, 0x43, 0x7a, 0x60, 0xc9, 0x4f,
+ 0xa1, 0x0f, 0x64, 0xb0, 0x00, 0x43, 0x7a, 0x6c, 0x19, 0xc3, 0x7a, 0x78,
+ 0x0a, 0xc3, 0x7a, 0x80, 0xc2, 0x01, 0x04, 0x01, 0x9f, 0x48, 0xc3, 0x08,
+ 0xde, 0x01, 0x9f, 0x1b, 0x03, 0x7a, 0x8c, 0x0b, 0x43, 0x7a, 0x92, 0xc2,
+ 0x22, 0x45, 0x01, 0x9f, 0x2b, 0x03, 0x7a, 0x9e, 0xc4, 0x15, 0xa7, 0x01,
+ 0x9f, 0x32, 0x03, 0x7a, 0xa4, 0xc4, 0x00, 0x48, 0x01, 0x9f, 0x3b, 0x03,
+ 0x7a, 0xaa, 0xc5, 0x66, 0x81, 0x01, 0x9f, 0x50, 0xc4, 0x12, 0xf2, 0x01,
+ 0x9f, 0x90, 0x91, 0x01, 0x9a, 0xd1, 0x07, 0x43, 0x7a, 0xb0, 0xc3, 0x05,
+ 0xdf, 0x01, 0x9a, 0xd9, 0xc6, 0x55, 0xbb, 0x01, 0x9b, 0x28, 0xc4, 0x12,
+ 0xf2, 0x01, 0x9b, 0x30, 0xc2, 0x00, 0x29, 0x01, 0x9a, 0xe9, 0xc5, 0x12,
+ 0xf1, 0x01, 0x9b, 0x38, 0xc4, 0x12, 0xf2, 0x01, 0x9b, 0x40, 0xc4, 0x12,
+ 0xf2, 0x01, 0x9b, 0x48, 0xc3, 0x03, 0x2c, 0x01, 0x9b, 0x50, 0x49, 0x29,
+ 0x20, 0x43, 0x7a, 0xbc, 0xce, 0x29, 0x29, 0x0f, 0xd0, 0x49, 0xdb, 0x15,
+ 0x9a, 0x0f, 0xd1, 0x98, 0xc7, 0x09, 0xb4, 0x01, 0x34, 0x31, 0xc8, 0x3b,
+ 0x74, 0x01, 0x4f, 0x60, 0xce, 0x3c, 0x9e, 0x01, 0x2f, 0xb9, 0xcd, 0x0b,
+ 0x14, 0x01, 0x2f, 0xa0, 0xce, 0x3c, 0x9e, 0x01, 0x2f, 0xb1, 0xcd, 0x0b,
+ 0x14, 0x01, 0x2f, 0xa8, 0xce, 0x62, 0xd1, 0x01, 0x3f, 0x29, 0xce, 0x12,
+ 0x80, 0x01, 0x2d, 0x10, 0xcd, 0x6f, 0xc3, 0x01, 0x3f, 0x21, 0x45, 0x00,
+ 0xc8, 0x43, 0x7a, 0xd2, 0xce, 0x3c, 0x9e, 0x01, 0x2f, 0x99, 0xcd, 0x0b,
+ 0x14, 0x01, 0x2f, 0x80, 0x00, 0x43, 0x7a, 0xde, 0xc3, 0x3a, 0xc5, 0x00,
+ 0xcf, 0xc9, 0xc4, 0xbc, 0x79, 0x00, 0xcf, 0x48, 0xc3, 0x3a, 0xc5, 0x00,
+ 0xcf, 0xc1, 0xc4, 0xbc, 0x79, 0x00, 0xcf, 0x40, 0xc3, 0xe4, 0xb3, 0x00,
+ 0xbf, 0xc9, 0xc2, 0x05, 0x1b, 0x00, 0xbf, 0xc0, 0xc3, 0xea, 0xce, 0x0d,
+ 0x7f, 0xf1, 0xc3, 0x83, 0xe8, 0x0d, 0x7f, 0xe9, 0xc3, 0x3b, 0x0b, 0x0d,
+ 0x7f, 0xd9, 0xc3, 0x82, 0xe0, 0x0d, 0x7f, 0xd1, 0xc3, 0x82, 0xec, 0x0d,
+ 0x7f, 0xc9, 0xc3, 0x82, 0xa4, 0x0d, 0x7f, 0xc1, 0xc3, 0x83, 0x28, 0x0d,
+ 0x7f, 0xb8, 0xc3, 0xea, 0xce, 0x0d, 0x7f, 0xb1, 0xc3, 0x83, 0xe8, 0x0d,
+ 0x7f, 0xa9, 0xc3, 0x3b, 0x0b, 0x0d, 0x7f, 0x99, 0xc3, 0x82, 0xe0, 0x0d,
+ 0x7f, 0x91, 0xc3, 0x82, 0xec, 0x0d, 0x7f, 0x89, 0xc3, 0x82, 0xa4, 0x0d,
+ 0x7f, 0x80, 0x94, 0x00, 0x67, 0x00, 0x8e, 0x00, 0x67, 0x08, 0xc5, 0xde,
+ 0x46, 0x01, 0x79, 0xa1, 0xc4, 0xc3, 0x5c, 0x01, 0x7b, 0x40, 0xc5, 0x94,
+ 0x83, 0x01, 0x79, 0x99, 0xca, 0xa5, 0xec, 0x01, 0x7d, 0x58, 0xc4, 0x2b,
+ 0x0a, 0x01, 0x7c, 0x48, 0xc4, 0x01, 0xcb, 0x01, 0x79, 0x69, 0x86, 0x01,
+ 0x7d, 0x48, 0xc9, 0x4f, 0xa1, 0x08, 0x4f, 0xa8, 0xc9, 0x4f, 0xa1, 0x08,
+ 0x4f, 0xa0, 0xc7, 0x0d, 0x7f, 0x08, 0x4e, 0xc3, 0x03, 0x7a, 0xea, 0xc8,
+ 0x4f, 0xa2, 0x08, 0x4f, 0x08, 0xc9, 0x4f, 0xa1, 0x08, 0x4f, 0x50, 0xc7,
+ 0x0d, 0x7f, 0x08, 0x4e, 0xbb, 0x03, 0x7a, 0xf0, 0xc8, 0x4f, 0xa2, 0x08,
+ 0x4f, 0x00, 0xc9, 0x4f, 0xa1, 0x08, 0x4f, 0x48, 0x00, 0x43, 0x7a, 0xf6,
+ 0xc9, 0x4f, 0xa1, 0x08, 0x4f, 0x40, 0x00, 0x43, 0x7b, 0x05, 0xc9, 0x4f,
+ 0xa1, 0x08, 0x4f, 0x38, 0x00, 0x43, 0x7b, 0x14, 0xc9, 0x4f, 0xa1, 0x08,
+ 0x4f, 0x30, 0x00, 0x43, 0x7b, 0x23, 0xc9, 0x4f, 0xa1, 0x08, 0x4f, 0x28,
+ 0xc9, 0x4f, 0xa1, 0x08, 0x4f, 0x68, 0xc4, 0x00, 0x68, 0x01, 0x4d, 0x79,
+ 0xc2, 0x02, 0x6a, 0x01, 0x4d, 0x68, 0xc4, 0x00, 0x68, 0x01, 0x4d, 0x71,
+ 0xc2, 0x02, 0x6a, 0x01, 0x4d, 0x60, 0xc4, 0x00, 0xcd, 0x01, 0x4d, 0x59,
+ 0xc5, 0x00, 0x47, 0x01, 0x4d, 0x50, 0xc4, 0x00, 0xcd, 0x01, 0x4d, 0x49,
+ 0xc5, 0x00, 0x47, 0x01, 0x4d, 0x40, 0x45, 0x00, 0x3f, 0xc3, 0x7b, 0x32,
+ 0xc8, 0x84, 0x4c, 0x05, 0x34, 0xe8, 0x48, 0x11, 0xae, 0x43, 0x7b, 0x8c,
+ 0x97, 0x00, 0xe9, 0xe8, 0xcc, 0x26, 0x18, 0x05, 0x3f, 0xc0, 0xc7, 0xce,
+ 0x6e, 0x00, 0xe9, 0x78, 0x87, 0x00, 0xe9, 0x68, 0xc4, 0x2b, 0x0a, 0x05,
+ 0x38, 0x01, 0xc5, 0xe3, 0xc3, 0x05, 0x38, 0x11, 0xc2, 0x00, 0x93, 0x05,
+ 0x38, 0x21, 0xc2, 0x04, 0x34, 0x05, 0x38, 0x30, 0xc4, 0x2b, 0x0a, 0x05,
+ 0x38, 0x09, 0xc5, 0xe3, 0xc3, 0x05, 0x38, 0x19, 0xc2, 0x00, 0x93, 0x05,
+ 0x38, 0x29, 0xc2, 0x04, 0x34, 0x05, 0x38, 0x38, 0xc4, 0x8b, 0xed, 0x00,
+ 0xed, 0xf9, 0x46, 0x41, 0xe6, 0xc3, 0x7b, 0x94, 0x46, 0x01, 0xab, 0xc3,
+ 0x7b, 0xc6, 0xc9, 0xaf, 0x79, 0x00, 0xea, 0xa1, 0xd3, 0x43, 0xe6, 0x08,
+ 0x3d, 0x59, 0xc9, 0xab, 0x2f, 0x08, 0x3d, 0x63, 0x03, 0x7b, 0xd2, 0xcb,
+ 0x96, 0xfb, 0x08, 0x3d, 0x70, 0xc2, 0x23, 0x6a, 0x00, 0xed, 0xf1, 0xc2,
+ 0x01, 0x0d, 0x00, 0xed, 0xa1, 0xc2, 0x01, 0x47, 0x00, 0xec, 0xf1, 0xc2,
+ 0x00, 0x56, 0x00, 0xea, 0x88, 0x46, 0x01, 0xab, 0x43, 0x7b, 0xd8, 0x46,
+ 0x01, 0xab, 0x43, 0x7b, 0xe4, 0x47, 0x01, 0xf8, 0xc3, 0x7b, 0xf0, 0xca,
+ 0x43, 0xef, 0x00, 0xec, 0xe9, 0xc2, 0x00, 0x0a, 0x00, 0xeb, 0x09, 0x46,
+ 0x18, 0xf0, 0x43, 0x7c, 0x29, 0xc6, 0x12, 0x65, 0x00, 0xed, 0xb9, 0x00,
+ 0x43, 0x7c, 0x35, 0x46, 0x01, 0xab, 0xc3, 0x7c, 0x41, 0x05, 0xc3, 0x7c,
+ 0x4d, 0xc9, 0xb2, 0x7f, 0x00, 0xea, 0xc8, 0xc2, 0x00, 0x0a, 0x00, 0xed,
+ 0x90, 0xc7, 0xce, 0x67, 0x00, 0xed, 0x89, 0xc3, 0x0b, 0x47, 0x00, 0xea,
+ 0xe9, 0xcc, 0x8b, 0x14, 0x00, 0xea, 0xa9, 0xca, 0x1e, 0x1b, 0x08, 0x3c,
+ 0x28, 0xce, 0x04, 0x59, 0x00, 0xed, 0x79, 0xc9, 0x4d, 0x9d, 0x00, 0xed,
+ 0x70, 0xca, 0x1e, 0x1b, 0x00, 0xed, 0x60, 0x46, 0x01, 0xab, 0xc3, 0x7c,
+ 0x59, 0xca, 0xa0, 0xba, 0x05, 0x3f, 0xc9, 0xc9, 0xab, 0x2f, 0x08, 0x3c,
+ 0xc9, 0xc9, 0xb5, 0x7c, 0x08, 0x3c, 0xd1, 0xc3, 0xed, 0x1d, 0x08, 0x3c,
+ 0xf2, 0x03, 0x7c, 0x7a, 0xd2, 0x4b, 0x42, 0x00, 0xed, 0x40, 0xc3, 0x00,
+ 0xf2, 0x00, 0xed, 0x29, 0xcc, 0x26, 0x18, 0x00, 0xed, 0x20, 0xd4, 0x3c,
+ 0xb2, 0x00, 0xed, 0x0b, 0x03, 0x7c, 0x80, 0x07, 0xc3, 0x7c, 0x86, 0x46,
+ 0x01, 0xab, 0xc3, 0x7c, 0x92, 0xc9, 0xb5, 0x7c, 0x08, 0x3c, 0x3a, 0x03,
+ 0x7c, 0xa1, 0xcb, 0x90, 0x3c, 0x08, 0x3c, 0x80, 0x48, 0x11, 0xae, 0xc3,
+ 0x7c, 0xa7, 0xc8, 0xb8, 0x73, 0x08, 0x3c, 0x89, 0x46, 0x01, 0xab, 0x43,
+ 0x7c, 0xb7, 0x45, 0x27, 0x6c, 0xc3, 0x7c, 0xc3, 0xc4, 0x33, 0x51, 0x00,
+ 0x17, 0x01, 0xca, 0x1e, 0x1b, 0x08, 0x3c, 0x98, 0xc2, 0x01, 0x5b, 0x00,
+ 0xea, 0xe1, 0xc4, 0xdd, 0x2f, 0x00, 0xea, 0x29, 0x87, 0x08, 0x3c, 0x18,
+ 0x44, 0x03, 0xf6, 0xc3, 0x7c, 0xcf, 0xcc, 0x26, 0x18, 0x08, 0x3d, 0x10,
+ 0xc3, 0x0b, 0xa3, 0x05, 0x5a, 0xe3, 0x03, 0x7c, 0xd7, 0x46, 0x01, 0xab,
+ 0x43, 0x7c, 0xdd, 0xcc, 0x26, 0x0c, 0x00, 0x16, 0x0b, 0x03, 0x7c, 0xe9,
+ 0xc5, 0x00, 0xea, 0x00, 0x15, 0xe8, 0xe0, 0x04, 0x47, 0x08, 0x3d, 0xc8,
+ 0xcd, 0x36, 0x6d, 0x00, 0x16, 0x61, 0xc6, 0x61, 0xbc, 0x00, 0x16, 0x69,
+ 0xcc, 0x1e, 0x64, 0x00, 0x16, 0x71, 0xcc, 0x87, 0x84, 0x00, 0x16, 0x79,
+ 0x42, 0x00, 0x68, 0xc3, 0x7c, 0xef, 0x43, 0x00, 0xcd, 0xc3, 0x7c, 0xfb,
+ 0xd9, 0x1d, 0xf3, 0x05, 0x38, 0xf9, 0x16, 0xc3, 0x7d, 0x0a, 0xcc, 0x4b,
+ 0xd2, 0x00, 0x17, 0x81, 0x42, 0x00, 0x47, 0xc3, 0x7d, 0x16, 0xd1, 0x07,
+ 0x96, 0x05, 0x3c, 0x40, 0xc5, 0x16, 0x94, 0x00, 0x15, 0xd1, 0xca, 0x2d,
+ 0x3b, 0x00, 0x17, 0x70, 0xc9, 0x00, 0x9e, 0x00, 0x16, 0x29, 0xc4, 0x32,
+ 0x64, 0x00, 0x16, 0xa8, 0xcc, 0x02, 0x5b, 0x05, 0x38, 0xb9, 0xc5, 0x02,
+ 0xe2, 0x05, 0x38, 0xc1, 0xce, 0x0f, 0x0e, 0x05, 0x38, 0xc8, 0x00, 0xc3,
+ 0x7d, 0x22, 0x44, 0x01, 0x8f, 0x43, 0x7d, 0x34, 0x47, 0x19, 0x80, 0xc3,
+ 0x7d, 0x40, 0xd2, 0x4a, 0xe8, 0x05, 0x38, 0x91, 0xc8, 0x4a, 0xf2, 0x00,
+ 0x17, 0x28, 0x47, 0x19, 0x80, 0xc3, 0x7d, 0x4c, 0xd2, 0x4a, 0xe8, 0x05,
+ 0x38, 0xb1, 0xc8, 0x4a, 0xf2, 0x00, 0x17, 0x48, 0xc8, 0x4a, 0xf2, 0x05,
+ 0x38, 0x49, 0xd2, 0x4a, 0xe8, 0x05, 0x38, 0x70, 0x8e, 0x08, 0xb0, 0x48,
+ 0x94, 0x08, 0xb0, 0x38, 0x83, 0x00, 0xc5, 0x29, 0xc2, 0x01, 0x0e, 0x00,
+ 0xc5, 0x20, 0xc2, 0x1a, 0x36, 0x00, 0xc5, 0x19, 0x83, 0x00, 0xc4, 0xe0,
+ 0xc2, 0x01, 0x0e, 0x00, 0xc5, 0x09, 0xc3, 0x45, 0xca, 0x00, 0xc4, 0xf8,
+ 0x83, 0x00, 0xc5, 0x01, 0xc2, 0x00, 0x44, 0x00, 0xc4, 0xf0, 0xc5, 0xde,
+ 0x50, 0x00, 0xc5, 0x49, 0xc4, 0xe5, 0xdb, 0x00, 0xc4, 0x10, 0xc2, 0x01,
+ 0x0e, 0x00, 0xc4, 0x69, 0x83, 0x00, 0xc4, 0x60, 0xc3, 0xb0, 0x39, 0x00,
+ 0xc4, 0xc9, 0xc2, 0x00, 0x44, 0x00, 0xc4, 0xc0, 0xc3, 0x11, 0xb7, 0x0e,
+ 0xb6, 0xd1, 0xc5, 0xdc, 0x84, 0x0e, 0xb6, 0x80, 0xc3, 0x11, 0xb7, 0x0e,
+ 0xba, 0x71, 0xc5, 0xdc, 0x84, 0x0e, 0xba, 0x20, 0xc3, 0x11, 0xb7, 0x0e,
+ 0xb9, 0xa1, 0xc5, 0xdc, 0x84, 0x0e, 0xb9, 0x50, 0xc7, 0x01, 0xb0, 0x0e,
+ 0xb9, 0x68, 0xc4, 0x15, 0xa7, 0x0e, 0xbf, 0x99, 0xc2, 0x22, 0x45, 0x0e,
+ 0xbf, 0x90, 0xc3, 0x0d, 0x8f, 0x0e, 0xbf, 0x89, 0xc3, 0x08, 0xde, 0x0e,
+ 0xbf, 0x80, 0xc4, 0x05, 0xde, 0x0e, 0xbf, 0x79, 0xc2, 0x0a, 0x20, 0x0e,
+ 0xbf, 0x70, 0xc8, 0x9d, 0xb0, 0x0e, 0xbe, 0x49, 0xc9, 0xad, 0x9c, 0x0e,
+ 0xbe, 0x39, 0xd3, 0x41, 0x4d, 0x0e, 0xbe, 0x18, 0x91, 0x0e, 0xb3, 0x23,
+ 0x03, 0x7d, 0x58, 0x92, 0x0e, 0xb3, 0x2b, 0x03, 0x7d, 0x5c, 0x85, 0x0e,
+ 0xb2, 0xc3, 0x03, 0x7d, 0x6c, 0x97, 0x0e, 0xb3, 0x53, 0x03, 0x7d, 0x72,
+ 0x96, 0x0e, 0xb3, 0x4b, 0x03, 0x7d, 0x78, 0x95, 0x0e, 0xb3, 0x43, 0x03,
+ 0x7d, 0x84, 0x88, 0x0e, 0xb2, 0xdb, 0x03, 0x7d, 0x8a, 0x94, 0x0e, 0xb3,
+ 0x3b, 0x03, 0x7d, 0x90, 0x9a, 0x0e, 0xb3, 0x6b, 0x03, 0x7d, 0x96, 0x90,
+ 0x0e, 0xb3, 0x1b, 0x03, 0x7d, 0x9a, 0x8f, 0x0e, 0xb3, 0x13, 0x03, 0x7d,
+ 0x9e, 0x8e, 0x0e, 0xb3, 0x0b, 0x03, 0x7d, 0xa2, 0x8d, 0x0e, 0xb3, 0x03,
+ 0x03, 0x7d, 0xa8, 0x8b, 0x0e, 0xb2, 0xf3, 0x03, 0x7d, 0xae, 0x87, 0x0e,
+ 0xb2, 0xd3, 0x03, 0x7d, 0xb4, 0x9c, 0x0e, 0xb3, 0x7b, 0x03, 0x7d, 0xc0,
+ 0x86, 0x0e, 0xb2, 0xcb, 0x03, 0x7d, 0xc6, 0x89, 0x0e, 0xb2, 0xe3, 0x03,
+ 0x7d, 0xcc, 0x84, 0x0e, 0xb2, 0xbb, 0x03, 0x7d, 0xd2, 0x83, 0x0e, 0xb2,
+ 0xb3, 0x03, 0x7d, 0xd8, 0x9b, 0x0e, 0xb3, 0x71, 0x99, 0x0e, 0xb3, 0x61,
+ 0x98, 0x0e, 0xb3, 0x59, 0x93, 0x0e, 0xb3, 0x31, 0x8c, 0x0e, 0xb2, 0xf9,
+ 0x8a, 0x0e, 0xb2, 0xe8, 0x91, 0x0e, 0xb2, 0x53, 0x03, 0x7d, 0xde, 0x92,
+ 0x0e, 0xb2, 0x5b, 0x03, 0x7d, 0xe2, 0x85, 0x0e, 0xb1, 0xf3, 0x03, 0x7d,
+ 0xf2, 0x97, 0x0e, 0xb2, 0x83, 0x03, 0x7d, 0xf8, 0x96, 0x0e, 0xb2, 0x7b,
+ 0x03, 0x7d, 0xfe, 0x95, 0x0e, 0xb2, 0x73, 0x03, 0x7e, 0x0d, 0x94, 0x0e,
+ 0xb2, 0x6b, 0x03, 0x7e, 0x13, 0x9a, 0x0e, 0xb2, 0x9b, 0x03, 0x7e, 0x19,
+ 0x90, 0x0e, 0xb2, 0x4b, 0x03, 0x7e, 0x1d, 0x8f, 0x0e, 0xb2, 0x43, 0x03,
+ 0x7e, 0x21, 0x8e, 0x0e, 0xb2, 0x3b, 0x03, 0x7e, 0x25, 0x8d, 0x0e, 0xb2,
+ 0x33, 0x03, 0x7e, 0x2b, 0x8b, 0x0e, 0xb2, 0x23, 0x03, 0x7e, 0x31, 0x87,
+ 0x0e, 0xb2, 0x03, 0x03, 0x7e, 0x37, 0x9c, 0x0e, 0xb2, 0xab, 0x03, 0x7e,
+ 0x43, 0x86, 0x0e, 0xb1, 0xfb, 0x03, 0x7e, 0x49, 0x89, 0x0e, 0xb2, 0x13,
+ 0x03, 0x7e, 0x4f, 0x84, 0x0e, 0xb1, 0xeb, 0x03, 0x7e, 0x55, 0x83, 0x0e,
+ 0xb1, 0xe3, 0x03, 0x7e, 0x5b, 0x9b, 0x0e, 0xb2, 0xa1, 0x99, 0x0e, 0xb2,
+ 0x91, 0x98, 0x0e, 0xb2, 0x89, 0x93, 0x0e, 0xb2, 0x61, 0x8c, 0x0e, 0xb2,
+ 0x29, 0x8a, 0x0e, 0xb2, 0x19, 0x88, 0x0e, 0xb2, 0x08, 0x0f, 0x43, 0x7e,
+ 0x61, 0xc2, 0x02, 0x29, 0x0e, 0xbc, 0x39, 0xc2, 0x00, 0x0a, 0x0e, 0xbc,
+ 0x29, 0x8b, 0x0e, 0xbb, 0xf8, 0xc2, 0x00, 0x0a, 0x0e, 0xbc, 0x30, 0xc6,
+ 0x12, 0x65, 0x0e, 0xbc, 0x20, 0xc2, 0x20, 0xa8, 0x0e, 0xbc, 0x19, 0xc4,
+ 0x8b, 0xed, 0x0e, 0xbb, 0xb8, 0xc4, 0x19, 0x8f, 0x0e, 0xbc, 0x10, 0xca,
+ 0x94, 0x73, 0x0e, 0xbc, 0x08, 0xc2, 0x03, 0x76, 0x0e, 0xbc, 0x00, 0x8b,
+ 0x0e, 0xbb, 0xe8, 0x97, 0x0e, 0xbb, 0xe0, 0x97, 0x0e, 0xbb, 0xd8, 0xc4,
+ 0xdc, 0xdf, 0x0e, 0xbb, 0xd0, 0xc4, 0x8f, 0x29, 0x0e, 0xbb, 0xc8, 0xc3,
+ 0x00, 0xf2, 0x0e, 0xbb, 0xc0, 0xc2, 0x00, 0x44, 0x0e, 0xbb, 0xb1, 0xc6,
+ 0x12, 0x65, 0x0e, 0xbb, 0xa0, 0xc3, 0x0b, 0x47, 0x0e, 0xbb, 0xa8, 0xc4,
+ 0xde, 0x10, 0x0e, 0xbb, 0x98, 0xc4, 0x33, 0x51, 0x0e, 0xbb, 0x90, 0xc3,
+ 0x0b, 0x47, 0x0e, 0xbb, 0x88, 0xc4, 0xdd, 0x2f, 0x0e, 0xbb, 0x80, 0x0f,
+ 0x43, 0x7e, 0x6d, 0xc2, 0x02, 0x29, 0x0e, 0xbb, 0x69, 0xc2, 0x00, 0x0a,
+ 0x0e, 0xbb, 0x59, 0x8b, 0x0e, 0xbb, 0x28, 0xc2, 0x00, 0x0a, 0x0e, 0xbb,
+ 0x60, 0xc6, 0x12, 0x65, 0x0e, 0xbb, 0x50, 0xc2, 0x20, 0xa8, 0x0e, 0xbb,
+ 0x49, 0xc4, 0x8b, 0xed, 0x0e, 0xba, 0xea, 0x03, 0x7e, 0x79, 0xc4, 0x19,
+ 0x8f, 0x0e, 0xbb, 0x40, 0xc2, 0x03, 0x76, 0x0e, 0xbb, 0x30, 0x8b, 0x0e,
+ 0xbb, 0x18, 0x97, 0x0e, 0xbb, 0x10, 0x97, 0x0e, 0xbb, 0x08, 0xc4, 0xdc,
+ 0xdf, 0x0e, 0xbb, 0x00, 0xc4, 0x8f, 0x29, 0x0e, 0xba, 0xf8, 0xc3, 0x00,
+ 0xf2, 0x0e, 0xba, 0xf0, 0xc2, 0x00, 0x44, 0x0e, 0xba, 0xe1, 0xc6, 0x12,
+ 0x65, 0x0e, 0xba, 0xd0, 0xc3, 0x0b, 0x47, 0x0e, 0xba, 0xd8, 0xc4, 0xde,
+ 0x10, 0x0e, 0xba, 0xc8, 0xc4, 0x33, 0x51, 0x0e, 0xba, 0xc0, 0xc3, 0x0b,
+ 0x47, 0x0e, 0xba, 0xb8, 0xc4, 0xdd, 0x2f, 0x0e, 0xba, 0xb0, 0x8e, 0x00,
+ 0x6b, 0xf2, 0x03, 0x7e, 0x7f, 0x90, 0x00, 0x6b, 0xd0, 0x08, 0xc3, 0x7e,
+ 0x83, 0x07, 0xc3, 0x7e, 0x8f, 0x52, 0x4c, 0x86, 0xc3, 0x7e, 0x9b, 0xc9,
+ 0xb6, 0x78, 0x0e, 0x8f, 0x19, 0xca, 0xa8, 0x9e, 0x0e, 0x8f, 0x11, 0xcf,
+ 0x69, 0x06, 0x0e, 0x8f, 0x09, 0xc6, 0xd3, 0x34, 0x0e, 0x8e, 0xf0, 0xc7,
+ 0xcd, 0xcd, 0x0e, 0x8e, 0xd8, 0x84, 0x0e, 0x8e, 0x91, 0x49, 0x2c, 0xf5,
+ 0x43, 0x7e, 0xa7, 0x42, 0x00, 0xa9, 0xc3, 0x7e, 0xb3, 0xc3, 0x46, 0x90,
+ 0x0e, 0x88, 0x58, 0x42, 0x03, 0x00, 0xc3, 0x7e, 0xbf, 0xd2, 0x4e, 0xc6,
+ 0x0e, 0x88, 0xe1, 0xcc, 0x86, 0x10, 0x0e, 0x88, 0xd9, 0xcc, 0x86, 0x28,
+ 0x0e, 0x88, 0x00, 0x44, 0xa8, 0x6e, 0xc3, 0x7e, 0xcb, 0xcb, 0x8f, 0xad,
+ 0x0e, 0x88, 0x28, 0xcc, 0x85, 0xf8, 0x0e, 0x8e, 0xe9, 0x44, 0xa5, 0x12,
+ 0x43, 0x7e, 0xd7, 0xc7, 0xc6, 0xd4, 0x0e, 0x8e, 0xcb, 0x03, 0x7e, 0xe3,
+ 0xc5, 0xde, 0x5f, 0x0e, 0x8e, 0xa0, 0xca, 0xa4, 0xd4, 0x0e, 0x8e, 0xe0,
+ 0x5b, 0x16, 0x72, 0xc3, 0x7e, 0xe9, 0x59, 0x16, 0x74, 0x43, 0x7e, 0xf8,
+ 0x00, 0x43, 0x7f, 0x07, 0x45, 0x00, 0x54, 0x43, 0x7f, 0x13, 0x4c, 0x8c,
+ 0x94, 0xc3, 0x7f, 0x1f, 0xce, 0x71, 0x04, 0x0e, 0x88, 0xc0, 0x0b, 0xc3,
+ 0x7f, 0x2b, 0x4f, 0x6a, 0x23, 0x43, 0x7f, 0x37, 0xc3, 0xd8, 0x85, 0x0e,
+ 0x8e, 0x79, 0xc7, 0xb3, 0x62, 0x0e, 0x8c, 0x90, 0x0f, 0xc3, 0x7f, 0x43,
+ 0xc2, 0x0c, 0x25, 0x0e, 0x88, 0x60, 0x48, 0xb8, 0x5b, 0xc3, 0x7f, 0x4f,
+ 0x49, 0xb5, 0xc4, 0x43, 0x7f, 0x5b, 0xc4, 0x00, 0x68, 0x0e, 0x8d, 0x91,
+ 0xc2, 0x02, 0x6a, 0x0e, 0x8d, 0x88, 0x48, 0xbd, 0x4b, 0x43, 0x7f, 0x67,
+ 0x00, 0x43, 0x7f, 0x73, 0xc5, 0x04, 0x91, 0x0e, 0x8a, 0x99, 0xc5, 0x01,
+ 0x31, 0x0e, 0x8a, 0x90, 0xc5, 0x5c, 0x5a, 0x0e, 0x89, 0xd1, 0xd0, 0x5c,
+ 0x4f, 0x0e, 0x89, 0x48, 0x07, 0xc3, 0x7f, 0x7f, 0x42, 0x00, 0xda, 0x43,
+ 0x7f, 0x89, 0xc6, 0x31, 0x41, 0x0e, 0x8b, 0xc9, 0xc4, 0xe4, 0x73, 0x0e,
+ 0x8b, 0xb9, 0xc3, 0x1e, 0x52, 0x0e, 0x8b, 0xa9, 0xc4, 0xd3, 0x24, 0x0e,
+ 0x8b, 0x98, 0x00, 0x43, 0x7f, 0x93, 0xc5, 0x04, 0x91, 0x0e, 0x8e, 0x01,
+ 0xc5, 0x01, 0x31, 0x0e, 0x8d, 0xf8, 0xc3, 0x08, 0x1c, 0x0e, 0x8c, 0x89,
+ 0x17, 0x43, 0x7f, 0x9f, 0x10, 0xc3, 0x7f, 0xab, 0xcd, 0x7f, 0x10, 0x0e,
+ 0x88, 0xd0, 0xc4, 0x00, 0x68, 0x0e, 0x89, 0x69, 0xc2, 0x02, 0x6a, 0x0e,
+ 0x89, 0x60, 0x48, 0xbd, 0x4b, 0x43, 0x7f, 0xb7, 0xc6, 0x00, 0x33, 0x0e,
+ 0x88, 0x88, 0xc2, 0x16, 0x73, 0x0e, 0x8d, 0xa3, 0x03, 0x7f, 0xc3, 0xc5,
+ 0xd9, 0xc3, 0x0e, 0x88, 0x51, 0xc7, 0xcf, 0x40, 0x0e, 0x88, 0x49, 0xcc,
+ 0x85, 0xb0, 0x0e, 0x88, 0x20, 0xca, 0x9e, 0xda, 0x0e, 0x8d, 0x49, 0xc9,
+ 0xb3, 0x60, 0x0e, 0x8c, 0x98, 0xc4, 0x37, 0xd2, 0x0e, 0x89, 0x59, 0xc5,
+ 0xa8, 0x6c, 0x0e, 0x89, 0x51, 0xc7, 0x42, 0x89, 0x0e, 0x88, 0x08, 0xc3,
+ 0xed, 0x23, 0x0e, 0x89, 0x31, 0xc3, 0xed, 0x20, 0x0e, 0x89, 0x28, 0xc4,
+ 0x23, 0x1f, 0x0e, 0x8a, 0xe9, 0xc4, 0x2d, 0xbe, 0x0e, 0x89, 0xd8, 0xca,
+ 0xa5, 0x10, 0x0e, 0x8d, 0x81, 0xc4, 0x23, 0x1f, 0x0e, 0x8a, 0xf1, 0xc4,
+ 0x2d, 0xbe, 0x0e, 0x89, 0xe0, 0xc9, 0xb6, 0xae, 0x0e, 0x8d, 0x41, 0xc6,
+ 0x31, 0x41, 0x0e, 0x8b, 0xd1, 0xc4, 0xe4, 0x73, 0x0e, 0x8b, 0xc1, 0xc3,
+ 0x1e, 0x52, 0x0e, 0x8b, 0xb1, 0xc4, 0xd3, 0x24, 0x0e, 0x8b, 0xa0, 0xc4,
+ 0x23, 0x1f, 0x0e, 0x8b, 0x01, 0xc4, 0x2d, 0xbe, 0x0e, 0x89, 0xf0, 0xc4,
+ 0x00, 0x68, 0x0e, 0x89, 0x79, 0xc2, 0x02, 0x6a, 0x0e, 0x89, 0x70, 0xc3,
+ 0x83, 0x28, 0x0e, 0x8c, 0xdb, 0x03, 0x7f, 0xc9, 0xc3, 0xea, 0xda, 0x0e,
+ 0x8d, 0x19, 0xc3, 0xea, 0xce, 0x0e, 0x8d, 0x11, 0xc3, 0x83, 0xe8, 0x0e,
+ 0x8d, 0x09, 0xc3, 0x82, 0xb0, 0x0e, 0x8d, 0x01, 0xc3, 0x3b, 0x0b, 0x0e,
+ 0x8c, 0xf9, 0xc3, 0x82, 0xe0, 0x0e, 0x8c, 0xf1, 0xc3, 0x82, 0xec, 0x0e,
+ 0x8c, 0xe9, 0xc3, 0x82, 0xa4, 0x0e, 0x8c, 0xe0, 0x58, 0x25, 0x1c, 0xc3,
+ 0x7f, 0xd1, 0xcc, 0x71, 0x06, 0x0e, 0x88, 0xb0, 0xc5, 0xdb, 0xdf, 0x0e,
+ 0x89, 0xb9, 0xc4, 0xe6, 0x2b, 0x0e, 0x89, 0xb0, 0xc9, 0xb0, 0x75, 0x0e,
+ 0x8c, 0x61, 0xcf, 0x68, 0x25, 0x0e, 0x88, 0x38, 0x44, 0x68, 0x2b, 0xc3,
+ 0x7f, 0xdd, 0xd3, 0x42, 0x7d, 0x0e, 0x88, 0x18, 0xc4, 0x23, 0x1f, 0x0e,
+ 0x8a, 0xf9, 0xc4, 0x2d, 0xbe, 0x0e, 0x89, 0xe9, 0x45, 0x2a, 0xe3, 0x43,
+ 0x7f, 0xe9, 0xc5, 0xdb, 0xdf, 0x0e, 0x89, 0xc9, 0xc4, 0xe6, 0x2b, 0x0e,
+ 0x89, 0xc0, 0xc8, 0x00, 0x52, 0x01, 0x51, 0xd9, 0xcd, 0x81, 0xf5, 0x01,
+ 0x51, 0xb9, 0xd1, 0x55, 0x21, 0x01, 0x51, 0xa9, 0xd0, 0x5c, 0xcf, 0x01,
+ 0x51, 0xa0, 0xc8, 0x50, 0x0d, 0x01, 0x51, 0x89, 0xc9, 0x18, 0x19, 0x01,
+ 0x51, 0x80, 0xc2, 0x01, 0x0e, 0x05, 0x53, 0x49, 0x83, 0x05, 0x53, 0x40,
+ 0xc2, 0x01, 0x0e, 0x05, 0x4f, 0x71, 0x83, 0x05, 0x4f, 0x68, 0xc2, 0x01,
+ 0x0e, 0x05, 0x4f, 0x21, 0x83, 0x00, 0x83, 0xf8, 0xc2, 0x01, 0x01, 0x05,
+ 0x4f, 0x19, 0xc2, 0x1a, 0x36, 0x00, 0x83, 0xd1, 0x83, 0x00, 0x83, 0xe0,
+ 0x83, 0x00, 0x83, 0xa9, 0xc2, 0x01, 0x0e, 0x00, 0x83, 0xb0, 0x83, 0x00,
+ 0x83, 0xb9, 0xc2, 0x01, 0x0e, 0x05, 0x4f, 0x00, 0x83, 0x00, 0x83, 0xc1,
+ 0xc2, 0x01, 0x0e, 0x05, 0x4f, 0x08, 0x4b, 0x15, 0x9b, 0xc3, 0x7f, 0xf5,
+ 0xdc, 0x12, 0xe2, 0x0f, 0xd2, 0x38, 0xc9, 0x1e, 0x1c, 0x01, 0x49, 0x21,
+ 0xd4, 0x3d, 0x02, 0x01, 0x49, 0x41, 0x49, 0x0c, 0xa3, 0x43, 0x80, 0x01,
+ 0x43, 0x01, 0x89, 0xc3, 0x80, 0x0d, 0xc9, 0x1e, 0x1c, 0x01, 0x49, 0x19,
+ 0xd4, 0x3d, 0x52, 0x01, 0x49, 0x39, 0xd9, 0x20, 0x7d, 0x01, 0x49, 0x90,
+ 0xcc, 0x89, 0x34, 0x09, 0xa2, 0x60, 0xcf, 0x62, 0xc1, 0x09, 0xa2, 0x78,
+ 0x87, 0x0f, 0x3f, 0xc8, 0x87, 0x0f, 0x3f, 0xb0, 0x87, 0x0f, 0x3f, 0x88,
+ 0x87, 0x05, 0x59, 0x20, 0x83, 0x05, 0x59, 0x18, 0x83, 0x00, 0x96, 0x98,
+ 0x87, 0x00, 0x96, 0xa0, 0xc3, 0x11, 0xb7, 0x00, 0x1d, 0x4b, 0x03, 0x80,
+ 0x19, 0xc5, 0xdc, 0x84, 0x00, 0x1c, 0xfa, 0x03, 0x80, 0x1f, 0xcb, 0x98,
+ 0x71, 0x00, 0xff, 0x60, 0x46, 0x01, 0xab, 0x43, 0x80, 0x25, 0x46, 0x01,
+ 0xab, 0x43, 0x80, 0x3f, 0xc2, 0x00, 0x44, 0x00, 0x1c, 0xbb, 0x03, 0x80,
+ 0x62, 0xc6, 0x12, 0x65, 0x00, 0x1c, 0xaa, 0x03, 0x80, 0x68, 0xc4, 0xdd,
+ 0x2f, 0x00, 0x1c, 0x8b, 0x03, 0x80, 0x6e, 0xcc, 0x89, 0x1c, 0x00, 0x1b,
+ 0x90, 0xd1, 0x4f, 0xc0, 0x00, 0x1b, 0xb1, 0x8b, 0x00, 0x1d, 0x01, 0xc2,
+ 0x00, 0x0a, 0x00, 0x1d, 0x31, 0xc2, 0x02, 0x29, 0x00, 0x1d, 0x40, 0xc4,
+ 0x8b, 0xed, 0x00, 0x1c, 0xc1, 0xc2, 0x20, 0xa8, 0x00, 0x1d, 0x20, 0xc4,
+ 0x19, 0x8f, 0x00, 0x1d, 0x19, 0xc2, 0x03, 0x76, 0x00, 0x1f, 0xb9, 0xc2,
+ 0x00, 0x3a, 0x00, 0x1f, 0xd0, 0xc3, 0x11, 0xb7, 0x00, 0x1e, 0x4b, 0x03,
+ 0x80, 0x74, 0xc5, 0xdc, 0x84, 0x00, 0x1d, 0xfa, 0x03, 0x80, 0x7a, 0x46,
+ 0x01, 0xab, 0x43, 0x80, 0x80, 0x46, 0x01, 0xab, 0x43, 0x80, 0x9e, 0x46,
+ 0x01, 0xab, 0x43, 0x80, 0xaa, 0xc2, 0x00, 0x44, 0x00, 0x1d, 0xbb, 0x03,
+ 0x80, 0xc8, 0xc6, 0x12, 0x65, 0x00, 0x1d, 0xaa, 0x03, 0x80, 0xce, 0xc4,
+ 0xdd, 0x2f, 0x00, 0x1d, 0x8b, 0x03, 0x80, 0xd4, 0x47, 0x7a, 0xe7, 0x43,
+ 0x80, 0xda, 0xc4, 0xde, 0x10, 0x00, 0x1d, 0xa1, 0xc6, 0x4f, 0xcb, 0x00,
+ 0x1e, 0xe8, 0xc4, 0x8b, 0xed, 0x00, 0x1d, 0xc1, 0xc2, 0x20, 0xa8, 0x00,
+ 0x1e, 0x20, 0xc4, 0x8f, 0x29, 0x00, 0x1d, 0xd1, 0xc4, 0x7a, 0xef, 0x00,
+ 0x1e, 0xf8, 0x8b, 0x00, 0x1e, 0x01, 0xc2, 0x00, 0x0a, 0x00, 0x1e, 0x31,
+ 0xc2, 0x02, 0x29, 0x00, 0x1e, 0x41, 0xd1, 0x4f, 0xc0, 0x00, 0x1b, 0xb8,
+ 0xc4, 0x19, 0x8f, 0x00, 0x1e, 0x19, 0xc5, 0xde, 0xa5, 0x00, 0x1e, 0xd9,
+ 0xc2, 0x03, 0x76, 0x00, 0x1f, 0xc1, 0x03, 0x43, 0x80, 0xe6, 0x12, 0xc3,
+ 0x80, 0xf0, 0xc3, 0x7d, 0x39, 0x00, 0xe9, 0x49, 0xc5, 0xdc, 0xde, 0x00,
+ 0xe9, 0x39, 0xc5, 0x4f, 0xcc, 0x00, 0xe9, 0x31, 0xc5, 0xa2, 0x81, 0x05,
+ 0x5b, 0x28, 0xc7, 0x08, 0x19, 0x08, 0x0a, 0x01, 0x0a, 0xc3, 0x80, 0xfa,
+ 0xc7, 0x3d, 0x9a, 0x08, 0x0a, 0x11, 0x49, 0x4f, 0xa2, 0x43, 0x81, 0x06,
+ 0xc2, 0x00, 0x29, 0x08, 0x0a, 0x1b, 0x03, 0x81, 0x12, 0xc3, 0x41, 0xca,
+ 0x08, 0x0a, 0x22, 0x03, 0x81, 0x16, 0x16, 0xc3, 0x81, 0x1a, 0xc7, 0x63,
+ 0x9b, 0x08, 0x0a, 0x81, 0xc4, 0x41, 0xce, 0x08, 0x0a, 0xb8, 0xc3, 0x05,
+ 0x17, 0x08, 0x0a, 0xd1, 0xc3, 0x03, 0x01, 0x08, 0x0b, 0x11, 0xc5, 0x41,
+ 0xc8, 0x08, 0x0b, 0x40, 0xc3, 0x05, 0x17, 0x08, 0x0a, 0xcb, 0x03, 0x81,
+ 0x26, 0x16, 0xc3, 0x81, 0x2a, 0x42, 0x02, 0xf8, 0x43, 0x81, 0x3a, 0x42,
+ 0x02, 0xf8, 0xc3, 0x81, 0x46, 0xc3, 0x03, 0x01, 0x08, 0x0b, 0x02, 0x03,
+ 0x81, 0x58, 0xc9, 0x3d, 0x99, 0x08, 0x0a, 0xf0, 0xc5, 0x00, 0xcc, 0x01,
+ 0x54, 0x20, 0xc4, 0x0d, 0x89, 0x08, 0x79, 0x21, 0xc3, 0x05, 0xdf, 0x08,
+ 0x78, 0xf8, 0xc4, 0x15, 0xa9, 0x08, 0x79, 0x19, 0x91, 0x08, 0x78, 0xf0,
+ 0xc3, 0x5f, 0x3d, 0x08, 0x78, 0xdb, 0x03, 0x81, 0x5e, 0xc5, 0xe2, 0x60,
+ 0x08, 0x78, 0xb3, 0x03, 0x81, 0x64, 0xc3, 0x21, 0x00, 0x08, 0x78, 0x7b,
+ 0x03, 0x81, 0x6a, 0xc2, 0x00, 0x5b, 0x08, 0x78, 0x31, 0xc4, 0xe5, 0xaf,
+ 0x08, 0x78, 0x19, 0xc5, 0xa6, 0x5f, 0x08, 0x78, 0x08, 0xc3, 0x11, 0x40,
+ 0x08, 0x78, 0xc9, 0x03, 0x43, 0x81, 0x70, 0x0e, 0xc3, 0x81, 0x7c, 0xc3,
+ 0x18, 0x7a, 0x08, 0x78, 0x90, 0xc2, 0x01, 0x47, 0x08, 0x78, 0x48, 0xc3,
+ 0x1e, 0x54, 0x08, 0x53, 0xe1, 0xc2, 0x3c, 0xd1, 0x08, 0x53, 0xd8, 0xc4,
+ 0x43, 0x11, 0x08, 0x53, 0xc9, 0xc3, 0xe2, 0x62, 0x08, 0x53, 0x98, 0x96,
+ 0x08, 0x53, 0x51, 0xc3, 0xe2, 0x62, 0x08, 0x53, 0x71, 0xc4, 0xdd, 0x34,
+ 0x08, 0x53, 0x78, 0x89, 0x08, 0x61, 0x70, 0x48, 0xc2, 0x0b, 0x43, 0x81,
+ 0x88, 0x83, 0x08, 0x1d, 0x19, 0x97, 0x08, 0x1d, 0x20, 0x83, 0x08, 0x1d,
+ 0x29, 0x97, 0x08, 0x1d, 0x30, 0x83, 0x08, 0x1d, 0x39, 0xcb, 0x96, 0xb9,
+ 0x08, 0x1e, 0x58, 0x83, 0x08, 0x1d, 0x49, 0x8b, 0x08, 0x1d, 0x50, 0x83,
+ 0x08, 0x1d, 0x59, 0x97, 0x08, 0x1d, 0x61, 0xc2, 0x01, 0x0e, 0x08, 0x1d,
+ 0x80, 0x83, 0x08, 0x1d, 0x6b, 0x03, 0x81, 0x95, 0x8b, 0x08, 0x1d, 0x71,
+ 0x97, 0x08, 0x1d, 0x78, 0x83, 0x08, 0x1d, 0x93, 0x03, 0x81, 0x9b, 0xc6,
+ 0xd4, 0x36, 0x08, 0x1e, 0x78, 0x83, 0x08, 0x1d, 0xa1, 0x97, 0x08, 0x1d,
+ 0xa8, 0x83, 0x08, 0x1d, 0xb1, 0x8b, 0x08, 0x1d, 0xb9, 0x97, 0x08, 0x1d,
+ 0xc0, 0x83, 0x08, 0x1d, 0xd1, 0x8b, 0x08, 0x1d, 0xd8, 0x83, 0x08, 0x1d,
+ 0xe1, 0x97, 0x08, 0x1d, 0xe8, 0x83, 0x08, 0x1d, 0xf9, 0xc2, 0x01, 0x0e,
+ 0x08, 0x1e, 0x09, 0xc2, 0x0e, 0xe5, 0x08, 0x1e, 0x10, 0x19, 0xc3, 0x81,
+ 0xa1, 0xc2, 0x01, 0x04, 0x08, 0x1e, 0x98, 0x00, 0x43, 0x81, 0xab, 0xca,
+ 0xa4, 0xf2, 0x0e, 0x7d, 0x30, 0x46, 0x01, 0xab, 0x43, 0x81, 0xbd, 0xcc,
+ 0x88, 0xd4, 0x0e, 0x7c, 0xf8, 0x43, 0x90, 0x7d, 0x43, 0x81, 0xc9, 0xcb,
+ 0x9b, 0x89, 0x0e, 0x7c, 0x50, 0xc5, 0x00, 0x47, 0x0e, 0x78, 0xb1, 0xc4,
+ 0x00, 0xcd, 0x0e, 0x78, 0x50, 0x97, 0x00, 0xc7, 0x88, 0x91, 0x00, 0xc7,
+ 0x60, 0x91, 0x00, 0xc7, 0x58, 0xc5, 0x03, 0x7d, 0x00, 0xc7, 0xa9, 0xc5,
+ 0xe2, 0x8d, 0x00, 0xc7, 0x70, 0x87, 0x00, 0xb1, 0x58, 0x87, 0x00, 0xb2,
+ 0x58, 0x87, 0x00, 0xb0, 0xf8, 0x87, 0x00, 0xae, 0x38, 0x83, 0x00, 0xb3,
+ 0x61, 0x8b, 0x00, 0xb3, 0x59, 0x87, 0x00, 0xb3, 0x4b, 0x03, 0x81, 0xd5,
+ 0x91, 0x00, 0xb3, 0x41, 0x97, 0x00, 0xb3, 0x38, 0x87, 0x00, 0xaf, 0x28,
+ 0x87, 0x00, 0xb2, 0xf0, 0x87, 0x00, 0xae, 0xf8, 0x8b, 0x00, 0xb1, 0xc1,
+ 0x87, 0x00, 0xb1, 0xb3, 0x03, 0x81, 0xd9, 0x91, 0x00, 0xb1, 0xa9, 0x97,
+ 0x00, 0xb1, 0xa1, 0x83, 0x00, 0xb1, 0xc8, 0x87, 0x00, 0xb1, 0xe8, 0x87,
+ 0x00, 0xaf, 0xf0, 0x87, 0x00, 0xaf, 0xc0, 0x87, 0x00, 0xae, 0xc8, 0x87,
+ 0x00, 0xb1, 0x88, 0x87, 0x00, 0xb2, 0xb8, 0x83, 0x00, 0xc7, 0x10, 0x91,
+ 0x00, 0xc7, 0x08, 0x87, 0x00, 0xa6, 0xe9, 0x8b, 0x00, 0xa6, 0xfb, 0x03,
+ 0x81, 0xdd, 0x91, 0x00, 0xa7, 0x1b, 0x03, 0x81, 0xe1, 0x83, 0x00, 0xa7,
+ 0x3a, 0x03, 0x81, 0xe5, 0x8b, 0x00, 0xa2, 0xd3, 0x03, 0x81, 0xe9, 0x87,
+ 0x00, 0xa2, 0xc1, 0x91, 0x00, 0xa2, 0xf3, 0x03, 0x81, 0xed, 0x83, 0x00,
+ 0xa3, 0x12, 0x03, 0x81, 0xf1, 0x83, 0x00, 0xa9, 0xd3, 0x03, 0x81, 0xf5,
+ 0x91, 0x00, 0xa9, 0xb3, 0x03, 0x81, 0xf9, 0x8b, 0x00, 0xa9, 0x93, 0x03,
+ 0x81, 0xfd, 0x87, 0x00, 0xa9, 0x80, 0x83, 0x00, 0xa9, 0x13, 0x03, 0x82,
+ 0x01, 0x8b, 0x00, 0xa8, 0xd3, 0x03, 0x82, 0x05, 0x87, 0x00, 0xa8, 0xc1,
+ 0x91, 0x00, 0xa8, 0xf2, 0x03, 0x82, 0x09, 0x83, 0x00, 0xa8, 0x0b, 0x03,
+ 0x82, 0x0d, 0x87, 0x00, 0xa7, 0xb9, 0x8b, 0x00, 0xa7, 0xcb, 0x03, 0x82,
+ 0x11, 0x91, 0x00, 0xa7, 0xea, 0x03, 0x82, 0x15, 0x83, 0x00, 0xa2, 0x2b,
+ 0x03, 0x82, 0x19, 0x91, 0x00, 0xa2, 0x0b, 0x03, 0x82, 0x1d, 0x8b, 0x00,
+ 0xa1, 0xeb, 0x03, 0x82, 0x21, 0x87, 0x00, 0xa1, 0xd8, 0x91, 0x00, 0xa4,
+ 0xd8, 0x8b, 0x00, 0xa4, 0xb8, 0x83, 0x00, 0xa4, 0xf8, 0x83, 0x00, 0xa0,
+ 0xd0, 0x91, 0x00, 0xa0, 0xa8, 0x8b, 0x00, 0xa0, 0x88, 0x83, 0x00, 0xa4,
+ 0x08, 0x8b, 0x00, 0xa3, 0xc8, 0x91, 0x00, 0xa3, 0xe8, 0x87, 0x00, 0xa5,
+ 0x69, 0x8b, 0x00, 0xa5, 0x7b, 0x03, 0x82, 0x25, 0x91, 0x00, 0xa5, 0x9b,
+ 0x03, 0x82, 0x29, 0x83, 0x00, 0xa5, 0xba, 0x03, 0x82, 0x2d, 0x83, 0x00,
+ 0xa6, 0x70, 0x83, 0x00, 0xb3, 0xe3, 0x03, 0x82, 0x31, 0x91, 0x00, 0xb3,
+ 0xd3, 0x03, 0x82, 0x35, 0x8b, 0x00, 0xb3, 0xc3, 0x03, 0x82, 0x39, 0xc2,
+ 0x01, 0xe6, 0x00, 0xb3, 0xb8, 0xc3, 0x0d, 0x8f, 0x08, 0x9b, 0x59, 0xc3,
+ 0x08, 0xde, 0x08, 0x9b, 0x50, 0xc4, 0x05, 0xde, 0x08, 0x9b, 0x49, 0xc2,
+ 0x0a, 0x20, 0x08, 0x9b, 0x40, 0xc6, 0x00, 0x33, 0x00, 0x18, 0xb0, 0xc5,
+ 0x00, 0x34, 0x01, 0x07, 0x79, 0xc5, 0x03, 0x50, 0x01, 0x06, 0xb8, 0x03,
+ 0xc3, 0x82, 0x3d, 0xc5, 0x00, 0x34, 0x00, 0x1a, 0xa8, 0xc5, 0x00, 0x34,
+ 0x00, 0x19, 0xc9, 0xc5, 0x03, 0x50, 0x00, 0x1a, 0xb8, 0xc5, 0x00, 0x34,
+ 0x01, 0x07, 0x71, 0xc5, 0x03, 0x50, 0x01, 0x06, 0xb0, 0xc5, 0x03, 0x50,
+ 0x00, 0xef, 0xf1, 0xc5, 0x00, 0x34, 0x00, 0x1a, 0xa0, 0xc5, 0x03, 0x50,
+ 0x00, 0x18, 0x71, 0xc5, 0x00, 0x34, 0x00, 0x1a, 0x40, 0xc5, 0x00, 0x34,
+ 0x00, 0xd6, 0x51, 0xc5, 0x03, 0x50, 0x00, 0xd6, 0x48, 0xc9, 0x0f, 0xa9,
+ 0x07, 0xf1, 0x11, 0xca, 0x01, 0x17, 0x07, 0xf1, 0x18, 0xc4, 0x00, 0xcd,
+ 0x00, 0xef, 0xc1, 0xc5, 0x00, 0x47, 0x00, 0x1a, 0xc0, 0xc2, 0x05, 0x1b,
+ 0x01, 0x66, 0x29, 0xc3, 0x08, 0xea, 0x01, 0x66, 0xd8, 0xc3, 0x03, 0x77,
+ 0x01, 0x66, 0x69, 0x83, 0x01, 0x66, 0x7b, 0x03, 0x82, 0x49, 0xc2, 0x05,
+ 0x1b, 0x01, 0x66, 0x98, 0xc2, 0x03, 0xab, 0x01, 0x66, 0xf9, 0xc2, 0x18,
+ 0x7a, 0x01, 0x67, 0x08, 0xc2, 0x05, 0x1b, 0x01, 0x66, 0x21, 0xc3, 0x08,
+ 0xea, 0x01, 0x66, 0xd0, 0xc3, 0x03, 0x77, 0x01, 0x66, 0x61, 0x83, 0x01,
+ 0x66, 0x73, 0x03, 0x82, 0x4d, 0xc2, 0x05, 0x1b, 0x01, 0x66, 0x90, 0xc2,
+ 0x03, 0xab, 0x01, 0x66, 0xf1, 0xc2, 0x18, 0x7a, 0x01, 0x67, 0x00, 0xc8,
+ 0x0a, 0x1f, 0x0f, 0xc8, 0x09, 0xc9, 0x39, 0xbf, 0x0f, 0xc8, 0x00, 0x42,
+ 0x00, 0xe5, 0xc3, 0x82, 0x51, 0x16, 0xc3, 0x82, 0x5b, 0x08, 0xc3, 0x82,
+ 0x67, 0x15, 0xc3, 0x82, 0x73, 0xc5, 0x05, 0x1b, 0x01, 0x92, 0xc1, 0xc4,
+ 0x24, 0x35, 0x01, 0x92, 0xc8, 0x42, 0x00, 0xe5, 0xc3, 0x82, 0x7f, 0x16,
+ 0xc3, 0x82, 0x89, 0x08, 0xc3, 0x82, 0x95, 0x15, 0xc3, 0x82, 0xa1, 0xc5,
+ 0x05, 0x1b, 0x01, 0x95, 0x99, 0xc4, 0x24, 0x35, 0x01, 0x95, 0xa0, 0x42,
+ 0x00, 0xe5, 0xc3, 0x82, 0xad, 0x16, 0xc3, 0x82, 0xb7, 0x08, 0xc3, 0x82,
+ 0xc3, 0x15, 0xc3, 0x82, 0xcf, 0xc5, 0x05, 0x1b, 0x01, 0x95, 0xe9, 0xc4,
+ 0x24, 0x35, 0x01, 0x95, 0xf0, 0x96, 0x01, 0x95, 0x09, 0xc5, 0x56, 0xd6,
+ 0x01, 0x95, 0x70, 0xa0, 0x09, 0x2a, 0x01, 0x8f, 0x09, 0x1a, 0x30, 0x94,
+ 0x09, 0x19, 0xf9, 0xc7, 0x5c, 0x78, 0x09, 0x19, 0xf1, 0x8e, 0x09, 0x19,
+ 0xe8, 0x86, 0x09, 0x29, 0xe9, 0x9f, 0x09, 0x19, 0x8a, 0x03, 0x82, 0xdb,
+ 0x8e, 0x09, 0x19, 0x71, 0x46, 0x21, 0x5d, 0x43, 0x82, 0xe1, 0xd9, 0x1f,
+ 0x51, 0x09, 0x15, 0xe9, 0xd9, 0x1d, 0x5d, 0x09, 0x15, 0xe0, 0xc7, 0x21,
+ 0x5d, 0x09, 0x15, 0xb0, 0xc5, 0x3a, 0xa5, 0x09, 0x16, 0x68, 0xc4, 0x9b,
+ 0x38, 0x09, 0x16, 0x49, 0xc2, 0x00, 0x2f, 0x09, 0x16, 0x40, 0xc2, 0x3a,
+ 0x6c, 0x09, 0x29, 0x81, 0x84, 0x09, 0x15, 0x08, 0x0a, 0xc3, 0x82, 0xed,
+ 0xc2, 0x00, 0x2f, 0x09, 0x14, 0xf8, 0xc2, 0x01, 0x0d, 0x09, 0x15, 0x31,
+ 0x94, 0x09, 0x15, 0x29, 0x8f, 0x09, 0x15, 0x21, 0x84, 0x09, 0x15, 0x19,
+ 0x9f, 0x09, 0x15, 0x10, 0xc2, 0x00, 0xd3, 0x09, 0x14, 0xd9, 0xc2, 0x06,
+ 0x6e, 0x09, 0x14, 0xd0, 0x84, 0x09, 0x14, 0xc0, 0xc4, 0xe1, 0x61, 0x09,
+ 0x29, 0x61, 0xc7, 0x69, 0xf7, 0x09, 0x29, 0x59, 0xc2, 0x01, 0x0d, 0x09,
+ 0x12, 0xf9, 0xca, 0xa3, 0x12, 0x09, 0x12, 0xf0, 0xc3, 0x02, 0xe4, 0x09,
+ 0x29, 0x41, 0xd0, 0x5d, 0x5f, 0x09, 0x12, 0xb8, 0x17, 0xc3, 0x82, 0xf9,
+ 0x8b, 0x09, 0x1c, 0x92, 0x03, 0x83, 0x01, 0x47, 0x21, 0x5d, 0x43, 0x83,
+ 0x07, 0xc2, 0x0b, 0xfd, 0x09, 0x12, 0xc9, 0x87, 0x09, 0x12, 0xc0, 0xc2,
+ 0x01, 0x0d, 0x09, 0x12, 0xa3, 0x03, 0x83, 0x16, 0x90, 0x09, 0x12, 0x98,
+ 0xc2, 0x02, 0x69, 0x09, 0x13, 0xc8, 0xc2, 0x5a, 0xd1, 0x09, 0x13, 0xb9,
+ 0xc5, 0xe3, 0xa0, 0x09, 0x13, 0xb1, 0xc2, 0x03, 0x5f, 0x09, 0x13, 0xa9,
+ 0xc2, 0x00, 0x96, 0x09, 0x13, 0xa1, 0xc4, 0xe5, 0x97, 0x09, 0x13, 0x99,
+ 0xc8, 0x61, 0x7e, 0x09, 0x13, 0x91, 0xc3, 0x73, 0x7f, 0x09, 0x13, 0x89,
+ 0xc3, 0x84, 0x30, 0x09, 0x13, 0x81, 0xc2, 0x05, 0x57, 0x09, 0x13, 0x79,
+ 0xc6, 0xd8, 0x7a, 0x09, 0x13, 0x70, 0xd9, 0x20, 0xfa, 0x09, 0x13, 0x38,
+ 0xc3, 0x59, 0x15, 0x09, 0x29, 0x09, 0xc2, 0x07, 0x69, 0x09, 0x29, 0x01,
+ 0xc9, 0xb7, 0xe9, 0x09, 0x11, 0xb8, 0xc2, 0x05, 0x5c, 0x09, 0x1c, 0x69,
+ 0xc2, 0x01, 0x08, 0x09, 0x11, 0xe1, 0x83, 0x09, 0x11, 0xd2, 0x03, 0x83,
+ 0x1c, 0x16, 0xc3, 0x83, 0x22, 0xc3, 0x07, 0x44, 0x09, 0x28, 0xe3, 0x03,
+ 0x83, 0x2e, 0x0a, 0xc3, 0x83, 0x34, 0xc4, 0x07, 0xd9, 0x09, 0x28, 0xd1,
+ 0x15, 0xc3, 0x83, 0x40, 0xc4, 0x76, 0x32, 0x09, 0x10, 0x03, 0x03, 0x83,
+ 0x4a, 0x10, 0xc3, 0x83, 0x4e, 0xca, 0xa8, 0x62, 0x09, 0x10, 0x59, 0x42,
+ 0x00, 0x96, 0xc3, 0x83, 0x56, 0x0d, 0xc3, 0x83, 0x62, 0xc2, 0x07, 0x6e,
+ 0x09, 0x10, 0x21, 0xc9, 0x5c, 0x76, 0x09, 0x10, 0x11, 0xc3, 0x64, 0x5f,
+ 0x09, 0x0f, 0xf9, 0xc2, 0x00, 0x2f, 0x09, 0x0f, 0xf0, 0xca, 0x90, 0xab,
+ 0x09, 0x1c, 0x48, 0xc4, 0xe1, 0x61, 0x09, 0x28, 0xb1, 0x86, 0x09, 0x28,
+ 0xa8, 0xc5, 0x3a, 0xa5, 0x09, 0x28, 0x88, 0xc4, 0xe1, 0x61, 0x09, 0x28,
+ 0x59, 0x86, 0x09, 0x28, 0x51, 0x9f, 0x09, 0x28, 0x48, 0x87, 0x09, 0x28,
+ 0x41, 0xc2, 0x00, 0x4d, 0x09, 0x28, 0x38, 0xca, 0xa9, 0xa2, 0x09, 0x27,
+ 0xb1, 0x49, 0x36, 0x97, 0xc3, 0x83, 0x6c, 0xc3, 0x03, 0xaa, 0x09, 0x27,
+ 0x99, 0xc2, 0x08, 0x0d, 0x09, 0x27, 0x90, 0xca, 0xa4, 0xa2, 0x09, 0x26,
+ 0xa1, 0x09, 0xc3, 0x83, 0x78, 0x97, 0x09, 0x0f, 0x2b, 0x03, 0x83, 0x8c,
+ 0x16, 0xc3, 0x83, 0xa2, 0x15, 0xc3, 0x83, 0xac, 0xc2, 0x03, 0x5f, 0x09,
+ 0x0e, 0xd9, 0x0f, 0xc3, 0x83, 0xb6, 0x0e, 0xc3, 0x83, 0xc3, 0x0d, 0xc3,
+ 0x83, 0xd6, 0x0b, 0xc3, 0x83, 0xe1, 0x0a, 0xc3, 0x83, 0xee, 0xc2, 0x01,
+ 0x04, 0x09, 0x0e, 0x19, 0xc3, 0x13, 0x63, 0x09, 0x0e, 0x11, 0x04, 0xc3,
+ 0x83, 0xfb, 0x83, 0x09, 0x0d, 0xca, 0x03, 0x84, 0x05, 0xd4, 0x3b, 0x4a,
+ 0x09, 0x0f, 0x80, 0xc9, 0xaa, 0x89, 0x09, 0x0f, 0x70, 0x8e, 0x09, 0x1c,
+ 0x28, 0x00, 0x43, 0x84, 0x19, 0xd1, 0x52, 0xdf, 0x09, 0x0b, 0x30, 0xc2,
+ 0x00, 0x98, 0x09, 0x0b, 0xb9, 0xc2, 0x03, 0xab, 0x09, 0x0b, 0xb1, 0xc2,
+ 0x07, 0x63, 0x09, 0x0b, 0xa8, 0xcf, 0x61, 0x77, 0x09, 0x08, 0xd0, 0x45,
+ 0x07, 0x6e, 0xc3, 0x84, 0x25, 0xc3, 0x5b, 0x6c, 0x09, 0x08, 0xa8, 0x0a,
+ 0xc3, 0x84, 0x37, 0xc2, 0x01, 0x0a, 0x09, 0x07, 0x41, 0x03, 0x43, 0x84,
+ 0x42, 0x87, 0x09, 0x26, 0x23, 0x03, 0x84, 0x4a, 0xc2, 0x0b, 0xfd, 0x09,
+ 0x07, 0x02, 0x03, 0x84, 0x50, 0xc3, 0x76, 0x92, 0x09, 0x26, 0x19, 0x8b,
+ 0x09, 0x06, 0xf9, 0xc9, 0xa8, 0x63, 0x09, 0x06, 0xf0, 0xc2, 0x49, 0xc7,
+ 0x09, 0x26, 0x11, 0x83, 0x09, 0x06, 0xea, 0x03, 0x84, 0x56, 0x17, 0xc3,
+ 0x84, 0x5d, 0xc2, 0x01, 0x8d, 0x09, 0x06, 0xd3, 0x03, 0x84, 0x69, 0x03,
+ 0x43, 0x84, 0x6f, 0x03, 0xc3, 0x84, 0x79, 0xc3, 0xe7, 0x9c, 0x09, 0x06,
+ 0xa9, 0xc9, 0xb1, 0x20, 0x09, 0x06, 0xa0, 0x83, 0x09, 0x25, 0xdb, 0x03,
+ 0x84, 0x86, 0x8b, 0x09, 0x06, 0x6a, 0x03, 0x84, 0x93, 0xc3, 0x17, 0x64,
+ 0x09, 0x25, 0xd1, 0x90, 0x09, 0x06, 0x4b, 0x03, 0x84, 0xa0, 0x8e, 0x09,
+ 0x06, 0x3a, 0x03, 0x84, 0xa6, 0x17, 0xc3, 0x84, 0xac, 0x8b, 0x09, 0x06,
+ 0x23, 0x03, 0x84, 0xb6, 0x83, 0x09, 0x06, 0x18, 0x03, 0xc3, 0x84, 0xbc,
+ 0xc2, 0x00, 0xd3, 0x09, 0x06, 0x0a, 0x03, 0x84, 0xcc, 0xc2, 0x01, 0x0d,
+ 0x09, 0x05, 0xeb, 0x03, 0x84, 0xd2, 0x90, 0x09, 0x05, 0xe3, 0x03, 0x84,
+ 0xd9, 0xd0, 0x5c, 0xff, 0x09, 0x05, 0xd9, 0x46, 0x21, 0x5d, 0x43, 0x84,
+ 0xdf, 0x86, 0x09, 0x07, 0x5a, 0x03, 0x84, 0xf1, 0xd3, 0x43, 0x87, 0x09,
+ 0x06, 0xb9, 0xc7, 0x61, 0x7f, 0x09, 0x06, 0xb0, 0xcb, 0x94, 0x88, 0x09,
+ 0x05, 0x80, 0xc8, 0x01, 0xe8, 0x09, 0x05, 0x68, 0xca, 0x94, 0x89, 0x09,
+ 0x05, 0x20, 0x8f, 0x09, 0x24, 0xfb, 0x03, 0x84, 0xf7, 0xc5, 0xdd, 0x60,
+ 0x09, 0x24, 0xf0, 0xc4, 0x5a, 0xcf, 0x09, 0x24, 0xe3, 0x03, 0x84, 0xfd,
+ 0x94, 0x09, 0x24, 0xd8, 0xc2, 0x01, 0x0d, 0x09, 0x24, 0xb1, 0xc7, 0xc4,
+ 0x96, 0x09, 0x24, 0xa8, 0xc8, 0x11, 0xd9, 0x09, 0x24, 0x78, 0x47, 0x5a,
+ 0xd2, 0xc3, 0x85, 0x03, 0xc2, 0x01, 0x0d, 0x09, 0x03, 0x68, 0x97, 0x09,
+ 0x03, 0x2b, 0x03, 0x85, 0x0f, 0x83, 0x09, 0x03, 0x20, 0xc8, 0x36, 0xa3,
+ 0x09, 0x03, 0x10, 0xc2, 0x03, 0xbd, 0x09, 0x02, 0xf9, 0x8b, 0x09, 0x02,
+ 0xeb, 0x03, 0x85, 0x19, 0x83, 0x09, 0x02, 0xda, 0x03, 0x85, 0x1f, 0x8b,
+ 0x09, 0x02, 0xd1, 0xc4, 0xdb, 0xd6, 0x09, 0x02, 0xc8, 0xc3, 0x00, 0xfa,
+ 0x09, 0x02, 0xc1, 0xca, 0x9c, 0x86, 0x09, 0x02, 0xb8, 0xdf, 0x0c, 0xa2,
+ 0x09, 0x01, 0xe8, 0xe0, 0x07, 0x27, 0x09, 0x01, 0xd8, 0xc2, 0x05, 0x5c,
+ 0x09, 0x14, 0x69, 0xc2, 0x03, 0xbd, 0x09, 0x14, 0x61, 0xc3, 0x47, 0x67,
+ 0x09, 0x14, 0x58, 0xca, 0xa3, 0xda, 0x00, 0x24, 0x58, 0xc3, 0xeb, 0x0a,
+ 0x00, 0x28, 0x39, 0xc2, 0x1c, 0x3e, 0x00, 0x28, 0x19, 0x87, 0x00, 0x28,
+ 0x08, 0xc9, 0x1e, 0x92, 0x00, 0x27, 0xd8, 0xc3, 0x2c, 0x4d, 0x05, 0x32,
+ 0x99, 0x83, 0x05, 0x32, 0xb9, 0xd1, 0x56, 0x31, 0x05, 0x32, 0xe9, 0x87,
+ 0x00, 0x23, 0x29, 0xca, 0x56, 0x38, 0x00, 0x23, 0x49, 0xc7, 0xcc, 0x53,
+ 0x00, 0x23, 0x68, 0x06, 0xc3, 0x85, 0x25, 0xc5, 0x1f, 0x9c, 0x00, 0x26,
+ 0x10, 0xc8, 0x21, 0xcc, 0x00, 0x25, 0xb9, 0xc8, 0x1e, 0x8a, 0x00, 0x27,
+ 0xa8, 0xc8, 0x1e, 0x8a, 0x00, 0x26, 0xe1, 0xc8, 0x21, 0xcc, 0x00, 0x24,
+ 0xb0, 0xc7, 0xc9, 0xd6, 0x00, 0x6d, 0x41, 0xc6, 0x93, 0xa6, 0x00, 0x6d,
+ 0x70, 0xc7, 0xc6, 0x17, 0x00, 0x6d, 0x51, 0xc6, 0x93, 0xa6, 0x00, 0x6d,
+ 0x80, 0xc9, 0x52, 0x92, 0x0e, 0xd3, 0x30, 0xc9, 0x52, 0x92, 0x0e, 0xd3,
+ 0x20, 0xcb, 0x53, 0x7e, 0x0e, 0xd1, 0x19, 0xc6, 0x02, 0x91, 0x0e, 0xd1,
+ 0x10, 0xcb, 0x53, 0x7e, 0x0e, 0xd1, 0x31, 0xc6, 0x02, 0x91, 0x0e, 0xd1,
+ 0x28, 0xc4, 0x0f, 0x54, 0x0e, 0xc8, 0x21, 0xc5, 0x0e, 0xcd, 0x0e, 0xc7,
+ 0xab, 0x03, 0x85, 0x31, 0xc5, 0x01, 0x62, 0x0e, 0xc0, 0x03, 0x03, 0x85,
+ 0x35, 0x47, 0x01, 0x8c, 0xc3, 0x85, 0x39, 0x45, 0x01, 0xbd, 0xc3, 0x85,
+ 0x5e, 0x47, 0x14, 0xca, 0xc3, 0x85, 0x8b, 0xdb, 0x15, 0xeb, 0x0e, 0xc2,
+ 0x50, 0x46, 0xd7, 0x7e, 0xc3, 0x85, 0xb3, 0x46, 0x0e, 0xcd, 0xc3, 0x85,
+ 0xc8, 0xc4, 0x0f, 0x54, 0x0e, 0xc2, 0xe3, 0x03, 0x85, 0xda, 0xd4, 0x39,
+ 0x2e, 0x0e, 0xc2, 0xd9, 0x08, 0x43, 0x85, 0xde, 0x00, 0x43, 0x85, 0xf0,
+ 0x00, 0x43, 0x86, 0x08, 0xc6, 0x14, 0xca, 0x0e, 0xc5, 0x99, 0xdd, 0x10,
+ 0xbf, 0x0e, 0xc5, 0x68, 0xc5, 0x01, 0x62, 0x0e, 0xc5, 0x1b, 0x03, 0x86,
+ 0x14, 0xc2, 0x02, 0x6a, 0x0e, 0xc4, 0xb0, 0xc5, 0x01, 0x62, 0x0e, 0xc0,
+ 0x23, 0x03, 0x86, 0x1d, 0xc6, 0x01, 0x8c, 0x0e, 0xc6, 0x2b, 0x03, 0x86,
+ 0x21, 0xc4, 0x01, 0xbd, 0x0e, 0xc5, 0x3b, 0x03, 0x86, 0x27, 0xc6, 0x14,
+ 0xca, 0x0e, 0xc4, 0x53, 0x03, 0x86, 0x2d, 0x46, 0x0e, 0xcd, 0xc3, 0x86,
+ 0x31, 0xc8, 0xc0, 0x0b, 0x0e, 0xc4, 0x11, 0xc4, 0x03, 0xf5, 0x0e, 0xc3,
+ 0xdb, 0x03, 0x86, 0x40, 0xc5, 0x00, 0x3e, 0x0e, 0xc3, 0xf1, 0x08, 0x43,
+ 0x86, 0x44, 0x47, 0x01, 0x8c, 0xc3, 0x86, 0x50, 0x52, 0x3a, 0xd2, 0xc3,
+ 0x86, 0x5f, 0xca, 0xa2, 0x36, 0x0e, 0xc5, 0xc9, 0xc8, 0xc2, 0xf3, 0x0e,
+ 0xc3, 0x50, 0x00, 0x43, 0x86, 0x71, 0x00, 0x43, 0x86, 0x9e, 0xde, 0x0e,
+ 0xc7, 0x0e, 0xc7, 0x49, 0xdc, 0x14, 0xbe, 0x0e, 0xc6, 0xb3, 0x03, 0x86,
+ 0xb0, 0x46, 0x0e, 0xcd, 0xc3, 0x86, 0xb6, 0xc8, 0xc0, 0x0b, 0x0e, 0xc3,
+ 0x41, 0xd6, 0x15, 0xeb, 0x0e, 0xc2, 0x48, 0x47, 0x01, 0x8c, 0xc3, 0x86,
+ 0xc2, 0xc5, 0x01, 0x62, 0x0e, 0xc0, 0x0b, 0x03, 0x86, 0xd1, 0xcb, 0x14,
+ 0xc5, 0x0e, 0xc5, 0x89, 0x47, 0x14, 0xca, 0x43, 0x86, 0xd5, 0xc7, 0x29,
+ 0xba, 0x0e, 0xc3, 0xd1, 0xc4, 0x05, 0xa6, 0x0e, 0xc3, 0xc0, 0xc4, 0x00,
+ 0x3f, 0x0e, 0xce, 0xe9, 0xc4, 0xa7, 0xae, 0x0e, 0xce, 0xe0, 0x46, 0x20,
+ 0xa1, 0xc3, 0x86, 0xe1, 0x48, 0xbf, 0x8b, 0x43, 0x86, 0xed, 0xc5, 0x15,
+ 0x2e, 0x0e, 0xcb, 0x3b, 0x03, 0x86, 0xf9, 0xc6, 0x04, 0x1b, 0x0e, 0xcb,
+ 0x31, 0xc5, 0x00, 0x3e, 0x0e, 0xcb, 0x28, 0x46, 0x15, 0x2e, 0xc3, 0x86,
+ 0xff, 0x46, 0x00, 0x3e, 0x43, 0x87, 0x0b, 0x46, 0x15, 0x2e, 0xc3, 0x87,
+ 0x17, 0x46, 0x00, 0x3e, 0x43, 0x87, 0x23, 0x47, 0x30, 0x47, 0xc3, 0x87,
+ 0x2f, 0xcc, 0x87, 0x54, 0x0e, 0xce, 0x49, 0xcc, 0x87, 0x0c, 0x0e, 0xce,
+ 0x40, 0x46, 0x15, 0x2e, 0xc3, 0x87, 0x3b, 0x46, 0x00, 0x3e, 0x43, 0x87,
+ 0x47, 0xc2, 0x00, 0x15, 0x0e, 0xce, 0xc0, 0x46, 0x20, 0xa1, 0xc3, 0x87,
+ 0x53, 0x48, 0xbf, 0x8b, 0x43, 0x87, 0x5f, 0xc5, 0x15, 0x2e, 0x0e, 0xcd,
+ 0xb1, 0xc6, 0x04, 0x1b, 0x0e, 0xcd, 0xa9, 0xc5, 0x00, 0x3e, 0x0e, 0xcd,
+ 0xa0, 0xc5, 0xdf, 0x86, 0x0e, 0xcd, 0x81, 0xca, 0x9f, 0xc0, 0x0e, 0xcd,
+ 0x48, 0x47, 0x30, 0x47, 0xc3, 0x87, 0x6b, 0x47, 0x02, 0x91, 0x43, 0x87,
+ 0x7d, 0x0a, 0xc3, 0x87, 0x8f, 0x42, 0x01, 0x47, 0xc3, 0x87, 0x9b, 0x48,
+ 0x15, 0x72, 0x43, 0x87, 0xa7, 0xc6, 0x04, 0x1b, 0x0e, 0xcd, 0x09, 0xc5,
+ 0x00, 0x3e, 0x0e, 0xcd, 0x00, 0xc5, 0x15, 0x2e, 0x0e, 0xc9, 0x63, 0x03,
+ 0x87, 0xbc, 0xc6, 0x04, 0x1b, 0x0e, 0xc9, 0x59, 0xc5, 0x00, 0x3e, 0x0e,
+ 0xc9, 0x50, 0xc2, 0x00, 0x15, 0x0e, 0xcb, 0x20, 0xc2, 0x00, 0x15, 0x0e,
+ 0xcb, 0x00, 0xc5, 0x00, 0x3e, 0x0e, 0xc9, 0x31, 0xc5, 0x15, 0x2e, 0x0e,
+ 0xc9, 0x28, 0xd0, 0x5f, 0x1f, 0x08, 0xae, 0x59, 0xd2, 0x4b, 0xf6, 0x08,
+ 0xae, 0x50, 0xc8, 0x0d, 0x7e, 0x01, 0x0b, 0xf0, 0xc5, 0x00, 0x62, 0x01,
+ 0x5b, 0x1b, 0x03, 0x87, 0xc2, 0xcc, 0x31, 0xdd, 0x01, 0x5a, 0x69, 0xcc,
+ 0x83, 0x04, 0x01, 0x5b, 0x69, 0xcd, 0x81, 0xce, 0x01, 0x5c, 0x38, 0x47,
+ 0x14, 0x16, 0xc3, 0x87, 0xc6, 0xc6, 0x12, 0x4f, 0x01, 0x4a, 0xc9, 0xc8,
+ 0xab, 0xed, 0x01, 0x4b, 0x08, 0xc8, 0xab, 0xed, 0x01, 0x4a, 0xe9, 0xc6,
+ 0x12, 0x4f, 0x01, 0x4a, 0xa8, 0xd8, 0x22, 0x64, 0x0f, 0xc0, 0x59, 0x46,
+ 0x00, 0x3e, 0xc3, 0x87, 0xd0, 0xcd, 0x80, 0x14, 0x01, 0x0e, 0xf9, 0xd0,
+ 0x59, 0xaf, 0x01, 0x0d, 0xa9, 0x44, 0x03, 0x9a, 0xc3, 0x87, 0xdc, 0xd1,
+ 0x03, 0x76, 0x01, 0x48, 0x41, 0xd9, 0x20, 0xc8, 0x0f, 0xc0, 0x39, 0xd5,
+ 0x00, 0x92, 0x0f, 0xc0, 0xb9, 0xcc, 0x8a, 0xb4, 0x0f, 0xc4, 0xd8, 0xc4,
+ 0x15, 0xa7, 0x01, 0x27, 0xd9, 0xc2, 0x22, 0x45, 0x01, 0x27, 0xd0, 0xc3,
+ 0x0d, 0x8f, 0x01, 0x27, 0xc9, 0xc3, 0x08, 0xde, 0x01, 0x27, 0xc0, 0xc4,
+ 0x05, 0xde, 0x01, 0x27, 0xb9, 0xc2, 0x0a, 0x20, 0x01, 0x27, 0xb0, 0xcf,
+ 0x02, 0x98, 0x01, 0x15, 0x59, 0xce, 0x34, 0x7c, 0x01, 0x57, 0x28, 0xca,
+ 0x9e, 0xa8, 0x01, 0x4c, 0x81, 0xcd, 0x7f, 0x85, 0x01, 0x4c, 0x70, 0x45,
+ 0x01, 0xac, 0xc3, 0x87, 0xe8, 0xd3, 0x44, 0xa4, 0x01, 0x4c, 0xe1, 0xc7,
+ 0x00, 0xd8, 0x01, 0x80, 0x4b, 0x03, 0x87, 0xf4, 0xd3, 0x1a, 0xbf, 0x01,
+ 0x70, 0x01, 0xda, 0x1a, 0xb8, 0x01, 0x70, 0x08, 0x00, 0x43, 0x87, 0xfa,
+ 0xcf, 0x2e, 0xd8, 0x01, 0x48, 0x01, 0xd6, 0x2e, 0xd1, 0x01, 0x48, 0x09,
+ 0x16, 0x43, 0x88, 0x0c, 0xc5, 0x00, 0x4c, 0x01, 0x0e, 0x09, 0x00, 0x43,
+ 0x88, 0x1b, 0xc5, 0x00, 0x4c, 0x01, 0x0e, 0x01, 0x00, 0x43, 0x88, 0x33,
+ 0xd7, 0x22, 0x65, 0x0f, 0xc0, 0x51, 0xc3, 0x7e, 0xb2, 0x01, 0x0d, 0x60,
+ 0x45, 0x00, 0x3f, 0xc3, 0x88, 0x45, 0xc5, 0x03, 0x82, 0x01, 0x0c, 0xd8,
+ 0xd4, 0x2e, 0xd3, 0x01, 0x0f, 0xd9, 0xc9, 0xac, 0x73, 0x01, 0x48, 0x88,
+ 0xc3, 0x13, 0xc8, 0x01, 0x0d, 0x1b, 0x03, 0x88, 0x51, 0x43, 0x03, 0x1e,
+ 0x43, 0x88, 0x57, 0xc2, 0x00, 0x4d, 0x01, 0x0f, 0x29, 0xcc, 0x55, 0x6a,
+ 0x01, 0x48, 0xf0, 0x9a, 0x01, 0x4a, 0x39, 0xcc, 0x06, 0xfb, 0x01, 0x5a,
+ 0x19, 0xc8, 0xbd, 0x0b, 0x01, 0x5a, 0x20, 0xcf, 0x66, 0x45, 0x01, 0x4b,
+ 0xa9, 0xce, 0x35, 0xda, 0x01, 0x4b, 0xa1, 0xd5, 0x34, 0x36, 0x01, 0x4a,
+ 0x11, 0x48, 0x6a, 0x14, 0x43, 0x88, 0x63, 0xe0, 0x05, 0x07, 0x0f, 0xdd,
+ 0xb0, 0x45, 0x01, 0xac, 0xc3, 0x88, 0x6f, 0xc8, 0xab, 0xed, 0x01, 0x48,
+ 0x38, 0xc2, 0x01, 0xcc, 0x00, 0x70, 0x11, 0xc3, 0x0c, 0x01, 0x00, 0x70,
+ 0x19, 0xc3, 0x48, 0x25, 0x00, 0x70, 0x21, 0xc2, 0x00, 0xe5, 0x00, 0x70,
+ 0x28, 0xc3, 0x91, 0x57, 0x00, 0x72, 0x19, 0xc4, 0xe5, 0xcb, 0x00, 0x72,
+ 0x20, 0x87, 0x00, 0x71, 0xb8, 0x03, 0xc3, 0x88, 0x7b, 0xc3, 0x3b, 0x04,
+ 0x00, 0x70, 0xb1, 0xc3, 0x0a, 0x68, 0x00, 0x70, 0xc0, 0xc3, 0x3b, 0x04,
+ 0x00, 0x70, 0xe1, 0xc2, 0x00, 0x3a, 0x00, 0x70, 0xf0, 0xc2, 0x03, 0x76,
+ 0x00, 0x72, 0x49, 0xc2, 0x00, 0x47, 0x00, 0x72, 0x50, 0xc5, 0xe2, 0xf1,
+ 0x00, 0x44, 0xd1, 0xc6, 0xd8, 0x9e, 0x00, 0x44, 0xc8, 0xc3, 0x33, 0x1c,
+ 0x00, 0x46, 0xe9, 0x8a, 0x00, 0x46, 0x60, 0xc6, 0xd2, 0x32, 0x00, 0x46,
+ 0xe1, 0xc7, 0xcd, 0x1e, 0x00, 0x46, 0xd9, 0xcb, 0x92, 0x20, 0x00, 0x46,
+ 0xd1, 0xc5, 0xdb, 0xb7, 0x00, 0x46, 0xa1, 0xc5, 0xdd, 0x01, 0x00, 0x44,
+ 0xc0, 0xca, 0xa2, 0xcc, 0x00, 0x30, 0xb1, 0xcc, 0x87, 0x60, 0x00, 0x30,
+ 0xb0, 0xcc, 0x05, 0x3b, 0x07, 0xe0, 0xb1, 0xcb, 0x10, 0x7a, 0x07, 0xe5,
+ 0x40, 0x44, 0x1a, 0x74, 0xc3, 0x88, 0x85, 0xce, 0x40, 0x48, 0x07, 0xed,
+ 0x29, 0xd7, 0x2a, 0xa7, 0x07, 0xed, 0x38, 0xcc, 0x05, 0x3b, 0x07, 0xe0,
+ 0xa9, 0xcb, 0x10, 0x7a, 0x07, 0xe5, 0x38, 0xd7, 0x2a, 0xa7, 0x07, 0xed,
+ 0x31, 0xce, 0x40, 0x48, 0x07, 0xed, 0xf0, 0xcc, 0x05, 0x3b, 0x07, 0xe0,
+ 0xc1, 0xcb, 0x10, 0x7a, 0x07, 0xe5, 0x50, 0xce, 0x40, 0x48, 0x07, 0xea,
+ 0xd1, 0xd7, 0x2a, 0xa7, 0x07, 0xea, 0xd8, 0xcc, 0x05, 0x3b, 0x07, 0xe0,
+ 0xb9, 0xcb, 0x10, 0x7a, 0x07, 0xe5, 0x48, 0xcc, 0x05, 0x3b, 0x07, 0xe2,
+ 0x91, 0xcb, 0x10, 0x7a, 0x07, 0xe6, 0xc0, 0xd1, 0x2f, 0xc8, 0x07, 0xec,
+ 0x99, 0xd1, 0x57, 0xda, 0x07, 0xec, 0xa0, 0xcd, 0x05, 0x3a, 0x07, 0xe7,
+ 0xf1, 0xca, 0x2a, 0xb4, 0x07, 0xe8, 0xd0, 0x43, 0x08, 0x86, 0xc3, 0x88,
+ 0x91, 0x43, 0x0a, 0x18, 0x43, 0x88, 0x9d, 0xcb, 0x6a, 0x72, 0x07, 0xe7,
+ 0x49, 0xca, 0x2a, 0xb4, 0x07, 0xe9, 0x41, 0x0b, 0xc3, 0x88, 0xb3, 0x45,
+ 0x01, 0xac, 0x43, 0x88, 0xbf, 0xca, 0x2a, 0xb4, 0x07, 0xe8, 0xc9, 0xcd,
+ 0x05, 0x3a, 0x07, 0xe7, 0xe8, 0xca, 0x2a, 0xb4, 0x07, 0xe9, 0x29, 0x0b,
+ 0xc3, 0x88, 0xcb, 0xd3, 0x40, 0x43, 0x07, 0xeb, 0x49, 0xcb, 0x6a, 0x72,
+ 0x07, 0xe9, 0xb8, 0xca, 0x2a, 0xb4, 0x07, 0xe9, 0x39, 0x0b, 0xc3, 0x88,
+ 0xd7, 0xcb, 0x6a, 0x72, 0x07, 0xe9, 0xc8, 0xca, 0x2a, 0xb4, 0x07, 0xe9,
+ 0x49, 0xcd, 0x05, 0x3a, 0x07, 0xe8, 0x68, 0x00, 0xc3, 0x88, 0xe3, 0xd1,
+ 0x52, 0x35, 0x07, 0xe2, 0xf8, 0x00, 0xc3, 0x88, 0xef, 0xd1, 0x52, 0x35,
+ 0x07, 0xe2, 0xf0, 0xcb, 0x6a, 0x72, 0x07, 0xe7, 0x91, 0xcd, 0x05, 0x3a,
+ 0x07, 0xe3, 0x00, 0xcc, 0x05, 0x3b, 0x07, 0xe0, 0xf9, 0xcb, 0x10, 0x7a,
+ 0x07, 0xe5, 0x80, 0x44, 0x1a, 0x74, 0xc3, 0x88, 0xfb, 0xd1, 0x2f, 0xc8,
+ 0x07, 0xeb, 0x09, 0x45, 0x1a, 0x6a, 0x43, 0x89, 0x07, 0xcc, 0x05, 0x3b,
+ 0x07, 0xe0, 0xf1, 0xcb, 0x10, 0x7a, 0x07, 0xe5, 0x78, 0xd7, 0x2a, 0xa7,
+ 0x07, 0xed, 0x41, 0xce, 0x40, 0x48, 0x07, 0xee, 0x30, 0x0b, 0xc3, 0x89,
+ 0x13, 0xcb, 0x6a, 0x72, 0x07, 0xe9, 0xa9, 0xd6, 0x2f, 0xc3, 0x07, 0xea,
+ 0xe0, 0xcc, 0x10, 0x79, 0x07, 0xe9, 0x89, 0xcb, 0x6a, 0x72, 0x07, 0xe7,
+ 0x40, 0xcc, 0x05, 0x3b, 0x07, 0xe0, 0xe1, 0xcb, 0x10, 0x7a, 0x07, 0xe5,
+ 0x68, 0xd0, 0x53, 0x24, 0x07, 0xea, 0xe9, 0xd7, 0x2a, 0xa7, 0x07, 0xea,
+ 0xf0, 0x0b, 0xc3, 0x89, 0x1f, 0x4a, 0x75, 0x68, 0x43, 0x89, 0x2b, 0x0b,
+ 0xc3, 0x89, 0x37, 0x45, 0x01, 0xac, 0x43, 0x89, 0x43, 0xcd, 0x05, 0x3a,
+ 0x07, 0xe8, 0x79, 0xca, 0x2a, 0xb4, 0x07, 0xe9, 0x58, 0xca, 0x2a, 0xb4,
+ 0x07, 0xe9, 0x09, 0xcd, 0x05, 0x3a, 0x07, 0xe8, 0x28, 0xca, 0x2a, 0xb4,
+ 0x07, 0xe9, 0x11, 0xcd, 0x05, 0x3a, 0x07, 0xe8, 0x30, 0x43, 0x13, 0x58,
+ 0xc3, 0x89, 0x4f, 0x00, 0x43, 0x89, 0x59, 0xcd, 0x7a, 0x09, 0x07, 0xee,
+ 0x79, 0xcf, 0x30, 0xfe, 0x07, 0xef, 0xa8, 0xcc, 0x05, 0x3b, 0x07, 0xe1,
+ 0x51, 0xcb, 0x10, 0x7a, 0x07, 0xe5, 0xd8, 0xce, 0x40, 0x48, 0x07, 0xed,
+ 0xb1, 0x45, 0x1a, 0x6a, 0xc3, 0x89, 0x65, 0xd7, 0x2a, 0xa7, 0x07, 0xeb,
+ 0xc0, 0xcc, 0x05, 0x3b, 0x07, 0xe1, 0x49, 0xcb, 0x10, 0x7a, 0x07, 0xe5,
+ 0xd0, 0xca, 0x2a, 0xb4, 0x07, 0xeb, 0xa9, 0xcc, 0x10, 0x79, 0x07, 0xee,
+ 0x20, 0xcd, 0x05, 0x3a, 0x07, 0xe2, 0xe9, 0xca, 0x2a, 0xb4, 0x07, 0xe4,
+ 0x80, 0xca, 0x2a, 0xb4, 0x07, 0xe9, 0xe1, 0xcd, 0x05, 0x3a, 0x07, 0xe9,
+ 0xe8, 0x49, 0x85, 0x8e, 0xc3, 0x89, 0x71, 0x0f, 0x43, 0x89, 0x7b, 0xcd,
+ 0x05, 0x3a, 0x07, 0xe7, 0xb1, 0xca, 0x2a, 0xb4, 0x07, 0xe8, 0x90, 0xcd,
+ 0x05, 0x3a, 0x07, 0xe7, 0xa9, 0xca, 0x2a, 0xb4, 0x07, 0xe8, 0x88, 0x0b,
+ 0xc3, 0x89, 0x87, 0xcb, 0x6a, 0x72, 0x07, 0xe9, 0xd1, 0x45, 0x01, 0xac,
+ 0x43, 0x89, 0x93, 0xcc, 0x05, 0x3b, 0x07, 0xe1, 0x31, 0xcb, 0x10, 0x7a,
+ 0x07, 0xe5, 0xc0, 0xca, 0x2a, 0xb4, 0x07, 0xe3, 0xd9, 0xcd, 0x05, 0x3a,
+ 0x07, 0xe0, 0xa0, 0xca, 0x2a, 0xb4, 0x07, 0xe3, 0xd1, 0xcd, 0x05, 0x3a,
+ 0x07, 0xe0, 0x98, 0xca, 0x2a, 0xb4, 0x07, 0xe3, 0xc1, 0x0b, 0xc3, 0x89,
+ 0xa5, 0xcb, 0x6a, 0x72, 0x07, 0xe7, 0x28, 0xcc, 0x05, 0x3b, 0x07, 0xe0,
+ 0x71, 0xcb, 0x10, 0x7a, 0x07, 0xe5, 0x20, 0xd1, 0x2f, 0xc8, 0x07, 0xea,
+ 0xa9, 0xd0, 0x53, 0x24, 0x07, 0xea, 0xb1, 0xd1, 0x53, 0x23, 0x07, 0xea,
+ 0xb9, 0xce, 0x40, 0x48, 0x07, 0xed, 0x19, 0xd7, 0x2a, 0xa7, 0x07, 0xed,
+ 0x20, 0xcc, 0x05, 0x3b, 0x07, 0xe0, 0x69, 0xcb, 0x10, 0x7a, 0x07, 0xe5,
+ 0x18, 0xd1, 0x57, 0xda, 0x07, 0xea, 0xa1, 0xce, 0x40, 0x48, 0x07, 0xed,
+ 0x09, 0xd7, 0x2a, 0xa7, 0x07, 0xed, 0x10, 0x0b, 0xc3, 0x89, 0xb1, 0x45,
+ 0x01, 0xac, 0x43, 0x89, 0xbd, 0xcc, 0x10, 0x79, 0x07, 0xe5, 0x29, 0xcb,
+ 0x6a, 0x72, 0x07, 0xe7, 0x20, 0xcc, 0x05, 0x3b, 0x07, 0xe0, 0x59, 0xcb,
+ 0x10, 0x7a, 0x07, 0xe5, 0x08, 0xd1, 0x57, 0xda, 0x07, 0xea, 0x81, 0xce,
+ 0x40, 0x48, 0x07, 0xec, 0xf9, 0xd7, 0x2a, 0xa7, 0x07, 0xed, 0x00, 0x1b,
+ 0xc3, 0x89, 0xc9, 0x03, 0xc3, 0x89, 0xd5, 0xcf, 0x62, 0xb2, 0x07, 0xe3,
+ 0x39, 0x45, 0x1a, 0x6a, 0xc3, 0x89, 0xe1, 0xcf, 0x65, 0x55, 0x07, 0xe3,
+ 0x29, 0xce, 0x71, 0x66, 0x07, 0xe3, 0x21, 0x0a, 0xc3, 0x89, 0xf1, 0x46,
+ 0x2f, 0xc8, 0xc3, 0x89, 0xfd, 0x42, 0x00, 0xc0, 0xc3, 0x8a, 0x09, 0x43,
+ 0x9c, 0x1b, 0xc3, 0x8a, 0x13, 0x42, 0x07, 0x73, 0xc3, 0x8a, 0x1f, 0x44,
+ 0xe5, 0x0b, 0xc3, 0x8a, 0x2b, 0xd1, 0x53, 0x23, 0x07, 0xe4, 0xc8, 0x0b,
+ 0xc3, 0x8a, 0x37, 0xd3, 0x40, 0x43, 0x07, 0xed, 0x70, 0xca, 0x2a, 0xb4,
+ 0x07, 0xec, 0xe1, 0xcc, 0x10, 0x79, 0x07, 0xec, 0xe8, 0xcc, 0x05, 0x3b,
+ 0x07, 0xe2, 0x61, 0xcb, 0x10, 0x7a, 0x07, 0xe6, 0x98, 0xd1, 0x57, 0xda,
+ 0x07, 0xec, 0xa9, 0xd7, 0x2a, 0xa7, 0x07, 0xec, 0xb1, 0xce, 0x40, 0x48,
+ 0x07, 0xed, 0x98, 0xcc, 0x10, 0x79, 0x07, 0xed, 0xc1, 0xca, 0x2a, 0xb4,
+ 0x07, 0xed, 0xe8, 0xca, 0x2a, 0xb4, 0x07, 0xec, 0xb9, 0xcc, 0x10, 0x79,
+ 0x07, 0xec, 0xc0, 0xcc, 0x05, 0x3b, 0x07, 0xe1, 0xe1, 0xcb, 0x10, 0x7a,
+ 0x07, 0xe6, 0x40, 0x45, 0x1a, 0x6a, 0xc3, 0x8a, 0x43, 0xce, 0x40, 0x48,
+ 0x07, 0xed, 0xb8, 0xcc, 0x05, 0x3b, 0x07, 0xe1, 0xd9, 0xcb, 0x10, 0x7a,
+ 0x07, 0xe6, 0x38, 0xca, 0x2a, 0xb4, 0x07, 0xe4, 0x19, 0xcd, 0x05, 0x3a,
+ 0x07, 0xe1, 0xe8, 0xcd, 0x05, 0x3a, 0x07, 0xf7, 0xa9, 0xca, 0x2a, 0xb4,
+ 0x07, 0xf7, 0xb0, 0x46, 0x00, 0x34, 0xc3, 0x8a, 0x4f, 0x46, 0x03, 0x50,
+ 0x43, 0x8a, 0x5b, 0xca, 0x2a, 0xb4, 0x07, 0xec, 0x39, 0xcc, 0x10, 0x79,
+ 0x07, 0xec, 0x40, 0xcc, 0x05, 0x3b, 0x07, 0xe2, 0x01, 0xcb, 0x10, 0x7a,
+ 0x07, 0xe6, 0x50, 0x45, 0x1a, 0x6a, 0xc3, 0x8a, 0x67, 0xce, 0x40, 0x48,
+ 0x07, 0xec, 0x09, 0xd7, 0x2a, 0xa7, 0x07, 0xec, 0x10, 0xca, 0x2a, 0xb4,
+ 0x07, 0xec, 0x21, 0xcc, 0x10, 0x79, 0x07, 0xec, 0x18, 0xcc, 0x10, 0x79,
+ 0x07, 0xed, 0xd1, 0xca, 0x2a, 0xb4, 0x07, 0xed, 0xe0, 0xca, 0x2a, 0xb4,
+ 0x07, 0xe3, 0xf9, 0xcd, 0x05, 0x3a, 0x07, 0xe1, 0xb0, 0xca, 0x2a, 0xb4,
+ 0x07, 0xe3, 0xf1, 0xcd, 0x05, 0x3a, 0x07, 0xe1, 0xa8, 0x0b, 0xc3, 0x8a,
+ 0x73, 0x45, 0x01, 0xac, 0x43, 0x8a, 0x7f, 0xcc, 0x05, 0x3b, 0x07, 0xe1,
+ 0x99, 0xcb, 0x10, 0x7a, 0x07, 0xe6, 0x10, 0xcc, 0x05, 0x3b, 0x07, 0xe0,
+ 0x41, 0xcb, 0x10, 0x7a, 0x07, 0xe4, 0xf8, 0xcc, 0x05, 0x3b, 0x07, 0xe0,
+ 0x39, 0xcb, 0x10, 0x7a, 0x07, 0xe4, 0xf0, 0x0b, 0xc3, 0x8a, 0x91, 0xd3,
+ 0x40, 0x43, 0x07, 0xee, 0x10, 0xcb, 0x6a, 0x72, 0x07, 0xe7, 0x11, 0xcc,
+ 0x10, 0x79, 0x07, 0xe5, 0x00, 0x8f, 0x07, 0xea, 0x1b, 0x03, 0x8a, 0x9d,
+ 0xc3, 0x3f, 0x07, 0x07, 0xea, 0x28, 0xcc, 0x05, 0x3b, 0x07, 0xe2, 0x41,
+ 0xcb, 0x10, 0x7a, 0x07, 0xe6, 0x88, 0xcc, 0x05, 0x3b, 0x07, 0xe2, 0x39,
+ 0xcb, 0x10, 0x7a, 0x07, 0xe6, 0x80, 0xd1, 0x2f, 0xc8, 0x07, 0xec, 0x71,
+ 0xd1, 0x57, 0xda, 0x07, 0xec, 0x79, 0xce, 0x40, 0x48, 0x07, 0xed, 0xc8,
+ 0xcc, 0x05, 0x3b, 0x07, 0xe2, 0x31, 0xcb, 0x10, 0x7a, 0x07, 0xe6, 0x78,
+ 0xd1, 0x2f, 0xc8, 0x07, 0xec, 0x49, 0xd1, 0x57, 0xda, 0x07, 0xec, 0x51,
+ 0xce, 0x40, 0x48, 0x07, 0xec, 0x58, 0xcc, 0x05, 0x3b, 0x07, 0xe2, 0x29,
+ 0xcb, 0x10, 0x7a, 0x07, 0xe6, 0x70, 0xd0, 0x53, 0x24, 0x07, 0xec, 0x61,
+ 0xd1, 0x57, 0xda, 0x07, 0xec, 0x69, 0xce, 0x40, 0x48, 0x07, 0xee, 0x01,
+ 0xd1, 0x53, 0x23, 0x07, 0xec, 0x90, 0xcb, 0x6a, 0x72, 0x07, 0xdf, 0xf9,
+ 0x0b, 0xc3, 0x8a, 0xa3, 0xca, 0x2a, 0xb4, 0x07, 0xdf, 0xe9, 0x45, 0x01,
+ 0xac, 0x43, 0x8a, 0xaf, 0x45, 0x01, 0xac, 0xc3, 0x8a, 0xbf, 0x0b, 0xc3,
+ 0x8a, 0xc9, 0xca, 0x2a, 0xb4, 0x07, 0xf6, 0x91, 0xcb, 0x6a, 0x72, 0x07,
+ 0xf6, 0xa0, 0x45, 0x01, 0xac, 0xc3, 0x8a, 0xd5, 0x0b, 0xc3, 0x8a, 0xe1,
+ 0xca, 0x2a, 0xb4, 0x07, 0xf6, 0x71, 0xcb, 0x6a, 0x72, 0x07, 0xf6, 0x80,
+ 0x45, 0x01, 0xac, 0xc3, 0x8a, 0xed, 0xcb, 0x6a, 0x72, 0x07, 0xdc, 0xa9,
+ 0x0b, 0xc3, 0x8a, 0xfd, 0xca, 0x2a, 0xb4, 0x07, 0xdc, 0x98, 0xcb, 0x6a,
+ 0x72, 0x07, 0xdc, 0xc9, 0x0b, 0xc3, 0x8b, 0x09, 0xca, 0x2a, 0xb4, 0x07,
+ 0xdc, 0xb8, 0x45, 0x01, 0xac, 0xc3, 0x8b, 0x15, 0x0b, 0xc3, 0x8b, 0x2d,
+ 0xca, 0x2a, 0xb4, 0x07, 0xf6, 0xf1, 0xcb, 0x6a, 0x72, 0x07, 0xf7, 0x00,
+ 0x46, 0x03, 0x95, 0xc3, 0x8b, 0x39, 0x0b, 0xc3, 0x8b, 0x45, 0xca, 0x2a,
+ 0xb4, 0x07, 0xf4, 0xf1, 0xcb, 0x6a, 0x72, 0x07, 0xf5, 0x00, 0xca, 0x2a,
+ 0xb4, 0x07, 0xdc, 0x59, 0xcd, 0x05, 0x3a, 0x07, 0xdc, 0x50, 0xd6, 0x2b,
+ 0xb9, 0x00, 0x46, 0x20, 0x46, 0x03, 0x95, 0xc3, 0x8b, 0x51, 0xcb, 0x6a,
+ 0x72, 0x07, 0xf6, 0x61, 0x0b, 0xc3, 0x8b, 0x5d, 0xca, 0x2a, 0xb4, 0x07,
+ 0xf6, 0x50, 0x19, 0xc3, 0x8b, 0x69, 0xc7, 0x06, 0x7f, 0x00, 0x32, 0x4b,
+ 0x03, 0x8b, 0x78, 0xcd, 0x05, 0x3a, 0x07, 0xf4, 0x69, 0xca, 0x2a, 0xb4,
+ 0x07, 0xf4, 0x70, 0x45, 0x01, 0xac, 0xc3, 0x8b, 0x7c, 0xcb, 0x6a, 0x72,
+ 0x07, 0xdc, 0x89, 0x0b, 0xc3, 0x8b, 0x8c, 0xca, 0x2a, 0xb4, 0x07, 0xdc,
+ 0x78, 0x00, 0x43, 0x8b, 0x98, 0x00, 0x43, 0x8b, 0xae, 0x00, 0x43, 0x8b,
+ 0xba, 0x0b, 0xc3, 0x8b, 0xc6, 0xca, 0x2a, 0xb4, 0x07, 0xf5, 0x31, 0xcb,
+ 0x6a, 0x72, 0x07, 0xf5, 0x40, 0x45, 0x01, 0xac, 0xc3, 0x8b, 0xd2, 0xcb,
+ 0x6a, 0x72, 0x07, 0xdb, 0xe9, 0x0b, 0xc3, 0x8b, 0xde, 0xca, 0x2a, 0xb4,
+ 0x07, 0xdb, 0xd8, 0x00, 0x43, 0x8b, 0xea, 0xcc, 0x85, 0x20, 0x00, 0x46,
+ 0x01, 0xcb, 0x6a, 0x72, 0x07, 0xdb, 0x49, 0x0b, 0xc3, 0x8b, 0xfa, 0xca,
+ 0x2a, 0xb4, 0x07, 0xdb, 0x38, 0x00, 0x43, 0x8c, 0x06, 0x45, 0x01, 0xac,
+ 0xc3, 0x8c, 0x16, 0x0f, 0xc3, 0x8c, 0x28, 0x0b, 0xc3, 0x8c, 0x37, 0xca,
+ 0x2a, 0xb4, 0x07, 0xf4, 0xb0, 0x00, 0x43, 0x8c, 0x43, 0x45, 0x01, 0xac,
+ 0xc3, 0x8c, 0x53, 0x0b, 0xc3, 0x8c, 0x5d, 0xca, 0x2a, 0xb4, 0x07, 0xf6,
+ 0x11, 0xcb, 0x6a, 0x72, 0x07, 0xf6, 0x20, 0x00, 0x43, 0x8c, 0x69, 0x00,
+ 0x43, 0x8c, 0x75, 0x98, 0x00, 0x45, 0xf1, 0xca, 0xa8, 0x76, 0x00, 0x45,
+ 0xb8, 0xcb, 0x10, 0x7a, 0x07, 0xda, 0xc1, 0xcc, 0x05, 0x3b, 0x07, 0xda,
+ 0xb0, 0xcb, 0x6a, 0x72, 0x07, 0xdb, 0x89, 0x0b, 0xc3, 0x8c, 0x85, 0xca,
+ 0x2a, 0xb4, 0x07, 0xdb, 0x78, 0x45, 0x01, 0xac, 0xc3, 0x8c, 0x91, 0xc6,
+ 0x18, 0x59, 0x00, 0x36, 0x93, 0x03, 0x8c, 0xa4, 0x0b, 0xc3, 0x8c, 0xa8,
+ 0xca, 0x2a, 0xb4, 0x07, 0xf7, 0x91, 0xcb, 0x6a, 0x72, 0x07, 0xf7, 0xa0,
+ 0xca, 0x2a, 0xb4, 0x07, 0xde, 0xe1, 0xcd, 0x05, 0x3a, 0x07, 0xde, 0xd8,
+ 0x45, 0x01, 0xac, 0xc3, 0x8c, 0xb4, 0xcd, 0x05, 0x3a, 0x07, 0xf5, 0x69,
+ 0xca, 0x2a, 0xb4, 0x07, 0xf5, 0x70, 0xcb, 0x6a, 0x72, 0x07, 0xdd, 0x19,
+ 0x0b, 0xc3, 0x8c, 0xe5, 0xca, 0x2a, 0xb4, 0x07, 0xdd, 0x08, 0xca, 0x2a,
+ 0xb4, 0x07, 0xdc, 0x69, 0xcd, 0x05, 0x3a, 0x07, 0xdc, 0x60, 0x45, 0x01,
+ 0xac, 0xc3, 0x8c, 0xf1, 0x0b, 0xc3, 0x8d, 0x0d, 0xca, 0x2a, 0xb4, 0x07,
+ 0xf4, 0x81, 0xcb, 0x6a, 0x72, 0x07, 0xf4, 0x90, 0x00, 0x43, 0x8d, 0x19,
+ 0xcb, 0x6a, 0x72, 0x07, 0xda, 0xa9, 0x0b, 0xc3, 0x8d, 0x29, 0xca, 0x2a,
+ 0xb4, 0x07, 0xda, 0x98, 0xcb, 0x6a, 0x72, 0x07, 0xdf, 0x49, 0xcc, 0x10,
+ 0x79, 0x07, 0xdf, 0x40, 0xce, 0x05, 0x39, 0x07, 0xde, 0xe8, 0x44, 0x0b,
+ 0xf8, 0xc3, 0x8d, 0x35, 0xd0, 0x0d, 0xe5, 0x00, 0x35, 0x40, 0xcb, 0x10,
+ 0x7a, 0x07, 0xf6, 0xb9, 0xcc, 0x05, 0x3b, 0x07, 0xf6, 0xa8, 0xcb, 0x10,
+ 0x7a, 0x07, 0xdf, 0x31, 0xcc, 0x05, 0x3b, 0x07, 0xdf, 0x20, 0xd5, 0x33,
+ 0xf7, 0x00, 0x45, 0x91, 0xcd, 0x05, 0x3a, 0x07, 0xf5, 0x79, 0xca, 0x2a,
+ 0xb4, 0x07, 0xf5, 0x80, 0x0b, 0xc3, 0x8d, 0x44, 0xca, 0x2a, 0xb4, 0x07,
+ 0xf6, 0x31, 0xcb, 0x6a, 0x72, 0x07, 0xf6, 0x40, 0x46, 0x03, 0x95, 0xc3,
+ 0x8d, 0x50, 0x0b, 0xc3, 0x8d, 0x5c, 0xca, 0x2a, 0xb4, 0x07, 0xf5, 0xd1,
+ 0xcb, 0x6a, 0x72, 0x07, 0xf5, 0xe0, 0xce, 0x74, 0x4c, 0x00, 0x37, 0xd1,
+ 0x0b, 0xc3, 0x8d, 0x68, 0xca, 0x2a, 0xb4, 0x07, 0xf5, 0xb1, 0xcb, 0x6a,
+ 0x72, 0x07, 0xf5, 0xc0, 0x45, 0x01, 0xac, 0xc3, 0x8d, 0x74, 0x0b, 0xc3,
+ 0x8d, 0x96, 0xca, 0x2a, 0xb4, 0x07, 0xf5, 0x91, 0xcb, 0x6a, 0x72, 0x07,
+ 0xf5, 0xa0, 0x00, 0x43, 0x8d, 0xa2, 0x00, 0x43, 0x8d, 0xb4, 0x00, 0x43,
+ 0x8d, 0xc0, 0x00, 0x43, 0x8d, 0xd6, 0x00, 0x43, 0x8d, 0xe2, 0xca, 0x2a,
+ 0xb4, 0x07, 0xdc, 0x39, 0xcd, 0x05, 0x3a, 0x07, 0xdc, 0x30, 0xcb, 0x6a,
+ 0x72, 0x07, 0xdb, 0xa9, 0x0b, 0xc3, 0x8d, 0xee, 0xca, 0x2a, 0xb4, 0x07,
+ 0xdb, 0x98, 0xcb, 0x6a, 0x72, 0x07, 0xdb, 0x69, 0x0b, 0xc3, 0x8d, 0xfa,
+ 0xca, 0x2a, 0xb4, 0x07, 0xdb, 0x58, 0x44, 0x0b, 0xf8, 0xc3, 0x8e, 0x06,
+ 0xce, 0x20, 0x24, 0x00, 0x36, 0x51, 0xc4, 0x01, 0xbd, 0x00, 0x36, 0x21,
+ 0xcb, 0x08, 0x89, 0x00, 0x31, 0x23, 0x03, 0x8e, 0x12, 0x5d, 0x10, 0xdc,
+ 0x43, 0x8e, 0x16, 0x45, 0x01, 0xac, 0xc3, 0x8e, 0x22, 0x0b, 0xc3, 0x8e,
+ 0x2e, 0xca, 0x2a, 0xb4, 0x07, 0xf7, 0x11, 0xcb, 0x6a, 0x72, 0x07, 0xf7,
+ 0x20, 0xcb, 0x6a, 0x72, 0x07, 0xde, 0xb1, 0x0b, 0xc3, 0x8e, 0x3a, 0xca,
+ 0x2a, 0xb4, 0x07, 0xde, 0xa0, 0x00, 0x43, 0x8e, 0x46, 0x45, 0x01, 0xac,
+ 0xc3, 0x8e, 0x56, 0xc6, 0x3f, 0x04, 0x00, 0x35, 0xd3, 0x03, 0x8e, 0x72,
+ 0x0b, 0xc3, 0x8e, 0x76, 0xca, 0x2a, 0xb4, 0x07, 0xf7, 0x31, 0xcb, 0x6a,
+ 0x72, 0x07, 0xf7, 0x40, 0xcb, 0x6a, 0x72, 0x07, 0xdb, 0xc9, 0x0b, 0xc3,
+ 0x8e, 0x82, 0xca, 0x2a, 0xb4, 0x07, 0xdb, 0xb8, 0x00, 0x43, 0x8e, 0x8e,
+ 0xce, 0x05, 0x39, 0x07, 0xf4, 0x00, 0xcb, 0x9b, 0xf7, 0x00, 0x35, 0xf3,
+ 0x03, 0x8e, 0xa4, 0xc4, 0xe6, 0x67, 0x00, 0x36, 0x0b, 0x03, 0x8e, 0xa8,
+ 0x45, 0x01, 0xac, 0xc3, 0x8e, 0xac, 0x0b, 0xc3, 0x8e, 0xbb, 0xca, 0x2a,
+ 0xb4, 0x07, 0xf7, 0x51, 0xcb, 0x6a, 0x72, 0x07, 0xf7, 0x60, 0xc3, 0x2f,
+ 0x22, 0x00, 0x33, 0xc1, 0xc4, 0x06, 0x7a, 0x00, 0x33, 0xa9, 0xc3, 0x7c,
+ 0xad, 0x00, 0x33, 0xb0, 0xc2, 0x08, 0x86, 0x0f, 0x75, 0xa9, 0xc2, 0x00,
+ 0x45, 0x0f, 0x75, 0x41, 0x0a, 0x43, 0x8e, 0xc7, 0xc4, 0xe6, 0x7b, 0x0f,
+ 0x75, 0xa1, 0xc2, 0x00, 0x5d, 0x0f, 0x75, 0x89, 0xc3, 0x03, 0x2c, 0x0f,
+ 0x75, 0x70, 0xc2, 0x01, 0x5b, 0x0f, 0x75, 0x31, 0x8a, 0x0f, 0x75, 0xd0,
+ 0x8e, 0x0f, 0x75, 0x19, 0x86, 0x0f, 0x75, 0xc8, 0xc3, 0x03, 0x2c, 0x0f,
+ 0x72, 0x71, 0xc2, 0x00, 0x5d, 0x0f, 0x72, 0x89, 0xc4, 0xe6, 0x7b, 0x0f,
+ 0x72, 0xa0, 0xc2, 0x00, 0x5d, 0x0f, 0x72, 0xc9, 0x47, 0x39, 0x6b, 0x43,
+ 0x8e, 0xd3, 0xc2, 0x08, 0x86, 0x0f, 0x74, 0xb1, 0xc2, 0x00, 0x2f, 0x0f,
+ 0x74, 0xc0, 0xc3, 0x8c, 0x10, 0x0f, 0x73, 0xe1, 0xc3, 0xae, 0x23, 0x0f,
+ 0x73, 0xf0, 0x4b, 0x12, 0xc6, 0xc3, 0x8e, 0xdf, 0xcc, 0x01, 0x9b, 0x0f,
+ 0xdd, 0x18, 0xdc, 0x12, 0xc6, 0x0f, 0xdd, 0x3b, 0x03, 0x8e, 0xe5, 0xcc,
+ 0x01, 0x9b, 0x0f, 0xdd, 0x12, 0x03, 0x8e, 0xeb, 0xc4, 0x00, 0xcd, 0x0f,
+ 0xdd, 0x03, 0x03, 0x8e, 0xf1, 0xc5, 0x00, 0x47, 0x0f, 0xdd, 0x0a, 0x03,
+ 0x8e, 0xf5, 0xca, 0x03, 0x76, 0x01, 0x29, 0x61, 0xc4, 0x00, 0xcd, 0x01,
+ 0x28, 0x81, 0xc5, 0x00, 0x47, 0x01, 0x28, 0x60, 0x49, 0x29, 0x20, 0x43,
+ 0x8e, 0xf9, 0xce, 0x29, 0x29, 0x0f, 0xd0, 0x79, 0xdb, 0x15, 0x9a, 0x0f,
+ 0xd1, 0xc8, 0xc4, 0x00, 0x5b, 0x01, 0x3d, 0x20, 0xc5, 0x01, 0x62, 0x01,
+ 0x30, 0xd1, 0xce, 0x23, 0xd6, 0x0f, 0xac, 0xe0, 0x44, 0x1f, 0xb5, 0xc3,
+ 0x8f, 0x0f, 0xc5, 0xc8, 0x86, 0x0f, 0xaf, 0x98, 0x17, 0xc3, 0x8f, 0x1b,
+ 0x96, 0x0b, 0x4d, 0xd0, 0x9a, 0x0b, 0x4f, 0x31, 0xc2, 0x08, 0xc6, 0x0b,
+ 0x4c, 0xd0, 0x83, 0x0b, 0x4b, 0x9b, 0x03, 0x8f, 0x29, 0x17, 0xc3, 0x8f,
+ 0x2f, 0x42, 0x0c, 0xfe, 0x43, 0x8f, 0x37, 0x96, 0x0b, 0x4f, 0x88, 0x17,
+ 0xc3, 0x8f, 0x41, 0x07, 0x43, 0x8f, 0x51, 0x93, 0x0b, 0x4c, 0x01, 0x92,
+ 0x0b, 0x4b, 0xe8, 0x42, 0x00, 0xee, 0xc3, 0x8f, 0x60, 0x92, 0x0b, 0x4b,
+ 0x30, 0xc2, 0x4c, 0x85, 0x0b, 0x4d, 0x81, 0x93, 0x0b, 0x4c, 0x70, 0xc2,
+ 0x00, 0x11, 0x0b, 0x4b, 0x79, 0x87, 0x0b, 0x4c, 0x08, 0x87, 0x0b, 0x4e,
+ 0xa3, 0x03, 0x8f, 0x6c, 0xc2, 0xd4, 0x79, 0x0b, 0x4c, 0x18, 0x93, 0x0b,
+ 0x4d, 0x08, 0x90, 0x0b, 0x4b, 0x38, 0xc3, 0x8c, 0x61, 0x0b, 0x4c, 0xe0,
+ 0xc2, 0x08, 0xc6, 0x0b, 0x4c, 0xc8, 0x87, 0x0b, 0x4b, 0x89, 0x93, 0x0b,
+ 0x4e, 0x50, 0x8f, 0x0b, 0x4b, 0xc0, 0xc5, 0xe3, 0x23, 0x0b, 0x4e, 0xd1,
+ 0xc5, 0xe3, 0xf0, 0x0b, 0x4e, 0x88, 0x96, 0x0b, 0x4e, 0x69, 0xc2, 0x00,
+ 0x92, 0x0b, 0x4d, 0x88, 0x9a, 0x0b, 0x4f, 0x39, 0x96, 0x0b, 0x4d, 0xe8,
+ 0x93, 0x0b, 0x4f, 0xa0, 0x90, 0x0b, 0x4b, 0x59, 0x96, 0x0b, 0x4c, 0x60,
+ 0x8f, 0x0b, 0x4b, 0xf0, 0xc6, 0xd6, 0x76, 0x0b, 0x4f, 0xa9, 0xc4, 0x00,
+ 0x2e, 0x0b, 0x4e, 0x91, 0x8b, 0x0b, 0x4e, 0x40, 0x96, 0x0b, 0x4e, 0x20,
+ 0x96, 0x0b, 0x4e, 0x78, 0xc3, 0xcc, 0x71, 0x0b, 0x4a, 0x29, 0x03, 0xc3,
+ 0x8f, 0x72, 0xc3, 0xdc, 0x59, 0x0b, 0x49, 0xd9, 0xc4, 0xcc, 0x33, 0x0b,
+ 0x49, 0x98, 0xc3, 0x90, 0xd5, 0x0b, 0x49, 0xe1, 0xc3, 0xe0, 0xa5, 0x0b,
+ 0x48, 0x99, 0x42, 0x0c, 0xfe, 0xc3, 0x8f, 0x7f, 0xc2, 0x01, 0xf8, 0x0b,
+ 0x47, 0xf1, 0xc2, 0x0b, 0xfd, 0x0b, 0x47, 0xe0, 0xc2, 0x01, 0xc4, 0x0b,
+ 0x4a, 0x31, 0xc2, 0x01, 0xe6, 0x0b, 0x47, 0xc0, 0x96, 0x0b, 0x49, 0x59,
+ 0x92, 0x0b, 0x48, 0xf8, 0xc2, 0x01, 0x0a, 0x0b, 0x49, 0xc1, 0x87, 0x0b,
+ 0x4a, 0xc8, 0x87, 0x0b, 0x48, 0xa9, 0xc2, 0xd4, 0x79, 0x0b, 0x48, 0x48,
+ 0xc3, 0x7a, 0x15, 0x0b, 0x48, 0x71, 0x96, 0x0b, 0x47, 0xb8, 0xc2, 0x01,
+ 0xe6, 0x0b, 0x47, 0xa8, 0x8f, 0x0b, 0x4a, 0x21, 0xc3, 0x59, 0x1e, 0x0b,
+ 0x48, 0xb8, 0x90, 0x0b, 0x49, 0xf1, 0x96, 0x0b, 0x48, 0x58, 0xc6, 0x15,
+ 0xa5, 0x0b, 0x4b, 0x18, 0xc2, 0x08, 0xc6, 0x0b, 0x49, 0x51, 0x96, 0x0b,
+ 0x48, 0x40, 0x90, 0x0b, 0x47, 0xa0, 0x90, 0x0b, 0x4a, 0x09, 0xc3, 0x3b,
+ 0xc1, 0x0b, 0x49, 0x19, 0x96, 0x0b, 0x48, 0x00, 0x92, 0x0b, 0x49, 0x61,
+ 0x8f, 0x0b, 0x49, 0x31, 0xc8, 0xbb, 0x13, 0x0b, 0x48, 0x79, 0xc7, 0xc6,
+ 0xc6, 0x0b, 0x47, 0xf8, 0x17, 0xc3, 0x8f, 0x8b, 0x87, 0x0b, 0x47, 0xe8,
+ 0x92, 0x0b, 0x49, 0xb1, 0x8f, 0x0b, 0x49, 0xa0, 0xc3, 0x60, 0x9d, 0x0b,
+ 0x47, 0x49, 0xc7, 0xce, 0x28, 0x0b, 0x47, 0x50, 0x8f, 0x0b, 0x47, 0x11,
+ 0x15, 0xc3, 0x8f, 0x95, 0xc3, 0xec, 0x69, 0x0b, 0x45, 0x08, 0x97, 0x0b,
+ 0x46, 0x53, 0x03, 0x8f, 0xa1, 0xc2, 0x01, 0x04, 0x0b, 0x44, 0x98, 0xc2,
+ 0x4c, 0x85, 0x0b, 0x44, 0xa9, 0xc9, 0xb0, 0xe1, 0x0b, 0x44, 0x78, 0xc2,
+ 0xd4, 0x79, 0x0b, 0x47, 0x29, 0xc3, 0xaa, 0xdd, 0x0b, 0x46, 0x40, 0x8f,
+ 0x0b, 0x46, 0x79, 0xc2, 0x00, 0xa8, 0x0b, 0x46, 0x20, 0x92, 0x0b, 0x46,
+ 0xd1, 0x8f, 0x0b, 0x46, 0xb8, 0x96, 0x0b, 0x45, 0xe9, 0xc5, 0xe3, 0x50,
+ 0x0b, 0x44, 0xa0, 0x90, 0x0b, 0x46, 0xb1, 0xc7, 0xc9, 0x7b, 0x0b, 0x46,
+ 0x38, 0x90, 0x0b, 0x46, 0xa1, 0xc5, 0xdd, 0xb5, 0x0b, 0x45, 0xc8, 0x42,
+ 0x00, 0xee, 0xc3, 0x8f, 0xb7, 0xc3, 0x18, 0x79, 0x0b, 0x46, 0xf8, 0x17,
+ 0xc3, 0x8f, 0xc3, 0xc3, 0x87, 0xeb, 0x0b, 0x46, 0x11, 0xc5, 0xda, 0xd6,
+ 0x0b, 0x44, 0xb8, 0xc5, 0xda, 0xf4, 0x0b, 0x45, 0xb9, 0x96, 0x0b, 0x45,
+ 0x30, 0xc3, 0x7a, 0x15, 0x0b, 0x46, 0x61, 0x87, 0x0b, 0x45, 0x20, 0xc3,
+ 0x93, 0xa1, 0x0b, 0x46, 0xf1, 0xc2, 0x02, 0x29, 0x0b, 0x46, 0x58, 0xc5,
+ 0xe2, 0xa1, 0x0b, 0x46, 0xc1, 0xc7, 0xce, 0x3d, 0x0b, 0x45, 0x98, 0xc6,
+ 0xd8, 0xec, 0x0b, 0x43, 0xa9, 0xc3, 0x66, 0xe9, 0x0b, 0x44, 0x51, 0xc3,
+ 0x91, 0xaf, 0x0b, 0x43, 0xd2, 0x03, 0x8f, 0xcb, 0xc3, 0xeb, 0xc4, 0x0b,
+ 0x44, 0x41, 0xc6, 0xd8, 0xa4, 0x0b, 0x44, 0x38, 0xc4, 0x9e, 0x03, 0x0b,
+ 0x42, 0xf9, 0xc7, 0xcc, 0xca, 0x0b, 0x42, 0xe0, 0xc3, 0x87, 0xeb, 0x0b,
+ 0x41, 0xf1, 0xca, 0xa7, 0xd6, 0x0b, 0x40, 0x40, 0x8f, 0x0b, 0x41, 0xb9,
+ 0xc7, 0xc5, 0x45, 0x0b, 0x40, 0x28, 0x8f, 0x0b, 0x42, 0x73, 0x03, 0x8f,
+ 0xd1, 0xc2, 0x02, 0x29, 0x0b, 0x42, 0x31, 0xc3, 0x18, 0x79, 0x0b, 0x41,
+ 0x91, 0xc4, 0x2c, 0xa9, 0x0b, 0x40, 0xd0, 0xc3, 0x82, 0x7a, 0x0b, 0x41,
+ 0xb1, 0xc3, 0xed, 0x08, 0x0b, 0x41, 0x30, 0xcc, 0x8c, 0x58, 0x0b, 0x42,
+ 0x08, 0xc5, 0xe1, 0x93, 0x0b, 0x40, 0xb1, 0xc5, 0xbd, 0x6e, 0x0b, 0x40,
+ 0x00, 0x00, 0x43, 0x8f, 0xe3, 0x8f, 0x0b, 0x42, 0x61, 0xc3, 0x87, 0xeb,
+ 0x0b, 0x42, 0x10, 0xc2, 0x02, 0x1d, 0x0b, 0x40, 0x51, 0xc5, 0xb5, 0x4a,
+ 0x0b, 0x40, 0x48, 0xc2, 0x02, 0x1d, 0x0b, 0x40, 0x19, 0xc5, 0xb5, 0x4a,
+ 0x0b, 0x40, 0x10, 0xa2, 0x01, 0x40, 0xfb, 0x03, 0x8f, 0xef, 0xa3, 0x01,
+ 0x41, 0x7b, 0x03, 0x90, 0x01, 0xa5, 0x01, 0x44, 0x79, 0xa4, 0x01, 0x42,
+ 0x7a, 0x03, 0x90, 0x0c, 0xa3, 0x01, 0x41, 0xbb, 0x03, 0x90, 0x10, 0xa5,
+ 0x01, 0x44, 0xb9, 0xa4, 0x01, 0x42, 0xba, 0x03, 0x90, 0x1b, 0xa5, 0x01,
+ 0x45, 0x39, 0xa4, 0x01, 0x43, 0x3a, 0x03, 0x90, 0x1f, 0xa5, 0x01, 0x46,
+ 0x38, 0xa3, 0x01, 0x41, 0xdb, 0x03, 0x90, 0x23, 0xa5, 0x01, 0x44, 0xd9,
+ 0xa4, 0x01, 0x42, 0xda, 0x03, 0x90, 0x2e, 0xa5, 0x01, 0x45, 0x59, 0xa4,
+ 0x01, 0x43, 0x5a, 0x03, 0x90, 0x32, 0xa5, 0x01, 0x46, 0x58, 0xa5, 0x01,
+ 0x45, 0x99, 0xa4, 0x01, 0x43, 0x9a, 0x03, 0x90, 0x36, 0xa5, 0x01, 0x46,
+ 0x98, 0xa5, 0x01, 0x47, 0x18, 0xa3, 0x01, 0x41, 0xeb, 0x03, 0x90, 0x3a,
+ 0xa5, 0x01, 0x44, 0xe9, 0xa4, 0x01, 0x42, 0xea, 0x03, 0x90, 0x45, 0xa5,
+ 0x01, 0x45, 0x69, 0xa4, 0x01, 0x43, 0x6a, 0x03, 0x90, 0x49, 0xa5, 0x01,
+ 0x46, 0x68, 0xa5, 0x01, 0x45, 0xa9, 0xa4, 0x01, 0x43, 0xaa, 0x03, 0x90,
+ 0x4d, 0xa5, 0x01, 0x46, 0xa8, 0xa5, 0x01, 0x47, 0x28, 0xa5, 0x01, 0x45,
+ 0xc9, 0xa4, 0x01, 0x43, 0xca, 0x03, 0x90, 0x51, 0xa5, 0x01, 0x46, 0xc8,
+ 0xa5, 0x01, 0x47, 0x48, 0xa5, 0x01, 0x47, 0x88, 0xa3, 0x01, 0x41, 0xf3,
+ 0x03, 0x90, 0x55, 0xa5, 0x01, 0x44, 0xf1, 0xa4, 0x01, 0x42, 0xf2, 0x03,
+ 0x90, 0x60, 0xa5, 0x01, 0x45, 0x71, 0xa4, 0x01, 0x43, 0x72, 0x03, 0x90,
+ 0x64, 0xa5, 0x01, 0x46, 0x70, 0xa5, 0x01, 0x45, 0xb1, 0xa4, 0x01, 0x43,
+ 0xb2, 0x03, 0x90, 0x68, 0xa5, 0x01, 0x46, 0xb0, 0xa5, 0x01, 0x47, 0x30,
+ 0xa5, 0x01, 0x45, 0xd1, 0xa4, 0x01, 0x43, 0xd2, 0x03, 0x90, 0x6c, 0xa5,
+ 0x01, 0x46, 0xd0, 0xa5, 0x01, 0x47, 0x50, 0xa5, 0x01, 0x47, 0x90, 0xa5,
+ 0x01, 0x45, 0xe1, 0xa4, 0x01, 0x43, 0xe2, 0x03, 0x90, 0x70, 0xa5, 0x01,
+ 0x46, 0xe0, 0xa5, 0x01, 0x47, 0x60, 0xa5, 0x01, 0x47, 0xa0, 0xa5, 0x01,
+ 0x47, 0xc0, 0xc6, 0x01, 0xa1, 0x0f, 0xda, 0x01, 0xcc, 0x06, 0x2b, 0x0f,
+ 0xda, 0x78, 0xcc, 0x06, 0x2b, 0x0f, 0xda, 0x71, 0xc5, 0x00, 0x47, 0x0f,
+ 0xda, 0x80, 0xd4, 0x38, 0x3c, 0x0f, 0xdc, 0xd9, 0xc3, 0x00, 0xda, 0x01,
+ 0x3e, 0xd8, 0xe0, 0x08, 0x07, 0x0f, 0xdb, 0x48, 0xe0, 0x08, 0x07, 0x0f,
+ 0xdb, 0x58, 0xc7, 0x0a, 0x20, 0x0f, 0xc8, 0x29, 0xc9, 0x05, 0xde, 0x0f,
+ 0xc8, 0x20, 0xd6, 0x2e, 0xd1, 0x01, 0x0f, 0xe1, 0xcf, 0x2e, 0xd8, 0x01,
+ 0x0f, 0xc9, 0xc6, 0x03, 0x81, 0x01, 0x0d, 0x70, 0xcd, 0x7f, 0x85, 0x01,
+ 0x4c, 0x79, 0xca, 0x9e, 0xa8, 0x01, 0x4c, 0x68, 0x00, 0x43, 0x90, 0x74,
+ 0xcf, 0x2e, 0xd8, 0x01, 0x59, 0xa1, 0xd6, 0x2e, 0xd1, 0x01, 0x59, 0xa9,
+ 0x16, 0x43, 0x90, 0x86, 0x45, 0x00, 0x3f, 0xc3, 0x90, 0x95, 0xc5, 0x03,
+ 0x82, 0x01, 0x0c, 0xd0, 0xd4, 0x2e, 0xd3, 0x01, 0x0f, 0xd1, 0xc9, 0xac,
+ 0x73, 0x01, 0x59, 0xc0, 0xc3, 0x7e, 0xb2, 0x01, 0x0d, 0x59, 0xd7, 0x22,
+ 0x65, 0x0f, 0xc0, 0x40, 0xc3, 0x13, 0xc8, 0x01, 0x0d, 0x13, 0x03, 0x90,
+ 0xa1, 0x43, 0x03, 0x1e, 0x43, 0x90, 0xa7, 0xc2, 0x00, 0x4d, 0x01, 0x0f,
+ 0x23, 0x03, 0x90, 0xb3, 0xcc, 0x55, 0x6a, 0x01, 0x48, 0xe8, 0xc6, 0x0d,
+ 0xd1, 0x01, 0x4b, 0xd1, 0xc9, 0x09, 0xde, 0x01, 0x4b, 0xb9, 0x9a, 0x01,
+ 0x59, 0xf0, 0xce, 0x35, 0xda, 0x01, 0x4b, 0x99, 0xd6, 0x2f, 0xd9, 0x01,
+ 0x4a, 0x19, 0x48, 0x6a, 0x14, 0xc3, 0x90, 0xb9, 0xcf, 0x66, 0x45, 0x01,
+ 0x5a, 0x50, 0xe0, 0x05, 0x07, 0x0f, 0xdd, 0xa8, 0x45, 0x01, 0xac, 0xc3,
+ 0x90, 0xc5, 0xc8, 0xab, 0xed, 0x01, 0x48, 0x30, 0xc5, 0x00, 0x4c, 0x01,
+ 0x0d, 0xe9, 0x00, 0x43, 0x90, 0xd1, 0xc5, 0x00, 0x4c, 0x01, 0x0d, 0xe1,
+ 0x00, 0x43, 0x90, 0xe9, 0x88, 0x08, 0x30, 0x81, 0x8f, 0x08, 0x30, 0x88,
+ 0x88, 0x08, 0x30, 0x99, 0x8f, 0x08, 0x30, 0xa0, 0x8f, 0x08, 0x30, 0xb0,
+ 0xc5, 0xda, 0x68, 0x08, 0x04, 0x01, 0xc7, 0xc6, 0x09, 0x08, 0x04, 0x09,
+ 0xc6, 0xd1, 0x84, 0x08, 0x04, 0x11, 0x23, 0xc3, 0x90, 0xfb, 0x24, 0xc3,
+ 0x91, 0x07, 0x25, 0xc3, 0x91, 0x13, 0x26, 0xc3, 0x91, 0x1f, 0x22, 0x43,
+ 0x91, 0x2b, 0xc7, 0xcc, 0xe6, 0x08, 0x04, 0x71, 0xc8, 0xc2, 0x3b, 0x08,
+ 0x04, 0x79, 0xc7, 0xc7, 0x2f, 0x08, 0x04, 0x81, 0xc7, 0xc4, 0x26, 0x08,
+ 0x04, 0x89, 0xc9, 0xb2, 0xe2, 0x08, 0x04, 0x90, 0xc5, 0xdb, 0x12, 0x08,
+ 0x04, 0xa9, 0xc6, 0xd0, 0xd6, 0x08, 0x04, 0xb1, 0x9f, 0x08, 0x04, 0xb8,
+ 0xc8, 0xbc, 0x23, 0x08, 0x04, 0xd1, 0xc6, 0xd0, 0xa6, 0x08, 0x04, 0xd9,
+ 0x9f, 0x08, 0x04, 0xe1, 0xc6, 0xd1, 0x00, 0x08, 0x04, 0xe9, 0xa3, 0x08,
+ 0x04, 0xf0, 0x9d, 0x08, 0x04, 0xf9, 0xc6, 0xd1, 0xf6, 0x08, 0x05, 0x01,
+ 0x9f, 0x08, 0x05, 0x09, 0xa0, 0x08, 0x05, 0x11, 0xa1, 0x08, 0x05, 0x19,
+ 0xa4, 0x08, 0x05, 0x29, 0xa5, 0x08, 0x05, 0x31, 0xc7, 0xce, 0xc2, 0x08,
+ 0x05, 0x38, 0x9d, 0x08, 0x05, 0x41, 0x9e, 0x08, 0x05, 0x49, 0xc9, 0xb3,
+ 0xf9, 0x08, 0x05, 0x51, 0xc8, 0xc0, 0x33, 0x08, 0x05, 0x59, 0xa1, 0x08,
+ 0x05, 0x61, 0xa2, 0x08, 0x05, 0x69, 0xa3, 0x08, 0x05, 0x71, 0xa4, 0x08,
+ 0x05, 0x79, 0xa5, 0x08, 0x05, 0x81, 0xa6, 0x08, 0x05, 0x88, 0x9d, 0x08,
+ 0x05, 0x91, 0x9f, 0x08, 0x05, 0xa1, 0xc7, 0xc4, 0xb2, 0x08, 0x05, 0xa9,
+ 0xa1, 0x08, 0x05, 0xb1, 0xa4, 0x08, 0x05, 0xc1, 0xa5, 0x08, 0x05, 0xc9,
+ 0xa6, 0x08, 0x05, 0xd1, 0x9e, 0x08, 0x05, 0x99, 0xc6, 0xd2, 0xb0, 0x08,
+ 0x05, 0xb8, 0x9d, 0x08, 0x05, 0xd9, 0x9e, 0x08, 0x05, 0xe1, 0x9f, 0x08,
+ 0x05, 0xe9, 0xa0, 0x08, 0x05, 0xf1, 0xa1, 0x08, 0x05, 0xf9, 0xa2, 0x08,
+ 0x06, 0x01, 0xa6, 0x08, 0x06, 0x08, 0x9d, 0x08, 0x06, 0x11, 0xc8, 0xc0,
+ 0x9b, 0x08, 0x06, 0x18, 0xcb, 0x97, 0xa0, 0x08, 0x06, 0x21, 0xc9, 0xb2,
+ 0xd9, 0x08, 0x06, 0x28, 0xc7, 0xca, 0x7e, 0x08, 0x06, 0x31, 0xc7, 0xcd,
+ 0x56, 0x08, 0x06, 0x39, 0x9f, 0x08, 0x06, 0x41, 0xc7, 0xce, 0xec, 0x08,
+ 0x06, 0x49, 0xa1, 0x08, 0x06, 0x51, 0xa3, 0x08, 0x06, 0x58, 0xc9, 0xb2,
+ 0x25, 0x08, 0x06, 0x69, 0xcf, 0x64, 0x38, 0x08, 0x06, 0x71, 0xc7, 0xce,
+ 0xde, 0x08, 0x06, 0x79, 0xa2, 0x08, 0x06, 0x81, 0xa3, 0x08, 0x06, 0x89,
+ 0xa5, 0x08, 0x06, 0x99, 0xa6, 0x08, 0x06, 0xa1, 0xd1, 0x51, 0x58, 0x08,
+ 0x06, 0x60, 0x9e, 0x08, 0x06, 0xa9, 0x9f, 0x08, 0x06, 0xb1, 0xa0, 0x08,
+ 0x06, 0xb9, 0xc6, 0xd1, 0xa8, 0x08, 0x06, 0xc1, 0xa2, 0x08, 0x06, 0xc9,
+ 0xa3, 0x08, 0x06, 0xd1, 0xa4, 0x08, 0x06, 0xd9, 0xa5, 0x08, 0x06, 0xe1,
+ 0xa6, 0x08, 0x06, 0xe8, 0x9d, 0x08, 0x06, 0xf9, 0x9e, 0x08, 0x07, 0x01,
+ 0x9f, 0x08, 0x07, 0x09, 0xa0, 0x08, 0x07, 0x11, 0xa1, 0x08, 0x07, 0x19,
+ 0xa2, 0x08, 0x07, 0x21, 0xa4, 0x08, 0x07, 0x31, 0xa5, 0x08, 0x07, 0x39,
+ 0xa6, 0x08, 0x07, 0x41, 0xa3, 0x08, 0x07, 0x28, 0x9d, 0x08, 0x07, 0x49,
+ 0x9e, 0x08, 0x07, 0x51, 0x9f, 0x08, 0x07, 0x59, 0xa0, 0x08, 0x07, 0x61,
+ 0xa1, 0x08, 0x07, 0x69, 0xa2, 0x08, 0x07, 0x71, 0xa4, 0x08, 0x07, 0x81,
+ 0xa3, 0x08, 0x07, 0x79, 0xa5, 0x08, 0x07, 0x89, 0xa6, 0x08, 0x07, 0x90,
+ 0x9e, 0x08, 0x07, 0x99, 0x9f, 0x08, 0x07, 0xa1, 0xa3, 0x08, 0x07, 0xa9,
+ 0xa4, 0x08, 0x07, 0xb1, 0xa5, 0x08, 0x07, 0xb9, 0xa6, 0x08, 0x07, 0xc0,
+ 0xd2, 0x4d, 0x94, 0x0e, 0xf9, 0x51, 0xca, 0x1e, 0xfc, 0x00, 0xeb, 0xa8,
+ 0xd3, 0x45, 0x4f, 0x0e, 0xf8, 0x48, 0x42, 0x00, 0x54, 0xc3, 0x91, 0x3d,
+ 0x43, 0x0b, 0xf9, 0x43, 0x91, 0x49, 0xcf, 0x63, 0x66, 0x00, 0xf3, 0x89,
+ 0xc6, 0xbf, 0xd5, 0x00, 0x0b, 0x19, 0xc4, 0x69, 0xdb, 0x00, 0x0b, 0x29,
+ 0xca, 0xa3, 0x58, 0x00, 0x10, 0xd9, 0xc3, 0x00, 0xd3, 0x00, 0x11, 0xb0,
+ 0xce, 0x04, 0x59, 0x0e, 0xf8, 0xe1, 0xcc, 0x57, 0xac, 0x0e, 0xf8, 0xb8,
+ 0x94, 0x00, 0x13, 0xcb, 0x03, 0x91, 0x55, 0x96, 0x00, 0x14, 0x3b, 0x03,
+ 0x91, 0x5b, 0x9b, 0x00, 0x14, 0x73, 0x03, 0x91, 0x61, 0x89, 0x00, 0xeb,
+ 0xb9, 0x11, 0xc3, 0x91, 0x67, 0x8b, 0x00, 0xe8, 0x4b, 0x03, 0x91, 0x7d,
+ 0x83, 0x00, 0x12, 0x83, 0x03, 0x91, 0x83, 0xc2, 0x00, 0x94, 0x05, 0x5a,
+ 0x89, 0x8a, 0x00, 0x13, 0x2b, 0x03, 0x91, 0x8d, 0x8f, 0x00, 0x13, 0x7b,
+ 0x03, 0x91, 0x96, 0x98, 0x00, 0x14, 0x61, 0x99, 0x00, 0x14, 0x69, 0x8d,
+ 0x00, 0x14, 0xf1, 0x8e, 0x05, 0x3c, 0x09, 0xc5, 0xde, 0x0f, 0x00, 0x0c,
+ 0x69, 0x87, 0x00, 0x0e, 0xe8, 0xcc, 0x26, 0x18, 0x05, 0x3b, 0x2a, 0x03,
+ 0x91, 0x9c, 0xc3, 0x22, 0x44, 0x00, 0x0c, 0x29, 0xc3, 0x0a, 0x1f, 0x00,
+ 0x0d, 0x41, 0xc4, 0x0d, 0x8e, 0x00, 0x0d, 0xe8, 0xd2, 0x4d, 0x94, 0x0e,
+ 0xf9, 0x49, 0xca, 0x1e, 0xfc, 0x00, 0xeb, 0xa0, 0xc3, 0x00, 0xd3, 0x0e,
+ 0xf8, 0xf1, 0xc4, 0x69, 0xdb, 0x00, 0x0b, 0x0b, 0x03, 0x91, 0xa2, 0xc9,
+ 0x07, 0x97, 0x00, 0x0a, 0xe9, 0xca, 0xa3, 0x58, 0x00, 0x10, 0xc9, 0xc6,
+ 0xbf, 0xd5, 0x00, 0x0a, 0xf8, 0xc5, 0x00, 0x34, 0x00, 0xf3, 0x1b, 0x03,
+ 0x91, 0xa8, 0xc5, 0x03, 0x50, 0x00, 0xf3, 0x08, 0xce, 0x18, 0x14, 0x00,
+ 0xf3, 0x28, 0xd3, 0x3f, 0xd1, 0x05, 0x3e, 0x51, 0xc9, 0xb3, 0x96, 0x00,
+ 0x11, 0xf8, 0x46, 0x01, 0xab, 0x43, 0x91, 0xae, 0x94, 0x05, 0x5a, 0x5b,
+ 0x03, 0x91, 0xba, 0x89, 0x00, 0x13, 0x0a, 0x03, 0x91, 0xc0, 0xc8, 0xc3,
+ 0x0b, 0x00, 0xe8, 0xf9, 0xcd, 0x7c, 0xe1, 0x00, 0xe8, 0xf1, 0x97, 0x00,
+ 0xe8, 0xe9, 0x91, 0x00, 0xe8, 0x8a, 0x03, 0x91, 0xc6, 0x00, 0xc3, 0x91,
+ 0xd2, 0xca, 0x4c, 0xfa, 0x05, 0x5a, 0x38, 0xc2, 0x00, 0x56, 0x0e, 0xf8,
+ 0x38, 0xc9, 0x35, 0x23, 0x05, 0x39, 0xf8, 0x46, 0x01, 0xab, 0x43, 0x92,
+ 0x13, 0x42, 0x00, 0xa9, 0xc3, 0x92, 0x1f, 0x05, 0xc3, 0x92, 0x2b, 0x0e,
+ 0xc3, 0x92, 0x49, 0x06, 0xc3, 0x92, 0x5b, 0xcc, 0x57, 0xac, 0x00, 0xec,
+ 0x39, 0xc5, 0x1e, 0x64, 0x00, 0x0f, 0xc9, 0xce, 0x04, 0x59, 0x00, 0x13,
+ 0x99, 0xc5, 0x1f, 0x01, 0x00, 0x07, 0x89, 0xc5, 0x34, 0x21, 0x00, 0x0a,
+ 0x21, 0xce, 0x3b, 0x8c, 0x05, 0x3d, 0x28, 0xd2, 0x4d, 0x94, 0x0e, 0xf9,
+ 0x31, 0x45, 0x01, 0x8e, 0x43, 0x92, 0x67, 0xc3, 0xa2, 0x1f, 0x00, 0x13,
+ 0x63, 0x03, 0x92, 0x79, 0xc2, 0x00, 0x4d, 0x00, 0x0c, 0xd0, 0xcf, 0x63,
+ 0x66, 0x00, 0xf1, 0xe9, 0xc6, 0xbf, 0xd5, 0x00, 0x09, 0xe1, 0xc4, 0x69,
+ 0xdb, 0x00, 0x09, 0xf1, 0xc3, 0x00, 0xd3, 0x00, 0x11, 0xa0, 0xc7, 0x0d,
+ 0xd9, 0x00, 0xf1, 0xbb, 0x03, 0x92, 0x7f, 0x45, 0x02, 0x93, 0x43, 0x92,
+ 0x85, 0xc9, 0xb6, 0x0c, 0x05, 0x3c, 0x70, 0xd4, 0x04, 0x53, 0x0e, 0xf8,
+ 0x28, 0xcb, 0x91, 0x18, 0x00, 0xf4, 0xe9, 0x06, 0x43, 0x92, 0x91, 0xd2,
+ 0x4d, 0x94, 0x0e, 0xf9, 0x29, 0xca, 0x1e, 0xfc, 0x00, 0xeb, 0x68, 0xcf,
+ 0x63, 0x66, 0x00, 0xf0, 0x99, 0xc6, 0xbf, 0xd5, 0x00, 0xf0, 0x89, 0xc4,
+ 0x69, 0xdb, 0x00, 0x08, 0x79, 0xc3, 0x00, 0xd3, 0x00, 0x11, 0x08, 0xc5,
+ 0x00, 0x34, 0x00, 0xf0, 0x69, 0xc5, 0x03, 0x50, 0x00, 0xf0, 0x58, 0xc9,
+ 0xb3, 0x4e, 0x00, 0xec, 0x88, 0xd3, 0x3f, 0xd1, 0x05, 0x3e, 0x41, 0xc5,
+ 0x03, 0x82, 0x00, 0x08, 0x88, 0xc5, 0xd5, 0x3f, 0x00, 0x0c, 0x61, 0xc3,
+ 0x13, 0xc8, 0x00, 0x12, 0xb0, 0x46, 0x01, 0xab, 0x43, 0x92, 0x9d, 0xc7,
+ 0x43, 0xe8, 0x00, 0x15, 0x1b, 0x03, 0x92, 0xa9, 0xca, 0x98, 0xb4, 0x00,
+ 0x0e, 0x30, 0x94, 0x05, 0x5a, 0x43, 0x03, 0x92, 0xaf, 0xc5, 0x44, 0x2d,
+ 0x05, 0x3e, 0x99, 0xc4, 0x99, 0xd6, 0x05, 0x3e, 0xa8, 0x8c, 0x00, 0x11,
+ 0xbb, 0x03, 0x92, 0xb5, 0x8b, 0x00, 0x09, 0x88, 0xc6, 0xbf, 0xd5, 0x00,
+ 0x07, 0x3b, 0x03, 0x92, 0xbe, 0xc9, 0x07, 0x97, 0x00, 0x08, 0x49, 0xc4,
+ 0x69, 0xdb, 0x00, 0x08, 0x69, 0xc3, 0x00, 0xd3, 0x00, 0x12, 0xa8, 0xca,
+ 0xa6, 0xdc, 0x05, 0x5a, 0xa9, 0xca, 0x4a, 0x72, 0x05, 0x5a, 0xa0, 0xc4,
+ 0x73, 0xe1, 0x00, 0x13, 0xb9, 0xc5, 0x24, 0x65, 0x00, 0x14, 0xd0, 0xc2,
+ 0x01, 0x00, 0x00, 0x0d, 0x0b, 0x03, 0x92, 0xc4, 0xc8, 0xa1, 0x8e, 0x00,
+ 0xf6, 0x78, 0xc9, 0x07, 0x97, 0x00, 0x07, 0xa3, 0x03, 0x92, 0xca, 0xc4,
+ 0x69, 0xdb, 0x00, 0x0e, 0x90, 0x11, 0xc3, 0x92, 0xd0, 0xc8, 0x1e, 0x8a,
+ 0x00, 0x07, 0xb2, 0x03, 0x92, 0xdc, 0x45, 0x00, 0x39, 0x43, 0x92, 0xe2,
+ 0x45, 0x00, 0x39, 0x43, 0x92, 0xee, 0xca, 0xa4, 0x98, 0x00, 0x0f, 0xf8,
+ 0xd1, 0x56, 0xb9, 0x05, 0x3a, 0x59, 0xc2, 0x00, 0x11, 0x05, 0x3a, 0x69,
+ 0xcd, 0x7d, 0x63, 0x01, 0x63, 0xd0, 0xcb, 0x9c, 0x9c, 0x00, 0x0f, 0x68,
+ 0x46, 0x01, 0xab, 0xc3, 0x93, 0x00, 0x87, 0x05, 0x5b, 0x10, 0xd4, 0x04,
+ 0x53, 0x00, 0xec, 0x98, 0xd3, 0x45, 0x4f, 0x0e, 0xf8, 0xd0, 0x11, 0xc3,
+ 0x93, 0x0c, 0xc8, 0x1e, 0x8a, 0x00, 0x07, 0x7a, 0x03, 0x93, 0x18, 0xc6,
+ 0x00, 0x33, 0x00, 0xf1, 0x68, 0xc9, 0x07, 0x97, 0x00, 0x07, 0x71, 0xc4,
+ 0x69, 0xdb, 0x00, 0x0e, 0x80, 0xcc, 0x26, 0x18, 0x00, 0xeb, 0xe0, 0x89,
+ 0x00, 0xeb, 0xc9, 0x88, 0x05, 0x3b, 0xe1, 0x94, 0x05, 0x3c, 0x19, 0x95,
+ 0x05, 0x3c, 0x29, 0x96, 0x05, 0x3c, 0x39, 0x86, 0x05, 0x3b, 0xd0, 0xc5,
+ 0xdd, 0x2e, 0x05, 0x5b, 0x21, 0xc2, 0x09, 0x06, 0x05, 0x5a, 0x00, 0x45,
+ 0x01, 0xac, 0xc3, 0x93, 0x1e, 0xc8, 0x0f, 0xda, 0x00, 0x0d, 0xc8, 0xcc,
+ 0x57, 0xac, 0x0e, 0xf8, 0xa1, 0xcc, 0x4b, 0x48, 0x05, 0x59, 0xe0, 0xca,
+ 0xaa, 0xa6, 0x0e, 0xf8, 0x5b, 0x03, 0x93, 0x34, 0xce, 0x04, 0x59, 0x00,
+ 0xec, 0xc1, 0xcc, 0x57, 0xac, 0x00, 0xec, 0x59, 0xc4, 0x00, 0xd2, 0x00,
+ 0x14, 0x30, 0xc9, 0x07, 0x97, 0x00, 0x07, 0x53, 0x03, 0x93, 0x3a, 0xc6,
+ 0xbf, 0xd5, 0x00, 0x11, 0x4b, 0x03, 0x93, 0x3e, 0xc4, 0x69, 0xdb, 0x00,
+ 0x08, 0xd8, 0xc6, 0x00, 0x33, 0x00, 0xf0, 0xd8, 0x11, 0xc3, 0x93, 0x44,
+ 0xc8, 0x1e, 0x8a, 0x00, 0x07, 0x58, 0x45, 0x00, 0x39, 0x43, 0x93, 0x50,
+ 0xc6, 0x03, 0x4f, 0x00, 0xf7, 0xb8, 0x43, 0x0b, 0xf9, 0xc3, 0x93, 0x5c,
+ 0xc8, 0x1e, 0x8a, 0x00, 0x07, 0xf8, 0xce, 0x38, 0x03, 0x05, 0x5a, 0xd1,
+ 0xc5, 0x03, 0x82, 0x00, 0x12, 0x78, 0x98, 0x00, 0xf7, 0xe9, 0xc2, 0x00,
+ 0x30, 0x00, 0xf7, 0xd8, 0xc5, 0x00, 0x34, 0x00, 0xf2, 0x19, 0xc5, 0x03,
+ 0x50, 0x00, 0xf2, 0x08, 0x42, 0x03, 0x76, 0xc3, 0x93, 0x68, 0x06, 0xc3,
+ 0x93, 0x77, 0xc6, 0x61, 0xbc, 0x00, 0x0b, 0x5b, 0x03, 0x93, 0x84, 0xc5,
+ 0x1f, 0x01, 0x00, 0x0b, 0x4b, 0x03, 0x93, 0x8a, 0x05, 0xc3, 0x93, 0x8e,
+ 0x14, 0xc3, 0x93, 0x9d, 0xc9, 0x4d, 0x9d, 0x05, 0x5a, 0x91, 0x15, 0xc3,
+ 0x93, 0xa9, 0xc5, 0x1e, 0x64, 0x00, 0x07, 0xc9, 0xc5, 0x34, 0x21, 0x00,
+ 0x07, 0xd1, 0xc5, 0x1f, 0x9c, 0x00, 0x0b, 0x69, 0xc6, 0xd1, 0x0c, 0x00,
+ 0x0b, 0x99, 0xce, 0x1f, 0xa7, 0x00, 0x10, 0xb8, 0xd5, 0x37, 0xfc, 0x05,
+ 0x5a, 0x78, 0xc5, 0x1f, 0x9c, 0x00, 0x08, 0x1b, 0x03, 0x93, 0xb5, 0x05,
+ 0xc3, 0x93, 0xbb, 0xca, 0xa1, 0x8c, 0x00, 0xf5, 0x19, 0x06, 0xc3, 0x93,
+ 0xca, 0x14, 0xc3, 0x93, 0xd7, 0xce, 0x1f, 0xa7, 0x00, 0x10, 0x19, 0xc5,
+ 0x1e, 0x64, 0x00, 0x07, 0x01, 0xc5, 0x34, 0x21, 0x00, 0x07, 0x09, 0xc5,
+ 0x1f, 0x01, 0x00, 0x07, 0x19, 0xc6, 0x61, 0xbc, 0x00, 0x08, 0x09, 0xc6,
+ 0xd1, 0x0c, 0x00, 0x08, 0x29, 0xc6, 0x03, 0x81, 0x01, 0x63, 0x28, 0xc5,
+ 0x34, 0x21, 0x00, 0x0f, 0xe9, 0xc6, 0x61, 0xbc, 0x00, 0x0f, 0x18, 0x43,
+ 0x0b, 0xf9, 0xc3, 0x93, 0xe1, 0xc8, 0x1e, 0x8a, 0x00, 0xf4, 0x28, 0xc6,
+ 0xbf, 0xd5, 0x00, 0xf1, 0x49, 0xc9, 0x07, 0x97, 0x00, 0x09, 0x29, 0xc4,
+ 0x69, 0xdb, 0x00, 0x10, 0xf8, 0xc8, 0x1e, 0x8a, 0x00, 0xf1, 0x39, 0x43,
+ 0x0b, 0xf9, 0xc3, 0x93, 0xed, 0xc8, 0x21, 0xcc, 0x01, 0x63, 0x40, 0x43,
+ 0x0b, 0xf9, 0xc3, 0x93, 0xf9, 0xc8, 0x21, 0xcc, 0x01, 0x63, 0x60, 0xc9,
+ 0x07, 0x97, 0x00, 0xf4, 0x89, 0xc3, 0x00, 0xd3, 0x00, 0x14, 0x89, 0xc4,
+ 0x69, 0xdb, 0x00, 0x0b, 0xf0, 0xc5, 0x03, 0x82, 0x00, 0x0d, 0xb1, 0xc9,
+ 0xb3, 0x96, 0x00, 0x12, 0x00, 0xc8, 0x1e, 0x8a, 0x00, 0xf4, 0x69, 0xc8,
+ 0x18, 0x1a, 0x00, 0xf4, 0x58, 0xcb, 0x95, 0xbc, 0x05, 0x5a, 0xbb, 0x03,
+ 0x94, 0x05, 0xcc, 0x4a, 0x70, 0x05, 0x5a, 0xb0, 0xc8, 0x0d, 0xd8, 0x00,
+ 0xf3, 0xf9, 0xce, 0x3c, 0x2c, 0x05, 0x3a, 0xf8, 0xc5, 0x03, 0x82, 0x00,
+ 0xeb, 0xeb, 0x03, 0x94, 0x09, 0xcc, 0x89, 0x94, 0x05, 0x3a, 0xa8, 0xc8,
+ 0x0d, 0xd8, 0x00, 0xf1, 0x99, 0xce, 0x3c, 0x2c, 0x05, 0x3a, 0x19, 0xc8,
+ 0x21, 0xcc, 0x01, 0x63, 0x50, 0xd4, 0x3c, 0x26, 0x05, 0x3a, 0x28, 0xc6,
+ 0xbf, 0xd5, 0x00, 0x09, 0xb9, 0xc4, 0x69, 0xdb, 0x00, 0x0f, 0x48, 0xc9,
+ 0x07, 0x97, 0x00, 0x08, 0xe9, 0xc6, 0xbf, 0xd5, 0x00, 0x09, 0x19, 0xc4,
+ 0x69, 0xdb, 0x00, 0x0f, 0x38, 0xc5, 0x00, 0x34, 0x00, 0xf0, 0x29, 0xc5,
+ 0x03, 0x50, 0x00, 0xf0, 0x18, 0x87, 0x05, 0x59, 0x99, 0xc5, 0xdd, 0x2e,
+ 0x05, 0x59, 0x81, 0x91, 0x00, 0x13, 0xa8, 0xcc, 0x26, 0x18, 0x05, 0x59,
+ 0xf0, 0xcb, 0x4b, 0x49, 0x00, 0x14, 0xe9, 0xc9, 0x07, 0x97, 0x00, 0x09,
+ 0xa9, 0xc4, 0x69, 0xdb, 0x00, 0x0f, 0x80, 0xc5, 0x45, 0x70, 0x00, 0x12,
+ 0x58, 0xc5, 0x00, 0x34, 0x00, 0xf7, 0xa1, 0xc5, 0x03, 0x50, 0x00, 0xf4,
+ 0x70, 0xc2, 0x01, 0x00, 0x00, 0x0d, 0x7b, 0x03, 0x94, 0x0f, 0xc8, 0xa1,
+ 0x8e, 0x00, 0xf7, 0x30, 0x11, 0xc3, 0x94, 0x15, 0xc8, 0x1e, 0x8a, 0x00,
+ 0x06, 0xe2, 0x03, 0x94, 0x21, 0xce, 0x74, 0xf4, 0x00, 0xf3, 0xd0, 0x00,
+ 0x43, 0x94, 0x25, 0xc9, 0x07, 0x97, 0x00, 0x06, 0xdb, 0x03, 0x94, 0x31,
+ 0xc4, 0x69, 0xdb, 0x00, 0x0e, 0x98, 0x45, 0x00, 0x39, 0x43, 0x94, 0x37,
+ 0x45, 0x00, 0x39, 0x43, 0x94, 0x55, 0x42, 0x00, 0xd0, 0xc3, 0x94, 0x73,
+ 0x45, 0x02, 0x93, 0x43, 0x94, 0x82, 0xcb, 0x9c, 0x9c, 0x00, 0x11, 0x50,
+ 0x45, 0x00, 0x39, 0x43, 0x94, 0x8e, 0xc9, 0x1e, 0x89, 0x00, 0xf2, 0x71,
+ 0xc5, 0x34, 0x21, 0x00, 0xf2, 0x61, 0xc6, 0x61, 0xbc, 0x00, 0x11, 0x60,
+ 0x42, 0x00, 0xd0, 0xc3, 0x94, 0x9a, 0xca, 0x1e, 0x5f, 0x00, 0x10, 0x40,
+ 0xca, 0xa4, 0x98, 0x00, 0xf1, 0x70, 0x00, 0x43, 0x94, 0xa6, 0xca, 0xa4,
+ 0x70, 0x00, 0xf0, 0xe0, 0x42, 0x00, 0xd0, 0xc3, 0x94, 0xb2, 0xca, 0x1e,
+ 0x5f, 0x00, 0x10, 0x20, 0xc5, 0x34, 0x21, 0x00, 0xf0, 0xb1, 0xc5, 0x1e,
+ 0x64, 0x00, 0xf0, 0xa0, 0xc9, 0x0d, 0xd7, 0x00, 0xf5, 0xb1, 0xc5, 0x1f,
+ 0x01, 0x00, 0xf5, 0xa1, 0xca, 0xa1, 0x8c, 0x00, 0xf5, 0x91, 0xc5, 0x1e,
+ 0x64, 0x00, 0xf5, 0x81, 0xc5, 0x34, 0x21, 0x00, 0xf5, 0x70, 0x45, 0x00,
+ 0x39, 0x43, 0x94, 0xbe, 0x42, 0x00, 0xd0, 0xc3, 0x94, 0xdc, 0xca, 0x1e,
+ 0x5f, 0x00, 0x10, 0x00, 0xcb, 0x9c, 0x9c, 0x00, 0x0e, 0xf0, 0xca, 0xa4,
+ 0x98, 0x00, 0x0f, 0xd0, 0xce, 0x18, 0x14, 0x00, 0xf3, 0x40, 0xce, 0x18,
+ 0x14, 0x00, 0xf3, 0x30, 0xc5, 0x00, 0x34, 0x00, 0xf4, 0x91, 0xc5, 0x03,
+ 0x50, 0x00, 0x0b, 0xd8, 0xc5, 0x00, 0x34, 0x00, 0xf4, 0x41, 0xc5, 0x03,
+ 0x50, 0x00, 0xf4, 0x30, 0xc5, 0x00, 0x34, 0x00, 0xf3, 0x61, 0xc5, 0x03,
+ 0x50, 0x00, 0xf3, 0x50, 0x42, 0x00, 0xd0, 0xc3, 0x94, 0xeb, 0xca, 0x1e,
+ 0x5f, 0x00, 0x10, 0x80, 0xc6, 0xbf, 0xd5, 0x00, 0x0a, 0xb1, 0xc4, 0x69,
+ 0xdb, 0x00, 0x0a, 0xc0, 0xd2, 0x21, 0xc2, 0x05, 0x3a, 0x80, 0xc5, 0x00,
+ 0x34, 0x00, 0xf2, 0x31, 0xc5, 0x03, 0x50, 0x00, 0xf2, 0x20, 0xcb, 0x9c,
+ 0x9c, 0x00, 0xf1, 0xc0, 0xc5, 0x00, 0x34, 0x00, 0xf1, 0x21, 0xc5, 0x03,
+ 0x50, 0x00, 0xf1, 0x10, 0xcb, 0x98, 0xb3, 0x00, 0x0e, 0x28, 0xca, 0xa4,
+ 0x98, 0x00, 0xf0, 0x40, 0xd0, 0x5b, 0x9f, 0x0f, 0xc1, 0x89, 0xcb, 0x5b,
+ 0xa4, 0x0f, 0xc1, 0x69, 0xca, 0xa0, 0xce, 0x0f, 0xc1, 0x49, 0x49, 0xb3,
+ 0x57, 0xc3, 0x94, 0xfa, 0xd8, 0x26, 0x3c, 0x01, 0x5b, 0xd9, 0xcc, 0x86,
+ 0x70, 0x0f, 0xc1, 0x09, 0xcc, 0x85, 0x98, 0x0f, 0xc1, 0x28, 0xe0, 0x00,
+ 0x67, 0x01, 0x5c, 0x08, 0xc6, 0x46, 0x1a, 0x07, 0xd9, 0x69, 0xc7, 0x46,
+ 0x19, 0x07, 0xd9, 0x60, 0x92, 0x00, 0x89, 0x91, 0xc8, 0xba, 0x3b, 0x00,
+ 0x89, 0x98, 0x43, 0x03, 0x49, 0xc3, 0x95, 0x06, 0x95, 0x00, 0x89, 0x09,
+ 0xc8, 0xbe, 0xbb, 0x00, 0x89, 0x11, 0xc7, 0xca, 0x5b, 0x00, 0x89, 0x21,
+ 0xc7, 0xca, 0x1c, 0x00, 0x89, 0x30, 0xc3, 0x3b, 0xc9, 0x00, 0x89, 0xe1,
+ 0x44, 0x5d, 0x46, 0x43, 0x95, 0x12, 0xc4, 0xb1, 0xd8, 0x00, 0x8a, 0x71,
+ 0xc6, 0xb1, 0xd7, 0x00, 0x8a, 0xa0, 0xc4, 0x95, 0xb8, 0x00, 0x89, 0xf9,
+ 0xc5, 0xc3, 0xe9, 0x00, 0x8a, 0x88, 0x91, 0x00, 0x8c, 0xf8, 0x87, 0x00,
+ 0x8c, 0x28, 0x95, 0x00, 0x8d, 0x51, 0xc8, 0xbe, 0xbb, 0x00, 0x8e, 0x20,
+ 0xc4, 0x68, 0xc6, 0x00, 0x8f, 0x31, 0xc6, 0xae, 0x80, 0x00, 0x8f, 0xa0,
+ 0x95, 0x00, 0x8d, 0xd9, 0xc8, 0xbe, 0xbb, 0x06, 0xbf, 0x61, 0xc8, 0xc2,
+ 0x1b, 0x06, 0xbf, 0x68, 0x8e, 0x00, 0x8e, 0x81, 0xc9, 0xae, 0x8f, 0x06,
+ 0xbe, 0xb8, 0x96, 0x06, 0xbe, 0x69, 0xc7, 0xcc, 0x3e, 0x06, 0xbe, 0x70,
+ 0xc5, 0x7f, 0x3f, 0x00, 0x8f, 0x39, 0xcc, 0x68, 0xbe, 0x06, 0xbf, 0x58,
+ 0xc5, 0xba, 0x7e, 0x00, 0x8f, 0x41, 0xc6, 0xae, 0x6e, 0x06, 0xbf, 0x88,
+ 0xc4, 0x68, 0xc6, 0x00, 0x8f, 0x51, 0xc6, 0xb2, 0x8b, 0x06, 0xbf, 0x70,
+ 0xc4, 0xb1, 0xd8, 0x06, 0xbf, 0x79, 0xc6, 0xb1, 0xd7, 0x06, 0xbf, 0x80,
+ 0x95, 0x06, 0xbe, 0x81, 0xc8, 0xbe, 0xbb, 0x06, 0xbe, 0x88, 0xc4, 0xb1,
+ 0xd8, 0x06, 0xbe, 0x91, 0xc6, 0xb1, 0xd7, 0x06, 0xbe, 0x98, 0x8d, 0x00,
+ 0x8d, 0x81, 0xc6, 0xd9, 0x4c, 0x00, 0x8e, 0x61, 0xc7, 0xcf, 0xa2, 0x06,
+ 0xbe, 0xb0, 0x92, 0x00, 0x8d, 0x91, 0xc6, 0xd7, 0xf6, 0x00, 0x8e, 0x99,
+ 0xc7, 0xc5, 0x8b, 0x00, 0x8e, 0xa1, 0xc8, 0xba, 0xa3, 0x06, 0xbe, 0xc9,
+ 0xc9, 0xb8, 0x04, 0x06, 0xbe, 0xd0, 0xc4, 0x95, 0xb8, 0x00, 0x8e, 0xb1,
+ 0xc6, 0x7f, 0x3e, 0x06, 0xbe, 0xa8, 0x96, 0x00, 0x8e, 0x71, 0xc7, 0xcc,
+ 0x7d, 0x00, 0x8e, 0x78, 0x8a, 0x00, 0x8e, 0xc9, 0xc7, 0xc4, 0x03, 0x06,
+ 0xbe, 0xe0, 0x92, 0x00, 0x8e, 0xe1, 0xc6, 0xd7, 0xf6, 0x06, 0xbf, 0x08,
+ 0xc4, 0x95, 0xb8, 0x00, 0x8e, 0xf1, 0xc5, 0xc4, 0x44, 0x06, 0xbe, 0xf8,
+ 0x95, 0x00, 0x8d, 0xc9, 0xc8, 0xc2, 0x1b, 0x06, 0xbf, 0x38, 0x8e, 0x00,
+ 0x8f, 0x19, 0xc9, 0xae, 0x7d, 0x06, 0xbf, 0x20, 0xc4, 0xb1, 0xd8, 0x06,
+ 0xbf, 0x41, 0xc6, 0xb1, 0xd7, 0x06, 0xbf, 0x48, 0xc5, 0x7f, 0x3f, 0x00,
+ 0x8f, 0x61, 0xc6, 0xae, 0x92, 0x00, 0x8f, 0x78, 0xca, 0x7f, 0x3a, 0x00,
+ 0x8f, 0x69, 0xc3, 0x3b, 0xc9, 0x00, 0x8f, 0x88, 0xc8, 0x4f, 0xa2, 0x0f,
+ 0x64, 0x81, 0xc7, 0x0d, 0x7f, 0x0f, 0x64, 0x38, 0xc8, 0x4f, 0xa2, 0x0f,
+ 0x64, 0x79, 0xc7, 0x0d, 0x7f, 0x0f, 0x64, 0x30, 0xc8, 0x4f, 0xa2, 0x0f,
+ 0x64, 0x71, 0xc7, 0x0d, 0x7f, 0x0f, 0x64, 0x28, 0xc8, 0x4f, 0xa2, 0x0f,
+ 0x64, 0x69, 0xc7, 0x0d, 0x7f, 0x0f, 0x64, 0x20, 0x91, 0x01, 0x9f, 0x09,
+ 0x07, 0x43, 0x95, 0x1e, 0xc3, 0x05, 0xdf, 0x01, 0x9f, 0x11, 0x43, 0x0d,
+ 0x89, 0x43, 0x95, 0x2a, 0xc4, 0x12, 0xf2, 0x01, 0x9f, 0x68, 0xc2, 0x00,
+ 0x29, 0x01, 0x9f, 0x21, 0xc5, 0x12, 0xf1, 0x01, 0x9f, 0x70, 0xc4, 0x12,
+ 0xf2, 0x01, 0x9f, 0x78, 0xc4, 0x12, 0xf2, 0x01, 0x9f, 0x80, 0xc3, 0x03,
+ 0x2c, 0x01, 0x9f, 0x88, 0xc3, 0x25, 0x4e, 0x01, 0x9b, 0x21, 0x42, 0x00,
+ 0xb5, 0x43, 0x95, 0x3a, 0xd2, 0x4d, 0x28, 0x0f, 0xd0, 0x31, 0xce, 0x29,
+ 0x29, 0x0f, 0xd0, 0x69, 0xdf, 0x0d, 0x7b, 0x0f, 0xd0, 0xd9, 0x16, 0x43,
+ 0x95, 0x50, 0xce, 0x3c, 0x9e, 0x01, 0x2f, 0x91, 0xcd, 0x0b, 0x14, 0x01,
+ 0x2f, 0x88, 0xce, 0x6f, 0x1a, 0x0f, 0xb1, 0x81, 0xc8, 0xb9, 0x83, 0x0f,
+ 0xc9, 0x70, 0xc9, 0x4f, 0xa1, 0x08, 0x4f, 0x98, 0xc9, 0x4f, 0xa1, 0x08,
+ 0x4f, 0x90, 0xc7, 0x0d, 0x7f, 0x08, 0x4e, 0xb3, 0x03, 0x95, 0x5c, 0xc8,
+ 0x4f, 0xa2, 0x08, 0x4e, 0xf8, 0xc7, 0x0d, 0x7f, 0x08, 0x4e, 0xab, 0x03,
+ 0x95, 0x62, 0xc8, 0x4f, 0xa2, 0x08, 0x4e, 0xf0, 0xc7, 0x0d, 0x7f, 0x08,
+ 0x4e, 0xa3, 0x03, 0x95, 0x68, 0xc8, 0x4f, 0xa2, 0x08, 0x4e, 0xe8, 0xc7,
+ 0x0d, 0x7f, 0x08, 0x4e, 0x9b, 0x03, 0x95, 0x6e, 0xc8, 0x4f, 0xa2, 0x08,
+ 0x4e, 0xe0, 0x15, 0xc3, 0x95, 0x74, 0x1b, 0xc3, 0x95, 0x89, 0x87, 0x0f,
+ 0x02, 0x43, 0x03, 0x95, 0x96, 0x12, 0xc3, 0x95, 0xad, 0x04, 0xc3, 0x95,
+ 0xb7, 0xc2, 0x1c, 0x3e, 0x0f, 0x02, 0x69, 0x06, 0xc3, 0x95, 0xc7, 0xc3,
+ 0x27, 0xc3, 0x0f, 0x02, 0x29, 0x16, 0xc3, 0x95, 0xd1, 0xc2, 0x01, 0x0e,
+ 0x0f, 0x02, 0x19, 0x97, 0x0f, 0x02, 0x09, 0x91, 0x0f, 0x01, 0xe1, 0xc2,
+ 0x06, 0x6b, 0x0f, 0x01, 0xc9, 0x8b, 0x0f, 0x01, 0xc3, 0x03, 0x95, 0xdb,
+ 0x1c, 0xc3, 0x95, 0xdf, 0xc3, 0xc8, 0xfd, 0x0f, 0x01, 0x99, 0xc2, 0x00,
+ 0x29, 0x0f, 0x01, 0x91, 0x83, 0x0f, 0x01, 0x80, 0x90, 0x00, 0xe9, 0xd9,
+ 0x87, 0x00, 0xe9, 0x90, 0x98, 0x00, 0xed, 0xd1, 0x8f, 0x00, 0xea, 0xd3,
+ 0x03, 0x95, 0xe9, 0x8a, 0x00, 0xed, 0x19, 0x83, 0x00, 0xea, 0x23, 0x03,
+ 0x95, 0xef, 0x8b, 0x00, 0xea, 0x71, 0xc6, 0x24, 0x64, 0x00, 0xea, 0x61,
+ 0x99, 0x05, 0x5b, 0x49, 0x94, 0x00, 0x15, 0xa3, 0x03, 0x95, 0xf9, 0x9b,
+ 0x08, 0x3d, 0x02, 0x03, 0x95, 0xff, 0xcc, 0x57, 0xac, 0x00, 0xed, 0xa9,
+ 0xce, 0x04, 0x59, 0x08, 0x3d, 0x78, 0xd4, 0x04, 0x53, 0x08, 0x3d, 0x68,
+ 0xc4, 0x00, 0xd2, 0x00, 0xed, 0xe9, 0xce, 0x04, 0x59, 0x00, 0xed, 0xe0,
+ 0xc4, 0x04, 0x63, 0x00, 0xed, 0xc9, 0xca, 0xa5, 0x1a, 0x08, 0x3d, 0x80,
+ 0x97, 0x00, 0xed, 0xc1, 0x90, 0x00, 0xed, 0x81, 0x8e, 0x00, 0xed, 0x5b,
+ 0x03, 0x96, 0x05, 0x8b, 0x00, 0xed, 0x33, 0x03, 0x96, 0x0b, 0x84, 0x08,
+ 0x3c, 0x21, 0xc2, 0x0e, 0x30, 0x08, 0x3c, 0x01, 0x9b, 0x08, 0x3d, 0x91,
+ 0x89, 0x08, 0x3c, 0x93, 0x03, 0x96, 0x17, 0x8a, 0x08, 0x3c, 0xb1, 0xc2,
+ 0x09, 0x06, 0x08, 0x3d, 0x19, 0x94, 0x08, 0x3d, 0x50, 0xcf, 0x6b, 0x5e,
+ 0x08, 0x3c, 0x79, 0xc5, 0xa2, 0x81, 0x08, 0x3d, 0x20, 0xc3, 0x02, 0x1d,
+ 0x00, 0xed, 0xb1, 0xce, 0x6c, 0xc0, 0x05, 0x5a, 0xf8, 0xc4, 0x04, 0x63,
+ 0x00, 0xed, 0x99, 0xc4, 0x00, 0xd2, 0x08, 0x3d, 0xd0, 0xc6, 0xbd, 0x35,
+ 0x00, 0xed, 0x11, 0xc3, 0x6e, 0x0d, 0x00, 0xea, 0x50, 0xcc, 0x57, 0xac,
+ 0x00, 0xed, 0x51, 0xce, 0x04, 0x59, 0x00, 0xed, 0x4b, 0x03, 0x96, 0x1d,
+ 0xcc, 0x1e, 0xfa, 0x05, 0x5a, 0xf1, 0xcf, 0x65, 0x37, 0x05, 0x5a, 0xe9,
+ 0xc4, 0xa2, 0x1e, 0x08, 0x3c, 0xd8, 0xd4, 0x04, 0x53, 0x08, 0x3c, 0xf8,
+ 0xc9, 0x1e, 0x92, 0x08, 0x3c, 0xc0, 0xc3, 0x86, 0xc6, 0x00, 0xea, 0xf9,
+ 0xca, 0xa4, 0x48, 0x08, 0x3c, 0x50, 0xc4, 0x04, 0x63, 0x08, 0x3c, 0x63,
+ 0x03, 0x96, 0x23, 0xc4, 0x13, 0xc7, 0x08, 0x3c, 0x58, 0x46, 0x01, 0xab,
+ 0x43, 0x96, 0x29, 0xc6, 0x24, 0x64, 0x00, 0xec, 0xf9, 0x87, 0x08, 0x3c,
+ 0x71, 0xcc, 0x26, 0x0c, 0x00, 0x17, 0x20, 0xc4, 0x13, 0xc7, 0x08, 0x3d,
+ 0x41, 0xc8, 0x64, 0x99, 0x08, 0x3d, 0x48, 0xc3, 0x1b, 0x75, 0x00, 0xeb,
+ 0x01, 0xc5, 0x4f, 0xcc, 0x00, 0xea, 0xf0, 0x91, 0x00, 0xea, 0x99, 0x87,
+ 0x00, 0xea, 0x58, 0xca, 0x1e, 0x1b, 0x08, 0x3c, 0xb8, 0xc4, 0x04, 0x63,
+ 0x00, 0x15, 0x89, 0xc6, 0x03, 0x81, 0x08, 0x3c, 0xa8, 0xcc, 0x26, 0x18,
+ 0x08, 0x3d, 0xa0, 0x45, 0x19, 0x82, 0xc3, 0x96, 0x35, 0xcc, 0x3b, 0x74,
+ 0x00, 0x17, 0x78, 0xcf, 0x4a, 0xeb, 0x05, 0x38, 0xa9, 0xc7, 0x09, 0xba,
+ 0x00, 0x17, 0xfa, 0x03, 0x96, 0x41, 0xc7, 0x4a, 0xf3, 0x00, 0x17, 0x41,
+ 0xc4, 0x1f, 0x02, 0x00, 0x17, 0xb8, 0xcd, 0x2f, 0xf2, 0x00, 0x17, 0x91,
+ 0xc2, 0x00, 0x56, 0x00, 0x17, 0x98, 0x47, 0x19, 0x80, 0xc3, 0x96, 0x47,
+ 0xd2, 0x4a, 0xe8, 0x05, 0x38, 0xa1, 0xc8, 0x4a, 0xf2, 0x00, 0x17, 0x38,
+ 0xcc, 0x1e, 0x64, 0x00, 0x17, 0xa1, 0x47, 0x02, 0x91, 0x43, 0x96, 0x53,
+ 0xc8, 0x4a, 0xf2, 0x05, 0x38, 0x41, 0xd2, 0x4a, 0xe8, 0x05, 0x38, 0x68,
+ 0xc8, 0x4a, 0xf2, 0x05, 0x38, 0x61, 0xd2, 0x4a, 0xe8, 0x05, 0x38, 0x88,
+ 0x0f, 0x43, 0x96, 0x5f, 0xc2, 0x02, 0x29, 0x0e, 0xbe, 0x09, 0xc2, 0x00,
+ 0x0a, 0x0e, 0xbd, 0xf9, 0x8b, 0x0e, 0xbd, 0xc8, 0xc2, 0x00, 0x0a, 0x0e,
+ 0xbe, 0x00, 0xc6, 0x12, 0x65, 0x0e, 0xbd, 0xf0, 0xc2, 0x20, 0xa8, 0x0e,
+ 0xbd, 0xe9, 0xc4, 0x8b, 0xed, 0x0e, 0xbd, 0x88, 0xc4, 0x19, 0x8f, 0x0e,
+ 0xbd, 0xe0, 0xca, 0x94, 0x73, 0x0e, 0xbd, 0xd8, 0xc2, 0x03, 0x76, 0x0e,
+ 0xbd, 0xd0, 0x8b, 0x0e, 0xbd, 0xb8, 0x97, 0x0e, 0xbd, 0xb0, 0x97, 0x0e,
+ 0xbd, 0xa8, 0xc4, 0xdc, 0xdf, 0x0e, 0xbd, 0xa0, 0xc4, 0x8f, 0x29, 0x0e,
+ 0xbd, 0x98, 0xc3, 0x00, 0xf2, 0x0e, 0xbd, 0x90, 0xc2, 0x00, 0x44, 0x0e,
+ 0xbd, 0x81, 0xc6, 0x12, 0x65, 0x0e, 0xbd, 0x70, 0xc3, 0x0b, 0x47, 0x0e,
+ 0xbd, 0x78, 0xc4, 0xde, 0x10, 0x0e, 0xbd, 0x68, 0xc4, 0x33, 0x51, 0x0e,
+ 0xbd, 0x60, 0xc3, 0x0b, 0x47, 0x0e, 0xbd, 0x58, 0xc4, 0xdd, 0x2f, 0x0e,
+ 0xbd, 0x50, 0x0f, 0x43, 0x96, 0x6b, 0xc2, 0x02, 0x29, 0x0e, 0xbd, 0x39,
+ 0xc2, 0x00, 0x0a, 0x0e, 0xbd, 0x29, 0x8b, 0x0e, 0xbc, 0xf8, 0xc2, 0x00,
+ 0x0a, 0x0e, 0xbd, 0x30, 0xc6, 0x12, 0x65, 0x0e, 0xbd, 0x20, 0xc2, 0x20,
+ 0xa8, 0x0e, 0xbd, 0x19, 0xc4, 0x8b, 0xed, 0x0e, 0xbc, 0xba, 0x03, 0x96,
+ 0x77, 0xc4, 0x19, 0x8f, 0x0e, 0xbd, 0x10, 0xc2, 0x03, 0x76, 0x0e, 0xbd,
+ 0x00, 0x8b, 0x0e, 0xbc, 0xe8, 0x97, 0x0e, 0xbc, 0xe0, 0x97, 0x0e, 0xbc,
+ 0xd8, 0xc4, 0xdc, 0xdf, 0x0e, 0xbc, 0xd0, 0xc4, 0x8f, 0x29, 0x0e, 0xbc,
+ 0xc8, 0xc3, 0x00, 0xf2, 0x0e, 0xbc, 0xc0, 0xc2, 0x00, 0x44, 0x0e, 0xbc,
+ 0xb1, 0xc6, 0x12, 0x65, 0x0e, 0xbc, 0xa0, 0xc3, 0x0b, 0x47, 0x0e, 0xbc,
+ 0xa8, 0xc4, 0xde, 0x10, 0x0e, 0xbc, 0x98, 0xc4, 0x33, 0x51, 0x0e, 0xbc,
+ 0x90, 0xc3, 0x0b, 0x47, 0x0e, 0xbc, 0x88, 0xc4, 0xdd, 0x2f, 0x0e, 0xbc,
+ 0x80, 0xc3, 0x11, 0xb7, 0x0e, 0xbc, 0x41, 0xc5, 0xdc, 0x84, 0x0e, 0xbb,
+ 0xf0, 0xc3, 0x11, 0xb7, 0x0e, 0xbb, 0x71, 0xc5, 0xdc, 0x84, 0x0e, 0xbb,
+ 0x20, 0xc7, 0x01, 0xb0, 0x0e, 0xbb, 0x38, 0x8e, 0x00, 0x6a, 0xb0, 0xc8,
+ 0xb5, 0x3e, 0x0e, 0x8f, 0x41, 0xc9, 0xb0, 0x24, 0x0e, 0x8f, 0x00, 0x50,
+ 0x5f, 0x6f, 0xc3, 0x96, 0x7d, 0xcb, 0x97, 0x1c, 0x0e, 0x8e, 0xf8, 0xc2,
+ 0x02, 0x6a, 0x0e, 0x8f, 0x29, 0xc4, 0x00, 0x68, 0x0e, 0x8f, 0x20, 0xc5,
+ 0x04, 0x91, 0x0e, 0x8a, 0x39, 0xc5, 0x01, 0x31, 0x0e, 0x8a, 0x30, 0x47,
+ 0xc4, 0x5e, 0xc3, 0x96, 0x89, 0x47, 0xca, 0x4d, 0x43, 0x96, 0x9b, 0x49,
+ 0xb4, 0x9b, 0xc3, 0x96, 0xad, 0x46, 0x66, 0x94, 0x43, 0x96, 0xb9, 0xc4,
+ 0x00, 0x68, 0x0e, 0x89, 0x89, 0xc2, 0x02, 0x6a, 0x0e, 0x89, 0x80, 0xc7,
+ 0xc7, 0x0c, 0x0e, 0x8d, 0x79, 0xc4, 0x00, 0xfa, 0x0e, 0x8d, 0x70, 0xc7,
+ 0xcd, 0xcd, 0x0e, 0x8e, 0xd0, 0xca, 0x6b, 0xc7, 0x0e, 0x8e, 0x5b, 0x03,
+ 0x96, 0xc5, 0xc8, 0x6b, 0xc9, 0x0e, 0x8e, 0x50, 0xc8, 0x6b, 0xc9, 0x0e,
+ 0x8e, 0x3b, 0x03, 0x96, 0xcb, 0xca, 0x6b, 0xc7, 0x0e, 0x8e, 0x40, 0xc2,
+ 0x02, 0x6a, 0x0e, 0x8c, 0xd1, 0xc5, 0x02, 0xe2, 0x0e, 0x8c, 0xc8, 0x56,
+ 0x2c, 0xed, 0xc3, 0x96, 0xd1, 0x4b, 0x9a, 0x3f, 0x43, 0x96, 0xdd, 0xc4,
+ 0x23, 0x1f, 0x0e, 0x8b, 0x11, 0xc4, 0x2d, 0xbe, 0x0e, 0x8a, 0x00, 0xc5,
+ 0xe3, 0x0f, 0x0e, 0x8e, 0xb9, 0xc3, 0x2a, 0xca, 0x0e, 0x8e, 0xa8, 0xc5,
+ 0x04, 0x91, 0x0e, 0x8a, 0xd9, 0xc5, 0x01, 0x31, 0x0e, 0x8a, 0xd0, 0x47,
+ 0x10, 0xa4, 0xc3, 0x96, 0xf5, 0xc8, 0xba, 0xeb, 0x0e, 0x89, 0xa0, 0xc6,
+ 0xd3, 0x76, 0x0e, 0x8e, 0x89, 0xc6, 0xd3, 0x34, 0x0e, 0x8e, 0x80, 0xc8,
+ 0xc0, 0xcb, 0x0e, 0x8c, 0xa9, 0xc5, 0x02, 0xe2, 0x0e, 0x8c, 0xa0, 0xc5,
+ 0xdb, 0xdf, 0x0e, 0x89, 0x01, 0xc4, 0xe6, 0x2b, 0x0e, 0x88, 0xf8, 0xc4,
+ 0x2d, 0xbe, 0x0e, 0x8e, 0x29, 0xc5, 0x04, 0x91, 0x0e, 0x8d, 0xe0, 0x18,
+ 0xc3, 0x97, 0x31, 0xc8, 0xbe, 0x53, 0x0e, 0x88, 0x90, 0xc3, 0x00, 0xdc,
+ 0x0e, 0x88, 0xa9, 0x87, 0x0e, 0x88, 0xa0, 0xcf, 0x6b, 0xc7, 0x0e, 0x8e,
+ 0x11, 0xcd, 0x6b, 0xc9, 0x0e, 0x8e, 0x08, 0xd2, 0x4a, 0xd6, 0x0e, 0x88,
+ 0xe9, 0xcc, 0x89, 0x10, 0x0e, 0x88, 0xc8, 0x4c, 0x84, 0xa8, 0xc3, 0x97,
+ 0x3e, 0xca, 0x42, 0x86, 0x0e, 0x88, 0x10, 0xc5, 0xdb, 0xdf, 0x0e, 0x89,
+ 0x21, 0xc4, 0xe6, 0x2b, 0x0e, 0x89, 0x18, 0xc4, 0x66, 0x0b, 0x0e, 0x8d,
+ 0xa8, 0x9e, 0x0e, 0x8d, 0x29, 0x9d, 0x0e, 0x8d, 0x20, 0xc4, 0x23, 0x1f,
+ 0x0e, 0x8b, 0x21, 0xc4, 0x2d, 0xbe, 0x0e, 0x8a, 0x10, 0x48, 0xc3, 0x33,
+ 0xc3, 0x97, 0x50, 0xc5, 0x05, 0xe2, 0x0e, 0x88, 0x40, 0xc4, 0x37, 0xd2,
+ 0x0e, 0x89, 0x99, 0xc5, 0xa8, 0x6c, 0x0e, 0x89, 0x90, 0xd3, 0x40, 0xdb,
+ 0x0f, 0xd1, 0x91, 0xcf, 0x15, 0xa6, 0x0f, 0xd2, 0x18, 0xd0, 0x3d, 0x06,
+ 0x01, 0x49, 0x71, 0xd0, 0x3d, 0x1a, 0x01, 0x49, 0x88, 0xc6, 0x12, 0x87,
+ 0x01, 0x0f, 0x89, 0xc8, 0xbb, 0xa3, 0x01, 0x0d, 0xc0, 0x46, 0x01, 0xab,
+ 0x43, 0x97, 0x62, 0x46, 0x01, 0xab, 0x43, 0x97, 0x81, 0xc4, 0xea, 0x23,
+ 0x00, 0xff, 0x59, 0x18, 0xc3, 0x97, 0xa5, 0xc6, 0x61, 0xbc, 0x00, 0xff,
+ 0x49, 0x06, 0xc3, 0x97, 0xb1, 0xc5, 0x68, 0x98, 0x00, 0x1c, 0x70, 0xc4,
+ 0xea, 0x23, 0x00, 0xfe, 0xd9, 0x18, 0xc3, 0x97, 0xc0, 0xc6, 0x61, 0xbc,
+ 0x00, 0xfe, 0xc9, 0x06, 0xc3, 0x97, 0xcc, 0xc5, 0xda, 0x86, 0x00, 0xf9,
+ 0xc3, 0x03, 0x97, 0xdb, 0xc5, 0x68, 0x98, 0x00, 0x1c, 0x50, 0x46, 0x01,
+ 0xab, 0x43, 0x97, 0xe1, 0x46, 0x01, 0xab, 0x43, 0x98, 0x00, 0x46, 0x01,
+ 0xab, 0x43, 0x98, 0x24, 0x46, 0x01, 0xab, 0x43, 0x98, 0x47, 0x46, 0x01,
+ 0xab, 0x43, 0x98, 0x72, 0x06, 0xc3, 0x98, 0x96, 0x12, 0xc3, 0x98, 0xa8,
+ 0xc6, 0x61, 0xbc, 0x00, 0xff, 0x09, 0x18, 0xc3, 0x98, 0xb7, 0xc4, 0xea,
+ 0x23, 0x00, 0xfb, 0xd9, 0xc5, 0x68, 0x98, 0x00, 0x1e, 0x68, 0xc5, 0x73,
+ 0xb2, 0x00, 0xff, 0x29, 0xc5, 0xda, 0x86, 0x00, 0xff, 0x20, 0x06, 0xc3,
+ 0x98, 0xc3, 0x12, 0xc3, 0x98, 0xd5, 0xc6, 0x61, 0xbc, 0x00, 0xfe, 0x89,
+ 0x18, 0xc3, 0x98, 0xe4, 0xc4, 0xea, 0x23, 0x00, 0xfb, 0xb9, 0xc5, 0x68,
+ 0x98, 0x00, 0x1d, 0x78, 0x46, 0x01, 0xab, 0x43, 0x98, 0xf0, 0x46, 0x01,
+ 0xab, 0x43, 0x99, 0x1b, 0x46, 0x01, 0xab, 0x43, 0x99, 0x3f, 0xc5, 0x7a,
+ 0xee, 0x00, 0x1e, 0xc9, 0xc5, 0x89, 0x23, 0x00, 0x1b, 0x98, 0x90, 0x00,
+ 0x1f, 0xd9, 0xc3, 0x89, 0x25, 0x00, 0x1f, 0x08, 0xc2, 0x02, 0x29, 0x00,
+ 0xe9, 0x51, 0x8b, 0x00, 0xe9, 0x40, 0xc3, 0x01, 0x1e, 0x08, 0x0a, 0x09,
+ 0x47, 0x0d, 0x80, 0x43, 0x99, 0x6b, 0xc7, 0xb9, 0x94, 0x08, 0x0a, 0x69,
+ 0xc7, 0x63, 0x9b, 0x08, 0x0a, 0xa0, 0x00, 0x43, 0x99, 0x77, 0x00, 0x43,
+ 0x99, 0x8a, 0xc6, 0xb9, 0x95, 0x08, 0x0a, 0x49, 0xcf, 0x63, 0x93, 0x08,
+ 0x0a, 0xa8, 0x00, 0x43, 0x99, 0x94, 0xc2, 0x0a, 0x20, 0x08, 0x0a, 0xe1,
+ 0xc2, 0x01, 0x04, 0x08, 0x0b, 0x21, 0x0a, 0x43, 0x99, 0xa0, 0xc3, 0x41,
+ 0xca, 0x08, 0x0b, 0x49, 0x43, 0x03, 0x53, 0x43, 0x99, 0xac, 0xc2, 0x00,
+ 0x29, 0x08, 0x0a, 0xfb, 0x03, 0x99, 0xb8, 0xc3, 0x41, 0xca, 0x08, 0x0b,
+ 0x32, 0x03, 0x99, 0xbe, 0xcf, 0x65, 0xdc, 0x08, 0x0b, 0x08, 0xd3, 0x45,
+ 0x62, 0x08, 0x78, 0xe0, 0xd3, 0x45, 0x62, 0x08, 0x78, 0xb8, 0xd3, 0x45,
+ 0x62, 0x08, 0x78, 0x80, 0xc3, 0xe2, 0x62, 0x08, 0x78, 0xa9, 0xc4, 0xdd,
+ 0x34, 0x08, 0x78, 0x88, 0xcc, 0x8d, 0xd8, 0x08, 0x78, 0x99, 0xc3, 0x32,
+ 0xad, 0x08, 0x78, 0x00, 0x83, 0x08, 0x1e, 0x43, 0x03, 0x99, 0xc4, 0xc3,
+ 0xeb, 0x40, 0x08, 0x1e, 0x48, 0x46, 0xd3, 0xdc, 0x43, 0x99, 0xca, 0xc2,
+ 0x07, 0x69, 0x08, 0x1e, 0x70, 0x91, 0x08, 0x1e, 0x91, 0xc4, 0x15, 0xa9,
+ 0x08, 0x1e, 0xa0, 0xc7, 0xc6, 0xa3, 0x0e, 0x7d, 0xf1, 0x44, 0xe6, 0x63,
+ 0xc3, 0x99, 0xd4, 0xc9, 0x92, 0xb1, 0x0e, 0x7d, 0xb0, 0xd0, 0x5d, 0x0f,
+ 0x0e, 0x7d, 0x21, 0xd0, 0x2c, 0x43, 0x0e, 0x7d, 0x08, 0xcb, 0x91, 0x70,
+ 0x0e, 0x7c, 0x79, 0xc7, 0x80, 0xcc, 0x0e, 0x7c, 0x48, 0x87, 0x00, 0xb3,
+ 0x50, 0x87, 0x00, 0xb1, 0xb8, 0x8b, 0x00, 0xa7, 0x08, 0x91, 0x00, 0xa7,
+ 0x28, 0x83, 0x00, 0xa7, 0x48, 0x8b, 0x00, 0xa2, 0xe0, 0x91, 0x00, 0xa3,
+ 0x00, 0x83, 0x00, 0xa3, 0x20, 0x83, 0x00, 0xa9, 0xe0, 0x91, 0x00, 0xa9,
+ 0xc0, 0x8b, 0x00, 0xa9, 0xa0, 0x83, 0x00, 0xa9, 0x20, 0x8b, 0x00, 0xa8,
+ 0xe0, 0x91, 0x00, 0xa9, 0x00, 0x83, 0x00, 0xa8, 0x18, 0x8b, 0x00, 0xa7,
+ 0xd8, 0x91, 0x00, 0xa7, 0xf8, 0x83, 0x00, 0xa2, 0x38, 0x91, 0x00, 0xa2,
+ 0x18, 0x8b, 0x00, 0xa1, 0xf8, 0x8b, 0x00, 0xa5, 0x88, 0x91, 0x00, 0xa5,
+ 0xa8, 0x83, 0x00, 0xa5, 0xc8, 0x83, 0x00, 0xb3, 0xe8, 0x91, 0x00, 0xb3,
+ 0xd8, 0x8b, 0x00, 0xb3, 0xc8, 0x43, 0x00, 0x3b, 0xc3, 0x99, 0xe1, 0xc4,
+ 0x03, 0x51, 0x00, 0x1a, 0x80, 0x96, 0x01, 0x66, 0xa8, 0x96, 0x01, 0x66,
+ 0xa0, 0xcd, 0x0c, 0x95, 0x01, 0x92, 0x49, 0x87, 0x01, 0x92, 0x88, 0xc2,
+ 0x0a, 0x20, 0x01, 0x92, 0x91, 0xc4, 0x05, 0xde, 0x01, 0x92, 0x98, 0xc3,
+ 0x08, 0xde, 0x01, 0x92, 0xa1, 0xc3, 0x0d, 0x8f, 0x01, 0x92, 0xa8, 0xc2,
+ 0x22, 0x45, 0x01, 0x92, 0xb1, 0xc4, 0x15, 0xa7, 0x01, 0x92, 0xb8, 0xcd,
+ 0x0c, 0x95, 0x01, 0x92, 0x51, 0x87, 0x01, 0x92, 0xd8, 0xc2, 0x0a, 0x20,
+ 0x01, 0x92, 0xe1, 0xc4, 0x05, 0xde, 0x01, 0x92, 0xe8, 0xc3, 0x08, 0xde,
+ 0x01, 0x92, 0xf1, 0xc3, 0x0d, 0x8f, 0x01, 0x92, 0xf8, 0xc2, 0x22, 0x45,
+ 0x01, 0x95, 0x89, 0xc4, 0x15, 0xa7, 0x01, 0x95, 0x90, 0xcd, 0x0c, 0x95,
+ 0x01, 0x92, 0x59, 0x87, 0x01, 0x95, 0xb0, 0xc2, 0x0a, 0x20, 0x01, 0x95,
+ 0xb9, 0xc4, 0x05, 0xde, 0x01, 0x95, 0xc0, 0xc3, 0x08, 0xde, 0x01, 0x95,
+ 0xc9, 0xc3, 0x0d, 0x8f, 0x01, 0x95, 0xd0, 0xc2, 0x22, 0x45, 0x01, 0x95,
+ 0xd9, 0xc4, 0x15, 0xa7, 0x01, 0x95, 0xe0, 0x46, 0x21, 0x5d, 0x43, 0x99,
+ 0xed, 0xc2, 0x01, 0x01, 0x09, 0x19, 0x69, 0xc2, 0x01, 0x0e, 0x09, 0x19,
+ 0x60, 0xc9, 0xab, 0x92, 0x09, 0x29, 0x79, 0xc2, 0x01, 0x8d, 0x09, 0x15,
+ 0x00, 0x8e, 0x09, 0x29, 0x21, 0x86, 0x09, 0x12, 0xb0, 0xc2, 0x01, 0x0d,
+ 0x09, 0x29, 0x18, 0xc2, 0x01, 0x0d, 0x09, 0x12, 0xe3, 0x03, 0x99, 0xf9,
+ 0xc3, 0x00, 0x39, 0x09, 0x12, 0xd8, 0xc9, 0x43, 0x91, 0x09, 0x12, 0xa8,
+ 0xc8, 0xbb, 0x7b, 0x09, 0x11, 0xd8, 0xc3, 0x3a, 0x6b, 0x09, 0x28, 0xf1,
+ 0xc3, 0x07, 0x27, 0x09, 0x10, 0x80, 0xd2, 0x36, 0x9a, 0x09, 0x28, 0xe8,
+ 0xc2, 0x00, 0x2f, 0x09, 0x28, 0xd9, 0xcb, 0x90, 0xaa, 0x09, 0x10, 0x18,
+ 0xc2, 0x06, 0x67, 0x09, 0x1c, 0x59, 0x0b, 0x43, 0x99, 0xff, 0x00, 0x43,
+ 0x9a, 0x0b, 0x97, 0x09, 0x10, 0x69, 0x87, 0x09, 0x10, 0x60, 0xc3, 0x07,
+ 0x69, 0x09, 0x10, 0x51, 0xc9, 0x43, 0x91, 0x09, 0x10, 0x48, 0x8b, 0x09,
+ 0x10, 0x41, 0x42, 0x00, 0x5d, 0x43, 0x9a, 0x17, 0xcc, 0x36, 0xa0, 0x09,
+ 0x27, 0xa9, 0xc3, 0x36, 0xa9, 0x09, 0x27, 0xa0, 0x8b, 0x09, 0x1c, 0x41,
+ 0xc2, 0x03, 0xbd, 0x09, 0x0e, 0x33, 0x03, 0x9a, 0x22, 0x83, 0x09, 0x0e,
+ 0x22, 0x03, 0x9a, 0x28, 0xc2, 0x01, 0x0d, 0x09, 0x0f, 0x51, 0x86, 0x09,
+ 0x0f, 0x49, 0xca, 0xa3, 0x12, 0x09, 0x0f, 0x41, 0x46, 0x21, 0x5d, 0x43,
+ 0x9a, 0x2c, 0xd8, 0x21, 0x5c, 0x09, 0x0f, 0x21, 0x03, 0x43, 0x9a, 0x36,
+ 0xc2, 0x01, 0x0a, 0x09, 0x0f, 0x09, 0x0a, 0x43, 0x9a, 0x40, 0xc3, 0x76,
+ 0x92, 0x09, 0x0e, 0xd1, 0x87, 0x09, 0x0e, 0xc2, 0x03, 0x9a, 0x55, 0x97,
+ 0x09, 0x0e, 0xb3, 0x03, 0x9a, 0x5b, 0xc3, 0x07, 0xda, 0x09, 0x0e, 0xa9,
+ 0xc4, 0x07, 0x68, 0x09, 0x0e, 0xa0, 0x17, 0xc3, 0x9a, 0x5f, 0x8b, 0x09,
+ 0x0e, 0x7a, 0x03, 0x9a, 0x6a, 0x8f, 0x09, 0x0e, 0x63, 0x03, 0x9a, 0x6e,
+ 0xc7, 0x61, 0x7f, 0x09, 0x0e, 0x58, 0xcb, 0x90, 0x7e, 0x09, 0x0e, 0x51,
+ 0x83, 0x09, 0x0e, 0x42, 0x03, 0x9a, 0x74, 0x8b, 0x09, 0x0e, 0x09, 0xc2,
+ 0x00, 0x5d, 0x09, 0x0e, 0x00, 0xcc, 0x85, 0x68, 0x09, 0x0d, 0xf9, 0x90,
+ 0x09, 0x0d, 0xf1, 0x8e, 0x09, 0x0d, 0xe9, 0x46, 0x21, 0x5d, 0x43, 0x9a,
+ 0x78, 0xcd, 0x4d, 0xcf, 0x09, 0x0b, 0x51, 0xc8, 0x54, 0x91, 0x09, 0x0b,
+ 0x48, 0xd2, 0x4d, 0xca, 0x09, 0x26, 0x59, 0xc4, 0x3a, 0x6a, 0x09, 0x08,
+ 0xa1, 0xc3, 0x64, 0x5f, 0x09, 0x08, 0x98, 0x0b, 0xc3, 0x9a, 0x8a, 0x87,
+ 0x09, 0x07, 0x2a, 0x03, 0x9a, 0x92, 0x94, 0x09, 0x07, 0x21, 0x8e, 0x09,
+ 0x07, 0x18, 0x46, 0x21, 0x5d, 0x43, 0x9a, 0x98, 0xc9, 0x20, 0xfa, 0x09,
+ 0x07, 0x08, 0x8f, 0x09, 0x26, 0x02, 0x03, 0x9a, 0xa4, 0xd0, 0x5a, 0xcf,
+ 0x09, 0x25, 0xf9, 0xc9, 0xad, 0x1e, 0x09, 0x06, 0xe0, 0xc9, 0xac, 0x3d,
+ 0x09, 0x06, 0xd8, 0xc4, 0x47, 0x66, 0x09, 0x06, 0xc9, 0x8d, 0x09, 0x06,
+ 0xc0, 0x46, 0x21, 0x5d, 0xc3, 0x9a, 0xaa, 0x8e, 0x09, 0x06, 0x92, 0x03,
+ 0x9a, 0xb4, 0x94, 0x09, 0x06, 0x63, 0x03, 0x9a, 0xba, 0xc7, 0x5c, 0x78,
+ 0x09, 0x06, 0x58, 0xca, 0xa4, 0x3e, 0x09, 0x06, 0x81, 0xa1, 0x09, 0x06,
+ 0x72, 0x03, 0x9a, 0xc0, 0xd0, 0x5c, 0x6f, 0x09, 0x06, 0x50, 0xc8, 0xac,
+ 0x3d, 0x09, 0x06, 0x40, 0x48, 0x6f, 0x3c, 0xc3, 0x9a, 0xc6, 0x84, 0x09,
+ 0x06, 0x30, 0x42, 0x01, 0x0b, 0x43, 0x9a, 0xd2, 0xc4, 0x3e, 0x06, 0x09,
+ 0x25, 0xb1, 0xc9, 0xac, 0xb2, 0x09, 0x06, 0x01, 0x86, 0x09, 0x05, 0xf8,
+ 0xc8, 0xac, 0xb3, 0x09, 0x06, 0x10, 0x9f, 0x09, 0x1b, 0xd2, 0x03, 0x9a,
+ 0xde, 0xd0, 0x59, 0x0f, 0x09, 0x1b, 0xc8, 0xc3, 0x03, 0xaa, 0x09, 0x05,
+ 0xd1, 0xc2, 0x01, 0x0e, 0x09, 0x05, 0xc9, 0xca, 0x9f, 0xac, 0x09, 0x05,
+ 0xc0, 0xc8, 0xbb, 0x83, 0x09, 0x07, 0x60, 0xca, 0x55, 0x8e, 0x09, 0x25,
+ 0x00, 0xcc, 0x5a, 0xd3, 0x09, 0x24, 0xe8, 0xc4, 0x4b, 0x12, 0x09, 0x1b,
+ 0x99, 0xc4, 0xe6, 0x8f, 0x09, 0x03, 0x60, 0x8f, 0x09, 0x03, 0x39, 0xcb,
+ 0x9c, 0x86, 0x09, 0x03, 0x30, 0xc2, 0x3e, 0x08, 0x09, 0x02, 0xf0, 0xca,
+ 0x9c, 0x86, 0x09, 0x02, 0xe0, 0xc8, 0x1e, 0x8a, 0x00, 0x26, 0xe9, 0xc8,
+ 0x21, 0xcc, 0x00, 0x24, 0xb8, 0x00, 0x43, 0x9a, 0xe4, 0x00, 0x43, 0x9b,
+ 0x08, 0x14, 0xc3, 0x9b, 0x3c, 0xc6, 0x14, 0xca, 0x0e, 0xc6, 0x61, 0x46,
+ 0x0e, 0xcd, 0xc3, 0x9b, 0x48, 0xc2, 0x02, 0x6a, 0x0e, 0xc6, 0x33, 0x03,
+ 0x9b, 0x5e, 0xc4, 0x00, 0x68, 0x0e, 0xc6, 0x21, 0xcf, 0x6b, 0xe5, 0x0e,
+ 0xc0, 0xe0, 0xc5, 0x0e, 0xcd, 0x0e, 0xc5, 0xc1, 0xc5, 0x01, 0x62, 0x0e,
+ 0xc5, 0xb9, 0xc6, 0x01, 0x8c, 0x0e, 0xc5, 0xa3, 0x03, 0x9b, 0x64, 0xc6,
+ 0x14, 0xca, 0x0e, 0xc5, 0x81, 0xce, 0x3b, 0x3b, 0x0e, 0xc5, 0x79, 0xc2,
+ 0x02, 0x6a, 0x0e, 0xc5, 0x71, 0xc4, 0x00, 0x68, 0x0e, 0xc5, 0x58, 0xc5,
+ 0x01, 0x62, 0x0e, 0xc5, 0x03, 0x03, 0x9b, 0x68, 0x16, 0xc3, 0x9b, 0x6e,
+ 0xc4, 0x16, 0x02, 0x0e, 0xc4, 0xc1, 0xce, 0x3b, 0x3b, 0x0e, 0xc4, 0xb9,
+ 0xc2, 0x02, 0x6a, 0x0e, 0xc4, 0x91, 0xc4, 0x00, 0x68, 0x0e, 0xc4, 0x72,
+ 0x03, 0x9b, 0x7a, 0xc6, 0x14, 0xca, 0x0e, 0xc3, 0x29, 0xc6, 0x01, 0xa1,
+ 0x0e, 0xc3, 0x13, 0x03, 0x9b, 0x7e, 0xd0, 0x5a, 0x9f, 0x0e, 0xc3, 0x08,
+ 0xc7, 0x29, 0xba, 0x0e, 0xc3, 0x01, 0xc4, 0x16, 0x02, 0x0e, 0xc2, 0xf9,
+ 0xc4, 0x05, 0xa6, 0x0e, 0xc2, 0xe8, 0x00, 0x43, 0x9b, 0x87, 0xd2, 0x29,
+ 0x65, 0x0e, 0xc2, 0x63, 0x03, 0x9b, 0x96, 0xcb, 0x15, 0xec, 0x0e, 0xc2,
+ 0x22, 0x03, 0x9b, 0x9a, 0xc5, 0x0e, 0xcd, 0x0e, 0xc7, 0xa3, 0x03, 0x9b,
+ 0x9e, 0xcb, 0x14, 0xc5, 0x0e, 0xc6, 0x1b, 0x03, 0x9b, 0xa2, 0x47, 0x01,
+ 0x8c, 0x43, 0x9b, 0xa8, 0xc2, 0x01, 0x5b, 0x0e, 0xc6, 0x99, 0xc3, 0x01,
+ 0xc3, 0x0e, 0xc6, 0x90, 0xd2, 0x47, 0xbe, 0x0e, 0xc4, 0xfa, 0x03, 0x9b,
+ 0xb4, 0x00, 0x43, 0x9b, 0xba, 0xcc, 0x14, 0xc4, 0x0e, 0xc6, 0x88, 0xdd,
+ 0x10, 0xf9, 0x0e, 0xc5, 0x60, 0x00, 0x43, 0x9b, 0xd5, 0xd3, 0x44, 0x32,
+ 0x0e, 0xc4, 0x21, 0xc4, 0x05, 0xa6, 0x0e, 0xc4, 0x02, 0x03, 0x9b, 0xe4,
+ 0x00, 0x43, 0x9b, 0xea, 0xd7, 0x29, 0x65, 0x0e, 0xc2, 0xa9, 0xd5, 0x15,
+ 0xec, 0x0e, 0xc2, 0x58, 0xd5, 0x14, 0xc5, 0x0e, 0xc6, 0xd3, 0x03, 0x9b,
+ 0xf6, 0xc5, 0x0e, 0xcd, 0x0e, 0xc6, 0x50, 0xc5, 0x19, 0x05, 0x0e, 0xc5,
+ 0xf9, 0xc2, 0x01, 0x5b, 0x0e, 0xc5, 0xf1, 0xc3, 0x01, 0xc3, 0x0e, 0xc5,
+ 0xe8, 0xc5, 0x01, 0x62, 0x0e, 0xc0, 0x13, 0x03, 0x9b, 0xfa, 0xd2, 0x14,
+ 0xbe, 0x0e, 0xc6, 0x81, 0x46, 0x0e, 0xcd, 0xc3, 0x9b, 0xfe, 0xc4, 0x03,
+ 0xf5, 0x0e, 0xc3, 0x63, 0x03, 0x9c, 0x0a, 0xc8, 0xc0, 0x0b, 0x0e, 0xc3,
+ 0x89, 0xd3, 0x41, 0x73, 0x0e, 0xc2, 0x9a, 0x03, 0x9c, 0x0e, 0xd5, 0x32,
+ 0x68, 0x0e, 0xc6, 0x79, 0xd4, 0x3a, 0xd2, 0x0e, 0xc5, 0xe1, 0xc4, 0x03,
+ 0xf5, 0x0e, 0xc3, 0xa0, 0xc5, 0x32, 0xc3, 0x0e, 0xc6, 0xb8, 0xc7, 0x29,
+ 0xba, 0x0e, 0xc3, 0x49, 0xc4, 0x05, 0xa6, 0x0e, 0xc3, 0x38, 0xcb, 0x14,
+ 0xc5, 0x0e, 0xc6, 0x73, 0x03, 0x9c, 0x14, 0xc2, 0x02, 0x6a, 0x0e, 0xc6,
+ 0x38, 0x00, 0x43, 0x9c, 0x1a, 0xc5, 0x01, 0x62, 0x0e, 0xc5, 0x09, 0xc2,
+ 0x02, 0x6a, 0x0e, 0xc4, 0xa0, 0xc5, 0x15, 0x2e, 0x0e, 0xce, 0x89, 0xc5,
+ 0x00, 0x3e, 0x0e, 0xce, 0x80, 0xc5, 0x15, 0x2e, 0x0e, 0xce, 0x11, 0xc5,
+ 0x00, 0x3e, 0x0e, 0xce, 0x08, 0xc2, 0x00, 0x15, 0x0e, 0xcb, 0x40, 0xc6,
+ 0x02, 0x91, 0x0e, 0xce, 0x79, 0xc6, 0x23, 0x24, 0x0e, 0xce, 0x68, 0xc6,
+ 0x02, 0x91, 0x0e, 0xce, 0x71, 0xc6, 0x23, 0x24, 0x0e, 0xce, 0x60, 0xc6,
+ 0x02, 0x91, 0x0e, 0xce, 0x01, 0xc6, 0x23, 0x24, 0x0e, 0xcd, 0xf0, 0xc6,
+ 0x02, 0x91, 0x0e, 0xcd, 0xf9, 0xc6, 0x23, 0x24, 0x0e, 0xcd, 0xe8, 0xcc,
+ 0x87, 0x48, 0x0e, 0xce, 0x59, 0xcc, 0x87, 0x3c, 0x0e, 0xce, 0x50, 0xc6,
+ 0x30, 0x47, 0x0e, 0xcd, 0xe1, 0xc6, 0x02, 0x91, 0x0e, 0xcd, 0xd0, 0xc6,
+ 0x30, 0x47, 0x0e, 0xcd, 0xd9, 0xc6, 0x02, 0x91, 0x0e, 0xcd, 0xc8, 0xc5,
+ 0x15, 0x2e, 0x0e, 0xce, 0x39, 0xc5, 0x00, 0x3e, 0x0e, 0xce, 0x30, 0xc5,
+ 0x15, 0x2e, 0x0e, 0xcd, 0xc1, 0xc5, 0x00, 0x3e, 0x0e, 0xcd, 0xb8, 0xc5,
+ 0x15, 0x2e, 0x0e, 0xcc, 0xf1, 0xc6, 0x04, 0x1b, 0x0e, 0xcc, 0xe9, 0xc5,
+ 0x00, 0x3e, 0x0e, 0xcc, 0xe0, 0xc5, 0x15, 0x2e, 0x0e, 0xcc, 0xd9, 0xc6,
+ 0x04, 0x1b, 0x0e, 0xcc, 0xd1, 0xc5, 0x00, 0x3e, 0x0e, 0xcc, 0xc8, 0x47,
+ 0x1d, 0x83, 0xc3, 0x9c, 0x35, 0x4b, 0x2a, 0x0d, 0x43, 0x9c, 0x41, 0xcb,
+ 0x90, 0x9f, 0x0e, 0xcc, 0xf9, 0x53, 0x47, 0x17, 0x43, 0x9c, 0x56, 0xc5,
+ 0x15, 0x2e, 0x0e, 0xcc, 0x53, 0x03, 0x9c, 0x62, 0xc6, 0x04, 0x1b, 0x0e,
+ 0xcc, 0x49, 0xc5, 0x00, 0x3e, 0x0e, 0xcc, 0x40, 0xc2, 0x00, 0x15, 0x0e,
+ 0xc9, 0x68, 0x00, 0x43, 0x9c, 0x68, 0xdf, 0x0c, 0xff, 0x01, 0x4b, 0x79,
+ 0x06, 0x43, 0x9c, 0x7a, 0xd2, 0x05, 0x94, 0x0f, 0xc0, 0x19, 0xd5, 0x00,
+ 0x92, 0x0f, 0xc0, 0x98, 0xca, 0x00, 0x47, 0x01, 0x0d, 0x99, 0xc9, 0x03,
+ 0x9e, 0x01, 0x0d, 0x90, 0xd6, 0x31, 0xd3, 0x01, 0x1b, 0xe1, 0xc3, 0x13,
+ 0x02, 0x01, 0x15, 0xf0, 0xc9, 0x35, 0x23, 0x01, 0x4c, 0x90, 0x45, 0x01,
+ 0xac, 0xc3, 0x9c, 0x80, 0xc6, 0x12, 0x4f, 0x01, 0x5b, 0x91, 0x44, 0x01,
+ 0xba, 0x43, 0x9c, 0xaa, 0xc3, 0x13, 0xc8, 0x01, 0x48, 0xb3, 0x03, 0x9c,
+ 0xb0, 0xd2, 0x05, 0x95, 0x01, 0x5f, 0x70, 0xcf, 0x67, 0x71, 0x01, 0x4b,
+ 0x69, 0x46, 0x03, 0x50, 0xc3, 0x9c, 0xb6, 0xc6, 0x12, 0x4f, 0x01, 0x4a,
+ 0xb9, 0xc8, 0xab, 0xed, 0x01, 0x4a, 0xf8, 0x46, 0x03, 0x50, 0xc3, 0x9c,
+ 0xbc, 0xc8, 0xab, 0xed, 0x01, 0x4a, 0xd9, 0xc6, 0x12, 0x4f, 0x01, 0x4a,
+ 0x98, 0xd2, 0x05, 0x94, 0x0f, 0xc0, 0x11, 0xd5, 0x00, 0x92, 0x0f, 0xc0,
+ 0x90, 0x46, 0x01, 0xab, 0x43, 0x9c, 0xc4, 0xc9, 0x00, 0x68, 0x01, 0x58,
+ 0x71, 0xc7, 0x02, 0x6a, 0x01, 0x58, 0x78, 0xcf, 0x66, 0x45, 0x01, 0x5a,
+ 0x41, 0xce, 0x35, 0xda, 0x01, 0x5a, 0x60, 0xc6, 0x03, 0x81, 0x01, 0x0e,
+ 0x79, 0xcf, 0x2e, 0xd8, 0x01, 0x48, 0x18, 0x90, 0x00, 0x70, 0x81, 0xc3,
+ 0x02, 0x33, 0x00, 0x70, 0xb8, 0xca, 0x2a, 0xb4, 0x07, 0xea, 0xc1, 0xcc,
+ 0x10, 0x79, 0x07, 0xea, 0xc8, 0xcb, 0x6a, 0x72, 0x07, 0xe7, 0x51, 0xcc,
+ 0x10, 0x79, 0x07, 0xe9, 0x90, 0x0b, 0xc3, 0x9c, 0xd0, 0xca, 0x2a, 0xb4,
+ 0x07, 0xe9, 0x31, 0xcb, 0x6a, 0x72, 0x07, 0xe9, 0xc1, 0x45, 0x01, 0xac,
+ 0x43, 0x9c, 0xdc, 0xcb, 0x10, 0x7a, 0x07, 0xe9, 0x81, 0xcc, 0x05, 0x3b,
+ 0x07, 0xe8, 0x60, 0x45, 0x53, 0x23, 0xc3, 0x9c, 0xe8, 0x45, 0x1a, 0x6a,
+ 0x43, 0x9c, 0xf4, 0xcb, 0x10, 0x7a, 0x07, 0xe9, 0x69, 0xcc, 0x05, 0x3b,
+ 0x07, 0xe8, 0x48, 0xcb, 0x10, 0x7a, 0x07, 0xe9, 0x79, 0xcc, 0x05, 0x3b,
+ 0x07, 0xe8, 0x58, 0xcb, 0x6a, 0x72, 0x07, 0xe7, 0xa1, 0xcd, 0x05, 0x3a,
+ 0x07, 0xe3, 0x10, 0xcb, 0x6a, 0x72, 0x07, 0xe7, 0x99, 0xcd, 0x05, 0x3a,
+ 0x07, 0xe3, 0x08, 0xca, 0x2a, 0xb4, 0x07, 0xea, 0xf9, 0xcc, 0x10, 0x79,
+ 0x07, 0xeb, 0x00, 0xca, 0x2a, 0xb4, 0x07, 0xeb, 0x11, 0xcc, 0x10, 0x79,
+ 0x07, 0xeb, 0x18, 0xcc, 0x05, 0x3b, 0x07, 0xe0, 0xe9, 0xcb, 0x10, 0x7a,
+ 0x07, 0xe5, 0x70, 0xcc, 0x05, 0x3b, 0x07, 0xe1, 0x09, 0xcb, 0x10, 0x7a,
+ 0x07, 0xe5, 0x98, 0xca, 0x2a, 0xb4, 0x07, 0xeb, 0x31, 0xcc, 0x10, 0x79,
+ 0x07, 0xee, 0x28, 0xcc, 0x05, 0x3b, 0x07, 0xe1, 0x01, 0xcb, 0x10, 0x7a,
+ 0x07, 0xe5, 0x88, 0x44, 0x1a, 0x74, 0xc3, 0x9d, 0x00, 0xce, 0x40, 0x48,
+ 0x07, 0xed, 0x48, 0xd3, 0x40, 0xee, 0x07, 0xea, 0x31, 0x0a, 0x43, 0x9d,
+ 0x0c, 0x47, 0x9d, 0x11, 0xc3, 0x9d, 0x18, 0xcd, 0x05, 0x3a, 0x07, 0xef,
+ 0xc8, 0xca, 0x2a, 0xb4, 0x07, 0xeb, 0xb1, 0xcc, 0x10, 0x79, 0x07, 0xeb,
+ 0xb8, 0x8f, 0x07, 0xea, 0x39, 0xcd, 0x79, 0xa1, 0x07, 0xea, 0x50, 0xca,
+ 0x85, 0x8e, 0x07, 0xea, 0x41, 0xcc, 0x85, 0x8c, 0x07, 0xea, 0x48, 0xcc,
+ 0x05, 0x3b, 0x07, 0xe1, 0x39, 0xcb, 0x10, 0x7a, 0x07, 0xe9, 0x98, 0x44,
+ 0x1a, 0x74, 0xc3, 0x9d, 0x1e, 0xd1, 0x57, 0xda, 0x07, 0xeb, 0x99, 0xce,
+ 0x40, 0x48, 0x07, 0xeb, 0xa0, 0xcc, 0x05, 0x3b, 0x07, 0xe0, 0x91, 0xcb,
+ 0x10, 0x7a, 0x07, 0xe5, 0x30, 0xcc, 0x05, 0x3b, 0x07, 0xe0, 0x61, 0xcb,
+ 0x10, 0x7a, 0x07, 0xe5, 0x10, 0x45, 0x2f, 0xc8, 0xc3, 0x9d, 0x2a, 0xd1,
+ 0x57, 0xda, 0x07, 0xea, 0x98, 0x43, 0x08, 0x86, 0xc3, 0x9d, 0x36, 0x42,
+ 0x07, 0x73, 0x43, 0x9d, 0x42, 0x44, 0x06, 0x7b, 0xc3, 0x9d, 0x4e, 0x42,
+ 0x00, 0xc0, 0x43, 0x9d, 0x60, 0xca, 0x2a, 0xb4, 0x07, 0xe3, 0x31, 0x0b,
+ 0xc3, 0x9d, 0x6c, 0xcb, 0x6a, 0x72, 0x07, 0xe6, 0xf8, 0x44, 0x21, 0x41,
+ 0xc3, 0x9d, 0x78, 0x43, 0x08, 0x86, 0x43, 0x9d, 0x84, 0xcc, 0x05, 0x3b,
+ 0x07, 0xe0, 0x01, 0xcb, 0x10, 0x7a, 0x07, 0xe4, 0xb8, 0x0b, 0xc3, 0x9d,
+ 0x90, 0xca, 0x2a, 0xb4, 0x07, 0xdf, 0xb8, 0xca, 0x2a, 0xb4, 0x07, 0xdf,
+ 0x99, 0xcd, 0x05, 0x3a, 0x07, 0xdf, 0x90, 0xca, 0x2a, 0xb4, 0x07, 0xdf,
+ 0x89, 0xcd, 0x05, 0x3a, 0x07, 0xdf, 0x80, 0xca, 0x2a, 0xb4, 0x07, 0xdf,
+ 0x79, 0xcd, 0x05, 0x3a, 0x07, 0xdf, 0x70, 0xcc, 0x05, 0x3b, 0x07, 0xe2,
+ 0xb1, 0xcb, 0x10, 0x7a, 0x07, 0xe6, 0xd8, 0xca, 0x2a, 0xb4, 0x07, 0xed,
+ 0xd9, 0xcc, 0x10, 0x79, 0x07, 0xee, 0x18, 0xcd, 0x05, 0x3a, 0x07, 0xf7,
+ 0xc9, 0xca, 0x2a, 0xb4, 0x07, 0xf7, 0xd0, 0xcd, 0x05, 0x3a, 0x07, 0xf7,
+ 0xb9, 0xca, 0x2a, 0xb4, 0x07, 0xf7, 0xc0, 0xca, 0x2a, 0xb4, 0x07, 0xec,
+ 0x01, 0xcc, 0x10, 0x79, 0x07, 0xed, 0xa8, 0xcc, 0x05, 0x3b, 0x07, 0xe1,
+ 0xa1, 0xcb, 0x10, 0x7a, 0x07, 0xe6, 0x18, 0x44, 0x1a, 0x74, 0xc3, 0x9d,
+ 0x9c, 0xcf, 0x6a, 0xf5, 0x07, 0xeb, 0xf9, 0xce, 0x40, 0x48, 0x07, 0xed,
+ 0x90, 0xcc, 0x05, 0x3b, 0x07, 0xe0, 0x31, 0xcb, 0x10, 0x7a, 0x07, 0xe4,
+ 0xe8, 0xc2, 0x0e, 0x30, 0x07, 0xea, 0x20, 0xcb, 0x10, 0x7a, 0x07, 0xdf,
+ 0xf1, 0xcc, 0x05, 0x3b, 0x07, 0xdf, 0xe0, 0x16, 0xc3, 0x9d, 0xa8, 0xca,
+ 0x33, 0xfc, 0x00, 0x31, 0xe9, 0x5c, 0x10, 0xdc, 0x43, 0x9d, 0xb4, 0x44,
+ 0x0b, 0xf8, 0xc3, 0x9d, 0xbe, 0x16, 0x43, 0x9d, 0xcd, 0xcc, 0x05, 0x3b,
+ 0x07, 0xf6, 0x89, 0xcb, 0x10, 0x7a, 0x07, 0xf6, 0x98, 0xd0, 0x0d, 0xe5,
+ 0x00, 0x46, 0x19, 0xc9, 0x0d, 0xd7, 0x00, 0x37, 0xe0, 0xcc, 0x05, 0x3b,
+ 0x07, 0xf6, 0x69, 0xcb, 0x10, 0x7a, 0x07, 0xf6, 0x78, 0xcf, 0x62, 0x67,
+ 0x00, 0x45, 0x81, 0x16, 0xc3, 0x9d, 0xd9, 0xc4, 0x01, 0xbd, 0x00, 0x35,
+ 0x80, 0xcb, 0x10, 0x7a, 0x07, 0xdc, 0xa1, 0xcc, 0x05, 0x3b, 0x07, 0xdc,
+ 0x90, 0xcb, 0x10, 0x7a, 0x07, 0xdc, 0xc1, 0xcc, 0x05, 0x3b, 0x07, 0xdc,
+ 0xb0, 0x46, 0x00, 0x3e, 0xc3, 0x9d, 0xe5, 0x42, 0x00, 0x68, 0xc3, 0x9d,
+ 0xef, 0x4b, 0x0d, 0xe5, 0xc3, 0x9d, 0xfb, 0xc3, 0x02, 0x1d, 0x00, 0x3b,
+ 0x50, 0xcc, 0x05, 0x3b, 0x07, 0xf6, 0xe9, 0xcb, 0x10, 0x7a, 0x07, 0xf6,
+ 0xf8, 0x4a, 0x0d, 0xe6, 0xc3, 0x9e, 0x07, 0xcd, 0x0b, 0x67, 0x00, 0x45,
+ 0x10, 0xcc, 0x05, 0x3b, 0x07, 0xf4, 0xe9, 0xcb, 0x10, 0x7a, 0x07, 0xf4,
+ 0xf8, 0x4a, 0x0d, 0xe6, 0xc3, 0x9e, 0x13, 0x48, 0x0b, 0x67, 0x43, 0x9e,
+ 0x25, 0xcc, 0x05, 0x3b, 0x07, 0xf6, 0x49, 0xcb, 0x10, 0x7a, 0x07, 0xf6,
+ 0x58, 0x44, 0x01, 0xad, 0xc3, 0x9e, 0x31, 0xc4, 0x54, 0x31, 0x00, 0x33,
+ 0x8a, 0x03, 0x9e, 0x67, 0x00, 0x43, 0x9e, 0x6b, 0xc7, 0x31, 0xc6, 0x00,
+ 0x46, 0x11, 0x16, 0xc3, 0x9e, 0x77, 0xc9, 0x18, 0x19, 0x00, 0x3b, 0x10,
+ 0xcc, 0x05, 0x3b, 0x07, 0xdc, 0x71, 0xcb, 0x10, 0x7a, 0x07, 0xdc, 0x80,
+ 0x45, 0x01, 0xac, 0xc3, 0x9e, 0x83, 0x0b, 0xc3, 0x9e, 0x93, 0xcb, 0x6a,
+ 0x72, 0x07, 0xf6, 0xe1, 0xca, 0x2a, 0xb4, 0x07, 0xf6, 0xd0, 0xca, 0x2a,
+ 0xb4, 0x07, 0xdf, 0x19, 0xcd, 0x05, 0x3a, 0x07, 0xdf, 0x10, 0xca, 0x2a,
+ 0xb4, 0x07, 0xdf, 0x09, 0xcd, 0x05, 0x3a, 0x07, 0xdf, 0x00, 0xcc, 0x05,
+ 0x3b, 0x07, 0xf5, 0x29, 0xcb, 0x10, 0x7a, 0x07, 0xf5, 0x38, 0xc7, 0x31,
+ 0xc6, 0x00, 0x46, 0x09, 0xc9, 0x18, 0x19, 0x00, 0x35, 0xf8, 0xcb, 0x10,
+ 0x7a, 0x07, 0xdb, 0xe1, 0xcc, 0x05, 0x3b, 0x07, 0xdb, 0xd0, 0xcb, 0x6a,
+ 0x72, 0x07, 0xdc, 0x09, 0x0b, 0xc3, 0x9e, 0x9f, 0xca, 0x2a, 0xb4, 0x07,
+ 0xdb, 0xf8, 0xcb, 0x10, 0x7a, 0x07, 0xdb, 0x41, 0xcc, 0x05, 0x3b, 0x07,
+ 0xdb, 0x30, 0x0b, 0xc3, 0x9e, 0xab, 0xca, 0x2a, 0xb4, 0x07, 0xda, 0xf9,
+ 0xcb, 0x6a, 0x72, 0x07, 0xdb, 0x08, 0x46, 0x00, 0x3e, 0xc3, 0x9e, 0xb7,
+ 0xc4, 0x01, 0xbd, 0x00, 0x33, 0xe1, 0xda, 0x1a, 0x1c, 0x00, 0x33, 0xe8,
+ 0xc6, 0xd6, 0xfa, 0x00, 0x31, 0x4b, 0x03, 0x9e, 0xc1, 0xca, 0x6a, 0x73,
+ 0x07, 0xf4, 0xc0, 0xcc, 0x05, 0x3b, 0x07, 0xf4, 0xa9, 0xcb, 0x10, 0x7a,
+ 0x07, 0xf4, 0xb8, 0xcb, 0x6a, 0x72, 0x07, 0xdb, 0x29, 0x0b, 0xc3, 0x9e,
+ 0xc5, 0xca, 0x2a, 0xb4, 0x07, 0xdb, 0x18, 0x16, 0xc3, 0x9e, 0xd1, 0xc9,
+ 0x0d, 0xd7, 0x00, 0x44, 0x58, 0xcc, 0x05, 0x3b, 0x07, 0xf6, 0x09, 0xcb,
+ 0x10, 0x7a, 0x07, 0xf6, 0x18, 0xcd, 0x05, 0x3a, 0x07, 0xf5, 0x59, 0xca,
+ 0x2a, 0xb4, 0x07, 0xf5, 0x60, 0x0b, 0xc3, 0x9e, 0xdd, 0xca, 0x2a, 0xb4,
+ 0x07, 0xf4, 0xd1, 0xcb, 0x6a, 0x72, 0x07, 0xf4, 0xe0, 0xcb, 0x10, 0x7a,
+ 0x07, 0xdb, 0x81, 0xcc, 0x05, 0x3b, 0x07, 0xdb, 0x70, 0x16, 0xc3, 0x9e,
+ 0xe9, 0xc7, 0x31, 0xc6, 0x00, 0x36, 0x71, 0xcb, 0x08, 0x89, 0x00, 0x31,
+ 0x32, 0x03, 0x9e, 0xfb, 0x00, 0x43, 0x9e, 0xff, 0xcc, 0x05, 0x3b, 0x07,
+ 0xf7, 0x89, 0xcb, 0x10, 0x7a, 0x07, 0xf7, 0x98, 0x15, 0xc3, 0x9f, 0x11,
+ 0xc4, 0xac, 0x0c, 0x00, 0x45, 0x51, 0xca, 0x33, 0xfc, 0x00, 0x37, 0x79,
+ 0xcf, 0x39, 0xbf, 0x00, 0x34, 0xc9, 0x49, 0x0b, 0x79, 0xc3, 0x9f, 0x1d,
+ 0xc9, 0x0d, 0xd7, 0x00, 0x34, 0xa3, 0x03, 0x9f, 0x29, 0xc4, 0x01, 0xbd,
+ 0x00, 0x34, 0x99, 0xcb, 0x08, 0x89, 0x00, 0x3b, 0x60, 0xcc, 0x05, 0x3b,
+ 0x07, 0xdd, 0x01, 0xcb, 0x10, 0x7a, 0x07, 0xdd, 0x10, 0x46, 0x00, 0x3e,
+ 0xc3, 0x9f, 0x2f, 0xcb, 0x08, 0x89, 0x00, 0x45, 0x09, 0xd6, 0x31, 0xe9,
+ 0x00, 0x3a, 0xa9, 0x16, 0xc3, 0x9f, 0x3c, 0xde, 0x0d, 0xd7, 0x00, 0x3a,
+ 0x88, 0xcc, 0x05, 0x3b, 0x07, 0xf4, 0x79, 0xcb, 0x10, 0x7a, 0x07, 0xf4,
+ 0x88, 0xcb, 0x6a, 0x72, 0x07, 0xda, 0xe9, 0x0b, 0xc3, 0x9f, 0x48, 0xca,
+ 0x2a, 0xb4, 0x07, 0xda, 0xd8, 0xcb, 0x10, 0x7a, 0x07, 0xda, 0xa1, 0xcc,
+ 0x05, 0x3b, 0x07, 0xda, 0x90, 0xc5, 0x00, 0x34, 0x00, 0x45, 0x2b, 0x03,
+ 0x9f, 0x54, 0xc5, 0x03, 0x50, 0x00, 0x35, 0x38, 0xcc, 0x05, 0x3b, 0x07,
+ 0xf6, 0x29, 0xcb, 0x10, 0x7a, 0x07, 0xf6, 0x38, 0x4a, 0x0d, 0xe6, 0xc3,
+ 0x9f, 0x5a, 0xcd, 0x0b, 0x7a, 0x00, 0x34, 0xe8, 0xcc, 0x05, 0x3b, 0x07,
+ 0xf5, 0xc9, 0xcb, 0x10, 0x7a, 0x07, 0xf5, 0xd8, 0xcc, 0x05, 0x3b, 0x07,
+ 0xf5, 0xa9, 0xcb, 0x10, 0x7a, 0x07, 0xf5, 0xb8, 0x16, 0xc3, 0x9f, 0x66,
+ 0xd7, 0x2b, 0x76, 0x00, 0x34, 0xd1, 0xca, 0x33, 0xfc, 0x00, 0x3b, 0xf1,
+ 0x46, 0x02, 0xff, 0xc3, 0x9f, 0x75, 0xcf, 0x39, 0xbf, 0x00, 0x3a, 0xe1,
+ 0x44, 0x00, 0x3e, 0x43, 0x9f, 0x7b, 0xcc, 0x05, 0x3b, 0x07, 0xf5, 0x89,
+ 0xcb, 0x10, 0x7a, 0x07, 0xf5, 0x98, 0x45, 0x01, 0xac, 0xc3, 0x9f, 0x81,
+ 0xcd, 0x05, 0x3a, 0x07, 0xf5, 0x49, 0xca, 0x2a, 0xb4, 0x07, 0xf5, 0x50,
+ 0xca, 0x2a, 0xb4, 0x07, 0xdc, 0x29, 0xcd, 0x05, 0x3a, 0x07, 0xdc, 0x20,
+ 0xce, 0x74, 0x4c, 0x00, 0x37, 0xd9, 0x0b, 0xc3, 0x9f, 0xa0, 0xca, 0x2a,
+ 0xb4, 0x07, 0xf5, 0xf1, 0xcb, 0x6a, 0x72, 0x07, 0xf6, 0x00, 0xca, 0x2a,
+ 0xb4, 0x07, 0xdc, 0x49, 0xcd, 0x05, 0x3a, 0x07, 0xdc, 0x40, 0xca, 0x2a,
+ 0xb4, 0x07, 0xdc, 0x19, 0xcd, 0x05, 0x3a, 0x07, 0xdc, 0x10, 0xcb, 0x10,
+ 0x7a, 0x07, 0xdb, 0xa1, 0xcc, 0x05, 0x3b, 0x07, 0xdb, 0x90, 0xcb, 0x10,
+ 0x7a, 0x07, 0xdb, 0x61, 0xcc, 0x05, 0x3b, 0x07, 0xdb, 0x50, 0xc6, 0x1d,
+ 0x29, 0x00, 0x45, 0x59, 0xc5, 0x03, 0x50, 0x00, 0x36, 0x78, 0x00, 0x43,
+ 0x9f, 0xac, 0xc8, 0xb8, 0xa3, 0x00, 0x3b, 0xc1, 0xca, 0xa1, 0xaa, 0x00,
+ 0x3b, 0xc8, 0xd0, 0x0d, 0xe5, 0x00, 0x45, 0x39, 0x44, 0x0b, 0xf8, 0x43,
+ 0x9f, 0xb8, 0xcc, 0x05, 0x3b, 0x07, 0xf7, 0x09, 0xcb, 0x10, 0x7a, 0x07,
+ 0xf7, 0x18, 0xcb, 0x10, 0x7a, 0x07, 0xde, 0xa9, 0xcc, 0x05, 0x3b, 0x07,
+ 0xde, 0x98, 0xcb, 0x6a, 0x72, 0x07, 0xdc, 0xe9, 0x0b, 0xc3, 0x9f, 0xc4,
+ 0xca, 0x2a, 0xb4, 0x07, 0xdc, 0xd8, 0xd0, 0x31, 0xbd, 0x00, 0x44, 0x49,
+ 0x16, 0xc3, 0x9f, 0xd0, 0xc4, 0x01, 0xbd, 0x00, 0x35, 0xe1, 0xc9, 0x0d,
+ 0xd7, 0x00, 0x35, 0xc9, 0x46, 0x00, 0x3e, 0x43, 0x9f, 0xdc, 0x00, 0x43,
+ 0x9f, 0xe6, 0xcc, 0x05, 0x3b, 0x07, 0xf7, 0x29, 0xcb, 0x10, 0x7a, 0x07,
+ 0xf7, 0x38, 0xcb, 0x10, 0x7a, 0x07, 0xdb, 0xc1, 0xcc, 0x05, 0x3b, 0x07,
+ 0xdb, 0xb0, 0x45, 0x01, 0xac, 0xc3, 0x9f, 0xf2, 0x0b, 0xc3, 0xa0, 0x0e,
+ 0xca, 0x2a, 0xb4, 0x07, 0xf5, 0x11, 0xcb, 0x6a, 0x72, 0x07, 0xf5, 0x20,
+ 0x00, 0x43, 0xa0, 0x1a, 0x00, 0x43, 0xa0, 0x2a, 0xc9, 0xb6, 0xdb, 0x00,
+ 0x36, 0x03, 0x03, 0xa0, 0x40, 0xca, 0x33, 0xfc, 0x00, 0x37, 0xf8, 0xcc,
+ 0x05, 0x3b, 0x07, 0xf7, 0x49, 0xcb, 0x10, 0x7a, 0x07, 0xf7, 0x58, 0xc2,
+ 0x08, 0x86, 0x0f, 0x75, 0xb1, 0xc2, 0x00, 0x2f, 0x0f, 0x75, 0xc0, 0xc4,
+ 0x3e, 0xff, 0x0f, 0x72, 0xe9, 0xc3, 0x0e, 0x13, 0x0f, 0x72, 0xf8, 0xe0,
+ 0x01, 0x87, 0x0f, 0xdd, 0x68, 0xd0, 0x01, 0x97, 0x0f, 0xdd, 0x60, 0xd0,
+ 0x12, 0xd2, 0x0f, 0xdd, 0x30, 0x00, 0x43, 0xa0, 0x44, 0x00, 0x43, 0xa0,
+ 0x53, 0x16, 0xc3, 0xa0, 0x62, 0xd2, 0x4d, 0x28, 0x0f, 0xd0, 0x39, 0xce,
+ 0x29, 0x29, 0x0f, 0xd0, 0x99, 0xdf, 0x0d, 0x7b, 0x0f, 0xd0, 0xe0, 0xc5,
+ 0x66, 0xf4, 0x0f, 0xaf, 0xc9, 0xc8, 0x94, 0x1d, 0x0f, 0xaf, 0xb8, 0xc2,
+ 0x08, 0xc6, 0x0b, 0x4e, 0x39, 0x90, 0x0b, 0x4c, 0xa9, 0x9a, 0x0b, 0x4c,
+ 0x40, 0xc3, 0x5a, 0x14, 0x0b, 0x4d, 0xc8, 0x8f, 0x0b, 0x4e, 0x59, 0x92,
+ 0x0b, 0x4d, 0xb0, 0xc3, 0x7a, 0x15, 0x0b, 0x4c, 0x49, 0x9a, 0x0b, 0x4b,
+ 0xf8, 0x92, 0x0b, 0x4e, 0x81, 0xcb, 0x97, 0xcc, 0x0b, 0x4c, 0x99, 0xc3,
+ 0x87, 0xeb, 0x0b, 0x4c, 0x30, 0xc3, 0x8c, 0x60, 0x0b, 0x4d, 0xfb, 0x03,
+ 0xa0, 0x6e, 0xc3, 0xaa, 0xdd, 0x0b, 0x4c, 0x68, 0xc8, 0xbb, 0x1b, 0x0b,
+ 0x4e, 0xe9, 0xc8, 0xbc, 0xc3, 0x0b, 0x4c, 0x90, 0xc6, 0xd5, 0xec, 0x0b,
+ 0x4f, 0x40, 0x92, 0x0b, 0x4a, 0x19, 0xc2, 0x01, 0x02, 0x0b, 0x49, 0x8a,
+ 0x03, 0xa0, 0x72, 0xc3, 0x8c, 0x61, 0x0b, 0x49, 0x49, 0xc2, 0x00, 0x47,
+ 0x0b, 0x48, 0x80, 0x9a, 0x0b, 0x4a, 0xa9, 0xc2, 0x08, 0xc6, 0x0b, 0x48,
+ 0x08, 0xc3, 0xdc, 0x59, 0x0b, 0x47, 0x01, 0xc6, 0xd5, 0xf8, 0x0b, 0x44,
+ 0xf8, 0xc3, 0x0b, 0x46, 0x0b, 0x46, 0x91, 0x8f, 0x0b, 0x45, 0xd9, 0xc2,
+ 0x00, 0xe5, 0x0b, 0x45, 0xa9, 0xc8, 0xb9, 0x03, 0x0b, 0x45, 0x80, 0xc6,
+ 0xd4, 0x5a, 0x0b, 0x47, 0x19, 0xcc, 0x8d, 0xc0, 0x0b, 0x44, 0xf0, 0x9a,
+ 0x0b, 0x47, 0x09, 0x8f, 0x0b, 0x44, 0xd8, 0xc6, 0x18, 0x40, 0x0b, 0x43,
+ 0xd8, 0xc4, 0xe5, 0x2b, 0x0b, 0x41, 0x59, 0xc4, 0xe4, 0x13, 0x0b, 0x40,
+ 0x71, 0xc6, 0xd4, 0x3c, 0x0b, 0x40, 0x58, 0xc4, 0xea, 0x67, 0x0b, 0x41,
+ 0x11, 0xc4, 0xe5, 0xc3, 0x0b, 0x40, 0xc8, 0xa3, 0x01, 0x41, 0xfb, 0x03,
+ 0xa0, 0x78, 0xa5, 0x01, 0x44, 0xf9, 0xa4, 0x01, 0x42, 0xfa, 0x03, 0xa0,
+ 0x83, 0xa5, 0x01, 0x45, 0x79, 0xa4, 0x01, 0x43, 0x7a, 0x03, 0xa0, 0x87,
+ 0xa5, 0x01, 0x46, 0x78, 0xa5, 0x01, 0x45, 0xb9, 0xa4, 0x01, 0x43, 0xba,
+ 0x03, 0xa0, 0x8b, 0xa5, 0x01, 0x46, 0xb8, 0xa5, 0x01, 0x47, 0x38, 0xa5,
+ 0x01, 0x45, 0xd9, 0xa4, 0x01, 0x43, 0xda, 0x03, 0xa0, 0x8f, 0xa5, 0x01,
+ 0x46, 0xd8, 0xa5, 0x01, 0x47, 0x58, 0xa5, 0x01, 0x47, 0x98, 0xa5, 0x01,
+ 0x45, 0xe9, 0xa4, 0x01, 0x43, 0xea, 0x03, 0xa0, 0x93, 0xa5, 0x01, 0x46,
+ 0xe8, 0xa5, 0x01, 0x47, 0x68, 0xa5, 0x01, 0x47, 0xa8, 0xa5, 0x01, 0x47,
+ 0xc8, 0xa5, 0x01, 0x45, 0xf1, 0xa4, 0x01, 0x43, 0xf2, 0x03, 0xa0, 0x97,
+ 0xa5, 0x01, 0x46, 0xf0, 0xa5, 0x01, 0x47, 0x70, 0xa5, 0x01, 0x47, 0xb0,
+ 0xa5, 0x01, 0x47, 0xd0, 0xa5, 0x01, 0x47, 0xe0, 0x45, 0x01, 0xac, 0xc3,
+ 0xa0, 0x9b, 0xc6, 0x12, 0x4f, 0x01, 0x5b, 0x81, 0x45, 0x03, 0xb3, 0x43,
+ 0xa0, 0xc5, 0xc3, 0x13, 0xc8, 0x01, 0x59, 0xdb, 0x03, 0xa0, 0xcb, 0xd2,
+ 0x05, 0x95, 0x01, 0x5f, 0x60, 0xd2, 0x05, 0x94, 0x0f, 0xc0, 0x01, 0xd5,
+ 0x00, 0x92, 0x0f, 0xc0, 0x80, 0x46, 0x01, 0xab, 0x43, 0xa0, 0xd1, 0xc9,
+ 0x00, 0x68, 0x01, 0x58, 0x81, 0xc7, 0x02, 0x6a, 0x01, 0x58, 0x88, 0xdd,
+ 0x12, 0x38, 0x01, 0x0d, 0xc8, 0xcf, 0x66, 0x45, 0x01, 0x5a, 0x11, 0xce,
+ 0x35, 0xda, 0x01, 0x5a, 0x58, 0xc6, 0x03, 0x81, 0x01, 0x0e, 0x69, 0xcf,
+ 0x2e, 0xd8, 0x01, 0x48, 0x10, 0xcf, 0x67, 0x71, 0x01, 0x4b, 0x59, 0x47,
+ 0x10, 0xb4, 0xc3, 0xa0, 0xdd, 0xc8, 0xab, 0xed, 0x01, 0x4a, 0xf1, 0xc6,
+ 0x12, 0x4f, 0x01, 0x4a, 0xb0, 0x46, 0x03, 0x50, 0xc3, 0xa0, 0xe3, 0xc8,
+ 0xab, 0xed, 0x01, 0x4a, 0xd1, 0xc6, 0x12, 0x4f, 0x01, 0x4a, 0x90, 0xc5,
+ 0xe3, 0x00, 0x08, 0x04, 0x39, 0xc5, 0xdf, 0x22, 0x08, 0x04, 0x30, 0xca,
+ 0xa9, 0x52, 0x08, 0x04, 0x41, 0xc9, 0xb6, 0x6f, 0x08, 0x04, 0x48, 0xc5,
+ 0xdf, 0x3b, 0x08, 0x04, 0x51, 0xc6, 0xd9, 0x40, 0x08, 0x04, 0x58, 0xc5,
+ 0xdd, 0xc9, 0x08, 0x04, 0x61, 0xc6, 0xd9, 0x3a, 0x08, 0x04, 0x68, 0xc6,
+ 0xd6, 0x3a, 0x08, 0x04, 0x19, 0xc6, 0xd3, 0x1c, 0x08, 0x04, 0x21, 0xca,
+ 0xa6, 0x32, 0x08, 0x04, 0x28, 0xc6, 0x1e, 0x8c, 0x00, 0xf4, 0xb9, 0xcc,
+ 0x3c, 0x2e, 0x01, 0x63, 0x30, 0xc5, 0x00, 0x34, 0x00, 0xf3, 0x69, 0xc5,
+ 0x03, 0x50, 0x00, 0xf3, 0x58, 0x46, 0x01, 0xab, 0x43, 0xa0, 0xed, 0xca,
+ 0x43, 0xef, 0x0e, 0xf8, 0x68, 0xca, 0xa2, 0x18, 0x0e, 0xf8, 0x30, 0x87,
+ 0x00, 0xe8, 0xa3, 0x03, 0xa1, 0x0e, 0xc5, 0x24, 0x65, 0x00, 0xe8, 0x41,
+ 0xc7, 0xcd, 0x02, 0x05, 0x5a, 0x1a, 0x03, 0xa1, 0x14, 0xc8, 0x68, 0x4a,
+ 0x05, 0x3b, 0xf8, 0x87, 0x00, 0xe8, 0x11, 0xc4, 0xdd, 0x2f, 0x00, 0x12,
+ 0x90, 0xce, 0x64, 0x93, 0x00, 0x15, 0x72, 0x03, 0xa1, 0x1a, 0xce, 0x70,
+ 0x5c, 0x00, 0x13, 0x80, 0xd2, 0x21, 0xc2, 0x05, 0x3b, 0x38, 0xce, 0x18,
+ 0x14, 0x00, 0xf3, 0x38, 0xce, 0x18, 0x14, 0x00, 0xf3, 0x48, 0xce, 0x04,
+ 0x59, 0x00, 0xec, 0xa9, 0xc4, 0x04, 0x63, 0x00, 0x12, 0xd0, 0xca, 0xa2,
+ 0x72, 0x05, 0x5a, 0x60, 0xd2, 0x4b, 0x42, 0x05, 0x59, 0xb0, 0xcc, 0x26,
+ 0x18, 0x00, 0xe8, 0x99, 0xc5, 0xda, 0xa9, 0x00, 0xe8, 0x90, 0x42, 0x00,
+ 0xa9, 0xc3, 0xa1, 0x20, 0xc8, 0x64, 0x99, 0x00, 0x13, 0xf3, 0x03, 0xa1,
+ 0x2c, 0x0e, 0xc3, 0xa1, 0x32, 0x42, 0x00, 0x68, 0xc3, 0xa1, 0x3e, 0xcc,
+ 0x57, 0xac, 0x00, 0xec, 0x49, 0x05, 0xc3, 0xa1, 0x4a, 0xc4, 0x13, 0xc7,
+ 0x00, 0x13, 0xe9, 0xce, 0x3b, 0x8c, 0x05, 0x3d, 0x39, 0xc5, 0x34, 0x21,
+ 0x00, 0x0a, 0xa9, 0xce, 0x1f, 0xa7, 0x00, 0x10, 0x99, 0xc6, 0x03, 0x81,
+ 0x00, 0x12, 0x68, 0xce, 0x04, 0x59, 0x00, 0xec, 0xa1, 0xc4, 0x04, 0x63,
+ 0x00, 0x12, 0xe8, 0xd2, 0x4d, 0x94, 0x0e, 0xf9, 0x39, 0xca, 0x1e, 0xfc,
+ 0x00, 0xeb, 0x80, 0xcf, 0x63, 0x66, 0x00, 0xf2, 0x59, 0xcb, 0x4b, 0x49,
+ 0x05, 0x59, 0xd9, 0xc6, 0xbf, 0xd5, 0x00, 0x0a, 0x31, 0xc4, 0x69, 0xdb,
+ 0x00, 0x0a, 0x41, 0xc3, 0x00, 0xd3, 0x00, 0x11, 0xa8, 0xc9, 0x6a, 0xec,
+ 0x00, 0xf2, 0x49, 0xc8, 0x4d, 0x9e, 0x00, 0x13, 0x91, 0xcd, 0x7d, 0x97,
+ 0x00, 0x0c, 0xf0, 0x43, 0x0b, 0xf9, 0xc3, 0xa1, 0x5c, 0xc8, 0x21, 0xcc,
+ 0x05, 0x3c, 0x88, 0xc4, 0x01, 0xbd, 0x05, 0x59, 0xc9, 0xc5, 0x1f, 0x01,
+ 0x00, 0x13, 0x59, 0xc3, 0x05, 0xe3, 0x00, 0x0a, 0x00, 0xd1, 0x57, 0xa7,
+ 0x0e, 0xf8, 0x98, 0xcb, 0x9c, 0x9c, 0x00, 0xf1, 0xc8, 0xcc, 0x1e, 0xfa,
+ 0x05, 0x59, 0xc1, 0xc3, 0x02, 0x1d, 0x01, 0x63, 0x08, 0xce, 0x3c, 0x2c,
+ 0x00, 0xf4, 0xe1, 0xc8, 0x18, 0x1a, 0x00, 0xf4, 0xd8, 0xce, 0x04, 0x59,
+ 0x0e, 0xf8, 0xc9, 0xcc, 0x57, 0xac, 0x0e, 0xf8, 0x90, 0x46, 0x01, 0xab,
+ 0x43, 0xa1, 0x68, 0xd2, 0x4b, 0x42, 0x05, 0x5a, 0x50, 0xcc, 0x26, 0x18,
+ 0x00, 0x12, 0xfa, 0x03, 0xa1, 0x74, 0xca, 0xa4, 0x98, 0x00, 0xf0, 0x48,
+ 0x45, 0x00, 0x39, 0x43, 0xa1, 0x7a, 0x45, 0x00, 0x39, 0x43, 0xa1, 0x98,
+ 0x42, 0x00, 0xd0, 0xc3, 0xa1, 0xb6, 0x45, 0x02, 0x93, 0x43, 0xa1, 0xc5,
+ 0xcb, 0x9c, 0x9c, 0x00, 0x11, 0x58, 0xc5, 0x34, 0x21, 0x00, 0xf2, 0x99,
+ 0xc5, 0x1e, 0x64, 0x00, 0xf2, 0x88, 0xc9, 0x1e, 0x89, 0x00, 0xf2, 0x79,
+ 0xc5, 0x34, 0x21, 0x00, 0xf2, 0x69, 0xc6, 0x61, 0xbc, 0x00, 0x11, 0x68,
+ 0xce, 0x04, 0x59, 0x00, 0xec, 0xb9, 0xc6, 0x03, 0x81, 0x05, 0x59, 0xf8,
+ 0xc7, 0x0d, 0xd9, 0x00, 0xf6, 0x59, 0xca, 0x1e, 0x5f, 0x00, 0x10, 0x48,
+ 0xca, 0xa4, 0x98, 0x00, 0xf1, 0x78, 0xcc, 0x57, 0xac, 0x0e, 0xf8, 0xc1,
+ 0xce, 0x04, 0x59, 0x00, 0xec, 0xd1, 0x05, 0xc3, 0xa1, 0xd1, 0xc4, 0x13,
+ 0xc7, 0x00, 0x0d, 0xd0, 0xc9, 0xb3, 0xcc, 0x0e, 0xf8, 0x60, 0x00, 0x43,
+ 0xa1, 0xdd, 0xca, 0xa4, 0x70, 0x00, 0xf0, 0xe8, 0x42, 0x00, 0xd0, 0xc3,
+ 0xa1, 0xe9, 0xca, 0x1e, 0x5f, 0x00, 0x10, 0x28, 0xc5, 0x34, 0x21, 0x00,
+ 0xf0, 0xb9, 0xc5, 0x1e, 0x64, 0x00, 0xf0, 0xa8, 0xc5, 0x00, 0x34, 0x00,
+ 0xf7, 0xa9, 0xc5, 0x03, 0x50, 0x00, 0xf4, 0x78, 0xc2, 0x01, 0x00, 0x00,
+ 0x0d, 0x83, 0x03, 0xa1, 0xf5, 0xc8, 0xa1, 0x8e, 0x00, 0xf7, 0x38, 0x11,
+ 0xc3, 0xa1, 0xfb, 0xc8, 0x1e, 0x8a, 0x00, 0x07, 0xe2, 0x03, 0xa2, 0x07,
+ 0xce, 0x74, 0xf4, 0x00, 0xf3, 0xd8, 0x00, 0x43, 0xa2, 0x0b, 0xc9, 0x07,
+ 0x97, 0x00, 0x07, 0xdb, 0x03, 0xa2, 0x17, 0xc4, 0x69, 0xdb, 0x00, 0x0e,
+ 0xa0, 0xcd, 0x04, 0x5a, 0x00, 0xec, 0xc9, 0xc9, 0xa2, 0xc3, 0x00, 0x0b,
+ 0x78, 0xce, 0x38, 0x03, 0x05, 0x5a, 0x71, 0xc5, 0x03, 0x82, 0x05, 0x3d,
+ 0xc8, 0x45, 0x00, 0x39, 0x43, 0xa2, 0x1d, 0xc9, 0x07, 0x97, 0x00, 0x07,
+ 0x13, 0x03, 0xa2, 0x3b, 0xc4, 0x69, 0xdb, 0x00, 0x0e, 0x70, 0x11, 0xc3,
+ 0xa2, 0x41, 0xc8, 0x1e, 0x8a, 0x00, 0x07, 0x22, 0x03, 0xa2, 0x4d, 0x0b,
+ 0xc3, 0xa2, 0x53, 0xcd, 0x04, 0x5a, 0x00, 0xec, 0x78, 0xc5, 0x00, 0x34,
+ 0x00, 0xf4, 0x49, 0xc5, 0x03, 0x50, 0x00, 0xf4, 0x38, 0xc5, 0x00, 0x34,
+ 0x00, 0xf1, 0x29, 0xc5, 0x03, 0x50, 0x00, 0xf1, 0x18, 0xc5, 0x00, 0x34,
+ 0x00, 0xf4, 0x99, 0xc5, 0x03, 0x50, 0x00, 0x0b, 0xe0, 0x00, 0x43, 0xa2,
+ 0x5f, 0xd2, 0x21, 0xc2, 0x05, 0x3a, 0x88, 0x45, 0x00, 0x39, 0x43, 0xa2,
+ 0x6b, 0xc7, 0x0d, 0xd9, 0x00, 0xf7, 0x21, 0x45, 0x02, 0x93, 0x43, 0xa2,
+ 0x89, 0x00, 0x43, 0xa2, 0x95, 0xc9, 0xa4, 0x99, 0x00, 0xf3, 0xc1, 0xc5,
+ 0x00, 0x34, 0x00, 0xf3, 0xa0, 0xc6, 0x00, 0x33, 0x00, 0xf3, 0xb0, 0xc9,
+ 0x0d, 0xd7, 0x00, 0xf7, 0x11, 0xc5, 0x1f, 0x01, 0x00, 0xf7, 0x01, 0xca,
+ 0xa1, 0x8c, 0x00, 0xf6, 0xf1, 0xc5, 0x1e, 0x64, 0x00, 0xf6, 0xe1, 0xc5,
+ 0x34, 0x21, 0x00, 0xf6, 0xd0, 0xc9, 0x0d, 0xd7, 0x00, 0xf6, 0xc1, 0xc5,
+ 0x1f, 0x01, 0x00, 0xf6, 0xb1, 0xca, 0xa1, 0x8c, 0x00, 0xf6, 0xa1, 0xc5,
+ 0x1e, 0x64, 0x00, 0xf6, 0x91, 0xc5, 0x34, 0x21, 0x00, 0xf6, 0x80, 0xc5,
+ 0x00, 0x34, 0x00, 0xf6, 0x61, 0xc5, 0x03, 0x50, 0x00, 0x11, 0x72, 0x03,
+ 0xa2, 0xa1, 0xc5, 0x34, 0x21, 0x00, 0x0a, 0x81, 0xc5, 0x1e, 0x64, 0x00,
+ 0x10, 0x60, 0xc5, 0x34, 0x21, 0x00, 0xf2, 0x91, 0xc5, 0x1e, 0x64, 0x00,
+ 0xf2, 0x80, 0xc5, 0x00, 0x34, 0x00, 0xf6, 0x51, 0xc5, 0x03, 0x50, 0x00,
+ 0x09, 0x80, 0x44, 0x00, 0x3a, 0xc3, 0xa2, 0xa7, 0xc5, 0x00, 0x34, 0x00,
+ 0xf0, 0xc0, 0xc5, 0x00, 0x34, 0x00, 0xf5, 0xc1, 0xc5, 0x03, 0x50, 0x00,
+ 0x08, 0xb0, 0xc9, 0x0d, 0xd7, 0x00, 0xf5, 0x61, 0xc5, 0x1f, 0x01, 0x00,
+ 0xf5, 0x51, 0xca, 0xa1, 0x8c, 0x00, 0xf5, 0x41, 0xc5, 0x1e, 0x64, 0x00,
+ 0xf5, 0x31, 0xc5, 0x34, 0x21, 0x00, 0xf5, 0x20, 0xc5, 0x00, 0x34, 0x00,
+ 0xf5, 0x01, 0xc5, 0x03, 0x50, 0x00, 0x11, 0x32, 0x03, 0xa2, 0xc5, 0xc5,
+ 0x00, 0x34, 0x00, 0xf2, 0xd3, 0x03, 0xa2, 0xcb, 0xc5, 0x03, 0x50, 0x00,
+ 0xf2, 0xc0, 0xca, 0x00, 0x47, 0x01, 0x5d, 0x19, 0xc9, 0x03, 0x9e, 0x01,
+ 0x5d, 0x10, 0xc6, 0xae, 0x6e, 0x05, 0x4b, 0x91, 0xc5, 0xba, 0x7e, 0x00,
+ 0x89, 0x18, 0xc4, 0x95, 0xb8, 0x00, 0x89, 0xe9, 0xc5, 0xc3, 0xe9, 0x00,
+ 0x8a, 0x78, 0xc3, 0x25, 0x4e, 0x01, 0x9f, 0x59, 0x42, 0x00, 0xb5, 0x43,
+ 0xa2, 0xd1, 0xc3, 0x03, 0x2c, 0x01, 0x9f, 0x61, 0x9b, 0x01, 0x9f, 0xe9,
+ 0xc5, 0xe2, 0x0b, 0x01, 0x9f, 0xf0, 0x9b, 0x01, 0x9b, 0x61, 0xc5, 0xe2,
+ 0x0b, 0x01, 0x9b, 0x69, 0x43, 0x47, 0x79, 0xc3, 0xa2, 0xf9, 0xc6, 0xb4,
+ 0x92, 0x01, 0x9b, 0x80, 0x4b, 0x15, 0x9b, 0xc3, 0xa3, 0x05, 0xdc, 0x12,
+ 0xe2, 0x0f, 0xd2, 0x28, 0xc9, 0x4f, 0xa1, 0x08, 0x4f, 0x88, 0xc9, 0x4f,
+ 0xa1, 0x08, 0x4f, 0x80, 0xc9, 0x4f, 0xa1, 0x08, 0x4f, 0x78, 0xc9, 0x4f,
+ 0xa1, 0x08, 0x4f, 0x70, 0xc9, 0xb6, 0x03, 0x0f, 0x02, 0x7b, 0x03, 0xa3,
+ 0x11, 0xc4, 0x6e, 0x0c, 0x0f, 0x02, 0x59, 0xc2, 0x01, 0x0e, 0x0f, 0x02,
+ 0x30, 0xc3, 0x1a, 0xd0, 0x0f, 0x02, 0x3b, 0x03, 0xa3, 0x17, 0x97, 0x0f,
+ 0x02, 0x48, 0x95, 0x0f, 0x01, 0xfb, 0x03, 0xa3, 0x1d, 0x88, 0x0f, 0x02,
+ 0x11, 0x94, 0x0f, 0x01, 0xf1, 0x8f, 0x0f, 0x01, 0xd9, 0x8e, 0x0f, 0x01,
+ 0xd0, 0xc7, 0xbc, 0xcc, 0x0f, 0x02, 0x81, 0x87, 0x0f, 0x01, 0xe8, 0xc7,
+ 0xce, 0x67, 0x0f, 0x02, 0x71, 0xd7, 0x04, 0x30, 0x0f, 0x02, 0x61, 0x87,
+ 0x0f, 0x01, 0x88, 0xc3, 0xeb, 0x07, 0x0f, 0x02, 0x51, 0x87, 0x0f, 0x01,
+ 0xa0, 0xc2, 0x00, 0xa7, 0x0f, 0x02, 0x21, 0x87, 0x0f, 0x02, 0x00, 0x87,
+ 0x0f, 0x01, 0xa8, 0x87, 0x0f, 0x01, 0xb9, 0xc2, 0x00, 0x5b, 0x0f, 0x01,
+ 0xb0, 0xce, 0x70, 0x5c, 0x00, 0xed, 0x68, 0xc4, 0xdd, 0x2f, 0x00, 0xec,
+ 0xd9, 0x87, 0x00, 0xea, 0x30, 0x46, 0x01, 0xab, 0x43, 0xa3, 0x23, 0xca,
+ 0xa2, 0x18, 0x08, 0x3d, 0x08, 0xca, 0xa2, 0x18, 0x08, 0x3c, 0xe0, 0xcc,
+ 0x26, 0x18, 0x00, 0xed, 0x39, 0xc9, 0xac, 0xfa, 0x00, 0x15, 0xb0, 0xca,
+ 0x1e, 0x1b, 0x08, 0x3c, 0xa0, 0xc9, 0xb6, 0x0c, 0x08, 0x3c, 0xe8, 0xc9,
+ 0xb3, 0x4e, 0x08, 0x3c, 0x68, 0xc4, 0x00, 0xd2, 0x08, 0x3c, 0x49, 0xce,
+ 0x04, 0x59, 0x08, 0x3c, 0x40, 0xc8, 0x4a, 0xf2, 0x05, 0x38, 0x59, 0xd2,
+ 0x4a, 0xe8, 0x05, 0x38, 0x80, 0xc4, 0x00, 0x5b, 0x00, 0x17, 0x88, 0xc8,
+ 0x4a, 0xf2, 0x05, 0x38, 0x51, 0xd2, 0x4a, 0xe8, 0x05, 0x38, 0x78, 0xcc,
+ 0x1e, 0x64, 0x00, 0x17, 0xa9, 0xcc, 0x87, 0x84, 0x00, 0x17, 0xb0, 0xc3,
+ 0x11, 0xb7, 0x0e, 0xbe, 0x11, 0xc5, 0xdc, 0x84, 0x0e, 0xbd, 0xc0, 0xc3,
+ 0x11, 0xb7, 0x0e, 0xbd, 0x41, 0xc5, 0xdc, 0x84, 0x0e, 0xbc, 0xf0, 0xc7,
+ 0x01, 0xb0, 0x0e, 0xbd, 0x08, 0xc2, 0x02, 0x6a, 0x0e, 0x8f, 0x39, 0xc4,
+ 0x00, 0x68, 0x0e, 0x8f, 0x30, 0xc4, 0x2d, 0xbe, 0x0e, 0x8e, 0x31, 0xc5,
+ 0x04, 0x91, 0x0e, 0x8d, 0xf1, 0xc5, 0x01, 0x31, 0x0e, 0x8d, 0xe8, 0xc4,
+ 0x2d, 0xbe, 0x0e, 0x8e, 0x21, 0xc5, 0x04, 0x91, 0x0e, 0x8d, 0xd1, 0xc5,
+ 0x01, 0x31, 0x0e, 0x8d, 0xc8, 0xc4, 0x23, 0x1f, 0x0e, 0x8b, 0x19, 0xc4,
+ 0x2d, 0xbe, 0x0e, 0x8a, 0x08, 0xc4, 0x23, 0x1f, 0x0e, 0x8b, 0x09, 0xc4,
+ 0x2d, 0xbe, 0x0e, 0x89, 0xf8, 0x4a, 0x76, 0xc9, 0x43, 0xa3, 0x32, 0xcd,
+ 0x76, 0xc9, 0x0e, 0x8e, 0x48, 0xc5, 0x04, 0x91, 0x0e, 0x8a, 0xa9, 0xc5,
+ 0x01, 0x31, 0x0e, 0x8a, 0xa0, 0x43, 0x11, 0x8a, 0xc3, 0xa3, 0x44, 0x45,
+ 0x0a, 0xe3, 0xc3, 0xa3, 0x56, 0x46, 0x01, 0x17, 0xc3, 0xa3, 0x62, 0x45,
+ 0x00, 0xcd, 0x43, 0xa3, 0x6e, 0x15, 0xc3, 0xa3, 0x7a, 0xc8, 0xc3, 0x3b,
+ 0x0e, 0x8d, 0x61, 0xc6, 0xd6, 0x64, 0x0e, 0x8d, 0x59, 0x42, 0x00, 0x68,
+ 0xc3, 0xa3, 0x90, 0x16, 0xc3, 0xa3, 0xa2, 0xc4, 0x9b, 0x9b, 0x0e, 0x8c,
+ 0x49, 0x42, 0x04, 0x49, 0xc3, 0xa3, 0xac, 0xc3, 0x08, 0x30, 0x0e, 0x8c,
+ 0x31, 0xc5, 0xe3, 0x8c, 0x0e, 0x8c, 0x11, 0x03, 0xc3, 0xa3, 0xb6, 0x46,
+ 0xd9, 0x28, 0x43, 0xa3, 0xc2, 0xc2, 0x00, 0xbe, 0x0e, 0x8d, 0xc3, 0x03,
+ 0xa3, 0xcc, 0x87, 0x0e, 0x8a, 0xe0, 0xc3, 0xec, 0xa5, 0x0e, 0x8b, 0x61,
+ 0xc3, 0xec, 0xa8, 0x0e, 0x8b, 0x59, 0xc3, 0xec, 0xab, 0x0e, 0x8b, 0x50,
+ 0xc3, 0x82, 0xec, 0x0e, 0x88, 0x79, 0xc3, 0x82, 0xa4, 0x0e, 0x88, 0x71,
+ 0xc3, 0x83, 0x28, 0x0e, 0x88, 0x68, 0x12, 0xc3, 0xa3, 0xd2, 0xc4, 0xea,
+ 0x23, 0x00, 0xff, 0xd9, 0xc5, 0x27, 0x58, 0x00, 0xff, 0xd1, 0xc5, 0x73,
+ 0xb2, 0x00, 0xfb, 0x4b, 0x03, 0xa3, 0xe1, 0xc5, 0x68, 0x98, 0x00, 0x1c,
+ 0x78, 0xc4, 0xea, 0x23, 0x00, 0xff, 0xc9, 0xc5, 0x27, 0x58, 0x00, 0xff,
+ 0xc1, 0xc5, 0x73, 0xb2, 0x00, 0xfa, 0x4b, 0x03, 0xa3, 0xe7, 0xc5, 0xda,
+ 0x86, 0x00, 0xfa, 0x43, 0x03, 0xa3, 0xed, 0xc5, 0x68, 0x98, 0x00, 0x1c,
+ 0x60, 0xc4, 0x27, 0x59, 0x00, 0xff, 0x51, 0xc5, 0xd9, 0x8c, 0x00, 0xff,
+ 0x40, 0xc4, 0x5b, 0xe0, 0x00, 0xfa, 0xcb, 0x03, 0xa3, 0xf3, 0xc8, 0x68,
+ 0x8f, 0x00, 0x1d, 0x58, 0xc4, 0x27, 0x59, 0x00, 0xfe, 0xd1, 0xc5, 0xd9,
+ 0x8c, 0x00, 0xfe, 0xc0, 0xc4, 0x5b, 0xe0, 0x00, 0xf9, 0xcb, 0x03, 0xa3,
+ 0xf9, 0xc8, 0x68, 0x8f, 0x00, 0x1d, 0x50, 0x45, 0x00, 0x39, 0x43, 0xa3,
+ 0xff, 0x12, 0xc3, 0xa4, 0x11, 0xc4, 0xea, 0x23, 0x00, 0xfe, 0x59, 0xc5,
+ 0x27, 0x58, 0x00, 0xfe, 0x51, 0xc5, 0x73, 0xb2, 0x00, 0xf9, 0x4b, 0x03,
+ 0xa4, 0x20, 0xc5, 0x68, 0x98, 0x00, 0x1c, 0x48, 0xc4, 0xea, 0x23, 0x00,
+ 0xfe, 0x49, 0xc5, 0x27, 0x58, 0x00, 0xfe, 0x41, 0xc5, 0x73, 0xb2, 0x00,
+ 0xf8, 0xcb, 0x03, 0xa4, 0x26, 0xc5, 0xda, 0x86, 0x00, 0xf8, 0xc3, 0x03,
+ 0xa4, 0x2c, 0xc5, 0x68, 0x98, 0x00, 0x1c, 0x40, 0x12, 0xc3, 0xa4, 0x32,
+ 0xc4, 0xea, 0x23, 0x00, 0xfd, 0xd9, 0x18, 0xc3, 0xa4, 0x41, 0xc6, 0x61,
+ 0xbc, 0x00, 0xfd, 0xc9, 0xc5, 0x73, 0xb2, 0x00, 0xf8, 0x4b, 0x03, 0xa4,
+ 0x4d, 0xc5, 0x68, 0x98, 0x00, 0x1c, 0x30, 0x12, 0xc3, 0xa4, 0x53, 0xc4,
+ 0xea, 0x23, 0x00, 0xfb, 0xeb, 0x03, 0xa4, 0x65, 0xcd, 0x49, 0x19, 0x00,
+ 0xff, 0x99, 0xc5, 0x27, 0x58, 0x00, 0xfb, 0xe3, 0x03, 0xa4, 0x6b, 0xc5,
+ 0x73, 0xb2, 0x00, 0xfb, 0x0b, 0x03, 0xa4, 0x71, 0xc5, 0x68, 0x98, 0x00,
+ 0x1e, 0x70, 0xc4, 0xea, 0x23, 0x00, 0xfb, 0xc9, 0xc5, 0x27, 0x58, 0x00,
+ 0xfb, 0xc1, 0xc5, 0x73, 0xb2, 0x00, 0xfa, 0x0b, 0x03, 0xa4, 0x77, 0xc5,
+ 0xda, 0x86, 0x00, 0xfa, 0x03, 0x03, 0xa4, 0x7d, 0xc5, 0x68, 0x98, 0x00,
+ 0x1e, 0x60, 0xc8, 0x68, 0x8f, 0x00, 0x1e, 0x5b, 0x03, 0xa4, 0x83, 0xc4,
+ 0x5b, 0xe0, 0x00, 0xfa, 0x8a, 0x03, 0xa4, 0x89, 0xca, 0x90, 0xcc, 0x00,
+ 0xff, 0x31, 0xc4, 0x7d, 0xa4, 0x00, 0xfa, 0x82, 0x03, 0xa4, 0x8f, 0xc5,
+ 0xd9, 0x8c, 0x00, 0xff, 0x01, 0xc4, 0x27, 0x59, 0x00, 0xfb, 0xd0, 0xc8,
+ 0x68, 0x8f, 0x00, 0x1e, 0x53, 0x03, 0xa4, 0x95, 0xc4, 0x5b, 0xe0, 0x00,
+ 0xf9, 0x8a, 0x03, 0xa4, 0x9b, 0xca, 0x90, 0xcc, 0x00, 0xfe, 0xb1, 0xc4,
+ 0x7d, 0xa4, 0x00, 0xf9, 0x82, 0x03, 0xa4, 0xa1, 0xc5, 0xd9, 0x8c, 0x00,
+ 0xfe, 0x81, 0xc4, 0x27, 0x59, 0x00, 0xfb, 0xb0, 0x12, 0xc3, 0xa4, 0xa7,
+ 0xc4, 0xea, 0x23, 0x00, 0xfb, 0xab, 0x03, 0xa4, 0xb9, 0xcd, 0x49, 0x19,
+ 0x00, 0xfe, 0x19, 0xc5, 0x27, 0x58, 0x00, 0xfb, 0xa3, 0x03, 0xa4, 0xbf,
+ 0xc5, 0x73, 0xb2, 0x00, 0xf9, 0x0b, 0x03, 0xa4, 0xc5, 0xc5, 0x68, 0x98,
+ 0x00, 0x1d, 0x70, 0xc4, 0xea, 0x23, 0x00, 0xfb, 0x99, 0xc5, 0x27, 0x58,
+ 0x00, 0xfb, 0x91, 0xc5, 0x73, 0xb2, 0x00, 0xf8, 0x8b, 0x03, 0xa4, 0xcb,
+ 0xc5, 0xda, 0x86, 0x00, 0xf8, 0x83, 0x03, 0xa4, 0xd1, 0xc5, 0x68, 0x98,
+ 0x00, 0x1d, 0x68, 0x12, 0xc3, 0xa4, 0xd7, 0xc4, 0xea, 0x23, 0x00, 0xfb,
+ 0x8b, 0x03, 0xa4, 0xe9, 0xcd, 0x49, 0x19, 0x00, 0xfd, 0x99, 0x18, 0xc3,
+ 0xa4, 0xef, 0xc6, 0x61, 0xbc, 0x00, 0xfd, 0x89, 0xc5, 0x73, 0xb2, 0x00,
+ 0xf8, 0x0b, 0x03, 0xa4, 0xfe, 0xc5, 0x68, 0x98, 0x00, 0x1d, 0x60, 0xc7,
+ 0xb9, 0x94, 0x08, 0x0a, 0x59, 0xc7, 0x63, 0x9b, 0x08, 0x0a, 0x90, 0xc7,
+ 0x0d, 0x7f, 0x08, 0x0a, 0x2b, 0x03, 0xa5, 0x04, 0x16, 0xc3, 0xa5, 0x08,
+ 0xc7, 0x63, 0x9b, 0x08, 0x0a, 0x78, 0x16, 0xc3, 0xa5, 0x17, 0xc7, 0x63,
+ 0x9b, 0x08, 0x0a, 0x88, 0xc7, 0x0d, 0x7f, 0x08, 0x0b, 0x51, 0xc8, 0x4f,
+ 0xa2, 0x08, 0x0b, 0x88, 0xc4, 0x0d, 0x89, 0x08, 0x0b, 0x29, 0xcb, 0x12,
+ 0xe3, 0x08, 0x0b, 0x58, 0xc8, 0x4f, 0xa2, 0x08, 0x0b, 0x91, 0xc7, 0x0d,
+ 0x7f, 0x08, 0x0b, 0x70, 0xc8, 0x0d, 0x7e, 0x08, 0x0b, 0x68, 0xcf, 0x65,
+ 0xdc, 0x08, 0x0b, 0x38, 0xc2, 0x01, 0x0e, 0x08, 0x1e, 0x50, 0x8a, 0x08,
+ 0x1e, 0x61, 0xc3, 0xe6, 0x4f, 0x08, 0x1e, 0x68, 0x11, 0xc3, 0xa5, 0x26,
+ 0xc4, 0x67, 0x9a, 0x0e, 0x7d, 0xca, 0x03, 0xa5, 0x38, 0xd4, 0x3b, 0x72,
+ 0x00, 0xef, 0xf9, 0xd2, 0x4b, 0xd2, 0x00, 0x1a, 0xb0, 0xc2, 0x05, 0x57,
+ 0x09, 0x19, 0x99, 0xc3, 0x02, 0xe4, 0x09, 0x19, 0x90, 0xc9, 0x43, 0x91,
+ 0x09, 0x12, 0xe8, 0xca, 0x9d, 0xea, 0x09, 0x10, 0x79, 0xc9, 0x43, 0x91,
+ 0x09, 0x10, 0x70, 0xc8, 0xac, 0x3e, 0x09, 0x1c, 0x51, 0xc4, 0x5b, 0x6b,
+ 0x09, 0x10, 0x08, 0xa0, 0x09, 0x10, 0x33, 0x03, 0xa5, 0x3e, 0x9f, 0x09,
+ 0x10, 0x28, 0xc9, 0xab, 0xfe, 0x09, 0x0e, 0x38, 0x94, 0x09, 0x0e, 0x28,
+ 0xc8, 0x69, 0xf6, 0x09, 0x0f, 0x39, 0x83, 0x09, 0x0f, 0x30, 0xc2, 0x3a,
+ 0x6c, 0x09, 0x0f, 0x19, 0x89, 0x09, 0x0f, 0x10, 0xc2, 0x5a, 0xd1, 0x09,
+ 0x0e, 0xfb, 0x03, 0xa5, 0x44, 0x4e, 0x74, 0xa0, 0xc3, 0xa5, 0x4a, 0xca,
+ 0xaa, 0x88, 0x09, 0x0e, 0xe0, 0xc8, 0xa8, 0x64, 0x09, 0x0e, 0xc8, 0x8e,
+ 0x09, 0x0e, 0xb8, 0x8e, 0x09, 0x0e, 0x93, 0x03, 0xa5, 0x56, 0xa0, 0x09,
+ 0x0e, 0x88, 0x90, 0x09, 0x0e, 0x80, 0x46, 0x21, 0x5d, 0x43, 0xa5, 0x5c,
+ 0x8e, 0x09, 0x0e, 0x48, 0xc3, 0x1d, 0x43, 0x09, 0x0d, 0xe1, 0xc3, 0x1d,
+ 0x6a, 0x09, 0x0d, 0xd9, 0xca, 0x9f, 0xac, 0x09, 0x0d, 0xd0, 0x8f, 0x09,
+ 0x26, 0x39, 0x86, 0x09, 0x07, 0x38, 0xc9, 0xae, 0x08, 0x09, 0x07, 0x30,
+ 0xc2, 0x03, 0xab, 0x09, 0x26, 0x31, 0xc2, 0x64, 0x37, 0x09, 0x26, 0x28,
+ 0xca, 0x55, 0x8e, 0x09, 0x26, 0x08, 0x83, 0x09, 0x25, 0xf1, 0xcc, 0x87,
+ 0xcc, 0x09, 0x06, 0x88, 0xc8, 0xac, 0x3d, 0x09, 0x06, 0x98, 0x46, 0x21,
+ 0x5d, 0x43, 0xa5, 0x68, 0xc7, 0x21, 0x5d, 0x09, 0x06, 0x78, 0xc6, 0x47,
+ 0x64, 0x09, 0x25, 0xc9, 0xc8, 0x61, 0x7e, 0x09, 0x25, 0xc0, 0xc4, 0x3a,
+ 0xa6, 0x09, 0x25, 0xb9, 0xc9, 0xa2, 0x87, 0x09, 0x06, 0x28, 0xc9, 0xaa,
+ 0xf9, 0x09, 0x05, 0xf0, 0x45, 0x03, 0xb3, 0xc3, 0xa5, 0x74, 0x46, 0x1f,
+ 0xd2, 0xc3, 0xa5, 0x80, 0x48, 0x03, 0x28, 0xc3, 0xa5, 0x96, 0xc7, 0x29,
+ 0xba, 0x0e, 0xc7, 0xd1, 0x45, 0x05, 0xa6, 0xc3, 0xa5, 0xab, 0xc4, 0x0f,
+ 0x54, 0x0e, 0xc7, 0xb0, 0x46, 0x0e, 0xcd, 0xc3, 0xa5, 0xbd, 0x14, 0xc3,
+ 0xa5, 0xdf, 0xc6, 0x01, 0x8c, 0x0e, 0xc0, 0x73, 0x03, 0xa5, 0xeb, 0xc6,
+ 0x5a, 0x59, 0x0e, 0xc0, 0x5b, 0x03, 0xa5, 0xef, 0xd0, 0x5a, 0x4f, 0x0e,
+ 0xc0, 0x9b, 0x03, 0xa5, 0xf3, 0xc4, 0x16, 0x02, 0x0e, 0xc0, 0x33, 0x03,
+ 0xa5, 0xf9, 0xc6, 0xd5, 0xce, 0x0e, 0xc0, 0x50, 0xca, 0x14, 0xc6, 0x0e,
+ 0xc6, 0x69, 0xcd, 0x3b, 0x3c, 0x0e, 0xc6, 0x40, 0xc6, 0x14, 0xca, 0x0e,
+ 0xc6, 0x59, 0x47, 0xc4, 0xb9, 0xc3, 0xa5, 0xff, 0x05, 0xc3, 0xa6, 0x0b,
+ 0xcf, 0x65, 0xaf, 0x0e, 0xc1, 0x80, 0xcb, 0x49, 0x87, 0x0e, 0xc6, 0x48,
+ 0x00, 0x43, 0xa6, 0x17, 0xc6, 0x0e, 0xcc, 0x0e, 0xc4, 0xe0, 0xc4, 0x0e,
+ 0xce, 0x0e, 0xc4, 0xd1, 0xcc, 0x8a, 0xfc, 0x0e, 0xc4, 0xc8, 0x00, 0x43,
+ 0xa6, 0x23, 0xcb, 0x49, 0x87, 0x0e, 0xc3, 0x1a, 0x03, 0xa6, 0x2f, 0xca,
+ 0x49, 0x88, 0x0e, 0xc2, 0xf1, 0xd3, 0x41, 0x73, 0x0e, 0xc2, 0x6a, 0x03,
+ 0xa6, 0x35, 0x00, 0x43, 0xa6, 0x39, 0x00, 0x43, 0xa6, 0x54, 0x00, 0x43,
+ 0xa6, 0x69, 0xc4, 0x0d, 0xa1, 0x0e, 0xc6, 0x10, 0xc6, 0x14, 0xca, 0x0e,
+ 0xc5, 0x41, 0xc4, 0x01, 0xbd, 0x0e, 0xc4, 0x48, 0xc4, 0x0d, 0xa1, 0x0e,
+ 0xc4, 0xf0, 0xc5, 0x0e, 0xcd, 0x0e, 0xc7, 0x83, 0x03, 0xa6, 0x75, 0xc6,
+ 0x5a, 0x59, 0x0e, 0xc6, 0xd9, 0xcb, 0x14, 0xc5, 0x0e, 0xc6, 0x09, 0x47,
+ 0x01, 0x8c, 0x43, 0xa6, 0x79, 0xc5, 0x01, 0x62, 0x0e, 0xc5, 0x13, 0x03,
+ 0xa6, 0x88, 0xc5, 0x0e, 0xcd, 0x0e, 0xc4, 0xd8, 0xcf, 0x62, 0x3a, 0x0e,
+ 0xc4, 0x18, 0xc8, 0xc0, 0x0b, 0x0e, 0xc4, 0x09, 0x46, 0x0e, 0xcd, 0x43,
+ 0xa6, 0x8e, 0x00, 0x43, 0xa6, 0x9a, 0x00, 0x43, 0xa6, 0xa6, 0xc7, 0x29,
+ 0xba, 0x0e, 0xc3, 0x99, 0xc4, 0x05, 0xa6, 0x0e, 0xc3, 0x78, 0x00, 0x43,
+ 0xa6, 0xb5, 0xc5, 0x03, 0xf4, 0x0e, 0xc2, 0xa0, 0xc5, 0x16, 0x01, 0x0e,
+ 0xc6, 0xa8, 0xcb, 0x14, 0xc5, 0x0e, 0xc5, 0xd9, 0xc6, 0x01, 0x8c, 0x0e,
+ 0xc0, 0x7b, 0x03, 0xa6, 0xc1, 0xc5, 0x5a, 0x59, 0x0e, 0xc0, 0x69, 0xc4,
+ 0x16, 0x02, 0x0e, 0xc0, 0x38, 0xc5, 0xdf, 0x86, 0x0e, 0xcd, 0x69, 0xca,
+ 0x9f, 0xc0, 0x0e, 0xcd, 0x30, 0xc5, 0x15, 0x2e, 0x0e, 0xcc, 0x73, 0x03,
+ 0xa6, 0xc5, 0xc6, 0x04, 0x1b, 0x0e, 0xcc, 0x69, 0xc5, 0x00, 0x3e, 0x0e,
+ 0xcc, 0x60, 0xc6, 0x04, 0x1b, 0x0e, 0xcc, 0x89, 0xc5, 0x00, 0x3e, 0x0e,
+ 0xcc, 0x80, 0xc2, 0x00, 0x15, 0x0e, 0xcc, 0x58, 0x45, 0x01, 0xac, 0xc3,
+ 0xa6, 0xcb, 0xc6, 0x12, 0x4f, 0x01, 0x5b, 0x99, 0x4a, 0x04, 0x68, 0x43,
+ 0xa6, 0xf5, 0xe0, 0x02, 0x07, 0x01, 0x4b, 0x28, 0xd0, 0x5b, 0x9f, 0x0f,
+ 0xc1, 0x91, 0xcb, 0x5b, 0xa4, 0x0f, 0xc1, 0x71, 0xca, 0xa0, 0xce, 0x0f,
+ 0xc1, 0x51, 0x47, 0x02, 0x91, 0xc3, 0xa6, 0xfb, 0x49, 0xb3, 0x57, 0xc3,
+ 0xa7, 0x07, 0xcc, 0x86, 0x70, 0x0f, 0xc1, 0x11, 0xcc, 0x85, 0x98, 0x0f,
+ 0xc1, 0x30, 0xe0, 0x04, 0x67, 0x01, 0x5c, 0x10, 0x46, 0x01, 0xab, 0x43,
+ 0xa7, 0x13, 0xe0, 0x0a, 0xc7, 0x01, 0x4b, 0x48, 0x0e, 0xc3, 0xa7, 0x1f,
+ 0x14, 0x43, 0xa7, 0x2b, 0xcf, 0x2e, 0xd8, 0x01, 0x48, 0xa1, 0xd6, 0x2e,
+ 0xd1, 0x01, 0x48, 0xa8, 0xcc, 0x05, 0x3b, 0x07, 0xe8, 0x51, 0xcb, 0x10,
+ 0x7a, 0x07, 0xe9, 0x70, 0x45, 0x1a, 0x6a, 0xc3, 0xa7, 0x31, 0xce, 0x40,
+ 0x48, 0x07, 0xed, 0x50, 0xcc, 0x10, 0x79, 0x07, 0xeb, 0x59, 0xca, 0x2a,
+ 0xb4, 0x07, 0xeb, 0x50, 0xca, 0x2a, 0xb4, 0x07, 0xeb, 0x61, 0xcc, 0x10,
+ 0x79, 0x07, 0xeb, 0x68, 0xcc, 0x10, 0x79, 0x07, 0xeb, 0x29, 0xca, 0x2a,
+ 0xb4, 0x07, 0xeb, 0x20, 0xdc, 0x14, 0x86, 0x07, 0xea, 0x61, 0xd2, 0x4b,
+ 0x54, 0x07, 0xef, 0xd0, 0xe0, 0x05, 0x27, 0x07, 0xef, 0x80, 0xca, 0x2a,
+ 0xb4, 0x07, 0xeb, 0x89, 0xcc, 0x10, 0x79, 0x07, 0xeb, 0x90, 0xca, 0x2a,
+ 0xb4, 0x07, 0xea, 0x89, 0xcc, 0x10, 0x79, 0x07, 0xea, 0x90, 0xca, 0x2a,
+ 0xb4, 0x07, 0xe3, 0x49, 0xcd, 0x05, 0x3a, 0x07, 0xe0, 0x20, 0xca, 0x2a,
+ 0xb4, 0x07, 0xdf, 0xa9, 0xcd, 0x05, 0x3a, 0x07, 0xdf, 0xa0, 0x48, 0x06,
+ 0x7f, 0xc3, 0xa7, 0x3d, 0xca, 0x2a, 0xb4, 0x07, 0xdf, 0x59, 0xcd, 0x05,
+ 0x3a, 0x07, 0xdf, 0x50, 0xca, 0x2a, 0xb4, 0x07, 0xdf, 0x69, 0xcd, 0x05,
+ 0x3a, 0x07, 0xdf, 0x60, 0xcc, 0x05, 0x3b, 0x07, 0xe0, 0x11, 0xcb, 0x10,
+ 0x7a, 0x07, 0xe4, 0xd0, 0xcc, 0x05, 0x3b, 0x07, 0xe0, 0x09, 0xcb, 0x10,
+ 0x7a, 0x07, 0xe4, 0xc0, 0xcb, 0x6a, 0x72, 0x07, 0xe7, 0x01, 0xcc, 0x10,
+ 0x79, 0x07, 0xe4, 0xd8, 0xcb, 0x10, 0x7a, 0x07, 0xdf, 0xc1, 0xcc, 0x05,
+ 0x3b, 0x07, 0xdf, 0xb0, 0xca, 0x2a, 0xb4, 0x07, 0xeb, 0xf1, 0xcc, 0x10,
+ 0x79, 0x07, 0xed, 0xa0, 0xcf, 0x0d, 0xe6, 0x00, 0x31, 0xf9, 0xcd, 0x0b,
+ 0x67, 0x00, 0x31, 0xf0, 0xca, 0x09, 0x9d, 0x00, 0x3b, 0xb9, 0x16, 0x43,
+ 0xa7, 0x49, 0xc5, 0x00, 0x34, 0x00, 0x35, 0x1b, 0x03, 0xa7, 0x55, 0xcb,
+ 0x96, 0x98, 0x00, 0x35, 0x10, 0x4a, 0x0d, 0xe6, 0xc3, 0xa7, 0x5b, 0xcd,
+ 0x0b, 0x7a, 0x00, 0x3b, 0x00, 0xcf, 0x0d, 0xe6, 0x00, 0x35, 0xa1, 0xcd,
+ 0x0b, 0x7a, 0x00, 0x35, 0x90, 0xd7, 0x26, 0xf8, 0x00, 0x46, 0x39, 0x98,
+ 0x00, 0x35, 0xa8, 0xc8, 0xaa, 0xa8, 0x00, 0x45, 0x31, 0xc7, 0x18, 0x1b,
+ 0x00, 0x35, 0xb0, 0xc5, 0x00, 0x34, 0x00, 0x35, 0xc1, 0xc5, 0x03, 0x50,
+ 0x00, 0x35, 0xb8, 0xc5, 0x00, 0x34, 0x00, 0x46, 0x31, 0xc5, 0x03, 0x50,
+ 0x00, 0x46, 0x28, 0xc5, 0x00, 0x34, 0x00, 0x45, 0x99, 0xc5, 0x03, 0x50,
+ 0x00, 0x35, 0x01, 0xd8, 0x22, 0x94, 0x00, 0x3a, 0xf0, 0xc5, 0x03, 0x50,
+ 0x00, 0x3a, 0xe9, 0xd0, 0x25, 0xb4, 0x00, 0x3a, 0xf8, 0x49, 0xb1, 0xb9,
+ 0xc3, 0xa7, 0x67, 0xd3, 0x45, 0x29, 0x00, 0x43, 0x93, 0x03, 0xa7, 0x8f,
+ 0xc9, 0x18, 0x19, 0x00, 0x43, 0xd1, 0xd2, 0x4a, 0x8e, 0x00, 0x43, 0x99,
+ 0x4b, 0x59, 0x6f, 0xc3, 0xa7, 0x95, 0x46, 0x08, 0x89, 0xc3, 0xa7, 0xa1,
+ 0xcb, 0x87, 0x9c, 0x00, 0x31, 0x13, 0x03, 0xa7, 0xb3, 0x5d, 0x10, 0xdc,
+ 0x43, 0xa7, 0xb7, 0x00, 0x43, 0xa7, 0xc3, 0xcd, 0x05, 0x3a, 0x07, 0xf7,
+ 0x79, 0xca, 0x2a, 0xb4, 0x07, 0xf7, 0x80, 0x48, 0x0b, 0x67, 0xc3, 0xa7,
+ 0xcf, 0x4a, 0x0d, 0xe6, 0x43, 0xa7, 0xdb, 0x44, 0x0b, 0xf8, 0xc3, 0xa7,
+ 0xed, 0x16, 0xc3, 0xa7, 0xf9, 0xc4, 0x01, 0xbd, 0x00, 0x35, 0x58, 0xcb,
+ 0x10, 0x7a, 0x07, 0xf6, 0xd9, 0xcc, 0x05, 0x3b, 0x07, 0xf6, 0xc8, 0xcb,
+ 0x10, 0x7a, 0x07, 0xdc, 0x01, 0xcc, 0x05, 0x3b, 0x07, 0xdb, 0xf0, 0xcb,
+ 0x10, 0x7a, 0x07, 0xdb, 0x01, 0xcc, 0x05, 0x3b, 0x07, 0xda, 0xf0, 0x98,
+ 0x00, 0x45, 0xf9, 0xc9, 0xb1, 0x44, 0x00, 0x45, 0xc0, 0x00, 0x43, 0xa8,
+ 0x05, 0xcb, 0x10, 0x7a, 0x07, 0xdb, 0x21, 0xcc, 0x05, 0x3b, 0x07, 0xdb,
+ 0x10, 0xcd, 0x0b, 0x67, 0x00, 0x45, 0x19, 0x4a, 0x0d, 0xe6, 0x43, 0xa8,
+ 0x17, 0xcc, 0x05, 0x3b, 0x07, 0xf4, 0xc9, 0xcb, 0x10, 0x7a, 0x07, 0xf4,
+ 0xd8, 0x52, 0x18, 0x07, 0xc3, 0xa8, 0x23, 0xcf, 0x63, 0x84, 0x00, 0x36,
+ 0x89, 0xc3, 0x13, 0xc8, 0x00, 0x36, 0x68, 0x00, 0x43, 0xa8, 0x35, 0x45,
+ 0x01, 0xac, 0xc3, 0xa8, 0x45, 0xca, 0x2a, 0xb4, 0x07, 0xdd, 0x79, 0xcd,
+ 0x05, 0x3a, 0x07, 0xdd, 0x70, 0x45, 0x00, 0x3f, 0xc3, 0xa8, 0x54, 0xc5,
+ 0x03, 0x82, 0x00, 0x3a, 0xd8, 0xc5, 0x03, 0x50, 0x00, 0x34, 0xb9, 0xd0,
+ 0x25, 0xb4, 0x00, 0x3b, 0x58, 0xce, 0x18, 0x14, 0x00, 0x34, 0xb0, 0xca,
+ 0xa8, 0x76, 0x00, 0x45, 0xb1, 0x98, 0x00, 0x3a, 0xb2, 0x03, 0xa8, 0x67,
+ 0xdb, 0x18, 0x07, 0x00, 0x3a, 0xa1, 0x4a, 0x0d, 0xe6, 0x43, 0xa8, 0x6d,
+ 0xcb, 0x10, 0x7a, 0x07, 0xda, 0xe1, 0xcc, 0x05, 0x3b, 0x07, 0xda, 0xd0,
+ 0xd2, 0x48, 0x84, 0x00, 0x45, 0xa8, 0xc5, 0x00, 0x34, 0x00, 0x45, 0x79,
+ 0xc5, 0x03, 0x50, 0x00, 0x34, 0xf0, 0xcf, 0x63, 0x84, 0x00, 0x34, 0xdb,
+ 0x03, 0xa8, 0x7c, 0xd8, 0x25, 0xac, 0x00, 0x3b, 0x68, 0xe0, 0x08, 0xc7,
+ 0x00, 0x3b, 0xe8, 0xe0, 0x0a, 0x07, 0x00, 0x3b, 0x80, 0x16, 0xc3, 0xa8,
+ 0x82, 0x49, 0x1d, 0xf3, 0xc3, 0xa8, 0x8e, 0xcf, 0x39, 0xbf, 0x00, 0x34,
+ 0x81, 0xc9, 0x0d, 0xd7, 0x00, 0x34, 0x53, 0x03, 0xa8, 0x98, 0xc4, 0x01,
+ 0xbd, 0x00, 0x34, 0x48, 0xcc, 0x05, 0x3b, 0x07, 0xf5, 0xe9, 0xcb, 0x10,
+ 0x7a, 0x07, 0xf5, 0xf8, 0xcd, 0x05, 0x3a, 0x07, 0xf4, 0x29, 0xca, 0x2a,
+ 0xb4, 0x07, 0xf4, 0x30, 0xc5, 0x03, 0x50, 0x00, 0x3b, 0x29, 0xc5, 0x00,
+ 0x34, 0x00, 0x3b, 0x30, 0xcb, 0x10, 0x7a, 0x07, 0xdc, 0xe1, 0xcc, 0x05,
+ 0x3b, 0x07, 0xdc, 0xd0, 0xcf, 0x0d, 0xe6, 0x00, 0x35, 0xe9, 0xcd, 0x0b,
+ 0x7a, 0x00, 0x3b, 0x38, 0xc3, 0x00, 0x44, 0x00, 0x3b, 0x41, 0x98, 0x00,
+ 0x3b, 0x48, 0xcd, 0x05, 0x3a, 0x07, 0xdc, 0xf1, 0xca, 0x2a, 0xb4, 0x07,
+ 0xdc, 0xf8, 0xd6, 0x31, 0xe9, 0x00, 0x44, 0x51, 0x16, 0xc3, 0xa8, 0x9e,
+ 0xcb, 0x08, 0x89, 0x00, 0x34, 0x09, 0x46, 0x02, 0xff, 0xc3, 0xa8, 0xaa,
+ 0x58, 0x21, 0x2c, 0x43, 0xa8, 0xb0, 0xcc, 0x05, 0x3b, 0x07, 0xf5, 0x09,
+ 0xcb, 0x10, 0x7a, 0x07, 0xf5, 0x18, 0xcb, 0x6a, 0x72, 0x07, 0xdd, 0x69,
+ 0x0b, 0xc3, 0xa8, 0xba, 0xca, 0x2a, 0xb4, 0x07, 0xdd, 0x58, 0xcb, 0x6a,
+ 0x72, 0x07, 0xdd, 0x49, 0x0b, 0xc3, 0xa8, 0xc6, 0xca, 0x2a, 0xb4, 0x07,
+ 0xdd, 0x39, 0xd0, 0x5f, 0x5f, 0x00, 0x36, 0x10, 0x00, 0x43, 0xa8, 0xd2,
+ 0xcf, 0x01, 0x98, 0x0f, 0xdd, 0x23, 0x03, 0xa8, 0xde, 0xe0, 0x06, 0x27,
+ 0x0f, 0xdd, 0x40, 0xcf, 0x01, 0x98, 0x0f, 0xdd, 0x2b, 0x03, 0xa8, 0xe4,
+ 0xdf, 0x0c, 0xe0, 0x0f, 0xdd, 0x48, 0x4b, 0x15, 0x9b, 0xc3, 0xa8, 0xea,
+ 0xdc, 0x12, 0xe2, 0x0f, 0xd2, 0x30, 0x96, 0x0b, 0x4b, 0xb8, 0xc2, 0x08,
+ 0xc6, 0x0b, 0x47, 0xc8, 0xa5, 0x01, 0x45, 0xf9, 0xa4, 0x01, 0x43, 0xfa,
+ 0x03, 0xa8, 0xf6, 0xa5, 0x01, 0x46, 0xf8, 0xa5, 0x01, 0x47, 0x78, 0xa5,
+ 0x01, 0x47, 0xb8, 0xa5, 0x01, 0x47, 0xd8, 0xa5, 0x01, 0x47, 0xe8, 0xa5,
+ 0x01, 0x47, 0xf0, 0xd0, 0x5b, 0x9f, 0x0f, 0xc1, 0x81, 0xcb, 0x5b, 0xa4,
+ 0x0f, 0xc1, 0x61, 0x49, 0xb3, 0x57, 0xc3, 0xa8, 0xfa, 0x47, 0x02, 0x91,
+ 0xc3, 0xa9, 0x06, 0xcc, 0x86, 0x70, 0x0f, 0xc1, 0x01, 0xcc, 0x85, 0x98,
+ 0x0f, 0xc1, 0x21, 0xca, 0xa0, 0xce, 0x0f, 0xc1, 0x40, 0xe0, 0x00, 0x47,
+ 0x01, 0x5c, 0x00, 0x46, 0x01, 0xab, 0x43, 0xa9, 0x12, 0xcf, 0x2e, 0xd8,
+ 0x01, 0x59, 0xc9, 0xd6, 0x2e, 0xd1, 0x01, 0x59, 0xd0, 0xe0, 0x04, 0xc7,
+ 0x01, 0x4b, 0x38, 0x0e, 0xc3, 0xa9, 0x1e, 0xdf, 0x0c, 0x64, 0x01, 0x4b,
+ 0x30, 0xcc, 0x57, 0xac, 0x0e, 0xf8, 0xa9, 0xc8, 0x70, 0x62, 0x00, 0x13,
+ 0xd3, 0x03, 0xa9, 0x2a, 0xcc, 0x1e, 0xfa, 0x05, 0x5b, 0x41, 0xc4, 0x04,
+ 0x63, 0x00, 0x13, 0xd9, 0xc4, 0x13, 0xc7, 0x01, 0x63, 0xc8, 0x46, 0x01,
+ 0xab, 0x43, 0xa9, 0x30, 0xcc, 0x26, 0x18, 0x05, 0x5a, 0x20, 0xc9, 0xb3,
+ 0x4e, 0x00, 0x15, 0x78, 0xd2, 0x4d, 0x94, 0x0e, 0xf9, 0x41, 0xca, 0x1e,
+ 0xfc, 0x00, 0xeb, 0x90, 0x45, 0x00, 0x39, 0x43, 0xa9, 0x3c, 0xc9, 0x6a,
+ 0xec, 0x00, 0xf2, 0xf9, 0xc7, 0x70, 0x63, 0x00, 0x13, 0xe0, 0x42, 0x00,
+ 0xd0, 0xc3, 0xa9, 0x48, 0xca, 0x1e, 0x5f, 0x00, 0x10, 0x88, 0xcb, 0x4b,
+ 0x49, 0x05, 0x5a, 0x49, 0xc6, 0xbf, 0xd5, 0x00, 0x0a, 0xb9, 0xc4, 0x69,
+ 0xdb, 0x00, 0x0a, 0xc8, 0xc5, 0x00, 0x34, 0x00, 0xf2, 0x39, 0xc5, 0x03,
+ 0x50, 0x00, 0xf2, 0x28, 0xce, 0x04, 0x59, 0x05, 0x5b, 0x31, 0xc4, 0x00,
+ 0xd2, 0x00, 0x15, 0x28, 0xc9, 0x1e, 0x92, 0x00, 0x14, 0x20, 0xc9, 0x0d,
+ 0xd7, 0x00, 0xf7, 0x19, 0xc5, 0x1f, 0x01, 0x00, 0xf7, 0x09, 0xca, 0xa1,
+ 0x8c, 0x00, 0xf6, 0xf9, 0xc5, 0x1e, 0x64, 0x00, 0xf6, 0xe9, 0xc5, 0x34,
+ 0x21, 0x00, 0xf6, 0xd8, 0xc9, 0x0d, 0xd7, 0x00, 0xf6, 0xc9, 0xc5, 0x1f,
+ 0x01, 0x00, 0xf6, 0xb9, 0xca, 0xa1, 0x8c, 0x00, 0xf6, 0xa9, 0xc5, 0x1e,
+ 0x64, 0x00, 0xf6, 0x99, 0xc5, 0x34, 0x21, 0x00, 0xf6, 0x88, 0xc5, 0x00,
+ 0x34, 0x00, 0xf6, 0x69, 0xc5, 0x03, 0x50, 0x00, 0x11, 0x7a, 0x03, 0xa9,
+ 0x57, 0xc5, 0x34, 0x21, 0x00, 0x0a, 0x89, 0xc5, 0x1e, 0x64, 0x00, 0x10,
+ 0x68, 0xc3, 0x00, 0xd3, 0x00, 0x14, 0x99, 0xc4, 0x69, 0xdb, 0x00, 0x0f,
+ 0x78, 0x44, 0x00, 0x3a, 0xc3, 0xa9, 0x5d, 0xc5, 0x00, 0x34, 0x00, 0xf0,
+ 0xc8, 0xc5, 0x00, 0x34, 0x00, 0xf5, 0xc9, 0xc5, 0x03, 0x50, 0x00, 0x08,
+ 0xb8, 0x45, 0x00, 0x39, 0x43, 0xa9, 0x7b, 0xc7, 0x0d, 0xd9, 0x00, 0xf7,
+ 0x29, 0x45, 0x02, 0x93, 0x43, 0xa9, 0x99, 0x00, 0x43, 0xa9, 0xa5, 0xc9,
+ 0xa4, 0x99, 0x00, 0xf3, 0xc9, 0xc5, 0x00, 0x34, 0x00, 0xf3, 0xa8, 0xc6,
+ 0x00, 0x33, 0x00, 0xf3, 0xb8, 0xc9, 0x0d, 0xd7, 0x00, 0xf5, 0xb9, 0xc5,
+ 0x1f, 0x01, 0x00, 0xf5, 0xa9, 0xca, 0xa1, 0x8c, 0x00, 0xf5, 0x99, 0xc5,
+ 0x1e, 0x64, 0x00, 0xf5, 0x89, 0xc5, 0x34, 0x21, 0x00, 0xf5, 0x78, 0x45,
+ 0x00, 0x39, 0x43, 0xa9, 0xb1, 0x42, 0x00, 0xd0, 0xc3, 0xa9, 0xcf, 0xca,
+ 0x1e, 0x5f, 0x00, 0x10, 0x08, 0xcb, 0x9c, 0x9c, 0x00, 0x0e, 0xf8, 0xcd,
+ 0x64, 0xa3, 0x00, 0xf4, 0xd1, 0x43, 0x01, 0x5f, 0x43, 0xa9, 0xde, 0xca,
+ 0x25, 0x41, 0x05, 0x5a, 0xc9, 0xd2, 0x4a, 0x6a, 0x05, 0x5a, 0xc0, 0xc9,
+ 0x0d, 0xd7, 0x00, 0xf7, 0x81, 0xc5, 0x1f, 0x01, 0x00, 0xf7, 0x71, 0xca,
+ 0xa1, 0x8c, 0x00, 0xf7, 0x61, 0xc5, 0x1e, 0x64, 0x00, 0xf7, 0x51, 0xc5,
+ 0x34, 0x21, 0x00, 0xf7, 0x40, 0xc5, 0x34, 0x21, 0x00, 0x0b, 0x81, 0xc5,
+ 0x1e, 0x64, 0x00, 0x10, 0xa0, 0xc5, 0x00, 0x34, 0x00, 0xf3, 0x91, 0x44,
+ 0x00, 0x3a, 0x43, 0xa9, 0xed, 0xcb, 0x9c, 0x9c, 0x00, 0x11, 0x80, 0xc9,
+ 0x0d, 0xd7, 0x00, 0xf6, 0x31, 0xc5, 0x1f, 0x01, 0x00, 0xf6, 0x21, 0xca,
+ 0xa1, 0x8c, 0x00, 0xf6, 0x11, 0xc5, 0x1e, 0x64, 0x00, 0xf6, 0x01, 0xc5,
+ 0x34, 0x21, 0x00, 0xf5, 0xf0, 0xcb, 0x9c, 0x9c, 0x00, 0x0f, 0x00, 0xcb,
+ 0x9c, 0x9c, 0x00, 0xf2, 0xe0, 0x9b, 0x01, 0x9f, 0x99, 0xc5, 0xe2, 0x0b,
+ 0x01, 0x9f, 0xa1, 0x43, 0x47, 0x79, 0xc3, 0xaa, 0x05, 0x43, 0x0d, 0x8c,
+ 0xc3, 0xaa, 0x11, 0x43, 0x8c, 0x97, 0xc3, 0xaa, 0x1d, 0xc7, 0x25, 0x2b,
+ 0x01, 0x9f, 0xd9, 0xc6, 0xd1, 0xfc, 0x01, 0x9f, 0xe0, 0xc2, 0x0a, 0x20,
+ 0x01, 0x9b, 0x71, 0xc4, 0x05, 0xde, 0x01, 0x9b, 0x78, 0xd3, 0x40, 0xdb,
+ 0x0f, 0xd1, 0x81, 0xcf, 0x15, 0xa6, 0x0f, 0xd1, 0xb8, 0xcc, 0x26, 0x18,
+ 0x0f, 0x03, 0x68, 0xce, 0x75, 0x9c, 0x0f, 0x03, 0x60, 0xcf, 0x6b, 0x6d,
+ 0x0f, 0x03, 0x58, 0xc4, 0x04, 0x63, 0x00, 0x15, 0xa9, 0xc8, 0x70, 0x62,
+ 0x08, 0x3d, 0x32, 0x03, 0xaa, 0x29, 0xc3, 0xec, 0xd2, 0x0e, 0x8e, 0x71,
+ 0xc3, 0xec, 0xcf, 0x0e, 0x8e, 0x69, 0xc3, 0x76, 0xd3, 0x0e, 0x8e, 0x60,
+ 0x46, 0x01, 0x17, 0xc3, 0xaa, 0x2f, 0xc5, 0x04, 0x91, 0x0e, 0x8a, 0x49,
+ 0xc5, 0x01, 0x31, 0x0e, 0x8a, 0x40, 0xc5, 0x04, 0x91, 0x0e, 0x8a, 0x79,
+ 0xc5, 0x01, 0x31, 0x0e, 0x8a, 0x70, 0xc5, 0x04, 0x91, 0x0e, 0x8a, 0x69,
+ 0xc5, 0x01, 0x31, 0x0e, 0x8a, 0x60, 0xc5, 0x04, 0x91, 0x0e, 0x8a, 0x59,
+ 0xc5, 0x01, 0x31, 0x0e, 0x8a, 0x50, 0xcd, 0x81, 0xa7, 0x0e, 0x8d, 0x69,
+ 0xc4, 0xe5, 0xcf, 0x0e, 0x8c, 0x41, 0x16, 0xc3, 0xaa, 0x3b, 0xd0, 0x5d,
+ 0x3f, 0x0e, 0x8b, 0x30, 0xc6, 0xd1, 0x66, 0x0e, 0x8d, 0x51, 0xcb, 0x92,
+ 0x41, 0x0e, 0x8c, 0x51, 0xc2, 0x01, 0x33, 0x0e, 0x8c, 0x28, 0x14, 0xc3,
+ 0xaa, 0x47, 0xc5, 0xe3, 0xe1, 0x0e, 0x8b, 0xe8, 0xc2, 0x03, 0x1e, 0x0e,
+ 0x8c, 0x39, 0x03, 0x43, 0xaa, 0x53, 0x44, 0x07, 0xa2, 0xc3, 0xaa, 0x71,
+ 0xcf, 0x64, 0xbf, 0x0e, 0x8b, 0x68, 0xca, 0xa2, 0xea, 0x0e, 0x8c, 0x01,
+ 0x91, 0x0e, 0x8b, 0xf8, 0xc5, 0x5c, 0x5a, 0x0e, 0x8e, 0x18, 0xcd, 0x45,
+ 0xed, 0x00, 0xff, 0xe1, 0xc4, 0x7d, 0xa4, 0x00, 0xfb, 0x42, 0x03, 0xaa,
+ 0x7b, 0x45, 0x00, 0x39, 0x43, 0xaa, 0x81, 0x45, 0x00, 0x39, 0x43, 0xaa,
+ 0x97, 0x45, 0x00, 0x39, 0x43, 0xaa, 0xa3, 0x45, 0x00, 0x39, 0x43, 0xaa,
+ 0xaf, 0x45, 0x00, 0x39, 0x43, 0xaa, 0xc1, 0xcb, 0x90, 0xcb, 0x00, 0xf9,
+ 0xf1, 0xc4, 0xea, 0x23, 0x00, 0xf9, 0xe1, 0xc5, 0x27, 0x58, 0x00, 0xf9,
+ 0xd0, 0xcd, 0x45, 0xed, 0x00, 0xfe, 0x61, 0xc4, 0x7d, 0xa4, 0x00, 0xf9,
+ 0x42, 0x03, 0xaa, 0xd3, 0x45, 0x00, 0x39, 0x43, 0xaa, 0xd9, 0x45, 0x00,
+ 0x39, 0x43, 0xaa, 0xef, 0x45, 0x00, 0x39, 0x43, 0xaa, 0xfb, 0xcd, 0x45,
+ 0xed, 0x00, 0xfd, 0xe1, 0xc4, 0x7d, 0xa4, 0x00, 0xf8, 0x42, 0x03, 0xab,
+ 0x07, 0xc4, 0x27, 0x59, 0x00, 0xfd, 0xd1, 0xc5, 0xd9, 0x8c, 0x00, 0xfd,
+ 0xc0, 0x45, 0x00, 0x39, 0x43, 0xab, 0x0d, 0xca, 0x90, 0xcc, 0x00, 0xff,
+ 0xb3, 0x03, 0xab, 0x23, 0xc4, 0x7d, 0xa4, 0x00, 0xfb, 0x02, 0x03, 0xab,
+ 0x29, 0xd2, 0x49, 0x14, 0x00, 0xff, 0xa0, 0xd2, 0x49, 0x14, 0x00, 0xff,
+ 0x90, 0x45, 0x00, 0x39, 0x43, 0xab, 0x2f, 0x45, 0x00, 0x39, 0x43, 0xab,
+ 0x50, 0x45, 0x00, 0x39, 0x43, 0xab, 0x5c, 0x45, 0x00, 0x39, 0x43, 0xab,
+ 0x68, 0x45, 0x00, 0x39, 0x43, 0xab, 0x80, 0x45, 0x00, 0x39, 0x43, 0xab,
+ 0x92, 0x45, 0x00, 0x39, 0x43, 0xab, 0xa4, 0x45, 0x00, 0x39, 0x43, 0xab,
+ 0xbc, 0x45, 0x00, 0x39, 0x43, 0xab, 0xce, 0xca, 0x90, 0xcc, 0x00, 0xfe,
+ 0x33, 0x03, 0xab, 0xe0, 0xc4, 0x7d, 0xa4, 0x00, 0xf9, 0x02, 0x03, 0xab,
+ 0xe6, 0xd2, 0x49, 0x14, 0x00, 0xfe, 0x20, 0xd2, 0x49, 0x14, 0x00, 0xfe,
+ 0x10, 0x45, 0x00, 0x39, 0x43, 0xab, 0xec, 0x45, 0x00, 0x39, 0x43, 0xac,
+ 0x0d, 0x45, 0x00, 0x39, 0x43, 0xac, 0x19, 0xca, 0x90, 0xcc, 0x00, 0xfd,
+ 0xb3, 0x03, 0xac, 0x25, 0xc4, 0x7d, 0xa4, 0x00, 0xf8, 0x02, 0x03, 0xac,
+ 0x2b, 0xd2, 0x49, 0x14, 0x00, 0xfd, 0xa0, 0xc4, 0x27, 0x59, 0x00, 0xfb,
+ 0x83, 0x03, 0xac, 0x31, 0xc5, 0xd9, 0x8c, 0x00, 0xfd, 0x80, 0x45, 0x00,
+ 0x39, 0x43, 0xac, 0x37, 0x00, 0x43, 0xac, 0x58, 0xc7, 0x36, 0xd6, 0x08,
+ 0x0a, 0x33, 0x03, 0xac, 0x64, 0xc6, 0xb9, 0x95, 0x08, 0x0a, 0x40, 0xc7,
+ 0x36, 0xd6, 0x08, 0x0a, 0x3b, 0x03, 0xac, 0x6a, 0xc6, 0xb9, 0x95, 0x08,
+ 0x0a, 0x50, 0xca, 0x9e, 0x30, 0x0e, 0x7d, 0xe3, 0x03, 0xac, 0x70, 0xc9,
+ 0x92, 0xb1, 0x0e, 0x7d, 0xd2, 0x03, 0xac, 0x76, 0xd6, 0x2c, 0x3d, 0x0e,
+ 0x7d, 0xb8, 0xc9, 0x43, 0x91, 0x09, 0x10, 0x38, 0xca, 0xa4, 0xfc, 0x09,
+ 0x0f, 0x00, 0xc4, 0x5b, 0x6b, 0x09, 0x0e, 0xf1, 0xca, 0xa5, 0x42, 0x09,
+ 0x0e, 0xe8, 0xcf, 0x65, 0xa0, 0x09, 0x0e, 0x98, 0xc2, 0x12, 0x5d, 0x09,
+ 0x0e, 0x71, 0xc2, 0x01, 0x0e, 0x09, 0x0e, 0x68, 0xc2, 0x03, 0x5f, 0x09,
+ 0x25, 0xe9, 0xc2, 0x01, 0x08, 0x09, 0x25, 0xe0, 0xd4, 0x3b, 0x36, 0x0e,
+ 0xc8, 0x11, 0xcb, 0x93, 0x6a, 0x0e, 0xc7, 0xf8, 0xcc, 0x15, 0xeb, 0x0e,
+ 0xc8, 0x09, 0x16, 0xc3, 0xac, 0x7c, 0xc9, 0xb4, 0x38, 0x0e, 0xc4, 0x99,
+ 0xca, 0xa1, 0x28, 0x0e, 0xc0, 0x40, 0xcb, 0x14, 0xc5, 0x0e, 0xc7, 0xe9,
+ 0xcb, 0x14, 0xbe, 0x0e, 0xc7, 0xe1, 0xcc, 0x8e, 0x38, 0x0e, 0xc7, 0xda,
+ 0x03, 0xac, 0x88, 0xc4, 0x16, 0x02, 0x0e, 0xc7, 0xc9, 0xc9, 0x14, 0xd1,
+ 0x0e, 0xc7, 0xc1, 0xc8, 0x20, 0x6a, 0x0e, 0xc7, 0xb8, 0x05, 0xc3, 0xac,
+ 0x8e, 0xc4, 0x04, 0x63, 0x0e, 0xc7, 0x33, 0x03, 0xac, 0x9b, 0x4e, 0x6e,
+ 0x8e, 0xc3, 0xac, 0xa1, 0xc4, 0x05, 0xa6, 0x0e, 0xc6, 0xe3, 0x03, 0xac,
+ 0xad, 0x47, 0xc4, 0xb9, 0x43, 0xac, 0xb1, 0xca, 0x14, 0xc6, 0x0e, 0xc5,
+ 0xd1, 0xcd, 0x3b, 0x3c, 0x0e, 0xc0, 0x48, 0x00, 0x43, 0xac, 0xbd, 0x00,
+ 0x43, 0xac, 0xf2, 0x47, 0x0e, 0xcc, 0x43, 0xad, 0x01, 0xcc, 0x88, 0x5c,
+ 0x0e, 0xc0, 0xe8, 0xc8, 0x65, 0xb6, 0x0e, 0xc2, 0x11, 0x4a, 0xa2, 0xa4,
+ 0x43, 0xad, 0x0d, 0x4d, 0x7e, 0xf6, 0xc3, 0xad, 0x19, 0xce, 0x6d, 0x4c,
+ 0x0e, 0xc1, 0xb0, 0xcf, 0x3b, 0x3b, 0x0e, 0xc5, 0xb1, 0xc9, 0x14, 0xd1,
+ 0x0e, 0xc5, 0xa8, 0xce, 0x6f, 0xfa, 0x0e, 0xc4, 0x89, 0x47, 0xc6, 0xb1,
+ 0x43, 0xad, 0x25, 0xc5, 0x16, 0x01, 0x0e, 0xc3, 0x20, 0x00, 0x43, 0xad,
+ 0x31, 0xc6, 0x5a, 0x59, 0x0e, 0xc2, 0xbb, 0x03, 0xad, 0x3d, 0xcd, 0x29,
+ 0xb4, 0x0e, 0xc2, 0x91, 0xc4, 0x16, 0x02, 0x0e, 0xc2, 0x81, 0xc9, 0xb5,
+ 0xa0, 0x0e, 0xc2, 0x70, 0xc9, 0x14, 0xd1, 0x0e, 0xc2, 0x3b, 0x03, 0xad,
+ 0x41, 0xc6, 0x5a, 0x59, 0x0e, 0xc2, 0x31, 0xc4, 0x16, 0x02, 0x0e, 0xc2,
+ 0x28, 0xc2, 0x01, 0x5b, 0x0e, 0xc7, 0x99, 0xc3, 0x01, 0xc3, 0x0e, 0xc7,
+ 0x90, 0x00, 0x43, 0xad, 0x47, 0xc6, 0x14, 0xca, 0x0e, 0xc5, 0x31, 0xc4,
+ 0x01, 0xbd, 0x0e, 0xc4, 0x42, 0x03, 0xad, 0x57, 0xc6, 0x0e, 0xcc, 0x0e,
+ 0xc4, 0xe8, 0xc4, 0x05, 0xa6, 0x0e, 0xc3, 0xf9, 0xc7, 0x29, 0xba, 0x0e,
+ 0xc3, 0xe0, 0xc2, 0x01, 0x5b, 0x0e, 0xc6, 0xc9, 0xc3, 0x01, 0xc3, 0x0e,
+ 0xc6, 0xc0, 0xc5, 0x0e, 0xcd, 0x0e, 0xc7, 0x63, 0x03, 0xad, 0x5d, 0xcb,
+ 0x14, 0xc5, 0x0e, 0xc6, 0x00, 0x46, 0x0e, 0xcd, 0xc3, 0xad, 0x63, 0xc8,
+ 0xc0, 0x0b, 0x0e, 0xc3, 0x80, 0x00, 0x43, 0xad, 0x6f, 0xc2, 0x00, 0x15,
+ 0x0e, 0xcc, 0x78, 0xcb, 0x5b, 0xa4, 0x0f, 0xc1, 0x79, 0xca, 0xa0, 0xce,
+ 0x0f, 0xc1, 0x59, 0x49, 0xb3, 0x57, 0xc3, 0xad, 0x87, 0xd8, 0x26, 0x3c,
+ 0x01, 0x5b, 0xe9, 0xcc, 0x86, 0x70, 0x0f, 0xc1, 0x19, 0xcc, 0x85, 0x98,
+ 0x0f, 0xc1, 0x39, 0xd0, 0x5b, 0x9f, 0x0f, 0xc1, 0x98, 0xe0, 0x0a, 0xa7,
+ 0x01, 0x5c, 0x18, 0xcf, 0x2e, 0xd8, 0x01, 0x5b, 0xe1, 0xd1, 0x03, 0x76,
+ 0x01, 0x5b, 0xe0, 0xc7, 0x02, 0x6a, 0x01, 0x5d, 0x29, 0xc9, 0x00, 0x68,
+ 0x01, 0x5d, 0x38, 0xcf, 0x2e, 0xd8, 0x01, 0x48, 0xb9, 0xd6, 0x2e, 0xd1,
+ 0x01, 0x48, 0xc0, 0xc8, 0x67, 0x78, 0x01, 0x4b, 0x61, 0xdd, 0x10, 0x85,
+ 0x01, 0x4b, 0x40, 0xe0, 0x04, 0xc7, 0x01, 0x4b, 0x20, 0xcc, 0x10, 0x79,
+ 0x07, 0xeb, 0x41, 0xca, 0x2a, 0xb4, 0x07, 0xeb, 0x38, 0xca, 0x2a, 0xb4,
+ 0x07, 0xe3, 0x41, 0xcd, 0x05, 0x3a, 0x07, 0xe0, 0x18, 0xca, 0xa1, 0xaa,
+ 0x00, 0x3b, 0xb1, 0xc8, 0xb8, 0xa3, 0x00, 0x3b, 0xa8, 0xd5, 0x0d, 0xe0,
+ 0x00, 0x45, 0x20, 0xc5, 0x00, 0x34, 0x00, 0x35, 0x29, 0xd6, 0x2e, 0xe7,
+ 0x00, 0x3b, 0x08, 0x45, 0x00, 0xcd, 0xc3, 0xad, 0x93, 0x14, 0xc3, 0xad,
+ 0x9f, 0xd2, 0x4c, 0x74, 0x00, 0x43, 0xab, 0x03, 0xad, 0xab, 0xcf, 0x6a,
+ 0xc8, 0x00, 0x43, 0x8b, 0x03, 0xad, 0xb1, 0xc5, 0x4c, 0x81, 0x00, 0x43,
+ 0xa1, 0xc5, 0x6a, 0xd2, 0x00, 0x43, 0x80, 0x45, 0x00, 0x39, 0x43, 0xad,
+ 0xb7, 0xc5, 0x00, 0x34, 0x00, 0x33, 0x99, 0xc5, 0x03, 0x50, 0x00, 0x33,
+ 0x90, 0xc5, 0x00, 0x34, 0x00, 0x31, 0x2b, 0x03, 0xad, 0xc3, 0xc5, 0x03,
+ 0x50, 0x00, 0x31, 0x1a, 0x03, 0xad, 0xc7, 0x00, 0x43, 0xad, 0xcb, 0xc8,
+ 0xb8, 0xa3, 0x00, 0x3b, 0x99, 0xca, 0xa1, 0xaa, 0x00, 0x3b, 0xa0, 0xca,
+ 0x2a, 0xb4, 0x07, 0xda, 0x89, 0xcd, 0x05, 0x3a, 0x07, 0xda, 0x80, 0xd0,
+ 0x00, 0x29, 0x00, 0x44, 0x69, 0xc5, 0x03, 0x50, 0x00, 0x31, 0xd8, 0xc5,
+ 0x00, 0x34, 0x00, 0x31, 0xe1, 0xc5, 0x03, 0x50, 0x00, 0x3b, 0x19, 0xd6,
+ 0x2e, 0xe7, 0x00, 0x3b, 0x20, 0xc5, 0x00, 0x34, 0x00, 0x45, 0xa1, 0xc5,
+ 0x03, 0x50, 0x00, 0x35, 0x60, 0xcf, 0x63, 0x84, 0x00, 0x35, 0x71, 0xcd,
+ 0x0b, 0x67, 0x00, 0x3b, 0xf8, 0xc4, 0xe6, 0x67, 0x00, 0x36, 0x19, 0xcd,
+ 0x05, 0x3a, 0x07, 0xf4, 0x99, 0xca, 0x2a, 0xb4, 0x07, 0xf4, 0xa0, 0xc5,
+ 0x00, 0x34, 0x00, 0x44, 0x61, 0xc5, 0x03, 0x50, 0x00, 0x34, 0xf8, 0xd0,
+ 0x5c, 0x5f, 0x00, 0x45, 0xd1, 0xc9, 0x18, 0x19, 0x00, 0x45, 0x49, 0xcb,
+ 0x08, 0x89, 0x00, 0x45, 0x40, 0x0b, 0xc3, 0xad, 0xd7, 0xca, 0x2a, 0xb4,
+ 0x07, 0xf4, 0x51, 0xcb, 0x6a, 0x72, 0x07, 0xf4, 0x60, 0xcb, 0x08, 0x89,
+ 0x00, 0x36, 0x9b, 0x03, 0xad, 0xe3, 0x5d, 0x10, 0xdc, 0x43, 0xad, 0xe7,
+ 0xca, 0x5c, 0x65, 0x00, 0x45, 0xc9, 0x98, 0x00, 0x34, 0x93, 0x03, 0xad,
+ 0xf3, 0xde, 0x0a, 0x09, 0x00, 0x3b, 0x88, 0xc6, 0x00, 0x33, 0x00, 0x45,
+ 0x00, 0xd6, 0x2e, 0xe7, 0x00, 0x3a, 0x93, 0x03, 0xad, 0xf9, 0xd2, 0x48,
+ 0xba, 0x00, 0x3a, 0x80, 0xd5, 0x0d, 0xe0, 0x00, 0x34, 0xe0, 0x4a, 0x0d,
+ 0xe6, 0xc3, 0xad, 0xff, 0x46, 0x0a, 0x20, 0x43, 0xae, 0x0b, 0x98, 0x00,
+ 0x37, 0x71, 0xcd, 0x31, 0xf2, 0x00, 0x3a, 0xd0, 0xce, 0x00, 0x39, 0x00,
+ 0x34, 0x58, 0x4a, 0x0d, 0xe6, 0xc3, 0xae, 0x11, 0x48, 0x0b, 0x67, 0x43,
+ 0xae, 0x1d, 0xe0, 0x09, 0x87, 0x00, 0x3b, 0xe0, 0xc5, 0x00, 0x34, 0x00,
+ 0x3b, 0x71, 0x03, 0x43, 0xae, 0x29, 0xcb, 0x10, 0x7a, 0x07, 0xdd, 0x61,
+ 0xcc, 0x05, 0x3b, 0x07, 0xdd, 0x50, 0xcb, 0x10, 0x7a, 0x07, 0xdd, 0x41,
+ 0xcc, 0x05, 0x3b, 0x07, 0xdd, 0x30, 0xca, 0x2a, 0xb4, 0x07, 0xdd, 0x29,
+ 0xcd, 0x05, 0x3a, 0x07, 0xdd, 0x20, 0xd0, 0x12, 0xd2, 0x0f, 0xdd, 0x58,
+ 0xcf, 0x01, 0x88, 0x0f, 0xdd, 0x50, 0xd3, 0x40, 0xdb, 0x0f, 0xd1, 0x89,
+ 0xcf, 0x15, 0xa6, 0x0f, 0xd1, 0xe8, 0xa5, 0x01, 0x47, 0xf8, 0xc7, 0x02,
+ 0x6a, 0x01, 0x5d, 0x21, 0xc9, 0x00, 0x68, 0x01, 0x5d, 0x30, 0xcf, 0x2e,
+ 0xd8, 0x01, 0x5b, 0xd1, 0xd1, 0x03, 0x76, 0x01, 0x5b, 0xd0, 0xcf, 0x2e,
+ 0xd8, 0x01, 0x59, 0xe1, 0xd6, 0x2e, 0xd1, 0x01, 0x59, 0xe8, 0xc8, 0x67,
+ 0x78, 0x01, 0x4b, 0x51, 0xdf, 0x0a, 0xc8, 0x01, 0x4b, 0x10, 0xd3, 0x45,
+ 0x4f, 0x0e, 0xf8, 0x40, 0xd1, 0x03, 0x76, 0x05, 0x5a, 0x11, 0xc6, 0x03,
+ 0x81, 0x05, 0x5a, 0x08, 0xcc, 0x57, 0xac, 0x0e, 0xf8, 0xb1, 0xcc, 0x1e,
+ 0xfa, 0x00, 0xeb, 0x98, 0xc5, 0x00, 0x34, 0x00, 0xf2, 0xdb, 0x03, 0xae,
+ 0x35, 0xc5, 0x03, 0x50, 0x00, 0xf2, 0xc8, 0xcb, 0x9c, 0x9c, 0x00, 0x11,
+ 0x88, 0xc9, 0x0d, 0xd7, 0x00, 0xf6, 0x39, 0xc5, 0x1f, 0x01, 0x00, 0xf6,
+ 0x29, 0xca, 0xa1, 0x8c, 0x00, 0xf6, 0x19, 0xc5, 0x1e, 0x64, 0x00, 0xf6,
+ 0x09, 0xc5, 0x34, 0x21, 0x00, 0xf5, 0xf8, 0xc9, 0x0d, 0xd7, 0x00, 0xf7,
+ 0x89, 0xc5, 0x1f, 0x01, 0x00, 0xf7, 0x79, 0xca, 0xa1, 0x8c, 0x00, 0xf7,
+ 0x69, 0xc5, 0x1e, 0x64, 0x00, 0xf7, 0x59, 0xc5, 0x34, 0x21, 0x00, 0xf7,
+ 0x48, 0xc5, 0x34, 0x21, 0x00, 0x0b, 0x89, 0xc5, 0x1e, 0x64, 0x00, 0x10,
+ 0xa8, 0xc5, 0x00, 0x34, 0x00, 0xf3, 0x99, 0x44, 0x00, 0x3a, 0x43, 0xae,
+ 0x3b, 0xc9, 0x0d, 0xd7, 0x00, 0xf5, 0x69, 0xc5, 0x1f, 0x01, 0x00, 0xf5,
+ 0x59, 0xca, 0xa1, 0x8c, 0x00, 0xf5, 0x49, 0xc5, 0x1e, 0x64, 0x00, 0xf5,
+ 0x39, 0xc5, 0x34, 0x21, 0x00, 0xf5, 0x28, 0xc5, 0x00, 0x34, 0x00, 0xf5,
+ 0x09, 0xc5, 0x03, 0x50, 0x00, 0x11, 0x3a, 0x03, 0xae, 0x53, 0xc5, 0x00,
+ 0x34, 0x00, 0xf0, 0x09, 0xc5, 0x03, 0x50, 0x00, 0x07, 0x2a, 0x03, 0xae,
+ 0x59, 0xc6, 0x61, 0xbc, 0x00, 0x0e, 0xa9, 0xc5, 0x34, 0x21, 0x00, 0x0e,
+ 0xb9, 0xc5, 0x98, 0xb9, 0x00, 0x0e, 0xc9, 0xc5, 0x1e, 0x64, 0x00, 0x0e,
+ 0xd8, 0xc2, 0x0a, 0x20, 0x01, 0x9f, 0xa9, 0xc4, 0x05, 0xde, 0x01, 0x9f,
+ 0xb0, 0xc3, 0x08, 0xde, 0x01, 0x9f, 0xb9, 0xc3, 0x0d, 0x8f, 0x01, 0x9f,
+ 0xc0, 0xc2, 0x22, 0x45, 0x01, 0x9f, 0xc9, 0xc4, 0x15, 0xa7, 0x01, 0x9f,
+ 0xd0, 0xd3, 0x45, 0x4f, 0x08, 0x3d, 0x38, 0xc5, 0x04, 0x91, 0x0e, 0x8a,
+ 0x89, 0xc5, 0x01, 0x31, 0x0e, 0x8a, 0x80, 0x45, 0xa1, 0x0e, 0xc3, 0xae,
+ 0x5f, 0xc2, 0x00, 0xa8, 0x0e, 0x8b, 0x28, 0xcb, 0x93, 0x8b, 0x0e, 0x8c,
+ 0x59, 0x44, 0x84, 0xb0, 0x43, 0xae, 0x69, 0xc3, 0xed, 0x50, 0x0e, 0x8b,
+ 0x91, 0xc3, 0xed, 0x4d, 0x0e, 0x8b, 0x89, 0xc3, 0xed, 0x4a, 0x0e, 0x8b,
+ 0x81, 0xc3, 0xed, 0x47, 0x0e, 0x8b, 0x79, 0xc3, 0xed, 0x44, 0x0e, 0x8b,
+ 0x70, 0xca, 0xa1, 0x0a, 0x0e, 0x8c, 0x09, 0x96, 0x0e, 0x8b, 0xd8, 0x45,
+ 0x00, 0x39, 0x43, 0xae, 0x7b, 0x12, 0xc3, 0xae, 0x91, 0xc4, 0xea, 0x23,
+ 0x00, 0xfb, 0x6b, 0x03, 0xae, 0xa0, 0xc5, 0x27, 0x58, 0x00, 0xfb, 0x5a,
+ 0x03, 0xae, 0xa6, 0xc4, 0xea, 0x23, 0x00, 0xfa, 0x69, 0xc5, 0x27, 0x58,
+ 0x00, 0xfa, 0x58, 0xc4, 0xea, 0x23, 0x00, 0xfa, 0x61, 0xc5, 0x27, 0x58,
+ 0x00, 0xfa, 0x50, 0xcb, 0x90, 0xcb, 0x00, 0xfa, 0xf9, 0xc4, 0xea, 0x23,
+ 0x00, 0xfa, 0xe9, 0xc5, 0x27, 0x58, 0x00, 0xfa, 0xd8, 0xcb, 0x90, 0xcb,
+ 0x00, 0xf9, 0xf9, 0xc4, 0xea, 0x23, 0x00, 0xf9, 0xe9, 0xc5, 0x27, 0x58,
+ 0x00, 0xf9, 0xd8, 0x45, 0x00, 0x39, 0x43, 0xae, 0xac, 0x12, 0xc3, 0xae,
+ 0xc2, 0xc4, 0xea, 0x23, 0x00, 0xf9, 0x6b, 0x03, 0xae, 0xd1, 0xc5, 0x27,
+ 0x58, 0x00, 0xf9, 0x5a, 0x03, 0xae, 0xd7, 0xc4, 0xea, 0x23, 0x00, 0xf8,
+ 0xe9, 0xc5, 0x27, 0x58, 0x00, 0xf8, 0xd8, 0xc4, 0xea, 0x23, 0x00, 0xf8,
+ 0xe1, 0xc5, 0x27, 0x58, 0x00, 0xf8, 0xd0, 0x45, 0x00, 0x39, 0x43, 0xae,
+ 0xdd, 0x12, 0xc3, 0xae, 0xf3, 0xc4, 0xea, 0x23, 0x00, 0xf8, 0x6b, 0x03,
+ 0xaf, 0x02, 0xc5, 0x27, 0x58, 0x00, 0xf8, 0x5a, 0x03, 0xaf, 0x08, 0xd2,
+ 0x49, 0x14, 0x00, 0xff, 0xb8, 0x45, 0x00, 0x39, 0x43, 0xaf, 0x0e, 0xcb,
+ 0x90, 0xcb, 0x00, 0xfb, 0x3b, 0x03, 0xaf, 0x2f, 0xc4, 0xea, 0x23, 0x00,
+ 0xfb, 0x2b, 0x03, 0xaf, 0x35, 0xc5, 0x27, 0x58, 0x00, 0xfb, 0x1b, 0x03,
+ 0xaf, 0x3b, 0xcd, 0x49, 0x19, 0x00, 0xfd, 0x08, 0xc4, 0xea, 0x23, 0x00,
+ 0xfa, 0x29, 0xc5, 0x27, 0x58, 0x00, 0xfa, 0x18, 0xc4, 0xea, 0x23, 0x00,
+ 0xfa, 0x21, 0xc5, 0x27, 0x58, 0x00, 0xfa, 0x10, 0xcb, 0x90, 0xcb, 0x00,
+ 0xff, 0x39, 0xc4, 0xea, 0x23, 0x00, 0xff, 0x19, 0xc5, 0x27, 0x58, 0x00,
+ 0xff, 0x11, 0xc5, 0x68, 0x98, 0x00, 0x1d, 0x80, 0xcb, 0x90, 0xcb, 0x00,
+ 0xfa, 0xb9, 0xc4, 0xea, 0x23, 0x00, 0xfa, 0xa9, 0xc5, 0x27, 0x58, 0x00,
+ 0xfa, 0x98, 0xcb, 0x90, 0xcb, 0x00, 0xfa, 0xb1, 0xc4, 0xea, 0x23, 0x00,
+ 0xfa, 0xa1, 0xc5, 0x27, 0x58, 0x00, 0xfa, 0x90, 0xcb, 0x90, 0xcb, 0x00,
+ 0xfe, 0xb9, 0xc4, 0xea, 0x23, 0x00, 0xfe, 0x99, 0xc5, 0x27, 0x58, 0x00,
+ 0xfe, 0x91, 0xc5, 0x68, 0x98, 0x00, 0x1c, 0x80, 0xcb, 0x90, 0xcb, 0x00,
+ 0xf9, 0xb9, 0xc4, 0xea, 0x23, 0x00, 0xf9, 0xa9, 0xc5, 0x27, 0x58, 0x00,
+ 0xf9, 0x98, 0xcb, 0x90, 0xcb, 0x00, 0xf9, 0xb1, 0xc4, 0xea, 0x23, 0x00,
+ 0xf9, 0xa1, 0xc5, 0x27, 0x58, 0x00, 0xf9, 0x90, 0xd2, 0x49, 0x14, 0x00,
+ 0xfe, 0x38, 0x45, 0x00, 0x39, 0x43, 0xaf, 0x41, 0xcb, 0x90, 0xcb, 0x00,
+ 0xf9, 0x3b, 0x03, 0xaf, 0x62, 0xc4, 0xea, 0x23, 0x00, 0xf9, 0x2b, 0x03,
+ 0xaf, 0x68, 0xc5, 0x27, 0x58, 0x00, 0xf9, 0x1b, 0x03, 0xaf, 0x6e, 0xcd,
+ 0x49, 0x19, 0x00, 0xfc, 0x88, 0xc4, 0xea, 0x23, 0x00, 0xf8, 0xa9, 0xc5,
+ 0x27, 0x58, 0x00, 0xf8, 0x98, 0xc4, 0xea, 0x23, 0x00, 0xf8, 0xa1, 0xc5,
+ 0x27, 0x58, 0x00, 0xf8, 0x90, 0xd2, 0x49, 0x14, 0x00, 0xfd, 0xb8, 0x45,
+ 0x00, 0x39, 0x43, 0xaf, 0x74, 0xd2, 0x49, 0x14, 0x00, 0xfd, 0x90, 0xcb,
+ 0x90, 0xcb, 0x00, 0xf8, 0x3b, 0x03, 0xaf, 0x95, 0xc4, 0xea, 0x23, 0x00,
+ 0xf8, 0x2b, 0x03, 0xaf, 0x9b, 0xc5, 0x27, 0x58, 0x00, 0xf8, 0x1b, 0x03,
+ 0xaf, 0xa1, 0xcd, 0x49, 0x19, 0x00, 0xfc, 0x08, 0xc7, 0xb9, 0x94, 0x08,
+ 0x0a, 0x61, 0xc7, 0x63, 0x9b, 0x08, 0x0a, 0x98, 0xc8, 0xb9, 0x93, 0x08,
+ 0x0a, 0x70, 0xc8, 0x63, 0x9a, 0x08, 0x0a, 0xb0, 0xca, 0xa4, 0xf2, 0x0e,
+ 0x7d, 0xe8, 0x46, 0x01, 0xab, 0x43, 0xaf, 0xa7, 0xcc, 0x8e, 0x14, 0x0e,
+ 0xc8, 0x01, 0xca, 0x93, 0x6b, 0x0e, 0xc7, 0xf0, 0xc9, 0x64, 0x89, 0x0e,
+ 0xc1, 0x60, 0xc5, 0x03, 0xfb, 0x0e, 0xc7, 0x5b, 0x03, 0xaf, 0xb3, 0x17,
+ 0x43, 0xaf, 0xb9, 0x4a, 0x6e, 0x74, 0x43, 0xaf, 0xc3, 0xc4, 0x16, 0x02,
+ 0x0e, 0xc7, 0x29, 0xc8, 0x41, 0xac, 0x0e, 0xc7, 0x20, 0x00, 0x43, 0xaf,
+ 0xcf, 0xcc, 0x8e, 0x20, 0x0e, 0xc1, 0xd9, 0xcd, 0x81, 0x25, 0x0e, 0xc1,
+ 0xd0, 0x05, 0xc3, 0xaf, 0xe1, 0xc6, 0x14, 0xca, 0x0e, 0xc5, 0x21, 0x14,
+ 0xc3, 0xaf, 0xf0, 0xc5, 0x0e, 0xcd, 0x0e, 0xc0, 0xf3, 0x03, 0xaf, 0xff,
+ 0xd7, 0x29, 0xaa, 0x0e, 0xc1, 0x39, 0xc6, 0x5a, 0x59, 0x0e, 0xc0, 0x93,
+ 0x03, 0xb0, 0x03, 0xc4, 0x16, 0x02, 0x0e, 0xc0, 0x83, 0x03, 0xb0, 0x09,
+ 0xd3, 0x41, 0xac, 0x0e, 0xc1, 0x00, 0xc9, 0x6e, 0x77, 0x0e, 0xc0, 0xa3,
+ 0x03, 0xb0, 0x0f, 0xc3, 0x00, 0x36, 0x0e, 0xc0, 0x60, 0xc9, 0x14, 0xd1,
+ 0x0e, 0xc1, 0x29, 0xc4, 0x05, 0xa6, 0x0e, 0xc1, 0x20, 0xc7, 0x1b, 0x33,
+ 0x0e, 0xc2, 0x09, 0xc2, 0x02, 0x6a, 0x0e, 0xc2, 0x00, 0xc6, 0x5a, 0x59,
+ 0x0e, 0xc1, 0xc9, 0xc2, 0x02, 0x6a, 0x0e, 0xc1, 0xc0, 0xc6, 0x3a, 0x1e,
+ 0x0e, 0xc4, 0x81, 0xc8, 0x41, 0xac, 0x0e, 0xc4, 0x78, 0xc4, 0x16, 0x02,
+ 0x0e, 0xc2, 0x89, 0xc9, 0xb5, 0xa0, 0x0e, 0xc2, 0x78, 0x00, 0x43, 0xb0,
+ 0x15, 0xc6, 0xd8, 0x3e, 0x0e, 0xc2, 0x40, 0x15, 0xc3, 0xb0, 0x21, 0xc5,
+ 0x15, 0x2e, 0x0e, 0xc7, 0x79, 0xc4, 0x03, 0xf5, 0x0e, 0xc7, 0x70, 0xca,
+ 0x14, 0xd0, 0x0e, 0xc4, 0x68, 0xc5, 0x03, 0xf4, 0x0e, 0xc7, 0x68, 0xc7,
+ 0x29, 0xba, 0x0e, 0xc3, 0x91, 0xc4, 0x05, 0xa6, 0x0e, 0xc3, 0x70, 0x45,
+ 0x0c, 0xa3, 0xc3, 0xb0, 0x2d, 0xc6, 0x14, 0xca, 0x0e, 0xc5, 0x29, 0xc4,
+ 0x01, 0xbd, 0x0e, 0xc4, 0x39, 0xc5, 0x0e, 0xcd, 0x0e, 0xc0, 0xf8, 0xca,
+ 0x00, 0x47, 0x01, 0x5d, 0x09, 0xc9, 0x03, 0x9e, 0x01, 0x5d, 0x00, 0xc5,
+ 0x08, 0x89, 0x00, 0x44, 0x11, 0xc9, 0x4e, 0x1b, 0x00, 0x43, 0xc0, 0x45,
+ 0x01, 0x18, 0xc3, 0xb0, 0x39, 0x49, 0xa3, 0x27, 0x43, 0xb0, 0x45, 0x45,
+ 0x00, 0x39, 0x43, 0xb0, 0x51, 0x45, 0x00, 0x39, 0x43, 0xb0, 0x5d, 0xc9,
+ 0xb7, 0xbc, 0x00, 0x43, 0xf9, 0xc9, 0x18, 0x19, 0x00, 0x43, 0xe0, 0x00,
+ 0x43, 0xb0, 0x69, 0x00, 0x43, 0xb0, 0x75, 0xcd, 0x05, 0x3a, 0x07, 0xf4,
+ 0x09, 0xca, 0x2a, 0xb4, 0x07, 0xf4, 0x10, 0xcc, 0x05, 0x3b, 0x07, 0xf4,
+ 0x49, 0xcb, 0x10, 0x7a, 0x07, 0xf4, 0x58, 0x00, 0x43, 0xb0, 0x81, 0xca,
+ 0xa1, 0xaa, 0x00, 0x3b, 0xd9, 0xc8, 0xb8, 0xa3, 0x00, 0x3b, 0xd0, 0xc6,
+ 0x00, 0x33, 0x00, 0x34, 0xa8, 0xd3, 0x20, 0x1f, 0x00, 0x3a, 0x98, 0xc5,
+ 0x00, 0x34, 0x00, 0x45, 0x71, 0xcf, 0x1a, 0x27, 0x00, 0x34, 0x78, 0xe0,
+ 0x00, 0x27, 0x00, 0x3a, 0xc8, 0xc5, 0x03, 0x50, 0x00, 0x34, 0x29, 0xd6,
+ 0x2e, 0xe7, 0x00, 0x3a, 0xc0, 0xce, 0x73, 0xc0, 0x00, 0x34, 0x11, 0xc5,
+ 0x03, 0x50, 0x00, 0x3a, 0xb8, 0xcb, 0x0a, 0x1c, 0x00, 0x3b, 0x79, 0xc4,
+ 0x03, 0x51, 0x00, 0x3b, 0x90, 0xcb, 0x9c, 0x9c, 0x00, 0xf2, 0xe8, 0xc6,
+ 0x61, 0xbc, 0x00, 0x0e, 0xb1, 0xc5, 0x34, 0x21, 0x00, 0x0e, 0xc1, 0xc5,
+ 0x98, 0xb9, 0x00, 0x0e, 0xd1, 0xc5, 0x1e, 0x64, 0x00, 0x0e, 0xe0, 0xcb,
+ 0x9c, 0x9c, 0x00, 0x0f, 0x08, 0xca, 0xa4, 0x98, 0x00, 0x0f, 0xd8, 0xc6,
+ 0xd8, 0x80, 0x0e, 0x8b, 0xf1, 0x91, 0x0e, 0x8b, 0xe0, 0xc3, 0xec, 0xa5,
+ 0x0e, 0x8b, 0x49, 0xc3, 0xec, 0xa8, 0x0e, 0x8b, 0x41, 0xc3, 0xec, 0xab,
+ 0x0e, 0x8b, 0x38, 0x12, 0xc3, 0xb0, 0x8d, 0xc4, 0xea, 0x23, 0x00, 0xfb,
+ 0x63, 0x03, 0xb0, 0x9c, 0xc5, 0x27, 0x58, 0x00, 0xfb, 0x52, 0x03, 0xb0,
+ 0xa2, 0xca, 0x90, 0xcc, 0x00, 0xfb, 0x7b, 0x03, 0xb0, 0xa8, 0xcd, 0x45,
+ 0xed, 0x00, 0xfd, 0x48, 0xd3, 0x45, 0xe7, 0x00, 0xfd, 0x68, 0xd3, 0x45,
+ 0xe7, 0x00, 0xfd, 0x58, 0x12, 0xc3, 0xb0, 0xae, 0xc4, 0xea, 0x23, 0x00,
+ 0xf9, 0x63, 0x03, 0xb0, 0xbd, 0xc5, 0x27, 0x58, 0x00, 0xf9, 0x52, 0x03,
+ 0xb0, 0xc3, 0xca, 0x90, 0xcc, 0x00, 0xf9, 0x7b, 0x03, 0xb0, 0xc9, 0xcd,
+ 0x45, 0xed, 0x00, 0xfc, 0xc8, 0xd3, 0x45, 0xe7, 0x00, 0xfc, 0xe8, 0xd3,
+ 0x45, 0xe7, 0x00, 0xfc, 0xd8, 0x12, 0xc3, 0xb0, 0xcf, 0xc4, 0xea, 0x23,
+ 0x00, 0xf8, 0x63, 0x03, 0xb0, 0xde, 0xc5, 0x27, 0x58, 0x00, 0xf8, 0x52,
+ 0x03, 0xb0, 0xe4, 0xca, 0x90, 0xcc, 0x00, 0xf8, 0x7b, 0x03, 0xb0, 0xea,
+ 0xcd, 0x45, 0xed, 0x00, 0xfc, 0x48, 0xd3, 0x45, 0xe7, 0x00, 0xfc, 0x68,
+ 0xd3, 0x45, 0xe7, 0x00, 0xfc, 0x58, 0xcb, 0x90, 0xcb, 0x00, 0xfb, 0x33,
+ 0x03, 0xb0, 0xf0, 0xc4, 0xea, 0x23, 0x00, 0xfb, 0x23, 0x03, 0xb0, 0xf6,
+ 0xc5, 0x27, 0x58, 0x00, 0xfb, 0x13, 0x03, 0xb0, 0xfc, 0xcd, 0x49, 0x19,
+ 0x00, 0xfd, 0x00, 0xd2, 0x49, 0x14, 0x00, 0xfd, 0x38, 0xd2, 0x49, 0x14,
+ 0x00, 0xfd, 0x28, 0xd2, 0x49, 0x14, 0x00, 0xfd, 0x18, 0xcb, 0x90, 0xcb,
+ 0x00, 0xf9, 0x33, 0x03, 0xb1, 0x02, 0xc4, 0xea, 0x23, 0x00, 0xf9, 0x23,
+ 0x03, 0xb1, 0x08, 0xc5, 0x27, 0x58, 0x00, 0xf9, 0x13, 0x03, 0xb1, 0x0e,
+ 0xcd, 0x49, 0x19, 0x00, 0xfc, 0x80, 0xd2, 0x49, 0x14, 0x00, 0xfc, 0xb8,
+ 0xd2, 0x49, 0x14, 0x00, 0xfc, 0xa8, 0xd2, 0x49, 0x14, 0x00, 0xfc, 0x98,
+ 0xcb, 0x90, 0xcb, 0x00, 0xf8, 0x33, 0x03, 0xb1, 0x14, 0xc4, 0xea, 0x23,
+ 0x00, 0xf8, 0x23, 0x03, 0xb1, 0x1a, 0xc5, 0x27, 0x58, 0x00, 0xf8, 0x13,
+ 0x03, 0xb1, 0x20, 0xcd, 0x49, 0x19, 0x00, 0xfc, 0x00, 0xd2, 0x49, 0x14,
+ 0x00, 0xfc, 0x38, 0xd2, 0x49, 0x14, 0x00, 0xfc, 0x28, 0xd2, 0x49, 0x14,
+ 0x00, 0xfc, 0x18, 0xd0, 0x5d, 0x0f, 0x0e, 0x7d, 0xd9, 0xd0, 0x2c, 0x43,
+ 0x0e, 0x7d, 0xc0, 0xcb, 0x6e, 0x75, 0x0e, 0xc1, 0xe0, 0x14, 0xc3, 0xb1,
+ 0x26, 0xce, 0x6e, 0x72, 0x0e, 0xc1, 0xb8, 0xc6, 0x5a, 0x59, 0x0e, 0xc2,
+ 0x19, 0xc2, 0x02, 0x6a, 0x0e, 0xc1, 0x88, 0x46, 0x0e, 0xc7, 0xc3, 0xb1,
+ 0x32, 0xc9, 0xaf, 0x9d, 0x0e, 0xc7, 0x11, 0x46, 0x0e, 0xcd, 0x43, 0xb1,
+ 0x3e, 0x44, 0x0c, 0xa4, 0xc3, 0xb1, 0x50, 0xc8, 0x14, 0xd2, 0x0e, 0xc0,
+ 0xaa, 0x03, 0xb1, 0x5f, 0xc3, 0x01, 0x5e, 0x0e, 0xc4, 0x33, 0x03, 0xb1,
+ 0x63, 0xce, 0x3b, 0x3c, 0x0e, 0xc0, 0x88, 0x00, 0x43, 0xb1, 0x67, 0xd2,
+ 0x49, 0x80, 0x0e, 0xc1, 0x18, 0xcf, 0x64, 0x83, 0x0e, 0xc1, 0x08, 0xcb,
+ 0x49, 0x87, 0x0e, 0xc1, 0x30, 0xc8, 0xc0, 0x0b, 0x0e, 0xc2, 0xc9, 0xca,
+ 0x49, 0x88, 0x0e, 0xc2, 0xc0, 0xc4, 0x00, 0x3f, 0x0e, 0xc7, 0x89, 0xc3,
+ 0x02, 0x53, 0x0e, 0xc6, 0xe8, 0xc7, 0x14, 0xc9, 0x0e, 0xc5, 0x51, 0xc2,
+ 0x00, 0x5f, 0x0e, 0xc0, 0xd8, 0xc5, 0x08, 0x89, 0x00, 0x44, 0x09, 0xc9,
+ 0x4e, 0x1b, 0x00, 0x43, 0xb8, 0xc5, 0x00, 0x34, 0x00, 0x43, 0xc9, 0xc5,
+ 0x03, 0x50, 0x00, 0x43, 0xb0, 0xc9, 0xb7, 0xbc, 0x00, 0x44, 0x01, 0xc9,
+ 0x18, 0x19, 0x00, 0x43, 0xe8, 0xc9, 0xb7, 0xbc, 0x00, 0x43, 0xf1, 0xc9,
+ 0x18, 0x19, 0x00, 0x43, 0xd8, 0xca, 0x2a, 0xb4, 0x07, 0xf4, 0x41, 0xcd,
+ 0x05, 0x3a, 0x07, 0xf4, 0x38, 0xcd, 0x05, 0x3a, 0x07, 0xf4, 0x19, 0xca,
+ 0x2a, 0xb4, 0x07, 0xf4, 0x20, 0xca, 0x2a, 0xb4, 0x07, 0xdd, 0x89, 0xcd,
+ 0x05, 0x3a, 0x07, 0xdd, 0x80, 0xca, 0x90, 0xcc, 0x00, 0xfb, 0x73, 0x03,
+ 0xb1, 0x7e, 0xcd, 0x45, 0xed, 0x00, 0xfd, 0x40, 0xd3, 0x45, 0xe7, 0x00,
+ 0xfd, 0x60, 0xd3, 0x45, 0xe7, 0x00, 0xfd, 0x50, 0xd3, 0x45, 0xe7, 0x00,
+ 0xfd, 0x78, 0xca, 0x90, 0xcc, 0x00, 0xf9, 0x73, 0x03, 0xb1, 0x84, 0xcd,
+ 0x45, 0xed, 0x00, 0xfc, 0xc0, 0xd3, 0x45, 0xe7, 0x00, 0xfc, 0xe0, 0xd3,
+ 0x45, 0xe7, 0x00, 0xfc, 0xd0, 0xd3, 0x45, 0xe7, 0x00, 0xfc, 0xf8, 0xca,
+ 0x90, 0xcc, 0x00, 0xf8, 0x73, 0x03, 0xb1, 0x8a, 0xcd, 0x45, 0xed, 0x00,
+ 0xfc, 0x40, 0xd3, 0x45, 0xe7, 0x00, 0xfc, 0x60, 0xd3, 0x45, 0xe7, 0x00,
+ 0xfc, 0x50, 0xd3, 0x45, 0xe7, 0x00, 0xfc, 0x78, 0xd2, 0x49, 0x14, 0x00,
+ 0xfd, 0x30, 0xd2, 0x49, 0x14, 0x00, 0xfd, 0x20, 0xd2, 0x49, 0x14, 0x00,
+ 0xfd, 0x10, 0xd2, 0x49, 0x14, 0x00, 0xfc, 0xb0, 0xd2, 0x49, 0x14, 0x00,
+ 0xfc, 0xa0, 0xd2, 0x49, 0x14, 0x00, 0xfc, 0x90, 0xd2, 0x49, 0x14, 0x00,
+ 0xfc, 0x30, 0xd2, 0x49, 0x14, 0x00, 0xfc, 0x20, 0xd2, 0x49, 0x14, 0x00,
+ 0xfc, 0x10, 0x49, 0x0e, 0xd6, 0xc3, 0xb1, 0x90, 0xc5, 0xc2, 0xf6, 0x0e,
+ 0xc7, 0x38, 0xc5, 0x5a, 0x59, 0x0e, 0xc7, 0x19, 0xc4, 0x16, 0x02, 0x0e,
+ 0xc7, 0x08, 0xc4, 0x16, 0x02, 0x0e, 0xc7, 0x01, 0xc9, 0x14, 0xd1, 0x0e,
+ 0xc6, 0xf9, 0xc8, 0x20, 0x6a, 0x0e, 0xc6, 0xf0, 0xc7, 0x14, 0xc9, 0x0e,
+ 0xc5, 0x49, 0xc2, 0x00, 0x5f, 0x0e, 0xc0, 0xd2, 0x03, 0xb1, 0x9c, 0x00,
+ 0x43, 0xb1, 0xa2, 0x00, 0x43, 0xb1, 0xc6, 0xc6, 0xc4, 0xb9, 0x0e, 0xc1,
+ 0xfb, 0x03, 0xb1, 0xd2, 0x05, 0xc3, 0xb1, 0xd8, 0x0a, 0xc3, 0xb1, 0xea,
+ 0xc4, 0x16, 0x02, 0x0e, 0xc1, 0x10, 0xd3, 0x45, 0xe7, 0x00, 0xfd, 0x70,
+ 0xd3, 0x45, 0xe7, 0x00, 0xfc, 0xf0, 0xd3, 0x45, 0xe7, 0x00, 0xfc, 0x70,
+ 0xc5, 0x19, 0x05, 0x0e, 0xc7, 0x51, 0xc6, 0x0e, 0xdf, 0x0e, 0xc7, 0x40,
+ 0xcb, 0x49, 0x87, 0x0e, 0xc1, 0x98, 0xc6, 0xd5, 0xce, 0x0e, 0xc0, 0xc3,
+ 0x03, 0xb1, 0xf6, 0x46, 0x0e, 0xcd, 0xc3, 0xb1, 0xfc, 0xc6, 0x5a, 0x59,
+ 0x0e, 0xc0, 0xcb, 0x03, 0xb2, 0x0b, 0xcb, 0x96, 0xda, 0x0e, 0xc0, 0xb9,
+ 0xca, 0xa1, 0x28, 0x0e, 0xc0, 0xb0, 0xc9, 0x14, 0xd1, 0x0e, 0xc4, 0x61,
+ 0xc4, 0x16, 0x02, 0x0e, 0xc4, 0x58, 0xc4, 0x0d, 0xa1, 0x0e, 0xc1, 0xf0,
+ 0xcf, 0x68, 0xe8, 0x0e, 0xc1, 0xe9, 0xc6, 0x1f, 0x71, 0x0e, 0xc1, 0x49,
+ 0xc5, 0x6d, 0x4c, 0x0e, 0xc1, 0x40, 0xc5, 0x5a, 0x5a, 0x0e, 0xc1, 0x59,
+ 0xc5, 0x65, 0xb0, 0x0e, 0xc1, 0x50, 0xce, 0x29, 0xb3, 0x0e, 0xc1, 0xa8,
+ 0xc7, 0x29, 0xba, 0x0e, 0xc1, 0xa1, 0xc4, 0x05, 0xa6, 0x0e, 0xc1, 0x6a,
+ 0x03, 0xb2, 0x11, 0xcb, 0x49, 0x87, 0x0e, 0xc1, 0x90, 0x00, 0x43, 0xb2,
+ 0x15, 0xc4, 0x16, 0x02, 0x0e, 0xc1, 0x79, 0xc9, 0x14, 0xd1, 0x0e, 0xc1,
+ 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0};
const uint8_t *UnicodeNameToCodepointIndex = UnicodeNameToCodepointIndex_;
-const std::size_t UnicodeNameToCodepointIndexSize = 241561;
+const std::size_t UnicodeNameToCodepointIndexSize = 242216;
const std::size_t UnicodeNameToCodepointLargestNameSize = 74;
} // namespace unicode
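The regenerated index above backs LLVM's named-character lookup (used, for example, to resolve \N{...} escape sequences). As a rough illustration of how the table is consumed — not part of this commit, and assuming the strict lookup entry point declared in llvm/Support/Unicode.h — a minimal sketch:

// Sketch only: assumes llvm::sys::unicode::nameToCodepointStrict from
// llvm/Support/Unicode.h, which resolves an exact Unicode character name
// against the UnicodeNameToCodepointIndex table regenerated above.
#include "llvm/Support/Unicode.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>

int main() {
  if (auto CP = llvm::sys::unicode::nameToCodepointStrict("LATIN SMALL LETTER A"))
    llvm::outs() << "codepoint: " << static_cast<uint32_t>(*CP) << "\n"; // 97
  return 0;
}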
diff --git a/contrib/llvm-project/llvm/lib/Support/Unix/Memory.inc b/contrib/llvm-project/llvm/lib/Support/Unix/Memory.inc
index 4c8f6b2ea7d3..69bd1164343d 100644
--- a/contrib/llvm-project/llvm/lib/Support/Unix/Memory.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Unix/Memory.inc
@@ -237,7 +237,8 @@ void Memory::InvalidateInstructionCache(const void *Addr, size_t Len) {
for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
asm volatile("icbi 0, %0" : : "r"(Line));
asm volatile("isync");
-#elif (defined(__arm__) || defined(__aarch64__) || defined(__mips__)) && \
+#elif (defined(__arm__) || defined(__aarch64__) || defined(__loongarch__) || \
+ defined(__mips__)) && \
defined(__GNUC__)
// FIXME: Can we safely always call this for __GNUC__ everywhere?
const char *Start = static_cast<const char *>(Addr);
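The new __loongarch__ case joins the architectures whose instruction caches are flushed through the compiler builtin rather than inline assembly; the branch above ends in a __clear_cache call over the affected range. A standalone sketch of that mechanism (illustration only, not the in-tree code):

// Sketch: invalidate the instruction cache for a freshly written code range
// via the GCC/Clang builtin that the __arm__/__aarch64__/__loongarch__/
// __mips__ branch above ultimately relies on.
void invalidateICache(void *Addr, unsigned long Len) {
  char *Start = static_cast<char *>(Addr);
  __builtin___clear_cache(Start, Start + Len);
}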
diff --git a/contrib/llvm-project/llvm/lib/Support/Unix/Path.inc b/contrib/llvm-project/llvm/lib/Support/Unix/Path.inc
index e2aece49cbc5..9f89d63bb0fd 100644
--- a/contrib/llvm-project/llvm/lib/Support/Unix/Path.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Unix/Path.inc
@@ -126,10 +126,10 @@ namespace fs {
const file_t kInvalidFile = -1;
-#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
- defined(__minix) || defined(__FreeBSD_kernel__) || defined(__linux__) || \
- defined(__CYGWIN__) || defined(__DragonFly__) || defined(_AIX) || \
- defined(__GNU__) || (defined(__sun__) && defined(__svr4__))
+#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
+ defined(__FreeBSD_kernel__) || defined(__linux__) || defined(__CYGWIN__) || \
+ defined(__DragonFly__) || defined(_AIX) || defined(__GNU__) || \
+ (defined(__sun__) && defined(__svr4__) || defined(__HAIKU__))
static int test_dir(char ret[PATH_MAX], const char *dir, const char *bin) {
struct stat sb;
char fullpath[PATH_MAX];
@@ -233,8 +233,8 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
// Fall back to argv[0] if auxiliary vectors are not available.
if (getprogpath(exe_path, argv0) != NULL)
return exe_path;
-#elif defined(__NetBSD__) || defined(__OpenBSD__) || defined(__minix) || \
- defined(__DragonFly__) || defined(__FreeBSD_kernel__) || defined(_AIX)
+#elif defined(_AIX) || defined(__DragonFly__) || defined(__FreeBSD_kernel__) || \
+ defined(__NetBSD__)
const char *curproc = "/proc/curproc/file";
char exe_path[PATH_MAX];
if (sys::fs::exists(curproc)) {
@@ -283,6 +283,11 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
// Fall back to the classical detection.
if (getprogpath(exe_path, argv0))
return exe_path;
+#elif defined(__OpenBSD__) || defined(__HAIKU__)
+ char exe_path[PATH_MAX];
+ // argv[0] only
+ if (getprogpath(exe_path, argv0) != NULL)
+ return exe_path;
#elif defined(__sun__) && defined(__svr4__)
char exe_path[PATH_MAX];
const char *aPath = "/proc/self/execname";
@@ -647,7 +652,7 @@ std::error_code equivalent(const Twine &A, const Twine &B, bool &result) {
static void expandTildeExpr(SmallVectorImpl<char> &Path) {
StringRef PathStr(Path.begin(), Path.size());
- if (PathStr.empty() || !PathStr.startswith("~"))
+ if (PathStr.empty() || !PathStr.starts_with("~"))
return;
PathStr = PathStr.drop_front();
@@ -951,7 +956,7 @@ ErrorOr<basic_file_status> directory_entry::status() const {
// FreeBSD optionally provides /proc/self/fd, but it is incompatible with
// Linux. The thing to use is realpath.
//
-#if !defined(__FreeBSD__)
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__)
#define TRY_PROC_SELF_FD
#endif
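OpenBSD and Haiku now take an argv[0]-only branch in getMainExecutable, and OpenBSD additionally opts out of the /proc/self/fd path. The underlying idea of the argv[0] fallback is conventional: use argv[0] directly when it contains a slash, otherwise search each $PATH component, then canonicalize with realpath(3). A self-contained sketch of that idea — the in-tree getprogpath helper differs in detail, and edge cases are elided:

#include <climits>
#include <cstdlib>
#include <cstring>
#include <string>
#include <unistd.h>

// Resolve argv[0] to an absolute, canonical executable path. Sketch only.
static bool progPathFromArgv0(char Out[PATH_MAX], const char *Argv0) {
  if (std::strchr(Argv0, '/'))
    return ::realpath(Argv0, Out) != nullptr;
  const char *Path = std::getenv("PATH");
  if (!Path)
    return false;
  std::string Dirs(Path);
  for (size_t Pos = 0; Pos <= Dirs.size();) {
    size_t Next = Dirs.find(':', Pos);
    if (Next == std::string::npos)
      Next = Dirs.size();
    std::string Cand = Dirs.substr(Pos, Next - Pos) + "/" + Argv0;
    if (::access(Cand.c_str(), X_OK) == 0)
      return ::realpath(Cand.c_str(), Out) != nullptr;
    Pos = Next + 1;
  }
  return false;
}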
diff --git a/contrib/llvm-project/llvm/lib/Support/Unix/Process.inc b/contrib/llvm-project/llvm/lib/Support/Unix/Process.inc
index c8b15cb759df..a003596e8c02 100644
--- a/contrib/llvm-project/llvm/lib/Support/Unix/Process.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Unix/Process.inc
@@ -62,7 +62,9 @@ getRUsageTimes() {
::getrusage(RUSAGE_SELF, &RU);
return {toDuration(RU.ru_utime), toDuration(RU.ru_stime)};
#else
+#ifndef __MVS__ // Exclude for MVS in case -pedantic is used
#warning Cannot get usage times on this platform
+#endif
return {std::chrono::microseconds::zero(), std::chrono::microseconds::zero()};
#endif
}
@@ -117,7 +119,9 @@ size_t Process::GetMallocUsage() {
return EndOfMemory - StartOfMemory;
return 0;
#else
+#ifndef __MVS__ // Exclude for MVS in case -pedantic is used
#warning Cannot get malloc info on this platform
+#endif
return 0;
#endif
}
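Both hunks only guard the fallback #warning, since that directive is a language extension that -pedantic flags on z/OS; the successful paths keep converting getrusage(2) results into std::chrono durations. For reference, a self-contained sketch of that conversion (illustration, not the in-tree toDuration helper):

#include <chrono>
#include <sys/resource.h>
#include <sys/time.h>
#include <utility>

// Convert a timeval from getrusage(2) into a std::chrono duration, and fetch
// the user/system CPU times for the current process. Sketch only.
static std::chrono::microseconds toDur(const timeval &TV) {
  return std::chrono::seconds(TV.tv_sec) +
         std::chrono::microseconds(TV.tv_usec);
}

static std::pair<std::chrono::microseconds, std::chrono::microseconds>
userAndSystemTime() {
  rusage RU;
  ::getrusage(RUSAGE_SELF, &RU);
  return {toDur(RU.ru_utime), toDur(RU.ru_stime)};
}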
diff --git a/contrib/llvm-project/llvm/lib/Support/Unix/Program.inc b/contrib/llvm-project/llvm/lib/Support/Unix/Program.inc
index 897e22711ae2..260719b2b58d 100644
--- a/contrib/llvm-project/llvm/lib/Support/Unix/Program.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Unix/Program.inc
@@ -20,11 +20,13 @@
#include "Unix.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
+#include "llvm/Support/AutoConvert.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/SystemZ/zOSSupport.h"
#include "llvm/Support/raw_ostream.h"
#if HAVE_SYS_STAT_H
#include <sys/stat.h>
@@ -340,10 +342,10 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
namespace llvm {
namespace sys {
-#ifndef _AIX
-using ::wait4;
-#else
+#if defined(_AIX)
static pid_t(wait4)(pid_t pid, int *status, int options, struct rusage *usage);
+#elif !defined(__Fuchsia__)
+using ::wait4;
#endif
} // namespace sys
@@ -414,6 +416,7 @@ ProcessInfo llvm::sys::Wait(const ProcessInfo &PI,
// Parent process: Wait for the child process to terminate.
int status = 0;
ProcessInfo WaitResult;
+#ifndef __Fuchsia__
rusage Info;
if (ProcStat)
ProcStat->reset();
@@ -421,6 +424,7 @@ ProcessInfo llvm::sys::Wait(const ProcessInfo &PI,
do {
WaitResult.Pid = sys::wait4(ChildPid, &status, WaitPidOptions, &Info);
} while (WaitUntilTerminates && WaitResult.Pid == -1 && errno == EINTR);
+#endif
if (WaitResult.Pid != PI.Pid) {
if (WaitResult.Pid == 0) {
@@ -459,15 +463,17 @@ ProcessInfo llvm::sys::Wait(const ProcessInfo &PI,
sigaction(SIGALRM, &Old, nullptr);
}
+#ifndef __Fuchsia__
if (ProcStat) {
std::chrono::microseconds UserT = toDuration(Info.ru_utime);
std::chrono::microseconds KernelT = toDuration(Info.ru_stime);
uint64_t PeakMemory = 0;
-#ifndef __HAIKU__
+#if !defined(__HAIKU__) && !defined(__MVS__)
PeakMemory = static_cast<uint64_t>(Info.ru_maxrss);
#endif
*ProcStat = ProcessStatistics{UserT + KernelT, UserT, PeakMemory};
}
+#endif
// Return the proper exit status. Detect error conditions
// so we can return -1 for them and set ErrMsg informatively.
@@ -516,8 +522,12 @@ std::error_code llvm::sys::ChangeStdoutMode(fs::OpenFlags Flags) {
}
std::error_code llvm::sys::ChangeStdinToBinary() {
+#ifdef __MVS__
+ return disableAutoConversion(STDIN_FILENO);
+#else
// Do nothing, as Unix doesn't differentiate between text and binary.
return std::error_code();
+#endif
}
std::error_code llvm::sys::ChangeStdoutToBinary() {
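The wait4 shuffle above exists because Fuchsia provides no wait4(2); the new guards compile the rusage collection out entirely on that platform. A hypothetical helper sketching the same trade-off — fall back to waitpid(2) and report that no resource-usage data was gathered — not the in-tree Wait() implementation:

#include <sys/resource.h>
#include <sys/types.h>
#include <sys/wait.h>

// Wait for a child, collecting rusage only where wait4(2) exists. Sketch.
static pid_t waitChild(pid_t Pid, int &Status, rusage &Usage, bool &HaveUsage) {
#if defined(__Fuchsia__)
  HaveUsage = false;
  return ::waitpid(Pid, &Status, 0);
#else
  HaveUsage = true;
  return ::wait4(Pid, &Status, 0, &Usage);
#endif
}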
diff --git a/contrib/llvm-project/llvm/lib/Support/Unix/Signals.inc b/contrib/llvm-project/llvm/lib/Support/Unix/Signals.inc
index fcf5701afcfd..792b0fd66b45 100644
--- a/contrib/llvm-project/llvm/lib/Support/Unix/Signals.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Unix/Signals.inc
@@ -510,6 +510,117 @@ static bool findModulesAndOffsets(void **StackTrace, int Depth,
dl_iterate_phdr(dl_iterate_phdr_cb, &data);
return true;
}
+
+class DSOMarkupPrinter {
+ llvm::raw_ostream &OS;
+ const char *MainExecutableName;
+ size_t ModuleCount = 0;
+ bool IsFirst = true;
+
+public:
+ DSOMarkupPrinter(llvm::raw_ostream &OS, const char *MainExecutableName)
+ : OS(OS), MainExecutableName(MainExecutableName) {}
+
+ /// Print llvm-symbolizer markup describing the layout of the given DSO.
+ void printDSOMarkup(dl_phdr_info *Info) {
+ ArrayRef<uint8_t> BuildID = findBuildID(Info);
+ if (BuildID.empty())
+ return;
+ OS << format("{{{module:%d:%s:elf:", ModuleCount,
+ IsFirst ? MainExecutableName : Info->dlpi_name);
+ for (uint8_t X : BuildID)
+ OS << format("%02x", X);
+ OS << "}}}\n";
+
+ for (int I = 0; I < Info->dlpi_phnum; I++) {
+ const auto *Phdr = &Info->dlpi_phdr[I];
+ if (Phdr->p_type != PT_LOAD)
+ continue;
+ uintptr_t StartAddress = Info->dlpi_addr + Phdr->p_vaddr;
+ uintptr_t ModuleRelativeAddress = Phdr->p_vaddr;
+ std::array<char, 4> ModeStr = modeStrFromFlags(Phdr->p_flags);
+ OS << format("{{{mmap:%#016x:%#x:load:%d:%s:%#016x}}}\n", StartAddress,
+ Phdr->p_memsz, ModuleCount, &ModeStr[0],
+ ModuleRelativeAddress);
+ }
+ IsFirst = false;
+ ModuleCount++;
+ }
+
+ /// Callback for use with dl_iterate_phdr. The last dl_iterate_phdr argument
+ /// must be a pointer to an instance of this class.
+ static int printDSOMarkup(dl_phdr_info *Info, size_t Size, void *Arg) {
+ static_cast<DSOMarkupPrinter *>(Arg)->printDSOMarkup(Info);
+ return 0;
+ }
+
+ // Returns the build ID for the given DSO as an array of bytes. Returns an
+ // empty array if none could be found.
+ ArrayRef<uint8_t> findBuildID(dl_phdr_info *Info) {
+ for (int I = 0; I < Info->dlpi_phnum; I++) {
+ const auto *Phdr = &Info->dlpi_phdr[I];
+ if (Phdr->p_type != PT_NOTE)
+ continue;
+
+ ArrayRef<uint8_t> Notes(
+ reinterpret_cast<const uint8_t *>(Info->dlpi_addr + Phdr->p_vaddr),
+ Phdr->p_memsz);
+ while (Notes.size() > 12) {
+ uint32_t NameSize = *reinterpret_cast<const uint32_t *>(Notes.data());
+ Notes = Notes.drop_front(4);
+ uint32_t DescSize = *reinterpret_cast<const uint32_t *>(Notes.data());
+ Notes = Notes.drop_front(4);
+ uint32_t Type = *reinterpret_cast<const uint32_t *>(Notes.data());
+ Notes = Notes.drop_front(4);
+
+ ArrayRef<uint8_t> Name = Notes.take_front(NameSize);
+ auto CurPos = reinterpret_cast<uintptr_t>(Notes.data());
+ uint32_t BytesUntilDesc =
+ alignToPowerOf2(CurPos + NameSize, 4) - CurPos;
+ if (BytesUntilDesc >= Notes.size())
+ break;
+ Notes = Notes.drop_front(BytesUntilDesc);
+
+ ArrayRef<uint8_t> Desc = Notes.take_front(DescSize);
+ CurPos = reinterpret_cast<uintptr_t>(Notes.data());
+ uint32_t BytesUntilNextNote =
+ alignToPowerOf2(CurPos + DescSize, 4) - CurPos;
+ if (BytesUntilNextNote > Notes.size())
+ break;
+ Notes = Notes.drop_front(BytesUntilNextNote);
+
+ if (Type == 3 /*NT_GNU_BUILD_ID*/ && Name.size() >= 3 &&
+ Name[0] == 'G' && Name[1] == 'N' && Name[2] == 'U')
+ return Desc;
+ }
+ }
+ return {};
+ }
+
+ // Returns a symbolizer markup string describing the permissions on a DSO
+ // with the given p_flags.
+ std::array<char, 4> modeStrFromFlags(uint32_t Flags) {
+ std::array<char, 4> Mode;
+ char *Cur = &Mode[0];
+ if (Flags & PF_R)
+ *Cur++ = 'r';
+ if (Flags & PF_W)
+ *Cur++ = 'w';
+ if (Flags & PF_X)
+ *Cur++ = 'x';
+ *Cur = '\0';
+ return Mode;
+ }
+};
+
+static bool printMarkupContext(llvm::raw_ostream &OS,
+ const char *MainExecutableName) {
+ OS << "{{{reset}}}\n";
+ DSOMarkupPrinter MP(OS, MainExecutableName);
+ dl_iterate_phdr(DSOMarkupPrinter::printDSOMarkup, &MP);
+ return true;
+}
+
#elif ENABLE_BACKTRACES && defined(__APPLE__) && defined(__LP64__)
static bool findModulesAndOffsets(void **StackTrace, int Depth,
const char **Modules, intptr_t *Offsets,
@@ -544,6 +655,11 @@ static bool findModulesAndOffsets(void **StackTrace, int Depth,
}
return true;
}
+
+static bool printMarkupContext(llvm::raw_ostream &OS,
+ const char *MainExecutableName) {
+ return false;
+}
#else
/// Backtraces are not enabled or we don't yet know how to find all loaded DSOs
/// on this platform.
@@ -553,6 +669,11 @@ static bool findModulesAndOffsets(void **StackTrace, int Depth,
StringSaver &StrPool) {
return false;
}
+
+static bool printMarkupContext(llvm::raw_ostream &OS,
+ const char *MainExecutableName) {
+ return false;
+}
#endif // ENABLE_BACKTRACES && ... (findModulesAndOffsets variants)
#if ENABLE_BACKTRACES && defined(HAVE__UNWIND_BACKTRACE)
@@ -613,6 +734,8 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS, int Depth) {
// backtrace() for printing a symbolized stack trace.
if (!Depth)
Depth = depth;
+ if (printMarkupStackTrace(Argv0, StackTrace, Depth, OS))
+ return;
if (printSymbolizedStackTrace(Argv0, StackTrace, Depth, OS))
return;
OS << "Stack dump without symbol names (ensure you have llvm-symbolizer in "
diff --git a/contrib/llvm-project/llvm/lib/Support/Unix/Threading.inc b/contrib/llvm-project/llvm/lib/Support/Unix/Threading.inc
index 819748db4ec2..55e7dcfa4678 100644
--- a/contrib/llvm-project/llvm/lib/Support/Unix/Threading.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Unix/Threading.inc
@@ -371,7 +371,7 @@ static int computeHostNumPhysicalCores() {
}
return CPU_COUNT(&Enabled);
}
-#elif defined(__linux__) && defined(__s390x__)
+#elif (defined(__linux__) && defined(__s390x__)) || defined(_AIX)
static int computeHostNumPhysicalCores() {
return sysconf(_SC_NPROCESSORS_ONLN);
}
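AIX joins the s390x Linux special case here: with no reliable physical-core topology to query, the count of online logical processors is the best available answer. The sysconf pattern in isolation:

#include <unistd.h>

// Number of logical CPUs currently online, or -1 if unknown. Sketch only.
static int onlineCpus() {
  long N = ::sysconf(_SC_NPROCESSORS_ONLN);
  return N > 0 ? static_cast<int>(N) : -1;
}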
diff --git a/contrib/llvm-project/llvm/lib/Support/VirtualFileSystem.cpp b/contrib/llvm-project/llvm/lib/Support/VirtualFileSystem.cpp
index d381d79fba96..367e794d38f6 100644
--- a/contrib/llvm-project/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/VirtualFileSystem.cpp
@@ -489,7 +489,7 @@ void OverlayFileSystem::printImpl(raw_ostream &OS, PrintType Type,
if (Type == PrintType::Contents)
Type = PrintType::Summary;
- for (auto FS : overlays_range())
+ for (const auto &FS : overlays_range())
FS->print(OS, Type, IndentLevel + 1);
}
@@ -552,7 +552,7 @@ class CombiningDirIterImpl : public llvm::vfs::detail::DirIterImpl {
public:
CombiningDirIterImpl(ArrayRef<FileSystemPtr> FileSystems, std::string Dir,
std::error_code &EC) {
- for (auto FS : FileSystems) {
+ for (const auto &FS : FileSystems) {
std::error_code FEC;
directory_iterator Iter = FS->dir_begin(Dir, FEC);
if (FEC && FEC != errc::no_such_file_or_directory) {
@@ -1337,7 +1337,7 @@ std::error_code RedirectingFileSystem::isLocal(const Twine &Path_,
SmallString<256> Path;
Path_.toVector(Path);
- if (std::error_code EC = makeCanonical(Path))
+ if (makeCanonical(Path))
return {};
return ExternalFS->isLocal(Path, Result);
@@ -1385,7 +1385,7 @@ RedirectingFileSystem::makeAbsolute(StringRef WorkingDir,
std::string Result = std::string(WorkingDir);
StringRef Dir(Result);
- if (!Dir.endswith(sys::path::get_separator(style))) {
+ if (!Dir.ends_with(sys::path::get_separator(style))) {
Result += sys::path::get_separator(style);
}
// backslashes '\' are legit path characters under POSIX. Windows APIs
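Two mechanical cleanups run through this file: the overlay loops take the ref-counted file-system handles by const reference, avoiding a reference-count bump per iteration, and StringRef's startswith/endswith give way to the std::string_view-style starts_with/ends_with spellings. The rename in isolation:

#include "llvm/ADT/StringRef.h"

// starts_with/ends_with are the std::string_view-compatible spellings on
// llvm::StringRef; the camelCase forms they replace are being phased out.
static bool looksLikeHomePath(llvm::StringRef Path) {
  return Path.starts_with("~") || Path.ends_with("/home");
}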
diff --git a/contrib/llvm-project/llvm/lib/Support/Windows/Path.inc b/contrib/llvm-project/llvm/lib/Support/Windows/Path.inc
index b949b724509f..168a63bb2d96 100644
--- a/contrib/llvm-project/llvm/lib/Support/Windows/Path.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Windows/Path.inc
@@ -76,7 +76,7 @@ std::error_code widenPath(const Twine &Path8, SmallVectorImpl<wchar_t> &Path16,
// If the path is a long path, mangled into forward slashes, normalize
// back to backslashes here.
- if (Path8Str.startswith("//?/"))
+ if (Path8Str.starts_with("//?/"))
llvm::sys::path::native(Path8Str, path::Style::windows_backslash);
if (std::error_code EC = UTF8ToUTF16(Path8Str, Path16))
@@ -96,7 +96,7 @@ std::error_code widenPath(const Twine &Path8, SmallVectorImpl<wchar_t> &Path16,
const char *const LongPathPrefix = "\\\\?\\";
if ((Path16.size() + CurPathLen) < MaxPathLen ||
- Path8Str.startswith(LongPathPrefix))
+ Path8Str.starts_with(LongPathPrefix))
return std::error_code();
if (!IsAbsolute) {
@@ -158,12 +158,7 @@ std::string getMainExecutable(const char *argv0, void *MainExecAddr) {
}
UniqueID file_status::getUniqueID() const {
- // The file is uniquely identified by the volume serial number along
- // with the 64-bit file identifier.
- uint64_t FileID = (static_cast<uint64_t>(FileIndexHigh) << 32ULL) |
- static_cast<uint64_t>(FileIndexLow);
-
- return UniqueID(VolumeSerialNumber, FileID);
+ return UniqueID(VolumeSerialNumber, PathHash);
}
ErrorOr<space_info> disk_space(const Twine &Path) {
@@ -362,16 +357,17 @@ std::error_code is_local(const Twine &path, bool &result) {
}
static std::error_code realPathFromHandle(HANDLE H,
- SmallVectorImpl<wchar_t> &Buffer) {
+ SmallVectorImpl<wchar_t> &Buffer,
+ DWORD flags = VOLUME_NAME_DOS) {
Buffer.resize_for_overwrite(Buffer.capacity());
DWORD CountChars = ::GetFinalPathNameByHandleW(
- H, Buffer.begin(), Buffer.capacity(), FILE_NAME_NORMALIZED);
+ H, Buffer.begin(), Buffer.capacity(), FILE_NAME_NORMALIZED | flags);
if (CountChars && CountChars >= Buffer.capacity()) {
// The buffer wasn't big enough, try again. In this case the return value
// *does* indicate the size of the null terminator.
Buffer.resize_for_overwrite(CountChars);
CountChars = ::GetFinalPathNameByHandleW(H, Buffer.begin(), Buffer.size(),
- FILE_NAME_NORMALIZED);
+ FILE_NAME_NORMALIZED | flags);
}
Buffer.truncate(CountChars);
if (CountChars == 0)
@@ -647,12 +643,7 @@ bool can_execute(const Twine &Path) {
bool equivalent(file_status A, file_status B) {
assert(status_known(A) && status_known(B));
- return A.FileIndexHigh == B.FileIndexHigh &&
- A.FileIndexLow == B.FileIndexLow && A.FileSizeHigh == B.FileSizeHigh &&
- A.FileSizeLow == B.FileSizeLow &&
- A.LastWriteTimeHigh == B.LastWriteTimeHigh &&
- A.LastWriteTimeLow == B.LastWriteTimeLow &&
- A.VolumeSerialNumber == B.VolumeSerialNumber;
+ return A.getUniqueID() == B.getUniqueID();
}
std::error_code equivalent(const Twine &A, const Twine &B, bool &result) {
@@ -675,7 +666,7 @@ static bool isReservedName(StringRef path) {
// First, check to see if this is a device namespace, which always
// starts with \\.\, since device namespaces are not legal file paths.
- if (path.startswith("\\\\.\\"))
+ if (path.starts_with("\\\\.\\"))
return true;
// Then compare against the list of ancient reserved names.
@@ -698,6 +689,7 @@ static perms perms_from_attrs(DWORD Attrs) {
}
static std::error_code getStatus(HANDLE FileHandle, file_status &Result) {
+ SmallVector<wchar_t, MAX_PATH> ntPath;
if (FileHandle == INVALID_HANDLE_VALUE)
goto handle_status_error;
@@ -725,13 +717,37 @@ static std::error_code getStatus(HANDLE FileHandle, file_status &Result) {
if (!::GetFileInformationByHandle(FileHandle, &Info))
goto handle_status_error;
+ // File indices aren't necessarily stable after closing the file handle;
+ // instead hash a canonicalized path.
+ //
+ // For getting a canonical path to the file, call GetFinalPathNameByHandleW
+ // with VOLUME_NAME_NT. We don't really care exactly what the path looks
+ // like here, as long as it is canonical (e.g. doesn't differentiate between
+ // whether a file was referred to with upper/lower case names originally).
+ // The default format with VOLUME_NAME_DOS doesn't work with all file system
+ // drivers, such as ImDisk. (See
+ // https://github.com/rust-lang/rust/pull/86447.)
+ uint64_t PathHash;
+ if (std::error_code EC =
+ realPathFromHandle(FileHandle, ntPath, VOLUME_NAME_NT)) {
+ // If realPathFromHandle failed, fall back on the fields
+ // nFileIndex{High,Low} instead. They're not necessarily stable on all file
+ // systems as they're only documented as being unique/stable as long as the
+ // file handle is open - but they're a decent fallback if we couldn't get
+ // the canonical path.
+ PathHash = (static_cast<uint64_t>(Info.nFileIndexHigh) << 32ULL) |
+ static_cast<uint64_t>(Info.nFileIndexLow);
+ } else {
+ PathHash = hash_combine_range(ntPath.begin(), ntPath.end());
+ }
+
Result = file_status(
file_type_from_attrs(Info.dwFileAttributes),
perms_from_attrs(Info.dwFileAttributes), Info.nNumberOfLinks,
Info.ftLastAccessTime.dwHighDateTime, Info.ftLastAccessTime.dwLowDateTime,
Info.ftLastWriteTime.dwHighDateTime, Info.ftLastWriteTime.dwLowDateTime,
Info.dwVolumeSerialNumber, Info.nFileSizeHigh, Info.nFileSizeLow,
- Info.nFileIndexHigh, Info.nFileIndexLow);
+ PathHash);
return std::error_code();
handle_status_error:
@@ -924,10 +940,10 @@ static bool hasFlushBufferKernelBug() {
static bool isEXE(StringRef Magic) {
static const char PEMagic[] = {'P', 'E', '\0', '\0'};
- if (Magic.startswith(StringRef("MZ")) && Magic.size() >= 0x3c + 4) {
+ if (Magic.starts_with(StringRef("MZ")) && Magic.size() >= 0x3c + 4) {
uint32_t off = read32le(Magic.data() + 0x3c);
// PE/COFF file, either EXE or DLL.
- if (Magic.substr(off).startswith(StringRef(PEMagic, sizeof(PEMagic))))
+ if (Magic.substr(off).starts_with(StringRef(PEMagic, sizeof(PEMagic))))
return true;
}
return false;
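The file_status change above replaces the nFileIndex pair with a hash of the canonical NT path, since file indices are only documented as stable while a handle is open and VOLUME_NAME_DOS fails under some filesystem drivers. A sketch of just the hashing step with llvm::hash_combine_range from llvm/ADT/Hashing.h — the realPathFromHandle plumbing and error handling are elided:

#include "llvm/ADT/Hashing.h"
#include <cstdint>
#include <cwchar>

// Hash a canonical NT-style path (as returned by GetFinalPathNameByHandleW
// with VOLUME_NAME_NT) into the 64-bit value stored as PathHash. Sketch only.
static uint64_t hashCanonicalPath(const wchar_t *NtPath) {
  const wchar_t *End = NtPath + std::wcslen(NtPath);
  return static_cast<uint64_t>(llvm::hash_combine_range(NtPath, End));
}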
diff --git a/contrib/llvm-project/llvm/lib/Support/Windows/Process.inc b/contrib/llvm-project/llvm/lib/Support/Windows/Process.inc
index 493209052a1c..a54c06d46870 100644
--- a/contrib/llvm-project/llvm/lib/Support/Windows/Process.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Windows/Process.inc
@@ -158,7 +158,7 @@ static std::error_code WildcardExpand(StringRef Arg,
// option. Paths that start with \\?\ are absolute paths, and aren't
// expected to be used with wildcard expressions.
if (Arg.find_first_of("*?") == StringRef::npos || Arg == "/?" ||
- Arg == "-?" || Arg.startswith("\\\\?\\")) {
+ Arg == "-?" || Arg.starts_with("\\\\?\\")) {
Args.push_back(Arg.data());
return EC;
}
diff --git a/contrib/llvm-project/llvm/lib/Support/Windows/Signals.inc b/contrib/llvm-project/llvm/lib/Support/Windows/Signals.inc
index cb82f55fc38b..34635b5aba7a 100644
--- a/contrib/llvm-project/llvm/lib/Support/Windows/Signals.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Windows/Signals.inc
@@ -302,6 +302,11 @@ static bool findModulesAndOffsets(void **StackTrace, int Depth,
return true;
}
+static bool printMarkupContext(llvm::raw_ostream &OS,
+ const char *MainExecutableName) {
+ return false;
+}
+
static void PrintStackTraceForThread(llvm::raw_ostream &OS, HANDLE hProcess,
HANDLE hThread, STACKFRAME64 &StackFrame,
CONTEXT *Context) {
diff --git a/contrib/llvm-project/llvm/lib/Support/YAMLParser.cpp b/contrib/llvm-project/llvm/lib/Support/YAMLParser.cpp
index 6ac2c6aeeb46..fdd0ed6e682e 100644
--- a/contrib/llvm-project/llvm/lib/Support/YAMLParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/YAMLParser.cpp
@@ -392,6 +392,10 @@ private:
/// Pos is whitespace or a new line
bool isBlankOrBreak(StringRef::iterator Position);
+ /// Return true if the minimal well-formed code unit subsequence at
+ /// Pos is considered a "safe" character for plain scalars.
+ bool isPlainSafeNonBlank(StringRef::iterator Position);
+
/// Return true if the line is a line break, false otherwise.
bool isLineEmpty(StringRef Line);
@@ -545,6 +549,10 @@ private:
/// Can the next token be the start of a simple key?
bool IsSimpleKeyAllowed;
+ /// Can the next token be a value indicator even if it does not have a
+ /// trailing space?
+ bool IsAdjacentValueAllowedInFlow;
+
/// True if an error has occurred.
bool Failed;
@@ -868,6 +876,7 @@ void Scanner::init(MemoryBufferRef Buffer) {
FlowLevel = 0;
IsStartOfStream = true;
IsSimpleKeyAllowed = true;
+ IsAdjacentValueAllowedInFlow = false;
Failed = false;
std::unique_ptr<MemoryBuffer> InputBufferOwner =
MemoryBuffer::getMemBuffer(Buffer, /*RequiresNullTerminator=*/false);
@@ -1049,6 +1058,15 @@ bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
*Position == '\n';
}
+bool Scanner::isPlainSafeNonBlank(StringRef::iterator Position) {
+ if (Position == End || isBlankOrBreak(Position))
+ return false;
+ if (FlowLevel &&
+ StringRef(Position, 1).find_first_of(",[]{}") != StringRef::npos)
+ return false;
+ return true;
+}
+
bool Scanner::isLineEmpty(StringRef Line) {
for (const auto *Position = Line.begin(); Position != Line.end(); ++Position)
if (!isBlankOrBreak(Position))
@@ -1189,6 +1207,7 @@ bool Scanner::scanStreamEnd() {
unrollIndent(-1);
SimpleKeys.clear();
IsSimpleKeyAllowed = false;
+ IsAdjacentValueAllowedInFlow = false;
Token T;
T.Kind = Token::TK_StreamEnd;
@@ -1202,6 +1221,7 @@ bool Scanner::scanDirective() {
unrollIndent(-1);
SimpleKeys.clear();
IsSimpleKeyAllowed = false;
+ IsAdjacentValueAllowedInFlow = false;
StringRef::iterator Start = Current;
consume('%');
@@ -1233,6 +1253,7 @@ bool Scanner::scanDocumentIndicator(bool IsStart) {
unrollIndent(-1);
SimpleKeys.clear();
IsSimpleKeyAllowed = false;
+ IsAdjacentValueAllowedInFlow = false;
Token T;
T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd;
@@ -1255,6 +1276,8 @@ bool Scanner::scanFlowCollectionStart(bool IsSequence) {
// And may also be followed by a simple key.
IsSimpleKeyAllowed = true;
+ // Adjacent values are allowed in flows only after JSON-style keys.
+ IsAdjacentValueAllowedInFlow = false;
++FlowLevel;
return true;
}
@@ -1262,6 +1285,7 @@ bool Scanner::scanFlowCollectionStart(bool IsSequence) {
bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
IsSimpleKeyAllowed = false;
+ IsAdjacentValueAllowedInFlow = true;
Token T;
T.Kind = IsSequence ? Token::TK_FlowSequenceEnd
: Token::TK_FlowMappingEnd;
@@ -1276,6 +1300,7 @@ bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
bool Scanner::scanFlowEntry() {
removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
IsSimpleKeyAllowed = true;
+ IsAdjacentValueAllowedInFlow = false;
Token T;
T.Kind = Token::TK_FlowEntry;
T.Range = StringRef(Current, 1);
@@ -1288,6 +1313,7 @@ bool Scanner::scanBlockEntry() {
rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end());
removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
IsSimpleKeyAllowed = true;
+ IsAdjacentValueAllowedInFlow = false;
Token T;
T.Kind = Token::TK_BlockEntry;
T.Range = StringRef(Current, 1);
@@ -1302,6 +1328,7 @@ bool Scanner::scanKey() {
removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
IsSimpleKeyAllowed = !FlowLevel;
+ IsAdjacentValueAllowedInFlow = false;
Token T;
T.Kind = Token::TK_Key;
@@ -1339,6 +1366,7 @@ bool Scanner::scanValue() {
rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
IsSimpleKeyAllowed = !FlowLevel;
}
+ IsAdjacentValueAllowedInFlow = false;
Token T;
T.Kind = Token::TK_Value;
@@ -1420,6 +1448,7 @@ bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
IsSimpleKeyAllowed = false;
+ IsAdjacentValueAllowedInFlow = true;
return true;
}
@@ -1434,21 +1463,9 @@ bool Scanner::scanPlainScalar() {
if (*Current == '#')
break;
- while (Current != End && !isBlankOrBreak(Current)) {
- if (FlowLevel && *Current == ':' &&
- (Current + 1 == End ||
- !(isBlankOrBreak(Current + 1) || *(Current + 1) == ','))) {
- setError("Found unexpected ':' while scanning a plain scalar", Current);
- return false;
- }
-
- // Check for the end of the plain scalar.
- if ( (*Current == ':' && isBlankOrBreak(Current + 1))
- || ( FlowLevel
- && (StringRef(Current, 1).find_first_of(",:?[]{}")
- != StringRef::npos)))
- break;
-
+ while (Current != End &&
+ ((*Current != ':' && isPlainSafeNonBlank(Current)) ||
+ (*Current == ':' && isPlainSafeNonBlank(Current + 1)))) {
StringRef::iterator i = skip_nb_char(Current);
if (i == Current)
break;
@@ -1499,6 +1516,7 @@ bool Scanner::scanPlainScalar() {
saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
IsSimpleKeyAllowed = false;
+ IsAdjacentValueAllowedInFlow = false;
return true;
}
@@ -1534,6 +1552,7 @@ bool Scanner::scanAliasOrAnchor(bool IsAlias) {
saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
IsSimpleKeyAllowed = false;
+ IsAdjacentValueAllowedInFlow = false;
return true;
}
@@ -1766,6 +1785,7 @@ bool Scanner::scanBlockScalar(bool IsLiteral) {
// New lines may start a simple key.
if (!FlowLevel)
IsSimpleKeyAllowed = true;
+ IsAdjacentValueAllowedInFlow = false;
Token T;
T.Kind = Token::TK_BlockScalar;
@@ -1799,6 +1819,7 @@ bool Scanner::scanTag() {
saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
IsSimpleKeyAllowed = false;
+ IsAdjacentValueAllowedInFlow = false;
return true;
}
@@ -1848,13 +1869,14 @@ bool Scanner::fetchMoreTokens() {
if (*Current == ',')
return scanFlowEntry();
- if (*Current == '-' && isBlankOrBreak(Current + 1))
+ if (*Current == '-' && (isBlankOrBreak(Current + 1) || Current + 1 == End))
return scanBlockEntry();
- if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1)))
+ if (*Current == '?' && (Current + 1 == End || isBlankOrBreak(Current + 1)))
return scanKey();
- if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1)))
+ if (*Current == ':' &&
+ (!isPlainSafeNonBlank(Current + 1) || IsAdjacentValueAllowedInFlow))
return scanValue();
if (*Current == '*')
@@ -1880,15 +1902,10 @@ bool Scanner::fetchMoreTokens() {
// Get a plain scalar.
StringRef FirstChar(Current, 1);
- if (!(isBlankOrBreak(Current)
- || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos)
- || (*Current == '-' && !isBlankOrBreak(Current + 1))
- || (!FlowLevel && (*Current == '?' || *Current == ':')
- && isBlankOrBreak(Current + 1))
- || (!FlowLevel && *Current == ':'
- && Current + 2 < End
- && *(Current + 1) == ':'
- && !isBlankOrBreak(Current + 2)))
+ if ((!isBlankOrBreak(Current) &&
+ FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") == StringRef::npos) ||
+ (FirstChar.find_first_of("?:-") != StringRef::npos &&
+ isPlainSafeNonBlank(Current + 1)))
return scanPlainScalar();
setError("Unrecognized character while tokenizing.", Current);
@@ -1951,7 +1968,7 @@ std::string Node::getVerbatimTag() const {
Ret = std::string(Doc->getTagMap().find("!")->second);
Ret += Raw.substr(1);
return Ret;
- } else if (Raw.startswith("!!")) {
+ } else if (Raw.starts_with("!!")) {
Ret = std::string(Doc->getTagMap().find("!!")->second);
Ret += Raw.substr(2);
return Ret;
@@ -2013,185 +2030,229 @@ bool Node::failed() const {
}
StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const {
- // TODO: Handle newlines properly. We need to remove leading whitespace.
- if (Value[0] == '"') { // Double quoted.
- // Pull off the leading and trailing "s.
- StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
- // Search for characters that would require unescaping the value.
- StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n");
- if (i != StringRef::npos)
- return unescapeDoubleQuoted(UnquotedValue, i, Storage);
+ if (Value[0] == '"')
+ return getDoubleQuotedValue(Value, Storage);
+ if (Value[0] == '\'')
+ return getSingleQuotedValue(Value, Storage);
+ return getPlainValue(Value, Storage);
+}
+
+/// parseScalarValue - A common parsing routine for all flow scalar styles.
+/// It handles line break characters by itself, adds regular content characters
+/// to the result, and forwards escaped sequences to the provided routine for
+/// the style-specific processing.
+///
+/// \param UnquotedValue - An input value without quotation marks.
+/// \param Storage - A storage for the result if the input value is multiline or
+/// contains escaped characters.
+/// \param LookupChars - A set of special characters to search in the input
+/// string. Should include line break characters and the escape character
+/// specific for the processing scalar style, if any.
+/// \param UnescapeCallback - This is called when the escape character is found
+/// in the input.
+/// \returns - The unfolded and unescaped value.
+static StringRef
+parseScalarValue(StringRef UnquotedValue, SmallVectorImpl<char> &Storage,
+ StringRef LookupChars,
+ std::function<StringRef(StringRef, SmallVectorImpl<char> &)>
+ UnescapeCallback) {
+ size_t I = UnquotedValue.find_first_of(LookupChars);
+ if (I == StringRef::npos)
return UnquotedValue;
- } else if (Value[0] == '\'') { // Single quoted.
- // Pull off the leading and trailing 's.
- StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
- StringRef::size_type i = UnquotedValue.find('\'');
- if (i != StringRef::npos) {
- // We're going to need Storage.
- Storage.clear();
- Storage.reserve(UnquotedValue.size());
- for (; i != StringRef::npos; i = UnquotedValue.find('\'')) {
- StringRef Valid(UnquotedValue.begin(), i);
- llvm::append_range(Storage, Valid);
- Storage.push_back('\'');
- UnquotedValue = UnquotedValue.substr(i + 2);
+
+ Storage.clear();
+ Storage.reserve(UnquotedValue.size());
+ char LastNewLineAddedAs = '\0';
+ for (; I != StringRef::npos; I = UnquotedValue.find_first_of(LookupChars)) {
+ if (UnquotedValue[I] != '\r' && UnquotedValue[I] != '\n') {
+ llvm::append_range(Storage, UnquotedValue.take_front(I));
+ UnquotedValue = UnescapeCallback(UnquotedValue.drop_front(I), Storage);
+ LastNewLineAddedAs = '\0';
+ continue;
+ }
+ if (size_t LastNonSWhite = UnquotedValue.find_last_not_of(" \t", I);
+ LastNonSWhite != StringRef::npos) {
+ llvm::append_range(Storage, UnquotedValue.take_front(LastNonSWhite + 1));
+ Storage.push_back(' ');
+ LastNewLineAddedAs = ' ';
+ } else {
+ // Note: we can't just check if the last character in Storage is ' ',
+ // '\n', or something else; that would give a wrong result for double
+ // quoted values containing an escaped space character before a new-line
+ // character.
+ switch (LastNewLineAddedAs) {
+ case ' ':
+ assert(!Storage.empty() && Storage.back() == ' ');
+ Storage.back() = '\n';
+ LastNewLineAddedAs = '\n';
+ break;
+ case '\n':
+ assert(!Storage.empty() && Storage.back() == '\n');
+ Storage.push_back('\n');
+ break;
+ default:
+ Storage.push_back(' ');
+ LastNewLineAddedAs = ' ';
+ break;
}
- llvm::append_range(Storage, UnquotedValue);
- return StringRef(Storage.begin(), Storage.size());
}
- return UnquotedValue;
+ // Handle Windows-style EOL
+ if (UnquotedValue.substr(I, 2) == "\r\n")
+ I++;
+ UnquotedValue = UnquotedValue.drop_front(I + 1).ltrim(" \t");
}
- // Plain.
- // Trim whitespace ('b-char' and 's-white').
- // NOTE: Alternatively we could change the scanner to not include whitespace
- // here in the first place.
- return Value.rtrim("\x0A\x0D\x20\x09");
+ llvm::append_range(Storage, UnquotedValue);
+ return StringRef(Storage.begin(), Storage.size());
}
-StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
- , StringRef::size_type i
- , SmallVectorImpl<char> &Storage)
- const {
- // Use Storage to build proper value.
- Storage.clear();
- Storage.reserve(UnquotedValue.size());
- for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) {
- // Insert all previous chars into Storage.
- StringRef Valid(UnquotedValue.begin(), i);
- llvm::append_range(Storage, Valid);
- // Chop off inserted chars.
- UnquotedValue = UnquotedValue.substr(i);
-
- assert(!UnquotedValue.empty() && "Can't be empty!");
-
- // Parse escape or line break.
+StringRef
+ScalarNode::getDoubleQuotedValue(StringRef RawValue,
+ SmallVectorImpl<char> &Storage) const {
+ assert(RawValue.size() >= 2 && RawValue.front() == '"' &&
+ RawValue.back() == '"');
+ StringRef UnquotedValue = RawValue.substr(1, RawValue.size() - 2);
+
+ auto UnescapeFunc = [this](StringRef UnquotedValue,
+ SmallVectorImpl<char> &Storage) {
+ assert(UnquotedValue.take_front(1) == "\\");
+ if (UnquotedValue.size() == 1) {
+ Token T;
+ T.Range = UnquotedValue;
+ setError("Unrecognized escape code", T);
+ Storage.clear();
+ return StringRef();
+ }
+ UnquotedValue = UnquotedValue.drop_front(1);
switch (UnquotedValue[0]) {
+ default: {
+ Token T;
+ T.Range = UnquotedValue.take_front(1);
+ setError("Unrecognized escape code", T);
+ Storage.clear();
+ return StringRef();
+ }
case '\r':
+ // Shrink the Windows-style EOL.
+ if (UnquotedValue.size() >= 2 && UnquotedValue[1] == '\n')
+ UnquotedValue = UnquotedValue.drop_front(1);
+ [[fallthrough]];
case '\n':
- Storage.push_back('\n');
- if ( UnquotedValue.size() > 1
- && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
- UnquotedValue = UnquotedValue.substr(1);
- UnquotedValue = UnquotedValue.substr(1);
+ return UnquotedValue.drop_front(1).ltrim(" \t");
+ case '0':
+ Storage.push_back(0x00);
break;
- default:
- if (UnquotedValue.size() == 1) {
- Token T;
- T.Range = StringRef(UnquotedValue.begin(), 1);
- setError("Unrecognized escape code", T);
- return "";
- }
- UnquotedValue = UnquotedValue.substr(1);
- switch (UnquotedValue[0]) {
- default: {
- Token T;
- T.Range = StringRef(UnquotedValue.begin(), 1);
- setError("Unrecognized escape code", T);
- return "";
- }
- case '\r':
- case '\n':
- // Remove the new line.
- if ( UnquotedValue.size() > 1
- && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
- UnquotedValue = UnquotedValue.substr(1);
- // If this was just a single byte newline, it will get skipped
- // below.
- break;
- case '0':
- Storage.push_back(0x00);
- break;
- case 'a':
- Storage.push_back(0x07);
- break;
- case 'b':
- Storage.push_back(0x08);
- break;
- case 't':
- case 0x09:
- Storage.push_back(0x09);
- break;
- case 'n':
- Storage.push_back(0x0A);
- break;
- case 'v':
- Storage.push_back(0x0B);
- break;
- case 'f':
- Storage.push_back(0x0C);
- break;
- case 'r':
- Storage.push_back(0x0D);
- break;
- case 'e':
- Storage.push_back(0x1B);
- break;
- case ' ':
- Storage.push_back(0x20);
- break;
- case '"':
- Storage.push_back(0x22);
- break;
- case '/':
- Storage.push_back(0x2F);
- break;
- case '\\':
- Storage.push_back(0x5C);
- break;
- case 'N':
- encodeUTF8(0x85, Storage);
- break;
- case '_':
- encodeUTF8(0xA0, Storage);
+ case 'a':
+ Storage.push_back(0x07);
+ break;
+ case 'b':
+ Storage.push_back(0x08);
+ break;
+ case 't':
+ case 0x09:
+ Storage.push_back(0x09);
+ break;
+ case 'n':
+ Storage.push_back(0x0A);
+ break;
+ case 'v':
+ Storage.push_back(0x0B);
+ break;
+ case 'f':
+ Storage.push_back(0x0C);
+ break;
+ case 'r':
+ Storage.push_back(0x0D);
+ break;
+ case 'e':
+ Storage.push_back(0x1B);
+ break;
+ case ' ':
+ Storage.push_back(0x20);
+ break;
+ case '"':
+ Storage.push_back(0x22);
+ break;
+ case '/':
+ Storage.push_back(0x2F);
+ break;
+ case '\\':
+ Storage.push_back(0x5C);
+ break;
+ case 'N':
+ encodeUTF8(0x85, Storage);
+ break;
+ case '_':
+ encodeUTF8(0xA0, Storage);
+ break;
+ case 'L':
+ encodeUTF8(0x2028, Storage);
+ break;
+ case 'P':
+ encodeUTF8(0x2029, Storage);
+ break;
+ case 'x': {
+ if (UnquotedValue.size() < 3)
+ // TODO: Report error.
break;
- case 'L':
- encodeUTF8(0x2028, Storage);
+ unsigned int UnicodeScalarValue;
+ if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue))
+ // TODO: Report error.
+ UnicodeScalarValue = 0xFFFD;
+ encodeUTF8(UnicodeScalarValue, Storage);
+ return UnquotedValue.drop_front(3);
+ }
+ case 'u': {
+ if (UnquotedValue.size() < 5)
+ // TODO: Report error.
break;
- case 'P':
- encodeUTF8(0x2029, Storage);
+ unsigned int UnicodeScalarValue;
+ if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue))
+ // TODO: Report error.
+ UnicodeScalarValue = 0xFFFD;
+ encodeUTF8(UnicodeScalarValue, Storage);
+ return UnquotedValue.drop_front(5);
+ }
+ case 'U': {
+ if (UnquotedValue.size() < 9)
+ // TODO: Report error.
break;
- case 'x': {
- if (UnquotedValue.size() < 3)
- // TODO: Report error.
- break;
- unsigned int UnicodeScalarValue;
- if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue))
- // TODO: Report error.
- UnicodeScalarValue = 0xFFFD;
- encodeUTF8(UnicodeScalarValue, Storage);
- UnquotedValue = UnquotedValue.substr(2);
- break;
- }
- case 'u': {
- if (UnquotedValue.size() < 5)
- // TODO: Report error.
- break;
- unsigned int UnicodeScalarValue;
- if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue))
- // TODO: Report error.
- UnicodeScalarValue = 0xFFFD;
- encodeUTF8(UnicodeScalarValue, Storage);
- UnquotedValue = UnquotedValue.substr(4);
- break;
- }
- case 'U': {
- if (UnquotedValue.size() < 9)
- // TODO: Report error.
- break;
- unsigned int UnicodeScalarValue;
- if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue))
- // TODO: Report error.
- UnicodeScalarValue = 0xFFFD;
- encodeUTF8(UnicodeScalarValue, Storage);
- UnquotedValue = UnquotedValue.substr(8);
- break;
- }
- }
- UnquotedValue = UnquotedValue.substr(1);
+ unsigned int UnicodeScalarValue;
+ if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue))
+ // TODO: Report error.
+ UnicodeScalarValue = 0xFFFD;
+ encodeUTF8(UnicodeScalarValue, Storage);
+ return UnquotedValue.drop_front(9);
}
- }
- llvm::append_range(Storage, UnquotedValue);
- return StringRef(Storage.begin(), Storage.size());
+ }
+ return UnquotedValue.drop_front(1);
+ };
+
+ return parseScalarValue(UnquotedValue, Storage, "\\\r\n", UnescapeFunc);
+}
+
+StringRef ScalarNode::getSingleQuotedValue(StringRef RawValue,
+ SmallVectorImpl<char> &Storage) {
+ assert(RawValue.size() >= 2 && RawValue.front() == '\'' &&
+ RawValue.back() == '\'');
+ StringRef UnquotedValue = RawValue.substr(1, RawValue.size() - 2);
+
+ auto UnescapeFunc = [](StringRef UnquotedValue,
+ SmallVectorImpl<char> &Storage) {
+ assert(UnquotedValue.take_front(2) == "''");
+ Storage.push_back('\'');
+ return UnquotedValue.drop_front(2);
+ };
+
+ return parseScalarValue(UnquotedValue, Storage, "'\r\n", UnescapeFunc);
+}
+
+StringRef ScalarNode::getPlainValue(StringRef RawValue,
+ SmallVectorImpl<char> &Storage) {
+ // Trim trailing whitespace ('b-char' and 's-white').
+ // NOTE: Alternatively we could change the scanner to not include whitespace
+ // here in the first place.
+ RawValue = RawValue.rtrim("\r\n \t");
+ return parseScalarValue(RawValue, Storage, "\r\n", nullptr);
}
Node *KeyValueNode::getKey() {
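After this refactor all three scalar flavors share parseScalarValue and differ only in their stop set and unescape hook: double-quoted scalars use the full escape table above (so "\x41\u00E9" decodes to 'A' followed by U+00E9), single-quoted scalars only fold the doubled quote '' back to a single ', and plain scalars pass a null unescape callback because they have no escapes at all.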
diff --git a/contrib/llvm-project/llvm/lib/Support/YAMLTraits.cpp b/contrib/llvm-project/llvm/lib/Support/YAMLTraits.cpp
index f21b7a0ca699..4aaf59be2ce5 100644
--- a/contrib/llvm-project/llvm/lib/Support/YAMLTraits.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/YAMLTraits.cpp
@@ -75,13 +75,6 @@ Input::~Input() = default;
std::error_code Input::error() { return EC; }
-// Pin the vtables to this file.
-void Input::HNode::anchor() {}
-void Input::EmptyHNode::anchor() {}
-void Input::ScalarHNode::anchor() {}
-void Input::MapHNode::anchor() {}
-void Input::SequenceHNode::anchor() {}
-
bool Input::outputting() const {
return false;
}
@@ -99,8 +92,9 @@ bool Input::setCurrentDocument() {
++DocIterator;
return setCurrentDocument();
}
+ releaseHNodeBuffers();
TopNode = createHNodes(N);
- CurrentNode = TopNode.get();
+ CurrentNode = TopNode;
return true;
}
return false;
@@ -162,6 +156,8 @@ bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault,
if (!CurrentNode) {
if (Required)
EC = make_error_code(errc::invalid_argument);
+ else
+ UseDefault = true;
return false;
}
@@ -174,7 +170,7 @@ bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault,
return false;
}
MN->ValidKeys.push_back(Key);
- HNode *Value = MN->Mapping[Key].first.get();
+ HNode *Value = MN->Mapping[Key].first;
if (!Value) {
if (Required)
setError(CurrentNode, Twine("missing required key '") + Key + "'");
@@ -237,7 +233,7 @@ bool Input::preflightElement(unsigned Index, void *&SaveInfo) {
return false;
if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
SaveInfo = CurrentNode;
- CurrentNode = SQ->Entries[Index].get();
+ CurrentNode = SQ->Entries[Index];
return true;
}
return false;
@@ -254,7 +250,7 @@ bool Input::preflightFlowElement(unsigned index, void *&SaveInfo) {
return false;
if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
SaveInfo = CurrentNode;
- CurrentNode = SQ->Entries[index].get();
+ CurrentNode = SQ->Entries[index];
return true;
}
return false;
@@ -313,7 +309,7 @@ bool Input::bitSetMatch(const char *Str, bool) {
if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
unsigned Index = 0;
for (auto &N : SQ->Entries) {
- if (ScalarHNode *SN = dyn_cast<ScalarHNode>(N.get())) {
+ if (ScalarHNode *SN = dyn_cast<ScalarHNode>(N)) {
if (SN->value().equals(Str)) {
BitValuesUsed[Index] = true;
return true;
@@ -336,7 +332,7 @@ void Input::endBitSetScalar() {
assert(BitValuesUsed.size() == SQ->Entries.size());
for (unsigned i = 0; i < SQ->Entries.size(); ++i) {
if (!BitValuesUsed[i]) {
- setError(SQ->Entries[i].get(), "unknown bit value");
+ setError(SQ->Entries[i], "unknown bit value");
return;
}
}
@@ -395,7 +391,14 @@ void Input::reportWarning(const SMRange &range, const Twine &message) {
Strm->printError(range, message, SourceMgr::DK_Warning);
}
-std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
+void Input::releaseHNodeBuffers() {
+ EmptyHNodeAllocator.DestroyAll();
+ ScalarHNodeAllocator.DestroyAll();
+ SequenceHNodeAllocator.DestroyAll();
+ MapHNodeAllocator.DestroyAll();
+}
+
+Input::HNode *Input::createHNodes(Node *N) {
SmallString<128> StringStorage;
switch (N->getType()) {
case Node::NK_Scalar: {
@@ -405,27 +408,27 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
// Copy string to permanent storage
KeyStr = StringStorage.str().copy(StringAllocator);
}
- return std::make_unique<ScalarHNode>(N, KeyStr);
+ return new (ScalarHNodeAllocator.Allocate()) ScalarHNode(N, KeyStr);
}
case Node::NK_BlockScalar: {
BlockScalarNode *BSN = dyn_cast<BlockScalarNode>(N);
StringRef ValueCopy = BSN->getValue().copy(StringAllocator);
- return std::make_unique<ScalarHNode>(N, ValueCopy);
+ return new (ScalarHNodeAllocator.Allocate()) ScalarHNode(N, ValueCopy);
}
case Node::NK_Sequence: {
SequenceNode *SQ = dyn_cast<SequenceNode>(N);
- auto SQHNode = std::make_unique<SequenceHNode>(N);
+ auto SQHNode = new (SequenceHNodeAllocator.Allocate()) SequenceHNode(N);
for (Node &SN : *SQ) {
auto Entry = createHNodes(&SN);
if (EC)
break;
- SQHNode->Entries.push_back(std::move(Entry));
+ SQHNode->Entries.push_back(Entry);
}
- return std::move(SQHNode);
+ return SQHNode;
}
case Node::NK_Mapping: {
MappingNode *Map = dyn_cast<MappingNode>(N);
- auto mapHNode = std::make_unique<MapHNode>(N);
+ auto mapHNode = new (MapHNodeAllocator.Allocate()) MapHNode(N);
for (KeyValueNode &KVN : *Map) {
Node *KeyNode = KVN.getKey();
ScalarNode *Key = dyn_cast_or_null<ScalarNode>(KeyNode);
@@ -457,7 +460,7 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
return std::move(mapHNode);
}
case Node::NK_Null:
- return std::make_unique<EmptyHNode>(N);
+ return new (EmptyHNodeAllocator.Allocate()) EmptyHNode(N);
default:
setError(N, "unknown node kind");
return nullptr;
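For orientation, a minimal sketch of the client-side YAMLTraits API whose internals the hunks above rework; the Point struct and its field names are hypothetical, and the comments note where the reworked HNode machinery comes into play:

#include "llvm/Support/YAMLTraits.h"

struct Point {
  int X = 0, Y = 0;
};

template <> struct llvm::yaml::MappingTraits<Point> {
  static void mapping(llvm::yaml::IO &IO, Point &P) {
    IO.mapRequired("x", P.X);
    IO.mapOptional("y", P.Y, 0); // Optional keys fall back to the default.
  }
};

int main() {
  Point P;
  llvm::yaml::Input YIn("{ x: 1, y: 2 }");
  YIn >> P; // createHNodes() above builds the allocator-backed HNode tree.
  return YIn.error() ? 1 : 0;
}

The change is invisible to such callers: HNodes were previously owned node-by-node through unique_ptr and are now bump-allocated, then recycled wholesale by releaseHNodeBuffers() before each new document.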
diff --git a/contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp b/contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp
index a4fc605019c2..28ab85d4344c 100644
--- a/contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp
@@ -13,8 +13,10 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
+#include "llvm/Support/AutoConvert.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Duration.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
@@ -23,11 +25,17 @@
#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
+#include "llvm/Support/Threading.h"
#include <algorithm>
#include <cerrno>
#include <cstdio>
#include <sys/stat.h>
+#ifndef _WIN32
+#include <sys/socket.h>
+#include <sys/un.h>
+#endif // _WIN32
+
// <fcntl.h> may provide O_BINARY.
#if defined(HAVE_FCNTL_H)
# include <fcntl.h>
@@ -58,6 +66,13 @@
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/Windows/WindowsSupport.h"
+// winsock2.h must be included before afunix.h. Briefly turn off clang-format
+// to keep it from reordering the includes.

+// clang-format off
+#include <winsock2.h>
+#include <afunix.h>
+// clang-format on
+#include <io.h>
#endif
using namespace llvm;
@@ -644,7 +659,7 @@ raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered,
// Check if this is a console device. This is not equivalent to isatty.
IsWindowsConsole =
::GetFileType((HANDLE)::_get_osfhandle(fd)) == FILE_TYPE_CHAR;
-#endif
+#endif // _WIN32
// Get the starting position.
off_t loc = ::lseek(FD, 0, SEEK_CUR);
@@ -845,8 +860,7 @@ size_t raw_fd_ostream::preferred_buffer_size() const {
if (IsWindowsConsole)
return 0;
return raw_ostream::preferred_buffer_size();
-#elif !defined(__minix)
- // Minix has no st_blksize.
+#else
assert(FD >= 0 && "File not yet open!");
struct stat statbuf;
if (fstat(FD, &statbuf) != 0)
@@ -859,8 +873,6 @@ size_t raw_fd_ostream::preferred_buffer_size() const {
return 0;
// Return the preferred block size.
return statbuf.st_blksize;
-#else
- return raw_ostream::preferred_buffer_size();
#endif
}
@@ -898,6 +910,10 @@ void raw_fd_ostream::anchor() {}
raw_fd_ostream &llvm::outs() {
// Set buffer settings to model stdout behavior.
std::error_code EC;
+#ifdef __MVS__
+ EC = enableAutoConversion(STDOUT_FILENO);
+ assert(!EC);
+#endif
static raw_fd_ostream S("-", EC, sys::fs::OF_None);
assert(!EC);
return S;
@@ -905,6 +921,10 @@ raw_fd_ostream &llvm::outs() {
raw_fd_ostream &llvm::errs() {
// Set standard error to be unbuffered and tied to outs() by default.
+#ifdef __MVS__
+ std::error_code EC = enableAutoConversion(STDERR_FILENO);
+ assert(!EC);
+#endif
static raw_fd_ostream S(STDERR_FILENO, false, true);
return S;
}
@@ -931,6 +951,9 @@ raw_fd_stream::raw_fd_stream(StringRef Filename, std::error_code &EC)
EC = std::make_error_code(std::errc::invalid_argument);
}
+raw_fd_stream::raw_fd_stream(int fd, bool shouldClose)
+ : raw_fd_ostream(fd, shouldClose, false, OStreamKind::OK_FDStream) {}
+
ssize_t raw_fd_stream::read(char *Ptr, size_t Size) {
assert(get_fd() >= 0 && "File already closed.");
ssize_t Ret = ::read(get_fd(), (void *)Ptr, Size);
@@ -946,6 +969,145 @@ bool raw_fd_stream::classof(const raw_ostream *OS) {
}
//===----------------------------------------------------------------------===//
+// raw_socket_stream
+//===----------------------------------------------------------------------===//
+
+#ifdef _WIN32
+WSABalancer::WSABalancer() {
+ WSADATA WsaData;
+ ::memset(&WsaData, 0, sizeof(WsaData));
+ if (WSAStartup(MAKEWORD(2, 2), &WsaData) != 0) {
+ llvm::report_fatal_error("WSAStartup failed");
+ }
+}
+
+WSABalancer::~WSABalancer() { WSACleanup(); }
+
+#endif // _WIN32
+
+static std::error_code getLastSocketErrorCode() {
+#ifdef _WIN32
+ return std::error_code(::WSAGetLastError(), std::system_category());
+#else
+ return std::error_code(errno, std::system_category());
+#endif
+}
+
+ListeningSocket::ListeningSocket(int SocketFD, StringRef SocketPath)
+ : FD(SocketFD), SocketPath(SocketPath) {}
+
+ListeningSocket::ListeningSocket(ListeningSocket &&LS)
+ : FD(LS.FD), SocketPath(LS.SocketPath) {
+ LS.FD = -1;
+}
+
+Expected<ListeningSocket> ListeningSocket::createUnix(StringRef SocketPath,
+ int MaxBacklog) {
+
+#ifdef _WIN32
+ WSABalancer _;
+ SOCKET MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (MaybeWinsocket == INVALID_SOCKET) {
+#else
+ int MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (MaybeWinsocket == -1) {
+#endif
+ return llvm::make_error<StringError>(getLastSocketErrorCode(),
+ "socket create failed");
+ }
+
+ struct sockaddr_un Addr;
+ memset(&Addr, 0, sizeof(Addr));
+ Addr.sun_family = AF_UNIX;
+ strncpy(Addr.sun_path, SocketPath.str().c_str(), sizeof(Addr.sun_path) - 1);
+
+ if (bind(MaybeWinsocket, (struct sockaddr *)&Addr, sizeof(Addr)) == -1) {
+ std::error_code Err = getLastSocketErrorCode();
+ if (Err == std::errc::address_in_use)
+ ::close(MaybeWinsocket);
+ return llvm::make_error<StringError>(Err, "Bind error");
+ }
+ if (listen(MaybeWinsocket, MaxBacklog) == -1) {
+ return llvm::make_error<StringError>(getLastSocketErrorCode(),
+ "Listen error");
+ }
+ int UnixSocket;
+#ifdef _WIN32
+ UnixSocket = _open_osfhandle(MaybeWinsocket, 0);
+#else
+ UnixSocket = MaybeWinsocket;
+#endif // _WIN32
+ return ListeningSocket{UnixSocket, SocketPath};
+}
+
+Expected<std::unique_ptr<raw_socket_stream>> ListeningSocket::accept() {
+ int AcceptFD;
+#ifdef _WIN32
+ SOCKET WinServerSock = _get_osfhandle(FD);
+ SOCKET WinAcceptSock = ::accept(WinServerSock, NULL, NULL);
+ AcceptFD = _open_osfhandle(WinAcceptSock, 0);
+#else
+ AcceptFD = ::accept(FD, NULL, NULL);
+#endif //_WIN32
+ if (AcceptFD == -1)
+ return llvm::make_error<StringError>(getLastSocketErrorCode(),
+ "Accept failed");
+ return std::make_unique<raw_socket_stream>(AcceptFD);
+}
+
+ListeningSocket::~ListeningSocket() {
+ if (FD == -1)
+ return;
+ ::close(FD);
+ unlink(SocketPath.c_str());
+}
+
+static Expected<int> GetSocketFD(StringRef SocketPath) {
+#ifdef _WIN32
+ SOCKET MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (MaybeWinsocket == INVALID_SOCKET) {
+#else
+ int MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (MaybeWinsocket == -1) {
+#endif // _WIN32
+ return llvm::make_error<StringError>(getLastSocketErrorCode(),
+ "Create socket failed");
+ }
+
+ struct sockaddr_un Addr;
+ memset(&Addr, 0, sizeof(Addr));
+ Addr.sun_family = AF_UNIX;
+ strncpy(Addr.sun_path, SocketPath.str().c_str(), sizeof(Addr.sun_path) - 1);
+
+ int status = connect(MaybeWinsocket, (struct sockaddr *)&Addr, sizeof(Addr));
+ if (status == -1) {
+ return llvm::make_error<StringError>(getLastSocketErrorCode(),
+ "Connect socket failed");
+ }
+#ifdef _WIN32
+ return _open_osfhandle(MaybeWinsocket, 0);
+#else
+ return MaybeWinsocket;
+#endif // _WIN32
+}
+
+raw_socket_stream::raw_socket_stream(int SocketFD)
+ : raw_fd_stream(SocketFD, true) {}
+
+Expected<std::unique_ptr<raw_socket_stream>>
+raw_socket_stream::createConnectedUnix(StringRef SocketPath) {
+#ifdef _WIN32
+ WSABalancer _;
+#endif // _WIN32
+ Expected<int> FD = GetSocketFD(SocketPath);
+ if (!FD)
+ return FD.takeError();
+ return std::make_unique<raw_socket_stream>(*FD);
+}
+
+raw_socket_stream::~raw_socket_stream() {}
+
+//===----------------------------------------------------------------------===//
// raw_string_ostream
//===----------------------------------------------------------------------===//
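A minimal usage sketch of the raw_socket_stream API added above, assuming its declarations land alongside raw_ostream in this revision; the socket path and backlog value are arbitrary:

#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

Error echoOnce(StringRef Path) {
  // Server side: socket + bind + listen on an AF_UNIX path.
  Expected<ListeningSocket> Server =
      ListeningSocket::createUnix(Path, /*MaxBacklog=*/3);
  if (!Server)
    return Server.takeError();

  // Client side: connect; the connection is queued until accept().
  Expected<std::unique_ptr<raw_socket_stream>> Client =
      raw_socket_stream::createConnectedUnix(Path);
  if (!Client)
    return Client.takeError();
  **Client << "hello\n";
  (*Client)->flush();

  // Server side: accept() yields another raw_socket_stream, which also
  // supports read() through its raw_fd_stream base.
  Expected<std::unique_ptr<raw_socket_stream>> Conn = Server->accept();
  if (!Conn)
    return Conn.takeError();
  char Buf[16];
  (*Conn)->read(Buf, sizeof(Buf));
  return Error::success();
}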
diff --git a/contrib/llvm-project/llvm/lib/Support/regcomp.c b/contrib/llvm-project/llvm/lib/Support/regcomp.c
index 4e9082cec456..990aef32a396 100644
--- a/contrib/llvm-project/llvm/lib/Support/regcomp.c
+++ b/contrib/llvm-project/llvm/lib/Support/regcomp.c
@@ -190,8 +190,8 @@ static struct cname {
* other clumsinesses
*/
struct parse {
- char *next; /* next character in RE */
- char *end; /* end of string (-> NUL normally) */
+ const char *next; /* next character in RE */
+ const char *end; /* end of string (-> NUL normally) */
int error; /* has an error been seen? */
sop *strip; /* malloced strip */
sopno ssize; /* malloced strip size (allocated) */
@@ -329,15 +329,7 @@ llvm_regcomp(llvm_regex_t *preg, const char *pattern, int cflags)
/* set things up */
p->g = g;
- /* suppress warning from the following explicit cast. */
-#ifdef __GNUC__
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wcast-qual"
-#endif /* __GNUC__ */
- p->next = (char *)pattern; /* convenience; we do not modify it */
-#ifdef __GNUC__
-#pragma GCC diagnostic pop
-#endif /* __GNUC__ */
+ p->next = pattern;
p->end = p->next + len;
p->error = 0;
p->ncsalloc = 0;
@@ -948,7 +940,7 @@ p_b_term(struct parse *p, cset *cs)
static void
p_b_cclass(struct parse *p, cset *cs)
{
- char *sp = p->next;
+ const char *sp = p->next;
struct cclass *cp;
size_t len;
const char *u;
@@ -1012,7 +1004,7 @@ static char /* value of collating element */
p_b_coll_elem(struct parse *p,
int endc) /* name ended by endc,']' */
{
- char *sp = p->next;
+ const char *sp = p->next;
struct cname *cp;
size_t len;
@@ -1056,8 +1048,8 @@ othercase(int ch)
static void
bothcases(struct parse *p, int ch)
{
- char *oldnext = p->next;
- char *oldend = p->end;
+ const char *oldnext = p->next;
+ const char *oldend = p->end;
char bracket[3];
ch = (uch)ch;
@@ -1098,16 +1090,12 @@ ordinary(struct parse *p, int ch)
static void
nonnewline(struct parse *p)
{
- char *oldnext = p->next;
- char *oldend = p->end;
- char bracket[4];
+ const char *oldnext = p->next;
+ const char *oldend = p->end;
+ static const char bracket[4] = {'^', '\n', ']', '\0'};
p->next = bracket;
p->end = bracket+3;
- bracket[0] = '^';
- bracket[1] = '\n';
- bracket[2] = ']';
- bracket[3] = '\0';
p_bracket(p);
assert(p->next == bracket+3);
p->next = oldnext;
diff --git a/contrib/llvm-project/llvm/lib/TableGen/Error.cpp b/contrib/llvm-project/llvm/lib/TableGen/Error.cpp
index ebe9129ebaeb..dabb265ef80c 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/Error.cpp
+++ b/contrib/llvm-project/llvm/lib/TableGen/Error.cpp
@@ -170,4 +170,11 @@ void CheckAssert(SMLoc Loc, Init *Condition, Init *Message) {
}
}
+// Dump a message to stderr.
+void dumpMessage(SMLoc Loc, Init *Message) {
+ auto *MessageInit = dyn_cast<StringInit>(Message);
+ assert(MessageInit && "no debug message to print");
+ PrintNote(Loc, MessageInit->getValue());
+}
+
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/TableGen/JSONBackend.cpp b/contrib/llvm-project/llvm/lib/TableGen/JSONBackend.cpp
index a9dc30d7f59b..2a3f522a9c0e 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/JSONBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/TableGen/JSONBackend.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/contrib/llvm-project/llvm/lib/TableGen/Main.cpp b/contrib/llvm-project/llvm/lib/TableGen/Main.cpp
index 9aee1f8fecd2..c6e2e6c89fd2 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/Main.cpp
+++ b/contrib/llvm-project/llvm/lib/TableGen/Main.cpp
@@ -34,7 +34,6 @@
#include <string>
#include <system_error>
#include <utility>
-#include <vector>
using namespace llvm;
static cl::opt<std::string>
diff --git a/contrib/llvm-project/llvm/lib/TableGen/Record.cpp b/contrib/llvm-project/llvm/lib/TableGen/Record.cpp
index 20db470855a1..aa981fdab4b3 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/Record.cpp
+++ b/contrib/llvm-project/llvm/lib/TableGen/Record.cpp
@@ -797,6 +797,25 @@ void UnOpInit::Profile(FoldingSetNodeID &ID) const {
Init *UnOpInit::Fold(Record *CurRec, bool IsFinal) const {
RecordKeeper &RK = getRecordKeeper();
switch (getOpcode()) {
+ case REPR:
+ if (LHS->isConcrete()) {
+ // If it is a Record, print the full content.
+ if (const auto *Def = dyn_cast<DefInit>(LHS)) {
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << *Def->getDef();
+ OS.flush();
+ return StringInit::get(RK, S);
+ } else {
+ // Otherwise, print the value of the variable.
+ //
+ // NOTE: we could recursively !repr the elements of a list,
+ // but that could produce a lot of output when printing a
+ // defset.
+ return StringInit::get(RK, LHS->getAsString());
+ }
+ }
+ break;
case TOLOWER:
if (StringInit *LHSs = dyn_cast<StringInit>(LHS))
return StringInit::get(RK, LHSs->getValue().lower());
@@ -957,6 +976,9 @@ std::string UnOpInit::getAsString() const {
case EMPTY: Result = "!empty"; break;
case GETDAGOP: Result = "!getdagop"; break;
case LOG2 : Result = "!logtwo"; break;
+ case REPR:
+ Result = "!repr";
+ break;
case TOLOWER:
Result = "!tolower";
break;
@@ -1287,7 +1309,6 @@ Init *BinOpInit::Fold(Record *CurRec) const {
}
return ListInit::get(Args, TheList->getElementType());
}
- case RANGE:
case RANGEC: {
auto *LHSi = dyn_cast<IntInit>(LHS);
auto *RHSi = dyn_cast<IntInit>(RHS);
@@ -1487,8 +1508,9 @@ std::string BinOpInit::getAsString() const {
case GT: Result = "!gt"; break;
case LISTCONCAT: Result = "!listconcat"; break;
case LISTSPLAT: Result = "!listsplat"; break;
- case LISTREMOVE: Result = "!listremove"; break;
- case RANGE: Result = "!range"; break;
+ case LISTREMOVE:
+ Result = "!listremove";
+ break;
case STRCONCAT: Result = "!strconcat"; break;
case INTERLEAVE: Result = "!interleave"; break;
case SETDAGOP: Result = "!setdagop"; break;
@@ -1704,6 +1726,34 @@ Init *TernOpInit::Fold(Record *CurRec) const {
break;
}
+ case RANGE: {
+ auto *LHSi = dyn_cast<IntInit>(LHS);
+ auto *MHSi = dyn_cast<IntInit>(MHS);
+ auto *RHSi = dyn_cast<IntInit>(RHS);
+ if (!LHSi || !MHSi || !RHSi)
+ break;
+
+ auto Start = LHSi->getValue();
+ auto End = MHSi->getValue();
+ auto Step = RHSi->getValue();
+ if (Step == 0)
+ PrintError(CurRec->getLoc(), "Step of !range can't be 0");
+
+ SmallVector<Init *, 8> Args;
+ if (Start < End && Step > 0) {
+ Args.reserve((End - Start) / Step);
+ for (auto I = Start; I < End; I += Step)
+ Args.push_back(IntInit::get(getRecordKeeper(), I));
+ } else if (Start > End && Step < 0) {
+ Args.reserve((Start - End) / -Step);
+ for (auto I = Start; I > End; I += Step)
+ Args.push_back(IntInit::get(getRecordKeeper(), I));
+ } else {
+ // Empty set
+ }
+ return ListInit::get(Args, LHSi->getType());
+ }
+
case SUBSTR: {
StringInit *LHSs = dyn_cast<StringInit>(LHS);
IntInit *MHSi = dyn_cast<IntInit>(MHS);
@@ -1823,6 +1873,9 @@ std::string TernOpInit::getAsString() const {
case FILTER: Result = "!filter"; UnquotedLHS = true; break;
case FOREACH: Result = "!foreach"; UnquotedLHS = true; break;
case IF: Result = "!if"; break;
+ case RANGE:
+ Result = "!range";
+ break;
case SUBST: Result = "!subst"; break;
case SUBSTR: Result = "!substr"; break;
case FIND: Result = "!find"; break;
@@ -2210,9 +2263,9 @@ void VarDefInit::Profile(FoldingSetNodeID &ID) const {
DefInit *VarDefInit::instantiate() {
if (!Def) {
RecordKeeper &Records = Class->getRecords();
- auto NewRecOwner = std::make_unique<Record>(Records.getNewAnonymousName(),
- Class->getLoc(), Records,
- /*IsAnonymous=*/true);
+ auto NewRecOwner =
+ std::make_unique<Record>(Records.getNewAnonymousName(), Class->getLoc(),
+ Records, Record::RK_AnonymousDef);
Record *NewRec = NewRecOwner.get();
// Copy values from class to instance
@@ -2222,6 +2275,9 @@ DefInit *VarDefInit::instantiate() {
// Copy assertions from class to instance.
NewRec->appendAssertions(Class);
+ // Copy dumps from class to instance.
+ NewRec->appendDumps(Class);
+
// Substitute and resolve template arguments
ArrayRef<Init *> TArgs = Class->getTemplateArgs();
MapResolver R(NewRec);
@@ -2256,6 +2312,9 @@ DefInit *VarDefInit::instantiate() {
// Check the assertions.
NewRec->checkRecordAssertions();
+ // Emit the dumps.
+ NewRec->emitRecordDumps();
+
Def = DefInit::get(NewRec);
}
@@ -2813,6 +2872,11 @@ void Record::resolveReferences(Resolver &R, const RecordVal *SkipVal) {
Value = Assertion.Message->resolveReferences(R);
Assertion.Message = Value;
}
+ // Resolve the dump expressions.
+ for (auto &Dump : Dumps) {
+ Init *Value = Dump.Message->resolveReferences(R);
+ Dump.Message = Value;
+ }
}
void Record::resolveReferences(Init *NewName) {
@@ -3069,6 +3133,16 @@ void Record::checkRecordAssertions() {
}
}
+void Record::emitRecordDumps() {
+ RecordResolver R(*this);
+ R.setFinal(true);
+
+ for (const auto &Dump : getDumps()) {
+ Init *Message = Dump.Message->resolveReferences(R);
+ dumpMessage(Dump.Loc, Message);
+ }
+}
+
// Report a warning if the record has unused template arguments.
void Record::checkUnusedTemplateArgs() {
for (const Init *TA : getTemplateArgs()) {
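The RANGE fold above builds a half-open arithmetic progression. A standalone sketch of the same arithmetic outside the Init machinery (the helper name is hypothetical):

#include <cstdint>
#include <vector>

// Mirrors TernOpInit::RANGE: half-open [Start, End) with a signed Step.
static std::vector<int64_t> rangeFold(int64_t Start, int64_t End,
                                      int64_t Step) {
  std::vector<int64_t> Out;
  if (Step == 0)
    return Out; // The real fold diagnoses "Step of !range can't be 0".
  if (Start < End && Step > 0)
    for (int64_t I = Start; I < End; I += Step)
      Out.push_back(I);
  else if (Start > End && Step < 0)
    for (int64_t I = Start; I > End; I += Step)
      Out.push_back(I);
  return Out; // Any other sign combination yields the empty list.
}

// rangeFold(0, 10, 3) -> {0, 3, 6, 9}; rangeFold(5, 0, -2) -> {5, 3, 1}.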
diff --git a/contrib/llvm-project/llvm/lib/TableGen/TGLexer.cpp b/contrib/llvm-project/llvm/lib/TableGen/TGLexer.cpp
index 98f0e8c1149c..c811a67d930d 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/TGLexer.cpp
+++ b/contrib/llvm-project/llvm/lib/TableGen/TGLexer.cpp
@@ -346,31 +346,32 @@ tgtok::TokKind TGLexer::LexIdentifier() {
StringRef Str(IdentStart, CurPtr-IdentStart);
tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str)
- .Case("int", tgtok::Int)
- .Case("bit", tgtok::Bit)
- .Case("bits", tgtok::Bits)
- .Case("string", tgtok::String)
- .Case("list", tgtok::List)
- .Case("code", tgtok::Code)
- .Case("dag", tgtok::Dag)
- .Case("class", tgtok::Class)
- .Case("def", tgtok::Def)
- .Case("true", tgtok::TrueVal)
- .Case("false", tgtok::FalseVal)
- .Case("foreach", tgtok::Foreach)
- .Case("defm", tgtok::Defm)
- .Case("defset", tgtok::Defset)
- .Case("multiclass", tgtok::MultiClass)
- .Case("field", tgtok::Field)
- .Case("let", tgtok::Let)
- .Case("in", tgtok::In)
- .Case("defvar", tgtok::Defvar)
- .Case("include", tgtok::Include)
- .Case("if", tgtok::If)
- .Case("then", tgtok::Then)
- .Case("else", tgtok::ElseKW)
- .Case("assert", tgtok::Assert)
- .Default(tgtok::Id);
+ .Case("int", tgtok::Int)
+ .Case("bit", tgtok::Bit)
+ .Case("bits", tgtok::Bits)
+ .Case("string", tgtok::String)
+ .Case("list", tgtok::List)
+ .Case("code", tgtok::Code)
+ .Case("dag", tgtok::Dag)
+ .Case("class", tgtok::Class)
+ .Case("def", tgtok::Def)
+ .Case("true", tgtok::TrueVal)
+ .Case("false", tgtok::FalseVal)
+ .Case("foreach", tgtok::Foreach)
+ .Case("defm", tgtok::Defm)
+ .Case("defset", tgtok::Defset)
+ .Case("multiclass", tgtok::MultiClass)
+ .Case("field", tgtok::Field)
+ .Case("let", tgtok::Let)
+ .Case("in", tgtok::In)
+ .Case("defvar", tgtok::Defvar)
+ .Case("include", tgtok::Include)
+ .Case("if", tgtok::If)
+ .Case("then", tgtok::Then)
+ .Case("else", tgtok::ElseKW)
+ .Case("assert", tgtok::Assert)
+ .Case("dump", tgtok::Dump)
+ .Default(tgtok::Id);
// A couple of tokens require special processing.
switch (Kind) {
@@ -462,56 +463,62 @@ bool TGLexer::SkipCComment() {
/// 0x[0-9a-fA-F]+
/// 0b[01]+
tgtok::TokKind TGLexer::LexNumber() {
+ unsigned Base = 0;
+ const char *NumStart;
+
+ // Check if it's a hex or a binary value.
if (CurPtr[-1] == '0') {
+ NumStart = CurPtr + 1;
if (CurPtr[0] == 'x') {
- ++CurPtr;
- const char *NumStart = CurPtr;
- while (isxdigit(CurPtr[0]))
+ Base = 16;
+ do
++CurPtr;
-
- // Requires at least one hex digit.
- if (CurPtr == NumStart)
- return ReturnError(TokStart, "Invalid hexadecimal number");
-
- errno = 0;
- CurIntVal = strtoll(NumStart, nullptr, 16);
- if (errno == EINVAL)
- return ReturnError(TokStart, "Invalid hexadecimal number");
- if (errno == ERANGE) {
- errno = 0;
- CurIntVal = (int64_t)strtoull(NumStart, nullptr, 16);
- if (errno == EINVAL)
- return ReturnError(TokStart, "Invalid hexadecimal number");
- if (errno == ERANGE)
- return ReturnError(TokStart, "Hexadecimal number out of range");
- }
- return tgtok::IntVal;
+ while (isxdigit(CurPtr[0]));
} else if (CurPtr[0] == 'b') {
- ++CurPtr;
- const char *NumStart = CurPtr;
- while (CurPtr[0] == '0' || CurPtr[0] == '1')
+ Base = 2;
+ do
++CurPtr;
-
- // Requires at least one binary digit.
- if (CurPtr == NumStart)
- return ReturnError(CurPtr-2, "Invalid binary number");
- CurIntVal = strtoll(NumStart, nullptr, 2);
- return tgtok::BinaryIntVal;
+ while (CurPtr[0] == '0' || CurPtr[0] == '1');
}
}
- // Check for a sign without a digit.
- if (!isdigit(CurPtr[0])) {
- if (CurPtr[-1] == '-')
- return tgtok::minus;
- else if (CurPtr[-1] == '+')
- return tgtok::plus;
+ // For a hex or binary value, we always convert it to an unsigned value.
+ bool IsMinus = false;
+
+ // Check if it's a decimal value.
+ if (Base == 0) {
+ // Check for a sign without a digit.
+ if (!isdigit(CurPtr[0])) {
+ if (CurPtr[-1] == '-')
+ return tgtok::minus;
+ else if (CurPtr[-1] == '+')
+ return tgtok::plus;
+ }
+
+ Base = 10;
+ NumStart = TokStart;
+ IsMinus = CurPtr[-1] == '-';
+
+ while (isdigit(CurPtr[0]))
+ ++CurPtr;
}
- while (isdigit(CurPtr[0]))
- ++CurPtr;
- CurIntVal = strtoll(TokStart, nullptr, 10);
- return tgtok::IntVal;
+ // Requires at least one digit.
+ if (CurPtr == NumStart)
+ return ReturnError(TokStart, "Invalid number");
+
+ errno = 0;
+ if (IsMinus)
+ CurIntVal = strtoll(NumStart, nullptr, Base);
+ else
+ CurIntVal = strtoull(NumStart, nullptr, Base);
+
+ if (errno == EINVAL)
+ return ReturnError(TokStart, "Invalid number");
+ if (errno == ERANGE)
+ return ReturnError(TokStart, "Number out of range");
+
+ return Base == 2 ? tgtok::BinaryIntVal : tgtok::IntVal;
}
/// LexBracket - We just read '['. If this is a code block, return it,
@@ -599,6 +606,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
.Case("exists", tgtok::XExists)
.Case("tolower", tgtok::XToLower)
.Case("toupper", tgtok::XToUpper)
+ .Case("repr", tgtok::XRepr)
.Default(tgtok::Error);
return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
@@ -717,16 +725,15 @@ tgtok::TokKind TGLexer::lexPreprocessor(
bool MacroIsDefined = DefinedMacros.count(MacroName) != 0;
- // Canonicalize ifndef to ifdef equivalent
- if (Kind == tgtok::Ifndef) {
+ // Canonicalize ifndef's MacroIsDefined to its ifdef equivalent.
+ if (Kind == tgtok::Ifndef)
MacroIsDefined = !MacroIsDefined;
- Kind = tgtok::Ifdef;
- }
// Regardless of whether we are processing tokens or not,
// we put the #ifdef control on stack.
+ // Note that MacroIsDefined has been canonicalized against ifdef.
PrepIncludeStack.back()->push_back(
- {Kind, MacroIsDefined, SMLoc::getFromPointer(TokStart)});
+ {tgtok::Ifdef, MacroIsDefined, SMLoc::getFromPointer(TokStart)});
if (!prepSkipDirectiveEnd())
return ReturnError(CurPtr, "Only comments are supported after " +
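The LexNumber rewrite funnels every base through one conversion at the end, and only a decimal literal with an explicit leading '-' takes the signed path; hex and binary always convert as unsigned, so a full-width literal such as 0xFFFFFFFFFFFFFFFF now converts in a single call instead of the old strtoll-then-retry sequence. A sketch of just that rule (the helper is hypothetical):

#include <cerrno>
#include <cstdint>
#include <cstdlib>

// Signed conversion only for an explicit minus; otherwise convert as
// unsigned and bit-copy into the signed token value, as CurIntVal does.
static bool convertLexedNumber(const char *NumStart, unsigned Base,
                               bool IsMinus, int64_t &Val) {
  errno = 0;
  Val = IsMinus ? strtoll(NumStart, nullptr, Base)
                : static_cast<int64_t>(strtoull(NumStart, nullptr, Base));
  return errno == 0; // EINVAL/ERANGE map to the lexer errors above.
}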
diff --git a/contrib/llvm-project/llvm/lib/TableGen/TGLexer.h b/contrib/llvm-project/llvm/lib/TableGen/TGLexer.h
index c9bba98971d0..2e2aa59f3440 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/TGLexer.h
+++ b/contrib/llvm-project/llvm/lib/TableGen/TGLexer.h
@@ -54,35 +54,60 @@ enum TokKind {
paste, // #
dotdotdot, // ...
+ // Boolean literals.
+ TrueVal,
+ FalseVal,
+
+ // Integer value.
+ IntVal,
+
+ // Binary constant. Note that these are sized according to the number of
+ // bits given.
+ BinaryIntVal,
+
+ // Preprocessing tokens for internal usage by the lexer.
+ // They are never returned as a result of Lex().
+ Ifdef,
+ Ifndef,
+ Else,
+ Endif,
+ Define,
+
// Reserved keywords. ('ElseKW' is named to distinguish it from the
// existing 'Else' that means the preprocessor #else.)
- Assert,
Bit,
Bits,
- Class,
Code,
Dag,
- Def,
- Defm,
- Defset,
- Defvar,
ElseKW,
FalseKW,
Field,
- Foreach,
- If,
In,
Include,
Int,
- Let,
List,
- MultiClass,
String,
Then,
TrueKW,
+ // Object start tokens.
+ OBJECT_START_FIRST,
+ Assert = OBJECT_START_FIRST,
+ Class,
+ Def,
+ Defm,
+ Defset,
+ Defvar,
+ Dump,
+ Foreach,
+ If,
+ Let,
+ MultiClass,
+ OBJECT_START_LAST = MultiClass,
+
// Bang operators.
- XConcat,
+ BANG_OPERATOR_FIRST,
+ XConcat = BANG_OPERATOR_FIRST,
XADD,
XSUB,
XMUL,
@@ -131,33 +156,33 @@ enum TokKind {
XGetDagName,
XSetDagArg,
XSetDagName,
-
- // Boolean literals.
- TrueVal,
- FalseVal,
-
- // Integer value.
- IntVal,
-
- // Binary constant. Note that these are sized according to the number of
- // bits given.
- BinaryIntVal,
+ XRepr,
+ BANG_OPERATOR_LAST = XRepr,
// String valued tokens.
- Id,
+ STRING_VALUE_FIRST,
+ Id = STRING_VALUE_FIRST,
StrVal,
VarName,
CodeFragment,
-
- // Preprocessing tokens for internal usage by the lexer.
- // They are never returned as a result of Lex().
- Ifdef,
- Ifndef,
- Else,
- Endif,
- Define
+ STRING_VALUE_LAST = CodeFragment,
};
+
+/// isBangOperator - Return true if this is a bang operator.
+static inline bool isBangOperator(tgtok::TokKind Kind) {
+ return tgtok::BANG_OPERATOR_FIRST <= Kind && Kind <= BANG_OPERATOR_LAST;
+}
+
+/// isObjectStart - Return true if this is a valid first token for a statement.
+static inline bool isObjectStart(tgtok::TokKind Kind) {
+ return tgtok::OBJECT_START_FIRST <= Kind && Kind <= OBJECT_START_LAST;
+}
+
+/// isStringValue - Return true if this is a string value.
+static inline bool isStringValue(tgtok::TokKind Kind) {
+ return tgtok::STRING_VALUE_FIRST <= Kind && Kind <= STRING_VALUE_LAST;
}
+} // namespace tgtok
/// TGLexer - TableGen Lexer class.
class TGLexer {
@@ -197,8 +222,7 @@ public:
tgtok::TokKind getCode() const { return CurCode; }
const std::string &getCurStrVal() const {
- assert((CurCode == tgtok::Id || CurCode == tgtok::StrVal ||
- CurCode == tgtok::VarName || CurCode == tgtok::CodeFragment) &&
+ assert(tgtok::isStringValue(CurCode) &&
"This token doesn't have a string value");
return CurStrVal;
}
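The token reordering in this header is deliberate: Assert..MultiClass, XConcat..XRepr, and Id..CodeFragment are now contiguous enum ranges, so isObjectStart, isBangOperator, and isStringValue each reduce to two integer comparisons, replacing the hand-maintained case lists that the TGParser.cpp hunks below delete.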
diff --git a/contrib/llvm-project/llvm/lib/TableGen/TGParser.cpp b/contrib/llvm-project/llvm/lib/TableGen/TGParser.cpp
index a371bd21f026..e7dcb91ba20a 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/TGParser.cpp
+++ b/contrib/llvm-project/llvm/lib/TableGen/TGParser.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "TGParser.h"
-#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
@@ -35,20 +34,20 @@ namespace llvm {
struct SubClassReference {
SMRange RefRange;
- Record *Rec;
+ Record *Rec = nullptr;
SmallVector<ArgumentInit *, 4> TemplateArgs;
- SubClassReference() : Rec(nullptr) {}
+ SubClassReference() = default;
bool isInvalid() const { return Rec == nullptr; }
};
struct SubMultiClassReference {
SMRange RefRange;
- MultiClass *MC;
+ MultiClass *MC = nullptr;
SmallVector<ArgumentInit *, 4> TemplateArgs;
- SubMultiClassReference() : MC(nullptr) {}
+ SubMultiClassReference() = default;
bool isInvalid() const { return MC == nullptr; }
void dump() const;
@@ -111,11 +110,11 @@ static void checkConcrete(Record &R) {
/// Return an Init with a qualifier prefix referring
/// to CurRec's name.
-static Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass, Init *Name,
- StringRef Scoper) {
+static Init *QualifyName(Record &CurRec, Init *Name) {
RecordKeeper &RK = CurRec.getRecords();
- Init *NewName = BinOpInit::getStrConcat(CurRec.getNameInit(),
- StringInit::get(RK, Scoper));
+ Init *NewName = BinOpInit::getStrConcat(
+ CurRec.getNameInit(),
+ StringInit::get(RK, CurRec.isMultiClass() ? "::" : ":"));
NewName = BinOpInit::getStrConcat(NewName, Name);
if (BinOpInit *BinOp = dyn_cast<BinOpInit>(NewName))
@@ -123,18 +122,20 @@ static Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass, Init *Name,
return NewName;
}
+static Init *QualifyName(MultiClass *MC, Init *Name) {
+ return QualifyName(MC->Rec, Name);
+}
+
/// Return the qualified version of the implicit 'NAME' template argument.
-static Init *QualifiedNameOfImplicitName(Record &Rec,
- MultiClass *MC = nullptr) {
- return QualifyName(Rec, MC, StringInit::get(Rec.getRecords(), "NAME"),
- MC ? "::" : ":");
+static Init *QualifiedNameOfImplicitName(Record &Rec) {
+ return QualifyName(Rec, StringInit::get(Rec.getRecords(), "NAME"));
}
static Init *QualifiedNameOfImplicitName(MultiClass *MC) {
- return QualifiedNameOfImplicitName(MC->Rec, MC);
+ return QualifiedNameOfImplicitName(MC->Rec);
}
-Init *TGVarScope::getVar(RecordKeeper &Records, MultiClass* ParsingMultiClass,
+Init *TGVarScope::getVar(RecordKeeper &Records, MultiClass *ParsingMultiClass,
StringInit *Name, SMRange NameLoc,
bool TrackReferenceLocs) const {
// First, we search in local variables.
@@ -142,11 +143,10 @@ Init *TGVarScope::getVar(RecordKeeper &Records, MultiClass* ParsingMultiClass,
if (It != Vars.end())
return It->second;
- std::function<Init *(Record *, StringInit *, StringRef)> FindValueInArgs =
- [&](Record *Rec, StringInit *Name, StringRef Scoper) -> Init * {
+ auto FindValueInArgs = [&](Record *Rec, StringInit *Name) -> Init * {
if (!Rec)
return nullptr;
- Init *ArgName = QualifyName(*Rec, ParsingMultiClass, Name, Scoper);
+ Init *ArgName = QualifyName(*Rec, Name);
if (Rec->isTemplateArg(ArgName)) {
RecordVal *RV = Rec->getValue(ArgName);
assert(RV && "Template arg doesn't exist??");
@@ -176,7 +176,7 @@ Init *TGVarScope::getVar(RecordKeeper &Records, MultiClass* ParsingMultiClass,
// The variable is a class template argument?
if (CurRec->isClass())
- if (auto *V = FindValueInArgs(CurRec, Name, ":"))
+ if (auto *V = FindValueInArgs(CurRec, Name))
return V;
}
break;
@@ -193,7 +193,7 @@ Init *TGVarScope::getVar(RecordKeeper &Records, MultiClass* ParsingMultiClass,
case SK_MultiClass: {
// The variable is a multiclass template argument?
if (CurMultiClass)
- if (auto *V = FindValueInArgs(&CurMultiClass->Rec, Name, "::"))
+ if (auto *V = FindValueInArgs(&CurMultiClass->Rec, Name))
return V;
break;
}
@@ -313,6 +313,9 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) {
// Copy the subclass record's assertions to the new record.
CurRec->appendAssertions(SC);
+ // Copy the subclass record's dumps to the new record.
+ CurRec->appendDumps(SC);
+
Init *Name;
if (CurRec->isClass())
Name = VarInit::get(QualifiedNameOfImplicitName(*CurRec),
@@ -376,7 +379,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
/// Add a record, foreach loop, or assertion to the current context.
bool TGParser::addEntry(RecordsEntry E) {
- assert((!!E.Rec + !!E.Loop + !!E.Assertion) == 1 &&
+ assert((!!E.Rec + !!E.Loop + !!E.Assertion + !!E.Dump) == 1 &&
"RecordsEntry has invalid number of items");
// If we are parsing a loop, add it to the loop's entries.
@@ -404,6 +407,11 @@ bool TGParser::addEntry(RecordsEntry E) {
return false;
}
+ if (E.Dump) {
+ dumpMessage(E.Dump->Loc, E.Dump->Message);
+ return false;
+ }
+
// It must be a record, so finish it off.
return addDefOne(std::move(E.Rec));
}
@@ -498,6 +506,18 @@ bool TGParser::resolve(const std::vector<RecordsEntry> &Source,
else
CheckAssert(E.Assertion->Loc, Condition, Message);
+ } else if (E.Dump) {
+ MapResolver R;
+ for (const auto &S : Substs)
+ R.set(S.first, S.second);
+ Init *Message = E.Dump->Message->resolveReferences(R);
+
+ if (Dest)
+ Dest->push_back(
+ std::make_unique<Record::DumpInfo>(E.Dump->Loc, Message));
+ else
+ dumpMessage(E.Dump->Loc, Message);
+
} else {
auto Rec = std::make_unique<Record>(*E.Rec);
if (Loc)
@@ -545,6 +565,9 @@ bool TGParser::addDefOne(std::unique_ptr<Record> Rec) {
// Check the assertions.
Rec->checkRecordAssertions();
+ // Run the dumps.
+ Rec->emitRecordDumps();
+
// If ObjectBody has template arguments, it's an error.
assert(Rec->getTemplateArgs().empty() && "How'd this get template args?");
@@ -586,7 +609,7 @@ bool TGParser::resolveArguments(Record *Rec, ArrayRef<ArgumentInit *> ArgValues,
ArgName->getAsUnquotedString() + "' once");
ArgValueHandler(ArgName, ArgValue);
- llvm::erase_value(UnsolvedArgNames, ArgName);
+ llvm::erase(UnsolvedArgNames, ArgName);
}
// For unsolved arguments, if there is no default value, complain.
@@ -630,14 +653,6 @@ bool TGParser::resolveArgumentsOfMultiClass(SubstStack &Substs, MultiClass *MC,
// Parser Code
//===----------------------------------------------------------------------===//
-/// isObjectStart - Return true if this is a valid first token for a statement.
-static bool isObjectStart(tgtok::TokKind K) {
- return K == tgtok::Assert || K == tgtok::Class || K == tgtok::Def ||
- K == tgtok::Defm || K == tgtok::Defset || K == tgtok::Defvar ||
- K == tgtok::Foreach || K == tgtok::If || K == tgtok::Let ||
- K == tgtok::MultiClass;
-}
-
bool TGParser::consume(tgtok::TokKind K) {
if (Lex.getCode() == K) {
Lex.Lex();
@@ -756,8 +771,7 @@ ParseSubClassReference(Record *CurRec, bool isDefm) {
return Result;
}
- if (ParseTemplateArgValueList(Result.TemplateArgs, CurRec, Result.Rec,
- isDefm)) {
+ if (ParseTemplateArgValueList(Result.TemplateArgs, CurRec, Result.Rec)) {
Result.Rec = nullptr; // Error parsing value list.
return Result;
}
@@ -794,7 +808,7 @@ ParseSubMultiClassReference(MultiClass *CurMC) {
}
if (ParseTemplateArgValueList(Result.TemplateArgs, &CurMC->Rec,
- &Result.MC->Rec, true)) {
+ &Result.MC->Rec)) {
Result.MC = nullptr; // Error parsing value list.
return Result;
}
@@ -1176,6 +1190,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XSize:
case tgtok::XEmpty:
case tgtok::XCast:
+ case tgtok::XRepr:
case tgtok::XGetDagOp: { // Value ::= !unop '(' Value ')'
UnOpInit::UnaryOp Code;
RecTy *Type = nullptr;
@@ -1194,6 +1209,11 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
}
break;
+ case tgtok::XRepr:
+ Lex.Lex(); // eat the operation
+ Code = UnOpInit::REPR;
+ Type = StringRecTy::get(Records);
+ break;
case tgtok::XToLower:
Lex.Lex(); // eat the operation
Code = UnOpInit::TOLOWER;
@@ -1416,7 +1436,6 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XListConcat:
case tgtok::XListSplat:
case tgtok::XListRemove:
- case tgtok::XRange:
case tgtok::XStrConcat:
case tgtok::XInterleave:
case tgtok::XGetDagArg:
@@ -1448,8 +1467,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XGt: Code = BinOpInit::GT; break;
case tgtok::XListConcat: Code = BinOpInit::LISTCONCAT; break;
case tgtok::XListSplat: Code = BinOpInit::LISTSPLAT; break;
- case tgtok::XListRemove: Code = BinOpInit::LISTREMOVE; break;
- case tgtok::XRange: Code = BinOpInit::RANGE; break;
+ case tgtok::XListRemove:
+ Code = BinOpInit::LISTREMOVE;
+ break;
case tgtok::XStrConcat: Code = BinOpInit::STRCONCAT; break;
case tgtok::XInterleave: Code = BinOpInit::INTERLEAVE; break;
case tgtok::XSetDagOp: Code = BinOpInit::SETDAGOP; break;
@@ -1516,10 +1536,6 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
// We don't know the list type until we parse the first argument.
ArgType = ItemType;
break;
- case tgtok::XRange:
- Type = IntRecTy::get(Records)->getListTy();
- // ArgType may be either Int or List.
- break;
case tgtok::XStrConcat:
Type = StringRecTy::get(Records);
ArgType = StringRecTy::get(Records);
@@ -1604,27 +1620,6 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
return nullptr;
}
break;
- case BinOpInit::RANGE:
- if (InitList.size() == 1) {
- if (isa<ListRecTy>(ArgType)) {
- ArgType = nullptr; // Detect error if 2nd arg were present.
- } else if (isa<IntRecTy>(ArgType)) {
- // Assume 2nd arg should be IntRecTy
- } else {
- Error(InitLoc,
- Twine("expected list or int, got value of type '") +
- ArgType->getAsString() + "'");
- return nullptr;
- }
- } else {
- // Don't come here unless 1st arg is ListRecTy.
- assert(isa<ListRecTy>(cast<TypedInit>(InitList[0])->getType()));
- Error(InitLoc,
- Twine("expected one list, got extra value of type '") +
- ArgType->getAsString() + "'");
- return nullptr;
- }
- break;
case BinOpInit::EQ:
case BinOpInit::NE:
if (!ArgType->typeIsConvertibleTo(IntRecTy::get(Records)) &&
@@ -1734,37 +1729,6 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
if (Code == BinOpInit::LISTREMOVE)
Type = ArgType;
- if (Code == BinOpInit::RANGE) {
- Init *LHS, *RHS;
- auto ArgCount = InitList.size();
- assert(ArgCount >= 1);
- auto *Arg0 = cast<TypedInit>(InitList[0]);
- auto *Arg0Ty = Arg0->getType();
- if (ArgCount == 1) {
- if (isa<ListRecTy>(Arg0Ty)) {
- // (0, !size(arg))
- LHS = IntInit::get(Records, 0);
- RHS = UnOpInit::get(UnOpInit::SIZE, Arg0, IntRecTy::get(Records))
- ->Fold(CurRec);
- } else {
- assert(isa<IntRecTy>(Arg0Ty));
- // (0, arg)
- LHS = IntInit::get(Records, 0);
- RHS = Arg0;
- }
- } else if (ArgCount == 2) {
- assert(isa<IntRecTy>(Arg0Ty));
- auto *Arg1 = cast<TypedInit>(InitList[1]);
- assert(isa<IntRecTy>(Arg1->getType()));
- LHS = Arg0;
- RHS = Arg1;
- } else {
- Error(OpLoc, "expected at most two values of integer");
- return nullptr;
- }
- return BinOpInit::get(Code, LHS, RHS, Type)->Fold(CurRec);
- }
-
// We allow multiple operands to associative operators like !strconcat as
// shorthand for nesting them.
if (Code == BinOpInit::STRCONCAT || Code == BinOpInit::LISTCONCAT ||
@@ -1791,6 +1755,105 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
return ParseOperationForEachFilter(CurRec, ItemType);
}
+ case tgtok::XRange: {
+ SMLoc OpLoc = Lex.getLoc();
+ Lex.Lex(); // eat the operation
+
+ if (!consume(tgtok::l_paren)) {
+ TokError("expected '(' after !range operator");
+ return nullptr;
+ }
+
+ SmallVector<Init *, 2> Args;
+ bool FirstArgIsList = false;
+ for (;;) {
+ if (Args.size() >= 3) {
+ TokError("expected at most three values of integer");
+ return nullptr;
+ }
+
+ SMLoc InitLoc = Lex.getLoc();
+ Args.push_back(ParseValue(CurRec));
+ if (!Args.back())
+ return nullptr;
+
+ TypedInit *ArgBack = dyn_cast<TypedInit>(Args.back());
+ if (!ArgBack) {
+ Error(OpLoc, Twine("expected value to be a typed value, got '" +
+ Args.back()->getAsString() + "'"));
+ return nullptr;
+ }
+
+ RecTy *ArgBackType = ArgBack->getType();
+ if (!FirstArgIsList || Args.size() == 1) {
+ if (Args.size() == 1 && isa<ListRecTy>(ArgBackType)) {
+ FirstArgIsList = true; // Detect error if 2nd arg were present.
+ } else if (isa<IntRecTy>(ArgBackType)) {
+ // Assume 2nd arg should be IntRecTy
+ } else {
+ if (Args.size() != 1)
+ Error(InitLoc, Twine("expected value of type 'int', got '" +
+ ArgBackType->getAsString() + "'"));
+ else
+ Error(InitLoc, Twine("expected list or int, got value of type '") +
+ ArgBackType->getAsString() + "'");
+ return nullptr;
+ }
+ } else {
+ // Don't come here unless 1st arg is ListRecTy.
+ assert(isa<ListRecTy>(cast<TypedInit>(Args[0])->getType()));
+ Error(InitLoc, Twine("expected one list, got extra value of type '") +
+ ArgBackType->getAsString() + "'");
+ return nullptr;
+ }
+ if (!consume(tgtok::comma))
+ break;
+ }
+
+ if (!consume(tgtok::r_paren)) {
+ TokError("expected ')' in operator");
+ return nullptr;
+ }
+
+ Init *LHS, *MHS, *RHS;
+ auto ArgCount = Args.size();
+ assert(ArgCount >= 1);
+ auto *Arg0 = cast<TypedInit>(Args[0]);
+ auto *Arg0Ty = Arg0->getType();
+ if (ArgCount == 1) {
+ if (isa<ListRecTy>(Arg0Ty)) {
+ // (0, !size(arg), 1)
+ LHS = IntInit::get(Records, 0);
+ MHS = UnOpInit::get(UnOpInit::SIZE, Arg0, IntRecTy::get(Records))
+ ->Fold(CurRec);
+ RHS = IntInit::get(Records, 1);
+ } else {
+ assert(isa<IntRecTy>(Arg0Ty));
+ // (0, arg, 1)
+ LHS = IntInit::get(Records, 0);
+ MHS = Arg0;
+ RHS = IntInit::get(Records, 1);
+ }
+ } else {
+ assert(isa<IntRecTy>(Arg0Ty));
+ auto *Arg1 = cast<TypedInit>(Args[1]);
+ assert(isa<IntRecTy>(Arg1->getType()));
+ LHS = Arg0;
+ MHS = Arg1;
+ if (ArgCount == 3) {
+ // (start, end, step)
+ auto *Arg2 = cast<TypedInit>(Args[2]);
+ assert(isa<IntRecTy>(Arg2->getType()));
+ RHS = Arg2;
+ } else
+ // (start, end, 1)
+ RHS = IntInit::get(Records, 1);
+ }
+ return TernOpInit::get(TernOpInit::RANGE, LHS, MHS, RHS,
+ IntRecTy::get(Records)->getListTy())
+ ->Fold(CurRec);
+ }
+
case tgtok::XSetDagArg:
case tgtok::XSetDagName:
case tgtok::XDag:
@@ -2542,13 +2605,20 @@ Init *TGParser::ParseOperationCond(Record *CurRec, RecTy *ItemType) {
/// SimpleValue ::= LISTREMOVETOK '(' Value ',' Value ')'
/// SimpleValue ::= RANGE '(' Value ')'
/// SimpleValue ::= RANGE '(' Value ',' Value ')'
+/// SimpleValue ::= RANGE '(' Value ',' Value ',' Value ')'
/// SimpleValue ::= STRCONCATTOK '(' Value ',' Value ')'
/// SimpleValue ::= COND '(' [Value ':' Value,]+ ')'
///
Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
IDParseMode Mode) {
Init *R = nullptr;
- switch (Lex.getCode()) {
+ tgtok::TokKind Code = Lex.getCode();
+
+ // Parse bang operators.
+ if (tgtok::isBangOperator(Code))
+ return ParseOperation(CurRec, ItemType);
+
+ switch (Code) {
default: TokError("Unknown or reserved token when parsing a value"); break;
case tgtok::TrueVal:
@@ -2803,58 +2873,6 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
return DagInit::get(Operator, OperatorName, DagArgs);
}
-
- case tgtok::XHead:
- case tgtok::XTail:
- case tgtok::XSize:
- case tgtok::XEmpty:
- case tgtok::XCast:
- case tgtok::XToLower:
- case tgtok::XToUpper:
- case tgtok::XGetDagOp: // Value ::= !unop '(' Value ')'
- case tgtok::XExists:
- case tgtok::XIsA:
- case tgtok::XConcat:
- case tgtok::XDag:
- case tgtok::XADD:
- case tgtok::XSUB:
- case tgtok::XMUL:
- case tgtok::XDIV:
- case tgtok::XNOT:
- case tgtok::XLOG2:
- case tgtok::XAND:
- case tgtok::XOR:
- case tgtok::XXOR:
- case tgtok::XSRA:
- case tgtok::XSRL:
- case tgtok::XSHL:
- case tgtok::XEq:
- case tgtok::XNe:
- case tgtok::XLe:
- case tgtok::XLt:
- case tgtok::XGe:
- case tgtok::XGt:
- case tgtok::XListConcat:
- case tgtok::XListSplat:
- case tgtok::XListRemove:
- case tgtok::XRange:
- case tgtok::XStrConcat:
- case tgtok::XInterleave:
- case tgtok::XGetDagArg:
- case tgtok::XGetDagName:
- case tgtok::XSetDagOp: // Value ::= !binop '(' Value ',' Value ')'
- case tgtok::XSetDagArg:
- case tgtok::XSetDagName:
- case tgtok::XIf:
- case tgtok::XCond:
- case tgtok::XFoldl:
- case tgtok::XForEach:
- case tgtok::XFilter:
- case tgtok::XSubst:
- case tgtok::XSubstr:
- case tgtok::XFind: { // Value ::= !ternop '(' Value ',' Value ',' Value ')'
- return ParseOperation(CurRec, ItemType);
- }
}
return R;
@@ -3140,8 +3158,7 @@ void TGParser::ParseValueList(SmallVectorImpl<Init *> &Result, Record *CurRec,
// PostionalArgValueList ::= [Value {',' Value}*]
// NamedArgValueList ::= [NameValue '=' Value {',' NameValue '=' Value}*]
bool TGParser::ParseTemplateArgValueList(
- SmallVectorImpl<ArgumentInit *> &Result, Record *CurRec, Record *ArgsRec,
- bool IsDefm) {
+ SmallVectorImpl<ArgumentInit *> &Result, Record *CurRec, Record *ArgsRec) {
assert(Result.empty() && "Result vector is not empty");
ArrayRef<Init *> TArgs = ArgsRec->getTemplateArgs();
@@ -3172,8 +3189,7 @@ bool TGParser::ParseTemplateArgValueList(
"The name of named argument should be a valid identifier");
auto *Name = cast<StringInit>(Value);
- Init *QualifiedName =
- QualifyName(*ArgsRec, CurMultiClass, Name, IsDefm ? "::" : ":");
+ Init *QualifiedName = QualifyName(*ArgsRec, Name);
auto *NamedArg = ArgsRec->getValue(QualifiedName);
if (!NamedArg)
return Error(ValueLoc,
@@ -3252,17 +3268,17 @@ Init *TGParser::ParseDeclaration(Record *CurRec,
RecordVal(DeclName, IdLoc, Type,
HasField ? RecordVal::FK_NonconcreteOK
: RecordVal::FK_Normal));
-
} else if (CurRec) { // class template argument
- DeclName = QualifyName(*CurRec, CurMultiClass, DeclName, ":");
- BadField = AddValue(CurRec, IdLoc, RecordVal(DeclName, IdLoc, Type,
- RecordVal::FK_TemplateArg));
-
+ DeclName = QualifyName(*CurRec, DeclName);
+ BadField =
+ AddValue(CurRec, IdLoc,
+ RecordVal(DeclName, IdLoc, Type, RecordVal::FK_TemplateArg));
} else { // multiclass template argument
assert(CurMultiClass && "invalid context for template argument");
- DeclName = QualifyName(CurMultiClass->Rec, CurMultiClass, DeclName, "::");
- BadField = AddValue(CurRec, IdLoc, RecordVal(DeclName, IdLoc, Type,
- RecordVal::FK_TemplateArg));
+ DeclName = QualifyName(CurMultiClass, DeclName);
+ BadField =
+ AddValue(CurRec, IdLoc,
+ RecordVal(DeclName, IdLoc, Type, RecordVal::FK_TemplateArg));
}
if (BadField)
return nullptr;
@@ -3409,6 +3425,7 @@ bool TGParser::ParseTemplateArgList(Record *CurRec) {
/// BodyItem ::= Declaration ';'
/// BodyItem ::= LET ID OptionalBitList '=' Value ';'
/// BodyItem ::= Defvar
+/// BodyItem ::= Dump
/// BodyItem ::= Assert
///
bool TGParser::ParseBodyItem(Record *CurRec) {
@@ -3418,6 +3435,9 @@ bool TGParser::ParseBodyItem(Record *CurRec) {
if (Lex.getCode() == tgtok::Defvar)
return ParseDefvar(CurRec);
+ if (Lex.getCode() == tgtok::Dump)
+ return ParseDump(nullptr, CurRec);
+
if (Lex.getCode() != tgtok::Let) {
if (!ParseDeclaration(CurRec, false))
return true;
@@ -3514,6 +3534,10 @@ bool TGParser::ApplyLetStack(RecordsEntry &Entry) {
if (Entry.Assertion)
return false;
+ // Let bindings are not applied to dumps.
+ if (Entry.Dump)
+ return false;
+
for (auto &E : Entry.Loop->Entries) {
if (ApplyLetStack(E))
return true;
@@ -3583,9 +3607,8 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) {
return true;
if (isa<UnsetInit>(Name)) {
- CurRec =
- std::make_unique<Record>(Records.getNewAnonymousName(), DefLoc, Records,
- /*Anonymous=*/true);
+ CurRec = std::make_unique<Record>(Records.getNewAnonymousName(), DefLoc,
+ Records, Record::RK_AnonymousDef);
} else {
CurRec = std::make_unique<Record>(Name, NameLoc, Records);
}
@@ -3903,9 +3926,8 @@ bool TGParser::ParseClass() {
CurRec->updateClassLoc(Lex.getLoc());
} else {
// If this is the first reference to this class, create and add it.
- auto NewRec =
- std::make_unique<Record>(Lex.getCurStrVal(), Lex.getLoc(), Records,
- /*Class=*/true);
+ auto NewRec = std::make_unique<Record>(Lex.getCurStrVal(), Lex.getLoc(),
+ Records, Record::RK_Class);
CurRec = NewRec.get();
Records.addClass(std::move(NewRec));
}
@@ -4094,13 +4116,14 @@ bool TGParser::ParseMultiClass() {
while (Lex.getCode() != tgtok::r_brace) {
switch (Lex.getCode()) {
default:
- return TokError("expected 'assert', 'def', 'defm', 'defvar', "
+ return TokError("expected 'assert', 'def', 'defm', 'defvar', 'dump', "
"'foreach', 'if', or 'let' in multiclass body");
case tgtok::Assert:
case tgtok::Def:
case tgtok::Defm:
case tgtok::Defvar:
+ case tgtok::Dump:
case tgtok::Foreach:
case tgtok::If:
case tgtok::Let:
@@ -4244,15 +4267,18 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
/// Object ::= Defset
/// Object ::= Defvar
/// Object ::= Assert
+/// Object ::= Dump
bool TGParser::ParseObject(MultiClass *MC) {
switch (Lex.getCode()) {
default:
return TokError(
- "Expected assert, class, def, defm, defset, foreach, if, or let");
+ "Expected assert, class, def, defm, defset, dump, foreach, if, or let");
case tgtok::Assert: return ParseAssert(MC);
case tgtok::Def: return ParseDef(MC);
case tgtok::Defm: return ParseDefm(MC);
case tgtok::Defvar: return ParseDefvar();
+ case tgtok::Dump:
+ return ParseDump(MC);
case tgtok::Foreach: return ParseForeach(MC);
case tgtok::If: return ParseIf(MC);
case tgtok::Let: return ParseTopLevelLet(MC);
@@ -4276,7 +4302,7 @@ bool TGParser::ParseObject(MultiClass *MC) {
/// ParseObjectList
/// ObjectList :== Object*
bool TGParser::ParseObjectList(MultiClass *MC) {
- while (isObjectStart(Lex.getCode())) {
+ while (tgtok::isObjectStart(Lex.getCode())) {
if (ParseObject(MC))
return true;
}
@@ -4363,3 +4389,30 @@ LLVM_DUMP_METHOD void MultiClass::dump() const {
E.dump();
}
#endif
+
+bool TGParser::ParseDump(MultiClass *CurMultiClass, Record *CurRec) {
+ // Location of the `dump` statement.
+ SMLoc Loc = Lex.getLoc();
+ assert(Lex.getCode() == tgtok::Dump && "Unknown tok");
+ Lex.Lex(); // eat the operation
+
+ Init *Message = ParseValue(CurRec);
+ if (!Message)
+ return true;
+
+ // Allow using `dump` directly on `defvar` and `def` by wrapping them
+ // in a `!repr`.
+ if (isa<DefInit>(Message))
+ Message = UnOpInit::get(UnOpInit::REPR, Message, StringRecTy::get(Records))
+ ->Fold(CurRec);
+
+ if (!consume(tgtok::semi))
+ return TokError("expected ';'");
+
+ if (CurRec)
+ CurRec->addDump(Loc, Message);
+ else
+ addEntry(std::make_unique<Record::DumpInfo>(Loc, Message));
+
+ return false;
+}
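Taken together, these parser changes make `dump` usable at top level, inside a multiclass body, and inside a record body. A dump of a string expression such as dump "V = " # !repr(V); goes through dumpMessage() immediately, or is recorded and resolved later for records and loops, and ParseDump wraps a bare record operand in !repr automatically, so dump rec; behaves like dump !repr(rec);. Dumps attached to a class are copied onto each instance via appendDumps and emitted once the record is finalized via emitRecordDumps.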
diff --git a/contrib/llvm-project/llvm/lib/TableGen/TGParser.h b/contrib/llvm-project/llvm/lib/TableGen/TGParser.h
index d42cdad88a84..0929154fed3d 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/TGParser.h
+++ b/contrib/llvm-project/llvm/lib/TableGen/TGParser.h
@@ -41,6 +41,7 @@ struct RecordsEntry {
std::unique_ptr<Record> Rec;
std::unique_ptr<ForeachLoop> Loop;
std::unique_ptr<Record::AssertionInfo> Assertion;
+ std::unique_ptr<Record::DumpInfo> Dump;
void dump() const;
@@ -49,6 +50,8 @@ struct RecordsEntry {
RecordsEntry(std::unique_ptr<ForeachLoop> Loop) : Loop(std::move(Loop)) {}
RecordsEntry(std::unique_ptr<Record::AssertionInfo> Assertion)
: Assertion(std::move(Assertion)) {}
+ RecordsEntry(std::unique_ptr<Record::DumpInfo> Dump)
+ : Dump(std::move(Dump)) {}
};
/// ForeachLoop - Record the iteration state associated with a for loop.
@@ -82,7 +85,7 @@ struct MultiClass {
void dump() const;
MultiClass(StringRef Name, SMLoc Loc, RecordKeeper &Records)
- : Rec(Name, Loc, Records) {}
+ : Rec(Name, Loc, Records, Record::RK_MultiClass) {}
};
class TGVarScope {
@@ -262,6 +265,7 @@ private: // Parser methods.
bool ParseDef(MultiClass *CurMultiClass);
bool ParseDefset();
bool ParseDefvar(Record *CurRec = nullptr);
+ bool ParseDump(MultiClass *CurMultiClass, Record *CurRec = nullptr);
bool ParseForeach(MultiClass *CurMultiClass);
bool ParseIf(MultiClass *CurMultiClass);
bool ParseIfBody(MultiClass *CurMultiClass, StringRef Kind);
@@ -289,8 +293,7 @@ private: // Parser methods.
void ParseValueList(SmallVectorImpl<llvm::Init*> &Result,
Record *CurRec, RecTy *ItemType = nullptr);
bool ParseTemplateArgValueList(SmallVectorImpl<llvm::ArgumentInit *> &Result,
- Record *CurRec, Record *ArgsRec,
- bool IsDefm = false);
+ Record *CurRec, Record *ArgsRec);
void ParseDagArgList(
SmallVectorImpl<std::pair<llvm::Init*, StringInit*>> &Result,
Record *CurRec);
diff --git a/contrib/llvm-project/llvm/lib/TableGen/TableGenBackend.cpp b/contrib/llvm-project/llvm/lib/TableGen/TableGenBackend.cpp
index 135ec643bc3a..035abe936e11 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/TableGenBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/TableGen/TableGenBackend.cpp
@@ -12,6 +12,7 @@
#include "llvm/TableGen/TableGenBackend.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -40,7 +41,8 @@ static void printLine(raw_ostream &OS, const Twine &Prefix, char Fill,
OS << Suffix << '\n';
}
-void llvm::emitSourceFileHeader(StringRef Desc, raw_ostream &OS) {
+void llvm::emitSourceFileHeader(StringRef Desc, raw_ostream &OS,
+ const RecordKeeper &Record) {
printLine(OS, "/*===- TableGen'erated file ", '-', "*- C++ -*-===*\\");
StringRef Prefix("|* ");
StringRef Suffix(" *|");
@@ -55,7 +57,13 @@ void llvm::emitSourceFileHeader(StringRef Desc, raw_ostream &OS) {
} while (Pos < Desc.size());
printLine(OS, Prefix, ' ', Suffix);
printLine(OS, Prefix + "Automatically generated file, do not edit!", ' ',
- Suffix);
+ Suffix);
+
+  // Print the filename of the source file.
+ if (!Record.getInputFilename().empty())
+ printLine(
+ OS, Prefix + "From: " + sys::path::filename(Record.getInputFilename()),
+ ' ', Suffix);
printLine(OS, Prefix, ' ', Suffix);
printLine(OS, "\\*===", '-', "===*/");
OS << '\n';
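For reference, a header produced by the updated emitter might look like the
sketch below (description and filename invented for illustration; the exact
column widths come from printLine, and the "From:" line appears only when the
RecordKeeper carries an input filename):

    /*===- TableGen'erated file ----------------------------*- C++ -*-===*\
    |*                                                                   *|
    |* Example Backend Description                                       *|
    |*                                                                   *|
    |* Automatically generated file, do not edit!                        *|
    |*                                                                   *|
    |* From: Example.td                                                  *|
    |*                                                                   *|
    \*===---------------------------------------------------------------===*/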
diff --git a/contrib/llvm-project/llvm/lib/TableGen/TableGenBackendSkeleton.cpp b/contrib/llvm-project/llvm/lib/TableGen/TableGenBackendSkeleton.cpp
index 2fde4a66727b..8e65b7a5b300 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/TableGenBackendSkeleton.cpp
+++ b/contrib/llvm-project/llvm/lib/TableGen/TableGenBackendSkeleton.cpp
@@ -10,7 +10,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/TableGen/TableGenBackend.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.h
index 76f55666e743..901769c54b6e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.h
@@ -36,7 +36,7 @@ FunctionPass *createAArch64CompressJumpTablesPass();
FunctionPass *createAArch64ConditionalCompares();
FunctionPass *createAArch64AdvSIMDScalar();
FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM,
- CodeGenOpt::Level OptLevel);
+ CodeGenOptLevel OptLevel);
FunctionPass *createAArch64StorePairSuppressPass();
FunctionPass *createAArch64ExpandPseudoPass();
FunctionPass *createAArch64SLSHardeningPass();
@@ -51,6 +51,7 @@ FunctionPass *createAArch64A57FPLoadBalancing();
FunctionPass *createAArch64A53Fix835769();
FunctionPass *createFalkorHWPFFixPass();
FunctionPass *createFalkorMarkStridedAccessesPass();
+FunctionPass *createAArch64PointerAuthPass();
FunctionPass *createAArch64BranchTargetsPass();
FunctionPass *createAArch64MIPeepholeOptPass();
@@ -74,6 +75,7 @@ ModulePass *createAArch64GlobalsTaggingPass();
void initializeAArch64A53Fix835769Pass(PassRegistry&);
void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
void initializeAArch64AdvSIMDScalarPass(PassRegistry&);
+void initializeAArch64PointerAuthPass(PassRegistry&);
void initializeAArch64BranchTargetsPass(PassRegistry&);
void initializeAArch64CFIFixupPass(PassRegistry&);
void initializeAArch64CollectLOHPass(PassRegistry &);
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
index 8f50af4b71fd..c600bcaab2b3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
@@ -127,6 +127,12 @@ def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP",
def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true",
"Enable Scalable Vector Extension (SVE) instructions (FEAT_SVE)", [FeatureFullFP16]>;
+def FeatureFPMR : SubtargetFeature<"fpmr", "HasFPMR", "true",
+ "Enable FPMR Register (FEAT_FPMR)">;
+
+def FeatureFP8 : SubtargetFeature<"fp8", "HasFP8", "true",
+ "Enable FP8 instructions (FEAT_FP8)">;
+
// This flag is currently still labeled as Experimental, but when fully
// implemented this should tell the compiler to use the zeroing pseudos to
// benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive
@@ -148,6 +154,9 @@ def FeatureExperimentalZeroingPseudos
def FeatureUseScalarIncVL : SubtargetFeature<"use-scalar-inc-vl",
"UseScalarIncVL", "true", "Prefer inc/dec over add+cnt">;
+def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16",
+ "true", "Enable BFloat16 Extension (FEAT_BF16)" >;
+
def FeatureNoSVEFPLD1R : SubtargetFeature<"no-sve-fp-ld1r",
"NoSVEFPLD1R", "true", "Avoid using LD1RX instructions for FP">;
@@ -172,7 +181,7 @@ def FeatureSVE2p1: SubtargetFeature<"sve2p1", "HasSVE2p1", "true",
"Enable Scalable Vector Extension 2.1 instructions", [FeatureSVE2]>;
def FeatureB16B16 : SubtargetFeature<"b16b16", "HasB16B16", "true",
- "Enable SVE2.1 or SME2.1 non-widening BFloat16 to BFloat16 instructions (FEAT_B16B16)", []>;
+ "Enable SVE2.1 or SME2.1 non-widening BFloat16 to BFloat16 instructions (FEAT_B16B16)", [FeatureBF16]>;
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
"Has zero-cycle register moves">;
@@ -223,14 +232,9 @@ def FeatureEnableSelectOptimize : SubtargetFeature<
"enable-select-opt", "EnableSelectOptimize", "true",
"Enable the select optimize pass for select loop heuristics">;
-def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move",
- "HasCustomCheapAsMoveHandling", "true",
- "Use custom handling of cheap instructions">;
-
def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move",
"HasExynosCheapAsMoveHandling", "true",
- "Use Exynos specific handling of cheap instructions",
- [FeatureCustomCheapAsMoveHandling]>;
+ "Use Exynos specific handling of cheap instructions">;
def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
"UsePostRAScheduler", "true", "Schedule again after register allocation">;
@@ -300,6 +304,10 @@ def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
"Disable latency scheduling heuristic">;
+def FeatureStorePairSuppress : SubtargetFeature<
+ "store-pair-suppress", "EnableStorePairSuppress", "true",
+ "Enable Store Pair Suppression heuristics">;
+
def FeatureForce32BitJumpTables
: SubtargetFeature<"force-32bit-jump-tables", "Force32BitJumpTables", "true",
"Force jump table entries to be 32-bits wide except at MinSize">;
@@ -382,9 +390,13 @@ def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates",
"equivalent when the immediate does "
"not fit in the encoding.">;
-def FeatureLSLFast : SubtargetFeature<
- "lsl-fast", "HasLSLFast", "true",
- "CPU has a fastpath logical shift of up to 3 places">;
+def FeatureAddrLSLFast : SubtargetFeature<
+ "addr-lsl-fast", "HasAddrLSLFast", "true",
+ "Address operands with logical shift of up to 3 places are cheap">;
+
+def FeatureALULSLFast : SubtargetFeature<
+ "alu-lsl-fast", "HasALULSLFast", "true",
+ "Add/Sub operations with lsl shift <= 4 are cheap">;
def FeatureAggressiveFMA :
SubtargetFeature<"aggressive-fma",
@@ -438,9 +450,6 @@ def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals",
"true", "Use an instruction sequence for taking the address of a global "
"that allows a memory tag in the upper address bits">;
-def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16",
- "true", "Enable BFloat16 Extension (FEAT_BF16)" >;
-
def FeatureMatMulInt8 : SubtargetFeature<"i8mm", "HasMatMulInt8",
"true", "Enable Matrix Multiply Int8 Extension (FEAT_I8MM)">;
@@ -499,12 +508,47 @@ def FeatureSMEI16I64 : SubtargetFeature<"sme-i16i64", "HasSMEI16I64", "true",
def FeatureSMEF16F16 : SubtargetFeature<"sme-f16f16", "HasSMEF16F16", "true",
"Enable SME2.1 non-widening Float16 instructions (FEAT_SME_F16F16)", []>;
+def FeatureSMEFA64 : SubtargetFeature<"sme-fa64", "HasSMEFA64", "true",
+ "Enable the full A64 instruction set in streaming SVE mode (FEAT_SME_FA64)", [FeatureSME, FeatureSVE2]>;
+
def FeatureSME2 : SubtargetFeature<"sme2", "HasSME2", "true",
"Enable Scalable Matrix Extension 2 (SME2) instructions", [FeatureSME]>;
def FeatureSME2p1 : SubtargetFeature<"sme2p1", "HasSME2p1", "true",
"Enable Scalable Matrix Extension 2.1 (FEAT_SME2p1) instructions", [FeatureSME2]>;
+def FeatureFAMINMAX: SubtargetFeature<"faminmax", "HasFAMINMAX", "true",
+ "Enable FAMIN and FAMAX instructions (FEAT_FAMINMAX)">;
+
+def FeatureFP8FMA : SubtargetFeature<"fp8fma", "HasFP8FMA", "true",
+ "Enable fp8 multiply-add instructions (FEAT_FP8FMA)">;
+
+def FeatureSSVE_FP8FMA : SubtargetFeature<"ssve-fp8fma", "HasSSVE_FP8FMA", "true",
+ "Enable SVE2 fp8 multiply-add instructions (FEAT_SSVE_FP8FMA)", [FeatureSME2]>;
+
+def FeatureFP8DOT2: SubtargetFeature<"fp8dot2", "HasFP8DOT2", "true",
+ "Enable fp8 2-way dot instructions (FEAT_FP8DOT2)">;
+
+def FeatureSSVE_FP8DOT2 : SubtargetFeature<"ssve-fp8dot2", "HasSSVE_FP8DOT2", "true",
+ "Enable SVE2 fp8 2-way dot product instructions (FEAT_SSVE_FP8DOT2)", [FeatureSME2]>;
+
+def FeatureFP8DOT4: SubtargetFeature<"fp8dot4", "HasFP8DOT4", "true",
+ "Enable fp8 4-way dot instructions (FEAT_FP8DOT4)">;
+
+def FeatureSSVE_FP8DOT4 : SubtargetFeature<"ssve-fp8dot4", "HasSSVE_FP8DOT4", "true",
+ "Enable SVE2 fp8 4-way dot product instructions (FEAT_SSVE_FP8DOT4)", [FeatureSME2]>;
+
+def FeatureLUT: SubtargetFeature<"lut", "HasLUT", "true",
+ "Enable Lookup Table instructions (FEAT_LUT)">;
+
+def FeatureSME_LUTv2 : SubtargetFeature<"sme-lutv2", "HasSME_LUTv2", "true",
+ "Enable Scalable Matrix Extension (SME) LUTv2 instructions (FEAT_SME_LUTv2)">;
+
+def FeatureSMEF8F16 : SubtargetFeature<"sme-f8f16", "HasSMEF8F16", "true",
+ "Enable Scalable Matrix Extension (SME) F8F16 instructions(FEAT_SME_F8F16)", [FeatureSME2, FeatureFP8]>;
+
+def FeatureSMEF8F32 : SubtargetFeature<"sme-f8f32", "HasSMEF8F32", "true",
+ "Enable Scalable Matrix Extension (SME) F8F32 instructions (FEAT_SME_F8F32)", [FeatureSME2, FeatureFP8]>;
+
def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true",
"Apple A7 (the CPU formerly known as Cyclone)">;
@@ -566,6 +610,18 @@ def FeatureD128 : SubtargetFeature<"d128", "HasD128",
"and Instructions (FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, FEAT_SYSINSTR128)",
[FeatureLSE128]>;
+def FeatureDisableLdp : SubtargetFeature<"disable-ldp", "HasDisableLdp",
+ "true", "Do not emit ldp">;
+
+def FeatureDisableStp : SubtargetFeature<"disable-stp", "HasDisableStp",
+ "true", "Do not emit stp">;
+
+def FeatureLdpAlignedOnly : SubtargetFeature<"ldp-aligned-only", "HasLdpAlignedOnly",
+ "true", "In order to emit ldp, first check if the load will be aligned to 2 * element_size">;
+
+def FeatureStpAlignedOnly : SubtargetFeature<"stp-aligned-only", "HasStpAlignedOnly",
+ "true", "In order to emit stp, first check if the store will be aligned to 2 * element_size">;
+
//===----------------------------------------------------------------------===//
// Architectures.
//
@@ -634,6 +690,10 @@ def HasV9_4aOps : SubtargetFeature<
"v9.4a", "HasV9_4aOps", "true", "Support ARM v9.4a instructions",
[HasV8_9aOps, HasV9_3aOps]>;
+def HasV9_5aOps : SubtargetFeature<
+ "v9.5a", "HasV9_5aOps", "true", "Support ARM v9.5a instructions",
+ [HasV9_4aOps]>;
+
def HasV8_0rOps : SubtargetFeature<
"v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions",
[//v8.1
@@ -723,8 +783,8 @@ let F = [HasSVE2p1, HasSVE2p1_or_HasSME2, HasSVE2p1_or_HasSME2p1] in
def SVE2p1Unsupported : AArch64Unsupported;
def SVE2Unsupported : AArch64Unsupported {
- let F = !listconcat([HasSVE2, HasSVE2orSME,
- HasSVE2AES, HasSVE2SHA3, HasSVE2SM4, HasSVE2BitPerm],
+ let F = !listconcat([HasSVE2, HasSVE2orSME, HasSSVE_FP8FMA, HasSMEF8F16,
+ HasSMEF8F32, HasSVE2AES, HasSVE2SHA3, HasSVE2SM4, HasSVE2BitPerm],
SVE2p1Unsupported.F);
}
@@ -737,12 +797,13 @@ let F = [HasSME2p1, HasSVE2p1_or_HasSME2p1] in
def SME2p1Unsupported : AArch64Unsupported;
def SME2Unsupported : AArch64Unsupported {
- let F = !listconcat([HasSME2, HasSVE2p1_or_HasSME2],
+ let F = !listconcat([HasSME2, HasSVE2p1_or_HasSME2, HasSSVE_FP8FMA,
+ HasSMEF8F16, HasSMEF8F32],
SME2p1Unsupported.F);
}
def SMEUnsupported : AArch64Unsupported {
- let F = !listconcat([HasSME, HasSMEI16I64, HasSMEF16F16, HasSMEF64F64],
+ let F = !listconcat([HasSME, HasSMEI16I64, HasSMEF16F16, HasSMEF64F64, HasSMEFA64],
SME2Unsupported.F);
}
@@ -778,7 +839,6 @@ def TuneA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
FeatureFuseAES,
FeatureFuseAdrpAdd,
FeatureBalanceFPOps,
- FeatureCustomCheapAsMoveHandling,
FeaturePostRAScheduler]>;
def TuneA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55",
@@ -795,11 +855,16 @@ def TuneA510 : SubtargetFeature<"a510", "ARMProcFamily", "CortexA510",
FeaturePostRAScheduler
]>;
+def TuneA520 : SubtargetFeature<"a520", "ARMProcFamily", "CortexA520",
+ "Cortex-A520 ARM processors", [
+ FeatureFuseAES,
+ FeatureFuseAdrpAdd,
+ FeaturePostRAScheduler]>;
+
def TuneA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
"Cortex-A57 ARM processors", [
FeatureFuseAES,
FeatureBalanceFPOps,
- FeatureCustomCheapAsMoveHandling,
FeatureFuseAdrpAdd,
FeatureFuseLiterals,
FeaturePostRAScheduler,
@@ -841,7 +906,8 @@ def TuneA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
"Cortex-A76 ARM processors", [
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -850,7 +916,8 @@ def TuneA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77",
FeatureCmpBccFusion,
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -859,7 +926,8 @@ def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78",
FeatureCmpBccFusion,
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -870,7 +938,8 @@ def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily",
FeatureCmpBccFusion,
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -880,7 +949,8 @@ def TuneA710 : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710",
FeatureCmpBccFusion,
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -890,7 +960,19 @@ def TuneA715 : SubtargetFeature<"a715", "ARMProcFamily", "CortexA715",
FeatureFuseAES,
FeaturePostRAScheduler,
FeatureCmpBccFusion,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
+ FeatureFuseAdrpAdd,
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
+
+def TuneA720 : SubtargetFeature<"a720", "ARMProcFamily", "CortexA720",
+ "Cortex-A720 ARM processors", [
+ FeatureFuseAES,
+ FeaturePostRAScheduler,
+ FeatureCmpBccFusion,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeatureFuseAdrpAdd,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -905,7 +987,8 @@ def TuneX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
FeatureCmpBccFusion,
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -915,14 +998,26 @@ def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2",
FeatureCmpBccFusion,
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
def TuneX3 : SubtargetFeature<"cortex-x3", "ARMProcFamily", "CortexX3",
"Cortex-X3 ARM processors", [
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
+ FeatureFuseAdrpAdd,
+ FeatureFuseAES,
+ FeaturePostRAScheduler,
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
+
+def TuneX4 : SubtargetFeature<"cortex-x4", "ARMProcFamily", "CortexX4",
+ "Cortex-X4 ARM processors", [
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeatureFuseAdrpAdd,
FeatureFuseAES,
FeaturePostRAScheduler,
@@ -934,8 +1029,8 @@ def TuneA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX",
FeaturePostRAScheduler,
FeatureAggressiveFMA,
FeatureArithmeticBccFusion,
- FeaturePredictableSelectIsExpensive
- ]>;
+ FeatureStorePairSuppress,
+ FeaturePredictableSelectIsExpensive]>;
def TuneCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel",
"Nvidia Carmel processors">;
@@ -949,10 +1044,10 @@ def TuneAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7",
FeatureArithmeticCbzFusion,
FeatureDisableLatencySchedHeuristic,
FeatureFuseAES, FeatureFuseCryptoEOR,
+ FeatureStorePairSuppress,
FeatureZCRegMove,
FeatureZCZeroing,
- FeatureZCZeroingFPWorkaround]
- >;
+ FeatureZCZeroingFPWorkaround]>;
def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10",
"Apple A10", [
@@ -962,9 +1057,9 @@ def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10",
FeatureDisableLatencySchedHeuristic,
FeatureFuseAES,
FeatureFuseCryptoEOR,
+ FeatureStorePairSuppress,
FeatureZCRegMove,
- FeatureZCZeroing]
- >;
+ FeatureZCZeroing]>;
def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",
"Apple A11", [
@@ -974,9 +1069,9 @@ def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",
FeatureDisableLatencySchedHeuristic,
FeatureFuseAES,
FeatureFuseCryptoEOR,
+ FeatureStorePairSuppress,
FeatureZCRegMove,
- FeatureZCZeroing]
- >;
+ FeatureZCZeroing]>;
def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",
"Apple A12", [
@@ -986,9 +1081,9 @@ def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",
FeatureDisableLatencySchedHeuristic,
FeatureFuseAES,
FeatureFuseCryptoEOR,
+ FeatureStorePairSuppress,
FeatureZCRegMove,
- FeatureZCZeroing]
- >;
+ FeatureZCZeroing]>;
def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
"Apple A13", [
@@ -998,9 +1093,9 @@ def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
FeatureDisableLatencySchedHeuristic,
FeatureFuseAES,
FeatureFuseCryptoEOR,
+ FeatureStorePairSuppress,
FeatureZCRegMove,
- FeatureZCZeroing]
- >;
+ FeatureZCZeroing]>;
def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
"Apple A14", [
@@ -1016,6 +1111,7 @@ def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
FeatureFuseCryptoEOR,
FeatureFuseAdrpAdd,
FeatureFuseLiterals,
+ FeatureStorePairSuppress,
FeatureZCRegMove,
FeatureZCZeroing]>;
@@ -1031,9 +1127,9 @@ def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15",
FeatureFuseCCSelect,
FeatureFuseCryptoEOR,
FeatureFuseLiterals,
+ FeatureStorePairSuppress,
FeatureZCRegMove,
- FeatureZCZeroing
- ]>;
+ FeatureZCZeroing]>;
def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16",
"Apple A16", [
@@ -1047,9 +1143,25 @@ def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16",
FeatureFuseCCSelect,
FeatureFuseCryptoEOR,
FeatureFuseLiterals,
+ FeatureStorePairSuppress,
+ FeatureZCRegMove,
+ FeatureZCZeroing]>;
+
+def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17",
+ "Apple A17", [
+ FeatureAlternateSExtLoadCVTF32Pattern,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
+ FeatureDisableLatencySchedHeuristic,
+ FeatureFuseAddress,
+ FeatureFuseAES,
+ FeatureFuseArithmeticLogic,
+ FeatureFuseCCSelect,
+ FeatureFuseCryptoEOR,
+ FeatureFuseLiterals,
+ FeatureStorePairSuppress,
FeatureZCRegMove,
- FeatureZCZeroing
- ]>;
+ FeatureZCZeroing]>;
def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
"Samsung Exynos-M3 processors",
@@ -1060,7 +1172,9 @@ def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
FeatureFuseCCSelect,
FeatureFuseAdrpAdd,
FeatureFuseLiterals,
- FeatureLSLFast,
+ FeatureStorePairSuppress,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive]>;
@@ -1077,28 +1191,30 @@ def TuneExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3",
FeatureFuseCCSelect,
FeatureFuseAdrpAdd,
FeatureFuseLiterals,
- FeatureLSLFast,
+ FeatureStorePairSuppress,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureZCZeroing]>;
def TuneKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
"Qualcomm Kryo processors", [
- FeatureCustomCheapAsMoveHandling,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
FeatureZCZeroing,
- FeatureLSLFast]
- >;
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
+ FeatureStorePairSuppress]>;
def TuneFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
"Qualcomm Falkor processors", [
- FeatureCustomCheapAsMoveHandling,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
FeatureZCZeroing,
- FeatureLSLFast,
- FeatureSlowSTRQro
- ]>;
+ FeatureStorePairSuppress,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
+ FeatureSlowSTRQro]>;
def TuneNeoverseE1 : SubtargetFeature<"neoversee1", "ARMProcFamily", "NeoverseE1",
"Neoverse E1 ARM processors", [
@@ -1110,7 +1226,8 @@ def TuneNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", "NeoverseN1
"Neoverse N1 ARM processors", [
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -1119,7 +1236,8 @@ def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2
"Neoverse N2 ARM processors", [
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -1128,7 +1246,8 @@ def TuneNeoverse512TVB : SubtargetFeature<"neoverse512tvb", "ARMProcFamily", "Ne
"Neoverse 512-TVB ARM processors", [
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -1137,7 +1256,8 @@ def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1
"Neoverse V1 ARM processors", [
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive,
@@ -1146,24 +1266,28 @@ def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1
def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2",
"Neoverse V2 ARM processors", [
FeatureFuseAES,
- FeatureLSLFast,
+ FeatureFuseAdrpAdd,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
def TuneSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira",
"Qualcomm Saphira processors", [
- FeatureCustomCheapAsMoveHandling,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
FeatureZCZeroing,
- FeatureLSLFast]>;
+ FeatureStorePairSuppress,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast]>;
def TuneThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", "ThunderX2T99",
"Cavium ThunderX2 processors", [
FeatureAggressiveFMA,
FeatureArithmeticBccFusion,
FeaturePostRAScheduler,
+ FeatureStorePairSuppress,
FeaturePredictableSelectIsExpensive]>;
def TuneThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily",
@@ -1174,59 +1298,72 @@ def TuneThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily",
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
FeatureBalanceFPOps,
+ FeatureStorePairSuppress,
FeatureStrictAlign]>;
def TuneThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX",
"Cavium ThunderX processors", [
FeaturePostRAScheduler,
+ FeatureStorePairSuppress,
FeaturePredictableSelectIsExpensive]>;
def TuneThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily",
"ThunderXT88",
"Cavium ThunderX processors", [
FeaturePostRAScheduler,
+ FeatureStorePairSuppress,
FeaturePredictableSelectIsExpensive]>;
def TuneThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily",
"ThunderXT81",
"Cavium ThunderX processors", [
FeaturePostRAScheduler,
+ FeatureStorePairSuppress,
FeaturePredictableSelectIsExpensive]>;
def TuneThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily",
"ThunderXT83",
"Cavium ThunderX processors", [
FeaturePostRAScheduler,
+ FeatureStorePairSuppress,
FeaturePredictableSelectIsExpensive]>;
def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110",
"HiSilicon TS-V110 processors", [
- FeatureCustomCheapAsMoveHandling,
FeatureFuseAES,
+ FeatureStorePairSuppress,
FeaturePostRAScheduler]>;
def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1",
"Ampere Computing Ampere-1 processors", [
FeaturePostRAScheduler,
FeatureFuseAES,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeatureAggressiveFMA,
FeatureArithmeticBccFusion,
FeatureCmpBccFusion,
FeatureFuseAddress,
- FeatureFuseLiterals]>;
+ FeatureFuseLiterals,
+ FeatureStorePairSuppress,
+ FeatureLdpAlignedOnly,
+ FeatureStpAlignedOnly]>;
def TuneAmpere1A : SubtargetFeature<"ampere1a", "ARMProcFamily", "Ampere1A",
"Ampere Computing Ampere-1A processors", [
FeaturePostRAScheduler,
FeatureFuseAES,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeatureAggressiveFMA,
FeatureArithmeticBccFusion,
FeatureCmpBccFusion,
FeatureFuseAddress,
FeatureFuseLiterals,
- FeatureFuseLiterals]>;
+ FeatureFuseLiterals,
+ FeatureStorePairSuppress,
+ FeatureLdpAlignedOnly,
+ FeatureStpAlignedOnly]>;
def ProcessorFeatures {
list<SubtargetFeature> A53 = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
@@ -1238,6 +1375,9 @@ def ProcessorFeatures {
FeatureMatMulInt8, FeatureBF16, FeatureAM,
FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
FeatureFP16FML];
+ list<SubtargetFeature> A520 = [HasV9_2aOps, FeaturePerfMon, FeatureAM,
+ FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
+ FeatureFP16FML];
list<SubtargetFeature> A65 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
FeatureNEON, FeatureFullFP16, FeatureDotProd,
FeatureRCPC, FeatureSSBS, FeatureRAS,
@@ -1264,6 +1404,9 @@ def ProcessorFeatures {
FeatureFP16FML, FeatureSVE, FeatureTRBE,
FeatureSVE2BitPerm, FeatureBF16, FeatureETE,
FeaturePerfMon, FeatureMatMulInt8, FeatureSPE];
+ list<SubtargetFeature> A720 = [HasV9_2aOps, FeatureMTE, FeatureFP16FML,
+ FeatureTRBE, FeatureSVE2BitPerm, FeatureETE,
+ FeaturePerfMon, FeatureSPE, FeatureSPE_EEF];
list<SubtargetFeature> R82 = [HasV8_0rOps, FeaturePerfMon, FeatureFullFP16,
FeatureFP16FML, FeatureSSBS, FeaturePredRes,
FeatureSB];
@@ -1285,6 +1428,10 @@ def ProcessorFeatures {
FeatureSPE, FeatureBF16, FeatureMatMulInt8,
FeatureMTE, FeatureSVE2BitPerm, FeatureFullFP16,
FeatureFP16FML];
+ list<SubtargetFeature> X4 = [HasV9_2aOps,
+ FeaturePerfMon, FeatureETE, FeatureTRBE,
+ FeatureSPE, FeatureMTE, FeatureSVE2BitPerm,
+ FeatureFP16FML, FeatureSPE_EEF];
list<SubtargetFeature> A64FX = [HasV8_2aOps, FeatureFPARMv8, FeatureNEON,
FeatureSHA2, FeaturePerfMon, FeatureFullFP16,
FeatureSVE, FeatureComplxNum];
@@ -1315,6 +1462,10 @@ def ProcessorFeatures {
FeatureNEON, FeaturePerfMon, FeatureSHA3,
FeatureFullFP16, FeatureFP16FML,
FeatureHCX];
+ list<SubtargetFeature> AppleA17 = [HasV8_6aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeaturePerfMon, FeatureSHA3,
+ FeatureFullFP16, FeatureFP16FML,
+ FeatureHCX];
list<SubtargetFeature> ExynosM3 = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
FeaturePerfMon];
list<SubtargetFeature> ExynosM4 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd,
@@ -1329,9 +1480,9 @@ def ProcessorFeatures {
FeatureFPARMv8, FeatureFullFP16, FeatureNEON,
FeatureRCPC, FeatureSPE, FeatureSSBS,
FeaturePerfMon];
- list<SubtargetFeature> NeoverseN2 = [HasV8_5aOps, FeatureBF16, FeatureETE,
+ list<SubtargetFeature> NeoverseN2 = [HasV9_0aOps, FeatureBF16, FeatureETE,
FeatureMatMulInt8, FeatureMTE, FeatureSVE2,
- FeatureSVE2BitPerm, FeatureTRBE, FeatureCrypto,
+ FeatureSVE2BitPerm, FeatureTRBE,
FeaturePerfMon];
list<SubtargetFeature> Neoverse512TVB = [HasV8_4aOps, FeatureBF16, FeatureCacheDeepPersist,
FeatureCrypto, FeatureFPARMv8, FeatureFP16FML,
@@ -1376,7 +1527,7 @@ def ProcessorFeatures {
// FeatureFuseAdrpAdd is enabled under Generic to allow linker merging
// optimizations.
-def : ProcessorModel<"generic", CortexA55Model, ProcessorFeatures.Generic,
+def : ProcessorModel<"generic", CortexA510Model, ProcessorFeatures.Generic,
[FeatureFuseAES, FeatureFuseAdrpAdd, FeaturePostRAScheduler,
FeatureEnableSelectOptimize]>;
def : ProcessorModel<"cortex-a35", CortexA53Model, ProcessorFeatures.A53,
@@ -1389,6 +1540,8 @@ def : ProcessorModel<"cortex-a55", CortexA55Model, ProcessorFeatures.A55,
[TuneA55]>;
def : ProcessorModel<"cortex-a510", CortexA510Model, ProcessorFeatures.A510,
[TuneA510]>;
+def : ProcessorModel<"cortex-a520", CortexA510Model, ProcessorFeatures.A520,
+ [TuneA520]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, ProcessorFeatures.A53,
[TuneA57]>;
def : ProcessorModel<"cortex-a65", CortexA53Model, ProcessorFeatures.A65,
@@ -1415,6 +1568,8 @@ def : ProcessorModel<"cortex-a710", NeoverseN2Model, ProcessorFeatures.A710,
[TuneA710]>;
def : ProcessorModel<"cortex-a715", NeoverseN2Model, ProcessorFeatures.A715,
[TuneA715]>;
+def : ProcessorModel<"cortex-a720", NeoverseN2Model, ProcessorFeatures.A720,
+ [TuneA720]>;
def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82,
[TuneR82]>;
def : ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1,
@@ -1425,6 +1580,8 @@ def : ProcessorModel<"cortex-x2", NeoverseN2Model, ProcessorFeatures.X2,
[TuneX2]>;
def : ProcessorModel<"cortex-x3", NeoverseN2Model, ProcessorFeatures.X3,
[TuneX3]>;
+def : ProcessorModel<"cortex-x4", NeoverseN2Model, ProcessorFeatures.X4,
+ [TuneX4]>;
def : ProcessorModel<"neoverse-e1", CortexA53Model,
ProcessorFeatures.NeoverseE1, [TuneNeoverseE1]>;
def : ProcessorModel<"neoverse-n1", NeoverseN1Model,
@@ -1492,12 +1649,15 @@ def : ProcessorModel<"apple-a15", CycloneModel, ProcessorFeatures.AppleA15,
[TuneAppleA15]>;
def : ProcessorModel<"apple-a16", CycloneModel, ProcessorFeatures.AppleA16,
[TuneAppleA16]>;
-
+def : ProcessorModel<"apple-a17", CycloneModel, ProcessorFeatures.AppleA17,
+ [TuneAppleA17]>;
// Mac CPUs
def : ProcessorModel<"apple-m1", CycloneModel, ProcessorFeatures.AppleA14,
[TuneAppleA14]>;
def : ProcessorModel<"apple-m2", CycloneModel, ProcessorFeatures.AppleA15,
[TuneAppleA15]>;
+def : ProcessorModel<"apple-m3", CycloneModel, ProcessorFeatures.AppleA16,
+ [TuneAppleA16]>;
// watch CPUs.
def : ProcessorModel<"apple-s4", CycloneModel, ProcessorFeatures.AppleA12,
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index ea8e30269ece..c27ec8e6dc6b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -30,7 +30,6 @@
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 76f1cc782b24..90e1ce9ddf66 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -30,6 +30,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MachO.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/FaultMaps.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -47,10 +48,12 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -89,6 +92,10 @@ public:
void emitStartOfAsmFile(Module &M) override;
void emitJumpTableInfo() override;
+ std::tuple<const MCSymbol *, uint64_t, const MCSymbol *,
+ codeview::JumpTableEntrySize>
+ getCodeViewJumpTableInfo(int JTI, const MachineInstr *BranchInstr,
+ const MCSymbol *BranchLabel) const override;
void emitFunctionEntryLabel() override;
@@ -138,9 +145,9 @@ public:
SetupMachineFunction(MF);
if (STI->isTargetCOFF()) {
- bool Internal = MF.getFunction().hasInternalLinkage();
- COFF::SymbolStorageClass Scl = Internal ? COFF::IMAGE_SYM_CLASS_STATIC
- : COFF::IMAGE_SYM_CLASS_EXTERNAL;
+ bool Local = MF.getFunction().hasLocalLinkage();
+ COFF::SymbolStorageClass Scl =
+ Local ? COFF::IMAGE_SYM_CLASS_STATIC : COFF::IMAGE_SYM_CLASS_EXTERNAL;
int Type =
COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT;
@@ -194,6 +201,15 @@ private:
bool shouldEmitWeakSwiftAsyncExtendedFramePointerFlags() const override {
return ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags;
}
+
+ const MCSubtargetInfo *getIFuncMCSubtargetInfo() const override {
+ assert(STI);
+ return STI;
+ }
+ void emitMachOIFuncStubBody(Module &M, const GlobalIFunc &GI,
+ MCSymbol *LazyPointer) override;
+ void emitMachOIFuncStubHelperBody(Module &M, const GlobalIFunc &GI,
+ MCSymbol *LazyPointer) override;
};
} // end anonymous namespace
@@ -970,6 +986,8 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
RegClass = &AArch64::ZPRRegClass;
} else if (AArch64::PPRRegClass.contains(Reg)) {
RegClass = &AArch64::PPRRegClass;
+ } else if (AArch64::PNRRegClass.contains(Reg)) {
+ RegClass = &AArch64::PNRRegClass;
} else {
RegClass = &AArch64::FPR128RegClass;
AltName = AArch64::vreg;
@@ -1062,6 +1080,30 @@ void AArch64AsmPrinter::emitJumpTableInfo() {
}
}
+std::tuple<const MCSymbol *, uint64_t, const MCSymbol *,
+ codeview::JumpTableEntrySize>
+AArch64AsmPrinter::getCodeViewJumpTableInfo(int JTI,
+ const MachineInstr *BranchInstr,
+ const MCSymbol *BranchLabel) const {
+ const auto AFI = MF->getInfo<AArch64FunctionInfo>();
+ const auto Base = AArch64FI->getJumpTableEntryPCRelSymbol(JTI);
+ codeview::JumpTableEntrySize EntrySize;
+ switch (AFI->getJumpTableEntrySize(JTI)) {
+ case 1:
+ EntrySize = codeview::JumpTableEntrySize::UInt8ShiftLeft;
+ break;
+ case 2:
+ EntrySize = codeview::JumpTableEntrySize::UInt16ShiftLeft;
+ break;
+ case 4:
+ EntrySize = codeview::JumpTableEntrySize::Int32;
+ break;
+ default:
+ llvm_unreachable("Unexpected jump table entry size");
+ }
+ return std::make_tuple(Base, 0, BranchLabel, EntrySize);
+}
+
void AArch64AsmPrinter::emitFunctionEntryLabel() {
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall ||
MF->getFunction().getCallingConv() ==
@@ -1453,8 +1495,13 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
return;
}
case AArch64::MOVIv2d_ns:
- // If the target has <rdar://problem/16473581>, lower this
- // instruction to movi.16b instead.
+    // It is generally beneficial to rewrite "fmov s0, wzr" to "movi d0, #0",
+    // as movi is more efficient across all cores. Newer cores can eliminate
+    // fmovs early and there is no difference with movi, but this is not true
+    // for all implementations.
+ //
+ // The floating-point version doesn't quite work in rare cases on older
+ // CPUs, so on those targets we lower this instruction to movi.16b instead.
if (STI->hasZeroCycleZeroingFPWorkaround() &&
MI->getOperand(1).getImm() == 0) {
MCInst TmpInst;
@@ -1774,6 +1821,201 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInst);
}
+void AArch64AsmPrinter::emitMachOIFuncStubBody(Module &M, const GlobalIFunc &GI,
+ MCSymbol *LazyPointer) {
+ // _ifunc:
+ // adrp x16, lazy_pointer@GOTPAGE
+ // ldr x16, [x16, lazy_pointer@GOTPAGEOFF]
+ // ldr x16, [x16]
+ // br x16
+
+ {
+ MCInst Adrp;
+ Adrp.setOpcode(AArch64::ADRP);
+ Adrp.addOperand(MCOperand::createReg(AArch64::X16));
+ MCOperand SymPage;
+ MCInstLowering.lowerOperand(
+ MachineOperand::CreateMCSymbol(LazyPointer,
+ AArch64II::MO_GOT | AArch64II::MO_PAGE),
+ SymPage);
+ Adrp.addOperand(SymPage);
+ OutStreamer->emitInstruction(Adrp, *STI);
+ }
+
+ {
+ MCInst Ldr;
+ Ldr.setOpcode(AArch64::LDRXui);
+ Ldr.addOperand(MCOperand::createReg(AArch64::X16));
+ Ldr.addOperand(MCOperand::createReg(AArch64::X16));
+ MCOperand SymPageOff;
+ MCInstLowering.lowerOperand(
+ MachineOperand::CreateMCSymbol(LazyPointer, AArch64II::MO_GOT |
+ AArch64II::MO_PAGEOFF),
+ SymPageOff);
+ Ldr.addOperand(SymPageOff);
+ Ldr.addOperand(MCOperand::createImm(0));
+ OutStreamer->emitInstruction(Ldr, *STI);
+ }
+
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::LDRXui)
+ .addReg(AArch64::X16)
+ .addReg(AArch64::X16)
+ .addImm(0),
+ *STI);
+
+ OutStreamer->emitInstruction(MCInstBuilder(TM.getTargetTriple().isArm64e()
+ ? AArch64::BRAAZ
+ : AArch64::BR)
+ .addReg(AArch64::X16),
+ *STI);
+}
+
+void AArch64AsmPrinter::emitMachOIFuncStubHelperBody(Module &M,
+ const GlobalIFunc &GI,
+ MCSymbol *LazyPointer) {
+  // These stub helpers are only ever called once, so here we're optimizing
+  // for minimum size by using the pre-indexed store variants, which saves
+  // the separate instructions that would otherwise bump & restore sp.
+
+ // _ifunc.stub_helper:
+ // stp fp, lr, [sp, #-16]!
+ // mov fp, sp
+ // stp x1, x0, [sp, #-16]!
+ // stp x3, x2, [sp, #-16]!
+ // stp x5, x4, [sp, #-16]!
+ // stp x7, x6, [sp, #-16]!
+ // stp d1, d0, [sp, #-16]!
+ // stp d3, d2, [sp, #-16]!
+ // stp d5, d4, [sp, #-16]!
+ // stp d7, d6, [sp, #-16]!
+ // bl _resolver
+ // adrp x16, lazy_pointer@GOTPAGE
+ // ldr x16, [x16, lazy_pointer@GOTPAGEOFF]
+ // str x0, [x16]
+ // mov x16, x0
+ // ldp d7, d6, [sp], #16
+ // ldp d5, d4, [sp], #16
+ // ldp d3, d2, [sp], #16
+ // ldp d1, d0, [sp], #16
+ // ldp x7, x6, [sp], #16
+ // ldp x5, x4, [sp], #16
+ // ldp x3, x2, [sp], #16
+ // ldp x1, x0, [sp], #16
+ // ldp fp, lr, [sp], #16
+ // br x16
+
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::STPXpre)
+ .addReg(AArch64::SP)
+ .addReg(AArch64::FP)
+ .addReg(AArch64::LR)
+ .addReg(AArch64::SP)
+ .addImm(-2),
+ *STI);
+
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::ADDXri)
+ .addReg(AArch64::FP)
+ .addReg(AArch64::SP)
+ .addImm(0)
+ .addImm(0),
+ *STI);
+
+ for (int I = 0; I != 4; ++I)
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::STPXpre)
+ .addReg(AArch64::SP)
+ .addReg(AArch64::X1 + 2 * I)
+ .addReg(AArch64::X0 + 2 * I)
+ .addReg(AArch64::SP)
+ .addImm(-2),
+ *STI);
+
+ for (int I = 0; I != 4; ++I)
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::STPDpre)
+ .addReg(AArch64::SP)
+ .addReg(AArch64::D1 + 2 * I)
+ .addReg(AArch64::D0 + 2 * I)
+ .addReg(AArch64::SP)
+ .addImm(-2),
+ *STI);
+
+ OutStreamer->emitInstruction(
+ MCInstBuilder(AArch64::BL)
+ .addOperand(MCOperand::createExpr(lowerConstant(GI.getResolver()))),
+ *STI);
+
+ {
+ MCInst Adrp;
+ Adrp.setOpcode(AArch64::ADRP);
+ Adrp.addOperand(MCOperand::createReg(AArch64::X16));
+ MCOperand SymPage;
+ MCInstLowering.lowerOperand(
+ MachineOperand::CreateES(LazyPointer->getName().data() + 1,
+ AArch64II::MO_GOT | AArch64II::MO_PAGE),
+ SymPage);
+ Adrp.addOperand(SymPage);
+ OutStreamer->emitInstruction(Adrp, *STI);
+ }
+
+ {
+ MCInst Ldr;
+ Ldr.setOpcode(AArch64::LDRXui);
+ Ldr.addOperand(MCOperand::createReg(AArch64::X16));
+ Ldr.addOperand(MCOperand::createReg(AArch64::X16));
+ MCOperand SymPageOff;
+ MCInstLowering.lowerOperand(
+ MachineOperand::CreateES(LazyPointer->getName().data() + 1,
+ AArch64II::MO_GOT | AArch64II::MO_PAGEOFF),
+ SymPageOff);
+ Ldr.addOperand(SymPageOff);
+ Ldr.addOperand(MCOperand::createImm(0));
+ OutStreamer->emitInstruction(Ldr, *STI);
+ }
+
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::STRXui)
+ .addReg(AArch64::X0)
+ .addReg(AArch64::X16)
+ .addImm(0),
+ *STI);
+
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::ADDXri)
+ .addReg(AArch64::X16)
+ .addReg(AArch64::X0)
+ .addImm(0)
+ .addImm(0),
+ *STI);
+
+ for (int I = 3; I != -1; --I)
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::LDPDpost)
+ .addReg(AArch64::SP)
+ .addReg(AArch64::D1 + 2 * I)
+ .addReg(AArch64::D0 + 2 * I)
+ .addReg(AArch64::SP)
+ .addImm(2),
+ *STI);
+
+ for (int I = 3; I != -1; --I)
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::LDPXpost)
+ .addReg(AArch64::SP)
+ .addReg(AArch64::X1 + 2 * I)
+ .addReg(AArch64::X0 + 2 * I)
+ .addReg(AArch64::SP)
+ .addImm(2),
+ *STI);
+
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::LDPXpost)
+ .addReg(AArch64::SP)
+ .addReg(AArch64::FP)
+ .addReg(AArch64::LR)
+ .addReg(AArch64::SP)
+ .addImm(2),
+ *STI);
+
+ OutStreamer->emitInstruction(MCInstBuilder(TM.getTargetTriple().isArm64e()
+ ? AArch64::BRAAZ
+ : AArch64::BR)
+ .addReg(AArch64::X16),
+ *STI);
+}
+
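+// Usage sketch (names invented for illustration): a resolver-backed ifunc
+// that would be lowered through the two stubs above on arm64 Mach-O:
+//
+//   extern "C" int impl() { return 1; }
+//   extern "C" auto resolver() -> int (*)() { return impl; }
+//   extern "C" int my_func() __attribute__((ifunc("resolver")));
+//
+// The lazy pointer initially refers to the stub helper, so the first call
+// runs resolver() once and caches the returned address; subsequent calls
+// take the four-instruction fast path in the _ifunc stub.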
// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64AsmPrinter() {
RegisterAsmPrinter<AArch64AsmPrinter> X(getTheAArch64leTarget());
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.h
index d7ab83c946ce..ec46f62d065f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.h
@@ -31,24 +31,21 @@ bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
bool CC_AArch64_DarwinPCS_ILP32_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State);
+bool CC_AArch64_Win64PCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
bool CC_AArch64_Win64_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State);
bool CC_AArch64_Win64_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State);
-bool CC_AArch64_WebKit_JS(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State);
bool CC_AArch64_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
CCState &State);
bool RetCC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
CCState &State);
-bool RetCC_AArch64_WebKit_JS(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State);
} // namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 37976a222783..e47996bf38d4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -22,8 +22,7 @@ class CCIfILP32<CCAction A> :
// ARM AAPCS64 Calling Convention
//===----------------------------------------------------------------------===//
-let Entry = 1 in
-def CC_AArch64_AAPCS : CallingConv<[
+defvar AArch64_Common = [
CCIfType<[iPTR], CCBitConvertToType<i64>>,
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
@@ -58,11 +57,6 @@ def CC_AArch64_AAPCS : CallingConv<[
// slot is 64-bit.
CCIfByVal<CCPassByVal<8, 8>>,
- // The 'nest' parameter, if any, is passed in X18.
- // Darwin uses X18 as the platform register and hence 'nest' isn't currently
- // supported there.
- CCIfNest<CCAssignToReg<[X18]>>,
-
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[X20]>>>,
@@ -115,7 +109,16 @@ def CC_AArch64_AAPCS : CallingConv<[
CCAssignToStack<8, 8>>,
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
CCAssignToStack<16, 16>>
-]>;
+];
+
+let Entry = 1 in
+def CC_AArch64_AAPCS : CallingConv<!listconcat(
+ // The 'nest' parameter, if any, is passed in X18.
+ // Darwin and Windows use X18 as the platform register and hence 'nest' isn't
+ // currently supported there.
+ [CCIfNest<CCAssignToReg<[X18]>>],
+ AArch64_Common
+)>;
let Entry = 1 in
def RetCC_AArch64_AAPCS : CallingConv<[
@@ -153,13 +156,16 @@ def RetCC_AArch64_AAPCS : CallingConv<[
CCAssignToReg<[P0, P1, P2, P3]>>
]>;
+let Entry = 1 in
+def CC_AArch64_Win64PCS : CallingConv<AArch64_Common>;
+
// Vararg functions on windows pass floats in integer registers
let Entry = 1 in
def CC_AArch64_Win64_VarArg : CallingConv<[
CCIfType<[f16, bf16], CCBitConvertToType<i16>>,
CCIfType<[f32], CCBitConvertToType<i32>>,
CCIfType<[f64], CCBitConvertToType<i64>>,
- CCDelegateTo<CC_AArch64_AAPCS>
+ CCDelegateTo<CC_AArch64_Win64PCS>
]>;
// Vararg functions on Arm64EC ABI use a different convention, using
@@ -319,31 +325,6 @@ def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[
CCAssignToStack<16, 16>>
]>;
-
-// The WebKit_JS calling convention only passes the first argument (the callee)
-// in register and the remaining arguments on stack. We allow 32bit stack slots,
-// so that WebKit can write partial values in the stack and define the other
-// 32bit quantity as undef.
-let Entry = 1 in
-def CC_AArch64_WebKit_JS : CallingConv<[
- // Handle i1, i8, i16, i32, and i64 passing in register X0 (W0).
- CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
- CCIfType<[i32], CCAssignToReg<[W0]>>,
- CCIfType<[i64], CCAssignToReg<[X0]>>,
-
- // Pass the remaining arguments on the stack instead.
- CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
- CCIfType<[i64, f64], CCAssignToStack<8, 8>>
-]>;
-
-let Entry = 1 in
-def RetCC_AArch64_WebKit_JS : CallingConv<[
- CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,
- CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
- CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
- CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>
-]>;
-
//===----------------------------------------------------------------------===//
// ARM64 Calling Convention for GHC
//===----------------------------------------------------------------------===//
@@ -352,7 +333,7 @@ def RetCC_AArch64_WebKit_JS : CallingConv<[
// The only documentation is the GHC source code, specifically the C header
// file:
//
-// https://github.com/ghc/ghc/blob/master/includes/stg/MachRegs.h
+// https://github.com/ghc/ghc/blob/master/rts/include/stg/MachRegs.h
//
// which defines the registers for the Spineless Tagless G-Machine (STG) that
// GHC uses to implement lazy evaluation. The generic STG machine has a set of
@@ -363,8 +344,10 @@ def RetCC_AArch64_WebKit_JS : CallingConv<[
//
// https://ghc.haskell.org/trac/ghc/wiki/Commentary/Compiler/GeneratedCode
//
-// The AArch64 register mapping is under the heading "The ARMv8/AArch64 ABI
-// register mapping".
+// The AArch64 register mapping is defined in the following header file:
+//
+// https://github.com/ghc/ghc/blob/master/rts/include/stg/MachRegs/arm64.h
+//
let Entry = 1 in
def CC_AArch64_GHC : CallingConv<[
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
index c73b33a58408..ff569e3dce2e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -100,11 +100,7 @@
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td
index 96fd28650504..d6c00be80bd9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -1,4 +1,4 @@
-//=- AArch64.td - Define AArch64 Combine Rules ---------------*- tablegen -*-=//
+//=- AArch64Combine.td - Define AArch64 Combine Rules --------*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -33,15 +33,30 @@ def fold_global_offset : GICombineRule<
(apply [{ applyFoldGlobalOffset(*${root}, MRI, B, Observer, ${matchinfo});}])
>;
-def AArch64PreLegalizerCombiner: GICombinerHelper<
+// Boolean: 0 = G_ZEXT, 1 = G_SEXT
+def ext_addv_to_udot_addv_matchinfo : GIDefMatchData<"std::tuple<Register, Register, bool>">;
+let Predicates = [HasDotProd] in {
+def ext_addv_to_udot_addv : GICombineRule<
+ (defs root:$root, ext_addv_to_udot_addv_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_VECREDUCE_ADD):$root,
+ [{ return matchExtAddvToUdotAddv(*${root}, MRI, STI, ${matchinfo}); }]),
+ (apply [{ applyExtAddvToUdotAddv(*${root}, MRI, B, Observer, STI, ${matchinfo}); }])
+>;
+}
+
+def AArch64PreLegalizerCombiner: GICombiner<
"AArch64PreLegalizerCombinerImpl", [all_combines,
fconstant_to_constant,
icmp_redundant_trunc,
- fold_global_offset]> {
+ fold_global_offset,
+ shuffle_to_extract,
+ ext_addv_to_udot_addv]> {
+ let CombineAllMethodName = "tryCombineAllImpl";
}
-def AArch64O0PreLegalizerCombiner: GICombinerHelper<
+def AArch64O0PreLegalizerCombiner: GICombiner<
"AArch64O0PreLegalizerCombinerImpl", [optnone_combines]> {
+ let CombineAllMethodName = "tryCombineAllImpl";
}
// Matchdata for combines which replace a G_SHUFFLE_VECTOR with a
@@ -153,6 +168,13 @@ def mul_const : GICombineRule<
(apply [{ applyAArch64MulConstCombine(*${root}, MRI, B, ${matchinfo}); }])
>;
+def lower_mull : GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_MUL):$root,
+ [{ return matchExtMulToMULL(*${root}, MRI); }]),
+ (apply [{ applyExtMulToMULL(*${root}, MRI, B, Observer); }])
+>;
+
def build_vector_to_dup : GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_BUILD_VECTOR):$root,
@@ -204,21 +226,39 @@ def vector_sext_inreg_to_shift : GICombineRule<
(apply [{ applyVectorSextInReg(*${d}, MRI, B, Observer); }])
>;
+def unmerge_ext_to_unmerge_matchdata : GIDefMatchData<"Register">;
+def unmerge_ext_to_unmerge : GICombineRule<
+ (defs root:$d, unmerge_ext_to_unmerge_matchdata:$matchinfo),
+ (match (wip_match_opcode G_UNMERGE_VALUES):$d,
+ [{ return matchUnmergeExtToUnmerge(*${d}, MRI, ${matchinfo}); }]),
+ (apply [{ applyUnmergeExtToUnmerge(*${d}, MRI, B, Observer, ${matchinfo}); }])
+>;
+
+def regtriple_matchdata : GIDefMatchData<"std::tuple<Register, Register, Register>">;
+def or_to_bsp: GICombineRule <
+ (defs root:$root, regtriple_matchdata:$matchinfo),
+ (match (wip_match_opcode G_OR):$root,
+ [{ return matchOrToBSP(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyOrToBSP(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
def AArch64PostLegalizerLowering
- : GICombinerHelper<"AArch64PostLegalizerLoweringImpl",
+ : GICombiner<"AArch64PostLegalizerLoweringImpl",
[shuffle_vector_lowering, vashr_vlshr_imm,
icmp_lowering, build_vector_lowering,
lower_vector_fcmp, form_truncstore,
- vector_sext_inreg_to_shift]> {
+ vector_sext_inreg_to_shift,
+ unmerge_ext_to_unmerge, lower_mull]> {
}
// Post-legalization combines which are primarily optimizations.
def AArch64PostLegalizerCombiner
- : GICombinerHelper<"AArch64PostLegalizerCombinerImpl",
+ : GICombiner<"AArch64PostLegalizerCombinerImpl",
[copy_prop, combines_for_extload,
+ combine_indexed_load_store,
sext_trunc_sextload, mutate_anyext_to_zext,
hoist_logic_op_with_same_opcode_hands,
redundant_and, xor_of_and_with_same_reg,
@@ -227,8 +267,8 @@ def AArch64PostLegalizerCombiner
form_bitfield_extract, rotate_out_of_range,
icmp_to_true_false_known_bits, merge_unmerge,
select_combines, fold_merge_to_zext,
- constant_fold, identity_combines,
+ constant_fold_binops, identity_combines,
ptr_add_immed_chain, overlapping_and,
split_store_zero_128, undef_combines,
- select_to_minmax]> {
+ select_to_minmax, or_to_bsp]> {
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index 4c8c03a4c693..17e0e3072db6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -18,7 +18,6 @@
#include "AArch64.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -935,7 +934,7 @@ bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
SchedModel = MF.getSubtarget().getSchedModel();
MRI = &MF.getRegInfo();
DomTree = &getAnalysis<MachineDominatorTree>();
- Loops = getAnalysisIfAvailable<MachineLoopInfo>();
+ Loops = &getAnalysis<MachineLoopInfo>();
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
index 731972a039ba..a7d72b59b1d5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
@@ -362,6 +362,105 @@ static bool tryAndOfLogicalImmediates(uint64_t UImm,
return false;
}
+// Check whether the constant can be represented by exclusive-or of two 64-bit
+// logical immediates. If so, materialize it with an ORR instruction followed
+// by an EOR instruction.
+//
+// This encoding allows all remaining repeated byte patterns, and many repeated
+// 16-bit values, to be encoded without needing four instructions. It can also
+// represent some irregular bitmasks (although those would mostly only need
+// three instructions otherwise).
+static bool tryEorOfLogicalImmediates(uint64_t Imm,
+ SmallVectorImpl<ImmInsnModel> &Insn) {
+ // Determine the larger repetition size of the two possible logical
+ // immediates, by finding the repetition size of Imm.
+ unsigned BigSize = 64;
+
+ do {
+ BigSize /= 2;
+ uint64_t Mask = (1ULL << BigSize) - 1;
+
+ if ((Imm & Mask) != ((Imm >> BigSize) & Mask)) {
+ BigSize *= 2;
+ break;
+ }
+ } while (BigSize > 2);
+
+ uint64_t BigMask = ((uint64_t)-1LL) >> (64 - BigSize);
+
+ // Find the last bit of each run of ones, circularly. For runs which wrap
+ // around from bit 0 to bit 63, this is the bit before the most-significant
+ // zero, otherwise it is the least-significant bit in the run of ones.
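+  // For example, in an 8-bit analogue of this 64-bit logic, Imm = 0b01101100
+  // gives rotl(Imm, 1) = 0b11011000, so RunStarts = Imm & ~rotl(Imm, 1) =
+  // 0b00100100: the least-significant bit of each of the two runs of ones.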
+ uint64_t RunStarts = Imm & ~rotl<uint64_t>(Imm, 1);
+
+ // Find the smaller repetition size of the two possible logical immediates by
+ // counting the number of runs of one-bits within the BigSize-bit value. Both
+ // sizes may be the same. The EOR may add one or subtract one from the
+ // power-of-two count that can be represented by a logical immediate, or it
+ // may be left unchanged.
+ int RunsPerBigChunk = popcount(RunStarts & BigMask);
+
+ static const int8_t BigToSmallSizeTable[32] = {
+ -1, -1, 0, 1, 2, 2, -1, 3, 3, 3, -1, -1, -1, -1, -1, 4,
+ 4, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5,
+ };
+
+ int BigToSmallShift = BigToSmallSizeTable[RunsPerBigChunk];
+
+ // Early-exit if the big chunk couldn't be a power-of-two number of runs
+ // EORed with another single run.
+ if (BigToSmallShift == -1)
+ return false;
+
+ unsigned SmallSize = BigSize >> BigToSmallShift;
+
+ // 64-bit values with a bit set every (1 << index) bits.
+ static const uint64_t RepeatedOnesTable[] = {
+ 0xffffffffffffffff, 0x5555555555555555, 0x1111111111111111,
+ 0x0101010101010101, 0x0001000100010001, 0x0000000100000001,
+ 0x0000000000000001,
+ };
+
+ // This RepeatedOnesTable lookup is a faster implementation of the division
+ // 0xffffffffffffffff / ((1 << SmallSize) - 1), and can be thought of as
+ // dividing the 64-bit value into fields of width SmallSize, and placing a
+ // one in the least significant bit of each field.
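+  // For instance, SmallSize == 8 yields countr_zero(8) == 3 and
+  // RepeatedOnesTable[3] == 0x0101010101010101, which is exactly
+  // 0xffffffffffffffff / 0xff.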
+ uint64_t SmallOnes = RepeatedOnesTable[countr_zero(SmallSize)];
+
+ // Now we try to find the number of ones in each of the smaller repetitions,
+ // by looking at runs of ones in Imm. This can take three attempts, as the
+ // EOR may have changed the length of the first two runs we find.
+
+ // Rotate a run of ones so we can count the number of trailing set bits.
+ int Rotation = countr_zero(RunStarts);
+ uint64_t RotatedImm = rotr<uint64_t>(Imm, Rotation);
+ for (int Attempt = 0; Attempt < 3; ++Attempt) {
+ unsigned RunLength = countr_one(RotatedImm);
+
+ // Construct candidate values BigImm and SmallImm, such that if these two
+ // values are encodable, we have a solution. (SmallImm is constructed to be
+ // encodable, but this isn't guaranteed when RunLength >= SmallSize)
+ uint64_t SmallImm =
+ rotl<uint64_t>((SmallOnes << RunLength) - SmallOnes, Rotation);
+ uint64_t BigImm = Imm ^ SmallImm;
+
+ uint64_t BigEncoding = 0;
+ uint64_t SmallEncoding = 0;
+ if (AArch64_AM::processLogicalImmediate(BigImm, 64, BigEncoding) &&
+ AArch64_AM::processLogicalImmediate(SmallImm, 64, SmallEncoding)) {
+ Insn.push_back({AArch64::ORRXri, 0, SmallEncoding});
+ Insn.push_back({AArch64::EORXri, 1, BigEncoding});
+ return true;
+ }
+
+ // Rotate to the next run of ones
+ Rotation += countr_zero(rotr<uint64_t>(RunStarts, Rotation) & ~1);
+ RotatedImm = rotr<uint64_t>(Imm, Rotation);
+ }
+
+ return false;
+}
+
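As a worked illustration of the decomposition this function searches for, here is a minimal standalone C++ sketch (illustrative code, not LLVM's implementation; the simplified logical-immediate test only models the period/run structure used above). It checks that 0x5a5a5a5a5a5a5a5a, which is not itself a valid logical immediate, splits into an ORR operand and an EOR operand that both are:

#include <cstdint>
#include <cstdio>

// True if X has the form 0...01...1 (a run of ones anchored at bit 0).
static bool isMask(uint64_t X) { return X != 0 && (X & (X + 1)) == 0; }

// True if X is a single contiguous run of ones, possibly shifted up.
static bool isShiftedMask(uint64_t X) { return X != 0 && isMask((X - 1) | X); }

// Simplified logical-immediate test, for illustration only: find the
// smallest power-of-two repetition period, then require one (possibly
// wrapping) run of ones per period.
static bool isLogicalImm(uint64_t Imm) {
  if (Imm == 0 || Imm == ~0ULL)
    return false;
  unsigned Size = 64;
  while (Size > 2) {
    unsigned Half = Size / 2;
    uint64_t HalfMask = (1ULL << Half) - 1;
    if ((Imm & HalfMask) != ((Imm >> Half) & HalfMask))
      break;
    Size = Half;
  }
  uint64_t Mask = Size == 64 ? ~0ULL : (1ULL << Size) - 1;
  uint64_t Chunk = Imm & Mask;
  // A wrapping run of ones is a non-wrapping run of zeros, so test both.
  return isShiftedMask(Chunk) || isShiftedMask(~Chunk & Mask);
}

int main() {
  const uint64_t SmallImm = 0x5555555555555555ULL; // period 2, run length 1
  const uint64_t BigImm = 0x0f0f0f0f0f0f0f0fULL;   // period 8, run length 4
  const uint64_t Imm = SmallImm ^ BigImm;          // 0x5a5a5a5a5a5a5a5a
  std::printf("%d %d %d\n", isLogicalImm(Imm), isLogicalImm(SmallImm),
              isLogicalImm(BigImm)); // prints: 0 1 1
  // So a four-instruction MOVZ/MOVK sequence for 0x5a5a5a5a5a5a5a5a can be
  // replaced by:
  //   ORR x0, xzr, #0x5555555555555555
  //   EOR x0, x0,  #0x0f0f0f0f0f0f0f0f
  return 0;
}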
/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to a
/// MOVZ or MOVN of width BitSize followed by up to 3 MOVK instructions.
static inline void expandMOVImmSimple(uint64_t Imm, unsigned BitSize,
@@ -503,6 +602,10 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
  if (tryAndOfLogicalImmediates(UImm, Insn))
return;
+ // Attempt to use a sequence of ORR-immediate followed by EOR-immediate.
+ if (tryEorOfLogicalImmediates(UImm, Insn))
+ return;
+
// FIXME: Add more two-instruction sequences.
// Three instruction sequences.
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index dcb73ae2dce2..757471d6a905 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -19,7 +19,6 @@
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -39,7 +38,6 @@
#include <cassert>
#include <cstdint>
#include <iterator>
-#include <limits>
#include <utility>
using namespace llvm;
@@ -66,6 +64,11 @@ private:
bool expandMBB(MachineBasicBlock &MBB);
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
+ bool expandMultiVecPseudo(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ TargetRegisterClass ContiguousClass,
+ TargetRegisterClass StridedClass,
+ unsigned ContiguousOpc, unsigned StridedOpc);
bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
unsigned BitSize);
@@ -166,6 +169,7 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
}
break;
case AArch64::ANDXri:
+ case AArch64::EORXri:
if (I->Op1 == 0) {
MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
.add(MI.getOperand(0))
@@ -788,7 +792,8 @@ bool AArch64ExpandPseudo::expandCALL_RVMARKER(
auto MOP = MI.getOperand(RegMaskStartIdx);
assert(MOP.isReg() && "can only add register operands");
OriginalCall->addOperand(MachineOperand::CreateReg(
- MOP.getReg(), /*Def=*/false, /*Implicit=*/true));
+ MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false,
+ /*isDead=*/false, /*isUndef=*/MOP.isUndef()));
RegMaskStartIdx++;
}
for (const MachineOperand &MO :
@@ -831,6 +836,7 @@ bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
Call->addOperand(CallTarget);
Call->setCFIType(*MBB.getParent(), MI.getCFIType());
+ Call->copyImplicitOps(*MBB.getParent(), MI);
MachineInstr *BTI =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
@@ -1000,10 +1006,12 @@ AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
// expected value for the callee (0 for a normal callee and 1 for a streaming
// callee).
auto PStateSM = MI.getOperand(2).getReg();
+ auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
+ unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32);
bool IsStreamingCallee = MI.getOperand(3).getImm();
- unsigned Opc = IsStreamingCallee ? AArch64::TBZX : AArch64::TBNZX;
+ unsigned Opc = IsStreamingCallee ? AArch64::TBZW : AArch64::TBNZW;
MachineInstrBuilder Tbx =
- BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(PStateSM).addImm(0);
+ BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0);
// Split MBB and create two new blocks:
// - MBB now contains all instructions before MSRcond_pstatesvcrImm1.
@@ -1038,6 +1046,33 @@ AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
return EndBB;
}
+bool AArch64ExpandPseudo::expandMultiVecPseudo(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass,
+    unsigned ContiguousOpc, unsigned StridedOpc) {
+ MachineInstr &MI = *MBBI;
+ Register Tuple = MI.getOperand(0).getReg();
+
+ auto ContiguousRange = ContiguousClass.getRegisters();
+ auto StridedRange = StridedClass.getRegisters();
+ unsigned Opc;
+ if (llvm::is_contained(ContiguousRange, Tuple.asMCReg())) {
+    Opc = ContiguousOpc;
+ } else if (llvm::is_contained(StridedRange, Tuple.asMCReg())) {
+ Opc = StridedOpc;
+ } else
+ llvm_unreachable("Cannot expand Multi-Vector pseudo");
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+ transferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+}
+
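The dispatch above is pure register-class membership testing; a toy standalone model (our own names and register numbers, not LLVM's API):

#include <algorithm>
#include <cassert>
#include <initializer_list>

// The register allocator constrained the destination tuple to either the
// contiguous or the strided class; that choice selects the real opcode.
static unsigned selectOpc(unsigned TupleReg,
                          std::initializer_list<unsigned> Contiguous,
                          std::initializer_list<unsigned> Strided,
                          unsigned ContiguousOpc, unsigned StridedOpc) {
  if (std::find(Contiguous.begin(), Contiguous.end(), TupleReg) !=
      Contiguous.end())
    return ContiguousOpc;
  assert(std::find(Strided.begin(), Strided.end(), TupleReg) !=
             Strided.end() &&
         "cannot expand multi-vector pseudo");
  return StridedOpc;
}

int main() {
  // Hypothetical register numbers and opcodes, for illustration only.
  enum { Z0Z1 = 10, Z2Z3, Z0Z8 = 20, LD1B_2Z = 1, LD1B_2Z_STRIDED };
  assert(selectOpc(Z2Z3, {Z0Z1, Z2Z3}, {Z0Z8}, LD1B_2Z, LD1B_2Z_STRIDED) ==
         LD1B_2Z);
  assert(selectOpc(Z0Z8, {Z0Z1, Z2Z3}, {Z0Z8}, LD1B_2Z, LD1B_2Z_STRIDED) ==
         LD1B_2Z_STRIDED);
  return 0;
}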
/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
@@ -1481,17 +1516,134 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
return true;
}
- case AArch64::OBSCURE_COPY: {
- if (MI.getOperand(0).getReg() != MI.getOperand(1).getReg()) {
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
- .add(MI.getOperand(0))
- .addReg(AArch64::XZR)
- .add(MI.getOperand(1))
- .addImm(0);
- }
- MI.eraseFromParent();
- return true;
- }
+ case AArch64::LD1B_2Z_IMM_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
+ AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z_STRIDED_IMM);
+ case AArch64::LD1H_2Z_IMM_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
+ AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z_STRIDED_IMM);
+ case AArch64::LD1W_2Z_IMM_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
+ AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z_STRIDED_IMM);
+ case AArch64::LD1D_2Z_IMM_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
+ AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z_STRIDED_IMM);
+ case AArch64::LDNT1B_2Z_IMM_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
+ AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z_STRIDED_IMM);
+ case AArch64::LDNT1H_2Z_IMM_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
+ AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z_STRIDED_IMM);
+ case AArch64::LDNT1W_2Z_IMM_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
+ AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z_STRIDED_IMM);
+ case AArch64::LDNT1D_2Z_IMM_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
+ AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z_STRIDED_IMM);
+ case AArch64::LD1B_2Z_PSEUDO:
+ return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
+ AArch64::ZPR2StridedRegClass, AArch64::LD1B_2Z,
+ AArch64::LD1B_2Z_STRIDED);
+ case AArch64::LD1H_2Z_PSEUDO:
+ return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
+ AArch64::ZPR2StridedRegClass, AArch64::LD1H_2Z,
+ AArch64::LD1H_2Z_STRIDED);
+ case AArch64::LD1W_2Z_PSEUDO:
+ return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
+ AArch64::ZPR2StridedRegClass, AArch64::LD1W_2Z,
+ AArch64::LD1W_2Z_STRIDED);
+ case AArch64::LD1D_2Z_PSEUDO:
+ return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
+ AArch64::ZPR2StridedRegClass, AArch64::LD1D_2Z,
+ AArch64::LD1D_2Z_STRIDED);
+ case AArch64::LDNT1B_2Z_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
+ AArch64::LDNT1B_2Z, AArch64::LDNT1B_2Z_STRIDED);
+ case AArch64::LDNT1H_2Z_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
+ AArch64::LDNT1H_2Z, AArch64::LDNT1H_2Z_STRIDED);
+ case AArch64::LDNT1W_2Z_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
+ AArch64::LDNT1W_2Z, AArch64::LDNT1W_2Z_STRIDED);
+ case AArch64::LDNT1D_2Z_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
+ AArch64::LDNT1D_2Z, AArch64::LDNT1D_2Z_STRIDED);
+ case AArch64::LD1B_4Z_IMM_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
+ AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z_STRIDED_IMM);
+ case AArch64::LD1H_4Z_IMM_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
+ AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z_STRIDED_IMM);
+ case AArch64::LD1W_4Z_IMM_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
+ AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z_STRIDED_IMM);
+ case AArch64::LD1D_4Z_IMM_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
+ AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z_STRIDED_IMM);
+ case AArch64::LDNT1B_4Z_IMM_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
+ AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z_STRIDED_IMM);
+ case AArch64::LDNT1H_4Z_IMM_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
+ AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z_STRIDED_IMM);
+ case AArch64::LDNT1W_4Z_IMM_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
+ AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z_STRIDED_IMM);
+ case AArch64::LDNT1D_4Z_IMM_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
+ AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z_STRIDED_IMM);
+ case AArch64::LD1B_4Z_PSEUDO:
+ return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
+ AArch64::ZPR4StridedRegClass, AArch64::LD1B_4Z,
+ AArch64::LD1B_4Z_STRIDED);
+ case AArch64::LD1H_4Z_PSEUDO:
+ return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
+ AArch64::ZPR4StridedRegClass, AArch64::LD1H_4Z,
+ AArch64::LD1H_4Z_STRIDED);
+ case AArch64::LD1W_4Z_PSEUDO:
+ return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
+ AArch64::ZPR4StridedRegClass, AArch64::LD1W_4Z,
+ AArch64::LD1W_4Z_STRIDED);
+ case AArch64::LD1D_4Z_PSEUDO:
+ return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
+ AArch64::ZPR4StridedRegClass, AArch64::LD1D_4Z,
+ AArch64::LD1D_4Z_STRIDED);
+ case AArch64::LDNT1B_4Z_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
+ AArch64::LDNT1B_4Z, AArch64::LDNT1B_4Z_STRIDED);
+ case AArch64::LDNT1H_4Z_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
+ AArch64::LDNT1H_4Z, AArch64::LDNT1H_4Z_STRIDED);
+ case AArch64::LDNT1W_4Z_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
+ AArch64::LDNT1W_4Z, AArch64::LDNT1W_4Z_STRIDED);
+ case AArch64::LDNT1D_4Z_PSEUDO:
+ return expandMultiVecPseudo(
+ MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
+ AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED);
}
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
index 32686b25f211..7544786d9f6c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
@@ -46,7 +46,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/raw_ostream.h"
-#include <cassert>
#include <iterator>
#include <utility>
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 1ae3709e9588..9b8162ce8dd4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -335,13 +335,15 @@ static unsigned getImplicitScaleFactor(MVT VT) {
}
CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
- if (CC == CallingConv::WebKit_JS)
- return CC_AArch64_WebKit_JS;
if (CC == CallingConv::GHC)
return CC_AArch64_GHC;
if (CC == CallingConv::CFGuard_Check)
return CC_AArch64_Win64_CFGuard_Check;
- return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
+ if (Subtarget->isTargetDarwin())
+ return CC_AArch64_DarwinPCS;
+ if (Subtarget->isTargetWindows())
+ return CC_AArch64_Win64PCS;
+ return CC_AArch64_AAPCS;
}
unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
@@ -3860,9 +3862,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
- CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
- : RetCC_AArch64_AAPCS;
- CCInfo.AnalyzeReturn(Outs, RetCC);
+ CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);
// Only handle a single return value for now.
if (ValLocs.size() != 1)
@@ -5034,7 +5034,7 @@ bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
}
bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
- assert(TM.getOptLevel() == CodeGenOpt::None &&
+ assert(TM.getOptLevel() == CodeGenOptLevel::None &&
"cmpxchg survived AtomicExpand at optlevel > -O0");
auto *RetPairTy = cast<StructType>(I->getType());
@@ -5187,8 +5187,8 @@ FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) {
SMEAttrs CallerAttrs(*FuncInfo.Fn);
- if (CallerAttrs.hasZAState() ||
- (!CallerAttrs.hasStreamingInterface() && CallerAttrs.hasStreamingBody()))
+ if (CallerAttrs.hasZAState() || CallerAttrs.hasStreamingInterfaceOrBody() ||
+ CallerAttrs.hasStreamingCompatibleInterface())
return nullptr;
return new AArch64FastISel(FuncInfo, LibInfo);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 4d5676f34101..caab59201a8d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -269,14 +269,10 @@ STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
static int64_t getArgumentStackToRestore(MachineFunction &MF,
MachineBasicBlock &MBB) {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- bool IsTailCallReturn = false;
- if (MBB.end() != MBBI) {
- unsigned RetOpcode = MBBI->getOpcode();
- IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
- RetOpcode == AArch64::TCRETURNri ||
- RetOpcode == AArch64::TCRETURNriBTI;
- }
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ bool IsTailCallReturn = (MBB.end() != MBBI)
+ ? AArch64InstrInfo::isTailCallReturnInst(*MBBI)
+ : false;
int64_t ArgumentPopSize = 0;
if (IsTailCallReturn) {
@@ -300,7 +296,7 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
static bool produceCompactUnwindFrame(MachineFunction &MF);
static bool needsWinCFI(const MachineFunction &MF);
static StackOffset getSVEStackSize(const MachineFunction &MF);
-static bool needsShadowCallStackPrologueEpilogue(MachineFunction &MF);
+static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
/// Returns true if a homogeneous prolog or epilog code can be emitted
/// for the size optimization. If possible, a frame helper call is injected.
@@ -331,6 +327,27 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
if (Exit && getArgumentStackToRestore(MF, *Exit))
return false;
+ auto *AFI = MF.getInfo<AArch64FunctionInfo>();
+ if (AFI->hasSwiftAsyncContext())
+ return false;
+
+ // If there are an odd number of GPRs before LR and FP in the CSRs list,
+ // they will not be paired into one RegPairInfo, which is incompatible with
+ // the assumption made by the homogeneous prolog epilog pass.
+ const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
+ unsigned NumGPRs = 0;
+ for (unsigned I = 0; CSRegs[I]; ++I) {
+ Register Reg = CSRegs[I];
+ if (Reg == AArch64::LR) {
+ assert(CSRegs[I + 1] == AArch64::FP);
+ if (NumGPRs % 2 != 0)
+ return false;
+ break;
+ }
+ if (AArch64::GPR64RegClass.contains(Reg))
+ ++NumGPRs;
+ }
+
return true;
}
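The new bail-out reduces to a parity check over the callee-saved list up to LR; a minimal model (toy register names, not LLVM's):

#include <cassert>

// Walk a zero-terminated callee-saved list and count GPRs until LR: an odd
// count leaves one GPR without a partner, breaking the pairing that the
// homogeneous prolog/epilog pass assumes. Register values are illustrative.
enum Reg : unsigned { END = 0, X19, X20, X21, X22, LR, FP };

static bool gprsPairUpBeforeLR(const unsigned *CSRegs) {
  unsigned NumGPRs = 0;
  for (unsigned I = 0; CSRegs[I] != END; ++I) {
    if (CSRegs[I] == LR)
      return NumGPRs % 2 == 0; // LR must start a fresh (LR, FP) pair
    ++NumGPRs; // in this toy list, everything before LR is a GPR
  }
  return true;
}

int main() {
  const unsigned Odd[] = {X19, X20, X21, LR, FP, END};
  const unsigned Even[] = {X19, X20, X21, X22, LR, FP, END};
  assert(!gprsPairUpBeforeLR(Odd) && gprsPairUpBeforeLR(Even));
  return 0;
}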
@@ -427,6 +444,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+
// Win64 EH requires a frame pointer if funclets are present, as the locals
// are accessed off the frame pointer in both the parent function and the
// funclets.
@@ -461,6 +479,11 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
/// included as part of the stack frame.
bool
AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ // The stack probing code for the dynamically allocated outgoing arguments
+ // area assumes that the stack is probed at the top - either by the prologue
+  // code, which issues a probe if `hasVarSizedObjects` returns true, or by the
+ // most recent variable-sized object allocation. Changing the condition here
+ // may need to be followed up by changes to the probe issuing logic.
return !MF.getFrameInfo().hasVarSizedObjects();
}
@@ -469,6 +492,9 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
MachineBasicBlock::iterator I) const {
const AArch64InstrInfo *TII =
static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const AArch64TargetLowering *TLI =
+ MF.getSubtarget<AArch64Subtarget>().getTargetLowering();
+ [[maybe_unused]] MachineFrameInfo &MFI = MF.getFrameInfo();
DebugLoc DL = I->getDebugLoc();
unsigned Opc = I->getOpcode();
bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
@@ -495,8 +521,24 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
// Most call frames will be allocated at the start of a function so
// this is OK, but it is a limitation that needs dealing with.
assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
- emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(Amount), TII);
+
+ if (TLI->hasInlineStackProbe(MF) &&
+ -Amount >= AArch64::StackProbeMaxUnprobedStack) {
+ // When stack probing is enabled, the decrement of SP may need to be
+ // probed. We only need to do this if the call site needs 1024 bytes of
+ // space or more, because a region smaller than that is allowed to be
+ // unprobed at an ABI boundary. We rely on the fact that SP has been
+ // probed exactly at this point, either by the prologue or most recent
+ // dynamic allocation.
+ assert(MFI.hasVarSizedObjects() &&
+ "non-reserved call frame without var sized objects?");
+ Register ScratchReg =
+ MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
+ inlineStackProbeFixed(I, ScratchReg, -Amount, StackOffset::get(0, 0));
+ } else {
+ emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(Amount), TII);
+ }
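Reduced to its essentials, the new condition is a threshold test; a sketch (the constant mirrors AArch64::StackProbeMaxUnprobedStack as described in the comment above; the constant and function names are ours):

#include <cstdint>

// A region smaller than this may legally remain unprobed at an ABI
// boundary, so only larger outgoing-argument areas need an inline probe.
constexpr int64_t kMaxUnprobedStack = 1024; // illustrative name

// Amount is negative for an SP decrement, matching the code above.
static bool callFrameNeedsProbe(bool HasInlineStackProbe, int64_t Amount) {
  return HasInlineStackProbe && -Amount >= kMaxUnprobedStack;
}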
}
} else if (CalleePopAmount != 0) {
// If the calling convention demands that the callee pops arguments from the
@@ -612,7 +654,7 @@ void AArch64FrameLowering::resetCFIToInitialState(
}
// Shadow call stack uses X18, reset it.
- if (needsShadowCallStackPrologueEpilogue(MF))
+ if (MFI.needsShadowCallStackPrologueEpilogue(MF))
insertCFISameValue(CFIDesc, MF, MBB, InsertPt,
TRI.getDwarfRegNum(AArch64::X18, true));
@@ -671,6 +713,153 @@ void AArch64FrameLowering::emitCalleeSavedSVERestores(
emitCalleeSavedRestores(MBB, MBBI, true);
}
+// Return the maximum possible number of bytes for `Size` due to the
+// architectural limit on the size of an SVE register.
+static int64_t upperBound(StackOffset Size) {
+ static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
+ return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
+}
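For intuition, a standalone version of this bound (our own struct, mirroring how LLVM's StackOffset separates the two parts): one scalable byte is vscale real bytes, and SVE caps vscale at 16, since the register size ranges from 128 to 2048 bits.

#include <cstdint>
#include <cstdio>

struct Offset {
  int64_t Fixed;    // ordinary bytes
  int64_t Scalable; // multiples of vscale bytes; vscale <= 16 for SVE
};

static int64_t upperBound(Offset Size) {
  const int64_t MaxBytesPerScalableByte = 16;
  return Size.Scalable * MaxBytesPerScalableByte + Size.Fixed;
}

int main() {
  // Two Z registers (2 x 16 scalable bytes) plus 64 fixed bytes occupy at
  // most 32 * 16 + 64 = 576 bytes on any conforming implementation.
  std::printf("%lld\n", (long long)upperBound({64, 32}));
  return 0;
}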
+
+void AArch64FrameLowering::allocateStackSpace(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ int64_t RealignmentPadding, StackOffset AllocSize, bool NeedsWinCFI,
+ bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset,
+ bool FollowupAllocs) const {
+
+ if (!AllocSize)
+ return;
+
+ DebugLoc DL;
+ MachineFunction &MF = *MBB.getParent();
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ const int64_t MaxAlign = MFI.getMaxAlign().value();
+ const uint64_t AndMask = ~(MaxAlign - 1);
+
+ if (!Subtarget.getTargetLowering()->hasInlineStackProbe(MF)) {
+ Register TargetReg = RealignmentPadding
+ ? findScratchNonCalleeSaveRegister(&MBB)
+ : AArch64::SP;
+ // SUB Xd/SP, SP, AllocSize
+ emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
+ EmitCFI, InitialOffset);
+
+ if (RealignmentPadding) {
+ // AND SP, X9, 0b11111...0000
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
+ .addReg(TargetReg, RegState::Kill)
+ .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
+ .setMIFlags(MachineInstr::FrameSetup);
+ AFI.setStackRealigned(true);
+
+ // No need for SEH instructions here; if we're realigning the stack,
+ // we've set a frame pointer and already finished the SEH prologue.
+ assert(!NeedsWinCFI);
+ }
+ return;
+ }
+
+ //
+ // Stack probing allocation.
+ //
+
+ // Fixed length allocation. If we don't need to re-align the stack and don't
+ // have SVE objects, we can use a more efficient sequence for stack probing.
+ if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
+ Register ScratchReg = findScratchNonCalleeSaveRegister(&MBB);
+ assert(ScratchReg != AArch64::NoRegister);
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC))
+ .addDef(ScratchReg)
+ .addImm(AllocSize.getFixed())
+ .addImm(InitialOffset.getFixed())
+ .addImm(InitialOffset.getScalable());
+ // The fixed allocation may leave unprobed bytes at the top of the
+  // stack. If we have a subsequent allocation (e.g. if we have variable-sized
+ // objects), we need to issue an extra probe, so these allocations start in
+ // a known state.
+ if (FollowupAllocs) {
+ // STR XZR, [SP]
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+
+ return;
+ }
+
+ // Variable length allocation.
+
+ // If the (unknown) allocation size cannot exceed the probe size, decrement
+ // the stack pointer right away.
+ int64_t ProbeSize = AFI.getStackProbeSize();
+ if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
+ Register ScratchReg = RealignmentPadding
+ ? findScratchNonCalleeSaveRegister(&MBB)
+ : AArch64::SP;
+ assert(ScratchReg != AArch64::NoRegister);
+ // SUB Xd, SP, AllocSize
+ emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, &TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
+ EmitCFI, InitialOffset);
+ if (RealignmentPadding) {
+ // AND SP, Xn, 0b11111...0000
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
+ .setMIFlags(MachineInstr::FrameSetup);
+ AFI.setStackRealigned(true);
+ }
+ if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
+ AArch64::StackProbeMaxUnprobedStack) {
+ // STR XZR, [SP]
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ return;
+ }
+
+ // Emit a variable-length allocation probing loop.
+ // TODO: As an optimisation, the loop can be "unrolled" into a few parts,
+ // each of them guaranteed to adjust the stack by less than the probe size.
+ Register TargetReg = findScratchNonCalleeSaveRegister(&MBB);
+ assert(TargetReg != AArch64::NoRegister);
+ // SUB Xd, SP, AllocSize
+ emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
+ EmitCFI, InitialOffset);
+ if (RealignmentPadding) {
+ // AND Xn, Xn, 0b11111...0000
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), TargetReg)
+ .addReg(TargetReg, RegState::Kill)
+ .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC_VAR))
+ .addReg(TargetReg);
+ if (EmitCFI) {
+ // Set the CFA register back to SP.
+ unsigned Reg =
+ Subtarget.getRegisterInfo()->getDwarfRegNum(AArch64::SP, true);
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ if (RealignmentPadding)
+ AFI.setStackRealigned(true);
+}
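The control flow above boils down to a four-way choice. A condensed sketch of just the decision (our own enum and parameter names; MaxAllocBytes corresponds to upperBound(AllocSize)):

#include <cstdint>

enum class AllocKind {
  PlainSub,   // no probing: SUB (plus AND to realign if needed)
  FixedProbe, // PROBED_STACKALLOC pseudo, optional trailing STR XZR, [SP]
  SingleSub,  // provably <= ProbeSize: one SUB, probe only if required
  ProbeLoop   // PROBED_STACKALLOC_VAR probing loop
};

static AllocKind chooseAllocKind(bool HasInlineStackProbe, bool HasScalable,
                                 int64_t RealignmentPadding,
                                 int64_t MaxAllocBytes, int64_t ProbeSize) {
  if (!HasInlineStackProbe)
    return AllocKind::PlainSub;
  if (!HasScalable && RealignmentPadding == 0)
    return AllocKind::FixedProbe;
  if (MaxAllocBytes + RealignmentPadding <= ProbeSize)
    return AllocKind::SingleSub;
  return AllocKind::ProbeLoop;
}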
+
static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) {
switch (Reg.id()) {
default:
@@ -785,16 +974,11 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
// Zero out GPRs.
for (MCRegister Reg : GPRsToZero.set_bits())
- BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), Reg).addImm(0);
+ TII.buildClearRegister(Reg, MBB, MBBI, DL);
// Zero out FP/vector registers.
for (MCRegister Reg : FPRsToZero.set_bits())
- if (HasSVE)
- BuildMI(MBB, MBBI, DL, TII.get(AArch64::DUP_ZI_D), Reg)
- .addImm(0)
- .addImm(0);
- else
- BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVIv2d_ns), Reg).addImm(0);
+ TII.buildClearRegister(Reg, MBB, MBBI, DL);
if (HasSVE) {
for (MCRegister PReg :
@@ -808,6 +992,16 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
}
}
+static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs,
+ const MachineBasicBlock &MBB) {
+ const MachineFunction *MF = MBB.getParent();
+ LiveRegs.addLiveIns(MBB);
+ // Mark callee saved registers as used so we will not choose them.
+ const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ LiveRegs.addReg(CSRegs[i]);
+}
+
// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
@@ -829,12 +1023,7 @@ static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
LivePhysRegs LiveRegs(TRI);
- LiveRegs.addLiveIns(*MBB);
-
- // Mark callee saved registers as used so we will not choose them.
- const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
- for (unsigned i = 0; CSRegs[i]; ++i)
- LiveRegs.addReg(CSRegs[i]);
+ getLiveRegsForEntryMBB(LiveRegs, *MBB);
// Prefer X9 since it was historically used for the prologue scratch reg.
const MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -854,9 +1043,24 @@ bool AArch64FrameLowering::canUseAsPrologue(
MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();
+ const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>();
+
+ if (AFI->hasSwiftAsyncContext()) {
+ const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ LivePhysRegs LiveRegs(TRI);
+ getLiveRegsForEntryMBB(LiveRegs, MBB);
+ // The StoreSwiftAsyncContext clobbers X16 and X17. Make sure they are
+ // available.
+ if (!LiveRegs.available(MRI, AArch64::X16) ||
+ !LiveRegs.available(MRI, AArch64::X17))
+ return false;
+ }
- // Don't need a scratch register if we're not going to re-align the stack.
- if (!RegInfo->hasStackRealignment(*MF))
+ // Don't need a scratch register if we're not going to re-align the stack or
+ // emit stack probes.
+  if (!RegInfo->hasStackRealignment(*MF) && !TLI->hasInlineStackProbe(*MF))
return true;
// Otherwise, we can use any block as long as it has a scratch register
// available.
@@ -866,15 +1070,11 @@ bool AArch64FrameLowering::canUseAsPrologue(
static bool windowsRequiresStackProbe(MachineFunction &MF,
uint64_t StackSizeInBytes) {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
- if (!Subtarget.isTargetWindows())
- return false;
- const Function &F = MF.getFunction();
+ const AArch64FunctionInfo &MFI = *MF.getInfo<AArch64FunctionInfo>();
// TODO: When implementing stack protectors, take that into account
// for the probe threshold.
- unsigned StackProbeSize =
- F.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
- return (StackSizeInBytes >= StackProbeSize) &&
- !F.hasFnAttribute("no-stack-arg-probe");
+ return Subtarget.isTargetWindows() && MFI.hasStackProbing() &&
+ StackSizeInBytes >= uint64_t(MFI.getStackProbeSize());
}
static bool needsWinCFI(const MachineFunction &MF) {
@@ -1163,8 +1363,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
SEH->eraseFromParent();
}
- TypeSize Scale = TypeSize::Fixed(1);
- unsigned Width;
+ TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
int64_t MinOffset, MaxOffset;
bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
NewOpc, Scale, Width, MinOffset, MaxOffset);
@@ -1290,19 +1489,6 @@ static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
}
}
-static bool needsShadowCallStackPrologueEpilogue(MachineFunction &MF) {
- if (!(llvm::any_of(
- MF.getFrameInfo().getCalleeSavedInfo(),
- [](const auto &Info) { return Info.getReg() == AArch64::LR; }) &&
- MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)))
- return false;
-
- if (!MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(18))
- report_fatal_error("Must reserve x18 to use shadow call stack");
-
- return true;
-}
-
static void emitShadowCallStackPrologue(const TargetInstrInfo &TII,
MachineFunction &MF,
MachineBasicBlock &MBB,
@@ -1385,6 +1571,20 @@ static void emitDefineCFAWithFP(MachineFunction &MF, MachineBasicBlock &MBB,
.setMIFlags(MachineInstr::FrameSetup);
}
+#ifndef NDEBUG
+/// Collect live registers from the end of \p MI's parent block up to (and
+/// including) \p MI in \p LiveRegs.
+static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI,
+ LivePhysRegs &LiveRegs) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ LiveRegs.addLiveOuts(MBB);
+ for (const MachineInstr &MI :
+ reverse(make_range(MI.getIterator(), MBB.instr_end())))
+ LiveRegs.stepBackward(MI);
+}
+#endif
+
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -1393,6 +1593,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+
MachineModuleInfo &MMI = MF.getMMI();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
bool EmitCFI = AFI->needsDwarfUnwindInfo(MF);
@@ -1402,6 +1603,40 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
bool HasWinCFI = false;
auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); });
+ MachineBasicBlock::iterator End = MBB.end();
+#ifndef NDEBUG
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  // Collect live registers from the end of MBB up to the start of the existing
+ // frame setup instructions.
+ MachineBasicBlock::iterator NonFrameStart = MBB.begin();
+ while (NonFrameStart != End &&
+ NonFrameStart->getFlag(MachineInstr::FrameSetup))
+ ++NonFrameStart;
+
+ LivePhysRegs LiveRegs(*TRI);
+ if (NonFrameStart != MBB.end()) {
+ getLivePhysRegsUpTo(*NonFrameStart, *TRI, LiveRegs);
+ // Ignore registers used for stack management for now.
+ LiveRegs.removeReg(AArch64::SP);
+ LiveRegs.removeReg(AArch64::X19);
+ LiveRegs.removeReg(AArch64::FP);
+ LiveRegs.removeReg(AArch64::LR);
+ }
+
+ auto VerifyClobberOnExit = make_scope_exit([&]() {
+ if (NonFrameStart == MBB.end())
+ return;
+    // Check if any of the newly inserted instructions clobber any live registers.
+ for (MachineInstr &MI :
+ make_range(MBB.instr_begin(), NonFrameStart->getIterator())) {
+ for (auto &Op : MI.operands())
+ if (Op.isReg() && Op.isDef())
+ assert(!LiveRegs.contains(Op.getReg()) &&
+ "live register clobbered by inserted prologue instructions");
+ }
+ });
+#endif
+
bool IsFunclet = MBB.isEHFuncletEntry();
// At this point, we're going to decide whether or not the function uses a
@@ -1414,35 +1649,17 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
DebugLoc DL;
const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
- if (needsShadowCallStackPrologueEpilogue(MF))
+ if (MFnI.needsShadowCallStackPrologueEpilogue(MF))
emitShadowCallStackPrologue(*TII, MF, MBB, MBBI, DL, NeedsWinCFI,
MFnI.needsDwarfUnwindInfo(MF));
if (MFnI.shouldSignReturnAddress(MF)) {
- if (MFnI.shouldSignWithBKey()) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- // No SEH opcode for this one; it doesn't materialize into an
- // instruction on Windows.
- BuildMI(MBB, MBBI, DL,
- TII->get(MFnI.shouldSignWithBKey() ? AArch64::PACIBSP
- : AArch64::PACIASP))
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
.setMIFlag(MachineInstr::FrameSetup);
-
- if (EmitCFI) {
- unsigned CFIIndex =
- MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
- } else if (NeedsWinCFI) {
- HasWinCFI = true;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PACSignLR))
- .setMIFlag(MachineInstr::FrameSetup);
- }
+ if (NeedsWinCFI)
+ HasWinCFI = true; // AArch64PointerAuth pass will insert SEH_PACSignLR
}
+
if (EmitCFI && MFnI.isMTETagged()) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITMTETAGGED))
.setMIFlag(MachineInstr::FrameSetup);
@@ -1461,10 +1678,20 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
.addExternalSymbol("swift_async_extendedFramePointerFlags",
AArch64II::MO_GOT);
+ if (NeedsWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlags(MachineInstr::FrameSetup);
+ HasWinCFI = true;
+ }
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
.addUse(AArch64::FP)
.addUse(AArch64::X16)
.addImm(Subtarget.isTargetILP32() ? 32 : 0);
+ if (NeedsWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlags(MachineInstr::FrameSetup);
+ HasWinCFI = true;
+ }
break;
}
[[fallthrough]];
@@ -1475,6 +1702,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.addUse(AArch64::FP)
.addImm(0x1100)
.setMIFlag(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlags(MachineInstr::FrameSetup);
+ HasWinCFI = true;
+ }
break;
case SwiftAsyncFramePointerMode::Never:
@@ -1573,7 +1805,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// Move past the saves of the callee-saved registers, fixing up the offsets
// and pre-inc if we decided to combine the callee-save and local stack
// pointer bump above.
- MachineBasicBlock::iterator End = MBB.end();
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
!IsSVECalleeSave(MBBI)) {
if (CombineSPBump)
@@ -1598,11 +1829,20 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
if (HaveInitialContext)
MBB.addLiveIn(AArch64::X22);
+ Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
- .addUse(HaveInitialContext ? AArch64::X22 : AArch64::XZR)
+ .addUse(Reg)
.addUse(AArch64::SP)
.addImm(FPOffset - 8)
.setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
+ // to multiple instructions, should be mutually-exclusive.
+ assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlags(MachineInstr::FrameSetup);
+ HasWinCFI = true;
+ }
}
if (HomPrologEpilog) {
@@ -1639,7 +1879,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// Alignment is required for the parent frame, not the funclet
const bool NeedsRealignment =
NumBytes && !IsFunclet && RegInfo->hasStackRealignment(MF);
- int64_t RealignmentPadding =
+ const int64_t RealignmentPadding =
(NeedsRealignment && MFI.getMaxAlign() > Align(16))
? MFI.getMaxAlign().value() - 16
: 0;
@@ -1769,12 +2009,14 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
}
- StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {};
+ StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;
// Process the SVE callee-saves to determine what space needs to be
// allocated.
if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
+ LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
+ << "\n");
// Find callee save instructions in frame.
CalleeSavesBegin = MBBI;
assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
@@ -1782,67 +2024,34 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
++MBBI;
CalleeSavesEnd = MBBI;
- AllocateBefore = StackOffset::getScalable(CalleeSavedSize);
- AllocateAfter = SVEStackSize - AllocateBefore;
+ SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
+ SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
}
// Allocate space for the callee saves (if any).
- emitFrameOffset(
- MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP, -AllocateBefore, TII,
- MachineInstr::FrameSetup, false, false, nullptr,
- EmitAsyncCFI && !HasFP && AllocateBefore,
- StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
+ StackOffset CFAOffset =
+ StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
+ StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
+ allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
+ nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects() || LocalsSize);
+ CFAOffset += SVECalleeSavesSize;
if (EmitAsyncCFI)
emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
- // Finally allocate remaining SVE stack space.
- emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
- -AllocateAfter, TII, MachineInstr::FrameSetup, false, false,
- nullptr, EmitAsyncCFI && !HasFP && AllocateAfter,
- AllocateBefore + StackOffset::getFixed(
- (int64_t)MFI.getStackSize() - NumBytes));
-
- // Allocate space for the rest of the frame.
- if (NumBytes) {
- unsigned scratchSPReg = AArch64::SP;
-
- if (NeedsRealignment) {
- scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
- assert(scratchSPReg != AArch64::NoRegister);
- }
-
- // If we're a leaf function, try using the red zone.
- if (!canUseRedZone(MF)) {
- // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
- // the correct value here, as NumBytes also includes padding bytes,
- // which shouldn't be counted here.
- emitFrameOffset(
- MBB, MBBI, DL, scratchSPReg, AArch64::SP,
- StackOffset::getFixed(-NumBytes), TII, MachineInstr::FrameSetup,
- false, NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
- SVEStackSize +
- StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
- }
- if (NeedsRealignment) {
- assert(MFI.getMaxAlign() > Align(1));
- assert(scratchSPReg != AArch64::SP);
-
- // SUB X9, SP, NumBytes
- // -- X9 is temporary register, so shouldn't contain any live data here,
- // -- free to use. This is already produced by emitFrameOffset above.
- // AND SP, X9, 0b11111...0000
- uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
-
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
- .addReg(scratchSPReg, RegState::Kill)
- .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
- AFI->setStackRealigned(true);
-
- // No need for SEH instructions here; if we're realigning the stack,
- // we've set a frame pointer and already finished the SEH prologue.
- assert(!NeedsWinCFI);
- }
+ // Allocate space for the rest of the frame including SVE locals. Align the
+ // stack as necessary.
+ assert(!(canUseRedZone(MF) && NeedsRealignment) &&
+ "Cannot use redzone with stack realignment");
+ if (!canUseRedZone(MF)) {
+ // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
+ // the correct value here, as NumBytes also includes padding bytes,
+ // which shouldn't be counted here.
+ allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
+ SVELocalsSize + StackOffset::getFixed(NumBytes),
+ NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
+ CFAOffset, MFI.hasVarSizedObjects());
}
// If we need a base pointer, set it up here. It's whatever the value of the
@@ -1901,54 +2110,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
}
-static void InsertReturnAddressAuth(MachineFunction &MF, MachineBasicBlock &MBB,
- bool NeedsWinCFI, bool *HasWinCFI) {
- const auto &MFI = *MF.getInfo<AArch64FunctionInfo>();
- if (!MFI.shouldSignReturnAddress(MF))
- return;
- const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- bool EmitAsyncCFI = MFI.needsAsyncDwarfUnwindInfo(MF);
-
- MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
- DebugLoc DL;
- if (MBBI != MBB.end())
- DL = MBBI->getDebugLoc();
-
- // The AUTIASP instruction assembles to a hint instruction before v8.3a so
- // this instruction can safely used for any v8a architecture.
- // From v8.3a onwards there are optimised authenticate LR and return
- // instructions, namely RETA{A,B}, that can be used instead. In this case the
- // DW_CFA_AARCH64_negate_ra_state can't be emitted.
- if (Subtarget.hasPAuth() &&
- !MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack) &&
- MBBI != MBB.end() && MBBI->getOpcode() == AArch64::RET_ReallyLR &&
- !NeedsWinCFI) {
- BuildMI(MBB, MBBI, DL,
- TII->get(MFI.shouldSignWithBKey() ? AArch64::RETAB : AArch64::RETAA))
- .copyImplicitOps(*MBBI);
- MBB.erase(MBBI);
- } else {
- BuildMI(
- MBB, MBBI, DL,
- TII->get(MFI.shouldSignWithBKey() ? AArch64::AUTIBSP : AArch64::AUTIASP))
- .setMIFlag(MachineInstr::FrameDestroy);
-
- if (EmitAsyncCFI) {
- unsigned CFIIndex =
- MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameDestroy);
- }
- if (NeedsWinCFI) {
- *HasWinCFI = true;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PACSignLR))
- .setMIFlag(MachineInstr::FrameDestroy);
- }
- }
-}
-
static bool isFuncletReturnInstr(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
@@ -1963,36 +2124,50 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
MachineFrameInfo &MFI = MF.getFrameInfo();
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL;
bool NeedsWinCFI = needsWinCFI(MF);
- bool EmitCFI =
- MF.getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo(MF);
+ bool EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
bool HasWinCFI = false;
bool IsFunclet = false;
- auto WinCFI = make_scope_exit([&]() { assert(HasWinCFI == MF.hasWinCFI()); });
if (MBB.end() != MBBI) {
DL = MBBI->getDebugLoc();
IsFunclet = isFuncletReturnInstr(*MBBI);
}
+ MachineBasicBlock::iterator EpilogStartI = MBB.end();
+
auto FinishingTouches = make_scope_exit([&]() {
- InsertReturnAddressAuth(MF, MBB, NeedsWinCFI, &HasWinCFI);
- if (needsShadowCallStackPrologueEpilogue(MF))
+ if (AFI->shouldSignReturnAddress(MF)) {
+ BuildMI(MBB, MBB.getFirstTerminator(), DL,
+ TII->get(AArch64::PAUTH_EPILOGUE))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ if (NeedsWinCFI)
+ HasWinCFI = true; // AArch64PointerAuth pass will insert SEH_PACSignLR
+ }
+ if (AFI->needsShadowCallStackPrologueEpilogue(MF))
emitShadowCallStackEpilogue(*TII, MF, MBB, MBB.getFirstTerminator(), DL);
if (EmitCFI)
emitCalleeSavedGPRRestores(MBB, MBB.getFirstTerminator());
- if (HasWinCFI)
+ if (HasWinCFI) {
BuildMI(MBB, MBB.getFirstTerminator(), DL,
TII->get(AArch64::SEH_EpilogEnd))
.setMIFlag(MachineInstr::FrameDestroy);
+ if (!MF.hasWinCFI())
+ MF.setHasWinCFI(true);
+ }
+ if (NeedsWinCFI) {
+ assert(EpilogStartI != MBB.end());
+ if (!HasWinCFI)
+ MBB.erase(EpilogStartI);
+ }
});
int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF)
: MFI.getStackSize();
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
@@ -2026,7 +2201,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// Adjust local stack
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(AFI->getLocalStackSize()), TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI);
+ MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
// SP has been already adjusted while restoring callee save regs.
// We've bailed-out the case with adjusting SP for arguments.
@@ -2078,16 +2253,17 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
NeedsWinCFI, &HasWinCFI);
}
- if (MF.hasWinCFI()) {
- // If the prologue didn't contain any SEH opcodes and didn't set the
- // MF.hasWinCFI() flag, assume the epilogue won't either, and skip the
- // EpilogStart - to avoid generating CFI for functions that don't need it.
- // (And as we didn't generate any prologue at all, it would be asymmetrical
- // to the epilogue.) By the end of the function, we assert that
- // HasWinCFI is equal to MF.hasWinCFI(), to verify this assumption.
- HasWinCFI = true;
+ if (NeedsWinCFI) {
+ // Note that there are cases where we insert SEH opcodes in the
+ // epilogue when we had no SEH opcodes in the prologue. For
+ // example, when there is no stack frame but there are stack
+    // arguments. Insert the SEH_EpilogStart and remove it later if we
+    // didn't emit any SEH opcodes, to avoid generating WinCFI for
+ // functions that don't need it.
BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
.setMIFlag(MachineInstr::FrameDestroy);
+ EpilogStartI = LastPopI;
+ --EpilogStartI;
}
if (hasFP(MF) && AFI->hasSwiftAsyncContext()) {
@@ -2107,6 +2283,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
.addUse(AArch64::FP)
.addImm(0x10fe)
.setMIFlag(MachineInstr::FrameDestroy);
+ if (NeedsWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlags(MachineInstr::FrameDestroy);
+ HasWinCFI = true;
+ }
break;
case SwiftAsyncFramePointerMode::Never:
@@ -2241,11 +2422,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
emitFrameOffset(
MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
- TII, MachineInstr::FrameDestroy, false, NeedsWinCFI);
+ TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
} else if (NumBytes)
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(NumBytes), TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI);
+ MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
// When we are about to restore the CSRs, the CFA register is SP again.
if (EmitCFI && hasFP(MF)) {
@@ -2702,7 +2883,8 @@ static void computeCalleeSaveRegisterPairs(
// Swift's async context is directly before FP, so allocate an extra
// 8 bytes for it.
if (NeedsFrameRecord && AFI->hasSwiftAsyncContext() &&
- RPI.Reg2 == AArch64::FP)
+ ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
+ (IsWindows && RPI.Reg2 == AArch64::LR)))
ByteOffset += StackFillDir * 8;
assert(!(RPI.isScalable() && RPI.isPaired()) &&
@@ -2725,13 +2907,14 @@ static void computeCalleeSaveRegisterPairs(
int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
assert(OffsetPost % Scale == 0);
// If filling top down (default), we want the offset after incrementing it.
- // If fillibg bootom up (WinCFI) we need the original offset.
+ // If filling bottom up (WinCFI) we need the original offset.
int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;
// The FP, LR pair goes 8 bytes into our expanded 24-byte slot so that the
// Swift context can directly precede FP.
if (NeedsFrameRecord && AFI->hasSwiftAsyncContext() &&
- RPI.Reg2 == AArch64::FP)
+ ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
+ (IsWindows && RPI.Reg2 == AArch64::LR)))
Offset += 8;
RPI.Offset = Offset / Scale;
@@ -2788,7 +2971,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
// Update register live in.
if (!MRI.isReserved(RPI.Reg1))
MBB.addLiveIn(RPI.Reg1);
- if (!MRI.isReserved(RPI.Reg2))
+ if (RPI.isPaired() && !MRI.isReserved(RPI.Reg2))
MBB.addLiveIn(RPI.Reg2);
}
return true;
@@ -3038,6 +3221,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
: (unsigned)AArch64::NoRegister;
unsigned ExtraCSSpill = 0;
+ bool HasUnpairedGPR64 = false;
// Figure out which callee-saved registers to save/restore.
for (unsigned i = 0; CSRegs[i]; ++i) {
const unsigned Reg = CSRegs[i];
@@ -3048,10 +3232,29 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
bool RegUsed = SavedRegs.test(Reg);
unsigned PairedReg = AArch64::NoRegister;
- if (AArch64::GPR64RegClass.contains(Reg) ||
- AArch64::FPR64RegClass.contains(Reg) ||
- AArch64::FPR128RegClass.contains(Reg))
- PairedReg = CSRegs[i ^ 1];
+ const bool RegIsGPR64 = AArch64::GPR64RegClass.contains(Reg);
+ if (RegIsGPR64 || AArch64::FPR64RegClass.contains(Reg) ||
+ AArch64::FPR128RegClass.contains(Reg)) {
+ // Compensate for odd numbers of GP CSRs.
+ // For now, all the known cases of odd number of CSRs are of GPRs.
+ if (HasUnpairedGPR64)
+ PairedReg = CSRegs[i % 2 == 0 ? i - 1 : i + 1];
+ else
+ PairedReg = CSRegs[i ^ 1];
+ }
+
+    // If the function requires that all GP registers be saved (SavedRegs),
+    // and the CSR list contains an odd number of GP registers (CSRegs),
+    // PairedReg could end up in a different register class from Reg, which
+    // would cause an FPR (usually D8) to be accidentally marked as saved.
+ if (RegIsGPR64 && !AArch64::GPR64RegClass.contains(PairedReg)) {
+ PairedReg = AArch64::NoRegister;
+ HasUnpairedGPR64 = true;
+ }
+ assert(PairedReg == AArch64::NoRegister ||
+ AArch64::GPR64RegClass.contains(Reg, PairedReg) ||
+ AArch64::FPR64RegClass.contains(Reg, PairedReg) ||
+ AArch64::FPR128RegClass.contains(Reg, PairedReg));
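The index arithmetic is easier to see in isolation (a sketch, not LLVM code):

#include <cassert>

// With an even run of GPR CSRs, entry I pairs with I ^ 1 (0<->1, 2<->3...).
// Once an unpaired GPR has been seen, the pairing shifts by one slot, so
// even indices pair downwards and odd indices pair upwards.
static unsigned pairedIndex(unsigned I, bool HasUnpairedGPR64) {
  if (HasUnpairedGPR64)
    return I % 2 == 0 ? I - 1 : I + 1;
  return I ^ 1;
}

int main() {
  assert(pairedIndex(4, false) == 5 && pairedIndex(5, false) == 4);
  assert(pairedIndex(4, true) == 3 && pairedIndex(5, true) == 6);
  return 0;
}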
if (!RegUsed) {
if (AArch64::GPR64RegClass.contains(Reg) &&
@@ -3150,12 +3353,21 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
<< " to get a scratch register.\n");
SavedRegs.set(UnspilledCSGPR);
+ ExtraCSSpill = UnspilledCSGPR;
+
// MachO's compact unwind format relies on all registers being stored in
// pairs, so if we need to spill one extra for BigStack, then we need to
// store the pair.
- if (producePairRegisters(MF))
- SavedRegs.set(UnspilledCSGPRPaired);
- ExtraCSSpill = UnspilledCSGPR;
+ if (producePairRegisters(MF)) {
+ if (UnspilledCSGPRPaired == AArch64::NoRegister) {
+ // Failed to make a pair for compact unwind format, revert spilling.
+ if (produceCompactUnwindFrame(MF)) {
+ SavedRegs.reset(UnspilledCSGPR);
+ ExtraCSSpill = AArch64::NoRegister;
+ }
+ } else
+ SavedRegs.set(UnspilledCSGPRPaired);
+ }
}
// If we didn't find an extra callee-saved register to spill, create
@@ -3252,6 +3464,12 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
bool AArch64FrameLowering::enableStackSlotScavenging(
const MachineFunction &MF) const {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ // If the function has streaming-mode changes, don't scavenge a
+ // spillslot in the callee-save area, as that might require an
+ // 'addvl' in the streaming-mode-changing call-sequence when the
+ // function doesn't use a FP.
+ if (AFI->hasStreamingModeChanges() && !hasFP(MF))
+ return false;
return AFI->hasCalleeSaveStackFreeSpace();
}
@@ -3412,7 +3630,7 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
// function.
DebugLoc DL;
RS->enterBasicBlockEnd(MBB);
- RS->backward(std::prev(MBBI));
+ RS->backward(MBBI);
Register DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
assert(DstReg && "There must be a free register after frame setup");
BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
@@ -3775,7 +3993,26 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
// New code will be inserted after the last tagging instruction we've found.
MachineBasicBlock::iterator InsertI = Instrs.back().MI;
+
+  // All the gathered stack tag instructions are merged and placed after the
+  // last tag store in the list. Before inserting, check whether the NZCV
+  // flag is live at that point: if it is, bail out, because any STG loops
+  // emitted by the merge could clobber it.
+
+  // FIXME: This bail-out is conservative: the liveness check is performed
+  // even when the merged insert list contains no STG loops, in which case
+  // it is unnecessary.
+ LivePhysRegs LiveRegs(*(MBB->getParent()->getSubtarget().getRegisterInfo()));
+ LiveRegs.addLiveOuts(*MBB);
+ for (auto I = MBB->rbegin();; ++I) {
+ MachineInstr &MI = *I;
+ if (MI == InsertI)
+ break;
+ LiveRegs.stepBackward(*I);
+ }
InsertI++;
+ if (LiveRegs.contains(AArch64::NZCV))
+ return InsertI;
llvm::stable_sort(Instrs,
[](const TagStoreInstr &Left, const TagStoreInstr &Right) {
@@ -4024,3 +4261,170 @@ void AArch64FrameLowering::orderFrameObjects(
dbgs() << "\n";
});
}
+
+/// Emit a loop to decrement SP until it is equal to TargetReg, with probes at
+/// least every ProbeSize bytes. Returns an iterator of the first instruction
+/// after the loop. The difference between SP and TargetReg must be an exact
+/// multiple of ProbeSize.
+MachineBasicBlock::iterator
+AArch64FrameLowering::inlineStackProbeLoopExactMultiple(
+ MachineBasicBlock::iterator MBBI, int64_t ProbeSize,
+ Register TargetReg) const {
+ MachineBasicBlock &MBB = *MBBI->getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const AArch64InstrInfo *TII =
+ MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+
+ MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
+ MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, LoopMBB);
+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, ExitMBB);
+
+ // SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not encodable
+ // in SUB).
+ emitFrameOffset(*LoopMBB, LoopMBB->end(), DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(-ProbeSize), TII,
+ MachineInstr::FrameSetup);
+ // STR XZR, [SP]
+ BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::STRXui))
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+ // CMP SP, TargetReg
+ BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
+ AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addReg(TargetReg)
+ .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
+ .setMIFlags(MachineInstr::FrameSetup);
+ // B.CC Loop
+ BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::Bcc))
+ .addImm(AArch64CC::NE)
+ .addMBB(LoopMBB)
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ LoopMBB->addSuccessor(ExitMBB);
+ LoopMBB->addSuccessor(LoopMBB);
+ // Synthesize the exit MBB.
+ ExitMBB->splice(ExitMBB->end(), &MBB, MBBI, MBB.end());
+ ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ MBB.addSuccessor(LoopMBB);
+ // Update liveins.
+ recomputeLiveIns(*LoopMBB);
+ recomputeLiveIns(*ExitMBB);
+
+ return ExitMBB->begin();
+}
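Semantically, the four instructions built above reduce to the following C++ sketch (illustrative names; in the real sequence the store is what touches each newly exposed guard page):

    #include <cassert>
    #include <cstdint>

    // Model of the emitted loop. SP - TargetReg must be a positive, exact
    // multiple of ProbeSize, otherwise the B.NE would never terminate.
    uint64_t probeLoopModel(uint64_t SP, uint64_t TargetReg, int64_t ProbeSize) {
      assert(SP > TargetReg && (SP - TargetReg) % ProbeSize == 0);
      do {
        SP -= ProbeSize;         // SUB SP, SP, #ProbeSize
        /* STR XZR, [SP] */      // probe the newly exposed page
      } while (SP != TargetReg); // CMP SP, TargetReg ; B.NE loop
      return SP;
    }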
+
+void AArch64FrameLowering::inlineStackProbeFixed(
+ MachineBasicBlock::iterator MBBI, Register ScratchReg, int64_t FrameSize,
+ StackOffset CFAOffset) const {
+ MachineBasicBlock *MBB = MBBI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ const AArch64InstrInfo *TII =
+ MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
+ bool HasFP = hasFP(MF);
+
+ DebugLoc DL;
+ int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
+ int64_t NumBlocks = FrameSize / ProbeSize;
+ int64_t ResidualSize = FrameSize % ProbeSize;
+
+ LLVM_DEBUG(dbgs() << "Stack probing: total " << FrameSize << " bytes, "
+ << NumBlocks << " blocks of " << ProbeSize
+ << " bytes, plus " << ResidualSize << " bytes\n");
+
+  // Decrement SP by NumBlocks * ProbeSize bytes, with either an unrolled
+  // sequence or an ordinary loop.
+ if (NumBlocks <= AArch64::StackProbeMaxLoopUnroll) {
+ for (int i = 0; i < NumBlocks; ++i) {
+ // SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not
+ // encodable in a SUB).
+ emitFrameOffset(*MBB, MBBI, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(-ProbeSize), TII,
+ MachineInstr::FrameSetup, false, false, nullptr,
+ EmitAsyncCFI && !HasFP, CFAOffset);
+ CFAOffset += StackOffset::getFixed(ProbeSize);
+ // STR XZR, [SP]
+ BuildMI(*MBB, MBBI, DL, TII->get(AArch64::STRXui))
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ } else if (NumBlocks != 0) {
+ // SUB ScratchReg, SP, #FrameSize (or equivalent if FrameSize is not
+    // encodable in ADD). ScratchReg may temporarily become the CFA register.
+ emitFrameOffset(*MBB, MBBI, DL, ScratchReg, AArch64::SP,
+ StackOffset::getFixed(-ProbeSize * NumBlocks), TII,
+ MachineInstr::FrameSetup, false, false, nullptr,
+ EmitAsyncCFI && !HasFP, CFAOffset);
+ CFAOffset += StackOffset::getFixed(ProbeSize * NumBlocks);
+ MBBI = inlineStackProbeLoopExactMultiple(MBBI, ProbeSize, ScratchReg);
+ MBB = MBBI->getParent();
+ if (EmitAsyncCFI && !HasFP) {
+ // Set the CFA register back to SP.
+ const AArch64RegisterInfo &RegInfo =
+ *MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
+ unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+ BuildMI(*MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ }
+
+ if (ResidualSize != 0) {
+ // SUB SP, SP, #ResidualSize (or equivalent if ResidualSize is not encodable
+ // in SUB).
+ emitFrameOffset(*MBB, MBBI, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(-ResidualSize), TII,
+ MachineInstr::FrameSetup, false, false, nullptr,
+ EmitAsyncCFI && !HasFP, CFAOffset);
+ if (ResidualSize > AArch64::StackProbeMaxUnprobedStack) {
+ // STR XZR, [SP]
+ BuildMI(*MBB, MBBI, DL, TII->get(AArch64::STRXui))
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ }
+}
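A worked example of the block/residual split, assuming a 4 KiB probe size (the exact limits come from AArch64::StackProbeMaxLoopUnroll and AArch64::StackProbeMaxUnprobedStack in this revision):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Hypothetical frame: 20000 bytes with a 4096-byte probe interval.
      int64_t FrameSize = 20000, ProbeSize = 4096;
      int64_t NumBlocks = FrameSize / ProbeSize;    // 4
      int64_t ResidualSize = FrameSize % ProbeSize; // 3616
      // With an unroll limit of 4, four inline SUB+STR pairs are emitted;
      // 3616 exceeds a 1024-byte unprobed-stack limit, so the residual
      // allocation is probed with a final STR XZR, [SP] as well.
      std::printf("%lld blocks, %lld residual bytes\n",
                  (long long)NumBlocks, (long long)ResidualSize);
      return 0;
    }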
+
+void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ // Get the instructions that need to be replaced. We emit at most two of
+ // these. Remember them in order to avoid complications coming from the need
+ // to traverse the block while potentially creating more blocks.
+ SmallVector<MachineInstr *, 4> ToReplace;
+ for (MachineInstr &MI : MBB)
+ if (MI.getOpcode() == AArch64::PROBED_STACKALLOC ||
+ MI.getOpcode() == AArch64::PROBED_STACKALLOC_VAR)
+ ToReplace.push_back(&MI);
+
+ for (MachineInstr *MI : ToReplace) {
+ if (MI->getOpcode() == AArch64::PROBED_STACKALLOC) {
+ Register ScratchReg = MI->getOperand(0).getReg();
+ int64_t FrameSize = MI->getOperand(1).getImm();
+ StackOffset CFAOffset = StackOffset::get(MI->getOperand(2).getImm(),
+ MI->getOperand(3).getImm());
+ inlineStackProbeFixed(MI->getIterator(), ScratchReg, FrameSize,
+ CFAOffset);
+ } else {
+ assert(MI->getOpcode() == AArch64::PROBED_STACKALLOC_VAR &&
+ "Stack probe pseudo-instruction expected");
+ const AArch64InstrInfo *TII =
+ MI->getMF()->getSubtarget<AArch64Subtarget>().getInstrInfo();
+ Register TargetReg = MI->getOperand(0).getReg();
+ (void)TII->probedStackAlloc(MI->getIterator(), TargetReg, true);
+ }
+ MI->eraseFromParent();
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 147b5c181be5..941af03a78b7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -150,10 +150,28 @@ private:
MachineBasicBlock::iterator MBBI) const;
void emitCalleeSavedSVERestores(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const;
+ void allocateStackSpace(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ int64_t RealignmentPadding, StackOffset AllocSize,
+ bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
+ StackOffset InitialOffset, bool FollowupAllocs) const;
/// Emit target zero call-used regs.
void emitZeroCallUsedRegs(BitVector RegsToZero,
MachineBasicBlock &MBB) const override;
+
+ /// Replace a StackProbe stub (if any) with the actual probe code inline
+ void inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologueMBB) const override;
+
+ void inlineStackProbeFixed(MachineBasicBlock::iterator MBBI,
+ Register ScratchReg, int64_t FrameSize,
+ StackOffset CFAOffset) const;
+
+ MachineBasicBlock::iterator
+ inlineStackProbeLoopExactMultiple(MachineBasicBlock::iterator MBBI,
+ int64_t NegProbeSize,
+ Register TargetReg) const;
};
} // End llvm namespace
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
index 87aef1dfe8cf..b87421e5ee46 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
@@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//
namespace llvm {
-RegisterBankInfo::PartialMapping AArch64GenRegisterBankInfo::PartMappings[]{
+const RegisterBankInfo::PartialMapping AArch64GenRegisterBankInfo::PartMappings[]{
/* StartIdx, Length, RegBank */
// 0: FPR 16-bit value.
{0, 16, AArch64::FPRRegBank},
@@ -34,7 +34,7 @@ RegisterBankInfo::PartialMapping AArch64GenRegisterBankInfo::PartMappings[]{
};
// ValueMappings.
-RegisterBankInfo::ValueMapping AArch64GenRegisterBankInfo::ValMappings[]{
+const RegisterBankInfo::ValueMapping AArch64GenRegisterBankInfo::ValMappings[]{
/* BreakDown, NumBreakDowns */
// 0: invalid
{nullptr, 0},
@@ -212,7 +212,7 @@ AArch64GenRegisterBankInfo::getValueMapping(PartialMappingIdx RBIdx,
return &ValMappings[ValMappingIdx];
}
-AArch64GenRegisterBankInfo::PartialMappingIdx
+const AArch64GenRegisterBankInfo::PartialMappingIdx
AArch64GenRegisterBankInfo::BankIDToCopyMapIdx[]{
PMI_None, // CCR
PMI_FirstFPR, // FPR
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp
index 2ed668712897..8ce6f94e7341 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp
@@ -37,7 +37,19 @@ static bool shouldTagGlobal(GlobalVariable &G) {
// For now, don't instrument constant data, as it'll be in .rodata anyway. It
// may be worth instrumenting these in future to stop them from being used as
// gadgets.
- if (G.getName().startswith("llvm.") || G.isThreadLocal() || G.isConstant()) {
+ if (G.getName().starts_with("llvm.") || G.isThreadLocal() || G.isConstant()) {
+ Meta.Memtag = false;
+ G.setSanitizerMetadata(Meta);
+ return false;
+ }
+
+ // Don't instrument function pointers that are going into various init arrays
+ // via `__attribute__((section(<foo>)))`:
+ // https://github.com/llvm/llvm-project/issues/69939
+ if (G.hasSection() &&
+ (G.getSection() == ".init" || G.getSection() == ".fini" ||
+ G.getSection() == ".init_array" || G.getSection() == ".fini_array" ||
+ G.getSection() == ".ctors" || G.getSection() == ".dtors")) {
Meta.Memtag = false;
G.setSanitizerMetadata(Meta);
return false;
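For illustration, a global of the shape the new check rejects looks roughly like this (hypothetical code): the C runtime walks .init_array/.fini_array and friends with untagged pointers, so giving such a global a tagged address would fault at startup.

    // Hypothetical function pointer routed into .init_array via the section
    // attribute; it must keep an untagged address.
    typedef void (*InitFn)();
    static void earlySetup() {}
    __attribute__((used, section(".init_array")))
    static InitFn earlyInit = &earlySetup;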
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index f79d4d1934aa..463ec41b94e9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -49,7 +49,7 @@ public:
AArch64DAGToDAGISel() = delete;
explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
- CodeGenOpt::Level OptLevel)
+ CodeGenOptLevel OptLevel)
: SelectionDAGISel(ID, tm, OptLevel), Subtarget(nullptr) {}
bool runOnMachineFunction(MachineFunction &MF) override {
@@ -62,7 +62,7 @@ public:
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- unsigned ConstraintID,
+ InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) override;
template <signed Low, signed High, signed Scale>
@@ -326,9 +326,14 @@ public:
return false;
}
- template <unsigned BaseReg> bool ImmToTile(SDValue N, SDValue &Imm) {
+ template <unsigned BaseReg, unsigned Max>
+ bool ImmToReg(SDValue N, SDValue &Imm) {
if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
uint64_t C = CI->getZExtValue();
+
+ if (C > Max)
+ return false;
+
Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
return true;
}
@@ -399,6 +404,9 @@ public:
return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
}
+ void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc,
+ uint32_t MaxImm);
+
template <unsigned MaxIdx, unsigned Scale>
bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
@@ -427,6 +435,8 @@ public:
bool trySelectCastFixedLengthToScalableVector(SDNode *N);
bool trySelectCastScalableToFixedLengthVector(SDNode *N);
+ bool trySelectXAR(SDNode *N);
+
// Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"
@@ -451,7 +461,8 @@ private:
bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
SDValue &Offset, SDValue &SignExtend,
SDValue &DoShift);
- bool isWorthFolding(SDValue V) const;
+ bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
+ bool isWorthFoldingAddr(SDValue V) const;
bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
SDValue &Offset, SDValue &SignExtend);
@@ -462,6 +473,14 @@ private:
bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
+ template<unsigned RegWidth>
+ bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
+ return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
+ }
+
+ bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
+ unsigned Width);
+
bool SelectCMP_SWAP(SDNode *N);
bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
@@ -524,13 +543,14 @@ static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
#endif
bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+ const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
+ std::vector<SDValue> &OutOps) {
switch(ConstraintID) {
default:
llvm_unreachable("Unexpected asm memory constraint");
- case InlineAsm::Constraint_m:
- case InlineAsm::Constraint_o:
- case InlineAsm::Constraint_Q:
+ case InlineAsm::ConstraintCode::m:
+ case InlineAsm::ConstraintCode::o:
+ case InlineAsm::ConstraintCode::Q:
// We need to make sure that this one operand does not end up in XZR, thus
// require the address to be in a PointerRegClass register.
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
@@ -652,18 +672,19 @@ static bool isWorthFoldingSHL(SDValue V) {
return true;
}
-/// Determine whether it is worth to fold V into an extended register.
-bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
+/// Determine whether it is worth folding V into an extended-register
+/// addressing mode.
+bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V) const {
// Trivial if we are optimizing for code size or if there is only
// one use of the value.
if (CurDAG->shouldOptForSize() || V.hasOneUse())
return true;
// If a subtarget has a fastpath LSL we can fold a logical shift into
// the addressing mode and save a cycle.
- if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
+ if (Subtarget->hasAddrLSLFast() && V.getOpcode() == ISD::SHL &&
isWorthFoldingSHL(V))
return true;
- if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
+ if (Subtarget->hasAddrLSLFast() && V.getOpcode() == ISD::ADD) {
const SDValue LHS = V.getOperand(0);
const SDValue RHS = V.getOperand(1);
if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
@@ -754,35 +775,6 @@ bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
return true;
}
-/// SelectShiftedRegister - Select a "shifted register" operand. If the value
-/// is not shifted, set the Shift operand to default of "LSL 0". The logical
-/// instructions allow the shifted register to be rotated, but the arithmetic
-/// instructions do not. The AllowROR parameter specifies whether ROR is
-/// supported.
-bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
- SDValue &Reg, SDValue &Shift) {
- if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
- return true;
-
- AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
- if (ShType == AArch64_AM::InvalidShiftExtend)
- return false;
- if (!AllowROR && ShType == AArch64_AM::ROR)
- return false;
-
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- unsigned BitSize = N.getValueSizeInBits();
- unsigned Val = RHS->getZExtValue() & (BitSize - 1);
- unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
-
- Reg = N.getOperand(0);
- Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
- return isWorthFolding(N);
- }
-
- return false;
-}
-
/// getExtendTypeForNode - Translate an extend node to the corresponding
/// ExtendType value.
static AArch64_AM::ShiftExtendType
@@ -837,6 +829,56 @@ getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
return AArch64_AM::InvalidShiftExtend;
}
+/// Determine whether it is worth folding V into the extended-register operand
+/// of an Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
+/// instruction, and the shift should be treated as worth folding even if it
+/// has multiple uses.
+bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
+ // Trivial if we are optimizing for code size or if there is only
+ // one use of the value.
+ if (CurDAG->shouldOptForSize() || V.hasOneUse())
+ return true;
+
+ // If a subtarget has a fastpath LSL we can fold a logical shift into
+ // the add/sub and save a cycle.
+ if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
+ V.getConstantOperandVal(1) <= 4 &&
+ getExtendTypeForNode(V.getOperand(0)) == AArch64_AM::InvalidShiftExtend)
+ return true;
+
+ // It hurts otherwise, since the value will be reused.
+ return false;
+}
+
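To make the heuristic concrete, here is a hypothetical source-level case (not from the patch) where the shift has two uses yet is still worth folding on subtargets with a fast ALU LSL path:

    // i * 8 lowers to LSL #3 (shift amount 3 <= 4, no extend involved). With
    // ALULSLFast both ADDs can take the value as an "lsl #3" operand; without
    // it, the shared shift stays in its own register.
    long twoSums(long a, long b, long i) {
      long s = i * 8;
      return (a + s) ^ (b + s);
    }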
+/// SelectShiftedRegister - Select a "shifted register" operand. If the value
+/// is not shifted, set the Shift operand to the default of "LSL 0". The logical
+/// instructions allow the shifted register to be rotated, but the arithmetic
+/// instructions do not. The AllowROR parameter specifies whether ROR is
+/// supported.
+bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
+ SDValue &Reg, SDValue &Shift) {
+ if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
+ return true;
+
+ AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
+ if (ShType == AArch64_AM::InvalidShiftExtend)
+ return false;
+ if (!AllowROR && ShType == AArch64_AM::ROR)
+ return false;
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ unsigned BitSize = N.getValueSizeInBits();
+ unsigned Val = RHS->getZExtValue() & (BitSize - 1);
+ unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
+
+ Reg = N.getOperand(0);
+ Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
+ return isWorthFoldingALU(N, true);
+ }
+
+ return false;
+}
+
/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32, but the incoming DAG might be acting on a
/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
@@ -917,7 +959,7 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
Reg = narrowIfNeeded(CurDAG, Reg);
Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
MVT::i32);
- return isWorthFolding(N);
+ return isWorthFoldingALU(N);
}
/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
@@ -941,7 +983,7 @@ bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
Reg = N.getOperand(0);
Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
MVT::i32);
- return isWorthFolding(N);
+ return isWorthFoldingALU(N);
}
/// If there's a use of this ADDlow that's not itself a load/store then we'll
@@ -965,6 +1007,15 @@ static bool isWorthFoldingADDlow(SDValue N) {
return true;
}
+/// Check if the immediate offset is valid as a scaled immediate.
+static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
+ unsigned Size) {
+ if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
+ Offset < (Range << Log2_32(Size)))
+ return true;
+ return false;
+}
+
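The helper's effect for a concrete case, taking an 8-byte access with the usual 12-bit unsigned immediate range (Range = 0x1000), sketched as a standalone reimplementation:

    #include <cassert>

    // Mirror of isValidAsScaledImmediate: the offset must be size-aligned,
    // non-negative, and below Range scaled units (Range * Size bytes).
    static bool validScaled(long long Offset, unsigned Range, unsigned Size) {
      return (Offset & (Size - 1)) == 0 && Offset >= 0 &&
             Offset < (long long)Range * Size;
    }

    int main() {
      assert(validScaled(32760, 0x1000, 8));  // 4095 * 8: largest encodable
      assert(!validScaled(32768, 0x1000, 8)); // 4096 * 8: one unit too far
      assert(!validScaled(20, 0x1000, 8));    // not a multiple of the size
      return 0;
    }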
/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
@@ -1060,7 +1111,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
int64_t RHSC = (int64_t)RHS->getZExtValue();
unsigned Scale = Log2_32(Size);
- if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
+ if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
@@ -1098,10 +1149,6 @@ bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
return false;
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
int64_t RHSC = RHS->getSExtValue();
- // If the offset is valid as a scaled immediate, don't match here.
- if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
- RHSC < (0x1000 << Log2_32(Size)))
- return false;
if (RHSC >= -256 && RHSC < 256) {
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
@@ -1156,7 +1203,7 @@ bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
return false;
- return isWorthFolding(N);
+ return isWorthFoldingAddr(N);
}
bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
@@ -1184,7 +1231,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
}
// Remember if it is worth folding N when it produces extended register.
- bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
+ bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N);
// Try to match a shifted extend on the RHS.
if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
@@ -1214,7 +1261,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
MVT::i32);
- if (isWorthFolding(LHS))
+ if (isWorthFoldingAddr(LHS))
return true;
}
@@ -1226,7 +1273,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
MVT::i32);
- if (isWorthFolding(RHS))
+ if (isWorthFoldingAddr(RHS))
return true;
}
@@ -1280,11 +1327,10 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
// LDR X2, [BaseReg, X0]
if (isa<ConstantSDNode>(RHS)) {
int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
- unsigned Scale = Log2_32(Size);
    // Skip if the immediate can be selected by a load/store addressing mode.
    // Also skip if the immediate can be encoded by a single ADD (SUB is also
    // checked by using -ImmOff).
- if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
+ if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
return false;
@@ -1297,7 +1343,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
}
// Remember if it is worth folding N when it produces extended register.
- bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
+ bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N);
// Try to match a shifted extend on the RHS.
if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
@@ -1751,7 +1797,7 @@ void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
unsigned Scale, unsigned Opc_ri,
unsigned Opc_rr, bool IsIntr) {
- assert(Scale < 4 && "Invalid scaling value.");
+ assert(Scale < 5 && "Invalid scaling value.");
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue Chain = N->getOperand(0);
@@ -1823,6 +1869,34 @@ void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
}
+void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
+ unsigned NumOutVecs,
+ unsigned Opc, uint32_t MaxImm) {
+ if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
+ if (Imm->getZExtValue() > MaxImm)
+ return;
+
+ SDValue ZtValue;
+ if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
+ return;
+ SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
+
+ SDNode *Instruction =
+ CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
+ SDValue SuperReg = SDValue(Instruction, 0);
+
+ for (unsigned I = 0; I < NumOutVecs; ++I)
+ ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
+ AArch64::zsub0 + I, DL, VT, SuperReg));
+
+ // Copy chain
+ unsigned ChainIdx = NumOutVecs;
+ ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
+ CurDAG->RemoveDeadNode(Node);
+}
+
void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
unsigned Op) {
SDLoc DL(N);
@@ -3625,9 +3699,10 @@ bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
return true;
}
-bool
-AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
- unsigned RegWidth) {
+static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
+ SDValue &FixedPos,
+ unsigned RegWidth,
+ bool isReciprocal) {
APFloat FVal(0.0);
if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
FVal = CN->getValueAPF();
@@ -3652,13 +3727,18 @@ AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
// integers.
bool IsExact;
+ if (isReciprocal)
+ if (!FVal.getExactInverse(&FVal))
+ return false;
+
   // fbits is between 1 and 64 in the worst case, which means the fmul
// could have 2^64 as an actual operand. Need 65 bits of precision.
APSInt IntVal(65, true);
FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
// N.b. isPowerOf2 also checks for > 0.
- if (!IsExact || !IntVal.isPowerOf2()) return false;
+ if (!IsExact || !IntVal.isPowerOf2())
+ return false;
unsigned FBits = IntVal.logBase2();
// Checks above should have guaranteed that we haven't lost information in
@@ -3669,6 +3749,19 @@ AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
return true;
}
+bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
+ unsigned RegWidth) {
+ return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
+ false);
+}
+
+bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
+ SDValue &FixedPos,
+ unsigned RegWidth) {
+ return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
+ true);
+}
+
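A hypothetical pair of sources showing what the plain and reciprocal hooks can each match; both should select a single fixed-point convert with 4 fractional bits, since 16 = 2^4 and 0.0625 has the exact inverse 16:

    // Both forms convert to fixed point with fbits = 4 (illustrative only).
    int directFBits(float x) { return (int)(x * 16.0f); }   // existing pattern
    int recipFBits(float x)  { return (int)(x / 0.0625f); } // reciprocal form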
 // Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
 // fields, obtains their integer values, and combines them into a single
 // value to be used in the MRS/MSR instruction.
@@ -4182,6 +4275,40 @@ bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
return true;
}
+bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
+ assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ if (N0->getOpcode() != AArch64ISD::VSHL ||
+ N1->getOpcode() != AArch64ISD::VLSHR)
+ return false;
+
+ if (N0->getOperand(0) != N1->getOperand(0) ||
+ N1->getOperand(0)->getOpcode() != ISD::XOR)
+ return false;
+
+ SDValue XOR = N0.getOperand(0);
+ SDValue R1 = XOR.getOperand(0);
+ SDValue R2 = XOR.getOperand(1);
+
+ unsigned HsAmt = N0.getConstantOperandVal(1);
+ unsigned ShAmt = N1.getConstantOperandVal(1);
+
+ SDLoc DL = SDLoc(N0.getOperand(1));
+ SDValue Imm = CurDAG->getTargetConstant(
+ ShAmt, DL, N0.getOperand(1).getValueType(), false);
+
+ if (ShAmt + HsAmt != 64)
+ return false;
+
+ SDValue Ops[] = {R1, R2, Imm};
+ CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops);
+
+ return true;
+}
+
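Scalar sketch of the combine: a VSHL/VLSHR pair over a shared XOR whose shift amounts sum to 64 is exactly a rotate of the XOR, which is what XAR computes per lane (illustrative names; valid for 0 < shAmt < 64):

    #include <cstdint>

    // (x << (64 - shAmt)) | (x >> shAmt) is ROR(x, shAmt); trySelectXAR feeds
    // the VLSHR amount to the XAR immediate after checking the sum is 64.
    uint64_t xarModel(uint64_t r1, uint64_t r2, unsigned shAmt) {
      uint64_t x = r1 ^ r2;
      return (x << (64 - shAmt)) | (x >> shAmt);
    }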
void AArch64DAGToDAGISel::Select(SDNode *Node) {
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
@@ -4245,6 +4372,8 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
case ISD::OR:
if (tryBitfieldInsertOp(Node))
return;
+ if (Subtarget->hasSHA3() && trySelectXAR(Node))
+ return;
break;
case ISD::EXTRACT_SUBVECTOR: {
@@ -4637,6 +4766,18 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::aarch64_ld64b:
SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
return;
+ case Intrinsic::aarch64_sve_ld2q_sret: {
+ SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
+ return;
+ }
+ case Intrinsic::aarch64_sve_ld3q_sret: {
+ SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
+ return;
+ }
+ case Intrinsic::aarch64_sve_ld4q_sret: {
+ SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
+ return;
+ }
case Intrinsic::aarch64_sve_ld2_sret: {
if (VT == MVT::nxv16i8) {
SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
@@ -4660,68 +4801,188 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
}
case Intrinsic::aarch64_sve_ld1_pn_x2: {
if (VT == MVT::nxv16i8) {
- SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z);
+ if (Subtarget->hasSME2())
+ SelectContiguousMultiVectorLoad(
+ Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
+ else if (Subtarget->hasSVE2p1())
+ SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
+ AArch64::LD1B_2Z);
+ else
+ break;
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
VT == MVT::nxv8bf16) {
- SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z);
+ if (Subtarget->hasSME2())
+ SelectContiguousMultiVectorLoad(
+ Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
+ else if (Subtarget->hasSVE2p1())
+ SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
+ AArch64::LD1H_2Z);
+ else
+ break;
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
- SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z);
+ if (Subtarget->hasSME2())
+ SelectContiguousMultiVectorLoad(
+ Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
+ else if (Subtarget->hasSVE2p1())
+ SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
+ AArch64::LD1W_2Z);
+ else
+ break;
return;
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
- SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z);
+ if (Subtarget->hasSME2())
+ SelectContiguousMultiVectorLoad(
+ Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
+ else if (Subtarget->hasSVE2p1())
+ SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
+ AArch64::LD1D_2Z);
+ else
+ break;
return;
}
break;
}
case Intrinsic::aarch64_sve_ld1_pn_x4: {
if (VT == MVT::nxv16i8) {
- SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z);
+ if (Subtarget->hasSME2())
+ SelectContiguousMultiVectorLoad(
+ Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
+ else if (Subtarget->hasSVE2p1())
+ SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
+ AArch64::LD1B_4Z);
+ else
+ break;
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
VT == MVT::nxv8bf16) {
- SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z);
+ if (Subtarget->hasSME2())
+ SelectContiguousMultiVectorLoad(
+ Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
+ else if (Subtarget->hasSVE2p1())
+ SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
+ AArch64::LD1H_4Z);
+ else
+ break;
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
- SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z);
+ if (Subtarget->hasSME2())
+ SelectContiguousMultiVectorLoad(
+ Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
+ else if (Subtarget->hasSVE2p1())
+ SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
+ AArch64::LD1W_4Z);
+ else
+ break;
return;
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
- SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z);
+ if (Subtarget->hasSME2())
+ SelectContiguousMultiVectorLoad(
+ Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
+ else if (Subtarget->hasSVE2p1())
+ SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
+ AArch64::LD1D_4Z);
+ else
+ break;
return;
}
break;
}
case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
if (VT == MVT::nxv16i8) {
- SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z);
+ if (Subtarget->hasSME2())
+ SelectContiguousMultiVectorLoad(Node, 2, 0,
+ AArch64::LDNT1B_2Z_IMM_PSEUDO,
+ AArch64::LDNT1B_2Z_PSEUDO);
+ else if (Subtarget->hasSVE2p1())
+ SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
+ AArch64::LDNT1B_2Z);
+ else
+ break;
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
VT == MVT::nxv8bf16) {
- SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z);
+ if (Subtarget->hasSME2())
+ SelectContiguousMultiVectorLoad(Node, 2, 1,
+ AArch64::LDNT1H_2Z_IMM_PSEUDO,
+ AArch64::LDNT1H_2Z_PSEUDO);
+ else if (Subtarget->hasSVE2p1())
+ SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
+ AArch64::LDNT1H_2Z);
+ else
+ break;
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
- SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z);
+ if (Subtarget->hasSME2())
+ SelectContiguousMultiVectorLoad(Node, 2, 2,
+ AArch64::LDNT1W_2Z_IMM_PSEUDO,
+ AArch64::LDNT1W_2Z_PSEUDO);
+ else if (Subtarget->hasSVE2p1())
+ SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
+ AArch64::LDNT1W_2Z);
+ else
+ break;
return;
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
- SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z);
+ if (Subtarget->hasSME2())
+ SelectContiguousMultiVectorLoad(Node, 2, 3,
+ AArch64::LDNT1D_2Z_IMM_PSEUDO,
+ AArch64::LDNT1D_2Z_PSEUDO);
+ else if (Subtarget->hasSVE2p1())
+ SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
+ AArch64::LDNT1D_2Z);
+ else
+ break;
return;
}
break;
}
case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
if (VT == MVT::nxv16i8) {
- SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z);
+ if (Subtarget->hasSME2())
+ SelectContiguousMultiVectorLoad(Node, 4, 0,
+ AArch64::LDNT1B_4Z_IMM_PSEUDO,
+ AArch64::LDNT1B_4Z_PSEUDO);
+ else if (Subtarget->hasSVE2p1())
+ SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
+ AArch64::LDNT1B_4Z);
+ else
+ break;
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
VT == MVT::nxv8bf16) {
- SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z);
+ if (Subtarget->hasSME2())
+ SelectContiguousMultiVectorLoad(Node, 4, 1,
+ AArch64::LDNT1H_4Z_IMM_PSEUDO,
+ AArch64::LDNT1H_4Z_PSEUDO);
+ else if (Subtarget->hasSVE2p1())
+ SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
+ AArch64::LDNT1H_4Z);
+ else
+ break;
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
- SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z);
+ if (Subtarget->hasSME2())
+ SelectContiguousMultiVectorLoad(Node, 4, 2,
+ AArch64::LDNT1W_4Z_IMM_PSEUDO,
+ AArch64::LDNT1W_4Z_PSEUDO);
+ else if (Subtarget->hasSVE2p1())
+ SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
+ AArch64::LDNT1W_4Z);
+ else
+ break;
return;
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
- SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z);
+ if (Subtarget->hasSME2())
+ SelectContiguousMultiVectorLoad(Node, 4, 3,
+ AArch64::LDNT1D_4Z_IMM_PSEUDO,
+ AArch64::LDNT1D_4Z_PSEUDO);
+ else if (Subtarget->hasSVE2p1())
+ SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
+ AArch64::LDNT1D_4Z);
+ else
+ break;
return;
}
break;
@@ -4880,6 +5141,41 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
return;
}
+ case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
+ if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
+ Node->getValueType(0),
+ {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
+ AArch64::LUTI2_4ZTZI_S}))
+ // Second Immediate must be <= 3:
+ SelectMultiVectorLuti(Node, 4, Opc, 3);
+ return;
+ }
+ case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
+ if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
+ Node->getValueType(0),
+ {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
+ // Second Immediate must be <= 1:
+ SelectMultiVectorLuti(Node, 4, Opc, 1);
+ return;
+ }
+ case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
+ if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
+ Node->getValueType(0),
+ {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
+ AArch64::LUTI2_2ZTZI_S}))
+ // Second Immediate must be <= 7:
+ SelectMultiVectorLuti(Node, 2, Opc, 7);
+ return;
+ }
+ case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
+ if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
+ Node->getValueType(0),
+ {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
+ AArch64::LUTI4_2ZTZI_S}))
+ // Second Immediate must be <= 3:
+ SelectMultiVectorLuti(Node, 2, Opc, 3);
+ return;
+ }
}
} break;
case ISD::INTRINSIC_WO_CHAIN: {
@@ -5729,6 +6025,18 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
}
break;
}
+ case Intrinsic::aarch64_sve_st2q: {
+ SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
+ return;
+ }
+ case Intrinsic::aarch64_sve_st3q: {
+ SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
+ return;
+ }
+ case Intrinsic::aarch64_sve_st4q: {
+ SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
+ return;
+ }
case Intrinsic::aarch64_sve_st2: {
if (VT == MVT::nxv16i8) {
SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
@@ -6433,7 +6741,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
 /// createAArch64ISelDag - This pass converts a legalized DAG into an
 /// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
return new AArch64DAGToDAGISel(TM, OptLevel);
}
@@ -6510,14 +6818,32 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
return getPackedVectorTypeFromPredicateType(
Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
case Intrinsic::aarch64_sve_ld2_sret:
+ case Intrinsic::aarch64_sve_ld2q_sret:
return getPackedVectorTypeFromPredicateType(
Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
+ case Intrinsic::aarch64_sve_st2q:
+ return getPackedVectorTypeFromPredicateType(
+ Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
case Intrinsic::aarch64_sve_ld3_sret:
+ case Intrinsic::aarch64_sve_ld3q_sret:
return getPackedVectorTypeFromPredicateType(
Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
+ case Intrinsic::aarch64_sve_st3q:
+ return getPackedVectorTypeFromPredicateType(
+ Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
case Intrinsic::aarch64_sve_ld4_sret:
+ case Intrinsic::aarch64_sve_ld4q_sret:
return getPackedVectorTypeFromPredicateType(
Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
+ case Intrinsic::aarch64_sve_st4q:
+ return getPackedVectorTypeFromPredicateType(
+ Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
+ case Intrinsic::aarch64_sve_ld1udq:
+ case Intrinsic::aarch64_sve_st1udq:
+ return EVT(MVT::nxv1i64);
+ case Intrinsic::aarch64_sve_ld1uwq:
+ case Intrinsic::aarch64_sve_st1uwq:
+ return EVT(MVT::nxv1i32);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6e721b937846..3882e843fb69 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -31,11 +31,15 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ObjCARCUtil.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/ComplexDeinterleavingPass.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -50,6 +54,7 @@
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
@@ -66,7 +71,6 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
@@ -76,7 +80,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstructionCost.h"
@@ -132,6 +135,10 @@ EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
"gather intrinsics"),
cl::init(true));
+static cl::opt<bool> EnableExtToTBL("aarch64-enable-ext-to-tbl", cl::Hidden,
+ cl::desc("Combine ext and trunc to TBL"),
+ cl::init(true));
+
 // All of the XOR, OR, and CMP operations use ALU ports, and the data
 // dependency will become the bottleneck after this transform on high-end
 // CPUs. So this maximum leaf-node limit guards that cmp+ccmp will remain
 // profitable.
@@ -148,9 +155,9 @@ static const MCPhysReg FPRArgRegs[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
AArch64::Q3, AArch64::Q4, AArch64::Q5,
AArch64::Q6, AArch64::Q7};
-const ArrayRef<MCPhysReg> llvm::AArch64::getGPRArgRegs() { return GPRArgRegs; }
+ArrayRef<MCPhysReg> llvm::AArch64::getGPRArgRegs() { return GPRArgRegs; }
-const ArrayRef<MCPhysReg> llvm::AArch64::getFPRArgRegs() { return FPRArgRegs; }
+ArrayRef<MCPhysReg> llvm::AArch64::getFPRArgRegs() { return FPRArgRegs; }
static inline EVT getPackedSVEVectorVT(EVT VT) {
switch (VT.getSimpleVT().SimpleTy) {
@@ -556,10 +563,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- if (Subtarget->isTargetWindows())
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
- else
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+ // Lowering Funnel Shifts to EXTR
+ setOperationAction(ISD::FSHR, MVT::i32, Custom);
+ setOperationAction(ISD::FSHR, MVT::i64, Custom);
+ setOperationAction(ISD::FSHL, MVT::i32, Custom);
+ setOperationAction(ISD::FSHL, MVT::i64, Custom);
+
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
// Constant pool entries
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
@@ -671,8 +681,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
- ISD::FEXP, ISD::FEXP2, ISD::FLOG,
- ISD::FLOG2, ISD::FLOG10, ISD::STRICT_FREM,
+ ISD::FEXP, ISD::FEXP2, ISD::FEXP10,
+ ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
+ ISD::STRICT_FREM,
ISD::STRICT_FPOW, ISD::STRICT_FPOWI, ISD::STRICT_FCOS,
ISD::STRICT_FSIN, ISD::STRICT_FEXP, ISD::STRICT_FEXP2,
ISD::STRICT_FLOG, ISD::STRICT_FLOG2, ISD::STRICT_FLOG10}) {
@@ -701,7 +712,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   // Round-to-integer operations need custom lowering for fp16, as Promote
   // doesn't work because the result type is integer.
- for (auto Op : {ISD::STRICT_LROUND, ISD::STRICT_LLROUND, ISD::STRICT_LRINT,
+ for (auto Op : {ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
+ ISD::STRICT_LROUND, ISD::STRICT_LLROUND, ISD::STRICT_LRINT,
ISD::STRICT_LLRINT})
setOperationAction(Op, MVT::f16, Custom);
@@ -788,8 +800,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+ if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, LibCall);
+ } else {
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand);
+ }
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
@@ -1007,6 +1024,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::VECREDUCE_OR);
setTargetDAGCombine(ISD::VECREDUCE_XOR);
+ setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
+
   // In case of strict alignment, avoid an excessive number of byte-wide stores.
MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemset =
@@ -1272,7 +1291,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
}
- if (Subtarget->hasSVE()) {
+ if (Subtarget->hasSVEorSME()) {
for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
setOperationAction(ISD::BITREVERSE, VT, Custom);
setOperationAction(ISD::BSWAP, VT, Custom);
@@ -1336,6 +1355,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::AVGFLOORU, VT, Custom);
setOperationAction(ISD::AVGCEILS, VT, Custom);
setOperationAction(ISD::AVGCEILU, VT, Custom);
+
+ if (!Subtarget->isLittleEndian())
+ setOperationAction(ISD::BITCAST, VT, Expand);
}
// Illegal unpacked integer vector types.
@@ -1456,7 +1478,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
+ if (Subtarget->isSVEAvailable())
+ setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
@@ -1470,6 +1493,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FEXP10, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
@@ -1485,6 +1509,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setCondCodeAction(ISD::SETUGT, VT, Expand);
setCondCodeAction(ISD::SETUEQ, VT, Expand);
setCondCodeAction(ISD::SETONE, VT, Expand);
+
+ if (!Subtarget->isLittleEndian())
+ setOperationAction(ISD::BITCAST, VT, Expand);
}
for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
@@ -1494,6 +1521,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
+
+ if (!Subtarget->isLittleEndian())
+ setOperationAction(ISD::BITCAST, VT, Expand);
}
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
@@ -1510,9 +1540,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v1i64, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
- // NEON doesn't support across-vector reductions, but SVE does.
- for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
- setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
+ if (Subtarget->isSVEAvailable()) {
+ // NEON doesn't support across-vector reductions, but SVE does.
+ for (auto VT :
+ {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
+ setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
+ }
if (!Subtarget->isNeonAvailable()) {
setTruncStoreAction(MVT::v2f32, MVT::v2f16, Custom);
@@ -1609,9 +1642,16 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ if (Subtarget->hasSVE()) {
+ setOperationAction(ISD::FLDEXP, MVT::f64, Custom);
+ setOperationAction(ISD::FLDEXP, MVT::f32, Custom);
+ setOperationAction(ISD::FLDEXP, MVT::f16, Custom);
+ }
+
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
IsStrictFPEnabled = true;
+ setMaxAtomicSizeInBitsSupported(128);
}
void AArch64TargetLowering::addTypeForNEON(MVT VT) {
@@ -1633,6 +1673,7 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
setOperationAction(ISD::FLOG10, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FEXP10, VT, Expand);
}
// But we do support custom-lowering for FCOPYSIGN.
@@ -1749,6 +1790,10 @@ bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
return false;
}
+bool AArch64TargetLowering::shouldExpandCttzElements(EVT VT) const {
+ return !Subtarget->hasSVEorSME() || VT != MVT::nxv16i1;
+}
+
void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT,
bool StreamingSVE) {
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
@@ -1870,7 +1915,8 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT,
setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT,
+ StreamingSVE ? Expand : Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
@@ -2106,6 +2152,13 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
Known = KnownBits::ashr(Known, Known2);
break;
}
+ case AArch64ISD::VSHL: {
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
+ Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
+ Known = KnownBits::shl(Known, Known2);
+ break;
+ }
case AArch64ISD::MOVI: {
ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(0));
Known =
@@ -2147,6 +2200,17 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
switch (IntNo) {
default:
break;
+ case Intrinsic::aarch64_neon_uaddlv: {
+ MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
+ unsigned BitWidth = Known.getBitWidth();
+ if (VT == MVT::v8i8 || VT == MVT::v16i8) {
+ unsigned Bound = (VT == MVT::v8i8) ? 11 : 12;
+ assert(BitWidth >= Bound && "Unexpected width!");
+ APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - Bound);
+ Known.Zero |= Mask;
+ }
+ break;
+ }
case Intrinsic::aarch64_neon_umaxv:
case Intrinsic::aarch64_neon_uminv: {
// Figure out the datatype of the vector operand. The UMINV instruction
@@ -2271,7 +2335,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((AArch64ISD::NodeType)Opcode) {
case AArch64ISD::FIRST_NUMBER:
break;
- MAKE_CASE(AArch64ISD::OBSCURE_COPY)
MAKE_CASE(AArch64ISD::SMSTART)
MAKE_CASE(AArch64ISD::SMSTOP)
MAKE_CASE(AArch64ISD::RESTORE_ZA)
@@ -2288,6 +2351,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::CSINC)
MAKE_CASE(AArch64ISD::THREAD_POINTER)
MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
+ MAKE_CASE(AArch64ISD::PROBED_ALLOCA)
MAKE_CASE(AArch64ISD::ABDS_PRED)
MAKE_CASE(AArch64ISD::ABDU_PRED)
MAKE_CASE(AArch64ISD::HADDS_PRED)
@@ -2342,6 +2406,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::FCMP)
MAKE_CASE(AArch64ISD::STRICT_FCMP)
MAKE_CASE(AArch64ISD::STRICT_FCMPE)
+ MAKE_CASE(AArch64ISD::SME_ZA_LDR)
+ MAKE_CASE(AArch64ISD::SME_ZA_STR)
MAKE_CASE(AArch64ISD::DUP)
MAKE_CASE(AArch64ISD::DUPLANE8)
MAKE_CASE(AArch64ISD::DUPLANE16)
@@ -2358,7 +2424,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::BICi)
MAKE_CASE(AArch64ISD::ORRi)
MAKE_CASE(AArch64ISD::BSP)
- MAKE_CASE(AArch64ISD::EXTR)
MAKE_CASE(AArch64ISD::ZIP1)
MAKE_CASE(AArch64ISD::ZIP2)
MAKE_CASE(AArch64ISD::UZP1)
@@ -2395,6 +2460,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::FCMLTz)
MAKE_CASE(AArch64ISD::SADDV)
MAKE_CASE(AArch64ISD::UADDV)
+ MAKE_CASE(AArch64ISD::UADDLV)
MAKE_CASE(AArch64ISD::SDOT)
MAKE_CASE(AArch64ISD::UDOT)
MAKE_CASE(AArch64ISD::SMINV)
@@ -2512,6 +2578,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1Q_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1Q_INDEX_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO)
@@ -2536,6 +2604,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::SST1Q_PRED)
+ MAKE_CASE(AArch64ISD::SST1Q_INDEX_PRED)
MAKE_CASE(AArch64ISD::ST1_PRED)
MAKE_CASE(AArch64ISD::SST1_PRED)
MAKE_CASE(AArch64ISD::SST1_SCALED_PRED)
@@ -2573,6 +2643,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::CALL_BTI)
MAKE_CASE(AArch64ISD::MRRS)
MAKE_CASE(AArch64ISD::MSRR)
+ MAKE_CASE(AArch64ISD::RSHRNB_I)
+ MAKE_CASE(AArch64ISD::CTTZ_ELTS)
}
#undef MAKE_CASE
return nullptr;
@@ -2647,6 +2719,22 @@ MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
}
MachineBasicBlock *
+AArch64TargetLowering::EmitDynamicProbedAlloc(MachineInstr &MI,
+ MachineBasicBlock *MBB) const {
+ MachineFunction &MF = *MBB->getParent();
+ MachineBasicBlock::iterator MBBI = MI.getIterator();
+ DebugLoc DL = MBB->findDebugLoc(MBBI);
+ const AArch64InstrInfo &TII =
+ *MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
+ Register TargetReg = MI.getOperand(0).getReg();
+ MachineBasicBlock::iterator NextInst =
+ TII.probedStackAlloc(MBBI, TargetReg, false);
+
+ MI.eraseFromParent();
+ return NextInst->getParent();
+}
+
+MachineBasicBlock *
AArch64TargetLowering::EmitTileLoad(unsigned Opc, unsigned BaseReg,
MachineInstr &MI,
MachineBasicBlock *BB) const {
@@ -2680,6 +2768,22 @@ AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const {
return BB;
}
+MachineBasicBlock *AArch64TargetLowering::EmitZTInstr(MachineInstr &MI,
+ MachineBasicBlock *BB,
+ unsigned Opcode,
+ bool Op0IsDef) const {
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ MachineInstrBuilder MIB;
+
+ MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opcode))
+ .addReg(MI.getOperand(0).getReg(), Op0IsDef ? RegState::Define : 0);
+ for (unsigned I = 1; I < MI.getNumOperands(); ++I)
+ MIB.add(MI.getOperand(I));
+
+ MI.eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
+
MachineBasicBlock *
AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg,
MachineInstr &MI,
@@ -2774,6 +2878,10 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
case AArch64::CATCHRET:
return EmitLoweredCatchRet(MI, BB);
+
+ case AArch64::PROBED_STACKALLOC_DYN:
+ return EmitDynamicProbedAlloc(MI, BB);
+
case AArch64::LD1_MXIPXX_H_PSEUDO_B:
return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0, MI, BB);
case AArch64::LD1_MXIPXX_H_PSEUDO_H:
@@ -2796,8 +2904,14 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB);
case AArch64::LDR_ZA_PSEUDO:
return EmitFill(MI, BB);
+ case AArch64::LDR_TX_PSEUDO:
+ return EmitZTInstr(MI, BB, AArch64::LDR_TX, /*Op0IsDef=*/true);
+ case AArch64::STR_TX_PSEUDO:
+ return EmitZTInstr(MI, BB, AArch64::STR_TX, /*Op0IsDef=*/false);
case AArch64::ZERO_M_PSEUDO:
return EmitZero(MI, BB);
+ case AArch64::ZERO_T_PSEUDO:
+ return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*Op0IsDef=*/true);
}
}
@@ -4352,11 +4466,11 @@ getConstantLaneNumOfExtractHalfOperand(SDValue &Op) {
return C->getZExtValue();
}
-static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
+static bool isExtendedBUILD_VECTOR(SDValue N, SelectionDAG &DAG,
bool isSigned) {
- EVT VT = N->getValueType(0);
+ EVT VT = N.getValueType();
- if (N->getOpcode() != ISD::BUILD_VECTOR)
+ if (N.getOpcode() != ISD::BUILD_VECTOR)
return false;
for (const SDValue &Elt : N->op_values()) {
@@ -4378,58 +4492,65 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
return true;
}
-static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
- if (ISD::isExtOpcode(N->getOpcode()))
- return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
- N->getOperand(0)->getValueType(0),
- N->getValueType(0),
- N->getOpcode());
+static SDValue skipExtensionForVectorMULL(SDValue N, SelectionDAG &DAG) {
+ EVT VT = N.getValueType();
+ assert(VT.is128BitVector() && "Unexpected vector MULL size");
- assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
- EVT VT = N->getValueType(0);
- SDLoc dl(N);
- unsigned EltSize = VT.getScalarSizeInBits() / 2;
unsigned NumElts = VT.getVectorNumElements();
- MVT TruncVT = MVT::getIntegerVT(EltSize);
+ unsigned OrigEltSize = VT.getScalarSizeInBits();
+ unsigned EltSize = OrigEltSize / 2;
+ MVT TruncVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
+
+ APInt HiBits = APInt::getHighBitsSet(OrigEltSize, EltSize);
+ if (DAG.MaskedValueIsZero(N, HiBits))
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N);
+
+ if (ISD::isExtOpcode(N.getOpcode()))
+ return addRequiredExtensionForVectorMULL(N.getOperand(0), DAG,
+ N.getOperand(0).getValueType(), VT,
+ N.getOpcode());
+
+ assert(N.getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
+ SDLoc dl(N);
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i != NumElts; ++i) {
- ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
+ ConstantSDNode *C = cast<ConstantSDNode>(N.getOperand(i));
const APInt &CInt = C->getAPIntValue();
// Element types smaller than 32 bits are not legal, so use i32 elements.
// The values are implicitly truncated so sext vs. zext doesn't matter.
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
}
- return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
+ return DAG.getBuildVector(TruncVT, dl, Ops);
}
-static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
- return N->getOpcode() == ISD::SIGN_EXTEND ||
- N->getOpcode() == ISD::ANY_EXTEND ||
+static bool isSignExtended(SDValue N, SelectionDAG &DAG) {
+ return N.getOpcode() == ISD::SIGN_EXTEND ||
+ N.getOpcode() == ISD::ANY_EXTEND ||
isExtendedBUILD_VECTOR(N, DAG, true);
}
-static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
- return N->getOpcode() == ISD::ZERO_EXTEND ||
- N->getOpcode() == ISD::ANY_EXTEND ||
+static bool isZeroExtended(SDValue N, SelectionDAG &DAG) {
+ return N.getOpcode() == ISD::ZERO_EXTEND ||
+ N.getOpcode() == ISD::ANY_EXTEND ||
isExtendedBUILD_VECTOR(N, DAG, false);
}
-static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
- unsigned Opcode = N->getOpcode();
+static bool isAddSubSExt(SDValue N, SelectionDAG &DAG) {
+ unsigned Opcode = N.getOpcode();
if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
- SDNode *N0 = N->getOperand(0).getNode();
- SDNode *N1 = N->getOperand(1).getNode();
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
return N0->hasOneUse() && N1->hasOneUse() &&
isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
}
return false;
}
-static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
- unsigned Opcode = N->getOpcode();
+static bool isAddSubZExt(SDValue N, SelectionDAG &DAG) {
+ unsigned Opcode = N.getOpcode();
if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
- SDNode *N0 = N->getOperand(0).getNode();
- SDNode *N1 = N->getOperand(1).getNode();
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
return N0->hasOneUse() && N1->hasOneUse() &&
isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
}
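
// Source-level sketch (illustrative assumption, not from the patch) of the
// pattern the helpers above classify: a widening multiply whose operands are
// both extends, which selectUmullSmull maps onto smull/umull.
#include <cstdint>
void widenMul(int32_t *Out, const int16_t *A, const int16_t *B) {
  for (int I = 0; I < 4; ++I)
    Out[I] = int32_t{A[I]} * int32_t{B[I]}; // smull v.4s, v.4h, v.4h
}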
@@ -4503,7 +4624,7 @@ SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op,
return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
}
-static unsigned selectUmullSmull(SDNode *&N0, SDNode *&N1, SelectionDAG &DAG,
+static unsigned selectUmullSmull(SDValue &N0, SDValue &N1, SelectionDAG &DAG,
SDLoc DL, bool &IsMLA) {
bool IsN0SExt = isSignExtended(N0, DAG);
bool IsN1SExt = isSignExtended(N1, DAG);
@@ -4522,12 +4643,12 @@ static unsigned selectUmullSmull(SDNode *&N0, SDNode *&N1, SelectionDAG &DAG,
!isExtendedBUILD_VECTOR(N1, DAG, false)) {
SDValue ZextOperand;
if (IsN0ZExt)
- ZextOperand = N0->getOperand(0);
+ ZextOperand = N0.getOperand(0);
else
- ZextOperand = N1->getOperand(0);
+ ZextOperand = N1.getOperand(0);
if (DAG.SignBitIsZero(ZextOperand)) {
- SDNode *NewSext =
- DAG.getSExtOrTrunc(ZextOperand, DL, N0->getValueType(0)).getNode();
+ SDValue NewSext =
+ DAG.getSExtOrTrunc(ZextOperand, DL, N0.getValueType());
if (IsN0ZExt)
N0 = NewSext;
else
@@ -4538,34 +4659,11 @@ static unsigned selectUmullSmull(SDNode *&N0, SDNode *&N1, SelectionDAG &DAG,
// Select UMULL if we can replace the other operand with an extend.
if (IsN0ZExt || IsN1ZExt) {
- EVT VT = N0->getValueType(0);
+ EVT VT = N0.getValueType();
APInt Mask = APInt::getHighBitsSet(VT.getScalarSizeInBits(),
VT.getScalarSizeInBits() / 2);
- if (DAG.MaskedValueIsZero(SDValue(IsN0ZExt ? N1 : N0, 0), Mask)) {
- EVT HalfVT;
- switch (VT.getSimpleVT().SimpleTy) {
- case MVT::v2i64:
- HalfVT = MVT::v2i32;
- break;
- case MVT::v4i32:
- HalfVT = MVT::v4i16;
- break;
- case MVT::v8i16:
- HalfVT = MVT::v8i8;
- break;
- default:
- return 0;
- }
- // Truncate and then extend the result.
- SDValue NewExt = DAG.getNode(ISD::TRUNCATE, DL, HalfVT,
- SDValue(IsN0ZExt ? N1 : N0, 0));
- NewExt = DAG.getZExtOrTrunc(NewExt, DL, VT);
- if (IsN0ZExt)
- N1 = NewExt.getNode();
- else
- N0 = NewExt.getNode();
+ if (DAG.MaskedValueIsZero(IsN0ZExt ? N1 : N0, Mask))
return AArch64ISD::UMULL;
- }
}
if (!IsN1SExt && !IsN1ZExt)
@@ -4600,18 +4698,18 @@ SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
// that VMULL can be detected. Otherwise v2i64 multiplications are not legal.
assert((VT.is128BitVector() || VT.is64BitVector()) && VT.isInteger() &&
"unexpected type for custom-lowering ISD::MUL");
- SDNode *N0 = Op.getOperand(0).getNode();
- SDNode *N1 = Op.getOperand(1).getNode();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
bool isMLA = false;
EVT OVT = VT;
if (VT.is64BitVector()) {
- if (N0->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- isNullConstant(N0->getOperand(1)) &&
- N1->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- isNullConstant(N1->getOperand(1))) {
- N0 = N0->getOperand(0).getNode();
- N1 = N1->getOperand(0).getNode();
- VT = N0->getValueType(0);
+ if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ isNullConstant(N0.getOperand(1)) &&
+ N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ isNullConstant(N1.getOperand(1))) {
+ N0 = N0.getOperand(0);
+ N1 = N1.getOperand(0);
+ VT = N0.getValueType();
} else {
if (VT == MVT::v1i64) {
if (Subtarget->hasSVE())
@@ -4655,12 +4753,12 @@ SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
// Optimizing (zext A + zext B) * C, to (S/UMULL A, C) + (S/UMULL B, C) during
// isel lowering to take advantage of no-stall back to back s/umul + s/umla.
// This is true for CPUs with accumulate forwarding such as Cortex-A53/A57
- SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
- SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
+ SDValue N00 = skipExtensionForVectorMULL(N0.getOperand(0), DAG);
+ SDValue N01 = skipExtensionForVectorMULL(N0.getOperand(1), DAG);
EVT Op1VT = Op1.getValueType();
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, DL, OVT,
- DAG.getNode(N0->getOpcode(), DL, VT,
+ DAG.getNode(N0.getOpcode(), DL, VT,
DAG.getNode(NewOpc, DL, VT,
DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
DAG.getNode(NewOpc, DL, VT,
@@ -4780,15 +4878,88 @@ SDValue AArch64TargetLowering::getPStateSM(SelectionDAG &DAG, SDValue Chain,
Mask);
}
-static std::optional<SMEAttrs> getCalleeAttrsFromExternalFunction(SDValue V) {
- if (auto *ES = dyn_cast<ExternalSymbolSDNode>(V)) {
- StringRef S(ES->getSymbol());
- if (S == "__arm_sme_state" || S == "__arm_tpidr2_save")
- return SMEAttrs(SMEAttrs::SM_Compatible | SMEAttrs::ZA_Preserved);
- if (S == "__arm_tpidr2_restore")
- return SMEAttrs(SMEAttrs::SM_Compatible | SMEAttrs::ZA_Shared);
+// Lower an SME LDR/STR ZA intrinsic.
+// Case 1: If the vector number (vecnum) is an immediate in range, it gets
+// folded into the instruction:
+// ldr(%tileslice, %ptr, 11) -> ldr [%tileslice, 11], [%ptr, 11]
+// Case 2: If the vecnum is not an immediate, then it is used to modify the base
+// and tile slice registers
+// ldr(%tileslice, %ptr, %vecnum)
+// ->
+// %svl = rdsvl
+// %ptr2 = %ptr + %svl * %vecnum
+// %tileslice2 = %tileslice + %vecnum
+// ldr [%tileslice2, 0], [%ptr2, 0]
+// Case 3: If the vecnum is an immediate out of range, then the same is done as
+// case 2, but the base and slice registers are modified by the greatest
+// multiple of 16 less than or equal to the vecnum, and the remainder is folded
+// into the instruction. This means that successive loads and stores that are
+// offset from each other can share the same base and slice register updates.
+// ldr(%tileslice, %ptr, 22)
+// ldr(%tileslice, %ptr, 23)
+// ->
+// %svl = rdsvl
+// %ptr2 = %ptr + %svl * 16
+// %tileslice2 = %tileslice + 16
+// ldr [%tileslice2, 6], [%ptr2, 6]
+// ldr [%tileslice2, 7], [%ptr2, 7]
+// Case 4: If the vecnum is an add of an immediate, then the non-immediate
+// operand and the immediate can be folded into the instruction, like case 2.
+// ldr(%tileslice, %ptr, %vecnum + 7)
+// ldr(%tileslice, %ptr, %vecnum + 8)
+// ->
+// %svl = rdsvl
+// %ptr2 = %ptr + %svl * %vecnum
+// %tileslice2 = %tileslice + %vecnum
+// ldr [%tileslice2, 7], [%ptr2, 7]
+// ldr [%tileslice2, 8], [%ptr2, 8]
+// Case 5: The vecnum being an add of an immediate out of range is also handled,
+// in which case the same remainder logic as case 3 is used.
+SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad) {
+ SDLoc DL(N);
+
+ SDValue TileSlice = N->getOperand(2);
+ SDValue Base = N->getOperand(3);
+ SDValue VecNum = N->getOperand(4);
+ int32_t ConstAddend = 0;
+ SDValue VarAddend = VecNum;
+
+ // If the vnum is an add of an immediate, we can fold it into the instruction
+ if (VecNum.getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(VecNum.getOperand(1))) {
+ ConstAddend = cast<ConstantSDNode>(VecNum.getOperand(1))->getSExtValue();
+ VarAddend = VecNum.getOperand(0);
+ } else if (auto ImmNode = dyn_cast<ConstantSDNode>(VecNum)) {
+ ConstAddend = ImmNode->getSExtValue();
+ VarAddend = SDValue();
+ }
+
+ int32_t ImmAddend = ConstAddend % 16;
+ if (int32_t C = (ConstAddend - ImmAddend)) {
+ SDValue CVal = DAG.getTargetConstant(C, DL, MVT::i32);
+ VarAddend = VarAddend
+ ? DAG.getNode(ISD::ADD, DL, MVT::i32, {VarAddend, CVal})
+ : CVal;
+ }
+
+ if (VarAddend) {
+ // Get the vector length that will be multiplied by vnum
+ auto SVL = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
+ DAG.getConstant(1, DL, MVT::i32));
+
+ // Multiply SVL and vnum then add it to the base
+ SDValue Mul = DAG.getNode(
+ ISD::MUL, DL, MVT::i64,
+ {SVL, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, VarAddend)});
+ Base = DAG.getNode(ISD::ADD, DL, MVT::i64, {Base, Mul});
+ // Just add vnum to the tileslice
+ TileSlice = DAG.getNode(ISD::ADD, DL, MVT::i32, {TileSlice, VarAddend});
}
- return std::nullopt;
+
+ return DAG.getNode(IsLoad ? AArch64ISD::SME_ZA_LDR : AArch64ISD::SME_ZA_STR,
+ DL, MVT::Other,
+ {/*Chain=*/N.getOperand(0), TileSlice, Base,
+ DAG.getTargetConstant(ImmAddend, DL, MVT::i32)});
}
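
// Standalone model (illustrative sketch, not the DAG code itself) of the
// vecnum decomposition used above: the constant part splits into a multiple
// of 16 applied to the base and slice registers plus a 0-15 remainder folded
// into the LDR/STR immediate.
#include <cstdint>
struct VecnumSplit {
  int32_t RegAddend; // applied (scaled by SVL) to the base and slice
  int32_t ImmAddend; // folded into the instruction
};
static VecnumSplit splitVecnum(int32_t ConstAddend) {
  int32_t Imm = ConstAddend % 16;
  return {ConstAddend - Imm, Imm};
}
// splitVecnum(22) == {16, 6} and splitVecnum(23) == {16, 7}, so the two
// accesses of case 3 share a single base/slice update.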
SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
@@ -4814,6 +4985,10 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Chain,
DAG.getTargetConstant(PrfOp, DL, MVT::i32), Addr);
}
+ case Intrinsic::aarch64_sme_str:
+ case Intrinsic::aarch64_sme_ldr: {
+ return LowerSMELdrStr(Op, DAG, IntNo == Intrinsic::aarch64_sme_ldr);
+ }
case Intrinsic::aarch64_sme_za_enable:
return DAG.getNode(
AArch64ISD::SMSTART, DL, MVT::Other,
@@ -5289,6 +5464,27 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), ID,
Op.getOperand(1), Op.getOperand(2));
}
+ case Intrinsic::aarch64_neon_uaddlv: {
+ EVT OpVT = Op.getOperand(1).getValueType();
+ EVT ResVT = Op.getValueType();
+ if (ResVT == MVT::i32 && (OpVT == MVT::v8i8 || OpVT == MVT::v16i8 ||
+ OpVT == MVT::v8i16 || OpVT == MVT::v4i16)) {
+      // Use v4i32 rather than v2i32 to avoid an insert_subvector.
+ SDValue UADDLV =
+ DAG.getNode(AArch64ISD::UADDLV, dl, MVT::v4i32, Op.getOperand(1));
+ SDValue EXTRACT_VEC_ELT =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, UADDLV,
+ DAG.getConstant(0, dl, MVT::i64));
+ return EXTRACT_VEC_ELT;
+ }
+ return SDValue();
+ }
+ case Intrinsic::experimental_cttz_elts: {
+ SDValue NewCttzElts =
+ DAG.getNode(AArch64ISD::CTTZ_ELTS, dl, MVT::i64, Op.getOperand(1));
+
+ return DAG.getZExtOrTrunc(NewCttzElts, dl, Op.getValueType());
+ }
}
}
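
// Scalar reference (hedged, illustrative) for the UADDLV path above: an
// unsigned add-long across vector widens each lane before accumulating, so
// summing sixteen i8 lanes cannot wrap at 8 bits.
#include <cstdint>
static uint32_t uaddlvV16i8(const uint8_t Lanes[16]) {
  uint32_t Sum = 0;
  for (int I = 0; I < 16; ++I)
    Sum += Lanes[I];
  return Sum; // matches extracting lane 0 of the v4i32 UADDLV result
}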
@@ -5301,8 +5497,9 @@ bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
return false;
}
-bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT,
+bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
EVT DataVT) const {
+ const EVT IndexVT = Extend.getOperand(0).getValueType();
// SVE only supports implicit extension of 32-bit indices.
if (!Subtarget->hasSVE() || IndexVT.getVectorElementType() != MVT::i32)
return false;
@@ -5317,8 +5514,33 @@ bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT,
}
bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
- return ExtVal.getValueType().isScalableVector() ||
- Subtarget->useSVEForFixedLengthVectors();
+ EVT ExtVT = ExtVal.getValueType();
+ if (!ExtVT.isScalableVector() && !Subtarget->useSVEForFixedLengthVectors())
+ return false;
+
+ // It may be worth creating extending masked loads if there are multiple
+ // masked loads using the same predicate. That way we'll end up creating
+ // extending masked loads that may then get split by the legaliser. This
+ // results in just one set of predicate unpacks at the start, instead of
+ // multiple sets of vector unpacks after each load.
+ if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal->getOperand(0))) {
+ if (!isLoadExtLegalOrCustom(ISD::ZEXTLOAD, ExtVT, Ld->getValueType(0))) {
+ // Disable extending masked loads for fixed-width for now, since the code
+ // quality doesn't look great.
+ if (!ExtVT.isScalableVector())
+ return false;
+
+ unsigned NumExtMaskedLoads = 0;
+ for (auto *U : Ld->getMask()->uses())
+ if (isa<MaskedLoadSDNode>(U))
+ NumExtMaskedLoads++;
+
+ if (NumExtMaskedLoads <= 1)
+ return false;
+ }
+ }
+
+ return true;
}
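
// Minimal sketch (an assumption, abstracted away from SelectionDAG types) of
// the heuristic above: extending a masked load is only desirable when more
// than one masked load shares the same predicate, amortising the unpacks.
#include <vector>
struct MaskUse { const void *Mask; };
static bool extendIsDesirable(const std::vector<MaskUse> &MaskedLoads,
                              const void *Mask) {
  unsigned Shared = 0;
  for (const MaskUse &L : MaskedLoads)
    if (L.Mask == Mask)
      ++Shared;
  return Shared > 1;
}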
unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
@@ -5552,9 +5774,7 @@ SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
assert(LoadNode && "Expected custom lowering of a masked load node");
EVT VT = Op->getValueType(0);
- if (useSVEForFixedLengthVectorVT(
- VT,
- /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
+ if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
return LowerFixedLengthVectorMLoadToSVE(Op, DAG);
SDValue PassThru = LoadNode->getPassThru();
@@ -5644,11 +5864,11 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
// legalization will break up 256 bit inputs.
ElementCount EC = MemVT.getVectorElementCount();
if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
- EC.isKnownEven() &&
- ((MemVT.getScalarSizeInBits() == 8u ||
- MemVT.getScalarSizeInBits() == 16u ||
- MemVT.getScalarSizeInBits() == 32u ||
- MemVT.getScalarSizeInBits() == 64u))) {
+ EC.isKnownEven() && DAG.getDataLayout().isLittleEndian() &&
+ (MemVT.getScalarSizeInBits() == 8u ||
+ MemVT.getScalarSizeInBits() == 16u ||
+ MemVT.getScalarSizeInBits() == 32u ||
+ MemVT.getScalarSizeInBits() == 64u)) {
SDValue Lo =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
@@ -5701,12 +5921,15 @@ SDValue AArch64TargetLowering::LowerStore128(SDValue Op,
StoreNode->getMergedOrdering() == AtomicOrdering::Unordered ||
StoreNode->getMergedOrdering() == AtomicOrdering::Monotonic);
- SDValue Value = StoreNode->getOpcode() == ISD::STORE
+ SDValue Value = (StoreNode->getOpcode() == ISD::STORE ||
+ StoreNode->getOpcode() == ISD::ATOMIC_STORE)
? StoreNode->getOperand(1)
: StoreNode->getOperand(2);
SDLoc DL(Op);
auto StoreValue = DAG.SplitScalar(Value, DL, MVT::i64, MVT::i64);
unsigned Opcode = IsStoreRelease ? AArch64ISD::STILP : AArch64ISD::STP;
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(StoreValue.first, StoreValue.second);
SDValue Result = DAG.getMemIntrinsicNode(
Opcode, DL, DAG.getVTList(MVT::Other),
{StoreNode->getChain(), StoreValue.first, StoreValue.second,
@@ -5803,6 +6026,73 @@ static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
+// Treat FSHR with a constant shift amount as a legal operation; otherwise it
+// is expanded. FSHL is converted to FSHR before deciding what to do with it.
+static SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) {
+ SDValue Shifts = Op.getOperand(2);
+ // Check if the shift amount is a constant
+ // If opcode is FSHL, convert it to FSHR
+ if (auto *ShiftNo = dyn_cast<ConstantSDNode>(Shifts)) {
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+
+ if (Op.getOpcode() == ISD::FSHL) {
+ unsigned int NewShiftNo =
+ VT.getFixedSizeInBits() - ShiftNo->getZExtValue();
+ return DAG.getNode(
+ ISD::FSHR, DL, VT, Op.getOperand(0), Op.getOperand(1),
+ DAG.getConstant(NewShiftNo, DL, Shifts.getValueType()));
+ } else if (Op.getOpcode() == ISD::FSHR) {
+ return Op;
+ }
+ }
+
+ return SDValue();
+}
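
// Hedged scalar model of the rewrite above: on an N-bit type,
// fshl(a, b, n) == fshr(a, b, N - n), so only FSHR needs to be handled later.
#include <cstdint>
static uint32_t fshr32(uint32_t Hi, uint32_t Lo, unsigned N) {
  N &= 31;
  return N == 0 ? Lo : (Lo >> N) | (Hi << (32 - N));
}
static uint32_t fshl32(uint32_t Hi, uint32_t Lo, unsigned N) {
  N &= 31;
  return N == 0 ? Hi : fshr32(Hi, Lo, 32 - N); // mirrors NewShiftNo
}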
+
+static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG) {
+ SDValue X = Op.getOperand(0);
+ EVT XScalarTy = X.getValueType();
+ SDValue Exp = Op.getOperand(1);
+
+ SDLoc DL(Op);
+ EVT XVT, ExpVT;
+ switch (Op.getSimpleValueType().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::f16:
+ X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X);
+ [[fallthrough]];
+ case MVT::f32:
+ XVT = MVT::nxv4f32;
+ ExpVT = MVT::nxv4i32;
+ break;
+ case MVT::f64:
+ XVT = MVT::nxv2f64;
+ ExpVT = MVT::nxv2i64;
+ Exp = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Exp);
+ break;
+ }
+
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+ SDValue VX =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, XVT, DAG.getUNDEF(XVT), X, Zero);
+ SDValue VExp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ExpVT,
+ DAG.getUNDEF(ExpVT), Exp, Zero);
+ SDValue VPg = getPTrue(DAG, DL, XVT.changeVectorElementType(MVT::i1),
+ AArch64SVEPredPattern::all);
+ SDValue FScale =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XVT,
+ DAG.getConstant(Intrinsic::aarch64_sve_fscale, DL, MVT::i64),
+ VPg, VX, VExp);
+ SDValue Final =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, X.getValueType(), FScale, Zero);
+ if (X.getValueType() != XScalarTy)
+ Final = DAG.getNode(ISD::FP_ROUND, DL, XScalarTy, Final,
+ DAG.getIntPtrConstant(1, SDLoc(Op)));
+ return Final;
+}
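
// Reference semantics (illustrative) for the FSCALE-based lowering above:
// FLDEXP scales a floating-point value by a power of two, exactly like the
// libm ldexp family.
#include <cassert>
#include <cmath>
int main() {
  assert(std::ldexp(1.5, 4) == 24.0); // 1.5 * 2^4, a single fscale on lane 0
  assert(std::ldexp(8.0, -3) == 1.0); // negative exponents scale down
  return 0;
}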
+
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");
@@ -6010,8 +6300,6 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::VECREDUCE_FMAXIMUM:
case ISD::VECREDUCE_FMINIMUM:
return LowerVECREDUCE(Op, DAG);
- case ISD::ATOMIC_LOAD_SUB:
- return LowerATOMIC_LOAD_SUB(Op, DAG);
case ISD::ATOMIC_LOAD_AND:
return LowerATOMIC_LOAD_AND(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
@@ -6084,6 +6372,16 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerVECTOR_DEINTERLEAVE(Op, DAG);
case ISD::VECTOR_INTERLEAVE:
return LowerVECTOR_INTERLEAVE(Op, DAG);
+ case ISD::LROUND:
+ case ISD::LLROUND:
+ case ISD::LRINT:
+ case ISD::LLRINT: {
+ assert(Op.getOperand(0).getValueType() == MVT::f16 &&
+ "Expected custom lowering of rounding operations only for f16");
+ SDLoc DL(Op);
+ SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
+ }
case ISD::STRICT_LROUND:
case ISD::STRICT_LLROUND:
case ISD::STRICT_LRINT:
@@ -6112,6 +6410,11 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return Result;
}
+ case ISD::FSHL:
+ case ISD::FSHR:
+ return LowerFunnelShift(Op, DAG);
+ case ISD::FLDEXP:
+ return LowerFLDEXP(Op, DAG);
}
}
@@ -6141,9 +6444,9 @@ bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
break;
}
- // All SVE implementations support NEON sized vectors.
+ // NEON-sized vectors can be emulated using SVE instructions.
if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
- return Subtarget->hasSVE();
+ return Subtarget->hasSVEorSME();
// Ensure NEON MVTs only belong to a single register class.
if (VT.getFixedSizeInBits() <= 128)
@@ -6205,8 +6508,6 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
switch (CC) {
default:
report_fatal_error("Unsupported calling convention.");
- case CallingConv::WebKit_JS:
- return CC_AArch64_WebKit_JS;
case CallingConv::GHC:
return CC_AArch64_GHC;
case CallingConv::C:
@@ -6217,10 +6518,14 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::Tail:
- if (Subtarget->isTargetWindows() && IsVarArg) {
- if (Subtarget->isWindowsArm64EC())
- return CC_AArch64_Arm64EC_VarArg;
- return CC_AArch64_Win64_VarArg;
+ case CallingConv::GRAAL:
+ if (Subtarget->isTargetWindows()) {
+ if (IsVarArg) {
+ if (Subtarget->isWindowsArm64EC())
+ return CC_AArch64_Arm64EC_VarArg;
+ return CC_AArch64_Win64_VarArg;
+ }
+ return CC_AArch64_Win64PCS;
}
if (!Subtarget->isTargetDarwin())
return CC_AArch64_AAPCS;
@@ -6234,7 +6539,7 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
return CC_AArch64_Arm64EC_VarArg;
return CC_AArch64_Win64_VarArg;
}
- return CC_AArch64_AAPCS;
+ return CC_AArch64_Win64PCS;
case CallingConv::CFGuard_Check:
return CC_AArch64_Win64_CFGuard_Check;
case CallingConv::AArch64_VectorCall:
@@ -6247,8 +6552,7 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
CCAssignFn *
AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
- return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
- : RetCC_AArch64_AAPCS;
+ return RetCC_AArch64_AAPCS;
}
@@ -6278,6 +6582,17 @@ AArch64TargetLowering::allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
Chain = DAG.getStore(Chain, DL, Buffer, Ptr, MPI);
+ // Set the reserved bytes (10-15) to zero
+ EVT PtrTy = Ptr.getValueType();
+ SDValue ReservedPtr =
+ DAG.getNode(ISD::ADD, DL, PtrTy, Ptr, DAG.getConstant(10, DL, PtrTy));
+ Chain = DAG.getStore(Chain, DL, DAG.getConstant(0, DL, MVT::i16), ReservedPtr,
+ MPI);
+ ReservedPtr =
+ DAG.getNode(ISD::ADD, DL, PtrTy, Ptr, DAG.getConstant(12, DL, PtrTy));
+ Chain = DAG.getStore(Chain, DL, DAG.getConstant(0, DL, MVT::i32), ReservedPtr,
+ MPI);
+
return TPIDR2Obj;
}
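
// Layout sketch (an assumption based on the SME ABI's 16-byte TPIDR2 block)
// showing why one i16 store at offset 10 plus one i32 store at offset 12
// clear exactly the reserved bytes:
#include <cstdint>
struct TPIDR2Block {
  uint64_t ZASaveBuffer;    // bytes 0-7: pointer to the lazy save buffer
  uint16_t NumZASaveSlices; // bytes 8-9: written before a call
  uint8_t Reserved[6];      // bytes 10-15: must be zero
};
static_assert(sizeof(TPIDR2Block) == 16, "block is one 16-byte unit");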
@@ -6573,13 +6888,10 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// Insert the SMSTART if this is a locally streaming function and
// make sure it is Glued to the last CopyFromReg value.
if (IsLocallyStreaming) {
- const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
- Chain = DAG.getNode(
- AArch64ISD::SMSTART, DL, DAG.getVTList(MVT::Other, MVT::Glue),
- {DAG.getRoot(),
- DAG.getTargetConstant((int32_t)AArch64SVCR::SVCRSM, DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64),
- DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask()), Glue});
+ Chain =
+ changeStreamingMode(DAG, DL, /*Enable*/ true, DAG.getRoot(), Glue,
+ DAG.getConstant(0, DL, MVT::i64), /*Entry*/ true);
+
// Ensure that the SMSTART happens after the CopyWithChain such that its
// chain result is used.
for (unsigned I=0; I<InVals.size(); ++I) {
@@ -6921,7 +7233,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
SMEAttrs CallerAttrs(MF.getFunction());
auto CalleeAttrs = CLI.CB ? SMEAttrs(*CLI.CB) : SMEAttrs(SMEAttrs::Normal);
if (CallerAttrs.requiresSMChange(CalleeAttrs) ||
- CallerAttrs.requiresLazySave(CalleeAttrs))
+ CallerAttrs.requiresLazySave(CalleeAttrs) ||
+ CallerAttrs.hasStreamingBody())
return false;
// Functions using the C or Fast calling convention that have an SVE signature
@@ -7104,9 +7417,29 @@ static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG) {
return ZExtBool;
}
+void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
+ SDNode *Node) const {
+  // Live-in physreg copies that are glued to SMSTART are applied as
+  // implicit-defs in the InstrEmitter. Here we remove them, allowing the
+  // register allocator to pass call args in callee-saved regs without adding
+  // extra copies to work around these fake clobbers of actually-preserved
+  // GPRs.
+ if (MI.getOpcode() == AArch64::MSRpstatesvcrImm1 ||
+ MI.getOpcode() == AArch64::MSRpstatePseudo)
+ for (unsigned I = MI.getNumOperands() - 1; I > 0; --I)
+ if (MachineOperand &MO = MI.getOperand(I);
+ MO.isReg() && MO.isImplicit() && MO.isDef() &&
+ (AArch64::GPR32RegClass.contains(MO.getReg()) ||
+ AArch64::GPR64RegClass.contains(MO.getReg())))
+ MI.removeOperand(I);
+}
+
SDValue AArch64TargetLowering::changeStreamingMode(
SelectionDAG &DAG, SDLoc DL, bool Enable,
SDValue Chain, SDValue InGlue, SDValue PStateSM, bool Entry) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ FuncInfo->setHasStreamingModeChanges(true);
+
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
SDValue RegMask = DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask());
SDValue MSROp =
@@ -7253,39 +7586,79 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SMEAttrs CalleeAttrs, CallerAttrs(MF.getFunction());
if (CLI.CB)
CalleeAttrs = SMEAttrs(*CLI.CB);
- else if (std::optional<SMEAttrs> Attrs =
- getCalleeAttrsFromExternalFunction(CLI.Callee))
- CalleeAttrs = *Attrs;
+ else if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
+ CalleeAttrs = SMEAttrs(ES->getSymbol());
+
+ auto DescribeCallsite =
+ [&](OptimizationRemarkAnalysis &R) -> OptimizationRemarkAnalysis & {
+ R << "call from '" << ore::NV("Caller", MF.getName()) << "' to '";
+ if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
+ R << ore::NV("Callee", ES->getSymbol());
+ else if (CLI.CB && CLI.CB->getCalledFunction())
+ R << ore::NV("Callee", CLI.CB->getCalledFunction()->getName());
+ else
+ R << "unknown callee";
+ R << "'";
+ return R;
+ };
bool RequiresLazySave = CallerAttrs.requiresLazySave(CalleeAttrs);
-
- MachineFrameInfo &MFI = MF.getFrameInfo();
if (RequiresLazySave) {
- // Set up a lazy save mechanism by storing the runtime live slices
- // (worst-case N*N) to the TPIDR2 stack object.
- SDValue N = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
- DAG.getConstant(1, DL, MVT::i32));
- SDValue NN = DAG.getNode(ISD::MUL, DL, MVT::i64, N, N);
- unsigned TPIDR2Obj = FuncInfo->getLazySaveTPIDR2Obj();
+ SDValue NumZaSaveSlices;
+ if (!CalleeAttrs.preservesZA()) {
+ // Set up a lazy save mechanism by storing the runtime live slices
+ // (worst-case SVL) to the TPIDR2 stack object.
+ NumZaSaveSlices = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
+ DAG.getConstant(1, DL, MVT::i32));
+    } else {
+      // The callee preserves ZA, so no slices need to be saved.
+      NumZaSaveSlices = DAG.getConstant(0, DL, MVT::i64);
+    }
+ unsigned TPIDR2Obj = FuncInfo->getLazySaveTPIDR2Obj();
MachinePointerInfo MPI = MachinePointerInfo::getStack(MF, TPIDR2Obj);
SDValue TPIDR2ObjAddr = DAG.getFrameIndex(TPIDR2Obj,
DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
- SDValue BufferPtrAddr =
+ SDValue NumZaSaveSlicesAddr =
DAG.getNode(ISD::ADD, DL, TPIDR2ObjAddr.getValueType(), TPIDR2ObjAddr,
DAG.getConstant(8, DL, TPIDR2ObjAddr.getValueType()));
- Chain = DAG.getTruncStore(Chain, DL, NN, BufferPtrAddr, MPI, MVT::i16);
+ Chain = DAG.getTruncStore(Chain, DL, NumZaSaveSlices, NumZaSaveSlicesAddr,
+ MPI, MVT::i16);
Chain = DAG.getNode(
ISD::INTRINSIC_VOID, DL, MVT::Other, Chain,
DAG.getConstant(Intrinsic::aarch64_sme_set_tpidr2, DL, MVT::i32),
TPIDR2ObjAddr);
+ OptimizationRemarkEmitter ORE(&MF.getFunction());
+ ORE.emit([&]() {
+ auto R = CLI.CB ? OptimizationRemarkAnalysis("sme", "SMELazySaveZA",
+ CLI.CB)
+ : OptimizationRemarkAnalysis("sme", "SMELazySaveZA",
+ &MF.getFunction());
+ DescribeCallsite(R) << " sets up a lazy save for ZA";
+ if (CalleeAttrs.preservesZA())
+ R << ", but callee preserves ZA, so we request 0 slices to be saved";
+ else
+ R << ", and we request that all slices be saved";
+ R << ore::setExtraArgs()
+ << ore::NV("CalleePreservesZA", CalleeAttrs.preservesZA());
+ return R;
+ });
}
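
// Usage note (hedged): these analysis remarks come out through the normal
// remark machinery, so something like
//   clang -O2 -Rpass-analysis=sme file.c
// should print "call from 'f' to 'g' sets up a lazy save for ZA".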
SDValue PStateSM;
std::optional<bool> RequiresSMChange =
CallerAttrs.requiresSMChange(CalleeAttrs);
- if (RequiresSMChange)
+ if (RequiresSMChange) {
PStateSM = getPStateSM(DAG, Chain, CallerAttrs, DL, MVT::i64);
+ OptimizationRemarkEmitter ORE(&MF.getFunction());
+ ORE.emit([&]() {
+ auto R = CLI.CB ? OptimizationRemarkAnalysis("sme", "SMETransition",
+ CLI.CB)
+ : OptimizationRemarkAnalysis("sme", "SMETransition",
+ &MF.getFunction());
+ DescribeCallsite(R) << " requires a streaming mode transition";
+ return R;
+ });
+ }
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
@@ -7381,6 +7754,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
if (isScalable)
MFI.setStackID(FI, TargetStackID::ScalableVector);
@@ -7451,11 +7825,6 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
return ArgReg.Reg == VA.getLocReg();
});
} else {
- // Add an extra level of indirection for streaming mode changes by
- // using a pseudo copy node that cannot be rematerialised between a
- // smstart/smstop and the call by the simple register coalescer.
- if (RequiresSMChange && isa<FrameIndexSDNode>(Arg))
- Arg = DAG.getNode(AArch64ISD::OBSCURE_COPY, DL, MVT::i64, Arg);
RegsToPass.emplace_back(VA.getLocReg(), Arg);
RegsUsed.insert(VA.getLocReg());
const TargetOptions &Options = DAG.getTarget().Options;
@@ -7702,35 +8071,34 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}
if (RequiresLazySave) {
- // Unconditionally resume ZA.
- Result = DAG.getNode(
- AArch64ISD::SMSTART, DL, MVT::Other, Result,
- DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
-
- // Conditionally restore the lazy save using a pseudo node.
- unsigned FI = FuncInfo->getLazySaveTPIDR2Obj();
- SDValue RegMask = DAG.getRegisterMask(
- TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
- SDValue RestoreRoutine = DAG.getTargetExternalSymbol(
- "__arm_tpidr2_restore", getPointerTy(DAG.getDataLayout()));
- SDValue TPIDR2_EL0 = DAG.getNode(
- ISD::INTRINSIC_W_CHAIN, DL, MVT::i64, Result,
- DAG.getConstant(Intrinsic::aarch64_sme_get_tpidr2, DL, MVT::i32));
-
- // Copy the address of the TPIDR2 block into X0 before 'calling' the
- // RESTORE_ZA pseudo.
- SDValue Glue;
- SDValue TPIDR2Block = DAG.getFrameIndex(
- FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
- Result = DAG.getCopyToReg(Result, DL, AArch64::X0, TPIDR2Block, Glue);
- Result = DAG.getNode(AArch64ISD::RESTORE_ZA, DL, MVT::Other,
- {Result, TPIDR2_EL0,
- DAG.getRegister(AArch64::X0, MVT::i64),
- RestoreRoutine,
- RegMask,
- Result.getValue(1)});
-
+ if (!CalleeAttrs.preservesZA()) {
+ // Unconditionally resume ZA.
+ Result = DAG.getNode(
+ AArch64ISD::SMSTART, DL, MVT::Other, Result,
+ DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
+
+ // Conditionally restore the lazy save using a pseudo node.
+ unsigned FI = FuncInfo->getLazySaveTPIDR2Obj();
+ SDValue RegMask = DAG.getRegisterMask(
+ TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
+ SDValue RestoreRoutine = DAG.getTargetExternalSymbol(
+ "__arm_tpidr2_restore", getPointerTy(DAG.getDataLayout()));
+ SDValue TPIDR2_EL0 = DAG.getNode(
+ ISD::INTRINSIC_W_CHAIN, DL, MVT::i64, Result,
+ DAG.getConstant(Intrinsic::aarch64_sme_get_tpidr2, DL, MVT::i32));
+
+ // Copy the address of the TPIDR2 block into X0 before 'calling' the
+ // RESTORE_ZA pseudo.
+ SDValue Glue;
+ SDValue TPIDR2Block = DAG.getFrameIndex(
+ FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
+ Result = DAG.getCopyToReg(Result, DL, AArch64::X0, TPIDR2Block, Glue);
+ Result = DAG.getNode(AArch64ISD::RESTORE_ZA, DL, MVT::Other,
+ {Result, TPIDR2_EL0,
+ DAG.getRegister(AArch64::X0, MVT::i64),
+ RestoreRoutine, RegMask, Result.getValue(1)});
+ }
// Finally reset the TPIDR2_EL0 register to 0.
Result = DAG.getNode(
ISD::INTRINSIC_VOID, DL, MVT::Other, Result,
@@ -7833,11 +8201,9 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Emit SMSTOP before returning from a locally streaming function
SMEAttrs FuncAttrs(MF.getFunction());
if (FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface()) {
- Chain = DAG.getNode(
- AArch64ISD::SMSTOP, DL, DAG.getVTList(MVT::Other, MVT::Glue), Chain,
- DAG.getTargetConstant((int32_t)AArch64SVCR::SVCRSM, DL, MVT::i32),
- DAG.getConstant(1, DL, MVT::i64), DAG.getConstant(0, DL, MVT::i64),
- DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask()));
+ Chain = changeStreamingMode(
+ DAG, DL, /*Enable*/ false, Chain, /*Glue*/ SDValue(),
+ DAG.getConstant(1, DL, MVT::i64), /*Entry*/ true);
Glue = Chain.getValue(1);
}
@@ -7987,7 +8353,8 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
}
SDValue Result;
- if (getTargetMachine().getCodeModel() == CodeModel::Large) {
+ if (getTargetMachine().getCodeModel() == CodeModel::Large &&
+ !getTargetMachine().isPositionIndependent()) {
Result = getAddrLarge(GN, DAG, OpFlags);
} else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
Result = getAddrTiny(GN, DAG, OpFlags);
@@ -8645,9 +9012,9 @@ SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
- SDValue UaddLV = DAG.getNode(
- ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
- DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
+ SDValue UaddLV = DAG.getNode(AArch64ISD::UADDLV, DL, MVT::v4i32, CtPop);
+ UaddLV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, UaddLV,
+ DAG.getConstant(0, DL, MVT::i64));
if (IsParity)
UaddLV = DAG.getNode(ISD::AND, DL, MVT::i32, UaddLV,
@@ -8660,9 +9027,9 @@ SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
Val = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Val);
SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v16i8, Val);
- SDValue UaddLV = DAG.getNode(
- ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
- DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
+ SDValue UaddLV = DAG.getNode(AArch64ISD::UADDLV, DL, MVT::v4i32, CtPop);
+ UaddLV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, UaddLV,
+ DAG.getConstant(0, DL, MVT::i64));
if (IsParity)
UaddLV = DAG.getNode(ISD::AND, DL, MVT::i32, UaddLV,
@@ -9352,12 +9719,12 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
// is necessary here. Just get the address of the jump table.
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- if (getTargetMachine().getCodeModel() == CodeModel::Large &&
- !Subtarget->isTargetMachO()) {
+ CodeModel::Model CM = getTargetMachine().getCodeModel();
+ if (CM == CodeModel::Large && !getTargetMachine().isPositionIndependent() &&
+ !Subtarget->isTargetMachO())
return getAddrLarge(JT, DAG);
- } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
+ if (CM == CodeModel::Tiny)
return getAddrTiny(JT, DAG);
- }
return getAddr(JT, DAG);
}
@@ -9376,34 +9743,35 @@ SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
SDNode *Dest =
DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
- return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
- SDValue(Dest, 0));
+ SDValue JTInfo = DAG.getJumpTableDebugInfo(JTI, Op.getOperand(0), DL);
+ return DAG.getNode(ISD::BRIND, DL, MVT::Other, JTInfo, SDValue(Dest, 0));
}
SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
-
- if (getTargetMachine().getCodeModel() == CodeModel::Large) {
+ CodeModel::Model CM = getTargetMachine().getCodeModel();
+ if (CM == CodeModel::Large) {
// Use the GOT for the large code model on iOS.
if (Subtarget->isTargetMachO()) {
return getGOT(CP, DAG);
}
- return getAddrLarge(CP, DAG);
- } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
+ if (!getTargetMachine().isPositionIndependent())
+ return getAddrLarge(CP, DAG);
+ } else if (CM == CodeModel::Tiny) {
return getAddrTiny(CP, DAG);
- } else {
- return getAddr(CP, DAG);
}
+ return getAddr(CP, DAG);
}
SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
- if (getTargetMachine().getCodeModel() == CodeModel::Large &&
- !Subtarget->isTargetMachO()) {
- return getAddrLarge(BA, DAG);
- } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
+ CodeModel::Model CM = getTargetMachine().getCodeModel();
+ if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) {
+ if (!getTargetMachine().isPositionIndependent())
+ return getAddrLarge(BA, DAG);
+ } else if (CM == CodeModel::Tiny) {
return getAddrTiny(BA, DAG);
}
return getAddr(BA, DAG);
@@ -9674,9 +10042,10 @@ Register AArch64TargetLowering::
getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const {
Register Reg = MatchRegisterName(RegName);
if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
- const MCRegisterInfo *MRI = Subtarget->getRegisterInfo();
+ const AArch64RegisterInfo *MRI = Subtarget->getRegisterInfo();
unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
- if (!Subtarget->isXRegisterReserved(DwarfRegNum))
+ if (!Subtarget->isXRegisterReserved(DwarfRegNum) &&
+ !MRI->isReservedReg(MF, Reg))
Reg = 0;
}
if (Reg)
@@ -9945,19 +10314,61 @@ const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
return "r";
}
-enum PredicateConstraint {
- Upl,
- Upa,
- Invalid
-};
+enum class PredicateConstraint { Uph, Upl, Upa };
+
+static std::optional<PredicateConstraint>
+parsePredicateConstraint(StringRef Constraint) {
+ return StringSwitch<std::optional<PredicateConstraint>>(Constraint)
+ .Case("Uph", PredicateConstraint::Uph)
+ .Case("Upl", PredicateConstraint::Upl)
+ .Case("Upa", PredicateConstraint::Upa)
+ .Default(std::nullopt);
+}
+
+static const TargetRegisterClass *
+getPredicateRegisterClass(PredicateConstraint Constraint, EVT VT) {
+ if (VT != MVT::aarch64svcount &&
+ (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1))
+ return nullptr;
-static PredicateConstraint parsePredicateConstraint(StringRef Constraint) {
- PredicateConstraint P = PredicateConstraint::Invalid;
- if (Constraint == "Upa")
- P = PredicateConstraint::Upa;
- if (Constraint == "Upl")
- P = PredicateConstraint::Upl;
- return P;
+ switch (Constraint) {
+ case PredicateConstraint::Uph:
+ return VT == MVT::aarch64svcount ? &AArch64::PNR_p8to15RegClass
+ : &AArch64::PPR_p8to15RegClass;
+ case PredicateConstraint::Upl:
+ return VT == MVT::aarch64svcount ? &AArch64::PNR_3bRegClass
+ : &AArch64::PPR_3bRegClass;
+ case PredicateConstraint::Upa:
+ return VT == MVT::aarch64svcount ? &AArch64::PNRRegClass
+ : &AArch64::PPRRegClass;
+ }
+
+ llvm_unreachable("Missing PredicateConstraint!");
+}
+
+enum class ReducedGprConstraint { Uci, Ucj };
+
+static std::optional<ReducedGprConstraint>
+parseReducedGprConstraint(StringRef Constraint) {
+ return StringSwitch<std::optional<ReducedGprConstraint>>(Constraint)
+ .Case("Uci", ReducedGprConstraint::Uci)
+ .Case("Ucj", ReducedGprConstraint::Ucj)
+ .Default(std::nullopt);
+}
+
+static const TargetRegisterClass *
+getReducedGprRegisterClass(ReducedGprConstraint Constraint, EVT VT) {
+ if (!VT.isScalarInteger() || VT.getFixedSizeInBits() > 64)
+ return nullptr;
+
+ switch (Constraint) {
+ case ReducedGprConstraint::Uci:
+ return &AArch64::MatrixIndexGPR32_8_11RegClass;
+ case ReducedGprConstraint::Ucj:
+ return &AArch64::MatrixIndexGPR32_12_15RegClass;
+ }
+
+ llvm_unreachable("Missing ReducedGprConstraint!");
}
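
// Inline-asm usage sketch (hedged; assumes a compiler that exposes these
// AArch64 constraints): "Uci" pins an operand to w8-w11 and "Ucj" to w12-w15,
// the GPR ranges usable as SME matrix indices.
static inline unsigned copyViaW8ToW11(unsigned In) {
  unsigned Out;
  asm("mov %w0, %w1" : "=r"(Out) : "Uci"(In));
  return Out;
}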
// The set of cc code supported is from
@@ -10055,9 +10466,10 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
case 'S': // A symbolic address
return C_Other;
}
- } else if (parsePredicateConstraint(Constraint) !=
- PredicateConstraint::Invalid)
- return C_RegisterClass;
+ } else if (parsePredicateConstraint(Constraint))
+ return C_RegisterClass;
+ else if (parseReducedGprConstraint(Constraint))
+ return C_RegisterClass;
else if (parseConstraintCode(Constraint) != AArch64CC::Invalid)
return C_Other;
return TargetLowering::getConstraintType(Constraint);
@@ -10091,7 +10503,8 @@ AArch64TargetLowering::getSingleConstraintMatchWeight(
weight = CW_Constant;
break;
case 'U':
- if (parsePredicateConstraint(constraint) != PredicateConstraint::Invalid)
+ if (parsePredicateConstraint(constraint) ||
+ parseReducedGprConstraint(constraint))
weight = CW_Register;
break;
}
@@ -10148,14 +10561,13 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
break;
}
} else {
- PredicateConstraint PC = parsePredicateConstraint(Constraint);
- if (PC != PredicateConstraint::Invalid) {
- if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
- return std::make_pair(0U, nullptr);
- bool restricted = (PC == PredicateConstraint::Upl);
- return restricted ? std::make_pair(0U, &AArch64::PPR_3bRegClass)
- : std::make_pair(0U, &AArch64::PPRRegClass);
- }
+ if (const auto PC = parsePredicateConstraint(Constraint))
+ if (const auto *RegClass = getPredicateRegisterClass(*PC, VT))
+ return std::make_pair(0U, RegClass);
+
+ if (const auto RGC = parseReducedGprConstraint(Constraint))
+ if (const auto *RegClass = getReducedGprRegisterClass(*RGC, VT))
+ return std::make_pair(0U, RegClass);
}
if (StringRef("{cc}").equals_insensitive(Constraint) ||
parseConstraintCode(Constraint) != AArch64CC::Invalid)
@@ -10208,12 +10620,12 @@ EVT AArch64TargetLowering::getAsmOperandValueType(const DataLayout &DL,
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void AArch64TargetLowering::LowerAsmOperandForConstraint(
- SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
+ SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
SDValue Result;
// Currently only support length 1 constraints.
- if (Constraint.length() != 1)
+ if (Constraint.size() != 1)
return;
char ConstraintLetter = Constraint[0];
@@ -10626,7 +11038,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
EVT SrcVT = Src.ShuffleVec.getValueType();
TypeSize SrcVTSize = SrcVT.getSizeInBits();
- if (SrcVTSize == TypeSize::Fixed(VTSize))
+ if (SrcVTSize == TypeSize::getFixed(VTSize))
continue;
// This stage of the search produces a source with the same element type as
@@ -10700,7 +11112,12 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
if (SrcEltTy == SmallestEltTy)
continue;
assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
- Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
+ if (DAG.getDataLayout().isBigEndian()) {
+ Src.ShuffleVec =
+ DAG.getNode(AArch64ISD::NVCAST, dl, ShuffleVT, Src.ShuffleVec);
+ } else {
+ Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
+ }
Src.WindowScale =
SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits();
Src.WindowBase *= Src.WindowScale;
@@ -10752,7 +11169,12 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
ShuffleOps[1], Mask);
- SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
+ SDValue V;
+ if (DAG.getDataLayout().isBigEndian()) {
+ V = DAG.getNode(AArch64ISD::NVCAST, dl, VT, Shuffle);
+ } else {
+ V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
+ }
LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump();
dbgs() << "Reshuffle, creating node: "; V.dump(););
@@ -11220,7 +11642,7 @@ static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1,
};
// For OP_MOVLANE shuffles, the RHSID represents the lane to move into. We
- // get the lane to move from from the PFID, which is always from the
+ // get the lane to move from the PFID, which is always from the
// original vectors (V1 or V2).
SDValue OpLHS = GeneratePerfectShuffle(
LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
@@ -11700,7 +12122,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
if (((VT.getVectorNumElements() == 8 && VT.getScalarSizeInBits() == 16) ||
(VT.getVectorNumElements() == 16 && VT.getScalarSizeInBits() == 8)) &&
- ShuffleVectorInst::isReverseMask(ShuffleMask)) {
+ ShuffleVectorInst::isReverseMask(ShuffleMask, ShuffleMask.size())) {
SDValue Rev = DAG.getNode(AArch64ISD::REV64, dl, VT, V1);
return DAG.getNode(AArch64ISD::EXT, dl, VT, Rev, Rev,
DAG.getConstant(8, dl, MVT::i32));
@@ -13284,11 +13706,17 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
"function only supposed to emit natural comparisons");
+ APInt SplatValue;
+ APInt SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
- APInt CnstBits(VT.getSizeInBits(), 0);
- APInt UndefBits(VT.getSizeInBits(), 0);
- bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
- bool IsZero = IsCnst && (CnstBits == 0);
+ bool IsCnst = BVN && BVN->isConstantSplat(SplatValue, SplatUndef,
+ SplatBitSize, HasAnyUndefs);
+ bool IsZero = IsCnst && SplatValue == 0;
+ bool IsOne = IsCnst && SplatValue == 1;
+ bool IsMinusOne = IsCnst && SplatValue.isAllOnes();
if (SrcVT.getVectorElementType().isFloatingPoint()) {
switch (CC) {
@@ -13357,6 +13785,8 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
case AArch64CC::GT:
if (IsZero)
return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
+ if (IsMinusOne)
+ return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS, RHS);
return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
case AArch64CC::LE:
if (IsZero)
@@ -13369,6 +13799,8 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
case AArch64CC::LT:
if (IsZero)
return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS);
+ if (IsOne)
+ return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS);
case AArch64CC::HI:
return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS);
@@ -13630,23 +14062,6 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
}
}
-SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
- SelectionDAG &DAG) const {
- auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
- if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
- return SDValue();
-
- // LSE has an atomic load-add instruction, but not a load-sub.
- SDLoc dl(Op);
- MVT VT = Op.getSimpleValueType();
- SDValue RHS = Op.getOperand(2);
- AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
- RHS = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), RHS);
- return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, AN->getMemoryVT(),
- Op.getOperand(0), Op.getOperand(1), RHS,
- AN->getMemOperand());
-}
-
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
SelectionDAG &DAG) const {
auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
@@ -13666,9 +14081,34 @@ SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
AN->getMemOperand());
}
-SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
- SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const {
+SDValue
+AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+
SDLoc dl(Op);
+ // Get the inputs.
+ SDNode *Node = Op.getNode();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ MaybeAlign Align =
+ cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
+ EVT VT = Node->getValueType(0);
+
+ if (DAG.getMachineFunction().getFunction().hasFnAttribute(
+ "no-stack-arg-probe")) {
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
+ Chain = SP.getValue(1);
+ SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
+ if (Align)
+ SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
+ DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
+ Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
+ SDValue Ops[2] = {SP, Chain};
+ return DAG.getMergeValues(Ops, dl);
+ }
+
+ Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
+
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Callee = DAG.getTargetExternalSymbol(Subtarget->getChkStkName(),
PtrVT, 0);
@@ -13692,7 +14132,59 @@ SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
Size = DAG.getNode(ISD::SHL, dl, MVT::i64, Size,
DAG.getConstant(4, dl, MVT::i64));
- return Chain;
+
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
+ Chain = SP.getValue(1);
+ SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
+ if (Align)
+ SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
+ DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
+ Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
+
+ Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl);
+
+ SDValue Ops[2] = {SP, Chain};
+ return DAG.getMergeValues(Ops, dl);
+}
+
+SDValue
+AArch64TargetLowering::LowerInlineDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Get the inputs.
+ SDNode *Node = Op.getNode();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+
+ MaybeAlign Align =
+ cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
+ SDLoc dl(Op);
+ EVT VT = Node->getValueType(0);
+
+ // Construct the new SP value in a GPR.
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
+ Chain = SP.getValue(1);
+ SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
+ if (Align)
+ SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
+ DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
+
+ // Set the real SP to the new value with a probing loop.
+ Chain = DAG.getNode(AArch64ISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
+ SDValue Ops[2] = {SP, Chain};
+ return DAG.getMergeValues(Ops, dl);
+}
+
+SDValue
+AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ if (Subtarget->isTargetWindows())
+ return LowerWindowsDYNAMIC_STACKALLOC(Op, DAG);
+ else if (hasInlineStackProbe(MF))
+ return LowerInlineDYNAMIC_STACKALLOC(Op, DAG);
+ else
+ return SDValue();
}
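
// Standalone model (illustrative) of the SP arithmetic shared by the Windows
// and inline-probe paths above: subtract the size, then clear low bits so the
// result honours the requested alignment (the AND with -Align).
#include <cstdint>
static uint64_t adjustSP(uint64_t SP, uint64_t Size, uint64_t Align) {
  SP -= Size;
  if (Align)
    SP &= ~(Align - 1); // same as SP & -Align when Align is a power of two
  return SP;
}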
// When x and y are extended, lower:
@@ -13746,51 +14238,6 @@ SDValue AArch64TargetLowering::LowerAVG(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::ADD, dl, VT, Add, tmp);
}
-SDValue
-AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
- SelectionDAG &DAG) const {
- assert(Subtarget->isTargetWindows() &&
- "Only Windows alloca probing supported");
- SDLoc dl(Op);
- // Get the inputs.
- SDNode *Node = Op.getNode();
- SDValue Chain = Op.getOperand(0);
- SDValue Size = Op.getOperand(1);
- MaybeAlign Align =
- cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
- EVT VT = Node->getValueType(0);
-
- if (DAG.getMachineFunction().getFunction().hasFnAttribute(
- "no-stack-arg-probe")) {
- SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
- Chain = SP.getValue(1);
- SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
- if (Align)
- SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
- DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
- Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
- SDValue Ops[2] = {SP, Chain};
- return DAG.getMergeValues(Ops, dl);
- }
-
- Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
-
- Chain = LowerWindowsDYNAMIC_STACKALLOC(Op, Chain, Size, DAG);
-
- SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
- Chain = SP.getValue(1);
- SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
- if (Align)
- SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
- DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
- Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
-
- Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl);
-
- SDValue Ops[2] = {SP, Chain};
- return DAG.getMergeValues(Ops, dl);
-}
-
SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -14283,6 +14730,44 @@ static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2) {
return isOperandOfVmullHighP64(Op1) && isOperandOfVmullHighP64(Op2);
}
+static bool shouldSinkVectorOfPtrs(Value *Ptrs, SmallVectorImpl<Use *> &Ops) {
+ // Restrict ourselves to the form CodeGenPrepare typically constructs.
+ auto *GEP = dyn_cast<GetElementPtrInst>(Ptrs);
+ if (!GEP || GEP->getNumOperands() != 2)
+ return false;
+
+ Value *Base = GEP->getOperand(0);
+ Value *Offsets = GEP->getOperand(1);
+
+ // We only care about scalar_base+vector_offsets.
+ if (Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
+ return false;
+
+ // Sink extends that would allow us to use 32-bit offset vectors.
+ if (isa<SExtInst>(Offsets) || isa<ZExtInst>(Offsets)) {
+ auto *OffsetsInst = cast<Instruction>(Offsets);
+ if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
+ OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
+ Ops.push_back(&GEP->getOperandUse(1));
+ }
+
+ // Sink the GEP.
+ return true;
+}
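
// Source-level sketch (hedged assumption) of the address shape matched above:
// a scalar base plus a vector of offsets, where a zext/sext from a <=32-bit
// offset vector is also worth sinking next to the gather/scatter.
#include <cstdint>
void gatherLike(int32_t *Out, const int32_t *Base, const uint32_t *Idx) {
  for (int I = 0; I < 4; ++I)
    Out[I] = Base[Idx[I]]; // becomes gather(Base, zext(Idx)) when vectorised
}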
+
+/// We want to sink the following cases:
+/// (add|sub|gep) A, ((mul|shl) vscale, imm); (add|sub|gep) A, vscale
+static bool shouldSinkVScale(Value *Op, SmallVectorImpl<Use *> &Ops) {
+ if (match(Op, m_VScale()))
+ return true;
+ if (match(Op, m_Shl(m_VScale(), m_ConstantInt())) ||
+ match(Op, m_Mul(m_VScale(), m_ConstantInt()))) {
+ Ops.push_back(&cast<Instruction>(Op)->getOperandUse(0));
+ return true;
+ }
+ return false;
+}
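
// Scalar sketch (illustrative) of the shapes matched by shouldSinkVScale:
// vscale, (shl vscale, imm) and (mul vscale, imm) feeding an add/sub/gep.
// Sinking the term next to its use lets isel fold it into addressing modes.
#include <cstdint>
static int8_t *indexByVScale(int8_t *A, uint64_t VScale) {
  return A + (VScale << 4); // mirrors (gep A, (shl vscale, 4))
}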
+
/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// shufflevectors extracts and/or sext/zext can be folded into (u,s)subl(2).
@@ -14315,6 +14800,16 @@ bool AArch64TargetLowering::shouldSinkOperands(
if (isSplatShuffle(II->getOperand(1)))
Ops.push_back(&II->getOperandUse(1));
return !Ops.empty();
+ case Intrinsic::aarch64_neon_fmlal:
+ case Intrinsic::aarch64_neon_fmlal2:
+ case Intrinsic::aarch64_neon_fmlsl:
+ case Intrinsic::aarch64_neon_fmlsl2:
+ // Sink splats for index lane variants
+ if (isSplatShuffle(II->getOperand(1)))
+ Ops.push_back(&II->getOperandUse(1));
+ if (isSplatShuffle(II->getOperand(2)))
+ Ops.push_back(&II->getOperandUse(2));
+ return !Ops.empty();
case Intrinsic::aarch64_sve_ptest_first:
case Intrinsic::aarch64_sve_ptest_last:
if (auto *IIOp = dyn_cast<IntrinsicInst>(II->getOperand(0)))
@@ -14374,11 +14869,37 @@ bool AArch64TargetLowering::shouldSinkOperands(
Ops.push_back(&II->getArgOperandUse(0));
Ops.push_back(&II->getArgOperandUse(1));
return true;
+ case Intrinsic::masked_gather:
+ if (!shouldSinkVectorOfPtrs(II->getArgOperand(0), Ops))
+ return false;
+ Ops.push_back(&II->getArgOperandUse(0));
+ return true;
+ case Intrinsic::masked_scatter:
+ if (!shouldSinkVectorOfPtrs(II->getArgOperand(1), Ops))
+ return false;
+ Ops.push_back(&II->getArgOperandUse(1));
+ return true;
default:
return false;
}
}
+ // Sink vscales closer to uses for better isel
+ switch (I->getOpcode()) {
+ case Instruction::GetElementPtr:
+ case Instruction::Add:
+ case Instruction::Sub:
+ for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
+ if (shouldSinkVScale(I->getOperand(Op), Ops)) {
+ Ops.push_back(&I->getOperandUse(Op));
+ return true;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
if (!I->getType()->isVectorTy())
return false;
@@ -14684,7 +15205,7 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(
Instruction *I, Loop *L, const TargetTransformInfo &TTI) const {
// shuffle_vector instructions are serialized when targeting SVE,
// see LowerSPLAT_VECTOR. This peephole is not beneficial.
- if (Subtarget->useSVEForFixedLengthVectors())
+ if (!EnableExtToTBL || Subtarget->useSVEForFixedLengthVectors())
return false;
// Try to optimize conversions using tbl. This requires materializing constant
@@ -14966,15 +15487,6 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
// The base address of the load.
Value *BaseAddr = LI->getPointerOperand();
- if (NumLoads > 1) {
- // We will compute the pointer operand of each load from the original base
- // address using GEPs. Cast the base address to a pointer to the scalar
- // element type.
- BaseAddr = Builder.CreateBitCast(
- BaseAddr,
- LDVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
- }
-
Type *PtrTy = LI->getPointerOperandType();
Type *PredTy = VectorType::get(Type::getInt1Ty(LDVTy->getContext()),
LDVTy->getElementCount());
@@ -15012,11 +15524,9 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
CallInst *LdN;
if (UseScalable)
- LdN = Builder.CreateCall(
- LdNFunc, {PTrue, Builder.CreateBitCast(BaseAddr, PtrTy)}, "ldN");
+ LdN = Builder.CreateCall(LdNFunc, {PTrue, BaseAddr}, "ldN");
else
- LdN = Builder.CreateCall(LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy),
- "ldN");
+ LdN = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
// Extract and store the sub-vectors returned by the load intrinsic.
for (unsigned i = 0; i < Shuffles.size(); i++) {
@@ -15054,6 +15564,33 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
return true;
}
+template <typename Iter>
+bool hasNearbyPairedStore(Iter It, Iter End, Value *Ptr, const DataLayout &DL) {
+ int MaxLookupDist = 20;
+ unsigned IdxWidth = DL.getIndexSizeInBits(0);
+ APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
+ const Value *PtrA1 =
+ Ptr->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
+
+ while (++It != End) {
+ if (It->isDebugOrPseudoInst())
+ continue;
+ if (MaxLookupDist-- == 0)
+ break;
+ if (const auto *SI = dyn_cast<StoreInst>(&*It)) {
+ const Value *PtrB1 =
+ SI->getPointerOperand()->stripAndAccumulateInBoundsConstantOffsets(
+ DL, OffsetB);
+ if (PtrA1 == PtrB1 &&
+ (OffsetA.sextOrTrunc(IdxWidth) - OffsetB.sextOrTrunc(IdxWidth))
+ .abs() == 16)
+ return true;
+ }
+ }
+
+ return false;
+}
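A rough scalar model of the property this walk tests, with a plain base pointer and integer offsets standing in for the stripped pointer and accumulated offset (illustrative only, not LLVM API):

    #include <cstdint>

    // Two stores can pair (stp, or a zip;ldp-friendly layout) when they share
    // the same underlying object and their constant offsets differ by exactly
    // 16 bytes, i.e. one q-register apart.
    bool pairsWithinSixteenBytes(const void *BaseA, int64_t OffA,
                                 const void *BaseB, int64_t OffB) {
      return BaseA == BaseB && (OffA - OffB == 16 || OffB - OffA == 16);
    }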
+
/// Lower an interleaved store into a stN intrinsic.
///
/// E.g. Lower an interleaved store (Factor = 3):
@@ -15136,15 +15673,6 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
// The base address of the store.
Value *BaseAddr = SI->getPointerOperand();
- if (NumStores > 1) {
- // We will compute the pointer operand of each store from the original base
- // address using GEPs. Cast the base address to a pointer to the scalar
- // element type.
- BaseAddr = Builder.CreateBitCast(
- BaseAddr,
- SubVecTy->getElementType()->getPointerTo(SI->getPointerAddressSpace()));
- }
-
auto Mask = SVI->getShuffleMask();
// Sanity check if all the indices are NOT in range.
@@ -15154,8 +15682,15 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
return false;
}
// A 64bit st2 which does not start at element 0 will involve adding extra
- // ext elements, making the st2 unprofitable.
- if (Factor == 2 && SubVecTy->getPrimitiveSizeInBits() == 64 && Mask[0] != 0)
+ // ext elements, making the st2 unprofitable; if there is a nearby store
+ // that points to BaseAddr+16 or BaseAddr-16, it can be better left as a
+ // zip;ldp pair, which has higher throughput.
+ if (Factor == 2 && SubVecTy->getPrimitiveSizeInBits() == 64 &&
+ (Mask[0] != 0 ||
+ hasNearbyPairedStore(SI->getIterator(), SI->getParent()->end(), BaseAddr,
+ DL) ||
+ hasNearbyPairedStore(SI->getReverseIterator(), SI->getParent()->rend(),
+ BaseAddr, DL)))
return false;
Type *PtrTy = SI->getPointerOperandType();
@@ -15227,7 +15762,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
BaseAddr, LaneLen * Factor);
- Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy));
+ Ops.push_back(BaseAddr);
Builder.CreateCall(StNFunc, Ops);
}
return true;
@@ -15542,25 +16077,22 @@ bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
NumBytes = 0;
}
- if (!AM.Scale) {
- int64_t Offset = AM.BaseOffs;
-
- // 9-bit signed offset
- if (isInt<9>(Offset))
- return true;
+ return Subtarget->getInstrInfo()->isLegalAddressingMode(NumBytes, AM.BaseOffs,
+ AM.Scale);
+}
- // 12-bit unsigned offset
- unsigned shift = Log2_64(NumBytes);
- if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
- // Must be a multiple of NumBytes (NumBytes is a power of 2)
- (Offset >> shift) << shift == Offset)
- return true;
- return false;
+// Check whether the two offsets belong to the same imm24 range and share the
+// same high 12 bits; if so, the high part can be encoded in the offset of an
+// add.
+int64_t
+AArch64TargetLowering::getPreferredLargeGEPBaseOffset(int64_t MinOffset,
+ int64_t MaxOffset) const {
+ int64_t HighPart = MinOffset & ~0xfffULL;
+ if (MinOffset >> 12 == MaxOffset >> 12 && isLegalAddImmediate(HighPart)) {
+ // Rebase the value to an integer multiple of imm12.
+ return HighPart;
}
- // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
-
- return AM.Scale == 1 || (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes);
+ return 0;
}
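A standalone sketch of the rebase arithmetic, assuming non-negative offsets and eliding the isLegalAddImmediate check:

    #include <cassert>
    #include <cstdint>

    // If all GEP offsets in [MinOffset, MaxOffset] share the same bits above
    // imm12, the shared high part can be materialised by one ADD and each
    // access keeps only a 12-bit unsigned immediate.
    int64_t preferredBaseOffset(int64_t MinOffset, int64_t MaxOffset) {
      int64_t HighPart = MinOffset & ~0xfffLL;
      return (MinOffset >> 12 == MaxOffset >> 12) ? HighPart : 0;
    }

    int main() {
      assert(preferredBaseOffset(0x1008, 0x1ff0) == 0x1000);
      assert(preferredBaseOffset(0xff8, 0x1008) == 0); // crosses an imm12 window
    }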
bool AArch64TargetLowering::shouldConsiderGEPOffsetSplit() const {
@@ -15600,8 +16132,8 @@ bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
}
bool AArch64TargetLowering::generateFMAsInMachineCombiner(
- EVT VT, CodeGenOpt::Level OptLevel) const {
- return (OptLevel >= CodeGenOpt::Aggressive) && !VT.isScalableVector() &&
+ EVT VT, CodeGenOptLevel OptLevel) const {
+ return (OptLevel >= CodeGenOptLevel::Aggressive) && !VT.isScalableVector() &&
!useSVEForFixedLengthVectorVT(VT);
}
@@ -15862,7 +16394,7 @@ static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
return performVecReduceAddCombineWithUADDLP(N, DAG);
SDValue Op0 = N->getOperand(0);
- if (N->getValueType(0) != MVT::i32 ||
+ if (N->getValueType(0) != MVT::i32 || Op0.getValueType().isScalableVT() ||
Op0.getValueType().getVectorElementType() != MVT::i32)
return SDValue();
@@ -16044,33 +16576,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
!(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
return SDValue();
- SDLoc DL(N);
- SDValue N0 = N->getOperand(0);
- unsigned Lg2 = Divisor.countr_zero();
- SDValue Zero = DAG.getConstant(0, DL, VT);
- SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
-
- // Add (N0 < 0) ? Pow2 - 1 : 0;
- SDValue CCVal;
- SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
- SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
- SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
-
- Created.push_back(Cmp.getNode());
- Created.push_back(Add.getNode());
- Created.push_back(CSel.getNode());
-
- // Divide by pow2.
- SDValue SRA =
- DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));
-
- // If we're dividing by a positive value, we're done. Otherwise, we must
- // negate the result.
- if (Divisor.isNonNegative())
- return SRA;
-
- Created.push_back(SRA.getNode());
- return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
+ return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
}
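The target-independent helper builds essentially the same select-then-shift sequence the deleted code emitted by hand. A scalar sketch of that arithmetic (assumes GCC/Clang builtins and an arithmetic right shift on signed values):

    #include <cassert>
    #include <cstdint>

    // Round toward zero by conditionally adding Pow2-1 (the "CMov") before the
    // arithmetic shift, then negate when the divisor is negative.
    int64_t sdivPow2(int64_t N, int64_t Divisor) {
      bool Neg = Divisor < 0;
      uint64_t Abs = Neg ? -(uint64_t)Divisor : (uint64_t)Divisor;
      int Lg2 = __builtin_ctzll(Abs); // Divisor is +/- a power of two
      int64_t Adjusted = N < 0 ? N + ((int64_t(1) << Lg2) - 1) : N;
      int64_t Q = Adjusted >> Lg2;
      return Neg ? -Q : Q;
    }

    int main() {
      for (int64_t N : {-9, -8, -1, 0, 1, 7, 8})
        assert(sdivPow2(N, 4) == N / 4 && sdivPow2(N, -8) == N / -8);
    }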
SDValue
@@ -16370,7 +16876,7 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
// Multiplication of a power of two plus/minus one can be done more
- // cheaply as as shift+add/sub. For now, this is true unilaterally. If
+ // cheaply as shift+add/sub. For now, this is true unilaterally. If
// future CPUs have a cheaper MADD instruction, this may need to be
// gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
// 64-bit is 5 cycles, so this is always a win.
@@ -16385,8 +16891,8 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
if (TrailingZeroes) {
// Conservatively do not lower to shift+add+shift if the mul might be
// folded into smul or umul.
- if (N0->hasOneUse() && (isSignExtended(N0.getNode(), DAG) ||
- isZeroExtended(N0.getNode(), DAG)))
+ if (N0->hasOneUse() && (isSignExtended(N0, DAG) ||
+ isZeroExtended(N0, DAG)))
return SDValue();
// Conservatively do not lower to shift+add+shift if the mul might be
// folded into madd or msub.
@@ -16451,7 +16957,7 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
} else if (SCVPlus1.isPowerOf2()) {
ShiftAmt = SCVPlus1.logBase2() + TrailingZeroes;
return Sub(Shl(N0, ShiftAmt), Shl(N0, TrailingZeroes));
- } else if (Subtarget->hasLSLFast() &&
+ } else if (Subtarget->hasALULSLFast() &&
isPowPlusPlusConst(ConstValue, CVM, CVN)) {
APInt CVMMinus1 = CVM - 1;
APInt CVNMinus1 = CVN - 1;
@@ -16550,7 +17056,8 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
// conversion, use an fp load and an AdvSIMD scalar {S|U}CVTF instead.
// This eliminates an "integer-to-vector-move" UOP and improves throughput.
SDValue N0 = N->getOperand(0);
- if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ if (Subtarget->isNeonAvailable() && ISD::isNormalLoad(N0.getNode()) &&
+ N0.hasOneUse() &&
// Do not change the width of a volatile load.
!cast<LoadSDNode>(N0)->isVolatile()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -16710,70 +17217,6 @@ static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(C, DL, MVT::i32));
}
-/// An EXTR instruction is made up of two shifts, ORed together. This helper
-/// searches for and classifies those shifts.
-static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
- bool &FromHi) {
- if (N.getOpcode() == ISD::SHL)
- FromHi = false;
- else if (N.getOpcode() == ISD::SRL)
- FromHi = true;
- else
- return false;
-
- if (!isa<ConstantSDNode>(N.getOperand(1)))
- return false;
-
- ShiftAmount = N->getConstantOperandVal(1);
- Src = N->getOperand(0);
- return true;
-}
-
-/// EXTR instruction extracts a contiguous chunk of bits from two existing
-/// registers viewed as a high/low pair. This function looks for the pattern:
-/// <tt>(or (shl VAL1, \#N), (srl VAL2, \#RegWidth-N))</tt> and replaces it
-/// with an EXTR. Can't quite be done in TableGen because the two immediates
-/// aren't independent.
-static SDValue tryCombineToEXTR(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- SelectionDAG &DAG = DCI.DAG;
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
-
- assert(N->getOpcode() == ISD::OR && "Unexpected root");
-
- if (VT != MVT::i32 && VT != MVT::i64)
- return SDValue();
-
- SDValue LHS;
- uint32_t ShiftLHS = 0;
- bool LHSFromHi = false;
- if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
- return SDValue();
-
- SDValue RHS;
- uint32_t ShiftRHS = 0;
- bool RHSFromHi = false;
- if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
- return SDValue();
-
- // If they're both trying to come from the high part of the register, they're
- // not really an EXTR.
- if (LHSFromHi == RHSFromHi)
- return SDValue();
-
- if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
- return SDValue();
-
- if (LHSFromHi) {
- std::swap(LHS, RHS);
- std::swap(ShiftLHS, ShiftRHS);
- }
-
- return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
- DAG.getConstant(ShiftRHS, DL, MVT::i64));
-}
-
static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const AArch64TargetLowering &TLI) {
EVT VT = N->getValueType(0);
@@ -16957,10 +17400,6 @@ static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
- // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
- if (SDValue Res = tryCombineToEXTR(N, DCI))
- return Res;
-
if (SDValue Res = tryCombineToBSL(N, DCI, TLI))
return Res;
@@ -17164,6 +17603,46 @@ static SDValue performSVEAndCombine(SDNode *N,
return SDValue();
}
+// Transform and(fcmp(a, b), fcmp(c, d)) into fccmp(fcmp(a, b), c, d)
+static SDValue performANDSETCCCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+
+ // This function performs an optimization on a specific pattern involving
+ // an AND operation and SETCC (Set Condition Code) node.
+
+ SDValue SetCC = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+
+ // If the current node (N) is used by any SELECT instruction, return an
+ // empty SDValue; applying the optimization there could produce incorrect
+ // results.
+ for (auto U : N->uses())
+ if (U->getOpcode() == ISD::SELECT)
+ return SDValue();
+
+ // Check if the operand is a SETCC node with floating-point comparison
+ if (SetCC.getOpcode() == ISD::SETCC &&
+ SetCC.getOperand(0).getValueType() == MVT::f32) {
+
+ SDValue Cmp;
+ AArch64CC::CondCode CC;
+
+ // Check if the DAG is after legalization and if we can emit the conjunction
+ if (!DCI.isBeforeLegalize() &&
+ (Cmp = emitConjunction(DAG, SDValue(N, 0), CC))) {
+
+ AArch64CC::CondCode InvertedCC = AArch64CC::getInvertedCondCode(CC);
+
+ SDLoc DL(N);
+ return DAG.getNode(AArch64ISD::CSINC, DL, VT, DAG.getConstant(0, DL, VT),
+ DAG.getConstant(0, DL, VT),
+ DAG.getConstant(InvertedCC, DL, MVT::i32), Cmp);
+ }
+ }
+ return SDValue();
+}
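Roughly the shape of source that benefits: a non-short-circuit AND of two f32 compares, which can now lower to fcmp + fccmp + a single cset rather than two csets and an and (hypothetical example):

    // Candidate for the and(fcmp, fcmp) -> fccmp rewrite: the bitwise & keeps
    // both f32 compares feeding one ISD::AND node.
    bool bothLess(float A, float B, float C, float D) {
      return (A < B) & (C < D);
    }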
+
static SDValue performANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
@@ -17174,6 +17653,9 @@ static SDValue performANDCombine(SDNode *N,
if (SDValue R = performANDORCSELCombine(N, DAG))
return R;
+ if (SDValue R = performANDSETCCCombine(N, DCI))
+ return R;
+
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
@@ -18968,9 +19450,14 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
case Intrinsic::aarch64_neon_sshl:
case Intrinsic::aarch64_neon_ushl:
// For positive shift amounts we can use SHL, as ushl/sshl perform a regular
- // left shift for positive shift amounts. Below, we only replace the current
- // node with VSHL, if this condition is met.
- Opcode = AArch64ISD::VSHL;
+ // left shift for positive shift amounts. For negative shifts we can use
+ // VASHR/VLSHR as appropriate.
+ if (ShiftAmount < 0) {
+ Opcode = IID == Intrinsic::aarch64_neon_sshl ? AArch64ISD::VASHR
+ : AArch64ISD::VLSHR;
+ ShiftAmount = -ShiftAmount;
+ } else
+ Opcode = AArch64ISD::VSHL;
IsRightShift = false;
break;
}
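Per lane, the new mapping uses the definition of sshl/ushl, where a negative shift amount means a right shift; a scalar sketch:

    #include <cassert>
    #include <cstdint>

    // A constant negative shift amount for ushl/sshl is the same as a positive
    // logical/arithmetic right shift (VLSHR/VASHR).
    uint8_t ushlLane(uint8_t X, int8_t S) {
      return S < 0 ? uint8_t(X >> -S) : uint8_t(X << S);
    }
    int8_t sshlLane(int8_t X, int8_t S) {
      return S < 0 ? int8_t(X >> -S) : int8_t(X << S);
    }

    int main() {
      assert(ushlLane(0x80, -3) == 0x10); // VLSHR by 3
      assert(sshlLane(-64, -3) == -8);    // VASHR by 3
    }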
@@ -19356,17 +19843,6 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_neon_sshl:
case Intrinsic::aarch64_neon_ushl:
return tryCombineShiftImm(IID, N, DAG);
- case Intrinsic::aarch64_neon_rshrn: {
- EVT VT = N->getOperand(1).getValueType();
- SDLoc DL(N);
- SDValue Imm =
- DAG.getConstant(1LLU << (N->getConstantOperandVal(2) - 1), DL, VT);
- SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N->getOperand(1), Imm);
- SDValue Sht =
- DAG.getNode(ISD::SRL, DL, VT, Add,
- DAG.getConstant(N->getConstantOperandVal(2), DL, VT));
- return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Sht);
- }
case Intrinsic::aarch64_neon_sabd:
return DAG.getNode(ISD::ABDS, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
@@ -20138,7 +20614,61 @@ static SDValue performUnpackCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
+// Try to simplify:
+// t1 = nxv8i16 add(X, 1 << (ShiftValue - 1))
+// t2 = nxv8i16 srl(t1, ShiftValue)
+// to
+// t1 = nxv8i16 rshrnb(X, shiftvalue).
+// rshrnb will zero the top half bits of each element. Therefore, this combine
+// should only be performed when a following instruction with the rshrnb
+// as an operand does not care about the top half of each element. For example,
+// a uzp1 or a truncating store.
+static SDValue trySimplifySrlAddToRshrnb(SDValue Srl, SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
+ EVT VT = Srl->getValueType(0);
+
+ if (!VT.isScalableVector() || !Subtarget->hasSVE2() ||
+ Srl->getOpcode() != ISD::SRL)
+ return SDValue();
+
+ EVT ResVT;
+ if (VT == MVT::nxv8i16)
+ ResVT = MVT::nxv16i8;
+ else if (VT == MVT::nxv4i32)
+ ResVT = MVT::nxv8i16;
+ else if (VT == MVT::nxv2i64)
+ ResVT = MVT::nxv4i32;
+ else
+ return SDValue();
+
+ auto SrlOp1 =
+ dyn_cast_or_null<ConstantSDNode>(DAG.getSplatValue(Srl->getOperand(1)));
+ if (!SrlOp1)
+ return SDValue();
+ unsigned ShiftValue = SrlOp1->getZExtValue();
+ if (ShiftValue < 1 || ShiftValue > ResVT.getScalarSizeInBits())
+ return SDValue();
+
+ SDValue Add = Srl->getOperand(0);
+ if (Add->getOpcode() != ISD::ADD || !Add->hasOneUse())
+ return SDValue();
+ auto AddOp1 =
+ dyn_cast_or_null<ConstantSDNode>(DAG.getSplatValue(Add->getOperand(1)));
+ if (!AddOp1)
+ return SDValue();
+ uint64_t AddValue = AddOp1->getZExtValue();
+ if (AddValue != 1ULL << (ShiftValue - 1))
+ return SDValue();
+
+ SDLoc DL(Srl);
+ SDValue Rshrnb = DAG.getNode(
+ AArch64ISD::RSHRNB_I, DL, ResVT,
+ {Add->getOperand(0), DAG.getTargetConstant(ShiftValue, DL, MVT::i32)});
+ return DAG.getNode(ISD::BITCAST, DL, VT, Rshrnb);
+}
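A scalar sketch of one i16 lane, showing why the add+srl pair is a rounding narrowing shift and why only users that ignore the top half may consume the result (demo values are assumptions):

    #include <cassert>
    #include <cstdint>

    // The add+srl pair computes a rounding right shift; RSHRNB computes the
    // same rounded value into the bottom i8 of each i16 lane, with the top
    // half zeroed.
    uint16_t addSrl(uint16_t X, unsigned Shift) {
      return uint16_t((X + (1u << (Shift - 1))) >> Shift);
    }
    uint16_t rshrnbLane(uint16_t X, unsigned Shift) {
      uint8_t Narrow = uint8_t((X + (1u << (Shift - 1))) >> Shift);
      return Narrow; // bottom half holds the rounded value, top half is zero
    }

    int main() {
      // The two agree only while the rounded value fits the narrow half,
      // which is why the combine is restricted to uzp1/truncating-store users.
      assert(addSrl(0x01ff, 4) == rshrnbLane(0x01ff, 4)); // both 0x20
    }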
+
+static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
SDLoc DL(N);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
@@ -20171,6 +20701,12 @@ static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
}
}
+ if (SDValue Rshrnb = trySimplifySrlAddToRshrnb(Op0, DAG, Subtarget))
+ return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Rshrnb, Op1);
+
+ if (SDValue Rshrnb = trySimplifySrlAddToRshrnb(Op1, DAG, Subtarget))
+ return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Rshrnb);
+
// uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z)
if (Op0.getOpcode() == AArch64ISD::UUNPKLO) {
if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
@@ -20555,7 +21091,7 @@ static SDValue performLOADCombine(SDNode *N,
for (unsigned I = 0; I < Num256Loads; I++) {
unsigned PtrOffset = I * 32;
SDValue NewPtr = DAG.getMemBasePlusOffset(
- BasePtr, TypeSize::Fixed(PtrOffset), DL, Flags);
+ BasePtr, TypeSize::getFixed(PtrOffset), DL, Flags);
Align NewAlign = commonAlignment(LD->getAlign(), PtrOffset);
SDValue NewLoad = DAG.getLoad(
NewVT, DL, Chain, NewPtr, LD->getPointerInfo().getWithOffset(PtrOffset),
@@ -20573,8 +21109,8 @@ static SDValue performLOADCombine(SDNode *N,
MVT RemainingVT = MVT::getVectorVT(
MemVT.getVectorElementType().getSimpleVT(),
BitsRemaining / MemVT.getVectorElementType().getSizeInBits());
- SDValue NewPtr =
- DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(PtrOffset), DL, Flags);
+ SDValue NewPtr = DAG.getMemBasePlusOffset(
+ BasePtr, TypeSize::getFixed(PtrOffset), DL, Flags);
Align NewAlign = commonAlignment(LD->getAlign(), PtrOffset);
SDValue RemainingLoad =
DAG.getLoad(RemainingVT, DL, Chain, NewPtr,
@@ -20669,8 +21205,9 @@ static SDValue vectorToScalarBitmask(SDNode *N, SelectionDAG &DAG) {
SmallVector<SDValue, 16> MaskConstants;
if (VecVT == MVT::v16i8) {
- // v16i8 is a special case, as we need to split it into two halves and
- // combine, perform the mask+addition twice, and then combine them.
+ // v16i8 is a special case, as we have 16 entries but only 8 positional bits
+ // per entry. We split it into two halves, apply the mask, zip the halves to
+ // create 8x 16-bit values, and then perform the vector reduce.
for (unsigned Half = 0; Half < 2; ++Half) {
for (unsigned MaskBit = 1; MaskBit <= 128; MaskBit *= 2) {
MaskConstants.push_back(DAG.getConstant(MaskBit, DL, MVT::i32));
@@ -20680,25 +21217,13 @@ static SDValue vectorToScalarBitmask(SDNode *N, SelectionDAG &DAG) {
SDValue RepresentativeBits =
DAG.getNode(ISD::AND, DL, VecVT, ComparisonResult, Mask);
- EVT HalfVT = VecVT.getHalfNumVectorElementsVT(*DAG.getContext());
- unsigned NumElementsInHalf = HalfVT.getVectorNumElements();
-
- SDValue LowHalf =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, RepresentativeBits,
- DAG.getConstant(0, DL, MVT::i64));
- SDValue HighHalf =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, RepresentativeBits,
- DAG.getConstant(NumElementsInHalf, DL, MVT::i64));
-
- SDValue ReducedLowBits =
- DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i16, LowHalf);
- SDValue ReducedHighBits =
- DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i16, HighHalf);
-
- SDValue ShiftedHighBits =
- DAG.getNode(ISD::SHL, DL, MVT::i16, ReducedHighBits,
- DAG.getConstant(NumElementsInHalf, DL, MVT::i32));
- return DAG.getNode(ISD::OR, DL, MVT::i16, ShiftedHighBits, ReducedLowBits);
+ SDValue UpperRepresentativeBits =
+ DAG.getNode(AArch64ISD::EXT, DL, VecVT, RepresentativeBits,
+ RepresentativeBits, DAG.getConstant(8, DL, MVT::i32));
+ SDValue Zipped = DAG.getNode(AArch64ISD::ZIP1, DL, VecVT,
+ RepresentativeBits, UpperRepresentativeBits);
+ Zipped = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, Zipped);
+ return DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i16, Zipped);
}
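A scalar model of this v16i8 path: each lane keeps bit 1 << (lane % 8), lane i is zipped with lane i+8 into one i16, and a single add-reduction of the eight i16 lanes yields the 16-bit mask (illustrative sketch):

    #include <cassert>
    #include <cstdint>

    // Mask, zip (EXT #8 + ZIP1 + bitcast to v8i16), then reduce-add.
    uint16_t bitmask16(const bool Cmp[16]) {
      uint16_t Sum = 0;
      for (unsigned I = 0; I < 8; ++I) {
        uint16_t Lo = Cmp[I] ? uint16_t(1u << I) : uint16_t(0);
        uint16_t Hi = Cmp[I + 8] ? uint16_t(1u << I) : uint16_t(0);
        Sum += uint16_t(Lo | (Hi << 8)); // one zipped i16 lane
      }
      return Sum;
    }

    int main() {
      bool Cmp[16] = {};
      Cmp[0] = Cmp[9] = Cmp[15] = true;
      assert(bitmask16(Cmp) == ((1u << 0) | (1u << 9) | (1u << 15)));
    }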
// All other vector sizes.
@@ -20746,6 +21271,12 @@ static SDValue combineBoolVectorAndTruncateStore(SelectionDAG &DAG,
Store->getMemOperand());
}
+bool isHalvingTruncateOfLegalScalableType(EVT SrcVT, EVT DstVT) {
+ return (SrcVT == MVT::nxv8i16 && DstVT == MVT::nxv8i8) ||
+ (SrcVT == MVT::nxv4i32 && DstVT == MVT::nxv4i16) ||
+ (SrcVT == MVT::nxv2i64 && DstVT == MVT::nxv2i32);
+}
+
static SDValue performSTORECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG,
@@ -20787,6 +21318,17 @@ static SDValue performSTORECombine(SDNode *N,
if (SDValue Store = combineBoolVectorAndTruncateStore(DAG, ST))
return Store;
+ if (ST->isTruncatingStore()) {
+ EVT StoreVT = ST->getMemoryVT();
+ if (!isHalvingTruncateOfLegalScalableType(ValueVT, StoreVT))
+ return SDValue();
+ if (SDValue Rshrnb =
+ trySimplifySrlAddToRshrnb(ST->getOperand(1), DAG, Subtarget)) {
+ return DAG.getTruncStore(ST->getChain(), ST, Rshrnb, ST->getBasePtr(),
+ StoreVT, ST->getMemOperand());
+ }
+ }
+
return SDValue();
}
@@ -20831,6 +21373,19 @@ static SDValue performMSTORECombine(SDNode *N,
}
}
+ if (MST->isTruncatingStore()) {
+ EVT ValueVT = Value->getValueType(0);
+ EVT MemVT = MST->getMemoryVT();
+ if (!isHalvingTruncateOfLegalScalableType(ValueVT, MemVT))
+ return SDValue();
+ if (SDValue Rshrnb = trySimplifySrlAddToRshrnb(Value, DAG, Subtarget)) {
+ return DAG.getMaskedStore(MST->getChain(), DL, Rshrnb, MST->getBasePtr(),
+ MST->getOffset(), MST->getMask(),
+ MST->getMemoryVT(), MST->getMemOperand(),
+ MST->getAddressingMode(), true);
+ }
+ }
+
return SDValue();
}
@@ -22054,13 +22609,14 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
}
}
+ EVT CmpVT = N0.getOperand(0).getValueType();
if (N0.getOpcode() != ISD::SETCC ||
CCVT.getVectorElementCount() != ElementCount::getFixed(1) ||
- CCVT.getVectorElementType() != MVT::i1)
+ CCVT.getVectorElementType() != MVT::i1 ||
+ CmpVT.getVectorElementType().isFloatingPoint())
return SDValue();
EVT ResVT = N->getValueType(0);
- EVT CmpVT = N0.getOperand(0).getValueType();
// Only combine when the result type is of the same size as the compared
// operands.
if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
@@ -22103,8 +22659,10 @@ static SDValue performSelectCombine(SDNode *N,
EVT SrcVT = N0.getOperand(0).getValueType();
// Don't try to do this optimization when the setcc itself has i1 operands.
- // There are no legal vectors of i1, so this would be pointless.
- if (SrcVT == MVT::i1)
+ // There are no legal vectors of i1, so this would be pointless. v1f16 is
+ // ruled out to prevent the creation of setcc that need to be scalarized.
+ if (SrcVT == MVT::i1 ||
+ (SrcVT.isFloatingPoint() && SrcVT.getSizeInBits() <= 16))
return SDValue();
int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
@@ -22145,6 +22703,7 @@ static SDValue performSelectCombine(SDNode *N,
static SDValue performDUPCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
// If "v2i32 DUP(x)" and "v4i32 DUP(x)" both exist, use an extract from the
// 128bit vector version.
if (VT.is64BitVector() && DCI.isAfterLegalizeDAG()) {
@@ -22152,14 +22711,32 @@ static SDValue performDUPCombine(SDNode *N,
SmallVector<SDValue> Ops(N->ops());
if (SDNode *LN = DCI.DAG.getNodeIfExists(N->getOpcode(),
DCI.DAG.getVTList(LVT), Ops)) {
- SDLoc DL(N);
return DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SDValue(LN, 0),
DCI.DAG.getConstant(0, DL, MVT::i64));
}
}
- if (N->getOpcode() == AArch64ISD::DUP)
+ if (N->getOpcode() == AArch64ISD::DUP) {
+ if (DCI.isAfterLegalizeDAG()) {
+ // If a scalar DUP's operand is an extract_vector_elt, try to combine them
+ // into a DUPLANE. For example,
+ //
+ // t21: i32 = extract_vector_elt t19, Constant:i64<0>
+ // t18: v4i32 = AArch64ISD::DUP t21
+ // ==>
+ // t22: v4i32 = AArch64ISD::DUPLANE32 t19, Constant:i64<0>
+ SDValue EXTRACT_VEC_ELT = N->getOperand(0);
+ if (EXTRACT_VEC_ELT.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ if (VT == EXTRACT_VEC_ELT.getOperand(0).getValueType()) {
+ unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
+ return DCI.DAG.getNode(Opcode, DL, VT, EXTRACT_VEC_ELT.getOperand(0),
+ EXTRACT_VEC_ELT.getOperand(1));
+ }
+ }
+ }
+
return performPostLD1Combine(N, DCI, false);
+ }
return SDValue();
}
@@ -22300,8 +22877,12 @@ static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
// For FPs, ACLE only supports _packed_ single and double precision types.
+ // SST1Q_[INDEX_]PRED is the ST1Q for sve2p1 and should allow all sizes.
if (SrcElVT.isFloatingPoint())
- if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64))
+ if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64) &&
+ ((Opcode != AArch64ISD::SST1Q_PRED &&
+ Opcode != AArch64ISD::SST1Q_INDEX_PRED) ||
+ ((SrcVT != MVT::nxv8f16) && (SrcVT != MVT::nxv8bf16))))
return SDValue();
// Depending on the addressing mode, this is either a pointer or a vector of
@@ -22318,6 +22899,10 @@ static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG,
Offset =
getScaledOffsetForBitWidth(DAG, Offset, DL, SrcElVT.getSizeInBits());
Opcode = AArch64ISD::SSTNT1_PRED;
+ } else if (Opcode == AArch64ISD::SST1Q_INDEX_PRED) {
+ Offset =
+ getScaledOffsetForBitWidth(DAG, Offset, DL, SrcElVT.getSizeInBits());
+ Opcode = AArch64ISD::SST1Q_PRED;
}
// In the case of non-temporal gather loads there's only one SVE instruction
@@ -22325,7 +22910,8 @@ static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG,
// * stnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0]
// Since we do have intrinsics that allow the arguments to be in a different
// order, we may need to swap them to match the spec.
- if (Opcode == AArch64ISD::SSTNT1_PRED && Offset.getValueType().isVector())
+ if ((Opcode == AArch64ISD::SSTNT1_PRED || Opcode == AArch64ISD::SST1Q_PRED) &&
+ Offset.getValueType().isVector())
std::swap(Base, Offset);
// SST1_IMM requires that the offset is an immediate that is:
@@ -22408,21 +22994,26 @@ static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG,
// vector of offsets (that fits into one register)
SDValue Offset = N->getOperand(4);
- // For "scalar + vector of indices", just scale the indices. This only
- // applies to non-temporal gathers because there's no instruction that takes
- // indicies.
+ // For "scalar + vector of indices", scale the indices to obtain unscaled
+ // offsets. This applies to non-temporal and quadword gathers, which do not
+ // have an addressing mode with scaled offset.
if (Opcode == AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) {
Offset = getScaledOffsetForBitWidth(DAG, Offset, DL,
RetVT.getScalarSizeInBits());
Opcode = AArch64ISD::GLDNT1_MERGE_ZERO;
+ } else if (Opcode == AArch64ISD::GLD1Q_INDEX_MERGE_ZERO) {
+ Offset = getScaledOffsetForBitWidth(DAG, Offset, DL,
+ RetVT.getScalarSizeInBits());
+ Opcode = AArch64ISD::GLD1Q_MERGE_ZERO;
}
- // In the case of non-temporal gather loads there's only one SVE instruction
- // per data-size: "scalar + vector", i.e.
- // * ldnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0]
+ // In the case of non-temporal gather loads and quadword gather loads there's
+ // only one addressing mode: "vector + scalar", e.g.
+ // ldnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0]
// Since we do have intrinsics that allow the arguments to be in a different
// order, we may need to swap them to match the spec.
- if (Opcode == AArch64ISD::GLDNT1_MERGE_ZERO &&
+ if ((Opcode == AArch64ISD::GLDNT1_MERGE_ZERO ||
+ Opcode == AArch64ISD::GLD1Q_MERGE_ZERO) &&
Offset.getValueType().isVector())
std::swap(Base, Offset);
@@ -22929,19 +23520,20 @@ static SDValue tryCombineMULLWithUZP1(SDNode *N,
// Check ExtractLow's user.
if (HasFoundMULLow) {
SDNode *ExtractLowUser = *ExtractLow.getNode()->use_begin();
- if (ExtractLowUser->getOpcode() != N->getOpcode())
+ if (ExtractLowUser->getOpcode() != N->getOpcode()) {
HasFoundMULLow = false;
-
- if (ExtractLowUser->getOperand(0) == ExtractLow) {
- if (ExtractLowUser->getOperand(1).getOpcode() == ISD::TRUNCATE)
- TruncLow = ExtractLowUser->getOperand(1);
- else
- HasFoundMULLow = false;
} else {
- if (ExtractLowUser->getOperand(0).getOpcode() == ISD::TRUNCATE)
- TruncLow = ExtractLowUser->getOperand(0);
- else
- HasFoundMULLow = false;
+ if (ExtractLowUser->getOperand(0) == ExtractLow) {
+ if (ExtractLowUser->getOperand(1).getOpcode() == ISD::TRUNCATE)
+ TruncLow = ExtractLowUser->getOperand(1);
+ else
+ HasFoundMULLow = false;
+ } else {
+ if (ExtractLowUser->getOperand(0).getOpcode() == ISD::TRUNCATE)
+ TruncLow = ExtractLowUser->getOperand(0);
+ else
+ HasFoundMULLow = false;
+ }
}
}
@@ -22994,6 +23586,55 @@ static SDValue performMULLCombine(SDNode *N,
return SDValue();
}
+static SDValue
+performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ // Let's do the transform below.
+ //
+ // t34: v4i32 = AArch64ISD::UADDLV t2
+ // t35: i32 = extract_vector_elt t34, Constant:i64<0>
+ // t7: i64 = zero_extend t35
+ // t20: v1i64 = scalar_to_vector t7
+ // ==>
+ // t34: v4i32 = AArch64ISD::UADDLV t2
+ // t39: v2i32 = extract_subvector t34, Constant:i64<0>
+ // t40: v1i64 = AArch64ISD::NVCAST t39
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v1i64)
+ return SDValue();
+
+ SDValue ZEXT = N->getOperand(0);
+ if (ZEXT.getOpcode() != ISD::ZERO_EXTEND || ZEXT.getValueType() != MVT::i64)
+ return SDValue();
+
+ SDValue EXTRACT_VEC_ELT = ZEXT.getOperand(0);
+ if (EXTRACT_VEC_ELT.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ EXTRACT_VEC_ELT.getValueType() != MVT::i32)
+ return SDValue();
+
+ if (!isNullConstant(EXTRACT_VEC_ELT.getOperand(1)))
+ return SDValue();
+
+ SDValue UADDLV = EXTRACT_VEC_ELT.getOperand(0);
+ if (UADDLV.getOpcode() != AArch64ISD::UADDLV ||
+ UADDLV.getValueType() != MVT::v4i32 ||
+ UADDLV.getOperand(0).getValueType() != MVT::v8i8)
+ return SDValue();
+
+ // Let's generate the new sequence with AArch64ISD::NVCAST.
+ SDLoc DL(N);
+ SDValue EXTRACT_SUBVEC =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, UADDLV,
+ DAG.getConstant(0, DL, MVT::i64));
+ SDValue NVCAST =
+ DAG.getNode(AArch64ISD::NVCAST, DL, MVT::v1i64, EXTRACT_SUBVEC);
+
+ return NVCAST;
+}
+
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -23104,7 +23745,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case AArch64ISD::UUNPKHI:
return performUnpackCombine(N, DAG, Subtarget);
case AArch64ISD::UZP1:
- return performUzpCombine(N, DAG);
+ return performUzpCombine(N, DAG, Subtarget);
case AArch64ISD::SETCC_MERGE_ZERO:
return performSetccMergeZeroCombine(N, DCI);
case AArch64ISD::REINTERPRET_CAST:
@@ -23220,6 +23861,12 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_INDEX_PRED);
case Intrinsic::aarch64_sve_ld1_gather:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_MERGE_ZERO);
+ case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
+ return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1Q_MERGE_ZERO);
+ case Intrinsic::aarch64_sve_ld1q_gather_index:
+ return performGatherLoadCombine(N, DAG,
+ AArch64ISD::GLD1Q_INDEX_MERGE_ZERO);
case Intrinsic::aarch64_sve_ld1_gather_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLD1_SCALED_MERGE_ZERO);
@@ -23263,6 +23910,11 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_IMM_MERGE_ZERO);
+ case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
+ case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
+ return performScatterStoreCombine(N, DAG, AArch64ISD::SST1Q_PRED);
+ case Intrinsic::aarch64_sve_st1q_scatter_index:
+ return performScatterStoreCombine(N, DAG, AArch64ISD::SST1Q_INDEX_PRED);
case Intrinsic::aarch64_sve_st1_scatter:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_PRED);
case Intrinsic::aarch64_sve_st1_scatter_index:
@@ -23309,6 +23961,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
case ISD::CTLZ:
return performCTLZCombine(N, DAG, Subtarget);
+ case ISD::SCALAR_TO_VECTOR:
+ return performScalarToVectorCombine(N, DCI, DAG);
}
return SDValue();
}
@@ -23358,6 +24012,19 @@ bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return CI->isTailCall();
}
+bool AArch64TargetLowering::isIndexingLegal(MachineInstr &MI, Register Base,
+ Register Offset, bool IsPre,
+ MachineRegisterInfo &MRI) const {
+ auto CstOffset = getIConstantVRegVal(Offset, MRI);
+ if (!CstOffset || CstOffset->isZero())
+ return false;
+
+ // All of the indexed addressing mode instructions take a signed 9 bit
+ // immediate offset. Our CstOffset is a G_PTR_ADD offset so it already
+ // encodes the sign/indexing direction.
+ return isInt<9>(CstOffset->getSExtValue());
+}
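isInt<9> is just the signed 9-bit range check; a trivial standalone equivalent:

    #include <cassert>
    #include <cstdint>

    // The pre/post-indexed load/store forms encode a signed 9-bit immediate,
    // i.e. offsets in [-256, 255].
    bool fitsSImm9(int64_t Offset) { return Offset >= -256 && Offset <= 255; }

    int main() {
      assert(fitsSImm9(-256) && fitsSImm9(255));
      assert(!fitsSImm9(256) && !fitsSImm9(-257));
    }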
+
bool AArch64TargetLowering::getIndexedAddressParts(SDNode *N, SDNode *Op,
SDValue &Base,
SDValue &Offset,
@@ -23494,7 +24161,6 @@ static void CustomNonLegalBITCASTResults(SDNode *N,
SDValue IdxZero = DAG.getVectorIdxConstant(0, DL);
Results.push_back(
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, CastVal, IdxZero));
- return;
}
void AArch64TargetLowering::ReplaceBITCASTResults(
@@ -23646,10 +24312,7 @@ void AArch64TargetLowering::ReplaceExtractSubVectorResults(
// Create an even/odd pair of X registers holding integer value V.
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
SDLoc dl(V.getNode());
- SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i64);
- SDValue VHi = DAG.getAnyExtOrTrunc(
- DAG.getNode(ISD::SRL, dl, MVT::i128, V, DAG.getConstant(64, dl, MVT::i64)),
- dl, MVT::i64);
+ auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i64, MVT::i64);
if (DAG.getDataLayout().isBigEndian())
std::swap (VLo, VHi);
SDValue RegClass =
@@ -24005,8 +24668,11 @@ void AArch64TargetLowering::ReplaceNodeResults(
{LoadNode->getChain(), LoadNode->getBasePtr()},
LoadNode->getMemoryVT(), LoadNode->getMemOperand());
- SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
- Result.getValue(0), Result.getValue(1));
+ unsigned FirstRes = DAG.getDataLayout().isBigEndian() ? 1 : 0;
+
+ SDValue Pair =
+ DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
+ Result.getValue(FirstRes), Result.getValue(1 - FirstRes));
Results.append({Pair, Result.getValue(2) /* Chain */});
}
return;
@@ -24235,7 +24901,7 @@ AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
// stack and close enough to the spill slot, this can lead to a situation
// where the monitor always gets cleared and the atomic operation can never
// succeed. So at -O0 lower this operation to a CAS loop.
- if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+ if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None)
return AtomicExpansionKind::CmpXChg;
// Using CAS for an atomic load has a better chance of succeeding under high
@@ -24244,15 +24910,21 @@ AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
: AtomicExpansionKind::LLSC;
}
-// For the real atomic operations, we have ldxr/stxr up to 128 bits,
+// The "default" for integer RMW operations is to expand to an LL/SC loop.
+// However, with the LSE instructions (or outline-atomics mode, which provides
+// library routines in place of the LSE instructions), we can directly emit
+// many operations instead.
+//
+// Floating-point operations are always expanded to a cmpxchg loop, because
+// they may trigger a trap which aborts an LLSC sequence.
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+ unsigned Size = AI->getType()->getPrimitiveSizeInBits();
+ assert(Size <= 128 && "AtomicExpandPass should've handled larger sizes.");
+
if (AI->isFloatingPointOperation())
return AtomicExpansionKind::CmpXChg;
- unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- if (Size > 128) return AtomicExpansionKind::None;
-
bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 &&
(AI->getOperation() == AtomicRMWInst::Xchg ||
AI->getOperation() == AtomicRMWInst::Or ||
@@ -24287,7 +24959,7 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
// where the monitor always gets cleared and the atomic operation can never
// succeed. So at -O0 lower this operation to a CAS loop. Also worthwhile if
// we have a single CAS instruction that can replace the loop.
- if (getTargetMachine().getOptLevel() == CodeGenOpt::None ||
+ if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None ||
Subtarget->hasLSE())
return AtomicExpansionKind::CmpXChg;
@@ -24305,7 +24977,7 @@ AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
// on the stack and close enough to the spill slot, this can lead to a
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
- if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+ if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None)
return AtomicExpansionKind::None;
// 128-bit atomic cmpxchg is weird; AtomicExpand doesn't know how to expand
@@ -24331,7 +25003,6 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
Function *Ldxr = Intrinsic::getDeclaration(M, Int);
- Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
@@ -24380,7 +25051,6 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder,
Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
- Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
}
@@ -24428,7 +25098,7 @@ static Value *UseTlsOffset(IRBuilderBase &IRB, unsigned Offset) {
return IRB.CreatePointerCast(
IRB.CreateConstGEP1_32(IRB.getInt8Ty(), IRB.CreateCall(ThreadPointerFunc),
Offset),
- IRB.getInt8PtrTy()->getPointerTo(0));
+ IRB.getPtrTy(0));
}
Value *AArch64TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
@@ -24451,12 +25121,13 @@ void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
// MSVC CRT has a global variable holding security cookie.
M.getOrInsertGlobal("__security_cookie",
- Type::getInt8PtrTy(M.getContext()));
+ PointerType::getUnqual(M.getContext()));
// MSVC CRT has a function to validate security cookie.
- FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
- Subtarget->getSecurityCheckCookieName(),
- Type::getVoidTy(M.getContext()), Type::getInt8PtrTy(M.getContext()));
+ FunctionCallee SecurityCheckCookie =
+ M.getOrInsertFunction(Subtarget->getSecurityCheckCookieName(),
+ Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(M.getContext()));
if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
F->setCallingConv(CallingConv::Win64);
F->addParamAttr(0, Attribute::AttrKind::InReg);
@@ -24689,7 +25360,8 @@ bool AArch64TargetLowering::shouldLocalize(
llvm_unreachable("Unexpected remat cost");
};
- switch (MI.getOpcode()) {
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
case TargetOpcode::G_GLOBAL_VALUE: {
// On Darwin, TLS global vars get selected into function calls, which
// we don't want localized, as they can get moved into the middle of a
@@ -24699,14 +25371,37 @@ bool AArch64TargetLowering::shouldLocalize(
return false;
return true; // Always localize G_GLOBAL_VALUE to avoid high reg pressure.
}
+ case TargetOpcode::G_FCONSTANT:
case TargetOpcode::G_CONSTANT: {
- auto *CI = MI.getOperand(1).getCImm();
+ const ConstantInt *CI;
+ unsigned AdditionalCost = 0;
+
+ if (Opc == TargetOpcode::G_CONSTANT)
+ CI = MI.getOperand(1).getCImm();
+ else {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ // We try to estimate cost of 32/64b fpimms, as they'll likely be
+ // materialized as integers.
+ if (Ty.getScalarSizeInBits() != 32 && Ty.getScalarSizeInBits() != 64)
+ break;
+ auto APF = MI.getOperand(1).getFPImm()->getValueAPF();
+ bool OptForSize =
+ MF.getFunction().hasOptSize() || MF.getFunction().hasMinSize();
+ if (isFPImmLegal(APF, EVT::getFloatingPointVT(Ty.getScalarSizeInBits()),
+ OptForSize))
+ return true; // Constant should be cheap.
+ CI =
+ ConstantInt::get(MF.getFunction().getContext(), APF.bitcastToAPInt());
+ // FP materialization also costs an extra move, from gpr to fpr.
+ AdditionalCost = 1;
+ }
APInt Imm = CI->getValue();
InstructionCost Cost = TTI->getIntImmCost(
Imm, CI->getType(), TargetTransformInfo::TCK_CodeSize);
assert(Cost.isValid() && "Expected a valid imm cost");
unsigned RematCost = *Cost.getValue();
+ RematCost += AdditionalCost;
Register Reg = MI.getOperand(0).getReg();
unsigned MaxUses = maxUses(RematCost);
// Don't pass UINT_MAX sentinal value to hasAtMostUserInstrs().
@@ -24926,7 +25621,15 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE(
EVT VT = Op.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
- SDValue Mask = convertFixedMaskToScalableVector(Load->getMask(), DAG);
+ SDValue Mask = Load->getMask();
+ // If this is an extending load and the mask type is not the same as the
+ // load's type then we have to extend the mask type.
+ if (VT.getScalarSizeInBits() > Mask.getValueType().getScalarSizeInBits()) {
+ assert(Load->getExtensionType() != ISD::NON_EXTLOAD &&
+ "Incorrect mask type");
+ Mask = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Mask);
+ }
+ Mask = convertFixedMaskToScalableVector(Mask, DAG);
SDValue PassThru;
bool IsPassThruZeroOrUndef = false;
@@ -25633,6 +26336,80 @@ AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
}
}
+static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2,
+ ArrayRef<int> ShuffleMask, EVT VT,
+ EVT ContainerVT, SelectionDAG &DAG) {
+ auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
+ SDLoc DL(Op);
+ unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
+ unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
+ bool IsSingleOp =
+ ShuffleVectorInst::isSingleSourceMask(ShuffleMask, ShuffleMask.size());
+
+ if (!Subtarget.isNeonAvailable() && !MinSVESize)
+ MinSVESize = 128;
+
+ // Ignore the two-operand case if we have no SVE2 or if not all index
+ // numbers can be represented.
+ if (!IsSingleOp && (!Subtarget.hasSVE2() || MinSVESize != MaxSVESize))
+ return SDValue();
+
+ EVT VTOp1 = Op.getOperand(0).getValueType();
+ unsigned BitsPerElt = VTOp1.getVectorElementType().getSizeInBits();
+ unsigned IndexLen = MinSVESize / BitsPerElt;
+ unsigned ElementsPerVectorReg = VTOp1.getVectorNumElements();
+ uint64_t MaxOffset = APInt(BitsPerElt, -1, false).getZExtValue();
+ assert(ElementsPerVectorReg <= IndexLen && ShuffleMask.size() <= IndexLen &&
+ "Incorrectly legalised shuffle operation");
+
+ SmallVector<SDValue, 8> TBLMask;
+ for (int Index : ShuffleMask) {
+ // Handle poison index values.
+ if (Index < 0)
+ Index = 0;
+ // If we refer to the second operand then we have to add the number of
+ // elements in the hardware register minus the number of elements in the
+ // type.
+ if ((unsigned)Index >= ElementsPerVectorReg)
+ Index += IndexLen - ElementsPerVectorReg;
+ // For 8-bit elements and 1024-bit SVE registers, where MaxOffset equals
+ // 255, this might point to the last element in the second operand of the
+ // shufflevector; we therefore reject this transform.
+ if ((unsigned)Index >= MaxOffset)
+ return SDValue();
+ TBLMask.push_back(DAG.getConstant(Index, DL, MVT::i64));
+ }
+
+ // Choosing an out-of-range index leads to the lane being zeroed, rather
+ // than the first-lane duplication that would otherwise be performed for
+ // out-of-range elements. Note that for i8 elements an out-of-range index
+ // can still be a valid index for a 2048-bit vector register size.
+ for (unsigned i = 0; i < IndexLen - ElementsPerVectorReg; ++i)
+ TBLMask.push_back(DAG.getConstant((int)MaxOffset, DL, MVT::i64));
+
+ EVT MaskEltType = EVT::getIntegerVT(*DAG.getContext(), BitsPerElt);
+ EVT MaskType = EVT::getVectorVT(*DAG.getContext(), MaskEltType, IndexLen);
+ EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskType);
+ SDValue VecMask =
+ DAG.getBuildVector(MaskType, DL, ArrayRef(TBLMask.data(), IndexLen));
+ SDValue SVEMask = convertToScalableVector(DAG, MaskContainerVT, VecMask);
+
+ SDValue Shuffle;
+ if (IsSingleOp)
+ Shuffle =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
+ DAG.getConstant(Intrinsic::aarch64_sve_tbl, DL, MVT::i32),
+ Op1, SVEMask);
+ else if (Subtarget.hasSVE2())
+ Shuffle =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
+ DAG.getConstant(Intrinsic::aarch64_sve_tbl2, DL, MVT::i32),
+ Op1, Op2, SVEMask);
+ else
+ llvm_unreachable("Cannot lower shuffle without SVE2 TBL");
+ Shuffle = convertFromScalableVector(DAG, VT, Shuffle);
+ return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
+}
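A scalar model of the TBL semantics this lowering relies on, including the zeroing of out-of-range indices that makes the MaxOffset padding lanes safe (sketch, not the SVE intrinsic API):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Indices select elements from the (possibly padded) table; any index
    // past the end of the table yields zero for that lane.
    std::vector<uint8_t> sveTbl(const std::vector<uint8_t> &Table,
                                const std::vector<uint8_t> &Idx) {
      std::vector<uint8_t> Out(Idx.size());
      for (std::size_t I = 0; I < Idx.size(); ++I)
        Out[I] = Idx[I] < Table.size() ? Table[Idx[I]] : 0;
      return Out;
    }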
+
SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -25752,7 +26529,8 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
unsigned MaxSVESize = Subtarget->getMaxSVEVectorSizeInBits();
if (MinSVESize == MaxSVESize && MaxSVESize == VT.getSizeInBits()) {
- if (ShuffleVectorInst::isReverseMask(ShuffleMask) && Op2.isUndef()) {
+ if (ShuffleVectorInst::isReverseMask(ShuffleMask, ShuffleMask.size()) &&
+ Op2.isUndef()) {
Op = DAG.getNode(ISD::VECTOR_REVERSE, DL, ContainerVT, Op1);
return convertFromScalableVector(DAG, VT, Op);
}
@@ -25778,6 +26556,13 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
}
}
+ // Avoid producing a TBL instruction if we don't know the minimal SVE
+ // register size, unless NEON is not available and we can assume the
+ // minimal SVE register size is 128 bits.
+ if (MinSVESize || !Subtarget->isNeonAvailable())
+ return GenerateFixedLengthSVETBL(Op, Op1, Op2, ShuffleMask, VT, ContainerVT,
+ DAG);
+
return SDValue();
}
@@ -25902,11 +26687,6 @@ bool AArch64TargetLowering::isTargetCanonicalConstantNode(SDValue Op) const {
TargetLowering::isTargetCanonicalConstantNode(Op);
}
-bool AArch64TargetLowering::isConstantUnsignedBitfieldExtractLegal(
- unsigned Opc, LLT Ty1, LLT Ty2) const {
- return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64));
-}
-
bool AArch64TargetLowering::isComplexDeinterleavingSupported() const {
return Subtarget->hasSVE() || Subtarget->hasSVE2() ||
Subtarget->hasComplxNum();
@@ -26007,7 +26787,7 @@ Value *AArch64TargetLowering::createComplexDeinterleavingIR(
return B.CreateIntrinsic(IdMap[(int)Rotation], Ty,
- {Accumulator, InputB, InputA});
+ {Accumulator, InputA, InputB});
}
if (OperationType == ComplexDeinterleavingOperation::CAdd) {
@@ -26051,3 +26831,109 @@ bool AArch64TargetLowering::preferScalarizeSplat(SDNode *N) const {
}
return true;
}
+
+unsigned AArch64TargetLowering::getMinimumJumpTableEntries() const {
+ return Subtarget->getMinimumJumpTableEntries();
+}
+
+MVT AArch64TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const {
+ bool NonUnitFixedLengthVector =
+ VT.isFixedLengthVector() && !VT.getVectorElementCount().isScalar();
+ if (!NonUnitFixedLengthVector || !Subtarget->useSVEForFixedLengthVectors())
+ return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+
+ EVT VT1;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ getVectorTypeBreakdownForCallingConv(Context, CC, VT, VT1, NumIntermediates,
+ RegisterVT);
+ return RegisterVT;
+}
+
+unsigned AArch64TargetLowering::getNumRegistersForCallingConv(
+ LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
+ bool NonUnitFixedLengthVector =
+ VT.isFixedLengthVector() && !VT.getVectorElementCount().isScalar();
+ if (!NonUnitFixedLengthVector || !Subtarget->useSVEForFixedLengthVectors())
+ return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
+
+ EVT VT1;
+ MVT VT2;
+ unsigned NumIntermediates;
+ return getVectorTypeBreakdownForCallingConv(Context, CC, VT, VT1,
+ NumIntermediates, VT2);
+}
+
+unsigned AArch64TargetLowering::getVectorTypeBreakdownForCallingConv(
+ LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
+ unsigned &NumIntermediates, MVT &RegisterVT) const {
+ int NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
+ Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
+ if (!RegisterVT.isFixedLengthVector() ||
+ RegisterVT.getFixedSizeInBits() <= 128)
+ return NumRegs;
+
+ assert(Subtarget->useSVEForFixedLengthVectors() && "Unexpected mode!");
+ assert(IntermediateVT == RegisterVT && "Unexpected VT mismatch!");
+ assert(RegisterVT.getFixedSizeInBits() % 128 == 0 && "Unexpected size!");
+
+ // A size mismatch here implies either type promotion or widening and would
+ // have resulted in scalarisation if larger vectors had not been available.
+ if (RegisterVT.getSizeInBits() * NumRegs != VT.getSizeInBits()) {
+ EVT EltTy = VT.getVectorElementType();
+ EVT NewVT = EVT::getVectorVT(Context, EltTy, ElementCount::getFixed(1));
+ if (!isTypeLegal(NewVT))
+ NewVT = EltTy;
+
+ IntermediateVT = NewVT;
+ NumIntermediates = VT.getVectorNumElements();
+ RegisterVT = getRegisterType(Context, NewVT);
+ return NumIntermediates;
+ }
+
+ // SVE VLS support does not introduce a new ABI so we should use NEON sized
+ // types for vector arguments and returns.
+
+ unsigned NumSubRegs = RegisterVT.getFixedSizeInBits() / 128;
+ NumIntermediates *= NumSubRegs;
+ NumRegs *= NumSubRegs;
+
+ switch (RegisterVT.getVectorElementType().SimpleTy) {
+ default:
+ llvm_unreachable("unexpected element type for vector");
+ case MVT::i8:
+ IntermediateVT = RegisterVT = MVT::v16i8;
+ break;
+ case MVT::i16:
+ IntermediateVT = RegisterVT = MVT::v8i16;
+ break;
+ case MVT::i32:
+ IntermediateVT = RegisterVT = MVT::v4i32;
+ break;
+ case MVT::i64:
+ IntermediateVT = RegisterVT = MVT::v2i64;
+ break;
+ case MVT::f16:
+ IntermediateVT = RegisterVT = MVT::v8f16;
+ break;
+ case MVT::f32:
+ IntermediateVT = RegisterVT = MVT::v4f32;
+ break;
+ case MVT::f64:
+ IntermediateVT = RegisterVT = MVT::v2f64;
+ break;
+ case MVT::bf16:
+ IntermediateVT = RegisterVT = MVT::v8bf16;
+ break;
+ }
+
+ return NumRegs;
+}
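A rough model of the final re-split, assuming the asserts above hold (RegisterVT is a fixed-length multiple of 128 bits); the element type is likewise remapped to its 128-bit NEON equivalent per the switch:

    #include <cassert>

    // A legal fixed-length SVE register type wider than 128 bits is
    // re-described as N q-registers so the calling convention matches the
    // NEON ABI (e.g. v8f32 -> 2 x v4f32).
    unsigned numNeonRegsFor(unsigned RegisterBits, unsigned NumRegs) {
      assert(RegisterBits % 128 == 0 && RegisterBits > 128);
      return NumRegs * (RegisterBits / 128);
    }

    int main() { assert(numNeonRegsFor(256, 1) == 2); } // v8f32 -> 2 x v4f32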
+
+bool AArch64TargetLowering::hasInlineStackProbe(
+ const MachineFunction &MF) const {
+ return !Subtarget->isTargetWindows() &&
+ MF.getInfo<AArch64FunctionInfo>()->hasStackProbing();
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index aca45f113e73..6ddbcd41dcb7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -58,13 +58,6 @@ enum NodeType : unsigned {
CALL_BTI, // Function call followed by a BTI instruction.
- // Essentially like a normal COPY that works on GPRs, but cannot be
- // rematerialised by passes like the simple register coalescer. It's
- // required for SME when lowering calls because we cannot allow frame
- // index calculations using addvl to slip in between the smstart/smstop
- // and the bl instruction. The scalable vector length may change across
- // the smstart/smstop boundary.
- OBSCURE_COPY,
SMSTART,
SMSTOP,
RESTORE_ZA,
@@ -90,6 +83,10 @@ enum NodeType : unsigned {
ADC,
SBC, // adc, sbc instructions
+ // To avoid stack clash, allocation is performed block by block, and each
+ // block is probed.
+ PROBED_ALLOCA,
+
// Predicated instructions where inactive lanes produce undefined results.
ABDS_PRED,
ABDU_PRED,
@@ -165,9 +162,6 @@ enum NodeType : unsigned {
// Floating point comparison
FCMP,
- // Scalar extract
- EXTR,
-
// Scalar-to-vector duplication
DUP,
DUPLANE8,
@@ -218,6 +212,9 @@ enum NodeType : unsigned {
SRSHR_I,
URSHR_I,
+ // Vector narrowing shift by immediate (bottom)
+ RSHRNB_I,
+
// Vector shift by constant and insert
VSLI,
VSRI,
@@ -249,6 +246,9 @@ enum NodeType : unsigned {
SADDV,
UADDV,
+ // Unsigned Sum Long across Vector
+ UADDLV,
+
// Add Pairwise of two vectors
ADDP,
// Add Long Pairwise
@@ -339,6 +339,8 @@ enum NodeType : unsigned {
PTEST_ANY,
PTRUE,
+ CTTZ_ELTS,
+
BITREVERSE_MERGE_PASSTHRU,
BSWAP_MERGE_PASSTHRU,
REVH_MERGE_PASSTHRU,
@@ -377,6 +379,8 @@ enum NodeType : unsigned {
GLD1_UXTW_SCALED_MERGE_ZERO,
GLD1_SXTW_SCALED_MERGE_ZERO,
GLD1_IMM_MERGE_ZERO,
+ GLD1Q_MERGE_ZERO,
+ GLD1Q_INDEX_MERGE_ZERO,
// Signed gather loads
GLD1S_MERGE_ZERO,
@@ -421,6 +425,8 @@ enum NodeType : unsigned {
SST1_UXTW_SCALED_PRED,
SST1_SXTW_SCALED_PRED,
SST1_IMM_PRED,
+ SST1Q_PRED,
+ SST1Q_INDEX_PRED,
// Non-temporal scatter store
SSTNT1_PRED,
@@ -444,6 +450,10 @@ enum NodeType : unsigned {
STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
STRICT_FCMPE,
+ // SME ZA loads and stores
+ SME_ZA_LDR,
+ SME_ZA_STR,
+
// NEON Load/Store with post-increment base updates
LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
LD3post,
@@ -505,8 +515,15 @@ enum Rounding {
const unsigned RoundingBitsPos = 22;
// Registers used to pass function arguments.
-const ArrayRef<MCPhysReg> getGPRArgRegs();
-const ArrayRef<MCPhysReg> getFPRArgRegs();
+ArrayRef<MCPhysReg> getGPRArgRegs();
+ArrayRef<MCPhysReg> getFPRArgRegs();
+
+/// Maximum allowed number of unprobed bytes above SP at an ABI
+/// boundary.
+const unsigned StackProbeMaxUnprobedStack = 1024;
+
+/// Maximum number of iterations to unroll for a constant size probing loop.
+const unsigned StackProbeMaxLoopUnroll = 4;
} // namespace AArch64
@@ -603,6 +620,9 @@ public:
MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI,
+ MachineBasicBlock *MBB) const;
+
MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
MachineInstr &MI,
MachineBasicBlock *BB) const;
@@ -610,6 +630,8 @@ public:
MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
MachineInstr &MI, MachineBasicBlock *BB,
bool HasTile) const;
+ MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB,
+ unsigned Opcode, bool Op0IsDef) const;
MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
MachineBasicBlock *
@@ -677,6 +699,9 @@ public:
unsigned AS,
Instruction *I = nullptr) const override;
+ int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
+ int64_t MaxOffset) const override;
+
/// Return true if an FMA operation is faster than a pair of fmul and fadd
/// instructions. fmuladd intrinsics will be expanded to FMAs when this method
/// returns true, otherwise fmuladd is expanded to fmul + fadd.
@@ -685,7 +710,7 @@ public:
bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
bool generateFMAsInMachineCombiner(EVT VT,
- CodeGenOpt::Level OptLevel) const override;
+ CodeGenOptLevel OptLevel) const override;
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
@@ -694,6 +719,10 @@ public:
bool isDesirableToCommuteWithShift(const SDNode *N,
CombineLevel Level) const override;
+ bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override {
+ return false;
+ }
+
/// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;
@@ -927,6 +956,8 @@ public:
bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;
+ bool shouldExpandCttzElements(EVT VT) const override;
+
/// If a change in streaming mode is required on entry to/return from a
/// function call it emits and returns the corresponding SMSTART or SMSTOP node.
/// \p Entry tells whether this is before/after the Call, which is necessary
@@ -942,6 +973,21 @@ public:
// used for 64bit and 128bit vectors as well.
bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
+ // Follow NEON ABI rules even when using SVE for fixed length vectors.
+ MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
+ EVT VT) const override;
+ unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const override;
+ unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC, EVT VT,
+ EVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ MVT &RegisterVT) const override;
+
+ /// True if stack clash protection is enabled for this function.
+ bool hasInlineStackProbe(const MachineFunction &MF) const override;
+
private:
/// Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -963,6 +1009,9 @@ private:
const SDLoc &DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const override;
+ void AdjustInstrPostInstrSelection(MachineInstr &MI,
+ SDNode *Node) const override;
+
SDValue LowerCall(CallLoweringInfo & /*CLI*/,
SmallVectorImpl<SDValue> &InVals) const override;
@@ -1101,12 +1150,11 @@ private:
SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
- SDValue &Size,
- SelectionDAG &DAG) const;
+
SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const;
SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
@@ -1169,13 +1217,14 @@ private:
const char *LowerXConstraint(EVT ConstraintVT) const override;
- void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
+ InlineAsm::ConstraintCode
+ getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
if (ConstraintCode == "Q")
- return InlineAsm::Constraint_Q;
+ return InlineAsm::ConstraintCode::Q;
// FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
// followed by llvm_unreachable so we'll leave them unimplemented in
// the backend for now.
@@ -1189,7 +1238,7 @@ private:
SelectionDAG &DAG) const override;
bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
- bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
+ bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;
bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
@@ -1201,6 +1250,8 @@ private:
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
SDValue &Offset, ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const override;
+ bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
+ bool IsPre, MachineRegisterInfo &MRI) const override;
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
@@ -1243,10 +1294,9 @@ private:
SDValue getPStateSM(SelectionDAG &DAG, SDValue Chain, SMEAttrs Attrs,
SDLoc DL, EVT VT) const;
- bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
- LLT Ty2) const override;
-
bool preferScalarizeSplat(SDNode *N) const override;
+
+ unsigned getMinimumJumpTableEntries() const override;
};
namespace AArch64 {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 1427886d71c0..0002db52b199 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -162,14 +162,14 @@ def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64>
// A store operation that actually needs release semantics.
class releasing_store<PatFrag base>
- : PatFrag<(ops node:$ptr, node:$val), (base node:$val, node:$ptr)> {
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> {
let IsAtomic = 1;
let IsAtomicOrderingReleaseOrStronger = 1;
}
// An atomic store operation that doesn't actually need to be atomic on AArch64.
class relaxed_store<PatFrag base>
- : PatFrag<(ops node:$ptr, node:$val), (base node:$val, node:$ptr)> {
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> {
let IsAtomic = 1;
let IsAtomicOrderingReleaseOrStronger = 0;
}
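
The two ordering flags partition atomic stores the same way a predicate over the memory order would; as a minimal sketch in std::atomic terms (not the TableGen-generated check):

#include <atomic>

// "Release or stronger" in the IsAtomicOrderingReleaseOrStronger sense.
bool isReleaseOrStronger(std::memory_order MO) {
  return MO == std::memory_order_release ||
         MO == std::memory_order_acq_rel ||
         MO == std::memory_order_seq_cst;
}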
@@ -573,3 +573,34 @@ let Predicates = [HasRCPC3, HasNEON] in {
(i64 (bitconvert (v1f64 VecListOne64:$Vt)))),
(STL1 (SUBREG_TO_REG (i64 0), VecListOne64:$Vt, dsub), (i64 0), GPR64sp:$Rn)>;
}
+
+// v8.4a FEAT_LRCPC2 patterns
+let Predicates = [HasRCPC_IMMO] in {
+ // Load-Acquire RCpc Register unscaled loads
+ def : Pat<(acquiring_load<atomic_load_az_8>
+ (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
+ (LDAPURBi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(acquiring_load<atomic_load_az_16>
+ (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
+ (LDAPURHi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(acquiring_load<atomic_load_32>
+ (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
+ (LDAPURi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(acquiring_load<atomic_load_64>
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+ (LDAPURXi GPR64sp:$Rn, simm9:$offset)>;
+
+ // Store-Release Register unscaled stores
+ def : Pat<(releasing_store<atomic_store_8>
+ (am_unscaled8 GPR64sp:$Rn, simm9:$offset), GPR32:$val),
+ (STLURBi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(releasing_store<atomic_store_16>
+ (am_unscaled16 GPR64sp:$Rn, simm9:$offset), GPR32:$val),
+ (STLURHi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(releasing_store<atomic_store_32>
+ (am_unscaled32 GPR64sp:$Rn, simm9:$offset), GPR32:$val),
+ (STLURWi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(releasing_store<atomic_store_64>
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset), GPR64:$val),
+ (STLURXi GPR64:$val, GPR64sp:$Rn, simm9:$offset)>;
+}
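
Illustrative source shape these patterns target (a sketch, not a test from the tree): an acquire load whose address folds to base plus a small signed offset.

#include <atomic>

struct Node {
  long Pad;
  std::atomic<int> Flag;
};

int readFlag(Node *N) {
  // With FEAT_LRCPC2 this can select "ldapur w0, [x0, #8]" directly, instead
  // of materializing the address for the register-only LDAPR form.
  return N->Flag.load(std::memory_order_acquire);
}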
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 39135df285c2..68e87f491a09 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -133,6 +133,16 @@ def extract_high_v4i32 :
def extract_high_v2i64 :
ComplexPattern<v1i64, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
+def gi_extract_high_v16i8 :
+ GIComplexOperandMatcher<v8s8, "selectExtractHigh">,
+ GIComplexPatternEquiv<extract_high_v16i8>;
+def gi_extract_high_v8i16 :
+ GIComplexOperandMatcher<v4s16, "selectExtractHigh">,
+ GIComplexPatternEquiv<extract_high_v8i16>;
+def gi_extract_high_v4i32 :
+ GIComplexOperandMatcher<v2s32, "selectExtractHigh">,
+ GIComplexPatternEquiv<extract_high_v4i32>;
+
def extract_high_v8f16 :
ComplexPattern<v4f16, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
def extract_high_v4f32 :
@@ -703,6 +713,28 @@ def fixedpoint_f16_i64 : fixedpoint_i64<f16>;
def fixedpoint_f32_i64 : fixedpoint_i64<f32>;
def fixedpoint_f64_i64 : fixedpoint_i64<f64>;
+class fixedpoint_recip_i32<ValueType FloatVT>
+ : Operand<FloatVT>,
+ ComplexPattern<FloatVT, 1, "SelectCVTFixedPosRecipOperand<32>", [fpimm, ld]> {
+ let EncoderMethod = "getFixedPointScaleOpValue";
+ let DecoderMethod = "DecodeFixedPointScaleImm32";
+}
+
+class fixedpoint_recip_i64<ValueType FloatVT>
+ : Operand<FloatVT>,
+ ComplexPattern<FloatVT, 1, "SelectCVTFixedPosRecipOperand<64>", [fpimm, ld]> {
+ let EncoderMethod = "getFixedPointScaleOpValue";
+ let DecoderMethod = "DecodeFixedPointScaleImm64";
+}
+
+def fixedpoint_recip_f16_i32 : fixedpoint_recip_i32<f16>;
+def fixedpoint_recip_f32_i32 : fixedpoint_recip_i32<f32>;
+def fixedpoint_recip_f64_i32 : fixedpoint_recip_i32<f64>;
+
+def fixedpoint_recip_f16_i64 : fixedpoint_recip_i64<f16>;
+def fixedpoint_recip_f32_i64 : fixedpoint_recip_i64<f32>;
+def fixedpoint_recip_f64_i64 : fixedpoint_recip_i64<f64>;
+
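
The reciprocal operands only make sense for immediates that are exactly 2^-N; a minimal sketch of the acceptance check (an assumed helper, not the backend's SelectCVTFixedPosRecipOperand):

#include <cmath>

// Accept Imm only if it is exactly 2^-N for some 1 <= N <= MaxFBits, so that
// "x * Imm" is the same value as "x / 2^N" and can use the fixed-point form.
bool isExactReciprocalPow2(double Imm, int &N, int MaxFBits) {
  if (Imm <= 0.0)
    return false;
  double Recip = 1.0 / Imm;
  if (Recip != std::round(Recip))
    return false; // reciprocal must be an integer
  int Exp = std::ilogb(Recip);
  if (std::ldexp(1.0, Exp) != Recip)
    return false; // and a power of two
  N = Exp;
  return N >= 1 && N <= MaxFBits;
}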
def vecshiftR8 : Operand<i32>, ImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
}]> {
@@ -786,8 +818,11 @@ def tvecshiftR64 : Operand<i32>, TImmLeaf<i32, [{
def Imm0_0Operand : AsmImmRange<0, 0>;
def Imm0_1Operand : AsmImmRange<0, 1>;
+def Imm1_1Operand : AsmImmRange<1, 1>;
def Imm0_3Operand : AsmImmRange<0, 3>;
+def Imm1_3Operand : AsmImmRange<1, 3>;
def Imm0_7Operand : AsmImmRange<0, 7>;
+def Imm1_7Operand : AsmImmRange<1, 7>;
def Imm0_15Operand : AsmImmRange<0, 15>;
def Imm0_31Operand : AsmImmRange<0, 31>;
def Imm0_63Operand : AsmImmRange<0, 63>;
@@ -1003,6 +1038,13 @@ def timm0_1 : Operand<i64>, TImmLeaf<i64, [{
let ParserMatchClass = Imm0_1Operand;
}
+// timm32_0_0 predicate - True if the 32-bit immediate is in the range [0,0]
+def timm32_0_0 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm) == 0;
+}]> {
+ let ParserMatchClass = Imm0_0Operand;
+}
+
// timm32_0_1 predicate - True if the 32-bit immediate is in the range [0,1]
def timm32_0_1 : Operand<i32>, TImmLeaf<i32, [{
return ((uint32_t)Imm) < 2;
@@ -1010,6 +1052,20 @@ def timm32_0_1 : Operand<i32>, TImmLeaf<i32, [{
let ParserMatchClass = Imm0_1Operand;
}
+// timm32_1_1 - True if the 32-bit immediate is in the range [1,1]
+def timm32_1_1 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm) == 1;
+}]> {
+ let ParserMatchClass = Imm1_1Operand;
+}
+
+// timm32_1_3 predicate - True if the 32-bit immediate is in the range [1,3]
+def timm32_1_3 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm) > 0 && ((uint32_t)Imm) < 4;
+}]> {
+ let ParserMatchClass = Imm1_3Operand;
+}
+
// imm0_15 predicate - True if the immediate is in the range [0,15]
def imm0_15 : Operand<i64>, ImmLeaf<i64, [{
return ((uint64_t)Imm) < 16;
@@ -1045,6 +1101,13 @@ def timm32_0_7 : Operand<i32>, TImmLeaf<i32, [{
let ParserMatchClass = Imm0_7Operand;
}
+// timm32_1_7 predicate - True if the 32-bit immediate is in the range [1,7]
+def timm32_1_7 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm) > 0 && ((uint32_t)Imm) < 8;
+}]> {
+ let ParserMatchClass = Imm1_7Operand;
+}
+
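
The TImmLeaf bodies above are ordinary C++ predicates over the immediate; timm32_1_7, for example, reduces to:

#include <cstdint>

bool isImmInRange1To7(int32_t Imm) {
  return ((uint32_t)Imm) > 0 && ((uint32_t)Imm) < 8;
}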
// imm32_0_15 predicate - True if the 32-bit immediate is in the range [0,15]
def imm32_0_15 : Operand<i32>, ImmLeaf<i32, [{
return ((uint32_t)Imm) < 16;
@@ -1398,6 +1461,8 @@ let OperandNamespace = "AArch64" in {
let OperandType = "OPERAND_IMPLICIT_IMM_0" in {
defm VectorIndex0 : VectorIndex<i64, VectorIndex0Operand,
[{ return ((uint64_t)Imm) == 0; }]>;
+ defm VectorIndex032b : VectorIndex<i32, VectorIndex0Operand,
+ [{ return ((uint32_t)Imm) == 0; }]>;
}
}
defm VectorIndex1 : VectorIndex<i64, VectorIndex1Operand,
@@ -1485,7 +1550,7 @@ def UImm3s8Operand : UImmScaledMemoryIndexed<3, 8>;
def uimm3s8 : Operand<i64>, ImmLeaf<i64,
[{ return Imm >= 0 && Imm <= 56 && ((Imm % 8) == 0); }], UImmS8XForm> {
- let PrintMethod = "printVectorIndex<8>";
+ let PrintMethod = "printMatrixIndex<8>";
let ParserMatchClass = UImm3s8Operand;
}
@@ -2990,7 +3055,7 @@ class BaseExtractImm<RegisterClass regtype, Operand imm_type, string asm,
multiclass ExtractImm<string asm> {
def Wrri : BaseExtractImm<GPR32, imm0_31, asm,
[(set GPR32:$Rd,
- (AArch64Extr GPR32:$Rn, GPR32:$Rm, imm0_31:$imm))]> {
+ (fshr GPR32:$Rn, GPR32:$Rm, imm0_31:$imm))]> {
let Inst{31} = 0;
let Inst{22} = 0;
// imm<5> must be zero.
@@ -2998,7 +3063,7 @@ multiclass ExtractImm<string asm> {
}
def Xrri : BaseExtractImm<GPR64, imm0_63, asm,
[(set GPR64:$Rd,
- (AArch64Extr GPR64:$Rn, GPR64:$Rm, imm0_63:$imm))]> {
+ (fshr GPR64:$Rn, GPR64:$Rm, imm0_63:$imm))]> {
let Inst{31} = 1;
let Inst{22} = 1;
@@ -4984,55 +5049,55 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
}
// Scaled
- def SWHri: BaseIntegerToFP<isUnsigned, GPR32, FPR16, fixedpoint_f16_i32, asm,
+ def SWHri: BaseIntegerToFP<isUnsigned, GPR32, FPR16, fixedpoint_recip_f16_i32, asm,
[(set (f16 FPR16:$Rd),
- (fdiv (node GPR32:$Rn),
- fixedpoint_f16_i32:$scale))]> {
+ (fmul (node GPR32:$Rn),
+ fixedpoint_recip_f16_i32:$scale))]> {
let Inst{31} = 0; // 32-bit GPR flag
let Inst{23-22} = 0b11; // 16-bit FPR flag
let scale{5} = 1;
let Predicates = [HasFullFP16];
}
- def SWSri: BaseIntegerToFP<isUnsigned, GPR32, FPR32, fixedpoint_f32_i32, asm,
+ def SWSri: BaseIntegerToFP<isUnsigned, GPR32, FPR32, fixedpoint_recip_f32_i32, asm,
[(set FPR32:$Rd,
- (fdiv (node GPR32:$Rn),
- fixedpoint_f32_i32:$scale))]> {
+ (fmul (node GPR32:$Rn),
+ fixedpoint_recip_f32_i32:$scale))]> {
let Inst{31} = 0; // 32-bit GPR flag
let Inst{23-22} = 0b00; // 32-bit FPR flag
let scale{5} = 1;
}
- def SWDri: BaseIntegerToFP<isUnsigned, GPR32, FPR64, fixedpoint_f64_i32, asm,
+ def SWDri: BaseIntegerToFP<isUnsigned, GPR32, FPR64, fixedpoint_recip_f64_i32, asm,
[(set FPR64:$Rd,
- (fdiv (node GPR32:$Rn),
- fixedpoint_f64_i32:$scale))]> {
+ (fmul (node GPR32:$Rn),
+ fixedpoint_recip_f64_i32:$scale))]> {
let Inst{31} = 0; // 32-bit GPR flag
let Inst{23-22} = 0b01; // 64-bit FPR flag
let scale{5} = 1;
}
- def SXHri: BaseIntegerToFP<isUnsigned, GPR64, FPR16, fixedpoint_f16_i64, asm,
+ def SXHri: BaseIntegerToFP<isUnsigned, GPR64, FPR16, fixedpoint_recip_f16_i64, asm,
[(set (f16 FPR16:$Rd),
- (fdiv (node GPR64:$Rn),
- fixedpoint_f16_i64:$scale))]> {
+ (fmul (node GPR64:$Rn),
+ fixedpoint_recip_f16_i64:$scale))]> {
let Inst{31} = 1; // 64-bit GPR flag
let Inst{23-22} = 0b11; // 16-bit FPR flag
let Predicates = [HasFullFP16];
}
- def SXSri: BaseIntegerToFP<isUnsigned, GPR64, FPR32, fixedpoint_f32_i64, asm,
+ def SXSri: BaseIntegerToFP<isUnsigned, GPR64, FPR32, fixedpoint_recip_f32_i64, asm,
[(set FPR32:$Rd,
- (fdiv (node GPR64:$Rn),
- fixedpoint_f32_i64:$scale))]> {
+ (fmul (node GPR64:$Rn),
+ fixedpoint_recip_f32_i64:$scale))]> {
let Inst{31} = 1; // 64-bit GPR flag
let Inst{23-22} = 0b00; // 32-bit FPR flag
}
- def SXDri: BaseIntegerToFP<isUnsigned, GPR64, FPR64, fixedpoint_f64_i64, asm,
+ def SXDri: BaseIntegerToFP<isUnsigned, GPR64, FPR64, fixedpoint_recip_f64_i64, asm,
[(set FPR64:$Rd,
- (fdiv (node GPR64:$Rn),
- fixedpoint_f64_i64:$scale))]> {
+ (fmul (node GPR64:$Rn),
+ fixedpoint_recip_f64_i64:$scale))]> {
let Inst{31} = 1; // 64-bit GPR flag
let Inst{23-22} = 0b01; // 64-bit FPR flag
}
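
A worked example of the rewrite these patterns now match: "scvtf s0, w0, #4" computes (float)w0 / 16.0, and because 1/16 is exact in binary floating point, the multiply-by-reciprocal form is bit-identical (sketch):

#include <cassert>

int main() {
  int W0 = 40;
  float ViaDiv = (float)W0 / 16.0f;
  float ViaMul = (float)W0 * 0.0625f; // 0.0625 == 2^-4, exact
  assert(ViaDiv == ViaMul);
  return 0;
}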
@@ -5377,6 +5442,44 @@ multiclass ThreeOperandFPData<bit isNegated, bit isSub,string asm,
(node (f64 FPR64:$Rn), (f64 FPR64:$Rm), (f64 FPR64:$Ra)))]> {
let Inst{23-22} = 0b01; // 64-bit size flag
}
+
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(f16 (node (f16 FPR16:$Rn),
+ (f16 (extractelt (v8f16 V128:$Rm), (i64 0))),
+ (f16 FPR16:$Ra))),
+ (!cast<Instruction>(NAME # Hrrr)
+ FPR16:$Rn, (f16 (EXTRACT_SUBREG V128:$Rm, hsub)), FPR16:$Ra)>;
+
+ def : Pat<(f16 (node (f16 (extractelt (v8f16 V128:$Rn), (i64 0))),
+ (f16 FPR16:$Rm),
+ (f16 FPR16:$Ra))),
+ (!cast<Instruction>(NAME # Hrrr)
+ (f16 (EXTRACT_SUBREG V128:$Rn, hsub)), FPR16:$Rm, FPR16:$Ra)>;
+ }
+
+ def : Pat<(f32 (node (f32 FPR32:$Rn),
+ (f32 (extractelt (v4f32 V128:$Rm), (i64 0))),
+ (f32 FPR32:$Ra))),
+ (!cast<Instruction>(NAME # Srrr)
+ FPR32:$Rn, (EXTRACT_SUBREG V128:$Rm, ssub), FPR32:$Ra)>;
+
+ def : Pat<(f32 (node (f32 (extractelt (v4f32 V128:$Rn), (i64 0))),
+ (f32 FPR32:$Rm),
+ (f32 FPR32:$Ra))),
+ (!cast<Instruction>(NAME # Srrr)
+ (EXTRACT_SUBREG V128:$Rn, ssub), FPR32:$Rm, FPR32:$Ra)>;
+
+ def : Pat<(f64 (node (f64 FPR64:$Rn),
+ (f64 (extractelt (v2f64 V128:$Rm), (i64 0))),
+ (f64 FPR64:$Ra))),
+ (!cast<Instruction>(NAME # Drrr)
+ FPR64:$Rn, (EXTRACT_SUBREG V128:$Rm, dsub), FPR64:$Ra)>;
+
+ def : Pat<(f64 (node (f64 (extractelt (v2f64 V128:$Rn), (i64 0))),
+ (f64 FPR64:$Rm),
+ (f64 FPR64:$Ra))),
+ (!cast<Instruction>(NAME # Drrr)
+ (EXTRACT_SUBREG V128:$Rn, dsub), FPR64:$Rm, FPR64:$Ra)>;
}
//---
@@ -5941,11 +6044,11 @@ multiclass SIMDLogicalThreeVectorTied<bit U, bits<2> size,
// ARMv8.2-A Dot Product Instructions (Vector): These instructions extract
// bytes from S-sized elements.
-class BaseSIMDThreeSameVectorDot<bit Q, bit U, bit Mixed, string asm, string kind1,
- string kind2, RegisterOperand RegType,
+class BaseSIMDThreeSameVectorDot<bit Q, bit U, bits<2> sz, bits<4> opc, string asm,
+ string kind1, string kind2, RegisterOperand RegType,
ValueType AccumType, ValueType InputType,
SDPatternOperator OpNode> :
- BaseSIMDThreeSameVectorTied<Q, U, 0b100, {0b1001, Mixed}, RegType, asm, kind1,
+ BaseSIMDThreeSameVectorTied<Q, U, {sz, 0b0}, {0b1, opc}, RegType, asm, kind1,
[(set (AccumType RegType:$dst),
(OpNode (AccumType RegType:$Rd),
(InputType RegType:$Rn),
@@ -5954,9 +6057,9 @@ class BaseSIMDThreeSameVectorDot<bit Q, bit U, bit Mixed, string asm, string kin
}
multiclass SIMDThreeSameVectorDot<bit U, bit Mixed, string asm, SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVectorDot<0, U, Mixed, asm, ".2s", ".8b", V64,
+ def v8i8 : BaseSIMDThreeSameVectorDot<0, U, 0b10, {0b001, Mixed}, asm, ".2s", ".8b", V64,
v2i32, v8i8, OpNode>;
- def v16i8 : BaseSIMDThreeSameVectorDot<1, U, Mixed, asm, ".4s", ".16b", V128,
+ def v16i8 : BaseSIMDThreeSameVectorDot<1, U, 0b10, {0b001, Mixed}, asm, ".4s", ".16b", V128,
v4i32, v16i8, OpNode>;
}
@@ -5985,6 +6088,73 @@ multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
v4f32, v8f16, OpNode>;
}
+multiclass SIMDThreeSameVectorMLA<bit Q, string asm>{
+ def v8f16 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
+ V128, v8f16, v16i8, null_frag>;
+}
+
+multiclass SIMDThreeSameVectorMLAL<bit Q, bits<2> sz, string asm>{
+ def v4f32 : BaseSIMDThreeSameVectorDot<Q, 0b0, sz, 0b1000, asm, ".4s", ".16b",
+ V128, v4f32, v16i8, null_frag>;
+}
+
+// FP8 assembly/disassembly classes
+
+//----------------------------------------------------------------------------
+// FP8 Advanced SIMD three-register extension
+//----------------------------------------------------------------------------
+class BaseSIMDThreeVectors<bit Q, bit U, bits<2> size, bits<4> op,
+ RegisterOperand regtype1,
+ RegisterOperand regtype2, string asm,
+ string kind1, string kind2>
+ : I<(outs regtype1:$Rd), (ins regtype2:$Rn, regtype2:$Rm), asm,
+ "\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2, "", []>, Sched<[]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Rm;
+ let Inst{15} = 0b1;
+ let Inst{14-11} = op;
+ let Inst{10} = 0b1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+
+// FCVTN (FP16 to FP8)
+multiclass SIMDThreeSameSizeVectorCvt<string asm> {
+ def v8f8 : BaseSIMDThreeVectors<0b0, 0b0, 0b01, 0b1110, V64, V64, asm, ".8b",".4h">;
+ def v16f8 : BaseSIMDThreeVectors<0b1, 0b0, 0b01, 0b1110, V128, V128, asm, ".16b", ".8h">;
+}
+
+// TODO: Create v16f8 value type
+// FCVTN, FCVTN2 (FP32 to FP8)
+multiclass SIMDThreeVectorCvt<string asm> {
+ def v8f8 : BaseSIMDThreeVectors<0b0, 0b0, 0b00, 0b1110, V64, V128, asm, ".8b", ".4s">;
+ def 2v16f8 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b00, 0b1110, asm#2, ".16b", ".4s",
+ V128, v16i8, v4f32, null_frag>;
+}
+
+// TODO: Create new value types v8f8 and v16f8
+multiclass SIMDThreeSameVectorDOT2<string asm> {
+ def v4f16 : BaseSIMDThreeSameVectorDot<0b0, 0b0, 0b01, 0b1111, asm, ".4h", ".8b",
+ V64, v4f16, v8i8, null_frag>;
+ def v8f16 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b01, 0b1111, asm, ".8h", ".16b",
+ V128, v8f16, v16i8, null_frag>;
+}
+
+multiclass SIMDThreeSameVectorDOT4<string asm> {
+ def v2f32 : BaseSIMDThreeSameVectorDot<0b0, 0b0, 0b00, 0b1111, asm, ".2s", ".8b",
+ V64, v2f32, v8i8, null_frag>;
+ def v4f32 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b00, 0b1111, asm, ".4s", ".16b",
+ V128, v4f32, v16i8, null_frag>;
+}
//----------------------------------------------------------------------------
// AdvSIMD two register vector instructions.
@@ -6409,6 +6579,16 @@ multiclass SIMDMixedTwoVector<bit U, bits<5> opc, string asm,
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
}
+//----------------------------------------------------------------------------
+// FP8 Advanced SIMD two-register miscellaneous
+//----------------------------------------------------------------------------
+multiclass SIMDMixedTwoVectorFP8<bits<2> sz, string asm> {
+ def v8f16 : BaseSIMDMixedTwoVector<0b0, 0b1, sz, 0b10111, V64, V128,
+ asm, ".8h", ".8b", []>;
+ def 2v8f16 : BaseSIMDMixedTwoVector<0b1, 0b1, sz, 0b10111, V128, V128,
+ asm#2, ".8h", ".16b", []>;
+}
+
class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<2> size2,
bits<5> opcode, RegisterOperand regtype, string asm,
string kind, string zero, ValueType dty,
@@ -7972,6 +8152,54 @@ multiclass SIMDTableLookupTied<bit op, string asm> {
V128, VecListFour128>;
}
+//----------------------------------------------------------------------------
+// AdvSIMD LUT
+//----------------------------------------------------------------------------
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDTableLookupIndexed<bit Q, bits<5> opc, RegisterOperand vectype,
+ RegisterOperand listtype, Operand idx_type,
+ string asm, string kind>
+ : I<(outs vectype:$Rd),
+ (ins listtype:$Rn, vectype:$Rm, idx_type:$idx),
+ asm, "\t$Rd" # kind # ", $Rn, $Rm$idx", "", []>,
+ Sched<[]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29-24} = 0b001110;
+ let Inst{23-22} = opc{4-3};
+ let Inst{21} = 0;
+ let Inst{20-16} = Rm;
+ let Inst{15} = 0;
+ let Inst{14-12} = opc{2-0};
+ let Inst{11-10} = 0b00;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass BaseSIMDTableLookupIndexed2<string asm> {
+ def v16f8 : BaseSIMDTableLookupIndexed<0b1, {0b10,?,?,0b1}, V128, VecListOne16b, VectorIndexS, asm, ".16b"> {
+ bits<2> idx;
+ let Inst{14-13} = idx;
+ }
+ def v8f16 : BaseSIMDTableLookupIndexed<0b1, {0b11,?,?,?}, V128, VecListOne8h, VectorIndexH, asm, ".8h"> {
+ bits<3> idx;
+ let Inst{14-12} = idx;
+ }
+}
+
+multiclass BaseSIMDTableLookupIndexed4<string asm> {
+ def v16f8 : BaseSIMDTableLookupIndexed<0b1, {0b01,?,0b10}, V128, VecListOne16b, VectorIndexD, asm, ".16b"> {
+ bit idx;
+ let Inst{14} = idx;
+ }
+ def v8f16 : BaseSIMDTableLookupIndexed<0b1, {0b01,?,?,0b1}, V128, VecListTwo8h, VectorIndexS, asm, ".8h"> {
+ bits<2> idx;
+ let Inst{14-13} = idx;
+ }
+}
//----------------------------------------------------------------------------
// AdvSIMD scalar DUP
@@ -8399,6 +8627,31 @@ class BF16ToSinglePrecision<string asm>
} // End of let mayStore = 0, mayLoad = 0, hasSideEffects = 0
//----------------------------------------------------------------------------
+class BaseSIMDThreeSameVectorIndexB<bit Q, bit U, bits<2> sz, bits<4> opc,
+ string asm, string dst_kind,
+ RegisterOperand RegType,
+ RegisterOperand RegType_lo>
+ : BaseSIMDIndexedTied<Q, U, 0b0, sz, opc,
+ RegType, RegType, RegType_lo, VectorIndexB,
+ asm, "", dst_kind, ".16b", ".b", []> {
+
+ // idx = H:L:M
+ bits<4> idx;
+ let Inst{11} = idx{3};
+ let Inst{21-19} = idx{2-0};
+}
+
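
The bit placement declared above, as a standalone sketch: the 4-bit .b lane index splits into idx{3} at Inst{11} and idx{2-0} at Inst{21-19}.

#include <cstdint>

uint32_t placeLaneIndexB(uint32_t Inst, unsigned Idx) {
  Inst |= ((Idx >> 3) & 0x1u) << 11; // idx{3}   -> Inst{11}
  Inst |= (Idx & 0x7u) << 19;        // idx{2-0} -> Inst{21-19}
  return Inst;
}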
+multiclass SIMDThreeSameVectorMLAIndex<bit Q, string asm> {
+ def v8f16 : BaseSIMDThreeSameVectorIndexB<Q, 0b0, 0b11, 0b0000, asm, ".8h",
+ V128, V128_0to7>;
+}
+
+multiclass SIMDThreeSameVectorMLALIndex<bit Q, bits<2> sz, string asm> {
+ def v4f32 : BaseSIMDThreeSameVectorIndexB<Q, 0b1, sz, 0b1000, asm, ".4s",
+ V128, V128_0to7>;
+}
+
+//----------------------------------------------------------------------------
// Armv8.6 Matrix Multiply Extension
//----------------------------------------------------------------------------
@@ -8412,12 +8665,12 @@ class SIMDThreeSameVectorMatMul<bit B, bit U, string asm, SDPatternOperator OpNo
//----------------------------------------------------------------------------
// ARMv8.2-A Dot Product Instructions (Indexed)
-class BaseSIMDThreeSameVectorDotIndex<bit Q, bit U, bit Mixed, bits<2> size, string asm,
- string dst_kind, string lhs_kind, string rhs_kind,
- RegisterOperand RegType,
- ValueType AccumType, ValueType InputType,
- SDPatternOperator OpNode> :
- BaseSIMDIndexedTied<Q, U, 0b0, size, {0b111, Mixed}, RegType, RegType, V128,
+class BaseSIMDThreeSameVectorIndexS<bit Q, bit U, bits<2> size, bits<4> opc, string asm,
+ string dst_kind, string lhs_kind, string rhs_kind,
+ RegisterOperand RegType,
+ ValueType AccumType, ValueType InputType,
+ SDPatternOperator OpNode> :
+ BaseSIMDIndexedTied<Q, U, 0b0, size, opc, RegType, RegType, V128,
VectorIndexS, asm, "", dst_kind, lhs_kind, rhs_kind,
[(set (AccumType RegType:$dst),
(AccumType (OpNode (AccumType RegType:$Rd),
@@ -8432,20 +8685,28 @@ class BaseSIMDThreeSameVectorDotIndex<bit Q, bit U, bit Mixed, bits<2> size, str
multiclass SIMDThreeSameVectorDotIndex<bit U, bit Mixed, bits<2> size, string asm,
SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVectorDotIndex<0, U, Mixed, size, asm, ".2s", ".8b", ".4b",
+ def v8i8 : BaseSIMDThreeSameVectorIndexS<0, U, size, {0b111, Mixed}, asm, ".2s", ".8b", ".4b",
V64, v2i32, v8i8, OpNode>;
- def v16i8 : BaseSIMDThreeSameVectorDotIndex<1, U, Mixed, size, asm, ".4s", ".16b", ".4b",
+ def v16i8 : BaseSIMDThreeSameVectorIndexS<1, U, size, {0b111, Mixed}, asm, ".4s", ".16b", ".4b",
V128, v4i32, v16i8, OpNode>;
}
+// TODO: The vectors v8i8 and v16i8 should be v8f8 and v16f8
+multiclass SIMDThreeSameVectorFP8DOT4Index<string asm> {
+ def v8f8 : BaseSIMDThreeSameVectorIndexS<0b0, 0b0, 0b00, 0b0000, asm, ".2s", ".8b", ".4b",
+ V64, v2f32, v8i8, null_frag>;
+ def v16f8 : BaseSIMDThreeSameVectorIndexS<0b1, 0b0, 0b00, 0b0000, asm, ".4s", ".16b",".4b",
+ V128, v4f32, v16i8, null_frag>;
+}
+
// ARMv8.2-A Fused Multiply Add-Long Instructions (Indexed)
let mayRaiseFPException = 1, Uses = [FPCR] in
-class BaseSIMDThreeSameVectorFMLIndex<bit Q, bit U, bits<4> opc, string asm,
+class BaseSIMDThreeSameVectorIndexH<bit Q, bit U, bits<2> sz, bits<4> opc, string asm,
string dst_kind, string lhs_kind,
string rhs_kind, RegisterOperand RegType,
- ValueType AccumType, ValueType InputType,
- SDPatternOperator OpNode> :
- BaseSIMDIndexedTied<Q, U, 0, 0b10, opc, RegType, RegType, V128_lo,
+ RegisterOperand RegType_lo, ValueType AccumType,
+ ValueType InputType, SDPatternOperator OpNode> :
+ BaseSIMDIndexedTied<Q, U, 0, sz, opc, RegType, RegType, RegType_lo,
VectorIndexH, asm, "", dst_kind, lhs_kind, rhs_kind,
[(set (AccumType RegType:$dst),
(AccumType (OpNode (AccumType RegType:$Rd),
@@ -8461,10 +8722,20 @@ class BaseSIMDThreeSameVectorFMLIndex<bit Q, bit U, bits<4> opc, string asm,
multiclass SIMDThreeSameVectorFMLIndex<bit U, bits<4> opc, string asm,
SDPatternOperator OpNode> {
- def v4f16 : BaseSIMDThreeSameVectorFMLIndex<0, U, opc, asm, ".2s", ".2h", ".h",
- V64, v2f32, v4f16, OpNode>;
- def v8f16 : BaseSIMDThreeSameVectorFMLIndex<1, U, opc, asm, ".4s", ".4h", ".h",
- V128, v4f32, v8f16, OpNode>;
+ def v4f16 : BaseSIMDThreeSameVectorIndexH<0, U, 0b10, opc, asm, ".2s", ".2h", ".h",
+ V64, V128_lo, v2f32, v4f16, OpNode>;
+ def v8f16 : BaseSIMDThreeSameVectorIndexH<1, U, 0b10, opc, asm, ".4s", ".4h", ".h",
+ V128, V128_lo, v4f32, v8f16, OpNode>;
+}
+
+//----------------------------------------------------------------------------
+// FP8 Advanced SIMD vector x indexed element
+// TODO: Replace value types v8i8 and v16i8 by v8f8 and v16f8
+multiclass SIMDThreeSameVectorFP8DOT2Index<string asm> {
+ def v4f16 : BaseSIMDThreeSameVectorIndexH<0b0, 0b0, 0b01, 0b0000, asm, ".4h", ".8b", ".2b",
+ V64, V128_lo, v4f16, v8i8, null_frag>;
+ def v8f16 : BaseSIMDThreeSameVectorIndexH<0b1, 0b0, 0b01, 0b0000, asm, ".8h", ".16b", ".2b",
+ V128, V128_lo, v8f16, v16i8, null_frag>;
}
multiclass SIMDFPIndexed<bit U, bits<4> opc, string asm,
@@ -8579,7 +8850,7 @@ multiclass SIMDFPIndexed<bit U, bits<4> opc, string asm,
(f16 (vector_extract (v8f16 V128:$Rn), (i64 0))),
(f16 (vector_extract (v8f16 V128:$Rm), VectorIndexH:$idx)))),
(!cast<Instruction>(NAME # v1i16_indexed)
- (EXTRACT_SUBREG V128:$Rn, hsub), V128:$Rm, VectorIndexH:$idx)>;
+ (f16 (EXTRACT_SUBREG V128:$Rn, hsub)), V128:$Rm, VectorIndexH:$idx)>;
}
let Predicates = [HasNEON] in {
@@ -9135,7 +9406,7 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
(i64 0))))),
(!cast<Instruction>(NAME # v1i32_indexed)
FPR32Op:$Rd,
- (EXTRACT_SUBREG V64:$Rn, hsub),
+ (f16 (EXTRACT_SUBREG V64:$Rn, hsub)),
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rm, dsub),
(i64 0))>;
@@ -9148,7 +9419,7 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
(i64 0))))),
(!cast<Instruction>(NAME # v1i32_indexed)
FPR32Op:$Rd,
- (EXTRACT_SUBREG V64:$Rn, hsub),
+ (f16 (EXTRACT_SUBREG V64:$Rn, hsub)),
V128_lo:$Rm,
VectorIndexH:$idx)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index b3d093af1c16..1c88456560d3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -209,14 +209,39 @@ def G_FCMLTZ : AArch64GenericInstruction {
let hasSideEffects = 0;
}
-def G_PREFETCH : AArch64GenericInstruction {
+def G_AARCH64_PREFETCH : AArch64GenericInstruction {
let OutOperandList = (outs);
let InOperandList = (ins type0:$imm, ptype0:$src1);
let hasSideEffects = 1;
}
-// Generic bitwise insert if true.
-def G_BIT : AArch64GenericInstruction {
+def G_UMULL : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+def G_SMULL : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+def G_UDOT : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3);
+ let hasSideEffects = 0;
+}
+
+def G_SDOT : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3);
+ let hasSideEffects = 0;
+}
+
+// Generic instruction for the BSP pseudo. It is expanded into BSP, which
+// expands into BSL/BIT/BIF after register allocation.
+def G_BSP : AArch64GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3);
let hasSideEffects = 0;
@@ -252,11 +277,17 @@ def : GINodeEquiv<G_FCMGTZ, AArch64fcmgtz>;
def : GINodeEquiv<G_FCMLEZ, AArch64fcmlez>;
def : GINodeEquiv<G_FCMLTZ, AArch64fcmltz>;
-def : GINodeEquiv<G_BIT, AArch64bit>;
+def : GINodeEquiv<G_BSP, AArch64bsp>;
+
+def : GINodeEquiv<G_UMULL, AArch64umull>;
+def : GINodeEquiv<G_SMULL, AArch64smull>;
+
+def : GINodeEquiv<G_UDOT, AArch64udot>;
+def : GINodeEquiv<G_SDOT, AArch64sdot>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
-def : GINodeEquiv<G_PREFETCH, AArch64Prefetch>;
+def : GINodeEquiv<G_AARCH64_PREFETCH, AArch64Prefetch>;
// These are patterns that we only use for GlobalISel via the importer.
def : Pat<(f32 (fadd (vector_extract (v2f32 FPR64:$Rn), (i64 0)),
@@ -303,30 +334,43 @@ def : Pat<(int_aarch64_stlxp GPR64:$lo, GPR64:$hi, GPR64:$addr),
def : Pat<(int_aarch64_stxp GPR64:$lo, GPR64:$hi, GPR64:$addr),
(STXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>;
+let GIIgnoreCopies = 1 in
+class PatIgnoreCopies<dag pattern, dag result> : Pat<pattern, result>, GISelFlags;
+
multiclass SIMDAcrossLanesSignedIntrinsicBHS<string baseOpc, Intrinsic intOp> {
- def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
+ def : PatIgnoreCopies<(i32 (sext (i8 (intOp (v8i8 V64:$Rn))))),
(i32 (SMOVvi8to32
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
(i64 0)))>;
- def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+ def : Pat<(i8 (intOp (v8i8 V64:$Rn))),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn)>;
+
+ def : PatIgnoreCopies<(i32 (sext (i8 (intOp (v16i8 V128:$Rn))))),
(i32 (SMOVvi8to32
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
(i64 0)))>;
+ def : Pat<(i8 (intOp (v16i8 V128:$Rn))),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn)>;
- def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+ def : PatIgnoreCopies<(i32 (sext (i16 (intOp (v4i16 V64:$Rn))))),
(i32 (SMOVvi16to32
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
(i64 0)))>;
- def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+ def : Pat<(i16 (intOp (v4i16 V64:$Rn))),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn)>;
+
+ def : PatIgnoreCopies<(i32 (sext (i16 (intOp (v8i16 V128:$Rn))))),
(i32 (SMOVvi16to32
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
(i64 0)))>;
+ def : Pat<(i16 (intOp (v8i16 V128:$Rn))),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn)>;
- def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
+ def : PatIgnoreCopies<(i32 (intOp (v4i32 V128:$Rn))),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
@@ -335,29 +379,48 @@ multiclass SIMDAcrossLanesSignedIntrinsicBHS<string baseOpc, Intrinsic intOp> {
multiclass SIMDAcrossLanesUnsignedIntrinsicBHS<string baseOpc,
Intrinsic intOp> {
- def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- ssub))>;
- def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- ssub))>;
+ def : PatIgnoreCopies<(i32 (zext (i8 (intOp (v8i8 V64:$Rn))))),
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ ssub)),
+ GPR32)>;
+ def : Pat<(i8 (intOp (v8i8 V64:$Rn))),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn)>;
+
+ def : PatIgnoreCopies<(i32 (zext (i8 (intOp (v16i8 V128:$Rn))))),
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+ ssub)),
+ GPR32)>;
+ def : Pat<(i8 (intOp (v16i8 V128:$Rn))),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn)>;
- def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+
+ def : PatIgnoreCopies<(i32 (zext (i16 (intOp (v4i16 V64:$Rn))))),
+ (COPY_TO_REGCLASS
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
- ssub))>;
- def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- ssub))>;
+ ssub)),
+ GPR32)>;
+ def : Pat<(i16 (intOp (v4i16 V64:$Rn))),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn)>;
- def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
+ def : PatIgnoreCopies<(i32 (zext (i16 (intOp (v8i16 V128:$Rn))))),
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+ ssub)),
+ GPR32)>;
+ def : Pat<(i16 (intOp (v8i16 V128:$Rn))),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn)>;
+
+ def : PatIgnoreCopies<(i32 (intOp (v4i32 V128:$Rn))),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
@@ -373,12 +436,23 @@ def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))),
(ADDPv2i32 V64:$Rn, V64:$Rn), dsub),
ssub))>;
+def : Pat<(i64 (int_aarch64_neon_saddv (v2i64 V128:$Rn))),
+ (i64 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+ (ADDPv2i64p V128:$Rn), dsub),
+ dsub))>;
+
defm : SIMDAcrossLanesUnsignedIntrinsicBHS<"ADDV", int_aarch64_neon_uaddv>;
def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(ADDPv2i32 V64:$Rn, V64:$Rn), dsub),
ssub))>;
+def : Pat<(i64 (int_aarch64_neon_uaddv (v2i64 V128:$Rn))),
+ (i64 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+ (ADDPv2i64p V128:$Rn), dsub),
+ dsub))>;
defm : SIMDAcrossLanesSignedIntrinsicBHS<"SMAXV", int_aarch64_neon_smaxv>;
def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))),
@@ -407,3 +481,65 @@ def : Pat<(i32 (int_aarch64_neon_uminv (v2i32 V64:$Rn))),
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(UMINPv2i32 V64:$Rn, V64:$Rn), dsub),
ssub))>;
+
+// Match stores from lane 0 to the appropriate subreg's store.
+multiclass VecStoreLane64_0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop,
+ ValueType VTy, ValueType STy,
+ SubRegIndex SubRegIdx, Operand IndexType,
+ Instruction STR> {
+ def : Pat<(storeop (STy (vector_extract (VTy VecListOne64:$Vt), (i64 0))),
+ (UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
+ (STR (EXTRACT_SUBREG VecListOne64:$Vt, SubRegIdx),
+ GPR64sp:$Rn, IndexType:$offset)>;
+}
+multiclass VecStoreULane64_0Pat<SDPatternOperator StoreOp,
+ ValueType VTy, ValueType STy,
+ SubRegIndex SubRegIdx, Instruction STR> {
+ defm : VecStoreLane64_0Pat<am_unscaled64, StoreOp, VTy, STy, SubRegIdx, simm9, STR>;
+}
+
+multiclass VecROStoreLane64_0Pat<ROAddrMode ro, SDPatternOperator storeop,
+ ValueType VecTy, ValueType STy,
+ SubRegIndex SubRegIdx,
+ Instruction STRW, Instruction STRX> {
+
+ def : Pat<(storeop (STy (vector_extract (VecTy VecListOne64:$Vt), (i64 0))),
+ (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
+ (STRW (EXTRACT_SUBREG VecListOne64:$Vt, SubRegIdx),
+ GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
+
+ def : Pat<(storeop (STy (vector_extract (VecTy VecListOne64:$Vt), (i64 0))),
+ (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
+ (STRX (EXTRACT_SUBREG VecListOne64:$Vt, SubRegIdx),
+ GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
+}
+
+let AddedComplexity = 19 in {
+ def : St1Lane128Pat<store, VectorIndexB, v16i8, i8, ST1i8>;
+ def : St1Lane64Pat<store, VectorIndexB, v8i8, i8, ST1i8>;
+
+ defm : VecStoreLane64_0Pat<am_indexed16, store, v4i16, i16, hsub, uimm12s2, STRHui>;
+ defm : VecStoreLane64_0Pat<am_indexed32, store, v2i32, i32, ssub, uimm12s4, STRSui>;
+
+ defm : VecStoreULane64_0Pat<store, v4i16, i16, hsub, STURHi>;
+ defm : VecStoreULane64_0Pat<store, v2i32, i32, ssub, STURSi>;
+ defm : VecROStoreLane64_0Pat<ro16, store, v4i16, i16, hsub, STRHroW, STRHroX>;
+ defm : VecROStoreLane64_0Pat<ro32, store, v2i32, i32, ssub, STRSroW, STRSroX>;
+}
+
+def : Pat<(v8i8 (AArch64dup (i8 (load (am_indexed8 GPR64sp:$Rn))))),
+ (LD1Rv8b GPR64sp:$Rn)>;
+def : Pat<(v16i8 (AArch64dup (i8 (load GPR64sp:$Rn)))),
+ (LD1Rv16b GPR64sp:$Rn)>;
+def : Pat<(v4i16 (AArch64dup (i16 (load GPR64sp:$Rn)))),
+ (LD1Rv4h GPR64sp:$Rn)>;
+def : Pat<(v8i16 (AArch64dup (i16 (load GPR64sp:$Rn)))),
+ (LD1Rv8h GPR64sp:$Rn)>;
+def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
+ (LD1Rv2s GPR64sp:$Rn)>;
+def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
+ (LD1Rv4s GPR64sp:$Rn)>;
+def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
+ (LD1Rv2d GPR64sp:$Rn)>;
+def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
+ (LD1Rv1d GPR64sp:$Rn)>;
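
Scalar shape the LD1R patterns correspond to (a sketch; whether a compiler picks the broadcast form depends on the surrounding code):

#include <cstdint>

void splat4(uint32_t *Dst, const uint32_t *Src) {
  uint32_t V = *Src;          // single load ...
  for (int I = 0; I < 4; ++I) // ... broadcast to all lanes:
    Dst[I] = V;               // "ld1r { v0.4s }, [x1]"
}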
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 0691e07a639b..7d71c316bcb0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -11,13 +11,17 @@
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
+#include "AArch64ExpandImm.h"
+#include "AArch64FrameLowering.h"
#include "AArch64MachineFunctionInfo.h"
+#include "AArch64PointerAuth.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -28,6 +32,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -41,7 +46,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
@@ -69,6 +73,10 @@ static cl::opt<unsigned>
BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
cl::desc("Restrict range of Bcc instructions (DEBUG)"));
+static cl::opt<unsigned>
+ BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
+ cl::desc("Restrict range of B instructions (DEBUG)"));
+
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
: AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
AArch64::CATCHRET),
@@ -79,6 +87,7 @@ AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
const MachineBasicBlock &MBB = *MI.getParent();
const MachineFunction *MF = MBB.getParent();
+ const Function &F = MF->getFunction();
const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
{
@@ -127,10 +136,21 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
NumBytes = 4;
break;
case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
+ // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
+ // instructions are expanded to the specified number of NOPs. Otherwise,
+ // they are expanded to 36-byte XRay sleds.
+ NumBytes =
+ F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
+ break;
case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
+ case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
// An XRay sled can be 4 bytes of alignment plus a 32-byte block.
NumBytes = 36;
break;
+ case TargetOpcode::PATCHABLE_EVENT_CALL:
+ // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
+ NumBytes = 24;
+ break;
case AArch64::SPACE:
NumBytes = MI.getOperand(1).getImm();
@@ -190,7 +210,7 @@ static unsigned getBranchDisplacementBits(unsigned Opc) {
default:
llvm_unreachable("unexpected opcode!");
case AArch64::B:
- return 64;
+ return BDisplacementBits;
case AArch64::TBNZW:
case AArch64::TBZW:
case AArch64::TBNZX:
@@ -235,6 +255,78 @@ AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
}
}
+void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock &NewDestBB,
+ MachineBasicBlock &RestoreBB,
+ const DebugLoc &DL,
+ int64_t BrOffset,
+ RegScavenger *RS) const {
+ assert(RS && "RegScavenger required for long branching");
+ assert(MBB.empty() &&
+ "new block should be inserted for expanding unconditional branch");
+ assert(MBB.pred_size() == 1);
+ assert(RestoreBB.empty() &&
+ "restore block should be inserted for restoring clobbered registers");
+
+ auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
+ // Offsets outside of the signed 33-bit range are not supported for ADRP +
+ // ADD.
+ if (!isInt<33>(BrOffset))
+ report_fatal_error(
+ "Branch offsets outside of the signed 33-bit range not supported");
+
+ BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
+ .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
+ BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
+ .addReg(Reg)
+ .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
+ .addImm(0);
+ BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
+ };
+
+ RS->enterBasicBlockEnd(MBB);
+ // If X16 is unused, we can rely on the linker to insert a range extension
+ // thunk if NewDestBB is out of range of a single B instruction.
+ constexpr Register Reg = AArch64::X16;
+ if (!RS->isRegUsed(Reg)) {
+ insertUnconditionalBranch(MBB, &NewDestBB, DL);
+ RS->setRegUsed(Reg);
+ return;
+ }
+
+ // If there's a free register and it's worth inflating the code size,
+ // manually insert the indirect branch.
+ Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
+ if (Scavenged != AArch64::NoRegister &&
+ MBB.getSectionID() == MBBSectionID::ColdSectionID) {
+ buildIndirectBranch(Scavenged, NewDestBB);
+ RS->setRegUsed(Scavenged);
+ return;
+ }
+
+ // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
+ // with red zones.
+ AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
+ if (!AFI || AFI->hasRedZone().value_or(true))
+ report_fatal_error(
+ "Unable to insert indirect branch inside function that has red zone");
+
+ // Otherwise, spill X16 and defer range extension to the linker.
+ BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
+ .addReg(AArch64::SP, RegState::Define)
+ .addReg(Reg)
+ .addReg(AArch64::SP)
+ .addImm(-16);
+
+ BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
+
+ BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
+ .addReg(AArch64::SP, RegState::Define)
+ .addReg(Reg, RegState::Define)
+ .addReg(AArch64::SP)
+ .addImm(16);
+}
+
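
Decision ladder of insertIndirectBranch, restated as a standalone sketch (illustrative names, not backend API):

enum class LongBranchPlan { LinkerThunk, ScavengedIndirect, SpillX16, Fatal };

LongBranchPlan pickPlan(bool X16Free, bool HaveSpareGPR, bool InColdSection,
                        bool MayHaveRedZone) {
  if (X16Free)
    return LongBranchPlan::LinkerThunk;       // plain B; linker adds a thunk
  if (HaveSpareGPR && InColdSection)
    return LongBranchPlan::ScavengedIndirect; // ADRP + ADD + BR
  if (MayHaveRedZone)
    return LongBranchPlan::Fatal;             // spilling X16 would move SP
  return LongBranchPlan::SpillX16;            // STR x16 ... B ... LDR x16
}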
// Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
@@ -288,10 +380,9 @@ bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
// Return now the only terminator is an unconditional branch.
TBB = LastInst->getOperand(0).getMBB();
return false;
- } else {
- SecondLastInst = &*I;
- SecondLastOpc = SecondLastInst->getOpcode();
}
+ SecondLastInst = &*I;
+ SecondLastOpc = SecondLastInst->getOpcode();
}
}
@@ -314,10 +405,9 @@ bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return false;
}
return true; // Can't handle indirect branch.
- } else {
- SecondLastInst = &*I;
- SecondLastOpc = SecondLastInst->getOpcode();
}
+ SecondLastInst = &*I;
+ SecondLastOpc = SecondLastInst->getOpcode();
}
// If there are three terminators, we don't know what sort of block this is.
@@ -790,93 +880,47 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
.addImm(CC);
}
-/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
-static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
- uint64_t Imm = MI.getOperand(1).getImm();
- uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
- uint64_t Encoding;
- return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
+// Return true if Imm can be loaded into a register by a "cheap" sequence of
+// instructions. For now, "cheap" means at most two instructions.
+static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
+ if (BitSize == 32)
+ return true;
+
+ assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
+ uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
+ SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
+ AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
+
+ return Is.size() <= 2;
}
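
A standalone approximation of the two-instruction bound: MOVZ (or MOVN) plus one MOVK covers any 64-bit value with at most two non-zero (or non-0xffff) 16-bit chunks. The real expandMOVImm also recognizes ORR-encodable bitmasks and other combinations this simplified count misses.

#include <cstdint>

bool isCheapImm64Approx(uint64_t Imm) {
  int NonZero = 0, NonOnes = 0;
  for (int Shift = 0; Shift < 64; Shift += 16) {
    uint16_t Chunk = (Imm >> Shift) & 0xffff;
    NonZero += (Chunk != 0x0000);
    NonOnes += (Chunk != 0xffff);
  }
  return NonZero <= 2 || NonOnes <= 2;
}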
// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in the future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
- if (!Subtarget.hasCustomCheapAsMoveHandling())
- return MI.isAsCheapAsAMove();
-
- const unsigned Opcode = MI.getOpcode();
-
- // Firstly, check cases gated by features.
-
- if (Subtarget.hasZeroCycleZeroingFP()) {
- if (Opcode == AArch64::FMOVH0 ||
- Opcode == AArch64::FMOVS0 ||
- Opcode == AArch64::FMOVD0)
- return true;
- }
-
- if (Subtarget.hasZeroCycleZeroingGP()) {
- if (Opcode == TargetOpcode::COPY &&
- (MI.getOperand(1).getReg() == AArch64::WZR ||
- MI.getOperand(1).getReg() == AArch64::XZR))
- return true;
- }
-
- // Secondly, check cases specific to sub-targets.
-
if (Subtarget.hasExynosCheapAsMoveHandling()) {
if (isExynosCheapAsMove(MI))
return true;
-
return MI.isAsCheapAsAMove();
}
- // Finally, check generic cases.
-
- switch (Opcode) {
+ switch (MI.getOpcode()) {
default:
- return false;
-
- // add/sub on register without shift
- case AArch64::ADDWri:
- case AArch64::ADDXri:
- case AArch64::SUBWri:
- case AArch64::SUBXri:
- return (MI.getOperand(3).getImm() == 0);
-
- // logical ops on immediate
- case AArch64::ANDWri:
- case AArch64::ANDXri:
- case AArch64::EORWri:
- case AArch64::EORXri:
- case AArch64::ORRWri:
- case AArch64::ORRXri:
- return true;
+ return MI.isAsCheapAsAMove();
- // logical ops on register without shift
- case AArch64::ANDWrr:
- case AArch64::ANDXrr:
- case AArch64::BICWrr:
- case AArch64::BICXrr:
- case AArch64::EONWrr:
- case AArch64::EONXrr:
- case AArch64::EORWrr:
- case AArch64::EORXrr:
- case AArch64::ORNWrr:
- case AArch64::ORNXrr:
- case AArch64::ORRWrr:
- case AArch64::ORRXrr:
- return true;
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
// If MOVi32imm or MOVi64imm can be expanded into ORRWri or
- // ORRXri, it is as cheap as MOV
+ // ORRXri, it is as cheap as MOV.
+ // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
case AArch64::MOVi32imm:
- return canBeExpandedToORR(MI, 32);
+ return isCheapImmediate(MI, 32);
case AArch64::MOVi64imm:
- return canBeExpandedToORR(MI, 64);
+ return isCheapImmediate(MI, 64);
}
-
- llvm_unreachable("Unknown opcode to check as cheap as a move!");
}
bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
@@ -1053,7 +1097,7 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
const TargetRegisterInfo *TRI = &getRegisterInfo();
const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
- unsigned WidthA = 0, WidthB = 0;
+ TypeSize WidthA(0, false), WidthB(0, false);
bool OffsetAIsScalable = false, OffsetBIsScalable = false;
assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
@@ -1078,8 +1122,9 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
OffsetAIsScalable == OffsetBIsScalable) {
int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
- int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
- if (LowOffset + LowWidth <= HighOffset)
+ TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
+ if (LowWidth.isScalable() == OffsetAIsScalable &&
+ LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
return true;
}
}
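
The interval test above for two fixed-size accesses off the same base, in isolation (sketch): the accesses are disjoint when the lower one ends at or before the higher one begins.

#include <algorithm>
#include <cstdint>

bool accessesDisjoint(int64_t OffA, int64_t WidthA,
                      int64_t OffB, int64_t WidthB) {
  int64_t LowOffset = std::min(OffA, OffB);
  int64_t HighOffset = std::max(OffA, OffB);
  int64_t LowWidth = (LowOffset == OffA) ? WidthA : WidthB;
  return LowOffset + LowWidth <= HighOffset;
}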
@@ -1091,6 +1136,11 @@ bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
const MachineFunction &MF) const {
if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
return true;
+
+ // Do not move an instruction that can be recognized as a branch target.
+ if (hasBTISemantics(MI))
+ return true;
+
switch (MI.getOpcode()) {
case AArch64::HINT:
// CSDB hints are scheduling barriers.
@@ -2146,6 +2196,7 @@ unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
case AArch64::LDRSui:
case AArch64::LDRDui:
case AArch64::LDRQui:
+ case AArch64::LDR_PXI:
if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
FrameIndex = MI.getOperand(1).getIndex();
@@ -2169,7 +2220,6 @@ unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
case AArch64::STRSui:
case AArch64::STRDui:
case AArch64::STRQui:
- case AArch64::LDR_PXI:
case AArch64::STR_PXI:
if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
@@ -2228,6 +2278,7 @@ bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
case AArch64::LDRWpre:
case AArch64::LDURXi:
case AArch64::LDRXpre:
+ case AArch64::LDRSWpre:
case AArch64::LDURSWi:
case AArch64::LDURHHi:
case AArch64::LDURBBi:
@@ -2437,6 +2488,21 @@ bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
case AArch64::LDURXi:
case AArch64::LDRXpre:
case AArch64::LDURSWi:
+ case AArch64::LDRSWpre:
+ return true;
+ }
+}
+
+bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ assert((!MI.isCall() || !MI.isReturn()) &&
+ "Unexpected instruction - was a new tail call opcode introduced?");
+ return false;
+ case AArch64::TCRETURNdi:
+ case AArch64::TCRETURNri:
+ case AArch64::TCRETURNriBTI:
+ case AArch64::TCRETURNriALL:
return true;
}
}
@@ -2557,7 +2623,8 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
// Can't merge/pair if the instruction modifies the base register.
// e.g., ldr x0, [x0]
// This case will never occur with an FI base.
- // However, if the instruction is an LDR/STR<S,D,Q,W,X>pre, it can be merged.
+ // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
+ // STR<S,D,Q,W,X>pre, it can be merged.
// For example:
// ldr q0, [x11, #32]!
// ldr q1, [x11, #16]
@@ -2610,9 +2677,16 @@ bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
return false;
const MachineOperand *BaseOp;
+ TypeSize WidthN(0, false);
if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
- Width, TRI))
+ WidthN, TRI))
return false;
+ // The maximum vscale is 16 under AArch64, so return the maximal extent for
+ // the vector.
+ Width = WidthN.isScalable()
+ ? WidthN.getKnownMinValue() * AArch64::SVEMaxBitsPerVector /
+ AArch64::SVEBitsPerBlock
+ : WidthN.getKnownMinValue();
BaseOps.push_back(BaseOp);
return true;
}
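
Sketch of the conservative width computation: scalable widths are scaled up to the architectural maximum (vscale <= 16, i.e. 2048-bit vectors in 128-bit granules), so callers see the largest extent the access could possibly have.

#include <cstdint>

uint64_t maxExtentBytes(uint64_t KnownMinBytes, bool IsScalable) {
  const uint64_t SVEMaxBitsPerVector = 2048; // as in AArch64::SVEMaxBitsPerVector
  const uint64_t SVEBitsPerBlock = 128;      // as in AArch64::SVEBitsPerBlock
  return IsScalable ? KnownMinBytes * (SVEMaxBitsPerVector / SVEBitsPerBlock)
                    : KnownMinBytes;
}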
@@ -2636,9 +2710,762 @@ AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
return AM;
}
+bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
+ Register Reg,
+ const MachineInstr &AddrI,
+ ExtAddrMode &AM) const {
+ // Filter out instructions into which we cannot fold.
+ unsigned NumBytes;
+ int64_t OffsetScale = 1;
+ switch (MemI.getOpcode()) {
+ default:
+ return false;
+
+ case AArch64::LDURQi:
+ case AArch64::STURQi:
+ NumBytes = 16;
+ break;
+
+ case AArch64::LDURDi:
+ case AArch64::STURDi:
+ case AArch64::LDURXi:
+ case AArch64::STURXi:
+ NumBytes = 8;
+ break;
+
+ case AArch64::LDURWi:
+ case AArch64::LDURSWi:
+ case AArch64::STURWi:
+ NumBytes = 4;
+ break;
+
+ case AArch64::LDURHi:
+ case AArch64::STURHi:
+ case AArch64::LDURHHi:
+ case AArch64::STURHHi:
+ case AArch64::LDURSHXi:
+ case AArch64::LDURSHWi:
+ NumBytes = 2;
+ break;
+
+ case AArch64::LDRBroX:
+ case AArch64::LDRBBroX:
+ case AArch64::LDRSBXroX:
+ case AArch64::LDRSBWroX:
+ case AArch64::STRBroX:
+ case AArch64::STRBBroX:
+ case AArch64::LDURBi:
+ case AArch64::LDURBBi:
+ case AArch64::LDURSBXi:
+ case AArch64::LDURSBWi:
+ case AArch64::STURBi:
+ case AArch64::STURBBi:
+ case AArch64::LDRBui:
+ case AArch64::LDRBBui:
+ case AArch64::LDRSBXui:
+ case AArch64::LDRSBWui:
+ case AArch64::STRBui:
+ case AArch64::STRBBui:
+ NumBytes = 1;
+ break;
+
+ case AArch64::LDRQroX:
+ case AArch64::STRQroX:
+ case AArch64::LDRQui:
+ case AArch64::STRQui:
+ NumBytes = 16;
+ OffsetScale = 16;
+ break;
+
+ case AArch64::LDRDroX:
+ case AArch64::STRDroX:
+ case AArch64::LDRXroX:
+ case AArch64::STRXroX:
+ case AArch64::LDRDui:
+ case AArch64::STRDui:
+ case AArch64::LDRXui:
+ case AArch64::STRXui:
+ NumBytes = 8;
+ OffsetScale = 8;
+ break;
+
+ case AArch64::LDRWroX:
+ case AArch64::LDRSWroX:
+ case AArch64::STRWroX:
+ case AArch64::LDRWui:
+ case AArch64::LDRSWui:
+ case AArch64::STRWui:
+ NumBytes = 4;
+ OffsetScale = 4;
+ break;
+
+ case AArch64::LDRHroX:
+ case AArch64::STRHroX:
+ case AArch64::LDRHHroX:
+ case AArch64::STRHHroX:
+ case AArch64::LDRSHXroX:
+ case AArch64::LDRSHWroX:
+ case AArch64::LDRHui:
+ case AArch64::STRHui:
+ case AArch64::LDRHHui:
+ case AArch64::STRHHui:
+ case AArch64::LDRSHXui:
+ case AArch64::LDRSHWui:
+ NumBytes = 2;
+ OffsetScale = 2;
+ break;
+ }
+
+ // Check the fold operand is not the loaded/stored value.
+ const MachineOperand &BaseRegOp = MemI.getOperand(0);
+ if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
+ return false;
+
+ // Handle memory instructions with a [Reg, Reg] addressing mode.
+ if (MemI.getOperand(2).isReg()) {
+ // Bail if the addressing mode already includes extension of the offset
+ // register.
+ if (MemI.getOperand(3).getImm())
+ return false;
+
+ // Check if we actually have a scaled offset.
+ if (MemI.getOperand(4).getImm() == 0)
+ OffsetScale = 1;
+
+ // If the address instruction is folded into the base register, then the
+ // addressing mode must not have a scale. Then we can swap the base and the
+ // scaled registers.
+ if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
+ return false;
+
+ switch (AddrI.getOpcode()) {
+ default:
+ return false;
+
+ case AArch64::SBFMXri:
+ // sxtw Xa, Wm
+ // ldr Xd, [Xn, Xa, lsl #N]
+ // ->
+ // ldr Xd, [Xn, Wm, sxtw #N]
+ if (AddrI.getOperand(2).getImm() != 0 ||
+ AddrI.getOperand(3).getImm() != 31)
+ return false;
+
+ AM.BaseReg = MemI.getOperand(1).getReg();
+ if (AM.BaseReg == Reg)
+ AM.BaseReg = MemI.getOperand(2).getReg();
+ AM.ScaledReg = AddrI.getOperand(1).getReg();
+ AM.Scale = OffsetScale;
+ AM.Displacement = 0;
+ AM.Form = ExtAddrMode::Formula::SExtScaledReg;
+ return true;
+
+ case TargetOpcode::SUBREG_TO_REG: {
+ // mov Wa, Wm
+ // ldr Xd, [Xn, Xa, lsl #N]
+ // ->
+ // ldr Xd, [Xn, Wm, uxtw #N]
+
+ // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
+ if (AddrI.getOperand(1).getImm() != 0 ||
+ AddrI.getOperand(3).getImm() != AArch64::sub_32)
+ return false;
+
+ const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
+ Register OffsetReg = AddrI.getOperand(2).getReg();
+ if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
+ return false;
+
+ const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
+ if (DefMI.getOpcode() != AArch64::ORRWrs ||
+ DefMI.getOperand(1).getReg() != AArch64::WZR ||
+ DefMI.getOperand(3).getImm() != 0)
+ return false;
+
+ AM.BaseReg = MemI.getOperand(1).getReg();
+ if (AM.BaseReg == Reg)
+ AM.BaseReg = MemI.getOperand(2).getReg();
+ AM.ScaledReg = DefMI.getOperand(2).getReg();
+ AM.Scale = OffsetScale;
+ AM.Displacement = 0;
+ AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
+ return true;
+ }
+ }
+ }
+
+ // Handle memory instructions with a [Reg, #Imm] addressing mode.
+
+ // Check we are not breaking a potential conversion to an LDP.
+ auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
+ int64_t NewOffset) -> bool {
+ int64_t MinOffset, MaxOffset;
+ switch (NumBytes) {
+ default:
+ return true;
+ case 4:
+ MinOffset = -256;
+ MaxOffset = 252;
+ break;
+ case 8:
+ MinOffset = -512;
+ MaxOffset = 504;
+ break;
+ case 16:
+ MinOffset = -1024;
+ MaxOffset = 1008;
+ break;
+ }
+ return OldOffset < MinOffset || OldOffset > MaxOffset ||
+ (NewOffset >= MinOffset && NewOffset <= MaxOffset);
+ };
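+  // Illustrative numbers (not from the patch itself): for an 8-byte access,
+  // an old offset of 480 lies in the LDP-able range [-512, 504], so a fold
+  // that moved it to 512 would be rejected by the check above.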
+ auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
+ int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
+ int64_t NewOffset = OldOffset + Disp;
+ if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
+ return false;
+ // If the old offset would fit into an LDP, but the new offset wouldn't,
+ // bail out.
+ if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
+ return false;
+ AM.BaseReg = AddrI.getOperand(1).getReg();
+ AM.ScaledReg = 0;
+ AM.Scale = 0;
+ AM.Displacement = NewOffset;
+ AM.Form = ExtAddrMode::Formula::Basic;
+ return true;
+ };
+
+ auto canFoldAddRegIntoAddrMode =
+ [&](int64_t Scale,
+ ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
+ if (MemI.getOperand(2).getImm() != 0)
+ return false;
+ if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
+ return false;
+ AM.BaseReg = AddrI.getOperand(1).getReg();
+ AM.ScaledReg = AddrI.getOperand(2).getReg();
+ AM.Scale = Scale;
+ AM.Displacement = 0;
+ AM.Form = Form;
+ return true;
+ };
+
+ auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
+ unsigned Opcode = MemI.getOpcode();
+ return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
+ Subtarget.isSTRQroSlow();
+ };
+
+ int64_t Disp = 0;
+ const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
+ switch (AddrI.getOpcode()) {
+ default:
+ return false;
+
+ case AArch64::ADDXri:
+ // add Xa, Xn, #N
+ // ldr Xd, [Xa, #M]
+ // ->
+ // ldr Xd, [Xn, #N'+M]
+ Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
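+    // The shift immediate is 0 or 12, e.g. `add Xa, Xn, #3, lsl #12` yields
+    // Disp = 3 << 12 = 12288 (an illustrative value, not from the patch).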
+ return canFoldAddSubImmIntoAddrMode(Disp);
+
+ case AArch64::SUBXri:
+ // sub Xa, Xn, #N
+ // ldr Xd, [Xa, #M]
+ // ->
+ // ldr Xd, [Xn, #N'+M]
+ Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
+ return canFoldAddSubImmIntoAddrMode(-Disp);
+
+ case AArch64::ADDXrs: {
+ // add Xa, Xn, Xm, lsl #N
+ // ldr Xd, [Xa]
+ // ->
+ // ldr Xd, [Xn, Xm, lsl #N]
+
+ // Don't fold the add if the result would be slower, unless optimising for
+ // size.
+ unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
+ if (AArch64_AM::getShiftType(Shift) != AArch64_AM::ShiftExtendType::LSL)
+ return false;
+ Shift = AArch64_AM::getShiftValue(Shift);
+ if (!OptSize) {
+ if ((Shift != 2 && Shift != 3) || !Subtarget.hasAddrLSLFast())
+ return false;
+ if (avoidSlowSTRQ(MemI))
+ return false;
+ }
+ return canFoldAddRegIntoAddrMode(1ULL << Shift);
+ }
+
+ case AArch64::ADDXrr:
+ // add Xa, Xn, Xm
+ // ldr Xd, [Xa]
+ // ->
+ // ldr Xd, [Xn, Xm, lsl #0]
+
+ // Don't fold the add if the result would be slower, unless optimising for
+ // size.
+ if (!OptSize && avoidSlowSTRQ(MemI))
+ return false;
+ return canFoldAddRegIntoAddrMode(1);
+
+ case AArch64::ADDXrx:
+ // add Xa, Xn, Wm, {s,u}xtw #N
+ // ldr Xd, [Xa]
+ // ->
+ // ldr Xd, [Xn, Wm, {s,u}xtw #N]
+
+ // Don't fold the add if the result would be slower, unless optimising for
+ // size.
+ if (!OptSize && avoidSlowSTRQ(MemI))
+ return false;
+
+ // Can fold only sign-/zero-extend of a word.
+ unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
+ AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
+ if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
+ return false;
+
+ return canFoldAddRegIntoAddrMode(
+ 1ULL << AArch64_AM::getArithShiftValue(Imm),
+ (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
+ : ExtAddrMode::Formula::ZExtScaledReg);
+ }
+}
+
+// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
+// return the opcode of an instruction performing the same operation, but using
+// the [Reg, Reg] addressing mode.
+static unsigned regOffsetOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Address folding not implemented for instruction");
+
+ case AArch64::LDURQi:
+ case AArch64::LDRQui:
+ return AArch64::LDRQroX;
+ case AArch64::STURQi:
+ case AArch64::STRQui:
+ return AArch64::STRQroX;
+ case AArch64::LDURDi:
+ case AArch64::LDRDui:
+ return AArch64::LDRDroX;
+ case AArch64::STURDi:
+ case AArch64::STRDui:
+ return AArch64::STRDroX;
+ case AArch64::LDURXi:
+ case AArch64::LDRXui:
+ return AArch64::LDRXroX;
+ case AArch64::STURXi:
+ case AArch64::STRXui:
+ return AArch64::STRXroX;
+ case AArch64::LDURWi:
+ case AArch64::LDRWui:
+ return AArch64::LDRWroX;
+ case AArch64::LDURSWi:
+ case AArch64::LDRSWui:
+ return AArch64::LDRSWroX;
+ case AArch64::STURWi:
+ case AArch64::STRWui:
+ return AArch64::STRWroX;
+ case AArch64::LDURHi:
+ case AArch64::LDRHui:
+ return AArch64::LDRHroX;
+ case AArch64::STURHi:
+ case AArch64::STRHui:
+ return AArch64::STRHroX;
+ case AArch64::LDURHHi:
+ case AArch64::LDRHHui:
+ return AArch64::LDRHHroX;
+ case AArch64::STURHHi:
+ case AArch64::STRHHui:
+ return AArch64::STRHHroX;
+ case AArch64::LDURSHXi:
+ case AArch64::LDRSHXui:
+ return AArch64::LDRSHXroX;
+ case AArch64::LDURSHWi:
+ case AArch64::LDRSHWui:
+ return AArch64::LDRSHWroX;
+ case AArch64::LDURBi:
+ case AArch64::LDRBui:
+ return AArch64::LDRBroX;
+ case AArch64::LDURBBi:
+ case AArch64::LDRBBui:
+ return AArch64::LDRBBroX;
+ case AArch64::LDURSBXi:
+ case AArch64::LDRSBXui:
+ return AArch64::LDRSBXroX;
+ case AArch64::LDURSBWi:
+ case AArch64::LDRSBWui:
+ return AArch64::LDRSBWroX;
+ case AArch64::STURBi:
+ case AArch64::STRBui:
+ return AArch64::STRBroX;
+ case AArch64::STURBBi:
+ case AArch64::STRBBui:
+ return AArch64::STRBBroX;
+ }
+}
+
+// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
+// the opcode of an instruction performing the same operation, but using the
+// [Reg, #Imm] addressing mode with scaled offset.
+static unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Address folding not implemented for instruction");
+
+ case AArch64::LDURQi:
+ Scale = 16;
+ return AArch64::LDRQui;
+ case AArch64::STURQi:
+ Scale = 16;
+ return AArch64::STRQui;
+ case AArch64::LDURDi:
+ Scale = 8;
+ return AArch64::LDRDui;
+ case AArch64::STURDi:
+ Scale = 8;
+ return AArch64::STRDui;
+ case AArch64::LDURXi:
+ Scale = 8;
+ return AArch64::LDRXui;
+ case AArch64::STURXi:
+ Scale = 8;
+ return AArch64::STRXui;
+ case AArch64::LDURWi:
+ Scale = 4;
+ return AArch64::LDRWui;
+ case AArch64::LDURSWi:
+ Scale = 4;
+ return AArch64::LDRSWui;
+ case AArch64::STURWi:
+ Scale = 4;
+ return AArch64::STRWui;
+ case AArch64::LDURHi:
+ Scale = 2;
+ return AArch64::LDRHui;
+ case AArch64::STURHi:
+ Scale = 2;
+ return AArch64::STRHui;
+ case AArch64::LDURHHi:
+ Scale = 2;
+ return AArch64::LDRHHui;
+ case AArch64::STURHHi:
+ Scale = 2;
+ return AArch64::STRHHui;
+ case AArch64::LDURSHXi:
+ Scale = 2;
+ return AArch64::LDRSHXui;
+ case AArch64::LDURSHWi:
+ Scale = 2;
+ return AArch64::LDRSHWui;
+ case AArch64::LDURBi:
+ Scale = 1;
+ return AArch64::LDRBui;
+ case AArch64::LDURBBi:
+ Scale = 1;
+ return AArch64::LDRBBui;
+ case AArch64::LDURSBXi:
+ Scale = 1;
+ return AArch64::LDRSBXui;
+ case AArch64::LDURSBWi:
+ Scale = 1;
+ return AArch64::LDRSBWui;
+ case AArch64::STURBi:
+ Scale = 1;
+ return AArch64::STRBui;
+ case AArch64::STURBBi:
+ Scale = 1;
+ return AArch64::STRBBui;
+ case AArch64::LDRQui:
+ case AArch64::STRQui:
+ Scale = 16;
+ return Opcode;
+ case AArch64::LDRDui:
+ case AArch64::STRDui:
+ case AArch64::LDRXui:
+ case AArch64::STRXui:
+ Scale = 8;
+ return Opcode;
+ case AArch64::LDRWui:
+ case AArch64::LDRSWui:
+ case AArch64::STRWui:
+ Scale = 4;
+ return Opcode;
+ case AArch64::LDRHui:
+ case AArch64::STRHui:
+ case AArch64::LDRHHui:
+ case AArch64::STRHHui:
+ case AArch64::LDRSHXui:
+ case AArch64::LDRSHWui:
+ Scale = 2;
+ return Opcode;
+ case AArch64::LDRBui:
+ case AArch64::LDRBBui:
+ case AArch64::LDRSBXui:
+ case AArch64::LDRSBWui:
+ case AArch64::STRBui:
+ case AArch64::STRBBui:
+ Scale = 1;
+ return Opcode;
+ }
+}
+
+// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
+// the opcode of an instruction performing the same operation, but using the
+// [Reg, #Imm] addressing mode with unscaled offset.
+static unsigned unscaledOffsetOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Address folding not implemented for instruction");
+
+ case AArch64::LDURQi:
+ case AArch64::STURQi:
+ case AArch64::LDURDi:
+ case AArch64::STURDi:
+ case AArch64::LDURXi:
+ case AArch64::STURXi:
+ case AArch64::LDURWi:
+ case AArch64::LDURSWi:
+ case AArch64::STURWi:
+ case AArch64::LDURHi:
+ case AArch64::STURHi:
+ case AArch64::LDURHHi:
+ case AArch64::STURHHi:
+ case AArch64::LDURSHXi:
+ case AArch64::LDURSHWi:
+ case AArch64::LDURBi:
+ case AArch64::STURBi:
+ case AArch64::LDURBBi:
+ case AArch64::STURBBi:
+ case AArch64::LDURSBWi:
+ case AArch64::LDURSBXi:
+ return Opcode;
+ case AArch64::LDRQui:
+ return AArch64::LDURQi;
+ case AArch64::STRQui:
+ return AArch64::STURQi;
+ case AArch64::LDRDui:
+ return AArch64::LDURDi;
+ case AArch64::STRDui:
+ return AArch64::STURDi;
+ case AArch64::LDRXui:
+ return AArch64::LDURXi;
+ case AArch64::STRXui:
+ return AArch64::STURXi;
+ case AArch64::LDRWui:
+ return AArch64::LDURWi;
+ case AArch64::LDRSWui:
+ return AArch64::LDURSWi;
+ case AArch64::STRWui:
+ return AArch64::STURWi;
+ case AArch64::LDRHui:
+ return AArch64::LDURHi;
+ case AArch64::STRHui:
+ return AArch64::STURHi;
+ case AArch64::LDRHHui:
+ return AArch64::LDURHHi;
+ case AArch64::STRHHui:
+ return AArch64::STURHHi;
+ case AArch64::LDRSHXui:
+ return AArch64::LDURSHXi;
+ case AArch64::LDRSHWui:
+ return AArch64::LDURSHWi;
+ case AArch64::LDRBBui:
+ return AArch64::LDURBBi;
+ case AArch64::LDRBui:
+ return AArch64::LDURBi;
+ case AArch64::STRBBui:
+ return AArch64::STURBBi;
+ case AArch64::STRBui:
+ return AArch64::STURBi;
+ case AArch64::LDRSBWui:
+ return AArch64::LDURSBWi;
+ case AArch64::LDRSBXui:
+ return AArch64::LDURSBXi;
+ }
+}
+
+// Given the opcode of a memory load/store instruction, return the opcode of an
+// instruction performing the same operation, but using
+// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
+// offset register.
+static unsigned offsetExtendOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Address folding not implemented for instruction");
+
+ case AArch64::LDRQroX:
+ case AArch64::LDURQi:
+ case AArch64::LDRQui:
+ return AArch64::LDRQroW;
+ case AArch64::STRQroX:
+ case AArch64::STURQi:
+ case AArch64::STRQui:
+ return AArch64::STRQroW;
+ case AArch64::LDRDroX:
+ case AArch64::LDURDi:
+ case AArch64::LDRDui:
+ return AArch64::LDRDroW;
+ case AArch64::STRDroX:
+ case AArch64::STURDi:
+ case AArch64::STRDui:
+ return AArch64::STRDroW;
+ case AArch64::LDRXroX:
+ case AArch64::LDURXi:
+ case AArch64::LDRXui:
+ return AArch64::LDRXroW;
+ case AArch64::STRXroX:
+ case AArch64::STURXi:
+ case AArch64::STRXui:
+ return AArch64::STRXroW;
+ case AArch64::LDRWroX:
+ case AArch64::LDURWi:
+ case AArch64::LDRWui:
+ return AArch64::LDRWroW;
+ case AArch64::LDRSWroX:
+ case AArch64::LDURSWi:
+ case AArch64::LDRSWui:
+ return AArch64::LDRSWroW;
+ case AArch64::STRWroX:
+ case AArch64::STURWi:
+ case AArch64::STRWui:
+ return AArch64::STRWroW;
+ case AArch64::LDRHroX:
+ case AArch64::LDURHi:
+ case AArch64::LDRHui:
+ return AArch64::LDRHroW;
+ case AArch64::STRHroX:
+ case AArch64::STURHi:
+ case AArch64::STRHui:
+ return AArch64::STRHroW;
+ case AArch64::LDRHHroX:
+ case AArch64::LDURHHi:
+ case AArch64::LDRHHui:
+ return AArch64::LDRHHroW;
+ case AArch64::STRHHroX:
+ case AArch64::STURHHi:
+ case AArch64::STRHHui:
+ return AArch64::STRHHroW;
+ case AArch64::LDRSHXroX:
+ case AArch64::LDURSHXi:
+ case AArch64::LDRSHXui:
+ return AArch64::LDRSHXroW;
+ case AArch64::LDRSHWroX:
+ case AArch64::LDURSHWi:
+ case AArch64::LDRSHWui:
+ return AArch64::LDRSHWroW;
+ case AArch64::LDRBroX:
+ case AArch64::LDURBi:
+ case AArch64::LDRBui:
+ return AArch64::LDRBroW;
+ case AArch64::LDRBBroX:
+ case AArch64::LDURBBi:
+ case AArch64::LDRBBui:
+ return AArch64::LDRBBroW;
+ case AArch64::LDRSBXroX:
+ case AArch64::LDURSBXi:
+ case AArch64::LDRSBXui:
+ return AArch64::LDRSBXroW;
+ case AArch64::LDRSBWroX:
+ case AArch64::LDURSBWi:
+ case AArch64::LDRSBWui:
+ return AArch64::LDRSBWroW;
+ case AArch64::STRBroX:
+ case AArch64::STURBi:
+ case AArch64::STRBui:
+ return AArch64::STRBroW;
+ case AArch64::STRBBroX:
+ case AArch64::STURBBi:
+ case AArch64::STRBBui:
+ return AArch64::STRBBroW;
+ }
+}
+
+MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
+ const ExtAddrMode &AM) const {
+
+ const DebugLoc &DL = MemI.getDebugLoc();
+ MachineBasicBlock &MBB = *MemI.getParent();
+ MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
+
+ if (AM.Form == ExtAddrMode::Formula::Basic) {
+ if (AM.ScaledReg) {
+ // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
+ unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
+ MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
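+    // Operand order of the ro-form: data register, base register, offset
+    // register, then two immediates selecting the sign-extension flag (0
+    // here, i.e. LSL) and whether the offset is scaled by the access size.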
+ auto B = BuildMI(MBB, MemI, DL, get(Opcode))
+ .addReg(MemI.getOperand(0).getReg(),
+ MemI.mayLoad() ? RegState::Define : 0)
+ .addReg(AM.BaseReg)
+ .addReg(AM.ScaledReg)
+ .addImm(0)
+ .addImm(AM.Scale > 1)
+ .setMemRefs(MemI.memoperands())
+ .setMIFlags(MemI.getFlags());
+ return B.getInstr();
+ }
+
+ assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
+ "Addressing mode not supported for folding");
+
+ // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
+ unsigned Scale = 1;
+ unsigned Opcode = MemI.getOpcode();
+ if (isInt<9>(AM.Displacement))
+ Opcode = unscaledOffsetOpcode(Opcode);
+ else
+ Opcode = scaledOffsetOpcode(Opcode, Scale);
+
+ auto B = BuildMI(MBB, MemI, DL, get(Opcode))
+ .addReg(MemI.getOperand(0).getReg(),
+ MemI.mayLoad() ? RegState::Define : 0)
+ .addReg(AM.BaseReg)
+ .addImm(AM.Displacement / Scale)
+ .setMemRefs(MemI.memoperands())
+ .setMIFlags(MemI.getFlags());
+ return B.getInstr();
+ }
+
+ if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
+ AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
+ // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
+ assert(AM.ScaledReg && !AM.Displacement &&
+ "Address offset can be a register or an immediate, but not both");
+ unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
+ MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
+ // Make sure the offset register is in the correct register class.
+ Register OffsetReg = AM.ScaledReg;
+ const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
+ if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
+ OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
+ .addReg(AM.ScaledReg, 0, AArch64::sub_32);
+ }
+ auto B = BuildMI(MBB, MemI, DL, get(Opcode))
+ .addReg(MemI.getOperand(0).getReg(),
+ MemI.mayLoad() ? RegState::Define : 0)
+ .addReg(AM.BaseReg)
+ .addReg(OffsetReg)
+ .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
+ .addImm(AM.Scale != 1)
+ .setMemRefs(MemI.memoperands())
+ .setMIFlags(MemI.getFlags());
+
+ return B.getInstr();
+ }
+
+ llvm_unreachable(
+ "Function must not be called with an addressing mode it can't handle");
+}
+
bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
- bool &OffsetIsScalable, unsigned &Width,
+ bool &OffsetIsScalable, TypeSize &Width,
const TargetRegisterInfo *TRI) const {
assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
// Handle only loads/stores with base register followed by immediate offset.
@@ -2693,47 +3520,51 @@ AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
}
bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
- unsigned &Width, int64_t &MinOffset,
+ TypeSize &Width, int64_t &MinOffset,
int64_t &MaxOffset) {
- const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
switch (Opcode) {
// Not a memory operation or something we want to handle.
default:
- Scale = TypeSize::Fixed(0);
- Width = 0;
+ Scale = TypeSize::getFixed(0);
+ Width = TypeSize::getFixed(0);
MinOffset = MaxOffset = 0;
return false;
case AArch64::STRWpost:
case AArch64::LDRWpost:
- Width = 32;
- Scale = TypeSize::Fixed(4);
+ Width = TypeSize::getFixed(32);
+ Scale = TypeSize::getFixed(4);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDURQi:
case AArch64::STURQi:
- Width = 16;
- Scale = TypeSize::Fixed(1);
+ Width = TypeSize::getFixed(16);
+ Scale = TypeSize::getFixed(1);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::PRFUMi:
case AArch64::LDURXi:
case AArch64::LDURDi:
+ case AArch64::LDAPURXi:
case AArch64::STURXi:
case AArch64::STURDi:
- Width = 8;
- Scale = TypeSize::Fixed(1);
+ case AArch64::STLURXi:
+ Width = TypeSize::getFixed(8);
+ Scale = TypeSize::getFixed(1);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDURWi:
case AArch64::LDURSi:
case AArch64::LDURSWi:
+ case AArch64::LDAPURi:
+ case AArch64::LDAPURSWi:
case AArch64::STURWi:
case AArch64::STURSi:
- Width = 4;
- Scale = TypeSize::Fixed(1);
+ case AArch64::STLURWi:
+ Width = TypeSize::getFixed(4);
+ Scale = TypeSize::getFixed(1);
MinOffset = -256;
MaxOffset = 255;
break;
@@ -2741,10 +3572,14 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LDURHHi:
case AArch64::LDURSHXi:
case AArch64::LDURSHWi:
+ case AArch64::LDAPURHi:
+ case AArch64::LDAPURSHWi:
+ case AArch64::LDAPURSHXi:
case AArch64::STURHi:
case AArch64::STURHHi:
- Width = 2;
- Scale = TypeSize::Fixed(1);
+ case AArch64::STLURHi:
+ Width = TypeSize::getFixed(2);
+ Scale = TypeSize::getFixed(1);
MinOffset = -256;
MaxOffset = 255;
break;
@@ -2752,10 +3587,14 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LDURBBi:
case AArch64::LDURSBXi:
case AArch64::LDURSBWi:
+ case AArch64::LDAPURBi:
+ case AArch64::LDAPURSBWi:
+ case AArch64::LDAPURSBXi:
case AArch64::STURBi:
case AArch64::STURBBi:
- Width = 1;
- Scale = TypeSize::Fixed(1);
+ case AArch64::STLURBi:
+ Width = TypeSize::getFixed(1);
+ Scale = TypeSize::getFixed(1);
MinOffset = -256;
MaxOffset = 255;
break;
@@ -2763,15 +3602,15 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LDNPQi:
case AArch64::STPQi:
case AArch64::STNPQi:
- Scale = TypeSize::Fixed(16);
- Width = 32;
+ Scale = TypeSize::getFixed(16);
+ Width = TypeSize::getFixed(32);
MinOffset = -64;
MaxOffset = 63;
break;
case AArch64::LDRQui:
case AArch64::STRQui:
- Scale = TypeSize::Fixed(16);
- Width = 16;
+ Scale = TypeSize::getFixed(16);
+ Width = TypeSize::getFixed(16);
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -2783,8 +3622,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STPDi:
case AArch64::STNPXi:
case AArch64::STNPDi:
- Scale = TypeSize::Fixed(8);
- Width = 16;
+ Scale = TypeSize::getFixed(8);
+ Width = TypeSize::getFixed(16);
MinOffset = -64;
MaxOffset = 63;
break;
@@ -2793,15 +3632,15 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LDRDui:
case AArch64::STRXui:
case AArch64::STRDui:
- Scale = TypeSize::Fixed(8);
- Width = 8;
+ Scale = TypeSize::getFixed(8);
+ Width = TypeSize::getFixed(8);
MinOffset = 0;
MaxOffset = 4095;
break;
case AArch64::StoreSwiftAsyncContext:
// Store is an STRXui, but there might be an ADDXri in the expansion too.
- Scale = TypeSize::Fixed(1);
- Width = 8;
+ Scale = TypeSize::getFixed(1);
+ Width = TypeSize::getFixed(8);
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -2813,8 +3652,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STPSi:
case AArch64::STNPWi:
case AArch64::STNPSi:
- Scale = TypeSize::Fixed(4);
- Width = 8;
+ Scale = TypeSize::getFixed(4);
+ Width = TypeSize::getFixed(8);
MinOffset = -64;
MaxOffset = 63;
break;
@@ -2823,8 +3662,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LDRSWui:
case AArch64::STRWui:
case AArch64::STRSui:
- Scale = TypeSize::Fixed(4);
- Width = 4;
+ Scale = TypeSize::getFixed(4);
+ Width = TypeSize::getFixed(4);
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -2834,8 +3673,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LDRSHXui:
case AArch64::STRHui:
case AArch64::STRHHui:
- Scale = TypeSize::Fixed(2);
- Width = 2;
+ Scale = TypeSize::getFixed(2);
+ Width = TypeSize::getFixed(2);
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -2845,8 +3684,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LDRSBXui:
case AArch64::STRBui:
case AArch64::STRBBui:
- Scale = TypeSize::Fixed(1);
- Width = 1;
+ Scale = TypeSize::getFixed(1);
+ Width = TypeSize::getFixed(1);
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -2854,15 +3693,15 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LDPXpost:
case AArch64::STPDpre:
case AArch64::LDPDpost:
- Scale = TypeSize::Fixed(8);
- Width = 8;
+ Scale = TypeSize::getFixed(8);
+ Width = TypeSize::getFixed(8);
MinOffset = -512;
MaxOffset = 504;
break;
case AArch64::STPQpre:
case AArch64::LDPQpost:
- Scale = TypeSize::Fixed(16);
- Width = 16;
+ Scale = TypeSize::getFixed(16);
+ Width = TypeSize::getFixed(16);
MinOffset = -1024;
MaxOffset = 1008;
break;
@@ -2870,27 +3709,27 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STRDpre:
case AArch64::LDRXpost:
case AArch64::LDRDpost:
- Scale = TypeSize::Fixed(1);
- Width = 8;
+ Scale = TypeSize::getFixed(1);
+ Width = TypeSize::getFixed(8);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::STRQpre:
case AArch64::LDRQpost:
- Scale = TypeSize::Fixed(1);
- Width = 16;
+ Scale = TypeSize::getFixed(1);
+ Width = TypeSize::getFixed(16);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::ADDG:
- Scale = TypeSize::Fixed(16);
- Width = 0;
+ Scale = TypeSize::getFixed(16);
+ Width = TypeSize::getFixed(0);
MinOffset = 0;
MaxOffset = 63;
break;
case AArch64::TAGPstack:
- Scale = TypeSize::Fixed(16);
- Width = 0;
+ Scale = TypeSize::getFixed(16);
+ Width = TypeSize::getFixed(0);
// TAGP with a negative offset turns into SUBP, which has a maximum offset
// of 63 (not 64!).
MinOffset = -63;
@@ -2899,43 +3738,43 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LDG:
case AArch64::STGi:
case AArch64::STZGi:
- Scale = TypeSize::Fixed(16);
- Width = 16;
+ Scale = TypeSize::getFixed(16);
+ Width = TypeSize::getFixed(16);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::STR_ZZZZXI:
case AArch64::LDR_ZZZZXI:
- Scale = TypeSize::Scalable(16);
- Width = SVEMaxBytesPerVector * 4;
+ Scale = TypeSize::getScalable(16);
+ Width = TypeSize::getScalable(16 * 4);
MinOffset = -256;
MaxOffset = 252;
break;
case AArch64::STR_ZZZXI:
case AArch64::LDR_ZZZXI:
- Scale = TypeSize::Scalable(16);
- Width = SVEMaxBytesPerVector * 3;
+ Scale = TypeSize::getScalable(16);
+ Width = TypeSize::getScalable(16 * 3);
MinOffset = -256;
MaxOffset = 253;
break;
case AArch64::STR_ZZXI:
case AArch64::LDR_ZZXI:
- Scale = TypeSize::Scalable(16);
- Width = SVEMaxBytesPerVector * 2;
+ Scale = TypeSize::getScalable(16);
+ Width = TypeSize::getScalable(16 * 2);
MinOffset = -256;
MaxOffset = 254;
break;
case AArch64::LDR_PXI:
case AArch64::STR_PXI:
- Scale = TypeSize::Scalable(2);
- Width = SVEMaxBytesPerVector / 8;
+ Scale = TypeSize::getScalable(2);
+ Width = TypeSize::getScalable(2);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDR_ZXI:
case AArch64::STR_ZXI:
- Scale = TypeSize::Scalable(16);
- Width = SVEMaxBytesPerVector;
+ Scale = TypeSize::getScalable(16);
+ Width = TypeSize::getScalable(16);
MinOffset = -256;
MaxOffset = 255;
break;
@@ -2961,8 +3800,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LDNF1D_IMM:
      // A full vector's worth of data
// Width = mbytes * elements
- Scale = TypeSize::Scalable(16);
- Width = SVEMaxBytesPerVector;
+ Scale = TypeSize::getScalable(16);
+ Width = TypeSize::getScalable(16);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -2974,8 +3813,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::ST2H_IMM:
case AArch64::ST2W_IMM:
case AArch64::ST2D_IMM:
- Scale = TypeSize::Scalable(32);
- Width = SVEMaxBytesPerVector * 2;
+ Scale = TypeSize::getScalable(32);
+ Width = TypeSize::getScalable(16 * 2);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -2987,8 +3826,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::ST3H_IMM:
case AArch64::ST3W_IMM:
case AArch64::ST3D_IMM:
- Scale = TypeSize::Scalable(48);
- Width = SVEMaxBytesPerVector * 3;
+ Scale = TypeSize::getScalable(48);
+ Width = TypeSize::getScalable(16 * 3);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -3000,8 +3839,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::ST4H_IMM:
case AArch64::ST4W_IMM:
case AArch64::ST4D_IMM:
- Scale = TypeSize::Scalable(64);
- Width = SVEMaxBytesPerVector * 4;
+ Scale = TypeSize::getScalable(64);
+ Width = TypeSize::getScalable(16 * 4);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -3022,8 +3861,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LDNF1SW_D_IMM:
      // A half vector's worth of data
// Width = mbytes * elements
- Scale = TypeSize::Scalable(8);
- Width = SVEMaxBytesPerVector / 2;
+ Scale = TypeSize::getScalable(8);
+ Width = TypeSize::getScalable(8);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -3039,8 +3878,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LDNF1SH_D_IMM:
      // A quarter vector's worth of data
// Width = mbytes * elements
- Scale = TypeSize::Scalable(4);
- Width = SVEMaxBytesPerVector / 4;
+ Scale = TypeSize::getScalable(4);
+ Width = TypeSize::getScalable(4);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -3051,21 +3890,21 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LDNF1SB_D_IMM:
      // An eighth of a vector's worth of data
// Width = mbytes * elements
- Scale = TypeSize::Scalable(2);
- Width = SVEMaxBytesPerVector / 8;
+ Scale = TypeSize::getScalable(2);
+ Width = TypeSize::getScalable(2);
MinOffset = -8;
MaxOffset = 7;
break;
case AArch64::ST2Gi:
case AArch64::STZ2Gi:
- Scale = TypeSize::Fixed(16);
- Width = 32;
+ Scale = TypeSize::getFixed(16);
+ Width = TypeSize::getFixed(32);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::STGPi:
- Scale = TypeSize::Fixed(16);
- Width = 16;
+ Scale = TypeSize::getFixed(16);
+ Width = TypeSize::getFixed(16);
MinOffset = -64;
MaxOffset = 63;
break;
@@ -3076,8 +3915,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1RSB_H_IMM:
case AArch64::LD1RSB_S_IMM:
case AArch64::LD1RSB_D_IMM:
- Scale = TypeSize::Fixed(1);
- Width = 1;
+ Scale = TypeSize::getFixed(1);
+ Width = TypeSize::getFixed(1);
MinOffset = 0;
MaxOffset = 63;
break;
@@ -3086,22 +3925,22 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1RH_D_IMM:
case AArch64::LD1RSH_S_IMM:
case AArch64::LD1RSH_D_IMM:
- Scale = TypeSize::Fixed(2);
- Width = 2;
+ Scale = TypeSize::getFixed(2);
+ Width = TypeSize::getFixed(2);
MinOffset = 0;
MaxOffset = 63;
break;
case AArch64::LD1RW_IMM:
case AArch64::LD1RW_D_IMM:
case AArch64::LD1RSW_IMM:
- Scale = TypeSize::Fixed(4);
- Width = 4;
+ Scale = TypeSize::getFixed(4);
+ Width = TypeSize::getFixed(4);
MinOffset = 0;
MaxOffset = 63;
break;
case AArch64::LD1RD_IMM:
- Scale = TypeSize::Fixed(8);
- Width = 8;
+ Scale = TypeSize::getFixed(8);
+ Width = TypeSize::getFixed(8);
MinOffset = 0;
MaxOffset = 63;
break;
@@ -3134,6 +3973,7 @@ int AArch64InstrInfo::getMemScale(unsigned Opc) {
case AArch64::LDRSpre:
case AArch64::LDRSWui:
case AArch64::LDURSWi:
+ case AArch64::LDRSWpre:
case AArch64::LDRWpre:
case AArch64::LDRWui:
case AArch64::LDURWi:
@@ -3189,6 +4029,7 @@ bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
return false;
case AArch64::LDRWpre:
case AArch64::LDRXpre:
+ case AArch64::LDRSWpre:
case AArch64::LDRSpre:
case AArch64::LDRDpre:
case AArch64::LDRQpre:
@@ -3284,6 +4125,32 @@ bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
return llvm::any_of(MI.operands(), IsQFPR);
}
+bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case AArch64::BRK:
+ case AArch64::HLT:
+ case AArch64::PACIASP:
+ case AArch64::PACIBSP:
+ // Implicit BTI behavior.
+ return true;
+ case AArch64::PAUTH_PROLOGUE:
+ // PAUTH_PROLOGUE expands to PACI(A|B)SP.
+ return true;
+ case AArch64::HINT: {
+ unsigned Imm = MI.getOperand(0).getImm();
+ // Explicit BTI instruction.
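+    // (HINT #32/#34/#36/#38 encode BTI, BTI c, BTI j and BTI jc.)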
+ if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
+ return true;
+ // PACI(A|B)SP instructions.
+ if (Imm == 25 || Imm == 27)
+ return true;
+ return false;
+ }
+ default:
+ return false;
+ }
+}
+
bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
auto IsFPR = [&](const MachineOperand &Op) {
if (!Op.isReg())
@@ -3371,8 +4238,9 @@ static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
///
/// Only called for LdSt for which getMemOperandWithOffset returns true.
bool AArch64InstrInfo::shouldClusterMemOps(
- ArrayRef<const MachineOperand *> BaseOps1,
- ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
+ ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
+ bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
+ int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
unsigned NumBytes) const {
assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
const MachineOperand &BaseOp1 = *BaseOps1.front();
@@ -3390,7 +4258,7 @@ bool AArch64InstrInfo::shouldClusterMemOps(
return false;
// Only cluster up to a single pair.
- if (NumLoads > 2)
+ if (ClusterSize > 2)
return false;
if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
@@ -3580,6 +4448,30 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
+ // Copy a predicate-as-counter register by ORRing with itself as if it
+ // were a regular predicate (mask) register.
+ bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
+ bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
+ if (DestIsPNR || SrcIsPNR) {
+ assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
+ "Unexpected predicate-as-counter register.");
+ auto ToPPR = [](MCRegister R) -> MCRegister {
+ return (R - AArch64::PN0) + AArch64::P0;
+ };
+ MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg;
+ MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg;
+
+ if (PPRSrcReg != PPRDestReg) {
+ auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
+ .addReg(PPRSrcReg) // Pg
+ .addReg(PPRSrcReg)
+ .addReg(PPRSrcReg, getKillRegState(KillSrc));
+ if (DestIsPNR)
+ NewMI.addDef(DestReg, RegState::Implicit);
+ }
+ return;
+ }
+
// Copy a Z register by ORRing with itself.
if (AArch64::ZPRRegClass.contains(DestReg) &&
AArch64::ZPRRegClass.contains(SrcReg)) {
@@ -3591,8 +4483,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
// Copy a Z register pair by copying the individual sub-registers.
- if (AArch64::ZPR2RegClass.contains(DestReg) &&
- AArch64::ZPR2RegClass.contains(SrcReg)) {
+ if ((AArch64::ZPR2RegClass.contains(DestReg) ||
+ AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
+ (AArch64::ZPR2RegClass.contains(SrcReg) ||
+ AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
@@ -3612,8 +4506,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
// Copy a Z register quad by copying the individual sub-registers.
- if (AArch64::ZPR4RegClass.contains(DestReg) &&
- AArch64::ZPR4RegClass.contains(SrcReg)) {
+ if ((AArch64::ZPR4RegClass.contains(DestReg) ||
+ AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
+ (AArch64::ZPR4RegClass.contains(SrcReg) ||
+ AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
AArch64::zsub2, AArch64::zsub3};
@@ -3869,6 +4765,7 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
unsigned Opc = 0;
bool Offset = true;
+ MCRegister PNRReg = MCRegister::NoRegister;
unsigned StackID = TargetStackID::Default;
switch (TRI->getSpillSize(*RC)) {
case 1:
@@ -3879,7 +4776,21 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
if (AArch64::FPR16RegClass.hasSubClassEq(RC))
Opc = AArch64::STRHui;
else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+ assert(Subtarget.hasSVEorSME() &&
+ "Unexpected register store without SVE store instructions");
+ Opc = AArch64::STR_PXI;
+ StackID = TargetStackID::ScalableVector;
+ } else if (AArch64::PNRRegClass.hasSubClassEq(RC)) {
+ assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
+ "Unexpected register store without SVE2p1 or SME2");
+ if (SrcReg.isVirtual()) {
+ auto NewSrcReg =
+ MF.getRegInfo().createVirtualRegister(&AArch64::PPRRegClass);
+ BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), NewSrcReg)
+ .addReg(SrcReg);
+ SrcReg = NewSrcReg;
+ } else
+ SrcReg = (SrcReg - AArch64::PN0) + AArch64::P0;
Opc = AArch64::STR_PXI;
StackID = TargetStackID::ScalableVector;
}
@@ -3923,7 +4834,8 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
AArch64::sube64, AArch64::subo64, FI, MMO);
return;
} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+ assert(Subtarget.hasSVEorSME() &&
+ "Unexpected register store without SVE store instructions");
Opc = AArch64::STR_ZXI;
StackID = TargetStackID::ScalableVector;
}
@@ -3944,8 +4856,10 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Twov2d;
Offset = false;
- } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+ } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
+ AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVEorSME() &&
+ "Unexpected register store without SVE store instructions");
Opc = AArch64::STR_ZZXI;
StackID = TargetStackID::ScalableVector;
}
@@ -3956,7 +4870,8 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
Opc = AArch64::ST1Threev2d;
Offset = false;
} else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+ assert(Subtarget.hasSVEorSME() &&
+ "Unexpected register store without SVE store instructions");
Opc = AArch64::STR_ZZZXI;
StackID = TargetStackID::ScalableVector;
}
@@ -3966,8 +4881,10 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Fourv2d;
Offset = false;
- } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+ } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
+ AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVEorSME() &&
+ "Unexpected register store without SVE store instructions");
Opc = AArch64::STR_ZZZZXI;
StackID = TargetStackID::ScalableVector;
}
@@ -3982,6 +4899,8 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
if (Offset)
MI.addImm(0);
+ if (PNRReg.isValid())
+ MI.addDef(PNRReg, RegState::Implicit);
MI.addMemOperand(MMO);
}
@@ -4026,6 +4945,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
unsigned Opc = 0;
bool Offset = true;
unsigned StackID = TargetStackID::Default;
+ Register PNRReg = MCRegister::NoRegister;
switch (TRI->getSpillSize(*RC)) {
case 1:
if (AArch64::FPR8RegClass.hasSubClassEq(RC))
@@ -4035,7 +4955,18 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
if (AArch64::FPR16RegClass.hasSubClassEq(RC))
Opc = AArch64::LDRHui;
else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+ assert(Subtarget.hasSVEorSME() &&
+ "Unexpected register load without SVE load instructions");
+ Opc = AArch64::LDR_PXI;
+ StackID = TargetStackID::ScalableVector;
+ } else if (AArch64::PNRRegClass.hasSubClassEq(RC)) {
+ assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
+ "Unexpected register load without SVE2p1 or SME2");
+ PNRReg = DestReg;
+ if (DestReg.isVirtual())
+ DestReg = MF.getRegInfo().createVirtualRegister(&AArch64::PPRRegClass);
+ else
+ DestReg = (DestReg - AArch64::PN0) + AArch64::P0;
Opc = AArch64::LDR_PXI;
StackID = TargetStackID::ScalableVector;
}
@@ -4079,7 +5010,8 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
AArch64::subo64, FI, MMO);
return;
} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+ assert(Subtarget.hasSVEorSME() &&
+ "Unexpected register load without SVE load instructions");
Opc = AArch64::LDR_ZXI;
StackID = TargetStackID::ScalableVector;
}
@@ -4100,8 +5032,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Twov2d;
Offset = false;
- } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+ } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
+ AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVEorSME() &&
+ "Unexpected register load without SVE load instructions");
Opc = AArch64::LDR_ZZXI;
StackID = TargetStackID::ScalableVector;
}
@@ -4112,7 +5046,8 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
Opc = AArch64::LD1Threev2d;
Offset = false;
} else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+ assert(Subtarget.hasSVEorSME() &&
+ "Unexpected register load without SVE load instructions");
Opc = AArch64::LDR_ZZZXI;
StackID = TargetStackID::ScalableVector;
}
@@ -4122,8 +5057,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Fourv2d;
Offset = false;
- } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+ } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
+ AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVEorSME() &&
+ "Unexpected register load without SVE load instructions");
Opc = AArch64::LDR_ZZZZXI;
StackID = TargetStackID::ScalableVector;
}
@@ -4138,7 +5075,13 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FI);
if (Offset)
MI.addImm(0);
+ if (PNRReg.isValid() && !PNRReg.isVirtual())
+ MI.addDef(PNRReg, RegState::Implicit);
MI.addMemOperand(MMO);
+
+ if (PNRReg.isValid() && PNRReg.isVirtual())
+ BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), PNRReg)
+ .addReg(DestReg);
}
bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
@@ -4513,9 +5456,6 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
// register class, TargetInstrInfo::foldMemoryOperand() is going to try.
//
// To prevent that, we are going to constrain the %0 register class here.
- //
- // <rdar://problem/11522048>
- //
if (MI.isFullCopy()) {
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
@@ -4597,42 +5537,14 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
//
// STRXui %xzr, %stack.0
//
- if (IsSpill && DstMO.isUndef() && SrcReg.isPhysical()) {
+ if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
+ TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
assert(SrcMO.getSubReg() == 0 &&
"Unexpected subreg on physical register");
- const TargetRegisterClass *SpillRC;
- unsigned SpillSubreg;
- switch (DstMO.getSubReg()) {
- default:
- SpillRC = nullptr;
- break;
- case AArch64::sub_32:
- case AArch64::ssub:
- if (AArch64::GPR32RegClass.contains(SrcReg)) {
- SpillRC = &AArch64::GPR64RegClass;
- SpillSubreg = AArch64::sub_32;
- } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
- SpillRC = &AArch64::FPR64RegClass;
- SpillSubreg = AArch64::ssub;
- } else
- SpillRC = nullptr;
- break;
- case AArch64::dsub:
- if (AArch64::FPR64RegClass.contains(SrcReg)) {
- SpillRC = &AArch64::FPR128RegClass;
- SpillSubreg = AArch64::dsub;
- } else
- SpillRC = nullptr;
- break;
- }
-
- if (SpillRC)
- if (unsigned WidenedSrcReg =
- TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
- storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
- FrameIndex, SpillRC, &TRI, Register());
- return &*--InsertPt;
- }
+ storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
+ FrameIndex, &AArch64::GPR64RegClass, &TRI,
+ Register());
+ return &*--InsertPt;
}
// Handle cases like filling use of:
@@ -4699,6 +5611,14 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
switch (MI.getOpcode()) {
default:
break;
+ case AArch64::LD1Rv1d:
+ case AArch64::LD1Rv2s:
+ case AArch64::LD1Rv2d:
+ case AArch64::LD1Rv4h:
+ case AArch64::LD1Rv4s:
+ case AArch64::LD1Rv8b:
+ case AArch64::LD1Rv8h:
+ case AArch64::LD1Rv16b:
case AArch64::LD1Twov2d:
case AArch64::LD1Threev2d:
case AArch64::LD1Fourv2d:
@@ -4723,8 +5643,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
}
// Get the min/max offset and the scale.
- TypeSize ScaleValue(0U, false);
- unsigned Width;
+ TypeSize ScaleValue(0U, false), Width(0U, false);
int64_t MinOff, MaxOff;
if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
MaxOff))
@@ -4817,6 +5736,12 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
return false;
}
+void AArch64InstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ DebugLoc DL;
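+  // HINT #0 encodes the architectural NOP.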
+ BuildMI(MBB, MI, DL, get(AArch64::HINT)).addImm(0);
+}
+
MCInst AArch64InstrInfo::getNop() const {
return MCInstBuilder(AArch64::HINT).addImm(0);
}
@@ -7316,12 +8241,24 @@ AArch64InstrInfo::getOutliningCandidateInfo(
// necessary. However, at this point we don't know if the outlined function
// will have a RET instruction so we assume the worst.
const TargetRegisterInfo &TRI = getRegisterInfo();
+ // Performing a tail call may require extra checks when PAuth is enabled.
+ // If PAuth is disabled, set it to zero for uniformity.
+ unsigned NumBytesToCheckLRInTCEpilogue = 0;
if (FirstCand.getMF()
->getInfo<AArch64FunctionInfo>()
->shouldSignReturnAddress(true)) {
// One PAC and one AUT instructions
NumBytesToCreateFrame += 8;
+ // PAuth is enabled - set extra tail call cost, if any.
+ auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod();
+ NumBytesToCheckLRInTCEpilogue =
+ AArch64PAuth::getCheckerSizeInBytes(LRCheckMethod);
+ // Checking the authenticated LR value may significantly impact
+ // SequenceSize, so account for it for more precise results.
+ if (isTailCallReturnInst(*RepeatedSequenceLocs[0].back()))
+ SequenceSize += NumBytesToCheckLRInTCEpilogue;
+
// We have to check if sp modifying instructions would get outlined.
// If so we only allow outlining if sp is unchanged overall, so matching
// sub and add instructions are okay to outline, all other sp modifications
@@ -7463,8 +8400,8 @@ AArch64InstrInfo::getOutliningCandidateInfo(
// if fixing it up would be in range.
int64_t MinOffset,
MaxOffset; // Unscaled offsets for the instruction.
- TypeSize Scale(0U, false); // The scale to multiply the offsets by.
- unsigned DummyWidth;
+      // The scale to multiply the offsets by; the width is unused here.
+      TypeSize Scale(0U, false), DummyWidth(0U, false);
getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
Offset += 16; // Update the offset to what it would be if we outlined.
@@ -7492,7 +8429,8 @@ AArch64InstrInfo::getOutliningCandidateInfo(
if (RepeatedSequenceLocs[0].back()->isTerminator()) {
FrameID = MachineOutlinerTailCall;
NumBytesToCreateFrame = 0;
- SetCandidateCallInfo(MachineOutlinerTailCall, 4);
+ unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
+ SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
}
else if (LastInstrOpcode == AArch64::BL ||
@@ -7501,7 +8439,7 @@ AArch64InstrInfo::getOutliningCandidateInfo(
!HasBTI)) {
// FIXME: Do we need to check if the code after this uses the value of LR?
FrameID = MachineOutlinerThunk;
- NumBytesToCreateFrame = 0;
+ NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
SetCandidateCallInfo(MachineOutlinerThunk, 4);
}
@@ -7670,6 +8608,23 @@ AArch64InstrInfo::getOutliningCandidateInfo(
NumBytesToCreateFrame, FrameID);
}
+void AArch64InstrInfo::mergeOutliningCandidateAttributes(
+ Function &F, std::vector<outliner::Candidate> &Candidates) const {
+ // If a bunch of candidates reach this point they must agree on their return
+ // address signing. It is therefore enough to just consider the signing
+ // behaviour of one of them
+ const auto &CFn = Candidates.front().getMF()->getFunction();
+
+ // Since all candidates belong to the same module, just copy the
+ // function-level attributes of an arbitrary function.
+ if (CFn.hasFnAttribute("sign-return-address"))
+ F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
+ if (CFn.hasFnAttribute("sign-return-address-key"))
+ F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
+
+ AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
+}
+
bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
const Function &F = MF.getFunction();
@@ -7831,6 +8786,8 @@ AArch64InstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
case AArch64::RETAA:
case AArch64::RETAB:
case AArch64::EMITBKEY:
+ case AArch64::PAUTH_PROLOGUE:
+ case AArch64::PAUTH_EPILOGUE:
return outliner::InstrType::Illegal;
}
@@ -7940,11 +8897,8 @@ AArch64InstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
// Don't outline BTI instructions, because that will prevent the outlining
// site from being indirectly callable.
- if (MI.getOpcode() == AArch64::HINT) {
- int64_t Imm = MI.getOperand(0).getImm();
- if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
- return outliner::InstrType::Illegal;
- }
+ if (hasBTISemantics(MI))
+ return outliner::InstrType::Illegal;
return outliner::InstrType::Legal;
}
@@ -7952,7 +8906,7 @@ AArch64InstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
for (MachineInstr &MI : MBB) {
const MachineOperand *Base;
- unsigned Width;
+ TypeSize Width(0, false);
int64_t Offset;
bool OffsetIsScalable;
@@ -7982,65 +8936,16 @@ void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
}
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
- bool ShouldSignReturnAddr,
- bool ShouldSignReturnAddrWithBKey) {
- if (ShouldSignReturnAddr) {
- MachineBasicBlock::iterator MBBPAC = MBB.begin();
- MachineBasicBlock::iterator MBBAUT = MBB.getFirstTerminator();
- const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- DebugLoc DL;
-
- if (MBBAUT != MBB.end())
- DL = MBBAUT->getDebugLoc();
-
- // At the very beginning of the basic block we insert the following
- // depending on the key type
- //
- // a_key: b_key:
- // PACIASP EMITBKEY
- // CFI_INSTRUCTION PACIBSP
- // CFI_INSTRUCTION
- if (ShouldSignReturnAddrWithBKey) {
- BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::EMITBKEY))
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- BuildMI(MBB, MBBPAC, DebugLoc(),
- TII->get(ShouldSignReturnAddrWithBKey ? AArch64::PACIBSP
- : AArch64::PACIASP))
- .setMIFlag(MachineInstr::FrameSetup);
-
- if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
- unsigned CFIIndex =
- MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
- BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
- }
+ const AArch64InstrInfo *TII,
+ bool ShouldSignReturnAddr) {
+ if (!ShouldSignReturnAddr)
+ return;
- // If v8.3a features are available we can replace a RET instruction by
- // RETAA or RETAB and omit the AUT instructions. In this case the
- // DW_CFA_AARCH64_negate_ra_state can't be emitted.
- if (Subtarget.hasPAuth() && MBBAUT != MBB.end() &&
- MBBAUT->getOpcode() == AArch64::RET) {
- BuildMI(MBB, MBBAUT, DL,
- TII->get(ShouldSignReturnAddrWithBKey ? AArch64::RETAB
- : AArch64::RETAA))
- .copyImplicitOps(*MBBAUT);
- MBB.erase(MBBAUT);
- } else {
- BuildMI(MBB, MBBAUT, DL,
- TII->get(ShouldSignReturnAddrWithBKey ? AArch64::AUTIBSP
- : AArch64::AUTIASP))
- .setMIFlag(MachineInstr::FrameDestroy);
- unsigned CFIIndexAuth =
- MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
- BuildMI(MBB, MBBAUT, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndexAuth)
- .setMIFlags(MachineInstr::FrameDestroy);
- }
- }
+ BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBB.getFirstInstrTerminator(), DebugLoc(),
+ TII->get(AArch64::PAUTH_EPILOGUE))
+ .setMIFlag(MachineInstr::FrameDestroy);
}
void AArch64InstrInfo::buildOutlinedFrame(
@@ -8140,20 +9045,12 @@ void AArch64InstrInfo::buildOutlinedFrame(
Et = MBB.insert(Et, LDRXpost);
}
- // If a bunch of candidates reach this point they must agree on their return
- // address signing. It is therefore enough to just consider the signing
- // behaviour of one of them
- const auto &MFI = *OF.Candidates.front().getMF()->getInfo<AArch64FunctionInfo>();
- bool ShouldSignReturnAddr = MFI.shouldSignReturnAddress(!IsLeafFunction);
-
- // a_key is the default
- bool ShouldSignReturnAddrWithBKey = MFI.shouldSignWithBKey();
+ bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(!IsLeafFunction);
// If this is a tail call outlined function, then there's already a return.
if (OF.FrameConstructionID == MachineOutlinerTailCall ||
OF.FrameConstructionID == MachineOutlinerThunk) {
- signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
- ShouldSignReturnAddrWithBKey);
+ signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
return;
}
@@ -8167,8 +9064,7 @@ void AArch64InstrInfo::buildOutlinedFrame(
.addReg(AArch64::LR);
MBB.insert(MBB.end(), ret);
- signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
- ShouldSignReturnAddrWithBKey);
+ signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
FI->setOutliningStyle("Function");
@@ -8262,6 +9158,26 @@ bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
return MF.getFunction().hasMinSize();
}
+void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator Iter,
+ DebugLoc &DL,
+ bool AllowSideEffects) const {
+ const MachineFunction &MF = *MBB.getParent();
+ const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
+ const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
+
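+  // Zero GPRs with MOVZ. For vector registers, prefer an SVE DUP #0 so the
+  // whole scalable register is cleared; otherwise zero the 128-bit register
+  // with a NEON MOVI.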
+ if (TRI.isGeneralPurposeRegister(MF, Reg)) {
+ BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
+ } else if (STI.hasSVE()) {
+ BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
+ .addImm(0)
+ .addImm(0);
+ } else {
+ BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
+ .addImm(0);
+ }
+}
+
std::optional<DestSourcePair>
AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
@@ -8269,19 +9185,32 @@ AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
// and zero immediate operands used as an alias for mov instruction.
if (MI.getOpcode() == AArch64::ORRWrs &&
MI.getOperand(1).getReg() == AArch64::WZR &&
- MI.getOperand(3).getImm() == 0x0) {
+ MI.getOperand(3).getImm() == 0x0 &&
+ // Check that the w->w move is not a zero-extending w->x mov.
+ (!MI.getOperand(0).getReg().isVirtual() ||
+ MI.getOperand(0).getSubReg() == 0) &&
+ (!MI.getOperand(0).getReg().isPhysical() ||
+ MI.findRegisterDefOperandIdx(MI.getOperand(0).getReg() - AArch64::W0 +
+ AArch64::X0) == -1))
return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
- }
if (MI.getOpcode() == AArch64::ORRXrs &&
MI.getOperand(1).getReg() == AArch64::XZR &&
- MI.getOperand(3).getImm() == 0x0) {
+ MI.getOperand(3).getImm() == 0x0)
return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
- }
return std::nullopt;
}
+std::optional<DestSourcePair>
+AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
+ if (MI.getOpcode() == AArch64::ORRWrs &&
+ MI.getOperand(1).getReg() == AArch64::WZR &&
+ MI.getOperand(3).getImm() == 0x0)
+ return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
+ return std::nullopt;
+}
+
std::optional<RegImmPair>
AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
int Sign = 1;
@@ -8325,7 +9254,7 @@ static std::optional<ParamLoadedValue>
describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- auto DestSrc = TII->isCopyInstr(MI);
+ auto DestSrc = TII->isCopyLikeInstr(MI);
if (!DestSrc)
return std::nullopt;
@@ -8356,6 +9285,55 @@ describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
return std::nullopt;
}
+bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
+ // Functions cannot be split to different sections on AArch64 if they have
+ // a red zone. This is because relaxing a cross-section branch may require
+ // incrementing the stack pointer to spill a register, which would overwrite
+ // the red zone.
+ if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
+ return false;
+
+ return TargetInstrInfo::isFunctionSafeToSplit(MF);
+}
+
+bool AArch64InstrInfo::isMBBSafeToSplitToCold(
+ const MachineBasicBlock &MBB) const {
+ // Asm Goto blocks can contain conditional branches to goto labels, which can
+ // get moved out of range of the branch instruction.
+ auto isAsmGoto = [](const MachineInstr &MI) {
+ return MI.getOpcode() == AArch64::INLINEASM_BR;
+ };
+ if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
+ return false;
+
+ // Because jump tables are label-relative instead of table-relative, they all
+ // must be in the same section or relocation fixup handling will fail.
+
+ // Check if MBB is a jump table target
+ const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
+ auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
+ return llvm::is_contained(JTE.MBBs, &MBB);
+ };
+ if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
+ return false;
+
+ // Check if MBB contains a jump table lookup
+ for (const MachineInstr &MI : MBB) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_BRJT:
+ case AArch64::JumpTableDest32:
+ case AArch64::JumpTableDest16:
+ case AArch64::JumpTableDest8:
+ return false;
+ default:
+ continue;
+ }
+ }
+
+ // MBB isn't a special case, so it's safe to be split to the cold section.
+ return true;
+}
+
std::optional<ParamLoadedValue>
AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
Register Reg) const {
@@ -8417,8 +9395,32 @@ bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
}
unsigned int
-AArch64InstrInfo::getTailDuplicateSize(CodeGenOpt::Level OptLevel) const {
- return OptLevel >= CodeGenOpt::Aggressive ? 6 : 2;
+AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
+ return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
+}
+
+bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
+ unsigned Scale) const {
+ if (Offset && Scale)
+ return false;
+
+ // Check Reg + Imm
+ if (!Scale) {
+ // 9-bit signed offset
+ if (isInt<9>(Offset))
+ return true;
+
+ // 12-bit unsigned offset
+ unsigned Shift = Log2_64(NumBytes);
+ if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
+ // Must be a multiple of NumBytes (NumBytes is a power of 2)
+ (Offset >> Shift) << Shift == Offset)
+ return true;
+ return false;
+ }
+
+ // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
+ return Scale == 1 || (Scale > 0 && Scale == NumBytes);
}
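[Editorial note] The checks above cover three addressing forms; a minimal standalone sketch with worked values (editorial illustration mirroring the code above, assuming NumBytes is the access size in bytes):

    #include <cstdint>

    // Sketch of AArch64InstrInfo::isLegalAddressingMode.
    static bool legalAM(unsigned NumBytes, int64_t Offset, unsigned Scale) {
      if (Offset && Scale)
        return false;                                  // no imm + scaled reg
      if (!Scale)                                      // reg + imm forms
        return (Offset >= -256 && Offset <= 255) ||    // LDUR/STUR simm9
               (NumBytes && Offset > 0 && Offset % NumBytes == 0 &&
                Offset / NumBytes <= 4095);            // LDR/STR uimm12
      return Scale == 1 || Scale == NumBytes;          // reg + reg << amount
    }

    // legalAM(8, -256, 0)  -> true   (unscaled 9-bit signed offset)
    // legalAM(8, 32760, 0) -> true   (4095 * 8, scaled 12-bit offset)
    // legalAM(8, 32761, 0) -> false  (not a multiple of the access size)
    // legalAM(8, 0, 8)     -> true   (base + index, LSL #3)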
unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
@@ -8428,6 +9430,146 @@ unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
return AArch64::BLR;
}
+bool AArch64InstrInfo::isReallyTriviallyReMaterializable(
+ const MachineInstr &MI) const {
+ const MachineFunction &MF = *MI.getMF();
+ const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+
+ // If the function contains changes to streaming mode, then there
+ // is a danger that rematerialised instructions end up between
+ // instruction sequences (e.g. call sequences, or prolog/epilogue)
+ // where the streaming-SVE mode is temporarily changed.
+ if (AFI.hasStreamingModeChanges()) {
+ // Avoid rematerializing rematerializable instructions that use/define
+ // scalable values, such as 'pfalse' or 'ptrue', which produce different
+ // results when the runtime vector length is different.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (any_of(MI.operands(), [&MRI, &MFI](const MachineOperand &MO) {
+ if (MO.isFI() &&
+ MFI.getStackID(MO.getIndex()) == TargetStackID::ScalableVector)
+ return true;
+ if (!MO.isReg())
+ return false;
+
+ if (MO.getReg().isVirtual()) {
+ const TargetRegisterClass *RC = MRI.getRegClass(MO.getReg());
+ return AArch64::ZPRRegClass.hasSubClassEq(RC) ||
+ AArch64::PPRRegClass.hasSubClassEq(RC);
+ }
+ return AArch64::ZPRRegClass.contains(MO.getReg()) ||
+ AArch64::PPRRegClass.contains(MO.getReg());
+ }))
+ return false;
+
+ // Avoid rematerializing instructions that return a value that is
+ // different depending on vector length, even when it is not returned
+ // in a scalable vector/predicate register.
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case AArch64::RDVLI_XI:
+ case AArch64::ADDVL_XXI:
+ case AArch64::ADDPL_XXI:
+ case AArch64::CNTB_XPiI:
+ case AArch64::CNTH_XPiI:
+ case AArch64::CNTW_XPiI:
+ case AArch64::CNTD_XPiI:
+ return false;
+ }
+ }
+
+ return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
+}
+
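[Editorial note] The opcode list above exists because these instructions materialize the current vector length rather than read a register; a hedged illustration (assuming the usual SME behaviour where the streaming and non-streaming vector lengths may differ):

    cntb x0        // x0 = current vector length in bytes
    smstart        // enter streaming mode; the vector length may change
    // rematerializing "cntb x0" past this point could yield a different value

hence rematerialization is disabled in functions with streaming-mode changes.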
+MachineBasicBlock::iterator
+AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
+ Register TargetReg, bool FrameSetup) const {
+ assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
+
+ MachineBasicBlock &MBB = *MBBI->getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const AArch64InstrInfo *TII =
+ MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
+ int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+
+ MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
+ MachineBasicBlock *LoopTestMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, LoopTestMBB);
+ MachineBasicBlock *LoopBodyMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, LoopBodyMBB);
+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, ExitMBB);
+ MachineInstr::MIFlag Flags =
+ FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
+
+ // LoopTest:
+ // SUB SP, SP, #ProbeSize
+ emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
+ AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
+
+ // CMP SP, TargetReg
+ BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
+ AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addReg(TargetReg)
+ .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
+ .setMIFlags(Flags);
+
+ // B.<Cond> LoopExit
+ BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
+ .addImm(AArch64CC::LE)
+ .addMBB(ExitMBB)
+ .setMIFlags(Flags);
+
+ // STR XZR, [SP]
+ BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addImm(0)
+ .setMIFlags(Flags);
+
+ // B loop
+ BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
+ .addMBB(LoopTestMBB)
+ .setMIFlags(Flags);
+
+ // LoopExit:
+ // MOV SP, TargetReg
+ BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
+ .addReg(TargetReg)
+ .addImm(0)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
+ .setMIFlags(Flags);
+
+ // LDR XZR, [SP]
+ BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
+ .addReg(AArch64::XZR, RegState::Define)
+ .addReg(AArch64::SP)
+ .addImm(0)
+ .setMIFlags(Flags);
+
+ ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
+ ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+
+ LoopTestMBB->addSuccessor(ExitMBB);
+ LoopTestMBB->addSuccessor(LoopBodyMBB);
+ LoopBodyMBB->addSuccessor(LoopTestMBB);
+ MBB.addSuccessor(LoopTestMBB);
+
+ // Update liveins.
+ if (MF.getRegInfo().reservedRegsFrozen()) {
+ recomputeLiveIns(*LoopTestMBB);
+ recomputeLiveIns(*LoopBodyMBB);
+ recomputeLiveIns(*ExitMBB);
+ }
+
+ return ExitMBB->begin();
+}
+
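[Editorial note] Reconstructed from the BuildMI calls above, for a probe size of 4096 and TargetReg = x9 the emitted structure is roughly (register names illustrative):

    .LloopTest:
      sub  sp, sp, #4096
      cmp  sp, x9               // SUBSXrx64 xzr, sp, x9
      b.le .Lexit
    .LloopBody:
      str  xzr, [sp]            // probe the newly allocated page
      b    .LloopTest
    .Lexit:
      mov  sp, x9               // ADDXri sp, TargetReg, #0
      ldr  xzr, [sp]            // probe the final page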
#define GET_INSTRINFO_HELPERS
#define GET_INSTRMAP_INFO
#include "AArch64GenInstrInfo.inc"
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 20210a96d67a..6526f6740747 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -108,7 +108,7 @@ public:
/// Returns the base register operator of a load/store.
static const MachineOperand &getLdStBaseOp(const MachineInstr &MI);
- /// Returns the the immediate offset operator of a load/store.
+ /// Returns the immediate offset operator of a load/store.
static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI);
/// Returns whether the instruction is FP or NEON.
@@ -120,12 +120,18 @@ public:
/// Returns whether the instruction is in Q form (128 bit operands)
static bool isQForm(const MachineInstr &MI);
+ /// Returns whether the instruction can be compatible with non-zero BTYPE.
+ static bool hasBTISemantics(const MachineInstr &MI);
+
/// Returns the index for the immediate for a given instruction.
static unsigned getLoadStoreImmIdx(unsigned Opc);
/// Return true if the given load or store may be paired with another.
static bool isPairableLdStInst(const MachineInstr &MI);
+ /// Returns true if MI is one of the TCRETURN* instructions.
+ static bool isTailCallReturnInst(const MachineInstr &MI);
+
/// Return the opcode that set flags when possible. The caller is
/// responsible for ensuring the opc has a flag setting equivalent.
static unsigned convertToFlagSettingOpc(unsigned Opc);
@@ -140,6 +146,13 @@ public:
getAddrModeFromMemoryOp(const MachineInstr &MemI,
const TargetRegisterInfo *TRI) const override;
+ bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
+ const MachineInstr &AddrI,
+ ExtAddrMode &AM) const override;
+
+ MachineInstr *emitLdStWithAddr(MachineInstr &MemI,
+ const ExtAddrMode &AM) const override;
+
bool getMemOperandsWithOffsetWidth(
const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
@@ -152,7 +165,7 @@ public:
bool getMemOperandWithOffsetWidth(const MachineInstr &MI,
const MachineOperand *&BaseOp,
int64_t &Offset, bool &OffsetIsScalable,
- unsigned &Width,
+ TypeSize &Width,
const TargetRegisterInfo *TRI) const;
/// Return the immediate offset of the base register in a load/store \p LdSt.
@@ -162,12 +175,15 @@ public:
/// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly.
///
/// For unscaled instructions, \p Scale is set to 1.
- static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, unsigned &Width,
+ static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width,
int64_t &MinOffset, int64_t &MaxOffset);
bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+ int64_t Offset1, bool OffsetIsScalable1,
ArrayRef<const MachineOperand *> BaseOps2,
- unsigned NumLoads, unsigned NumBytes) const override;
+ int64_t Offset2, bool OffsetIsScalable2,
+ unsigned ClusterSize,
+ unsigned NumBytes) const override;
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg,
@@ -213,6 +229,11 @@ public:
MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
+ void insertIndirectBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock &NewDestBB,
+ MachineBasicBlock &RestoreBB, const DebugLoc &DL,
+ int64_t BrOffset, RegScavenger *RS) const override;
+
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
@@ -235,6 +256,10 @@ public:
const DebugLoc &DL, Register DstReg,
ArrayRef<MachineOperand> Cond, Register TrueReg,
Register FalseReg) const override;
+
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const override;
+
MCInst getNop() const override;
bool isSchedulingBoundary(const MachineInstr &MI,
@@ -295,6 +320,8 @@ public:
bool OutlineFromLinkOnceODRs) const override;
std::optional<outliner::OutlinedFunction> getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
+ void mergeOutliningCandidateAttributes(
+ Function &F, std::vector<outliner::Candidate> &Candidates) const override;
outliner::InstrType
getOutliningTypeImpl(MachineBasicBlock::iterator &MIT, unsigned Flags) const override;
SmallVector<
@@ -307,6 +334,11 @@ public:
MachineBasicBlock::iterator &It, MachineFunction &MF,
outliner::Candidate &C) const override;
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
+
+ void buildClearRegister(Register Reg, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator Iter, DebugLoc &DL,
+ bool AllowSideEffects = true) const override;
+
/// Returns the vector element size (B, H, S or D) of an SVE opcode.
uint64_t getElementSizeForOpcode(unsigned Opc) const;
/// Returns true if the opcode is for an SVE instruction that sets the
@@ -325,10 +357,14 @@ public:
std::optional<RegImmPair> isAddImmediate(const MachineInstr &MI,
Register Reg) const override;
+ bool isFunctionSafeToSplit(const MachineFunction &MF) const override;
+
+ bool isMBBSafeToSplitToCold(const MachineBasicBlock &MBB) const override;
+
std::optional<ParamLoadedValue>
describeLoadedValue(const MachineInstr &MI, Register Reg) const override;
- unsigned int getTailDuplicateSize(CodeGenOpt::Level OptLevel) const override;
+ unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;
bool isExtendLikelyToBeFolded(MachineInstr &ExtMI,
MachineRegisterInfo &MRI) const override;
@@ -340,6 +376,22 @@ public:
static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset,
int64_t &ByteSized,
int64_t &VGSized);
+
+ bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;
+
+ // Return true if an address of the form BaseReg + Scale * ScaledReg + Offset
+ // can be used for a load/store of NumBytes. BaseReg is always present and
+ // implicit.
+ bool isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
+ unsigned Scale) const;
+
+ // Decrement the SP, issuing probes along the way. `TargetReg` is the new top
+ // of the stack. `FrameSetup` is passed as true if the allocation is part of
+ // constructing the activation frame of a function.
+ MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI,
+ Register TargetReg,
+ bool FrameSetup) const;
+
#define GET_INSTRINFO_HELPER_DECLS
#include "AArch64GenInstrInfo.inc"
@@ -349,6 +401,8 @@ protected:
/// registers as machine operands.
std::optional<DestSourcePair>
isCopyInstrImpl(const MachineInstr &MI) const override;
+ std::optional<DestSourcePair>
+ isCopyLikeInstrImpl(const MachineInstr &MI) const override;
private:
unsigned getInstBundleLength(const MachineInstr &MI) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 9e72d37880c5..44b0337fe787 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -94,7 +94,7 @@ def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
def HasFlagM : Predicate<"Subtarget->hasFlagM()">,
AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">;
-def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPCImm()">,
+def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPC_IMMO()">,
AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
@@ -137,7 +137,7 @@ def HasSVE : Predicate<"Subtarget->hasSVE()">,
def HasSVE2 : Predicate<"Subtarget->hasSVE2()">,
AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">;
def HasSVE2p1 : Predicate<"Subtarget->hasSVE2p1()">,
- AssemblerPredicate<(any_of FeatureSVE2p1), "sve2p1">;
+ AssemblerPredicateWithAll<(all_of FeatureSVE2p1), "sve2p1">;
def HasSVE2AES : Predicate<"Subtarget->hasSVE2AES()">,
AssemblerPredicateWithAll<(all_of FeatureSVE2AES), "sve2-aes">;
def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">,
@@ -154,12 +154,49 @@ def HasSMEF64F64 : Predicate<"Subtarget->hasSMEF64F64()">,
AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">;
def HasSMEF16F16 : Predicate<"Subtarget->hasSMEF16F16()">,
AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">;
+def HasSMEFA64 : Predicate<"Subtarget->hasSMEFA64()">,
+ AssemblerPredicateWithAll<(all_of FeatureSMEFA64), "sme-fa64">;
def HasSMEI16I64 : Predicate<"Subtarget->hasSMEI16I64()">,
AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">;
def HasSME2 : Predicate<"Subtarget->hasSME2()">,
AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">;
def HasSME2p1 : Predicate<"Subtarget->hasSME2p1()">,
AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">;
+def HasFPMR : Predicate<"Subtarget->hasFPMR()">,
+ AssemblerPredicateWithAll<(all_of FeatureFPMR), "fpmr">;
+def HasFP8 : Predicate<"Subtarget->hasFP8()">,
+ AssemblerPredicateWithAll<(all_of FeatureFP8), "fp8">;
+def HasFAMINMAX : Predicate<"Subtarget->hasFAMINMAX()">,
+ AssemblerPredicateWithAll<(all_of FeatureFAMINMAX), "faminmax">;
+def HasFP8FMA : Predicate<"Subtarget->hasFP8FMA()">,
+ AssemblerPredicateWithAll<(all_of FeatureFP8FMA), "fp8fma">;
+def HasSSVE_FP8FMA : Predicate<"Subtarget->hasSSVE_FP8FMA() || "
+ "(Subtarget->hasSVE2() && Subtarget->hasFP8FMA())">,
+ AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8FMA,
+ (all_of FeatureSVE2, FeatureFP8FMA)),
+ "ssve-fp8fma or (sve2 and fp8fma)">;
+def HasFP8DOT2 : Predicate<"Subtarget->hasFP8DOT2()">,
+ AssemblerPredicateWithAll<(all_of FeatureFP8DOT2), "fp8dot2">;
+def HasSSVE_FP8DOT2 : Predicate<"Subtarget->hasSSVE_FP8DOT2() || "
+ "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT2())">,
+ AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT2,
+ (all_of FeatureSVE2, FeatureFP8DOT2)),
+ "ssve-fp8dot2 or (sve2 and fp8dot2)">;
+def HasFP8DOT4 : Predicate<"Subtarget->hasFP8DOT4()">,
+ AssemblerPredicateWithAll<(all_of FeatureFP8DOT4), "fp8dot4">;
+def HasSSVE_FP8DOT4 : Predicate<"Subtarget->hasSSVE_FP8DOT4() || "
+ "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT4())">,
+ AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT4,
+ (all_of FeatureSVE2, FeatureFP8DOT4)),
+ "ssve-fp8dot4 or (sve2 and fp8dot4)">;
+def HasLUT : Predicate<"Subtarget->hasLUT()">,
+ AssemblerPredicateWithAll<(all_of FeatureLUT), "lut">;
+def HasSME_LUTv2 : Predicate<"Subtarget->hasSME_LUTv2()">,
+ AssemblerPredicateWithAll<(all_of FeatureSME_LUTv2), "sme-lutv2">;
+def HasSMEF8F16 : Predicate<"Subtarget->hasSMEF8F16()">,
+ AssemblerPredicateWithAll<(all_of FeatureSMEF8F16), "sme-f8f16">;
+def HasSMEF8F32 : Predicate<"Subtarget->hasSMEF8F32()">,
+ AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">;
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// they should be enabled if either has been specified.
@@ -171,6 +208,10 @@ def HasSVE2orSME
: Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME()">,
AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME),
"sve2 or sme">;
+def HasSVE2orSME2
+ : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME2()">,
+ AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME2),
+ "sve2 or sme2">;
def HasSVE2p1_or_HasSME
: Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME()">,
AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">;
@@ -752,6 +793,7 @@ def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
+def AArch64uaddlv : SDNode<"AArch64ISD::UADDLV", SDT_AArch64uaddlp>;
def AArch64uabd : PatFrags<(ops node:$lhs, node:$rhs),
[(abdu node:$lhs, node:$rhs),
@@ -776,6 +818,9 @@ def AArch64faddp : PatFrags<(ops node:$Rn, node:$Rm),
[(AArch64addp_n node:$Rn, node:$Rm),
(int_aarch64_neon_faddp node:$Rn, node:$Rm)]>;
def AArch64roundingvlshr : ComplexPattern<vAny, 2, "SelectRoundingVLShr", [AArch64vlshr]>;
+def AArch64rshrn : PatFrags<(ops node:$LHS, node:$RHS),
+ [(trunc (AArch64roundingvlshr node:$LHS, node:$RHS)),
+ (int_aarch64_neon_rshrn node:$LHS, node:$RHS)]>;
def AArch64facge : PatFrags<(ops node:$Rn, node:$Rm),
[(AArch64fcmge (fabs node:$Rn), (fabs node:$Rm)),
(int_aarch64_neon_facge node:$Rn, node:$Rm)]>;
@@ -818,10 +863,25 @@ def AArch64stilp : SDNode<"AArch64ISD::STILP", SDT_AArch64stilp, [SDNPHasChain,
def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
+
+def AArch64probedalloca
+ : SDNode<"AArch64ISD::PROBED_ALLOCA",
+ SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPMayStore]>;
+
def AArch64mrs : SDNode<"AArch64ISD::MRS",
SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>,
[SDNPHasChain, SDNPOutGlue]>;
+def SD_AArch64rshrnb : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<2>]>;
+def AArch64rshrnb : SDNode<"AArch64ISD::RSHRNB_I", SD_AArch64rshrnb>;
+def AArch64rshrnb_pf : PatFrags<(ops node:$rs, node:$i),
+ [(AArch64rshrnb node:$rs, node:$i),
+ (int_aarch64_sve_rshrnb node:$rs, node:$i)]>;
+
+def AArch64CttzElts : SDNode<"AArch64ISD::CTTZ_ELTS", SDTypeProfile<1, 1,
+ [SDTCisInt<0>, SDTCisVec<1>]>, []>;
+
// Match add node and also treat an 'or' node is as an 'add' if the or'ed operands
// have no common bits.
def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
@@ -880,7 +940,8 @@ include "SMEInstrFormats.td"
// Miscellaneous instructions.
//===----------------------------------------------------------------------===//
-let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in {
+let hasSideEffects = 1, isCodeGenOnly = 1 in {
+let Defs = [SP], Uses = [SP] in {
// We set Sched to empty list because we expect these instructions to simply get
// removed in most cases.
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
@@ -889,7 +950,35 @@ def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
[(AArch64callseq_end timm:$amt1, timm:$amt2)]>,
Sched<[]>;
-} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1
+
+}
+
+let Defs = [SP, NZCV], Uses = [SP] in {
+// Probed stack allocation of a constant size, used in function prologues when
+// stack-clash protection is enabled.
+def PROBED_STACKALLOC : Pseudo<(outs GPR64:$scratch),
+ (ins i64imm:$stacksize, i64imm:$fixed_offset,
+ i64imm:$scalable_offset),
+ []>,
+ Sched<[]>;
+
+// Probed stack allocation of a variable size, used in function prologues when
+// stack-clash protection is enabled.
+def PROBED_STACKALLOC_VAR : Pseudo<(outs),
+ (ins GPR64sp:$target),
+ []>,
+ Sched<[]>;
+
+// Probed stack allocations of a variable size, used for allocas of unknown size
+// when stack-clash protection is enabled.
+let usesCustomInserter = 1 in
+def PROBED_STACKALLOC_DYN : Pseudo<(outs),
+ (ins GPR64common:$target),
+ [(AArch64probedalloca GPR64common:$target)]>,
+ Sched<[]>;
+
+} // Defs = [SP, NZCV], Uses = [SP] in
+} // hasSideEffects = 1, isCodeGenOnly = 1
let isReMaterializable = 1, isCodeGenOnly = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
@@ -1196,7 +1285,7 @@ defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_ne
class BaseSIMDSUDOTIndex<bit Q, string dst_kind, string lhs_kind,
string rhs_kind, RegisterOperand RegType,
ValueType AccumType, ValueType InputType>
- : BaseSIMDThreeSameVectorDotIndex<Q, 0, 1, 0b00, "sudot", dst_kind,
+ : BaseSIMDThreeSameVectorIndexS<Q, 0, 0b00, 0b1111, "sudot", dst_kind,
lhs_kind, rhs_kind, RegType, AccumType,
InputType, null_frag> {
let Pattern = [(set (AccumType RegType:$dst),
@@ -1470,6 +1559,17 @@ def : InstAlias<"autia1716", (AUTIA1716), 0>;
def : InstAlias<"autib1716", (AUTIB1716), 0>;
def : InstAlias<"xpaclri", (XPACLRI), 0>;
+// Pseudos
+
+let Uses = [LR, SP], Defs = [LR] in {
+// Insertion point of LR signing code.
+def PAUTH_PROLOGUE : Pseudo<(outs), (ins), []>, Sched<[]>;
+// Insertion point of LR authentication code.
+// The RET terminator of the containing machine basic block may be replaced
+// with a combined RETA(A|B) instruction when rewriting this Pseudo.
+def PAUTH_EPILOGUE : Pseudo<(outs), (ins), []>, Sched<[]>;
+}
+
// These pointer authentication instructions require armv8.3a
let Predicates = [HasPAuth] in {
@@ -3673,27 +3773,28 @@ let Predicates = [IsLE, UseSTRQro] in {
// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
ValueType VecTy, ValueType STy,
+ ValueType SubRegTy,
SubRegIndex SubRegIdx,
Instruction STRW, Instruction STRX> {
- def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
+ def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), (i64 0))),
(ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
- (STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
+ (STRW (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
- def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
+ def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), (i64 0))),
(ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
- (STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
+ (STRX (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
let AddedComplexity = 19 in {
- defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, hsub, STRHroW, STRHroX>;
- defm : VecROStoreLane0Pat<ro16, store, v8f16, f16, hsub, STRHroW, STRHroX>;
- defm : VecROStoreLane0Pat<ro32, store, v4i32, i32, ssub, STRSroW, STRSroX>;
- defm : VecROStoreLane0Pat<ro32, store, v4f32, f32, ssub, STRSroW, STRSroX>;
- defm : VecROStoreLane0Pat<ro64, store, v2i64, i64, dsub, STRDroW, STRDroX>;
- defm : VecROStoreLane0Pat<ro64, store, v2f64, f64, dsub, STRDroW, STRDroX>;
+ defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, f16, hsub, STRHroW, STRHroX>;
+ defm : VecROStoreLane0Pat<ro16, store, v8f16, f16, f16, hsub, STRHroW, STRHroX>;
+ defm : VecROStoreLane0Pat<ro32, store, v4i32, i32, i32, ssub, STRSroW, STRSroX>;
+ defm : VecROStoreLane0Pat<ro32, store, v4f32, f32, i32, ssub, STRSroW, STRSroX>;
+ defm : VecROStoreLane0Pat<ro64, store, v2i64, i64, i64, dsub, STRDroW, STRDroX>;
+ defm : VecROStoreLane0Pat<ro64, store, v2f64, f64, i64, dsub, STRDroW, STRDroX>;
}
//---
@@ -3812,21 +3913,22 @@ def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop,
ValueType VTy, ValueType STy,
+ ValueType SubRegTy,
SubRegIndex SubRegIdx, Operand IndexType,
Instruction STR> {
- def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), 0)),
+ def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), (i64 0))),
(UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
- (STR (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
+ (STR (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
GPR64sp:$Rn, IndexType:$offset)>;
}
let AddedComplexity = 19 in {
- defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, hsub, uimm12s2, STRHui>;
- defm : VecStoreLane0Pat<am_indexed16, store, v8f16, f16, hsub, uimm12s2, STRHui>;
- defm : VecStoreLane0Pat<am_indexed32, store, v4i32, i32, ssub, uimm12s4, STRSui>;
- defm : VecStoreLane0Pat<am_indexed32, store, v4f32, f32, ssub, uimm12s4, STRSui>;
- defm : VecStoreLane0Pat<am_indexed64, store, v2i64, i64, dsub, uimm12s8, STRDui>;
- defm : VecStoreLane0Pat<am_indexed64, store, v2f64, f64, dsub, uimm12s8, STRDui>;
+ defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, f16, hsub, uimm12s2, STRHui>;
+ defm : VecStoreLane0Pat<am_indexed16, store, v8f16, f16, f16, hsub, uimm12s2, STRHui>;
+ defm : VecStoreLane0Pat<am_indexed32, store, v4i32, i32, i32, ssub, uimm12s4, STRSui>;
+ defm : VecStoreLane0Pat<am_indexed32, store, v4f32, f32, i32, ssub, uimm12s4, STRSui>;
+ defm : VecStoreLane0Pat<am_indexed64, store, v2i64, i64, i64, dsub, uimm12s8, STRDui>;
+ defm : VecStoreLane0Pat<am_indexed64, store, v2f64, f64, i64, dsub, uimm12s8, STRDui>;
}
//---
@@ -3859,6 +3961,11 @@ defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb",
[(truncstorei8 GPR32z:$Rt,
(am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
+// bf16 store pattern
+def : Pat<(store (bf16 FPR16Op:$Rt),
+ (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
+ (STURHi FPR16:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+
// Armv8.4 Weaker Release Consistency enhancements
// LDAPR & STLR with Immediate Offset instructions
let Predicates = [HasRCPC_IMMO] in {
@@ -3955,17 +4062,18 @@ def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
ValueType VTy, ValueType STy,
+ ValueType SubRegTy,
SubRegIndex SubRegIdx, Instruction STR> {
- defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegIdx, simm9, STR>;
+ defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegTy, SubRegIdx, simm9, STR>;
}
let AddedComplexity = 19 in {
- defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, hsub, STURHi>;
- defm : VecStoreULane0Pat<store, v8f16, f16, hsub, STURHi>;
- defm : VecStoreULane0Pat<store, v4i32, i32, ssub, STURSi>;
- defm : VecStoreULane0Pat<store, v4f32, f32, ssub, STURSi>;
- defm : VecStoreULane0Pat<store, v2i64, i64, dsub, STURDi>;
- defm : VecStoreULane0Pat<store, v2f64, f64, dsub, STURDi>;
+ defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, f16, hsub, STURHi>;
+ defm : VecStoreULane0Pat<store, v8f16, f16, f16, hsub, STURHi>;
+ defm : VecStoreULane0Pat<store, v4i32, i32, i32, ssub, STURSi>;
+ defm : VecStoreULane0Pat<store, v4f32, f32, i32, ssub, STURSi>;
+ defm : VecStoreULane0Pat<store, v2i64, i64, i64, dsub, STURDi>;
+ defm : VecStoreULane0Pat<store, v2f64, f64, i64, dsub, STURDi>;
}
//---
@@ -4342,6 +4450,34 @@ def : Pat<(i64 (any_llround f64:$Rn)),
defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;
+def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
+ (SCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
+def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)),
+ (SCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>;
+def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)),
+ (SCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>;
+
+def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)),
+ (SCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>;
+def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)),
+ (SCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>;
+def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)),
+ (SCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>;
+
+def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)),
+ (UCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>;
+def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)),
+ (UCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>;
+def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)),
+ (UCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>;
+
+def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
+ (UCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
+def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)),
+ (UCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>;
+def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)),
+ (UCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>;
+
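[Editorial note] These patterns fold an integer-to-FP convert followed by division by a power of two into one fixed-point convert; a hedged example of the intended selection (assembly illustrative):

    // float f = (float)(int32_t)w0 / 16.0f;   // 16 = 2^4 matches fixedpoint_f32_i32
    scvtf s0, w0, #4                           // SCVTFSWSri with 4 fractional bits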
//===----------------------------------------------------------------------===//
// Unscaled integer to floating point conversion instruction.
//===----------------------------------------------------------------------===//
@@ -4409,6 +4545,16 @@ let Predicates = [HasFRInt3264] in {
defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>;
} // HasFRInt3264
+// Pattern to convert 1x64 vector intrinsics to equivalent scalar instructions
+def : Pat<(v1f64 (int_aarch64_neon_frint32z (v1f64 FPR64:$Rn))),
+ (FRINT32ZDr FPR64:$Rn)>;
+def : Pat<(v1f64 (int_aarch64_neon_frint64z (v1f64 FPR64:$Rn))),
+ (FRINT64ZDr FPR64:$Rn)>;
+def : Pat<(v1f64 (int_aarch64_neon_frint32x (v1f64 FPR64:$Rn))),
+ (FRINT32XDr FPR64:$Rn)>;
+def : Pat<(v1f64 (int_aarch64_neon_frint64x (v1f64 FPR64:$Rn))),
+ (FRINT64XDr FPR64:$Rn)>;
+
// Emitting strict_lrint as two instructions is valid as any exceptions that
// occur will happen in exactly one of the instructions (e.g. if the input is
// not an integer the inexact exception will happen in the FRINTX but not then
@@ -4462,7 +4608,7 @@ multiclass FMULScalarFromIndexedLane0Patterns<string inst,
def : Pat<(f16 (OpNode (f16 FPR16:$Rn),
(f16 (vector_extract (v8f16 V128:$Rm), (i64 0))))),
(!cast<Instruction>(inst # inst_f16_suffix)
- FPR16:$Rn, (EXTRACT_SUBREG V128:$Rm, hsub))>;
+ FPR16:$Rn, (f16 (EXTRACT_SUBREG V128:$Rm, hsub)))>;
}
let Predicates = preds in {
def : Pat<(f32 (OpNode (f32 FPR32:$Rn),
@@ -5824,6 +5970,17 @@ def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (v4i32 V128:$Vn))),
def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))),
(v2i32 (trunc (v2i64 V128:$Vm))))),
(UZP1v4i32 V128:$Vn, V128:$Vm)>;
+// These are the same as above, with an optional assertzext node that can be
+// generated from fptoi lowering.
+def : Pat<(v16i8 (concat_vectors (v8i8 (assertzext (trunc (v8i16 V128:$Vn)))),
+ (v8i8 (assertzext (trunc (v8i16 V128:$Vm)))))),
+ (UZP1v16i8 V128:$Vn, V128:$Vm)>;
+def : Pat<(v8i16 (concat_vectors (v4i16 (assertzext (trunc (v4i32 V128:$Vn)))),
+ (v4i16 (assertzext (trunc (v4i32 V128:$Vm)))))),
+ (UZP1v8i16 V128:$Vn, V128:$Vm)>;
+def : Pat<(v4i32 (concat_vectors (v2i32 (assertzext (trunc (v2i64 V128:$Vn)))),
+ (v2i32 (assertzext (trunc (v2i64 V128:$Vm)))))),
+ (UZP1v4i32 V128:$Vn, V128:$Vm)>;
def : Pat<(v16i8 (concat_vectors
(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))),
@@ -5857,6 +6014,13 @@ def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
(v16i8 V128:$Ri), (v16i8 V128:$Rn))),
(TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;
+//----------------------------------------------------------------------------
+// AdvSIMD LUT instructions
+//----------------------------------------------------------------------------
+let Predicates = [HasLUT] in {
+ defm LUT2 : BaseSIMDTableLookupIndexed2<"luti2">;
+ defm LUT4 : BaseSIMDTableLookupIndexed4<"luti4">;
+}
//----------------------------------------------------------------------------
// AdvSIMD scalar DUP instruction
@@ -6424,6 +6588,24 @@ def : Pat<(i32 (int_aarch64_neon_uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op)))
(v8i16 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub)),
ssub))>;
+def : Pat<(v4i32 (AArch64uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
+ (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub))>;
+
+def : Pat<(v4i32 (AArch64uaddlv (v4i16 (AArch64uaddlp (v8i8 V64:$op))))),
+ (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i8v V64:$op), hsub))>;
+
+def : Pat<(v4i32 (AArch64uaddlv (v8i8 V64:$Rn))),
+ (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i8v V64:$Rn), hsub))>;
+
+def : Pat<(v4i32 (AArch64uaddlv (v4i16 V64:$Rn))),
+ (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv4i16v V64:$Rn), ssub))>;
+
+def : Pat<(v4i32 (AArch64uaddlv (v16i8 V128:$Rn))),
+ (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$Rn), hsub))>;
+
+def : Pat<(v4i32 (AArch64uaddlv (v8i16 V128:$Rn))),
+ (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$Rn), ssub))>;
+
// Patterns for across-vector intrinsics, that have a node equivalent, that
// returns a vector (with only the low lane defined) instead of a scalar.
// In effect, opNode is the same as (scalar_to_vector (IntNode)).
@@ -6536,6 +6718,22 @@ def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
ssub))>;
}
+// For vecreduce_add, used by GlobalISel, not SDAG
+def : Pat<(i8 (vecreduce_add (v8i8 V64:$Rn))),
+ (i8 (ADDVv8i8v V64:$Rn))>;
+def : Pat<(i8 (vecreduce_add (v16i8 V128:$Rn))),
+ (i8 (ADDVv16i8v V128:$Rn))>;
+def : Pat<(i16 (vecreduce_add (v4i16 V64:$Rn))),
+ (i16 (ADDVv4i16v V64:$Rn))>;
+def : Pat<(i16 (vecreduce_add (v8i16 V128:$Rn))),
+ (i16 (ADDVv8i16v V128:$Rn))>;
+def : Pat<(i32 (vecreduce_add (v2i32 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub))>;
+def : Pat<(i32 (vecreduce_add (v4i32 V128:$Rn))),
+ (i32 (ADDVv4i32v V128:$Rn))>;
+def : Pat<(i64 (vecreduce_add (v2i64 V128:$Rn))),
+ (i64 (ADDPv2i64p V128:$Rn))>;
+
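[Editorial note] A hedged example of what these patterns select under GlobalISel (assembly illustrative):

    // i8 r = vecreduce_add(v16i8 v0)
    addv b0, v0.16b          // ADDVv16i8v; the result lives in an FPR

The v2i32 case above instead uses the pairwise ADDP plus an ssub extract, since there is no two-lane ADDV.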
defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>;
// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
@@ -6562,6 +6760,43 @@ defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
(UMINPv2i32 V64:$Rn, V64:$Rn)>;
+// For vecreduce_{opc} used by GlobalISel, not SDAG at the moment,
+// because GlobalISel allows us to specify the return register to be an FPR
+multiclass SIMDAcrossLanesVecReductionIntrinsic<string baseOpc,
+ SDPatternOperator opNode> {
+def : Pat<(i8 (opNode (v8i8 FPR64:$Rn))),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) FPR64:$Rn)>;
+
+def : Pat<(i8 (opNode (v16i8 FPR128:$Rn))),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) FPR128:$Rn)>;
+
+def : Pat<(i16 (opNode (v4i16 FPR64:$Rn))),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) FPR64:$Rn)>;
+
+def : Pat<(i16 (opNode (v8i16 FPR128:$Rn))),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) FPR128:$Rn)>;
+
+def : Pat<(i32 (opNode (v4i32 V128:$Rn))),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn)>;
+}
+
+// For v2i32 source type, the pairwise instruction can be used instead
+defm : SIMDAcrossLanesVecReductionIntrinsic<"UMINV", vecreduce_umin>;
+def : Pat<(i32 (vecreduce_umin (v2i32 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub))>;
+
+defm : SIMDAcrossLanesVecReductionIntrinsic<"UMAXV", vecreduce_umax>;
+def : Pat<(i32 (vecreduce_umax (v2i32 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub))>;
+
+defm : SIMDAcrossLanesVecReductionIntrinsic<"SMINV", vecreduce_smin>;
+def : Pat<(i32 (vecreduce_smin (v2i32 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub))>;
+
+defm : SIMDAcrossLanesVecReductionIntrinsic<"SMAXV", vecreduce_smax>;
+def : Pat<(i32 (vecreduce_smax (v2i32 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub))>;
+
multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
(i32 (SMOVvi16to32
@@ -7030,19 +7265,19 @@ def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
// Patterns for FP16 Intrinsics - requires reg copy to/from as i16s not supported.
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
- (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
+ (SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
- (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
+ (SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
- (SCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
+ (SCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
(and FPR32:$Rn, (i32 65535)),
vecshiftR16:$imm)),
- (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
+ (UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
- (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
+ (UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
- (UCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
+ (UCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
(i32 (INSERT_SUBREG
(i32 (IMPLICIT_DEF)),
@@ -7116,8 +7351,7 @@ defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs
defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
int_aarch64_neon_vcvtfxs2fp>;
-defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
- BinOpFrag<(trunc (AArch64roundingvlshr node:$LHS, node:$RHS))>>;
+defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", AArch64rshrn>;
defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
// X << 1 ==> X + X
@@ -7188,6 +7422,12 @@ def : Pat<(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift
let AddedComplexity = 5 in
def : Pat<(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))),
(RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
+def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))),
+ (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
+def : Pat<(v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))),
+ (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
+def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))),
+ (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element
def : Pat<(v16i8 (concat_vectors
@@ -7209,6 +7449,24 @@ def : Pat<(v4i32 (concat_vectors
(RADDHNv2i64_v4i32
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
(v2i64 (MOVIv2d_ns (i32 0))))>;
+def : Pat<(v16i8 (concat_vectors
+ (v8i8 V64:$Vd),
+ (v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))))),
+ (RADDHNv8i16_v16i8
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
+ (v8i16 (MOVIv2d_ns (i32 0))))>;
+def : Pat<(v8i16 (concat_vectors
+ (v4i16 V64:$Vd),
+ (v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))))),
+ (RADDHNv4i32_v8i16
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
+ (v4i32 (MOVIv2d_ns (i32 0))))>;
+def : Pat<(v4i32 (concat_vectors
+ (v2i32 V64:$Vd),
+ (v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))))),
+ (RADDHNv2i64_v4i32
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
+ (v2i64 (MOVIv2d_ns (i32 0))))>;
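[Editorial note] These selections rely on the identity that a rounding narrowing shift by half the element width equals a rounding-add-high-half with a zero addend (sketch for 16-bit elements; both sides compute (x + 0x80) >> 8, narrowed to 8 bits):

    rshrn  v0.8b, v1.8h, #8
    // ==
    movi   v2.2d, #0
    raddhn v0.8b, v1.8h, v2.8h

A plausible motivation is that the MOVI zero is a separate, hoistable instruction.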
// SHRN patterns for when a logical right shift was used instead of arithmetic
// (the immediate guarantees no sign bits actually end up in the result so it
@@ -7248,23 +7506,23 @@ def : Pat<(v2i64 (sext (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>
def : Pat<(v2i64 (zext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
// Also match an extend from the upper half of a 128 bit source register.
-def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
+def : Pat<(v8i16 (anyext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))),
(USHLLv16i8_shift V128:$Rn, (i32 0))>;
-def : Pat<(v8i16 (zext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
+def : Pat<(v8i16 (zext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))),
(USHLLv16i8_shift V128:$Rn, (i32 0))>;
-def : Pat<(v8i16 (sext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
+def : Pat<(v8i16 (sext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))),
(SSHLLv16i8_shift V128:$Rn, (i32 0))>;
-def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
+def : Pat<(v4i32 (anyext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))),
(USHLLv8i16_shift V128:$Rn, (i32 0))>;
-def : Pat<(v4i32 (zext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
+def : Pat<(v4i32 (zext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))),
(USHLLv8i16_shift V128:$Rn, (i32 0))>;
-def : Pat<(v4i32 (sext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
+def : Pat<(v4i32 (sext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))),
(SSHLLv8i16_shift V128:$Rn, (i32 0))>;
-def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
+def : Pat<(v2i64 (anyext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
(USHLLv4i32_shift V128:$Rn, (i32 0))>;
-def : Pat<(v2i64 (zext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
+def : Pat<(v2i64 (zext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
(USHLLv4i32_shift V128:$Rn, (i32 0))>;
-def : Pat<(v2i64 (sext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
+def : Pat<(v2i64 (sext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
(SSHLLv4i32_shift V128:$Rn, (i32 0))>;
// Vector shift sxtl aliases
@@ -9144,6 +9402,46 @@ let Predicates = [HasD128] in {
}
}
+let Predicates = [HasFP8] in {
+ defm F1CVTL : SIMDMixedTwoVectorFP8<0b00, "f1cvtl">;
+ defm F2CVTL : SIMDMixedTwoVectorFP8<0b01, "f2cvtl">;
+ defm BF1CVTL : SIMDMixedTwoVectorFP8<0b10, "bf1cvtl">;
+ defm BF2CVTL : SIMDMixedTwoVectorFP8<0b11, "bf2cvtl">;
+ defm FCVTN_F16_F8 : SIMDThreeSameSizeVectorCvt<"fcvtn">;
+ defm FCVTN_F32_F8 : SIMDThreeVectorCvt<"fcvtn">;
+ defm FSCALE : SIMDThreeSameVectorFP<0b1, 0b1, 0b111, "fscale", null_frag>;
+} // End let Predicates = [HasFP8]
+
+let Predicates = [HasFAMINMAX] in {
+ defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", null_frag>;
+ defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", null_frag>;
+} // End let Predicates = [HasFAMINMAX]
+
+let Predicates = [HasFP8FMA] in {
+ defm FMLALBlane : SIMDThreeSameVectorMLAIndex<0b0, "fmlalb">;
+ defm FMLALTlane : SIMDThreeSameVectorMLAIndex<0b1, "fmlalt">;
+ defm FMLALLBBlane : SIMDThreeSameVectorMLALIndex<0b0, 0b00, "fmlallbb">;
+ defm FMLALLBTlane : SIMDThreeSameVectorMLALIndex<0b0, 0b01, "fmlallbt">;
+ defm FMLALLTBlane : SIMDThreeSameVectorMLALIndex<0b1, 0b00, "fmlalltb">;
+ defm FMLALLTTlane : SIMDThreeSameVectorMLALIndex<0b1, 0b01, "fmlalltt">;
+
+ defm FMLALB : SIMDThreeSameVectorMLA<0b0, "fmlalb">;
+ defm FMLALT : SIMDThreeSameVectorMLA<0b1, "fmlalt">;
+ defm FMLALLBB : SIMDThreeSameVectorMLAL<0b0, 0b00, "fmlallbb">;
+ defm FMLALLBT : SIMDThreeSameVectorMLAL<0b0, 0b01, "fmlallbt">;
+ defm FMLALLTB : SIMDThreeSameVectorMLAL<0b1, 0b00, "fmlalltb">;
+ defm FMLALLTT : SIMDThreeSameVectorMLAL<0b1, 0b01, "fmlalltt">;
+} // End let Predicates = [HasFP8FMA]
+
+let Predicates = [HasFP8DOT2] in {
+ defm FDOTlane : SIMDThreeSameVectorFP8DOT2Index<"fdot">;
+ defm FDOT : SIMDThreeSameVectorDOT2<"fdot">;
+} // End let Predicates = [HasFP8DOT2]
+
+let Predicates = [HasFP8DOT4] in {
+ defm FDOTlane : SIMDThreeSameVectorFP8DOT4Index<"fdot">;
+ defm FDOT : SIMDThreeSameVectorDOT4<"fdot">;
+} // End let Predicates = [HasFP8DOT4]
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 41af5522d967..dc6d5b8950c3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -21,7 +21,6 @@
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
@@ -293,6 +292,8 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
return AArch64::LDRWui;
case AArch64::LDURSWi:
return AArch64::LDURWi;
+ case AArch64::LDRSWpre:
+ return AArch64::LDRWpre;
}
}
@@ -372,6 +373,8 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
case AArch64::LDRSWui:
case AArch64::LDURSWi:
return AArch64::LDPSWi;
+ case AArch64::LDRSWpre:
+ return AArch64::LDPSWpre;
}
}
@@ -585,6 +588,8 @@ static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
case AArch64::LDRXpre:
return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
+ case AArch64::LDRSWpre:
+ return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);
}
}
@@ -717,6 +722,16 @@ static bool isMergeableLdStUpdate(MachineInstr &MI) {
}
}
+static bool isRewritableImplicitDef(unsigned Opc) {
+ switch (Opc) {
+ default:
+ return false;
+ case AArch64::ORRWrs:
+ case AArch64::ADDWri:
+ return true;
+ }
+}
+
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator MergeMI,
@@ -859,66 +874,100 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
bool MergeForward = Flags.getMergeForward();
std::optional<MCPhysReg> RenameReg = Flags.getRenameReg();
- if (MergeForward && RenameReg) {
+ if (RenameReg) {
MCRegister RegToRename = getLdStRegOp(*I).getReg();
DefinedInBB.addReg(*RenameReg);
// Return the sub/super register for RenameReg, matching the size of
// OriginalReg.
- auto GetMatchingSubReg = [this,
- RenameReg](MCPhysReg OriginalReg) -> MCPhysReg {
- for (MCPhysReg SubOrSuper : TRI->sub_and_superregs_inclusive(*RenameReg))
- if (TRI->getMinimalPhysRegClass(OriginalReg) ==
- TRI->getMinimalPhysRegClass(SubOrSuper))
+ auto GetMatchingSubReg =
+ [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg {
+ for (MCPhysReg SubOrSuper :
+ TRI->sub_and_superregs_inclusive(*RenameReg)) {
+ if (C->contains(SubOrSuper))
return SubOrSuper;
+ }
llvm_unreachable("Should have found matching sub or super register!");
};
std::function<bool(MachineInstr &, bool)> UpdateMIs =
- [this, RegToRename, GetMatchingSubReg](MachineInstr &MI, bool IsDef) {
+ [this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &MI,
+ bool IsDef) {
if (IsDef) {
bool SeenDef = false;
- for (auto &MOP : MI.operands()) {
+ for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
+ MachineOperand &MOP = MI.getOperand(OpIdx);
// Rename the first explicit definition and all implicit
// definitions matching RegToRename.
if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
- (!SeenDef || (MOP.isDef() && MOP.isImplicit())) &&
+ (!MergeForward || !SeenDef ||
+ (MOP.isDef() && MOP.isImplicit())) &&
TRI->regsOverlap(MOP.getReg(), RegToRename)) {
assert((MOP.isImplicit() ||
(MOP.isRenamable() && !MOP.isEarlyClobber())) &&
"Need renamable operands");
- MOP.setReg(GetMatchingSubReg(MOP.getReg()));
+ Register MatchingReg;
+ if (const TargetRegisterClass *RC =
+ MI.getRegClassConstraint(OpIdx, TII, TRI))
+ MatchingReg = GetMatchingSubReg(RC);
+ else {
+ if (!isRewritableImplicitDef(MI.getOpcode()))
+ continue;
+ MatchingReg = GetMatchingSubReg(
+ TRI->getMinimalPhysRegClass(MOP.getReg()));
+ }
+ MOP.setReg(MatchingReg);
SeenDef = true;
}
}
} else {
- for (auto &MOP : MI.operands()) {
+ for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
+ MachineOperand &MOP = MI.getOperand(OpIdx);
if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
TRI->regsOverlap(MOP.getReg(), RegToRename)) {
assert((MOP.isImplicit() ||
(MOP.isRenamable() && !MOP.isEarlyClobber())) &&
"Need renamable operands");
- MOP.setReg(GetMatchingSubReg(MOP.getReg()));
+ Register MatchingReg;
+ if (const TargetRegisterClass *RC =
+ MI.getRegClassConstraint(OpIdx, TII, TRI))
+ MatchingReg = GetMatchingSubReg(RC);
+ else
+ MatchingReg = GetMatchingSubReg(
+ TRI->getMinimalPhysRegClass(MOP.getReg()));
+ assert(MatchingReg != AArch64::NoRegister &&
+ "Cannot find matching regs for renaming");
+ MOP.setReg(MatchingReg);
}
}
}
- LLVM_DEBUG(dbgs() << "Renamed " << MI << "\n");
+ LLVM_DEBUG(dbgs() << "Renamed " << MI);
return true;
};
- forAllMIsUntilDef(*I, RegToRename, TRI, LdStLimit, UpdateMIs);
+ forAllMIsUntilDef(MergeForward ? *I : *std::prev(Paired), RegToRename, TRI,
+ UINT32_MAX, UpdateMIs);
#if !defined(NDEBUG)
- // Make sure the register used for renaming is not used between the paired
- // instructions. That would trash the content before the new paired
- // instruction.
+ // For forward merging store:
+ // Make sure the register used for renaming is not used between the
+ // paired instructions. That would trash the content before the new
+ // paired instruction.
+ MCPhysReg RegToCheck = *RenameReg;
+ // For backward merging load:
+ // Make sure the register being renamed is not used between the
+ // paired instructions. That would trash the content after the new
+ // paired instruction.
+ if (!MergeForward)
+ RegToCheck = RegToRename;
for (auto &MI :
iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
- std::next(I), std::next(Paired)))
+ MergeForward ? std::next(I) : I,
+ MergeForward ? std::next(Paired) : Paired))
assert(all_of(MI.operands(),
- [this, &RenameReg](const MachineOperand &MOP) {
+ [this, RegToCheck](const MachineOperand &MOP) {
return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
MOP.isUndef() ||
- !TRI->regsOverlap(MOP.getReg(), *RenameReg);
+ !TRI->regsOverlap(MOP.getReg(), RegToCheck);
}) &&
"Rename register used between paired instruction, trashing the "
"content");
@@ -984,15 +1033,23 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock *MBB = I->getParent();
MachineOperand RegOp0 = getLdStRegOp(*RtMI);
MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
+ MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
// Kill flags may become invalid when moving stores for pairing.
if (RegOp0.isUse()) {
if (!MergeForward) {
// Clear kill flags on store if moving upwards. Example:
- // STRWui %w0, ...
+ // STRWui kill %w0, ...
// USE %w1
// STRWui kill %w1 ; need to clear kill flag when moving STRWui upwards
- RegOp0.setIsKill(false);
- RegOp1.setIsKill(false);
+ // We are about to move the store of w1, so its kill flag may become
+ // invalid; not the case for w0.
+ // Since w1 is used between the stores, the kill flag on w1 is cleared
+ // after merging.
+ // STPWi kill %w0, %w1, ...
+ // USE %w1
+ for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It)
+ if (It->readsRegister(PairedRegOp.getReg(), TRI))
+ PairedRegOp.setIsKill(false);
} else {
// Clear kill flags of the first stores register. Example:
// STRWui %w1, ...
@@ -1318,6 +1375,10 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
if (OpcA == OpcB)
return !AArch64InstrInfo::isPreLdSt(FirstMI);
+ // Two pre ld/st with different opcodes cannot be merged either
+ if (AArch64InstrInfo::isPreLdSt(FirstMI) && AArch64InstrInfo::isPreLdSt(MI))
+ return false;
+
// Try to match a sign-extended load/store with a zero-extended load/store.
bool IsValidLdStrOpc, PairIsValidLdStrOpc;
unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
@@ -1340,7 +1401,7 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
return false;
// The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
- // LDR<S,D,Q,W,X>pre-LDR<S,D,Q,W,X>ui
+ // LDR<S,D,Q,W,X,SW>pre-LDR<S,D,Q,W,X,SW>ui
// are candidate pairs that can be merged.
if (isPreLdStPairCandidate(FirstMI, MI))
return true;
@@ -1352,6 +1413,38 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
// FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
}
+static bool canRenameMOP(const MachineOperand &MOP,
+ const TargetRegisterInfo *TRI) {
+ if (MOP.isReg()) {
+ auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
+ // Renaming registers with multiple disjunct sub-registers (e.g. the
+ // result of a LD3) means that all sub-registers are renamed, potentially
+ // impacting other instructions we did not check. Bail out.
+ // Note that this relies on the structure of the AArch64 register file. In
+ // particular, a subregister cannot be written without overwriting the
+ // whole register.
+ if (RegClass->HasDisjunctSubRegs) {
+ LLVM_DEBUG(
+ dbgs()
+ << " Cannot rename operands with multiple disjunct subregisters ("
+ << MOP << ")\n");
+ return false;
+ }
+
+ // We cannot rename arbitrary implicit-defs; the specific rule to rewrite
+ // them must be known. For example, in ORRWrs the implicit-def
+ // corresponds to the result register.
+ if (MOP.isImplicit() && MOP.isDef()) {
+ if (!isRewritableImplicitDef(MOP.getParent()->getOpcode()))
+ return false;
+ return TRI->isSuperOrSubRegisterEq(
+ MOP.getParent()->getOperand(0).getReg(), MOP.getReg());
+ }
+ }
+ return MOP.isImplicit() ||
+ (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
+}
+
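[Editorial note] For the implicit-def rule, a sketch of the ORRWrs case the comment describes (register names illustrative):

    orr w8, wzr, w9     // explicit def: w8; implicit-def: x8

The implicit x8 is just the widened result register, so renaming w8 -> w12 can consistently rewrite it to x12; for opcodes outside isRewritableImplicitDef no such rewrite rule is known and renaming bails out.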
static bool
canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
@@ -1361,10 +1454,6 @@ canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
// Check if we can find an unused register which we can use to rename
// the register used by the first load/store.
- auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
- MachineFunction &MF = *FirstMI.getParent()->getParent();
- if (!RegClass || !MF.getRegInfo().tracksLiveness())
- return false;
auto RegToRename = getLdStRegOp(FirstMI).getReg();
// For now, we only rename if the store operand gets killed at the store.
@@ -1375,29 +1464,9 @@ canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
MOP.isImplicit() && MOP.isKill() &&
TRI->regsOverlap(RegToRename, MOP.getReg());
})) {
- LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI << "\n");
+ LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI);
return false;
}
- auto canRenameMOP = [TRI](const MachineOperand &MOP) {
- if (MOP.isReg()) {
- auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
- // Renaming registers with multiple disjunct sub-registers (e.g. the
- // result of a LD3) means that all sub-registers are renamed, potentially
- // impacting other instructions we did not check. Bail out.
- // Note that this relies on the structure of the AArch64 register file. In
- // particular, a subregister cannot be written without overwriting the
- // whole register.
- if (RegClass->HasDisjunctSubRegs) {
- LLVM_DEBUG(
- dbgs()
- << " Cannot rename operands with multiple disjunct subregisters ("
- << MOP << ")\n");
- return false;
- }
- }
- return MOP.isImplicit() ||
- (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
- };
bool FoundDef = false;
@@ -1407,11 +1476,11 @@ canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
// * collect the registers used and required register classes for RegToRename.
std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
bool IsDef) {
- LLVM_DEBUG(dbgs() << "Checking " << MI << "\n");
+ LLVM_DEBUG(dbgs() << "Checking " << MI);
// Currently we do not try to rename across frame-setup instructions.
if (MI.getFlag(MachineInstr::FrameSetup)) {
- LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions currently ("
- << MI << ")\n");
+ LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "
+ << "currently\n");
return false;
}
@@ -1431,8 +1500,7 @@ canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
// 1. Insert an extra copy, to materialize the def.
// 2. Skip pseudo-defs until we find a non-pseudo def.
if (MI.isPseudo()) {
- LLVM_DEBUG(dbgs() << " Cannot rename pseudo instruction " << MI
- << "\n");
+ LLVM_DEBUG(dbgs() << " Cannot rename pseudo/bundle instruction\n");
return false;
}
@@ -1440,9 +1508,8 @@ canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
!TRI->regsOverlap(MOP.getReg(), RegToRename))
continue;
- if (!canRenameMOP(MOP)) {
- LLVM_DEBUG(dbgs()
- << " Cannot rename " << MOP << " in " << MI << "\n");
+ if (!canRenameMOP(MOP, TRI)) {
+ LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
return false;
}
RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
@@ -1454,9 +1521,8 @@ canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
!TRI->regsOverlap(MOP.getReg(), RegToRename))
continue;
- if (!canRenameMOP(MOP)) {
- LLVM_DEBUG(dbgs()
- << " Cannot rename " << MOP << " in " << MI << "\n");
+ if (!canRenameMOP(MOP, TRI)) {
+ LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
return false;
}
RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
@@ -1475,6 +1541,52 @@ canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
return true;
}
+// We want to merge the second load into the first by rewriting the uses of
+// the same reg between the first (incl.) and second (excl.) load. We don't
+// need to care about any insns before FirstLoad or after SecondLoad.
+// 1. The second load writes a new value into the same reg.
+// - The renaming cannot impact any later use of the reg.
+// - The second load always clobbers the value written by the first load,
+// which means the reg must be killed before the second load.
+// 2. The first load must be a def of the same reg, so we don't need to look
+// at anything before it.
+static bool canRenameUntilSecondLoad(
+ MachineInstr &FirstLoad, MachineInstr &SecondLoad,
+ LiveRegUnits &UsedInBetween,
+ SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
+ const TargetRegisterInfo *TRI) {
+ if (FirstLoad.isPseudo())
+ return false;
+
+ UsedInBetween.accumulate(FirstLoad);
+ auto RegToRename = getLdStRegOp(FirstLoad).getReg();
+ bool Success = std::all_of(
+ FirstLoad.getIterator(), SecondLoad.getIterator(),
+ [&](MachineInstr &MI) {
+ LLVM_DEBUG(dbgs() << "Checking " << MI);
+ // Currently we do not try to rename across frame-setup instructions.
+ if (MI.getFlag(MachineInstr::FrameSetup)) {
+ LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "
+ << "currently\n");
+ return false;
+ }
+
+ for (auto &MOP : MI.operands()) {
+ if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
+ !TRI->regsOverlap(MOP.getReg(), RegToRename))
+ continue;
+ if (!canRenameMOP(MOP, TRI)) {
+ LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
+ return false;
+ }
+ RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
+ }
+
+ return true;
+ });
+ return Success;
+}
+
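Since the scan above only depends on the shape of the [FirstLoad, SecondLoad) range check, a minimal standalone sketch may help make the bail-out conditions concrete. The mock types below are ours, not the LLVM MachineInstr API; they only model "operand touches a register" and "operand is renamable":

#include <cassert>
#include <string>
#include <vector>

// One operand of a mock instruction: the register it touches and whether
// it is renamable (standing in for isRenamable() && !isEarlyClobber() &&
// !isTied() on a real MachineOperand).
struct MockOp {
  std::string Reg;
  bool Renamable;
};

// A mock instruction: its operands plus the frame-setup flag that makes
// the real scan give up.
struct MockInst {
  std::vector<MockOp> Ops;
  bool FrameSetup;
};

// Shape of the scan in canRenameUntilSecondLoad: walk [FirstLoad,
// SecondLoad) and require every operand overlapping RegToRename to be
// renamable; frame-setup instructions end the attempt.
static bool canRenameRange(const std::vector<MockInst> &Range,
                           const std::string &RegToRename) {
  for (const MockInst &MI : Range) {
    if (MI.FrameSetup)
      return false;
    for (const MockOp &Op : MI.Ops)
      if (Op.Reg == RegToRename && !Op.Renamable)
        return false;
  }
  return true;
}

int main() {
  // ldr x8, [x9]      <- FirstLoad defines x8 (range starts here)
  // add x10, x8, #1   <- intervening use of x8; must be renamable
  // ldr x8, [x9, #8]  <- SecondLoad redefines x8 (excluded from the range)
  std::vector<MockInst> Range = {
      {{{"x8", true}, {"x9", true}}, false},
      {{{"x10", true}, {"x8", true}}, false},
  };
  assert(canRenameRange(Range, "x8"));
  return 0;
}

When the check succeeds, every occurrence of x8 in the range can be rewritten to a spare register, which is what later lets the two loads be fused into a single ldp.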
// Check if we can find a physical register for renaming \p Reg. This register
// must:
// * not be defined already in \p DefinedInBB; DefinedInBB must contain all
@@ -1501,10 +1613,9 @@ static std::optional<MCPhysReg> tryToFindRegisterToRename(
// required register classes.
auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
- return any_of(TRI->sub_and_superregs_inclusive(PR),
- [C, TRI](MCPhysReg SubOrSuper) {
- return C == TRI->getMinimalPhysRegClass(SubOrSuper);
- });
+ return any_of(
+ TRI->sub_and_superregs_inclusive(PR),
+ [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });
});
};
@@ -1524,6 +1635,41 @@ static std::optional<MCPhysReg> tryToFindRegisterToRename(
return std::nullopt;
}
+// For store pairs: returns a register that can be renamed over the range from
+// FirstMI back to the beginning of the block.
+// For load pairs: returns a register that can be renamed over the range from
+// FirstMI to MI.
+static std::optional<MCPhysReg> findRenameRegForSameLdStRegPair(
+ std::optional<bool> MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI,
+ Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween,
+ SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
+ const TargetRegisterInfo *TRI) {
+ std::optional<MCPhysReg> RenameReg;
+ if (!DebugCounter::shouldExecute(RegRenamingCounter))
+ return RenameReg;
+
+ auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
+ MachineFunction &MF = *FirstMI.getParent()->getParent();
+ if (!RegClass || !MF.getRegInfo().tracksLiveness())
+ return RenameReg;
+
+ const bool IsLoad = FirstMI.mayLoad();
+
+ if (!MaybeCanRename) {
+ if (IsLoad)
+ MaybeCanRename = {canRenameUntilSecondLoad(FirstMI, MI, UsedInBetween,
+ RequiredClasses, TRI)};
+ else
+ MaybeCanRename = {
+ canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)};
+ }
+
+ if (*MaybeCanRename) {
+ RenameReg = tryToFindRegisterToRename(MF, Reg, DefinedInBB, UsedInBetween,
+ RequiredClasses, TRI);
+ }
+ return RenameReg;
+}
+
/// Scan the instructions looking for a load/store that can be combined with the
/// current instruction into a wider equivalent or a load/store pair.
MachineBasicBlock::iterator
@@ -1676,17 +1822,6 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
continue;
}
}
- // If the destination register of one load is the same register or a
- // sub/super register of the other load, bail and keep looking. A
- // load-pair instruction with both destination registers the same is
- // UNPREDICTABLE and will result in an exception.
- if (MayLoad &&
- TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg())) {
- LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
- TRI);
- MemInsns.push_back(&MI);
- continue;
- }
// If the BaseReg has been modified, then we cannot do the optimization.
// For example, in the following pattern
@@ -1697,17 +1832,37 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
if (!ModifiedRegUnits.available(BaseReg))
return E;
+ const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq(
+ Reg, getLdStRegOp(MI).getReg());
+
// If the Rt of the second instruction was not modified or used between
// the two instructions and none of the instructions between the second
// and first alias with the second, we can combine the second into the
// first.
if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) &&
- !(MI.mayLoad() &&
+ !(MI.mayLoad() && !SameLoadReg &&
!UsedRegUnits.available(getLdStRegOp(MI).getReg())) &&
!mayAlias(MI, MemInsns, AA)) {
+ // For pairs loading into the same reg, try to find a renaming
+ // opportunity to allow the renaming of Reg between FirstMI and MI
+ // and combine MI into FirstMI; otherwise bail and keep looking.
+ if (SameLoadReg) {
+ std::optional<MCPhysReg> RenameReg =
+ findRenameRegForSameLdStRegPair(MaybeCanRename, FirstMI, MI,
+ Reg, DefinedInBB, UsedInBetween,
+ RequiredClasses, TRI);
+ if (!RenameReg) {
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
+ UsedRegUnits, TRI);
+ MemInsns.push_back(&MI);
+ continue;
+ }
+ Flags.setRenameReg(*RenameReg);
+ }
Flags.setMergeForward(false);
- Flags.clearRenameReg();
+ if (!SameLoadReg)
+ Flags.clearRenameReg();
return MBBI;
}
@@ -1725,22 +1880,13 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
return MBBI;
}
- if (DebugCounter::shouldExecute(RegRenamingCounter)) {
- if (!MaybeCanRename)
- MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween,
- RequiredClasses, TRI)};
-
- if (*MaybeCanRename) {
- std::optional<MCPhysReg> MaybeRenameReg =
- tryToFindRegisterToRename(*FirstMI.getParent()->getParent(),
- Reg, DefinedInBB, UsedInBetween,
- RequiredClasses, TRI);
- if (MaybeRenameReg) {
- Flags.setRenameReg(*MaybeRenameReg);
- Flags.setMergeForward(true);
- MBBIWithRenameReg = MBBI;
- }
- }
+ std::optional<MCPhysReg> RenameReg = findRenameRegForSameLdStRegPair(
+ MaybeCanRename, FirstMI, MI, Reg, DefinedInBB, UsedInBetween,
+ RequiredClasses, TRI);
+ if (RenameReg) {
+ Flags.setMergeForward(true);
+ Flags.setRenameReg(*RenameReg);
+ MBBIWithRenameReg = MBBI;
}
}
// Unable to combine these instructions due to interference in between.
@@ -2136,6 +2282,14 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
if (!TII->isCandidateToMergeOrPair(MI))
return false;
+ // If the disable-ldp feature is set, do not emit ldp.
+ if (MI.mayLoad() && Subtarget->hasDisableLdp())
+ return false;
+
+ // If the disable-stp feature is set, do not emit stp.
+ if (MI.mayStore() && Subtarget->hasDisableStp())
+ return false;
+
// Early exit if the offset is not possible to match. (6 bits of positive
// range, plus allow an extra one in case we find a later insn that matches
// with Offset-1)
@@ -2159,6 +2313,31 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
// Keeping the iterator straight is a pain, so we let the merge routine tell
// us what the next instruction is after it's done mucking about.
auto Prev = std::prev(MBBI);
+
+ // Fetch the memoperand of the load/store that is a candidate for
+ // combination.
+ MachineMemOperand *MemOp =
+ MI.memoperands_empty() ? nullptr : MI.memoperands().front();
+
+ // Get the alignments needed for the checks below when the
+ // ldp-aligned-only/stp-aligned-only features are enabled.
+ uint64_t MemAlignment = MemOp ? MemOp->getAlign().value() : -1;
+ uint64_t TypeAlignment = MemOp ? Align(MemOp->getSize()).value() : -1;
+
+ // If a load arrives and the ldp-aligned-only feature is enabled, check
+ // that the alignment of the source pointer is at least double the
+ // alignment of the type.
+ if (MI.mayLoad() && Subtarget->hasLdpAlignedOnly() && MemOp &&
+ MemAlignment < 2 * TypeAlignment)
+ return false;
+
+ // If a store arrives and the stp-aligned-only feature is enabled, check
+ // that the alignment of the source pointer is at least double the
+ // alignment of the type.
+ if (MI.mayStore() && Subtarget->hasStpAlignedOnly() && MemOp &&
+ MemAlignment < 2 * TypeAlignment)
+ return false;
+
MBBI = mergePairedInsns(MBBI, Paired, Flags);
// Collect liveness info for instructions between Prev and the new position
// MBBI.
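The alignment gate added above reduces to one comparison per candidate. A self-contained sketch with worked numbers (the function name is ours, and it assumes a memoperand is present, as the real checks require):

#include <cassert>
#include <cstdint>

// Models the ldp-aligned-only/stp-aligned-only gate: pairing is rejected
// when the access's actual alignment is below twice the natural alignment
// of the accessed type.
static bool pairAllowedAlignedOnly(uint64_t MemAlignment,
                                   uint64_t TypeAlignment) {
  return MemAlignment >= 2 * TypeAlignment;
}

int main() {
  // An 8-byte load through a 16-byte-aligned pointer may become half of
  // an LDP.
  assert(pairAllowedAlignedOnly(/*MemAlignment=*/16, /*TypeAlignment=*/8));
  // The same load through an 8-byte-aligned pointer stays a single LDR.
  assert(!pairAllowedAlignedOnly(/*MemAlignment=*/8, /*TypeAlignment=*/8));
  return 0;
}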
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
index 351b6abf870c..4afc678abaca 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
@@ -61,7 +61,7 @@ private:
/// Lower a HOM_Prolog pseudo instruction into a helper call
/// or a sequence of homogeneous stores.
- /// When a a fp setup follows, it can be optimized.
+ /// When a fp setup follows, it can be optimized.
bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
/// Lower a HOM_Epilog pseudo instruction into a helper call
@@ -146,8 +146,11 @@ static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs,
break;
}
- for (auto Reg : Regs)
+ for (auto Reg : Regs) {
+ if (Reg == AArch64::NoRegister)
+ continue;
RegStream << AArch64InstPrinter::getRegisterName(Reg);
+ }
return RegStream.str();
}
@@ -195,46 +198,82 @@ static MachineFunction &createFrameHelperMachineFunction(Module *M,
}
/// Emit a store-pair instruction for frame-setup.
+/// If Reg2 is AArch64::NoRegister, emit STR instead.
static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator Pos,
const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
int Offset, bool IsPreDec) {
+ assert(Reg1 != AArch64::NoRegister);
+ const bool IsPaired = Reg2 != AArch64::NoRegister;
bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
unsigned Opc;
- if (IsPreDec)
- Opc = IsFloat ? AArch64::STPDpre : AArch64::STPXpre;
- else
- Opc = IsFloat ? AArch64::STPDi : AArch64::STPXi;
+ if (IsPreDec) {
+ if (IsFloat)
+ Opc = IsPaired ? AArch64::STPDpre : AArch64::STRDpre;
+ else
+ Opc = IsPaired ? AArch64::STPXpre : AArch64::STRXpre;
+ } else {
+ if (IsFloat)
+ Opc = IsPaired ? AArch64::STPDi : AArch64::STRDui;
+ else
+ Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui;
+ }
+ // The implicit scale for Offset is 8.
+ TypeSize Scale(0U, false), Width(0U, false);
+ int64_t MinOffset, MaxOffset;
+ [[maybe_unused]] bool Success =
+ AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
+ assert(Success && "Invalid Opcode");
+ Offset *= (8 / (int)Scale);
MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
if (IsPreDec)
MIB.addDef(AArch64::SP);
- MIB.addReg(Reg2)
- .addReg(Reg1)
+ if (IsPaired)
+ MIB.addReg(Reg2);
+ MIB.addReg(Reg1)
.addReg(AArch64::SP)
.addImm(Offset)
.setMIFlag(MachineInstr::FrameSetup);
}
/// Emit a load-pair instruction for frame-destroy.
+/// If Reg2 is AArch64::NoRegister, emit LDR instead.
static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator Pos,
const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
int Offset, bool IsPostDec) {
+ assert(Reg1 != AArch64::NoRegister);
+ const bool IsPaired = Reg2 != AArch64::NoRegister;
bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
unsigned Opc;
- if (IsPostDec)
- Opc = IsFloat ? AArch64::LDPDpost : AArch64::LDPXpost;
- else
- Opc = IsFloat ? AArch64::LDPDi : AArch64::LDPXi;
+ if (IsPostDec) {
+ if (IsFloat)
+ Opc = IsPaired ? AArch64::LDPDpost : AArch64::LDRDpost;
+ else
+ Opc = IsPaired ? AArch64::LDPXpost : AArch64::LDRXpost;
+ } else {
+ if (IsFloat)
+ Opc = IsPaired ? AArch64::LDPDi : AArch64::LDRDui;
+ else
+ Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui;
+ }
+ // The implicit scale for Offset is 8.
+ TypeSize Scale(0U, false), Width(0U, false);
+ int64_t MinOffset, MaxOffset;
+ [[maybe_unused]] bool Success =
+ AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
+ assert(Success && "Invalid Opcode");
+ Offset *= (8 / (int)Scale);
MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
if (IsPostDec)
MIB.addDef(AArch64::SP);
- MIB.addReg(Reg2, getDefRegState(true))
- .addReg(Reg1, getDefRegState(true))
+ if (IsPaired)
+ MIB.addReg(Reg2, getDefRegState(true));
+ MIB.addReg(Reg1, getDefRegState(true))
.addReg(AArch64::SP)
.addImm(Offset)
.setMIFlag(MachineInstr::FrameDestroy);
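Both emitters rescale the incoming offset, which arrives counted in 8-byte register slots, into the immediate units of the selected opcode. A small sketch of that arithmetic; the function name is ours, and the scales follow our reading of getMemOpInfo (paired and unsigned-offset forms use an 8-byte scale, pre/post-indexed single-register forms a scale of 1):

#include <cassert>

// Convert a slot-counted offset to the immediate units of the opcode;
// assumes Scale evenly divides 8, as it does for the opcodes chosen above.
static int rescaleOffset(int OffsetInSlots, int Scale) {
  return OffsetInSlots * (8 / Scale);
}

int main() {
  // STPXi / STRXui: Scale == 8, so slot counts pass through unchanged.
  assert(rescaleOffset(2, 8) == 2);
  // STRXpre: Scale == 1, so two slots become a 16-byte pre-decrement.
  assert(rescaleOffset(-2, 1) == -16);
  return 0;
}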
@@ -433,9 +472,18 @@ bool AArch64LowerHomogeneousPE::lowerEpilog(
DebugLoc DL = MI.getDebugLoc();
SmallVector<unsigned, 8> Regs;
+ bool HasUnpairedReg = false;
for (auto &MO : MI.operands())
- if (MO.isReg())
+ if (MO.isReg()) {
+ if (!MO.getReg().isValid()) {
+ // For now we are only expecting unpaired GP registers which should
+ // occur exactly once.
+ assert(!HasUnpairedReg);
+ HasUnpairedReg = true;
+ }
Regs.push_back(MO.getReg());
+ }
+ (void)HasUnpairedReg;
int Size = (int)Regs.size();
if (Size == 0)
return false;
@@ -507,17 +555,26 @@ bool AArch64LowerHomogeneousPE::lowerProlog(
DebugLoc DL = MI.getDebugLoc();
SmallVector<unsigned, 8> Regs;
+ bool HasUnpairedReg = false;
int LRIdx = 0;
std::optional<int> FpOffset;
for (auto &MO : MI.operands()) {
if (MO.isReg()) {
- if (MO.getReg() == AArch64::LR)
- LRIdx = Regs.size();
+ if (MO.getReg().isValid()) {
+ if (MO.getReg() == AArch64::LR)
+ LRIdx = Regs.size();
+ } else {
+ // For now we are only expecting unpaired GP registers which should
+ // occur exactly once.
+ assert(!HasUnpairedReg);
+ HasUnpairedReg = true;
+ }
Regs.push_back(MO.getReg());
} else if (MO.isImm()) {
FpOffset = MO.getImm();
}
}
+ (void)HasUnpairedReg;
int Size = (int)Regs.size();
if (Size == 0)
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
index 69a66f4335c1..59969f9dc31c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -100,8 +100,8 @@ AArch64MCInstLower::GetExternalSymbolSymbol(const MachineOperand &MO) const {
return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
}
-MCOperand AArch64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO,
- MCSymbol *Sym) const {
+MCOperand AArch64MCInstLower::lowerSymbolOperandMachO(const MachineOperand &MO,
+ MCSymbol *Sym) const {
// FIXME: We would like an efficient form for this, so we don't have to do a
// lot of extra uniquing.
MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
@@ -270,8 +270,8 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandCOFF(const MachineOperand &MO,
MCOperand AArch64MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
MCSymbol *Sym) const {
- if (Printer.TM.getTargetTriple().isOSDarwin())
- return lowerSymbolOperandDarwin(MO, Sym);
+ if (Printer.TM.getTargetTriple().isOSBinFormatMachO())
+ return lowerSymbolOperandMachO(MO, Sym);
if (Printer.TM.getTargetTriple().isOSBinFormatCOFF())
return lowerSymbolOperandCOFF(MO, Sym);
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MCInstLower.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MCInstLower.h
index 8b6abc5c1f23..e4f8a1bc1a31 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MCInstLower.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MCInstLower.h
@@ -34,8 +34,8 @@ public:
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
- MCOperand lowerSymbolOperandDarwin(const MachineOperand &MO,
- MCSymbol *Sym) const;
+ MCOperand lowerSymbolOperandMachO(const MachineOperand &MO,
+ MCSymbol *Sym) const;
MCOperand lowerSymbolOperandELF(const MachineOperand &MO,
MCSymbol *Sym) const;
MCOperand lowerSymbolOperandCOFF(const MachineOperand &MO,
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
index 961a19317d66..9da59ef2a806 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
@@ -78,8 +78,8 @@ static bool ShouldSignWithBKey(const Function &F, const AArch64Subtarget &STI) {
const StringRef Key =
F.getFnAttribute("sign-return-address-key").getValueAsString();
- assert(Key.equals_insensitive("a_key") || Key.equals_insensitive("b_key"));
- return Key.equals_insensitive("b_key");
+ assert(Key == "a_key" || Key == "b_key");
+ return Key == "b_key";
}
AArch64FunctionInfo::AArch64FunctionInfo(const Function &F,
@@ -97,14 +97,44 @@ AArch64FunctionInfo::AArch64FunctionInfo(const Function &F,
if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
F.getParent()->getModuleFlag("branch-target-enforcement")))
BranchTargetEnforcement = BTE->getZExtValue();
- return;
+ } else {
+ const StringRef BTIEnable =
+ F.getFnAttribute("branch-target-enforcement").getValueAsString();
+ assert(BTIEnable == "true" || BTIEnable == "false");
+ BranchTargetEnforcement = BTIEnable == "true";
}
- const StringRef BTIEnable =
- F.getFnAttribute("branch-target-enforcement").getValueAsString();
- assert(BTIEnable.equals_insensitive("true") ||
- BTIEnable.equals_insensitive("false"));
- BranchTargetEnforcement = BTIEnable.equals_insensitive("true");
+ // The default stack probe size is 4096 if the function has no
+ // stack-probe-size attribute. This is a safe default because it is the
+ // smallest possible guard page size.
+ uint64_t ProbeSize = 4096;
+ if (F.hasFnAttribute("stack-probe-size"))
+ ProbeSize = F.getFnAttributeAsParsedInteger("stack-probe-size");
+ else if (const auto *PS = mdconst::extract_or_null<ConstantInt>(
+ F.getParent()->getModuleFlag("stack-probe-size")))
+ ProbeSize = PS->getZExtValue();
+ assert(int64_t(ProbeSize) > 0 && "Invalid stack probe size");
+
+ if (STI->isTargetWindows()) {
+ if (!F.hasFnAttribute("no-stack-arg-probe"))
+ StackProbeSize = ProbeSize;
+ } else {
+ // Round down to the stack alignment.
+ uint64_t StackAlign =
+ STI->getFrameLowering()->getTransientStackAlign().value();
+ ProbeSize = std::max(StackAlign, ProbeSize & ~(StackAlign - 1U));
+ StringRef ProbeKind;
+ if (F.hasFnAttribute("probe-stack"))
+ ProbeKind = F.getFnAttribute("probe-stack").getValueAsString();
+ else if (const auto *PS = dyn_cast_or_null<MDString>(
+ F.getParent()->getModuleFlag("probe-stack")))
+ ProbeKind = PS->getString();
+ if (ProbeKind.size()) {
+ if (ProbeKind != "inline-asm")
+ report_fatal_error("Unsupported stack probing method");
+ StackProbeSize = ProbeSize;
+ }
+ }
}
MachineFunctionInfo *AArch64FunctionInfo::clone(
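On non-Windows targets, the constructor above rounds the requested probe size down to the transient stack alignment while never letting it drop below that alignment. A standalone sketch with worked values (names are ours; the bitmask assumes the alignment is a power of two):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Mirror of the rounding above: clear the low alignment bits, then clamp
// to at least one alignment unit.
static uint64_t roundProbeSize(uint64_t ProbeSize, uint64_t StackAlign) {
  return std::max(StackAlign, ProbeSize & ~(StackAlign - 1U));
}

int main() {
  assert(roundProbeSize(4096, 16) == 4096); // already a multiple of 16
  assert(roundProbeSize(1000, 16) == 992);  // rounded down to 16 bytes
  assert(roundProbeSize(8, 16) == 16);      // clamped up to the alignment
  return 0;
}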
@@ -122,11 +152,27 @@ bool AArch64FunctionInfo::shouldSignReturnAddress(bool SpillsLR) const {
return SpillsLR;
}
+static bool isLRSpilled(const MachineFunction &MF) {
+ return llvm::any_of(
+ MF.getFrameInfo().getCalleeSavedInfo(),
+ [](const auto &Info) { return Info.getReg() == AArch64::LR; });
+}
+
bool AArch64FunctionInfo::shouldSignReturnAddress(
const MachineFunction &MF) const {
- return shouldSignReturnAddress(llvm::any_of(
- MF.getFrameInfo().getCalleeSavedInfo(),
- [](const auto &Info) { return Info.getReg() == AArch64::LR; }));
+ return shouldSignReturnAddress(isLRSpilled(MF));
+}
+
+bool AArch64FunctionInfo::needsShadowCallStackPrologueEpilogue(
+ MachineFunction &MF) const {
+ if (!(isLRSpilled(MF) &&
+ MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)))
+ return false;
+
+ if (!MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(18))
+ report_fatal_error("Must reserve x18 to use shadow call stack");
+
+ return true;
}
bool AArch64FunctionInfo::needsDwarfUnwindInfo(
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index d82fb436925e..219f83cfd32e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -185,6 +185,8 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// The frame-index for the TPIDR2 object used for lazy saves.
Register LazySaveTPIDR2Obj = 0;
+ /// Whether this function changes streaming mode within the function.
+ bool HasStreamingModeChanges = false;
/// True if the function need unwind information.
mutable std::optional<bool> NeedsDwarfUnwindInfo;
@@ -192,6 +194,8 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// True if the function need asynchronous unwind information.
mutable std::optional<bool> NeedsAsyncDwarfUnwindInfo;
+ int64_t StackProbeSize = 0;
+
public:
AArch64FunctionInfo(const Function &F, const AArch64Subtarget *STI);
@@ -429,6 +433,8 @@ public:
bool shouldSignReturnAddress(const MachineFunction &MF) const;
bool shouldSignReturnAddress(bool SpillsLR) const;
+ bool needsShadowCallStackPrologueEpilogue(MachineFunction &MF) const;
+
bool shouldSignWithBKey() const { return SignWithBKey; }
bool isMTETagged() const { return IsMTETagged; }
@@ -447,6 +453,15 @@ public:
bool needsDwarfUnwindInfo(const MachineFunction &MF) const;
bool needsAsyncDwarfUnwindInfo(const MachineFunction &MF) const;
+ bool hasStreamingModeChanges() const { return HasStreamingModeChanges; }
+ void setHasStreamingModeChanges(bool HasChanges) {
+ HasStreamingModeChanges = HasChanges;
+ }
+
+ bool hasStackProbing() const { return StackProbeSize != 0; }
+
+ int64_t getStackProbeSize() const { return StackProbeSize; }
+
private:
// Hold the lists of LOHs.
MILOHContainer LOHContainerSet;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
new file mode 100644
index 000000000000..7576d2a899d1
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
@@ -0,0 +1,363 @@
+//===-- AArch64PointerAuth.cpp -- Harden code using PAuth ------------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64PointerAuth.h"
+
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+
+using namespace llvm;
+using namespace llvm::AArch64PAuth;
+
+#define AARCH64_POINTER_AUTH_NAME "AArch64 Pointer Authentication"
+
+namespace {
+
+class AArch64PointerAuth : public MachineFunctionPass {
+public:
+ static char ID;
+
+ AArch64PointerAuth() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override { return AARCH64_POINTER_AUTH_NAME; }
+
+private:
+ /// An immediate operand passed to the BRK instruction, if one is ever emitted.
+ const unsigned BrkOperand = 0xc471;
+
+ const AArch64Subtarget *Subtarget = nullptr;
+ const AArch64InstrInfo *TII = nullptr;
+ const AArch64RegisterInfo *TRI = nullptr;
+
+ void signLR(MachineFunction &MF, MachineBasicBlock::iterator MBBI) const;
+
+ void authenticateLR(MachineFunction &MF,
+ MachineBasicBlock::iterator MBBI) const;
+
+ bool checkAuthenticatedLR(MachineBasicBlock::iterator TI) const;
+};
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(AArch64PointerAuth, "aarch64-ptrauth",
+ AARCH64_POINTER_AUTH_NAME, false, false)
+
+FunctionPass *llvm::createAArch64PointerAuthPass() {
+ return new AArch64PointerAuth();
+}
+
+char AArch64PointerAuth::ID = 0;
+
+void AArch64PointerAuth::signLR(MachineFunction &MF,
+ MachineBasicBlock::iterator MBBI) const {
+ const AArch64FunctionInfo *MFnI = MF.getInfo<AArch64FunctionInfo>();
+ bool UseBKey = MFnI->shouldSignWithBKey();
+ bool EmitCFI = MFnI->needsDwarfUnwindInfo(MF);
+ bool NeedsWinCFI = MF.hasWinCFI();
+
+ MachineBasicBlock &MBB = *MBBI->getParent();
+
+ // Debug location must be unknown, see AArch64FrameLowering::emitPrologue.
+ DebugLoc DL;
+
+ if (UseBKey) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ // No SEH opcode for this one; it doesn't materialize into an
+ // instruction on Windows.
+ BuildMI(MBB, MBBI, DL,
+ TII->get(UseBKey ? AArch64::PACIBSP : AArch64::PACIASP))
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ if (EmitCFI) {
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ } else if (NeedsWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PACSignLR))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+}
+
+void AArch64PointerAuth::authenticateLR(
+ MachineFunction &MF, MachineBasicBlock::iterator MBBI) const {
+ const AArch64FunctionInfo *MFnI = MF.getInfo<AArch64FunctionInfo>();
+ bool UseBKey = MFnI->shouldSignWithBKey();
+ bool EmitAsyncCFI = MFnI->needsAsyncDwarfUnwindInfo(MF);
+ bool NeedsWinCFI = MF.hasWinCFI();
+
+ MachineBasicBlock &MBB = *MBBI->getParent();
+ DebugLoc DL = MBBI->getDebugLoc();
+ // MBBI points to a PAUTH_EPILOGUE instruction to be replaced and
+ // TI points to a terminator instruction that may or may not be combined.
+ // Note that inserting new instructions "before MBBI" and "before TI" is
+ // not the same because if ShadowCallStack is enabled, its instructions
+ // are placed between MBBI and TI.
+ MachineBasicBlock::iterator TI = MBB.getFirstInstrTerminator();
+
+ // The AUTIASP instruction assembles to a hint instruction before v8.3a,
+ // so it can safely be used for any v8-A architecture.
+ // From v8.3a onwards there are optimised authenticate LR and return
+ // instructions, namely RETA{A,B}, that can be used instead. In this case the
+ // DW_CFA_AARCH64_negate_ra_state can't be emitted.
+ bool TerminatorIsCombinable =
+ TI != MBB.end() && TI->getOpcode() == AArch64::RET;
+ if (Subtarget->hasPAuth() && TerminatorIsCombinable && !NeedsWinCFI &&
+ !MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) {
+ unsigned CombinedRetOpcode = UseBKey ? AArch64::RETAB : AArch64::RETAA;
+ BuildMI(MBB, TI, DL, TII->get(CombinedRetOpcode)).copyImplicitOps(*TI);
+ MBB.erase(TI);
+ } else {
+ unsigned AutOpcode = UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP;
+ BuildMI(MBB, MBBI, DL, TII->get(AutOpcode))
+ .setMIFlag(MachineInstr::FrameDestroy);
+
+ if (EmitAsyncCFI) {
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameDestroy);
+ }
+ if (NeedsWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PACSignLR))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ }
+ }
+}
+
+namespace {
+
+// Mark the dummy LDR instruction as volatile so it is not removed as dead code.
+MachineMemOperand *createCheckMemOperand(MachineFunction &MF,
+ const AArch64Subtarget &Subtarget) {
+ MachinePointerInfo PointerInfo(Subtarget.getAddressCheckPSV());
+ auto MOVolatileLoad =
+ MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
+
+ return MF.getMachineMemOperand(PointerInfo, MOVolatileLoad, 4, Align(4));
+}
+
+} // namespace
+
+MachineBasicBlock &llvm::AArch64PAuth::checkAuthenticatedRegister(
+ MachineBasicBlock::iterator MBBI, AuthCheckMethod Method,
+ Register AuthenticatedReg, Register TmpReg, bool UseIKey, unsigned BrkImm) {
+
+ MachineBasicBlock &MBB = *MBBI->getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const AArch64InstrInfo *TII = Subtarget.getInstrInfo();
+ DebugLoc DL = MBBI->getDebugLoc();
+
+ // First, handle the methods not requiring creating extra MBBs.
+ switch (Method) {
+ default:
+ break;
+ case AuthCheckMethod::None:
+ return MBB;
+ case AuthCheckMethod::DummyLoad:
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRWui), getWRegFromXReg(TmpReg))
+ .addReg(AArch64::LR)
+ .addImm(0)
+ .addMemOperand(createCheckMemOperand(MF, Subtarget));
+ return MBB;
+ }
+
+ // Control flow has to be changed, so arrange new MBBs.
+
+ // At this point, at least one AUT* instruction is expected before MBBI
+ assert(MBBI != MBB.begin() &&
+ "Cannot insert the check at the very beginning of MBB");
+ // The block to insert the check into.
+ MachineBasicBlock *CheckBlock = &MBB;
+ // The remaining part of the original MBB that is executed on success.
+ MachineBasicBlock *SuccessBlock = MBB.splitAt(*std::prev(MBBI));
+
+ // The block that explicitly generates a break-point exception on failure.
+ MachineBasicBlock *BreakBlock =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.push_back(BreakBlock);
+ MBB.splitSuccessor(SuccessBlock, BreakBlock);
+
+ assert(CheckBlock->getFallThrough() == SuccessBlock);
+ BuildMI(BreakBlock, DL, TII->get(AArch64::BRK)).addImm(BrkImm);
+
+ switch (Method) {
+ case AuthCheckMethod::None:
+ case AuthCheckMethod::DummyLoad:
+ llvm_unreachable("Should be handled above");
+ case AuthCheckMethod::HighBitsNoTBI:
+ BuildMI(CheckBlock, DL, TII->get(AArch64::EORXrs), TmpReg)
+ .addReg(AuthenticatedReg)
+ .addReg(AuthenticatedReg)
+ .addImm(1);
+ BuildMI(CheckBlock, DL, TII->get(AArch64::TBNZX))
+ .addReg(TmpReg)
+ .addImm(62)
+ .addMBB(BreakBlock);
+ return *SuccessBlock;
+ case AuthCheckMethod::XPACHint:
+ assert(AuthenticatedReg == AArch64::LR &&
+ "XPACHint mode is only compatible with checking the LR register");
+ assert(UseIKey && "XPACHint mode is only compatible with I-keys");
+ BuildMI(CheckBlock, DL, TII->get(AArch64::ORRXrs), TmpReg)
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::LR)
+ .addImm(0);
+ BuildMI(CheckBlock, DL, TII->get(AArch64::XPACLRI));
+ BuildMI(CheckBlock, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
+ .addReg(TmpReg)
+ .addReg(AArch64::LR)
+ .addImm(0);
+ BuildMI(CheckBlock, DL, TII->get(AArch64::Bcc))
+ .addImm(AArch64CC::NE)
+ .addMBB(BreakBlock);
+ return *SuccessBlock;
+ }
+ llvm_unreachable("Unknown AuthCheckMethod enum");
+}
+
+unsigned llvm::AArch64PAuth::getCheckerSizeInBytes(AuthCheckMethod Method) {
+ switch (Method) {
+ case AuthCheckMethod::None:
+ return 0;
+ case AuthCheckMethod::DummyLoad:
+ return 4;
+ case AuthCheckMethod::HighBitsNoTBI:
+ return 12;
+ case AuthCheckMethod::XPACHint:
+ return 20;
+ }
+ llvm_unreachable("Unknown AuthCheckMethod enum");
+}
+
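These byte counts line up with fixed-width 4-byte A64 encodings. A small sketch of our reading, counting the brk of the break block where one is created:

#include <cassert>

enum class Method { None, DummyLoad, HighBitsNoTBI, XPACHint };

// Instruction counts behind getCheckerSizeInBytes, per our reading of
// checkAuthenticatedRegister above.
static unsigned checkerInsnCount(Method M) {
  switch (M) {
  case Method::None:          return 0; // no code emitted
  case Method::DummyLoad:     return 1; // ldr
  case Method::HighBitsNoTBI: return 3; // eor + tbnz + brk
  case Method::XPACHint:      return 5; // mov + xpaclri + cmp + b.ne + brk
  }
  return 0;
}

int main() {
  assert(checkerInsnCount(Method::DummyLoad) * 4 == 4);
  assert(checkerInsnCount(Method::HighBitsNoTBI) * 4 == 12);
  assert(checkerInsnCount(Method::XPACHint) * 4 == 20);
  return 0;
}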
+bool AArch64PointerAuth::checkAuthenticatedLR(
+ MachineBasicBlock::iterator TI) const {
+ AuthCheckMethod Method = Subtarget->getAuthenticatedLRCheckMethod();
+
+ if (Method == AuthCheckMethod::None)
+ return false;
+
+ // FIXME If FEAT_FPAC is implemented by the CPU, this check can be skipped.
+
+ assert(!TI->getMF()->hasWinCFI() && "WinCFI is not yet supported");
+
+ // The following code may create a signing oracle:
+ //
+ // <authenticate LR>
+ // TCRETURN ; the callee may sign and spill the LR in its prologue
+ //
+ // To avoid generating a signing oracle, check the authenticated value
+ // before possibly re-signing it in the callee, as follows:
+ //
+ // <authenticate LR>
+ // <check if LR contains a valid address>
+ // b.<cond> break_block
+ // ret_block:
+ // TCRETURN
+ // break_block:
+ // brk <BrkOperand>
+ //
+ // or just
+ //
+ // <authenticate LR>
+ // ldr tmp, [lr]
+ // TCRETURN
+
+ // TmpReg is chosen assuming X16 and X17 are dead after TI.
+ assert(AArch64InstrInfo::isTailCallReturnInst(*TI) &&
+ "Tail call is expected");
+ Register TmpReg =
+ TI->readsRegister(AArch64::X16, TRI) ? AArch64::X17 : AArch64::X16;
+ assert(!TI->readsRegister(TmpReg, TRI) &&
+ "More than a single register is used by TCRETURN");
+
+ checkAuthenticatedRegister(TI, Method, AArch64::LR, TmpReg, /*UseIKey=*/true,
+ BrkOperand);
+
+ return true;
+}
+
+bool AArch64PointerAuth::runOnMachineFunction(MachineFunction &MF) {
+ const auto *MFnI = MF.getInfo<AArch64FunctionInfo>();
+
+ Subtarget = &MF.getSubtarget<AArch64Subtarget>();
+ TII = Subtarget->getInstrInfo();
+ TRI = Subtarget->getRegisterInfo();
+
+ SmallVector<MachineBasicBlock::instr_iterator> PAuthPseudoInstrs;
+ SmallVector<MachineBasicBlock::instr_iterator> TailCallInstrs;
+
+ bool Modified = false;
+ bool HasAuthenticationInstrs = false;
+
+ for (auto &MBB : MF) {
+ // Using instr_iterator to catch unsupported bundled TCRETURN* instructions
+ // instead of just skipping them.
+ for (auto &MI : MBB.instrs()) {
+ switch (MI.getOpcode()) {
+ default:
+ // Bundled TCRETURN* instructions (such as created by KCFI)
+ // are not supported yet, but no support is required if no
+ // PAUTH_EPILOGUE instructions exist in the same function.
+ // Skip the BUNDLE instruction itself (actual bundled instructions
+ // follow it in the instruction list).
+ if (MI.isBundle())
+ continue;
+ if (AArch64InstrInfo::isTailCallReturnInst(MI))
+ TailCallInstrs.push_back(MI.getIterator());
+ break;
+ case AArch64::PAUTH_PROLOGUE:
+ case AArch64::PAUTH_EPILOGUE:
+ assert(!MI.isBundled());
+ PAuthPseudoInstrs.push_back(MI.getIterator());
+ break;
+ }
+ }
+ }
+
+ for (auto It : PAuthPseudoInstrs) {
+ switch (It->getOpcode()) {
+ case AArch64::PAUTH_PROLOGUE:
+ signLR(MF, It);
+ break;
+ case AArch64::PAUTH_EPILOGUE:
+ authenticateLR(MF, It);
+ HasAuthenticationInstrs = true;
+ break;
+ default:
+ llvm_unreachable("Unhandled opcode");
+ }
+ It->eraseFromParent();
+ Modified = true;
+ }
+
+ // FIXME Do we need to emit any PAuth-related epilogue code at all
+ // when SCS is enabled?
+ if (HasAuthenticationInstrs &&
+ !MFnI->needsShadowCallStackPrologueEpilogue(MF)) {
+ for (auto TailCall : TailCallInstrs) {
+ assert(!TailCall->isBundled() && "Not yet supported");
+ Modified |= checkAuthenticatedLR(TailCall);
+ }
+ }
+
+ return Modified;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PointerAuth.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PointerAuth.h
new file mode 100644
index 000000000000..e1ceaed58abe
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PointerAuth.h
@@ -0,0 +1,116 @@
+//===-- AArch64PointerAuth.h -- Harden code using PAuth ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64POINTERAUTH_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64POINTERAUTH_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/Register.h"
+
+namespace llvm {
+namespace AArch64PAuth {
+
+/// Variants of check performed on an authenticated pointer.
+///
+/// In cases such as authenticating the LR value when performing a tail call
+/// or when re-signing a signed pointer with a different signing schema,
+/// a failed authentication may not generate an exception on its own and may
+/// create an authentication or signing oracle if not checked explicitly.
+///
+/// A number of check methods modify control flow in a similar way by
+/// rewriting the code
+///
+/// ```
+/// <authenticate LR>
+/// <more instructions>
+/// ```
+///
+/// as follows:
+///
+/// ```
+/// <authenticate LR>
+/// <method-specific checker>
+/// ret_block:
+/// <more instructions>
+/// ...
+///
+/// break_block:
+/// brk <code>
+/// ```
+enum class AuthCheckMethod {
+ /// Do not check the value at all
+ None,
+ /// Perform a dummy load into a temporary register
+ DummyLoad,
+ /// Check by comparing bits 62 and 61 of the authenticated address.
+ ///
+ /// This method modifies control flow and inserts the following checker:
+ ///
+ /// ```
+ /// eor Xtmp, Xn, Xn, lsl #1
+ /// tbnz Xtmp, #62, break_block
+ /// ```
+ HighBitsNoTBI,
+ /// Check by comparing the authenticated value with an XPAC-ed one without
+ /// using PAuth instructions not encoded as HINT. Can only be applied to LR.
+ ///
+ /// This method modifies control flow and inserts the following checker:
+ ///
+ /// ```
+ /// mov Xtmp, LR
+ /// xpaclri ; encoded as "hint #7"
+ /// ; Note: at this point, the LR register contains the address as if
+ /// ; the authentication succeeded and the temporary register contains the
+ /// ; *real* result of authentication.
+ /// cmp Xtmp, LR
+ /// b.ne break_block
+ /// ```
+ XPACHint,
+};
+
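The HighBitsNoTBI checker relies on bits 62 and 61 agreeing in a canonical address and disagreeing after a failed authentication. A standalone sketch of the bit trick; the function name and sample values are ours, purely illustrative:

#include <cassert>
#include <cstdint>

// "eor x, x, x, lsl #1" moves (bit62 ^ bit61) into bit 62; tbnz on bit 62
// then branches to the break block exactly when the two bits differ.
static bool checkFails(uint64_t Addr) {
  uint64_t T = Addr ^ (Addr << 1);
  return (T >> 62) & 1;
}

int main() {
  assert(!checkFails(0x00007fffdeadbee0ULL)); // bits 62 and 61 equal: pass
  assert(checkFails(0x20007fffdeadbee0ULL));  // bit 61 set, 62 clear: fail
  return 0;
}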
+#define AUTH_CHECK_METHOD_CL_VALUES_COMMON \
+ clEnumValN(AArch64PAuth::AuthCheckMethod::None, "none", \
+ "Do not check authenticated address"), \
+ clEnumValN(AArch64PAuth::AuthCheckMethod::DummyLoad, "load", \
+ "Perform dummy load from authenticated address"), \
+ clEnumValN(AArch64PAuth::AuthCheckMethod::HighBitsNoTBI, \
+ "high-bits-notbi", \
+ "Compare bits 62 and 61 of address (TBI should be disabled)")
+
+#define AUTH_CHECK_METHOD_CL_VALUES_LR \
+ AUTH_CHECK_METHOD_CL_VALUES_COMMON, \
+ clEnumValN(AArch64PAuth::AuthCheckMethod::XPACHint, "xpac-hint", \
+ "Compare with the result of XPACLRI")
+
+/// Explicitly checks that pointer authentication succeeded.
+///
+/// Assuming AuthenticatedReg contains a value returned by one of the AUT*
+/// instructions, check the value using Method just before the instruction
+/// pointed to by MBBI. If the check succeeds, execution proceeds to the
+/// instruction pointed to by MBBI, otherwise a CPU exception is generated.
+///
+/// Some of the methods may need to know if the pointer was authenticated
+/// using an I-key or D-key and which register can be used as temporary.
+/// If an explicit BRK instruction is used to generate an exception, BrkImm
+/// specifies its immediate operand.
+///
+/// \returns The machine basic block containing the code that is executed
+/// after the check succeeds.
+MachineBasicBlock &checkAuthenticatedRegister(MachineBasicBlock::iterator MBBI,
+ AuthCheckMethod Method,
+ Register AuthenticatedReg,
+ Register TmpReg, bool UseIKey,
+ unsigned BrkImm);
+
+/// Returns the number of bytes added by checkAuthenticatedRegister.
+unsigned getCheckerSizeInBytes(AuthCheckMethod Method);
+
+} // end namespace AArch64PAuth
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
index 369801a8ea7c..1494312886a4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
@@ -462,7 +462,9 @@ bool AArch64RedundantCopyElimination::optimizeBlock(MachineBasicBlock *MBB) {
// Clear kills in the range where changes were made. This is conservative,
// but should be okay since kill markers are being phased out.
LLVM_DEBUG(dbgs() << "Clearing kill flags.\n\tFirstUse: " << *FirstUse
- << "\tLastChange: " << *LastChange);
+ << "\tLastChange: ";
+ if (LastChange == MBB->end()) dbgs() << "<end>\n";
+ else dbgs() << *LastChange);
for (MachineInstr &MMI : make_range(FirstUse, PredMBB->end()))
MMI.clearKillInfo();
for (MachineInstr &MMI : make_range(MBB->begin(), LastChange))
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index d1ddf6d76975..24ba9dd95004 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -440,8 +440,21 @@ AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const {
Reserved.set(SubReg);
}
+ if (MF.getSubtarget<AArch64Subtarget>().hasSME2()) {
+ for (MCSubRegIterator SubReg(AArch64::ZT0, this, /*self=*/true);
+ SubReg.isValid(); ++SubReg)
+ Reserved.set(*SubReg);
+ }
+
markSuperRegs(Reserved, AArch64::FPCR);
+ if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
+ markSuperRegs(Reserved, AArch64::X27);
+ markSuperRegs(Reserved, AArch64::X28);
+ markSuperRegs(Reserved, AArch64::W27);
+ markSuperRegs(Reserved, AArch64::W28);
+ }
+
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
@@ -558,8 +571,6 @@ bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF,
switch (CC) {
default:
report_fatal_error("Unsupported calling convention.");
- case CallingConv::WebKit_JS:
- return HasReg(CC_AArch64_WebKit_JS_ArgRegs, Reg);
case CallingConv::GHC:
return HasReg(CC_AArch64_GHC_ArgRegs, Reg);
case CallingConv::C:
@@ -570,8 +581,18 @@ bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF,
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::Tail:
- if (STI.isTargetWindows() && IsVarArg)
- return HasReg(CC_AArch64_Win64_VarArg_ArgRegs, Reg);
+ if (STI.isTargetWindows()) {
+ if (IsVarArg)
+ return HasReg(CC_AArch64_Win64_VarArg_ArgRegs, Reg);
+ switch (CC) {
+ default:
+ return HasReg(CC_AArch64_Win64PCS_ArgRegs, Reg);
+ case CallingConv::Swift:
+ case CallingConv::SwiftTail:
+ return HasReg(CC_AArch64_Win64PCS_Swift_ArgRegs, Reg) ||
+ HasReg(CC_AArch64_Win64PCS_ArgRegs, Reg);
+ }
+ }
if (!STI.isTargetDarwin()) {
switch (CC) {
default:
@@ -598,13 +619,15 @@ bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF,
case CallingConv::Win64:
if (IsVarArg)
HasReg(CC_AArch64_Win64_VarArg_ArgRegs, Reg);
- return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg);
+ return HasReg(CC_AArch64_Win64PCS_ArgRegs, Reg);
case CallingConv::CFGuard_Check:
return HasReg(CC_AArch64_Win64_CFGuard_Check_ArgRegs, Reg);
case CallingConv::AArch64_VectorCall:
case CallingConv::AArch64_SVE_VectorCall:
case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0:
case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2:
+ if (STI.isTargetWindows())
+ return HasReg(CC_AArch64_Win64PCS_ArgRegs, Reg);
return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg);
}
}
@@ -969,6 +992,8 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case AArch64::FPR64_loRegClassID:
case AArch64::FPR16_loRegClassID:
return 16;
+ case AArch64::FPR128_0to7RegClassID:
+ return 8;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 4bb1f9413f2b..b70ab8568884 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -31,10 +31,6 @@ let Namespace = "AArch64" in {
def subo64 : SubRegIndex<64>;
// SVE
def zsub : SubRegIndex<128>;
- // Note: zsub_hi should never be used directly because it represents
- // the scalable part of the SVE vector and cannot be manipulated as a
- // subvector in the same way the lower 128bits can.
- def zsub_hi : SubRegIndex<128>;
// Note: Code depends on these having consecutive numbers
def dsub0 : SubRegIndex<64>;
def dsub1 : SubRegIndex<64>;
@@ -55,6 +51,8 @@ let Namespace = "AArch64" in {
def zasubd1 : SubRegIndex<256>; // (16 x 16)/8 bytes = 256 bits
def zasubq0 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits
def zasubq1 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits
+
+ def psub : SubRegIndex<16>;
}
let Namespace = "AArch64" in {
@@ -438,7 +436,7 @@ def Q31 : AArch64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias<B31>;
def FPR8 : RegisterClass<"AArch64", [i8], 8, (sequence "B%u", 0, 31)> {
let Size = 8;
}
-def FPR16 : RegisterClass<"AArch64", [f16, bf16], 16, (sequence "H%u", 0, 31)> {
+def FPR16 : RegisterClass<"AArch64", [f16, bf16, i16], 16, (sequence "H%u", 0, 31)> {
let Size = 16;
}
@@ -469,6 +467,13 @@ def FPR128_lo : RegisterClass<"AArch64",
v8bf16],
128, (trunc FPR128, 16)>;
+// The lower 8 vector registers. Some instructions can only take registers
+// in this range.
+def FPR128_0to7 : RegisterClass<"AArch64",
+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16,
+ v8bf16],
+ 128, (trunc FPR128, 8)>;
+
// Pairs, triples, and quads of 64-bit vector registers.
def DSeqPairs : RegisterTuples<[dsub0, dsub1], [(rotl FPR64, 0), (rotl FPR64, 1)]>;
def DSeqTriples : RegisterTuples<[dsub0, dsub1, dsub2],
@@ -536,6 +541,15 @@ def V128_lo : RegisterOperand<FPR128_lo, "printVRegOperand"> {
let ParserMatchClass = VectorRegLoAsmOperand;
}
+def VectorReg0to7AsmOperand : AsmOperandClass {
+ let Name = "VectorReg0to7";
+ let PredicateMethod = "isNeonVectorReg0to7";
+}
+
+def V128_0to7 : RegisterOperand<FPR128_0to7, "printVRegOperand"> {
+ let ParserMatchClass = VectorReg0to7AsmOperand;
+}
+
class TypedVecListAsmOperand<int count, string vecty, int lanes, int eltsize>
: AsmOperandClass {
let Name = "TypedVectorList" # count # "_" # lanes # eltsize;
@@ -767,93 +781,78 @@ def GPR64x8 : RegisterOperand<GPR64x8Class, "printGPR64x8"> {
//===----- END: v8.7a accelerator extension register operands -------------===//
+// SVE predicate-as-counter registers
+ def PN0 : AArch64Reg<0, "pn0">, DwarfRegNum<[48]>;
+ def PN1 : AArch64Reg<1, "pn1">, DwarfRegNum<[49]>;
+ def PN2 : AArch64Reg<2, "pn2">, DwarfRegNum<[50]>;
+ def PN3 : AArch64Reg<3, "pn3">, DwarfRegNum<[51]>;
+ def PN4 : AArch64Reg<4, "pn4">, DwarfRegNum<[52]>;
+ def PN5 : AArch64Reg<5, "pn5">, DwarfRegNum<[53]>;
+ def PN6 : AArch64Reg<6, "pn6">, DwarfRegNum<[54]>;
+ def PN7 : AArch64Reg<7, "pn7">, DwarfRegNum<[55]>;
+ def PN8 : AArch64Reg<8, "pn8">, DwarfRegNum<[56]>;
+ def PN9 : AArch64Reg<9, "pn9">, DwarfRegNum<[57]>;
+ def PN10 : AArch64Reg<10, "pn10">, DwarfRegNum<[58]>;
+ def PN11 : AArch64Reg<11, "pn11">, DwarfRegNum<[59]>;
+ def PN12 : AArch64Reg<12, "pn12">, DwarfRegNum<[60]>;
+ def PN13 : AArch64Reg<13, "pn13">, DwarfRegNum<[61]>;
+ def PN14 : AArch64Reg<14, "pn14">, DwarfRegNum<[62]>;
+ def PN15 : AArch64Reg<15, "pn15">, DwarfRegNum<[63]>;
+
// SVE predicate registers
-def P0 : AArch64Reg<0, "p0">, DwarfRegNum<[48]>;
-def P1 : AArch64Reg<1, "p1">, DwarfRegNum<[49]>;
-def P2 : AArch64Reg<2, "p2">, DwarfRegNum<[50]>;
-def P3 : AArch64Reg<3, "p3">, DwarfRegNum<[51]>;
-def P4 : AArch64Reg<4, "p4">, DwarfRegNum<[52]>;
-def P5 : AArch64Reg<5, "p5">, DwarfRegNum<[53]>;
-def P6 : AArch64Reg<6, "p6">, DwarfRegNum<[54]>;
-def P7 : AArch64Reg<7, "p7">, DwarfRegNum<[55]>;
-def P8 : AArch64Reg<8, "p8">, DwarfRegNum<[56]>;
-def P9 : AArch64Reg<9, "p9">, DwarfRegNum<[57]>;
-def P10 : AArch64Reg<10, "p10">, DwarfRegNum<[58]>;
-def P11 : AArch64Reg<11, "p11">, DwarfRegNum<[59]>;
-def P12 : AArch64Reg<12, "p12">, DwarfRegNum<[60]>;
-def P13 : AArch64Reg<13, "p13">, DwarfRegNum<[61]>;
-def P14 : AArch64Reg<14, "p14">, DwarfRegNum<[62]>;
-def P15 : AArch64Reg<15, "p15">, DwarfRegNum<[63]>;
-
-// The part of SVE registers that don't overlap Neon registers.
-// These are only used as part of clobber lists.
-def Z0_HI : AArch64Reg<0, "z0_hi">;
-def Z1_HI : AArch64Reg<1, "z1_hi">;
-def Z2_HI : AArch64Reg<2, "z2_hi">;
-def Z3_HI : AArch64Reg<3, "z3_hi">;
-def Z4_HI : AArch64Reg<4, "z4_hi">;
-def Z5_HI : AArch64Reg<5, "z5_hi">;
-def Z6_HI : AArch64Reg<6, "z6_hi">;
-def Z7_HI : AArch64Reg<7, "z7_hi">;
-def Z8_HI : AArch64Reg<8, "z8_hi">;
-def Z9_HI : AArch64Reg<9, "z9_hi">;
-def Z10_HI : AArch64Reg<10, "z10_hi">;
-def Z11_HI : AArch64Reg<11, "z11_hi">;
-def Z12_HI : AArch64Reg<12, "z12_hi">;
-def Z13_HI : AArch64Reg<13, "z13_hi">;
-def Z14_HI : AArch64Reg<14, "z14_hi">;
-def Z15_HI : AArch64Reg<15, "z15_hi">;
-def Z16_HI : AArch64Reg<16, "z16_hi">;
-def Z17_HI : AArch64Reg<17, "z17_hi">;
-def Z18_HI : AArch64Reg<18, "z18_hi">;
-def Z19_HI : AArch64Reg<19, "z19_hi">;
-def Z20_HI : AArch64Reg<20, "z20_hi">;
-def Z21_HI : AArch64Reg<21, "z21_hi">;
-def Z22_HI : AArch64Reg<22, "z22_hi">;
-def Z23_HI : AArch64Reg<23, "z23_hi">;
-def Z24_HI : AArch64Reg<24, "z24_hi">;
-def Z25_HI : AArch64Reg<25, "z25_hi">;
-def Z26_HI : AArch64Reg<26, "z26_hi">;
-def Z27_HI : AArch64Reg<27, "z27_hi">;
-def Z28_HI : AArch64Reg<28, "z28_hi">;
-def Z29_HI : AArch64Reg<29, "z29_hi">;
-def Z30_HI : AArch64Reg<30, "z30_hi">;
-def Z31_HI : AArch64Reg<31, "z31_hi">;
+let SubRegIndices = [psub] in {
+ def P0 : AArch64Reg<0, "p0", [PN0]>, DwarfRegAlias<PN0>;
+ def P1 : AArch64Reg<1, "p1", [PN1]>, DwarfRegAlias<PN1>;
+ def P2 : AArch64Reg<2, "p2", [PN2]>, DwarfRegAlias<PN2>;
+ def P3 : AArch64Reg<3, "p3", [PN3]>, DwarfRegAlias<PN3>;
+ def P4 : AArch64Reg<4, "p4", [PN4]>, DwarfRegAlias<PN4>;
+ def P5 : AArch64Reg<5, "p5", [PN5]>, DwarfRegAlias<PN5>;
+ def P6 : AArch64Reg<6, "p6", [PN6]>, DwarfRegAlias<PN6>;
+ def P7 : AArch64Reg<7, "p7", [PN7]>, DwarfRegAlias<PN7>;
+ def P8 : AArch64Reg<8, "p8", [PN8]>, DwarfRegAlias<PN8>;
+ def P9 : AArch64Reg<9, "p9", [PN9]>, DwarfRegAlias<PN9>;
+ def P10 : AArch64Reg<10, "p10", [PN10]>, DwarfRegAlias<PN10>;
+ def P11 : AArch64Reg<11, "p11", [PN11]>, DwarfRegAlias<PN11>;
+ def P12 : AArch64Reg<12, "p12", [PN12]>, DwarfRegAlias<PN12>;
+ def P13 : AArch64Reg<13, "p13", [PN13]>, DwarfRegAlias<PN13>;
+ def P14 : AArch64Reg<14, "p14", [PN14]>, DwarfRegAlias<PN14>;
+ def P15 : AArch64Reg<15, "p15", [PN15]>, DwarfRegAlias<PN15>;
+}
// SVE variable-size vector registers
-let SubRegIndices = [zsub,zsub_hi] in {
-def Z0 : AArch64Reg<0, "z0", [Q0, Z0_HI]>, DwarfRegNum<[96]>;
-def Z1 : AArch64Reg<1, "z1", [Q1, Z1_HI]>, DwarfRegNum<[97]>;
-def Z2 : AArch64Reg<2, "z2", [Q2, Z2_HI]>, DwarfRegNum<[98]>;
-def Z3 : AArch64Reg<3, "z3", [Q3, Z3_HI]>, DwarfRegNum<[99]>;
-def Z4 : AArch64Reg<4, "z4", [Q4, Z4_HI]>, DwarfRegNum<[100]>;
-def Z5 : AArch64Reg<5, "z5", [Q5, Z5_HI]>, DwarfRegNum<[101]>;
-def Z6 : AArch64Reg<6, "z6", [Q6, Z6_HI]>, DwarfRegNum<[102]>;
-def Z7 : AArch64Reg<7, "z7", [Q7, Z7_HI]>, DwarfRegNum<[103]>;
-def Z8 : AArch64Reg<8, "z8", [Q8, Z8_HI]>, DwarfRegNum<[104]>;
-def Z9 : AArch64Reg<9, "z9", [Q9, Z9_HI]>, DwarfRegNum<[105]>;
-def Z10 : AArch64Reg<10, "z10", [Q10, Z10_HI]>, DwarfRegNum<[106]>;
-def Z11 : AArch64Reg<11, "z11", [Q11, Z11_HI]>, DwarfRegNum<[107]>;
-def Z12 : AArch64Reg<12, "z12", [Q12, Z12_HI]>, DwarfRegNum<[108]>;
-def Z13 : AArch64Reg<13, "z13", [Q13, Z13_HI]>, DwarfRegNum<[109]>;
-def Z14 : AArch64Reg<14, "z14", [Q14, Z14_HI]>, DwarfRegNum<[110]>;
-def Z15 : AArch64Reg<15, "z15", [Q15, Z15_HI]>, DwarfRegNum<[111]>;
-def Z16 : AArch64Reg<16, "z16", [Q16, Z16_HI]>, DwarfRegNum<[112]>;
-def Z17 : AArch64Reg<17, "z17", [Q17, Z17_HI]>, DwarfRegNum<[113]>;
-def Z18 : AArch64Reg<18, "z18", [Q18, Z18_HI]>, DwarfRegNum<[114]>;
-def Z19 : AArch64Reg<19, "z19", [Q19, Z19_HI]>, DwarfRegNum<[115]>;
-def Z20 : AArch64Reg<20, "z20", [Q20, Z20_HI]>, DwarfRegNum<[116]>;
-def Z21 : AArch64Reg<21, "z21", [Q21, Z21_HI]>, DwarfRegNum<[117]>;
-def Z22 : AArch64Reg<22, "z22", [Q22, Z22_HI]>, DwarfRegNum<[118]>;
-def Z23 : AArch64Reg<23, "z23", [Q23, Z23_HI]>, DwarfRegNum<[119]>;
-def Z24 : AArch64Reg<24, "z24", [Q24, Z24_HI]>, DwarfRegNum<[120]>;
-def Z25 : AArch64Reg<25, "z25", [Q25, Z25_HI]>, DwarfRegNum<[121]>;
-def Z26 : AArch64Reg<26, "z26", [Q26, Z26_HI]>, DwarfRegNum<[122]>;
-def Z27 : AArch64Reg<27, "z27", [Q27, Z27_HI]>, DwarfRegNum<[123]>;
-def Z28 : AArch64Reg<28, "z28", [Q28, Z28_HI]>, DwarfRegNum<[124]>;
-def Z29 : AArch64Reg<29, "z29", [Q29, Z29_HI]>, DwarfRegNum<[125]>;
-def Z30 : AArch64Reg<30, "z30", [Q30, Z30_HI]>, DwarfRegNum<[126]>;
-def Z31 : AArch64Reg<31, "z31", [Q31, Z31_HI]>, DwarfRegNum<[127]>;
+let SubRegIndices = [zsub] in {
+def Z0 : AArch64Reg<0, "z0", [Q0]>, DwarfRegNum<[96]>;
+def Z1 : AArch64Reg<1, "z1", [Q1]>, DwarfRegNum<[97]>;
+def Z2 : AArch64Reg<2, "z2", [Q2]>, DwarfRegNum<[98]>;
+def Z3 : AArch64Reg<3, "z3", [Q3]>, DwarfRegNum<[99]>;
+def Z4 : AArch64Reg<4, "z4", [Q4]>, DwarfRegNum<[100]>;
+def Z5 : AArch64Reg<5, "z5", [Q5]>, DwarfRegNum<[101]>;
+def Z6 : AArch64Reg<6, "z6", [Q6]>, DwarfRegNum<[102]>;
+def Z7 : AArch64Reg<7, "z7", [Q7]>, DwarfRegNum<[103]>;
+def Z8 : AArch64Reg<8, "z8", [Q8]>, DwarfRegNum<[104]>;
+def Z9 : AArch64Reg<9, "z9", [Q9]>, DwarfRegNum<[105]>;
+def Z10 : AArch64Reg<10, "z10", [Q10]>, DwarfRegNum<[106]>;
+def Z11 : AArch64Reg<11, "z11", [Q11]>, DwarfRegNum<[107]>;
+def Z12 : AArch64Reg<12, "z12", [Q12]>, DwarfRegNum<[108]>;
+def Z13 : AArch64Reg<13, "z13", [Q13]>, DwarfRegNum<[109]>;
+def Z14 : AArch64Reg<14, "z14", [Q14]>, DwarfRegNum<[110]>;
+def Z15 : AArch64Reg<15, "z15", [Q15]>, DwarfRegNum<[111]>;
+def Z16 : AArch64Reg<16, "z16", [Q16]>, DwarfRegNum<[112]>;
+def Z17 : AArch64Reg<17, "z17", [Q17]>, DwarfRegNum<[113]>;
+def Z18 : AArch64Reg<18, "z18", [Q18]>, DwarfRegNum<[114]>;
+def Z19 : AArch64Reg<19, "z19", [Q19]>, DwarfRegNum<[115]>;
+def Z20 : AArch64Reg<20, "z20", [Q20]>, DwarfRegNum<[116]>;
+def Z21 : AArch64Reg<21, "z21", [Q21]>, DwarfRegNum<[117]>;
+def Z22 : AArch64Reg<22, "z22", [Q22]>, DwarfRegNum<[118]>;
+def Z23 : AArch64Reg<23, "z23", [Q23]>, DwarfRegNum<[119]>;
+def Z24 : AArch64Reg<24, "z24", [Q24]>, DwarfRegNum<[120]>;
+def Z25 : AArch64Reg<25, "z25", [Q25]>, DwarfRegNum<[121]>;
+def Z26 : AArch64Reg<26, "z26", [Q26]>, DwarfRegNum<[122]>;
+def Z27 : AArch64Reg<27, "z27", [Q27]>, DwarfRegNum<[123]>;
+def Z28 : AArch64Reg<28, "z28", [Q28]>, DwarfRegNum<[124]>;
+def Z29 : AArch64Reg<29, "z29", [Q29]>, DwarfRegNum<[125]>;
+def Z30 : AArch64Reg<30, "z30", [Q30]>, DwarfRegNum<[126]>;
+def Z31 : AArch64Reg<31, "z31", [Q31]>, DwarfRegNum<[127]>;
}
// Enum describing the element size for destructive
@@ -881,8 +880,6 @@ class SVERegOp <string Suffix, AsmOperandClass C,
let ParserMatchClass = C;
}
-class PPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size,
- RegisterClass RC> : SVERegOp<Suffix, C, Size, RC> {}
class ZPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size,
RegisterClass RC> : SVERegOp<Suffix, C, Size, RC> {}
@@ -891,7 +888,7 @@ class ZPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size,
// SVE predicate register classes.
class PPRClass<int firstreg, int lastreg> : RegisterClass<
"AArch64",
- [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1, aarch64svcount ], 16,
+ [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ], 16,
(sequence "P%u", firstreg, lastreg)> {
let Size = 16;
}
@@ -909,69 +906,90 @@ class PPRAsmOperand <string name, string RegClass, int Width>: AsmOperandClass {
let ParserMethod = "tryParseSVEPredicateVector<RegKind::SVEPredicateVector>";
}
-def PPRAsmOpAny : PPRAsmOperand<"PredicateAny", "PPR", 0>;
-def PPRAsmOp8 : PPRAsmOperand<"PredicateB", "PPR", 8>;
-def PPRAsmOp16 : PPRAsmOperand<"PredicateH", "PPR", 16>;
-def PPRAsmOp32 : PPRAsmOperand<"PredicateS", "PPR", 32>;
-def PPRAsmOp64 : PPRAsmOperand<"PredicateD", "PPR", 64>;
-
-def PPRAny : PPRRegOp<"", PPRAsmOpAny, ElementSizeNone, PPR>;
-def PPR8 : PPRRegOp<"b", PPRAsmOp8, ElementSizeB, PPR>;
-def PPR16 : PPRRegOp<"h", PPRAsmOp16, ElementSizeH, PPR>;
-def PPR32 : PPRRegOp<"s", PPRAsmOp32, ElementSizeS, PPR>;
-def PPR64 : PPRRegOp<"d", PPRAsmOp64, ElementSizeD, PPR>;
-
+def PPRAsmOpAny : PPRAsmOperand<"PredicateAny", "PPR", 0>;
+def PPRAsmOp8 : PPRAsmOperand<"PredicateB", "PPR", 8>;
+def PPRAsmOp16 : PPRAsmOperand<"PredicateH", "PPR", 16>;
+def PPRAsmOp32 : PPRAsmOperand<"PredicateS", "PPR", 32>;
+def PPRAsmOp64 : PPRAsmOperand<"PredicateD", "PPR", 64>;
def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b", 0>;
+class PPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size,
+ RegisterClass RC> : SVERegOp<Suffix, C, Size, RC> {}
+
+def PPRAny : PPRRegOp<"", PPRAsmOpAny, ElementSizeNone, PPR>;
+def PPR8 : PPRRegOp<"b", PPRAsmOp8, ElementSizeB, PPR>;
+def PPR16 : PPRRegOp<"h", PPRAsmOp16, ElementSizeH, PPR>;
+def PPR32 : PPRRegOp<"s", PPRAsmOp32, ElementSizeS, PPR>;
+def PPR64 : PPRRegOp<"d", PPRAsmOp64, ElementSizeD, PPR>;
def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, ElementSizeNone, PPR_3b>;
+class PNRClass<int firstreg, int lastreg> : RegisterClass<
+ "AArch64",
+ [ aarch64svcount ], 16,
+ (sequence "PN%u", firstreg, lastreg)> {
+ let Size = 16;
+}
+
+def PNR : PNRClass<0, 15>;
+def PNR_3b : PNRClass<0, 7>;
+def PNR_p8to15 : PNRClass<8, 15>;
// SVE predicate-as-counter operand
-class PNRAsmOperand<string name, string RegClass, int Width>
- : PPRAsmOperand<name, RegClass, Width> {
+class PNRAsmOperand<string name, string RegClass, int Width>: AsmOperandClass {
+ let Name = "SVE" # name # "Reg";
let PredicateMethod = "isSVEPredicateAsCounterRegOfWidth<"
# Width # ", " # "AArch64::"
# RegClass # "RegClassID>";
let DiagnosticType = "InvalidSVE" # name # "Reg";
+ let RenderMethod = "addRegOperands";
let ParserMethod = "tryParseSVEPredicateVector<RegKind::SVEPredicateAsCounter>";
}
-class PNRRegOp<string Suffix, AsmOperandClass C, int EltSize, RegisterClass RC>
- : PPRRegOp<Suffix, C, ElementSizeNone, RC> {
- let PrintMethod = "printPredicateAsCounter<" # EltSize # ">";
+let RenderMethod = "addPNRasPPRRegOperands" in {
+ def PNRasPPROpAny : PNRAsmOperand<"PNRasPPRPredicateAny", "PNR", 0>;
+ def PNRasPPROp8 : PNRAsmOperand<"PNRasPPRPredicateB", "PNR", 8>;
}
-def PNRAsmOpAny: PNRAsmOperand<"PNPredicateAny", "PPR", 0>;
-def PNRAsmOp8 : PNRAsmOperand<"PNPredicateB", "PPR", 8>;
-def PNRAsmOp16 : PNRAsmOperand<"PNPredicateH", "PPR", 16>;
-def PNRAsmOp32 : PNRAsmOperand<"PNPredicateS", "PPR", 32>;
-def PNRAsmOp64 : PNRAsmOperand<"PNPredicateD", "PPR", 64>;
+class PNRasPPRRegOp<string Suffix, AsmOperandClass C, ElementSizeEnum Size,
+ RegisterClass RC> : SVERegOp<Suffix, C, Size, RC> {}
+
+def PNRasPPRAny : PNRasPPRRegOp<"", PNRasPPROpAny, ElementSizeNone, PPR>;
+def PNRasPPR8 : PNRasPPRRegOp<"b", PNRasPPROp8, ElementSizeB, PPR>;
-def PNRAny : PNRRegOp<"", PNRAsmOpAny, 0, PPR>;
-def PNR8 : PNRRegOp<"b", PNRAsmOp8, 8, PPR>;
-def PNR16 : PNRRegOp<"h", PNRAsmOp16, 16, PPR>;
-def PNR32 : PNRRegOp<"s", PNRAsmOp32, 32, PPR>;
-def PNR64 : PNRRegOp<"d", PNRAsmOp64, 64, PPR>;
+def PNRAsmOpAny: PNRAsmOperand<"PNPredicateAny", "PNR", 0>;
+def PNRAsmOp8 : PNRAsmOperand<"PNPredicateB", "PNR", 8>;
+def PNRAsmOp16 : PNRAsmOperand<"PNPredicateH", "PNR", 16>;
+def PNRAsmOp32 : PNRAsmOperand<"PNPredicateS", "PNR", 32>;
+def PNRAsmOp64 : PNRAsmOperand<"PNPredicateD", "PNR", 64>;
-class PNRP8to15RegOp<string Suffix, AsmOperandClass C, int EltSize, RegisterClass RC>
- : PPRRegOp<Suffix, C, ElementSizeNone, RC> {
- let PrintMethod = "printPredicateAsCounter<" # EltSize # ">";
- let EncoderMethod = "EncodePPR_p8to15";
- let DecoderMethod = "DecodePPR_p8to15RegisterClass";
+class PNRRegOp<string Suffix, AsmOperandClass C, int Size, RegisterClass RC>
+ : SVERegOp<Suffix, C, ElementSizeNone, RC> {
+ let PrintMethod = "printPredicateAsCounter<" # Size # ">";
}
+def PNRAny : PNRRegOp<"", PNRAsmOpAny, 0, PNR>;
+def PNR8 : PNRRegOp<"b", PNRAsmOp8, 8, PNR>;
+def PNR16 : PNRRegOp<"h", PNRAsmOp16, 16, PNR>;
+def PNR32 : PNRRegOp<"s", PNRAsmOp32, 32, PNR>;
+def PNR64 : PNRRegOp<"d", PNRAsmOp64, 64, PNR>;
-def PNRAsmAny_p8to15 : PNRAsmOperand<"PNPredicateAny_p8to15", "PPR_p8to15", 0>;
-def PNRAsmOp8_p8to15 : PNRAsmOperand<"PNPredicateB_p8to15", "PPR_p8to15", 8>;
-def PNRAsmOp16_p8to15 : PNRAsmOperand<"PNPredicateH_p8to15", "PPR_p8to15", 16>;
-def PNRAsmOp32_p8to15 : PNRAsmOperand<"PNPredicateS_p8to15", "PPR_p8to15", 32>;
-def PNRAsmOp64_p8to15 : PNRAsmOperand<"PNPredicateD_p8to15", "PPR_p8to15", 64>;
+def PNRAsmAny_p8to15 : PNRAsmOperand<"PNPredicateAny_p8to15", "PNR_p8to15", 0>;
+def PNRAsmOp8_p8to15 : PNRAsmOperand<"PNPredicateB_p8to15", "PNR_p8to15", 8>;
+def PNRAsmOp16_p8to15 : PNRAsmOperand<"PNPredicateH_p8to15", "PNR_p8to15", 16>;
+def PNRAsmOp32_p8to15 : PNRAsmOperand<"PNPredicateS_p8to15", "PNR_p8to15", 32>;
+def PNRAsmOp64_p8to15 : PNRAsmOperand<"PNPredicateD_p8to15", "PNR_p8to15", 64>;
-def PNRAny_p8to15 : PNRP8to15RegOp<"", PNRAsmAny_p8to15, 0, PPR_p8to15>;
-def PNR8_p8to15 : PNRP8to15RegOp<"b", PNRAsmOp8_p8to15, 8, PPR_p8to15>;
-def PNR16_p8to15 : PNRP8to15RegOp<"h", PNRAsmOp16_p8to15, 16, PPR_p8to15>;
-def PNR32_p8to15 : PNRP8to15RegOp<"s", PNRAsmOp32_p8to15, 32, PPR_p8to15>;
-def PNR64_p8to15 : PNRP8to15RegOp<"d", PNRAsmOp64_p8to15, 64, PPR_p8to15>;
+class PNRP8to15RegOp<string Suffix, AsmOperandClass C, int Width, RegisterClass RC>
+ : SVERegOp<Suffix, C, ElementSizeNone, RC> {
+ let PrintMethod = "printPredicateAsCounter<" # Width # ">";
+ let EncoderMethod = "EncodePNR_p8to15";
+ let DecoderMethod = "DecodePNR_p8to15RegisterClass";
+}
+def PNRAny_p8to15 : PNRP8to15RegOp<"", PNRAsmAny_p8to15, 0, PNR_p8to15>;
+def PNR8_p8to15 : PNRP8to15RegOp<"b", PNRAsmOp8_p8to15, 8, PNR_p8to15>;
+def PNR16_p8to15 : PNRP8to15RegOp<"h", PNRAsmOp16_p8to15, 16, PNR_p8to15>;
+def PNR32_p8to15 : PNRP8to15RegOp<"s", PNRAsmOp32_p8to15, 32, PNR_p8to15>;
+def PNR64_p8to15 : PNRP8to15RegOp<"d", PNRAsmOp64_p8to15, 64, PNR_p8to15>;
let Namespace = "AArch64" in {
def psub0 : SubRegIndex<16, -1>;
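The predicate-as-counter registers are split out of PPR into dedicated PNR classes here (the aarch64svcount type correspondingly leaves PPRClass), and the p8-p15 subset gets its own EncodePNR_p8to15/DecodePNR_p8to15RegisterClass hooks. A hedged model of that encode/decode pair, assuming the usual offset encoding of PN8..PN15 into a 3-bit field (the hook bodies are not part of this diff):

#include <cassert>
// Assumed model: PN8..PN15 occupy a 3-bit field as (n - 8); decoding adds
// the offset back. Mirrors the Encode/Decode method names referenced above.
unsigned encodePNR_p8to15(unsigned N) { assert(N >= 8 && N <= 15); return N - 8; }
unsigned decodePNR_p8to15(unsigned Bits) { assert(Bits < 8); return Bits + 8; }
int main() {
  for (unsigned N = 8; N <= 15; ++N)
    assert(decodePNR_p8to15(encodePNR_p8to15(N)) == N);
  return 0;
}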
@@ -1250,6 +1268,10 @@ class ZPRVectorListMul<int ElementWidth, int NumRegs> : ZPRVectorList<ElementWid
let EncoderMethod = "EncodeRegAsMultipleOf<2>",
DecoderMethod = "DecodeZPR2Mul2RegisterClass" in {
+ def ZZ_mul_r : RegisterOperand<ZPR2Mul2, "printTypedVectorList<0,0>"> {
+ let ParserMatchClass = ZPRVectorListMul<0, 2>;
+ }
+
def ZZ_b_mul_r : RegisterOperand<ZPR2Mul2, "printTypedVectorList<0,'b'>"> {
let ParserMatchClass = ZPRVectorListMul<8, 2>;
}
@@ -1331,15 +1353,20 @@ def ZStridedQuadsHi : RegisterTuples<[zsub0, zsub1, zsub2, zsub3], [
(trunc (rotl ZPR, 24), 4), (trunc (rotl ZPR, 28), 4)
]>;
-def ZPR2Strided : RegisterClass<"AArch64", [untyped], 256,
+def ZPR2Strided : RegisterClass<"AArch64", [untyped], 128,
(add ZStridedPairsLo, ZStridedPairsHi)> {
let Size = 256;
}
-def ZPR4Strided : RegisterClass<"AArch64", [untyped], 512,
+def ZPR4Strided : RegisterClass<"AArch64", [untyped], 128,
(add ZStridedQuadsLo, ZStridedQuadsHi)> {
let Size = 512;
}
+def ZPR2StridedOrContiguous : RegisterClass<"AArch64", [untyped], 128,
+ (add ZStridedPairsLo, ZStridedPairsHi,
+ (decimate ZSeqPairs, 2))> {
+ let Size = 256;
+}
class ZPRVectorListStrided<int ElementWidth, int NumRegs, int Stride>
: ZPRVectorList<ElementWidth, NumRegs> {
@@ -1371,6 +1398,21 @@ let EncoderMethod = "EncodeZPR2StridedRegisterClass",
: RegisterOperand<ZPR2Strided, "printTypedVectorList<0,'d'>"> {
let ParserMatchClass = ZPRVectorListStrided<64, 2, 8>;
}
+
+ def ZZ_b_strided_and_contiguous
+ : RegisterOperand<ZPR2StridedOrContiguous, "printTypedVectorList<0,'b'>">;
+ def ZZ_h_strided_and_contiguous
+ : RegisterOperand<ZPR2StridedOrContiguous, "printTypedVectorList<0,'h'>">;
+ def ZZ_s_strided_and_contiguous
+ : RegisterOperand<ZPR2StridedOrContiguous, "printTypedVectorList<0,'s'>">;
+ def ZZ_d_strided_and_contiguous
+ : RegisterOperand<ZPR2StridedOrContiguous, "printTypedVectorList<0,'d'>">;
+}
+
+def ZPR4StridedOrContiguous : RegisterClass<"AArch64", [untyped], 128,
+ (add ZStridedQuadsLo, ZStridedQuadsHi,
+ (decimate ZSeqQuads, 4))> {
+ let Size = 512;
}
let EncoderMethod = "EncodeZPR4StridedRegisterClass",
@@ -1394,6 +1436,15 @@ let EncoderMethod = "EncodeZPR4StridedRegisterClass",
: RegisterOperand<ZPR4Strided, "printTypedVectorList<0,'d'>"> {
let ParserMatchClass = ZPRVectorListStrided<64, 4, 4>;
}
+
+ def ZZZZ_b_strided_and_contiguous
+ : RegisterOperand<ZPR4StridedOrContiguous, "printTypedVectorList<0,'b'>">;
+ def ZZZZ_h_strided_and_contiguous
+ : RegisterOperand<ZPR4StridedOrContiguous, "printTypedVectorList<0,'h'>">;
+ def ZZZZ_s_strided_and_contiguous
+ : RegisterOperand<ZPR4StridedOrContiguous, "printTypedVectorList<0,'s'>">;
+ def ZZZZ_d_strided_and_contiguous
+ : RegisterOperand<ZPR4StridedOrContiguous, "printTypedVectorList<0,'d'>">;
}
class ZPRExtendAsmOperand<string ShiftExtend, int RegWidth, int Scale,
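The new ZPR2StridedOrContiguous and ZPR4StridedOrContiguous classes union the strided tuples with a decimated slice of the sequential ones, so operands defined on them accept either register form. Assuming ZSeqPairs enumerates the consecutive pairs starting at each Zn and that decimate keeps every N-th element, (decimate ZSeqPairs, 2) selects the even-aligned contiguous pairs; a tiny standalone illustration:

#include <cstdio>
// Model of (decimate ZSeqPairs, 2): keeping every 2nd consecutive pair
// yields the even-aligned tuples Z0Z1, Z2Z3, ..., Z30Z31.
int main() {
  for (int I = 0; I <= 30; I += 2)
    std::printf("Z%dZ%d ", I, I + 1);
  std::printf("\n");
  return 0;
}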
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
index 8b45109d976a..5e89a531f7e8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
@@ -50,6 +50,7 @@
#include "llvm/MC/MCSchedule.h"
#include "llvm/Pass.h"
#include <unordered_map>
+#include <map>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
index ff56259eb34a..76dd5a2d713e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
@@ -14,8 +14,6 @@
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "Utils/AArch64BaseInfo.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/IndirectThunks.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -214,7 +212,7 @@ bool SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI,
void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) {
// FIXME: How to better communicate Register number, rather than through
// name and lookup table?
- assert(MF.getName().startswith(getThunkPrefix()));
+ assert(MF.getName().starts_with(getThunkPrefix()));
auto ThunkIt = llvm::find_if(
SLSBLRThunks, [&MF](auto T) { return T.Name == MF.getName(); });
assert(ThunkIt != std::end(SLSBLRThunks));
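The startswith -> starts_with change tracks StringRef's move to the std::string_view-style spelling; the camel-case name was deprecated and goes away around LLVM 18. Minimal usage, with an illustrative thunk-style name (the real prefix comes from getThunkPrefix(), which this hunk does not show):

#include "llvm/ADT/StringRef.h"
#include <cassert>
int main() {
  llvm::StringRef Name("__llvm_slsblr_thunk_x0");    // illustrative name
  assert(Name.starts_with("__llvm_slsblr_thunk_"));  // replaces startswith()
  return 0;
}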
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index cabfe9def7c2..738a52eebad2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -23,8 +23,6 @@ def AArch64_restore_za : SDNode<"AArch64ISD::RESTORE_ZA", SDTypeProfile<0, 3,
[SDNPHasChain, SDNPSideEffect, SDNPVariadic,
SDNPOptInGlue]>;
-def AArch64ObscureCopy : SDNode<"AArch64ISD::OBSCURE_COPY", SDTypeProfile<1, 1, []>, []>;
-
//===----------------------------------------------------------------------===//
// Instruction naming conventions.
//===----------------------------------------------------------------------===//
@@ -68,8 +66,8 @@ let Predicates = [HasSME] in {
defm BFMOPA_MPPZZ : sme_bf16_outer_product<0b000, "bfmopa", int_aarch64_sme_mopa_wide>;
defm BFMOPS_MPPZZ : sme_bf16_outer_product<0b001, "bfmops", int_aarch64_sme_mops_wide>;
-defm FMOPA_MPPZZ_S : sme_outer_product_fp32<0b0, "fmopa", int_aarch64_sme_mopa>;
-defm FMOPS_MPPZZ_S : sme_outer_product_fp32<0b1, "fmops", int_aarch64_sme_mops>;
+defm FMOPA_MPPZZ_S : sme_outer_product_fp32<0b0, 0b00, ZPR32, "fmopa", int_aarch64_sme_mopa>;
+defm FMOPS_MPPZZ_S : sme_outer_product_fp32<0b1, 0b00, ZPR32, "fmops", int_aarch64_sme_mops>;
}
let Predicates = [HasSMEF64F64] in {
@@ -134,52 +132,6 @@ defm ZERO_M : sme_zero<"zero">;
// Mode selection and state access instructions
//===----------------------------------------------------------------------===//
-// SME defines three pstate fields to set or clear PSTATE.SM, PSTATE.ZA, or
-// both fields:
-//
-// MSR SVCRSM, #<imm1>
-// MSR SVCRZA, #<imm1>
-// MSR SVCRSMZA, #<imm1>
-//
-// It's tricky to use the existing pstate operand defined in
-// AArch64SystemOperands.td since it only encodes 5 bits including op1;op2,
-// when these fields are also encoded in CRm[3:1].
-def MSRpstatesvcrImm1
- : PstateWriteSimple<(ins svcr_op:$pstatefield, timm0_1:$imm), "msr",
- "\t$pstatefield, $imm">,
- Sched<[WriteSys]> {
- bits<3> pstatefield;
- bit imm;
- let Inst{18-16} = 0b011; // op1
- let Inst{11-9} = pstatefield;
- let Inst{8} = imm;
- let Inst{7-5} = 0b011; // op2
-}
-
-def : InstAlias<"smstart", (MSRpstatesvcrImm1 0b011, 0b1)>;
-def : InstAlias<"smstart sm", (MSRpstatesvcrImm1 0b001, 0b1)>;
-def : InstAlias<"smstart za", (MSRpstatesvcrImm1 0b010, 0b1)>;
-
-def : InstAlias<"smstop", (MSRpstatesvcrImm1 0b011, 0b0)>;
-def : InstAlias<"smstop sm", (MSRpstatesvcrImm1 0b001, 0b0)>;
-def : InstAlias<"smstop za", (MSRpstatesvcrImm1 0b010, 0b0)>;
-
-
-// Pseudo to match to smstart/smstop. This expands:
-//
-// pseudonode (pstate_za|pstate_sm), before_call, expected_value
-//
-// Into:
-//
-// if (before_call != expected_value)
-// node (pstate_za|pstate_sm)
-//
-// where node can be either 'smstart' or 'smstop'.
-def MSRpstatePseudo :
- Pseudo<(outs),
- (ins svcr_op:$pstatefield, timm0_1:$imm, GPR64:$rtpstate, timm0_1:$expected_pstate, variable_ops), []>,
- Sched<[WriteSys]>;
-
// Pseudo to conditionally restore ZA state. This expands:
//
// pseudonode tpidr2_el0, tpidr2obj, restore_routine
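The hunk ending here drops the SVCR write machinery from this file rather than deleting it outright: MSRpstatesvcrImm1 is still instantiated by the smstart/smstop patterns that follow. For reference, the removed def pins op1 and op2 to 0b011 and packs the SVCR field and immediate into CRm; below is a standalone reconstruction of that encoding, where the 0xD500401F MSR-immediate scaffold is an assumption taken from the A64 encoding rather than something stated in this diff:

#include <cassert>
#include <cstdint>
// Field layout per the removed def: op1=0b011 in Inst{18-16}, pstatefield
// in Inst{11-9}, imm in Inst{8}, op2=0b011 in Inst{7-5}.
uint32_t msrSVCR(uint32_t PStateField, uint32_t Imm) {
  uint32_t Inst = 0xD500401F;      // assumed MSR (immediate) scaffold
  Inst |= 0b011u << 16;            // op1, Inst{18-16}
  Inst |= (PStateField & 7) << 9;  // SVCR field, Inst{11-9}
  Inst |= (Imm & 1) << 8;          // #0 or #1, Inst{8}
  Inst |= 0b011u << 5;             // op2, Inst{7-5}
  return Inst;
}
int main() {
  assert(msrSVCR(0b011, 1) == 0xD503477F); // "smstart" == MSR SVCRSMZA, #1
  return 0;
}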
@@ -226,52 +178,69 @@ def : Pat<(AArch64_smstart (i32 svcr_op:$pstate), (i64 0), (i64 1)), // before
def : Pat<(AArch64_smstop (i32 svcr_op:$pstate), (i64 0), (i64 1)), // after call
(MSRpstatesvcrImm1 svcr_op:$pstate, 0b0)>;
-// The generic case which gets expanded to a pseudo node.
-def : Pat<(AArch64_smstart (i32 svcr_op:$pstate), (i64 GPR64:$rtpstate), (i64 timm0_1:$expected_pstate)),
- (MSRpstatePseudo svcr_op:$pstate, 0b1, GPR64:$rtpstate, timm0_1:$expected_pstate)>;
-def : Pat<(AArch64_smstop (i32 svcr_op:$pstate), (i64 GPR64:$rtpstate), (i64 timm0_1:$expected_pstate)),
- (MSRpstatePseudo svcr_op:$pstate, 0b0, GPR64:$rtpstate, timm0_1:$expected_pstate)>;
-
// Read and write TPIDR2_EL0
def : Pat<(int_aarch64_sme_set_tpidr2 i64:$val),
(MSR 0xde85, GPR64:$val)>;
def : Pat<(i64 (int_aarch64_sme_get_tpidr2)),
(MRS 0xde85)>;
-
-def OBSCURE_COPY : Pseudo<(outs GPR64:$dst), (ins GPR64:$idx), []>, Sched<[]> { }
-def : Pat<(i64 (AArch64ObscureCopy (i64 GPR64:$idx))),
- (OBSCURE_COPY GPR64:$idx)>;
} // End let Predicates = [HasSME]
+// Pseudo to match to smstart/smstop. This expands:
+//
+// pseudonode (pstate_za|pstate_sm), before_call, expected_value
+//
+// Into:
+//
+// if (before_call != expected_value)
+// node (pstate_za|pstate_sm)
+//
+// where node can be either 'smstart' or 'smstop'.
+//
+// This pseudo and corresponding patterns don't need to be predicated by SME,
+// because when they're emitted for streaming-compatible functions and run
+// in a non-SME context the generated code-paths will never execute any
+// SME instructions.
+def MSRpstatePseudo :
+ Pseudo<(outs),
+ (ins svcr_op:$pstatefield, timm0_1:$imm, GPR64:$rtpstate, timm0_1:$expected_pstate, variable_ops), []>,
+ Sched<[WriteSys]> {
+ let hasPostISelHook = 1;
+}
+
+def : Pat<(AArch64_smstart (i32 svcr_op:$pstate), (i64 GPR64:$rtpstate), (i64 timm0_1:$expected_pstate)),
+ (MSRpstatePseudo svcr_op:$pstate, 0b1, GPR64:$rtpstate, timm0_1:$expected_pstate)>;
+def : Pat<(AArch64_smstop (i32 svcr_op:$pstate), (i64 GPR64:$rtpstate), (i64 timm0_1:$expected_pstate)),
+ (MSRpstatePseudo svcr_op:$pstate, 0b0, GPR64:$rtpstate, timm0_1:$expected_pstate)>;
+
//===----------------------------------------------------------------------===//
// SME2 Instructions
//===----------------------------------------------------------------------===//
let Predicates = [HasSME2] in {
defm ADD_VG2_M2ZZ_S : sme2_dot_mla_add_sub_array_vg2_single<"add", 0b0011010, MatrixOp32, ZZ_s, ZPR4b32, nxv4i32, int_aarch64_sme_add_write_single_za_vg1x2>;
defm ADD_VG4_M4ZZ_S : sme2_dot_mla_add_sub_array_vg4_single<"add", 0b0111010, MatrixOp32, ZZZZ_s, ZPR4b32, nxv4i32, int_aarch64_sme_add_write_single_za_vg1x4>;
-defm ADD_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"add", 0b011010, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_write_za_vg1x2>;
-defm ADD_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"add", 0b011010, MatrixOp32, ZZZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_write_za_vg1x4>;
+defm ADD_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"add", 0b0110010, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_write_za_vg1x2>;
+defm ADD_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"add", 0b0110010, MatrixOp32, ZZZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_write_za_vg1x4>;
defm ADD_VG2_2ZZ : sme2_int_sve_destructive_vector_vg2_single<"add", 0b0110000>;
defm ADD_VG4_4ZZ : sme2_int_sve_destructive_vector_vg4_single<"add", 0b0110000>;
defm SUB_VG2_M2ZZ_S : sme2_dot_mla_add_sub_array_vg2_single<"sub", 0b0011011, MatrixOp32, ZZ_s, ZPR4b32, nxv4i32, int_aarch64_sme_sub_write_single_za_vg1x2>;
defm SUB_VG4_M4ZZ_S : sme2_dot_mla_add_sub_array_vg4_single<"sub", 0b0111011, MatrixOp32, ZZZZ_s, ZPR4b32, nxv4i32, int_aarch64_sme_sub_write_single_za_vg1x4>;
-defm SUB_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"sub", 0b011011, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_sub_write_za_vg1x2>;
-defm SUB_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"sub", 0b011011, MatrixOp32, ZZZZ_s_mul_r, nxv4i32, int_aarch64_sme_sub_write_za_vg1x4>;
+defm SUB_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"sub", 0b0110011, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_sub_write_za_vg1x2>;
+defm SUB_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"sub", 0b0110011, MatrixOp32, ZZZZ_s_mul_r, nxv4i32, int_aarch64_sme_sub_write_za_vg1x4>;
defm FMLA_VG2_M2ZZ_S : sme2_dot_mla_add_sub_array_vg2_single<"fmla", 0b0011000, MatrixOp32, ZZ_s, ZPR4b32, nxv4f32, int_aarch64_sme_fmla_single_vg1x2>;
defm FMLA_VG4_M4ZZ_S : sme2_dot_mla_add_sub_array_vg4_single<"fmla", 0b0111000, MatrixOp32, ZZZZ_s, ZPR4b32, nxv4f32, int_aarch64_sme_fmla_single_vg1x4>;
-defm FMLA_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b011000, MatrixOp32, ZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmla_vg1x2>;
-defm FMLA_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b011000, MatrixOp32, ZZZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmla_vg1x4>;
-defm FMLA_VG2_M2ZZI_S : sme2_multi_vec_array_vg2_index_32b<"fmla", 0b0000, ZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmla_lane_vg1x2>;
+defm FMLA_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b0110000, MatrixOp32, ZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmla_vg1x2>;
+defm FMLA_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b0110000, MatrixOp32, ZZZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmla_vg1x4>;
+defm FMLA_VG2_M2ZZI_S : sme2_multi_vec_array_vg2_index_32b<"fmla", 0b01, 0b0000, ZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmla_lane_vg1x2>;
defm FMLA_VG4_M4ZZI_S : sme2_multi_vec_array_vg4_index_32b<"fmla", 0b0000, ZZZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmla_lane_vg1x4>;
defm FMLS_VG2_M2ZZ_S : sme2_dot_mla_add_sub_array_vg2_single<"fmls", 0b0011001, MatrixOp32, ZZ_s, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_single_vg1x2>;
defm FMLS_VG4_M4ZZ_S : sme2_dot_mla_add_sub_array_vg4_single<"fmls", 0b0111001, MatrixOp32, ZZZZ_s, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_single_vg1x4>;
-defm FMLS_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b011001, MatrixOp32, ZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmls_vg1x2>;
-defm FMLS_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b011001, MatrixOp32, ZZZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmls_vg1x4>;
-defm FMLS_VG2_M2ZZI_S : sme2_multi_vec_array_vg2_index_32b<"fmls", 0b0010, ZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_lane_vg1x2>;
+defm FMLS_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b0110001, MatrixOp32, ZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmls_vg1x2>;
+defm FMLS_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b0110001, MatrixOp32, ZZZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmls_vg1x4>;
+defm FMLS_VG2_M2ZZI_S : sme2_multi_vec_array_vg2_index_32b<"fmls", 0b01, 0b0010, ZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_lane_vg1x2>;
defm FMLS_VG4_M4ZZI_S : sme2_multi_vec_array_vg4_index_32b<"fmls", 0b0010, ZZZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_lane_vg1x4>;
defm ADD_VG2_M2Z_S : sme2_multivec_accum_add_sub_vg2<"add", 0b0010, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_za32_vg1x2>;
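Per the relocated comment above, MSRpstatePseudo (now outside the HasSME predicate block, and with hasPostISelHook set) only materializes an smstart or smstop when the pstate recorded before the call differs from the value expected afterwards. A plain-C++ model of that decision, not the actual MachineInstr expansion:

#include <cstdio>
// "if (before_call != expected_value) node (pstate_za|pstate_sm)"
void expandMSRpstatePseudo(bool BeforeCall, bool Expected, bool IsStart) {
  if (BeforeCall != Expected)
    std::puts(IsStart ? "smstart" : "smstop");
}
int main() {
  expandMSRpstatePseudo(false, true, true);  // emits smstart
  expandMSRpstatePseudo(true, true, true);   // elided: state already correct
  return 0;
}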
@@ -295,37 +264,37 @@ defm FMLAL_MZZI : sme2_mla_long_array_index<"fmlal", 0b10, 0b00, nxv8f16
defm FMLAL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_lane_vg2x2>;
defm FMLAL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_lane_vg2x4>;
defm FMLAL_MZZ : sme2_mla_long_array_single<"fmlal", 0b00, 0b00, nxv8f16, int_aarch64_sme_fmlal_single_vg2x1>;
-defm FMLAL_VG2_M2ZZ : sme2_fp_mla_long_array_vg2_single<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_single_vg2x2>;
-defm FMLAL_VG4_M4ZZ : sme2_fp_mla_long_array_vg4_single<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_single_vg2x4>;
-defm FMLAL_VG2_M2Z2Z : sme2_fp_mla_long_array_vg2_multi<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_vg2x2>;
-defm FMLAL_VG4_M4Z4Z : sme2_fp_mla_long_array_vg4_multi<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_vg2x4>;
+defm FMLAL_VG2_M2ZZ_HtoS : sme2_fp_mla_long_array_vg2_single<"fmlal", 0b000, MatrixOp32, ZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmlal_single_vg2x2>;
+defm FMLAL_VG4_M4ZZ_HtoS : sme2_fp_mla_long_array_vg4_single<"fmlal", 0b000, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmlal_single_vg2x4>;
+defm FMLAL_VG2_M2Z2Z_HtoS : sme2_fp_mla_long_array_vg2_multi<"fmlal", 0b000, MatrixOp32, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmlal_vg2x2>;
+defm FMLAL_VG4_M4Z4Z_HtoS : sme2_fp_mla_long_array_vg4_multi<"fmlal", 0b000, MatrixOp32, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmlal_vg2x4>;
defm FMLSL_MZZI : sme2_mla_long_array_index<"fmlsl", 0b10, 0b01, nxv8f16, int_aarch64_sme_fmlsl_lane_vg2x1>;
defm FMLSL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_lane_vg2x2>;
defm FMLSL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_lane_vg2x4>;
defm FMLSL_MZZ : sme2_mla_long_array_single<"fmlsl", 0b00, 0b01, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x1>;
-defm FMLSL_VG2_M2ZZ : sme2_fp_mla_long_array_vg2_single<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x2>;
-defm FMLSL_VG4_M4ZZ : sme2_fp_mla_long_array_vg4_single<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x4>;
-defm FMLSL_VG2_M2Z2Z : sme2_fp_mla_long_array_vg2_multi<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_vg2x2>;
-defm FMLSL_VG4_M4Z4Z : sme2_fp_mla_long_array_vg4_multi<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_vg2x4>;
+defm FMLSL_VG2_M2ZZ_HtoS : sme2_fp_mla_long_array_vg2_single<"fmlsl", 0b010, MatrixOp32, ZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x2>;
+defm FMLSL_VG4_M4ZZ_HtoS : sme2_fp_mla_long_array_vg4_single<"fmlsl", 0b010, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x4>;
+defm FMLSL_VG2_M2Z2Z_HtoS : sme2_fp_mla_long_array_vg2_multi<"fmlsl", 0b001, MatrixOp32, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmlsl_vg2x2>;
+defm FMLSL_VG4_M4Z4Z_HtoS : sme2_fp_mla_long_array_vg4_multi<"fmlsl", 0b001, MatrixOp32, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmlsl_vg2x4>;
defm BFMLAL_MZZI : sme2_mla_long_array_index<"bfmlal", 0b10, 0b10, nxv8bf16, int_aarch64_sme_fmlal_lane_vg2x1>;
defm BFMLAL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_lane_vg2x2>;
defm BFMLAL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_lane_vg2x4>;
defm BFMLAL_MZZ : sme2_mla_long_array_single<"bfmlal", 0b00, 0b10, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x1>;
-defm BFMLAL_VG2_M2ZZ : sme2_fp_mla_long_array_vg2_single<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x2>;
-defm BFMLAL_VG4_M4ZZ : sme2_fp_mla_long_array_vg4_single<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x4>;
-defm BFMLAL_VG2_M2Z2Z : sme2_fp_mla_long_array_vg2_multi<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_vg2x2>;
-defm BFMLAL_VG4_M4Z4Z : sme2_fp_mla_long_array_vg4_multi<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_vg2x4>;
+defm BFMLAL_VG2_M2ZZ_HtoS : sme2_fp_mla_long_array_vg2_single<"bfmlal", 0b100, MatrixOp32, ZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x2>;
+defm BFMLAL_VG4_M4ZZ_HtoS : sme2_fp_mla_long_array_vg4_single<"bfmlal", 0b100, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x4>;
+defm BFMLAL_VG2_M2Z2Z_HtoS : sme2_fp_mla_long_array_vg2_multi<"bfmlal", 0b010, MatrixOp32, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmlal_vg2x2>;
+defm BFMLAL_VG4_M4Z4Z_HtoS : sme2_fp_mla_long_array_vg4_multi<"bfmlal", 0b010, MatrixOp32, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmlal_vg2x4>;
defm BFMLSL_MZZI : sme2_mla_long_array_index<"bfmlsl", 0b10, 0b11, nxv8bf16, int_aarch64_sme_fmlsl_lane_vg2x1>;
defm BFMLSL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_lane_vg2x2>;
defm BFMLSL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_lane_vg2x4>;
defm BFMLSL_MZZ : sme2_mla_long_array_single<"bfmlsl", 0b00, 0b11, nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x1>;
-defm BFMLSL_VG2_M2ZZ : sme2_fp_mla_long_array_vg2_single<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x2>;
-defm BFMLSL_VG4_M4ZZ : sme2_fp_mla_long_array_vg4_single<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x4>;
-defm BFMLSL_VG2_M2Z2Z : sme2_fp_mla_long_array_vg2_multi<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_vg2x2>;
-defm BFMLSL_VG4_M4Z4Z : sme2_fp_mla_long_array_vg4_multi<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_vg2x4>;
+defm BFMLSL_VG2_M2ZZ_HtoS : sme2_fp_mla_long_array_vg2_single<"bfmlsl", 0b110, MatrixOp32, ZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x2>;
+defm BFMLSL_VG4_M4ZZ_HtoS : sme2_fp_mla_long_array_vg4_single<"bfmlsl", 0b110, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x4>;
+defm BFMLSL_VG2_M2Z2Z_HtoS : sme2_fp_mla_long_array_vg2_multi<"bfmlsl", 0b011, MatrixOp32, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmlsl_vg2x2>;
+defm BFMLSL_VG4_M4Z4Z_HtoS : sme2_fp_mla_long_array_vg4_multi<"bfmlsl", 0b011, MatrixOp32, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmlsl_vg2x4>;
defm SMLAL_MZZI : sme2_mla_long_array_index<"smlal", 0b11, 0b00, nxv8i16, int_aarch64_sme_smlal_lane_vg2x1>;
defm SMLAL_VG2_M2ZZI : sme2_int_mla_long_array_vg2_index<"smlal", 0b00, int_aarch64_sme_smlal_lane_vg2x2>;
@@ -363,14 +332,14 @@ defm UMLSL_VG4_M4ZZ : sme2_int_mla_long_array_vg4_single<"umlsl", 0b11, int_aar
defm UMLSL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"umlsl", 0b11, int_aarch64_sme_umlsl_vg2x2>;
defm UMLSL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"umlsl", 0b11, int_aarch64_sme_umlsl_vg2x4>;
-defm FCVT_Z2Z_StoH : sme2_cvt_vg2_single<"fcvt", 0b0000, nxv8f16, nxv4f32, int_aarch64_sve_fcvt_x2>;
-defm FCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"fcvtn", 0b0001, nxv8f16, nxv4f32, int_aarch64_sve_fcvtn_x2>;
-defm BFCVT_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvt", 0b1000, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvt_x2>;
-defm BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b1001, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvtn_x2>;
+defm FCVT_Z2Z_StoH : sme2_cvt_vg2_single<"fcvt", 0b00000, nxv8f16, nxv4f32, int_aarch64_sve_fcvt_x2>;
+defm FCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"fcvtn", 0b00001, nxv8f16, nxv4f32, int_aarch64_sve_fcvtn_x2>;
+defm BFCVT_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvt", 0b10000, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvt_x2>;
+defm BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b10001, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvtn_x2>;
-defm SQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvt", 0b0110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvt_x2>;
-defm UQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"uqcvt", 0b0111, nxv8i16, nxv4i32, int_aarch64_sve_uqcvt_x2>;
-defm SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b1110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvtu_x2>;
+defm SQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvt", 0b00110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvt_x2>;
+defm UQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"uqcvt", 0b00111, nxv8i16, nxv4i32, int_aarch64_sve_uqcvt_x2>;
+defm SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b10110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvtu_x2>;
defm SQCVT_Z4Z : sme2_int_cvt_vg4_single<"sqcvt", 0b000, int_aarch64_sve_sqcvt_x4>;
defm UQCVT_Z4Z : sme2_int_cvt_vg4_single<"uqcvt", 0b001, int_aarch64_sve_uqcvt_x4>;
defm SQCVTU_Z4Z : sme2_int_cvt_vg4_single<"sqcvtu", 0b100, int_aarch64_sve_sqcvtu_x4>;
@@ -446,122 +415,122 @@ defm SCLAMP_VG4_4Z4Z : sme2_int_clamp_vector_vg4_multi<"sclamp", 0b0>;
defm UCLAMP_VG2_2Z2Z : sme2_int_clamp_vector_vg2_multi<"uclamp", 0b1>;
defm UCLAMP_VG4_4Z4Z : sme2_int_clamp_vector_vg4_multi<"uclamp", 0b1>;
-defm FDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"fdot", 0b1001, ZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_lane_za32_vg1x2>;
+defm FDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"fdot", 0b01, 0b1001, ZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_lane_za32_vg1x2>;
defm FDOT_VG4_M4ZZI_HtoS : sme2_multi_vec_array_vg4_index_32b<"fdot", 0b1001, ZZZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_lane_za32_vg1x4>;
defm FDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg2_single<"fdot", 0b0010000, MatrixOp32, ZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_single_za32_vg1x2>;
defm FDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg4_single<"fdot", 0b0110000, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_single_za32_vg1x4>;
-defm FDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"fdot", 0b010000, MatrixOp32, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fdot_za32_vg1x2>;
-defm FDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"fdot", 0b010000, MatrixOp32, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fdot_za32_vg1x4>;
+defm FDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"fdot", 0b0100000, MatrixOp32, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fdot_za32_vg1x2>;
+defm FDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"fdot", 0b0100000, MatrixOp32, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fdot_za32_vg1x4>;
-defm BFDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"bfdot", 0b1011, ZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_lane_za32_vg1x2>;
+defm BFDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"bfdot", 0b01, 0b1011, ZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_lane_za32_vg1x2>;
defm BFDOT_VG4_M4ZZI_HtoS : sme2_multi_vec_array_vg4_index_32b<"bfdot", 0b1011, ZZZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_lane_za32_vg1x4>;
defm BFDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg2_single<"bfdot", 0b0010010, MatrixOp32, ZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_single_za32_vg1x2>;
defm BFDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg4_single<"bfdot", 0b0110010, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_single_za32_vg1x4>;
-defm BFDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"bfdot", 0b010010, MatrixOp32, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fdot_za32_vg1x2>;
-defm BFDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"bfdot", 0b010010, MatrixOp32, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fdot_za32_vg1x4>;
+defm BFDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"bfdot", 0b0100010, MatrixOp32, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fdot_za32_vg1x2>;
+defm BFDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"bfdot", 0b0100010, MatrixOp32, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fdot_za32_vg1x4>;
-defm BFVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"bfvdot", 0b0011, ZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fvdot_lane_za32_vg1x2>;
+defm BFVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"bfvdot", 0b01, 0b0011, ZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fvdot_lane_za32_vg1x2>;
-defm FVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"fvdot", 0b0001, ZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fvdot_lane_za32_vg1x2>;
+defm FVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"fvdot", 0b01, 0b0001, ZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fvdot_lane_za32_vg1x2>;
-defm SDOT_VG2_M2ZZI_HToS : sme2_multi_vec_array_vg2_index_32b<"sdot", 0b1000, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za32_vg1x2>;
-defm SDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"sdot", 0b1100, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_lane_za32_vg1x2>;
+defm SDOT_VG2_M2ZZI_HToS : sme2_multi_vec_array_vg2_index_32b<"sdot", 0b01, 0b1000, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za32_vg1x2>;
+defm SDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"sdot", 0b01, 0b1100, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_lane_za32_vg1x2>;
defm SDOT_VG4_M4ZZI_HToS : sme2_multi_vec_array_vg4_index_32b<"sdot", 0b1000, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za32_vg1x4>;
defm SDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"sdot", 0b1100, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_lane_za32_vg1x4>;
defm SDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg2_single<"sdot", 0b1010101, MatrixOp32, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za32_vg1x2>;
defm SDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg4_single<"sdot", 0b1110101, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za32_vg1x4>;
-defm SDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b110101, MatrixOp32, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za32_vg1x2>;
-defm SDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b110101, MatrixOp32, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za32_vg1x4>;
+defm SDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b1101001, MatrixOp32, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za32_vg1x2>;
+defm SDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b1101001, MatrixOp32, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za32_vg1x4>;
defm SDOT_VG2_M2ZZ_BtoS : sme2_dot_mla_add_sub_array_vg2_single<"sdot", 0b0010100, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_single_za32_vg1x2>;
defm SDOT_VG4_M4ZZ_BtoS : sme2_dot_mla_add_sub_array_vg4_single<"sdot", 0b0110100, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_single_za32_vg1x4>;
-defm SDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b010100, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_sdot_za32_vg1x2>;
-defm SDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b010100, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_sdot_za32_vg1x4>;
+defm SDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b0101000, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_sdot_za32_vg1x2>;
+defm SDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b0101000, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_sdot_za32_vg1x4>;
-defm SUDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"sudot", 0b1111, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_lane_za32_vg1x2>;
+defm SUDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"sudot", 0b01, 0b1111, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_lane_za32_vg1x2>;
defm SUDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"sudot", 0b1111, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_lane_za32_vg1x4>;
defm SUDOT_VG2_M2ZZ_BToS : sme2_dot_mla_add_sub_array_vg2_single<"sudot", 0b0010111, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_single_za32_vg1x2>;
defm SUDOT_VG4_M4ZZ_BToS : sme2_dot_mla_add_sub_array_vg4_single<"sudot", 0b0110111, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_single_za32_vg1x4>;
-defm SVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"svdot", 0b0100, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_svdot_lane_za32_vg1x2>;
+defm SVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"svdot", 0b01, 0b0100, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_svdot_lane_za32_vg1x2>;
defm SVDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"svdot", 0b0100, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_svdot_lane_za32_vg1x4>;
defm SUVDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"suvdot", 0b0111, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_suvdot_lane_za32_vg1x4>;
-defm UDOT_VG2_M2ZZI_HToS : sme2_multi_vec_array_vg2_index_32b<"udot", 0b1010, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za32_vg1x2>;
-defm UDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"udot", 0b1110, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_udot_lane_za32_vg1x2>;
+defm UDOT_VG2_M2ZZI_HToS : sme2_multi_vec_array_vg2_index_32b<"udot", 0b01, 0b1010, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za32_vg1x2>;
+defm UDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"udot", 0b01, 0b1110, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_udot_lane_za32_vg1x2>;
defm UDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"udot", 0b1110, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_udot_lane_za32_vg1x4>;
defm UDOT_VG4_M4ZZI_HToS : sme2_multi_vec_array_vg4_index_32b<"udot", 0b1010, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za32_vg1x4>;
defm UDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg2_single<"udot", 0b1010111, MatrixOp32, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za32_vg1x2>;
defm UDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg4_single<"udot", 0b1110111, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za32_vg1x4>;
-defm UDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b110111, MatrixOp32, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za32_vg1x2>;
-defm UDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b110111, MatrixOp32, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za32_vg1x4>;
+defm UDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b1101011, MatrixOp32, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za32_vg1x2>;
+defm UDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b1101011, MatrixOp32, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za32_vg1x4>;
defm UDOT_VG2_M2ZZ_BtoS : sme2_dot_mla_add_sub_array_vg2_single<"udot", 0b0010110, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_udot_single_za32_vg1x2>;
defm UDOT_VG4_M4ZZ_BtoS : sme2_dot_mla_add_sub_array_vg4_single<"udot", 0b0110110, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_udot_single_za32_vg1x4>;
-defm UDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b010110, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_udot_za32_vg1x2>;
-defm UDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b010110, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_udot_za32_vg1x4>;
+defm UDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b0101010, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_udot_za32_vg1x2>;
+defm UDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b0101010, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_udot_za32_vg1x4>;
-defm USDOT_VG2_M2ZZI_BToS: sme2_multi_vec_array_vg2_index_32b<"usdot", 0b1101, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_lane_za32_vg1x2>;
+defm USDOT_VG2_M2ZZI_BToS: sme2_multi_vec_array_vg2_index_32b<"usdot", 0b01, 0b1101, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_lane_za32_vg1x2>;
defm USDOT_VG4_M4ZZI_BToS: sme2_multi_vec_array_vg4_index_32b<"usdot", 0b1101, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_lane_za32_vg1x4>;
defm USDOT_VG2_M2ZZ_BToS : sme2_dot_mla_add_sub_array_vg2_single<"usdot", 0b0010101, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_single_za32_vg1x2>;
defm USDOT_VG4_M4ZZ_BToS : sme2_dot_mla_add_sub_array_vg4_single<"usdot", 0b0110101, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_single_za32_vg1x4>;
-defm USDOT_VG2_M2Z2Z_BToS : sme2_dot_mla_add_sub_array_vg2_multi<"usdot", 0b010101, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_usdot_za32_vg1x2>;
-defm USDOT_VG4_M4Z4Z_BToS : sme2_dot_mla_add_sub_array_vg4_multi<"usdot", 0b010101, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_usdot_za32_vg1x4>;
+defm USDOT_VG2_M2Z2Z_BToS : sme2_dot_mla_add_sub_array_vg2_multi<"usdot", 0b0101001, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_usdot_za32_vg1x2>;
+defm USDOT_VG4_M4Z4Z_BToS : sme2_dot_mla_add_sub_array_vg4_multi<"usdot", 0b0101001, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_usdot_za32_vg1x4>;
defm USVDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"usvdot", 0b0101, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_usvdot_lane_za32_vg1x4>;
-defm UVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"uvdot", 0b0110, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_uvdot_lane_za32_vg1x2>;
+defm UVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"uvdot", 0b01, 0b0110, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_uvdot_lane_za32_vg1x2>;
defm UVDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"uvdot", 0b0110, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_uvdot_lane_za32_vg1x4>;
-defm SMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"smlall", 0b000, int_aarch64_sme_smla_za32_lane_vg4x1>;
-defm SMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlall", 0b000, int_aarch64_sme_smla_za32_lane_vg4x2>;
-defm SMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlall", 0b000, int_aarch64_sme_smla_za32_lane_vg4x4>;
-defm SMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"smlall", 0b0000, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_smla_za32_single_vg4x1>;
+defm SMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"smlall", 0b00, 0b000, int_aarch64_sme_smla_za32_lane_vg4x1>;
+defm SMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlall", 0b00, 0b000, int_aarch64_sme_smla_za32_lane_vg4x2>;
+defm SMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlall", 0b00, 0b0000, int_aarch64_sme_smla_za32_lane_vg4x4>;
+defm SMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"smlall", 0b00000, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_smla_za32_single_vg4x1>;
defm SMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"smlall", 0b00000, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smla_za32_single_vg4x2>;
defm SMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"smlall", 0b01000, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smla_za32_single_vg4x4>;
-defm SMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"smlall", 0b0000, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_smla_za32_vg4x2>;
-defm SMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"smlall", 0b0000, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_smla_za32_vg4x4>;
+defm SMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"smlall", 0b00000, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_smla_za32_vg4x2>;
+defm SMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"smlall", 0b00000, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_smla_za32_vg4x4>;
-defm USMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"usmlall", 0b001, int_aarch64_sme_usmla_za32_lane_vg4x1>;
-defm USMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"usmlall", 0b100, int_aarch64_sme_usmla_za32_lane_vg4x2>;
-defm USMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"usmlall", 0b100, int_aarch64_sme_usmla_za32_lane_vg4x4>;
-defm USMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"usmlall", 0b0001, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_usmla_za32_single_vg4x1>;
+defm USMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"usmlall", 0b00, 0b001, int_aarch64_sme_usmla_za32_lane_vg4x1>;
+defm USMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"usmlall", 0b00, 0b100, int_aarch64_sme_usmla_za32_lane_vg4x2>;
+defm USMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"usmlall", 0b00, 0b0100, int_aarch64_sme_usmla_za32_lane_vg4x4>;
+defm USMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"usmlall", 0b00001, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_usmla_za32_single_vg4x1>;
defm USMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"usmlall", 0b00001, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usmla_za32_single_vg4x2>;
defm USMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"usmlall", 0b01001, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usmla_za32_single_vg4x4>;
-defm USMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"usmlall", 0b0001, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_usmla_za32_vg4x2>;
-defm USMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"usmlall", 0b0001, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_usmla_za32_vg4x4>;
+defm USMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"usmlall", 0b00001, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_usmla_za32_vg4x2>;
+defm USMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"usmlall", 0b00001, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_usmla_za32_vg4x4>;
-defm SMLSLL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"smlsll", 0b010, int_aarch64_sme_smls_za32_lane_vg4x1>;
-defm SMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlsll", 0b001, int_aarch64_sme_smls_za32_lane_vg4x2>;
-defm SMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlsll", 0b001, int_aarch64_sme_smls_za32_lane_vg4x4>;
-defm SMLSLL_MZZ_BtoS : sme2_mla_ll_array_single<"smlsll", 0b0010, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_smls_za32_single_vg4x1>;
+defm SMLSLL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"smlsll", 0b00, 0b010, int_aarch64_sme_smls_za32_lane_vg4x1>;
+defm SMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlsll", 0b00, 0b001, int_aarch64_sme_smls_za32_lane_vg4x2>;
+defm SMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlsll", 0b00, 0b0001, int_aarch64_sme_smls_za32_lane_vg4x4>;
+defm SMLSLL_MZZ_BtoS : sme2_mla_ll_array_single<"smlsll", 0b00010, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_smls_za32_single_vg4x1>;
defm SMLSLL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"smlsll", 0b00010, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smls_za32_single_vg4x2>;
defm SMLSLL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"smlsll", 0b01010, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smls_za32_single_vg4x4>;
-defm SMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"smlsll", 0b0010, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_smls_za32_vg4x2>;
-defm SMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"smlsll", 0b0010, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_smls_za32_vg4x4>;
+defm SMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"smlsll", 0b00010, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_smls_za32_vg4x2>;
+defm SMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"smlsll", 0b00010, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_smls_za32_vg4x4>;
-defm UMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"umlall", 0b100, int_aarch64_sme_umla_za32_lane_vg4x1>;
-defm UMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlall", 0b010, int_aarch64_sme_umla_za32_lane_vg4x2>;
-defm UMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlall", 0b010, int_aarch64_sme_umla_za32_lane_vg4x4>;
-defm UMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"umlall", 0b0100, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_umla_za32_single_vg4x1>;
+defm UMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"umlall", 0b00, 0b100, int_aarch64_sme_umla_za32_lane_vg4x1>;
+defm UMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlall", 0b00, 0b010, int_aarch64_sme_umla_za32_lane_vg4x2>;
+defm UMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlall", 0b00, 0b0010, int_aarch64_sme_umla_za32_lane_vg4x4>;
+defm UMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"umlall", 0b00100, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_umla_za32_single_vg4x1>;
defm UMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"umlall", 0b00100, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umla_za32_single_vg4x2>;
defm UMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"umlall", 0b01100, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umla_za32_single_vg4x4>;
-defm UMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"umlall", 0b0100, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_umla_za32_vg4x2>;
-defm UMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlall", 0b0100, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_umla_za32_vg4x4>;
+defm UMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"umlall", 0b00100, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_umla_za32_vg4x2>;
+defm UMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlall", 0b00100, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_umla_za32_vg4x4>;
-defm SUMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"sumlall", 0b101, int_aarch64_sme_sumla_za32_lane_vg4x1>;
-defm SUMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"sumlall", 0b110, int_aarch64_sme_sumla_za32_lane_vg4x2>;
-defm SUMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"sumlall", 0b110, int_aarch64_sme_sumla_za32_lane_vg4x4>;
+defm SUMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"sumlall", 0b00, 0b101, int_aarch64_sme_sumla_za32_lane_vg4x1>;
+defm SUMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"sumlall", 0b00, 0b110, int_aarch64_sme_sumla_za32_lane_vg4x2>;
+defm SUMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"sumlall", 0b00, 0b0110, int_aarch64_sme_sumla_za32_lane_vg4x4>;
defm SUMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"sumlall", 0b00101, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sumla_za32_single_vg4x2>;
defm SUMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"sumlall", 0b01101, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sumla_za32_single_vg4x4>;
-defm UMLSLL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"umlsll", 0b110, int_aarch64_sme_umls_za32_lane_vg4x1>;
-defm UMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlsll", 0b011, int_aarch64_sme_umls_za32_lane_vg4x2>;
-defm UMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlsll", 0b011, int_aarch64_sme_umls_za32_lane_vg4x4>;
-defm UMLSLL_MZZ_BtoS : sme2_mla_ll_array_single<"umlsll", 0b0110, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_umls_za32_single_vg4x1>;
+defm UMLSLL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"umlsll", 0b00, 0b110, int_aarch64_sme_umls_za32_lane_vg4x1>;
+defm UMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlsll", 0b00, 0b011, int_aarch64_sme_umls_za32_lane_vg4x2>;
+defm UMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlsll", 0b00, 0b0011, int_aarch64_sme_umls_za32_lane_vg4x4>;
+defm UMLSLL_MZZ_BtoS : sme2_mla_ll_array_single<"umlsll", 0b00110, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_umls_za32_single_vg4x1>;
defm UMLSLL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"umlsll", 0b00110, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umls_za32_single_vg4x2>;
defm UMLSLL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"umlsll", 0b01110, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umls_za32_single_vg4x4>;
-defm UMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"umlsll", 0b0110, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_umls_za32_vg4x2>;
-defm UMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlsll", 0b0110, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_umls_za32_vg4x4>;
+defm UMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"umlsll", 0b00110, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_umls_za32_vg4x2>;
+defm UMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlsll", 0b00110, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_umls_za32_vg4x4>;
defm BMOPA_MPPZZ_S : sme2_int_bmopx_tile<"bmopa", 0b100, int_aarch64_sme_bmopa_za32>;
defm BMOPS_MPPZZ_S : sme2_int_bmopx_tile<"bmops", 0b101, int_aarch64_sme_bmops_za32>;
@@ -572,19 +541,19 @@ defm SMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"smops", 0b001, int_aarch64_sme_smops
defm UMOPA_MPPZZ_HtoS : sme2_int_mopx_tile<"umopa", 0b100, int_aarch64_sme_umopa_za32>;
defm UMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"umops", 0b101, int_aarch64_sme_umops_za32>;
-def ZERO_T : sme2_zero_zt<"zero", 0b0001>;
+defm ZERO_T : sme2_zero_zt<"zero", 0b0001>;
-def LDR_TX : sme2_spill_fill_vector<"ldr", 0b01111100>;
-def STR_TX : sme2_spill_fill_vector<"str", 0b11111100>;
+defm LDR_TX : sme2_spill_fill_vector<"ldr", 0b01111100, int_aarch64_sme_ldr_zt>;
+defm STR_TX : sme2_spill_fill_vector<"str", 0b11111100, int_aarch64_sme_str_zt>;
def MOVT_XTI : sme2_movt_zt_to_scalar<"movt", 0b0011111>;
def MOVT_TIX : sme2_movt_scalar_to_zt<"movt", 0b0011111>;
-defm LUTI2_ZTZI : sme2_luti2_vector_index<"luti2">;
+defm LUTI2_ZTZI : sme2_luti2_vector_index<"luti2", int_aarch64_sme_luti2_lane_zt>;
defm LUTI2_2ZTZI : sme2_luti2_vector_vg2_index<"luti2">;
defm LUTI2_4ZTZI : sme2_luti2_vector_vg4_index<"luti2">;
-defm LUTI4_ZTZI : sme2_luti4_vector_index<"luti4">;
+defm LUTI4_ZTZI : sme2_luti4_vector_index<"luti4", int_aarch64_sme_luti4_lane_zt>;
defm LUTI4_2ZTZI : sme2_luti4_vector_vg2_index<"luti4">;
defm LUTI4_4ZTZI : sme2_luti4_vector_vg4_index<"luti4">;
@@ -707,13 +676,13 @@ defm STNT1D_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b11, 0b1
let Predicates = [HasSME2, HasSMEI16I64] in {
defm ADD_VG2_M2ZZ_D : sme2_dot_mla_add_sub_array_vg2_single<"add", 0b1011010, MatrixOp64, ZZ_d, ZPR4b64, nxv2i64, int_aarch64_sme_add_write_single_za_vg1x2>;
defm ADD_VG4_M4ZZ_D : sme2_dot_mla_add_sub_array_vg4_single<"add", 0b1111010, MatrixOp64, ZZZZ_d, ZPR4b64, nxv2i64, int_aarch64_sme_add_write_single_za_vg1x4>;
-defm ADD_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"add", 0b111010, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_write_za_vg1x2>;
-defm ADD_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"add", 0b111010, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_write_za_vg1x4>;
+defm ADD_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"add", 0b1110010, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_write_za_vg1x2>;
+defm ADD_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"add", 0b1110010, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_write_za_vg1x4>;
defm SUB_VG2_M2ZZ_D : sme2_dot_mla_add_sub_array_vg2_single<"sub", 0b1011011, MatrixOp64, ZZ_d, ZPR4b64, nxv2i64, int_aarch64_sme_sub_write_single_za_vg1x2>;
defm SUB_VG4_M4ZZ_D : sme2_dot_mla_add_sub_array_vg4_single<"sub", 0b1111011, MatrixOp64, ZZZZ_d, ZPR4b64, nxv2i64, int_aarch64_sme_sub_write_single_za_vg1x4>;
-defm SUB_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"sub", 0b111011, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_write_za_vg1x2>;
-defm SUB_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"sub", 0b111011, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_write_za_vg1x4>;
+defm SUB_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"sub", 0b1110011, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_write_za_vg1x2>;
+defm SUB_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"sub", 0b1110011, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_write_za_vg1x4>;
defm ADD_VG2_M2Z_D : sme2_multivec_accum_add_sub_vg2<"add", 0b1010, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_za64_vg1x2>;
defm ADD_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"add", 0b1010, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_za64_vg1x4>;
@@ -725,8 +694,8 @@ defm SDOT_VG2_M2ZZI_HtoD : sme2_multi_vec_array_vg2_index_64b<"sdot", 0b01, ZZ_h
defm SDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"sdot", 0b001, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za64_vg1x4>;
defm SDOT_VG2_M2ZZ_HtoD : sme2_dot_mla_add_sub_array_vg2_single<"sdot", 0b1010100, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za64_vg1x2>;
defm SDOT_VG4_M4ZZ_HtoD : sme2_dot_mla_add_sub_array_vg4_single<"sdot", 0b1110100, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za64_vg1x4>;
-defm SDOT_VG2_M2Z2Z_HtoD : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b110100, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za64_vg1x2>;
-defm SDOT_VG4_M4Z4Z_HtoD : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b110100, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za64_vg1x4>;
+defm SDOT_VG2_M2Z2Z_HtoD : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b1101000, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za64_vg1x2>;
+defm SDOT_VG4_M4Z4Z_HtoD : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b1101000, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za64_vg1x4>;
defm SVDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"svdot", 0b101, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_svdot_lane_za64_vg1x4>;
@@ -734,46 +703,46 @@ defm UDOT_VG2_M2ZZI_HtoD : sme2_multi_vec_array_vg2_index_64b<"udot", 0b11, ZZ_h
defm UDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"udot", 0b011, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za64_vg1x4>;
defm UDOT_VG2_M2ZZ_HtoD : sme2_dot_mla_add_sub_array_vg2_single<"udot", 0b1010110, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za64_vg1x2>;
defm UDOT_VG4_M4ZZ_HtoD : sme2_dot_mla_add_sub_array_vg4_single<"udot", 0b1110110, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za64_vg1x4>;
-defm UDOT_VG2_M2Z2Z_HtoD : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b110110, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za64_vg1x2>;
-defm UDOT_VG4_M4Z4Z_HtoD : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b110110, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za64_vg1x4>;
+defm UDOT_VG2_M2Z2Z_HtoD : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b1101010, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za64_vg1x2>;
+defm UDOT_VG4_M4Z4Z_HtoD : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b1101010, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za64_vg1x4>;
defm UVDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"uvdot", 0b111, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_uvdot_lane_za64_vg1x4>;
defm SMLALL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"smlall", 0b00, int_aarch64_sme_smla_za64_lane_vg4x1>;
defm SMLALL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"smlall", 0b00, int_aarch64_sme_smla_za64_lane_vg4x2>;
defm SMLALL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"smlall", 0b00, int_aarch64_sme_smla_za64_lane_vg4x4>;
-defm SMLALL_MZZ_HtoD : sme2_mla_ll_array_single<"smlall", 0b1000, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_smla_za64_single_vg4x1>;
+defm SMLALL_MZZ_HtoD : sme2_mla_ll_array_single<"smlall", 0b10000, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_smla_za64_single_vg4x1>;
defm SMLALL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single<"smlall", 0b10000, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smla_za64_single_vg4x2>;
defm SMLALL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single<"smlall", 0b11000, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smla_za64_single_vg4x4>;
-defm SMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"smlall", 0b1000, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_smla_za64_vg4x2>;
-defm SMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"smlall", 0b1000, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_smla_za64_vg4x4>;
+defm SMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"smlall", 0b10000, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_smla_za64_vg4x2>;
+defm SMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"smlall", 0b10000, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_smla_za64_vg4x4>;
defm SMLSLL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"smlsll", 0b01, int_aarch64_sme_smls_za64_lane_vg4x1>;
defm SMLSLL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"smlsll", 0b01, int_aarch64_sme_smls_za64_lane_vg4x2>;
defm SMLSLL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"smlsll", 0b01, int_aarch64_sme_smls_za64_lane_vg4x4>;
-defm SMLSLL_MZZ_HtoD : sme2_mla_ll_array_single<"smlsll", 0b1010, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_smls_za64_single_vg4x1>;
+defm SMLSLL_MZZ_HtoD : sme2_mla_ll_array_single<"smlsll", 0b10010, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_smls_za64_single_vg4x1>;
defm SMLSLL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single<"smlsll", 0b10010, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smls_za64_single_vg4x2>;
defm SMLSLL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single<"smlsll", 0b11010, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smls_za64_single_vg4x4>;
-defm SMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"smlsll", 0b1010, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_smls_za64_vg4x2>;
-defm SMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"smlsll", 0b1010, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_smls_za64_vg4x4>;
+defm SMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"smlsll", 0b10010, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_smls_za64_vg4x2>;
+defm SMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"smlsll", 0b10010, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_smls_za64_vg4x4>;
defm UMLALL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"umlall", 0b10, int_aarch64_sme_umla_za64_lane_vg4x1>;
defm UMLALL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"umlall", 0b10, int_aarch64_sme_umla_za64_lane_vg4x2>;
defm UMLALL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"umlall", 0b10, int_aarch64_sme_umla_za64_lane_vg4x4>;
-defm UMLALL_MZZ_HtoD : sme2_mla_ll_array_single<"umlall", 0b1100, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_umla_za64_single_vg4x1>;
+defm UMLALL_MZZ_HtoD : sme2_mla_ll_array_single<"umlall", 0b10100, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_umla_za64_single_vg4x1>;
defm UMLALL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single<"umlall", 0b10100, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umla_za64_single_vg4x2>;
defm UMLALL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single<"umlall", 0b11100, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umla_za64_single_vg4x4>;
-defm UMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"umlall", 0b1100, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_umla_za64_vg4x2>;
-defm UMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"umlall", 0b1100, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_umla_za64_vg4x4>;
+defm UMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"umlall", 0b10100, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_umla_za64_vg4x2>;
+defm UMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"umlall", 0b10100, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_umla_za64_vg4x4>;
defm UMLSLL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"umlsll", 0b11, int_aarch64_sme_umls_za64_lane_vg4x1>;
defm UMLSLL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"umlsll", 0b11, int_aarch64_sme_umls_za64_lane_vg4x2>;
defm UMLSLL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"umlsll", 0b11, int_aarch64_sme_umls_za64_lane_vg4x4>;
-defm UMLSLL_MZZ_HtoD : sme2_mla_ll_array_single<"umlsll", 0b1110, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_umls_za64_single_vg4x1>;
+defm UMLSLL_MZZ_HtoD : sme2_mla_ll_array_single<"umlsll", 0b10110, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_umls_za64_single_vg4x1>;
defm UMLSLL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single<"umlsll", 0b10110, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umls_za64_single_vg4x2>;
defm UMLSLL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single<"umlsll", 0b11110, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umls_za64_single_vg4x4>;
-defm UMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"umlsll", 0b1110, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_umls_za64_vg4x2>;
-defm UMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"umlsll", 0b1110, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_umls_za64_vg4x4>;
+defm UMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"umlsll", 0b10110, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_umls_za64_vg4x2>;
+defm UMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"umlsll", 0b10110, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_umls_za64_vg4x4>;
}
let Predicates = [HasSME2, HasSMEF64F64] in {
@@ -781,15 +750,15 @@ defm FMLA_VG2_M2ZZI_D : sme2_multi_vec_array_vg2_index_64b<"fmla", 0b00, ZZ_d_mu
defm FMLA_VG4_M4ZZI_D : sme2_multi_vec_array_vg4_index_64b<"fmla", 0b000, ZZZZ_d_mul_r, ZPR4b64, nxv2f64, int_aarch64_sme_fmla_lane_vg1x4>;
defm FMLA_VG2_M2ZZ_D : sme2_dot_mla_add_sub_array_vg2_single<"fmla", 0b1011000, MatrixOp64, ZZ_d, ZPR4b64, nxv2f64, int_aarch64_sme_fmla_single_vg1x2>;
defm FMLA_VG4_M4ZZ_D : sme2_dot_mla_add_sub_array_vg4_single<"fmla", 0b1111000, MatrixOp64, ZZZZ_d, ZPR4b64, nxv2f64, int_aarch64_sme_fmla_single_vg1x4>;
-defm FMLA_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b111000, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmla_vg1x2>;
-defm FMLA_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b111000, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmla_vg1x4>;
+defm FMLA_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b1110000, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmla_vg1x2>;
+defm FMLA_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b1110000, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmla_vg1x4>;
defm FMLS_VG2_M2ZZI_D : sme2_multi_vec_array_vg2_index_64b<"fmls", 0b10, ZZ_d_mul_r, ZPR4b64, nxv2f64, int_aarch64_sme_fmls_lane_vg1x2>;
defm FMLS_VG4_M4ZZI_D : sme2_multi_vec_array_vg4_index_64b<"fmls", 0b010, ZZZZ_d_mul_r, ZPR4b64, nxv2f64, int_aarch64_sme_fmls_lane_vg1x4>;
defm FMLS_VG2_M2ZZ_D : sme2_dot_mla_add_sub_array_vg2_single<"fmls", 0b1011001, MatrixOp64, ZZ_d, ZPR4b64, nxv2f64, int_aarch64_sme_fmls_single_vg1x2>;
defm FMLS_VG4_M4ZZ_D : sme2_dot_mla_add_sub_array_vg4_single<"fmls", 0b1111001, MatrixOp64, ZZZZ_d, ZPR4b64, nxv2f64, int_aarch64_sme_fmls_single_vg1x4>;
-defm FMLS_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b111001, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmls_vg1x2>;
-defm FMLS_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b111001, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmls_vg1x4>;
+defm FMLS_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b1110001, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmls_vg1x2>;
+defm FMLS_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b1110001, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmls_vg1x4>;
defm FADD_VG2_M2Z_D : sme2_multivec_accum_add_sub_vg2<"fadd", 0b1000, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_add_za64_vg1x2>;
defm FADD_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"fadd", 0b1000, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_add_za64_vg1x4>;
@@ -820,25 +789,25 @@ defm FADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"fadd", 0b0100, MatrixOp16
defm FSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"fsub", 0b0101, MatrixOp16, ZZ_h_mul_r, nxv8f16, null_frag>;
defm FSUB_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"fsub", 0b0101, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, null_frag>;
-defm FMLA_VG2_M2ZZI_H : sme2p1_multi_vec_array_vg2_index_16b<"fmla", 0b00>;
-defm FMLA_VG4_M4ZZI_H : sme2p1_multi_vec_array_vg4_index_16b<"fmla", 0b00>;
+defm FMLA_VG2_M2ZZI_H : sme2p1_multi_vec_array_vg2_index_16b<"fmla", 0b00, 0b100, ZZ_h_mul_r, ZPR4b16>;
+defm FMLA_VG4_M4ZZI_H : sme2p1_multi_vec_array_vg4_index_16b<"fmla", 0b000, ZZZZ_h_mul_r, ZPR4b16>;
defm FMLA_VG2_M2ZZ_H : sme2_dot_mla_add_sub_array_vg24_single<"fmla", 0b0011100, MatrixOp16, ZZ_h, ZPR4b16>;
defm FMLA_VG4_M4ZZ_H : sme2_dot_mla_add_sub_array_vg24_single<"fmla", 0b0111100, MatrixOp16, ZZZZ_h, ZPR4b16>;
-defm FMLA_VG2_M2Z4Z_H : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b010001, MatrixOp16, ZZ_h_mul_r, nxv8f16, null_frag>;
-defm FMLA_VG4_M4Z4Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b010001, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, null_frag>;
+defm FMLA_VG2_M2Z4Z_H : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b0100001, MatrixOp16, ZZ_h_mul_r, nxv8f16, null_frag>;
+defm FMLA_VG4_M4Z4Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b0100001, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, null_frag>;
-defm FMLS_VG2_M2ZZI_H : sme2p1_multi_vec_array_vg2_index_16b<"fmls", 0b01>;
-defm FMLS_VG4_M4ZZI_H : sme2p1_multi_vec_array_vg4_index_16b<"fmls", 0b01>;
+defm FMLS_VG2_M2ZZI_H : sme2p1_multi_vec_array_vg2_index_16b<"fmls", 0b00, 0b101, ZZ_h_mul_r, ZPR4b16>;
+defm FMLS_VG4_M4ZZI_H : sme2p1_multi_vec_array_vg4_index_16b<"fmls", 0b001, ZZZZ_h_mul_r, ZPR4b16>;
defm FMLS_VG2_M2ZZ_H : sme2_dot_mla_add_sub_array_vg24_single<"fmls", 0b0011101, MatrixOp16, ZZ_h, ZPR4b16>;
defm FMLS_VG4_M4ZZ_H : sme2_dot_mla_add_sub_array_vg24_single<"fmls", 0b0111101, MatrixOp16, ZZZZ_h, ZPR4b16>;
-defm FMLS_VG2_M2Z2Z_H : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b010011, MatrixOp16, ZZ_h_mul_r, nxv8f16, null_frag>;
-defm FMLS_VG4_M4Z2Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b010011, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, null_frag>;
+defm FMLS_VG2_M2Z2Z_H : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b0100011, MatrixOp16, ZZ_h_mul_r, nxv8f16, null_frag>;
+defm FMLS_VG4_M4Z2Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b0100011, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, null_frag>;
defm FCVT_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvt", 0b0>;
defm FCVTL_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvtl", 0b1>;
-defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0>;
-defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1>;
+defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, ZPR16>;
+defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, 0b11, ZPR16>;
}
let Predicates = [HasSME2p1, HasB16B16] in {
@@ -847,19 +816,19 @@ defm BFADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfadd", 0b1100, MatrixOp
defm BFSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfsub", 0b1101, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>;
defm BFSUB_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfsub", 0b1101, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>;
-defm BFMLA_VG2_M2ZZI : sme2p1_multi_vec_array_vg2_index_16b<"bfmla", 0b10>;
-defm BFMLA_VG4_M4ZZI : sme2p1_multi_vec_array_vg4_index_16b<"bfmla", 0b10>;
+defm BFMLA_VG2_M2ZZI : sme2p1_multi_vec_array_vg2_index_16b<"bfmla", 0b00, 0b110, ZZ_h_mul_r, ZPR4b16>;
+defm BFMLA_VG4_M4ZZI : sme2p1_multi_vec_array_vg4_index_16b<"bfmla", 0b010, ZZZZ_h_mul_r, ZPR4b16>;
defm BFMLA_VG2_M2ZZ : sme2_dot_mla_add_sub_array_vg24_single<"bfmla", 0b1011100, MatrixOp16, ZZ_h, ZPR4b16>;
defm BFMLA_VG4_M4ZZ : sme2_dot_mla_add_sub_array_vg24_single<"bfmla", 0b1111100, MatrixOp16, ZZZZ_h, ZPR4b16>;
-defm BFMLA_VG2_M2Z2Z : sme2_dot_mla_add_sub_array_vg2_multi<"bfmla", 0b110001, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>;
-defm BFMLA_VG4_M4Z4Z : sme2_dot_mla_add_sub_array_vg4_multi<"bfmla", 0b110001, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>;
+defm BFMLA_VG2_M2Z2Z : sme2_dot_mla_add_sub_array_vg2_multi<"bfmla", 0b1100001, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>;
+defm BFMLA_VG4_M4Z4Z : sme2_dot_mla_add_sub_array_vg4_multi<"bfmla", 0b1100001, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>;
-defm BFMLS_VG2_M2ZZI : sme2p1_multi_vec_array_vg2_index_16b<"bfmls", 0b11>;
-defm BFMLS_VG4_M4ZZI : sme2p1_multi_vec_array_vg4_index_16b<"bfmls", 0b11>;
+defm BFMLS_VG2_M2ZZI : sme2p1_multi_vec_array_vg2_index_16b<"bfmls", 0b00, 0b111, ZZ_h_mul_r, ZPR4b16>;
+defm BFMLS_VG4_M4ZZI : sme2p1_multi_vec_array_vg4_index_16b<"bfmls", 0b011, ZZZZ_h_mul_r, ZPR4b16>;
defm BFMLS_VG2_M2ZZ : sme2_dot_mla_add_sub_array_vg24_single<"bfmls", 0b1011101, MatrixOp16, ZZ_h, ZPR4b16>;
defm BFMLS_VG4_M4ZZ : sme2_dot_mla_add_sub_array_vg24_single<"bfmls", 0b1111101, MatrixOp16, ZZZZ_h, ZPR4b16>;
-defm BFMLS_VG2_M2Z2Z : sme2_dot_mla_add_sub_array_vg2_multi<"bfmls", 0b110011, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>;
-defm BFMLS_VG4_M4Z4Z : sme2_dot_mla_add_sub_array_vg4_multi<"bfmls", 0b110011, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>;
+defm BFMLS_VG2_M2Z2Z : sme2_dot_mla_add_sub_array_vg2_multi<"bfmls", 0b1100011, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>;
+defm BFMLS_VG4_M4Z4Z : sme2_dot_mla_add_sub_array_vg4_multi<"bfmls", 0b1100011, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>;
defm BFMAX_VG2_2ZZ : sme2p1_bf_max_min_vector_vg2_single<"bfmax", 0b0010000>;
@@ -885,6 +854,98 @@ defm BFMINNM_VG4_4Z2Z : sme2p1_bf_max_min_vector_vg4_multi<"bfminnm", 0b0010011
defm BFCLAMP_VG2_2ZZZ: sme2p1_bfclamp_vector_vg2_multi<"bfclamp">;
defm BFCLAMP_VG4_4ZZZ: sme2p1_bfclamp_vector_vg4_multi<"bfclamp">;
-defm BFMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmopa", 0b1, 0b0>;
-defm BFMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmops", 0b1, 0b1>;
+defm BFMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmopa", 0b1, 0b0, 0b11, ZPR16>;
+defm BFMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmops", 0b1, 0b1, 0b11, ZPR16>;
}
+
+let Predicates = [HasSME2, HasFP8] in {
+defm F1CVT_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"f1cvt", 0b00, 0b0>;
+defm F1CVTL_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"f1cvtl", 0b00, 0b1>;
+defm BF1CVT_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"bf1cvt", 0b01, 0b0>;
+defm BF1CVTL_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"bf1cvtl", 0b01, 0b1>;
+defm F2CVT_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"f2cvt", 0b10, 0b0>;
+defm F2CVTL_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"f2cvtl", 0b10, 0b1>;
+defm BF2CVT_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"bf2cvt", 0b11, 0b0>;
+defm BF2CVTL_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"bf2cvtl", 0b11, 0b1>;
+
+defm FCVT_Z2Z_HtoB : sme2_fp8_cvt_vg2_single<"fcvt", 0b0>;
+defm BFCVT_Z2Z_HtoB : sme2_fp8_cvt_vg2_single<"bfcvt", 0b1>;
+defm FCVT_Z4Z_StoB : sme2_fp8_cvt_vg4_single<"fcvt", 0b0>;
+defm FCVTN_Z4Z_StoB : sme2_fp8_cvt_vg4_single<"fcvtn", 0b1>;
+
+defm FSCALE_2ZZ : sme2_fp_sve_destructive_vector_vg2_single<"fscale", 0b0011000>;
+defm FSCALE_4ZZ : sme2_fp_sve_destructive_vector_vg4_single<"fscale", 0b0011000>;
+defm FSCALE_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"fscale", 0b0011000>;
+defm FSCALE_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"fscale", 0b0011000>;
+
+} // [HasSME2, HasFP8]
+
+let Predicates = [HasSME2, HasFAMINMAX] in {
+defm FAMAX_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"famax", 0b0010100>;
+defm FAMIN_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"famin", 0b0010101>;
+
+defm FAMAX_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famax", 0b0010100>;
+defm FAMIN_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famin", 0b0010101>;
+} //[HasSME2, HasFAMINMAX]
+
+let Predicates = [HasSME2, HasSME_LUTv2] in {
+defm MOVT : sme2_movt_zt_to_zt<"movt", 0b0011111>;
+def LUTI4_4ZZT2Z : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">;
+} //[HasSME2, HasSME_LUTv2]
+
+let Predicates = [HasSME2p1, HasSME_LUTv2] in {
+def LUTI4_S_4ZZT2Z : sme2_luti4_vector_vg4_strided<0b00, 0b00, "luti4">;
+} //[HasSME2p1, HasSME_LUTv2]
+
+let Predicates = [HasSMEF8F16] in {
+defm FVDOT_VG2_M2ZZI_BtoH : sme2p1_multi_vec_array_vg2_index_16b<"fvdot", 0b11, 0b110, ZZ_b_mul_r, ZPR4b8>;
+defm FDOT_VG2_M2ZZI_BtoH : sme2p1_multi_vec_array_vg2_index_16b<"fdot", 0b11, 0b010, ZZ_b_mul_r, ZPR4b8>;
+defm FDOT_VG4_M4ZZI_BtoH : sme2p1_multi_vec_array_vg4_index_16b<"fdot", 0b100, ZZZZ_b_mul_r, ZPR4b8>;
+defm FDOT_VG2_M2ZZ_BtoH : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0010001, MatrixOp16, ZZ_b, ZPR4b8>;
+defm FDOT_VG4_M4ZZ_BtoH : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0110001, MatrixOp16, ZZZZ_b, ZPR4b8>;
+// TODO: Replace nxv16i8 by nxv16f8
+defm FDOT_VG2_M2Z2Z_BtoH : sme2_dot_mla_add_sub_array_vg2_multi<"fdot", 0b0100100, MatrixOp16, ZZ_b_mul_r, nxv16i8, null_frag>;
+defm FDOT_VG4_M4Z4Z_BtoH : sme2_dot_mla_add_sub_array_vg4_multi<"fdot", 0b0100100, MatrixOp16, ZZZZ_b_mul_r, nxv16i8, null_frag>;
+
+def FMLAL_MZZI_BtoH : sme2_mla_ll_array_index_16b<"fmlal", 0b11, 0b00>;
+defm FMLAL_VG2_M2ZZI_BtoH : sme2_multi_vec_array_vg2_index_16b<"fmlal", 0b10, 0b111>;
+defm FMLAL_VG4_M4ZZI_BtoH : sme2_multi_vec_array_vg4_index_16b<"fmlal", 0b10, 0b110>;
+def FMLAL_VG2_MZZ_BtoH : sme2_mla_long_array_single_16b<"fmlal">;
+// TODO: Replace nxv16i8 by nxv16f8
+defm FMLAL_VG2_M2ZZ_BtoH : sme2_fp_mla_long_array_vg2_single<"fmlal", 0b001, MatrixOp16, ZZ_b, ZPR4b8, nxv16i8, null_frag>;
+defm FMLAL_VG4_M4ZZ_BtoH : sme2_fp_mla_long_array_vg4_single<"fmlal", 0b001, MatrixOp16, ZZZZ_b, ZPR4b8, nxv16i8, null_frag>;
+defm FMLAL_VG2_M2Z2Z_BtoH : sme2_fp_mla_long_array_vg2_multi<"fmlal", 0b100, MatrixOp16, ZZ_b_mul_r, nxv16i8, null_frag>;
+defm FMLAL_VG4_M4Z4Z_BtoH : sme2_fp_mla_long_array_vg4_multi<"fmlal", 0b100, MatrixOp16, ZZZZ_b_mul_r, nxv16i8, null_frag>;
+
+defm FMOPA_MPPZZ_BtoH : sme2p1_fmop_tile_fp16<"fmopa", 0b1, 0b0, 0b01, ZPR8>;
+
+} //[HasSMEF8F16]
+
+let Predicates = [HasSMEF8F32] in {
+// TODO: Replace nxv16i8 by nxv16f8
+defm FDOT_VG2_M2ZZI_BtoS : sme2_multi_vec_array_vg2_index_32b<"fdot", 0b01, 0b0111, ZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>;
+defm FDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"fdot", 0b0001, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>;
+defm FDOT_VG2_M2ZZ_BtoS : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0010011, MatrixOp32, ZZ_b, ZPR4b8>;
+defm FDOT_VG4_M4ZZ_BtoS : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0110011, MatrixOp32, ZZZZ_b, ZPR4b8>;
+// TODO: Replace nxv16i8 by nxv16f8
+defm FDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"fdot", 0b0100110, MatrixOp32, ZZ_b_mul_r, nxv16i8, null_frag>;
+defm FDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"fdot", 0b0100110, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, null_frag>;
+
+def FVDOTB_VG4_M2ZZI_BtoS : sme2_fp8_multi_vec_array_vg4_index<"fvdotb", 0b0>;
+def FVDOTT_VG4_M2ZZI_BtoS : sme2_fp8_multi_vec_array_vg4_index<"fvdott", 0b1>;
+
+defm FMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"fmlall", 0b01, 0b000, null_frag>;
+defm FMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"fmlall", 0b10, 0b100, null_frag>;
+defm FMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"fmlall", 0b00, 0b1000, null_frag>;
+// TODO: Replace nxv16i8 by nxv16f8
+defm FMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"fmlall", 0b01000, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, null_frag>;
+defm FMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg24_single<"fmlall", 0b000001, MatrixOp32, ZZ_b, ZPR4b8>;
+defm FMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg24_single<"fmlall", 0b010001, MatrixOp32, ZZZZ_b, ZPR4b8>;
+defm FMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"fmlall", 0b01000, MatrixOp32, ZZ_b_mul_r, nxv16i8, null_frag>;
+defm FMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"fmlall", 0b01000, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, null_frag>;
+
+defm FMOPA_MPPZZ_BtoS : sme_outer_product_fp32<0b0, 0b01, ZPR8, "fmopa", null_frag>;
+
+} //[HasSMEF8F32]
+
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index b4f02e0dd203..50527e08a061 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -100,6 +100,8 @@ def AArch64ldff1s_gather_imm_z : SDNode<"AArch64ISD::GLDFF1S_IMM_MERGE_Z
def AArch64ldnt1_gather_z : SDNode<"AArch64ISD::GLDNT1_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
def AArch64ldnt1s_gather_z : SDNode<"AArch64ISD::GLDNT1S_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
+// Gather vector base + scalar offset
+def AArch64ld1q_gather_z: SDNode<"AArch64ISD::GLD1Q_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
// Contiguous stores - node definitions
//
@@ -132,6 +134,9 @@ def AArch64st1_scatter_imm : SDNode<"AArch64ISD::SST1_IMM_PRED",
def AArch64stnt1_scatter : SDNode<"AArch64ISD::SSTNT1_PRED", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>;
+// Scatter vector base + scalar offset
+def AArch64st1q_scatter : SDNode<"AArch64ISD::SST1Q_PRED", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>;
+
// AArch64 SVE/SVE2 - the remaining node definitions
//
@@ -1157,7 +1162,7 @@ let Predicates = [HasSVE] in {
defm GLD1D : sve_mem_64b_gld_vs2_64_unscaled<0b1110, "ld1d", AArch64ld1_gather_z, nxv2i64>;
defm GLDFF1D : sve_mem_64b_gld_vs2_64_unscaled<0b1111, "ldff1d", AArch64ldff1_gather_z, nxv2i64>;
let Predicates = [HasSVE2p1] in {
- defm GLD1Q : sve_mem_128b_gld_64_unscaled<"ld1q">;
+ defm GLD1Q : sve_mem_128b_gld_64_unscaled<"ld1q", AArch64ld1q_gather_z>;
}
// Gathers using scaled 64-bit offsets, e.g.
@@ -1338,6 +1343,49 @@ let Predicates = [HasSVEorSME] in {
let Predicates = [HasSVE2p1] in {
defm ST1D_Q : sve_mem_cst_ss<0b1110, "st1d", Z_q, ZPR128, GPR64NoXZRshifted64>;
}
+
+ multiclass sve_ld1q_pat<ValueType Ty, ValueType PredTy, SDPatternOperator Load1qOp, Instruction RegRegInst, Instruction RegImmInst, ComplexPattern AddrCP> {
+ let AddedComplexity = 2 in {
+ def _reg_imm : Pat<(Ty (Load1qOp (PredTy PPR3bAny:$Pg), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$imm))),
+ (RegImmInst PPR3bAny:$Pg, GPR64sp:$base, simm4s1:$imm)>;
+ }
+
+ let AddedComplexity = 1 in {
+ def _reg_reg : Pat<(Ty (Load1qOp (PredTy PPR3bAny:$Pg), (AddrCP GPR64sp:$base, GPR64:$offset))),
+ (RegRegInst PPR3bAny:$Pg, GPR64sp:$base, GPR64:$offset)>;
+ }
+
+ def _default : Pat<(Ty (Load1qOp (PredTy PPR3bAny:$Pg), (i64 GPR64sp:$base))),
+ (RegImmInst PPR3bAny:$Pg, GPR64sp:$base, (i64 0))>;
+ }
+
+ multiclass sve_st1q_pat<ValueType DataType, ValueType PredTy, SDPatternOperator Store1qOp, Instruction RegRegInst, Instruction RegImmInst, ComplexPattern AddrCP> {
+ let AddedComplexity = 2 in {
+ def _reg_imm : Pat<(Store1qOp (DataType ZPR128:$Zt), (PredTy PPR3bAny:$Pg), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$imm)),
+ (RegImmInst Z_q:$Zt, PPR3bAny:$Pg, GPR64sp:$base, simm4s1:$imm)>;
+ }
+
+ let AddedComplexity = 1 in {
+ def _reg_reg : Pat<(Store1qOp (DataType ZPR128:$Zt), (PredTy PPR3bAny:$Pg), (AddrCP GPR64sp:$base, GPR64:$offset)),
+ (RegRegInst Z_q:$Zt, PPR3bAny:$Pg, GPR64sp:$base, GPR64:$offset)>;
+ }
+
+ def _default : Pat<(Store1qOp (DataType ZPR128:$Zt), (PredTy PPR3bAny:$Pg), (i64 GPR64sp:$base)),
+ (RegImmInst Z_q:$Zt, PPR3bAny:$Pg, GPR64sp:$base, (i64 0))>;
+ }
+
+ // ld1uwq/st1uwq
+ defm : sve_ld1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_ld1uwq, LD1W_Q, LD1W_Q_IMM, am_sve_regreg_lsl2>;
+ defm : sve_ld1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_ld1uwq, LD1W_Q, LD1W_Q_IMM, am_sve_regreg_lsl2>;
+ defm : sve_st1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_st1uwq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
+ defm : sve_st1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_st1uwq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
+
+ // ld1udq/st1udq
+ defm : sve_ld1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_ld1udq, LD1D_Q, LD1D_Q_IMM, am_sve_regreg_lsl3>;
+ defm : sve_ld1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_ld1udq, LD1D_Q, LD1D_Q_IMM, am_sve_regreg_lsl3>;
+ defm : sve_st1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_st1udq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
+ defm : sve_st1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_st1udq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
+
} // End HasSVEorSME
let Predicates = [HasSVE] in {
@@ -1385,7 +1433,7 @@ let Predicates = [HasSVE] in {
defm SST1W_D : sve_mem_sst_sv_64_unscaled<0b10, "st1w", AArch64st1_scatter, nxv2i32>;
defm SST1D : sve_mem_sst_sv_64_unscaled<0b11, "st1d", AArch64st1_scatter, nxv2i64>;
let Predicates = [HasSVE2p1] in {
- defm SST1Q : sve_mem_sst_128b_64_unscaled<"st1q">;
+ defm SST1Q : sve_mem_sst_128b_64_unscaled<"st1q", AArch64st1q_scatter>;
}
// Scatters using scaled 64-bit offsets, e.g.
@@ -1964,6 +2012,11 @@ let Predicates = [HasSVEorSME] in {
defm CNTW_XPiI : sve_int_count<0b100, "cntw", int_aarch64_sve_cntw>;
defm CNTD_XPiI : sve_int_count<0b110, "cntd", int_aarch64_sve_cntd>;
defm CNTP_XPP : sve_int_pcount_pred<0b0000, "cntp", int_aarch64_sve_cntp>;
+
+ def : Pat<(i64 (AArch64CttzElts nxv16i1:$Op1)),
+ (i64 (!cast<Instruction>(CNTP_XPP_B)
+ (nxv16i1 (!cast<Instruction>(BRKB_PPzP) (PTRUE_B 31), nxv16i1:$Op1)),
+ (nxv16i1 (!cast<Instruction>(BRKB_PPzP) (PTRUE_B 31), nxv16i1:$Op1))))>;
}
defm INCB_XPiI : sve_int_pred_pattern_a<0b000, "incb", add, int_aarch64_sve_cntb>;
@@ -2049,6 +2102,17 @@ let Predicates = [HasSVEorSME] in {
defm INCP_ZP : sve_int_count_v<0b10000, "incp">;
defm DECP_ZP : sve_int_count_v<0b10100, "decp">;
+ def : Pat<(i64 (add GPR64:$Op1, (i64 (AArch64CttzElts nxv16i1:$Op2)))),
+ (i64 (!cast<Instruction>(INCP_XP_B)
+ (nxv16i1 (!cast<Instruction>(BRKB_PPzP) (PTRUE_B 31), nxv16i1:$Op2)),
+ GPR64:$Op1))>;
+
+ def : Pat<(i32 (add GPR32:$Op1, (trunc (i64 (AArch64CttzElts nxv16i1:$Op2))))),
+ (i32 (EXTRACT_SUBREG (i64 (!cast<Instruction>(INCP_XP_B)
+ (nxv16i1 (!cast<Instruction>(BRKB_PPzP) (PTRUE_B 31), nxv16i1:$Op2)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Op1, sub_32))),
+ sub_32))>;
+
defm INDEX_RR : sve_int_index_rr<"index", AArch64mul_p_oneuse>;
defm INDEX_IR : sve_int_index_ir<"index", AArch64mul_p, AArch64mul_p_oneuse>;
defm INDEX_RI : sve_int_index_ri<"index">;
@@ -2204,8 +2268,8 @@ let Predicates = [HasSVEorSME] in {
} // End HasSVEorSME
let Predicates = [HasBF16, HasSVEorSME] in {
- defm BFDOT_ZZZ : sve_float_dot<0b1, "bfdot", nxv8bf16, int_aarch64_sve_bfdot>;
- defm BFDOT_ZZI : sve_float_dot_indexed<0b1, "bfdot", nxv8bf16, int_aarch64_sve_bfdot_lane_v2>;
+ defm BFDOT_ZZZ : sve_float_dot<0b1, 0b0, ZPR32, ZPR16, "bfdot", nxv8bf16, int_aarch64_sve_bfdot>;
+ defm BFDOT_ZZI : sve_float_dot_indexed<0b1, 0b00, ZPR16, ZPR3b16, "bfdot", nxv8bf16, int_aarch64_sve_bfdot_lane_v2>;
} // End HasBF16, HasSVEorSME
let Predicates = [HasBF16, HasSVE] in {
@@ -2311,14 +2375,14 @@ let Predicates = [HasSVEorSME] in {
// These get expanded to individual LDR_ZXI/STR_ZXI instructions in
// AArch64ExpandPseudoInsts.
let mayLoad = 1, hasSideEffects = 0 in {
- def LDR_ZZXI : Pseudo<(outs ZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ def LDR_ZZXI : Pseudo<(outs ZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def LDR_ZZZXI : Pseudo<(outs ZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
- def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
}
let mayStore = 1, hasSideEffects = 0 in {
- def STR_ZZXI : Pseudo<(outs), (ins ZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ def STR_ZZXI : Pseudo<(outs), (ins ZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def STR_ZZZXI : Pseudo<(outs), (ins ZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
- def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
}
let AddedComplexity = 1 in {
@@ -2407,14 +2471,6 @@ let Predicates = [HasSVEorSME] in {
}
let AddedComplexity = 5 in {
- def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))),
- (ADDVL_XXI GPR64:$op, $imm)>;
-
- def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_rdvl_imm i32:$imm))))),
- (i32 (EXTRACT_SUBREG (ADDVL_XXI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- GPR32:$op, sub_32), $imm),
- sub_32))>;
-
def : Pat<(nxv8i16 (add ZPR:$op, (nxv8i16 (splat_vector (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))),
(INCH_ZPiI ZPR:$op, 31, $imm)>;
def : Pat<(nxv4i32 (add ZPR:$op, (nxv4i32 (splat_vector (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))),
@@ -2431,6 +2487,14 @@ let Predicates = [HasSVEorSME] in {
}
let Predicates = [HasSVEorSME, UseScalarIncVL], AddedComplexity = 5 in {
+ def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))),
+ (ADDVL_XXI GPR64:$op, $imm)>;
+
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_rdvl_imm i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (ADDVL_XXI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$op, sub_32), $imm),
+ sub_32))>;
+
def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm i32:$imm))),
(INCH_XPiI GPR64:$op, 31, $imm)>;
def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm i32:$imm))),
@@ -2472,12 +2536,9 @@ let Predicates = [HasSVEorSME] in {
sub_32))>;
}
- def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))),
- (ADDVL_XXI GPR64:$op, $imm)>;
-
// FIXME: BigEndian requires an additional REV instruction to satisfy the
// constraint that none of the bits change when stored to memory as one
- // type, and and reloaded as another type.
+ // type, and reloaded as another type.
let Predicates = [IsLE] in {
def : Pat<(nxv16i8 (bitconvert (nxv8i16 ZPR:$src))), (nxv16i8 ZPR:$src)>;
def : Pat<(nxv16i8 (bitconvert (nxv4i32 ZPR:$src))), (nxv16i8 ZPR:$src)>;
@@ -2544,8 +2605,8 @@ let Predicates = [HasSVEorSME] in {
def : Pat<(nxv4f32 (bitconvert (nxv8bf16 ZPR:$src))), (nxv4f32 ZPR:$src)>;
def : Pat<(nxv2f64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
- def : Pat<(nxv16i1 (bitconvert (aarch64svcount PPR:$src))), (nxv16i1 PPR:$src)>;
- def : Pat<(aarch64svcount (bitconvert (nxv16i1 PPR:$src))), (aarch64svcount PPR:$src)>;
+ def : Pat<(nxv16i1 (bitconvert (aarch64svcount PNR:$src))), (nxv16i1 PPR:$src)>;
+ def : Pat<(aarch64svcount (bitconvert (nxv16i1 PPR:$src))), (aarch64svcount PNR:$src)>;
}
// These allow casting from/to unpacked predicate types.
@@ -3524,7 +3585,7 @@ let Predicates = [HasSVE2orSME] in {
defm SQSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb", int_aarch64_sve_sqshrunb>;
defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb", int_aarch64_sve_sqrshrunb>;
defm SHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb", int_aarch64_sve_shrnb>;
- defm RSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb", int_aarch64_sve_rshrnb>;
+ defm RSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb", AArch64rshrnb_pf>;
defm SQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb", int_aarch64_sve_sqshrnb>;
defm SQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb", int_aarch64_sve_sqrshrnb>;
defm UQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb", int_aarch64_sve_uqshrnb>;
@@ -3753,12 +3814,14 @@ defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel", int_aarch64_sve_psel>;
let Predicates = [HasSVE2p1_or_HasSME2] in {
defm FCLAMP_ZZZ : sve2p1_fclamp<"fclamp", int_aarch64_sve_fclamp>;
-defm FDOT_ZZZ_S : sve_float_dot<0b0, "fdot", nxv8f16, int_aarch64_sve_fdot_x2>;
-defm FDOT_ZZZI_S : sve_float_dot_indexed<0b0, "fdot", nxv8f16, int_aarch64_sve_fdot_lane_x2>;
-def BFMLSLB_ZZZ_S : sve2_fp_mla_long<0b110, "bfmlslb">;
-def BFMLSLT_ZZZ_S : sve2_fp_mla_long<0b111, "bfmlslt">;
-def BFMLSLB_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b110, "bfmlslb">;
-def BFMLSLT_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b111, "bfmlslt">;
+
+defm FDOT_ZZZ_S : sve_float_dot<0b0, 0b0, ZPR32, ZPR16, "fdot", nxv8f16, int_aarch64_sve_fdot_x2>;
+defm FDOT_ZZZI_S : sve_float_dot_indexed<0b0, 0b00, ZPR16, ZPR3b16, "fdot", nxv8f16, int_aarch64_sve_fdot_lane_x2>;
+
+defm BFMLSLB_ZZZ_S : sve2_fp_mla_long<0b110, "bfmlslb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslb>;
+defm BFMLSLT_ZZZ_S : sve2_fp_mla_long<0b111, "bfmlslt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslt>;
+defm BFMLSLB_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b110, "bfmlslb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslb_lane>;
+defm BFMLSLT_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b111, "bfmlslt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslt_lane>;
defm SDOT_ZZZ_HtoS : sve2p1_two_way_dot_vv<"sdot", 0b0, int_aarch64_sve_sdot_x2>;
defm UDOT_ZZZ_HtoS : sve2p1_two_way_dot_vv<"udot", 0b1, int_aarch64_sve_udot_x2>;
@@ -3778,40 +3841,40 @@ defm UQRSHRN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"uqrshrn", 0b111, int_aa
defm SQRSHRUN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"sqrshrun", 0b001, int_aarch64_sve_sqrshrun_x2>;
// Load to two registers
-def LD1B_2Z : sve2p1_mem_cld_ss_2z<"ld1b", 0b00, 0b0, ZZ_b_mul_r, GPR64shifted8>;
-def LD1H_2Z : sve2p1_mem_cld_ss_2z<"ld1h", 0b01, 0b0, ZZ_h_mul_r, GPR64shifted16>;
-def LD1W_2Z : sve2p1_mem_cld_ss_2z<"ld1w", 0b10, 0b0, ZZ_s_mul_r, GPR64shifted32>;
-def LD1D_2Z : sve2p1_mem_cld_ss_2z<"ld1d", 0b11, 0b0, ZZ_d_mul_r, GPR64shifted64>;
-defm LD1B_2Z_IMM : sve2p1_mem_cld_si_2z<"ld1b", 0b00, 0b0, ZZ_b_mul_r>;
-defm LD1H_2Z_IMM : sve2p1_mem_cld_si_2z<"ld1h", 0b01, 0b0, ZZ_h_mul_r>;
-defm LD1W_2Z_IMM : sve2p1_mem_cld_si_2z<"ld1w", 0b10, 0b0, ZZ_s_mul_r>;
-defm LD1D_2Z_IMM : sve2p1_mem_cld_si_2z<"ld1d", 0b11, 0b0, ZZ_d_mul_r>;
-def LDNT1B_2Z : sve2p1_mem_cld_ss_2z<"ldnt1b", 0b00, 0b1, ZZ_b_mul_r, GPR64shifted8>;
-def LDNT1H_2Z : sve2p1_mem_cld_ss_2z<"ldnt1h", 0b01, 0b1, ZZ_h_mul_r, GPR64shifted16>;
-def LDNT1W_2Z : sve2p1_mem_cld_ss_2z<"ldnt1w", 0b10, 0b1, ZZ_s_mul_r, GPR64shifted32>;
-def LDNT1D_2Z : sve2p1_mem_cld_ss_2z<"ldnt1d", 0b11, 0b1, ZZ_d_mul_r, GPR64shifted64>;
-defm LDNT1B_2Z_IMM : sve2p1_mem_cld_si_2z<"ldnt1b", 0b00, 0b1, ZZ_b_mul_r>;
-defm LDNT1H_2Z_IMM : sve2p1_mem_cld_si_2z<"ldnt1h", 0b01, 0b1, ZZ_h_mul_r>;
-defm LDNT1W_2Z_IMM : sve2p1_mem_cld_si_2z<"ldnt1w", 0b10, 0b1, ZZ_s_mul_r>;
-defm LDNT1D_2Z_IMM : sve2p1_mem_cld_si_2z<"ldnt1d", 0b11, 0b1, ZZ_d_mul_r>;
+defm LD1B_2Z : sve2p1_mem_cld_ss_2z<"ld1b", 0b00, 0b0, ZZ_b_mul_r, GPR64shifted8, ZZ_b_strided_and_contiguous>;
+defm LD1H_2Z : sve2p1_mem_cld_ss_2z<"ld1h", 0b01, 0b0, ZZ_h_mul_r, GPR64shifted16, ZZ_h_strided_and_contiguous>;
+defm LD1W_2Z : sve2p1_mem_cld_ss_2z<"ld1w", 0b10, 0b0, ZZ_s_mul_r, GPR64shifted32, ZZ_s_strided_and_contiguous>;
+defm LD1D_2Z : sve2p1_mem_cld_ss_2z<"ld1d", 0b11, 0b0, ZZ_d_mul_r, GPR64shifted64, ZZ_d_strided_and_contiguous>;
+defm LD1B_2Z_IMM : sve2p1_mem_cld_si_2z<"ld1b", 0b00, 0b0, ZZ_b_mul_r, ZZ_b_strided_and_contiguous>;
+defm LD1H_2Z_IMM : sve2p1_mem_cld_si_2z<"ld1h", 0b01, 0b0, ZZ_h_mul_r, ZZ_h_strided_and_contiguous>;
+defm LD1W_2Z_IMM : sve2p1_mem_cld_si_2z<"ld1w", 0b10, 0b0, ZZ_s_mul_r, ZZ_s_strided_and_contiguous>;
+defm LD1D_2Z_IMM : sve2p1_mem_cld_si_2z<"ld1d", 0b11, 0b0, ZZ_d_mul_r, ZZ_d_strided_and_contiguous>;
+defm LDNT1B_2Z : sve2p1_mem_cld_ss_2z<"ldnt1b", 0b00, 0b1, ZZ_b_mul_r, GPR64shifted8, ZZ_b_strided_and_contiguous>;
+defm LDNT1H_2Z : sve2p1_mem_cld_ss_2z<"ldnt1h", 0b01, 0b1, ZZ_h_mul_r, GPR64shifted16, ZZ_h_strided_and_contiguous>;
+defm LDNT1W_2Z : sve2p1_mem_cld_ss_2z<"ldnt1w", 0b10, 0b1, ZZ_s_mul_r, GPR64shifted32, ZZ_s_strided_and_contiguous>;
+defm LDNT1D_2Z : sve2p1_mem_cld_ss_2z<"ldnt1d", 0b11, 0b1, ZZ_d_mul_r, GPR64shifted64, ZZ_d_strided_and_contiguous>;
+defm LDNT1B_2Z_IMM : sve2p1_mem_cld_si_2z<"ldnt1b", 0b00, 0b1, ZZ_b_mul_r, ZZ_b_strided_and_contiguous>;
+defm LDNT1H_2Z_IMM : sve2p1_mem_cld_si_2z<"ldnt1h", 0b01, 0b1, ZZ_h_mul_r, ZZ_h_strided_and_contiguous>;
+defm LDNT1W_2Z_IMM : sve2p1_mem_cld_si_2z<"ldnt1w", 0b10, 0b1, ZZ_s_mul_r, ZZ_s_strided_and_contiguous>;
+defm LDNT1D_2Z_IMM : sve2p1_mem_cld_si_2z<"ldnt1d", 0b11, 0b1, ZZ_d_mul_r, ZZ_d_strided_and_contiguous>;
// Load to four registers
-def LD1B_4Z : sve2p1_mem_cld_ss_4z<"ld1b", 0b00, 0b0, ZZZZ_b_mul_r, GPR64shifted8>;
-def LD1H_4Z : sve2p1_mem_cld_ss_4z<"ld1h", 0b01, 0b0, ZZZZ_h_mul_r, GPR64shifted16>;
-def LD1W_4Z : sve2p1_mem_cld_ss_4z<"ld1w", 0b10, 0b0, ZZZZ_s_mul_r, GPR64shifted32>;
-def LD1D_4Z : sve2p1_mem_cld_ss_4z<"ld1d", 0b11, 0b0, ZZZZ_d_mul_r, GPR64shifted64>;
-defm LD1B_4Z_IMM : sve2p1_mem_cld_si_4z<"ld1b", 0b00, 0b0, ZZZZ_b_mul_r>;
-defm LD1H_4Z_IMM : sve2p1_mem_cld_si_4z<"ld1h", 0b01, 0b0, ZZZZ_h_mul_r>;
-defm LD1W_4Z_IMM : sve2p1_mem_cld_si_4z<"ld1w", 0b10, 0b0, ZZZZ_s_mul_r>;
-defm LD1D_4Z_IMM : sve2p1_mem_cld_si_4z<"ld1d", 0b11, 0b0, ZZZZ_d_mul_r>;
-def LDNT1B_4Z : sve2p1_mem_cld_ss_4z<"ldnt1b", 0b00, 0b1, ZZZZ_b_mul_r, GPR64shifted8>;
-def LDNT1H_4Z : sve2p1_mem_cld_ss_4z<"ldnt1h", 0b01, 0b1, ZZZZ_h_mul_r, GPR64shifted16>;
-def LDNT1W_4Z : sve2p1_mem_cld_ss_4z<"ldnt1w", 0b10, 0b1, ZZZZ_s_mul_r, GPR64shifted32>;
-def LDNT1D_4Z : sve2p1_mem_cld_ss_4z<"ldnt1d", 0b11, 0b1, ZZZZ_d_mul_r, GPR64shifted64>;
-defm LDNT1B_4Z_IMM : sve2p1_mem_cld_si_4z<"ldnt1b", 0b00, 0b1, ZZZZ_b_mul_r>;
-defm LDNT1H_4Z_IMM : sve2p1_mem_cld_si_4z<"ldnt1h", 0b01, 0b1, ZZZZ_h_mul_r>;
-defm LDNT1W_4Z_IMM : sve2p1_mem_cld_si_4z<"ldnt1w", 0b10, 0b1, ZZZZ_s_mul_r>;
-defm LDNT1D_4Z_IMM : sve2p1_mem_cld_si_4z<"ldnt1d", 0b11, 0b1, ZZZZ_d_mul_r>;
+defm LD1B_4Z : sve2p1_mem_cld_ss_4z<"ld1b", 0b00, 0b0, ZZZZ_b_mul_r, GPR64shifted8, ZZZZ_b_strided_and_contiguous>;
+defm LD1H_4Z : sve2p1_mem_cld_ss_4z<"ld1h", 0b01, 0b0, ZZZZ_h_mul_r, GPR64shifted16, ZZZZ_h_strided_and_contiguous>;
+defm LD1W_4Z : sve2p1_mem_cld_ss_4z<"ld1w", 0b10, 0b0, ZZZZ_s_mul_r, GPR64shifted32, ZZZZ_s_strided_and_contiguous>;
+defm LD1D_4Z : sve2p1_mem_cld_ss_4z<"ld1d", 0b11, 0b0, ZZZZ_d_mul_r, GPR64shifted64, ZZZZ_d_strided_and_contiguous>;
+defm LD1B_4Z_IMM : sve2p1_mem_cld_si_4z<"ld1b", 0b00, 0b0, ZZZZ_b_mul_r, ZZZZ_b_strided_and_contiguous>;
+defm LD1H_4Z_IMM : sve2p1_mem_cld_si_4z<"ld1h", 0b01, 0b0, ZZZZ_h_mul_r, ZZZZ_h_strided_and_contiguous>;
+defm LD1W_4Z_IMM : sve2p1_mem_cld_si_4z<"ld1w", 0b10, 0b0, ZZZZ_s_mul_r, ZZZZ_s_strided_and_contiguous>;
+defm LD1D_4Z_IMM : sve2p1_mem_cld_si_4z<"ld1d", 0b11, 0b0, ZZZZ_d_mul_r, ZZZZ_d_strided_and_contiguous>;
+defm LDNT1B_4Z : sve2p1_mem_cld_ss_4z<"ldnt1b", 0b00, 0b1, ZZZZ_b_mul_r, GPR64shifted8, ZZZZ_b_strided_and_contiguous>;
+defm LDNT1H_4Z : sve2p1_mem_cld_ss_4z<"ldnt1h", 0b01, 0b1, ZZZZ_h_mul_r, GPR64shifted16, ZZZZ_h_strided_and_contiguous>;
+defm LDNT1W_4Z : sve2p1_mem_cld_ss_4z<"ldnt1w", 0b10, 0b1, ZZZZ_s_mul_r, GPR64shifted32, ZZZZ_s_strided_and_contiguous>;
+defm LDNT1D_4Z : sve2p1_mem_cld_ss_4z<"ldnt1d", 0b11, 0b1, ZZZZ_d_mul_r, GPR64shifted64, ZZZZ_d_strided_and_contiguous>;
+defm LDNT1B_4Z_IMM : sve2p1_mem_cld_si_4z<"ldnt1b", 0b00, 0b1, ZZZZ_b_mul_r, ZZZZ_b_strided_and_contiguous>;
+defm LDNT1H_4Z_IMM : sve2p1_mem_cld_si_4z<"ldnt1h", 0b01, 0b1, ZZZZ_h_mul_r, ZZZZ_h_strided_and_contiguous>;
+defm LDNT1W_4Z_IMM : sve2p1_mem_cld_si_4z<"ldnt1w", 0b10, 0b1, ZZZZ_s_mul_r, ZZZZ_s_strided_and_contiguous>;
+defm LDNT1D_4Z_IMM : sve2p1_mem_cld_si_4z<"ldnt1d", 0b11, 0b1, ZZZZ_d_mul_r, ZZZZ_d_strided_and_contiguous>;
// Stores of two registers
def ST1B_2Z : sve2p1_mem_cst_ss_2z<"st1b", 0b00, 0b0, ZZ_b_mul_r, GPR64shifted8>;
@@ -3852,9 +3915,9 @@ defm STNT1D_4Z_IMM : sve2p1_mem_cst_si_4z<"stnt1d", 0b11, 0b1, ZZZZ_d_mul_r>;
multiclass store_pn_x2<ValueType Ty, SDPatternOperator Store,
Instruction RegImmInst> {
def : Pat<(Store (Ty ZPR:$vec0), (Ty ZPR:$vec1),
- (aarch64svcount PPR:$PNg), GPR64:$base),
+ (aarch64svcount PNR:$PNg), GPR64:$base),
(RegImmInst (REG_SEQUENCE ZPR2Mul2, Ty:$vec0, zsub0, Ty:$vec1, zsub1),
- PPR:$PNg, GPR64:$base, (i64 0))>;
+ PNR:$PNg, GPR64:$base, (i64 0))>;
}
// Stores of 2 consecutive vectors
@@ -3878,10 +3941,10 @@ defm : store_pn_x2<nxv2f64, int_aarch64_sve_stnt1_pn_x2, STNT1D_2Z_IMM>;
multiclass store_pn_x4<ValueType Ty, SDPatternOperator Store,
Instruction RegImmInst> {
def : Pat<(Store (Ty ZPR:$vec0), (Ty ZPR:$vec1), (Ty ZPR:$vec2), (Ty ZPR:$vec3),
- (aarch64svcount PPR:$PNg), GPR64:$base),
+ (aarch64svcount PNR:$PNg), GPR64:$base),
(RegImmInst (REG_SEQUENCE ZPR4Mul4, Ty:$vec0, zsub0, Ty:$vec1, zsub1,
Ty:$vec2, zsub2, Ty:$vec3, zsub3),
- PPR:$PNg, GPR64:$base, (i64 0))>;
+ PNR:$PNg, GPR64:$base, (i64 0))>;
}
// Stores of 4 consecutive vectors
@@ -3923,19 +3986,19 @@ defm WHILELS_CXX : sve2p1_int_while_rr_pn<"whilels", 0b111>;
// Aliases for existing SVE instructions for which predicate-as-counter are
// accepted as an operand to the instruction
def : InstAlias<"ldr $Pt, [$Rn, $imm9, mul vl]",
- (LDR_PXI PNRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), 0>;
+ (LDR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), 0>;
def : InstAlias<"ldr $Pt, [$Rn]",
- (LDR_PXI PNRAny:$Pt, GPR64sp:$Rn, 0), 0>;
+ (LDR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, 0), 0>;
def : InstAlias<"str $Pt, [$Rn, $imm9, mul vl]",
- (STR_PXI PNRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), 0>;
+ (STR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), 0>;
def : InstAlias<"str $Pt, [$Rn]",
- (STR_PXI PNRAny:$Pt, GPR64sp:$Rn, 0), 0>;
+ (STR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, 0), 0>;
def : InstAlias<"mov $Pd, $Pn",
- (ORR_PPzPP PNR8:$Pd, PNR8:$Pn, PNR8:$Pn, PNR8:$Pn), 0>;
+ (ORR_PPzPP PNRasPPR8:$Pd, PNRasPPR8:$Pn, PNRasPPR8:$Pn, PNRasPPR8:$Pn), 0>;
-def : InstAlias<"pfalse\t$Pd", (PFALSE PNR8:$Pd), 0>;
+def : InstAlias<"pfalse\t$Pd", (PFALSE PNRasPPR8:$Pd), 0>;
} // End HasSVE2p1_or_HasSME2
@@ -3943,28 +4006,56 @@ def : InstAlias<"pfalse\t$Pd", (PFALSE PNR8:$Pd), 0>;
// SVE2.1 non-widening BFloat16 to BFloat16 instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasSVE2p1_or_HasSME2p1, HasB16B16] in {
-def BFADD_ZZZ : sve_fp_3op_u_zd<0b00, 0b000, "bfadd", ZPR16>;
-def BFSUB_ZZZ : sve_fp_3op_u_zd<0b00, 0b001, "bfsub", ZPR16>;
-def BFMUL_ZZZ : sve_fp_3op_u_zd<0b00, 0b010, "bfmul", ZPR16>;
+let Predicates = [HasSVE2p1, HasB16B16, UseExperimentalZeroingPseudos] in {
+defm BFADD_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fadd>;
+defm BFSUB_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fsub>;
+defm BFMUL_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fmul>;
+defm BFMAXNM_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fmaxnm>;
+defm BFMINNM_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fminnm>;
+defm BFMIN_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fmin>;
+defm BFMAX_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fmax>;
+} // HasSVE2p1, HasB16B16, UseExperimentalZeroingPseudos
+
+let Predicates = [HasSVE2p1, HasB16B16] in {
+
+defm BFMLA_ZPmZZ : sve_fp_3op_p_zds_a_bf<0b00, "bfmla", "BFMLA_ZPZZZ", AArch64fmla_m1>;
+defm BFMLS_ZPmZZ : sve_fp_3op_p_zds_a_bf<0b01, "bfmls", "BFMLS_ZPZZZ", AArch64fmls_m1>;
+
+defm BFMLA_ZPZZZ : sve_fp_3op_pred_bf<AArch64fmla_p>;
+defm BFMLS_ZPZZZ : sve_fp_3op_pred_bf<AArch64fmls_p>;
-def BFMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, 0b00, "bfmla", ZPR16>;
-def BFMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b00, 0b01, "bfmls", ZPR16>;
+defm BFMLA_ZZZI : sve2p1_fp_bfma_by_indexed_elem<"bfmla", 0b10, int_aarch64_sve_fmla_lane>;
+defm BFMLS_ZZZI : sve2p1_fp_bfma_by_indexed_elem<"bfmls", 0b11, int_aarch64_sve_fmls_lane>;
-def BFADD_ZPZmZ : sve_fp_2op_p_zds<0b00, 0b0000, "bfadd", ZPR16>;
-def BFSUB_ZPZmZ : sve_fp_2op_p_zds<0b00, 0b0001, "bfsub", ZPR16>;
-def BFMUL_ZPZmZ : sve_fp_2op_p_zds<0b00, 0b0010, "bfmul", ZPR16>;
-def BFMAXNM_ZPZmZ : sve_fp_2op_p_zds<0b00, 0b0100, "bfmaxnm", ZPR16>;
-def BFMINNM_ZPZmZ : sve_fp_2op_p_zds<0b00, 0b0101, "bfminnm", ZPR16>;
-def BFMAX_ZPZmZ : sve_fp_2op_p_zds<0b00, 0b0110, "bfmax", ZPR16>;
-def BFMIN_ZPZmZ : sve_fp_2op_p_zds<0b00, 0b0111, "bfmin", ZPR16>;
+defm BFADD_ZPmZZ : sve2p1_bf_2op_p_zds<0b0000, "bfadd", "BFADD_ZPZZ", AArch64fadd_m1, DestructiveBinaryComm>;
+defm BFSUB_ZPmZZ : sve2p1_bf_2op_p_zds<0b0001, "bfsub", "BFSUB_ZPZZ", AArch64fsub_m1, DestructiveBinaryComm>;
+defm BFMUL_ZPmZZ : sve2p1_bf_2op_p_zds<0b0010, "bfmul", "BFMUL_ZPZZ", AArch64fmul_m1, DestructiveBinaryComm>;
-defm BFMLA_ZZZI : sve2p1_fp_bfma_by_indexed_elem<"bfmla", 0b10>;
-defm BFMLS_ZZZI : sve2p1_fp_bfma_by_indexed_elem<"bfmls", 0b11>;
+defm BFADD_ZZZ : sve2p1_bf_3op_u_zd<0b000, "bfadd", fadd, AArch64fadd_p>;
+defm BFSUB_ZZZ : sve2p1_bf_3op_u_zd<0b001, "bfsub", fsub, AArch64fsub_p>;
+defm BFMUL_ZZZ : sve2p1_bf_3op_u_zd<0b010, "bfmul", fmul, AArch64fmul_p>;
-defm BFMUL_ZZZI : sve2p1_fp_bfmul_by_indexed_elem<"bfmul">;
+defm BFADD_ZPZZ : sve2p1_bf_bin_pred_zds<AArch64fadd_p>;
+defm BFSUB_ZPZZ : sve2p1_bf_bin_pred_zds<AArch64fsub_p>;
+defm BFMUL_ZPZZ : sve2p1_bf_bin_pred_zds<AArch64fmul_p>;
-def BFCLAMP_ZZZ : sve2p1_fclamp<"bfclamp", 0b00, ZPR16>;
+
+defm BFMAX_ZPmZZ : sve2p1_bf_2op_p_zds<0b0110, "bfmax", "BFMAX_ZPZZ", int_aarch64_sve_fmax, DestructiveBinaryComm>;
+defm BFMIN_ZPmZZ : sve2p1_bf_2op_p_zds<0b0111, "bfmin", "BFMIN_ZPZZ", int_aarch64_sve_fmin, DestructiveBinaryComm>;
+
+defm BFMAX_ZPZZ : sve2p1_bf_bin_pred_zds<AArch64fmax_p>;
+defm BFMIN_ZPZZ : sve2p1_bf_bin_pred_zds<AArch64fmin_p>;
+
+defm BFMAXNM_ZPmZZ : sve2p1_bf_2op_p_zds<0b0100, "bfmaxnm", "BFMAXNM_ZPZZ", int_aarch64_sve_fmaxnm, DestructiveBinaryComm>;
+defm BFMINNM_ZPmZZ : sve2p1_bf_2op_p_zds<0b0101, "bfminnm", "BFMINNM_ZPZZ", int_aarch64_sve_fminnm, DestructiveBinaryComm>;
+
+defm BFMAXNM_ZPZZ : sve2p1_bf_bin_pred_zds<AArch64fmaxnm_p>;
+defm BFMINNM_ZPZZ : sve2p1_bf_bin_pred_zds<AArch64fminnm_p>;
+
+defm BFMUL_ZZZI : sve2p1_fp_bfmul_by_indexed_elem<"bfmul", int_aarch64_sve_fmul_lane>;
+
+defm BFCLAMP_ZZZ : sve2p1_bfclamp<"bfclamp", int_aarch64_sve_fclamp>;
} // End HasSVE2p1, HasB16B16
@@ -3972,31 +4063,103 @@ def BFCLAMP_ZZZ : sve2p1_fclamp<"bfclamp", 0b00, ZPR16>;
// SME2.1 or SVE2.1 instructions
//===----------------------------------------------------------------------===//
let Predicates = [HasSVE2p1_or_HasSME2p1] in {
-defm FADDQV : sve2p1_fp_reduction_q<0b000, "faddqv">;
-defm FMAXNMQV : sve2p1_fp_reduction_q<0b100, "fmaxnmqv">;
-defm FMINNMQV : sve2p1_fp_reduction_q<0b101, "fminnmqv">;
-defm FMAXQV : sve2p1_fp_reduction_q<0b110, "fmaxqv">;
-defm FMINQV : sve2p1_fp_reduction_q<0b111, "fminqv">;
+defm FADDQV : sve2p1_fp_reduction_q<0b000, "faddqv", int_aarch64_sve_faddqv>;
+defm FMAXNMQV : sve2p1_fp_reduction_q<0b100, "fmaxnmqv", int_aarch64_sve_fmaxnmqv>;
+defm FMINNMQV : sve2p1_fp_reduction_q<0b101, "fminnmqv", int_aarch64_sve_fminnmqv>;
+defm FMAXQV : sve2p1_fp_reduction_q<0b110, "fmaxqv", int_aarch64_sve_fmaxqv>;
+defm FMINQV : sve2p1_fp_reduction_q<0b111, "fminqv", int_aarch64_sve_fminqv>;
defm DUPQ_ZZI : sve2p1_dupq<"dupq">;
-def EXTQ_ZZI : sve2p1_extq<"extq">;
-
-defm PMOV_PZI : sve2p1_vector_to_pred<"pmov">;
-defm PMOV_ZIP : sve2p1_pred_to_vector<"pmov">;
-
-defm ORQV_VPZ : sve2p1_int_reduce_q<0b1100, "orqv">;
-defm EORQV_VPZ : sve2p1_int_reduce_q<0b1101, "eorqv">;
-defm ANDQV_VPZ : sve2p1_int_reduce_q<0b1110, "andqv">;
-defm ADDQV_VPZ : sve2p1_int_reduce_q<0b0001, "addqv">;
-defm SMAXQV_VPZ : sve2p1_int_reduce_q<0b0100, "smaxqv">;
-defm UMAXQV_VPZ : sve2p1_int_reduce_q<0b0101, "umaxqv">;
-defm SMINQV_VPZ : sve2p1_int_reduce_q<0b0110, "sminqv">;
-defm UMINQV_VPZ : sve2p1_int_reduce_q<0b0111, "uminqv">;
-
-defm TBXQ_ZZZ : sve2_int_perm_tbx<"tbxq", 0b10, null_frag>;
-defm ZIPQ1_ZZZ : sve2p1_permute_vec_elems_q<0b000, "zipq1">;
-defm ZIPQ2_ZZZ : sve2p1_permute_vec_elems_q<0b001, "zipq2">;
-defm UZPQ1_ZZZ : sve2p1_permute_vec_elems_q<0b010, "uzpq1">;
-defm UZPQ2_ZZZ : sve2p1_permute_vec_elems_q<0b011, "uzpq2">;
-defm TBLQ_ZZZ : sve2p1_tblq<"tblq">;
+defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq_lane>;
+
+defm PMOV_PZI : sve2p1_vector_to_pred<"pmov", int_aarch64_sve_pmov_to_pred_lane, int_aarch64_sve_pmov_to_pred_lane_zero>;
+defm PMOV_ZIP : sve2p1_pred_to_vector<"pmov", int_aarch64_sve_pmov_to_vector_lane_merging, int_aarch64_sve_pmov_to_vector_lane_zeroing>;
+
+defm ORQV_VPZ : sve2p1_int_reduce_q<0b1100, "orqv", int_aarch64_sve_orqv>;
+defm EORQV_VPZ : sve2p1_int_reduce_q<0b1101, "eorqv", int_aarch64_sve_eorqv>;
+defm ANDQV_VPZ : sve2p1_int_reduce_q<0b1110, "andqv", int_aarch64_sve_andqv>;
+defm ADDQV_VPZ : sve2p1_int_reduce_q<0b0001, "addqv", int_aarch64_sve_addqv>;
+defm SMAXQV_VPZ : sve2p1_int_reduce_q<0b0100, "smaxqv", int_aarch64_sve_smaxqv>;
+defm UMAXQV_VPZ : sve2p1_int_reduce_q<0b0101, "umaxqv", int_aarch64_sve_umaxqv>;
+defm SMINQV_VPZ : sve2p1_int_reduce_q<0b0110, "sminqv", int_aarch64_sve_sminqv>;
+defm UMINQV_VPZ : sve2p1_int_reduce_q<0b0111, "uminqv", int_aarch64_sve_uminqv>;
+
+defm ZIPQ1_ZZZ : sve2p1_permute_vec_elems_q<0b000, "zipq1", int_aarch64_sve_zipq1>;
+defm ZIPQ2_ZZZ : sve2p1_permute_vec_elems_q<0b001, "zipq2", int_aarch64_sve_zipq2>;
+defm UZPQ1_ZZZ : sve2p1_permute_vec_elems_q<0b010, "uzpq1", int_aarch64_sve_uzpq1>;
+defm UZPQ2_ZZZ : sve2p1_permute_vec_elems_q<0b011, "uzpq2", int_aarch64_sve_uzpq2>;
+defm TBXQ_ZZZ : sve2_int_perm_tbx<"tbxq", 0b10, int_aarch64_sve_tbxq>;
+defm TBLQ_ZZZ : sve2p1_tblq<"tblq", int_aarch64_sve_tblq>;
} // End HasSVE2p1_or_HasSME2p1
+
+//===----------------------------------------------------------------------===//
+// SVE2 FP8 instructions
+//===----------------------------------------------------------------------===//
+let Predicates = [HasSVE2orSME2, HasFP8] in {
+// FP8 upconvert
+defm F1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b00, "f1cvt">;
+defm F2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b01, "f2cvt">;
+defm BF1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b10, "bf1cvt">;
+defm BF2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b11, "bf2cvt">;
+defm F1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b00, "f1cvtlt">;
+defm F2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b01, "f2cvtlt">;
+defm BF1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b10, "bf1cvtlt">;
+defm BF2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b11, "bf2cvtlt">;
+
+// FP8 downconvert
+defm FCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b00, "fcvtn", ZZ_h_mul_r>;
+defm FCVTNB_Z2Z_StoB : sve2_fp8_down_cvt_single<0b01, "fcvtnb", ZZ_s_mul_r>;
+defm BFCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b10, "bfcvtn", ZZ_h_mul_r>;
+defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single<0b11, "fcvtnt", ZZ_s_mul_r>;
+} // End HasSVE2orSME2, HasFP8
+
+let Predicates = [HasSVE2orSME2, HasFAMINMAX] in {
+// FP8 Arithmetic - Predicated Group
+defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "", null_frag, DestructiveOther>;
+defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "", null_frag, DestructiveOther>;
+} // End HasSVE2orSME2, HasFAMINMAX
+
+let Predicates = [HasSSVE_FP8FMA] in {
+// FP8 Widening Multiply-Add Long - Indexed Group
+def FMLALB_ZZZI : sve2_fp8_mla_long_by_indexed_elem<0b0, "fmlalb">;
+def FMLALT_ZZZI : sve2_fp8_mla_long_by_indexed_elem<0b1, "fmlalt">;
+// FP8 Widening Multiply-Add Long Group
+def FMLALB_ZZZ : sve2_fp8_mla<0b100, ZPR16, "fmlalb">;
+def FMLALT_ZZZ : sve2_fp8_mla<0b101, ZPR16, "fmlalt">;
+// FP8 Widening Multiply-Add Long Long - Indexed Group
+def FMLALLBB_ZZZI : sve2_fp8_mla_long_long_by_indexed_elem<0b00, "fmlallbb">;
+def FMLALLBT_ZZZI : sve2_fp8_mla_long_long_by_indexed_elem<0b01, "fmlallbt">;
+def FMLALLTB_ZZZI : sve2_fp8_mla_long_long_by_indexed_elem<0b10, "fmlalltb">;
+def FMLALLTT_ZZZI : sve2_fp8_mla_long_long_by_indexed_elem<0b11, "fmlalltt">;
+// FP8 Widening Multiply-Add Long Long Group
+def FMLALLBB_ZZZ : sve2_fp8_mla<0b000, ZPR32, "fmlallbb">;
+def FMLALLBT_ZZZ : sve2_fp8_mla<0b001, ZPR32, "fmlallbt">;
+def FMLALLTB_ZZZ : sve2_fp8_mla<0b010, ZPR32, "fmlalltb">;
+def FMLALLTT_ZZZ : sve2_fp8_mla<0b011, ZPR32, "fmlalltt">;
+} // End HasSSVE_FP8FMA
+
+let Predicates = [HasSSVE_FP8DOT2] in {
+// FP8 Widening Dot-Product - Indexed Group
+defm FDOT_ZZZI_BtoH : sve2_fp8_dot_indexed<"fdot">;
+// FP8 Widening Dot-Product - Group
+// TODO: Replace nxv16i8 by nxv16f8
+defm FDOT_ZZZ_BtoH : sve_float_dot<0b0, 0b1, ZPR16, ZPR8, "fdot", nxv16i8, null_frag>;
+}
+
+// TODO: Replace nxv16i8 by nxv16f8
+let Predicates = [HasSSVE_FP8DOT4] in {
+// FP8 Widening Dot-Product - Indexed Group
+defm FDOT_ZZZI_BtoS : sve_float_dot_indexed<0b1, 0b01, ZPR8, ZPR3b8, "fdot",
+ nxv16i8, null_frag>;
+// FP8 Widening Dot-Product - Group
+defm FDOT_ZZZ_BtoS : sve_float_dot<0b1, 0b1, ZPR32, ZPR8, "fdot", nxv16i8, null_frag>;
+}
+
+let Predicates = [HasSVE2orSME2, HasLUT] in {
+// LUTI2
+ defm LUTI2_ZZZI : sve2_luti2_vector_index<"luti2">;
+// LUTI4
+ defm LUTI4_ZZZI : sve2_luti4_vector_index<"luti4">;
+// LUTI4 (two contiguous registers)
+ defm LUTI4_Z2ZZI : sve2_luti4_vector_vg2_index<"luti4">;
+} // End HasSVE2orSME2, HasLUT
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA510.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA510.td
index 2526fe304190..1b66d6bb8fbd 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA510.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA510.td
@@ -74,14 +74,14 @@ def : WriteRes<WriteIS, [CortexA510UnitALU]> { let Latency = 2; } // Shift/S
// MAC
def : WriteRes<WriteIM32, [CortexA510UnitMAC]> { let Latency = 3; } // 32-bit Multiply
-def : WriteRes<WriteIM64, [CortexA510UnitMAC]> { let Latency = 5; let ResourceCycles = [2];} // 64-bit Multiply
+def : WriteRes<WriteIM64, [CortexA510UnitMAC]> { let Latency = 5; let ReleaseAtCycles = [2];} // 64-bit Multiply
// Div
def : WriteRes<WriteID32, [CortexA510UnitDiv]> {
- let Latency = 8; let ResourceCycles = [8];
+ let Latency = 8; let ReleaseAtCycles = [8];
}
def : WriteRes<WriteID64, [CortexA510UnitDiv]> {
- let Latency = 16; let ResourceCycles = [16];
+ let Latency = 16; let ReleaseAtCycles = [16];
}
//===----------------------------------------------------------------------===//
@@ -94,7 +94,7 @@ class CortexA510Write<int n, ProcResourceKind res> : SchedWriteRes<[res]> {
class CortexA510MCWrite<int n, int m, ProcResourceKind res> : SchedWriteRes<[res]> {
let Latency = n;
- let ResourceCycles = [m];
+ let ReleaseAtCycles = [m];
let BeginGroup = 1;
}
@@ -127,15 +127,15 @@ def : WriteRes<WriteLDHi, [CortexA510UnitLd]> { let Latency = 2; }
def CortexA510WriteVLD1 : SchedWriteRes<[CortexA510UnitLd]> { let Latency = 3; }
def CortexA510WriteVLD1SI : SchedWriteRes<[CortexA510UnitLd]> { let Latency = 3; let SingleIssue = 1; }
def CortexA510WriteVLD2 : SchedWriteRes<[CortexA510UnitLd]> { let Latency = 4;
- let ResourceCycles = [2]; }
+ let ReleaseAtCycles = [2]; }
def CortexA510WriteVLD3 : SchedWriteRes<[CortexA510UnitLd]> { let Latency = 5;
- let ResourceCycles = [3]; }
+ let ReleaseAtCycles = [3]; }
def CortexA510WriteVLD4 : SchedWriteRes<[CortexA510UnitLd]> { let Latency = 6;
- let ResourceCycles = [4]; }
+ let ReleaseAtCycles = [4]; }
def CortexA510WriteVLD6 : SchedWriteRes<[CortexA510UnitLd]> { let Latency = 5;
- let ResourceCycles = [3]; }
+ let ReleaseAtCycles = [3]; }
def CortexA510WriteVLD8 : SchedWriteRes<[CortexA510UnitLd]> { let Latency = 6;
- let ResourceCycles = [4]; }
+ let ReleaseAtCycles = [4]; }
def CortexA510WriteLDP1 : SchedWriteRes<[CortexA510UnitLd]> { let Latency = 3; }
def CortexA510WriteLDP2 : SchedWriteRes<[CortexA510UnitLd]> { let Latency = 3; }
@@ -154,14 +154,14 @@ def : WriteRes<WriteSTX, [CortexA510UnitLdSt]> { let Latency = 3; }
// Vector Store - Similar to vector loads, can take 1-3 cycles to issue.
def : WriteRes<WriteVST, [CortexA510UnitLdSt]> { let Latency = 5;
- let ResourceCycles = [2];}
+ let ReleaseAtCycles = [2];}
def CortexA510WriteVST1 : SchedWriteRes<[CortexA510UnitLdSt]> { let Latency = 4; }
def CortexA510WriteVST2 : SchedWriteRes<[CortexA510UnitLdSt]> { let Latency = 5;
- let ResourceCycles = [2]; }
+ let ReleaseAtCycles = [2]; }
def CortexA510WriteVST3 : SchedWriteRes<[CortexA510UnitLdSt]> { let Latency = 5;
- let ResourceCycles = [3]; }
+ let ReleaseAtCycles = [3]; }
def CortexA510WriteVST4 : SchedWriteRes<[CortexA510UnitLdSt]> { let Latency = 5;
- let ResourceCycles = [4]; }
+ let ReleaseAtCycles = [4]; }
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
@@ -183,7 +183,7 @@ def : WriteRes<WriteFImm, [CortexA510UnitVALU]> { let Latency = 3; }
class CortexA510VSt<int n> : SchedWriteRes<[CortexA510UnitLdSt]> {
let RetireOOO = 1;
- let ResourceCycles = [n];
+ let ReleaseAtCycles = [n];
}
def CortexA510VSt0 : SchedWriteRes<[CortexA510UnitLdSt]> {
@@ -202,20 +202,20 @@ def : WriteRes<WriteFMul, [CortexA510UnitVMAC]> { let Latency = 4; }
let RetireOOO = 1 in {
def : WriteRes<WriteFDiv, [CortexA510UnitVMC]> { let Latency = 22;
- let ResourceCycles = [29]; }
+ let ReleaseAtCycles = [29]; }
def CortexA510WriteVMAC : SchedWriteRes<[CortexA510UnitVMAC]> { let Latency = 4; }
def CortexA510WriteFDivHP : SchedWriteRes<[CortexA510UnitVMC]> { let Latency = 8;
- let ResourceCycles = [5]; }
+ let ReleaseAtCycles = [5]; }
def CortexA510WriteFDivSP : SchedWriteRes<[CortexA510UnitVMC]> { let Latency = 13;
- let ResourceCycles = [10]; }
+ let ReleaseAtCycles = [10]; }
def CortexA510WriteFDivDP : SchedWriteRes<[CortexA510UnitVMC]> { let Latency = 22;
- let ResourceCycles = [19]; }
+ let ReleaseAtCycles = [19]; }
def CortexA510WriteFSqrtHP : SchedWriteRes<[CortexA510UnitVMC]> { let Latency = 8;
- let ResourceCycles = [5]; }
+ let ReleaseAtCycles = [5]; }
def CortexA510WriteFSqrtSP : SchedWriteRes<[CortexA510UnitVMC]> { let Latency = 12;
- let ResourceCycles = [9]; }
+ let ReleaseAtCycles = [9]; }
def CortexA510WriteFSqrtDP : SchedWriteRes<[CortexA510UnitVMC]> { let Latency = 22;
- let ResourceCycles = [19]; }
+ let ReleaseAtCycles = [19]; }
}
//===----------------------------------------------------------------------===//
// Subtarget-specific SchedRead types.
@@ -295,16 +295,16 @@ def : InstRW<[CortexA510WriteVLD2], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[CortexA510WriteVLD2], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[CortexA510WriteVLD2], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA510WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA510WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA510WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD1], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
// 2-element structures
def : InstRW<[CortexA510WriteVLD2], (instregex "LD2i(8|16|32|64)$")>;
@@ -312,10 +312,10 @@ def : InstRW<[CortexA510WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$
def : InstRW<[CortexA510WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[CortexA510WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>;
-def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
-def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>;
-def : InstRW<[CortexA510WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD2i(8|16|32|64)(_POST)?$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>;
// 3-element structures
def : InstRW<[CortexA510WriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
@@ -323,10 +323,10 @@ def : InstRW<[CortexA510WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$
def : InstRW<[CortexA510WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)$")>;
def : InstRW<[CortexA510WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA510WriteVLD3, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA510WriteVLD6, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD3i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
// 4-element structures
def : InstRW<[CortexA510WriteVLD2], (instregex "LD4i(8|16|32|64)$")>; // load single 4-el structure to one lane of 4 regs.
@@ -334,10 +334,10 @@ def : InstRW<[CortexA510WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$
def : InstRW<[CortexA510WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)$")>; // load multiple 4-el structures to 4 regs.
def : InstRW<[CortexA510WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA510WriteVLD4, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA510WriteVLD8, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD4i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
//---
// Vector Stores
@@ -347,28 +347,28 @@ def : InstRW<[CortexA510WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d
def : InstRW<[CortexA510WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[CortexA510WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[CortexA510WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA510WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA510WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA510WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA510WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA510WriteVST4, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVST1], (instregex "ST1i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[CortexA510WriteVST2], (instregex "ST2i(8|16|32|64)$")>;
def : InstRW<[CortexA510WriteVST2], (instregex "ST2Twov(8b|4h|2s)$")>;
def : InstRW<[CortexA510WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA510WriteVST2, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA510WriteVST2, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
-def : InstRW<[CortexA510WriteVST4, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVST2], (instregex "ST2i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVST2], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
def : InstRW<[CortexA510WriteVST2], (instregex "ST3i(8|16|32|64)$")>;
def : InstRW<[CortexA510WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA510WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA510WriteVST4, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|2d|16b|8h|4s|4d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVST2], (instregex "ST3i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|2d|16b|8h|4s|4d)_POST$")>;
def : InstRW<[CortexA510WriteVST2], (instregex "ST4i(8|16|32|64)$")>;
def : InstRW<[CortexA510WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA510WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA510WriteVST4, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVST2], (instregex "ST4i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, CortexA510WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
//---
// Floating Point Conversions, MAC, DIV, SQRT
@@ -1168,10 +1168,10 @@ def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instrs LDR_ZXI)>;
def : InstRW<[CortexA510Write<3, CortexA510UnitLdSt>], (instrs LDR_PXI)>;
// Contiguous load, scalar + imm
-def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LD1[BHWD]_IMM_REAL$",
- "^LD1S?B_[HSD]_IMM_REAL$",
- "^LD1S?H_[SD]_IMM_REAL$",
- "^LD1S?W_D_IMM_REAL$" )>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LD1[BHWD]_IMM$",
+ "^LD1S?B_[HSD]_IMM$",
+ "^LD1S?H_[SD]_IMM$",
+ "^LD1S?W_D_IMM$" )>;
// Contiguous load, scalar + scalar
def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LD1[BHWD]$",
"^LD1S?B_[HSD]$",
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA53.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA53.td
index e378b043d37e..3e4168f5f445 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA53.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA53.td
@@ -83,16 +83,16 @@ def : WriteRes<WriteLDHi, [A53UnitLdSt]> { let Latency = 4; }
// May model this more carefully in the future. The remaining
// A53WriteVLD# types represent the 1-5 cycle issues explicitly.
def : WriteRes<WriteVLD, [A53UnitLdSt]> { let Latency = 6;
- let ResourceCycles = [3]; }
+ let ReleaseAtCycles = [3]; }
def A53WriteVLD1 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 4; }
def A53WriteVLD2 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 5;
- let ResourceCycles = [2]; }
+ let ReleaseAtCycles = [2]; }
def A53WriteVLD3 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 6;
- let ResourceCycles = [3]; }
+ let ReleaseAtCycles = [3]; }
def A53WriteVLD4 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 7;
- let ResourceCycles = [4]; }
+ let ReleaseAtCycles = [4]; }
def A53WriteVLD5 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 8;
- let ResourceCycles = [5]; }
+ let ReleaseAtCycles = [5]; }
// Pre/Post Indexing - Performed as part of address generation which is already
// accounted for in the WriteST* latencies below
@@ -106,12 +106,12 @@ def : WriteRes<WriteSTX, [A53UnitLdSt]> { let Latency = 4; }
// Vector Store - Similar to vector loads, can take 1-3 cycles to issue.
def : WriteRes<WriteVST, [A53UnitLdSt]> { let Latency = 5;
- let ResourceCycles = [2];}
+ let ReleaseAtCycles = [2];}
def A53WriteVST1 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 4; }
def A53WriteVST2 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 5;
- let ResourceCycles = [2]; }
+ let ReleaseAtCycles = [2]; }
def A53WriteVST3 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 6;
- let ResourceCycles = [3]; }
+ let ReleaseAtCycles = [3]; }
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
@@ -134,16 +134,16 @@ def : WriteRes<WriteVq, [A53UnitFPALU]> { let Latency = 6; }
// FP Mul, Div, Sqrt
def : WriteRes<WriteFMul, [A53UnitFPMDS]> { let Latency = 6; }
def : WriteRes<WriteFDiv, [A53UnitFPMDS]> { let Latency = 33;
- let ResourceCycles = [29]; }
+ let ReleaseAtCycles = [29]; }
def A53WriteFMAC : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 10; }
def A53WriteFDivSP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 18;
- let ResourceCycles = [14]; }
+ let ReleaseAtCycles = [14]; }
def A53WriteFDivDP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 33;
- let ResourceCycles = [29]; }
+ let ReleaseAtCycles = [29]; }
def A53WriteFSqrtSP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 17;
- let ResourceCycles = [13]; }
+ let ReleaseAtCycles = [13]; }
def A53WriteFSqrtDP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 32;
- let ResourceCycles = [28]; }
+ let ReleaseAtCycles = [28]; }
//===----------------------------------------------------------------------===//
// Subtarget-specific SchedRead types.
@@ -215,39 +215,39 @@ def : InstRW<[A53WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A53WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A53WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A53WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
-def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD1], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[A53WriteVLD1], (instregex "LD2i(8|16|32|64)$")>;
def : InstRW<[A53WriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A53WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[A53WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
-def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
-def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD1], (instregex "LD2i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD2], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
def : InstRW<[A53WriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
def : InstRW<[A53WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A53WriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[A53WriteVLD3], (instregex "LD3Threev2d$")>;
-def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
-def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
-def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD3Threev2d_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD2], (instregex "LD3i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD3], (instregex "LD3Threev2d_POST$")>;
def : InstRW<[A53WriteVLD2], (instregex "LD4i(8|16|32|64)$")>;
def : InstRW<[A53WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A53WriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[A53WriteVLD4], (instregex "LD4Fourv(2d)$")>;
-def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
-def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[A53WriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
-def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD2], (instregex "LD4i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD4], (instregex "LD4Fourv(2d)_POST$")>;
//---
// Vector Stores
@@ -257,32 +257,32 @@ def : InstRW<[A53WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A53WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A53WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A53WriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
-def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST1], (instregex "ST1i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[A53WriteVST1], (instregex "ST2i(8|16|32|64)$")>;
def : InstRW<[A53WriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>;
def : InstRW<[A53WriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
-def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
-def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST1], (instregex "ST2i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST1], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
def : InstRW<[A53WriteVST2], (instregex "ST3i(8|16|32|64)$")>;
def : InstRW<[A53WriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[A53WriteVST2], (instregex "ST3Threev(2d)$")>;
-def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
-def : InstRW<[A53WriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
-def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST2], (instregex "ST3i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST2], (instregex "ST3Threev(2d)_POST$")>;
def : InstRW<[A53WriteVST2], (instregex "ST4i(8|16|32|64)$")>;
def : InstRW<[A53WriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[A53WriteVST2], (instregex "ST4Fourv(2d)$")>;
-def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
-def : InstRW<[A53WriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
-def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST2], (instregex "ST4i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST2], (instregex "ST4Fourv(2d)_POST$")>;
//---
// Floating Point MAC, DIV, SQRT
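
The Cortex-A53 model receives the same two mechanical updates: every ResourceCycles becomes ReleaseAtCycles, and WriteAdr is hoisted to the front of the _POST write lists. No latency, micro-op count, or resource assignment changes in this file.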
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA55.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA55.td
index 141cc6b79c8b..cb77be350d12 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA55.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA55.td
@@ -76,10 +76,10 @@ def : WriteRes<WriteIM64, [CortexA55UnitMAC]> { let Latency = 4; } // 64-bit M
// Div
def : WriteRes<WriteID32, [CortexA55UnitDiv]> {
- let Latency = 8; let ResourceCycles = [8];
+ let Latency = 8; let ReleaseAtCycles = [8];
}
def : WriteRes<WriteID64, [CortexA55UnitDiv]> {
- let Latency = 8; let ResourceCycles = [8];
+ let Latency = 8; let ReleaseAtCycles = [8];
}
// Load
@@ -91,23 +91,23 @@ def : WriteRes<WriteLDHi, [CortexA55UnitLd]> { let Latency = 5; }
// below, choosing the median of 3 which makes the latency 6.
// An extra cycle is needed to get the swizzling right.
def : WriteRes<WriteVLD, [CortexA55UnitLd]> { let Latency = 6;
- let ResourceCycles = [3]; }
+ let ReleaseAtCycles = [3]; }
def CortexA55WriteVLD1 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 4; }
def CortexA55WriteVLD1SI : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 4; let SingleIssue = 1; }
def CortexA55WriteVLD2 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 5;
- let ResourceCycles = [2]; }
+ let ReleaseAtCycles = [2]; }
def CortexA55WriteVLD3 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 6;
- let ResourceCycles = [3]; }
+ let ReleaseAtCycles = [3]; }
def CortexA55WriteVLD4 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 7;
- let ResourceCycles = [4]; }
+ let ReleaseAtCycles = [4]; }
def CortexA55WriteVLD5 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 8;
- let ResourceCycles = [5]; }
+ let ReleaseAtCycles = [5]; }
def CortexA55WriteVLD6 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 9;
- let ResourceCycles = [6]; }
+ let ReleaseAtCycles = [6]; }
def CortexA55WriteVLD7 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 10;
- let ResourceCycles = [7]; }
+ let ReleaseAtCycles = [7]; }
def CortexA55WriteVLD8 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 11;
- let ResourceCycles = [8]; }
+ let ReleaseAtCycles = [8]; }
def CortexA55WriteLDP1 : SchedWriteRes<[]> { let Latency = 4; }
def CortexA55WriteLDP2 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 5; }
@@ -126,14 +126,14 @@ def : WriteRes<WriteSTX, [CortexA55UnitSt]> { let Latency = 4; }
// Vector Store - Similar to vector loads, can take 1-3 cycles to issue.
def : WriteRes<WriteVST, [CortexA55UnitSt]> { let Latency = 5;
- let ResourceCycles = [2];}
+ let ReleaseAtCycles = [2];}
def CortexA55WriteVST1 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 4; }
def CortexA55WriteVST2 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5;
- let ResourceCycles = [2]; }
+ let ReleaseAtCycles = [2]; }
def CortexA55WriteVST3 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 6;
- let ResourceCycles = [3]; }
+ let ReleaseAtCycles = [3]; }
def CortexA55WriteVST4 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5;
- let ResourceCycles = [4]; }
+ let ReleaseAtCycles = [4]; }
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
@@ -189,20 +189,20 @@ def : WriteRes<WriteFMul, [CortexA55UnitFPMAC]> { let Latency = 4; }
let RetireOOO = 1 in {
def : WriteRes<WriteFDiv, [CortexA55UnitFPDIV]> { let Latency = 22;
- let ResourceCycles = [29]; }
+ let ReleaseAtCycles = [29]; }
def CortexA55WriteFMAC : SchedWriteRes<[CortexA55UnitFPMAC]> { let Latency = 4; }
def CortexA55WriteFDivHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8;
- let ResourceCycles = [5]; }
+ let ReleaseAtCycles = [5]; }
def CortexA55WriteFDivSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 13;
- let ResourceCycles = [10]; }
+ let ReleaseAtCycles = [10]; }
def CortexA55WriteFDivDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22;
- let ResourceCycles = [19]; }
+ let ReleaseAtCycles = [19]; }
def CortexA55WriteFSqrtHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8;
- let ResourceCycles = [5]; }
+ let ReleaseAtCycles = [5]; }
def CortexA55WriteFSqrtSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 12;
- let ResourceCycles = [9]; }
+ let ReleaseAtCycles = [9]; }
def CortexA55WriteFSqrtDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22;
- let ResourceCycles = [19]; }
+ let ReleaseAtCycles = [19]; }
}
//===----------------------------------------------------------------------===//
// Subtarget-specific SchedRead types.
@@ -285,16 +285,16 @@ def : InstRW<[CortexA55WriteVLD6], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[CortexA55WriteVLD8], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD1], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD4], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD6], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD8], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
// 2-element structures
def : InstRW<[CortexA55WriteVLD2], (instregex "LD2i(8|16|32|64)$")>;
@@ -302,10 +302,10 @@ def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$"
def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[CortexA55WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>;
-def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD2i(8|16|32|64)(_POST)?$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>;
// 3-element structures
def : InstRW<[CortexA55WriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
@@ -313,10 +313,10 @@ def : InstRW<[CortexA55WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$"
def : InstRW<[CortexA55WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)$")>;
def : InstRW<[CortexA55WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD3i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
// 4-element structures
def : InstRW<[CortexA55WriteVLD2], (instregex "LD4i(8|16|32|64)$")>; // load single 4-el structure to one lane of 4 regs.
@@ -324,10 +324,10 @@ def : InstRW<[CortexA55WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$"
def : InstRW<[CortexA55WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)$")>; // load multiple 4-el structures to 4 regs.
def : InstRW<[CortexA55WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD4i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
//---
// Vector Stores
@@ -337,28 +337,28 @@ def : InstRW<[CortexA55WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)
def : InstRW<[CortexA55WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[CortexA55WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[CortexA55WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST1], (instregex "ST1i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[CortexA55WriteVST2], (instregex "ST2i(8|16|32|64)$")>;
def : InstRW<[CortexA55WriteVST2], (instregex "ST2Twov(8b|4h|2s)$")>;
def : InstRW<[CortexA55WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
-def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST2], (instregex "ST2i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST2], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
def : InstRW<[CortexA55WriteVST2], (instregex "ST3i(8|16|32|64)$")>;
def : InstRW<[CortexA55WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|2d|16b|8h|4s|4d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST2], (instregex "ST3i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|2d|16b|8h|4s|4d)_POST$")>;
def : InstRW<[CortexA55WriteVST2], (instregex "ST4i(8|16|32|64)$")>;
def : InstRW<[CortexA55WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST2], (instregex "ST4i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
//---
// Floating Point Conversions, MAC, DIV, SQRT
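
Cortex-A55 follows the same pattern: rename plus WriteAdr reordering only, covering the integer divide, vector load/store, and FP divide/sqrt write resources, again with all latencies preserved.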
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td
index 8ce229374000..277ec772cf0f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td
@@ -183,165 +183,165 @@ def : InstRW<[A57Write_3cyc_1W], (instregex "^CRC32")>;
// -----------------------------------------------------------------------------
def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1i(8|16|32)$")>;
-def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1i(8|16|32)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_8cyc_1L_1V], (instregex "LD1i(8|16|32)_POST$")>;
def : InstRW<[A57Write_5cyc_1L], (instregex "LD1i(64)$")>;
-def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1i(64)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_5cyc_1L], (instregex "LD1i(64)_POST$")>;
def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s)$")>;
-def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s)_POST$")>;
def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Rv(1d)$")>;
-def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Rv(1d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_5cyc_1L], (instregex "LD1Rv(1d)_POST$")>;
def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
-def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_5cyc_1L], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
-def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_5cyc_1L], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
-def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_5cyc_1L], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_6cyc_2L], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
-def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_6cyc_2L], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
def : InstRW<[A57Write_7cyc_3L], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[A57Write_7cyc_3L, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_7cyc_3L], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
-def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_6cyc_2L], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
def : InstRW<[A57Write_8cyc_4L], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_8cyc_4L], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD2i(8|16)$")>;
-def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_8cyc_1L_2V], (instregex "LD2i(8|16)_POST$")>;
def : InstRW<[A57Write_6cyc_2L], (instregex "LD2i(32)$")>;
-def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD2i(32)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_6cyc_2L], (instregex "LD2i(32)_POST$")>;
def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2i(64)$")>;
-def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2i(64)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_8cyc_1L_1V], (instregex "LD2i(64)_POST$")>;
def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2Rv(8b|4h|2s)$")>;
-def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_8cyc_1L_1V], (instregex "LD2Rv(8b|4h|2s)_POST$")>;
def : InstRW<[A57Write_5cyc_1L], (instregex "LD2Rv(1d)$")>;
-def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD2Rv(1d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_5cyc_1L], (instregex "LD2Rv(1d)_POST$")>;
def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2Twov(8b|4h|2s)$")>;
-def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_8cyc_1L_1V], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s)$")>;
-def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_9cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s)_POST$")>;
def : InstRW<[A57Write_6cyc_2L], (instregex "LD2Twov(2d)$")>;
-def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD2Twov(2d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_6cyc_2L], (instregex "LD2Twov(2d)_POST$")>;
def : InstRW<[A57Write_9cyc_1L_3V], (instregex "LD3i(8|16)$")>;
-def : InstRW<[A57Write_9cyc_1L_3V, WriteAdr], (instregex "LD3i(8|16)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_9cyc_1L_3V], (instregex "LD3i(8|16)_POST$")>;
def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD3i(32)$")>;
-def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD3i(32)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_8cyc_1L_2V], (instregex "LD3i(32)_POST$")>;
def : InstRW<[A57Write_6cyc_2L], (instregex "LD3i(64)$")>;
-def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD3i(64)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_6cyc_2L], (instregex "LD3i(64)_POST$")>;
def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD3Rv(8b|4h|2s)$")>;
-def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD3Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_8cyc_1L_2V], (instregex "LD3Rv(8b|4h|2s)_POST$")>;
def : InstRW<[A57Write_6cyc_2L], (instregex "LD3Rv(1d)$")>;
-def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD3Rv(1d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_6cyc_2L], (instregex "LD3Rv(1d)_POST$")>;
def : InstRW<[A57Write_9cyc_1L_3V], (instregex "LD3Rv(16b|8h|4s)$")>;
-def : InstRW<[A57Write_9cyc_1L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_9cyc_1L_3V], (instregex "LD3Rv(16b|8h|4s)_POST$")>;
def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD3Rv(2d)$")>;
-def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD3Rv(2d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_9cyc_2L_3V], (instregex "LD3Rv(2d)_POST$")>;
def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD3Threev(8b|4h|2s)$")>;
-def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_9cyc_2L_2V], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
def : InstRW<[A57Write_10cyc_3L_4V], (instregex "LD3Threev(16b|8h|4s)$")>;
-def : InstRW<[A57Write_10cyc_3L_4V, WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_10cyc_3L_4V], (instregex "LD3Threev(16b|8h|4s)_POST$")>;
def : InstRW<[A57Write_8cyc_4L], (instregex "LD3Threev(2d)$")>;
-def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_8cyc_4L], (instregex "LD3Threev(2d)_POST$")>;
def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4i(8|16)$")>;
-def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4i(8|16)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_9cyc_2L_3V], (instregex "LD4i(8|16)_POST$")>;
def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD4i(32)$")>;
-def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD4i(32)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_8cyc_1L_2V], (instregex "LD4i(32)_POST$")>;
def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4i(64)$")>;
-def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4i(64)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_9cyc_2L_3V], (instregex "LD4i(64)_POST$")>;
def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD4Rv(8b|4h|2s)$")>;
-def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD4Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_8cyc_1L_2V], (instregex "LD4Rv(8b|4h|2s)_POST$")>;
def : InstRW<[A57Write_6cyc_2L], (instregex "LD4Rv(1d)$")>;
-def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD4Rv(1d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_6cyc_2L], (instregex "LD4Rv(1d)_POST$")>;
def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4Rv(16b|8h|4s)$")>;
-def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4Rv(16b|8h|4s)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_9cyc_2L_3V], (instregex "LD4Rv(16b|8h|4s)_POST$")>;
def : InstRW<[A57Write_9cyc_2L_4V], (instregex "LD4Rv(2d)$")>;
-def : InstRW<[A57Write_9cyc_2L_4V, WriteAdr], (instregex "LD4Rv(2d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_9cyc_2L_4V], (instregex "LD4Rv(2d)_POST$")>;
def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD4Fourv(8b|4h|2s)$")>;
-def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_9cyc_2L_2V], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
def : InstRW<[A57Write_11cyc_4L_4V], (instregex "LD4Fourv(16b|8h|4s)$")>;
-def : InstRW<[A57Write_11cyc_4L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_11cyc_4L_4V], (instregex "LD4Fourv(16b|8h|4s)_POST$")>;
def : InstRW<[A57Write_8cyc_4L], (instregex "LD4Fourv(2d)$")>;
-def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_8cyc_4L], (instregex "LD4Fourv(2d)_POST$")>;
// Vector Store
// -----------------------------------------------------------------------------
def : InstRW<[A57Write_1cyc_1S], (instregex "ST1i(8|16|32)$")>;
-def : InstRW<[A57Write_1cyc_1S, WriteAdr], (instregex "ST1i(8|16|32)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1S], (instregex "ST1i(8|16|32)_POST$")>;
def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST1i(64)$")>;
-def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST1i(64)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_3cyc_1S_1V], (instregex "ST1i(64)_POST$")>;
def : InstRW<[A57Write_1cyc_1S], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
-def : InstRW<[A57Write_1cyc_1S, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1S], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
def : InstRW<[A57Write_2cyc_2S], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
-def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_2cyc_2S], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
def : InstRW<[A57Write_2cyc_2S], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
-def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_2cyc_2S], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
def : InstRW<[A57Write_4cyc_4S], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_4cyc_4S], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
def : InstRW<[A57Write_3cyc_3S], (instregex "ST1Threev(8b|4h|2s|1d)$")>;
-def : InstRW<[A57Write_3cyc_3S, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_3cyc_3S], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
def : InstRW<[A57Write_6cyc_6S], (instregex "ST1Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[A57Write_6cyc_6S, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_6cyc_6S], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
def : InstRW<[A57Write_4cyc_4S], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
-def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_4cyc_4S], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
def : InstRW<[A57Write_8cyc_8S], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_8cyc_8S], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST2i(8|16|32)$")>;
-def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST2i(8|16|32)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_3cyc_1S_1V], (instregex "ST2i(8|16|32)_POST$")>;
def : InstRW<[A57Write_2cyc_2S], (instregex "ST2i(64)$")>;
-def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST2i(64)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_2cyc_2S], (instregex "ST2i(64)_POST$")>;
def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST2Twov(8b|4h|2s)$")>;
-def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_3cyc_2S_1V], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
def : InstRW<[A57Write_4cyc_4S_2V], (instregex "ST2Twov(16b|8h|4s)$")>;
-def : InstRW<[A57Write_4cyc_4S_2V, WriteAdr], (instregex "ST2Twov(16b|8h|4s)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_4cyc_4S_2V], (instregex "ST2Twov(16b|8h|4s)_POST$")>;
def : InstRW<[A57Write_4cyc_4S], (instregex "ST2Twov(2d)$")>;
-def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST2Twov(2d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_4cyc_4S], (instregex "ST2Twov(2d)_POST$")>;
def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST3i(8|16)$")>;
-def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST3i(8|16)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_3cyc_1S_1V], (instregex "ST3i(8|16)_POST$")>;
def : InstRW<[A57Write_3cyc_3S], (instregex "ST3i(32)$")>;
-def : InstRW<[A57Write_3cyc_3S, WriteAdr], (instregex "ST3i(32)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_3cyc_3S], (instregex "ST3i(32)_POST$")>;
def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST3i(64)$")>;
-def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST3i(64)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_3cyc_2S_1V], (instregex "ST3i(64)_POST$")>;
def : InstRW<[A57Write_3cyc_3S_2V], (instregex "ST3Threev(8b|4h|2s)$")>;
-def : InstRW<[A57Write_3cyc_3S_2V, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_3cyc_3S_2V], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
def : InstRW<[A57Write_6cyc_6S_4V], (instregex "ST3Threev(16b|8h|4s)$")>;
-def : InstRW<[A57Write_6cyc_6S_4V, WriteAdr], (instregex "ST3Threev(16b|8h|4s)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_6cyc_6S_4V], (instregex "ST3Threev(16b|8h|4s)_POST$")>;
def : InstRW<[A57Write_6cyc_6S], (instregex "ST3Threev(2d)$")>;
-def : InstRW<[A57Write_6cyc_6S, WriteAdr], (instregex "ST3Threev(2d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_6cyc_6S], (instregex "ST3Threev(2d)_POST$")>;
def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST4i(8|16)$")>;
-def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST4i(8|16)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_3cyc_1S_1V], (instregex "ST4i(8|16)_POST$")>;
def : InstRW<[A57Write_4cyc_4S], (instregex "ST4i(32)$")>;
-def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST4i(32)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_4cyc_4S], (instregex "ST4i(32)_POST$")>;
def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST4i(64)$")>;
-def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST4i(64)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_3cyc_2S_1V], (instregex "ST4i(64)_POST$")>;
def : InstRW<[A57Write_4cyc_4S_2V], (instregex "ST4Fourv(8b|4h|2s)$")>;
-def : InstRW<[A57Write_4cyc_4S_2V, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_4cyc_4S_2V], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
def : InstRW<[A57Write_8cyc_8S_4V], (instregex "ST4Fourv(16b|8h|4s)$")>;
-def : InstRW<[A57Write_8cyc_8S_4V, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_8cyc_8S_4V], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
def : InstRW<[A57Write_8cyc_8S], (instregex "ST4Fourv(2d)$")>;
-def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
+def : InstRW<[WriteAdr, A57Write_8cyc_8S], (instregex "ST4Fourv(2d)_POST$")>;
// Vector - Integer
// -----------------------------------------------------------------------------
@@ -592,38 +592,38 @@ def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDNPDi)>;
def : InstRW<[A57Write_6cyc_2L, WriteLDHi], (instrs LDNPQi)>;
def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDNPSi)>;
def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDPDi)>;
-def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPDpost)>;
-def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPDpre)>;
+def : InstRW<[WriteAdr, A57Write_5cyc_1L, WriteLDHi], (instrs LDPDpost)>;
+def : InstRW<[WriteAdr, A57Write_5cyc_1L, WriteLDHi], (instrs LDPDpre)>;
def : InstRW<[A57Write_6cyc_2L, WriteLDHi], (instrs LDPQi)>;
-def : InstRW<[A57Write_6cyc_2L, WriteLDHi, WriteAdr], (instrs LDPQpost)>;
-def : InstRW<[A57Write_6cyc_2L, WriteLDHi, WriteAdr], (instrs LDPQpre)>;
+def : InstRW<[WriteAdr, A57Write_6cyc_2L, WriteLDHi], (instrs LDPQpost)>;
+def : InstRW<[WriteAdr, A57Write_6cyc_2L, WriteLDHi], (instrs LDPQpre)>;
def : InstRW<[A57Write_5cyc_1I_2L, WriteLDHi], (instrs LDPSWi)>;
-def : InstRW<[A57Write_5cyc_1I_2L, WriteLDHi, WriteAdr], (instrs LDPSWpost)>;
-def : InstRW<[A57Write_5cyc_1I_2L, WriteLDHi, WriteAdr], (instrs LDPSWpre)>;
+def : InstRW<[WriteAdr, A57Write_5cyc_1I_2L, WriteLDHi], (instrs LDPSWpost)>;
+def : InstRW<[WriteAdr, A57Write_5cyc_1I_2L, WriteLDHi], (instrs LDPSWpre)>;
def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDPSi)>;
-def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPSpost)>;
-def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPSpre)>;
+def : InstRW<[WriteAdr, A57Write_5cyc_1L, WriteLDHi], (instrs LDPSpost)>;
+def : InstRW<[WriteAdr, A57Write_5cyc_1L, WriteLDHi], (instrs LDPSpre)>;
def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRBpost)>;
-def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRBpre)>;
+def : InstRW<[WriteAdr, A57Write_5cyc_1L], (instrs LDRBpre)>;
def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRBroW)>;
def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRBroX)>;
def : InstRW<[A57Write_5cyc_1L], (instrs LDRBui)>;
def : InstRW<[A57Write_5cyc_1L], (instrs LDRDl)>;
def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRDpost)>;
-def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRDpre)>;
+def : InstRW<[WriteAdr, A57Write_5cyc_1L], (instrs LDRDpre)>;
def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRDroW)>;
def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRDroX)>;
def : InstRW<[A57Write_5cyc_1L], (instrs LDRDui)>;
def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRHHroW)>;
def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRHHroX)>;
def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRHpost)>;
-def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRHpre)>;
+def : InstRW<[WriteAdr, A57Write_5cyc_1L], (instrs LDRHpre)>;
def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRHroW)>;
def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRHroX)>;
def : InstRW<[A57Write_5cyc_1L], (instrs LDRHui)>;
def : InstRW<[A57Write_5cyc_1L], (instrs LDRQl)>;
def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRQpost)>;
-def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRQpre)>;
+def : InstRW<[WriteAdr, A57Write_5cyc_1L], (instrs LDRQpre)>;
def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRQroW)>;
def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRQroX)>;
def : InstRW<[A57Write_5cyc_1L], (instrs LDRQui)>;
@@ -633,7 +633,7 @@ def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHXroW)>;
def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHXroX)>;
def : InstRW<[A57Write_5cyc_1L], (instrs LDRSl)>;
def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRSpost)>;
-def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRSpre)>;
+def : InstRW<[WriteAdr, A57Write_5cyc_1L], (instrs LDRSpre)>;
def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRSroW)>;
def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRSroX)>;
def : InstRW<[A57Write_5cyc_1L], (instrs LDRSui)>;
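
The reordering in the hunks above is consistent: for pre- and post-indexed loads, WriteAdr moves from the end of the write list to the front. InstRW writes are matched positionally against the instruction's output operands, and the indexed forms define the written-back base register first, so listing WriteAdr first attaches the address-writeback latency to the correct operand. A minimal sketch of that mapping, assuming the usual AArch64 operand layout for LDPDpre:

// LDPDpre defines, in order: $wback (updated base), $Rt, $Rt2.
// Writes bind to those defs positionally:
//   $wback <- WriteAdr, $Rt <- A57Write_5cyc_1L, $Rt2 <- WriteLDHi.
def : InstRW<[WriteAdr, A57Write_5cyc_1L, WriteLDHi], (instrs LDPDpre)>;
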
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57WriteRes.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57WriteRes.td
index a4c090d439db..fd16e98bff80 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57WriteRes.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57WriteRes.td
@@ -34,17 +34,17 @@ def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
def A57Write_5cyc_1W_Mul_Forward : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; }
def A57Write_17cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 17;
- let ResourceCycles = [17]; }
+ let ReleaseAtCycles = [17]; }
def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19;
- let ResourceCycles = [19]; }
+ let ReleaseAtCycles = [19]; }
def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; }
def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; }
def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; }
def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; }
def A57Write_32cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 32;
- let ResourceCycles = [32]; }
+ let ReleaseAtCycles = [32]; }
def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35;
- let ResourceCycles = [35]; }
+ let ReleaseAtCycles = [35]; }
def A57Write_3cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
def A57Write_3cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 3; }
def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; }
@@ -63,7 +63,7 @@ def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; }
def A57Write_64cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> {
let Latency = 64;
let NumMicroOps = 2;
- let ResourceCycles = [32, 32];
+ let ReleaseAtCycles = [32, 32];
}
def A57Write_6cyc_1I_1L : SchedWriteRes<[A57UnitI,
A57UnitL]> {
@@ -160,7 +160,7 @@ def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
def A57Write_34cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> {
let Latency = 34;
let NumMicroOps = 2;
- let ResourceCycles = [17, 17];
+ let ReleaseAtCycles = [17, 17];
}
def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI,
A57UnitM]> {
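
The other change running through this file is a pure rename: the per-resource busy counts formerly spelled ResourceCycles are now ReleaseAtCycles, matching the newer MCSchedModel terminology (the cycle at which each listed unit is released; a counterpart AcquireAtCycles field covers delayed acquisition, if I read the upstream scheduler change correctly). The semantics here are unchanged. A minimal sketch with a hypothetical name, reusing A57UnitW from this file:

// Unpipelined 17-cycle op: A57UnitW is acquired at issue and not
// released until cycle 17, so consecutive such ops serialize on W.
def A57Write_17cyc_1W_Sketch : SchedWriteRes<[A57UnitW]> {
  let Latency = 17;
  let ReleaseAtCycles = [17];
}
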
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
index cb88eddc2b22..65b97ff6956a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
@@ -23,7 +23,8 @@ def A64FXModel : SchedMachineModel {
list<Predicate> UnsupportedFeatures =
[HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth,
HasSVE2orSME, HasMTE, HasMatMulInt8, HasBF16, HasSME2, HasSME2p1, HasSVE2p1,
- HasSVE2p1_or_HasSME2p1, HasSMEF16F16];
+ HasSVE2p1_or_HasSME2p1, HasSMEF16F16, HasSSVE_FP8FMA, HasSMEF8F16, HasSMEF8F32,
+ HasSMEFA64];
let FullInstRWOverlapCheck = 0;
}
@@ -672,13 +673,13 @@ def : WriteRes<WriteIS, [A64FXGI2456]> {
// Divide, W-form
def : WriteRes<WriteID32, [A64FXGI4]> {
let Latency = 39;
- let ResourceCycles = [39];
+ let ReleaseAtCycles = [39];
}
// Divide, X-form
def : WriteRes<WriteID64, [A64FXGI4]> {
let Latency = 23;
- let ResourceCycles = [23];
+ let ReleaseAtCycles = [23];
}
// Multiply accumulate, W-form
@@ -1254,7 +1255,7 @@ def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
// FP negate
def : WriteRes<WriteF, [A64FXGI03]> {
let Latency = 4;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
// FP arithmetic
@@ -1265,7 +1266,7 @@ def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FSUBDrr, FSUBHrr)>;
// FP compare
def : WriteRes<WriteFCmp, [A64FXGI03]> {
let Latency = 4;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
// FP Div, Sqrt
@@ -1327,21 +1328,21 @@ def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCSEL")>;
// FP convert, from vec to gen reg
def : WriteRes<WriteFCvt, [A64FXGI03]> {
let Latency = 9;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
// FP move, immed
// FP move, register
def : WriteRes<WriteFImm, [A64FXGI0]> {
let Latency = 4;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
// FP transfer, from gen to vec reg
// FP transfer, from vec to gen reg
def : WriteRes<WriteFCopy, [A64FXGI0]> {
let Latency = 4;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def : InstRW<[A64FXWrite_FMOV_GV], (instrs FMOVXDHighr)>;
@@ -2113,14 +2114,14 @@ def : InstRW<[A64FXWrite_None], (instregex "^SETFFR", "^MOVPRFX")>;
def A64FXWrite_FMAIndexed : SchedWriteRes<[A64FXGI03]> {
let Latency = 15;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def : InstRW<[A64FXWrite_FMAIndexed], (instregex "^F(MLA|MLS|MUL)_ZZZI")>;
def A64FXWrite_ADR_LSL_Z : SchedWriteRes<[A64FXGI0]> {
let Latency = 5;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def : InstRW<[A64FXWrite_ADR_LSL_Z], (instregex "^ADR_LSL_Z")>;
@@ -2133,7 +2134,7 @@ def : InstRW<[A64FXWrite_ASRD], (instregex "^ASRD_Z")>;
def A64FXWrite_Reduction4CycB : SchedWriteRes<[A64FXGI03]> {
let Latency = 46;
let NumMicroOps = 10;
- let ResourceCycles = [10];
+ let ReleaseAtCycles = [10];
}
def : InstRW<[A64FXWrite_Reduction4CycB],
(instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_B")>;
@@ -2141,7 +2142,7 @@ def : InstRW<[A64FXWrite_Reduction4CycB],
def A64FXWrite_Reduction4CycH : SchedWriteRes<[A64FXGI03]> {
let Latency = 42;
let NumMicroOps = 9;
- let ResourceCycles = [9];
+ let ReleaseAtCycles = [9];
}
def : InstRW<[A64FXWrite_Reduction4CycH],
(instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_H")>;
@@ -2149,7 +2150,7 @@ def : InstRW<[A64FXWrite_Reduction4CycH],
def A64FXWrite_Reduction4CycS : SchedWriteRes<[A64FXGI03]> {
let Latency = 38;
let NumMicroOps = 8;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
}
def : InstRW<[A64FXWrite_Reduction4CycS],
(instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_S")>;
@@ -2157,7 +2158,7 @@ def : InstRW<[A64FXWrite_Reduction4CycS],
def A64FXWrite_Reduction4CycD : SchedWriteRes<[A64FXGI03]> {
let Latency = 34;
let NumMicroOps = 7;
- let ResourceCycles = [7];
+ let ReleaseAtCycles = [7];
}
def : InstRW<[A64FXWrite_Reduction4CycD],
(instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_D")>;
@@ -2184,7 +2185,7 @@ def : InstRW<[A64FXWrite_CPYScalar], (instregex "^CPY_ZPmR")>;
def A64FXWrite_CTERM : SchedWriteRes<[A64FXGI24]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def : InstRW<[A64FXWrite_CTERM], (instregex "^CTERM")>;
@@ -2202,42 +2203,42 @@ def : InstRW<[A64FXWrite_INCPVector], (instregex "^DECP_Z", "^INCP_Z")>;
def A64FXWrite_FADDVH : SchedWriteRes<[A64FXGI03]> {
let Latency = 75;
let NumMicroOps = 11;
- let ResourceCycles = [11];
+ let ReleaseAtCycles = [11];
}
def : InstRW<[A64FXWrite_FADDVH], (instrs FADDV_VPZ_H)>;
def A64FXWrite_FADDVS : SchedWriteRes<[A64FXGI03]> {
let Latency = 60;
let NumMicroOps = 9;
- let ResourceCycles = [9];
+ let ReleaseAtCycles = [9];
}
def : InstRW<[A64FXWrite_FADDVS], (instrs FADDV_VPZ_S)>;
def A64FXWrite_FADDVD : SchedWriteRes<[A64FXGI03]> {
let Latency = 45;
let NumMicroOps = 7;
- let ResourceCycles = [7];
+ let ReleaseAtCycles = [7];
}
def : InstRW<[A64FXWrite_FADDVD], (instrs FADDV_VPZ_D)>;
def A64FXWrite_FADDAH : SchedWriteRes<[A64FXGI03]> {
let Latency = 468;
let NumMicroOps = 63;
- let ResourceCycles = [63];
+ let ReleaseAtCycles = [63];
}
def : InstRW<[A64FXWrite_FADDAH], (instrs FADDA_VPZ_H)>;
def A64FXWrite_FADDAS : SchedWriteRes<[A64FXGI03]> {
let Latency = 228;
let NumMicroOps = 31;
- let ResourceCycles = [31];
+ let ReleaseAtCycles = [31];
}
def : InstRW<[A64FXWrite_FADDAS], (instrs FADDA_VPZ_S)>;
def A64FXWrite_FADDAD : SchedWriteRes<[A64FXGI03]> {
let Latency = 108;
let NumMicroOps = 15;
- let ResourceCycles = [15];
+ let ReleaseAtCycles = [15];
}
def : InstRW<[A64FXWrite_FADDAD], (instrs FADDA_VPZ_D)>;
@@ -2250,53 +2251,53 @@ def : InstRW<[A64FXWrite_FCADDZ], (instregex "^FCADD_Z")>;
def A64FXWrite_FCMLAZ : SchedWriteRes<[A64FXGI03]> {
let Latency = 15;
let NumMicroOps = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def : InstRW<[A64FXWrite_FCMLAZ], (instregex "^FCMLA_Z")>;
def A64FXWrite_FDIVH : SchedWriteRes<[A64FXGI0]> {
let Latency = 134;
- let ResourceCycles = [134];
+ let ReleaseAtCycles = [134];
}
def : InstRW<[A64FXWrite_FDIVH], (instregex "^F(DIVR?|SQRT)_Z.*_H")>;
def A64FXWrite_FDIVS : SchedWriteRes<[A64FXGI0]> {
let Latency = 98;
- let ResourceCycles = [98];
+ let ReleaseAtCycles = [98];
}
def : InstRW<[A64FXWrite_FDIVS], (instregex "^F(DIVR?|SQRT)_Z.*_S")>;
def A64FXWrite_FDIVD : SchedWriteRes<[A64FXGI0]> {
let Latency = 154;
- let ResourceCycles = [154];
+ let ReleaseAtCycles = [154];
}
def : InstRW<[A64FXWrite_FDIVD], (instregex "^F(DIVR?|SQRT)_Z.*_D")>;
def A64FXWrite_FMAXVH : SchedWriteRes<[A64FXGI03]> {
let Latency = 54;
let NumMicroOps = 11;
- let ResourceCycles = [11];
+ let ReleaseAtCycles = [11];
}
def : InstRW<[A64FXWrite_FMAXVH], (instregex "^F(MAX|MIN)(NM)?V_VPZ_H")>;
def A64FXWrite_FMAXVS : SchedWriteRes<[A64FXGI03]> {
let Latency = 44;
let NumMicroOps = 9;
- let ResourceCycles = [9];
+ let ReleaseAtCycles = [9];
}
def : InstRW<[A64FXWrite_FMAXVS], (instregex "^F(MAX|MIN)(NM)?V_VPZ_S")>;
def A64FXWrite_FMAXVD : SchedWriteRes<[A64FXGI03]> {
let Latency = 34;
let NumMicroOps = 7;
- let ResourceCycles = [7];
+ let ReleaseAtCycles = [7];
}
def : InstRW<[A64FXWrite_FMAXVH], (instregex "^F(MAX|MIN)(NM)?V_VPZ_D")>;
def A64FXWrite_INDEX_RI_BH : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
let Latency = 17;
let NumMicroOps = 2;
- let ResourceCycles = [2, 2];
+ let ReleaseAtCycles = [2, 2];
}
def : InstRW<[A64FXWrite_INDEX_RI_BH], (instregex "^INDEX_(RI|IR)_[BH]")>;
@@ -2309,21 +2310,21 @@ def : InstRW<[A64FXWrite_INDEX_RI_SD], (instregex "^INDEX_(RI|IR)_[SD]")>;
def A64FXWrite_INDEX_II_BH : SchedWriteRes<[A64FXGI0]> {
let Latency = 13;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def : InstRW<[A64FXWrite_INDEX_II_BH], (instregex "^INDEX_II_[BH]")>;
def A64FXWrite_INDEX_RR_BH : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI3]> {
let Latency = 17;
let NumMicroOps = 3;
- let ResourceCycles = [2, 2, 1];
+ let ReleaseAtCycles = [2, 2, 1];
}
def : InstRW<[A64FXWrite_INDEX_RR_BH], (instregex "^INDEX_RR_[BH]")>;
def A64FXWrite_INDEX_RR_SD : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
let Latency = 17;
let NumMicroOps = 2;
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
}
def : InstRW<[A64FXWrite_INDEX_RR_SD], (instregex "^INDEX_RR_[SD]")>;
@@ -2339,28 +2340,28 @@ def : InstRW<[A64FXWrite_CLAST_R], (instregex "^LAST[AB]_R")>;
def A64FXWrite_GLD_S_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
let Latency = 19;
- let ResourceCycles = [2, 4, 4];
+ let ReleaseAtCycles = [2, 4, 4];
}
def : InstRW<[A64FXWrite_GLD_S_ZI],
(instregex "^GLD(FF)?1W_IMM", "^GLD(FF)?1S?[BHW]_S_IMM")>;
def A64FXWrite_GLD_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
let Latency = 16;
- let ResourceCycles = [1, 2, 2];
+ let ReleaseAtCycles = [1, 2, 2];
}
def : InstRW<[A64FXWrite_GLD_D_ZI],
(instregex "^GLD(FF)?1D_IMM", "^GLD(FF)?1S?[BHW]_D_IMM")>;
def A64FXWrite_GLD_S_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
let Latency = 23;
- let ResourceCycles = [2, 1, 4, 4];
+ let ReleaseAtCycles = [2, 1, 4, 4];
}
def : InstRW<[A64FXWrite_GLD_S_RZ],
(instregex "^GLD(FF)?1W_[^DI]", "^GLD(FF)?1S?[BHW]_S_[^I]")>;
def A64FXWrite_GLD_D_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
let Latency = 20;
- let ResourceCycles = [1, 1, 2, 2];
+ let ReleaseAtCycles = [1, 1, 2, 2];
}
def : InstRW<[A64FXWrite_GLD_D_RZ],
(instregex "^GLD(FF)?1D_[^I]", "^GLD(FF)?1D$", "^GLD(FF)?1S?[BHW]_D_[^I]",
@@ -2369,63 +2370,63 @@ def : InstRW<[A64FXWrite_GLD_D_RZ],
def A64FXWrite_LD2_BH : SchedWriteRes<[A64FXGI56]> {
let Latency = 15;
let NumMicroOps = 3;
- let ResourceCycles = [9];
+ let ReleaseAtCycles = [9];
}
def : InstRW<[A64FXWrite_LD2_BH], (instregex "^LD2[BH]")>;
def A64FXWrite_LD2_WD_IMM : SchedWriteRes<[A64FXGI56]> {
let Latency = 11;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def : InstRW<[A64FXWrite_LD2_WD_IMM], (instregex "^LD2[WD]_IMM")>;
def A64FXWrite_LD2_WD : SchedWriteRes<[A64FXGI56]> {
let Latency = 12;
let NumMicroOps = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def : InstRW<[A64FXWrite_LD2_WD], (instregex "^LD2[WD]$")>;
def A64FXWrite_LD3_BH : SchedWriteRes<[A64FXGI56]> {
let Latency = 15;
let NumMicroOps = 4;
- let ResourceCycles = [13];
+ let ReleaseAtCycles = [13];
}
def : InstRW<[A64FXWrite_LD3_BH], (instregex "^LD3[BH]")>;
def A64FXWrite_LD3_WD_IMM : SchedWriteRes<[A64FXGI56]> {
let Latency = 11;
let NumMicroOps = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def : InstRW<[A64FXWrite_LD3_WD_IMM], (instregex "^LD3[WD]_IMM")>;
def A64FXWrite_LD3_WD : SchedWriteRes<[A64FXGI56]> {
let Latency = 12;
let NumMicroOps = 4;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
}
def : InstRW<[A64FXWrite_LD3_WD], (instregex "^LD3[WD]$")>;
def A64FXWrite_LD4_BH : SchedWriteRes<[A64FXGI56]> {
let Latency = 15;
let NumMicroOps = 5;
- let ResourceCycles = [17];
+ let ReleaseAtCycles = [17];
}
def : InstRW<[A64FXWrite_LD4_BH], (instregex "^LD4[BH]")>;
def A64FXWrite_LD4_WD_IMM : SchedWriteRes<[A64FXGI56]> {
let Latency = 11;
let NumMicroOps = 4;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
}
def : InstRW<[A64FXWrite_LD4_WD_IMM], (instregex "^LD4[WD]_IMM")>;
def A64FXWrite_LD4_WD : SchedWriteRes<[A64FXGI56]> {
let Latency = 12;
let NumMicroOps = 5;
- let ResourceCycles = [5];
+ let ReleaseAtCycles = [5];
}
def : InstRW<[A64FXWrite_LD4_WD], (instregex "^LD4[WD]$")>;
@@ -2434,34 +2435,34 @@ def A64FXWrite_PRF : SchedWriteRes<[A64FXGI56]> {
def : InstRW<[A64FXWrite_PRF], (instregex "^PRF._PR")>;
def A64FXWrite_PRF_W_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI56]> {
- let ResourceCycles = [2, 1, 4];
+ let ReleaseAtCycles = [2, 1, 4];
}
def : InstRW<[A64FXWrite_PRF_W_RZ], (instregex "^PRF._S_[^P]")>;
def A64FXWrite_PRF_W_ZI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
- let ResourceCycles = [2, 4];
+ let ReleaseAtCycles = [2, 4];
}
def : InstRW<[A64FXWrite_PRF_W_ZI], (instregex "^PRF._S_PZI")>;
def A64FXWrite_PRF_D_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI56]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
}
def : InstRW<[A64FXWrite_PRF_D_RZ], (instregex "^PRF._D_[^P]")>;
def A64FXWrite_PRF_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
}
def : InstRW<[A64FXWrite_PRF_D_ZI], (instregex "^PRF._D_PZI")>;
def A64FXWrite_SDIV_S : SchedWriteRes<[A64FXGI0]> {
let Latency = 114;
- let ResourceCycles = [114];
+ let ReleaseAtCycles = [114];
}
def : InstRW<[A64FXWrite_SDIV_S], (instregex "^[SU]DIVR?.*_S")>;
def A64FXWrite_SDIV_D : SchedWriteRes<[A64FXGI0]> {
let Latency = 178;
- let ResourceCycles = [178];
+ let ReleaseAtCycles = [178];
}
def : InstRW<[A64FXWrite_SDIV_D], (instregex "^[SU]DIVR?.*_D")>;
@@ -2473,14 +2474,14 @@ def : InstRW<[A64FXWrite_SDOT_I], (instregex "^[SU]DOT_ZZZI")>;
def A64FXWrite_SQINC_Scalar : SchedWriteRes<[A64FXGI24]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def : InstRW<[A64FXWrite_SQINC_Scalar], (instregex "^[SU]Q(INC|DEC)[BHWD]_[WX]")>;
def A64FXWrite_SQINCP_X : SchedWriteRes<[A64FXGI24, A64FXGI3]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [3, 1];
+ let ReleaseAtCycles = [3, 1];
}
def : InstRW<[A64FXWrite_SQINCP_X], (instregex "^[SU]Q(INC|DEC)P_[WX]")>;
@@ -2497,7 +2498,7 @@ def : InstRW<[A64FXWrite_ST1], (instregex "^ST(NT)?1[BHWD]")>;
def A64FXWrite_SST1_W_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
let Latency = 20;
let NumMicroOps = 8;
- let ResourceCycles = [8, 8, 8, 8];
+ let ReleaseAtCycles = [8, 8, 8, 8];
}
def : InstRW<[A64FXWrite_SST1_W_RZ],
(instregex "^SST1[BH]_S(_[^I]|$)", "^SST1W(_[^ID]|$)")>;
@@ -2505,7 +2506,7 @@ def : InstRW<[A64FXWrite_SST1_W_RZ],
def A64FXWrite_SST1_D_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
let Latency = 20;
let NumMicroOps = 4;
- let ResourceCycles = [4, 4, 4, 4];
+ let ReleaseAtCycles = [4, 4, 4, 4];
}
def : InstRW<[A64FXWrite_SST1_D_RZ],
(instregex "^SST1[BHW]_D(_[^I]|$)", "^SST1D(_[^I]|$)")>;
@@ -2513,7 +2514,7 @@ def : InstRW<[A64FXWrite_SST1_D_RZ],
def A64FXWrite_SST1_W_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
let Latency = 16;
let NumMicroOps = 8;
- let ResourceCycles = [12, 8, 8];
+ let ReleaseAtCycles = [12, 8, 8];
}
def : InstRW<[A64FXWrite_SST1_W_ZI],
(instregex "^SST1[BH]_S_I", "^SST1W_I")>;
@@ -2521,7 +2522,7 @@ def : InstRW<[A64FXWrite_SST1_W_ZI],
def A64FXWrite_SST1_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
let Latency = 16;
let NumMicroOps = 4;
- let ResourceCycles = [4, 4, 4];
+ let ReleaseAtCycles = [4, 4, 4];
}
def : InstRW<[A64FXWrite_SST1_D_ZI],
(instregex "^SST1[BHW]_D_I", "^SST1D_I")>;
@@ -2529,63 +2530,63 @@ def : InstRW<[A64FXWrite_SST1_D_ZI],
def A64FXWrite_ST2_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
let Latency = 12;
let NumMicroOps = 3;
- let ResourceCycles = [8, 9];
+ let ReleaseAtCycles = [8, 9];
}
def : InstRW<[A64FXWrite_ST2_BH], (instregex "^ST2[BH]")>;
def A64FXWrite_ST2_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
let Latency = 11;
let NumMicroOps = 2;
- let ResourceCycles = [2, 2];
+ let ReleaseAtCycles = [2, 2];
}
def : InstRW<[A64FXWrite_ST2_WD_RI], (instregex "^ST2[WD]$")>;
def A64FXWrite_ST2_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
let Latency = 12;
let NumMicroOps = 3;
- let ResourceCycles = [2, 3];
+ let ReleaseAtCycles = [2, 3];
}
def : InstRW<[A64FXWrite_ST2_WD_RR], (instregex "^ST2[WD]_I")>;
def A64FXWrite_ST3_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
let Latency = 15;
let NumMicroOps = 4;
- let ResourceCycles = [12, 13];
+ let ReleaseAtCycles = [12, 13];
}
def : InstRW<[A64FXWrite_ST3_BH], (instregex "^ST3[BH]")>;
def A64FXWrite_ST3_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
let Latency = 11;
let NumMicroOps = 3;
- let ResourceCycles = [3, 3];
+ let ReleaseAtCycles = [3, 3];
}
def : InstRW<[A64FXWrite_ST3_WD_RI], (instregex "^ST3[WD]$")>;
def A64FXWrite_ST3_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
let Latency = 12;
let NumMicroOps = 4;
- let ResourceCycles = [3, 4];
+ let ReleaseAtCycles = [3, 4];
}
def : InstRW<[A64FXWrite_ST3_WD_RR], (instregex "^ST3[WD]_I")>;
def A64FXWrite_ST4_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
let Latency = 15;
let NumMicroOps = 5;
- let ResourceCycles = [16, 17];
+ let ReleaseAtCycles = [16, 17];
}
def : InstRW<[A64FXWrite_ST4_BH], (instregex "^ST4[BH]")>;
def A64FXWrite_ST4_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
let Latency = 11;
let NumMicroOps = 4;
- let ResourceCycles = [4, 4];
+ let ReleaseAtCycles = [4, 4];
}
def : InstRW<[A64FXWrite_ST4_WD_RI], (instregex "^ST4[WD]$")>;
def A64FXWrite_ST4_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
let Latency = 12;
let NumMicroOps = 5;
- let ResourceCycles = [4, 5];
+ let ReleaseAtCycles = [4, 5];
}
def : InstRW<[A64FXWrite_ST4_WD_RR], (instregex "^ST4[WD]_I")>;
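
Throughout the A64FX writes above, ReleaseAtCycles is parallel to the SchedWriteRes resource list: one hold count per unit, in the same order, so differently pipelined units can free up at different times within a single write. A sketch under that reading, with a hypothetical name:

// Entries line up positionally with [A64FXGI0, A64FXGI56]: the
// address-generation unit frees after 2 cycles while the load/store
// pipe stays occupied for 4.
def A64FXWrite_Sketch_2Unit : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 16;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [2, 4];
}
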
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td
index de09177d1dc0..cf9f50c2784b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td
@@ -22,7 +22,7 @@ def Ampere1Model : SchedMachineModel {
let LoadLatency = 4; // Optimistic load latency
let MispredictPenalty = 10; // Branch mispredict penalty
let LoopMicroOpBufferSize = 32; // Instruction queue size
- let CompleteModel = 1;
+ let CompleteModel = 0;
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
SMEUnsupported.F,
@@ -936,9 +936,13 @@ def : InstRW<[Ampere1Write_4cyc_1Z], (instregex "^FMOV[WX][HSD]r")>;
def : InstRW<[Ampere1Write_1cyc_1A],
(instregex "ADC(W|X)r", "SBC(W|X)r")>;
def : InstRW<[Ampere1Write_Arith],
- (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)(W|X)r")>;
+ (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)(W|X)r[sx]")>;
+def : InstRW<[Ampere1Write_1cyc_1AB],
+ (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)(W|X)r[ri]")>;
def : InstRW<[Ampere1Write_ArithFlagsetting],
- (instregex "(ADD|AND|BIC|SUB)S(W|X)r")>;
+ (instregex "(ADD|AND|BIC|SUB)S(W|X)r[sx]")>;
+def : InstRW<[Ampere1Write_1cyc_1A],
+ (instregex "(ADD|AND|BIC|SUB)S(W|X)r[ri]")>;
def : InstRW<[Ampere1Write_1cyc_1A],
(instregex "(ADC|SBC)S(W|X)r")>;
def : InstRW<[Ampere1Write_1cyc_1A], (instrs RMIF)>;
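
Two distinct changes land in the Ampere1 model. CompleteModel drops to 0, so instructions without scheduling information no longer fail the model's completeness check. And the integer-arithmetic regexes are split by opcode suffix: AArch64 encodes the operand kind in the mnemonic tail (ADDWrs shifted register, ADDWrx extended register, ADDWrr plain register, ADDWri immediate), so the [sx] class keeps the costlier variant write for forms that use the shifter, while [ri] forms get the single-cycle writes. The same split, trimmed to one mnemonic as a sketch:

// Shifted/extended forms pay for the shift/extend; plain register
// and immediate forms issue in one cycle on an A/B ALU.
def : InstRW<[Ampere1Write_Arith],    (instregex "ADD(W|X)r[sx]")>;
def : InstRW<[Ampere1Write_1cyc_1AB], (instregex "ADD(W|X)r[ri]")>;
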
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedCyclone.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
index e2d916954060..1ef3a2a06338 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
@@ -158,7 +158,7 @@ def : WriteRes<WriteI, [CyUnitI]>;
// EXAMPLE: ADDrs Xn, Xm LSL #imm
def : WriteRes<WriteISReg, [CyUnitIS]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
// ADD with extended register operand is the same as shifted reg operand.
@@ -166,7 +166,7 @@ def : WriteRes<WriteISReg, [CyUnitIS]> {
// EXAMPLE: ADDXre Xn, Xm, UXTB #1
def : WriteRes<WriteIEReg, [CyUnitIS]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
// Variable shift and bitfield operations.
@@ -209,14 +209,14 @@ def : WriteRes<WriteIM64, [CyUnitIM]> {
// SDIVW,UDIVW
def : WriteRes<WriteID32, [CyUnitID, CyUnitIntDiv]> {
let Latency = 10;
- let ResourceCycles = [2, 10];
+ let ReleaseAtCycles = [2, 10];
}
// 64-bit divide takes 7-21 cycles. 13 cycles covers a 32-bit quotient.
// The ID pipe is consumed for 2 cycles: issue and writeback.
// SDIVX,UDIVX
def : WriteRes<WriteID64, [CyUnitID, CyUnitIntDiv]> {
let Latency = 13;
- let ResourceCycles = [2, 13];
+ let ReleaseAtCycles = [2, 13];
}
//---
@@ -555,7 +555,7 @@ def : InstRW<[CyWritePMUL], (instregex "PMULv", "PMULLv")>;
// TODO: Specialize FSQRT for longer latency.
def : WriteRes<WriteFDiv, [CyUnitVD, CyUnitFloatDiv]> {
let Latency = 17;
- let ResourceCycles = [2, 17];
+ let ReleaseAtCycles = [2, 17];
}
def : InstRW<[CyWriteV4], (instregex "FRECPEv","FRECPXv","URECPEv","URSQRTEv")>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
index 728eecfa645e..2127a34a58d5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
@@ -115,7 +115,7 @@ def M3WriteZ1 : SchedWriteRes<[]> { let Latency = 1;
def M3WriteA1 : SchedWriteRes<[M3UnitALU]> { let Latency = 1; }
def M3WriteAA : SchedWriteRes<[M3UnitALU]> { let Latency = 2;
- let ResourceCycles = [2]; }
+ let ReleaseAtCycles = [2]; }
def M3WriteAB : SchedWriteRes<[M3UnitALU,
M3UnitC]> { let Latency = 1;
let NumMicroOps = 2; }
@@ -207,13 +207,13 @@ def : SchedAlias<WriteImm, M3WriteA1>;
// Divide and multiply instructions.
def : WriteRes<WriteID32, [M3UnitC,
M3UnitD]> { let Latency = 12;
- let ResourceCycles = [1, 12]; }
+ let ReleaseAtCycles = [1, 12]; }
def : WriteRes<WriteID64, [M3UnitC,
M3UnitD]> { let Latency = 21;
- let ResourceCycles = [1, 21]; }
+ let ReleaseAtCycles = [1, 21]; }
def : WriteRes<WriteIM32, [M3UnitC]> { let Latency = 3; }
def : WriteRes<WriteIM64, [M3UnitC]> { let Latency = 4;
- let ResourceCycles = [2]; }
+ let ReleaseAtCycles = [2]; }
// Miscellaneous instructions.
def : SchedAlias<WriteExtr, M3WriteAY>;
@@ -238,7 +238,7 @@ def : SchedAlias<WriteSTIdx, M3WriteSB>;
def : WriteRes<WriteF, [M3UnitFADD]> { let Latency = 2; }
def : WriteRes<WriteFCmp, [M3UnitNMSC]> { let Latency = 2; }
def : WriteRes<WriteFDiv, [M3UnitFDIV]> { let Latency = 12;
- let ResourceCycles = [12]; }
+ let ReleaseAtCycles = [12]; }
def : WriteRes<WriteFMul, [M3UnitFMAC]> { let Latency = 4; }
// FP miscellaneous instructions.
@@ -302,19 +302,19 @@ def M3WriteNEONI : SchedWriteRes<[M3UnitNSHF,
def M3WriteNEONV : SchedWriteRes<[M3UnitFDIV0,
M3UnitFDIV1]> { let Latency = 7;
let NumMicroOps = 2;
- let ResourceCycles = [8, 8]; }
+ let ReleaseAtCycles = [8, 8]; }
def M3WriteNEONW : SchedWriteRes<[M3UnitFDIV0,
M3UnitFDIV1]> { let Latency = 12;
let NumMicroOps = 2;
- let ResourceCycles = [13, 13]; }
+ let ReleaseAtCycles = [13, 13]; }
def M3WriteNEONX : SchedWriteRes<[M3UnitFSQR,
M3UnitFSQR]> { let Latency = 18;
let NumMicroOps = 2;
- let ResourceCycles = [19, 19]; }
+ let ReleaseAtCycles = [19, 19]; }
def M3WriteNEONY : SchedWriteRes<[M3UnitFSQR,
M3UnitFSQR]> { let Latency = 25;
let NumMicroOps = 2;
- let ResourceCycles = [26, 26]; }
+ let ReleaseAtCycles = [26, 26]; }
def M3WriteNEONZ : SchedWriteRes<[M3UnitNMSC,
M3UnitNMSC]> { let Latency = 5;
let NumMicroOps = 2; }
@@ -325,16 +325,16 @@ def M3WriteFCVT3A : SchedWriteRes<[M3UnitFCVT0]> { let Latency = 3; }
def M3WriteFCVT4A : SchedWriteRes<[M3UnitFCVT0]> { let Latency = 4; }
def M3WriteFCVT4 : SchedWriteRes<[M3UnitFCVT]> { let Latency = 4; }
def M3WriteFDIV10 : SchedWriteRes<[M3UnitFDIV]> { let Latency = 7;
- let ResourceCycles = [8]; }
+ let ReleaseAtCycles = [8]; }
def M3WriteFDIV12 : SchedWriteRes<[M3UnitFDIV]> { let Latency = 12;
- let ResourceCycles = [13]; }
+ let ReleaseAtCycles = [13]; }
def M3WriteFMAC3 : SchedWriteRes<[M3UnitFMAC]> { let Latency = 3; }
def M3WriteFMAC4 : SchedWriteRes<[M3UnitFMAC]> { let Latency = 4; }
def M3WriteFMAC5 : SchedWriteRes<[M3UnitFMAC]> { let Latency = 5; }
def M3WriteFSQR17 : SchedWriteRes<[M3UnitFSQR]> { let Latency = 18;
- let ResourceCycles = [19]; }
+ let ReleaseAtCycles = [19]; }
def M3WriteFSQR25 : SchedWriteRes<[M3UnitFSQR]> { let Latency = 25;
- let ResourceCycles = [26]; }
+ let ReleaseAtCycles = [26]; }
def M3WriteNALU1 : SchedWriteRes<[M3UnitNALU]> { let Latency = 1; }
def M3WriteNCRY1A : SchedWriteRes<[M3UnitNCRY0]> { let Latency = 1; }
def M3WriteNCRY3A : SchedWriteRes<[M3UnitNCRY0]> { let Latency = 3; }
@@ -363,50 +363,50 @@ def M3WriteVLDC : SchedWriteRes<[M3UnitL,
def M3WriteVLDD : SchedWriteRes<[M3UnitL,
M3UnitNALU]> { let Latency = 7;
let NumMicroOps = 2;
- let ResourceCycles = [2, 1]; }
+ let ReleaseAtCycles = [2, 1]; }
def M3WriteVLDE : SchedWriteRes<[M3UnitL,
M3UnitNALU]> { let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [2, 1]; }
+ let ReleaseAtCycles = [2, 1]; }
def M3WriteVLDF : SchedWriteRes<[M3UnitL,
M3UnitL]> { let Latency = 10;
let NumMicroOps = 2;
- let ResourceCycles = [5, 5]; }
+ let ReleaseAtCycles = [5, 5]; }
def M3WriteVLDG : SchedWriteRes<[M3UnitL,
M3UnitNALU,
M3UnitNALU]> { let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [2, 1, 1]; }
+ let ReleaseAtCycles = [2, 1, 1]; }
def M3WriteVLDH : SchedWriteRes<[M3UnitL,
M3UnitNALU,
M3UnitNALU]> { let Latency = 6;
let NumMicroOps = 3;
- let ResourceCycles = [2, 1, 1]; }
+ let ReleaseAtCycles = [2, 1, 1]; }
def M3WriteVLDI : SchedWriteRes<[M3UnitL,
M3UnitL,
M3UnitL]> { let Latency = 12;
let NumMicroOps = 3;
- let ResourceCycles = [6, 6, 6]; }
+ let ReleaseAtCycles = [6, 6, 6]; }
def M3WriteVLDJ : SchedWriteRes<[M3UnitL,
M3UnitNALU,
M3UnitNALU,
M3UnitNALU]> { let Latency = 7;
let NumMicroOps = 4;
- let ResourceCycles = [2, 1, 1, 1]; }
+ let ReleaseAtCycles = [2, 1, 1, 1]; }
def M3WriteVLDK : SchedWriteRes<[M3UnitL,
M3UnitNALU,
M3UnitNALU,
M3UnitNALU,
M3UnitNALU]> { let Latency = 9;
let NumMicroOps = 5;
- let ResourceCycles = [4, 1, 1, 1, 1]; }
+ let ReleaseAtCycles = [4, 1, 1, 1, 1]; }
def M3WriteVLDL : SchedWriteRes<[M3UnitL,
M3UnitNALU,
M3UnitNALU,
M3UnitL,
M3UnitNALU]> { let Latency = 6;
let NumMicroOps = 5;
- let ResourceCycles = [6, 1, 1, 6, 1]; }
+ let ReleaseAtCycles = [6, 1, 1, 6, 1]; }
def M3WriteVLDM : SchedWriteRes<[M3UnitL,
M3UnitNALU,
M3UnitNALU,
@@ -414,13 +414,13 @@ def M3WriteVLDM : SchedWriteRes<[M3UnitL,
M3UnitNALU,
M3UnitNALU]> { let Latency = 7;
let NumMicroOps = 6;
- let ResourceCycles = [6, 1, 1, 6, 1, 1]; }
+ let ReleaseAtCycles = [6, 1, 1, 6, 1, 1]; }
def M3WriteVLDN : SchedWriteRes<[M3UnitL,
M3UnitL,
M3UnitL,
M3UnitL]> { let Latency = 14;
let NumMicroOps = 4;
- let ResourceCycles = [6, 6, 6, 6]; }
+ let ReleaseAtCycles = [6, 6, 6, 6]; }
def M3WriteVSTA : WriteSequence<[WriteVST], 2>;
def M3WriteVSTB : WriteSequence<[WriteVST], 3>;
def M3WriteVSTC : WriteSequence<[WriteVST], 4>;
@@ -429,7 +429,7 @@ def M3WriteVSTD : SchedWriteRes<[M3UnitS,
M3UnitS,
M3UnitFST]> { let Latency = 7;
let NumMicroOps = 4;
- let ResourceCycles = [1, 3, 1, 3]; }
+ let ReleaseAtCycles = [1, 3, 1, 3]; }
def M3WriteVSTE : SchedWriteRes<[M3UnitS,
M3UnitFST,
M3UnitS,
@@ -437,7 +437,7 @@ def M3WriteVSTE : SchedWriteRes<[M3UnitS,
M3UnitS,
M3UnitFST]> { let Latency = 8;
let NumMicroOps = 6;
- let ResourceCycles = [1, 3, 1, 3, 1, 3]; }
+ let ReleaseAtCycles = [1, 3, 1, 3, 1, 3]; }
def M3WriteVSTF : SchedWriteRes<[M3UnitNALU,
M3UnitFST,
M3UnitFST,
@@ -446,7 +446,7 @@ def M3WriteVSTF : SchedWriteRes<[M3UnitNALU,
M3UnitS,
M3UnitFST]> { let Latency = 15;
let NumMicroOps = 7;
- let ResourceCycles = [1, 3, 3, 1, 3, 1, 3]; }
+ let ReleaseAtCycles = [1, 3, 3, 1, 3, 1, 3]; }
def M3WriteVSTG : SchedWriteRes<[M3UnitNALU,
M3UnitFST,
M3UnitFST,
@@ -457,14 +457,14 @@ def M3WriteVSTG : SchedWriteRes<[M3UnitNALU,
M3UnitS,
M3UnitFST]> { let Latency = 16;
let NumMicroOps = 9;
- let ResourceCycles = [1, 3, 3, 1, 3, 1, 3, 1, 3]; }
+ let ReleaseAtCycles = [1, 3, 3, 1, 3, 1, 3, 1, 3]; }
def M3WriteVSTH : SchedWriteRes<[M3UnitNALU,
M3UnitFST,
M3UnitFST,
M3UnitS,
M3UnitFST]> { let Latency = 14;
let NumMicroOps = 5;
- let ResourceCycles = [1, 3, 3, 1, 3]; }
+ let ReleaseAtCycles = [1, 3, 3, 1, 3]; }
def M3WriteVSTI : SchedWriteRes<[M3UnitNALU,
M3UnitFST,
M3UnitFST,
@@ -475,7 +475,7 @@ def M3WriteVSTI : SchedWriteRes<[M3UnitNALU,
M3UnitS,
M3UnitFST]> { let Latency = 17;
let NumMicroOps = 9;
- let ResourceCycles = [1, 3, 3, 1, 3, 1, 3, 1, 3]; }
+ let ReleaseAtCycles = [1, 3, 3, 1, 3, 1, 3, 1, 3]; }
// Special cases.
def M3WriteAES : SchedWriteRes<[M3UnitNCRY]> { let Latency = 1; }
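
Not every write in these Exynos files is a plain SchedWriteRes; M3WriteVSTA/B/C above use WriteSequence, which repeats a list of writes a given number of times so that a wide store can be modeled as several back-to-back WriteVST groups without spelling each one out. A one-line sketch of the construct, hypothetical name:

// Model a 3-register store as three consecutive WriteVST writes.
def M3WriteVST_Sketch : WriteSequence<[WriteVST], 3>;
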
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
index 66e1c0b9ced1..83cf56088d4c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
@@ -142,7 +142,7 @@ def M4WriteZ4 : SchedWriteRes<[]> { let Latency = 4;
def M4WriteA1 : SchedWriteRes<[M4UnitALU]> { let Latency = 1; }
def M4WriteA2 : SchedWriteRes<[M4UnitALU]> { let Latency = 2; }
def M4WriteAA : SchedWriteRes<[M4UnitALU]> { let Latency = 2;
- let ResourceCycles = [2]; }
+ let ReleaseAtCycles = [2]; }
def M4WriteAB : SchedWriteRes<[M4UnitALU,
M4UnitC]> { let Latency = 2;
let NumMicroOps = 2; }
@@ -176,12 +176,12 @@ def M4WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M4WriteAC]>
def M4WriteC1 : SchedWriteRes<[M4UnitC]> { let Latency = 1; }
def M4WriteC3 : SchedWriteRes<[M4UnitC]> { let Latency = 3; }
def M4WriteCA : SchedWriteRes<[M4UnitC]> { let Latency = 4;
- let ResourceCycles = [2]; }
+ let ReleaseAtCycles = [2]; }
def M4WriteD12 : SchedWriteRes<[M4UnitD]> { let Latency = 12;
- let ResourceCycles = [12]; }
+ let ReleaseAtCycles = [12]; }
def M4WriteD21 : SchedWriteRes<[M4UnitD]> { let Latency = 21;
- let ResourceCycles = [21]; }
+ let ReleaseAtCycles = [21]; }
def M4WriteE2 : SchedWriteRes<[M4UnitE]> { let Latency = 2; }
@@ -265,22 +265,22 @@ def M4WriteNEONR : SchedWriteRes<[M4UnitFCVT0,
let NumMicroOps = 1; }
def M4WriteNEONV : SchedWriteRes<[M4UnitFDIV,
M4UnitFDIV]> { let Latency = 7;
- let ResourceCycles = [6, 6]; }
+ let ReleaseAtCycles = [6, 6]; }
def M4WriteNEONVH : SchedWriteRes<[M4UnitFDIVH,
M4UnitFDIVH]> { let Latency = 7;
- let ResourceCycles = [6, 6]; }
+ let ReleaseAtCycles = [6, 6]; }
def M4WriteNEONW : SchedWriteRes<[M4UnitFDIV,
M4UnitFDIV]> { let Latency = 12;
- let ResourceCycles = [9, 9]; }
+ let ReleaseAtCycles = [9, 9]; }
def M4WriteNEONX : SchedWriteRes<[M4UnitFSQR,
M4UnitFSQR]> { let Latency = 8;
- let ResourceCycles = [7, 7]; }
+ let ReleaseAtCycles = [7, 7]; }
def M4WriteNEONXH : SchedWriteRes<[M4UnitFSQRH,
M4UnitFSQRH]> { let Latency = 7;
- let ResourceCycles = [6, 6]; }
+ let ReleaseAtCycles = [6, 6]; }
def M4WriteNEONY : SchedWriteRes<[M4UnitFSQR,
M4UnitFSQR]> { let Latency = 12;
- let ResourceCycles = [9, 9]; }
+ let ReleaseAtCycles = [9, 9]; }
def M4WriteNEONZ : SchedWriteVariant<[SchedVar<ExynosQFormPred, [M4WriteNEONO]>,
SchedVar<NoSchedPred, [M4WriteNEONN]>]>;
@@ -298,11 +298,11 @@ def M4WriteFCVT4A : SchedWriteRes<[M4UnitFCVT0]> { let Latency = 4; }
def M4WriteFCVT6A : SchedWriteRes<[M4UnitFCVT0]> { let Latency = 6; }
def M4WriteFDIV7 : SchedWriteRes<[M4UnitFDIV]> { let Latency = 7;
- let ResourceCycles = [6]; }
+ let ReleaseAtCycles = [6]; }
def M4WriteFDIV7H : SchedWriteRes<[M4UnitFDIVH]> { let Latency = 7;
- let ResourceCycles = [6]; }
+ let ReleaseAtCycles = [6]; }
def M4WriteFDIV12 : SchedWriteRes<[M4UnitFDIV]> { let Latency = 12;
- let ResourceCycles = [9]; }
+ let ReleaseAtCycles = [9]; }
def M4WriteFMAC2H : SchedWriteRes<[M4UnitFMACH]> { let Latency = 2; }
def M4WriteFMAC3H : SchedWriteRes<[M4UnitFMACH]> { let Latency = 3; }
@@ -312,11 +312,11 @@ def M4WriteFMAC4H : SchedWriteRes<[M4UnitFMACH]> { let Latency = 4; }
def M4WriteFMAC5 : SchedWriteRes<[M4UnitFMAC]> { let Latency = 5; }
def M4WriteFSQR7H : SchedWriteRes<[M4UnitFSQRH]> { let Latency = 7;
- let ResourceCycles = [6]; }
+ let ReleaseAtCycles = [6]; }
def M4WriteFSQR8 : SchedWriteRes<[M4UnitFSQR]> { let Latency = 8;
- let ResourceCycles = [7]; }
+ let ReleaseAtCycles = [7]; }
def M4WriteFSQR12 : SchedWriteRes<[M4UnitFSQR]> { let Latency = 12;
- let ResourceCycles = [9]; }
+ let ReleaseAtCycles = [9]; }
def M4WriteNALU1 : SchedWriteRes<[M4UnitNALU]> { let Latency = 1; }
def M4WriteNALU1H : SchedWriteRes<[M4UnitNALUH]> { let Latency = 1; }
@@ -339,16 +339,16 @@ def M4WriteNSHF1 : SchedWriteRes<[M4UnitNSHF]> { let Latency = 1; }
def M4WriteNSHF1H : SchedWriteRes<[M4UnitNSHFH]> { let Latency = 1; }
def M4WriteNSHF3 : SchedWriteRes<[M4UnitNSHF]> { let Latency = 3; }
def M4WriteNSHFA : SchedWriteRes<[M4UnitNSHF]> { let Latency = 1;
- let ResourceCycles = [2]; }
+ let ReleaseAtCycles = [2]; }
def M4WriteNSHFB : SchedWriteRes<[M4UnitNSHF]> { let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2]; }
+ let ReleaseAtCycles = [2]; }
def M4WriteNSHFC : SchedWriteRes<[M4UnitNSHF]> { let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [4]; }
+ let ReleaseAtCycles = [4]; }
def M4WriteNSHFD : SchedWriteRes<[M4UnitNSHF]> { let Latency = 4;
let NumMicroOps = 4;
- let ResourceCycles = [4]; }
+ let ReleaseAtCycles = [4]; }
def M4WriteNSHT1 : SchedWriteRes<[M4UnitNSHT]> { let Latency = 1; }
def M4WriteNSHT2 : SchedWriteRes<[M4UnitNSHT]> { let Latency = 2; }
@@ -370,41 +370,41 @@ def M4WriteVLDC : SchedWriteRes<[M4UnitL,
def M4WriteVLDD : SchedWriteRes<[M4UnitL,
M4UnitNSHF]> { let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [2, 1]; }
+ let ReleaseAtCycles = [2, 1]; }
def M4WriteVLDF : SchedWriteRes<[M4UnitL,
M4UnitL]> { let Latency = 10;
let NumMicroOps = 2;
- let ResourceCycles = [3, 3]; }
+ let ReleaseAtCycles = [3, 3]; }
def M4WriteVLDG : SchedWriteRes<[M4UnitL,
M4UnitNSHF,
M4UnitNSHF]> { let Latency = 6;
let NumMicroOps = 3;
- let ResourceCycles = [2, 1, 1]; }
+ let ReleaseAtCycles = [2, 1, 1]; }
def M4WriteVLDI : SchedWriteRes<[M4UnitL,
M4UnitL,
M4UnitL]> { let Latency = 12;
let NumMicroOps = 3;
- let ResourceCycles = [3, 3, 3]; }
+ let ReleaseAtCycles = [3, 3, 3]; }
def M4WriteVLDJ : SchedWriteRes<[M4UnitL,
M4UnitNSHF,
M4UnitNSHF,
M4UnitNSHF]> { let Latency = 7;
let NumMicroOps = 4;
- let ResourceCycles = [3, 1, 1, 1]; }
+ let ReleaseAtCycles = [3, 1, 1, 1]; }
def M4WriteVLDK : SchedWriteRes<[M4UnitL,
M4UnitNSHF,
M4UnitNSHF,
M4UnitNSHF,
M4UnitNSHF]> { let Latency = 7;
let NumMicroOps = 5;
- let ResourceCycles = [3, 1, 1, 1, 1]; }
+ let ReleaseAtCycles = [3, 1, 1, 1, 1]; }
def M4WriteVLDL : SchedWriteRes<[M4UnitL,
M4UnitNSHF,
M4UnitNSHF,
M4UnitL,
M4UnitNSHF]> { let Latency = 7;
let NumMicroOps = 5;
- let ResourceCycles = [3, 1, 1, 6, 1]; }
+ let ReleaseAtCycles = [3, 1, 1, 6, 1]; }
def M4WriteVLDM : SchedWriteRes<[M4UnitL,
M4UnitNSHF,
M4UnitNSHF,
@@ -412,13 +412,13 @@ def M4WriteVLDM : SchedWriteRes<[M4UnitL,
M4UnitNSHF,
M4UnitNSHF]> { let Latency = 7;
let NumMicroOps = 6;
- let ResourceCycles = [3, 1, 1, 3, 1, 1]; }
+ let ReleaseAtCycles = [3, 1, 1, 3, 1, 1]; }
def M4WriteVLDN : SchedWriteRes<[M4UnitL,
M4UnitL,
M4UnitL,
M4UnitL]> { let Latency = 14;
let NumMicroOps = 4;
- let ResourceCycles = [3, 3, 3, 3]; }
+ let ReleaseAtCycles = [3, 3, 3, 3]; }
def M4WriteVST1 : SchedWriteRes<[M4UnitS,
M4UnitFST]> { let Latency = 1;
@@ -439,7 +439,7 @@ def M4WriteVSTF : SchedWriteRes<[M4UnitNSHF,
M4UnitS,
M4UnitFST]> { let Latency = 4;
let NumMicroOps = 4;
- let ResourceCycles = [1, 2, 1, 2, 1]; }
+ let ReleaseAtCycles = [1, 2, 1, 2, 1]; }
def M4WriteVSTG : SchedWriteRes<[M4UnitNSHF,
M4UnitNSHF,
M4UnitNSHF,
@@ -450,7 +450,7 @@ def M4WriteVSTG : SchedWriteRes<[M4UnitNSHF,
M4UnitS,
M4UnitFST]> { let Latency = 5;
let NumMicroOps = 6;
- let ResourceCycles = [1, 1, 1, 2, 1, 2, 1, 2, 1]; }
+ let ReleaseAtCycles = [1, 1, 1, 2, 1, 2, 1, 2, 1]; }
def M4WriteVSTI : SchedWriteRes<[M4UnitNSHF,
M4UnitNSHF,
M4UnitNSHF,
@@ -464,7 +464,7 @@ def M4WriteVSTI : SchedWriteRes<[M4UnitNSHF,
M4UnitS,
M4UnitFST]> { let Latency = 8;
let NumMicroOps = 5;
- let ResourceCycles = [1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1]; }
+ let ReleaseAtCycles = [1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1]; }
def M4WriteVSTJ : SchedWriteRes<[M4UnitA,
M4UnitS,
M4UnitFST,
@@ -482,7 +482,7 @@ def M4WriteVSTL : SchedWriteRes<[M4UnitNSHF,
M4UnitS,
M4UnitFST]> { let Latency = 4;
let NumMicroOps = 4;
- let ResourceCycles = [1, 1, 2, 1, 2, 1]; }
+ let ReleaseAtCycles = [1, 1, 2, 1, 2, 1]; }
def M4WriteVSTY : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M4WriteVSTK]>,
SchedVar<NoSchedPred, [WriteVST]>]>;
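
M4WriteVSTY just above (like M4WriteNEONZ earlier) is a SchedWriteVariant: the write actually used is chosen per instruction by a scheduling predicate, with NoSchedPred as the fallback. The ReleaseAtCycles rename never touches these defs, since a variant carries no resource counts itself; the resolved write does. A sketch of the shape, with a hypothetical name and the predicate taken on trust from this file:

// Pick the scaled-index write when the predicate holds, otherwise
// fall back to the generic vector-store write.
def M4WriteVST_Sketch
    : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M4WriteVSTK]>,
                         SchedVar<NoSchedPred, [WriteVST]>]>;
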
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
index a6405d4fc49c..85058af86dec 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
@@ -142,9 +142,9 @@ def M5WriteZ4 : SchedWriteRes<[]> { let Latency = 4;
def M5WriteA1W : SchedWriteRes<[M5UnitAW]> { let Latency = 1; }
def M5WriteA1X : SchedWriteRes<[M5UnitAX]> { let Latency = 1; }
def M5WriteAAW : SchedWriteRes<[M5UnitAW]> { let Latency = 2;
- let ResourceCycles = [2]; }
+ let ReleaseAtCycles = [2]; }
def M5WriteAAX : SchedWriteRes<[M5UnitAX]> { let Latency = 2;
- let ResourceCycles = [2]; }
+ let ReleaseAtCycles = [2]; }
def M5WriteAB : SchedWriteRes<[M5UnitAX,
M5UnitC,
M5UnitE]> { let Latency = 2;
@@ -194,12 +194,12 @@ def M5WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M5WriteAC]>
def M5WriteC1 : SchedWriteRes<[M5UnitC]> { let Latency = 1; }
def M5WriteC2 : SchedWriteRes<[M5UnitC]> { let Latency = 2; }
def M5WriteCA : SchedWriteRes<[M5UnitC]> { let Latency = 3;
- let ResourceCycles = [2]; }
+ let ReleaseAtCycles = [2]; }
def M5WriteD10 : SchedWriteRes<[M5UnitD]> { let Latency = 10;
- let ResourceCycles = [10]; }
+ let ReleaseAtCycles = [10]; }
def M5WriteD16 : SchedWriteRes<[M5UnitD]> { let Latency = 16;
- let ResourceCycles = [16]; }
+ let ReleaseAtCycles = [16]; }
def M5WriteF2 : SchedWriteRes<[M5UnitF]> { let Latency = 2; }
@@ -228,22 +228,22 @@ def M5WriteLFW : SchedWriteRes<[M5UnitAW,
M5UnitAW,
M5UnitL]> { let Latency = 15;
let NumMicroOps = 6;
- let ResourceCycles = [1, 1, 1, 1, 15]; }
+ let ReleaseAtCycles = [1, 1, 1, 1, 15]; }
def M5WriteLFX : SchedWriteRes<[M5UnitAX,
M5UnitAX,
M5UnitAX,
M5UnitAX,
M5UnitL]> { let Latency = 15;
let NumMicroOps = 6;
- let ResourceCycles = [1, 1, 1, 1, 15]; }
+ let ReleaseAtCycles = [1, 1, 1, 1, 15]; }
def M5WriteLGW : SchedWriteRes<[M5UnitAW,
M5UnitL]> { let Latency = 13;
let NumMicroOps = 1;
- let ResourceCycles = [1, 13]; }
+ let ReleaseAtCycles = [1, 13]; }
def M5WriteLGX : SchedWriteRes<[M5UnitAX,
M5UnitL]> { let Latency = 13;
let NumMicroOps = 1;
- let ResourceCycles = [1, 13]; }
+ let ReleaseAtCycles = [1, 13]; }
def M5WriteLH : SchedWriteRes<[]> { let Latency = 6;
let NumMicroOps = 0; }
def M5WriteLX : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M5WriteL5]>,
@@ -282,12 +282,12 @@ def M5WriteNEONK : SchedWriteRes<[M5UnitNSHF,
def M5WriteNEONN : SchedWriteRes<[M5UnitNMSC,
M5UnitNMSC]> { let Latency = 5;
let NumMicroOps = 2;
- let ResourceCycles = [7, 7]; }
+ let ReleaseAtCycles = [7, 7]; }
def M5WriteNEONO : SchedWriteRes<[M5UnitNMSC,
M5UnitNMSC,
M5UnitNMSC]> { let Latency = 8;
let NumMicroOps = 3;
- let ResourceCycles = [10, 10, 10]; }
+ let ReleaseAtCycles = [10, 10, 10]; }
def M5WriteNEONP : SchedWriteRes<[M5UnitNSHF,
M5UnitS0,
M5UnitFCVT]> { let Latency = 7;
@@ -297,19 +297,19 @@ def M5WriteNEONQ : SchedWriteRes<[M5UnitNMSC,
let NumMicroOps = 1; }
def M5WriteNEONU : SchedWriteRes<[M5UnitFSQR,
M5UnitFSQR]> { let Latency = 7;
- let ResourceCycles = [4, 4]; }
+ let ReleaseAtCycles = [4, 4]; }
def M5WriteNEONV : SchedWriteRes<[M5UnitFDIV,
M5UnitFDIV]> { let Latency = 7;
- let ResourceCycles = [6, 6]; }
+ let ReleaseAtCycles = [6, 6]; }
def M5WriteNEONW : SchedWriteRes<[M5UnitFDIV,
M5UnitFDIV]> { let Latency = 12;
- let ResourceCycles = [9, 9]; }
+ let ReleaseAtCycles = [9, 9]; }
def M5WriteNEONX : SchedWriteRes<[M5UnitFSQR,
M5UnitFSQR]> { let Latency = 8;
- let ResourceCycles = [5, 5]; }
+ let ReleaseAtCycles = [5, 5]; }
def M5WriteNEONY : SchedWriteRes<[M5UnitFSQR,
M5UnitFSQR]> { let Latency = 12;
- let ResourceCycles = [9, 9]; }
+ let ReleaseAtCycles = [9, 9]; }
def M5WriteNEONZ : SchedWriteVariant<[SchedVar<ExynosQFormPred, [M5WriteNEONO]>,
SchedVar<NoSchedPred, [M5WriteNEONN]>]>;
@@ -330,24 +330,24 @@ def M5WriteFCVTC : SchedWriteRes<[M5UnitFCVT,
let NumMicroOps = 1; }
def M5WriteFDIV5 : SchedWriteRes<[M5UnitFDIV]> { let Latency = 5;
- let ResourceCycles = [2]; }
+ let ReleaseAtCycles = [2]; }
def M5WriteFDIV7 : SchedWriteRes<[M5UnitFDIV]> { let Latency = 7;
- let ResourceCycles = [4]; }
+ let ReleaseAtCycles = [4]; }
def M5WriteFDIV12 : SchedWriteRes<[M5UnitFDIV]> { let Latency = 12;
- let ResourceCycles = [9]; }
+ let ReleaseAtCycles = [9]; }
def M5WriteFMAC3 : SchedWriteRes<[M5UnitFMAC]> { let Latency = 3; }
def M5WriteFMAC4 : SchedWriteRes<[M5UnitFMAC]> { let Latency = 4; }
def M5WriteFMAC5 : SchedWriteRes<[M5UnitFMAC]> { let Latency = 5; }
def M5WriteFSQR5 : SchedWriteRes<[M5UnitFSQR]> { let Latency = 5;
- let ResourceCycles = [2]; }
+ let ReleaseAtCycles = [2]; }
def M5WriteFSQR7 : SchedWriteRes<[M5UnitFSQR]> { let Latency = 7;
- let ResourceCycles = [4]; }
+ let ReleaseAtCycles = [4]; }
def M5WriteFSQR8 : SchedWriteRes<[M5UnitFSQR]> { let Latency = 8;
- let ResourceCycles = [5]; }
+ let ReleaseAtCycles = [5]; }
def M5WriteFSQR12 : SchedWriteRes<[M5UnitFSQR]> { let Latency = 12;
- let ResourceCycles = [9]; }
+ let ReleaseAtCycles = [9]; }
def M5WriteNALU1 : SchedWriteRes<[M5UnitNALU]> { let Latency = 1; }
def M5WriteNALU2 : SchedWriteRes<[M5UnitNALU]> { let Latency = 2; }
@@ -396,16 +396,16 @@ def M5WriteVLDC : SchedWriteRes<[M5UnitL,
def M5WriteVLDD : SchedWriteRes<[M5UnitL,
M5UnitNSHF]> { let Latency = 7;
let NumMicroOps = 2;
- let ResourceCycles = [2, 1]; }
+ let ReleaseAtCycles = [2, 1]; }
def M5WriteVLDF : SchedWriteRes<[M5UnitL,
M5UnitL]> { let Latency = 11;
let NumMicroOps = 2;
- let ResourceCycles = [6, 5]; }
+ let ReleaseAtCycles = [6, 5]; }
def M5WriteVLDG : SchedWriteRes<[M5UnitL,
M5UnitNSHF,
M5UnitNSHF]> { let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [2, 1, 1]; }
+ let ReleaseAtCycles = [2, 1, 1]; }
def M5WriteVLDI : SchedWriteRes<[M5UnitL,
M5UnitL,
M5UnitL]> { let Latency = 13;
@@ -439,7 +439,7 @@ def M5WriteVLDN : SchedWriteRes<[M5UnitL,
M5UnitL,
M5UnitL]> { let Latency = 15;
let NumMicroOps = 4;
- let ResourceCycles = [2, 2, 2, 2]; }
+ let ReleaseAtCycles = [2, 2, 2, 2]; }
def M5WriteVST1 : SchedWriteRes<[M5UnitS,
M5UnitFST]> { let Latency = 1;
@@ -501,7 +501,7 @@ def M5WriteVSTI : SchedWriteRes<[M5UnitNSHF,
M5UnitS,
M5UnitFST]> { let Latency = 8;
let NumMicroOps = 5;
- let ResourceCycles = [1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1]; }
+ let ReleaseAtCycles = [1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1]; }
def M5WriteVSTJ : SchedWriteRes<[M5UnitA,
M5UnitS0,
M5UnitFST]> { let Latency = 1;
@@ -517,7 +517,7 @@ def M5WriteVSTL : SchedWriteRes<[M5UnitNSHF,
M5UnitS,
M5UnitFST]> { let Latency = 4;
let NumMicroOps = 4;
- let ResourceCycles = [1, 1, 2, 1, 2, 1]; }
+ let ReleaseAtCycles = [1, 1, 2, 1, 2, 1]; }
def M5WriteVSTY : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M5WriteVSTK]>,
SchedVar<NoSchedPred, [WriteVST]>]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td
index d5788795c7a1..375e3652c9e8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td
@@ -210,13 +210,13 @@ def FalkorWr_1Z_1XY_0cyc : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> {
def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [2, 8];
+ let ReleaseAtCycles = [2, 8];
}
def FalkorWr_1X_1Z_11cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
let Latency = 11;
let NumMicroOps = 2;
- let ResourceCycles = [2, 11];
+ let ReleaseAtCycles = [2, 11];
}
def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td
index d689b9fa9c06..2ec9600f84f7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td
@@ -84,13 +84,13 @@ def N1Write_1c_1I : SchedWriteRes<[N1UnitI]> { let Latency = 1; }
def N1Write_2c_1M : SchedWriteRes<[N1UnitM]> { let Latency = 2; }
def N1Write_3c_1M : SchedWriteRes<[N1UnitM]> { let Latency = 3; }
def N1Write_4c3_1M : SchedWriteRes<[N1UnitM]> { let Latency = 4;
- let ResourceCycles = [3]; }
+ let ReleaseAtCycles = [3]; }
def N1Write_5c3_1M : SchedWriteRes<[N1UnitM]> { let Latency = 5;
- let ResourceCycles = [3]; }
+ let ReleaseAtCycles = [3]; }
def N1Write_12c5_1M : SchedWriteRes<[N1UnitM]> { let Latency = 12;
- let ResourceCycles = [5]; }
+ let ReleaseAtCycles = [5]; }
def N1Write_20c5_1M : SchedWriteRes<[N1UnitM]> { let Latency = 20;
- let ResourceCycles = [5]; }
+ let ReleaseAtCycles = [5]; }
def N1Write_4c_1L : SchedWriteRes<[N1UnitL]> { let Latency = 4; }
def N1Write_5c_1L : SchedWriteRes<[N1UnitL]> { let Latency = 5; }
def N1Write_7c_1L : SchedWriteRes<[N1UnitL]> { let Latency = 7; }
@@ -102,15 +102,15 @@ def N1Write_2c_1V0 : SchedWriteRes<[N1UnitV0]> { let Latency = 2; }
def N1Write_3c_1V0 : SchedWriteRes<[N1UnitV0]> { let Latency = 3; }
def N1Write_4c_1V0 : SchedWriteRes<[N1UnitV0]> { let Latency = 4; }
def N1Write_7c7_1V0 : SchedWriteRes<[N1UnitV0]> { let Latency = 7;
- let ResourceCycles = [7]; }
+ let ReleaseAtCycles = [7]; }
def N1Write_10c7_1V0 : SchedWriteRes<[N1UnitV0]> { let Latency = 10;
- let ResourceCycles = [7]; }
+ let ReleaseAtCycles = [7]; }
def N1Write_13c10_1V0 : SchedWriteRes<[N1UnitV0]> { let Latency = 13;
- let ResourceCycles = [10]; }
+ let ReleaseAtCycles = [10]; }
def N1Write_15c7_1V0 : SchedWriteRes<[N1UnitV0]> { let Latency = 15;
- let ResourceCycles = [7]; }
+ let ReleaseAtCycles = [7]; }
def N1Write_17c7_1V0 : SchedWriteRes<[N1UnitV0]> { let Latency = 17;
- let ResourceCycles = [7]; }
+ let ReleaseAtCycles = [7]; }
def N1Write_2c_1V1 : SchedWriteRes<[N1UnitV1]> { let Latency = 2; }
def N1Write_3c_1V1 : SchedWriteRes<[N1UnitV1]> { let Latency = 3; }
def N1Write_4c_1V1 : SchedWriteRes<[N1UnitV1]> { let Latency = 4; }
@@ -377,7 +377,7 @@ def : InstRW<[N1Write_4c_1L, N1Write_0c_0Z], (instrs LDPWi, LDNPWi)>;
def : InstRW<[N1Write_5c_1I_1L, N1Write_0c_0Z], (instrs LDPSWi)>;
// Load pair, immed post or pre-index, signed words
-def : InstRW<[N1Write_5c_1I_1L, N1Write_0c_0Z, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_5c_1I_1L, N1Write_0c_0Z],
(instrs LDPSWpost, LDPSWpre)>;
@@ -477,7 +477,7 @@ def : InstRW<[N1Write_5c_1L, ReadAdrBase], (instregex "^LDR[SDQ]l$",
// Load vector reg, immed post-index
// Load vector reg, immed pre-index
-def : InstRW<[N1Write_5c_1L, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_5c_1L],
(instregex "^LDR[BHSDQ](post|pre)$")>;
// Load vector reg, unsigned immed
@@ -501,12 +501,12 @@ def : InstRW<[N1Write_7c_1I_1L, WriteLDHi], (instregex "^LDPN?[HQ]i$")>;
// Load vector pair, immed post-index, S/D-form
// Load vector pair, immed pre-index, S/D-form
-def : InstRW<[N1Write_5c_1L, WriteLDHi, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_5c_1L, WriteLDHi],
(instregex "^LDP[SD](pre|post)$")>;
// Load vector pair, immed post-index, Q-form
// Load vector pair, immed pre-index, Q-form
-def : InstRW<[N1Write_7c_1L, WriteLDHi, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_7c_1L, WriteLDHi],
(instrs LDPQpost, LDPQpre)>;
@@ -521,11 +521,11 @@ def : InstRW<[N1Write_2c_2I_2L], (instrs STURQi)>;
// Store vector reg, immed post-index, B/H/S/D-form
// Store vector reg, immed pre-index, B/H/S/D-form
-def : InstRW<[N1Write_2c_1L_1V, WriteAdr], (instregex "^STR[BHSD](pre|post)$")>;
+def : InstRW<[WriteAdr, N1Write_2c_1L_1V], (instregex "^STR[BHSD](pre|post)$")>;
// Store vector reg, immed pre-index, Q-form
// Store vector reg, immed post-index, Q-form
-def : InstRW<[N1Write_2c_2L_2V, WriteAdr], (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, N1Write_2c_2L_2V], (instrs STRQpre, STRQpost)>;
// Store vector reg, unsigned immed, B/H/S/D-form
def : InstRW<[N1Write_2c_1L_1V], (instregex "^STR[BHSD]ui$")>;
@@ -562,15 +562,15 @@ def : InstRW<[N1Write_3c_4L_2V], (instrs STPQi, STNPQi)>;
// Store vector pair, immed post-index, S-form
// Store vector pair, immed pre-index, S-form
-def : InstRW<[N1Write_2c_1L_1V, WriteAdr], (instrs STPSpre, STPSpost)>;
+def : InstRW<[WriteAdr, N1Write_2c_1L_1V], (instrs STPSpre, STPSpost)>;
// Store vector pair, immed post-index, D-form
// Store vector pair, immed pre-index, D-form
-def : InstRW<[N1Write_2c_2L_2V, WriteAdr], (instrs STPDpre, STPDpost)>;
+def : InstRW<[WriteAdr, N1Write_2c_2L_2V], (instrs STPDpre, STPDpost)>;
// Store vector pair, immed post-index, Q-form
// Store vector pair, immed pre-index, Q-form
-def : InstRW<[N1Write_3c_4L_2V, WriteAdr], (instrs STPQpre, STPQpost)>;
+def : InstRW<[WriteAdr, N1Write_3c_4L_2V], (instrs STPQpre, STPQpost)>;
// ASIMD integer instructions
@@ -818,25 +818,25 @@ def : InstRW<[N1Write_5c_1M_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
// ASIMD load, 1 element, multiple, 1 reg
def : InstRW<[N1Write_5c_1L],
(instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
-def : InstRW<[N1Write_5c_1L, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_5c_1L],
(instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 2 reg
def : InstRW<[N1Write_5c_2L],
(instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
-def : InstRW<[N1Write_5c_2L, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_5c_2L],
(instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 3 reg
def : InstRW<[N1Write_6c_3L],
(instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
-def : InstRW<[N1Write_6c_3L, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_6c_3L],
(instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 4 reg
def : InstRW<[N1Write_6c_4L],
(instregex "^LD1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
-def : InstRW<[N1Write_6c_4L, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_6c_4L],
(instregex "^LD1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
// ASIMD load, 1 element, one lane
@@ -844,7 +844,7 @@ def : InstRW<[N1Write_6c_4L, WriteAdr],
def : InstRW<[N1Write_7c_1L_1V],
(instregex "LD1(i|Rv)(8|16|32|64)$",
"LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
-def : InstRW<[N1Write_7c_1L_1V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_7c_1L_1V],
(instregex "LD1i(8|16|32|64)_POST$",
"LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
@@ -855,7 +855,7 @@ def : InstRW<[N1Write_7c_2L_2V],
(instregex "LD2Twov(8b|16b|4h|8h|2s|4s|2d)$",
"LD2i(8|16|32|64)$",
"LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
-def : InstRW<[N1Write_7c_2L_2V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_7c_2L_2V],
(instregex "LD2Twov(8b|16b|4h|8h|2s|4s|2d)_POST$",
"LD2i(8|16|32|64)_POST$",
"LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
@@ -863,7 +863,7 @@ def : InstRW<[N1Write_7c_2L_2V, WriteAdr],
// ASIMD load, 3 element, multiple
def : InstRW<[N1Write_8c_3L_3V],
(instregex "LD3Threev(8b|16b|4h|8h|2s|4s|2d)$")>;
-def : InstRW<[N1Write_8c_3L_3V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_8c_3L_3V],
(instregex "LD3Threev(8b|16b|4h|8h|2s|4s|2d)_POST$")>;
// ASIMD load, 3 element, one lane
@@ -871,20 +871,20 @@ def : InstRW<[N1Write_8c_3L_3V, WriteAdr],
def : InstRW<[N1Write_7c_2L_3V],
(instregex "LD3i(8|16|32|64)$",
"LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
-def : InstRW<[N1Write_7c_2L_3V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_7c_2L_3V],
(instregex "LD3i(8|16|32|64)_POST$",
"LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
// ASIMD load, 4 element, multiple, D-form
def : InstRW<[N1Write_8c_3L_4V],
(instregex "LD4Fourv(8b|4h|2s)$")>;
-def : InstRW<[N1Write_8c_3L_4V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_8c_3L_4V],
(instregex "LD4Fourv(8b|4h|2s)_POST$")>;
// ASIMD load, 4 element, multiple, Q-form
def : InstRW<[N1Write_10c_4L_4V],
(instregex "LD4Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[N1Write_10c_4L_4V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_10c_4L_4V],
(instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 4 element, one lane
@@ -892,7 +892,7 @@ def : InstRW<[N1Write_10c_4L_4V, WriteAdr],
def : InstRW<[N1Write_8c_4L_4V],
(instregex "LD4i(8|16|32|64)$",
"LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
-def : InstRW<[N1Write_8c_4L_4V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_8c_4L_4V],
(instregex "LD4i(8|16|32|64)_POST$",
"LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
@@ -903,127 +903,127 @@ def : InstRW<[N1Write_8c_4L_4V, WriteAdr],
// ASIMD store, 1 element, multiple, 1 reg, D-form
def : InstRW<[N1Write_2c_1L_1V],
(instregex "ST1Onev(8b|4h|2s|1d)$")>;
-def : InstRW<[N1Write_2c_1L_1V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_2c_1L_1V],
(instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
// ASIMD store, 1 element, multiple, 1 reg, Q-form
def : InstRW<[N1Write_2c_1L_1V],
(instregex "ST1Onev(16b|8h|4s|2d)$")>;
-def : InstRW<[N1Write_2c_1L_1V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_2c_1L_1V],
(instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 2 reg, D-form
def : InstRW<[N1Write_2c_1L_2V],
(instregex "ST1Twov(8b|4h|2s|1d)$")>;
-def : InstRW<[N1Write_2c_1L_2V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_2c_1L_2V],
(instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
// ASIMD store, 1 element, multiple, 2 reg, Q-form
def : InstRW<[N1Write_3c_2L_2V],
(instregex "ST1Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[N1Write_3c_2L_2V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_3c_2L_2V],
(instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 3 reg, D-form
def : InstRW<[N1Write_3c_2L_3V],
(instregex "ST1Threev(8b|4h|2s|1d)$")>;
-def : InstRW<[N1Write_3c_2L_3V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_3c_2L_3V],
(instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
// ASIMD store, 1 element, multiple, 3 reg, Q-form
def : InstRW<[N1Write_4c_3L_3V],
(instregex "ST1Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[N1Write_4c_3L_3V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_4c_3L_3V],
(instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 4 reg, D-form
def : InstRW<[N1Write_3c_2L_2V],
(instregex "ST1Fourv(8b|4h|2s|1d)$")>;
-def : InstRW<[N1Write_3c_2L_2V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_3c_2L_2V],
(instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
// ASIMD store, 1 element, multiple, 4 reg, Q-form
def : InstRW<[N1Write_5c_4L_4V],
(instregex "ST1Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[N1Write_5c_4L_4V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_5c_4L_4V],
(instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, one lane
def : InstRW<[N1Write_4c_1L_1V],
(instregex "ST1i(8|16|32|64)$")>;
-def : InstRW<[N1Write_4c_1L_1V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_4c_1L_1V],
(instregex "ST1i(8|16|32|64)_POST$")>;
// ASIMD store, 2 element, multiple, D-form, B/H/S
def : InstRW<[N1Write_4c_1L_1V],
(instregex "ST2Twov(8b|4h|2s)$")>;
-def : InstRW<[N1Write_4c_1L_1V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_4c_1L_1V],
(instregex "ST2Twov(8b|4h|2s)_POST$")>;
// ASIMD store, 2 element, multiple, Q-form
def : InstRW<[N1Write_5c_2L_2V],
(instregex "ST2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[N1Write_5c_2L_2V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_5c_2L_2V],
(instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 2 element, one lane
def : InstRW<[N1Write_4c_1L_1V],
(instregex "ST2i(8|16|32|64)$")>;
-def : InstRW<[N1Write_4c_1L_1V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_4c_1L_1V],
(instregex "ST2i(8|16|32|64)_POST$")>;
// ASIMD store, 3 element, multiple, D-form, B/H/S
def : InstRW<[N1Write_5c_2L_2V],
(instregex "ST3Threev(8b|4h|2s)$")>;
-def : InstRW<[N1Write_5c_2L_2V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_5c_2L_2V],
(instregex "ST3Threev(8b|4h|2s)_POST$")>;
// ASIMD store, 3 element, multiple, Q-form
def : InstRW<[N1Write_6c_3L_3V],
(instregex "ST3Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[N1Write_6c_3L_3V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_6c_3L_3V],
(instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 3 element, one lane, B/H/S
def : InstRW<[N1Write_4c_3L_3V],
(instregex "ST3i(8|16|32)$")>;
-def : InstRW<[N1Write_4c_3L_3V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_4c_3L_3V],
(instregex "ST3i(8|16|32)_POST$")>;
// ASIMD store, 3 element, one lane, D
def : InstRW<[N1Write_5c_3L_3V],
(instrs ST3i64)>;
-def : InstRW<[N1Write_5c_3L_3V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_5c_3L_3V],
(instrs ST3i64_POST)>;
// ASIMD store, 4 element, multiple, D-form, B/H/S
def : InstRW<[N1Write_7c_3L_3V],
(instregex "ST4Fourv(8b|4h|2s)$")>;
-def : InstRW<[N1Write_7c_3L_3V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_7c_3L_3V],
(instregex "ST4Fourv(8b|4h|2s)_POST$")>;
// ASIMD store, 4 element, multiple, Q-form, B/H/S
def : InstRW<[N1Write_9c_6L_6V],
(instregex "ST4Fourv(16b|8h|4s)$")>;
-def : InstRW<[N1Write_9c_6L_6V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_9c_6L_6V],
(instregex "ST4Fourv(16b|8h|4s)_POST$")>;
// ASIMD store, 4 element, multiple, Q-form, D
def : InstRW<[N1Write_6c_4L_4V],
(instrs ST4Fourv2d)>;
-def : InstRW<[N1Write_6c_4L_4V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_6c_4L_4V],
(instrs ST4Fourv2d_POST)>;
// ASIMD store, 4 element, one lane, B/H/S
def : InstRW<[N1Write_5c_3L_3V],
(instregex "ST4i(8|16|32)$")>;
-def : InstRW<[N1Write_5c_3L_3V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_5c_3L_3V],
(instregex "ST4i(8|16|32)_POST$")>;
// ASIMD store, 4 element, one lane, D
def : InstRW<[N1Write_4c_3L_3V],
(instrs ST4i64)>;
-def : InstRW<[N1Write_4c_3L_3V, WriteAdr],
+def : InstRW<[WriteAdr, N1Write_4c_3L_3V],
(instrs ST4i64_POST)>;
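
A note on the recurring operand reordering in this file: InstRW assigns its SchedWrite list to an instruction's definitions in order, and the pre/post-indexed (pre/_POST) AArch64 load/store variants define the updated base register as their first result, ahead of any loaded data. Putting WriteAdr first therefore binds the one-cycle address writeback to the base-register def and the load latency to the data def, rather than the reverse; for the store variants, whose only def is the updated base, the same ordering keeps WriteAdr on that def while the remaining writes model resource usage. A minimal sketch of the pattern, reusing a write type already defined in this file:

  // Post-indexed loads define (wback, data); writes bind to defs in order.
  def : InstRW<[WriteAdr,           // wback: address-writeback result
                N1Write_7c_2L_3V],  // data:  load latency and resources
               (instregex "LD3i(8|16|32|64)_POST$")>;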
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
index 6bb71f2ce236..503de3bee2b8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
@@ -82,15 +82,15 @@ def N2Write_1cyc_1L01 : SchedWriteRes<[N2UnitL01]> { let Latency = 1; }
def N2Write_2cyc_1M : SchedWriteRes<[N2UnitM]> { let Latency = 2; }
def N2Write_3cyc_1M : SchedWriteRes<[N2UnitM]> { let Latency = 3; }
def N2Write_2cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 2;
- let ResourceCycles = [2]; }
+ let ReleaseAtCycles = [2]; }
def N2Write_3cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 3;
- let ResourceCycles = [3]; }
+ let ReleaseAtCycles = [3]; }
def N2Write_5cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 5;
- let ResourceCycles = [5]; }
+ let ReleaseAtCycles = [5]; }
def N2Write_12cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 12;
- let ResourceCycles = [12]; }
+ let ReleaseAtCycles = [12]; }
def N2Write_20cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 20;
- let ResourceCycles = [20]; }
+ let ReleaseAtCycles = [20]; }
def N2Write_4cyc_1L : SchedWriteRes<[N2UnitL]> { let Latency = 4; }
def N2Write_6cyc_1L : SchedWriteRes<[N2UnitL]> { let Latency = 6; }
def N2Write_2cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 2; }
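
ReleaseAtCycles is the new name for the field these models previously spelled ResourceCycles: for each unit in the SchedWriteRes list it gives the cycle at which that unit is released, i.e. how long the micro-op occupies it, which is independent of Latency (when the result becomes visible to consumers). The unpipelined divide-style writes above set both, as in this definition from the hunk:

  def N2Write_20cyc_1M0 : SchedWriteRes<[N2UnitM0]> {
    let Latency = 20;            // consumers see the result after 20 cycles
    let ReleaseAtCycles = [20];  // M0 stays busy (not pipelined) for all 20
  }

By contrast, a fully pipelined write leaves ReleaseAtCycles at its default of one cycle per unit, so back-to-back operations can issue every cycle despite a multi-cycle latency.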
@@ -102,7 +102,7 @@ def N2Write_2cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 2; }
def N2Write_3cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 3; }
def N2Write_4cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 4; }
def N2Write_7cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 7;
- let ResourceCycles = [7]; }
+ let ReleaseAtCycles = [7]; }
def N2Write_9cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 9; }
def N2Write_10cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 10; }
def N2Write_12cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 12; }
@@ -188,25 +188,25 @@ def N2Write_4cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
def N2Write_10cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
let Latency = 10;
let NumMicroOps = 2;
- let ResourceCycles = [5, 5];
+ let ReleaseAtCycles = [5, 5];
}
def N2Write_13cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
let Latency = 13;
let NumMicroOps = 2;
- let ResourceCycles = [6, 7];
+ let ReleaseAtCycles = [6, 7];
}
def N2Write_15cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
let Latency = 15;
let NumMicroOps = 2;
- let ResourceCycles = [7, 8];
+ let ReleaseAtCycles = [7, 8];
}
def N2Write_16cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
let Latency = 16;
let NumMicroOps = 2;
- let ResourceCycles = [8, 8];
+ let ReleaseAtCycles = [8, 8];
}
def N2Write_4cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> {
@@ -742,7 +742,7 @@ def : SchedAlias<WriteLDIdx, N2Write_4cyc_1I_1L>;
// Load pair, signed immed offset, signed words
def : InstRW<[N2Write_5cyc_1M0, WriteLDHi], (instrs LDPSWi)>;
// Load pair, immed post-index or immed pre-index, signed words
-def : InstRW<[N2Write_5cyc_1M0, WriteLDHi, WriteAdr],
+def : InstRW<[WriteAdr, N2Write_5cyc_1M0, WriteLDHi],
(instregex "^LDPSW(post|pre)$")>;
// Store instructions
@@ -860,7 +860,7 @@ def : InstRW<[N2Write_6cyc_1L], (instregex "^LDR[SDQ]l$",
// Load vector reg, immed post-index
def : InstRW<[N2Write_6cyc_1I_1L, WriteI], (instregex "^LDR[BHSDQ]post$")>;
// Load vector reg, immed pre-index
-def : InstRW<[N2Write_6cyc_1I_1L, WriteAdr], (instregex "^LDR[BHSDQ]pre$")>;
+def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L], (instregex "^LDR[BHSDQ]pre$")>;
// Load vector reg, unsigned immed
def : InstRW<[N2Write_6cyc_1L], (instregex "^LDR[BHSDQ]ui$")>;
@@ -883,12 +883,12 @@ def : InstRW<[N2Write_6cyc_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>;
// Load vector pair, immed post-index, S/D-form
// Load vector pair, immed pre-index, S/D-form
-def : InstRW<[N2Write_6cyc_1I_1L, WriteLDHi, WriteAdr],
+def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L, WriteLDHi],
(instregex "^LDP[SD](pre|post)$")>;
// Load vector pair, immed post-index, Q-form
// Load vector pair, immed pre-index, Q-form
-def : InstRW<[N2Write_6cyc_2I_2L, WriteLDHi, WriteAdr], (instrs LDPQpost,
+def : InstRW<[WriteAdr, N2Write_6cyc_2I_2L, WriteLDHi], (instrs LDPQpost,
LDPQpre)>;
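
Load-pair records extend the same def-order rule to three writes: a pre/post-indexed LDP defines the updated base first, then its two destinations, so WriteAdr, the primary load write and WriteLDHi bind to the writeback, the low result and the high result respectively. Roughly:

  // LDPQpre/LDPQpost defs: (wback, Qt1, Qt2)
  def : InstRW<[WriteAdr,            // wback
                N2Write_6cyc_2I_2L,  // first destination register
                WriteLDHi],          // second ("high") destination register
               (instrs LDPQpost, LDPQpre)>;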
// FP store instructions
@@ -1238,223 +1238,223 @@ def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
// ASIMD load, 1 element, multiple, 1 reg, D-form
def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
-def : InstRW<[N2Write_6cyc_1L, WriteAdr],
+def : InstRW<[WriteAdr, N2Write_6cyc_1L],
(instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;
// ASIMD load, 1 element, multiple, 1 reg, Q-form
def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>;
-def : InstRW<[N2Write_6cyc_1L, WriteAdr],
+def : InstRW<[WriteAdr, N2Write_6cyc_1L],
(instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 2 reg, D-form
def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
-def : InstRW<[N2Write_6cyc_2L, WriteAdr],
+def : InstRW<[WriteAdr, N2Write_6cyc_2L],
(instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
// ASIMD load, 1 element, multiple, 2 reg, Q-form
def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[N2Write_6cyc_2L, WriteAdr],
+def : InstRW<[WriteAdr, N2Write_6cyc_2L],
(instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 3 reg, D-form
def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
-def : InstRW<[N2Write_6cyc_3L, WriteAdr],
+def : InstRW<[WriteAdr, N2Write_6cyc_3L],
(instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
// ASIMD load, 1 element, multiple, 3 reg, Q-form
def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[N2Write_6cyc_3L, WriteAdr],
+def : InstRW<[WriteAdr, N2Write_6cyc_3L],
(instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 4 reg, D-form
def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
-def : InstRW<[N2Write_7cyc_4L, WriteAdr],
+def : InstRW<[WriteAdr, N2Write_7cyc_4L],
(instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
// ASIMD load, 1 element, multiple, 4 reg, Q-form
def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[N2Write_7cyc_4L, WriteAdr],
+def : InstRW<[WriteAdr, N2Write_7cyc_4L],
(instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, one lane, B/H/S
// ASIMD load, 1 element, one lane, D
def : InstRW<[N2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)$")>;
-def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)_POST$")>;
// ASIMD load, 1 element, all lanes, D-form, B/H/S
// ASIMD load, 1 element, all lanes, D-form, D
def : InstRW<[N2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)$")>;
-def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>;
// ASIMD load, 1 element, all lanes, Q-form
def : InstRW<[N2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 2 element, multiple, D-form, B/H/S
def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)$")>;
-def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
// ASIMD load, 2 element, multiple, Q-form, B/H/S
// ASIMD load, 2 element, multiple, Q-form, D
def : InstRW<[N2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[N2Write_8cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 2 element, one lane, B/H
// ASIMD load, 2 element, one lane, S
// ASIMD load, 2 element, one lane, D
def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)$")>;
-def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)_POST$")>;
// ASIMD load, 2 element, all lanes, D-form, B/H/S
// ASIMD load, 2 element, all lanes, D-form, D
def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)$")>;
-def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>;
// ASIMD load, 2 element, all lanes, Q-form
def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 3 element, multiple, D-form, B/H/S
def : InstRW<[N2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)$")>;
-def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
// ASIMD load, 3 element, multiple, Q-form, B/H/S
def : InstRW<[N2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s)$")>;
-def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s)_POST$")>;
// ASIMD load, 3 element, multiple, Q-form, D
def : InstRW<[N2Write_8cyc_3L_3V], (instregex "LD3Threev(2d)$")>;
-def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_8cyc_3L_3V], (instregex "LD3Threev(2d)_POST$")>;
// ASIMD load, 3 element, one lane, B/H
// ASIMD load, 3 element, one lane, S
// ASIMD load, 3 element, one lane, D
def : InstRW<[N2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)$")>;
-def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)_POST$")>;
// ASIMD load, 3 element, all lanes, D-form, B/H/S
// ASIMD load, 3 element, all lanes, D-form, D
def : InstRW<[N2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)$")>;
-def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>;
// ASIMD load, 3 element, all lanes, Q-form, B/H/S
// ASIMD load, 3 element, all lanes, Q-form, D
def : InstRW<[N2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 4 element, multiple, D-form, B/H/S
def : InstRW<[N2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)$")>;
-def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
// ASIMD load, 4 element, multiple, Q-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, D
def : InstRW<[N2Write_9cyc_4L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[N2Write_9cyc_4L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_9cyc_4L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 4 element, one lane, B/H
// ASIMD load, 4 element, one lane, S
// ASIMD load, 4 element, one lane, D
def : InstRW<[N2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)$")>;
-def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)_POST$")>;
// ASIMD load, 4 element, all lanes, D-form, B/H/S
// ASIMD load, 4 element, all lanes, D-form, D
def : InstRW<[N2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)$")>;
-def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>;
// ASIMD load, 4 element, all lanes, Q-form, B/H/S
// ASIMD load, 4 element, all lanes, Q-form, D
def : InstRW<[N2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[N2Write_8cyc_4L_4V, WriteAdr], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>;
// ASIMD store instructions
// -----------------------------------------------------------------------------
// ASIMD store, 1 element, multiple, 1 reg, D-form
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
-def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
// ASIMD store, 1 element, multiple, 1 reg, Q-form
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
-def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 2 reg, D-form
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
-def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
// ASIMD store, 1 element, multiple, 2 reg, Q-form
def : InstRW<[N2Write_2cyc_2L01_2V], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_2cyc_2L01_2V], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 3 reg, D-form
def : InstRW<[N2Write_2cyc_2L01_2V], (instregex "ST1Threev(8b|4h|2s|1d)$")>;
-def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_2cyc_2L01_2V], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
// ASIMD store, 1 element, multiple, 3 reg, Q-form
def : InstRW<[N2Write_2cyc_3L01_3V], (instregex "ST1Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[N2Write_2cyc_3L01_3V, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_2cyc_3L01_3V], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 4 reg, D-form
def : InstRW<[N2Write_2cyc_2L01_2V], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
-def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_2cyc_2L01_2V], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
// ASIMD store, 1 element, multiple, 4 reg, Q-form
def : InstRW<[N2Write_2cyc_4L01_4V], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[N2Write_2cyc_4L01_4V, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_2cyc_4L01_4V], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, one lane, B/H/S
// ASIMD store, 1 element, one lane, D
def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "ST1i(8|16|32|64)$")>;
-def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_4cyc_1L01_1V], (instregex "ST1i(8|16|32|64)_POST$")>;
// ASIMD store, 2 element, multiple, D-form, B/H/S
def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "ST2Twov(8b|4h|2s)$")>;
-def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_4cyc_1L01_1V], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
// ASIMD store, 2 element, multiple, Q-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, D
def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[N2Write_4cyc_2L01_2V, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_4cyc_2L01_2V], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 2 element, one lane, B/H/S
// ASIMD store, 2 element, one lane, D
def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "ST2i(8|16|32|64)$")>;
-def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_4cyc_1L01_1V], (instregex "ST2i(8|16|32|64)_POST$")>;
// ASIMD store, 3 element, multiple, D-form, B/H/S
def : InstRW<[N2Write_5cyc_2L01_2V], (instregex "ST3Threev(8b|4h|2s)$")>;
-def : InstRW<[N2Write_5cyc_2L01_2V, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_5cyc_2L01_2V], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
// ASIMD store, 3 element, multiple, Q-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, D
def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST3Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 3 element, one lane, B/H
// ASIMD store, 3 element, one lane, S
// ASIMD store, 3 element, one lane, D
def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST3i(8|16|32|64)$")>;
-def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "ST3i(8|16|32|64)_POST$")>;
// ASIMD store, 4 element, multiple, D-form, B/H/S
def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST4Fourv(8b|4h|2s)$")>;
-def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
// ASIMD store, 4 element, multiple, Q-form, B/H/S
def : InstRW<[N2Write_7cyc_6L01_6V], (instregex "ST4Fourv(16b|8h|4s)$")>;
-def : InstRW<[N2Write_7cyc_6L01_6V, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_7cyc_6L01_6V], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
// ASIMD store, 4 element, multiple, Q-form, D
def : InstRW<[N2Write_5cyc_4L01_4V], (instregex "ST4Fourv(2d)$")>;
-def : InstRW<[N2Write_5cyc_4L01_4V, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_5cyc_4L01_4V], (instregex "ST4Fourv(2d)_POST$")>;
// ASIMD store, 4 element, one lane, B/H/S
def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST4i(8|16|32)$")>;
-def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST4i(8|16|32)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "ST4i(8|16|32)_POST$")>;
// ASIMD store, 4 element, one lane, D
def : InstRW<[N2Write_4cyc_3L01_3V], (instregex "ST4i(64)$")>;
-def : InstRW<[N2Write_4cyc_3L01_3V, WriteAdr], (instregex "ST4i(64)_POST$")>;
+def : InstRW<[WriteAdr, N2Write_4cyc_3L01_3V], (instregex "ST4i(64)_POST$")>;
// Cryptography extensions
// -----------------------------------------------------------------------------
@@ -2082,10 +2082,10 @@ def : InstRW<[N2Write_6cyc_1L], (instrs LDR_ZXI)>;
def : InstRW<[N2Write_6cyc_1L_1M], (instrs LDR_PXI)>;
// Contiguous load, scalar + imm
-def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM_REAL$",
- "^LD1S?B_[HSD]_IMM_REAL$",
- "^LD1S?H_[SD]_IMM_REAL$",
- "^LD1S?W_D_IMM_REAL$" )>;
+def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM$",
+ "^LD1S?B_[HSD]_IMM$",
+ "^LD1S?H_[SD]_IMM$",
+ "^LD1S?W_D_IMM$" )>;
// Contiguous load, scalar + scalar
def : InstRW<[N2Write_6cyc_1L01], (instregex "^LD1[BHWD]$",
"^LD1S?B_[HSD]$",
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
index 571f290bbf83..726be1a547b9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
@@ -111,9 +111,9 @@ def V1Write_2c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 2; }
def V1Write_3c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 3; }
def V1Write_5c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 5; }
def V1Write_12c5_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 12;
- let ResourceCycles = [5]; }
+ let ReleaseAtCycles = [5]; }
def V1Write_20c5_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 20;
- let ResourceCycles = [5]; }
+ let ReleaseAtCycles = [5]; }
def V1Write_2c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 2; }
def V1Write_3c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 3; }
def V1Write_4c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
@@ -123,17 +123,17 @@ def V1Write_3c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 3; }
def V1Write_4c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 4; }
def V1Write_6c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 6; }
def V1Write_10c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 10;
- let ResourceCycles = [7]; }
+ let ReleaseAtCycles = [7]; }
def V1Write_12c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 12;
- let ResourceCycles = [7]; }
+ let ReleaseAtCycles = [7]; }
def V1Write_13c10_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 13;
- let ResourceCycles = [10]; }
+ let ReleaseAtCycles = [10]; }
def V1Write_15c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 15;
- let ResourceCycles = [7]; }
+ let ReleaseAtCycles = [7]; }
def V1Write_16c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 16;
- let ResourceCycles = [7]; }
+ let ReleaseAtCycles = [7]; }
def V1Write_20c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 20;
- let ResourceCycles = [7]; }
+ let ReleaseAtCycles = [7]; }
def V1Write_2c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 2; }
def V1Write_3c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
def V1Write_4c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 4; }
@@ -141,17 +141,17 @@ def V1Write_5c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
def V1Write_3c_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 3; }
def V1Write_4c_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 4; }
def V1Write_7c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 7;
- let ResourceCycles = [7]; }
+ let ReleaseAtCycles = [7]; }
def V1Write_10c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 10;
- let ResourceCycles = [7]; }
+ let ReleaseAtCycles = [7]; }
def V1Write_13c5_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
- let ResourceCycles = [5]; }
+ let ReleaseAtCycles = [5]; }
def V1Write_13c11_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
- let ResourceCycles = [11]; }
+ let ReleaseAtCycles = [11]; }
def V1Write_15c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 15;
- let ResourceCycles = [7]; }
+ let ReleaseAtCycles = [7]; }
def V1Write_16c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 16;
- let ResourceCycles = [7]; }
+ let ReleaseAtCycles = [7]; }
def V1Write_2c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 2; }
def V1Write_3c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 3; }
def V1Write_4c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 4; }
@@ -624,14 +624,14 @@ def : SchedAlias<WriteAdr, V1Write_1c_1I>;
// Load pair, immed offset
def : SchedAlias<WriteLDHi, V1Write_4c_1L>;
def : InstRW<[V1Write_4c_1L, V1Write_0c_0Z], (instrs LDPWi, LDNPWi)>;
-def : InstRW<[V1Write_4c_1L, V1Write_0c_0Z, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_4c_1L, V1Write_0c_0Z],
(instrs LDPWpost, LDPWpre)>;
// Load pair, signed immed offset, signed words
def : InstRW<[V1Write_5c_1I_1L, V1Write_0c_0Z], (instrs LDPSWi)>;
// Load pair, immed post or pre-index, signed words
-def : InstRW<[V1Write_5c_1I_1L, V1Write_0c_0Z, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_5c_1I_1L, V1Write_0c_0Z],
(instrs LDPSWpost, LDPSWpre)>;
@@ -735,7 +735,7 @@ def : InstRW<[V1Write_6c_1L, ReadAdrBase], (instregex "^LDR[SDQ]l$",
// Load vector reg, immed post-index
// Load vector reg, immed pre-index
-def : InstRW<[V1Write_6c_1L, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_6c_1L],
(instregex "^LDR[BHSDQ](post|pre)$")>;
// Load vector reg, register offset, basic
@@ -756,12 +756,12 @@ def : InstRW<[V1Write_6c_1L, WriteLDHi], (instrs LDPQi, LDNPQi)>;
// Load vector pair, immed post-index, S/D-form
// Load vector pair, immed pre-index, S/D-form
-def : InstRW<[V1Write_6c_1L, V1Write_0c_0Z, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_6c_1L, V1Write_0c_0Z],
(instregex "^LDP[SD](pre|post)$")>;
// Load vector pair, immed post-index, Q-form
// Load vector pair, immed pre-index, Q-form
-def : InstRW<[V1Write_6c_1L, WriteLDHi, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_6c_1L, WriteLDHi],
(instrs LDPQpost, LDPQpre)>;
@@ -773,7 +773,7 @@ def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STUR[BHSDQ]i$")>;
// Store vector reg, immed post-index, B/H/S/D/Q-form
// Store vector reg, immed pre-index, B/H/S/D/Q-form
-def : InstRW<[V1Write_2c_1L01_1V01, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01],
(instregex "^STR[BHSDQ](pre|post)$")>;
// Store vector reg, unsigned immed, B/H/S/D/Q-form
@@ -798,12 +798,12 @@ def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STN?P[SDQ]i$")>;
// Store vector pair, immed post-index, S/D-form
// Store vector pair, immed pre-index, S/D-form
-def : InstRW<[V1Write_2c_1L01_1V01, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01],
(instregex "^STP[SD](pre|post)$")>;
// Store vector pair, immed post-index, Q-form
// Store vector pair, immed pre-index, Q-form
-def : InstRW<[V1Write_2c_2L01_1V01, WriteAdr], (instrs STPQpre, STPQpost)>;
+def : InstRW<[WriteAdr, V1Write_2c_2L01_1V01], (instrs STPQpre, STPQpost)>;
// ASIMD integer instructions
@@ -1074,31 +1074,31 @@ def : InstRW<[V1Write_5c_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
// ASIMD load, 1 element, multiple, 1 reg
def : InstRW<[V1Write_6c_1L],
(instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
-def : InstRW<[V1Write_6c_1L, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_6c_1L],
(instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 2 reg
def : InstRW<[V1Write_6c_2L],
(instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
-def : InstRW<[V1Write_6c_2L, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_6c_2L],
(instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 3 reg
def : InstRW<[V1Write_6c_3L],
(instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
-def : InstRW<[V1Write_6c_3L, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_6c_3L],
(instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 4 reg, D-form
def : InstRW<[V1Write_6c_2L],
(instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
-def : InstRW<[V1Write_6c_2L, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_6c_2L],
(instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
// ASIMD load, 1 element, multiple, 4 reg, Q-form
def : InstRW<[V1Write_7c_4L],
(instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[V1Write_7c_4L, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_7c_4L],
(instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, one lane
@@ -1106,20 +1106,20 @@ def : InstRW<[V1Write_7c_4L, WriteAdr],
def : InstRW<[V1Write_8c_1L_1V],
(instregex "^LD1(i|Rv)(8|16|32|64)$",
"^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
-def : InstRW<[V1Write_8c_1L_1V, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_8c_1L_1V],
(instregex "^LD1i(8|16|32|64)_POST$",
"^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
// ASIMD load, 2 element, multiple, D-form
def : InstRW<[V1Write_8c_1L_2V],
(instregex "^LD2Twov(8b|4h|2s)$")>;
-def : InstRW<[V1Write_8c_1L_2V, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_8c_1L_2V],
(instregex "^LD2Twov(8b|4h|2s)_POST$")>;
// ASIMD load, 2 element, multiple, Q-form
def : InstRW<[V1Write_8c_2L_2V],
(instregex "^LD2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[V1Write_8c_2L_2V, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_8c_2L_2V],
(instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 2 element, one lane
@@ -1127,7 +1127,7 @@ def : InstRW<[V1Write_8c_2L_2V, WriteAdr],
def : InstRW<[V1Write_8c_1L_2V],
(instregex "^LD2i(8|16|32|64)$",
"^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
-def : InstRW<[V1Write_8c_1L_2V, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_8c_1L_2V],
(instregex "^LD2i(8|16|32|64)_POST$",
"^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
@@ -1138,7 +1138,7 @@ def : InstRW<[V1Write_8c_2L_3V],
(instregex "^LD3Threev(8b|4h|2s)$",
"^LD3i(8|16|32|64)$",
"^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
-def : InstRW<[V1Write_8c_2L_3V, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_8c_2L_3V],
(instregex "^LD3Threev(8b|4h|2s)_POST$",
"^LD3i(8|16|32|64)_POST$",
"^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
@@ -1146,7 +1146,7 @@ def : InstRW<[V1Write_8c_2L_3V, WriteAdr],
// ASIMD load, 3 element, multiple, Q-form
def : InstRW<[V1Write_8c_3L_3V],
(instregex "^LD3Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[V1Write_8c_3L_3V, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_8c_3L_3V],
(instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 4 element, multiple, D-form
@@ -1156,7 +1156,7 @@ def : InstRW<[V1Write_8c_3L_4V],
(instregex "^LD4Fourv(8b|4h|2s)$",
"^LD4i(8|16|32|64)$",
"^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
-def : InstRW<[V1Write_8c_3L_4V, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_8c_3L_4V],
(instregex "^LD4Fourv(8b|4h|2s)_POST$",
"^LD4i(8|16|32|64)_POST$",
"^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
@@ -1164,7 +1164,7 @@ def : InstRW<[V1Write_8c_3L_4V, WriteAdr],
// ASIMD load, 4 element, multiple, Q-form
def : InstRW<[V1Write_9c_4L_4V],
(instregex "^LD4Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[V1Write_9c_4L_4V, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_9c_4L_4V],
(instregex "^LD4Fourv(16b|8h|4s|2d)_POST$")>;
@@ -1176,7 +1176,7 @@ def : InstRW<[V1Write_9c_4L_4V, WriteAdr],
def : InstRW<[V1Write_2c_1L01_1V01],
(instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$",
"^ST1Twov(8b|4h|2s|1d)$")>;
-def : InstRW<[V1Write_2c_1L01_1V01, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01],
(instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$",
"^ST1Twov(8b|4h|2s|1d)_POST$")>;
@@ -1187,7 +1187,7 @@ def : InstRW<[V1Write_2c_2L01_2V01],
(instregex "^ST1Twov(16b|8h|4s|2d)$",
"^ST1Threev(8b|4h|2s|1d)$",
"^ST1Fourv(8b|4h|2s|1d)$")>;
-def : InstRW<[V1Write_2c_2L01_2V01, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_2c_2L01_2V01],
(instregex "^ST1Twov(16b|8h|4s|2d)_POST$",
"^ST1Threev(8b|4h|2s|1d)_POST$",
"^ST1Fourv(8b|4h|2s|1d)_POST$")>;
@@ -1195,13 +1195,13 @@ def : InstRW<[V1Write_2c_2L01_2V01, WriteAdr],
// ASIMD store, 1 element, multiple, 3 reg, Q-form
def : InstRW<[V1Write_2c_3L01_3V01],
(instregex "^ST1Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[V1Write_2c_3L01_3V01, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_2c_3L01_3V01],
(instregex "^ST1Threev(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 4 reg, Q-form
def : InstRW<[V1Write_2c_4L01_4V01],
(instregex "^ST1Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[V1Write_2c_4L01_4V01, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_2c_4L01_4V01],
(instregex "^ST1Fourv(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, one lane
@@ -1211,7 +1211,7 @@ def : InstRW<[V1Write_4c_1L01_1V01],
(instregex "^ST1i(8|16|32|64)$",
"^ST2Twov(8b|4h|2s)$",
"^ST2i(8|16|32|64)$")>;
-def : InstRW<[V1Write_4c_1L01_1V01, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_4c_1L01_1V01],
(instregex "^ST1i(8|16|32|64)_POST$",
"^ST2Twov(8b|4h|2s)_POST$",
"^ST2i(8|16|32|64)_POST$")>;
@@ -1225,7 +1225,7 @@ def : InstRW<[V1Write_4c_2L01_2V01],
"^ST3Threev(8b|4h|2s)$",
"^ST3i(8|16|32|64)$",
"^ST4i64$")>;
-def : InstRW<[V1Write_4c_2L01_2V01, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_4c_2L01_2V01],
(instregex "^ST2Twov(16b|8h|4s|2d)_POST$",
"^ST3Threev(8b|4h|2s)_POST$",
"^ST3i(8|16|32|64)_POST$",
@@ -1234,31 +1234,31 @@ def : InstRW<[V1Write_4c_2L01_2V01, WriteAdr],
// ASIMD store, 3 element, multiple, Q-form
def : InstRW<[V1Write_5c_3L01_3V01],
(instregex "^ST3Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[V1Write_5c_3L01_3V01, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_5c_3L01_3V01],
(instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 4 element, multiple, D-form
def : InstRW<[V1Write_6c_3L01_3V01],
(instregex "^ST4Fourv(8b|4h|2s)$")>;
-def : InstRW<[V1Write_6c_3L01_3V01, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_6c_3L01_3V01],
(instregex "^ST4Fourv(8b|4h|2s)_POST$")>;
// ASIMD store, 4 element, multiple, Q-form, B/H/S
def : InstRW<[V1Write_7c_6L01_6V01],
(instregex "^ST4Fourv(16b|8h|4s)$")>;
-def : InstRW<[V1Write_7c_6L01_6V01, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_7c_6L01_6V01],
(instregex "^ST4Fourv(16b|8h|4s)_POST$")>;
// ASIMD store, 4 element, multiple, Q-form, D
def : InstRW<[V1Write_4c_4L01_4V01],
(instrs ST4Fourv2d)>;
-def : InstRW<[V1Write_4c_4L01_4V01, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_4c_4L01_4V01],
(instrs ST4Fourv2d_POST)>;
// ASIMD store, 4 element, one lane, B/H/S
def : InstRW<[V1Write_6c_3L_3V],
(instregex "^ST4i(8|16|32)$")>;
-def : InstRW<[V1Write_6c_3L_3V, WriteAdr],
+def : InstRW<[WriteAdr, V1Write_6c_3L_3V],
(instregex "^ST4i(8|16|32)_POST$")>;
@@ -1687,10 +1687,10 @@ def : InstRW<[V1Write_6c_1L_1M], (instrs LDR_PXI)>;
// Contiguous load, scalar + scalar
// Contiguous load broadcast, scalar + imm
// Contiguous load broadcast, scalar + scalar
-def : InstRW<[V1Write_6c_1L01], (instregex "^LD1[BHWD]_IMM_REAL$",
- "^LD1S?B_[HSD]_IMM_REAL$",
- "^LD1S?H_[SD]_IMM_REAL$",
- "^LD1S?W_D_IMM_REAL$",
+def : InstRW<[V1Write_6c_1L01], (instregex "^LD1[BHWD]_IMM$",
+ "^LD1S?B_[HSD]_IMM$",
+ "^LD1S?H_[SD]_IMM$",
+ "^LD1S?W_D_IMM$",
"^LD1[BWD]$",
"^LD1S?B_[HSD]$",
"^LD1S?W_D$",
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index 199ebc6ac650..3367d5d0cd31 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -88,6 +88,10 @@ def : WriteRes<WriteLDHi, []> { let Latency = 4; }
// Define customized scheduler read/write types specific to the Neoverse V2.
//===----------------------------------------------------------------------===//
+
+// Define generic 0 micro-op types
+def V2Write_0cyc : SchedWriteRes<[]> { let Latency = 0; }
+
// Define generic 1 micro-op types
def V2Write_1cyc_1B : SchedWriteRes<[V2UnitB]> { let Latency = 1; }
@@ -102,9 +106,9 @@ def V2Write_2cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 2; }
def V2Write_3cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 3; }
def V2Write_5cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 5; }
def V2Write_12cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 12;
- let ResourceCycles = [12]; }
+ let ReleaseAtCycles = [12]; }
def V2Write_20cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 20;
- let ResourceCycles = [20]; }
+ let ReleaseAtCycles = [20]; }
def V2Write_4cyc_1L : SchedWriteRes<[V2UnitL]> { let Latency = 4; }
def V2Write_6cyc_1L : SchedWriteRes<[V2UnitL]> { let Latency = 6; }
def V2Write_2cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 2; }
@@ -113,7 +117,7 @@ def V2Write_2cyc_1V01 : SchedWriteRes<[V2UnitV01]> { let Latency = 2; }
def V2Write_2cyc_1V23 : SchedWriteRes<[V2UnitV23]> { let Latency = 2; }
def V2Write_3cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
def V2Write_3cyc_1V01 : SchedWriteRes<[V2UnitV01]> { let Latency = 3;
- let ResourceCycles = [2]; }
+ let ReleaseAtCycles = [2]; }
def V2Write_3cyc_1V23 : SchedWriteRes<[V2UnitV23]> { let Latency = 3; }
def V2Write_4cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
def V2Write_5cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
@@ -124,26 +128,26 @@ def V2Write_3cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 3; }
def V2Write_4cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 4; }
def V2Write_4cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
def V2Write_7cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 7;
- let ResourceCycles = [7]; }
+ let ReleaseAtCycles = [7]; }
def V2Write_7cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 7;
- let ResourceCycles = [2]; }
+ let ReleaseAtCycles = [2]; }
def V2Write_9cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 9; }
def V2Write_9cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 9;
- let ResourceCycles = [2]; }
+ let ReleaseAtCycles = [2]; }
def V2Write_10cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 10; }
def V2Write_10cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 10;
- let ResourceCycles = [2]; }
+ let ReleaseAtCycles = [2]; }
def V2Write_12cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 12;
- let ResourceCycles = [11]; }
+ let ReleaseAtCycles = [11]; }
def V2Write_13cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 13; }
def V2Write_15cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 15; }
def V2Write_15cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 15;
- let ResourceCycles = [8]; }
+ let ReleaseAtCycles = [8]; }
def V2Write_16cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 16; }
def V2Write_16cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 16;
- let ResourceCycles = [8]; }
+ let ReleaseAtCycles = [8]; }
def V2Write_20cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 20;
- let ResourceCycles = [20]; }
+ let ReleaseAtCycles = [20]; }
def V2Write_2cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 2; }
def V2Write_2cyc_1V13 : SchedWriteRes<[V2UnitV13]> { let Latency = 2; }
def V2Write_3cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 3; }
@@ -900,6 +904,18 @@ def V2Write_StrHQ : SchedWriteVariant<[
SchedVar<NeoverseHQForm, [V2Write_2cyc_1L01_1V01_1I]>,
SchedVar<NoSchedPred, [V2Write_2cyc_1L01_1V01]>]>;
+def V2Write_0or1cyc_1I : SchedWriteVariant<[
+ SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
+ SchedVar<NoSchedPred, [V2Write_1cyc_1I]>]>;
+
+def V2Write_0or2cyc_1V : SchedWriteVariant<[
+ SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
+ SchedVar<NoSchedPred, [V2Write_2cyc_1V]>]>;
+
+def V2Write_0or3cyc_1M0 : SchedWriteVariant<[
+ SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
+ SchedVar<NoSchedPred, [V2Write_3cyc_1M0]>]>;
+
def V2Write_2or3cyc_1M : SchedWriteVariant<[
SchedVar<NeoversePdIsPg, [V2Write_3cyc_1M]>,
SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>;
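
The three new variants model zero idioms: a move whose result is architecturally zero (MOVZ of a zero immediate, ORR with the zero register, MOVI #0, FMOV from WZR/XZR) can be eliminated early in the pipeline, so under the NeoverseZeroMove predicate (defined alongside the other Neoverse scheduling predicates) it completes in zero cycles on no execution units, falling back to the normal write otherwise. The shape, using the definitions above:

  def V2Write_0or3cyc_1M0 : SchedWriteVariant<[
    SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,     // zero idiom: eliminated
    SchedVar<NoSchedPred,      [V2Write_3cyc_1M0]>  // otherwise: normal 3-cycle M0 op
  ]>;
  // Applied later in this file, e.g. to FMOVs from general registers:
  def : InstRW<[V2Write_0or3cyc_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;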
@@ -1054,19 +1070,19 @@ def V2Rd_ZBFMAL : SchedReadAdvance<3, [V2Wr_ZBFMAL]>;
//===----------------------------------------------------------------------===//
// Define types with long resource cycles (rc)
-def V2Write_6cyc_1V1_5rc : SchedWriteRes<[V2UnitV1]> { let Latency = 6; let ResourceCycles = [ 5]; }
-def V2Write_7cyc_1V02_7rc : SchedWriteRes<[V2UnitV02]> { let Latency = 7; let ResourceCycles = [ 7]; }
-def V2Write_10cyc_1V02_5rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ResourceCycles = [ 5]; }
-def V2Write_10cyc_1V02_9rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ResourceCycles = [ 9]; }
-def V2Write_10cyc_1V02_10rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ResourceCycles = [10]; }
-def V2Write_10cyc_1V0_9rc : SchedWriteRes<[V2UnitV0]> { let Latency = 10; let ResourceCycles = [ 9]; }
-def V2Write_10cyc_1V1_9rc : SchedWriteRes<[V2UnitV1]> { let Latency = 10; let ResourceCycles = [ 9]; }
-def V2Write_13cyc_1V0_12rc : SchedWriteRes<[V2UnitV0]> { let Latency = 13; let ResourceCycles = [12]; }
-def V2Write_13cyc_1V02_12rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ResourceCycles = [12]; }
-def V2Write_13cyc_1V02_13rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ResourceCycles = [13]; }
-def V2Write_15cyc_1V02_14rc : SchedWriteRes<[V2UnitV02]> { let Latency = 15; let ResourceCycles = [14]; }
-def V2Write_16cyc_1V02_15rc : SchedWriteRes<[V2UnitV02]> { let Latency = 16; let ResourceCycles = [15]; }
-def V2Write_16cyc_1V0_14rc : SchedWriteRes<[V2UnitV0]> { let Latency = 16; let ResourceCycles = [14]; }
+def V2Write_6cyc_1V1_5rc : SchedWriteRes<[V2UnitV1]> { let Latency = 6; let ReleaseAtCycles = [ 5]; }
+def V2Write_7cyc_1V02_7rc : SchedWriteRes<[V2UnitV02]> { let Latency = 7; let ReleaseAtCycles = [ 7]; }
+def V2Write_10cyc_1V02_5rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [ 5]; }
+def V2Write_10cyc_1V02_9rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [ 9]; }
+def V2Write_10cyc_1V02_10rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [10]; }
+def V2Write_10cyc_1V0_9rc : SchedWriteRes<[V2UnitV0]> { let Latency = 10; let ReleaseAtCycles = [ 9]; }
+def V2Write_10cyc_1V1_9rc : SchedWriteRes<[V2UnitV1]> { let Latency = 10; let ReleaseAtCycles = [ 9]; }
+def V2Write_13cyc_1V0_12rc : SchedWriteRes<[V2UnitV0]> { let Latency = 13; let ReleaseAtCycles = [12]; }
+def V2Write_13cyc_1V02_12rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ReleaseAtCycles = [12]; }
+def V2Write_13cyc_1V02_13rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ReleaseAtCycles = [13]; }
+def V2Write_15cyc_1V02_14rc : SchedWriteRes<[V2UnitV02]> { let Latency = 15; let ReleaseAtCycles = [14]; }
+def V2Write_16cyc_1V02_15rc : SchedWriteRes<[V2UnitV02]> { let Latency = 16; let ReleaseAtCycles = [15]; }
+def V2Write_16cyc_1V0_14rc : SchedWriteRes<[V2UnitV0]> { let Latency = 16; let ReleaseAtCycles = [14]; }
// Miscellaneous
// -----------------------------------------------------------------------------
@@ -1092,9 +1108,9 @@ def : InstRW<[V2Write_1cyc_1B_1R], (instrs BL, BLR)>;
// ALU, basic
// ALU, basic, flagset
-def : SchedAlias<WriteI, V2Write_1cyc_1I>;
-def : InstRW<[V2Write_1cyc_1F],
- (instregex "^(ADC|SBC)S[WX]r$")>;
+def : SchedAlias<WriteI, V2Write_1cyc_1I>;
+def : InstRW<[V2Write_1cyc_1F], (instregex "^(ADC|SBC)S[WX]r$")>;
+def : InstRW<[V2Write_0or1cyc_1I], (instregex "^MOVZ[WX]i$")>;
// ALU, extend and shift
def : SchedAlias<WriteIEReg, V2Write_2cyc_1M>;
@@ -1122,8 +1138,8 @@ def : InstRW<[V2Write_2cyc_1M], (instrs IRG, IRGstack)>;
def : InstRW<[V2Write_1cyc_1I], (instrs GMI, SUBP, SUBPS)>;
// Logical, shift, no flagset
-def : InstRW<[V2Write_1cyc_1I],
- (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
+def : InstRW<[V2Write_1cyc_1I], (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
+def : InstRW<[V2Write_0or1cyc_1I], (instregex "^ORR[WX]rs$")>;
// Logical, shift, flagset
def : InstRW<[V2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>;
@@ -1213,7 +1229,7 @@ def : InstRW<[V2Write_5cyc_1L_1F], (instrs LDRWl, LDRXl, LDRSWl, PRFMl)>;
def : InstRW<[V2Write_5cyc_1I_3L, WriteLDHi], (instrs LDPSWi)>;
// Load pair, immed post-index or immed pre-index, signed words
-def : InstRW<[V2Write_5cyc_1I_3L, WriteLDHi, WriteAdr],
+def : InstRW<[WriteAdr, V2Write_5cyc_1I_3L, WriteLDHi],
(instregex "^LDPSW(post|pre)$")>;
// Store instructions
@@ -1224,7 +1240,7 @@ def : InstRW<[V2Write_5cyc_1I_3L, WriteLDHi, WriteAdr],
def : SchedAlias<WriteST, V2Write_1cyc_1L01_1D>;
def : SchedAlias<WriteSTIdx, V2Write_1cyc_1L01_1D>;
def : SchedAlias<WriteSTP, V2Write_1cyc_1L01_1D>;
-def : SchedAlias<WriteAdr, V2Write_1cyc_1I>; // copied from A57.
+def : SchedAlias<WriteAdr, V2Write_1cyc_1I>;
// Tag load instructions
// -----------------------------------------------------------------------------
@@ -1318,7 +1334,8 @@ def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTSHr, FCVTDHr, FCVTHSr, FCVTDSr,
def : SchedAlias<WriteFImm, V2Write_2cyc_1V>;
// FP transfer, from gen to low half of vec reg
-def : InstRW<[V2Write_3cyc_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
+def : InstRW<[V2Write_0or3cyc_1M0],
+ (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
// FP transfer, from gen to high half of vec reg
def : InstRW<[V2Write_5cyc_1M0_1V], (instrs FMOVXDHighr)>;
@@ -1337,7 +1354,7 @@ def : InstRW<[V2Write_6cyc_1L], (instregex "^LDUR[BHSDQ]i$")>;
// Load vector reg, immed post-index
// Load vector reg, immed pre-index
-def : InstRW<[V2Write_6cyc_1I_1L, WriteAdr],
+def : InstRW<[WriteAdr, V2Write_6cyc_1I_1L],
(instregex "^LDR[BHSDQ](pre|post)$")>;
// Load vector reg, unsigned immed
@@ -1359,12 +1376,12 @@ def : InstRW<[V2Write_6cyc_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>;
// Load vector pair, immed post-index, S/D-form
// Load vector pair, immed pre-index, S/D-form
-def : InstRW<[V2Write_6cyc_1I_1L, WriteLDHi, WriteAdr],
+def : InstRW<[WriteAdr, V2Write_6cyc_1I_1L, WriteLDHi],
(instregex "^LDP[SD](pre|post)$")>;
// Load vector pair, immed post-index, Q-form
// Load vector pair, immed pre-index, Q-form
-def : InstRW<[V2Write_6cyc_2I_2L, WriteLDHi, WriteAdr], (instrs LDPQpost,
+def : InstRW<[WriteAdr, V2Write_6cyc_2I_2L, WriteLDHi], (instrs LDPQpost,
LDPQpre)>;
// FP store instructions
@@ -1662,6 +1679,7 @@ def : InstRW<[V2Write_3cyc_1V02], (instrs BFCVT)>;
// ASIMD transpose
// ASIMD unzip/zip
// Handled by SchedAlias<WriteV[dq], ...>
+def : InstRW<[V2Write_0or2cyc_1V], (instrs MOVID, MOVIv2d_ns)>;
// ASIMD duplicate, gen reg
def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>;
@@ -1725,220 +1743,220 @@ def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
// ASIMD load, 1 element, multiple, 1 reg, D-form
def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
-def : InstRW<[V2Write_6cyc_1L, WriteAdr],
+def : InstRW<[WriteAdr, V2Write_6cyc_1L],
(instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;
// ASIMD load, 1 element, multiple, 1 reg, Q-form
def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>;
-def : InstRW<[V2Write_6cyc_1L, WriteAdr],
+def : InstRW<[WriteAdr, V2Write_6cyc_1L],
(instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 2 reg, D-form
def : InstRW<[V2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
-def : InstRW<[V2Write_6cyc_2L, WriteAdr],
+def : InstRW<[WriteAdr, V2Write_6cyc_2L],
(instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
// ASIMD load, 1 element, multiple, 2 reg, Q-form
def : InstRW<[V2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[V2Write_6cyc_2L, WriteAdr],
+def : InstRW<[WriteAdr, V2Write_6cyc_2L],
(instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 3 reg, D-form
def : InstRW<[V2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
-def : InstRW<[V2Write_6cyc_3L, WriteAdr],
+def : InstRW<[WriteAdr, V2Write_6cyc_3L],
(instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
// ASIMD load, 1 element, multiple, 3 reg, Q-form
def : InstRW<[V2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[V2Write_6cyc_3L, WriteAdr],
+def : InstRW<[WriteAdr, V2Write_6cyc_3L],
(instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 4 reg, D-form
def : InstRW<[V2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
-def : InstRW<[V2Write_7cyc_4L, WriteAdr],
+def : InstRW<[WriteAdr, V2Write_7cyc_4L],
(instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
// ASIMD load, 1 element, multiple, 4 reg, Q-form
def : InstRW<[V2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[V2Write_7cyc_4L, WriteAdr],
+def : InstRW<[WriteAdr, V2Write_7cyc_4L],
(instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, one lane, B/H/S
// ASIMD load, 1 element, one lane, D
def : InstRW<[V2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)$")>;
-def : InstRW<[V2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)_POST$")>;
// ASIMD load, 1 element, all lanes, D-form, B/H/S
// ASIMD load, 1 element, all lanes, D-form, D
def : InstRW<[V2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)$")>;
-def : InstRW<[V2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>;
// ASIMD load, 1 element, all lanes, Q-form
def : InstRW<[V2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[V2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 2 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)$")>;
-def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
// ASIMD load, 2 element, multiple, Q-form, B/H/S
// ASIMD load, 2 element, multiple, Q-form, D
def : InstRW<[V2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[V2Write_8cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 2 element, one lane, B/H
// ASIMD load, 2 element, one lane, S
// ASIMD load, 2 element, one lane, D
def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)$")>;
-def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)_POST$")>;
// ASIMD load, 2 element, all lanes, D-form, B/H/S
// ASIMD load, 2 element, all lanes, D-form, D
def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)$")>;
-def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>;
// ASIMD load, 2 element, all lanes, Q-form
def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 3 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)$")>;
-def : InstRW<[V2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
// ASIMD load, 3 element, multiple, Q-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, D
def : InstRW<[V2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[V2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 3 element, one lane, B/H
// ASIMD load, 3 element, one lane, S
// ASIMD load, 3 element, one lane, D
def : InstRW<[V2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)$")>;
-def : InstRW<[V2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)_POST$")>;
// ASIMD load, 3 element, all lanes, D-form, B/H/S
// ASIMD load, 3 element, all lanes, D-form, D
def : InstRW<[V2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)$")>;
-def : InstRW<[V2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>;
// ASIMD load, 3 element, all lanes, Q-form, B/H/S
// ASIMD load, 3 element, all lanes, Q-form, D
def : InstRW<[V2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[V2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 4 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)$")>;
-def : InstRW<[V2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
// ASIMD load, 4 element, multiple, Q-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, D
def : InstRW<[V2Write_9cyc_6L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[V2Write_9cyc_6L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_9cyc_6L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 4 element, one lane, B/H
// ASIMD load, 4 element, one lane, S
// ASIMD load, 4 element, one lane, D
def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)$")>;
-def : InstRW<[V2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)_POST$")>;
// ASIMD load, 4 element, all lanes, D-form, B/H/S
// ASIMD load, 4 element, all lanes, D-form, D
-def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)$")>;
-def : InstRW<[V2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>;
// ASIMD load, 4 element, all lanes, Q-form, B/H/S
// ASIMD load, 4 element, all lanes, Q-form, D
-def : InstRW<[V2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[V2Write_8cyc_4L_4V, WriteAdr], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[V2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>;
// ASIMD store instructions
// -----------------------------------------------------------------------------
// ASIMD store, 1 element, multiple, 1 reg, D-form
def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
-def : InstRW<[V2Write_2cyc_1L01_1V01, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
// ASIMD store, 1 element, multiple, 1 reg, Q-form
def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
-def : InstRW<[V2Write_2cyc_1L01_1V01, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 2 reg, D-form
def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
-def : InstRW<[V2Write_2cyc_1L01_1V01, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
// ASIMD store, 1 element, multiple, 2 reg, Q-form
def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[V2Write_2cyc_2L01_2V01, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 3 reg, D-form
def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Threev(8b|4h|2s|1d)$")>;
-def : InstRW<[V2Write_2cyc_2L01_2V01, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
// ASIMD store, 1 element, multiple, 3 reg, Q-form
def : InstRW<[V2Write_2cyc_3L01_3V01], (instregex "ST1Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[V2Write_2cyc_3L01_3V01, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_2cyc_3L01_3V01], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 4 reg, D-form
def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
-def : InstRW<[V2Write_2cyc_2L01_2V01, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
// ASIMD store, 1 element, multiple, 4 reg, Q-form
def : InstRW<[V2Write_2cyc_4L01_4V01], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[V2Write_2cyc_4L01_4V01, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_2cyc_4L01_4V01], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, one lane, B/H/S
// ASIMD store, 1 element, one lane, D
def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST1i(8|16|32|64)$")>;
-def : InstRW<[V2Write_4cyc_1L01_2V01, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "ST1i(8|16|32|64)_POST$")>;
// ASIMD store, 2 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST2Twov(8b|4h|2s)$")>;
-def : InstRW<[V2Write_4cyc_1L01_2V01, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
// ASIMD store, 2 element, multiple, Q-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, D
def : InstRW<[V2Write_4cyc_2L01_4V01], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[V2Write_4cyc_2L01_4V01, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_4cyc_2L01_4V01], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 2 element, one lane, B/H/S
// ASIMD store, 2 element, one lane, D
def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST2i(8|16|32|64)$")>;
-def : InstRW<[V2Write_4cyc_1L01_2V01, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "ST2i(8|16|32|64)_POST$")>;
// ASIMD store, 3 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_5cyc_2L01_4V01], (instregex "ST3Threev(8b|4h|2s)$")>;
-def : InstRW<[V2Write_5cyc_2L01_4V01, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_5cyc_2L01_4V01], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
// ASIMD store, 3 element, multiple, Q-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, D
def : InstRW<[V2Write_6cyc_3L01_6V01], (instregex "ST3Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[V2Write_6cyc_3L01_6V01, WriteAdr], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_6cyc_3L01_6V01], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 3 element, one lane, B/H
// ASIMD store, 3 element, one lane, S
// ASIMD store, 3 element, one lane, D
def : InstRW<[V2Write_5cyc_2L01_4V01], (instregex "ST3i(8|16|32|64)$")>;
-def : InstRW<[V2Write_5cyc_2L01_4V01, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_5cyc_2L01_4V01], (instregex "ST3i(8|16|32|64)_POST$")>;
// ASIMD store, 4 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_6cyc_2L01_6V01], (instregex "ST4Fourv(8b|4h|2s)$")>;
-def : InstRW<[V2Write_6cyc_2L01_6V01, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_6cyc_2L01_6V01], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
// ASIMD store, 4 element, multiple, Q-form, B/H/S
def : InstRW<[V2Write_7cyc_4L01_12V01], (instregex "ST4Fourv(16b|8h|4s)$")>;
-def : InstRW<[V2Write_7cyc_4L01_12V01, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_7cyc_4L01_12V01], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
// ASIMD store, 4 element, multiple, Q-form, D
def : InstRW<[V2Write_5cyc_4L01_8V01], (instregex "ST4Fourv(2d)$")>;
-def : InstRW<[V2Write_5cyc_4L01_8V01, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_5cyc_4L01_8V01], (instregex "ST4Fourv(2d)_POST$")>;
// ASIMD store, 4 element, one lane, B/H/S
def : InstRW<[V2Write_6cyc_1L01_3V01], (instregex "ST4i(8|16|32)$")>;
-def : InstRW<[V2Write_6cyc_1L01_3V01, WriteAdr], (instregex "ST4i(8|16|32)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_6cyc_1L01_3V01], (instregex "ST4i(8|16|32)_POST$")>;
// ASIMD store, 4 element, one lane, D
def : InstRW<[V2Write_4cyc_2L01_4V01], (instregex "ST4i(64)$")>;
-def : InstRW<[V2Write_4cyc_2L01_4V01, WriteAdr], (instregex "ST4i(64)_POST$")>;
+def : InstRW<[WriteAdr, V2Write_4cyc_2L01_4V01], (instregex "ST4i(64)_POST$")>;
// Cryptography extensions
// -----------------------------------------------------------------------------
@@ -2590,10 +2608,10 @@ def : InstRW<[V2Write_6cyc_1L], (instrs LDR_ZXI)>;
def : InstRW<[V2Write_6cyc_1L_1M], (instrs LDR_PXI)>;
// Contiguous load, scalar + imm
-def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM_REAL$",
- "^LD1S?B_[HSD]_IMM_REAL$",
- "^LD1S?H_[SD]_IMM_REAL$",
- "^LD1S?W_D_IMM_REAL$" )>;
+def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM$",
+ "^LD1S?B_[HSD]_IMM$",
+ "^LD1S?H_[SD]_IMM$",
+ "^LD1S?W_D_IMM$" )>;
// Contiguous load, scalar + scalar
def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1[BHWD]$",
"^LD1S?B_[HSD]$",
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
index a4c6cd4b978f..97abec10f794 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
@@ -54,3 +54,30 @@ def NeoverseMULIdiomPred : MCSchedPredicate< // <op> Rd, Rs, Rv, ZR
SMADDLrrr, UMADDLrrr,
SMSUBLrrr, UMSUBLrrr]>,
CheckIsReg3Zero]>>;
+
+def NeoverseZeroMove : MCSchedPredicate<
+ CheckAny<[
+ // MOV Wd, #0
+ // MOV Xd, #0
+ CheckAll<[CheckOpcode<[MOVZWi, MOVZXi]>,
+ CheckAll<[CheckImmOperand<1, 0>,
+ CheckImmOperand<2, 0>]>]>,
+ // MOV Wd, WZR
+ // MOV Xd, XZR
+ // MOV Wd, Wn
+ // MOV Xd, Xn
+ CheckAll<[CheckOpcode<[ORRWrs, ORRXrs]>,
+ CheckAll<[CheckIsReg1Zero,
+ CheckImmOperand<3, 0>]>]>,
+ // FMOV Hd, WZR
+ // FMOV Hd, XZR
+ // FMOV Sd, WZR
+ // FMOV Dd, XZR
+ CheckAll<[CheckOpcode<[FMOVWHr, FMOVXHr,
+ FMOVWSr, FMOVXDr]>,
+ CheckIsReg1Zero]>,
+ // MOVI Dd, #0
+ // MOVI Vd.2D, #0
+ CheckAll<[CheckOpcode<[MOVID, MOVIv2d_ns]>,
+ CheckImmOperand<1, 0>]>
+ ]>>;
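// A zero-move predicate such as NeoverseZeroMove is normally consumed via a
// SchedWriteVariant that steers matching moves to a cheaper write. A minimal
// sketch of that shape follows; ExampleZeroIdiomWrite, V2Write_0cyc and
// V2Write_1cyc_1I are hypothetical names for illustration, not definitions
// from this change.
def ExampleZeroIdiomWrite : SchedWriteVariant<[
  SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
  SchedVar<NoSchedPred,      [V2Write_1cyc_1I]>]>;
def : InstRW<[ExampleZeroIdiomWrite], (instrs MOVZWi, MOVZXi, ORRWrs, ORRXrs)>;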
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedTSV110.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
index 542d9afb5975..9e5060f1f364 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
@@ -66,9 +66,9 @@ def : WriteRes<WriteIS, [TSV110UnitALUAB]> { let Latency = 1; }
// Integer Mul/MAC/Div
def : WriteRes<WriteID32, [TSV110UnitMDU]> { let Latency = 12;
- let ResourceCycles = [12]; }
+ let ReleaseAtCycles = [12]; }
def : WriteRes<WriteID64, [TSV110UnitMDU]> { let Latency = 20;
- let ResourceCycles = [20]; }
+ let ReleaseAtCycles = [20]; }
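// ReleaseAtCycles is the renamed ResourceCycles: it records the cycle at which
// each listed unit is released, so a 20-cycle unpipelined divide holds the MDU
// for all 20 cycles and back-to-back divides issue 20 cycles apart.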
def : WriteRes<WriteIM32, [TSV110UnitMDU]> { let Latency = 3; }
def : WriteRes<WriteIM64, [TSV110UnitMDU]> { let Latency = 4; }
@@ -94,7 +94,7 @@ def : WriteRes<WriteFImm, [TSV110UnitF]> { let Latency = 2; }
def : WriteRes<WriteFMul, [TSV110UnitF]> { let Latency = 5; }
// FP Div, Sqrt
-def : WriteRes<WriteFDiv, [TSV110UnitFSU1]> { let Latency = 18; let ResourceCycles = [18]; }
+def : WriteRes<WriteFDiv, [TSV110UnitFSU1]> { let Latency = 18; let ReleaseAtCycles = [18]; }
def : WriteRes<WriteVd, [TSV110UnitF]> { let Latency = 4; }
def : WriteRes<WriteVq, [TSV110UnitF]> { let Latency = 4; }
@@ -146,7 +146,7 @@ def TSV110Wr_1cyc_1ALU : SchedWriteRes<[TSV110UnitALU]> { let Latency = 1; }
def TSV110Wr_1cyc_1ALUAB : SchedWriteRes<[TSV110UnitALUAB]> { let Latency = 1; }
def TSV110Wr_1cyc_1LdSt : SchedWriteRes<[TSV110UnitLd0St]> { let Latency = 1; }
-def TSV110Wr_2cyc_1AB : SchedWriteRes<[TSV110UnitAB]> { let Latency = 2; let ResourceCycles = [2]; }
+def TSV110Wr_2cyc_1AB : SchedWriteRes<[TSV110UnitAB]> { let Latency = 2; let ReleaseAtCycles = [2]; }
def TSV110Wr_2cyc_1ALU : SchedWriteRes<[TSV110UnitALU]> { let Latency = 2; }
def TSV110Wr_2cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 2; }
def TSV110Wr_2cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 2; }
@@ -172,25 +172,25 @@ def TSV110Wr_7cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 7; }
def TSV110Wr_8cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 8; }
-def TSV110Wr_11cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 11; let ResourceCycles = [11]; }
+def TSV110Wr_11cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 11; let ReleaseAtCycles = [11]; }
-def TSV110Wr_12cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 12; let ResourceCycles = [12]; }
+def TSV110Wr_12cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 12; let ReleaseAtCycles = [12]; }
-def TSV110Wr_17cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 17; let ResourceCycles = [17]; }
+def TSV110Wr_17cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 17; let ReleaseAtCycles = [17]; }
-def TSV110Wr_18cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 18; let ResourceCycles = [18]; }
+def TSV110Wr_18cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 18; let ReleaseAtCycles = [18]; }
-def TSV110Wr_20cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 20; let ResourceCycles = [20]; }
+def TSV110Wr_20cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 20; let ReleaseAtCycles = [20]; }
-def TSV110Wr_24cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 24; let ResourceCycles = [24]; }
+def TSV110Wr_24cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 24; let ReleaseAtCycles = [24]; }
-def TSV110Wr_31cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 31; let ResourceCycles = [31]; }
+def TSV110Wr_31cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 31; let ReleaseAtCycles = [31]; }
-def TSV110Wr_36cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 36; let ResourceCycles = [36]; }
+def TSV110Wr_36cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 36; let ReleaseAtCycles = [36]; }
-def TSV110Wr_38cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 38; let ResourceCycles = [38]; }
+def TSV110Wr_38cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 38; let ReleaseAtCycles = [38]; }
-def TSV110Wr_64cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 64; let ResourceCycles = [64]; }
+def TSV110Wr_64cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 64; let ReleaseAtCycles = [64]; }
//===----------------------------------------------------------------------===//
// Define Generic 2 micro-op types
@@ -443,8 +443,8 @@ def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs LDRSWl)>;
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDR(BB|HH|W|X)ui$")>;
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr], (instregex "^LDR(BB|HH|W|X)(post|pre)$")>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr], (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1LdSt], (instregex "^LDR(BB|HH|W|X)(post|pre)$")>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1LdSt], (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDTR(B|H|W|X)i$")>;
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDUR(BB|HH|W|X)i$")>;
@@ -453,11 +453,11 @@ def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instregex "^LDNP(W|X)i$")>;
def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instregex "^LDP(W|X)i$")>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt_1ALUAB, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1LdSt_1ALUAB, WriteLDHi],(instregex "^LDP(W|X)(post|pre)$")>;
def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instrs LDPSWi)>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr], (instrs LDPSWpost)>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr], (instrs LDPSWpre)>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1LdSt, WriteLDHi], (instrs LDPSWpost)>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1LdSt, WriteLDHi], (instrs LDPSWpre)>;
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs PRFMl)>;
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs PRFUMi)>;
@@ -469,13 +469,13 @@ def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^PRFMro(W|X)$")>;
// -----------------------------------------------------------------------------
def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STN?P(W|X)i$")>;
-def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STP(W|X)(post|pre)$")>;
+def : InstRW<[WriteAdr, TSV110Wr_1cyc_1LdSt], (instregex "^STP(W|X)(post|pre)$")>;
def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STUR(BB|HH|W|X)i$")>;
def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STTR(B|H|W|X)i$")>;
def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STR(BB|HH|W|X)ui$")>;
-def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STR(BB|HH|W|X)(post|pre)$")>;
-def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STR(BB|HH|W|X)ro(W|X)$")>;
+def : InstRW<[WriteAdr, TSV110Wr_1cyc_1LdSt], (instregex "^STR(BB|HH|W|X)(post|pre)$")>;
+def : InstRW<[WriteAdr, TSV110Wr_1cyc_1LdSt], (instregex "^STR(BB|HH|W|X)ro(W|X)$")>;
// FP Data Processing Instructions
@@ -524,11 +524,11 @@ def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FMOV[SD][ir]$")>;
def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDR[DSQ]l")>;
def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDUR[BDHSQ]i")>;
-def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
+def : InstRW<[WriteAdr, TSV110Wr_5cyc_1LdSt], (instregex "^LDR[BDHSQ](post|pre)")>;
def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDR[BDHSQ]ui")>;
def : InstRW<[TSV110Wr_6cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi], (instregex "^LDN?P[DQS]i")>;
-def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi, WriteAdr], (instregex "^LDP[DQS](post|pre)")>;
+def : InstRW<[WriteAdr, TSV110Wr_5cyc_1LdSt, WriteLDHi], (instregex "^LDP[DQS](post|pre)")>;
// FP Store Instructions
@@ -539,7 +539,7 @@ def : InstRW<[TSV110Wr_1cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^STR[BHSDQ](
def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STR[BHSDQ]ui")>;
def : InstRW<[TSV110Wr_2cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^STR[BHSDQ]ro[WX]")>;
def : InstRW<[TSV110Wr_2cyc_2LdSt], (instregex "^STN?P[SDQ]i")>;
-def : InstRW<[TSV110Wr_2cyc_2LdSt, WriteAdr], (instregex "^STP[SDQ](post|pre)")>;
+def : InstRW<[WriteAdr, TSV110Wr_2cyc_2LdSt], (instregex "^STP[SDQ](post|pre)")>;
// ASIMD Integer Instructions
@@ -704,70 +704,70 @@ def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^[FU](RECP|RSQRT)(E|X)v")>;
// -----------------------------------------------------------------------------
def : InstRW<[TSV110Wr_7cyc_1F_1LdSt], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_7cyc_1F_1LdSt], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_8cyc_3F_2LdSt], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_8cyc_3F_2LdSt], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_7cyc_1F_1LdSt], (instregex "LD1i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_7cyc_1F_1LdSt], (instregex "LD1i(8|16|32|64)_POST$")>;
def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "LD2i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_7cyc_2F_1LdSt], (instregex "LD2i(8|16|32|64)_POST$")>;
def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "LD3i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_8cyc_3F_1LdSt], (instregex "LD3i(8|16|32|64)_POST$")>;
def : InstRW<[TSV110Wr_8cyc_3F_2LdSt], (instregex "LD4i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_8cyc_3F_2LdSt], (instregex "LD4i(8|16|32|64)_POST$")>;
def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_5cyc_1LdSt], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_5cyc_1LdSt], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_6cyc_3LdSt], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_6cyc_3LdSt, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_6cyc_3LdSt], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_6cyc_2LdSt], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_6cyc_2LdSt, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_6cyc_2LdSt], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_10cyc_4F_4LdSt], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_10cyc_4F_4LdSt, WriteAdr], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_10cyc_4F_4LdSt], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD Store Instructions
// -----------------------------------------------------------------------------
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "ST1i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_3cyc_1F, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_3cyc_1F], (instregex "ST1i(8|16|32|64)_POST$")>;
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "ST2i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1F], (instregex "ST2i(8|16|32|64)_POST$")>;
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "ST3i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_5cyc_1F], (instregex "ST3i(8|16|32|64)_POST$")>;
def : InstRW<[TSV110Wr_6cyc_1F], (instregex "ST4i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_6cyc_1F, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_6cyc_1F], (instregex "ST4i(8|16|32|64)_POST$")>;
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_3cyc_1F, WriteAdr], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_3cyc_1F], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1F], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_5cyc_1F], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_6cyc_1F], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_6cyc_1F, WriteAdr], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_6cyc_1F], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1F], (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_5cyc_1F], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_8cyc_1F], (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_8cyc_1F, WriteAdr], (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_8cyc_1F], (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
} // SchedModel = TSV110Model
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
index 8b380ae0e8f3..e1536f208e44 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
@@ -59,23 +59,23 @@ def : WriteRes<WriteExtr, [THXT8XUnitALU]> { let Latency = 2; }
// MAC
def : WriteRes<WriteIM32, [THXT8XUnitMAC]> {
let Latency = 4;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def : WriteRes<WriteIM64, [THXT8XUnitMAC]> {
let Latency = 4;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
// Div
def : WriteRes<WriteID32, [THXT8XUnitDiv]> {
let Latency = 12;
- let ResourceCycles = [6];
+ let ReleaseAtCycles = [6];
}
def : WriteRes<WriteID64, [THXT8XUnitDiv]> {
let Latency = 14;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
}
// Load
@@ -86,32 +86,32 @@ def : WriteRes<WriteLDHi, [THXT8XUnitLdSt]> { let Latency = 3; }
// Vector Load
def : WriteRes<WriteVLD, [THXT8XUnitLdSt]> {
let Latency = 8;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def THXT8XWriteVLD1 : SchedWriteRes<[THXT8XUnitLdSt]> {
let Latency = 6;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def THXT8XWriteVLD2 : SchedWriteRes<[THXT8XUnitLdSt]> {
let Latency = 11;
- let ResourceCycles = [7];
+ let ReleaseAtCycles = [7];
}
def THXT8XWriteVLD3 : SchedWriteRes<[THXT8XUnitLdSt]> {
let Latency = 12;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
}
def THXT8XWriteVLD4 : SchedWriteRes<[THXT8XUnitLdSt]> {
let Latency = 13;
- let ResourceCycles = [9];
+ let ReleaseAtCycles = [9];
}
def THXT8XWriteVLD5 : SchedWriteRes<[THXT8XUnitLdSt]> {
let Latency = 13;
- let ResourceCycles = [9];
+ let ReleaseAtCycles = [9];
}
// Pre/Post Indexing
@@ -129,12 +129,12 @@ def THXT8XWriteVST1 : SchedWriteRes<[THXT8XUnitLdSt]>;
def THXT8XWriteVST2 : SchedWriteRes<[THXT8XUnitLdSt]> {
let Latency = 10;
- let ResourceCycles = [9];
+ let ReleaseAtCycles = [9];
}
def THXT8XWriteVST3 : SchedWriteRes<[THXT8XUnitLdSt]> {
let Latency = 11;
- let ResourceCycles = [10];
+ let ReleaseAtCycles = [10];
}
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
@@ -162,29 +162,29 @@ def : WriteRes<WriteVq, [THXT8XUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFMul, [THXT8XUnitFPMDS]> { let Latency = 6; }
def : WriteRes<WriteFDiv, [THXT8XUnitFPMDS]> {
let Latency = 22;
- let ResourceCycles = [19];
+ let ReleaseAtCycles = [19];
}
def THXT8XWriteFMAC : SchedWriteRes<[THXT8XUnitFPMDS]> { let Latency = 10; }
def THXT8XWriteFDivSP : SchedWriteRes<[THXT8XUnitFPMDS]> {
let Latency = 12;
- let ResourceCycles = [9];
+ let ReleaseAtCycles = [9];
}
def THXT8XWriteFDivDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
let Latency = 22;
- let ResourceCycles = [19];
+ let ReleaseAtCycles = [19];
}
def THXT8XWriteFSqrtSP : SchedWriteRes<[THXT8XUnitFPMDS]> {
let Latency = 17;
- let ResourceCycles = [14];
+ let ReleaseAtCycles = [14];
}
def THXT8XWriteFSqrtDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
let Latency = 31;
- let ResourceCycles = [28];
+ let ReleaseAtCycles = [28];
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
index cdafa33da054..89faa92155e0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
@@ -113,14 +113,14 @@ def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> {
// 23 cycles on I1.
def THX2T99Write_23Cyc_I1 : SchedWriteRes<[THX2T99I1]> {
let Latency = 23;
- let ResourceCycles = [13, 23];
+ let ReleaseAtCycles = [13, 23];
let NumMicroOps = 4;
}
// 39 cycles on I1.
def THX2T99Write_39Cyc_I1 : SchedWriteRes<[THX2T99I1]> {
let Latency = 39;
- let ResourceCycles = [13, 39];
+ let ReleaseAtCycles = [13, 39];
let NumMicroOps = 4;
}
@@ -200,14 +200,14 @@ def THX2T99Write_10Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
def THX2T99Write_16Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
let Latency = 16;
let NumMicroOps = 3;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
}
// 23 cycles on F0 or F1.
def THX2T99Write_23Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
let Latency = 23;
let NumMicroOps = 3;
- let ResourceCycles = [11];
+ let ReleaseAtCycles = [11];
}
// 1 cycles on LS0 or LS1.
@@ -418,7 +418,7 @@ def : InstRW<[THX2T99Write_1Cyc_I2],
// Address generation
def : WriteRes<WriteI, [THX2T99I012]> {
let Latency = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 2;
}
@@ -440,7 +440,7 @@ def : InstRW<[WriteI], (instrs COPY)>;
// ALU, extend and/or shift
def : WriteRes<WriteISReg, [THX2T99I012]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 2;
}
@@ -459,7 +459,7 @@ def : InstRW<[WriteISReg],
def : WriteRes<WriteIEReg, [THX2T99I012]> {
let Latency = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 2;
}
@@ -502,14 +502,14 @@ def : WriteRes<WriteIS, [THX2T99I012]> {
// Latency range of 13-23/13-39.
def : WriteRes<WriteID32, [THX2T99I1]> {
let Latency = 39;
- let ResourceCycles = [39];
+ let ReleaseAtCycles = [39];
let NumMicroOps = 4;
}
// Divide, X-form
def : WriteRes<WriteID64, [THX2T99I1]> {
let Latency = 23;
- let ResourceCycles = [23];
+ let ReleaseAtCycles = [23];
let NumMicroOps = 4;
}
@@ -1110,36 +1110,36 @@ def : WriteRes<WriteFCmp, [THX2T99F01]> {
// FP Mul, Div, Sqrt
def : WriteRes<WriteFDiv, [THX2T99F01]> {
let Latency = 22;
- let ResourceCycles = [19];
+ let ReleaseAtCycles = [19];
}
def THX2T99XWriteFDiv : SchedWriteRes<[THX2T99F01]> {
let Latency = 16;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let NumMicroOps = 4;
}
def THX2T99XWriteFDivSP : SchedWriteRes<[THX2T99F01]> {
let Latency = 16;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let NumMicroOps = 4;
}
def THX2T99XWriteFDivDP : SchedWriteRes<[THX2T99F01]> {
let Latency = 23;
- let ResourceCycles = [12];
+ let ReleaseAtCycles = [12];
let NumMicroOps = 4;
}
def THX2T99XWriteFSqrtSP : SchedWriteRes<[THX2T99F01]> {
let Latency = 16;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let NumMicroOps = 4;
}
def THX2T99XWriteFSqrtDP : SchedWriteRes<[THX2T99F01]> {
let Latency = 23;
- let ResourceCycles = [12];
+ let ReleaseAtCycles = [12];
let NumMicroOps = 4;
}
@@ -1163,19 +1163,19 @@ def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "^FDIVDrr", "^FSQRTDr")>;
// FP multiply accumulate
def : WriteRes<WriteFMul, [THX2T99F01]> {
let Latency = 6;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 3;
}
def THX2T99XWriteFMul : SchedWriteRes<[THX2T99F01]> {
let Latency = 6;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 3;
}
def THX2T99XWriteFMulAcc : SchedWriteRes<[THX2T99F01]> {
let Latency = 6;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 3;
}
@@ -1254,12 +1254,12 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
def : WriteRes<WriteVd, [THX2T99F01]> {
let Latency = 7;
let NumMicroOps = 4;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
}
def : WriteRes<WriteVq, [THX2T99F01]> {
let Latency = 7;
let NumMicroOps = 4;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
}
// ASIMD arith, reduce, 4H/4S
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td
index b7d337dfa76d..8685554b00d7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td
@@ -143,14 +143,14 @@ def THX3T110Write_7Cyc_I1 : SchedWriteRes<[THX3T110I1]> {
// 23 cycles on I1.
def THX3T110Write_23Cyc_I1 : SchedWriteRes<[THX3T110I1]> {
let Latency = 23;
- let ResourceCycles = [13, 23];
+ let ReleaseAtCycles = [13, 23];
let NumMicroOps = 4;
}
// 39 cycles on I1.
def THX3T110Write_39Cyc_I1 : SchedWriteRes<[THX3T110I1]> {
let Latency = 39;
- let ResourceCycles = [13, 39];
+ let ReleaseAtCycles = [13, 39];
let NumMicroOps = 4;
}
@@ -278,14 +278,14 @@ def THX3T110Write_10Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> {
def THX3T110Write_16Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> {
let Latency = 16;
let NumMicroOps = 3;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
}
// 23 cycles on F0/F1/F2/F3.
def THX3T110Write_23Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> {
let Latency = 23;
let NumMicroOps = 3;
- let ResourceCycles = [11];
+ let ReleaseAtCycles = [11];
}
// 1 cycle on LS0/LS1.
@@ -304,7 +304,7 @@ def THX3T110Write_2Cyc_LS01 : SchedWriteRes<[THX3T110LS]> {
def THX3T110Write_4Cyc_LS01 : SchedWriteRes<[THX3T110LS]> {
let Latency = 4;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
// 5 cycles on LS0/LS1.
@@ -326,7 +326,7 @@ def THX3T110Write_6Cyc_LS01 : SchedWriteRes<[THX3T110LS]> {
def THX3T110Write_4_5Cyc_LS01 : SchedWriteRes<[THX3T110LS]> {
let Latency = 4;
let NumMicroOps = 2;
- let ResourceCycles = [4, 5];
+ let ReleaseAtCycles = [4, 5];
}
// 4 + 8 cycles on LS0/LS1.
@@ -336,7 +336,7 @@ def THX3T110Write_4_5Cyc_LS01 : SchedWriteRes<[THX3T110LS]> {
def THX3T110Write_4_8Cyc_LS01 : SchedWriteRes<[THX3T110LS]> {
let Latency = 4;
let NumMicroOps = 2;
- let ResourceCycles = [4, 8];
+ let ReleaseAtCycles = [4, 8];
}
// 11 cycles on LS0/LS1 and I1.
@@ -607,7 +607,7 @@ def THX3T110Write_3_4Cyc_F23_F0123 :
SchedWriteRes<[THX3T110FP23, THX3T110FP0123]> {
let Latency = 3;
let NumMicroOps = 2;
- let ResourceCycles = [3, 4];
+ let ReleaseAtCycles = [3, 4];
}
@@ -678,7 +678,7 @@ def : InstRW<[THX3T110Write_1Cyc_I23],
// Address generation
def : WriteRes<WriteI, [THX3T110I0123]> {
let Latency = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 2;
}
@@ -700,7 +700,7 @@ def : InstRW<[WriteI], (instrs COPY)>;
// ALU, extend and/or shift
def : WriteRes<WriteISReg, [THX3T110I0123]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 2;
}
@@ -719,7 +719,7 @@ def : InstRW<[WriteISReg],
def : WriteRes<WriteIEReg, [THX3T110I0123]> {
let Latency = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 2;
}
@@ -762,14 +762,14 @@ def : WriteRes<WriteIS, [THX3T110I0123]> {
// Latency range of 13-23/13-39.
def : WriteRes<WriteID32, [THX3T110I1]> {
let Latency = 39;
- let ResourceCycles = [39];
+ let ReleaseAtCycles = [39];
let NumMicroOps = 4;
}
// Divide, X-form
def : WriteRes<WriteID64, [THX3T110I1]> {
let Latency = 23;
- let ResourceCycles = [23];
+ let ReleaseAtCycles = [23];
let NumMicroOps = 4;
}
@@ -1218,36 +1218,36 @@ def : WriteRes<WriteFCmp, [THX3T110FP0123]> {
// FP Mul, Div, Sqrt
def : WriteRes<WriteFDiv, [THX3T110FP0123]> {
let Latency = 22;
- let ResourceCycles = [19];
+ let ReleaseAtCycles = [19];
}
def THX3T110XWriteFDiv : SchedWriteRes<[THX3T110FP0123]> {
let Latency = 16;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let NumMicroOps = 4;
}
def THX3T110XWriteFDivSP : SchedWriteRes<[THX3T110FP0123]> {
let Latency = 16;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let NumMicroOps = 4;
}
def THX3T110XWriteFDivDP : SchedWriteRes<[THX3T110FP0123]> {
let Latency = 23;
- let ResourceCycles = [12];
+ let ReleaseAtCycles = [12];
let NumMicroOps = 4;
}
def THX3T110XWriteFSqrtSP : SchedWriteRes<[THX3T110FP0123]> {
let Latency = 16;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let NumMicroOps = 4;
}
def THX3T110XWriteFSqrtDP : SchedWriteRes<[THX3T110FP0123]> {
let Latency = 23;
- let ResourceCycles = [12];
+ let ReleaseAtCycles = [12];
let NumMicroOps = 4;
}
@@ -1271,19 +1271,19 @@ def : InstRW<[THX3T110Write_23Cyc_F01], (instregex "^FDIVDrr", "^FSQRTDr")>;
// FP multiply accumulate
def : WriteRes<WriteFMul, [THX3T110FP0123]> {
let Latency = 6;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 3;
}
def THX3T110XWriteFMul : SchedWriteRes<[THX3T110FP0123]> {
let Latency = 6;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 3;
}
def THX3T110XWriteFMulAcc : SchedWriteRes<[THX3T110FP0123]> {
let Latency = 6;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 3;
}
@@ -1362,12 +1362,12 @@ def : InstRW<[THX3T110Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
def : WriteRes<WriteVd, [THX3T110FP0123]> {
let Latency = 5;
let NumMicroOps = 4;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
}
def : WriteRes<WriteVq, [THX3T110FP0123]> {
let Latency = 5;
let NumMicroOps = 4;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
}
// ASIMD arith, reduce, 4H/4S
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 677797a6797b..1a76f354589e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -46,21 +46,16 @@ SDValue AArch64SelectionDAGInfo::EmitMOPS(AArch64ISD::NodeType SDOpcode,
}
}();
- MachineMemOperand::Flags Flags = MachineMemOperand::MOStore;
- if (isVolatile)
- Flags |= MachineMemOperand::MOVolatile;
- if (!IsSet)
- Flags |= MachineMemOperand::MOLoad;
-
MachineFunction &MF = DAG.getMachineFunction();
+ auto Vol =
+ isVolatile ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
+ auto DstFlags = MachineMemOperand::MOStore | Vol;
auto *DstOp =
- MF.getMachineMemOperand(DstPtrInfo, Flags, ConstSize, Alignment);
- auto *SrcOp =
- MF.getMachineMemOperand(SrcPtrInfo, Flags, ConstSize, Alignment);
+ MF.getMachineMemOperand(DstPtrInfo, DstFlags, ConstSize, Alignment);
if (IsSet) {
- // Extend value to i64 if required
+ // Extend value to i64, if required.
if (SrcOrValue.getValueType() != MVT::i64)
SrcOrValue = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, SrcOrValue);
SDValue Ops[] = {Dst, Size, SrcOrValue, Chain};
@@ -72,6 +67,10 @@ SDValue AArch64SelectionDAGInfo::EmitMOPS(AArch64ISD::NodeType SDOpcode,
SDValue Ops[] = {Dst, SrcOrValue, Size, Chain};
const EVT ResultTys[] = {MVT::i64, MVT::i64, MVT::i64, MVT::Other};
MachineSDNode *Node = DAG.getMachineNode(MachineOpcode, DL, ResultTys, Ops);
+
+ auto SrcFlags = MachineMemOperand::MOLoad | Vol;
+ auto *SrcOp =
+ MF.getMachineMemOperand(SrcPtrInfo, SrcFlags, ConstSize, Alignment);
DAG.setNodeMemRefs(Node, {DstOp, SrcOp});
return SDValue(Node, 3);
}
@@ -141,8 +140,8 @@ static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl,
unsigned OffsetScaled = 0;
while (OffsetScaled < ObjSizeScaled) {
if (ObjSizeScaled - OffsetScaled >= 2) {
- SDValue AddrNode =
- DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(OffsetScaled * 16), dl);
+ SDValue AddrNode = DAG.getMemBasePlusOffset(
+ Ptr, TypeSize::getFixed(OffsetScaled * 16), dl);
SDValue St = DAG.getMemIntrinsicNode(
OpCode2, dl, DAG.getVTList(MVT::Other),
{Chain, TagSrc, AddrNode},
@@ -154,8 +153,8 @@ static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl,
}
if (ObjSizeScaled - OffsetScaled > 0) {
- SDValue AddrNode =
- DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(OffsetScaled * 16), dl);
+ SDValue AddrNode = DAG.getMemBasePlusOffset(
+ Ptr, TypeSize::getFixed(OffsetScaled * 16), dl);
SDValue St = DAG.getMemIntrinsicNode(
OpCode1, dl, DAG.getVTList(MVT::Other),
{Chain, TagSrc, AddrNode},
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
index 753f69461308..a991d645eb6f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
@@ -299,7 +299,7 @@ bool AArch64SpeculationHardening::instrumentControlFlow(
if (I == MBB.begin())
RS.enterBasicBlock(MBB);
else
- RS.backward(std::prev(I));
+ RS.backward(I);
// FIXME: The below just finds *a* unused register. Maybe code could be
// optimized more if this looks for the register that isn't used for the
// longest time around this place, to enable more scheduling freedom. Not
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
index 3ac86b3cde2e..b5b15022cda4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -11,8 +11,6 @@
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -435,7 +433,7 @@ void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore,
void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
uint64_t Size) {
IRBuilder<> IRB(InsertBefore);
- IRB.CreateCall(SetTagFunc, {IRB.CreatePointerCast(AI, IRB.getInt8PtrTy()),
+ IRB.CreateCall(SetTagFunc, {IRB.CreatePointerCast(AI, IRB.getPtrTy()),
ConstantInt::get(IRB.getInt64Ty(), Size)});
}
@@ -564,7 +562,7 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
}
} else {
uint64_t Size = *Info.AI->getAllocationSize(*DL);
- Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getInt8PtrTy());
+ Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getPtrTy());
tagAlloca(AI, &*IRB.GetInsertPoint(), Ptr, Size);
for (auto *RI : SInfo.RetVec) {
untagAlloca(AI, RI, Size);
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
index 41cd405c891e..090c63633df9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
@@ -10,7 +10,6 @@
#include "AArch64.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64InstrInfo.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index 93bd35b9c121..7324be48a415 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -11,6 +11,7 @@
// ===---------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -122,7 +123,10 @@ bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()) || MF.getFunction().hasOptSize())
return false;
- const TargetSubtargetInfo &ST = MF.getSubtarget();
+ const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
+ if (!ST.enableStorePairSuppress())
+ return false;
+
TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo());
TRI = ST.getRegisterInfo();
MRI = &MF.getRegInfo();
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 450e27b8a2af..cf57d950ae8d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -70,6 +70,17 @@ static cl::opt<bool> ForceStreamingCompatibleSVE(
"Force the use of streaming-compatible SVE code for all functions"),
cl::Hidden);
+static cl::opt<AArch64PAuth::AuthCheckMethod>
+ AuthenticatedLRCheckMethod("aarch64-authenticated-lr-check-method",
+ cl::Hidden,
+ cl::desc("Override the variant of check applied "
+ "to authenticated LR during tail call"),
+ cl::values(AUTH_CHECK_METHOD_CL_VALUES_LR));
+
+static cl::opt<unsigned> AArch64MinimumJumpTableEntries(
+ "aarch64-min-jump-table-entries", cl::init(13), cl::Hidden,
+ cl::desc("Set minimum number of entries to use a jump table on AArch64"));
+
unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
return OverrideVectorInsertExtractBaseCost;
@@ -77,7 +88,8 @@ unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
}
AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
- StringRef FS, StringRef CPUString, StringRef TuneCPUString) {
+ StringRef FS, StringRef CPUString, StringRef TuneCPUString,
+ bool HasMinSize) {
// Determine default and user-specified characteristics
if (CPUString.empty())
@@ -87,12 +99,12 @@ AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
TuneCPUString = CPUString;
ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
- initializeProperties();
+ initializeProperties(HasMinSize);
return *this;
}
-void AArch64Subtarget::initializeProperties() {
+void AArch64Subtarget::initializeProperties(bool HasMinSize) {
// Initialize CPU specific properties. We should add a tablegen feature for
// this in the future so we can specify it together with the subtarget
// features.
@@ -137,6 +149,7 @@ void AArch64Subtarget::initializeProperties() {
MaxBytesForLoopAlignment = 16;
break;
case CortexA510:
+ case CortexA520:
PrefFunctionAlignment = Align(16);
VScaleForTuning = 1;
PrefLoopAlignment = Align(16);
@@ -144,8 +157,10 @@ void AArch64Subtarget::initializeProperties() {
break;
case CortexA710:
case CortexA715:
+ case CortexA720:
case CortexX2:
case CortexX3:
+ case CortexX4:
PrefFunctionAlignment = Align(16);
VScaleForTuning = 1;
PrefLoopAlignment = Align(32);
@@ -169,6 +184,7 @@ void AArch64Subtarget::initializeProperties() {
case AppleA14:
case AppleA15:
case AppleA16:
+ case AppleA17:
CacheLineSize = 64;
PrefetchDistance = 280;
MinPrefetchStride = 2048;
@@ -177,6 +193,7 @@ void AArch64Subtarget::initializeProperties() {
case AppleA14:
case AppleA15:
case AppleA16:
+ case AppleA17:
MaxInterleaveFactor = 4;
break;
default:
@@ -285,6 +302,9 @@ void AArch64Subtarget::initializeProperties() {
MaxInterleaveFactor = 4;
break;
}
+
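// Honor an explicit -aarch64-min-jump-table-entries override; otherwise the
// 13-entry threshold applies only to functions without minsize, so
// size-optimized functions keep the smaller default (4) and still form
// compact jump tables for small switches.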
+ if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || !HasMinSize)
+ MinimumJumpTableEntries = AArch64MinimumJumpTableEntries;
}
AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
@@ -293,17 +313,17 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
unsigned MinSVEVectorSizeInBitsOverride,
unsigned MaxSVEVectorSizeInBitsOverride,
bool StreamingSVEMode,
- bool StreamingCompatibleSVEMode)
+ bool StreamingCompatibleSVEMode,
+ bool HasMinSize)
: AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
- IsLittle(LittleEndian),
- StreamingSVEMode(StreamingSVEMode),
+ IsLittle(LittleEndian), StreamingSVEMode(StreamingSVEMode),
StreamingCompatibleSVEMode(StreamingCompatibleSVEMode),
MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
- InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)),
+ InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),
TLInfo(TM, *this) {
if (AArch64::isX18ReservedByDefault(TT))
ReserveXRegister.set(18);
@@ -335,6 +355,8 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
// X29 is named FP, so we can't use TRI->getName to check X29.
if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP"))
ReserveXRegisterForRA.set(29);
+
+ AddressCheckPSV.reset(new AddressCheckPseudoSourceValue(TM));
}
const CallLowering *AArch64Subtarget::getCallLowering() const {
@@ -477,14 +499,39 @@ void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
bool AArch64Subtarget::useAA() const { return UseAA; }
+bool AArch64Subtarget::isStreamingCompatible() const {
+ return StreamingCompatibleSVEMode || ForceStreamingCompatibleSVE;
+}
+
bool AArch64Subtarget::isNeonAvailable() const {
- if (!hasNEON())
- return false;
+ return hasNEON() &&
+ (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
+}
- // The 'force-streaming-comaptible-sve' flag overrides the streaming
- // function attributes.
- if (ForceStreamingCompatibleSVE.getNumOccurrences() > 0)
- return !ForceStreamingCompatibleSVE;
+bool AArch64Subtarget::isSVEAvailable() const {
+ return hasSVE() &&
+ (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
+}
- return !isStreaming() && !isStreamingCompatible();
+// If return address signing is enabled, tail calls are emitted as follows:
+//
+// ```
+// <authenticate LR>
+// <check LR>
+// TCRETURN ; the callee may sign and spill the LR in its prologue
+// ```
+//
+// LR may require explicit checking because if FEAT_FPAC is not implemented
+// and LR was tampered with, then `<authenticate LR>` will not generate an
+// exception on its own. Later, if the callee spills the signed LR value and
+// neither FEAT_PAuth2 nor FEAT_EPAC is implemented, the valid PAC replaces
+// the higher bits of LR, thus hiding the authentication failure.
+AArch64PAuth::AuthCheckMethod
+AArch64Subtarget::getAuthenticatedLRCheckMethod() const {
+ if (AuthenticatedLRCheckMethod.getNumOccurrences())
+ return AuthenticatedLRCheckMethod;
+
+  // For now, use None by default because checks may introduce an unexpected
+ // performance regression or incompatibility with execute-only mappings.
+ return AArch64PAuth::AuthCheckMethod::None;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 5e20d16464c4..b17e215e200d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -16,6 +16,7 @@
#include "AArch64FrameLowering.h"
#include "AArch64ISelLowering.h"
#include "AArch64InstrInfo.h"
+#include "AArch64PointerAuth.h"
#include "AArch64RegisterInfo.h"
#include "AArch64SelectionDAGInfo.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
@@ -25,7 +26,6 @@
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
-#include <string>
#define GET_SUBTARGETINFO_HEADER
#include "AArch64GenSubtargetInfo.inc"
@@ -50,11 +50,13 @@ public:
AppleA14,
AppleA15,
AppleA16,
+ AppleA17,
Carmel,
CortexA35,
CortexA53,
CortexA55,
CortexA510,
+ CortexA520,
CortexA57,
CortexA65,
CortexA72,
@@ -66,11 +68,13 @@ public:
CortexA78C,
CortexA710,
CortexA715,
+ CortexA720,
CortexR82,
CortexX1,
CortexX1C,
CortexX2,
CortexX3,
+ CortexX4,
ExynosM3,
Falkor,
Kryo,
@@ -103,7 +107,7 @@ protected:
#include "AArch64GenSubtargetInfo.inc"
uint8_t MaxInterleaveFactor = 2;
- uint8_t VectorInsertExtractBaseCost = 3;
+ uint8_t VectorInsertExtractBaseCost = 2;
uint16_t CacheLineSize = 0;
uint16_t PrefetchDistance = 0;
uint16_t MinPrefetchStride = 1;
@@ -111,6 +115,7 @@ protected:
Align PrefFunctionAlignment;
Align PrefLoopAlignment;
unsigned MaxBytesForLoopAlignment = 0;
+ unsigned MinimumJumpTableEntries = 4;
unsigned MaxJumpTableSize = 0;
// ReserveXRegister[i] - X#i is not available as a general purpose register.
@@ -152,10 +157,11 @@ private:
/// subtarget initialization.
AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
StringRef CPUString,
- StringRef TuneCPUString);
+ StringRef TuneCPUString,
+ bool HasMinSize);
/// Initialize properties based on the selected processor family.
- void initializeProperties();
+ void initializeProperties(bool HasMinSize);
public:
/// This constructor initializes the data members to match that
@@ -165,7 +171,8 @@ public:
unsigned MinSVEVectorSizeInBitsOverride = 0,
unsigned MaxSVEVectorSizeInBitsOverride = 0,
bool StreamingSVEMode = false,
- bool StreamingCompatibleSVEMode = false);
+ bool StreamingCompatibleSVEMode = false,
+ bool HasMinSize = false);
// Getters for SubtargetFeatures defined in tablegen
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
@@ -204,20 +211,28 @@ public:
bool isXRaySupported() const override { return true; }
- /// Returns true if the function has the streaming attribute.
+ /// Returns true if the function has a streaming body.
bool isStreaming() const { return StreamingSVEMode; }
- /// Returns true if the function has the streaming-compatible attribute.
- bool isStreamingCompatible() const { return StreamingCompatibleSVEMode; }
+ /// Returns true if the function has a streaming-compatible body.
+ bool isStreamingCompatible() const;
/// Returns true if the target has NEON and the function at runtime is known
/// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
/// mode, which disables NEON instructions).
bool isNeonAvailable() const;
+ /// Returns true if the target has SVE and can use the full range of SVE
+  /// instructions, for example because the function is known not to be
+ /// in streaming-SVE mode or when the target has FEAT_FA64 enabled.
+ bool isSVEAvailable() const;
+
unsigned getMinVectorRegisterBitWidth() const {
- // Don't assume any minimum vector size when PSTATE.SM may not be 0.
- if (StreamingSVEMode || StreamingCompatibleSVEMode)
+ // Don't assume any minimum vector size when PSTATE.SM may not be 0, because
+    // we don't yet have streaming-compatible codegen that we trust
+ // is safe for functions that may be executed in streaming-SVE mode.
+ // By returning '0' here, we disable vectorization.
+ if (!isSVEAvailable() && !isNeonAvailable())
return 0;
return MinVectorRegisterBitWidth;
}
@@ -265,6 +280,9 @@ public:
}
unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
+ unsigned getMinimumJumpTableEntries() const {
+ return MinimumJumpTableEntries;
+ }
/// CPU has TBI (top byte of addresses is ignored during HW address
/// translation) and OS enables it.
@@ -394,10 +412,10 @@ public:
bool useSVEForFixedLengthVectors() const {
if (!isNeonAvailable())
- return hasSVE();
+ return hasSVEorSME();
// Prefer NEON unless larger SVE registers are available.
- return hasSVE() && getMinSVEVectorSizeInBits() >= 256;
+ return hasSVEorSME() && getMinSVEVectorSizeInBits() >= 256;
}
bool useSVEForFixedLengthVectors(EVT VT) const {
@@ -424,6 +442,32 @@ public:
return "__security_check_cookie_arm64ec";
return "__security_check_cookie";
}
+
+ /// Choose a method of checking LR before performing a tail call.
+ AArch64PAuth::AuthCheckMethod getAuthenticatedLRCheckMethod() const;
+
+ const PseudoSourceValue *getAddressCheckPSV() const {
+ return AddressCheckPSV.get();
+ }
+
+private:
+  /// Pseudo value representing a memory load performed to check an address.
+ ///
+ /// This load operation is solely used for its side-effects: if the address
+  /// is not mapped (or not readable), it triggers a CPU exception; otherwise
+ /// execution proceeds and the value is not used.
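+  /// Presumably this backs the load-based variant of the authenticated-LR
+  /// check described in AArch64Subtarget.cpp: loading through the
+  /// just-authenticated LR makes a corrupted (still signed) pointer fault
+  /// before the tail call proceeds.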
+ class AddressCheckPseudoSourceValue : public PseudoSourceValue {
+ public:
+ AddressCheckPseudoSourceValue(const TargetMachine &TM)
+ : PseudoSourceValue(TargetCustom, TM) {}
+
+ bool isConstant(const MachineFrameInfo *) const override { return false; }
+ bool isAliased(const MachineFrameInfo *) const override { return true; }
+ bool mayAlias(const MachineFrameInfo *) const override { return true; }
+ void printCustom(raw_ostream &OS) const override { OS << "AddressCheck"; }
+ };
+
+ std::unique_ptr<AddressCheckPseudoSourceValue> AddressCheckPSV;
};
} // End llvm namespace
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index 3e7d4d81b242..e8b5f6059c9e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -738,11 +738,13 @@ def : ROSysReg<"ID_AA64PFR1_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b001>;
def : ROSysReg<"ID_AA64PFR2_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b010>;
def : ROSysReg<"ID_AA64DFR0_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b000>;
def : ROSysReg<"ID_AA64DFR1_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b001>;
+def : ROSysReg<"ID_AA64DFR2_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b010>;
def : ROSysReg<"ID_AA64AFR0_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b100>;
def : ROSysReg<"ID_AA64AFR1_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b101>;
def : ROSysReg<"ID_AA64ISAR0_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b000>;
def : ROSysReg<"ID_AA64ISAR1_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b001>;
def : ROSysReg<"ID_AA64ISAR2_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b010>;
+def : ROSysReg<"ID_AA64ISAR3_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b011>;
def : ROSysReg<"ID_AA64MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b000>;
def : ROSysReg<"ID_AA64MMFR1_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b001>;
def : ROSysReg<"ID_AA64MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b010>;
@@ -1927,3 +1929,20 @@ def : RWSysReg<"PFAR_EL2", 0b11, 0b100, 0b0110, 0b0000, 0b101>;
// v9.4a Exception-based event profiling (FEAT_EBEP)
// Op0 Op1 CRn CRm Op2
def : RWSysReg<"PM", 0b11, 0b000, 0b0100, 0b0011, 0b001>;
+
+// 2023 ISA Extension
+// The AArch64 Floating-point Mode Register controls the behavior of the FP8
+// instructions (FEAT_FPMR)
+let Requires = [{ {AArch64::FeatureFPMR} }] in {
+// Op0 Op1 CRn CRm Op2
+def : ROSysReg<"ID_AA64FPFR0_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b111>;
+def : RWSysReg<"FPMR", 0b11, 0b011, 0b0100, 0b0100, 0b010>;
+}
+
+// v9.5a Software Stepping Enhancements (FEAT_STEP2)
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"MDSTEPOP_EL1", 0b10, 0b000, 0b0000, 0b0101, 0b010>;
+
+// v9.5a System PMU zero register (FEAT_SPMU2)
+// Op0 Op1 CRn CRm Op2
+def : WOSysReg<"SPMZR_EL0", 0b10, 0b011, 0b1001, 0b1100, 0b100>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 559879139758..036719be06d8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -21,6 +21,7 @@
#include "TargetInfo/AArch64TargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/CFIFixup.h"
#include "llvm/CodeGen/CSEConfigBase.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
@@ -196,6 +197,11 @@ static cl::opt<bool> EnableGISelLoadStoreOptPostLegal(
cl::desc("Enable GlobalISel's post-legalizer load/store optimization pass"),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+ EnableSinkFold("aarch64-enable-sink-fold",
+ cl::desc("Enable sinking and folding of instruction copies"),
+ cl::init(true), cl::Hidden);
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
@@ -220,6 +226,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
initializeAArch64SIMDInstrOptPass(*PR);
initializeAArch64O0PreLegalizerCombinerPass(*PR);
initializeAArch64PreLegalizerCombinerPass(*PR);
+ initializeAArch64PointerAuthPass(*PR);
initializeAArch64PostLegalizerCombinerPass(*PR);
initializeAArch64PostLegalizerLoweringPass(*PR);
initializeAArch64PostSelectOptimizePass(*PR);
@@ -319,7 +326,7 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT,
+ CodeGenOptLevel OL, bool JIT,
bool LittleEndian)
: LLVMTargetMachine(T,
computeDataLayout(TT, Options.MCOptions, LittleEndian),
@@ -357,7 +364,7 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
// Enable GlobalISel at or below EnableGlobalISelAt0, unless this is
// MachO/CodeModel::Large, which GlobalISel does not support.
- if (getOptLevel() <= EnableGlobalISelAtO &&
+ if (static_cast<int>(getOptLevel()) <= EnableGlobalISelAtO &&
TT.getArch() != Triple::aarch64_32 &&
TT.getEnvironment() != Triple::GNUILP32 &&
!(getCodeModel() == CodeModel::Large && TT.isOSBinFormatMachO())) {
@@ -390,6 +397,7 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
StringRef CPU = CPUAttr.isValid() ? CPUAttr.getValueAsString() : TargetCPU;
StringRef TuneCPU = TuneAttr.isValid() ? TuneAttr.getValueAsString() : CPU;
StringRef FS = FSAttr.isValid() ? FSAttr.getValueAsString() : TargetFS;
+ bool HasMinSize = F.hasMinSize();
bool StreamingSVEMode = F.hasFnAttribute("aarch64_pstate_sm_enabled") ||
F.hasFnAttribute("aarch64_pstate_sm_body");
@@ -398,11 +406,10 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
unsigned MinSVEVectorSize = 0;
unsigned MaxSVEVectorSize = 0;
- Attribute VScaleRangeAttr = F.getFnAttribute(Attribute::VScaleRange);
- if (VScaleRangeAttr.isValid()) {
- std::optional<unsigned> VScaleMax = VScaleRangeAttr.getVScaleRangeMax();
- MinSVEVectorSize = VScaleRangeAttr.getVScaleRangeMin() * 128;
- MaxSVEVectorSize = VScaleMax ? *VScaleMax * 128 : 0;
+ if (F.hasFnAttribute(Attribute::VScaleRange)) {
+ ConstantRange CR = getVScaleRange(&F, 64);
+ MinSVEVectorSize = CR.getUnsignedMin().getZExtValue() * 128;
+ MaxSVEVectorSize = CR.getUnsignedMax().getZExtValue() * 128;
} else {
MinSVEVectorSize = SVEVectorBitsMinOpt;
MaxSVEVectorSize = SVEVectorBitsMaxOpt;
@@ -416,13 +423,9 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
"Minimum SVE vector size should not be larger than its maximum!");
// Sanitize user input in case of no asserts
- if (MaxSVEVectorSize == 0)
- MinSVEVectorSize = (MinSVEVectorSize / 128) * 128;
- else {
- MinSVEVectorSize =
- (std::min(MinSVEVectorSize, MaxSVEVectorSize) / 128) * 128;
- MaxSVEVectorSize =
- (std::max(MinSVEVectorSize, MaxSVEVectorSize) / 128) * 128;
+ if (MaxSVEVectorSize != 0) {
+ MinSVEVectorSize = std::min(MinSVEVectorSize, MaxSVEVectorSize);
+ MaxSVEVectorSize = std::max(MinSVEVectorSize, MaxSVEVectorSize);
}
SmallString<512> Key;
@@ -430,8 +433,8 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
<< MaxSVEVectorSize
<< "StreamingSVEMode=" << StreamingSVEMode
<< "StreamingCompatibleSVEMode="
- << StreamingCompatibleSVEMode << CPU << TuneCPU
- << FS;
+ << StreamingCompatibleSVEMode << CPU << TuneCPU << FS
+ << "HasMinSize=" << HasMinSize;
auto &I = SubtargetMap[Key];
if (!I) {
@@ -441,13 +444,12 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
resetTargetOptions(F);
I = std::make_unique<AArch64Subtarget>(
TargetTriple, CPU, TuneCPU, FS, *this, isLittle, MinSVEVectorSize,
- MaxSVEVectorSize, StreamingSVEMode, StreamingCompatibleSVEMode);
+ MaxSVEVectorSize, StreamingSVEMode, StreamingCompatibleSVEMode,
+ HasMinSize);
}
assert((!StreamingSVEMode || I->hasSME()) &&
"Expected SME to be available");
- assert((!StreamingCompatibleSVEMode || I->hasSVEorSME()) &&
- "Expected SVE or SME to be available");
return I.get();
}
@@ -457,7 +459,7 @@ void AArch64leTargetMachine::anchor() { }
AArch64leTargetMachine::AArch64leTargetMachine(
const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
const TargetOptions &Options, std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL, bool JIT)
: AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, true) {}
void AArch64beTargetMachine::anchor() { }
@@ -465,7 +467,7 @@ void AArch64beTargetMachine::anchor() { }
AArch64beTargetMachine::AArch64beTargetMachine(
const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
const TargetOptions &Options, std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL, bool JIT)
: AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, false) {}
namespace {
@@ -475,8 +477,9 @@ class AArch64PassConfig : public TargetPassConfig {
public:
AArch64PassConfig(AArch64TargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {
- if (TM.getOptLevel() != CodeGenOpt::None)
+ if (TM.getOptLevel() != CodeGenOptLevel::None)
substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
+ setEnableSinkAndFold(EnableSinkFold);
}
AArch64TargetMachine &getAArch64TargetMachine() const {
@@ -553,13 +556,14 @@ void AArch64PassConfig::addIRPasses() {
addPass(createAtomicExpandPass());
// Expand any SVE vector library calls that we can't code generate directly.
- if (EnableSVEIntrinsicOpts && TM->getOptLevel() == CodeGenOpt::Aggressive)
+ if (EnableSVEIntrinsicOpts &&
+ TM->getOptLevel() == CodeGenOptLevel::Aggressive)
addPass(createSVEIntrinsicOptsPass());
// Cmpxchg instructions are often used with a subsequent comparison to
// determine whether it succeeded. We can exploit existing control-flow in
// ldrex/strex loops to simplify this, but it needs tidying up.
- if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
+ if (TM->getOptLevel() != CodeGenOptLevel::None && EnableAtomicTidy)
addPass(createCFGSimplificationPass(SimplifyCFGOptions()
.forwardSwitchCondToPhi(true)
.convertSwitchRangeToICmp(true)
@@ -572,14 +576,14 @@ void AArch64PassConfig::addIRPasses() {
//
// Run this before LSR to remove the multiplies involved in computing the
// pointer values N iterations ahead.
- if (TM->getOptLevel() != CodeGenOpt::None) {
+ if (TM->getOptLevel() != CodeGenOptLevel::None) {
if (EnableLoopDataPrefetch)
addPass(createLoopDataPrefetchPass());
if (EnableFalkorHWPFFix)
addPass(createFalkorMarkStridedAccessesPass());
}
- if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
+ if (TM->getOptLevel() == CodeGenOptLevel::Aggressive && EnableGEPOpt) {
// Call SeparateConstOffsetFromGEP pass to extract constants within indices
// and lower a GEP with multiple indices to either arithmetic operations or
// multiple GEPs with single index.
@@ -594,19 +598,19 @@ void AArch64PassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
- if (getOptLevel() == CodeGenOpt::Aggressive && EnableSelectOpt)
+ if (getOptLevel() == CodeGenOptLevel::Aggressive && EnableSelectOpt)
addPass(createSelectOptimizePass());
addPass(createAArch64GlobalsTaggingPass());
addPass(createAArch64StackTaggingPass(
- /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None));
+ /*IsOptNone=*/TM->getOptLevel() == CodeGenOptLevel::None));
// Match complex arithmetic patterns
- if (TM->getOptLevel() >= CodeGenOpt::Default)
+ if (TM->getOptLevel() >= CodeGenOptLevel::Default)
addPass(createComplexDeinterleavingPass(TM));
// Match interleaved memory accesses to ldN/stN intrinsics.
- if (TM->getOptLevel() != CodeGenOpt::None) {
+ if (TM->getOptLevel() != CodeGenOptLevel::None) {
addPass(createInterleavedLoadCombinePass());
addPass(createInterleavedAccessPass());
}
@@ -628,16 +632,17 @@ void AArch64PassConfig::addIRPasses() {
bool AArch64PassConfig::addPreISel() {
// Run promote constant before global merge, so that the promoted constants
// get a chance to be merged
- if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)
+ if (TM->getOptLevel() != CodeGenOptLevel::None && EnablePromoteConstant)
addPass(createAArch64PromoteConstantPass());
// FIXME: On AArch64, this depends on the type.
// Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes().
// and the offset has to be a multiple of the related size in bytes.
- if ((TM->getOptLevel() != CodeGenOpt::None &&
+ if ((TM->getOptLevel() != CodeGenOptLevel::None &&
EnableGlobalMerge == cl::BOU_UNSET) ||
EnableGlobalMerge == cl::BOU_TRUE) {
- bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
- (EnableGlobalMerge == cl::BOU_UNSET);
+ bool OnlyOptimizeForSize =
+ (TM->getOptLevel() < CodeGenOptLevel::Aggressive) &&
+ (EnableGlobalMerge == cl::BOU_UNSET);
// Merging of extern globals is enabled by default on non-Mach-O as we
// expect it to be generally either beneficial or harmless. On Mach-O it
@@ -658,7 +663,7 @@ bool AArch64PassConfig::addPreISel() {
}
void AArch64PassConfig::addCodeGenPrepare() {
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
addPass(createTypePromotionLegacyPass());
TargetPassConfig::addCodeGenPrepare();
}
@@ -669,7 +674,7 @@ bool AArch64PassConfig::addInstSelector() {
// For ELF, cleanup any local-dynamic TLS accesses (i.e. combine as many
// references to _TLS_MODULE_BASE_ as possible.
if (TM->getTargetTriple().isOSBinFormatELF() &&
- getOptLevel() != CodeGenOpt::None)
+ getOptLevel() != CodeGenOptLevel::None)
addPass(createAArch64CleanupLocalDynamicTLSPass());
return false;
@@ -681,7 +686,7 @@ bool AArch64PassConfig::addIRTranslator() {
}
void AArch64PassConfig::addPreLegalizeMachineIR() {
- if (getOptLevel() == CodeGenOpt::None) {
+ if (getOptLevel() == CodeGenOptLevel::None) {
addPass(createAArch64O0PreLegalizerCombiner());
addPass(new Localizer());
} else {
@@ -698,7 +703,7 @@ bool AArch64PassConfig::addLegalizeMachineIR() {
}
void AArch64PassConfig::addPreRegBankSelect() {
- bool IsOptNone = getOptLevel() == CodeGenOpt::None;
+ bool IsOptNone = getOptLevel() == CodeGenOptLevel::None;
if (!IsOptNone) {
addPass(createAArch64PostLegalizerCombiner(IsOptNone));
if (EnableGISelLoadStoreOptPostLegal)
@@ -714,7 +719,7 @@ bool AArch64PassConfig::addRegBankSelect() {
bool AArch64PassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect(getOptLevel()));
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
addPass(createAArch64PostSelectOptimize());
return false;
}
@@ -723,7 +728,7 @@ void AArch64PassConfig::addMachineSSAOptimization() {
// Run default MachineSSAOptimization first.
TargetPassConfig::addMachineSSAOptimization();
- if (TM->getOptLevel() != CodeGenOpt::None)
+ if (TM->getOptLevel() != CodeGenOptLevel::None)
addPass(createAArch64MIPeepholeOptPass());
}
@@ -741,18 +746,19 @@ bool AArch64PassConfig::addILPOpts() {
if (EnableStPairSuppress)
addPass(createAArch64StorePairSuppressPass());
addPass(createAArch64SIMDInstrOptPass());
- if (TM->getOptLevel() != CodeGenOpt::None)
+ if (TM->getOptLevel() != CodeGenOptLevel::None)
addPass(createAArch64StackTaggingPreRAPass());
return true;
}
void AArch64PassConfig::addPreRegAlloc() {
// Change dead register definitions to refer to the zero register.
- if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
+ if (TM->getOptLevel() != CodeGenOptLevel::None &&
+ EnableDeadRegisterElimination)
addPass(createAArch64DeadRegisterDefinitions());
// Use AdvSIMD scalar instructions whenever profitable.
- if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) {
+ if (TM->getOptLevel() != CodeGenOptLevel::None && EnableAdvSIMDScalar) {
addPass(createAArch64AdvSIMDScalar());
// The AdvSIMD pass may produce copies that can be rewritten to
// be register coalescer friendly.
@@ -762,10 +768,11 @@ void AArch64PassConfig::addPreRegAlloc() {
void AArch64PassConfig::addPostRegAlloc() {
// Remove redundant copy instructions.
- if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
+ if (TM->getOptLevel() != CodeGenOptLevel::None &&
+ EnableRedundantCopyElimination)
addPass(createAArch64RedundantCopyEliminationPass());
- if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc())
+ if (TM->getOptLevel() != CodeGenOptLevel::None && usingDefaultRegAlloc())
// Improve performance for some FP/SIMD code for A57.
addPass(createAArch64A57FPLoadBalancing());
}
@@ -777,7 +784,7 @@ void AArch64PassConfig::addPreSched2() {
// Expand some pseudo instructions to allow proper scheduling.
addPass(createAArch64ExpandPseudoPass());
// Use load/store pair instructions when possible.
- if (TM->getOptLevel() != CodeGenOpt::None) {
+ if (TM->getOptLevel() != CodeGenOptLevel::None) {
if (EnableLoadStoreOpt)
addPass(createAArch64LoadStoreOptimizationPass());
}
@@ -794,7 +801,7 @@ void AArch64PassConfig::addPreSched2() {
addPass(createAArch64IndirectThunks());
addPass(createAArch64SLSHardeningPass());
- if (TM->getOptLevel() != CodeGenOpt::None) {
+ if (TM->getOptLevel() != CodeGenOptLevel::None) {
if (EnableFalkorHWPFFix)
addPass(createFalkorHWPFFixPass());
}
@@ -804,18 +811,15 @@ void AArch64PassConfig::addPreEmitPass() {
// Machine Block Placement might have created new opportunities when run
// at O3, where the Tail Duplication Threshold is set to 4 instructions.
// Run the load/store optimizer once more.
- if (TM->getOptLevel() >= CodeGenOpt::Aggressive && EnableLoadStoreOpt)
+ if (TM->getOptLevel() >= CodeGenOptLevel::Aggressive && EnableLoadStoreOpt)
addPass(createAArch64LoadStoreOptimizationPass());
- if (TM->getOptLevel() >= CodeGenOpt::Aggressive &&
+ if (TM->getOptLevel() >= CodeGenOptLevel::Aggressive &&
EnableAArch64CopyPropagation)
addPass(createMachineCopyPropagationPass(true));
addPass(createAArch64A53Fix835769());
- if (EnableBranchTargets)
- addPass(createAArch64BranchTargetsPass());
-
if (TM->getTargetTriple().isOSWindows()) {
// Identify valid longjmp targets for Windows Control Flow Guard.
addPass(createCFGuardLongjmpPass());
@@ -823,18 +827,21 @@ void AArch64PassConfig::addPreEmitPass() {
addPass(createEHContGuardCatchretPass());
}
- if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
+ if (TM->getOptLevel() != CodeGenOptLevel::None && EnableCollectLOH &&
TM->getTargetTriple().isOSBinFormatMachO())
addPass(createAArch64CollectLOHPass());
}
void AArch64PassConfig::addPostBBSections() {
+ addPass(createAArch64PointerAuthPass());
+ if (EnableBranchTargets)
+ addPass(createAArch64BranchTargetsPass());
// Relax conditional branch instructions if they're otherwise out of
// range of their destination.
if (BranchRelaxation)
addPass(&BranchRelaxationPassID);
- if (TM->getOptLevel() != CodeGenOpt::None && EnableCompressJumpTables)
+ if (TM->getOptLevel() != CodeGenOptLevel::None && EnableCompressJumpTables)
addPass(createAArch64CompressJumpTablesPass());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.h
index e9b5f4820b79..12b971853f84 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -30,7 +30,7 @@ public:
AArch64TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT, bool IsLittleEndian);
~AArch64TargetMachine() override;
@@ -80,8 +80,8 @@ public:
AArch64leTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT);
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
+ bool JIT);
};
// AArch64 big endian target machine.
@@ -93,8 +93,8 @@ public:
AArch64beTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT);
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
+ bool JIT);
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index bee9ec4c7132..b5b8b6829178 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -46,6 +46,15 @@ static cl::opt<unsigned>
NeonNonConstStrideOverhead("neon-nonconst-stride-overhead", cl::init(10),
cl::Hidden);
+static cl::opt<unsigned> CallPenaltyChangeSM(
+ "call-penalty-sm-change", cl::init(5), cl::Hidden,
+ cl::desc(
+ "Penalty of calling a function that requires a change to PSTATE.SM"));
+
+static cl::opt<unsigned> InlineCallPenaltyChangeSM(
+ "inline-call-penalty-sm-change", cl::init(10), cl::Hidden,
+ cl::desc("Penalty of inlining a call that requires a change to PSTATE.SM"));
+
namespace {
class TailFoldingOption {
// These bitfields will only ever be set to something non-zero in operator=,
@@ -190,16 +199,49 @@ static cl::opt<bool> EnableFixedwidthAutovecInStreamingMode(
static cl::opt<bool> EnableScalableAutovecInStreamingMode(
"enable-scalable-autovec-in-streaming-mode", cl::init(false), cl::Hidden);
+static bool isSMEABIRoutineCall(const CallInst &CI) {
+ const auto *F = CI.getCalledFunction();
+ return F && StringSwitch<bool>(F->getName())
+ .Case("__arm_sme_state", true)
+ .Case("__arm_tpidr2_save", true)
+ .Case("__arm_tpidr2_restore", true)
+ .Case("__arm_za_disable", true)
+ .Default(false);
+}
+
+/// Returns true if the function has explicit operations that can only be
+/// lowered to instructions that are incompatible with the selected mode.
+/// This also returns true if the function F may use or modify ZA state.
+static bool hasPossibleIncompatibleOps(const Function *F) {
+ for (const BasicBlock &BB : *F) {
+ for (const Instruction &I : BB) {
+ // Be conservative for now and assume that any call to inline asm or to
+ // intrinsics could result in non-streaming ops (e.g. calls to
+ // @llvm.aarch64.* or @llvm.gather/scatter intrinsics). We can assume that
+ // all native LLVM instructions can be lowered to compatible instructions.
+ if (isa<CallInst>(I) && !I.isDebugOrPseudoInst() &&
+ (cast<CallInst>(I).isInlineAsm() || isa<IntrinsicInst>(I) ||
+ isSMEABIRoutineCall(cast<CallInst>(I))))
+ return true;
+ }
+ }
+ return false;
+}
+
bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
SMEAttrs CallerAttrs(*Caller);
SMEAttrs CalleeAttrs(*Callee);
- if (CallerAttrs.requiresSMChange(CalleeAttrs,
- /*BodyOverridesInterface=*/true) ||
- CallerAttrs.requiresLazySave(CalleeAttrs) ||
- CalleeAttrs.hasNewZAInterface())
+ if (CalleeAttrs.hasNewZABody())
return false;
+ if (CallerAttrs.requiresLazySave(CalleeAttrs) ||
+ CallerAttrs.requiresSMChange(CalleeAttrs,
+ /*BodyOverridesInterface=*/true)) {
+ if (hasPossibleIncompatibleOps(Callee))
+ return false;
+ }
+
const TargetMachine &TM = getTLI()->getTargetMachine();
const FeatureBitset &CallerBits =
@@ -236,6 +278,40 @@ bool AArch64TTIImpl::areTypesABICompatible(
return true;
}
+unsigned
+AArch64TTIImpl::getInlineCallPenalty(const Function *F, const CallBase &Call,
+ unsigned DefaultCallPenalty) const {
+ // This function calculates a penalty for executing Call in F.
+ //
+ // There are two ways this function can be called:
+ // (1) F:
+ // call from F -> G (the call here is Call)
+ //
+ // For (1), Call.getCaller() == F, so it will always return a high cost if
+ // a streaming-mode change is required (thus encouraging the inliner to
+ // remove the call by inlining the function).
+ //
+ // (2) F:
+ // call from F -> G (the call here is not Call)
+ // G:
+ // call from G -> H (the call here is Call)
+ //
+ // For (2), if after inlining the body of G into F the call to H requires a
+ // streaming-mode change, and the call to G from F would also require a
+ // streaming-mode change, then it is beneficial to perform the mode change
+ // only once and to avoid inlining G into F.
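+ //
+ // Worked example with the default option values declared above: with
+ // DefaultCallPenalty == 2, case (1) returns CallPenaltyChangeSM * 2 == 10
+ // and case (2) returns InlineCallPenaltyChangeSM * 2 == 20 (illustrative
+ // numbers; both knobs are hidden cl::opts and may be overridden).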
+ SMEAttrs FAttrs(*F);
+ SMEAttrs CalleeAttrs(Call);
+ if (FAttrs.requiresSMChange(CalleeAttrs)) {
+ if (F == Call.getCaller()) // (1)
+ return CallPenaltyChangeSM * DefaultCallPenalty;
+ if (FAttrs.requiresSMChange(SMEAttrs(*Call.getCaller()))) // (2)
+ return InlineCallPenaltyChangeSM * DefaultCallPenalty;
+ }
+
+ return DefaultCallPenalty;
+}
+
bool AArch64TTIImpl::shouldMaximizeVectorBandwidth(
TargetTransformInfo::RegisterKind K) const {
assert(K != TargetTransformInfo::RGK_Scalar);
@@ -822,10 +898,31 @@ instCombineConvertFromSVBool(InstCombiner &IC, IntrinsicInst &II) {
return IC.replaceInstUsesWith(II, EarliestReplacement);
}
+static bool isAllActivePredicate(Value *Pred) {
+ // Look through a convert.from.svbool(convert.to.svbool(...)) chain.
+ Value *UncastedPred;
+ if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_from_svbool>(
+ m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(
+ m_Value(UncastedPred)))))
+ // If the predicate has the same or fewer lanes than the uncasted
+ // predicate then we know the casting has no effect.
+ if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
+ cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
+ Pred = UncastedPred;
+
+ return match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+ m_ConstantInt<AArch64SVEPredPattern::all>()));
+}
+
static std::optional<Instruction *> instCombineSVESel(InstCombiner &IC,
IntrinsicInst &II) {
- auto Select = IC.Builder.CreateSelect(II.getOperand(0), II.getOperand(1),
- II.getOperand(2));
+ // svsel(ptrue, x, y) => x
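+ // (Illustrative IR for this fold, using the usual SVE intrinsic names:
+ //    %p = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ //    %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sel.nxv4i32(
+ //             <vscale x 4 x i1> %p, <vscale x 4 x i32> %x,
+ //             <vscale x 4 x i32> %y)
+ //  here %r simplifies to %x, since pattern 31 means "all lanes active".)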
+ auto *OpPredicate = II.getOperand(0);
+ if (isAllActivePredicate(OpPredicate))
+ return IC.replaceInstUsesWith(II, II.getOperand(1));
+
+ auto Select =
+ IC.Builder.CreateSelect(OpPredicate, II.getOperand(1), II.getOperand(2));
return IC.replaceInstUsesWith(II, Select);
}
@@ -1224,22 +1321,6 @@ instCombineSVEVectorFuseMulAddSub(InstCombiner &IC, IntrinsicInst &II,
return IC.replaceInstUsesWith(II, Res);
}
-static bool isAllActivePredicate(Value *Pred) {
- // Look through convert.from.svbool(convert.to.svbool(...) chain.
- Value *UncastedPred;
- if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_from_svbool>(
- m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(
- m_Value(UncastedPred)))))
- // If the predicate has the same or less lanes than the uncasted
- // predicate then we know the casting has no effect.
- if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
- cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
- Pred = UncastedPred;
-
- return match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
- m_ConstantInt<AArch64SVEPredPattern::all>()));
-}
-
static std::optional<Instruction *>
instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
Value *Pred = II.getOperand(0);
@@ -1967,8 +2048,7 @@ AArch64TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
return TypeSize::getFixed(ST->hasNEON() ? 128 : 0);
case TargetTransformInfo::RGK_ScalableVector:
- if ((ST->isStreaming() || ST->isStreamingCompatible()) &&
- !EnableScalableAutovecInStreamingMode)
+ if (!ST->isSVEAvailable() && !EnableScalableAutovecInStreamingMode)
return TypeSize::getScalable(0);
return TypeSize::getScalable(ST->hasSVE() ? 128 : 0);
@@ -2068,6 +2148,54 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
}
+// s/urhadd instructions implement the following pattern, making the
+// extends free:
+// %x = add ((zext i8 -> i16), 1)
+// %y = (zext i8 -> i16)
+// trunc i16 (lshr (add %x, %y), 1) -> i8
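+//
+// i.e. urhadd computes the rounding average (x + y + 1) >> 1 in the wider
+// type, so the zero/sign extends feeding the pattern cost nothing.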
+//
+bool AArch64TTIImpl::isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst,
+ Type *Src) {
+ // The source should be a legal vector type.
+ if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(DL, Src)) ||
+ (Src->isScalableTy() && !ST->hasSVE2()))
+ return false;
+
+ if (ExtUser->getOpcode() != Instruction::Add || !ExtUser->hasOneUse())
+ return false;
+
+ // Look for trunc/shl/add before trying to match the pattern.
+ const Instruction *Add = ExtUser;
+ auto *AddUser =
+ dyn_cast_or_null<Instruction>(Add->getUniqueUndroppableUser());
+ if (AddUser && AddUser->getOpcode() == Instruction::Add)
+ Add = AddUser;
+
+ auto *Shr = dyn_cast_or_null<Instruction>(Add->getUniqueUndroppableUser());
+ if (!Shr || Shr->getOpcode() != Instruction::LShr)
+ return false;
+
+ auto *Trunc = dyn_cast_or_null<Instruction>(Shr->getUniqueUndroppableUser());
+ if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
+ Src->getScalarSizeInBits() !=
+ cast<CastInst>(Trunc)->getDestTy()->getScalarSizeInBits())
+ return false;
+
+ // Try to match the whole pattern. Ext could be either the first or second
+ // m_ZExtOrSExt matched.
+ Instruction *Ex1, *Ex2;
+ if (!(match(Add, m_c_Add(m_Instruction(Ex1),
+ m_c_Add(m_Instruction(Ex2), m_SpecificInt(1))))))
+ return false;
+
+ // Ensure both extends are of the same type
+ if (match(Ex1, m_ZExtOrSExt(m_Value())) &&
+ Ex1->getOpcode() == Ex2->getOpcode())
+ return true;
+
+ return false;
+}
+
InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
Type *Src,
TTI::CastContextHint CCH,
@@ -2092,6 +2220,11 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
} else // Others are free so long as isWideningInstruction returned true.
return 0;
}
+
+ // The cast will be free for the s/urhadd instructions
+ if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+ isExtPartOfAvgExpr(SingleUser, Dst, Src))
+ return 0;
}
// TODO: Allow non-throughput costs that aren't binary.
@@ -2433,6 +2566,25 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
FP16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
return AdjustCost(Entry->Cost);
+ if ((ISD == ISD::ZERO_EXTEND || ISD == ISD::SIGN_EXTEND) &&
+ CCH == TTI::CastContextHint::Masked && ST->hasSVEorSME() &&
+ TLI->getTypeAction(Src->getContext(), SrcTy) ==
+ TargetLowering::TypePromoteInteger &&
+ TLI->getTypeAction(Dst->getContext(), DstTy) ==
+ TargetLowering::TypeSplitVector) {
+ // The standard behaviour in the backend for these cases is to split the
+ // extend up into two parts:
+ // 1. Perform an extending load or masked load up to the legal type.
+ // 2. Extend the loaded data to the final type.
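+ // For example (illustrative types), a masked zero-extend from
+ // <vscale x 4 x i8> to <vscale x 4 x i64> is costed as an extending
+ // masked load up to the promoted <vscale x 4 x i32> plus an ordinary
+ // extend from there to the split <vscale x 4 x i64> halves.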
+ std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(Src);
+ Type *LegalTy = EVT(SrcLT.second).getTypeForEVT(Src->getContext());
+ InstructionCost Part1 = AArch64TTIImpl::getCastInstrCost(
+ Opcode, LegalTy, Src, CCH, CostKind, I);
+ InstructionCost Part2 = AArch64TTIImpl::getCastInstrCost(
+ Opcode, Dst, LegalTy, TTI::CastContextHint::None, CostKind, I);
+ return Part1 + Part2;
+ }
+
// The BasicTTIImpl version only deals with CCH==TTI::CastContextHint::Normal,
// but we also want to include the TTI::CastContextHint::Masked case too.
if ((ISD == ISD::ZERO_EXTEND || ISD == ISD::SIGN_EXTEND) &&
@@ -2584,6 +2736,18 @@ InstructionCost AArch64TTIImpl::getVectorInstrCost(const Instruction &I,
return getVectorInstrCostHelper(&I, Val, Index, true /* HasRealUse */);
}
+InstructionCost AArch64TTIImpl::getScalarizationOverhead(
+ VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
+ TTI::TargetCostKind CostKind) {
+ if (isa<ScalableVectorType>(Ty))
+ return InstructionCost::getInvalid();
+ if (Ty->getElementType()->isFloatingPointTy())
+ return BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
+ CostKind);
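+ // Integer elements: each demanded lane pays the subtarget's base
+ // insert/extract cost. E.g. 4 demanded lanes, Insert only, and a base cost
+ // of 3 give 4 * (1 + 0) * 3 = 12 (illustrative numbers).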
+ return DemandedElts.popcount() * (Insert + Extract) *
+ ST->getVectorInsertExtractBaseCost();
+}
+
InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
@@ -2874,6 +3038,7 @@ AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
// they may wake up the FP unit, which raises the power consumption. Perhaps
// they could be used with no holds barred (-O3).
Options.LoadSizes = {8, 4, 2, 1};
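+ // Allow 3-, 5- and 6-byte tails as well; these can presumably be covered
+ // by a pair of overlapping 2- or 4-byte loads instead of byte-wise
+ // compares (assumed rationale for the values below).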
+ Options.AllowedTailExpansions = {3, 5, 6};
return Options;
}
@@ -2909,7 +3074,7 @@ static unsigned getSVEGatherScatterOverhead(unsigned Opcode) {
InstructionCost AArch64TTIImpl::getGatherScatterOpCost(
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
- if (useNeonVector(DataTy))
+ if (useNeonVector(DataTy) || !isLegalMaskedGatherScatter(DataTy))
return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment, CostKind, I);
auto *VT = cast<VectorType>(DataTy);
@@ -2917,6 +3082,10 @@ InstructionCost AArch64TTIImpl::getGatherScatterOpCost(
if (!LT.first.isValid())
return InstructionCost::getInvalid();
+ if (!LT.second.isVector() ||
+ !isElementTypeLegalForScalableVector(VT->getElementType()))
+ return InstructionCost::getInvalid();
+
// The code-generator is currently not able to handle scalable vectors
// of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting
// it. This change will be removed when code-generation for these types is
@@ -3296,9 +3465,9 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction(
case RecurKind::UMax:
case RecurKind::FMin:
case RecurKind::FMax:
- case RecurKind::SelectICmp:
- case RecurKind::SelectFCmp:
case RecurKind::FMulAdd:
+ case RecurKind::IAnyOf:
+ case RecurKind::FAnyOf:
return true;
default:
return false;
@@ -3518,11 +3687,8 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// into smaller vectors and sum the cost of each shuffle.
if (!Mask.empty() && isa<FixedVectorType>(Tp) && LT.second.isVector() &&
Tp->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
- cast<FixedVectorType>(Tp)->getNumElements() >
- LT.second.getVectorNumElements() &&
- !Index && !SubTp) {
- unsigned TpNumElts = cast<FixedVectorType>(Tp)->getNumElements();
- assert(Mask.size() == TpNumElts && "Expected Mask and Tp size to match!");
+ Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
+ unsigned TpNumElts = Mask.size();
unsigned LTNumElts = LT.second.getVectorNumElements();
unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
VectorType *NTp =
@@ -3580,7 +3746,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return Cost;
}
- Kind = improveShuffleKindFromMask(Kind, Mask);
+ Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp);
// Check for broadcast loads, which are supported by the LD1R instruction.
// In terms of code-size, the shuffle vector is free when a load + dup get
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index d1977a62a76d..0b220069a388 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -80,6 +80,9 @@ public:
bool areTypesABICompatible(const Function *Caller, const Function *Callee,
const ArrayRef<Type *> &Types) const;
+ unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
+ unsigned DefaultCallPenalty) const;
+
/// \name Scalar TTI Implementations
/// @{
@@ -166,6 +169,8 @@ public:
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
+ bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src);
+
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
@@ -255,7 +260,8 @@ public:
return false;
// For fixed vectors, avoid scalarization if using SVE for them.
- if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
+ if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors() &&
+ DataType->getPrimitiveSizeInBits() != 128)
return false; // Fall back to scalarization of masked operations.
return isElementTypeLegalForScalableVector(DataType->getScalarType());
@@ -285,6 +291,7 @@ public:
bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
return isLegalMaskedGatherScatter(DataType);
}
+
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
return isLegalMaskedGatherScatter(DataType);
}
@@ -388,6 +395,11 @@ public:
VectorType *SubTp,
ArrayRef<const Value *> Args = std::nullopt);
+ InstructionCost getScalarizationOverhead(VectorType *Ty,
+ const APInt &DemandedElts,
+ bool Insert, bool Extract,
+ TTI::TargetCostKind CostKind);
+
/// Return the cost of the scaling factor used in the addressing
/// mode represented by AM for this target, for a load/store
/// of the specified type.
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 4756746063d5..238269cf27bd 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -180,6 +180,8 @@ private:
bool showMatchError(SMLoc Loc, unsigned ErrCode, uint64_t ErrorInfo,
OperandVector &Operands);
+ bool parseAuthExpr(const MCExpr *&Res, SMLoc &EndLoc);
+
bool parseDirectiveArch(SMLoc L);
bool parseDirectiveArchExtension(SMLoc L);
bool parseDirectiveCPU(SMLoc L);
@@ -221,6 +223,7 @@ private:
bool parseDirectiveSEHTrapFrame(SMLoc L);
bool parseDirectiveSEHMachineFrame(SMLoc L);
bool parseDirectiveSEHContext(SMLoc L);
+ bool parseDirectiveSEHECContext(SMLoc L);
bool parseDirectiveSEHClearUnwoundToCall(SMLoc L);
bool parseDirectiveSEHPACSignLR(SMLoc L);
bool parseDirectiveSEHSaveAnyReg(SMLoc L, bool Paired, bool Writeback);
@@ -316,14 +319,15 @@ public:
const MCParsedAsmOperand &Op2) const override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
- bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
- OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseDirective(AsmToken DirectiveID) override;
unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
+ bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
+
static bool classifySymbolRef(const MCExpr *Expr,
AArch64MCExpr::VariantKind &ELFRefKind,
MCSymbolRefExpr::VariantKind &DarwinRefKind,
@@ -1220,6 +1224,12 @@ public:
Reg.RegNum));
}
+ bool isNeonVectorReg0to7() const {
+ return Kind == k_Register && Reg.Kind == RegKind::NeonVector &&
+ (AArch64MCRegisterClasses[AArch64::FPR128_0to7RegClassID].contains(
+ Reg.RegNum));
+ }
+
bool isMatrix() const { return Kind == k_MatrixRegister; }
bool isMatrixTileList() const { return Kind == k_MatrixTileList; }
@@ -1229,6 +1239,8 @@ public:
case AArch64::PPRRegClassID:
case AArch64::PPR_3bRegClassID:
case AArch64::PPR_p8to15RegClassID:
+ case AArch64::PNRRegClassID:
+ case AArch64::PNR_p8to15RegClassID:
RK = RegKind::SVEPredicateAsCounter;
break;
default:
@@ -1249,6 +1261,9 @@ public:
break;
case AArch64::PPRRegClassID:
case AArch64::PPR_3bRegClassID:
+ case AArch64::PPR_p8to15RegClassID:
+ case AArch64::PNRRegClassID:
+ case AArch64::PNR_p8to15RegClassID:
RK = RegKind::SVEPredicateVector;
break;
default:
@@ -1733,6 +1748,12 @@ public:
Inst.addOperand(MCOperand::createReg(AArch64::Z0 + getReg() - Base));
}
+ void addPNRasPPRRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
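+ // PN0..PN15 mirror P0..P15, so translate by register index (e.g. PN3 is
+ // emitted as P3).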
+ Inst.addOperand(
+ MCOperand::createReg((getReg() - AArch64::PN0) + AArch64::P0));
+ }
+
void addVectorReg64Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
assert(
@@ -1752,6 +1773,11 @@ public:
Inst.addOperand(MCOperand::createReg(getReg()));
}
+ void addVectorReg0to7Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getReg()));
+ }
+
enum VecListIndexType {
VecListIdx_DReg = 0,
VecListIdx_QReg = 1,
@@ -2584,31 +2610,31 @@ static std::optional<std::pair<int, int>> parseVectorKind(StringRef Suffix,
switch (VectorKind) {
case RegKind::NeonVector:
- Res =
- StringSwitch<std::pair<int, int>>(Suffix.lower())
- .Case("", {0, 0})
- .Case(".1d", {1, 64})
- .Case(".1q", {1, 128})
- // '.2h' needed for fp16 scalar pairwise reductions
- .Case(".2h", {2, 16})
- .Case(".2s", {2, 32})
- .Case(".2d", {2, 64})
- // '.4b' is another special case for the ARMv8.2a dot product
- // operand
- .Case(".4b", {4, 8})
- .Case(".4h", {4, 16})
- .Case(".4s", {4, 32})
- .Case(".8b", {8, 8})
- .Case(".8h", {8, 16})
- .Case(".16b", {16, 8})
- // Accept the width neutral ones, too, for verbose syntax. If those
- // aren't used in the right places, the token operand won't match so
- // all will work out.
- .Case(".b", {0, 8})
- .Case(".h", {0, 16})
- .Case(".s", {0, 32})
- .Case(".d", {0, 64})
- .Default({-1, -1});
+ Res = StringSwitch<std::pair<int, int>>(Suffix.lower())
+ .Case("", {0, 0})
+ .Case(".1d", {1, 64})
+ .Case(".1q", {1, 128})
+ // '.2h' needed for fp16 scalar pairwise reductions
+ .Case(".2h", {2, 16})
+ .Case(".2b", {2, 8})
+ .Case(".2s", {2, 32})
+ .Case(".2d", {2, 64})
+ // '.4b' is another special case for the ARMv8.2a dot product
+ // operand
+ .Case(".4b", {4, 8})
+ .Case(".4h", {4, 16})
+ .Case(".4s", {4, 32})
+ .Case(".8b", {8, 8})
+ .Case(".8h", {8, 16})
+ .Case(".16b", {16, 8})
+ // Accept the width neutral ones, too, for verbose syntax. If
+ // those aren't used in the right places, the token operand won't
+ // match so all will work out.
+ .Case(".b", {0, 8})
+ .Case(".h", {0, 16})
+ .Case(".s", {0, 32})
+ .Case(".d", {0, 64})
+ .Default({-1, -1});
break;
case RegKind::SVEPredicateAsCounter:
case RegKind::SVEPredicateVector:
@@ -2697,22 +2723,22 @@ static unsigned matchSVEPredicateVectorRegName(StringRef Name) {
static unsigned matchSVEPredicateAsCounterRegName(StringRef Name) {
return StringSwitch<unsigned>(Name.lower())
- .Case("pn0", AArch64::P0)
- .Case("pn1", AArch64::P1)
- .Case("pn2", AArch64::P2)
- .Case("pn3", AArch64::P3)
- .Case("pn4", AArch64::P4)
- .Case("pn5", AArch64::P5)
- .Case("pn6", AArch64::P6)
- .Case("pn7", AArch64::P7)
- .Case("pn8", AArch64::P8)
- .Case("pn9", AArch64::P9)
- .Case("pn10", AArch64::P10)
- .Case("pn11", AArch64::P11)
- .Case("pn12", AArch64::P12)
- .Case("pn13", AArch64::P13)
- .Case("pn14", AArch64::P14)
- .Case("pn15", AArch64::P15)
+ .Case("pn0", AArch64::PN0)
+ .Case("pn1", AArch64::PN1)
+ .Case("pn2", AArch64::PN2)
+ .Case("pn3", AArch64::PN3)
+ .Case("pn4", AArch64::PN4)
+ .Case("pn5", AArch64::PN5)
+ .Case("pn6", AArch64::PN6)
+ .Case("pn7", AArch64::PN7)
+ .Case("pn8", AArch64::PN8)
+ .Case("pn9", AArch64::PN9)
+ .Case("pn10", AArch64::PN10)
+ .Case("pn11", AArch64::PN11)
+ .Case("pn12", AArch64::PN12)
+ .Case("pn13", AArch64::PN13)
+ .Case("pn14", AArch64::PN14)
+ .Case("pn15", AArch64::PN15)
.Default(0);
}
@@ -2835,16 +2861,15 @@ static unsigned matchMatrixRegName(StringRef Name) {
.Default(0);
}
-bool AArch64AsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
+bool AArch64AsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) {
- return tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success;
+ return !tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
}
-OperandMatchResultTy AArch64AsmParser::tryParseRegister(MCRegister &RegNo,
- SMLoc &StartLoc,
- SMLoc &EndLoc) {
+ParseStatus AArch64AsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
StartLoc = getLoc();
- auto Res = tryParseScalarRegister(RegNo);
+ ParseStatus Res = tryParseScalarRegister(Reg);
EndLoc = SMLoc::getFromPointer(getLoc().getPointer() - 1);
return Res;
}
@@ -3225,7 +3250,7 @@ ParseStatus AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
}
// Parse hexadecimal representation.
- if (Tok.is(AsmToken::Integer) && Tok.getString().startswith("0x")) {
+ if (Tok.is(AsmToken::Integer) && Tok.getString().starts_with("0x")) {
if (Tok.getIntVal() > 255 || isNegative)
return TokError("encoded floating point value out of range");
@@ -3625,6 +3650,20 @@ static const struct Extension {
{"sb", {AArch64::FeatureSB}},
{"ssbs", {AArch64::FeatureSSBS}},
{"tme", {AArch64::FeatureTME}},
+ {"fpmr", {AArch64::FeatureFPMR}},
+ {"fp8", {AArch64::FeatureFP8}},
+ {"faminmax", {AArch64::FeatureFAMINMAX}},
+ {"fp8fma", {AArch64::FeatureFP8FMA}},
+ {"ssve-fp8fma", {AArch64::FeatureSSVE_FP8FMA}},
+ {"fp8dot2", {AArch64::FeatureFP8DOT2}},
+ {"ssve-fp8dot2", {AArch64::FeatureSSVE_FP8DOT2}},
+ {"fp8dot4", {AArch64::FeatureFP8DOT4}},
+ {"ssve-fp8dot4", {AArch64::FeatureSSVE_FP8DOT4}},
+ {"lut", {AArch64::FeatureLUT}},
+ {"sme-lutv2", {AArch64::FeatureSME_LUTv2}},
+ {"sme-f8f16", {AArch64::FeatureSMEF8F16}},
+ {"sme-f8f32", {AArch64::FeatureSMEF8F32}},
+ {"sme-fa64", {AArch64::FeatureSMEFA64}},
};
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
@@ -3658,6 +3697,8 @@ static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
Str += "ARMv9.3a";
else if (FBS[AArch64::HasV9_4aOps])
Str += "ARMv9.4a";
+ else if (FBS[AArch64::HasV9_5aOps])
+ Str += "ARMv9.5a";
else if (FBS[AArch64::HasV8_0rOps])
Str += "ARMv8r";
else {
@@ -4520,24 +4561,29 @@ ParseStatus AArch64AsmParser::tryParseZTOperand(OperandVector &Operands) {
Operands.push_back(AArch64Operand::CreateReg(
RegNum, RegKind::LookupTable, StartLoc, getLoc(), getContext()));
- Lex(); // Eat identifier token.
+ Lex(); // Eat register.
// Check if register is followed by an index
if (parseOptionalToken(AsmToken::LBrac)) {
+ Operands.push_back(
+ AArch64Operand::CreateToken("[", getLoc(), getContext()));
const MCExpr *ImmVal;
if (getParser().parseExpression(ImmVal))
return ParseStatus::NoMatch;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
if (!MCE)
return TokError("immediate value expected for vector index");
- if (parseToken(AsmToken::RBrac, "']' expected"))
- return ParseStatus::Failure;
-
Operands.push_back(AArch64Operand::CreateImm(
MCConstantExpr::create(MCE->getValue(), getContext()), StartLoc,
getLoc(), getContext()));
+ if (parseOptionalToken(AsmToken::Comma))
+ if (parseOptionalMulOperand(Operands))
+ return ParseStatus::Failure;
+ if (parseToken(AsmToken::RBrac, "']' expected"))
+ return ParseStatus::Failure;
+ Operands.push_back(
+ AArch64Operand::CreateToken("]", getLoc(), getContext()));
}
-
return ParseStatus::Success;
}
@@ -6695,6 +6741,8 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
parseDirectiveSEHMachineFrame(Loc);
else if (IDVal == ".seh_context")
parseDirectiveSEHContext(Loc);
+ else if (IDVal == ".seh_ec_context")
+ parseDirectiveSEHECContext(Loc);
else if (IDVal == ".seh_clear_unwound_to_call")
parseDirectiveSEHClearUnwoundToCall(Loc);
else if (IDVal == ".seh_pac_sign_lr")
@@ -7359,6 +7407,13 @@ bool AArch64AsmParser::parseDirectiveSEHContext(SMLoc L) {
return false;
}
+/// parseDirectiveSEHECContext
+/// ::= .seh_ec_context
+bool AArch64AsmParser::parseDirectiveSEHECContext(SMLoc L) {
+ getTargetStreamer().emitARM64WinCFIECContext();
+ return false;
+}
+
/// parseDirectiveSEHClearUnwoundToCall
/// ::= .seh_clear_unwound_to_call
bool AArch64AsmParser::parseDirectiveSEHClearUnwoundToCall(SMLoc L) {
@@ -7451,6 +7506,112 @@ bool AArch64AsmParser::parseDirectiveSEHSaveAnyReg(SMLoc L, bool Paired,
return false;
}
+bool AArch64AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+ // Try @AUTH expressions: they're more complex than the usual symbol variants.
+ if (!parseAuthExpr(Res, EndLoc))
+ return false;
+ return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
+}
+
+/// parseAuthExpr
+/// ::= _sym@AUTH(ib,123[,addr])
+/// ::= (_sym + 5)@AUTH(ib,123[,addr])
+/// ::= (_sym - 5)@AUTH(ib,123[,addr])
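+///
+/// e.g., in a data directive (illustrative use; the symbol name is
+/// hypothetical):
+///   .quad _sym@AUTH(da,42,addr)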
+bool AArch64AsmParser::parseAuthExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+ MCAsmParser &Parser = getParser();
+ MCContext &Ctx = getContext();
+
+ AsmToken Tok = Parser.getTok();
+
+ // Look for '_sym@AUTH' ...
+ if (Tok.is(AsmToken::Identifier) && Tok.getIdentifier().ends_with("@AUTH")) {
+ StringRef SymName = Tok.getIdentifier().drop_back(strlen("@AUTH"));
+ if (SymName.contains('@'))
+ return TokError(
+ "combination of @AUTH with other modifiers not supported");
+ Res = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(SymName), Ctx);
+
+ Parser.Lex(); // Eat the identifier.
+ } else {
+ // ... or look for a more complex symbol reference, such as ...
+ SmallVector<AsmToken, 6> Tokens;
+
+ // ... '"_long sym"@AUTH' ...
+ if (Tok.is(AsmToken::String))
+ Tokens.resize(2);
+ // ... or '(_sym + 5)@AUTH'.
+ else if (Tok.is(AsmToken::LParen))
+ Tokens.resize(6);
+ else
+ return true;
+
+ if (Parser.getLexer().peekTokens(Tokens) != Tokens.size())
+ return true;
+
+ // In either case, the expression ends with '@' 'AUTH'.
+ if (Tokens[Tokens.size() - 2].isNot(AsmToken::At) ||
+ Tokens[Tokens.size() - 1].isNot(AsmToken::Identifier) ||
+ Tokens[Tokens.size() - 1].getIdentifier() != "AUTH")
+ return true;
+
+ if (Tok.is(AsmToken::String)) {
+ StringRef SymName;
+ if (Parser.parseIdentifier(SymName))
+ return true;
+ Res = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(SymName), Ctx);
+ } else {
+ if (Parser.parsePrimaryExpr(Res, EndLoc, nullptr))
+ return true;
+ }
+
+ Parser.Lex(); // '@'
+ Parser.Lex(); // 'AUTH'
+ }
+
+ // At this point, we encountered "<id>@AUTH". There is no fallback anymore.
+ if (parseToken(AsmToken::LParen, "expected '('"))
+ return true;
+
+ if (Parser.getTok().isNot(AsmToken::Identifier))
+ return TokError("expected key name");
+
+ StringRef KeyStr = Parser.getTok().getIdentifier();
+ auto KeyIDOrNone = AArch64StringToPACKeyID(KeyStr);
+ if (!KeyIDOrNone)
+ return TokError("invalid key '" + KeyStr + "'");
+ Parser.Lex();
+
+ if (parseToken(AsmToken::Comma, "expected ','"))
+ return true;
+
+ if (Parser.getTok().isNot(AsmToken::Integer))
+ return TokError("expected integer discriminator");
+ int64_t Discriminator = Parser.getTok().getIntVal();
+
+ if (!isUInt<16>(Discriminator))
+ return TokError("integer discriminator " + Twine(Discriminator) +
+ " out of range [0, 0xFFFF]");
+ Parser.Lex();
+
+ bool UseAddressDiversity = false;
+ if (Parser.getTok().is(AsmToken::Comma)) {
+ Parser.Lex();
+ if (Parser.getTok().isNot(AsmToken::Identifier) ||
+ Parser.getTok().getIdentifier() != "addr")
+ return TokError("expected 'addr'");
+ UseAddressDiversity = true;
+ Parser.Lex();
+ }
+
+ EndLoc = Parser.getTok().getEndLoc();
+ if (parseToken(AsmToken::RParen, "expected ')'"))
+ return true;
+
+ Res = AArch64AuthMCExpr::create(Res, Discriminator, *KeyIDOrNone,
+ UseAddressDiversity, Ctx);
+ return false;
+}
+
bool
AArch64AsmParser::classifySymbolRef(const MCExpr *Expr,
AArch64MCExpr::VariantKind &ELFRefKind,
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index e50ac5c92d50..cf2d3879292d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -44,6 +44,9 @@ static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeFPR128_0to7RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const MCDisassembler *Decoder);
@@ -140,11 +143,14 @@ DecodeMatrixTileListRegisterClass(MCInst &Inst, unsigned RegMask,
static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const MCDisassembler *Decoder);
+static DecodeStatus DecodePNRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodePPR_3bRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const MCDisassembler *Decoder);
static DecodeStatus
-DecodePPR_p8to15RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+DecodePNR_p8to15RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
const MCDisassembler *Decoder);
static DecodeStatus DecodePPR2RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
@@ -434,6 +440,14 @@ DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr,
return DecodeFPR128RegisterClass(Inst, RegNo, Addr, Decoder);
}
+static DecodeStatus
+DecodeFPR128_0to7RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr,
+ const MCDisassembler *Decoder) {
+ if (RegNo > 7)
+ return Fail;
+ return DecodeFPR128RegisterClass(Inst, RegNo, Addr, Decoder);
+}
+
static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const MCDisassembler *Decoder) {
@@ -736,6 +750,18 @@ static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
return Success;
}
+static DecodeStatus DecodePNRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
+ if (RegNo > 15)
+ return Fail;
+
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::PNRRegClassID].getRegister(RegNo);
+ Inst.addOperand(MCOperand::createReg(Register));
+ return Success;
+}
+
static DecodeStatus DecodePPR_3bRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const MCDisassembler *Decoder) {
@@ -747,13 +773,13 @@ static DecodeStatus DecodePPR_3bRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus
-DecodePPR_p8to15RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr,
+DecodePNR_p8to15RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr,
const MCDisassembler *Decoder) {
if (RegNo > 7)
return Fail;
- // Just reuse the PPR decode table
+ // Just reuse the PNR decode table
- return DecodePPRRegisterClass(Inst, RegNo + 8, Addr, Decoder);
+ return DecodePNRRegisterClass(Inst, RegNo + 8, Addr, Decoder);
}
static DecodeStatus DecodePPR2RegisterClass(MCInst &Inst, unsigned RegNo,
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index c56e3373d3a7..84057ea8d221 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -158,13 +158,14 @@ struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) override {
+ const CCValAssign &VA) override {
markPhysRegUsed(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
- MachinePointerInfo &MPO, CCValAssign &VA) override {
+ const MachinePointerInfo &MPO,
+ const CCValAssign &VA) override {
MachineFunction &MF = MIRBuilder.getMF();
LLT ValTy(VA.getValVT());
@@ -283,14 +284,15 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) override {
+ const CCValAssign &VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
}
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
- MachinePointerInfo &MPO, CCValAssign &VA) override {
+ const MachinePointerInfo &MPO,
+ const CCValAssign &VA) override {
MachineFunction &MF = MIRBuilder.getMF();
auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
inferAlignFromPtrInfo(MF, MPO));
@@ -298,8 +300,9 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
}
void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex,
- Register Addr, LLT MemTy, MachinePointerInfo &MPO,
- CCValAssign &VA) override {
+ Register Addr, LLT MemTy,
+ const MachinePointerInfo &MPO,
+ const CCValAssign &VA) override {
unsigned MaxSize = MemTy.getSizeInBytes() * 8;
// For varargs, we always want to extend them to 8 bytes, in which case
// we disable setting a max.
@@ -532,8 +535,8 @@ bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
}
SMEAttrs Attrs(F);
- if (Attrs.hasNewZAInterface() ||
- (!Attrs.hasStreamingInterface() && Attrs.hasStreamingBody()))
+ if (Attrs.hasZAState() || Attrs.hasStreamingInterfaceOrBody() ||
+ Attrs.hasStreamingCompatibleInterface())
return true;
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index f1ba1aa7ba89..bdaae4dd724d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -37,6 +37,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
@@ -102,6 +103,11 @@ private:
// An early selection function that runs before the selectImpl() call.
bool earlySelect(MachineInstr &I);
+ /// Save state that is shared between select calls, call select on \p I and
+ /// then restore the saved state. This can be used to recursively call select
+ /// within a select call.
+ bool selectAndRestoreState(MachineInstr &I);
+
// Do some preprocessing of G_PHIs before we begin selection.
void processPHIs(MachineFunction &MF);
@@ -143,6 +149,12 @@ private:
const TargetRegisterClass *DstRC,
Register Scalar,
MachineIRBuilder &MIRBuilder) const;
+ /// Helper to narrow vector that was widened by emitScalarToVector.
+ /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit
+ /// vector, correspondingly.
+ MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
+ MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI) const;
/// Emit a lane insert into \p DstReg, or a new vector register if
/// std::nullopt is provided.
@@ -164,6 +176,21 @@ private:
MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI);
+ MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
+ MachineIRBuilder &MIRBuilder);
+
+ MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
+ MachineIRBuilder &MIRBuilder, bool Inv);
+
+ MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
+ MachineIRBuilder &MIRBuilder, bool Inv);
+ MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
+ MachineIRBuilder &MIRBuilder);
+ MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
+ MachineIRBuilder &MIRBuilder, bool Inv);
+ MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
+ MachineIRBuilder &MIRBuilder);
+
bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
MachineRegisterInfo &MRI);
@@ -186,12 +213,16 @@ private:
/// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
MachineInstr &I);
+ bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
+ MachineInstr &I);
+ void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
+ unsigned Opc);
+ bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
+ unsigned Opc);
bool selectIntrinsicWithSideEffects(MachineInstr &I,
MachineRegisterInfo &MRI);
bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
- bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
- bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
@@ -199,6 +230,10 @@ private:
bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
+
unsigned emitConstantPoolEntry(const Constant *CPVal,
MachineFunction &MF) const;
MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
@@ -2226,7 +2261,7 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
// Before selecting a DUP instruction, check if it is better selected as a
// MOV or load from a constant pool.
Register Src = I.getOperand(1).getReg();
- auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
+ auto ValAndVReg = getAnyConstantVRegValWithLookThrough(Src, MRI);
if (!ValAndVReg)
return false;
LLVMContext &Ctx = MF.getFunction().getContext();
@@ -2631,12 +2666,17 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
default:
llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
case 32:
- // For s32, use a cp load if we have optsize/minsize.
- if (!shouldOptForSize(&MF))
+ case 64: {
+ bool OptForSize = shouldOptForSize(&MF);
+ const auto &TLI = MF.getSubtarget().getTargetLowering();
+ // If TLI says that this fpimm is illegal, then we'll expand to a
+ // constant pool load.
+ if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
+ EVT::getFloatingPointVT(DefSize), OptForSize))
break;
[[fallthrough]];
+ }
case 16:
- case 64:
case 128: {
auto *FPImm = I.getOperand(1).getFPImm();
auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
@@ -2650,11 +2690,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
}
+ assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
// Either emit a FMOV, or emit a copy to emit a normal mov.
- assert(DefSize == 32 &&
- "Expected constant pool loads for all sizes other than 32!");
- const Register DefGPRReg =
- MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ const Register DefGPRReg = MRI.createVirtualRegister(
+ DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
MachineOperand &RegOp = I.getOperand(0);
RegOp.setReg(DefGPRReg);
MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
@@ -2810,7 +2849,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
if (OpFlags & AArch64II::MO_GOT) {
I.setDesc(TII.get(AArch64::LOADgot));
I.getOperand(1).setTargetFlags(OpFlags);
- } else if (TM.getCodeModel() == CodeModel::Large) {
+ } else if (TM.getCodeModel() == CodeModel::Large &&
+ !TM.isPositionIndependent()) {
// Materialize the global using movz/movk instructions.
materializeLargeCMVal(I, GV, OpFlags);
I.eraseFromParent();
@@ -3009,6 +3049,14 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
}
+ case TargetOpcode::G_INDEXED_ZEXTLOAD:
+ case TargetOpcode::G_INDEXED_SEXTLOAD:
+ return selectIndexedExtLoad(I, MRI);
+ case TargetOpcode::G_INDEXED_LOAD:
+ return selectIndexedLoad(I, MRI);
+ case TargetOpcode::G_INDEXED_STORE:
+ return selectIndexedStore(cast<GIndexedStore>(I), MRI);
+
case TargetOpcode::G_SMULH:
case TargetOpcode::G_UMULH: {
// Reject the various things we don't support yet.
@@ -3455,7 +3503,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return true;
}
case TargetOpcode::G_BLOCK_ADDR: {
- if (TM.getCodeModel() == CodeModel::Large) {
+ if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
I.eraseFromParent();
return true;
@@ -3494,10 +3542,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return false;
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
- case TargetOpcode::G_INTRINSIC_TRUNC:
- return selectIntrinsicTrunc(I, MRI);
- case TargetOpcode::G_INTRINSIC_ROUND:
- return selectIntrinsicRound(I, MRI);
case TargetOpcode::G_BUILD_VECTOR:
return selectBuildVector(I, MRI);
case TargetOpcode::G_MERGE_VALUES:
@@ -3514,8 +3558,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return selectConcatVectors(I, MRI);
case TargetOpcode::G_JUMP_TABLE:
return selectJumpTable(I, MRI);
- case TargetOpcode::G_VECREDUCE_ADD:
- return selectReduction(I, MRI);
case TargetOpcode::G_MEMCPY:
case TargetOpcode::G_MEMCPY_INLINE:
case TargetOpcode::G_MEMMOVE:
@@ -3527,43 +3569,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return false;
}
-bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
- MachineRegisterInfo &MRI) {
- Register VecReg = I.getOperand(1).getReg();
- LLT VecTy = MRI.getType(VecReg);
- if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
- // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
- // a subregister copy afterwards.
- if (VecTy == LLT::fixed_vector(2, 32)) {
- Register DstReg = I.getOperand(0).getReg();
- auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
- {VecReg, VecReg});
- auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
- .addReg(AddP.getReg(0), 0, AArch64::ssub)
- .getReg(0);
- RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
- }
-
- unsigned Opc = 0;
- if (VecTy == LLT::fixed_vector(16, 8))
- Opc = AArch64::ADDVv16i8v;
- else if (VecTy == LLT::fixed_vector(8, 16))
- Opc = AArch64::ADDVv8i16v;
- else if (VecTy == LLT::fixed_vector(4, 32))
- Opc = AArch64::ADDVv4i32v;
- else if (VecTy == LLT::fixed_vector(2, 64))
- Opc = AArch64::ADDPv2i64p;
- else {
- LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
- return false;
- }
- I.setDesc(TII.get(Opc));
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
-
- return false;
+bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
+ MachineIRBuilderState OldMIBState = MIB.getState();
+ bool Success = select(I);
+ MIB.setState(OldMIBState);
+ return Success;
}
bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
@@ -3638,6 +3648,9 @@ bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
{TargetReg, ScratchReg}, {JTAddr, Index})
.addJumpTableIndex(JTI);
+ // Save the jump table info.
+ MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
+ {static_cast<int64_t>(JTI)});
// Build the indirect branch.
MIB.buildInstr(AArch64::BR, {}, {TargetReg});
I.eraseFromParent();
@@ -3696,116 +3709,6 @@ bool AArch64InstructionSelector::selectTLSGlobalValue(
return true;
}
-bool AArch64InstructionSelector::selectIntrinsicTrunc(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
- const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
-
- // Select the correct opcode.
- unsigned Opc = 0;
- if (!SrcTy.isVector()) {
- switch (SrcTy.getSizeInBits()) {
- default:
- case 16:
- Opc = AArch64::FRINTZHr;
- break;
- case 32:
- Opc = AArch64::FRINTZSr;
- break;
- case 64:
- Opc = AArch64::FRINTZDr;
- break;
- }
- } else {
- unsigned NumElts = SrcTy.getNumElements();
- switch (SrcTy.getElementType().getSizeInBits()) {
- default:
- break;
- case 16:
- if (NumElts == 4)
- Opc = AArch64::FRINTZv4f16;
- else if (NumElts == 8)
- Opc = AArch64::FRINTZv8f16;
- break;
- case 32:
- if (NumElts == 2)
- Opc = AArch64::FRINTZv2f32;
- else if (NumElts == 4)
- Opc = AArch64::FRINTZv4f32;
- break;
- case 64:
- if (NumElts == 2)
- Opc = AArch64::FRINTZv2f64;
- break;
- }
- }
-
- if (!Opc) {
- // Didn't get an opcode above, bail.
- LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
- return false;
- }
-
- // Legalization would have set us up perfectly for this; we just need to
- // set the opcode and move on.
- I.setDesc(TII.get(Opc));
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
-}
-
-bool AArch64InstructionSelector::selectIntrinsicRound(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
- const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
-
- // Select the correct opcode.
- unsigned Opc = 0;
- if (!SrcTy.isVector()) {
- switch (SrcTy.getSizeInBits()) {
- default:
- case 16:
- Opc = AArch64::FRINTAHr;
- break;
- case 32:
- Opc = AArch64::FRINTASr;
- break;
- case 64:
- Opc = AArch64::FRINTADr;
- break;
- }
- } else {
- unsigned NumElts = SrcTy.getNumElements();
- switch (SrcTy.getElementType().getSizeInBits()) {
- default:
- break;
- case 16:
- if (NumElts == 4)
- Opc = AArch64::FRINTAv4f16;
- else if (NumElts == 8)
- Opc = AArch64::FRINTAv8f16;
- break;
- case 32:
- if (NumElts == 2)
- Opc = AArch64::FRINTAv2f32;
- else if (NumElts == 4)
- Opc = AArch64::FRINTAv4f32;
- break;
- case 64:
- if (NumElts == 2)
- Opc = AArch64::FRINTAv2f64;
- break;
- }
- }
-
- if (!Opc) {
- // Didn't get an opcode above, bail.
- LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
- return false;
- }
-
- // Legalization would have set us up perfectly for this; we just need to
- // set the opcode and move on.
- I.setDesc(TII.get(Opc));
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
-}
-
bool AArch64InstructionSelector::selectVectorICmp(
MachineInstr &I, MachineRegisterInfo &MRI) {
Register DstReg = I.getOperand(0).getReg();
@@ -4006,6 +3909,31 @@ MachineInstr *AArch64InstructionSelector::emitScalarToVector(
}
}
+MachineInstr *
+AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
+ MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) const {
+ LLT DstTy = MRI.getType(DstReg);
+ const TargetRegisterClass *RC =
+ getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
+ if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
+ LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
+ return nullptr;
+ }
+ unsigned SubReg = 0;
+ if (!getSubRegForClass(RC, TRI, SubReg))
+ return nullptr;
+ if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
+ LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
+                      << DstTy.getSizeInBits() << ")\n");
+ return nullptr;
+ }
+ auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
+ .addReg(SrcReg, 0, SubReg);
+ RBI.constrainGenericRegister(DstReg, *RC, MRI);
+ return Copy;
+}
+
bool AArch64InstructionSelector::selectMergeValues(
MachineInstr &I, MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
@@ -4802,11 +4730,17 @@ MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
// emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
// generated during legalization of wide add/sub. This optimization depends on
// these sequences not being interrupted by other instructions.
+  // We have to select the previous instruction before the carry-using
+  // instruction is deleted by the calling function; otherwise the previous
+  // instruction might become dead and be deleted prematurely.
MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
if (SrcMI == I.getPrevNode()) {
if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
bool ProducesNegatedCarry = CarrySrcMI->isSub();
- if (NeedsNegatedCarry == ProducesNegatedCarry && CarrySrcMI->isUnsigned())
+ if (NeedsNegatedCarry == ProducesNegatedCarry &&
+ CarrySrcMI->isUnsigned() &&
+ CarrySrcMI->getCarryOutReg() == CarryReg &&
+ selectAndRestoreState(*SrcMI))
return nullptr;
}
}
@@ -5493,24 +5427,8 @@ bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
if (VecSize < 128) {
// If we had to widen to perform the insert, then we have to demote back to
// the original size to get the result we want.
- Register DemoteVec = InsMI->getOperand(0).getReg();
- const TargetRegisterClass *RC =
- getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DemoteVec, MRI, TRI));
- if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
- LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
+ if (!emitNarrowVector(DstReg, InsMI->getOperand(0).getReg(), MIB, MRI))
return false;
- }
- unsigned SubReg = 0;
- if (!getSubRegForClass(RC, TRI, SubReg))
- return false;
- if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
- LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
- << "\n");
- return false;
- }
- MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
- .addReg(DemoteVec, 0, SubReg);
- RBI.constrainGenericRegister(DstReg, *RC, MRI);
} else {
// No widening needed.
InsMI->getOperand(0).setReg(DstReg);
@@ -5521,6 +5439,359 @@ bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
return true;
}
+MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
+ Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
+ unsigned int Op;
+ if (DstSize == 128) {
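+    // A 128-bit MOVI materializes a replicated 64-bit pattern, so both
+    // halves of the immediate must match.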
+ if (Bits.getHiBits(64) != Bits.getLoBits(64))
+ return nullptr;
+ Op = AArch64::MOVIv16b_ns;
+ } else {
+ Op = AArch64::MOVIv8b_ns;
+ }
+
+ uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
+
+ if (AArch64_AM::isAdvSIMDModImmType9(Val)) {
+ Val = AArch64_AM::encodeAdvSIMDModImmType9(Val);
+ auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
+ constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+ return &*Mov;
+ }
+ return nullptr;
+}
+
+MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
+ Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
+ bool Inv) {
+
+ unsigned int Op;
+ if (DstSize == 128) {
+ if (Bits.getHiBits(64) != Bits.getLoBits(64))
+ return nullptr;
+ Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
+ } else {
+ Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
+ }
+
+ uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
+ uint64_t Shift;
+
+ if (AArch64_AM::isAdvSIMDModImmType5(Val)) {
+ Val = AArch64_AM::encodeAdvSIMDModImmType5(Val);
+ Shift = 0;
+ } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
+ Val = AArch64_AM::encodeAdvSIMDModImmType6(Val);
+ Shift = 8;
+ } else
+ return nullptr;
+
+ auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
+ constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+ return &*Mov;
+}
+
+MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
+ Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
+ bool Inv) {
+
+ unsigned int Op;
+ if (DstSize == 128) {
+ if (Bits.getHiBits(64) != Bits.getLoBits(64))
+ return nullptr;
+ Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
+ } else {
+ Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
+ }
+
+ uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
+ uint64_t Shift;
+
+ if ((AArch64_AM::isAdvSIMDModImmType1(Val))) {
+ Val = AArch64_AM::encodeAdvSIMDModImmType1(Val);
+ Shift = 0;
+ } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
+ Val = AArch64_AM::encodeAdvSIMDModImmType2(Val);
+ Shift = 8;
+ } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
+ Val = AArch64_AM::encodeAdvSIMDModImmType3(Val);
+ Shift = 16;
+ } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
+ Val = AArch64_AM::encodeAdvSIMDModImmType4(Val);
+ Shift = 24;
+ } else
+ return nullptr;
+
+ auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
+ constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+ return &*Mov;
+}
+
+MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
+ Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
+
+ unsigned int Op;
+ if (DstSize == 128) {
+ if (Bits.getHiBits(64) != Bits.getLoBits(64))
+ return nullptr;
+ Op = AArch64::MOVIv2d_ns;
+ } else {
+ Op = AArch64::MOVID;
+ }
+
+ uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
+ if (AArch64_AM::isAdvSIMDModImmType10(Val)) {
+ Val = AArch64_AM::encodeAdvSIMDModImmType10(Val);
+ auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
+ constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+ return &*Mov;
+ }
+ return nullptr;
+}
+
+MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
+ Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
+ bool Inv) {
+
+ unsigned int Op;
+ if (DstSize == 128) {
+ if (Bits.getHiBits(64) != Bits.getLoBits(64))
+ return nullptr;
+ Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
+ } else {
+ Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
+ }
+
+ uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
+ uint64_t Shift;
+
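+  // The shift amounts below are AArch64_AM shifter-operand encodings rather
+  // than raw bit counts: 264 encodes "MSL #8" and 272 encodes "MSL #16"
+  // ((MSL type 4 << 6) | amount).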
+ if (AArch64_AM::isAdvSIMDModImmType7(Val)) {
+ Val = AArch64_AM::encodeAdvSIMDModImmType7(Val);
+ Shift = 264;
+ } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
+ Val = AArch64_AM::encodeAdvSIMDModImmType8(Val);
+ Shift = 272;
+ } else
+ return nullptr;
+
+ auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
+ constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+ return &*Mov;
+}
+
+MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
+ Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
+
+ unsigned int Op;
+ bool IsWide = false;
+ if (DstSize == 128) {
+ if (Bits.getHiBits(64) != Bits.getLoBits(64))
+ return nullptr;
+ Op = AArch64::FMOVv4f32_ns;
+ IsWide = true;
+ } else {
+ Op = AArch64::FMOVv2f32_ns;
+ }
+
+ uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
+
+ if (AArch64_AM::isAdvSIMDModImmType11(Val)) {
+ Val = AArch64_AM::encodeAdvSIMDModImmType11(Val);
+ } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
+ Val = AArch64_AM::encodeAdvSIMDModImmType12(Val);
+ Op = AArch64::FMOVv2f64_ns;
+ } else
+ return nullptr;
+
+ auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
+ constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+ return &*Mov;
+}
+
+bool AArch64InstructionSelector::selectIndexedExtLoad(
+ MachineInstr &MI, MachineRegisterInfo &MRI) {
+ auto &ExtLd = cast<GIndexedExtLoad>(MI);
+ Register Dst = ExtLd.getDstReg();
+ Register WriteBack = ExtLd.getWritebackReg();
+ Register Base = ExtLd.getBaseReg();
+ Register Offset = ExtLd.getOffsetReg();
+ LLT Ty = MRI.getType(Dst);
+ assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
+ unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
+ bool IsPre = ExtLd.isPre();
+ bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
+ bool InsertIntoXReg = false;
+ bool IsDst64 = Ty.getSizeInBits() == 64;
+
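+  // The unsigned W-register loads implicitly zero the upper 32 bits, so a
+  // zero-extending load to a 64-bit destination only needs a SUBREG_TO_REG
+  // into an X register (InsertIntoXReg) after the load.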
+ unsigned Opc = 0;
+ LLT NewLdDstTy;
+ LLT s32 = LLT::scalar(32);
+ LLT s64 = LLT::scalar(64);
+
+ if (MemSizeBits == 8) {
+ if (IsSExt) {
+ if (IsDst64)
+ Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
+ else
+ Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
+ NewLdDstTy = IsDst64 ? s64 : s32;
+ } else {
+ Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
+ InsertIntoXReg = IsDst64;
+ NewLdDstTy = s32;
+ }
+ } else if (MemSizeBits == 16) {
+ if (IsSExt) {
+ if (IsDst64)
+ Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
+ else
+ Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
+ NewLdDstTy = IsDst64 ? s64 : s32;
+ } else {
+ Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
+ InsertIntoXReg = IsDst64;
+ NewLdDstTy = s32;
+ }
+ } else if (MemSizeBits == 32) {
+ if (IsSExt) {
+ Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
+ NewLdDstTy = s64;
+ } else {
+ Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
+ InsertIntoXReg = IsDst64;
+ NewLdDstTy = s32;
+ }
+ } else {
+ llvm_unreachable("Unexpected size for indexed load");
+ }
+
+ if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
+ return false; // We should be on gpr.
+
+ auto Cst = getIConstantVRegVal(Offset, MRI);
+ if (!Cst)
+ return false; // Shouldn't happen, but just in case.
+
+ auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})
+ .addImm(Cst->getSExtValue());
+ LdMI.cloneMemRefs(ExtLd);
+ constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
+ // Make sure to select the load with the MemTy as the dest type, and then
+ // insert into X reg if needed.
+ if (InsertIntoXReg) {
+ // Generate a SUBREG_TO_REG.
+ auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
+ .addImm(0)
+ .addUse(LdMI.getReg(1))
+ .addImm(AArch64::sub_32);
+ RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
+ MRI);
+ } else {
+ auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
+ selectCopy(*Copy, TII, MRI, TRI, RBI);
+ }
+ MI.eraseFromParent();
+
+ return true;
+}
+
+bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
+ MachineRegisterInfo &MRI) {
+ // TODO: extending loads.
+ if (isa<GIndexedExtLoad>(MI))
+ return false;
+
+ auto &Ld = cast<GIndexedLoad>(MI);
+ Register Dst = Ld.getDstReg();
+ Register WriteBack = Ld.getWritebackReg();
+ Register Base = Ld.getBaseReg();
+ Register Offset = Ld.getOffsetReg();
+ assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
+ "Unexpected type for indexed load");
+ unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
+
+ unsigned Opc = 0;
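+  // The opcode tables below are indexed by Log2_32(MemSize): entries cover
+  // 1-, 2-, 4- and 8-byte accesses (plus 16-byte for the FPR bank).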
+ if (Ld.isPre()) {
+ static constexpr unsigned GPROpcodes[] = {
+ AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
+ AArch64::LDRXpre};
+ static constexpr unsigned FPROpcodes[] = {
+ AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
+ AArch64::LDRQpre};
+ if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
+ Opc = FPROpcodes[Log2_32(MemSize)];
+ else
+ Opc = GPROpcodes[Log2_32(MemSize)];
+ } else {
+ static constexpr unsigned GPROpcodes[] = {
+ AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
+ AArch64::LDRXpost};
+ static constexpr unsigned FPROpcodes[] = {
+ AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
+ AArch64::LDRDpost, AArch64::LDRQpost};
+ if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
+ Opc = FPROpcodes[Log2_32(MemSize)];
+ else
+ Opc = GPROpcodes[Log2_32(MemSize)];
+ }
+ auto Cst = getIConstantVRegVal(Offset, MRI);
+ if (!Cst)
+ return false; // Shouldn't happen, but just in case.
+ auto LdMI =
+ MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
+ LdMI.cloneMemRefs(Ld);
+ constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
+ MachineRegisterInfo &MRI) {
+ Register Dst = I.getWritebackReg();
+ Register Val = I.getValueReg();
+ Register Base = I.getBaseReg();
+ Register Offset = I.getOffsetReg();
+ LLT ValTy = MRI.getType(Val);
+ assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");
+
+ unsigned Opc = 0;
+ if (I.isPre()) {
+ static constexpr unsigned GPROpcodes[] = {
+ AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
+ AArch64::STRXpre};
+ static constexpr unsigned FPROpcodes[] = {
+ AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
+ AArch64::STRQpre};
+
+ if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
+ Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
+ else
+ Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
+ } else {
+ static constexpr unsigned GPROpcodes[] = {
+ AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
+ AArch64::STRXpost};
+ static constexpr unsigned FPROpcodes[] = {
+ AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
+ AArch64::STRDpost, AArch64::STRQpost};
+
+ if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
+ Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
+ else
+ Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
+ }
+
+ auto Cst = getIConstantVRegVal(Offset, MRI);
+ if (!Cst)
+ return false; // Shouldn't happen, but just in case.
+ auto Str =
+ MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
+ Str.cloneMemRefs(I);
+ constrainSelectedInstRegOperands(*Str, TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
+}
+
MachineInstr *
AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
MachineIRBuilder &MIRBuilder,
@@ -5547,6 +5818,28 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
}
}
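+  // For a splat constant, try a single MOVI/FMOV modified immediate first,
+  // then the invertible (MVNI) encodings on the complemented bits, and only
+  // then fall back to a constant-pool load.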
+ if (CV->getSplatValue()) {
+ APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger());
+ MachineInstr *NewOp;
+ bool Inv = false;
+ if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
+ (NewOp = tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
+ (NewOp =
+ tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
+ (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
+ (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
+ (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
+ return NewOp;
+
+ DefBits = ~DefBits;
+ Inv = true;
+ if ((NewOp = tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
+ (NewOp =
+ tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
+ (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
+ return NewOp;
+ }
+
auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
if (!CPLoad) {
LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
@@ -5605,11 +5898,9 @@ bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
if (EltRB != DstRB)
return false;
- if (any_of(make_range(I.operands_begin() + 2, I.operands_end()),
- [&MRI](const MachineOperand &Op) {
- return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(),
- MRI);
- }))
+ if (any_of(drop_begin(I.operands(), 2), [&MRI](const MachineOperand &Op) {
+ return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
+ }))
return false;
unsigned SubReg;
const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
@@ -5739,10 +6030,113 @@ bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
return true;
}
+bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
+ unsigned Opc, unsigned NumVecs, MachineInstr &I) {
+ assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+ assert(Opc && "Expected an opcode?");
+ assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
+ auto &MRI = *MIB.getMRI();
+ LLT Ty = MRI.getType(I.getOperand(0).getReg());
+ bool Narrow = Ty.getSizeInBits() == 64;
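+  // The lane-load instructions operate on 128-bit (Q) register tuples, so
+  // 64-bit operands are widened to Q registers first and the results are
+  // narrowed back with emitNarrowVector at the end.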
+
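+  // Operand layout: NumVecs defs, the intrinsic ID, NumVecs source vectors,
+  // the lane index, then the pointer.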
+ auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
+ SmallVector<Register, 4> Regs(NumVecs);
+ std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
+ [](auto MO) { return MO.getReg(); });
+
+ if (Narrow) {
+ transform(Regs, Regs.begin(), [this](Register Reg) {
+ return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
+ ->getOperand(0)
+ .getReg();
+ });
+ Ty = Ty.multiplyElements(2);
+ }
+
+ Register Tuple = createQTuple(Regs, MIB);
+ auto LaneNo = getIConstantVRegVal((FirstSrcRegIt + NumVecs)->getReg(), MRI);
+ if (!LaneNo)
+ return false;
+
+ Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
+ auto Load = MIB.buildInstr(Opc, {Ty}, {})
+ .addReg(Tuple)
+ .addImm(LaneNo->getZExtValue())
+ .addReg(Ptr);
+ Load.cloneMemRefs(I);
+ constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
+ Register SelectedLoadDst = Load->getOperand(0).getReg();
+ unsigned SubReg = AArch64::qsub0;
+ for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
+ auto Vec = MIB.buildInstr(TargetOpcode::COPY,
+ {Narrow ? DstOp(&AArch64::FPR128RegClass)
+ : DstOp(I.getOperand(Idx).getReg())},
+ {})
+ .addReg(SelectedLoadDst, 0, SubReg + Idx);
+ Register WideReg = Vec.getReg(0);
+ // Emit the subreg copies and immediately select them.
+ selectCopy(*Vec, TII, MRI, TRI, RBI);
+ if (Narrow &&
+ !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
+ return false;
+ }
+ return true;
+}
+
+void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
+ unsigned NumVecs,
+ unsigned Opc) {
+ MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
+ LLT Ty = MRI.getType(I.getOperand(1).getReg());
+ Register Ptr = I.getOperand(1 + NumVecs).getReg();
+
+ SmallVector<Register, 2> Regs(NumVecs);
+ std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
+ Regs.begin(), [](auto MO) { return MO.getReg(); });
+
+ Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
+ : createDTuple(Regs, MIB);
+ auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
+ Store.cloneMemRefs(I);
+ constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
+}
+
+bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
+ MachineInstr &I, unsigned NumVecs, unsigned Opc) {
+ MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
+ LLT Ty = MRI.getType(I.getOperand(1).getReg());
+ bool Narrow = Ty.getSizeInBits() == 64;
+
+ SmallVector<Register, 2> Regs(NumVecs);
+ std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
+ Regs.begin(), [](auto MO) { return MO.getReg(); });
+
+ if (Narrow)
+ transform(Regs, Regs.begin(), [this](Register Reg) {
+ return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
+ ->getOperand(0)
+ .getReg();
+ });
+
+ Register Tuple = createQTuple(Regs, MIB);
+
+ auto LaneNo = getIConstantVRegVal(I.getOperand(1 + NumVecs).getReg(), MRI);
+ if (!LaneNo)
+ return false;
+ Register Ptr = I.getOperand(1 + NumVecs + 1).getReg();
+ auto Store = MIB.buildInstr(Opc, {}, {})
+ .addReg(Tuple)
+ .addImm(LaneNo->getZExtValue())
+ .addReg(Ptr);
+ Store.cloneMemRefs(I);
+ constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
+ return true;
+}
+
bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
MachineInstr &I, MachineRegisterInfo &MRI) {
// Find the intrinsic ID.
- unsigned IntrinID = I.getIntrinsicID();
+ unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
const LLT S8 = LLT::scalar(8);
const LLT S16 = LLT::scalar(16);
@@ -5773,6 +6167,78 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
MIB.buildInstr(AArch64::BRK, {}, {})
.addImm(I.getOperand(1).getImm() | ('U' << 8));
break;
+ case Intrinsic::aarch64_neon_ld1x2: {
+ LLT Ty = MRI.getType(I.getOperand(0).getReg());
+ unsigned Opc = 0;
+ if (Ty == LLT::fixed_vector(8, S8))
+ Opc = AArch64::LD1Twov8b;
+ else if (Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::LD1Twov16b;
+ else if (Ty == LLT::fixed_vector(4, S16))
+ Opc = AArch64::LD1Twov4h;
+ else if (Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::LD1Twov8h;
+ else if (Ty == LLT::fixed_vector(2, S32))
+ Opc = AArch64::LD1Twov2s;
+ else if (Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::LD1Twov4s;
+ else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+ Opc = AArch64::LD1Twov2d;
+ else if (Ty == S64 || Ty == P0)
+ Opc = AArch64::LD1Twov1d;
+ else
+ llvm_unreachable("Unexpected type for ld1x2!");
+ selectVectorLoadIntrinsic(Opc, 2, I);
+ break;
+ }
+ case Intrinsic::aarch64_neon_ld1x3: {
+ LLT Ty = MRI.getType(I.getOperand(0).getReg());
+ unsigned Opc = 0;
+ if (Ty == LLT::fixed_vector(8, S8))
+ Opc = AArch64::LD1Threev8b;
+ else if (Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::LD1Threev16b;
+ else if (Ty == LLT::fixed_vector(4, S16))
+ Opc = AArch64::LD1Threev4h;
+ else if (Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::LD1Threev8h;
+ else if (Ty == LLT::fixed_vector(2, S32))
+ Opc = AArch64::LD1Threev2s;
+ else if (Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::LD1Threev4s;
+ else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+ Opc = AArch64::LD1Threev2d;
+ else if (Ty == S64 || Ty == P0)
+ Opc = AArch64::LD1Threev1d;
+ else
+ llvm_unreachable("Unexpected type for ld1x3!");
+ selectVectorLoadIntrinsic(Opc, 3, I);
+ break;
+ }
+ case Intrinsic::aarch64_neon_ld1x4: {
+ LLT Ty = MRI.getType(I.getOperand(0).getReg());
+ unsigned Opc = 0;
+ if (Ty == LLT::fixed_vector(8, S8))
+ Opc = AArch64::LD1Fourv8b;
+ else if (Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::LD1Fourv16b;
+ else if (Ty == LLT::fixed_vector(4, S16))
+ Opc = AArch64::LD1Fourv4h;
+ else if (Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::LD1Fourv8h;
+ else if (Ty == LLT::fixed_vector(2, S32))
+ Opc = AArch64::LD1Fourv2s;
+ else if (Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::LD1Fourv4s;
+ else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+ Opc = AArch64::LD1Fourv2d;
+ else if (Ty == S64 || Ty == P0)
+ Opc = AArch64::LD1Fourv1d;
+ else
+ llvm_unreachable("Unexpected type for ld1x4!");
+ selectVectorLoadIntrinsic(Opc, 4, I);
+ break;
+ }
case Intrinsic::aarch64_neon_ld2: {
LLT Ty = MRI.getType(I.getOperand(0).getReg());
unsigned Opc = 0;
@@ -5797,6 +6263,114 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
selectVectorLoadIntrinsic(Opc, 2, I);
break;
}
+ case Intrinsic::aarch64_neon_ld2lane: {
+ LLT Ty = MRI.getType(I.getOperand(0).getReg());
+ unsigned Opc;
+ if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::LD2i8;
+ else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::LD2i16;
+ else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::LD2i32;
+ else if (Ty == LLT::fixed_vector(2, S64) ||
+ Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
+ Opc = AArch64::LD2i64;
+ else
+      llvm_unreachable("Unexpected type for ld2lane!");
+ if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
+ return false;
+ break;
+ }
+ case Intrinsic::aarch64_neon_ld2r: {
+ LLT Ty = MRI.getType(I.getOperand(0).getReg());
+ unsigned Opc = 0;
+ if (Ty == LLT::fixed_vector(8, S8))
+ Opc = AArch64::LD2Rv8b;
+ else if (Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::LD2Rv16b;
+ else if (Ty == LLT::fixed_vector(4, S16))
+ Opc = AArch64::LD2Rv4h;
+ else if (Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::LD2Rv8h;
+ else if (Ty == LLT::fixed_vector(2, S32))
+ Opc = AArch64::LD2Rv2s;
+ else if (Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::LD2Rv4s;
+ else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+ Opc = AArch64::LD2Rv2d;
+ else if (Ty == S64 || Ty == P0)
+ Opc = AArch64::LD2Rv1d;
+ else
+ llvm_unreachable("Unexpected type for ld2r!");
+ selectVectorLoadIntrinsic(Opc, 2, I);
+ break;
+ }
+ case Intrinsic::aarch64_neon_ld3: {
+ LLT Ty = MRI.getType(I.getOperand(0).getReg());
+ unsigned Opc = 0;
+ if (Ty == LLT::fixed_vector(8, S8))
+ Opc = AArch64::LD3Threev8b;
+ else if (Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::LD3Threev16b;
+ else if (Ty == LLT::fixed_vector(4, S16))
+ Opc = AArch64::LD3Threev4h;
+ else if (Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::LD3Threev8h;
+ else if (Ty == LLT::fixed_vector(2, S32))
+ Opc = AArch64::LD3Threev2s;
+ else if (Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::LD3Threev4s;
+ else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+ Opc = AArch64::LD3Threev2d;
+ else if (Ty == S64 || Ty == P0)
+ Opc = AArch64::LD1Threev1d;
+ else
+ llvm_unreachable("Unexpected type for ld3!");
+ selectVectorLoadIntrinsic(Opc, 3, I);
+ break;
+ }
+ case Intrinsic::aarch64_neon_ld3lane: {
+ LLT Ty = MRI.getType(I.getOperand(0).getReg());
+ unsigned Opc;
+ if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::LD3i8;
+ else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::LD3i16;
+ else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::LD3i32;
+ else if (Ty == LLT::fixed_vector(2, S64) ||
+ Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
+ Opc = AArch64::LD3i64;
+ else
+      llvm_unreachable("Unexpected type for ld3lane!");
+ if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
+ return false;
+ break;
+ }
+ case Intrinsic::aarch64_neon_ld3r: {
+ LLT Ty = MRI.getType(I.getOperand(0).getReg());
+ unsigned Opc = 0;
+ if (Ty == LLT::fixed_vector(8, S8))
+ Opc = AArch64::LD3Rv8b;
+ else if (Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::LD3Rv16b;
+ else if (Ty == LLT::fixed_vector(4, S16))
+ Opc = AArch64::LD3Rv4h;
+ else if (Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::LD3Rv8h;
+ else if (Ty == LLT::fixed_vector(2, S32))
+ Opc = AArch64::LD3Rv2s;
+ else if (Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::LD3Rv4s;
+ else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+ Opc = AArch64::LD3Rv2d;
+ else if (Ty == S64 || Ty == P0)
+ Opc = AArch64::LD3Rv1d;
+ else
+ llvm_unreachable("Unexpected type for ld3r!");
+ selectVectorLoadIntrinsic(Opc, 3, I);
+ break;
+ }
case Intrinsic::aarch64_neon_ld4: {
LLT Ty = MRI.getType(I.getOperand(0).getReg());
unsigned Opc = 0;
@@ -5821,11 +6395,122 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
selectVectorLoadIntrinsic(Opc, 4, I);
break;
}
+ case Intrinsic::aarch64_neon_ld4lane: {
+ LLT Ty = MRI.getType(I.getOperand(0).getReg());
+ unsigned Opc;
+ if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::LD4i8;
+ else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::LD4i16;
+ else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::LD4i32;
+ else if (Ty == LLT::fixed_vector(2, S64) ||
+ Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
+ Opc = AArch64::LD4i64;
+ else
+      llvm_unreachable("Unexpected type for ld4lane!");
+ if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
+ return false;
+ break;
+ }
+ case Intrinsic::aarch64_neon_ld4r: {
+ LLT Ty = MRI.getType(I.getOperand(0).getReg());
+ unsigned Opc = 0;
+ if (Ty == LLT::fixed_vector(8, S8))
+ Opc = AArch64::LD4Rv8b;
+ else if (Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::LD4Rv16b;
+ else if (Ty == LLT::fixed_vector(4, S16))
+ Opc = AArch64::LD4Rv4h;
+ else if (Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::LD4Rv8h;
+ else if (Ty == LLT::fixed_vector(2, S32))
+ Opc = AArch64::LD4Rv2s;
+ else if (Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::LD4Rv4s;
+ else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+ Opc = AArch64::LD4Rv2d;
+ else if (Ty == S64 || Ty == P0)
+ Opc = AArch64::LD4Rv1d;
+ else
+ llvm_unreachable("Unexpected type for ld4r!");
+ selectVectorLoadIntrinsic(Opc, 4, I);
+ break;
+ }
+ case Intrinsic::aarch64_neon_st1x2: {
+ LLT Ty = MRI.getType(I.getOperand(1).getReg());
+ unsigned Opc;
+ if (Ty == LLT::fixed_vector(8, S8))
+ Opc = AArch64::ST1Twov8b;
+ else if (Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::ST1Twov16b;
+ else if (Ty == LLT::fixed_vector(4, S16))
+ Opc = AArch64::ST1Twov4h;
+ else if (Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::ST1Twov8h;
+ else if (Ty == LLT::fixed_vector(2, S32))
+ Opc = AArch64::ST1Twov2s;
+ else if (Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::ST1Twov4s;
+ else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+ Opc = AArch64::ST1Twov2d;
+ else if (Ty == S64 || Ty == P0)
+ Opc = AArch64::ST1Twov1d;
+ else
+ llvm_unreachable("Unexpected type for st1x2!");
+ selectVectorStoreIntrinsic(I, 2, Opc);
+ break;
+ }
+ case Intrinsic::aarch64_neon_st1x3: {
+ LLT Ty = MRI.getType(I.getOperand(1).getReg());
+ unsigned Opc;
+ if (Ty == LLT::fixed_vector(8, S8))
+ Opc = AArch64::ST1Threev8b;
+ else if (Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::ST1Threev16b;
+ else if (Ty == LLT::fixed_vector(4, S16))
+ Opc = AArch64::ST1Threev4h;
+ else if (Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::ST1Threev8h;
+ else if (Ty == LLT::fixed_vector(2, S32))
+ Opc = AArch64::ST1Threev2s;
+ else if (Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::ST1Threev4s;
+ else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+ Opc = AArch64::ST1Threev2d;
+ else if (Ty == S64 || Ty == P0)
+ Opc = AArch64::ST1Threev1d;
+ else
+ llvm_unreachable("Unexpected type for st1x3!");
+ selectVectorStoreIntrinsic(I, 3, Opc);
+ break;
+ }
+ case Intrinsic::aarch64_neon_st1x4: {
+ LLT Ty = MRI.getType(I.getOperand(1).getReg());
+ unsigned Opc;
+ if (Ty == LLT::fixed_vector(8, S8))
+ Opc = AArch64::ST1Fourv8b;
+ else if (Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::ST1Fourv16b;
+ else if (Ty == LLT::fixed_vector(4, S16))
+ Opc = AArch64::ST1Fourv4h;
+ else if (Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::ST1Fourv8h;
+ else if (Ty == LLT::fixed_vector(2, S32))
+ Opc = AArch64::ST1Fourv2s;
+ else if (Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::ST1Fourv4s;
+ else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+ Opc = AArch64::ST1Fourv2d;
+ else if (Ty == S64 || Ty == P0)
+ Opc = AArch64::ST1Fourv1d;
+ else
+ llvm_unreachable("Unexpected type for st1x4!");
+ selectVectorStoreIntrinsic(I, 4, Opc);
+ break;
+ }
case Intrinsic::aarch64_neon_st2: {
- Register Src1 = I.getOperand(1).getReg();
- Register Src2 = I.getOperand(2).getReg();
- Register Ptr = I.getOperand(3).getReg();
- LLT Ty = MRI.getType(Src1);
+ LLT Ty = MRI.getType(I.getOperand(1).getReg());
unsigned Opc;
if (Ty == LLT::fixed_vector(8, S8))
Opc = AArch64::ST2Twov8b;
@@ -5845,12 +6530,109 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
Opc = AArch64::ST1Twov1d;
else
llvm_unreachable("Unexpected type for st2!");
- SmallVector<Register, 2> Regs = {Src1, Src2};
- Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
- : createDTuple(Regs, MIB);
- auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
- Store.cloneMemRefs(I);
- constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
+ selectVectorStoreIntrinsic(I, 2, Opc);
+ break;
+ }
+ case Intrinsic::aarch64_neon_st3: {
+ LLT Ty = MRI.getType(I.getOperand(1).getReg());
+ unsigned Opc;
+ if (Ty == LLT::fixed_vector(8, S8))
+ Opc = AArch64::ST3Threev8b;
+ else if (Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::ST3Threev16b;
+ else if (Ty == LLT::fixed_vector(4, S16))
+ Opc = AArch64::ST3Threev4h;
+ else if (Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::ST3Threev8h;
+ else if (Ty == LLT::fixed_vector(2, S32))
+ Opc = AArch64::ST3Threev2s;
+ else if (Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::ST3Threev4s;
+ else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+ Opc = AArch64::ST3Threev2d;
+ else if (Ty == S64 || Ty == P0)
+ Opc = AArch64::ST1Threev1d;
+ else
+ llvm_unreachable("Unexpected type for st3!");
+ selectVectorStoreIntrinsic(I, 3, Opc);
+ break;
+ }
+ case Intrinsic::aarch64_neon_st4: {
+ LLT Ty = MRI.getType(I.getOperand(1).getReg());
+ unsigned Opc;
+ if (Ty == LLT::fixed_vector(8, S8))
+ Opc = AArch64::ST4Fourv8b;
+ else if (Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::ST4Fourv16b;
+ else if (Ty == LLT::fixed_vector(4, S16))
+ Opc = AArch64::ST4Fourv4h;
+ else if (Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::ST4Fourv8h;
+ else if (Ty == LLT::fixed_vector(2, S32))
+ Opc = AArch64::ST4Fourv2s;
+ else if (Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::ST4Fourv4s;
+ else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+ Opc = AArch64::ST4Fourv2d;
+ else if (Ty == S64 || Ty == P0)
+ Opc = AArch64::ST1Fourv1d;
+ else
+ llvm_unreachable("Unexpected type for st4!");
+ selectVectorStoreIntrinsic(I, 4, Opc);
+ break;
+ }
+ case Intrinsic::aarch64_neon_st2lane: {
+ LLT Ty = MRI.getType(I.getOperand(1).getReg());
+ unsigned Opc;
+ if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::ST2i8;
+ else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::ST2i16;
+ else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::ST2i32;
+ else if (Ty == LLT::fixed_vector(2, S64) ||
+ Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
+ Opc = AArch64::ST2i64;
+ else
+ llvm_unreachable("Unexpected type for st2lane!");
+ if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
+ return false;
+ break;
+ }
+ case Intrinsic::aarch64_neon_st3lane: {
+ LLT Ty = MRI.getType(I.getOperand(1).getReg());
+ unsigned Opc;
+ if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::ST3i8;
+ else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::ST3i16;
+ else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::ST3i32;
+ else if (Ty == LLT::fixed_vector(2, S64) ||
+ Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
+ Opc = AArch64::ST3i64;
+ else
+ llvm_unreachable("Unexpected type for st3lane!");
+ if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
+ return false;
+ break;
+ }
+ case Intrinsic::aarch64_neon_st4lane: {
+ LLT Ty = MRI.getType(I.getOperand(1).getReg());
+ unsigned Opc;
+ if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::ST4i8;
+ else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::ST4i16;
+ else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::ST4i32;
+ else if (Ty == LLT::fixed_vector(2, S64) ||
+ Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
+ Opc = AArch64::ST4i64;
+ else
+ llvm_unreachable("Unexpected type for st4lane!");
+ if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
+ return false;
break;
}
case Intrinsic::aarch64_mops_memset_tag: {
@@ -5891,7 +6673,7 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
MachineRegisterInfo &MRI) {
- unsigned IntrinID = I.getIntrinsicID();
+ unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
switch (IntrinID) {
default:
@@ -6195,7 +6977,7 @@ bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
// It's better to avoid folding and recomputing shifts when we don't have a
// fastpath.
- if (!STI.hasLSLFast())
+ if (!STI.hasAddrLSLFast())
return false;
// We have a fastpath, so folding a shift in and potentially computing it
@@ -6571,9 +7353,6 @@ AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
return std::nullopt;
RHSC = RHSOp1.getCImm()->getSExtValue();
- // If the offset is valid as a scaled immediate, don't match here.
- if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
- return std::nullopt;
if (RHSC >= -256 && RHSC < 256) {
MachineOperand &Base = RootDef->getOperand(1);
return {{
@@ -6886,11 +7665,29 @@ AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
MachineRegisterInfo &MRI =
Root.getParent()->getParent()->getParent()->getRegInfo();
- MachineInstr *Extract = getDefIgnoringCopies(Root.getReg(), MRI);
- if (Extract && Extract->getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
- Root.getReg() == Extract->getOperand(1).getReg()) {
- Register ExtReg = Extract->getOperand(2).getReg();
- return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
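+  // On little-endian targets the high 64 bits of a vector register remain
+  // the high 64 bits across bitcasts, so it is safe to look through them.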
+ auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
+ while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
+ STI.isLittleEndian())
+ Extract =
+ getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
+ if (!Extract)
+ return std::nullopt;
+
+ if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
+ if (Extract->Reg == Extract->MI->getOperand(1).getReg()) {
+ Register ExtReg = Extract->MI->getOperand(2).getReg();
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
+ }
+ }
+ if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
+ LLT SrcTy = MRI.getType(Extract->MI->getOperand(1).getReg());
+ auto LaneIdx = getIConstantVRegValWithLookThrough(
+ Extract->MI->getOperand(2).getReg(), MRI);
+ if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&
+ LaneIdx->Value.getSExtValue() == 1) {
+ Register ExtReg = Extract->MI->getOperand(1).getReg();
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
+ }
}
return std::nullopt;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index d905da4eaec3..8b909f53c844 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -14,6 +14,8 @@
#include "AArch64LegalizerInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64Subtarget.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
@@ -64,6 +66,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
/* End 128bit types */
/* Begin 64bit types */
v8s8, v4s16, v2s32};
+ std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
+ SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
+ SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
@@ -118,13 +123,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampScalar(0, s32, s64);
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
- .legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8})
- .scalarizeIf(
- [=](const LegalityQuery &Query) {
- return Query.Opcode == G_MUL && Query.Types[0] == v2s64;
- },
- 0)
- .legalFor({v2s64})
+ .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
.widenScalarToNextPow2(0)
.clampScalar(0, s32, s64)
.clampMaxNumElements(0, s8, 16)
@@ -234,17 +233,29 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampScalar(1, s32, s64)
.widenScalarToNextPow2(0);
- getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
- .legalFor({MinFPScalar, s32, s64, v2s64, v4s32, v2s32})
- .clampScalar(0, MinFPScalar, s64)
+ getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
+ G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM,
+ G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR,
+ G_FRINT, G_FNEARBYINT, G_INTRINSIC_TRUNC,
+ G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
+ .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
+ .legalIf([=](const LegalityQuery &Query) {
+ const auto &Ty = Query.Types[0];
+ return (Ty == v8s16 || Ty == v4s16) && HasFP16;
+ })
+ .libcallFor({s128})
+ .minScalarOrElt(0, MinFPScalar)
+ .clampNumElements(0, v4s16, v8s16)
.clampNumElements(0, v2s32, v4s32)
- .clampNumElements(0, v2s64, v2s64);
+ .clampNumElements(0, v2s64, v2s64)
+ .moreElementsToNextPow2(0);
- getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});
+ getActionDefinitionsBuilder(G_FREM)
+ .libcallFor({s32, s64})
+ .minScalar(0, s32)
+ .scalarize(0);
- getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
- G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
- G_FNEARBYINT, G_INTRINSIC_LRINT})
+ getActionDefinitionsBuilder(G_INTRINSIC_LRINT)
// If we don't have full FP16 support, then scalarize the elements of
// vectors containing fp16 types.
.fewerElementsIf(
@@ -264,12 +275,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});
getActionDefinitionsBuilder(
- {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
+ {G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10,
+ G_FEXP, G_FEXP2, G_FEXP10})
// We need a call for these, so we always need to scalarize.
.scalarize(0)
// Regardless of FP16 support, widen 16-bit elements to 32-bits.
.minScalar(0, s32)
- .libcallFor({s32, s64, v2s32, v4s32, v2s64});
+ .libcallFor({s32, s64});
getActionDefinitionsBuilder(G_INSERT)
.legalIf(all(typeInSet(0, {s32, s64, p0}),
@@ -405,6 +417,67 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.customIf(IsPtrVecPred)
.scalarizeIf(typeIs(0, v2s16), 0);
+ getActionDefinitionsBuilder(G_INDEXED_STORE)
+ // Idx 0 == Ptr, Idx 1 == Val
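+      // Each entry is {Type0 (ptr), Type1 (val), MemTy, min align in bits}.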
+ // TODO: we can implement legalizations but as of now these are
+ // generated in a very specific way.
+ .legalForTypesWithMemDesc({
+ {p0, s8, s8, 8},
+ {p0, s16, s16, 8},
+ {p0, s32, s8, 8},
+ {p0, s32, s16, 8},
+ {p0, s32, s32, 8},
+ {p0, s64, s64, 8},
+ {p0, p0, p0, 8},
+ {p0, v8s8, v8s8, 8},
+ {p0, v16s8, v16s8, 8},
+ {p0, v4s16, v4s16, 8},
+ {p0, v8s16, v8s16, 8},
+ {p0, v2s32, v2s32, 8},
+ {p0, v4s32, v4s32, 8},
+ {p0, v2s64, v2s64, 8},
+ {p0, v2p0, v2p0, 8},
+ {p0, s128, s128, 8},
+ })
+ .unsupported();
+
+ auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
+ LLT LdTy = Query.Types[0];
+ LLT PtrTy = Query.Types[1];
+    if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
+        !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
+ return false;
+ if (PtrTy != p0)
+ return false;
+ return true;
+ };
+ getActionDefinitionsBuilder(G_INDEXED_LOAD)
+ .unsupportedIf(
+ atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
+ .legalIf(IndexedLoadBasicPred)
+ .unsupported();
+ getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
+ .unsupportedIf(
+ atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
+ .legalIf(all(typeInSet(0, {s16, s32, s64}),
+ LegalityPredicate([=](const LegalityQuery &Q) {
+ LLT LdTy = Q.Types[0];
+ LLT PtrTy = Q.Types[1];
+ LLT MemTy = Q.MMODescrs[0].MemoryTy;
+ if (PtrTy != p0)
+ return false;
+ if (LdTy == s16)
+ return MemTy == s8;
+ if (LdTy == s32)
+ return MemTy == s8 || MemTy == s16;
+ if (LdTy == s64)
+ return MemTy == s8 || MemTy == s16 || MemTy == s32;
+ return false;
+ })))
+ .unsupported();
+
// Constants
getActionDefinitionsBuilder(G_CONSTANT)
.legalFor({p0, s8, s16, s32, s64})
@@ -485,20 +558,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
Ty.getElementType() != SrcTy.getElementType();
},
0, 1)
- .clampNumElements(0, v2s32, v4s32);
+ .clampNumElements(0, v2s32, v4s32)
+ .clampMaxNumElements(1, s64, 2);
// Extensions
auto ExtLegalFunc = [=](const LegalityQuery &Query) {
unsigned DstSize = Query.Types[0].getSizeInBits();
- if (DstSize == 128 && !Query.Types[0].isVector())
- return false; // Extending to a scalar s128 needs narrowing.
-
- // Make sure that we have something that will fit in a register, and
- // make sure it's a power of 2.
- if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
+ // Handle legal vectors using legalFor
+ if (Query.Types[0].isVector())
return false;
+  if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
+    return false; // Extending to a scalar s128 needs narrowing; any other
+                  // size must be a power of 2 that fits in a register.
+
const LLT &SrcTy = Query.Types[1];
// Make sure we fit in a register otherwise. Don't bother checking that
@@ -512,22 +585,44 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
};
getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
.legalIf(ExtLegalFunc)
- .clampScalar(0, s64, s64); // Just for s128, others are handled above.
+ .legalFor({{v2s64, v2s32}, {v4s32, v4s16}, {v8s16, v8s8}})
+ .clampScalar(0, s64, s64) // Just for s128, others are handled above.
+ .moreElementsToNextPow2(1)
+ .clampMaxNumElements(1, s8, 8)
+ .clampMaxNumElements(1, s16, 4)
+ .clampMaxNumElements(1, s32, 2)
+ // Tries to convert a large EXTEND into two smaller EXTENDs
+ .lowerIf([=](const LegalityQuery &Query) {
+ return (Query.Types[0].getScalarSizeInBits() >
+ Query.Types[1].getScalarSizeInBits() * 2) &&
+ Query.Types[0].isVector() &&
+ (Query.Types[1].getScalarSizeInBits() == 8 ||
+ Query.Types[1].getScalarSizeInBits() == 16);
+ });
getActionDefinitionsBuilder(G_TRUNC)
+ .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
+ .moreElementsToNextPow2(0)
+ .clampMaxNumElements(0, s8, 8)
+ .clampMaxNumElements(0, s16, 4)
+ .clampMaxNumElements(0, s32, 2)
.minScalarOrEltIf(
[=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
0, s8)
- .customIf([=](const LegalityQuery &Query) {
+ .lowerIf([=](const LegalityQuery &Query) {
LLT DstTy = Query.Types[0];
LLT SrcTy = Query.Types[1];
- return DstTy == v8s8 && SrcTy.getSizeInBits() > 128;
+ return DstTy.isVector() && (SrcTy.getSizeInBits() > 128 ||
+ (DstTy.getScalarSizeInBits() * 2 <
+ SrcTy.getScalarSizeInBits()));
})
.alwaysLegal();
getActionDefinitionsBuilder(G_SEXT_INREG)
.legalFor({s32, s64})
.legalFor(PackedVectorAllTypeList)
+ .maxScalar(0, s64)
.lower();
// FP conversions
@@ -548,17 +643,63 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
// Conversions
getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
.legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
+ .legalIf([=](const LegalityQuery &Query) {
+ return HasFP16 &&
+ (Query.Types[1] == s16 || Query.Types[1] == v4s16 ||
+ Query.Types[1] == v8s16) &&
+ (Query.Types[0] == s32 || Query.Types[0] == s64 ||
+ Query.Types[0] == v4s16 || Query.Types[0] == v8s16);
+ })
.widenScalarToNextPow2(0)
.clampScalar(0, s32, s64)
.widenScalarToNextPow2(1)
- .clampScalar(1, s32, s64);
+ .clampScalarOrElt(1, MinFPScalar, s64)
+ .moreElementsToNextPow2(0)
+ .widenScalarIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0].getScalarSizeInBits() >
+ Query.Types[1].getScalarSizeInBits();
+ },
+ LegalizeMutations::changeElementSizeTo(1, 0))
+ .widenScalarIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0].getScalarSizeInBits() <
+ Query.Types[1].getScalarSizeInBits();
+ },
+ LegalizeMutations::changeElementSizeTo(0, 1))
+ .clampNumElements(0, v4s16, v8s16)
+ .clampNumElements(0, v2s32, v4s32)
+ .clampMaxNumElements(0, s64, 2);
getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
.legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
+ .legalIf([=](const LegalityQuery &Query) {
+ return HasFP16 &&
+ (Query.Types[0] == s16 || Query.Types[0] == v4s16 ||
+ Query.Types[0] == v8s16) &&
+ (Query.Types[1] == s32 || Query.Types[1] == s64 ||
+ Query.Types[1] == v4s16 || Query.Types[1] == v8s16);
+ })
+ .widenScalarToNextPow2(1)
.clampScalar(1, s32, s64)
- .minScalarSameAs(1, 0)
- .clampScalar(0, s32, s64)
- .widenScalarToNextPow2(0);
+ .widenScalarToNextPow2(0)
+ .clampScalarOrElt(0, MinFPScalar, s64)
+ .moreElementsToNextPow2(0)
+ .widenScalarIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0].getScalarSizeInBits() <
+ Query.Types[1].getScalarSizeInBits();
+ },
+ LegalizeMutations::changeElementSizeTo(0, 1))
+ .widenScalarIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0].getScalarSizeInBits() >
+ Query.Types[1].getScalarSizeInBits();
+ },
+ LegalizeMutations::changeElementSizeTo(1, 0))
+ .clampNumElements(0, v4s16, v8s16)
+ .clampNumElements(0, v2s32, v4s32)
+ .clampMaxNumElements(0, s64, 2);
// Control-flow
getActionDefinitionsBuilder(G_BRCOND)
@@ -617,18 +758,18 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
+ .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
.customIf([](const LegalityQuery &Query) {
return Query.Types[0].getSizeInBits() == 128;
})
- .clampScalar(0, s32, s64)
- .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
+ .clampScalar(0, s32, s64);
getActionDefinitionsBuilder(
{G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
- .clampScalar(0, s32, s64)
- .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
+ .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
+ .clampScalar(0, s32, s64);
getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
@@ -668,12 +809,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
return Query.Types[0] != EltTy;
})
.minScalar(2, s64)
- .legalIf([=](const LegalityQuery &Query) {
+ .customIf([=](const LegalityQuery &Query) {
const LLT &VecTy = Query.Types[1];
return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
- VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s32 ||
- VecTy == v2p0;
+ VecTy == v8s8 || VecTy == v16s8 || VecTy == v2p0;
})
.minScalarOrEltIf(
[=](const LegalityQuery &Query) {
@@ -705,8 +845,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
.legalIf(typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64}))
- .clampMinNumElements(0, s16, 4)
- .clampMaxNumElements(0, s16, 8);
+ .widenVectorEltsToVectorMinSize(0, 64);
getActionDefinitionsBuilder(G_BUILD_VECTOR)
.legalFor({{v8s8, s8},
@@ -762,8 +901,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
// to be the same size as the dest.
if (DstTy != SrcTy)
return false;
- return llvm::is_contained({v2s32, v4s32, v2s64, v2p0, v16s8, v8s16},
- DstTy);
+ return llvm::is_contained(
+ {v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
})
// G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
// just want those lowered into G_BUILD_VECTOR
@@ -791,13 +930,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
.legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}});
- getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({{p0}, {s64}});
+ getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
- getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
- return Query.Types[0] == p0 && Query.Types[1] == s64;
- });
+ getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
- getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
+ getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
+
+ getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
if (ST.hasMOPS()) {
// G_BZERO is not supported. Currently it is only emitted by
@@ -831,18 +970,68 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.legalFor(PackedVectorAllTypeList)
.lowerIf(isScalar(0));
+ // For fadd reductions we have pairwise operations available. We treat the
+ // usual legal types as legal and handle the lowering to pairwise instructions
+ // later.
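+ // (Each pairwise step halves the number of lanes until a single scalar
+ // remains.)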
getActionDefinitionsBuilder(G_VECREDUCE_FADD)
- // We only have FADDP to do reduction-like operations. Lower the rest.
- .legalFor({{s32, v2s32}, {s64, v2s64}})
+ .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
+ .legalIf([=](const LegalityQuery &Query) {
+ const auto &Ty = Query.Types[1];
+ return (Ty == v4s16 || Ty == v8s16) && HasFP16;
+ })
+ .minScalarOrElt(0, MinFPScalar)
.clampMaxNumElements(1, s64, 2)
- .clampMaxNumElements(1, s32, 2)
+ .clampMaxNumElements(1, s32, 4)
+ .clampMaxNumElements(1, s16, 8)
.lower();
getActionDefinitionsBuilder(G_VECREDUCE_ADD)
- .legalFor(
- {{s8, v16s8}, {s16, v8s16}, {s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
+ .legalFor({{s8, v16s8},
+ {s8, v8s8},
+ {s16, v8s16},
+ {s16, v4s16},
+ {s32, v4s32},
+ {s32, v2s32},
+ {s64, v2s64}})
+ .clampMaxNumElements(1, s64, 2)
+ .clampMaxNumElements(1, s32, 4)
+ .clampMaxNumElements(1, s16, 8)
+ .clampMaxNumElements(1, s8, 16)
+ .lower();
+
+ getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
+ G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
+ .legalFor({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
+ .legalIf([=](const LegalityQuery &Query) {
+ const auto &Ty = Query.Types[1];
+ return Query.Types[0] == s16 && (Ty == v8s16 || Ty == v4s16) && HasFP16;
+ })
+ .minScalarOrElt(0, MinFPScalar)
+ .clampMaxNumElements(1, s64, 2)
+ .clampMaxNumElements(1, s32, 4)
+ .clampMaxNumElements(1, s16, 8)
+ .lower();
+
+ getActionDefinitionsBuilder(G_VECREDUCE_MUL)
+ .clampMaxNumElements(1, s32, 2)
+ .clampMaxNumElements(1, s16, 4)
+ .clampMaxNumElements(1, s8, 8)
+ .scalarize(1)
+ .lower();
+
+ getActionDefinitionsBuilder(
+ {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
+ .legalFor({{s8, v8s8},
+ {s8, v16s8},
+ {s16, v4s16},
+ {s16, v8s16},
+ {s32, v2s32},
+ {s32, v4s32}})
.clampMaxNumElements(1, s64, 2)
.clampMaxNumElements(1, s32, 4)
+ .clampMaxNumElements(1, s16, 8)
+ .clampMaxNumElements(1, s8, 16)
+ .scalarize(1)
.lower();
getActionDefinitionsBuilder(
@@ -870,7 +1059,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT})
.lowerIf([=](const LegalityQuery &Q) { return Q.Types[0].isScalar(); });
- getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();
+ getActionDefinitionsBuilder({G_FSHL, G_FSHR})
+ .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
+ .lower();
getActionDefinitionsBuilder(G_ROTR)
.legalFor({{s32, s64}, {s64, s64}})
@@ -918,23 +1109,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
// TODO: Vector types.
getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT}).lowerIf(isScalar(0));
- // TODO: Vector types.
- getActionDefinitionsBuilder({G_FMAXNUM, G_FMINNUM})
- .legalFor({MinFPScalar, s32, s64})
- .libcallFor({s128})
- .minScalar(0, MinFPScalar);
-
- getActionDefinitionsBuilder({G_FMAXIMUM, G_FMINIMUM})
- .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
- .legalIf([=](const LegalityQuery &Query) {
- const auto &Ty = Query.Types[0];
- return (Ty == v8s16 || Ty == v4s16) && HasFP16;
- })
- .minScalar(0, MinFPScalar)
- .clampNumElements(0, v4s16, v8s16)
- .clampNumElements(0, v2s32, v4s32)
- .clampNumElements(0, v2s64, v2s64);
-
// TODO: Libcall support for s128.
// TODO: s16 should be legal with full FP16 support.
getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
@@ -947,6 +1121,14 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_FMAD).lower();
+ // Access to floating-point environment.
+ getActionDefinitionsBuilder({G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
+ .libcall();
+
+ getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
+
+ getActionDefinitionsBuilder(G_PREFETCH).custom();
+
getLegacyLegalizerInfo().computeTables();
verify(*ST.getInstrInfo());
}
@@ -971,11 +1153,12 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
case TargetOpcode::G_GLOBAL_VALUE:
return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
- case TargetOpcode::G_TRUNC:
- return legalizeVectorTrunc(MI, Helper);
case TargetOpcode::G_SBFX:
case TargetOpcode::G_UBFX:
return legalizeBitfieldExtract(MI, MRI, Helper);
+ case TargetOpcode::G_FSHL:
+ case TargetOpcode::G_FSHR:
+ return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
case TargetOpcode::G_ROTR:
return legalizeRotate(MI, MRI, Helper);
case TargetOpcode::G_CTPOP:
@@ -991,11 +1174,70 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
return legalizeMemOps(MI, Helper);
case TargetOpcode::G_FCOPYSIGN:
return legalizeFCopySign(MI, Helper);
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ return legalizeExtractVectorElt(MI, MRI, Helper);
+ case TargetOpcode::G_DYN_STACKALLOC:
+ return legalizeDynStackAlloc(MI, Helper);
+ case TargetOpcode::G_PREFETCH:
+ return legalizePrefetch(MI, Helper);
}
llvm_unreachable("expected switch to return");
}
+bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer,
+ LegalizerHelper &Helper) const {
+ assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
+ MI.getOpcode() == TargetOpcode::G_FSHR);
+
+ // Keep as G_FSHR if the shift amount is a G_CONSTANT; otherwise use the
+ // generic lowering.
+ Register ShiftNo = MI.getOperand(3).getReg();
+ LLT ShiftTy = MRI.getType(ShiftNo);
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
+
+ // Adjust shift amount according to Opcode (FSHL/FSHR)
+ // Convert FSHL to FSHR
+ LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
+ APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
+
+ // Lower non-constant shifts and leave zero shifts to the optimizer.
+ if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
+ return (Helper.lowerFunnelShiftAsShifts(MI) ==
+ LegalizerHelper::LegalizeResult::Legalized);
+
+ APInt Amount = VRegAndVal->Value.urem(BitWidth);
+
+ Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
+
+ // If the instruction is a G_FSHR with a 64-bit G_CONSTANT shift amount in
+ // the range [0, BitWidth), it is already legal.
+ if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
+ VRegAndVal->Value.ult(BitWidth))
+ return true;
+
+ // Materialize the adjusted shift amount as a 64-bit constant.
+ auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
+
+ if (MI.getOpcode() == TargetOpcode::G_FSHR) {
+ Observer.changingInstr(MI);
+ MI.getOperand(3).setReg(Cast64.getReg(0));
+ Observer.changedInstr(MI);
+ }
+ // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
+ // instruction
+ else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
+ MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
+ {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
+ Cast64.getReg(0)});
+ MI.eraseFromParent();
+ }
+ return true;
+}
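+
+// A scalar model of the G_FSHL -> G_FSHR rewrite above: for W-bit values
+// x and y and a shift amount n with n % W != 0,
+//   fshl(x, y, n) == (x << n) | (y >> (W - n)) == fshr(x, y, W - n),
+// which is why Amount is replaced by BitWidth - Amount for G_FSHL.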
+
bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const {
@@ -1013,54 +1255,6 @@ bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
return true;
}
-static void extractParts(Register Reg, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts,
- SmallVectorImpl<Register> &VRegs) {
- for (int I = 0; I < NumParts; ++I)
- VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
- MIRBuilder.buildUnmerge(VRegs, Reg);
-}
-
-bool AArch64LegalizerInfo::legalizeVectorTrunc(
- MachineInstr &MI, LegalizerHelper &Helper) const {
- MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
- MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
- // Similar to how operand splitting is done in SelectiondDAG, we can handle
- // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
- // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
- // %lo16(<4 x s16>) = G_TRUNC %inlo
- // %hi16(<4 x s16>) = G_TRUNC %inhi
- // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
- // %res(<8 x s8>) = G_TRUNC %in16
-
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
- assert(llvm::has_single_bit<uint32_t>(DstTy.getSizeInBits()) &&
- llvm::has_single_bit<uint32_t>(SrcTy.getSizeInBits()));
-
- // Split input type.
- LLT SplitSrcTy =
- SrcTy.changeElementCount(SrcTy.getElementCount().divideCoefficientBy(2));
- // First, split the source into two smaller vectors.
- SmallVector<Register, 2> SplitSrcs;
- extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs);
-
- // Truncate the splits into intermediate narrower elements.
- LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
- for (unsigned I = 0; I < SplitSrcs.size(); ++I)
- SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
-
- auto Concat = MIRBuilder.buildConcatVectors(
- DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs);
-
- Helper.Observer.changingInstr(MI);
- MI.getOperand(1).setReg(Concat.getReg(0));
- Helper.Observer.changedInstr(MI);
- return true;
-}
-
bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const {
@@ -1090,7 +1284,7 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
// MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
// by creating a MOVK that sets bits 48-63 of the register to (global address
// + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
- // prevent an incorrect tag being generated during relocation when the the
+ // prevent an incorrect tag being generated during relocation when the
// global appears before the code section. Without the offset, a global at
// `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
// by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
@@ -1119,7 +1313,8 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
- switch (MI.getIntrinsicID()) {
+ Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
+ switch (IntrinsicID) {
case Intrinsic::vacopy: {
unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
unsigned VaListSize =
@@ -1150,35 +1345,12 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
}
case Intrinsic::aarch64_mops_memset_tag: {
assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
- // Zext the value to 64 bit
+ // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
+ // the instruction).
MachineIRBuilder MIB(MI);
auto &Value = MI.getOperand(3);
- Register ZExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
- Value.setReg(ZExtValueReg);
- return true;
- }
- case Intrinsic::prefetch: {
- MachineIRBuilder MIB(MI);
- auto &AddrVal = MI.getOperand(1);
-
- int64_t IsWrite = MI.getOperand(2).getImm();
- int64_t Locality = MI.getOperand(3).getImm();
- int64_t IsData = MI.getOperand(4).getImm();
-
- bool IsStream = Locality == 0;
- if (Locality != 0) {
- assert(Locality <= 3 && "Prefetch locality out-of-range");
- // The locality degree is the opposite of the cache speed.
- // Put the number the other way around.
- // The encoding starts at 0 for level 1
- Locality = 3 - Locality;
- }
-
- unsigned PrfOp =
- (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
-
- MIB.buildInstr(AArch64::G_PREFETCH).addImm(PrfOp).add(AddrVal);
- MI.eraseFromParent();
+ Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
+ Value.setReg(ExtValueReg);
return true;
}
case Intrinsic::aarch64_prefetch: {
@@ -1195,10 +1367,67 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
(Target << 1) | // Cache level bits
(unsigned)IsStream; // Stream bit
- MIB.buildInstr(AArch64::G_PREFETCH).addImm(PrfOp).add(AddrVal);
+ MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
+ MI.eraseFromParent();
+ return true;
+ }
+ case Intrinsic::aarch64_neon_uaddv:
+ case Intrinsic::aarch64_neon_saddv:
+ case Intrinsic::aarch64_neon_umaxv:
+ case Intrinsic::aarch64_neon_smaxv:
+ case Intrinsic::aarch64_neon_uminv:
+ case Intrinsic::aarch64_neon_sminv: {
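+ // These reductions define a scalar of the vector's element type, but the
+ // IR-level intrinsic may be declared with a wider result (e.g. i32 for a
+ // <8 x i8> reduction); narrow the def here and extend the result back.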
+ MachineIRBuilder MIB(MI);
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
+ IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
+ IntrinsicID == Intrinsic::aarch64_neon_sminv;
+
+ auto OldDst = MI.getOperand(0).getReg();
+ auto OldDstTy = MRI.getType(OldDst);
+ LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
+ if (OldDstTy == NewDstTy)
+ return true;
+
+ auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
+
+ Helper.Observer.changingInstr(MI);
+ MI.getOperand(0).setReg(NewDst);
+ Helper.Observer.changedInstr(MI);
+
+ MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
+ MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
+ OldDst, NewDst);
+
+ return true;
+ }
+ case Intrinsic::aarch64_neon_smax:
+ case Intrinsic::aarch64_neon_smin:
+ case Intrinsic::aarch64_neon_umax:
+ case Intrinsic::aarch64_neon_umin:
+ case Intrinsic::aarch64_neon_fmax:
+ case Intrinsic::aarch64_neon_fmin: {
+ MachineIRBuilder MIB(MI);
+ if (IntrinsicID == Intrinsic::aarch64_neon_smax)
+ MIB.buildSMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
+ else if (IntrinsicID == Intrinsic::aarch64_neon_smin)
+ MIB.buildSMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
+ else if (IntrinsicID == Intrinsic::aarch64_neon_umax)
+ MIB.buildUMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
+ else if (IntrinsicID == Intrinsic::aarch64_neon_umin)
+ MIB.buildUMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
+ else if (IntrinsicID == Intrinsic::aarch64_neon_fmax)
+ MIB.buildInstr(TargetOpcode::G_FMAXIMUM, {MI.getOperand(0)},
+ {MI.getOperand(2), MI.getOperand(3)});
+ else if (IntrinsicID == Intrinsic::aarch64_neon_fmin)
+ MIB.buildInstr(TargetOpcode::G_FMINIMUM, {MI.getOperand(0)},
+ {MI.getOperand(2), MI.getOperand(3)});
MI.eraseFromParent();
return true;
}
+ case Intrinsic::experimental_vector_reverse:
+ // TODO: Add support for vector_reverse
+ return false;
}
return true;
@@ -1489,8 +1718,7 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
llvm_unreachable("unexpected vector shape");
MachineInstrBuilder UADD;
for (LLT HTy : HAddTys) {
- UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}, /*HasSideEffects =*/false)
- .addUse(HSum);
+ UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
HSum = UADD.getReg(0);
}
@@ -1617,11 +1845,12 @@ bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
// Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
- // Zext the value operand to 64 bit
+ // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
+ // the instruction).
auto &Value = MI.getOperand(1);
- Register ZExtValueReg =
+ Register ExtValueReg =
MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
- Value.setReg(ZExtValueReg);
+ Value.setReg(ExtValueReg);
return true;
}
@@ -1676,7 +1905,7 @@ bool AArch64LegalizerInfo::legalizeFCopySign(MachineInstr &MI,
if (DstSize == 64)
Mask = MIRBuilder.buildFNeg(VecTy, Mask);
- auto Sel = MIRBuilder.buildInstr(AArch64::G_BIT, {VecTy}, {Ins1, Ins2, Mask});
+ auto Sel = MIRBuilder.buildInstr(AArch64::G_BSP, {VecTy}, {Mask, Ins2, Ins1});
// Build an unmerge whose 0th elt is the original G_FCOPYSIGN destination. We
// want this to eventually become an EXTRACT_SUBREG.
@@ -1687,3 +1916,78 @@ bool AArch64LegalizerInfo::legalizeFCopySign(MachineInstr &MI,
MI.eraseFromParent();
return true;
}
+
+bool AArch64LegalizerInfo::legalizeExtractVectorElt(
+ MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
+ assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
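+ // A constant lane index is directly selectable; only variable-index
+ // extracts need the generic lowering (typically expanded through a stack
+ // temporary).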
+ auto VRegAndVal =
+ getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ if (VRegAndVal)
+ return true;
+ return Helper.lowerExtractInsertVectorElt(MI) !=
+ LegalizerHelper::LegalizeResult::UnableToLegalize;
+}
+
+bool AArch64LegalizerInfo::legalizeDynStackAlloc(
+ MachineInstr &MI, LegalizerHelper &Helper) const {
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+
+ // If stack probing is not enabled for this function, use the default
+ // lowering.
+ if (!MF.getFunction().hasFnAttribute("probe-stack") ||
+ MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
+ "inline-asm") {
+ Helper.lowerDynStackAlloc(MI);
+ return true;
+ }
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register AllocSize = MI.getOperand(1).getReg();
+ Align Alignment = assumeAligned(MI.getOperand(2).getImm());
+
+ assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
+ "Unexpected type for dynamic alloca");
+ assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
+ "Unexpected type for dynamic alloca");
+
+ LLT PtrTy = MRI.getType(Dst);
+ Register SPReg =
+ Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
+ Register SPTmp =
+ Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
+ auto NewMI =
+ MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
+ MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
+ MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
+ MIRBuilder.buildCopy(Dst, SPTmp);
+
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
+ LegalizerHelper &Helper) const {
+ MachineIRBuilder &MIB = Helper.MIRBuilder;
+ auto &AddrVal = MI.getOperand(0);
+
+ int64_t IsWrite = MI.getOperand(1).getImm();
+ int64_t Locality = MI.getOperand(2).getImm();
+ int64_t IsData = MI.getOperand(3).getImm();
+
+ bool IsStream = Locality == 0;
+ if (Locality != 0) {
+ assert(Locality <= 3 && "Prefetch locality out-of-range");
+ // The locality degree runs opposite to the cache level: locality 3 (most
+ // temporal) maps to the fastest cache, and the PRFM encoding starts at 0
+ // for level 1, so invert the number.
+ Locality = 3 - Locality;
+ }
+
+ unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
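+ // e.g. a read, data prefetch with locality 3 packs to
+ // (0<<4)|(0<<3)|(0<<1)|0 = 0 (PLDL1KEEP); a write, data, locality-0
+ // streaming prefetch packs to (1<<4)|(0<<3)|(0<<1)|1 = 17 (PSTL1STRM).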
+
+ MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
+ MI.eraseFromParent();
+ return true;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index c10f6e071ed4..19f77baa77f8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -46,11 +46,14 @@ private:
bool legalizeSmallCMGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const;
- bool legalizeVectorTrunc(MachineInstr &MI, LegalizerHelper &Helper) const;
bool legalizeBitfieldExtract(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
bool legalizeRotate(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
+ bool legalizeFunnelShift(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer,
+ LegalizerHelper &Helper) const;
bool legalizeCTPOP(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
bool legalizeAtomicCmpxchg128(MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -58,6 +61,10 @@ private:
bool legalizeCTTZ(MachineInstr &MI, LegalizerHelper &Helper) const;
bool legalizeMemOps(MachineInstr &MI, LegalizerHelper &Helper) const;
bool legalizeFCopySign(MachineInstr &MI, LegalizerHelper &Helper) const;
+ bool legalizeExtractVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const;
+ bool legalizeDynStackAlloc(MachineInstr &MI, LegalizerHelper &Helper) const;
+ bool legalizePrefetch(MachineInstr &MI, LegalizerHelper &Helper) const;
const AArch64Subtarget *ST;
};
} // End llvm namespace.
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
index 590afbc29d6d..0b82ed1280dd 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
@@ -16,7 +16,6 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
-#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
@@ -42,27 +41,25 @@ namespace {
#include "AArch64GenO0PreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES
-class AArch64O0PreLegalizerCombinerImpl : public GIMatchTableExecutor {
+class AArch64O0PreLegalizerCombinerImpl : public Combiner {
protected:
- CombinerHelper &Helper;
+ // TODO: Make CombinerHelper methods const.
+ mutable CombinerHelper Helper;
const AArch64O0PreLegalizerCombinerImplRuleConfig &RuleConfig;
-
const AArch64Subtarget &STI;
- GISelChangeObserver &Observer;
- MachineIRBuilder &B;
- MachineFunction &MF;
-
- MachineRegisterInfo &MRI;
public:
AArch64O0PreLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
const AArch64O0PreLegalizerCombinerImplRuleConfig &RuleConfig,
- GISelChangeObserver &Observer, MachineIRBuilder &B,
- CombinerHelper &Helper);
+ const AArch64Subtarget &STI);
static const char *getName() { return "AArch64O0PreLegalizerCombiner"; }
- bool tryCombineAll(MachineInstr &I) const;
+ bool tryCombineAll(MachineInstr &I) const override;
+
+ bool tryCombineAllImpl(MachineInstr &I) const;
private:
#define GET_GICOMBINER_CLASS_MEMBERS
@@ -75,45 +72,21 @@ private:
#undef GET_GICOMBINER_IMPL
AArch64O0PreLegalizerCombinerImpl::AArch64O0PreLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
const AArch64O0PreLegalizerCombinerImplRuleConfig &RuleConfig,
- GISelChangeObserver &Observer, MachineIRBuilder &B, CombinerHelper &Helper)
- : Helper(Helper), RuleConfig(RuleConfig),
- STI(B.getMF().getSubtarget<AArch64Subtarget>()), Observer(Observer), B(B),
- MF(B.getMF()), MRI(*B.getMRI()),
+ const AArch64Subtarget &STI)
+ : Combiner(MF, CInfo, TPC, &KB, CSEInfo),
+ Helper(Observer, B, /*IsPreLegalize*/ true, &KB), RuleConfig(RuleConfig),
+ STI(STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AArch64GenO0PreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}
-class AArch64O0PreLegalizerCombinerInfo : public CombinerInfo {
- GISelKnownBits *KB;
- MachineDominatorTree *MDT;
- AArch64O0PreLegalizerCombinerImplRuleConfig RuleConfig;
-
-public:
- AArch64O0PreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
- GISelKnownBits *KB,
- MachineDominatorTree *MDT)
- : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
- /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
- KB(KB), MDT(MDT) {
- if (!RuleConfig.parseCommandLineOption())
- report_fatal_error("Invalid rule identifier");
- }
-
- bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
- MachineIRBuilder &B) const override;
-};
-
-bool AArch64O0PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
- MachineInstr &MI,
- MachineIRBuilder &B) const {
- CombinerHelper Helper(Observer, B, /*IsPreLegalize*/ true, KB, MDT);
- AArch64O0PreLegalizerCombinerImpl Impl(RuleConfig, Observer, B, Helper);
- Impl.setupMF(*MI.getMF(), KB);
-
- if (Impl.tryCombineAll(MI))
+bool AArch64O0PreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
+ if (tryCombineAllImpl(MI))
return true;
unsigned Opc = MI.getOpcode();
@@ -133,7 +106,7 @@ bool AArch64O0PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
if (Helper.tryCombineMemCpyFamily(MI, MaxLen))
return true;
if (Opc == TargetOpcode::G_MEMSET)
- return llvm::AArch64GISelUtils::tryEmitBZero(MI, B, EnableMinSize);
+ return llvm::AArch64GISelUtils::tryEmitBZero(MI, B, CInfo.EnableMinSize);
return false;
}
}
@@ -157,6 +130,9 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ AArch64O0PreLegalizerCombinerImplRuleConfig RuleConfig;
};
} // end anonymous namespace
@@ -172,6 +148,9 @@ void AArch64O0PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AArch64O0PreLegalizerCombiner::AArch64O0PreLegalizerCombiner()
: MachineFunctionPass(ID) {
initializeAArch64O0PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+
+ if (!RuleConfig.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
}
bool AArch64O0PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
@@ -182,10 +161,15 @@ bool AArch64O0PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
const Function &F = MF.getFunction();
GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
- AArch64O0PreLegalizerCombinerInfo PCInfo(
- false, F.hasOptSize(), F.hasMinSize(), KB, nullptr /* MDT */);
- Combiner C(PCInfo, &TPC);
- return C.combineMachineInstrs(MF, nullptr /* CSEInfo */);
+
+ const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
+
+ CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ /*LegalizerInfo*/ nullptr, /*EnableOpt*/ false,
+ F.hasOptSize(), F.hasMinSize());
+ AArch64O0PreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *KB,
+ /*CSEInfo*/ nullptr, RuleConfig, ST);
+ return Impl.combineMachineInstrs();
}
char AArch64O0PreLegalizerCombiner::ID = 0;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 303cf11d4f30..51c52aad3594 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -20,11 +20,12 @@
//===----------------------------------------------------------------------===//
#include "AArch64TargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
-#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
@@ -149,7 +150,7 @@ bool matchAArch64MulConstCombine(
APInt ConstValue = Const->Value.sext(Ty.getSizeInBits());
// The following code is ported from AArch64ISelLowering.
// Multiplication of a power of two plus/minus one can be done more
- // cheaply as as shift+add/sub. For now, this is true unilaterally. If
+ // cheaply as shift+add/sub. For now, this is true unilaterally. If
// future CPUs have a cheaper MADD instruction, this may need to be
// gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
// 64-bit is 5 cycles, so this is always a win.
@@ -339,26 +340,65 @@ void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
Store.eraseFromParent();
}
-class AArch64PostLegalizerCombinerImpl : public GIMatchTableExecutor {
+bool matchOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
+ std::tuple<Register, Register, Register> &MatchInfo) {
+ const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ if (!DstTy.isVector())
+ return false;
+
+ Register AO1, AO2, BVO1, BVO2;
+ if (!mi_match(MI, MRI,
+ m_GOr(m_GAnd(m_Reg(AO1), m_Reg(BVO1)),
+ m_GAnd(m_Reg(AO2), m_Reg(BVO2)))))
+ return false;
+
+ auto *BV1 = getOpcodeDef<GBuildVector>(BVO1, MRI);
+ auto *BV2 = getOpcodeDef<GBuildVector>(BVO2, MRI);
+ if (!BV1 || !BV2)
+ return false;
+
+ for (int I = 0, E = DstTy.getNumElements(); I < E; I++) {
+ auto ValAndVReg1 =
+ getIConstantVRegValWithLookThrough(BV1->getSourceReg(I), MRI);
+ auto ValAndVReg2 =
+ getIConstantVRegValWithLookThrough(BV2->getSourceReg(I), MRI);
+ if (!ValAndVReg1 || !ValAndVReg2 ||
+ ValAndVReg1->Value != ~ValAndVReg2->Value)
+ return false;
+ }
+
+ MatchInfo = {AO1, AO2, BVO1};
+ return true;
+}
+
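+// G_BSP is a bitwise select: for mask M and inputs A and B it computes
+// (A & M) | (B & ~M). matchOrToBSP above checks that the two build-vectors
+// are complementary constant masks, so the matched G_OR is exactly that
+// select with M = BVO1.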
+void applyOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
+ std::tuple<Register, Register, Register> &MatchInfo) {
+ B.setInstrAndDebugLoc(MI);
+ B.buildInstr(
+ AArch64::G_BSP, {MI.getOperand(0).getReg()},
+ {std::get<2>(MatchInfo), std::get<0>(MatchInfo), std::get<1>(MatchInfo)});
+ MI.eraseFromParent();
+}
+
+class AArch64PostLegalizerCombinerImpl : public Combiner {
protected:
- CombinerHelper &Helper;
+ // TODO: Make CombinerHelper methods const.
+ mutable CombinerHelper Helper;
const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig;
-
const AArch64Subtarget &STI;
- MachineRegisterInfo &MRI;
- GISelChangeObserver &Observer;
- MachineIRBuilder &B;
- MachineFunction &MF;
public:
AArch64PostLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
- const AArch64Subtarget &STI, GISelChangeObserver &Observer,
- MachineIRBuilder &B, CombinerHelper &Helper);
+ const AArch64Subtarget &STI, MachineDominatorTree *MDT,
+ const LegalizerInfo *LI);
static const char *getName() { return "AArch64PostLegalizerCombiner"; }
- bool tryCombineAll(MachineInstr &I) const;
+ bool tryCombineAll(MachineInstr &I) const override;
private:
#define GET_GICOMBINER_CLASS_MEMBERS
@@ -371,49 +411,20 @@ private:
#undef GET_GICOMBINER_IMPL
AArch64PostLegalizerCombinerImpl::AArch64PostLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
- const AArch64Subtarget &STI, GISelChangeObserver &Observer,
- MachineIRBuilder &B, CombinerHelper &Helper)
- : Helper(Helper), RuleConfig(RuleConfig), STI(STI), MRI(*B.getMRI()),
- Observer(Observer), B(B), MF(B.getMF()),
+ const AArch64Subtarget &STI, MachineDominatorTree *MDT,
+ const LegalizerInfo *LI)
+ : Combiner(MF, CInfo, TPC, &KB, CSEInfo),
+ Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
+ RuleConfig(RuleConfig), STI(STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}
-class AArch64PostLegalizerCombinerInfo : public CombinerInfo {
- GISelKnownBits *KB;
- MachineDominatorTree *MDT;
-
-public:
- AArch64PostLegalizerCombinerImplRuleConfig RuleConfig;
-
- AArch64PostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
- GISelKnownBits *KB,
- MachineDominatorTree *MDT)
- : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
- /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
- KB(KB), MDT(MDT) {
- if (!RuleConfig.parseCommandLineOption())
- report_fatal_error("Invalid rule identifier");
- }
-
- bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
- MachineIRBuilder &B) const override;
-};
-
-bool AArch64PostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
- MachineInstr &MI,
- MachineIRBuilder &B) const {
- const auto &STI = MI.getMF()->getSubtarget<AArch64Subtarget>();
- const auto *LI = STI.getLegalizerInfo();
- CombinerHelper Helper(Observer, B, /*IsPreLegalize*/ false, KB, MDT, LI);
- AArch64PostLegalizerCombinerImpl Impl(RuleConfig, STI, Observer, B, Helper);
- Impl.setupMF(*MI.getMF(), KB);
- return Impl.tryCombineAll(MI);
-}
-
class AArch64PostLegalizerCombiner : public MachineFunctionPass {
public:
static char ID;
@@ -429,6 +440,23 @@ public:
private:
bool IsOptNone;
+ AArch64PostLegalizerCombinerImplRuleConfig RuleConfig;
+
+ struct StoreInfo {
+ GStore *St = nullptr;
+ // The G_PTR_ADD that's used by the store. We keep this to cache the
+ // MachineInstr def.
+ GPtrAdd *Ptr = nullptr;
+ // The signed offset to the Ptr instruction.
+ int64_t Offset = 0;
+ LLT StoredType;
+ };
+ bool tryOptimizeConsecStores(SmallVectorImpl<StoreInfo> &Stores,
+ CSEMIRBuilder &MIB);
+
+ bool optimizeConsecutiveMemOpAddressing(MachineFunction &MF,
+ CSEMIRBuilder &MIB);
};
} // end anonymous namespace
@@ -450,6 +478,9 @@ void AArch64PostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AArch64PostLegalizerCombiner::AArch64PostLegalizerCombiner(bool IsOptNone)
: MachineFunctionPass(ID), IsOptNone(IsOptNone) {
initializeAArch64PostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+
+ if (!RuleConfig.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
}
bool AArch64PostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
@@ -462,17 +493,208 @@ bool AArch64PostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
auto *TPC = &getAnalysis<TargetPassConfig>();
const Function &F = MF.getFunction();
bool EnableOpt =
- MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
+ MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
+
+ const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
+ const auto *LI = ST.getLegalizerInfo();
+
GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
MachineDominatorTree *MDT =
IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
- AArch64PostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
- F.hasMinSize(), KB, MDT);
GISelCSEAnalysisWrapper &Wrapper =
getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
- Combiner C(PCInfo, TPC);
- return C.combineMachineInstrs(MF, CSEInfo);
+
+ CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ /*LegalizerInfo*/ nullptr, EnableOpt, F.hasOptSize(),
+ F.hasMinSize());
+ AArch64PostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo,
+ RuleConfig, ST, MDT, LI);
+ bool Changed = Impl.combineMachineInstrs();
+
+ auto MIB = CSEMIRBuilder(MF);
+ MIB.setCSEInfo(CSEInfo);
+ Changed |= optimizeConsecutiveMemOpAddressing(MF, MIB);
+ return Changed;
+}
+
+bool AArch64PostLegalizerCombiner::tryOptimizeConsecStores(
+ SmallVectorImpl<StoreInfo> &Stores, CSEMIRBuilder &MIB) {
+ if (Stores.size() <= 2)
+ return false;
+
+ // Profitability checks:
+ int64_t BaseOffset = Stores[0].Offset;
+ unsigned NumPairsExpected = Stores.size() / 2;
+ unsigned TotalInstsExpected = NumPairsExpected + (Stores.size() % 2);
+ // Size savings will depend on whether we can fold the offset as an
+ // immediate of an ADD.
+ auto &TLI = *MIB.getMF().getSubtarget().getTargetLowering();
+ if (!TLI.isLegalAddImmediate(BaseOffset))
+ TotalInstsExpected++;
+ int SavingsExpected = Stores.size() - TotalInstsExpected;
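+ // e.g. six consecutive 128-bit stores starting at offset 4128: three STPs
+ // are expected, plus one ADD because 4128 is not an encodable ADD
+ // immediate (12 bits, optionally shifted by 12), so 6 - 4 = 2 > 0 and the
+ // rewrite pays off.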
+ if (SavingsExpected <= 0)
+ return false;
+
+ auto &MRI = MIB.getMF().getRegInfo();
+
+ // We have a series of consecutive stores. Factor out the common base
+ // pointer and rewrite the offsets.
+ Register NewBase = Stores[0].Ptr->getReg(0);
+ for (auto &SInfo : Stores) {
+ // Compute a new pointer with the new base ptr and adjusted offset.
+ MIB.setInstrAndDebugLoc(*SInfo.St);
+ auto NewOff = MIB.buildConstant(LLT::scalar(64), SInfo.Offset - BaseOffset);
+ auto NewPtr = MIB.buildPtrAdd(MRI.getType(SInfo.St->getPointerReg()),
+ NewBase, NewOff);
+ if (MIB.getObserver())
+ MIB.getObserver()->changingInstr(*SInfo.St);
+ SInfo.St->getOperand(1).setReg(NewPtr.getReg(0));
+ if (MIB.getObserver())
+ MIB.getObserver()->changedInstr(*SInfo.St);
+ }
+ LLVM_DEBUG(dbgs() << "Split a series of " << Stores.size()
+ << " stores into a base pointer and offsets.\n");
+ return true;
+}
+
+static cl::opt<bool>
+ EnableConsecutiveMemOpOpt("aarch64-postlegalizer-consecutive-memops",
+ cl::init(true), cl::Hidden,
+ cl::desc("Enable consecutive memop optimization "
+ "in AArch64PostLegalizerCombiner"));
+
+bool AArch64PostLegalizerCombiner::optimizeConsecutiveMemOpAddressing(
+ MachineFunction &MF, CSEMIRBuilder &MIB) {
+ // This combine needs to run after all reassociations/folds on pointer
+ // addressing have been done, specifically those that combine two G_PTR_ADDs
+ // with constant offsets into a single G_PTR_ADD with a combined offset.
+ // The goal of this optimization is to undo that combine in the case where
+ // doing so has prevented the formation of pair stores due to illegal
+ // addressing modes of STP. The reason that we do it here is because
+ // it's much easier to undo the transformation of a series of consecutive
+ // mem ops than it is to detect when doing it would be a bad idea looking
+ // at a single G_PTR_ADD in the reassociation/ptradd_immed_chain combine.
+ //
+ // An example:
+ // G_STORE %11:_(<2 x s64>), %base:_(p0) :: (store (<2 x s64>), align 1)
+ // %off1:_(s64) = G_CONSTANT i64 4128
+ // %p1:_(p0) = G_PTR_ADD %base:_, %off1:_(s64)
+ // G_STORE %11:_(<2 x s64>), %p1:_(p0) :: (store (<2 x s64>), align 1)
+ // %off2:_(s64) = G_CONSTANT i64 4144
+ // %p2:_(p0) = G_PTR_ADD %base:_, %off2:_(s64)
+ // G_STORE %11:_(<2 x s64>), %p2:_(p0) :: (store (<2 x s64>), align 1)
+ // %off3:_(s64) = G_CONSTANT i64 4160
+ // %p3:_(p0) = G_PTR_ADD %base:_, %off3:_(s64)
+ // G_STORE %11:_(<2 x s64>), %p3:_(p0) :: (store (<2 x s64>), align 1)
+ bool Changed = false;
+ auto &MRI = MF.getRegInfo();
+
+ if (!EnableConsecutiveMemOpOpt)
+ return Changed;
+
+ SmallVector<StoreInfo, 8> Stores;
+ // If we see a load, then we keep track of any values defined by it.
+ // In the following example, STP formation will fail anyway because
+ // the latter store is using a load result that appears after the prior
+ // store. In this situation, if we factor out the offset then we increase
+ // code size for no benefit.
+ // G_STORE %v1:_(s64), %base:_(p0) :: (store (s64))
+ // %v2:_(s64) = G_LOAD %ldptr:_(p0) :: (load (s64))
+ // G_STORE %v2:_(s64), %base:_(p0) :: (store (s64))
+ SmallVector<Register> LoadValsSinceLastStore;
+
+ auto storeIsValid = [&](StoreInfo &Last, StoreInfo New) {
+ // Check if this store is consecutive to the last one.
+ if (Last.Ptr->getBaseReg() != New.Ptr->getBaseReg() ||
+ (Last.Offset + static_cast<int64_t>(Last.StoredType.getSizeInBytes()) !=
+ New.Offset) ||
+ Last.StoredType != New.StoredType)
+ return false;
+
+ // Check if this store is using a load result that appears after the
+ // last store. If so, bail out.
+ if (any_of(LoadValsSinceLastStore, [&](Register LoadVal) {
+ return New.St->getValueReg() == LoadVal;
+ }))
+ return false;
+
+ // Check if the current offset would be too large for STP.
+ // If not, then STP formation should be able to handle it, so we don't
+ // need to do anything.
+ int64_t MaxLegalOffset;
+ switch (New.StoredType.getSizeInBits()) {
+ case 32:
+ MaxLegalOffset = 252;
+ break;
+ case 64:
+ MaxLegalOffset = 504;
+ break;
+ case 128:
+ MaxLegalOffset = 1008;
+ break;
+ default:
+ llvm_unreachable("Unexpected stored type size");
+ }
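+ // (These bounds are the largest STP offsets: a signed scaled imm7 tops
+ // out at 63 times the access size in bytes, i.e. 63*4=252, 63*8=504,
+ // 63*16=1008.)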
+ if (New.Offset < MaxLegalOffset)
+ return false;
+
+ // If factoring it out still wouldn't help then don't bother.
+ return New.Offset - Stores[0].Offset <= MaxLegalOffset;
+ };
+
+ auto resetState = [&]() {
+ Stores.clear();
+ LoadValsSinceLastStore.clear();
+ };
+
+ for (auto &MBB : MF) {
+ // We're looking inside a single BB at a time since the memset pattern
+ // should only be in a single block.
+ resetState();
+ for (auto &MI : MBB) {
+ if (auto *St = dyn_cast<GStore>(&MI)) {
+ Register PtrBaseReg;
+ APInt Offset;
+ LLT StoredValTy = MRI.getType(St->getValueReg());
+ unsigned ValSize = StoredValTy.getSizeInBits();
+ if (ValSize < 32 || ValSize != St->getMMO().getSizeInBits())
+ continue;
+
+ Register PtrReg = St->getPointerReg();
+ if (mi_match(
+ PtrReg, MRI,
+ m_OneNonDBGUse(m_GPtrAdd(m_Reg(PtrBaseReg), m_ICst(Offset))))) {
+ GPtrAdd *PtrAdd = cast<GPtrAdd>(MRI.getVRegDef(PtrReg));
+ StoreInfo New = {St, PtrAdd, Offset.getSExtValue(), StoredValTy};
+
+ if (Stores.empty()) {
+ Stores.push_back(New);
+ continue;
+ }
+
+ // Check if this store is a valid continuation of the sequence.
+ auto &Last = Stores.back();
+ if (storeIsValid(Last, New)) {
+ Stores.push_back(New);
+ LoadValsSinceLastStore.clear(); // Reset the load value tracking.
+ } else {
+ // The store isn't valid to consider for the prior sequence,
+ // so try to optimize what we have so far and start a new sequence.
+ Changed |= tryOptimizeConsecStores(Stores, MIB);
+ resetState();
+ Stores.push_back(New);
+ }
+ }
+ } else if (auto *Ld = dyn_cast<GLoad>(&MI)) {
+ LoadValsSinceLastStore.push_back(Ld->getDstReg());
+ }
+ }
+ Changed |= tryOptimizeConsecStores(Stores, MIB);
+ resetState();
+ }
+
+ return Changed;
}
char AArch64PostLegalizerCombiner::ID = 0;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index feeef91bce19..687063873a16 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -29,7 +29,6 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
-#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
@@ -457,11 +456,15 @@ void applyShuffleVectorPseudo(MachineInstr &MI,
/// for the imported tablegen patterns to work.
void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
MachineIRBuilder MIRBuilder(MI);
- // Tablegen patterns expect an i32 G_CONSTANT as the final op.
- auto Cst =
- MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
- MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
- {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
+ if (MatchInfo.SrcOps[2].getImm() == 0)
+ MIRBuilder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
+ else {
+ // Tablegen patterns expect an i32 G_CONSTANT as the final op.
+ auto Cst =
+ MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
+ MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
+ {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
+ }
MI.eraseFromParent();
}
@@ -720,9 +723,13 @@ bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
case 4:
if (ScalarSize == 32)
Opc = AArch64::G_DUPLANE32;
+ else if (ScalarSize == 16)
+ Opc = AArch64::G_DUPLANE16;
break;
case 8:
- if (ScalarSize == 16)
+ if (ScalarSize == 8)
+ Opc = AArch64::G_DUPLANE8;
+ else if (ScalarSize == 16)
Opc = AArch64::G_DUPLANE16;
break;
case 16:
@@ -752,13 +759,10 @@ void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
Register DupSrc = MI.getOperand(1).getReg();
// For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
// To do this, we can use a G_CONCAT_VECTORS to do the widening.
- if (SrcTy == LLT::fixed_vector(2, LLT::scalar(32))) {
- assert(MRI.getType(MI.getOperand(0).getReg()).getNumElements() == 2 &&
- "Unexpected dest elements");
+ if (SrcTy.getSizeInBits() == 64) {
auto Undef = B.buildUndef(SrcTy);
- DupSrc = B.buildConcatVectors(
- SrcTy.changeElementCount(ElementCount::getFixed(4)),
- {Src1Reg, Undef.getReg(0)})
+ DupSrc = B.buildConcatVectors(SrcTy.multiplyElements(2),
+ {Src1Reg, Undef.getReg(0)})
.getReg(0);
}
B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
@@ -1062,27 +1066,136 @@ void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
Helper.lower(MI, 0, /* Unused hint type */ LLT());
}
-class AArch64PostLegalizerLoweringImpl : public GIMatchTableExecutor {
+/// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
+/// => unused, <N x t> = unmerge v
+bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
+ Register &MatchInfo) {
+ auto &Unmerge = cast<GUnmerge>(MI);
+ if (Unmerge.getNumDefs() != 2)
+ return false;
+ if (!MRI.use_nodbg_empty(Unmerge.getReg(1)))
+ return false;
+
+ LLT DstTy = MRI.getType(Unmerge.getReg(0));
+ if (!DstTy.isVector())
+ return false;
+
+ MachineInstr *Ext = getOpcodeDef(AArch64::G_EXT, Unmerge.getSourceReg(), MRI);
+ if (!Ext)
+ return false;
+
+ Register ExtSrc1 = Ext->getOperand(1).getReg();
+ Register ExtSrc2 = Ext->getOperand(2).getReg();
+ auto LowestVal =
+ getIConstantVRegValWithLookThrough(Ext->getOperand(3).getReg(), MRI);
+ if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
+ return false;
+
+ if (!getOpcodeDef<GImplicitDef>(ExtSrc2, MRI))
+ return false;
+
+ MatchInfo = ExtSrc1;
+ return true;
+}
+
+void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
+ GISelChangeObserver &Observer, Register &SrcReg) {
+ Observer.changingInstr(MI);
+ // Swap dst registers.
+ Register Dst1 = MI.getOperand(0).getReg();
+ MI.getOperand(0).setReg(MI.getOperand(1).getReg());
+ MI.getOperand(1).setReg(Dst1);
+ MI.getOperand(2).setReg(SrcReg);
+ Observer.changedInstr(MI);
+}
+
+// Match mul({z/s}ext, {z/s}ext) => {u/s}mull, OR match v2s64 mul
+// instructions, which will then be scalarised later on. Both matches are
+// done in one function so that the order of matching is always the same:
+// try lowering MUL to MULL before trying to scalarise if needed.
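+// (Scalar model: for i32 lanes a and b, zext64(a) * zext64(b) is exactly
+// the per-lane result of G_UMULL a, b, and likewise with sext for G_SMULL,
+// so the explicit extends become dead.)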
+bool matchExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI) {
+ // Get the instructions that defined the source operand
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
+ MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
+
+ if (DstTy.isVector()) {
+ // If the source operands were EXTENDED before, then {U/S}MULL can be used
+ unsigned I1Opc = I1->getOpcode();
+ unsigned I2Opc = I2->getOpcode();
+ if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) ||
+ (I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) &&
+ (MRI.getType(I1->getOperand(0).getReg()).getScalarSizeInBits() ==
+ MRI.getType(I1->getOperand(1).getReg()).getScalarSizeInBits() * 2) &&
+ (MRI.getType(I2->getOperand(0).getReg()).getScalarSizeInBits() ==
+ MRI.getType(I2->getOperand(1).getReg()).getScalarSizeInBits() * 2)) {
+ return true;
+ }
+ // If result type is v2s64, scalarise the instruction
+ else if (DstTy == LLT::fixed_vector(2, 64)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+void applyExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, GISelChangeObserver &Observer) {
+ assert(MI.getOpcode() == TargetOpcode::G_MUL &&
+ "Expected a G_MUL instruction");
+
+ // Get the instructions that defined the source operand
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
+ MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
+
+ // If the source operands were EXTENDED before, then {U/S}MULL can be used
+ unsigned I1Opc = I1->getOpcode();
+ unsigned I2Opc = I2->getOpcode();
+ if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) ||
+ (I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) &&
+ (MRI.getType(I1->getOperand(0).getReg()).getScalarSizeInBits() ==
+ MRI.getType(I1->getOperand(1).getReg()).getScalarSizeInBits() * 2) &&
+ (MRI.getType(I2->getOperand(0).getReg()).getScalarSizeInBits() ==
+ MRI.getType(I2->getOperand(1).getReg()).getScalarSizeInBits() * 2)) {
+
+ B.setInstrAndDebugLoc(MI);
+ B.buildInstr(I1->getOpcode() == TargetOpcode::G_ZEXT ? AArch64::G_UMULL
+ : AArch64::G_SMULL,
+ {MI.getOperand(0).getReg()},
+ {I1->getOperand(1).getReg(), I2->getOperand(1).getReg()});
+ MI.eraseFromParent();
+ }
+ // If result type is v2s64, scalarise the instruction
+ else if (DstTy == LLT::fixed_vector(2, 64)) {
+ LegalizerHelper Helper(*MI.getMF(), Observer, B);
+ B.setInstrAndDebugLoc(MI);
+ Helper.fewerElementsVector(
+ MI, 0,
+ DstTy.changeElementCount(
+ DstTy.getElementCount().divideCoefficientBy(2)));
+ }
+}
+
+class AArch64PostLegalizerLoweringImpl : public Combiner {
protected:
- CombinerHelper &Helper;
+ // TODO: Make CombinerHelper methods const.
+ mutable CombinerHelper Helper;
const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig;
-
const AArch64Subtarget &STI;
- GISelChangeObserver &Observer;
- MachineIRBuilder &B;
- MachineFunction &MF;
-
- MachineRegisterInfo &MRI;
public:
AArch64PostLegalizerLoweringImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelCSEInfo *CSEInfo,
const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
- const AArch64Subtarget &STI, GISelChangeObserver &Observer,
- MachineIRBuilder &B, CombinerHelper &Helper);
+ const AArch64Subtarget &STI);
+ static const char *getName() { return "AArch64PostLegalizerLowering"; }
- bool tryCombineAll(MachineInstr &I) const;
+ bool tryCombineAll(MachineInstr &I) const override;
private:
#define GET_GICOMBINER_CLASS_MEMBERS
@@ -1095,42 +1208,19 @@ private:
#undef GET_GICOMBINER_IMPL
AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelCSEInfo *CSEInfo,
const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
- const AArch64Subtarget &STI, GISelChangeObserver &Observer,
- MachineIRBuilder &B, CombinerHelper &Helper)
- : Helper(Helper), RuleConfig(RuleConfig), STI(STI), Observer(Observer),
- B(B), MF(B.getMF()), MRI(*B.getMRI()),
+ const AArch64Subtarget &STI)
+ : Combiner(MF, CInfo, TPC, /*KB*/ nullptr, CSEInfo),
+ Helper(Observer, B, /*IsPreLegalize*/ true), RuleConfig(RuleConfig),
+ STI(STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}
-class AArch64PostLegalizerLoweringInfo : public CombinerInfo {
-public:
- AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;
-
- AArch64PostLegalizerLoweringInfo(bool OptSize, bool MinSize)
- : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
- /*LegalizerInfo*/ nullptr, /*OptEnabled = */ true, OptSize,
- MinSize) {
- if (!RuleConfig.parseCommandLineOption())
- report_fatal_error("Invalid rule identifier");
- }
-
- bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
- MachineIRBuilder &B) const override;
-};
-
-bool AArch64PostLegalizerLoweringInfo::combine(GISelChangeObserver &Observer,
- MachineInstr &MI,
- MachineIRBuilder &B) const {
- const auto &STI = MI.getMF()->getSubtarget<AArch64Subtarget>();
- CombinerHelper Helper(Observer, B, /* IsPreLegalize*/ false);
- AArch64PostLegalizerLoweringImpl Impl(RuleConfig, STI, Observer, B, Helper);
- Impl.setupMF(*MI.getMF(), Helper.getKnownBits());
- return Impl.tryCombineAll(MI);
-}
class AArch64PostLegalizerLowering : public MachineFunctionPass {
public:
static char ID;
@@ -1143,6 +1233,9 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;
};
} // end anonymous namespace
@@ -1156,6 +1249,9 @@ void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
: MachineFunctionPass(ID) {
initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry());
+
+ if (!RuleConfig.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
}
bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
@@ -1167,9 +1263,14 @@ bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
"Expected a legalized function?");
auto *TPC = &getAnalysis<TargetPassConfig>();
const Function &F = MF.getFunction();
- AArch64PostLegalizerLoweringInfo PCInfo(F.hasOptSize(), F.hasMinSize());
- Combiner C(PCInfo, TPC);
- return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
+
+ const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
+ CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ /*LegalizerInfo*/ nullptr, /*OptEnabled=*/true,
+ F.hasOptSize(), F.hasMinSize());
+ AArch64PostLegalizerLoweringImpl Impl(MF, CInfo, TPC, /*CSEInfo*/ nullptr,
+ RuleConfig, ST);
+ return Impl.combineMachineInstrs();
}
char AArch64PostLegalizerLowering::ID = 0;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
index a918e9f36e69..71632718857b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -17,7 +17,6 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
-#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
@@ -215,7 +214,7 @@ void applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
// %ptrN = G_PTR_ADD %offset_g, cstN - min_cst
uint64_t Offset, MinOffset;
std::tie(Offset, MinOffset) = MatchInfo;
- B.setInstrAndDebugLoc(MI);
+ B.setInstrAndDebugLoc(*std::next(MI.getIterator()));
Observer.changingInstr(MI);
auto &GlobalOp = MI.getOperand(1);
auto *GV = GlobalOp.getGlobal();
@@ -229,12 +228,202 @@ void applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
B.buildConstant(LLT::scalar(64), -static_cast<int64_t>(MinOffset)));
}
+// Combines vecreduce_add(mul(ext(x), ext(y))) -> vecreduce_add(udot(x, y))
+// Or vecreduce_add(ext(x)) -> vecreduce_add(udot(x, 1))
+// Similar to performVecReduceAddCombine in SelectionDAG
+bool matchExtAddvToUdotAddv(MachineInstr &MI, MachineRegisterInfo &MRI,
+ const AArch64Subtarget &STI,
+ std::tuple<Register, Register, bool> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
+ "Expected a G_VECREDUCE_ADD instruction");
+ assert(STI.hasDotProd() && "Target should have Dot Product feature");
+
+ MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
+ Register DstReg = MI.getOperand(0).getReg();
+ Register MidReg = I1->getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT MidTy = MRI.getType(MidReg);
+ if (DstTy.getScalarSizeInBits() != 32 || MidTy.getScalarSizeInBits() != 32)
+ return false;
+
+ LLT SrcTy;
+ auto I1Opc = I1->getOpcode();
+ if (I1Opc == TargetOpcode::G_MUL) {
+ // If the result of the multiply has more than one use, there is no point
+ // in creating a udot instruction
+ if (!MRI.hasOneNonDBGUse(MidReg))
+ return false;
+
+ MachineInstr *ExtMI1 =
+ getDefIgnoringCopies(I1->getOperand(1).getReg(), MRI);
+ MachineInstr *ExtMI2 =
+ getDefIgnoringCopies(I1->getOperand(2).getReg(), MRI);
+ LLT Ext1DstTy = MRI.getType(ExtMI1->getOperand(0).getReg());
+ LLT Ext2DstTy = MRI.getType(ExtMI2->getOperand(0).getReg());
+
+ if (ExtMI1->getOpcode() != ExtMI2->getOpcode() || Ext1DstTy != Ext2DstTy)
+ return false;
+ I1Opc = ExtMI1->getOpcode();
+ SrcTy = MRI.getType(ExtMI1->getOperand(1).getReg());
+ std::get<0>(MatchInfo) = ExtMI1->getOperand(1).getReg();
+ std::get<1>(MatchInfo) = ExtMI2->getOperand(1).getReg();
+ } else {
+ SrcTy = MRI.getType(I1->getOperand(1).getReg());
+ std::get<0>(MatchInfo) = I1->getOperand(1).getReg();
+ std::get<1>(MatchInfo) = 0;
+ }
+
+ if (I1Opc == TargetOpcode::G_ZEXT)
+ std::get<2>(MatchInfo) = 0;
+ else if (I1Opc == TargetOpcode::G_SEXT)
+ std::get<2>(MatchInfo) = 1;
+ else
+ return false;
+
+ if (SrcTy.getScalarSizeInBits() != 8 || SrcTy.getNumElements() % 8 != 0)
+ return false;
+
+ return true;
+}
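+
+// The matched shape, sketched with illustrative virtual-register names:
+//   %xa:_(<16 x s32>) = G_ZEXT %a:_(<16 x s8>)
+//   %xb:_(<16 x s32>) = G_ZEXT %b:_(<16 x s8>)
+//   %m:_(<16 x s32>)  = G_MUL %xa, %xb
+//   %r:_(s32)         = G_VECREDUCE_ADD %m
+// MatchInfo then carries {%a, %b, IsSigned}. For the extend-only form the
+// second register is recorded as 0 and a splat of 1s is built in the apply
+// step below.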
+
+void applyExtAddvToUdotAddv(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &Builder,
+ GISelChangeObserver &Observer,
+ const AArch64Subtarget &STI,
+ std::tuple<Register, Register, bool> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
+ "Expected a G_VECREDUCE_ADD instruction");
+ assert(STI.hasDotProd() && "Target should have Dot Product feature");
+
+ // Initialise the dot-product opcode and first source from the match info
+ unsigned DotOpcode =
+ std::get<2>(MatchInfo) ? AArch64::G_SDOT : AArch64::G_UDOT;
+ Register Ext1SrcReg = std::get<0>(MatchInfo);
+
+ // If there is only one source register, use a vector of 1s as the second
+ // source so the dot product reduces to a plain sum of the elements
+ Register Ext2SrcReg;
+ if (std::get<1>(MatchInfo) == 0)
+ Ext2SrcReg = Builder.buildConstant(MRI.getType(Ext1SrcReg), 1)
+ ->getOperand(0)
+ .getReg();
+ else
+ Ext2SrcReg = std::get<1>(MatchInfo);
+
+ // Find out how many DOT instructions are needed
+ LLT SrcTy = MRI.getType(Ext1SrcReg);
+ LLT MidTy;
+ unsigned NumOfDotMI;
+ if (SrcTy.getNumElements() % 16 == 0) {
+ NumOfDotMI = SrcTy.getNumElements() / 16;
+ MidTy = LLT::fixed_vector(4, 32);
+ } else if (SrcTy.getNumElements() % 8 == 0) {
+ NumOfDotMI = SrcTy.getNumElements() / 8;
+ MidTy = LLT::fixed_vector(2, 32);
+ } else {
+ llvm_unreachable("Source type number of elements is not multiple of 8");
+ }
+
+ // Handle case where one DOT instruction is needed
+ if (NumOfDotMI == 1) {
+ auto Zeroes = Builder.buildConstant(MidTy, 0)->getOperand(0).getReg();
+ auto Dot = Builder.buildInstr(DotOpcode, {MidTy},
+ {Zeroes, Ext1SrcReg, Ext2SrcReg});
+ Builder.buildVecReduceAdd(MI.getOperand(0), Dot->getOperand(0));
+ } else {
+ // If not, pad the trailing v8 chunk with 0s up to a v16
+ SmallVector<Register, 4> Ext1UnmergeReg;
+ SmallVector<Register, 4> Ext2UnmergeReg;
+ if (SrcTy.getNumElements() % 16 != 0) {
+ // Unmerge the sources to v8i8, append a v8i8 of 0s, then merge back to v16i8
+ SmallVector<Register, 4> PadUnmergeDstReg1;
+ SmallVector<Register, 4> PadUnmergeDstReg2;
+ unsigned NumOfVec = SrcTy.getNumElements() / 8;
+
+ // Unmerge the source to v8i8
+ MachineInstr *PadUnmerge1 =
+ Builder.buildUnmerge(LLT::fixed_vector(8, 8), Ext1SrcReg);
+ MachineInstr *PadUnmerge2 =
+ Builder.buildUnmerge(LLT::fixed_vector(8, 8), Ext2SrcReg);
+ for (unsigned i = 0; i < NumOfVec; i++) {
+ PadUnmergeDstReg1.push_back(PadUnmerge1->getOperand(i).getReg());
+ PadUnmergeDstReg2.push_back(PadUnmerge2->getOperand(i).getReg());
+ }
+
+ // Pad the vectors with a v8i8 constant of 0s
+ MachineInstr *v8Zeroes =
+ Builder.buildConstant(LLT::fixed_vector(8, 8), 0);
+ PadUnmergeDstReg1.push_back(v8Zeroes->getOperand(0).getReg());
+ PadUnmergeDstReg2.push_back(v8Zeroes->getOperand(0).getReg());
+
+ // Merge them all back to v16i8
+ NumOfVec = (NumOfVec + 1) / 2;
+ for (unsigned i = 0; i < NumOfVec; i++) {
+ Ext1UnmergeReg.push_back(
+ Builder
+ .buildMergeLikeInstr(
+ LLT::fixed_vector(16, 8),
+ {PadUnmergeDstReg1[i * 2], PadUnmergeDstReg1[(i * 2) + 1]})
+ .getReg(0));
+ Ext2UnmergeReg.push_back(
+ Builder
+ .buildMergeLikeInstr(
+ LLT::fixed_vector(16, 8),
+ {PadUnmergeDstReg2[i * 2], PadUnmergeDstReg2[(i * 2) + 1]})
+ .getReg(0));
+ }
+ } else {
+ // Unmerge the source vectors to v16i8
+ MachineInstr *Ext1Unmerge =
+ Builder.buildUnmerge(LLT::fixed_vector(16, 8), Ext1SrcReg);
+ MachineInstr *Ext2Unmerge =
+ Builder.buildUnmerge(LLT::fixed_vector(16, 8), Ext2SrcReg);
+ for (unsigned i = 0, e = SrcTy.getNumElements() / 16; i < e; i++) {
+ Ext1UnmergeReg.push_back(Ext1Unmerge->getOperand(i).getReg());
+ Ext2UnmergeReg.push_back(Ext2Unmerge->getOperand(i).getReg());
+ }
+ }
+
+ // Build the UDOT instructions
+ SmallVector<Register, 2> DotReg;
+ unsigned NumElements = 0;
+ for (unsigned i = 0; i < Ext1UnmergeReg.size(); i++) {
+ LLT ZeroesLLT;
+ // Check whether this chunk has 16 or 8 elements and size Zeroes accordingly
+ if (MRI.getType(Ext1UnmergeReg[i]).getNumElements() == 16) {
+ ZeroesLLT = LLT::fixed_vector(4, 32);
+ NumElements += 4;
+ } else {
+ ZeroesLLT = LLT::fixed_vector(2, 32);
+ NumElements += 2;
+ }
+ auto Zeroes = Builder.buildConstant(ZeroesLLT, 0)->getOperand(0).getReg();
+ DotReg.push_back(
+ Builder
+ .buildInstr(DotOpcode, {MRI.getType(Zeroes)},
+ {Zeroes, Ext1UnmergeReg[i], Ext2UnmergeReg[i]})
+ .getReg(0));
+ }
+
+ // Merge the output
+ auto ConcatMI =
+ Builder.buildConcatVectors(LLT::fixed_vector(NumElements, 32), DotReg);
+
+ // Put it through a vector reduction
+ Builder.buildVecReduceAdd(MI.getOperand(0).getReg(),
+ ConcatMI->getOperand(0).getReg());
+ }
+
+ // Erase the original reduction; the leftover extends/mul become dead
+ MI.eraseFromParent();
+}
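+
+// A worked sketch of the shapes handled above (element counts assumed):
+//   v8i8 sources  -> a single v2i32 G_UDOT/G_SDOT, reduced directly;
+//   v16i8 sources -> a single v4i32 dot, reduced directly;
+//   v24i8 sources -> unmerged into three v8i8 parts, padded with one zero
+//                    v8i8 to form two v16i8, giving two v4i32 dots that are
+//                    concatenated to v8i32 for the final G_VECREDUCE_ADD.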
+
bool tryToSimplifyUADDO(MachineInstr &MI, MachineIRBuilder &B,
CombinerHelper &Helper, GISelChangeObserver &Observer) {
// Try simplify G_UADDO with 8 or 16 bit operands to wide G_ADD and TBNZ if
// result is only used in the no-overflow case. It is restricted to cases
// where we know that the high-bits of the operands are 0. If there's an
- // overflow, then the the 9th or 17th bit must be set, which can be checked
+ // overflow, then the 9th or 17th bit must be set, which can be checked
// using TBNZ.
//
// Change (for UADDOs on 8 and 16 bits):
@@ -343,27 +532,26 @@ bool tryToSimplifyUADDO(MachineInstr &MI, MachineIRBuilder &B,
return true;
}
-class AArch64PreLegalizerCombinerImpl : public GIMatchTableExecutor {
+class AArch64PreLegalizerCombinerImpl : public Combiner {
protected:
- CombinerHelper &Helper;
+ // TODO: Make CombinerHelper methods const.
+ mutable CombinerHelper Helper;
const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig;
-
const AArch64Subtarget &STI;
- GISelChangeObserver &Observer;
- MachineIRBuilder &B;
- MachineFunction &MF;
-
- MachineRegisterInfo &MRI;
public:
AArch64PreLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
- const AArch64Subtarget &STI, GISelChangeObserver &Observer,
- MachineIRBuilder &B, CombinerHelper &Helper);
+ const AArch64Subtarget &STI, MachineDominatorTree *MDT,
+ const LegalizerInfo *LI);
static const char *getName() { return "AArch64PreLegalizerCombiner"; }
- bool tryCombineAll(MachineInstr &I) const;
+ bool tryCombineAll(MachineInstr &I) const override;
+
+ bool tryCombineAllImpl(MachineInstr &I) const;
private:
#define GET_GICOMBINER_CLASS_MEMBERS
@@ -376,46 +564,22 @@ private:
#undef GET_GICOMBINER_IMPL
AArch64PreLegalizerCombinerImpl::AArch64PreLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
- const AArch64Subtarget &STI, GISelChangeObserver &Observer,
- MachineIRBuilder &B, CombinerHelper &Helper)
- : Helper(Helper), RuleConfig(RuleConfig), STI(STI), Observer(Observer),
- B(B), MF(B.getMF()), MRI(*B.getMRI()),
+ const AArch64Subtarget &STI, MachineDominatorTree *MDT,
+ const LegalizerInfo *LI)
+ : Combiner(MF, CInfo, TPC, &KB, CSEInfo),
+ Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI),
+ RuleConfig(RuleConfig), STI(STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}
-class AArch64PreLegalizerCombinerInfo : public CombinerInfo {
- GISelKnownBits *KB;
- MachineDominatorTree *MDT;
- AArch64PreLegalizerCombinerImplRuleConfig RuleConfig;
-
-public:
- AArch64PreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
- GISelKnownBits *KB, MachineDominatorTree *MDT)
- : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
- /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
- KB(KB), MDT(MDT) {
- if (!RuleConfig.parseCommandLineOption())
- report_fatal_error("Invalid rule identifier");
- }
-
- bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
- MachineIRBuilder &B) const override;
-};
-
-bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
- MachineInstr &MI,
- MachineIRBuilder &B) const {
- const auto &STI = MI.getMF()->getSubtarget<AArch64Subtarget>();
- const auto *LI = STI.getLegalizerInfo();
- CombinerHelper Helper(Observer, B, /* IsPreLegalize*/ true, KB, MDT, LI);
- AArch64PreLegalizerCombinerImpl Impl(RuleConfig, STI, Observer, B, Helper);
- Impl.setupMF(*MI.getMF(), KB);
-
- if (Impl.tryCombineAll(MI))
+bool AArch64PreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
+ if (tryCombineAllImpl(MI))
return true;
unsigned Opc = MI.getOpcode();
@@ -433,12 +597,12 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
case TargetOpcode::G_MEMSET: {
// If we're at -O0 set a maxlen of 32 to inline, otherwise let the other
// heuristics decide.
- unsigned MaxLen = EnableOpt ? 0 : 32;
+ unsigned MaxLen = CInfo.EnableOpt ? 0 : 32;
// Try to inline memcpy type calls if optimizations are enabled.
if (Helper.tryCombineMemCpyFamily(MI, MaxLen))
return true;
if (Opc == TargetOpcode::G_MEMSET)
- return llvm::AArch64GISelUtils::tryEmitBZero(MI, B, EnableMinSize);
+ return llvm::AArch64GISelUtils::tryEmitBZero(MI, B, CInfo.EnableMinSize);
return false;
}
}
@@ -462,6 +626,9 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ AArch64PreLegalizerCombinerImplRuleConfig RuleConfig;
};
} // end anonymous namespace
@@ -481,6 +648,9 @@ void AArch64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner()
: MachineFunctionPass(ID) {
initializeAArch64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+
+ if (!RuleConfig.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
}
bool AArch64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
@@ -494,15 +664,20 @@ bool AArch64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
auto *CSEInfo = &Wrapper.get(TPC.getCSEConfig());
+ const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
+ const auto *LI = ST.getLegalizerInfo();
+
const Function &F = MF.getFunction();
bool EnableOpt =
- MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
+ MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
- AArch64PreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
- F.hasMinSize(), KB, MDT);
- Combiner C(PCInfo, &TPC);
- return C.combineMachineInstrs(MF, CSEInfo);
+ CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ /*LegalizerInfo*/ nullptr, EnableOpt, F.hasOptSize(),
+ F.hasMinSize());
+ AArch64PreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *KB, CSEInfo,
+ RuleConfig, ST, MDT, LI);
+ return Impl.combineMachineInstrs();
}
char AArch64PreLegalizerCombiner::ID = 0;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 0314a3b65ebd..b8e5e7bbdaba 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Threading.h"
#include <algorithm>
#include <cassert>
@@ -216,7 +217,7 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(
unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
const RegisterBank &B,
- unsigned Size) const {
+ TypeSize Size) const {
// What do we do with different size?
// copy are same size.
// Will introduce other hooks for different size:
@@ -245,9 +246,10 @@ AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
case AArch64::FPR32_with_hsub_in_FPR16_loRegClassID:
case AArch64::FPR32RegClassID:
case AArch64::FPR64RegClassID:
- case AArch64::FPR64_loRegClassID:
case AArch64::FPR128RegClassID:
+ case AArch64::FPR64_loRegClassID:
case AArch64::FPR128_loRegClassID:
+ case AArch64::FPR128_0to7RegClassID:
case AArch64::DDRegClassID:
case AArch64::DDDRegClassID:
case AArch64::DDDDRegClassID:
@@ -339,12 +341,16 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings(
/*NumOperands*/ 2);
const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
/*ID*/ 3,
- /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
+ /*Cost*/
+ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
+ TypeSize::getFixed(Size)),
getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
/*NumOperands*/ 2);
const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
/*ID*/ 3,
- /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
+ /*Cost*/
+ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
+ TypeSize::getFixed(Size)),
getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
/*NumOperands*/ 2);
@@ -389,7 +395,7 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings(
}
void AArch64RegisterBankInfo::applyMappingImpl(
- const OperandsMapper &OpdMapper) const {
+ MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
switch (OpdMapper.getMI().getOpcode()) {
case TargetOpcode::G_OR:
case TargetOpcode::G_BITCAST:
@@ -431,6 +437,7 @@ static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
case TargetOpcode::G_FRINT:
case TargetOpcode::G_INTRINSIC_TRUNC:
case TargetOpcode::G_INTRINSIC_ROUND:
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
case TargetOpcode::G_FMAXNUM:
case TargetOpcode::G_FMINNUM:
case TargetOpcode::G_FMAXIMUM:
@@ -486,15 +493,18 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
/// \returns true if a given intrinsic only uses and defines FPRs.
static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
const MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC);
// TODO: Add more intrinsics.
- switch (MI.getIntrinsicID()) {
+ switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
default:
return false;
case Intrinsic::aarch64_neon_uaddlv:
case Intrinsic::aarch64_neon_uaddv:
+ case Intrinsic::aarch64_neon_saddv:
case Intrinsic::aarch64_neon_umaxv:
+ case Intrinsic::aarch64_neon_smaxv:
case Intrinsic::aarch64_neon_uminv:
+ case Intrinsic::aarch64_neon_sminv:
+ case Intrinsic::aarch64_neon_faddv:
case Intrinsic::aarch64_neon_fmaxv:
case Intrinsic::aarch64_neon_fminv:
case Intrinsic::aarch64_neon_fmaxnmv:
@@ -505,13 +515,6 @@ static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
return SrcTy.getElementType().getSizeInBits() >= 16 &&
SrcTy.getElementCount().getFixedValue() >= 4;
}
- case Intrinsic::aarch64_neon_saddv:
- case Intrinsic::aarch64_neon_smaxv:
- case Intrinsic::aarch64_neon_sminv: {
- const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
- return SrcTy.getElementType().getSizeInBits() >= 32 &&
- SrcTy.getElementCount().getFixedValue() >= 2;
- }
}
}
@@ -583,12 +586,65 @@ bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
case TargetOpcode::G_BUILD_VECTOR:
case TargetOpcode::G_BUILD_VECTOR_TRUNC:
return true;
+ case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
+ case Intrinsic::aarch64_neon_ld1x2:
+ case Intrinsic::aarch64_neon_ld1x3:
+ case Intrinsic::aarch64_neon_ld1x4:
+ case Intrinsic::aarch64_neon_ld2:
+ case Intrinsic::aarch64_neon_ld2lane:
+ case Intrinsic::aarch64_neon_ld2r:
+ case Intrinsic::aarch64_neon_ld3:
+ case Intrinsic::aarch64_neon_ld3lane:
+ case Intrinsic::aarch64_neon_ld3r:
+ case Intrinsic::aarch64_neon_ld4:
+ case Intrinsic::aarch64_neon_ld4lane:
+ case Intrinsic::aarch64_neon_ld4r:
+ return true;
+ default:
+ break;
+ }
+ break;
default:
break;
}
return hasFPConstraints(MI, MRI, TRI, Depth);
}
+bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const {
+ // GMemOperation because we also want to match indexed loads.
+ auto *MemOp = cast<GMemOperation>(&MI);
+ const Value *LdVal = MemOp->getMMO().getValue();
+ if (!LdVal)
+ return false;
+
+ Type *EltTy = nullptr;
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(LdVal)) {
+ EltTy = GV->getValueType();
+ // Look at the first element of the struct to determine the type we are
+ // loading
+ while (StructType *StructEltTy = dyn_cast<StructType>(EltTy))
+ EltTy = StructEltTy->getTypeAtIndex(0U);
+ // Look at the first element of the array to determine its type
+ if (isa<ArrayType>(EltTy))
+ EltTy = EltTy->getArrayElementType();
+ } else {
+ // FIXME: grubbing around uses is pretty ugly, but with no more
+ // `getPointerElementType` there's not much else we can do.
+ for (const auto *LdUser : LdVal->users()) {
+ if (isa<LoadInst>(LdUser)) {
+ EltTy = LdUser->getType();
+ break;
+ }
+ if (isa<StoreInst>(LdUser) && LdUser->getOperand(1) == LdVal) {
+ EltTy = LdUser->getOperand(0)->getType();
+ break;
+ }
+ }
+ }
+ return EltTy && EltTy->isFPOrFPVectorTy();
+}
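+
+// A sketch of the inference above, on hypothetical IR:
+//   @g = global [4 x double] zeroinitializer
+// peels the array (and any leading struct fields) down to double, which is
+// an FP type; for an anonymous pointer the users are scanned instead, so a
+//   %v = load double, ptr %p
+// user lets the loaded value's type stand in for the pointee type.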
+
const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const unsigned Opc = MI.getOpcode();
@@ -665,7 +721,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
assert(DstRB && SrcRB && "Both RegBank were nullptr");
unsigned Size = getSizeInBits(DstReg, MRI, TRI);
return getInstructionMapping(
- DefaultMappingID, copyCost(*DstRB, *SrcRB, Size),
+ DefaultMappingID, copyCost(*DstRB, *SrcRB, TypeSize::getFixed(Size)),
getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
// We only care about the mapping of the destination.
/*NumOperands*/ 1);
@@ -684,7 +740,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const RegisterBank &SrcRB =
SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
return getInstructionMapping(
- DefaultMappingID, copyCost(DstRB, SrcRB, Size),
+ DefaultMappingID, copyCost(DstRB, SrcRB, TypeSize::getFixed(Size)),
getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
// We only care about the mapping of the destination for COPY.
/*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
@@ -725,10 +781,13 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
Register ScalarReg = MI.getOperand(1).getReg();
LLT ScalarTy = MRI.getType(ScalarReg);
auto ScalarDef = MRI.getVRegDef(ScalarReg);
+ // We want to select dup(load) into LD1R.
+ if (ScalarDef->getOpcode() == TargetOpcode::G_LOAD)
+ OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
// s8 is an exception for G_DUP, which we always want on gpr.
- if (ScalarTy.getSizeInBits() != 8 &&
- (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
- onlyDefinesFP(*ScalarDef, MRI, TRI)))
+ else if (ScalarTy.getSizeInBits() != 8 &&
+ (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
+ onlyDefinesFP(*ScalarDef, MRI, TRI)))
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
else
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
@@ -774,7 +833,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
Cost = copyCost(
*AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
*AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
- OpSize[0]);
+ TypeSize::getFixed(OpSize[0]));
break;
case TargetOpcode::G_LOAD: {
// Loading in vector unit is slightly more expensive.
@@ -795,30 +854,9 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
// Try to guess the type of the load from the MMO.
- const auto &MMO = **MI.memoperands_begin();
- const Value *LdVal = MMO.getValue();
- if (LdVal) {
- Type *EltTy = nullptr;
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(LdVal)) {
- EltTy = GV->getValueType();
- } else {
- // FIXME: grubbing around uses is pretty ugly, but with no more
- // `getPointerElementType` there's not much else we can do.
- for (const auto *LdUser : LdVal->users()) {
- if (isa<LoadInst>(LdUser)) {
- EltTy = LdUser->getType();
- break;
- }
- if (isa<StoreInst>(LdUser) && LdUser->getOperand(1) == LdVal) {
- EltTy = LdUser->getOperand(0)->getType();
- break;
- }
- }
- }
- if (EltTy && EltTy->isFPOrFPVectorTy()) {
- OpRegBankIdx[0] = PMI_FirstFPR;
- break;
- }
+ if (isLoadFromFPType(MI)) {
+ OpRegBankIdx[0] = PMI_FirstFPR;
+ break;
}
// Check if that load feeds fp instructions.
@@ -851,6 +889,27 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
break;
+ case TargetOpcode::G_INDEXED_STORE:
+ if (OpRegBankIdx[1] == PMI_FirstGPR) {
+ Register VReg = MI.getOperand(1).getReg();
+ if (!VReg)
+ break;
+ MachineInstr *DefMI = MRI.getVRegDef(VReg);
+ if (onlyDefinesFP(*DefMI, MRI, TRI))
+ OpRegBankIdx[1] = PMI_FirstFPR;
+ break;
+ }
+ break;
+ case TargetOpcode::G_INDEXED_SEXTLOAD:
+ case TargetOpcode::G_INDEXED_ZEXTLOAD:
+ // These should always be GPR.
+ OpRegBankIdx[0] = PMI_FirstGPR;
+ break;
+ case TargetOpcode::G_INDEXED_LOAD: {
+ if (isLoadFromFPType(MI))
+ OpRegBankIdx[0] = PMI_FirstFPR;
+ break;
+ }
case TargetOpcode::G_SELECT: {
// If the destination is FPR, preserve that.
if (OpRegBankIdx[0] != PMI_FirstGPR)
@@ -997,6 +1056,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_VECREDUCE_FMUL:
case TargetOpcode::G_VECREDUCE_FMAX:
case TargetOpcode::G_VECREDUCE_FMIN:
+ case TargetOpcode::G_VECREDUCE_FMAXIMUM:
+ case TargetOpcode::G_VECREDUCE_FMINIMUM:
case TargetOpcode::G_VECREDUCE_ADD:
case TargetOpcode::G_VECREDUCE_MUL:
case TargetOpcode::G_VECREDUCE_AND:
@@ -1016,17 +1077,26 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// Assign them FPR for now.
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
break;
- case TargetOpcode::G_INTRINSIC: {
+ case TargetOpcode::G_INTRINSIC:
+ case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
// Check if we know that the intrinsic has any constraints on its register
// banks. If it does, then update the mapping accordingly.
unsigned Idx = 0;
- if (!isFPIntrinsic(MRI, MI))
- break;
- for (const auto &Op : MI.explicit_operands()) {
- if (Op.isReg())
- OpRegBankIdx[Idx] = PMI_FirstFPR;
- ++Idx;
- }
+ if (onlyDefinesFP(MI, MRI, TRI))
+ for (const auto &Op : MI.defs()) {
+ if (Op.isReg())
+ OpRegBankIdx[Idx] = PMI_FirstFPR;
+ ++Idx;
+ }
+ else
+ Idx += MI.getNumExplicitDefs();
+
+ if (onlyUsesFP(MI, MRI, TRI))
+ for (const auto &Op : MI.explicit_uses()) {
+ if (Op.isReg())
+ OpRegBankIdx[Idx] = PMI_FirstFPR;
+ ++Idx;
+ }
break;
}
case TargetOpcode::G_LROUND:
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
index 01ef0bd92d50..b6364c6a6409 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64REGISTERBANKINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64REGISTERBANKINFO_H
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#define GET_REGBANK_DECLARATIONS
@@ -42,9 +43,9 @@ protected:
PMI_Min = PMI_FirstFPR,
};
- static RegisterBankInfo::PartialMapping PartMappings[];
- static RegisterBankInfo::ValueMapping ValMappings[];
- static PartialMappingIdx BankIDToCopyMapIdx[];
+ static const RegisterBankInfo::PartialMapping PartMappings[];
+ static const RegisterBankInfo::ValueMapping ValMappings[];
+ static const PartialMappingIdx BankIDToCopyMapIdx[];
enum ValueMappingIdx {
InvalidIdx = 0,
@@ -103,7 +104,8 @@ protected:
/// This class provides the information for the target register banks.
class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
/// See RegisterBankInfo::applyMapping.
- void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
+ void applyMappingImpl(MachineIRBuilder &Builder,
+ const OperandsMapper &OpdMapper) const override;
/// Get an instruction mapping where all the operands map to
/// the same register bank and have similar size.
@@ -130,11 +132,15 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
+ /// \returns true if the load \p MI is likely loading from a floating-point
+ /// type.
+ bool isLoadFromFPType(const MachineInstr &MI) const;
+
public:
AArch64RegisterBankInfo(const TargetRegisterInfo &TRI);
unsigned copyCost(const RegisterBank &A, const RegisterBank &B,
- unsigned Size) const override;
+ TypeSize Size) const override;
const RegisterBank &getRegBankFromRegClass(const TargetRegisterClass &RC,
LLT) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
index 33c08bfc6de6..03cbd272757e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -13,7 +13,6 @@
#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64ADDRESSINGMODES_H
#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64ADDRESSINGMODES_H
-#include "AArch64ExpandImm.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/bit.h"
@@ -592,6 +591,27 @@ static inline uint64_t decodeAdvSIMDModImmType9(uint8_t Imm) {
// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh
// cmode: 1110, op: 1
static inline bool isAdvSIMDModImmType10(uint64_t Imm) {
+#if defined(_MSC_VER) && _MSC_VER == 1937 && !defined(__clang__) && \
+ defined(_M_ARM64)
+ // The MSVC compiler 19.37 for ARM64 has an optimization bug that
+ // causes incorrect behavior with the original version. Work around it
+ // by using a slightly different formulation.
+ // https://developercommunity.visualstudio.com/t/C-ARM64-compiler-optimization-bug/10481261
+ constexpr uint64_t Mask = 0xFFULL;
+ uint64_t ByteA = (Imm >> 56) & Mask;
+ uint64_t ByteB = (Imm >> 48) & Mask;
+ uint64_t ByteC = (Imm >> 40) & Mask;
+ uint64_t ByteD = (Imm >> 32) & Mask;
+ uint64_t ByteE = (Imm >> 24) & Mask;
+ uint64_t ByteF = (Imm >> 16) & Mask;
+ uint64_t ByteG = (Imm >> 8) & Mask;
+ uint64_t ByteH = Imm & Mask;
+
+ return (ByteA == 0ULL || ByteA == Mask) && (ByteB == 0ULL || ByteB == Mask) &&
+ (ByteC == 0ULL || ByteC == Mask) && (ByteD == 0ULL || ByteD == Mask) &&
+ (ByteE == 0ULL || ByteE == Mask) && (ByteF == 0ULL || ByteF == Mask) &&
+ (ByteG == 0ULL || ByteG == Mask) && (ByteH == 0ULL || ByteH == Mask);
+#else
uint64_t ByteA = Imm & 0xff00000000000000ULL;
uint64_t ByteB = Imm & 0x00ff000000000000ULL;
uint64_t ByteC = Imm & 0x0000ff0000000000ULL;
@@ -609,6 +629,7 @@ static inline bool isAdvSIMDModImmType10(uint64_t Imm) {
(ByteF == 0ULL || ByteF == 0x0000000000ff0000ULL) &&
(ByteG == 0ULL || ByteG == 0x000000000000ff00ULL) &&
(ByteH == 0ULL || ByteH == 0x00000000000000ffULL);
+#endif
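+  // Both branches compute the same predicate: every byte of Imm must be
+  // either 0x00 or 0xff. For instance (a sketch of the expected results):
+  //   isAdvSIMDModImmType10(0xff00ff00ff00ff00ULL) -> true
+  //   isAdvSIMDModImmType10(0xff00ff00ff00ff01ULL) -> false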
}
static inline uint8_t encodeAdvSIMDModImmType10(uint64_t Imm) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index a7933c00f507..a6900b8963bb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -40,7 +40,8 @@ protected:
public:
AArch64AsmBackend(const Target &T, const Triple &TT, bool IsLittleEndian)
- : MCAsmBackend(IsLittleEndian ? support::little : support::big),
+ : MCAsmBackend(IsLittleEndian ? llvm::endianness::little
+ : llvm::endianness::big),
TheTriple(TT) {}
unsigned getNumFixupKinds() const override {
@@ -99,7 +100,8 @@ public:
unsigned getFixupKindContainereSizeInBytes(unsigned Kind) const;
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target) override;
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) override;
};
} // end anonymous namespace
@@ -181,14 +183,14 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
if (TheTriple.isOSBinFormatCOFF() && !IsResolved)
Value &= 0xfff;
// Unsigned 12-bit immediate
- if (Value >= 0x1000)
+ if (!isUInt<12>(Value))
Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
return Value;
case AArch64::fixup_aarch64_ldst_imm12_scale2:
if (TheTriple.isOSBinFormatCOFF() && !IsResolved)
Value &= 0xfff;
// Unsigned 12-bit immediate which gets multiplied by 2
- if (Value >= 0x2000)
+ if (!isUInt<13>(Value))
Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
if (Value & 0x1)
Ctx.reportError(Fixup.getLoc(), "fixup must be 2-byte aligned");
@@ -197,7 +199,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
if (TheTriple.isOSBinFormatCOFF() && !IsResolved)
Value &= 0xfff;
// Unsigned 12-bit immediate which gets multiplied by 4
- if (Value >= 0x4000)
+ if (!isUInt<14>(Value))
Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
if (Value & 0x3)
Ctx.reportError(Fixup.getLoc(), "fixup must be 4-byte aligned");
@@ -206,7 +208,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
if (TheTriple.isOSBinFormatCOFF() && !IsResolved)
Value &= 0xfff;
// Unsigned 12-bit immediate which gets multiplied by 8
- if (Value >= 0x8000)
+ if (!isUInt<15>(Value))
Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
if (Value & 0x7)
Ctx.reportError(Fixup.getLoc(), "fixup must be 8-byte aligned");
@@ -215,7 +217,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
if (TheTriple.isOSBinFormatCOFF() && !IsResolved)
Value &= 0xfff;
// Unsigned 12-bit immediate which gets multiplied by 16
- if (Value >= 0x10000)
+ if (!isUInt<16>(Value))
Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
if (Value & 0xf)
Ctx.reportError(Fixup.getLoc(), "fixup must be 16-byte aligned");
@@ -306,7 +308,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
}
case AArch64::fixup_aarch64_pcrel_branch14:
// Signed 16-bit immediate
- if (SignedValue > 32767 || SignedValue < -32768)
+ if (!isInt<16>(SignedValue))
Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
// Low two bits are not encoded (4-byte alignment assumed).
if (Value & 0x3)
@@ -314,8 +316,15 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
return (Value >> 2) & 0x3fff;
case AArch64::fixup_aarch64_pcrel_branch26:
case AArch64::fixup_aarch64_pcrel_call26:
+ if (TheTriple.isOSBinFormatCOFF() && !IsResolved && SignedValue != 0) {
+ // MSVC link.exe and lld do not support this relocation type
+ // with a non-zero offset
+ Ctx.reportError(Fixup.getLoc(),
+ "cannot perform a PC-relative fixup with a non-zero "
+ "symbol offset");
+ }
// Signed 28-bit immediate
- if (SignedValue > 134217727 || SignedValue < -134217728)
+ if (!isInt<28>(SignedValue))
Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
// Low two bits are not encoded (4-byte alignment assumed).
if (Value & 0x3)
@@ -353,7 +362,7 @@ AArch64AsmBackend::getFixupKind(StringRef Name) const {
/// getFixupKindContainereSizeInBytes - The number of bytes of the
/// container involved in big endian or 0 if the item is little endian
unsigned AArch64AsmBackend::getFixupKindContainereSizeInBytes(unsigned Kind) const {
- if (Endian == support::little)
+ if (Endian == llvm::endianness::little)
return 0;
switch (Kind) {
@@ -393,6 +402,19 @@ void AArch64AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
MutableArrayRef<char> Data, uint64_t Value,
bool IsResolved,
const MCSubtargetInfo *STI) const {
+ if (Fixup.getTargetKind() == FK_Data_8 && TheTriple.isOSBinFormatELF()) {
+ auto RefKind = static_cast<AArch64MCExpr::VariantKind>(Target.getRefKind());
+ AArch64MCExpr::VariantKind SymLoc = AArch64MCExpr::getSymbolLoc(RefKind);
+ if (SymLoc == AArch64AuthMCExpr::VK_AUTH ||
+ SymLoc == AArch64AuthMCExpr::VK_AUTHADDR) {
+ assert(Value == 0);
+ const auto *Expr = cast<AArch64AuthMCExpr>(Fixup.getValue());
+ Value = (uint64_t(Expr->getDiscriminator()) << 32) |
+ (uint64_t(Expr->getKey()) << 60) |
+ (uint64_t(Expr->hasAddressDiversity()) << 63);
+ }
+ }
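+
+  // Packed layout, as implied by the shifts above: bits [31:0] are left
+  // zero, the discriminator starts at bit 32, the key at bit 60, and the
+  // address-diversity flag sits at bit 63.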
+
if (!Value)
return; // Doesn't change encoding.
unsigned Kind = Fixup.getKind();
@@ -478,7 +500,8 @@ bool AArch64AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
bool AArch64AsmBackend::shouldForceRelocation(const MCAssembler &Asm,
const MCFixup &Fixup,
- const MCValue &Target) {
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) {
unsigned Kind = Fixup.getKind();
if (Kind >= FirstLiteralRelocationKind)
return true;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index bcb6c7c1a0fa..9de40661298c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -39,6 +39,8 @@ public:
protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
+ bool needsRelocateWithSymbol(const MCValue &Val, const MCSymbol &Sym,
+ unsigned Type) const override;
bool IsILP32;
};
@@ -207,8 +209,12 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
"ILP32 8 byte absolute data "
"relocation not supported (LP64 eqv: ABS64)");
return ELF::R_AARCH64_NONE;
- } else
+ } else {
+ if (RefKind == AArch64MCExpr::VK_AUTH ||
+ RefKind == AArch64MCExpr::VK_AUTHADDR)
+ return ELF::R_AARCH64_AUTH_ABS64;
return ELF::R_AARCH64_ABS64;
+ }
case AArch64::fixup_aarch64_add_imm12:
if (RefKind == AArch64MCExpr::VK_DTPREL_HI12)
return R_CLS(TLSLD_ADD_DTPREL_HI12);
@@ -455,6 +461,12 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
llvm_unreachable("Unimplemented fixup -> relocation");
}
+bool AArch64ELFObjectWriter::needsRelocateWithSymbol(const MCValue &Val,
+ const MCSymbol &,
+ unsigned) const {
+ return (Val.getRefKind() & AArch64MCExpr::VK_GOT) == AArch64MCExpr::VK_GOT;
+}
+
MCSectionELF *
AArch64ELFObjectWriter::getMemtagRelocsSection(MCContext &Ctx) const {
return Ctx.getELFSection(".memtag.globals.static",
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 6a5f1430643d..ad21f2673a64 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -104,6 +104,7 @@ class AArch64TargetAsmStreamer : public AArch64TargetStreamer {
void emitARM64WinCFITrapFrame() override { OS << "\t.seh_trap_frame\n"; }
void emitARM64WinCFIMachineFrame() override { OS << "\t.seh_pushframe\n"; }
void emitARM64WinCFIContext() override { OS << "\t.seh_context\n"; }
+ void emitARM64WinCFIECContext() override { OS << "\t.seh_ec_context\n"; }
void emitARM64WinCFIClearUnwoundToCall() override {
OS << "\t.seh_clear_unwound_to_call\n";
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 2983e9a9be92..c5de5b4de4ae 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -60,12 +60,12 @@ bool AArch64InstPrinter::applyTargetSpecificCLOption(StringRef Opt) {
}
void AArch64InstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
- OS << markup("<reg:") << getRegisterName(Reg) << markup(">");
+ markup(OS, Markup::Register) << getRegisterName(Reg);
}
void AArch64InstPrinter::printRegName(raw_ostream &OS, MCRegister Reg,
unsigned AltIdx) const {
- OS << markup("<reg:") << getRegisterName(Reg, AltIdx) << markup(">");
+ markup(OS, Markup::Register) << getRegisterName(Reg, AltIdx);
}
StringRef AArch64InstPrinter::getRegName(MCRegister Reg) const {
@@ -175,7 +175,8 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
printRegName(O, Op0.getReg());
O << ", ";
printRegName(O, Op1.getReg());
- O << ", " << markup("<imm:") << "#" << shift << markup(">");
+ O << ", ";
+ markup(O, Markup::Immediate) << "#" << shift;
printAnnotation(O, Annot);
return;
}
@@ -187,9 +188,10 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
printRegName(O, Op0.getReg());
O << ", ";
printRegName(O, Op1.getReg());
- O << ", " << markup("<imm:") << "#" << (Is64Bit ? 64 : 32) - Op2.getImm()
- << markup(">") << ", " << markup("<imm:") << "#" << Op3.getImm() + 1
- << markup(">");
+ O << ", ";
+ markup(O, Markup::Immediate) << "#" << (Is64Bit ? 64 : 32) - Op2.getImm();
+ O << ", ";
+ markup(O, Markup::Immediate) << "#" << Op3.getImm() + 1;
printAnnotation(O, Annot);
return;
}
@@ -199,9 +201,10 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
printRegName(O, Op0.getReg());
O << ", ";
printRegName(O, Op1.getReg());
- O << ", " << markup("<imm:") << "#" << Op2.getImm() << markup(">") << ", "
- << markup("<imm:") << "#" << Op3.getImm() - Op2.getImm() + 1
- << markup(">");
+ O << ", ";
+ markup(O, Markup::Immediate) << "#" << Op2.getImm();
+ O << ", ";
+ markup(O, Markup::Immediate) << "#" << Op3.getImm() - Op2.getImm() + 1;
printAnnotation(O, Annot);
return;
}
@@ -221,8 +224,10 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
O << "\tbfc\t";
printRegName(O, Op0.getReg());
- O << ", " << markup("<imm:") << "#" << LSB << markup(">") << ", "
- << markup("<imm:") << "#" << Width << markup(">");
+ O << ", ";
+ markup(O, Markup::Immediate) << "#" << LSB;
+ O << ", ";
+ markup(O, Markup::Immediate) << "#" << Width;
printAnnotation(O, Annot);
return;
} else if (ImmS < ImmR) {
@@ -235,8 +240,10 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
printRegName(O, Op0.getReg());
O << ", ";
printRegName(O, Op2.getReg());
- O << ", " << markup("<imm:") << "#" << LSB << markup(">") << ", "
- << markup("<imm:") << "#" << Width << markup(">");
+ O << ", ";
+ markup(O, Markup::Immediate) << "#" << LSB;
+ O << ", ";
+ markup(O, Markup::Immediate) << "#" << Width;
printAnnotation(O, Annot);
return;
}
@@ -248,8 +255,10 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
printRegName(O, Op0.getReg());
O << ", ";
printRegName(O, Op2.getReg());
- O << ", " << markup("<imm:") << "#" << LSB << markup(">") << ", "
- << markup("<imm:") << "#" << Width << markup(">");
+ O << ", ";
+ markup(O, Markup::Immediate) << "#" << LSB;
+ O << ", ";
+ markup(O, Markup::Immediate) << "#" << Width;
printAnnotation(O, Annot);
return;
}
@@ -266,9 +275,12 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
O << "\tmovn\t";
printRegName(O, MI->getOperand(0).getReg());
- O << ", " << markup("<imm:") << "#";
- MI->getOperand(1).getExpr()->print(O, &MAI);
- O << markup(">");
+ O << ", ";
+ {
+ WithMarkup M = markup(O, Markup::Immediate);
+ O << "#";
+ MI->getOperand(1).getExpr()->print(O, &MAI);
+ }
return;
}
@@ -276,9 +288,12 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
MI->getOperand(2).isExpr()) {
O << "\tmovk\t";
printRegName(O, MI->getOperand(0).getReg());
- O << ", " << markup("<imm:") << "#";
- MI->getOperand(2).getExpr()->print(O, &MAI);
- O << markup(">");
+ O << ", ";
+ {
+ WithMarkup M = markup(O, Markup::Immediate);
+ O << "#";
+ MI->getOperand(2).getExpr()->print(O, &MAI);
+ }
return;
}
@@ -286,8 +301,8 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
int64_t SExtVal = SignExtend64(Value, RegWidth);
O << "\tmov\t";
printRegName(O, MI->getOperand(0).getReg());
- O << ", " << markup("<imm:") << "#"
- << formatImm(SExtVal) << markup(">");
+ O << ", ";
+ markup(O, Markup::Immediate) << "#" << formatImm(SExtVal);
if (CommentStream) {
// Do the opposite to that used for instruction operands.
if (getPrintImmHex())
@@ -813,8 +828,8 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, uint64_t Address,
printRegName(O, Reg);
} else {
assert(LdStDesc->NaturalOffset && "no offset on post-inc instruction?");
- O << ", " << markup("<imm:") << "#" << LdStDesc->NaturalOffset
- << markup(">");
+ O << ", ";
+ markup(O, Markup::Immediate) << "#" << LdStDesc->NaturalOffset;
}
}
@@ -1142,14 +1157,14 @@ void AArch64InstPrinter::printImm(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
- O << markup("<imm:") << "#" << formatImm(Op.getImm()) << markup(">");
+ markup(O, Markup::Immediate) << "#" << formatImm(Op.getImm());
}
void AArch64InstPrinter::printImmHex(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
- O << markup("<imm:") << format("#%#llx", Op.getImm()) << markup(">");
+ markup(O, Markup::Immediate) << format("#%#llx", Op.getImm());
}
template<int Size>
@@ -1158,13 +1173,11 @@ void AArch64InstPrinter::printSImm(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Size == 8)
- O << markup("<imm:") << "#" << formatImm((signed char)Op.getImm())
- << markup(">");
+ markup(O, Markup::Immediate) << "#" << formatImm((signed char)Op.getImm());
else if (Size == 16)
- O << markup("<imm:") << "#" << formatImm((signed short)Op.getImm())
- << markup(">");
+ markup(O, Markup::Immediate) << "#" << formatImm((signed short)Op.getImm());
else
- O << markup("<imm:") << "#" << formatImm(Op.getImm()) << markup(">");
+ markup(O, Markup::Immediate) << "#" << formatImm(Op.getImm());
}
void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo,
@@ -1173,7 +1186,7 @@ void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo,
if (Op.isReg()) {
unsigned Reg = Op.getReg();
if (Reg == AArch64::XZR)
- O << markup("<imm:") << "#" << Imm << markup(">");
+ markup(O, Markup::Immediate) << "#" << Imm;
else
printRegName(O, Reg);
} else
@@ -1206,7 +1219,7 @@ void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum,
assert(Val == MO.getImm() && "Add/sub immediate out of range!");
unsigned Shift =
AArch64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm());
- O << markup("<imm:") << '#' << formatImm(Val) << markup(">");
+ markup(O, Markup::Immediate) << '#' << formatImm(Val);
if (Shift != 0) {
printShifter(MI, OpNum + 1, STI, O);
if (CommentStream)
@@ -1224,9 +1237,9 @@ void AArch64InstPrinter::printLogicalImm(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
uint64_t Val = MI->getOperand(OpNum).getImm();
- O << markup("<imm:") << "#0x";
+ WithMarkup M = markup(O, Markup::Immediate);
+ O << "#0x";
O.write_hex(AArch64_AM::decodeLogicalImmediate(Val, 8 * sizeof(T)));
- O << markup(">");
}
void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum,
@@ -1238,8 +1251,8 @@ void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum,
AArch64_AM::getShiftValue(Val) == 0)
return;
O << ", " << AArch64_AM::getShiftExtendName(AArch64_AM::getShiftType(Val))
- << " " << markup("<imm:") << "#" << AArch64_AM::getShiftValue(Val)
- << markup(">");
+ << " ";
+ markup(O, Markup::Immediate) << "#" << AArch64_AM::getShiftValue(Val);
}
void AArch64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum,
@@ -1273,19 +1286,23 @@ void AArch64InstPrinter::printArithExtend(const MCInst *MI, unsigned OpNum,
ExtType == AArch64_AM::UXTX) ||
((Dest == AArch64::WSP || Src1 == AArch64::WSP) &&
ExtType == AArch64_AM::UXTW) ) {
- if (ShiftVal != 0)
- O << ", lsl " << markup("<imm:") << "#" << ShiftVal << markup(">");
+ if (ShiftVal != 0) {
+ O << ", lsl ";
+ markup(O, Markup::Immediate) << "#" << ShiftVal;
+ }
return;
}
}
O << ", " << AArch64_AM::getShiftExtendName(ExtType);
- if (ShiftVal != 0)
- O << " " << markup("<imm:") << "#" << ShiftVal << markup(">");
+ if (ShiftVal != 0) {
+ O << " ";
+ markup(O, Markup::Immediate) << "#" << ShiftVal;
+ }
}
-static void printMemExtendImpl(bool SignExtend, bool DoShift, unsigned Width,
- char SrcRegKind, raw_ostream &O,
- bool UseMarkup) {
+void AArch64InstPrinter::printMemExtendImpl(bool SignExtend, bool DoShift,
+ unsigned Width, char SrcRegKind,
+ raw_ostream &O) {
// sxtw, sxtx, uxtw or lsl (== uxtx)
bool IsLSL = !SignExtend && SrcRegKind == 'x';
if (IsLSL)
@@ -1295,11 +1312,7 @@ static void printMemExtendImpl(bool SignExtend, bool DoShift, unsigned Width,
if (DoShift || IsLSL) {
O << " ";
- if (UseMarkup)
- O << "<imm:";
- O << "#" << Log2_32(Width / 8);
- if (UseMarkup)
- O << ">";
+ markup(O, Markup::Immediate) << "#" << Log2_32(Width / 8);
}
}
@@ -1308,7 +1321,7 @@ void AArch64InstPrinter::printMemExtend(const MCInst *MI, unsigned OpNum,
unsigned Width) {
bool SignExtend = MI->getOperand(OpNum).getImm();
bool DoShift = MI->getOperand(OpNum + 1).getImm();
- printMemExtendImpl(SignExtend, DoShift, Width, SrcRegKind, O, UseMarkup);
+ printMemExtendImpl(SignExtend, DoShift, Width, SrcRegKind, O);
}
template <bool SignExtend, int ExtWidth, char SrcRegKind, char Suffix>
@@ -1325,7 +1338,7 @@ void AArch64InstPrinter::printRegWithShiftExtend(const MCInst *MI,
bool DoShift = ExtWidth != 8;
if (SignExtend || DoShift || SrcRegKind == 'w') {
O << ", ";
- printMemExtendImpl(SignExtend, DoShift, ExtWidth, SrcRegKind, O, UseMarkup);
+ printMemExtendImpl(SignExtend, DoShift, ExtWidth, SrcRegKind, O);
}
}
@@ -1335,9 +1348,10 @@ void AArch64InstPrinter::printPredicateAsCounter(const MCInst *MI,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNum).getReg();
+ if (Reg < AArch64::PN0 || Reg > AArch64::PN15)
+ llvm_unreachable("Unsupported predicate-as-counter register");
+ O << "pn" << Reg - AArch64::PN0;
- assert(Reg <= AArch64::P15 && "Unsupported predicate register");
- O << "pn" << (Reg - AArch64::P0);
switch (EltSize) {
case 0:
break;
@@ -1384,8 +1398,8 @@ template <int Scale>
void AArch64InstPrinter::printImmScale(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- O << markup("<imm:") << '#'
- << formatImm(Scale * MI->getOperand(OpNum).getImm()) << markup(">");
+ markup(O, Markup::Immediate)
+ << '#' << formatImm(Scale * MI->getOperand(OpNum).getImm());
}
template <int Scale, int Offset>
@@ -1401,8 +1415,7 @@ void AArch64InstPrinter::printUImm12Offset(const MCInst *MI, unsigned OpNum,
unsigned Scale, raw_ostream &O) {
const MCOperand MO = MI->getOperand(OpNum);
if (MO.isImm()) {
- O << markup("<imm:") << '#' << formatImm(MO.getImm() * Scale)
- << markup(">");
+ markup(O, Markup::Immediate) << '#' << formatImm(MO.getImm() * Scale);
} else {
assert(MO.isExpr() && "Unexpected operand type!");
MO.getExpr()->print(O, &MAI);
@@ -1415,8 +1428,8 @@ void AArch64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum,
O << '[';
printRegName(O, MI->getOperand(OpNum).getReg());
if (MO1.isImm()) {
- O << ", " << markup("<imm:") << "#" << formatImm(MO1.getImm() * Scale)
- << markup(">");
+ O << ", ";
+ markup(O, Markup::Immediate) << "#" << formatImm(MO1.getImm() * Scale);
} else {
assert(MO1.isExpr() && "Unexpected operand type!");
O << ", ";
@@ -1455,7 +1468,7 @@ void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum,
}
}
- O << markup("<imm:") << '#' << formatImm(prfop) << markup(">");
+ markup(O, Markup::Immediate) << '#' << formatImm(prfop);
}
void AArch64InstPrinter::printPSBHintOp(const MCInst *MI, unsigned OpNum,
@@ -1466,7 +1479,7 @@ void AArch64InstPrinter::printPSBHintOp(const MCInst *MI, unsigned OpNum,
if (PSB)
O << PSB->Name;
else
- O << markup("<imm:") << '#' << formatImm(psbhintop) << markup(">");
+ markup(O, Markup::Immediate) << '#' << formatImm(psbhintop);
}
void AArch64InstPrinter::printBTIHintOp(const MCInst *MI, unsigned OpNum,
@@ -1477,7 +1490,7 @@ void AArch64InstPrinter::printBTIHintOp(const MCInst *MI, unsigned OpNum,
if (BTI)
O << BTI->Name;
else
- O << markup("<imm:") << '#' << formatImm(btihintop) << markup(">");
+ markup(O, Markup::Immediate) << '#' << formatImm(btihintop);
}
void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
@@ -1488,7 +1501,7 @@ void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
: AArch64_AM::getFPImmFloat(MO.getImm());
// 8 decimal places are enough to perfectly represent permitted floats.
- O << markup("<imm:") << format("#%.8f", FPImm) << markup(">");
+ markup(O, Markup::Immediate) << format("#%.8f", FPImm);
}
static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) {
@@ -1727,6 +1740,10 @@ template <unsigned NumLanes, char LaneKind>
void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
+ if (LaneKind == 0) {
+ printVectorList(MI, OpNum, STI, O, "");
+ return;
+ }
std::string Suffix(".");
if (NumLanes)
Suffix += itostr(NumLanes) + LaneKind;
@@ -1743,10 +1760,11 @@ void AArch64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
O << "[" << Scale * MI->getOperand(OpNum).getImm() << "]";
}
+template <unsigned Scale>
void AArch64InstPrinter::printMatrixIndex(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- O << MI->getOperand(OpNum).getImm();
+ O << Scale * MI->getOperand(OpNum).getImm();
}
void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, uint64_t Address,
@@ -1758,13 +1776,11 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, uint64_t Address,
// If the label has already been resolved to an immediate offset (say, when
// we're running the disassembler), just print the immediate.
if (Op.isImm()) {
- O << markup("<imm:");
int64_t Offset = Op.getImm() * 4;
if (PrintBranchImmAsAddress)
- O << formatHex(Address + Offset);
+ markup(O, Markup::Target) << formatHex(Address + Offset);
else
- O << "#" << formatImm(Offset);
- O << markup(">");
+ markup(O, Markup::Immediate) << "#" << formatImm(Offset);
return;
}
@@ -1773,7 +1789,7 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, uint64_t Address,
dyn_cast<MCConstantExpr>(MI->getOperand(OpNum).getExpr());
int64_t TargetAddress;
if (BranchTarget && BranchTarget->evaluateAsAbsolute(TargetAddress)) {
- O << formatHex((uint64_t)TargetAddress);
+ markup(O, Markup::Target) << formatHex((uint64_t)TargetAddress);
} else {
// Otherwise, just print the expression.
MI->getOperand(OpNum).getExpr()->print(O, &MAI);
@@ -1794,12 +1810,11 @@ void AArch64InstPrinter::printAdrAdrpLabel(const MCInst *MI, uint64_t Address,
Offset = Offset * 4096;
Address = Address & -4096;
}
- O << markup("<imm:");
+ WithMarkup M = markup(O, Markup::Immediate);
if (PrintBranchImmAsAddress)
- O << formatHex(Address + Offset);
+ markup(O, Markup::Target) << formatHex(Address + Offset);
else
- O << "#" << Offset;
- O << markup(">");
+ markup(O, Markup::Immediate) << "#" << Offset;
return;
}
@@ -1827,7 +1842,7 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
if (!Name.empty())
O << Name;
else
- O << markup("<imm:") << "#" << Val << markup(">");
+ markup(O, Markup::Immediate) << "#" << Val;
}
void AArch64InstPrinter::printBarriernXSOption(const MCInst *MI, unsigned OpNo,
@@ -1843,7 +1858,7 @@ void AArch64InstPrinter::printBarriernXSOption(const MCInst *MI, unsigned OpNo,
if (!Name.empty())
O << Name;
else
- O << markup("<imm:") << "#" << Val << markup(">");
+ markup(O, Markup::Immediate) << "#" << Val;
}
static bool isValidSysReg(const AArch64SysReg::SysReg *Reg, bool Read,
@@ -1942,7 +1957,7 @@ void AArch64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned RawVal = MI->getOperand(OpNo).getImm();
uint64_t Val = AArch64_AM::decodeAdvSIMDModImmType10(RawVal);
- O << markup("<imm:") << format("#%#016llx", Val) << markup(">");
+ markup(O, Markup::Immediate) << format("#%#016llx", Val);
}
template<int64_t Angle, int64_t Remainder>
@@ -1950,7 +1965,7 @@ void AArch64InstPrinter::printComplexRotationOp(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNo).getImm();
- O << markup("<imm:") << "#" << (Val * Angle) + Remainder << markup(">");
+ markup(O, Markup::Immediate) << "#" << (Val * Angle) + Remainder;
}
void AArch64InstPrinter::printSVEPattern(const MCInst *MI, unsigned OpNum,
@@ -1960,7 +1975,7 @@ void AArch64InstPrinter::printSVEPattern(const MCInst *MI, unsigned OpNum,
if (auto Pat = AArch64SVEPredPattern::lookupSVEPREDPATByEncoding(Val))
O << Pat->Name;
else
- O << markup("<imm:") << '#' << formatImm(Val) << markup(">");
+ markup(O, Markup::Immediate) << '#' << formatImm(Val);
}
void AArch64InstPrinter::printSVEVecLenSpecifier(const MCInst *MI,
@@ -2004,9 +2019,9 @@ void AArch64InstPrinter::printImmSVE(T Value, raw_ostream &O) {
std::make_unsigned_t<T> HexValue = Value;
if (getPrintImmHex())
- O << markup("<imm:") << '#' << formatHex((uint64_t)HexValue) << markup(">");
+ markup(O, Markup::Immediate) << '#' << formatHex((uint64_t)HexValue);
else
- O << markup("<imm:") << '#' << formatDec(Value) << markup(">");
+ markup(O, Markup::Immediate) << '#' << formatDec(Value);
if (CommentStream) {
// Do the opposite to that used for instruction operands.
@@ -2028,7 +2043,7 @@ void AArch64InstPrinter::printImm8OptLsl(const MCInst *MI, unsigned OpNum,
// #0 lsl #8 is never pretty printed
if ((UnscaledVal == 0) && (AArch64_AM::getShiftValue(Shift) != 0)) {
- O << markup("<imm:") << '#' << formatImm(UnscaledVal) << markup(">");
+ markup(O, Markup::Immediate) << '#' << formatImm(UnscaledVal);
printShifter(MI, OpNum + 1, STI, O);
return;
}
@@ -2058,7 +2073,7 @@ void AArch64InstPrinter::printSVELogicalImm(const MCInst *MI, unsigned OpNum,
else if ((uint16_t)PrintVal == PrintVal)
printImmSVE(PrintVal, O);
else
- O << markup("<imm:") << '#' << formatHex((uint64_t)PrintVal) << markup(">");
+ markup(O, Markup::Immediate) << '#' << formatHex((uint64_t)PrintVal);
}
template <int Width>
@@ -2086,8 +2101,8 @@ void AArch64InstPrinter::printExactFPImm(const MCInst *MI, unsigned OpNum,
auto *Imm0Desc = AArch64ExactFPImm::lookupExactFPImmByEnum(ImmIs0);
auto *Imm1Desc = AArch64ExactFPImm::lookupExactFPImmByEnum(ImmIs1);
unsigned Val = MI->getOperand(OpNum).getImm();
- O << markup("<imm:") << "#" << (Val ? Imm1Desc->Repr : Imm0Desc->Repr)
- << markup(">");
+ markup(O, Markup::Immediate)
+ << "#" << (Val ? Imm1Desc->Repr : Imm0Desc->Repr);
}
void AArch64InstPrinter::printGPR64as32(const MCInst *MI, unsigned OpNum,
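
The mechanical rewrites through this file replace the paired `O << markup("<imm:") ... << markup(">")` calls with `markup(O, Markup::Immediate) << ...`: the helper now returns a WithMarkup temporary that emits the opening delimiter up front and the closing delimiter from its destructor, so a close can no longer be dropped on an early return. A self-contained sketch of that RAII shape under simplified names (the real helper also honors the printer's markup on/off setting):

#include <cstdio>

struct WithMarkup {
  explicit WithMarkup(const char *Open) { std::fputs(Open, stdout); }
  ~WithMarkup() { std::fputs(">", stdout); } // closing delimiter, always
  template <typename T> WithMarkup &operator<<(const T &V) {
    std::printf("%d", int(V)); // toy formatting; LLVM streams to raw_ostream
    return *this;
  }
};

// C++17 guaranteed copy elision: exactly one destructor run per call.
WithMarkup markupImmediate() { return WithMarkup("<imm:"); }

int main() {
  markupImmediate() << 42; // prints "<imm:42>", closed at end of expression
  std::puts("");
}
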
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
index fcaa57402bc2..9dccdf42361b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
@@ -91,7 +91,8 @@ protected:
const MCSubtargetInfo &STI, raw_ostream &O);
void printArithExtend(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
-
+ void printMemExtendImpl(bool SignExtend, bool DoShift, unsigned Width,
+ char SrcRegKind, raw_ostream &O);
void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O,
char SrcRegKind, unsigned Width);
template <char SrcRegKind, unsigned Width>
@@ -172,6 +173,7 @@ protected:
template <unsigned Scale = 1>
void printVectorIndex(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
+ template <unsigned Scale = 1>
void printMatrixIndex(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
void printAdrAdrpLabel(const MCInst *MI, uint64_t Address, unsigned OpNum,
@@ -191,7 +193,6 @@ protected:
template <int EltSize>
void printPredicateAsCounter(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
-
template<int64_t Angle, int64_t Remainder>
void printComplexRotationOp(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
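
printMatrixIndex picks up the same `template <unsigned Scale = 1>` shape as printVectorIndex just above it: the default of 1 keeps every existing instantiation printing the raw immediate, while strided SME forms can scale the encoded index at compile time. The idiom in miniature:

#include <cstdint>
#include <cstdio>

template <unsigned Scale = 1>
void printIndex(int64_t Imm) {
  std::printf("%lld\n", (long long)(Scale * Imm)); // Scale folds at compile time
}

int main() {
  printIndex(3);    // 3 (default Scale = 1, the old behaviour)
  printIndex<4>(3); // 12
}
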
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 2dbbab13e8f3..dbc4323a860f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -189,7 +189,7 @@ public:
uint32_t EncodeRegAsMultipleOf(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- uint32_t EncodePPR_p8to15(const MCInst &MI, unsigned OpIdx,
+ uint32_t EncodePNR_p8to15(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
@@ -544,11 +544,11 @@ AArch64MCCodeEmitter::EncodeRegAsMultipleOf(const MCInst &MI, unsigned OpIdx,
}
uint32_t
-AArch64MCCodeEmitter::EncodePPR_p8to15(const MCInst &MI, unsigned OpIdx,
+AArch64MCCodeEmitter::EncodePNR_p8to15(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
auto RegOpnd = MI.getOperand(OpIdx).getReg();
- return RegOpnd - AArch64::P8;
+ return RegOpnd - AArch64::PN8;
}
uint32_t AArch64MCCodeEmitter::EncodeZPR2StridedRegisterClass(
@@ -685,7 +685,7 @@ void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI,
}
uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI);
- support::endian::write<uint32_t>(CB, Binary, support::little);
+ support::endian::write<uint32_t>(CB, Binary, llvm::endianness::little);
++MCNumEmitted; // Keep track of the # of mi's emitted.
}
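
Two independent fixes in this file: the renamed EncodePNR_p8to15 now subtracts the PN8 base, since the predicate-as-counter registers PN8-PN15 form their own class and encode as a 3-bit offset from PN8, and byte emission moves from `support::little` to the new `llvm::endianness::little` enumerator. What the endian write amounts to, as a hedged standalone sketch:

#include <cstdint>
#include <vector>

// Encode a register as its distance from the first register of its class.
unsigned encodeFromBase(unsigned RegOpnd, unsigned BaseReg /* e.g. PN8 */) {
  return RegOpnd - BaseReg; // 0..7 for PN8..PN15
}

// Append a 32-bit instruction word in little-endian byte order.
void emitLE32(std::vector<uint8_t> &CB, uint32_t Binary) {
  for (int I = 0; I != 4; ++I)
    CB.push_back(uint8_t(Binary >> (8 * I)));
}

int main() {
  std::vector<uint8_t> CB;
  emitLE32(CB, 0xD503477Fu);    // an arbitrary AArch64 word
  return CB[0] == 0x7F ? 0 : 1; // lowest byte first
}
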
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index cb39c2a11487..0c5a9d79f6cb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -154,3 +154,47 @@ void AArch64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
}
+
+const AArch64AuthMCExpr *AArch64AuthMCExpr::create(const MCExpr *Expr,
+ uint16_t Discriminator,
+ AArch64PACKey::ID Key,
+ bool HasAddressDiversity,
+ MCContext &Ctx) {
+ return new (Ctx)
+ AArch64AuthMCExpr(Expr, Discriminator, Key, HasAddressDiversity);
+}
+
+void AArch64AuthMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
+ bool WrapSubExprInParens = !isa<MCSymbolRefExpr>(getSubExpr());
+ if (WrapSubExprInParens)
+ OS << '(';
+ getSubExpr()->print(OS, MAI);
+ if (WrapSubExprInParens)
+ OS << ')';
+
+ OS << "@AUTH(" << AArch64PACKeyIDToString(Key) << ',' << Discriminator;
+ if (hasAddressDiversity())
+ OS << ",addr";
+ OS << ')';
+}
+
+void AArch64AuthMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+ Streamer.visitUsedExpr(*getSubExpr());
+}
+
+MCFragment *AArch64AuthMCExpr::findAssociatedFragment() const {
+ llvm_unreachable("FIXME: what goes here?");
+}
+
+bool AArch64AuthMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const {
+ if (!getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup))
+ return false;
+
+ if (Res.getSymB())
+ report_fatal_error("Auth relocation can't reference two symbols");
+
+ Res = MCValue::get(Res.getSymA(), nullptr, Res.getConstant(), getKind());
+ return true;
+}
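
The new printImpl gives signed-pointer expressions the assembly spelling `expr@AUTH(key,discriminator[,addr])`, parenthesizing any sub-expression that is not a bare symbol reference. An illustrative reimplementation of just the formatting; the key names (ia, ib, da, db) follow AArch64PACKeyIDToString, everything else here is a stand-in:

#include <cstdint>
#include <cstdio>
#include <string>

std::string formatAuth(const std::string &SubExpr, bool IsBareSymbol,
                       const char *Key, uint16_t Disc, bool AddrDiversity) {
  std::string S = IsBareSymbol ? SubExpr : "(" + SubExpr + ")";
  S += "@AUTH(";
  S += Key;
  S += ',';
  S += std::to_string(Disc);
  if (AddrDiversity)
    S += ",addr";
  S += ')';
  return S;
}

int main() {
  std::puts(formatAuth("_g", true, "ia", 42, false).c_str());     // _g@AUTH(ia,42)
  std::puts(formatAuth("_g + 16", false, "da", 5, true).c_str()); // (_g + 16)@AUTH(da,5,addr)
}
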
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index f8938cdbbec3..48235988869c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -14,7 +14,9 @@
#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCEXPR_H
#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCEXPR_H
+#include "Utils/AArch64BaseInfo.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
namespace llvm {
@@ -34,6 +36,8 @@ public:
VK_TPREL = 0x007,
VK_TLSDESC = 0x008,
VK_SECREL = 0x009,
+ VK_AUTH = 0x00a,
+ VK_AUTHADDR = 0x00b,
VK_SymLocBits = 0x00f,
// Variants specifying which part of the final address calculation is
@@ -116,6 +120,7 @@ private:
const MCExpr *Expr;
const VariantKind Kind;
+protected:
explicit AArch64MCExpr(const MCExpr *Expr, VariantKind Kind)
: Expr(Expr), Kind(Kind) {}
@@ -171,6 +176,42 @@ public:
return E->getKind() == MCExpr::Target;
}
};
+
+class AArch64AuthMCExpr final : public AArch64MCExpr {
+ uint16_t Discriminator;
+ AArch64PACKey::ID Key;
+
+ explicit AArch64AuthMCExpr(const MCExpr *Expr, uint16_t Discriminator,
+ AArch64PACKey::ID Key, bool HasAddressDiversity)
+ : AArch64MCExpr(Expr, HasAddressDiversity ? VK_AUTHADDR : VK_AUTH),
+ Discriminator(Discriminator), Key(Key) {}
+
+public:
+ static const AArch64AuthMCExpr *
+ create(const MCExpr *Expr, uint16_t Discriminator, AArch64PACKey::ID Key,
+ bool HasAddressDiversity, MCContext &Ctx);
+
+ AArch64PACKey::ID getKey() const { return Key; }
+ uint16_t getDiscriminator() const { return Discriminator; }
+ bool hasAddressDiversity() const { return getKind() == VK_AUTHADDR; }
+
+ void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
+
+ void visitUsedExpr(MCStreamer &Streamer) const override;
+
+ MCFragment *findAssociatedFragment() const override;
+
+ bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override;
+
+ static bool classof(const MCExpr *E) {
+ return isa<AArch64MCExpr>(E) && classof(cast<AArch64MCExpr>(E));
+ }
+
+ static bool classof(const AArch64MCExpr *E) {
+ return E->getKind() == VK_AUTH || E->getKind() == VK_AUTHADDR;
+ }
+};
} // end namespace llvm
#endif
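
The pair of classof overloads at the bottom of AArch64AuthMCExpr is the usual LLVM two-step: first check that the expression is at least an AArch64MCExpr, then test its kind against the two VK_AUTH* values. A hand-rolled miniature of the idiom (the real code dispatches through MCExpr::getKind() and llvm::isa/cast; all names here are simplified):

#include <cassert>

struct Expr {
  enum ExprClass { Target, Constant };
  ExprClass EC;
  explicit Expr(ExprClass EC) : EC(EC) {}
};

struct A64Expr : Expr {
  enum VariantKind { VK_ABS, VK_AUTH, VK_AUTHADDR };
  VariantKind Kind;
  explicit A64Expr(VariantKind K) : Expr(Target), Kind(K) {}
  static bool classof(const Expr *E) { return E->EC == Target; }
};

struct AuthExpr final : A64Expr {
  explicit AuthExpr(bool AddrDiversity)
      : A64Expr(AddrDiversity ? VK_AUTHADDR : VK_AUTH) {}

  // Step 1: is it at least an A64Expr?  Step 2: is the kind one of ours?
  static bool classof(const Expr *E) {
    return A64Expr::classof(E) && classof(static_cast<const A64Expr *>(E));
  }
  static bool classof(const A64Expr *E) {
    return E->Kind == VK_AUTH || E->Kind == VK_AUTHADDR;
  }
};

int main() {
  AuthExpr A(/*AddrDiversity=*/true);
  const Expr *E = &A;
  assert(AuthExpr::classof(E));
  return 0;
}
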
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index 11eb4bf0c402..e1d6dd7a056b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -12,7 +12,6 @@
#include "AArch64TargetStreamer.h"
#include "AArch64MCAsmInfo.h"
-#include "AArch64Subtarget.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/ConstantPools.h"
#include "llvm/MC/MCContext.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
index b3bce9960772..7676d88a82b5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
@@ -66,6 +66,7 @@ public:
virtual void emitARM64WinCFITrapFrame() {}
virtual void emitARM64WinCFIMachineFrame() {}
virtual void emitARM64WinCFIContext() {}
+ virtual void emitARM64WinCFIECContext() {}
virtual void emitARM64WinCFIClearUnwoundToCall() {}
virtual void emitARM64WinCFIPACSignLR() {}
virtual void emitARM64WinCFISaveAnyRegI(unsigned Reg, int Offset) {}
@@ -132,6 +133,7 @@ public:
void emitARM64WinCFITrapFrame() override;
void emitARM64WinCFIMachineFrame() override;
void emitARM64WinCFIContext() override;
+ void emitARM64WinCFIECContext() override;
void emitARM64WinCFIClearUnwoundToCall() override;
void emitARM64WinCFIPACSignLR() override;
void emitARM64WinCFISaveAnyRegI(unsigned Reg, int Offset) override;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
index 4c8c2b437069..438ac6cc4788 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
@@ -219,6 +219,10 @@ void AArch64TargetWinCOFFStreamer::emitARM64WinCFIContext() {
emitARM64WinUnwindCode(Win64EH::UOP_Context, -1, 0);
}
+void AArch64TargetWinCOFFStreamer::emitARM64WinCFIECContext() {
+ emitARM64WinUnwindCode(Win64EH::UOP_ECContext, -1, 0);
+}
+
void AArch64TargetWinCOFFStreamer::emitARM64WinCFIClearUnwoundToCall() {
emitARM64WinUnwindCode(Win64EH::UOP_ClearUnwoundToCall, -1, 0);
}
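
emitARM64WinCFIECContext follows the established shape of these hooks: a do-nothing virtual on the generic target streamer, so assembly-only consumers need no change, plus a COFF override that appends the Win64 EH UOP_ECContext unwind code used by Arm64EC frames. The default-no-op/override split, sketched:

#include <vector>

enum UnwindOp { UOP_ECContext }; // stand-in; the real value lives in Win64EH.h

struct TargetStreamer {
  virtual ~TargetStreamer() = default;
  virtual void emitECContext() {} // default: no-op, e.g. plain asm output
};

struct WinCOFFStreamer final : TargetStreamer {
  std::vector<UnwindOp> UnwindCodes;
  void emitECContext() override { UnwindCodes.push_back(UOP_ECContext); }
};

int main() {
  WinCOFFStreamer S;
  static_cast<TargetStreamer &>(S).emitECContext();
  return S.UnwindCodes.size() == 1 ? 0 : 1;
}
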
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/SMEABIPass.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/SMEABIPass.cpp
index 83010017c761..3315171798d9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/SMEABIPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/SMEABIPass.cpp
@@ -15,7 +15,6 @@
#include "AArch64.h"
#include "Utils/AArch64BaseInfo.h"
#include "Utils/AArch64SMEAttributes.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
@@ -112,6 +111,12 @@ bool SMEABI::updateNewZAFunctions(Module *M, Function *F,
Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_za_enable);
Builder.CreateCall(EnableZAIntr->getFunctionType(), EnableZAIntr);
+ // ZA state must be zeroed upon entry to a function with NewZA
+ Function *ZeroIntr =
+ Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_zero);
+ Builder.CreateCall(ZeroIntr->getFunctionType(), ZeroIntr,
+ Builder.getInt32(0xff));
+
// Before returning, disable pstate.za
for (BasicBlock &BB : *F) {
Instruction *T = BB.getTerminator();
@@ -137,7 +142,7 @@ bool SMEABI::runOnFunction(Function &F) {
bool Changed = false;
SMEAttrs FnAttrs(F);
- if (FnAttrs.hasNewZAInterface())
+ if (FnAttrs.hasNewZABody())
Changed |= updateNewZAFunctions(M, &F, Builder);
return Changed;
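
The inserted zeroing makes the new-ZA contract explicit: such a function enables ZA and then clears it before any use. The 0xff immediate to aarch64.sme.zero is a tile mask with one bit per 64-bit tile, so all of ZA0.D through ZA7.D, i.e. the whole array, is zeroed. A tiny sketch of composing such a mask (helper name is hypothetical):

#include <cassert>

// One mask bit per 64-bit ZA tile; all eight bits select the whole array.
constexpr unsigned zaTileMask(unsigned NumTiles) {
  unsigned Mask = 0;
  for (unsigned T = 0; T != NumTiles; ++T)
    Mask |= 1u << T;
  return Mask;
}

static_assert(zaTileMask(8) == 0xff, "zero {za} clears ZA0.D..ZA7.D");

int main() { return 0; }
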
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 6e3aadd5dd8c..4f8917618ea4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -10,11 +10,12 @@
//
//===----------------------------------------------------------------------===//
-def imm_to_tile8 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAB0>", []>;
-def imm_to_tile16 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAH0>", []>;
-def imm_to_tile32 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAS0>", []>;
-def imm_to_tile64 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAD0>", []>;
-def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAQ0>", []>;
+def imm_to_tile8 : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAB0, 0>", []>;
+def imm_to_tile16 : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAH0, 1>", []>;
+def imm_to_tile32 : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAS0, 3>", []>;
+def imm_to_tile64 : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAD0, 7>", []>;
+def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAQ0, 15>", []>;
+def imm_to_zt : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZT0, 0>", []>;
def tileslice8 : ComplexPattern<i32 , 2, "SelectSMETileSlice<15, 1>", []>;
def tileslice16 : ComplexPattern<i32 , 2, "SelectSMETileSlice<7, 1>", []>;
@@ -33,6 +34,12 @@ def tileslicerange0s4 : ComplexPattern<i32, 2, "SelectSMETileSlice<0, 4>", []>;
def am_sme_indexed_b4 :ComplexPattern<iPTR, 2, "SelectAddrModeIndexedSVE<0,15>", [], [SDNPWantRoot]>;
+def SDTZALoadStore : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>;
+def AArch64SMELdr : SDNode<"AArch64ISD::SME_ZA_LDR", SDTZALoadStore,
+ [SDNPHasChain, SDNPSideEffect, SDNPMayLoad]>;
+def AArch64SMEStr : SDNode<"AArch64ISD::SME_ZA_STR", SDTZALoadStore,
+ [SDNPHasChain, SDNPSideEffect, SDNPMayStore]>;
+
//===----------------------------------------------------------------------===//
// SME Pseudo Classes
//===----------------------------------------------------------------------===//
@@ -190,11 +197,48 @@ class SME_ZA_Tile_TwoPred_TwoVec_Pat<string name, SDPatternOperator intrinsic, O
: Pat<(intrinsic imm_ty:$tile, (pg_ty PPR3bAny:$Pn), (pg_ty PPR3bAny:$Pm), vt:$Zn, vt:$Zm),
(!cast<Instruction>(name # _PSEUDO) $tile, $Pn, $Pm, $Zn, $Zm)>;
+
+//===----------------------------------------------------------------------===//
+// SME smstart/smstop
+//===----------------------------------------------------------------------===//
+
+// SME defines three pstate fields to set or clear PSTATE.SM, PSTATE.ZA, or
+// both fields:
+//
+// MSR SVCRSM, #<imm1>
+// MSR SVCRZA, #<imm1>
+// MSR SVCRSMZA, #<imm1>
+//
+// It's tricky to reuse the existing pstate operand defined in
+// AArch64SystemOperands.td, since it only encodes 5 bits covering op1 and
+// op2, while these fields are also encoded in CRm[3:1].
+def MSRpstatesvcrImm1
+ : PstateWriteSimple<(ins svcr_op:$pstatefield, timm0_1:$imm), "msr",
+ "\t$pstatefield, $imm">,
+ Sched<[WriteSys]> {
+ bits<3> pstatefield;
+ bit imm;
+ let Inst{18-16} = 0b011; // op1
+ let Inst{11-9} = pstatefield;
+ let Inst{8} = imm;
+ let Inst{7-5} = 0b011; // op2
+ let hasPostISelHook = 1;
+}
+
+def : InstAlias<"smstart", (MSRpstatesvcrImm1 0b011, 0b1)>;
+def : InstAlias<"smstart sm", (MSRpstatesvcrImm1 0b001, 0b1)>;
+def : InstAlias<"smstart za", (MSRpstatesvcrImm1 0b010, 0b1)>;
+
+def : InstAlias<"smstop", (MSRpstatesvcrImm1 0b011, 0b0)>;
+def : InstAlias<"smstop sm", (MSRpstatesvcrImm1 0b001, 0b0)>;
+def : InstAlias<"smstop za", (MSRpstatesvcrImm1 0b010, 0b0)>;
+
+
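
Given the fixed fields in MSRpstatesvcrImm1 (op1 = 0b011, op2 = 0b011, CRm carrying {pstatefield, imm}), each alias resolves to a single MSR (immediate) word; `smstart`, for instance, is MSR SVCRSMZA, #1. A sketch that assembles the word by hand to cross-check, assuming the Arm ARM's MSR-immediate layout:

#include <cstdint>
#include <cstdio>

// MSR (immediate): 1101 0101 0000 0 op1 0100 CRm op2 11111
uint32_t encodeMSRpstatesvcr(unsigned PStateField, unsigned Imm) {
  uint32_t Word = 0xD500401F;       // skeleton with op1 = CRm = op2 = 0
  Word |= 0b011u << 16;             // op1
  Word |= (PStateField & 0x7) << 9; // CRm[3:1] = pstatefield
  Word |= (Imm & 0x1) << 8;         // CRm[0]   = imm
  Word |= 0b011u << 5;              // op2
  return Word;
}

int main() {
  std::printf("smstart   = %08x\n", encodeMSRpstatesvcr(0b011, 1)); // d503477f
  std::printf("smstop sm = %08x\n", encodeMSRpstatesvcr(0b001, 0)); // d503427f
}
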
//===----------------------------------------------------------------------===//
// SME Outer Products
//===----------------------------------------------------------------------===//
-class sme_fp_outer_product_inst<bit S, bits<2> sz, bit op, MatrixTileOperand za_ty,
+class sme_fp_outer_product_inst<bit S, bits<2> sz, bits<2> op, MatrixTileOperand za_ty,
ZPRRegOp zpr_ty, string mnemonic>
: I<(outs za_ty:$ZAda),
(ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
@@ -206,7 +250,7 @@ class sme_fp_outer_product_inst<bit S, bits<2> sz, bit op, MatrixTileOperand za_
bits<3> Pn;
bits<5> Zn;
let Inst{31-25} = 0b1000000;
- let Inst{24} = op;
+ let Inst{24} = op{1};
let Inst{23} = 0b1;
let Inst{22-21} = sz;
let Inst{20-16} = Zm;
@@ -214,25 +258,25 @@ class sme_fp_outer_product_inst<bit S, bits<2> sz, bit op, MatrixTileOperand za_
let Inst{12-10} = Pn;
let Inst{9-5} = Zn;
let Inst{4} = S;
- let Inst{3} = op;
+ let Inst{3} = op{0};
let Constraints = "$ZAda = $_ZAda";
}
-multiclass sme_outer_product_fp32<bit S, string mnemonic, SDPatternOperator op> {
- def NAME : sme_fp_outer_product_inst<S, 0b00, 0b0, TileOp32, ZPR32, mnemonic>, SMEPseudo2Instr<NAME, 1> {
+multiclass sme_outer_product_fp32<bit S, bits<2> sz, ZPRRegOp zpr_ty, string mnemonic, SDPatternOperator op> {
+ def NAME : sme_fp_outer_product_inst<S, sz, 0b00, TileOp32, zpr_ty, mnemonic>, SMEPseudo2Instr<NAME, 1> {
bits<2> ZAda;
let Inst{1-0} = ZAda;
let Inst{2} = 0b0;
}
- def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR32, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
+ def NAME # _PSEUDO : sme_outer_product_pseudo<zpr_ty, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv4i1, nxv4f32>;
}
multiclass sme_outer_product_fp64<bit S, string mnemonic, SDPatternOperator op> {
- def NAME : sme_fp_outer_product_inst<S, 0b10, 0b0, TileOp64, ZPR64, mnemonic>, SMEPseudo2Instr<NAME, 1> {
+ def NAME : sme_fp_outer_product_inst<S, 0b10, 0b00, TileOp64, ZPR64, mnemonic>, SMEPseudo2Instr<NAME, 1> {
bits<3> ZAda;
let Inst{2-0} = ZAda;
}
@@ -242,8 +286,8 @@ multiclass sme_outer_product_fp64<bit S, string mnemonic, SDPatternOperator op>
def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_7, nxv2i1, nxv2f64>;
}
-multiclass sme2p1_fmop_tile_fp16<string mnemonic, bit bf, bit s>{
- def NAME : sme_fp_outer_product_inst<s, {0,bf}, 0b1, TileOp16, ZPR16, mnemonic> {
+multiclass sme2p1_fmop_tile_fp16<string mnemonic, bit bf, bit s, bits<2> op, ZPRRegOp zpr_ty>{
+ def NAME : sme_fp_outer_product_inst<s, {0,bf}, op, TileOp16, zpr_ty, mnemonic> {
bits<1> ZAda;
let Inst{2-1} = 0b00;
let Inst{0} = ZAda;
@@ -743,23 +787,23 @@ class sme_spill_inst<string opcodestr>
: sme_spill_fill_base<0b1, (outs),
(ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv,
sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
- imm0_15:$offset),
+ imm32_0_15:$offset),
opcodestr>;
let mayLoad = 1 in
class sme_fill_inst<string opcodestr>
: sme_spill_fill_base<0b0, (outs MatrixOp:$ZAt),
(ins MatrixIndexGPR32Op12_15:$Rv,
sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
- imm0_15:$offset),
+ imm32_0_15:$offset),
opcodestr>;
multiclass sme_spill<string opcodestr> {
def NAME : sme_spill_inst<opcodestr>;
def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
(!cast<Instruction>(NAME) MatrixOp:$ZAt,
MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
- // base
- def : Pat<(int_aarch64_sme_str MatrixIndexGPR32Op12_15:$idx, GPR64sp:$base),
- (!cast<Instruction>(NAME) ZA, $idx, 0, $base, 0)>;
+
+ def : Pat<(AArch64SMEStr (i32 MatrixIndexGPR32Op12_15:$slice), (i64 GPR64sp:$base), (i32 sme_elm_idx0_15:$imm)),
+ (!cast<Instruction>(NAME) ZA, MatrixIndexGPR32Op12_15:$slice, sme_elm_idx0_15:$imm, GPR64sp:$base, imm32_0_15:$imm)>;
}
multiclass sme_fill<string opcodestr> {
@@ -769,16 +813,15 @@ multiclass sme_fill<string opcodestr> {
MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
def NAME # _PSEUDO
: Pseudo<(outs),
- (ins MatrixIndexGPR32Op12_15:$idx, imm0_15:$imm4,
+ (ins MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm4,
GPR64sp:$base), []>,
Sched<[]> {
// Translated to actual instruction in AArch64ISelLowering.cpp
let usesCustomInserter = 1;
let mayLoad = 1;
}
- // base
- def : Pat<(int_aarch64_sme_ldr MatrixIndexGPR32Op12_15:$idx, GPR64sp:$base),
- (!cast<Instruction>(NAME # _PSEUDO) $idx, 0, $base)>;
+ def : Pat<(AArch64SMELdr MatrixIndexGPR32Op12_15:$slice, GPR64sp:$base, sme_elm_idx0_15:$imm),
+ (!cast<Instruction>(NAME # _PSEUDO) MatrixIndexGPR32Op12_15:$slice, sme_elm_idx0_15:$imm, GPR64sp:$base)>;
}
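
The retargeted patterns select through the new AArch64SMELdr/AArch64SMEStr nodes, whose SDTZALoadStore profile carries three operands: slice index, base pointer, and an immediate offset. Note how the STR pattern feeds the same sme_elm_idx0_15 immediate to both the vector-select slot and the memory-offset slot; the instruction form ties the two together, as this hedged model of the semantics assumes:

#include <cstdint>

// str za[wv, #imm], [xn, #imm, mul vl]: the selected ZA row and the memory
// offset share one immediate (0..15), the latter scaled by the streaming VL.
struct ZAStrOperands {
  uint32_t Wv;  // slice index register (w12-w15)
  uint64_t Xn;  // base pointer
  uint32_t Imm; // 0..15
};

uint32_t selectedRow(const ZAStrOperands &Op) { return Op.Wv + Op.Imm; }

uint64_t storeAddress(const ZAStrOperands &Op, uint64_t SVLBytes) {
  return Op.Xn + Op.Imm * SVLBytes;
}

int main() {
  ZAStrOperands Op{2, 0x1000, 3};
  return (selectedRow(Op) == 5 && storeAddress(Op, 32) == 0x1060) ? 0 : 1;
}
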
//===----------------------------------------------------------------------===//
@@ -1297,17 +1340,17 @@ multiclass sve2_int_perm_sel_p<string asm, SDPatternOperator op> {
}
def : InstAlias<asm # "\t$Pd, $Pn, $Pm[$Rv, $imm]",
- (!cast<Instruction>(NAME # _B) PNRAny:$Pd,
- PNRAny:$Pn, PPR8:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm), 0>;
+ (!cast<Instruction>(NAME # _B) PNRasPPRAny:$Pd,
+ PNRasPPRAny:$Pn, PPR8:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm), 0>;
def : InstAlias<asm # "\t$Pd, $Pn, $Pm[$Rv, $imm]",
- (!cast<Instruction>(NAME # _H) PNRAny:$Pd,
- PNRAny:$Pn, PPR16:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_7:$imm), 0>;
+ (!cast<Instruction>(NAME # _H) PNRasPPRAny:$Pd,
+ PNRasPPRAny:$Pn, PPR16:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_7:$imm), 0>;
def : InstAlias<asm # "\t$Pd, $Pn, $Pm[$Rv, $imm]",
- (!cast<Instruction>(NAME # _S) PNRAny:$Pd,
- PNRAny:$Pn, PPR32:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_3:$imm), 0>;
+ (!cast<Instruction>(NAME # _S) PNRasPPRAny:$Pd,
+ PNRasPPRAny:$Pn, PPR32:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_3:$imm), 0>;
def : InstAlias<asm # "\t$Pd, $Pn, $Pm[$Rv, $imm]",
- (!cast<Instruction>(NAME # _D) PNRAny:$Pd,
- PNRAny:$Pn, PPR64:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_1:$imm), 0>;
+ (!cast<Instruction>(NAME # _D) PNRasPPRAny:$Pd,
+ PNRasPPRAny:$Pn, PPR64:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_1:$imm), 0>;
def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm),
MatrixIndexGPR32Op12_15:$idx)),
@@ -1413,7 +1456,7 @@ multiclass sme2_dot_mla_add_sub_array_vg4_single<string mnemonic, bits<7> op,
//===----------------------------------------------------------------------===//
// SME2 multiple vectors ternary INT/FP two and four registers
-class sme2_dot_mla_add_sub_array_vg2_multi<bits<6> op,
+class sme2_dot_mla_add_sub_array_vg2_multi<bits<7> op,
MatrixOperand matrix_ty,
RegisterOperand multi_vector_ty,
string mnemonic>
@@ -1427,20 +1470,19 @@ class sme2_dot_mla_add_sub_array_vg2_multi<bits<6> op,
bits<2> Rv;
bits<3> imm3;
let Inst{31-23} = 0b110000011;
- let Inst{22} = op{5}; //sz
+ let Inst{22} = op{6}; // sz
let Inst{21} = 0b1;
let Inst{20-17} = Zm;
let Inst{16-15} = 0b00;
let Inst{14-13} = Rv;
- let Inst{12-10} = op{4-2};
+ let Inst{12-10} = op{5-3};
let Inst{9-6} = Zn;
- let Inst{5} = 0b0;
- let Inst{4-3} = op{1-0};
+ let Inst{5-3} = op{2-0};
let Inst{2-0} = imm3;
let Constraints = "$ZAd = $_ZAd";
}
-multiclass sme2_dot_mla_add_sub_array_vg2_multi<string mnemonic, bits<6> op,
+multiclass sme2_dot_mla_add_sub_array_vg2_multi<string mnemonic, bits<7> op,
MatrixOperand matrix_ty,
RegisterOperand multi_vector_ty, ValueType zpr_ty,
SDPatternOperator intrinsic> {
@@ -1454,7 +1496,7 @@ multiclass sme2_dot_mla_add_sub_array_vg2_multi<string mnemonic, bits<6> op,
(!cast<Instruction>(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>;
}
-class sme2_dot_mla_add_sub_array_vg4_multi<bits<6> op,
+class sme2_dot_mla_add_sub_array_vg4_multi<bits<7> op,
MatrixOperand matrix_ty,
RegisterOperand multi_vector_ty,
string mnemonic>
@@ -1468,20 +1510,20 @@ class sme2_dot_mla_add_sub_array_vg4_multi<bits<6> op,
bits<2> Rv;
bits<3> imm3;
let Inst{31-23} = 0b110000011;
- let Inst{22} = op{5}; //sz
+ let Inst{22} = op{6}; // sz
let Inst{21} = 0b1;
let Inst{20-18} = Zm;
let Inst{17-15} = 0b010;
let Inst{14-13} = Rv;
- let Inst{12-10} = op{4-2};
+ let Inst{12-10} = op{5-3};
let Inst{9-7} = Zn;
- let Inst{6-5} = 0b00;
- let Inst{4-3} = op{1-0};
+ let Inst{6} = 0b0;
+ let Inst{5-3} = op{2-0};
let Inst{2-0} = imm3;
let Constraints = "$ZAd = $_ZAd";
}
-multiclass sme2_dot_mla_add_sub_array_vg4_multi<string mnemonic, bits<6> op,
+multiclass sme2_dot_mla_add_sub_array_vg4_multi<string mnemonic, bits<7> op,
MatrixOperand matrix_ty,
RegisterOperand multi_vector_ty,
ValueType zpr_ty, SDPatternOperator intrinsic>{
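
Across the vg2/vg4 multi classes here, op grows from bits<6> to bits<7>: the bit previously hard-wired to zero at Inst{5} is folded into the opcode (Inst{5-3} = op{2-0}) so the FP8 variants can drive it. The remapping, spelled out for the vg2 layout:

#include <cstdint>

// Place a 7-bit opcode into the word the way the vg2-multi record now does.
uint32_t placeOpcode(uint32_t Word, unsigned Op7) {
  Word |= ((Op7 >> 6) & 0x1) << 22; // op{6}   -> Inst{22} (sz)
  Word |= ((Op7 >> 3) & 0x7) << 10; // op{5-3} -> Inst{12-10}
  Word |= (Op7 & 0x7) << 3;         // op{2-0} -> Inst{5-3}
  return Word;
}

int main() {
  // With op{2} clear, the result matches the old 6-bit layout bit for bit.
  uint32_t Expected = (1u << 22) | (0b011u << 10) | (0b011u << 3);
  return placeOpcode(0, 0b1011011) == Expected ? 0 : 1;
}
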
@@ -1758,8 +1800,8 @@ class sme2_mla_long_array_index_base<bits<2> op0, bits<2> op, Operand index_ty,
}
multiclass sme2_mla_long_array_index<string mnemonic, bits<2> op0, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
- def _S : sme2_mla_long_array_index_base<op0, op, uimm3s2range, ZPR16,
- mnemonic>, SMEPseudo2Instr<NAME # _S, 1> {
+ def _HtoS : sme2_mla_long_array_index_base<op0, op, uimm3s2range, ZPR16,
+ mnemonic>, SMEPseudo2Instr<NAME # _HtoS, 1> {
bits<3> i3;
bits<5> Zn;
bits<3> imm;
@@ -1769,9 +1811,9 @@ multiclass sme2_mla_long_array_index<string mnemonic, bits<2> op0, bits<2> op, V
let Inst{2-0} = imm;
}
- def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _S, uimm3s2range, ZPR16, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
+ def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm3s2range, ZPR16, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
- def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME # _S, intrinsic, uimm3s2range, ZPR4b16, zpr_ty, VectorIndexH32b_timm, tileslicerange3s2>;
+ def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm3s2range, ZPR4b16, zpr_ty, VectorIndexH32b_timm, tileslicerange3s2>;
}
class sme2_mla_long_array_vg2_index<string mnemonic, bits<2> op0, bits<2> op>
@@ -1789,14 +1831,14 @@ class sme2_mla_long_array_vg2_index<string mnemonic, bits<2> op0, bits<2> op>
}
multiclass sme2_fp_mla_long_array_vg2_index<string mnemonic, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
- def _S : sme2_mla_long_array_vg2_index<mnemonic, 0b10, op>, SMEPseudo2Instr<NAME # _S, 1>;
+ def _HtoS : sme2_mla_long_array_vg2_index<mnemonic, 0b10, op>, SMEPseudo2Instr<NAME # _HtoS, 1>;
- def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _S, uimm2s2range, ZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
+ def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm2s2range, ZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
- def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME # _S, intrinsic, uimm2s2range, ZPR4b16, zpr_ty, VectorIndexH32b_timm, tileslicerange2s2>;
+ def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, zpr_ty, VectorIndexH32b_timm, tileslicerange2s2>;
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
- (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
+ (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
}
multiclass sme2_int_mla_long_array_vg2_index<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
@@ -1825,33 +1867,35 @@ class sme2_mla_long_array_vg4_index<string mnemonic, bits<2> op0, bits<2> op>
}
multiclass sme2_fp_mla_long_array_vg4_index<string mnemonic, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
- def _S : sme2_mla_long_array_vg4_index<mnemonic, 0b10, op>, SMEPseudo2Instr<NAME # _S, 1>;
+ def _HtoS : sme2_mla_long_array_vg4_index<mnemonic, 0b10, op>, SMEPseudo2Instr<NAME # _HtoS, 1>;
- def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _S, uimm2s2range, ZZZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
+ def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm2s2range, ZZZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
- def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME # _S, intrinsic, uimm2s2range, ZPR4b16, zpr_ty, VectorIndexH32b_timm, tileslicerange2s2>;
+ def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, zpr_ty, VectorIndexH32b_timm, tileslicerange2s2>;
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
- (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
+ (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
}
multiclass sme2_int_mla_long_array_vg4_index<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
- def _S : sme2_mla_long_array_vg4_index<mnemonic, 0b11, op>, SMEPseudo2Instr<NAME # _S, 1>;
+ def _HtoS : sme2_mla_long_array_vg4_index<mnemonic, 0b11, op>, SMEPseudo2Instr<NAME # _HtoS, 1>;
- def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _S, uimm2s2range, ZZZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
+ def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm2s2range, ZZZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
- def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME # _S, intrinsic, uimm2s2range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange2s2>;
+ def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange2s2>;
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
- (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
+ (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
}
-class sme2_mla_long_array<bits<2>op0, bits<2> op, Operand index_ty,
+class sme2_mla_long_array<bits<2> op0, bits<2> op,
+ MatrixOperand matrix_ty,
+ Operand index_ty,
RegisterOperand first_vector_ty,
RegisterOperand second_vector_ty,
string mnemonic, string vg_acronym="">
- : I<(outs MatrixOp32:$ZAda),
- (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv,
+ : I<(outs matrix_ty:$ZAda),
+ (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv,
index_ty:$imm, first_vector_ty:$Zn, second_vector_ty:$Zm),
mnemonic,"\t$ZAda[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "], $Zn, $Zm",
"", []> , Sched<[]> {
@@ -1869,8 +1913,8 @@ class sme2_mla_long_array<bits<2>op0, bits<2> op, Operand index_ty,
}
multiclass sme2_mla_long_array_single<string mnemonic, bits<2> op0, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
- def _S : sme2_mla_long_array<op0, op, uimm3s2range, ZPR16, ZPR4b16,
- mnemonic> , SMEPseudo2Instr<NAME # _S, 1>{
+ def _HtoS : sme2_mla_long_array<op0, op, MatrixOp32, uimm3s2range, ZPR16, ZPR4b16,
+ mnemonic> , SMEPseudo2Instr<NAME # _HtoS, 1>{
bits<4> Zm;
bits<5> Zn;
bits<3> imm;
@@ -1880,15 +1924,26 @@ multiclass sme2_mla_long_array_single<string mnemonic, bits<2> op0, bits<2> op,
let Inst{2-0} = imm;
}
- def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _S, uimm3s2range, ZPR16, ZPR4b16, SMEMatrixArray>;
+ def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _HtoS, uimm3s2range, ZPR16, ZPR4b16, SMEMatrixArray>;
- def : SME2_ZA_TwoOp_Multi_Single_Pat<NAME # _S, intrinsic, uimm3s2range, ZPR4b16, zpr_ty, tileslicerange3s2>;
+ def : SME2_ZA_TwoOp_Multi_Single_Pat<NAME # _HtoS, intrinsic, uimm3s2range, ZPR4b16, zpr_ty, tileslicerange3s2>;
+}
+
+class sme2_mla_long_array_single_16b<string mnemonic>
+ : sme2_mla_long_array<0b00, 0b00, MatrixOp16, uimm3s2range, ZPR8, ZPR4b8, mnemonic> {
+ bits<4> Zm;
+ bits<5> Zn;
+ bits<3> imm;
+ let Inst{20} = 0b1;
+ let Inst{19-16} = Zm;
+ let Inst{9-5} = Zn;
+ let Inst{2-0} = imm;
}
-class sme2_mla_long_array_vg24_single<bits<2> op0, bit vg4, bits<2> op,
- RegisterOperand first_vector_ty,
- string mnemonic, string vg_acronym>
- : sme2_mla_long_array<op0, op, uimm2s2range, first_vector_ty, ZPR4b16,
+class sme2_mla_long_array_vg24_single<bits<2> op0, bit vg4, bits<2> op, bit o2,
+ MatrixOperand matrix_ty, RegisterOperand multi_vector_ty,
+ ZPRRegOp zpr_ty, string mnemonic, string vg_acronym>
+ : sme2_mla_long_array<op0, op, matrix_ty, uimm2s2range, multi_vector_ty, zpr_ty,
mnemonic, vg_acronym> {
bits<4> Zm;
bits<5> Zn;
@@ -1896,96 +1951,117 @@ class sme2_mla_long_array_vg24_single<bits<2> op0, bit vg4, bits<2> op,
let Inst{20} = vg4;
let Inst{19-16} = Zm;
let Inst{9-5} = Zn;
- let Inst{2} = 0b0;
+ let Inst{2} = o2;
let Inst{1-0} = imm;
}
-multiclass sme2_fp_mla_long_array_vg2_single<string mnemonic, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
- def _S : sme2_mla_long_array_vg24_single<0b00, 0b0, op, ZZ_h, mnemonic,
- "vgx2">, SMEPseudo2Instr<NAME # _S, 1>;
+multiclass sme2_fp_mla_long_array_vg2_single<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
+ RegisterOperand multi_vector_ty, ZPRRegOp vector_ty,
+ ValueType zpr_ty, SDPatternOperator intrinsic> {
+ def NAME : sme2_mla_long_array_vg24_single<0b00, 0b0, op{2-1}, op{0}, matrix_ty, multi_vector_ty,
+ vector_ty, mnemonic, "vgx2">, SMEPseudo2Instr<NAME, 1>;
- def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _S, uimm2s2range, ZZ_h, ZPR4b16, SMEMatrixArray>;
+ def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm2s2range, multi_vector_ty,
+ vector_ty, SMEMatrixArray>;
- def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME # _S, intrinsic, uimm2s2range, ZPR4b16, zpr_ty, tileslicerange2s2>;
+ def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME, intrinsic, uimm2s2range, vector_ty, zpr_ty,
+ tileslicerange2s2>;
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
- (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h:$Zn, ZPR4b16:$Zm), 0>;
+ (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
+ uimm2s2range:$imm, multi_vector_ty:$Zn, vector_ty:$Zm), 0>;
}
multiclass sme2_int_mla_long_array_vg2_single<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
- def _S : sme2_mla_long_array_vg24_single<0b01, 0b0, op, ZZ_h, mnemonic,
- "vgx2">, SMEPseudo2Instr<NAME # _S, 1>;
+ def _HtoS : sme2_mla_long_array_vg24_single<0b01, 0b0, op, 0b0, MatrixOp32, ZZ_h, ZPR4b16, mnemonic,
+ "vgx2">, SMEPseudo2Instr<NAME # _HtoS, 1>;
- def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _S, uimm2s2range, ZZ_h, ZPR4b16, SMEMatrixArray>;
+ def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _HtoS, uimm2s2range, ZZ_h, ZPR4b16, SMEMatrixArray>;
- def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME # _S, intrinsic, uimm2s2range, ZPR4b16, nxv8i16, tileslicerange2s2>;
+ def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, nxv8i16, tileslicerange2s2>;
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
- (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h:$Zn, ZPR4b16:$Zm), 0>;
+ (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h:$Zn, ZPR4b16:$Zm), 0>;
}
-multiclass sme2_fp_mla_long_array_vg4_single<string mnemonic, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
- def _S : sme2_mla_long_array_vg24_single<0b00, 0b1, op, ZZZZ_h, mnemonic,
- "vgx4">, SMEPseudo2Instr<NAME # _S, 1>;
+multiclass sme2_fp_mla_long_array_vg4_single<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
+ RegisterOperand multi_vector_ty, ZPRRegOp vector_ty,
+ ValueType zpr_ty, SDPatternOperator intrinsic> {
+ def NAME : sme2_mla_long_array_vg24_single<0b00, 0b1, op{2-1}, op{0}, matrix_ty, multi_vector_ty,
+ vector_ty, mnemonic, "vgx4">,
+ SMEPseudo2Instr<NAME, 1>;
- def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _S, uimm2s2range, ZZZZ_h, ZPR4b16, SMEMatrixArray>;
+ def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm2s2range, multi_vector_ty, vector_ty,
+ SMEMatrixArray>;
- def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME # _S, intrinsic, uimm2s2range, ZPR4b16, zpr_ty, tileslicerange2s2>;
+ def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME, intrinsic, uimm2s2range, vector_ty, zpr_ty,
+ tileslicerange2s2>;
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
- (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h:$Zn, ZPR4b16:$Zm), 0>;
+ (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
+ uimm2s2range:$imm, multi_vector_ty:$Zn, vector_ty:$Zm), 0>;
}
multiclass sme2_int_mla_long_array_vg4_single<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
- def _S : sme2_mla_long_array_vg24_single<0b01, 0b1, op, ZZZZ_h, mnemonic,
- "vgx4">, SMEPseudo2Instr<NAME # _S, 1>;
+ def _HtoS : sme2_mla_long_array_vg24_single<0b01, 0b1, op, 0b0, MatrixOp32, ZZZZ_h, ZPR4b16, mnemonic,
+ "vgx4">, SMEPseudo2Instr<NAME # _HtoS, 1>;
- def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _S, uimm2s2range, ZZZZ_h, ZPR4b16, SMEMatrixArray>;
+ def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _HtoS, uimm2s2range, ZZZZ_h, ZPR4b16, SMEMatrixArray>;
- def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME # _S, intrinsic, uimm2s2range, ZPR4b16, nxv8i16, tileslicerange2s2>;
+ def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, nxv8i16, tileslicerange2s2>;
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
- (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h:$Zn, ZPR4b16:$Zm), 0>;
+ (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h:$Zn, ZPR4b16:$Zm), 0>;
}
-class sme2_mla_long_array_vg2_multi<string mnemonic, bits<2> op0, bits<2> op>
- : sme2_mla_long_array<op0, op, uimm2s2range, ZZ_h_mul_r, ZZ_h_mul_r, mnemonic,
- "vgx2"> {
+
+class sme2_mla_long_array_vg2_multi<string mnemonic, bits<2> op0, bits<3> op,
+ MatrixOperand matrix_ty, RegisterOperand multi_vector_ty>
+ : sme2_mla_long_array<op0, op{1-0}, matrix_ty, uimm2s2range, multi_vector_ty, multi_vector_ty,
+ mnemonic, "vgx2"> {
bits<4> Zm;
bits<4> Zn;
bits<2> imm;
let Inst{20-17} = Zm;
let Inst{16} = 0b0;
let Inst{9-6} = Zn;
- let Inst{5} = 0b0;
+ let Inst{5} = op{2}; // fp8
let Inst{2} = 0b0;
let Inst{1-0} = imm;
}
-multiclass sme2_fp_mla_long_array_vg2_multi<string mnemonic, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
- def _S : sme2_mla_long_array_vg2_multi<mnemonic, 0b10, op>, SMEPseudo2Instr<NAME # _S, 1>;
+multiclass sme2_fp_mla_long_array_vg2_multi<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
+ RegisterOperand multi_vector_ty,
+ ValueType zpr_ty, SDPatternOperator intrinsic> {
+
+ def NAME : sme2_mla_long_array_vg2_multi<mnemonic, 0b10, op, matrix_ty, multi_vector_ty>,
+ SMEPseudo2Instr<NAME, 1>;
- def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME # _S, uimm2s2range, ZZ_h_mul_r, SMEMatrixArray>;
+ def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm2s2range, multi_vector_ty, SMEMatrixArray>;
- def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME # _S, intrinsic, uimm2s2range, zpr_ty, tileslicerange2s2>;
+ def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME, intrinsic, uimm2s2range, zpr_ty, tileslicerange2s2>;
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
- (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm), 0>;
+ (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
+ uimm2s2range:$imm, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>;
}
multiclass sme2_int_mla_long_array_vg2_multi<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
- def _S : sme2_mla_long_array_vg2_multi<mnemonic, 0b11, op>, SMEPseudo2Instr<NAME # _S, 1>;
+ def _HtoS : sme2_mla_long_array_vg2_multi<mnemonic, 0b11, {0b0, op}, MatrixOp32, ZZ_h_mul_r>,
+ SMEPseudo2Instr<NAME # _HtoS, 1>;
- def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME # _S, uimm2s2range, ZZ_h_mul_r, SMEMatrixArray>;
+ def _HtoS_PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME # _HtoS, uimm2s2range, ZZ_h_mul_r, SMEMatrixArray>;
- def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME # _S, intrinsic, uimm2s2range, nxv8i16, tileslicerange2s2>;
+ def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME # _HtoS, intrinsic, uimm2s2range, nxv8i16, tileslicerange2s2>;
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm",
- (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm), 0>;
+ (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm), 0>;
}
-class sme2_mla_long_array_vg4_multi<string mnemonic, bits<2> op0, bits<2> op>
- : sme2_mla_long_array<op0, op, uimm2s2range, ZZZZ_h_mul_r, ZZZZ_h_mul_r, mnemonic,
- "vgx4"> {
+class sme2_mla_long_array_vg4_multi<string mnemonic, bits<2> op0, bits<3> op,
+ MatrixOperand matrix_ty,
+ RegisterOperand multi_vector_ty>
+ : sme2_mla_long_array<op0, op{1-0}, matrix_ty, uimm2s2range, multi_vector_ty, multi_vector_ty,
+ mnemonic, "vgx4"> {
bits<3> Zm;
bits<3> Zn;
bits<2> imm;
@@ -1993,31 +2069,37 @@ class sme2_mla_long_array_vg4_multi<string mnemonic, bits<2> op0, bits<2> op>
let Inst{17} = 0b0;
let Inst{16} = 0b1;
let Inst{9-7} = Zn;
- let Inst{6-5} = 0b00;
+ let Inst{6} = 0b0;
+ let Inst{5} = op{2}; // fp8
let Inst{2} = 0b0;
let Inst{1-0} = imm;
}
-multiclass sme2_fp_mla_long_array_vg4_multi<string mnemonic, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
- def _S : sme2_mla_long_array_vg4_multi<mnemonic, 0b10, op>, SMEPseudo2Instr<NAME # _S, 1>;
+multiclass sme2_fp_mla_long_array_vg4_multi<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
+ RegisterOperand multi_vector_ty, ValueType zpr_ty,
+ SDPatternOperator intrinsic> {
+ def NAME : sme2_mla_long_array_vg4_multi<mnemonic, 0b10, op, matrix_ty, multi_vector_ty>,
+ SMEPseudo2Instr<NAME, 1>;
- def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME # _S, uimm2s2range, ZZZZ_h_mul_r, SMEMatrixArray>;
+ def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm2s2range, multi_vector_ty, SMEMatrixArray>;
- def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME # _S, intrinsic, uimm2s2range, zpr_ty, tileslicerange2s2>;
+ def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME, intrinsic, uimm2s2range, zpr_ty, tileslicerange2s2>;
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
- (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm), 0>;
+ (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
+ uimm2s2range:$imm, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>;
}
multiclass sme2_int_mla_long_array_vg4_multi<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
- def _S : sme2_mla_long_array_vg4_multi<mnemonic, 0b11, op>, SMEPseudo2Instr<NAME # _S, 1>;
+ def _HtoS : sme2_mla_long_array_vg4_multi<mnemonic, 0b11, {0b0, op}, MatrixOp32, ZZZZ_h_mul_r>,
+ SMEPseudo2Instr<NAME # _HtoS, 1>;
- def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME # _S, uimm2s2range, ZZZZ_h_mul_r, SMEMatrixArray>;
+ def _HtoS_PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME # _HtoS, uimm2s2range, ZZZZ_h_mul_r, SMEMatrixArray>;
- def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME # _S, intrinsic, uimm2s2range, nxv8i16, tileslicerange2s2>;
+ def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME # _HtoS, intrinsic, uimm2s2range, nxv8i16, tileslicerange2s2>;
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm",
- (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm), 0>;
+ (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm), 0>;
}
//===----------------------------------------------------------------------===//
@@ -2097,15 +2179,16 @@ multiclass sme2_frint_vector_vg4_multi<string mnemonic, bits<7> op> {
mnemonic>;
}
-class sme2_cvt_vg2_single<string mnemonic, bits<4> op>
- : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn),
+class sme2_cvt_vg2_single<string mnemonic, bits<5> op,
+ RegisterOperand first_ty, RegisterOperand second_ty>
+ : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
bits<4> Zn;
bits<5> Zd;
let Inst{31-23} = 0b110000010;
- let Inst{22} = op{3};
- let Inst{21-18} = 0b1000;
- let Inst{17-16} = op{2-1};
+ let Inst{22} = op{4};
+ let Inst{21-19} = 0b100;
+ let Inst{18-16} = op{3-1};
let Inst{15-10} = 0b111000;
let Inst{9-6} = Zn;
let Inst{5} = op{0};
@@ -2114,12 +2197,17 @@ class sme2_cvt_vg2_single<string mnemonic, bits<4> op>
// SME2 multi-vec FP down convert two registers
// SME2 multi-vec int down convert two registers
-multiclass sme2_cvt_vg2_single<string mnemonic, bits<4> op, ValueType out_vt,
+multiclass sme2_cvt_vg2_single<string mnemonic, bits<5> op, ValueType out_vt,
ValueType in_vt, SDPatternOperator intrinsic> {
- def NAME : sme2_cvt_vg2_single<mnemonic, op>;
+ def NAME : sme2_cvt_vg2_single<mnemonic, op, ZPR16, ZZ_s_mul_r>;
def : SVE2p1_Cvt_VG2_Pat<NAME, intrinsic, out_vt, in_vt>;
}
+// SME2 multi-vec FP8 down convert two registers
+multiclass sme2_fp8_cvt_vg2_single<string mnemonic, bit op> {
+ def NAME : sme2_cvt_vg2_single<mnemonic, {op, 0b1000}, ZPR8, ZZ_h_mul_r>;
+}
+
class sme2_cvt_unpk_vector_vg2<bits<2>sz, bits<3> op, bit u, RegisterOperand first_ty,
RegisterOperand second_ty, string mnemonic>
: I<(outs first_ty:$Zd), (ins second_ty:$Zn),
@@ -2148,7 +2236,13 @@ multiclass sme2p1_fp_cvt_vector_vg2_single<string mnemonic, bit l> {
def _S : sme2_cvt_unpk_vector_vg2<0b10, 0b000, l, ZZ_s_mul_r, ZPR16, mnemonic>;
}
-class sme2_cvt_vg4_single<bit sz, bits<3> op, RegisterOperand first_ty,
+// SME2 multi-vec FP8 up convert two registers
+multiclass sme2p1_fp8_cvt_vector_vg2_single<string mnemonic, bits<2> opc, bit L> {
+ def _NAME : sme2_cvt_unpk_vector_vg2<opc, 0b110, L, ZZ_h_mul_r, ZPR8, mnemonic>;
+}
+
+
+class sme2_cvt_vg4_single<bit sz, bits<3> op, bits<4> op2, RegisterOperand first_ty,
RegisterOperand second_ty, string mnemonic>
: I<(outs first_ty:$Zd), (ins second_ty:$Zn),
mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
@@ -2157,7 +2251,9 @@ class sme2_cvt_vg4_single<bit sz, bits<3> op, RegisterOperand first_ty,
let Inst{31-24} = 0b11000001;
let Inst{23} = sz;
let Inst{22} = op{2};
- let Inst{21-10} = 0b110011111000;
+ let Inst{21-20} = 0b11;
+ let Inst{19-16} = op2;
+ let Inst{15-10} = 0b111000;
let Inst{9-7} = Zn;
let Inst{6-5} = op{1-0};
let Inst{4-0} = Zd;
@@ -2165,13 +2261,18 @@ class sme2_cvt_vg4_single<bit sz, bits<3> op, RegisterOperand first_ty,
// SME2 multi-vec int down convert four registers
multiclass sme2_int_cvt_vg4_single<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
- def _StoB : sme2_cvt_vg4_single<0, op, ZPR8, ZZZZ_s_mul_r, mnemonic>;
- def _DtoH : sme2_cvt_vg4_single<1, op, ZPR16, ZZZZ_d_mul_r, mnemonic>;
+ def _StoB : sme2_cvt_vg4_single<0, op, 0b0011, ZPR8, ZZZZ_s_mul_r, mnemonic>;
+ def _DtoH : sme2_cvt_vg4_single<1, op, 0b0011, ZPR16, ZZZZ_d_mul_r, mnemonic>;
def : SME2_Cvt_VG4_Pat<NAME # _StoB, intrinsic, nxv16i8, nxv4i32>;
def : SME2_Cvt_VG4_Pat<NAME # _DtoH, intrinsic, nxv8i16, nxv2i64>;
}
+// SME2 multi-vec FP8 down convert four registers
+multiclass sme2_fp8_cvt_vg4_single<string mnemonic, bit N> {
+ def _NAME : sme2_cvt_vg4_single<0b0, {0b00, N}, 0b0100, ZPR8, ZZZZ_s_mul_r, mnemonic>;
+}
+
class sme2_unpk_vector_vg4<bits<2>sz, bit u, RegisterOperand first_ty,
RegisterOperand second_ty, string mnemonic>
: I<(outs first_ty:$Zd), (ins second_ty:$Zn),
@@ -2307,8 +2408,7 @@ multiclass sme2_zip_vector_vg2<string mnemonic, bit op> {
//===----------------------------------------------------------------------===//
// SME2 Dot Products and MLA
-
-class sme2_multi_vec_array_vg2_index<bit sz, bits<6> op, MatrixOperand matrix_ty,
+class sme2_multi_vec_array_vg2_index<bits<2> sz, bits<6> op, MatrixOperand matrix_ty,
RegisterOperand multi_vector_ty,
ZPRRegOp vector_ty, Operand index_ty,
string mnemonic>
@@ -2321,8 +2421,8 @@ class sme2_multi_vec_array_vg2_index<bit sz, bits<6> op, MatrixOperand matrix_ty
bits<2> Rv;
bits<4> Zn;
bits<3> imm3;
- let Inst{31-23} = 0b110000010;
- let Inst{22} = sz;
+ let Inst{31-24} = 0b11000001;
+ let Inst{23-22} = sz;
let Inst{21-20} = 0b01;
let Inst{19-16} = Zm;
let Inst{15} = 0b0;
@@ -2336,16 +2436,15 @@ class sme2_multi_vec_array_vg2_index<bit sz, bits<6> op, MatrixOperand matrix_ty
}
// SME2 multi-vec ternary indexed two registers 32-bit
-multiclass sme2_multi_vec_array_vg2_index_32b<string mnemonic, bits<4> op,
+multiclass sme2_multi_vec_array_vg2_index_32b<string mnemonic, bits<2> sz, bits<4> op,
RegisterOperand multi_vector_ty,
ZPRRegOp vector_ty, ValueType vt,
SDPatternOperator intrinsic> {
- def NAME : sme2_multi_vec_array_vg2_index<0b1, {op{3},?,?,op{2-0}}, MatrixOp32, multi_vector_ty, vector_ty,
+ def NAME : sme2_multi_vec_array_vg2_index<sz, {op{3},?,?,op{2-0}}, MatrixOp32, multi_vector_ty, vector_ty,
VectorIndexS32b_timm, mnemonic>, SMEPseudo2Instr<NAME, 1> {
bits<2> i;
let Inst{11-10} = i;
}
-
def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexS32b_timm, SMEMatrixArray>;
def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexS32b_timm, tileslice16>;
@@ -2356,17 +2455,32 @@ multiclass sme2_multi_vec_array_vg2_index_32b<string mnemonic, bits<4> op,
}
// SME2.1 multi-vec ternary indexed two registers 16-bit
-multiclass sme2p1_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> op> {
- def NAME : sme2_multi_vec_array_vg2_index<0b0, {0b1,?,?,op,?}, MatrixOp16,
- ZZ_h_mul_r, ZPR4b16,
+// SME2 multi-vec indexed FP8 two-way dot product to FP16 two registers
+multiclass sme2p1_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> sz, bits<3> op,
+ RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty> {
+ def NAME : sme2_multi_vec_array_vg2_index<sz, {op{2},?,?,op{1-0},?}, MatrixOp16,
+ multi_vector_ty, zpr_ty,
VectorIndexH, mnemonic> {
bits<3> i;
let Inst{11-10} = i{2-1};
let Inst{3} = i{0};
}
+
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
(!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
- ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH:$i), 0>;
+ multi_vector_ty:$Zn, zpr_ty:$Zm, VectorIndexH:$i), 0>;
+}
+
+// SME2 multi-vec indexed FP8 two-way vertical dot product to single precision
+// two registers
+class sme2_fp8_multi_vec_array_vg4_index<string mnemonic, bit T>
+ : sme2_multi_vec_array_vg2_index<0b11, {0b01,?,0b0, T,?}, MatrixOp32,
+ ZZ_b_mul_r, ZPR4b8, VectorIndexS, mnemonic> {
+
+ bits<2> i;
+ let Inst{10} = i{1};
+ let Inst{3} = i{0};
+ let AsmString = !strconcat(mnemonic, "{\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i}");
}
// SME2 multi-vec ternary indexed two registers 64-bit
@@ -2415,7 +2529,7 @@ multiclass sme2_multi_vec_array_vg2_index_64b<string mnemonic, bits<2> op,
multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 0>;
}
-class sme2_multi_vec_array_vg4_index<bit sz, bits<6> op, MatrixOperand matrix_ty,
+class sme2_multi_vec_array_vg4_index<bit sz, bits<7> op, MatrixOperand matrix_ty,
RegisterOperand multi_vector_ty,
ZPRRegOp vector_ty, Operand index_ty,
string mnemonic>
@@ -2434,10 +2548,9 @@ class sme2_multi_vec_array_vg4_index<bit sz, bits<6> op, MatrixOperand matrix_ty
let Inst{19-16} = Zm;
let Inst{15} = 0b1;
let Inst{14-13} = Rv;
- let Inst{12-10} = op{5-3};
+ let Inst{12-10} = op{6-4};
let Inst{9-7} = Zn;
- let Inst{6} = 0b0;
- let Inst{5-3} = op{2-0};
+ let Inst{6-3} = op{3-0};
let Inst{2-0} = imm3;
let Constraints = "$ZAda = $_ZAda";
@@ -2448,7 +2561,7 @@ multiclass sme2_multi_vec_array_vg4_index_32b<string mnemonic, bits<4> op,
RegisterOperand multi_vector_ty,
ZPRRegOp vector_ty, ValueType vt,
SDPatternOperator intrinsic> {
- def NAME : sme2_multi_vec_array_vg4_index<0b1, {op{3},?,?,op{2-0}}, MatrixOp32, multi_vector_ty,
+ def NAME : sme2_multi_vec_array_vg4_index<0b1, {op{3},?,?,0b0, op{2-0}}, MatrixOp32, multi_vector_ty,
vector_ty, VectorIndexS32b_timm, mnemonic>, SMEPseudo2Instr<NAME, 1> {
bits<2> i;
let Inst{11-10} = i;
@@ -2464,9 +2577,11 @@ multiclass sme2_multi_vec_array_vg4_index_32b<string mnemonic, bits<4> op,
}
// SME2.1 multi-vec ternary indexed four registers 16-bit
-multiclass sme2p1_multi_vec_array_vg4_index_16b<string mnemonic, bits<2> op> {
+multiclass sme2p1_multi_vec_array_vg4_index_16b<string mnemonic, bits<3> op,
+ RegisterOperand multi_vector_ty,
+ ZPRRegOp zpr_ty> {
def NAME : sme2_multi_vec_array_vg4_index<0b0,{0b1,?,?,op,?}, MatrixOp16,
- ZZZZ_h_mul_r, ZPR4b16,
+ multi_vector_ty, zpr_ty,
VectorIndexH, mnemonic>{
bits<3> i;
let Inst{11-10} = i{2-1};
@@ -2475,7 +2590,7 @@ multiclass sme2p1_multi_vec_array_vg4_index_16b<string mnemonic, bits<2> op> {
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
(!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
- sme_elm_idx0_7:$imm3, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH:$i), 0>;
+ sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm, VectorIndexH:$i), 0>;
}
// SME2 multi-vec ternary indexed four registers 64-bit
@@ -2523,9 +2638,85 @@ multiclass sme2_multi_vec_array_vg4_index_64b<string mnemonic, bits<3> op,
(!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 0>;
}
+
+// FMLAL (multiple and indexed vector, FP8 to FP16)
+class sme2_multi_vec_array_vg24_index_16b<bits<2> sz, bit vg4, bits<3> op,
+ RegisterOperand multi_vector_ty, string mnemonic>
+ : I<(outs MatrixOp16:$ZAda),
+ (ins MatrixOp16:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2,
+ multi_vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexB:$i),
+ mnemonic, "\t$ZAda[$Rv, $imm2, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i",
+ "", []>, Sched<[]> {
+ bits<4> Zm;
+ bits<2> Rv;
+ bits<4> i;
+ bits<2> imm2;
+ let Inst{31-24} = 0b11000001;
+ let Inst{23-22} = sz;
+ let Inst{21-20} = 0b01;
+ let Inst{19-16} = Zm;
+ let Inst{15} = vg4;
+ let Inst{14-13} = Rv;
+ let Inst{12} = op{2};
+ let Inst{11-10} = i{3-2};
+ let Inst{5-4} = op{1-0};
+ let Inst{3-2} = i{1-0};
+ let Inst{1-0} = imm2;
+
+ let Constraints = "$ZAda = $_ZAda";
+}
+
+multiclass sme2_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> sz, bits<3> op> {
+ def NAME : sme2_multi_vec_array_vg24_index_16b<sz, 0b0, op, ZZ_b_mul_r, mnemonic> {
+ bits<4> Zn;
+ let Inst{9-6} = Zn;
+ }
+ def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
+ (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
+ uimm2s2range:$imm2, ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), 0>;
+}
+
+multiclass sme2_multi_vec_array_vg4_index_16b<string mnemonic, bits<2> sz, bits<3> op> {
+ def NAME: sme2_multi_vec_array_vg24_index_16b<sz, 0b1, op, ZZZZ_b_mul_r, mnemonic> {
+ bits<3> Zn;
+ let Inst{9-7} = Zn;
+ let Inst{6} = 0b0;
+ }
+ def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
+ (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
+ uimm2s2range:$imm2, ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), 0>;
+}
+
//===----------------------------------------------------------------------===//
+// SME2 multi-vec indexed long long MLA one source 16-bit
+class sme2_mla_ll_array_index_16b<string mnemonic, bits<2> sz, bits<2> op>
+ : I<(outs MatrixOp16:$ZAda),
+ (ins MatrixOp16:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm3s2range:$imm3, ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i),
+ mnemonic, "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
+ "", []>, Sched<[]> {
+ bits<4> Zm;
+ bits<2> Rv;
+ bits<4> i;
+ bits<5> Zn;
+ bits<3> imm3;
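+ // The lane index i is scattered: i{3} -> Inst{15}, i{2-1} -> Inst{11-10}, i{0} -> Inst{3}.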
+ let Inst{31-24} = 0b11000001;
+ let Inst{23-22} = sz;
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = Zm;
+ let Inst{15} = i{3};
+ let Inst{14-13} = Rv;
+ let Inst{12} = op{1};
+ let Inst{11-10} = i{2-1};
+ let Inst{9-5} = Zn;
+ let Inst{4} = op{0};
+ let Inst{3} = i{0};
+ let Inst{2-0} = imm3;
+
+ let Constraints = "$ZAda = $_ZAda";
+}
+
// SME2 multi-vec indexed long long MLA one source 32-bit
-class sme2_mla_ll_array_index_32b<string mnemonic, bits<3> op>
+class sme2_mla_ll_array_index_32b<string mnemonic, bits<2> sz, bits<3> op>
: I<(outs MatrixOp32:$ZAda),
(ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2, ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i),
mnemonic, "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
@@ -2535,7 +2726,9 @@ class sme2_mla_ll_array_index_32b<string mnemonic, bits<3> op>
bits<4> i;
bits<5> Zn;
bits<2> imm2;
- let Inst{31-20} = 0b110000010000;
+ let Inst{31-24} = 0b11000001;
+ let Inst{23-22} = sz;
+ let Inst{21-20} = 0b00;
let Inst{19-16} = Zm;
let Inst{15} = i{3};
let Inst{14-13} = Rv;
@@ -2547,8 +2740,8 @@ class sme2_mla_ll_array_index_32b<string mnemonic, bits<3> op>
let Constraints = "$ZAda = $_ZAda";
}
-multiclass sme2_mla_ll_array_index_32b<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
- def NAME : sme2_mla_ll_array_index_32b<mnemonic, op>, SMEPseudo2Instr<NAME, 1>;
+multiclass sme2_mla_ll_array_index_32b<string mnemonic, bits<2> sz, bits<3> op, SDPatternOperator intrinsic> {
+ def NAME : sme2_mla_ll_array_index_32b<mnemonic, sz, op>, SMEPseudo2Instr<NAME, 1>;
def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm2s4range, ZPR8, ZPR4b8, VectorIndexB32b_timm, SMEMatrixArray>;
@@ -2589,7 +2782,7 @@ multiclass sme2_mla_ll_array_index_64b<string mnemonic, bits<2> op, SDPatternOpe
def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME, intrinsic, uimm2s4range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange2s4>;
}
-class sme2_mla_ll_array_vg24_index_32b<bit vg4, bits<3> op,
+class sme2_mla_ll_array_vg24_index_32b<bits<2> sz, bit vg4, bits<3> op,
RegisterOperand vector_ty,
string mnemonic>
: I<(outs MatrixOp32:$ZAda),
@@ -2601,7 +2794,9 @@ class sme2_mla_ll_array_vg24_index_32b<bit vg4, bits<3> op,
bits<2> Rv;
bits<4> i;
bit imm;
- let Inst{31-20} = 0b110000010001;
+ let Inst{31-24} = 0b11000001;
+ let Inst{23-22} = sz;
+ let Inst{21-20} = 0b01;
let Inst{19-16} = Zm;
let Inst{15} = vg4;
let Inst{14-13} = Rv;
@@ -2616,8 +2811,8 @@ class sme2_mla_ll_array_vg24_index_32b<bit vg4, bits<3> op,
// SME2 multi-vec indexed long long MLA two sources 32-bit
-multiclass sme2_mla_ll_array_vg2_index_32b<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
- def NAME: sme2_mla_ll_array_vg24_index_32b<0b0, op, ZZ_b_mul_r, mnemonic>, SMEPseudo2Instr<NAME, 1> {
+multiclass sme2_mla_ll_array_vg2_index_32b<string mnemonic, bits<2> sz, bits<3> op, SDPatternOperator intrinsic> {
+ def NAME: sme2_mla_ll_array_vg24_index_32b<sz, 0b0, op, ZZ_b_mul_r, mnemonic>, SMEPseudo2Instr<NAME, 1> {
bits<4> Zn;
let Inst{9-6} = Zn;
}
@@ -2632,11 +2827,11 @@ multiclass sme2_mla_ll_array_vg2_index_32b<string mnemonic, bits<3> op, SDPatter
// SME2 multi-vec indexed long long MLA four sources 32-bit
-multiclass sme2_mla_ll_array_vg4_index_32b<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
- def NAME: sme2_mla_ll_array_vg24_index_32b<0b1, op, ZZZZ_b_mul_r, mnemonic>, SMEPseudo2Instr<NAME, 1> {
+multiclass sme2_mla_ll_array_vg4_index_32b<string mnemonic, bits<2> sz, bits<4> op, SDPatternOperator intrinsic> {
+ def NAME: sme2_mla_ll_array_vg24_index_32b<sz, 0b1, op{2-0}, ZZZZ_b_mul_r, mnemonic>, SMEPseudo2Instr<NAME, 1> {
bits<3> Zn;
let Inst{9-7} = Zn;
- let Inst{6} = 0b0;
+ let Inst{6} = op{3};
}
def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range, ZZZZ_b_mul_r, ZPR4b8, VectorIndexB32b_timm, SMEMatrixArray>;
@@ -2708,7 +2903,7 @@ multiclass sme2_mla_ll_array_vg4_index_64b<string mnemonic, bits<2> op, SDPatter
// SME2 multiple and single vector long long FMA one source
-class sme2_mla_ll_array_single<string mnemonic, bits<4> op,
+class sme2_mla_ll_array_single<string mnemonic, bits<5> op,
MatrixOperand matrix_ty, ZPRRegOp vector_ty,
ZPRRegOp zpr_ty>
: I<(outs matrix_ty:$ZAda),
@@ -2721,8 +2916,9 @@ class sme2_mla_ll_array_single<string mnemonic, bits<4> op,
bits<5> Zn;
bits<2> imm;
let Inst{31-23} = 0b110000010;
- let Inst{22} = op{3}; //sz
- let Inst{21-20} = 0b10;
+ let Inst{22} = op{4}; //sz
+ let Inst{21} = 0b1;
+ let Inst{20} = op{3}; //fp8
let Inst{19-16} = Zm;
let Inst{15} = 0b0;
let Inst{14-13} = Rv;
@@ -2734,7 +2930,7 @@ class sme2_mla_ll_array_single<string mnemonic, bits<4> op,
let Constraints = "$ZAda = $_ZAda";
}
-multiclass sme2_mla_ll_array_single<string mnemonic, bits<4> op,
+multiclass sme2_mla_ll_array_single<string mnemonic, bits<5> op,
MatrixOperand matrix_ty, ZPRRegOp vector_ty,
ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic> {
def NAME : sme2_mla_ll_array_single<mnemonic, op, matrix_ty, vector_ty, zpr_ty>, SMEPseudo2Instr<NAME, 1>;
@@ -2744,29 +2940,28 @@ multiclass sme2_mla_ll_array_single<string mnemonic, bits<4> op,
def : SME2_ZA_TwoOp_Multi_Single_Pat<NAME, intrinsic, uimm2s4range, zpr_ty, vt, tileslicerange2s4>;
}
-class sme2_mla_ll_array_vg24_single<bits<5> op, MatrixOperand matrix_ty,
+class sme2_mla_ll_array_vg24_single<bits<6> op, MatrixOperand matrix_ty,
RegisterOperand vector_ty, ZPRRegOp zpr_ty,
string mnemonic>
: I<(outs matrix_ty:$ZAda),
(ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
vector_ty:$Zn, zpr_ty:$Zm),
- mnemonic, "\t$ZAda[$Rv, $imm, " # !if(op{3}, "vgx4", "vgx2") # "], $Zn, $Zm",
+ mnemonic, "\t$ZAda[$Rv, $imm, " # !if(op{4}, "vgx4", "vgx2") # "], $Zn, $Zm",
"", []>, Sched<[]> {
bits<4> Zm;
bits<2> Rv;
bits<5> Zn;
bit imm;
let Inst{31-23} = 0b110000010;
- let Inst{22} = op{4}; //sz
+ let Inst{22} = op{5}; //sz
let Inst{21} = 0b1;
- let Inst{20} = op{3}; //vg4
+ let Inst{20} = op{4}; //vg4
let Inst{19-16} = Zm;
let Inst{15} = 0b0;
let Inst{14-13} = Rv;
let Inst{12-10} = 0b000;
let Inst{9-5} = Zn;
- let Inst{4-2} = op{2-0};
- let Inst{1} = 0b0;
+ let Inst{4-1} = op{3-0};
let Inst{0} = imm;
let Constraints = "$ZAda = $_ZAda";
@@ -2774,7 +2969,7 @@ class sme2_mla_ll_array_vg24_single<bits<5> op, MatrixOperand matrix_ty,
// SME2 single-multi long long MLA two and four sources
-multiclass sme2_mla_ll_array_vg24_single<string mnemonic, bits<5> op,
+multiclass sme2_mla_ll_array_vg24_single<string mnemonic, bits<6> op,
MatrixOperand matrix_ty,
RegisterOperand multi_vector_ty,
ZPRRegOp zpr_ty> {
@@ -2792,7 +2987,7 @@ multiclass sme2_mla_ll_array_vg2_single<string mnemonic, bits<5> op,
RegisterOperand multi_vector_ty,
ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic> {
- defm NAME: sme2_mla_ll_array_vg24_single<mnemonic, op, matrix_ty, multi_vector_ty, zpr_ty>;
+ defm NAME: sme2_mla_ll_array_vg24_single<mnemonic, {op, 0b0}, matrix_ty, multi_vector_ty, zpr_ty>;
def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME, intrinsic, uimm1s4range, zpr_ty, vt, tileslicerange1s4>;
}
@@ -2801,14 +2996,14 @@ multiclass sme2_mla_ll_array_vg4_single<string mnemonic, bits<5> op,
MatrixOperand matrix_ty,
RegisterOperand multi_vector_ty,
ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic> {
- defm NAME: sme2_mla_ll_array_vg24_single<mnemonic, op, matrix_ty, multi_vector_ty, zpr_ty>;
+ defm NAME: sme2_mla_ll_array_vg24_single<mnemonic, {op, 0b0}, matrix_ty, multi_vector_ty, zpr_ty>;
def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME, intrinsic, uimm1s4range, zpr_ty, vt, tileslicerange1s4>;
}
// SME2 multiple vectors long long MLA two sources
-class sme2_mla_ll_array_vg2_multi<bits<4> op, MatrixOperand matrix_ty,
+class sme2_mla_ll_array_vg2_multi<bits<5> op, MatrixOperand matrix_ty,
RegisterOperand vector_ty,string mnemonic>
: I<(outs matrix_ty:$ZAda),
(ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
@@ -2820,22 +3015,21 @@ class sme2_mla_ll_array_vg2_multi<bits<4> op, MatrixOperand matrix_ty,
bits<4> Zn;
bit imm;
let Inst{31-23} = 0b110000011;
- let Inst{22} = op{3}; // sz
+ let Inst{22} = op{4}; // sz
let Inst{21} = 0b1;
let Inst{20-17} = Zm;
let Inst{16-15} = 0b00;
let Inst{14-13} = Rv;
let Inst{12-10} = 0b000;
let Inst{9-6} = Zn;
- let Inst{5} = 0b0;
- let Inst{4-2} = op{2-0};
+ let Inst{5-2} = op{3-0};
let Inst{1} = 0b0;
let Inst{0} = imm;
let Constraints = "$ZAda = $_ZAda";
}
-multiclass sme2_mla_ll_array_vg2_multi<string mnemonic, bits<4> op,
+multiclass sme2_mla_ll_array_vg2_multi<string mnemonic, bits<5> op,
MatrixOperand matrix_ty,
RegisterOperand vector_ty,
ValueType vt, SDPatternOperator intrinsic> {
@@ -2851,7 +3045,7 @@ multiclass sme2_mla_ll_array_vg2_multi<string mnemonic, bits<4> op,
// SME2 multiple vectors long long MLA four sources
-class sme2_mla_ll_array_vg4_multi<bits<4> op,MatrixOperand matrix_ty,
+class sme2_mla_ll_array_vg4_multi<bits<5> op,MatrixOperand matrix_ty,
RegisterOperand vector_ty,
string mnemonic>
: I<(outs matrix_ty:$ZAda),
@@ -2864,22 +3058,22 @@ class sme2_mla_ll_array_vg4_multi<bits<4> op,MatrixOperand matrix_ty,
bits<3> Zn;
bit imm;
let Inst{31-23} = 0b110000011;
- let Inst{22} = op{3}; // sz
+ let Inst{22} = op{4}; // sz
let Inst{21} = 0b1;
let Inst{20-18} = Zm;
let Inst{17-15} = 0b010;
let Inst{14-13} = Rv;
let Inst{12-10} = 0b000;
let Inst{9-7} = Zn;
- let Inst{6-5} = 0b00;
- let Inst{4-2} = op{2-0};
+ let Inst{6} = 0b0;
+ let Inst{5-2} = op{3-0};
let Inst{1} = 0b0;
let Inst{0} = imm;
let Constraints = "$ZAda = $_ZAda";
}
-multiclass sme2_mla_ll_array_vg4_multi<string mnemonic, bits<4> op,
+multiclass sme2_mla_ll_array_vg4_multi<string mnemonic, bits<5> op,
MatrixOperand matrix_ty,
RegisterOperand vector_ty,
ValueType vt, SDPatternOperator intrinsic> {
@@ -2926,6 +3120,17 @@ class sme2_zero_zt<string mnemonic, bits<4> opc>
let Inst{3-0} = opc;
}
+multiclass sme2_zero_zt<string mnemonic, bits<4> opc> {
+ def NAME : sme2_zero_zt<mnemonic, opc>;
+ def NAME # _PSEUDO
+ : Pseudo<(outs), (ins ZTR:$ZT), []>, Sched<[]> {
+ // Translated to actual instruction in AArch64ISelLowering.cpp
+ let usesCustomInserter = 1;
+ }
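+ // Select int_aarch64_sme_zero_zt to the pseudo; the custom inserter later emits the real instruction.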
+ def : Pat<(int_aarch64_sme_zero_zt (imm_to_zt untyped:$zt)),
+ (!cast<Instruction>(NAME # _PSEUDO) $zt)>;
+}
+
//===----------------------------------------------------------------------===//
// SME2 lookup table load/store
class sme2_spill_fill_vector<string mnemonic, bits<8> opc>
@@ -2945,11 +3150,23 @@ class sme2_spill_fill_vector<string mnemonic, bits<8> opc>
let mayStore = opc{7};
}
+
+multiclass sme2_spill_fill_vector<string mnemonic, bits<8> opc, SDPatternOperator op> {
+ def NAME : sme2_spill_fill_vector<mnemonic, opc>;
+ def NAME # _PSEUDO
+ : Pseudo<(outs), (ins ZTR:$ZTt, GPR64sp:$base), []>, Sched<[]> {
+ // Translated to actual instruction in AArch64ISelLowering.cpp
+ let usesCustomInserter = 1;
+ }
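+ // Select the spill/fill intrinsic (ZT0 tile plus base address) to the pseudo.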
+ def : Pat<(op (imm_to_zt untyped:$tile), GPR64sp:$base),
+ (!cast<Instruction>(NAME # _PSEUDO) $tile, $base)>;
+}
+
//===----------------------------------------------------------------------===//
// SME2 move to/from lookup table
class sme2_movt_zt_to_scalar<string mnemonic, bits<7> opc>
: I<(outs GPR64:$Rt), (ins ZTR:$ZTt, uimm3s8:$imm3),
- mnemonic, "\t$Rt, $ZTt$imm3",
+ mnemonic, "\t$Rt, $ZTt[$imm3]",
"", []>, Sched<[]> {
bits<3> imm3;
bits<5> Rt;
@@ -2961,7 +3178,7 @@ class sme2_movt_zt_to_scalar<string mnemonic, bits<7> opc>
class sme2_movt_scalar_to_zt<string mnemonic, bits<7> opc>
: I<(outs ZTR:$ZTt), (ins uimm3s8:$imm3, GPR64:$Rt),
- mnemonic, "\t$ZTt$imm3, $Rt",
+ mnemonic, "\t$ZTt[$imm3], $Rt",
"", []>, Sched<[]> {
bits<3> imm3;
bits<5> Rt;
@@ -2971,6 +3188,25 @@ class sme2_movt_scalar_to_zt<string mnemonic, bits<7> opc>
let Inst{4-0} = Rt;
}
+// SME2 move vector to lookup table
+class sme2_movt_zt_to_zt<string mnemonic, bits<7> opc>
+ : I<(outs ZTR:$ZTt), (ins sme_elm_idx0_3:$off2, ZPRAny:$Zt),
+ mnemonic, "\t$ZTt[$off2, mul vl], $Zt",
+ "", []>, Sched<[]> {
+ bits<5> Zt;
+ bits<2> off2;
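+ // off2 is a vector-length-scaled offset into ZT0, per the "mul vl" in the assembly string.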
+ let Inst{31-14} = 0b110000000100111100;
+ let Inst{13-12} = off2;
+ let Inst{11-5} = opc;
+ let Inst{4-0} = Zt;
+}
+
+multiclass sme2_movt_zt_to_zt<string mnemonic, bits<7> opc> {
+ def NAME : sme2_movt_zt_to_zt<mnemonic, opc>;
+ def : InstAlias<mnemonic # "\t$ZTt, $Zt",
+ (!cast<Instruction>(NAME) ZTR:$ZTt, 0, ZPRAny:$Zt), 1>;
+}
+
//===----------------------------------------------------------------------===//
// SME2 lookup table expand one register
class sme2_luti_vector_index<bits<2> sz, bits<7> opc, RegisterOperand vector_ty,
@@ -2991,28 +3227,54 @@ class sme2_luti_vector_index<bits<2> sz, bits<7> opc, RegisterOperand vector_ty,
class sme2_luti2_vector_index<bits<2> sz, RegisterOperand vector_ty,
string mnemonic>
- : sme2_luti_vector_index<sz, {1,?,?,?,?,0,0}, vector_ty, VectorIndexB, mnemonic> {
+ : sme2_luti_vector_index<sz, {1,?,?,?,?,0,0}, vector_ty, VectorIndexB32b_timm, mnemonic> {
bits<4> i;
let Inst{17-14} = i;
}
-multiclass sme2_luti2_vector_index<string mnemonic> {
+multiclass sme2_luti2_vector_index<string mnemonic, SDPatternOperator intrinsic> {
def _B : sme2_luti2_vector_index<0b00, ZPR8, mnemonic>;
def _H : sme2_luti2_vector_index<0b01, ZPR16, mnemonic>;
def _S : sme2_luti2_vector_index<0b10, ZPR32, mnemonic>;
+
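+ // One selection pattern per result element type; f16/bf16 results reuse the _H variant.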
+ def : Pat<(nxv16i8 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))),
+ (!cast<Instruction>(NAME # _B) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>;
+ def : Pat<(nxv8i16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))),
+ (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>;
+ def : Pat<(nxv4i32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))),
+ (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>;
+ def : Pat<(nxv8f16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))),
+ (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>;
+ def : Pat<(nxv8bf16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))),
+ (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>;
+ def : Pat<(nxv4f32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))),
+ (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>;
}
class sme2_luti4_vector_index<bits<2> sz, RegisterOperand vector_ty,
string mnemonic>
- : sme2_luti_vector_index<sz, {0,1,?,?,?,0,0}, vector_ty, VectorIndexH, mnemonic> {
+ : sme2_luti_vector_index<sz, {0,1,?,?,?,0,0}, vector_ty, VectorIndexH32b_timm, mnemonic> {
bits<3> i;
let Inst{16-14} = i;
}
-multiclass sme2_luti4_vector_index<string mnemonic> {
+multiclass sme2_luti4_vector_index<string mnemonic, SDPatternOperator intrinsic> {
def _B : sme2_luti4_vector_index<0b00, ZPR8, mnemonic>;
def _H : sme2_luti4_vector_index<0b01, ZPR16, mnemonic>;
def _S : sme2_luti4_vector_index<0b10, ZPR32, mnemonic>;
+
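+ // Same pattern set as LUTI2 above, but with a halfword index operand.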
+ def : Pat<(nxv16i8 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))),
+ (!cast<Instruction>(NAME # _B) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>;
+ def : Pat<(nxv8i16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))),
+ (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>;
+ def : Pat<(nxv4i32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))),
+ (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>;
+ def : Pat<(nxv8f16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))),
+ (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>;
+ def : Pat<(nxv8bf16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))),
+ (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>;
+ def : Pat<(nxv4f32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))),
+ (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>;
}
// SME2 lookup table expand two contiguous registers
@@ -4625,3 +4887,36 @@ class sme2p1_luti4_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
multiclass sme2p1_luti4_vector_vg4_index<string mnemonic> {
def _H: sme2p1_luti4_vector_vg4_index<0b01, ZZZZ_h_strided, VectorIndexD, mnemonic>;
}
+
+// SME2 lookup table two source registers expand to four contiguous destination registers
+class sme2_luti4_vector_vg4<bits<2> sz, bits<2> op, string mnemonic>
+ : I<(outs ZZZZ_b_mul_r:$Zd), (ins ZTR:$ZTt, ZZ_mul_r:$Zn),
+ mnemonic, "\t$Zd, $ZTt, $Zn",
+ "", []>, Sched<[]> {
+ bits<4> Zn;
+ bits<3> Zd;
+ let Inst{31-14} = 0b110000001000101100;
+ let Inst{13-12} = sz;
+ let Inst{11-10} = op;
+ let Inst{9-6} = Zn;
+ let Inst{5} = 0b0;
+ let Inst{4-2} = Zd;
+ let Inst{1-0} = 0b00;
+}
+
+// SME2 lookup table two source registers expand to four non-contiguous destination registers
+class sme2_luti4_vector_vg4_strided<bits<2> sz, bits<2> op, string mnemonic>
+ : I<(outs ZZZZ_b_strided:$Zd), (ins ZTR:$ZTt, ZZ_mul_r:$Zn),
+ mnemonic, "\t$Zd, $ZTt, $Zn",
+ "", []>, Sched<[]> {
+ bits<4> Zn;
+ bits<3> Zd;
+ let Inst{31-14} = 0b110000001001101100;
+ let Inst{13-12} = sz;
+ let Inst{11-10} = op;
+ let Inst{9-6} = Zn;
+ let Inst{5} = 0b0;
+ let Inst{4} = Zd{2};
+ let Inst{3-2} = 0b00;
+ let Inst{1-0} = Zd{1-0};
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 4902ec3639ec..9edf26052247 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2118,6 +2118,29 @@ class sve_fp_2op_p_zds<bits<2> sz, bits<4> opc, string asm,
let mayRaiseFPException = 1;
}
+multiclass sve2p1_bf_2op_p_zds<bits<4> opc, string asm, string Ps,
+ SDPatternOperator op, DestructiveInstTypeEnum flags,
+ string revname="", bit isReverseInstr=0> {
+let DestructiveInstType = flags in {
+ def NAME : sve_fp_2op_p_zds<0b00, opc, asm, ZPR16>,
+ SVEPseudo2Instr<Ps, 1>, SVEInstr2Rev<NAME, revname, isReverseInstr>;
+ }
+
+ def : SVE_3_Op_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME)>;
+}
+
+multiclass sve2p1_bf_bin_pred_zds<SDPatternOperator op> {
+ def _UNDEF : PredTwoOpPseudo<NAME, ZPR16, FalseLanesUndef>;
+
+ def : SVE_3_Op_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, nxv8bf16, !cast<Pseudo>(NAME # _UNDEF)>;
+}
+
+multiclass sve2p1_bf_2op_p_zds_zeroing<SDPatternOperator op> {
+ def _ZERO : PredTwoOpPseudo<NAME, ZPR16, FalseLanesZero>;
+
+ def : SVE_3_Op_Pat_SelZero<nxv8bf16, op, nxv8i1, nxv8bf16, nxv8bf16, !cast<Pseudo>(NAME # _ZERO)>;
+}
+
multiclass sve_fp_2op_p_zds<bits<4> opc, string asm, string Ps,
SDPatternOperator op, DestructiveInstTypeEnum flags,
string revname="", bit isReverseInstr=0> {
@@ -2266,6 +2289,14 @@ multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op,
def : SVE_2_Op_Pred_All_Active<nxv2f64, predicated_op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
+multiclass sve2p1_bf_3op_u_zd<bits<3> opc1, string asm, SDPatternOperator op,
+ SDPatternOperator predicated_op = null_frag> {
+ def NAME : sve_fp_3op_u_zd<0b00, opc1, asm, ZPR16>;
+ def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME)>;
+
+ def : SVE_2_Op_Pred_All_Active<nxv8bf16, predicated_op, nxv8i1, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME)>;
+}
+
multiclass sve_fp_3op_u_zd_ftsmul<bits<3> opc, string asm, SDPatternOperator op> {
def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16>;
def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32>;
@@ -2324,6 +2355,14 @@ multiclass sve_fp_3op_p_zds_a<bits<2> opc, string asm, string Ps,
def : SVE_4_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
+multiclass sve_fp_3op_p_zds_a_bf<bits<2> opc, string asm, string Ps,
+ SDPatternOperator op> {
+ def NAME : sve_fp_3op_p_zds_a<0b00, opc, asm, ZPR16>,
+ SVEPseudo2Instr<Ps, 1>, SVEInstr2Rev<NAME, "", 0>;
+
+ def : SVE_4_Op_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME)>;
+}
+
class sve_fp_3op_p_zds_b<bits<2> sz, bits<2> opc, string asm,
ZPRRegOp zprty>
: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm, zprty:$Za),
@@ -2391,7 +2430,7 @@ class sve_fp_fma_by_indexed_elem<bits<2> sz, bits<2> opc, string asm,
let mayRaiseFPException = 1;
}
-multiclass sve2p1_fp_bfma_by_indexed_elem<string asm, bits<2> opc> {
+multiclass sve2p1_fp_bfma_by_indexed_elem<string asm, bits<2> opc, SDPatternOperator op> {
def NAME : sve_fp_fma_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR3b16,
VectorIndexH32b> {
bits<3> Zm;
@@ -2400,6 +2439,8 @@ multiclass sve2p1_fp_bfma_by_indexed_elem<string asm, bits<2> opc> {
let Inst{20-19} = iop{1-0};
let Inst{18-16} = Zm;
}
+ def : Pat<(nxv8bf16 (op nxv8bf16:$op1, nxv8bf16:$op2, nxv8bf16:$op3, (i32 VectorIndexH32b_timm:$idx))),
+ (!cast<Instruction>(NAME) $op1, $op2, $op3, VectorIndexH32b_timm:$idx)>;
}
multiclass sve_fp_fma_by_indexed_elem<bits<2> opc, string asm,
@@ -2456,7 +2497,7 @@ class sve_fp_fmul_by_indexed_elem<bits<2> sz, bit o2, string asm, ZPRRegOp zprty
let mayRaiseFPException = 1;
}
-multiclass sve2p1_fp_bfmul_by_indexed_elem<string asm> {
+multiclass sve2p1_fp_bfmul_by_indexed_elem<string asm, SDPatternOperator ir_intrinsic> {
def NAME : sve_fp_fmul_by_indexed_elem<{0, ?}, 0b1, asm, ZPR16, ZPR3b16, VectorIndexH32b> {
bits<3> Zm;
bits<3> iop;
@@ -2464,6 +2505,8 @@ multiclass sve2p1_fp_bfmul_by_indexed_elem<string asm> {
let Inst{20-19} = iop{1-0};
let Inst{18-16} = Zm;
}
+ def : Pat<(nxv8bf16 (ir_intrinsic nxv8bf16:$Op1, nxv8bf16:$Op2, (i32 VectorIndexH32b_timm:$idx))),
+ (!cast<Instruction>(NAME) $Op1, $Op2, VectorIndexH32b_timm:$idx)>;
}
multiclass sve_fp_fmul_by_indexed_elem<string asm, SDPatternOperator op> {
@@ -7275,29 +7318,18 @@ class sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm,
let mayLoad = 1;
}
-multiclass sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm,
- RegisterOperand listty, ZPRRegOp zprty> {
- def _REAL : sve_mem_cld_si_base<dtype, nf, asm, listty>;
+multiclass sve_mem_cld_si<bits<4> dtype, string asm, RegisterOperand listty,
+ ZPRRegOp zprty> {
+ def "" : sve_mem_cld_si_base<dtype, 0, asm, listty>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
- (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>;
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]",
- (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>;
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
- (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>;
-
- // We need a layer of indirection because early machine code passes balk at
- // physical register (i.e. FFR) uses that have no previous definition.
- let hasSideEffects = 1, hasNoSchedulingInfo = 1, mayLoad = 1 in {
- def "" : Pseudo<(outs listty:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), []>,
- PseudoInstExpansion<(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4)>;
- }
+ (!cast<Instruction>(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>;
}
-multiclass sve_mem_cld_si<bits<4> dtype, string asm, RegisterOperand listty,
- ZPRRegOp zprty>
-: sve_mem_cld_si_base<dtype, 0, asm, listty, zprty>;
-
class sve_mem_cldnt_si_base<bits<2> msz, string asm, RegisterOperand VecList>
: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4),
asm, "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]",
@@ -7516,8 +7548,23 @@ multiclass sve_mem_cldff_ss<bits<4> dtype, string asm, RegisterOperand listty,
}
multiclass sve_mem_cldnf_si<bits<4> dtype, string asm, RegisterOperand listty,
- ZPRRegOp zprty>
-: sve_mem_cld_si_base<dtype, 1, asm, listty, zprty>;
+ ZPRRegOp zprty> {
+ def _REAL : sve_mem_cld_si_base<dtype, 1, asm, listty>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
+ (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]",
+ (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
+ (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>;
+
+ // We need a layer of indirection because early machine code passes balk at
+ // physical register (i.e. FFR) uses that have no previous definition.
+ let hasSideEffects = 1, hasNoSchedulingInfo = 1, mayLoad = 1 in {
+ def "" : Pseudo<(outs listty:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), []>,
+ PseudoInstExpansion<(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4)>;
+ }
+}
class sve_mem_eld_si<bits<2> sz, bits<3> nregs, RegisterOperand VecList,
string asm, Operand immtype>
@@ -8721,8 +8768,8 @@ multiclass sve2_crypto_unary_op<bit opc, string asm, SDPatternOperator op> {
// SVE BFloat16 Group
//===----------------------------------------------------------------------===//
-class sve_float_dot<bit bf, string asm>
-: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm),
+class sve_float_dot<bit bf, bit o2, ZPRRegOp dst_ty, ZPRRegOp src_ty, string asm>
+: I<(outs dst_ty:$Zda), (ins dst_ty:$_Zda, src_ty:$Zn, src_ty:$Zm),
asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
bits<5> Zda;
bits<5> Zn;
@@ -8731,7 +8778,8 @@ class sve_float_dot<bit bf, string asm>
let Inst{22} = bf;
let Inst{21} = 0b1;
let Inst{20-16} = Zm;
- let Inst{15-10} = 0b100000;
+ let Inst{15-11} = 0b10000;
+ let Inst{10} = o2;
let Inst{9-5} = Zn;
let Inst{4-0} = Zda;
@@ -8741,24 +8789,24 @@ class sve_float_dot<bit bf, string asm>
let mayRaiseFPException = 1;
}
-multiclass sve_float_dot<bit bf, string asm, ValueType InVT, SDPatternOperator op> {
- def NAME : sve_float_dot<bf, asm>;
+multiclass sve_float_dot<bit bf, bit o2, ZPRRegOp dst_ty, ZPRRegOp src_ty,
+ string asm, ValueType InVT, SDPatternOperator op> {
+ def NAME : sve_float_dot<bf, o2, dst_ty, src_ty, asm>;
def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, InVT, InVT, !cast<Instruction>(NAME)>;
}
-class sve_float_dot_indexed<bit bf, string asm>
-: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS32b:$iop),
+class sve_float_dot_indexed<bit bf, ZPRRegOp dst_ty, ZPRRegOp src1_ty,
+ ZPRRegOp src2_ty, Operand iop_ty, string asm>
+: I<(outs dst_ty:$Zda), (ins dst_ty:$_Zda, src1_ty:$Zn, src2_ty:$Zm, iop_ty:$iop),
asm, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> {
bits<5> Zda;
bits<5> Zn;
bits<3> Zm;
- bits<2> iop;
let Inst{31-23} = 0b011001000;
let Inst{22} = bf;
let Inst{21} = 0b1;
- let Inst{20-19} = iop;
let Inst{18-16} = Zm;
- let Inst{15-10} = 0b010000;
+ let Inst{15-12} = 0b0100;
let Inst{9-5} = Zn;
let Inst{4-0} = Zda;
@@ -8768,8 +8816,14 @@ class sve_float_dot_indexed<bit bf, string asm>
let mayRaiseFPException = 1;
}
-multiclass sve_float_dot_indexed<bit bf, string asm, ValueType InVT, SDPatternOperator op> {
- def NAME : sve_float_dot_indexed<bf, asm>;
+multiclass sve_float_dot_indexed<bit bf, bits<2> opc, ZPRRegOp src1_ty,
+ ZPRRegOp src2_ty, string asm, ValueType InVT,
+ SDPatternOperator op> {
+ def NAME : sve_float_dot_indexed<bf, ZPR32, src1_ty, src2_ty, VectorIndexS32b, asm> {
+ bits<2> iop;
+ let Inst{20-19} = iop;
+ let Inst{11-10} = opc;
+ }
def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, InVT, InVT, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
}
@@ -9093,6 +9147,12 @@ multiclass sve_fp_3op_pred_hfd<SDPatternOperator op> {
def : SVE_4_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D_UNDEF)>;
}
+multiclass sve_fp_3op_pred_bf<SDPatternOperator op> {
+ def _UNDEF : PredThreeOpPseudo<NAME, ZPR16, FalseLanesUndef>;
+
+ def : SVE_4_Op_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _UNDEF)>;
+}
+
// Predicated pseudo integer two operand instructions.
multiclass sve_int_bin_pred_bhsd<SDPatternOperator op> {
def _B_UNDEF : PredTwoOpPseudo<NAME # _B, ZPR8, FalseLanesUndef>;
@@ -9178,6 +9238,11 @@ multiclass sve2p1_fclamp<string asm, SDPatternOperator op> {
def : SVE_3_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
+multiclass sve2p1_bfclamp<string asm, SDPatternOperator op> {
+ def NAME : sve2p1_fclamp<asm, 0b00, ZPR16>;
+ def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME)>;
+}
+
// SVE two-way dot product
class sve2p1_two_way_dot_vv<string mnemonic, bit u>
: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm),
@@ -9242,6 +9307,7 @@ class sve2p1_ptrue_pn<string mnemonic, bits<2> sz, PNRP8to15RegOp pnrty, SDPatte
let Inst{2-0} = PNd;
let hasSideEffects = 0;
+ let isReMaterializable = 1;
}
@@ -9386,6 +9452,12 @@ class sve2p1_mem_cld_ss_2z<string mnemonic, bits<2> msz, bit n,
let mayLoad = 1;
}
+multiclass sve2p1_mem_cld_ss_2z<string mnemonic, bits<2> msz, bit n,
+ RegisterOperand vector_ty, RegisterOperand gpr_ty, RegisterOperand vector_pseudo_ty> {
+ def NAME # _PSEUDO : Pseudo<(outs vector_pseudo_ty:$Zt), (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm), []>;
+ def NAME : sve2p1_mem_cld_ss_2z<mnemonic, msz, n, vector_ty, gpr_ty>;
+}
+
// SME2 multi-vec contiguous load (scalar plus immediate, two registers)
class sve2p1_mem_cld_si_2z<string mnemonic, bits<2> msz, bit n,
RegisterOperand vector_ty>
@@ -9411,11 +9483,11 @@ class sve2p1_mem_cld_si_2z<string mnemonic, bits<2> msz, bit n,
}
multiclass sve2p1_mem_cld_si_2z<string mnemonic, bits<2> msz, bit n,
- RegisterOperand vector_ty> {
+ RegisterOperand vector_ty, RegisterOperand vector_pseudo_ty> {
def NAME : sve2p1_mem_cld_si_2z<mnemonic, msz, n, vector_ty>;
-
def : InstAlias<mnemonic # " $Zt, $PNg/z, [$Rn]",
(!cast<Instruction>(NAME) vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, 0), 1>;
+ def NAME # _PSEUDO : Pseudo<(outs vector_pseudo_ty:$Zt), (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, simm4s2:$imm4), []>;
}
// SME2 multi-vec contiguous load (scalar plus scalar, four registers)
@@ -9443,6 +9515,12 @@ class sve2p1_mem_cld_ss_4z<string mnemonic, bits<2> msz, bit n,
let mayLoad = 1;
}
+multiclass sve2p1_mem_cld_ss_4z<string mnemonic, bits<2> msz, bit n,
+ RegisterOperand vector_ty, RegisterOperand gpr_ty, RegisterOperand vector_pseudo_ty> {
+ def NAME # _PSEUDO : Pseudo<(outs vector_pseudo_ty:$Zt), (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm), []>;
+ def NAME : sve2p1_mem_cld_ss_4z<mnemonic, msz, n, vector_ty, gpr_ty>;
+}
+
// SME2 multi-vec contiguous load (scalar plus immediate, four registers)
class sve2p1_mem_cld_si_4z<string mnemonic, bits<2> msz, bit n,
RegisterOperand vector_ty>
@@ -9469,14 +9547,13 @@ class sve2p1_mem_cld_si_4z<string mnemonic, bits<2> msz, bit n,
}
multiclass sve2p1_mem_cld_si_4z<string mnemonic, bits<2> msz, bit n,
- RegisterOperand vector_ty> {
+ RegisterOperand vector_ty, RegisterOperand vector_pseudo_ty> {
def NAME : sve2p1_mem_cld_si_4z<mnemonic, msz, n, vector_ty>;
-
def : InstAlias<mnemonic # " $Zt, $PNg/z, [$Rn]",
(!cast<Instruction>(NAME) vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, 0), 1>;
+ def NAME # _PSEUDO : Pseudo<(outs vector_pseudo_ty:$Zt), (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, simm4s4:$imm4), []>;
}
-
// SME2 multi-vec contiguous store (scalar plus scalar, two registers)
class sve2p1_mem_cst_ss_2z<string mnemonic, bits<2> msz, bit n,
RegisterOperand vector_ty, RegisterOperand gpr_ty>
@@ -9728,11 +9805,30 @@ class sve_mem_128b_gld_64_unscaled<string mnemonic>
}
-multiclass sve_mem_128b_gld_64_unscaled<string mnemonic> {
+multiclass sve_mem_128b_gld_64_unscaled<string mnemonic, SDPatternOperator op> {
def NAME : sve_mem_128b_gld_64_unscaled<mnemonic>;
def : InstAlias<mnemonic # " $Zt, $Pg/z, [$Zn]",
(!cast<Instruction>(NAME) Z_q:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 1>;
+
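+ // LD1Q patterns: the predicate element count follows the loaded element type, while the address vector is always nxv2i64.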
+ def : Pat<(nxv2i64 (op (nxv2i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv2i64)),
+ (!cast<Instruction>(NAME) PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
+ def : Pat<(nxv4i32 (op (nxv4i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv4i32)),
+ (!cast<Instruction>(NAME) PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
+ def : Pat<(nxv8i16 (op (nxv8i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv8i16)),
+ (!cast<Instruction>(NAME) PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
+ def : Pat<(nxv16i8 (op (nxv16i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv16i8)),
+ (!cast<Instruction>(NAME) PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
+
+ def : Pat<(nxv2f64 (op (nxv2i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv2f64)),
+ (!cast<Instruction>(NAME) PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
+ def : Pat<(nxv4f32 (op (nxv4i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv4f32)),
+ (!cast<Instruction>(NAME) PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
+ def : Pat<(nxv8f16 (op (nxv8i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv8f16)),
+ (!cast<Instruction>(NAME) PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
+ def : Pat<(nxv8bf16 (op (nxv8i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv8bf16)),
+ (!cast<Instruction>(NAME) PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
}
class sve_mem_sst_128b_64_unscaled<string mnemonic>
@@ -9755,11 +9851,29 @@ class sve_mem_sst_128b_64_unscaled<string mnemonic>
}
-multiclass sve_mem_sst_128b_64_unscaled<string mnemonic> {
+multiclass sve_mem_sst_128b_64_unscaled<string mnemonic, SDPatternOperator op> {
def NAME : sve_mem_sst_128b_64_unscaled<mnemonic>;
def : InstAlias<mnemonic # " $Zt, $Pg, [$Zn]",
(!cast<Instruction>(NAME) Z_q:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 1>;
+
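+ // ST1Q patterns, mirroring the LD1Q set above.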
+ def : Pat<(op (nxv2i64 Z_q:$Zt), (nxv2i1 PPR3bAny:$gp), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv2i64),
+ (!cast<Instruction>(NAME) Z_q:$Zt, PPR3bAny:$gp, ZPR64:$Zn, GPR64:$Rm)>;
+ def : Pat<(op (nxv4i32 Z_q:$Zt), (nxv4i1 PPR3bAny:$gp), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv4i32),
+ (!cast<Instruction>(NAME) Z_q:$Zt, PPR3bAny:$gp, ZPR64:$Zn, GPR64:$Rm)>;
+ def : Pat<(op (nxv8i16 Z_q:$Zt), (nxv8i1 PPR3bAny:$gp), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv8i16),
+ (!cast<Instruction>(NAME) Z_q:$Zt, PPR3bAny:$gp, ZPR64:$Zn, GPR64:$Rm)>;
+ def : Pat<(op (nxv16i8 Z_q:$Zt), (nxv16i1 PPR3bAny:$gp), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv16i8),
+ (!cast<Instruction>(NAME) Z_q:$Zt, PPR3bAny:$gp, ZPR64:$Zn, GPR64:$Rm)>;
+
+ def : Pat<(op (nxv2f64 Z_q:$Zt), (nxv2i1 PPR3bAny:$gp), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv2f64),
+ (!cast<Instruction>(NAME) Z_q:$Zt, PPR3bAny:$gp, ZPR64:$Zn, GPR64:$Rm)>;
+ def : Pat<(op (nxv4f32 Z_q:$Zt), (nxv4i1 PPR3bAny:$gp), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv4f32),
+ (!cast<Instruction>(NAME) Z_q:$Zt, PPR3bAny:$gp, ZPR64:$Zn, GPR64:$Rm)>;
+ def : Pat<(op (nxv8f16 Z_q:$Zt), (nxv8i1 PPR3bAny:$gp), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv8f16),
+ (!cast<Instruction>(NAME) Z_q:$Zt, PPR3bAny:$gp, ZPR64:$Zn, GPR64:$Rm)>;
+ def : Pat<(op (nxv8bf16 Z_q:$Zt), (nxv8i1 PPR3bAny:$gp), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv8bf16),
+ (!cast<Instruction>(NAME) Z_q:$Zt, PPR3bAny:$gp, ZPR64:$Zn, GPR64:$Rm)>;
}
@@ -9849,10 +9963,14 @@ class sve2p1_fp_reduction_q<bits<2> sz, bits<3> opc, string mnemonic,
let mayRaiseFPException = 1;
}
-multiclass sve2p1_fp_reduction_q<bits<3> opc, string mnemonic> {
+multiclass sve2p1_fp_reduction_q<bits<3> opc, string mnemonic, SDPatternOperator op> {
def _H : sve2p1_fp_reduction_q<0b01, opc, mnemonic, ZPR16, "8h">;
def _S : sve2p1_fp_reduction_q<0b10, opc, mnemonic, ZPR32, "4s">;
def _D : sve2p1_fp_reduction_q<0b11, opc, mnemonic, ZPR64, "2d">;
+
+ def : SVE_2_Op_Pat<v8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<v4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<v2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
@@ -9894,7 +10012,7 @@ multiclass sve2p1_dupq<string mnemonic> {
// SVE Permute Vector - Quadwords (EXTQ)
class sve2p1_extq<string mnemonic>
- : I<(outs ZPR8:$Zdn), (ins ZPR8:$_Zdn, ZPR8:$Zm, imm0_15:$imm4),
+ : I<(outs ZPR8:$Zdn), (ins ZPR8:$_Zdn, ZPR8:$Zm, timm32_0_15:$imm4),
mnemonic, "\t$Zdn, $_Zdn, $Zm, $imm4",
"", []>, Sched<[]> {
bits<5> Zdn;
@@ -9912,6 +10030,19 @@ class sve2p1_extq<string mnemonic>
let hasSideEffects = 0;
}
+multiclass sve2p1_extq<string mnemonic, SDPatternOperator Op> {
+ def NAME : sve2p1_extq<mnemonic>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, Op, nxv16i8, nxv16i8, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, Op, nxv8i16, nxv8i16, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, Op, nxv4i32, nxv4i32, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, Op, nxv2i64, nxv2i64, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+
+ def : SVE_3_Op_Imm_Pat<nxv8f16, Op, nxv8f16, nxv8f16, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_Imm_Pat<nxv4f32, Op, nxv4f32, nxv4f32, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_Imm_Pat<nxv2f64, Op, nxv2f64, nxv2f64, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_Imm_Pat<nxv8bf16, Op, nxv8bf16, nxv8bf16, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+}
// SVE move predicate from vector
class sve2p1_vector_to_pred<bits<4> opc, string mnemonic,
@@ -9933,8 +10064,8 @@ class sve2p1_vector_to_pred<bits<4> opc, string mnemonic,
let hasSideEffects = 0;
}
-multiclass sve2p1_vector_to_pred<string mnemonic> {
- def _B : sve2p1_vector_to_pred<{0, 0, 0, 1}, mnemonic, PPR8, VectorIndex0>;
+multiclass sve2p1_vector_to_pred<string mnemonic, SDPatternOperator Op_lane, SDPatternOperator Op> {
+ def _B : sve2p1_vector_to_pred<{0, 0, 0, 1}, mnemonic, PPR8, VectorIndex032b>;
def _H : sve2p1_vector_to_pred<{0, 0, 1, ?}, mnemonic, PPR16, VectorIndexD32b> {
bits<1> index;
let Inst{17} = index;
@@ -9951,6 +10082,25 @@ multiclass sve2p1_vector_to_pred<string mnemonic> {
def : InstAlias<mnemonic # "\t$Pd, $Zn",
(!cast<Instruction>(NAME # _B) PPR8:$Pd, ZPRAny:$Zn, 0), 1>;
+
+ // any_lane
+ def : Pat<(nxv16i1 (Op_lane (nxv16i8 ZPRAny:$Zn), (i32 timm32_0_0:$Idx))),
+ (!cast<Instruction>(NAME # _B) ZPRAny:$Zn, timm32_0_0:$Idx)>;
+ def : Pat<(nxv8i1 (Op_lane (nxv8i16 ZPRAny:$Zn), (i32 timm32_0_1:$Idx))),
+ (!cast<Instruction>(NAME # _H) ZPRAny:$Zn, timm32_0_1:$Idx)>;
+ def : Pat<(nxv4i1 (Op_lane (nxv4i32 ZPRAny:$Zn), (i32 timm32_0_3:$Idx))),
+ (!cast<Instruction>(NAME # _S) ZPRAny:$Zn, timm32_0_3:$Idx)>;
+ def : Pat<(nxv2i1 (Op_lane (nxv2i64 ZPRAny:$Zn), (i32 timm32_0_7:$Idx))),
+ (!cast<Instruction>(NAME # _D) ZPRAny:$Zn, timm32_0_7:$Idx)>;
+ // lane_0
+ def : Pat<(nxv16i1 (Op (nxv16i8 ZPRAny:$Zn))),
+ (!cast<Instruction>(NAME # _B) ZPRAny:$Zn, 0)>;
+ def : Pat<(nxv8i1 (Op (nxv8i16 ZPRAny:$Zn))),
+ (!cast<Instruction>(NAME # _H) ZPRAny:$Zn, 0)>;
+ def : Pat<(nxv4i1 (Op (nxv4i32 ZPRAny:$Zn))),
+ (!cast<Instruction>(NAME # _S) ZPRAny:$Zn, 0)>;
+ def : Pat<(nxv2i1 (Op (nxv2i64 ZPRAny:$Zn))),
+ (!cast<Instruction>(NAME # _D) ZPRAny:$Zn, 0)>;
}
@@ -9974,7 +10124,8 @@ class sve2p1_pred_to_vector<bits<4> opc, string mnemonic,
let hasSideEffects = 0;
}
-multiclass sve2p1_pred_to_vector<string mnemonic> {
+multiclass sve2p1_pred_to_vector<string mnemonic, SDPatternOperator MergeOp,
+ SDPatternOperator ZeroOp> {
def _B : sve2p1_pred_to_vector<{0, 0, 0, 1}, mnemonic, PPR8, VectorIndex0>;
def _H : sve2p1_pred_to_vector<{0, 0, 1, ?}, mnemonic, PPR16, VectorIndexD32b> {
bits<1> index;
@@ -9992,6 +10143,24 @@ multiclass sve2p1_pred_to_vector<string mnemonic> {
def : InstAlias<mnemonic # "\t$Zd, $Pn",
(!cast<Instruction>(NAME # _B) ZPRAny:$Zd, 0, PPR8:$Pn), 1>;
+
+ // Merge
+ def : Pat<(nxv8i16 (MergeOp (nxv8i16 ZPRAny:$Zd), (nxv8i1 PPR16:$Pn), (i32 timm32_1_1:$Idx))),
+ (!cast<Instruction>(NAME # _H) ZPRAny:$Zd, timm32_1_1:$Idx, PPR16:$Pn)>;
+ def : Pat<(nxv4i32 (MergeOp (nxv4i32 ZPRAny:$Zd), (nxv4i1 PPR32:$Pn), (i32 timm32_1_3:$Idx))),
+ (!cast<Instruction>(NAME # _S) ZPRAny:$Zd, timm32_1_3:$Idx, PPR32:$Pn)>;
+ def : Pat<(nxv2i64 (MergeOp (nxv2i64 ZPRAny:$Zd), (nxv2i1 PPR64:$Pn), (i32 timm32_1_7:$Idx))),
+ (!cast<Instruction>(NAME # _D) ZPRAny:$Zd, timm32_1_7:$Idx, PPR64:$Pn)>;
+
+ // Zero
+ def : Pat<(nxv16i8 (ZeroOp (nxv16i1 PPR8:$Pn))),
+ (!cast<Instruction>(NAME # _B) (IMPLICIT_DEF), 0, PPR8:$Pn)>;
+ def : Pat<(nxv8i16 (ZeroOp (nxv8i1 PPR16:$Pn))),
+ (!cast<Instruction>(NAME # _H) (IMPLICIT_DEF), 0, PPR16:$Pn)>;
+ def : Pat<(nxv4i32 (ZeroOp (nxv4i1 PPR32:$Pn))),
+ (!cast<Instruction>(NAME # _S) (IMPLICIT_DEF), 0, PPR32:$Pn)>;
+ def : Pat<(nxv2i64 (ZeroOp (nxv2i1 PPR64:$Pn))),
+ (!cast<Instruction>(NAME # _D) (IMPLICIT_DEF), 0, PPR64:$Pn)>;
}
@@ -10018,11 +10187,16 @@ class sve2p1_int_reduce_q<bits<2> sz, bits<4> opc, string mnemonic,
let hasSideEffects = 0;
}
-multiclass sve2p1_int_reduce_q<bits<4> opc, string mnemonic> {
+multiclass sve2p1_int_reduce_q<bits<4> opc, string mnemonic, SDPatternOperator op> {
def _B : sve2p1_int_reduce_q<0b00, opc, mnemonic, ZPR8, "16b">;
def _H : sve2p1_int_reduce_q<0b01, opc, mnemonic, ZPR16, "8h">;
def _S : sve2p1_int_reduce_q<0b10, opc, mnemonic, ZPR32, "4s">;
def _D : sve2p1_int_reduce_q<0b11, opc, mnemonic, ZPR64, "2d">;
+
+ def : SVE_2_Op_Pat<v16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<v8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<v4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<v2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
@@ -10047,16 +10221,215 @@ class sve2p1_permute_vec_elems_q<bits<2> sz, bits<3> opc, string mnemonic,
let hasSideEffects = 0;
}
-multiclass sve2p1_permute_vec_elems_q<bits<3> opc, string mnemonic> {
+multiclass sve2p1_permute_vec_elems_q<bits<3> opc, string mnemonic,
+ SDPatternOperator op> {
def _B : sve2p1_permute_vec_elems_q<0b00, opc, mnemonic, ZPR8, ZPR8>;
def _H : sve2p1_permute_vec_elems_q<0b01, opc, mnemonic, ZPR16, ZPR16>;
def _S : sve2p1_permute_vec_elems_q<0b10, opc, mnemonic, ZPR32, ZPR32>;
def _D : sve2p1_permute_vec_elems_q<0b11, opc, mnemonic, ZPR64, ZPR64>;
+
+ def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
}
-multiclass sve2p1_tblq<string mnemonic> {
+multiclass sve2p1_tblq<string mnemonic, SDPatternOperator op> {
def _B : sve2p1_permute_vec_elems_q<0b00, 0b110, mnemonic, ZPR8, Z_b>;
def _H : sve2p1_permute_vec_elems_q<0b01, 0b110, mnemonic, ZPR16, Z_h>;
def _S : sve2p1_permute_vec_elems_q<0b10, 0b110, mnemonic, ZPR32, Z_s>;
def _D : sve2p1_permute_vec_elems_q<0b11, 0b110, mnemonic, ZPR64, Z_d>;
+
+ def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 FP8 Instructions
+//===----------------------------------------------------------------------===//
+
+// FP8 upconvert
+class sve2_fp8_cvt_single<bit L, bits<2> opc, string mnemonic,
+ ZPRRegOp dst_ty, ZPRRegOp src_ty>
+ : I<(outs dst_ty:$Zd), (ins src_ty:$Zn),
+ mnemonic, "\t$Zd, $Zn",
+ "", []>, Sched<[]>{
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-17} = 0b011001010000100;
+ let Inst{16} = L;
+ let Inst{15-12} = 0b0011;
+ let Inst{11-10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_fp8_cvt_single<bit L, bits<2> opc, string mnemonic> {
+ def _BtoH : sve2_fp8_cvt_single<L, opc, mnemonic, ZPR16, ZPR8>;
+}
+
+// FP8 downconvert
+class sve2_fp8_down_cvt_single<bits<2> opc, string mnemonic,
+ ZPRRegOp dst_ty, RegisterOperand src_ty>
+ : I<(outs dst_ty:$Zd), (ins src_ty:$Zn),
+ mnemonic, "\t$Zd, $Zn",
+ "", []>, Sched<[]>{
+ bits<5> Zd;
+ bits<4> Zn;
+ let Inst{31-12} = 0b01100101000010100011;
+ let Inst{11-10} = opc;
+ let Inst{9-6} = Zn;
+ let Inst{5} = 0b0;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_fp8_down_cvt_single<bits<2> opc, string mnemonic, RegisterOperand src> {
+ def NAME : sve2_fp8_down_cvt_single<opc, mnemonic, ZPR8, src>;
+}
+
+// FP8 Widening Multiply-Add Long - Indexed Group
+class sve2_fp8_mla_long_by_indexed_elem<bit T, string mnemonic>
+ : I<(outs ZPR16:$Zda),
+ (ins ZPR16:$_Zda, ZPR8:$Zn, ZPR3b8:$Zm, VectorIndexB:$imm4),
+ mnemonic, "\t$Zda, $Zn, $Zm$imm4",
+ "", []>, Sched<[]>{
+ bits<5> Zda;
+ bits<5> Zn;
+ bits<3> Zm;
+ bits<4> imm4;
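+ // The 4-bit element index is split: imm4{3-2} -> Inst{20-19}, imm4{1-0} -> Inst{11-10}.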
+ let Inst{31-24} = 0b01100100;
+ let Inst{23} = T;
+ let Inst{22-21} = 0b01;
+ let Inst{20-19} = imm4{3-2};
+ let Inst{18-16} = Zm;
+ let Inst{15-12} = 0b0101;
+ let Inst{11-10} = imm4{1-0};
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = DestructiveOther;
+ let ElementSize = ZPR16.ElementSize;
+}
+
+// FP8 Widening Multiply-Add (Long)/(Long Long) Group
+class sve2_fp8_mla<bits<3> opc, ZPRRegOp dst_ty, string mnemonic>
+ : I<(outs dst_ty:$Zda),
+ (ins dst_ty:$_Zda, ZPR8:$Zn, ZPR8:$Zm),
+ mnemonic, "\t$Zda, $Zn, $Zm",
+ "", []>, Sched<[]>{
+ bits<5> Zda;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b01100100;
+ let Inst{23} = opc{2};
+ let Inst{22-21} = 0b01;
+ let Inst{20-16} = Zm;
+ let Inst{15-14} = 0b10;
+ let Inst{13-12} = opc{1-0};
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = DestructiveOther;
+ let ElementSize = dst_ty.ElementSize;
+}
+
+// FP8 Widening Multiply-Add Long Long - Indexed Group
+class sve2_fp8_mla_long_long_by_indexed_elem<bits<2> TT, string mnemonic>
+ : I<(outs ZPR32:$Zda),
+ (ins ZPR32:$_Zda, ZPR8:$Zn, ZPR3b8:$Zm, VectorIndexB:$imm4),
+ mnemonic, "\t$Zda, $Zn, $Zm$imm4",
+ "", []>, Sched<[]>{
+ bits<5> Zda;
+ bits<5> Zn;
+ bits<3> Zm;
+ bits<4> imm4;
+ let Inst{31-24} = 0b01100100;
+ let Inst{23-22} = TT;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = imm4{3-2};
+ let Inst{18-16} = Zm;
+ let Inst{15-12} = 0b1100;
+ let Inst{11-10} = imm4{1-0};
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = DestructiveOther;
+ let ElementSize = ZPR32.ElementSize;
+}
+
+// FP8 Widening Dot-Product - Indexed Group
+multiclass sve2_fp8_dot_indexed<string mnemonic>{
+ def NAME : sve_float_dot_indexed<0b0, ZPR16, ZPR8, ZPR3b8, VectorIndexH, mnemonic> {
+ bits<3> iop;
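+ // 3-bit halfword index: iop{2-1} -> Inst{20-19}, iop{0} -> Inst{11}; Inst{10} is fixed to 1.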
+ let Inst{20-19} = iop{2-1};
+ let Inst{11} = iop{0};
+ let Inst{10} = 0b1;
+ }
+}
+
+// FP8 Look up table
+class sve2_lut_vector_index<ZPRRegOp zd_ty, RegisterOperand zn_ty,
+ Operand idx_ty, bits<4> opc, string mnemonic>
+ : I<(outs zd_ty:$Zd), (ins zn_ty:$Zn, ZPRAny:$Zm, idx_ty:$idx),
+ mnemonic, "\t$Zd, $Zn, $Zm$idx",
+ "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b01000101;
+ let Inst{22} = opc{3};
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15-13} = 0b101;
+ let Inst{12-10} = opc{2-0};
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+// FP8 Look up table read with 2-bit indices
+multiclass sve2_luti2_vector_index<string mnemonic> {
+ def _B : sve2_lut_vector_index<ZPR8, Z_b, VectorIndexS32b, {?, 0b100}, mnemonic> {
+ bits<2> idx;
+ let Inst{23-22} = idx;
+ }
+ def _H : sve2_lut_vector_index<ZPR16, Z_h, VectorIndexH32b, {?,?,0b10}, mnemonic> {
+ bits<3> idx;
+ let Inst{23-22} = idx{2-1};
+ let Inst{12} = idx{0};
+ }
+}
+
+// FP8 Look up table read with 4-bit indices
+multiclass sve2_luti4_vector_index<string mnemonic> {
+ def _B : sve2_lut_vector_index<ZPR8, Z_b, VectorIndexD32b, 0b1001, mnemonic> {
+ bit idx;
+ let Inst{23} = idx;
+ }
+ def _H : sve2_lut_vector_index<ZPR16, Z_h, VectorIndexS32b, {?, 0b111}, mnemonic> {
+ bits<2> idx;
+ let Inst{23-22} = idx;
+ }
+}
+
+// FP8 Look up table read with 4-bit indices (two contiguous registers)
+multiclass sve2_luti4_vector_vg2_index<string mnemonic> {
+ def _H : sve2_lut_vector_index<ZPR16, ZZ_h, VectorIndexS32b, {?, 0b101}, mnemonic> {
+ bits<2> idx;
+ let Inst{23-22} = idx;
+ }
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
index c5a6cb7af405..880ff8498b87 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
@@ -325,10 +325,7 @@ bool SVEIntrinsicOpts::optimizePredicateStore(Instruction *I) {
IRBuilder<> Builder(I->getContext());
Builder.SetInsertPoint(I);
- auto *PtrBitCast = Builder.CreateBitCast(
- Store->getPointerOperand(),
- PredType->getPointerTo(Store->getPointerAddressSpace()));
- Builder.CreateStore(BitCast->getOperand(0), PtrBitCast);
+ Builder.CreateStore(BitCast->getOperand(0), Store->getPointerOperand());
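+ // With opaque pointers, no pointer bitcast is needed; store through the original pointer operand.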
Store->eraseFromParent();
if (IntrI->getNumUses() == 0)
@@ -385,10 +382,7 @@ bool SVEIntrinsicOpts::optimizePredicateLoad(Instruction *I) {
IRBuilder<> Builder(I->getContext());
Builder.SetInsertPoint(Load);
- auto *PtrBitCast = Builder.CreateBitCast(
- Load->getPointerOperand(),
- PredType->getPointerTo(Load->getPointerAddressSpace()));
- auto *LoadPred = Builder.CreateLoad(PredType, PtrBitCast);
+ auto *LoadPred = Builder.CreateLoad(PredType, Load->getPointerOperand());
BitCast->replaceAllUsesWith(LoadPred);
BitCast->eraseFromParent();
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index a43b1cf0dd0e..976e72e8aae4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -830,6 +830,7 @@ inline static StringRef AArch64PACKeyIDToString(AArch64PACKey::ID KeyID) {
case AArch64PACKey::DB:
return StringRef("db");
}
+ llvm_unreachable("Unhandled AArch64PACKey::ID enum");
}
/// Return numeric key ID for 2-letter identifier string.
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
index 0edb7cb98640..0082b4017986 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
@@ -20,16 +20,30 @@ void SMEAttrs::set(unsigned M, bool Enable) {
assert(!(hasStreamingInterface() && hasStreamingCompatibleInterface()) &&
"SM_Enabled and SM_Compatible are mutually exclusive");
- assert(!(hasNewZAInterface() && hasSharedZAInterface()) &&
+ assert(!(hasNewZABody() && hasSharedZAInterface()) &&
"ZA_New and ZA_Shared are mutually exclusive");
- assert(!(hasNewZAInterface() && preservesZA()) &&
+ assert(!(hasNewZABody() && preservesZA()) &&
"ZA_New and ZA_Preserved are mutually exclusive");
+ assert(!(hasNewZABody() && (Bitmask & ZA_NoLazySave)) &&
+ "ZA_New and ZA_NoLazySave are mutually exclusive");
+ assert(!(hasSharedZAInterface() && (Bitmask & ZA_NoLazySave)) &&
+ "ZA_Shared and ZA_NoLazySave are mutually exclusive");
}
SMEAttrs::SMEAttrs(const CallBase &CB) {
*this = SMEAttrs(CB.getAttributes());
- if (auto *F = CB.getCalledFunction())
- set(SMEAttrs(*F).Bitmask);
+ if (auto *F = CB.getCalledFunction()) {
+ set(SMEAttrs(*F).Bitmask | SMEAttrs(F->getName()).Bitmask);
+ }
+}
+
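+// The SME ABI support routines are identified by name: all are streaming-compatible
+// and must not trigger a lazy ZA save around calls to them.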
+SMEAttrs::SMEAttrs(StringRef FuncName) : Bitmask(0) {
+ if (FuncName == "__arm_tpidr2_save" || FuncName == "__arm_sme_state")
+ Bitmask |= (SMEAttrs::SM_Compatible | SMEAttrs::ZA_Preserved |
+ SMEAttrs::ZA_NoLazySave);
+ if (FuncName == "__arm_tpidr2_restore")
+ Bitmask |= (SMEAttrs::SM_Compatible | SMEAttrs::ZA_Shared |
+ SMEAttrs::ZA_NoLazySave);
}
SMEAttrs::SMEAttrs(const AttributeList &Attrs) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h b/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
index 1146fd4e3fa8..e766b778b541 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
@@ -35,6 +35,7 @@ public:
ZA_Shared = 1 << 3, // aarch64_pstate_za_shared
ZA_New = 1 << 4, // aarch64_pstate_za_new
ZA_Preserved = 1 << 5, // aarch64_pstate_za_preserved
+ ZA_NoLazySave = 1 << 6, // Used for SME ABI routines to avoid lazy saves
All = ZA_Preserved - 1
};
@@ -42,6 +43,7 @@ public:
SMEAttrs(const Function &F) : SMEAttrs(F.getAttributes()) {}
SMEAttrs(const CallBase &CB);
SMEAttrs(const AttributeList &L);
+ SMEAttrs(StringRef FuncName);
void set(unsigned M, bool Enable = true);
@@ -73,16 +75,16 @@ public:
bool BodyOverridesInterface = false) const;
// Interfaces to query PSTATE.ZA
- bool hasNewZAInterface() const { return Bitmask & ZA_New; }
+ bool hasNewZABody() const { return Bitmask & ZA_New; }
bool hasSharedZAInterface() const { return Bitmask & ZA_Shared; }
bool hasPrivateZAInterface() const { return !hasSharedZAInterface(); }
bool preservesZA() const { return Bitmask & ZA_Preserved; }
bool hasZAState() const {
- return hasNewZAInterface() || hasSharedZAInterface();
+ return hasNewZABody() || hasSharedZAInterface();
}
bool requiresLazySave(const SMEAttrs &Callee) const {
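+ // Only callees marked ZA_NoLazySave (the SME ABI routines) avoid the lazy save;
+ // merely preserving ZA no longer suffices.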
return hasZAState() && Callee.hasPrivateZAInterface() &&
- !Callee.preservesZA();
+ !(Callee.Bitmask & ZA_NoLazySave);
}
};
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h
index c25194c02f72..35d33cb60bc4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -12,10 +12,12 @@
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
+#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Support/CodeGen.h"
namespace llvm {
+class AMDGPUTargetMachine;
class TargetMachine;
// GlobalISel passes
@@ -34,6 +36,7 @@ FunctionPass *createSIAnnotateControlFlowPass();
FunctionPass *createSIFoldOperandsPass();
FunctionPass *createSIPeepholeSDWAPass();
FunctionPass *createSILowerI1CopiesPass();
+FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass();
FunctionPass *createSIShrinkInstructionsPass();
FunctionPass *createSILoadStoreOptimizerPass();
FunctionPass *createSIWholeQuadModePass();
@@ -41,25 +44,32 @@ FunctionPass *createSIFixControlFlowLiveIntervalsPass();
FunctionPass *createSIOptimizeExecMaskingPreRAPass();
FunctionPass *createSIOptimizeVGPRLiveRangePass();
FunctionPass *createSIFixSGPRCopiesPass();
+FunctionPass *createLowerWWMCopiesPass();
FunctionPass *createSIMemoryLegalizerPass();
FunctionPass *createSIInsertWaitcntsPass();
FunctionPass *createSIPreAllocateWWMRegsPass();
FunctionPass *createSIFormMemoryClausesPass();
FunctionPass *createSIPostRABundlerPass();
-FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *);
-FunctionPass *createAMDGPUUseNativeCallsPass();
+FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *);
ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
FunctionPass *createAMDGPUCodeGenPreparePass();
FunctionPass *createAMDGPULateCodeGenPreparePass();
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
FunctionPass *createAMDGPURewriteOutArgumentsPass();
-ModulePass *createAMDGPULowerModuleLDSPass();
+ModulePass *
+createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM = nullptr);
FunctionPass *createSIModeRegisterPass();
FunctionPass *createGCNPreRAOptimizationsPass();
struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> {
- AMDGPUSimplifyLibCallsPass(TargetMachine &TM) : TM(TM) {}
+ AMDGPUSimplifyLibCallsPass() {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+struct AMDGPUImageIntrinsicOptimizerPass
+ : PassInfoMixin<AMDGPUImageIntrinsicOptimizerPass> {
+ AMDGPUImageIntrinsicOptimizerPass(TargetMachine &TM) : TM(TM) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
private:
@@ -78,8 +88,8 @@ extern char &AMDGPUMachineCFGStructurizerID;
void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
Pass *createAMDGPUAnnotateKernelFeaturesPass();
-Pass *createAMDGPUAttributorPass();
-void initializeAMDGPUAttributorPass(PassRegistry &);
+Pass *createAMDGPUAttributorLegacyPass();
+void initializeAMDGPUAttributorLegacyPass(PassRegistry &);
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
extern char &AMDGPUAnnotateKernelFeaturesID;
@@ -116,10 +126,13 @@ struct AMDGPULowerKernelAttributesPass
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
-void initializeAMDGPULowerModuleLDSPass(PassRegistry &);
-extern char &AMDGPULowerModuleLDSID;
+void initializeAMDGPULowerModuleLDSLegacyPass(PassRegistry &);
+extern char &AMDGPULowerModuleLDSLegacyPassID;
struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> {
+ const AMDGPUTargetMachine &TM;
+ AMDGPULowerModuleLDSPass(const AMDGPUTargetMachine &TM_) : TM(TM_) {}
+
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
@@ -144,9 +157,15 @@ extern char &SIFixSGPRCopiesID;
void initializeSIFixVGPRCopiesPass(PassRegistry &);
extern char &SIFixVGPRCopiesID;
+void initializeSILowerWWMCopiesPass(PassRegistry &);
+extern char &SILowerWWMCopiesID;
+
void initializeSILowerI1CopiesPass(PassRegistry &);
extern char &SILowerI1CopiesID;
+void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &);
+extern char &AMDGPUGlobalISelDivergenceLoweringID;
+
void initializeSILowerSGPRSpillsPass(PassRegistry &);
extern char &SILowerSGPRSpillsID;
@@ -171,15 +190,15 @@ extern char &SIOptimizeExecMaskingID;
void initializeSIPreAllocateWWMRegsPass(PassRegistry &);
extern char &SIPreAllocateWWMRegsID;
-void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &);
-extern char &AMDGPUSimplifyLibCallsID;
-
-void initializeAMDGPUUseNativeCallsPass(PassRegistry &);
-extern char &AMDGPUUseNativeCallsID;
+void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &);
+extern char &AMDGPUImageIntrinsicOptimizerID;
void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
extern char &AMDGPUPerfHintAnalysisID;
+void initializeGCNRegPressurePrinterPass(PassRegistry &);
+extern char &GCNRegPressurePrinterID;
+
// Passes common to R600 and SI
FunctionPass *createAMDGPUPromoteAlloca();
void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
@@ -217,8 +236,7 @@ private:
};
Pass *createAMDGPUStructurizeCFGPass();
-FunctionPass *createAMDGPUISelDag(TargetMachine &TM,
- CodeGenOpt::Level OptLevel);
+FunctionPass *createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel);
ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> {
@@ -239,6 +257,25 @@ public:
PreservedAnalyses run(Function &, FunctionAnalysisManager &);
};
+class AMDGPULowerKernelArgumentsPass
+ : public PassInfoMixin<AMDGPULowerKernelArgumentsPass> {
+private:
+ TargetMachine &TM;
+
+public:
+ AMDGPULowerKernelArgumentsPass(TargetMachine &TM) : TM(TM) {}
+ PreservedAnalyses run(Function &, FunctionAnalysisManager &);
+};
+
+class AMDGPUAttributorPass : public PassInfoMixin<AMDGPUAttributorPass> {
+private:
+ TargetMachine &TM;
+
+public:
+ AMDGPUAttributorPass(TargetMachine &TM) : TM(TM) {}
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
FunctionPass *createAMDGPUAnnotateUniformValues();
ModulePass *createAMDGPUPrintfRuntimeBinding();
@@ -279,9 +316,16 @@ extern char &AMDGPURemoveIncompatibleFunctionsID;
void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
extern char &AMDGPULateCodeGenPrepareID;
-FunctionPass *createAMDGPURewriteUndefForPHIPass();
-void initializeAMDGPURewriteUndefForPHIPass(PassRegistry &);
-extern char &AMDGPURewriteUndefForPHIPassID;
+FunctionPass *createAMDGPURewriteUndefForPHILegacyPass();
+void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &);
+extern char &AMDGPURewriteUndefForPHILegacyPassID;
+
+class AMDGPURewriteUndefForPHIPass
+ : public PassInfoMixin<AMDGPURewriteUndefForPHIPass> {
+public:
+ AMDGPURewriteUndefForPHIPass() = default;
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
void initializeSIAnnotateControlFlowPass(PassRegistry&);
extern char &SIAnnotateControlFlowPassID;
@@ -295,6 +339,9 @@ extern char &SIModeRegisterID;
void initializeAMDGPUInsertDelayAluPass(PassRegistry &);
extern char &AMDGPUInsertDelayAluID;
+void initializeAMDGPUInsertSingleUseVDSTPass(PassRegistry &);
+extern char &AMDGPUInsertSingleUseVDSTID;
+
void initializeSIInsertHardClausesPass(PassRegistry &);
extern char &SIInsertHardClausesID;
@@ -347,72 +394,6 @@ enum TargetIndex {
TI_SCRATCH_RSRC_DWORD2,
TI_SCRATCH_RSRC_DWORD3
};
-}
-
-/// OpenCL uses address spaces to differentiate between
-/// various memory regions on the hardware. On the CPU
-/// all of the address spaces point to the same memory,
-/// however on the GPU, each address space points to
-/// a separate piece of memory that is unique from other
-/// memory locations.
-namespace AMDGPUAS {
-enum : unsigned {
- // The maximum value for flat, generic, local, private, constant and region.
- MAX_AMDGPU_ADDRESS = 8,
-
- FLAT_ADDRESS = 0, ///< Address space for flat memory.
- GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
- REGION_ADDRESS = 2, ///< Address space for region memory. (GDS)
-
- CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2).
- LOCAL_ADDRESS = 3, ///< Address space for local memory.
- PRIVATE_ADDRESS = 5, ///< Address space for private memory.
-
- CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory.
-
- BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers.
- ///< Not used in backend.
-
- BUFFER_RESOURCE = 8, ///< Address space for 128-bit buffer resources.
-
- /// Internal address spaces. Can be freely renumbered.
- STREAMOUT_REGISTER = 128, ///< Address space for GS NGG Streamout registers.
- /// end Internal address spaces.
-
- /// Address space for direct addressable parameter memory (CONST0).
- PARAM_D_ADDRESS = 6,
- /// Address space for indirect addressable parameter memory (VTX1).
- PARAM_I_ADDRESS = 7,
-
- // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on
- // this order to be able to dynamically index a constant buffer, for
- // example:
- //
- // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx
-
- CONSTANT_BUFFER_0 = 8,
- CONSTANT_BUFFER_1 = 9,
- CONSTANT_BUFFER_2 = 10,
- CONSTANT_BUFFER_3 = 11,
- CONSTANT_BUFFER_4 = 12,
- CONSTANT_BUFFER_5 = 13,
- CONSTANT_BUFFER_6 = 14,
- CONSTANT_BUFFER_7 = 15,
- CONSTANT_BUFFER_8 = 16,
- CONSTANT_BUFFER_9 = 17,
- CONSTANT_BUFFER_10 = 18,
- CONSTANT_BUFFER_11 = 19,
- CONSTANT_BUFFER_12 = 20,
- CONSTANT_BUFFER_13 = 21,
- CONSTANT_BUFFER_14 = 22,
- CONSTANT_BUFFER_15 = 23,
-
- // Some places use this if the address space can't be determined.
- UNKNOWN_ADDRESS_SPACE = ~0u,
-};
-}
-
-namespace AMDGPU {
// FIXME: Missing constant_32bit
inline bool isFlatGlobalAddrSpace(unsigned AS) {
@@ -429,24 +410,25 @@ inline bool isExtendedGlobalAddrSpace(unsigned AS) {
}
static inline bool addrspacesMayAlias(unsigned AS1, unsigned AS2) {
- static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 8, "Addr space out of range");
+ static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 9, "Addr space out of range");
if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS)
return true;
- // This array is indexed by address space value enum elements 0 ... to 8
+ // This array is indexed by address space value enum elements 0 through 9
// clang-format off
- static const bool ASAliasRules[9][9] = {
- /* Flat Global Region Group Constant Private Const32 BufFatPtr BufRsrc */
- /* Flat */ {true, true, false, true, true, true, true, true, true},
- /* Global */ {true, true, false, false, true, false, true, true, true},
- /* Region */ {false, false, true, false, false, false, false, false, false},
- /* Group */ {true, false, false, true, false, false, false, false, false},
- /* Constant */ {true, true, false, false, false, false, true, true, true},
- /* Private */ {true, false, false, false, false, true, false, false, false},
- /* Constant 32-bit */ {true, true, false, false, true, false, false, true, true},
- /* Buffer Fat Ptr */ {true, true, false, false, true, false, true, true, true},
- /* Buffer Resource */ {true, true, false, false, true, false, true, true, true},
+ static const bool ASAliasRules[10][10] = {
+ /* Flat Global Region Group Constant Private Const32 BufFatPtr BufRsrc BufStrdPtr */
+ /* Flat */ {true, true, false, true, true, true, true, true, true, true},
+ /* Global */ {true, true, false, false, true, false, true, true, true, true},
+ /* Region */ {false, false, true, false, false, false, false, false, false, false},
+ /* Group */ {true, false, false, true, false, false, false, false, false, false},
+ /* Constant */ {true, true, false, false, false, false, true, true, true, true},
+ /* Private */ {true, false, false, false, false, true, false, false, false, false},
+ /* Constant 32-bit */ {true, true, false, false, true, false, false, true, true, true},
+ /* Buffer Fat Ptr */ {true, true, false, false, true, false, true, true, true, true},
+ /* Buffer Resource */ {true, true, false, false, true, false, true, true, true, true},
+ /* Buffer Strided Ptr */ {true, true, false, false, true, false, true, true, true, true},
};
// clang-format on
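
A usage sketch for the widened table; row and column 9 are the new buffer strided pointer address space (the enumerator name is assumed from AMDGPUAddrSpace.h):

    // The table is symmetric, so the argument order does not matter.
    bool A = AMDGPU::addrspacesMayAlias(AMDGPUAS::GLOBAL_ADDRESS,
                                        AMDGPUAS::PRIVATE_ADDRESS);   // false
    bool B = AMDGPU::addrspacesMayAlias(AMDGPUAS::FLAT_ADDRESS,
                                        /*BUFFER_STRIDED_POINTER=*/9); // true
    bool C = AMDGPU::addrspacesMayAlias(AMDGPUAS::GLOBAL_ADDRESS, 42); // true: out of range is conservative
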
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td
index b178623a319d..060fb66d38f7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -281,6 +281,12 @@ def FeatureMADIntraFwdBug : SubtargetFeature<"mad-intra-fwd-bug",
"MAD_U64/I64 intra instruction forwarding bug"
>;
+def FeatureMSAALoadDstSelBug : SubtargetFeature<"msaa-load-dst-sel-bug",
+ "HasMSAALoadDstSelBug",
+ "true",
+ "MSAA loads not honoring dst_sel bug"
+>;
+
class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
"ldsbankcount"#Value,
"LDSBankCount",
@@ -339,6 +345,12 @@ def FeatureGFX11Insts : SubtargetFeature<"gfx11-insts",
"Additional instructions for GFX11+"
>;
+def FeatureGFX12Insts : SubtargetFeature<"gfx12-insts",
+ "GFX12Insts",
+ "true",
+ "Additional instructions for GFX12+"
+>;
+
def FeatureGFX10_3Insts : SubtargetFeature<"gfx10-3-insts",
"GFX10_3Insts",
"true",
@@ -375,6 +387,12 @@ def FeatureTrue16BitInsts : SubtargetFeature<"true16",
"True 16-bit operand instructions"
>;
+def FeatureRealTrue16Insts : SubtargetFeature<"real-true16",
+ "EnableRealTrue16Insts",
+ "true",
+ "Use true 16-bit registers"
+>;
+
def FeatureVOP3P : SubtargetFeature<"vop3p",
"HasVOP3PInsts",
"true",
@@ -393,6 +411,12 @@ def FeatureVGPRIndexMode : SubtargetFeature<"vgpr-index-mode",
"Has VGPR mode register indexing"
>;
+def FeatureScalarDwordx3Loads : SubtargetFeature<"scalar-dwordx3-loads",
+ "HasScalarDwordx3Loads",
+ "true",
+ "Has 96-bit scalar load instructions"
+>;
+
def FeatureScalarStores : SubtargetFeature<"scalar-stores",
"HasScalarStores",
"true",
@@ -454,10 +478,16 @@ def FeatureDPP8 : SubtargetFeature<"dpp8",
"Support DPP8 (Data Parallel Primitives) extension"
>;
-def Feature64BitDPP : SubtargetFeature<"dpp-64bit",
- "Has64BitDPP",
+def FeatureDPALU_DPP : SubtargetFeature<"dpp-64bit",
+ "HasDPALU_DPP",
"true",
- "Support DPP (Data Parallel Primitives) extension"
+ "Support DPP (Data Parallel Primitives) extension in DP ALU"
+>;
+
+def FeatureDPPSrc1SGPR : SubtargetFeature<"dpp-src1-sgpr",
+ "HasDPPSrc1SGPR",
+ "true",
+ "Support SGPR for Src1 of DPP instructions"
>;
def FeaturePackedFP32Ops : SubtargetFeature<"packed-fp32-ops",
@@ -675,6 +705,13 @@ def FeatureAtomicGlobalPkAddBF16Inst : SubtargetFeature<"atomic-global-pk-add-bf
[FeatureFlatGlobalInsts]
>;
+def FeatureAtomicCSubNoRtnInsts : SubtargetFeature<"atomic-csub-no-rtn-insts",
+ "HasAtomicCSubNoRtnInsts",
+ "true",
+ "Has buffer_atomic_csub and global_atomic_csub instructions that don't "
+ "return original value"
+>;
+
def FeatureFlatAtomicFaddF32Inst
: SubtargetFeature<"flat-atomic-fadd-f32-inst",
"HasFlatAtomicFaddF32Inst",
@@ -773,6 +810,30 @@ def FeatureForceStoreSC0SC1 : SubtargetFeature<"force-store-sc0-sc1",
"Has SC0 and SC1 on stores"
>;
+def FeatureSALUFloatInsts : SubtargetFeature<"salu-float",
+ "HasSALUFloatInsts",
+ "true",
+ "Has SALU floating point instructions"
+>;
+
+def FeatureVGPRSingleUseHintInsts : SubtargetFeature<"vgpr-singleuse-hint",
+ "HasVGPRSingleUseHintInsts",
+ "true",
+ "Has single-use VGPR hint instructions"
+>;
+
+def FeaturePseudoScalarTrans : SubtargetFeature<"pseudo-scalar-trans",
+ "HasPseudoScalarTrans",
+ "true",
+ "Has Pseudo Scalar Transcendental instructions"
+>;
+
+def FeatureHasRestrictedSOffset : SubtargetFeature<"restricted-soffset",
+ "HasRestrictedSOffset",
+ "true",
+ "Has restricted SOffset (immediate not supported)."
+>;
+
//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
@@ -872,6 +933,12 @@ def FeatureTrigReducedRange : SubtargetFeature<"trig-reduced-range",
"Requires use of fract on arguments to trig instructions"
>;
+def FeatureKernargPreload : SubtargetFeature <"kernarg-preload",
+ "KernargPreload",
+ "true",
+ "Hardware supports preloading of kernel arguments in user SGPRs."
+>;
+
// Alignment enforcement is controlled by a configuration register:
// SH_MEM_CONFIG.alignment_mode
def FeatureUnalignedAccessMode : SubtargetFeature<"unaligned-access-mode",
@@ -899,6 +966,18 @@ def FeatureArchitectedSGPRs : SubtargetFeature<"architected-sgprs",
"Enable the architected SGPRs"
>;
+def FeatureGDS : SubtargetFeature<"gds",
+ "HasGDS",
+ "true",
+ "Has Global Data Share"
+>;
+
+def FeatureGWS : SubtargetFeature<"gws",
+ "HasGWS",
+ "true",
+ "Has Global Wave Sync"
+>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
@@ -917,7 +996,8 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
FeatureWavefrontSize64, FeatureSMemTimeInst, FeatureMadMacF32Insts,
FeatureDsSrc2Insts, FeatureLDSBankCount32, FeatureMovrel,
- FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts
+ FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts,
+ FeatureGDS, FeatureGWS
]
>;
@@ -928,7 +1008,7 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureUnalignedBufferAccess,
- FeatureImageInsts
+ FeatureImageInsts, FeatureGDS, FeatureGWS
]
>;
@@ -943,7 +1023,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts,
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32,
- FeatureUnalignedBufferAccess, FeatureImageInsts
+ FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS
]
>;
@@ -961,7 +1041,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK,
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess,
- FeatureNegativeScratchOffsetBug
+ FeatureNegativeScratchOffsetBug, FeatureGWS
]
>;
@@ -980,7 +1060,8 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts,
FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16,
- FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts
+ FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts,
+ FeatureGDS, FeatureGWS
]
>;
@@ -999,7 +1080,28 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",
FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts,
FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
FeatureA16, FeatureFastDenormalF32, FeatureG16,
- FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess
+ FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureGDS,
+ FeatureGWS
+ ]
+>;
+
+def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
+ "gfx12",
+ [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
+ FeatureFlatAddressSpace, Feature16BitInsts,
+ FeatureInv2PiInlineImm, FeatureApertureRegs,
+ FeatureCIInsts, FeatureGFX8Insts, FeatureGFX9Insts, FeatureGFX10Insts,
+ FeatureGFX10_AEncoding, FeatureGFX10_BEncoding, FeatureGFX10_3Insts,
+ FeatureGFX11Insts, FeatureGFX12Insts, FeatureVOP3P, FeatureVOPD,
+ FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
+ FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
+ FeatureAddNoCarryInsts, FeatureFmaMixInsts,
+ FeatureNoSdstCMPX, FeatureVscnt,
+ FeatureVOP3Literal, FeatureDPP8,
+ FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
+ FeatureA16, FeatureFastDenormalF32, FeatureG16,
+ FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureGDS,
+ FeatureGWS, FeatureTrue16BitInsts
]
>;
@@ -1104,28 +1206,32 @@ def FeatureISAVersion9_0_MI_Common : FeatureSet<
def FeatureISAVersion9_0_0 : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureMadMixInsts,
+ [FeatureGDS,
+ FeatureMadMixInsts,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
FeatureImageGather4D16Bug])>;
def FeatureISAVersion9_0_2 : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureMadMixInsts,
+ [FeatureGDS,
+ FeatureMadMixInsts,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
FeatureImageGather4D16Bug])>;
def FeatureISAVersion9_0_4 : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureDsSrc2Insts,
+ [FeatureGDS,
+ FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
FeatureFmaMixInsts,
FeatureImageGather4D16Bug])>;
def FeatureISAVersion9_0_6 : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
- [HalfRate64Ops,
+ [FeatureGDS,
+ HalfRate64Ops,
FeatureFmaMixInsts,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
@@ -1139,7 +1245,8 @@ def FeatureISAVersion9_0_6 : FeatureSet<
def FeatureISAVersion9_0_8 : FeatureSet<
!listconcat(FeatureISAVersion9_0_MI_Common.Features,
- [HalfRate64Ops,
+ [FeatureGDS,
+ HalfRate64Ops,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
FeatureAtomicBufferGlobalPkAddF16NoRtnInsts,
@@ -1148,7 +1255,8 @@ def FeatureISAVersion9_0_8 : FeatureSet<
def FeatureISAVersion9_0_9 : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureMadMixInsts,
+ [FeatureGDS,
+ FeatureMadMixInsts,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
FeatureImageInsts,
@@ -1158,17 +1266,19 @@ def FeatureISAVersion9_0_A : FeatureSet<
!listconcat(FeatureISAVersion9_0_MI_Common.Features,
[FeatureGFX90AInsts,
FeatureFmacF64Inst,
- Feature64BitDPP,
+ FeatureDPALU_DPP,
FeaturePackedFP32Ops,
FeatureAtomicFaddRtnInsts,
FeatureAtomicBufferGlobalPkAddF16Insts,
FeaturePackedTID,
FullRate64Ops,
- FeatureBackOffBarrier])>;
+ FeatureBackOffBarrier,
+ FeatureKernargPreload])>;
def FeatureISAVersion9_0_C : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureMadMixInsts,
+ [FeatureGDS,
+ FeatureMadMixInsts,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
FeatureImageGather4D16Bug])>;
@@ -1191,7 +1301,7 @@ def FeatureISAVersion9_4_Common : FeatureSet<
FeatureDot10Insts,
FeatureAtomicDsPkAdd16Insts,
FeatureAtomicFlatPkAdd16Insts,
- Feature64BitDPP,
+ FeatureDPALU_DPP,
FeaturePackedFP32Ops,
FeatureMAIInsts,
FeatureFP8Insts,
@@ -1205,7 +1315,8 @@ def FeatureISAVersion9_4_Common : FeatureSet<
FeaturePackedTID,
FeatureArchitectedFlatScratch,
FullRate64Ops,
- FeatureBackOffBarrier]>;
+ FeatureBackOffBarrier,
+ FeatureKernargPreload]>;
def FeatureISAVersion9_4_0 : FeatureSet<
!listconcat(FeatureISAVersion9_4_Common.Features,
@@ -1313,7 +1424,8 @@ def FeatureISAVersion11_Common : FeatureSet<
def FeatureISAVersion11_0_Common : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
- [FeatureVALUTransUseHazard])>;
+ [FeatureMSAALoadDstSelBug,
+ FeatureVALUTransUseHazard])>;
def FeatureISAVersion11_0_0 : FeatureSet<
!listconcat(FeatureISAVersion11_0_Common.Features,
@@ -1334,11 +1446,44 @@ def FeatureISAVersion11_0_3 : FeatureSet<
def FeatureISAVersion11_5_0 : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
- [])>;
+ [FeatureSALUFloatInsts,
+ FeatureDPPSrc1SGPR,
+ FeatureVGPRSingleUseHintInsts])>;
def FeatureISAVersion11_5_1 : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
- [FeatureGFX11FullVGPRs])>;
+ [FeatureSALUFloatInsts,
+ FeatureDPPSrc1SGPR,
+ FeatureVGPRSingleUseHintInsts,
+ FeatureGFX11FullVGPRs])>;
+
+def FeatureISAVersion12 : FeatureSet<
+ [FeatureGFX12,
+ FeatureLDSBankCount32,
+ FeatureDLInsts,
+ FeatureDot5Insts,
+ FeatureDot7Insts,
+ FeatureDot8Insts,
+ FeatureDot9Insts,
+ FeatureDot10Insts,
+ FeatureNSAEncoding,
+ FeaturePartialNSAEncoding,
+ FeatureWavefrontSize32,
+ FeatureShaderCyclesRegister,
+ FeatureArchitectedFlatScratch,
+ FeatureAtomicFaddRtnInsts,
+ FeatureAtomicFaddNoRtnInsts,
+ FeatureFlatAtomicFaddF32Inst,
+ FeatureImageInsts,
+ FeatureExtendedImageInsts,
+ FeaturePackedTID,
+ FeatureVcmpxPermlaneHazard,
+ FeatureSALUFloatInsts,
+ FeaturePseudoScalarTrans,
+ FeatureHasRestrictedSOffset,
+ FeatureVGPRSingleUseHintInsts,
+ FeatureMADIntraFwdBug,
+ FeatureScalarDwordx3Loads]>;
//===----------------------------------------------------------------------===//
@@ -1494,6 +1639,10 @@ def isGFX6GFX7GFX8GFX9GFX10 :
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
AssemblerPredicate<(all_of (not FeatureGFX11Insts))>;
+def isNotGFX12Plus :
+ Predicate<"Subtarget->getGeneration() <= AMDGPUSubtarget::GFX11">,
+ AssemblerPredicate<(all_of (not FeatureGFX12Insts))>;
+
def isGFX7GFX8GFX9GFX10 :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
@@ -1501,6 +1650,13 @@ def isGFX7GFX8GFX9GFX10 :
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
AssemblerPredicate<(all_of FeatureCIInsts, (not FeatureGFX11Insts))>;
+def isGFX8GFX9GFX10GFX11 :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">,
+ AssemblerPredicate<(all_of FeatureGFX8Insts, (not FeatureGFX12Insts))>;
+
def isGFX7Plus :
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
AssemblerPredicate<(all_of FeatureCIInsts)>;
@@ -1573,6 +1729,11 @@ def isGFX10Plus :
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
AssemblerPredicate<(all_of FeatureGFX10Insts)>;
+def isGFX10GFX11 :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">,
+ AssemblerPredicate<(all_of FeatureGFX10Insts, (not FeatureGFX12Insts))>;
+
def isGFX10Before1030 :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 &&"
"!Subtarget->hasGFX10_3Insts()">,
@@ -1591,12 +1752,20 @@ def isGFX8GFX9GFX10 :
def isGFX11Only :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">,
- AssemblerPredicate<(all_of FeatureGFX11Insts)>;
+ AssemblerPredicate<(all_of FeatureGFX11Insts, (not FeatureGFX12Insts))>;
def isGFX11Plus :
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11">,
AssemblerPredicate<(all_of FeatureGFX11Insts)>;
+def isGFX12Only :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX12">,
+ AssemblerPredicate<(all_of FeatureGFX12Insts)>;
+
+def isGFX12Plus :
+ Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12">,
+ AssemblerPredicate<(all_of FeatureGFX12Insts)>;
+
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;
@@ -1625,6 +1794,11 @@ def HasUnpackedD16VMem : Predicate<"Subtarget->hasUnpackedD16VMem()">,
def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">,
AssemblerPredicate<(all_of (not FeatureUnpackedD16VMem))>;
+def HasRestrictedSOffset : Predicate<"Subtarget->hasRestrictedSOffset()">,
+ AssemblerPredicate<(all_of FeatureHasRestrictedSOffset)>;
+def HasUnrestrictedSOffset : Predicate<"!Subtarget->hasRestrictedSOffset()">,
+ AssemblerPredicate<(all_of (not FeatureHasRestrictedSOffset))>;
+
def D16PreservesUnusedBits :
Predicate<"Subtarget->d16PreservesUnusedBits()">,
AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureSRAMECC))>;
@@ -1650,6 +1824,15 @@ def HasTrue16BitInsts : Predicate<"Subtarget->hasTrue16BitInsts()">,
AssemblerPredicate<(all_of FeatureTrue16BitInsts)>;
def NotHasTrue16BitInsts : Predicate<"!Subtarget->hasTrue16BitInsts()">;
+// Control use of True16 instructions. Real True16 instructions are the
+// True16 instructions as defined in the ISA. Fake True16 instructions have
+// the same encoding as real ones but syntactically only allow 32-bit
+// registers as operands and use their low halves.
+def UseRealTrue16Insts : Predicate<"Subtarget->useRealTrue16Insts()">,
+ AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts)>;
+def UseFakeTrue16Insts : Predicate<"Subtarget->hasTrue16BitInsts() && "
+ "!Subtarget->useRealTrue16Insts()">;
+
def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
AssemblerPredicate<(all_of FeatureVOP3P)>;
@@ -1677,12 +1860,15 @@ def HasDPP : Predicate<"Subtarget->hasDPP()">,
def HasDPP8 : Predicate<"Subtarget->hasDPP8()">,
AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts, FeatureDPP8)>;
-def Has64BitDPP : Predicate<"Subtarget->has64BitDPP()">,
- AssemblerPredicate<(all_of Feature64BitDPP)>;
+def HasDPALU_DPP : Predicate<"Subtarget->hasDPALU_DPP()">,
+ AssemblerPredicate<(all_of FeatureDPALU_DPP)>;
def HasPackedFP32Ops : Predicate<"Subtarget->hasPackedFP32Ops()">,
AssemblerPredicate<(all_of FeaturePackedFP32Ops)>;
+def HasPkMovB32 : Predicate<"Subtarget->hasPkMovB32()">,
+ AssemblerPredicate<(all_of FeatureGFX90AInsts)>;
+
def HasFmaakFmamkF32Insts :
Predicate<"Subtarget->hasFmaakFmamkF32Insts()">,
AssemblerPredicate<(any_of FeatureGFX10Insts, FeatureGFX940Insts)>;
@@ -1836,6 +2022,26 @@ def HasMADIntraFwdBug : Predicate<"Subtarget->hasMADIntraFwdBug()">;
def HasNotMADIntraFwdBug : Predicate<"!Subtarget->hasMADIntraFwdBug()">;
+def HasSALUFloatInsts : Predicate<"Subtarget->hasSALUFloatInsts()">,
+ AssemblerPredicate<(all_of FeatureSALUFloatInsts)>;
+
+def HasVGPRSingleUseHintInsts : Predicate<"Subtarget->hasVGPRSingleUseHintInsts()">,
+ AssemblerPredicate<(all_of FeatureVGPRSingleUseHintInsts)>;
+
+def HasPseudoScalarTrans : Predicate<"Subtarget->hasPseudoScalarTrans()">,
+ AssemblerPredicate<(all_of FeaturePseudoScalarTrans)>;
+
+def HasGDS : Predicate<"Subtarget->hasGDS()">;
+
+def HasGWS : Predicate<"Subtarget->hasGWS()">;
+
+def HasCvtFP8VOP1Bug : Predicate<"Subtarget->hasCvtFP8VOP1Bug()">;
+def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">;
+
+def HasAtomicCSubNoRtnInsts : Predicate<"Subtarget->hasAtomicCSubNoRtnInsts()">;
+
+def HasScalarDwordx3Loads : Predicate<"Subtarget->hasScalarDwordx3Loads()">;
+
// Include AMDGPU TD files
include "SISchedule.td"
include "GCNProcessors.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index 63942414bf3c..8d3eac686831 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -93,8 +93,7 @@ AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
}
}
- // Forward the query to the next alias analysis.
- return AAResultBase::alias(LocA, LocB, AAQI, nullptr);
+ return AliasResult::MayAlias;
}
ModRefInfo AMDGPUAAResult::getModRefInfoMask(const MemoryLocation &Loc,
@@ -111,5 +110,5 @@ ModRefInfo AMDGPUAAResult::getModRefInfoMask(const MemoryLocation &Loc,
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfoMask(Loc, AAQI, IgnoreLocals);
+ return ModRefInfo::ModRef;
}
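
Both hunks reflect the current AA interface: an individual result returning MayAlias or ModRef simply offers no opinion, and the enclosing AAResults aggregation intersects that with the other registered analyses instead of each result chaining to a base class. A sketch of the aggregation, with the result objects as stand-ins:

    AAResults AA(TLI);
    AA.addAAResult(AMDGPUAA); // target AA: address-space disambiguation
    AA.addAAResult(BasicAA);  // generic AA
    // The aggregate keeps the most precise answer across all results.
    AliasResult R = AA.alias(LocA, LocB);
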
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index 1c18cbd855fc..de25f9241a50 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -60,6 +60,7 @@ bool AMDGPUArgumentUsageInfo::doFinalization(Module &M) {
return false;
}
+// TODO: Print preload kernargs?
void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
for (const auto &FI : ArgInfoMap) {
OS << "Arguments for " << FI.first->getName() << '\n'
@@ -148,7 +149,7 @@ AMDGPUFunctionArgInfo::getPreloadedValue(
llvm_unreachable("unexpected preloaded value type");
}
-constexpr AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::fixedABILayout() {
+AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::fixedABILayout() {
AMDGPUFunctionArgInfo AI;
AI.PrivateSegmentBuffer
= ArgDescriptor::createRegister(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index f595e469f998..42b33c50d9f8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUARGUMENTUSAGEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUARGUMENTUSAGEINFO_H
+#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/Pass.h"
@@ -37,22 +38,19 @@ private:
bool IsSet : 1;
public:
- constexpr ArgDescriptor(unsigned Val = 0, unsigned Mask = ~0u,
- bool IsStack = false, bool IsSet = false)
- : Reg(Val), Mask(Mask), IsStack(IsStack), IsSet(IsSet) {}
+ ArgDescriptor(unsigned Val = 0, unsigned Mask = ~0u, bool IsStack = false,
+ bool IsSet = false)
+ : Reg(Val), Mask(Mask), IsStack(IsStack), IsSet(IsSet) {}
- static constexpr ArgDescriptor createRegister(Register Reg,
- unsigned Mask = ~0u) {
+ static ArgDescriptor createRegister(Register Reg, unsigned Mask = ~0u) {
return ArgDescriptor(Reg, Mask, false, true);
}
- static constexpr ArgDescriptor createStack(unsigned Offset,
- unsigned Mask = ~0u) {
+ static ArgDescriptor createStack(unsigned Offset, unsigned Mask = ~0u) {
return ArgDescriptor(Offset, Mask, true, true);
}
- static constexpr ArgDescriptor createArg(const ArgDescriptor &Arg,
- unsigned Mask) {
+ static ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask) {
return ArgDescriptor(Arg.Reg, Mask, Arg.IsStack, Arg.IsSet);
}
@@ -94,7 +92,13 @@ inline raw_ostream &operator<<(raw_ostream &OS, const ArgDescriptor &Arg) {
return OS;
}
+struct KernArgPreloadDescriptor : public ArgDescriptor {
+ KernArgPreloadDescriptor() {}
+ SmallVector<MCRegister> Regs;
+};
+
struct AMDGPUFunctionArgInfo {
+ // clang-format off
enum PreloadedValue {
// SGPRS:
PRIVATE_SEGMENT_BUFFER = 0,
@@ -117,6 +121,7 @@ struct AMDGPUFunctionArgInfo {
WORKITEM_ID_Z = 19,
FIRST_VGPR_VALUE = WORKITEM_ID_X
};
+ // clang-format on
// Kernel input registers setup for the HSA ABI in allocation order.
@@ -151,10 +156,13 @@ struct AMDGPUFunctionArgInfo {
ArgDescriptor WorkItemIDY;
ArgDescriptor WorkItemIDZ;
+ // Map the index of preloaded kernel arguments to its descriptor.
+ SmallDenseMap<int, KernArgPreloadDescriptor> PreloadKernArgs{};
+
std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
getPreloadedValue(PreloadedValue Value) const;
- static constexpr AMDGPUFunctionArgInfo fixedABILayout();
+ static AMDGPUFunctionArgInfo fixedABILayout();
};
class AMDGPUArgumentUsageInfo : public ImmutablePass {
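
A hypothetical sketch of filling the new preload map; the argument index and SGPR choices are made up for illustration:

    KernArgPreloadDescriptor Desc;
    Desc.Regs.push_back(AMDGPU::SGPR8); // first SGPR holding the argument
    Desc.Regs.push_back(AMDGPU::SGPR9); // wider arguments span further SGPRs
    ArgInfo.PreloadKernArgs[/*KernArgIdx=*/0] = Desc;
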
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 7cd8e53e6521..d317a733d433 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -121,26 +121,13 @@ void AMDGPUAsmPrinter::initTargetStreamer(Module &M) {
TM.getTargetTriple().getOS() != Triple::AMDPAL)
return;
- if (CodeObjectVersion >= AMDGPU::AMDHSA_COV3)
- getTargetStreamer()->EmitDirectiveAMDGCNTarget();
+ getTargetStreamer()->EmitDirectiveAMDGCNTarget();
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
HSAMetadataStream->begin(M, *getTargetStreamer()->getTargetID());
if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
getTargetStreamer()->getPALMetadata()->readFromIR(M);
-
- if (CodeObjectVersion >= AMDGPU::AMDHSA_COV3)
- return;
-
- // HSA emits NT_AMD_HSA_CODE_OBJECT_VERSION for code objects v2.
- if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
- getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
-
- // HSA and PAL emit NT_AMD_HSA_ISA_VERSION for code objects v2.
- IsaVersion Version = getIsaVersion(getGlobalSTI()->getCPU());
- getTargetStreamer()->EmitDirectiveHSACodeObjectISAV2(
- Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
}
void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) {
@@ -148,8 +135,7 @@ void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) {
if (!IsTargetStreamerInitialized)
initTargetStreamer(M);
- if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
- CodeObjectVersion == AMDGPU::AMDHSA_COV2)
+ if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
getTargetStreamer()->EmitISAVersion();
// Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
@@ -162,20 +148,6 @@ void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) {
}
}
-bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
- const MachineBasicBlock *MBB) const {
- if (!AsmPrinter::isBlockOnlyReachableByFallthrough(MBB))
- return false;
-
- if (MBB->empty())
- return true;
-
- // If this is a block implementing a long branch, an expression relative to
- // the start of the block is needed.
- // XXX - Is there a smarter way to check this?
- return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64);
-}
-
void AMDGPUAsmPrinter::emitFunctionBodyStart() {
const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
@@ -209,7 +181,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
if (!MFI.isEntryFunction())
return;
- if ((STM.isMesaKernel(F) || CodeObjectVersion == AMDGPU::AMDHSA_COV2) &&
+ if (STM.isMesaKernel(F) &&
(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
amd_kernel_code_t KernelCode;
@@ -219,6 +191,11 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
if (STM.isAmdHsaOS())
HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
+
+ if (MFI.getNumKernargPreloadedSGPRs() > 0) {
+ assert(AMDGPU::hasKernargPreload(STM));
+ getTargetStreamer()->EmitKernargPreloadHeader(*getGlobalSTI());
+ }
}
void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
@@ -226,8 +203,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
if (!MFI.isEntryFunction())
return;
- if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
- CodeObjectVersion == AMDGPU::AMDHSA_COV2)
+ if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
return;
auto &Streamer = getTargetStreamer()->getStreamer();
@@ -260,9 +236,23 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
Streamer.popSection();
}
+void AMDGPUAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
+ Register RegNo = MI->getOperand(0).getReg();
+
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << "implicit-def: "
+ << printReg(RegNo, MF->getSubtarget().getRegisterInfo());
+
+ if (MI->getAsmPrinterFlags() & AMDGPU::SGPR_SPILL)
+ OS << " : SGPR spill to VGPR lane";
+
+ OutStreamer->AddComment(OS.str());
+ OutStreamer->addBlankLine();
+}
+
void AMDGPUAsmPrinter::emitFunctionEntryLabel() {
- if (TM.getTargetTriple().getOS() == Triple::AMDHSA &&
- CodeObjectVersion >= AMDGPU::AMDHSA_COV3) {
+ if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
AsmPrinter::emitFunctionEntryLabel();
return;
}
@@ -337,12 +327,6 @@ bool AMDGPUAsmPrinter::doInitialization(Module &M) {
if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
switch (CodeObjectVersion) {
- case AMDGPU::AMDHSA_COV2:
- HSAMetadataStream.reset(new HSAMD::MetadataStreamerYamlV2());
- break;
- case AMDGPU::AMDHSA_COV3:
- HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV3());
- break;
case AMDGPU::AMDHSA_COV4:
HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV4());
break;
@@ -393,28 +377,29 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
const MachineFunction &MF) const {
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
uint16_t KernelCodeProperties = 0;
+ const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI.getUserSGPRInfo();
- if (MFI.hasPrivateSegmentBuffer()) {
+ if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
}
- if (MFI.hasDispatchPtr()) {
+ if (UserSGPRInfo.hasDispatchPtr()) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
}
- if (MFI.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5) {
+ if (UserSGPRInfo.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
}
- if (MFI.hasKernargSegmentPtr()) {
+ if (UserSGPRInfo.hasKernargSegmentPtr()) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
}
- if (MFI.hasDispatchID()) {
+ if (UserSGPRInfo.hasDispatchID()) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
}
- if (MFI.hasFlatScratchInit()) {
+ if (UserSGPRInfo.hasFlatScratchInit()) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
}
@@ -435,12 +420,13 @@ amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
const SIProgramInfo &PI) const {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const Function &F = MF.getFunction();
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
amdhsa::kernel_descriptor_t KernelDescriptor;
memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor));
assert(isUInt<32>(PI.ScratchSize));
- assert(isUInt<32>(PI.getComputePGMRSrc1()));
+ assert(isUInt<32>(PI.getComputePGMRSrc1(STM)));
assert(isUInt<32>(PI.getComputePGMRSrc2()));
KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
@@ -449,7 +435,7 @@ amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
Align MaxKernArgAlign;
KernelDescriptor.kernarg_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
- KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1();
+ KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1(STM);
KernelDescriptor.compute_pgm_rsrc2 = PI.getComputePGMRSrc2();
KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
@@ -458,6 +444,10 @@ amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
KernelDescriptor.compute_pgm_rsrc3 =
CurrentProgramInfo.ComputePGMRSrc3GFX90A;
+ if (AMDGPU::hasKernargPreload(STM))
+ KernelDescriptor.kernarg_preload =
+ static_cast<uint16_t>(Info->getNumKernargPreloadedSGPRs());
+
return KernelDescriptor;
}
@@ -949,6 +939,17 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.Occupancy = STM.computeOccupancy(MF.getFunction(), ProgInfo.LDSSize,
ProgInfo.NumSGPRsForWavesPerEU,
ProgInfo.NumVGPRsForWavesPerEU);
+ const auto [MinWEU, MaxWEU] =
+ AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", {0, 0}, true);
+ if (ProgInfo.Occupancy < MinWEU) {
+ DiagnosticInfoOptimizationFailure Diag(
+ F, F.getSubprogram(),
+ "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "
+ "'" +
+ F.getName() + "': desired occupancy was " + Twine(MinWEU) +
+ ", final occupancy is " + Twine(ProgInfo.Occupancy));
+ F.getContext().diagnose(Diag);
+ }
}
static unsigned getRsrcReg(CallingConv::ID CallConv) {
@@ -973,7 +974,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
OutStreamer->emitInt32(R_00B848_COMPUTE_PGM_RSRC1);
- OutStreamer->emitInt32(CurrentProgramInfo.getComputePGMRSrc1());
+ OutStreamer->emitInt32(CurrentProgramInfo.getComputePGMRSrc1(STM));
OutStreamer->emitInt32(R_00B84C_COMPUTE_PGM_RSRC2);
OutStreamer->emitInt32(CurrentProgramInfo.getComputePGMRSrc2());
@@ -1037,7 +1038,7 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU);
if (MD->getPALMajorVersion() < 3) {
- MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC));
+ MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC, STM));
if (AMDGPU::isCompute(CC)) {
MD->setRsrc2(CC, CurrentProgramInfo.getComputePGMRSrc2());
} else {
@@ -1113,17 +1114,19 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
auto *MD = getTargetStreamer()->getPALMetadata();
const MachineFrameInfo &MFI = MF.getFrameInfo();
- MD->setFunctionScratchSize(MF, MFI.getStackSize());
+ StringRef FnName = MF.getFunction().getName();
+ MD->setFunctionScratchSize(FnName, MFI.getStackSize());
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
// Set compute registers
MD->setRsrc1(CallingConv::AMDGPU_CS,
- CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS));
+ CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS, ST));
MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.getComputePGMRSrc2());
// Set optional info
- MD->setFunctionLdsSize(MF, CurrentProgramInfo.LDSSize);
- MD->setFunctionNumUsedVgprs(MF, CurrentProgramInfo.NumVGPRsForWavesPerEU);
- MD->setFunctionNumUsedSgprs(MF, CurrentProgramInfo.NumSGPRsForWavesPerEU);
+ MD->setFunctionLdsSize(FnName, CurrentProgramInfo.LDSSize);
+ MD->setFunctionNumUsedVgprs(FnName, CurrentProgramInfo.NumVGPRsForWavesPerEU);
+ MD->setFunctionNumUsedSgprs(FnName, CurrentProgramInfo.NumSGPRsForWavesPerEU);
}
// This is supposed to be log2(Size)
@@ -1153,7 +1156,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
AMDGPU::initDefaultAMDKernelCodeT(Out, &STM);
Out.compute_pgm_resource_registers =
- CurrentProgramInfo.getComputePGMRSrc1() |
+ CurrentProgramInfo.getComputePGMRSrc1(STM) |
(CurrentProgramInfo.getComputePGMRSrc2() << 32);
Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
@@ -1164,27 +1167,28 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));
- if (MFI->hasPrivateSegmentBuffer()) {
+ const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI->getUserSGPRInfo();
+ if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
Out.code_properties |=
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
}
- if (MFI->hasDispatchPtr())
+ if (UserSGPRInfo.hasDispatchPtr())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
- if (MFI->hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5)
+ if (UserSGPRInfo.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5)
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
- if (MFI->hasKernargSegmentPtr())
+ if (UserSGPRInfo.hasKernargSegmentPtr())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
- if (MFI->hasDispatchID())
+ if (UserSGPRInfo.hasDispatchID())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
- if (MFI->hasFlatScratchInit())
+ if (UserSGPRInfo.hasFlatScratchInit())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
- if (MFI->hasDispatchPtr())
+ if (UserSGPRInfo.hasDispatchPtr())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (STM.isXNACKEnabled())
@@ -1293,6 +1297,9 @@ void AMDGPUAsmPrinter::emitResourceUsageRemarks(
EmitResourceUsageRemark("NumAGPR", "AGPRs", CurrentProgramInfo.NumAccVGPR);
EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]",
CurrentProgramInfo.ScratchSize);
+ StringRef DynamicStackStr =
+ CurrentProgramInfo.DynamicCallStack ? "True" : "False";
+ EmitResourceUsageRemark("DynamicStack", "Dynamic Stack", DynamicStackStr);
EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]",
CurrentProgramInfo.Occupancy);
EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill",
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index d490209ce35e..79326cd3d328 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -116,6 +116,8 @@ public:
void emitFunctionBodyEnd() override;
+ void emitImplicitDef(const MachineInstr *MI) const override;
+
void emitFunctionEntryLabel() override;
void emitBasicBlockStart(const MachineBasicBlock &MBB) override;
@@ -126,9 +128,6 @@ public:
void emitEndOfAsmFile(Module &M) override;
- bool isBlockOnlyReachableByFallthrough(
- const MachineBasicBlock *MBB) const override;
-
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode, raw_ostream &O) override;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index 9795928094f4..9ba74a23e8af 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -202,9 +202,18 @@ void AMDGPUAtomicOptimizerImpl::visitAtomicRMWInst(AtomicRMWInst &I) {
case AtomicRMWInst::Min:
case AtomicRMWInst::UMax:
case AtomicRMWInst::UMin:
+ case AtomicRMWInst::FAdd:
+ case AtomicRMWInst::FSub:
+ case AtomicRMWInst::FMax:
+ case AtomicRMWInst::FMin:
break;
}
+ // Only 32-bit floating point atomic ops are supported.
+ if (AtomicRMWInst::isFPOperation(Op) && !I.getType()->isFloatTy()) {
+ return;
+ }
+
const unsigned PtrIdx = 0;
const unsigned ValIdx = 1;
@@ -344,8 +353,12 @@ static Value *buildNonAtomicBinOp(IRBuilder<> &B, AtomicRMWInst::BinOp Op,
llvm_unreachable("Unhandled atomic op");
case AtomicRMWInst::Add:
return B.CreateBinOp(Instruction::Add, LHS, RHS);
+ case AtomicRMWInst::FAdd:
+ return B.CreateFAdd(LHS, RHS);
case AtomicRMWInst::Sub:
return B.CreateBinOp(Instruction::Sub, LHS, RHS);
+ case AtomicRMWInst::FSub:
+ return B.CreateFSub(LHS, RHS);
case AtomicRMWInst::And:
return B.CreateBinOp(Instruction::And, LHS, RHS);
case AtomicRMWInst::Or:
@@ -365,6 +378,10 @@ static Value *buildNonAtomicBinOp(IRBuilder<> &B, AtomicRMWInst::BinOp Op,
case AtomicRMWInst::UMin:
Pred = CmpInst::ICMP_ULT;
break;
+ case AtomicRMWInst::FMax:
+ return B.CreateMaxNum(LHS, RHS);
+ case AtomicRMWInst::FMin:
+ return B.CreateMinNum(LHS, RHS);
}
Value *Cond = B.CreateICmp(Pred, LHS, RHS);
return B.CreateSelect(Cond, LHS, RHS);
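
For the new FP arms, CreateMaxNum and CreateMinNum emit the llvm.maxnum/llvm.minnum intrinsics, matching the IEEE maxNum/minNum semantics of atomicrmw fmax/fmin. Spelled out for two f32 values:

    Value *Max = B.CreateMaxNum(LHS, RHS); // call float @llvm.maxnum.f32(LHS, RHS)
    Value *Min = B.CreateMinNum(LHS, RHS); // call float @llvm.minnum.f32(LHS, RHS)
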
@@ -376,10 +393,11 @@ Value *AMDGPUAtomicOptimizerImpl::buildReduction(IRBuilder<> &B,
AtomicRMWInst::BinOp Op,
Value *V,
Value *const Identity) const {
- Type *const Ty = V->getType();
+ Type *AtomicTy = V->getType();
+ Type *IntNTy = B.getIntNTy(AtomicTy->getPrimitiveSizeInBits());
Module *M = B.GetInsertBlock()->getModule();
Function *UpdateDPP =
- Intrinsic::getDeclaration(M, Intrinsic::amdgcn_update_dpp, Ty);
+ Intrinsic::getDeclaration(M, Intrinsic::amdgcn_update_dpp, AtomicTy);
// Reduce within each row of 16 lanes.
for (unsigned Idx = 0; Idx < 4; Idx++) {
@@ -392,39 +410,47 @@ Value *AMDGPUAtomicOptimizerImpl::buildReduction(IRBuilder<> &B,
// Reduce within each pair of rows (i.e. 32 lanes).
assert(ST->hasPermLaneX16());
- V = buildNonAtomicBinOp(
- B, Op, V,
- B.CreateIntrinsic(
- Intrinsic::amdgcn_permlanex16, {},
- {V, V, B.getInt32(-1), B.getInt32(-1), B.getFalse(), B.getFalse()}));
-
- if (ST->isWave32())
+ V = B.CreateBitCast(V, IntNTy);
+ Value *Permlanex16Call = B.CreateIntrinsic(
+ Intrinsic::amdgcn_permlanex16, {},
+ {V, V, B.getInt32(-1), B.getInt32(-1), B.getFalse(), B.getFalse()});
+ V = buildNonAtomicBinOp(B, Op, B.CreateBitCast(V, AtomicTy),
+ B.CreateBitCast(Permlanex16Call, AtomicTy));
+ if (ST->isWave32()) {
return V;
+ }
if (ST->hasPermLane64()) {
// Reduce across the upper and lower 32 lanes.
- return buildNonAtomicBinOp(
- B, Op, V, B.CreateIntrinsic(Intrinsic::amdgcn_permlane64, {}, V));
+ V = B.CreateBitCast(V, IntNTy);
+ Value *Permlane64Call =
+ B.CreateIntrinsic(Intrinsic::amdgcn_permlane64, {}, V);
+ return buildNonAtomicBinOp(B, Op, B.CreateBitCast(V, AtomicTy),
+ B.CreateBitCast(Permlane64Call, AtomicTy));
}
// Pick an arbitrary lane from 0..31 and an arbitrary lane from 32..63 and
// combine them with a scalar operation.
Function *ReadLane =
Intrinsic::getDeclaration(M, Intrinsic::amdgcn_readlane, {});
- Value *const Lane0 = B.CreateCall(ReadLane, {V, B.getInt32(0)});
- Value *const Lane32 = B.CreateCall(ReadLane, {V, B.getInt32(32)});
- return buildNonAtomicBinOp(B, Op, Lane0, Lane32);
+ V = B.CreateBitCast(V, IntNTy);
+ Value *Lane0 = B.CreateCall(ReadLane, {V, B.getInt32(0)});
+ Value *Lane32 = B.CreateCall(ReadLane, {V, B.getInt32(32)});
+ return buildNonAtomicBinOp(B, Op, B.CreateBitCast(Lane0, AtomicTy),
+ B.CreateBitCast(Lane32, AtomicTy));
}
// Use the builder to create an inclusive scan of V across the wavefront, with
// all lanes active.
Value *AMDGPUAtomicOptimizerImpl::buildScan(IRBuilder<> &B,
AtomicRMWInst::BinOp Op, Value *V,
- Value *const Identity) const {
- Type *const Ty = V->getType();
+ Value *Identity) const {
+ Type *AtomicTy = V->getType();
+ Type *IntNTy = B.getIntNTy(AtomicTy->getPrimitiveSizeInBits());
+
Module *M = B.GetInsertBlock()->getModule();
Function *UpdateDPP =
- Intrinsic::getDeclaration(M, Intrinsic::amdgcn_update_dpp, Ty);
+ Intrinsic::getDeclaration(M, Intrinsic::amdgcn_update_dpp, AtomicTy);
for (unsigned Idx = 0; Idx < 4; Idx++) {
V = buildNonAtomicBinOp(
@@ -452,23 +478,29 @@ Value *AMDGPUAtomicOptimizerImpl::buildScan(IRBuilder<> &B,
// Combine lane 15 into lanes 16..31 (and, for wave 64, lane 47 into lanes
// 48..63).
assert(ST->hasPermLaneX16());
- Value *const PermX = B.CreateIntrinsic(
+ V = B.CreateBitCast(V, IntNTy);
+ Value *PermX = B.CreateIntrinsic(
Intrinsic::amdgcn_permlanex16, {},
{V, V, B.getInt32(-1), B.getInt32(-1), B.getFalse(), B.getFalse()});
- V = buildNonAtomicBinOp(
- B, Op, V,
- B.CreateCall(UpdateDPP,
- {Identity, PermX, B.getInt32(DPP::QUAD_PERM_ID),
- B.getInt32(0xa), B.getInt32(0xf), B.getFalse()}));
+
+ Value *UpdateDPPCall =
+ B.CreateCall(UpdateDPP, {Identity, B.CreateBitCast(PermX, AtomicTy),
+ B.getInt32(DPP::QUAD_PERM_ID), B.getInt32(0xa),
+ B.getInt32(0xf), B.getFalse()});
+ V = buildNonAtomicBinOp(B, Op, B.CreateBitCast(V, AtomicTy), UpdateDPPCall);
+
if (!ST->isWave32()) {
// Combine lane 31 into lanes 32..63.
+ V = B.CreateBitCast(V, IntNTy);
Value *const Lane31 = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
{V, B.getInt32(31)});
- V = buildNonAtomicBinOp(
- B, Op, V,
- B.CreateCall(UpdateDPP,
- {Identity, Lane31, B.getInt32(DPP::QUAD_PERM_ID),
- B.getInt32(0xc), B.getInt32(0xf), B.getFalse()}));
+
+ Value *UpdateDPPCall = B.CreateCall(
+ UpdateDPP, {Identity, Lane31, B.getInt32(DPP::QUAD_PERM_ID),
+ B.getInt32(0xc), B.getInt32(0xf), B.getFalse()});
+
+ V = buildNonAtomicBinOp(B, Op, B.CreateBitCast(V, AtomicTy),
+ UpdateDPPCall);
}
}
return V;
@@ -477,12 +509,13 @@ Value *AMDGPUAtomicOptimizerImpl::buildScan(IRBuilder<> &B,
// Use the builder to create a shift right of V across the wavefront, with all
// lanes active, to turn an inclusive scan into an exclusive scan.
Value *AMDGPUAtomicOptimizerImpl::buildShiftRight(IRBuilder<> &B, Value *V,
- Value *const Identity) const {
- Type *const Ty = V->getType();
+ Value *Identity) const {
+ Type *AtomicTy = V->getType();
+ Type *IntNTy = B.getIntNTy(AtomicTy->getPrimitiveSizeInBits());
+
Module *M = B.GetInsertBlock()->getModule();
Function *UpdateDPP =
- Intrinsic::getDeclaration(M, Intrinsic::amdgcn_update_dpp, Ty);
-
+ Intrinsic::getDeclaration(M, Intrinsic::amdgcn_update_dpp, AtomicTy);
if (ST->hasDPPWavefrontShifts()) {
// GFX9 has DPP wavefront shift operations.
V = B.CreateCall(UpdateDPP,
@@ -502,19 +535,24 @@ Value *AMDGPUAtomicOptimizerImpl::buildShiftRight(IRBuilder<> &B, Value *V,
B.getInt32(0xf), B.getInt32(0xf), B.getFalse()});
// Copy the old lane 15 to the new lane 16.
- V = B.CreateCall(WriteLane, {B.CreateCall(ReadLane, {Old, B.getInt32(15)}),
- B.getInt32(16), V});
-
+ V = B.CreateCall(
+ WriteLane,
+ {B.CreateCall(ReadLane, {B.CreateBitCast(Old, IntNTy), B.getInt32(15)}),
+ B.getInt32(16), B.CreateBitCast(V, IntNTy)});
+ V = B.CreateBitCast(V, AtomicTy);
if (!ST->isWave32()) {
// Copy the old lane 31 to the new lane 32.
- V = B.CreateCall(
- WriteLane,
- {B.CreateCall(ReadLane, {Old, B.getInt32(31)}), B.getInt32(32), V});
+ V = B.CreateBitCast(V, IntNTy);
+ V = B.CreateCall(WriteLane,
+ {B.CreateCall(ReadLane, {B.CreateBitCast(Old, IntNTy),
+ B.getInt32(31)}),
+ B.getInt32(32), V});
// Copy the old lane 47 to the new lane 48.
V = B.CreateCall(
WriteLane,
{B.CreateCall(ReadLane, {Old, B.getInt32(47)}), B.getInt32(48), V});
+ V = B.CreateBitCast(V, AtomicTy);
}
}
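
buildShiftRight turns the inclusive scan into an exclusive one: every lane takes its predecessor's value and lane 0 is seeded with the identity. A scalar model of the transform (the 64-lane array form is illustrative only):

    const float Identity = -0.0f;  // e.g. the fadd identity (see below)
    float incl[64] = {/* per-lane inclusive scan results */};
    float excl[64];
    excl[0] = Identity;            // lane 0 receives the identity
    for (int I = 1; I < 64; ++I)
      excl[I] = incl[I - 1];       // every other lane shifts right by one
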
@@ -529,7 +567,6 @@ Value *AMDGPUAtomicOptimizerImpl::buildShiftRight(IRBuilder<> &B, Value *V,
std::pair<Value *, Value *> AMDGPUAtomicOptimizerImpl::buildScanIteratively(
IRBuilder<> &B, AtomicRMWInst::BinOp Op, Value *const Identity, Value *V,
Instruction &I, BasicBlock *ComputeLoop, BasicBlock *ComputeEnd) const {
-
auto *Ty = I.getType();
auto *WaveTy = B.getIntNTy(ST->getWavefrontSize());
auto *EntryBB = I.getParent();
@@ -554,18 +591,25 @@ std::pair<Value *, Value *> AMDGPUAtomicOptimizerImpl::buildScanIteratively(
// Use the llvm.cttz intrinsic to find the lowest remaining active lane.
auto *FF1 =
B.CreateIntrinsic(Intrinsic::cttz, WaveTy, {ActiveBits, B.getTrue()});
- auto *LaneIdxInt = B.CreateTrunc(FF1, Ty);
+
+ Type *IntNTy = B.getIntNTy(Ty->getPrimitiveSizeInBits());
+ auto *LaneIdxInt = B.CreateTrunc(FF1, IntNTy);
// Get the value required for atomic operation
- auto *LaneValue =
+ V = B.CreateBitCast(V, IntNTy);
+ Value *LaneValue =
B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {}, {V, LaneIdxInt});
+ LaneValue = B.CreateBitCast(LaneValue, Ty);
// Perform writelane if intermediate scan results are required later in the
// kernel computations
Value *OldValue = nullptr;
if (NeedResult) {
- OldValue = B.CreateIntrinsic(Intrinsic::amdgcn_writelane, {},
- {Accumulator, LaneIdxInt, OldValuePhi});
+ OldValue =
+ B.CreateIntrinsic(Intrinsic::amdgcn_writelane, {},
+ {B.CreateBitCast(Accumulator, IntNTy), LaneIdxInt,
+ B.CreateBitCast(OldValuePhi, IntNTy)});
+ OldValue = B.CreateBitCast(OldValue, Ty);
OldValuePhi->addIncoming(OldValue, ComputeLoop);
}
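
The bitcast round-trips exist because amdgcn_readlane/writelane operate on integer values at this revision, so FP operands are viewed as same-width integers around each call. The pattern in isolation, for an f32 V and an assumed i32 LaneIdx:

    Value *VI   = B.CreateBitCast(V, B.getInt32Ty());    // f32 -> i32
    Value *Lane = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
                                    {VI, LaneIdx});
    Value *VF   = B.CreateBitCast(Lane, B.getFloatTy()); // i32 -> f32
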
@@ -590,8 +634,10 @@ std::pair<Value *, Value *> AMDGPUAtomicOptimizerImpl::buildScanIteratively(
return {OldValue, NewAccumulator};
}
-static APInt getIdentityValueForAtomicOp(AtomicRMWInst::BinOp Op,
- unsigned BitWidth) {
+static Constant *getIdentityValueForAtomicOp(Type *const Ty,
+ AtomicRMWInst::BinOp Op) {
+ LLVMContext &C = Ty->getContext();
+ const unsigned BitWidth = Ty->getPrimitiveSizeInBits();
switch (Op) {
default:
llvm_unreachable("Unhandled atomic op");
@@ -600,14 +646,22 @@ static APInt getIdentityValueForAtomicOp(AtomicRMWInst::BinOp Op,
case AtomicRMWInst::Or:
case AtomicRMWInst::Xor:
case AtomicRMWInst::UMax:
- return APInt::getMinValue(BitWidth);
+ return ConstantInt::get(C, APInt::getMinValue(BitWidth));
case AtomicRMWInst::And:
case AtomicRMWInst::UMin:
- return APInt::getMaxValue(BitWidth);
+ return ConstantInt::get(C, APInt::getMaxValue(BitWidth));
case AtomicRMWInst::Max:
- return APInt::getSignedMinValue(BitWidth);
+ return ConstantInt::get(C, APInt::getSignedMinValue(BitWidth));
case AtomicRMWInst::Min:
- return APInt::getSignedMaxValue(BitWidth);
+ return ConstantInt::get(C, APInt::getSignedMaxValue(BitWidth));
+ case AtomicRMWInst::FAdd:
+ return ConstantFP::get(C, APFloat::getZero(Ty->getFltSemantics(), true));
+ case AtomicRMWInst::FSub:
+ return ConstantFP::get(C, APFloat::getZero(Ty->getFltSemantics(), false));
+ case AtomicRMWInst::FMin:
+ return ConstantFP::get(C, APFloat::getInf(Ty->getFltSemantics(), false));
+ case AtomicRMWInst::FMax:
+ return ConstantFP::get(C, APFloat::getInf(Ty->getFltSemantics(), true));
}
}
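[Aside, not part of the patch] The new floating-point identities are the subtle ones: FAdd uses negative zero because -0.0 + x == x for every x under round-to-nearest, while +0.0 + -0.0 rounds to +0.0, so +0.0 is not a true additive identity; FMin and FMax use +inf and -inf respectively. A host-side check of the zero distinction:

  #include <cmath>
  #include <cstdio>
  #include <cstring>

  static bool sameBits(double a, double b) {
    unsigned long long x, y;
    std::memcpy(&x, &a, sizeof(x));
    std::memcpy(&y, &b, sizeof(y));
    return x == y;
  }

  int main() {
    const double vals[] = {0.0, -0.0, 1.5, -2.25, INFINITY};
    for (double v : vals)
      printf("-0.0 + %g is bit-identical to the input: %d\n", v,
             sameBits(-0.0 + v, v)); // 1 in every case
    // +0.0 fails on the -0.0 input: the sum rounds to +0.0.
    printf("+0.0 + -0.0 == -0.0 bitwise: %d\n", sameBits(0.0 + -0.0, -0.0)); // 0
    return 0;
  }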
@@ -623,6 +677,10 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
// Start building just before the instruction.
IRBuilder<> B(&I);
+ if (AtomicRMWInst::isFPOperation(Op)) {
+ B.setIsFPConstrained(I.getFunction()->hasFnAttribute(Attribute::StrictFP));
+ }
+
// If we are in a pixel shader, because of how we have to mask out helper
// lane invocations, we need to record the entry and exit BB's.
BasicBlock *PixelEntryBB = nullptr;
@@ -649,12 +707,15 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
}
Type *const Ty = I.getType();
+ Type *Int32Ty = B.getInt32Ty();
+ Type *IntNTy = B.getIntNTy(Ty->getPrimitiveSizeInBits());
+ bool isAtomicFloatingPointTy = Ty->isFloatingPointTy();
const unsigned TyBitWidth = DL->getTypeSizeInBits(Ty);
- auto *const VecTy = FixedVectorType::get(B.getInt32Ty(), 2);
+ auto *const VecTy = FixedVectorType::get(Int32Ty, 2);
// This is the value in the atomic operation we need to combine in order to
// reduce the number of atomic operations.
- Value *const V = I.getOperand(ValIdx);
+ Value *V = I.getOperand(ValIdx);
// We need to know how many lanes are active within the wavefront, and we do
// this by doing a ballot of active lanes.
@@ -671,39 +732,47 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {},
{Ballot, B.getInt32(0)});
} else {
- Value *const BitCast = B.CreateBitCast(Ballot, VecTy);
- Value *const ExtractLo = B.CreateExtractElement(BitCast, B.getInt32(0));
- Value *const ExtractHi = B.CreateExtractElement(BitCast, B.getInt32(1));
+ Value *const ExtractLo = B.CreateTrunc(Ballot, Int32Ty);
+ Value *const ExtractHi = B.CreateTrunc(B.CreateLShr(Ballot, 32), Int32Ty);
Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {},
{ExtractLo, B.getInt32(0)});
Mbcnt =
B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {}, {ExtractHi, Mbcnt});
}
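[Aside, not part of the patch] mbcnt_lo/mbcnt_hi together hand each lane the population count of the ballot bits strictly below its own lane id, i.e. its rank among the active lanes; the wave64 path above feeds the two 32-bit ballot halves through the two intrinsics. A host-side sketch of the computed quantity (C++20 for std::popcount):

  #include <bit>
  #include <cstdint>
  #include <cstdio>

  static unsigned mbcnt(uint64_t ballot, unsigned lane) {
    uint64_t below = lane ? (ballot & ((1ull << lane) - 1)) : 0;
    return std::popcount(below); // active lanes strictly below `lane`
  }

  int main() {
    uint64_t ballot = 0b10110101; // lanes 0, 2, 4, 5, 7 active
    for (unsigned lane : {0u, 2u, 4u, 5u, 7u})
      printf("lane %u -> rank %u\n", lane, mbcnt(ballot, lane)); // 0 1 2 3 4
    return 0;
  }

The lane whose rank is 0 is the one elected later in this function (the Mbcnt == 0 compare) to issue the single atomic.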
- Mbcnt = B.CreateIntCast(Mbcnt, Ty, false);
- Value *const Identity = B.getInt(getIdentityValueForAtomicOp(Op, TyBitWidth));
+ Function *F = I.getFunction();
+ LLVMContext &C = F->getContext();
+
+  // For atomic sub, perform the scan with an add operation and allow one
+  // lane to subtract the reduced value later.
+ AtomicRMWInst::BinOp ScanOp = Op;
+ if (Op == AtomicRMWInst::Sub) {
+ ScanOp = AtomicRMWInst::Add;
+ } else if (Op == AtomicRMWInst::FSub) {
+ ScanOp = AtomicRMWInst::FAdd;
+ }
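[Aside, not part of the patch] The sub->add remapping works because N lanes each atomically subtracting v_i is equivalent to a single atomic subtraction of the wavefront total, with every lane reconstructing its own "old" value from the exclusive add-scan. A minimal sequential model of that equivalence:

  #include <cstdio>

  int main() {
    int mem = 100;
    const int v[4] = {5, 7, 1, 2}; // per-lane operands

    // Reference: four serial atomic subs, recording each returned old value.
    int ref[4], m = mem;
    for (int i = 0; i < 4; ++i) { ref[i] = m; m -= v[i]; }

    // Optimized: one sub of the total; old values come from the exclusive
    // add-scan over v.
    int total = v[0] + v[1] + v[2] + v[3];
    int base = mem; // what the single atomic sub returns
    mem -= total;
    for (int i = 0, excl = 0; i < 4; ++i) {
      printf("lane %d: %d (reference %d)\n", i, base - excl, ref[i]);
      excl += v[i];
    }
    return 0;
  }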
+ Value *Identity = getIdentityValueForAtomicOp(Ty, ScanOp);
Value *ExclScan = nullptr;
Value *NewV = nullptr;
const bool NeedResult = !I.use_empty();
- Function *F = I.getFunction();
- LLVMContext &C = F->getContext();
BasicBlock *ComputeLoop = nullptr;
BasicBlock *ComputeEnd = nullptr;
// If we have a divergent value in each lane, we need to combine the value
// using DPP.
if (ValDivergent) {
- const AtomicRMWInst::BinOp ScanOp =
- Op == AtomicRMWInst::Sub ? AtomicRMWInst::Add : Op;
if (ScanImpl == ScanOptions::DPP) {
// First we need to set all inactive invocations to the identity value, so
// that they can correctly contribute to the final result.
- NewV =
- B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, Ty, {V, Identity});
- const AtomicRMWInst::BinOp ScanOp =
- Op == AtomicRMWInst::Sub ? AtomicRMWInst::Add : Op;
+ V = B.CreateBitCast(V, IntNTy);
+ Identity = B.CreateBitCast(Identity, IntNTy);
+ NewV = B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, IntNTy,
+ {V, Identity});
+ NewV = B.CreateBitCast(NewV, Ty);
+ V = B.CreateBitCast(V, Ty);
+ Identity = B.CreateBitCast(Identity, Ty);
if (!NeedResult && ST->hasPermLaneX16()) {
// On GFX10 the permlanex16 instruction helps us build a reduction
// without too many readlanes and writelanes, which are generally bad
@@ -718,8 +787,10 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
// which we will provide to the atomic operation.
Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1);
assert(TyBitWidth == 32);
+ NewV = B.CreateBitCast(NewV, IntNTy);
NewV = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
{NewV, LastLaneIdx});
+ NewV = B.CreateBitCast(NewV, Ty);
}
// Finally mark the readlanes in the WWM section.
NewV = B.CreateIntrinsic(Intrinsic::amdgcn_strict_wwm, Ty, NewV);
@@ -746,13 +817,22 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
NewV = buildMul(B, V, Ctpop);
break;
}
-
+ case AtomicRMWInst::FAdd:
+ case AtomicRMWInst::FSub: {
+ Value *const Ctpop = B.CreateIntCast(
+ B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Int32Ty, false);
+ Value *const CtpopFP = B.CreateUIToFP(Ctpop, Ty);
+ NewV = B.CreateFMul(V, CtpopFP);
+ break;
+ }
case AtomicRMWInst::And:
case AtomicRMWInst::Or:
case AtomicRMWInst::Max:
case AtomicRMWInst::Min:
case AtomicRMWInst::UMax:
case AtomicRMWInst::UMin:
+ case AtomicRMWInst::FMin:
+ case AtomicRMWInst::FMax:
// These operations with a uniform value are idempotent: doing the atomic
// operation multiple times has the same effect as doing it once.
NewV = V;
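[Aside, not part of the patch] For a wave-uniform operand the combined value degenerates per op: add/sub scale V by the active-lane count (the ctpop of the ballot, routed through uitofp for the new FAdd/FSub case), xor reduces to V times (count & 1), and the min/max/and/or family is idempotent. Note that folding N serial fadds into one fmul can change intermediate rounding in general; a sketch of the integer and fp folds under that caveat:

  #include <cstdio>

  int main() {
    const int n = 7; // ctpop of the ballot: 7 active lanes
    const int vi = 3;
    const float vf = 1.25f;

    int serialAdd = 0, serialXor = 0;
    float serialFAdd = 0.0f;
    for (int i = 0; i < n; ++i) {
      serialAdd += vi;
      serialXor ^= vi;
      serialFAdd += vf;
    }
    printf("add : %d == %d\n", vi * n, serialAdd);         // 21 == 21
    printf("xor : %d == %d\n", vi * (n & 1), serialXor);   // 3 == 3
    printf("fadd: %g == %g\n", vf * (float)n, serialFAdd); // 8.75 == 8.75
    return 0;
  }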
@@ -771,7 +851,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
// We only want a single lane to enter our new control flow, and we do this
// by checking if there are any active lanes below us. Only one lane will
// have 0 active lanes below us, so that will be the only one to progress.
- Value *const Cond = B.CreateICmpEQ(Mbcnt, B.getIntN(TyBitWidth, 0));
+ Value *const Cond = B.CreateICmpEQ(Mbcnt, B.getInt32(0));
// Store I's original basic block before we split the block.
BasicBlock *const EntryBB = I.getParent();
@@ -840,9 +920,8 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
Value *BroadcastI = nullptr;
if (TyBitWidth == 64) {
- Value *const ExtractLo = B.CreateTrunc(PHI, B.getInt32Ty());
- Value *const ExtractHi =
- B.CreateTrunc(B.CreateLShr(PHI, 32), B.getInt32Ty());
+ Value *const ExtractLo = B.CreateTrunc(PHI, Int32Ty);
+ Value *const ExtractHi = B.CreateTrunc(B.CreateLShr(PHI, 32), Int32Ty);
CallInst *const ReadFirstLaneLo =
B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractLo);
CallInst *const ReadFirstLaneHi =
@@ -853,8 +932,11 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
B.CreateInsertElement(PartialInsert, ReadFirstLaneHi, B.getInt32(1));
BroadcastI = B.CreateBitCast(Insert, Ty);
  } else if (TyBitWidth == 32) {
-    BroadcastI = B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, PHI);
+    Value *CastedPhi = B.CreateBitCast(PHI, IntNTy);
+    BroadcastI =
+        B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, CastedPhi);
+    BroadcastI = B.CreateBitCast(BroadcastI, Ty);
} else {
llvm_unreachable("Unhandled atomic bit width");
}
@@ -874,6 +956,8 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
llvm_unreachable("Atomic Optimzer is disabled for None strategy");
}
} else {
+ Mbcnt = isAtomicFloatingPointTy ? B.CreateUIToFP(Mbcnt, Ty)
+ : B.CreateIntCast(Mbcnt, Ty, false);
switch (Op) {
default:
llvm_unreachable("Unhandled atomic op");
@@ -887,18 +971,25 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
case AtomicRMWInst::Min:
case AtomicRMWInst::UMax:
case AtomicRMWInst::UMin:
+ case AtomicRMWInst::FMin:
+ case AtomicRMWInst::FMax:
LaneOffset = B.CreateSelect(Cond, Identity, V);
break;
case AtomicRMWInst::Xor:
LaneOffset = buildMul(B, V, B.CreateAnd(Mbcnt, 1));
break;
+ case AtomicRMWInst::FAdd:
+ case AtomicRMWInst::FSub: {
+ LaneOffset = B.CreateFMul(V, Mbcnt);
+ break;
+ }
}
}
Value *const Result = buildNonAtomicBinOp(B, Op, BroadcastI, LaneOffset);
if (IsPixelShader) {
// Need a final PHI to reconverge to above the helper lane branch mask.
- B.SetInsertPoint(PixelExitBB->getFirstNonPHI());
+ B.SetInsertPoint(PixelExitBB, PixelExitBB->getFirstNonPHIIt());
PHINode *const PHI = B.CreatePHI(Ty, 2);
PHI->addIncoming(PoisonValue::get(Ty), PixelEntryBB);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 57c873f00a4a..5fd9e571282d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -28,6 +28,10 @@ void initializeCycleInfoWrapperPassPass(PassRegistry &);
using namespace llvm;
+static cl::opt<unsigned> KernargPreloadCount(
+ "amdgpu-kernarg-preload-count",
+ cl::desc("How many kernel arguments to preload onto SGPRs"), cl::init(0));
+
#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
enum ImplicitArgumentPositions {
@@ -914,9 +918,68 @@ AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
}
-class AMDGPUAttributor : public ModulePass {
+static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ for (unsigned I = 0;
+ I < F.arg_size() &&
+ I < std::min(KernargPreloadCount.getValue(), ST.getMaxNumUserSGPRs());
+ ++I) {
+ Argument &Arg = *F.getArg(I);
+ // Check for incompatible attributes.
+ if (Arg.hasByRefAttr() || Arg.hasNestAttr())
+ break;
+
+ Arg.addAttr(Attribute::InReg);
+ }
+}
+
+static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
+ SetVector<Function *> Functions;
+ for (Function &F : M) {
+ if (!F.isIntrinsic())
+ Functions.insert(&F);
+ }
+
+ CallGraphUpdater CGUpdater;
+ BumpPtrAllocator Allocator;
+ AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
+ DenseSet<const char *> Allowed(
+ {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
+ &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
+ &AAAMDWavesPerEU::ID, &AACallEdges::ID, &AAPointerInfo::ID,
+ &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID});
+
+ AttributorConfig AC(CGUpdater);
+ AC.Allowed = &Allowed;
+ AC.IsModulePass = true;
+ AC.DefaultInitializeLiveInternals = false;
+ AC.IPOAmendableCB = [](const Function &F) {
+ return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
+ };
+
+ Attributor A(Functions, InfoCache, AC);
+
+ for (Function &F : M) {
+ if (!F.isIntrinsic()) {
+ A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
+ A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
+ CallingConv::ID CC = F.getCallingConv();
+ if (!AMDGPU::isEntryFunctionCC(CC)) {
+ A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
+ A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(F));
+ } else if (CC == CallingConv::AMDGPU_KERNEL) {
+ addPreloadKernArgHint(F, TM);
+ }
+ }
+ }
+
+ ChangeStatus Change = A.run();
+ return Change == ChangeStatus::CHANGED;
+}
+
+class AMDGPUAttributorLegacy : public ModulePass {
public:
- AMDGPUAttributor() : ModulePass(ID) {}
+ AMDGPUAttributorLegacy() : ModulePass(ID) {}
/// doInitialization - Virtual method overridden by subclasses to do
/// any necessary initialization before any pass is run.
@@ -930,45 +993,8 @@ public:
}
bool runOnModule(Module &M) override {
- SetVector<Function *> Functions;
AnalysisGetter AG(this);
- for (Function &F : M) {
- if (!F.isIntrinsic())
- Functions.insert(&F);
- }
-
- CallGraphUpdater CGUpdater;
- BumpPtrAllocator Allocator;
- AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
- DenseSet<const char *> Allowed(
- {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
- &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
- &AAAMDWavesPerEU::ID, &AACallEdges::ID, &AAPointerInfo::ID,
- &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID});
-
- AttributorConfig AC(CGUpdater);
- AC.Allowed = &Allowed;
- AC.IsModulePass = true;
- AC.DefaultInitializeLiveInternals = false;
- AC.IPOAmendableCB = [](const Function &F) {
- return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
- };
-
- Attributor A(Functions, InfoCache, AC);
-
- for (Function &F : M) {
- if (!F.isIntrinsic()) {
- A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
- A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
- if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
- A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
- A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(F));
- }
- }
- }
-
- ChangeStatus Change = A.run();
- return Change == ChangeStatus::CHANGED;
+ return runImpl(M, AG, *TM);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -981,11 +1007,25 @@ public:
};
} // namespace
-char AMDGPUAttributor::ID = 0;
+PreservedAnalyses llvm::AMDGPUAttributorPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ AnalysisGetter AG(FAM);
+
+ // TODO: Probably preserves CFG
+ return runImpl(M, AG, TM) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
+
+char AMDGPUAttributorLegacy::ID = 0;
-Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
-INITIALIZE_PASS_BEGIN(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false,
- false)
+Pass *llvm::createAMDGPUAttributorLegacyPass() {
+ return new AMDGPUAttributorLegacy();
+}
+INITIALIZE_PASS_BEGIN(AMDGPUAttributorLegacy, DEBUG_TYPE, "AMDGPU Attributor",
+ false, false)
INITIALIZE_PASS_DEPENDENCY(CycleInfoWrapperPass);
-INITIALIZE_PASS_END(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false,
- false)
+INITIALIZE_PASS_END(AMDGPUAttributorLegacy, DEBUG_TYPE, "AMDGPU Attributor",
+ false, false)
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 9ba5ea8fb73f..cf2896f80f19 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -32,7 +32,7 @@ namespace {
/// Wrapper around extendRegister to ensure we extend to a full 32-bit register.
static Register extendRegisterMin32(CallLowering::ValueHandler &Handler,
- Register ValVReg, CCValAssign &VA) {
+ Register ValVReg, const CCValAssign &VA) {
if (VA.getLocVT().getSizeInBits() < 32) {
// 16-bit types are reported as legal for 32-bit registers. We need to
// extend and do a 32-bit copy to avoid the verifier complaining about it.
@@ -56,12 +56,13 @@ struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
}
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
- MachinePointerInfo &MPO, CCValAssign &VA) override {
+ const MachinePointerInfo &MPO,
+ const CCValAssign &VA) override {
llvm_unreachable("not implemented");
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) override {
+ const CCValAssign &VA) override {
Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
// If this is a scalar return, insert a readfirstlane just in case the value
@@ -82,9 +83,10 @@ struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
ExtReg = MIRBuilder.buildBitcast(S32, ExtReg).getReg(0);
}
- auto ToSGPR = MIRBuilder.buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
- {MRI.getType(ExtReg)}, false)
- .addReg(ExtReg);
+ auto ToSGPR = MIRBuilder
+ .buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
+ {MRI.getType(ExtReg)})
+ .addReg(ExtReg);
ExtReg = ToSGPR.getReg(0);
}
@@ -116,7 +118,7 @@ struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) override {
+ const CCValAssign &VA) override {
markPhysRegUsed(PhysReg);
if (VA.getLocVT().getSizeInBits() < 32) {
@@ -136,7 +138,8 @@ struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler {
}
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
- MachinePointerInfo &MPO, CCValAssign &VA) override {
+ const MachinePointerInfo &MPO,
+ const CCValAssign &VA) override {
MachineFunction &MF = MIRBuilder.getMF();
auto MMO = MF.getMachineMemOperand(
@@ -228,14 +231,15 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) override {
+ const CCValAssign &VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
}
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
- MachinePointerInfo &MPO, CCValAssign &VA) override {
+ const MachinePointerInfo &MPO,
+ const CCValAssign &VA) override {
MachineFunction &MF = MIRBuilder.getMF();
uint64_t LocMemOffset = VA.getLocMemOffset();
const auto &ST = MF.getSubtarget<GCNSubtarget>();
@@ -248,7 +252,8 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
void assignValueToAddress(const CallLowering::ArgInfo &Arg,
unsigned ValRegIndex, Register Addr, LLT MemTy,
- MachinePointerInfo &MPO, CCValAssign &VA) override {
+ const MachinePointerInfo &MPO,
+ const CCValAssign &VA) override {
Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt
? extendRegister(Arg.Regs[ValRegIndex], VA)
: Arg.Regs[ValRegIndex];
@@ -454,27 +459,28 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) {
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
- if (Info.hasPrivateSegmentBuffer()) {
+ const GCNUserSGPRUsageInfo &UserSGPRInfo = Info.getUserSGPRInfo();
+ if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
CCInfo.AllocateReg(PrivateSegmentBufferReg);
}
- if (Info.hasDispatchPtr()) {
+ if (UserSGPRInfo.hasDispatchPtr()) {
Register DispatchPtrReg = Info.addDispatchPtr(TRI);
MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchPtrReg);
}
const Module *M = MF.getFunction().getParent();
- if (Info.hasQueuePtr() &&
+ if (UserSGPRInfo.hasQueuePtr() &&
AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) {
Register QueuePtrReg = Info.addQueuePtr(TRI);
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(QueuePtrReg);
}
- if (Info.hasKernargSegmentPtr()) {
+ if (UserSGPRInfo.hasKernargSegmentPtr()) {
MachineRegisterInfo &MRI = MF.getRegInfo();
Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
@@ -485,13 +491,13 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
CCInfo.AllocateReg(InputPtrReg);
}
- if (Info.hasDispatchID()) {
+ if (UserSGPRInfo.hasDispatchID()) {
Register DispatchIDReg = Info.addDispatchID(TRI);
MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchIDReg);
}
- if (Info.hasFlatScratchInit()) {
+ if (UserSGPRInfo.hasFlatScratchInit()) {
Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);
MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(FlatScratchInitReg);
@@ -596,15 +602,16 @@ bool AMDGPUCallLowering::lowerFormalArguments(
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
+ const GCNUserSGPRUsageInfo &UserSGPRInfo = Info->getUserSGPRInfo();
- if (Info->hasImplicitBufferPtr()) {
+ if (UserSGPRInfo.hasImplicitBufferPtr()) {
Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(ImplicitBufferPtrReg);
}
// FIXME: This probably isn't defined for mesa
- if (Info->hasFlatScratchInit() && !Subtarget.isAmdPalOS()) {
+ if (UserSGPRInfo.hasFlatScratchInit() && !Subtarget.isAmdPalOS()) {
Register FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(FlatScratchInitReg);
@@ -954,12 +961,18 @@ getAssignFnsForCC(CallingConv::ID CC, const SITargetLowering &TLI) {
}
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
- bool IsTailCall, CallingConv::ID CC) {
- assert(!(IsIndirect && IsTailCall) && "Indirect calls can't be tail calls, "
- "because the address can be divergent");
+ bool IsTailCall, bool isWave32,
+ CallingConv::ID CC) {
+ // For calls to amdgpu_cs_chain functions, the address is known to be uniform.
+ assert((AMDGPU::isChainCC(CC) || !IsIndirect || !IsTailCall) &&
+ "Indirect calls can't be tail calls, "
+ "because the address can be divergent");
if (!IsTailCall)
return AMDGPU::G_SI_CALL;
+ if (AMDGPU::isChainCC(CC))
+ return isWave32 ? AMDGPU::SI_CS_CHAIN_TC_W32 : AMDGPU::SI_CS_CHAIN_TC_W64;
+
return CC == CallingConv::AMDGPU_Gfx ? AMDGPU::SI_TCRETURN_GFX :
AMDGPU::SI_TCRETURN;
}
@@ -1147,14 +1160,20 @@ bool AMDGPUCallLowering::isEligibleForTailCallOptimization(
void AMDGPUCallLowering::handleImplicitCallArguments(
MachineIRBuilder &MIRBuilder, MachineInstrBuilder &CallInst,
const GCNSubtarget &ST, const SIMachineFunctionInfo &FuncInfo,
+ CallingConv::ID CalleeCC,
ArrayRef<std::pair<MCRegister, Register>> ImplicitArgRegs) const {
if (!ST.enableFlatScratch()) {
// Insert copies for the SRD. In the HSA case, this should be an identity
// copy.
auto ScratchRSrcReg = MIRBuilder.buildCopy(LLT::fixed_vector(4, 32),
FuncInfo.getScratchRSrcReg());
- MIRBuilder.buildCopy(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg);
- CallInst.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Implicit);
+
+ auto CalleeRSrcReg = AMDGPU::isChainCC(CalleeCC)
+ ? AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51
+ : AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
+
+ MIRBuilder.buildCopy(CalleeRSrcReg, ScratchRSrcReg);
+ CallInst.addReg(CalleeRSrcReg, RegState::Implicit);
}
for (std::pair<MCRegister, Register> ArgReg : ImplicitArgRegs) {
@@ -1186,7 +1205,8 @@ bool AMDGPUCallLowering::lowerTailCall(
if (!IsSibCall)
CallSeqStart = MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP);
- unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true, CalleeCC);
+ unsigned Opc =
+ getCallOpcode(MF, Info.Callee.isReg(), true, ST.isWave32(), CalleeCC);
auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
if (!addCallTargetOperands(MIB, MIRBuilder, Info))
return false;
@@ -1195,8 +1215,27 @@ bool AMDGPUCallLowering::lowerTailCall(
// be 0.
MIB.addImm(0);
- // Tell the call which registers are clobbered.
+ // If this is a chain call, we need to pass in the EXEC mask.
const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ if (AMDGPU::isChainCC(Info.CallConv)) {
+ ArgInfo ExecArg = Info.OrigArgs[1];
+ assert(ExecArg.Regs.size() == 1 && "Too many regs for EXEC");
+
+ if (!ExecArg.Ty->isIntegerTy(ST.getWavefrontSize()))
+ return false;
+
+ if (auto CI = dyn_cast<ConstantInt>(ExecArg.OrigValue)) {
+ MIB.addImm(CI->getSExtValue());
+ } else {
+ MIB.addReg(ExecArg.Regs[0]);
+ unsigned Idx = MIB->getNumOperands() - 1;
+ MIB->getOperand(Idx).setReg(constrainOperandRegClass(
+ MF, *TRI, MRI, *ST.getInstrInfo(), *ST.getRegBankInfo(), *MIB,
+ MIB->getDesc(), MIB->getOperand(Idx), Idx));
+ }
+ }
+
+ // Tell the call which registers are clobbered.
const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
MIB.addRegMask(Mask);
@@ -1250,7 +1289,8 @@ bool AMDGPUCallLowering::lowerTailCall(
// after the ordinary user argument registers.
SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;
- if (Info.CallConv != CallingConv::AMDGPU_Gfx) {
+ if (Info.CallConv != CallingConv::AMDGPU_Gfx &&
+ !AMDGPU::isChainCC(Info.CallConv)) {
// With a fixed ABI, allocate fixed registers before user arguments.
if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
return false;
@@ -1266,7 +1306,8 @@ bool AMDGPUCallLowering::lowerTailCall(
if (!handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))
return false;
- handleImplicitCallArguments(MIRBuilder, MIB, ST, *FuncInfo, ImplicitArgRegs);
+ handleImplicitCallArguments(MIRBuilder, MIB, ST, *FuncInfo, CalleeCC,
+ ImplicitArgRegs);
// If we have -tailcallopt, we need to adjust the stack. We'll do the call
// sequence start and end here.
@@ -1300,8 +1341,62 @@ bool AMDGPUCallLowering::lowerTailCall(
return true;
}
+/// Lower a call to the @llvm.amdgcn.cs.chain intrinsic.
+bool AMDGPUCallLowering::lowerChainCall(MachineIRBuilder &MIRBuilder,
+ CallLoweringInfo &Info) const {
+ ArgInfo Callee = Info.OrigArgs[0];
+ ArgInfo SGPRArgs = Info.OrigArgs[2];
+ ArgInfo VGPRArgs = Info.OrigArgs[3];
+ ArgInfo Flags = Info.OrigArgs[4];
+
+ assert(cast<ConstantInt>(Flags.OrigValue)->isZero() &&
+ "Non-zero flags aren't supported yet.");
+ assert(Info.OrigArgs.size() == 5 && "Additional args aren't supported yet.");
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ const Function &F = MF.getFunction();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+
+ // The function to jump to is actually the first argument, so we'll change the
+ // Callee and other info to match that before using our existing helper.
+ const Value *CalleeV = Callee.OrigValue->stripPointerCasts();
+ if (const Function *F = dyn_cast<Function>(CalleeV)) {
+ Info.Callee = MachineOperand::CreateGA(F, 0);
+ Info.CallConv = F->getCallingConv();
+ } else {
+ assert(Callee.Regs.size() == 1 && "Too many regs for the callee");
+ Info.Callee = MachineOperand::CreateReg(Callee.Regs[0], false);
+ Info.CallConv = CallingConv::AMDGPU_CS_Chain; // amdgpu_cs_chain_preserve
+ // behaves the same here.
+ }
+
+ // The function that we're calling cannot be vararg (only the intrinsic is).
+ Info.IsVarArg = false;
+
+ assert(std::all_of(SGPRArgs.Flags.begin(), SGPRArgs.Flags.end(),
+ [](ISD::ArgFlagsTy F) { return F.isInReg(); }) &&
+ "SGPR arguments should be marked inreg");
+ assert(std::none_of(VGPRArgs.Flags.begin(), VGPRArgs.Flags.end(),
+ [](ISD::ArgFlagsTy F) { return F.isInReg(); }) &&
+ "VGPR arguments should not be marked inreg");
+
+ SmallVector<ArgInfo, 8> OutArgs;
+ splitToValueTypes(SGPRArgs, OutArgs, DL, Info.CallConv);
+ splitToValueTypes(VGPRArgs, OutArgs, DL, Info.CallConv);
+
+ Info.IsMustTailCall = true;
+ return lowerTailCall(MIRBuilder, Info, OutArgs);
+}
+
bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const {
+ if (Function *F = Info.CB->getCalledFunction())
+ if (F->isIntrinsic()) {
+ assert(F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain &&
+ "Unexpected intrinsic");
+ return lowerChainCall(MIRBuilder, Info);
+ }
+
if (Info.IsVarArg) {
LLVM_DEBUG(dbgs() << "Variadic functions not implemented\n");
return false;
@@ -1350,11 +1445,15 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// Create a temporarily-floating call instruction so we can add the implicit
// uses of arg registers.
- unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false, Info.CallConv);
+ unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false, ST.isWave32(),
+ Info.CallConv);
auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
MIB.addDef(TRI->getReturnAddressReg(MF));
+ if (!Info.IsConvergent)
+ MIB.setMIFlag(MachineInstr::NoConvergent);
+
if (!addCallTargetOperands(MIB, MIRBuilder, Info))
return false;
@@ -1389,7 +1488,8 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- handleImplicitCallArguments(MIRBuilder, MIB, ST, *MFI, ImplicitArgRegs);
+ handleImplicitCallArguments(MIRBuilder, MIB, ST, *MFI, Info.CallConv,
+ ImplicitArgRegs);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getStackSize();
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
index 569c6d75204d..a6e801f2a547 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -75,10 +75,13 @@ public:
void handleImplicitCallArguments(
MachineIRBuilder &MIRBuilder, MachineInstrBuilder &CallInst,
const GCNSubtarget &ST, const SIMachineFunctionInfo &MFI,
+ CallingConv::ID CalleeCC,
ArrayRef<std::pair<MCRegister, Register>> ImplicitArgRegs) const;
bool lowerTailCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
SmallVectorImpl<ArgInfo> &OutArgs) const;
+ bool lowerChainCall(MachineIRBuilder &MIRBuilder,
+ CallLoweringInfo &Info) const;
bool lowerCall(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 2b70665ab95c..9036b26a6f6b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -176,6 +176,10 @@ def CSR_AMDGPU_SI_Gfx_GFX90AInsts : CalleeSavedRegs<
(add CSR_AMDGPU_SI_Gfx, CSR_AMDGPU_AGPRs)
>;
+def CSR_AMDGPU_CS_ChainPreserve : CalleeSavedRegs<
+ (sequence "VGPR%u", 8, 255)
+>;
+
def CSR_AMDGPU_NoRegs : CalleeSavedRegs<(add)>;
// Calling convention for leaf functions
@@ -183,6 +187,11 @@ def CC_AMDGPU_Func : CallingConv<[
CCIfByVal<CCPassByVal<4, 4>>,
CCIfType<[i1], CCPromoteToType<i32>>,
CCIfType<[i8, i16], CCIfExtend<CCPromoteToType<i32>>>,
+
+ CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<
+ !foreach(i, !range(0, 30), !cast<Register>("SGPR"#i)) // SGPR0-29
+ >>>,
+
CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1], CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
@@ -213,6 +222,16 @@ def CC_AMDGPU : CallingConv<[
CCDelegateTo<CC_AMDGPU_Func>>
]>;
+def CC_AMDGPU_CS_CHAIN : CallingConv<[
+ CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<
+ !foreach(i, !range(105), !cast<Register>("SGPR"#i))
+ >>>,
+
+ CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<
+ !foreach(i, !range(8, 255), !cast<Register>("VGPR"#i))
+ >>>
+]>;
+
// Trivial class to denote when a def is used only to get a RegMask, i.e.
// SaveList is ignored and the def is not used as part of any calling
// convention.
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 4ec85f3c5588..87b1957c799e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -52,17 +52,17 @@ static cl::opt<bool> Widen16BitOps(
cl::init(true));
static cl::opt<bool>
- ScalarizeLargePHIs("amdgpu-codegenprepare-break-large-phis",
- cl::desc("Break large PHI nodes for DAGISel"),
- cl::ReallyHidden, cl::init(true));
+ BreakLargePHIs("amdgpu-codegenprepare-break-large-phis",
+ cl::desc("Break large PHI nodes for DAGISel"),
+ cl::ReallyHidden, cl::init(true));
static cl::opt<bool>
- ForceScalarizeLargePHIs("amdgpu-codegenprepare-force-break-large-phis",
- cl::desc("For testing purposes, always break large "
- "PHIs even if it isn't profitable."),
- cl::ReallyHidden, cl::init(false));
+ ForceBreakLargePHIs("amdgpu-codegenprepare-force-break-large-phis",
+ cl::desc("For testing purposes, always break large "
+ "PHIs even if it isn't profitable."),
+ cl::ReallyHidden, cl::init(false));
-static cl::opt<unsigned> ScalarizeLargePHIsThreshold(
+static cl::opt<unsigned> BreakLargePHIsThreshold(
"amdgpu-codegenprepare-break-large-phis-threshold",
cl::desc("Minimum type size in bits for breaking large PHI nodes"),
cl::ReallyHidden, cl::init(32));
@@ -108,9 +108,31 @@ public:
bool HasUnsafeFPMath = false;
bool HasFP32DenormalFlush = false;
bool FlowChanged = false;
+ mutable Function *SqrtF32 = nullptr;
+ mutable Function *LdexpF32 = nullptr;
DenseMap<const PHINode *, bool> BreakPhiNodesCache;
+ Function *getSqrtF32() const {
+ if (SqrtF32)
+ return SqrtF32;
+
+ LLVMContext &Ctx = Mod->getContext();
+ SqrtF32 = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_sqrt,
+ {Type::getFloatTy(Ctx)});
+ return SqrtF32;
+ }
+
+ Function *getLdexpF32() const {
+ if (LdexpF32)
+ return LdexpF32;
+
+ LLVMContext &Ctx = Mod->getContext();
+ LdexpF32 = Intrinsic::getDeclaration(
+ Mod, Intrinsic::ldexp, {Type::getFloatTy(Ctx), Type::getInt32Ty(Ctx)});
+ return LdexpF32;
+ }
+
bool canBreakPHINode(const PHINode &I);
/// Copies exact/nsw/nuw flags (if any) from binary operation \p I to
@@ -276,6 +298,8 @@ public:
bool IsNegative) const;
Value *emitFrexpDiv(IRBuilder<> &Builder, Value *LHS, Value *RHS,
FastMathFlags FMF) const;
+ Value *emitSqrtIEEE2ULP(IRBuilder<> &Builder, Value *Src,
+ FastMathFlags FMF) const;
public:
bool visitFDiv(BinaryOperator &I);
@@ -290,6 +314,7 @@ public:
bool visitIntrinsicInst(IntrinsicInst &I);
bool visitBitreverseIntrinsicInst(IntrinsicInst &I);
bool visitMinNum(IntrinsicInst &I);
+ bool visitSqrt(IntrinsicInst &I);
bool run(Function &F);
};
@@ -319,6 +344,7 @@ public:
} // end anonymous namespace
bool AMDGPUCodeGenPrepareImpl::run(Function &F) {
+ BreakPhiNodesCache.clear();
bool MadeChange = false;
Function::iterator NextBB;
@@ -598,34 +624,6 @@ static Value *insertValues(IRBuilder<> &Builder,
return NewVal;
}
-// Returns 24-bit or 48-bit (as per `NumBits` and `Size`) mul of `LHS` and
-// `RHS`. `NumBits` is the number of KnownBits of the result and `Size` is the
-// width of the original destination.
-static Value *getMul24(IRBuilder<> &Builder, Value *LHS, Value *RHS,
- unsigned Size, unsigned NumBits, bool IsSigned) {
- if (Size <= 32 || NumBits <= 32) {
- Intrinsic::ID ID =
- IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;
- return Builder.CreateIntrinsic(ID, {}, {LHS, RHS});
- }
-
- assert(NumBits <= 48);
-
- Intrinsic::ID LoID =
- IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;
- Intrinsic::ID HiID =
- IsSigned ? Intrinsic::amdgcn_mulhi_i24 : Intrinsic::amdgcn_mulhi_u24;
-
- Value *Lo = Builder.CreateIntrinsic(LoID, {}, {LHS, RHS});
- Value *Hi = Builder.CreateIntrinsic(HiID, {}, {LHS, RHS});
-
- IntegerType *I64Ty = Builder.getInt64Ty();
- Lo = Builder.CreateZExtOrTrunc(Lo, I64Ty);
- Hi = Builder.CreateZExtOrTrunc(Hi, I64Ty);
-
- return Builder.CreateOr(Lo, Builder.CreateShl(Hi, 32));
-}
-
bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
if (I.getOpcode() != Instruction::Mul)
return false;
@@ -665,26 +663,20 @@ bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
extractValues(Builder, RHSVals, RHS);
IntegerType *I32Ty = Builder.getInt32Ty();
- for (int I = 0, E = LHSVals.size(); I != E; ++I) {
- Value *LHS, *RHS;
- if (IsSigned) {
- LHS = Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty);
- RHS = Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty);
- } else {
- LHS = Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
- RHS = Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
- }
-
- Value *Result =
- getMul24(Builder, LHS, RHS, Size, LHSBits + RHSBits, IsSigned);
+ IntegerType *IntrinTy = Size > 32 ? Builder.getInt64Ty() : I32Ty;
+ Type *DstTy = LHSVals[0]->getType();
- if (IsSigned) {
- ResultVals.push_back(
- Builder.CreateSExtOrTrunc(Result, LHSVals[I]->getType()));
- } else {
- ResultVals.push_back(
- Builder.CreateZExtOrTrunc(Result, LHSVals[I]->getType()));
- }
+ for (int I = 0, E = LHSVals.size(); I != E; ++I) {
+ Value *LHS = IsSigned ? Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty)
+ : Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
+ Value *RHS = IsSigned ? Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty)
+ : Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
+ Intrinsic::ID ID =
+ IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;
+ Value *Result = Builder.CreateIntrinsic(ID, {IntrinTy}, {LHS, RHS});
+ Result = IsSigned ? Builder.CreateSExtOrTrunc(Result, DstTy)
+ : Builder.CreateZExtOrTrunc(Result, DstTy);
+ ResultVals.push_back(Result);
}
Value *NewVal = insertValues(Builder, Ty, ResultVals);
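[Aside, not part of the patch] The rewrite collapses the old lo/hi composition into a single intrinsic call by overloading the mul24 result type, as the {IntrinTy} argument suggests (i64 once Size > 32). The semantics assumed here: multiply the low 24 bits of each operand, so the full product always fits in 48 bits. A quick host-side check of that bound:

  #include <cstdint>
  #include <cstdio>

  static uint64_t mul_u24(uint32_t a, uint32_t b) {
    return (uint64_t)(a & 0xffffff) * (uint64_t)(b & 0xffffff);
  }

  int main() {
    // Worst case: (2^24 - 1)^2 = 0xFFFFFE000001, comfortably below 2^48.
    printf("0x%llx\n", (unsigned long long)mul_u24(0xffffffu, 0xffffffu));
    return 0;
  }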
@@ -809,14 +801,10 @@ Value *AMDGPUCodeGenPrepareImpl::emitRcpIEEE1ULP(IRBuilder<> &Builder,
// range won't underflow to denormal. The hard part is knowing the
// result. We need a range check; the result could be denormal for
// 0x1p+126 < den <= 0x1p+127.
-
- Type *Ty = Src->getType();
-
auto [FrexpMant, FrexpExp] = getFrexpResults(Builder, Src);
Value *ScaleFactor = Builder.CreateNeg(FrexpExp);
Value *Rcp = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rcp, FrexpMant);
- return Builder.CreateIntrinsic(Intrinsic::ldexp, {Ty, Builder.getInt32Ty()},
- {Rcp, ScaleFactor});
+ return Builder.CreateCall(getLdexpF32(), {Rcp, ScaleFactor});
}
/// Emit a 2ulp expansion for fdiv by using frexp for input scaling.
@@ -832,8 +820,6 @@ Value *AMDGPUCodeGenPrepareImpl::emitFrexpDiv(IRBuilder<> &Builder, Value *LHS,
// We're scaling the LHS to avoid a denormal input, and scale the denominator
// to avoid large values underflowing the result.
- Type *Ty = LHS->getType();
-
auto [FrexpMantRHS, FrexpExpRHS] = getFrexpResults(Builder, RHS);
Value *Rcp =
@@ -845,8 +831,30 @@ Value *AMDGPUCodeGenPrepareImpl::emitFrexpDiv(IRBuilder<> &Builder, Value *LHS,
// We multiplied by 2^N/2^M, so we need to multiply by 2^(N-M) to scale the
// result.
Value *ExpDiff = Builder.CreateSub(FrexpExpLHS, FrexpExpRHS);
- return Builder.CreateIntrinsic(Intrinsic::ldexp, {Ty, Builder.getInt32Ty()},
- {Mul, ExpDiff});
+ return Builder.CreateCall(getLdexpF32(), {Mul, ExpDiff});
+}
+
+/// Emit a sqrt that handles denormals and is accurate to 2ulp.
+Value *AMDGPUCodeGenPrepareImpl::emitSqrtIEEE2ULP(IRBuilder<> &Builder,
+ Value *Src,
+ FastMathFlags FMF) const {
+ Type *Ty = Src->getType();
+ APFloat SmallestNormal =
+ APFloat::getSmallestNormalized(Ty->getFltSemantics());
+ Value *NeedScale =
+ Builder.CreateFCmpOLT(Src, ConstantFP::get(Ty, SmallestNormal));
+
+ ConstantInt *Zero = Builder.getInt32(0);
+ Value *InputScaleFactor =
+ Builder.CreateSelect(NeedScale, Builder.getInt32(32), Zero);
+
+ Value *Scaled = Builder.CreateCall(getLdexpF32(), {Src, InputScaleFactor});
+
+ Value *Sqrt = Builder.CreateCall(getSqrtF32(), Scaled);
+
+ Value *OutputScaleFactor =
+ Builder.CreateSelect(NeedScale, Builder.getInt32(-16), Zero);
+ return Builder.CreateCall(getLdexpF32(), {Sqrt, OutputScaleFactor});
}
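[Aside, not part of the patch] The 32/-16 constants encode sqrt(x * 2^32) = sqrt(x) * 2^16: scaling a denormal input up by 2^32 lands it in the normal range, and the result is rescaled by 2^-16 to compensate. A host-side sketch of the identity, assuming the host libm handles the subnormal sqrt correctly for the comparison:

  #include <cmath>
  #include <cstdio>

  int main() {
    float x = 0x1p-140f;          // denormal f32 input
    float scaled = ldexpf(x, 32); // 0x1p-108, safely normal
    float r = ldexpf(sqrtf(scaled), -16);
    printf("%a vs %a\n", (double)r, (double)sqrtf(x)); // both 0x1p-70
    return 0;
  }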
/// Emit an expansion of 1.0 / sqrt(Src) good for 1ulp that supports denormals.
@@ -890,8 +898,8 @@ bool AMDGPUCodeGenPrepareImpl::canOptimizeWithRsq(const FPMathOperator *SqrtOp,
}
Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
- IRBuilder<> &Builder, Value *Num, Value *Den, FastMathFlags DivFMF,
- FastMathFlags SqrtFMF, const Instruction *CtxI) const {
+ IRBuilder<> &Builder, Value *Num, Value *Den, const FastMathFlags DivFMF,
+ const FastMathFlags SqrtFMF, const Instruction *CtxI) const {
// The rsqrt contraction increases accuracy from ~2ulp to ~1ulp.
assert(DivFMF.allowContract() && SqrtFMF.allowContract());
@@ -910,10 +918,9 @@ Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
if (CLHS->isExactlyValue(1.0) || (IsNegative = CLHS->isExactlyValue(-1.0))) {
// Add in the sqrt flags.
IRBuilder<>::FastMathFlagGuard Guard(Builder);
- DivFMF |= SqrtFMF;
- Builder.setFastMathFlags(DivFMF);
+ Builder.setFastMathFlags(DivFMF | SqrtFMF);
- if ((DivFMF.approxFunc() && SqrtFMF.approxFunc()) ||
+ if ((DivFMF.approxFunc() && SqrtFMF.approxFunc()) || HasUnsafeFPMath ||
canIgnoreDenormalInput(Den, CtxI)) {
Value *Result = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, Den);
// -1.0 / sqrt(x) -> fneg(rsq(x))
@@ -1077,6 +1084,21 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
const FastMathFlags DivFMF = FPOp->getFastMathFlags();
const float ReqdAccuracy = FPOp->getFPAccuracy();
+ FastMathFlags SqrtFMF;
+
+ Value *Num = FDiv.getOperand(0);
+ Value *Den = FDiv.getOperand(1);
+
+ Value *RsqOp = nullptr;
+ auto *DenII = dyn_cast<IntrinsicInst>(Den);
+ if (DenII && DenII->getIntrinsicID() == Intrinsic::sqrt &&
+ DenII->hasOneUse()) {
+ const auto *SqrtOp = cast<FPMathOperator>(DenII);
+ SqrtFMF = SqrtOp->getFastMathFlags();
+ if (canOptimizeWithRsq(SqrtOp, DivFMF, SqrtFMF))
+ RsqOp = SqrtOp->getOperand(0);
+ }
+
// Inaccurate rcp is allowed with unsafe-fp-math or afn.
//
// Defer to codegen to handle this.
@@ -1087,28 +1109,13 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
// don't need any pre-consideration here when we have better information. A
// more conservative interpretation could use handling here.
const bool AllowInaccurateRcp = HasUnsafeFPMath || DivFMF.approxFunc();
- if (AllowInaccurateRcp)
+ if (!RsqOp && AllowInaccurateRcp)
return false;
// Defer the correct implementations to codegen.
if (ReqdAccuracy < 1.0f)
return false;
- FastMathFlags SqrtFMF;
-
- Value *Num = FDiv.getOperand(0);
- Value *Den = FDiv.getOperand(1);
-
- Value *RsqOp = nullptr;
- auto *DenII = dyn_cast<IntrinsicInst>(Den);
- if (DenII && DenII->getIntrinsicID() == Intrinsic::sqrt &&
- DenII->hasOneUse()) {
- const auto *SqrtOp = cast<FPMathOperator>(DenII);
- SqrtFMF = SqrtOp->getFastMathFlags();
- if (canOptimizeWithRsq(SqrtOp, DivFMF, SqrtFMF))
- RsqOp = SqrtOp->getOperand(0);
- }
-
IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()));
Builder.setFastMathFlags(DivFMF);
Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());
@@ -1777,47 +1784,79 @@ static bool isInterestingPHIIncomingValue(const Value *V) {
return false;
}
+static void collectPHINodes(const PHINode &I,
+ SmallPtrSet<const PHINode *, 8> &SeenPHIs) {
+ const auto [It, Inserted] = SeenPHIs.insert(&I);
+ if (!Inserted)
+ return;
+
+ for (const Value *Inc : I.incoming_values()) {
+ if (const auto *PhiInc = dyn_cast<PHINode>(Inc))
+ collectPHINodes(*PhiInc, SeenPHIs);
+ }
+
+ for (const User *U : I.users()) {
+ if (const auto *PhiU = dyn_cast<PHINode>(U))
+ collectPHINodes(*PhiU, SeenPHIs);
+ }
+}
+
bool AMDGPUCodeGenPrepareImpl::canBreakPHINode(const PHINode &I) {
- // Check in the cache, or add an entry for this node.
- //
- // We init with false because we consider all PHI nodes unbreakable until we
- // reach a conclusion. Doing the opposite - assuming they're break-able until
- // proven otherwise - can be harmful in some pathological cases so we're
- // conservative for now.
- const auto [It, DidInsert] = BreakPhiNodesCache.insert({&I, false});
- if (!DidInsert)
+ // Check in the cache first.
+ if (const auto It = BreakPhiNodesCache.find(&I);
+ It != BreakPhiNodesCache.end())
return It->second;
- // This function may recurse, so to guard against infinite looping, this PHI
- // is conservatively considered unbreakable until we reach a conclusion.
+ // We consider PHI nodes as part of "chains", so given a PHI node I, we
+ // recursively consider all its users and incoming values that are also PHI
+ // nodes. We then make a decision about all of those PHIs at once. Either they
+ // all get broken up, or none of them do. That way, we avoid cases where a
+ // single PHI is/is not broken and we end up reforming/exploding a vector
+ // multiple times, or even worse, doing it in a loop.
+ SmallPtrSet<const PHINode *, 8> WorkList;
+ collectPHINodes(I, WorkList);
+
+#ifndef NDEBUG
+ // Check that none of the PHI nodes in the worklist are in the map. If some of
+ // them are, it means we're not good enough at collecting related PHIs.
+ for (const PHINode *WLP : WorkList) {
+ assert(BreakPhiNodesCache.count(WLP) == 0);
+ }
+#endif
- // Don't break PHIs that have no interesting incoming values. That is, where
- // there is no clear opportunity to fold the "extractelement" instructions we
- // would add.
+ // To consider a PHI profitable to break, we need to see some interesting
+ // incoming values. At least 2/3rd (rounded up) of all PHIs in the worklist
+ // must have one to consider all PHIs breakable.
//
- // Note: IC does not run after this pass, so we're only interested in the
- // foldings that the DAG combiner can do.
- if (none_of(I.incoming_values(),
- [&](Value *V) { return isInterestingPHIIncomingValue(V); }))
- return false;
-
- // Now, check users for unbreakable PHI nodes. If we have an unbreakable PHI
- // node as user, we don't want to break this PHI either because it's unlikely
- // to be beneficial. We would just explode the vector and reassemble it
- // directly, wasting instructions.
+ // This threshold has been determined through performance testing.
+ //
+ // Note that the computation below is equivalent to
+ //
+ // (unsigned)ceil((K / 3.0) * 2)
//
- // In the case where multiple users are PHI nodes, we want at least half of
- // them to be breakable.
- int Score = 0;
- for (const Value *U : I.users()) {
- if (const auto *PU = dyn_cast<PHINode>(U))
- Score += canBreakPHINode(*PU) ? 1 : -1;
+ // It's simply written this way to avoid mixing integral/FP arithmetic.
+ const auto Threshold = (alignTo(WorkList.size() * 2, 3) / 3);
+ unsigned NumBreakablePHIs = 0;
+ bool CanBreak = false;
+ for (const PHINode *Cur : WorkList) {
+ // Don't break PHIs that have no interesting incoming values. That is, where
+ // there is no clear opportunity to fold the "extractelement" instructions
+ // we would add.
+ //
+ // Note: IC does not run after this pass, so we're only interested in the
+ // foldings that the DAG combiner can do.
+ if (any_of(Cur->incoming_values(), isInterestingPHIIncomingValue)) {
+ if (++NumBreakablePHIs >= Threshold) {
+ CanBreak = true;
+ break;
+ }
+ }
}
- if (Score < 0)
- return false;
+ for (const PHINode *Cur : WorkList)
+ BreakPhiNodesCache[Cur] = CanBreak;
- return BreakPhiNodesCache[&I] = true;
+ return CanBreak;
}
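[Aside, not part of the patch] The threshold comment can be checked mechanically: alignTo(K * 2, 3) / 3 rounds 2K up to a multiple of 3 before dividing, which is exactly ceil((K / 3.0) * 2). A small verification, with alignTo re-implemented to match llvm::alignTo's round-up-to-a-multiple semantics:

  #include <cassert>
  #include <cmath>
  #include <cstdio>

  static unsigned alignTo(unsigned value, unsigned align) {
    return (value + align - 1) / align * align; // round up to a multiple
  }

  int main() {
    for (unsigned k = 1; k <= 1000; ++k)
      assert(alignTo(k * 2, 3) / 3 ==
             (unsigned)std::ceil((k / 3.0) * 2)); // the comment's formula
    puts("alignTo(K*2,3)/3 == ceil((K/3)*2) for K in [1,1000]");
    return 0;
  }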
/// Helper class for "break large PHIs" (visitPHINode).
@@ -1898,14 +1937,15 @@ bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) {
// operations with most elements being "undef". This inhibits a lot of
// optimization opportunities and can result in unreasonably high register
// pressure and the inevitable stack spilling.
- if (!ScalarizeLargePHIs || getCGPassBuilderOption().EnableGlobalISelOption)
+ if (!BreakLargePHIs || getCGPassBuilderOption().EnableGlobalISelOption)
return false;
FixedVectorType *FVT = dyn_cast<FixedVectorType>(I.getType());
- if (!FVT || DL->getTypeSizeInBits(FVT) <= ScalarizeLargePHIsThreshold)
+ if (!FVT || FVT->getNumElements() == 1 ||
+ DL->getTypeSizeInBits(FVT) <= BreakLargePHIsThreshold)
return false;
- if (!ForceScalarizeLargePHIs && !canBreakPHINode(I))
+ if (!ForceBreakLargePHIs && !canBreakPHINode(I))
return false;
std::vector<VectorSlice> Slices;
@@ -1930,8 +1970,7 @@ bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) {
Slices.emplace_back(EltTy, Idx, 1);
}
- if (Slices.size() == 1)
- return false;
+ assert(Slices.size() > 1);
// Create one PHI per vector piece. The "VectorSlice" class takes care of
// creating the necessary instruction to extract the relevant slices of each
@@ -1977,6 +2016,8 @@ bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) {
return visitBitreverseIntrinsicInst(I);
case Intrinsic::minnum:
return visitMinNum(I);
+ case Intrinsic::sqrt:
+ return visitSqrt(I);
default:
return false;
}
@@ -2070,9 +2111,75 @@ bool AMDGPUCodeGenPrepareImpl::visitMinNum(IntrinsicInst &I) {
return true;
}
+static bool isOneOrNegOne(const Value *Val) {
+ const APFloat *C;
+ return match(Val, m_APFloat(C)) && C->getExactLog2Abs() == 0;
+}
+
+// Expand llvm.sqrt.f32 calls with !fpmath metadata in a semi-fast way.
+bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) {
+ Type *Ty = Sqrt.getType()->getScalarType();
+ if (!Ty->isFloatTy() && (!Ty->isHalfTy() || ST->has16BitInsts()))
+ return false;
+
+ const FPMathOperator *FPOp = cast<const FPMathOperator>(&Sqrt);
+ FastMathFlags SqrtFMF = FPOp->getFastMathFlags();
+
+ // We're trying to handle the fast-but-not-that-fast case only. The lowering
+ // of fast llvm.sqrt will give the raw instruction anyway.
+ if (SqrtFMF.approxFunc() || HasUnsafeFPMath)
+ return false;
+
+ const float ReqdAccuracy = FPOp->getFPAccuracy();
+
+ // Defer correctly rounded expansion to codegen.
+ if (ReqdAccuracy < 1.0f)
+ return false;
+
+ // FIXME: This is an ugly hack for this pass using forward iteration instead
+ // of reverse. If it worked like a normal combiner, the rsq would form before
+ // we saw a sqrt call.
+ auto *FDiv =
+ dyn_cast_or_null<FPMathOperator>(Sqrt.getUniqueUndroppableUser());
+ if (FDiv && FDiv->getOpcode() == Instruction::FDiv &&
+ FDiv->getFPAccuracy() >= 1.0f &&
+ canOptimizeWithRsq(FPOp, FDiv->getFastMathFlags(), SqrtFMF) &&
+ // TODO: We should also handle the arcp case for the fdiv with non-1 value
+ isOneOrNegOne(FDiv->getOperand(0)))
+ return false;
+
+ Value *SrcVal = Sqrt.getOperand(0);
+ bool CanTreatAsDAZ = canIgnoreDenormalInput(SrcVal, &Sqrt);
+
+ // The raw instruction is 1 ulp, but the correction for denormal handling
+ // brings it to 2.
+ if (!CanTreatAsDAZ && ReqdAccuracy < 2.0f)
+ return false;
+
+ IRBuilder<> Builder(&Sqrt);
+ SmallVector<Value *, 4> SrcVals;
+ extractValues(Builder, SrcVals, SrcVal);
+
+ SmallVector<Value *, 4> ResultVals(SrcVals.size());
+ for (int I = 0, E = SrcVals.size(); I != E; ++I) {
+ if (CanTreatAsDAZ)
+ ResultVals[I] = Builder.CreateCall(getSqrtF32(), SrcVals[I]);
+ else
+ ResultVals[I] = emitSqrtIEEE2ULP(Builder, SrcVals[I], SqrtFMF);
+ }
+
+ Value *NewSqrt = insertValues(Builder, Sqrt.getType(), ResultVals);
+ NewSqrt->takeName(&Sqrt);
+ Sqrt.replaceAllUsesWith(NewSqrt);
+ Sqrt.eraseFromParent();
+ return true;
+}
+
bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
Impl.Mod = &M;
Impl.DL = &Impl.Mod->getDataLayout();
+ Impl.SqrtF32 = nullptr;
+ Impl.LdexpF32 = nullptr;
return false;
}
@@ -2092,7 +2199,7 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
Impl.DT = DTWP ? &DTWP->getDomTree() : nullptr;
Impl.HasUnsafeFPMath = hasUnsafeFPMath(F);
- SIModeRegisterDefaults Mode(F);
+ SIModeRegisterDefaults Mode(F, *Impl.ST);
Impl.HasFP32DenormalFlush =
Mode.FP32Denormals == DenormalMode::getPreserveSign();
return Impl.run(F);
@@ -2109,7 +2216,7 @@ PreservedAnalyses AMDGPUCodeGenPreparePass::run(Function &F,
Impl.UA = &FAM.getResult<UniformityInfoAnalysis>(F);
Impl.DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
Impl.HasUnsafeFPMath = hasUnsafeFPMath(F);
- SIModeRegisterDefaults Mode(F);
+ SIModeRegisterDefaults Mode(F, *Impl.ST);
Impl.HasFP32DenormalFlush =
Mode.FP32Denormals == DenormalMode::getPreserveSign();
PreservedAnalyses PA = PreservedAnalyses::none();
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index 892e1eef27a8..8d4cad4c07bc 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -139,19 +139,21 @@ def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;
// Combines which should only apply on VI
def gfx8_combines : GICombineGroup<[expand_promoted_fmed3]>;
-def AMDGPUPreLegalizerCombiner: GICombinerHelper<
+def AMDGPUPreLegalizerCombiner: GICombiner<
"AMDGPUPreLegalizerCombinerImpl",
[all_combines, clamp_i64_to_i16, foldable_fneg]> {
+ let CombineAllMethodName = "tryCombineAllImpl";
}
-def AMDGPUPostLegalizerCombiner: GICombinerHelper<
+def AMDGPUPostLegalizerCombiner: GICombiner<
"AMDGPUPostLegalizerCombinerImpl",
[all_combines, gfx6gfx7_combines, gfx8_combines,
uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg,
rcp_sqrt_to_rsq, sign_extension_in_reg]> {
+ let CombineAllMethodName = "tryCombineAllImpl";
}
-def AMDGPURegBankCombiner : GICombinerHelper<
+def AMDGPURegBankCombiner : GICombiner<
"AMDGPURegBankCombinerImpl",
[unmerge_merge, unmerge_cst, unmerge_undef,
zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
index 78fdedc0b511..69dc78d33c83 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
@@ -9,6 +9,7 @@
#include "AMDGPUCombinerHelper.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"
@@ -28,6 +29,8 @@ static bool fnegFoldsIntoMI(const MachineInstr &MI) {
case AMDGPU::G_FMAXNUM:
case AMDGPU::G_FMINNUM_IEEE:
case AMDGPU::G_FMAXNUM_IEEE:
+ case AMDGPU::G_FMINIMUM:
+ case AMDGPU::G_FMAXIMUM:
case AMDGPU::G_FSIN:
case AMDGPU::G_FPEXT:
case AMDGPU::G_INTRINSIC_TRUNC:
@@ -42,7 +45,7 @@ static bool fnegFoldsIntoMI(const MachineInstr &MI) {
case AMDGPU::G_AMDGPU_FMAX_LEGACY:
return true;
case AMDGPU::G_INTRINSIC: {
- unsigned IntrinsicID = MI.getIntrinsicID();
+ unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
switch (IntrinsicID) {
case Intrinsic::amdgcn_rcp:
case Intrinsic::amdgcn_rcp_legacy:
@@ -66,8 +69,7 @@ static bool fnegFoldsIntoMI(const MachineInstr &MI) {
LLVM_READONLY
static bool opMustUseVOP3Encoding(const MachineInstr &MI,
const MachineRegisterInfo &MRI) {
- return MI.getNumOperands() >
- (MI.getOpcode() == AMDGPU::G_INTRINSIC ? 4u : 3u) ||
+ return MI.getNumOperands() > (isa<GIntrinsic>(MI) ? 4u : 3u) ||
MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
}
@@ -85,14 +87,16 @@ static bool hasSourceMods(const MachineInstr &MI) {
case TargetOpcode::INLINEASM:
case TargetOpcode::INLINEASM_BR:
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
+ case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
case AMDGPU::G_BITCAST:
case AMDGPU::G_ANYEXT:
case AMDGPU::G_BUILD_VECTOR:
case AMDGPU::G_BUILD_VECTOR_TRUNC:
case AMDGPU::G_PHI:
return false;
- case AMDGPU::G_INTRINSIC: {
- unsigned IntrinsicID = MI.getIntrinsicID();
+ case AMDGPU::G_INTRINSIC:
+ case AMDGPU::G_INTRINSIC_CONVERGENT: {
+ unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
switch (IntrinsicID) {
case Intrinsic::amdgcn_interp_p1:
case Intrinsic::amdgcn_interp_p2:
@@ -172,6 +176,10 @@ static unsigned inverseMinMax(unsigned Opc) {
return AMDGPU::G_FMINNUM_IEEE;
case AMDGPU::G_FMINNUM_IEEE:
return AMDGPU::G_FMAXNUM_IEEE;
+ case AMDGPU::G_FMAXIMUM:
+ return AMDGPU::G_FMINIMUM;
+ case AMDGPU::G_FMINIMUM:
+ return AMDGPU::G_FMAXIMUM;
case AMDGPU::G_AMDGPU_FMAX_LEGACY:
return AMDGPU::G_AMDGPU_FMIN_LEGACY;
case AMDGPU::G_AMDGPU_FMIN_LEGACY:
@@ -205,6 +213,8 @@ bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
case AMDGPU::G_FMAXNUM:
case AMDGPU::G_FMINNUM_IEEE:
case AMDGPU::G_FMAXNUM_IEEE:
+ case AMDGPU::G_FMINIMUM:
+ case AMDGPU::G_FMAXIMUM:
case AMDGPU::G_AMDGPU_FMIN_LEGACY:
case AMDGPU::G_AMDGPU_FMAX_LEGACY:
// 0 doesn't have a negated inline immediate.
@@ -227,8 +237,9 @@ bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
case AMDGPU::G_FCANONICALIZE:
case AMDGPU::G_AMDGPU_RCP_IFLAG:
return true;
- case AMDGPU::G_INTRINSIC: {
- unsigned IntrinsicID = MatchInfo->getIntrinsicID();
+ case AMDGPU::G_INTRINSIC:
+ case AMDGPU::G_INTRINSIC_CONVERGENT: {
+ unsigned IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
switch (IntrinsicID) {
case Intrinsic::amdgcn_rcp:
case Intrinsic::amdgcn_rcp_legacy:
@@ -301,6 +312,8 @@ void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
case AMDGPU::G_FMAXNUM:
case AMDGPU::G_FMINNUM_IEEE:
case AMDGPU::G_FMAXNUM_IEEE:
+ case AMDGPU::G_FMINIMUM:
+ case AMDGPU::G_FMAXIMUM:
case AMDGPU::G_AMDGPU_FMIN_LEGACY:
case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
NegateOperand(MatchInfo->getOperand(1));
@@ -326,8 +339,9 @@ void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
case AMDGPU::G_FPTRUNC:
NegateOperand(MatchInfo->getOperand(1));
break;
- case AMDGPU::G_INTRINSIC: {
- unsigned IntrinsicID = MatchInfo->getIntrinsicID();
+ case AMDGPU::G_INTRINSIC:
+ case AMDGPU::G_INTRINSIC_CONVERGENT: {
+ unsigned IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
switch (IntrinsicID) {
case Intrinsic::amdgcn_rcp:
case Intrinsic::amdgcn_rcp_legacy:
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
index a13447586bd4..3afefcf55d49 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
@@ -53,13 +53,22 @@ static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) {
//
// extern "C" void * __init_array_start[];
// extern "C" void * __init_array_end[];
+// extern "C" void * __fini_array_start[];
+// extern "C" void * __fini_array_end[];
//
// using InitCallback = void();
+// using FiniCallback = void(void);
//
// void call_init_array_callbacks() {
// for (auto start = __init_array_start; start != __init_array_end; ++start)
// reinterpret_cast<InitCallback *>(*start)();
// }
+//
+// void call_fini_array_callbacks() {
+// size_t fini_array_size = __fini_array_end - __fini_array_start;
+// for (size_t i = fini_array_size; i > 0; --i)
+// reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])();
+// }
static void createInitOrFiniCalls(Function &F, bool IsCtor) {
Module &M = *F.getParent();
LLVMContext &C = M.getContext();
@@ -96,15 +105,37 @@ static void createInitOrFiniCalls(Function &F, bool IsCtor) {
// for now we just call them with no arguments.
auto *CallBackTy = FunctionType::get(IRB.getVoidTy(), {});
- IRB.CreateCondBr(IRB.CreateICmpNE(Begin, End), LoopBB, ExitBB);
+ Value *Start = Begin;
+ Value *Stop = End;
+ // The destructor array must be called in reverse order. Get a constant
+ // expression to the end of the array and iterate backwards instead.
+ if (!IsCtor) {
+ Type *Int64Ty = IntegerType::getInt64Ty(C);
+ auto *EndPtr = IRB.CreatePtrToInt(End, Int64Ty);
+ auto *BeginPtr = IRB.CreatePtrToInt(Begin, Int64Ty);
+ auto *ByteSize = IRB.CreateSub(EndPtr, BeginPtr);
+ auto *Size = IRB.CreateAShr(ByteSize, ConstantInt::get(Int64Ty, 3));
+ auto *Offset = IRB.CreateSub(Size, ConstantInt::get(Int64Ty, 1));
+ Start = IRB.CreateInBoundsGEP(
+ ArrayType::get(IRB.getPtrTy(), 0), Begin,
+ ArrayRef<Value *>({ConstantInt::get(Int64Ty, 0), Offset}));
+ Stop = Begin;
+ }
+
+ IRB.CreateCondBr(
+ IRB.CreateCmp(IsCtor ? ICmpInst::ICMP_NE : ICmpInst::ICMP_UGE, Start,
+ Stop),
+ LoopBB, ExitBB);
IRB.SetInsertPoint(LoopBB);
auto *CallBackPHI = IRB.CreatePHI(PtrTy, 2, "ptr");
- auto *CallBack = IRB.CreateLoad(CallBackTy->getPointerTo(F.getAddressSpace()),
+ auto *CallBack = IRB.CreateLoad(IRB.getPtrTy(F.getAddressSpace()),
CallBackPHI, "callback");
IRB.CreateCall(CallBackTy, CallBack);
- auto *NewCallBack = IRB.CreateConstGEP1_64(PtrTy, CallBackPHI, 1, "next");
- auto *EndCmp = IRB.CreateICmpEQ(NewCallBack, End, "end");
- CallBackPHI->addIncoming(Begin, &F.getEntryBlock());
+ auto *NewCallBack =
+ IRB.CreateConstGEP1_64(PtrTy, CallBackPHI, IsCtor ? 1 : -1, "next");
+ auto *EndCmp = IRB.CreateCmp(IsCtor ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_ULT,
+ NewCallBack, Stop, "end");
+ CallBackPHI->addIncoming(Start, &F.getEntryBlock());
CallBackPHI->addIncoming(NewCallBack, LoopBB);
IRB.CreateCondBr(EndCmp, ExitBB, LoopBB);
IRB.SetInsertPoint(ExitBB);
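The destructor path above computes the index of the last entry from the byte size (the AShr by 3 assumes 8-byte pointers), starts there, and walks down until the cursor passes __fini_array_start. A minimal, well-defined C++ rendering of that control flow, as a sketch only (run_fini_array and its parameters are illustrative, not part of the patch):

#include <cstddef>

using FiniCallback = void();

// Sketch of the emitted loop: the initial size check mirrors the
// ICMP_UGE guard before LoopBB, and the exit test mirrors the
// ICMP_ULT back-edge comparison against __fini_array_start.
static void run_fini_array(void **begin, void **end) {
  std::ptrdiff_t size = end - begin; // pointer-sized entries
  if (size <= 0)
    return;
  for (void **cur = begin + (size - 1);; --cur) {
    reinterpret_cast<FiniCallback *>(*cur)();
    if (cur == begin)
      break;
  }
}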
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 37df4f68c265..2b85024a9b40 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -105,6 +105,11 @@ def gi_global_saddr :
def gi_mubuf_scratch_offset :
GIComplexOperandMatcher<s32, "selectMUBUFScratchOffset">,
GIComplexPatternEquiv<MUBUFScratchOffset>;
+
+def gi_buf_soffset :
+ GIComplexOperandMatcher<s32, "selectBUFSOffset">,
+ GIComplexPatternEquiv<BUFSOffset>;
+
def gi_mubuf_scratch_offen :
GIComplexOperandMatcher<s32, "selectMUBUFScratchOffen">,
GIComplexPatternEquiv<MUBUFScratchOffen>;
@@ -379,3 +384,6 @@ def gi_set_glc : GICustomOperandRenderer<"renderSetGLC">,
def gi_frameindex_to_targetframeindex : GICustomOperandRenderer<"renderFrameIndex">,
GISDNodeXFormEquiv<frameindex_to_targetframeindex>;
+
+def gi_fp_pow2_to_exponent : GICustomOperandRenderer<"renderFPPow2ToExponent">,
+ GISDNodeXFormEquiv<FPPow2ToExponentXForm>;
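The new gi_fp_pow2_to_exponent renderer wires GlobalISel to the existing FPPow2ToExponentXForm; presumably it rewrites a floating-point immediate that is an exact power of two into its integer exponent (e.g. for ldexp-style scaling). A hedged sketch of that computation, with a hypothetical function name:

#include <cassert>
#include <cmath>

// For imm = 2^n exactly, frexp yields mant == 0.5 and exp == n + 1,
// so the exponent to render is exp - 1. Illustrative only.
static int fpPow2ToExponent(double imm) {
  int exp = 0;
  double mant = std::frexp(imm, &exp);
  assert(mant == 0.5 && "immediate must be an exact power of two");
  return exp - 1;
}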
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
new file mode 100644
index 000000000000..4cd8b1ec1051
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
@@ -0,0 +1,68 @@
+//===-- AMDGPUGlobalISelDivergenceLowering.cpp ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// GlobalISel pass that selects divergent i1 phis as lane mask phis.
+/// Lane mask merging uses the same algorithm as SDAG in SILowerI1Copies.
+/// Handles all cases of temporal divergence.
+/// For divergent non-phi i1 and uniform i1 uses outside of the cycle this pass
+/// currently depends on LCSSA to insert phis with one incoming value.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+#define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUGlobalISelDivergenceLowering : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ AMDGPUGlobalISelDivergenceLowering() : MachineFunctionPass(ID) {
+ initializeAMDGPUGlobalISelDivergenceLoweringPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return "AMDGPU GlobalISel divergence lowering";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
+ "AMDGPU GlobalISel divergence lowering", false, false)
+INITIALIZE_PASS_END(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
+ "AMDGPU GlobalISel divergence lowering", false, false)
+
+char AMDGPUGlobalISelDivergenceLowering::ID = 0;
+
+char &llvm::AMDGPUGlobalISelDivergenceLoweringID =
+ AMDGPUGlobalISelDivergenceLowering::ID;
+
+FunctionPass *llvm::createAMDGPUGlobalISelDivergenceLoweringPass() {
+ return new AMDGPUGlobalISelDivergenceLowering();
+}
+
+bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(
+ MachineFunction &MF) {
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
index 09930dc9612c..5a756602eb1a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -18,7 +18,7 @@ using namespace MIPatternMatch;
std::pair<Register, unsigned>
AMDGPU::getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg,
- GISelKnownBits *KnownBits) {
+ GISelKnownBits *KnownBits, bool CheckNUW) {
MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
if (Def->getOpcode() == TargetOpcode::G_CONSTANT) {
unsigned Offset;
@@ -33,6 +33,12 @@ AMDGPU::getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg,
int64_t Offset;
if (Def->getOpcode() == TargetOpcode::G_ADD) {
+ // A 32-bit (address + offset) sum should not wrap around in unsigned 32-bit
+ // arithmetic, because s_load instructions perform the addition in 64 bits.
+ if (CheckNUW && !Def->getFlag(MachineInstr::NoUWrap)) {
+ assert(MRI.getType(Reg).getScalarSizeInBits() == 32);
+ return std::pair(Reg, 0);
+ }
// TODO: Handle G_OR used for add case
if (mi_match(Def->getOperand(2).getReg(), MRI, m_ICst(Offset)))
return std::pair(Def->getOperand(1).getReg(), Offset);
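The CheckNUW bail-out guards the (base + offset) split: the comment notes that s_load performs the addition in 64 bits, so the fold is only sound when the 32-bit add is known not to wrap. A small standalone demonstration of the disagreement the nuw flag rules out:

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t base = 0xFFFFFFF0u, off = 0x20u;
  // The 32-bit add wraps to 0x10, but the hardware's 64-bit scalar-load
  // addressing would compute 0x100000010 for the split (base, offset) pair.
  uint32_t wrapped = base + off;
  uint64_t hw = uint64_t(base) + uint64_t(off);
  std::printf("32-bit add: %#x, 64-bit add: %#llx\n", wrapped,
              (unsigned long long)hw);
  // Folding the offset is only sound when the two agree, i.e. when the
  // add carries the nuw flag that CheckNUW tests for.
  return 0;
}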
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
index ff4edf02a84d..5ee888d9db00 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
@@ -9,7 +9,6 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUGLOBALISELUTILS_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUGLOBALISELUTILS_H
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/Register.h"
#include <utility>
@@ -25,7 +24,8 @@ namespace AMDGPU {
/// Returns base register and constant offset.
std::pair<Register, unsigned>
getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg,
- GISelKnownBits *KnownBits = nullptr);
+ GISelKnownBits *KnownBits = nullptr,
+ bool CheckNUW = false);
bool hasAtomicFaddRtnForTy(const GCNSubtarget &Subtarget, const LLT &Ty);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index dadc0c92ef8b..b51a876750b5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -49,443 +49,14 @@ namespace AMDGPU {
namespace HSAMD {
//===----------------------------------------------------------------------===//
-// HSAMetadataStreamerV2
-//===----------------------------------------------------------------------===//
-void MetadataStreamerYamlV2::dump(StringRef HSAMetadataString) const {
- errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
-}
-
-void MetadataStreamerYamlV2::verify(StringRef HSAMetadataString) const {
- errs() << "AMDGPU HSA Metadata Parser Test: ";
-
- HSAMD::Metadata FromHSAMetadataString;
- if (fromString(HSAMetadataString, FromHSAMetadataString)) {
- errs() << "FAIL\n";
- return;
- }
-
- std::string ToHSAMetadataString;
- if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
- errs() << "FAIL\n";
- return;
- }
-
- errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
- << '\n';
- if (HSAMetadataString != ToHSAMetadataString) {
- errs() << "Original input: " << HSAMetadataString << '\n'
- << "Produced output: " << ToHSAMetadataString << '\n';
- }
-}
-
-AccessQualifier
-MetadataStreamerYamlV2::getAccessQualifier(StringRef AccQual) const {
- if (AccQual.empty())
- return AccessQualifier::Unknown;
-
- return StringSwitch<AccessQualifier>(AccQual)
- .Case("read_only", AccessQualifier::ReadOnly)
- .Case("write_only", AccessQualifier::WriteOnly)
- .Case("read_write", AccessQualifier::ReadWrite)
- .Default(AccessQualifier::Default);
-}
-
-AddressSpaceQualifier
-MetadataStreamerYamlV2::getAddressSpaceQualifier(unsigned AddressSpace) const {
- switch (AddressSpace) {
- case AMDGPUAS::PRIVATE_ADDRESS:
- return AddressSpaceQualifier::Private;
- case AMDGPUAS::GLOBAL_ADDRESS:
- return AddressSpaceQualifier::Global;
- case AMDGPUAS::CONSTANT_ADDRESS:
- return AddressSpaceQualifier::Constant;
- case AMDGPUAS::LOCAL_ADDRESS:
- return AddressSpaceQualifier::Local;
- case AMDGPUAS::FLAT_ADDRESS:
- return AddressSpaceQualifier::Generic;
- case AMDGPUAS::REGION_ADDRESS:
- return AddressSpaceQualifier::Region;
- default:
- return AddressSpaceQualifier::Unknown;
- }
-}
-
-ValueKind MetadataStreamerYamlV2::getValueKind(Type *Ty, StringRef TypeQual,
- StringRef BaseTypeName) const {
- if (TypeQual.contains("pipe"))
- return ValueKind::Pipe;
-
- return StringSwitch<ValueKind>(BaseTypeName)
- .Case("image1d_t", ValueKind::Image)
- .Case("image1d_array_t", ValueKind::Image)
- .Case("image1d_buffer_t", ValueKind::Image)
- .Case("image2d_t", ValueKind::Image)
- .Case("image2d_array_t", ValueKind::Image)
- .Case("image2d_array_depth_t", ValueKind::Image)
- .Case("image2d_array_msaa_t", ValueKind::Image)
- .Case("image2d_array_msaa_depth_t", ValueKind::Image)
- .Case("image2d_depth_t", ValueKind::Image)
- .Case("image2d_msaa_t", ValueKind::Image)
- .Case("image2d_msaa_depth_t", ValueKind::Image)
- .Case("image3d_t", ValueKind::Image)
- .Case("sampler_t", ValueKind::Sampler)
- .Case("queue_t", ValueKind::Queue)
- .Default(isa<PointerType>(Ty) ?
- (Ty->getPointerAddressSpace() ==
- AMDGPUAS::LOCAL_ADDRESS ?
- ValueKind::DynamicSharedPointer :
- ValueKind::GlobalBuffer) :
- ValueKind::ByValue);
-}
-
-std::string MetadataStreamerYamlV2::getTypeName(Type *Ty, bool Signed) const {
- switch (Ty->getTypeID()) {
- case Type::IntegerTyID: {
- if (!Signed)
- return (Twine('u') + getTypeName(Ty, true)).str();
-
- auto BitWidth = Ty->getIntegerBitWidth();
- switch (BitWidth) {
- case 8:
- return "char";
- case 16:
- return "short";
- case 32:
- return "int";
- case 64:
- return "long";
- default:
- return (Twine('i') + Twine(BitWidth)).str();
- }
- }
- case Type::HalfTyID:
- return "half";
- case Type::FloatTyID:
- return "float";
- case Type::DoubleTyID:
- return "double";
- case Type::FixedVectorTyID: {
- auto VecTy = cast<FixedVectorType>(Ty);
- auto ElTy = VecTy->getElementType();
- auto NumElements = VecTy->getNumElements();
- return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
- }
- default:
- return "unknown";
- }
-}
-
-std::vector<uint32_t>
-MetadataStreamerYamlV2::getWorkGroupDimensions(MDNode *Node) const {
- std::vector<uint32_t> Dims;
- if (Node->getNumOperands() != 3)
- return Dims;
-
- for (auto &Op : Node->operands())
- Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
- return Dims;
-}
-
-Kernel::CodeProps::Metadata MetadataStreamerYamlV2::getHSACodeProps(
- const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const {
- const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
- const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
- HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
- const Function &F = MF.getFunction();
-
- assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
- F.getCallingConv() == CallingConv::SPIR_KERNEL);
-
- Align MaxKernArgAlign;
- HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
- MaxKernArgAlign);
- HSACodeProps.mKernargSegmentAlign =
- std::max(MaxKernArgAlign, Align(4)).value();
-
- HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
- HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
- HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
- HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
- HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
- HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
- HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
- HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
- HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
- HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
-
- return HSACodeProps;
-}
-
-Kernel::DebugProps::Metadata MetadataStreamerYamlV2::getHSADebugProps(
- const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const {
- return HSAMD::Kernel::DebugProps::Metadata();
-}
-
-void MetadataStreamerYamlV2::emitVersion() {
- auto &Version = HSAMetadata.mVersion;
-
- Version.push_back(VersionMajorV2);
- Version.push_back(VersionMinorV2);
-}
-
-void MetadataStreamerYamlV2::emitPrintf(const Module &Mod) {
- auto &Printf = HSAMetadata.mPrintf;
-
- auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
- if (!Node)
- return;
-
- for (auto *Op : Node->operands())
- if (Op->getNumOperands())
- Printf.push_back(
- std::string(cast<MDString>(Op->getOperand(0))->getString()));
-}
-
-void MetadataStreamerYamlV2::emitKernelLanguage(const Function &Func) {
- auto &Kernel = HSAMetadata.mKernels.back();
-
- // TODO: What about other languages?
- auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
- if (!Node || !Node->getNumOperands())
- return;
- auto Op0 = Node->getOperand(0);
- if (Op0->getNumOperands() <= 1)
- return;
-
- Kernel.mLanguage = "OpenCL C";
- Kernel.mLanguageVersion.push_back(
- mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
- Kernel.mLanguageVersion.push_back(
- mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
-}
-
-void MetadataStreamerYamlV2::emitKernelAttrs(const Function &Func) {
- auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
-
- if (auto Node = Func.getMetadata("reqd_work_group_size"))
- Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
- if (auto Node = Func.getMetadata("work_group_size_hint"))
- Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
- if (auto Node = Func.getMetadata("vec_type_hint")) {
- Attrs.mVecTypeHint = getTypeName(
- cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
- mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
- }
- if (Func.hasFnAttribute("runtime-handle")) {
- Attrs.mRuntimeHandle =
- Func.getFnAttribute("runtime-handle").getValueAsString().str();
- }
-}
-
-void MetadataStreamerYamlV2::emitKernelArgs(const Function &Func,
- const GCNSubtarget &ST) {
- for (auto &Arg : Func.args())
- emitKernelArg(Arg);
-
- emitHiddenKernelArgs(Func, ST);
-}
-
-void MetadataStreamerYamlV2::emitKernelArg(const Argument &Arg) {
- auto Func = Arg.getParent();
- auto ArgNo = Arg.getArgNo();
- const MDNode *Node;
-
- StringRef Name;
- Node = Func->getMetadata("kernel_arg_name");
- if (Node && ArgNo < Node->getNumOperands())
- Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
- else if (Arg.hasName())
- Name = Arg.getName();
-
- StringRef TypeName;
- Node = Func->getMetadata("kernel_arg_type");
- if (Node && ArgNo < Node->getNumOperands())
- TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
-
- StringRef BaseTypeName;
- Node = Func->getMetadata("kernel_arg_base_type");
- if (Node && ArgNo < Node->getNumOperands())
- BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
-
- StringRef AccQual;
- if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
- Arg.hasNoAliasAttr()) {
- AccQual = "read_only";
- } else {
- Node = Func->getMetadata("kernel_arg_access_qual");
- if (Node && ArgNo < Node->getNumOperands())
- AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
- }
-
- StringRef TypeQual;
- Node = Func->getMetadata("kernel_arg_type_qual");
- if (Node && ArgNo < Node->getNumOperands())
- TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
-
- const DataLayout &DL = Func->getParent()->getDataLayout();
-
- MaybeAlign PointeeAlign;
- if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {
- if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
- // FIXME: Should report this for all address spaces
- PointeeAlign = Arg.getParamAlign().valueOrOne();
- }
- }
-
- Type *ArgTy;
- Align ArgAlign;
- std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
-
- emitKernelArg(DL, ArgTy, ArgAlign,
- getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name,
- TypeName, BaseTypeName, AccQual, TypeQual);
-}
-
-void MetadataStreamerYamlV2::emitKernelArg(
- const DataLayout &DL, Type *Ty, Align Alignment, ValueKind ValueKind,
- MaybeAlign PointeeAlign, StringRef Name, StringRef TypeName,
- StringRef BaseTypeName, StringRef AccQual, StringRef TypeQual) {
- HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
- auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
-
- Arg.mName = std::string(Name);
- Arg.mTypeName = std::string(TypeName);
- Arg.mSize = DL.getTypeAllocSize(Ty);
- Arg.mAlign = Alignment.value();
- Arg.mValueKind = ValueKind;
- Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0;
-
- if (auto PtrTy = dyn_cast<PointerType>(Ty))
- Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
-
- Arg.mAccQual = getAccessQualifier(AccQual);
-
- // TODO: Emit Arg.mActualAccQual.
-
- SmallVector<StringRef, 1> SplitTypeQuals;
- TypeQual.split(SplitTypeQuals, " ", -1, false);
- for (StringRef Key : SplitTypeQuals) {
- auto P = StringSwitch<bool*>(Key)
- .Case("const", &Arg.mIsConst)
- .Case("restrict", &Arg.mIsRestrict)
- .Case("volatile", &Arg.mIsVolatile)
- .Case("pipe", &Arg.mIsPipe)
- .Default(nullptr);
- if (P)
- *P = true;
- }
-}
-
-void MetadataStreamerYamlV2::emitHiddenKernelArgs(const Function &Func,
- const GCNSubtarget &ST) {
- unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
- if (!HiddenArgNumBytes)
- return;
-
- auto &DL = Func.getParent()->getDataLayout();
- auto Int64Ty = Type::getInt64Ty(Func.getContext());
-
- if (HiddenArgNumBytes >= 8)
- emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX);
- if (HiddenArgNumBytes >= 16)
- emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY);
- if (HiddenArgNumBytes >= 24)
- emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ);
-
- auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
- AMDGPUAS::GLOBAL_ADDRESS);
-
- if (HiddenArgNumBytes >= 32) {
- // We forbid the use of features requiring hostcall when compiling OpenCL
- // before code object V5, which makes the mutual exclusion between the
- // "printf buffer" and "hostcall buffer" here sound.
- if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
- emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
- else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
- emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer);
- else
- emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
- }
-
- // Emit "default queue" and "completion action" arguments if enqueue kernel is
- // used, otherwise emit dummy "none" arguments.
- if (HiddenArgNumBytes >= 40) {
- if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
- emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue);
- } else {
- emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
- }
- }
-
- if (HiddenArgNumBytes >= 48) {
- if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {
- emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction);
- } else {
- emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
- }
- }
-
- // Emit the pointer argument for multi-grid object.
- if (HiddenArgNumBytes >= 56) {
- if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg"))
- emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg);
- else
- emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
- }
-}
-
-bool MetadataStreamerYamlV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
- return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
-}
-
-void MetadataStreamerYamlV2::begin(const Module &Mod,
- const IsaInfo::AMDGPUTargetID &TargetID) {
- emitVersion();
- emitPrintf(Mod);
-}
-
-void MetadataStreamerYamlV2::end() {
- std::string HSAMetadataString;
- if (toString(HSAMetadata, HSAMetadataString))
- return;
-
- if (DumpHSAMetadata)
- dump(HSAMetadataString);
- if (VerifyHSAMetadata)
- verify(HSAMetadataString);
-}
-
-void MetadataStreamerYamlV2::emitKernel(const MachineFunction &MF,
- const SIProgramInfo &ProgramInfo) {
- auto &Func = MF.getFunction();
- if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
- return;
-
- auto CodeProps = getHSACodeProps(MF, ProgramInfo);
- auto DebugProps = getHSADebugProps(MF, ProgramInfo);
-
- HSAMetadata.mKernels.push_back(Kernel::Metadata());
- auto &Kernel = HSAMetadata.mKernels.back();
-
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- Kernel.mName = std::string(Func.getName());
- Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
- emitKernelLanguage(Func);
- emitKernelAttrs(Func);
- emitKernelArgs(Func, ST);
- HSAMetadata.mKernels.back().mCodeProps = CodeProps;
- HSAMetadata.mKernels.back().mDebugProps = DebugProps;
-}
-
-//===----------------------------------------------------------------------===//
-// HSAMetadataStreamerV3
+// HSAMetadataStreamerV4
//===----------------------------------------------------------------------===//
-void MetadataStreamerMsgPackV3::dump(StringRef HSAMetadataString) const {
+void MetadataStreamerMsgPackV4::dump(StringRef HSAMetadataString) const {
errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
}
-void MetadataStreamerMsgPackV3::verify(StringRef HSAMetadataString) const {
+void MetadataStreamerMsgPackV4::verify(StringRef HSAMetadataString) const {
errs() << "AMDGPU HSA Metadata Parser Test: ";
msgpack::Document FromHSAMetadataString;
@@ -507,7 +78,7 @@ void MetadataStreamerMsgPackV3::verify(StringRef HSAMetadataString) const {
}
std::optional<StringRef>
-MetadataStreamerMsgPackV3::getAccessQualifier(StringRef AccQual) const {
+MetadataStreamerMsgPackV4::getAccessQualifier(StringRef AccQual) const {
return StringSwitch<std::optional<StringRef>>(AccQual)
.Case("read_only", StringRef("read_only"))
.Case("write_only", StringRef("write_only"))
@@ -515,7 +86,7 @@ MetadataStreamerMsgPackV3::getAccessQualifier(StringRef AccQual) const {
.Default(std::nullopt);
}
-std::optional<StringRef> MetadataStreamerMsgPackV3::getAddressSpaceQualifier(
+std::optional<StringRef> MetadataStreamerMsgPackV4::getAddressSpaceQualifier(
unsigned AddressSpace) const {
switch (AddressSpace) {
case AMDGPUAS::PRIVATE_ADDRESS:
@@ -536,7 +107,7 @@ std::optional<StringRef> MetadataStreamerMsgPackV3::getAddressSpaceQualifier(
}
StringRef
-MetadataStreamerMsgPackV3::getValueKind(Type *Ty, StringRef TypeQual,
+MetadataStreamerMsgPackV4::getValueKind(Type *Ty, StringRef TypeQual,
StringRef BaseTypeName) const {
if (TypeQual.contains("pipe"))
return "pipe";
@@ -563,7 +134,7 @@ MetadataStreamerMsgPackV3::getValueKind(Type *Ty, StringRef TypeQual,
: "by_value");
}
-std::string MetadataStreamerMsgPackV3::getTypeName(Type *Ty,
+std::string MetadataStreamerMsgPackV4::getTypeName(Type *Ty,
bool Signed) const {
switch (Ty->getTypeID()) {
case Type::IntegerTyID: {
@@ -602,7 +173,7 @@ std::string MetadataStreamerMsgPackV3::getTypeName(Type *Ty,
}
msgpack::ArrayDocNode
-MetadataStreamerMsgPackV3::getWorkGroupDimensions(MDNode *Node) const {
+MetadataStreamerMsgPackV4::getWorkGroupDimensions(MDNode *Node) const {
auto Dims = HSAMetadataDoc->getArrayNode();
if (Node->getNumOperands() != 3)
return Dims;
@@ -613,14 +184,20 @@ MetadataStreamerMsgPackV3::getWorkGroupDimensions(MDNode *Node) const {
return Dims;
}
-void MetadataStreamerMsgPackV3::emitVersion() {
+void MetadataStreamerMsgPackV4::emitVersion() {
auto Version = HSAMetadataDoc->getArrayNode();
- Version.push_back(Version.getDocument()->getNode(VersionMajorV3));
- Version.push_back(Version.getDocument()->getNode(VersionMinorV3));
+ Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
+ Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
getRootMetadata("amdhsa.version") = Version;
}
-void MetadataStreamerMsgPackV3::emitPrintf(const Module &Mod) {
+void MetadataStreamerMsgPackV4::emitTargetID(
+ const IsaInfo::AMDGPUTargetID &TargetID) {
+ getRootMetadata("amdhsa.target") =
+ HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
+}
+
+void MetadataStreamerMsgPackV4::emitPrintf(const Module &Mod) {
auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
if (!Node)
return;
@@ -633,7 +210,7 @@ void MetadataStreamerMsgPackV3::emitPrintf(const Module &Mod) {
getRootMetadata("amdhsa.printf") = Printf;
}
-void MetadataStreamerMsgPackV3::emitKernelLanguage(const Function &Func,
+void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func,
msgpack::MapDocNode Kern) {
// TODO: What about other languages?
auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
@@ -652,7 +229,7 @@ void MetadataStreamerMsgPackV3::emitKernelLanguage(const Function &Func,
Kern[".language_version"] = LanguageVersion;
}
-void MetadataStreamerMsgPackV3::emitKernelAttrs(const Function &Func,
+void MetadataStreamerMsgPackV4::emitKernelAttrs(const Function &Func,
msgpack::MapDocNode Kern) {
if (auto Node = Func.getMetadata("reqd_work_group_size"))
@@ -677,7 +254,7 @@ void MetadataStreamerMsgPackV3::emitKernelAttrs(const Function &Func,
Kern[".kind"] = Kern.getDocument()->getNode("fini");
}
-void MetadataStreamerMsgPackV3::emitKernelArgs(const MachineFunction &MF,
+void MetadataStreamerMsgPackV4::emitKernelArgs(const MachineFunction &MF,
msgpack::MapDocNode Kern) {
auto &Func = MF.getFunction();
unsigned Offset = 0;
@@ -690,7 +267,7 @@ void MetadataStreamerMsgPackV3::emitKernelArgs(const MachineFunction &MF,
Kern[".args"] = Args;
}
-void MetadataStreamerMsgPackV3::emitKernelArg(const Argument &Arg,
+void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg,
unsigned &Offset,
msgpack::ArrayDocNode Args) {
auto Func = Arg.getParent();
@@ -714,16 +291,20 @@ void MetadataStreamerMsgPackV3::emitKernelArg(const Argument &Arg,
if (Node && ArgNo < Node->getNumOperands())
BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
- StringRef AccQual;
- if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
- Arg.hasNoAliasAttr()) {
- AccQual = "read_only";
- } else {
- Node = Func->getMetadata("kernel_arg_access_qual");
- if (Node && ArgNo < Node->getNumOperands())
- AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
+ StringRef ActAccQual;
+ // Do we really need the NoAlias check here?
+ if (Arg.getType()->isPointerTy() && Arg.hasNoAliasAttr()) {
+ if (Arg.onlyReadsMemory())
+ ActAccQual = "read_only";
+ else if (Arg.hasAttribute(Attribute::WriteOnly))
+ ActAccQual = "write_only";
}
+ StringRef AccQual;
+ Node = Func->getMetadata("kernel_arg_access_qual");
+ if (Node && ArgNo < Node->getNumOperands())
+ AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
StringRef TypeQual;
Node = Func->getMetadata("kernel_arg_type_qual");
if (Node && ArgNo < Node->getNumOperands())
@@ -747,14 +328,15 @@ void MetadataStreamerMsgPackV3::emitKernelArg(const Argument &Arg,
emitKernelArg(DL, ArgTy, ArgAlign,
getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
- PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
+ PointeeAlign, Name, TypeName, BaseTypeName, ActAccQual,
+ AccQual, TypeQual);
}
-void MetadataStreamerMsgPackV3::emitKernelArg(
+void MetadataStreamerMsgPackV4::emitKernelArg(
const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
StringRef Name, StringRef TypeName, StringRef BaseTypeName,
- StringRef AccQual, StringRef TypeQual) {
+ StringRef ActAccQual, StringRef AccQual, StringRef TypeQual) {
auto Arg = Args.getDocument()->getMapNode();
if (!Name.empty())
@@ -780,7 +362,8 @@ void MetadataStreamerMsgPackV3::emitKernelArg(
if (auto AQ = getAccessQualifier(AccQual))
Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
- // TODO: Emit Arg[".actual_access"].
+ if (auto AAQ = getAccessQualifier(ActAccQual))
+ Arg[".actual_access"] = Arg.getDocument()->getNode(*AAQ, /*Copy=*/true);
SmallVector<StringRef, 1> SplitTypeQuals;
TypeQual.split(SplitTypeQuals, " ", -1, false);
@@ -798,7 +381,7 @@ void MetadataStreamerMsgPackV3::emitKernelArg(
Args.push_back(Arg);
}
-void MetadataStreamerMsgPackV3::emitHiddenKernelArgs(
+void MetadataStreamerMsgPackV4::emitHiddenKernelArgs(
const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
auto &Func = MF.getFunction();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -824,7 +407,7 @@ void MetadataStreamerMsgPackV3::emitHiddenKernelArgs(
Args);
auto Int8PtrTy =
- Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
+ PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
if (HiddenArgNumBytes >= 32) {
// We forbid the use of features requiring hostcall when compiling OpenCL
@@ -871,9 +454,10 @@ void MetadataStreamerMsgPackV3::emitHiddenKernelArgs(
}
}
-msgpack::MapDocNode MetadataStreamerMsgPackV3::getHSAKernelProps(
- const MachineFunction &MF, const SIProgramInfo &ProgramInfo,
- unsigned CodeObjectVersion) const {
+msgpack::MapDocNode
+MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo,
+ unsigned CodeObjectVersion) const {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
const Function &F = MF.getFunction();
@@ -918,18 +502,19 @@ msgpack::MapDocNode MetadataStreamerMsgPackV3::getHSAKernelProps(
return Kern;
}
-bool MetadataStreamerMsgPackV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
+bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
}
-void MetadataStreamerMsgPackV3::begin(const Module &Mod,
+void MetadataStreamerMsgPackV4::begin(const Module &Mod,
const IsaInfo::AMDGPUTargetID &TargetID) {
emitVersion();
+ emitTargetID(TargetID);
emitPrintf(Mod);
getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
}
-void MetadataStreamerMsgPackV3::end() {
+void MetadataStreamerMsgPackV4::end() {
std::string HSAMetadataString;
raw_string_ostream StrOS(HSAMetadataString);
HSAMetadataDoc->toYAML(StrOS);
@@ -940,7 +525,7 @@ void MetadataStreamerMsgPackV3::end() {
verify(StrOS.str());
}
-void MetadataStreamerMsgPackV3::emitKernel(const MachineFunction &MF,
+void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) {
auto &Func = MF.getFunction();
if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL &&
@@ -966,31 +551,6 @@ void MetadataStreamerMsgPackV3::emitKernel(const MachineFunction &MF,
}
//===----------------------------------------------------------------------===//
-// HSAMetadataStreamerV4
-//===----------------------------------------------------------------------===//
-
-void MetadataStreamerMsgPackV4::emitVersion() {
- auto Version = HSAMetadataDoc->getArrayNode();
- Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
- Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
- getRootMetadata("amdhsa.version") = Version;
-}
-
-void MetadataStreamerMsgPackV4::emitTargetID(
- const IsaInfo::AMDGPUTargetID &TargetID) {
- getRootMetadata("amdhsa.target") =
- HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
-}
-
-void MetadataStreamerMsgPackV4::begin(const Module &Mod,
- const IsaInfo::AMDGPUTargetID &TargetID) {
- emitVersion();
- emitTargetID(TargetID);
- emitPrintf(Mod);
- getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
-}
-
-//===----------------------------------------------------------------------===//
// HSAMetadataStreamerV5
//===----------------------------------------------------------------------===//
@@ -1044,7 +604,7 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
Offset += 6; // Reserved.
auto Int8PtrTy =
- Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
+ PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
if (M->getNamedMetadata("llvm.printf.fmts")) {
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
@@ -1097,13 +657,13 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
Offset += 8; // Skipped.
}
- if (MFI.hasQueuePtr())
+ if (MFI.getUserSGPRInfo().hasQueuePtr())
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
}
void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func,
msgpack::MapDocNode Kern) {
- MetadataStreamerMsgPackV3::emitKernelAttrs(Func, Kern);
+ MetadataStreamerMsgPackV4::emitKernelAttrs(Func, Kern);
if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool())
Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
index 7d7080e920f5..6d6bd86711b1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -30,7 +30,6 @@ class MDNode;
class Module;
struct SIProgramInfo;
class Type;
-class GCNSubtarget;
namespace AMDGPU {
@@ -62,7 +61,7 @@ protected:
msgpack::MapDocNode Kern) = 0;
};
-class MetadataStreamerMsgPackV3 : public MetadataStreamer {
+class MetadataStreamerMsgPackV4 : public MetadataStreamer {
protected:
std::unique_ptr<msgpack::Document> HSAMetadataDoc =
std::make_unique<msgpack::Document>();
@@ -89,6 +88,8 @@ protected:
void emitVersion() override;
+ void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID);
+
void emitPrintf(const Module &Mod);
void emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern);
@@ -105,8 +106,8 @@ protected:
msgpack::ArrayDocNode Args,
MaybeAlign PointeeAlign = std::nullopt,
StringRef Name = "", StringRef TypeName = "",
- StringRef BaseTypeName = "", StringRef AccQual = "",
- StringRef TypeQual = "");
+ StringRef BaseTypeName = "", StringRef ActAccQual = "",
+ StringRef AccQual = "", StringRef TypeQual = "");
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
msgpack::ArrayDocNode Args) override;
@@ -120,8 +121,8 @@ protected:
}
public:
- MetadataStreamerMsgPackV3() = default;
- ~MetadataStreamerMsgPackV3() = default;
+ MetadataStreamerMsgPackV4() = default;
+ ~MetadataStreamerMsgPackV4() = default;
bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
@@ -134,19 +135,6 @@ public:
const SIProgramInfo &ProgramInfo) override;
};
-class MetadataStreamerMsgPackV4 : public MetadataStreamerMsgPackV3 {
-protected:
- void emitVersion() override;
- void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID);
-
-public:
- MetadataStreamerMsgPackV4() = default;
- ~MetadataStreamerMsgPackV4() = default;
-
- void begin(const Module &Mod,
- const IsaInfo::AMDGPUTargetID &TargetID) override;
-};
-
class MetadataStreamerMsgPackV5 final : public MetadataStreamerMsgPackV4 {
protected:
void emitVersion() override;
@@ -159,82 +147,6 @@ public:
~MetadataStreamerMsgPackV5() = default;
};
-// TODO: Rename MetadataStreamerV2 -> MetadataStreamerYamlV2.
-class MetadataStreamerYamlV2 final : public MetadataStreamer {
-private:
- Metadata HSAMetadata;
-
- void dump(StringRef HSAMetadataString) const;
-
- void verify(StringRef HSAMetadataString) const;
-
- AccessQualifier getAccessQualifier(StringRef AccQual) const;
-
- AddressSpaceQualifier getAddressSpaceQualifier(unsigned AddressSpace) const;
-
- ValueKind getValueKind(Type *Ty, StringRef TypeQual,
- StringRef BaseTypeName) const;
-
- std::string getTypeName(Type *Ty, bool Signed) const;
-
- std::vector<uint32_t> getWorkGroupDimensions(MDNode *Node) const;
-
- Kernel::CodeProps::Metadata getHSACodeProps(
- const MachineFunction &MF,
- const SIProgramInfo &ProgramInfo) const;
- Kernel::DebugProps::Metadata getHSADebugProps(
- const MachineFunction &MF,
- const SIProgramInfo &ProgramInfo) const;
-
- void emitPrintf(const Module &Mod);
-
- void emitKernelLanguage(const Function &Func);
-
- void emitKernelAttrs(const Function &Func);
-
- void emitKernelArgs(const Function &Func, const GCNSubtarget &ST);
-
- void emitKernelArg(const Argument &Arg);
-
- void emitKernelArg(const DataLayout &DL, Type *Ty, Align Alignment,
- ValueKind ValueKind,
- MaybeAlign PointeeAlign = std::nullopt,
- StringRef Name = "", StringRef TypeName = "",
- StringRef BaseTypeName = "", StringRef AccQual = "",
- StringRef TypeQual = "");
-
- void emitHiddenKernelArgs(const Function &Func, const GCNSubtarget &ST);
-
- const Metadata &getHSAMetadata() const {
- return HSAMetadata;
- }
-
-protected:
- void emitVersion() override;
- void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
- msgpack::ArrayDocNode Args) override {
- llvm_unreachable("Dummy override should not be invoked!");
- }
- void emitKernelAttrs(const Function &Func,
- msgpack::MapDocNode Kern) override {
- llvm_unreachable("Dummy override should not be invoked!");
- }
-
-public:
- MetadataStreamerYamlV2() = default;
- ~MetadataStreamerYamlV2() = default;
-
- bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
-
- void begin(const Module &Mod,
- const IsaInfo::AMDGPUTargetID &TargetID) override;
-
- void end() override;
-
- void emitKernel(const MachineFunction &MF,
- const SIProgramInfo &ProgramInfo) override;
-};
-
} // end namespace HSAMD
} // end namespace AMDGPU
} // end namespace llvm
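Only the MsgPack V4 and V5 streamers survive the header cleanup. A hypothetical factory (not part of the patch; the real selection lives in the AMDGPU asm printer) showing how the remaining hierarchy would be consumed:

#include "AMDGPUHSAMetadataStreamer.h"
#include <memory>

// makeHSAMetadataStreamer is a hypothetical helper: with V2/V3 gone,
// anything below code object v5 falls back to the MsgPack V4 streamer.
static std::unique_ptr<llvm::AMDGPU::HSAMD::MetadataStreamer>
makeHSAMetadataStreamer(unsigned CodeObjectVersion) {
  using namespace llvm::AMDGPU::HSAMD;
  if (CodeObjectVersion >= 5)
    return std::make_unique<MetadataStreamerMsgPackV5>();
  return std::make_unique<MetadataStreamerMsgPackV4>();
}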
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
index ffa6c88f9d41..0a17b1536040 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -345,13 +345,13 @@ class PipelineSolver {
// return the number of edges missed.
int addEdges(SmallVectorImpl<SchedGroup> &SyncPipeline, SUnit *SU, int SGID,
std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges);
- // Link the pipeline as if \p SU was in the SchedGroup with ID \p SGID. It
- // returns the cost (in terms of missed pipeline edges), and tracks the edges
- // added in \p AddedEdges
+ /// Link the pipeline as if \p SU was in the SchedGroup with ID \p SGID. It
+ /// returns the cost (in terms of missed pipeline edges), and tracks the edges
+ /// added in \p AddedEdges
template <typename T>
int linkSUnit(SUnit *SU, int SGID,
std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges, T I, T E);
- // Remove the edges passed via \p AddedEdges
+ /// Remove the edges passed via \p AddedEdges
void removeEdges(const std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges);
// Convert the passed in maps to arrays for bidirectional iterators
void convertSyncMapsToArrays();
@@ -593,11 +593,10 @@ void PipelineSolver::populateReadyList(
for (; I != E; ++I) {
std::vector<std::pair<SUnit *, SUnit *>> AddedEdges;
int CandSGID = *I;
- SchedGroup *Match;
- for (auto &SG : SyncPipeline) {
- if (SG.getSGID() == CandSGID)
- Match = &SG;
- }
+ SchedGroup *Match = llvm::find_if(SyncPipeline, [CandSGID](SchedGroup &SG) {
+ return SG.getSGID() == CandSGID;
+ });
+ assert(Match);
if (UseCostHeur) {
if (Match->isFull()) {
@@ -739,11 +738,10 @@ void PipelineSolver::greedyFind(
for (; I != E; ++I) {
std::vector<std::pair<SUnit *, SUnit *>> AddedEdges;
int CandSGID = *I;
- SchedGroup *Match;
- for (auto &SG : SyncPipeline) {
- if (SG.getSGID() == CandSGID)
- Match = &SG;
- }
+ SchedGroup *Match = llvm::find_if(SyncPipeline, [CandSGID](SchedGroup &SG) {
+ return SG.getSGID() == CandSGID;
+ });
+ assert(Match);
LLVM_DEBUG(dbgs() << "Trying SGID # " << CandSGID << " with Mask "
<< (int)Match->getMask() << "\n");
@@ -849,10 +847,11 @@ protected:
const SIInstrInfo *TII;
public:
- // Add SchedGroups to \p Pipeline to implement this Strategy.
+ /// Add SchedGroups to \p SyncedSchedGroups to implement this Strategy.
virtual void applyIGLPStrategy(
DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
- DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups) = 0;
+ DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
+ bool IsReentry) = 0;
// Returns true if this strategy should be applied to a ScheduleDAG.
virtual bool shouldApplyStrategy(ScheduleDAGInstrs *DAG) = 0;
@@ -870,7 +869,8 @@ private:
public:
void applyIGLPStrategy(
DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
- DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups) override;
+ DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
+ bool IsReentry) override;
bool shouldApplyStrategy(ScheduleDAGInstrs *DAG) override { return true; }
@@ -882,7 +882,8 @@ public:
void MFMASmallGemmOpt::applyIGLPStrategy(
DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
- DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups) {
+ DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
+ bool IsReentry) {
// Count the number of MFMA instructions.
unsigned MFMACount = 0;
for (const MachineInstr &I : *DAG)
@@ -964,11 +965,10 @@ private:
// Does the VALU have a DS_WRITE successor that is the same as other
// VALU already in the group. The V_PERMs will all share 1 DS_W succ
- return std::any_of(Cache->begin(), Cache->end(), [&SU](SUnit *Elt) {
- return std::any_of(SU->Succs.begin(), SU->Succs.end(),
- [&Elt](const SDep &ThisSucc) {
- return ThisSucc.getSUnit() == Elt;
- });
+ return llvm::any_of(*Cache, [&SU](SUnit *Elt) {
+ return llvm::any_of(SU->Succs, [&Elt](const SDep &ThisSucc) {
+ return ThisSucc.getSUnit() == Elt;
+ });
});
}
@@ -1045,8 +1045,8 @@ private:
: InstructionRule(TII, SGID, NeedsCache) {}
};
- // Whether the SU shares a V_PERM predecessor with any SU in the SchedGroup
- // that is /p Distance steps away
+ /// Whether the SU shares a V_PERM predecessor with any SU in the SchedGroup
+ /// that is \p Distance steps away
class SharesPredWithPrevNthGroup final : public InstructionRule {
private:
unsigned Distance = 1;
@@ -1078,16 +1078,18 @@ private:
Cache->push_back(Pred.getSUnit());
}
}
+
+ // If the other group has no PERM preds, then this group won't share any
+ if (!Cache->size())
+ return false;
}
- assert(Cache->size());
auto DAG = SyncPipe[0].DAG;
// Does the previous DS_WRITE share a V_PERM predecessor with this
// VMEM_READ
- return (
- std::any_of(Cache->begin(), Cache->end(), [&SU, &DAG](SUnit *Elt) {
- return DAG->IsReachable(const_cast<SUnit *>(SU), Elt);
- }));
+ return llvm::any_of(*Cache, [&SU, &DAG](SUnit *Elt) {
+ return DAG->IsReachable(const_cast<SUnit *>(SU), Elt);
+ });
}
SharesPredWithPrevNthGroup(unsigned Distance, const SIInstrInfo *TII,
unsigned SGID, bool NeedsCache = false)
@@ -1097,7 +1099,8 @@ private:
public:
void applyIGLPStrategy(
DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
- DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups) override;
+ DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
+ bool IsReentry) override;
bool shouldApplyStrategy(ScheduleDAGInstrs *DAG) override { return true; }
@@ -1107,14 +1110,20 @@ public:
}
};
+static unsigned DSWCount = 0;
+static unsigned DSWWithPermCount = 0;
+static unsigned DSWWithSharedVMEMCount = 0;
+
void MFMASmallGemmSingleWaveOpt::applyIGLPStrategy(
DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
- DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups) {
+ DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
+ bool IsReentry) {
unsigned MFMACount = 0;
- unsigned DSWCount = 0;
- unsigned DSWWithPermCount = 0;
- unsigned DSWWithSharedVMEMCount = 0;
unsigned DSRCount = 0;
+
+ assert((IsReentry || (DSWCount == 0 && DSWWithPermCount == 0 &&
+ DSWWithSharedVMEMCount == 0)) &&
+ "DSWCounters should be zero in pre-RA scheduling!");
SmallVector<SUnit *, 6> DSWithPerms;
for (auto &SU : DAG->SUnits) {
auto I = SU.getInstr();
@@ -1123,7 +1132,7 @@ void MFMASmallGemmSingleWaveOpt::applyIGLPStrategy(
else if (TII->isDS(*I)) {
if (I->mayLoad())
++DSRCount;
- else if (I->mayStore()) {
+ else if (I->mayStore() && !IsReentry) {
++DSWCount;
for (auto Pred : SU.Preds) {
if (Pred.getSUnit()->getInstr()->getOpcode() ==
@@ -1135,57 +1144,59 @@ void MFMASmallGemmSingleWaveOpt::applyIGLPStrategy(
}
}
}
- DSWWithPermCount = DSWithPerms.size();
- auto I = DSWithPerms.begin();
- auto E = DSWithPerms.end();
-
- // Get the count of DS_WRITES with V_PERM predecessors which
- // have loop carried dependencies (WAR) on the same VMEM_READs.
- // We consider partial overlap as a miss -- in other words,
- // for a given DS_W, we only consider another DS_W as matching
- // if there is a corresponding (in terms of the VMEM_R it uses) V_PERM pred
- // for every V_PERM pred of this DS_W.
- DenseMap<MachineInstr *, SUnit *> VMEMLookup;
- SmallVector<SUnit *, 6> Counted;
- for (; I != E; I++) {
- SUnit *Cand = nullptr;
- bool MissedAny = false;
- for (auto &Pred : (*I)->Preds) {
- if (Pred.getSUnit()->getInstr()->getOpcode() != AMDGPU::V_PERM_B32_e64)
- continue;
- if (Cand &&
- std::find(Counted.begin(), Counted.end(), Cand) != Counted.end())
- break;
-
- for (auto &Succ : Pred.getSUnit()->Succs) {
- auto MI = Succ.getSUnit()->getInstr();
- if (!TII->isVMEM(*MI) || !MI->mayLoad())
+ if (!IsReentry) {
+ DSWWithPermCount = DSWithPerms.size();
+ auto I = DSWithPerms.begin();
+ auto E = DSWithPerms.end();
+
+ // Get the count of DS_WRITES with V_PERM predecessors which
+ // have loop-carried dependencies (WAR) on the same VMEM_READs.
+ // We consider partial overlap as a miss -- in other words,
+ // for a given DS_W, we only consider another DS_W as matching
+ // if there is a corresponding (in terms of the VMEM_R it uses) V_PERM pred
+ // for every V_PERM pred of this DS_W.
+ DenseMap<MachineInstr *, SUnit *> VMEMLookup;
+ SmallVector<SUnit *, 6> Counted;
+ for (; I != E; I++) {
+ SUnit *Cand = nullptr;
+ bool MissedAny = false;
+ for (auto &Pred : (*I)->Preds) {
+ if (Pred.getSUnit()->getInstr()->getOpcode() != AMDGPU::V_PERM_B32_e64)
continue;
- if (MissedAny || !VMEMLookup.size()) {
- MissedAny = true;
- VMEMLookup[MI] = *I;
- continue;
- }
+ if (Cand && llvm::is_contained(Counted, Cand))
+ break;
- if (!VMEMLookup.contains(MI)) {
- MissedAny = true;
- VMEMLookup[MI] = *I;
- continue;
- }
+ for (auto &Succ : Pred.getSUnit()->Succs) {
+ auto MI = Succ.getSUnit()->getInstr();
+ if (!TII->isVMEM(*MI) || !MI->mayLoad())
+ continue;
- Cand = VMEMLookup[MI];
- if (std::find(Counted.begin(), Counted.end(), Cand) != Counted.end()) {
- MissedAny = true;
- break;
+ if (MissedAny || !VMEMLookup.size()) {
+ MissedAny = true;
+ VMEMLookup[MI] = *I;
+ continue;
+ }
+
+ if (!VMEMLookup.contains(MI)) {
+ MissedAny = true;
+ VMEMLookup[MI] = *I;
+ continue;
+ }
+
+ Cand = VMEMLookup[MI];
+ if (llvm::is_contained(Counted, Cand)) {
+ MissedAny = true;
+ break;
+ }
}
}
- }
- if (!MissedAny && Cand) {
- DSWWithSharedVMEMCount += 2;
- Counted.push_back(Cand);
- Counted.push_back(*I);
+ if (!MissedAny && Cand) {
+ DSWWithSharedVMEMCount += 2;
+ Counted.push_back(Cand);
+ Counted.push_back(*I);
+ }
}
}
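Note the design choice in this hunk: the DSW counters became file-scope statics so that a reentry (IsReentry == true) reuses the values computed by the first, pre-RA run instead of recounting over a transformed DAG, and the new assert checks that they start at zero pre-RA. The persistence pattern being relied on, in miniature (names hypothetical):

// CachedCount persists across invocations within the process, acting as
// a cache between the first run (which computes) and reentries (reuse).
static unsigned CachedCount = 0;

static unsigned getDSWCount(bool IsReentry) {
  if (!IsReentry)
    CachedCount = 42; // stand-in for the expensive DS_WRITE analysis
  return CachedCount;
}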
@@ -1401,7 +1412,11 @@ public:
// first created SchedGroup first.
bool IsBottomUp = 1;
+ // Whether or not this is a reentry into the IGroupLPDAGMutation.
+ bool IsReentry = false;
+
IGroupLPDAGMutation() = default;
+ IGroupLPDAGMutation(bool IsReentry) : IsReentry(IsReentry) {}
};
unsigned SchedGroup::NumSchedGroups = 0;
@@ -1689,7 +1704,7 @@ void IGroupLPDAGMutation::initIGLPOpt(SUnit &SU) {
auto S = createIGLPStrategy(StrategyID, DAG, TII);
if (S->shouldApplyStrategy(DAG)) {
IsBottomUp = S->IsBottomUp;
- S->applyIGLPStrategy(SyncedInstrs, SyncedSchedGroups);
+ S->applyIGLPStrategy(SyncedInstrs, SyncedSchedGroups, IsReentry);
}
}
@@ -1697,8 +1712,13 @@ void IGroupLPDAGMutation::initIGLPOpt(SUnit &SU) {
namespace llvm {
-std::unique_ptr<ScheduleDAGMutation> createIGroupLPDAGMutation() {
- return std::make_unique<IGroupLPDAGMutation>();
+/// \p IsReentry specifies whether or not this is a reentry into the
+/// IGroupLPDAGMutation. Since there may be multiple scheduling passes on the
+/// same scheduling region (e.g. pre and post-RA scheduling / multiple
+/// scheduling "phases"), we can reenter this mutation framework more than once
+/// for a given region.
+std::unique_ptr<ScheduleDAGMutation> createIGroupLPDAGMutation(bool IsReentry) {
+ return std::make_unique<IGroupLPDAGMutation>(IsReentry);
}
} // end namespace llvm
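A sketch of how a caller would thread the new flag through — the wrapper below is hypothetical; the real call sites sit in the GCN scheduler setup:

#include "AMDGPUIGroupLP.h"
#include "llvm/CodeGen/MachineScheduler.h"

using namespace llvm;

// The first scheduling pass over a region is not a reentry; any later
// phase (e.g. post-RA or a rescheduling stage) on the same region is.
static void addIGLPMutation(ScheduleDAGMI &DAG, bool FirstPhase) {
  DAG.addMutation(createIGroupLPDAGMutation(/*IsReentry=*/!FirstPhase));
}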
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h
index ae0faba0780d..3ec8be4f8892 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h
@@ -14,7 +14,7 @@
namespace llvm {
-std::unique_ptr<ScheduleDAGMutation> createIGroupLPDAGMutation();
+std::unique_ptr<ScheduleDAGMutation> createIGroupLPDAGMutation(bool IsReentry);
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 825c6f0acd0f..b0eac567ec9f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -19,6 +19,7 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600RegisterInfo.h"
+#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -81,10 +82,9 @@ static bool isExtractHiElt(SDValue In, SDValue &Out) {
// same register.
static SDValue stripExtractLoElt(SDValue In) {
if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
- if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
- if (Idx->isZero() && In.getValueSizeInBits() <= 32)
- return In.getOperand(0);
- }
+ SDValue Idx = In.getOperand(1);
+ if (isNullConstant(Idx) && In.getValueSizeInBits() <= 32)
+ return In.getOperand(0);
}
if (In.getOpcode() == ISD::TRUNCATE) {
@@ -113,12 +113,12 @@ INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
/// This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
return new AMDGPUDAGToDAGISel(TM, OptLevel);
}
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM,
- CodeGenOpt::Level OptLevel)
+ CodeGenOptLevel OptLevel)
: SelectionDAGISel(ID, TM, OptLevel) {
EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
}
@@ -132,7 +132,7 @@ bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
}
#endif
Subtarget = &MF.getSubtarget<GCNSubtarget>();
- Mode = SIModeRegisterDefaults(MF.getFunction());
+ Mode = SIModeRegisterDefaults(MF.getFunction(), *Subtarget);
return SelectionDAGISel::runOnMachineFunction(MF);
}
@@ -164,6 +164,7 @@ bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
case ISD::FTRUNC:
case ISD::FRINT:
case ISD::FNEARBYINT:
+ case ISD::FROUNDEVEN:
case ISD::FROUND:
case ISD::FFLOOR:
case ISD::FMINNUM:
@@ -596,11 +597,15 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
break;
uint64_t Imm;
- if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
+ if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N)) {
Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
- else {
+ if (AMDGPU::isValid32BitLiteral(Imm, true))
+ break;
+ } else {
ConstantSDNode *C = cast<ConstantSDNode>(N);
Imm = C->getZExtValue();
+ if (AMDGPU::isValid32BitLiteral(Imm, false))
+ break;
}
SDLoc DL(N);
@@ -664,6 +669,9 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
case ISD::BRCOND:
SelectBRCOND(N);
return;
+ case ISD::FP_EXTEND:
+ SelectFP_EXTEND(N);
+ return;
case AMDGPUISD::CVT_PKRTZ_F16_F32:
case AMDGPUISD::CVT_PKNORM_I16_F32:
case AMDGPUISD::CVT_PKNORM_U16_F32:
@@ -692,6 +700,14 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SelectINTRINSIC_VOID(N);
return;
}
+ case AMDGPUISD::WAVE_ADDRESS: {
+ SelectWAVE_ADDRESS(N);
+ return;
+ }
+ case ISD::STACKRESTORE: {
+ SelectSTACKRESTORE(N);
+ return;
+ }
}
SelectCode(N);
@@ -1136,13 +1152,69 @@ bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
return CurDAG->SignBitIsZero(Base);
}
-bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Base,
- uint64_t FlatVariant) const {
- if (FlatVariant != SIInstrFlags::FlatScratch)
+// Return whether the operation has NoUnsignedWrap property.
+static bool isNoUnsignedWrap(SDValue Addr) {
+ return (Addr.getOpcode() == ISD::ADD &&
+ Addr->getFlags().hasNoUnsignedWrap()) ||
+ Addr->getOpcode() == ISD::OR;
+}
+
+// Check that the base address of a flat scratch load/store, given in the form
+// `base + offset`, is legal to be put in an SGPR/VGPR (i.e. unsigned per the
+// hardware requirement). We always treat the first operand as the base address
+// here.
+bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Addr) const {
+ if (isNoUnsignedWrap(Addr))
return true;
- // When value in 32-bit Base can be negative calculate scratch offset using
- // 32-bit add instruction, otherwise use Base(unsigned) + offset.
- return CurDAG->SignBitIsZero(Base);
+
+ // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
+ // values.
+ if (AMDGPU::isGFX12Plus(*Subtarget))
+ return true;
+
+ auto LHS = Addr.getOperand(0);
+ auto RHS = Addr.getOperand(1);
+
+ // If the immediate offset is negative and within a certain range, the base
+ // address cannot also be negative. If the base is also negative, the sum
+ // would be either negative or much larger than the valid range of scratch
+ // memory a thread can access.
+ ConstantSDNode *ImmOp = nullptr;
+ if (Addr.getOpcode() == ISD::ADD && (ImmOp = dyn_cast<ConstantSDNode>(RHS))) {
+ if (ImmOp->getSExtValue() < 0 && ImmOp->getSExtValue() > -0x40000000)
+ return true;
+ }
+
+ return CurDAG->SignBitIsZero(LHS);
+}
+
+// Check that the address values in SGPR/VGPR are legal for a flat scratch
+// access in the form: SGPR + VGPR.
+bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(SDValue Addr) const {
+ if (isNoUnsignedWrap(Addr))
+ return true;
+
+ auto LHS = Addr.getOperand(0);
+ auto RHS = Addr.getOperand(1);
+ return CurDAG->SignBitIsZero(RHS) && CurDAG->SignBitIsZero(LHS);
+}
+
+// Check that the address values in SGPR/VGPR are legal for a flat scratch
+// access in the form: SGPR + VGPR + Imm.
+bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(SDValue Addr) const {
+ auto Base = Addr.getOperand(0);
+ auto *RHSImm = cast<ConstantSDNode>(Addr.getOperand(1));
+ // If the immediate offset is negative and within a certain range, the base
+ // address cannot also be negative. If the base is also negative, the sum
+ // would be either negative or much larger than the valid range of scratch
+ // memory a thread can access.
+ if (isNoUnsignedWrap(Base) &&
+ (isNoUnsignedWrap(Addr) ||
+ (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
+ return true;
+
+ auto LHS = Base.getOperand(0);
+ auto RHS = Base.getOperand(1);
+ return CurDAG->SignBitIsZero(RHS) && CurDAG->SignBitIsZero(LHS);
}
// TODO: If offset is too big, put low 16-bit into offset.
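
The three legality checks above share one idea: a flat-scratch address must stay non-negative, so either wrap is impossible (nuw, or an OR standing in for an ADD), the target tolerates negative offsets (GFX12+), or the sign of the base is provably clear. Below is a minimal standalone sketch of the `base + imm` case; `baseKnownNonNegative` stands in for CurDAG->SignBitIsZero(Base), and everything here is an illustrative model, not the LLVM API.

    #include <cstdint>
    #include <iostream>

    // hasNUW models a no-unsigned-wrap ADD (an OR used as an ADD implies it).
    bool isFlatScratchBaseLegalModel(bool hasNUW, bool isGFX12Plus,
                                     int64_t immOffset,
                                     bool baseKnownNonNegative) {
      if (hasNUW)
        return true; // The sum cannot wrap out of the unsigned range.
      if (isGFX12Plus)
        return true; // GFX12+ accepts negative VADDR/SADDR values.
      // A negative offset in (-0x40000000, 0) cannot pair with a negative
      // base: the sum would be negative or far outside the scratch aperture.
      if (immOffset < 0 && immOffset > -0x40000000)
        return true;
      return baseKnownNonNegative; // Falls back to SignBitIsZero(LHS).
    }

    int main() {
      std::cout << isFlatScratchBaseLegalModel(false, false, -16, false) // 1
                << isFlatScratchBaseLegalModel(false, false, 16, false)  // 0
                << '\n';
    }
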
@@ -1252,7 +1324,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
- SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ SOffset = Subtarget->hasRestrictedSOffset()
+ ? CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32)
+ : CurDAG->getTargetConstant(0, DL, MVT::i32);
ConstantSDNode *C1 = nullptr;
SDValue N0 = Addr;
@@ -1307,7 +1381,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
return true;
}
- if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
+ if (TII->isLegalMUBUFImmOffset(C1->getZExtValue())) {
// Legal offset for instruction.
Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
return true;
@@ -1381,7 +1456,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
// Don't fold null pointer.
if (Imm != NullPtr) {
- const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
+ const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(*Subtarget);
SDValue HighBits =
CurDAG->getTargetConstant(Imm & ~MaxOffset, DL, MVT::i32);
MachineSDNode *MovHighBits = CurDAG->getMachineNode(
@@ -1415,8 +1490,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
// Therefore it should be safe to fold any VGPR offset on gfx9 into the
// MUBUF vaddr, but not on older subtargets which can only do this if the
// sign bit is known 0.
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
- if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
+ if (TII->isLegalMUBUFImmOffset(C1->getZExtValue()) &&
(!Subtarget->privateMemoryResourceIsRangeChecked() ||
CurDAG->SignBitIsZero(N0))) {
std::tie(VAddr, SOffset) = foldFrameIndex(N0);
@@ -1448,6 +1524,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
SDValue &Offset) const {
const SIRegisterInfo *TRI =
static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
MachineFunction &MF = CurDAG->getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
SDLoc DL(Addr);
@@ -1464,14 +1541,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
if (Addr.getOpcode() == ISD::ADD) {
// Add (CopyFromReg <sgpr>) <constant>
CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
+ if (!CAddr || !TII->isLegalMUBUFImmOffset(CAddr->getZExtValue()))
return false;
if (!IsCopyFromSGPR(*TRI, Addr.getOperand(0)))
return false;
SOffset = Addr.getOperand(0);
} else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
- SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
+ TII->isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
// <constant>
SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
} else {
@@ -1488,8 +1565,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &SOffset, SDValue &Offset
) const {
SDValue Ptr, VAddr, Offen, Idxen, Addr64;
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
return false;
@@ -1510,6 +1586,21 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
return false;
}
+bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,
+ SDValue &SOffset) const {
+ if (Subtarget->hasRestrictedSOffset()) {
+ if (auto SOffsetConst = dyn_cast<ConstantSDNode>(ByteOffsetNode)) {
+ if (SOffsetConst->isZero()) {
+ SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
+ return true;
+ }
+ }
+ }
+
+ SOffset = ByteOffsetNode;
+ return true;
+}
+
// Find a load or store from corresponding pattern root.
// Roots may be build_vector, bitconvert or their combinations.
static MemSDNode* findMemSDNode(SDNode *N) {
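
SelectBUFSOffset above encodes one subtarget quirk: where SOffset is restricted, a known-zero scalar offset must be encoded as the SGPR_NULL register rather than an immediate 0. A hedged sketch of just that decision, with a plain enum standing in for the SelectionDAG values:

    #include <cstdint>
    #include <optional>

    enum class SOffsetEncoding { NullReg, PassThrough };

    // Illustrative model only: on a restricted-SOffset subtarget, a constant
    // zero byte offset becomes SGPR_NULL; everything else passes through.
    SOffsetEncoding selectBufSOffset(bool hasRestrictedSOffset,
                                     std::optional<int64_t> knownConstant) {
      if (hasRestrictedSOffset && knownConstant && *knownConstant == 0)
        return SOffsetEncoding::NullReg;
      return SOffsetEncoding::PassThrough;
    }
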
@@ -1539,7 +1630,8 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
SDValue N0, N1;
if (isBaseWithConstantOffset64(Addr, N0, N1) &&
- isFlatScratchBaseLegal(N0, FlatVariant)) {
+ (FlatVariant != SIInstrFlags::FlatScratch ||
+ isFlatScratchBaseLegal(Addr))) {
int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
const SIInstrInfo *TII = Subtarget->getInstrInfo();
@@ -1614,7 +1706,7 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
}
VAddr = Addr;
- Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
+ Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
return true;
}
@@ -1682,7 +1774,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
VOffset = SDValue(VMov, 0);
SAddr = LHS;
- Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
+ Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
return true;
}
}
@@ -1722,7 +1814,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
}
if (SAddr) {
- Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
+ Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
return true;
}
}
@@ -1738,7 +1830,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
VOffset = SDValue(VMov, 0);
- Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
+ Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
return true;
}
@@ -1771,8 +1863,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
int64_t COffsetVal = 0;
- if (CurDAG->isBaseWithConstantOffset(Addr) &&
- isFlatScratchBaseLegal(Addr.getOperand(0))) {
+ if (CurDAG->isBaseWithConstantOffset(Addr) && isFlatScratchBaseLegal(Addr)) {
COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
SAddr = Addr.getOperand(0);
} else {
@@ -1829,6 +1920,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
int64_t ImmOffset = 0;
SDValue LHS, RHS;
+ SDValue OrigAddr = Addr;
if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
const SIInstrInfo *TII = Subtarget->getInstrInfo();
@@ -1850,7 +1942,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
VAddr = SDValue(VMov, 0);
SAddr = LHS;
- if (!isFlatScratchBaseLegal(SAddr) || !isFlatScratchBaseLegal(VAddr))
+ if (!isFlatScratchBaseLegal(Addr))
return false;
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
return false;
@@ -1876,8 +1968,13 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
return false;
}
- if (!isFlatScratchBaseLegal(SAddr) || !isFlatScratchBaseLegal(VAddr))
- return false;
+ if (OrigAddr != Addr) {
+ if (!isFlatScratchBaseLegalSVImm(OrigAddr))
+ return false;
+ } else {
+ if (!isFlatScratchBaseLegalSV(OrigAddr))
+ return false;
+ }
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
return false;
@@ -2249,6 +2346,33 @@ bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
return false;
}
+static SDValue combineBallotPattern(SDValue VCMP, bool &Negate) {
+ assert(VCMP->getOpcode() == AMDGPUISD::SETCC);
+ // Special case for amdgcn.ballot:
+ // %Cond = i1 (and/or combination of i1 ISD::SETCCs)
+ // %VCMP = i(WaveSize) AMDGPUISD::SETCC (ext %Cond), 0, setne/seteq
+ // =>
+ // Use i1 %Cond value instead of i(WaveSize) %VCMP.
+ // This is possible because divergent ISD::SETCC is selected as V_CMP and
+ // Cond becomes an i(WaveSize) full mask value.
+ // Note that ballot doesn't use the SETEQ condition, but it's easy to support
+ // it here for completeness, so in this case Negate is set true on return.
+ auto VCMP_CC = cast<CondCodeSDNode>(VCMP.getOperand(2))->get();
+ if ((VCMP_CC == ISD::SETEQ || VCMP_CC == ISD::SETNE) &&
+ isNullConstant(VCMP.getOperand(1))) {
+
+ auto Cond = VCMP.getOperand(0);
+ if (ISD::isExtOpcode(Cond->getOpcode())) // Skip extension.
+ Cond = Cond.getOperand(0);
+
+ if (isBoolSGPR(Cond)) {
+ Negate = VCMP_CC == ISD::SETEQ;
+ return Cond;
+ }
+ }
+ return SDValue();
+}
+
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
SDValue Cond = N->getOperand(1);
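
combineBallotPattern can report an extra negation (a seteq ballot), and SelectBRCOND below adds its own when the outer compare is seteq-with-zero; the two fold into one branch polarity by XOR. A small truth-table sketch (names are illustrative only):

    #include <iostream>

    // true means branch on the condition being false (S_CBRANCH_VCCZ/SCC0).
    bool branchNegated(bool outerIsSetEq, bool ballotNegated) {
      return outerIsSetEq ^ ballotNegated;
    }

    int main() {
      for (bool Outer : {false, true})
        for (bool Ballot : {false, true})
          std::cout << Outer << " ^ " << Ballot << " -> "
                    << branchNegated(Outer, Ballot) << '\n';
    }
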
@@ -2262,11 +2386,50 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
const SIRegisterInfo *TRI = ST->getRegisterInfo();
bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
- unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
+ bool AndExec = !UseSCCBr;
+ bool Negate = false;
+
+ if (Cond.getOpcode() == ISD::SETCC &&
+ Cond->getOperand(0)->getOpcode() == AMDGPUISD::SETCC) {
+ SDValue VCMP = Cond->getOperand(0);
+ auto CC = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
+ if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ isNullConstant(Cond->getOperand(1)) &&
+ // TODO: make condition below an assert after fixing ballot bitwidth.
+ VCMP.getValueType().getSizeInBits() == ST->getWavefrontSize()) {
+ // %VCMP = i(WaveSize) AMDGPUISD::SETCC ...
+ // %C = i1 ISD::SETCC %VCMP, 0, setne/seteq
+ // BRCOND i1 %C, %BB
+ // =>
+ // %VCMP = i(WaveSize) AMDGPUISD::SETCC ...
+ // VCC = COPY i(WaveSize) %VCMP
+ // S_CBRANCH_VCCNZ/VCCZ %BB
+ Negate = CC == ISD::SETEQ;
+ bool NegatedBallot = false;
+ if (auto BallotCond = combineBallotPattern(VCMP, NegatedBallot)) {
+ Cond = BallotCond;
+ UseSCCBr = !BallotCond->isDivergent();
+ Negate = Negate ^ NegatedBallot;
+ } else {
+ // TODO: don't use SCC here, assuming that AMDGPUISD::SETCC is always
+ // selected as V_CMP, but this may change for a uniform condition.
+ Cond = VCMP;
+ UseSCCBr = false;
+ }
+ }
+ // Cond is either a V_CMP resulting from AMDGPUISD::SETCC, a combination of
+ // V_CMPs resulting from a ballot, or a ballot with a uniform condition, in
+ // which case SCC is used.
+ AndExec = false;
+ }
+
+ unsigned BrOp =
+ UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
+ : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
SDLoc SL(N);
- if (!UseSCCBr) {
+ if (AndExec) {
// This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
// analyzed what generates the vcc value, so we do not know whether vcc
// bits for disabled lanes are 0. Thus we need to mask out bits for
@@ -2296,6 +2459,22 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
VCC.getValue(0));
}
+void AMDGPUDAGToDAGISel::SelectFP_EXTEND(SDNode *N) {
+ if (Subtarget->hasSALUFloatInsts() && N->getValueType(0) == MVT::f32 &&
+ !N->isDivergent()) {
+ SDValue Src = N->getOperand(0);
+ if (Src.getValueType() == MVT::f16) {
+ if (isExtractHiElt(Src, Src)) {
+ CurDAG->SelectNodeTo(N, AMDGPU::S_CVT_HI_F32_F16, N->getVTList(),
+ {Src});
+ return;
+ }
+ }
+ }
+
+ SelectCode(N);
+}
+
void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
// The address is assumed to be uniform, so if it ends up in a VGPR, it will
// be copied to an SGPR with readfirstlane.
@@ -2369,8 +2548,9 @@ static unsigned gwsIntrinToOpcode(unsigned IntrID) {
}
void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
- if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
- !Subtarget->hasGWSSemaReleaseAll()) {
+ if (!Subtarget->hasGWS() ||
+ (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
+ !Subtarget->hasGWSSemaReleaseAll())) {
// Let this error.
SelectCode(N);
return;
@@ -2568,6 +2748,45 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
SelectCode(N);
}
+void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(SDNode *N) {
+ SDValue Log2WaveSize =
+ CurDAG->getTargetConstant(Subtarget->getWavefrontSizeLog2(), SDLoc(N), MVT::i32);
+ CurDAG->SelectNodeTo(N, AMDGPU::S_LSHR_B32, N->getVTList(),
+ {N->getOperand(0), Log2WaveSize});
+}
+
+void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(SDNode *N) {
+ SDValue SrcVal = N->getOperand(1);
+ if (SrcVal.getValueType() != MVT::i32) {
+ SelectCode(N); // Emit default error
+ return;
+ }
+
+ SDValue CopyVal;
+ Register SP = TLI->getStackPointerRegisterToSaveRestore();
+ SDLoc SL(N);
+
+ if (SrcVal.getOpcode() == AMDGPUISD::WAVE_ADDRESS) {
+ CopyVal = SrcVal.getOperand(0);
+ } else {
+ SDValue Log2WaveSize = CurDAG->getTargetConstant(
+ Subtarget->getWavefrontSizeLog2(), SL, MVT::i32);
+
+ if (N->isDivergent()) {
+ SrcVal = SDValue(CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL,
+ MVT::i32, SrcVal),
+ 0);
+ }
+
+ CopyVal = SDValue(CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
+ {SrcVal, Log2WaveSize}),
+ 0);
+ }
+
+ SDValue CopyToSP = CurDAG->getCopyToReg(N->getOperand(0), SL, SP, CopyVal);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyToSP);
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
unsigned &Mods,
bool IsCanonicalizing,
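
SelectWAVE_ADDRESS and SelectSTACKRESTORE above are inverses: the SGPR stack pointer is kept scaled by the wavefront size, so converting to and from a per-lane-compatible address is a right or left shift by log2(wavesize). A hedged numeric sketch for a wave64 target:

    #include <cassert>
    #include <cstdint>

    constexpr unsigned WavefrontSizeLog2 = 6; // wave64; wave32 would use 5

    // Models the S_LSHR_B32 emitted by SelectWAVE_ADDRESS.
    uint32_t toWaveAddress(uint32_t ScaledSP) {
      return ScaledSP >> WavefrontSizeLog2;
    }

    // Models the S_LSHL_B32 emitted by SelectSTACKRESTORE.
    uint32_t toScaledSP(uint32_t WaveAddr) {
      return WaveAddr << WavefrontSizeLog2;
    }

    int main() {
      uint32_t SP = 0x1000; // wave-scaled stack pointer value
      assert(toScaledSP(toWaveAddress(SP)) == SP); // round-trips when aligned
    }
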
@@ -2948,7 +3167,7 @@ bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
if (!RC || SIRI->isSGPRClass(RC))
return false;
- if (RC != &AMDGPU::VS_32RegClass) {
+ if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass) {
AllUsesAcceptSReg = false;
SDNode * User = *U;
if (User->isMachineOpcode()) {
@@ -2960,7 +3179,8 @@ bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
- if (CommutedRC == &AMDGPU::VS_32RegClass)
+ if (CommutedRC == &AMDGPU::VS_32RegClass ||
+ CommutedRC == &AMDGPU::VS_64RegClass)
AllUsesAcceptSReg = true;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 0605baf3a0cc..374108af08cd 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -92,7 +92,7 @@ public:
AMDGPUDAGToDAGISel() = delete;
- explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+ explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOptLevel OptLevel);
~AMDGPUDAGToDAGISel() override = default;
void getAnalysisUsage(AnalysisUsage &AU) const override;
@@ -154,8 +154,10 @@ private:
bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
unsigned Size) const;
- bool isFlatScratchBaseLegal(
- SDValue Base, uint64_t FlatVariant = SIInstrFlags::FlatScratch) const;
+
+ bool isFlatScratchBaseLegal(SDValue Addr) const;
+ bool isFlatScratchBaseLegalSV(SDValue Addr) const;
+ bool isFlatScratchBaseLegalSVImm(SDValue Addr) const;
bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
@@ -177,6 +179,7 @@ private:
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
+ bool SelectBUFSOffset(SDValue Addr, SDValue &SOffset) const;
bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset, uint64_t FlatVariant) const;
@@ -273,6 +276,7 @@ private:
bool isCBranchSCC(const SDNode *N) const;
void SelectBRCOND(SDNode *N);
void SelectFMAD_FMA(SDNode *N);
+ void SelectFP_EXTEND(SDNode *N);
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
void SelectDSBvhStackIntrinsic(SDNode *N);
void SelectDS_GWS(SDNode *N, unsigned IntrID);
@@ -280,6 +284,8 @@ private:
void SelectINTRINSIC_W_CHAIN(SDNode *N);
void SelectINTRINSIC_WO_CHAIN(SDNode *N);
void SelectINTRINSIC_VOID(SDNode *N);
+ void SelectWAVE_ADDRESS(SDNode *N);
+ void SelectSTACKRESTORE(SDNode *N);
protected:
// Include the pieces autogenerated from the target description.
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 39e00a037bdd..9d7443012e3d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -323,24 +323,26 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::BR_JT, ISD::BRIND}, MVT::Other, Expand);
- // This is totally unsupported, just custom lower to produce an error.
+ // For R600, this is totally unsupported, just custom lower to produce an
+ // error.
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
// Library functions. These default to Expand, but we have instructions
// for them.
- setOperationAction({ISD::FCEIL, ISD::FPOW, ISD::FABS, ISD::FFLOOR, ISD::FRINT,
- ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM},
+ setOperationAction({ISD::FCEIL, ISD::FPOW, ISD::FABS, ISD::FFLOOR,
+ ISD::FROUNDEVEN, ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM},
MVT::f32, Legal);
setOperationAction(ISD::FLOG2, MVT::f32, Custom);
setOperationAction(ISD::FROUND, {MVT::f32, MVT::f64}, Custom);
- setOperationAction({ISD::FLOG, ISD::FLOG10, ISD::FEXP, ISD::FEXP2}, MVT::f32,
- Custom);
+ setOperationAction(
+ {ISD::FLOG, ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FEXP10}, MVT::f32,
+ Custom);
setOperationAction(ISD::FNEARBYINT, {MVT::f16, MVT::f32, MVT::f64}, Custom);
- setOperationAction(ISD::FROUNDEVEN, {MVT::f16, MVT::f32, MVT::f64}, Custom);
+ setOperationAction(ISD::FRINT, {MVT::f16, MVT::f32, MVT::f64}, Custom);
setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom);
@@ -351,7 +353,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Custom);
}
- setOperationAction({ISD::FLOG10, ISD::FLOG, ISD::FEXP}, MVT::f16, Custom);
+ setOperationAction({ISD::FLOG10, ISD::FLOG, ISD::FEXP, ISD::FEXP10}, MVT::f16,
+ Custom);
// FIXME: These IS_FPCLASS vector fp types are marked custom so it reaches
// scalarization code. Can be removed when IS_FPCLASS expand isn't called by
@@ -383,7 +386,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
MVT::v12f32, MVT::v16f16, MVT::v16i16, MVT::v16f32, MVT::v16i32,
MVT::v32f32, MVT::v32i32, MVT::v2f64, MVT::v2i64, MVT::v3f64,
MVT::v3i64, MVT::v4f64, MVT::v4i64, MVT::v8f64, MVT::v8i64,
- MVT::v16f64, MVT::v16i64},
+ MVT::v16f64, MVT::v16i64, MVT::v32i16, MVT::v32f16},
Custom);
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
@@ -456,14 +459,17 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
for (MVT VT : FloatVectorTypes) {
setOperationAction(
- {ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,
- ISD::FCEIL, ISD::FCOS, ISD::FDIV, ISD::FEXP2,
- ISD::FEXP, ISD::FLOG2, ISD::FREM, ISD::FLOG,
- ISD::FLOG10, ISD::FPOW, ISD::FFLOOR, ISD::FTRUNC,
- ISD::FMUL, ISD::FMA, ISD::FRINT, ISD::FNEARBYINT,
- ISD::FSQRT, ISD::FSIN, ISD::FSUB, ISD::FNEG,
- ISD::VSELECT, ISD::SELECT_CC, ISD::FCOPYSIGN, ISD::VECTOR_SHUFFLE,
- ISD::SETCC, ISD::FCANONICALIZE},
+ {ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM,
+ ISD::FADD, ISD::FCEIL, ISD::FCOS,
+ ISD::FDIV, ISD::FEXP2, ISD::FEXP,
+ ISD::FEXP10, ISD::FLOG2, ISD::FREM,
+ ISD::FLOG, ISD::FLOG10, ISD::FPOW,
+ ISD::FFLOOR, ISD::FTRUNC, ISD::FMUL,
+ ISD::FMA, ISD::FRINT, ISD::FNEARBYINT,
+ ISD::FSQRT, ISD::FSIN, ISD::FSUB,
+ ISD::FNEG, ISD::VSELECT, ISD::SELECT_CC,
+ ISD::FCOPYSIGN, ISD::VECTOR_SHUFFLE, ISD::SETCC,
+ ISD::FCANONICALIZE, ISD::FROUNDEVEN},
VT, Expand);
}
@@ -579,11 +585,14 @@ static bool fnegFoldsIntoOpcode(unsigned Opc) {
case ISD::FMAXNUM:
case ISD::FMINNUM_IEEE:
case ISD::FMAXNUM_IEEE:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
case ISD::SELECT:
case ISD::FSIN:
case ISD::FTRUNC:
case ISD::FRINT:
case ISD::FNEARBYINT:
+ case ISD::FROUNDEVEN:
case ISD::FCANONICALIZE:
case AMDGPUISD::RCP:
case AMDGPUISD::RCP_LEGACY:
@@ -1001,6 +1010,9 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
case CallingConv::AMDGPU_ES:
case CallingConv::AMDGPU_LS:
return CC_AMDGPU;
+ case CallingConv::AMDGPU_CS_Chain:
+ case CallingConv::AMDGPU_CS_ChainPreserve:
+ return CC_AMDGPU_CS_CHAIN;
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::Cold:
@@ -1024,6 +1036,8 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC,
case CallingConv::AMDGPU_GS:
case CallingConv::AMDGPU_PS:
case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_CS_Chain:
+ case CallingConv::AMDGPU_CS_ChainPreserve:
case CallingConv::AMDGPU_HS:
case CallingConv::AMDGPU_ES:
case CallingConv::AMDGPU_LS:
@@ -1315,6 +1329,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
case ISD::FLOG10:
return LowerFLOGCommon(Op, DAG);
case ISD::FEXP:
+ case ISD::FEXP10:
return lowerFEXP(Op, DAG);
case ISD::FEXP2:
return lowerFEXP2(Op, DAG);
@@ -1360,6 +1375,7 @@ void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Lowered);
return;
case ISD::FEXP:
+ case ISD::FEXP10:
if (SDValue Lowered = lowerFEXP(SDValue(N, 0), DAG))
Results.push_back(Lowered);
return;
@@ -1714,7 +1730,7 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
SDValue LoLoad = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT,
Load->getChain(), BasePtr, SrcValue, LoMemVT,
BaseAlign, Load->getMemOperand()->getFlags());
- SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Size));
+ SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Size));
SDValue HiLoad =
DAG.getExtLoad(Load->getExtensionType(), SL, HiVT, Load->getChain(),
HiPtr, SrcValue.getWithOffset(LoMemVT.getStoreSize()),
@@ -2362,7 +2378,8 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2);
}
-SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
+SDValue AMDGPUTargetLowering::LowerFROUNDEVEN(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
@@ -2389,18 +2406,19 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2);
}
-SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const {
+SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op,
+ SelectionDAG &DAG) const {
// FNEARBYINT and FRINT are the same, except in their handling of FP
// exceptions. Those aren't really meaningful for us, and OpenCL only has
// rint, so just treat them as equivalent.
- return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0));
+ return DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(0));
}
-SDValue AMDGPUTargetLowering::LowerFROUNDEVEN(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
auto VT = Op.getValueType();
auto Arg = Op.getOperand(0u);
- return DAG.getNode(ISD::FRINT, SDLoc(Op), VT, Arg);
+ return DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), VT, Arg);
}
// XXX - May require not supporting f32 denormals?
@@ -2423,18 +2441,16 @@ SDValue AMDGPUTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
const SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
const SDValue One = DAG.getConstantFP(1.0, SL, VT);
- const SDValue Half = DAG.getConstantFP(0.5, SL, VT);
-
- SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, VT, One, X);
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ const SDValue Half = DAG.getConstantFP(0.5, SL, VT);
SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE);
+ SDValue OneOrZeroFP = DAG.getNode(ISD::SELECT, SL, VT, Cmp, One, Zero);
- SDValue Sel = DAG.getNode(ISD::SELECT, SL, VT, Cmp, SignOne, Zero);
-
- return DAG.getNode(ISD::FADD, SL, VT, T, Sel);
+ SDValue SignedOffset = DAG.getNode(ISD::FCOPYSIGN, SL, VT, OneOrZeroFP, X);
+ return DAG.getNode(ISD::FADD, SL, VT, T, SignedOffset);
}
SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
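
The LowerFROUND rework above computes the same round-half-away-from-zero result but selects between 0 and 1 first and applies the sign once with FCOPYSIGN. A runnable scalar sketch of the formula, using <cmath> stand-ins for the DAG nodes:

    #include <cmath>
    #include <cstdio>

    // round(x) = trunc(x) + copysign(|x - trunc(x)| >= 0.5 ? 1 : 0, x)
    float froundModel(float X) {
      float T = std::trunc(X);
      float AbsDiff = std::fabs(X - T);
      float OneOrZero = AbsDiff >= 0.5f ? 1.0f : 0.0f;
      return T + std::copysign(OneOrZero, X);
    }

    int main() {
      for (float X : {2.5f, -2.5f, 2.4f, -0.6f})
        std::printf("%g -> %g\n", X, froundModel(X)); // 3, -3, 2, -1
    }
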
@@ -2468,7 +2484,18 @@ static bool valueIsKnownNeverF32Denorm(SDValue Src) {
case ISD::FP_EXTEND:
return Src.getOperand(0).getValueType() == MVT::f16;
case ISD::FP16_TO_FP:
+ case ISD::FFREXP:
return true;
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Src.getOperand(0))->getZExtValue();
+ switch (IntrinsicID) {
+ case Intrinsic::amdgcn_frexp_mant:
+ return true;
+ default:
+ return false;
+ }
+ }
default:
return false;
}
@@ -2476,15 +2503,17 @@ static bool valueIsKnownNeverF32Denorm(SDValue Src) {
llvm_unreachable("covered opcode switch");
}
-static bool allowApproxFunc(const SelectionDAG &DAG, SDNodeFlags Flags) {
+bool AMDGPUTargetLowering::allowApproxFunc(const SelectionDAG &DAG,
+ SDNodeFlags Flags) {
if (Flags.hasApproximateFuncs())
return true;
auto &Options = DAG.getTarget().Options;
return Options.UnsafeFPMath || Options.ApproxFuncFPMath;
}
-static bool needsDenormHandlingF32(const SelectionDAG &DAG, SDValue Src,
- SDNodeFlags Flags) {
+bool AMDGPUTargetLowering::needsDenormHandlingF32(const SelectionDAG &DAG,
+ SDValue Src,
+ SDNodeFlags Flags) {
return !valueIsKnownNeverF32Denorm(Src) &&
DAG.getMachineFunction()
.getDenormalMode(APFloat::IEEEsingle())
@@ -2697,7 +2726,8 @@ SDValue AMDGPUTargetLowering::LowerFLOGUnsafe(SDValue Src, const SDLoc &SL,
SelectionDAG &DAG, bool IsLog10,
SDNodeFlags Flags) const {
EVT VT = Src.getValueType();
- unsigned LogOp = VT == MVT::f32 ? AMDGPUISD::LOG : ISD::FLOG2;
+ unsigned LogOp =
+ VT == MVT::f32 ? (unsigned)AMDGPUISD::LOG : (unsigned)ISD::FLOG2;
double Log2BaseInverted =
IsLog10 ? numbers::ln2 / numbers::ln10 : numbers::ln2;
@@ -2782,14 +2812,95 @@ SDValue AMDGPUTargetLowering::lowerFEXP2(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::FMUL, SL, VT, Exp2, ResultScale, Flags);
}
-SDValue AMDGPUTargetLowering::lowerFEXPUnsafe(SDValue Op, const SDLoc &SL,
+SDValue AMDGPUTargetLowering::lowerFEXPUnsafe(SDValue X, const SDLoc &SL,
SelectionDAG &DAG,
SDNodeFlags Flags) const {
- // exp2(M_LOG2E_F * f);
- EVT VT = Op.getValueType();
- const SDValue K = DAG.getConstantFP(numbers::log2e, SL, VT);
- SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Op, K, Flags);
- return DAG.getNode(VT == MVT::f32 ? AMDGPUISD::EXP : ISD::FEXP2, SL, VT, Mul,
+ EVT VT = X.getValueType();
+ const SDValue Log2E = DAG.getConstantFP(numbers::log2e, SL, VT);
+
+ if (VT != MVT::f32 || !needsDenormHandlingF32(DAG, X, Flags)) {
+ // exp2(M_LOG2E_F * f);
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, X, Log2E, Flags);
+ return DAG.getNode(VT == MVT::f32 ? (unsigned)AMDGPUISD::EXP
+ : (unsigned)ISD::FEXP2,
+ SL, VT, Mul, Flags);
+ }
+
+ EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
+ SDValue Threshold = DAG.getConstantFP(-0x1.5d58a0p+6f, SL, VT);
+ SDValue NeedsScaling = DAG.getSetCC(SL, SetCCVT, X, Threshold, ISD::SETOLT);
+
+ SDValue ScaleOffset = DAG.getConstantFP(0x1.0p+6f, SL, VT);
+
+ SDValue ScaledX = DAG.getNode(ISD::FADD, SL, VT, X, ScaleOffset, Flags);
+
+ SDValue AdjustedX =
+ DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, ScaledX, X);
+
+ SDValue ExpInput = DAG.getNode(ISD::FMUL, SL, VT, AdjustedX, Log2E, Flags);
+
+ SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, ExpInput, Flags);
+
+ SDValue ResultScaleFactor = DAG.getConstantFP(0x1.969d48p-93f, SL, VT);
+ SDValue AdjustedResult =
+ DAG.getNode(ISD::FMUL, SL, VT, Exp2, ResultScaleFactor, Flags);
+
+ return DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, AdjustedResult, Exp2,
+ Flags);
+}
+
+/// Emit an approx-funcs-appropriate lowering for exp10. Inf/nan should still
+/// be handled correctly.
+SDValue AMDGPUTargetLowering::lowerFEXP10Unsafe(SDValue X, const SDLoc &SL,
+ SelectionDAG &DAG,
+ SDNodeFlags Flags) const {
+ const EVT VT = X.getValueType();
+ const unsigned Exp2Op = VT == MVT::f32 ? AMDGPUISD::EXP : ISD::FEXP2;
+
+ if (VT != MVT::f32 || !needsDenormHandlingF32(DAG, X, Flags)) {
+ // exp2(x * 0x1.a92000p+1f) * exp2(x * 0x1.4f0978p-11f);
+ SDValue K0 = DAG.getConstantFP(0x1.a92000p+1f, SL, VT);
+ SDValue K1 = DAG.getConstantFP(0x1.4f0978p-11f, SL, VT);
+
+ SDValue Mul0 = DAG.getNode(ISD::FMUL, SL, VT, X, K0, Flags);
+ SDValue Exp2_0 = DAG.getNode(Exp2Op, SL, VT, Mul0, Flags);
+ SDValue Mul1 = DAG.getNode(ISD::FMUL, SL, VT, X, K1, Flags);
+ SDValue Exp2_1 = DAG.getNode(Exp2Op, SL, VT, Mul1, Flags);
+ return DAG.getNode(ISD::FMUL, SL, VT, Exp2_0, Exp2_1);
+ }
+
+ // bool s = x < -0x1.2f7030p+5f;
+ // x += s ? 0x1.0p+5f : 0.0f;
+ // exp10 = exp2(x * 0x1.a92000p+1f) *
+ // exp2(x * 0x1.4f0978p-11f) *
+ // (s ? 0x1.9f623ep-107f : 1.0f);
+
+ EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
+ SDValue Threshold = DAG.getConstantFP(-0x1.2f7030p+5f, SL, VT);
+ SDValue NeedsScaling = DAG.getSetCC(SL, SetCCVT, X, Threshold, ISD::SETOLT);
+
+ SDValue ScaleOffset = DAG.getConstantFP(0x1.0p+5f, SL, VT);
+ SDValue ScaledX = DAG.getNode(ISD::FADD, SL, VT, X, ScaleOffset, Flags);
+ SDValue AdjustedX =
+ DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, ScaledX, X);
+
+ SDValue K0 = DAG.getConstantFP(0x1.a92000p+1f, SL, VT);
+ SDValue K1 = DAG.getConstantFP(0x1.4f0978p-11f, SL, VT);
+
+ SDValue Mul0 = DAG.getNode(ISD::FMUL, SL, VT, AdjustedX, K0, Flags);
+ SDValue Exp2_0 = DAG.getNode(Exp2Op, SL, VT, Mul0, Flags);
+ SDValue Mul1 = DAG.getNode(ISD::FMUL, SL, VT, AdjustedX, K1, Flags);
+ SDValue Exp2_1 = DAG.getNode(Exp2Op, SL, VT, Mul1, Flags);
+
+ SDValue MulExps = DAG.getNode(ISD::FMUL, SL, VT, Exp2_0, Exp2_1, Flags);
+
+ SDValue ResultScaleFactor = DAG.getConstantFP(0x1.9f623ep-107f, SL, VT);
+ SDValue AdjustedResult =
+ DAG.getNode(ISD::FMUL, SL, VT, MulExps, ResultScaleFactor, Flags);
+
+ return DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, AdjustedResult, MulExps,
Flags);
}
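
Both unsafe expansions rely on the same range-reduction trick: if x is small enough that the result would land in the f32 denormal range, shift the input up by a convenient power-of-two constant and rescale the result afterwards. A hedged double-precision check of the identities and the magic constants used above:

    #include <cmath>
    #include <cstdio>

    int main() {
      // lowerFEXPUnsafe: exp(x) = exp(x + 64) * exp(-64); 0x1.969d48p-93f is
      // the single-precision value of exp(-64).
      double X = -100.0;
      std::printf("%g vs %g\n", std::exp(X),
                  std::exp(X + 64.0) * 0x1.969d48p-93);

      // lowerFEXP10Unsafe: exp10(x) = exp2(x*K0) * exp2(x*K1), where K0 has a
      // short mantissa so x*K0 is nearly exact and K0 + K1 ~= log2(10).
      double K0 = 0x1.a92000p+1, K1 = 0x1.4f0978p-11;
      std::printf("K0+K1 = %.9f, log2(10) = %.9f\n", K0 + K1, std::log2(10.0));
      std::printf("%g vs %g\n", std::pow(10.0, 3.0),
                  std::exp2(3.0 * K0) * std::exp2(3.0 * K1));
    }
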
@@ -2798,7 +2909,7 @@ SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue X = Op.getOperand(0);
SDNodeFlags Flags = Op->getFlags();
- const bool IsExp10 = false; // TODO: For some reason exp10 is missing
+ const bool IsExp10 = Op.getOpcode() == ISD::FEXP10;
if (VT.getScalarType() == MVT::f16) {
// v_exp_f16 (fmul x, log2e)
@@ -2822,9 +2933,9 @@ SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
// TODO: Interpret allowApproxFunc as ignoring DAZ. This is currently copying
// library behavior. Also, is known-not-daz source sufficient?
- if (allowApproxFunc(DAG, Flags) && !needsDenormHandlingF32(DAG, X, Flags)) {
- assert(!IsExp10 && "todo exp10 support");
- return lowerFEXPUnsafe(X, SL, DAG, Flags);
+ if (allowApproxFunc(DAG, Flags)) {
+ return IsExp10 ? lowerFEXP10Unsafe(X, SL, DAG, Flags)
+ : lowerFEXPUnsafe(X, SL, DAG, Flags);
}
// Algorithm:
@@ -2891,7 +3002,7 @@ SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
PL = getMad(DAG, SL, VT, XH, CL, Mad0, Flags);
}
- SDValue E = DAG.getNode(ISD::FRINT, SL, VT, PH, Flags);
+ SDValue E = DAG.getNode(ISD::FROUNDEVEN, SL, VT, PH, Flags);
// It is unsafe to contract this fsub into the PH multiply.
SDValue PHSubE = DAG.getNode(ISD::FSUB, SL, VT, PH, E, FlagsNoContract);
@@ -3698,8 +3809,7 @@ SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
case Intrinsic::amdgcn_rsq:
case Intrinsic::amdgcn_rcp_legacy:
case Intrinsic::amdgcn_rsq_legacy:
- case Intrinsic::amdgcn_rsq_clamp:
- case Intrinsic::amdgcn_ldexp: {
+ case Intrinsic::amdgcn_rsq_clamp: {
// FIXME: This is probably wrong. If src is an sNaN, it won't be quieted
SDValue Src = N->getOperand(1);
return Src.isUndef() ? Src : SDValue();
@@ -4012,8 +4122,7 @@ static SDValue getAddOneOp(const SDNode *V) {
if (V->getOpcode() != ISD::ADD)
return SDValue();
- auto *C = dyn_cast<ConstantSDNode>(V->getOperand(1));
- return C && C->isOne() ? V->getOperand(0) : SDValue();
+ return isOneConstant(V->getOperand(1)) ? V->getOperand(0) : SDValue();
}
SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
@@ -4243,8 +4352,7 @@ SDValue AMDGPUTargetLowering::getFFBX_U32(SelectionDAG &DAG,
SDValue AMDGPUTargetLowering::performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond,
SDValue LHS, SDValue RHS,
DAGCombinerInfo &DCI) const {
- ConstantSDNode *CmpRhs = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
- if (!CmpRhs || !CmpRhs->isZero())
+ if (!isNullConstant(Cond.getOperand(1)))
return SDValue();
SelectionDAG &DAG = DCI.DAG;
@@ -4466,6 +4574,10 @@ static unsigned inverseMinMax(unsigned Opc) {
return ISD::FMINNUM_IEEE;
case ISD::FMINNUM_IEEE:
return ISD::FMAXNUM_IEEE;
+ case ISD::FMAXIMUM:
+ return ISD::FMINIMUM;
+ case ISD::FMINIMUM:
+ return ISD::FMAXIMUM;
case AMDGPUISD::FMAX_LEGACY:
return AMDGPUISD::FMIN_LEGACY;
case AMDGPUISD::FMIN_LEGACY:
@@ -4589,6 +4701,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
case ISD::FMINNUM:
case ISD::FMAXNUM_IEEE:
case ISD::FMINNUM_IEEE:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
case AMDGPUISD::FMAX_LEGACY:
case AMDGPUISD::FMIN_LEGACY: {
// fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y)
@@ -4638,6 +4752,7 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
case ISD::FTRUNC:
case ISD::FRINT:
case ISD::FNEARBYINT: // XXX - Should fround be handled?
+ case ISD::FROUNDEVEN:
case ISD::FSIN:
case ISD::FCANONICALIZE:
case AMDGPUISD::RCP:
@@ -4999,6 +5114,36 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
return performAssertSZExtCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN:
return performIntrinsicWOChainCombine(N, DCI);
+ case AMDGPUISD::FMAD_FTZ: {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+
+ // FMAD_FTZ is a FMAD + flush denormals to zero.
+ // We flush the inputs, the intermediate step, and the output.
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
+ if (N0CFP && N1CFP && N2CFP) {
+ const auto FTZ = [](const APFloat &V) {
+ if (V.isDenormal()) {
+ APFloat Zero(V.getSemantics(), 0);
+ return V.isNegative() ? -Zero : Zero;
+ }
+ return V;
+ };
+
+ APFloat V0 = FTZ(N0CFP->getValueAPF());
+ APFloat V1 = FTZ(N1CFP->getValueAPF());
+ APFloat V2 = FTZ(N2CFP->getValueAPF());
+ V0.multiply(V1, APFloat::rmNearestTiesToEven);
+ V0 = FTZ(V0);
+ V0.add(V2, APFloat::rmNearestTiesToEven);
+ return DAG.getConstantFP(FTZ(V0), DL, VT);
+ }
+ break;
+ }
}
return SDValue();
}
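
The FMAD_FTZ fold above evaluates the mad with a flush at every step. A hedged scalar model, with std::fpclassify standing in for APFloat::isDenormal and unfused float arithmetic for the separate APFloat multiply and add:

    #include <cmath>
    #include <cstdio>

    // Flush a denormal to a correctly-signed zero, like the FTZ lambda above.
    float ftz(float V) {
      return std::fpclassify(V) == FP_SUBNORMAL ? std::copysign(0.0f, V) : V;
    }

    // FMAD_FTZ(a, b, c) = ftz(ftz(ftz(a) * ftz(b)) + ftz(c))
    float fmadFtzModel(float A, float B, float C) {
      return ftz(ftz(ftz(A) * ftz(B)) + ftz(C));
    }

    int main() {
      float Denorm = 1e-40f; // subnormal in IEEE single precision
      std::printf("%g\n", fmadFtzModel(Denorm, 2.0f, 1.0f)); // 1: A flushes to 0
    }
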
@@ -5140,8 +5285,10 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CALL)
NODE_NAME_CASE(TC_RETURN)
NODE_NAME_CASE(TC_RETURN_GFX)
+ NODE_NAME_CASE(TC_RETURN_CHAIN)
NODE_NAME_CASE(TRAP)
NODE_NAME_CASE(RET_GLUE)
+ NODE_NAME_CASE(WAVE_ADDRESS)
NODE_NAME_CASE(RETURN_TO_EPILOG)
NODE_NAME_CASE(ENDPGM)
NODE_NAME_CASE(ENDPGM_TRAP)
@@ -5166,6 +5313,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FMED3)
NODE_NAME_CASE(SMED3)
NODE_NAME_CASE(UMED3)
+ NODE_NAME_CASE(FMAXIMUM3)
+ NODE_NAME_CASE(FMINIMUM3)
NODE_NAME_CASE(FDOT2)
NODE_NAME_CASE(URECIP)
NODE_NAME_CASE(DIV_SCALE)
@@ -5620,6 +5769,8 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
case AMDGPUISD::FMED3:
case AMDGPUISD::FMIN3:
case AMDGPUISD::FMAX3:
+ case AMDGPUISD::FMINIMUM3:
+ case AMDGPUISD::FMAXIMUM3:
case AMDGPUISD::FMAD_FTZ: {
if (SNaN)
return true;
@@ -5734,12 +5885,6 @@ AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
}
}
-bool AMDGPUTargetLowering::isConstantUnsignedBitfieldExtractLegal(
- unsigned Opc, LLT Ty1, LLT Ty2) const {
- return (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64)) &&
- Ty2 == LLT::scalar(32);
-}
-
/// Whether it is profitable to sink the operands of an
/// Instruction I to the basic block of I.
/// This helps using several modifiers (like abs and neg) more often.
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index c39093b9bb6b..827fb106b551 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -61,6 +61,9 @@ protected:
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
+ static bool allowApproxFunc(const SelectionDAG &DAG, SDNodeFlags Flags);
+ static bool needsDenormHandlingF32(const SelectionDAG &DAG, SDValue Src,
+ SDNodeFlags Flags);
SDValue getIsLtSmallestNormal(SelectionDAG &DAG, SDValue Op,
SDNodeFlags Flags) const;
SDValue getIsFinite(SelectionDAG &DAG, SDValue Op, SDNodeFlags Flags) const;
@@ -77,6 +80,8 @@ protected:
SDValue lowerFEXPUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
SDNodeFlags Flags) const;
+ SDValue lowerFEXP10Unsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
+ SDNodeFlags Flags) const;
SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const;
@@ -242,9 +247,7 @@ public:
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
- SDValue LowerDYNAMIC_STACKALLOC(SDValue Op,
- SelectionDAG &DAG) const;
-
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
void ReplaceNodeResults(SDNode * N,
@@ -371,9 +374,6 @@ public:
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
- bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
- LLT Ty2) const override;
-
bool shouldSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const override;
};
@@ -391,6 +391,7 @@ enum NodeType : unsigned {
CALL,
TC_RETURN,
TC_RETURN_GFX,
+ TC_RETURN_CHAIN,
TRAP,
// Masked control flow nodes.
@@ -410,6 +411,10 @@ enum NodeType : unsigned {
// Return with values from a non-entry function.
RET_GLUE,
+ // Convert an unswizzled wave-uniform stack address to an address compatible
+ // with a vector offset for use in stack access.
+ WAVE_ADDRESS,
+
DWORDADDR,
FRACT,
@@ -444,6 +449,8 @@ enum NodeType : unsigned {
FMED3,
SMED3,
UMED3,
+ FMAXIMUM3,
+ FMINIMUM3,
FDOT2,
URECIP,
DIV_SCALE,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp
new file mode 100644
index 000000000000..e5fbcca1e7d1
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp
@@ -0,0 +1,336 @@
+//===- AMDGPUImageIntrinsicOptimizer.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to combine multiple image_load intrinsics with dim=2dmsaa
+// or dim=2darraymsaa into a single image_msaa_load intrinsic if:
+//
+// - they refer to the same vaddr except for sample_id,
+// - they use a constant sample_id and they fall into the same group,
+// - they have the same dmask, and the combine reduces the number of
+// intrinsics and the number of vaddr/vdata dword transfers.
+//
+// Examples of the tradeoff (all assume 2DMsaa for vaddr):
+//
+// +----------+-----+-----+-------+---------+------------+---------+----------+
+// | popcount | a16 | d16 | #load | vaddr / | #msaa_load | vaddr / | combine? |
+// | (dmask) | | | | vdata | | vdata | |
+// +----------+-----+-----+-------+---------+------------+---------+----------+
+// | 1 | 0 | 0 | 4 | 12 / 4 | 1 | 3 / 4 | yes |
+// +----------+-----+-----+-------+---------+------------+---------+----------+
+// | 1 | 0 | 0 | 2 | 6 / 2 | 1 | 3 / 4 | yes? |
+// +----------+-----+-----+-------+---------+------------+---------+----------+
+// | 2 | 0 | 0 | 4 | 12 / 8 | 2 | 6 / 8 | yes |
+// +----------+-----+-----+-------+---------+------------+---------+----------+
+// | 2 | 0 | 0 | 2 | 6 / 4 | 2 | 6 / 8 | no |
+// +----------+-----+-----+-------+---------+------------+---------+----------+
+// | 1 | 0 | 1 | 2 | 6 / 2 | 1 | 3 / 2 | yes |
+// +----------+-----+-----+-------+---------+------------+---------+----------+
+//
+// Some cases are of questionable benefit, like the one marked with "yes?"
+// above: fewer intrinsics, fewer vaddr dwords, and fewer total transfers
+// between SP and TX, but more vdata dwords. We start by erring on the side of
+// converting these to MSAA_LOAD.
+//
+// clang-format off
+//
+// This pass will combine intrinsics such as (not necessarily consecutive):
+// call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+// call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
+// call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
+// call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
+// ==>
+// call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+//
+// clang-format on
+//
+// Future improvements:
+//
+// - We may occasionally not want to do the combine if it increases the maximum
+// register pressure.
+//
+// - Ensure clausing when multiple MSAA_LOAD are generated.
+//
+// Note: Even though the image_msaa_load intrinsic already exists on gfx10, this
+// combine only applies to gfx11, due to a limitation in gfx10: the gfx10
+// IMAGE_MSAA_LOAD only works correctly with single-channel texture formats, and
+// we don't know the format at compile time.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-image-intrinsic-opt"
+
+namespace {
+class AMDGPUImageIntrinsicOptimizer : public FunctionPass {
+ const TargetMachine *TM;
+
+public:
+ static char ID;
+
+ AMDGPUImageIntrinsicOptimizer(const TargetMachine *TM = nullptr)
+ : FunctionPass(ID), TM(TM) {}
+
+ bool runOnFunction(Function &F) override;
+
+}; // End of class AMDGPUImageIntrinsicOptimizer
+} // End anonymous namespace
+
+INITIALIZE_PASS(AMDGPUImageIntrinsicOptimizer, DEBUG_TYPE,
+ "AMDGPU Image Intrinsic Optimizer", false, false)
+
+char AMDGPUImageIntrinsicOptimizer::ID = 0;
+
+void addInstToMergeableList(
+ IntrinsicInst *II,
+ SmallVector<SmallVector<IntrinsicInst *, 4>> &MergeableInsts,
+ const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr) {
+ for (SmallVector<IntrinsicInst *, 4> &IIList : MergeableInsts) {
+ // Check Dim.
+ if (IIList.front()->getIntrinsicID() != II->getIntrinsicID())
+ continue;
+
+ // Check D16.
+ if (IIList.front()->getType() != II->getType())
+ continue;
+
+ // Check all arguments (DMask, VAddr, RSrc etc).
+ bool AllEqual = true;
+ assert(IIList.front()->arg_size() == II->arg_size());
+ for (int I = 1, E = II->arg_size(); AllEqual && I != E; ++I) {
+ Value *ArgList = IIList.front()->getArgOperand(I);
+ Value *Arg = II->getArgOperand(I);
+ if (I == ImageDimIntr->VAddrEnd - 1) {
+ // Check FragId group.
+ auto FragIdList = cast<ConstantInt>(IIList.front()->getArgOperand(I));
+ auto FragId = cast<ConstantInt>(II->getArgOperand(I));
+ AllEqual = FragIdList->getValue().udiv(4) == FragId->getValue().udiv(4);
+ } else {
+ // Check all arguments except FragId.
+ AllEqual = ArgList == Arg;
+ }
+ }
+ if (!AllEqual)
+ continue;
+
+ // Add to the list.
+ IIList.emplace_back(II);
+ return;
+ }
+
+ // Similar instruction not found, so add a new list.
+ MergeableInsts.emplace_back(1, II);
+ LLVM_DEBUG(dbgs() << "New: " << *II << "\n");
+}
+
+// Collect a list of all instructions we know how to merge in a subset of the
+// block. Returns an iterator to the instruction after the last one analyzed.
+BasicBlock::iterator collectMergeableInsts(
+ BasicBlock::iterator I, BasicBlock::iterator E,
+ SmallVector<SmallVector<IntrinsicInst *, 4>> &MergeableInsts) {
+ for (; I != E; ++I) {
+ // Don't combine if there is a store in the middle or if there is a memory
+ // barrier.
+ if (I->mayHaveSideEffects()) {
+ ++I;
+ break;
+ }
+
+ // Ignore non-intrinsics.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ Intrinsic::ID IntrinID = II->getIntrinsicID();
+
+ // Ignore other intrinsics.
+ if (IntrinID != Intrinsic::amdgcn_image_load_2dmsaa &&
+ IntrinID != Intrinsic::amdgcn_image_load_2darraymsaa)
+ continue;
+
+ // Check for constant FragId.
+ const auto *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrinID);
+ const uint8_t FragIdIndex = ImageDimIntr->VAddrEnd - 1;
+ if (!isa<ConstantInt>(II->getArgOperand(FragIdIndex)))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Merge: " << *II << "\n");
+ addInstToMergeableList(II, MergeableInsts, ImageDimIntr);
+ }
+ }
+
+ return I;
+}
+
+bool optimizeSection(ArrayRef<SmallVector<IntrinsicInst *, 4>> MergeableInsts) {
+ bool Modified = false;
+
+ SmallVector<Instruction *, 4> InstrsToErase;
+ for (const auto &IIList : MergeableInsts) {
+ if (IIList.size() <= 1)
+ continue;
+
+ // Assume the arguments are unchanged and later override them, if needed.
+ SmallVector<Value *, 16> Args(IIList.front()->args());
+
+ // Validate function argument and return types, extracting overloaded
+ // types along the way.
+ SmallVector<Type *, 6> OverloadTys;
+ Function *F = IIList.front()->getCalledFunction();
+ if (!Intrinsic::getIntrinsicSignature(F, OverloadTys))
+ continue;
+
+ Intrinsic::ID IntrinID = IIList.front()->getIntrinsicID();
+ const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
+ AMDGPU::getImageDimIntrinsicInfo(IntrinID);
+
+ Type *EltTy = IIList.front()->getType()->getScalarType();
+ Type *NewTy = FixedVectorType::get(EltTy, 4);
+ OverloadTys[0] = NewTy;
+ bool isD16 = EltTy->isHalfTy();
+
+ ConstantInt *DMask = cast<ConstantInt>(
+ IIList.front()->getArgOperand(ImageDimIntr->DMaskIndex));
+ unsigned DMaskVal = DMask->getZExtValue() & 0xf;
+ unsigned NumElts = popcount(DMaskVal);
+
+ // Both the number of instructions and the number of vaddr/vdata dword
+ // transfers should be reduced by the combine.
+ unsigned NumLoads = IIList.size();
+ unsigned NumMsaas = NumElts;
+ unsigned NumVAddrLoads = 3 * NumLoads;
+ unsigned NumVDataLoads = divideCeil(NumElts, isD16 ? 2 : 1) * NumLoads;
+ unsigned NumVAddrMsaas = 3 * NumMsaas;
+ unsigned NumVDataMsaas = divideCeil(4, isD16 ? 2 : 1) * NumMsaas;
+
+ if (NumLoads < NumMsaas ||
+ (NumVAddrLoads + NumVDataLoads < NumVAddrMsaas + NumVDataMsaas))
+ continue;
+
+ const uint8_t FragIdIndex = ImageDimIntr->VAddrEnd - 1;
+ auto FragId = cast<ConstantInt>(IIList.front()->getArgOperand(FragIdIndex));
+ const APInt &NewFragIdVal = FragId->getValue().udiv(4) * 4;
+
+ // Create the new instructions.
+ IRBuilder<> B(IIList.front());
+
+ // Create the new image_msaa_load intrinsic.
+ SmallVector<Instruction *, 4> NewCalls;
+ while (DMaskVal != 0) {
+ unsigned NewMaskVal = 1 << countr_zero(DMaskVal);
+
+ Intrinsic::ID NewIntrinID;
+ if (IntrinID == Intrinsic::amdgcn_image_load_2dmsaa)
+ NewIntrinID = Intrinsic::amdgcn_image_msaa_load_2dmsaa;
+ else
+ NewIntrinID = Intrinsic::amdgcn_image_msaa_load_2darraymsaa;
+
+ Function *NewIntrin = Intrinsic::getDeclaration(
+ IIList.front()->getModule(), NewIntrinID, OverloadTys);
+ Args[ImageDimIntr->DMaskIndex] =
+ ConstantInt::get(DMask->getType(), NewMaskVal);
+ Args[FragIdIndex] = ConstantInt::get(FragId->getType(), NewFragIdVal);
+ CallInst *NewCall = B.CreateCall(NewIntrin, Args);
+ LLVM_DEBUG(dbgs() << "Optimize: " << *NewCall << "\n");
+
+ NewCalls.push_back(NewCall);
+ DMaskVal -= NewMaskVal;
+ }
+
+ // Create the new extractelement instructions.
+ for (auto &II : IIList) {
+ Value *VecOp = nullptr;
+ auto Idx = cast<ConstantInt>(II->getArgOperand(FragIdIndex));
+ B.SetCurrentDebugLocation(II->getDebugLoc());
+ if (NumElts == 1) {
+ VecOp = B.CreateExtractElement(NewCalls[0], Idx->getValue().urem(4));
+ LLVM_DEBUG(dbgs() << "Add: " << *VecOp << "\n");
+ } else {
+ VecOp = UndefValue::get(II->getType());
+ for (unsigned I = 0; I < NumElts; ++I) {
+ VecOp = B.CreateInsertElement(
+ VecOp,
+ B.CreateExtractElement(NewCalls[I], Idx->getValue().urem(4)), I);
+ LLVM_DEBUG(dbgs() << "Add: " << *VecOp << "\n");
+ }
+ }
+
+ // Replace the old instruction.
+ II->replaceAllUsesWith(VecOp);
+ VecOp->takeName(II);
+ InstrsToErase.push_back(II);
+ }
+
+ Modified = true;
+ }
+
+ for (auto I : InstrsToErase)
+ I->eraseFromParent();
+
+ return Modified;
+}
+
+static bool imageIntrinsicOptimizerImpl(Function &F, const TargetMachine *TM) {
+ if (!TM)
+ return false;
+
+ // This optimization only applies to GFX11 and beyond.
+ const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
+ if (!AMDGPU::isGFX11Plus(ST) || ST.hasMSAALoadDstSelBug())
+ return false;
+
+ Module *M = F.getParent();
+
+ // Early test to determine if the intrinsics are used.
+ if (std::none_of(M->begin(), M->end(), [](Function &F) {
+ return !F.users().empty() &&
+ (F.getIntrinsicID() == Intrinsic::amdgcn_image_load_2dmsaa ||
+ F.getIntrinsicID() == Intrinsic::amdgcn_image_load_2darraymsaa);
+ }))
+ return false;
+
+ bool Modified = false;
+ for (auto &BB : F) {
+ BasicBlock::iterator SectionEnd;
+ for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;
+ I = SectionEnd) {
+ SmallVector<SmallVector<IntrinsicInst *, 4>> MergeableInsts;
+
+ SectionEnd = collectMergeableInsts(I, E, MergeableInsts);
+ Modified |= optimizeSection(MergeableInsts);
+ }
+ }
+
+ return Modified;
+}
+
+bool AMDGPUImageIntrinsicOptimizer::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ return imageIntrinsicOptimizerImpl(F, TM);
+}
+
+FunctionPass *
+llvm::createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *TM) {
+ return new AMDGPUImageIntrinsicOptimizer(TM);
+}
+
+PreservedAnalyses
+AMDGPUImageIntrinsicOptimizerPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+
+ bool Changed = imageIntrinsicOptimizerImpl(F, &TM);
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
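
All of the grouping arithmetic in this pass works in buckets of four sample ids: loads whose constant FragId shares a udiv-4 bucket can share one image_msaa_load, and each original load reads result lane FragId mod 4. A hedged sketch with plain integers in place of the APInt math:

    #include <cstdint>
    #include <cstdio>

    // Same bucket <=> same udiv(4); the merged intrinsic uses the bucket base
    // FragId, and each original load extracts lane FragId % 4 of its result.
    bool sameFragIdGroup(uint64_t A, uint64_t B) { return A / 4 == B / 4; }

    struct FragIdMapping {
      uint64_t NewBaseFragId;
      unsigned ExtractLane;
    };

    FragIdMapping mapFragId(uint64_t FragId) {
      return {FragId / 4 * 4, static_cast<unsigned>(FragId % 4)};
    }

    int main() {
      std::printf("%d %d\n", sameFragIdGroup(2, 3), sameFragIdGroup(3, 4)); // 1 0
      FragIdMapping M = mapFragId(6);
      std::printf("%llu %u\n",
                  static_cast<unsigned long long>(M.NewBaseFragId),
                  M.ExtractLane); // 4 2
    }
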
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
new file mode 100644
index 000000000000..93ed77bb6f7e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
@@ -0,0 +1,122 @@
+//===- AMDGPUInsertSingleUseVDST.cpp - Insert s_singleuse_vdst instructions ==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Insert s_singleuse_vdst instructions on GFX11.5+ to mark regions of VALU
+/// instructions that produce single-use VGPR values. If the value is forwarded
+/// to the consumer instruction prior to VGPR writeback, the hardware can
+/// then skip (kill) the VGPR write.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCRegister.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-insert-single-use-vdst"
+
+namespace {
+class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
+private:
+ const SIInstrInfo *SII;
+
+public:
+ static char ID;
+
+ AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {}
+
+ void emitSingleUseVDST(MachineInstr &MI) const {
+ // Mark the following instruction as a single-use producer:
+ // s_singleuse_vdst { supr0: 1 }
+ BuildMI(*MI.getParent(), MI, DebugLoc(), SII->get(AMDGPU::S_SINGLEUSE_VDST))
+ .addImm(0x1);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ const auto &ST = MF.getSubtarget<GCNSubtarget>();
+ if (!ST.hasVGPRSingleUseHintInsts())
+ return false;
+
+ SII = ST.getInstrInfo();
+ const auto *TRI = &SII->getRegisterInfo();
+ bool InstructionEmitted = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ DenseMap<MCPhysReg, unsigned> RegisterUseCount; // TODO: MCRegUnits
+
+ // Handle boundaries at the end of the basic block separately to avoid
+ // false positives: if a register is live at the end of a basic block,
+ // assume it has more uses later on.
+ for (const auto &Liveouts : MBB.liveouts())
+ RegisterUseCount[Liveouts.PhysReg] = 2;
+
+ for (MachineInstr &MI : reverse(MBB.instrs())) {
+ // All registers in all operands need to be single use for an
+ // instruction to be marked as a single use producer.
+ bool AllProducerOperandsAreSingleUse = true;
+
+ for (const auto &Operand : MI.operands()) {
+ if (!Operand.isReg())
+ continue;
+ const auto Reg = Operand.getReg();
+
+ // Count the number of times each register is read.
+ if (Operand.readsReg())
+ RegisterUseCount[Reg]++;
+
+ // Do not attempt to optimize across exec mask changes.
+ if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) {
+ for (auto &UsedReg : RegisterUseCount)
+ UsedReg.second = 2;
+ }
+
+ // If we are at the point where the register first became live,
+ // check if the operands are single use.
+ if (!MI.modifiesRegister(Reg, TRI))
+ continue;
+ if (RegisterUseCount[Reg] > 1)
+ AllProducerOperandsAreSingleUse = false;
+ // Reset uses count when a register is no longer live.
+ RegisterUseCount.erase(Reg);
+ }
+ if (AllProducerOperandsAreSingleUse && SIInstrInfo::isVALU(MI)) {
+ // TODO: Replace with candidate logging for instruction grouping
+ // later.
+ emitSingleUseVDST(MI);
+ InstructionEmitted = true;
+ }
+ }
+ }
+ return InstructionEmitted;
+ }
+};
+} // namespace
+
+char AMDGPUInsertSingleUseVDST::ID = 0;
+
+char &llvm::AMDGPUInsertSingleUseVDSTID = AMDGPUInsertSingleUseVDST::ID;
+
+INITIALIZE_PASS(AMDGPUInsertSingleUseVDST, DEBUG_TYPE,
+ "AMDGPU Insert SingleUseVDST", false, false)
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 3c399e497227..ee93d9eb4c0a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -408,6 +408,13 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
int DMaskIdx = -1,
bool IsLoad = true);
+/// Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)
+static bool canContractSqrtToRsq(const FPMathOperator *SqrtOp) {
+ return (SqrtOp->getType()->isFloatTy() &&
+ (SqrtOp->hasApproxFunc() || SqrtOp->getFPAccuracy() >= 1.0f)) ||
+ SqrtOp->getType()->isHalfTy();
+}
+
std::optional<Instruction *>
GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
Intrinsic::ID IID = II.getIntrinsicID();
@@ -437,6 +444,37 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
return IC.replaceInstUsesWith(II, ConstantFP::get(II.getContext(), Val));
}
+ FastMathFlags FMF = cast<FPMathOperator>(II).getFastMathFlags();
+ if (!FMF.allowContract())
+ break;
+ auto *SrcCI = dyn_cast<IntrinsicInst>(Src);
+ if (!SrcCI)
+ break;
+
+ auto IID = SrcCI->getIntrinsicID();
+ // llvm.amdgcn.rcp(llvm.amdgcn.sqrt(x)) -> llvm.amdgcn.rsq(x) if contractable
+ //
+ // llvm.amdgcn.rcp(llvm.sqrt(x)) -> llvm.amdgcn.rsq(x) if contractable and
+ // relaxed.
+ if (IID == Intrinsic::amdgcn_sqrt || IID == Intrinsic::sqrt) {
+ const FPMathOperator *SqrtOp = cast<FPMathOperator>(SrcCI);
+ FastMathFlags InnerFMF = SqrtOp->getFastMathFlags();
+ if (!InnerFMF.allowContract() || !SrcCI->hasOneUse())
+ break;
+
+ if (IID == Intrinsic::sqrt && !canContractSqrtToRsq(SqrtOp))
+ break;
+
+ Function *NewDecl = Intrinsic::getDeclaration(
+ SrcCI->getModule(), Intrinsic::amdgcn_rsq, {SrcCI->getType()});
+
+ InnerFMF |= FMF;
+ II.setFastMathFlags(InnerFMF);
+
+ II.setCalledFunction(NewDecl);
+ return IC.replaceOperand(II, 0, SrcCI->getArgOperand(0));
+ }
+
break;
}
case Intrinsic::amdgcn_sqrt:
@@ -450,6 +488,14 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
return IC.replaceInstUsesWith(II, QNaN);
}
+ // f16 amdgcn.sqrt is identical to regular sqrt.
+ if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
+ Function *NewDecl = Intrinsic::getDeclaration(
+ II.getModule(), Intrinsic::sqrt, {II.getType()});
+ II.setCalledFunction(NewDecl);
+ return &II;
+ }
+
break;
}
case Intrinsic::amdgcn_log:
@@ -784,7 +830,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
if (CCmp->isNullValue()) {
return IC.replaceInstUsesWith(
- II, ConstantExpr::getSExt(CCmp, II.getType()));
+ II, IC.Builder.CreateSExt(CCmp, II.getType()));
}
// The result of V_ICMP/V_FCMP assembly instructions (which this
@@ -946,14 +992,27 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
return IC.replaceOperand(II, 0, UndefValue::get(Old->getType()));
}
case Intrinsic::amdgcn_permlane16:
- case Intrinsic::amdgcn_permlanex16: {
+ case Intrinsic::amdgcn_permlane16_var:
+ case Intrinsic::amdgcn_permlanex16:
+ case Intrinsic::amdgcn_permlanex16_var: {
// Discard vdst_in if it's not going to be read.
Value *VDstIn = II.getArgOperand(0);
if (isa<UndefValue>(VDstIn))
break;
- ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(4));
- ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(5));
+ // FetchInvalid operand idx.
+ unsigned int FiIdx = (IID == Intrinsic::amdgcn_permlane16 ||
+ IID == Intrinsic::amdgcn_permlanex16)
+ ? 4 /* for permlane16 and permlanex16 */
+ : 3; /* for permlane16_var and permlanex16_var */
+
+ // BoundCtrl operand idx.
+ // For permlane16 and permlanex16 it is 5; for permlane16_var and
+ // permlanex16_var it is 4.
+ unsigned int BcIdx = FiIdx + 1;
+
+ ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(FiIdx));
+ ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(BcIdx));
if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue())
break;
@@ -1002,50 +1061,6 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
break;
}
- case Intrinsic::amdgcn_ldexp: {
- // FIXME: This doesn't introduce new instructions and belongs in
- // InstructionSimplify.
- Type *Ty = II.getType();
- Value *Op0 = II.getArgOperand(0);
- Value *Op1 = II.getArgOperand(1);
-
- // Folding undef to qnan is safe regardless of the FP mode.
- if (isa<UndefValue>(Op0)) {
- auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
- return IC.replaceInstUsesWith(II, QNaN);
- }
-
- const APFloat *C = nullptr;
- match(Op0, PatternMatch::m_APFloat(C));
-
- // FIXME: Should flush denorms depending on FP mode, but that's ignored
- // everywhere else.
- //
- // These cases should be safe, even with strictfp.
- // ldexp(0.0, x) -> 0.0
- // ldexp(-0.0, x) -> -0.0
- // ldexp(inf, x) -> inf
- // ldexp(-inf, x) -> -inf
- if (C && (C->isZero() || C->isInfinity())) {
- return IC.replaceInstUsesWith(II, Op0);
- }
-
- // With strictfp, be more careful about possibly needing to flush denormals
- // or not, and snan behavior depends on ieee_mode.
- if (II.isStrictFP())
- break;
-
- if (C && C->isNaN())
- return IC.replaceInstUsesWith(II, ConstantFP::get(Ty, C->makeQuiet()));
-
- // ldexp(x, 0) -> x
- // ldexp(x, undef) -> x
- if (isa<UndefValue>(Op1) || match(Op1, PatternMatch::m_ZeroInt())) {
- return IC.replaceInstUsesWith(II, Op0);
- }
-
- break;
- }
case Intrinsic::amdgcn_fmul_legacy: {
Value *Op0 = II.getArgOperand(0);
Value *Op1 = II.getArgOperand(1);
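For reference, the legality rule encoded by canContractSqrtToRsq earlier in this hunk can be read off in isolation: f32 needs either the afn flag or at least 1 ulp of permitted error, while f16 is always contractible. A hedged sketch with the operand modeled as a plain struct; none of these names are the LLVM API:

#include <cassert>

// Simplified stand-in for llvm::FPMathOperator; fields are illustrative.
struct SqrtLike {
  bool IsFloat = false, IsHalf = false, HasApproxFunc = false;
  float FPAccuracy = 0.0f; // from !fpmath metadata; 0 when absent
};

static bool canContractToRsq(const SqrtLike &Op) {
  // f32: needs afn or >= 1.0 ulp of allowed error. f16: always fine.
  return (Op.IsFloat && (Op.HasApproxFunc || Op.FPAccuracy >= 1.0f)) ||
         Op.IsHalf;
}

int main() {
  assert(!canContractToRsq({/*IsFloat=*/true}));          // strict f32: no
  assert(canContractToRsq({true, false, false, 2.5f}));   // relaxed f32: yes
  assert(canContractToRsq({false, /*IsHalf=*/true}));     // f16: always
  return 0;
}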
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index f2d62956e25b..d41e704a4a11 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -14,6 +14,7 @@
#include "AMDGPUInstrInfo.h"
#include "AMDGPU.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instruction.h"
@@ -26,6 +27,9 @@ using namespace llvm;
AMDGPUInstrInfo::AMDGPUInstrInfo(const GCNSubtarget &ST) { }
+Intrinsic::ID AMDGPU::getIntrinsicID(const MachineInstr &I) {
+ return I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
+}
// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
bool AMDGPUInstrInfo::isUniformMMO(const MachineMemOperand *MMO) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index e7ee36447682..515decea3921 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -21,6 +21,7 @@ namespace llvm {
class GCNSubtarget;
class MachineMemOperand;
+class MachineInstr;
class AMDGPUInstrInfo {
public:
@@ -31,6 +32,13 @@ public:
namespace AMDGPU {
+/// Return the intrinsic ID for opcodes with the G_AMDGPU_INTRIN_ prefix.
+///
+/// These opcodes have an Intrinsic::ID operand similar to a GIntrinsic. But
+/// they are not actual instances of GIntrinsics, so we cannot use
+/// GIntrinsic::getIntrinsicID() on them.
+unsigned getIntrinsicID(const MachineInstr &I);
+
struct RsrcIntrinsic {
unsigned Intr;
uint8_t RsrcArg;
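The getIntrinsicID helper declared above relies purely on operand layout: for the G_AMDGPU_INTRIN_ pseudos the intrinsic ID immediate sits immediately after the explicit defs. A toy model of that layout, not the real MachineInstr interface:

#include <cassert>
#include <vector>

struct FakeMI {
  unsigned NumExplicitDefs;
  std::vector<unsigned> Operands; // defs first, then intrinsic ID, then uses
  unsigned getIntrinsicID() const { return Operands[NumExplicitDefs]; }
};

int main() {
  FakeMI MI{/*NumExplicitDefs=*/1, {/*vdst*/ 100, /*IID*/ 42, /*vaddr*/ 7}};
  assert(MI.getIntrinsicID() == 42);
  return 0;
}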
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index b69cae0c73b3..82f58ea38fd0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -94,6 +94,11 @@ def AMDGPUtc_return_gfx: SDNode<"AMDGPUISD::TC_RETURN_GFX", AMDGPUTCReturnTP,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;
+def AMDGPUtc_return_chain: SDNode<"AMDGPUISD::TC_RETURN_CHAIN",
+ SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
+>;
+
def AMDGPUtrap : SDNode<"AMDGPUISD::TRAP",
SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>,
[SDNPHasChain, SDNPVariadic, SDNPSideEffect, SDNPInGlue]
@@ -165,6 +170,11 @@ def AMDGPUfmax3 : SDNode<"AMDGPUISD::FMAX3", SDTFPTernaryOp,
[/*SDNPCommutative, SDNPAssociative*/]
>;
+// out = max(a, b, c); a, b, and c are floats. The operation is IEEE 754-2019
+// compliant.
+def AMDGPUfmaximum3 : SDNode<"AMDGPUISD::FMAXIMUM3", SDTFPTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
// out = max(a, b, c) a, b, and c are signed ints
def AMDGPUsmax3 : SDNode<"AMDGPUISD::SMAX3", AMDGPUDTIntTernaryOp,
[/*SDNPCommutative, SDNPAssociative*/]
@@ -180,6 +190,11 @@ def AMDGPUfmin3 : SDNode<"AMDGPUISD::FMIN3", SDTFPTernaryOp,
[/*SDNPCommutative, SDNPAssociative*/]
>;
+// out = min(a, b, c); a, b, and c are floats. The operation is IEEE 754-2019
+// compliant.
+def AMDGPUfminimum3 : SDNode<"AMDGPUISD::FMINIMUM3", SDTFPTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
// out = min(a, b, c) a, b and c are signed ints
def AMDGPUsmin3 : SDNode<"AMDGPUISD::SMIN3", AMDGPUDTIntTernaryOp,
[/*SDNPCommutative, SDNPAssociative*/]
@@ -265,9 +280,6 @@ def AMDGPUatomic_cmp_swap : SDNode<"AMDGPUISD::ATOMIC_CMP_SWAP",
[SDNPHasChain, SDNPMayStore, SDNPMayLoad,
SDNPMemOperand]>;
-def AMDGPUround : SDNode<"ISD::FROUND",
- SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>>;
-
def AMDGPUbfe_u32_impl : SDNode<"AMDGPUISD::BFE_U32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfe_i32_impl : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
@@ -279,11 +291,15 @@ def AMDGPUffbh_i32_impl : SDNode<"AMDGPUISD::FFBH_I32", SDTIntBitCountUnaryOp>;
def AMDGPUffbl_b32_impl : SDNode<"AMDGPUISD::FFBL_B32", SDTIntBitCountUnaryOp>;
// Signed and unsigned 24-bit multiply. The highest 8 bits are ignored
-// when performing the multiply. The result is a 32-bit value.
-def AMDGPUmul_u24_impl : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
+// when performing the multiply. The result is a 32- or 64-bit value.
+def AMDGPUMul24Op : SDTypeProfile<1, 2, [
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>
+]>;
+
+def AMDGPUmul_u24_impl : SDNode<"AMDGPUISD::MUL_U24", AMDGPUMul24Op,
[SDNPCommutative, SDNPAssociative]
>;
-def AMDGPUmul_i24_impl : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
+def AMDGPUmul_i24_impl : SDNode<"AMDGPUISD::MUL_I24", AMDGPUMul24Op,
[SDNPCommutative, SDNPAssociative]
>;
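Reference semantics for the widened mul24 profile above, as a hedged C++ model: the top 8 bits of each source are ignored and the product may now be produced at either 32 or 64 bits (the function name is invented for illustration):

#include <cstdint>
#include <cstdio>

static uint64_t mul_u24(uint32_t A, uint32_t B, bool WideResult) {
  uint64_t P = (uint64_t)(A & 0xffffff) * (uint64_t)(B & 0xffffff);
  return WideResult ? P : (uint32_t)P; // truncate for the 32-bit form
}

int main() {
  // Max 24-bit operands: the full product needs 48 bits, so only the
  // 64-bit form keeps it exactly.
  std::printf("wide=%llu narrow=%llu\n",
              (unsigned long long)mul_u24(0xffffff, 0xffffff, true),
              (unsigned long long)mul_u24(0xffffff, 0xffffff, false));
  return 0;
}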
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 747f9fe2f8ae..88ef4b577424 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -21,6 +21,7 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -71,6 +72,13 @@ void AMDGPUInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits *KB,
InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
}
+// Return the wave-level SGPR base address if this is a wave address.
+static Register getWaveAddress(const MachineInstr *Def) {
+ return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
+ ? Def->getOperand(1).getReg()
+ : Register();
+}
+
bool AMDGPUInstructionSelector::isVCC(Register Reg,
const MachineRegisterInfo &MRI) const {
// The verifier is oblivious to s1 being a valid value for wavesize registers.
@@ -158,11 +166,15 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
// TODO: Skip masking high bits if def is known boolean.
+ bool IsSGPR = TRI.isSGPRClass(SrcRC);
unsigned AndOpc =
- TRI.isSGPRClass(SrcRC) ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
- BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg)
+ IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
+ auto And = BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg)
.addImm(1)
.addReg(SrcReg);
+ if (IsSGPR)
+ And.setOperandDead(3); // Dead scc
+
BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
.addImm(0)
.addReg(MaskedReg);
@@ -322,7 +334,8 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
MachineInstr *Add =
BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
.add(I.getOperand(1))
- .add(I.getOperand(2));
+ .add(I.getOperand(2))
+ .setOperandDead(3); // Dead scc
I.eraseFromParent();
return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
}
@@ -369,7 +382,8 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
.add(Lo2);
BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
.add(Hi1)
- .add(Hi2);
+ .add(Hi2)
+ .setOperandDead(3); // Dead scc
} else {
const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
Register CarryReg = MRI->createVirtualRegister(CarryRC);
@@ -436,14 +450,18 @@ bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
- BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
+ auto CarryInst =
+ BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
.add(I.getOperand(2))
.add(I.getOperand(3));
- BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
- .addReg(AMDGPU::SCC);
- if (!MRI->getRegClassOrNull(Dst1Reg))
- MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
+ if (MRI->use_nodbg_empty(Dst1Reg)) {
+ CarryInst.setOperandDead(3); // Dead scc
+ } else {
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
+ .addReg(AMDGPU::SCC);
+ if (!MRI->getRegClassOrNull(Dst1Reg))
+ MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
+ }
if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
!RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
@@ -740,7 +758,8 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
// build_vector_trunc (lshr $src0, 16), 0 -> s_lshr_b32 $src0, 16
auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
.addReg(ShiftSrc0)
- .addImm(16);
+ .addImm(16)
+ .setOperandDead(3); // Dead scc
MI.eraseFromParent();
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
@@ -1001,7 +1020,7 @@ bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
}
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
- unsigned IntrinsicID = I.getIntrinsicID();
+ unsigned IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
switch (IntrinsicID) {
case Intrinsic::amdgcn_if_break: {
MachineBasicBlock *BB = I.getParent();
@@ -1192,36 +1211,104 @@ int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
}
}
- if (Size != 32)
- return -1;
+ if (Size == 32) {
+ switch (P) {
+ case CmpInst::ICMP_NE:
+ return AMDGPU::S_CMP_LG_U32;
+ case CmpInst::ICMP_EQ:
+ return AMDGPU::S_CMP_EQ_U32;
+ case CmpInst::ICMP_SGT:
+ return AMDGPU::S_CMP_GT_I32;
+ case CmpInst::ICMP_SGE:
+ return AMDGPU::S_CMP_GE_I32;
+ case CmpInst::ICMP_SLT:
+ return AMDGPU::S_CMP_LT_I32;
+ case CmpInst::ICMP_SLE:
+ return AMDGPU::S_CMP_LE_I32;
+ case CmpInst::ICMP_UGT:
+ return AMDGPU::S_CMP_GT_U32;
+ case CmpInst::ICMP_UGE:
+ return AMDGPU::S_CMP_GE_U32;
+ case CmpInst::ICMP_ULT:
+ return AMDGPU::S_CMP_LT_U32;
+ case CmpInst::ICMP_ULE:
+ return AMDGPU::S_CMP_LE_U32;
+ case CmpInst::FCMP_OEQ:
+ return AMDGPU::S_CMP_EQ_F32;
+ case CmpInst::FCMP_OGT:
+ return AMDGPU::S_CMP_GT_F32;
+ case CmpInst::FCMP_OGE:
+ return AMDGPU::S_CMP_GE_F32;
+ case CmpInst::FCMP_OLT:
+ return AMDGPU::S_CMP_LT_F32;
+ case CmpInst::FCMP_OLE:
+ return AMDGPU::S_CMP_LE_F32;
+ case CmpInst::FCMP_ONE:
+ return AMDGPU::S_CMP_LG_F32;
+ case CmpInst::FCMP_ORD:
+ return AMDGPU::S_CMP_O_F32;
+ case CmpInst::FCMP_UNO:
+ return AMDGPU::S_CMP_U_F32;
+ case CmpInst::FCMP_UEQ:
+ return AMDGPU::S_CMP_NLG_F32;
+ case CmpInst::FCMP_UGT:
+ return AMDGPU::S_CMP_NLE_F32;
+ case CmpInst::FCMP_UGE:
+ return AMDGPU::S_CMP_NLT_F32;
+ case CmpInst::FCMP_ULT:
+ return AMDGPU::S_CMP_NGE_F32;
+ case CmpInst::FCMP_ULE:
+ return AMDGPU::S_CMP_NGT_F32;
+ case CmpInst::FCMP_UNE:
+ return AMDGPU::S_CMP_NEQ_F32;
+ default:
+ llvm_unreachable("Unknown condition code!");
+ }
+ }
- switch (P) {
- case CmpInst::ICMP_NE:
- return AMDGPU::S_CMP_LG_U32;
- case CmpInst::ICMP_EQ:
- return AMDGPU::S_CMP_EQ_U32;
- case CmpInst::ICMP_SGT:
- return AMDGPU::S_CMP_GT_I32;
- case CmpInst::ICMP_SGE:
- return AMDGPU::S_CMP_GE_I32;
- case CmpInst::ICMP_SLT:
- return AMDGPU::S_CMP_LT_I32;
- case CmpInst::ICMP_SLE:
- return AMDGPU::S_CMP_LE_I32;
- case CmpInst::ICMP_UGT:
- return AMDGPU::S_CMP_GT_U32;
- case CmpInst::ICMP_UGE:
- return AMDGPU::S_CMP_GE_U32;
- case CmpInst::ICMP_ULT:
- return AMDGPU::S_CMP_LT_U32;
- case CmpInst::ICMP_ULE:
- return AMDGPU::S_CMP_LE_U32;
- default:
- llvm_unreachable("Unknown condition code!");
+ if (Size == 16) {
+ if (!STI.hasSALUFloatInsts())
+ return -1;
+
+ switch (P) {
+ case CmpInst::FCMP_OEQ:
+ return AMDGPU::S_CMP_EQ_F16;
+ case CmpInst::FCMP_OGT:
+ return AMDGPU::S_CMP_GT_F16;
+ case CmpInst::FCMP_OGE:
+ return AMDGPU::S_CMP_GE_F16;
+ case CmpInst::FCMP_OLT:
+ return AMDGPU::S_CMP_LT_F16;
+ case CmpInst::FCMP_OLE:
+ return AMDGPU::S_CMP_LE_F16;
+ case CmpInst::FCMP_ONE:
+ return AMDGPU::S_CMP_LG_F16;
+ case CmpInst::FCMP_ORD:
+ return AMDGPU::S_CMP_O_F16;
+ case CmpInst::FCMP_UNO:
+ return AMDGPU::S_CMP_U_F16;
+ case CmpInst::FCMP_UEQ:
+ return AMDGPU::S_CMP_NLG_F16;
+ case CmpInst::FCMP_UGT:
+ return AMDGPU::S_CMP_NLE_F16;
+ case CmpInst::FCMP_UGE:
+ return AMDGPU::S_CMP_NLT_F16;
+ case CmpInst::FCMP_ULT:
+ return AMDGPU::S_CMP_NGE_F16;
+ case CmpInst::FCMP_ULE:
+ return AMDGPU::S_CMP_NGT_F16;
+ case CmpInst::FCMP_UNE:
+ return AMDGPU::S_CMP_NEQ_F16;
+ default:
+ llvm_unreachable("Unknown condition code!");
+ }
}
+
+ return -1;
}
-bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
+bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
const DebugLoc &DL = I.getDebugLoc();
@@ -1247,6 +1334,9 @@ bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
return Ret;
}
+ if (I.getOpcode() == AMDGPU::G_FCMP)
+ return false;
+
int Opcode = getV_CMPOpcode(Pred, Size, *Subtarget);
if (Opcode == -1)
return false;
@@ -1569,8 +1659,8 @@ static unsigned gwsIntrinToOpcode(unsigned IntrID) {
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
Intrinsic::ID IID) const {
- if (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
- !STI.hasGWSSemaReleaseAll())
+ if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
+ !STI.hasGWSSemaReleaseAll()))
return false;
// intrinsic ID, vsrc, offset
@@ -1629,7 +1719,8 @@ bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_LSHL_B32), M0Base)
.addReg(BaseOffset)
- .addImm(16);
+ .addImm(16)
+ .setOperandDead(3); // Dead scc
BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
.addReg(M0Base);
@@ -1690,7 +1781,7 @@ bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
}
bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
- if (TM.getOptLevel() > CodeGenOpt::None) {
+ if (TM.getOptLevel() > CodeGenOptLevel::None) {
unsigned WGSize = STI.getFlatWorkGroupSizes(MF->getFunction()).second;
if (WGSize <= STI.getWavefrontSize()) {
MachineBasicBlock *MBB = MI.getParent();
@@ -1700,6 +1791,19 @@ bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
return true;
}
}
+
+ // On GFX12 lower s_barrier into s_barrier_signal_imm and s_barrier_wait
+ if (STI.hasSplitBarriers()) {
+ MachineBasicBlock *MBB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_IMM))
+ .addImm(AMDGPU::Barrier::WORKGROUP);
+ BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_BARRIER_WAIT))
+ .addImm(AMDGPU::Barrier::WORKGROUP);
+ MI.eraseFromParent();
+ return true;
+ }
+
return selectImpl(MI, *CoverageInfo);
}
@@ -1728,6 +1832,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
unsigned IntrOpcode = Intr->BaseOpcode;
const bool IsGFX10Plus = AMDGPU::isGFX10Plus(STI);
const bool IsGFX11Plus = AMDGPU::isGFX11Plus(STI);
+ const bool IsGFX12Plus = AMDGPU::isGFX12Plus(STI);
const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
@@ -1812,7 +1917,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();
if (BaseOpcode->Atomic)
CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization
- if (CPol & ~AMDGPU::CPol::ALL)
+ if (CPol & ~(IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12))
return false;
int NumVAddrRegs = 0;
@@ -1847,7 +1952,10 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
++NumVDataDwords;
int Opcode = -1;
- if (IsGFX11Plus) {
+ if (IsGFX12Plus) {
+ Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx12,
+ NumVDataDwords, NumVAddrDwords);
+ } else if (IsGFX11Plus) {
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
UseNSA ? AMDGPU::MIMGEncGfx11NSA
: AMDGPU::MIMGEncGfx11Default,
@@ -1920,7 +2028,8 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
if (IsGFX10Plus)
MIB.addImm(DimInfo->Encoding);
- MIB.addImm(Unorm);
+ if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::unorm))
+ MIB.addImm(Unorm);
MIB.addImm(CPol);
MIB.addImm(IsA16 && // a16 or r128
@@ -1935,7 +2044,8 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
return false;
}
- MIB.addImm(LWE); // lwe
+ if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::lwe))
+ MIB.addImm(LWE); // lwe
if (!IsGFX10Plus)
MIB.addImm(DimInfo->DA ? -1 : 0);
if (BaseOpcode->HasD16)
@@ -2008,7 +2118,7 @@ bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
MachineInstr &I) const {
- unsigned IntrinsicID = I.getIntrinsicID();
+ unsigned IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
switch (IntrinsicID) {
case Intrinsic::amdgcn_end_cf:
return selectEndCfIntrinsic(I);
@@ -2046,6 +2156,16 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
break;
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
return selectDSBvhStackIntrinsic(I);
+ case Intrinsic::amdgcn_s_barrier_init:
+ case Intrinsic::amdgcn_s_barrier_join:
+ case Intrinsic::amdgcn_s_wakeup_barrier:
+ case Intrinsic::amdgcn_s_get_barrier_state:
+ return selectNamedBarrierInst(I, IntrinsicID);
+ case Intrinsic::amdgcn_s_barrier_signal_isfirst:
+ case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
+ return selectSBarrierSignalIsfirst(I, IntrinsicID);
+ case Intrinsic::amdgcn_s_barrier_leave:
+ return selectSBarrierLeave(I);
}
return selectImpl(I, *CoverageInfo);
}
@@ -2194,7 +2314,8 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
} else {
BuildMI(*MBB, I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg0)
.addReg(HiReg)
- .addImm(16);
+ .addImm(16)
+ .setOperandDead(3); // Dead scc
}
unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
@@ -2203,12 +2324,17 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
BuildMI(*MBB, I, DL, TII.get(MovOpc), ImmReg)
.addImm(0xffff);
- BuildMI(*MBB, I, DL, TII.get(AndOpc), TmpReg1)
+ auto And = BuildMI(*MBB, I, DL, TII.get(AndOpc), TmpReg1)
.addReg(LoReg)
.addReg(ImmReg);
- BuildMI(*MBB, I, DL, TII.get(OrOpc), DstReg)
+ auto Or = BuildMI(*MBB, I, DL, TII.get(OrOpc), DstReg)
.addReg(TmpReg0)
.addReg(TmpReg1);
+
+ if (!IsVALU) {
+ And.setOperandDead(3); // Dead scc
+ Or.setOperandDead(3); // Dead scc
+ }
}
I.eraseFromParent();
@@ -2353,7 +2479,8 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
if (Signed) {
BuildMI(MBB, I, DL, TII.get(AMDGPU::S_ASHR_I32), HiReg)
.addReg(SrcReg, 0, SubReg)
- .addImm(31);
+ .addImm(31)
+ .setOperandDead(3); // Dead scc
} else {
BuildMI(MBB, I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg)
.addImm(0);
@@ -2397,7 +2524,8 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
.addReg(SrcReg)
- .addImm(Mask);
+ .addImm(Mask)
+ .setOperandDead(3); // Dead scc
} else {
BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
.addReg(SrcReg)
@@ -2411,16 +2539,54 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
return false;
}
+static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In,
+ Register &Out) {
+ Register LShlSrc;
+ if (mi_match(In, MRI,
+ m_GTrunc(m_GLShr(m_Reg(LShlSrc), m_SpecificICst(16))))) {
+ Out = LShlSrc;
+ return true;
+ }
+ return false;
+}
+
+bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
+ if (!Subtarget->hasSALUFloatInsts())
+ return false;
+
+ Register Dst = I.getOperand(0).getReg();
+ const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
+ if (DstRB->getID() != AMDGPU::SGPRRegBankID)
+ return false;
+
+ Register Src = I.getOperand(1).getReg();
+
+ if (MRI->getType(Dst) == LLT::scalar(32) &&
+ MRI->getType(Src) == LLT::scalar(16)) {
+ if (isExtractHiElt(*MRI, Src, Src)) {
+ MachineBasicBlock *BB = I.getParent();
+ BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
+ .addUse(Src);
+ I.eraseFromParent();
+ return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
+ }
+ }
+
+ return false;
+}
+
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
MachineOperand &ImmOp = I.getOperand(1);
Register DstReg = I.getOperand(0).getReg();
unsigned Size = MRI->getType(DstReg).getSizeInBits();
+ bool IsFP = false;
// The AMDGPU backend only supports Imm operands and not CImm or FPImm.
if (ImmOp.isFPImm()) {
const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
ImmOp.ChangeToImmediate(Imm.getZExtValue());
+ IsFP = true;
} else if (ImmOp.isCImm()) {
ImmOp.ChangeToImmediate(ImmOp.getCImm()->getSExtValue());
} else {
@@ -2433,6 +2599,12 @@ bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
unsigned Opcode;
if (DstRB->getID() == AMDGPU::VCCRegBankID) {
Opcode = STI.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
+ } else if (Size == 64 &&
+ AMDGPU::isValid32BitLiteral(I.getOperand(1).getImm(), IsFP)) {
+ Opcode = IsSgpr ? AMDGPU::S_MOV_B64_IMM_PSEUDO : AMDGPU::V_MOV_B64_PSEUDO;
+ I.setDesc(TII.get(Opcode));
+ I.addImplicitDefUseOperands(*MF);
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
} else {
Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
@@ -2531,7 +2703,8 @@ bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
BuildMI(*BB, &MI, DL, TII.get(Opc), OpReg)
.addReg(HiReg)
- .addReg(ConstReg);
+ .addReg(ConstReg)
+ .setOperandDead(3); // Dead scc
BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
.addReg(LoReg)
.addImm(AMDGPU::sub0)
@@ -2572,7 +2745,8 @@ bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
// TODO: Should this used S_BITSET0_*?
BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)
.addReg(HiReg)
- .addReg(ConstReg);
+ .addReg(ConstReg)
+ .setOperandDead(3); // Dead scc
BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
.addReg(LoReg)
.addImm(AMDGPU::sub0)
@@ -2689,8 +2863,8 @@ static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI) {
return isVCmpResult(MI.getOperand(1).getReg(), MRI) &&
isVCmpResult(MI.getOperand(2).getReg(), MRI);
- if (Opcode == TargetOpcode::G_INTRINSIC)
- return MI.getIntrinsicID() == Intrinsic::amdgcn_class;
+ if (auto *GI = dyn_cast<GIntrinsic>(&MI))
+ return GI->is(Intrinsic::amdgcn_class);
return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
}
@@ -2730,7 +2904,8 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
.addReg(CondReg)
- .addReg(Exec);
+ .addReg(Exec)
+ .setOperandDead(3); // Dead scc
CondReg = TmpReg;
}
@@ -2793,7 +2968,8 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
!CanCopyLow32 && !CanCopyHi32) {
auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
.addReg(SrcReg)
- .addReg(MaskReg);
+ .addReg(MaskReg)
+ .setOperandDead(3); // Dead scc
I.eraseFromParent();
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
@@ -2816,9 +2992,12 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
assert(MaskTy.getSizeInBits() == 32 &&
"ptrmask should have been narrowed during legalize");
- BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
+ auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
.addReg(SrcReg)
.addReg(MaskReg);
+
+ if (!IsVGPR)
+ NewOp.setOperandDead(3); // Dead scc
I.eraseFromParent();
return true;
}
@@ -3050,6 +3229,7 @@ bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
}
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
+ assert(!AMDGPU::isGFX12Plus(STI));
unsigned Opc;
unsigned Size = MI.getOperand(3).getImm();
@@ -3116,8 +3296,8 @@ bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
MIB.add(MI.getOperand(5 + OpOffset)); // soffset
MIB.add(MI.getOperand(6 + OpOffset)); // imm offset
unsigned Aux = MI.getOperand(7 + OpOffset).getImm();
- MIB.addImm(Aux & AMDGPU::CPol::ALL); // cpol
- MIB.addImm((Aux >> 3) & 1); // swz
+ MIB.addImm(Aux & AMDGPU::CPol::ALL); // cpol
+ MIB.addImm(Aux & AMDGPU::CPol::SWZ_pregfx12 ? 1 : 0); // swz
MachineMemOperand *LoadMMO = *MI.memoperands_begin();
MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
@@ -3252,7 +3432,7 @@ bool AMDGPUInstructionSelector::selectBVHIntrinsic(MachineInstr &MI) const{
bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
unsigned Opc;
- switch (MI.getIntrinsicID()) {
+ switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
break;
@@ -3324,7 +3504,8 @@ bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
} else {
BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), DstReg)
.addReg(SrcReg)
- .addImm(Subtarget->getWavefrontSizeLog2());
+ .addImm(Subtarget->getWavefrontSizeLog2())
+ .setOperandDead(3); // Dead scc
}
const TargetRegisterClass &RC =
@@ -3336,6 +3517,33 @@ bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
return true;
}
+bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
+ Register SrcReg = MI.getOperand(0).getReg();
+ if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))
+ return false;
+
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ Register SP =
+ Subtarget->getTargetLowering()->getStackPointerRegisterToSaveRestore();
+ Register WaveAddr = getWaveAddress(DefMI);
+ MachineBasicBlock *MBB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ if (!WaveAddr) {
+ WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), WaveAddr)
+ .addReg(SrcReg)
+ .addImm(Subtarget->getWavefrontSizeLog2())
+ .setOperandDead(3); // Dead scc
+ }
+
+ BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), SP)
+ .addReg(WaveAddr);
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool AMDGPUInstructionSelector::select(MachineInstr &I) {
if (I.isPHI())
return selectPHI(I);
@@ -3402,11 +3610,14 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_INSERT:
return selectG_INSERT(I);
case TargetOpcode::G_INTRINSIC:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT:
return selectG_INTRINSIC(I);
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
case TargetOpcode::G_ICMP:
- if (selectG_ICMP(I))
+ case TargetOpcode::G_FCMP:
+ if (selectG_ICMP_or_FCMP(I))
return true;
return selectImpl(I, *CoverageInfo);
case TargetOpcode::G_LOAD:
@@ -3443,6 +3654,10 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
selectImpl(I, *CoverageInfo))
return true;
return selectG_SZA_EXT(I);
+ case TargetOpcode::G_FPEXT:
+ if (selectG_FPEXT(I))
+ return true;
+ return selectImpl(I, *CoverageInfo);
case TargetOpcode::G_BRCOND:
return selectG_BRCOND(I);
case TargetOpcode::G_GLOBAL_VALUE:
@@ -3457,8 +3672,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
- const AMDGPU::ImageDimIntrinsicInfo *Intr
- = AMDGPU::getImageDimIntrinsicInfo(I.getIntrinsicID());
+ const AMDGPU::ImageDimIntrinsicInfo *Intr =
+ AMDGPU::getImageDimIntrinsicInfo(AMDGPU::getIntrinsicID(I));
assert(Intr && "not an image intrinsic with image pseudo");
return selectImageIntrinsic(I, Intr);
}
@@ -3472,6 +3687,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
return true;
case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
return selectWaveAddress(I);
+ case AMDGPU::G_STACKRESTORE:
+ return selectStackRestore(I);
default:
return selectImpl(I, *CoverageInfo);
}
@@ -3916,7 +4133,9 @@ AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
int64_t ConstOffset;
std::tie(PtrBase, ConstOffset) =
getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
- if (ConstOffset == 0 || !isFlatScratchBaseLegal(PtrBase, FlatVariant))
+
+ if (ConstOffset == 0 || (FlatVariant == SIInstrFlags::FlatScratch &&
+ !isFlatScratchBaseLegal(Root.getReg())))
return Default;
unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
@@ -4079,7 +4298,7 @@ AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
// possible.
std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
- if (ConstOffset != 0 && isFlatScratchBaseLegal(PtrBase) &&
+ if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS,
SIInstrFlags::FlatScratch)) {
Addr = PtrBase;
@@ -4113,7 +4332,8 @@ AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
.addFrameIndex(FI)
- .addReg(RHSDef->Reg);
+ .addReg(RHSDef->Reg)
+ .setOperandDead(3); // Dead scc
}
}
@@ -4155,6 +4375,7 @@ AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
// possible.
std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
+ Register OrigAddr = Addr;
if (ConstOffset != 0 &&
TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS, true)) {
Addr = PtrBase;
@@ -4172,8 +4393,13 @@ AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
Register LHS = AddrDef->MI->getOperand(1).getReg();
auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);
- if (!isFlatScratchBaseLegal(LHS) || !isFlatScratchBaseLegal(RHS))
- return std::nullopt;
+ if (OrigAddr != Addr) {
+ if (!isFlatScratchBaseLegalSVImm(OrigAddr))
+ return std::nullopt;
+ } else {
+ if (!isFlatScratchBaseLegalSV(OrigAddr))
+ return std::nullopt;
+ }
if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
return std::nullopt;
@@ -4211,7 +4437,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
// TODO: Should this be inside the render function? The iterator seems to
// move.
- const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
+ const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(*Subtarget);
BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
HighBits)
.addImm(Offset & ~MaxOffset);
@@ -4243,7 +4469,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
int64_t ConstOffset;
std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);
if (ConstOffset != 0) {
- if (SIInstrInfo::isLegalMUBUFImmOffset(ConstOffset) &&
+ if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
(!STI.privateMemoryResourceIsRangeChecked() ||
KB->signBitIsZero(PtrBase))) {
const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
@@ -4306,14 +4532,83 @@ bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
return KB->signBitIsZero(Base);
}
-bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
- Register Base, uint64_t FlatVariant) const {
- if (FlatVariant != SIInstrFlags::FlatScratch)
+// Return whether the operation has the NoUnsignedWrap property.
+static bool isNoUnsignedWrap(MachineInstr *Addr) {
+ return Addr->getOpcode() == TargetOpcode::G_OR ||
+ (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&
+ Addr->getFlag(MachineInstr::NoUWrap));
+}
+
+// Check that the base address of a flat scratch load/store in the form of
+// `base + offset` is legal to be put in an SGPR/VGPR (i.e. unsigned per the
+// hardware requirement). We always treat the first operand as the base
+// address here.
+bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
+ MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI);
+
+ if (isNoUnsignedWrap(AddrMI))
return true;
- // When value in 32-bit Base can be negative calculate scratch offset using
- // 32-bit add instruction, otherwise use Base(unsigned) + offset.
- return KB->signBitIsZero(Base);
+ // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
+ // values.
+ if (AMDGPU::isGFX12Plus(STI))
+ return true;
+
+ Register LHS = AddrMI->getOperand(1).getReg();
+ Register RHS = AddrMI->getOperand(2).getReg();
+
+ if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
+ std::optional<ValueAndVReg> RhsValReg =
+ getIConstantVRegValWithLookThrough(RHS, *MRI);
+ // If the immediate offset is negative and within certain range, the base
+ // address cannot also be negative. If the base is also negative, the sum
+ // would be either negative or much larger than the valid range of scratch
+ // memory a thread can access.
+ if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
+ RhsValReg->Value.getSExtValue() > -0x40000000)
+ return true;
+ }
+
+ return KB->signBitIsZero(LHS);
+}
+
+// Check that the address values in the SGPR and VGPR are legal for a flat
+// scratch access in the form: SGPR + VGPR.
+bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const {
+ MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI);
+
+ if (isNoUnsignedWrap(AddrMI))
+ return true;
+
+ Register LHS = AddrMI->getOperand(1).getReg();
+ Register RHS = AddrMI->getOperand(2).getReg();
+ return KB->signBitIsZero(RHS) && KB->signBitIsZero(LHS);
+}
+
+// Check that the address values in the SGPR and VGPR are legal for a flat
+// scratch access in the form: SGPR + VGPR + Imm.
+bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
+ Register Addr) const {
+ MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI);
+ Register Base = AddrMI->getOperand(1).getReg();
+ std::optional<DefinitionAndSourceRegister> BaseDef =
+ getDefSrcRegIgnoringCopies(Base, *MRI);
+ std::optional<ValueAndVReg> RHSOffset =
+ getIConstantVRegValWithLookThrough(AddrMI->getOperand(2).getReg(), *MRI);
+ assert(RHSOffset);
+
+ // If the immediate offset is negative and within certain range, the base
+ // address cannot also be negative. If the base is also negative, the sum
+ // would be either negative or much larger than the valid range of scratch
+ // memory a thread can access.
+ if (isNoUnsignedWrap(BaseDef->MI) &&
+ (isNoUnsignedWrap(AddrMI) ||
+ (RHSOffset->Value.getSExtValue() < 0 &&
+ RHSOffset->Value.getSExtValue() > -0x40000000)))
+ return true;
+
+ Register LHS = BaseDef->MI->getOperand(1).getReg();
+ Register RHS = BaseDef->MI->getOperand(2).getReg();
+ return KB->signBitIsZero(RHS) && KB->signBitIsZero(LHS);
}
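The comments above argue that a small negative immediate offset rules out a negative base. A quick numeric check of that claim in plain C++, with 32-bit scratch addresses and the constant mirroring the -0x40000000 bound in the code:

#include <cstdint>
#include <cstdio>

int main() {
  int32_t Off = -0x1000;          // negative, within (-0x40000000, 0)
  uint32_t NegBase = 0x80000000u; // smallest base with the sign bit set
  uint32_t Sum = NegBase + (uint32_t)Off;
  // Prints sum = 0x7ffff000: with any sign-bit-set base the sum is either
  // still negative or far larger than the scratch range a thread can
  // access, so a legal access with this offset implies a non-negative base.
  std::printf("sum = 0x%08x\n", Sum);
  return 0;
}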
bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
@@ -4332,21 +4627,18 @@ bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;
}
-// Return the wave level SGPR base address if this is a wave address.
-static Register getWaveAddress(const MachineInstr *Def) {
- return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
- ? Def->getOperand(1).getReg()
- : Register();
-}
-
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
MachineOperand &Root) const {
Register Reg = Root.getReg();
const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
- const MachineInstr *Def = MRI->getVRegDef(Reg);
- if (Register WaveBase = getWaveAddress(Def)) {
+ std::optional<DefinitionAndSourceRegister> Def =
+ getDefSrcRegIgnoringCopies(Reg, *MRI);
+ assert(Def && "this shouldn't be an optional result");
+ Reg = Def->Reg;
+
+ if (Register WaveBase = getWaveAddress(Def->MI)) {
return {{
[=](MachineInstrBuilder &MIB) { // rsrc
MIB.addReg(Info->getScratchRSrcReg());
@@ -4362,10 +4654,12 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset(
// FIXME: Copy check is a hack
Register BasePtr;
- if (mi_match(Reg, *MRI, m_GPtrAdd(m_Reg(BasePtr), m_Copy(m_ICst(Offset))))) {
- if (!SIInstrInfo::isLegalMUBUFImmOffset(Offset))
+ if (mi_match(Reg, *MRI,
+ m_GPtrAdd(m_Reg(BasePtr),
+ m_any_of(m_ICst(Offset), m_Copy(m_ICst(Offset)))))) {
+ if (!TII.isLegalMUBUFImmOffset(Offset))
return {};
- const MachineInstr *BasePtrDef = MRI->getVRegDef(BasePtr);
+ MachineInstr *BasePtrDef = getDefIgnoringCopies(BasePtr, *MRI);
Register WaveBase = getWaveAddress(BasePtrDef);
if (!WaveBase)
return {};
@@ -4382,7 +4676,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset(
}
if (!mi_match(Root.getReg(), *MRI, m_ICst(Offset)) ||
- !SIInstrInfo::isLegalMUBUFImmOffset(Offset))
+ !TII.isLegalMUBUFImmOffset(Offset))
return {};
return {{
@@ -4625,7 +4919,7 @@ bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
/// component.
void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
MachineIRBuilder &B, Register &SOffset, int64_t &ImmOffset) const {
- if (SIInstrInfo::isLegalMUBUFImmOffset(ImmOffset))
+ if (TII.isLegalMUBUFImmOffset(ImmOffset))
return;
// Illegal offset, store it in soffset.
@@ -4734,6 +5028,8 @@ AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
[=](MachineInstrBuilder &MIB) { // soffset
if (SOffset)
MIB.addReg(SOffset);
+ else if (STI.hasRestrictedSOffset())
+ MIB.addReg(AMDGPU::SGPR_NULL);
else
MIB.addImm(0);
},
@@ -4762,6 +5058,8 @@ AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
[=](MachineInstrBuilder &MIB) { // soffset
if (SOffset)
MIB.addReg(SOffset);
+ else if (STI.hasRestrictedSOffset())
+ MIB.addReg(AMDGPU::SGPR_NULL);
else
MIB.addImm(0);
},
@@ -4772,6 +5070,17 @@ AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
}};
}
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
+ Register SOffset = Root.getReg();
+
+ if (STI.hasRestrictedSOffset() && mi_match(SOffset, *MRI, m_ZeroInt()))
+ SOffset = AMDGPU::SGPR_NULL;
+
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
+}
+
/// Get an immediate that must be 32-bits, and treated as zero extended.
static std::optional<uint64_t>
getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) {
@@ -4818,8 +5127,8 @@ AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
// an immediate offset.
Register SOffset;
unsigned Offset;
- std::tie(SOffset, Offset) =
- AMDGPU::getBaseWithConstantOffset(*MRI, Root.getReg(), KB);
+ std::tie(SOffset, Offset) = AMDGPU::getBaseWithConstantOffset(
+ *MRI, Root.getReg(), KB, /*CheckNUW*/ true);
if (!SOffset)
return std::nullopt;
@@ -4980,6 +5289,135 @@ AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
}};
}
+bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
+ MachineInstr &I, Intrinsic::ID IntrID) const {
+ MachineBasicBlock *MBB = I.getParent();
+ const DebugLoc &DL = I.getDebugLoc();
+ Register CCReg = I.getOperand(0).getReg();
+
+ bool HasM0 = IntrID == Intrinsic::amdgcn_s_barrier_signal_isfirst_var;
+
+ if (HasM0) {
+ auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+ .addReg(I.getOperand(2).getReg());
+ BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0));
+ if (!constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI))
+ return false;
+ } else {
+ BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
+ .addImm(I.getOperand(2).getImm());
+ }
+
+ BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC);
+
+ I.eraseFromParent();
+ return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
+ *MRI);
+}
+
+static unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
+ if (HasInlineConst) {
+ switch (IntrID) {
+ default:
+ llvm_unreachable("not a named barrier op");
+ case Intrinsic::amdgcn_s_barrier_init:
+ return AMDGPU::S_BARRIER_INIT_IMM;
+ case Intrinsic::amdgcn_s_barrier_join:
+ return AMDGPU::S_BARRIER_JOIN_IMM;
+ case Intrinsic::amdgcn_s_wakeup_barrier:
+ return AMDGPU::S_WAKEUP_BARRIER_IMM;
+ case Intrinsic::amdgcn_s_get_barrier_state:
+ return AMDGPU::S_GET_BARRIER_STATE_IMM;
+ }
+ } else {
+ switch (IntrID) {
+ default:
+ llvm_unreachable("not a named barrier op");
+ case Intrinsic::amdgcn_s_barrier_init:
+ return AMDGPU::S_BARRIER_INIT_M0;
+ case Intrinsic::amdgcn_s_barrier_join:
+ return AMDGPU::S_BARRIER_JOIN_M0;
+ case Intrinsic::amdgcn_s_wakeup_barrier:
+ return AMDGPU::S_WAKEUP_BARRIER_M0;
+ case Intrinsic::amdgcn_s_get_barrier_state:
+ return AMDGPU::S_GET_BARRIER_STATE_M0;
+ }
+ }
+}
+
+bool AMDGPUInstructionSelector::selectNamedBarrierInst(
+ MachineInstr &I, Intrinsic::ID IntrID) const {
+ MachineBasicBlock *MBB = I.getParent();
+ const DebugLoc &DL = I.getDebugLoc();
+ MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_barrier_state
+ ? I.getOperand(2)
+ : I.getOperand(1);
+ std::optional<int64_t> BarValImm =
+ getIConstantVRegSExtVal(BarOp.getReg(), *MRI);
+ Register M0Val;
+ Register TmpReg0;
+
+ // For S_BARRIER_INIT, the member count is always read from M0[16:22].
+ if (IntrID == Intrinsic::amdgcn_s_barrier_init) {
+ Register MemberCount = I.getOperand(2).getReg();
+ TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ // TODO: This should be expanded during legalization so that the S_LSHL
+ // and S_OR can be constant-folded.
+ BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg0)
+ .addImm(16)
+ .addReg(MemberCount);
+ M0Val = TmpReg0;
+ }
+
+ // If not inlinable, get reference to barrier depending on the instruction
+ if (!BarValImm) {
+ if (IntrID == Intrinsic::amdgcn_s_barrier_init) {
+ // If the reference to the barrier id is not an inlinable constant, it
+ // must be referenced via M0[4:0]. Perform an OR with the member count to
+ // include it in M0 for S_BARRIER_INIT.
+ Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_OR_B32), TmpReg1)
+ .addReg(BarOp.getReg())
+ .addReg(TmpReg0);
+ M0Val = TmpReg1;
+ } else {
+ M0Val = BarOp.getReg();
+ }
+ }
+
+ // Build copy to M0 if needed. For S_BARRIER_INIT, M0 is always required.
+ if (M0Val) {
+ auto CopyMIB =
+ BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0).addReg(M0Val);
+ constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI);
+ }
+
+ MachineInstrBuilder MIB;
+ unsigned Opc = getNamedBarrierOp(BarValImm.has_value(), IntrID);
+ MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));
+
+ if (IntrID == Intrinsic::amdgcn_s_get_barrier_state)
+ MIB.addDef(I.getOperand(0).getReg());
+
+ if (BarValImm)
+ MIB.addImm(*BarValImm);
+
+ I.eraseFromParent();
+ return true;
+}
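Putting the two comments in selectNamedBarrierInst together, S_BARRIER_INIT expects M0 packed with the member count in bits [22:16] and the barrier id in bits [4:0]. A sketch of that packing, with the field widths taken from those comments and the helper name invented here:

#include <cstdint>
#include <cstdio>

static uint32_t packBarrierInitM0(uint32_t BarrierId, uint32_t MemberCount) {
  return ((MemberCount & 0x7f) << 16) | (BarrierId & 0x1f);
}

int main() {
  std::printf("0x%08x\n", packBarrierInitM0(3, 32)); // prints 0x00200003
  return 0;
}

This is what the emitted S_LSHL (count << 16) followed by the S_OR with the barrier id computes at runtime when the id is not an inline constant.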
+
+bool AMDGPUInstructionSelector::selectSBarrierLeave(MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+ const DebugLoc &DL = I.getDebugLoc();
+ Register CCReg = I.getOperand(0).getReg();
+
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_BARRIER_LEAVE));
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC);
+
+ I.eraseFromParent();
+ return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
+ *MRI);
+}
+
void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
@@ -5037,14 +5475,19 @@ void AMDGPUInstructionSelector::renderExtractCPol(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
assert(OpIdx >= 0 && "expected to match an immediate operand");
- MIB.addImm(MI.getOperand(OpIdx).getImm() & AMDGPU::CPol::ALL);
+ MIB.addImm(MI.getOperand(OpIdx).getImm() &
+ (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
+ : AMDGPU::CPol::ALL_pregfx12));
}
void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
assert(OpIdx >= 0 && "expected to match an immediate operand");
- MIB.addImm((MI.getOperand(OpIdx).getImm() >> 3) & 1);
+ const bool Swizzle = MI.getOperand(OpIdx).getImm() &
+ (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::SWZ
+ : AMDGPU::CPol::SWZ_pregfx12);
+ MIB.addImm(Swizzle);
}
void AMDGPUInstructionSelector::renderSetGLC(MachineInstrBuilder &MIB,
@@ -5057,7 +5500,16 @@ void AMDGPUInstructionSelector::renderSetGLC(MachineInstrBuilder &MIB,
void AMDGPUInstructionSelector::renderFrameIndex(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
- MIB.addFrameIndex((MI.getOperand(1).getIndex()));
+ MIB.addFrameIndex(MI.getOperand(1).getIndex());
+}
+
+void AMDGPUInstructionSelector::renderFPPow2ToExponent(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
+ int ExpVal = APF.getExactLog2Abs();
+ assert(ExpVal != INT_MIN);
+ MIB.addImm(ExpVal);
}
bool AMDGPUInstructionSelector::isInlineImmediate16(int64_t Imm) const {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 243ff72e2979..ab7cc0a6beb8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -90,6 +90,7 @@ private:
bool selectPHI(MachineInstr &I) const;
bool selectG_TRUNC(MachineInstr &I) const;
bool selectG_SZA_EXT(MachineInstr &I) const;
+ bool selectG_FPEXT(MachineInstr &I) const;
bool selectG_CONSTANT(MachineInstr &I) const;
bool selectG_FNEG(MachineInstr &I) const;
bool selectG_FABS(MachineInstr &I) const;
@@ -129,7 +130,7 @@ private:
const AMDGPU::ImageDimIntrinsicInfo *Intr) const;
bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const;
int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const;
- bool selectG_ICMP(MachineInstr &I) const;
+ bool selectG_ICMP_or_FCMP(MachineInstr &I) const;
bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const;
void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
SmallVectorImpl<GEPInfo> &AddrInfo) const;
@@ -147,6 +148,10 @@ private:
bool selectBVHIntrinsic(MachineInstr &I) const;
bool selectSMFMACIntrin(MachineInstr &I) const;
bool selectWaveAddress(MachineInstr &I) const;
+ bool selectStackRestore(MachineInstr &MI) const;
+ bool selectNamedBarrierInst(MachineInstr &I, Intrinsic::ID IID) const;
+ bool selectSBarrierSignalIsfirst(MachineInstr &I, Intrinsic::ID IID) const;
+ bool selectSBarrierLeave(MachineInstr &I) const;
std::pair<Register, unsigned> selectVOP3ModsImpl(MachineOperand &Root,
bool IsCanonicalizing = true,
@@ -241,8 +246,9 @@ private:
bool isDSOffsetLegal(Register Base, int64_t Offset) const;
bool isDSOffset2Legal(Register Base, int64_t Offset0, int64_t Offset1,
unsigned Size) const;
- bool isFlatScratchBaseLegal(
- Register Base, uint64_t FlatVariant = SIInstrFlags::FlatScratch) const;
+ bool isFlatScratchBaseLegal(Register Addr) const;
+ bool isFlatScratchBaseLegalSV(Register Addr) const;
+ bool isFlatScratchBaseLegalSVImm(Register Addr) const;
std::pair<Register, unsigned>
selectDS1Addr1OffsetImpl(MachineOperand &Root) const;
@@ -287,6 +293,9 @@ private:
Register &SOffset, int64_t &Offset) const;
InstructionSelector::ComplexRendererFns
+ selectBUFSOffset(MachineOperand &Root) const;
+
+ InstructionSelector::ComplexRendererFns
selectMUBUFAddr64(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
@@ -328,6 +337,9 @@ private:
void renderFrameIndex(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
+ void renderFPPow2ToExponent(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+
bool isInlineImmediate16(int64_t Imm) const;
bool isInlineImmediate32(int64_t Imm) const;
bool isInlineImmediate64(int64_t Imm) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 2305097e3f94..eaf72d7157ee 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -125,11 +125,11 @@ def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
def i1imm_0 : OperandWithDefaultOps<i1, (ops (i1 0))>;
-class CustomOperandClass<string name, bit optional, string parserMethod,
- string defaultMethod>
+class CustomOperandClass<string name, bit optional, string predicateMethod,
+ string parserMethod, string defaultMethod>
: AsmOperandClass {
let Name = name;
- let PredicateMethod = "is"#name;
+ let PredicateMethod = predicateMethod;
let ParserMethod = parserMethod;
let RenderMethod = "addImmOperands";
let IsOptional = optional;
@@ -138,6 +138,7 @@ class CustomOperandClass<string name, bit optional, string parserMethod,
class CustomOperandProps<bit optional = 0, string name = NAME> {
string ImmTy = "ImmTy"#name;
+ string PredicateMethod = "is"#name;
string ParserMethod = "parse"#name;
string DefaultValue = "0";
string DefaultMethod = "[this]() { return "#
@@ -145,7 +146,8 @@ class CustomOperandProps<bit optional = 0, string name = NAME> {
"AMDGPUOperand::"#ImmTy#"); }";
string PrintMethod = "print"#name;
AsmOperandClass ParserMatchClass =
- CustomOperandClass<name, optional, ParserMethod, DefaultMethod>;
+ CustomOperandClass<name, optional, PredicateMethod, ParserMethod,
+ DefaultMethod>;
string OperandType = "OPERAND_IMMEDIATE";
}
@@ -163,6 +165,20 @@ class ImmOperand<ValueType type, string name = NAME, bit optional = 0,
def s16imm : ImmOperand<i16, "S16Imm", 0, "printU16ImmOperand">;
def u16imm : ImmOperand<i16, "U16Imm", 0, "printU16ImmOperand">;
+class ValuePredicatedOperand<CustomOperand op, string valuePredicate,
+ bit optional = 0>
+ : CustomOperand<op.Type, optional> {
+ let ImmTy = op.ImmTy;
+ defvar OpPredicate = op.ParserMatchClass.PredicateMethod;
+ let PredicateMethod =
+ "getPredicate([](const AMDGPUOperand &Op) -> bool { "#
+ "return Op."#OpPredicate#"() && "#valuePredicate#"; })";
+ let ParserMethod = op.ParserMatchClass.ParserMethod;
+ let DefaultValue = op.DefaultValue;
+ let DefaultMethod = op.DefaultMethod;
+ let PrintMethod = op.PrintMethod;
+}
+
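ValuePredicatedOperand above splices an extra value check into the base operand's predicate by string-pasting a lambda; in ordinary C++ the same composition is just wrapping one predicate in another. A hedged analogy with invented names:

#include <cstdio>
#include <functional>

struct Operand { int Imm; };
using Pred = std::function<bool(const Operand &)>;

// Compose: new predicate = base predicate && value predicate.
static Pred withValueCheck(Pred Base, Pred Value) {
  return [=](const Operand &Op) { return Base(Op) && Value(Op); };
}

int main() {
  Pred IsImm = [](const Operand &) { return true; };
  Pred Small = withValueCheck(
      IsImm, [](const Operand &Op) { return Op.Imm < 16; });
  std::printf("%d %d\n", (int)Small({5}), (int)Small({20})); // 1 0
  return 0;
}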
//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
@@ -236,6 +252,8 @@ def umin_oneuse : HasOneUseBinOp<umin>;
def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;
+def fminimum_oneuse : HasOneUseBinOp<fminimum>;
+def fmaximum_oneuse : HasOneUseBinOp<fmaximum>;
def fminnum_ieee_oneuse : HasOneUseBinOp<fminnum_ieee>;
def fmaxnum_ieee_oneuse : HasOneUseBinOp<fmaxnum_ieee>;
@@ -544,19 +562,18 @@ def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
def store_hi16_#as : StoreHi16 <truncstorei16, i16>;
def truncstorei8_hi16_#as : StoreHi16<truncstorei8, i8>;
def truncstorei16_hi16_#as : StoreHi16<truncstorei16, i16>;
-
} // End let IsStore = 1, AddressSpaces = ...
let IsAtomic = 1, AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {
-def atomic_store_8_#as : PatFrag<(ops node:$ptr, node:$val),
- (atomic_store_8 node:$ptr, node:$val)>;
-def atomic_store_16_#as : PatFrag<(ops node:$ptr, node:$val),
- (atomic_store_16 node:$ptr, node:$val)>;
-def atomic_store_32_#as : PatFrag<(ops node:$ptr, node:$val),
- (atomic_store_32 node:$ptr, node:$val)>;
-def atomic_store_64_#as : PatFrag<(ops node:$ptr, node:$val),
- (atomic_store_64 node:$ptr, node:$val)>;
-}
+def atomic_store_8_#as : PatFrag<(ops node:$val, node:$ptr),
+ (atomic_store_8 node:$val, node:$ptr)>;
+def atomic_store_16_#as : PatFrag<(ops node:$val, node:$ptr),
+ (atomic_store_16 node:$val, node:$ptr)>;
+def atomic_store_32_#as : PatFrag<(ops node:$val, node:$ptr),
+ (atomic_store_32 node:$val, node:$ptr)>;
+def atomic_store_64_#as : PatFrag<(ops node:$val, node:$ptr),
+ (atomic_store_64 node:$val, node:$ptr)>;
+} // End let IsAtomic = 1, AddressSpaces = ...
} // End foreach as
multiclass noret_op {
@@ -622,8 +639,13 @@ defm int_amdgcn_flat_atomic_fadd : global_addr_space_atomic_op;
defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op;
defm int_amdgcn_global_atomic_fmin : noret_op;
defm int_amdgcn_global_atomic_fmax : noret_op;
+defm int_amdgcn_global_atomic_csub : noret_op;
defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op;
defm int_amdgcn_ds_fadd_v2bf16 : noret_op;
+defm int_amdgcn_flat_atomic_fmin_num : noret_op;
+defm int_amdgcn_flat_atomic_fmax_num : noret_op;
+defm int_amdgcn_global_atomic_fmin_num : noret_op;
+defm int_amdgcn_global_atomic_fmax_num : noret_op;
multiclass noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
let HasNoUse = true in
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index bbf4db12f5ab..fbee28889451 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -17,14 +17,19 @@
#include "AMDGPUGlobalISelUtils.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
@@ -455,8 +460,8 @@ static bool shouldWidenLoad(const GCNSubtarget &ST, LLT MemoryTy,
return false;
// If we have 96-bit memory operations, we shouldn't touch them. Note we may
- // end up widening these for a scalar load during RegBankSelect, since there
- // aren't 96-bit scalar loads.
+ // end up widening these for a scalar load during RegBankSelect, if we don't
+ // have 96-bit scalar loads.
if (SizeInBits == 96 && ST.hasDwordx3LoadStores())
return false;
@@ -628,6 +633,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS);
const LLT BufferFatPtr = GetAddrSpacePtr(AMDGPUAS::BUFFER_FAT_POINTER);
const LLT RsrcPtr = GetAddrSpacePtr(AMDGPUAS::BUFFER_RESOURCE);
+ const LLT BufferStridedPtr =
+ GetAddrSpacePtr(AMDGPUAS::BUFFER_STRIDED_POINTER);
const LLT CodePtr = FlatPtr;
@@ -676,13 +683,23 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (ST.hasVOP3PInsts() && ST.hasAddNoCarry() && ST.hasIntClamp()) {
// Full set of gfx9 features.
- getActionDefinitionsBuilder({G_ADD, G_SUB})
- .legalFor({S32, S16, V2S16})
- .clampMaxNumElementsStrict(0, S16, 2)
- .scalarize(0)
- .minScalar(0, S16)
- .widenScalarToNextMultipleOf(0, 32)
- .maxScalar(0, S32);
+ if (ST.hasScalarAddSub64()) {
+ getActionDefinitionsBuilder({G_ADD, G_SUB})
+ .legalFor({S64, S32, S16, V2S16})
+ .clampMaxNumElementsStrict(0, S16, 2)
+ .scalarize(0)
+ .minScalar(0, S16)
+ .widenScalarToNextMultipleOf(0, 32)
+ .maxScalar(0, S32);
+ } else {
+ getActionDefinitionsBuilder({G_ADD, G_SUB})
+ .legalFor({S32, S16, V2S16})
+ .clampMaxNumElementsStrict(0, S16, 2)
+ .scalarize(0)
+ .minScalar(0, S16)
+ .widenScalarToNextMultipleOf(0, 32)
+ .maxScalar(0, S32);
+ }
getActionDefinitionsBuilder(G_MUL)
.legalFor({S32, S16, V2S16})
@@ -842,6 +859,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder(G_DYN_STACKALLOC)
.legalFor({{PrivatePtr, S32}});
+ getActionDefinitionsBuilder(G_STACKSAVE)
+ .customFor({PrivatePtr});
+ getActionDefinitionsBuilder(G_STACKRESTORE)
+ .legalFor({PrivatePtr});
+
getActionDefinitionsBuilder(G_GLOBAL_VALUE)
.customIf(typeIsNot(0, PrivatePtr));
@@ -866,6 +888,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
FDIVActions.customFor({S16});
}
+ if (ST.hasPackedFP32Ops()) {
+ FPOpActions.legalFor({V2S32});
+ FPOpActions.clampMaxNumElementsStrict(0, S32, 2);
+ }
+
auto &MinNumMaxNum = getActionDefinitionsBuilder({
G_FMINNUM, G_FMAXNUM, G_FMINNUM_IEEE, G_FMAXNUM_IEEE});
@@ -908,10 +935,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (ST.has16BitInsts()) {
getActionDefinitionsBuilder(G_FSQRT)
- .legalFor({S32, S16})
- .customFor({S64})
+ .legalFor({S16})
+ .customFor({S32, S64})
.scalarize(0)
- .clampScalar(0, S16, S64);
+ .unsupported();
getActionDefinitionsBuilder(G_FFLOOR)
.legalFor({S32, S64, S16})
.scalarize(0)
@@ -930,10 +957,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.lower();
} else {
getActionDefinitionsBuilder(G_FSQRT)
- .legalFor({S32})
- .customFor({S64})
+ .customFor({S32, S64, S16})
.scalarize(0)
- .clampScalar(0, S32, S64);
+ .unsupported();
+
if (ST.hasFractBug()) {
getActionDefinitionsBuilder(G_FFLOOR)
@@ -1061,31 +1088,34 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0)
.lower();
- // Lower roundeven into G_FRINT
- getActionDefinitionsBuilder({G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
- .scalarize(0)
- .lower();
+ // Lower G_FNEARBYINT and G_FRINT into G_INTRINSIC_ROUNDEVEN
+ getActionDefinitionsBuilder({G_INTRINSIC_ROUND, G_FRINT, G_FNEARBYINT})
+ .scalarize(0)
+ .lower();
if (ST.has16BitInsts()) {
- getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
- .legalFor({S16, S32, S64})
- .clampScalar(0, S16, S64)
- .scalarize(0);
+ getActionDefinitionsBuilder(
+ {G_INTRINSIC_TRUNC, G_FCEIL, G_INTRINSIC_ROUNDEVEN})
+ .legalFor({S16, S32, S64})
+ .clampScalar(0, S16, S64)
+ .scalarize(0);
} else if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
- getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
- .legalFor({S32, S64})
- .clampScalar(0, S32, S64)
- .scalarize(0);
+ getActionDefinitionsBuilder(
+ {G_INTRINSIC_TRUNC, G_FCEIL, G_INTRINSIC_ROUNDEVEN})
+ .legalFor({S32, S64})
+ .clampScalar(0, S32, S64)
+ .scalarize(0);
} else {
- getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
- .legalFor({S32})
- .customFor({S64})
- .clampScalar(0, S32, S64)
- .scalarize(0);
+ getActionDefinitionsBuilder(
+ {G_INTRINSIC_TRUNC, G_FCEIL, G_INTRINSIC_ROUNDEVEN})
+ .legalFor({S32})
+ .customFor({S64})
+ .clampScalar(0, S32, S64)
+ .scalarize(0);
}
getActionDefinitionsBuilder(G_PTR_ADD)
- .unsupportedFor({BufferFatPtr, RsrcPtr})
+ .unsupportedFor({BufferFatPtr, BufferStridedPtr, RsrcPtr})
.legalIf(all(isPointer(0), sameSize(0, 1)))
.scalarize(0)
.scalarSameSizeAs(1, 0);
@@ -1121,8 +1151,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0)
.legalIf(all(typeInSet(0, {S1, S32}), isPointer(1)));
- getActionDefinitionsBuilder(G_FCMP)
- .legalForCartesianProduct({S1}, ST.has16BitInsts() ? FPTypes16 : FPTypesBase)
+ auto &FCmpBuilder =
+ getActionDefinitionsBuilder(G_FCMP).legalForCartesianProduct(
+ {S1}, ST.has16BitInsts() ? FPTypes16 : FPTypesBase);
+
+ if (ST.hasSALUFloatInsts())
+ FCmpBuilder.legalForCartesianProduct({S32}, {S16, S32});
+
+ FCmpBuilder
.widenScalarToNextPow2(1)
.clampScalar(1, S32, S64)
.scalarize(0);
@@ -1149,7 +1185,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
Log2Ops.scalarize(0)
.lower();
- auto &LogOps = getActionDefinitionsBuilder({G_FLOG, G_FLOG10, G_FEXP});
+ auto &LogOps =
+ getActionDefinitionsBuilder({G_FLOG, G_FLOG10, G_FEXP, G_FEXP10});
LogOps.customFor({S32, S16});
LogOps.clampScalar(0, MinScalarFPTy, S32)
.scalarize(0);
@@ -1219,7 +1256,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (ST.hasVOP3PInsts()) {
getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX, G_ABS})
.legalFor({S32, S16, V2S16})
- .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
.clampMaxNumElements(0, S16, 2)
.minScalar(0, S16)
.widenScalarToNextPow2(0)
@@ -1369,7 +1405,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// The custom pointers (fat pointers, buffer resources) don't work with load
// and store at this level. Fat pointers should have been lowered to
// intrinsics before the translation to MIR.
- Actions.unsupportedIf(typeInSet(1, {BufferFatPtr, RsrcPtr}));
+ Actions.unsupportedIf(
+ typeInSet(1, {BufferFatPtr, BufferStridedPtr, RsrcPtr}));
// Address space 8 pointers are handled by a 4xs32 load, bitcast, and
// ptrtoint. This is needed to account for the fact that we can't have i128
@@ -1925,20 +1962,25 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.widenScalarToNextPow2(0)
.scalarize(0);
- getActionDefinitionsBuilder({
- // TODO: Verify V_BFI_B32 is generated from expanded bit ops
- G_FCOPYSIGN,
+ getActionDefinitionsBuilder(
+ {// TODO: Verify V_BFI_B32 is generated from expanded bit ops
+ G_FCOPYSIGN,
- G_ATOMIC_CMPXCHG_WITH_SUCCESS,
- G_ATOMICRMW_NAND,
- G_ATOMICRMW_FSUB,
- G_READ_REGISTER,
- G_WRITE_REGISTER,
+ G_ATOMIC_CMPXCHG_WITH_SUCCESS, G_ATOMICRMW_NAND, G_ATOMICRMW_FSUB,
+ G_READ_REGISTER, G_WRITE_REGISTER,
- G_SADDO, G_SSUBO,
+ G_SADDO, G_SSUBO})
+ .lower();
- // TODO: Implement
- G_FMINIMUM, G_FMAXIMUM}).lower();
+ if (ST.hasIEEEMinMax()) {
+ getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM})
+ .legalFor(FPTypesPK16)
+ .clampMaxNumElements(0, S16, 2)
+ .scalarize(0);
+ } else {
+ // TODO: Implement
+ getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower();
+ }
getActionDefinitionsBuilder({G_MEMCPY, G_MEMCPY_INLINE, G_MEMMOVE, G_MEMSET})
.lower();
@@ -1948,6 +1990,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
G_INDEXED_ZEXTLOAD, G_INDEXED_STORE})
.unsupported();
+ getActionDefinitionsBuilder(G_PREFETCH).alwaysLegal();
+
getLegacyLegalizerInfo().computeTables();
verify(*ST.getInstrInfo());
}
@@ -1960,8 +2004,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
switch (MI.getOpcode()) {
case TargetOpcode::G_ADDRSPACE_CAST:
return legalizeAddrSpaceCast(MI, MRI, B);
- case TargetOpcode::G_FRINT:
- return legalizeFrint(MI, MRI, B);
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
+ return legalizeFroundeven(MI, MRI, B);
case TargetOpcode::G_FCEIL:
return legalizeFceil(MI, MRI, B);
case TargetOpcode::G_FREM:
@@ -2022,6 +2066,7 @@ bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
case TargetOpcode::G_FEXP2:
return legalizeFExp2(MI, B);
case TargetOpcode::G_FEXP:
+ case TargetOpcode::G_FEXP10:
return legalizeFExp(MI, B);
case TargetOpcode::G_FPOW:
return legalizeFPow(MI, B);
@@ -2037,6 +2082,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
return legalizeCTLZ_CTTZ(MI, MRI, B);
case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
return legalizeFPTruncRound(MI, B);
+ case TargetOpcode::G_STACKSAVE:
+ return legalizeStackSave(MI, B);
default:
return false;
}
@@ -2264,9 +2311,9 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
return true;
}
-bool AMDGPULegalizerInfo::legalizeFrint(
- MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B) const {
+bool AMDGPULegalizerInfo::legalizeFroundeven(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
Register Src = MI.getOperand(1).getReg();
LLT Ty = MRI.getType(Src);
assert(Ty.isScalar() && Ty.getSizeInBits() == 64);
@@ -2345,10 +2392,10 @@ static MachineInstrBuilder extractF64Exponent(Register Hi,
auto Const0 = B.buildConstant(S32, FractBits - 32);
auto Const1 = B.buildConstant(S32, ExpBits);
- auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32}, false)
- .addUse(Hi)
- .addUse(Const0.getReg(0))
- .addUse(Const1.getReg(0));
+ auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32})
+ .addUse(Hi)
+ .addUse(Const0.getReg(0))
+ .addUse(Const1.getReg(0));
return B.buildSub(S32, ExpPart, B.buildConstant(S32, 1023));
}
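// A host-side sketch of what the ubfe sequence above computes, assuming the
// IEEE-754 binary64 layout (FractBits = 52, ExpBits = 11, so the exponent
// field starts at bit 52 - 32 = 20 of the high dword); the helper name is
// illustrative, not from the patch.
#include <cstdint>
#include <cstring>

int extractF64ExponentHost(double X) {
  uint64_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));           // bit-cast the double
  uint32_t Hi = uint32_t(Bits >> 32);             // high dword, as above
  uint32_t Field = (Hi >> 20) & ((1u << 11) - 1); // ubfe(Hi, 20, 11)
  return int(Field) - 1023;                       // remove the IEEE bias
}
// e.g. extractF64ExponentHost(8.0) == 3, since 8.0 == 2^3.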
@@ -2436,8 +2483,7 @@ bool AMDGPULegalizerInfo::legalizeITOFP(
auto X = B.buildXor(S32, Unmerge.getReg(0), Unmerge.getReg(1));
auto OppositeSign = B.buildAShr(S32, X, ThirtyOne);
auto MaxShAmt = B.buildAdd(S32, ThirtyTwo, OppositeSign);
- auto LS = B.buildIntrinsic(Intrinsic::amdgcn_sffbh, {S32},
- /*HasSideEffects=*/false)
+ auto LS = B.buildIntrinsic(Intrinsic::amdgcn_sffbh, {S32})
.addUse(Unmerge.getReg(1));
auto LS2 = B.buildSub(S32, LS, One);
ShAmt = B.buildUMin(S32, LS2, MaxShAmt);
@@ -2670,15 +2716,16 @@ bool AMDGPULegalizerInfo::legalizeSinCos(
auto OneOver2Pi = B.buildFConstant(Ty, 0.5 * numbers::inv_pi);
if (ST.hasTrigReducedRange()) {
auto MulVal = B.buildFMul(Ty, SrcReg, OneOver2Pi, Flags);
- TrigVal = B.buildIntrinsic(Intrinsic::amdgcn_fract, {Ty}, false)
- .addUse(MulVal.getReg(0))
- .setMIFlags(Flags).getReg(0);
+ TrigVal = B.buildIntrinsic(Intrinsic::amdgcn_fract, {Ty})
+ .addUse(MulVal.getReg(0))
+ .setMIFlags(Flags)
+ .getReg(0);
} else
TrigVal = B.buildFMul(Ty, SrcReg, OneOver2Pi, Flags).getReg(0);
Intrinsic::ID TrigIntrin = MI.getOpcode() == AMDGPU::G_FSIN ?
Intrinsic::amdgcn_sin : Intrinsic::amdgcn_cos;
- B.buildIntrinsic(TrigIntrin, ArrayRef<Register>(DstReg), false)
+ B.buildIntrinsic(TrigIntrin, ArrayRef<Register>(DstReg))
.addUse(TrigVal)
.setMIFlags(Flags);
MI.eraseFromParent();
@@ -2714,15 +2761,6 @@ bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(Register DstReg, LLT PtrTy,
// $symbol@*@hi with lower 32 bits and higher 32 bits of a literal constant,
// which is a 64-bit pc-relative offset from the encoding of the $symbol
// operand to the global variable.
- //
- // What we want here is an offset from the value returned by s_getpc
- // (which is the address of the s_add_u32 instruction) to the global
- // variable, but since the encoding of $symbol starts 4 bytes after the start
- // of the s_add_u32 instruction, we end up with an offset that is 4 bytes too
- // small. This requires us to add 4 to the global variable offset in order to
- // compute the correct address. Similarly for the s_addc_u32 instruction, the
- // encoding of $symbol starts 12 bytes after the start of the s_add_u32
- // instruction.
LLT ConstPtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
@@ -2732,11 +2770,11 @@ bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(Register DstReg, LLT PtrTy,
MachineInstrBuilder MIB = B.buildInstr(AMDGPU::SI_PC_ADD_REL_OFFSET)
.addDef(PCReg);
- MIB.addGlobalAddress(GV, Offset + 4, GAFlags);
+ MIB.addGlobalAddress(GV, Offset, GAFlags);
if (GAFlags == SIInstrInfo::MO_NONE)
MIB.addImm(0);
else
- MIB.addGlobalAddress(GV, Offset + 12, GAFlags + 1);
+ MIB.addGlobalAddress(GV, Offset, GAFlags + 1);
if (!B.getMRI()->getRegClassOrNull(PCReg))
B.getMRI()->setRegClass(PCReg, &AMDGPU::SReg_64RegClass);
@@ -2744,7 +2782,63 @@ bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(Register DstReg, LLT PtrTy,
if (PtrTy.getSizeInBits() == 32)
B.buildExtract(DstReg, PCReg, 0);
return true;
- }
+}
+
+// Emit an ABS32_LO / ABS32_HI relocation stub.
+void AMDGPULegalizerInfo::buildAbsGlobalAddress(
+ Register DstReg, LLT PtrTy, MachineIRBuilder &B, const GlobalValue *GV,
+ MachineRegisterInfo &MRI) const {
+ bool RequiresHighHalf = PtrTy.getSizeInBits() != 32;
+
+ LLT S32 = LLT::scalar(32);
+
+ // Use the destination directly if we only store the lower address half and
+ // no register class has been set.
+ Register AddrLo = !RequiresHighHalf && !MRI.getRegClassOrNull(DstReg)
+ ? DstReg
+ : MRI.createGenericVirtualRegister(S32);
+
+ if (!MRI.getRegClassOrNull(AddrLo))
+ MRI.setRegClass(AddrLo, &AMDGPU::SReg_32RegClass);
+
+ // Write the lower half.
+ B.buildInstr(AMDGPU::S_MOV_B32)
+ .addDef(AddrLo)
+ .addGlobalAddress(GV, 0, SIInstrInfo::MO_ABS32_LO);
+
+ // If required, write the upper half as well.
+ if (RequiresHighHalf) {
+ assert(PtrTy.getSizeInBits() == 64 &&
+ "Must provide a 64-bit pointer type!");
+
+ Register AddrHi = MRI.createGenericVirtualRegister(S32);
+ MRI.setRegClass(AddrHi, &AMDGPU::SReg_32RegClass);
+
+ B.buildInstr(AMDGPU::S_MOV_B32)
+ .addDef(AddrHi)
+ .addGlobalAddress(GV, 0, SIInstrInfo::MO_ABS32_HI);
+
+ // Use the destination directly if no register class has been set.
+ Register AddrDst = !MRI.getRegClassOrNull(DstReg)
+ ? DstReg
+ : MRI.createGenericVirtualRegister(LLT::scalar(64));
+
+ if (!MRI.getRegClassOrNull(AddrDst))
+ MRI.setRegClass(AddrDst, &AMDGPU::SReg_64RegClass);
+
+ B.buildMergeValues(AddrDst, {AddrLo, AddrHi});
+
+ // If we created a new register for the destination, cast the result into
+ // the final output.
+ if (AddrDst != DstReg)
+ B.buildCast(DstReg, AddrDst);
+ } else if (AddrLo != DstReg) {
+ // If we created a new register for the destination, cast the result into
+ // the final output.
+ B.buildCast(DstReg, AddrLo);
+ }
+}
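// Sketch of the sequence this helper builds for a 64-bit pointer (register
// names illustrative, not from the patch):
//   %lo:sreg_32 = S_MOV_B32 @gv           ; MO_ABS32_LO relocation
//   %hi:sreg_32 = S_MOV_B32 @gv           ; MO_ABS32_HI relocation
//   %dst:sreg_64 = G_MERGE_VALUES %lo, %hi
// For a 32-bit pointer only the MO_ABS32_LO move is emitted.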
bool AMDGPULegalizerInfo::legalizeGlobalValue(
MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -2771,7 +2865,7 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
// functions that use local objects. However, if these dead functions are
// not eliminated, we don't want a compile time error. Just emit a warning
// and a trap, since there should be no callable path here.
- B.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>(), true);
+ B.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>());
B.buildUndef(DstReg);
MI.eraseFromParent();
return true;
@@ -2797,8 +2891,7 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
// Adjust alignment for that dynamic shared memory array.
MFI->setDynLDSAlign(MF.getFunction(), *cast<GlobalVariable>(GV));
LLT S32 = LLT::scalar(32);
- auto Sz =
- B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32}, false);
+ auto Sz = B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32});
B.buildIntToPtr(DstReg, Sz);
MI.eraseFromParent();
return true;
@@ -2811,6 +2904,12 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
return true;
}
+ if (ST.isAmdPalOS() || ST.isMesa3DOS()) {
+ buildAbsGlobalAddress(DstReg, Ty, B, GV, MRI);
+ MI.eraseFromParent();
+ return true;
+ }
+
const SITargetLowering *TLI = ST.getTargetLowering();
if (TLI->shouldEmitFixup(GV)) {
@@ -2973,10 +3072,10 @@ bool AMDGPULegalizerInfo::legalizeFMad(
// TODO: Always legal with future ftz flag.
// FIXME: Do we need just output?
- if (Ty == LLT::scalar(32) &&
+ if (Ty == LLT::float32() &&
MFI->getMode().FP32Denormals == DenormalMode::getPreserveSign())
return true;
- if (Ty == LLT::scalar(16) &&
+ if (Ty == LLT::float16() &&
MFI->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign())
return true;
@@ -3014,9 +3113,30 @@ bool AMDGPULegalizerInfo::legalizeAtomicCmpXChg(
/// Return true if it's known that \p Src can never be an f32 denormal value.
static bool valueIsKnownNeverF32Denorm(const MachineRegisterInfo &MRI,
Register Src) {
- Register ExtSrc;
- if (mi_match(Src, MRI, m_GFPExt(m_Reg(ExtSrc))))
- return MRI.getType(ExtSrc) == LLT::scalar(16);
+ const MachineInstr *DefMI = MRI.getVRegDef(Src);
+ switch (DefMI->getOpcode()) {
+ case TargetOpcode::G_INTRINSIC: {
+ switch (cast<GIntrinsic>(DefMI)->getIntrinsicID()) {
+ case Intrinsic::amdgcn_frexp_mant:
+ return true;
+ default:
+ break;
+ }
+
+ break;
+ }
+ case TargetOpcode::G_FFREXP: {
+ if (DefMI->getOperand(0).getReg() == Src)
+ return true;
+ break;
+ }
+ case TargetOpcode::G_FPEXT: {
+ return MRI.getType(DefMI->getOperand(1).getReg()) == LLT::scalar(16);
+ }
+ default:
+ return false;
+ }
+
return false;
}
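// Why these sources are safe (editorial summary): the mantissa returned by
// amdgcn_frexp_mant, and the first result of G_FFREXP, lies in +-[0.5, 1.0),
// well inside the f32 normal range; and every finite f16 extends to a normal
// f32 or zero, since the smallest f16 subnormal, 2^-24, is far above the f32
// normal threshold of 2^-126, so a G_FPEXT from s16 cannot yield an f32
// denormal either.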
@@ -3072,9 +3192,9 @@ bool AMDGPULegalizerInfo::legalizeFlog2(MachineInstr &MI,
const LLT F32 = LLT::scalar(32);
// Nothing in half is a denormal when promoted to f32.
auto Ext = B.buildFPExt(F32, Src, Flags);
- auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_log, {F32}, false)
- .addUse(Ext.getReg(0))
- .setMIFlags(Flags);
+ auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_log, {F32})
+ .addUse(Ext.getReg(0))
+ .setMIFlags(Flags);
B.buildFPTrunc(Dst, Log2, Flags);
MI.eraseFromParent();
return true;
@@ -3084,14 +3204,14 @@ bool AMDGPULegalizerInfo::legalizeFlog2(MachineInstr &MI,
auto [ScaledInput, IsLtSmallestNormal] = getScaledLogInput(B, Src, Flags);
if (!ScaledInput) {
- B.buildIntrinsic(Intrinsic::amdgcn_log, {MI.getOperand(0)}, false)
+ B.buildIntrinsic(Intrinsic::amdgcn_log, {MI.getOperand(0)})
.addUse(Src)
.setMIFlags(Flags);
MI.eraseFromParent();
return true;
}
- auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false)
+ auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty})
.addUse(ScaledInput)
.setMIFlags(Flags);
@@ -3148,9 +3268,8 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI,
if (ScaledInput)
X = ScaledInput;
- auto Y = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false)
- .addUse(X)
- .setMIFlags(Flags);
+ auto Y =
+ B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}).addUse(X).setMIFlags(Flags);
Register R;
if (ST.hasFastFMAF32()) {
@@ -3231,7 +3350,7 @@ bool AMDGPULegalizerInfo::legalizeFlogUnsafe(MachineIRBuilder &B, Register Dst,
if (Ty == LLT::scalar(32)) {
auto [ScaledInput, IsScaled] = getScaledLogInput(B, Src, Flags);
if (ScaledInput) {
- auto LogSrc = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false)
+ auto LogSrc = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty})
.addUse(Src)
.setMIFlags(Flags);
auto ScaledResultOffset = B.buildFConstant(Ty, -32.0 * Log2BaseInverted);
@@ -3253,7 +3372,7 @@ bool AMDGPULegalizerInfo::legalizeFlogUnsafe(MachineIRBuilder &B, Register Dst,
auto Log2Operand = Ty == LLT::scalar(16)
? B.buildFLog2(Ty, Src, Flags)
- : B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false)
+ : B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty})
.addUse(Src)
.setMIFlags(Flags);
auto Log2BaseInvertedOperand = B.buildFConstant(Ty, Log2BaseInverted);
@@ -3276,9 +3395,9 @@ bool AMDGPULegalizerInfo::legalizeFExp2(MachineInstr &MI,
if (Ty == F16) {
// Nothing in half is a denormal when promoted to f32.
auto Ext = B.buildFPExt(F32, Src, Flags);
- auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {F32}, false)
- .addUse(Ext.getReg(0))
- .setMIFlags(Flags);
+ auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {F32})
+ .addUse(Ext.getReg(0))
+ .setMIFlags(Flags);
B.buildFPTrunc(Dst, Log2, Flags);
MI.eraseFromParent();
return true;
@@ -3287,7 +3406,7 @@ bool AMDGPULegalizerInfo::legalizeFExp2(MachineInstr &MI,
assert(Ty == F32);
if (!needsDenormHandlingF32(B.getMF(), Src, Flags)) {
- B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst}, false)
+ B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst})
.addUse(Src)
.setMIFlags(Flags);
MI.eraseFromParent();
@@ -3307,7 +3426,7 @@ bool AMDGPULegalizerInfo::legalizeFExp2(MachineInstr &MI,
auto AddOffset = B.buildSelect(F32, NeedsScaling, SixtyFour, Zero, Flags);
auto AddInput = B.buildFAdd(F32, Src, AddOffset, Flags);
- auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty}, false)
+ auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty})
.addUse(AddInput.getReg(0))
.setMIFlags(Flags);
@@ -3320,20 +3439,42 @@ bool AMDGPULegalizerInfo::legalizeFExp2(MachineInstr &MI,
}
bool AMDGPULegalizerInfo::legalizeFExpUnsafe(MachineIRBuilder &B, Register Dst,
- Register Src,
- unsigned Flags) const {
+ Register X, unsigned Flags) const {
LLT Ty = B.getMRI()->getType(Dst);
- auto K = B.buildFConstant(Ty, numbers::log2e);
- auto Mul = B.buildFMul(Ty, Src, K, Flags);
+ LLT F32 = LLT::scalar(32);
- if (Ty == LLT::scalar(32)) {
- B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst}, false)
- .addUse(Mul.getReg(0))
- .setMIFlags(Flags);
- } else {
- B.buildFExp2(Dst, Mul.getReg(0), Flags);
+ if (Ty != F32 || !needsDenormHandlingF32(B.getMF(), X, Flags)) {
+ auto Log2E = B.buildFConstant(Ty, numbers::log2e);
+ auto Mul = B.buildFMul(Ty, X, Log2E, Flags);
+
+ if (Ty == F32) {
+ B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst})
+ .addUse(Mul.getReg(0))
+ .setMIFlags(Flags);
+ } else {
+ B.buildFExp2(Dst, Mul.getReg(0), Flags);
+ }
+
+ return true;
}
+ auto Threshold = B.buildFConstant(Ty, -0x1.5d58a0p+6f);
+ auto NeedsScaling =
+ B.buildFCmp(CmpInst::FCMP_OLT, LLT::scalar(1), X, Threshold, Flags);
+ auto ScaleOffset = B.buildFConstant(Ty, 0x1.0p+6f);
+ auto ScaledX = B.buildFAdd(Ty, X, ScaleOffset, Flags);
+ auto AdjustedX = B.buildSelect(Ty, NeedsScaling, ScaledX, X, Flags);
+
+ auto Log2E = B.buildFConstant(Ty, numbers::log2e);
+ auto ExpInput = B.buildFMul(Ty, AdjustedX, Log2E, Flags);
+
+ auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty})
+ .addUse(ExpInput.getReg(0))
+ .setMIFlags(Flags);
+
+ auto ResultScaleFactor = B.buildFConstant(Ty, 0x1.969d48p-93f);
+ auto AdjustedResult = B.buildFMul(Ty, Exp2, ResultScaleFactor, Flags);
+ B.buildSelect(Dst, NeedsScaling, AdjustedResult, Exp2, Flags);
return true;
}
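// The constants above encode exp(x) = exp2(x * log2(e)) with input rescaling
// (an arithmetic reading of the literals, approximate where noted):
//   -0x1.5d58a0p+6f ~= -87.33 ~= ln(2^-126): below this, exp2's argument
//                      would land in the flushed-denormal range;
//    0x1.0p+6f       =  64: exp(x) is computed as exp(x + 64) * e^-64;
//    0x1.969d48p-93f ~=  e^-64 ~= 1.60e-28: the final rescale factor.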
@@ -3347,7 +3488,7 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI,
LLT Ty = MRI.getType(Dst);
const LLT F16 = LLT::scalar(16);
const LLT F32 = LLT::scalar(32);
- const bool IsExp10 = false; // TODO: For some reason exp10 is missing
+ const bool IsExp10 = MI.getOpcode() == TargetOpcode::G_FEXP10;
if (Ty == F16) {
// v_exp_f16 (fmul x, log2e)
@@ -3374,7 +3515,7 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI,
// TODO: Interpret allowApproxFunc as ignoring DAZ. This is currently copying
// library behavior. Also, is known-not-daz source sufficient?
- if (allowApproxFunc(MF, Flags) && !needsDenormHandlingF32(MF, X, Flags)) {
+ if (allowApproxFunc(MF, Flags)) {
legalizeFExpUnsafe(B, Dst, X, Flags);
MI.eraseFromParent();
return true;
@@ -3442,14 +3583,14 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI,
PL = getMad(B, Ty, XH.getReg(0), CL.getReg(0), Mad0, Flags);
}
- auto E = B.buildFRint(Ty, PH, Flags);
+ auto E = B.buildIntrinsicRoundeven(Ty, PH, Flags);
// It is unsafe to contract this fsub into the PH multiply.
auto PHSubE = B.buildFSub(Ty, PH, E, FlagsNoContract);
auto A = B.buildFAdd(Ty, PHSubE, PL, Flags);
auto IntE = B.buildFPTOSI(LLT::scalar(32), E);
- auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty}, false)
+ auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty})
.addUse(A.getReg(0))
.setMIFlags(Flags);
auto R = B.buildFLdexp(Ty, Exp2, IntE, Flags);
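// Putting the visible steps together (a sketch of the math): with
// PH + PL ~= x * log2(e) split into high and low parts,
//   E      = roundeven(PH)
//   exp(x) = exp2((PH - E) + PL) * 2^E
// where PH - E is computed with contraction disabled so it cannot be folded
// back into the PH multiply, and the trailing ldexp applies the 2^E scale.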
@@ -3486,27 +3627,26 @@ bool AMDGPULegalizerInfo::legalizeFPow(MachineInstr &MI,
Register Src1 = MI.getOperand(2).getReg();
unsigned Flags = MI.getFlags();
LLT Ty = B.getMRI()->getType(Dst);
- const LLT S16 = LLT::scalar(16);
- const LLT S32 = LLT::scalar(32);
-
- if (Ty == S32) {
- auto Log = B.buildFLog2(S32, Src0, Flags);
- auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32}, false)
- .addUse(Log.getReg(0))
- .addUse(Src1)
- .setMIFlags(Flags);
+ const LLT F16 = LLT::float16();
+ const LLT F32 = LLT::float32();
+
+ if (Ty == F32) {
+ auto Log = B.buildFLog2(F32, Src0, Flags);
+ auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {F32})
+ .addUse(Log.getReg(0))
+ .addUse(Src1)
+ .setMIFlags(Flags);
B.buildFExp2(Dst, Mul, Flags);
- } else if (Ty == S16) {
+ } else if (Ty == F16) {
// There's no f16 fmul_legacy, so we need to convert for it.
- auto Log = B.buildFLog2(S16, Src0, Flags);
- auto Ext0 = B.buildFPExt(S32, Log, Flags);
- auto Ext1 = B.buildFPExt(S32, Src1, Flags);
- auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32}, false)
- .addUse(Ext0.getReg(0))
- .addUse(Ext1.getReg(0))
- .setMIFlags(Flags);
-
- B.buildFExp2(Dst, B.buildFPTrunc(S16, Mul), Flags);
+ auto Log = B.buildFLog2(F16, Src0, Flags);
+ auto Ext0 = B.buildFPExt(F32, Log, Flags);
+ auto Ext1 = B.buildFPExt(F32, Src1, Flags);
+ auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {F32})
+ .addUse(Ext0.getReg(0))
+ .addUse(Ext1.getReg(0))
+ .setMIFlags(Flags);
+ B.buildFExp2(Dst, B.buildFPTrunc(F16, Mul), Flags);
} else
return false;
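// The lowering uses the identity pow(x, y) = exp2(y * log2(x)). The product
// goes through fmul_legacy, presumably for its 0 * anything == 0 semantics
// (e.g. pow(1.0, inf) == 1.0); only v_mul_legacy_f32 exists, hence the f16
// path extends to f32 and truncates back.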
@@ -3531,11 +3671,11 @@ bool AMDGPULegalizerInfo::legalizeFFloor(MachineInstr &MI,
MachineIRBuilder &B) const {
const LLT S1 = LLT::scalar(1);
- const LLT S64 = LLT::scalar(64);
+ const LLT F64 = LLT::float64();
Register Dst = MI.getOperand(0).getReg();
Register OrigSrc = MI.getOperand(1).getReg();
unsigned Flags = MI.getFlags();
- assert(ST.hasFractBug() && MRI.getType(Dst) == S64 &&
+ assert(ST.hasFractBug() && MRI.getType(Dst) == F64 &&
"this should not have been custom lowered");
// V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x))
@@ -3546,9 +3686,9 @@ bool AMDGPULegalizerInfo::legalizeFFloor(MachineInstr &MI,
//
// Convert floor(x) to (x - fract(x))
- auto Fract = B.buildIntrinsic(Intrinsic::amdgcn_fract, {S64}, false)
- .addUse(OrigSrc)
- .setMIFlags(Flags);
+ auto Fract = B.buildIntrinsic(Intrinsic::amdgcn_fract, {F64})
+ .addUse(OrigSrc)
+ .setMIFlags(Flags);
// Give source modifier matching some assistance before obscuring a foldable
// pattern.
@@ -3558,9 +3698,9 @@ bool AMDGPULegalizerInfo::legalizeFFloor(MachineInstr &MI,
Register ModSrc = stripAnySourceMods(OrigSrc, MRI);
auto Const =
- B.buildFConstant(S64, llvm::bit_cast<double>(0x3fefffffffffffff));
+ B.buildFConstant(F64, llvm::bit_cast<double>(0x3fefffffffffffff));
- Register Min = MRI.createGenericVirtualRegister(S64);
+ Register Min = MRI.createGenericVirtualRegister(F64);
// We don't need to concern ourselves with the snan handling difference, so
// use the one which will directly select.
@@ -3573,10 +3713,10 @@ bool AMDGPULegalizerInfo::legalizeFFloor(MachineInstr &MI,
Register CorrectedFract = Min;
if (!MI.getFlag(MachineInstr::FmNoNans)) {
auto IsNan = B.buildFCmp(CmpInst::FCMP_ORD, S1, ModSrc, ModSrc, Flags);
- CorrectedFract = B.buildSelect(S64, IsNan, ModSrc, Min, Flags).getReg(0);
+ CorrectedFract = B.buildSelect(F64, IsNan, ModSrc, Min, Flags).getReg(0);
}
- auto NegFract = B.buildFNeg(S64, CorrectedFract, Flags);
+ auto NegFract = B.buildFNeg(F64, CorrectedFract, Flags);
B.buildFAdd(Dst, OrigSrc, NegFract, Flags);
MI.eraseFromParent();
@@ -4497,38 +4637,36 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
// 1 / x -> RCP(x)
if (CLHS->isExactlyValue(1.0)) {
- B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false)
- .addUse(RHS)
- .setMIFlags(Flags);
+ B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res)
+ .addUse(RHS)
+ .setMIFlags(Flags);
MI.eraseFromParent();
return true;
}
- // TODO: Match rsq
-
// -1 / x -> RCP( FNEG(x) )
if (CLHS->isExactlyValue(-1.0)) {
auto FNeg = B.buildFNeg(ResTy, RHS, Flags);
- B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false)
- .addUse(FNeg.getReg(0))
- .setMIFlags(Flags);
+ B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res)
+ .addUse(FNeg.getReg(0))
+ .setMIFlags(Flags);
MI.eraseFromParent();
return true;
}
}
- // For f16 require arcp only.
- // For f32 require afn+arcp.
+ // For f16 require afn or arcp.
+ // For f32 require afn.
if (!AllowInaccurateRcp && (ResTy != LLT::scalar(16) ||
!MI.getFlag(MachineInstr::FmArcp)))
return false;
// x / y -> x * (1.0 / y)
- auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy}, false)
- .addUse(RHS)
- .setMIFlags(Flags);
+ auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy})
+ .addUse(RHS)
+ .setMIFlags(Flags);
B.buildFMul(Res, LHS, RCP, Flags);
MI.eraseFromParent();
@@ -4554,9 +4692,9 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV64(MachineInstr &MI,
auto NegY = B.buildFNeg(ResTy, Y);
auto One = B.buildFConstant(ResTy, 1.0);
- auto R = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy}, false)
- .addUse(Y)
- .setMIFlags(Flags);
+ auto R = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy})
+ .addUse(Y)
+ .setMIFlags(Flags);
auto Tmp0 = B.buildFMA(ResTy, NegY, R, One);
R = B.buildFMA(ResTy, Tmp0, R, R);
@@ -4590,23 +4728,27 @@ bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr &MI,
auto LHSExt = B.buildFPExt(S32, LHS, Flags);
auto RHSExt = B.buildFPExt(S32, RHS, Flags);
- auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
- .addUse(RHSExt.getReg(0))
- .setMIFlags(Flags);
+ auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32})
+ .addUse(RHSExt.getReg(0))
+ .setMIFlags(Flags);
auto QUOT = B.buildFMul(S32, LHSExt, RCP, Flags);
auto RDst = B.buildFPTrunc(S16, QUOT, Flags);
- B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res, false)
- .addUse(RDst.getReg(0))
- .addUse(RHS)
- .addUse(LHS)
- .setMIFlags(Flags);
+ B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res)
+ .addUse(RDst.getReg(0))
+ .addUse(RHS)
+ .addUse(LHS)
+ .setMIFlags(Flags);
MI.eraseFromParent();
return true;
}
+static const unsigned SPDenormModeBitField =
+ AMDGPU::Hwreg::ID_MODE | (4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
+ (1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
+
// Enable or disable FP32 denorm mode. When 'Enable' is true, emit instructions
// to enable denorm mode. When 'Enable' is false, disable denorm mode.
static void toggleSPDenormMode(bool Enable, MachineIRBuilder &B,
@@ -4625,11 +4767,6 @@ static void toggleSPDenormMode(bool Enable, MachineIRBuilder &B,
.addImm(NewDenormModeValue);
} else {
- // Select FP32 bit field in mode register.
- unsigned SPDenormModeBitField = AMDGPU::Hwreg::ID_MODE |
- (4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
- (1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
-
B.buildInstr(AMDGPU::S_SETREG_IMM32_B32)
.addImm(SPDenormMode)
.addImm(SPDenormModeBitField);
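// Decoded, assuming the usual hwreg(id, offset, width) encoding with the
// width stored as width - 1: SPDenormModeBitField names hwreg(HW_REG_MODE,
// 4, 2), the two FP32 denormal-control bits of the MODE register, which the
// S_SETREG paths above and the S_GETREG save/restore in legalizeFDIV32 read
// and write.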
@@ -4656,27 +4793,38 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
auto One = B.buildFConstant(S32, 1.0f);
auto DenominatorScaled =
- B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1}, false)
- .addUse(LHS)
- .addUse(RHS)
- .addImm(0)
- .setMIFlags(Flags);
+ B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1})
+ .addUse(LHS)
+ .addUse(RHS)
+ .addImm(0)
+ .setMIFlags(Flags);
auto NumeratorScaled =
- B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1}, false)
- .addUse(LHS)
- .addUse(RHS)
- .addImm(1)
- .setMIFlags(Flags);
-
- auto ApproxRcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
- .addUse(DenominatorScaled.getReg(0))
- .setMIFlags(Flags);
+ B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1})
+ .addUse(LHS)
+ .addUse(RHS)
+ .addImm(1)
+ .setMIFlags(Flags);
+
+ auto ApproxRcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32})
+ .addUse(DenominatorScaled.getReg(0))
+ .setMIFlags(Flags);
auto NegDivScale0 = B.buildFNeg(S32, DenominatorScaled, Flags);
- // FIXME: Doesn't correctly model the FP mode switch, and the FP operations
- // aren't modeled as reading it.
- if (Mode.FP32Denormals != DenormalMode::getIEEE())
+ const bool PreservesDenormals = Mode.FP32Denormals == DenormalMode::getIEEE();
+ const bool HasDynamicDenormals =
+ (Mode.FP32Denormals.Input == DenormalMode::Dynamic) ||
+ (Mode.FP32Denormals.Output == DenormalMode::Dynamic);
+
+ Register SavedSPDenormMode;
+ if (!PreservesDenormals) {
+ if (HasDynamicDenormals) {
+ SavedSPDenormMode = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ B.buildInstr(AMDGPU::S_GETREG_B32)
+ .addDef(SavedSPDenormMode)
+ .addImm(SPDenormModeBitField);
+ }
toggleSPDenormMode(true, B, ST, Mode);
+ }
auto Fma0 = B.buildFMA(S32, NegDivScale0, ApproxRcp, One, Flags);
auto Fma1 = B.buildFMA(S32, Fma0, ApproxRcp, ApproxRcp, Flags);
@@ -4685,23 +4833,28 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
auto Fma3 = B.buildFMA(S32, Fma2, Fma1, Mul, Flags);
auto Fma4 = B.buildFMA(S32, NegDivScale0, Fma3, NumeratorScaled, Flags);
- // FIXME: This mishandles dynamic denormal mode. We need to query the
- // current mode and restore the original.
- if (Mode.FP32Denormals != DenormalMode::getIEEE())
- toggleSPDenormMode(false, B, ST, Mode);
+ if (!PreservesDenormals) {
+ if (HasDynamicDenormals) {
+ assert(SavedSPDenormMode);
+ B.buildInstr(AMDGPU::S_SETREG_B32)
+ .addReg(SavedSPDenormMode)
+ .addImm(SPDenormModeBitField);
+ } else
+ toggleSPDenormMode(false, B, ST, Mode);
+ }
- auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32}, false)
- .addUse(Fma4.getReg(0))
- .addUse(Fma1.getReg(0))
- .addUse(Fma3.getReg(0))
- .addUse(NumeratorScaled.getReg(1))
- .setMIFlags(Flags);
+ auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32})
+ .addUse(Fma4.getReg(0))
+ .addUse(Fma1.getReg(0))
+ .addUse(Fma3.getReg(0))
+ .addUse(NumeratorScaled.getReg(1))
+ .setMIFlags(Flags);
- B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res, false)
- .addUse(Fmas.getReg(0))
- .addUse(RHS)
- .addUse(LHS)
- .setMIFlags(Flags);
+ B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res)
+ .addUse(Fmas.getReg(0))
+ .addUse(RHS)
+ .addUse(LHS)
+ .setMIFlags(Flags);
MI.eraseFromParent();
return true;
@@ -4724,27 +4877,27 @@ bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI,
auto One = B.buildFConstant(S64, 1.0);
- auto DivScale0 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1}, false)
- .addUse(LHS)
- .addUse(RHS)
- .addImm(0)
- .setMIFlags(Flags);
+ auto DivScale0 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1})
+ .addUse(LHS)
+ .addUse(RHS)
+ .addImm(0)
+ .setMIFlags(Flags);
auto NegDivScale0 = B.buildFNeg(S64, DivScale0.getReg(0), Flags);
- auto Rcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S64}, false)
- .addUse(DivScale0.getReg(0))
- .setMIFlags(Flags);
+ auto Rcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S64})
+ .addUse(DivScale0.getReg(0))
+ .setMIFlags(Flags);
auto Fma0 = B.buildFMA(S64, NegDivScale0, Rcp, One, Flags);
auto Fma1 = B.buildFMA(S64, Rcp, Fma0, Rcp, Flags);
auto Fma2 = B.buildFMA(S64, NegDivScale0, Fma1, One, Flags);
- auto DivScale1 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1}, false)
- .addUse(LHS)
- .addUse(RHS)
- .addImm(1)
- .setMIFlags(Flags);
+ auto DivScale1 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1})
+ .addUse(LHS)
+ .addUse(RHS)
+ .addImm(1)
+ .setMIFlags(Flags);
auto Fma3 = B.buildFMA(S64, Fma1, Fma2, Fma1, Flags);
auto Mul = B.buildFMul(S64, DivScale1.getReg(0), Fma3, Flags);
@@ -4771,14 +4924,14 @@ bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI,
Scale = DivScale1.getReg(1);
}
- auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S64}, false)
- .addUse(Fma4.getReg(0))
- .addUse(Fma3.getReg(0))
- .addUse(Mul.getReg(0))
- .addUse(Scale)
- .setMIFlags(Flags);
+ auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S64})
+ .addUse(Fma4.getReg(0))
+ .addUse(Fma3.getReg(0))
+ .addUse(Mul.getReg(0))
+ .addUse(Scale)
+ .setMIFlags(Flags);
- B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, ArrayRef(Res), false)
+ B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, ArrayRef(Res))
.addUse(Fmas.getReg(0))
.addUse(RHS)
.addUse(LHS)
@@ -4799,10 +4952,10 @@ bool AMDGPULegalizerInfo::legalizeFFREXP(MachineInstr &MI,
LLT Ty = MRI.getType(Res0);
LLT InstrExpTy = Ty == LLT::scalar(16) ? LLT::scalar(16) : LLT::scalar(32);
- auto Mant = B.buildIntrinsic(Intrinsic::amdgcn_frexp_mant, {Ty}, false)
+ auto Mant = B.buildIntrinsic(Intrinsic::amdgcn_frexp_mant, {Ty})
.addUse(Val)
.setMIFlags(Flags);
- auto Exp = B.buildIntrinsic(Intrinsic::amdgcn_frexp_exp, {InstrExpTy}, false)
+ auto Exp = B.buildIntrinsic(Intrinsic::amdgcn_frexp_exp, {InstrExpTy})
.addUse(Val)
.setMIFlags(Flags);
@@ -4846,9 +4999,9 @@ bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI,
auto Mul0 = B.buildFMul(S32, RHS, Sel, Flags);
- auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
- .addUse(Mul0.getReg(0))
- .setMIFlags(Flags);
+ auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32})
+ .addUse(Mul0.getReg(0))
+ .setMIFlags(Flags);
auto Mul1 = B.buildFMul(S32, LHS, RCP, Flags);
@@ -4858,9 +5011,107 @@ bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI,
return true;
}
-bool AMDGPULegalizerInfo::legalizeFSQRT(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &B) const {
+bool AMDGPULegalizerInfo::legalizeFSQRTF16(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ // Bypass the correct expansion that a standard promotion through G_FSQRT
+ // would produce. The f32 op is accurate enough for the f16 case.
+ unsigned Flags = MI.getFlags();
+ assert(!ST.has16BitInsts());
+ const LLT F32 = LLT::scalar(32);
+ auto Ext = B.buildFPExt(F32, MI.getOperand(1), Flags);
+ auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_sqrt, {F32})
+ .addUse(Ext.getReg(0))
+ .setMIFlags(Flags);
+ B.buildFPTrunc(MI.getOperand(0), Log2, Flags);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeFSQRTF32(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ MachineFunction &MF = B.getMF();
+ Register Dst = MI.getOperand(0).getReg();
+ Register X = MI.getOperand(1).getReg();
+ const unsigned Flags = MI.getFlags();
+ const LLT S1 = LLT::scalar(1);
+ const LLT F32 = LLT::scalar(32);
+ const LLT I32 = LLT::scalar(32);
+
+ if (allowApproxFunc(MF, Flags)) {
+ B.buildIntrinsic(Intrinsic::amdgcn_sqrt, ArrayRef<Register>({Dst}))
+ .addUse(X)
+ .setMIFlags(Flags);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ auto ScaleThreshold = B.buildFConstant(F32, 0x1.0p-96f);
+ auto NeedScale = B.buildFCmp(CmpInst::FCMP_OGT, S1, ScaleThreshold, X, Flags);
+ auto ScaleUpFactor = B.buildFConstant(F32, 0x1.0p+32f);
+ auto ScaledX = B.buildFMul(F32, X, ScaleUpFactor, Flags);
+ auto SqrtX = B.buildSelect(F32, NeedScale, ScaledX, X, Flags);
+
+ Register SqrtS = MRI.createGenericVirtualRegister(F32);
+ if (needsDenormHandlingF32(MF, X, Flags)) {
+ B.buildIntrinsic(Intrinsic::amdgcn_sqrt, ArrayRef<Register>({SqrtS}))
+ .addUse(SqrtX.getReg(0))
+ .setMIFlags(Flags);
+
+ auto NegOne = B.buildConstant(I32, -1);
+ auto SqrtSNextDown = B.buildAdd(I32, SqrtS, NegOne);
+
+ auto NegSqrtSNextDown = B.buildFNeg(F32, SqrtSNextDown, Flags);
+ auto SqrtVP = B.buildFMA(F32, NegSqrtSNextDown, SqrtS, SqrtX, Flags);
+
+ auto PosOne = B.buildConstant(I32, 1);
+ auto SqrtSNextUp = B.buildAdd(I32, SqrtS, PosOne);
+
+ auto NegSqrtSNextUp = B.buildFNeg(F32, SqrtSNextUp, Flags);
+ auto SqrtVS = B.buildFMA(F32, NegSqrtSNextUp, SqrtS, SqrtX, Flags);
+
+ auto Zero = B.buildFConstant(F32, 0.0f);
+ auto SqrtVPLE0 = B.buildFCmp(CmpInst::FCMP_OLE, S1, SqrtVP, Zero, Flags);
+
+ SqrtS =
+ B.buildSelect(F32, SqrtVPLE0, SqrtSNextDown, SqrtS, Flags).getReg(0);
+
+ auto SqrtVPVSGT0 = B.buildFCmp(CmpInst::FCMP_OGT, S1, SqrtVS, Zero, Flags);
+ SqrtS =
+ B.buildSelect(F32, SqrtVPVSGT0, SqrtSNextUp, SqrtS, Flags).getReg(0);
+ } else {
+ auto SqrtR =
+ B.buildIntrinsic(Intrinsic::amdgcn_rsq, {F32}).addReg(SqrtX.getReg(0));
+ B.buildFMul(SqrtS, SqrtX, SqrtR, Flags);
+
+ auto Half = B.buildFConstant(F32, 0.5f);
+ auto SqrtH = B.buildFMul(F32, SqrtR, Half, Flags);
+ auto NegSqrtH = B.buildFNeg(F32, SqrtH, Flags);
+ auto SqrtE = B.buildFMA(F32, NegSqrtH, SqrtS, Half, Flags);
+ SqrtH = B.buildFMA(F32, SqrtH, SqrtE, SqrtH, Flags);
+ SqrtS = B.buildFMA(F32, SqrtS, SqrtE, SqrtS, Flags).getReg(0);
+ auto NegSqrtS = B.buildFNeg(F32, SqrtS, Flags);
+ auto SqrtD = B.buildFMA(F32, NegSqrtS, SqrtS, SqrtX, Flags);
+ SqrtS = B.buildFMA(F32, SqrtD, SqrtH, SqrtS, Flags).getReg(0);
+ }
+
+ auto ScaleDownFactor = B.buildFConstant(F32, 0x1.0p-16f);
+
+ auto ScaledDown = B.buildFMul(F32, SqrtS, ScaleDownFactor, Flags);
+
+ SqrtS = B.buildSelect(F32, NeedScale, ScaledDown, SqrtS, Flags).getReg(0);
+
+ auto IsZeroOrInf = B.buildIsFPClass(LLT::scalar(1), SqrtX, fcZero | fcPosInf);
+ B.buildSelect(Dst, IsZeroOrInf, SqrtX, SqrtS, Flags);
+
+ MI.eraseFromParent();
+ return true;
+}
+
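// Scaling sketch for the f32 path above: when x < 0x1.0p-96f the input is
// multiplied by 2^32 before the square root and the result by 2^-16 after,
// using sqrt(x * 2^32) == sqrt(x) * 2^16 to keep the intermediate out of the
// denormal range without changing the value; the final is-fpclass select
// then passes +-0 and +inf through unchanged.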
+bool AMDGPULegalizerInfo::legalizeFSQRTF64(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
// For double type, the SQRT and RSQ instructions don't have required
// precision, we apply Goldschmidt's algorithm to improve the result:
//
@@ -4901,8 +5152,8 @@ bool AMDGPULegalizerInfo::legalizeFSQRT(MachineInstr &MI,
auto ScaleUp = B.buildSelect(S32, Scaling, ScaleUpFactor, ZeroInt);
auto SqrtX = B.buildFLdexp(F64, X, ScaleUp, Flags);
- auto SqrtY = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {F64}, false)
- .addReg(SqrtX.getReg(0));
+ auto SqrtY =
+ B.buildIntrinsic(Intrinsic::amdgcn_rsq, {F64}).addReg(SqrtX.getReg(0));
auto Half = B.buildFConstant(F64, 0.5);
auto SqrtH0 = B.buildFMul(F64, SqrtY, Half);
@@ -4942,6 +5193,19 @@ bool AMDGPULegalizerInfo::legalizeFSQRT(MachineInstr &MI,
return true;
}
+bool AMDGPULegalizerInfo::legalizeFSQRT(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (Ty == LLT::scalar(32))
+ return legalizeFSQRTF32(MI, MRI, B);
+ if (Ty == LLT::scalar(64))
+ return legalizeFSQRTF64(MI, MRI, B);
+ if (Ty == LLT::scalar(16))
+ return legalizeFSQRTF16(MI, MRI, B);
+ return false;
+}
+
// Expand llvm.amdgcn.rsq.clamp on targets that don't support the instruction.
// FIXME: Why do we handle this one but not other removed instructions?
//
@@ -4968,9 +5232,9 @@ bool AMDGPULegalizerInfo::legalizeRsqClampIntrinsic(MachineInstr &MI,
else
return false;
- auto Rsq = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {Ty}, false)
- .addUse(Src)
- .setMIFlags(Flags);
+ auto Rsq = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {Ty})
+ .addUse(Src)
+ .setMIFlags(Flags);
// We don't need to concern ourselves with the snan handling difference, since
// the rsq already quieted it (or not); use the one which will directly select.
@@ -5153,7 +5417,7 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI,
std::pair<Register, unsigned>
AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B,
Register OrigOffset) const {
- const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
+ const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset(ST);
Register BaseReg;
unsigned ImmOffset;
const LLT S32 = LLT::scalar(32);
@@ -5631,31 +5895,23 @@ bool AMDGPULegalizerInfo::legalizeBufferAtomic(MachineInstr &MI,
IID == Intrinsic::amdgcn_struct_buffer_atomic_cmpswap ||
IID == Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap ||
IID == Intrinsic::amdgcn_struct_ptr_buffer_atomic_cmpswap;
- const bool HasReturn = MI.getNumExplicitDefs() != 0;
-
- Register Dst;
-
- int OpOffset = 0;
- if (HasReturn) {
- // A few FP atomics do not support return values.
- Dst = MI.getOperand(0).getReg();
- } else {
- OpOffset = -1;
- }
+ Register Dst = MI.getOperand(0).getReg();
// Since we don't have 128-bit atomics, we don't need to handle the case of
// p8 arguments to the atomic itself
- Register VData = MI.getOperand(2 + OpOffset).getReg();
+ Register VData = MI.getOperand(2).getReg();
+
Register CmpVal;
+ int OpOffset = 0;
if (IsCmpSwap) {
- CmpVal = MI.getOperand(3 + OpOffset).getReg();
+ CmpVal = MI.getOperand(3).getReg();
++OpOffset;
}
castBufferRsrcArgToV4I32(MI, B, 3 + OpOffset);
Register RSrc = MI.getOperand(3 + OpOffset).getReg();
- const unsigned NumVIndexOps = (IsCmpSwap ? 8 : 7) + HasReturn;
+ const unsigned NumVIndexOps = IsCmpSwap ? 9 : 8;
// The struct intrinsic variants add one additional operand over raw.
const bool HasVIndex = MI.getNumOperands() == NumVIndexOps;
@@ -5676,12 +5932,9 @@ bool AMDGPULegalizerInfo::legalizeBufferAtomic(MachineInstr &MI,
unsigned ImmOffset;
std::tie(VOffset, ImmOffset) = splitBufferOffsets(B, VOffset);
- auto MIB = B.buildInstr(getBufferAtomicPseudo(IID));
-
- if (HasReturn)
- MIB.addDef(Dst);
-
- MIB.addUse(VData); // vdata
+ auto MIB = B.buildInstr(getBufferAtomicPseudo(IID))
+ .addDef(Dst)
+ .addUse(VData); // vdata
if (IsCmpSwap)
MIB.addReg(CmpVal);
@@ -5903,53 +6156,52 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
return false;
}
- const unsigned NSAMaxSize = ST.getNSAMaxSize();
+ const unsigned NSAMaxSize = ST.getNSAMaxSize(BaseOpcode->Sampler);
const unsigned HasPartialNSA = ST.hasPartialNSAEncoding();
if (IsA16 || IsG16) {
- if (Intr->NumVAddrs > 1) {
- SmallVector<Register, 4> PackedRegs;
+ // Even if NumVAddrs == 1 we should pack it into a 32-bit value, because the
+ // instructions expect VGPR_32
+ SmallVector<Register, 4> PackedRegs;
- packImage16bitOpsToDwords(B, MI, PackedRegs, ArgOffset, Intr, IsA16,
- IsG16);
-
- // See also below in the non-a16 branch
- const bool UseNSA = ST.hasNSAEncoding() &&
- PackedRegs.size() >= ST.getNSAThreshold(MF) &&
- (PackedRegs.size() <= NSAMaxSize || HasPartialNSA);
- const bool UsePartialNSA =
- UseNSA && HasPartialNSA && PackedRegs.size() > NSAMaxSize;
-
- if (UsePartialNSA) {
- // Pack registers that would go over NSAMaxSize into last VAddr register
- LLT PackedAddrTy =
- LLT::fixed_vector(2 * (PackedRegs.size() - NSAMaxSize + 1), 16);
- auto Concat = B.buildConcatVectors(
- PackedAddrTy, ArrayRef(PackedRegs).slice(NSAMaxSize - 1));
- PackedRegs[NSAMaxSize - 1] = Concat.getReg(0);
- PackedRegs.resize(NSAMaxSize);
- } else if (!UseNSA && PackedRegs.size() > 1) {
- LLT PackedAddrTy = LLT::fixed_vector(2 * PackedRegs.size(), 16);
- auto Concat = B.buildConcatVectors(PackedAddrTy, PackedRegs);
- PackedRegs[0] = Concat.getReg(0);
- PackedRegs.resize(1);
- }
+ packImage16bitOpsToDwords(B, MI, PackedRegs, ArgOffset, Intr, IsA16, IsG16);
- const unsigned NumPacked = PackedRegs.size();
- for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {
- MachineOperand &SrcOp = MI.getOperand(ArgOffset + I);
- if (!SrcOp.isReg()) {
- assert(SrcOp.isImm() && SrcOp.getImm() == 0);
- continue;
- }
+ // See also below in the non-a16 branch
+ const bool UseNSA = ST.hasNSAEncoding() &&
+ PackedRegs.size() >= ST.getNSAThreshold(MF) &&
+ (PackedRegs.size() <= NSAMaxSize || HasPartialNSA);
+ const bool UsePartialNSA =
+ UseNSA && HasPartialNSA && PackedRegs.size() > NSAMaxSize;
- assert(SrcOp.getReg() != AMDGPU::NoRegister);
+ if (UsePartialNSA) {
+ // Pack registers that would go over NSAMaxSize into last VAddr register
+ LLT PackedAddrTy =
+ LLT::fixed_vector(2 * (PackedRegs.size() - NSAMaxSize + 1), 16);
+ auto Concat = B.buildConcatVectors(
+ PackedAddrTy, ArrayRef(PackedRegs).slice(NSAMaxSize - 1));
+ PackedRegs[NSAMaxSize - 1] = Concat.getReg(0);
+ PackedRegs.resize(NSAMaxSize);
+ } else if (!UseNSA && PackedRegs.size() > 1) {
+ LLT PackedAddrTy = LLT::fixed_vector(2 * PackedRegs.size(), 16);
+ auto Concat = B.buildConcatVectors(PackedAddrTy, PackedRegs);
+ PackedRegs[0] = Concat.getReg(0);
+ PackedRegs.resize(1);
+ }
- if (I - Intr->VAddrStart < NumPacked)
- SrcOp.setReg(PackedRegs[I - Intr->VAddrStart]);
- else
- SrcOp.setReg(AMDGPU::NoRegister);
+ const unsigned NumPacked = PackedRegs.size();
+ for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {
+ MachineOperand &SrcOp = MI.getOperand(ArgOffset + I);
+ if (!SrcOp.isReg()) {
+ assert(SrcOp.isImm() && SrcOp.getImm() == 0);
+ continue;
}
+
+ assert(SrcOp.getReg() != AMDGPU::NoRegister);
+
+ if (I - Intr->VAddrStart < NumPacked)
+ SrcOp.setReg(PackedRegs[I - Intr->VAddrStart]);
+ else
+ SrcOp.setReg(AMDGPU::NoRegister);
}
} else {
// If the register allocator cannot place the address registers contiguously
@@ -5964,7 +6216,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
// SIShrinkInstructions will convert NSA encodings to non-NSA after register
// allocation when possible.
//
- // Partial NSA is allowed on GFX11 where the final register is a contiguous
+ // Partial NSA is allowed on GFX11+ where the final register is a contiguous
// set of the remaining addresses.
const bool UseNSA = ST.hasNSAEncoding() &&
CorrectedNumVAddrs >= ST.getNSAThreshold(MF) &&
@@ -6195,13 +6447,11 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad(
// Handle needing to s.buffer.load() a p8 value.
if (hasBufferRsrcWorkaround(Ty)) {
Ty = castBufferRsrcFromV4I32(MI, B, *B.getMRI(), 0);
- Dst = MI.getOperand(0).getReg();
B.setInsertPt(B.getMBB(), MI);
}
if (shouldBitcastLoadStoreType(ST, Ty, LLT::scalar(Size))) {
Ty = getBitcastRegisterType(Ty);
Helper.bitcastDst(MI, Ty, 0);
- Dst = MI.getOperand(0).getReg();
B.setInsertPt(B.getMBB(), MI);
}
@@ -6222,10 +6472,10 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad(
MemSize, MemAlign);
MI.addMemOperand(MF, MMO);
- // There are no 96-bit result scalar loads, but widening to 128-bit should
+ // If we don't have 96-bit result scalar loads, widening to 128-bit should
// always be legal. We may need to restore this to a 96-bit result if it turns
// out this needs to be converted to a vector load during RegBankSelect.
- if (!isPowerOf2_32(Size)) {
+ if (!isPowerOf2_32(Size) && (Size != 96 || !ST.hasScalarDwordx3Loads())) {
if (Ty.isVector())
Helper.moreElementsVectorDst(MI, getPow2VectorType(Ty), 0);
else
@@ -6244,11 +6494,6 @@ bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI,
ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA)
return legalizeTrapEndpgm(MI, MRI, B);
- const Module *M = B.getMF().getFunction().getParent();
- unsigned CodeObjectVersion = AMDGPU::getCodeObjectVersion(*M);
- if (CodeObjectVersion <= AMDGPU::AMDHSA_COV3)
- return legalizeTrapHsaQueuePtr(MI, MRI, B);
-
return ST.supportsGetDoorbellID() ?
legalizeTrapHsa(MI, MRI, B) : legalizeTrapHsaQueuePtr(MI, MRI, B);
}
@@ -6395,13 +6640,17 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
return false;
}
+ const bool IsGFX11 = AMDGPU::isGFX11(ST);
const bool IsGFX11Plus = AMDGPU::isGFX11Plus(ST);
+ const bool IsGFX12Plus = AMDGPU::isGFX12Plus(ST);
const bool IsA16 = MRI.getType(RayDir).getElementType().getSizeInBits() == 16;
const bool Is64 = MRI.getType(NodePtr).getSizeInBits() == 64;
const unsigned NumVDataDwords = 4;
const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11);
const unsigned NumVAddrs = IsGFX11Plus ? (IsA16 ? 4 : 5) : NumVAddrDwords;
- const bool UseNSA = ST.hasNSAEncoding() && NumVAddrs <= ST.getNSAMaxSize();
+ const bool UseNSA =
+ IsGFX12Plus || (ST.hasNSAEncoding() && NumVAddrs <= ST.getNSAMaxSize());
+
const unsigned BaseOpcodes[2][2] = {
{AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16},
{AMDGPU::IMAGE_BVH64_INTERSECT_RAY,
@@ -6409,14 +6658,16 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
int Opcode;
if (UseNSA) {
Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
- IsGFX11Plus ? AMDGPU::MIMGEncGfx11NSA
+ IsGFX12Plus ? AMDGPU::MIMGEncGfx12
+ : IsGFX11 ? AMDGPU::MIMGEncGfx11NSA
: AMDGPU::MIMGEncGfx10NSA,
NumVDataDwords, NumVAddrDwords);
} else {
- Opcode = AMDGPU::getMIMGOpcode(
- BaseOpcodes[Is64][IsA16],
- IsGFX11Plus ? AMDGPU::MIMGEncGfx11Default : AMDGPU::MIMGEncGfx10Default,
- NumVDataDwords, NumVAddrDwords);
+ assert(!IsGFX12Plus);
+ Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
+ IsGFX11 ? AMDGPU::MIMGEncGfx11Default
+ : AMDGPU::MIMGEncGfx10Default,
+ NumVDataDwords, NumVAddrDwords);
}
assert(Opcode != -1);
@@ -6539,13 +6790,23 @@ bool AMDGPULegalizerInfo::legalizeFPTruncRound(MachineInstr &MI,
return true;
}
+bool AMDGPULegalizerInfo::legalizeStackSave(MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ const SITargetLowering *TLI = ST.getTargetLowering();
+ Register StackPtr = TLI->getStackPointerRegisterToSaveRestore();
+ Register DstReg = MI.getOperand(0).getReg();
+ B.buildInstr(AMDGPU::G_AMDGPU_WAVE_ADDRESS, {DstReg}, {StackPtr});
+ MI.eraseFromParent();
+ return true;
+}
+
bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
MachineIRBuilder &B = Helper.MIRBuilder;
MachineRegisterInfo &MRI = *B.getMRI();
// Replace the use G_BRCOND with the exec manipulate and branch pseudos.
- auto IntrID = MI.getIntrinsicID();
+ auto IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
switch (IntrID) {
case Intrinsic::amdgcn_if:
case Intrinsic::amdgcn_else: {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 534bb2c87ea3..855fa0ddc214 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -43,8 +43,8 @@ public:
bool legalizeAddrSpaceCast(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
- bool legalizeFrint(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B) const;
+ bool legalizeFroundeven(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
bool legalizeFceil(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeFrem(MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -68,6 +68,10 @@ public:
const GlobalValue *GV, int64_t Offset,
unsigned GAFlags = SIInstrInfo::MO_NONE) const;
+ void buildAbsGlobalAddress(Register DstReg, LLT PtrTy, MachineIRBuilder &B,
+ const GlobalValue *GV,
+ MachineRegisterInfo &MRI) const;
+
bool legalizeGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeLoad(LegalizerHelper &Helper, MachineInstr &MI) const;
@@ -157,6 +161,12 @@ public:
bool legalizeFDIVFastIntrin(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+ bool legalizeFSQRTF16(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+ bool legalizeFSQRTF32(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+ bool legalizeFSQRTF64(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
bool legalizeFSQRT(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
@@ -201,6 +211,7 @@ public:
bool legalizeBVHIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeFPTruncRound(MachineInstr &MI, MachineIRBuilder &B) const;
+ bool legalizeStackSave(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeImageIntrinsic(
MachineInstr &MI, MachineIRBuilder &B,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 44ce1e15f0ef..0c21382e5c22 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -14,18 +14,22 @@
#include "AMDGPU.h"
#include "AMDGPULibFunc.h"
#include "GCNSubtarget.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/AttributeMask.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Target/TargetMachine.h"
#include <cmath>
#define DEBUG_TYPE "amdgpu-simplifylib"
using namespace llvm;
+using namespace llvm::PatternMatch;
static cl::opt<bool> EnablePreLink("amdgpu-prelink",
cl::desc("Enable pre-link mode optimizations"),
@@ -46,10 +50,13 @@ namespace llvm {
class AMDGPULibCalls {
private:
+ const TargetLibraryInfo *TLInfo = nullptr;
+ AssumptionCache *AC = nullptr;
+ DominatorTree *DT = nullptr;
typedef llvm::AMDGPULibFunc FuncInfo;
- const TargetMachine *TM;
+ bool UnsafeFPMath = false;
// -fuse-native.
bool AllNative = false;
@@ -66,64 +73,76 @@ private:
/* Specialized optimizations */
- // recip (half or native)
- bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
-
- // divide (half or native)
- bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
-
// pow/powr/pown
- bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
+ bool fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
// rootn
- bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
-
- // fma/mad
- bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
+ bool fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
// -fuse-native for sincos
bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
// evaluate calls if calls' arguments are constants.
- bool evaluateScalarMathFunc(const FuncInfo &FInfo, double& Res0,
- double& Res1, Constant *copr0, Constant *copr1, Constant *copr2);
+ bool evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0, double &Res1,
+ Constant *copr0, Constant *copr1);
bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);
// sqrt
- bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
+ bool fold_sqrt(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
+
+ /// Insert a call to the sincos function \p Fsincos. Returns (value of sin,
+ /// value of cos, sincos call).
+ std::tuple<Value *, Value *, Value *> insertSinCos(Value *Arg,
+ FastMathFlags FMF,
+ IRBuilder<> &B,
+ FunctionCallee Fsincos);
// sin/cos
- bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA);
+ bool fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
// __read_pipe/__write_pipe
bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
const FuncInfo &FInfo);
- // llvm.amdgcn.wavefrontsize
- bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B);
-
- // Get insertion point at entry.
- BasicBlock::iterator getEntryIns(CallInst * UI);
- // Insert an Alloc instruction.
- AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
// Get a scalar native builtin single argument FP function
FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
+ /// Substitute a call to a known libcall with an intrinsic call. If \p
+ /// AllowMinSizeF32 is true, allow the f32 replacement in a minsize function.
+ bool shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
+ bool AllowMinSizeF32 = false,
+ bool AllowF64 = false,
+ bool AllowStrictFP = false);
+ void replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI,
+ Intrinsic::ID IntrID);
+
+ bool tryReplaceLibcallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI,
+ Intrinsic::ID IntrID,
+ bool AllowMinSizeF32 = false,
+ bool AllowF64 = false,
+ bool AllowStrictFP = false);
+
protected:
- CallInst *CI;
+ bool isUnsafeMath(const FPMathOperator *FPOp) const;
+ bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const;
+
+ bool canIncreasePrecisionOfConstantFold(const FPMathOperator *FPOp) const;
- bool isUnsafeMath(const CallInst *CI) const;
+ static void replaceCall(Instruction *I, Value *With) {
+ I->replaceAllUsesWith(With);
+ I->eraseFromParent();
+ }
- void replaceCall(Value *With) {
- CI->replaceAllUsesWith(With);
- CI->eraseFromParent();
+ static void replaceCall(FPMathOperator *I, Value *With) {
+ replaceCall(cast<Instruction>(I), With);
}
public:
- AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {}
+ AMDGPULibCalls() {}
- bool fold(CallInst *CI, AliasAnalysis *AA = nullptr);
+ bool fold(CallInst *CI);
+ void initFunction(Function &F, FunctionAnalysisManager &FAM);
void initNativeFuncs();
// Replace a normal math function call with its native version
@@ -132,57 +151,6 @@ public:
} // end llvm namespace
-namespace {
-
- class AMDGPUSimplifyLibCalls : public FunctionPass {
-
- AMDGPULibCalls Simplifier;
-
- public:
- static char ID; // Pass identification
-
- AMDGPUSimplifyLibCalls(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), Simplifier(TM) {
- initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AAResultsWrapperPass>();
- }
-
- bool runOnFunction(Function &M) override;
- };
-
- class AMDGPUUseNativeCalls : public FunctionPass {
-
- AMDGPULibCalls Simplifier;
-
- public:
- static char ID; // Pass identification
-
- AMDGPUUseNativeCalls() : FunctionPass(ID) {
- initializeAMDGPUUseNativeCallsPass(*PassRegistry::getPassRegistry());
- Simplifier.initNativeFuncs();
- }
-
- bool runOnFunction(Function &F) override;
- };
-
-} // end anonymous namespace.
-
-char AMDGPUSimplifyLibCalls::ID = 0;
-char AMDGPUUseNativeCalls::ID = 0;
-
-INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
- "Simplify well-known AMD library calls", false, false)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
- "Simplify well-known AMD library calls", false, false)
-
-INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",
- "Replace builtin math calls with that native versions.",
- false, false)
-
template <typename IRB>
static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
const Twine &Name = "") {
@@ -201,6 +169,15 @@ static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
return R;
}
+static FunctionType *getPownType(FunctionType *FT) {
+ Type *PowNExpTy = Type::getInt32Ty(FT->getContext());
+ if (VectorType *VecTy = dyn_cast<VectorType>(FT->getReturnType()))
+ PowNExpTy = VectorType::get(PowNExpTy, VecTy->getElementCount());
+
+ return FunctionType::get(FT->getReturnType(),
+ {FT->getParamType(0), PowNExpTy}, false);
+}
+
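getPownType derives a pown signature from a pow signature: the exponent parameter becomes i32, splatted to a vector of i32 with the return type's element count for vector calls. It feeds the later pow -> pown rewrite in fold(), which fires when the exponent is known to be integral. A host-side sketch of the identity that rewrite relies on, with pown_ref as a hypothetical reference implementation using binary exponentiation:

    #include <cmath>

    // Hypothetical reference for pown(x, n): binary exponentiation, taking
    // the reciprocal for negative exponents.
    static double pown_ref(double x, int n) {
      unsigned long long e = n < 0 ? -static_cast<long long>(n) : n;
      double r = 1.0, b = x;
      while (e) {
        if (e & 1)
          r *= b;
        b *= b;
        e >>= 1;
      }
      return n < 0 ? 1.0 / r : r;
    }

    // pow(x, y) where y holds an exact integer is equivalent to
    // pown(x, (int)y), which is what the fold emits.
    double pow_integral(double x, double y) {
      return pown_ref(x, static_cast<int>(y));
    }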
// Data structures for table-driven optimizations.
// FuncTbl works for both f32 and f64 functions with 1 input argument
@@ -444,13 +421,26 @@ bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName,
return AMDGPULibFunc::parse(FMangledName, FInfo);
}
-bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
- if (auto Op = dyn_cast<FPMathOperator>(CI))
- if (Op->isFast())
- return true;
- const Function *F = CI->getParent()->getParent();
- Attribute Attr = F->getFnAttribute("unsafe-fp-math");
- return Attr.getValueAsBool();
+bool AMDGPULibCalls::isUnsafeMath(const FPMathOperator *FPOp) const {
+ return UnsafeFPMath || FPOp->isFast();
+}
+
+bool AMDGPULibCalls::isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const {
+ return UnsafeFPMath ||
+ (FPOp->hasApproxFunc() && FPOp->hasNoNaNs() && FPOp->hasNoInfs());
+}
+
+bool AMDGPULibCalls::canIncreasePrecisionOfConstantFold(
+ const FPMathOperator *FPOp) const {
+ // TODO: Refine to approxFunc or contract
+ return isUnsafeMath(FPOp);
+}
+
+void AMDGPULibCalls::initFunction(Function &F, FunctionAnalysisManager &FAM) {
+ UnsafeFPMath = F.getFnAttribute("unsafe-fp-math").getValueAsBool();
+ AC = &FAM.getResult<AssumptionAnalysis>(F);
+ TLInfo = &FAM.getResult<TargetLibraryAnalysis>(F);
+ DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
}
bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
@@ -490,7 +480,7 @@ bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
<< " with native version of sin/cos");
- replaceCall(sinval);
+ replaceCall(aCI, sinval);
return true;
}
}
@@ -498,8 +488,9 @@ bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
}
bool AMDGPULibCalls::useNative(CallInst *aCI) {
- CI = aCI;
Function *Callee = aCI->getCalledFunction();
+ if (!Callee || aCI->isNoBuiltin())
+ return false;
FuncInfo FInfo;
if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() ||
@@ -538,29 +529,25 @@ bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
auto *M = Callee->getParent();
- auto &Ctx = M->getContext();
std::string Name = std::string(Callee->getName());
auto NumArg = CI->arg_size();
if (NumArg != 4 && NumArg != 6)
return false;
- auto *PacketSize = CI->getArgOperand(NumArg - 2);
- auto *PacketAlign = CI->getArgOperand(NumArg - 1);
- if (!isa<ConstantInt>(PacketSize) || !isa<ConstantInt>(PacketAlign))
+ ConstantInt *PacketSize =
+ dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 2));
+ ConstantInt *PacketAlign =
+ dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 1));
+ if (!PacketSize || !PacketAlign)
return false;
- unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue();
- Align Alignment = cast<ConstantInt>(PacketAlign)->getAlignValue();
+
+ unsigned Size = PacketSize->getZExtValue();
+ Align Alignment = PacketAlign->getAlignValue();
if (Alignment != Size)
return false;
- Type *PtrElemTy;
- if (Size <= 8)
- PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
- else
- PtrElemTy = FixedVectorType::get(Type::getInt64Ty(Ctx), Size / 8);
unsigned PtrArgLoc = CI->arg_size() - 3;
- auto PtrArg = CI->getArgOperand(PtrArgLoc);
- unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
- auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);
+ Value *PtrArg = CI->getArgOperand(PtrArgLoc);
+ Type *PtrTy = PtrArg->getType();
SmallVector<llvm::Type *, 6> ArgTys;
for (unsigned I = 0; I != PtrArgLoc; ++I)
@@ -575,11 +562,10 @@ bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
if (!F)
return false;
- auto *BCast = B.CreatePointerCast(PtrArg, PtrTy);
SmallVector<Value *, 6> Args;
for (unsigned I = 0; I != PtrArgLoc; ++I)
Args.push_back(CI->getArgOperand(I));
- Args.push_back(BCast);
+ Args.push_back(PtrArg);
auto *NCI = B.CreateCall(F, Args);
NCI->setAttributes(CI->getAttributes());
@@ -590,99 +576,242 @@ bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
return true;
}
-// This function returns false if no change; return true otherwise.
-bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
- this->CI = CI;
- Function *Callee = CI->getCalledFunction();
+static bool isKnownIntegral(const Value *V, const DataLayout &DL,
+ FastMathFlags FMF) {
+ if (isa<UndefValue>(V))
+ return true;
- // Ignore indirect calls.
- if (Callee == nullptr)
- return false;
+ if (const ConstantFP *CF = dyn_cast<ConstantFP>(V))
+ return CF->getValueAPF().isInteger();
- BasicBlock *BB = CI->getParent();
- LLVMContext &Context = CI->getParent()->getContext();
- IRBuilder<> B(Context);
+ if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) {
+ for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) {
+ Constant *ConstElt = CDV->getElementAsConstant(i);
+ if (isa<UndefValue>(ConstElt))
+ continue;
+ const ConstantFP *CFP = dyn_cast<ConstantFP>(ConstElt);
+ if (!CFP || !CFP->getValue().isInteger())
+ return false;
+ }
- // Set the builder to the instruction after the call.
- B.SetInsertPoint(BB, CI->getIterator());
+ return true;
+ }
- // Copy fast flags from the original call.
- if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
- B.setFastMathFlags(FPOp->getFastMathFlags());
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ switch (I->getOpcode()) {
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ // TODO: Could check nofpclass(inf) on incoming argument
+ if (FMF.noInfs())
+ return true;
- switch (Callee->getIntrinsicID()) {
+ // Need to check that the integer width cannot produce infinity, which
+ // computeKnownFPClass already knows how to do.
+ return isKnownNeverInfinity(I, DL);
+ case Instruction::Call: {
+ const CallInst *CI = cast<CallInst>(I);
+ switch (CI->getIntrinsicID()) {
+ case Intrinsic::trunc:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
+ case Intrinsic::round:
+ case Intrinsic::roundeven:
+ return (FMF.noInfs() && FMF.noNaNs()) ||
+ isKnownNeverInfOrNaN(I, DL, nullptr);
+ default:
+ break;
+ }
+
+ break;
+ }
default:
break;
- case Intrinsic::amdgcn_wavefrontsize:
- return !EnablePreLink && fold_wavefrontsize(CI, B);
}
+ return false;
+}
+
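isKnownIntegral is a compile-time approximation of "this FP value holds an exact integer": constants are inspected directly, while sitofp/uitofp and the rounding intrinsics qualify once infinity (and NaN, for the intrinsics) is ruled out. The runtime predicate it approximates is simply:

    #include <cmath>

    // Runtime analogue of the compile-time query: finite and equal to its
    // own truncation means the value is an exact integer.
    bool isIntegralValue(double v) {
      return std::isfinite(v) && v == std::trunc(v);
    }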
+// This function returns false if no change was made and true otherwise.
+bool AMDGPULibCalls::fold(CallInst *CI) {
+ Function *Callee = CI->getCalledFunction();
+ // Ignore indirect calls.
+ if (!Callee || Callee->isIntrinsic() || CI->isNoBuiltin())
+ return false;
+
FuncInfo FInfo;
if (!parseFunctionName(Callee->getName(), FInfo))
return false;
// Further check the number of arguments to see if they match.
- if (CI->arg_size() != FInfo.getNumArgs())
+ // TODO: Check calling convention matches too
+ if (!FInfo.isCompatibleSignature(CI->getFunctionType()))
return false;
- if (TDOFold(CI, FInfo))
- return true;
+ LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << '\n');
- // Under unsafe-math, evaluate calls if possible.
- // According to Brian Sumner, we can do this for all f32 function calls
- // using host's double function calls.
- if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
+ if (TDOFold(CI, FInfo))
return true;
- // Specialized optimizations for each function call
- switch (FInfo.getId()) {
- case AMDGPULibFunc::EI_RECIP:
- // skip vector function
- assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
- FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
- "recip must be an either native or half function");
- return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo);
+ IRBuilder<> B(CI);
- case AMDGPULibFunc::EI_DIVIDE:
- // skip vector function
- assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
- FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
- "divide must be an either native or half function");
- return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo);
-
- case AMDGPULibFunc::EI_POW:
- case AMDGPULibFunc::EI_POWR:
- case AMDGPULibFunc::EI_POWN:
- return fold_pow(CI, B, FInfo);
+ if (FPMathOperator *FPOp = dyn_cast<FPMathOperator>(CI)) {
+ // Under unsafe-math, evaluate calls if possible.
+ // According to Brian Sumner, we can do this for all f32 function calls
+ // using host's double function calls.
+ if (canIncreasePrecisionOfConstantFold(FPOp) && evaluateCall(CI, FInfo))
+ return true;
- case AMDGPULibFunc::EI_ROOTN:
- // skip vector function
- return (getVecSize(FInfo) != 1) ? false : fold_rootn(CI, B, FInfo);
+ // Copy fast flags from the original call.
+ FastMathFlags FMF = FPOp->getFastMathFlags();
+ B.setFastMathFlags(FMF);
+
+ // Specialized optimizations for each function call.
+ //
+ // TODO: Handle other simple intrinsic wrappers. Sqrt.
+ //
+ // TODO: Handle native functions
+ switch (FInfo.getId()) {
+ case AMDGPULibFunc::EI_EXP:
+ if (FMF.none())
+ return false;
+ return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp,
+ FMF.approxFunc());
+ case AMDGPULibFunc::EI_EXP2:
+ if (FMF.none())
+ return false;
+ return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp2,
+ FMF.approxFunc());
+ case AMDGPULibFunc::EI_LOG:
+ if (FMF.none())
+ return false;
+ return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log,
+ FMF.approxFunc());
+ case AMDGPULibFunc::EI_LOG2:
+ if (FMF.none())
+ return false;
+ return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log2,
+ FMF.approxFunc());
+ case AMDGPULibFunc::EI_LOG10:
+ if (FMF.none())
+ return false;
+ return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log10,
+ FMF.approxFunc());
+ case AMDGPULibFunc::EI_FMIN:
+ return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::minnum,
+ true, true);
+ case AMDGPULibFunc::EI_FMAX:
+ return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::maxnum,
+ true, true);
+ case AMDGPULibFunc::EI_FMA:
+ return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fma, true,
+ true);
+ case AMDGPULibFunc::EI_MAD:
+ return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fmuladd,
+ true, true);
+ case AMDGPULibFunc::EI_FABS:
+ return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fabs, true,
+ true, true);
+ case AMDGPULibFunc::EI_COPYSIGN:
+ return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::copysign,
+ true, true, true);
+ case AMDGPULibFunc::EI_FLOOR:
+ return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::floor, true,
+ true);
+ case AMDGPULibFunc::EI_CEIL:
+ return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::ceil, true,
+ true);
+ case AMDGPULibFunc::EI_TRUNC:
+ return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::trunc, true,
+ true);
+ case AMDGPULibFunc::EI_RINT:
+ return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::rint, true,
+ true);
+ case AMDGPULibFunc::EI_ROUND:
+ return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::round, true,
+ true);
+ case AMDGPULibFunc::EI_LDEXP: {
+ if (!shouldReplaceLibcallWithIntrinsic(CI, true, true))
+ return false;
- case AMDGPULibFunc::EI_FMA:
- case AMDGPULibFunc::EI_MAD:
- case AMDGPULibFunc::EI_NFMA:
- // skip vector function
- return (getVecSize(FInfo) != 1) ? false : fold_fma_mad(CI, B, FInfo);
+ Value *Arg1 = CI->getArgOperand(1);
+ if (VectorType *VecTy = dyn_cast<VectorType>(CI->getType());
+ VecTy && !isa<VectorType>(Arg1->getType())) {
+ Value *SplatArg1 = B.CreateVectorSplat(VecTy->getElementCount(), Arg1);
+ CI->setArgOperand(1, SplatArg1);
+ }
- case AMDGPULibFunc::EI_SQRT:
- return isUnsafeMath(CI) && fold_sqrt(CI, B, FInfo);
- case AMDGPULibFunc::EI_COS:
- case AMDGPULibFunc::EI_SIN:
- if ((getArgType(FInfo) == AMDGPULibFunc::F32 ||
- getArgType(FInfo) == AMDGPULibFunc::F64)
- && (FInfo.getPrefix() == AMDGPULibFunc::NOPFX))
- return fold_sincos(CI, B, AA);
+ CI->setCalledFunction(Intrinsic::getDeclaration(
+ CI->getModule(), Intrinsic::ldexp,
+ {CI->getType(), CI->getArgOperand(1)->getType()}));
+ return true;
+ }
+ case AMDGPULibFunc::EI_POW: {
+ Module *M = Callee->getParent();
+ AMDGPULibFunc PowrInfo(AMDGPULibFunc::EI_POWR, FInfo);
+ FunctionCallee PowrFunc = getFunction(M, PowrInfo);
+ CallInst *Call = cast<CallInst>(FPOp);
+
+ // pow(x, y) -> powr(x, y) for x >= -0.0
+ // TODO: Account for flags on current call
+ if (PowrFunc &&
+ cannotBeOrderedLessThanZero(FPOp->getOperand(0), M->getDataLayout(),
+ TLInfo, 0, AC, Call, DT)) {
+ Call->setCalledFunction(PowrFunc);
+ return fold_pow(FPOp, B, PowrInfo) || true;
+ }
- break;
- case AMDGPULibFunc::EI_READ_PIPE_2:
- case AMDGPULibFunc::EI_READ_PIPE_4:
- case AMDGPULibFunc::EI_WRITE_PIPE_2:
- case AMDGPULibFunc::EI_WRITE_PIPE_4:
- return fold_read_write_pipe(CI, B, FInfo);
+ // pow(x, y) -> pown(x, y) for known integral y
+ if (isKnownIntegral(FPOp->getOperand(1), M->getDataLayout(),
+ FPOp->getFastMathFlags())) {
+ FunctionType *PownType = getPownType(CI->getFunctionType());
+ AMDGPULibFunc PownInfo(AMDGPULibFunc::EI_POWN, PownType, true);
+ FunctionCallee PownFunc = getFunction(M, PownInfo);
+ if (PownFunc) {
+ // TODO: If the incoming integral value is an sitofp/uitofp, it won't
+ // fold out without a known range. We can probably take the source
+ // value directly.
+ Value *CastedArg =
+ B.CreateFPToSI(FPOp->getOperand(1), PownType->getParamType(1));
+ // Have to drop any nofpclass attributes on the original call site.
+ Call->removeParamAttrs(
+ 1, AttributeFuncs::typeIncompatible(CastedArg->getType()));
+ Call->setCalledFunction(PownFunc);
+ Call->setArgOperand(1, CastedArg);
+ return fold_pow(FPOp, B, PownInfo) || true;
+ }
+ }
- default:
- break;
+ return fold_pow(FPOp, B, FInfo);
+ }
+ case AMDGPULibFunc::EI_POWR:
+ case AMDGPULibFunc::EI_POWN:
+ return fold_pow(FPOp, B, FInfo);
+ case AMDGPULibFunc::EI_ROOTN:
+ return fold_rootn(FPOp, B, FInfo);
+ case AMDGPULibFunc::EI_SQRT:
+ return fold_sqrt(FPOp, B, FInfo);
+ case AMDGPULibFunc::EI_COS:
+ case AMDGPULibFunc::EI_SIN:
+ return fold_sincos(FPOp, B, FInfo);
+ default:
+ break;
+ }
+ } else {
+ // Specialized optimizations for each function call
+ switch (FInfo.getId()) {
+ case AMDGPULibFunc::EI_READ_PIPE_2:
+ case AMDGPULibFunc::EI_READ_PIPE_4:
+ case AMDGPULibFunc::EI_WRITE_PIPE_2:
+ case AMDGPULibFunc::EI_WRITE_PIPE_4:
+ return fold_read_write_pipe(CI, B, FInfo);
+ default:
+ break;
+ }
}
return false;
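The restructured fold() separates two tiers of libcall-to-intrinsic rewrites: exp/exp2/log/log2/log10 are replaced only when the call site carries fast-math flags (and survive the minsize gate only under afn), while fmin/fmax/fma/mad, fabs/copysign, and the rounding family are plain wrappers gated only by the minsize/f64/strictfp checks. A condensed, hypothetical view of that mapping (the switch above is authoritative):

    #include <string>
    #include <vector>

    // Condensed sketch of the dispatch: libcall, target intrinsic, and
    // whether fast-math flags are required on the call site.
    struct LibcallRule {
      std::string Libcall;
      std::string Intrinsic;
      bool NeedsFMF;
    };

    const std::vector<LibcallRule> Rules = {
        {"exp", "llvm.exp", true},      {"exp2", "llvm.exp2", true},
        {"log", "llvm.log", true},      {"log2", "llvm.log2", true},
        {"log10", "llvm.log10", true},  {"fmin", "llvm.minnum", false},
        {"fmax", "llvm.maxnum", false}, {"fma", "llvm.fma", false},
        {"mad", "llvm.fmuladd", false}, {"fabs", "llvm.fabs", false},
        {"copysign", "llvm.copysign", false},
        {"floor", "llvm.floor", false}, {"ceil", "llvm.ceil", false},
        {"trunc", "llvm.trunc", false}, {"rint", "llvm.rint", false},
        {"round", "llvm.round", false},
    };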
@@ -731,7 +860,7 @@ bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
nval = ConstantDataVector::get(context, tmp);
}
LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
- replaceCall(nval);
+ replaceCall(CI, nval);
return true;
}
} else {
@@ -741,7 +870,7 @@ bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
if (CF->isExactlyValue(tr[i].input)) {
Value *nval = ConstantFP::get(CF->getType(), tr[i].result);
LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
- replaceCall(nval);
+ replaceCall(CI, nval);
return true;
}
}
@@ -751,45 +880,6 @@ bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
return false;
}
-// [native_]half_recip(c) ==> 1.0/c
-bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
- const FuncInfo &FInfo) {
- Value *opr0 = CI->getArgOperand(0);
- if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
- // Just create a normal div. Later, InstCombine will be able
- // to compute the divide into a constant (avoid check float infinity
- // or subnormal at this point).
- Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0),
- opr0,
- "recip2div");
- LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
- replaceCall(nval);
- return true;
- }
- return false;
-}
-
-// [native_]half_divide(x, c) ==> x/c
-bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
- const FuncInfo &FInfo) {
- Value *opr0 = CI->getArgOperand(0);
- Value *opr1 = CI->getArgOperand(1);
- ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
- ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
-
- if ((CF0 && CF1) || // both are constants
- (CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32)))
- // CF1 is constant && f32 divide
- {
- Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0),
- opr1, "__div2recip");
- Value *nval = B.CreateFMul(opr0, nval1, "__div2mul");
- replaceCall(nval);
- return true;
- }
- return false;
-}
-
namespace llvm {
static double log2(double V) {
#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
@@ -800,81 +890,62 @@ static double log2(double V) {
}
}
-bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
+bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
const FuncInfo &FInfo) {
assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
FInfo.getId() == AMDGPULibFunc::EI_POWR ||
FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
"fold_pow: encounter a wrong function call");
- Value *opr0, *opr1;
- ConstantFP *CF;
- ConstantInt *CINT;
- ConstantAggregateZero *CZero;
- Type *eltType;
+ Module *M = B.GetInsertBlock()->getModule();
+ Type *eltType = FPOp->getType()->getScalarType();
+ Value *opr0 = FPOp->getOperand(0);
+ Value *opr1 = FPOp->getOperand(1);
- opr0 = CI->getArgOperand(0);
- opr1 = CI->getArgOperand(1);
- CZero = dyn_cast<ConstantAggregateZero>(opr1);
- if (getVecSize(FInfo) == 1) {
- eltType = opr0->getType();
- CF = dyn_cast<ConstantFP>(opr1);
- CINT = dyn_cast<ConstantInt>(opr1);
- } else {
- VectorType *VTy = dyn_cast<VectorType>(opr0->getType());
- assert(VTy && "Oprand of vector function should be of vectortype");
- eltType = VTy->getElementType();
- ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1);
-
- // Now, only Handle vector const whose elements have the same value.
- CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr;
- CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr;
- }
-
- // No unsafe math , no constant argument, do nothing
- if (!isUnsafeMath(CI) && !CF && !CINT && !CZero)
- return false;
+ const APFloat *CF = nullptr;
+ const APInt *CINT = nullptr;
+ if (!match(opr1, m_APFloatAllowUndef(CF)))
+ match(opr1, m_APIntAllowUndef(CINT));
// 0x1111111 means that we don't do anything for this call.
int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
- if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) {
+ if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0)) {
// pow/powr/pown(x, 0) == 1
- LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1\n");
Constant *cnval = ConstantFP::get(eltType, 1.0);
if (getVecSize(FInfo) > 1) {
cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
}
- replaceCall(cnval);
+ replaceCall(FPOp, cnval);
return true;
}
if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
// pow/powr/pown(x, 1.0) = x
- LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
- replaceCall(opr0);
+ LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n");
+ replaceCall(FPOp, opr0);
return true;
}
if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
// pow/powr/pown(x, 2.0) = x*x
- LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " << *opr0
- << "\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << " * "
+ << *opr0 << "\n");
Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
- replaceCall(nval);
+ replaceCall(FPOp, nval);
return true;
}
if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
// pow/powr/pown(x, -1.0) = 1.0/x
- LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1 / " << *opr0 << "\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1 / " << *opr0 << "\n");
Constant *cnval = ConstantFP::get(eltType, 1.0);
if (getVecSize(FInfo) > 1) {
cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
}
Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
- replaceCall(nval);
+ replaceCall(FPOp, nval);
return true;
}
- Module *M = CI->getModule();
if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
// pow[r](x, [-]0.5) = sqrt(x)
bool issqrt = CF->isExactlyValue(0.5);
@@ -882,16 +953,16 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
: AMDGPULibFunc::EI_RSQRT,
FInfo))) {
- LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
- << FInfo.getName().c_str() << "(" << *opr0 << ")\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << FInfo.getName()
+ << '(' << *opr0 << ")\n");
Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
: "__pow2rsqrt");
- replaceCall(nval);
+ replaceCall(FPOp, nval);
return true;
}
}
- if (!isUnsafeMath(CI))
+ if (!isUnsafeFiniteOnlyMath(FPOp))
return false;
// Unsafe Math optimization
@@ -899,8 +970,8 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
// Remember that ci_opr1 is set if opr1 is integral
if (CF) {
double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
- ? (double)CF->getValueAPF().convertToFloat()
- : CF->getValueAPF().convertToDouble();
+ ? (double)CF->convertToFloat()
+ : CF->convertToDouble();
int ival = (int)dval;
if ((double)ival == dval) {
ci_opr1 = ival;
@@ -939,31 +1010,39 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
}
nval = B.CreateFDiv(cnval, nval, "__1powprod");
}
- LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
+ LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
<< ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
<< ")\n");
- replaceCall(nval);
+ replaceCall(FPOp, nval);
return true;
}
+ // Whether we should use the generic intrinsic instead of emitting a libcall.
+ const bool ShouldUseIntrinsic = eltType->isFloatTy() || eltType->isHalfTy();
+
// powr ---> exp2(y * log2(x))
// pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
- FunctionCallee ExpExpr =
- getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
- if (!ExpExpr)
- return false;
+ FunctionCallee ExpExpr;
+ if (ShouldUseIntrinsic)
+ ExpExpr = Intrinsic::getDeclaration(M, Intrinsic::exp2, {FPOp->getType()});
+ else {
+ ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
+ if (!ExpExpr)
+ return false;
+ }
bool needlog = false;
bool needabs = false;
bool needcopysign = false;
Constant *cnval = nullptr;
if (getVecSize(FInfo) == 1) {
- CF = dyn_cast<ConstantFP>(opr0);
+ CF = nullptr;
+ match(opr0, m_APFloatAllowUndef(CF));
if (CF) {
double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
- ? (double)CF->getValueAPF().convertToFloat()
- : CF->getValueAPF().convertToDouble();
+ ? (double)CF->convertToFloat()
+ : CF->convertToDouble();
V = log2(std::abs(V));
cnval = ConstantFP::get(eltType, V);
@@ -986,9 +1065,7 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
SmallVector<double, 0> DVal;
for (int i=0; i < getVecSize(FInfo); ++i) {
- double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
- ? (double)CDV->getElementAsFloat(i)
- : CDV->getElementAsDouble(i);
+ double V = CDV->getElementAsAPFloat(i).convertToDouble();
if (V < 0.0) needcopysign = true;
V = log2(std::abs(V));
DVal.push_back(V);
@@ -1010,44 +1087,27 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
// We cannot handle corner cases for a general pow() function, so give up
// unless y is a constant integral value. Then proceed as if it were pown.
- if (getVecSize(FInfo) == 1) {
- if (const ConstantFP *CF = dyn_cast<ConstantFP>(opr1)) {
- double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
- ? (double)CF->getValueAPF().convertToFloat()
- : CF->getValueAPF().convertToDouble();
- if (y != (double)(int64_t)y)
- return false;
- } else
- return false;
- } else {
- if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) {
- for (int i=0; i < getVecSize(FInfo); ++i) {
- double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
- ? (double)CDV->getElementAsFloat(i)
- : CDV->getElementAsDouble(i);
- if (y != (double)(int64_t)y)
- return false;
- }
- } else
- return false;
- }
+ if (!isKnownIntegral(opr1, M->getDataLayout(), FPOp->getFastMathFlags()))
+ return false;
}
Value *nval;
if (needabs) {
- FunctionCallee AbsExpr =
- getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, FInfo));
- if (!AbsExpr)
- return false;
- nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
+ nval = B.CreateUnaryIntrinsic(Intrinsic::fabs, opr0, nullptr, "__fabs");
} else {
nval = cnval ? cnval : opr0;
}
if (needlog) {
- FunctionCallee LogExpr =
- getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
- if (!LogExpr)
- return false;
+ FunctionCallee LogExpr;
+ if (ShouldUseIntrinsic) {
+ LogExpr =
+ Intrinsic::getDeclaration(M, Intrinsic::log2, {FPOp->getType()});
+ } else {
+ LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
+ if (!LogExpr)
+ return false;
+ }
+
nval = CreateCallEx(B,LogExpr, nval, "__log2");
}
@@ -1061,14 +1121,14 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
if (needcopysign) {
Value *opr_n;
Type* rTy = opr0->getType();
- Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty();
+ Type* nTyS = B.getIntNTy(eltType->getPrimitiveSizeInBits());
Type *nTy = nTyS;
if (const auto *vTy = dyn_cast<FixedVectorType>(rTy))
nTy = FixedVectorType::get(nTyS, vTy);
unsigned size = nTy->getScalarSizeInBits();
- opr_n = CI->getArgOperand(1);
+ opr_n = FPOp->getOperand(1);
if (opr_n->getType()->isIntegerTy())
- opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou");
+ opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou");
else
opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
@@ -1078,17 +1138,21 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
nval = B.CreateBitCast(nval, opr0->getType());
}
- LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
+ LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
<< "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
- replaceCall(nval);
+ replaceCall(FPOp, nval);
return true;
}
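The general-case tail of fold_pow implements powr(x, y) = exp2(y * log2(x)), layering fabs and a copysign bit-trick on top so pow/pown with negative x and integral y keep the correct sign. The core identity, as a host-side sketch (valid for x > 0; the x == 0 and sign cases are handled separately above):

    #include <cmath>

    // powr lowering: x**y == 2**(y * log2(x)) for x > 0.
    double powr_via_exp2(double x, double y) {
      return std::exp2(y * std::log2(x));
    }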
-bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
+bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B,
const FuncInfo &FInfo) {
- Value *opr0 = CI->getArgOperand(0);
- Value *opr1 = CI->getArgOperand(1);
+ // skip vector function
+ if (getVecSize(FInfo) != 1)
+ return false;
+
+ Value *opr0 = FPOp->getOperand(0);
+ Value *opr1 = FPOp->getOperand(1);
ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
if (!CINT) {
@@ -1096,90 +1160,47 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
}
int ci_opr1 = (int)CINT->getSExtValue();
if (ci_opr1 == 1) { // rootn(x, 1) = x
- LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
- replaceCall(opr0);
+ LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n");
+ replaceCall(FPOp, opr0);
return true;
}
- if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
- Module *M = CI->getModule();
+
+ Module *M = B.GetInsertBlock()->getModule();
+ if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
if (FunctionCallee FPExpr =
getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
- LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> sqrt(" << *opr0
+ << ")\n");
Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
- replaceCall(nval);
+ replaceCall(FPOp, nval);
return true;
}
} else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
- Module *M = CI->getModule();
if (FunctionCallee FPExpr =
getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
- LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> cbrt(" << *opr0
+ << ")\n");
Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
- replaceCall(nval);
+ replaceCall(FPOp, nval);
return true;
}
} else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
- LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1.0 / " << *opr0 << "\n");
Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
opr0,
"__rootn2div");
- replaceCall(nval);
+ replaceCall(FPOp, nval);
return true;
- } else if (ci_opr1 == -2) { // rootn(x, -2) = rsqrt(x)
- Module *M = CI->getModule();
+ } else if (ci_opr1 == -2) { // rootn(x, -2) = rsqrt(x)
if (FunctionCallee FPExpr =
getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) {
- LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0
+ LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> rsqrt(" << *opr0
<< ")\n");
Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
- replaceCall(nval);
- return true;
- }
- }
- return false;
-}
-
-bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
- const FuncInfo &FInfo) {
- Value *opr0 = CI->getArgOperand(0);
- Value *opr1 = CI->getArgOperand(1);
- Value *opr2 = CI->getArgOperand(2);
-
- ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
- ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
- if ((CF0 && CF0->isZero()) || (CF1 && CF1->isZero())) {
- // fma/mad(a, b, c) = c if a=0 || b=0
- LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n");
- replaceCall(opr2);
- return true;
- }
- if (CF0 && CF0->isExactlyValue(1.0f)) {
- // fma/mad(a, b, c) = b+c if a=1
- LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr1 << " + " << *opr2
- << "\n");
- Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd");
- replaceCall(nval);
- return true;
- }
- if (CF1 && CF1->isExactlyValue(1.0f)) {
- // fma/mad(a, b, c) = a+c if b=1
- LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " + " << *opr2
- << "\n");
- Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd");
- replaceCall(nval);
- return true;
- }
- if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) {
- if (CF->isZero()) {
- // fma/mad(a, b, c) = a*b if c=0
- LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * "
- << *opr1 << "\n");
- Value *nval = B.CreateFMul(opr0, opr1, "fmamul");
- replaceCall(nval);
+ replaceCall(FPOp, nval);
return true;
}
}
-
return false;
}
@@ -1193,185 +1214,243 @@ FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
return getFunction(M, nf);
}
+// Some library calls are just wrappers around llvm intrinsics, but compiled
+// conservatively. Preserve the fast-math flags from the original call site by
+// substituting them with direct intrinsic calls carrying all the flags.
+bool AMDGPULibCalls::shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
+ bool AllowMinSizeF32,
+ bool AllowF64,
+ bool AllowStrictFP) {
+ Type *FltTy = CI->getType()->getScalarType();
+ const bool IsF32 = FltTy->isFloatTy();
+
+ // f64 intrinsics aren't implemented for most operations.
+ if (!IsF32 && !FltTy->isHalfTy() && (!AllowF64 || !FltTy->isDoubleTy()))
+ return false;
+
+ // We're implicitly inlining by replacing the libcall with the intrinsic, so
+ // don't do it for noinline call sites.
+ if (CI->isNoInline())
+ return false;
+
+ const Function *ParentF = CI->getFunction();
+ // TODO: Handle strictfp
+ if (!AllowStrictFP && ParentF->hasFnAttribute(Attribute::StrictFP))
+ return false;
+
+ if (IsF32 && !AllowMinSizeF32 && ParentF->hasMinSize())
+ return false;
+ return true;
+}
+
+void AMDGPULibCalls::replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B,
+ CallInst *CI,
+ Intrinsic::ID IntrID) {
+ if (CI->arg_size() == 2) {
+ Value *Arg0 = CI->getArgOperand(0);
+ Value *Arg1 = CI->getArgOperand(1);
+ VectorType *Arg0VecTy = dyn_cast<VectorType>(Arg0->getType());
+ VectorType *Arg1VecTy = dyn_cast<VectorType>(Arg1->getType());
+ if (Arg0VecTy && !Arg1VecTy) {
+ Value *SplatRHS = B.CreateVectorSplat(Arg0VecTy->getElementCount(), Arg1);
+ CI->setArgOperand(1, SplatRHS);
+ } else if (!Arg0VecTy && Arg1VecTy) {
+ Value *SplatLHS = B.CreateVectorSplat(Arg1VecTy->getElementCount(), Arg0);
+ CI->setArgOperand(0, SplatLHS);
+ }
+ }
+
+ CI->setCalledFunction(
+ Intrinsic::getDeclaration(CI->getModule(), IntrID, {CI->getType()}));
+}
+
+bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(
+ IRBuilder<> &B, CallInst *CI, Intrinsic::ID IntrID, bool AllowMinSizeF32,
+ bool AllowF64, bool AllowStrictFP) {
+ if (!shouldReplaceLibcallWithIntrinsic(CI, AllowMinSizeF32, AllowF64,
+ AllowStrictFP))
+ return false;
+ replaceLibCallWithSimpleIntrinsic(B, CI, IntrID);
+ return true;
+}
+
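replaceLibCallWithSimpleIntrinsic also irons out a signature mismatch: OpenCL permits mixed vector/scalar operands for some two-argument builtins (e.g. fmin(float4, float)), while the LLVM intrinsics require matching types, so whichever operand is scalar is splatted to the vector width first. Conceptually:

    #include <array>
    #include <cmath>
    #include <cstddef>

    // Sketch of the harmonization: broadcast the scalar operand, then apply
    // the elementwise operation at full vector width.
    template <std::size_t N>
    std::array<float, N> splat(float Scalar) {
      std::array<float, N> V;
      V.fill(Scalar);
      return V;
    }

    template <std::size_t N>
    std::array<float, N> fminVec(const std::array<float, N> &A,
                                 const std::array<float, N> &B) {
      std::array<float, N> R;
      for (std::size_t I = 0; I != N; ++I)
        R[I] = std::fmin(A[I], B[I]);
      return R;
    }

    // fmin(float4, float) becomes fmin(float4, splat<4>(scalar)).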
// fold sqrt -> native_sqrt (x)
-bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
+bool AMDGPULibCalls::fold_sqrt(FPMathOperator *FPOp, IRBuilder<> &B,
const FuncInfo &FInfo) {
+ if (!isUnsafeMath(FPOp))
+ return false;
+
if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
(FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
+ Module *M = B.GetInsertBlock()->getModule();
+
if (FunctionCallee FPExpr = getNativeFunction(
- CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
- Value *opr0 = CI->getArgOperand(0);
- LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
+ M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
+ Value *opr0 = FPOp->getOperand(0);
+ LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
<< "sqrt(" << *opr0 << ")\n");
Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
- replaceCall(nval);
+ replaceCall(FPOp, nval);
return true;
}
}
return false;
}
-// fold sin, cos -> sincos.
-bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
- AliasAnalysis *AA) {
- AMDGPULibFunc fInfo;
- if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo))
- return false;
+std::tuple<Value *, Value *, Value *>
+AMDGPULibCalls::insertSinCos(Value *Arg, FastMathFlags FMF, IRBuilder<> &B,
+ FunctionCallee Fsincos) {
+ DebugLoc DL = B.getCurrentDebugLocation();
+ Function *F = B.GetInsertBlock()->getParent();
+ B.SetInsertPointPastAllocas(F);
+ AllocaInst *Alloc = B.CreateAlloca(Arg->getType(), nullptr, "__sincos_");
+
+ if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
+ // If the argument is an instruction, it must dominate all uses so put our
+ // sincos call there. Otherwise, right after the allocas works well enough
+ // if it's an argument or constant.
+
+ B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
+
+ // SetInsertPoint unwelcomely always tries to set the debug loc.
+ B.SetCurrentDebugLocation(DL);
+ }
+
+ Type *CosPtrTy = Fsincos.getFunctionType()->getParamType(1);
+
+ // The allocaInst allocates the memory in the private address space. This
+ // needs to be addrspacecast to the address space of the cos pointer type.
+ // In OpenCL 2.0 this is generic, while in 1.2 it is private.
+ Value *CastAlloc = B.CreateAddrSpaceCast(Alloc, CosPtrTy);
+
+ CallInst *SinCos = CreateCallEx2(B, Fsincos, Arg, CastAlloc);
+
+ // TODO: Is it worth trying to preserve the debug location of the cos calls
+ // on the load?
+
+ LoadInst *LoadCos = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
+ return {SinCos, LoadCos, SinCos};
+}
+
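insertSinCos materializes the out-parameter form of sincos: an alloca for the cosine result (addrspacecast to the pointer type the sincos prototype expects), a single call, and a load. fold_sincos then redirects every sin(x), cos(x), and pre-existing sincos(x) of the same argument to that one call. The scalar idea, sketched:

    #include <cmath>

    // One evaluation feeding both consumers, standing in for the rewritten
    // IR: sin(x) and cos(x) become uses of a single sincos-style call.
    struct SinCos {
      double Sin, Cos;
    };

    SinCos evalSinCos(double x) {
      return {std::sin(x), std::cos(x)};
    }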
+// fold sin, cos -> sincos.
+bool AMDGPULibCalls::fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B,
+ const FuncInfo &fInfo) {
assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
fInfo.getId() == AMDGPULibFunc::EI_COS);
+
+ if ((getArgType(fInfo) != AMDGPULibFunc::F32 &&
+ getArgType(fInfo) != AMDGPULibFunc::F64) ||
+ fInfo.getPrefix() != AMDGPULibFunc::NOPFX)
+ return false;
+
bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
- Value *CArgVal = CI->getArgOperand(0);
- BasicBlock * const CBB = CI->getParent();
+ Value *CArgVal = FPOp->getOperand(0);
+ CallInst *CI = cast<CallInst>(FPOp);
- int const MaxScan = 30;
- bool Changed = false;
+ Function *F = B.GetInsertBlock()->getParent();
+ Module *M = F->getParent();
- { // fold in load value.
- LoadInst *LI = dyn_cast<LoadInst>(CArgVal);
- if (LI && LI->getParent() == CBB) {
- BasicBlock::iterator BBI = LI->getIterator();
- Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA);
- if (AvailableVal) {
- Changed = true;
- CArgVal->replaceAllUsesWith(AvailableVal);
- if (CArgVal->getNumUses() == 0)
- LI->eraseFromParent();
- CArgVal = CI->getArgOperand(0);
- }
- }
- }
+ // Merge the sin and cos. For OpenCL 2.0, there may only be a generic pointer
+ // implementation. Prefer the private form if available.
+ AMDGPULibFunc SinCosLibFuncPrivate(AMDGPULibFunc::EI_SINCOS, fInfo);
+ SinCosLibFuncPrivate.getLeads()[0].PtrKind =
+ AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::PRIVATE_ADDRESS);
- Module *M = CI->getModule();
- fInfo.setId(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN);
- std::string const PairName = fInfo.mangle();
+ AMDGPULibFunc SinCosLibFuncGeneric(AMDGPULibFunc::EI_SINCOS, fInfo);
+ SinCosLibFuncGeneric.getLeads()[0].PtrKind =
+ AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
+
+ FunctionCallee FSinCosPrivate = getFunction(M, SinCosLibFuncPrivate);
+ FunctionCallee FSinCosGeneric = getFunction(M, SinCosLibFuncGeneric);
+ FunctionCallee FSinCos = FSinCosPrivate ? FSinCosPrivate : FSinCosGeneric;
+ if (!FSinCos)
+ return false;
+
+ SmallVector<CallInst *> SinCalls;
+ SmallVector<CallInst *> CosCalls;
+ SmallVector<CallInst *> SinCosCalls;
+ FuncInfo PartnerInfo(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN,
+ fInfo);
+ const std::string PairName = PartnerInfo.mangle();
+
+ StringRef SinName = isSin ? CI->getCalledFunction()->getName() : PairName;
+ StringRef CosName = isSin ? PairName : CI->getCalledFunction()->getName();
+ const std::string SinCosPrivateName = SinCosLibFuncPrivate.mangle();
+ const std::string SinCosGenericName = SinCosLibFuncGeneric.mangle();
+
+ // Intersect the two sets of flags.
+ FastMathFlags FMF = FPOp->getFastMathFlags();
+ MDNode *FPMath = CI->getMetadata(LLVMContext::MD_fpmath);
+
+ SmallVector<DILocation *> MergeDbgLocs = {CI->getDebugLoc()};
- CallInst *UI = nullptr;
for (User* U : CArgVal->users()) {
- CallInst *XI = dyn_cast_or_null<CallInst>(U);
- if (!XI || XI == CI || XI->getParent() != CBB)
+ CallInst *XI = dyn_cast<CallInst>(U);
+ if (!XI || XI->getFunction() != F || XI->isNoBuiltin())
continue;
Function *UCallee = XI->getCalledFunction();
- if (!UCallee || !UCallee->getName().equals(PairName))
+ if (!UCallee)
continue;
- BasicBlock::iterator BBI = CI->getIterator();
- if (BBI == CI->getParent()->begin())
- break;
- --BBI;
- for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) {
- if (cast<Instruction>(BBI) == XI) {
- UI = XI;
- break;
- }
+ bool Handled = true;
+
+ if (UCallee->getName() == SinName)
+ SinCalls.push_back(XI);
+ else if (UCallee->getName() == CosName)
+ CosCalls.push_back(XI);
+ else if (UCallee->getName() == SinCosPrivateName ||
+ UCallee->getName() == SinCosGenericName)
+ SinCosCalls.push_back(XI);
+ else
+ Handled = false;
+
+ if (Handled) {
+ MergeDbgLocs.push_back(XI->getDebugLoc());
+ auto *OtherOp = cast<FPMathOperator>(XI);
+ FMF &= OtherOp->getFastMathFlags();
+ FPMath = MDNode::getMostGenericFPMath(
+ FPMath, XI->getMetadata(LLVMContext::MD_fpmath));
}
- if (UI) break;
}
- if (!UI)
- return Changed;
-
- // Merge the sin and cos.
+ if (SinCalls.empty() || CosCalls.empty())
+ return false;
- // for OpenCL 2.0 we have only generic implementation of sincos
- // function.
- AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
- nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
- FunctionCallee Fsincos = getFunction(M, nf);
- if (!Fsincos)
- return Changed;
+ B.setFastMathFlags(FMF);
+ B.setDefaultFPMathTag(FPMath);
+ DILocation *DbgLoc = DILocation::getMergedLocations(MergeDbgLocs);
+ B.SetCurrentDebugLocation(DbgLoc);
- BasicBlock::iterator ItOld = B.GetInsertPoint();
- AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_");
- B.SetInsertPoint(UI);
+ auto [Sin, Cos, SinCos] = insertSinCos(CArgVal, FMF, B, FSinCos);
- Value *P = Alloc;
- Type *PTy = Fsincos.getFunctionType()->getParamType(1);
- // The allocaInst allocates the memory in private address space. This need
- // to be bitcasted to point to the address space of cos pointer type.
- // In OpenCL 2.0 this is generic, while in 1.2 that is private.
- if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
- P = B.CreateAddrSpaceCast(Alloc, PTy);
- CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
-
- LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with "
- << *Call << "\n");
-
- if (!isSin) { // CI->cos, UI->sin
- B.SetInsertPoint(&*ItOld);
- UI->replaceAllUsesWith(&*Call);
- Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
- CI->replaceAllUsesWith(Reload);
- UI->eraseFromParent();
- CI->eraseFromParent();
- } else { // CI->sin, UI->cos
- Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
- UI->replaceAllUsesWith(Reload);
- CI->replaceAllUsesWith(Call);
- UI->eraseFromParent();
- CI->eraseFromParent();
- }
- return true;
-}
-
-bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) {
- if (!TM)
- return false;
+ auto replaceTrigInsts = [](ArrayRef<CallInst *> Calls, Value *Res) {
+ for (CallInst *C : Calls)
+ C->replaceAllUsesWith(Res);
- StringRef CPU = TM->getTargetCPU();
- StringRef Features = TM->getTargetFeatureString();
- if ((CPU.empty() || CPU.equals_insensitive("generic")) &&
- (Features.empty() || !Features.contains_insensitive("wavefrontsize")))
- return false;
-
- Function *F = CI->getParent()->getParent();
- const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F);
- unsigned N = ST.getWavefrontSize();
+ // Leave the other dead instructions to avoid clobbering iterators.
+ };
- LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with "
- << N << "\n");
+ replaceTrigInsts(SinCalls, Sin);
+ replaceTrigInsts(CosCalls, Cos);
+ replaceTrigInsts(SinCosCalls, SinCos);
- CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N));
+ // It's safe to delete the original now.
CI->eraseFromParent();
return true;
}
-// Get insertion point at entry.
-BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
- Function * Func = UI->getParent()->getParent();
- BasicBlock * BB = &Func->getEntryBlock();
- assert(BB && "Entry block not found!");
- BasicBlock::iterator ItNew = BB->begin();
- return ItNew;
-}
-
-// Insert a AllocsInst at the beginning of function entry block.
-AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
- const char *prefix) {
- BasicBlock::iterator ItNew = getEntryIns(UI);
- Function *UCallee = UI->getCalledFunction();
- Type *RetType = UCallee->getReturnType();
- B.SetInsertPoint(&*ItNew);
- AllocaInst *Alloc =
- B.CreateAlloca(RetType, nullptr, std::string(prefix) + UI->getName());
- Alloc->setAlignment(
- Align(UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType)));
- return Alloc;
-}
-
-bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo,
- double& Res0, double& Res1,
- Constant *copr0, Constant *copr1,
- Constant *copr2) {
+bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0,
+ double &Res1, Constant *copr0,
+ Constant *copr1) {
// By default, opr0/opr1 hold values of float/double type.
// If they are not float/double, each function has to convert its
// operand separately.
- double opr0=0.0, opr1=0.0, opr2=0.0;
+ double opr0 = 0.0, opr1 = 0.0;
ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
- ConstantFP *fpopr2 = dyn_cast_or_null<ConstantFP>(copr2);
if (fpopr0) {
opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
? fpopr0->getValueAPF().convertToDouble()
@@ -1384,12 +1463,6 @@ bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo,
: (double)fpopr1->getValueAPF().convertToFloat();
}
- if (fpopr2) {
- opr2 = (getArgType(FInfo) == AMDGPULibFunc::F64)
- ? fpopr2->getValueAPF().convertToDouble()
- : (double)fpopr2->getValueAPF().convertToFloat();
- }
-
switch (FInfo.getId()) {
default : return false;
@@ -1460,10 +1533,6 @@ bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo,
Res0 = pow(10.0, opr0);
return true;
- case AMDGPULibFunc::EI_EXPM1:
- Res0 = exp(opr0) - 1.0;
- return true;
-
case AMDGPULibFunc::EI_LOG:
Res0 = log(opr0);
return true;
@@ -1492,10 +1561,6 @@ bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo,
Res0 = sin(MATH_PI * opr0);
return true;
- case AMDGPULibFunc::EI_SQRT:
- Res0 = sqrt(opr0);
- return true;
-
case AMDGPULibFunc::EI_TAN:
Res0 = tan(opr0);
return true;
@@ -1508,15 +1573,7 @@ bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo,
Res0 = tan(MATH_PI * opr0);
return true;
- case AMDGPULibFunc::EI_RECIP:
- Res0 = 1.0 / opr0;
- return true;
-
// two-arg functions
- case AMDGPULibFunc::EI_DIVIDE:
- Res0 = opr0 / opr1;
- return true;
-
case AMDGPULibFunc::EI_POW:
case AMDGPULibFunc::EI_POWR:
Res0 = pow(opr0, opr1);
@@ -1545,12 +1602,6 @@ bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo,
Res0 = sin(opr0);
Res1 = cos(opr0);
return true;
-
- // three-arg functions
- case AMDGPULibFunc::EI_FMA:
- case AMDGPULibFunc::EI_MAD:
- Res0 = opr0 * opr1 + opr2;
- return true;
}
return false;
@@ -1563,7 +1614,6 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
Constant *copr0 = nullptr;
Constant *copr1 = nullptr;
- Constant *copr2 = nullptr;
if (numArgs > 0) {
if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
return false;
@@ -1576,11 +1626,6 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
}
}
- if (numArgs > 2) {
- if ((copr2 = dyn_cast<Constant>(aCI->getArgOperand(2))) == nullptr)
- return false;
- }
-
// At this point, all arguments to aCI are constants.
// max vector size is 16, and sincos will generate two results.
@@ -1588,31 +1633,27 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
int FuncVecSize = getVecSize(FInfo);
bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
if (FuncVecSize == 1) {
- if (!evaluateScalarMathFunc(FInfo, DVal0[0],
- DVal1[0], copr0, copr1, copr2)) {
+ if (!evaluateScalarMathFunc(FInfo, DVal0[0], DVal1[0], copr0, copr1)) {
return false;
}
} else {
ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
- ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2);
for (int i = 0; i < FuncVecSize; ++i) {
Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
- Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr;
- if (!evaluateScalarMathFunc(FInfo, DVal0[i],
- DVal1[i], celt0, celt1, celt2)) {
+ if (!evaluateScalarMathFunc(FInfo, DVal0[i], DVal1[i], celt0, celt1)) {
return false;
}
}
}
- LLVMContext &context = CI->getParent()->getParent()->getContext();
+ LLVMContext &context = aCI->getContext();
Constant *nval0, *nval1;
if (FuncVecSize == 1) {
- nval0 = ConstantFP::get(CI->getType(), DVal0[0]);
+ nval0 = ConstantFP::get(aCI->getType(), DVal0[0]);
if (hasTwoResults)
- nval1 = ConstantFP::get(CI->getType(), DVal1[0]);
+ nval1 = ConstantFP::get(aCI->getType(), DVal1[0]);
} else {
if (getArgType(FInfo) == AMDGPULibFunc::F32) {
SmallVector <float, 0> FVal0, FVal1;
@@ -1643,59 +1684,17 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
new StoreInst(nval1, aCI->getArgOperand(1), aCI);
}
- replaceCall(nval0);
+ replaceCall(aCI, nval0);
return true;
}
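evaluateScalarMathFunc folds constant arguments by calling the host's libm at double precision, which is why the fold is gated behind canIncreasePrecisionOfConstantFold: an f32 call folded through double can round differently from the device's f32 routine. Sketched for sin:

    #include <cmath>

    // Host-side constant fold of an f32 sin call: evaluate in double, then
    // round once to float. The result may differ from the device library's
    // f32 implementation in the last ulp, hence the unsafe-math gate.
    float foldSinF32(float x) {
      return static_cast<float>(std::sin(static_cast<double>(x)));
    }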
-// Public interface to the Simplify LibCalls pass.
-FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetMachine *TM) {
- return new AMDGPUSimplifyLibCalls(TM);
-}
-
-FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
- return new AMDGPUUseNativeCalls();
-}
-
-bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
-
- bool Changed = false;
- auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
-
- LLVM_DEBUG(dbgs() << "AMDIC: process function ";
- F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
-
- for (auto &BB : F) {
- for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
- // Ignore non-calls.
- CallInst *CI = dyn_cast<CallInst>(I);
- ++I;
- // Ignore intrinsics that do not become real instructions.
- if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
- continue;
-
- // Ignore indirect calls.
- Function *Callee = CI->getCalledFunction();
- if (Callee == nullptr)
- continue;
-
- LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
- dbgs().flush());
- if(Simplifier.fold(CI, AA))
- Changed = true;
- }
- }
- return Changed;
-}
-
PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F,
FunctionAnalysisManager &AM) {
- AMDGPULibCalls Simplifier(&TM);
+ AMDGPULibCalls Simplifier;
Simplifier.initNativeFuncs();
+ Simplifier.initFunction(F, AM);
bool Changed = false;
- auto AA = &AM.getResult<AAManager>(F);
LLVM_DEBUG(dbgs() << "AMDIC: process function ";
F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
@@ -1705,48 +1704,16 @@ PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F,
// Ignore non-calls.
CallInst *CI = dyn_cast<CallInst>(I);
++I;
- // Ignore intrinsics that do not become real instructions.
- if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
- continue;
- // Ignore indirect calls.
- Function *Callee = CI->getCalledFunction();
- if (Callee == nullptr)
- continue;
-
- LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
- dbgs().flush());
- if (Simplifier.fold(CI, AA))
- Changed = true;
+ if (CI) {
+ if (Simplifier.fold(CI))
+ Changed = true;
+ }
}
}
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
-bool AMDGPUUseNativeCalls::runOnFunction(Function &F) {
- if (skipFunction(F) || UseNative.empty())
- return false;
-
- bool Changed = false;
- for (auto &BB : F) {
- for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
- // Ignore non-calls.
- CallInst *CI = dyn_cast<CallInst>(I);
- ++I;
- if (!CI) continue;
-
- // Ignore indirect calls.
- Function *Callee = CI->getCalledFunction();
- if (Callee == nullptr)
- continue;
-
- if (Simplifier.useNative(CI))
- Changed = true;
- }
- }
- return Changed;
-}
-
PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F,
FunctionAnalysisManager &AM) {
if (UseNative.empty())
@@ -1754,6 +1721,7 @@ PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F,
AMDGPULibCalls Simplifier;
Simplifier.initNativeFuncs();
+ Simplifier.initFunction(F, AM);
bool Changed = false;
for (auto &BB : F) {
@@ -1761,15 +1729,7 @@ PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F,
// Ignore non-calls.
CallInst *CI = dyn_cast<CallInst>(I);
++I;
- if (!CI)
- continue;
-
- // Ignore indirect calls.
- Function *Callee = CI->getCalledFunction();
- if (Callee == nullptr)
- continue;
-
- if (Simplifier.useNative(CI))
+ if (CI && Simplifier.useNative(CI))
Changed = true;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
index 169a242d74e4..3437b6dc8ae0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
@@ -478,7 +478,7 @@ static bool eatTerm(StringRef& mangledName, const char c) {
template <size_t N>
static bool eatTerm(StringRef& mangledName, const char (&str)[N]) {
- if (mangledName.startswith(StringRef(str, N-1))) {
+ if (mangledName.starts_with(StringRef(str, N - 1))) {
drop_front(mangledName, N-1);
return true;
}
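For illustration, the same prefix-eating idiom can be rendered standalone with std::string_view in place of StringRef; the N - 1 accounts for the string literal's trailing NUL. A sketch, not part of the patch:

    #include <cstddef>
    #include <string_view>

    // Consume a fixed prefix from the front of a mangled name, mirroring
    // eatTerm() above; N - 1 drops the literal's trailing '\0'.
    template <size_t N>
    static bool eatPrefix(std::string_view &MangledName, const char (&Str)[N]) {
      if (MangledName.substr(0, N - 1) == std::string_view(Str, N - 1)) {
        MangledName.remove_prefix(N - 1);
        return true;
      }
      return false;
    }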
@@ -527,6 +527,16 @@ AMDGPUMangledLibFunc::AMDGPUMangledLibFunc(
Leads[1] = copyFrom.Leads[1];
}
+AMDGPUMangledLibFunc::AMDGPUMangledLibFunc(EFuncId id, FunctionType *FT,
+ bool SignedInts) {
+ FuncId = id;
+ unsigned NumArgs = FT->getNumParams();
+ if (NumArgs >= 1)
+ Leads[0] = Param::getFromTy(FT->getParamType(0), SignedInts);
+ if (NumArgs >= 2)
+ Leads[1] = Param::getFromTy(FT->getParamType(1), SignedInts);
+}
+
///////////////////////////////////////////////////////////////////////////////
// Demangling
@@ -875,6 +885,50 @@ std::string AMDGPUMangledLibFunc::mangleNameItanium() const {
///////////////////////////////////////////////////////////////////////////////
// Misc
+AMDGPULibFuncBase::Param AMDGPULibFuncBase::Param::getFromTy(Type *Ty,
+ bool Signed) {
+ Param P;
+ if (FixedVectorType *VT = dyn_cast<FixedVectorType>(Ty)) {
+ P.VectorSize = VT->getNumElements();
+ Ty = VT->getElementType();
+ }
+
+ switch (Ty->getTypeID()) {
+ case Type::FloatTyID:
+ P.ArgType = AMDGPULibFunc::F32;
+ break;
+ case Type::DoubleTyID:
+ P.ArgType = AMDGPULibFunc::F64;
+ break;
+ case Type::HalfTyID:
+ P.ArgType = AMDGPULibFunc::F16;
+ break;
+ case Type::IntegerTyID:
+ switch (cast<IntegerType>(Ty)->getBitWidth()) {
+ case 8:
+ P.ArgType = Signed ? AMDGPULibFunc::I8 : AMDGPULibFunc::U8;
+ break;
+ case 16:
+ P.ArgType = Signed ? AMDGPULibFunc::I16 : AMDGPULibFunc::U16;
+ break;
+ case 32:
+ P.ArgType = Signed ? AMDGPULibFunc::I32 : AMDGPULibFunc::U32;
+ break;
+ case 64:
+ P.ArgType = Signed ? AMDGPULibFunc::I64 : AMDGPULibFunc::U64;
+ break;
+ default:
+ llvm_unreachable("unhandled libcall argument type");
+ }
+
+ break;
+ default:
+ llvm_unreachable("unhandled libcall argument type");
+ }
+
+ return P;
+}
+
static Type* getIntrinsicParamType(
LLVMContext& C,
const AMDGPULibFunc::Param& P,
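A hedged usage sketch of the new getFromTy helper, assuming an LLVMContext Ctx is in scope and the usual IR headers are included: a <2 x i32> with Signed == false maps to a two-element unsigned 32-bit parameter.

    // Sketch: map an IR type onto a libfunc parameter descriptor.
    Type *V2I32 = FixedVectorType::get(Type::getInt32Ty(Ctx), 2);
    AMDGPULibFuncBase::Param P =
        AMDGPULibFuncBase::Param::getFromTy(V2I32, /*Signed=*/false);
    // Now P.ArgType == AMDGPULibFunc::U32 and P.VectorSize == 2.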
@@ -945,18 +999,25 @@ std::string AMDGPUMangledLibFunc::getName() const {
return std::string(OS.str());
}
+bool AMDGPULibFunc::isCompatibleSignature(const FunctionType *FuncTy) const {
+  // TODO: Validate that the parameter types make sense.
+ return !FuncTy->isVarArg() && FuncTy->getNumParams() == getNumArgs();
+}
+
Function *AMDGPULibFunc::getFunction(Module *M, const AMDGPULibFunc &fInfo) {
std::string FuncName = fInfo.mangle();
Function *F = dyn_cast_or_null<Function>(
M->getValueSymbolTable().lookup(FuncName));
+ if (!F || F->isDeclaration())
+ return nullptr;
- // check formal with actual types conformance
- if (F && !F->isDeclaration()
- && !F->isVarArg()
- && F->arg_size() == fInfo.getNumArgs()) {
- return F;
- }
- return nullptr;
+ if (F->hasFnAttribute(Attribute::NoBuiltin))
+ return nullptr;
+
+ if (!fInfo.isCompatibleSignature(F->getFunctionType()))
+ return nullptr;
+
+ return F;
}
FunctionCallee AMDGPULibFunc::getOrInsertFunction(Module *M,
@@ -965,11 +1026,12 @@ FunctionCallee AMDGPULibFunc::getOrInsertFunction(Module *M,
Function *F = dyn_cast_or_null<Function>(
M->getValueSymbolTable().lookup(FuncName));
- // check formal with actual types conformance
- if (F && !F->isDeclaration()
- && !F->isVarArg()
- && F->arg_size() == fInfo.getNumArgs()) {
- return F;
+ if (F) {
+ if (F->hasFnAttribute(Attribute::NoBuiltin))
+ return nullptr;
+ if (!F->isDeclaration() &&
+ fInfo.isCompatibleSignature(F->getFunctionType()))
+ return F;
}
FunctionType *FuncTy = fInfo.getFunctionType(*M);
@@ -1043,6 +1105,10 @@ AMDGPULibFunc::AMDGPULibFunc(EFuncId Id, const AMDGPULibFunc &CopyFrom) {
Id, *cast<AMDGPUMangledLibFunc>(CopyFrom.Impl.get())));
}
+AMDGPULibFunc::AMDGPULibFunc(EFuncId Id, FunctionType *FT, bool SignedInts) {
+ Impl.reset(new AMDGPUMangledLibFunc(Id, FT, SignedInts));
+}
+
AMDGPULibFunc::AMDGPULibFunc(StringRef Name, FunctionType *FT) {
Impl.reset(new AMDGPUUnmangledLibFunc(Name, FT));
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h
index bf0fda25b2c0..10551bee3fa8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h
@@ -18,6 +18,7 @@ class FunctionCallee;
class FunctionType;
class Function;
class Module;
+class Type;
class AMDGPULibFuncBase {
public:
@@ -290,18 +291,23 @@ public:
};
struct Param {
- unsigned char ArgType;
- unsigned char VectorSize;
- unsigned char PtrKind;
+ unsigned char ArgType = 0;
+ unsigned char VectorSize = 1;
+ unsigned char PtrKind = 0;
- unsigned char Reserved;
+ unsigned char Reserved = 0;
void reset() {
ArgType = 0;
VectorSize = 1;
PtrKind = 0;
}
- Param() { reset(); }
+
+ static Param getIntN(unsigned char NumElts) {
+ return Param{I32, NumElts, 0, 0};
+ }
+
+ static Param getFromTy(Type *Ty, bool Signed);
template <typename Stream>
void mangleItanium(Stream& os);
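With the user-declared constructor gone and the fields carrying default member initializers, Param is an aggregate again, which is what lets getIntN return a braced initializer. A hypothetical use:

    // Sketch: describe a <4 x i32> parameter.
    AMDGPULibFuncBase::Param P = AMDGPULibFuncBase::Param::getIntN(4);
    // P.ArgType == AMDGPULibFunc::I32 and P.VectorSize == 4.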
@@ -351,7 +357,7 @@ public:
protected:
EFuncId FuncId;
std::string Name;
- ENamePrefix FKind;
+ ENamePrefix FKind = NOPFX;
};
/// Wrapper class for AMDGPULIbFuncImpl
@@ -362,6 +368,8 @@ public:
/// Clone a mangled library func with the Id \p Id and argument info from \p
/// CopyFrom.
explicit AMDGPULibFunc(EFuncId Id, const AMDGPULibFunc &CopyFrom);
+ explicit AMDGPULibFunc(EFuncId Id, FunctionType *FT, bool SignedInts);
+
/// Construct an unmangled library function on the fly.
explicit AMDGPULibFunc(StringRef FName, FunctionType *FT);
@@ -383,6 +391,9 @@ public:
return Impl->parseFuncName(MangledName);
}
+ // Validate the call type matches the expected libfunc type.
+ bool isCompatibleSignature(const FunctionType *FuncTy) const;
+
/// \return The mangled function name for mangled library functions
/// and unmangled function name for unmangled library functions.
std::string mangle() const { return Impl->mangle(); }
@@ -412,6 +423,8 @@ public:
explicit AMDGPUMangledLibFunc();
explicit AMDGPUMangledLibFunc(EFuncId id,
const AMDGPUMangledLibFunc &copyFrom);
+ explicit AMDGPUMangledLibFunc(EFuncId id, FunctionType *FT,
+ bool SignedInts = true);
std::string getName() const override;
unsigned getNumArgs() const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index f5323725250f..c32303defe7f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -14,17 +14,59 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Target/TargetMachine.h"
+
#define DEBUG_TYPE "amdgpu-lower-kernel-arguments"
using namespace llvm;
namespace {
-class AMDGPULowerKernelArguments : public FunctionPass{
+class PreloadKernelArgInfo {
+private:
+ Function &F;
+ const GCNSubtarget &ST;
+ unsigned NumFreeUserSGPRs;
+
+public:
+ SmallVector<llvm::Metadata *, 8> KernelArgMetadata;
+
+ PreloadKernelArgInfo(Function &F, const GCNSubtarget &ST) : F(F), ST(ST) {
+ setInitialFreeUserSGPRsCount();
+ }
+
+  // Compute the initial number of user SGPRs that remain free to preload
+  // kernel arguments.
+ void setInitialFreeUserSGPRsCount() {
+ const unsigned MaxUserSGPRs = ST.getMaxNumUserSGPRs();
+ GCNUserSGPRUsageInfo UserSGPRInfo(F, ST);
+
+ NumFreeUserSGPRs = MaxUserSGPRs - UserSGPRInfo.getNumUsedUserSGPRs();
+ }
+
+ bool tryAllocPreloadSGPRs(unsigned AllocSize, uint64_t ArgOffset,
+ uint64_t LastExplicitArgOffset) {
+ // Check if this argument may be loaded into the same register as the
+ // previous argument.
+ if (!isAligned(Align(4), ArgOffset) && AllocSize < 4)
+ return true;
+
+ // Pad SGPRs for kernarg alignment.
+ unsigned Padding = ArgOffset - LastExplicitArgOffset;
+ unsigned PaddingSGPRs = alignTo(Padding, 4) / 4;
+ unsigned NumPreloadSGPRs = alignTo(AllocSize, 4) / 4;
+ if (NumPreloadSGPRs + PaddingSGPRs > NumFreeUserSGPRs)
+ return false;
+
+ NumFreeUserSGPRs -= (NumPreloadSGPRs + PaddingSGPRs);
+ return true;
+ }
+};
+
+class AMDGPULowerKernelArguments : public FunctionPass {
public:
static char ID;
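The SGPR accounting in tryAllocPreloadSGPRs rewards a worked example. The freestanding sketch below recreates the arithmetic with illustrative numbers (none taken from the patch): each user SGPR holds 4 bytes of kernarg data, and any alignment gap since the previous argument burns whole SGPRs too.

    #include <cassert>
    #include <cstdint>

    static uint64_t alignTo4(uint64_t X) { return (X + 3) / 4 * 4; }

    int main() {
      unsigned NumFreeUserSGPRs = 14;     // e.g. 16 max minus 2 already used
      uint64_t LastExplicitArgOffset = 4; // previous argument ended here
      uint64_t ArgOffset = 8;             // this argument starts here
      uint64_t AllocSize = 8;             // an i64 argument

      unsigned PaddingSGPRs = alignTo4(ArgOffset - LastExplicitArgOffset) / 4;
      unsigned NumPreloadSGPRs = alignTo4(AllocSize) / 4;
      assert(PaddingSGPRs == 1 && NumPreloadSGPRs == 2);

      // The argument fits, so it is preloaded and the budget shrinks.
      NumFreeUserSGPRs -= (NumPreloadSGPRs + PaddingSGPRs);
      assert(NumFreeUserSGPRs == 11);
    }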
@@ -55,14 +97,11 @@ static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
return InsPt;
}
-bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
+static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
CallingConv::ID CC = F.getCallingConv();
if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
return false;
- auto &TPC = getAnalysis<TargetPassConfig>();
-
- const TargetMachine &TM = TPC.getTM<TargetMachine>();
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
LLVMContext &Ctx = F.getParent()->getContext();
const DataLayout &DL = F.getParent()->getDataLayout();
@@ -87,6 +126,9 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
Attribute::getWithDereferenceableBytes(Ctx, TotalKernArgSize));
uint64_t ExplicitArgOffset = 0;
+ // Preloaded kernel arguments must be sequential.
+ bool InPreloadSequence = true;
+ PreloadKernelArgInfo PreloadInfo(F, ST);
for (Argument &Arg : F.args()) {
const bool IsByRef = Arg.hasByRefAttr();
@@ -98,8 +140,19 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
uint64_t EltOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + BaseOffset;
+ uint64_t LastExplicitArgOffset = ExplicitArgOffset;
ExplicitArgOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + AllocSize;
+ // Try to preload this argument into user SGPRs.
+ if (Arg.hasInRegAttr() && InPreloadSequence && ST.hasKernargPreload() &&
+ !ST.needsKernargPreloadBackwardsCompatibility() &&
+ !Arg.getType()->isAggregateType())
+ if (PreloadInfo.tryAllocPreloadSGPRs(AllocSize, EltOffset,
+ LastExplicitArgOffset))
+ continue;
+
+ InPreloadSequence = false;
+
if (Arg.use_empty())
continue;
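The InPreloadSequence flag enforces the sequential-preload rule stated above: once one argument fails to preload, every later argument is lowered normally even if it would fit. A freestanding sketch of the gate, with the argument list and predicate stubbed out:

    #include <vector>

    struct Arg { bool CanPreload; };

    // Count how many leading arguments get preloaded; the first failure
    // ends the sequence, mirroring InPreloadSequence in the loop above.
    static unsigned countPreloaded(const std::vector<Arg> &Args) {
      bool InPreloadSequence = true;
      unsigned N = 0;
      for (const Arg &A : Args) {
        if (InPreloadSequence && A.CanPreload) {
          ++N;
          continue;
        }
        InPreloadSequence = false;
      }
      return N;
    }
    // countPreloaded({{true}, {true}, {false}, {true}}) == 2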
@@ -232,6 +285,12 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
return true;
}
+bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
+ auto &TPC = getAnalysis<TargetPassConfig>();
+ const TargetMachine &TM = TPC.getTM<TargetMachine>();
+ return lowerKernelArguments(F, TM);
+}
+
INITIALIZE_PASS_BEGIN(AMDGPULowerKernelArguments, DEBUG_TYPE,
"AMDGPU Lower Kernel Arguments", false, false)
INITIALIZE_PASS_END(AMDGPULowerKernelArguments, DEBUG_TYPE, "AMDGPU Lower Kernel Arguments",
@@ -242,3 +301,16 @@ char AMDGPULowerKernelArguments::ID = 0;
FunctionPass *llvm::createAMDGPULowerKernelArgumentsPass() {
return new AMDGPULowerKernelArguments();
}
+
+PreservedAnalyses
+AMDGPULowerKernelArgumentsPass::run(Function &F, FunctionAnalysisManager &AM) {
+ bool Changed = lowerKernelArguments(F, TM);
+ if (Changed) {
+    // TODO: This pass preserves far more analyses; declare them.
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+ }
+
+ return PreservedAnalyses::all();
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
index 26074cf06071..097722157d41 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
@@ -14,6 +14,7 @@
#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -286,8 +287,8 @@ static bool processUse(CallInst *CI, bool IsV5OrAbove) {
if (HasReqdWorkGroupSize) {
ConstantInt *KnownSize
= mdconst::extract<ConstantInt>(MD->getOperand(I));
- UMin->replaceAllUsesWith(ConstantExpr::getIntegerCast(
- KnownSize, UMin->getType(), false));
+ UMin->replaceAllUsesWith(ConstantFoldIntegerCast(
+ KnownSize, UMin->getType(), false, DL));
} else {
UMin->replaceAllUsesWith(ZextGroupSize);
}
@@ -310,7 +311,7 @@ static bool processUse(CallInst *CI, bool IsV5OrAbove) {
ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(I));
GroupSize->replaceAllUsesWith(
- ConstantExpr::getIntegerCast(KnownSize, GroupSize->getType(), false));
+ ConstantFoldIntegerCast(KnownSize, GroupSize->getType(), false, DL));
MadeChange = true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index e3a645977f92..d2a02143e4e7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -177,6 +177,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDGPUMemoryUtils.h"
#include "llvm/ADT/BitVector.h"
@@ -184,8 +185,8 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
@@ -204,7 +205,6 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
-#include <tuple>
#include <vector>
#include <cstdio>
@@ -252,7 +252,8 @@ template <typename T> std::vector<T> sortByName(std::vector<T> &&V) {
return {std::move(V)};
}
-class AMDGPULowerModuleLDS : public ModulePass {
+class AMDGPULowerModuleLDS {
+ const AMDGPUTargetMachine &TM;
static void
removeLocalVarsFromUsedLists(Module &M,
@@ -291,7 +292,8 @@ class AMDGPULowerModuleLDS : public ModulePass {
// equivalent target specific intrinsic which lasts until immediately after
// codegen would suffice for that, but one would still need to ensure that
// the variables are allocated in the anticipated order.
- IRBuilder<> Builder(Func->getEntryBlock().getFirstNonPHI());
+ BasicBlock *Entry = &Func->getEntryBlock();
+ IRBuilder<> Builder(Entry, Entry->getFirstNonPHIIt());
Function *Decl =
Intrinsic::getDeclaration(Func->getParent(), Intrinsic::donothing, {});
@@ -326,11 +328,7 @@ class AMDGPULowerModuleLDS : public ModulePass {
}
public:
- static char ID;
-
- AMDGPULowerModuleLDS() : ModulePass(ID) {
- initializeAMDGPULowerModuleLDSPass(*PassRegistry::getPassRegistry());
- }
+ AMDGPULowerModuleLDS(const AMDGPUTargetMachine &TM_) : TM(TM_) {}
using FunctionVariableMap = DenseMap<Function *, DenseSet<GlobalVariable *>>;
@@ -854,7 +852,7 @@ public:
appendToCompilerUsed(M, {static_cast<GlobalValue *>(
ConstantExpr::getPointerBitCastOrAddrSpaceCast(
cast<Constant>(ModuleScopeReplacement.SGV),
- Type::getInt8PtrTy(Ctx)))});
+ PointerType::getUnqual(Ctx)))});
// module.lds will be allocated at zero in any kernel that allocates it
recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);
@@ -1089,7 +1087,7 @@ public:
return KernelToCreatedDynamicLDS;
}
- bool runOnModule(Module &M) override {
+ bool runOnModule(Module &M) {
CallGraph CG = CallGraph(M);
bool Changed = superAlignLDSGlobals(M);
@@ -1241,6 +1239,7 @@ public:
}
if (Offset != 0) {
+ (void)TM; // TODO: Account for target maximum LDS
std::string Buffer;
raw_string_ostream SS{Buffer};
SS << format("%u", Offset);
@@ -1367,9 +1366,9 @@ private:
Type *ATy = ArrayType::get(Type::getInt8Ty(Ctx), Padding);
LocalVars.push_back(new GlobalVariable(
- M, ATy, false, GlobalValue::InternalLinkage, UndefValue::get(ATy),
- "", nullptr, GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS,
- false));
+ M, ATy, false, GlobalValue::InternalLinkage,
+ PoisonValue::get(ATy), "", nullptr, GlobalValue::NotThreadLocal,
+ AMDGPUAS::LOCAL_ADDRESS, false));
IsPaddingField.push_back(true);
CurrentOffset += Padding;
}
@@ -1391,7 +1390,7 @@ private:
Align StructAlign = AMDGPU::getAlign(DL, LocalVars[0]);
GlobalVariable *SGV = new GlobalVariable(
- M, LDSTy, false, GlobalValue::InternalLinkage, UndefValue::get(LDSTy),
+ M, LDSTy, false, GlobalValue::InternalLinkage, PoisonValue::get(LDSTy),
VarName, nullptr, GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS,
false);
SGV->setAlignment(StructAlign);
@@ -1530,21 +1529,51 @@ private:
}
};
+class AMDGPULowerModuleLDSLegacy : public ModulePass {
+public:
+ const AMDGPUTargetMachine *TM;
+ static char ID;
+
+ AMDGPULowerModuleLDSLegacy(const AMDGPUTargetMachine *TM_ = nullptr)
+ : ModulePass(ID), TM(TM_) {
+ initializeAMDGPULowerModuleLDSLegacyPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ if (!TM)
+ AU.addRequired<TargetPassConfig>();
+ }
+
+ bool runOnModule(Module &M) override {
+ if (!TM) {
+ auto &TPC = getAnalysis<TargetPassConfig>();
+ TM = &TPC.getTM<AMDGPUTargetMachine>();
+ }
+
+ return AMDGPULowerModuleLDS(*TM).runOnModule(M);
+ }
+};
+
} // namespace
-char AMDGPULowerModuleLDS::ID = 0;
+char AMDGPULowerModuleLDSLegacy::ID = 0;
-char &llvm::AMDGPULowerModuleLDSID = AMDGPULowerModuleLDS::ID;
+char &llvm::AMDGPULowerModuleLDSLegacyPassID = AMDGPULowerModuleLDSLegacy::ID;
-INITIALIZE_PASS(AMDGPULowerModuleLDS, DEBUG_TYPE,
- "Lower uses of LDS variables from non-kernel functions", false,
- false)
+INITIALIZE_PASS_BEGIN(AMDGPULowerModuleLDSLegacy, DEBUG_TYPE,
+ "Lower uses of LDS variables from non-kernel functions",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(AMDGPULowerModuleLDSLegacy, DEBUG_TYPE,
+ "Lower uses of LDS variables from non-kernel functions",
+ false, false)
-ModulePass *llvm::createAMDGPULowerModuleLDSPass() {
- return new AMDGPULowerModuleLDS();
+ModulePass *
+llvm::createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM) {
+ return new AMDGPULowerModuleLDSLegacy(TM);
}
PreservedAnalyses AMDGPULowerModuleLDSPass::run(Module &M,
ModuleAnalysisManager &) {
- return AMDGPULowerModuleLDS().runOnModule(M) ? PreservedAnalyses::none()
- : PreservedAnalyses::all();
+ return AMDGPULowerModuleLDS(TM).runOnModule(M) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 44bbfe6f13d9..323462e60a29 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -24,6 +24,7 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
: IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
IsModuleEntryFunction(
AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
+ IsChainFunction(AMDGPU::isChainCC(F.getCallingConv())),
NoSignedZerosFPMath(false) {
// FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 5780fa64a7e4..248ee26a47eb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -20,7 +20,6 @@
namespace llvm {
class AMDGPUSubtarget;
-class GCNSubtarget;
class AMDGPUMachineFunction : public MachineFunctionInfo {
/// A map to keep track of local memory objects and their offsets within the
@@ -54,6 +53,9 @@ protected:
// Entry points called by other functions instead of directly by the hardware.
bool IsModuleEntryFunction = false;
+ // Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve CC.
+ bool IsChainFunction = false;
+
bool NoSignedZerosFPMath = false;
// Function may be memory bound.
@@ -85,6 +87,13 @@ public:
bool isModuleEntryFunction() const { return IsModuleEntryFunction; }
+ bool isChainFunction() const { return IsChainFunction; }
+
+ // The stack is empty upon entry to this function.
+ bool isBottomOfStack() const {
+ return isEntryFunction() || isChainFunction();
+ }
+
bool hasNoSignedZerosFPMath() const {
return NoSignedZerosFPMath;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
index c15c94ee17f8..0cbabf3895a6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
@@ -59,7 +59,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII_,
namespace llvm {
-std::unique_ptr<ScheduleDAGMutation> createAMDGPUMacroFusionDAGMutation () {
+std::unique_ptr<ScheduleDAGMutation> createAMDGPUMacroFusionDAGMutation() {
return createMacroFusionDAGMutation(shouldScheduleAdjacent);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
index 2092707c8a3f..4f5ca08b46c1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
@@ -92,9 +92,9 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
auto RuntimeHandle = (F.getName() + ".runtime_handle").str();
if (!HandleTy) {
Type *Int32 = Type::getInt32Ty(C);
- HandleTy = StructType::create(
- C, {Type::getInt8Ty(C)->getPointerTo(0), Int32, Int32},
- "block.runtime.handle.t");
+ HandleTy =
+ StructType::create(C, {PointerType::getUnqual(C), Int32, Int32},
+ "block.runtime.handle.t");
}
auto *GV = new GlobalVariable(
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index 536fb02cb4ec..7b18e1f805d8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -19,9 +19,9 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
-#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -42,27 +42,26 @@ namespace {
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES
-class AMDGPUPostLegalizerCombinerImpl : public GIMatchTableExecutor {
+class AMDGPUPostLegalizerCombinerImpl : public Combiner {
protected:
const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig;
-
- MachineIRBuilder &B;
- MachineFunction &MF;
- MachineRegisterInfo &MRI;
const GCNSubtarget &STI;
const SIInstrInfo &TII;
- AMDGPUCombinerHelper &Helper;
- GISelChangeObserver &Observer;
+ // TODO: Make CombinerHelper methods const.
+ mutable AMDGPUCombinerHelper Helper;
public:
AMDGPUPostLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
- MachineIRBuilder &B, AMDGPUCombinerHelper &Helper,
- GISelChangeObserver &Observer);
+ const GCNSubtarget &STI, MachineDominatorTree *MDT,
+ const LegalizerInfo *LI);
static const char *getName() { return "AMDGPUPostLegalizerCombinerImpl"; }
- bool tryCombineAll(MachineInstr &I) const;
+ bool tryCombineAllImpl(MachineInstr &I) const;
+ bool tryCombineAll(MachineInstr &I) const override;
struct FMinFMaxLegacyInfo {
Register LHS;
@@ -120,18 +119,36 @@ private:
#undef GET_GICOMBINER_IMPL
AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
- MachineIRBuilder &B, AMDGPUCombinerHelper &Helper,
- GISelChangeObserver &Observer)
- : RuleConfig(RuleConfig), B(B), MF(B.getMF()), MRI(*B.getMRI()),
- STI(MF.getSubtarget<GCNSubtarget>()), TII(*STI.getInstrInfo()),
- Helper(Helper), Observer(Observer),
+ const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
+ : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
+ TII(*STI.getInstrInfo()),
+ Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}
+bool AMDGPUPostLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
+ if (tryCombineAllImpl(MI))
+ return true;
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_ASHR:
+ // On some subtargets, 64-bit shift is a quarter rate instruction. In the
+ // common case, splitting this into a move and a 32-bit shift is faster and
+ // the same code size.
+ return Helper.tryCombineShiftToUnmerge(MI, 32);
+ }
+
+ return false;
+}
+
bool AMDGPUPostLegalizerCombinerImpl::matchFMinFMaxLegacy(
MachineInstr &MI, FMinFMaxLegacyInfo &Info) const {
// FIXME: Type predicate on pattern
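As a worked illustration of the comment in tryCombineAll above: for shift amounts of 32 or more, a 64-bit left shift reduces to one register move plus one 32-bit shift, which is the split tryCombineShiftToUnmerge performs. A freestanding C++ check, assuming an amount in [32, 64):

    #include <cassert>
    #include <cstdint>

    // The low half moves into the high half (one mov), then a 32-bit shift
    // by Amt - 32 finishes the job; the low half of the result is zero.
    static uint64_t shl64ViaUnmerge(uint64_t X, unsigned Amt) {
      assert(Amt >= 32 && Amt < 64);
      uint32_t Lo = static_cast<uint32_t>(X);
      uint32_t Hi = Lo << (Amt - 32);         // single 32-bit shift
      return static_cast<uint64_t>(Hi) << 32; // re-merge halves
    }

    int main() {
      uint64_t X = 0x12345678ULL;
      assert(shl64ViaUnmerge(X, 40) == X << 40);
    }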
@@ -265,17 +282,20 @@ void AMDGPUPostLegalizerCombinerImpl::applyUCharToFloat(
bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
MachineInstr &MI,
std::function<void(MachineIRBuilder &)> &MatchInfo) const {
-
- auto getRcpSrc = [=](const MachineInstr &MI) {
- MachineInstr *ResMI = nullptr;
- if (MI.getOpcode() == TargetOpcode::G_INTRINSIC &&
- MI.getIntrinsicID() == Intrinsic::amdgcn_rcp)
- ResMI = MRI.getVRegDef(MI.getOperand(2).getReg());
-
- return ResMI;
+ auto getRcpSrc = [=](const MachineInstr &MI) -> MachineInstr * {
+ if (!MI.getFlag(MachineInstr::FmContract))
+ return nullptr;
+
+ if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
+ if (GI->is(Intrinsic::amdgcn_rcp))
+ return MRI.getVRegDef(MI.getOperand(2).getReg());
+ }
+ return nullptr;
};
- auto getSqrtSrc = [=](const MachineInstr &MI) {
+ auto getSqrtSrc = [=](const MachineInstr &MI) -> MachineInstr * {
+ if (!MI.getFlag(MachineInstr::FmContract))
+ return nullptr;
MachineInstr *SqrtSrcMI = nullptr;
auto Match =
mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
@@ -287,7 +307,7 @@ bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
// rcp(sqrt(x))
if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
- B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
+ B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
.addUse(SqrtSrcMI->getOperand(0).getReg())
.setMIFlags(MI.getFlags());
};
@@ -297,13 +317,12 @@ bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
// sqrt(rcp(x))
if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
- B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
+ B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
.addUse(RcpSrcMI->getOperand(0).getReg())
.setMIFlags(MI.getFlags());
};
return true;
}
-
return false;
}
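Both directions of this match rest on one identity, valid only for positive inputs and applied here only under the contract fast-math flag checked in the two lambdas:

    \frac{1}{\sqrt{x}} \;=\; \sqrt{\frac{1}{x}} \;=\; \operatorname{rsq}(x), \qquad x > 0.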
@@ -400,51 +419,6 @@ void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg(
MI.eraseFromParent();
}
-class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
- GISelKnownBits *KB;
- MachineDominatorTree *MDT;
- AMDGPUPostLegalizerCombinerImplRuleConfig RuleConfig;
-
-public:
- AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
- const AMDGPULegalizerInfo *LI,
- GISelKnownBits *KB, MachineDominatorTree *MDT)
- : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
- /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
- KB(KB), MDT(MDT) {
- if (!RuleConfig.parseCommandLineOption())
- report_fatal_error("Invalid rule identifier");
- }
-
- bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
- MachineIRBuilder &B) const override;
-};
-
-bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
- MachineInstr &MI,
- MachineIRBuilder &B) const {
- AMDGPUCombinerHelper Helper(Observer, B, /*IsPreLegalize*/ false, KB, MDT,
- LInfo);
- // TODO: Do not re-create the Impl on every inst, it should be per function.
- AMDGPUPostLegalizerCombinerImpl Impl(RuleConfig, B, Helper, Observer);
- Impl.setupMF(*MI.getMF(), KB);
-
- if (Impl.tryCombineAll(MI))
- return true;
-
- switch (MI.getOpcode()) {
- case TargetOpcode::G_SHL:
- case TargetOpcode::G_LSHR:
- case TargetOpcode::G_ASHR:
- // On some subtargets, 64-bit shift is a quarter rate instruction. In the
- // common case, splitting this into a move and a 32-bit shift is faster and
- // the same code size.
- return Helper.tryCombineShiftToUnmerge(MI, 32);
- }
-
- return false;
-}
-
// Pass boilerplate
// ================
@@ -461,8 +435,10 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
+
private:
bool IsOptNone;
+ AMDGPUPostLegalizerCombinerImplRuleConfig RuleConfig;
};
} // end anonymous namespace
@@ -482,6 +458,9 @@ void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
: MachineFunctionPass(ID), IsOptNone(IsOptNone) {
initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+
+ if (!RuleConfig.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
}
bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
@@ -491,7 +470,7 @@ bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
auto *TPC = &getAnalysis<TargetPassConfig>();
const Function &F = MF.getFunction();
bool EnableOpt =
- MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
+ MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const AMDGPULegalizerInfo *LI =
@@ -500,10 +479,13 @@ bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
MachineDominatorTree *MDT =
IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
- AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
- F.hasMinSize(), LI, KB, MDT);
- Combiner C(PCInfo, TPC);
- return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
+
+ CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
+ LI, EnableOpt, F.hasOptSize(), F.hasMinSize());
+
+ AMDGPUPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, /*CSEInfo*/ nullptr,
+ RuleConfig, ST, MDT, LI);
+ return Impl.combineMachineInstrs();
}
char AMDGPUPostLegalizerCombiner::ID = 0;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
index 936ca54fcf2e..0c7e198810da 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -20,7 +20,6 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
-#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
@@ -42,26 +41,25 @@ namespace {
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES
-class AMDGPUPreLegalizerCombinerImpl : public GIMatchTableExecutor {
+class AMDGPUPreLegalizerCombinerImpl : public Combiner {
protected:
const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig;
const GCNSubtarget &STI;
-
- GISelChangeObserver &Observer;
- MachineIRBuilder &B;
- MachineFunction &MF;
- MachineRegisterInfo &MRI;
- AMDGPUCombinerHelper &Helper;
+ // TODO: Make CombinerHelper methods const.
+ mutable AMDGPUCombinerHelper Helper;
public:
AMDGPUPreLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
- const GCNSubtarget &STI, GISelChangeObserver &Observer,
- MachineIRBuilder &B, AMDGPUCombinerHelper &Helper);
+ const GCNSubtarget &STI, MachineDominatorTree *MDT,
+ const LegalizerInfo *LI);
static const char *getName() { return "AMDGPUPreLegalizerCombinerImpl"; }
- bool tryCombineAll(MachineInstr &I) const;
+ bool tryCombineAllImpl(MachineInstr &MI) const;
+ bool tryCombineAll(MachineInstr &I) const override;
struct ClampI64ToI16MatchInfo {
int64_t Cmp1 = 0;
@@ -91,17 +89,32 @@ private:
#undef GET_GICOMBINER_IMPL
AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
- const GCNSubtarget &STI, GISelChangeObserver &Observer, MachineIRBuilder &B,
- AMDGPUCombinerHelper &Helper)
- : RuleConfig(RuleConfig), STI(STI), Observer(Observer), B(B), MF(B.getMF()),
- MRI(*B.getMRI()), Helper(Helper),
+ const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
+ : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
+ Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}
+bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
+ if (tryCombineAllImpl(MI))
+ return true;
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_CONCAT_VECTORS:
+ return Helper.tryCombineConcatVectors(MI);
+ case TargetOpcode::G_SHUFFLE_VECTOR:
+ return Helper.tryCombineShuffleVector(MI);
+ }
+
+ return false;
+}
+
bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16(
MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineFunction &MF,
ClampI64ToI16MatchInfo &MatchInfo) const {
@@ -199,49 +212,6 @@ void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16(
MI.eraseFromParent();
}
-class AMDGPUPreLegalizerCombinerInfo final : public CombinerInfo {
- GISelKnownBits *KB;
- MachineDominatorTree *MDT;
- AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig;
-
-public:
- AMDGPUPreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
- GISelKnownBits *KB, MachineDominatorTree *MDT)
- : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
- /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
- KB(KB), MDT(MDT) {
- if (!RuleConfig.parseCommandLineOption())
- report_fatal_error("Invalid rule identifier");
- }
-
- bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
- MachineIRBuilder &B) const override;
-};
-
-bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
- MachineInstr &MI,
- MachineIRBuilder &B) const {
- const auto *LI = MI.getMF()->getSubtarget().getLegalizerInfo();
- AMDGPUCombinerHelper Helper(Observer, B, /*IsPreLegalize*/ true, KB, MDT, LI);
-
- const GCNSubtarget &STI = MI.getMF()->getSubtarget<GCNSubtarget>();
- // TODO: Do not re-create the Impl on every inst, it should be per function.
- AMDGPUPreLegalizerCombinerImpl Impl(RuleConfig, STI, Observer, B, Helper);
- Impl.setupMF(*MI.getMF(), KB);
-
- if (Impl.tryCombineAll(MI))
- return true;
-
- switch (MI.getOpcode()) {
- case TargetOpcode::G_CONCAT_VECTORS:
- return Helper.tryCombineConcatVectors(MI);
- case TargetOpcode::G_SHUFFLE_VECTOR:
- return Helper.tryCombineShuffleVector(MI);
- }
-
- return false;
-}
-
// Pass boilerplate
// ================
@@ -261,6 +231,7 @@ public:
private:
bool IsOptNone;
+ AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig;
};
} // end anonymous namespace
@@ -283,6 +254,9 @@ void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
: MachineFunctionPass(ID), IsOptNone(IsOptNone) {
initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+
+ if (!RuleConfig.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
}
bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
@@ -292,19 +266,22 @@ bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
auto *TPC = &getAnalysis<TargetPassConfig>();
const Function &F = MF.getFunction();
bool EnableOpt =
- MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
+ MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
- MachineDominatorTree *MDT =
- IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
- AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
- F.hasMinSize(), KB, MDT);
+
// Enable CSE.
GISelCSEAnalysisWrapper &Wrapper =
getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
- Combiner C(PCInfo, TPC);
- return C.combineMachineInstrs(MF, CSEInfo);
+ const GCNSubtarget &STI = MF.getSubtarget<GCNSubtarget>();
+ MachineDominatorTree *MDT =
+ IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
+ CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ nullptr, EnableOpt, F.hasOptSize(), F.hasMinSize());
+ AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo, RuleConfig,
+ STI, MDT, STI.getLegalizerInfo());
+ return Impl.combineMachineInstrs();
}
char AMDGPUPreLegalizerCombiner::ID = 0;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
index 13f83e298cf4..7b5dc3795b02 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
@@ -102,7 +102,7 @@ void AMDGPUPrintfRuntimeBindingImpl::getConversionSpecifiers(
bool ArgDump = false;
StringRef CurFmt = Fmt.substr(PrevFmtSpecifierIdx,
CurFmtSpecifierIdx - PrevFmtSpecifierIdx);
- size_t pTag = CurFmt.find_last_of("%");
+ size_t pTag = CurFmt.find_last_of('%');
if (pTag != StringRef::npos) {
ArgDump = true;
while (pTag && CurFmt[--pTag] == '%') {
@@ -439,7 +439,7 @@ bool AMDGPUPrintfRuntimeBindingImpl::run(Module &M) {
for (auto &U : PrintfFunction->uses()) {
if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
- if (CI->isCallee(&U))
+ if (CI->isCallee(&U) && !CI->isNoBuiltin())
Printfs.push_back(CI);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 17025867c1da..1bed516fb5c7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -185,7 +185,7 @@ INITIALIZE_PASS_BEGIN(AMDGPUPromoteAlloca, DEBUG_TYPE,
"AMDGPU promote alloca to vector or LDS", false, false)
// Move LDS uses from functions to kernels before promoting allocas, for an
// accurate estimate of the LDS still available
-INITIALIZE_PASS_DEPENDENCY(AMDGPULowerModuleLDS)
+INITIALIZE_PASS_DEPENDENCY(AMDGPULowerModuleLDSLegacy)
INITIALIZE_PASS_END(AMDGPUPromoteAlloca, DEBUG_TYPE,
"AMDGPU promote alloca to vector or LDS", false, false)
@@ -386,7 +386,6 @@ static Value *promoteAllocaUserToVector(
};
Type *VecEltTy = VectorTy->getElementType();
- const unsigned NumVecElts = VectorTy->getNumElements();
switch (Inst->getOpcode()) {
case Instruction::Load: {
@@ -419,11 +418,12 @@ static Value *promoteAllocaUserToVector(
auto *SubVecTy = FixedVectorType::get(VecEltTy, NumLoadedElts);
assert(DL.getTypeStoreSize(SubVecTy) == DL.getTypeStoreSize(AccessTy));
- unsigned IndexVal = cast<ConstantInt>(Index)->getZExtValue();
Value *SubVec = PoisonValue::get(SubVecTy);
for (unsigned K = 0; K < NumLoadedElts; ++K) {
+ Value *CurIdx =
+ Builder.CreateAdd(Index, ConstantInt::get(Index->getType(), K));
SubVec = Builder.CreateInsertElement(
- SubVec, Builder.CreateExtractElement(CurVal, IndexVal + K), K);
+ SubVec, Builder.CreateExtractElement(CurVal, CurIdx), K);
}
if (AccessTy->isPtrOrPtrVectorTy())
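The rewrite replaces the former compile-time-constant element index with per-element Index + K adds, so dynamic GEP indices can now be promoted. In scalar terms the emitted pattern is equivalent to this freestanding sketch:

    #include <array>
    #include <cassert>
    #include <cstddef>

    // Copy NumLoadedElts consecutive lanes out of a promoted "vector"
    // starting at a runtime index, the scalar analogue of the loop above.
    template <size_t N, size_t NumLoadedElts>
    std::array<float, NumLoadedElts>
    loadSubVec(const std::array<float, N> &Vec, size_t Index) {
      std::array<float, NumLoadedElts> Sub{};
      for (size_t K = 0; K < NumLoadedElts; ++K)
        Sub[K] = Vec[Index + K]; // CurIdx = Index + K in the IR version
      return Sub;
    }

    int main() {
      std::array<float, 8> V{0, 1, 2, 3, 4, 5, 6, 7};
      auto S = loadSubVec<8, 2>(V, 3); // Index known only at run time
      assert(S[0] == 3 && S[1] == 4);
    }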
@@ -469,6 +469,7 @@ static Value *promoteAllocaUserToVector(
assert(AccessSize.isKnownMultipleOf(DL.getTypeStoreSize(VecEltTy)));
const unsigned NumWrittenElts =
AccessSize / DL.getTypeStoreSize(VecEltTy);
+ const unsigned NumVecElts = VectorTy->getNumElements();
auto *SubVecTy = FixedVectorType::get(VecEltTy, NumWrittenElts);
assert(DL.getTypeStoreSize(SubVecTy) == DL.getTypeStoreSize(AccessTy));
@@ -479,12 +480,13 @@ static Value *promoteAllocaUserToVector(
Val = Builder.CreateBitOrPointerCast(Val, SubVecTy);
- unsigned IndexVal = cast<ConstantInt>(Index)->getZExtValue();
Value *CurVec = GetOrLoadCurrentVectorValue();
- for (unsigned K = 0; K < NumWrittenElts && ((IndexVal + K) < NumVecElts);
- ++K) {
+ for (unsigned K = 0, NumElts = std::min(NumWrittenElts, NumVecElts);
+ K < NumElts; ++K) {
+ Value *CurIdx =
+ Builder.CreateAdd(Index, ConstantInt::get(Index->getType(), K));
CurVec = Builder.CreateInsertElement(
- CurVec, Builder.CreateExtractElement(Val, K), IndexVal + K);
+ CurVec, Builder.CreateExtractElement(Val, K), CurIdx);
}
return CurVec;
}
@@ -679,6 +681,12 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
return RejectUser(Inst, "unsupported load/store as aggregate");
assert(!AccessTy->isAggregateType() || AccessTy->isArrayTy());
+ // Check that this is a simple access of a vector element.
+ bool IsSimple = isa<LoadInst>(Inst) ? cast<LoadInst>(Inst)->isSimple()
+ : cast<StoreInst>(Inst)->isSimple();
+ if (!IsSimple)
+ return RejectUser(Inst, "not a simple load or store");
+
Ptr = Ptr->stripPointerCasts();
// Alloca already accessed as vector.
@@ -688,11 +696,6 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
continue;
}
- // Check that this is a simple access of a vector element.
- bool IsSimple = isa<LoadInst>(Inst) ? cast<LoadInst>(Inst)->isSimple()
- : cast<StoreInst>(Inst)->isSimple();
- if (!IsSimple)
- return RejectUser(Inst, "not a simple load or store");
if (!isSupportedAccessType(VectorTy, AccessTy, *DL))
return RejectUser(Inst, "not a supported access type");
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index c935e384da8e..20e1aaa5419a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -20,7 +20,6 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
-#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
@@ -43,29 +42,27 @@ namespace {
#include "AMDGPUGenRegBankGICombiner.inc"
#undef GET_GICOMBINER_TYPES
-class AMDGPURegBankCombinerImpl : public GIMatchTableExecutor {
+class AMDGPURegBankCombinerImpl : public Combiner {
protected:
const AMDGPURegBankCombinerImplRuleConfig &RuleConfig;
-
- MachineIRBuilder &B;
- MachineFunction &MF;
- MachineRegisterInfo &MRI;
const GCNSubtarget &STI;
const RegisterBankInfo &RBI;
const TargetRegisterInfo &TRI;
const SIInstrInfo &TII;
- CombinerHelper &Helper;
- GISelChangeObserver &Observer;
+ // TODO: Make CombinerHelper methods const.
+ mutable CombinerHelper Helper;
public:
AMDGPURegBankCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
- MachineIRBuilder &B, CombinerHelper &Helper,
- GISelChangeObserver &Observer);
+ const GCNSubtarget &STI, MachineDominatorTree *MDT,
+ const LegalizerInfo *LI);
static const char *getName() { return "AMDGPURegBankCombinerImpl"; }
- bool tryCombineAll(MachineInstr &I) const;
+ bool tryCombineAll(MachineInstr &I) const override;
bool isVgprRegBank(Register Reg) const;
Register getAsVgpr(Register Reg) const;
@@ -114,12 +111,14 @@ private:
#undef GET_GICOMBINER_IMPL
AMDGPURegBankCombinerImpl::AMDGPURegBankCombinerImpl(
- const AMDGPURegBankCombinerImplRuleConfig &RuleConfig, MachineIRBuilder &B,
- CombinerHelper &Helper, GISelChangeObserver &Observer)
- : RuleConfig(RuleConfig), B(B), MF(B.getMF()), MRI(*B.getMRI()),
- STI(MF.getSubtarget<GCNSubtarget>()), RBI(*STI.getRegBankInfo()),
- TRI(*STI.getRegisterInfo()), TII(*STI.getInstrInfo()), Helper(Helper),
- Observer(Observer),
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
+ const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
+ const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
+ : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
+ RBI(*STI.getRegBankInfo()), TRI(*STI.getRegisterInfo()),
+ TII(*STI.getInstrInfo()),
+ Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenRegBankGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
@@ -396,36 +395,6 @@ bool AMDGPURegBankCombinerImpl::isClampZeroToOne(MachineInstr *K0,
return false;
}
-class AMDGPURegBankCombinerInfo final : public CombinerInfo {
- GISelKnownBits *KB;
- MachineDominatorTree *MDT;
- AMDGPURegBankCombinerImplRuleConfig RuleConfig;
-
-public:
- AMDGPURegBankCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
- const AMDGPULegalizerInfo *LI, GISelKnownBits *KB,
- MachineDominatorTree *MDT)
- : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
- /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
- KB(KB), MDT(MDT) {
- if (!RuleConfig.parseCommandLineOption())
- report_fatal_error("Invalid rule identifier");
- }
-
- bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
- MachineIRBuilder &B) const override;
-};
-
-bool AMDGPURegBankCombinerInfo::combine(GISelChangeObserver &Observer,
- MachineInstr &MI,
- MachineIRBuilder &B) const {
- CombinerHelper Helper(Observer, B, /* IsPreLegalize*/ false, KB, MDT);
- // TODO: Do not re-create the Impl on every inst, it should be per function.
- AMDGPURegBankCombinerImpl Impl(RuleConfig, B, Helper, Observer);
- Impl.setupMF(*MI.getMF(), KB);
- return Impl.tryCombineAll(MI);
-}
-
// Pass boilerplate
// ================
@@ -440,8 +409,10 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
+
private:
bool IsOptNone;
+ AMDGPURegBankCombinerImplRuleConfig RuleConfig;
};
} // end anonymous namespace
@@ -461,6 +432,9 @@ void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)
: MachineFunctionPass(ID), IsOptNone(IsOptNone) {
initializeAMDGPURegBankCombinerPass(*PassRegistry::getPassRegistry());
+
+ if (!RuleConfig.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
}
bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
@@ -470,19 +444,20 @@ bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
auto *TPC = &getAnalysis<TargetPassConfig>();
const Function &F = MF.getFunction();
bool EnableOpt =
- MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
+ MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- const AMDGPULegalizerInfo *LI =
- static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
-
GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+
+ const auto *LI = ST.getLegalizerInfo();
MachineDominatorTree *MDT =
IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
- AMDGPURegBankCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), F.hasMinSize(),
- LI, KB, MDT);
- Combiner C(PCInfo, TPC);
- return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
+
+ CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
+ LI, EnableOpt, F.hasOptSize(), F.hasMinSize());
+ AMDGPURegBankCombinerImpl Impl(MF, CInfo, TPC, *KB, /*CSEInfo*/ nullptr,
+ RuleConfig, ST, MDT, LI);
+ return Impl.combineMachineInstrs();
}
char AMDGPURegBankCombiner::ID = 0;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 0203af32e389..c9412f720c62 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -97,19 +97,25 @@ namespace {
// Observer to apply a register bank to new registers created by LegalizerHelper.
class ApplyRegBankMapping final : public GISelChangeObserver {
private:
+ MachineIRBuilder &B;
const AMDGPURegisterBankInfo &RBI;
MachineRegisterInfo &MRI;
const RegisterBank *NewBank;
SmallVector<MachineInstr *, 4> NewInsts;
public:
- ApplyRegBankMapping(const AMDGPURegisterBankInfo &RBI_,
+ ApplyRegBankMapping(MachineIRBuilder &B, const AMDGPURegisterBankInfo &RBI_,
MachineRegisterInfo &MRI_, const RegisterBank *RB)
- : RBI(RBI_), MRI(MRI_), NewBank(RB) {}
+ : B(B), RBI(RBI_), MRI(MRI_), NewBank(RB) {
+ assert(!B.isObservingChanges());
+ B.setChangeObserver(*this);
+ }
~ApplyRegBankMapping() {
for (MachineInstr *MI : NewInsts)
applyBank(*MI);
+
+ B.stopObservingChanges();
}
/// Set any registers that don't have a set register class or bank to SALU.
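ApplyRegBankMapping now follows an RAII discipline: the constructor attaches the observer to the builder, and the destructor fixes up everything it recorded and detaches before the builder can outlive it. A freestanding sketch of the pattern with placeholder types:

    #include <functional>
    #include <vector>

    // Stand-in for MachineIRBuilder with a single change-notification hook.
    struct MiniBuilder {
      std::function<void(int)> OnCreate; // fires for each new "instruction"
      void create(int Inst) { if (OnCreate) OnCreate(Inst); }
    };

    // RAII observer: registers itself on construction, processes what it
    // recorded and deregisters on destruction.
    class ScopedObserver {
      MiniBuilder &B;
      std::vector<int> NewInsts;

    public:
      explicit ScopedObserver(MiniBuilder &B) : B(B) {
        B.OnCreate = [this](int I) { NewInsts.push_back(I); };
      }
      ~ScopedObserver() {
        for (int I : NewInsts) { (void)I; /* apply the bank to I */ }
        B.OnCreate = nullptr; // stop observing before B outlives us
      }
    };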
@@ -131,7 +137,8 @@ public:
// Replace the extension with a select, which really uses the boolean
// source.
- MachineIRBuilder B(MI);
+ B.setInsertPt(*MI.getParent(), MI);
+
auto True = B.buildConstant(S32, Opc == AMDGPU::G_SEXT ? -1 : 1);
auto False = B.buildConstant(S32, 0);
B.buildSelect(DstReg, SrcReg, True, False);
@@ -193,6 +200,7 @@ public:
};
}
+
AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST)
: Subtarget(ST), TRI(Subtarget.getRegisterInfo()),
TII(Subtarget.getInstrInfo()) {
@@ -221,7 +229,7 @@ bool AMDGPURegisterBankInfo::isDivergentRegBank(const RegisterBank *RB) const {
unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
const RegisterBank &Src,
- unsigned Size) const {
+ TypeSize Size) const {
// TODO: Should there be a UniformVGPRRegBank which can use readfirstlane?
if (Dst.getID() == AMDGPU::SGPRRegBankID &&
(isVectorRegisterBank(Src) || Src.getID() == AMDGPU::VCCRegBankID)) {
@@ -337,7 +345,7 @@ AMDGPURegisterBankInfo::addMappingFromTable(
RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsic(
const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
- switch (MI.getIntrinsicID()) {
+ switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
case Intrinsic::amdgcn_readlane: {
static const OpRegBankEntry<3> Table[2] = {
// Perfectly legal.
@@ -378,7 +386,7 @@ RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
- switch (MI.getIntrinsicID()) {
+ switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
case Intrinsic::amdgcn_s_buffer_load: {
static const OpRegBankEntry<2> Table[4] = {
// Perfectly legal.
@@ -632,8 +640,10 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
return AltMappings;
}
case AMDGPU::G_INTRINSIC:
+ case AMDGPU::G_INTRINSIC_CONVERGENT:
return getInstrAlternativeMappingsIntrinsic(MI, MRI);
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
+ case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
return getInstrAlternativeMappingsIntrinsicWSideEffects(MI, MRI);
default:
break;
@@ -758,11 +768,8 @@ Register AMDGPURegisterBankInfo::buildReadFirstLane(MachineIRBuilder &B,
/// There is additional complexity in comparing values to identify the
/// unique values used.
bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
- MachineIRBuilder &B,
- iterator_range<MachineBasicBlock::iterator> Range,
- SmallSet<Register, 4> &SGPROperandRegs,
- MachineRegisterInfo &MRI) const {
-
+ MachineIRBuilder &B, iterator_range<MachineBasicBlock::iterator> Range,
+ SmallSet<Register, 4> &SGPROperandRegs) const {
// Track use registers which have already been expanded with a readfirstlane
// sequence. This may have multiple uses if moving a sequence.
DenseMap<Register, Register> WaterfalledRegMap;
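For readers new to the waterfall idiom: each iteration reads the value in the first still-active lane (readfirstlane), executes the operation for every lane holding that value, and masks those lanes off until none remain, so the trip count equals the number of unique values in the wave. A scalar simulation:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Simulate a waterfall loop over one divergent operand.
    static unsigned waterfallIterations(const std::vector<int> &LaneVals) {
      std::vector<bool> Active(LaneVals.size(), true);
      unsigned Iters = 0;
      for (size_t First = 0; First < LaneVals.size();) {
        int Uniform = LaneVals[First]; // "readfirstlane"
        for (size_t L = First; L < LaneVals.size(); ++L)
          if (Active[L] && LaneVals[L] == Uniform)
            Active[L] = false; // these lanes run this iteration
        ++Iters;
        while (First < LaneVals.size() && !Active[First])
          ++First;
      }
      return Iters;
    }

    int main() {
      assert(waterfallIterations({7, 7, 7, 7}) == 1); // uniform operand
      assert(waterfallIterations({1, 2, 1, 3}) == 3); // three unique values
    }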
@@ -787,6 +794,7 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
const int OrigRangeSize = std::distance(Range.begin(), Range.end());
#endif
+ MachineRegisterInfo &MRI = *B.getMRI();
Register SaveExecReg = MRI.createVirtualRegister(WaveRC);
Register InitSaveExecReg = MRI.createVirtualRegister(WaveRC);
@@ -922,8 +930,7 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
// The ballot becomes a no-op during instruction selection.
CondReg = B.buildIntrinsic(Intrinsic::amdgcn_ballot,
- {LLT::scalar(Subtarget.isWave32() ? 32 : 64)},
- false)
+ {LLT::scalar(Subtarget.isWave32() ? 32 : 64)})
.addReg(CondReg)
.getReg(0);
MRI.setRegClass(CondReg, WaveRC);
@@ -986,37 +993,28 @@ bool AMDGPURegisterBankInfo::collectWaterfallOperands(
}
bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
- MachineIRBuilder &B, MachineInstr &MI, MachineRegisterInfo &MRI,
- ArrayRef<unsigned> OpIndices) const {
+ MachineIRBuilder &B, MachineInstr &MI, ArrayRef<unsigned> OpIndices) const {
// Use a set to avoid extra readfirstlanes in the case where multiple operands
// are the same register.
SmallSet<Register, 4> SGPROperandRegs;
- if (!collectWaterfallOperands(SGPROperandRegs, MI, MRI, OpIndices))
+ if (!collectWaterfallOperands(SGPROperandRegs, MI, *B.getMRI(), OpIndices))
return false;
MachineBasicBlock::iterator I = MI.getIterator();
return executeInWaterfallLoop(B, make_range(I, std::next(I)),
- SGPROperandRegs, MRI);
-}
-
-bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
- MachineInstr &MI, MachineRegisterInfo &MRI,
- ArrayRef<unsigned> OpIndices) const {
- MachineIRBuilder B(MI);
- return executeInWaterfallLoop(B, MI, MRI, OpIndices);
+ SGPROperandRegs);
}
// Legalize an operand that must be an SGPR by inserting a readfirstlane.
void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
- MachineInstr &MI, MachineRegisterInfo &MRI, unsigned OpIdx) const {
+ MachineIRBuilder &B, MachineInstr &MI, unsigned OpIdx) const {
Register Reg = MI.getOperand(OpIdx).getReg();
+ MachineRegisterInfo &MRI = *B.getMRI();
const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
if (Bank == &AMDGPU::SGPRRegBank)
return;
- MachineIRBuilder B(MI);
-
Reg = buildReadFirstLane(B, MRI, Reg);
MI.getOperand(OpIdx).setReg(Reg);
}
@@ -1048,9 +1046,11 @@ static LLT widen96To128(LLT Ty) {
return LLT::fixed_vector(128 / EltTy.getSizeInBits(), EltTy);
}
-bool AMDGPURegisterBankInfo::applyMappingLoad(MachineInstr &MI,
- const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
- MachineRegisterInfo &MRI) const {
+bool AMDGPURegisterBankInfo::applyMappingLoad(
+ MachineIRBuilder &B,
+ const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
+ MachineInstr &MI) const {
+ MachineRegisterInfo &MRI = *B.getMRI();
Register DstReg = MI.getOperand(0).getReg();
const LLT LoadTy = MRI.getType(DstReg);
unsigned LoadSize = LoadTy.getSizeInBits();
@@ -1061,7 +1061,7 @@ bool AMDGPURegisterBankInfo::applyMappingLoad(MachineInstr &MI,
if (DstBank == &AMDGPU::SGPRRegBank) {
// There are some special cases that we need to look at for 32 bit and 96
// bit SGPR loads; otherwise we have nothing to do.
- if (LoadSize != 32 && LoadSize != 96)
+ if (LoadSize != 32 && (LoadSize != 96 || Subtarget.hasScalarDwordx3Loads()))
return false;
MachineMemOperand *MMO = *MI.memoperands_begin();
@@ -1076,8 +1076,7 @@ bool AMDGPURegisterBankInfo::applyMappingLoad(MachineInstr &MI,
Register PtrReg = MI.getOperand(1).getReg();
- ApplyRegBankMapping O(*this, MRI, &AMDGPU::SGPRRegBank);
- MachineIRBuilder B(MI, O);
+ ApplyRegBankMapping ApplyBank(B, *this, MRI, DstBank);
if (LoadSize == 32) {
// This is an extending load from a sub-dword size. Widen the memory
@@ -1098,10 +1097,7 @@ bool AMDGPURegisterBankInfo::applyMappingLoad(MachineInstr &MI,
// 96-bit loads are only available for vector loads. We need to split this
// into a 64-bit part and a 32-bit part (unless we can widen to a 128-bit load).
if (MMO->getAlign() < Align(16)) {
- MachineFunction *MF = MI.getParent()->getParent();
- ApplyRegBankMapping ApplyBank(*this, MRI, DstBank);
- MachineIRBuilder B(MI, ApplyBank);
- LegalizerHelper Helper(*MF, ApplyBank, B);
+ LegalizerHelper Helper(B.getMF(), ApplyBank, B);
LLT Part64, Part32;
std::tie(Part64, Part32) = splitUnequalType(LoadTy, 64);
if (Helper.reduceLoadStoreWidth(cast<GAnyLoad>(MI), 0, Part64) !=
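splitUnequalType, defined earlier in this file, carves the 96-bit type into a 64-bit piece and a 32-bit piece; for example <3 x s32> splits into <2 x s32> plus s32. A rough sketch of that shape, under a hypothetical name:

static std::pair<LLT, LLT> splitUnequalTypeSketch(LLT Ty, unsigned FirstSize) {
  unsigned TotalSize = Ty.getSizeInBits(); // 96 in the case above
  if (!Ty.isVector())
    return {LLT::scalar(FirstSize), LLT::scalar(TotalSize - FirstSize)};
  LLT EltTy = Ty.getElementType();
  unsigned EltSize = EltTy.getSizeInBits();
  // scalarOrVector folds a one-element result back to a scalar, so the
  // 32-bit remainder of <3 x s32> comes out as plain s32.
  return {LLT::scalarOrVector(ElementCount::getFixed(FirstSize / EltSize),
                              EltTy),
          LLT::scalarOrVector(
              ElementCount::getFixed((TotalSize - FirstSize) / EltSize),
              EltTy)};
}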
@@ -1144,9 +1140,8 @@ bool AMDGPURegisterBankInfo::applyMappingLoad(MachineInstr &MI,
unsigned NumSplitParts = LoadTy.getSizeInBits() / MaxNonSmrdLoadSize;
const LLT LoadSplitTy = LoadTy.divide(NumSplitParts);
- ApplyRegBankMapping Observer(*this, MRI, &AMDGPU::VGPRRegBank);
- MachineIRBuilder B(MI, Observer);
- LegalizerHelper Helper(B.getMF(), Observer, B);
+ ApplyRegBankMapping O(B, *this, MRI, &AMDGPU::VGPRRegBank);
+ LegalizerHelper Helper(B.getMF(), O, B);
if (LoadTy.isVector()) {
if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized)
@@ -1161,10 +1156,11 @@ bool AMDGPURegisterBankInfo::applyMappingLoad(MachineInstr &MI,
}
bool AMDGPURegisterBankInfo::applyMappingDynStackAlloc(
- MachineInstr &MI,
- const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
- MachineRegisterInfo &MRI) const {
- const MachineFunction &MF = *MI.getMF();
+ MachineIRBuilder &B,
+ const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
+ MachineInstr &MI) const {
+ MachineRegisterInfo &MRI = *B.getMRI();
+ const MachineFunction &MF = B.getMF();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const auto &TFI = *ST.getFrameLowering();
@@ -1188,8 +1184,7 @@ bool AMDGPURegisterBankInfo::applyMappingDynStackAlloc(
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
Register SPReg = Info->getStackPtrOffsetReg();
- ApplyRegBankMapping ApplyBank(*this, MRI, &AMDGPU::SGPRRegBank);
- MachineIRBuilder B(MI, ApplyBank);
+ ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::SGPRRegBank);
auto WaveSize = B.buildConstant(LLT::scalar(32), ST.getWavefrontSizeLog2());
auto ScaledSize = B.buildShl(IntPtrTy, AllocSize, WaveSize);
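The shift above scales the per-lane allocation up to a per-wave amount, because the scratch stack pointer is maintained in per-wave units. A worked example for a wave64 subtarget:

// getWavefrontSizeLog2() == 6 on wave64, so a dynamic allocation of
// 16 bytes per lane advances the stack pointer by:
//   ScaledSize = 16 << 6 = 1024 bytes of scratch
// On wave32 the shift amount is 5, giving 512 bytes instead.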
@@ -1208,8 +1203,9 @@ bool AMDGPURegisterBankInfo::applyMappingDynStackAlloc(
}
bool AMDGPURegisterBankInfo::applyMappingImage(
- MachineInstr &MI, const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
- MachineRegisterInfo &MRI, int RsrcIdx) const {
+ MachineIRBuilder &B, MachineInstr &MI,
+ const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
+ int RsrcIdx) const {
const int NumDefs = MI.getNumExplicitDefs();
// The reported argument index is relative to the IR intrinsic call arguments,
@@ -1230,7 +1226,7 @@ bool AMDGPURegisterBankInfo::applyMappingImage(
SGPRIndexes.push_back(I);
}
- executeInWaterfallLoop(MI, MRI, SGPRIndexes);
+ executeInWaterfallLoop(B, MI, SGPRIndexes);
return true;
}
@@ -1320,7 +1316,7 @@ unsigned AMDGPURegisterBankInfo::setBufferOffsets(
}
bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
- const OperandsMapper &OpdMapper) const {
+ MachineIRBuilder &B, const OperandsMapper &OpdMapper) const {
MachineInstr &MI = OpdMapper.getMI();
MachineRegisterInfo &MRI = OpdMapper.getMRI();
@@ -1350,7 +1346,6 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
// immediate offsets.
const Align Alignment = NumLoads > 1 ? Align(16 * NumLoads) : Align(1);
- MachineIRBuilder B(MI);
MachineFunction &MF = B.getMF();
Register SOffset;
@@ -1421,7 +1416,7 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
OpsToWaterfall.insert(RSrc);
executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()),
- OpsToWaterfall, MRI);
+ OpsToWaterfall);
}
if (NumLoads != 1) {
@@ -1438,7 +1433,8 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
return true;
}
-bool AMDGPURegisterBankInfo::applyMappingBFE(const OperandsMapper &OpdMapper,
+bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
+ const OperandsMapper &OpdMapper,
bool Signed) const {
MachineInstr &MI = OpdMapper.getMI();
MachineRegisterInfo &MRI = OpdMapper.getMRI();
@@ -1451,7 +1447,7 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(const OperandsMapper &OpdMapper,
const LLT S32 = LLT::scalar(32);
- unsigned FirstOpnd = MI.getOpcode() == AMDGPU::G_INTRINSIC ? 2 : 1;
+ unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
Register SrcReg = MI.getOperand(FirstOpnd).getReg();
Register OffsetReg = MI.getOperand(FirstOpnd + 1).getReg();
Register WidthReg = MI.getOperand(FirstOpnd + 2).getReg();
@@ -1464,8 +1460,7 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(const OperandsMapper &OpdMapper,
// There are no 64-bit VGPR bitfield extract instructions, so the operation
// is expanded to a sequence of instructions that implement it.
- ApplyRegBankMapping ApplyBank(*this, MRI, &AMDGPU::VGPRRegBank);
- MachineIRBuilder B(MI, ApplyBank);
+ ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::VGPRRegBank);
const LLT S64 = LLT::scalar(64);
// Shift the source operand so that extracted bits start at bit 0.
@@ -1517,8 +1512,7 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(const OperandsMapper &OpdMapper,
// The scalar form packs the offset and width in a single operand.
- ApplyRegBankMapping ApplyBank(*this, MRI, &AMDGPU::SGPRRegBank);
- MachineIRBuilder B(MI, ApplyBank);
+ ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::SGPRRegBank);
// Ensure the high bits are clear to insert the offset.
auto OffsetMask = B.buildConstant(S32, maskTrailingOnes<unsigned>(6));
@@ -1546,7 +1540,7 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(const OperandsMapper &OpdMapper,
}
bool AMDGPURegisterBankInfo::applyMappingMAD_64_32(
- const OperandsMapper &OpdMapper) const {
+ MachineIRBuilder &B, const OperandsMapper &OpdMapper) const {
MachineInstr &MI = OpdMapper.getMI();
MachineRegisterInfo &MRI = OpdMapper.getMRI();
@@ -1575,8 +1569,6 @@ bool AMDGPURegisterBankInfo::applyMappingMAD_64_32(
}
// Keep the multiplication on the SALU.
- MachineIRBuilder B(MI);
-
Register DstHi;
Register DstLo = B.buildMul(S32, Src0, Src1).getReg(0);
bool MulHiInVgpr = false;
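The SALU path assembles the 64-bit product from two 32-bit multiplies before the accumulator is added. Worked arithmetic for the split (opcode selection happens further down, so this is numbers only):

// Src0 * Src1 + Src2 with Src0 = 0xFFFFFFFF, Src1 = 2, Src2 = 1:
//   DstLo = mul_lo(Src0, Src1) = 0xFFFFFFFE
//   DstHi = mul_hi(Src0, Src1) = 0x00000001
//   adding Src2: DstLo + 1 = 0xFFFFFFFF with no carry into DstHi
//   64-bit result = 0x1FFFFFFFF = 0xFFFFFFFF * 2 + 1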
@@ -1792,7 +1784,7 @@ getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
std::pair<Register, unsigned>
AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
Register OrigOffset) const {
- const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
+ const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset(Subtarget);
Register BaseReg;
unsigned ImmOffset;
const LLT S32 = LLT::scalar(32);
@@ -1916,8 +1908,9 @@ static void extendLow32IntoHigh32(MachineIRBuilder &B,
}
bool AMDGPURegisterBankInfo::foldExtractEltToCmpSelect(
- MachineInstr &MI, MachineRegisterInfo &MRI,
- const OperandsMapper &OpdMapper) const {
+ MachineIRBuilder &B, MachineInstr &MI,
+ const OperandsMapper &OpdMapper) const {
+ MachineRegisterInfo &MRI = *B.getMRI();
Register VecReg = MI.getOperand(1).getReg();
Register Idx = MI.getOperand(2).getReg();
@@ -1935,7 +1928,6 @@ bool AMDGPURegisterBankInfo::foldExtractEltToCmpSelect(
IsDivergentIdx, &Subtarget))
return false;
- MachineIRBuilder B(MI);
LLT S32 = LLT::scalar(32);
const RegisterBank &DstBank =
@@ -2014,9 +2006,10 @@ static Register constrainRegToBank(MachineRegisterInfo &MRI,
}
bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect(
- MachineInstr &MI, MachineRegisterInfo &MRI,
- const OperandsMapper &OpdMapper) const {
+ MachineIRBuilder &B, MachineInstr &MI,
+ const OperandsMapper &OpdMapper) const {
+ MachineRegisterInfo &MRI = *B.getMRI();
Register VecReg = MI.getOperand(1).getReg();
Register Idx = MI.getOperand(3).getReg();
@@ -2033,7 +2026,6 @@ bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect(
IsDivergentIdx, &Subtarget))
return false;
- MachineIRBuilder B(MI);
LLT S32 = LLT::scalar(32);
const RegisterBank &DstBank =
@@ -2103,8 +2095,9 @@ bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect(
}
void AMDGPURegisterBankInfo::applyMappingImpl(
- const OperandsMapper &OpdMapper) const {
+ MachineIRBuilder &B, const OperandsMapper &OpdMapper) const {
MachineInstr &MI = OpdMapper.getMI();
+ B.setInstrAndDebugLoc(MI);
unsigned Opc = MI.getOpcode();
MachineRegisterInfo &MRI = OpdMapper.getMRI();
switch (Opc) {
@@ -2123,7 +2116,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
if (DefRegs.empty())
DefRegs.push_back(DstReg);
- MachineIRBuilder B(MI);
B.setInsertPt(*MI.getParent(), ++MI.getIterator());
Register NewDstReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
@@ -2156,8 +2148,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
// produce an invalid copy. We can only copy with some kind of compare to
// get a vector boolean result. Insert a register bank copy that will be
// correctly lowered to a compare.
- MachineIRBuilder B(*MI.getParent()->getParent());
-
for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
Register SrcReg = MI.getOperand(I).getReg();
const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
@@ -2179,16 +2169,19 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
substituteSimpleCopyRegs(OpdMapper, 0);
// Promote SGPR/VGPR booleans to s32
- MachineFunction *MF = MI.getParent()->getParent();
- ApplyRegBankMapping ApplyBank(*this, MRI, DstBank);
- MachineIRBuilder B(MI, ApplyBank);
- LegalizerHelper Helper(*MF, ApplyBank, B);
+ ApplyRegBankMapping ApplyBank(B, *this, MRI, DstBank);
+ B.setInsertPt(B.getMBB(), MI);
+ LegalizerHelper Helper(B.getMF(), ApplyBank, B);
if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized)
llvm_unreachable("widen scalar should have succeeded");
return;
}
+ case AMDGPU::G_FCMP:
+ if (!Subtarget.hasSALUFloatInsts())
+ break;
+ LLVM_FALLTHROUGH;
case AMDGPU::G_ICMP:
case AMDGPU::G_UADDO:
case AMDGPU::G_USUBO:
@@ -2196,7 +2189,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
case AMDGPU::G_SADDE:
case AMDGPU::G_USUBE:
case AMDGPU::G_SSUBE: {
- unsigned BoolDstOp = Opc == AMDGPU::G_ICMP ? 0 : 1;
+ unsigned BoolDstOp =
+ (Opc == AMDGPU::G_ICMP || Opc == AMDGPU::G_FCMP) ? 0 : 1;
Register DstReg = MI.getOperand(BoolDstOp).getReg();
const RegisterBank *DstBank =
@@ -2212,7 +2206,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
Register NewDstReg = MRI.createGenericVirtualRegister(S32);
MRI.setRegBank(NewDstReg, AMDGPU::SGPRRegBank);
MI.getOperand(BoolDstOp).setReg(NewDstReg);
- MachineIRBuilder B(MI);
if (HasCarryIn) {
Register NewSrcReg = MRI.createGenericVirtualRegister(S32);
@@ -2245,7 +2238,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
const RegisterBank *CondBank = getRegBank(CondRegs[0], MRI, *TRI);
if (CondBank == &AMDGPU::SGPRRegBank) {
- MachineIRBuilder B(MI);
const LLT S32 = LLT::scalar(32);
Register NewCondReg = MRI.createGenericVirtualRegister(S32);
MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
@@ -2257,7 +2249,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
if (DstTy.getSizeInBits() != 64)
break;
- MachineIRBuilder B(MI);
LLT HalfTy = getHalfSizedType(DstTy);
SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
@@ -2297,7 +2288,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
if (CondBank == &AMDGPU::SGPRRegBank) {
- MachineIRBuilder B(MI);
const LLT S32 = LLT::scalar(32);
Register NewCondReg = MRI.createGenericVirtualRegister(S32);
MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
@@ -2324,8 +2314,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
break;
MachineFunction *MF = MI.getParent()->getParent();
- ApplyRegBankMapping ApplyBank(*this, MRI, DstBank);
- MachineIRBuilder B(MI, ApplyBank);
+ ApplyRegBankMapping ApplyBank(B, *this, MRI, DstBank);
LegalizerHelper Helper(*MF, ApplyBank, B);
if (Helper.widenScalar(MI, 0, LLT::scalar(32)) !=
@@ -2355,7 +2344,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
// Depending on where the source registers came from, the generic code may
// have decided to split the inputs already or not. If not, we still need to
// extract the values.
- MachineIRBuilder B(MI);
if (Src0Regs.empty())
split64BitValueForMapping(B, Src0Regs, HalfTy, MI.getOperand(1).getReg());
@@ -2384,8 +2372,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
// max combination.
if (SrcBank && SrcBank == &AMDGPU::VGPRRegBank) {
MachineFunction *MF = MI.getParent()->getParent();
- ApplyRegBankMapping Apply(*this, MRI, &AMDGPU::VGPRRegBank);
- MachineIRBuilder B(MI, Apply);
+ ApplyRegBankMapping Apply(B, *this, MRI, &AMDGPU::VGPRRegBank);
LegalizerHelper Helper(*MF, Apply, B);
if (Helper.lowerAbsToMaxNeg(MI) != LegalizerHelper::Legalized)
@@ -2420,8 +2407,19 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
const LLT S32 = LLT::scalar(32);
MachineBasicBlock *MBB = MI.getParent();
MachineFunction *MF = MBB->getParent();
- ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank);
- MachineIRBuilder B(MI, ApplySALU);
+ ApplyRegBankMapping ApplySALU(B, *this, MRI, &AMDGPU::SGPRRegBank);
+
+ if (DstTy.isVector() && Opc == AMDGPU::G_ABS) {
+ Register WideSrcLo, WideSrcHi;
+
+ std::tie(WideSrcLo, WideSrcHi) =
+ unpackV2S16ToS32(B, MI.getOperand(1).getReg(), TargetOpcode::G_SEXT);
+ auto Lo = B.buildInstr(AMDGPU::G_ABS, {S32}, {WideSrcLo});
+ auto Hi = B.buildInstr(AMDGPU::G_ABS, {S32}, {WideSrcHi});
+ B.buildBuildVectorTrunc(DstReg, {Lo.getReg(0), Hi.getReg(0)});
+ MI.eraseFromParent();
+ return;
+ }
if (DstTy.isVector()) {
Register WideSrc0Lo, WideSrc0Hi;
@@ -2459,10 +2457,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
break; // Nothing to repair
const LLT S32 = LLT::scalar(32);
- MachineIRBuilder B(MI);
- ApplyRegBankMapping O(*this, MRI, &AMDGPU::VGPRRegBank);
- GISelObserverWrapper Observer(&O);
- B.setChangeObserver(Observer);
+ ApplyRegBankMapping O(B, *this, MRI, &AMDGPU::VGPRRegBank);
// Don't use LegalizerHelper's narrowScalar. It produces unwanted G_SEXTs
// we would need to further expand, and doesn't let us directly set the
@@ -2508,8 +2503,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
if (Ty == S32)
break;
- ApplyRegBankMapping ApplyVALU(*this, MRI, &AMDGPU::VGPRRegBank);
- MachineIRBuilder B(MI, ApplyVALU);
+ ApplyRegBankMapping ApplyVALU(B, *this, MRI, &AMDGPU::VGPRRegBank);
MachineFunction &MF = B.getMF();
LegalizerHelper Helper(MF, ApplyVALU, B);
@@ -2539,8 +2533,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
// (cttz_zero_undef hi:lo) -> (umin (add (ffbl hi), 32), (ffbl lo))
// (ffbh hi:lo) -> (umin (ffbh hi), (uaddsat (ffbh lo), 32))
// (ffbl hi:lo) -> (umin (uaddsat (ffbl hi), 32), (ffbl lo))
- ApplyRegBankMapping ApplyVALU(*this, MRI, &AMDGPU::VGPRRegBank);
- MachineIRBuilder B(MI, ApplyVALU);
+ ApplyRegBankMapping ApplyVALU(B, *this, MRI, &AMDGPU::VGPRRegBank);
SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1));
unsigned NewOpc = Opc == AMDGPU::G_CTLZ_ZERO_UNDEF
? (unsigned)AMDGPU::G_AMDGPU_FFBH_U32
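As a sanity check of the cttz_zero_undef identity above, take hi:lo = 0x00000001:0x00000000, that is, 1ull << 32:

// ffbl lo = ffbl(0)    -> all-ones (G_AMDGPU_FFBL_B32 yields -1 on zero)
// ffbl hi = ffbl(1)    -> 0, then add 32 -> 32
// umin(32, 0xFFFFFFFF) -> 32, matching cttz(1ull << 32) = 32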
@@ -2569,7 +2562,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
assert(OpdMapper.getVRegs(1).empty());
- MachineIRBuilder B(MI);
const RegisterBank *SrcBank =
OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
@@ -2654,11 +2646,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
LLT DstTy = MRI.getType(DstReg);
LLT SrcTy = MRI.getType(SrcReg);
- if (foldExtractEltToCmpSelect(MI, MRI, OpdMapper))
+ if (foldExtractEltToCmpSelect(B, MI, OpdMapper))
return;
- MachineIRBuilder B(MI);
-
const ValueMapping &DstMapping
= OpdMapper.getInstrMapping().getOperandMapping(0);
const RegisterBank *DstBank = DstMapping.BreakDown[0].RegBank;
@@ -2693,7 +2683,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
if (DstRegs.empty()) {
applyDefaultMapping(OpdMapper);
- executeInWaterfallLoop(MI, MRI, { 2 });
+ executeInWaterfallLoop(B, MI, {2});
if (NeedCopyToVGPR) {
// We don't want a phi for this temporary reg.
@@ -2752,7 +2742,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
B.setInstr(*Span.begin());
MI.eraseFromParent();
executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()),
- OpsToWaterfall, MRI);
+ OpsToWaterfall);
if (NeedCopyToVGPR) {
MachineBasicBlock *LoopBB = Extract1->getParent();
@@ -2787,7 +2777,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
if (substituteSimpleCopyRegs(OpdMapper, 1))
MRI.setType(MI.getOperand(1).getReg(), VecTy);
- if (foldInsertEltToCmpSelect(MI, MRI, OpdMapper))
+ if (foldInsertEltToCmpSelect(B, MI, OpdMapper))
return;
const RegisterBank *IdxBank =
@@ -2817,24 +2807,21 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
if (InsRegs.empty()) {
- executeInWaterfallLoop(MI, MRI, { 3 });
+ executeInWaterfallLoop(B, MI, {3});
// Re-insert the constant offset add inside the waterfall loop.
if (ShouldMoveIndexIntoLoop) {
- MachineIRBuilder B(MI);
reinsertVectorIndexAdd(B, MI, 3, ConstOffset);
}
return;
}
-
assert(InsTy.getSizeInBits() == 64);
const LLT S32 = LLT::scalar(32);
LLT Vec32 = LLT::fixed_vector(2 * VecTy.getNumElements(), 32);
- MachineIRBuilder B(MI);
auto CastSrc = B.buildBitcast(Vec32, SrcReg);
auto One = B.buildConstant(S32, 1);
@@ -2881,7 +2868,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
// Figure out the point after the waterfall loop before mangling the control
// flow.
executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()),
- OpsToWaterfall, MRI);
+ OpsToWaterfall);
// The insertion point is now right after the original instruction.
//
@@ -2913,7 +2900,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16: {
applyDefaultMapping(OpdMapper);
- executeInWaterfallLoop(MI, MRI, {1, 4});
+ executeInWaterfallLoop(B, MI, {1, 4});
return;
}
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
@@ -2929,27 +2916,28 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC: {
applyDefaultMapping(OpdMapper);
- executeInWaterfallLoop(MI, MRI, {2, 5});
+ executeInWaterfallLoop(B, MI, {2, 5});
return;
}
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
applyDefaultMapping(OpdMapper);
- executeInWaterfallLoop(MI, MRI, {2, 5});
+ executeInWaterfallLoop(B, MI, {2, 5});
return;
}
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
applyDefaultMapping(OpdMapper);
- executeInWaterfallLoop(MI, MRI, {3, 6});
+ executeInWaterfallLoop(B, MI, {3, 6});
return;
}
case AMDGPU::G_AMDGPU_S_BUFFER_LOAD: {
- applyMappingSBufferLoad(OpdMapper);
+ applyMappingSBufferLoad(B, OpdMapper);
return;
}
- case AMDGPU::G_INTRINSIC: {
- switch (MI.getIntrinsicID()) {
+ case AMDGPU::G_INTRINSIC:
+ case AMDGPU::G_INTRINSIC_CONVERGENT: {
+ switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
case Intrinsic::amdgcn_readlane: {
substituteSimpleCopyRegs(OpdMapper, 2);
@@ -2958,7 +2946,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
// Make sure the index is an SGPR. It doesn't make sense to run this in a
// waterfall loop, so assume it's a uniform value.
- constrainOpWithReadfirstlane(MI, MRI, 3); // Index
+ constrainOpWithReadfirstlane(B, MI, 3); // Index
return;
}
case Intrinsic::amdgcn_writelane: {
@@ -2967,8 +2955,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
assert(OpdMapper.getVRegs(3).empty());
substituteSimpleCopyRegs(OpdMapper, 4); // VGPR input val
- constrainOpWithReadfirstlane(MI, MRI, 2); // Source value
- constrainOpWithReadfirstlane(MI, MRI, 3); // Index
+ constrainOpWithReadfirstlane(B, MI, 2); // Source value
+ constrainOpWithReadfirstlane(B, MI, 3); // Index
return;
}
case Intrinsic::amdgcn_interp_p1:
@@ -2981,7 +2969,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
// Readlane for m0 value, which is always the last operand.
// FIXME: Should this be a waterfall loop instead?
- constrainOpWithReadfirstlane(MI, MRI, MI.getNumOperands() - 1); // Index
+ constrainOpWithReadfirstlane(B, MI, MI.getNumOperands() - 1); // Index
return;
}
case Intrinsic::amdgcn_interp_inreg_p10:
@@ -2995,19 +2983,22 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
// Doing a waterfall loop over these wouldn't make any sense.
substituteSimpleCopyRegs(OpdMapper, 2);
substituteSimpleCopyRegs(OpdMapper, 3);
- constrainOpWithReadfirstlane(MI, MRI, 4);
- constrainOpWithReadfirstlane(MI, MRI, 5);
+ constrainOpWithReadfirstlane(B, MI, 4);
+ constrainOpWithReadfirstlane(B, MI, 5);
return;
}
case Intrinsic::amdgcn_sbfe:
- applyMappingBFE(OpdMapper, true);
+ applyMappingBFE(B, OpdMapper, true);
return;
case Intrinsic::amdgcn_ubfe:
- applyMappingBFE(OpdMapper, false);
+ applyMappingBFE(B, OpdMapper, false);
return;
case Intrinsic::amdgcn_inverse_ballot:
+ case Intrinsic::amdgcn_s_bitreplicate:
+ case Intrinsic::amdgcn_s_quadmask:
+ case Intrinsic::amdgcn_s_wqm:
applyDefaultMapping(OpdMapper);
- constrainOpWithReadfirstlane(MI, MRI, 2); // Mask
+ constrainOpWithReadfirstlane(B, MI, 2); // Mask
return;
case Intrinsic::amdgcn_ballot:
// Use default handling and insert copy to vcc source.
@@ -3019,30 +3010,31 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
- const AMDGPU::RsrcIntrinsic *RSrcIntrin
- = AMDGPU::lookupRsrcIntrinsic(MI.getIntrinsicID());
+ const AMDGPU::RsrcIntrinsic *RSrcIntrin =
+ AMDGPU::lookupRsrcIntrinsic(AMDGPU::getIntrinsicID(MI));
assert(RSrcIntrin && RSrcIntrin->IsImage);
// Non-images can have complications from operands that allow both SGPR
// and VGPR. For now it's too complicated to figure out the final opcode
// to derive the register bank from the MCInstrDesc.
- applyMappingImage(MI, OpdMapper, MRI, RSrcIntrin->RsrcArg);
+ applyMappingImage(B, MI, OpdMapper, RSrcIntrin->RsrcArg);
return;
}
case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: {
unsigned N = MI.getNumExplicitOperands() - 2;
applyDefaultMapping(OpdMapper);
- executeInWaterfallLoop(MI, MRI, { N });
+ executeInWaterfallLoop(B, MI, {N});
return;
}
- case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
- auto IntrID = MI.getIntrinsicID();
+ case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
+ case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
+ auto IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
switch (IntrID) {
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap: {
// This is only allowed to execute with 1 lane, so readfirstlane is safe.
assert(OpdMapper.getVRegs(0).empty());
substituteSimpleCopyRegs(OpdMapper, 3);
- constrainOpWithReadfirstlane(MI, MRI, 2); // M0
+ constrainOpWithReadfirstlane(B, MI, 2); // M0
return;
}
case Intrinsic::amdgcn_ds_gws_init:
@@ -3050,62 +3042,85 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
case Intrinsic::amdgcn_ds_gws_sema_br: {
// Only the first lane executes, so readfirstlane is safe.
substituteSimpleCopyRegs(OpdMapper, 1);
- constrainOpWithReadfirstlane(MI, MRI, 2); // M0
+ constrainOpWithReadfirstlane(B, MI, 2); // M0
return;
}
case Intrinsic::amdgcn_ds_gws_sema_v:
case Intrinsic::amdgcn_ds_gws_sema_p:
case Intrinsic::amdgcn_ds_gws_sema_release_all: {
// Only the first lane executes, so readfirstlane is safe.
- constrainOpWithReadfirstlane(MI, MRI, 1); // M0
+ constrainOpWithReadfirstlane(B, MI, 1); // M0
return;
}
case Intrinsic::amdgcn_ds_append:
case Intrinsic::amdgcn_ds_consume: {
- constrainOpWithReadfirstlane(MI, MRI, 2); // M0
+ constrainOpWithReadfirstlane(B, MI, 2); // M0
return;
}
case Intrinsic::amdgcn_s_sendmsg:
case Intrinsic::amdgcn_s_sendmsghalt: {
// FIXME: Should this use a waterfall loop?
- constrainOpWithReadfirstlane(MI, MRI, 2); // M0
+ constrainOpWithReadfirstlane(B, MI, 2); // M0
return;
}
case Intrinsic::amdgcn_s_setreg: {
- constrainOpWithReadfirstlane(MI, MRI, 2);
+ constrainOpWithReadfirstlane(B, MI, 2);
return;
}
+ case Intrinsic::amdgcn_s_ttracedata:
+ constrainOpWithReadfirstlane(B, MI, 1); // M0
+ return;
case Intrinsic::amdgcn_raw_buffer_load_lds:
case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
applyDefaultMapping(OpdMapper);
- constrainOpWithReadfirstlane(MI, MRI, 1); // rsrc
- constrainOpWithReadfirstlane(MI, MRI, 2); // M0
- constrainOpWithReadfirstlane(MI, MRI, 5); // soffset
+ constrainOpWithReadfirstlane(B, MI, 1); // rsrc
+ constrainOpWithReadfirstlane(B, MI, 2); // M0
+ constrainOpWithReadfirstlane(B, MI, 5); // soffset
return;
}
case Intrinsic::amdgcn_struct_buffer_load_lds:
case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
applyDefaultMapping(OpdMapper);
- constrainOpWithReadfirstlane(MI, MRI, 1); // rsrc
- constrainOpWithReadfirstlane(MI, MRI, 2); // M0
- constrainOpWithReadfirstlane(MI, MRI, 6); // soffset
+ constrainOpWithReadfirstlane(B, MI, 1); // rsrc
+ constrainOpWithReadfirstlane(B, MI, 2); // M0
+ constrainOpWithReadfirstlane(B, MI, 6); // soffset
return;
}
case Intrinsic::amdgcn_global_load_lds: {
applyDefaultMapping(OpdMapper);
- constrainOpWithReadfirstlane(MI, MRI, 2);
+ constrainOpWithReadfirstlane(B, MI, 2);
return;
}
case Intrinsic::amdgcn_lds_direct_load: {
applyDefaultMapping(OpdMapper);
// Readlane for m0 value, which is always the last operand.
- constrainOpWithReadfirstlane(MI, MRI, MI.getNumOperands() - 1); // Index
+ constrainOpWithReadfirstlane(B, MI, MI.getNumOperands() - 1); // Index
return;
}
case Intrinsic::amdgcn_exp_row:
applyDefaultMapping(OpdMapper);
- constrainOpWithReadfirstlane(MI, MRI, 8); // M0
+ constrainOpWithReadfirstlane(B, MI, 8); // M0
+ return;
+ case Intrinsic::amdgcn_s_sleep_var:
+ assert(OpdMapper.getVRegs(1).empty());
+ constrainOpWithReadfirstlane(B, MI, 1);
+ return;
+ case Intrinsic::amdgcn_s_barrier_signal_var:
+ case Intrinsic::amdgcn_s_barrier_join:
+ case Intrinsic::amdgcn_s_wakeup_barrier:
+ constrainOpWithReadfirstlane(B, MI, 1);
return;
+ case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
+ constrainOpWithReadfirstlane(B, MI, 2);
+ return;
+ case Intrinsic::amdgcn_s_barrier_init:
+ constrainOpWithReadfirstlane(B, MI, 1);
+ constrainOpWithReadfirstlane(B, MI, 2);
+ return;
+ case Intrinsic::amdgcn_s_get_barrier_state: {
+ constrainOpWithReadfirstlane(B, MI, 2);
+ return;
+ }
default: {
if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
@@ -3113,7 +3128,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
// and VGPR. For now it's too complicated to figure out the final opcode
// to derive the register bank from the MCInstrDesc.
if (RSrcIntrin->IsImage) {
- applyMappingImage(MI, OpdMapper, MRI, RSrcIntrin->RsrcArg);
+ applyMappingImage(B, MI, OpdMapper, RSrcIntrin->RsrcArg);
return;
}
}
@@ -3214,30 +3229,53 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
}
++End;
- MachineIRBuilder B(*Start);
- executeInWaterfallLoop(B, make_range(Start, End), SGPROperandRegs, MRI);
+ B.setInsertPt(B.getMBB(), Start);
+ executeInWaterfallLoop(B, make_range(Start, End), SGPROperandRegs);
break;
}
case AMDGPU::G_LOAD:
case AMDGPU::G_ZEXTLOAD:
case AMDGPU::G_SEXTLOAD: {
- if (applyMappingLoad(MI, OpdMapper, MRI))
+ if (applyMappingLoad(B, OpdMapper, MI))
return;
break;
}
case AMDGPU::G_DYN_STACKALLOC:
- applyMappingDynStackAlloc(MI, OpdMapper, MRI);
+ applyMappingDynStackAlloc(B, OpdMapper, MI);
+ return;
+ case AMDGPU::G_STACKRESTORE: {
+ applyDefaultMapping(OpdMapper);
+ constrainOpWithReadfirstlane(B, MI, 0);
return;
+ }
case AMDGPU::G_SBFX:
- applyMappingBFE(OpdMapper, /*Signed*/ true);
+ applyMappingBFE(B, OpdMapper, /*Signed*/ true);
return;
case AMDGPU::G_UBFX:
- applyMappingBFE(OpdMapper, /*Signed*/ false);
+ applyMappingBFE(B, OpdMapper, /*Signed*/ false);
return;
case AMDGPU::G_AMDGPU_MAD_U64_U32:
case AMDGPU::G_AMDGPU_MAD_I64_I32:
- applyMappingMAD_64_32(OpdMapper);
+ applyMappingMAD_64_32(B, OpdMapper);
return;
+ case AMDGPU::G_PREFETCH: {
+ if (!Subtarget.hasPrefetch()) {
+ MI.eraseFromParent();
+ return;
+ }
+ unsigned PtrBank =
+ getRegBankID(MI.getOperand(0).getReg(), MRI, AMDGPU::SGPRRegBankID);
+ if (PtrBank == AMDGPU::VGPRRegBankID) {
+ MI.eraseFromParent();
+ return;
+ }
+ // FIXME: There is currently no support for prefetch in global isel.
+ // There is no node equivalence and, what's worse, no MMO is produced
+ // for a prefetch on the global isel path.
+ // Prefetch does not affect execution so erase it for now.
+ MI.eraseFromParent();
+ return;
+ }
default:
break;
}
@@ -3542,7 +3580,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
if (MI.getOpcode() != AMDGPU::G_FREEZE &&
- cannotCopy(*DstBank, *SrcBank, Size))
+ cannotCopy(*DstBank, *SrcBank, TypeSize::getFixed(Size)))
return getInvalidInstructionMapping();
const ValueMapping &ValMap = getValueMapping(0, Size, *DstBank);
@@ -3717,40 +3755,68 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_UBFX:
if (isSALUMapping(MI))
return getDefaultMappingSOP(MI);
- [[fallthrough]];
-
- case AMDGPU::G_SADDSAT: // FIXME: Could lower sat ops for SALU
- case AMDGPU::G_SSUBSAT:
- case AMDGPU::G_UADDSAT:
- case AMDGPU::G_USUBSAT:
+ return getDefaultMappingVOP(MI);
case AMDGPU::G_FADD:
case AMDGPU::G_FSUB:
- case AMDGPU::G_FPTOSI:
- case AMDGPU::G_FPTOUI:
case AMDGPU::G_FMUL:
case AMDGPU::G_FMA:
- case AMDGPU::G_FMAD:
- case AMDGPU::G_FSQRT:
case AMDGPU::G_FFLOOR:
case AMDGPU::G_FCEIL:
- case AMDGPU::G_FRINT:
+ case AMDGPU::G_INTRINSIC_ROUNDEVEN:
+ case AMDGPU::G_FMINNUM:
+ case AMDGPU::G_FMAXNUM:
+ case AMDGPU::G_FMINIMUM:
+ case AMDGPU::G_FMAXIMUM:
+ case AMDGPU::G_INTRINSIC_TRUNC:
+ case AMDGPU::G_STRICT_FADD:
+ case AMDGPU::G_STRICT_FSUB:
+ case AMDGPU::G_STRICT_FMUL:
+ case AMDGPU::G_STRICT_FMA: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ unsigned Size = Ty.getSizeInBits();
+ if (Subtarget.hasSALUFloatInsts() && Ty.isScalar() &&
+ (Size == 32 || Size == 16) && isSALUMapping(MI))
+ return getDefaultMappingSOP(MI);
+ return getDefaultMappingVOP(MI);
+ }
+ case AMDGPU::G_FPTOSI:
+ case AMDGPU::G_FPTOUI:
case AMDGPU::G_SITOFP:
- case AMDGPU::G_UITOFP:
+ case AMDGPU::G_UITOFP: {
+ unsigned SizeDst = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ unsigned SizeSrc = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ if (Subtarget.hasSALUFloatInsts() && SizeDst == 32 && SizeSrc == 32 &&
+ isSALUMapping(MI))
+ return getDefaultMappingSOP(MI);
+ return getDefaultMappingVOP(MI);
+ }
case AMDGPU::G_FPTRUNC:
- case AMDGPU::G_FPEXT:
+ case AMDGPU::G_FPEXT: {
+ unsigned SizeDst = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ unsigned SizeSrc = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ if (Subtarget.hasSALUFloatInsts() && SizeDst != 64 && SizeSrc != 64 &&
+ isSALUMapping(MI))
+ return getDefaultMappingSOP(MI);
+ return getDefaultMappingVOP(MI);
+ }
+ case AMDGPU::G_FSQRT:
case AMDGPU::G_FEXP2:
- case AMDGPU::G_FLOG2:
+ case AMDGPU::G_FLOG2: {
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ if (Subtarget.hasPseudoScalarTrans() && (Size == 16 || Size == 32) &&
+ isSALUMapping(MI))
+ return getDefaultMappingSOP(MI);
+ return getDefaultMappingVOP(MI);
+ }
+ case AMDGPU::G_SADDSAT: // FIXME: Could lower sat ops for SALU
+ case AMDGPU::G_SSUBSAT:
+ case AMDGPU::G_UADDSAT:
+ case AMDGPU::G_USUBSAT:
+ case AMDGPU::G_FMAD:
case AMDGPU::G_FLDEXP:
- case AMDGPU::G_FMINNUM:
- case AMDGPU::G_FMAXNUM:
case AMDGPU::G_FMINNUM_IEEE:
case AMDGPU::G_FMAXNUM_IEEE:
case AMDGPU::G_FCANONICALIZE:
- case AMDGPU::G_INTRINSIC_TRUNC:
- case AMDGPU::G_STRICT_FADD:
- case AMDGPU::G_STRICT_FSUB:
- case AMDGPU::G_STRICT_FMUL:
- case AMDGPU::G_STRICT_FMA:
case AMDGPU::G_STRICT_FLDEXP:
case AMDGPU::G_BSWAP: // TODO: Somehow expand for scalar?
case AMDGPU::G_FSHR: // TODO: Expand for scalar
@@ -3845,9 +3911,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// This case is weird because we expect a physical register in the source,
// but need to set a bank anyway.
//
- // We could select the result to SGPR or VGPR, but for the one current use
- // it's more practical to always use VGPR.
- OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
+ // TODO: We could select the result to SGPR or VGPR
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
break;
}
@@ -3971,14 +4036,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
SrcSize);
break;
}
- case AMDGPU::G_FCMP: {
- unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
- OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
- OpdsMapping[1] = nullptr; // Predicate Operand.
- OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
- OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
- break;
- }
case AMDGPU::G_IS_FPCLASS: {
Register SrcReg = MI.getOperand(1).getReg();
unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
@@ -3999,8 +4056,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg());
break;
}
- case AMDGPU::G_ICMP: {
- auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ case AMDGPU::G_ICMP:
+ case AMDGPU::G_FCMP: {
unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
// See if the result register has already been constrained to vcc, which may
@@ -4010,12 +4067,23 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI);
unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI);
+ auto canUseSCCICMP = [&]() {
+ auto Pred =
+ static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ return Size == 32 ||
+ (Size == 64 &&
+ (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) &&
+ Subtarget.hasScalarCompareEq64());
+ };
+ auto canUseSCCFCMP = [&]() {
+ return Subtarget.hasSALUFloatInsts() && (Size == 32 || Size == 16);
+ };
+
+ bool isICMP = MI.getOpcode() == AMDGPU::G_ICMP;
bool CanUseSCC = DstBank == AMDGPU::SGPRRegBankID &&
Op2Bank == AMDGPU::SGPRRegBankID &&
Op3Bank == AMDGPU::SGPRRegBankID &&
- (Size == 32 || (Size == 64 &&
- (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) &&
- Subtarget.hasScalarCompareEq64()));
+ (isICMP ? canUseSCCICMP() : canUseSCCFCMP());
DstBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
unsigned SrcBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
@@ -4025,6 +4093,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const unsigned ResultSize = 1;
OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, ResultSize);
+ OpdsMapping[1] = nullptr; // Predicate Operand.
OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, Size);
OpdsMapping[3] = AMDGPU::getValueMapping(SrcBank, Size);
break;
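Pulled out of the two lambdas above, the complete scalar-compare rule reads as below; this is a restatement for clarity, not code from the patch:

static bool canCompareOnSCC(bool IsICmp, unsigned Size, CmpInst::Predicate P,
                            bool HasScalarCompareEq64, bool HasSALUFloatInsts) {
  if (IsICmp) // s_cmp handles 32 bits; 64 bits only for eq/ne on capable HW
    return Size == 32 ||
           (Size == 64 && (P == CmpInst::ICMP_EQ || P == CmpInst::ICMP_NE) &&
            HasScalarCompareEq64);
  // Float compares need the SALU float feature and a 16- or 32-bit type.
  return HasSALUFloatInsts && (Size == 32 || Size == 16);
}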
@@ -4197,8 +4266,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = AMDGPU::getValueMapping(ResultBank, Size0);
break;
}
- case AMDGPU::G_INTRINSIC: {
- switch (MI.getIntrinsicID()) {
+ case AMDGPU::G_INTRINSIC:
+ case AMDGPU::G_INTRINSIC_CONVERGENT: {
+ switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
default:
return getInvalidInstructionMapping();
case Intrinsic::amdgcn_div_fmas:
@@ -4207,12 +4277,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_sin:
case Intrinsic::amdgcn_cos:
case Intrinsic::amdgcn_log_clamp:
- case Intrinsic::amdgcn_log:
- case Intrinsic::amdgcn_exp2:
- case Intrinsic::amdgcn_rcp:
case Intrinsic::amdgcn_rcp_legacy:
- case Intrinsic::amdgcn_sqrt:
- case Intrinsic::amdgcn_rsq:
case Intrinsic::amdgcn_rsq_legacy:
case Intrinsic::amdgcn_rsq_clamp:
case Intrinsic::amdgcn_fmul_legacy:
@@ -4220,7 +4285,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_frexp_mant:
case Intrinsic::amdgcn_frexp_exp:
case Intrinsic::amdgcn_fract:
- case Intrinsic::amdgcn_cvt_pkrtz:
case Intrinsic::amdgcn_cvt_pknorm_i16:
case Intrinsic::amdgcn_cvt_pknorm_u16:
case Intrinsic::amdgcn_cvt_pk_i16:
@@ -4263,11 +4327,24 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_sudot8:
case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16:
case Intrinsic::amdgcn_wmma_f16_16x16x16_f16:
+ case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied:
+ case Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied:
case Intrinsic::amdgcn_wmma_f32_16x16x16_bf16:
case Intrinsic::amdgcn_wmma_f32_16x16x16_f16:
case Intrinsic::amdgcn_wmma_i32_16x16x16_iu4:
case Intrinsic::amdgcn_wmma_i32_16x16x16_iu8:
return getDefaultMappingVOP(MI);
+ case Intrinsic::amdgcn_log:
+ case Intrinsic::amdgcn_exp2:
+ case Intrinsic::amdgcn_rcp:
+ case Intrinsic::amdgcn_rsq:
+ case Intrinsic::amdgcn_sqrt: {
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ if (Subtarget.hasPseudoScalarTrans() && (Size == 16 || Size == 32) &&
+ isSALUMapping(MI))
+ return getDefaultMappingSOP(MI);
+ return getDefaultMappingVOP(MI);
+ }
case Intrinsic::amdgcn_sbfe:
case Intrinsic::amdgcn_ubfe:
if (isSALUMapping(MI))
@@ -4285,8 +4362,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_wqm:
case Intrinsic::amdgcn_softwqm:
case Intrinsic::amdgcn_set_inactive:
+ case Intrinsic::amdgcn_set_inactive_chain_arg:
case Intrinsic::amdgcn_permlane64:
return getDefaultMappingAllVGPR(MI);
+ case Intrinsic::amdgcn_cvt_pkrtz:
+ if (Subtarget.hasSALUFloatInsts() && isSALUMapping(MI))
+ return getDefaultMappingSOP(MI);
+ return getDefaultMappingVOP(MI);
case Intrinsic::amdgcn_kernarg_segment_ptr:
case Intrinsic::amdgcn_s_getpc:
case Intrinsic::amdgcn_groupstaticsize:
@@ -4387,6 +4469,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[5] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
break;
}
+ case Intrinsic::amdgcn_permlane16_var:
+ case Intrinsic::amdgcn_permlanex16_var: {
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ break;
+ }
case Intrinsic::amdgcn_mfma_f32_4x4x1f32:
case Intrinsic::amdgcn_mfma_f32_4x4x4f16:
case Intrinsic::amdgcn_mfma_i32_4x4x4i8:
@@ -4514,6 +4605,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);
break;
}
+ case Intrinsic::amdgcn_s_quadmask:
+ case Intrinsic::amdgcn_s_wqm: {
+ Register MaskReg = MI.getOperand(2).getReg();
+ unsigned MaskSize = MRI.getType(MaskReg).getSizeInBits();
+ unsigned MaskBank = getRegBankID(MaskReg, MRI, AMDGPU::SGPRRegBankID);
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, MaskSize);
+ OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);
+ break;
+ }
case Intrinsic::amdgcn_wave_reduce_umin:
case Intrinsic::amdgcn_wave_reduce_umax: {
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
@@ -4524,6 +4624,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = AMDGPU::getValueMapping(regBankID, OpSize);
break;
}
+ case Intrinsic::amdgcn_s_bitreplicate:
+ Register MaskReg = MI.getOperand(2).getReg();
+ unsigned MaskBank = getRegBankID(MaskReg, MRI, AMDGPU::SGPRRegBankID);
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
+ OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, 32);
}
break;
}
@@ -4531,7 +4636,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
- auto IntrID = MI.getIntrinsicID();
+ auto IntrID = AMDGPU::getIntrinsicID(MI);
const AMDGPU::RsrcIntrinsic *RSrcIntrin = AMDGPU::lookupRsrcIntrinsic(IntrID);
assert(RSrcIntrin && "missing RsrcIntrinsic for image intrinsic");
// Non-images can have complications from operands that allow both SGPR
@@ -4559,8 +4664,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
break;
}
- case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
- auto IntrID = MI.getIntrinsicID();
+ case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
+ case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
+ auto IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
switch (IntrID) {
case Intrinsic::amdgcn_s_getreg:
case Intrinsic::amdgcn_s_memtime:
@@ -4575,9 +4681,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_global_atomic_csub:
case Intrinsic::amdgcn_global_atomic_fmin:
case Intrinsic::amdgcn_global_atomic_fmax:
+ case Intrinsic::amdgcn_global_atomic_fmin_num:
+ case Intrinsic::amdgcn_global_atomic_fmax_num:
case Intrinsic::amdgcn_flat_atomic_fadd:
case Intrinsic::amdgcn_flat_atomic_fmin:
case Intrinsic::amdgcn_flat_atomic_fmax:
+ case Intrinsic::amdgcn_flat_atomic_fmin_num:
+ case Intrinsic::amdgcn_flat_atomic_fmax_num:
case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16:
return getDefaultMappingAllVGPR(MI);
@@ -4632,6 +4742,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
break;
}
+ case Intrinsic::amdgcn_s_ttracedata: {
+ // This must be an SGPR, but accept a VGPR.
+ unsigned Bank =
+ getRegBankID(MI.getOperand(1).getReg(), MRI, AMDGPU::SGPRRegBankID);
+ OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
+ break;
+ }
case Intrinsic::amdgcn_end_cf: {
unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
@@ -4779,7 +4896,37 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
getVGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI); // %data1
break;
}
-
+ case Intrinsic::amdgcn_s_sleep_var:
+ OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
+ break;
+ case Intrinsic::amdgcn_s_barrier_signal_var:
+ case Intrinsic::amdgcn_s_barrier_join:
+ case Intrinsic::amdgcn_s_wakeup_barrier:
+ OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
+ break;
+ case Intrinsic::amdgcn_s_barrier_init:
+ OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
+ OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ break;
+ case Intrinsic::amdgcn_s_barrier_signal_isfirst_var: {
+ const unsigned ResultSize = 1;
+ OpdsMapping[0] =
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, ResultSize);
+ OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ break;
+ }
+ case Intrinsic::amdgcn_s_barrier_signal_isfirst:
+ case Intrinsic::amdgcn_s_barrier_leave: {
+ const unsigned ResultSize = 1;
+ OpdsMapping[0] =
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, ResultSize);
+ break;
+ }
+ case Intrinsic::amdgcn_s_get_barrier_state: {
+ OpdsMapping[0] = getSGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+ OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ break;
+ }
default:
return getInvalidInstructionMapping();
}
@@ -4887,6 +5034,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_FPTRUNC_ROUND_UPWARD:
case AMDGPU::G_FPTRUNC_ROUND_DOWNWARD:
return getDefaultMappingVOP(MI);
+ case AMDGPU::G_PREFETCH:
+ OpdsMapping[0] = getSGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+ break;
}
return getInstructionMapping(/*ID*/1, /*Cost*/1,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index 78214d7a1058..b5d16e70ab23 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -53,43 +53,36 @@ public:
MachineRegisterInfo &MRI,
ArrayRef<unsigned> OpIndices) const;
- bool executeInWaterfallLoop(
- MachineIRBuilder &B,
- iterator_range<MachineBasicBlock::iterator> Range,
- SmallSet<Register, 4> &SGPROperandRegs,
- MachineRegisterInfo &MRI) const;
+ bool executeInWaterfallLoop(MachineIRBuilder &B,
+ iterator_range<MachineBasicBlock::iterator> Range,
+ SmallSet<Register, 4> &SGPROperandRegs) const;
Register buildReadFirstLane(MachineIRBuilder &B, MachineRegisterInfo &MRI,
Register Src) const;
- bool executeInWaterfallLoop(MachineIRBuilder &B,
- MachineInstr &MI,
- MachineRegisterInfo &MRI,
- ArrayRef<unsigned> OpIndices) const;
- bool executeInWaterfallLoop(MachineInstr &MI,
- MachineRegisterInfo &MRI,
+ bool executeInWaterfallLoop(MachineIRBuilder &B, MachineInstr &MI,
ArrayRef<unsigned> OpIndices) const;
- void constrainOpWithReadfirstlane(MachineInstr &MI, MachineRegisterInfo &MRI,
+ void constrainOpWithReadfirstlane(MachineIRBuilder &B, MachineInstr &MI,
unsigned OpIdx) const;
- bool applyMappingDynStackAlloc(MachineInstr &MI,
+ bool applyMappingDynStackAlloc(MachineIRBuilder &B,
const OperandsMapper &OpdMapper,
- MachineRegisterInfo &MRI) const;
- bool applyMappingLoad(MachineInstr &MI,
- const OperandsMapper &OpdMapper,
- MachineRegisterInfo &MRI) const;
- bool
- applyMappingImage(MachineInstr &MI,
- const OperandsMapper &OpdMapper,
- MachineRegisterInfo &MRI, int RSrcIdx) const;
+ MachineInstr &MI) const;
+ bool applyMappingLoad(MachineIRBuilder &B, const OperandsMapper &OpdMapper,
+ MachineInstr &MI) const;
+ bool applyMappingImage(MachineIRBuilder &B, MachineInstr &MI,
+ const OperandsMapper &OpdMapper, int RSrcIdx) const;
unsigned setBufferOffsets(MachineIRBuilder &B, Register CombinedOffset,
Register &VOffsetReg, Register &SOffsetReg,
int64_t &InstOffsetVal, Align Alignment) const;
- bool applyMappingSBufferLoad(const OperandsMapper &OpdMapper) const;
+ bool applyMappingSBufferLoad(MachineIRBuilder &B,
+ const OperandsMapper &OpdMapper) const;
- bool applyMappingBFE(const OperandsMapper &OpdMapper, bool Signed) const;
+ bool applyMappingBFE(MachineIRBuilder &B, const OperandsMapper &OpdMapper,
+ bool Signed) const;
- bool applyMappingMAD_64_32(const OperandsMapper &OpdMapper) const;
+ bool applyMappingMAD_64_32(MachineIRBuilder &B,
+ const OperandsMapper &OpdMapper) const;
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
Register Reg) const;
@@ -98,7 +91,8 @@ public:
splitBufferOffsets(MachineIRBuilder &B, Register Offset) const;
/// See RegisterBankInfo::applyMapping.
- void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
+ void applyMappingImpl(MachineIRBuilder &Builder,
+ const OperandsMapper &OpdMapper) const override;
const ValueMapping *getValueMappingForPtr(const MachineRegisterInfo &MRI,
Register Ptr) const;
@@ -171,7 +165,7 @@ public:
bool isDivergentRegBank(const RegisterBank *RB) const override;
unsigned copyCost(const RegisterBank &A, const RegisterBank &B,
- unsigned Size) const override;
+ TypeSize Size) const override;
unsigned getBreakDownCost(const ValueMapping &ValMapping,
const RegisterBank *CurBank = nullptr) const override;
@@ -186,12 +180,9 @@ public:
getInstrMapping(const MachineInstr &MI) const override;
private:
-
- bool foldExtractEltToCmpSelect(MachineInstr &MI,
- MachineRegisterInfo &MRI,
+ bool foldExtractEltToCmpSelect(MachineIRBuilder &B, MachineInstr &MI,
const OperandsMapper &OpdMapper) const;
- bool foldInsertEltToCmpSelect(MachineInstr &MI,
- MachineRegisterInfo &MRI,
+ bool foldInsertEltToCmpSelect(MachineIRBuilder &B, MachineInstr &MI,
const OperandsMapper &OpdMapper) const;
};
} // End llvm namespace.
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
index 580352fb8cf4..552380d54dfd 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
@@ -89,15 +89,23 @@ const SubtargetSubTypeKV *getGPUInfo(const GCNSubtarget &ST,
return nullptr;
}
-constexpr unsigned FeaturesToCheck[] = {
- AMDGPU::FeatureGFX11Insts, AMDGPU::FeatureGFX10Insts,
- AMDGPU::FeatureGFX9Insts, AMDGPU::FeatureGFX8Insts,
- AMDGPU::FeatureDPP, AMDGPU::Feature16BitInsts,
- AMDGPU::FeatureDot1Insts, AMDGPU::FeatureDot2Insts,
- AMDGPU::FeatureDot3Insts, AMDGPU::FeatureDot4Insts,
- AMDGPU::FeatureDot5Insts, AMDGPU::FeatureDot6Insts,
- AMDGPU::FeatureDot7Insts, AMDGPU::FeatureDot8Insts,
-};
+constexpr unsigned FeaturesToCheck[] = {AMDGPU::FeatureGFX11Insts,
+ AMDGPU::FeatureGFX10Insts,
+ AMDGPU::FeatureGFX9Insts,
+ AMDGPU::FeatureGFX8Insts,
+ AMDGPU::FeatureDPP,
+ AMDGPU::Feature16BitInsts,
+ AMDGPU::FeatureDot1Insts,
+ AMDGPU::FeatureDot2Insts,
+ AMDGPU::FeatureDot3Insts,
+ AMDGPU::FeatureDot4Insts,
+ AMDGPU::FeatureDot5Insts,
+ AMDGPU::FeatureDot6Insts,
+ AMDGPU::FeatureDot7Insts,
+ AMDGPU::FeatureDot8Insts,
+ AMDGPU::FeatureExtendedImageInsts,
+ AMDGPU::FeatureSMemRealTime,
+ AMDGPU::FeatureSMemTimeInst};
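The array is consumed by the pass body, which is not part of this hunk: the function's features are expanded with their implied features, and any checked feature the function uses but the subtarget lacks triggers removal. A sketch of that loop, with FnFeatures and STFeatures as stand-in names:

for (unsigned Feature : FeaturesToCheck) {
  if (FnFeatures.test(Feature) && !STFeatures.test(Feature)) {
    // Diagnostic helper defined below; the pass then stubs out the body.
    reportFunctionRemoved(F, Feature);
    break;
  }
}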
FeatureBitset expandImpliedFeatures(const FeatureBitset &Features) {
FeatureBitset Result = Features;
@@ -120,7 +128,6 @@ void reportFunctionRemoved(Function &F, unsigned Feature) {
<< getFeatureName(Feature)
<< " is not supported on the current target";
});
- return;
}
} // end anonymous namespace
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index 804bf503e4f9..db5d2bbcf5bb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -185,7 +185,7 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
//
// If we only have implicit uses of flat_scr on flat instructions, it is not
// really needed.
- if (Info.UsesFlatScratch && !MFI->hasFlatScratchInit() &&
+ if (Info.UsesFlatScratch && !MFI->getUserSGPRInfo().hasFlatScratchInit() &&
(!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
index 2fde7afc0c14..5087f1a90245 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
@@ -43,7 +43,6 @@
#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/AttributeMask.h"
@@ -331,6 +330,8 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
NewFunc->removeRetAttrs(RetAttrs);
// TODO: How to preserve metadata?
+ NewFunc->setIsNewDbgInfoFormat(F.IsNewDbgInfoFormat);
+
// Move the body of the function into the new rewritten function, and replace
// this function with a stub.
NewFunc->splice(NewFunc->begin(), &F);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp
index 9c07851243c9..459400e3359c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp
@@ -69,11 +69,11 @@ using namespace llvm;
namespace {
-class AMDGPURewriteUndefForPHI : public FunctionPass {
+class AMDGPURewriteUndefForPHILegacy : public FunctionPass {
public:
static char ID;
- AMDGPURewriteUndefForPHI() : FunctionPass(ID) {
- initializeAMDGPURewriteUndefForPHIPass(*PassRegistry::getPassRegistry());
+ AMDGPURewriteUndefForPHILegacy() : FunctionPass(ID) {
+ initializeAMDGPURewriteUndefForPHILegacyPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
StringRef getPassName() const override {
@@ -91,13 +91,13 @@ public:
};
} // end anonymous namespace
-char AMDGPURewriteUndefForPHI::ID = 0;
+char AMDGPURewriteUndefForPHILegacy::ID = 0;
-INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHI, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE,
"Rewrite undef for PHI", false, false)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(AMDGPURewriteUndefForPHI, DEBUG_TYPE,
+INITIALIZE_PASS_END(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE,
"Rewrite undef for PHI", false, false)
bool rewritePHIs(Function &F, UniformityInfo &UA, DominatorTree *DT) {
@@ -170,13 +170,27 @@ bool rewritePHIs(Function &F, UniformityInfo &UA, DominatorTree *DT) {
return Changed;
}
-bool AMDGPURewriteUndefForPHI::runOnFunction(Function &F) {
+bool AMDGPURewriteUndefForPHILegacy::runOnFunction(Function &F) {
UniformityInfo &UA =
getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
return rewritePHIs(F, UA, DT);
}
-FunctionPass *llvm::createAMDGPURewriteUndefForPHIPass() {
- return new AMDGPURewriteUndefForPHI();
+PreservedAnalyses
+AMDGPURewriteUndefForPHIPass::run(Function &F, FunctionAnalysisManager &AM) {
+ UniformityInfo &UA = AM.getResult<UniformityInfoAnalysis>(F);
+ DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ bool Changed = rewritePHIs(F, UA, DT);
+ if (Changed) {
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+ }
+
+ return PreservedAnalyses::all();
+}
+
+FunctionPass *llvm::createAMDGPURewriteUndefForPHILegacyPass() {
+ return new AMDGPURewriteUndefForPHILegacy();
}
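With both entry points in place the pass can be scheduled from either pass manager. A hypothetical new-PM use, assuming a FunctionPassManager is at hand:

FunctionPassManager FPM;
FPM.addPass(AMDGPURewriteUndefForPHIPass());
// Legacy-PM clients keep calling createAMDGPURewriteUndefForPHILegacyPass().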
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index 317f3f21d240..beb670669581 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -241,9 +241,13 @@ def : SourceOfDivergence<int_amdgcn_global_atomic_csub>;
def : SourceOfDivergence<int_amdgcn_global_atomic_fadd>;
def : SourceOfDivergence<int_amdgcn_global_atomic_fmin>;
def : SourceOfDivergence<int_amdgcn_global_atomic_fmax>;
+def : SourceOfDivergence<int_amdgcn_global_atomic_fmin_num>;
+def : SourceOfDivergence<int_amdgcn_global_atomic_fmax_num>;
def : SourceOfDivergence<int_amdgcn_flat_atomic_fadd>;
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin>;
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmax>;
+def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin_num>;
+def : SourceOfDivergence<int_amdgcn_flat_atomic_fmax_num>;
def : SourceOfDivergence<int_amdgcn_global_atomic_fadd_v2bf16>;
def : SourceOfDivergence<int_amdgcn_flat_atomic_fadd_v2bf16>;
def : SourceOfDivergence<int_amdgcn_ds_fadd>;
@@ -333,6 +337,8 @@ def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;
def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>;
def : SourceOfDivergence<int_amdgcn_permlane16>;
def : SourceOfDivergence<int_amdgcn_permlanex16>;
+def : SourceOfDivergence<int_amdgcn_permlane16_var>;
+def : SourceOfDivergence<int_amdgcn_permlanex16_var>;
def : SourceOfDivergence<int_amdgcn_mov_dpp>;
def : SourceOfDivergence<int_amdgcn_mov_dpp8>;
def : SourceOfDivergence<int_amdgcn_update_dpp>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 9b50f4fa53ac..f19c57668564 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -17,6 +17,7 @@
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "GCNSubtarget.h"
#include "R600Subtarget.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
@@ -166,6 +167,10 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) : TargetTriple(TT) {}
+bool AMDGPUSubtarget::useRealTrue16Insts() const {
+ return hasTrue16BitInsts() && EnableRealTrue16Insts;
+}
+
GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
const GCNTargetMachine &TM)
: // clang-format off
@@ -196,14 +201,18 @@ unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const {
case AMDGPU::V_LSHLREV_B64_e64:
case AMDGPU::V_LSHLREV_B64_gfx10:
case AMDGPU::V_LSHLREV_B64_e64_gfx11:
+ case AMDGPU::V_LSHLREV_B64_e32_gfx12:
+ case AMDGPU::V_LSHLREV_B64_e64_gfx12:
case AMDGPU::V_LSHL_B64_e64:
case AMDGPU::V_LSHRREV_B64_e64:
case AMDGPU::V_LSHRREV_B64_gfx10:
case AMDGPU::V_LSHRREV_B64_e64_gfx11:
+ case AMDGPU::V_LSHRREV_B64_e64_gfx12:
case AMDGPU::V_LSHR_B64_e64:
case AMDGPU::V_ASHRREV_I64_e64:
case AMDGPU::V_ASHRREV_I64_gfx10:
case AMDGPU::V_ASHRREV_I64_e64_gfx11:
+ case AMDGPU::V_ASHRREV_I64_e64_gfx12:
case AMDGPU::V_ASHR_I64_e64:
return 1;
}
@@ -692,7 +701,7 @@ GCNSubtarget::getBaseReservedNumSGPRs(const bool HasFlatScratch) const {
unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
- return getBaseReservedNumSGPRs(MFI.hasFlatScratchInit());
+ return getBaseReservedNumSGPRs(MFI.getUserSGPRInfo().hasFlatScratchInit());
}
unsigned GCNSubtarget::getReservedNumSGPRs(const Function &F) const {
@@ -771,24 +780,26 @@ unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
}
static unsigned getMaxNumPreloadedSGPRs() {
+ using USI = GCNUserSGPRUsageInfo;
// Max number of user SGPRs
- unsigned MaxUserSGPRs = 4 + // private segment buffer
- 2 + // Dispatch ptr
- 2 + // queue ptr
- 2 + // kernel segment ptr
- 2 + // dispatch ID
- 2 + // flat scratch init
- 2; // Implicit buffer ptr
+ const unsigned MaxUserSGPRs =
+ USI::getNumUserSGPRForField(USI::PrivateSegmentBufferID) +
+ USI::getNumUserSGPRForField(USI::DispatchPtrID) +
+ USI::getNumUserSGPRForField(USI::QueuePtrID) +
+ USI::getNumUserSGPRForField(USI::KernargSegmentPtrID) +
+ USI::getNumUserSGPRForField(USI::DispatchIdID) +
+ USI::getNumUserSGPRForField(USI::FlatScratchInitID) +
+ USI::getNumUserSGPRForField(USI::ImplicitBufferPtrID);
// Max number of system SGPRs
- unsigned MaxSystemSGPRs = 1 + // WorkGroupIDX
- 1 + // WorkGroupIDY
- 1 + // WorkGroupIDZ
- 1 + // WorkGroupInfo
- 1; // private segment wave byte offset
+ const unsigned MaxSystemSGPRs = 1 + // WorkGroupIDX
+ 1 + // WorkGroupIDY
+ 1 + // WorkGroupIDZ
+ 1 + // WorkGroupInfo
+ 1; // private segment wave byte offset
// Max number of synthetic SGPRs
- unsigned SyntheticSGPRs = 1; // LDSKernelId
+ const unsigned SyntheticSGPRs = 1; // LDSKernelId
return MaxUserSGPRs + MaxSystemSGPRs + SyntheticSGPRs;
}
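The per-field counts being summed are the same magic numbers the old arithmetic hard-coded, so the total is unchanged. A self-contained model of the table-driven sum (field names echo this diff; the counts come from the deleted constants and are not an LLVM API):

    #include <cstdio>

    enum UserSGPRField {
      PrivateSegmentBufferID, DispatchPtrID, QueuePtrID, KernargSegmentPtrID,
      DispatchIdID, FlatScratchInitID, ImplicitBufferPtrID, NumFields
    };

    // Dword counts taken from the deleted "4 + 2 + ..." arithmetic above.
    constexpr unsigned numSGPRsForField(UserSGPRField F) {
      return F == PrivateSegmentBufferID ? 4 : 2;
    }

    int main() {
      unsigned MaxUserSGPRs = 0;
      for (int F = 0; F != NumFields; ++F)
        MaxUserSGPRs += numSGPRsForField(static_cast<UserSGPRField>(F));
      printf("max user SGPRs: %u\n", MaxUserSGPRs); // 4 + 6 * 2 = 16
    }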
@@ -994,6 +1005,9 @@ GCNSubtarget::createFillMFMAShadowMutation(const TargetInstrInfo *TII) const {
}
unsigned GCNSubtarget::getNSAThreshold(const MachineFunction &MF) const {
+ if (getGeneration() >= AMDGPUSubtarget::GFX12)
+ return 0; // Not MIMG encoding.
+
if (NSAThreshold.getNumOccurrences() > 0)
return std::max(NSAThreshold.getValue(), 2u);
@@ -1018,3 +1032,79 @@ const AMDGPUSubtarget &AMDGPUSubtarget::get(const TargetMachine &TM, const Funct
else
return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<R600Subtarget>(F));
}
+
+GCNUserSGPRUsageInfo::GCNUserSGPRUsageInfo(const Function &F,
+ const GCNSubtarget &ST)
+ : ST(ST) {
+ const CallingConv::ID CC = F.getCallingConv();
+ const bool IsKernel =
+ CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL;
+ // FIXME: Should use an analysis, rather than an attribute, to detect
+ // calls.
+ const bool HasCalls = F.hasFnAttribute("amdgpu-calls");
+ // FIXME: This attribute is a hack; we just need an analysis on the function
+ // to look for allocas.
+ const bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
+
+ if (IsKernel && (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0))
+ KernargSegmentPtr = true;
+
+ bool IsAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
+ if (IsAmdHsaOrMesa && !ST.enableFlatScratch())
+ PrivateSegmentBuffer = true;
+ else if (ST.isMesaGfxShader(F))
+ ImplicitBufferPtr = true;
+
+ if (!AMDGPU::isGraphics(CC)) {
+ if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
+ DispatchPtr = true;
+
+ // FIXME: Can this always be disabled with < COv5?
+ if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
+ QueuePtr = true;
+
+ if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
+ DispatchID = true;
+ }
+
+ // TODO: This could be refined a lot. The attribute is a poor way of
+ // detecting calls or stack objects that may require it before argument
+ // lowering.
+ if (ST.hasFlatAddressSpace() && AMDGPU::isEntryFunctionCC(CC) &&
+ (IsAmdHsaOrMesa || ST.enableFlatScratch()) &&
+ (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
+ !ST.flatScratchIsArchitected()) {
+ FlatScratchInit = true;
+ }
+
+ if (hasImplicitBufferPtr())
+ NumUsedUserSGPRs += getNumUserSGPRForField(ImplicitBufferPtrID);
+
+ if (hasPrivateSegmentBuffer())
+ NumUsedUserSGPRs += getNumUserSGPRForField(PrivateSegmentBufferID);
+
+ if (hasDispatchPtr())
+ NumUsedUserSGPRs += getNumUserSGPRForField(DispatchPtrID);
+
+ if (hasQueuePtr())
+ NumUsedUserSGPRs += getNumUserSGPRForField(QueuePtrID);
+
+ if (hasKernargSegmentPtr())
+ NumUsedUserSGPRs += getNumUserSGPRForField(KernargSegmentPtrID);
+
+ if (hasDispatchID())
+ NumUsedUserSGPRs += getNumUserSGPRForField(DispatchIdID);
+
+ if (hasFlatScratchInit())
+ NumUsedUserSGPRs += getNumUserSGPRForField(FlatScratchInitID);
+}
+
+void GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs(unsigned NumSGPRs) {
+ assert(NumKernargPreloadSGPRs + NumSGPRs <= AMDGPU::getMaxNumUserSGPRs(ST));
+ NumKernargPreloadSGPRs += NumSGPRs;
+ NumUsedUserSGPRs += NumSGPRs;
+}
+
+unsigned GCNUserSGPRUsageInfo::getNumFreeUserSGPRs() {
+ return AMDGPU::getMaxNumUserSGPRs(ST) - NumUsedUserSGPRs;
+}
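A hedged usage sketch of the new helper; it assumes a valid Function F and GCNSubtarget ST are in scope and uses only members visible in this diff:

    // Sketch only: query the user-SGPR budget computed by the constructor.
    void querySGPRBudget(const Function &F, const GCNSubtarget &ST) {
      GCNUserSGPRUsageInfo UserSGPRInfo(F, ST);
      if (UserSGPRInfo.hasFlatScratchInit()) {
        // two user SGPRs are already reserved for flat scratch init
      }
      if (UserSGPRInfo.getNumFreeUserSGPRs() >= 2)
        UserSGPRInfo.allocKernargPreloadSGPRs(2); // asserts if over budget
    }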
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 10ce00fe68ca..b72697973be7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -39,7 +39,8 @@ public:
VOLCANIC_ISLANDS = 7,
GFX9 = 8,
GFX10 = 9,
- GFX11 = 10
+ GFX11 = 10,
+ GFX12 = 11,
};
private:
@@ -49,6 +50,7 @@ protected:
bool GCN3Encoding = false;
bool Has16BitInsts = false;
bool HasTrue16BitInsts = false;
+ bool EnableRealTrue16Insts = false;
bool HasMadMixInsts = false;
bool HasMadMacF32Insts = false;
bool HasDsSrc2Insts = false;
@@ -153,8 +155,17 @@ public:
return Has16BitInsts;
}
+ /// Return true if the subtarget supports True16 instructions.
bool hasTrue16BitInsts() const { return HasTrue16BitInsts; }
+ /// Return true if real (non-fake) variants of True16 instructions using
+ /// 16-bit registers should be code-generated. Fake True16 instructions are
+ /// identical to non-fake ones except that they take 32-bit registers as
+ /// operands and always use their low halves.
+ // TODO: Remove and use hasTrue16BitInsts() instead once True16 is fully
+ // supported and the support for fake True16 instructions is removed.
+ bool useRealTrue16Insts() const;
+
bool hasMadMixInsts() const {
return HasMadMixInsts;
}
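The intended use of the new predicate is choosing between the real and fake encodings of the same True16 operation. A hedged sketch (the helper is hypothetical, not part of this diff):

    // Hypothetical helper: emit the 16-bit-register encoding only when the
    // subtarget opts in; otherwise use the fake 32-bit-register variant.
    static unsigned pickTrue16Opcode(const GCNSubtarget &ST, unsigned RealOpc,
                                     unsigned FakeOpc) {
      return ST.useRealTrue16Insts() ? RealOpc : FakeOpc;
    }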
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 87ef2333e2ea..e8c04ecf39ba 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -50,6 +50,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
@@ -173,12 +174,6 @@ static VGPRRegisterRegAlloc fastRegAllocVGPR(
"fast", "fast register allocator", createFastVGPRRegisterAllocator);
}
-static cl::opt<bool> EnableSROA(
- "amdgpu-sroa",
- cl::desc("Run SROA after promote alloca pass"),
- cl::ReallyHidden,
- cl::init(true));
-
static cl::opt<bool>
EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
cl::desc("Run early if-conversion"),
@@ -291,6 +286,12 @@ static cl::opt<bool> EnableSIModeRegisterPass(
cl::init(true),
cl::Hidden);
+// Enable GFX11.5+ s_singleuse_vdst insertion
+static cl::opt<bool>
+ EnableInsertSingleUseVDST("amdgpu-enable-single-use-vdst",
+ cl::desc("Enable s_singleuse_vdst insertion"),
+ cl::init(false), cl::Hidden);
+
// Enable GFX11+ s_delay_alu insertion
static cl::opt<bool>
EnableInsertDelayAlu("amdgpu-enable-delay-alu",
@@ -339,6 +340,11 @@ static cl::opt<bool> EnablePromoteKernelArguments(
cl::desc("Enable promotion of flat kernel pointer arguments to global"),
cl::Hidden, cl::init(true));
+static cl::opt<bool> EnableImageIntrinsicOptimizer(
+ "amdgpu-enable-image-intrinsic-optimizer",
+ cl::desc("Enable image intrinsic optimizer pass"), cl::init(true),
+ cl::Hidden);
+
static cl::opt<bool> EnableMaxIlpSchedStrategy(
"amdgpu-enable-max-ilp-scheduling-strategy",
cl::desc("Enable scheduling strategy to maximize ILP for a single wave."),
@@ -346,9 +352,14 @@ static cl::opt<bool> EnableMaxIlpSchedStrategy(
static cl::opt<bool> EnableRewritePartialRegUses(
"amdgpu-enable-rewrite-partial-reg-uses",
- cl::desc("Enable rewrite partial reg uses pass"), cl::init(false),
+ cl::desc("Enable rewrite partial reg uses pass"), cl::init(true),
cl::Hidden);
+static cl::opt<bool> EnableHipStdPar(
+ "amdgpu-enable-hipstdpar",
+ cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false),
+ cl::Hidden);
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheR600Target());
@@ -364,6 +375,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUDAGToDAGISelPass(*PR);
initializeGCNDPPCombinePass(*PR);
initializeSILowerI1CopiesPass(*PR);
+ initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
+ initializeSILowerWWMCopiesPass(*PR);
initializeSILowerSGPRSpillsPass(*PR);
initializeSIFixSGPRCopiesPass(*PR);
initializeSIFixVGPRCopiesPass(*PR);
@@ -375,7 +388,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSILoadStoreOptimizerPass(*PR);
initializeAMDGPUCtorDtorLoweringLegacyPass(*PR);
initializeAMDGPUAlwaysInlinePass(*PR);
- initializeAMDGPUAttributorPass(*PR);
+ initializeAMDGPUAttributorLegacyPass(*PR);
initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
initializeAMDGPUAnnotateUniformValuesPass(*PR);
initializeAMDGPUArgumentUsageInfoPass(*PR);
@@ -393,11 +406,12 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUCodeGenPreparePass(*PR);
initializeAMDGPULateCodeGenPreparePass(*PR);
initializeAMDGPURemoveIncompatibleFunctionsPass(*PR);
- initializeAMDGPULowerModuleLDSPass(*PR);
+ initializeAMDGPULowerModuleLDSLegacyPass(*PR);
initializeAMDGPURewriteOutArgumentsPass(*PR);
- initializeAMDGPURewriteUndefForPHIPass(*PR);
+ initializeAMDGPURewriteUndefForPHILegacyPass(*PR);
initializeAMDGPUUnifyMetadataPass(*PR);
initializeSIAnnotateControlFlowPass(*PR);
+ initializeAMDGPUInsertSingleUseVDSTPass(*PR);
initializeAMDGPUInsertDelayAluPass(*PR);
initializeSIInsertHardClausesPass(*PR);
initializeSIInsertWaitcntsPass(*PR);
@@ -415,14 +429,14 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
initializeAMDGPUAAWrapperPassPass(*PR);
initializeAMDGPUExternalAAWrapperPass(*PR);
- initializeAMDGPUUseNativeCallsPass(*PR);
- initializeAMDGPUSimplifyLibCallsPass(*PR);
+ initializeAMDGPUImageIntrinsicOptimizerPass(*PR);
initializeAMDGPUPrintfRuntimeBindingPass(*PR);
initializeAMDGPUResourceUsageAnalysisPass(*PR);
initializeGCNNSAReassignPass(*PR);
initializeGCNPreRAOptimizationsPass(*PR);
initializeGCNPreRALongBranchRegPass(*PR);
initializeGCNRewritePartialRegUsesPass(*PR);
+ initializeGCNRegPressurePrinterPass(*PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -441,7 +455,7 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
if (ST.shouldClusterStores())
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
- DAG->addMutation(createIGroupLPDAGMutation());
+ DAG->addMutation(createIGroupLPDAGMutation(/*IsPostRA=*/false));
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
return DAG;
@@ -451,7 +465,7 @@ static ScheduleDAGInstrs *
createGCNMaxILPMachineScheduler(MachineSchedContext *C) {
ScheduleDAGMILive *DAG =
new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxILPSchedStrategy>(C));
- DAG->addMutation(createIGroupLPDAGMutation());
+ DAG->addMutation(createIGroupLPDAGMutation(/*IsPostRA=*/false));
return DAG;
}
@@ -525,9 +539,10 @@ static StringRef computeDataLayout(const Triple &TT) {
// space 8) which cannot be non-trivially accessed by LLVM memory operations
// like getelementptr.
return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
- "-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:"
+ "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-"
+ "v32:32-v48:64-v96:"
"128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-"
- "G1-ni:7:8";
+ "G1-ni:7:8:9";
}
LLVM_READNONE
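The new p9 component appears to correspond to the BUFFER_STRIDED_POINTER address space added later in this diff: a 192-bit pointer with 256-bit ABI and preferred alignment and a 32-bit index width, with ni extended to :9 to mark it non-integral. A hedged, self-contained check of those numbers against LLVM's DataLayout parser (the layout string is trimmed to the relevant components and assumed to parse in isolation):

    #include "llvm/IR/DataLayout.h"
    #include <cassert>

    void checkAddrSpace9Layout() {
      llvm::DataLayout DL("e-p9:192:256:256:32-ni:9");
      assert(DL.getPointerSizeInBits(9) == 192); // total pointer size
      assert(DL.getIndexSizeInBits(9) == 32);    // width used for GEP offsets
      assert(DL.isNonIntegralAddressSpace(9));   // no ptr<->int round-trips
    }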
@@ -553,7 +568,7 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
TargetOptions Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OptLevel)
+ CodeGenOptLevel OptLevel)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
FS, Options, getEffectiveRelocModel(RM),
getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
@@ -588,8 +603,8 @@ StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
/// Predicate for Internalize pass.
static bool mustPreserveGV(const GlobalValue &GV) {
if (const Function *F = dyn_cast<Function>(&GV))
- return F->isDeclaration() || F->getName().startswith("__asan_") ||
- F->getName().startswith("__sanitizer_") ||
+ return F->isDeclaration() || F->getName().starts_with("__asan_") ||
+ F->getName().starts_with("__sanitizer_") ||
AMDGPU::isEntryFunctionCC(F->getCallingConv());
GV.removeDeadConstantUsers();
@@ -602,8 +617,12 @@ void AMDGPUTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) {
void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerPipelineParsingCallback(
- [](StringRef PassName, ModulePassManager &PM,
- ArrayRef<PassBuilder::PipelineElement>) {
+ [this](StringRef PassName, ModulePassManager &PM,
+ ArrayRef<PassBuilder::PipelineElement>) {
+ if (PassName == "amdgpu-attributor") {
+ PM.addPass(AMDGPUAttributorPass(*this));
+ return true;
+ }
if (PassName == "amdgpu-unify-metadata") {
PM.addPass(AMDGPUUnifyMetadataPass());
return true;
@@ -617,7 +636,7 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
return true;
}
if (PassName == "amdgpu-lower-module-lds") {
- PM.addPass(AMDGPULowerModuleLDSPass());
+ PM.addPass(AMDGPULowerModuleLDSPass(*this));
return true;
}
if (PassName == "amdgpu-lower-ctor-dtor") {
@@ -630,7 +649,11 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
[this](StringRef PassName, FunctionPassManager &PM,
ArrayRef<PassBuilder::PipelineElement>) {
if (PassName == "amdgpu-simplifylib") {
- PM.addPass(AMDGPUSimplifyLibCallsPass(*this));
+ PM.addPass(AMDGPUSimplifyLibCallsPass());
+ return true;
+ }
+ if (PassName == "amdgpu-image-intrinsic-opt") {
+ PM.addPass(AMDGPUImageIntrinsicOptimizerPass(*this));
return true;
}
if (PassName == "amdgpu-usenative") {
@@ -666,6 +689,14 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PM.addPass(AMDGPUCodeGenPreparePass(*this));
return true;
}
+ if (PassName == "amdgpu-lower-kernel-arguments") {
+ PM.addPass(AMDGPULowerKernelArgumentsPass(*this));
+ return true;
+ }
+ if (PassName == "amdgpu-rewrite-undef-for-phi") {
+ PM.addPass(AMDGPURewriteUndefForPHIPass());
+ return true;
+ }
return false;
});
@@ -682,12 +713,14 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
});
PB.registerPipelineStartEPCallback(
- [this](ModulePassManager &PM, OptimizationLevel Level) {
+ [](ModulePassManager &PM, OptimizationLevel Level) {
FunctionPassManager FPM;
FPM.addPass(AMDGPUUseNativeCallsPass());
if (EnableLibCallSimplify && Level != OptimizationLevel::O0)
- FPM.addPass(AMDGPUSimplifyLibCallsPass(*this));
+ FPM.addPass(AMDGPUSimplifyLibCallsPass());
PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+ if (EnableHipStdPar)
+ PM.addPass(HipStdParAcceleratorCodeSelectionPass());
});
PB.registerPipelineEarlySimplificationEPCallback(
@@ -826,7 +859,7 @@ GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
TargetOptions Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
const TargetSubtargetInfo *
@@ -894,8 +927,8 @@ public:
if (ST.shouldClusterStores())
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII));
- DAG->addMutation(createIGroupLPDAGMutation());
- if (isPassEnabled(EnableVOPD, CodeGenOpt::Less))
+ DAG->addMutation(createIGroupLPDAGMutation(/*IsPostRA=*/true));
+ if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less))
DAG->addMutation(createVOPDPairingMutation());
return DAG;
}
@@ -942,7 +975,7 @@ AMDGPUPassConfig::AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
}
void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
- if (getOptLevel() == CodeGenOpt::Aggressive)
+ if (getOptLevel() == CodeGenOptLevel::Aggressive)
addPass(createGVNPass());
else
addPass(createEarlyCSEPass());
@@ -966,6 +999,10 @@ void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
void AMDGPUPassConfig::addIRPasses() {
const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
+ Triple::ArchType Arch = TM.getTargetTriple().getArch();
+ if (RemoveIncompatibleFunctions && Arch == Triple::amdgcn)
+ addPass(createAMDGPURemoveIncompatibleFunctionsPass(&TM));
+
// There is no reason to run these.
disablePass(&StackMapLivenessID);
disablePass(&FuncletLayoutID);
@@ -975,12 +1012,15 @@ void AMDGPUPassConfig::addIRPasses() {
if (LowerCtorDtor)
addPass(createAMDGPUCtorDtorLoweringLegacyPass());
+ if (isPassEnabled(EnableImageIntrinsicOptimizer))
+ addPass(createAMDGPUImageIntrinsicOptimizerPass(&TM));
+
// Function calls are not supported, so make sure we inline everything.
addPass(createAMDGPUAlwaysInlinePass());
addPass(createAlwaysInlinerLegacyPass());
// Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
- if (TM.getTargetTriple().getArch() == Triple::r600)
+ if (Arch == Triple::r600)
addPass(createR600OpenCLImageTypeLoweringPass());
// Replace OpenCL enqueued block function pointers with global variables.
@@ -988,24 +1028,29 @@ void AMDGPUPassConfig::addIRPasses() {
// Runs before PromoteAlloca so the latter can account for function uses
if (EnableLowerModuleLDS) {
- addPass(createAMDGPULowerModuleLDSPass());
+ addPass(createAMDGPULowerModuleLDSLegacyPass(&TM));
}
// AMDGPUAttributor infers lack of llvm.amdgcn.lds.kernel.id calls, so run
// after their introduction
- if (TM.getOptLevel() > CodeGenOpt::None)
- addPass(createAMDGPUAttributorPass());
+ if (TM.getOptLevel() > CodeGenOptLevel::None)
+ addPass(createAMDGPUAttributorLegacyPass());
- if (TM.getOptLevel() > CodeGenOpt::None)
+ if (TM.getOptLevel() > CodeGenOptLevel::None)
addPass(createInferAddressSpacesPass());
+ // Run atomic optimizer before Atomic Expand
+ if ((TM.getTargetTriple().getArch() == Triple::amdgcn) &&
+ (TM.getOptLevel() >= CodeGenOptLevel::Less) &&
+ (AMDGPUAtomicOptimizerStrategy != ScanOptions::None)) {
+ addPass(createAMDGPUAtomicOptimizerPass(AMDGPUAtomicOptimizerStrategy));
+ }
+
addPass(createAtomicExpandPass());
- if (TM.getOptLevel() > CodeGenOpt::None) {
+ if (TM.getOptLevel() > CodeGenOptLevel::None) {
addPass(createAMDGPUPromoteAlloca());
- if (EnableSROA)
- addPass(createSROAPass());
if (isPassEnabled(EnableScalarIRPasses))
addStraightLineScalarOptimizationPasses();
@@ -1025,7 +1070,7 @@ void AMDGPUPassConfig::addIRPasses() {
// Try to hoist loop invariant parts of divisions AMDGPUCodeGenPrepare may
// have expanded.
- if (TM.getOptLevel() > CodeGenOpt::Less)
+ if (TM.getOptLevel() > CodeGenOptLevel::Less)
addPass(createLICMPass());
}
@@ -1049,9 +1094,6 @@ void AMDGPUPassConfig::addIRPasses() {
void AMDGPUPassConfig::addCodeGenPrepare() {
if (TM->getTargetTriple().getArch() == Triple::amdgcn) {
- if (RemoveIncompatibleFunctions)
- addPass(createAMDGPURemoveIncompatibleFunctionsPass(TM));
-
// FIXME: This pass adds 2 hacky attributes that can be replaced with an
// analysis, and should be removed.
addPass(createAMDGPUAnnotateKernelFeaturesPass());
@@ -1074,7 +1116,7 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
}
bool AMDGPUPassConfig::addPreISel() {
- if (TM->getOptLevel() > CodeGenOpt::None)
+ if (TM->getOptLevel() > CodeGenOptLevel::None)
addPass(createFlattenCFGPass());
return false;
}
@@ -1125,15 +1167,10 @@ ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
bool GCNPassConfig::addPreISel() {
AMDGPUPassConfig::addPreISel();
- if (TM->getOptLevel() > CodeGenOpt::None)
+ if (TM->getOptLevel() > CodeGenOptLevel::None)
addPass(createAMDGPULateCodeGenPreparePass());
- if ((TM->getOptLevel() >= CodeGenOpt::Less) &&
- (AMDGPUAtomicOptimizerStrategy != ScanOptions::None)) {
- addPass(createAMDGPUAtomicOptimizerPass(AMDGPUAtomicOptimizerStrategy));
- }
-
- if (TM->getOptLevel() > CodeGenOpt::None)
+ if (TM->getOptLevel() > CodeGenOptLevel::None)
addPass(createSinkingPass());
// Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
@@ -1152,11 +1189,11 @@ bool GCNPassConfig::addPreISel() {
// TODO: Move this right after structurizeCFG to avoid extra divergence
// analysis. This depends on stopping SIAnnotateControlFlow from making
// control flow modifications.
- addPass(createAMDGPURewriteUndefForPHIPass());
+ addPass(createAMDGPURewriteUndefForPHILegacyPass());
}
addPass(createLCSSAPass());
- if (TM->getOptLevel() > CodeGenOpt::Less)
+ if (TM->getOptLevel() > CodeGenOptLevel::Less)
addPass(&AMDGPUPerfHintAnalysisID);
return false;
@@ -1207,7 +1244,7 @@ bool GCNPassConfig::addIRTranslator() {
}
void GCNPassConfig::addPreLegalizeMachineIR() {
- bool IsOptNone = getOptLevel() == CodeGenOpt::None;
+ bool IsOptNone = getOptLevel() == CodeGenOptLevel::None;
addPass(createAMDGPUPreLegalizeCombiner(IsOptNone));
addPass(new Localizer());
}
@@ -1218,8 +1255,9 @@ bool GCNPassConfig::addLegalizeMachineIR() {
}
void GCNPassConfig::addPreRegBankSelect() {
- bool IsOptNone = getOptLevel() == CodeGenOpt::None;
+ bool IsOptNone = getOptLevel() == CodeGenOptLevel::None;
addPass(createAMDGPUPostLegalizeCombiner(IsOptNone));
+ addPass(createAMDGPUGlobalISelDivergenceLoweringPass());
}
bool GCNPassConfig::addRegBankSelect() {
@@ -1228,7 +1266,7 @@ bool GCNPassConfig::addRegBankSelect() {
}
void GCNPassConfig::addPreGlobalInstructionSelect() {
- bool IsOptNone = getOptLevel() == CodeGenOpt::None;
+ bool IsOptNone = getOptLevel() == CodeGenOptLevel::None;
addPass(createAMDGPURegBankCombiner(IsOptNone));
}
@@ -1253,7 +1291,6 @@ void GCNPassConfig::addFastRegAlloc() {
insertPass(&PHIEliminationID, &SILowerControlFlowID);
insertPass(&TwoAddressInstructionPassID, &SIWholeQuadModeID);
- insertPass(&TwoAddressInstructionPassID, &SIPreAllocateWWMRegsID);
TargetPassConfig::addFastRegAlloc();
}
@@ -1262,7 +1299,6 @@ void GCNPassConfig::addOptimizedRegAlloc() {
// Allow the scheduler to run before SIWholeQuadMode inserts exec manipulation
// instructions that cause scheduling barriers.
insertPass(&MachineSchedulerID, &SIWholeQuadModeID);
- insertPass(&MachineSchedulerID, &SIPreAllocateWWMRegsID);
if (OptExecMaskPreRA)
insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);
@@ -1275,7 +1311,7 @@ void GCNPassConfig::addOptimizedRegAlloc() {
// This is not an essential optimization and it has a noticeable impact on
// compilation time, so we only enable it from O2.
- if (TM->getOptLevel() > CodeGenOpt::Less)
+ if (TM->getOptLevel() > CodeGenOptLevel::Less)
insertPass(&MachineSchedulerID, &SIFormMemoryClausesID);
// FIXME: when an instruction has a Killed operand, and the instruction is
@@ -1296,6 +1332,7 @@ void GCNPassConfig::addOptimizedRegAlloc() {
}
bool GCNPassConfig::addPreRewrite() {
+ addPass(&SILowerWWMCopiesID);
if (EnableRegReassign)
addPass(&GCNNSAReassignID);
return true;
@@ -1348,8 +1385,11 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
// Equivalent of PEI for SGPRs.
addPass(&SILowerSGPRSpillsID);
+ addPass(&SIPreAllocateWWMRegsID);
addPass(createVGPRAllocPass(false));
+
+ addPass(&SILowerWWMCopiesID);
return true;
}
@@ -1369,6 +1409,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
// Equivalent of PEI for SGPRs.
addPass(&SILowerSGPRSpillsID);
+ addPass(&SIPreAllocateWWMRegsID);
addPass(createVGPRAllocPass(true));
@@ -1380,32 +1421,32 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
void GCNPassConfig::addPostRegAlloc() {
addPass(&SIFixVGPRCopiesID);
- if (getOptLevel() > CodeGenOpt::None)
+ if (getOptLevel() > CodeGenOptLevel::None)
addPass(&SIOptimizeExecMaskingID);
TargetPassConfig::addPostRegAlloc();
}
void GCNPassConfig::addPreSched2() {
- if (TM->getOptLevel() > CodeGenOpt::None)
+ if (TM->getOptLevel() > CodeGenOptLevel::None)
addPass(createSIShrinkInstructionsPass());
addPass(&SIPostRABundlerID);
}
void GCNPassConfig::addPreEmitPass() {
- if (isPassEnabled(EnableVOPD, CodeGenOpt::Less))
+ if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less))
addPass(&GCNCreateVOPDID);
addPass(createSIMemoryLegalizerPass());
addPass(createSIInsertWaitcntsPass());
addPass(createSIModeRegisterPass());
- if (getOptLevel() > CodeGenOpt::None)
+ if (getOptLevel() > CodeGenOptLevel::None)
addPass(&SIInsertHardClausesID);
addPass(&SILateBranchLoweringPassID);
- if (isPassEnabled(EnableSetWavePriority, CodeGenOpt::Less))
+ if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less))
addPass(createAMDGPUSetWavePriorityPass());
- if (getOptLevel() > CodeGenOpt::None)
+ if (getOptLevel() > CodeGenOptLevel::None)
addPass(&SIPreEmitPeepholeID);
// The hazard recognizer that runs as part of the post-ra scheduler does not
// guarantee to be able to handle all hazards correctly. This is because if there
@@ -1417,7 +1458,10 @@ void GCNPassConfig::addPreEmitPass() {
// cases.
addPass(&PostRAHazardRecognizerID);
- if (isPassEnabled(EnableInsertDelayAlu, CodeGenOpt::Less))
+ if (isPassEnabled(EnableInsertSingleUseVDST, CodeGenOptLevel::Less))
+ addPass(&AMDGPUInsertSingleUseVDSTID);
+
+ if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less))
addPass(&AMDGPUInsertDelayAluID);
addPass(&BranchRelaxationPassID);
@@ -1458,13 +1502,13 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
static_cast<const yaml::SIMachineFunctionInfo &>(MFI_);
MachineFunction &MF = PFS.MF;
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
if (MFI->initializeBaseYamlFields(YamlMFI, MF, PFS, Error, SourceRange))
return true;
if (MFI->Occupancy == 0) {
// Fixup the subtarget dependent default value.
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
MFI->Occupancy = ST.computeOccupancy(MF.getFunction(), MFI->getLDSSize());
}
@@ -1618,8 +1662,10 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
MFI->ArgInfo.WorkItemIDZ, 0, 0)))
return true;
- MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
- MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
+ if (ST.hasIEEEMode())
+ MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
+ if (ST.hasDX10ClampMode())
+ MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
// FIXME: Move proper support for denormal-fp-math into base MachineFunction
MFI->Mode.FP32Denormals.Input = YamlMFI.Mode.FP32InputDenormals
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 2426be405a65..9051a61e6557 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -41,7 +41,7 @@ public:
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, TargetOptions Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL);
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL);
~AMDGPUTargetMachine() override;
const TargetSubtargetInfo *getSubtargetImpl() const;
@@ -79,7 +79,7 @@ public:
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, TargetOptions Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
@@ -137,7 +137,7 @@ public:
/// be used given that a pass shall work at an optimization \p Level
/// minimum.
bool isPassEnabled(const cl::opt<bool> &Opt,
- CodeGenOpt::Level Level = CodeGenOpt::Default) const {
+ CodeGenOptLevel Level = CodeGenOptLevel::Default) const {
if (Opt.getNumOccurrences())
return Opt;
if (TM->getOptLevel() < Level)
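isPassEnabled() gives an explicit command-line occurrence priority over the optimization-level gate, which is how options such as -amdgpu-enable-delay-alu can force a pass on or off regardless of level. A standalone model of that precedence (plain C++ in place of the cl::opt machinery; the truncated tail of the hunk is assumed to return the option's default):

    #include <optional>

    // Model: Explicit holds the user's flag if one was passed on the command
    // line; Default is the option's built-in value; levels are opt levels.
    bool isPassEnabledModel(std::optional<bool> Explicit, bool Default,
                            int MinLevel, int ActualLevel) {
      if (Explicit)
        return *Explicit;            // an explicit flag always wins
      if (ActualLevel < MinLevel)
        return false;                // below the pass's minimum level
      return Default;
    }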
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
index f854c8c16e5a..584e41bfd546 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
@@ -30,7 +30,7 @@ MCSection *AMDGPUTargetObjectFile::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind SK, const TargetMachine &TM) const {
// Set metadata access for the explicit section
StringRef SectionName = GO->getSection();
- if (SectionName.startswith(".AMDGPU.comment."))
+ if (SectionName.starts_with(".AMDGPU.comment."))
SK = SectionKind::getMetadata();
return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, SK, TM);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 81d083c1c88a..f1da1a61bf4d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -296,7 +296,7 @@ GCNTTIImpl::GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
ST(static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F))),
TLI(ST->getTargetLowering()), CommonTTI(TM, F),
IsGraphics(AMDGPU::isGraphics(F.getCallingConv())) {
- SIModeRegisterDefaults Mode(F);
+ SIModeRegisterDefaults Mode(F, *ST);
HasFP32Denormals = Mode.FP32Denormals != DenormalMode::getPreserveSign();
HasFP64FP16Denormals =
Mode.FP64FP16Denormals != DenormalMode::getPreserveSign();
@@ -368,7 +368,8 @@ unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
AddrSpace == AMDGPUAS::BUFFER_FAT_POINTER ||
- AddrSpace == AMDGPUAS::BUFFER_RESOURCE) {
+ AddrSpace == AMDGPUAS::BUFFER_RESOURCE ||
+ AddrSpace == AMDGPUAS::BUFFER_STRIDED_POINTER) {
return 512;
}
@@ -650,6 +651,15 @@ InstructionCost GCNTTIImpl::getArithmeticInstrCost(
return LT.first * Cost * NElts;
}
+ if (SLT == MVT::f32 && ((CxtI && CxtI->hasApproxFunc()) ||
+ TLI->getTargetMachine().Options.UnsafeFPMath)) {
+ // Fast unsafe fdiv lowering:
+ // f32 rcp
+ // f32 fmul
+ int Cost = getQuarterRateInstrCost(CostKind) + getFullRateInstrCost();
+ return LT.first * Cost * NElts;
+ }
+
if (SLT == MVT::f32 || SLT == MVT::f16) {
// 4 more v_cvt_* insts without f16 insts support
int Cost = (SLT == MVT::f16 ? 14 : 10) * getFullRateInstrCost() +
@@ -883,7 +893,7 @@ bool GCNTTIImpl::isReadRegisterSourceOfDivergence(
return true;
// Special case scalar registers that start with 'v'.
- if (RegName.startswith("vcc") || RegName.empty())
+ if (RegName.starts_with("vcc") || RegName.empty())
return false;
// VGPR or AGPR is divergent. There aren't any specially named vector
@@ -1017,6 +1027,8 @@ bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
case Intrinsic::amdgcn_flat_atomic_fadd:
case Intrinsic::amdgcn_flat_atomic_fmax:
case Intrinsic::amdgcn_flat_atomic_fmin:
+ case Intrinsic::amdgcn_flat_atomic_fmax_num:
+ case Intrinsic::amdgcn_flat_atomic_fmin_num:
OpIndexes.push_back(0);
return true;
default:
@@ -1091,7 +1103,9 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
}
case Intrinsic::amdgcn_flat_atomic_fadd:
case Intrinsic::amdgcn_flat_atomic_fmax:
- case Intrinsic::amdgcn_flat_atomic_fmin: {
+ case Intrinsic::amdgcn_flat_atomic_fmin:
+ case Intrinsic::amdgcn_flat_atomic_fmax_num:
+ case Intrinsic::amdgcn_flat_atomic_fmin_num: {
Type *DestTy = II->getType();
Type *SrcTy = NewV->getType();
unsigned NewAS = SrcTy->getPointerAddressSpace();
@@ -1114,7 +1128,8 @@ InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
- Kind = improveShuffleKindFromMask(Kind, Mask);
+ Kind = improveShuffleKindFromMask(Kind, Mask, VT, Index, SubTp);
+
if (ST->hasVOP3PInsts()) {
if (cast<FixedVectorType>(VT)->getNumElements() == 2 &&
DL.getTypeSizeInBits(VT->getElementType()) == 16) {
@@ -1153,8 +1168,8 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
// FIXME: dx10_clamp can just take the caller setting, but there seems to be
// no way to support merge for backend defined attributes.
- SIModeRegisterDefaults CallerMode(*Caller);
- SIModeRegisterDefaults CalleeMode(*Callee);
+ SIModeRegisterDefaults CallerMode(*Caller, *CallerST);
+ SIModeRegisterDefaults CalleeMode(*Callee, *CalleeST);
if (!CallerMode.isInlineCompatible(CalleeMode))
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 9ad841c3c8a5..9bc3ba161c9e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -46,6 +46,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -114,8 +115,6 @@ void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const {
// We preserve the non-critical-edgeness property
AU.addPreservedID(BreakCriticalEdgesID);
- // This is a cluster of orthogonal Transforms
- AU.addPreservedID(LowerSwitchID);
FunctionPass::getAnalysisUsage(AU);
AU.addRequired<TargetTransformInfoWrapperPass>();
@@ -192,6 +191,8 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(
bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
const PostDominatorTree &PDT,
const UniformityInfo &UA) {
+ assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
+
if (PDT.root_size() == 0 ||
(PDT.root_size() == 1 &&
!isa<BranchInst>(PDT.getRoot()->getTerminator())))
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index b9443559132f..3b69a37728ea 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -75,6 +75,7 @@ public:
bool Abs = false;
bool Neg = false;
bool Sext = false;
+ bool Lit = false;
bool hasFPModifiers() const { return Abs || Neg; }
bool hasIntModifiers() const { return Sext; }
@@ -273,6 +274,10 @@ public:
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
}
+ bool isRegOrImmWithIntT16InputMods() const {
+ return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
+ }
+
bool isRegOrImmWithInt32InputMods() const {
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
}
@@ -293,6 +298,10 @@ public:
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
}
+ bool isRegOrImmWithFPT16InputMods() const {
+ return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
+ }
+
bool isRegOrImmWithFP32InputMods() const {
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
}
@@ -347,29 +356,24 @@ public:
return isImm() && Imm.Type == ImmT;
}
+ template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
+
bool isImmLiteral() const { return isImmTy(ImmTyNone); }
bool isImmModifier() const {
return isImm() && Imm.Type != ImmTyNone;
}
- bool isClampSI() const { return isImmTy(ImmTyClampSI); }
bool isOModSI() const { return isImmTy(ImmTyOModSI); }
bool isDMask() const { return isImmTy(ImmTyDMask); }
bool isDim() const { return isImmTy(ImmTyDim); }
- bool isUNorm() const { return isImmTy(ImmTyUNorm); }
- bool isDA() const { return isImmTy(ImmTyDA); }
bool isR128A16() const { return isImmTy(ImmTyR128A16); }
- bool isA16() const { return isImmTy(ImmTyA16); }
- bool isLWE() const { return isImmTy(ImmTyLWE); }
bool isOff() const { return isImmTy(ImmTyOff); }
bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
- bool isExpVM() const { return isImmTy(ImmTyExpVM); }
- bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
bool isOffen() const { return isImmTy(ImmTyOffen); }
bool isIdxen() const { return isImmTy(ImmTyIdxen); }
bool isAddr64() const { return isImmTy(ImmTyAddr64); }
- bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
+ bool isOffset() const { return isImmTy(ImmTyOffset); }
bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
@@ -378,7 +382,6 @@ public:
bool isLDS() const { return isImmTy(ImmTyLDS); }
bool isCPol() const { return isImmTy(ImmTyCPol); }
bool isTFE() const { return isImmTy(ImmTyTFE); }
- bool isD16() const { return isImmTy(ImmTyD16); }
bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
@@ -395,7 +398,6 @@ public:
bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
bool isNegLo() const { return isImmTy(ImmTyNegLo); }
bool isNegHi() const { return isImmTy(ImmTyNegHi); }
- bool isHigh() const { return isImmTy(ImmTyHigh); }
bool isRegOrImm() const {
return isReg() || isImm();
@@ -512,7 +514,15 @@ public:
return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
}
+ bool isVCSrcTB16() const {
+ return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
+ }
+
bool isVCSrcTB16_Lo128() const {
+ return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
+ }
+
+ bool isVCSrcFake16B16_Lo128() const {
return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
}
@@ -532,7 +542,15 @@ public:
return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
}
+ bool isVCSrcTF16() const {
+ return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
+ }
+
bool isVCSrcTF16_Lo128() const {
+ return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
+ }
+
+ bool isVCSrcFake16F16_Lo128() const {
return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
}
@@ -552,10 +570,16 @@ public:
return isVCSrcF64() || isLiteralImm(MVT::i64);
}
+ bool isVSrcTB16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
+
bool isVSrcTB16_Lo128() const {
return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
}
+ bool isVSrcFake16B16_Lo128() const {
+ return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
+ }
+
bool isVSrcB16() const {
return isVCSrcB16() || isLiteralImm(MVT::i16);
}
@@ -588,10 +612,16 @@ public:
return isVCSrcF64() || isLiteralImm(MVT::f64);
}
+ bool isVSrcTF16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
+
bool isVSrcTF16_Lo128() const {
return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
}
+ bool isVSrcFake16F16_Lo128() const {
+ return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
+ }
+
bool isVSrcF16() const {
return isVCSrcF16() || isLiteralImm(MVT::f16);
}
@@ -863,6 +893,7 @@ public:
bool isSDelayALU() const;
bool isHwreg() const;
bool isSendMsg() const;
+ bool isSplitBarrier() const;
bool isSwizzle() const;
bool isSMRDOffset8() const;
bool isSMEMOffset() const;
@@ -879,6 +910,10 @@ public:
bool isWaitVDST() const;
bool isWaitEXP() const;
+ auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
+ return std::bind(P, *this);
+ }
+
StringRef getToken() const {
assert(isToken());
return StringRef(Tok.Data, Tok.Length);
@@ -1344,7 +1379,7 @@ public:
// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
MCContext &Ctx = getContext();
- if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
+ if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
MCSymbol *Sym =
Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
@@ -1361,7 +1396,7 @@ public:
Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
}
- if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
+ if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
initializeGprCountSymbol(IS_VGPR);
initializeGprCountSymbol(IS_SGPR);
} else
@@ -1381,6 +1416,8 @@ public:
bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
+ bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
+
bool isSI() const {
return AMDGPU::isSI(getSTI());
}
@@ -1424,6 +1461,10 @@ public:
return AMDGPU::isGFX11Plus(getSTI());
}
+ bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
+
+ bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
+
bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
bool isGFX10_BEncoding() const {
@@ -1456,10 +1497,16 @@ public:
return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
}
- unsigned getNSAMaxSize() const {
- return AMDGPU::getNSAMaxSize(getSTI());
+ unsigned getNSAMaxSize(bool HasSampler = false) const {
+ return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
+ }
+
+ unsigned getMaxNumUserSGPRs() const {
+ return AMDGPU::getMaxNumUserSGPRs(getSTI());
}
+ bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
+
AMDGPUTargetStreamer &getTargetStreamer() {
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
return static_cast<AMDGPUTargetStreamer &>(TS);
@@ -1493,10 +1540,9 @@ public:
std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
bool RestoreOnFailure);
- bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
- OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
unsigned checkTargetMatchPredicate(MCInst &Inst) override;
unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
@@ -1531,6 +1577,8 @@ public:
AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
ParseStatus parseCPol(OperandVector &Operands);
+ ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
+ ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
SMLoc &StringLoc);
@@ -1540,9 +1588,11 @@ public:
bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
bool parseSP3NegModifier();
- ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
+ ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
+ bool HasLit = false);
ParseStatus parseReg(OperandVector &Operands);
- ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
+ ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
+ bool HasLit = false);
ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
bool AllowImm = true);
ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
@@ -1616,6 +1666,7 @@ private:
SMLoc getInstLoc(const OperandVector &Operands) const;
bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
+ bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
bool validateSOPLiteral(const MCInst &Inst) const;
@@ -1640,11 +1691,14 @@ private:
bool validateAGPRLdSt(const MCInst &Inst) const;
bool validateVGPRAlign(const MCInst &Inst) const;
bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
+ bool validateDS(const MCInst &Inst, const OperandVector &Operands);
bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
bool validateDivScale(const MCInst &Inst);
bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
const SMLoc &IDLoc);
+ bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
+ const unsigned CPol);
bool validateExeczVcczOperands(const OperandVector &Operands);
bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
@@ -1733,7 +1787,6 @@ public:
void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
- void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
bool parseDimId(unsigned &Encoding);
ParseStatus parseDim(OperandVector &Operands);
@@ -1805,6 +1858,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
case AMDGPU::OPERAND_REG_IMM_V2INT32:
case AMDGPU::OPERAND_KIMM32:
+ case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
return &APFloat::IEEEsingle();
case AMDGPU::OPERAND_REG_IMM_INT64:
case AMDGPU::OPERAND_REG_IMM_FP64:
@@ -1987,7 +2041,7 @@ bool AMDGPUOperand::isVRegWithInputMods() const {
return isRegClass(AMDGPU::VGPR_32RegClassID) ||
// GFX90A allows DPP on 64-bit operands.
(isRegClass(AMDGPU::VReg_64RegClassID) &&
- AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
+ AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
}
bool AMDGPUOperand::isT16VRegWithInputMods() const {
@@ -2096,9 +2150,10 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
"Can't encode literal as exact 64-bit floating-point operand. "
"Low 32-bits will be set to zero");
+ Val &= 0xffffffff00000000u;
}
- Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
+ Inst.addOperand(MCOperand::createImm(Val));
setImmKindLiteral();
return;
}
@@ -2133,7 +2188,8 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
case AMDGPU::OPERAND_REG_IMM_V2INT32:
case AMDGPU::OPERAND_KIMM32:
- case AMDGPU::OPERAND_KIMM16: {
+ case AMDGPU::OPERAND_KIMM16:
+ case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: {
bool lost;
APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
// Convert literal to single precision
@@ -2174,6 +2230,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
case AMDGPU::OPERAND_REG_IMM_V2INT32:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
+ case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
if (isSafeTruncation(Val, 32) &&
AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
@@ -2197,7 +2254,10 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
return;
}
- Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
+ Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
+ : Lo_32(Val);
+
+ Inst.addOperand(MCOperand::createImm(Val));
setImmKindLiteral();
return;
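Taken together, the two hunks above change how literals for 64-bit FP source operands are carried: the warning path now zeroes the low half explicitly, and instead of pre-shifting with lshr(32) the literal is placed in the high half at encode time. A standalone model of the bit manipulation (not LLVM code):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Inexact 64-bit FP literal: only the high 32 bits survive.
      uint64_t Val = 0x3fd5555555555555u; // bit pattern of 1.0/3.0
      Val &= 0xffffffff00000000u;         // low 32 bits forced to zero
      printf("truncated: 0x%016llx\n", (unsigned long long)Val);

      // A 32-bit literal feeding a 64-bit FP operand lands in the high half;
      // for integer operands it stays in the low half (Lo_32).
      uint64_t Src = 0x3ff0000000000000u >> 32; // high word of double 1.0
      printf("fp: 0x%016llx int: 0x%016llx\n",
             (unsigned long long)(Src << 32),          // FP operand path
             (unsigned long long)(Src & 0xffffffffu)); // integer operand path
    }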
@@ -2424,23 +2484,21 @@ bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
return false;
}
-bool AMDGPUAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
+bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) {
- return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
+ return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}
-OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(MCRegister &RegNo,
- SMLoc &StartLoc,
- SMLoc &EndLoc) {
- bool Result =
- ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
+ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
bool PendingErrors = getParser().hasPendingError();
getParser().clearPendingErrors();
if (PendingErrors)
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
if (Result)
- return MatchOperand_NoMatch;
- return MatchOperand_Success;
+ return ParseStatus::NoMatch;
+ return ParseStatus::Success;
}
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
@@ -2517,7 +2575,7 @@ static bool isRegularReg(RegisterKind Kind) {
static const RegInfo* getRegularRegInfo(StringRef Str) {
for (const RegInfo &Reg : RegularRegisters)
- if (Str.startswith(Reg.Name))
+ if (Str.starts_with(Reg.Name))
return &Reg;
return nullptr;
}
@@ -2577,7 +2635,7 @@ AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
// SGPR and TTMP registers must be aligned.
// Max required alignment is 4 dwords.
- AlignSize = std::min(RegWidth / 32, 4u);
+ AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
}
if (RegNum % AlignSize != 0) {
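Plain RegWidth / 32 yields a non-power-of-two "alignment" for 3-dword tuples (96-bit registers), which appears to be the motivation for the fix: rounding up with bit_ceil restores a power-of-two alignment, still capped at 4 dwords. A self-contained check (C++20 for std::bit_ceil):

    #include <algorithm>
    #include <bit>
    #include <cstdio>

    // Model of the fixed alignment rule for SGPR/TTMP tuples.
    unsigned alignSizeInDwords(unsigned RegWidthBits) {
      return std::min(std::bit_ceil(RegWidthBits / 32), 4u);
    }

    int main() {
      printf("%u %u %u %u\n",
             alignSizeInDwords(32),   // 1
             alignSizeInDwords(64),   // 2
             alignSizeInDwords(96),   // 4 (plain RegWidth / 32 gave 3)
             alignSizeInDwords(256)); // 4 (capped at 4 dwords)
    }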
@@ -2855,7 +2913,7 @@ AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
return nullptr;
}
- if (isHsaAbiVersion3AndAbove(&getSTI())) {
+ if (isHsaAbi(getSTI())) {
if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
return nullptr;
} else
@@ -2864,13 +2922,26 @@ AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
}
ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
- bool HasSP3AbsModifier) {
+ bool HasSP3AbsModifier, bool HasLit) {
// TODO: add syntactic sugar for 1/(2*PI)
if (isRegister())
return ParseStatus::NoMatch;
assert(!isModifier());
+ if (!HasLit) {
+ HasLit = trySkipId("lit");
+ if (HasLit) {
+ if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
+ return ParseStatus::Failure;
+ ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
+ if (S.isSuccess() &&
+ !skipToken(AsmToken::RParen, "expected closing parentheses"))
+ return ParseStatus::Failure;
+ return S;
+ }
+ }
+
const auto& Tok = getToken();
const auto& NextTok = peekToken();
bool IsReal = Tok.is(AsmToken::Real);
@@ -2883,6 +2954,9 @@ ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
Negate = true;
}
+ AMDGPUOperand::Modifiers Mods;
+ Mods.Lit = HasLit;
+
if (IsReal) {
// Floating-point expressions are not supported.
// Can only allow floating-point literals with an
@@ -2901,6 +2975,8 @@ ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
Operands.push_back(
AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
AMDGPUOperand::ImmTyNone, true));
+ AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
+ Op.setModifiers(Mods);
return ParseStatus::Success;
@@ -2927,7 +3003,11 @@ ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
if (Expr->evaluateAsAbsolute(IntVal)) {
Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
+ AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
+ Op.setModifiers(Mods);
} else {
+ if (HasLit)
+ return ParseStatus::NoMatch;
Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
}
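For example (syntax inferred from the parser above, not from documentation): v_add_f32 v0, lit(1.0), v1 would force 1.0 to be encoded as a 32-bit literal even though it is otherwise representable as an inline constant.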
@@ -2950,13 +3030,13 @@ ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
}
ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
- bool HasSP3AbsMod) {
+ bool HasSP3AbsMod, bool HasLit) {
ParseStatus Res = parseReg(Operands);
if (!Res.isNoMatch())
return Res;
if (isModifier())
return ParseStatus::NoMatch;
- return parseImm(Operands, HasSP3AbsMod);
+ return parseImm(Operands, HasSP3AbsMod, HasLit);
}
bool
@@ -3052,6 +3132,7 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
bool AllowImm) {
bool Neg, SP3Neg;
bool Abs, SP3Abs;
+ bool Lit;
SMLoc Loc;
// Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
@@ -3071,6 +3152,10 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
return ParseStatus::Failure;
+ Lit = trySkipId("lit");
+ if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
+ return ParseStatus::Failure;
+
Loc = getLoc();
SP3Abs = trySkipToken(AsmToken::Pipe);
if (Abs && SP3Abs)
@@ -3078,12 +3163,15 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
ParseStatus Res;
if (AllowImm) {
- Res = parseRegOrImm(Operands, SP3Abs);
+ Res = parseRegOrImm(Operands, SP3Abs, Lit);
} else {
Res = parseReg(Operands);
}
if (!Res.isSuccess())
- return (SP3Neg || Neg || SP3Abs || Abs) ? ParseStatus::Failure : Res;
+ return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
+
+ if (Lit && !Operands.back()->isImm())
+ Error(Loc, "expected immediate with lit modifier");
if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
return ParseStatus::Failure;
@@ -3091,12 +3179,15 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
return ParseStatus::Failure;
if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
return ParseStatus::Failure;
+ if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
+ return ParseStatus::Failure;
AMDGPUOperand::Modifiers Mods;
Mods.Abs = Abs || SP3Abs;
Mods.Neg = Neg || SP3Neg;
+ Mods.Lit = Lit;
- if (Mods.hasFPModifiers()) {
+ if (Mods.hasFPModifiers() || Lit) {
AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
if (Op.isExpr())
return Error(Op.getStartLoc(), "expected an absolute expression");
@@ -3325,12 +3416,16 @@ unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
case AMDGPU::V_LSHLREV_B64_e64:
case AMDGPU::V_LSHLREV_B64_gfx10:
case AMDGPU::V_LSHLREV_B64_e64_gfx11:
+ case AMDGPU::V_LSHLREV_B64_e32_gfx12:
+ case AMDGPU::V_LSHLREV_B64_e64_gfx12:
case AMDGPU::V_LSHRREV_B64_e64:
case AMDGPU::V_LSHRREV_B64_gfx10:
case AMDGPU::V_LSHRREV_B64_e64_gfx11:
+ case AMDGPU::V_LSHRREV_B64_e64_gfx12:
case AMDGPU::V_ASHRREV_I64_e64:
case AMDGPU::V_ASHRREV_I64_gfx10:
case AMDGPU::V_ASHRREV_I64_e64_gfx11:
+ case AMDGPU::V_ASHRREV_I64_e64_gfx12:
case AMDGPU::V_LSHL_B64_e64:
case AMDGPU::V_LSHR_B64_e64:
case AMDGPU::V_ASHR_I64_e64:
@@ -3485,8 +3580,12 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
: MCRegister::NoRegister;
};
+ // On GFX12, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2 source-cache.
+ bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
+
const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
- auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(getVRegIdx);
+ auto InvalidCompOprIdx =
+ InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
if (!InvalidCompOprIdx)
return true;
@@ -3522,13 +3621,16 @@ bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
return true;
}
+constexpr uint64_t MIMGFlags =
+ SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
+
bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
const SMLoc &IDLoc) {
const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);
- if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
+ if ((Desc.TSFlags & MIMGFlags) == 0)
return true;
int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
@@ -3574,7 +3676,7 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);
- if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
+ if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
return true;
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
@@ -3582,7 +3684,9 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
- int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
+ int RSrcOpName = Desc.TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
+ : AMDGPU::OpName::rsrc;
+ int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
@@ -3590,7 +3694,7 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
assert(SrsrcIdx != -1);
assert(SrsrcIdx > VAddr0Idx);
- bool IsA16 = Inst.getOperand(A16Idx).getImm();
+ bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
if (BaseOpcode->BVH) {
if (IsA16 == BaseOpcode->A16)
return true;
@@ -3609,7 +3713,9 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
if (IsNSA) {
- if (hasPartialNSAEncoding() && ExpectedAddrSize > getNSAMaxSize()) {
+ if (hasPartialNSAEncoding() &&
+ ExpectedAddrSize >
+ getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
int VAddrLastIdx = SrsrcIdx - 1;
unsigned VAddrLastSize =
AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
@@ -3639,7 +3745,7 @@ bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);
- if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
+ if ((Desc.TSFlags & MIMGFlags) == 0)
return true;
if (!Desc.mayLoad() || !Desc.mayStore())
return true; // Not atomic
@@ -3677,7 +3783,7 @@ bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);
- if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
+ if ((Desc.TSFlags & MIMGFlags) == 0)
return true;
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
@@ -3854,7 +3960,7 @@ bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);
- if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
+ if ((Desc.TSFlags & MIMGFlags) == 0)
return true;
int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
@@ -4038,6 +4144,40 @@ SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
return getLoc();
}
+bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
+ const OperandVector &Operands) {
+ auto Opcode = Inst.getOpcode();
+ auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
+ if (OpNum == -1)
+ return true;
+
+ uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
+ if ((TSFlags & SIInstrFlags::FLAT))
+ return validateFlatOffset(Inst, Operands);
+
+ if ((TSFlags & SIInstrFlags::SMRD))
+ return validateSMEMOffset(Inst, Operands);
+
+ const auto &Op = Inst.getOperand(OpNum);
+ if (isGFX12Plus() &&
+ (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
+ const unsigned OffsetSize = 24;
+ if (!isIntN(OffsetSize, Op.getImm())) {
+ Error(getFlatOffsetLoc(Operands),
+ Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
+ return false;
+ }
+ } else {
+ const unsigned OffsetSize = 16;
+ if (!isUIntN(OffsetSize, Op.getImm())) {
+ Error(getFlatOffsetLoc(Operands),
+ Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
+ return false;
+ }
+ }
+ return true;
+}
+
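+// Rough summary of what validateOffset enforces (the ranges follow from the
+// isIntN/isUIntN checks above):
+//   GFX12 MUBUF/MTBUF : offset in [-0x800000, 0x7fffff] (24-bit signed)
+//   other buffer ops  : offset in [0, 0xffff]           (16-bit unsigned)
+// FLAT and SMEM offsets are delegated to their dedicated validators.
+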
bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
const OperandVector &Operands) {
uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
@@ -4055,11 +4195,12 @@ bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
return false;
}
- // For FLAT segment the offset must be positive;
+ // For pre-GFX12 FLAT instructions the offset must be positive;
// MSB is ignored and forced to zero.
unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
bool AllowNegative =
- TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
+ (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
+ isGFX12Plus();
if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
Error(getFlatOffsetLoc(Operands),
Twine("expected a ") +
@@ -4106,8 +4247,9 @@ bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
return true;
Error(getSMEMOffsetLoc(Operands),
- (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
- "expected a 21-bit signed offset");
+ isGFX12Plus() ? "expected a 24-bit signed offset"
+ : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
+ : "expected a 21-bit signed offset");
return false;
}
@@ -4189,21 +4331,35 @@ bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
const OperandVector &Operands) {
const unsigned Opc = Inst.getOpcode();
int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
- if (DppCtrlIdx < 0)
- return true;
- unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
+ if (DppCtrlIdx >= 0) {
+ unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
- if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
- // DPP64 is supported for row_newbcast only.
- int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
- if (Src0Idx >= 0 &&
- getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
+ if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
+ AMDGPU::isDPALU_DPP(MII.get(Opc))) {
+ // DP ALU DPP is supported for row_newbcast only on GFX9*
SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
- Error(S, "64 bit dpp only supports row_newbcast");
+ Error(S, "DP ALU dpp only supports row_newbcast");
return false;
}
}
+ int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
+ bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
+
+ if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
+ int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
+ if (Src1Idx >= 0) {
+ const MCOperand &Src1 = Inst.getOperand(Src1Idx);
+ const MCRegisterInfo *TRI = getContext().getRegisterInfo();
+ if (Src1.isImm() ||
+ (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI))) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[Src1Idx]);
+ Error(Op.getStartLoc(), "invalid operand for instruction");
+ return false;
+ }
+ }
+ }
+
return true;
}
@@ -4241,7 +4397,19 @@ bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
continue;
if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
- uint32_t Value = static_cast<uint32_t>(MO.getImm());
+ uint64_t Value = static_cast<uint64_t>(MO.getImm());
+ bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
+ AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
+ bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
+
+ if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
+ Error(getLitLoc(Operands), "invalid operand for instruction");
+ return false;
+ }
+
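+ // For an fp64 operand, a 64-bit literal is encodable only when its low
+ // 32 bits are zero (e.g. 1.0 = 0x3FF0000000000000); the high half is what
+ // actually gets encoded below.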
+ if (IsFP64 && IsValid32Op)
+ Value = Hi_32(Value);
+
if (NumLiterals == 0 || LiteralValue != Value) {
LiteralValue = Value;
++NumLiterals;
@@ -4359,7 +4527,7 @@ bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
SMLoc BLGPLoc = getBLGPLoc(Operands);
if (!BLGPLoc.isValid())
return true;
- bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
+ bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
auto FB = getFeatureBits();
bool UsesNeg = false;
if (FB[AMDGPU::FeatureGFX940Insts]) {
@@ -4405,6 +4573,29 @@ bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
return false;
}
+bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
+ const OperandVector &Operands) {
+ uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
+ if ((TSFlags & SIInstrFlags::DS) == 0)
+ return true;
+ if (TSFlags & SIInstrFlags::GWS)
+ return validateGWS(Inst, Operands);
+ // Only validate GDS for non-GWS instructions.
+ if (hasGDS())
+ return true;
+ int GDSIdx =
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
+ if (GDSIdx < 0)
+ return true;
+ unsigned GDS = Inst.getOperand(GDSIdx).getImm();
+ if (GDS) {
+ SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
+ Error(S, "gds modifier is not supported on this GPU");
+ return false;
+ }
+ return true;
+}
+
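+// Illustrative (assumed) rejection on subtargets without GDS support:
+//   ds_add_u32 v0, v1 gds  -> "gds modifier is not supported on this GPU"
+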
// gfx90a has an undocumented limitation:
// DS_GWS opcodes must use even aligned registers.
bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
@@ -4443,6 +4634,9 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
unsigned CPol = Inst.getOperand(CPolPos).getImm();
+ if (isGFX12Plus())
+ return validateTHAndScopeBits(Inst, Operands, CPol);
+
uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
if (TSFlags & SIInstrFlags::SMRD) {
if (CPol && (isSI() || isCI())) {
@@ -4457,11 +4651,17 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
}
if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
- SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
- StringRef CStr(S.getPointer());
- S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
- Error(S, "scc is not supported on this GPU");
- return false;
+ const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
+ SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
+ SIInstrFlags::FLAT;
+ if (!(TSFlags & AllowSCCModifier)) {
+ SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
+ StringRef CStr(S.getPointer());
+ S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
+ Error(S,
+ "scc modifier is not supported for this instruction on this GPU");
+ return false;
+ }
}
if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
@@ -4488,6 +4688,60 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
return true;
}
+bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
+ const OperandVector &Operands,
+ const unsigned CPol) {
+ const unsigned TH = CPol & AMDGPU::CPol::TH;
+ const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
+
+ const unsigned Opcode = Inst.getOpcode();
+ const MCInstrDesc &TID = MII.get(Opcode);
+
+ auto PrintError = [&](StringRef Msg) {
+ SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
+ Error(S, Msg);
+ return false;
+ };
+
+ if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
+ (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
+ (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN)))
+ return PrintError("instruction must use th:TH_ATOMIC_RETURN");
+
+ if (TH == 0)
+ return true;
+
+ if ((TID.TSFlags & SIInstrFlags::SMRD) &&
+ ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
+ (TH == AMDGPU::CPol::TH_NT_HT)))
+ return PrintError("invalid th value for SMEM instruction");
+
+ if (TH == AMDGPU::CPol::TH_BYPASS) {
+ if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
+ CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
+ (Scope == AMDGPU::CPol::SCOPE_SYS &&
+ !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
+ return PrintError("scope and th combination is not valid");
+ }
+
+ bool IsStore = TID.mayStore();
+ bool IsAtomic =
+ TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet);
+
+ if (IsAtomic) {
+ if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
+ return PrintError("invalid th value for atomic instructions");
+ } else if (IsStore) {
+ if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
+ return PrintError("invalid th value for store instructions");
+ } else {
+ if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
+ return PrintError("invalid th value for load instructions");
+ }
+
+ return true;
+}
+
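+// Assumed asm examples of the GFX12 rules enforced above:
+//   flat_atomic_add_u32 v0, v[2:3], v4 th:TH_ATOMIC_RETURN  -> accepted
+//   global_load_b32 v0, v[2:3], off th:TH_ATOMIC_NT -> rejected, an atomic
+//   temporal hint is invalid on a load
+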
bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
if (!isGFX11Plus())
return true;
@@ -4582,10 +4836,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
if (!validateMovrels(Inst, Operands)) {
return false;
}
- if (!validateFlatOffset(Inst, Operands)) {
- return false;
- }
- if (!validateSMEMOffset(Inst, Operands)) {
+ if (!validateOffset(Inst, Operands)) {
return false;
}
if (!validateMAIAccWrite(Inst, Operands)) {
@@ -4613,7 +4864,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
"invalid register class: vgpr tuples must be 64 bit aligned");
return false;
}
- if (!validateGWS(Inst, Operands)) {
+ if (!validateDS(Inst, Operands)) {
return false;
}
@@ -4888,7 +5139,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
return TokError("directive only supported for amdgcn architecture");
- if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
+ if (!isHsaAbi(getSTI()))
return TokError("directive only supported for amdhsa OS");
StringRef KernelName;
@@ -4905,6 +5156,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
uint64_t NextFreeVGPR = 0;
uint64_t AccumOffset = 0;
uint64_t SharedVGPRCount = 0;
+ uint64_t PreloadLength = 0;
+ uint64_t PreloadOffset = 0;
SMRange SGPRRange;
uint64_t NextFreeSGPR = 0;
@@ -4973,6 +5226,28 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
Val, ValRange);
if (Val)
ImpliedUserSGPRCount += 4;
+ } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
+ if (!hasKernargPreload())
+ return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
+
+ if (Val > getMaxNumUserSGPRs())
+ return OutOfRangeError(ValRange);
+ PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val,
+ ValRange);
+ if (Val) {
+ ImpliedUserSGPRCount += Val;
+ PreloadLength = Val;
+ }
+ } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
+ if (!hasKernargPreload())
+ return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
+
+ if (Val >= 1024)
+ return OutOfRangeError(ValRange);
+ PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val,
+ ValRange);
+ if (Val)
+ PreloadOffset = Val;
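+ // Example (assumed) directive usage inside an .amdhsa_kernel body:
+ //   .amdhsa_user_sgpr_kernarg_preload_length 2
+ //   .amdhsa_user_sgpr_kernarg_preload_offset 0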
} else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
@@ -5104,15 +5379,21 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
ValRange);
} else if (ID == ".amdhsa_dx10_clamp") {
+ if (IVersion.Major >= 12)
+ return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
+ COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val,
+ ValRange);
} else if (ID == ".amdhsa_ieee_mode") {
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
- Val, ValRange);
+ if (IVersion.Major >= 12)
+ return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val,
+ ValRange);
} else if (ID == ".amdhsa_fp16_overflow") {
if (IVersion.Major < 9)
return Error(IDRange.Start, "directive requires gfx9+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
ValRange);
} else if (ID == ".amdhsa_tg_split") {
if (!isGFX90A())
@@ -5122,17 +5403,17 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
} else if (ID == ".amdhsa_workgroup_processor_mode") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val,
ValRange);
} else if (ID == ".amdhsa_memory_ordered") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val,
ValRange);
} else if (ID == ".amdhsa_forward_progress") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val,
ValRange);
} else if (ID == ".amdhsa_shared_vgpr_count") {
if (IVersion.Major < 10)
@@ -5171,6 +5452,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
Val, ValRange);
+ } else if (ID == ".amdhsa_round_robin_scheduling") {
+ if (IVersion.Major < 12)
+ return Error(IDRange.Start, "directive requires gfx12+", IDRange);
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, Val,
+ ValRange);
} else {
return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
}
@@ -5218,6 +5505,11 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
UserSGPRCount);
+ if (PreloadLength && KD.kernarg_size &&
+ (PreloadLength * 4 + PreloadOffset * 4 > KD.kernarg_size))
+ return TokError("Kernarg preload length + offset is larger than the "
+ "kernarg segment size");
+
if (isGFX90A()) {
if (!Seen.contains(".amdhsa_accum_offset"))
return TokError(".amdhsa_accum_offset directive is required");
@@ -5319,6 +5611,18 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
}
Lex();
+ if (ID == "enable_dx10_clamp") {
+ if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
+ isGFX12Plus())
+ return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
+ }
+
+ if (ID == "enable_ieee_mode") {
+ if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
+ isGFX12Plus())
+ return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
+ }
+
if (ID == "enable_wavefront_size32") {
if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
if (!isGFX10Plus())
@@ -5419,33 +5723,15 @@ bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
}
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
- const char *AssemblerDirectiveBegin;
- const char *AssemblerDirectiveEnd;
- std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
- isHsaAbiVersion3AndAbove(&getSTI())
- ? std::pair(HSAMD::V3::AssemblerDirectiveBegin,
- HSAMD::V3::AssemblerDirectiveEnd)
- : std::pair(HSAMD::AssemblerDirectiveBegin,
- HSAMD::AssemblerDirectiveEnd);
-
- if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
- return Error(getLoc(),
- (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
- "not available on non-amdhsa OSes")).str());
- }
+ assert(isHsaAbi(getSTI()));
std::string HSAMetadataString;
- if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
- HSAMetadataString))
+ if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
+ HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
return true;
- if (isHsaAbiVersion3AndAbove(&getSTI())) {
- if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
- return Error(getLoc(), "invalid HSA metadata");
- } else {
- if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
- return Error(getLoc(), "invalid HSA metadata");
- }
+ if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
+ return Error(getLoc(), "invalid HSA metadata");
return false;
}
@@ -5588,7 +5874,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
- if (isHsaAbiVersion3AndAbove(&getSTI())) {
+ if (isHsaAbi(getSTI())) {
if (IDVal == ".amdhsa_kernel")
return ParseDirectiveAMDHSAKernel();
@@ -5611,8 +5897,12 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".amd_amdgpu_isa")
return ParseDirectiveISAVersion();
- if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
- return ParseDirectiveHSAMetadata();
+ if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) {
+ return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
+ Twine(" directive is "
+ "not available on non-amdhsa OSes"))
+ .str());
+ }
}
if (IDVal == ".amdgcn_target")
@@ -5753,20 +6043,20 @@ StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
setForcedDPP(false);
setForcedSDWA(false);
- if (Name.endswith("_e64_dpp")) {
+ if (Name.ends_with("_e64_dpp")) {
setForcedDPP(true);
setForcedEncodingSize(64);
return Name.substr(0, Name.size() - 8);
- } else if (Name.endswith("_e64")) {
+ } else if (Name.ends_with("_e64")) {
setForcedEncodingSize(64);
return Name.substr(0, Name.size() - 4);
- } else if (Name.endswith("_e32")) {
+ } else if (Name.ends_with("_e32")) {
setForcedEncodingSize(32);
return Name.substr(0, Name.size() - 4);
- } else if (Name.endswith("_dpp")) {
+ } else if (Name.ends_with("_dpp")) {
setForcedDPP(true);
return Name.substr(0, Name.size() - 4);
- } else if (Name.endswith("_sdwa")) {
+ } else if (Name.ends_with("_sdwa")) {
setForcedSDWA(true);
return Name.substr(0, Name.size() - 5);
}
@@ -5789,7 +6079,7 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
- bool IsMIMG = Name.startswith("image_");
+ bool IsMIMG = Name.starts_with("image_");
while (!trySkipToken(AsmToken::EndOfStatement)) {
OperandMode Mode = OperandMode_Default;
@@ -5929,7 +6219,7 @@ unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
bool &Disabling) const {
Disabling = Id.consume_front("no");
- if (isGFX940() && !Mnemo.startswith("s_")) {
+ if (isGFX940() && !Mnemo.starts_with("s_")) {
return StringSwitch<unsigned>(Id)
.Case("nt", AMDGPU::CPol::NT)
.Case("sc0", AMDGPU::CPol::SC0)
@@ -5946,6 +6236,47 @@ unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
}
ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
+ if (isGFX12Plus()) {
+ SMLoc StringLoc = getLoc();
+
+ int64_t CPolVal = 0;
+ ParseStatus ResTH = ParseStatus::NoMatch;
+ ParseStatus ResScope = ParseStatus::NoMatch;
+
+ for (;;) {
+ if (ResTH.isNoMatch()) {
+ int64_t TH;
+ ResTH = parseTH(Operands, TH);
+ if (ResTH.isFailure())
+ return ResTH;
+ if (ResTH.isSuccess()) {
+ CPolVal |= TH;
+ continue;
+ }
+ }
+
+ if (ResScope.isNoMatch()) {
+ int64_t Scope;
+ ResScope = parseScope(Operands, Scope);
+ if (ResScope.isFailure())
+ return ResScope;
+ if (ResScope.isSuccess()) {
+ CPolVal |= Scope;
+ continue;
+ }
+ }
+
+ break;
+ }
+
+ if (ResTH.isNoMatch() && ResScope.isNoMatch())
+ return ParseStatus::NoMatch;
+
+ Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
+ AMDGPUOperand::ImmTyCPol));
+ return ParseStatus::Success;
+ }
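+ // Assumed GFX12 syntax handled by the block above (modifiers in any order):
+ //   global_load_b32 v0, v[2:3], off th:TH_LOAD_NT scope:SCOPE_SYS
+ //   global_load_b32 v0, v[2:3], off scope:SCOPE_SYS th:TH_LOAD_NT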
+
StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
SMLoc OpLoc = getLoc();
unsigned Enabled = 0, Seen = 0;
@@ -5981,6 +6312,95 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
return ParseStatus::Success;
}
+ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
+ int64_t &Scope) {
+ Scope = AMDGPU::CPol::SCOPE_CU; // default
+
+ StringRef Value;
+ SMLoc StringLoc;
+ ParseStatus Res;
+
+ Res = parseStringWithPrefix("scope", Value, StringLoc);
+ if (!Res.isSuccess())
+ return Res;
+
+ Scope = StringSwitch<int64_t>(Value)
+ .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU)
+ .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE)
+ .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV)
+ .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS)
+ .Default(0xffffffff);
+
+ if (Scope == 0xffffffff)
+ return Error(StringLoc, "invalid scope value");
+
+ return ParseStatus::Success;
+}
+
+ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
+ TH = AMDGPU::CPol::TH_RT; // default
+
+ StringRef Value;
+ SMLoc StringLoc;
+ ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
+ if (!Res.isSuccess())
+ return Res;
+
+ if (Value == "TH_DEFAULT")
+ TH = AMDGPU::CPol::TH_RT;
+ else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
+ Value == "TH_LOAD_NT_WB") {
+ return Error(StringLoc, "invalid th value");
+ } else if (Value.starts_with("TH_ATOMIC_")) {
+ Value = Value.drop_front(10);
+ TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
+ } else if (Value.starts_with("TH_LOAD_")) {
+ Value = Value.drop_front(8);
+ TH = AMDGPU::CPol::TH_TYPE_LOAD;
+ } else if (Value.starts_with("TH_STORE_")) {
+ Value = Value.drop_front(9);
+ TH = AMDGPU::CPol::TH_TYPE_STORE;
+ } else {
+ return Error(StringLoc, "invalid th value");
+ }
+
+ if (Value == "BYPASS")
+ TH |= AMDGPU::CPol::TH_REAL_BYPASS;
+
+ if (TH != 0) {
+ if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
+ TH |= StringSwitch<int64_t>(Value)
+ .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
+ .Case("RT", AMDGPU::CPol::TH_RT)
+ .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
+ .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
+ .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
+ AMDGPU::CPol::TH_ATOMIC_RETURN)
+ .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
+ .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
+ AMDGPU::CPol::TH_ATOMIC_NT)
+ .Default(0xffffffff);
+ else
+ TH |= StringSwitch<int64_t>(Value)
+ .Case("RT", AMDGPU::CPol::TH_RT)
+ .Case("NT", AMDGPU::CPol::TH_NT)
+ .Case("HT", AMDGPU::CPol::TH_HT)
+ .Case("LU", AMDGPU::CPol::TH_LU)
+ .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
+ .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
+ .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
+ .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
+ .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
+ .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
+ .Default(0xffffffff);
+ }
+
+ if (TH == 0xffffffff)
+ return Error(StringLoc, "invalid th value");
+
+ return ParseStatus::Success;
+}
+
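+// How a th: token decomposes under the switches above (names from this file):
+//   th:TH_ATOMIC_NT_RETURN -> TH_TYPE_ATOMIC | TH_ATOMIC_NT | TH_ATOMIC_RETURN
+//   th:TH_LOAD_RT_WB       -> rejected early as an invalid th value
+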
static void addOptionalImmOperand(
MCInst& Inst, const OperandVector& Operands,
AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
@@ -6382,7 +6802,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
bool Failed = true;
- bool Sat = CntName.endswith("_sat");
+ bool Sat = CntName.ends_with("_sat");
if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
@@ -6855,7 +7275,7 @@ ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
if (!parseId(Str))
return ParseStatus::NoMatch;
- if (!Str.startswith("attr"))
+ if (!Str.starts_with("attr"))
return Error(S, "invalid interpolation attribute");
StringRef Chan = Str.take_back(2);
@@ -6946,7 +7366,7 @@ bool
AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
if (isToken(AsmToken::Identifier)) {
StringRef Tok = getTokenStr();
- if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
+ if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
lex();
return true;
}
@@ -7578,66 +7998,6 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
}
//===----------------------------------------------------------------------===//
-// SMEM
-//===----------------------------------------------------------------------===//
-
-void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
- OptionalImmIndexMap OptionalIdx;
- bool IsAtomicReturn = false;
-
- for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
- AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
- if (!Op.isCPol())
- continue;
- IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
- break;
- }
-
- if (!IsAtomicReturn) {
- int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
- if (NewOpc != -1)
- Inst.setOpcode(NewOpc);
- }
-
- IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
- SIInstrFlags::IsAtomicRet;
-
- for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
- AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
-
- // Add the register arguments
- if (Op.isReg()) {
- Op.addRegOperands(Inst, 1);
- if (IsAtomicReturn && i == 1)
- Op.addRegOperands(Inst, 1);
- continue;
- }
-
- // Handle the case where soffset is an immediate
- if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
- Op.addImmOperands(Inst, 1);
- continue;
- }
-
- // Handle tokens like 'offen' which are sometimes hard-coded into the
- // asm string. There are no MCInst operands for these.
- if (Op.isToken()) {
- continue;
- }
- assert(Op.isImm());
-
- // Handle optional arguments
- OptionalIdx[Op.getImmTy()] = i;
- }
-
- if ((int)Inst.getNumOperands() <=
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
- addOptionalImmOperand(Inst, Operands, OptionalIdx,
- AMDGPUOperand::ImmTySMEMOffsetMod);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
-}
-
-//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//
@@ -7704,7 +8064,7 @@ void AMDGPUAsmParser::onBeginOfFile() {
// TODO: Should try to check code object version from directive???
AMDGPU::getAmdhsaCodeObjectVersion());
- if (isHsaAbiVersion3AndAbove(&getSTI()))
+ if (isHsaAbi(getSTI()))
getTargetStreamer().EmitDirectiveAMDGCNTarget();
}
@@ -8155,7 +8515,7 @@ bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
Token += Suffix;
StringRef DimId = Token;
- if (DimId.startswith("SQ_RSRC_IMG_"))
+ if (DimId.starts_with("SQ_RSRC_IMG_"))
DimId = DimId.drop_front(12);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
@@ -8838,3 +9198,9 @@ bool AMDGPUOperand::isWaitVDST() const {
bool AMDGPUOperand::isWaitEXP() const {
return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
}
+
+//===----------------------------------------------------------------------===//
+// Split Barrier
+//===----------------------------------------------------------------------===//
+
+bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/BUFInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/BUFInstructions.td
index ea1578e30ae8..43d35fa5291c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -12,6 +12,8 @@ def MUBUFOffset : ComplexPattern<iPTR, 3, "SelectMUBUFOffset">;
def MUBUFScratchOffen : ComplexPattern<iPTR, 4, "SelectMUBUFScratchOffen", [], [SDNPWantParent]>;
def MUBUFScratchOffset : ComplexPattern<iPTR, 3, "SelectMUBUFScratchOffset", [], [SDNPWantParent], 20>;
+def BUFSOffset : ComplexPattern<iPTR, 1, "SelectBUFSOffset">;
+
def BUFAddrKind {
int Offset = 0;
int OffEn = 1;
@@ -152,24 +154,32 @@ class MTBUF_Real <MTBUF_Pseudo ps, string real_name = ps.Mnemonic> :
}
class getMTBUFInsDA<list<RegisterClass> vdataList,
- list<RegisterClass> vaddrList=[]> {
+ list<RegisterClass> vaddrList=[], bit hasGFX12Enc> {
RegisterClass vdataClass = !if(!empty(vdataList), ?, !head(vdataList));
RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret;
- dag NonVaddrInputs = (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, CPol:$cpol, i1imm:$swz);
- dag Inputs = !if(!empty(vaddrList), NonVaddrInputs, !con((ins vaddrClass:$vaddr), NonVaddrInputs));
- dag ret = !if(!empty(vdataList), Inputs, !con((ins vdata_op:$vdata), Inputs));
-}
+ dag SOffset = !if(hasGFX12Enc, (ins SReg_32:$soffset),
+ (ins SCSrc_b32:$soffset));
+ dag NonVaddrInputs = !con((ins SReg_128:$srsrc), SOffset,
+ (ins offset:$offset, FORMAT:$format, CPol_0:$cpol, i1imm_0:$swz));
-class getMTBUFIns<int addrKind, list<RegisterClass> vdataList=[]> {
+ dag Inputs = !if(!empty(vaddrList),
+ NonVaddrInputs,
+ !con((ins vaddrClass:$vaddr), NonVaddrInputs));
+ dag ret = !if(!empty(vdataList),
+ Inputs,
+ !con((ins vdata_op:$vdata), Inputs));
+}
+
+class getMTBUFIns<int addrKind, list<RegisterClass> vdataList=[], bit hasGFX12Enc> {
dag ret =
- !if(!eq(addrKind, BUFAddrKind.Offset), getMTBUFInsDA<vdataList>.ret,
- !if(!eq(addrKind, BUFAddrKind.OffEn), getMTBUFInsDA<vdataList, [VGPR_32]>.ret,
- !if(!eq(addrKind, BUFAddrKind.IdxEn), getMTBUFInsDA<vdataList, [VGPR_32]>.ret,
- !if(!eq(addrKind, BUFAddrKind.BothEn), getMTBUFInsDA<vdataList, [VReg_64]>.ret,
- !if(!eq(addrKind, BUFAddrKind.Addr64), getMTBUFInsDA<vdataList, [VReg_64]>.ret,
+ !if(!eq(addrKind, BUFAddrKind.Offset), getMTBUFInsDA<vdataList, [], hasGFX12Enc>.ret,
+ !if(!eq(addrKind, BUFAddrKind.OffEn), getMTBUFInsDA<vdataList, [VGPR_32], hasGFX12Enc>.ret,
+ !if(!eq(addrKind, BUFAddrKind.IdxEn), getMTBUFInsDA<vdataList, [VGPR_32], hasGFX12Enc>.ret,
+ !if(!eq(addrKind, BUFAddrKind.BothEn), getMTBUFInsDA<vdataList, [VReg_64], hasGFX12Enc>.ret,
+ !if(!eq(addrKind, BUFAddrKind.Addr64), getMTBUFInsDA<vdataList, [VReg_64], hasGFX12Enc>.ret,
(ins))))));
}
@@ -204,12 +214,13 @@ class MTBUF_Load_Pseudo <string opName,
int addrKind,
RegisterClass vdataClass,
int elems,
+ bit hasGFX12Enc = 0,
list<dag> pattern=[],
// Workaround bug bz30254
int addrKindCopy = addrKind>
: MTBUF_Pseudo<opName,
(outs getLdStRegisterOperand<vdataClass>.ret:$vdata),
- getMTBUFIns<addrKindCopy>.ret,
+ getMTBUFIns<addrKindCopy, [], hasGFX12Enc>.ret,
getMTBUFAsmOps<addrKindCopy>.ret,
pattern>,
MTBUF_SetupAddr<addrKindCopy> {
@@ -219,38 +230,45 @@ class MTBUF_Load_Pseudo <string opName,
let elements = elems;
}
-multiclass MTBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
- int elems> {
+multiclass MTBUF_Pseudo_Loads_Helper<string opName, RegisterClass vdataClass,
+ int elems, bit hasGFX12Enc> {
- def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems>,
+ def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems, hasGFX12Enc>,
MTBUFAddr64Table<0, NAME>;
- def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, elems>,
+ def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, elems, hasGFX12Enc>,
MTBUFAddr64Table<1, NAME>;
- def _OFFEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>;
- def _IDXEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems>;
- def _BOTHEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems>;
+ def _OFFEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems, hasGFX12Enc>;
+ def _IDXEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems, hasGFX12Enc>;
+ def _BOTHEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems, hasGFX12Enc>;
let DisableWQM = 1 in {
- def _OFFSET_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems>;
- def _OFFEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>;
- def _IDXEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems>;
- def _BOTHEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems>;
+ def _OFFSET_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems, hasGFX12Enc>;
+ def _OFFEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems, hasGFX12Enc>;
+ def _IDXEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems, hasGFX12Enc>;
+ def _BOTHEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems, hasGFX12Enc>;
}
}
+multiclass MTBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
+ int elems> {
+ defm NAME : MTBUF_Pseudo_Loads_Helper<opName, vdataClass, elems, 0>;
+ defm _VBUFFER : MTBUF_Pseudo_Loads_Helper<opName, vdataClass, elems, 1>;
+}
+
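+// E.g. a single instantiation such as
+//   defm TBUFFER_LOAD_FORMAT_X : MTBUF_Pseudo_Loads<"tbuffer_load_format_x", VGPR_32, 1>;
+// now yields both TBUFFER_LOAD_FORMAT_X_OFFSET (SCSrc_b32 soffset) and a
+// GFX12 TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFSET (SReg_32 soffset), along with
+// the other address modes.
+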
class MTBUF_Store_Pseudo <string opName,
int addrKind,
RegisterClass vdataClass,
int elems,
+ bit hasGFX12Enc = 0,
list<dag> pattern=[],
// Workaround bug bz30254
int addrKindCopy = addrKind,
RegisterClass vdataClassCopy = vdataClass>
: MTBUF_Pseudo<opName,
(outs),
- getMTBUFIns<addrKindCopy, [vdataClassCopy]>.ret,
+ getMTBUFIns<addrKindCopy, [vdataClassCopy], hasGFX12Enc>.ret,
getMTBUFAsmOps<addrKindCopy>.ret,
pattern>,
MTBUF_SetupAddr<addrKindCopy> {
@@ -260,27 +278,32 @@ class MTBUF_Store_Pseudo <string opName,
let elements = elems;
}
-multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
- int elems> {
+multiclass MTBUF_Pseudo_Stores_Helper<string opName, RegisterClass vdataClass,
+ int elems, bit hasGFX12Enc> {
- def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems>,
+ def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems, hasGFX12Enc>,
MTBUFAddr64Table<0, NAME>;
- def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, elems>,
+ def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, elems, hasGFX12Enc>,
MTBUFAddr64Table<1, NAME>;
- def _OFFEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>;
- def _IDXEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems>;
- def _BOTHEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems>;
+ def _OFFEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems, hasGFX12Enc>;
+ def _IDXEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems, hasGFX12Enc>;
+ def _BOTHEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems, hasGFX12Enc>;
let DisableWQM = 1 in {
- def _OFFSET_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems>;
- def _OFFEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>;
- def _IDXEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems>;
- def _BOTHEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems>;
+ def _OFFSET_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems, hasGFX12Enc>;
+ def _OFFEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems, hasGFX12Enc>;
+ def _IDXEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems, hasGFX12Enc>;
+ def _BOTHEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems, hasGFX12Enc>;
}
}
+multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
+ int elems> {
+ defm NAME : MTBUF_Pseudo_Stores_Helper<opName, vdataClass, elems, 0>;
+ defm _VBUFFER : MTBUF_Pseudo_Stores_Helper<opName, vdataClass, elems, 1>;
+}
//===----------------------------------------------------------------------===//
// MUBUF classes
@@ -381,12 +404,14 @@ class getLdStVDataRegisterOperand<RegisterClass RC, bit isTFE> {
}
class getMUBUFInsDA<list<RegisterClass> vdataList,
- list<RegisterClass> vaddrList, bit isTFE> {
+ list<RegisterClass> vaddrList, bit isTFE, bit hasGFX12Enc> {
RegisterClass vdataClass = !if(!empty(vdataList), ?, !head(vdataList));
RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
RegisterOperand vdata_op = getLdStVDataRegisterOperand<vdataClass, isTFE>.ret;
- dag NonVaddrInputs = (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol, i1imm_0:$swz);
+ dag SOffset = !if(hasGFX12Enc, (ins SReg_32:$soffset), (ins SCSrc_b32:$soffset));
+ dag NonVaddrInputs = !con((ins SReg_128:$srsrc), SOffset, (ins offset:$offset, CPol_0:$cpol, i1imm_0:$swz));
+
dag Inputs = !if(!empty(vaddrList), NonVaddrInputs, !con((ins vaddrClass:$vaddr), NonVaddrInputs));
dag ret = !if(!empty(vdataList), Inputs, !con((ins vdata_op:$vdata), Inputs));
}
@@ -410,13 +435,13 @@ class getMUBUFElements<ValueType vt> {
);
}
-class getMUBUFIns<int addrKind, list<RegisterClass> vdataList, bit isTFE> {
+class getMUBUFIns<int addrKind, list<RegisterClass> vdataList, bit isTFE, bit hasGFX12Enc> {
dag ret =
- !if(!eq(addrKind, BUFAddrKind.Offset), getMUBUFInsDA<vdataList, [], isTFE>.ret,
- !if(!eq(addrKind, BUFAddrKind.OffEn), getMUBUFInsDA<vdataList, [VGPR_32], isTFE>.ret,
- !if(!eq(addrKind, BUFAddrKind.IdxEn), getMUBUFInsDA<vdataList, [VGPR_32], isTFE>.ret,
- !if(!eq(addrKind, BUFAddrKind.BothEn), getMUBUFInsDA<vdataList, [VReg_64], isTFE>.ret,
- !if(!eq(addrKind, BUFAddrKind.Addr64), getMUBUFInsDA<vdataList, [VReg_64], isTFE>.ret,
+ !if(!eq(addrKind, BUFAddrKind.Offset), getMUBUFInsDA<vdataList, [], isTFE, hasGFX12Enc>.ret,
+ !if(!eq(addrKind, BUFAddrKind.OffEn), getMUBUFInsDA<vdataList, [VGPR_32], isTFE, hasGFX12Enc>.ret,
+ !if(!eq(addrKind, BUFAddrKind.IdxEn), getMUBUFInsDA<vdataList, [VGPR_32], isTFE, hasGFX12Enc>.ret,
+ !if(!eq(addrKind, BUFAddrKind.BothEn), getMUBUFInsDA<vdataList, [VReg_64], isTFE, hasGFX12Enc>.ret,
+ !if(!eq(addrKind, BUFAddrKind.Addr64), getMUBUFInsDA<vdataList, [VReg_64], isTFE, hasGFX12Enc>.ret,
(ins))))));
}
@@ -456,6 +481,7 @@ class MUBUF_Load_Pseudo <string opName,
bit isLds = 0,
bit isLdsOpc = 0,
bit isTFE = 0,
+ bit hasGFX12Enc = 0,
list<dag> pattern=[],
// Workaround bug bz30254
int addrKindCopy = addrKind,
@@ -463,7 +489,7 @@ class MUBUF_Load_Pseudo <string opName,
RegisterOperand vdata_op = getLdStVDataRegisterOperand<vdata_rc, isTFE>.ret>
: MUBUF_Pseudo<opName,
!if(!or(isLds, isLdsOpc), (outs), (outs vdata_op:$vdata)),
- !con(getMUBUFIns<addrKindCopy, [], isTFE>.ret,
+ !con(getMUBUFIns<addrKindCopy, [], isTFE, hasGFX12Enc>.ret,
!if(HasTiedDest, (ins vdata_op:$vdata_in), (ins))),
getMUBUFAsmOps<addrKindCopy, !or(isLds, isLdsOpc), isLds, isTFE>.ret,
pattern>,
@@ -485,50 +511,61 @@ class MUBUF_Load_Pseudo <string opName,
let VALU = isLds;
}
-class MUBUF_Offset_Load_Pat <Instruction inst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> : Pat <
+class MUBUF_Offset_Load_Pat <Instruction inst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> : GCNPat <
(load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i32:$offset))),
(load_vt (inst v4i32:$srsrc, i32:$soffset, i32:$offset))
>;
class MUBUF_Addr64_Load_Pat <Instruction inst,
ValueType load_vt = i32,
- SDPatternOperator ld = null_frag> : Pat <
+ SDPatternOperator ld = null_frag> : GCNPat <
(load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i32:$offset))),
(load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i32:$offset))
>;
-multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> {
+multiclass MUBUF_Pseudo_Load_Pats_Common<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> {
def : MUBUF_Offset_Load_Pat<!cast<Instruction>(BaseInst#"_OFFSET"), load_vt, ld>;
def : MUBUF_Addr64_Load_Pat<!cast<Instruction>(BaseInst#"_ADDR64"), load_vt, ld>;
}
+multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag>{
+ let SubtargetPredicate = HasUnrestrictedSOffset in {
+ defm : MUBUF_Pseudo_Load_Pats_Common<BaseInst, load_vt, ld>;
+ }
+ defm : MUBUF_Pseudo_Load_Pats_Common<BaseInst # "_VBUFFER", load_vt, ld>;
+}
+
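+// The BUFFER_LOAD_* pattern defms below therefore emit the legacy pseudo
+// patterns only under HasUnrestrictedSOffset, alongside matching _VBUFFER
+// patterns for the GFX12-style encodings.
+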
multiclass MUBUF_Pseudo_Loads_Helper<string opName, ValueType load_vt,
- bit TiedDest, bit isLds, bit isTFE> {
+ bit TiedDest, bit isLds, bit isTFE, bit hasGFX12Enc> {
defvar legal_load_vt = !if(!eq(load_vt, v3f16), v4f16, load_vt);
- def _OFFSET : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, legal_load_vt, TiedDest, isLds, 0, isTFE>,
+ def _OFFSET : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, legal_load_vt, TiedDest, isLds, 0, isTFE, hasGFX12Enc>,
MUBUFAddr64Table<0, NAME # !if(isLds, "_LDS", "")>;
- def _ADDR64 : MUBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, legal_load_vt, TiedDest, isLds, 0, isTFE>,
+ def _ADDR64 : MUBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, legal_load_vt, TiedDest, isLds, 0, isTFE, hasGFX12Enc>,
MUBUFAddr64Table<1, NAME # !if(isLds, "_LDS", "")>;
- def _OFFEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, legal_load_vt, TiedDest, isLds, 0, isTFE>;
- def _IDXEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, legal_load_vt, TiedDest, isLds, 0, isTFE>;
- def _BOTHEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, legal_load_vt, TiedDest, isLds, 0, isTFE>;
+ def _OFFEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, legal_load_vt, TiedDest, isLds, 0, isTFE, hasGFX12Enc>;
+ def _IDXEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, legal_load_vt, TiedDest, isLds, 0, isTFE, hasGFX12Enc>;
+ def _BOTHEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, legal_load_vt, TiedDest, isLds, 0, isTFE, hasGFX12Enc>;
let DisableWQM = 1 in {
- def _OFFSET_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, legal_load_vt, TiedDest, isLds, 0, isTFE>;
- def _OFFEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, legal_load_vt, TiedDest, isLds, 0, isTFE>;
- def _IDXEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, legal_load_vt, TiedDest, isLds, 0, isTFE>;
- def _BOTHEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, legal_load_vt, TiedDest, isLds, 0, isTFE>;
+ def _OFFSET_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, legal_load_vt, TiedDest, isLds, 0, isTFE, hasGFX12Enc>;
+ def _OFFEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, legal_load_vt, TiedDest, isLds, 0, isTFE, hasGFX12Enc>;
+ def _IDXEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, legal_load_vt, TiedDest, isLds, 0, isTFE, hasGFX12Enc>;
+ def _BOTHEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, legal_load_vt, TiedDest, isLds, 0, isTFE, hasGFX12Enc>;
}
}
multiclass MUBUF_Pseudo_Loads<string opName, ValueType load_vt = i32,
bit TiedDest = 0, bit isLds = 0> {
- defm NAME : MUBUF_Pseudo_Loads_Helper<opName, load_vt, TiedDest, isLds, 0>;
- if !not(isLds) then
- defm _TFE : MUBUF_Pseudo_Loads_Helper<opName, load_vt, TiedDest, isLds, 1>;
+ defm NAME : MUBUF_Pseudo_Loads_Helper<opName, load_vt, TiedDest, isLds, 0, 0>;
+ defm _VBUFFER : MUBUF_Pseudo_Loads_Helper<opName, load_vt, TiedDest, isLds, 0, 1>;
+
+ if !not(isLds) then {
+ defm _TFE : MUBUF_Pseudo_Loads_Helper<opName, load_vt, TiedDest, isLds, 1, 0>;
+ defm _TFE_VBUFFER : MUBUF_Pseudo_Loads_Helper<opName, load_vt, TiedDest, isLds, 1, 1>;
+ }
}
multiclass MUBUF_Pseudo_Loads_Lds<string opName, ValueType load_vt = i32> {
@@ -548,18 +585,24 @@ multiclass MUBUF_Pseudo_Loads_LDSOpc<string opName,
def _OFFEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, legal_load_vt, TiedDest, isLds, isLdsOpc>;
def _IDXEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, legal_load_vt, TiedDest, isLds, isLdsOpc>;
def _BOTHEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, legal_load_vt, TiedDest, isLds, isLdsOpc>;
+
+ def _VBUFFER_OFFSET : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, legal_load_vt, TiedDest, isLds, isLdsOpc, 0, 1>;
+ def _VBUFFER_OFFEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, legal_load_vt, TiedDest, isLds, isLdsOpc, 0, 1>;
+ def _VBUFFER_IDXEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, legal_load_vt, TiedDest, isLds, isLdsOpc, 0, 1>;
+ def _VBUFFER_BOTHEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, legal_load_vt, TiedDest, isLds, isLdsOpc, 0, 1>;
}
class MUBUF_Store_Pseudo <string opName,
int addrKind,
ValueType store_vt,
bit isTFE = 0,
+ bit hasGFX12Enc = 0,
list<dag> pattern=[],
// Workaround bug bz30254
int addrKindCopy = addrKind>
: MUBUF_Pseudo<opName,
(outs),
- getMUBUFIns<addrKindCopy, [getVregSrcForVT<store_vt>.ret], isTFE>.ret,
+ getMUBUFIns<addrKindCopy, [getVregSrcForVT<store_vt>.ret], isTFE, hasGFX12Enc>.ret,
getMUBUFAsmOps<addrKindCopy, 0, 0, isTFE>.ret,
pattern>,
MUBUF_SetupAddr<addrKindCopy> {
@@ -572,36 +615,52 @@ class MUBUF_Store_Pseudo <string opName,
let tfe = isTFE;
}
+multiclass MUBUF_Pseudo_Store_Pats_Common<string BaseInst, ValueType store_vt = i32, SDPatternOperator st = null_frag> {
+
+ def : GCNPat <
+ (st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, i32:$offset)),
+ (!cast<MUBUF_Pseudo>(BaseInst # _OFFSET) store_vt:$vdata, v4i32:$srsrc, i32:$soffset, i32:$offset)>;
+
+ def : GCNPat <
+ (st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i32:$offset)),
+ (!cast<MUBUF_Pseudo>(BaseInst # _ADDR64) store_vt:$vdata, i64:$vaddr, v4i32:$srsrc, i32:$soffset, i32:$offset)>;
+}
+
+multiclass MUBUF_Pseudo_Store_Pats<string BaseInst, ValueType store_vt = i32, SDPatternOperator st = null_frag> {
+ let SubtargetPredicate = HasUnrestrictedSOffset in {
+ defm : MUBUF_Pseudo_Store_Pats_Common<BaseInst, store_vt, st>;
+ }
+ defm : MUBUF_Pseudo_Store_Pats_Common<BaseInst # "_VBUFFER", store_vt, st>;
+}
+
multiclass MUBUF_Pseudo_Stores_Helper<string opName, ValueType store_vt,
- SDPatternOperator st, bit isTFE> {
+ bit isTFE, bit hasGFX12Enc> {
defvar legal_store_vt = !if(!eq(store_vt, v3f16), v4f16, store_vt);
- def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, legal_store_vt, isTFE,
- [(st legal_store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i32:$offset))]>,
+ def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, legal_store_vt, isTFE, hasGFX12Enc>,
MUBUFAddr64Table<0, NAME>;
- def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, legal_store_vt, isTFE,
- [(st legal_store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i32:$offset))]>,
+ def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, legal_store_vt, isTFE, hasGFX12Enc>,
MUBUFAddr64Table<1, NAME>;
- def _OFFEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, legal_store_vt, isTFE>;
- def _IDXEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, legal_store_vt, isTFE>;
- def _BOTHEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, legal_store_vt, isTFE>;
+ def _OFFEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, legal_store_vt, isTFE, hasGFX12Enc>;
+ def _IDXEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, legal_store_vt, isTFE, hasGFX12Enc>;
+ def _BOTHEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, legal_store_vt, isTFE, hasGFX12Enc>;
let DisableWQM = 1 in {
- def _OFFSET_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, legal_store_vt, isTFE>;
- def _OFFEN_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, legal_store_vt, isTFE>;
- def _IDXEN_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, legal_store_vt, isTFE>;
- def _BOTHEN_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, legal_store_vt, isTFE>;
+ def _OFFSET_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, legal_store_vt, isTFE, hasGFX12Enc>;
+ def _OFFEN_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, legal_store_vt, isTFE, hasGFX12Enc>;
+ def _IDXEN_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, legal_store_vt, isTFE, hasGFX12Enc>;
+ def _BOTHEN_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, legal_store_vt, isTFE, hasGFX12Enc>;
}
}
-multiclass MUBUF_Pseudo_Stores<string opName, ValueType store_vt = i32,
- SDPatternOperator st = null_frag> {
- defm NAME : MUBUF_Pseudo_Stores_Helper<opName, store_vt, st, 0>;
- defm _TFE : MUBUF_Pseudo_Stores_Helper<opName, store_vt, null_frag, 1>;
+multiclass MUBUF_Pseudo_Stores<string opName, ValueType store_vt = i32> {
+ defm NAME : MUBUF_Pseudo_Stores_Helper<opName, store_vt, 0, 0>;
+ defm _TFE : MUBUF_Pseudo_Stores_Helper<opName, store_vt, 1, 0>;
+
+ defm _VBUFFER : MUBUF_Pseudo_Stores_Helper<opName, store_vt, 0, 1>;
+ defm _TFE_VBUFFER : MUBUF_Pseudo_Stores_Helper<opName, store_vt, 1, 1>;
}
class MUBUF_Pseudo_Store_Lds<string opName>
@@ -623,15 +682,17 @@ class MUBUF_Pseudo_Store_Lds<string opName>
let AsmMatchConverter = "cvtMubuf";
}
-class getMUBUFAtomicInsDA<RegisterClass vdataClass, bit vdata_in,
+class getMUBUFAtomicInsDA<RegisterClass vdataClass, bit vdata_in, bit hasGFX12Enc,
list<RegisterClass> vaddrList=[]> {
RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret;
dag VData = !if(vdata_in, (ins vdata_op:$vdata_in), (ins vdata_op:$vdata));
dag Data = !if(!empty(vaddrList), VData, !con(VData, (ins vaddrClass:$vaddr)));
- dag MainInputs = (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset);
- dag CPol = !if(vdata_in, (ins CPol_GLC1:$cpol), (ins CPol_0:$cpol));
+ dag SOffset = !if(hasGFX12Enc, (ins SReg_32:$soffset), (ins SCSrc_b32:$soffset));
+ dag MainInputs = !con((ins SReg_128:$srsrc), SOffset, (ins offset:$offset));
+ dag CPol = !if(vdata_in, (ins CPol_GLC_WithDefault:$cpol),
+ (ins CPol_NonGLC_WithDefault:$cpol));
dag ret = !con(Data, MainInputs, CPol);
}
@@ -639,19 +700,20 @@ class getMUBUFAtomicInsDA<RegisterClass vdataClass, bit vdata_in,
class getMUBUFAtomicIns<int addrKind,
RegisterClass vdataClass,
bit vdata_in,
+ bit hasGFX12Enc,
// Workaround bug bz30254
RegisterClass vdataClassCopy=vdataClass> {
dag ret =
!if(!eq(addrKind, BUFAddrKind.Offset),
- getMUBUFAtomicInsDA<vdataClassCopy, vdata_in>.ret,
+ getMUBUFAtomicInsDA<vdataClassCopy, vdata_in, hasGFX12Enc>.ret,
!if(!eq(addrKind, BUFAddrKind.OffEn),
- getMUBUFAtomicInsDA<vdataClassCopy, vdata_in, [VGPR_32]>.ret,
+ getMUBUFAtomicInsDA<vdataClassCopy, vdata_in, hasGFX12Enc, [VGPR_32]>.ret,
!if(!eq(addrKind, BUFAddrKind.IdxEn),
- getMUBUFAtomicInsDA<vdataClassCopy, vdata_in, [VGPR_32]>.ret,
+ getMUBUFAtomicInsDA<vdataClassCopy, vdata_in, hasGFX12Enc, [VGPR_32]>.ret,
!if(!eq(addrKind, BUFAddrKind.BothEn),
- getMUBUFAtomicInsDA<vdataClassCopy, vdata_in, [VReg_64]>.ret,
+ getMUBUFAtomicInsDA<vdataClassCopy, vdata_in, hasGFX12Enc, [VReg_64]>.ret,
!if(!eq(addrKind, BUFAddrKind.Addr64),
- getMUBUFAtomicInsDA<vdataClassCopy, vdata_in, [VReg_64]>.ret,
+ getMUBUFAtomicInsDA<vdataClassCopy, vdata_in, hasGFX12Enc, [VReg_64]>.ret,
(ins))))));
}
@@ -679,13 +741,14 @@ class MUBUF_Atomic_Pseudo<string opName,
class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind,
RegisterClass vdataClass,
+ bit hasGFX12Enc = 0,
list<dag> pattern=[],
// Workaround bug bz30254
int addrKindCopy = addrKind,
RegisterClass vdataClassCopy = vdataClass>
: MUBUF_Atomic_Pseudo<opName, addrKindCopy,
(outs),
- getMUBUFAtomicIns<addrKindCopy, vdataClassCopy, 0>.ret,
+ getMUBUFAtomicIns<addrKindCopy, vdataClassCopy, 0, hasGFX12Enc>.ret,
getMUBUFAsmOps<addrKindCopy>.ret,
pattern>,
AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 0> {
@@ -698,6 +761,7 @@ class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind,
class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
RegisterClass vdataClass,
+ bit hasGFX12Enc = 0,
list<dag> pattern=[],
// Workaround bug bz30254
int addrKindCopy = addrKind,
@@ -705,7 +769,7 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret>
: MUBUF_Atomic_Pseudo<opName, addrKindCopy,
(outs vdata_op:$vdata),
- getMUBUFAtomicIns<addrKindCopy, vdataClassCopy, 1>.ret,
+ getMUBUFAtomicIns<addrKindCopy, vdataClassCopy, 1, hasGFX12Enc>.ret,
getMUBUFAsmOps<addrKindCopy>.ret,
pattern>,
AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 1> {
@@ -723,13 +787,21 @@ multiclass MUBUF_Pseudo_Atomics_NO_RTN <string opName,
ValueType vdataType,
bit isFP = isFloatType<vdataType>.ret> {
let FPAtomic = isFP in {
- def _OFFSET : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass>,
+ def _OFFSET : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass, 0>,
MUBUFAddr64Table <0, NAME>;
- def _ADDR64 : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass>,
+ def _ADDR64 : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, 0>,
MUBUFAddr64Table <1, NAME>;
- def _OFFEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
- def _IDXEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
- def _BOTHEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+ def _OFFEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, 0>;
+ def _IDXEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, 0>;
+ def _BOTHEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, 0>;
+
+ def _VBUFFER_OFFSET : MUBUF_AtomicNoRet_Pseudo <opName #_vbuffer, BUFAddrKind.Offset, vdataClass, 1>,
+ MUBUFAddr64Table <0, NAME # "_VBUFFER">;
+ def _VBUFFER_ADDR64 : MUBUF_AtomicNoRet_Pseudo <opName #_vbuffer, BUFAddrKind.Addr64, vdataClass, 1>,
+ MUBUFAddr64Table <1, NAME # "_VBUFFER">;
+ def _VBUFFER_OFFEN : MUBUF_AtomicNoRet_Pseudo <opName #_vbuffer, BUFAddrKind.OffEn, vdataClass, 1>;
+ def _VBUFFER_IDXEN : MUBUF_AtomicNoRet_Pseudo <opName #_vbuffer, BUFAddrKind.IdxEn, vdataClass, 1>;
+ def _VBUFFER_BOTHEN : MUBUF_AtomicNoRet_Pseudo <opName #_vbuffer, BUFAddrKind.BothEn, vdataClass, 1>;
}
}
@@ -739,21 +811,37 @@ multiclass MUBUF_Pseudo_Atomics_RTN <string opName,
SDPatternOperator atomic,
bit isFP = isFloatType<vdataType>.ret> {
let FPAtomic = isFP in {
- def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
+ def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass, 0,
[(set vdataType:$vdata,
(atomic (MUBUFOffset v4i32:$srsrc, i32:$soffset, i32:$offset),
vdataType:$vdata_in))]>,
MUBUFAddr64Table <0, NAME # "_RTN">;
- def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
+ def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, 0,
[(set vdataType:$vdata,
(atomic (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i32:$offset),
vdataType:$vdata_in))]>,
MUBUFAddr64Table <1, NAME # "_RTN">;
- def _OFFEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
- def _IDXEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
- def _BOTHEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+ def _OFFEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, 0>;
+ def _IDXEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, 0>;
+ def _BOTHEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, 0>;
+
+ def _VBUFFER_OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName #_vbuffer, BUFAddrKind.Offset, vdataClass, 1,
+ [(set vdataType:$vdata,
+ (atomic (MUBUFOffset v4i32:$srsrc, i32:$soffset, i32:$offset),
+ vdataType:$vdata_in))]>,
+ MUBUFAddr64Table <0, NAME # "_VBUFFER_RTN">;
+
+ def _VBUFFER_ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName #_vbuffer, BUFAddrKind.Addr64, vdataClass, 1,
+ [(set vdataType:$vdata,
+ (atomic (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i32:$offset),
+ vdataType:$vdata_in))]>,
+ MUBUFAddr64Table <1, NAME # "_VBUFFER_RTN">;
+
+ def _VBUFFER_OFFEN_RTN : MUBUF_AtomicRet_Pseudo <opName #_vbuffer, BUFAddrKind.OffEn, vdataClass, 1>;
+ def _VBUFFER_IDXEN_RTN : MUBUF_AtomicRet_Pseudo <opName #_vbuffer, BUFAddrKind.IdxEn, vdataClass, 1>;
+ def _VBUFFER_BOTHEN_RTN : MUBUF_AtomicRet_Pseudo <opName #_vbuffer, BUFAddrKind.BothEn, vdataClass, 1>;
}
}
@@ -794,7 +882,7 @@ defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Pseudo_Stores <
"buffer_store_format_xyzw", v4f32
>;
-let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in {
+let OtherPredicates = [HasUnpackedD16VMem], D16Buf = 1 in {
let TiedSourceNotRead = 1 in {
defm BUFFER_LOAD_FORMAT_D16_X_gfx80 : MUBUF_Pseudo_Loads <
"buffer_load_format_d16_x", i32
@@ -821,9 +909,9 @@ let TiedSourceNotRead = 1 in {
defm BUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MUBUF_Pseudo_Stores <
"buffer_store_format_d16_xyzw", v4i32
>;
-} // End HasUnpackedD16VMem.
+} // End OtherPredicates = [HasUnpackedD16VMem], D16Buf = 1.
-let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in {
+let OtherPredicates = [HasPackedD16VMem], D16Buf = 1 in {
let TiedSourceNotRead = 1 in {
defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Pseudo_Loads <
"buffer_load_format_d16_x", f16
@@ -850,7 +938,7 @@ let TiedSourceNotRead = 1 in {
defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Pseudo_Stores <
"buffer_store_format_d16_xyzw", v4f16
>;
-} // End HasPackedD16VMem.
+} // End OtherPredicates = [HasPackedD16VMem], D16Buf = 1.
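
The recurring SubtargetPredicate = X to OtherPredicates = [X] moves in this patch free up SubtargetPredicate for the per-encoding predicates that the GFX11/GFX12 real instructions now set (see the isGFX11Only/isGFX12Only lets further down). Both fields feed the final predicate list, along the lines of the AMDGPU PredicateControl helper; a standalone approximation in which the field names mirror the backend but everything else is stubbed:

    class Predicate;
    def TruePredicate    : Predicate;
    def HasPackedD16VMem : Predicate;
    def isGFX12Only      : Predicate;

    class PredicateControl {
      Predicate SubtargetPredicate = TruePredicate;
      list<Predicate> OtherPredicates = [];
      // Both fields fold into the list the matcher actually checks.
      list<Predicate> Predicates =
          !listconcat([SubtargetPredicate], OtherPredicates);
    }

    // The feature check rides in OtherPredicates, leaving
    // SubtargetPredicate free for the encoding generation.
    let OtherPredicates = [HasPackedD16VMem] in
    def D16_REAL : PredicateControl {
      let SubtargetPredicate = isGFX12Only;
    }
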
defm BUFFER_LOAD_UBYTE : MUBUF_Pseudo_Loads_Lds <
"buffer_load_ubyte", i32
@@ -906,29 +994,61 @@ defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, sextloadi8_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, extloadi16_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, zextloadi16_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SSHORT", i32, sextloadi16_global>;
-defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORD", i32, load_global>;
-defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX2", v2i32, load_global>;
-defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX3", v3i32, load_global>;
-defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX4", v4i32, load_global>;
+
+foreach vt = Reg32Types.types in {
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORD", vt, load_global>;
+}
+
+foreach vt = VReg_64.RegTypes in {
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX2", vt, load_global>;
+}
+
+foreach vt = VReg_96.RegTypes in {
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX3", vt, load_global>;
+}
+
+foreach vt = VReg_128.RegTypes in {
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX4", vt, load_global>;
+}
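
Replacing the single i32/v2i32/v3i32/v4i32 patterns with foreach loops over Reg32Types.types and the register classes' RegTypes lists gives every value type of the matching width a load pattern (f32, v2f16, and so on), not just the integer one. The expansion idiom, reduced to a standalone file with stand-in type and class names:

    class VT<string n> { string Name = n; }
    def i32   : VT<"i32">;
    def f32   : VT<"f32">;
    def v2f16 : VT<"v2f16">;

    class RC<list<VT> types> { list<VT> RegTypes = types; }
    def VReg32 : RC<[i32, f32, v2f16]>;

    class LoadPat<VT vt> { VT Vt = vt; }

    // One anonymous pattern record per element of the RegTypes list,
    // mirroring the anonymous defm expansion in the diff.
    foreach vt = VReg32.RegTypes in {
      def : LoadPat<vt>;
    }
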
defm BUFFER_STORE_BYTE : MUBUF_Pseudo_Stores <
- "buffer_store_byte", i32, truncstorei8_global
+ "buffer_store_byte", i32
>;
defm BUFFER_STORE_SHORT : MUBUF_Pseudo_Stores <
- "buffer_store_short", i32, truncstorei16_global
+ "buffer_store_short", i32
>;
defm BUFFER_STORE_DWORD : MUBUF_Pseudo_Stores <
- "buffer_store_dword", i32, store_global
+ "buffer_store_dword", i32
>;
defm BUFFER_STORE_DWORDX2 : MUBUF_Pseudo_Stores <
- "buffer_store_dwordx2", v2i32, store_global
+ "buffer_store_dwordx2", v2i32
>;
defm BUFFER_STORE_DWORDX3 : MUBUF_Pseudo_Stores <
- "buffer_store_dwordx3", v3i32, store_global
+ "buffer_store_dwordx3", v3i32
>;
defm BUFFER_STORE_DWORDX4 : MUBUF_Pseudo_Stores <
- "buffer_store_dwordx4", v4i32, store_global
+ "buffer_store_dwordx4", v4i32
>;
+
+defm : MUBUF_Pseudo_Store_Pats<"BUFFER_STORE_BYTE", i32, truncstorei8_global>;
+defm : MUBUF_Pseudo_Store_Pats<"BUFFER_STORE_SHORT", i32, truncstorei16_global>;
+
+foreach vt = Reg32Types.types in {
+defm : MUBUF_Pseudo_Store_Pats<"BUFFER_STORE_DWORD", vt, store_global>;
+}
+
+foreach vt = VReg_64.RegTypes in {
+defm : MUBUF_Pseudo_Store_Pats<"BUFFER_STORE_DWORDX2", vt, store_global>;
+}
+
+foreach vt = VReg_96.RegTypes in {
+defm : MUBUF_Pseudo_Store_Pats<"BUFFER_STORE_DWORDX3", vt, store_global>;
+}
+
+foreach vt = VReg_128.RegTypes in {
+defm : MUBUF_Pseudo_Store_Pats<"BUFFER_STORE_DWORDX4", vt, store_global>;
+}
+
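
The store pseudos above no longer take a PatFrag; selection patterns move into the separate MUBUF_Pseudo_Store_Pats instantiations that look the pseudo up by name. Keying the patterns on a name string is what lets one defm line serve both the legacy opcode and its _VBUFFER twin. A reduced sketch with stub records, runnable with llvm-tblgen:

    class Inst<string mnemonic> { string Mnemonic = mnemonic; }

    // The instruction definitions carry no patterns of their own.
    def STORE_DWORD_OFFSET         : Inst<"buffer_store_dword">;
    def STORE_DWORD_VBUFFER_OFFSET : Inst<"buffer_store_dword">;

    class SelPat<Inst inst> { Inst Selected = inst; }

    // Patterns find the pseudo via !cast on a constructed name.
    multiclass StorePats<string Instr> {
      def : SelPat<!cast<Inst>(Instr # "_OFFSET")>;
    }

    defm : StorePats<"STORE_DWORD">;
    defm : StorePats<"STORE_DWORD_VBUFFER">;
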
defm BUFFER_ATOMIC_SWAP : MUBUF_Pseudo_Atomics <
"buffer_atomic_swap", VGPR_32, i32
>;
@@ -1008,10 +1128,11 @@ defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics <
"buffer_atomic_dec_x2", VReg_64, i64
>;
-let SubtargetPredicate = HasGFX10_BEncoding in
-defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN <
- "buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub
->;
+let OtherPredicates = [HasGFX10_BEncoding] in {
+ defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub
+ >;
+}
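
Switching BUFFER_ATOMIC_CSUB from MUBUF_Pseudo_Atomics_RTN to MUBUF_Pseudo_Atomics gives it the no-return pseudos as well, matched later by the HasAtomicCSubNoRtnInsts-guarded pattern. The combined multiclass is plain multiclass inheritance; a stub version of the shape (names are illustrative, not the real definitions):

    multiclass Atomics_NO_RTN<string opName> {
      def _OFFSET;      // expands to e.g. BUFFER_ATOMIC_CSUB_OFFSET
    }
    multiclass Atomics_RTN<string opName> {
      def _OFFSET_RTN;  // expands to e.g. BUFFER_ATOMIC_CSUB_OFFSET_RTN
    }
    // Inheriting both halves emits the return and no-return pseudos.
    multiclass Atomics<string opName> :
      Atomics_NO_RTN<opName>, Atomics_RTN<opName>;

    defm BUFFER_ATOMIC_CSUB : Atomics<"buffer_atomic_csub">;
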
let SubtargetPredicate = isGFX8GFX9 in {
def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">;
@@ -1198,10 +1319,8 @@ def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> {
let AsmOperands = "$cpol";
}
-let SubtargetPredicate = isGFX10Plus in {
- def BUFFER_GL0_INV : MUBUF_Invalidate<"buffer_gl0_inv">;
- def BUFFER_GL1_INV : MUBUF_Invalidate<"buffer_gl1_inv">;
-} // End SubtargetPredicate = isGFX10Plus
+def BUFFER_GL0_INV : MUBUF_Invalidate<"buffer_gl0_inv">;
+def BUFFER_GL1_INV : MUBUF_Invalidate<"buffer_gl1_inv">;
//===----------------------------------------------------------------------===//
// MUBUF Patterns
@@ -1211,33 +1330,33 @@ let SubtargetPredicate = isGFX10Plus in {
// buffer_load/store_format patterns
//===----------------------------------------------------------------------===//
-multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
+multiclass MUBUF_LoadIntrinsicPat_Common<SDPatternOperator name, ValueType vt,
string opcode, ValueType memoryVt = vt> {
defvar st = !if(!eq(memoryVt, vt), name, mubuf_intrinsic_load<name, memoryVt>);
def : GCNPat<
- (vt (st v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
+ (vt (st v4i32:$rsrc, 0, 0, (BUFSOffset i32:$soffset), timm:$offset,
timm:$auxiliary, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
(extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
def : GCNPat<
- (vt (st v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
+ (vt (st v4i32:$rsrc, 0, i32:$voffset, (BUFSOffset i32:$soffset), timm:$offset,
timm:$auxiliary, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
(extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
def : GCNPat<
- (vt (st v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
+ (vt (st v4i32:$rsrc, i32:$vindex, 0, (BUFSOffset i32:$soffset), timm:$offset,
timm:$auxiliary, timm)),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
(extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
def : GCNPat<
- (vt (st v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
+ (vt (st v4i32:$rsrc, i32:$vindex, i32:$voffset, (BUFSOffset i32:$soffset), timm:$offset,
timm:$auxiliary, timm)),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
@@ -1246,6 +1365,14 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
>;
}
+multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
+ string opcode, ValueType memoryVt = vt> {
+ let SubtargetPredicate = HasUnrestrictedSOffset in {
+ defm : MUBUF_LoadIntrinsicPat_Common<name, vt, opcode, memoryVt>;
+ }
+ defm : MUBUF_LoadIntrinsicPat_Common<name, vt, opcode # "_VBUFFER", memoryVt>;
+}
+
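
This wrapper shape repeats for every pattern family in the patch: _Common holds the actual GCNPat defs, and the wrapper instantiates it twice, once against the legacy opcode under SubtargetPredicate = HasUnrestrictedSOffset and once against the _VBUFFER opcode for GFX12. In the same pass, the bare i32:$soffset operands become (BUFSOffset i32:$soffset), letting a complex pattern normalize the scalar offset per target. A self-contained miniature of the wrapper idiom, with the predicate and instruction records stubbed:

    class Predicate;
    def HasUnrestrictedSOffset : Predicate;

    class Inst;
    def LOAD_X         : Inst;
    def LOAD_X_VBUFFER : Inst;

    class SelPat<Inst inst> {
      Inst Selected = inst;
      Predicate SubtargetPredicate = ?;
    }

    multiclass LoadPat_Common<string opcode> {
      def : SelPat<!cast<Inst>(opcode)>;
    }

    multiclass LoadPat<string opcode> {
      // Legacy opcode: only valid where the plain soffset form is legal.
      let SubtargetPredicate = HasUnrestrictedSOffset in
      defm : LoadPat_Common<opcode>;
      // GFX12 path: the _VBUFFER flavour of the same opcode.
      defm : LoadPat_Common<opcode # "_VBUFFER">;
    }

    defm : LoadPat<"LOAD_X">;
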
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, f32, "BUFFER_LOAD_FORMAT_X">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, i32, "BUFFER_LOAD_FORMAT_X">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2f32, "BUFFER_LOAD_FORMAT_XY">;
@@ -1260,16 +1387,16 @@ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_tfe, v3i32, "BUFFER_LOAD_FORM
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_tfe, v4i32, "BUFFER_LOAD_FORMAT_XYZ_TFE">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_tfe, v5i32, "BUFFER_LOAD_FORMAT_XYZW_TFE">;
-let SubtargetPredicate = HasUnpackedD16VMem in {
- defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, f16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
- defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
- defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i32, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
- defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i32, "BUFFER_LOAD_FORMAT_D16_XY_gfx80">;
- defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v3i32, "BUFFER_LOAD_FORMAT_D16_XYZ_gfx80">;
- defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4i32, "BUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
+let OtherPredicates = [HasUnpackedD16VMem] in {
+ defm : MUBUF_LoadIntrinsicPat_Common<SIbuffer_load_format_d16, f16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
+ defm : MUBUF_LoadIntrinsicPat_Common<SIbuffer_load_format_d16, i16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
+ defm : MUBUF_LoadIntrinsicPat_Common<SIbuffer_load_format_d16, i32, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
+ defm : MUBUF_LoadIntrinsicPat_Common<SIbuffer_load_format_d16, v2i32, "BUFFER_LOAD_FORMAT_D16_XY_gfx80">;
+ defm : MUBUF_LoadIntrinsicPat_Common<SIbuffer_load_format_d16, v3i32, "BUFFER_LOAD_FORMAT_D16_XYZ_gfx80">;
+ defm : MUBUF_LoadIntrinsicPat_Common<SIbuffer_load_format_d16, v4i32, "BUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.
-let SubtargetPredicate = HasPackedD16VMem in {
+let OtherPredicates = [HasPackedD16VMem] in {
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, f16, "BUFFER_LOAD_FORMAT_D16_X">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i16, "BUFFER_LOAD_FORMAT_D16_X">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i32, "BUFFER_LOAD_FORMAT_D16_X">;
@@ -1298,33 +1425,33 @@ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_short, i32, "BUFFER_LOAD_SSHORT">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ubyte, i32, "BUFFER_LOAD_UBYTE">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ushort, i32, "BUFFER_LOAD_USHORT">;
-multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
+multiclass MUBUF_StoreIntrinsicPat_Common<SDPatternOperator name, ValueType vt,
string opcode, ValueType memoryVt = vt> {
defvar st = !if(!eq(memoryVt, vt), name, mubuf_intrinsic_store<name, memoryVt>);
def : GCNPat<
- (st vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
+ (st vt:$vdata, v4i32:$rsrc, 0, 0, (BUFSOffset i32:$soffset), timm:$offset,
timm:$auxiliary, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) getVregSrcForVT<vt>.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
(extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
def : GCNPat<
- (st vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
+ (st vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, (BUFSOffset i32:$soffset), timm:$offset,
timm:$auxiliary, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
timm:$offset, (extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
def : GCNPat<
- (st vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
+ (st vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, (BUFSOffset i32:$soffset), timm:$offset,
timm:$auxiliary, timm),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
timm:$offset, (extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
def : GCNPat<
- (st vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
+ (st vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, (BUFSOffset i32:$soffset), timm:$offset,
timm:$auxiliary, timm),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
getVregSrcForVT<vt>.ret:$vdata,
@@ -1334,6 +1461,14 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
>;
}
+multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
+ string opcode, ValueType memoryVt = vt> {
+ let SubtargetPredicate = HasUnrestrictedSOffset in {
+ defm : MUBUF_StoreIntrinsicPat_Common<name, vt, opcode, memoryVt>;
+ }
+ defm : MUBUF_StoreIntrinsicPat_Common<name, vt, opcode # "_VBUFFER", memoryVt>;
+}
+
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, f32, "BUFFER_STORE_FORMAT_X">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, i32, "BUFFER_STORE_FORMAT_X">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v2f32, "BUFFER_STORE_FORMAT_XY">;
@@ -1343,16 +1478,16 @@ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v3i32, "BUFFER_STORE_FORMA
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v4f32, "BUFFER_STORE_FORMAT_XYZW">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v4i32, "BUFFER_STORE_FORMAT_XYZW">;
-let SubtargetPredicate = HasUnpackedD16VMem in {
- defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, f16, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
- defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i16, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
- defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i32, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
- defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2i32, "BUFFER_STORE_FORMAT_D16_XY_gfx80">;
- defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v3i32, "BUFFER_STORE_FORMAT_D16_XYZ_gfx80">;
- defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4i32, "BUFFER_STORE_FORMAT_D16_XYZW_gfx80">;
+let OtherPredicates = [HasUnpackedD16VMem] in {
+ defm : MUBUF_StoreIntrinsicPat_Common<SIbuffer_store_format_d16, f16, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
+ defm : MUBUF_StoreIntrinsicPat_Common<SIbuffer_store_format_d16, i16, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
+ defm : MUBUF_StoreIntrinsicPat_Common<SIbuffer_store_format_d16, i32, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
+ defm : MUBUF_StoreIntrinsicPat_Common<SIbuffer_store_format_d16, v2i32, "BUFFER_STORE_FORMAT_D16_XY_gfx80">;
+ defm : MUBUF_StoreIntrinsicPat_Common<SIbuffer_store_format_d16, v3i32, "BUFFER_STORE_FORMAT_D16_XYZ_gfx80">;
+ defm : MUBUF_StoreIntrinsicPat_Common<SIbuffer_store_format_d16, v4i32, "BUFFER_STORE_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.
-let SubtargetPredicate = HasPackedD16VMem in {
+let OtherPredicates = [HasPackedD16VMem] in {
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, f16, "BUFFER_STORE_FORMAT_D16_X">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i16, "BUFFER_STORE_FORMAT_D16_X">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i32, "BUFFER_STORE_FORMAT_D16_X">;
@@ -1383,7 +1518,7 @@ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_short, i32, "BUFFER_STORE_SHORT">;
// buffer_atomic patterns
//===----------------------------------------------------------------------===//
-multiclass BufferAtomicPat<string OpPrefix, ValueType vt, string Inst, bit isIntr = 0> {
+multiclass BufferAtomicPat_Common<string OpPrefix, ValueType vt, string Inst, bit isIntr = 0> {
foreach RtnMode = ["ret", "noret"] in {
defvar Op = !cast<SDPatternOperator>(OpPrefix
@@ -1409,11 +1544,18 @@ multiclass BufferAtomicPat<string OpPrefix, ValueType vt, string Inst, bit isInt
} // end foreach RtnMode
}
+multiclass BufferAtomicPat<string OpPrefix, ValueType vt, string Inst, bit isIntr = 0> {
+ let SubtargetPredicate = HasUnrestrictedSOffset in {
+ defm : BufferAtomicPat_Common<OpPrefix, vt, Inst, isIntr>;
+ }
+ defm : BufferAtomicPat_Common<OpPrefix, vt, Inst # "_VBUFFER", isIntr>;
+}
+
multiclass BufferAtomicIntrPat<string OpPrefix, ValueType vt, string Inst> {
defm : BufferAtomicPat<OpPrefix, vt, Inst, /* isIntr */ 1>;
}
-multiclass BufferAtomicCmpSwapPat<ValueType vt, ValueType data_vt, string Inst> {
+multiclass BufferAtomicCmpSwapPat_Common<ValueType vt, ValueType data_vt, string Inst> {
foreach RtnMode = ["ret", "noret"] in {
defvar Op = !cast<SDPatternOperator>("AMDGPUatomic_cmp_swap_global"
@@ -1449,6 +1591,14 @@ multiclass BufferAtomicCmpSwapPat<ValueType vt, ValueType data_vt, string Inst>
} // end foreach RtnMode
}
+multiclass BufferAtomicCmpSwapPat<ValueType vt, ValueType data_vt, string Inst> {
+ let SubtargetPredicate = HasUnrestrictedSOffset in {
+ defm : BufferAtomicCmpSwapPat_Common<vt, data_vt, Inst>;
+ }
+ defm : BufferAtomicCmpSwapPat_Common<vt, data_vt, Inst # "_VBUFFER">;
+}
+
+
foreach Ty = [i32, i64] in {
defvar Suffix = !if(!eq(Ty, i64), "_X2", "");
@@ -1471,7 +1621,7 @@ defm : BufferAtomicPat<"atomic_load_udec_wrap_global", Ty, "BUFFER_ATOMIC_DEC" #
defm : BufferAtomicCmpSwapPat<i32, v2i32, "BUFFER_ATOMIC_CMPSWAP">;
defm : BufferAtomicCmpSwapPat<i64, v2i64, "BUFFER_ATOMIC_CMPSWAP_X2">;
-multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst,
+multiclass SIBufferAtomicPat_Common<string OpPrefix, ValueType vt, string Inst,
list<string> RtnModes = ["ret", "noret"]> {
foreach RtnMode = RtnModes in {
@@ -1484,7 +1634,7 @@ multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst,
let AddedComplexity = !if(!eq(RtnMode, "ret"), 0, 1) in {
def : GCNPat<
- (vt (Op vt:$vdata_in, v4i32:$rsrc, 0, 0, i32:$soffset,
+ (vt (Op vt:$vdata_in, v4i32:$rsrc, 0, 0, (BUFSOffset i32:$soffset),
timm:$offset, timm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(Inst # "_OFFSET" # InstSuffix)
getVregSrcForVT<vt>.ret:$vdata_in, SReg_128:$rsrc, SCSrc_b32:$soffset,
@@ -1492,7 +1642,7 @@ multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst,
>;
def : GCNPat<
- (vt (Op vt:$vdata_in, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset,
+ (vt (Op vt:$vdata_in, v4i32:$rsrc, i32:$vindex, 0, (BUFSOffset i32:$soffset),
timm:$offset, timm:$cachepolicy, timm)),
(!cast<MUBUF_Pseudo>(Inst # "_IDXEN" # InstSuffix)
getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$vindex, SReg_128:$rsrc,
@@ -1501,7 +1651,7 @@ multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst,
def : GCNPat<
(vt (Op vt:$vdata_in, v4i32:$rsrc, 0, i32:$voffset,
- i32:$soffset, timm:$offset, timm:$cachepolicy, 0)),
+ (BUFSOffset i32:$soffset), timm:$offset, timm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(Inst # "_OFFEN" # InstSuffix)
getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$voffset, SReg_128:$rsrc,
SCSrc_b32:$soffset, timm:$offset, CachePolicy)
@@ -1509,7 +1659,7 @@ multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst,
def : GCNPat<
(vt (Op vt:$vdata_in, v4i32:$rsrc, i32:$vindex, i32:$voffset,
- i32:$soffset, timm:$offset, timm:$cachepolicy, timm)),
+ (BUFSOffset i32:$soffset), timm:$offset, timm:$cachepolicy, timm)),
(!cast<MUBUF_Pseudo>(Inst # "_BOTHEN" # InstSuffix)
getVregSrcForVT<vt>.ret:$vdata_in,
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
@@ -1520,6 +1670,14 @@ multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst,
} // end foreach RtnMode
}
+multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst,
+ list<string> RtnModes = ["ret", "noret"]> {
+ let SubtargetPredicate = HasUnrestrictedSOffset in {
+ defm : SIBufferAtomicPat_Common<OpPrefix, vt, Inst, RtnModes>;
+ }
+ defm : SIBufferAtomicPat_Common<OpPrefix, vt, Inst # "_VBUFFER", RtnModes>;
+}
+
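
SIBufferAtomicPat keeps its RtnModes list parameter, so callers that only have one flavour of an atomic can instantiate just that subset, as the later csub and fadd lines do with ["noret"] or ["ret"]. The mechanism in isolation, as a runnable stub:

    multiclass AtomicPats<list<string> RtnModes = ["ret", "noret"]> {
      foreach RtnMode = RtnModes in
        def NAME # "_" # RtnMode;   // one placeholder record per mode
    }

    defm SWAP : AtomicPats;             // SWAP_ret and SWAP_noret
    defm CSUB : AtomicPats<["noret"]>;  // only CSUB_noret
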
defm : SIBufferAtomicPat<"SIbuffer_atomic_swap", i32, "BUFFER_ATOMIC_SWAP">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_swap", f32, "BUFFER_ATOMIC_SWAP">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_add", i32, "BUFFER_ATOMIC_ADD">;
@@ -1547,6 +1705,9 @@ defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i64, "BUFFER_ATOMIC_XOR_X2">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i64, "BUFFER_ATOMIC_INC_X2">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i64, "BUFFER_ATOMIC_DEC_X2">;
+let SubtargetPredicate = HasAtomicCSubNoRtnInsts in
+defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["noret"]>;
+
let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">;
@@ -1562,11 +1723,11 @@ class NoUseBufferAtomic<SDPatternOperator Op, ValueType vt> : PatFrag <
let HasNoUse = true;
}
-multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
+multiclass BufferAtomicPatterns_NO_RTN_Common<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
(NoUseBufferAtomic<name, vt> vt:$vdata_in, v4i32:$rsrc, 0,
- 0, i32:$soffset, timm:$offset,
+ 0, (BUFSOffset i32:$soffset), timm:$offset,
timm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) getVregSrcForVT<vt>.ret:$vdata_in, SReg_128:$rsrc, SCSrc_b32:$soffset,
timm:$offset, timm:$cachepolicy)
@@ -1574,7 +1735,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
def : GCNPat<
(NoUseBufferAtomic<name, vt> vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
- 0, i32:$soffset, timm:$offset,
+ 0, (BUFSOffset i32:$soffset), timm:$offset,
timm:$cachepolicy, timm),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
timm:$offset, timm:$cachepolicy)
@@ -1582,7 +1743,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
def : GCNPat<
(NoUseBufferAtomic<name, vt> vt:$vdata_in, v4i32:$rsrc, 0,
- i32:$voffset, i32:$soffset, timm:$offset,
+ i32:$voffset, (BUFSOffset i32:$soffset), timm:$offset,
timm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
timm:$offset, timm:$cachepolicy)
@@ -1590,7 +1751,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
def : GCNPat<
(NoUseBufferAtomic<name, vt> vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
- i32:$voffset, i32:$soffset, timm:$offset,
+ i32:$voffset, (BUFSOffset i32:$soffset), timm:$offset,
timm:$cachepolicy, timm),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
getVregSrcForVT<vt>.ret:$vdata_in,
@@ -1599,87 +1760,111 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
>;
}
-let SubtargetPredicate = HasAtomicFaddNoRtnInsts in
-defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f32, "BUFFER_ATOMIC_ADD_F32", ["noret"]>;
+multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
+ string opcode> {
+ let SubtargetPredicate = HasUnrestrictedSOffset in {
+ defm : BufferAtomicPatterns_NO_RTN_Common<name, vt, opcode>;
+ }
+ defm : BufferAtomicPatterns_NO_RTN_Common<name, vt, opcode # "_VBUFFER">;
+}
-let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16NoRtnInsts in
-defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["noret"]>;
+let OtherPredicates = [HasAtomicFaddNoRtnInsts] in
+ defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f32, "BUFFER_ATOMIC_ADD_F32", ["noret"]>;
-let SubtargetPredicate = HasAtomicFaddRtnInsts in
-defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f32, "BUFFER_ATOMIC_ADD_F32", ["ret"]>;
+let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in {
+ defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["noret"]>;
+} // End OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts]
-let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in
-defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>;
+let OtherPredicates = [HasAtomicFaddRtnInsts] in
+ defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f32, "BUFFER_ATOMIC_ADD_F32", ["ret"]>;
-let SubtargetPredicate = isGFX90APlus in {
+let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in {
+ defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>;
+} // End OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts]
+
+let OtherPredicates = [isGFX90APlus] in {
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">;
} // End OtherPredicates = [isGFX90APlus]
-foreach RtnMode = ["ret", "noret"] in {
-
-defvar Op = !cast<SDPatternOperator>(SIbuffer_atomic_cmpswap
- # !if(!eq(RtnMode, "ret"), "", "_noret"));
-defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", "");
-defvar CachePolicy = !if(!eq(RtnMode, "ret"), (set_glc $cachepolicy),
- (timm:$cachepolicy));
-
-defvar OffsetResDag = (!cast<MUBUF_Pseudo>("BUFFER_ATOMIC_CMPSWAP_OFFSET" # InstSuffix)
- (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
- SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset, CachePolicy);
-def : GCNPat<
- (Op
- i32:$data, i32:$cmp, v4i32:$rsrc, 0, 0, i32:$soffset,
- timm:$offset, timm:$cachepolicy, 0),
- !if(!eq(RtnMode, "ret"),
- (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS OffsetResDag, VReg_64)), sub0),
- OffsetResDag)
->;
-
-defvar IdxenResDag = (!cast<MUBUF_Pseudo>("BUFFER_ATOMIC_CMPSWAP_IDXEN" # InstSuffix)
- (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
- VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
- CachePolicy);
-def : GCNPat<
- (Op
- i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
- 0, i32:$soffset, timm:$offset,
- timm:$cachepolicy, timm),
- !if(!eq(RtnMode, "ret"),
- (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS IdxenResDag, VReg_64)), sub0),
- IdxenResDag)
->;
-
-defvar OffenResDag = (!cast<MUBUF_Pseudo>("BUFFER_ATOMIC_CMPSWAP_OFFEN" # InstSuffix)
- (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
- VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
- CachePolicy);
-def : GCNPat<
- (Op
- i32:$data, i32:$cmp, v4i32:$rsrc, 0,
- i32:$voffset, i32:$soffset, timm:$offset,
- timm:$cachepolicy, 0),
- !if(!eq(RtnMode, "ret"),
- (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS OffenResDag, VReg_64)), sub0),
- OffenResDag)
->;
-
-defvar BothenResDag = (!cast<MUBUF_Pseudo>("BUFFER_ATOMIC_CMPSWAP_BOTHEN" # InstSuffix)
- (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
- (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
- SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset, CachePolicy);
-def : GCNPat<
- (Op
- i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
- i32:$voffset, i32:$soffset, timm:$offset,
- timm:$cachepolicy, timm),
- !if(!eq(RtnMode, "ret"),
- (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS BothenResDag, VReg_64)), sub0),
- BothenResDag)
->;
-
-} // end foreach RtnMode
+multiclass SIBufferAtomicCmpSwapPat_Common<ValueType vt, ValueType data_vt, string Inst> {
+ foreach RtnMode = ["ret", "noret"] in {
+ defvar Op = !cast<SDPatternOperator>(SIbuffer_atomic_cmpswap
+ # !if(!eq(RtnMode, "ret"), "", "_noret"));
+ defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", "");
+ defvar CachePolicy = !if(!eq(RtnMode, "ret"), (set_glc $cachepolicy),
+ (timm:$cachepolicy));
+ defvar SrcRC = getVregSrcForVT<vt>.ret;
+ defvar DataRC = getVregSrcForVT<data_vt>.ret;
+ defvar SubLo = !if(!eq(vt, i32), sub0, sub0_sub1);
+ defvar SubHi = !if(!eq(vt, i32), sub1, sub2_sub3);
+
+ defvar OffsetResDag = (!cast<MUBUF_Pseudo>(Inst # "_OFFSET" # InstSuffix)
+ (REG_SEQUENCE DataRC, SrcRC:$data, SubLo, SrcRC:$cmp, SubHi),
+ SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset, CachePolicy);
+ def : GCNPat<
+ (vt (Op
+ vt:$data, vt:$cmp, v4i32:$rsrc, 0, 0, (BUFSOffset i32:$soffset),
+ timm:$offset, timm:$cachepolicy, 0)),
+ !if(!eq(RtnMode, "ret"),
+ (EXTRACT_SUBREG OffsetResDag, SubLo),
+ OffsetResDag)
+ >;
+
+ defvar IdxenResDag = (!cast<MUBUF_Pseudo>(Inst # "_IDXEN" # InstSuffix)
+ (REG_SEQUENCE DataRC, SrcRC:$data, SubLo, SrcRC:$cmp, SubHi),
+ VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
+ CachePolicy);
+ def : GCNPat<
+ (vt (Op
+ vt:$data, vt:$cmp, v4i32:$rsrc, i32:$vindex,
+ 0, (BUFSOffset i32:$soffset), timm:$offset,
+ timm:$cachepolicy, timm)),
+ !if(!eq(RtnMode, "ret"),
+ (EXTRACT_SUBREG IdxenResDag, SubLo),
+ IdxenResDag)
+ >;
+
+ defvar OffenResDag = (!cast<MUBUF_Pseudo>(Inst # "_OFFEN" # InstSuffix)
+ (REG_SEQUENCE DataRC, SrcRC:$data, SubLo, SrcRC:$cmp, SubHi),
+ VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
+ CachePolicy);
+ def : GCNPat<
+ (vt (Op
+ vt:$data, vt:$cmp, v4i32:$rsrc, 0,
+ i32:$voffset, (BUFSOffset i32:$soffset), timm:$offset,
+ timm:$cachepolicy, 0)),
+ !if(!eq(RtnMode, "ret"),
+ (EXTRACT_SUBREG OffenResDag, SubLo),
+ OffenResDag)
+ >;
+
+ defvar BothenResDag = (!cast<MUBUF_Pseudo>(Inst # "_BOTHEN" # InstSuffix)
+ (REG_SEQUENCE DataRC, SrcRC:$data, SubLo, SrcRC:$cmp, SubHi),
+ (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
+ SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset, CachePolicy);
+ def : GCNPat<
+ (vt (Op
+ vt:$data, vt:$cmp, v4i32:$rsrc, i32:$vindex,
+ i32:$voffset, (BUFSOffset i32:$soffset), timm:$offset,
+ timm:$cachepolicy, timm)),
+ !if(!eq(RtnMode, "ret"),
+ (EXTRACT_SUBREG BothenResDag, SubLo),
+ BothenResDag)
+ >;
+ } // end foreach RtnMode
+}
+
+multiclass SIBufferAtomicCmpSwapPat<ValueType vt, ValueType data_vt, string Inst> {
+ let SubtargetPredicate = HasUnrestrictedSOffset in {
+ defm : SIBufferAtomicCmpSwapPat_Common<vt, data_vt, Inst>;
+ }
+ defm : SIBufferAtomicCmpSwapPat_Common<vt, data_vt, Inst # "_VBUFFER">;
+}
+
+defm : SIBufferAtomicCmpSwapPat<i32, v2i32, "BUFFER_ATOMIC_CMPSWAP">;
+defm : SIBufferAtomicCmpSwapPat<i64, v2i64, "BUFFER_ATOMIC_CMPSWAP_X2">;
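
The old block open-coded the cmpswap patterns with a bare foreach and hard-wired i32/VReg_64 operands; the new SIBufferAtomicCmpSwapPat_Common is parameterized over vt/data_vt, so the same body now also covers the 64-bit BUFFER_ATOMIC_CMPSWAP_X2 and extracts the result directly with EXTRACT_SUBREG instead of going through COPY_TO_REGCLASS. The width-dependent subregister selection on its own, with stub records (!eq compares the records by identity):

    class VT;
    def i32 : VT;
    def i64 : VT;

    class SubIdx;
    def sub0      : SubIdx;
    def sub1      : SubIdx;
    def sub0_sub1 : SubIdx;
    def sub2_sub3 : SubIdx;

    // 32-bit cmpswap packs {data, cmp} into halves of a 64-bit pair;
    // 64-bit cmpswap packs them into halves of a 128-bit quad.
    class PickSubRegs<VT vt> {
      SubIdx SubLo = !if(!eq(vt, i32), sub0, sub0_sub1);
      SubIdx SubHi = !if(!eq(vt, i32), sub1, sub2_sub3);
    }

    def ForI32 : PickSubRegs<i32>;  // SubLo = sub0,      SubHi = sub1
    def ForI64 : PickSubRegs<i64>;  // SubLo = sub0_sub1, SubHi = sub2_sub3
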
class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt,
PatFrag constant_ld> : GCNPat <
@@ -1713,105 +1898,125 @@ defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORD_ADDR64, BUFFER_LOAD_DWORD_OFF
defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, BUFFER_LOAD_DWORDX2_OFFSET, i64, atomic_load_64_global>;
} // End SubtargetPredicate = isGFX6GFX7
-multiclass MUBUFLoad_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
+multiclass MUBUFLoad_PatternOffset_Common <string Instr, ValueType vt,
PatFrag ld> {
-
def : GCNPat <
(vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i32:$offset))),
- (Instr_OFFSET $srsrc, $soffset, $offset)
+ (!cast<MUBUF_Pseudo>(Instr # "_OFFSET") $srsrc, $soffset, $offset)
>;
}
+multiclass MUBUFLoad_PatternOffset <string Instr, ValueType vt,
+ PatFrag ld> {
+ let SubtargetPredicate = HasUnrestrictedSOffset in {
+ defm : MUBUFLoad_PatternOffset_Common<Instr, vt, ld>;
+ }
+ defm : MUBUFLoad_PatternOffset_Common<Instr # "_VBUFFER", vt, ld>;
+}
+
let OtherPredicates = [Has16BitInsts] in {
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_constant>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_constant>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, zextloadi8_constant>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_global>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_global>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, zextloadi8_global>;
+defm : MUBUFLoad_PatternOffset <"BUFFER_LOAD_SBYTE", i16, sextloadi8_constant>;
+defm : MUBUFLoad_PatternOffset <"BUFFER_LOAD_UBYTE", i16, extloadi8_constant>;
+defm : MUBUFLoad_PatternOffset <"BUFFER_LOAD_UBYTE", i16, zextloadi8_constant>;
+defm : MUBUFLoad_PatternOffset <"BUFFER_LOAD_SBYTE", i16, sextloadi8_global>;
+defm : MUBUFLoad_PatternOffset <"BUFFER_LOAD_UBYTE", i16, extloadi8_global>;
+defm : MUBUFLoad_PatternOffset <"BUFFER_LOAD_UBYTE", i16, zextloadi8_global>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_OFFSET, i16, load_global>;
+defm : MUBUFLoad_PatternOffset <"BUFFER_LOAD_USHORT", i16, load_global>;
} // End OtherPredicates = [Has16BitInsts]
-multiclass MUBUFScratchLoadPat <MUBUF_Pseudo InstrOffen,
- MUBUF_Pseudo InstrOffset,
+multiclass MUBUFScratchLoadPat_Common <string Instr,
ValueType vt, PatFrag ld> {
def : GCNPat <
(vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
i32:$soffset, i32:$offset))),
- (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0)
+ (!cast<MUBUF_Pseudo>(Instr # _OFFEN) $vaddr, $srsrc, $soffset, $offset, 0, 0)
>;
def : GCNPat <
(vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, i32:$offset))),
- (InstrOffset $srsrc, $soffset, $offset, 0, 0)
+ (!cast<MUBUF_Pseudo>(Instr # _OFFSET) $srsrc, $soffset, $offset, 0, 0)
>;
}
+multiclass MUBUFScratchLoadPat <string Instr,
+ ValueType vt, PatFrag ld> {
+ let SubtargetPredicate = HasUnrestrictedSOffset in {
+ defm : MUBUFScratchLoadPat_Common<Instr, vt, ld>;
+ }
+ defm : MUBUFScratchLoadPat_Common<Instr # "_VBUFFER", vt, ld>;
+}
+
// XXX - Is it possible to have a complex pattern in a PatFrag?
-multiclass MUBUFScratchLoadPat_D16 <MUBUF_Pseudo InstrOffen,
- MUBUF_Pseudo InstrOffset,
+multiclass MUBUFScratchLoadPat_D16_Common <string Instr,
ValueType vt, PatFrag ld_frag> {
def : GCNPat <
(ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, i32:$offset), vt:$in),
- (InstrOffen $vaddr, $srsrc, $soffset, $offset, $in)
+ (!cast<MUBUF_Pseudo>(Instr # _OFFEN) $vaddr, $srsrc, $soffset, $offset, $in)
>;
def : GCNPat <
(ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, i32:$offset), vt:$in),
- (InstrOffset $srsrc, $soffset, $offset, $in)
+ (!cast<MUBUF_Pseudo>(Instr # _OFFSET) $srsrc, $soffset, $offset, $in)
>;
}
+multiclass MUBUFScratchLoadPat_D16 <string Instr,
+ ValueType vt, PatFrag ld_frag> {
+ let SubtargetPredicate = HasUnrestrictedSOffset in {
+ defm : MUBUFScratchLoadPat_D16_Common<Instr, vt, ld_frag>;
+ }
+ defm : MUBUFScratchLoadPat_D16_Common<Instr # "_VBUFFER", vt, ld_frag>;
+}
+
let OtherPredicates = [DisableFlatScratch] in {
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET, i32, sextloadi8_private>;
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i32, extloadi8_private>;
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i32, zextloadi8_private>;
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_private>;
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_private>;
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i16, zextloadi8_private>;
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, BUFFER_LOAD_SSHORT_OFFSET, i32, sextloadi16_private>;
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, extloadi16_private>;
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, zextloadi16_private>;
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i16, load_private>;
+defm : MUBUFScratchLoadPat <"BUFFER_LOAD_SBYTE", i32, sextloadi8_private>;
+defm : MUBUFScratchLoadPat <"BUFFER_LOAD_UBYTE", i32, extloadi8_private>;
+defm : MUBUFScratchLoadPat <"BUFFER_LOAD_UBYTE", i32, zextloadi8_private>;
+defm : MUBUFScratchLoadPat <"BUFFER_LOAD_SBYTE", i16, sextloadi8_private>;
+defm : MUBUFScratchLoadPat <"BUFFER_LOAD_UBYTE", i16, extloadi8_private>;
+defm : MUBUFScratchLoadPat <"BUFFER_LOAD_UBYTE", i16, zextloadi8_private>;
+defm : MUBUFScratchLoadPat <"BUFFER_LOAD_SSHORT", i32, sextloadi16_private>;
+defm : MUBUFScratchLoadPat <"BUFFER_LOAD_USHORT", i32, extloadi16_private>;
+defm : MUBUFScratchLoadPat <"BUFFER_LOAD_USHORT", i32, zextloadi16_private>;
+defm : MUBUFScratchLoadPat <"BUFFER_LOAD_USHORT", i16, load_private>;
foreach vt = Reg32Types.types in {
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, BUFFER_LOAD_DWORD_OFFSET, vt, load_private>;
+defm : MUBUFScratchLoadPat <"BUFFER_LOAD_DWORD", vt, load_private>;
}
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, BUFFER_LOAD_DWORDX2_OFFSET, v2i32, load_private>;
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX3_OFFEN, BUFFER_LOAD_DWORDX3_OFFSET, v3i32, load_private>;
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, BUFFER_LOAD_DWORDX4_OFFSET, v4i32, load_private>;
+defm : MUBUFScratchLoadPat <"BUFFER_LOAD_DWORDX2", v2i32, load_private>;
+defm : MUBUFScratchLoadPat <"BUFFER_LOAD_DWORDX3", v3i32, load_private>;
+defm : MUBUFScratchLoadPat <"BUFFER_LOAD_DWORDX4", v4i32, load_private>;
let OtherPredicates = [D16PreservesUnusedBits, DisableFlatScratch] in {
-defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SHORT_D16_HI_OFFEN, BUFFER_LOAD_SHORT_D16_HI_OFFSET, v2i16, load_d16_hi_private>;
-defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_UBYTE_D16_HI_OFFEN, BUFFER_LOAD_UBYTE_D16_HI_OFFSET, v2i16, az_extloadi8_d16_hi_private>;
-defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SBYTE_D16_HI_OFFEN, BUFFER_LOAD_SBYTE_D16_HI_OFFSET, v2i16, sextloadi8_d16_hi_private>;
-defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SHORT_D16_HI_OFFEN, BUFFER_LOAD_SHORT_D16_HI_OFFSET, v2f16, load_d16_hi_private>;
-defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_UBYTE_D16_HI_OFFEN, BUFFER_LOAD_UBYTE_D16_HI_OFFSET, v2f16, az_extloadi8_d16_hi_private>;
-defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SBYTE_D16_HI_OFFEN, BUFFER_LOAD_SBYTE_D16_HI_OFFSET, v2f16, sextloadi8_d16_hi_private>;
+defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_SHORT_D16_HI", v2i16, load_d16_hi_private>;
+defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_UBYTE_D16_HI", v2i16, az_extloadi8_d16_hi_private>;
+defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_SBYTE_D16_HI", v2i16, sextloadi8_d16_hi_private>;
+defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_SHORT_D16_HI", v2f16, load_d16_hi_private>;
+defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_UBYTE_D16_HI", v2f16, az_extloadi8_d16_hi_private>;
+defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_SBYTE_D16_HI", v2f16, sextloadi8_d16_hi_private>;
-defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SHORT_D16_OFFEN, BUFFER_LOAD_SHORT_D16_OFFSET, v2i16, load_d16_lo_private>;
-defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_UBYTE_D16_OFFEN, BUFFER_LOAD_UBYTE_D16_OFFSET, v2i16, az_extloadi8_d16_lo_private>;
-defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SBYTE_D16_OFFEN, BUFFER_LOAD_SBYTE_D16_OFFSET, v2i16, sextloadi8_d16_lo_private>;
-defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SHORT_D16_OFFEN, BUFFER_LOAD_SHORT_D16_OFFSET, v2f16, load_d16_lo_private>;
-defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_UBYTE_D16_OFFEN, BUFFER_LOAD_UBYTE_D16_OFFSET, v2f16, az_extloadi8_d16_lo_private>;
-defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SBYTE_D16_OFFEN, BUFFER_LOAD_SBYTE_D16_OFFSET, v2f16, sextloadi8_d16_lo_private>;
+defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_SHORT_D16", v2i16, load_d16_lo_private>;
+defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_UBYTE_D16", v2i16, az_extloadi8_d16_lo_private>;
+defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_SBYTE_D16", v2i16, sextloadi8_d16_lo_private>;
+defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_SHORT_D16", v2f16, load_d16_lo_private>;
+defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_UBYTE_D16", v2f16, az_extloadi8_d16_lo_private>;
+defm : MUBUFScratchLoadPat_D16<"BUFFER_LOAD_SBYTE_D16", v2f16, sextloadi8_d16_lo_private>;
}
} // End OtherPredicates = [DisableFlatScratch]
multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
ValueType vt, PatFrag atomic_st> {
- // Store follows atomic op convention so address is first
def : GCNPat <
- (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i32:$offset), vt:$val),
+ (atomic_st vt:$val, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i32:$offset)),
(Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset)
>;
def : GCNPat <
- (atomic_st (MUBUFOffset v4i32:$rsrc, i32:$soffset, i32:$offset), vt:$val),
+ (atomic_st vt:$val, (MUBUFOffset v4i32:$rsrc, i32:$soffset, i32:$offset)),
(Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset))
>;
}
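
The comment deleted here ("Store follows atomic op convention so address is first") described the old source-dag shape. The operand order of the atomic store node was flipped to value-then-address so that it matches ordinary stores, and the patterns follow suit, with vt:$val now leading. A shape-only sketch with stub records:

    def atomic_st;   // stub for the atomic store node
    def Addr64;      // stub for the MUBUFAddr64 complex pattern
    def val;         // stub for the stored value operand

    def OldShape { dag Src = (atomic_st (Addr64), val); }  // address first
    def NewShape { dag Src = (atomic_st val, (Addr64)); }  // value first
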
@@ -1825,56 +2030,72 @@ defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORDX2_ADDR64, BUFFER_STORE_DWOR
} // End Predicates = isGFX6GFX7
-multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
- PatFrag st> {
+multiclass MUBUFStore_PatternOffset_Common <string Instr, ValueType vt,
+ PatFrag st> {
def : GCNPat <
(st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, i32:$offset)),
- (Instr_OFFSET $vdata, $srsrc, $soffset, $offset)
+ (!cast<MUBUF_Pseudo>(Instr # "_OFFSET") $vdata, $srsrc, $soffset, $offset)
>;
}
-defm : MUBUFStore_Pattern <BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_global>;
-defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT_OFFSET, i16, store_global>;
+multiclass MUBUFStore_PatternOffset <string Instr, ValueType vt,
+ PatFrag st> {
+ let SubtargetPredicate = HasUnrestrictedSOffset in {
+ defm : MUBUFStore_PatternOffset_Common<Instr, vt, st>;
+ }
+ defm : MUBUFStore_PatternOffset_Common<Instr # "_VBUFFER", vt, st>;
+}
+
+defm : MUBUFStore_PatternOffset <"BUFFER_STORE_BYTE", i16, truncstorei8_global>;
+defm : MUBUFStore_PatternOffset <"BUFFER_STORE_SHORT", i16, store_global>;
-multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen,
- MUBUF_Pseudo InstrOffset,
+multiclass MUBUFScratchStorePat_Common <string Instr,
ValueType vt, PatFrag st,
RegisterClass rc = VGPR_32> {
def : GCNPat <
(st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
i32:$soffset, i32:$offset)),
- (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0)
+ (!cast<MUBUF_Pseudo>(Instr # _OFFEN) rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0)
>;
def : GCNPat <
(st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset,
i32:$offset)),
- (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0)
+ (!cast<MUBUF_Pseudo>(Instr # _OFFSET) rc:$value, $srsrc, $soffset, $offset, 0, 0)
>;
}
+multiclass MUBUFScratchStorePat <string Instr,
+ ValueType vt, PatFrag st,
+ RegisterClass rc = VGPR_32> {
+ let SubtargetPredicate = HasUnrestrictedSOffset in {
+ defm : MUBUFScratchStorePat_Common<Instr, vt, st, rc>;
+ }
+ defm : MUBUFScratchStorePat_Common<Instr # "_VBUFFER", vt, st, rc>;
+}
+
let OtherPredicates = [DisableFlatScratch] in {
-defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i32, truncstorei8_private>;
-defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i32, truncstorei16_private>;
-defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_private>;
-defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i16, store_private>;
+defm : MUBUFScratchStorePat <"BUFFER_STORE_BYTE", i32, truncstorei8_private>;
+defm : MUBUFScratchStorePat <"BUFFER_STORE_SHORT", i32, truncstorei16_private>;
+defm : MUBUFScratchStorePat <"BUFFER_STORE_BYTE", i16, truncstorei8_private>;
+defm : MUBUFScratchStorePat <"BUFFER_STORE_SHORT", i16, store_private>;
foreach vt = Reg32Types.types in {
-defm : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, BUFFER_STORE_DWORD_OFFSET, vt, store_private>;
+defm : MUBUFScratchStorePat <"BUFFER_STORE_DWORD", vt, store_private>;
}
-defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OFFSET, v2i32, store_private, VReg_64>;
-defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX3_OFFEN, BUFFER_STORE_DWORDX3_OFFSET, v3i32, store_private, VReg_96>;
-defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private, VReg_128>;
+defm : MUBUFScratchStorePat <"BUFFER_STORE_DWORDX2", v2i32, store_private, VReg_64>;
+defm : MUBUFScratchStorePat <"BUFFER_STORE_DWORDX3", v3i32, store_private, VReg_96>;
+defm : MUBUFScratchStorePat <"BUFFER_STORE_DWORDX4", v4i32, store_private, VReg_128>;
let OtherPredicates = [HasD16LoadStore, DisableFlatScratch] in {
// Hiding the extract-high pattern in the PatFrag does not seem to
// automatically increase the complexity.
let AddedComplexity = 1 in {
-defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_D16_HI_OFFEN, BUFFER_STORE_SHORT_D16_HI_OFFSET, i32, store_hi16_private>;
-defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_D16_HI_OFFEN, BUFFER_STORE_BYTE_D16_HI_OFFSET, i32, truncstorei8_hi16_private>;
+defm : MUBUFScratchStorePat <"BUFFER_STORE_SHORT_D16_HI", i32, store_hi16_private>;
+defm : MUBUFScratchStorePat <"BUFFER_STORE_BYTE_D16_HI", i32, truncstorei8_hi16_private>;
}
}
} // End OtherPredicates = [DisableFlatScratch]
@@ -1887,12 +2108,12 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_D16_HI_OFFEN, BUFFER_STORE_BYTE_D
// tbuffer_load/store_format patterns
//===----------------------------------------------------------------------===//
-multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
+multiclass MTBUF_LoadIntrinsicPat_Common<SDPatternOperator name, ValueType vt,
string opcode, ValueType memoryVt = vt> {
defvar st = !if(!eq(memoryVt, vt), name, mtbuf_intrinsic_load<name, memoryVt>);
def : GCNPat<
- (vt (st v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
+ (vt (st v4i32:$rsrc, 0, 0, (BUFSOffset i32:$soffset), timm:$offset,
timm:$format, timm:$auxiliary, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
(as_i8timm $format),
@@ -1900,7 +2121,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
>;
def : GCNPat<
- (vt (st v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
+ (vt (st v4i32:$rsrc, i32:$vindex, 0, (BUFSOffset i32:$soffset), timm:$offset,
timm:$format, timm:$auxiliary, timm)),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
(as_i8timm $format),
@@ -1908,7 +2129,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
>;
def : GCNPat<
- (vt (st v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
+ (vt (st v4i32:$rsrc, 0, i32:$voffset, (BUFSOffset i32:$soffset), timm:$offset,
timm:$format, timm:$auxiliary, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
(as_i8timm $format),
@@ -1916,7 +2137,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
>;
def : GCNPat<
- (vt (st v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
+ (vt (st v4i32:$rsrc, i32:$vindex, i32:$voffset, (BUFSOffset i32:$soffset), timm:$offset,
timm:$format, timm:$auxiliary, timm)),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
@@ -1926,6 +2147,14 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
>;
}
+multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
+ string opcode, ValueType memoryVt = vt> {
+ let SubtargetPredicate = HasUnrestrictedSOffset in {
+ defm : MTBUF_LoadIntrinsicPat_Common<name, vt, opcode, memoryVt>;
+ }
+ defm : MTBUF_LoadIntrinsicPat_Common<name, vt, opcode # "_VBUFFER", memoryVt>;
+}
+
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, i32, "TBUFFER_LOAD_FORMAT_X">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2i32, "TBUFFER_LOAD_FORMAT_XY">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v3i32, "TBUFFER_LOAD_FORMAT_XYZ">;
@@ -1935,15 +2164,15 @@ defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2f32, "TBUFFER_LOAD_FORMAT_XY">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v3f32, "TBUFFER_LOAD_FORMAT_XYZ">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4f32, "TBUFFER_LOAD_FORMAT_XYZW">;
-let SubtargetPredicate = HasUnpackedD16VMem in {
- defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, f16, "TBUFFER_LOAD_FORMAT_D16_X_gfx80">;
- defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, i32, "TBUFFER_LOAD_FORMAT_D16_X_gfx80">;
- defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v2i32, "TBUFFER_LOAD_FORMAT_D16_XY_gfx80">;
- defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v3i32, "TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80">;
- defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v4i32, "TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
+let OtherPredicates = [HasUnpackedD16VMem] in {
+ defm : MTBUF_LoadIntrinsicPat_Common<SItbuffer_load_d16, f16, "TBUFFER_LOAD_FORMAT_D16_X_gfx80">;
+ defm : MTBUF_LoadIntrinsicPat_Common<SItbuffer_load_d16, i32, "TBUFFER_LOAD_FORMAT_D16_X_gfx80">;
+ defm : MTBUF_LoadIntrinsicPat_Common<SItbuffer_load_d16, v2i32, "TBUFFER_LOAD_FORMAT_D16_XY_gfx80">;
+ defm : MTBUF_LoadIntrinsicPat_Common<SItbuffer_load_d16, v3i32, "TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80">;
+ defm : MTBUF_LoadIntrinsicPat_Common<SItbuffer_load_d16, v4i32, "TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.
-let SubtargetPredicate = HasPackedD16VMem in {
+let OtherPredicates = [HasPackedD16VMem] in {
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, f16, "TBUFFER_LOAD_FORMAT_D16_X">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, i32, "TBUFFER_LOAD_FORMAT_D16_X">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v2f16, "TBUFFER_LOAD_FORMAT_D16_XY">;
@@ -1951,12 +2180,12 @@ let SubtargetPredicate = HasPackedD16VMem in {
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v4f16, "TBUFFER_LOAD_FORMAT_D16_XYZW">;
} // End HasPackedD16VMem.
-multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
+multiclass MTBUF_StoreIntrinsicPat_Common<SDPatternOperator name, ValueType vt,
string opcode, ValueType memoryVt = vt> {
defvar st = !if(!eq(memoryVt, vt), name, mtbuf_intrinsic_store<name, memoryVt>);
def : GCNPat<
- (st vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
+ (st vt:$vdata, v4i32:$rsrc, 0, 0, (BUFSOffset i32:$soffset), timm:$offset,
timm:$format, timm:$auxiliary, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) getVregSrcForVT<vt>.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset,
timm:$offset, (as_i8timm $format),
@@ -1964,7 +2193,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
>;
def : GCNPat<
- (st vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
+ (st vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, (BUFSOffset i32:$soffset), timm:$offset,
timm:$format, timm:$auxiliary, timm),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
timm:$offset, (as_i8timm $format),
@@ -1972,7 +2201,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
>;
def : GCNPat<
- (st vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
+ (st vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, (BUFSOffset i32:$soffset), timm:$offset,
timm:$format, timm:$auxiliary, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
timm:$offset, (as_i8timm $format),
@@ -1980,7 +2209,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
>;
def : GCNPat<
- (st vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
+ (st vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, (BUFSOffset i32:$soffset),
timm:$offset, timm:$format, timm:$auxiliary, timm),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact)
getVregSrcForVT<vt>.ret:$vdata,
@@ -1990,6 +2219,14 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
>;
}
+multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
+ string opcode, ValueType memoryVt = vt> {
+ let SubtargetPredicate = HasUnrestrictedSOffset in {
+ defm : MTBUF_StoreIntrinsicPat_Common<name, vt, opcode, memoryVt>;
+ }
+ defm : MTBUF_StoreIntrinsicPat_Common<name, vt, opcode # "_VBUFFER", memoryVt>;
+}
+
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, i32, "TBUFFER_STORE_FORMAT_X">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v2i32, "TBUFFER_STORE_FORMAT_XY">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v3i32, "TBUFFER_STORE_FORMAT_XYZ">;
@@ -1999,15 +2236,15 @@ defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v2f32, "TBUFFER_STORE_FORMAT_XY"
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v3f32, "TBUFFER_STORE_FORMAT_XYZ">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v4f32, "TBUFFER_STORE_FORMAT_XYZW">;
-let SubtargetPredicate = HasUnpackedD16VMem in {
- defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, f16, "TBUFFER_STORE_FORMAT_D16_X_gfx80">;
- defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, i32, "TBUFFER_STORE_FORMAT_D16_X_gfx80">;
- defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v2i32, "TBUFFER_STORE_FORMAT_D16_XY_gfx80">;
- defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v3i32, "TBUFFER_STORE_FORMAT_D16_XYZ_gfx80">;
- defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v4i32, "TBUFFER_STORE_FORMAT_D16_XYZW_gfx80">;
+let OtherPredicates = [HasUnpackedD16VMem] in {
+ defm : MTBUF_StoreIntrinsicPat_Common<SItbuffer_store_d16, f16, "TBUFFER_STORE_FORMAT_D16_X_gfx80">;
+ defm : MTBUF_StoreIntrinsicPat_Common<SItbuffer_store_d16, i32, "TBUFFER_STORE_FORMAT_D16_X_gfx80">;
+ defm : MTBUF_StoreIntrinsicPat_Common<SItbuffer_store_d16, v2i32, "TBUFFER_STORE_FORMAT_D16_XY_gfx80">;
+ defm : MTBUF_StoreIntrinsicPat_Common<SItbuffer_store_d16, v3i32, "TBUFFER_STORE_FORMAT_D16_XYZ_gfx80">;
+ defm : MTBUF_StoreIntrinsicPat_Common<SItbuffer_store_d16, v4i32, "TBUFFER_STORE_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.
-let SubtargetPredicate = HasPackedD16VMem in {
+let OtherPredicates = [HasPackedD16VMem] in {
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, f16, "TBUFFER_STORE_FORMAT_D16_X">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, i32, "TBUFFER_STORE_FORMAT_D16_X">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v2f16, "TBUFFER_STORE_FORMAT_D16_XY">;
@@ -2044,6 +2281,7 @@ class MUBUF_Real_gfx11<bits<8> op, MUBUF_Pseudo ps,
let Inst{53} = ps.tfe;
let Inst{54} = ps.offen;
let Inst{55} = ps.idxen;
+ let SubtargetPredicate = isGFX11Only;
}
class Base_MUBUF_Real_Atomic_gfx11<bits<8> op, MUBUF_Pseudo ps,
@@ -2067,15 +2305,98 @@ class MUBUF_Real_gfx10<bits<8> op, MUBUF_Pseudo ps> :
Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.GFX10> {
let Inst{15} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value);
let Inst{25} = op{7};
+ let SubtargetPredicate = isGFX10Only;
}
class MUBUF_Real_gfx6_gfx7<bits<8> op, MUBUF_Pseudo ps> :
Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI> {
let Inst{15} = ps.addr64;
+ let SubtargetPredicate = isGFX6GFX7;
+}
+
+//===----------------------------------------------------------------------===//
+// Base ENC_VBUFFER for GFX12.
+//===----------------------------------------------------------------------===//
+
+class VBUFFER_Real <BUF_Pseudo ps, string real_name = ps.Mnemonic> :
+ InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []>, Enc96 {
+
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+
+ let VM_CNT = 1;
+ let EXP_CNT = 1;
+
+ // copy relevant pseudo op flags
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+ let OtherPredicates = ps.OtherPredicates;
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+ let TSFlags = ps.TSFlags;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
+ let SchedRW = ps.SchedRW;
+ let mayLoad = ps.mayLoad;
+ let mayStore = ps.mayStore;
+ let IsAtomicRet = ps.IsAtomicRet;
+ let IsAtomicNoRet = ps.IsAtomicNoRet;
+ let VALU = ps.VALU;
+ let LGKM_CNT = ps.LGKM_CNT;
+
+ bits<24> offset;
+ bits<8> vaddr;
+ bits<10> vdata;
+
+ bits<7> srsrc;
+ bits<7> soffset;
+ bits<6> cpol;
+
+ let Inst{95-72} = !if(ps.has_offset, offset, ?);
+ let Inst{71-64} = !if(ps.has_vaddr, vaddr, ?);
+ let Inst{39-32} = !if(ps.has_vdata, vdata{7-0}, ?);
+
+ let Inst{47-41} = !if(ps.has_srsrc, srsrc, ?);
+ let Inst{49-48} = 0b00;
+ let Inst{6-0} = !if(ps.has_soffset, soffset, ?);
+ let Inst{22} = ps.tfe;
+ let Inst{62} = ps.offen;
+ let Inst{63} = ps.idxen;
+
+ let Inst{54-53} = cpol{2-1}; // th{2-1}
+ let Inst{52} = !if(ps.IsAtomicRet, 1, cpol{0}); // th{0}
+ let Inst{51-50} = cpol{4-3}; // scope
+
+ let Inst{31-26} = 0b110001;
+}
+
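
VBUFFER_Real is the shared 96-bit (Enc96) encoding shell for GFX12 buffer instructions: operand fields sit at fixed bit positions, cpol splits into th and scope bits, and bits 31-26 carry the fixed 0b110001 ENC_VBUFFER field. The bit-slicing mechanics in a standalone record, with the positions copied from the class above and everything else stubbed:

    class Enc96Sketch {
      bits<96> Inst;

      bits<24> offset;
      bits<8>  vaddr;
      bits<7>  soffset;

      let Inst{95-72} = offset;
      let Inst{71-64} = vaddr;
      let Inst{6-0}   = soffset;
      let Inst{31-26} = 0b110001;  // fixed ENC_VBUFFER field
    }

    def SAMPLE : Enc96Sketch {
      let offset  = 16;
      let vaddr   = 3;
      let soffset = 1;
    }
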
+class VBUFFER_MUBUF_Real_gfx12<bits<8> op, MUBUF_Pseudo ps,
+ string real_name = ps.Mnemonic> :
+ VBUFFER_Real<ps, real_name>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX12> {
+
+ let MUBUF = 1;
+
+ // Set the last bit of format to 1 to avoid round-trip issues, as some tools
+ // print BUF_FMT_INVALID for format 0.
+ let Inst{55} = 0b1;
+ let Inst{21-14} = op;
+ let SubtargetPredicate = isGFX12Only;
+}
+
+class VBUFFER_MTBUF_Real_gfx12<bits<4> op, MTBUF_Pseudo ps,
+ string real_name = ps.Mnemonic> :
+ VBUFFER_Real<ps, real_name>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX12> {
+
+ let MTBUF = 1;
+
+ bits<7> format;
+
+ let Inst{17-14} = op;
+ let Inst{21-18} = 0b1000;
+ let Inst{61-55} = format;
}
//===----------------------------------------------------------------------===//
-// MUBUF - GFX11.
+// MUBUF - GFX11, GFX12.
//===----------------------------------------------------------------------===//
// Shortcut to default Mnemonic from MUBUF_Pseudo. Hides the cast to the
@@ -2085,19 +2406,43 @@ class get_MUBUF_ps<string name> {
}
// gfx11/gfx12 instructions that accept both old and new assembler names.
-class Pre_gfx11_MUBUF_Name <string mnemonic, string real_name> :
+class Mnem_gfx11_gfx12 <string mnemonic, string real_name> :
MnemonicAlias<mnemonic, real_name>, Requires<[isGFX11Plus]>;
+class Mnem_gfx11 <string mnemonic, string real_name> :
+ MnemonicAlias<mnemonic, real_name>, Requires<[isGFX11Only]>;
+
+class Mnem_gfx12 <string mnemonic, string real_name> :
+ MnemonicAlias<mnemonic, real_name>, Requires<[isGFX12Plus]>;
+
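+// For example, Mnem_gfx11_gfx12<"buffer_load_dword", "buffer_load_b32">, as
+// instantiated by the Renamed multiclasses below, keeps the legacy mnemonic
+// assembling on both GFX11 and GFX12.
+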
class MUBUF_Real_gfx11_impl<bits<8> op, string ps_name, string real_name> :
MUBUF_Real_gfx11<op, !cast<MUBUF_Pseudo>(ps_name), real_name>;
-let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in
+
+class VBUFFER_MUBUF_Real_gfx12_impl<bits<8> op, string ps_name, string real_name> :
+ VBUFFER_MUBUF_Real_gfx12<op, !cast<MUBUF_Pseudo>(ps_name), real_name>;
+
multiclass MUBUF_Real_AllAddr_gfx11_Renamed_Impl2<bits<8> op, string real_name> {
- def _BOTHEN_gfx11 : MUBUF_Real_gfx11_impl<op, NAME # "_BOTHEN", real_name>;
- def _IDXEN_gfx11 : MUBUF_Real_gfx11_impl<op, NAME # "_IDXEN", real_name>;
- def _OFFEN_gfx11 : MUBUF_Real_gfx11_impl<op, NAME # "_OFFEN", real_name>;
- def _OFFSET_gfx11 : MUBUF_Real_gfx11_impl<op, NAME # "_OFFSET", real_name>;
+ let DecoderNamespace = "GFX11" in {
+ def _BOTHEN_gfx11 : MUBUF_Real_gfx11_impl<op, NAME # "_BOTHEN", real_name>;
+ def _IDXEN_gfx11 : MUBUF_Real_gfx11_impl<op, NAME # "_IDXEN", real_name>;
+ def _OFFEN_gfx11 : MUBUF_Real_gfx11_impl<op, NAME # "_OFFEN", real_name>;
+ def _OFFSET_gfx11 : MUBUF_Real_gfx11_impl<op, NAME # "_OFFSET", real_name>;
+ }
}
+multiclass MUBUF_Real_AllAddr_gfx12_Renamed_Impl2<bits<8> op, string real_name> {
+ let DecoderNamespace = "GFX12" in {
+ def _BOTHEN_gfx12 : VBUFFER_MUBUF_Real_gfx12_impl<op, NAME # "_VBUFFER_BOTHEN", real_name>;
+ def _IDXEN_gfx12 : VBUFFER_MUBUF_Real_gfx12_impl<op, NAME # "_VBUFFER_IDXEN", real_name>;
+ def _OFFEN_gfx12 : VBUFFER_MUBUF_Real_gfx12_impl<op, NAME # "_VBUFFER_OFFEN", real_name>;
+ def _OFFSET_gfx12 : VBUFFER_MUBUF_Real_gfx12_impl<op, NAME # "_VBUFFER_OFFSET", real_name>;
+ }
+}
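+// Note that the GFX12 reals above are built from the separate *_VBUFFER_*
+// pseudo variants rather than the plain pseudos used for GFX11.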
+
+multiclass MUBUF_Real_AllAddr_gfx11_gfx12_Renamed_Impl2<bits<8> op, string real_name> :
+ MUBUF_Real_AllAddr_gfx11_Renamed_Impl2<op, real_name>,
+ MUBUF_Real_AllAddr_gfx12_Renamed_Impl2<op, real_name>;
+
multiclass MUBUF_Real_AllAddr_gfx11_Renamed_Impl<bits<8> op, string real_name,
bit hasTFE = 1> {
defm NAME : MUBUF_Real_AllAddr_gfx11_Renamed_Impl2<op, real_name>;
@@ -2105,136 +2450,196 @@ multiclass MUBUF_Real_AllAddr_gfx11_Renamed_Impl<bits<8> op, string real_name,
defm _TFE : MUBUF_Real_AllAddr_gfx11_Renamed_Impl2<op, real_name>;
}
-// Non-renamed, non-atomic gfx11 mubuf instructions.
+multiclass MUBUF_Real_AllAddr_gfx11_gfx12_Renamed_Impl<bits<8> op, string real_name,
+ bit hasTFE = 1> {
+ defm NAME : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed_Impl2<op, real_name>;
+ if hasTFE then
+ defm _TFE : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed_Impl2<op, real_name>;
+}
+
+// Non-renamed, non-atomic gfx11/gfx12 mubuf instructions.
multiclass MUBUF_Real_AllAddr_gfx11<bits<8> op, bit hasTFE = 1> :
MUBUF_Real_AllAddr_gfx11_Renamed_Impl<op, get_MUBUF_ps<NAME>.Mnemonic, hasTFE>;
-multiclass MUBUF_Real_AllAddr_gfx11_Renamed<bits<8> op, string real_name> :
- MUBUF_Real_AllAddr_gfx11_Renamed_Impl<op, real_name> {
- def : Pre_gfx11_MUBUF_Name<get_MUBUF_ps<NAME>.Mnemonic, real_name>;
+multiclass MUBUF_Real_AllAddr_gfx11_gfx12<bits<8> op, bit hasTFE = 1> :
+ MUBUF_Real_AllAddr_gfx11_gfx12_Renamed_Impl<op, get_MUBUF_ps<NAME>.Mnemonic, hasTFE>;
+
+multiclass MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<bits<8> op, string real_name> :
+ MUBUF_Real_AllAddr_gfx11_gfx12_Renamed_Impl<op, real_name> {
+ def : Mnem_gfx11_gfx12<get_MUBUF_ps<NAME>.Mnemonic, real_name>;
}
class MUBUF_Real_Atomic_gfx11_impl<bits<8> op, string ps_name,
string real_name> :
Base_MUBUF_Real_Atomic_gfx11<op, !cast<MUBUF_Pseudo>(ps_name), real_name>;
-let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in
+
+class MUBUF_Real_Atomic_gfx12_impl<bits<8> op, string ps_name,
+ string real_name> :
+ VBUFFER_MUBUF_Real_gfx12<op, !cast<MUBUF_Pseudo>(ps_name), real_name>;
+
multiclass MUBUF_Real_Atomic_gfx11_Renamed_impl<bits<8> op, bit is_return,
string real_name> {
- defvar Rtn = !if(!eq(is_return, 1), "_RTN", "");
- def _BOTHEN#Rtn#_gfx11 :
- MUBUF_Real_Atomic_gfx11_impl<op, NAME # "_BOTHEN" # Rtn, real_name>,
- AtomicNoRet<NAME # "_BOTHEN_gfx11", is_return>;
- def _IDXEN#Rtn#_gfx11 :
- MUBUF_Real_Atomic_gfx11_impl<op, NAME # "_IDXEN" # Rtn, real_name>,
- AtomicNoRet<NAME # "_IDXEN_gfx11", is_return>;
- def _OFFEN#Rtn#_gfx11 :
- MUBUF_Real_Atomic_gfx11_impl<op, NAME # "_OFFEN" # Rtn, real_name>,
- AtomicNoRet<NAME # "_OFFEN_gfx11", is_return>;
- def _OFFSET#Rtn#_gfx11 :
- MUBUF_Real_Atomic_gfx11_impl<op, NAME # "_OFFSET" # Rtn, real_name>,
- AtomicNoRet<NAME # "_OFFSET_gfx11", is_return>;
-}
-
-// Non-renamed gfx11 mubuf atomic.
-multiclass MUBUF_Real_Atomic_gfx11<bits<8> op> :
- MUBUF_Real_Atomic_gfx11_Renamed_impl<op, 0, get_MUBUF_ps<NAME>.Mnemonic>,
- MUBUF_Real_Atomic_gfx11_Renamed_impl<op, 1, get_MUBUF_ps<NAME>.Mnemonic>;
+ let DecoderNamespace = "GFX11" in {
+ defvar Rtn = !if(!eq(is_return, 1), "_RTN", "");
+ def _BOTHEN#Rtn#_gfx11 :
+ MUBUF_Real_Atomic_gfx11_impl<op, NAME # "_BOTHEN" # Rtn, real_name>,
+ AtomicNoRet<NAME # "_BOTHEN_gfx11", is_return>;
+ def _IDXEN#Rtn#_gfx11 :
+ MUBUF_Real_Atomic_gfx11_impl<op, NAME # "_IDXEN" # Rtn, real_name>,
+ AtomicNoRet<NAME # "_IDXEN_gfx11", is_return>;
+ def _OFFEN#Rtn#_gfx11 :
+ MUBUF_Real_Atomic_gfx11_impl<op, NAME # "_OFFEN" # Rtn, real_name>,
+ AtomicNoRet<NAME # "_OFFEN_gfx11", is_return>;
+ def _OFFSET#Rtn#_gfx11 :
+ MUBUF_Real_Atomic_gfx11_impl<op, NAME # "_OFFSET" # Rtn, real_name>,
+ AtomicNoRet<NAME # "_OFFSET_gfx11", is_return>;
+ }
+}
+
+multiclass MUBUF_Real_Atomic_gfx12_Renamed_impl<bits<8> op, bit is_return,
+ string real_name> {
+ let DecoderNamespace = "GFX12" in {
+ defvar Rtn = !if(!eq(is_return, 1), "_RTN", "");
+ def _BOTHEN#Rtn#_gfx12 :
+ MUBUF_Real_Atomic_gfx12_impl<op, NAME # "_VBUFFER_BOTHEN" # Rtn, real_name>,
+ AtomicNoRet<NAME # "_BOTHEN_gfx12", is_return>;
+ def _IDXEN#Rtn#_gfx12 :
+ MUBUF_Real_Atomic_gfx12_impl<op, NAME # "_VBUFFER_IDXEN" # Rtn, real_name>,
+ AtomicNoRet<NAME # "_IDXEN_gfx12", is_return>;
+ def _OFFEN#Rtn#_gfx12 :
+ MUBUF_Real_Atomic_gfx12_impl<op, NAME # "_VBUFFER_OFFEN" # Rtn, real_name>,
+ AtomicNoRet<NAME # "_OFFEN_gfx12", is_return>;
+ def _OFFSET#Rtn#_gfx12 :
+ MUBUF_Real_Atomic_gfx12_impl<op, NAME # "_VBUFFER_OFFSET" # Rtn, real_name>,
+ AtomicNoRet<NAME # "_OFFSET_gfx12", is_return>;
+ }
+}
+
+multiclass MUBUF_Real_Atomic_gfx11_gfx12_Renamed_impl<bits<8> op, bit is_return,
+ string real_name> :
+ MUBUF_Real_Atomic_gfx11_Renamed_impl<op, is_return, real_name>,
+ MUBUF_Real_Atomic_gfx12_Renamed_impl<op, is_return, real_name>;
+
+// Non-renamed gfx11/gfx12 mubuf atomics.
+multiclass MUBUF_Real_Atomic_gfx11_gfx12<bits<8> op> :
+ MUBUF_Real_Atomic_gfx11_gfx12_Renamed_impl<op, 0, get_MUBUF_ps<NAME>.Mnemonic>,
+ MUBUF_Real_Atomic_gfx11_gfx12_Renamed_impl<op, 1, get_MUBUF_ps<NAME>.Mnemonic>;
+
+multiclass MUBUF_Real_Atomic_gfx12<bits<8> op> :
+ MUBUF_Real_Atomic_gfx12_Renamed_impl<op, 0, get_MUBUF_ps<NAME>.Mnemonic>,
+ MUBUF_Real_Atomic_gfx12_Renamed_impl<op, 1, get_MUBUF_ps<NAME>.Mnemonic>;
multiclass MUBUF_Real_Atomic_gfx11_Renamed<bits<8> op, string real_name> :
MUBUF_Real_Atomic_gfx11_Renamed_impl<op, 0, real_name>,
- MUBUF_Real_Atomic_gfx11_Renamed_impl<op, 1, real_name> {
- def : Pre_gfx11_MUBUF_Name<get_MUBUF_ps<NAME>.Mnemonic, real_name>;
+ MUBUF_Real_Atomic_gfx11_Renamed_impl<op, 1, real_name> {
+ def : Mnem_gfx11_gfx12<get_MUBUF_ps<NAME>.Mnemonic, real_name>;
+}
+
+multiclass MUBUF_Real_Atomic_gfx11_gfx12_Renamed<bits<8> op, string real_name> :
+ MUBUF_Real_Atomic_gfx11_gfx12_Renamed_impl<op, 0, real_name>,
+ MUBUF_Real_Atomic_gfx11_gfx12_Renamed_impl<op, 1, real_name> {
+ def : Mnem_gfx11_gfx12<get_MUBUF_ps<NAME>.Mnemonic, real_name>;
}
-let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
+multiclass MUBUF_Real_Atomic_gfx11_gfx12_Renamed_gfx12_Renamed<bits<8> op, string gfx12_name, string gfx11_name> :
+ MUBUF_Real_Atomic_gfx11_Renamed_impl<op, 0, gfx11_name>,
+ MUBUF_Real_Atomic_gfx11_Renamed_impl<op, 1, gfx11_name>,
+ MUBUF_Real_Atomic_gfx12_Renamed_impl<op, 0, gfx12_name>,
+ MUBUF_Real_Atomic_gfx12_Renamed_impl<op, 1, gfx12_name> {
+ def : Mnem_gfx11<get_MUBUF_ps<NAME>.Mnemonic, gfx11_name>;
+ def : Mnem_gfx12<get_MUBUF_ps<NAME>.Mnemonic, gfx12_name>;
+ def : Mnem_gfx12<gfx11_name, gfx12_name>;
+}
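+// For example, BUFFER_ATOMIC_CSUB below instantiates this so the GFX11 form
+// assembles as buffer_atomic_csub_u32 and the GFX12 form as
+// buffer_atomic_sub_clamp_u32, with the older mnemonics accepted as aliases.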
+
+let DecoderNamespace = "GFX11" in {
def BUFFER_GL0_INV_gfx11 : MUBUF_Real_gfx11<0x02B, BUFFER_GL0_INV>;
def BUFFER_GL1_INV_gfx11 : MUBUF_Real_gfx11<0x02C, BUFFER_GL1_INV>;
}
-defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_gfx11_Renamed<0x014, "buffer_load_b32">;
-defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_gfx11_Renamed<0x015, "buffer_load_b64">;
-defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_gfx11_Renamed<0x016, "buffer_load_b96">;
-defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_gfx11_Renamed<0x017, "buffer_load_b128">;
-defm BUFFER_LOAD_SHORT_D16 : MUBUF_Real_AllAddr_gfx11_Renamed<0x020, "buffer_load_d16_b16">;
-defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Real_AllAddr_gfx11_Renamed<0x008, "buffer_load_d16_format_x">;
-defm BUFFER_LOAD_FORMAT_D16_XY : MUBUF_Real_AllAddr_gfx11_Renamed<0x009, "buffer_load_d16_format_xy">;
-defm BUFFER_LOAD_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_gfx11_Renamed<0x00a, "buffer_load_d16_format_xyz">;
-defm BUFFER_LOAD_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx11_Renamed<0x00b, "buffer_load_d16_format_xyzw">;
-defm BUFFER_LOAD_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx11_Renamed<0x023, "buffer_load_d16_hi_b16">;
-defm BUFFER_LOAD_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx11_Renamed<0x026, "buffer_load_d16_hi_format_x">;
-defm BUFFER_LOAD_SBYTE_D16_HI : MUBUF_Real_AllAddr_gfx11_Renamed<0x022, "buffer_load_d16_hi_i8">;
-defm BUFFER_LOAD_UBYTE_D16_HI : MUBUF_Real_AllAddr_gfx11_Renamed<0x021, "buffer_load_d16_hi_u8">;
-defm BUFFER_LOAD_SBYTE_D16 : MUBUF_Real_AllAddr_gfx11_Renamed<0x01f, "buffer_load_d16_i8">;
-defm BUFFER_LOAD_UBYTE_D16 : MUBUF_Real_AllAddr_gfx11_Renamed<0x01e, "buffer_load_d16_u8">;
-defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_gfx11<0x000>;
-defm BUFFER_LOAD_FORMAT_XY : MUBUF_Real_AllAddr_gfx11<0x001>;
-defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Real_AllAddr_gfx11<0x002>;
-defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx11<0x003>;
-defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_gfx11_Renamed<0x011, "buffer_load_i8">;
-defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_gfx11_Renamed<0x013, "buffer_load_i16">;
-defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_gfx11_Renamed<0x010, "buffer_load_u8">;
-defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_gfx11_Renamed<0x012, "buffer_load_u16">;
+defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x014, "buffer_load_b32">;
+defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x015, "buffer_load_b64">;
+defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x016, "buffer_load_b96">;
+defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x017, "buffer_load_b128">;
+defm BUFFER_LOAD_SHORT_D16 : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x020, "buffer_load_d16_b16">;
+defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x008, "buffer_load_d16_format_x">;
+defm BUFFER_LOAD_FORMAT_D16_XY : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x009, "buffer_load_d16_format_xy">;
+defm BUFFER_LOAD_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x00a, "buffer_load_d16_format_xyz">;
+defm BUFFER_LOAD_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x00b, "buffer_load_d16_format_xyzw">;
+defm BUFFER_LOAD_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x023, "buffer_load_d16_hi_b16">;
+defm BUFFER_LOAD_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x026, "buffer_load_d16_hi_format_x">;
+defm BUFFER_LOAD_SBYTE_D16_HI : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x022, "buffer_load_d16_hi_i8">;
+defm BUFFER_LOAD_UBYTE_D16_HI : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x021, "buffer_load_d16_hi_u8">;
+defm BUFFER_LOAD_SBYTE_D16 : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x01f, "buffer_load_d16_i8">;
+defm BUFFER_LOAD_UBYTE_D16 : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x01e, "buffer_load_d16_u8">;
+defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_gfx11_gfx12<0x000>;
+defm BUFFER_LOAD_FORMAT_XY : MUBUF_Real_AllAddr_gfx11_gfx12<0x001>;
+defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Real_AllAddr_gfx11_gfx12<0x002>;
+defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx11_gfx12<0x003>;
+defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x011, "buffer_load_i8">;
+defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x013, "buffer_load_i16">;
+defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x010, "buffer_load_u8">;
+defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x012, "buffer_load_u16">;
defm BUFFER_LOAD_LDS_B32 : MUBUF_Real_AllAddr_gfx11<0x031, 0>;
defm BUFFER_LOAD_LDS_FORMAT_X : MUBUF_Real_AllAddr_gfx11<0x032, 0>;
defm BUFFER_LOAD_LDS_I8 : MUBUF_Real_AllAddr_gfx11<0x02e, 0>;
defm BUFFER_LOAD_LDS_I16 : MUBUF_Real_AllAddr_gfx11<0x030, 0>;
defm BUFFER_LOAD_LDS_U8 : MUBUF_Real_AllAddr_gfx11<0x02d, 0>;
defm BUFFER_LOAD_LDS_U16 : MUBUF_Real_AllAddr_gfx11<0x02f, 0>;
-defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_gfx11_Renamed<0x018, "buffer_store_b8">;
-defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_gfx11_Renamed<0x019, "buffer_store_b16">;
-defm BUFFER_STORE_DWORD : MUBUF_Real_AllAddr_gfx11_Renamed<0x01A, "buffer_store_b32">;
-defm BUFFER_STORE_DWORDX2 : MUBUF_Real_AllAddr_gfx11_Renamed<0x01B, "buffer_store_b64">;
-defm BUFFER_STORE_DWORDX3 : MUBUF_Real_AllAddr_gfx11_Renamed<0x01C, "buffer_store_b96">;
-defm BUFFER_STORE_DWORDX4 : MUBUF_Real_AllAddr_gfx11_Renamed<0x01D, "buffer_store_b128">;
-defm BUFFER_STORE_FORMAT_D16_X : MUBUF_Real_AllAddr_gfx11_Renamed<0x00C, "buffer_store_d16_format_x">;
-defm BUFFER_STORE_FORMAT_D16_XY : MUBUF_Real_AllAddr_gfx11_Renamed<0x00D, "buffer_store_d16_format_xy">;
-defm BUFFER_STORE_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_gfx11_Renamed<0x00E, "buffer_store_d16_format_xyz">;
-defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx11_Renamed<0x00F, "buffer_store_d16_format_xyzw">;
-defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx11_Renamed<0x024, "buffer_store_d16_hi_b8">;
-defm BUFFER_STORE_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx11_Renamed<0x025, "buffer_store_d16_hi_b16">;
-defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx11_Renamed<0x027, "buffer_store_d16_hi_format_x">;
-defm BUFFER_STORE_FORMAT_X : MUBUF_Real_AllAddr_gfx11<0x004>;
-defm BUFFER_STORE_FORMAT_XY : MUBUF_Real_AllAddr_gfx11<0x005>;
-defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Real_AllAddr_gfx11<0x006>;
-defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx11<0x007>;
-defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Real_Atomic_gfx11<0x056>;
-defm BUFFER_ATOMIC_ADD : MUBUF_Real_Atomic_gfx11_Renamed<0x035, "buffer_atomic_add_u32">;
-defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomic_gfx11_Renamed<0x043, "buffer_atomic_add_u64">;
-defm BUFFER_ATOMIC_AND : MUBUF_Real_Atomic_gfx11_Renamed<0x03C, "buffer_atomic_and_b32">;
-defm BUFFER_ATOMIC_AND_X2 : MUBUF_Real_Atomic_gfx11_Renamed<0x049, "buffer_atomic_and_b64">;
-defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Real_Atomic_gfx11_Renamed<0x034, "buffer_atomic_cmpswap_b32">;
-defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomic_gfx11_Renamed<0x042, "buffer_atomic_cmpswap_b64">;
+defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x018, "buffer_store_b8">;
+defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x019, "buffer_store_b16">;
+defm BUFFER_STORE_DWORD : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x01A, "buffer_store_b32">;
+defm BUFFER_STORE_DWORDX2 : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x01B, "buffer_store_b64">;
+defm BUFFER_STORE_DWORDX3 : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x01C, "buffer_store_b96">;
+defm BUFFER_STORE_DWORDX4 : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x01D, "buffer_store_b128">;
+defm BUFFER_STORE_FORMAT_D16_X : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x00C, "buffer_store_d16_format_x">;
+defm BUFFER_STORE_FORMAT_D16_XY : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x00D, "buffer_store_d16_format_xy">;
+defm BUFFER_STORE_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x00E, "buffer_store_d16_format_xyz">;
+defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x00F, "buffer_store_d16_format_xyzw">;
+defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x024, "buffer_store_d16_hi_b8">;
+defm BUFFER_STORE_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x025, "buffer_store_d16_hi_b16">;
+defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x027, "buffer_store_d16_hi_format_x">;
+defm BUFFER_STORE_FORMAT_X : MUBUF_Real_AllAddr_gfx11_gfx12<0x004>;
+defm BUFFER_STORE_FORMAT_XY : MUBUF_Real_AllAddr_gfx11_gfx12<0x005>;
+defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Real_AllAddr_gfx11_gfx12<0x006>;
+defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx11_gfx12<0x007>;
+defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Real_Atomic_gfx11_gfx12<0x056>;
+defm BUFFER_ATOMIC_ADD : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x035, "buffer_atomic_add_u32">;
+defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x043, "buffer_atomic_add_u64">;
+defm BUFFER_ATOMIC_AND : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x03C, "buffer_atomic_and_b32">;
+defm BUFFER_ATOMIC_AND_X2 : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x049, "buffer_atomic_and_b64">;
+defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x034, "buffer_atomic_cmpswap_b32">;
+defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x042, "buffer_atomic_cmpswap_b64">;
defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomic_gfx11_Renamed<0x050, "buffer_atomic_cmpswap_f32">;
-defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomic_gfx11_Renamed_impl<0x037, 1, "buffer_atomic_csub_u32">;
-def : Pre_gfx11_MUBUF_Name<"buffer_atomic_csub", "buffer_atomic_csub_u32">;
-defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomic_gfx11_Renamed<0x040, "buffer_atomic_dec_u32">;
-defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomic_gfx11_Renamed<0x04D, "buffer_atomic_dec_u64">;
-defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomic_gfx11_Renamed<0x03F, "buffer_atomic_inc_u32">;
-defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomic_gfx11_Renamed<0x04C, "buffer_atomic_inc_u64">;
-defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomic_gfx11_Renamed<0x052, "buffer_atomic_max_f32">;
-defm BUFFER_ATOMIC_SMAX : MUBUF_Real_Atomic_gfx11_Renamed<0x03A, "buffer_atomic_max_i32">;
-defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Real_Atomic_gfx11_Renamed<0x047, "buffer_atomic_max_i64">;
-defm BUFFER_ATOMIC_UMAX : MUBUF_Real_Atomic_gfx11_Renamed<0x03B, "buffer_atomic_max_u32">;
-defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Real_Atomic_gfx11_Renamed<0x048, "buffer_atomic_max_u64">;
-defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomic_gfx11_Renamed<0x051, "buffer_atomic_min_f32">;
-defm BUFFER_ATOMIC_SMIN : MUBUF_Real_Atomic_gfx11_Renamed<0x038, "buffer_atomic_min_i32">;
-defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Real_Atomic_gfx11_Renamed<0x045, "buffer_atomic_min_i64">;
-defm BUFFER_ATOMIC_UMIN : MUBUF_Real_Atomic_gfx11_Renamed<0x039, "buffer_atomic_min_u32">;
-defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Real_Atomic_gfx11_Renamed<0x046, "buffer_atomic_min_u64">;
-defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomic_gfx11_Renamed<0x03D, "buffer_atomic_or_b32">;
-defm BUFFER_ATOMIC_OR_X2 : MUBUF_Real_Atomic_gfx11_Renamed<0x04A, "buffer_atomic_or_b64">;
-defm BUFFER_ATOMIC_SUB : MUBUF_Real_Atomic_gfx11_Renamed<0x036, "buffer_atomic_sub_u32">;
-defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Real_Atomic_gfx11_Renamed<0x044, "buffer_atomic_sub_u64">;
-defm BUFFER_ATOMIC_SWAP : MUBUF_Real_Atomic_gfx11_Renamed<0x033, "buffer_atomic_swap_b32">;
-defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomic_gfx11_Renamed<0x041, "buffer_atomic_swap_b64">;
-defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomic_gfx11_Renamed<0x03E, "buffer_atomic_xor_b32">;
-defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomic_gfx11_Renamed<0x04B, "buffer_atomic_xor_b64">;
+defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomic_gfx11_gfx12_Renamed_gfx12_Renamed<0x037, "buffer_atomic_sub_clamp_u32", "buffer_atomic_csub_u32">;
+def : Mnem_gfx11_gfx12<"buffer_atomic_csub", "buffer_atomic_csub_u32">;
+defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x040, "buffer_atomic_dec_u32">;
+defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x04D, "buffer_atomic_dec_u64">;
+defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x03F, "buffer_atomic_inc_u32">;
+defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x04C, "buffer_atomic_inc_u64">;
+defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomic_gfx11_gfx12_Renamed_gfx12_Renamed<0x052, "buffer_atomic_max_num_f32", "buffer_atomic_max_f32">;
+defm BUFFER_ATOMIC_SMAX : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x03A, "buffer_atomic_max_i32">;
+defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x047, "buffer_atomic_max_i64">;
+defm BUFFER_ATOMIC_UMAX : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x03B, "buffer_atomic_max_u32">;
+defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x048, "buffer_atomic_max_u64">;
+defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomic_gfx11_gfx12_Renamed_gfx12_Renamed<0x051, "buffer_atomic_min_num_f32", "buffer_atomic_min_f32">;
+defm BUFFER_ATOMIC_SMIN : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x038, "buffer_atomic_min_i32">;
+defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x045, "buffer_atomic_min_i64">;
+defm BUFFER_ATOMIC_UMIN : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x039, "buffer_atomic_min_u32">;
+defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x046, "buffer_atomic_min_u64">;
+defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x03D, "buffer_atomic_or_b32">;
+defm BUFFER_ATOMIC_OR_X2 : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x04A, "buffer_atomic_or_b64">;
+defm BUFFER_ATOMIC_SUB : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x036, "buffer_atomic_sub_u32">;
+defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x044, "buffer_atomic_sub_u64">;
+defm BUFFER_ATOMIC_SWAP : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x033, "buffer_atomic_swap_b32">;
+defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x041, "buffer_atomic_swap_b64">;
+defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x03E, "buffer_atomic_xor_b32">;
+defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x04B, "buffer_atomic_xor_b64">;
//===----------------------------------------------------------------------===//
// MUBUF - GFX10.
//===----------------------------------------------------------------------===//
-let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
+let DecoderNamespace = "GFX10" in {
multiclass MUBUF_Real_AllAddr_Helper_gfx10<bits<8> op> {
def _BOTHEN_gfx10 :
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
@@ -2291,7 +2696,7 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
AtomicNoRet<NAME # "_OFFSET_gfx10", 0>;
}
-} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
+} // End DecoderNamespace = "GFX10"
defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x019>;
defm BUFFER_STORE_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx10<0x01b>;
@@ -2477,7 +2882,7 @@ defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>;
defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>;
-defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomics_RTN_gfx10<0x034>;
+defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomics_gfx10<0x034>;
defm BUFFER_WBINVL1_SC : MUBUF_Real_gfx6<0x070>;
defm BUFFER_WBINVL1_VOL : MUBUF_Real_gfx7<0x070>;
@@ -2524,47 +2929,59 @@ class Base_MTBUF_Real_gfx6_gfx7_gfx10<bits<3> op, MTBUF_Pseudo ps, int ef> :
// MTBUF - GFX11, GFX12.
//===----------------------------------------------------------------------===//
-let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in
-multiclass MTBUF_Real_AllAddr_gfx11_Renamed_Impl<bits<4> op, string real_name> {
- def _BOTHEN_gfx11 :
- Base_MTBUF_Real_gfx11<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN"), real_name>;
- def _IDXEN_gfx11 :
- Base_MTBUF_Real_gfx11<op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN"), real_name>;
- def _OFFEN_gfx11 :
- Base_MTBUF_Real_gfx11<op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN"), real_name>;
- def _OFFSET_gfx11 :
- Base_MTBUF_Real_gfx11<op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET"), real_name>;
+multiclass MTBUF_Real_AllAddr_gfx11_gfx12_Renamed_Impl<bits<4> op, string real_name> {
+ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
+ def _BOTHEN_gfx11 :
+ Base_MTBUF_Real_gfx11<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN"), real_name>;
+ def _IDXEN_gfx11 :
+ Base_MTBUF_Real_gfx11<op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN"), real_name>;
+ def _OFFEN_gfx11 :
+ Base_MTBUF_Real_gfx11<op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN"), real_name>;
+ def _OFFSET_gfx11 :
+ Base_MTBUF_Real_gfx11<op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET"), real_name>;
+ }
+
+ let AssemblerPredicate = isGFX12Plus, DecoderNamespace = "GFX12" in {
+ def _BOTHEN_gfx12 :
+ VBUFFER_MTBUF_Real_gfx12<op, !cast<MTBUF_Pseudo>(NAME#"_VBUFFER_BOTHEN"), real_name>;
+ def _IDXEN_gfx12 :
+ VBUFFER_MTBUF_Real_gfx12<op, !cast<MTBUF_Pseudo>(NAME#"_VBUFFER_IDXEN"), real_name>;
+ def _OFFEN_gfx12 :
+ VBUFFER_MTBUF_Real_gfx12<op, !cast<MTBUF_Pseudo>(NAME#"_VBUFFER_OFFEN"), real_name>;
+ def _OFFSET_gfx12 :
+ VBUFFER_MTBUF_Real_gfx12<op, !cast<MTBUF_Pseudo>(NAME#"_VBUFFER_OFFSET"), real_name>;
+ }
}
-multiclass MTBUF_Real_AllAddr_gfx11_Impl<bits<4> op, MTBUF_Pseudo ps>
- : MTBUF_Real_AllAddr_gfx11_Renamed_Impl<op, ps.Mnemonic>;
-multiclass MTBUF_Real_AllAddr_gfx11<bits<4> op>
- : MTBUF_Real_AllAddr_gfx11_Impl<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
+multiclass MTBUF_Real_AllAddr_gfx11_gfx12_Impl<bits<4> op, MTBUF_Pseudo ps>
+ : MTBUF_Real_AllAddr_gfx11_gfx12_Renamed_Impl<op, ps.Mnemonic>;
+multiclass MTBUF_Real_AllAddr_gfx11_gfx12<bits<4> op>
+ : MTBUF_Real_AllAddr_gfx11_gfx12_Impl<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
class Pre_gfx11_MTBUF_Name <MTBUF_Pseudo ps, string real_name>
: MnemonicAlias<ps.Mnemonic, real_name>, Requires<[isGFX11Plus]>;
-multiclass MTBUF_Real_AllAddr_gfx11_Renamed<bits<4> op, string real_name>
- : MTBUF_Real_AllAddr_gfx11_Renamed_Impl<op, real_name> {
+multiclass MTBUF_Real_AllAddr_gfx11_gfx12_Renamed<bits<4> op, string real_name>
+ : MTBUF_Real_AllAddr_gfx11_gfx12_Renamed_Impl<op, real_name> {
def : Pre_gfx11_MTBUF_Name<!cast<MTBUF_Pseudo>(NAME#"_BOTHEN"), real_name>;
}
-defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx11_Renamed<0x008, "tbuffer_load_d16_format_x">;
-defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx11_Renamed<0x009, "tbuffer_load_d16_format_xy">;
-defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_gfx11_Renamed<0x00a, "tbuffer_load_d16_format_xyz">;
-defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx11_Renamed<0x00b, "tbuffer_load_d16_format_xyzw">;
-defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_gfx11<0x000>;
-defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_gfx11<0x001>;
-defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_gfx11<0x002>;
-defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx11<0x003>;
-defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx11_Renamed<0x00c, "tbuffer_store_d16_format_x">;
-defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx11_Renamed<0x00d, "tbuffer_store_d16_format_xy">;
-defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_gfx11_Renamed<0x00e, "tbuffer_store_d16_format_xyz">;
-defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx11_Renamed<0x00f, "tbuffer_store_d16_format_xyzw">;
-defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_gfx11<0x004>;
-defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_gfx11<0x005>;
-defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_gfx11<0x006>;
-defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx11<0x007>;
+defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x008, "tbuffer_load_d16_format_x">;
+defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x009, "tbuffer_load_d16_format_xy">;
+defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x00a, "tbuffer_load_d16_format_xyz">;
+defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x00b, "tbuffer_load_d16_format_xyzw">;
+defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_gfx11_gfx12<0x000>;
+defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_gfx11_gfx12<0x001>;
+defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_gfx11_gfx12<0x002>;
+defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx11_gfx12<0x003>;
+defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x00c, "tbuffer_store_d16_format_x">;
+defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x00d, "tbuffer_store_d16_format_xy">;
+defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x00e, "tbuffer_store_d16_format_xyz">;
+defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx11_gfx12_Renamed<0x00f, "tbuffer_store_d16_format_xyzw">;
+defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_gfx11_gfx12<0x004>;
+defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_gfx11_gfx12<0x005>;
+defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_gfx11_gfx12<0x006>;
+defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx11_gfx12<0x007>;
//===----------------------------------------------------------------------===//
// MTBUF - GFX10.
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td
index 85a3f763cd5a..3a895923fa4b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -12,6 +12,7 @@ class DS_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt
let LGKM_CNT = 1;
let DS = 1;
+ let GWS = 0;
let Size = 8;
let UseNamedOperandTable = 1;
@@ -61,6 +62,7 @@ class DS_Real <DS_Pseudo ps, string opName = ps.Mnemonic> :
let UseNamedOperandTable = 1;
// copy relevant pseudo op flags
+ let GWS = ps.GWS;
let SubtargetPredicate = ps.SubtargetPredicate;
let OtherPredicates = ps.OtherPredicates;
let SchedRW = ps.SchedRW;
@@ -376,6 +378,7 @@ multiclass DS_1A_mc <string opName> {
class DS_GWS <string opName, dag ins, string asmOps>
: DS_Pseudo<opName, (outs), ins, asmOps> {
+ let GWS = 1;
let has_vdst = 0;
let has_addr = 0;
@@ -708,18 +711,34 @@ def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
//===----------------------------------------------------------------------===//
-// Instruction definitions for GFX11 and newer.
+// Instruction definitions for GFX11.
//===----------------------------------------------------------------------===//
-let SubtargetPredicate = isGFX11Plus in {
+let SubtargetPredicate = isGFX11Only in {
def DS_ADD_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_add_gs_reg_rtn", VReg_64, VGPR_32>;
def DS_SUB_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_sub_gs_reg_rtn", VReg_64, VGPR_32>;
+
+} // let SubtargetPredicate = isGFX11Only
+
+let SubtargetPredicate = isGFX11Plus in {
+
def DS_BVH_STACK_RTN_B32 : DS_BVH_STACK<"ds_bvh_stack_rtn_b32">;
} // let SubtargetPredicate = isGFX11Plus
//===----------------------------------------------------------------------===//
+// Instruction definitions for GFX12 and newer.
+//===----------------------------------------------------------------------===//
+
+let SubtargetPredicate = isGFX12Plus in {
+
+defm DS_SUB_CLAMP_U32 : DS_1A1D_NORET_mc<"ds_sub_clamp_u32">;
+defm DS_SUB_CLAMP_RTN_U32 : DS_1A1D_RET_mc<"ds_sub_clamp_rtn_u32", VGPR_32, "ds_sub_clamp_u32">;
+
+} // let SubtargetPredicate = isGFX12Plus
+
+//===----------------------------------------------------------------------===//
// DS Patterns
//===----------------------------------------------------------------------===//
@@ -803,23 +822,6 @@ multiclass DSWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
}
}
-// Irritatingly, atomic_store reverses the order of operands from a
-// normal store.
-class DSAtomicWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
- (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
- (inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset, (i1 0))
->;
-
-multiclass DSAtomicWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
- let OtherPredicates = [LDSRequiresM0Init] in {
- def : DSAtomicWritePat<inst, vt, !cast<PatFrag>(frag#"_m0")>;
- }
-
- let OtherPredicates = [NotLDSRequiresM0Init] in {
- def : DSAtomicWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
- }
-}
-
defm : DSWritePat_mc <DS_WRITE_B8, i32, "truncstorei8_local">;
defm : DSWritePat_mc <DS_WRITE_B16, i32, "truncstorei16_local">;
defm : DSWritePat_mc <DS_WRITE_B8, i16, "truncstorei8_local">;
@@ -829,12 +831,12 @@ foreach vt = Reg32Types.types in {
defm : DSWritePat_mc <DS_WRITE_B32, vt, "store_local">;
}
-defm : DSAtomicWritePat_mc <DS_WRITE_B8, i16, "atomic_store_8_local">;
-defm : DSAtomicWritePat_mc <DS_WRITE_B8, i32, "atomic_store_8_local">;
-defm : DSAtomicWritePat_mc <DS_WRITE_B16, i16, "atomic_store_16_local">;
-defm : DSAtomicWritePat_mc <DS_WRITE_B16, i32, "atomic_store_16_local">;
-defm : DSAtomicWritePat_mc <DS_WRITE_B32, i32, "atomic_store_32_local">;
-defm : DSAtomicWritePat_mc <DS_WRITE_B64, i64, "atomic_store_64_local">;
+defm : DSWritePat_mc <DS_WRITE_B8, i16, "atomic_store_8_local">;
+defm : DSWritePat_mc <DS_WRITE_B8, i32, "atomic_store_8_local">;
+defm : DSWritePat_mc <DS_WRITE_B16, i16, "atomic_store_16_local">;
+defm : DSWritePat_mc <DS_WRITE_B16, i32, "atomic_store_16_local">;
+defm : DSWritePat_mc <DS_WRITE_B32, i32, "atomic_store_32_local">;
+defm : DSWritePat_mc <DS_WRITE_B64, i64, "atomic_store_64_local">;
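+
+// Note: the local atomic stores above can share the plain DSWritePat
+// patterns because atomic_store now takes its operands in the same order as
+// an ordinary store, which is what made the removed DSAtomicWritePat
+// helpers redundant.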
let OtherPredicates = [HasD16LoadStore] in {
def : DSWritePat <DS_WRITE_B16_D16_HI, i32, store_hi16_local>;
@@ -969,8 +971,10 @@ multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
!cast<PatFrag>(frag#"_local_"#vt.Size)>;
}
- def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
- /* complexity */ 0, /* gds */ 1>;
+ let OtherPredicates = [HasGDS] in {
+ def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
+ /* complexity */ 0, /* gds */ 1>;
+ }
}
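
// The region (GDS) forms of these patterns are guarded by HasGDS here and in
// the multiclasses below, so subtargets without GDS simply do not get them.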
multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
@@ -989,12 +993,14 @@ multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
!cast<PatFrag>(frag#"_local_noret_"#vt.Size), /* complexity */ 1>;
}
- def : DSAtomicRetPat<inst, vt,
- !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
- /* complexity */ 0, /* gds */ 1>;
- def : DSAtomicRetPat<noRetInst, vt,
- !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
- /* complexity */ 1, /* gds */ 1>;
+ let OtherPredicates = [HasGDS] in {
+ def : DSAtomicRetPat<inst, vt,
+ !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
+ /* complexity */ 0, /* gds */ 1>;
+ def : DSAtomicRetPat<noRetInst, vt,
+ !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
+ /* complexity */ 1, /* gds */ 1>;
+ }
}
@@ -1024,10 +1030,12 @@ multiclass DSAtomicCmpXChgSwapped_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueT
/* complexity */ 1>;
}
- def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
- /* complexity */ 0, /* gds */ 1>;
- def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
- /* complexity */ 1, /* gds */ 1>;
+ let OtherPredicates = [HasGDS] in {
+ def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
+ /* complexity */ 0, /* gds */ 1>;
+ def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
+ /* complexity */ 1, /* gds */ 1>;
+ }
}
} // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10
@@ -1047,10 +1055,12 @@ multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt,
def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
!cast<PatFrag>(frag#"_local_noret_"#vt.Size), /* complexity */ 1>;
- def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
- /* complexity */ 0, /* gds */ 1>;
- def : DSAtomicCmpXChg<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
- /* complexity */ 1, /* gds */ 1>;
+ let OtherPredicates = [HasGDS] in {
+ def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
+ /* complexity */ 0, /* gds */ 1>;
+ def : DSAtomicCmpXChg<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
+ /* complexity */ 1, /* gds */ 1>;
+ }
}
} // End SubtargetPredicate = isGFX11Plus
@@ -1175,11 +1185,12 @@ def : GCNPat <
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Base ENC_DS for GFX6, GFX7, GFX10, GFX11.
+// Base ENC_DS for GFX6, GFX7, GFX10, GFX11, GFX12.
//===----------------------------------------------------------------------===//
-class Base_DS_Real_gfx6_gfx7_gfx10_gfx11<bits<8> op, DS_Pseudo ps, int ef, string opName = ps.Mnemonic> :
- DS_Real<ps, opName>, SIMCInstr <ps.Mnemonic, ef> {
+class Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<8> op, DS_Pseudo ps, int ef,
+ string opName = ps.Mnemonic>
+ : DS_Real<ps, opName>, SIMCInstr <ps.Mnemonic, ef> {
let Inst{7-0} = !if(ps.has_offset0, offset0, 0);
let Inst{15-8} = !if(ps.has_offset1, offset1, 0);
@@ -1193,74 +1204,117 @@ class Base_DS_Real_gfx6_gfx7_gfx10_gfx11<bits<8> op, DS_Pseudo ps, int ef, strin
}
//===----------------------------------------------------------------------===//
+// GFX12.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX12Plus, DecoderNamespace = "GFX12" in {
+ multiclass DS_Real_gfx12<bits<8> op> {
+ defvar ps = !cast<DS_Pseudo>(NAME);
+ def _gfx12 :
+ Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, ps, SIEncodingFamily.GFX12,
+ ps.Mnemonic>;
+ }
+
+ multiclass DS_Real_Renamed_gfx12<bits<8> op, DS_Pseudo backing_pseudo,
+ string real_name> {
+ def _gfx12 :
+ Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, backing_pseudo,
+ SIEncodingFamily.GFX12,
+ real_name>,
+ MnemonicAlias<backing_pseudo.Mnemonic, real_name>,
+ Requires<[isGFX12Plus]>;
+ }
+} // End AssemblerPredicate = isGFX12Plus, DecoderNamespace = "GFX12"
+
+defm DS_MIN_NUM_F32 : DS_Real_Renamed_gfx12<0x012, DS_MIN_F32, "ds_min_num_f32">;
+defm DS_MAX_NUM_F32 : DS_Real_Renamed_gfx12<0x013, DS_MAX_F32, "ds_max_num_f32">;
+defm DS_MIN_NUM_RTN_F32 : DS_Real_Renamed_gfx12<0x032, DS_MIN_RTN_F32, "ds_min_num_rtn_f32">;
+defm DS_MAX_NUM_RTN_F32 : DS_Real_Renamed_gfx12<0x033, DS_MAX_RTN_F32, "ds_max_num_rtn_f32">;
+defm DS_MIN_NUM_F64 : DS_Real_Renamed_gfx12<0x052, DS_MIN_F64, "ds_min_num_f64">;
+defm DS_MAX_NUM_F64 : DS_Real_Renamed_gfx12<0x053, DS_MAX_F64, "ds_max_num_f64">;
+defm DS_MIN_NUM_RTN_F64 : DS_Real_Renamed_gfx12<0x072, DS_MIN_RTN_F64, "ds_min_num_rtn_f64">;
+defm DS_MAX_NUM_RTN_F64 : DS_Real_Renamed_gfx12<0x073, DS_MAX_RTN_F64, "ds_max_num_rtn_f64">;
+defm DS_SUB_CLAMP_U32 : DS_Real_gfx12<0x099>;
+defm DS_SUB_CLAMP_RTN_U32 : DS_Real_gfx12<0x0a9>;
+
+//===----------------------------------------------------------------------===//
// GFX11.
//===----------------------------------------------------------------------===//
-let AssemblerPredicate = isGFX11Plus, DecoderNamespace = "GFX11" in {
+let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
multiclass DS_Real_gfx11<bits<8> op> {
- def _gfx11 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11<op, !cast<DS_Pseudo>(NAME),
+ def _gfx11 :
+ Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, !cast<DS_Pseudo>(NAME),
SIEncodingFamily.GFX11>;
}
multiclass DS_Real_Renamed_gfx11<bits<8> op, DS_Pseudo backing_pseudo, string real_name> {
- def _gfx11 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11<op, backing_pseudo, SIEncodingFamily.GFX11, real_name>,
- MnemonicAlias<backing_pseudo.Mnemonic, real_name>, Requires<[isGFX11Plus]>;
+ def _gfx11 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, backing_pseudo, SIEncodingFamily.GFX11, real_name>,
+ MnemonicAlias<backing_pseudo.Mnemonic, real_name>, Requires<[isGFX11Only]>;
}
-} // End AssemblerPredicate = isGFX11Plus, DecoderNamespace = "GFX11"
-
-defm DS_STORE_B32 : DS_Real_Renamed_gfx11<0x00d, DS_WRITE_B32, "ds_store_b32">;
-defm DS_STORE_2ADDR_B32 : DS_Real_Renamed_gfx11<0x00e, DS_WRITE2_B32, "ds_store_2addr_b32">;
-defm DS_STORE_2ADDR_STRIDE64_B32 : DS_Real_Renamed_gfx11<0x00f, DS_WRITE2ST64_B32, "ds_store_2addr_stride64_b32">;
-defm DS_STORE_B8 : DS_Real_Renamed_gfx11<0x01e, DS_WRITE_B8, "ds_store_b8">;
-defm DS_STORE_B16 : DS_Real_Renamed_gfx11<0x01f, DS_WRITE_B16, "ds_store_b16">;
-defm DS_STOREXCHG_RTN_B32 : DS_Real_Renamed_gfx11<0x02d, DS_WRXCHG_RTN_B32, "ds_storexchg_rtn_b32">;
-defm DS_STOREXCHG_2ADDR_RTN_B32 : DS_Real_Renamed_gfx11<0x02e, DS_WRXCHG2_RTN_B32, "ds_storexchg_2addr_rtn_b32">;
-defm DS_STOREXCHG_2ADDR_STRIDE64_RTN_B32 : DS_Real_Renamed_gfx11<0x02f, DS_WRXCHG2ST64_RTN_B32, "ds_storexchg_2addr_stride64_rtn_b32">;
-defm DS_LOAD_B32 : DS_Real_Renamed_gfx11<0x036, DS_READ_B32, "ds_load_b32">;
-defm DS_LOAD_2ADDR_B32 : DS_Real_Renamed_gfx11<0x037, DS_READ2_B32, "ds_load_2addr_b32">;
-defm DS_LOAD_2ADDR_STRIDE64_B32 : DS_Real_Renamed_gfx11<0x038, DS_READ2ST64_B32, "ds_load_2addr_stride64_b32">;
-defm DS_LOAD_I8 : DS_Real_Renamed_gfx11<0x039, DS_READ_I8, "ds_load_i8">;
-defm DS_LOAD_U8 : DS_Real_Renamed_gfx11<0x03a, DS_READ_U8, "ds_load_u8">;
-defm DS_LOAD_I16 : DS_Real_Renamed_gfx11<0x03b, DS_READ_I16, "ds_load_i16">;
-defm DS_LOAD_U16 : DS_Real_Renamed_gfx11<0x03c, DS_READ_U16, "ds_load_u16">;
-defm DS_STORE_B64 : DS_Real_Renamed_gfx11<0x04d, DS_WRITE_B64, "ds_store_b64">;
-defm DS_STORE_2ADDR_B64 : DS_Real_Renamed_gfx11<0x04e, DS_WRITE2_B64, "ds_store_2addr_b64">;
-defm DS_STORE_2ADDR_STRIDE64_B64 : DS_Real_Renamed_gfx11<0x04f, DS_WRITE2ST64_B64, "ds_store_2addr_stride64_b64">;
-defm DS_STOREXCHG_RTN_B64 : DS_Real_Renamed_gfx11<0x06d, DS_WRXCHG_RTN_B64, "ds_storexchg_rtn_b64">;
-defm DS_STOREXCHG_2ADDR_RTN_B64 : DS_Real_Renamed_gfx11<0x06e, DS_WRXCHG2_RTN_B64, "ds_storexchg_2addr_rtn_b64">;
-defm DS_STOREXCHG_2ADDR_STRIDE64_RTN_B64 : DS_Real_Renamed_gfx11<0x06f, DS_WRXCHG2ST64_RTN_B64, "ds_storexchg_2addr_stride64_rtn_b64">;
-defm DS_LOAD_B64 : DS_Real_Renamed_gfx11<0x076, DS_READ_B64, "ds_load_b64">;
-defm DS_LOAD_2ADDR_B64 : DS_Real_Renamed_gfx11<0x077, DS_READ2_B64, "ds_load_2addr_b64">;
-defm DS_LOAD_2ADDR_STRIDE64_B64 : DS_Real_Renamed_gfx11<0x078, DS_READ2ST64_B64, "ds_load_2addr_stride64_b64">;
-defm DS_STORE_B8_D16_HI : DS_Real_Renamed_gfx11<0x0a0, DS_WRITE_B8_D16_HI, "ds_store_b8_d16_hi">;
-defm DS_STORE_B16_D16_HI : DS_Real_Renamed_gfx11<0x0a1, DS_WRITE_B16_D16_HI, "ds_store_b16_d16_hi">;
-defm DS_LOAD_U8_D16 : DS_Real_Renamed_gfx11<0x0a2, DS_READ_U8_D16, "ds_load_u8_d16">;
-defm DS_LOAD_U8_D16_HI : DS_Real_Renamed_gfx11<0x0a3, DS_READ_U8_D16_HI, "ds_load_u8_d16_hi">;
-defm DS_LOAD_I8_D16 : DS_Real_Renamed_gfx11<0x0a4, DS_READ_I8_D16, "ds_load_i8_d16">;
-defm DS_LOAD_I8_D16_HI : DS_Real_Renamed_gfx11<0x0a5, DS_READ_I8_D16_HI, "ds_load_i8_d16_hi">;
-defm DS_LOAD_U16_D16 : DS_Real_Renamed_gfx11<0x0a6, DS_READ_U16_D16, "ds_load_u16_d16">;
-defm DS_LOAD_U16_D16_HI : DS_Real_Renamed_gfx11<0x0a7, DS_READ_U16_D16_HI, "ds_load_u16_d16_hi">;
-defm DS_STORE_ADDTID_B32 : DS_Real_Renamed_gfx11<0x0b0, DS_WRITE_ADDTID_B32, "ds_store_addtid_b32">;
-defm DS_LOAD_ADDTID_B32 : DS_Real_Renamed_gfx11<0x0b1, DS_READ_ADDTID_B32, "ds_load_addtid_b32">;
-defm DS_STORE_B96 : DS_Real_Renamed_gfx11<0x0de, DS_WRITE_B96, "ds_store_b96">;
-defm DS_STORE_B128 : DS_Real_Renamed_gfx11<0x0df, DS_WRITE_B128, "ds_store_b128">;
-defm DS_LOAD_B96 : DS_Real_Renamed_gfx11<0x0fe, DS_READ_B96, "ds_load_b96">;
-defm DS_LOAD_B128 : DS_Real_Renamed_gfx11<0x0ff, DS_READ_B128, "ds_load_b128">;
+} // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11"
+
+multiclass DS_Real_gfx11_gfx12<bits<8> op>
+ : DS_Real_gfx11<op>, DS_Real_gfx12<op>;
+
+multiclass DS_Real_Renamed_gfx11_gfx12<bits<8> op, DS_Pseudo backing_pseudo,
+ string real_name>
+ : DS_Real_Renamed_gfx11<op, backing_pseudo, real_name>,
+ DS_Real_Renamed_gfx12<op, backing_pseudo, real_name>;
+
+defm DS_STORE_B32 : DS_Real_Renamed_gfx11_gfx12<0x00d, DS_WRITE_B32, "ds_store_b32">;
+defm DS_STORE_2ADDR_B32 : DS_Real_Renamed_gfx11_gfx12<0x00e, DS_WRITE2_B32, "ds_store_2addr_b32">;
+defm DS_STORE_2ADDR_STRIDE64_B32 : DS_Real_Renamed_gfx11_gfx12<0x00f, DS_WRITE2ST64_B32, "ds_store_2addr_stride64_b32">;
+defm DS_STORE_B8 : DS_Real_Renamed_gfx11_gfx12<0x01e, DS_WRITE_B8, "ds_store_b8">;
+defm DS_STORE_B16 : DS_Real_Renamed_gfx11_gfx12<0x01f, DS_WRITE_B16, "ds_store_b16">;
+defm DS_STOREXCHG_RTN_B32 : DS_Real_Renamed_gfx11_gfx12<0x02d, DS_WRXCHG_RTN_B32, "ds_storexchg_rtn_b32">;
+defm DS_STOREXCHG_2ADDR_RTN_B32 : DS_Real_Renamed_gfx11_gfx12<0x02e, DS_WRXCHG2_RTN_B32, "ds_storexchg_2addr_rtn_b32">;
+defm DS_STOREXCHG_2ADDR_STRIDE64_RTN_B32 : DS_Real_Renamed_gfx11_gfx12<0x02f, DS_WRXCHG2ST64_RTN_B32, "ds_storexchg_2addr_stride64_rtn_b32">;
+defm DS_LOAD_B32 : DS_Real_Renamed_gfx11_gfx12<0x036, DS_READ_B32, "ds_load_b32">;
+defm DS_LOAD_2ADDR_B32 : DS_Real_Renamed_gfx11_gfx12<0x037, DS_READ2_B32, "ds_load_2addr_b32">;
+defm DS_LOAD_2ADDR_STRIDE64_B32 : DS_Real_Renamed_gfx11_gfx12<0x038, DS_READ2ST64_B32, "ds_load_2addr_stride64_b32">;
+defm DS_LOAD_I8 : DS_Real_Renamed_gfx11_gfx12<0x039, DS_READ_I8, "ds_load_i8">;
+defm DS_LOAD_U8 : DS_Real_Renamed_gfx11_gfx12<0x03a, DS_READ_U8, "ds_load_u8">;
+defm DS_LOAD_I16 : DS_Real_Renamed_gfx11_gfx12<0x03b, DS_READ_I16, "ds_load_i16">;
+defm DS_LOAD_U16 : DS_Real_Renamed_gfx11_gfx12<0x03c, DS_READ_U16, "ds_load_u16">;
+defm DS_STORE_B64 : DS_Real_Renamed_gfx11_gfx12<0x04d, DS_WRITE_B64, "ds_store_b64">;
+defm DS_STORE_2ADDR_B64 : DS_Real_Renamed_gfx11_gfx12<0x04e, DS_WRITE2_B64, "ds_store_2addr_b64">;
+defm DS_STORE_2ADDR_STRIDE64_B64 : DS_Real_Renamed_gfx11_gfx12<0x04f, DS_WRITE2ST64_B64, "ds_store_2addr_stride64_b64">;
+defm DS_STOREXCHG_RTN_B64 : DS_Real_Renamed_gfx11_gfx12<0x06d, DS_WRXCHG_RTN_B64, "ds_storexchg_rtn_b64">;
+defm DS_STOREXCHG_2ADDR_RTN_B64 : DS_Real_Renamed_gfx11_gfx12<0x06e, DS_WRXCHG2_RTN_B64, "ds_storexchg_2addr_rtn_b64">;
+defm DS_STOREXCHG_2ADDR_STRIDE64_RTN_B64 : DS_Real_Renamed_gfx11_gfx12<0x06f, DS_WRXCHG2ST64_RTN_B64, "ds_storexchg_2addr_stride64_rtn_b64">;
+defm DS_LOAD_B64 : DS_Real_Renamed_gfx11_gfx12<0x076, DS_READ_B64, "ds_load_b64">;
+defm DS_LOAD_2ADDR_B64 : DS_Real_Renamed_gfx11_gfx12<0x077, DS_READ2_B64, "ds_load_2addr_b64">;
+defm DS_LOAD_2ADDR_STRIDE64_B64 : DS_Real_Renamed_gfx11_gfx12<0x078, DS_READ2ST64_B64, "ds_load_2addr_stride64_b64">;
+defm DS_STORE_B8_D16_HI : DS_Real_Renamed_gfx11_gfx12<0x0a0, DS_WRITE_B8_D16_HI, "ds_store_b8_d16_hi">;
+defm DS_STORE_B16_D16_HI : DS_Real_Renamed_gfx11_gfx12<0x0a1, DS_WRITE_B16_D16_HI, "ds_store_b16_d16_hi">;
+defm DS_LOAD_U8_D16 : DS_Real_Renamed_gfx11_gfx12<0x0a2, DS_READ_U8_D16, "ds_load_u8_d16">;
+defm DS_LOAD_U8_D16_HI : DS_Real_Renamed_gfx11_gfx12<0x0a3, DS_READ_U8_D16_HI, "ds_load_u8_d16_hi">;
+defm DS_LOAD_I8_D16 : DS_Real_Renamed_gfx11_gfx12<0x0a4, DS_READ_I8_D16, "ds_load_i8_d16">;
+defm DS_LOAD_I8_D16_HI : DS_Real_Renamed_gfx11_gfx12<0x0a5, DS_READ_I8_D16_HI, "ds_load_i8_d16_hi">;
+defm DS_LOAD_U16_D16 : DS_Real_Renamed_gfx11_gfx12<0x0a6, DS_READ_U16_D16, "ds_load_u16_d16">;
+defm DS_LOAD_U16_D16_HI : DS_Real_Renamed_gfx11_gfx12<0x0a7, DS_READ_U16_D16_HI, "ds_load_u16_d16_hi">;
+defm DS_STORE_ADDTID_B32 : DS_Real_Renamed_gfx11_gfx12<0x0b0, DS_WRITE_ADDTID_B32, "ds_store_addtid_b32">;
+defm DS_LOAD_ADDTID_B32 : DS_Real_Renamed_gfx11_gfx12<0x0b1, DS_READ_ADDTID_B32, "ds_load_addtid_b32">;
+defm DS_STORE_B96 : DS_Real_Renamed_gfx11_gfx12<0x0de, DS_WRITE_B96, "ds_store_b96">;
+defm DS_STORE_B128 : DS_Real_Renamed_gfx11_gfx12<0x0df, DS_WRITE_B128, "ds_store_b128">;
+defm DS_LOAD_B96 : DS_Real_Renamed_gfx11_gfx12<0x0fe, DS_READ_B96, "ds_load_b96">;
+defm DS_LOAD_B128 : DS_Real_Renamed_gfx11_gfx12<0x0ff, DS_READ_B128, "ds_load_b128">;
// DS_CMPST_* are renamed to DS_CMPSTORE_* in GFX11, but the data operands
// (src and cmp) are also swapped compared with pre-GFX11.
// Note: the mnemonic alias is not generated to avoid a potential ambiguity due to the semantics change.
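// (Illustrative, assuming the GFX11 convention of store value before compare
// value: "ds_cmpstore_b32 v0, v1, v2" writes v1 to LDS at address v0 when
// the memory value equals v2, whereas the pre-GFX11 ds_cmpst_b32 took the
// compare value in the first data operand.)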
-defm DS_CMPSTORE_B32 : DS_Real_gfx11<0x010>;
+defm DS_CMPSTORE_B32 : DS_Real_gfx11_gfx12<0x010>;
defm DS_CMPSTORE_F32 : DS_Real_gfx11<0x011>;
-defm DS_CMPSTORE_RTN_B32 : DS_Real_gfx11<0x030>;
+defm DS_CMPSTORE_RTN_B32 : DS_Real_gfx11_gfx12<0x030>;
defm DS_CMPSTORE_RTN_F32 : DS_Real_gfx11<0x031>;
-defm DS_CMPSTORE_B64 : DS_Real_gfx11<0x050>;
+defm DS_CMPSTORE_B64 : DS_Real_gfx11_gfx12<0x050>;
defm DS_CMPSTORE_F64 : DS_Real_gfx11<0x051>;
-defm DS_CMPSTORE_RTN_B64 : DS_Real_gfx11<0x070>;
+defm DS_CMPSTORE_RTN_B64 : DS_Real_gfx11_gfx12<0x070>;
defm DS_CMPSTORE_RTN_F64 : DS_Real_gfx11<0x071>;
-defm DS_ADD_RTN_F32 : DS_Real_gfx11<0x079>;
+defm DS_ADD_RTN_F32 : DS_Real_gfx11_gfx12<0x079>;
defm DS_ADD_GS_REG_RTN : DS_Real_gfx11<0x07a>;
defm DS_SUB_GS_REG_RTN : DS_Real_gfx11<0x07b>;
defm DS_BVH_STACK_RTN_B32 : DS_Real_gfx11<0x0ad>;
@@ -1271,8 +1325,8 @@ defm DS_BVH_STACK_RTN_B32 : DS_Real_gfx11<0x0ad>;
let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
multiclass DS_Real_gfx10<bits<8> op> {
- def _gfx10 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11<op, !cast<DS_Pseudo>(NAME),
- SIEncodingFamily.GFX10>;
+ def _gfx10 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op,
+ !cast<DS_Pseudo>(NAME), SIEncodingFamily.GFX10>;
}
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
@@ -1289,28 +1343,34 @@ defm DS_WRITE_ADDTID_B32 : DS_Real_gfx10<0x0b0>;
defm DS_READ_ADDTID_B32 : DS_Real_gfx10<0x0b1>;
//===----------------------------------------------------------------------===//
-// GFX10, GFX11.
+// GFX10, GFX11, GFX12.
//===----------------------------------------------------------------------===//
+multiclass DS_Real_gfx10_gfx11_gfx12<bits<8> op> :
+ DS_Real_gfx10<op>, DS_Real_gfx11<op>, DS_Real_gfx12<op>;
+
multiclass DS_Real_gfx10_gfx11<bits<8> op> :
DS_Real_gfx10<op>, DS_Real_gfx11<op>;
-defm DS_ADD_F32 : DS_Real_gfx10_gfx11<0x015>;
+defm DS_ADD_F32 : DS_Real_gfx10_gfx11_gfx12<0x015>;
defm DS_ADD_SRC2_F32 : DS_Real_gfx10<0x095>;
-defm DS_PERMUTE_B32 : DS_Real_gfx10_gfx11<0x0b2>;
-defm DS_BPERMUTE_B32 : DS_Real_gfx10_gfx11<0x0b3>;
+defm DS_PERMUTE_B32 : DS_Real_gfx10_gfx11_gfx12<0x0b2>;
+defm DS_BPERMUTE_B32 : DS_Real_gfx10_gfx11_gfx12<0x0b3>;
//===----------------------------------------------------------------------===//
-// GFX7, GFX10, GFX11.
+// GFX7, GFX10, GFX11, GFX12.
//===----------------------------------------------------------------------===//
let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
multiclass DS_Real_gfx7<bits<8> op> {
- def _gfx7 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11<op, !cast<DS_Pseudo>(NAME),
- SIEncodingFamily.SI>;
+ def _gfx7 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op,
+ !cast<DS_Pseudo>(NAME), SIEncodingFamily.SI>;
}
} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
+multiclass DS_Real_gfx7_gfx10_gfx11_gfx12<bits<8> op> :
+ DS_Real_gfx7<op>, DS_Real_gfx10_gfx11_gfx12<op>;
+
multiclass DS_Real_gfx7_gfx10_gfx11<bits<8> op> :
DS_Real_gfx7<op>, DS_Real_gfx10_gfx11<op>;
@@ -1320,7 +1380,7 @@ multiclass DS_Real_gfx7_gfx10<bits<8> op> :
// FIXME-GFX7: Add tests when upstreaming this part.
defm DS_GWS_SEMA_RELEASE_ALL : DS_Real_gfx7_gfx10_gfx11<0x018>;
defm DS_WRAP_RTN_B32 : DS_Real_gfx7_gfx10_gfx11<0x034>;
-defm DS_CONDXCHG32_RTN_B64 : DS_Real_gfx7_gfx10_gfx11<0x07e>;
+defm DS_CONDXCHG32_RTN_B64 : DS_Real_gfx7_gfx10_gfx11_gfx12<0x07e>;
defm DS_WRITE_B96 : DS_Real_gfx7_gfx10<0x0de>;
defm DS_WRITE_B128 : DS_Real_gfx7_gfx10<0x0df>;
defm DS_READ_B96 : DS_Real_gfx7_gfx10<0x0fe>;
@@ -1332,30 +1392,33 @@ defm DS_READ_B128 : DS_Real_gfx7_gfx10<0x0ff>;
let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
multiclass DS_Real_gfx6_gfx7<bits<8> op> {
- def _gfx6_gfx7 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11<op, !cast<DS_Pseudo>(NAME),
- SIEncodingFamily.SI>;
+ def _gfx6_gfx7 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op,
+ !cast<DS_Pseudo>(NAME), SIEncodingFamily.SI>;
}
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
+multiclass DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<8> op> :
+ DS_Real_gfx6_gfx7<op>, DS_Real_gfx10_gfx11_gfx12<op>;
+
multiclass DS_Real_gfx6_gfx7_gfx10_gfx11<bits<8> op> :
DS_Real_gfx6_gfx7<op>, DS_Real_gfx10_gfx11<op>;
multiclass DS_Real_gfx6_gfx7_gfx10<bits<8> op> :
DS_Real_gfx6_gfx7<op>, DS_Real_gfx10<op>;
-defm DS_ADD_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x000>;
-defm DS_SUB_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x001>;
-defm DS_RSUB_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x002>;
-defm DS_INC_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x003>;
-defm DS_DEC_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x004>;
-defm DS_MIN_I32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x005>;
-defm DS_MAX_I32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x006>;
-defm DS_MIN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x007>;
-defm DS_MAX_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x008>;
-defm DS_AND_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x009>;
-defm DS_OR_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x00a>;
-defm DS_XOR_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x00b>;
-defm DS_MSKOR_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x00c>;
+defm DS_ADD_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x000>;
+defm DS_SUB_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x001>;
+defm DS_RSUB_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x002>;
+defm DS_INC_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x003>;
+defm DS_DEC_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x004>;
+defm DS_MIN_I32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x005>;
+defm DS_MAX_I32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x006>;
+defm DS_MIN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x007>;
+defm DS_MAX_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x008>;
+defm DS_AND_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x009>;
+defm DS_OR_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00a>;
+defm DS_XOR_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00b>;
+defm DS_MSKOR_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00c>;
defm DS_WRITE_B32 : DS_Real_gfx6_gfx7_gfx10<0x00d>;
defm DS_WRITE2_B32 : DS_Real_gfx6_gfx7_gfx10<0x00e>;
@@ -1365,7 +1428,7 @@ defm DS_CMPST_F32 : DS_Real_gfx6_gfx7_gfx10<0x011>;
defm DS_MIN_F32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x012>;
defm DS_MAX_F32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x013>;
-defm DS_NOP : DS_Real_gfx6_gfx7_gfx10_gfx11<0x014>;
+defm DS_NOP : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x014>;
defm DS_GWS_INIT : DS_Real_gfx6_gfx7_gfx10_gfx11<0x019>;
defm DS_GWS_SEMA_V : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01a>;
defm DS_GWS_SEMA_BR : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01b>;
@@ -1375,19 +1438,19 @@ defm DS_GWS_BARRIER : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01d>;
defm DS_WRITE_B8 : DS_Real_gfx6_gfx7_gfx10<0x01e>;
defm DS_WRITE_B16 : DS_Real_gfx6_gfx7_gfx10<0x01f>;
-defm DS_ADD_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x020>;
-defm DS_SUB_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x021>;
-defm DS_RSUB_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x022>;
-defm DS_INC_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x023>;
-defm DS_DEC_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x024>;
-defm DS_MIN_RTN_I32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x025>;
-defm DS_MAX_RTN_I32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x026>;
-defm DS_MIN_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x027>;
-defm DS_MAX_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x028>;
-defm DS_AND_RTN_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x029>;
-defm DS_OR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x02a>;
-defm DS_XOR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x02b>;
-defm DS_MSKOR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x02c>;
+defm DS_ADD_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x020>;
+defm DS_SUB_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x021>;
+defm DS_RSUB_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x022>;
+defm DS_INC_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x023>;
+defm DS_DEC_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x024>;
+defm DS_MIN_RTN_I32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x025>;
+defm DS_MAX_RTN_I32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x026>;
+defm DS_MIN_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x027>;
+defm DS_MAX_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x028>;
+defm DS_AND_RTN_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x029>;
+defm DS_OR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02a>;
+defm DS_XOR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02b>;
+defm DS_MSKOR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02c>;
defm DS_WRXCHG_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02d>;
defm DS_WRXCHG2_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02e>;
@@ -1397,7 +1460,7 @@ defm DS_CMPST_RTN_F32 : DS_Real_gfx6_gfx7_gfx10<0x031>;
defm DS_MIN_RTN_F32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x032>;
defm DS_MAX_RTN_F32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x033>;
-defm DS_SWIZZLE_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x035>;
+defm DS_SWIZZLE_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x035>;
defm DS_READ_B32 : DS_Real_gfx6_gfx7_gfx10<0x036>;
defm DS_READ2_B32 : DS_Real_gfx6_gfx7_gfx10<0x037>;
@@ -1407,22 +1470,22 @@ defm DS_READ_U8 : DS_Real_gfx6_gfx7_gfx10<0x03a>;
defm DS_READ_I16 : DS_Real_gfx6_gfx7_gfx10<0x03b>;
defm DS_READ_U16 : DS_Real_gfx6_gfx7_gfx10<0x03c>;
-defm DS_CONSUME : DS_Real_gfx6_gfx7_gfx10_gfx11<0x03d>;
-defm DS_APPEND : DS_Real_gfx6_gfx7_gfx10_gfx11<0x03e>;
+defm DS_CONSUME : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x03d>;
+defm DS_APPEND : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x03e>;
defm DS_ORDERED_COUNT : DS_Real_gfx6_gfx7_gfx10_gfx11<0x03f>;
-defm DS_ADD_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x040>;
-defm DS_SUB_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x041>;
-defm DS_RSUB_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x042>;
-defm DS_INC_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x043>;
-defm DS_DEC_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x044>;
-defm DS_MIN_I64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x045>;
-defm DS_MAX_I64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x046>;
-defm DS_MIN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x047>;
-defm DS_MAX_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x048>;
-defm DS_AND_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x049>;
-defm DS_OR_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x04a>;
-defm DS_XOR_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x04b>;
-defm DS_MSKOR_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x04c>;
+defm DS_ADD_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x040>;
+defm DS_SUB_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x041>;
+defm DS_RSUB_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x042>;
+defm DS_INC_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x043>;
+defm DS_DEC_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x044>;
+defm DS_MIN_I64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x045>;
+defm DS_MAX_I64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x046>;
+defm DS_MIN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x047>;
+defm DS_MAX_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x048>;
+defm DS_AND_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x049>;
+defm DS_OR_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x04a>;
+defm DS_XOR_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x04b>;
+defm DS_MSKOR_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x04c>;
defm DS_WRITE_B64 : DS_Real_gfx6_gfx7_gfx10<0x04d>;
defm DS_WRITE2_B64 : DS_Real_gfx6_gfx7_gfx10<0x04e>;
@@ -1432,19 +1495,19 @@ defm DS_CMPST_F64 : DS_Real_gfx6_gfx7_gfx10<0x051>;
defm DS_MIN_F64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x052>;
defm DS_MAX_F64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x053>;
-defm DS_ADD_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x060>;
-defm DS_SUB_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x061>;
-defm DS_RSUB_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x062>;
-defm DS_INC_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x063>;
-defm DS_DEC_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x064>;
-defm DS_MIN_RTN_I64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x065>;
-defm DS_MAX_RTN_I64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x066>;
-defm DS_MIN_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x067>;
-defm DS_MAX_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x068>;
-defm DS_AND_RTN_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x069>;
-defm DS_OR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x06a>;
-defm DS_XOR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x06b>;
-defm DS_MSKOR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x06c>;
+defm DS_ADD_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x060>;
+defm DS_SUB_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x061>;
+defm DS_RSUB_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x062>;
+defm DS_INC_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x063>;
+defm DS_DEC_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x064>;
+defm DS_MIN_RTN_I64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x065>;
+defm DS_MAX_RTN_I64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x066>;
+defm DS_MIN_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x067>;
+defm DS_MAX_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x068>;
+defm DS_AND_RTN_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x069>;
+defm DS_OR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x06a>;
+defm DS_XOR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x06b>;
+defm DS_MSKOR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x06c>;
defm DS_WRXCHG_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06d>;
defm DS_WRXCHG2_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06e>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 1b05acd5c90a..ed2e7e4f189e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -91,9 +91,11 @@ static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
int64_t Offset;
- if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
+ if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
+ Offset = SignExtend64<24>(Imm);
+ } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
Offset = Imm & 0xFFFFF;
- } else { // GFX9+ supports 21-bit signed offsets.
+ } else { // GFX9+ supports 21-bit signed offsets.
Offset = SignExtend64<21>(Imm);
}
return addOperand(Inst, MCOperand::createImm(Offset));
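
For reference, a minimal self-contained sketch of the three SMEM offset encodings handled above; signExtend64 here is a stand-in for llvm::SignExtend64<B>, and the sample values are illustrative:

    #include <cassert>
    #include <cstdint>

    // Stand-in for llvm::SignExtend64<B>: sign-extend the low B bits of X.
    template <unsigned B> int64_t signExtend64(uint64_t X) {
      return int64_t(X << (64 - B)) >> (64 - B);
    }

    int main() {
      // GFX12+: 24-bit signed, so an all-ones field decodes to -1.
      assert(signExtend64<24>(0xFFFFFF) == -1);
      // GFX9..GFX11: 21-bit signed; bit 20 is the sign bit.
      assert(signExtend64<21>(0x1FFFFF) == -1);
      assert(signExtend64<21>(0x0FFFFF) == 0x0FFFFF); // sign bit clear: positive
      // VI: 20-bit unsigned, so Imm & 0xFFFFF is used as-is.
      return 0;
    }
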
@@ -105,6 +107,13 @@ static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
return addOperand(Inst, DAsm->decodeBoolReg(Val));
}
+static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
+}
+
#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \
uint64_t /*Addr*/, \
@@ -200,10 +209,12 @@ DECODE_OPERAND_REG_8(VReg_512)
DECODE_OPERAND_REG_8(VReg_1024)
DECODE_OPERAND_REG_7(SReg_32, OPW32)
+DECODE_OPERAND_REG_7(SReg_32_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
DECODE_OPERAND_REG_7(SReg_64, OPW64)
DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
+DECODE_OPERAND_REG_7(SReg_96, OPW96)
DECODE_OPERAND_REG_7(SReg_128, OPW128)
DECODE_OPERAND_REG_7(SReg_256, OPW256)
DECODE_OPERAND_REG_7(SReg_512, OPW512)
@@ -238,6 +249,7 @@ DECODE_SRC_OPERAND_REG_AV10(AV_128, OPW128)
DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_64, OPW64, 64)
DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 16)
DECODE_OPERAND_SRC_REG_OR_IMM_9(SRegOrLds_32, OPW32, 32)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32_Lo128, OPW16, 16)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 16)
@@ -259,6 +271,62 @@ DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_1024, OPW1024, 32)
DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32_Lo128, OPW16, 16)
DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW16, 16)
DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW32, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(SReg_32, OPW32, 32)
+
+static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
+ uint64_t /*Addr*/,
+ const MCDisassembler *Decoder) {
+ assert(isUInt<10>(Imm) && "10-bit encoding expected");
+ assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
+
+ bool IsHi = Imm & (1 << 9);
+ unsigned RegIdx = Imm & 0xff;
+ auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
+}
+
+static DecodeStatus
+DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
+ const MCDisassembler *Decoder) {
+ assert(isUInt<8>(Imm) && "8-bit encoding expected");
+
+ bool IsHi = Imm & (1 << 7);
+ unsigned RegIdx = Imm & 0x7f;
+ auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
+}
+
+static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
+ uint64_t /*Addr*/,
+ const MCDisassembler *Decoder) {
+ assert(isUInt<9>(Imm) && "9-bit encoding expected");
+
+ const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ bool IsVGPR = Imm & (1 << 8);
+ if (IsVGPR) {
+ bool IsHi = Imm & (1 << 7);
+ unsigned RegIdx = Imm & 0x7f;
+ return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
+ }
+ return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
+ Imm & 0xFF, false, 16));
+}
+
+static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
+ uint64_t /*Addr*/,
+ const MCDisassembler *Decoder) {
+ assert(isUInt<10>(Imm) && "10-bit encoding expected");
+
+ const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ bool IsVGPR = Imm & (1 << 8);
+ if (IsVGPR) {
+ bool IsHi = Imm & (1 << 9);
+ unsigned RegIdx = Imm & 0xff;
+ return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
+ }
+ return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
+ Imm & 0xFF, false, 16));
+}
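
The VSrcT16 decoders above all unpack the same style of bit layout; a sketch of the 10-bit form (bit 8 = VGPR flag, bit 9 = hi half, bits 7:0 = register index; the Lo128 form instead uses bit 7 as the hi flag and bits 6:0 as the index), with hypothetical values:

    #include <cstdint>
    #include <cstdio>

    struct SrcT16 { bool IsVGPR, IsHi; unsigned Idx; };

    // Unpack a 10-bit VSrcT16 operand field the way decodeOperand_VSrcT16 does.
    SrcT16 unpackVSrcT16(unsigned Imm) {
      SrcT16 S;
      S.IsVGPR = Imm & (1u << 8);
      S.IsHi   = Imm & (1u << 9); // only meaningful in the VGPR case
      S.Idx    = Imm & 0xFF;      // non-VGPR sources also take the low 8 bits
      return S;
    }

    int main() {
      SrcT16 S = unpackVSrcT16(0x305); // bit 9 + bit 8 + index 5
      std::printf("vgpr=%d hi=%d idx=%u\n", S.IsVGPR, S.IsHi, S.Idx);
      // prints: vgpr=1 hi=1 idx=5
    }
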
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
uint64_t Addr,
@@ -321,6 +389,15 @@ static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, unsigned Imm,
return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
}
+static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
+ assert(Imm < (1 << 9) && "9-bit encoding");
+ auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(
+ Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64, true));
+}
+
static DecodeStatus
DecodeAVLdSt_32RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
const MCDisassembler *Decoder) {
@@ -371,18 +448,19 @@ DECODE_SDWA(VopcDst)
template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
assert(Bytes.size() >= sizeof(T));
- const auto Res = support::endian::read<T, support::endianness::little>(Bytes.data());
+ const auto Res =
+ support::endian::read<T, llvm::endianness::little>(Bytes.data());
Bytes = Bytes.slice(sizeof(T));
return Res;
}
static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
assert(Bytes.size() >= 12);
- uint64_t Lo = support::endian::read<uint64_t, support::endianness::little>(
- Bytes.data());
+ uint64_t Lo =
+ support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
Bytes = Bytes.slice(8);
- uint64_t Hi = support::endian::read<uint32_t, support::endianness::little>(
- Bytes.data());
+ uint64_t Hi =
+ support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
Bytes = Bytes.slice(4);
return DecoderUInt128(Lo, Hi);
}
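
A minimal sketch of the 12-byte little-endian read that eat12Bytes performs, without the LLVM support headers; the (Lo, Hi) pair stands in for DecoderUInt128, and a little-endian host is assumed for brevity:

    #include <cstdint>
    #include <cstring>

    // Read a 96-bit instruction word as lo64 + hi32, least significant first.
    void read96LE(const uint8_t *Bytes, uint64_t &Lo, uint64_t &Hi) {
      uint32_t W0, W1, W2;
      std::memcpy(&W0, Bytes + 0, 4);
      std::memcpy(&W1, Bytes + 4, 4);
      std::memcpy(&W2, Bytes + 8, 4);
      Lo = (uint64_t(W1) << 32) | W0; // first 8 bytes
      Hi = W2;                        // next 4 bytes
    }
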
@@ -418,25 +496,48 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// encodings
if (isGFX11Plus() && Bytes.size() >= 12) {
DecoderUInt128 DecW = eat12Bytes(Bytes);
- Res = tryDecodeInst(DecoderTableDPP8GFX1196, MI, DecW, Address, CS);
+ Res =
+ tryDecodeInst(DecoderTableDPP8GFX1196, DecoderTableDPP8GFX11_FAKE1696,
+ MI, DecW, Address, CS);
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
break;
MI = MCInst(); // clear
- Res = tryDecodeInst(DecoderTableDPPGFX1196, MI, DecW, Address, CS);
- if (Res) {
- if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
+ Res =
+ tryDecodeInst(DecoderTableDPP8GFX1296, DecoderTableDPP8GFX12_FAKE1696,
+ MI, DecW, Address, CS);
+ if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
+ break;
+ MI = MCInst(); // clear
+
+ const auto convertVOPDPP = [&]() {
+ if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P) {
convertVOP3PDPPInst(MI);
- else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
+ } else if (AMDGPU::isVOPC64DPP(MI.getOpcode())) {
convertVOPCDPPInst(MI); // Special VOP3 case
- else {
+ } else {
assert(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3);
convertVOP3DPPInst(MI); // Regular VOP3 case
}
+ };
+ Res = tryDecodeInst(DecoderTableDPPGFX1196, DecoderTableDPPGFX11_FAKE1696,
+ MI, DecW, Address, CS);
+ if (Res) {
+ convertVOPDPP();
+ break;
+ }
+ Res = tryDecodeInst(DecoderTableDPPGFX1296, DecoderTableDPPGFX12_FAKE1696,
+ MI, DecW, Address, CS);
+ if (Res) {
+ convertVOPDPP();
break;
}
Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address, CS);
if (Res)
break;
+
+ Res = tryDecodeInst(DecoderTableGFX1296, MI, DecW, Address, CS);
+ if (Res)
+ break;
}
// Reinitialize Bytes
Bytes = Bytes_.slice(0, MaxInstBytesNum);
@@ -461,7 +562,14 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
break;
MI = MCInst(); // clear
- Res = tryDecodeInst(DecoderTableDPP8GFX1164, MI, QW, Address, CS);
+ Res = tryDecodeInst(DecoderTableDPP8GFX1164,
+ DecoderTableDPP8GFX11_FAKE1664, MI, QW, Address, CS);
+ if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
+ break;
+ MI = MCInst(); // clear
+
+ Res = tryDecodeInst(DecoderTableDPP8GFX1264,
+ DecoderTableDPP8GFX12_FAKE1664, MI, QW, Address, CS);
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
break;
MI = MCInst(); // clear
@@ -469,7 +577,16 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableDPPGFX1164, MI, QW, Address, CS);
+ Res = tryDecodeInst(DecoderTableDPPGFX1164, DecoderTableDPPGFX11_FAKE1664,
+ MI, QW, Address, CS);
+ if (Res) {
+ if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
+ convertVOPCDPPInst(MI);
+ break;
+ }
+
+ Res = tryDecodeInst(DecoderTableDPPGFX1264, DecoderTableDPPGFX12_FAKE1664,
+ MI, QW, Address, CS);
if (Res) {
if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
convertVOPCDPPInst(MI);
@@ -530,9 +647,15 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableGFX1132, MI, DW, Address, CS);
+ Res = tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
+ Address, CS);
if (Res) break;
+ Res = tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
+ Address, CS);
+ if (Res)
+ break;
+
if (Bytes.size() < 4) break;
const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
@@ -560,7 +683,13 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableGFX1164, MI, QW, Address, CS);
+ Res = tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
+ Address, CS);
+ if (Res)
+ break;
+
+ Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
+ Address, CS);
if (Res)
break;
@@ -640,6 +769,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = convertMIMGInst(MI);
}
+ if (Res && (MCII->get(MI.getOpcode()).TSFlags &
+ (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE)))
+ Res = convertMIMGInst(MI);
+
if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP))
Res = convertEXPInst(MI);
@@ -679,7 +812,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
- if (STI.hasFeature(AMDGPU::FeatureGFX11)) {
+ if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
// The MCInst still has these fields even though they are no longer encoded
// in the GFX11 instruction.
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
@@ -690,9 +823,13 @@ DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||
MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 ||
MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
- MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11) {
+ MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) {
// The MCInst has this field that is not directly encoded in the
// instruction.
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
@@ -840,6 +977,7 @@ DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
// VADDR size. Consequently, decoded instructions always show address as if it
// has 1 dword, which may not actually be the case.
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
+ auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::vdst);
@@ -848,8 +986,9 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
AMDGPU::OpName::vdata);
int VAddr0Idx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
- int RsrcIdx =
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
+ int RsrcOpName = TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
+ : AMDGPU::OpName::rsrc;
+ int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::dmask);
@@ -870,7 +1009,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
}
bool IsAtomic = (VDstIdx != -1);
- bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;
+ bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
+ bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
bool IsNSA = false;
bool IsPartialNSA = false;
unsigned AddrSize = Info->VAddrDwords;
@@ -887,10 +1027,13 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
AddrSize =
AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));
+ // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
+ // VIMAGE insts other than BVH never use vaddr4.
IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
- Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA;
+ Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
+ Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
if (!IsNSA) {
- if (AddrSize > 12)
+ if (!IsVSample && AddrSize > 12)
AddrSize = 16;
} else {
if (AddrSize > Info->VAddrDwords) {
@@ -1098,6 +1241,8 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
case AMDGPU::TTMP_64RegClassID:
shift = 1;
break;
+ case AMDGPU::SGPR_96RegClassID:
+ case AMDGPU::TTMP_96RegClassID:
case AMDGPU::SGPR_128RegClassID:
case AMDGPU::TTMP_128RegClassID:
// ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
@@ -1132,6 +1277,13 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
return createRegOperand(SRegClassID, Val >> shift);
}
+MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
+ bool IsHi) const {
+ unsigned RCID =
+ IsHi ? AMDGPU::VGPR_HI16RegClassID : AMDGPU::VGPR_LO16RegClassID;
+ return createRegOperand(RCID, RegIdx);
+}
+
// Decode Literals for insts which always have a literal in the encoding
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
@@ -1147,7 +1299,7 @@ AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
return MCOperand::createImm(Literal);
}
-MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
+MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
// For now, all literal constants are assumed to be unsigned integers
// ToDo: deal with signed/unsigned 64-bit integer constants
// ToDo: deal with float/double constants
@@ -1157,9 +1309,11 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
Twine(Bytes.size()));
}
HasLiteral = true;
- Literal = eatBytes<uint32_t>(Bytes);
+ Literal = Literal64 = eatBytes<uint32_t>(Bytes);
+ if (ExtendFP64)
+ Literal64 <<= 32;
}
- return MCOperand::createImm(Literal);
+ return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
}
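
The ExtendFP64 shift exists because a 64-bit FP literal is encoded as only the high 32 bits of the double; shifting the fetched word left by 32 restores it. A self-contained check of that round trip for 1.0 (bit pattern 0x3FF0000000000000, encoded as 0x3FF00000):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint32_t Encoded = 0x3FF00000;                // high half of 1.0
      uint64_t Literal64 = uint64_t(Encoded) << 32; // what ExtendFP64 does
      double D;
      std::memcpy(&D, &Literal64, sizeof(D));
      assert(D == 1.0); // low 32 bits of the mantissa are zero-filled
      return 0;
    }
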
MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
@@ -1376,7 +1530,7 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
bool MandatoryLiteral,
- unsigned ImmWidth) const {
+ unsigned ImmWidth, bool IsFP) const {
using namespace AMDGPU::EncValues;
assert(Val < 1024); // enum10
@@ -1388,6 +1542,20 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
return createRegOperand(IsAGPR ? getAgprClassId(Width)
: getVgprClassId(Width), Val - VGPR_MIN);
}
+ return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
+ IsFP);
+}
+
+MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,
+ unsigned Val,
+ bool MandatoryLiteral,
+ unsigned ImmWidth,
+ bool IsFP) const {
+ // Cases where Val{8} is 1 (VGPR, AGPR, or true16 VGPR) should have been
+ // decoded earlier.
+ assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
+ using namespace AMDGPU::EncValues;
+
if (Val <= SGPR_MAX) {
// "SGPR_MIN <= Val" is always true and causes compilation warning.
static_assert(SGPR_MIN == 0);
@@ -1410,7 +1578,7 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
// Keep a sentinel value for deferred setting
return MCOperand::createImm(LITERAL_CONST);
else
- return decodeLiteralConstant();
+ return decodeLiteralConstant(IsFP && ImmWidth == 64);
}
switch (Width) {
@@ -1590,6 +1758,10 @@ MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
: decodeSrcOp(OPW32, Val);
}
+MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
+ return decodeSrcOp(OPW32, Val);
+}
+
bool AMDGPUDisassembler::isVI() const {
return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
}
@@ -1616,11 +1788,18 @@ bool AMDGPUDisassembler::isGFX11Plus() const {
return AMDGPU::isGFX11Plus(STI);
}
+bool AMDGPUDisassembler::isGFX12Plus() const {
+ return AMDGPU::isGFX12Plus(STI);
+}
bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
}
+bool AMDGPUDisassembler::hasKernargPreload() const {
+ return AMDGPU::hasKernargPreload(STI);
+}
+
//===----------------------------------------------------------------------===//
// AMDGPU specific symbol handling
//===----------------------------------------------------------------------===//
@@ -1704,12 +1883,16 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV)
return MCDisassembler::Fail;
- PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
+ if (!isGFX12Plus())
+ PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
+ COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE)
return MCDisassembler::Fail;
- PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
+ if (!isGFX12Plus())
+ PRINT_DIRECTIVE(".amdhsa_ieee_mode",
+ COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY)
return MCDisassembler::Fail;
@@ -1717,17 +1900,29 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)
return MCDisassembler::Fail;
- PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL);
+ if (isGFX9Plus())
+ PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
- if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0)
+ if (!isGFX9Plus())
+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0)
+ return MCDisassembler::Fail;
+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED1)
return MCDisassembler::Fail;
+ if (!isGFX10Plus())
+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2)
+ return MCDisassembler::Fail;
if (isGFX10Plus()) {
PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
- COMPUTE_PGM_RSRC1_WGP_MODE);
- PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED);
- PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS);
+ COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
+ PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
+ PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
}
+
+ if (isGFX12Plus())
+ PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
+ COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
+
return MCDisassembler::Success;
}
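
Each PRINT_DIRECTIVE above extracts one field from the 32-bit rsrc word via its mask. A sketch of the mask-and-shift this amounts to; the helper signature, field position, and sample value are illustrative stand-ins, not the real AMDHSA_BITS_GET macro or its constants:

    #include <cstdint>
    #include <cstdio>

    // Illustrative field extraction: each shift/width pair carves one
    // directive value out of the rsrc word.
    constexpr uint32_t bitsGet(uint32_t Word, unsigned Shift, unsigned Width) {
      return (Word >> Shift) & ((1u << Width) - 1u);
    }

    int main() {
      uint32_t FourByteBuffer = 0x20000000;              // hypothetical rsrc1
      unsigned WgpMode = bitsGet(FourByteBuffer, 29, 1); // hypothetical position
      std::printf(".amdhsa_workgroup_processor_mode %u\n", WgpMode);
      return 0;
    }
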
@@ -1807,16 +2002,29 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
PRINT_PSEUDO_DIRECTIVE_COMMENT(
"SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
}
- PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
- COMPUTE_PGM_RSRC3_GFX10_PLUS_INST_PREF_SIZE);
- PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
- COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);
- PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
- COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_END);
- if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED0)
+
+ if (isGFX11Plus()) {
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
+ COMPUTE_PGM_RSRC3_GFX11_PLUS_INST_PREF_SIZE);
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
+ COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
+ COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_END);
+ } else {
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED0)
+ return MCDisassembler::Fail;
+ }
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED1)
return MCDisassembler::Fail;
- PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
- COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);
+
+ if (isGFX11Plus()) {
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
+ COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);
+ } else {
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED2)
+ return MCDisassembler::Fail;
+ }
} else if (FourByteBuffer) {
return MCDisassembler::Fail;
}
@@ -1945,10 +2153,24 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(
return MCDisassembler::Success;
- case amdhsa::RESERVED2_OFFSET:
- // 6 bytes from here are reserved, must be 0.
- ReservedBytes = DE.getBytes(Cursor, 6);
- for (int I = 0; I < 6; ++I) {
+ case amdhsa::KERNARG_PRELOAD_OFFSET:
+ using namespace amdhsa;
+ TwoByteBuffer = DE.getU16(Cursor);
+ if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
+ KERNARG_PRELOAD_SPEC_LENGTH);
+ }
+
+ if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
+ KERNARG_PRELOAD_SPEC_OFFSET);
+ }
+ return MCDisassembler::Success;
+
+ case amdhsa::RESERVED3_OFFSET:
+ // 4 bytes from here are reserved, must be 0.
+ ReservedBytes = DE.getBytes(Cursor, 4);
+ for (int I = 0; I < 4; ++I) {
if (ReservedBytes[I] != 0)
return MCDisassembler::Fail;
}
@@ -1975,7 +2197,7 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor(
if (isGFX10Plus()) {
uint16_t KernelCodeProperties =
support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
- support::endianness::little);
+ llvm::endianness::little);
EnableWavefrontSize32 =
AMDHSA_BITS_GET(KernelCodeProperties,
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
@@ -2018,7 +2240,7 @@ AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
// Code Object V3 kernel descriptors.
StringRef Name = Symbol.Name;
- if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(StringRef(".kd"))) {
+ if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
Size = 64; // Size = 64 regardless of success or failure.
return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 444312473a5f..233581949d71 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -97,6 +97,7 @@ private:
const unsigned TargetMaxInstBytes;
mutable ArrayRef<uint8_t> Bytes;
mutable uint32_t Literal;
+ mutable uint64_t Literal64;
mutable bool HasLiteral;
mutable std::optional<bool> EnableWavefrontSize32;
@@ -114,6 +115,7 @@ public:
MCOperand createRegOperand(unsigned int RegId) const;
MCOperand createRegOperand(unsigned RegClassID, unsigned Val) const;
MCOperand createSRegOperand(unsigned SRegClassID, unsigned Val) const;
+ MCOperand createVGPR16Operand(unsigned RegIdx, bool IsHi) const;
MCOperand errOperand(unsigned V, const Twine& ErrMsg) const;
@@ -144,6 +146,17 @@ public:
return MCDisassembler::Fail;
}
+ template <typename InsnType>
+ DecodeStatus tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
+ MCInst &MI, InsnType Inst, uint64_t Address,
+ raw_ostream &Comments) const {
+ for (const uint8_t *T : {Table1, Table2}) {
+ if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
+ return Res;
+ }
+ return MCDisassembler::Fail;
+ }
+
std::optional<DecodeStatus>
onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
uint64_t Address, raw_ostream &CStream) const override;
@@ -217,11 +230,15 @@ public:
static MCOperand decodeFPImmed(unsigned ImmWidth, unsigned Imm);
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const;
- MCOperand decodeLiteralConstant() const;
+ MCOperand decodeLiteralConstant(bool ExtendFP64) const;
MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val,
- bool MandatoryLiteral = false,
- unsigned ImmWidth = 0) const;
+ bool MandatoryLiteral = false, unsigned ImmWidth = 0,
+ bool IsFP = false) const;
+
+ MCOperand decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val,
+ bool MandatoryLiteral = false,
+ unsigned ImmWidth = 0, bool IsFP = false) const;
MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const;
MCOperand decodeSpecialReg32(unsigned Val) const;
@@ -234,6 +251,7 @@ public:
MCOperand decodeSDWAVopcDst(unsigned Val) const;
MCOperand decodeBoolReg(unsigned Val) const;
+ MCOperand decodeSplitBarrier(unsigned Val) const;
int getTTmpIdx(unsigned Val) const;
@@ -247,8 +265,10 @@ public:
bool isGFX10Plus() const;
bool isGFX11() const;
bool isGFX11Plus() const;
+ bool isGFX12Plus() const;
bool hasArchitectedFlatScratch() const;
+ bool hasKernargPreload() const;
bool isMacDPP(MCInst &MI) const;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/EXPInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/EXPInstructions.td
index 14ba01f0d67c..ff1d661ef6fe 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/EXPInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/EXPInstructions.td
@@ -41,8 +41,8 @@ class EXP_Real_ComprVM<bit done, string pseudo, int subtarget>
}
// Real instruction with optional asm operand "row_en".
-class EXP_Real_Row<bit row, bit done, string pseudo, int subtarget>
- : EXPCommon<row, done, "exp$tgt $src0, $src1, $src2, $src3"
+class EXP_Real_Row<bit row, bit done, string pseudo, int subtarget, string name = "exp">
+ : EXPCommon<row, done, name#"$tgt $src0, $src1, $src2, $src3"
#!if(done, " done", "")#!if(row, " row_en", "")>,
SIMCInstr<pseudo, subtarget> {
let AsmMatchConverter = "cvtExp";
@@ -105,12 +105,12 @@ def EXP_gfx10 : EXP_Real_gfx10<0, "EXP">;
def EXP_DONE_gfx10 : EXP_Real_gfx10<1, "EXP_DONE">;
//===----------------------------------------------------------------------===//
-// GFX11+
+// GFX11
//===----------------------------------------------------------------------===//
class EXP_Real_gfx11<bit _row, bit _done, string pseudo>
: EXP_Real_Row<_row, _done, pseudo, SIEncodingFamily.GFX11>, EXPe_Row {
- let AssemblerPredicate = isGFX11Plus;
+ let AssemblerPredicate = isGFX11Only;
let DecoderNamespace = "GFX11";
let row = _row;
let done = _done;
@@ -122,6 +122,24 @@ def EXP_ROW_gfx11 : EXP_Real_gfx11<1, 0, "EXP_ROW">;
def EXP_ROW_DONE_gfx11 : EXP_Real_gfx11<1, 1, "EXP_ROW_DONE">;
//===----------------------------------------------------------------------===//
+// GFX12+
+//===----------------------------------------------------------------------===//
+
+class VEXPORT_Real_gfx12<bit _row, bit _done, string pseudo>
+ : EXP_Real_Row<_row, _done, pseudo, SIEncodingFamily.GFX12, "export">,
+ EXPe_Row, MnemonicAlias<"exp", "export">, Requires<[isGFX12Plus]> {
+ let AssemblerPredicate = isGFX12Plus;
+ let DecoderNamespace = "GFX12";
+ let row = _row;
+ let done = _done;
+}
+
+def EXPORT_gfx12 : VEXPORT_Real_gfx12<0, 0, "EXP">;
+def EXPORT_DONE_gfx12 : VEXPORT_Real_gfx12<0, 1, "EXP_DONE">;
+def EXPORT_ROW_gfx12 : VEXPORT_Real_gfx12<1, 0, "EXP_ROW">;
+def EXPORT_ROW_DONE_gfx12 : VEXPORT_Real_gfx12<1, 1, "EXP_ROW_DONE">;
+
+//===----------------------------------------------------------------------===//
// EXP Patterns
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 5c86d80e7dd2..0dd2b3f5c2c9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -144,6 +144,47 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, ?);
}
+class VFLAT_Real <bits<8> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
+ InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands, []>,
+ Enc96 {
+
+ let FLAT = 1;
+
+ // copy relevant pseudo op flags
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+ let OtherPredicates = ps.OtherPredicates;
+ let TSFlags = ps.TSFlags;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
+ let SchedRW = ps.SchedRW;
+ let mayLoad = ps.mayLoad;
+ let mayStore = ps.mayStore;
+ let IsAtomicRet = ps.IsAtomicRet;
+ let IsAtomicNoRet = ps.IsAtomicNoRet;
+ let VM_CNT = ps.VM_CNT;
+ let LGKM_CNT = ps.LGKM_CNT;
+ let VALU = ps.VALU;
+
+ bits<7> saddr;
+ bits<8> vdst;
+ bits<6> cpol;
+ bits<8> vdata; // vsrc
+ bits<8> vaddr;
+ bits<24> offset;
+
+ let Inst{6-0} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);
+ let Inst{21-14} = op;
+ let Inst{31-26} = 0x3b;
+ let Inst{39-32} = !if(ps.has_vdst, vdst, ?);
+ let Inst{49} = ps.sve;
+ let Inst{54-53} = cpol{2-1}; // th{2-1}
+ let Inst{52} = !if(ps.IsAtomicRet, 1, cpol{0}); // th{0}
+ let Inst{51-50} = cpol{4-3}; // scope
+ let Inst{62-55} = !if(ps.has_data, vdata{7-0}, ?);
+ let Inst{71-64} = !if(ps.has_vaddr, vaddr, ?);
+ let Inst{95-72} = offset;
+}
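
A sketch of what the Inst{...} assignments in VFLAT_Real amount to when flattened into three 32-bit words; the field positions are taken from the class above, the helper names are made up, and the cpol/sve/th bits are omitted for brevity:

    #include <cstdint>

    struct VFlatEnc { uint32_t W[3] = {0, 0, 0}; }; // Enc96: bits 0..95

    // Set Width bits starting at absolute bit Lo, spilling across words.
    static void setBits(VFlatEnc &E, unsigned Lo, unsigned Width, uint64_t V) {
      for (unsigned I = 0; I < Width; ++I)
        if ((V >> I) & 1)
          E.W[(Lo + I) / 32] |= 1u << ((Lo + I) % 32);
    }

    VFlatEnc encodeVFlat(uint8_t Op, uint8_t Saddr, uint8_t Vdst,
                         uint8_t Vdata, uint8_t Vaddr, uint32_t Offset24) {
      VFlatEnc E;
      setBits(E, 0, 7, Saddr);      // Inst{6-0}
      setBits(E, 14, 8, Op);        // Inst{21-14}
      setBits(E, 26, 6, 0x3b);      // Inst{31-26}, fixed value from the class
      setBits(E, 32, 8, Vdst);      // Inst{39-32}
      setBits(E, 55, 8, Vdata);     // Inst{62-55}
      setBits(E, 64, 8, Vaddr);     // Inst{71-64}
      setBits(E, 72, 24, Offset24); // Inst{95-72}
      return E;
    }
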
+
class GlobalSaddrTable <bit is_saddr, string Name = ""> {
bit IsSaddr = is_saddr;
string SaddrOp = Name;
@@ -758,6 +799,10 @@ let SubtargetPredicate = HasFlatAtomicFaddF32Inst in {
defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32>;
} // End SubtargetPredicate = HasFlatAtomicFaddF32Inst
+let SubtargetPredicate = isGFX12Plus in {
+ defm FLAT_ATOMIC_CSUB_U32 : FLAT_Atomic_Pseudo <"flat_atomic_csub_u32", VGPR_32, i32>;
+} // End SubtargetPredicate = isGFX12Plus
+
defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
defm GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
@@ -870,9 +915,10 @@ defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2",
defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
VReg_64, i64>;
-let SubtargetPredicate = HasGFX10_BEncoding in
-defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub",
- VGPR_32, i32>;
+let SubtargetPredicate = HasGFX10_BEncoding in {
+ defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo <"global_atomic_csub",
+ VGPR_32, i32>;
+}
defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ubyte">;
defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sbyte">;
@@ -996,12 +1042,6 @@ class GlobalStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
(inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
>;
-class GlobalAtomicStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
- ValueType vt> : GCNPat <
- (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$data),
- (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
->;
-
class GlobalAtomicSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt, ValueType data_vt = vt> : GCNPat <
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), data_vt:$data)),
@@ -1024,13 +1064,6 @@ class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt
(inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;
-class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- // atomic store follows atomic binop convention so the address comes
- // first.
- (node (FlatOffset i64:$vaddr, i32:$offset), vt:$data),
- (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
->;
-
class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt, ValueType data_vt = vt> : GCNPat <
// atomic store follows atomic binop convention so the address comes
@@ -1039,19 +1072,43 @@ class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node,
(inst $vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
>;
-multiclass FlatAtomicPat <string inst, string node, ValueType vt,
- ValueType data_vt = vt> {
- defvar rtnNode = !cast<PatFrags>(node#"_"#vt.Size);
- defvar noRtnNode = !cast<PatFrags>(node#"_noret_"#vt.Size);
-
- def : GCNPat <(vt (rtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
- (!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
+multiclass FlatAtomicNoRtnPat <string inst, string node, ValueType vt,
+ ValueType data_vt = vt, bit isIntr = 0> {
+ defvar noRtnNode = !cast<PatFrags>(node # "_noret" # !if(isIntr, "", "_"#vt.Size));
let AddedComplexity = 1 in
def : GCNPat <(vt (noRtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
(!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
}
+multiclass FlatAtomicRtnPat <string inst, string node, ValueType vt,
+ ValueType data_vt = vt, bit isIntr = 0> {
+ defvar rtnNode = !cast<SDPatternOperator>(node # !if(isIntr, "", "_"#vt.Size));
+
+ def : GCNPat <(vt (rtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
+ (!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
+}
+
+multiclass FlatAtomicPat <string inst, string node, ValueType vt,
+ ValueType data_vt = vt, bit isIntr = 0> :
+ FlatAtomicRtnPat<inst, node, vt, data_vt, isIntr>,
+ FlatAtomicNoRtnPat<inst, node, vt, data_vt, isIntr>;
+
+multiclass FlatAtomicIntrNoRtnPat <string inst, string node, ValueType vt,
+ ValueType data_vt = vt> {
+ defm : FlatAtomicNoRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>;
+}
+
+multiclass FlatAtomicIntrRtnPat <string inst, string node, ValueType vt,
+ ValueType data_vt = vt> {
+ defm : FlatAtomicRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>;
+}
+
+multiclass FlatAtomicIntrPat <string inst, string node, ValueType vt,
+ ValueType data_vt = vt> :
+ FlatAtomicRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>,
+ FlatAtomicNoRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>;
+
class FlatSignedAtomicPatBase <FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt, ValueType data_vt = vt> : GCNPat <
(vt (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data)),
@@ -1174,12 +1231,12 @@ def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>;
def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>;
}
-def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_32_flat, i32>;
-def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_64_flat, i64>;
-def : FlatStoreAtomicPat <FLAT_STORE_BYTE, atomic_store_8_flat, i32>;
-def : FlatStoreAtomicPat <FLAT_STORE_BYTE, atomic_store_8_flat, i16>;
-def : FlatStoreAtomicPat <FLAT_STORE_SHORT, atomic_store_16_flat, i32>;
-def : FlatStoreAtomicPat <FLAT_STORE_SHORT, atomic_store_16_flat, i16>;
+def : FlatStorePat <FLAT_STORE_DWORD, atomic_store_32_flat, i32>;
+def : FlatStorePat <FLAT_STORE_DWORDX2, atomic_store_64_flat, i64>;
+def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i32>;
+def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i16>;
+def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i32>;
+def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i16>;
foreach as = [ "flat", "global" ] in {
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>;
@@ -1269,24 +1326,13 @@ multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
}
}
-// Deal with swapped operands for atomic_store vs. regular store
-multiclass GlobalFLATAtomicStorePats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
- def : FlatStoreSignedAtomicPat <inst, node, vt> {
- let AddedComplexity = 10;
- }
-
- def : GlobalAtomicStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
- let AddedComplexity = 11;
- }
-}
-
multiclass GlobalFLATAtomicPatsNoRtnBase<string inst, string node, ValueType vt,
ValueType data_vt = vt> {
let AddedComplexity = 11 in
- def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), !cast<PatFrags>(node), vt, data_vt>;
+ def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), !cast<SDPatternOperator>(node), vt, data_vt>;
let AddedComplexity = 13 in
- def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), !cast<PatFrags>(node), vt, data_vt>;
+ def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), !cast<SDPatternOperator>(node), vt, data_vt>;
}
multiclass GlobalFLATAtomicPatsRtnBase<string inst, string node, ValueType vt,
@@ -1444,12 +1490,12 @@ defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>;
}
-defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i32>;
-defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i16>;
-defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i32>;
-defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i16>;
-defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORD, atomic_store_32_global, i32>;
-defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORDX2, atomic_store_64_global, i64>;
+defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i32>;
+defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i16>;
+defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i32>;
+defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i16>;
+defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, atomic_store_32_global, i32>;
+defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, atomic_store_64_global, i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD", "atomic_load_add_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB", "atomic_load_sub_global", i32>;
@@ -1466,6 +1512,9 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_glo
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR", "atomic_load_xor_global", i32>;
defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>;
+let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
+defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>;
+
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", "atomic_load_add_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2", "atomic_load_sub_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", "atomic_load_uinc_wrap_global", i64>;
@@ -1483,10 +1532,14 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", "atomic_load_xor_global", i
let OtherPredicates = [isGFX10Plus] in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
-defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>;
-defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>;
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>;
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>;
+}
+
+let OtherPredicates = [isGFX10GFX11] in {
+defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>;
+defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>;
+
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin", f32>;
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax", f32>;
}
@@ -1502,6 +1555,13 @@ defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN_X2", "int_amdgcn_flat_atomic_f
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX_X2", "int_amdgcn_flat_atomic_fmax", f64>;
}
+let OtherPredicates = [isGFX12Only] in {
+ defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin_num", f32>;
+ defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax_num", f32>;
+ defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin_num", f32>;
+ defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax_num", f32>;
+}
+
let OtherPredicates = [HasAtomicFaddNoRtnInsts] in {
defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>;
defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f32>;
@@ -1998,7 +2058,7 @@ multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
multiclass FLAT_Real_ST_gfx10<bits<7> op> {
def _ST_gfx10 :
FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_ST")> {
- let Inst{54-48} = !cast<int>(EXEC_HI.HWEncoding);
+ let Inst{54-48} = EXEC_HI.Index;
let OtherPredicates = [HasFlatScratchSTMode];
}
}
@@ -2126,7 +2186,7 @@ defm GLOBAL_ATOMIC_SWAP : FLAT_Real_GlblAtomics_gfx10<0x030>;
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x031>;
defm GLOBAL_ATOMIC_ADD : FLAT_Real_GlblAtomics_gfx10<0x032>;
defm GLOBAL_ATOMIC_SUB : FLAT_Real_GlblAtomics_gfx10<0x033>;
-defm GLOBAL_ATOMIC_CSUB : FLAT_Real_GlblAtomics_RTN_gfx10<0x034>;
+defm GLOBAL_ATOMIC_CSUB : FLAT_Real_GlblAtomics_gfx10<0x034>;
defm GLOBAL_ATOMIC_SMIN : FLAT_Real_GlblAtomics_gfx10<0x035>;
defm GLOBAL_ATOMIC_UMIN : FLAT_Real_GlblAtomics_gfx10<0x036>;
defm GLOBAL_ATOMIC_SMAX : FLAT_Real_GlblAtomics_gfx10<0x037>;
@@ -2201,7 +2261,7 @@ defm SCRATCH_LOAD_LDS_DWORD : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00c>;
class FLAT_Real_gfx11 <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
FLAT_Real <op, ps, opName>,
SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX11> {
- let AssemblerPredicate = isGFX11Plus;
+ let AssemblerPredicate = isGFX11Only;
let DecoderNamespace = "GFX11";
let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue);
@@ -2213,19 +2273,19 @@ class FLAT_Real_gfx11 <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic>
multiclass FLAT_Aliases_gfx11<string ps, string opName, int renamed> {
if renamed then
- def _renamed_gfx11 : MnemonicAlias<!cast<FLAT_Pseudo>(ps).Mnemonic, opName>, Requires<[isGFX11Plus]>;
+ def _renamed_gfx11 : MnemonicAlias<!cast<FLAT_Pseudo>(ps).Mnemonic, opName>, Requires<[isGFX11Only]>;
}
multiclass FLAT_Real_Base_gfx11<bits<7> op, string ps, string opName, int renamed = false> :
FLAT_Aliases_gfx11<ps, opName, renamed> {
def _gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps), opName> {
- let Inst{54-48} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
+ let Inst{54-48} = SGPR_NULL_gfx11plus.Index;
}
}
multiclass FLAT_Real_RTN_gfx11<bits<7> op, string ps, string opName> {
def _RTN_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_RTN"), opName> {
- let Inst{54-48} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
+ let Inst{54-48} = SGPR_NULL_gfx11plus.Index;
}
}
@@ -2239,7 +2299,7 @@ multiclass FLAT_Real_SADDR_RTN_gfx11<bits<7> op, string ps, string opName> {
multiclass FLAT_Real_ST_gfx11<bits<7> op, string ps, string opName> {
def _ST_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_ST"), opName> {
- let Inst{54-48} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
+ let Inst{54-48} = SGPR_NULL_gfx11plus.Index;
let OtherPredicates = [HasFlatScratchSTMode];
}
}
@@ -2357,7 +2417,7 @@ defm GLOBAL_ATOMIC_SWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x033, "GLOBAL_ATO
defm GLOBAL_ATOMIC_CMPSWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x034, "GLOBAL_ATOMIC_CMPSWAP", "global_atomic_cmpswap_b32", true>;
defm GLOBAL_ATOMIC_ADD_U32 : FLAT_Real_GlblAtomics_gfx11<0x035, "GLOBAL_ATOMIC_ADD", "global_atomic_add_u32", true>;
defm GLOBAL_ATOMIC_SUB_U32 : FLAT_Real_GlblAtomics_gfx11<0x036, "GLOBAL_ATOMIC_SUB", "global_atomic_sub_u32", true>;
-defm GLOBAL_ATOMIC_CSUB_U32 : FLAT_Real_GlblAtomics_RTN_gfx11<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_csub_u32", true>;
+defm GLOBAL_ATOMIC_CSUB_U32 : FLAT_Real_GlblAtomics_gfx11<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_csub_u32", true>;
defm GLOBAL_ATOMIC_MIN_I32 : FLAT_Real_GlblAtomics_gfx11<0x038, "GLOBAL_ATOMIC_SMIN", "global_atomic_min_i32", true>;
defm GLOBAL_ATOMIC_MIN_U32 : FLAT_Real_GlblAtomics_gfx11<0x039, "GLOBAL_ATOMIC_UMIN", "global_atomic_min_u32", true>;
defm GLOBAL_ATOMIC_MAX_I32 : FLAT_Real_GlblAtomics_gfx11<0x03a, "GLOBAL_ATOMIC_SMAX", "global_atomic_max_i32", true>;
@@ -2408,3 +2468,213 @@ defm SCRATCH_LOAD_D16_HI_I8 : FLAT_Real_ScratchAllAddr_gfx11<0x22, "SCRATCH_
defm SCRATCH_LOAD_D16_HI_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x23, "SCRATCH_LOAD_SHORT_D16_HI", "scratch_load_d16_hi_b16">;
defm SCRATCH_STORE_D16_HI_B8 : FLAT_Real_ScratchAllAddr_gfx11<0x24, "SCRATCH_STORE_BYTE_D16_HI", "scratch_store_d16_hi_b8">;
defm SCRATCH_STORE_D16_HI_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x25, "SCRATCH_STORE_SHORT_D16_HI", "scratch_store_d16_hi_b16">;
+
+//===----------------------------------------------------------------------===//
+// GFX12
+//===----------------------------------------------------------------------===//
+
+class VFLAT_Real_gfx12 <bits<8> op, FLAT_Pseudo ps,
+ string opName = ps.Mnemonic> :
+ VFLAT_Real <op, ps, opName>,
+ SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX12> {
+ let AssemblerPredicate = isGFX12Plus;
+ let DecoderNamespace = "GFX12";
+
+ let Inst{25-24} = !if(ps.is_flat_scratch, 0b01,
+ !if(ps.is_flat_global, 0b10, 0b00));
+}
+
+multiclass VFLAT_Aliases_gfx12<string ps, string opName, int renamed, string alias> {
+ if renamed then
+ def _renamed_gfx12 : MnemonicAlias<!cast<FLAT_Pseudo>(ps).Mnemonic, opName>, Requires<[isGFX12Plus]>;
+ if !not(!empty(alias)) then
+ def _alias_gfx12 : MnemonicAlias<alias, opName>, Requires<[isGFX12Plus]>;
+}
+
+multiclass VFLAT_Real_Base_gfx12<bits<8> op, string ps, string opName, int renamed = false, string alias = ""> :
+ VFLAT_Aliases_gfx12<ps, opName, renamed, alias> {
+ def _gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps), opName> {
+ let Inst{6-0} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
+ }
+}
+
+multiclass VFLAT_Real_RTN_gfx12<bits<8> op, string ps, string opName> {
+ def _RTN_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps#"_RTN"), opName> {
+ let Inst{6-0} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
+ }
+}
+
+multiclass VFLAT_Real_SADDR_gfx12<bits<8> op, string ps, string opName> {
+ def _SADDR_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps#"_SADDR"), opName>;
+}
+
+multiclass VFLAT_Real_SADDR_RTN_gfx12<bits<8> op, string ps, string opName> {
+ def _SADDR_RTN_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps#"_SADDR_RTN"), opName>;
+}
+
+multiclass VFLAT_Real_ST_gfx12<bits<8> op, string ps, string opName> {
+ def _ST_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps#"_ST"), opName> {
+ let Inst{6-0} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
+ let OtherPredicates = [HasFlatScratchSTMode];
+ }
+}
+
+multiclass VFLAT_Real_SVS_gfx12<bits<8> op, string ps, string opName> {
+ def _SVS_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps#"_SVS"), opName> {
+ let OtherPredicates = [HasFlatScratchSVSMode];
+ }
+}
+
+multiclass VFLAT_Real_Atomics_gfx12<bits<8> op, string ps, string opName, int renamed = false, string alias = ""> :
+ VFLAT_Real_Base_gfx12<op, ps, opName, renamed, alias>,
+ VFLAT_Real_RTN_gfx12<op, ps, opName>;
+
+multiclass VGLOBAL_Real_AllAddr_gfx12<bits<8> op, string ps, string opName, int renamed = false, string alias = ""> :
+ VFLAT_Real_Base_gfx12<op, ps, opName, renamed, alias>,
+ VFLAT_Real_SADDR_gfx12<op, ps, opName>;
+
+multiclass VGLOBAL_Real_Atomics_gfx12<bits<8> op, string ps, string opName, int renamed = false, string alias = ""> :
+ VGLOBAL_Real_AllAddr_gfx12<op, ps, opName, renamed, alias>,
+ VFLAT_Real_RTN_gfx12<op, ps, opName>,
+ VFLAT_Real_SADDR_RTN_gfx12<op, ps, opName>;
+
+multiclass VSCRATCH_Real_AllAddr_gfx12<bits<8> op, string ps, string opName, int renamed = false> :
+ VFLAT_Real_Base_gfx12<op, ps, opName, renamed>,
+ VFLAT_Real_SADDR_gfx12<op, ps, opName>,
+ VFLAT_Real_ST_gfx12<op, ps, opName>,
+ VFLAT_Real_SVS_gfx12<op, ps, opName>;
+
+// ENC_VFLAT.
+defm FLAT_LOAD_U8 : VFLAT_Real_Base_gfx12<0x010, "FLAT_LOAD_UBYTE", "flat_load_u8", true>;
+defm FLAT_LOAD_I8 : VFLAT_Real_Base_gfx12<0x011, "FLAT_LOAD_SBYTE", "flat_load_i8", true>;
+defm FLAT_LOAD_U16 : VFLAT_Real_Base_gfx12<0x012, "FLAT_LOAD_USHORT", "flat_load_u16", true>;
+defm FLAT_LOAD_I16 : VFLAT_Real_Base_gfx12<0x013, "FLAT_LOAD_SSHORT", "flat_load_i16", true>;
+defm FLAT_LOAD_B32 : VFLAT_Real_Base_gfx12<0x014, "FLAT_LOAD_DWORD", "flat_load_b32", true>;
+defm FLAT_LOAD_B64 : VFLAT_Real_Base_gfx12<0x015, "FLAT_LOAD_DWORDX2", "flat_load_b64", true>;
+defm FLAT_LOAD_B96 : VFLAT_Real_Base_gfx12<0x016, "FLAT_LOAD_DWORDX3", "flat_load_b96", true>;
+defm FLAT_LOAD_B128 : VFLAT_Real_Base_gfx12<0x017, "FLAT_LOAD_DWORDX4", "flat_load_b128", true>;
+defm FLAT_STORE_B8 : VFLAT_Real_Base_gfx12<0x018, "FLAT_STORE_BYTE", "flat_store_b8", true>;
+defm FLAT_STORE_B16 : VFLAT_Real_Base_gfx12<0x019, "FLAT_STORE_SHORT", "flat_store_b16", true>;
+defm FLAT_STORE_B32 : VFLAT_Real_Base_gfx12<0x01a, "FLAT_STORE_DWORD", "flat_store_b32", true>;
+defm FLAT_STORE_B64 : VFLAT_Real_Base_gfx12<0x01b, "FLAT_STORE_DWORDX2", "flat_store_b64", true>;
+defm FLAT_STORE_B96 : VFLAT_Real_Base_gfx12<0x01c, "FLAT_STORE_DWORDX3", "flat_store_b96", true>;
+defm FLAT_STORE_B128 : VFLAT_Real_Base_gfx12<0x01d, "FLAT_STORE_DWORDX4", "flat_store_b128", true>;
+defm FLAT_LOAD_D16_U8 : VFLAT_Real_Base_gfx12<0x01e, "FLAT_LOAD_UBYTE_D16", "flat_load_d16_u8">;
+defm FLAT_LOAD_D16_I8 : VFLAT_Real_Base_gfx12<0x01f, "FLAT_LOAD_SBYTE_D16", "flat_load_d16_i8">;
+defm FLAT_LOAD_D16_B16 : VFLAT_Real_Base_gfx12<0x020, "FLAT_LOAD_SHORT_D16", "flat_load_d16_b16">;
+defm FLAT_LOAD_D16_HI_U8 : VFLAT_Real_Base_gfx12<0x021, "FLAT_LOAD_UBYTE_D16_HI", "flat_load_d16_hi_u8">;
+defm FLAT_LOAD_D16_HI_I8 : VFLAT_Real_Base_gfx12<0x022, "FLAT_LOAD_SBYTE_D16_HI", "flat_load_d16_hi_i8">;
+defm FLAT_LOAD_D16_HI_B16 : VFLAT_Real_Base_gfx12<0x023, "FLAT_LOAD_SHORT_D16_HI", "flat_load_d16_hi_b16">;
+defm FLAT_STORE_D16_HI_B8 : VFLAT_Real_Base_gfx12<0x024, "FLAT_STORE_BYTE_D16_HI", "flat_store_d16_hi_b8">;
+defm FLAT_STORE_D16_HI_B16 : VFLAT_Real_Base_gfx12<0x025, "FLAT_STORE_SHORT_D16_HI", "flat_store_d16_hi_b16">;
+defm FLAT_ATOMIC_SWAP_B32 : VFLAT_Real_Atomics_gfx12<0x033, "FLAT_ATOMIC_SWAP", "flat_atomic_swap_b32", true>;
+defm FLAT_ATOMIC_CMPSWAP_B32 : VFLAT_Real_Atomics_gfx12<0x034, "FLAT_ATOMIC_CMPSWAP", "flat_atomic_cmpswap_b32", true>;
+defm FLAT_ATOMIC_ADD_U32 : VFLAT_Real_Atomics_gfx12<0x035, "FLAT_ATOMIC_ADD", "flat_atomic_add_u32", true>;
+defm FLAT_ATOMIC_SUB_U32 : VFLAT_Real_Atomics_gfx12<0x036, "FLAT_ATOMIC_SUB", "flat_atomic_sub_u32", true>;
+defm FLAT_ATOMIC_SUB_CLAMP_U32 : VFLAT_Real_Atomics_gfx12<0x037, "FLAT_ATOMIC_CSUB_U32", "flat_atomic_sub_clamp_u32", true>;
+defm FLAT_ATOMIC_MIN_I32 : VFLAT_Real_Atomics_gfx12<0x038, "FLAT_ATOMIC_SMIN", "flat_atomic_min_i32", true>;
+defm FLAT_ATOMIC_MIN_U32 : VFLAT_Real_Atomics_gfx12<0x039, "FLAT_ATOMIC_UMIN", "flat_atomic_min_u32", true>;
+defm FLAT_ATOMIC_MAX_I32 : VFLAT_Real_Atomics_gfx12<0x03a, "FLAT_ATOMIC_SMAX", "flat_atomic_max_i32", true>;
+defm FLAT_ATOMIC_MAX_U32 : VFLAT_Real_Atomics_gfx12<0x03b, "FLAT_ATOMIC_UMAX", "flat_atomic_max_u32", true>;
+defm FLAT_ATOMIC_AND_B32 : VFLAT_Real_Atomics_gfx12<0x03c, "FLAT_ATOMIC_AND", "flat_atomic_and_b32", true>;
+defm FLAT_ATOMIC_OR_B32 : VFLAT_Real_Atomics_gfx12<0x03d, "FLAT_ATOMIC_OR", "flat_atomic_or_b32", true>;
+defm FLAT_ATOMIC_XOR_B32 : VFLAT_Real_Atomics_gfx12<0x03e, "FLAT_ATOMIC_XOR", "flat_atomic_xor_b32", true>;
+defm FLAT_ATOMIC_INC_U32 : VFLAT_Real_Atomics_gfx12<0x03f, "FLAT_ATOMIC_INC", "flat_atomic_inc_u32", true>;
+defm FLAT_ATOMIC_DEC_U32 : VFLAT_Real_Atomics_gfx12<0x040, "FLAT_ATOMIC_DEC", "flat_atomic_dec_u32", true>;
+defm FLAT_ATOMIC_SWAP_B64 : VFLAT_Real_Atomics_gfx12<0x041, "FLAT_ATOMIC_SWAP_X2", "flat_atomic_swap_b64", true>;
+defm FLAT_ATOMIC_CMPSWAP_B64 : VFLAT_Real_Atomics_gfx12<0x042, "FLAT_ATOMIC_CMPSWAP_X2", "flat_atomic_cmpswap_b64", true>;
+defm FLAT_ATOMIC_ADD_U64 : VFLAT_Real_Atomics_gfx12<0x043, "FLAT_ATOMIC_ADD_X2", "flat_atomic_add_u64", true>;
+defm FLAT_ATOMIC_SUB_U64 : VFLAT_Real_Atomics_gfx12<0x044, "FLAT_ATOMIC_SUB_X2", "flat_atomic_sub_u64", true>;
+defm FLAT_ATOMIC_MIN_I64 : VFLAT_Real_Atomics_gfx12<0x045, "FLAT_ATOMIC_SMIN_X2", "flat_atomic_min_i64", true>;
+defm FLAT_ATOMIC_MIN_U64 : VFLAT_Real_Atomics_gfx12<0x046, "FLAT_ATOMIC_UMIN_X2", "flat_atomic_min_u64", true>;
+defm FLAT_ATOMIC_MAX_I64 : VFLAT_Real_Atomics_gfx12<0x047, "FLAT_ATOMIC_SMAX_X2", "flat_atomic_max_i64", true>;
+defm FLAT_ATOMIC_MAX_U64 : VFLAT_Real_Atomics_gfx12<0x048, "FLAT_ATOMIC_UMAX_X2", "flat_atomic_max_u64", true>;
+defm FLAT_ATOMIC_AND_B64 : VFLAT_Real_Atomics_gfx12<0x049, "FLAT_ATOMIC_AND_X2", "flat_atomic_and_b64", true>;
+defm FLAT_ATOMIC_OR_B64 : VFLAT_Real_Atomics_gfx12<0x04a, "FLAT_ATOMIC_OR_X2", "flat_atomic_or_b64", true>;
+defm FLAT_ATOMIC_XOR_B64 : VFLAT_Real_Atomics_gfx12<0x04b, "FLAT_ATOMIC_XOR_X2", "flat_atomic_xor_b64", true>;
+defm FLAT_ATOMIC_INC_U64 : VFLAT_Real_Atomics_gfx12<0x04c, "FLAT_ATOMIC_INC_X2", "flat_atomic_inc_u64", true>;
+defm FLAT_ATOMIC_DEC_U64 : VFLAT_Real_Atomics_gfx12<0x04d, "FLAT_ATOMIC_DEC_X2", "flat_atomic_dec_u64", true>;
+defm FLAT_ATOMIC_MIN_NUM_F32 : VFLAT_Real_Atomics_gfx12<0x051, "FLAT_ATOMIC_FMIN", "flat_atomic_min_num_f32", true, "flat_atomic_min_f32">;
+defm FLAT_ATOMIC_MAX_NUM_F32 : VFLAT_Real_Atomics_gfx12<0x052, "FLAT_ATOMIC_FMAX", "flat_atomic_max_num_f32", true, "flat_atomic_max_f32">;
+defm FLAT_ATOMIC_ADD_F32 : VFLAT_Real_Atomics_gfx12<0x056, "FLAT_ATOMIC_ADD_F32", "flat_atomic_add_f32">;
+
+// ENC_VGLOBAL.
+defm GLOBAL_LOAD_U8 : VGLOBAL_Real_AllAddr_gfx12<0x010, "GLOBAL_LOAD_UBYTE", "global_load_u8", true>;
+defm GLOBAL_LOAD_I8 : VGLOBAL_Real_AllAddr_gfx12<0x011, "GLOBAL_LOAD_SBYTE", "global_load_i8", true>;
+defm GLOBAL_LOAD_U16 : VGLOBAL_Real_AllAddr_gfx12<0x012, "GLOBAL_LOAD_USHORT", "global_load_u16", true>;
+defm GLOBAL_LOAD_I16 : VGLOBAL_Real_AllAddr_gfx12<0x013, "GLOBAL_LOAD_SSHORT", "global_load_i16", true>;
+defm GLOBAL_LOAD_B32 : VGLOBAL_Real_AllAddr_gfx12<0x014, "GLOBAL_LOAD_DWORD", "global_load_b32", true>;
+defm GLOBAL_LOAD_B64 : VGLOBAL_Real_AllAddr_gfx12<0x015, "GLOBAL_LOAD_DWORDX2", "global_load_b64", true>;
+defm GLOBAL_LOAD_B96 : VGLOBAL_Real_AllAddr_gfx12<0x016, "GLOBAL_LOAD_DWORDX3", "global_load_b96", true>;
+defm GLOBAL_LOAD_B128 : VGLOBAL_Real_AllAddr_gfx12<0x017, "GLOBAL_LOAD_DWORDX4", "global_load_b128", true>;
+defm GLOBAL_STORE_B8 : VGLOBAL_Real_AllAddr_gfx12<0x018, "GLOBAL_STORE_BYTE", "global_store_b8", true>;
+defm GLOBAL_STORE_B16 : VGLOBAL_Real_AllAddr_gfx12<0x019, "GLOBAL_STORE_SHORT", "global_store_b16", true>;
+defm GLOBAL_STORE_B32 : VGLOBAL_Real_AllAddr_gfx12<0x01a, "GLOBAL_STORE_DWORD", "global_store_b32", true>;
+defm GLOBAL_STORE_B64 : VGLOBAL_Real_AllAddr_gfx12<0x01b, "GLOBAL_STORE_DWORDX2", "global_store_b64", true>;
+defm GLOBAL_STORE_B96 : VGLOBAL_Real_AllAddr_gfx12<0x01c, "GLOBAL_STORE_DWORDX3", "global_store_b96", true>;
+defm GLOBAL_STORE_B128 : VGLOBAL_Real_AllAddr_gfx12<0x01d, "GLOBAL_STORE_DWORDX4", "global_store_b128", true>;
+defm GLOBAL_LOAD_D16_U8 : VGLOBAL_Real_AllAddr_gfx12<0x01e, "GLOBAL_LOAD_UBYTE_D16", "global_load_d16_u8">;
+defm GLOBAL_LOAD_D16_I8 : VGLOBAL_Real_AllAddr_gfx12<0x01f, "GLOBAL_LOAD_SBYTE_D16", "global_load_d16_i8">;
+defm GLOBAL_LOAD_D16_B16 : VGLOBAL_Real_AllAddr_gfx12<0x020, "GLOBAL_LOAD_SHORT_D16", "global_load_d16_b16">;
+defm GLOBAL_LOAD_D16_HI_U8 : VGLOBAL_Real_AllAddr_gfx12<0x021, "GLOBAL_LOAD_UBYTE_D16_HI", "global_load_d16_hi_u8">;
+defm GLOBAL_LOAD_D16_HI_I8 : VGLOBAL_Real_AllAddr_gfx12<0x022, "GLOBAL_LOAD_SBYTE_D16_HI", "global_load_d16_hi_i8">;
+defm GLOBAL_LOAD_D16_HI_B16 : VGLOBAL_Real_AllAddr_gfx12<0x023, "GLOBAL_LOAD_SHORT_D16_HI", "global_load_d16_hi_b16">;
+defm GLOBAL_STORE_D16_HI_B8 : VGLOBAL_Real_AllAddr_gfx12<0x024, "GLOBAL_STORE_BYTE_D16_HI", "global_store_d16_hi_b8">;
+defm GLOBAL_STORE_D16_HI_B16 : VGLOBAL_Real_AllAddr_gfx12<0x025, "GLOBAL_STORE_SHORT_D16_HI", "global_store_d16_hi_b16">;
+defm GLOBAL_LOAD_ADDTID_B32 : VGLOBAL_Real_AllAddr_gfx12<0x028, "GLOBAL_LOAD_DWORD_ADDTID", "global_load_addtid_b32">;
+defm GLOBAL_STORE_ADDTID_B32 : VGLOBAL_Real_AllAddr_gfx12<0x029, "GLOBAL_STORE_DWORD_ADDTID", "global_store_addtid_b32">;
+
+defm GLOBAL_ATOMIC_SWAP_B32 : VGLOBAL_Real_Atomics_gfx12<0x033, "GLOBAL_ATOMIC_SWAP", "global_atomic_swap_b32", true>;
+defm GLOBAL_ATOMIC_CMPSWAP_B32 : VGLOBAL_Real_Atomics_gfx12<0x034, "GLOBAL_ATOMIC_CMPSWAP", "global_atomic_cmpswap_b32", true>;
+defm GLOBAL_ATOMIC_ADD_U32 : VGLOBAL_Real_Atomics_gfx12<0x035, "GLOBAL_ATOMIC_ADD", "global_atomic_add_u32", true>;
+defm GLOBAL_ATOMIC_SUB_U32 : VGLOBAL_Real_Atomics_gfx12<0x036, "GLOBAL_ATOMIC_SUB", "global_atomic_sub_u32", true>;
+defm GLOBAL_ATOMIC_SUB_CLAMP_U32 : VGLOBAL_Real_Atomics_gfx12<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_sub_clamp_u32", true, "global_atomic_csub_u32">;
+defm GLOBAL_ATOMIC_MIN_I32 : VGLOBAL_Real_Atomics_gfx12<0x038, "GLOBAL_ATOMIC_SMIN", "global_atomic_min_i32", true>;
+defm GLOBAL_ATOMIC_MIN_U32 : VGLOBAL_Real_Atomics_gfx12<0x039, "GLOBAL_ATOMIC_UMIN", "global_atomic_min_u32", true>;
+defm GLOBAL_ATOMIC_MAX_I32 : VGLOBAL_Real_Atomics_gfx12<0x03a, "GLOBAL_ATOMIC_SMAX", "global_atomic_max_i32", true>;
+defm GLOBAL_ATOMIC_MAX_U32 : VGLOBAL_Real_Atomics_gfx12<0x03b, "GLOBAL_ATOMIC_UMAX", "global_atomic_max_u32", true>;
+defm GLOBAL_ATOMIC_AND_B32 : VGLOBAL_Real_Atomics_gfx12<0x03c, "GLOBAL_ATOMIC_AND", "global_atomic_and_b32", true>;
+defm GLOBAL_ATOMIC_OR_B32 : VGLOBAL_Real_Atomics_gfx12<0x03d, "GLOBAL_ATOMIC_OR", "global_atomic_or_b32", true>;
+defm GLOBAL_ATOMIC_XOR_B32 : VGLOBAL_Real_Atomics_gfx12<0x03e, "GLOBAL_ATOMIC_XOR", "global_atomic_xor_b32", true>;
+defm GLOBAL_ATOMIC_INC_U32 : VGLOBAL_Real_Atomics_gfx12<0x03f, "GLOBAL_ATOMIC_INC", "global_atomic_inc_u32", true>;
+defm GLOBAL_ATOMIC_DEC_U32 : VGLOBAL_Real_Atomics_gfx12<0x040, "GLOBAL_ATOMIC_DEC", "global_atomic_dec_u32", true>;
+defm GLOBAL_ATOMIC_SWAP_B64 : VGLOBAL_Real_Atomics_gfx12<0x041, "GLOBAL_ATOMIC_SWAP_X2", "global_atomic_swap_b64", true>;
+defm GLOBAL_ATOMIC_CMPSWAP_B64 : VGLOBAL_Real_Atomics_gfx12<0x042, "GLOBAL_ATOMIC_CMPSWAP_X2", "global_atomic_cmpswap_b64", true>;
+defm GLOBAL_ATOMIC_ADD_U64 : VGLOBAL_Real_Atomics_gfx12<0x043, "GLOBAL_ATOMIC_ADD_X2", "global_atomic_add_u64", true>;
+defm GLOBAL_ATOMIC_SUB_U64 : VGLOBAL_Real_Atomics_gfx12<0x044, "GLOBAL_ATOMIC_SUB_X2", "global_atomic_sub_u64", true>;
+defm GLOBAL_ATOMIC_MIN_I64 : VGLOBAL_Real_Atomics_gfx12<0x045, "GLOBAL_ATOMIC_SMIN_X2", "global_atomic_min_i64", true>;
+defm GLOBAL_ATOMIC_MIN_U64 : VGLOBAL_Real_Atomics_gfx12<0x046, "GLOBAL_ATOMIC_UMIN_X2", "global_atomic_min_u64", true>;
+defm GLOBAL_ATOMIC_MAX_I64 : VGLOBAL_Real_Atomics_gfx12<0x047, "GLOBAL_ATOMIC_SMAX_X2", "global_atomic_max_i64", true>;
+defm GLOBAL_ATOMIC_MAX_U64 : VGLOBAL_Real_Atomics_gfx12<0x048, "GLOBAL_ATOMIC_UMAX_X2", "global_atomic_max_u64", true>;
+defm GLOBAL_ATOMIC_AND_B64 : VGLOBAL_Real_Atomics_gfx12<0x049, "GLOBAL_ATOMIC_AND_X2", "global_atomic_and_b64", true>;
+defm GLOBAL_ATOMIC_OR_B64 : VGLOBAL_Real_Atomics_gfx12<0x04a, "GLOBAL_ATOMIC_OR_X2", "global_atomic_or_b64", true>;
+defm GLOBAL_ATOMIC_XOR_B64 : VGLOBAL_Real_Atomics_gfx12<0x04b, "GLOBAL_ATOMIC_XOR_X2", "global_atomic_xor_b64", true>;
+defm GLOBAL_ATOMIC_INC_U64 : VGLOBAL_Real_Atomics_gfx12<0x04c, "GLOBAL_ATOMIC_INC_X2", "global_atomic_inc_u64", true>;
+defm GLOBAL_ATOMIC_DEC_U64 : VGLOBAL_Real_Atomics_gfx12<0x04d, "GLOBAL_ATOMIC_DEC_X2", "global_atomic_dec_u64", true>;
+defm GLOBAL_ATOMIC_MIN_NUM_F32 : VGLOBAL_Real_Atomics_gfx12<0x051, "GLOBAL_ATOMIC_FMIN", "global_atomic_min_num_f32", true, "global_atomic_min_f32">;
+defm GLOBAL_ATOMIC_MAX_NUM_F32 : VGLOBAL_Real_Atomics_gfx12<0x052, "GLOBAL_ATOMIC_FMAX", "global_atomic_max_num_f32", true, "global_atomic_max_f32">;
+defm GLOBAL_ATOMIC_ADD_F32 : VGLOBAL_Real_Atomics_gfx12<0x056, "GLOBAL_ATOMIC_ADD_F32", "global_atomic_add_f32">;
+
+// ENC_VSCRATCH.
+defm SCRATCH_LOAD_U8 : VSCRATCH_Real_AllAddr_gfx12<0x10, "SCRATCH_LOAD_UBYTE", "scratch_load_u8", true>;
+defm SCRATCH_LOAD_I8 : VSCRATCH_Real_AllAddr_gfx12<0x11, "SCRATCH_LOAD_SBYTE", "scratch_load_i8", true>;
+defm SCRATCH_LOAD_U16 : VSCRATCH_Real_AllAddr_gfx12<0x12, "SCRATCH_LOAD_USHORT", "scratch_load_u16", true>;
+defm SCRATCH_LOAD_I16 : VSCRATCH_Real_AllAddr_gfx12<0x13, "SCRATCH_LOAD_SSHORT", "scratch_load_i16", true>;
+defm SCRATCH_LOAD_B32 : VSCRATCH_Real_AllAddr_gfx12<0x14, "SCRATCH_LOAD_DWORD", "scratch_load_b32", true>;
+defm SCRATCH_LOAD_B64 : VSCRATCH_Real_AllAddr_gfx12<0x15, "SCRATCH_LOAD_DWORDX2", "scratch_load_b64", true>;
+defm SCRATCH_LOAD_B96 : VSCRATCH_Real_AllAddr_gfx12<0x16, "SCRATCH_LOAD_DWORDX3", "scratch_load_b96", true>;
+defm SCRATCH_LOAD_B128 : VSCRATCH_Real_AllAddr_gfx12<0x17, "SCRATCH_LOAD_DWORDX4", "scratch_load_b128", true>;
+defm SCRATCH_STORE_B8 : VSCRATCH_Real_AllAddr_gfx12<0x18, "SCRATCH_STORE_BYTE", "scratch_store_b8", true>;
+defm SCRATCH_STORE_B16 : VSCRATCH_Real_AllAddr_gfx12<0x19, "SCRATCH_STORE_SHORT", "scratch_store_b16", true>;
+defm SCRATCH_STORE_B32 : VSCRATCH_Real_AllAddr_gfx12<0x1a, "SCRATCH_STORE_DWORD", "scratch_store_b32", true>;
+defm SCRATCH_STORE_B64 : VSCRATCH_Real_AllAddr_gfx12<0x1b, "SCRATCH_STORE_DWORDX2", "scratch_store_b64", true>;
+defm SCRATCH_STORE_B96 : VSCRATCH_Real_AllAddr_gfx12<0x1c, "SCRATCH_STORE_DWORDX3", "scratch_store_b96", true>;
+defm SCRATCH_STORE_B128 : VSCRATCH_Real_AllAddr_gfx12<0x1d, "SCRATCH_STORE_DWORDX4", "scratch_store_b128", true>;
+defm SCRATCH_LOAD_D16_U8 : VSCRATCH_Real_AllAddr_gfx12<0x1e, "SCRATCH_LOAD_UBYTE_D16", "scratch_load_d16_u8">;
+defm SCRATCH_LOAD_D16_I8 : VSCRATCH_Real_AllAddr_gfx12<0x1f, "SCRATCH_LOAD_SBYTE_D16", "scratch_load_d16_i8">;
+defm SCRATCH_LOAD_D16_B16 : VSCRATCH_Real_AllAddr_gfx12<0x20, "SCRATCH_LOAD_SHORT_D16", "scratch_load_d16_b16">;
+defm SCRATCH_LOAD_D16_HI_U8 : VSCRATCH_Real_AllAddr_gfx12<0x21, "SCRATCH_LOAD_UBYTE_D16_HI", "scratch_load_d16_hi_u8">;
+defm SCRATCH_LOAD_D16_HI_I8 : VSCRATCH_Real_AllAddr_gfx12<0x22, "SCRATCH_LOAD_SBYTE_D16_HI", "scratch_load_d16_hi_i8">;
+defm SCRATCH_LOAD_D16_HI_B16 : VSCRATCH_Real_AllAddr_gfx12<0x23, "SCRATCH_LOAD_SHORT_D16_HI", "scratch_load_d16_hi_b16">;
+defm SCRATCH_STORE_D16_HI_B8 : VSCRATCH_Real_AllAddr_gfx12<0x24, "SCRATCH_STORE_BYTE_D16_HI", "scratch_store_d16_hi_b8">;
+defm SCRATCH_STORE_D16_HI_B16 : VSCRATCH_Real_AllAddr_gfx12<0x25, "SCRATCH_STORE_SHORT_D16_HI", "scratch_store_d16_hi_b16">;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
index c9e0c6849568..05e10a95b157 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
@@ -25,7 +25,6 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
@@ -72,8 +71,11 @@ public:
auto *SecondMI = CI.SecondMI;
unsigned Opc1 = FirstMI->getOpcode();
unsigned Opc2 = SecondMI->getOpcode();
- int NewOpcode = AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
- AMDGPU::getVOPDOpcode(Opc2));
+ unsigned EncodingFamily =
+ AMDGPU::getVOPDEncodingFamily(SII->getSubtarget());
+ int NewOpcode =
+ AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
+ AMDGPU::getVOPDOpcode(Opc2), EncodingFamily);
assert(NewOpcode != -1 &&
"Should have previously determined this as a possible VOPD\n");
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 2592584b89c6..a75082268c77 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -191,6 +191,16 @@ MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
return &OldOpnd;
}
+[[maybe_unused]] static unsigned getOperandSize(MachineInstr &MI, unsigned Idx,
+ MachineRegisterInfo &MRI) {
+ int16_t RegClass = MI.getDesc().operands()[Idx].RegClass;
+ if (RegClass == -1)
+ return 0;
+
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ return TRI->getRegSizeInBits(*TRI->getRegClass(RegClass));
+}
+
MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
MachineInstr &MovMI,
RegSubRegPair CombOldVGPR,
@@ -278,6 +288,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
}
auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
assert(Src0);
+ int Src0Idx = NumOperands;
if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
LLVM_DEBUG(dbgs() << " failed: src0 is illegal\n");
Fail = true;
@@ -301,7 +312,17 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
}
auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
if (Src1) {
- if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
+ int OpNum = NumOperands;
+ // If the subtarget does not support SGPRs for the src1 operand, then the
+ // requirements are the same as for src0. We check src0 instead because
+ // pseudos are shared between subtargets and allow SGPR for src1 on all.
+ if (!ST->hasDPPSrc1SGPR()) {
+ assert(getOperandSize(*DPPInst, Src0Idx, *MRI) ==
+ getOperandSize(*DPPInst, NumOperands, *MRI) &&
+ "Src0 and Src1 operands should have the same size");
+ OpNum = Src0Idx;
+ }
+ if (!TII->isOperandLegal(*DPPInst.getInstr(), OpNum, Src1)) {
LLVM_DEBUG(dbgs() << " failed: src1 is illegal\n");
Fail = true;
break;
@@ -505,7 +526,7 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl);
assert(DppCtrl && DppCtrl->isImm());
- if (!AMDGPU::isLegal64BitDPPControl(DppCtrl->getImm())) {
+ if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl->getImm())) {
LLVM_DEBUG(dbgs() << " failed: 64 bit dpp move uses unsupported"
" control value\n");
// Let it split, then control may become legal.
@@ -728,7 +749,7 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
++NumDPPMovsCombined;
} else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
MI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
- if (ST->has64BitDPP() && combineDPPMov(MI)) {
+ if (ST->hasDPALU_DPP() && combineDPPMov(MI)) {
Changed = true;
++NumDPPMovsCombined;
} else {
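In short, the new src1 handling validates src1 against src0's operand slot whenever the subtarget lacks SGPR support for DPP src1. A condensed sketch, assuming Src0Idx, NumOperands, DPPInst, Src1, ST and TII as in the hunk above:

    // Sketch: pick which operand slot constrains src1 legality.
    int OpNum = NumOperands;            // src1's own slot by default
    if (!ST->hasDPPSrc1SGPR())          // src1 must obey src0's rules here,
      OpNum = Src0Idx;                  // so validate against src0's slot
    bool Legal = TII->isOperandLegal(*DPPInst.getInstr(), OpNum, Src1);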
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 2d53b2a70dbe..a7d8ff0242b8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -163,7 +163,9 @@ static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
static bool isPermlane(const MachineInstr &MI) {
unsigned Opcode = MI.getOpcode();
return Opcode == AMDGPU::V_PERMLANE16_B32_e64 ||
- Opcode == AMDGPU::V_PERMLANEX16_B32_e64;
+ Opcode == AMDGPU::V_PERMLANEX16_B32_e64 ||
+ Opcode == AMDGPU::V_PERMLANE16_VAR_B32_e64 ||
+ Opcode == AMDGPU::V_PERMLANEX16_VAR_B32_e64;
}
static bool isLdsDma(const MachineInstr &MI) {
@@ -271,7 +273,7 @@ GCNHazardRecognizer::getMFMAPipelineWaitStates(const MachineInstr &MI) const {
const MCSchedClassDesc *SC = TSchedModel.resolveSchedClass(&MI);
assert(TSchedModel.getWriteProcResBegin(SC) !=
TSchedModel.getWriteProcResEnd(SC));
- return TSchedModel.getWriteProcResBegin(SC)->Cycles;
+ return TSchedModel.getWriteProcResBegin(SC)->ReleaseAtCycle;
}
void GCNHazardRecognizer::processBundle() {
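The Cycles field of MCWriteProcResEntry was renamed to ReleaseAtCycle in the scheduling model, which is all the hunk above tracks. A minimal sketch of the query, with TSchedModel and MI as in the surrounding function:

    // Sketch: wait states come from the first write-resource entry.
    const MCSchedClassDesc *SC = TSchedModel.resolveSchedClass(&MI);
    unsigned WaitStates = TSchedModel.getWriteProcResBegin(SC)->ReleaseAtCycle;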
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index d89c9b1febde..cdc9de7f65e3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -251,7 +251,7 @@ GCNIterativeScheduler::getRegionPressure(MachineBasicBlock::iterator Begin,
assert(UPTracker.isValid() ||
(dbgs() << "Tracked region ",
printRegion(dbgs(), Begin, End, LIS), false));
- return UPTracker.moveMaxPressure();
+ return UPTracker.getMaxPressureAndReset();
}
// returns max pressure for a tentative schedule
@@ -272,7 +272,7 @@ GCNIterativeScheduler::getSchedulePressure(const Region &R,
for (auto I = Schedule.end(), B = Schedule.begin(); I != B;) {
RPTracker.recede(*getMachineInstr(*--I));
}
- return RPTracker.moveMaxPressure();
+ return RPTracker.getMaxPressureAndReset();
}
void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overridden
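moveMaxPressure() was replaced by getMaxPressureAndReset(), which re-arms the maximum to the current pressure rather than clearing it outright (see the GCNRegPressure.h hunk later in this patch). A sketch of the equivalent two-step sequence, assuming an upward tracker RPT:

    // Sketch: getMaxPressureAndReset() is shorthand for the calls below.
    GCNRegPressure Max = RPT.getMaxPressure();
    RPT.resetMaxPressure();   // MaxPressure = CurPressure, not clear()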
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNProcessors.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNProcessors.td
index b9c9358f88b9..96af1a6aab3d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -9,11 +9,11 @@
// The code produced for "generic" is only useful for tests and cannot
// reasonably be expected to execute on any particular target.
def : ProcessorModel<"generic", NoSchedModel,
- [FeatureWavefrontSize64]
+ [FeatureWavefrontSize64, FeatureGDS, FeatureGWS]
>;
def : ProcessorModel<"generic-hsa", NoSchedModel,
- [FeatureWavefrontSize64, FeatureFlatAddressSpace]
+ [FeatureWavefrontSize64, FeatureGDS, FeatureGWS, FeatureFlatAddressSpace]
>;
//===------------------------------------------------------------===//
@@ -279,3 +279,15 @@ def : ProcessorModel<"gfx1150", GFX11SpeedModel,
def : ProcessorModel<"gfx1151", GFX11SpeedModel,
FeatureISAVersion11_5_1.Features
>;
+
+//===----------------------------------------------------------------------===//
+// GCN GFX12.
+//===----------------------------------------------------------------------===//
+
+def : ProcessorModel<"gfx1200", GFX12SpeedModel,
+ FeatureISAVersion12.Features
+>;
+
+def : ProcessorModel<"gfx1201", GFX12SpeedModel,
+ FeatureISAVersion12.Features
+>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 68cf97170369..fd8f0bebd3be 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "GCNRegPressure.h"
+#include "AMDGPU.h"
#include "llvm/CodeGen/RegisterPressure.h"
using namespace llvm;
@@ -31,7 +32,6 @@ bool llvm::isEqual(const GCNRPTracker::LiveRegSet &S1,
return true;
}
-
///////////////////////////////////////////////////////////////////////////////
// GCNRegPressure
@@ -78,7 +78,9 @@ void GCNRegPressure::inc(unsigned Reg,
if (PrevMask.none()) {
assert(NewMask.any());
- Value[Kind] += Sign * MRI.getPressureSets(Reg).getWeight();
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ Value[Kind] +=
+ Sign * TRI->getRegClassWeight(MRI.getRegClass(Reg)).RegWeight;
}
break;
@@ -133,8 +135,6 @@ bool GCNRegPressure::less(const GCNSubtarget &ST,
O.getVGPRNum(ST.hasGFX90AInsts()));
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD
Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST) {
return Printable([&RP, ST](raw_ostream &OS) {
OS << "VGPRs: " << RP.Value[GCNRegPressure::VGPR32] << ' '
@@ -153,7 +153,6 @@ Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST) {
OS << '\n';
});
}
-#endif
static LaneBitmask getDefRegMask(const MachineOperand &MO,
const MachineRegisterInfo &MRI) {
@@ -167,66 +166,60 @@ static LaneBitmask getDefRegMask(const MachineOperand &MO,
MRI.getTargetRegisterInfo()->getSubRegIndexLaneMask(MO.getSubReg());
}
-static LaneBitmask getUsedRegMask(const MachineOperand &MO,
- const MachineRegisterInfo &MRI,
- const LiveIntervals &LIS) {
- assert(MO.isUse() && MO.isReg() && MO.getReg().isVirtual());
-
- if (auto SubReg = MO.getSubReg())
- return MRI.getTargetRegisterInfo()->getSubRegIndexLaneMask(SubReg);
-
- auto MaxMask = MRI.getMaxLaneMaskForVReg(MO.getReg());
- if (SIRegisterInfo::getNumCoveredRegs(MaxMask) > 1) // cannot have subregs
- return MaxMask;
-
- // For a tentative schedule LIS isn't updated yet but livemask should remain
- // the same on any schedule. Subreg defs can be reordered but they all must
- // dominate uses anyway.
- auto SI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex();
- return getLiveLaneMask(MO.getReg(), SI, LIS, MRI);
-}
-
-static SmallVector<RegisterMaskPair, 8>
-collectVirtualRegUses(const MachineInstr &MI, const LiveIntervals &LIS,
+static void
+collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
+ const MachineInstr &MI, const LiveIntervals &LIS,
const MachineRegisterInfo &MRI) {
- SmallVector<RegisterMaskPair, 8> Res;
+ SlotIndex InstrSI;
for (const auto &MO : MI.operands()) {
if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
if (!MO.isUse() || !MO.readsReg())
continue;
- auto const UsedMask = getUsedRegMask(MO, MRI, LIS);
+ Register Reg = MO.getReg();
+ if (llvm::any_of(RegMaskPairs, [Reg](const RegisterMaskPair &RM) {
+ return RM.RegUnit == Reg;
+ }))
+ continue;
- auto Reg = MO.getReg();
- auto I = llvm::find_if(
- Res, [Reg](const RegisterMaskPair &RM) { return RM.RegUnit == Reg; });
- if (I != Res.end())
- I->LaneMask |= UsedMask;
- else
- Res.push_back(RegisterMaskPair(Reg, UsedMask));
+ LaneBitmask UseMask;
+ auto &LI = LIS.getInterval(Reg);
+ if (!LI.hasSubRanges())
+ UseMask = MRI.getMaxLaneMaskForVReg(Reg);
+ else {
+ // For a tentative schedule LIS isn't updated yet but livemask should
+ // remain the same on any schedule. Subreg defs can be reordered but they
+ // all must dominate uses anyway.
+ if (!InstrSI)
+ InstrSI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex();
+ UseMask = getLiveLaneMask(LI, InstrSI, MRI);
+ }
+
+ RegMaskPairs.emplace_back(Reg, UseMask);
}
- return Res;
}
///////////////////////////////////////////////////////////////////////////////
// GCNRPTracker
-LaneBitmask llvm::getLiveLaneMask(unsigned Reg,
- SlotIndex SI,
+LaneBitmask llvm::getLiveLaneMask(unsigned Reg, SlotIndex SI,
const LiveIntervals &LIS,
const MachineRegisterInfo &MRI) {
+ return getLiveLaneMask(LIS.getInterval(Reg), SI, MRI);
+}
+
+LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
+ const MachineRegisterInfo &MRI) {
LaneBitmask LiveMask;
- const auto &LI = LIS.getInterval(Reg);
if (LI.hasSubRanges()) {
for (const auto &S : LI.subranges())
if (S.liveAt(SI)) {
LiveMask |= S.LaneMask;
- assert(LiveMask < MRI.getMaxLaneMaskForVReg(Reg) ||
- LiveMask == MRI.getMaxLaneMaskForVReg(Reg));
+ assert(LiveMask == (LiveMask & MRI.getMaxLaneMaskForVReg(LI.reg())));
}
} else if (LI.liveAt(SI)) {
- LiveMask = MRI.getMaxLaneMaskForVReg(Reg);
+ LiveMask = MRI.getMaxLaneMaskForVReg(LI.reg());
}
return LiveMask;
}
@@ -262,9 +255,15 @@ void GCNRPTracker::reset(const MachineInstr &MI,
MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs);
}
-void GCNUpwardRPTracker::reset(const MachineInstr &MI,
- const LiveRegSet *LiveRegsCopy) {
- GCNRPTracker::reset(MI, LiveRegsCopy, true);
+////////////////////////////////////////////////////////////////////////////////
+// GCNUpwardRPTracker
+
+void GCNUpwardRPTracker::reset(const MachineRegisterInfo &MRI_,
+ const LiveRegSet &LiveRegs_) {
+ MRI = &MRI_;
+ LiveRegs = LiveRegs_;
+ LastTrackedMI = nullptr;
+ MaxPressure = CurPressure = getRegPressure(MRI_, LiveRegs_);
}
void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
@@ -275,41 +274,61 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
if (MI.isDebugInstr())
return;
- auto const RegUses = collectVirtualRegUses(MI, LIS, *MRI);
+ // Kill all defs.
+ GCNRegPressure DefPressure, ECDefPressure;
+ bool HasECDefs = false;
+ for (const MachineOperand &MO : MI.all_defs()) {
+ if (!MO.getReg().isVirtual())
+ continue;
- // calc pressure at the MI (defs + uses)
- auto AtMIPressure = CurPressure;
- for (const auto &U : RegUses) {
- auto LiveMask = LiveRegs[U.RegUnit];
- AtMIPressure.inc(U.RegUnit, LiveMask, LiveMask | U.LaneMask, *MRI);
- }
- // update max pressure
- MaxPressure = max(AtMIPressure, MaxPressure);
+ Register Reg = MO.getReg();
+ LaneBitmask DefMask = getDefRegMask(MO, *MRI);
- for (const auto &MO : MI.all_defs()) {
- if (!MO.getReg().isVirtual() || MO.isDead())
- continue;
+ // Treat a def as fully live at the moment of definition: keep a record.
+ if (MO.isEarlyClobber()) {
+ ECDefPressure.inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);
+ HasECDefs = true;
+ } else
+ DefPressure.inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);
- auto Reg = MO.getReg();
auto I = LiveRegs.find(Reg);
if (I == LiveRegs.end())
continue;
- auto &LiveMask = I->second;
- auto PrevMask = LiveMask;
- LiveMask &= ~getDefRegMask(MO, *MRI);
+
+ LaneBitmask &LiveMask = I->second;
+ LaneBitmask PrevMask = LiveMask;
+ LiveMask &= ~DefMask;
CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
if (LiveMask.none())
LiveRegs.erase(I);
}
- for (const auto &U : RegUses) {
- auto &LiveMask = LiveRegs[U.RegUnit];
- auto PrevMask = LiveMask;
+
+ // Update MaxPressure with defs pressure.
+ DefPressure += CurPressure;
+ if (HasECDefs)
+ DefPressure += ECDefPressure;
+ MaxPressure = max(DefPressure, MaxPressure);
+
+ // Make uses alive.
+ SmallVector<RegisterMaskPair, 8> RegUses;
+ collectVirtualRegUses(RegUses, MI, LIS, *MRI);
+ for (const RegisterMaskPair &U : RegUses) {
+ LaneBitmask &LiveMask = LiveRegs[U.RegUnit];
+ LaneBitmask PrevMask = LiveMask;
LiveMask |= U.LaneMask;
CurPressure.inc(U.RegUnit, PrevMask, LiveMask, *MRI);
}
+
+ // Update MaxPressure with uses plus early-clobber defs pressure.
+ MaxPressure = HasECDefs ? max(CurPressure + ECDefPressure, MaxPressure)
+ : max(CurPressure, MaxPressure);
+
assert(CurPressure == getRegPressure(*MRI, LiveRegs));
}
+////////////////////////////////////////////////////////////////////////////////
+// GCNDownwardRPTracker
+
bool GCNDownwardRPTracker::reset(const MachineInstr &MI,
const LiveRegSet *LiveRegsCopy) {
MRI = &MI.getParent()->getParent()->getRegInfo();
@@ -416,19 +435,17 @@ bool GCNDownwardRPTracker::advance(MachineBasicBlock::const_iterator Begin,
return advance(End);
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD
Printable llvm::reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
const GCNRPTracker::LiveRegSet &TrackedLR,
- const TargetRegisterInfo *TRI) {
- return Printable([&LISLR, &TrackedLR, TRI](raw_ostream &OS) {
+ const TargetRegisterInfo *TRI, StringRef Pfx) {
+ return Printable([&LISLR, &TrackedLR, TRI, Pfx](raw_ostream &OS) {
for (auto const &P : TrackedLR) {
auto I = LISLR.find(P.first);
if (I == LISLR.end()) {
- OS << " " << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second)
+ OS << Pfx << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second)
<< " isn't found in LIS reported set\n";
} else if (I->second != P.second) {
- OS << " " << printReg(P.first, TRI)
+ OS << Pfx << printReg(P.first, TRI)
<< " masks doesn't match: LIS reported " << PrintLaneMask(I->second)
<< ", tracked " << PrintLaneMask(P.second) << '\n';
}
@@ -436,7 +453,7 @@ Printable llvm::reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
for (auto const &P : LISLR) {
auto I = TrackedLR.find(P.first);
if (I == TrackedLR.end()) {
- OS << " " << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second)
+ OS << Pfx << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second)
<< " isn't found in tracked set\n";
}
}
@@ -465,7 +482,6 @@ bool GCNUpwardRPTracker::isValid() const {
return true;
}
-LLVM_DUMP_METHOD
Printable llvm::print(const GCNRPTracker::LiveRegSet &LiveRegs,
const MachineRegisterInfo &MRI) {
return Printable([&LiveRegs, &MRI](raw_ostream &OS) {
@@ -481,7 +497,163 @@ Printable llvm::print(const GCNRPTracker::LiveRegSet &LiveRegs,
});
}
-LLVM_DUMP_METHOD
void GCNRegPressure::dump() const { dbgs() << print(*this); }
-#endif
+static cl::opt<bool> UseDownwardTracker(
+ "amdgpu-print-rp-downward",
+ cl::desc("Use GCNDownwardRPTracker for GCNRegPressurePrinter pass"),
+ cl::init(false), cl::Hidden);
+
+char llvm::GCNRegPressurePrinter::ID = 0;
+char &llvm::GCNRegPressurePrinterID = GCNRegPressurePrinter::ID;
+
+INITIALIZE_PASS(GCNRegPressurePrinter, "amdgpu-print-rp", "", true, true)
+
+// Return lanemask of Reg's subregs that are live-through at [Begin, End] and
+// are fully covered by Mask.
+static LaneBitmask
+getRegLiveThroughMask(const MachineRegisterInfo &MRI, const LiveIntervals &LIS,
+ Register Reg, SlotIndex Begin, SlotIndex End,
+ LaneBitmask Mask = LaneBitmask::getAll()) {
+
+ auto IsInOneSegment = [Begin, End](const LiveRange &LR) -> bool {
+ auto *Segment = LR.getSegmentContaining(Begin);
+ return Segment && Segment->contains(End);
+ };
+
+ LaneBitmask LiveThroughMask;
+ const LiveInterval &LI = LIS.getInterval(Reg);
+ if (LI.hasSubRanges()) {
+ for (auto &SR : LI.subranges()) {
+ if ((SR.LaneMask & Mask) == SR.LaneMask && IsInOneSegment(SR))
+ LiveThroughMask |= SR.LaneMask;
+ }
+ } else {
+ LaneBitmask RegMask = MRI.getMaxLaneMaskForVReg(Reg);
+ if ((RegMask & Mask) == RegMask && IsInOneSegment(LI))
+ LiveThroughMask = RegMask;
+ }
+
+ return LiveThroughMask;
+}
+
+bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ const LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+
+ auto &OS = dbgs();
+
+// Leading spaces are important for YAML syntax.
+#define PFX " "
+
+ OS << "---\nname: " << MF.getName() << "\nbody: |\n";
+
+ auto printRP = [](const GCNRegPressure &RP) {
+ return Printable([&RP](raw_ostream &OS) {
+ OS << format(PFX " %-5d", RP.getSGPRNum())
+ << format(" %-5d", RP.getVGPRNum(false));
+ });
+ };
+
+ auto ReportLISMismatchIfAny = [&](const GCNRPTracker::LiveRegSet &TrackedLR,
+ const GCNRPTracker::LiveRegSet &LISLR) {
+ if (LISLR != TrackedLR) {
+ OS << PFX " mis LIS: " << llvm::print(LISLR, MRI)
+ << reportMismatch(LISLR, TrackedLR, TRI, PFX " ");
+ }
+ };
+
+ // Register pressure before and at an instruction (in program order).
+ SmallVector<std::pair<GCNRegPressure, GCNRegPressure>, 16> RP;
+
+ for (auto &MBB : MF) {
+ RP.clear();
+ RP.reserve(MBB.size());
+
+ OS << PFX;
+ MBB.printName(OS);
+ OS << ":\n";
+
+ SlotIndex MBBStartSlot = LIS.getSlotIndexes()->getMBBStartIdx(&MBB);
+ SlotIndex MBBEndSlot = LIS.getSlotIndexes()->getMBBEndIdx(&MBB);
+
+ GCNRPTracker::LiveRegSet LiveIn, LiveOut;
+ GCNRegPressure RPAtMBBEnd;
+
+ if (UseDownwardTracker) {
+ if (MBB.empty()) {
+ LiveIn = LiveOut = getLiveRegs(MBBStartSlot, LIS, MRI);
+ RPAtMBBEnd = getRegPressure(MRI, LiveIn);
+ } else {
+ GCNDownwardRPTracker RPT(LIS);
+ RPT.reset(MBB.front());
+
+ LiveIn = RPT.getLiveRegs();
+
+ while (!RPT.advanceBeforeNext()) {
+ GCNRegPressure RPBeforeMI = RPT.getPressure();
+ RPT.advanceToNext();
+ RP.emplace_back(RPBeforeMI, RPT.getPressure());
+ }
+
+ LiveOut = RPT.getLiveRegs();
+ RPAtMBBEnd = RPT.getPressure();
+ }
+ } else {
+ GCNUpwardRPTracker RPT(LIS);
+ RPT.reset(MRI, MBBEndSlot);
+
+ LiveOut = RPT.getLiveRegs();
+ RPAtMBBEnd = RPT.getPressure();
+
+ for (auto &MI : reverse(MBB)) {
+ RPT.resetMaxPressure();
+ RPT.recede(MI);
+ if (!MI.isDebugInstr())
+ RP.emplace_back(RPT.getPressure(), RPT.getMaxPressure());
+ }
+
+ LiveIn = RPT.getLiveRegs();
+ }
+
+ OS << PFX " Live-in: " << llvm::print(LiveIn, MRI);
+ if (!UseDownwardTracker)
+ ReportLISMismatchIfAny(LiveIn, getLiveRegs(MBBStartSlot, LIS, MRI));
+
+ OS << PFX " SGPR VGPR\n";
+ int I = 0;
+ for (auto &MI : MBB) {
+ if (!MI.isDebugInstr()) {
+ auto &[RPBeforeInstr, RPAtInstr] =
+ RP[UseDownwardTracker ? I : (RP.size() - 1 - I)];
+ ++I;
+ OS << printRP(RPBeforeInstr) << '\n' << printRP(RPAtInstr) << " ";
+ } else
+ OS << PFX " ";
+ MI.print(OS);
+ }
+ OS << printRP(RPAtMBBEnd) << '\n';
+
+ OS << PFX " Live-out:" << llvm::print(LiveOut, MRI);
+ if (UseDownwardTracker)
+ ReportLISMismatchIfAny(LiveOut, getLiveRegs(MBBEndSlot, LIS, MRI));
+
+ GCNRPTracker::LiveRegSet LiveThrough;
+ for (auto [Reg, Mask] : LiveIn) {
+ LaneBitmask MaskIntersection = Mask & LiveOut.lookup(Reg);
+ if (MaskIntersection.any()) {
+ LaneBitmask LTMask = getRegLiveThroughMask(
+ MRI, LIS, Reg, MBBStartSlot, MBBEndSlot, MaskIntersection);
+ if (LTMask.any())
+ LiveThrough[Reg] = LTMask;
+ }
+ }
+ OS << PFX " Live-thr:" << llvm::print(LiveThrough, MRI);
+ OS << printRP(getRegPressure(MRI, LiveThrough)) << '\n';
+ }
+ OS << "...\n";
+ return false;
+
+#undef PFX
+}
\ No newline at end of file
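The reworked GCNUpwardRPTracker::recede() above proceeds in three steps: account def pressure (tracking early-clobber defs separately, since they coexist with the uses), kill the defs from the live set, then make the uses live. A compressed sketch of just the max-pressure bookkeeping, using the names from the hunk and the GCNRegPressure arithmetic this patch introduces:

    // Sketch: MaxPressure must cover both the def point and the use point.
    DefPressure += CurPressure;                  // pressure while defs live
    if (HasECDefs)
      DefPressure += ECDefPressure;              // EC defs overlap the uses
    MaxPressure = max(DefPressure, MaxPressure);
    // ...defs killed, uses made live, CurPressure updated in between...
    MaxPressure = HasECDefs ? max(CurPressure + ECDefPressure, MaxPressure)
                            : max(CurPressure, MaxPressure);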
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 72e18acc1b8e..4100970fe1a9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -85,6 +85,18 @@ struct GCNRegPressure {
return !(*this == O);
}
+ GCNRegPressure &operator+=(const GCNRegPressure &RHS) {
+ for (unsigned I = 0; I < TOTAL_KINDS; ++I)
+ Value[I] += RHS.Value[I];
+ return *this;
+ }
+
+ GCNRegPressure &operator-=(const GCNRegPressure &RHS) {
+ for (unsigned I = 0; I < TOTAL_KINDS; ++I)
+ Value[I] -= RHS.Value[I];
+ return *this;
+ }
+
void dump() const;
private:
@@ -105,6 +117,20 @@ inline GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2) {
return Res;
}
+inline GCNRegPressure operator+(const GCNRegPressure &P1,
+ const GCNRegPressure &P2) {
+ GCNRegPressure Sum = P1;
+ Sum += P2;
+ return Sum;
+}
+
+inline GCNRegPressure operator-(const GCNRegPressure &P1,
+ const GCNRegPressure &P2) {
+ GCNRegPressure Diff = P1;
+ Diff -= P2;
+ return Diff;
+}
+
class GCNRPTracker {
public:
using LiveRegSet = DenseMap<unsigned, LaneBitmask>;
@@ -128,32 +154,55 @@ public:
void clearMaxPressure() { MaxPressure.clear(); }
- // returns MaxPressure, resetting it
- decltype(MaxPressure) moveMaxPressure() {
- auto Res = MaxPressure;
- MaxPressure.clear();
- return Res;
- }
+ GCNRegPressure getPressure() const { return CurPressure; }
decltype(LiveRegs) moveLiveRegs() {
return std::move(LiveRegs);
}
};
+GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI);
+
class GCNUpwardRPTracker : public GCNRPTracker {
public:
GCNUpwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {}
- // reset tracker to the point just below MI
- // filling live regs upon this point using LIS
- void reset(const MachineInstr &MI, const LiveRegSet *LiveRegs = nullptr);
+ // reset tracker and set live register set to the specified value.
+ void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_);
+
+ // reset tracker at the specified slot index.
+ void reset(const MachineRegisterInfo &MRI, SlotIndex SI) {
+ reset(MRI, llvm::getLiveRegs(SI, LIS, MRI));
+ }
+
+ // reset tracker to the end of the MBB.
+ void reset(const MachineBasicBlock &MBB) {
+ reset(MBB.getParent()->getRegInfo(),
+ LIS.getSlotIndexes()->getMBBEndIdx(&MBB));
+ }
+
+ // reset tracker to the point just after MI (in program order).
+ void reset(const MachineInstr &MI) {
+ reset(MI.getMF()->getRegInfo(), LIS.getInstructionIndex(MI).getDeadSlot());
+ }
- // move to the state just above the MI
+ // move to the state just before the MI (in program order).
void recede(const MachineInstr &MI);
// checks whether the tracker's state after receding MI corresponds
- // to reported by LIS
+ // to the state reported by LIS.
bool isValid() const;
+
+ const GCNRegPressure &getMaxPressure() const { return MaxPressure; }
+
+ void resetMaxPressure() { MaxPressure = CurPressure; }
+
+ GCNRegPressure getMaxPressureAndReset() {
+ GCNRegPressure RP = MaxPressure;
+ resetMaxPressure();
+ return RP;
+ }
};
class GCNDownwardRPTracker : public GCNRPTracker {
@@ -167,6 +216,13 @@ public:
MachineBasicBlock::const_iterator getNext() const { return NextMI; }
+ // Return MaxPressure and clear it.
+ GCNRegPressure moveMaxPressure() {
+ auto Res = MaxPressure;
+ MaxPressure.clear();
+ return Res;
+ }
+
// Reset tracker to the point before the MI
// filling live regs upon this point using LIS.
// Returns false if block is empty except debug values.
@@ -196,8 +252,10 @@ LaneBitmask getLiveLaneMask(unsigned Reg,
const LiveIntervals &LIS,
const MachineRegisterInfo &MRI);
-GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI,
- const LiveIntervals &LIS,
+LaneBitmask getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
+ const MachineRegisterInfo &MRI);
+
+GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
const MachineRegisterInfo &MRI);
/// creates a map MachineInstr -> LiveRegSet
@@ -275,7 +333,22 @@ Printable print(const GCNRPTracker::LiveRegSet &LiveRegs,
Printable reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
const GCNRPTracker::LiveRegSet &TrackedL,
- const TargetRegisterInfo *TRI);
+ const TargetRegisterInfo *TRI, StringRef Pfx = " ");
+
+struct GCNRegPressurePrinter : public MachineFunctionPass {
+ static char ID;
+
+public:
+ GCNRegPressurePrinter() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
} // end namespace llvm
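Taken together, the new reset overloads and max-pressure accessors give a compact bottom-up walk over a block. A minimal sketch, assuming LIS is the LiveIntervals analysis for the function containing MBB (this mirrors the printer pass added in GCNRegPressure.cpp above):

    // Sketch: per-instruction register pressure, computed bottom-up.
    GCNUpwardRPTracker RPT(LIS);
    RPT.reset(MBB);                       // live regs at the block end
    for (MachineInstr &MI : reverse(MBB)) {
      RPT.resetMaxPressure();
      RPT.recede(MI);
      GCNRegPressure AtMI = RPT.getMaxPressure();  // pressure at this MI
      // ...consume AtMI...
    }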
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
index 99db7e4af9fd..019b64dd871e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
@@ -101,17 +101,16 @@ private:
/// find new regclass such that:
/// 1. It has subregs obtained by shifting each OldSubReg by RShift number
/// of bits to the right. Every "shifted" subreg should have the same
- /// SubRegRC. SubRegRC can be null, in this case it initialized using
- /// getSubRegisterClass. If CoverSubregIdx is not zero it's a subreg that
- /// "covers" all other subregs in pairs. Basically such subreg becomes a
- /// whole register.
+ /// SubRegRC. If CoverSubregIdx is not zero it's a subreg that "covers"
+ /// all other subregs in pairs. Basically such subreg becomes a whole
+ /// register.
/// 2. Resulting register class contains registers of minimal size but not
/// less than RegNumBits.
///
/// SubRegs is map of OldSubReg -> [SubRegRC, NewSubReg] and is used as in/out
/// parameter:
/// OldSubReg - input parameter,
- /// SubRegRC - in/out, should be changed for unknown regclass,
+ /// SubRegRC - input parameter (cannot be null),
/// NewSubReg - output, contains shifted subregs on return.
const TargetRegisterClass *
getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift,
@@ -228,19 +227,7 @@ GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
BitVector ClassMask(getAllocatableAndAlignedRegClassMask(RCAlign));
for (auto &[OldSubReg, SRI] : SubRegs) {
auto &[SubRegRC, NewSubReg] = SRI;
-
- // Register class may be unknown, for example:
- // undef %0.sub4:sgpr_1024 = S_MOV_B32 01
- // %0.sub5:sgpr_1024 = S_MOV_B32 02
- // %1:vreg_64 = COPY %0.sub4_sub5
- // Register classes for subregs 'sub4' and 'sub5' are known from the
- // description of destination operand of S_MOV_B32 instruction but the
- // class for the subreg 'sub4_sub5' isn't specified by the COPY instruction.
- if (!SubRegRC)
- SubRegRC = TRI->getSubRegisterClass(RC, OldSubReg);
-
- if (!SubRegRC)
- return nullptr;
+ assert(SubRegRC);
LLVM_DEBUG(dbgs() << " " << TRI->getSubRegIndexName(OldSubReg) << ':'
<< TRI->getRegClassName(SubRegRC)
@@ -248,6 +235,8 @@ GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
<< " -> ");
if (OldSubReg == CoverSubregIdx) {
+ // Covering subreg will become a full register, RC should be allocatable.
+ assert(SubRegRC->isAllocatable());
NewSubReg = AMDGPU::NoSubRegister;
LLVM_DEBUG(dbgs() << "whole reg");
} else {
@@ -421,33 +410,42 @@ GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const {
bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
auto Range = MRI->reg_nodbg_operands(Reg);
- if (Range.begin() == Range.end())
+ if (Range.empty() || any_of(Range, [](MachineOperand &MO) {
+ return MO.getSubReg() == AMDGPU::NoSubRegister; // Whole reg used. [1]
+ }))
return false;
- for (MachineOperand &MO : Range) {
- if (MO.getSubReg() == AMDGPU::NoSubRegister) // Whole reg used, quit.
- return false;
- }
-
auto *RC = MRI->getRegClass(Reg);
LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI)
<< ':' << TRI->getRegClassName(RC) << '\n');
- // Collect used subregs and constrained reg classes infered from instruction
+ // Collect used subregs and their reg classes inferred from instruction
// operands.
SubRegMap SubRegs;
- for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
- assert(MO.getSubReg() != AMDGPU::NoSubRegister);
- auto *OpDescRC = getOperandRegClass(MO);
- const auto [I, Inserted] = SubRegs.try_emplace(MO.getSubReg(), OpDescRC);
- if (!Inserted && OpDescRC) {
- SubRegInfo &SRI = I->second;
- SRI.RC = SRI.RC ? TRI->getCommonSubClass(SRI.RC, OpDescRC) : OpDescRC;
- if (!SRI.RC) {
- LLVM_DEBUG(dbgs() << " Couldn't find common target regclass\n");
- return false;
+ for (MachineOperand &MO : Range) {
+ const unsigned SubReg = MO.getSubReg();
+ assert(SubReg != AMDGPU::NoSubRegister); // Due to [1].
+ LLVM_DEBUG(dbgs() << " " << TRI->getSubRegIndexName(SubReg) << ':');
+
+ const auto [I, Inserted] = SubRegs.try_emplace(SubReg);
+ const TargetRegisterClass *&SubRegRC = I->second.RC;
+
+ if (Inserted)
+ SubRegRC = TRI->getSubRegisterClass(RC, SubReg);
+
+ if (SubRegRC) {
+ if (const TargetRegisterClass *OpDescRC = getOperandRegClass(MO)) {
+ LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << " & "
+ << TRI->getRegClassName(OpDescRC) << " = ");
+ SubRegRC = TRI->getCommonSubClass(SubRegRC, OpDescRC);
}
}
+
+ if (!SubRegRC) {
+ LLVM_DEBUG(dbgs() << "couldn't find target regclass\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << '\n');
}
auto *NewRC = getMinSizeReg(RC, SubRegs);
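The rewrite above seeds each subreg's class from the enclosing register class and then narrows it by every operand descriptor, failing if the intersection becomes empty. The core narrowing step, sketched with the names from the hunk:

    // Sketch: start from the structural class, intersect with operand needs.
    const TargetRegisterClass *SubRegRC = TRI->getSubRegisterClass(RC, SubReg);
    if (SubRegRC)
      if (const TargetRegisterClass *OpDescRC = getOperandRegClass(MO))
        SubRegRC = TRI->getCommonSubClass(SubRegRC, OpDescRC);
    if (!SubRegRC)
      return false;   // no single class satisfies all uses of this subreg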
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 994cfea1fd7d..342d518f38bf 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -32,12 +32,18 @@
using namespace llvm;
-static cl::opt<bool>
- DisableUnclusterHighRP("amdgpu-disable-unclustred-high-rp-reschedule",
- cl::Hidden,
- cl::desc("Disable unclustred high register pressure "
- "reduction scheduling stage."),
- cl::init(false));
+static cl::opt<bool> DisableUnclusterHighRP(
+ "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,
+ cl::desc("Disable unclustered high register pressure "
+ "reduction scheduling stage."),
+ cl::init(false));
+
+static cl::opt<bool> DisableClusteredLowOccupancy(
+ "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,
+ cl::desc("Disable clustered low occupancy "
+ "rescheduling for ILP scheduling stage."),
+ cl::init(false));
+
static cl::opt<unsigned> ScheduleMetricBias(
"amdgpu-schedule-metric-bias", cl::Hidden,
cl::desc(
@@ -707,7 +713,7 @@ bool UnclusteredHighRPStage::initGCNSchedStage() {
return false;
SavedMutations.swap(DAG.Mutations);
- DAG.addMutation(createIGroupLPDAGMutation());
+ DAG.addMutation(createIGroupLPDAGMutation(/*IsPostRA=*/false));
InitialOccupancy = DAG.MinOccupancy;
// Aggressively try to reduce register pressure in the unclustered high RP
@@ -727,6 +733,9 @@ bool UnclusteredHighRPStage::initGCNSchedStage() {
}
bool ClusteredLowOccStage::initGCNSchedStage() {
+ if (DisableClusteredLowOccupancy)
+ return false;
+
if (!GCNSchedStage::initGCNSchedStage())
return false;
@@ -844,7 +853,9 @@ bool GCNSchedStage::initGCNRegion() {
StageID != GCNSchedStageID::UnclusteredHighRPReschedule) {
SavedMutations.clear();
SavedMutations.swap(DAG.Mutations);
- DAG.addMutation(createIGroupLPDAGMutation());
+ bool IsInitialStage = StageID == GCNSchedStageID::OccInitialSchedule ||
+ StageID == GCNSchedStageID::ILPInitialSchedule;
+ DAG.addMutation(createIGroupLPDAGMutation(/*IsReentry=*/!IsInitialStage));
}
return true;
@@ -1116,7 +1127,7 @@ bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
}
bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
- // If RP is not reduced in the unclustred reschedule stage, revert to the
+ // If RP is not reduced in the unclustered reschedule stage, revert to the
// old schedule.
if ((WavesAfter <= PressureBefore.getOccupancy(ST) &&
mayCauseSpilling(WavesAfter)) ||
@@ -1558,7 +1569,7 @@ void GCNPostScheduleDAGMILive::schedule() {
if (HasIGLPInstrs) {
SavedMutations.clear();
SavedMutations.swap(Mutations);
- addMutation(createIGroupLPDAGMutation());
+ addMutation(createIGroupLPDAGMutation(/*IsReentry=*/true));
}
ScheduleDAGMI::schedule();
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index ef5470df876d..91a709303269 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -22,6 +22,7 @@
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#define GET_SUBTARGETINFO_HEADER
#include "AMDGPUGenSubtargetInfo.inc"
@@ -77,6 +78,7 @@ protected:
bool UnalignedAccessMode = false;
bool HasApertureRegs = false;
bool SupportsXNACK = false;
+ bool KernargPreload = false;
// This should not be used directly. 'TargetID' tracks the dynamic settings
// for XNACK.
@@ -105,6 +107,7 @@ protected:
bool GFX940Insts = false;
bool GFX10Insts = false;
bool GFX11Insts = false;
+ bool GFX12Insts = false;
bool GFX10_3Insts = false;
bool GFX7GFX8GFX9Insts = false;
bool SGPRInitBug = false;
@@ -116,6 +119,7 @@ protected:
bool HasFmaMixInsts = false;
bool HasMovrel = false;
bool HasVGPRIndexMode = false;
+ bool HasScalarDwordx3Loads = false;
bool HasScalarStores = false;
bool HasScalarAtomics = false;
bool HasSDWAOmod = false;
@@ -125,7 +129,8 @@ protected:
bool HasSDWAOutModsVOPC = false;
bool HasDPP = false;
bool HasDPP8 = false;
- bool Has64BitDPP = false;
+ bool HasDPALU_DPP = false;
+ bool HasDPPSrc1SGPR = false;
bool HasPackedFP32Ops = false;
bool HasImageInsts = false;
bool HasExtendedImageInsts = false;
@@ -157,6 +162,7 @@ protected:
bool HasAtomicFaddNoRtnInsts = false;
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false;
bool HasAtomicBufferGlobalPkAddF16Insts = false;
+ bool HasAtomicCSubNoRtnInsts = false;
bool HasAtomicGlobalPkAddBF16Inst = false;
bool HasFlatAtomicFaddF32Inst = false;
bool SupportsSRAMECC = false;
@@ -180,6 +186,8 @@ protected:
bool HasArchitectedFlatScratch = false;
bool EnableFlatScratch = false;
bool HasArchitectedSGPRs = false;
+ bool HasGDS = false;
+ bool HasGWS = false;
bool AddNoCarryInsts = false;
bool HasUnpackedD16VMem = false;
bool LDSMisalignedBug = false;
@@ -188,6 +196,10 @@ protected:
bool UnalignedDSAccess = false;
bool HasPackedTID = false;
bool ScalarizeGlobal = false;
+ bool HasSALUFloatInsts = false;
+ bool HasVGPRSingleUseHintInsts = false;
+ bool HasPseudoScalarTrans = false;
+ bool HasRestrictedSOffset = false;
bool HasVcmpxPermlaneHazard = false;
bool HasVMEMtoScalarWriteHazard = false;
@@ -201,6 +213,7 @@ protected:
bool HasFlatSegmentOffsetBug = false;
bool HasImageStoreD16Bug = false;
bool HasImageGather4D16Bug = false;
+ bool HasMSAALoadDstSelBug = false;
bool HasGFX11FullVGPRs = false;
bool HasMADIntraFwdBug = false;
bool HasVOPDInsts = false;
@@ -667,6 +680,8 @@ public:
return AddNoCarryInsts;
}
+ bool hasScalarAddSub64() const { return getGeneration() >= GFX12; }
+
bool hasUnpackedD16VMem() const {
return HasUnpackedD16VMem;
}
@@ -818,6 +833,11 @@ public:
bool hasInstPrefetch() const { return getGeneration() >= GFX10; }
+ bool hasPrefetch() const { return GFX12Insts; }
+
+ // Has s_cmpk_* instructions.
+ bool hasSCmpK() const { return getGeneration() < GFX12; }
+
// Scratch is allocated in 256 dword per wave blocks for the entire
// wavefront. When viewed from the perspective of an arbitrary workitem, this
// is 4-byte aligned.
@@ -853,7 +873,7 @@ public:
unsigned NumRegionInstrs) const override;
unsigned getMaxNumUserSGPRs() const {
- return 16;
+ return AMDGPU::getMaxNumUserSGPRs(*this);
}
bool hasSMemRealTime() const {
@@ -874,6 +894,8 @@ public:
return getGeneration() >= VOLCANIC_ISLANDS;
}
+ bool hasScalarDwordx3Loads() const { return HasScalarDwordx3Loads; }
+
bool hasScalarStores() const {
return HasScalarStores;
}
@@ -906,14 +928,21 @@ public:
return HasDPP8;
}
- bool has64BitDPP() const {
- return Has64BitDPP;
+ bool hasDPALU_DPP() const {
+ return HasDPALU_DPP;
}
+ bool hasDPPSrc1SGPR() const { return HasDPPSrc1SGPR; }
+
bool hasPackedFP32Ops() const {
return HasPackedFP32Ops;
}
+ // Has V_PK_MOV_B32 opcode
+ bool hasPkMovB32() const {
+ return GFX90AInsts;
+ }
+
bool hasFmaakFmamkF32Insts() const {
return getGeneration() >= GFX10 || hasGFX940Insts();
}
@@ -944,11 +973,15 @@ public:
bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
+ bool hasMSAALoadDstSelBug() const { return HasMSAALoadDstSelBug; }
+
bool hasNSAEncoding() const { return HasNSAEncoding; }
bool hasPartialNSAEncoding() const { return HasPartialNSAEncoding; }
- unsigned getNSAMaxSize() const { return AMDGPU::getNSAMaxSize(*this); }
+ unsigned getNSAMaxSize(bool HasSampler = false) const {
+ return AMDGPU::getNSAMaxSize(*this, HasSampler);
+ }
bool hasGFX10_AEncoding() const {
return GFX10_AEncoding;
@@ -1127,6 +1160,14 @@ public:
// hasGFX90AInsts is also true.
bool hasGFX940Insts() const { return GFX940Insts; }
+ bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
+
+ bool hasVGPRSingleUseHintInsts() const { return HasVGPRSingleUseHintInsts; }
+
+ bool hasPseudoScalarTrans() const { return HasPseudoScalarTrans; }
+
+ bool hasRestrictedSOffset() const { return HasRestrictedSOffset; }
+
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs
/// SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
@@ -1155,6 +1196,12 @@ public:
/// \returns true if the architected SGPRs are enabled.
bool hasArchitectedSGPRs() const { return HasArchitectedSGPRs; }
+ /// \returns true if Global Data Share is supported.
+ bool hasGDS() const { return HasGDS; }
+
+ /// \returns true if Global Wave Sync is supported.
+ bool hasGWS() const { return HasGWS; }
+
/// \returns true if the machine has merged shaders in which s0-s7 are
/// reserved by the hardware and user SGPRs start at s8
bool hasMergedShaders() const {
@@ -1164,6 +1211,37 @@ public:
// \returns true if the target supports the pre-NGG legacy geometry path.
bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
+ // \returns true if preloading kernel arguments is supported.
+ bool hasKernargPreload() const { return KernargPreload; }
+
+ // \returns true if we need to generate backwards compatible code when
+ // preloading kernel arguments.
+ bool needsKernargPreloadBackwardsCompatibility() const {
+ return hasKernargPreload() && !hasGFX940Insts();
+ }
+
+ // \returns true if the target has split barriers feature
+ bool hasSplitBarriers() const { return getGeneration() >= GFX12; }
+
+ // \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.
+ bool hasCvtFP8VOP1Bug() const { return true; }
+
+ // \returns true if CSUB (a.k.a. SUB_CLAMP on GFX12) atomics support a
+ // no-return form.
+ bool hasAtomicCSubNoRtnInsts() const { return HasAtomicCSubNoRtnInsts; }
+
+ // \returns true if the target has DX10_CLAMP kernel descriptor mode bit
+ bool hasDX10ClampMode() const { return getGeneration() < GFX12; }
+
+ // \returns true if the target has IEEE kernel descriptor mode bit
+ bool hasIEEEMode() const { return getGeneration() < GFX12; }
+
+ // \returns true if the target has IEEE fminimum/fmaximum instructions
+ bool hasIEEEMinMax() const { return getGeneration() >= GFX12; }
+
+ // \returns true if the target has WG_RR_MODE kernel descriptor mode bit
+ bool hasRrWGMode() const { return getGeneration() >= GFX12; }
+
/// \returns SGPR allocation granularity supported by the subtarget.
unsigned getSGPRAllocGranule() const {
return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
@@ -1362,6 +1440,91 @@ public:
}
};
+class GCNUserSGPRUsageInfo {
+public:
+ bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; }
+
+ bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; }
+
+ bool hasDispatchPtr() const { return DispatchPtr; }
+
+ bool hasQueuePtr() const { return QueuePtr; }
+
+ bool hasKernargSegmentPtr() const { return KernargSegmentPtr; }
+
+ bool hasDispatchID() const { return DispatchID; }
+
+ bool hasFlatScratchInit() const { return FlatScratchInit; }
+
+ unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
+
+ unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
+
+ unsigned getNumFreeUserSGPRs();
+
+ void allocKernargPreloadSGPRs(unsigned NumSGPRs);
+
+ enum UserSGPRID : unsigned {
+ ImplicitBufferPtrID = 0,
+ PrivateSegmentBufferID = 1,
+ DispatchPtrID = 2,
+ QueuePtrID = 3,
+ KernargSegmentPtrID = 4,
+ DispatchIdID = 5,
+ FlatScratchInitID = 6,
+ PrivateSegmentSizeID = 7
+ };
+
+ // Returns the size in number of SGPRs for preload user SGPR field.
+ static unsigned getNumUserSGPRForField(UserSGPRID ID) {
+ switch (ID) {
+ case ImplicitBufferPtrID:
+ return 2;
+ case PrivateSegmentBufferID:
+ return 4;
+ case DispatchPtrID:
+ return 2;
+ case QueuePtrID:
+ return 2;
+ case KernargSegmentPtrID:
+ return 2;
+ case DispatchIdID:
+ return 2;
+ case FlatScratchInitID:
+ return 2;
+ case PrivateSegmentSizeID:
+ return 1;
+ }
+ llvm_unreachable("Unknown UserSGPRID.");
+ }
+
+ GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);
+
+private:
+ const GCNSubtarget &ST;
+
+ // Private memory buffer
+ // Compute directly in sgpr[0:1]
+ // Other shaders indirect 64-bits at sgpr[0:1]
+ bool ImplicitBufferPtr = false;
+
+ bool PrivateSegmentBuffer = false;
+
+ bool DispatchPtr = false;
+
+ bool QueuePtr = false;
+
+ bool KernargSegmentPtr = false;
+
+ bool DispatchID = false;
+
+ bool FlatScratchInit = false;
+
+ unsigned NumKernargPreloadSGPRs = 0;
+
+ unsigned NumUsedUserSGPRs = 0;
+};
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
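GCNUserSGPRUsageInfo centralizes the user-SGPR budget for a kernel. A minimal sketch of querying it, assuming F is a kernel function and ST its GCNSubtarget; only members declared in the hunk above are used:

    // Sketch: how many user SGPRs the ABI setup consumes for this kernel.
    GCNUserSGPRUsageInfo Info(F, ST);
    unsigned Used = Info.getNumUsedUserSGPRs();
    // Field widths are fixed, e.g. the private segment buffer takes 4 SGPRs:
    unsigned PSBSize = GCNUserSGPRUsageInfo::getNumUserSGPRForField(
        GCNUserSGPRUsageInfo::PrivateSegmentBufferID);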
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
index 29c9b9ccf276..33c208495c50 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
@@ -103,7 +103,13 @@ bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
return false;
if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
return false;
- if (InstInfo.hasInvalidOperand(getVRegIdx))
+
+ // On GFX12 if both OpX and OpY are V_MOV_B32 then OpY uses the SRC2 source-cache.
+ bool SkipSrc = ST.getGeneration() >= AMDGPUSubtarget::GFX12 &&
+ FirstMI.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
+ SecondMI.getOpcode() == AMDGPU::V_MOV_B32_e32;
+
+ if (InstInfo.hasInvalidOperand(getVRegIdx, SkipSrc))
return false;
LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI
@@ -142,10 +148,10 @@ namespace {
/// be turned into VOPD instructions
/// Greedily pairs instruction candidates. O(n^2) algorithm.
struct VOPDPairingMutation : ScheduleDAGMutation {
- ShouldSchedulePredTy shouldScheduleAdjacent; // NOLINT: function pointer
+ MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer
VOPDPairingMutation(
- ShouldSchedulePredTy shouldScheduleAdjacent) // NOLINT: function pointer
+ MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
: shouldScheduleAdjacent(shouldScheduleAdjacent) {}
void apply(ScheduleDAGInstrs *DAG) override {
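The GFX12 relaxation above exempts a dual V_MOV_B32 pair from the usual source-bank check because OpY reads through the SRC2 source cache. Condensed from the hunk, with ST, FirstMI, SecondMI, InstInfo and getVRegIdx as assumed context:

    // Sketch: skip the src operand check only for a GFX12 mov/mov pair.
    bool SkipSrc = ST.getGeneration() >= AMDGPUSubtarget::GFX12 &&
                   FirstMI.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
                   SecondMI.getOpcode() == AMDGPU::V_MOV_B32_e32;
    if (InstInfo.hasInvalidOperand(getVRegIdx, SkipSrc))
      return false;   // some operand still violates VOPD banking rules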
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
index a1f8be403c44..c8ce1903d315 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
@@ -13,7 +13,7 @@
#include "AMDGPUCustomBehaviour.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIInstrInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/WithColor.h"
@@ -25,10 +25,12 @@ void AMDGPUInstrPostProcess::postProcessInstruction(
std::unique_ptr<Instruction> &Inst, const MCInst &MCI) {
switch (MCI.getOpcode()) {
case AMDGPU::S_WAITCNT:
+ case AMDGPU::S_WAITCNT_soft:
case AMDGPU::S_WAITCNT_EXPCNT:
case AMDGPU::S_WAITCNT_LGKMCNT:
case AMDGPU::S_WAITCNT_VMCNT:
case AMDGPU::S_WAITCNT_VSCNT:
+ case AMDGPU::S_WAITCNT_VSCNT_soft:
case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
case AMDGPU::S_WAITCNT_VMCNT_gfx10:
@@ -77,10 +79,12 @@ unsigned AMDGPUCustomBehaviour::checkCustomHazard(ArrayRef<InstRef> IssuedInst,
default:
return 0;
case AMDGPU::S_WAITCNT: // This instruction
+ case AMDGPU::S_WAITCNT_soft:
case AMDGPU::S_WAITCNT_EXPCNT:
case AMDGPU::S_WAITCNT_LGKMCNT:
case AMDGPU::S_WAITCNT_VMCNT:
- case AMDGPU::S_WAITCNT_VSCNT: // to this instruction are all pseudo.
+ case AMDGPU::S_WAITCNT_VSCNT:
+ case AMDGPU::S_WAITCNT_VSCNT_soft: // to this instruction are all pseudo.
case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
case AMDGPU::S_WAITCNT_VMCNT_gfx10:
@@ -317,13 +321,15 @@ bool AMDGPUCustomBehaviour::hasModifiersSet(
return true;
}
+// taken from SIInstrInfo::isGWS()
+bool AMDGPUCustomBehaviour::isGWS(uint16_t Opcode) const {
+ const MCInstrDesc &MCID = MCII.get(Opcode);
+ return MCID.TSFlags & SIInstrFlags::GWS;
+}
+
// taken from SIInstrInfo::isAlwaysGDS()
bool AMDGPUCustomBehaviour::isAlwaysGDS(uint16_t Opcode) const {
- return Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::DS_GWS_INIT ||
- Opcode == AMDGPU::DS_GWS_SEMA_V || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
- Opcode == AMDGPU::DS_GWS_SEMA_P ||
- Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
- Opcode == AMDGPU::DS_GWS_BARRIER;
+ return Opcode == AMDGPU::DS_ORDERED_COUNT || isGWS(Opcode);
}
} // namespace mca
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h
index cb1436d319c9..3a231758887b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h
@@ -68,6 +68,8 @@ class AMDGPUCustomBehaviour : public CustomBehaviour {
bool hasModifiersSet(const std::unique_ptr<Instruction> &Inst,
unsigned OpName) const;
/// Helper function used in generateWaitCntInfo()
+ bool isGWS(uint16_t Opcode) const;
+ /// Helper function used in generateWaitCntInfo()
bool isAlwaysGDS(uint16_t Opcode) const;
/// Helper function used in generateWaitCntInfo()
bool isVMEM(const MCInstrDesc &MCID);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 44109b9d2919..f91f36ed851b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -28,7 +28,7 @@ namespace {
class AMDGPUAsmBackend : public MCAsmBackend {
public:
- AMDGPUAsmBackend(const Target &T) : MCAsmBackend(support::little) {}
+ AMDGPUAsmBackend(const Target &T) : MCAsmBackend(llvm::endianness::little) {}
unsigned getNumFixupKinds() const override { return AMDGPU::NumTargetFixupKinds; };
@@ -53,7 +53,8 @@ public:
std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target) override;
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) override;
};
} //End anonymous namespace
@@ -185,12 +186,15 @@ const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo(
if (Kind < FirstTargetFixupKind)
return MCAsmBackend::getFixupKindInfo(Kind);
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
return Infos[Kind - FirstTargetFixupKind];
}
bool AMDGPUAsmBackend::shouldForceRelocation(const MCAssembler &,
const MCFixup &Fixup,
- const MCValue &) {
+ const MCValue &,
+ const MCSubtargetInfo *STI) {
return Fixup.getKind() >= FirstLiteralRelocationKind;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 3f188478ca8b..58eed81e0755 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -63,6 +63,10 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_AMDGPU_REL32_HI;
case MCSymbolRefExpr::VK_AMDGPU_REL64:
return ELF::R_AMDGPU_REL64;
+ case MCSymbolRefExpr::VK_AMDGPU_ABS32_LO:
+ return ELF::R_AMDGPU_ABS32_LO;
+ case MCSymbolRefExpr::VK_AMDGPU_ABS32_HI:
+ return ELF::R_AMDGPU_ABS32_HI;
}
MCFixupKind Kind = Fixup.getKind();
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index ad55c73b22ea..edc244db613d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -10,13 +10,13 @@
#include "AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
-#include "SIRegisterInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/TargetParser/TargetParser.h"
@@ -24,12 +24,6 @@
using namespace llvm;
using namespace llvm::AMDGPU;
-static cl::opt<bool> Keep16BitSuffixes(
- "amdgpu-keep-16-bit-reg-suffixes",
- cl::desc("Keep .l and .h suffixes in asm for debugging purposes"),
- cl::init(false),
- cl::ReallyHidden);
-
void AMDGPUInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
// FIXME: The current implementation of
// AsmParser::parseRegisterOrRegisterNumber in MC implies we either emit this
@@ -103,28 +97,36 @@ void AMDGPUInstPrinter::printNamedBit(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- uint16_t Imm = MI->getOperand(OpNo).getImm();
+ uint32_t Imm = MI->getOperand(OpNo).getImm();
if (Imm != 0) {
O << " offset:";
- printU16ImmDecOperand(MI, OpNo, O);
+
+ // GFX12 uses a 24-bit signed offset for VBUFFER.
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ bool IsVBuffer = Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF);
+ if (AMDGPU::isGFX12(STI) && IsVBuffer)
+ O << formatDec(SignExtend32<24>(Imm));
+ else
+ printU16ImmDecOperand(MI, OpNo, O);
}
}
void AMDGPUInstPrinter::printFlatOffset(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- uint16_t Imm = MI->getOperand(OpNo).getImm();
+ uint32_t Imm = MI->getOperand(OpNo).getImm();
if (Imm != 0) {
O << " offset:";
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
- bool IsFlatSeg = !(Desc.TSFlags &
- (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch));
+ bool AllowNegative = (Desc.TSFlags & (SIInstrFlags::FlatGlobal |
+ SIInstrFlags::FlatScratch)) ||
+ AMDGPU::isGFX12(STI);
- if (IsFlatSeg) // Unsigned offset
- printU16ImmDecOperand(MI, OpNo, O);
- else // Signed offset
+ if (AllowNegative) // Signed offset
O << formatDec(SignExtend32(Imm, AMDGPU::getNumFlatOffsetBits(STI)));
+ else // Unsigned offset
+ printU16ImmDecOperand(MI, OpNo, O);
}
}
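The 24-bit handling above relies on llvm::SignExtend32 from llvm/Support/MathExtras.h, which is the usual shift-up/arithmetic-shift-down idiom. A minimal standalone sketch of the arithmetic (the lowercase function name here is illustrative, not the LLVM helper itself):

    #include <cassert>
    #include <cstdint>

    // Sign-extend the low B bits of X to a full int32_t, as
    // llvm::SignExtend32<B>(X) does.
    template <unsigned B> int32_t signExtend32(uint32_t X) {
      return int32_t(X << (32 - B)) >> (32 - B);
    }

    int main() {
      assert(signExtend32<24>(0x000001) == 1);        // small positive offset
      assert(signExtend32<24>(0xFFFFFF) == -1);       // all-ones 24-bit pattern
      assert(signExtend32<24>(0x800000) == -8388608); // most negative 24-bit offset
    }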
@@ -174,6 +176,17 @@ void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
auto Imm = MI->getOperand(OpNo).getImm();
+
+ if (AMDGPU::isGFX12Plus(STI)) {
+ const int64_t TH = Imm & CPol::TH;
+ const int64_t Scope = Imm & CPol::SCOPE;
+
+ printTH(MI, TH, Scope, O);
+ printScope(Scope, O);
+
+ return;
+ }
+
if (Imm & CPol::GLC)
O << ((AMDGPU::isGFX940(STI) &&
!(MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SMRD)) ? " sc0"
@@ -188,6 +201,89 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
O << " /* unexpected cache policy bit */";
}
+void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope,
+ raw_ostream &O) {
+ // Do not print this field when th == 0.
+ if (TH == 0)
+ return;
+
+ const unsigned Opcode = MI->getOpcode();
+ const MCInstrDesc &TID = MII.get(Opcode);
+ bool IsStore = TID.mayStore();
+ bool IsAtomic =
+ TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet);
+
+ O << " th:";
+
+ if (IsAtomic) {
+ O << "TH_ATOMIC_";
+ if (TH & AMDGPU::CPol::TH_ATOMIC_CASCADE) {
+ if (Scope >= AMDGPU::CPol::SCOPE_DEV)
+ O << "CASCADE" << (TH & AMDGPU::CPol::TH_ATOMIC_NT ? "_NT" : "_RT");
+ else
+ O << formatHex(TH);
+ } else if (TH & AMDGPU::CPol::TH_ATOMIC_NT)
+ O << "NT" << (TH & AMDGPU::CPol::TH_ATOMIC_RETURN ? "_RETURN" : "");
+ else if (TH & AMDGPU::CPol::TH_ATOMIC_RETURN)
+ O << "RETURN";
+ else
+ O << formatHex(TH);
+ } else {
+ if (!IsStore && TH == AMDGPU::CPol::TH_RESERVED)
+ O << formatHex(TH);
+ else {
+ // This defaults to printing load variants when neither the MayStore nor
+ // the MayLoad flag is present, which is the case for instructions like
+ // image_get_resinfo.
+ O << (IsStore ? "TH_STORE_" : "TH_LOAD_");
+ switch (TH) {
+ case AMDGPU::CPol::TH_NT:
+ O << "NT";
+ break;
+ case AMDGPU::CPol::TH_HT:
+ O << "HT";
+ break;
+ case AMDGPU::CPol::TH_BYPASS: // or LU or RT_WB
+ O << (Scope == AMDGPU::CPol::SCOPE_SYS ? "BYPASS"
+ : (IsStore ? "RT_WB" : "LU"));
+ break;
+ case AMDGPU::CPol::TH_NT_RT:
+ O << "NT_RT";
+ break;
+ case AMDGPU::CPol::TH_RT_NT:
+ O << "RT_NT";
+ break;
+ case AMDGPU::CPol::TH_NT_HT:
+ O << "NT_HT";
+ break;
+ case AMDGPU::CPol::TH_NT_WB:
+ O << "NT_WB";
+ break;
+ default:
+ llvm_unreachable("unexpected th value");
+ }
+ }
+ }
+}
+
+void AMDGPUInstPrinter::printScope(int64_t Scope, raw_ostream &O) {
+ if (Scope == CPol::SCOPE_CU)
+ return;
+
+ O << " scope:";
+
+ if (Scope == CPol::SCOPE_SE)
+ O << "SCOPE_SE";
+ else if (Scope == CPol::SCOPE_DEV)
+ O << "SCOPE_DEV";
+ else if (Scope == CPol::SCOPE_SYS)
+ O << "SCOPE_SYS";
+ else
+ llvm_unreachable("unexpected scope policy value");
+}
+
void AMDGPUInstPrinter::printDMask(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
if (MI->getOperand(OpNo).getImm()) {
@@ -278,12 +374,7 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
}
#endif
- StringRef RegName(getRegisterName(RegNo));
- if (!Keep16BitSuffixes)
- if (!RegName.consume_back(".l"))
- RegName.consume_back(".h");
-
- O << RegName;
+ O << getRegisterName(RegNo);
}
void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
@@ -333,6 +424,15 @@ void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx11:
case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx11:
case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx11:
+ case AMDGPU::V_ADD_CO_CI_U32_e32_gfx12:
+ case AMDGPU::V_SUB_CO_CI_U32_e32_gfx12:
+ case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx12:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx12:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx12:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx12:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx12:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx12:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx12:
printDefaultVccOperand(false, STI, O);
break;
}
@@ -437,7 +537,7 @@ void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
const MCSubtargetInfo &STI,
- raw_ostream &O) {
+ raw_ostream &O, bool IsFP) {
int64_t SImm = static_cast<int64_t>(Imm);
if (SImm >= -16 && SImm <= 64) {
O << SImm;
@@ -465,7 +565,10 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
else if (Imm == 0x3fc45f306dc9c882 &&
STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
O << "0.15915494309189532";
- else {
+ else if (IsFP) {
+ assert(AMDGPU::isValid32BitLiteral(Imm, true));
+ O << formatHex(static_cast<uint64_t>(Hi_32(Imm)));
+ } else {
assert(isUInt<32>(Imm) || isInt<32>(Imm));
// In rare situations, we will have a 32-bit literal in a 64-bit
@@ -532,21 +635,15 @@ void AMDGPUInstPrinter::printDefaultVccOperand(bool FirstOperand,
void AMDGPUInstPrinter::printWaitVDST(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- uint8_t Imm = MI->getOperand(OpNo).getImm();
- if (Imm != 0) {
- O << " wait_vdst:";
- printU4ImmDecOperand(MI, OpNo, O);
- }
+ O << " wait_vdst:";
+ printU4ImmDecOperand(MI, OpNo, O);
}
void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- uint8_t Imm = MI->getOperand(OpNo).getImm();
- if (Imm != 0) {
- O << " wait_exp:";
- printU4ImmDecOperand(MI, OpNo, O);
- }
+ O << " wait_exp:";
+ printU4ImmDecOperand(MI, OpNo, O);
}
bool AMDGPUInstPrinter::needsImpliedVcc(const MCInstrDesc &Desc,
@@ -619,14 +716,17 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
case MCOI::OPERAND_IMMEDIATE:
+ case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
printImmediate32(Op.getImm(), STI, O);
break;
case AMDGPU::OPERAND_REG_IMM_INT64:
- case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ printImmediate64(Op.getImm(), STI, O, false);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
- printImmediate64(Op.getImm(), STI, O);
+ printImmediate64(Op.getImm(), STI, O, true);
break;
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
@@ -688,7 +788,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
if (RCBits == 32)
printImmediate32(llvm::bit_cast<uint32_t>((float)Value), STI, O);
else if (RCBits == 64)
- printImmediate64(llvm::bit_cast<uint64_t>(Value), STI, O);
+ printImmediate64(llvm::bit_cast<uint64_t>(Value), STI, O, true);
else
llvm_unreachable("Invalid register class size");
}
@@ -725,6 +825,18 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx11:
case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx11:
case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx11:
+ case AMDGPU::V_CNDMASK_B32_e32_gfx12:
+ case AMDGPU::V_ADD_CO_CI_U32_e32_gfx12:
+ case AMDGPU::V_SUB_CO_CI_U32_e32_gfx12:
+ case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx12:
+ case AMDGPU::V_CNDMASK_B32_dpp_gfx12:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx12:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx12:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx12:
+ case AMDGPU::V_CNDMASK_B32_dpp8_gfx12:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx12:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx12:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx12:
case AMDGPU::V_CNDMASK_B32_e32_gfx6_gfx7:
case AMDGPU::V_CNDMASK_B32_e32_vi:
@@ -846,13 +958,9 @@ void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
unsigned Imm = MI->getOperand(OpNo).getImm();
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
- int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::src0);
- if (Src0Idx >= 0 &&
- Desc.operands()[Src0Idx].RegClass == AMDGPU::VReg_64RegClassID &&
- !AMDGPU::isLegal64BitDPPControl(Imm)) {
- O << " /* 64 bit dpp only supports row_newbcast */";
+ if (!AMDGPU::isLegalDPALU_DPPControl(Imm) && AMDGPU::isDPALU_DPP(Desc)) {
+ O << " /* DP ALU dpp only supports row_newbcast */";
return;
} else if (Imm <= DppCtrl::QUAD_PERM_LAST) {
O << "quad_perm:[";
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 3b14faab136b..95c26de6299e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -66,6 +66,8 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printCPol(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printTH(const MCInst *MI, int64_t TH, int64_t Scope, raw_ostream &O);
+ void printScope(int64_t Scope, raw_ostream &O);
void printDMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printDim(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
@@ -91,7 +93,7 @@ private:
void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI,
- raw_ostream &O);
+ raw_ostream &O, bool IsFP);
void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printRegularOperand(const MCInst *MI, unsigned OpNo,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index 5e77a8caa04e..b403d69d9ff1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -49,6 +49,14 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ void getMachineOpValueT16(const MCInst &MI, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ void getMachineOpValueT16Lo128(const MCInst &MI, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
/// Use a fixup to encode the simm16 field for SOPP branch
/// instructions.
void getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
@@ -254,6 +262,7 @@ AMDGPUMCCodeEmitter::getLitEncoding(const MCOperand &MO,
case AMDGPU::OPERAND_REG_IMM_V2FP32:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
+ case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
case AMDGPU::OPERAND_REG_IMM_INT64:
@@ -345,7 +354,8 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI,
// However, dst is encoded as EXEC for compatibility with SP3.
if (AMDGPU::isGFX10Plus(STI) && isVCMPX64(Desc)) {
assert((Encoding & 0xFF) == 0);
- Encoding |= MRI.getEncodingValue(AMDGPU::EXEC_LO);
+ Encoding |= MRI.getEncodingValue(AMDGPU::EXEC_LO) &
+ AMDGPU::HWEncoding::REG_IDX_MASK;
}
for (unsigned i = 0; i < bytes; i++) {
@@ -403,7 +413,10 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI,
} else if (!Op.isExpr()) // Exprs will be replaced with a fixup value.
llvm_unreachable("Must be immediate or expr");
- support::endian::write<uint32_t>(CB, Imm, support::endianness::little);
+ if (Desc.operands()[i].OperandType == AMDGPU::OPERAND_REG_IMM_FP64)
+ Imm = Hi_32(Imm);
+
+ support::endian::write<uint32_t>(CB, Imm, llvm::endianness::little);
// Only one literal value allowed
break;
@@ -488,11 +501,14 @@ void AMDGPUMCCodeEmitter::getAVOperandEncoding(
const MCInst &MI, unsigned OpNo, APInt &Op,
SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
unsigned Reg = MI.getOperand(OpNo).getReg();
- uint64_t Enc = MRI.getEncodingValue(Reg);
+ unsigned Enc = MRI.getEncodingValue(Reg);
+ unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
+ bool IsVGPROrAGPR = Enc & AMDGPU::HWEncoding::IS_VGPR_OR_AGPR;
// VGPR and AGPR have the same encoding, but SrcA and SrcB operands of mfma
// instructions use acc[0:1] modifier bits to distinguish. These bits are
// encoded as a virtual 9th bit of the register for these operands.
+ bool IsAGPR = false;
if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Reg) ||
MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg) ||
MRI.getRegClass(AMDGPU::AReg_96RegClassID).contains(Reg) ||
@@ -507,9 +523,9 @@ void AMDGPUMCCodeEmitter::getAVOperandEncoding(
MRI.getRegClass(AMDGPU::AReg_384RegClassID).contains(Reg) ||
MRI.getRegClass(AMDGPU::AReg_512RegClassID).contains(Reg) ||
MRI.getRegClass(AMDGPU::AGPR_LO16RegClassID).contains(Reg))
- Enc |= 512;
+ IsAGPR = true;
- Op = Enc;
+ Op = Idx | (IsVGPROrAGPR << 8) | (IsAGPR << 9);
}
static bool needsPCRel(const MCExpr *Expr) {
@@ -540,13 +556,38 @@ void AMDGPUMCCodeEmitter::getMachineOpValue(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
if (MO.isReg()){
- Op = MRI.getEncodingValue(MO.getReg());
+ unsigned Enc = MRI.getEncodingValue(MO.getReg());
+ unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
+ bool IsVGPR = Enc & AMDGPU::HWEncoding::IS_VGPR_OR_AGPR;
+ Op = Idx | (IsVGPR << 8);
return;
}
unsigned OpNo = &MO - MI.begin();
getMachineOpValueCommon(MI, MO, OpNo, Op, Fixups, STI);
}
+void AMDGPUMCCodeEmitter::getMachineOpValueT16(
+ const MCInst &MI, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
+ llvm_unreachable("TODO: Implement getMachineOpValueT16().");
+}
+
+void AMDGPUMCCodeEmitter::getMachineOpValueT16Lo128(
+ const MCInst &MI, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg()) {
+ uint16_t Encoding = MRI.getEncodingValue(MO.getReg());
+ unsigned RegIdx = Encoding & AMDGPU::HWEncoding::REG_IDX_MASK;
+ bool IsHi = Encoding & AMDGPU::HWEncoding::IS_HI;
+ bool IsVGPR = Encoding & AMDGPU::HWEncoding::IS_VGPR_OR_AGPR;
+ assert((!IsVGPR || isUInt<7>(RegIdx)) && "VGPR0-VGPR127 expected!");
+ Op = (IsVGPR ? 0x100 : 0) | (IsHi ? 0x80 : 0) | RegIdx;
+ return;
+ }
+ getMachineOpValueCommon(MI, MO, OpNo, Op, Fixups, STI);
+}
+
void AMDGPUMCCodeEmitter::getMachineOpValueCommon(
const MCInst &MI, const MCOperand &MO, unsigned OpNo, APInt &Op,
SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
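Both operand encoders above pack a register index into the low bits and OR flag bits on top. A minimal standalone sketch of the two bit layouts follows; the masks are written out numerically (in the real code they come from AMDGPU::HWEncoding), and the register numbers are illustrative:

    #include <cassert>
    #include <cstdint>

    // getAVOperandEncoding() layout: low 8 bits = register index, bit 8 =
    // VGPR-or-AGPR, bit 9 = the virtual "is AGPR" bit that stands in for the
    // acc[0:1] modifier on MFMA SrcA/SrcB operands.
    uint32_t encodeAV(unsigned Idx, bool IsVGPROrAGPR, bool IsAGPR) {
      return Idx | (uint32_t(IsVGPROrAGPR) << 8) | (uint32_t(IsAGPR) << 9);
    }

    // getMachineOpValueT16Lo128() layout: bit 8 = VGPR, bit 7 = high 16-bit
    // half, low 7 bits = register index (only v0-v127 are addressable).
    uint32_t encodeT16Lo128(unsigned RegIdx, bool IsHi, bool IsVGPR) {
      assert(RegIdx < 128 && "VGPR0-VGPR127 expected");
      return (IsVGPR ? 0x100u : 0u) | (IsHi ? 0x80u : 0u) | RegIdx;
    }

    int main() {
      assert(encodeAV(5, true, false) == 0x105);                        // v5
      assert(encodeAV(5, true, true) == 0x305);  // a5: virtual 9th bit set
      assert(encodeT16Lo128(3, /*IsHi=*/true, /*IsVGPR=*/true) == 0x183); // v3.h
    }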
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 1bd3cdc67800..a855cf585205 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -66,8 +66,8 @@ bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
AMDGPU::GPUKind AK;
+ // clang-format off
switch (ElfMach) {
- default: llvm_unreachable("Unhandled ELF::EF_AMDGPU type");
case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
@@ -126,8 +126,12 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: AK = GK_GFX1103; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150: AK = GK_GFX1150; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break;
case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
+ default: AK = GK_NONE; break;
}
+ // clang-format on
StringRef GPUName = getArchNameAMDGCN(AK);
if (GPUName != "")
@@ -140,6 +144,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
if (AK == AMDGPU::GPUKind::GK_NONE)
AK = parseArchR600(GPU);
+ // clang-format off
switch (AK) {
case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600;
case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630;
@@ -199,8 +204,11 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX1103: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103;
case GK_GFX1150: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150;
case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151;
+ case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200;
+ case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201;
case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
}
+ // clang-format on
llvm_unreachable("unknown GPU");
}
@@ -368,6 +376,12 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD,
kernel_code_properties,
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
+ if (hasKernargPreload(STI)) {
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_preload_length ", KD,
+ kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH);
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_preload_offset ", KD,
+ kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET);
+ }
PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
kernel_code_properties,
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
@@ -418,9 +432,6 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
switch (CodeObjectVersion) {
default:
break;
- case AMDGPU::AMDHSA_COV2:
- break;
- case AMDGPU::AMDHSA_COV3:
case AMDGPU::AMDHSA_COV4:
case AMDGPU::AMDHSA_COV5:
if (getTargetID()->isXnackSupported())
@@ -440,16 +451,16 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD,
compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
- PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
- PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
+ if (IVersion.Major < 12) {
+ PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD, compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
+ PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD, compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
+ }
if (IVersion.Major >= 9)
PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
+ amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
if (AMDGPU::isGFX90A(STI))
PRINT_FIELD(OS, ".amdhsa_tg_split", KD,
compute_pgm_rsrc3,
@@ -457,16 +468,19 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
if (IVersion.Major >= 10) {
PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE);
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED);
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3,
amdhsa::COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
}
+ if (IVersion.Major >= 12)
+ PRINT_FIELD(OS, ".amdhsa_round_robin_scheduling", KD, compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
PRINT_FIELD(
OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
compute_pgm_rsrc2,
@@ -539,7 +553,7 @@ void AMDGPUTargetELFStreamer::EmitNote(
unsigned NoteFlags = 0;
// TODO Apparently, this is currently needed for OpenCL as mentioned in
// https://reviews.llvm.org/D74995
- if (STI.getTargetTriple().getOS() == Triple::AMDHSA)
+ if (isHsaAbi(STI))
NoteFlags = ELF::SHF_ALLOC;
S.pushSection();
@@ -598,11 +612,10 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
}
unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
- assert(STI.getTargetTriple().getOS() == Triple::AMDHSA);
+ assert(isHsaAbi(STI));
if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(&STI)) {
switch (*HsaAbiVer) {
- case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
return getEFlagsV3();
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
@@ -827,6 +840,24 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
return true;
}
+bool AMDGPUTargetAsmStreamer::EmitKernargPreloadHeader(
+ const MCSubtargetInfo &STI) {
+ for (int i = 0; i < 64; ++i) {
+ OS << "\ts_nop 0\n";
+ }
+ return true;
+}
+
+bool AMDGPUTargetELFStreamer::EmitKernargPreloadHeader(
+ const MCSubtargetInfo &STI) {
+ const uint32_t Encoded_s_nop = 0xbf800000;
+ MCStreamer &OS = getStreamer();
+ for (int i = 0; i < 64; ++i) {
+ OS.emitInt32(Encoded_s_nop);
+ }
+ return true;
+}
+
bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
const uint32_t Encoded_s_code_end = 0xbf9f0000;
const uint32_t Encoded_s_nop = 0xbf800000;
@@ -906,6 +937,7 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc1);
Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc2);
Streamer.emitInt16(KernelDescriptor.kernel_code_properties);
- for (uint8_t Res : KernelDescriptor.reserved2)
+ Streamer.emitInt16(KernelDescriptor.kernarg_preload);
+ for (uint8_t Res : KernelDescriptor.reserved3)
Streamer.emitInt8(Res);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index db43de8fcc5f..55b5246c9210 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -90,6 +90,11 @@ public:
/// \returns True on success, false on failure.
virtual bool EmitCodeEnd(const MCSubtargetInfo &STI) { return true; }
+ /// \returns True on success, false on failure.
+ virtual bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI) {
+ return true;
+ }
+
virtual void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
@@ -154,6 +159,9 @@ public:
/// \returns True on success, false on failure.
bool EmitCodeEnd(const MCSubtargetInfo &STI) override;
+ /// \returns True on success, false on failure.
+ bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI) override;
+
void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
@@ -215,6 +223,9 @@ public:
/// \returns True on success, false on failure.
bool EmitCodeEnd(const MCSubtargetInfo &STI) override;
+ /// \returns True on success, false on failure.
+ bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI) override;
+
void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index bbbfbe4faa0f..6c539df7677e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -142,11 +142,11 @@ void R600MCCodeEmitter::encodeInstruction(const MCInst &MI,
}
void R600MCCodeEmitter::emit(uint32_t Value, SmallVectorImpl<char> &CB) const {
- support::endian::write(CB, Value, support::little);
+ support::endian::write(CB, Value, llvm::endianness::little);
}
void R600MCCodeEmitter::emit(uint64_t Value, SmallVectorImpl<char> &CB) const {
- support::endian::write(CB, Value, support::little);
+ support::endian::write(CB, Value, llvm::endianness::little);
}
unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index d924f733624a..240366c8e7da 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -25,6 +25,7 @@ def MIMGEncGfx10Default : MIMGEncoding;
def MIMGEncGfx10NSA : MIMGEncoding;
def MIMGEncGfx11Default : MIMGEncoding;
def MIMGEncGfx11NSA : MIMGEncoding;
+def MIMGEncGfx12 : MIMGEncoding;
def MIMGEncoding : GenericEnum {
let FilterClass = "MIMGEncoding";
@@ -95,11 +96,13 @@ def MIMG {
int NOP = -1;
}
-class mimgopc <int gfx11, int gfx10m, int vi = gfx10m, int si = gfx10m> {
+class mimgopc <int gfx12, int gfx11, int gfx10m, int vi = gfx10m, int si = gfx10m> {
+ field bits<8> GFX12 = gfx12;
field bits<8> GFX11 = gfx11;
field bits<8> GFX10M = gfx10m; // GFX10minus for all but atomics
field bits<8> VI = vi; // VI is only used for atomic/sampler/gather instructions
field bits<8> SI = si; // SI is only used for atomic instructions
+ bit HAS_GFX12 = !ne(gfx12, MIMG.NOP);
bit HAS_GFX11 = !ne(gfx11, MIMG.NOP);
bit HAS_GFX10M = !ne(gfx10m, MIMG.NOP);
bit HAS_VI = !ne(vi, MIMG.NOP);
@@ -218,6 +221,16 @@ class MIMG <dag outs, string dns = "">
bits<8> VAddrOperands;
}
+class VIMAGE <dag outs, string dns = ""> : MIMG<outs, dns> {
+ let MIMG = 0;
+ let VIMAGE = 1;
+}
+
+class VSAMPLE <dag outs, string dns = ""> : MIMG<outs, dns> {
+ let MIMG = 0;
+ let VSAMPLE = 1;
+}
+
def MIMGInfoTable : GenericTable {
let FilterClass = "MIMG";
let CppTypeName = "MIMGInfo";
@@ -327,8 +340,8 @@ class MIMG_nsa_gfx10<int op, dag outs, int num_addrs, string dns="">
// Base class of all non-NSA gfx11 MIMG instructions.
class MIMG_gfx11<int op, dag outs, string dns = "">
: MIMG<outs, dns>, MIMGe_gfx11<op> {
- let SubtargetPredicate = isGFX11Plus;
- let AssemblerPredicate = isGFX11Plus;
+ let SubtargetPredicate = isGFX11Only;
+ let AssemblerPredicate = isGFX11Only;
let MIMGEncoding = MIMGEncGfx11Default;
let VAddrOperands = 1;
@@ -343,8 +356,8 @@ class MIMG_nsa_gfx11<int op, dag outs, int num_addrs, string dns="",
list<RegisterClass> addr_types=[],
RegisterClass LastAddrRC = VGPR_32>
: MIMG<outs, dns>, MIMGe_gfx11<op> {
- let SubtargetPredicate = isGFX11Plus;
- let AssemblerPredicate = isGFX11Plus;
+ let SubtargetPredicate = isGFX11Only;
+ let AssemblerPredicate = isGFX11Only;
let MIMGEncoding = MIMGEncGfx11NSA;
let VAddrOperands = num_addrs;
@@ -359,6 +372,48 @@ class MIMG_nsa_gfx11<int op, dag outs, int num_addrs, string dns="",
let nsa = nsah.NSA;
}
+class VIMAGE_gfx12<int op, dag outs, int num_addrs, string dns="",
+ list<RegisterClass> addr_types=[]>
+ : VIMAGE<outs, dns>, VIMAGEe<op> {
+ let SubtargetPredicate = isGFX12Plus;
+ let AssemblerPredicate = isGFX12Plus;
+
+ let MIMGEncoding = MIMGEncGfx12;
+ let VAddrOperands = num_addrs;
+
+ MIMGNSAHelper nsah = !if(!empty(addr_types),
+ MIMGNSAHelper<num_addrs>,
+ MIMGNSAHelper<num_addrs, addr_types>);
+ dag AddrIns = nsah.AddrIns;
+ string AddrAsm = !if(!eq(num_addrs, 1), "$vaddr0", nsah.AddrAsm);
+
+ let d16 = !if(BaseOpcode.HasD16, ?, 0);
+ let vaddr1 = !if(!lt(num_addrs, 2), 0, ?);
+ let vaddr2 = !if(!lt(num_addrs, 3), 0, ?);
+ let vaddr3 = !if(!lt(num_addrs, 4), 0, ?);
+ let vaddr4 = !if(!lt(num_addrs, 5), 0, ?);
+}
+
+class VSAMPLE_gfx12<int op, dag outs, int num_addrs, string dns="",
+ RegisterClass Addr3RC>
+ : VSAMPLE<outs, dns>, VSAMPLEe<op> {
+ let SubtargetPredicate = isGFX12Plus;
+ let AssemblerPredicate = isGFX12Plus;
+
+ let MIMGEncoding = MIMGEncGfx12;
+ let VAddrOperands = num_addrs;
+
+ PartialNSAHelper nsah = PartialNSAHelper<num_addrs, 4, Addr3RC>;
+
+ dag AddrIns = nsah.AddrIns;
+ string AddrAsm = !if(!eq(num_addrs, 1), "$vaddr0", nsah.AddrAsm);
+
+ let d16 = !if(BaseOpcode.HasD16, ?, 0);
+ let vaddr1 = !if(!lt(num_addrs, 2), 0, ?);
+ let vaddr2 = !if(!lt(num_addrs, 3), 0, ?);
+ let vaddr3 = !if(!lt(num_addrs, 4), 0, ?);
+}
+
class MIMG_NoSampler_Helper <mimgopc op, string asm,
RegisterClass dst_rc,
RegisterClass addr_rc,
@@ -435,12 +490,41 @@ class MIMG_NoSampler_nsa_gfx11<mimgopc op, string opcode,
#!if(BaseOpcode.HasD16, "$d16", "");
}
+class VIMAGE_NoSampler_gfx12<mimgopc op, string opcode,
+ RegisterClass DataRC, int num_addrs,
+ string dns="">
+ : VIMAGE_gfx12<op.GFX12, (outs DataRC:$vdata), num_addrs, dns> {
+ let InOperandList = !con(AddrIns,
+ (ins SReg_256:$rsrc, DMask:$dmask, Dim:$dim,
+ CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe),
+ !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $rsrc$dmask$dim$cpol$r128$a16$tfe"
+ #!if(BaseOpcode.HasD16, "$d16", "");
+}
+
+class VSAMPLE_Sampler_gfx12<mimgopc op, string opcode, RegisterClass DataRC,
+ int num_addrs, RegisterClass Addr3RC = VGPR_32,
+ string dns="">
+ : VSAMPLE_gfx12<op.GFX12, (outs DataRC:$vdata), num_addrs, dns, Addr3RC> {
+ let InOperandList = !con(AddrIns,
+ (ins SReg_256:$rsrc),
+ !if(BaseOpcode.Sampler, (ins SReg_128:$samp), (ins)),
+ (ins DMask:$dmask, Dim:$dim, UNorm:$unorm,
+ CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe,
+ LWE:$lwe),
+ !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $rsrc"
+ #!if(BaseOpcode.Sampler, ", $samp", "")
+ #"$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"
+ #!if(BaseOpcode.HasD16, "$d16", "");
+}
+
multiclass MIMG_NoSampler_Src_Helper <mimgopc op, string asm,
- RegisterClass dst_rc,
- bit enableDisasm,
- bit ExtendedImageInst = 1> {
- let ssamp = 0 in {
- let VAddrDwords = 1 in {
+ RegisterClass dst_rc, bit enableDisasm,
+ bit ExtendedImageInst = 1,
+ bit isVSample = 0> {
+ let VAddrDwords = 1 in {
+ let ssamp = 0 in {
if op.HAS_GFX10M then {
def _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32,
!if(enableDisasm, "AMDGPU", "")>;
@@ -455,8 +539,19 @@ multiclass MIMG_NoSampler_Src_Helper <mimgopc op, string asm,
!if(enableDisasm, "AMDGPU", "")>;
}
}
-
- let VAddrDwords = 2 in {
+ if op.HAS_GFX12 then {
+ if isVSample then {
+ let samp = 0 in
+ def _V1_gfx12 : VSAMPLE_Sampler_gfx12<op, asm, dst_rc, 1>;
+ }
+ else {
+ def _V1_gfx12 : VIMAGE_NoSampler_gfx12<op, asm, dst_rc, 1,
+ !if(enableDisasm, "GFX12", "")>;
+ }
+ }
+ }
+ let VAddrDwords = 2 in {
+ let ssamp = 0 in {
if op.HAS_GFX10M then {
def _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64>;
if !not(ExtendedImageInst) then
@@ -469,8 +564,18 @@ multiclass MIMG_NoSampler_Src_Helper <mimgopc op, string asm,
def _V2_nsa_gfx11 : MIMG_NoSampler_nsa_gfx11<op, asm, dst_rc, 2>;
}
}
-
- let VAddrDwords = 3 in {
+ if op.HAS_GFX12 then {
+ if isVSample then {
+ let samp = 0 in
+ def _V2_gfx12 : VSAMPLE_Sampler_gfx12<op, asm, dst_rc, 2>;
+ }
+ else {
+ def _V2_gfx12 : VIMAGE_NoSampler_gfx12<op, asm, dst_rc, 2>;
+ }
+ }
+ }
+ let VAddrDwords = 3 in {
+ let ssamp = 0 in {
if op.HAS_GFX10M then {
def _V3 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_96>;
if !not(ExtendedImageInst) then
@@ -483,8 +588,18 @@ multiclass MIMG_NoSampler_Src_Helper <mimgopc op, string asm,
def _V3_nsa_gfx11 : MIMG_NoSampler_nsa_gfx11<op, asm, dst_rc, 3>;
}
}
-
- let VAddrDwords = 4 in {
+ if op.HAS_GFX12 then {
+ if isVSample then {
+ let samp = 0 in
+ def _V3_gfx12 : VSAMPLE_Sampler_gfx12<op, asm, dst_rc, 3>;
+ }
+ else {
+ def _V3_gfx12 : VIMAGE_NoSampler_gfx12<op, asm, dst_rc, 3>;
+ }
+ }
+ }
+ let VAddrDwords = 4 in {
+ let ssamp = 0 in {
if op.HAS_GFX10M then {
def _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128>;
if !not(ExtendedImageInst) then
@@ -499,6 +614,17 @@ multiclass MIMG_NoSampler_Src_Helper <mimgopc op, string asm,
!if(enableDisasm, "AMDGPU", "")>;
}
}
+ if op.HAS_GFX12 then {
+ if isVSample then {
+ let samp = 0 in
+ def _V4_gfx12 : VSAMPLE_Sampler_gfx12<op, asm, dst_rc, 4, VGPR_32,
+ !if(enableDisasm, "GFX12", "")>;
+ }
+ else {
+ def _V4_gfx12 : VIMAGE_NoSampler_gfx12<op, asm, dst_rc, 4,
+ !if(enableDisasm, "GFX12", "")>;
+ }
+ }
}
}
@@ -606,62 +732,97 @@ class MIMG_Store_nsa_gfx11<mimgopc op, string opcode,
#!if(BaseOpcode.HasD16, "$d16", "");
}
+class VIMAGE_Store_gfx12<mimgopc op, string opcode,
+ RegisterClass DataRC, int num_addrs,
+ string dns="">
+ : VIMAGE_gfx12<op.GFX12, (outs), num_addrs, dns> {
+ let InOperandList = !con((ins DataRC:$vdata),
+ AddrIns,
+ (ins SReg_256:$rsrc, DMask:$dmask, Dim:$dim,
+ CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe),
+ !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $rsrc$dmask$dim$cpol$r128$a16$tfe"
+ #!if(BaseOpcode.HasD16, "$d16", "");
+}
+
multiclass MIMG_Store_Addr_Helper <mimgopc op, string asm,
RegisterClass data_rc,
bit enableDisasm> {
let mayLoad = 0, mayStore = 1, hasSideEffects = 0, hasPostISelHook = 0,
- DisableWQM = 1, ssamp = 0 in {
+ DisableWQM = 1 in {
let VAddrDwords = 1 in {
- if op.HAS_GFX10M then {
- def _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32,
- !if(enableDisasm, "AMDGPU", "")>;
- let hasPostISelHook = 1 in
- def _V1_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VGPR_32,
- !if(enableDisasm, "GFX90A", "")>;
- def _V1_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VGPR_32,
- !if(enableDisasm, "AMDGPU", "")>;
+ let ssamp = 0 in {
+ if op.HAS_GFX10M then {
+ def _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32,
+ !if(enableDisasm, "AMDGPU", "")>;
+ let hasPostISelHook = 1 in
+ def _V1_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VGPR_32,
+ !if(enableDisasm, "GFX90A", "")>;
+ def _V1_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VGPR_32,
+ !if(enableDisasm, "AMDGPU", "")>;
+ }
+ if op.HAS_GFX11 then {
+ def _V1_gfx11 : MIMG_Store_gfx11 <op, asm, data_rc, VGPR_32,
+ !if(enableDisasm, "AMDGPU", "")>;
+ }
}
- if op.HAS_GFX11 then {
- def _V1_gfx11 : MIMG_Store_gfx11 <op, asm, data_rc, VGPR_32,
- !if(enableDisasm, "AMDGPU", "")>;
+ if op.HAS_GFX12 then {
+ def _V1_gfx12 : VIMAGE_Store_gfx12 <op, asm, data_rc, 1,
+ !if(enableDisasm, "GFX12", "")>;
}
}
let VAddrDwords = 2 in {
- if op.HAS_GFX10M then {
- def _V2 : MIMG_Store_Helper <op, asm, data_rc, VReg_64>;
- def _V2_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VReg_64>;
- def _V2_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_64>;
- def _V2_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 2>;
+ let ssamp = 0 in {
+ if op.HAS_GFX10M then {
+ def _V2 : MIMG_Store_Helper <op, asm, data_rc, VReg_64>;
+ def _V2_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VReg_64>;
+ def _V2_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_64>;
+ def _V2_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 2>;
+ }
+ if op.HAS_GFX11 then {
+ def _V2_gfx11 : MIMG_Store_gfx11 <op, asm, data_rc, VReg_64>;
+ def _V2_nsa_gfx11 : MIMG_Store_nsa_gfx11 <op, asm, data_rc, 2>;
+ }
}
- if op.HAS_GFX11 then {
- def _V2_gfx11 : MIMG_Store_gfx11 <op, asm, data_rc, VReg_64>;
- def _V2_nsa_gfx11 : MIMG_Store_nsa_gfx11 <op, asm, data_rc, 2>;
+ if op.HAS_GFX12 then {
+ def _V2_gfx12 : VIMAGE_Store_gfx12 <op, asm, data_rc, 2>;
}
}
let VAddrDwords = 3 in {
- if op.HAS_GFX10M then {
- def _V3 : MIMG_Store_Helper <op, asm, data_rc, VReg_96>;
- def _V3_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VReg_96>;
- def _V3_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_96>;
- def _V3_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 3>;
+ let ssamp = 0 in {
+ if op.HAS_GFX10M then {
+ def _V3 : MIMG_Store_Helper <op, asm, data_rc, VReg_96>;
+ def _V3_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VReg_96>;
+ def _V3_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_96>;
+ def _V3_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 3>;
+ }
+ if op.HAS_GFX11 then {
+ def _V3_gfx11 : MIMG_Store_gfx11 <op, asm, data_rc, VReg_96>;
+ def _V3_nsa_gfx11 : MIMG_Store_nsa_gfx11 <op, asm, data_rc, 3>;
+ }
}
- if op.HAS_GFX11 then {
- def _V3_gfx11 : MIMG_Store_gfx11 <op, asm, data_rc, VReg_96>;
- def _V3_nsa_gfx11 : MIMG_Store_nsa_gfx11 <op, asm, data_rc, 3>;
+ if op.HAS_GFX12 then {
+ def _V3_gfx12 : VIMAGE_Store_gfx12 <op, asm, data_rc, 3>;
}
}
let VAddrDwords = 4 in {
- if op.HAS_GFX10M then {
- def _V4 : MIMG_Store_Helper <op, asm, data_rc, VReg_128>;
- def _V4_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VReg_128>;
- def _V4_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_128>;
- def _V4_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 4,
- !if(enableDisasm, "AMDGPU", "")>;
+ let ssamp = 0 in {
+ if op.HAS_GFX10M then {
+ def _V4 : MIMG_Store_Helper <op, asm, data_rc, VReg_128>;
+ def _V4_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VReg_128>;
+ def _V4_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_128>;
+ def _V4_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 4,
+ !if(enableDisasm, "AMDGPU", "")>;
+ }
+ if op.HAS_GFX11 then {
+ def _V4_gfx11 : MIMG_Store_gfx11 <op, asm, data_rc, VReg_128>;
+ def _V4_nsa_gfx11 : MIMG_Store_nsa_gfx11 <op, asm, data_rc, 4,
+ !if(enableDisasm, "AMDGPU", "")>;
+ }
}
- if op.HAS_GFX11 then {
- def _V4_gfx11 : MIMG_Store_gfx11 <op, asm, data_rc, VReg_128>;
- def _V4_nsa_gfx11 : MIMG_Store_nsa_gfx11 <op, asm, data_rc, 4,
- !if(enableDisasm, "AMDGPU", "")>;
+ if op.HAS_GFX12 then {
+ def _V4_gfx12 : VIMAGE_Store_gfx12 <op, asm, data_rc, 4,
+ !if(enableDisasm, "GFX12", "")>;
}
}
}
@@ -788,84 +949,137 @@ class MIMG_Atomic_nsa_gfx11<mimgopc op, string opcode,
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
}
+class VIMAGE_Atomic_gfx12<mimgopc op, string opcode, RegisterClass DataRC,
+ int num_addrs, bit enableDisasm = 0>
+ : VIMAGE_gfx12<!cast<int>(op.GFX12), (outs DataRC:$vdst), num_addrs,
+ !if(enableDisasm, "GFX12", "")> {
+ let Constraints = "$vdst = $vdata";
+
+ let InOperandList = !con((ins DataRC:$vdata),
+ AddrIns,
+ (ins SReg_256:$rsrc, DMask:$dmask, Dim:$dim,
+ CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe));
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $rsrc$dmask$dim$cpol$r128$a16$tfe";
+}
+
+class VIMAGE_Atomic_gfx12_Renamed<mimgopc op, string opcode, string renamed,
+ RegisterClass DataRC, int num_addrs,
+ bit enableDisasm = 0>
+ : VIMAGE_Atomic_gfx12<op, renamed, DataRC, num_addrs, enableDisasm>,
+ MnemonicAlias<opcode, renamed>, Requires<[isGFX12Plus]>;
+
multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
RegisterClass data_rc,
bit enableDasm = 0,
- bit isFP = 0> {
+ bit isFP = 0,
+ string renamed = ""> {
let hasSideEffects = 1, // FIXME: remove this
mayLoad = 1, mayStore = 1, hasPostISelHook = 0, DisableWQM = 1,
- ssamp = 0, FPAtomic = isFP in {
+ FPAtomic = isFP in {
let VAddrDwords = 1 in {
- if op.HAS_SI then {
- def _V1_si : MIMG_Atomic_si <op, asm, data_rc, VGPR_32, enableDasm>;
- }
- if op.HAS_VI then {
- def _V1_vi : MIMG_Atomic_vi <op, asm, data_rc, VGPR_32, enableDasm>;
- let hasPostISelHook = 1 in
- def _V1_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VGPR_32, enableDasm>;
+ let ssamp = 0 in {
+ if op.HAS_SI then {
+ def _V1_si : MIMG_Atomic_si <op, asm, data_rc, VGPR_32, enableDasm>;
+ }
+ if op.HAS_VI then {
+ def _V1_vi : MIMG_Atomic_vi <op, asm, data_rc, VGPR_32, enableDasm>;
+ let hasPostISelHook = 1 in
+ def _V1_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VGPR_32, enableDasm>;
+ }
+ if op.HAS_GFX10M then {
+ def _V1_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VGPR_32, enableDasm>;
+ }
+ if op.HAS_GFX11 then {
+ def _V1_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VGPR_32, enableDasm>;
+ }
}
- if op.HAS_GFX10M then {
- def _V1_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VGPR_32, enableDasm>;
- }
- if op.HAS_GFX11 then {
- def _V1_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VGPR_32, enableDasm>;
+ if op.HAS_GFX12 then {
+ if !empty(renamed) then
+ def _V1_gfx12 : VIMAGE_Atomic_gfx12 <op, asm, data_rc, 1, enableDasm>;
+ else
+ def _V1_gfx12 : VIMAGE_Atomic_gfx12_Renamed <op, asm, renamed, data_rc, 1, enableDasm>;
}
}
let VAddrDwords = 2 in {
- if op.HAS_SI then {
- def _V2_si : MIMG_Atomic_si <op, asm, data_rc, VReg_64, 0>;
+ let ssamp = 0 in {
+ if op.HAS_SI then {
+ def _V2_si : MIMG_Atomic_si <op, asm, data_rc, VReg_64, 0>;
+ }
+ if op.HAS_VI then {
+ def _V2_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_64, 0>;
+ def _V2_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_64, 0>;
+ }
+ if op.HAS_GFX10M then {
+ def _V2_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_64, 0>;
+ def _V2_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 2, 0>;
+ }
+ if op.HAS_GFX11 then {
+ def _V2_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VReg_64, 0>;
+ def _V2_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 <op, asm, data_rc, 2, 0>;
+ }
}
- if op.HAS_VI then {
- def _V2_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_64, 0>;
- def _V2_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_64, 0>;
- }
- if op.HAS_GFX10M then {
- def _V2_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_64, 0>;
- def _V2_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 2, 0>;
- }
- if op.HAS_GFX11 then {
- def _V2_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VReg_64, 0>;
- def _V2_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 <op, asm, data_rc, 2, 0>;
+ if op.HAS_GFX12 then {
+ if !empty(renamed) then
+ def _V2_gfx12 : VIMAGE_Atomic_gfx12 <op, asm, data_rc, 2, 0>;
+ else
+ def _V2_gfx12 : VIMAGE_Atomic_gfx12_Renamed <op, asm, renamed, data_rc, 2, 0>;
}
}
let VAddrDwords = 3 in {
- if op.HAS_SI then {
- def _V3_si : MIMG_Atomic_si <op, asm, data_rc, VReg_96, 0>;
- }
- if op.HAS_VI then {
- def _V3_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_96, 0>;
- def _V3_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_96, 0>;
- }
- if op.HAS_GFX10M then {
- def _V3_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_96, 0>;
- def _V3_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 3, 0>;
+ let ssamp = 0 in {
+ if op.HAS_SI then {
+ def _V3_si : MIMG_Atomic_si <op, asm, data_rc, VReg_96, 0>;
+ }
+ if op.HAS_VI then {
+ def _V3_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_96, 0>;
+ def _V3_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_96, 0>;
+ }
+ if op.HAS_GFX10M then {
+ def _V3_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_96, 0>;
+ def _V3_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 3, 0>;
+ }
+ if op.HAS_GFX11 then {
+ def _V3_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VReg_96, 0>;
+ def _V3_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 <op, asm, data_rc, 3, 0>;
+ }
}
- if op.HAS_GFX11 then {
- def _V3_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VReg_96, 0>;
- def _V3_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 <op, asm, data_rc, 3, 0>;
+ if op.HAS_GFX12 then {
+ if !empty(renamed) then
+ def _V3_gfx12 : VIMAGE_Atomic_gfx12 <op, asm, data_rc, 3, 0>;
+ else
+ def _V3_gfx12 : VIMAGE_Atomic_gfx12_Renamed <op, asm, renamed, data_rc, 3, 0>;
}
}
let VAddrDwords = 4 in {
- if op.HAS_SI then {
- def _V4_si : MIMG_Atomic_si <op, asm, data_rc, VReg_128, 0>;
- }
- if op.HAS_VI then {
- def _V4_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_128, 0>;
- def _V4_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_128, 0>;
+ let ssamp = 0 in {
+ if op.HAS_SI then {
+ def _V4_si : MIMG_Atomic_si <op, asm, data_rc, VReg_128, 0>;
+ }
+ if op.HAS_VI then {
+ def _V4_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_128, 0>;
+ def _V4_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_128, 0>;
+ }
+ if op.HAS_GFX10M then {
+ def _V4_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_128, 0>;
+ def _V4_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 4, enableDasm>;
+ }
+ if op.HAS_GFX11 then {
+ def _V4_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VReg_128, 0>;
+ def _V4_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 <op, asm, data_rc, 4, enableDasm>;
+ }
}
- if op.HAS_GFX10M then {
- def _V4_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_128, 0>;
- def _V4_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 4, enableDasm>;
- }
- if op.HAS_GFX11 then {
- def _V4_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VReg_128, 0>;
- def _V4_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 <op, asm, data_rc, 4, enableDasm>;
+ if op.HAS_GFX12 then {
+ if !empty(renamed) then
+ def _V4_gfx12 : VIMAGE_Atomic_gfx12 <op, asm, data_rc, 4, enableDasm>;
+ else
+ def _V4_gfx12 : VIMAGE_Atomic_gfx12_Renamed <op, asm, renamed, data_rc, 4, enableDasm>;
}
}
}
}
-multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0> { // 64-bit atomics
+multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0,
+ string renamed = ""> { // 64-bit atomics
let IsAtomicRet = 1 in {
def "" : MIMGBaseOpcode {
let Atomic = 1;
@@ -877,13 +1091,17 @@ multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0>
// using dmask and tfe. Only 32-bit variant is registered with disassembler.
// Other variants are reconstructed by disassembler using dmask and tfe.
let VDataDwords = !if(isCmpSwap, 2, 1) in
- defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_64, VGPR_32), 1, isFP>;
+ defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_64, VGPR_32), 1, isFP, renamed>;
let VDataDwords = !if(isCmpSwap, 4, 2) in
- defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_128, VReg_64), 0, isFP>;
+ defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_128, VReg_64), 0, isFP, renamed>;
}
} // End IsAtomicRet = 1
}
+multiclass MIMG_Atomic_Renamed <mimgopc op, string asm, string renamed,
+ bit isCmpSwap = 0, bit isFP = 0>
+ : MIMG_Atomic <op, asm, isCmpSwap, isFP, renamed>;
+
class MIMG_Sampler_Helper <mimgopc op, string asm, RegisterClass dst_rc,
RegisterClass src_rc, string dns="">
: MIMG_gfx6789 <op.VI, (outs dst_rc:$vdata), dns> {
@@ -1006,7 +1224,7 @@ class MIMGAddrSizes_dw_range<list<int> range> {
}
class MIMG_Sampler_AddrSizes<AMDGPUSampleVariant sample, bit isG16,
- int nsa_max_addr = 5> {
+ int nsa_max_addr = 5, bit includeNSA1 = 0> {
// List of all possible numbers of address words, taking all combinations of
// A16 and image dimension into account (note: no MSAA, since this is for
// sample/gather ops).
@@ -1061,8 +1279,10 @@ class MIMG_Sampler_AddrSizes<AMDGPUSampleVariant sample, bit isG16,
// it is the only one that could have a register other than VGPR32.
int EnableDisasmNum = !foldl(!head(AllNumAddrWords), !tail(AllNumAddrWords),
acc, var, !if(!le(var, nsa_max_addr), var, acc));
+ list<int> PossibleVariants =
+ !listconcat([12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2], !if(includeNSA1, [1], []));
list<LastVAddrSize> PartialNSAInstrs =
- !foldl([]<LastVAddrSize>, [12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2], lhs, dw,
+ !foldl([]<LastVAddrSize>, PossibleVariants, lhs, dw,
!if(isIntInList<dw, AllNumAddrWords>.ret,
!listconcat(lhs, [LastVAddrSize<dw, !sub(nsa_max_addr, 1),
!eq(dw, EnableDisasmNum)>]),
@@ -1114,6 +1334,16 @@ multiclass MIMG_Sampler_Src_Helper <mimgopc op, string asm,
}
}
}
+
+ foreach addr = MIMG_Sampler_AddrSizes<sample, isG16, 4/*MaxNSASize*/, 1>.PartialNSAInstrs in {
+ let VAddrDwords = addr.NumWords in {
+ if op.HAS_GFX12 then {
+ def _V # addr.NumWords # _gfx12
+ : VSAMPLE_Sampler_gfx12<op, asm, dst_rc, addr.NumWords, addr.RegClass,
+ !if(!and(enableDisasm, addr.Disassemble), "GFX12", "")>;
+ }
+ }
+ }
}
class MIMG_Sampler_BaseOpcode<AMDGPUSampleVariant sample>
@@ -1177,12 +1407,12 @@ class MIMG_IntersectRay_Helper<bit Is64, bit IsA16> {
RegisterClass RegClass = MIMGAddrSize<num_addrs, 0>.RegClass;
int VAddrDwords = !srl(RegClass.Size, 5);
- int gfx11_nsa_addrs = !if(IsA16, 4, 5);
+ int GFX11PlusNSAAddrs = !if(IsA16, 4, 5);
RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32);
- list<RegisterClass> gfx11_addr_types =
- !if(IsA16,
- [node_ptr_type, VGPR_32, VReg_96, VReg_96],
- [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]);
+ list<RegisterClass> GFX11PlusAddrTypes =
+ !if(IsA16,
+ [node_ptr_type, VGPR_32, VReg_96, VReg_96],
+ [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]);
}
class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC>
@@ -1215,6 +1445,14 @@ class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc$a16";
}
+class VIMAGE_IntersectRay_gfx12<mimgopc op, string opcode, int num_addrs,
+ list<RegisterClass> addr_types>
+ : VIMAGE_gfx12<op.GFX12, (outs VReg_128:$vdata),
+ num_addrs, "GFX12", addr_types> {
+ let InOperandList = !con(nsah.AddrIns, (ins SReg_128:$rsrc, A16:$a16));
+ let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $rsrc$a16";
+}
+
multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16> {
defvar info = MIMG_IntersectRay_Helper<Is64, IsA16>;
def "" : MIMGBaseOpcode {
@@ -1222,30 +1460,39 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16> {
let A16 = IsA16;
}
let dmask = 0xf,
- unorm = 1,
d16 = 0,
cpol = 0,
tfe = 0,
- lwe = 0,
r128 = 1,
- ssamp = 0,
dim = {0, 0, 0},
a16 = IsA16,
d16 = 0,
BaseOpcode = !cast<MIMGBaseOpcode>(NAME),
VDataDwords = 4 in {
- def _sa_gfx10 : MIMG_IntersectRay_gfx10<op, opcode, info.RegClass> {
- let VAddrDwords = info.VAddrDwords;
- }
- def _sa_gfx11 : MIMG_IntersectRay_gfx11<op, opcode, info.RegClass> {
- let VAddrDwords = info.VAddrDwords;
- }
- def _nsa_gfx10 : MIMG_IntersectRay_nsa_gfx10<op, opcode, info.num_addrs> {
- let VAddrDwords = info.num_addrs;
+ let unorm = 1,
+ lwe = 0,
+ ssamp = 0 in {
+ if op.HAS_GFX10M then
+ def _sa_gfx10 : MIMG_IntersectRay_gfx10<op, opcode, info.RegClass> {
+ let VAddrDwords = info.VAddrDwords;
+ }
+ if op.HAS_GFX11 then
+ def _sa_gfx11 : MIMG_IntersectRay_gfx11<op, opcode, info.RegClass> {
+ let VAddrDwords = info.VAddrDwords;
+ }
+ if op.HAS_GFX10M then
+ def _nsa_gfx10 : MIMG_IntersectRay_nsa_gfx10<op, opcode, info.num_addrs> {
+ let VAddrDwords = info.num_addrs;
+ }
+ if op.HAS_GFX11 then
+ def _nsa_gfx11 : MIMG_IntersectRay_nsa_gfx11<op, opcode,
+ info.GFX11PlusNSAAddrs,
+ info.GFX11PlusAddrTypes> {
+ let VAddrDwords = info.num_addrs;
+ }
}
- def _nsa_gfx11 : MIMG_IntersectRay_nsa_gfx11<op, opcode,
- info.gfx11_nsa_addrs,
- info.gfx11_addr_types> {
+ def _gfx12 : VIMAGE_IntersectRay_gfx12<op, opcode, info.GFX11PlusNSAAddrs,
+ info.GFX11PlusAddrTypes> {
let VAddrDwords = info.num_addrs;
}
}
@@ -1261,13 +1508,13 @@ multiclass MIMG_MSAA_Load <mimgopc op, string asm> {
let BaseOpcode = !cast<MIMGBaseOpcode>(NAME),
Gather4 = 1, hasPostISelHook = 0, mayLoad = 1 in {
let VDataDwords = 2 in
- defm _V2 : MIMG_NoSampler_Src_Helper<op, asm, VReg_64, 0>; /* packed D16 */
+ defm _V2 : MIMG_NoSampler_Src_Helper<op, asm, VReg_64, 0, 0, 1>; /* packed D16 */
let VDataDwords = 3 in
- defm _V3 : MIMG_NoSampler_Src_Helper<op, asm, VReg_96, 0>; /* packed D16 + tfe */
+ defm _V3 : MIMG_NoSampler_Src_Helper<op, asm, VReg_96, 0, 0, 1>; /* packed D16 + tfe */
let VDataDwords = 4 in
- defm _V4 : MIMG_NoSampler_Src_Helper<op, asm, VReg_128, 1>;
+ defm _V4 : MIMG_NoSampler_Src_Helper<op, asm, VReg_128, 1, 0, 1>;
let VDataDwords = 5 in
- defm _V5 : MIMG_NoSampler_Src_Helper<op, asm, VReg_160, 0>;
+ defm _V5 : MIMG_NoSampler_Src_Helper<op, asm, VReg_160, 0, 0, 1>;
}
}
@@ -1276,143 +1523,143 @@ multiclass MIMG_MSAA_Load <mimgopc op, string asm> {
//===----------------------------------------------------------------------===//
let OtherPredicates = [HasImageInsts] in {
-defm IMAGE_LOAD : MIMG_NoSampler <mimgopc<0x00, 0x00>, "image_load", 1>;
-defm IMAGE_LOAD_MIP : MIMG_NoSampler <mimgopc<0x01, 0x01>, "image_load_mip", 1, 1>;
-defm IMAGE_LOAD_PCK : MIMG_NoSampler <mimgopc<0x02, 0x02>, "image_load_pck", 0>;
-defm IMAGE_LOAD_PCK_SGN : MIMG_NoSampler <mimgopc<0x03, 0x03>, "image_load_pck_sgn", 0>;
-defm IMAGE_LOAD_MIP_PCK : MIMG_NoSampler <mimgopc<0x04, 0x04>, "image_load_mip_pck", 0, 1>;
-defm IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoSampler <mimgopc<0x05, 0x05>, "image_load_mip_pck_sgn", 0, 1>;
-defm IMAGE_STORE : MIMG_Store <mimgopc<0x06, 0x08>, "image_store", 1>;
-defm IMAGE_STORE_MIP : MIMG_Store <mimgopc<0x07, 0x09>, "image_store_mip", 1, 1>;
-defm IMAGE_STORE_PCK : MIMG_Store <mimgopc<0x08, 0x0a>, "image_store_pck", 0>;
-defm IMAGE_STORE_MIP_PCK : MIMG_Store <mimgopc<0x09, 0x0b>, "image_store_mip_pck", 0, 1>;
-
-defm IMAGE_GET_RESINFO : MIMG_NoSampler <mimgopc<0x17, 0x0e>, "image_get_resinfo", 0, 1, 1>;
-
-defm IMAGE_ATOMIC_SWAP : MIMG_Atomic <mimgopc<0x0a, 0x0f, 0x10, 0x0f>, "image_atomic_swap">;
-defm IMAGE_ATOMIC_CMPSWAP : MIMG_Atomic <mimgopc<0x0b, 0x10, 0x11, 0x10>, "image_atomic_cmpswap", 1>;
-defm IMAGE_ATOMIC_ADD : MIMG_Atomic <mimgopc<0x0c, 0x11, 0x12, 0x11>, "image_atomic_add">;
-defm IMAGE_ATOMIC_SUB : MIMG_Atomic <mimgopc<0x0d, 0x12, 0x13, 0x12>, "image_atomic_sub">;
-defm IMAGE_ATOMIC_RSUB : MIMG_Atomic <mimgopc<MIMG.NOP, MIMG.NOP, MIMG.NOP, 0x13>, "image_atomic_rsub">;
-defm IMAGE_ATOMIC_SMIN : MIMG_Atomic <mimgopc<0x0e, 0x14>, "image_atomic_smin">;
-defm IMAGE_ATOMIC_UMIN : MIMG_Atomic <mimgopc<0x0f, 0x15>, "image_atomic_umin">;
-defm IMAGE_ATOMIC_SMAX : MIMG_Atomic <mimgopc<0x10, 0x16>, "image_atomic_smax">;
-defm IMAGE_ATOMIC_UMAX : MIMG_Atomic <mimgopc<0x11, 0x17>, "image_atomic_umax">;
-defm IMAGE_ATOMIC_AND : MIMG_Atomic <mimgopc<0x12, 0x18>, "image_atomic_and">;
-defm IMAGE_ATOMIC_OR : MIMG_Atomic <mimgopc<0x13, 0x19>, "image_atomic_or">;
-defm IMAGE_ATOMIC_XOR : MIMG_Atomic <mimgopc<0x14, 0x1a>, "image_atomic_xor">;
-defm IMAGE_ATOMIC_INC : MIMG_Atomic <mimgopc<0x15, 0x1b>, "image_atomic_inc">;
-defm IMAGE_ATOMIC_DEC : MIMG_Atomic <mimgopc<0x16, 0x1c>, "image_atomic_dec">;
-defm IMAGE_ATOMIC_FCMPSWAP : MIMG_Atomic <mimgopc<MIMG.NOP, 0x1d, MIMG.NOP>, "image_atomic_fcmpswap", 1, 1>;
-defm IMAGE_ATOMIC_FMIN : MIMG_Atomic <mimgopc<MIMG.NOP, 0x1e, MIMG.NOP>, "image_atomic_fmin", 0, 1>;
-defm IMAGE_ATOMIC_FMAX : MIMG_Atomic <mimgopc<MIMG.NOP, 0x1f, MIMG.NOP>, "image_atomic_fmax", 0, 1>;
-
-defm IMAGE_SAMPLE : MIMG_Sampler_WQM <mimgopc<0x1b, 0x20>, AMDGPUSample>;
+defm IMAGE_LOAD : MIMG_NoSampler <mimgopc<0x00, 0x00, 0x00>, "image_load", 1>;
+defm IMAGE_LOAD_MIP : MIMG_NoSampler <mimgopc<0x01, 0x01, 0x01>, "image_load_mip", 1, 1>;
+defm IMAGE_LOAD_PCK : MIMG_NoSampler <mimgopc<0x02, 0x02, 0x02>, "image_load_pck", 0>;
+defm IMAGE_LOAD_PCK_SGN : MIMG_NoSampler <mimgopc<0x03, 0x03, 0x03>, "image_load_pck_sgn", 0>;
+defm IMAGE_LOAD_MIP_PCK : MIMG_NoSampler <mimgopc<0x04, 0x04, 0x04>, "image_load_mip_pck", 0, 1>;
+defm IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoSampler <mimgopc<0x05, 0x05, 0x05>, "image_load_mip_pck_sgn", 0, 1>;
+defm IMAGE_STORE : MIMG_Store <mimgopc<0x06, 0x06, 0x08>, "image_store", 1>;
+defm IMAGE_STORE_MIP : MIMG_Store <mimgopc<0x07, 0x07, 0x09>, "image_store_mip", 1, 1>;
+defm IMAGE_STORE_PCK : MIMG_Store <mimgopc<0x08, 0x08, 0x0a>, "image_store_pck", 0>;
+defm IMAGE_STORE_MIP_PCK : MIMG_Store <mimgopc<0x09, 0x09, 0x0b>, "image_store_mip_pck", 0, 1>;
+
+defm IMAGE_GET_RESINFO : MIMG_NoSampler <mimgopc<0x17, 0x17, 0x0e, 0x0e, 0x0e>, "image_get_resinfo", 0, 1, 1>;
+
+defm IMAGE_ATOMIC_SWAP : MIMG_Atomic <mimgopc<0x0a, 0x0a, 0x0f, 0x10, 0x0f>, "image_atomic_swap">;
+defm IMAGE_ATOMIC_CMPSWAP : MIMG_Atomic <mimgopc<0x0b, 0x0b, 0x10, 0x11, 0x10>, "image_atomic_cmpswap", 1>;
+defm IMAGE_ATOMIC_ADD : MIMG_Atomic_Renamed <mimgopc<0x0c, 0x0c, 0x11, 0x12, 0x11>, "image_atomic_add", "image_atomic_add_uint">;
+defm IMAGE_ATOMIC_SUB : MIMG_Atomic_Renamed <mimgopc<0x0d, 0x0d, 0x12, 0x13, 0x12>, "image_atomic_sub", "image_atomic_sub_uint">;
+defm IMAGE_ATOMIC_RSUB : MIMG_Atomic <mimgopc<MIMG.NOP, MIMG.NOP, MIMG.NOP, MIMG.NOP, 0x13>, "image_atomic_rsub">;
+defm IMAGE_ATOMIC_SMIN : MIMG_Atomic_Renamed <mimgopc<0x0e, 0x0e, 0x14>, "image_atomic_smin", "image_atomic_min_int">;
+defm IMAGE_ATOMIC_UMIN : MIMG_Atomic_Renamed <mimgopc<0x0f, 0x0f, 0x15>, "image_atomic_umin", "image_atomic_min_uint">;
+defm IMAGE_ATOMIC_SMAX : MIMG_Atomic_Renamed <mimgopc<0x10, 0x10, 0x16>, "image_atomic_smax", "image_atomic_max_int">;
+defm IMAGE_ATOMIC_UMAX : MIMG_Atomic_Renamed <mimgopc<0x11, 0x11, 0x17>, "image_atomic_umax", "image_atomic_max_uint">;
+defm IMAGE_ATOMIC_AND : MIMG_Atomic <mimgopc<0x12, 0x12, 0x18>, "image_atomic_and">;
+defm IMAGE_ATOMIC_OR : MIMG_Atomic <mimgopc<0x13, 0x13, 0x19>, "image_atomic_or">;
+defm IMAGE_ATOMIC_XOR : MIMG_Atomic <mimgopc<0x14, 0x14, 0x1a>, "image_atomic_xor">;
+defm IMAGE_ATOMIC_INC : MIMG_Atomic_Renamed <mimgopc<0x15, 0x15, 0x1b>, "image_atomic_inc", "image_atomic_inc_uint">;
+defm IMAGE_ATOMIC_DEC : MIMG_Atomic_Renamed <mimgopc<0x16, 0x16, 0x1c>, "image_atomic_dec", "image_atomic_dec_uint">;
+defm IMAGE_ATOMIC_FCMPSWAP : MIMG_Atomic <mimgopc<MIMG.NOP, MIMG.NOP, 0x1d, MIMG.NOP>, "image_atomic_fcmpswap", 1, 1>;
+defm IMAGE_ATOMIC_FMIN : MIMG_Atomic <mimgopc<MIMG.NOP, MIMG.NOP, 0x1e, MIMG.NOP>, "image_atomic_fmin", 0, 1>;
+defm IMAGE_ATOMIC_FMAX : MIMG_Atomic <mimgopc<MIMG.NOP, MIMG.NOP, 0x1f, MIMG.NOP>, "image_atomic_fmax", 0, 1>;
+
+defm IMAGE_SAMPLE : MIMG_Sampler_WQM <mimgopc<0x1b, 0x1b, 0x20>, AMDGPUSample>;
let OtherPredicates = [HasExtendedImageInsts] in {
-defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <mimgopc<0x40, 0x21>, AMDGPUSample_cl>;
-defm IMAGE_SAMPLE_D : MIMG_Sampler <mimgopc<0x1c, 0x22>, AMDGPUSample_d>;
-defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <mimgopc<0x41, 0x23>, AMDGPUSample_d_cl>;
-defm IMAGE_SAMPLE_L : MIMG_Sampler <mimgopc<0x1d, 0x24>, AMDGPUSample_l>;
-defm IMAGE_SAMPLE_B : MIMG_Sampler_WQM <mimgopc<0x1e, 0x25>, AMDGPUSample_b>;
-defm IMAGE_SAMPLE_B_CL : MIMG_Sampler_WQM <mimgopc<0x42, 0x26>, AMDGPUSample_b_cl>;
-defm IMAGE_SAMPLE_LZ : MIMG_Sampler <mimgopc<0x1f, 0x27>, AMDGPUSample_lz>;
-defm IMAGE_SAMPLE_C : MIMG_Sampler_WQM <mimgopc<0x20, 0x28>, AMDGPUSample_c>;
-defm IMAGE_SAMPLE_C_CL : MIMG_Sampler_WQM <mimgopc<0x43, 0x29>, AMDGPUSample_c_cl>;
-defm IMAGE_SAMPLE_C_D : MIMG_Sampler <mimgopc<0x21, 0x2a>, AMDGPUSample_c_d>;
-defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <mimgopc<0x44, 0x2b>, AMDGPUSample_c_d_cl>;
-defm IMAGE_SAMPLE_C_L : MIMG_Sampler <mimgopc<0x22, 0x2c>, AMDGPUSample_c_l>;
-defm IMAGE_SAMPLE_C_B : MIMG_Sampler_WQM <mimgopc<0x23, 0x2d>, AMDGPUSample_c_b>;
-defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler_WQM <mimgopc<0x45, 0x2e>, AMDGPUSample_c_b_cl>;
-defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <mimgopc<0x24, 0x2f>, AMDGPUSample_c_lz>;
-defm IMAGE_SAMPLE_O : MIMG_Sampler_WQM <mimgopc<0x25, 0x30>, AMDGPUSample_o>;
-defm IMAGE_SAMPLE_CL_O : MIMG_Sampler_WQM <mimgopc<0x46, 0x31>, AMDGPUSample_cl_o>;
-defm IMAGE_SAMPLE_D_O : MIMG_Sampler <mimgopc<0x26, 0x32>, AMDGPUSample_d_o>;
-defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <mimgopc<0x47, 0x33>, AMDGPUSample_d_cl_o>;
-defm IMAGE_SAMPLE_L_O : MIMG_Sampler <mimgopc<0x27, 0x34>, AMDGPUSample_l_o>;
-defm IMAGE_SAMPLE_B_O : MIMG_Sampler_WQM <mimgopc<0x28, 0x35>, AMDGPUSample_b_o>;
-defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler_WQM <mimgopc<0x48, 0x36>, AMDGPUSample_b_cl_o>;
-defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <mimgopc<0x29, 0x37>, AMDGPUSample_lz_o>;
-defm IMAGE_SAMPLE_C_O : MIMG_Sampler_WQM <mimgopc<0x2a, 0x38>, AMDGPUSample_c_o>;
-defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler_WQM <mimgopc<0x49, 0x39>, AMDGPUSample_c_cl_o>;
-defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <mimgopc<0x2b, 0x3a>, AMDGPUSample_c_d_o>;
-defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <mimgopc<0x4a, 0x3b>, AMDGPUSample_c_d_cl_o>;
-defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <mimgopc<0x2c, 0x3c>, AMDGPUSample_c_l_o>;
-defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler_WQM <mimgopc<0x4b, 0x3e>, AMDGPUSample_c_b_cl_o>;
-defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler_WQM <mimgopc<0x2d, 0x3d>, AMDGPUSample_c_b_o>;
-defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <mimgopc<0x2e, 0x3f>, AMDGPUSample_c_lz_o>;
-defm IMAGE_GATHER4 : MIMG_Gather_WQM <mimgopc<0x2f, 0x40>, AMDGPUSample>;
-defm IMAGE_GATHER4_CL : MIMG_Gather_WQM <mimgopc<0x60, 0x41>, AMDGPUSample_cl>;
-defm IMAGE_GATHER4_L : MIMG_Gather <mimgopc<0x30, 0x44>, AMDGPUSample_l>;
-defm IMAGE_GATHER4_B : MIMG_Gather_WQM <mimgopc<0x31, 0x45>, AMDGPUSample_b>;
-defm IMAGE_GATHER4_B_CL : MIMG_Gather_WQM <mimgopc<0x61, 0x46>, AMDGPUSample_b_cl>;
-defm IMAGE_GATHER4_LZ : MIMG_Gather <mimgopc<0x32, 0x47>, AMDGPUSample_lz>;
-defm IMAGE_GATHER4_C : MIMG_Gather_WQM <mimgopc<0x33, 0x48>, AMDGPUSample_c>;
-defm IMAGE_GATHER4_C_CL : MIMG_Gather_WQM <mimgopc<0x62, 0x49>, AMDGPUSample_c_cl>;
-defm IMAGE_GATHER4_C_L : MIMG_Gather <mimgopc<0x63, 0x4c>, AMDGPUSample_c_l>;
-defm IMAGE_GATHER4_C_B : MIMG_Gather_WQM <mimgopc<0x64, 0x4d>, AMDGPUSample_c_b>;
-defm IMAGE_GATHER4_C_B_CL : MIMG_Gather_WQM <mimgopc<0x65, 0x4e>, AMDGPUSample_c_b_cl>;
-defm IMAGE_GATHER4_C_LZ : MIMG_Gather <mimgopc<0x34, 0x4f>, AMDGPUSample_c_lz>;
-defm IMAGE_GATHER4_O : MIMG_Gather_WQM <mimgopc<0x35, 0x50>, AMDGPUSample_o>;
-defm IMAGE_GATHER4_CL_O : MIMG_Gather_WQM <mimgopc<MIMG.NOP, 0x51>, AMDGPUSample_cl_o>;
-defm IMAGE_GATHER4_L_O : MIMG_Gather <mimgopc<MIMG.NOP, 0x54>, AMDGPUSample_l_o>;
-defm IMAGE_GATHER4_B_O : MIMG_Gather_WQM <mimgopc<MIMG.NOP, 0x55>, AMDGPUSample_b_o>;
-defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <mimgopc<MIMG.NOP, 0x56>, AMDGPUSample_b_cl_o>;
-defm IMAGE_GATHER4_LZ_O : MIMG_Gather <mimgopc<0x36, 0x57>, AMDGPUSample_lz_o>;
-defm IMAGE_GATHER4_C_O : MIMG_Gather_WQM <mimgopc<MIMG.NOP, 0x58>, AMDGPUSample_c_o>;
-defm IMAGE_GATHER4_C_CL_O : MIMG_Gather_WQM <mimgopc<MIMG.NOP, 0x59>, AMDGPUSample_c_cl_o>;
-defm IMAGE_GATHER4_C_L_O : MIMG_Gather <mimgopc<MIMG.NOP, 0x5c>, AMDGPUSample_c_l_o>;
-defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM <mimgopc<MIMG.NOP, 0x5d>, AMDGPUSample_c_b_o>;
-defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM <mimgopc<MIMG.NOP, 0x5e>, AMDGPUSample_c_b_cl_o>;
-defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <mimgopc<0x37, 0x5f>, AMDGPUSample_c_lz_o>;
+defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <mimgopc<0x40, 0x40, 0x21>, AMDGPUSample_cl>;
+defm IMAGE_SAMPLE_D : MIMG_Sampler <mimgopc<0x1c, 0x1c, 0x22>, AMDGPUSample_d>;
+defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <mimgopc<0x41, 0x41, 0x23>, AMDGPUSample_d_cl>;
+defm IMAGE_SAMPLE_L : MIMG_Sampler <mimgopc<0x1d, 0x1d, 0x24>, AMDGPUSample_l>;
+defm IMAGE_SAMPLE_B : MIMG_Sampler_WQM <mimgopc<0x1e, 0x1e, 0x25>, AMDGPUSample_b>;
+defm IMAGE_SAMPLE_B_CL : MIMG_Sampler_WQM <mimgopc<0x42, 0x42, 0x26>, AMDGPUSample_b_cl>;
+defm IMAGE_SAMPLE_LZ : MIMG_Sampler <mimgopc<0x1f, 0x1f, 0x27>, AMDGPUSample_lz>;
+defm IMAGE_SAMPLE_C : MIMG_Sampler_WQM <mimgopc<0x20, 0x20, 0x28>, AMDGPUSample_c>;
+defm IMAGE_SAMPLE_C_CL : MIMG_Sampler_WQM <mimgopc<0x43, 0x43, 0x29>, AMDGPUSample_c_cl>;
+defm IMAGE_SAMPLE_C_D : MIMG_Sampler <mimgopc<0x21, 0x21, 0x2a>, AMDGPUSample_c_d>;
+defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <mimgopc<0x44, 0x44, 0x2b>, AMDGPUSample_c_d_cl>;
+defm IMAGE_SAMPLE_C_L : MIMG_Sampler <mimgopc<0x22, 0x22, 0x2c>, AMDGPUSample_c_l>;
+defm IMAGE_SAMPLE_C_B : MIMG_Sampler_WQM <mimgopc<0x23, 0x23, 0x2d>, AMDGPUSample_c_b>;
+defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler_WQM <mimgopc<0x45, 0x45, 0x2e>, AMDGPUSample_c_b_cl>;
+defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <mimgopc<0x24, 0x24, 0x2f>, AMDGPUSample_c_lz>;
+defm IMAGE_SAMPLE_O : MIMG_Sampler_WQM <mimgopc<0x25, 0x25, 0x30>, AMDGPUSample_o>;
+defm IMAGE_SAMPLE_CL_O : MIMG_Sampler_WQM <mimgopc<0x46, 0x46, 0x31>, AMDGPUSample_cl_o>;
+defm IMAGE_SAMPLE_D_O : MIMG_Sampler <mimgopc<0x26, 0x26, 0x32>, AMDGPUSample_d_o>;
+defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <mimgopc<0x47, 0x47, 0x33>, AMDGPUSample_d_cl_o>;
+defm IMAGE_SAMPLE_L_O : MIMG_Sampler <mimgopc<0x27, 0x27, 0x34>, AMDGPUSample_l_o>;
+defm IMAGE_SAMPLE_B_O : MIMG_Sampler_WQM <mimgopc<0x28, 0x28, 0x35>, AMDGPUSample_b_o>;
+defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler_WQM <mimgopc<0x48, 0x48, 0x36>, AMDGPUSample_b_cl_o>;
+defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <mimgopc<0x29, 0x29, 0x37>, AMDGPUSample_lz_o>;
+defm IMAGE_SAMPLE_C_O : MIMG_Sampler_WQM <mimgopc<0x2a, 0x2a, 0x38>, AMDGPUSample_c_o>;
+defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler_WQM <mimgopc<0x49, 0x49, 0x39>, AMDGPUSample_c_cl_o>;
+defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <mimgopc<0x2b, 0x2b, 0x3a>, AMDGPUSample_c_d_o>;
+defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <mimgopc<0x4a, 0x4a, 0x3b>, AMDGPUSample_c_d_cl_o>;
+defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <mimgopc<0x2c, 0x2c, 0x3c>, AMDGPUSample_c_l_o>;
+defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler_WQM <mimgopc<0x4b, 0x4b, 0x3e>, AMDGPUSample_c_b_cl_o>;
+defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler_WQM <mimgopc<0x2d, 0x2d, 0x3d>, AMDGPUSample_c_b_o>;
+defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <mimgopc<0x2e, 0x2e, 0x3f>, AMDGPUSample_c_lz_o>;
+defm IMAGE_GATHER4 : MIMG_Gather_WQM <mimgopc<0x2f, 0x2f, 0x40>, AMDGPUSample>;
+defm IMAGE_GATHER4_CL : MIMG_Gather_WQM <mimgopc<0x60, 0x60, 0x41>, AMDGPUSample_cl>;
+defm IMAGE_GATHER4_L : MIMG_Gather <mimgopc<0x30, 0x30, 0x44>, AMDGPUSample_l>;
+defm IMAGE_GATHER4_B : MIMG_Gather_WQM <mimgopc<0x31, 0x31, 0x45>, AMDGPUSample_b>;
+defm IMAGE_GATHER4_B_CL : MIMG_Gather_WQM <mimgopc<0x61, 0x61, 0x46>, AMDGPUSample_b_cl>;
+defm IMAGE_GATHER4_LZ : MIMG_Gather <mimgopc<0x32, 0x32, 0x47>, AMDGPUSample_lz>;
+defm IMAGE_GATHER4_C : MIMG_Gather_WQM <mimgopc<0x33, 0x33, 0x48>, AMDGPUSample_c>;
+defm IMAGE_GATHER4_C_CL : MIMG_Gather_WQM <mimgopc<0x62, 0x62, 0x49>, AMDGPUSample_c_cl>;
+defm IMAGE_GATHER4_C_L : MIMG_Gather <mimgopc<0x63, 0x63, 0x4c>, AMDGPUSample_c_l>;
+defm IMAGE_GATHER4_C_B : MIMG_Gather_WQM <mimgopc<0x64, 0x64, 0x4d>, AMDGPUSample_c_b>;
+defm IMAGE_GATHER4_C_B_CL : MIMG_Gather_WQM <mimgopc<0x65, 0x65, 0x4e>, AMDGPUSample_c_b_cl>;
+defm IMAGE_GATHER4_C_LZ : MIMG_Gather <mimgopc<0x34, 0x34, 0x4f>, AMDGPUSample_c_lz>;
+defm IMAGE_GATHER4_O : MIMG_Gather_WQM <mimgopc<0x35, 0x35, 0x50>, AMDGPUSample_o>;
+defm IMAGE_GATHER4_CL_O : MIMG_Gather_WQM <mimgopc<MIMG.NOP, MIMG.NOP, 0x51>, AMDGPUSample_cl_o>;
+defm IMAGE_GATHER4_L_O : MIMG_Gather <mimgopc<MIMG.NOP, MIMG.NOP, 0x54>, AMDGPUSample_l_o>;
+defm IMAGE_GATHER4_B_O : MIMG_Gather_WQM <mimgopc<MIMG.NOP, MIMG.NOP, 0x55>, AMDGPUSample_b_o>;
+defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <mimgopc<MIMG.NOP, MIMG.NOP, 0x56>, AMDGPUSample_b_cl_o>;
+defm IMAGE_GATHER4_LZ_O : MIMG_Gather <mimgopc<0x36, 0x36, 0x57>, AMDGPUSample_lz_o>;
+defm IMAGE_GATHER4_C_O : MIMG_Gather_WQM <mimgopc<MIMG.NOP, MIMG.NOP, 0x58>, AMDGPUSample_c_o>;
+defm IMAGE_GATHER4_C_CL_O : MIMG_Gather_WQM <mimgopc<MIMG.NOP, MIMG.NOP, 0x59>, AMDGPUSample_c_cl_o>;
+defm IMAGE_GATHER4_C_L_O : MIMG_Gather <mimgopc<MIMG.NOP, MIMG.NOP, 0x5c>, AMDGPUSample_c_l_o>;
+defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM <mimgopc<MIMG.NOP, MIMG.NOP, 0x5d>, AMDGPUSample_c_b_o>;
+defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM <mimgopc<MIMG.NOP, MIMG.NOP, 0x5e>, AMDGPUSample_c_b_cl_o>;
+defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <mimgopc<0x37, 0x37, 0x5f>, AMDGPUSample_c_lz_o>;
let SubtargetPredicate = isGFX9Plus in
-defm IMAGE_GATHER4H : MIMG_Gather <mimgopc<0x90, 0x61, 0x42>, AMDGPUSample, 1, "image_gather4h">;
-
-defm IMAGE_GET_LOD : MIMG_Sampler <mimgopc<0x38, 0x60>, AMDGPUSample, 1, 0, 1, "image_get_lod">;
-
-defm IMAGE_SAMPLE_CD : MIMG_Sampler <mimgopc<MIMG.NOP, 0x68>, AMDGPUSample_cd>;
-defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <mimgopc<MIMG.NOP, 0x69>, AMDGPUSample_cd_cl>;
-defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <mimgopc<MIMG.NOP, 0x6a>, AMDGPUSample_c_cd>;
-defm IMAGE_SAMPLE_C_CD_CL : MIMG_Sampler <mimgopc<MIMG.NOP, 0x6b>, AMDGPUSample_c_cd_cl>;
-defm IMAGE_SAMPLE_CD_O : MIMG_Sampler <mimgopc<MIMG.NOP, 0x6c>, AMDGPUSample_cd_o>;
-defm IMAGE_SAMPLE_CD_CL_O : MIMG_Sampler <mimgopc<MIMG.NOP, 0x6d>, AMDGPUSample_cd_cl_o>;
-defm IMAGE_SAMPLE_C_CD_O : MIMG_Sampler <mimgopc<MIMG.NOP, 0x6e>, AMDGPUSample_c_cd_o>;
-defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <mimgopc<MIMG.NOP, 0x6f>, AMDGPUSample_c_cd_cl_o>;
+defm IMAGE_GATHER4H : MIMG_Gather <mimgopc<0x90, 0x90, 0x61, 0x42>, AMDGPUSample, 1, "image_gather4h">;
+
+defm IMAGE_GET_LOD : MIMG_Sampler <mimgopc<0x38, 0x38, 0x60>, AMDGPUSample, 1, 0, 1, "image_get_lod">;
+
+defm IMAGE_SAMPLE_CD : MIMG_Sampler <mimgopc<MIMG.NOP, MIMG.NOP, 0x68>, AMDGPUSample_cd>;
+defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <mimgopc<MIMG.NOP, MIMG.NOP, 0x69>, AMDGPUSample_cd_cl>;
+defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <mimgopc<MIMG.NOP, MIMG.NOP, 0x6a>, AMDGPUSample_c_cd>;
+defm IMAGE_SAMPLE_C_CD_CL : MIMG_Sampler <mimgopc<MIMG.NOP, MIMG.NOP, 0x6b>, AMDGPUSample_c_cd_cl>;
+defm IMAGE_SAMPLE_CD_O : MIMG_Sampler <mimgopc<MIMG.NOP, MIMG.NOP, 0x6c>, AMDGPUSample_cd_o>;
+defm IMAGE_SAMPLE_CD_CL_O : MIMG_Sampler <mimgopc<MIMG.NOP, MIMG.NOP, 0x6d>, AMDGPUSample_cd_cl_o>;
+defm IMAGE_SAMPLE_C_CD_O : MIMG_Sampler <mimgopc<MIMG.NOP, MIMG.NOP, 0x6e>, AMDGPUSample_c_cd_o>;
+defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <mimgopc<MIMG.NOP, MIMG.NOP, 0x6f>, AMDGPUSample_c_cd_cl_o>;
} // End OtherPredicates = [HasExtendedImageInsts]
let OtherPredicates = [HasExtendedImageInsts,HasG16] in {
-defm IMAGE_SAMPLE_D_G16 : MIMG_Sampler <mimgopc<0x39, 0xa2>, AMDGPUSample_d, 0, 1>;
-defm IMAGE_SAMPLE_D_CL_G16 : MIMG_Sampler <mimgopc<0x5f, 0xa3>, AMDGPUSample_d_cl, 0, 1>;
-defm IMAGE_SAMPLE_C_D_G16 : MIMG_Sampler <mimgopc<0x3a, 0xaa>, AMDGPUSample_c_d, 0, 1>;
-defm IMAGE_SAMPLE_C_D_CL_G16 : MIMG_Sampler <mimgopc<0x54, 0xab>, AMDGPUSample_c_d_cl, 0, 1>;
-defm IMAGE_SAMPLE_D_O_G16 : MIMG_Sampler <mimgopc<0x3b, 0xb2>, AMDGPUSample_d_o, 0, 1>;
-defm IMAGE_SAMPLE_D_CL_O_G16 : MIMG_Sampler <mimgopc<0x55, 0xb3>, AMDGPUSample_d_cl_o, 0, 1>;
-defm IMAGE_SAMPLE_C_D_O_G16 : MIMG_Sampler <mimgopc<0x3c, 0xba>, AMDGPUSample_c_d_o, 0, 1>;
-defm IMAGE_SAMPLE_C_D_CL_O_G16 : MIMG_Sampler <mimgopc<0x56, 0xbb>, AMDGPUSample_c_d_cl_o, 0, 1>;
-defm IMAGE_SAMPLE_CD_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, 0xe8>, AMDGPUSample_cd, 0, 1>;
-defm IMAGE_SAMPLE_CD_CL_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, 0xe9>, AMDGPUSample_cd_cl, 0, 1>;
-defm IMAGE_SAMPLE_C_CD_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, 0xea>, AMDGPUSample_c_cd, 0, 1>;
-defm IMAGE_SAMPLE_C_CD_CL_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, 0xeb>, AMDGPUSample_c_cd_cl, 0, 1>;
-defm IMAGE_SAMPLE_CD_O_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, 0xec>, AMDGPUSample_cd_o, 0, 1>;
-defm IMAGE_SAMPLE_CD_CL_O_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, 0xed>, AMDGPUSample_cd_cl_o, 0, 1>;
-defm IMAGE_SAMPLE_C_CD_O_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, 0xee>, AMDGPUSample_c_cd_o, 0, 1>;
-defm IMAGE_SAMPLE_C_CD_CL_O_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, 0xef>, AMDGPUSample_c_cd_cl_o, 0, 1>;
+defm IMAGE_SAMPLE_D_G16 : MIMG_Sampler <mimgopc<0x39, 0x39, 0xa2>, AMDGPUSample_d, 0, 1>;
+defm IMAGE_SAMPLE_D_CL_G16 : MIMG_Sampler <mimgopc<0x5f, 0x5f, 0xa3>, AMDGPUSample_d_cl, 0, 1>;
+defm IMAGE_SAMPLE_C_D_G16 : MIMG_Sampler <mimgopc<0x3a, 0x3a, 0xaa>, AMDGPUSample_c_d, 0, 1>;
+defm IMAGE_SAMPLE_C_D_CL_G16 : MIMG_Sampler <mimgopc<0x54, 0x54, 0xab>, AMDGPUSample_c_d_cl, 0, 1>;
+defm IMAGE_SAMPLE_D_O_G16 : MIMG_Sampler <mimgopc<0x3b, 0x3b, 0xb2>, AMDGPUSample_d_o, 0, 1>;
+defm IMAGE_SAMPLE_D_CL_O_G16 : MIMG_Sampler <mimgopc<0x55, 0x55, 0xb3>, AMDGPUSample_d_cl_o, 0, 1>;
+defm IMAGE_SAMPLE_C_D_O_G16 : MIMG_Sampler <mimgopc<0x3c, 0x3c, 0xba>, AMDGPUSample_c_d_o, 0, 1>;
+defm IMAGE_SAMPLE_C_D_CL_O_G16 : MIMG_Sampler <mimgopc<0x56, 0x56, 0xbb>, AMDGPUSample_c_d_cl_o, 0, 1>;
+defm IMAGE_SAMPLE_CD_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, MIMG.NOP, 0xe8>, AMDGPUSample_cd, 0, 1>;
+defm IMAGE_SAMPLE_CD_CL_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, MIMG.NOP, 0xe9>, AMDGPUSample_cd_cl, 0, 1>;
+defm IMAGE_SAMPLE_C_CD_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, MIMG.NOP, 0xea>, AMDGPUSample_c_cd, 0, 1>;
+defm IMAGE_SAMPLE_C_CD_CL_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, MIMG.NOP, 0xeb>, AMDGPUSample_c_cd_cl, 0, 1>;
+defm IMAGE_SAMPLE_CD_O_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, MIMG.NOP, 0xec>, AMDGPUSample_cd_o, 0, 1>;
+defm IMAGE_SAMPLE_CD_CL_O_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, MIMG.NOP, 0xed>, AMDGPUSample_cd_cl_o, 0, 1>;
+defm IMAGE_SAMPLE_C_CD_O_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, MIMG.NOP, 0xee>, AMDGPUSample_c_cd_o, 0, 1>;
+defm IMAGE_SAMPLE_C_CD_CL_O_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, MIMG.NOP, 0xef>, AMDGPUSample_c_cd_cl_o, 0, 1>;
} // End OtherPredicates = [HasExtendedImageInsts,HasG16]
//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"image_rsrc256", mimgopc<0x7e>>;
//def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", mimgopc<0x7f>>;
let SubtargetPredicate = isGFX10Only, OtherPredicates = [HasGFX10_AEncoding] in
-defm IMAGE_MSAA_LOAD_X : MIMG_NoSampler <mimgopc<MIMG.NOP, 0x80>, "image_msaa_load", 1, 0, 0, 1>;
+defm IMAGE_MSAA_LOAD_X : MIMG_NoSampler <mimgopc<MIMG.NOP, MIMG.NOP, 0x80>, "image_msaa_load", 1, 0, 0, 1>;
let OtherPredicates = [HasGFX10_AEncoding] in
-defm IMAGE_MSAA_LOAD : MIMG_MSAA_Load <mimgopc<0x18, MIMG.NOP>, "image_msaa_load">;
+defm IMAGE_MSAA_LOAD : MIMG_MSAA_Load <mimgopc<0x18, 0x18, MIMG.NOP>, "image_msaa_load">;
let OtherPredicates = [HasGFX10_AEncoding] in {
-defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x19, 0xe6>, "image_bvh_intersect_ray", 0, 0>;
-defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x19, 0xe6>, "image_bvh_intersect_ray", 0, 1>;
-defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x1a, 0xe7>, "image_bvh64_intersect_ray", 1, 0>;
-defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x1a, 0xe7>, "image_bvh64_intersect_ray", 1, 1>;
+defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x19, 0x19, 0xe6>, "image_bvh_intersect_ray", 0, 0>;
+defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x19, 0x19, 0xe6>, "image_bvh_intersect_ray", 0, 1>;
+defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x1a, 0x1a, 0xe7>, "image_bvh64_intersect_ray", 1, 0>;
+defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x1a, 0x1a, 0xe7>, "image_bvh64_intersect_ray", 1, 1>;
} // End OtherPredicates = [HasGFX10_AEncoding]
} // End let OtherPredicates = [HasImageInsts]
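Across the instruction definitions above, mimgopc grew a leading operand: each record now carries one opcode per encoding family (apparently GFX12, GFX11, GFX10M, then the older VI/SI slots), with MIMG.NOP marking generations where the instruction does not exist. A minimal C++ sketch of that per-generation lookup idea, using a hypothetical enum and sentinel rather than LLVM's real types:

    #include <array>
    #include <cstdint>
    #include <optional>

    enum class Gen { GFX12, GFX11, GFX10M, VI, SI, Count };
    constexpr uint16_t NOP = 0xffff; // stand-in for MIMG.NOP

    struct ImageOp {
      std::array<uint16_t, static_cast<size_t>(Gen::Count)> Enc;
      // Encoding for a generation, or nullopt where the table says NOP.
      std::optional<uint16_t> encodingFor(Gen G) const {
        uint16_t E = Enc[static_cast<size_t>(G)];
        return E == NOP ? std::nullopt : std::optional<uint16_t>(E);
      }
    };

    // image_atomic_fmin above exists only in the GFX10 slot (0x1e).
    constexpr ImageOp AtomicFMin{{NOP, NOP, 0x1e, NOP, NOP}};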
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600.h
index 2b0a887c61fa..6c40c2813e20 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600.h
@@ -27,7 +27,7 @@ FunctionPass *createR600ClauseMergePass();
FunctionPass *createR600Packetizer();
FunctionPass *createR600ControlFlowFinalizer();
FunctionPass *createR600MachineCFGStructurizerPass();
-FunctionPass *createR600ISelDag(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass *createR600ISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel);
ModulePass *createR600OpenCLImageTypeLoweringPass();
void initializeR600ClauseMergePassPass(PassRegistry &);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp
index 20c2ff8a4fd7..293db13f34f6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp
@@ -30,7 +30,7 @@ class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
public:
R600DAGToDAGISel() = delete;
- explicit R600DAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
+ explicit R600DAGToDAGISel(TargetMachine &TM, CodeGenOptLevel OptLevel)
: AMDGPUDAGToDAGISel(TM, OptLevel) {}
void Select(SDNode *N) override;
@@ -183,6 +183,6 @@ bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
/// This pass converts a legalized DAG into an R600-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
return new R600DAGToDAGISel(TM, OptLevel);
}
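The CodeGenOpt::Level to CodeGenOptLevel renames in this and the following files track LLVM's move from a namespace-wrapped enum to a scoped enum class, so every enumerator mention must now be qualified. A compressed before/after sketch (illustrative only):

    #include "llvm/Support/CodeGen.h"

    // Old: CodeGenOpt::Level OL = CodeGenOpt::None;
    // New: the enumerator lives on the enum class itself.
    void configure(llvm::CodeGenOptLevel OL) {
      if (OL > llvm::CodeGenOptLevel::None) {
        // ... run the optimizing selector ...
      }
    }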
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index ad072cfe23b1..c1ba9c514874 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -101,7 +101,7 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSUB, MVT::f32, Expand);
- setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FRINT, ISD::FFLOOR},
+ setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FROUNDEVEN, ISD::FFLOOR},
MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, {MVT::f32, MVT::i32}, Custom);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td
index f4dfbe8adc75..f82bd55beccc 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -782,7 +782,7 @@ def SETNE_DX10 : R600_2OP <
def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
def TRUNC : R600_1OP_Helper <0x11, "TRUNC", ftrunc>;
def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
-def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>;
+def RNDNE : R600_1OP_Helper <0x13, "RNDNE", froundeven>;
def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;
def MOV : R600_1OP <0x19, "MOV", []>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp
index 2a15c0123b74..195dc4f9a0f4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp
@@ -163,11 +163,11 @@ class R600OpenCLImageTypeLoweringPass : public ModulePass {
Value *Replacement = nullptr;
StringRef Name = F->getName();
- if (Name.startswith(GetImageResourceIDFunc)) {
+ if (Name.starts_with(GetImageResourceIDFunc)) {
Replacement = ConstantInt::get(Int32Type, ResourceID);
- } else if (Name.startswith(GetImageSizeFunc)) {
+ } else if (Name.starts_with(GetImageSizeFunc)) {
Replacement = &ImageSizeArg;
- } else if (Name.startswith(GetImageFormatFunc)) {
+ } else if (Name.starts_with(GetImageFormatFunc)) {
Replacement = &ImageFormatArg;
} else {
continue;
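The startswith to starts_with renames here are mechanical: StringRef adopted the C++20 std::string_view spellings and the old names were deprecated. A one-function sketch (the prefix string is made up for illustration):

    #include "llvm/ADT/StringRef.h"

    bool isImageQueryIntrinsic(llvm::StringRef Name) {
      // Previously: Name.startswith(...)
      return Name.starts_with("llvm.OpenCL.image.");
    }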
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp
index 36840587d219..6cd4fd42444d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp
@@ -53,7 +53,7 @@ R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
TargetOptions Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
setRequiresStructuredCFG(true);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetMachine.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetMachine.h
index f0e3cd352642..3fe54c778fe1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetMachine.h
@@ -33,7 +33,7 @@ public:
R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, TargetOptions Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index b87cd8c66cc8..932c0d6216ce 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"
@@ -206,9 +207,12 @@ bool SIAnnotateControlFlow::openIf(BranchInst *Term) {
if (isUniform(Term))
return false;
- Value *Ret = CallInst::Create(If, Term->getCondition(), "", Term);
- Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
- push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
+ IRBuilder<> IRB(Term);
+ Value *IfCall = IRB.CreateCall(If, {Term->getCondition()});
+ Value *Cond = IRB.CreateExtractValue(IfCall, {0});
+ Value *Mask = IRB.CreateExtractValue(IfCall, {1});
+ Term->setCondition(Cond);
+ push(Term->getSuccessor(1), Mask);
return true;
}
@@ -217,15 +221,24 @@ bool SIAnnotateControlFlow::insertElse(BranchInst *Term) {
if (isUniform(Term)) {
return false;
}
- Value *Ret = CallInst::Create(Else, popSaved(), "", Term);
- Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
- push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
+
+ IRBuilder<> IRB(Term);
+ Value *ElseCall = IRB.CreateCall(Else, {popSaved()});
+ Value *Cond = IRB.CreateExtractValue(ElseCall, {0});
+ Value *Mask = IRB.CreateExtractValue(ElseCall, {1});
+ Term->setCondition(Cond);
+ push(Term->getSuccessor(1), Mask);
return true;
}
/// Recursively handle the condition leading to a loop
Value *SIAnnotateControlFlow::handleLoopCondition(
Value *Cond, PHINode *Broken, llvm::Loop *L, BranchInst *Term) {
+
+ auto CreateBreak = [this, Cond, Broken](Instruction *I) -> CallInst * {
+ return IRBuilder<>(I).CreateCall(IfBreak, {Cond, Broken});
+ };
+
if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
BasicBlock *Parent = Inst->getParent();
Instruction *Insert;
@@ -235,8 +248,7 @@ Value *SIAnnotateControlFlow::handleLoopCondition(
Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime();
}
- Value *Args[] = { Cond, Broken };
- return CallInst::Create(IfBreak, Args, "", Insert);
+ return CreateBreak(Insert);
}
// Insert IfBreak in the loop header TERM for constant COND other than true.
@@ -244,14 +256,12 @@ Value *SIAnnotateControlFlow::handleLoopCondition(
Instruction *Insert = Cond == BoolTrue ?
Term : L->getHeader()->getTerminator();
- Value *Args[] = { Cond, Broken };
- return CallInst::Create(IfBreak, Args, "", Insert);
+ return CreateBreak(Insert);
}
if (isa<Argument>(Cond)) {
Instruction *Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime();
- Value *Args[] = { Cond, Broken };
- return CallInst::Create(IfBreak, Args, "", Insert);
+ return CreateBreak(Insert);
}
llvm_unreachable("Unhandled loop condition!");
@@ -268,7 +278,8 @@ bool SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
return false;
BasicBlock *Target = Term->getSuccessor(1);
- PHINode *Broken = PHINode::Create(IntMask, 0, "phi.broken", &Target->front());
+ PHINode *Broken = PHINode::Create(IntMask, 0, "phi.broken");
+ Broken->insertBefore(Target->begin());
Value *Cond = Term->getCondition();
Term->setCondition(BoolTrue);
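Creating the PHI detached and then calling insertBefore(Target->begin()) uses LLVM's newer iterator-based insertion API, which keeps insertion positions accurate when debug records sit at the head of a block. A minimal sketch of the pattern, assuming nothing beyond the IR headers:

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"

    // Insert a PHI of type Ty at the very top of BB via the iterator API.
    llvm::PHINode *makeHeaderPhi(llvm::BasicBlock *BB, llvm::Type *Ty) {
      llvm::PHINode *Phi =
          llvm::PHINode::Create(Ty, /*NumReservedValues=*/0, "phi");
      Phi->insertBefore(BB->begin());
      return Phi;
    }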
@@ -286,7 +297,8 @@ bool SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
Broken->addIncoming(PHIValue, Pred);
}
- Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
+ CallInst *LoopCall = IRBuilder<>(Term).CreateCall(Loop, {Arg});
+ Term->setCondition(LoopCall);
push(Term->getSuccessor(0), Arg);
@@ -325,7 +337,7 @@ bool SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
// Split edge to make Def dominate Use
FirstInsertionPt = &*SplitEdge(DefBB, BB, DT, LI)->getFirstInsertionPt();
}
- CallInst::Create(EndCf, Exec, "", FirstInsertionPt);
+ IRBuilder<>(FirstInsertionPt).CreateCall(EndCf, {Exec});
}
return true;
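The recurring change in this file swaps raw CallInst::Create/ExtractValueInst::Create calls for IRBuilder, which threads the insertion point (and debug location) through every created instruction. A minimal sketch of the openIf shape, where IfFn stands in for the amdgcn "if" intrinsic returning a {condition, exec-mask} pair:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Rewire a divergent conditional branch through IfFn.
    Value *annotateIf(BranchInst *Term, FunctionCallee IfFn) {
      IRBuilder<> IRB(Term); // all created instructions go before Term
      Value *IfCall = IRB.CreateCall(IfFn, {Term->getCondition()});
      Term->setCondition(IRB.CreateExtractValue(IfCall, {0}));
      // Element 1 is the saved exec mask; the real pass pushes it on a
      // stack for the matching end_cf. Here we just return it.
      return IRB.CreateExtractValue(IfCall, {1});
    }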
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h
index cd1818285e3e..b291400a947c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -44,6 +44,7 @@ enum {
GFX90A = 8,
GFX940 = 9,
GFX11 = 10,
+ GFX12 = 11,
};
}
@@ -80,19 +81,21 @@ enum : uint64_t {
MTBUF = 1 << 18,
SMRD = 1 << 19,
MIMG = 1 << 20,
- EXP = 1 << 21,
- FLAT = 1 << 22,
- DS = 1 << 23,
+ VIMAGE = 1 << 21,
+ VSAMPLE = 1 << 22,
+ EXP = 1 << 23,
+ FLAT = 1 << 24,
+ DS = 1 << 25,
// Pseudo instruction formats.
- VGPRSpill = 1 << 24,
- SGPRSpill = 1 << 25,
+ VGPRSpill = 1 << 26,
+ SGPRSpill = 1 << 27,
// LDSDIR instruction format.
- LDSDIR = 1 << 26,
+ LDSDIR = 1 << 28,
// VINTERP instruction format.
- VINTERP = 1 << 27,
+ VINTERP = 1 << 29,
// High bits - other information.
VM_CNT = UINT64_C(1) << 32,
@@ -161,6 +164,9 @@ enum : uint64_t {
// Is never uniform.
IsNeverUniform = UINT64_C(1) << 61,
+
+ // ds_gws_* instructions.
+ GWS = UINT64_C(1) << 62,
};
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
@@ -207,6 +213,9 @@ enum OperandType : unsigned {
OPERAND_REG_INLINE_C_V2INT32,
OPERAND_REG_INLINE_C_V2FP32,
+ // Operand for split barrier inline constant
+ OPERAND_INLINE_SPLIT_BARRIER_INT32,
+
/// Operand with 32-bit immediate that uses the constant bus.
OPERAND_KIMM32,
OPERAND_KIMM16,
@@ -326,13 +335,20 @@ enum : unsigned {
LITERAL_CONST = 255,
VGPR_MIN = 256,
VGPR_MAX = 511,
- IS_VGPR = 256 // Indicates VGPR or AGPR
+ IS_VGPR = 256, // Indicates VGPR or AGPR
};
} // namespace EncValues
-} // namespace AMDGPU
-namespace AMDGPU {
+// Register codes as defined in TableGen's HWEncoding field.
+namespace HWEncoding {
+enum : unsigned {
+ REG_IDX_MASK = 0xff,
+ IS_VGPR_OR_AGPR = 1 << 8,
+ IS_HI = 1 << 9, // High 16-bit register.
+};
+} // namespace HWEncoding
+
namespace CPol {
enum CPol {
@@ -343,7 +359,47 @@ enum CPol {
SC0 = GLC,
SC1 = SCC,
NT = SLC,
- ALL = GLC | SLC | DLC | SCC
+ ALL_pregfx12 = GLC | SLC | DLC | SCC,
+ SWZ_pregfx12 = 8,
+
+ // Below are GFX12+ cache policy bits
+
+ // Temporal hint
+ TH = 0x7, // All TH bits
+ TH_RT = 0, // regular
+ TH_NT = 1, // non-temporal
+ TH_HT = 2, // high-temporal
+ TH_LU = 3, // last use
+ TH_RT_WB = 3, // regular (CU, SE), high-temporal with write-back (MALL)
+ TH_NT_RT = 4, // non-temporal (CU, SE), regular (MALL)
+ TH_RT_NT = 5, // regular (CU, SE), non-temporal (MALL)
+ TH_NT_HT = 6, // non-temporal (CU, SE), high-temporal (MALL)
+ TH_NT_WB = 7, // non-temporal (CU, SE), high-temporal with write-back (MALL)
+ TH_BYPASS = 3, // only to be used with scope = 3
+
+ TH_RESERVED = 7, // unused value for load insts
+
+ // Bits of TH for atomics
+ TH_ATOMIC_RETURN = GLC, // Returning vs non-returning
+ TH_ATOMIC_NT = SLC, // Non-temporal vs regular
+ TH_ATOMIC_CASCADE = 4, // Cascading vs regular
+
+ // Scope
+ SCOPE = 0x3 << 3, // All Scope bits
+ SCOPE_CU = 0 << 3,
+ SCOPE_SE = 1 << 3,
+ SCOPE_DEV = 2 << 3,
+ SCOPE_SYS = 3 << 3,
+
+ SWZ = 1 << 6, // Swizzle bit
+
+ ALL = TH | SCOPE,
+
+ // Helper bits
+ TH_TYPE_LOAD = 1 << 7, // TH_LOAD policy
+ TH_TYPE_STORE = 1 << 8, // TH_STORE policy
+ TH_TYPE_ATOMIC = 1 << 9, // TH_ATOMIC policy
+ TH_REAL_BYPASS = 1 << 10, // is TH=3 bypass policy or not
};
} // namespace CPol
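On GFX12 the per-bit GLC/SLC/DLC policy gives way to a packed field: bits [2:0] hold a temporal hint (TH) and bits [4:3] a scope, so a full cache policy is one TH value OR'd with one SCOPE value. A small self-contained sketch restating just the constants it uses (values copied from the enum above):

    #include <cassert>

    namespace CPol {
    enum : unsigned {
      TH = 0x7,
      TH_NT = 1,
      SCOPE = 0x3 << 3,
      SCOPE_SYS = 3 << 3,
    };
    } // namespace CPol

    int main() {
      // Compose a non-temporal, system-scope policy, then pick it apart.
      unsigned Policy = CPol::TH_NT | CPol::SCOPE_SYS;
      assert((Policy & CPol::TH) == CPol::TH_NT);
      assert((Policy & CPol::SCOPE) == CPol::SCOPE_SYS);
      return 0;
    }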
@@ -360,8 +416,8 @@ enum Id { // Message ID, width(4) [3:0].
ID_DEALLOC_VGPRS_GFX11Plus = 3, // reused in GFX11
ID_SAVEWAVE = 4, // added in GFX8, removed in GFX11
- ID_STALL_WAVE_GEN = 5, // added in GFX9
- ID_HALT_WAVES = 6, // added in GFX9
+ ID_STALL_WAVE_GEN = 5, // added in GFX9, removed in GFX12
+ ID_HALT_WAVES = 6, // added in GFX9, removed in GFX12
ID_ORDERED_PS_DONE = 7, // added in GFX9, removed in GFX11
ID_EARLY_PRIM_DEALLOC = 8, // added in GFX9, removed in GFX10
ID_GS_ALLOC_REQ = 9, // added in GFX9
@@ -375,6 +431,7 @@ enum Id { // Message ID, width(4) [3:0].
ID_RTN_GET_REALTIME = 131,
ID_RTN_SAVE_WAVE = 132,
ID_RTN_GET_TBA = 133,
+ ID_RTN_GET_SE_AID_ID = 134,
ID_MASK_PreGFX11_ = 0xF,
ID_MASK_GFX11Plus_ = 0xFF
@@ -425,6 +482,9 @@ enum Id { // HwRegCode, (6) [5:0]
ID_GPR_ALLOC = 5,
ID_LDS_ALLOC = 6,
ID_IB_STS = 7,
+ ID_PERF_SNAPSHOT_DATA_gfx12 = 10,
+ ID_PERF_SNAPSHOT_PC_LO_gfx12 = 11,
+ ID_PERF_SNAPSHOT_PC_HI_gfx12 = 12,
ID_MEM_BASES = 15,
ID_TBA_LO = 16,
ID_TBA_HI = 17,
@@ -436,12 +496,23 @@ enum Id { // HwRegCode, (6) [5:0]
ID_HW_ID1 = 23,
ID_HW_ID2 = 24,
ID_POPS_PACKER = 25,
- ID_PERF_SNAPSHOT_DATA = 27,
+ ID_PERF_SNAPSHOT_DATA_gfx11 = 27,
ID_SHADER_CYCLES = 29,
-
- // Register numbers reused in GFX11+
- ID_PERF_SNAPSHOT_PC_LO = 18,
- ID_PERF_SNAPSHOT_PC_HI = 19,
+ ID_SHADER_CYCLES_HI = 30,
+ ID_DVGPR_ALLOC_LO = 31,
+ ID_DVGPR_ALLOC_HI = 32,
+
+ // Register numbers reused in GFX11
+ ID_PERF_SNAPSHOT_PC_LO_gfx11 = 18,
+ ID_PERF_SNAPSHOT_PC_HI_gfx11 = 19,
+
+ // Register numbers reused in GFX12+
+ ID_STATE_PRIV = 4,
+ ID_PERF_SNAPSHOT_DATA1 = 15,
+ ID_PERF_SNAPSHOT_DATA2 = 16,
+ ID_EXCP_FLAG_PRIV = 17,
+ ID_EXCP_FLAG_USER = 18,
+ ID_TRAP_CTRL = 19,
// GFX940 specific registers
ID_XCC_ID = 20,
@@ -958,6 +1029,14 @@ enum Register_Flag : uint8_t {
} // namespace AMDGPU
+namespace AMDGPU {
+namespace Barrier {
+enum Type { TRAP = -2, WORKGROUP = -1 };
+} // namespace Barrier
+} // namespace AMDGPU
+
+// clang-format off
+
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028
#define S_00B028_VGPRS(x) (((x) & 0x3F) << 0)
#define S_00B028_SGPRS(x) (((x) & 0x0F) << 6)
@@ -1050,6 +1129,9 @@ enum Register_Flag : uint8_t {
#define S_00B848_DX10_CLAMP(x) (((x) & 0x1) << 21)
#define G_00B848_DX10_CLAMP(x) (((x) >> 21) & 0x1)
#define C_00B848_DX10_CLAMP 0xFFDFFFFF
+#define S_00B848_RR_WG_MODE(x) (((x) & 0x1) << 21)
+#define G_00B848_RR_WG_MODE(x) (((x) >> 21) & 0x1)
+#define C_00B848_RR_WG_MODE 0xFFDFFFFF
#define S_00B848_DEBUG_MODE(x) (((x) & 0x1) << 22)
#define G_00B848_DEBUG_MODE(x) (((x) >> 22) & 0x1)
#define C_00B848_DEBUG_MODE 0xFFBFFFFF
@@ -1066,7 +1148,6 @@ enum Register_Flag : uint8_t {
#define G_00B848_FWD_PROGRESS(x) (((x) >> 31) & 0x1)
#define C_00B848_FWD_PROGRESS 0x7FFFFFFF
-
// Helpers for setting FLOAT_MODE
#define FP_ROUND_ROUND_TO_NEAREST 0
#define FP_ROUND_ROUND_TO_INF 1
@@ -1108,6 +1189,9 @@ enum Register_Flag : uint8_t {
#define R_SPILLED_SGPRS 0x4
#define R_SPILLED_VGPRS 0x8
+
+// clang-format on
+
} // End namespace llvm
#endif
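The S_/G_/C_ macro triples in this header all follow one pattern: S_ shifts a field value into position, G_ extracts it, and C_ is the complement mask that clears it. A worked sketch for the one-bit RR_WG_MODE field added above at bit 21 (note it shares the bit position documented for DX10_CLAMP):

    #include <cassert>
    #include <cstdint>

    #define S_00B848_RR_WG_MODE(x) (((x) & 0x1) << 21)
    #define G_00B848_RR_WG_MODE(x) (((x) >> 21) & 0x1)
    #define C_00B848_RR_WG_MODE    0xFFDFFFFF

    int main() {
      uint32_t Rsrc1 = 0xDEADBEEF;
      // Clear the field, then set it to 1.
      Rsrc1 = (Rsrc1 & C_00B848_RR_WG_MODE) | S_00B848_RR_WG_MODE(1);
      assert(G_00B848_RR_WG_MODE(Rsrc1) == 1);
      return 0;
    }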
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index db323465c153..86980ee851bb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -88,7 +88,7 @@ public:
// VGPR to SGPR copy being processed
MachineInstr *Copy;
// All SALU instructions reachable from this copy in SSA graph
- DenseSet<MachineInstr *> SChain;
+ SetVector<MachineInstr *> SChain;
// Number of SGPR to VGPR copies that are used to put the SALU computation
// results back to VALU.
unsigned NumSVCopies;
@@ -125,7 +125,7 @@ class SIFixSGPRCopies : public MachineFunctionPass {
SmallVector<MachineInstr*, 4> PHINodes;
SmallVector<MachineInstr*, 4> S2VCopies;
unsigned NextVGPRToSGPRCopyID;
- DenseMap<unsigned, V2SCopyInfo> V2SCopies;
+ MapVector<unsigned, V2SCopyInfo> V2SCopies;
DenseMap<MachineInstr *, SetVector<unsigned>> SiblingPenalty;
public:
@@ -152,6 +152,13 @@ public:
void processPHINode(MachineInstr &MI);
+ // Check if MO is an immediate materialized into a VGPR, and if so replace it
+ // with an SGPR immediate. The VGPR immediate is also deleted if it does not
+ // have any other uses.
+ bool tryMoveVGPRConstToSGPR(MachineOperand &MO, Register NewDst,
+ MachineBasicBlock *BlockToInsertTo,
+ MachineBasicBlock::iterator PointToInsertTo);
+
StringRef getPassName() const override { return "SI Fix SGPR copies"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -350,7 +357,7 @@ static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
return false;
// FIXME: Handle copies with sub-regs.
- if (Copy->getOperand(0).getSubReg())
+ if (Copy->getOperand(1).getSubReg())
return false;
switch (MoveImm->getOpcode()) {
@@ -360,7 +367,7 @@ static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
SMovOp = AMDGPU::S_MOV_B32;
break;
case AMDGPU::V_MOV_B64_PSEUDO:
- SMovOp = AMDGPU::S_MOV_B64;
+ SMovOp = AMDGPU::S_MOV_B64_IMM_PSEUDO;
break;
}
Imm = ImmOp->getImm();
@@ -662,13 +669,17 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
: MBB;
MachineBasicBlock::iterator PointToInsertCopy =
MI.isPHI() ? BlockToInsertCopy->getFirstInstrTerminator() : I;
- MachineInstr *NewCopy =
- BuildMI(*BlockToInsertCopy, PointToInsertCopy,
- PointToInsertCopy->getDebugLoc(),
- TII->get(AMDGPU::COPY), NewDst)
- .addReg(MO.getReg());
- MO.setReg(NewDst);
- analyzeVGPRToSGPRCopy(NewCopy);
+
+ if (!tryMoveVGPRConstToSGPR(MO, NewDst, BlockToInsertCopy,
+ PointToInsertCopy)) {
+ MachineInstr *NewCopy =
+ BuildMI(*BlockToInsertCopy, PointToInsertCopy,
+ PointToInsertCopy->getDebugLoc(),
+ TII->get(AMDGPU::COPY), NewDst)
+ .addReg(MO.getReg());
+ MO.setReg(NewDst);
+ analyzeVGPRToSGPRCopy(NewCopy);
+ }
}
}
}
@@ -765,7 +776,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
for (auto MI : PHINodes) {
processPHINode(*MI);
}
- if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
+ if (MF.getTarget().getOptLevel() > CodeGenOptLevel::None && EnableM0Merge)
hoistAndMergeSGPRInits(AMDGPU::M0, *MRI, TRI, *MDT, TII);
SiblingPenalty.clear();
@@ -829,6 +840,32 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
}
}
+bool SIFixSGPRCopies::tryMoveVGPRConstToSGPR(
+ MachineOperand &MaybeVGPRConstMO, Register DstReg,
+ MachineBasicBlock *BlockToInsertTo,
+ MachineBasicBlock::iterator PointToInsertTo) {
+
+ MachineInstr *DefMI = MRI->getVRegDef(MaybeVGPRConstMO.getReg());
+ if (!DefMI || !DefMI->isMoveImmediate())
+ return false;
+
+ MachineOperand *SrcConst = TII->getNamedOperand(*DefMI, AMDGPU::OpName::src0);
+ if (SrcConst->isReg())
+ return false;
+
+ const TargetRegisterClass *SrcRC =
+ MRI->getRegClass(MaybeVGPRConstMO.getReg());
+ unsigned MoveSize = TRI->getRegSizeInBits(*SrcRC);
+ unsigned MoveOp = MoveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
+ BuildMI(*BlockToInsertTo, PointToInsertTo, PointToInsertTo->getDebugLoc(),
+ TII->get(MoveOp), DstReg)
+ .add(*SrcConst);
+ if (MRI->hasOneUse(MaybeVGPRConstMO.getReg()))
+ DefMI->eraseFromParent();
+ MaybeVGPRConstMO.setReg(DstReg);
+ return true;
+}
+
bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI,
MachineBasicBlock::iterator &I) {
Register DstReg = MI.getOperand(0).getReg();
@@ -846,25 +883,10 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI,
TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg)
.add(MI.getOperand(1));
MI.getOperand(1).setReg(TmpReg);
- } else {
- MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
- if (DefMI && DefMI->isMoveImmediate()) {
- MachineOperand SrcConst = DefMI->getOperand(AMDGPU::getNamedOperandIdx(
- DefMI->getOpcode(), AMDGPU::OpName::src0));
- if (!SrcConst.isReg()) {
- const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
- unsigned MoveSize = TRI->getRegSizeInBits(*SrcRC);
- unsigned MoveOp =
- MoveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
- BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(MoveOp),
- DstReg)
- .add(SrcConst);
- I = std::next(I);
- if (MRI->hasOneUse(SrcReg))
- DefMI->eraseFromParent();
- MI.eraseFromParent();
- }
- }
+ } else if (tryMoveVGPRConstToSGPR(MI.getOperand(1), DstReg, MI.getParent(),
+ MI)) {
+ I = std::next(I);
+ MI.eraseFromParent();
}
return true;
}
@@ -966,7 +988,7 @@ bool SIFixSGPRCopies::needToBeConvertedToVALU(V2SCopyInfo *Info) {
for (auto J : Info->Siblings) {
auto InfoIt = V2SCopies.find(J);
if (InfoIt != V2SCopies.end()) {
- MachineInstr *SiblingCopy = InfoIt->getSecond().Copy;
+ MachineInstr *SiblingCopy = InfoIt->second.Copy;
if (SiblingCopy->isImplicitDef())
// the COPY has already been MoveToVALUed
continue;
@@ -1001,15 +1023,15 @@ void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
unsigned CurID = LoweringWorklist.pop_back_val();
auto CurInfoIt = V2SCopies.find(CurID);
if (CurInfoIt != V2SCopies.end()) {
- V2SCopyInfo C = CurInfoIt->getSecond();
+ V2SCopyInfo C = CurInfoIt->second;
LLVM_DEBUG(dbgs() << "Processing ...\n"; C.dump());
for (auto S : C.Siblings) {
auto SibInfoIt = V2SCopies.find(S);
if (SibInfoIt != V2SCopies.end()) {
- V2SCopyInfo &SI = SibInfoIt->getSecond();
+ V2SCopyInfo &SI = SibInfoIt->second;
LLVM_DEBUG(dbgs() << "Sibling:\n"; SI.dump());
if (!SI.NeedToBeConvertedToVALU) {
- set_subtract(SI.SChain, C.SChain);
+ SI.SChain.set_subtract(C.SChain);
if (needToBeConvertedToVALU(&SI))
LoweringWorklist.push_back(SI.ID);
}
@@ -1018,6 +1040,8 @@ void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
}
LLVM_DEBUG(dbgs() << "V2S copy " << *C.Copy
<< " is being turned to VALU\n");
+ // TODO: MapVector::erase is inefficient. Do bulk removal with remove_if
+ // instead.
V2SCopies.erase(C.ID);
Copies.insert(C.Copy);
}
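The container swaps in this file (DenseSet to SetVector, DenseMap to MapVector) are about determinism: hash-table iteration order can vary between runs, and here the order feeds lowering decisions. MapVector iterates in insertion order, at the cost of the slower erase noted in the TODO; its iterators also deref to std::pair, hence the getSecond() to second changes. A minimal sketch of the ordering property:

    #include "llvm/ADT/MapVector.h"
    #include <cstdio>

    int main() {
      llvm::MapVector<unsigned, const char *> V2SCopies;
      V2SCopies.insert({42, "first"});
      V2SCopies.insert({7, "second"});
      // Always prints 42 then 7 -- insertion order, not hash order.
      for (const auto &KV : V2SCopies)
        std::printf("%u -> %s\n", KV.first, KV.second);
      return 0;
    }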
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 9f1d6038f1b6..709de612d81d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -80,6 +80,10 @@ public:
bool updateOperand(FoldCandidate &Fold) const;
+ bool canUseImmWithOpSel(FoldCandidate &Fold) const;
+
+ bool tryFoldImmWithOpSel(FoldCandidate &Fold) const;
+
bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
MachineInstr *MI, unsigned OpNo,
MachineOperand *OpToFold) const;
@@ -196,61 +200,86 @@ FunctionPass *llvm::createSIFoldOperandsPass() {
return new SIFoldOperands();
}
-bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
+bool SIFoldOperands::canUseImmWithOpSel(FoldCandidate &Fold) const {
MachineInstr *MI = Fold.UseMI;
MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
- assert(Old.isReg());
+ const uint64_t TSFlags = MI->getDesc().TSFlags;
+ assert(Old.isReg() && Fold.isImm());
- const uint64_t TSFlags = MI->getDesc().TSFlags;
- if (Fold.isImm()) {
- if (TSFlags & SIInstrFlags::IsPacked && !(TSFlags & SIInstrFlags::IsMAI) &&
- (!ST->hasDOTOpSelHazard() || !(TSFlags & SIInstrFlags::IsDOT)) &&
- AMDGPU::isFoldableLiteralV216(Fold.ImmToFold,
- ST->hasInv2PiInlineImm())) {
- // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
- // already set.
- unsigned Opcode = MI->getOpcode();
- int OpNo = MI->getOperandNo(&Old);
- int ModIdx = -1;
- if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0))
- ModIdx = AMDGPU::OpName::src0_modifiers;
- else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1))
- ModIdx = AMDGPU::OpName::src1_modifiers;
- else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2))
- ModIdx = AMDGPU::OpName::src2_modifiers;
- assert(ModIdx != -1);
- ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx);
- MachineOperand &Mod = MI->getOperand(ModIdx);
- unsigned Val = Mod.getImm();
- if (!(Val & SISrcMods::OP_SEL_0) && (Val & SISrcMods::OP_SEL_1)) {
- // Only apply the following transformation if that operand requires
- // a packed immediate.
- switch (TII->get(Opcode).operands()[OpNo].OperandType) {
- case AMDGPU::OPERAND_REG_IMM_V2FP16:
- case AMDGPU::OPERAND_REG_IMM_V2INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
- case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
- // If upper part is all zero we do not need op_sel_hi.
- if (!isUInt<16>(Fold.ImmToFold)) {
- if (!(Fold.ImmToFold & 0xffff)) {
- Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
- Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
- Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
- return true;
- }
- Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
- Old.ChangeToImmediate(Fold.ImmToFold & 0xffff);
- return true;
- }
- break;
- default:
- break;
- }
- }
- }
+ if (!(TSFlags & SIInstrFlags::IsPacked) || (TSFlags & SIInstrFlags::IsMAI) ||
+ (ST->hasDOTOpSelHazard() && (TSFlags & SIInstrFlags::IsDOT)) ||
+ isUInt<16>(Fold.ImmToFold) ||
+ !AMDGPU::isFoldableLiteralV216(Fold.ImmToFold, ST->hasInv2PiInlineImm()))
+ return false;
+
+ unsigned Opcode = MI->getOpcode();
+ int OpNo = MI->getOperandNo(&Old);
+ uint8_t OpType = TII->get(Opcode).operands()[OpNo].OperandType;
+ switch (OpType) {
+ default:
+ return false;
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ break;
}
+ return true;
+}
+
+bool SIFoldOperands::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
+ MachineInstr *MI = Fold.UseMI;
+ MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
+ unsigned Opcode = MI->getOpcode();
+ int OpNo = MI->getOperandNo(&Old);
+
+ // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
+ // already set.
+ int ModIdx = -1;
+ if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0))
+ ModIdx = AMDGPU::OpName::src0_modifiers;
+ else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1))
+ ModIdx = AMDGPU::OpName::src1_modifiers;
+ else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2))
+ ModIdx = AMDGPU::OpName::src2_modifiers;
+ assert(ModIdx != -1);
+ ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx);
+ MachineOperand &Mod = MI->getOperand(ModIdx);
+ unsigned Val = Mod.getImm();
+ if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1))
+ return false;
+
+ // Only apply the following transformation if that operand requires
+ // a packed immediate.
+ // If the upper part is all zero we do not need op_sel_hi.
+ if (!(Fold.ImmToFold & 0xffff)) {
+ MachineOperand New =
+ MachineOperand::CreateImm((Fold.ImmToFold >> 16) & 0xffff);
+ if (!TII->isOperandLegal(*MI, OpNo, &New))
+ return false;
+ Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
+ Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
+ Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
+ return true;
+ }
+ MachineOperand New = MachineOperand::CreateImm(Fold.ImmToFold & 0xffff);
+ if (!TII->isOperandLegal(*MI, OpNo, &New))
+ return false;
+ Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
+ Old.ChangeToImmediate(Fold.ImmToFold & 0xffff);
+ return true;
+}
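The core trick of tryFoldImmWithOpSel above: for packed 16-bit-pair operands, a 32-bit literal whose relevant half fits in 16 bits can be encoded as a short immediate plus op_sel bits choosing which half each lane reads. A sketch of just the bit-level split, with invented names (the real code also updates the src modifiers and legality-checks the new operand):

    #include <cstdint>

    struct OpSelSplit {
      uint16_t Imm;   // the 16-bit immediate actually encoded
      bool UseHiHalf; // true: set OP_SEL_0 so lanes read the high half
    };

    // Mirrors the two branches above: prefer the high half when the low
    // half is all zero, otherwise encode the low half.
    OpSelSplit splitPackedImm(uint32_t Imm) {
      if ((Imm & 0xffff) == 0)
        return {static_cast<uint16_t>(Imm >> 16), true};
      return {static_cast<uint16_t>(Imm & 0xffff), false};
    }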
+
+bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
+ MachineInstr *MI = Fold.UseMI;
+ MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
+ assert(Old.isReg());
+
+ if (Fold.isImm() && canUseImmWithOpSel(Fold))
+ return tryFoldImmWithOpSel(Fold);
+
if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
MachineBasicBlock *MBB = MI->getParent();
auto Liveness = MBB->computeRegisterLiveness(TRI, AMDGPU::VCC, MI, 16);
@@ -345,9 +374,50 @@ static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
MachineInstr *MI, unsigned OpNo,
MachineOperand *OpToFold) const {
- if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
+ const unsigned Opc = MI->getOpcode();
+
+ auto tryToFoldAsFMAAKorMK = [&]() {
+ if (!OpToFold->isImm())
+ return false;
+
+ const bool TryAK = OpNo == 3;
+ const unsigned NewOpc = TryAK ? AMDGPU::S_FMAAK_F32 : AMDGPU::S_FMAMK_F32;
+ MI->setDesc(TII->get(NewOpc));
+
+ // We have to fold into the operand which would be Imm, not into OpNo.
+ bool FoldAsFMAAKorMK =
+ tryAddToFoldList(FoldList, MI, TryAK ? 3 : 2, OpToFold);
+ if (FoldAsFMAAKorMK) {
+ // Untie Src2 of fmac.
+ MI->untieRegOperand(3);
+ // For fmamk, swap operands 1 and 2 if OpToFold was meant for operand 1.
+ if (OpNo == 1) {
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ Register OldReg = Op1.getReg();
+ // Operand 2 might be an inlinable constant
+ if (Op2.isImm()) {
+ Op1.ChangeToImmediate(Op2.getImm());
+ Op2.ChangeToRegister(OldReg, false);
+ } else {
+ Op1.setReg(Op2.getReg());
+ Op2.setReg(OldReg);
+ }
+ }
+ return true;
+ }
+ MI->setDesc(TII->get(Opc));
+ return false;
+ };
+
+ bool IsLegal = TII->isOperandLegal(*MI, OpNo, OpToFold);
+ if (!IsLegal && OpToFold->isImm()) {
+ FoldCandidate Fold(MI, OpNo, OpToFold);
+ IsLegal = canUseImmWithOpSel(Fold);
+ }
+
+ if (!IsLegal) {
// Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
- unsigned Opc = MI->getOpcode();
unsigned NewOpc = macToMad(Opc);
if (NewOpc != AMDGPU::INSTRUCTION_LIST_END) {
// Check if changing this to a v_mad_{f16, f32} instruction will allow us
@@ -367,6 +437,13 @@ bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
MI->setDesc(TII->get(Opc));
}
+ // Special case for s_fmac_f32 if we are trying to fold into Src2.
+ // By transforming into fmaak we can untie Src2 and make folding legal.
+ if (Opc == AMDGPU::S_FMAC_F32 && OpNo == 3) {
+ if (tryToFoldAsFMAAKorMK())
+ return true;
+ }
+
// Special case for s_setreg_b32
if (OpToFold->isImm()) {
unsigned ImmOpc = 0;
@@ -387,66 +464,72 @@ bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
if (isUseMIInFoldList(FoldList, MI))
return false;
- unsigned CommuteOpNo = OpNo;
-
// Operand is not legal, so try to commute the instruction to
// see if this makes it possible to fold.
- unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
- unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
- bool CanCommute = TII->findCommutedOpIndices(*MI, CommuteIdx0, CommuteIdx1);
-
- if (CanCommute) {
- if (CommuteIdx0 == OpNo)
- CommuteOpNo = CommuteIdx1;
- else if (CommuteIdx1 == OpNo)
- CommuteOpNo = CommuteIdx0;
- }
-
+ unsigned CommuteOpNo = TargetInstrInfo::CommuteAnyOperandIndex;
+ bool CanCommute = TII->findCommutedOpIndices(*MI, OpNo, CommuteOpNo);
+ if (!CanCommute)
+ return false;
// One of the operands might be an Imm operand, and OpNo may refer to it after
// the call to commuteInstruction() below. Such situations are avoided
// here explicitly as OpNo must be a register operand to be a candidate
// for memory folding.
- if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
- !MI->getOperand(CommuteIdx1).isReg()))
+ if (!MI->getOperand(OpNo).isReg() || !MI->getOperand(CommuteOpNo).isReg())
return false;
- if (!CanCommute ||
- !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
+ if (!TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo))
return false;
+ int Op32 = -1;
if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
- if ((Opc == AMDGPU::V_ADD_CO_U32_e64 ||
- Opc == AMDGPU::V_SUB_CO_U32_e64 ||
- Opc == AMDGPU::V_SUBREV_CO_U32_e64) && // FIXME
- (OpToFold->isImm() || OpToFold->isFI() || OpToFold->isGlobal())) {
-
- // Verify the other operand is a VGPR, otherwise we would violate the
- // constant bus restriction.
- unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
- MachineOperand &OtherOp = MI->getOperand(OtherIdx);
- if (!OtherOp.isReg() ||
- !TII->getRegisterInfo().isVGPR(*MRI, OtherOp.getReg()))
- return false;
-
- assert(MI->getOperand(1).isDef());
+ if ((Opc != AMDGPU::V_ADD_CO_U32_e64 && Opc != AMDGPU::V_SUB_CO_U32_e64 &&
+ Opc != AMDGPU::V_SUBREV_CO_U32_e64) || // FIXME
+ (!OpToFold->isImm() && !OpToFold->isFI() && !OpToFold->isGlobal())) {
+ TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo);
+ return false;
+ }
- // Make sure to get the 32-bit version of the commuted opcode.
- unsigned MaybeCommutedOpc = MI->getOpcode();
- int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
+ // Verify the other operand is a VGPR, otherwise we would violate the
+ // constant bus restriction.
+ MachineOperand &OtherOp = MI->getOperand(OpNo);
+ if (!OtherOp.isReg() ||
+ !TII->getRegisterInfo().isVGPR(*MRI, OtherOp.getReg()))
+ return false;
- appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true, Op32);
- return true;
- }
+ assert(MI->getOperand(1).isDef());
- TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
- return false;
+ // Make sure to get the 32-bit version of the commuted opcode.
+ unsigned MaybeCommutedOpc = MI->getOpcode();
+ Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
}
- appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true);
+ appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true, Op32);
return true;
}
+ // An inlinable constant might already have been folded into the Imm operand
+ // of fmaak or fmamk while we are trying to fold a non-inlinable constant.
+ if ((Opc == AMDGPU::S_FMAAK_F32 || Opc == AMDGPU::S_FMAMK_F32) &&
+ !OpToFold->isReg() && !TII->isInlineConstant(*OpToFold)) {
+ unsigned ImmIdx = Opc == AMDGPU::S_FMAAK_F32 ? 3 : 2;
+ MachineOperand &OpImm = MI->getOperand(ImmIdx);
+ if (!OpImm.isReg() &&
+ TII->isInlineConstant(*MI, MI->getOperand(OpNo), OpImm))
+ return tryToFoldAsFMAAKorMK();
+ }
+
+ // Special case for s_fmac_f32 if we are trying to fold into Src0 or Src1.
+ // By changing into fmamk we can untie Src2.
+ // If folding for Src0 happens first and Src0 is identical to Src1, avoid
+ // transforming into fmamk (which requires commuting), since that would make
+ // the later fold into Src1 fail due to the wrong OpNo being used.
+ if (Opc == AMDGPU::S_FMAC_F32 &&
+ (OpNo != 1 || !MI->getOperand(1).isIdenticalTo(MI->getOperand(2)))) {
+ if (tryToFoldAsFMAAKorMK())
+ return true;
+ }
+
// Check the case where we might introduce a second constant operand to a
// scalar instruction
if (TII->isSALU(MI->getOpcode())) {
@@ -458,7 +541,8 @@ bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
// Otherwise check for another constant
for (unsigned i = 0, e = InstDesc.getNumOperands(); i != e; ++i) {
auto &Op = MI->getOperand(i);
- if (OpNo != i && !Op.isReg() && !TII->isInlineConstant(Op, OpInfo))
+ if (OpNo != i && !Op.isReg() &&
+ !TII->isInlineConstant(Op, InstDesc.operands()[i]))
return false;
}
}
@@ -516,13 +600,10 @@ bool SIFoldOperands::tryToFoldACImm(
if (UseOpIdx >= Desc.getNumOperands())
return false;
- uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType;
- if ((OpTy < AMDGPU::OPERAND_REG_INLINE_AC_FIRST ||
- OpTy > AMDGPU::OPERAND_REG_INLINE_AC_LAST) &&
- (OpTy < AMDGPU::OPERAND_REG_INLINE_C_FIRST ||
- OpTy > AMDGPU::OPERAND_REG_INLINE_C_LAST))
+ if (!AMDGPU::isSISrcInlinableOperand(Desc, UseOpIdx))
return false;
+ uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType;
if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy) &&
TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {
UseMI->getOperand(UseOpIdx).ChangeToImmediate(OpToFold.getImm());
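
Note: pushing the deleted comparison through De Morgan gives the predicate that
AMDGPU::isSISrcInlinableOperand is expected to implement for
Desc.operands()[UseOpIdx]; a minimal sketch, assuming the same operand-type
enums as the deleted lines:

    static bool isInlinableOpTy(uint8_t OpTy) {
      // In either the inline-AC or the inline-C operand-type range.
      return (OpTy >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
              OpTy <= AMDGPU::OPERAND_REG_INLINE_AC_LAST) ||
             (OpTy >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
              OpTy <= AMDGPU::OPERAND_REG_INLINE_C_LAST);
    }
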
@@ -671,24 +752,6 @@ void SIFoldOperands::foldOperand(
const TargetRegisterClass *DestRC = TRI->getRegClassForReg(*MRI, DestReg);
if (!DestReg.isPhysical()) {
- if (TRI->isSGPRClass(SrcRC) && TRI->hasVectorRegisters(DestRC)) {
- SmallVector<FoldCandidate, 4> CopyUses;
- for (auto &Use : MRI->use_nodbg_operands(DestReg)) {
- // There's no point trying to fold into an implicit operand.
- if (Use.isImplicit())
- continue;
-
- CopyUses.emplace_back(Use.getParent(),
- Use.getParent()->getOperandNo(&Use),
- &UseMI->getOperand(1));
- }
-
- for (auto &F : CopyUses) {
- foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo, FoldList,
- CopiesToReplace);
- }
- }
-
if (DestRC == &AMDGPU::AGPR_32RegClass &&
TII->isInlineConstant(OpToFold, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64));
@@ -1035,6 +1098,9 @@ SIFoldOperands::getImmOrMaterializedImm(MachineOperand &Op) const {
// selection.
// TODO: See if a frame index with a fixed offset can fold.
bool SIFoldOperands::tryConstantFoldOp(MachineInstr *MI) const {
+ if (!MI->allImplicitDefsAreDead())
+ return false;
+
unsigned Opc = MI->getOpcode();
int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
@@ -1340,6 +1406,7 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
case AMDGPU::V_MAX_F32_e64:
case AMDGPU::V_MAX_F16_e64:
case AMDGPU::V_MAX_F16_t16_e64:
+ case AMDGPU::V_MAX_F16_fake16_e64:
case AMDGPU::V_MAX_F64_e64:
case AMDGPU::V_PK_MAX_F16: {
if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
@@ -1435,7 +1502,8 @@ static int getOModValue(unsigned Opc, int64_t Val) {
}
}
case AMDGPU::V_MUL_F16_e64:
- case AMDGPU::V_MUL_F16_t16_e64: {
+ case AMDGPU::V_MUL_F16_t16_e64:
+ case AMDGPU::V_MUL_F16_fake16_e64: {
switch (static_cast<uint16_t>(Val)) {
case 0x3800: // 0.5
return SIOutMods::DIV2;
@@ -1462,12 +1530,14 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const {
case AMDGPU::V_MUL_F64_e64:
case AMDGPU::V_MUL_F32_e64:
case AMDGPU::V_MUL_F16_t16_e64:
+ case AMDGPU::V_MUL_F16_fake16_e64:
case AMDGPU::V_MUL_F16_e64: {
// If output denormals are enabled, omod is ignored.
if ((Op == AMDGPU::V_MUL_F32_e64 &&
MFI->getMode().FP32Denormals.Output != DenormalMode::PreserveSign) ||
((Op == AMDGPU::V_MUL_F64_e64 || Op == AMDGPU::V_MUL_F16_e64 ||
- Op == AMDGPU::V_MUL_F16_t16_e64) &&
+ Op == AMDGPU::V_MUL_F16_t16_e64 ||
+ Op == AMDGPU::V_MUL_F16_fake16_e64) &&
MFI->getMode().FP64FP16Denormals.Output != DenormalMode::PreserveSign))
return std::pair(nullptr, SIOutMods::NONE);
@@ -1497,12 +1567,14 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const {
case AMDGPU::V_ADD_F64_e64:
case AMDGPU::V_ADD_F32_e64:
case AMDGPU::V_ADD_F16_e64:
- case AMDGPU::V_ADD_F16_t16_e64: {
+ case AMDGPU::V_ADD_F16_t16_e64:
+ case AMDGPU::V_ADD_F16_fake16_e64: {
// If output denormals are enabled, omod is ignored.
if ((Op == AMDGPU::V_ADD_F32_e64 &&
MFI->getMode().FP32Denormals.Output != DenormalMode::PreserveSign) ||
((Op == AMDGPU::V_ADD_F64_e64 || Op == AMDGPU::V_ADD_F16_e64 ||
- Op == AMDGPU::V_ADD_F16_t16_e64) &&
+ Op == AMDGPU::V_ADD_F16_t16_e64 ||
+ Op == AMDGPU::V_ADD_F16_fake16_e64) &&
MFI->getMode().FP64FP16Denormals.Output != DenormalMode::PreserveSign))
return std::pair(nullptr, SIOutMods::NONE);
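
Note: both isOMod cases bail out when output denormals are preserved because,
as the comments above say, the hardware ignores omod in that mode; the fake16
opcodes simply join the existing f16 set. A standalone model of that guard
(illustrative names, not LLVM's):

    enum class OutDenormMode { PreserveSign /*flushed*/, IEEE /*preserved*/ };

    // Folding a multiply/add into omod is only sound when the hardware will
    // actually apply omod, i.e. when output denormals are flushed.
    static bool canUseOMod(bool IsF32Op, OutDenormMode F32Out,
                           OutDenormMode F64F16Out) {
      OutDenormMode Mode = IsF32Op ? F32Out : F64F16Out;
      return Mode == OutDenormMode::PreserveSign;
    }
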
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 903e726c667d..0f89df144486 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -11,7 +11,7 @@
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetMachine.h"
@@ -26,13 +26,17 @@ static cl::opt<bool> EnableSpillVGPRToAGPR(
cl::ReallyHidden,
cl::init(true));
-// Find a register matching \p RC from \p LiveRegs which is unused and available
-// throughout the function. On failure, returns AMDGPU::NoRegister.
+// Find a register matching \p RC from \p LiveUnits which is unused and
+// available throughout the function. On failure, returns AMDGPU::NoRegister.
+// TODO: Rewrite the loop here to iterate over MCRegUnits instead of
+// MCRegisters. This should reduce the number of iterations and avoid redundant
+// checking.
static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
- const LivePhysRegs &LiveRegs,
+ const LiveRegUnits &LiveUnits,
const TargetRegisterClass &RC) {
for (MCRegister Reg : RC) {
- if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
+ if (!MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) &&
+ !MRI.isReserved(Reg))
return Reg;
}
return MCRegister();
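
Note: the !MRI.isReserved(Reg) tests added throughout this file are not
incidental. LivePhysRegs::available(MRI, Reg) rejected reserved registers
internally, while LiveRegUnits::available(Reg) only consults the unit set, so
the reserved check must now be explicit. In sketch form, the query being kept
equivalent is:

    // Old (LivePhysRegs) and new (LiveRegUnits) forms of the same test.
    bool OldOK = !MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg);
    bool NewOK = !MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) &&
                 !MRI.isReserved(Reg);
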
@@ -42,22 +46,21 @@ static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
// callee-save registers since they may appear to be free when this is called
// from canUseAsPrologue (during shrink wrapping), but then no longer be free
// when this is called from emitPrologue.
-static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
- LivePhysRegs &LiveRegs,
- const TargetRegisterClass &RC,
- bool Unused = false) {
+static MCRegister findScratchNonCalleeSaveRegister(
+ MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits,
+ const TargetRegisterClass &RC, bool Unused = false) {
// Mark callee saved registers as used so we will not choose them.
const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
for (unsigned i = 0; CSRegs[i]; ++i)
- LiveRegs.addReg(CSRegs[i]);
+ LiveUnits.addReg(CSRegs[i]);
// We are looking for a register that can be used throughout the entire
// function, so any use is unacceptable.
if (Unused)
- return findUnusedRegister(MRI, LiveRegs, RC);
+ return findUnusedRegister(MRI, LiveUnits, RC);
for (MCRegister Reg : RC) {
- if (LiveRegs.available(MRI, Reg))
+ if (LiveUnits.available(Reg) && !MRI.isReserved(Reg))
return Reg;
}
@@ -65,9 +68,9 @@ static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
}
/// Query target location for spilling SGPRs
-/// \p IncludeScratchCopy : Also look for free scratch SGPRs
+/// \p IncludeScratchCopy : Also look for free scratch SGPRs
static void getVGPRSpillLaneOrTempRegister(
- MachineFunction &MF, LivePhysRegs &LiveRegs, Register SGPR,
+ MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR,
const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
bool IncludeScratchCopy = true) {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -81,11 +84,11 @@ static void getVGPRSpillLaneOrTempRegister(
// We need to save and restore the given SGPR.
Register ScratchSGPR;
- // 1: Try to save the given register into an unused scratch SGPR. The LiveRegs
- // should have all the callee saved registers marked as used. For certain
- // cases we skip copy to scratch SGPR.
+ // 1: Try to save the given register into an unused scratch SGPR. The
+ // LiveUnits should have all the callee saved registers marked as used. For
+ // certain cases we skip copy to scratch SGPR.
if (IncludeScratchCopy)
- ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveRegs, RC);
+ ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC);
if (!ScratchSGPR) {
int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
@@ -99,10 +102,10 @@ static void getVGPRSpillLaneOrTempRegister(
SGPR, PrologEpilogSGPRSaveRestoreInfo(
SGPRSaveKind::SPILL_TO_VGPR_LANE, FI));
- LLVM_DEBUG(
- auto Spill = MFI->getPrologEpilogSGPRSpillToVGPRLanes(FI).front();
- dbgs() << printReg(SGPR, TRI) << " requires fallback spill to "
- << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';);
+ LLVM_DEBUG(auto Spill = MFI->getSGPRSpillToPhysicalVGPRLanes(FI).front();
+ dbgs() << printReg(SGPR, TRI) << " requires fallback spill to "
+ << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
+ << '\n';);
} else {
// Remove dead <FI> index
MF.getFrameInfo().RemoveStackObject(FI);
@@ -118,7 +121,7 @@ static void getVGPRSpillLaneOrTempRegister(
MFI->addToPrologEpilogSGPRSpills(
SGPR, PrologEpilogSGPRSaveRestoreInfo(
SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR));
- LiveRegs.addReg(ScratchSGPR);
+ LiveUnits.addReg(ScratchSGPR);
LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to "
<< printReg(ScratchSGPR, TRI) << '\n');
}
@@ -129,7 +132,7 @@ static void getVGPRSpillLaneOrTempRegister(
// use.
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
const SIMachineFunctionInfo &FuncInfo,
- LivePhysRegs &LiveRegs, MachineFunction &MF,
+ LiveRegUnits &LiveUnits, MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, const DebugLoc &DL,
Register SpillReg, int FI, Register FrameReg,
@@ -142,18 +145,18 @@ static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
FrameInfo.getObjectAlign(FI));
- LiveRegs.addReg(SpillReg);
+ LiveUnits.addReg(SpillReg);
bool IsKill = !MBB.isLiveIn(SpillReg);
TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
- DwordOff, MMO, nullptr, &LiveRegs);
+ DwordOff, MMO, nullptr, &LiveUnits);
if (IsKill)
- LiveRegs.removeReg(SpillReg);
+ LiveUnits.removeReg(SpillReg);
}
static void buildEpilogRestore(const GCNSubtarget &ST,
const SIRegisterInfo &TRI,
const SIMachineFunctionInfo &FuncInfo,
- LivePhysRegs &LiveRegs, MachineFunction &MF,
+ LiveRegUnits &LiveUnits, MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, Register SpillReg, int FI,
@@ -167,7 +170,7 @@ static void buildEpilogRestore(const GCNSubtarget &ST,
PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
FrameInfo.getObjectAlign(FI));
TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
- DwordOff, MMO, nullptr, &LiveRegs);
+ DwordOff, MMO, nullptr, &LiveUnits);
}
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
@@ -195,18 +198,18 @@ static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addReg(GitPtrLo);
}
-static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI,
- const SIMachineFunctionInfo *FuncInfo,
- MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, bool IsProlog) {
- if (LiveRegs.empty()) {
- LiveRegs.init(TRI);
+static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI,
+ const SIMachineFunctionInfo *FuncInfo,
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, bool IsProlog) {
+ if (LiveUnits.empty()) {
+ LiveUnits.init(TRI);
if (IsProlog) {
- LiveRegs.addLiveIns(MBB);
+ LiveUnits.addLiveIns(MBB);
} else {
// In epilog.
- LiveRegs.addLiveOuts(MBB);
- LiveRegs.stepBackward(*MBBI);
+ LiveUnits.addLiveOuts(MBB);
+ LiveUnits.stepBackward(*MBBI);
}
}
}
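
Note: the intended call pattern for the two helpers above, condensed from the
PrologEpilogSGPRSpillBuilder code later in this diff:

    // Seed the unit set lazily for the prolog (or epilog), then pick a
    // scratch VGPR that is dead at the insertion point.
    LiveRegUnits LiveUnits;
    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);
    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");
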
@@ -228,7 +231,7 @@ class PrologEpilogSGPRSpillBuilder {
const SIRegisterInfo &TRI;
Register SuperReg;
const PrologEpilogSGPRSaveRestoreInfo SI;
- LivePhysRegs &LiveRegs;
+ LiveRegUnits &LiveUnits;
const DebugLoc &DL;
Register FrameReg;
ArrayRef<int16_t> SplitParts;
@@ -239,10 +242,10 @@ class PrologEpilogSGPRSpillBuilder {
MachineRegisterInfo &MRI = MF.getRegInfo();
assert(!MFI.isDeadObjectIndex(FI));
- initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);
+ initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);
MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+ MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
if (!TmpVGPR)
report_fatal_error("failed to find free scratch register");
@@ -253,7 +256,7 @@ class PrologEpilogSGPRSpillBuilder {
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
.addReg(SubReg);
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MI, DL, TmpVGPR,
+ buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
FI, FrameReg, DwordOff);
DwordOff += 4;
}
@@ -264,14 +267,15 @@ class PrologEpilogSGPRSpillBuilder {
assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
ArrayRef<SIRegisterInfo::SpilledReg> Spill =
- FuncInfo->getPrologEpilogSGPRSpillToVGPRLanes(FI);
+ FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
assert(Spill.size() == NumSubRegs);
for (unsigned I = 0; I < NumSubRegs; ++I) {
Register SubReg = NumSubRegs == 1
? SuperReg
: Register(TRI.getSubReg(SuperReg, SplitParts[I]));
- BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[I].VGPR)
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
+ Spill[I].VGPR)
.addReg(SubReg)
.addImm(Spill[I].Lane)
.addReg(Spill[I].VGPR, RegState::Undef);
@@ -287,9 +291,9 @@ class PrologEpilogSGPRSpillBuilder {
void restoreFromMemory(const int FI) {
MachineRegisterInfo &MRI = MF.getRegInfo();
- initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
+ initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+ MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
if (!TmpVGPR)
report_fatal_error("failed to find free scratch register");
@@ -298,8 +302,8 @@ class PrologEpilogSGPRSpillBuilder {
? SuperReg
: Register(TRI.getSubReg(SuperReg, SplitParts[I]));
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MI, DL, TmpVGPR,
- FI, FrameReg, DwordOff);
+ buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
+ TmpVGPR, FI, FrameReg, DwordOff);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
.addReg(TmpVGPR, RegState::Kill);
DwordOff += 4;
@@ -309,14 +313,14 @@ class PrologEpilogSGPRSpillBuilder {
void restoreFromVGPRLane(const int FI) {
assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
ArrayRef<SIRegisterInfo::SpilledReg> Spill =
- FuncInfo->getPrologEpilogSGPRSpillToVGPRLanes(FI);
+ FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
assert(Spill.size() == NumSubRegs);
for (unsigned I = 0; I < NumSubRegs; ++I) {
Register SubReg = NumSubRegs == 1
? SuperReg
: Register(TRI.getSubReg(SuperReg, SplitParts[I]));
- BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
.addReg(Spill[I].VGPR)
.addImm(Spill[I].Lane);
}
@@ -335,11 +339,12 @@ public:
MachineBasicBlock::iterator MI,
const DebugLoc &DL, const SIInstrInfo *TII,
const SIRegisterInfo &TRI,
- LivePhysRegs &LiveRegs, Register FrameReg)
+ LiveRegUnits &LiveUnits, Register FrameReg)
: MI(MI), MBB(MBB), MF(*MBB.getParent()),
ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
- SuperReg(Reg), SI(SI), LiveRegs(LiveRegs), DL(DL), FrameReg(FrameReg) {
+ SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
+ FrameReg(FrameReg) {
const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
SplitParts = TRI.getRegSplitParts(RC, EltSize);
NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
@@ -396,9 +401,9 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
if (ST.isAmdPalOS()) {
// Extract the scratch offset from the descriptor in the GIT
- LivePhysRegs LiveRegs;
- LiveRegs.init(*TRI);
- LiveRegs.addLiveIns(MBB);
+ LiveRegUnits LiveUnits;
+ LiveUnits.init(*TRI);
+ LiveUnits.addLiveIns(MBB);
// Find unused reg to load flat scratch init into
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -409,8 +414,8 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
for (MCPhysReg Reg : AllSGPR64s) {
- if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) &&
- !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
+ if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
+ MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
FlatScrInit = Reg;
break;
}
@@ -692,7 +697,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
}
bool NeedsFlatScratchInit =
- MFI->hasFlatScratchInit() &&
+ MFI->getUserSGPRInfo().hasFlatScratchInit() &&
(MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
(!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));
@@ -775,7 +780,7 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
// Use relocations to get the pointer, and setup the other bits manually.
uint64_t Rsrc23 = TII->getScratchRsrcWords23();
- if (MFI->hasImplicitBufferPtr()) {
+ if (MFI->getUserSGPRInfo().hasImplicitBufferPtr()) {
Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
@@ -814,7 +819,6 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
BuildMI(MBB, I, DL, SMovB32, Rsrc1)
.addExternalSymbol("SCRATCH_RSRC_DWORD1")
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
}
BuildMI(MBB, I, DL, SMovB32, Rsrc2)
@@ -873,7 +877,7 @@ bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
// Activate only the inactive lanes when \p EnableInactiveLanes is true.
// Otherwise, activate all lanes. It returns the saved exec.
-static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
+static Register buildScratchExecCopy(LiveRegUnits &LiveUnits,
MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -886,14 +890,14 @@ static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
const SIRegisterInfo &TRI = TII->getRegisterInfo();
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
- initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
+ initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
ScratchExecCopy = findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, *TRI.getWaveMaskRegClass());
+ MRI, LiveUnits, *TRI.getWaveMaskRegClass());
if (!ScratchExecCopy)
report_fatal_error("failed to find free scratch register");
- LiveRegs.addReg(ScratchExecCopy);
+ LiveUnits.addReg(ScratchExecCopy);
const unsigned SaveExecOpc =
ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
@@ -909,7 +913,7 @@ static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
void SIFrameLowering::emitCSRSpillStores(
MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, DebugLoc &DL, LivePhysRegs &LiveRegs,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
Register FrameReg, Register FramePtrRegScratchCopy) const {
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -924,7 +928,7 @@ void SIFrameLowering::emitCSRSpillStores(
FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
if (!WWMScratchRegs.empty())
ScratchExecCopy =
- buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
+ buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
/*IsProlog*/ true, /*EnableInactiveLanes*/ true);
auto StoreWWMRegisters =
@@ -932,7 +936,7 @@ void SIFrameLowering::emitCSRSpillStores(
for (const auto &Reg : WWMRegs) {
Register VGPR = Reg.first;
int FI = Reg.second;
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
+ buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
VGPR, FI, FrameReg);
}
};
@@ -943,7 +947,7 @@ void SIFrameLowering::emitCSRSpillStores(
unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
} else {
- ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
+ ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
/*IsProlog*/ true,
/*EnableInactiveLanes*/ false);
}
@@ -955,7 +959,7 @@ void SIFrameLowering::emitCSRSpillStores(
unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
.addReg(ScratchExecCopy, RegState::Kill);
- LiveRegs.addReg(ScratchExecCopy);
+ LiveUnits.addReg(ScratchExecCopy);
}
Register FramePtrReg = FuncInfo->getFrameOffsetReg();
@@ -971,7 +975,7 @@ void SIFrameLowering::emitCSRSpillStores(
continue;
PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
- LiveRegs, FrameReg);
+ LiveUnits, FrameReg);
SB.save();
}
@@ -986,16 +990,16 @@ void SIFrameLowering::emitCSRSpillStores(
MBB.sortUniqueLiveIns();
}
- if (!LiveRegs.empty()) {
+ if (!LiveUnits.empty()) {
for (MCPhysReg Reg : ScratchSGPRs)
- LiveRegs.addReg(Reg);
+ LiveUnits.addReg(Reg);
}
}
}
void SIFrameLowering::emitCSRSpillRestores(
MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, DebugLoc &DL, LivePhysRegs &LiveRegs,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
Register FrameReg, Register FramePtrRegScratchCopy) const {
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -1015,7 +1019,7 @@ void SIFrameLowering::emitCSRSpillRestores(
continue;
PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
- LiveRegs, FrameReg);
+ LiveUnits, FrameReg);
SB.restore();
}
@@ -1027,7 +1031,7 @@ void SIFrameLowering::emitCSRSpillRestores(
FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
if (!WWMScratchRegs.empty())
ScratchExecCopy =
- buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
+ buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
/*IsProlog*/ false, /*EnableInactiveLanes*/ true);
auto RestoreWWMRegisters =
@@ -1035,7 +1039,7 @@ void SIFrameLowering::emitCSRSpillRestores(
for (const auto &Reg : WWMRegs) {
Register VGPR = Reg.first;
int FI = Reg.second;
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
+ buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
VGPR, FI, FrameReg);
}
};
@@ -1046,7 +1050,7 @@ void SIFrameLowering::emitCSRSpillRestores(
unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
} else {
- ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
+ ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
/*IsProlog*/ false,
/*EnableInactiveLanes*/ false);
}
@@ -1079,13 +1083,25 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
Register FramePtrReg = FuncInfo->getFrameOffsetReg();
Register BasePtrReg =
TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
- LivePhysRegs LiveRegs;
+ LiveRegUnits LiveUnits;
MachineBasicBlock::iterator MBBI = MBB.begin();
// DebugLoc must be unknown since the first instruction with DebugLoc is used
// to determine the end of the prologue.
DebugLoc DL;
+ if (FuncInfo->isChainFunction()) {
+ // Functions with the amdgpu_cs_chain[_preserve] CC don't receive an SP, but
+ // are free to set one up if they need it.
+ bool UseSP = requiresStackPointerReference(MF);
+ if (UseSP) {
+ assert(StackPtrReg != AMDGPU::SP_REG);
+
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B32), StackPtrReg)
+ .addImm(MFI.getStackSize() * getScratchScaleFactor(ST));
+ }
+ }
+
bool HasFP = false;
bool HasBP = false;
uint32_t NumBytes = MFI.getStackSize();
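
Note: the immediate written to StackPtrReg above is the frame size scaled for
scratch addressing. A sketch of the assumed helper (getScratchScaleFactor is
defined elsewhere in this file; verify against the tree):

    // MUBUF scratch is swizzled per wave, so stack byte offsets are scaled
    // by the wavefront size; flat-scratch addressing is unswizzled.
    static unsigned getScratchScaleFactorSketch(const GCNSubtarget &ST) {
      return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
    }
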
@@ -1097,14 +1113,15 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
Register FramePtrRegScratchCopy;
if (!HasFP && !hasFP(MF)) {
// Emit the CSR spill stores with SP base register.
- emitCSRSpillStores(MF, MBB, MBBI, DL, LiveRegs, StackPtrReg,
+ emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits,
+ FuncInfo->isChainFunction() ? Register() : StackPtrReg,
FramePtrRegScratchCopy);
} else {
// CSR spill stores will use FP as base register.
Register SGPRForFPSaveRestoreCopy =
FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
- initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
+ initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
if (SGPRForFPSaveRestoreCopy) {
// Copy FP to the scratch register now and emit the CFI entry. It avoids
// the extra FP copy needed in the other two cases when FP is spilled to
@@ -1112,18 +1129,18 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
PrologEpilogSGPRSpillBuilder SB(
FramePtrReg,
FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI,
- DL, TII, TRI, LiveRegs, FramePtrReg);
+ DL, TII, TRI, LiveUnits, FramePtrReg);
SB.save();
- LiveRegs.addReg(SGPRForFPSaveRestoreCopy);
+ LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
} else {
// Copy FP into a new scratch register so that its previous value can be
// spilled after setting up the new frame.
FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass);
+ MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
if (!FramePtrRegScratchCopy)
report_fatal_error("failed to find free scratch register");
- LiveRegs.addReg(FramePtrRegScratchCopy);
+ LiveUnits.addReg(FramePtrRegScratchCopy);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy)
.addReg(FramePtrReg);
}
@@ -1133,9 +1150,9 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
const unsigned Alignment = MFI.getMaxAlign().value();
RoundedSize += Alignment;
- if (LiveRegs.empty()) {
- LiveRegs.init(TRI);
- LiveRegs.addLiveIns(MBB);
+ if (LiveUnits.empty()) {
+ LiveUnits.init(TRI);
+ LiveUnits.addLiveIns(MBB);
}
// s_add_i32 s33, s32, NumBytes
@@ -1158,10 +1175,10 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
// If FP is used, emit the CSR spills with FP base register.
if (HasFP) {
- emitCSRSpillStores(MF, MBB, MBBI, DL, LiveRegs, FramePtrReg,
+ emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
FramePtrRegScratchCopy);
if (FramePtrRegScratchCopy)
- LiveRegs.removeReg(FramePtrRegScratchCopy);
+ LiveUnits.removeReg(FramePtrRegScratchCopy);
}
// If we need a base pointer, set it up here. It's whatever the value of
@@ -1210,7 +1227,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
- LivePhysRegs LiveRegs;
+ LiveRegUnits LiveUnits;
// Get the insert location for the epilogue. If there were no terminators in
// the block, get the last instruction.
MachineBasicBlock::iterator MBBI = MBB.end();
@@ -1240,19 +1257,19 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
// SGPRForFPSaveRestoreCopy is not true, restore the previous value of FP
// into a new scratch register and copy to FP later when other registers are
// restored from the current stack frame.
- initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
+ initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
if (SGPRForFPSaveRestoreCopy) {
- LiveRegs.addReg(SGPRForFPSaveRestoreCopy);
+ LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
} else {
FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass);
+ MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
if (!FramePtrRegScratchCopy)
report_fatal_error("failed to find free scratch register");
- LiveRegs.addReg(FramePtrRegScratchCopy);
+ LiveUnits.addReg(FramePtrRegScratchCopy);
}
- emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveRegs, FramePtrReg,
+ emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
FramePtrRegScratchCopy);
}
@@ -1275,7 +1292,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
MIB.setMIFlag(MachineInstr::FrameDestroy);
} else {
// Insert the CSR spill restores with SP as the base register.
- emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveRegs, StackPtrReg,
+ emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
FramePtrRegScratchCopy);
}
}
@@ -1318,7 +1335,11 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
// Allocate spill slots for WWM reserved VGPRs.
- if (!FuncInfo->isEntryFunction()) {
+ // For chain functions, we only need to do this if we have calls to
+ // llvm.amdgcn.cs.chain.
+ bool IsChainWithoutCalls =
+ FuncInfo->isChainFunction() && !MF.getFrameInfo().hasTailCall();
+ if (!FuncInfo->isEntryFunction() && !IsChainWithoutCalls) {
for (Register Reg : FuncInfo->getWWMReservedRegs()) {
const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
FuncInfo->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
@@ -1353,8 +1374,8 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
TRI->isAGPR(MRI, VReg))) {
assert(RS != nullptr);
- // FIXME: change to enterBasicBlockEnd()
- RS->enterBasicBlock(MBB);
+ RS->enterBasicBlockEnd(MBB);
+ RS->backward(std::next(MI.getIterator()));
TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
SpillFIs.set(FI);
continue;
@@ -1472,30 +1493,30 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
- LivePhysRegs LiveRegs;
- LiveRegs.init(*TRI);
+ LiveRegUnits LiveUnits;
+ LiveUnits.init(*TRI);
// Initially mark callee saved registers as used so we will not choose them
// while looking for scratch SGPRs.
const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
for (unsigned I = 0; CSRegs[I]; ++I)
- LiveRegs.addReg(CSRegs[I]);
+ LiveUnits.addReg(CSRegs[I]);
const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();
if (NeedExecCopyReservedReg) {
Register ReservedReg = MFI->getSGPRForEXECCopy();
assert(ReservedReg && "Should have reserved an SGPR for EXEC copy.");
- Register UnusedScratchReg = findUnusedRegister(MRI, LiveRegs, RC);
+ Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC);
if (UnusedScratchReg) {
// If an unused scratch SGPR was found, reserve the register itself for Exec
// copy; there is no need for any spills in that case.
MFI->setSGPRForEXECCopy(UnusedScratchReg);
- LiveRegs.addReg(UnusedScratchReg);
+ LiveUnits.addReg(UnusedScratchReg);
} else {
// Needs spill.
assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedReg) &&
"Re-reserving spill slot for EXEC copy register");
- getVGPRSpillLaneOrTempRegister(MF, LiveRegs, ReservedReg, RC,
+ getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedReg, RC,
/*IncludeScratchCopy=*/false);
}
}
@@ -1516,14 +1537,14 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
Register FramePtrReg = MFI->getFrameOffsetReg();
assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
"Re-reserving spill slot for FP");
- getVGPRSpillLaneOrTempRegister(MF, LiveRegs, FramePtrReg);
+ getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg);
}
if (TRI->hasBasePointer(MF)) {
Register BasePtrReg = TRI->getBaseRegister();
assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
"Re-reserving spill slot for BP");
- getVGPRSpillLaneOrTempRegister(MF, LiveRegs, BasePtrReg);
+ getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg);
}
}
@@ -1531,8 +1552,15 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
BitVector &SavedVGPRs,
RegScavenger *RS) const {
- TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ // If this is a function with the amdgpu_cs_chain[_preserve] calling
+ // convention and it doesn't contain any calls to llvm.amdgcn.cs.chain, then
+ // we don't need to save and restore anything.
+ if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall())
+ return;
+
+ TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
if (MFI->isEntryFunction())
return;
@@ -1551,17 +1579,17 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
// TODO: Handle this elsewhere at an early point. Walking through all MBBs
// here would be a bad heuristic. A better way should be by calling
// allocateWWMSpill during the regalloc pipeline whenever a physical
- // register is allocated for the intended virtual registers. That will
- // also help excluding the general use of WRITELANE/READLANE intrinsics
- // that won't really need any such special handling.
- if (MI.getOpcode() == AMDGPU::V_WRITELANE_B32)
+ // register is allocated for the intended virtual registers.
+ if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR)
MFI->allocateWWMSpill(MF, MI.getOperand(0).getReg());
- else if (MI.getOpcode() == AMDGPU::V_READLANE_B32)
+ else if (MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
MFI->allocateWWMSpill(MF, MI.getOperand(1).getReg());
else if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
NeedExecCopyReservedReg = true;
else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
- MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
+ MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
+ (MFI->isChainFunction() &&
+ TII->isChainCallOpcode(MI.getOpcode()))) {
// We expect all returns to be the same size.
assert(!ReturnMI ||
(count_if(MI.operands(), [](auto Op) { return Op.isReg(); }) ==
@@ -1695,6 +1723,7 @@ bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const SIInstrInfo *TII = ST.getInstrInfo();
uint64_t EstStackSize = MFI.estimateStackSize(MF);
uint64_t MaxOffset = EstStackSize - 1;
@@ -1706,12 +1735,11 @@ bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
// rather than allocating as close as possible. This could save a lot of space
// on frames with alignment requirements.
if (ST.enableFlatScratch()) {
- const SIInstrInfo *TII = ST.getInstrInfo();
if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
SIInstrFlags::FlatScratch))
return false;
} else {
- if (SIInstrInfo::isLegalMUBUFImmOffset(MaxOffset))
+ if (TII->isLegalMUBUFImmOffset(MaxOffset))
return false;
}
@@ -1770,10 +1798,11 @@ static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
- // For entry functions we can use an immediate offset in most cases, so the
- // presence of calls doesn't imply we need a distinct frame pointer.
+ // For entry & chain functions we can use an immediate offset in most cases,
+ // so the presence of calls doesn't imply we need a distinct frame pointer.
if (MFI.hasCalls() &&
- !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
+ !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
+ !MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) {
// All offsets are unsigned, so they need to be addressed in the same
// direction as stack growth.
@@ -1793,11 +1822,14 @@ bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
// register. We may need to initialize the stack pointer depending on the frame
// properties, which logically overlaps many of the cases where an ordinary
// function would require an FP.
+// Also used for chain functions. While not technically entry functions, chain
+// functions may need to set up a stack pointer in some situations.
bool SIFrameLowering::requiresStackPointerReference(
const MachineFunction &MF) const {
// Callable functions always require a stack pointer reference.
- assert(MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
- "only expected to call this for entry points");
+ assert((MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() ||
+ MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) &&
+ "only expected to call this for entry points and chain functions");
const MachineFrameInfo &MFI = MF.getFrameInfo();
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.h
index 0060fc0be431..b3feb759ed81 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -38,11 +38,11 @@ public:
bool NeedExecCopyReservedReg) const;
void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
- LivePhysRegs &LiveRegs, Register FrameReg,
+ LiveRegUnits &LiveUnits, Register FrameReg,
Register FramePtrRegScratchCopy) const;
void emitCSRSpillRestores(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
- LivePhysRegs &LiveRegs, Register FrameReg,
+ LiveRegUnits &LiveUnits, Register FrameReg,
Register FramePtrRegScratchCopy) const;
bool
assignCalleeSavedSpillSlots(MachineFunction &MF,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b7b90e23e895..34826809c1a6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -15,6 +15,7 @@
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
@@ -28,6 +29,7 @@
#include "llvm/CodeGen/ByteProvider.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -146,8 +148,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v16f64, TRI->getVGPRClassForBitWidth(1024));
if (Subtarget->has16BitInsts()) {
- addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass);
- addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass);
+ if (Subtarget->useRealTrue16Insts()) {
+ addRegisterClass(MVT::i16, &AMDGPU::VGPR_16RegClass);
+ addRegisterClass(MVT::f16, &AMDGPU::VGPR_16RegClass);
+ } else {
+ addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass);
+ addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass);
+ }
// Unless there are also VOP3P operations, no operations are really legal.
addRegisterClass(MVT::v2i16, &AMDGPU::SReg_32RegClass);
@@ -158,6 +165,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v8f16, &AMDGPU::SGPR_128RegClass);
addRegisterClass(MVT::v16i16, &AMDGPU::SGPR_256RegClass);
addRegisterClass(MVT::v16f16, &AMDGPU::SGPR_256RegClass);
+ addRegisterClass(MVT::v32i16, &AMDGPU::SGPR_512RegClass);
+ addRegisterClass(MVT::v32f16, &AMDGPU::SGPR_512RegClass);
}
addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
@@ -219,7 +228,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::f64, Promote);
AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);
- setOperationAction(ISD::FSQRT, MVT::f64, Custom);
+ setOperationAction(ISD::FSQRT, {MVT::f32, MVT::f64}, Custom);
setOperationAction(ISD::SELECT_CC,
{MVT::f32, MVT::i32, MVT::i64, MVT::f64, MVT::i1}, Expand);
@@ -262,13 +271,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// We only support LOAD/STORE and vector manipulation ops for vectors
// with > 4 elements.
for (MVT VT :
- {MVT::v8i32, MVT::v8f32, MVT::v9i32, MVT::v9f32, MVT::v10i32,
- MVT::v10f32, MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32,
- MVT::v16i32, MVT::v16f32, MVT::v2i64, MVT::v2f64, MVT::v4i16,
- MVT::v4f16, MVT::v3i64, MVT::v3f64, MVT::v6i32, MVT::v6f32,
- MVT::v4i64, MVT::v4f64, MVT::v8i64, MVT::v8f64, MVT::v8i16,
- MVT::v8f16, MVT::v16i16, MVT::v16f16, MVT::v16i64, MVT::v16f64,
- MVT::v32i32, MVT::v32f32}) {
+ {MVT::v8i32, MVT::v8f32, MVT::v9i32, MVT::v9f32, MVT::v10i32,
+ MVT::v10f32, MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32,
+ MVT::v16i32, MVT::v16f32, MVT::v2i64, MVT::v2f64, MVT::v4i16,
+ MVT::v4f16, MVT::v3i64, MVT::v3f64, MVT::v6i32, MVT::v6f32,
+ MVT::v4i64, MVT::v4f64, MVT::v8i64, MVT::v8f64, MVT::v8i16,
+ MVT::v8f16, MVT::v16i16, MVT::v16f16, MVT::v16i64, MVT::v16f64,
+ MVT::v32i32, MVT::v32f32, MVT::v32i16, MVT::v32f16}) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch (Op) {
case ISD::LOAD:
@@ -420,6 +429,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
if (Subtarget->has16BitInsts()) {
setOperationAction({ISD::FPOW, ISD::FPOWI}, MVT::f16, Promote);
setOperationAction({ISD::FLOG, ISD::FEXP, ISD::FLOG10}, MVT::f16, Custom);
+ } else {
+ setOperationAction(ISD::FSQRT, MVT::f16, Custom);
}
if (Subtarget->hasMadMacF32Insts())
@@ -470,9 +481,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
{MVT::f32, MVT::f64}, Legal);
if (Subtarget->haveRoundOpsF64())
- setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FRINT}, MVT::f64, Legal);
+ setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FROUNDEVEN}, MVT::f64,
+ Legal);
else
- setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FRINT, ISD::FFLOOR},
+ setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FROUNDEVEN, ISD::FFLOOR},
MVT::f64, Custom);
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
@@ -544,8 +556,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
if (STI.hasMadF16())
setOperationAction(ISD::FMAD, MVT::f16, Legal);
- for (MVT VT : {MVT::v2i16, MVT::v2f16, MVT::v4i16, MVT::v4f16, MVT::v8i16,
- MVT::v8f16, MVT::v16i16, MVT::v16f16}) {
+ for (MVT VT :
+ {MVT::v2i16, MVT::v2f16, MVT::v4i16, MVT::v4f16, MVT::v8i16,
+ MVT::v8f16, MVT::v16i16, MVT::v16f16, MVT::v32i16, MVT::v32f16}) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch (Op) {
case ISD::LOAD:
@@ -631,6 +644,16 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STORE, MVT::v16f16, Promote);
AddPromotedToType(ISD::STORE, MVT::v16f16, MVT::v8i32);
+ setOperationAction(ISD::LOAD, MVT::v32i16, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v32i16, MVT::v16i32);
+ setOperationAction(ISD::LOAD, MVT::v32f16, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v32f16, MVT::v16i32);
+
+ setOperationAction(ISD::STORE, MVT::v32i16, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v32i16, MVT::v16i32);
+ setOperationAction(ISD::STORE, MVT::v32f16, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v32f16, MVT::v16i32);
+
setOperationAction({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND},
MVT::v2i32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Expand);
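
Note: each Promote/AddPromotedToType pair above is width-preserving, so the
legalizer can lower v32i16/v32f16 loads and stores as bitcasts to v16i32 and
only the i32 vector patterns need selection support:

    // Width bookkeeping behind the promotions above (plain C++).
    static_assert(32 * 16 == 512 && 16 * 32 == 512,
                  "v32i16/v32f16 and v16i32 are all 512 bits wide, so the "
                  "promotion bitcast is loss-free");
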
@@ -653,12 +676,15 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction({ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE}, MVT::f16, Legal);
setOperationAction({ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE},
- {MVT::v4f16, MVT::v8f16, MVT::v16f16}, Custom);
+ {MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16},
+ Custom);
setOperationAction({ISD::FMINNUM, ISD::FMAXNUM},
- {MVT::v4f16, MVT::v8f16, MVT::v16f16}, Expand);
+ {MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16},
+ Expand);
- for (MVT Vec16 : {MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16}) {
+ for (MVT Vec16 : {MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16,
+ MVT::v32i16, MVT::v32f16}) {
setOperationAction(
{ISD::BUILD_VECTOR, ISD::EXTRACT_VECTOR_ELT, ISD::SCALAR_TO_VECTOR},
Vec16, Custom);
@@ -681,10 +707,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECTOR_SHUFFLE,
{MVT::v4f16, MVT::v4i16, MVT::v8f16, MVT::v8i16,
- MVT::v16f16, MVT::v16i16},
+ MVT::v16f16, MVT::v16i16, MVT::v32f16, MVT::v32i16},
Custom);
- for (MVT VT : {MVT::v4i16, MVT::v8i16, MVT::v16i16})
+ for (MVT VT : {MVT::v4i16, MVT::v8i16, MVT::v16i16, MVT::v32i16})
// Split vector operations.
setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL, ISD::ADD, ISD::SUB,
ISD::MUL, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX,
@@ -692,7 +718,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
ISD::SSUBSAT},
VT, Custom);
- for (MVT VT : {MVT::v4f16, MVT::v8f16, MVT::v16f16})
+ for (MVT VT : {MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16})
// Split vector operations.
setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA, ISD::FCANONICALIZE},
VT, Custom);
@@ -728,7 +754,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT,
{MVT::v4i16, MVT::v4f16, MVT::v2i8, MVT::v4i8, MVT::v8i8,
- MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16},
+ MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16,
+ MVT::v32i16, MVT::v32f16},
Custom);
setOperationAction({ISD::SMULO, ISD::UMULO}, MVT::i64, Custom);
@@ -736,6 +763,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
if (Subtarget->hasMad64_32())
setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32, Custom);
+ if (Subtarget->hasPrefetch())
+ setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+
+ if (Subtarget->hasIEEEMinMax())
+ setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM},
+ {MVT::f16, MVT::f32, MVT::f64, MVT::v2f16}, Legal);
+
setOperationAction(ISD::INTRINSIC_WO_CHAIN,
{MVT::Other, MVT::f32, MVT::v4f32, MVT::i16, MVT::f16,
MVT::v2i16, MVT::v2f16, MVT::i128},
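
Note: FMINIMUM/FMAXIMUM carry IEEE-754-2019 minimum/maximum semantics, which
differ from the older FMINNUM/FMAXNUM in NaN propagation and signed-zero
ordering; hence the separate hasIEEEMinMax() gate. A standalone C++
illustration of the two semantics (a model, not LLVM code):

    #include <cmath>
    #include <cstdio>

    // minnum-style: a NaN input yields the other operand (std::fmin already
    // behaves this way). minimum-style: NaN propagates, and -0 orders below
    // +0.
    static float fminimumModel(float A, float B) {
      if (std::isnan(A) || std::isnan(B))
        return NAN;
      if (A == 0.0f && B == 0.0f) // +0 and -0 compare equal; order by sign.
        return std::signbit(A) ? A : B;
      return A < B ? A : B;
    }

    int main() {
      std::printf("%g\n", std::fmin(NAN, 1.0f));     // 1 (minnum semantics)
      std::printf("%g\n", fminimumModel(NAN, 1.0f)); // nan (minimum)
    }
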
@@ -753,16 +787,28 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
MVT::i8, MVT::i128},
Custom);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
+ setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
+
+ // TODO: Could move this to custom lowering; it could benefit from combines
+ // on extracts of the relevant bits.
+ setOperationAction(ISD::GET_FPMODE, MVT::i32, Legal);
+
+ setOperationAction(ISD::MUL, MVT::i1, Promote);
+
setTargetDAGCombine({ISD::ADD,
ISD::UADDO_CARRY,
ISD::SUB,
ISD::USUBO_CARRY,
ISD::FADD,
ISD::FSUB,
+ ISD::FDIV,
ISD::FMINNUM,
ISD::FMAXNUM,
ISD::FMINNUM_IEEE,
ISD::FMAXNUM_IEEE,
+ ISD::FMINIMUM,
+ ISD::FMAXIMUM,
ISD::FMA,
ISD::SMIN,
ISD::SMAX,
@@ -772,6 +818,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
ISD::AND,
ISD::OR,
ISD::XOR,
+ ISD::FSHR,
ISD::SINT_TO_FP,
ISD::UINT_TO_FP,
ISD::FCANONICALIZE,
@@ -1002,12 +1049,20 @@ static EVT memVTFromLoadIntrReturn(Type *Ty, unsigned MaxNumLanes) {
MVT SITargetLowering::getPointerTy(const DataLayout &DL, unsigned AS) const {
if (AMDGPUAS::BUFFER_FAT_POINTER == AS && DL.getPointerSizeInBits(AS) == 160)
return MVT::v5i32;
+ if (AMDGPUAS::BUFFER_STRIDED_POINTER == AS &&
+ DL.getPointerSizeInBits(AS) == 192)
+ return MVT::v6i32;
return AMDGPUTargetLowering::getPointerTy(DL, AS);
}
/// Similarly, the in-memory representation of a p7 is {p8, i32}, aka
/// v8i32 when padding is added.
+/// The in-memory representation of a p9 is {p8, i32, i32}, which is
+/// also v8i32 with padding.
MVT SITargetLowering::getPointerMemTy(const DataLayout &DL, unsigned AS) const {
- if (AMDGPUAS::BUFFER_FAT_POINTER == AS && DL.getPointerSizeInBits(AS) == 160)
+ if ((AMDGPUAS::BUFFER_FAT_POINTER == AS &&
+ DL.getPointerSizeInBits(AS) == 160) ||
+ (AMDGPUAS::BUFFER_STRIDED_POINTER == AS &&
+ DL.getPointerSizeInBits(AS) == 192))
return MVT::v8i32;
return AMDGPUTargetLowering::getPointerMemTy(DL, AS);
}
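
Note: both fat-pointer layouts land on the same 256-bit memory type,
presumably because the 128-bit p8 descriptor forces 128-bit alignment,
rounding {p8, i32} (160 bits) and {p8, i32, i32} (192 bits) up to 256 bits,
i.e. v8i32 (an assumption consistent with the doc comment above):

    // Layout arithmetic behind getPointerMemTy (illustrative).
    static_assert(128 + 32 == 160 && 128 + 32 + 32 == 192, "register widths");
    static_assert(8 * 32 == 256, "v8i32 covers either layout after padding");
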
@@ -1186,9 +1241,13 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::amdgcn_global_atomic_fadd:
case Intrinsic::amdgcn_global_atomic_fmin:
case Intrinsic::amdgcn_global_atomic_fmax:
+ case Intrinsic::amdgcn_global_atomic_fmin_num:
+ case Intrinsic::amdgcn_global_atomic_fmax_num:
case Intrinsic::amdgcn_flat_atomic_fadd:
case Intrinsic::amdgcn_flat_atomic_fmin:
case Intrinsic::amdgcn_flat_atomic_fmax:
+ case Intrinsic::amdgcn_flat_atomic_fmin_num:
+ case Intrinsic::amdgcn_flat_atomic_fmax_num:
case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
@@ -1271,6 +1330,8 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
case Intrinsic::amdgcn_flat_atomic_fadd:
case Intrinsic::amdgcn_flat_atomic_fmin:
case Intrinsic::amdgcn_flat_atomic_fmax:
+ case Intrinsic::amdgcn_flat_atomic_fmin_num:
+ case Intrinsic::amdgcn_flat_atomic_fmax_num:
case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16:
case Intrinsic::amdgcn_global_atomic_csub: {
@@ -1284,7 +1345,9 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
}
}
-bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
+bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM,
+ unsigned AddrSpace,
+ uint64_t FlatVariant) const {
if (!Subtarget->hasFlatInstOffsets()) {
// Flat instructions do not have offsets, and only have the register
// address.
@@ -1292,29 +1355,27 @@ bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
}
return AM.Scale == 0 &&
- (AM.BaseOffs == 0 ||
- Subtarget->getInstrInfo()->isLegalFLATOffset(
- AM.BaseOffs, AMDGPUAS::FLAT_ADDRESS, SIInstrFlags::FLAT));
+ (AM.BaseOffs == 0 || Subtarget->getInstrInfo()->isLegalFLATOffset(
+ AM.BaseOffs, AddrSpace, FlatVariant));
}
bool SITargetLowering::isLegalGlobalAddressingMode(const AddrMode &AM) const {
if (Subtarget->hasFlatGlobalInsts())
- return AM.Scale == 0 &&
- (AM.BaseOffs == 0 || Subtarget->getInstrInfo()->isLegalFLATOffset(
- AM.BaseOffs, AMDGPUAS::GLOBAL_ADDRESS,
- SIInstrFlags::FlatGlobal));
+ return isLegalFlatAddressingMode(AM, AMDGPUAS::GLOBAL_ADDRESS,
+ SIInstrFlags::FlatGlobal);
if (!Subtarget->hasAddr64() || Subtarget->useFlatForGlobal()) {
- // Assume the we will use FLAT for all global memory accesses
- // on VI.
- // FIXME: This assumption is currently wrong. On VI we still use
- // MUBUF instructions for the r + i addressing mode. As currently
- // implemented, the MUBUF instructions only work on buffer < 4GB.
- // It may be possible to support > 4GB buffers with MUBUF instructions,
- // by setting the stride value in the resource descriptor which would
- // increase the size limit to (stride * 4GB). However, this is risky,
- // because it has never been validated.
- return isLegalFlatAddressingMode(AM);
+ // Assume that we will use FLAT for all global memory accesses
+ // on VI.
+ // FIXME: This assumption is currently wrong. On VI we still use
+ // MUBUF instructions for the r + i addressing mode. As currently
+ // implemented, the MUBUF instructions only work on buffers < 4GB.
+ // It may be possible to support > 4GB buffers with MUBUF instructions,
+ // by setting the stride value in the resource descriptor which would
+ // increase the size limit to (stride * 4GB). However, this is risky,
+ // because it has never been validated.
+ return isLegalFlatAddressingMode(AM, AMDGPUAS::FLAT_ADDRESS,
+ SIInstrFlags::FLAT);
}
return isLegalMUBUFAddressingMode(AM);
@@ -1330,7 +1391,8 @@ bool SITargetLowering::isLegalMUBUFAddressingMode(const AddrMode &AM) const {
// assume those use MUBUF instructions. Scratch loads / stores are currently
// implemented as mubuf instructions with offen bit set, so slightly
// different than the normal addr64.
- if (!SIInstrInfo::isLegalMUBUFImmOffset(AM.BaseOffs))
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
+ if (!TII->isLegalMUBUFImmOffset(AM.BaseOffs))
return false;
// FIXME: Since we can split immediate into soffset and immediate offset,
@@ -1367,7 +1429,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
- AS == AMDGPUAS::BUFFER_FAT_POINTER || AS == AMDGPUAS::BUFFER_RESOURCE) {
+ AS == AMDGPUAS::BUFFER_FAT_POINTER || AS == AMDGPUAS::BUFFER_RESOURCE ||
+ AS == AMDGPUAS::BUFFER_STRIDED_POINTER) {
// If the offset isn't a multiple of 4, it probably isn't going to be
// correctly aligned.
// FIXME: Can we get the real alignment here?
@@ -1394,11 +1457,15 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
// On VI, these use the SMEM format and the offset is 20-bit in bytes.
if (!isUInt<20>(AM.BaseOffs))
return false;
- } else {
+ } else if (Subtarget->getGeneration() < AMDGPUSubtarget::GFX12) {
// On GFX9 the offset is signed 21-bit in bytes (but must not be negative
// for S_BUFFER_* instructions).
if (!isInt<21>(AM.BaseOffs))
return false;
+ } else {
+ // On GFX12, all offsets are signed 24-bit in bytes.
+ if (!isInt<24>(AM.BaseOffs))
+ return false;
}
if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
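
Note: the three branches above enforce progressively wider SMEM offset
fields: unsigned 20-bit on VI, signed 21-bit from GFX9 up to GFX11 (per the
< GFX12 check), and signed 24-bit on GFX12. The same range checks with plain
arithmetic, runnable standalone:

    #include <cassert>
    #include <cstdint>

    // Standalone equivalents of llvm::isUInt<N> / llvm::isInt<N> used above.
    constexpr bool isUIntN(unsigned N, int64_t V) {
      return V >= 0 && V < (int64_t(1) << N);
    }
    constexpr bool isIntN(unsigned N, int64_t V) {
      return V >= -(int64_t(1) << (N - 1)) && V < (int64_t(1) << (N - 1));
    }

    int main() {
      assert(isUIntN(20, 0xFFFFF));  // VI: largest 20-bit unsigned offset
      assert(isIntN(21, -0x100000)); // GFX9: most negative 21-bit offset
      assert(isIntN(24, 0x7FFFFF));  // GFX12: largest 24-bit signed offset
      assert(!isIntN(21, 0x100000)); // 2^20 overflows a signed 21-bit field
    }
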
@@ -1411,9 +1478,13 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
}
if (AS == AMDGPUAS::PRIVATE_ADDRESS)
- return isLegalMUBUFAddressingMode(AM);
+ return Subtarget->enableFlatScratch()
+ ? isLegalFlatAddressingMode(AM, AMDGPUAS::PRIVATE_ADDRESS,
+ SIInstrFlags::FlatScratch)
+ : isLegalMUBUFAddressingMode(AM);
- if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
+ if (AS == AMDGPUAS::LOCAL_ADDRESS ||
+ (AS == AMDGPUAS::REGION_ADDRESS && Subtarget->hasGDS())) {
// Basic, single offset DS instructions allow a 16-bit unsigned immediate
// field.
// XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
@@ -1436,7 +1507,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
// computation. We don't have instructions that compute pointers with any
// addressing modes, so treat them as having no offset like flat
// instructions.
- return isLegalFlatAddressingMode(AM);
+ return isLegalFlatAddressingMode(AM, AMDGPUAS::FLAT_ADDRESS,
+ SIInstrFlags::FLAT);
}
// Assume a user alias of global for unknown address spaces.
@@ -1748,13 +1820,13 @@ SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
// We may not have the kernarg segment argument if we have no kernel
// arguments.
if (!InputPtrReg)
- return DAG.getConstant(0, SL, PtrVT);
+ return DAG.getConstant(Offset, SL, PtrVT);
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
MRI.getLiveInVirtReg(InputPtrReg->getRegister()), PtrVT);
- return DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Offset));
+ return DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Offset));
}
SDValue SITargetLowering::getImplicitArgPtr(SelectionDAG &DAG,
@@ -2133,13 +2205,14 @@ void SITargetLowering::allocateSpecialInputSGPRs(
const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) const {
auto &ArgInfo = Info.getArgInfo();
+ const GCNUserSGPRUsageInfo &UserSGPRInfo = Info.getUserSGPRInfo();
// TODO: Unify handling with private memory pointers.
- if (Info.hasDispatchPtr())
+ if (UserSGPRInfo.hasDispatchPtr())
allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr);
const Module *M = MF.getFunction().getParent();
- if (Info.hasQueuePtr() &&
+ if (UserSGPRInfo.hasQueuePtr() &&
AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5)
allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr);
@@ -2148,7 +2221,7 @@ void SITargetLowering::allocateSpecialInputSGPRs(
if (Info.hasImplicitArgPtr())
allocateSGPR64Input(CCInfo, ArgInfo.ImplicitArgPtr);
- if (Info.hasDispatchID())
+ if (UserSGPRInfo.hasDispatchID())
allocateSGPR64Input(CCInfo, ArgInfo.DispatchID);
// flat_scratch_init is not applicable for non-kernel functions.
@@ -2171,34 +2244,35 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
MachineFunction &MF,
const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) const {
- if (Info.hasImplicitBufferPtr()) {
+ const GCNUserSGPRUsageInfo &UserSGPRInfo = Info.getUserSGPRInfo();
+ if (UserSGPRInfo.hasImplicitBufferPtr()) {
Register ImplicitBufferPtrReg = Info.addImplicitBufferPtr(TRI);
MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(ImplicitBufferPtrReg);
}
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
- if (Info.hasPrivateSegmentBuffer()) {
+ if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
CCInfo.AllocateReg(PrivateSegmentBufferReg);
}
- if (Info.hasDispatchPtr()) {
+ if (UserSGPRInfo.hasDispatchPtr()) {
Register DispatchPtrReg = Info.addDispatchPtr(TRI);
MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchPtrReg);
}
const Module *M = MF.getFunction().getParent();
- if (Info.hasQueuePtr() &&
+ if (UserSGPRInfo.hasQueuePtr() &&
AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) {
Register QueuePtrReg = Info.addQueuePtr(TRI);
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(QueuePtrReg);
}
- if (Info.hasKernargSegmentPtr()) {
+ if (UserSGPRInfo.hasKernargSegmentPtr()) {
MachineRegisterInfo &MRI = MF.getRegInfo();
Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
CCInfo.AllocateReg(InputPtrReg);
@@ -2207,26 +2281,100 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
MRI.setType(VReg, LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
}
- if (Info.hasDispatchID()) {
+ if (UserSGPRInfo.hasDispatchID()) {
Register DispatchIDReg = Info.addDispatchID(TRI);
MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchIDReg);
}
- if (Info.hasFlatScratchInit() && !getSubtarget()->isAmdPalOS()) {
+ if (UserSGPRInfo.hasFlatScratchInit() && !getSubtarget()->isAmdPalOS()) {
Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);
MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(FlatScratchInitReg);
}
+ // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
+ // these from the dispatch pointer.
+}
+
+ // Allocate pre-loaded kernel arguments. Arguments to be preloaded must be
+ // sequential starting from the first argument.
+void SITargetLowering::allocatePreloadKernArgSGPRs(
+ CCState &CCInfo, SmallVectorImpl<CCValAssign> &ArgLocs,
+ const SmallVectorImpl<ISD::InputArg> &Ins, MachineFunction &MF,
+ const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const {
+ Function &F = MF.getFunction();
+ unsigned LastExplicitArgOffset =
+ MF.getSubtarget<GCNSubtarget>().getExplicitKernelArgOffset();
+ GCNUserSGPRUsageInfo &SGPRInfo = Info.getUserSGPRInfo();
+ bool InPreloadSequence = true;
+ unsigned InIdx = 0;
+ for (auto &Arg : F.args()) {
+ if (!InPreloadSequence || !Arg.hasInRegAttr())
+ break;
+
+ int ArgIdx = Arg.getArgNo();
+ // Don't preload non-original args or parts not in the current preload
+ // sequence.
+ if (InIdx < Ins.size() && (!Ins[InIdx].isOrigArg() ||
+ (int)Ins[InIdx].getOrigArgIndex() != ArgIdx))
+ break;
+
+ for (; InIdx < Ins.size() && Ins[InIdx].isOrigArg() &&
+ (int)Ins[InIdx].getOrigArgIndex() == ArgIdx;
+ InIdx++) {
+ assert(ArgLocs[ArgIdx].isMemLoc());
+ auto &ArgLoc = ArgLocs[InIdx];
+ const Align KernelArgBaseAlign = Align(16);
+ unsigned ArgOffset = ArgLoc.getLocMemOffset();
+ Align Alignment = commonAlignment(KernelArgBaseAlign, ArgOffset);
+ unsigned NumAllocSGPRs =
+ alignTo(ArgLoc.getLocVT().getFixedSizeInBits(), 32) / 32;
+
+ // Arg is preloaded into the previous SGPR.
+ if (ArgLoc.getLocVT().getStoreSize() < 4 && Alignment < 4) {
+ Info.getArgInfo().PreloadKernArgs[InIdx].Regs.push_back(
+ Info.getArgInfo().PreloadKernArgs[InIdx - 1].Regs[0]);
+ continue;
+ }
+
+ unsigned Padding = ArgOffset - LastExplicitArgOffset;
+ unsigned PaddingSGPRs = alignTo(Padding, 4) / 4;
+ // Check for free user SGPRs for preloading.
+ if (PaddingSGPRs + NumAllocSGPRs + 1 /*Synthetic SGPRs*/ >
+ SGPRInfo.getNumFreeUserSGPRs()) {
+ InPreloadSequence = false;
+ break;
+ }
+
+ // Preload this argument.
+ const TargetRegisterClass *RC =
+ TRI.getSGPRClassForBitWidth(NumAllocSGPRs * 32);
+ SmallVectorImpl<MCRegister> *PreloadRegs =
+ Info.addPreloadedKernArg(TRI, RC, NumAllocSGPRs, InIdx, PaddingSGPRs);
+
+ if (PreloadRegs->size() > 1)
+ RC = &AMDGPU::SGPR_32RegClass;
+ for (auto &Reg : *PreloadRegs) {
+ assert(Reg);
+ MF.addLiveIn(Reg, RC);
+ CCInfo.AllocateReg(Reg);
+ }
+
+ LastExplicitArgOffset = NumAllocSGPRs * 4 + ArgOffset;
+ }
+ }
+}
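
A minimal sketch of the SGPR cost accounting this function performs, assuming kernarg offsets are byte offsets from a 16-byte-aligned base; alignToN mirrors llvm::alignTo and preloadSGPRCost is a hypothetical helper, not part of the patch.

#include <cstdint>

static uint64_t alignToN(uint64_t V, uint64_t A) { return (V + A - 1) / A * A; }

// Cost in user SGPRs of preloading one argument of SizeInBits at byte
// ArgOffset, when the previous argument ended at LastExplicitArgOffset.
// The real code also reserves one synthetic SGPR and ends the preload
// sequence when the total exceeds getNumFreeUserSGPRs().
static unsigned preloadSGPRCost(unsigned SizeInBits, unsigned ArgOffset,
                                unsigned LastExplicitArgOffset) {
  unsigned NumAllocSGPRs = alignToN(SizeInBits, 32) / 32; // data SGPRs
  unsigned Padding = ArgOffset - LastExplicitArgOffset;   // gap in bytes
  unsigned PaddingSGPRs = alignToN(Padding, 4) / 4;       // skipped SGPRs
  return PaddingSGPRs + NumAllocSGPRs;
}
// e.g. (i32, i64): the i64 lands at offset 8 while the i32 ended at 4, so
// preloading it costs 1 padding SGPR + 2 data SGPRs.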
+
+void SITargetLowering::allocateLDSKernelId(CCState &CCInfo, MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) const {
+ // Always allocate this last since it is a synthetic preload.
if (Info.hasLDSKernelId()) {
Register Reg = Info.addLDSKernelId();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(Reg);
}
-
- // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
- // these from the dispatch pointer.
}
// Allocate special input registers that are initialized per-wave.
@@ -2331,7 +2479,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
// Everything live out of a block is spilled with fast regalloc, so it's
// almost certain that spilling will be required.
- if (TM.getOptLevel() == CodeGenOpt::None)
+ if (TM.getOptLevel() == CodeGenOptLevel::None)
HasStackObjects = true;
// For now assume stack access is needed in any callee functions, so we need
@@ -2477,12 +2625,14 @@ SDValue SITargetLowering::LowerFormalArguments(
bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CallConv);
if (IsGraphics) {
- assert(!Info->hasDispatchPtr() && !Info->hasKernargSegmentPtr() &&
- !Info->hasWorkGroupInfo() && !Info->hasLDSKernelId() &&
- !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() &&
- !Info->hasWorkItemIDZ());
+ const GCNUserSGPRUsageInfo &UserSGPRInfo = Info->getUserSGPRInfo();
+ assert(!UserSGPRInfo.hasDispatchPtr() &&
+ !UserSGPRInfo.hasKernargSegmentPtr() && !Info->hasWorkGroupInfo() &&
+ !Info->hasLDSKernelId() && !Info->hasWorkItemIDX() &&
+ !Info->hasWorkItemIDY() && !Info->hasWorkItemIDZ());
+ (void)UserSGPRInfo;
if (!Subtarget->enableFlatScratch())
- assert(!Info->hasFlatScratchInit());
+ assert(!UserSGPRInfo.hasFlatScratchInit());
if (CallConv != CallingConv::AMDGPU_CS || !Subtarget->hasArchitectedSGPRs())
assert(!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
!Info->hasWorkGroupIDZ());
@@ -2531,18 +2681,29 @@ SDValue SITargetLowering::LowerFormalArguments(
Splits.append(Ins.begin(), Ins.end());
}
+ if (IsKernel)
+ analyzeFormalArgumentsCompute(CCInfo, Ins);
+
if (IsEntryFunc) {
allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
allocateHSAUserSGPRs(CCInfo, MF, *TRI, *Info);
+ if (IsKernel && Subtarget->hasKernargPreload() &&
+ !Subtarget->needsKernargPreloadBackwardsCompatibility())
+ allocatePreloadKernArgSGPRs(CCInfo, ArgLocs, Ins, MF, *TRI, *Info);
+
+ allocateLDSKernelId(CCInfo, MF, *TRI, *Info);
} else if (!IsGraphics) {
// For the fixed ABI, pass workitem IDs in the last argument register.
allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
}
- if (IsKernel) {
- analyzeFormalArgumentsCompute(CCInfo, Ins);
- } else {
+ if (!IsKernel) {
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, isVarArg);
+ if (!IsGraphics && !Subtarget->enableFlatScratch()) {
+ CCInfo.AllocateRegBlock(ArrayRef<MCPhysReg>{AMDGPU::SGPR0, AMDGPU::SGPR1,
+ AMDGPU::SGPR2, AMDGPU::SGPR3},
+ 4);
+ }
CCInfo.AnalyzeFormalArguments(Splits, AssignFn);
}
@@ -2587,9 +2748,81 @@ SDValue SITargetLowering::LowerFormalArguments(
continue;
}
- SDValue Arg = lowerKernargMemParameter(
- DAG, VT, MemVT, DL, Chain, Offset, Alignment, Ins[i].Flags.isSExt(), &Ins[i]);
- Chains.push_back(Arg.getValue(1));
+ SDValue NewArg;
+ if (Arg.isOrigArg() && Info->getArgInfo().PreloadKernArgs.count(i)) {
+ if (MemVT.getStoreSize() < 4 && Alignment < 4) {
+ // In this case the argument is packed into the previous preload SGPR.
+ int64_t AlignDownOffset = alignDown(Offset, 4);
+ int64_t OffsetDiff = Offset - AlignDownOffset;
+ EVT IntVT = MemVT.changeTypeToInteger();
+
+ const SIMachineFunctionInfo *Info =
+ MF.getInfo<SIMachineFunctionInfo>();
+ MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ Register Reg =
+ Info->getArgInfo().PreloadKernArgs.find(i)->getSecond().Regs[0];
+
+ assert(Reg);
+ Register VReg = MRI.getLiveInVirtReg(Reg);
+ SDValue Copy = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i32);
+
+ SDValue ShiftAmt = DAG.getConstant(OffsetDiff * 8, DL, MVT::i32);
+ SDValue Extract = DAG.getNode(ISD::SRL, DL, MVT::i32, Copy, ShiftAmt);
+
+ SDValue ArgVal = DAG.getNode(ISD::TRUNCATE, DL, IntVT, Extract);
+ ArgVal = DAG.getNode(ISD::BITCAST, DL, MemVT, ArgVal);
+ NewArg = convertArgType(DAG, VT, MemVT, DL, ArgVal,
+ Ins[i].Flags.isSExt(), &Ins[i]);
+
+ NewArg = DAG.getMergeValues({NewArg, Copy.getValue(1)}, DL);
+ } else {
+ const SIMachineFunctionInfo *Info =
+ MF.getInfo<SIMachineFunctionInfo>();
+ MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ const SmallVectorImpl<MCRegister> &PreloadRegs =
+ Info->getArgInfo().PreloadKernArgs.find(i)->getSecond().Regs;
+
+ SDValue Copy;
+ if (PreloadRegs.size() == 1) {
+ Register VReg = MRI.getLiveInVirtReg(PreloadRegs[0]);
+ const TargetRegisterClass *RC = MRI.getRegClass(VReg);
+ NewArg = DAG.getCopyFromReg(
+ Chain, DL, VReg,
+ EVT::getIntegerVT(*DAG.getContext(),
+ TRI->getRegSizeInBits(*RC)));
+
+ } else {
+ // If the kernarg alignment does not match the alignment of the SGPR
+ // tuple RC that can accommodate this argument, it will be built up
+ // via copies from the individual SGPRs that the argument was
+ // preloaded to.
+ SmallVector<SDValue, 4> Elts;
+ for (auto Reg : PreloadRegs) {
+ Register VReg = MRI.getLiveInVirtReg(Reg);
+ Copy = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i32);
+ Elts.push_back(Copy);
+ }
+ NewArg =
+ DAG.getBuildVector(EVT::getVectorVT(*DAG.getContext(), MVT::i32,
+ PreloadRegs.size()),
+ DL, Elts);
+ }
+
+ SDValue CMemVT;
+ if (VT.isScalarInteger() && VT.bitsLT(NewArg.getSimpleValueType()))
+ CMemVT = DAG.getNode(ISD::TRUNCATE, DL, MemVT, NewArg);
+ else
+ CMemVT = DAG.getBitcast(MemVT, NewArg);
+ NewArg = convertArgType(DAG, VT, MemVT, DL, CMemVT,
+ Ins[i].Flags.isSExt(), &Ins[i]);
+ NewArg = DAG.getMergeValues({NewArg, Chain}, DL);
+ }
+ } else {
+ NewArg =
+ lowerKernargMemParameter(DAG, VT, MemVT, DL, Chain, Offset,
+ Alignment, Ins[i].Flags.isSExt(), &Ins[i]);
+ }
+ Chains.push_back(NewArg.getValue(1));
auto *ParamTy =
dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
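
Worked example of the shift-and-truncate extraction in the hunk above (a sketch, not part of the lowering): a sub-dword argument that shares a dword with its predecessor is recovered from the 32-bit preload SGPR like so.

#include <cstdint>

// An i8 kernarg at byte Offset shares the dword at alignDown(Offset, 4)
// with the previous argument; the lowering emits an SRL by the byte
// distance times 8, then truncates to the memory type.
static uint8_t extractPackedI8(uint32_t PreloadSGPR, unsigned Offset) {
  unsigned AlignDownOffset = Offset & ~3u;         // alignDown(Offset, 4)
  unsigned OffsetDiff = Offset - AlignDownOffset;  // byte index in the dword
  return uint8_t(PreloadSGPR >> (OffsetDiff * 8)); // SRL + TRUNCATE
}
// e.g. Offset == 6: the value is byte 2 of the dword preloaded for offset 4,
// i.e. (SGPR >> 16) & 0xff.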
@@ -2599,11 +2832,11 @@ SDValue SITargetLowering::LowerFormalArguments(
// On SI local pointers are just offsets into LDS, so they are always
// less than 16-bits. On CI and newer they could potentially be
// real pointers, so we can't guarantee their size.
- Arg = DAG.getNode(ISD::AssertZext, DL, Arg.getValueType(), Arg,
- DAG.getValueType(MVT::i16));
+ NewArg = DAG.getNode(ISD::AssertZext, DL, NewArg.getValueType(), NewArg,
+ DAG.getValueType(MVT::i16));
}
- InVals.push_back(Arg);
+ InVals.push_back(NewArg);
continue;
} else if (!IsEntryFunc && VA.isMemLoc()) {
SDValue Val = lowerStackParameter(DAG, VA, DL, Chain, Arg);
@@ -3084,6 +3317,9 @@ bool SITargetLowering::isEligibleForTailCallOptimization(
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
+ if (AMDGPU::isChainCC(CalleeCC))
+ return true;
+
if (!mayTailCallThisCC(CalleeCC))
return false;
@@ -3168,7 +3404,36 @@ bool SITargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
// The wave scratch offset register is used as the global base pointer.
SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool IsChainCallConv = AMDGPU::isChainCC(CallConv);
+
SelectionDAG &DAG = CLI.DAG;
+
+ TargetLowering::ArgListEntry RequestedExec;
+ if (IsChainCallConv) {
+ // The last argument should be the value that we need to put in EXEC.
+ // Pop it out of CLI.Outs and CLI.OutVals before we do any processing so we
+ // don't treat it like the rest of the arguments.
+ RequestedExec = CLI.Args.back();
+ assert(RequestedExec.Node && "No node for EXEC");
+
+ if (!RequestedExec.Ty->isIntegerTy(Subtarget->getWavefrontSize()))
+ return lowerUnhandledCall(CLI, InVals, "Invalid value for EXEC");
+
+ assert(CLI.Outs.back().OrigArgIndex == 2 && "Unexpected last arg");
+ CLI.Outs.pop_back();
+ CLI.OutVals.pop_back();
+
+ if (RequestedExec.Ty->isIntegerTy(64)) {
+ assert(CLI.Outs.back().OrigArgIndex == 2 && "Exec wasn't split up");
+ CLI.Outs.pop_back();
+ CLI.OutVals.pop_back();
+ }
+
+ assert(CLI.Outs.back().OrigArgIndex != 2 &&
+ "Haven't popped all the pieces of the EXEC mask");
+ }
+
const SDLoc &DL = CLI.DL;
SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
@@ -3176,7 +3441,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &IsTailCall = CLI.IsTailCall;
- CallingConv::ID CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
bool IsSibCall = false;
bool IsThisReturn = false;
@@ -3207,9 +3471,10 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
if (IsTailCall) {
IsTailCall = isEligibleForTailCallOptimization(
Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
- if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) {
+ if (!IsTailCall &&
+ ((CLI.CB && CLI.CB->isMustTailCall()) || IsChainCallConv)) {
report_fatal_error("failed to perform tail call elimination on a call "
- "site marked musttail");
+ "site marked musttail or on llvm.amdgcn.cs.chain");
}
bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
@@ -3232,7 +3497,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, IsVarArg);
- if (CallConv != CallingConv::AMDGPU_Gfx) {
+ if (CallConv != CallingConv::AMDGPU_Gfx && !AMDGPU::isChainCC(CallConv)) {
// With a fixed ABI, allocate fixed registers before user arguments.
passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain);
}
@@ -3258,16 +3523,20 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- if (!IsSibCall) {
+ if (!IsSibCall)
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
+ if (!IsSibCall || IsChainCallConv) {
if (!Subtarget->enableFlatScratch()) {
SmallVector<SDValue, 4> CopyFromChains;
// In the HSA case, this should be an identity copy.
SDValue ScratchRSrcReg
= DAG.getCopyFromReg(Chain, DL, Info->getScratchRSrcReg(), MVT::v4i32);
- RegsToPass.emplace_back(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg);
+ RegsToPass.emplace_back(IsChainCallConv
+ ? AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51
+ : AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
+ ScratchRSrcReg);
CopyFromChains.push_back(ScratchRSrcReg.getValue(1));
Chain = DAG.getTokenFactor(DL, CopyFromChains);
}
@@ -3412,6 +3681,9 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
}
+ if (IsChainCallConv)
+ Ops.push_back(RequestedExec.Node);
+
// Add argument registers to the end of the list so that they are known live
// into the call.
for (auto &RegToPass : RegsToPass) {
@@ -3420,8 +3692,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
}
// Add a register mask operand representing the call-preserved registers.
-
- auto *TRI = static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());
+ auto *TRI = static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -3435,8 +3706,17 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
// actual call instruction.
if (IsTailCall) {
MFI.setHasTailCall();
- unsigned OPC = CallConv == CallingConv::AMDGPU_Gfx ?
- AMDGPUISD::TC_RETURN_GFX : AMDGPUISD::TC_RETURN;
+ unsigned OPC = AMDGPUISD::TC_RETURN;
+ switch (CallConv) {
+ case CallingConv::AMDGPU_Gfx:
+ OPC = AMDGPUISD::TC_RETURN_GFX;
+ break;
+ case CallingConv::AMDGPU_CS_Chain:
+ case CallingConv::AMDGPU_CS_ChainPreserve:
+ OPC = AMDGPUISD::TC_RETURN_CHAIN;
+ break;
+ }
+
return DAG.getNode(OPC, DL, NodeTys, Ops);
}
@@ -3481,22 +3761,21 @@ SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
MaybeAlign Alignment = cast<ConstantSDNode>(Tmp3)->getMaybeAlignValue();
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- const TargetFrameLowering *TFL = ST.getFrameLowering();
+ const TargetFrameLowering *TFL = Subtarget->getFrameLowering();
unsigned Opc =
TFL->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ?
ISD::ADD : ISD::SUB;
SDValue ScaledSize = DAG.getNode(
ISD::SHL, dl, VT, Size,
- DAG.getConstant(ST.getWavefrontSizeLog2(), dl, MVT::i32));
+ DAG.getConstant(Subtarget->getWavefrontSizeLog2(), dl, MVT::i32));
Align StackAlign = TFL->getStackAlign();
Tmp1 = DAG.getNode(Opc, dl, VT, SP, ScaledSize); // Value
if (Alignment && *Alignment > StackAlign) {
Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
DAG.getConstant(-(uint64_t)Alignment->value()
- << ST.getWavefrontSizeLog2(),
+ << Subtarget->getWavefrontSizeLog2(),
dl, VT));
}
@@ -3520,6 +3799,111 @@ SDValue SITargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
return AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(Op, DAG);
}
+SDValue SITargetLowering::LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getValueType() != MVT::i32)
+ return Op; // Defer to cannot select error.
+
+ Register SP = getStackPointerRegisterToSaveRestore();
+ SDLoc SL(Op);
+
+ SDValue CopyFromSP = DAG.getCopyFromReg(Op->getOperand(0), SL, SP, MVT::i32);
+
+ // Convert from wave uniform to swizzled vector address. This should protect
+ // from any edge cases where the stacksave result isn't directly used with
+ // stackrestore.
+ SDValue VectorAddress =
+ DAG.getNode(AMDGPUISD::WAVE_ADDRESS, SL, MVT::i32, CopyFromSP);
+ return DAG.getMergeValues({VectorAddress, CopyFromSP.getValue(1)}, SL);
+}
+
+SDValue SITargetLowering::lowerGET_ROUNDING(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ assert(Op.getValueType() == MVT::i32);
+
+ uint32_t BothRoundHwReg =
+ AMDGPU::Hwreg::encodeHwreg(AMDGPU::Hwreg::ID_MODE, 0, 4);
+ SDValue GetRoundBothImm = DAG.getTargetConstant(BothRoundHwReg, SL, MVT::i32);
+
+ SDValue IntrinID =
+ DAG.getTargetConstant(Intrinsic::amdgcn_s_getreg, SL, MVT::i32);
+ SDValue GetReg = DAG.getNode(ISD::INTRINSIC_W_CHAIN, SL, Op->getVTList(),
+ Op.getOperand(0), IntrinID, GetRoundBothImm);
+
+ // There are two rounding modes, one for f32 and one for f64/f16. We only
+ // report in the standard value range if both are the same.
+ //
+ // The raw values also differ from the expected FLT_ROUNDS values. Nearest
+ // ties away from zero is not supported, and the other values are rotated by
+ // 1.
+ //
+ // If the two rounding modes are not the same, report a target defined value.
+
+ // Mode register rounding mode fields:
+ //
+ // [1:0] Single-precision round mode.
+ // [3:2] Double/Half-precision round mode.
+ //
+ // 0 = nearest even, 1 = +infinity, 2 = -infinity, 3 = toward zero.
+ //
+ //                 Hardware   Spec
+ // Toward-0           3         0
+ // Nearest Even       0         1
+ // +Inf               1         2
+ // -Inf               2         3
+ // NearestAway0      N/A        4
+ //
+ // We have to handle 16 permutations of a 4-bit value, so we create a 64-bit
+ // table we can index by the raw hardware mode.
+ //
+ // (trunc (FltRoundConversionTable >> (MODE.fp_round * 4))) & 0xf
+
+ SDValue BitTable =
+ DAG.getConstant(AMDGPU::FltRoundConversionTable, SL, MVT::i64);
+
+ SDValue Two = DAG.getConstant(2, SL, MVT::i32);
+ SDValue RoundModeTimesNumBits =
+ DAG.getNode(ISD::SHL, SL, MVT::i32, GetReg, Two);
+
+ // TODO: We could possibly avoid a 64-bit shift and use a simpler table if we
+ // knew only one mode was demanded.
+ SDValue TableValue =
+ DAG.getNode(ISD::SRL, SL, MVT::i64, BitTable, RoundModeTimesNumBits);
+ SDValue TruncTable = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, TableValue);
+
+ SDValue EntryMask = DAG.getConstant(0xf, SL, MVT::i32);
+ SDValue TableEntry =
+ DAG.getNode(ISD::AND, SL, MVT::i32, TruncTable, EntryMask);
+
+ // There's a gap between the 4-bit encoded table entries and the actual enum
+ // values, so offset the result by 4 if it's an extended value.
+ SDValue Four = DAG.getConstant(4, SL, MVT::i32);
+ SDValue IsStandardValue =
+ DAG.getSetCC(SL, MVT::i1, TableEntry, Four, ISD::SETULT);
+ SDValue EnumOffset = DAG.getNode(ISD::ADD, SL, MVT::i32, TableEntry, Four);
+ SDValue Result = DAG.getNode(ISD::SELECT, SL, MVT::i32, IsStandardValue,
+ TableEntry, EnumOffset);
+
+ return DAG.getMergeValues({Result, GetReg.getValue(1)}, SL);
+}
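
The table lookup reduces to a few scalar operations; here is a minimal sketch, with TableVal standing in for AMDGPU::FltRoundConversionTable (whose actual 64-bit value is defined elsewhere in the tree).

#include <cstdint>

// HwMode4Bit is MODE[3:0]: f32 round mode in bits [1:0], f64/f16 in [3:2].
static uint32_t decodeFltRounds(uint64_t TableVal, uint32_t HwMode4Bit) {
  uint32_t Entry = uint32_t(TableVal >> (HwMode4Bit * 4)) & 0xf;
  // Entries 0..3 are standard FLT_ROUNDS values; larger entries encode
  // target-defined results and sit 4 below the reported value.
  return Entry < 4 ? Entry : Entry + 4;
}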
+
+SDValue SITargetLowering::lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const {
+ if (Op->isDivergent())
+ return SDValue();
+
+ switch (cast<MemSDNode>(Op)->getAddressSpace()) {
+ case AMDGPUAS::FLAT_ADDRESS:
+ case AMDGPUAS::GLOBAL_ADDRESS:
+ case AMDGPUAS::CONSTANT_ADDRESS:
+ case AMDGPUAS::CONSTANT_ADDRESS_32BIT:
+ break;
+ default:
+ return SDValue();
+ }
+
+ return Op;
+}
+
Register SITargetLowering::getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const {
Register Reg = StringSwitch<Register>(RegName)
@@ -4217,40 +4601,51 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
}
case AMDGPU::S_ADD_U64_PSEUDO:
case AMDGPU::S_SUB_U64_PSEUDO: {
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ // For targets older than GFX12, we emit a sequence of 32-bit operations.
+ // For GFX12, we emit s_add_u64 and s_sub_u64.
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
- const TargetRegisterClass *BoolRC = TRI->getBoolRC();
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
const DebugLoc &DL = MI.getDebugLoc();
-
MachineOperand &Dest = MI.getOperand(0);
MachineOperand &Src0 = MI.getOperand(1);
MachineOperand &Src1 = MI.getOperand(2);
-
- Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
- Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
-
- MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm(
- MI, MRI, Src0, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
- MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm(
- MI, MRI, Src0, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
-
- MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm(
- MI, MRI, Src1, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
- MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm(
- MI, MRI, Src1, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
-
bool IsAdd = (MI.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
-
- unsigned LoOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
- unsigned HiOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
- BuildMI(*BB, MI, DL, TII->get(LoOpc), DestSub0).add(Src0Sub0).add(Src1Sub0);
- BuildMI(*BB, MI, DL, TII->get(HiOpc), DestSub1).add(Src0Sub1).add(Src1Sub1);
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest.getReg())
- .addReg(DestSub0)
- .addImm(AMDGPU::sub0)
- .addReg(DestSub1)
- .addImm(AMDGPU::sub1);
+ if (Subtarget->hasScalarAddSub64()) {
+ unsigned Opc = IsAdd ? AMDGPU::S_ADD_U64 : AMDGPU::S_SUB_U64;
+ BuildMI(*BB, MI, DL, TII->get(Opc), Dest.getReg())
+ .addReg(Src0.getReg())
+ .addReg(Src1.getReg());
+ } else {
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const TargetRegisterClass *BoolRC = TRI->getBoolRC();
+
+ Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+
+ MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm(
+ MI, MRI, Src0, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
+ MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm(
+ MI, MRI, Src0, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
+
+ MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm(
+ MI, MRI, Src1, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
+ MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm(
+ MI, MRI, Src1, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
+
+ unsigned LoOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
+ unsigned HiOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
+ BuildMI(*BB, MI, DL, TII->get(LoOpc), DestSub0)
+ .add(Src0Sub0)
+ .add(Src1Sub0);
+ BuildMI(*BB, MI, DL, TII->get(HiOpc), DestSub1)
+ .add(Src0Sub1)
+ .add(Src1Sub1);
+ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest.getReg())
+ .addReg(DestSub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(DestSub1)
+ .addImm(AMDGPU::sub1);
+ }
MI.eraseFromParent();
return BB;
}
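
The pre-GFX12 expansion is the usual carry-chained split; a self-contained sketch of the arithmetic it implements:

#include <cstdint>

// 64-bit scalar add as emitted for pre-GFX12: s_add_u32 on the low halves
// sets SCC on carry-out, and s_addc_u32 consumes it for the high halves.
// The subtract case has the same shape with s_sub_u32 / s_subb_u32.
static uint64_t addU64ViaU32(uint64_t A, uint64_t B) {
  uint32_t Lo = uint32_t(A) + uint32_t(B);
  uint32_t Carry = Lo < uint32_t(A);                         // SCC
  uint32_t Hi = uint32_t(A >> 32) + uint32_t(B >> 32) + Carry;
  return (uint64_t(Hi) << 32) | Lo;                          // REG_SEQUENCE
}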
@@ -4463,8 +4858,8 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
const SIRegisterInfo *TRI = ST.getRegisterInfo();
Register Dst = MI.getOperand(0).getReg();
- Register Src0 = MI.getOperand(1).getReg();
- Register Src1 = MI.getOperand(2).getReg();
+ const MachineOperand &Src0 = MI.getOperand(1);
+ const MachineOperand &Src1 = MI.getOperand(2);
const DebugLoc &DL = MI.getDebugLoc();
Register SrcCond = MI.getOperand(3).getReg();
@@ -4473,20 +4868,42 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
const auto *CondRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
Register SrcCondCopy = MRI.createVirtualRegister(CondRC);
+ const TargetRegisterClass *Src0RC = Src0.isReg()
+ ? MRI.getRegClass(Src0.getReg())
+ : &AMDGPU::VReg_64RegClass;
+ const TargetRegisterClass *Src1RC = Src1.isReg()
+ ? MRI.getRegClass(Src1.getReg())
+ : &AMDGPU::VReg_64RegClass;
+
+ const TargetRegisterClass *Src0SubRC =
+ TRI->getSubRegisterClass(Src0RC, AMDGPU::sub0);
+ const TargetRegisterClass *Src1SubRC =
+ TRI->getSubRegisterClass(Src1RC, AMDGPU::sub1);
+
+ MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm(
+ MI, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
+ MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm(
+ MI, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC);
+
+ MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm(
+ MI, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC);
+ MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm(
+ MI, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC);
+
BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), SrcCondCopy)
.addReg(SrcCond);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
- .addImm(0)
- .addReg(Src0, 0, AMDGPU::sub0)
- .addImm(0)
- .addReg(Src1, 0, AMDGPU::sub0)
- .addReg(SrcCondCopy);
+ .addImm(0)
+ .add(Src0Sub0)
+ .addImm(0)
+ .add(Src1Sub0)
+ .addReg(SrcCondCopy);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
- .addImm(0)
- .addReg(Src0, 0, AMDGPU::sub1)
- .addImm(0)
- .addReg(Src1, 0, AMDGPU::sub1)
- .addReg(SrcCondCopy);
+ .addImm(0)
+ .add(Src0Sub1)
+ .addImm(0)
+ .add(Src1Sub1)
+ .addReg(SrcCondCopy);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::REG_SEQUENCE), Dst)
.addReg(DstLo)
@@ -4843,7 +5260,7 @@ SDValue SITargetLowering::splitUnaryVectorOp(SDValue Op,
assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 ||
VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i16 ||
VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
- VT == MVT::v32f32);
+ VT == MVT::v32f32 || VT == MVT::v32i16 || VT == MVT::v32f16);
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
@@ -4866,7 +5283,7 @@ SDValue SITargetLowering::splitBinaryVectorOp(SDValue Op,
assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 ||
VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i16 ||
VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
- VT == MVT::v32f32);
+ VT == MVT::v32f32 || VT == MVT::v32i16 || VT == MVT::v32f16);
SDValue Lo0, Hi0;
std::tie(Lo0, Hi0) = DAG.SplitVectorOperand(Op.getNode(), 0);
@@ -4926,10 +5343,14 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
"Load should return a value and a chain");
return Result;
}
- case ISD::FSQRT:
- if (Op.getValueType() == MVT::f64)
+ case ISD::FSQRT: {
+ EVT VT = Op.getValueType();
+ if (VT == MVT::f32)
+ return lowerFSQRTF32(Op, DAG);
+ if (VT == MVT::f64)
return lowerFSQRTF64(Op, DAG);
return SDValue();
+ }
case ISD::FSIN:
case ISD::FCOS:
return LowerTrig(Op, DAG);
@@ -5027,6 +5448,12 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return lowerXMUL_LOHI(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::STACKSAVE:
+ return LowerSTACKSAVE(Op, DAG);
+ case ISD::GET_ROUNDING:
+ return lowerGET_ROUNDING(Op, DAG);
+ case ISD::PREFETCH:
+ return lowerPREFETCH(Op, DAG);
}
return SDValue();
}
@@ -5382,6 +5809,12 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2f16, Op));
return;
}
+ case ISD::FSQRT: {
+ if (N->getValueType(0) != MVT::f16)
+ break;
+ Results.push_back(lowerFSQRTF16(SDValue(N, 0), DAG));
+ break;
+ }
default:
AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
break;
@@ -5433,6 +5866,9 @@ bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
}
bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
+ if (Subtarget->isAmdPalOS() || Subtarget->isMesa3DOS())
+ return false;
+
// FIXME: Either avoid relying on address space here or change the default
// address space for functions to avoid the explicit check.
return (GV->getValueType()->isFunctionTy() ||
@@ -5616,7 +6052,8 @@ SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
if (IsIEEEMode)
return expandFMINNUM_FMAXNUM(Op.getNode(), DAG);
- if (VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v16f16)
+ if (VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v16f16 ||
+ VT == MVT::v32f16)
return splitBinaryVectorOp(Op, DAG);
return Op;
}
@@ -5711,11 +6148,6 @@ SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA)
return lowerTrapEndpgm(Op, DAG);
- const Module *M = DAG.getMachineFunction().getFunction().getParent();
- unsigned CodeObjectVersion = AMDGPU::getCodeObjectVersion(*M);
- if (CodeObjectVersion <= AMDGPU::AMDHSA_COV3)
- return lowerTrapHsaQueuePtr(Op, DAG);
-
return Subtarget->supportsGetDoorbellID() ? lowerTrapHsa(Op, DAG) :
lowerTrapHsaQueuePtr(Op, DAG);
}
@@ -5873,7 +6305,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
SDValue Ptr =
- DAG.getObjectPtrOffset(DL, QueuePtr, TypeSize::Fixed(StructOffset));
+ DAG.getObjectPtrOffset(DL, QueuePtr, TypeSize::getFixed(StructOffset));
// TODO: Use custom target PseudoSourceValue.
// TODO: We should use the value from the IR intrinsic call, but it might not
@@ -6134,7 +6566,7 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
if (SDValue Combined = performExtractVectorEltCombine(Op.getNode(), DCI))
return Combined;
- if (VecSize == 128 || VecSize == 256) {
+ if (VecSize == 128 || VecSize == 256 || VecSize == 512) {
SDValue Lo, Hi;
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
@@ -6147,9 +6579,7 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
Hi = DAG.getBitcast(HiVT,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64, V2,
DAG.getConstant(1, SL, MVT::i32)));
- } else {
- assert(VecSize == 256);
-
+ } else if (VecSize == 256) {
SDValue V2 = DAG.getBitcast(MVT::v4i64, Vec);
SDValue Parts[4];
for (unsigned P = 0; P < 4; ++P) {
@@ -6161,6 +6591,22 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
Parts[0], Parts[1]));
Hi = DAG.getBitcast(HiVT, DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i64,
Parts[2], Parts[3]));
+ } else {
+ assert(VecSize == 512);
+
+ SDValue V2 = DAG.getBitcast(MVT::v8i64, Vec);
+ SDValue Parts[8];
+ for (unsigned P = 0; P < 8; ++P) {
+ Parts[P] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64, V2,
+ DAG.getConstant(P, SL, MVT::i32));
+ }
+
+ Lo = DAG.getBitcast(LoVT,
+ DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v4i64,
+ Parts[0], Parts[1], Parts[2], Parts[3]));
+ Hi = DAG.getBitcast(HiVT,
+ DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v4i64,
Parts[4], Parts[5], Parts[6], Parts[7]));
}
EVT IdxVT = Idx.getValueType();
@@ -6326,6 +6772,27 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
return DAG.getNode(ISD::BITCAST, SL, VT, Blend);
}
+ if (VT == MVT::v32i16 || VT == MVT::v32f16) {
+ EVT QuarterVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(),
+ VT.getVectorNumElements() / 8);
+ MVT QuarterIntVT = MVT::getIntegerVT(QuarterVT.getSizeInBits());
+
+ SmallVector<SDValue, 8> Parts[8];
+ for (unsigned I = 0, E = VT.getVectorNumElements() / 8; I != E; ++I) {
+ for (unsigned P = 0; P < 8; ++P)
+ Parts[P].push_back(Op.getOperand(I + P * E));
+ }
+ SDValue Casts[8];
+ for (unsigned P = 0; P < 8; ++P) {
+ SDValue Vec = DAG.getBuildVector(QuarterVT, SL, Parts[P]);
+ Casts[P] = DAG.getNode(ISD::BITCAST, SL, QuarterIntVT, Vec);
+ }
+
+ SDValue Blend =
+ DAG.getBuildVector(MVT::getVectorVT(QuarterIntVT, 8), SL, Casts);
+ return DAG.getNode(ISD::BITCAST, SL, VT, Blend);
+ }
+
assert(VT == MVT::v2f16 || VT == MVT::v2i16);
assert(!Subtarget->hasVOP3PInsts() && "this should be legal");
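
The v32 regrouping added above can be checked against this standalone sketch of the data movement, assuming little-endian element packing:

#include <array>
#include <cstdint>

// v32i16 build_vector lowering: 8 quarters of 4 x i16, each quarter packed
// into one i64 lane of the resulting v8i64 (QuarterIntVT above).
static std::array<uint64_t, 8>
packV32I16(const std::array<uint16_t, 32> &Elts) {
  std::array<uint64_t, 8> Quarters{};
  const unsigned E = 32 / 8; // elements per quarter, as in the loop above
  for (unsigned P = 0; P < 8; ++P)
    for (unsigned I = 0; I < E; ++I)
      Quarters[P] |= uint64_t(Elts[I + P * E]) << (16 * I);
  return Quarters;
}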
@@ -6391,24 +6858,12 @@ buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
// $symbol@*@hi with lower 32 bits and higher 32 bits of a literal constant,
// which is a 64-bit pc-relative offset from the encoding of the $symbol
// operand to the global variable.
- //
- // What we want here is an offset from the value returned by s_getpc
- // (which is the address of the s_add_u32 instruction) to the global
- // variable, but since the encoding of $symbol starts 4 bytes after the start
- // of the s_add_u32 instruction, we end up with an offset that is 4 bytes too
- // small. This requires us to add 4 to the global variable offset in order to
- // compute the correct address. Similarly for the s_addc_u32 instruction, the
- // encoding of $symbol starts 12 bytes after the start of the s_add_u32
- // instruction.
- SDValue PtrLo =
- DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4, GAFlags);
+ SDValue PtrLo = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset, GAFlags);
SDValue PtrHi;
- if (GAFlags == SIInstrInfo::MO_NONE) {
+ if (GAFlags == SIInstrInfo::MO_NONE)
PtrHi = DAG.getTargetConstant(0, DL, MVT::i32);
- } else {
- PtrHi =
- DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 12, GAFlags + 1);
- }
+ else
+ PtrHi = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset, GAFlags + 1);
return DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET, DL, PtrVT, PtrLo, PtrHi);
}
@@ -6450,9 +6905,22 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
return DAG.getNode(AMDGPUISD::LDS, DL, MVT::i32, GA);
}
+ if (Subtarget->isAmdPalOS() || Subtarget->isMesa3DOS()) {
+ SDValue AddrLo = DAG.getTargetGlobalAddress(
+ GV, DL, MVT::i32, GSD->getOffset(), SIInstrInfo::MO_ABS32_LO);
+ AddrLo = {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, AddrLo), 0};
+
+ SDValue AddrHi = DAG.getTargetGlobalAddress(
+ GV, DL, MVT::i32, GSD->getOffset(), SIInstrInfo::MO_ABS32_HI);
+ AddrHi = {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, AddrHi), 0};
+
+ return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, AddrLo, AddrHi);
+ }
+
if (shouldEmitFixup(GV))
return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT);
- else if (shouldEmitPCReloc(GV))
+
+ if (shouldEmitPCReloc(GV))
return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT,
SIInstrInfo::MO_REL32);
@@ -6699,6 +7167,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
unsigned IntrOpcode = Intr->BaseOpcode;
bool IsGFX10Plus = AMDGPU::isGFX10Plus(*Subtarget);
bool IsGFX11Plus = AMDGPU::isGFX11Plus(*Subtarget);
+ bool IsGFX12Plus = AMDGPU::isGFX12Plus(*Subtarget);
SmallVector<EVT, 3> ResultTypes(Op->values());
SmallVector<EVT, 3> OrigResultTypes(Op->values());
@@ -6718,7 +7187,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
if (BaseOpcode->Atomic) {
VData = Op.getOperand(2);
- bool Is64Bit = VData.getValueType() == MVT::i64;
+ bool Is64Bit = VData.getValueSizeInBits() == 64;
if (BaseOpcode->AtomicX2) {
SDValue VData2 = Op.getOperand(3);
VData = DAG.getBuildVector(Is64Bit ? MVT::v2i64 : MVT::v2i32, DL,
@@ -6878,9 +7347,9 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
// SIShrinkInstructions will convert NSA encodings to non-NSA after register
// allocation when possible.
//
- // Partial NSA is allowed on GFX11 where the final register is a contiguous
+ // Partial NSA is allowed on GFX11+ where the final register is a contiguous
// set of the remaining addresses.
- const unsigned NSAMaxSize = ST->getNSAMaxSize();
+ const unsigned NSAMaxSize = ST->getNSAMaxSize(BaseOpcode->Sampler);
const bool HasPartialNSAEncoding = ST->hasPartialNSAEncoding();
const bool UseNSA = ST->hasNSAEncoding() &&
VAddrs.size() >= ST->getNSAThreshold(MF) &&
@@ -6957,7 +7426,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
Op.getOperand(ArgOffset + Intr->CachePolicyIndex))->getZExtValue();
if (BaseOpcode->Atomic)
CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization
- if (CPol & ~AMDGPU::CPol::ALL)
+ if (CPol & ~(IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12))
return Op;
SmallVector<SDValue, 26> Ops;
@@ -6977,7 +7446,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
Ops.push_back(DAG.getTargetConstant(DMask, DL, MVT::i32));
if (IsGFX10Plus)
Ops.push_back(DAG.getTargetConstant(DimInfo->Encoding, DL, MVT::i32));
- Ops.push_back(Unorm);
+ if (!IsGFX12Plus || BaseOpcode->Sampler || BaseOpcode->MSAA)
+ Ops.push_back(Unorm);
Ops.push_back(DAG.getTargetConstant(CPol, DL, MVT::i32));
Ops.push_back(IsA16 && // r128, a16 for gfx9
ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
@@ -6988,7 +7458,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
} else if (cast<ConstantSDNode>(TFE)->getZExtValue()) {
report_fatal_error("TFE is not supported on this GPU");
}
- Ops.push_back(LWE); // lwe
+ if (!IsGFX12Plus || BaseOpcode->Sampler || BaseOpcode->MSAA)
+ Ops.push_back(LWE); // lwe
if (!IsGFX10Plus)
Ops.push_back(DimInfo->DA ? True : False);
if (BaseOpcode->HasD16)
@@ -7000,7 +7471,10 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
UseNSA ? VAddrs.size() : VAddr.getValueType().getSizeInBits() / 32;
int Opcode = -1;
- if (IsGFX11Plus) {
+ if (IsGFX12Plus) {
+ Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx12,
+ NumVDataDwords, NumVAddrDwords);
+ } else if (IsGFX11Plus) {
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
UseNSA ? AMDGPU::MIMGEncGfx11NSA
: AMDGPU::MIMGEncGfx11Default,
@@ -7071,7 +7545,8 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
};
// Widen vec3 load to vec4.
- if (VT.isVector() && VT.getVectorNumElements() == 3) {
+ if (VT.isVector() && VT.getVectorNumElements() == 3 &&
+ !Subtarget->hasScalarDwordx3Loads()) {
EVT WidenedVT =
EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), 4);
auto WidenedOp = DAG.getMemIntrinsicNode(
@@ -7317,7 +7792,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDLoc(Op), MVT::i32);
case Intrinsic::amdgcn_s_buffer_load: {
unsigned CPol = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
- if (CPol & ~AMDGPU::CPol::ALL)
+ if (CPol & ~((Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12)
+ ? AMDGPU::CPol::ALL
+ : AMDGPU::CPol::ALL_pregfx12))
return Op;
return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
DAG);
@@ -7341,9 +7818,6 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return emitRemovedIntrinsicError(DAG, DL, VT);
}
- case Intrinsic::amdgcn_ldexp:
- return DAG.getNode(ISD::FLDEXP, DL, VT, Op.getOperand(1), Op.getOperand(2));
-
case Intrinsic::amdgcn_fract:
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
@@ -7490,6 +7964,19 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
}
+// On targets that do not support a constant in the soffset field, turn a zero
+// into SGPR_NULL to avoid generating an extra s_mov with zero.
+static SDValue selectSOffset(SDValue SOffset, SelectionDAG &DAG,
+ const GCNSubtarget *Subtarget) {
+ if (Subtarget->hasRestrictedSOffset())
+ if (auto SOffsetConst = dyn_cast<ConstantSDNode>(SOffset)) {
+ if (SOffsetConst->isZero()) {
+ return DAG.getRegister(AMDGPU::SGPR_NULL, MVT::i32);
+ }
+ }
+ return SOffset;
+}
+
SDValue SITargetLowering::lowerRawBufferAtomicIntrin(SDValue Op,
SelectionDAG &DAG,
unsigned NewOpcode) const {
@@ -7498,13 +7985,14 @@ SDValue SITargetLowering::lowerRawBufferAtomicIntrin(SDValue Op,
SDValue VData = Op.getOperand(2);
SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(3), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+ auto SOffset = selectSOffset(Op.getOperand(5), DAG, Subtarget);
SDValue Ops[] = {
Op.getOperand(0), // Chain
VData, // vdata
Rsrc, // rsrc
DAG.getConstant(0, DL, MVT::i32), // vindex
Offsets.first, // voffset
- Op.getOperand(5), // soffset
+ SOffset, // soffset
Offsets.second, // offset
Op.getOperand(6), // cachepolicy
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
@@ -7531,13 +8019,14 @@ SITargetLowering::lowerStructBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG,
SDValue VData = Op.getOperand(2);
SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(3), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
+ auto SOffset = selectSOffset(Op.getOperand(6), DAG, Subtarget);
SDValue Ops[] = {
Op.getOperand(0), // Chain
VData, // vdata
Rsrc, // rsrc
Op.getOperand(4), // vindex
Offsets.first, // voffset
- Op.getOperand(6), // soffset
+ SOffset, // soffset
Offsets.second, // offset
Op.getOperand(7), // cachepolicy
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
@@ -7693,12 +8182,13 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(2), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(3), DAG);
+ auto SOffset = selectSOffset(Op.getOperand(4), DAG, Subtarget);
SDValue Ops[] = {
Op.getOperand(0), // Chain
Rsrc, // rsrc
DAG.getConstant(0, DL, MVT::i32), // vindex
Offsets.first, // voffset
- Op.getOperand(4), // soffset
+ SOffset, // soffset
Offsets.second, // offset
Op.getOperand(5), // cachepolicy, swizzled buffer
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
@@ -7717,12 +8207,13 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(2), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+ auto SOffset = selectSOffset(Op.getOperand(5), DAG, Subtarget);
SDValue Ops[] = {
Op.getOperand(0), // Chain
Rsrc, // rsrc
Op.getOperand(3), // vindex
Offsets.first, // voffset
- Op.getOperand(5), // soffset
+ SOffset, // soffset
Offsets.second, // offset
Op.getOperand(6), // cachepolicy, swizzled buffer
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
@@ -7734,21 +8225,22 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
MemSDNode *M = cast<MemSDNode>(Op);
EVT LoadVT = Op.getValueType();
+ auto SOffset = selectSOffset(Op.getOperand(5), DAG, Subtarget);
unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue();
unsigned Glc = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue();
unsigned Slc = cast<ConstantSDNode>(Op.getOperand(10))->getZExtValue();
unsigned IdxEn = getIdxEn(Op.getOperand(3));
SDValue Ops[] = {
- Op.getOperand(0), // Chain
- Op.getOperand(2), // rsrc
- Op.getOperand(3), // vindex
- Op.getOperand(4), // voffset
- Op.getOperand(5), // soffset
- Op.getOperand(6), // offset
- DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
- DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
- DAG.getTargetConstant(IdxEn, DL, MVT::i1) // idxen
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // rsrc
+ Op.getOperand(3), // vindex
+ Op.getOperand(4), // voffset
+ SOffset, // soffset
+ Op.getOperand(6), // offset
+ DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+ DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getTargetConstant(IdxEn, DL, MVT::i1) // idxen
};
if (LoadVT.getScalarType() == MVT::f16)
@@ -7764,13 +8256,14 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
EVT LoadVT = Op.getValueType();
SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(2), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(3), DAG);
+ auto SOffset = selectSOffset(Op.getOperand(4), DAG, Subtarget);
SDValue Ops[] = {
Op.getOperand(0), // Chain
Rsrc, // rsrc
DAG.getConstant(0, DL, MVT::i32), // vindex
Offsets.first, // voffset
- Op.getOperand(4), // soffset
+ SOffset, // soffset
Offsets.second, // offset
Op.getOperand(5), // format
Op.getOperand(6), // cachepolicy, swizzled buffer
@@ -7790,13 +8283,14 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
EVT LoadVT = Op.getValueType();
SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(2), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+ auto SOffset = selectSOffset(Op.getOperand(5), DAG, Subtarget);
SDValue Ops[] = {
Op.getOperand(0), // Chain
Rsrc, // rsrc
Op.getOperand(3), // vindex
Offsets.first, // voffset
- Op.getOperand(5), // soffset
+ SOffset, // soffset
Offsets.second, // offset
Op.getOperand(6), // format
Op.getOperand(7), // cachepolicy, swizzled buffer
@@ -8009,6 +8503,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap: {
SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(4), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
+ auto SOffset = selectSOffset(Op.getOperand(6), DAG, Subtarget);
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // src
@@ -8016,7 +8511,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Rsrc, // rsrc
DAG.getConstant(0, DL, MVT::i32), // vindex
Offsets.first, // voffset
- Op.getOperand(6), // soffset
+ SOffset, // soffset
Offsets.second, // offset
Op.getOperand(7), // cachepolicy
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
@@ -8031,6 +8526,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_cmpswap: {
SDValue Rsrc = bufferRsrcPtrToVector(Op->getOperand(4), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(6), DAG);
+ auto SOffset = selectSOffset(Op.getOperand(7), DAG, Subtarget);
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // src
@@ -8038,7 +8534,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Rsrc, // rsrc
Op.getOperand(5), // vindex
Offsets.first, // voffset
- Op.getOperand(7), // soffset
+ SOffset, // soffset
Offsets.second, // offset
Op.getOperand(8), // cachepolicy
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
@@ -8068,14 +8564,17 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return SDValue();
}
+ const bool IsGFX11 = AMDGPU::isGFX11(*Subtarget);
const bool IsGFX11Plus = AMDGPU::isGFX11Plus(*Subtarget);
+ const bool IsGFX12Plus = AMDGPU::isGFX12Plus(*Subtarget);
const bool IsA16 = RayDir.getValueType().getVectorElementType() == MVT::f16;
const bool Is64 = NodePtr.getValueType() == MVT::i64;
const unsigned NumVDataDwords = 4;
const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11);
const unsigned NumVAddrs = IsGFX11Plus ? (IsA16 ? 4 : 5) : NumVAddrDwords;
- const bool UseNSA =
- Subtarget->hasNSAEncoding() && NumVAddrs <= Subtarget->getNSAMaxSize();
+ const bool UseNSA = (Subtarget->hasNSAEncoding() &&
+ NumVAddrs <= Subtarget->getNSAMaxSize()) ||
+ IsGFX12Plus;
const unsigned BaseOpcodes[2][2] = {
{AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16},
{AMDGPU::IMAGE_BVH64_INTERSECT_RAY,
@@ -8083,15 +8582,16 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
int Opcode;
if (UseNSA) {
Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
- IsGFX11Plus ? AMDGPU::MIMGEncGfx11NSA
+ IsGFX12Plus ? AMDGPU::MIMGEncGfx12
+ : IsGFX11 ? AMDGPU::MIMGEncGfx11NSA
: AMDGPU::MIMGEncGfx10NSA,
NumVDataDwords, NumVAddrDwords);
} else {
- Opcode =
- AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
- IsGFX11Plus ? AMDGPU::MIMGEncGfx11Default
- : AMDGPU::MIMGEncGfx10Default,
- NumVDataDwords, NumVAddrDwords);
+ assert(!IsGFX12Plus);
+ Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
+ IsGFX11 ? AMDGPU::MIMGEncGfx11Default
+ : AMDGPU::MIMGEncGfx10Default,
+ NumVDataDwords, NumVAddrDwords);
}
assert(Opcode != -1);
@@ -8179,8 +8679,12 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
}
case Intrinsic::amdgcn_global_atomic_fmin:
case Intrinsic::amdgcn_global_atomic_fmax:
+ case Intrinsic::amdgcn_global_atomic_fmin_num:
+ case Intrinsic::amdgcn_global_atomic_fmax_num:
case Intrinsic::amdgcn_flat_atomic_fmin:
- case Intrinsic::amdgcn_flat_atomic_fmax: {
+ case Intrinsic::amdgcn_flat_atomic_fmax:
+ case Intrinsic::amdgcn_flat_atomic_fmin_num:
+ case Intrinsic::amdgcn_flat_atomic_fmax_num: {
MemSDNode *M = cast<MemSDNode>(Op);
SDValue Ops[] = {
M->getOperand(0), // Chain
@@ -8190,12 +8694,16 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
unsigned Opcode = 0;
switch (IntrID) {
case Intrinsic::amdgcn_global_atomic_fmin:
- case Intrinsic::amdgcn_flat_atomic_fmin: {
+ case Intrinsic::amdgcn_global_atomic_fmin_num:
+ case Intrinsic::amdgcn_flat_atomic_fmin:
+ case Intrinsic::amdgcn_flat_atomic_fmin_num: {
Opcode = AMDGPUISD::ATOMIC_LOAD_FMIN;
break;
}
case Intrinsic::amdgcn_global_atomic_fmax:
- case Intrinsic::amdgcn_flat_atomic_fmax: {
+ case Intrinsic::amdgcn_global_atomic_fmax_num:
+ case Intrinsic::amdgcn_flat_atomic_fmax:
+ case Intrinsic::amdgcn_flat_atomic_fmax_num: {
Opcode = AMDGPUISD::ATOMIC_LOAD_FMAX;
break;
}
@@ -8206,6 +8714,31 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
M->getVTList(), Ops, M->getMemoryVT(),
M->getMemOperand());
}
+ case Intrinsic::amdgcn_s_get_barrier_state: {
+ SDValue Chain = Op->getOperand(0);
+ SmallVector<SDValue, 2> Ops;
+ unsigned Opc;
+ bool IsInlinableBarID = false;
+ int64_t BarID;
+
+ if (isa<ConstantSDNode>(Op->getOperand(2))) {
+ BarID = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
+ IsInlinableBarID = AMDGPU::isInlinableIntLiteral(BarID);
+ }
+
+ if (IsInlinableBarID) {
+ Opc = AMDGPU::S_GET_BARRIER_STATE_IMM;
+ SDValue K = DAG.getTargetConstant(BarID, DL, MVT::i32);
+ Ops.push_back(K);
+ } else {
+ Opc = AMDGPU::S_GET_BARRIER_STATE_M0;
+ SDValue M0Val = copyToM0(DAG, Chain, DL, Op.getOperand(2));
+ Ops.push_back(M0Val.getValue(0));
+ }
+
+ auto NewMI = DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops);
+ return SDValue(NewMI, 0);
+ }
default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
@@ -8383,13 +8916,29 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return SDValue(DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops), 0);
}
case Intrinsic::amdgcn_s_barrier: {
- if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ if (getTargetMachine().getOptLevel() > CodeGenOptLevel::None) {
unsigned WGSize = ST.getFlatWorkGroupSizes(MF.getFunction()).second;
if (WGSize <= ST.getWavefrontSize())
return SDValue(DAG.getMachineNode(AMDGPU::WAVE_BARRIER, DL, MVT::Other,
Op.getOperand(0)), 0);
}
+
+ // On GFX12 lower s_barrier into s_barrier_signal_imm and s_barrier_wait
+ if (ST.hasSplitBarriers()) {
+ SDValue K =
+ DAG.getTargetConstant(AMDGPU::Barrier::WORKGROUP, DL, MVT::i32);
+ SDValue BarSignal =
+ SDValue(DAG.getMachineNode(AMDGPU::S_BARRIER_SIGNAL_IMM, DL,
+ MVT::Other, K, Op.getOperand(0)),
+ 0);
+ SDValue BarWait =
+ SDValue(DAG.getMachineNode(AMDGPU::S_BARRIER_WAIT, DL, MVT::Other, K,
+ BarSignal.getValue(0)),
+ 0);
+ return BarWait;
+ }
+
return SDValue();
};
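
Illustrative only: the signal/wait pair added above behaves like a counted rendezvous. This portable sketch models the semantics with an atomic counter; it is an analogy, not a description of the hardware barrier state.

#include <atomic>

// One-shot model: Remaining starts at the member count. Each participant
// "signals" by decrementing, then "waits" until every member has signaled.
static void splitBarrier(std::atomic<int> &Remaining) {
  Remaining.fetch_sub(1, std::memory_order_acq_rel);      // s_barrier_signal
  while (Remaining.load(std::memory_order_acquire) > 0) {
    // spin, as s_barrier_wait would block
  }
}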
case Intrinsic::amdgcn_tbuffer_store: {
@@ -8429,13 +8978,14 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
VData = handleD16VData(VData, DAG);
SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(3), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
+ auto SOffset = selectSOffset(Op.getOperand(6), DAG, Subtarget);
SDValue Ops[] = {
Chain,
VData, // vdata
Rsrc, // rsrc
Op.getOperand(4), // vindex
Offsets.first, // voffset
- Op.getOperand(6), // soffset
+ SOffset, // soffset
Offsets.second, // offset
Op.getOperand(7), // format
Op.getOperand(8), // cachepolicy, swizzled buffer
@@ -8456,13 +9006,14 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
VData = handleD16VData(VData, DAG);
SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(3), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+ auto SOffset = selectSOffset(Op.getOperand(5), DAG, Subtarget);
SDValue Ops[] = {
Chain,
VData, // vdata
Rsrc, // rsrc
DAG.getConstant(0, DL, MVT::i32), // vindex
Offsets.first, // voffset
- Op.getOperand(5), // soffset
+ SOffset, // soffset
Offsets.second, // offset
Op.getOperand(6), // format
Op.getOperand(7), // cachepolicy, swizzled buffer
@@ -8536,13 +9087,14 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(3), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+ auto SOffset = selectSOffset(Op.getOperand(5), DAG, Subtarget);
SDValue Ops[] = {
Chain,
VData,
Rsrc,
DAG.getConstant(0, DL, MVT::i32), // vindex
Offsets.first, // voffset
- Op.getOperand(5), // soffset
+ SOffset, // soffset
Offsets.second, // offset
Op.getOperand(6), // cachepolicy, swizzled buffer
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
@@ -8586,13 +9138,14 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
auto Rsrc = bufferRsrcPtrToVector(Op.getOperand(3), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
+ auto SOffset = selectSOffset(Op.getOperand(6), DAG, Subtarget);
SDValue Ops[] = {
Chain,
VData,
Rsrc,
Op.getOperand(4), // vindex
Offsets.first, // voffset
- Op.getOperand(6), // soffset
+ SOffset, // soffset
Offsets.second, // offset
Op.getOperand(7), // cachepolicy, swizzled buffer
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
@@ -8620,8 +9173,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
IntrinsicID == Intrinsic::amdgcn_struct_ptr_buffer_load_lds;
unsigned OpOffset = HasVIndex ? 1 : 0;
SDValue VOffset = Op.getOperand(5 + OpOffset);
- auto CVOffset = dyn_cast<ConstantSDNode>(VOffset);
- bool HasVOffset = !CVOffset || !CVOffset->isZero();
+ bool HasVOffset = !isNullConstant(VOffset);
unsigned Size = Op->getConstantOperandVal(4);
switch (Size) {
@@ -8684,12 +9236,13 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
auto F = LoadMMO->getFlags() &
~(MachineMemOperand::MOStore | MachineMemOperand::MOLoad);
- LoadMMO = MF.getMachineMemOperand(LoadPtrI, F | MachineMemOperand::MOLoad,
- Size, LoadMMO->getBaseAlign());
+ LoadMMO =
+ MF.getMachineMemOperand(LoadPtrI, F | MachineMemOperand::MOLoad, Size,
+ LoadMMO->getBaseAlign(), LoadMMO->getAAInfo());
- MachineMemOperand *StoreMMO =
- MF.getMachineMemOperand(StorePtrI, F | MachineMemOperand::MOStore,
- sizeof(int32_t), LoadMMO->getBaseAlign());
+ MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
+ StorePtrI, F | MachineMemOperand::MOStore, sizeof(int32_t),
+ LoadMMO->getBaseAlign(), LoadMMO->getAAInfo());
auto Load = DAG.getMachineNode(Opc, DL, M->getVTList(), Ops);
DAG.setNodeMemRefs(Load, {LoadMMO, StoreMMO});
@@ -8760,11 +9313,12 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS;
auto F = LoadMMO->getFlags() &
~(MachineMemOperand::MOStore | MachineMemOperand::MOLoad);
- LoadMMO = MF.getMachineMemOperand(LoadPtrI, F | MachineMemOperand::MOLoad,
- Size, LoadMMO->getBaseAlign());
- MachineMemOperand *StoreMMO =
- MF.getMachineMemOperand(StorePtrI, F | MachineMemOperand::MOStore,
- sizeof(int32_t), Align(4));
+ LoadMMO =
+ MF.getMachineMemOperand(LoadPtrI, F | MachineMemOperand::MOLoad, Size,
+ LoadMMO->getBaseAlign(), LoadMMO->getAAInfo());
+ MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
+ StorePtrI, F | MachineMemOperand::MOStore, sizeof(int32_t), Align(4),
+ LoadMMO->getAAInfo());
auto Load = DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops);
DAG.setNodeMemRefs(Load, {LoadMMO, StoreMMO});
@@ -8774,7 +9328,76 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
case Intrinsic::amdgcn_end_cf:
return SDValue(DAG.getMachineNode(AMDGPU::SI_END_CF, DL, MVT::Other,
Op->getOperand(2), Chain), 0);
+ case Intrinsic::amdgcn_s_barrier_init:
+ case Intrinsic::amdgcn_s_barrier_join:
+ case Intrinsic::amdgcn_s_wakeup_barrier: {
+ SDValue Chain = Op->getOperand(0);
+ SmallVector<SDValue, 2> Ops;
+ SDValue BarOp = Op->getOperand(2);
+ unsigned Opc;
+ bool IsInlinableBarID = false;
+ int64_t BarVal;
+
+ if (isa<ConstantSDNode>(BarOp)) {
+ BarVal = cast<ConstantSDNode>(BarOp)->getSExtValue();
+ IsInlinableBarID = AMDGPU::isInlinableIntLiteral(BarVal);
+ }
+
+ if (IsInlinableBarID) {
+ switch (IntrinsicID) {
+ default:
+ return SDValue();
+ case Intrinsic::amdgcn_s_barrier_init:
+ Opc = AMDGPU::S_BARRIER_INIT_IMM;
+ break;
+ case Intrinsic::amdgcn_s_barrier_join:
+ Opc = AMDGPU::S_BARRIER_JOIN_IMM;
+ break;
+ case Intrinsic::amdgcn_s_wakeup_barrier:
+ Opc = AMDGPU::S_WAKEUP_BARRIER_IMM;
+ break;
+ }
+
+ SDValue K = DAG.getTargetConstant(BarVal, DL, MVT::i32);
+ Ops.push_back(K);
+ } else {
+ switch (IntrinsicID) {
+ default:
+ return SDValue();
+ case Intrinsic::amdgcn_s_barrier_init:
+ Opc = AMDGPU::S_BARRIER_INIT_M0;
+ break;
+ case Intrinsic::amdgcn_s_barrier_join:
+ Opc = AMDGPU::S_BARRIER_JOIN_M0;
+ break;
+ case Intrinsic::amdgcn_s_wakeup_barrier:
+ Opc = AMDGPU::S_WAKEUP_BARRIER_M0;
+ break;
+ }
+ }
+
+ if (IntrinsicID == Intrinsic::amdgcn_s_barrier_init) {
+ SDValue M0Val;
+ // Member count will be read from M0[16:22]
+ M0Val = DAG.getNode(ISD::SHL, DL, MVT::i32, Op.getOperand(3),
+ DAG.getShiftAmountConstant(16, MVT::i32, DL));
+ if (!IsInlinableBarID) {
+ // If the barrier id is not an inline constant then it must be
+ // referenced with M0[4:0]. Perform an OR with the member count to
+ // include it in M0.
+ M0Val = SDValue(DAG.getMachineNode(AMDGPU::S_OR_B32, DL, MVT::i32,
+ Op.getOperand(2), M0Val),
+ 0);
+ }
+ Ops.push_back(copyToM0(DAG, Chain, DL, M0Val).getValue(0));
+ } else if (!IsInlinableBarID) {
+ Ops.push_back(copyToM0(DAG, Chain, DL, BarOp).getValue(0));
+ }
+
+ auto NewMI = DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops);
+ return SDValue(NewMI, 0);
+ }
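When the barrier id is not an inline constant, the lowering above packs two fields into M0. A standalone model of that packing, using the bit ranges named in the comments (the 0x1f mask is illustrative; the DAG code ORs the id in unmasked):

    #include <cstdint>

    // s_barrier_init M0 layout: member count in M0[16:22], barrier id in
    // M0[4:0] when the id is not an inline constant.
    uint32_t packBarrierM0(uint32_t BarrierId, uint32_t MemberCount) {
      return (MemberCount << 16) | (BarrierId & 0x1f);
    }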
default: {
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
@@ -8794,7 +9417,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
SDValue Offset, SelectionDAG &DAG) const {
SDLoc DL(Offset);
- const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
+ const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset(*Subtarget);
SDValue N0 = Offset;
ConstantSDNode *C1 = nullptr;
@@ -8870,8 +9493,13 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
return;
}
}
+
+ SDValue SOffsetZero = Subtarget->hasRestrictedSOffset()
+ ? DAG.getRegister(AMDGPU::SGPR_NULL, MVT::i32)
+ : DAG.getConstant(0, DL, MVT::i32);
+
Offsets[0] = CombinedOffset;
- Offsets[1] = DAG.getConstant(0, DL, MVT::i32);
+ Offsets[1] = SOffsetZero;
Offsets[2] = DAG.getTargetConstant(0, DL, MVT::i32);
}
@@ -9051,7 +9679,7 @@ static bool addressMayBeAccessedAsPrivate(const MachineMemOperand *MMO,
const SIMachineFunctionInfo &Info) {
// TODO: Should check if the address can definitely not access stack.
if (Info.isEntryFunction())
- return Info.hasFlatScratchInit();
+ return Info.getUserSGPRInfo().hasFlatScratchInit();
return true;
}
@@ -9129,7 +9757,8 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
if (!Op->isDivergent() && Alignment >= Align(4) && NumElements < 32) {
- if (MemVT.isPow2VectorType())
+ if (MemVT.isPow2VectorType() ||
+ (Subtarget->hasScalarDwordx3Loads() && NumElements == 3))
return SDValue();
return WidenOrSplitVectorLoad(Op, DAG);
}
@@ -9145,7 +9774,8 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() &&
Load->isSimple() && isMemOpHasNoClobberedMemOperand(Load) &&
Alignment >= Align(4) && NumElements < 32) {
- if (MemVT.isPow2VectorType())
+ if (MemVT.isPow2VectorType() ||
+ (Subtarget->hasScalarDwordx3Loads() && NumElements == 3))
return SDValue();
return WidenOrSplitVectorLoad(Op, DAG);
}
@@ -9217,7 +9847,8 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
- if (VT.getSizeInBits() == 128 || VT.getSizeInBits() == 256)
+ if (VT.getSizeInBits() == 128 || VT.getSizeInBits() == 256 ||
+ VT.getSizeInBits() == 512)
return splitTernaryVectorOp(Op, DAG);
assert(VT.getSizeInBits() == 64);
@@ -9277,11 +9908,6 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
// XXX - Is UnsafeFPMath sufficient to do this for f64? The maximum ULP
// error seems really high at 2^29 ULP.
-
- // XXX - do we need afn for this or is arcp sufficent?
- if (RHS.getOpcode() == ISD::FSQRT)
- return DAG.getNode(AMDGPUISD::RSQ, SL, VT, RHS.getOperand(0));
-
// 1.0 / x -> rcp(x)
return DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
}
@@ -9294,8 +9920,8 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
}
}
- // For f16 require arcp only.
- // For f32 require afn+arcp.
+ // For f16 require afn or arcp.
+ // For f32 require afn.
if (!AllowInaccurateRcp && (VT != MVT::f16 || !Flags.hasAllowReciprocal()))
return SDValue();
@@ -9480,28 +10106,44 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
const DenormalMode DenormMode = Info->getMode().FP32Denormals;
- const bool HasFP32Denormals = DenormMode == DenormalMode::getIEEE();
+ const bool PreservesDenormals = DenormMode == DenormalMode::getIEEE();
+ const bool HasDynamicDenormals =
+ (DenormMode.Input == DenormalMode::Dynamic) ||
+ (DenormMode.Output == DenormalMode::Dynamic);
+
+ SDValue SavedDenormMode;
- if (!HasFP32Denormals) {
+ if (!PreservesDenormals) {
// Note we can't use the STRICT_FMA/STRICT_FMUL for the non-strict FDIV
// lowering. The chain dependence is insufficient, and we need glue. We do
// not need the glue variants in a strictfp function.
SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Glue = DAG.getEntryNode();
+ if (HasDynamicDenormals) {
+ SDNode *GetReg = DAG.getMachineNode(AMDGPU::S_GETREG_B32, SL,
+ DAG.getVTList(MVT::i32, MVT::Glue),
+ {BitField, Glue});
+ SavedDenormMode = SDValue(GetReg, 0);
+
+ Glue = DAG.getMergeValues(
+ {DAG.getEntryNode(), SDValue(GetReg, 0), SDValue(GetReg, 1)}, SL);
+ }
+
SDNode *EnableDenorm;
if (Subtarget->hasDenormModeInst()) {
const SDValue EnableDenormValue =
getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, Info, Subtarget);
- EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs,
- DAG.getEntryNode(), EnableDenormValue).getNode();
+ EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs, Glue,
+ EnableDenormValue)
+ .getNode();
} else {
const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE,
SL, MVT::i32);
- EnableDenorm =
- DAG.getMachineNode(AMDGPU::S_SETREG_B32, SL, BindParamVTs,
- {EnableDenormValue, BitField, DAG.getEntryNode()});
+ EnableDenorm = DAG.getMachineNode(AMDGPU::S_SETREG_B32, SL, BindParamVTs,
+ {EnableDenormValue, BitField, Glue});
}
SDValue Ops[3] = {
@@ -9531,12 +10173,9 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
SDValue Fma4 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3,
NumeratorScaled, Fma3, Flags);
- if (!HasFP32Denormals) {
- // FIXME: This mishandles dynamic denormal mode. We need to query the
- // current mode and restore the original.
-
+ if (!PreservesDenormals) {
SDNode *DisableDenorm;
- if (Subtarget->hasDenormModeInst()) {
+ if (!HasDynamicDenormals && Subtarget->hasDenormModeInst()) {
const SDValue DisableDenormValue = getSPDenormModeValue(
FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, Info, Subtarget);
@@ -9544,8 +10183,11 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
Fma4.getValue(1), DisableDenormValue,
Fma4.getValue(2)).getNode();
} else {
+ assert(HasDynamicDenormals == (bool)SavedDenormMode);
const SDValue DisableDenormValue =
- DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
+ HasDynamicDenormals
+ ? SavedDenormMode
+ : DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
DisableDenorm = DAG.getMachineNode(
AMDGPU::S_SETREG_B32, SL, MVT::Other,
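These LowerFDIV32 hunks handle dynamic denormal modes by reading the current MODE value with S_GETREG_B32 before forcing denormals on, then writing the saved value back with S_SETREG_B32 instead of the constant FP_DENORM_FLUSH_IN_FLUSH_OUT. A toy model of the 2-bit FP32 denormal field being saved and restored (the hwreg offset/width and the FLUSH_NONE = 3, FLUSH_IN_FLUSH_OUT = 0 encodings are assumptions matching the usual SIDefines values):

    #include <cstdint>

    constexpr uint32_t DenormOffset = 4, DenormWidth = 2; // assumed hwreg(MODE, 4, 2)
    constexpr uint32_t DenormMask = ((1u << DenormWidth) - 1) << DenormOffset;

    // What S_GETREG_B32 extracts and S_SETREG_B32 reinserts.
    uint32_t readDenormField(uint32_t Mode) {
      return (Mode & DenormMask) >> DenormOffset;
    }
    uint32_t writeDenormField(uint32_t Mode, uint32_t Val) {
      return (Mode & ~DenormMask) | ((Val << DenormOffset) & DenormMask);
    }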
@@ -9754,6 +10396,111 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+// Avoid the fully correct expansion for f32 sqrt when promoting from f16.
+SDValue SITargetLowering::lowerFSQRTF16(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ assert(!Subtarget->has16BitInsts());
+ SDNodeFlags Flags = Op->getFlags();
+ SDValue Ext =
+ DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Op.getOperand(0), Flags);
+
+ SDValue SqrtID = DAG.getTargetConstant(Intrinsic::amdgcn_sqrt, SL, MVT::i32);
+ SDValue Sqrt =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SL, MVT::f32, SqrtID, Ext, Flags);
+
+ return DAG.getNode(ISD::FP_ROUND, SL, MVT::f16, Sqrt,
+ DAG.getTargetConstant(0, SL, MVT::i32), Flags);
+}
+
+SDValue SITargetLowering::lowerFSQRTF32(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDNodeFlags Flags = Op->getFlags();
+ MVT VT = Op.getValueType().getSimpleVT();
+ const SDValue X = Op.getOperand(0);
+
+ if (allowApproxFunc(DAG, Flags)) {
+ // Instruction is 1ulp but ignores denormals.
+ return DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getTargetConstant(Intrinsic::amdgcn_sqrt, DL, MVT::i32), X, Flags);
+ }
+
+ SDValue ScaleThreshold = DAG.getConstantFP(0x1.0p-96f, DL, VT);
+ SDValue NeedScale = DAG.getSetCC(DL, MVT::i1, X, ScaleThreshold, ISD::SETOLT);
+
+ SDValue ScaleUpFactor = DAG.getConstantFP(0x1.0p+32f, DL, VT);
+
+ SDValue ScaledX = DAG.getNode(ISD::FMUL, DL, VT, X, ScaleUpFactor, Flags);
+
+ SDValue SqrtX =
+ DAG.getNode(ISD::SELECT, DL, VT, NeedScale, ScaledX, X, Flags);
+
+ SDValue SqrtS;
+ if (needsDenormHandlingF32(DAG, X, Flags)) {
+ SDValue SqrtID =
+ DAG.getTargetConstant(Intrinsic::amdgcn_sqrt, DL, MVT::i32);
+ SqrtS = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, SqrtID, SqrtX, Flags);
+
+ SDValue SqrtSAsInt = DAG.getNode(ISD::BITCAST, DL, MVT::i32, SqrtS);
+ SDValue SqrtSNextDownInt = DAG.getNode(ISD::ADD, DL, MVT::i32, SqrtSAsInt,
+ DAG.getConstant(-1, DL, MVT::i32));
+ SDValue SqrtSNextDown = DAG.getNode(ISD::BITCAST, DL, VT, SqrtSNextDownInt);
+
+ SDValue NegSqrtSNextDown =
+ DAG.getNode(ISD::FNEG, DL, VT, SqrtSNextDown, Flags);
+
+ SDValue SqrtVP =
+ DAG.getNode(ISD::FMA, DL, VT, NegSqrtSNextDown, SqrtS, SqrtX, Flags);
+
+ SDValue SqrtSNextUpInt = DAG.getNode(ISD::ADD, DL, MVT::i32, SqrtSAsInt,
+ DAG.getConstant(1, DL, MVT::i32));
+ SDValue SqrtSNextUp = DAG.getNode(ISD::BITCAST, DL, VT, SqrtSNextUpInt);
+
+ SDValue NegSqrtSNextUp = DAG.getNode(ISD::FNEG, DL, VT, SqrtSNextUp, Flags);
+ SDValue SqrtVS =
+ DAG.getNode(ISD::FMA, DL, VT, NegSqrtSNextUp, SqrtS, SqrtX, Flags);
+
+ SDValue Zero = DAG.getConstantFP(0.0f, DL, VT);
+ SDValue SqrtVPLE0 = DAG.getSetCC(DL, MVT::i1, SqrtVP, Zero, ISD::SETOLE);
+
+ SqrtS = DAG.getNode(ISD::SELECT, DL, VT, SqrtVPLE0, SqrtSNextDown, SqrtS,
+ Flags);
+
+ SDValue SqrtVPVSGT0 = DAG.getSetCC(DL, MVT::i1, SqrtVS, Zero, ISD::SETOGT);
+ SqrtS = DAG.getNode(ISD::SELECT, DL, VT, SqrtVPVSGT0, SqrtSNextUp, SqrtS,
+ Flags);
+ } else {
+ SDValue SqrtR = DAG.getNode(AMDGPUISD::RSQ, DL, VT, SqrtX, Flags);
+
+ SqrtS = DAG.getNode(ISD::FMUL, DL, VT, SqrtX, SqrtR, Flags);
+
+ SDValue Half = DAG.getConstantFP(0.5f, DL, VT);
+ SDValue SqrtH = DAG.getNode(ISD::FMUL, DL, VT, SqrtR, Half, Flags);
+ SDValue NegSqrtH = DAG.getNode(ISD::FNEG, DL, VT, SqrtH, Flags);
+
+ SDValue SqrtE = DAG.getNode(ISD::FMA, DL, VT, NegSqrtH, SqrtS, Half, Flags);
+ SqrtH = DAG.getNode(ISD::FMA, DL, VT, SqrtH, SqrtE, SqrtH, Flags);
+ SqrtS = DAG.getNode(ISD::FMA, DL, VT, SqrtS, SqrtE, SqrtS, Flags);
+
+ SDValue NegSqrtS = DAG.getNode(ISD::FNEG, DL, VT, SqrtS, Flags);
+ SDValue SqrtD =
+ DAG.getNode(ISD::FMA, DL, VT, NegSqrtS, SqrtS, SqrtX, Flags);
+ SqrtS = DAG.getNode(ISD::FMA, DL, VT, SqrtD, SqrtH, SqrtS, Flags);
+ }
+
+ SDValue ScaleDownFactor = DAG.getConstantFP(0x1.0p-16f, DL, VT);
+
+ SDValue ScaledDown =
+ DAG.getNode(ISD::FMUL, DL, VT, SqrtS, ScaleDownFactor, Flags);
+
+ SqrtS = DAG.getNode(ISD::SELECT, DL, VT, NeedScale, ScaledDown, SqrtS, Flags);
+ SDValue IsZeroOrInf =
+ DAG.getNode(ISD::IS_FPCLASS, DL, MVT::i1, SqrtX,
+ DAG.getTargetConstant(fcZero | fcPosInf, DL, MVT::i32));
+
+ return DAG.getNode(ISD::SELECT, DL, VT, IsZeroOrInf, SqrtX, SqrtS, Flags);
+}
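lowerFSQRTF32 brackets the hardware sqrt with scaling so small inputs survive: values below 2^-96 are multiplied by 2^32 first, and the result by 2^-16 afterwards, since sqrt(2^32) = 2^16. A scalar sketch of that skeleton, with the one-ULP neighbor refinement from the denormal path elided (hw_sqrtf is a stand-in for the amdgcn.sqrt intrinsic):

    #include <cmath>

    float sqrtF32Skeleton(float X, float (*hw_sqrtf)(float)) {
      bool NeedScale = X < 0x1.0p-96f;              // ScaleThreshold
      float SqrtX = NeedScale ? X * 0x1.0p+32f : X; // ScaleUpFactor
      float SqrtS = hw_sqrtf(SqrtX);
      if (NeedScale)
        SqrtS *= 0x1.0p-16f;                        // ScaleDownFactor
      // fcZero | fcPosInf: sqrt maps +/-0 and +inf to themselves.
      if (SqrtX == 0.0f || SqrtX == INFINITY)
        return SqrtX;
      return SqrtS;
    }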
+
SDValue SITargetLowering::lowerFSQRTF64(SDValue Op, SelectionDAG &DAG) const {
// For double type, the SQRT and RSQ instructions don't have required
// precision, we apply Goldschmidt's algorithm to improve the result:
@@ -10111,9 +10858,7 @@ SDValue SITargetLowering::splitBinaryBitConstantOp(
return SDValue();
}
-// Returns true if argument is a boolean value which is not serialized into
-// memory or argument and does not require v_cndmask_b32 to be deserialized.
-static bool isBoolSGPR(SDValue V) {
+bool llvm::isBoolSGPR(SDValue V) {
if (V.getValueType() != MVT::i1)
return false;
switch (V.getOpcode()) {
@@ -10427,13 +11172,34 @@ calculateSrcByte(const SDValue Op, uint64_t DestByte, uint64_t SrcIndex = 0,
if (Depth >= 6)
return std::nullopt;
+ auto ValueSize = Op.getValueSizeInBits();
+ if (ValueSize != 8 && ValueSize != 16 && ValueSize != 32)
+ return std::nullopt;
+
switch (Op->getOpcode()) {
case ISD::TRUNCATE: {
- if (Op->getOperand(0).getScalarValueSizeInBits() != 32)
+ return calculateSrcByte(Op->getOperand(0), DestByte, SrcIndex, Depth + 1);
+ }
+
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND_INREG: {
+ SDValue NarrowOp = Op->getOperand(0);
+ auto NarrowVT = NarrowOp.getValueType();
+ if (Op->getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ auto *VTSign = cast<VTSDNode>(Op->getOperand(1));
+ NarrowVT = VTSign->getVT();
+ }
+ if (!NarrowVT.isByteSized())
+ return std::nullopt;
+ uint64_t NarrowByteWidth = NarrowVT.getStoreSize();
+
+ if (SrcIndex >= NarrowByteWidth)
return std::nullopt;
return calculateSrcByte(Op->getOperand(0), DestByte, SrcIndex, Depth + 1);
}
+ case ISD::SRA:
case ISD::SRL: {
auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
if (!ShiftOp)
@@ -10450,9 +11216,6 @@ calculateSrcByte(const SDValue Op, uint64_t DestByte, uint64_t SrcIndex = 0,
}
default: {
- if (Op.getScalarValueSizeInBits() != 32)
- return std::nullopt;
-
return ByteProvider<SDValue>::getSrc(Op, DestByte, SrcIndex);
}
}
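The new extend cases in calculateSrcByte bound SrcIndex by the narrow type's byte width, so a request for an extension byte fails instead of being mis-attributed to the narrow operand. The rule restated standalone: for Op = (zext i32 from i16 x), bytes 0 and 1 trace into x while bytes 2 and 3 do not:

    #include <optional>

    std::optional<unsigned> extendSrcByte(unsigned SrcIndex, unsigned NarrowBits) {
      if (NarrowBits % 8 != 0)
        return std::nullopt;         // !NarrowVT.isByteSized()
      if (SrcIndex >= NarrowBits / 8)
        return std::nullopt;         // extension byte, no byte-sized source
      return SrcIndex;               // recurse into the narrow operand
    }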
@@ -10476,7 +11239,8 @@ calculateByteProvider(const SDValue &Op, unsigned Index, unsigned Depth,
unsigned BitWidth = Op.getScalarValueSizeInBits();
if (BitWidth % 8 != 0)
return std::nullopt;
- assert(Index < BitWidth / 8 && "invalid index requested");
+ if (Index > BitWidth / 8 - 1)
+ return std::nullopt;
switch (Op.getOpcode()) {
case ISD::OR: {
@@ -10519,6 +11283,31 @@ calculateByteProvider(const SDValue &Op, unsigned Index, unsigned Depth,
return calculateSrcByte(Op->getOperand(0), StartingIndex, Index);
}
+ case ISD::FSHR: {
+ // fshr(X,Y,Z): (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(2));
+ if (!ShiftOp || Op.getValueType().isVector())
+ return std::nullopt;
+
+ uint64_t BitsProvided = Op.getValueSizeInBits();
+ if (BitsProvided % 8 != 0)
+ return std::nullopt;
+
+ uint64_t BitShift = ShiftOp->getAPIntValue().urem(BitsProvided);
+ if (BitShift % 8)
+ return std::nullopt;
+
+ uint64_t ConcatSizeInBytes = BitsProvided / 4;
+ uint64_t ByteShift = BitShift / 8;
+
+ uint64_t NewIndex = (Index + ByteShift) % ConcatSizeInBytes;
+ uint64_t BytesProvided = BitsProvided / 8;
+ SDValue NextOp = Op.getOperand(NewIndex >= BytesProvided ? 0 : 1);
+ NewIndex %= BytesProvided;
+ return calculateByteProvider(NextOp, NewIndex, Depth + 1, StartingIndex);
+ }
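The FSHR case treats fshr(X, Y, Z) on i32 as taking the low 32 bits of the 8-byte Y(lo):X(hi) concatenation shifted right by Z, so for a byte-aligned constant shift, result byte Index comes from concatenation byte (Index + Z/8). A standalone model of that index math:

    #include <cstdint>
    #include <utility>

    // Returns {operand number, byte within operand}; operand 0 is X.
    std::pair<int, unsigned> fshrByteSource(unsigned Index, uint64_t Z) {
      uint64_t ByteShift = (Z % 32) / 8;                     // BitShift / 8
      unsigned NewIndex = unsigned((Index + ByteShift) % 8); // 8 concat bytes
      int Operand = NewIndex >= 4 ? 0 : 1;                   // bytes 4..7 are in X
      return {Operand, NewIndex % 4};
    }
    // e.g. fshr(X, Y, 8): result byte 0 -> {1, 1} (Y byte 1),
    //                     result byte 3 -> {0, 0} (X byte 0).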
+
+ case ISD::SRA:
case ISD::SRL: {
auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
if (!ShiftOp)
@@ -10565,9 +11354,18 @@ calculateByteProvider(const SDValue &Op, unsigned Index, unsigned Depth,
}
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND: {
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::AssertZext:
+ case ISD::AssertSext: {
SDValue NarrowOp = Op->getOperand(0);
- unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
+ unsigned NarrowBitWidth = NarrowOp.getValueSizeInBits();
+ if (Op->getOpcode() == ISD::SIGN_EXTEND_INREG ||
+ Op->getOpcode() == ISD::AssertZext ||
+ Op->getOpcode() == ISD::AssertSext) {
+ auto *VTSign = cast<VTSDNode>(Op->getOperand(1));
+ NarrowBitWidth = VTSign->getVT().getSizeInBits();
+ }
if (NarrowBitWidth % 8 != 0)
return std::nullopt;
uint64_t NarrowByteWidth = NarrowBitWidth / 8;
@@ -10581,10 +11379,7 @@ calculateByteProvider(const SDValue &Op, unsigned Index, unsigned Depth,
}
case ISD::TRUNCATE: {
- unsigned NarrowBitWidth = Op.getScalarValueSizeInBits();
- if (NarrowBitWidth % 8 != 0)
- return std::nullopt;
- uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+ uint64_t NarrowByteWidth = BitWidth / 8;
if (NarrowByteWidth >= Index) {
return calculateByteProvider(Op.getOperand(0), Index, Depth + 1,
@@ -10594,8 +11389,16 @@ calculateByteProvider(const SDValue &Op, unsigned Index, unsigned Depth,
return std::nullopt;
}
+ case ISD::CopyFromReg: {
+ if (BitWidth / 8 > Index)
+ return calculateSrcByte(Op, StartingIndex, Index);
+
+ return std::nullopt;
+ }
+
case ISD::LOAD: {
auto L = cast<LoadSDNode>(Op.getNode());
+
unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
if (NarrowBitWidth % 8 != 0)
return std::nullopt;
@@ -10621,6 +11424,41 @@ calculateByteProvider(const SDValue &Op, unsigned Index, unsigned Depth,
case ISD::BSWAP:
return calculateByteProvider(Op->getOperand(0), BitWidth / 8 - Index - 1,
Depth + 1, StartingIndex);
+
+ case ISD::EXTRACT_VECTOR_ELT: {
+ auto IdxOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+ if (!IdxOp)
+ return std::nullopt;
+ auto VecIdx = IdxOp->getZExtValue();
+ auto ScalarSize = Op.getScalarValueSizeInBits();
+ if (ScalarSize != 32) {
+ if ((VecIdx + 1) * ScalarSize > 32)
+ return std::nullopt;
+ Index = ScalarSize == 8 ? VecIdx : VecIdx * 2 + Index;
+ }
+
+ return calculateSrcByte(ScalarSize == 32 ? Op : Op.getOperand(0),
+ StartingIndex, Index);
+ }
+
+ case AMDGPUISD::PERM: {
+ auto PermMask = dyn_cast<ConstantSDNode>(Op->getOperand(2));
+ if (!PermMask)
+ return std::nullopt;
+
+ auto IdxMask =
+ (PermMask->getZExtValue() & (0xFF << (Index * 8))) >> (Index * 8);
+ if (IdxMask > 0x07 && IdxMask != 0x0c)
+ return std::nullopt;
+
+ auto NextOp = Op.getOperand(IdxMask > 0x03 ? 0 : 1);
+ auto NextIndex = IdxMask > 0x03 ? IdxMask % 4 : IdxMask;
+
+ return IdxMask != 0x0c ? calculateSrcByte(NextOp, StartingIndex, NextIndex)
+ : ByteProvider<SDValue>(
+ ByteProvider<SDValue>::getConstantZero());
+ }
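For reference, a scalar model of the selector values this PERM case accepts: mask byte values 0..7 index the 8-byte Src1(lo):Src0(hi) concatenation and 0x0c produces a zero byte, while the remaining selectors (which have broadcast or sign-replication semantics) are rejected above:

    #include <cstdint>
    #include <optional>

    std::optional<uint8_t> permByte(uint32_t Src0, uint32_t Src1, uint8_t Sel) {
      if (Sel <= 0x07) {
        uint32_t Src = Sel > 0x03 ? Src0 : Src1;  // Op.getOperand(0) vs (1)
        return uint8_t(Src >> ((Sel & 3) * 8));
      }
      if (Sel == 0x0c)
        return 0;                                 // getConstantZero()
      return std::nullopt;
    }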
+
default: {
return std::nullopt;
}
@@ -10630,7 +11468,8 @@ calculateByteProvider(const SDValue &Op, unsigned Index, unsigned Depth,
}
// Returns true if the Operand is a scalar and is 16 bits
-static bool is16BitScalarOp(SDValue &Operand) {
+static bool isExtendedFrom16Bits(SDValue &Operand) {
+
switch (Operand.getOpcode()) {
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
@@ -10646,7 +11485,7 @@ static bool is16BitScalarOp(SDValue &Operand) {
auto MemVT = L->getMemoryVT();
return !MemVT.isVector() && MemVT.getSizeInBits() == 16;
}
- return false;
+ return L->getMemoryVT().getSizeInBits() == 16;
}
default:
return false;
@@ -10674,29 +11513,118 @@ static bool addresses16Bits(int Mask) {
// Do not lower into v_perm if the operands are actually 16 bit
// and the selected bits (based on PermMask) correspond with two
// easily addressable 16 bit operands.
-static bool hasEightBitAccesses(uint64_t PermMask, SDValue &Op,
+static bool hasNon16BitAccesses(uint64_t PermMask, SDValue &Op,
SDValue &OtherOp) {
int Low16 = PermMask & 0xffff;
int Hi16 = (PermMask & 0xffff0000) >> 16;
- // ByteProvider only accepts 32 bit operands
- assert(Op.getValueType().getSizeInBits() == 32);
- assert(OtherOp.getValueType().getSizeInBits() == 32);
+ assert(Op.getValueType().isByteSized());
+ assert(OtherOp.getValueType().isByteSized());
- auto OpIs16Bit = is16BitScalarOp(Op);
- auto OtherOpIs16Bit = is16BitScalarOp(Op);
+ auto TempOp = peekThroughBitcasts(Op);
+ auto TempOtherOp = peekThroughBitcasts(OtherOp);
- // If there is a size mismatch, then we must use masking on at least one
- // operand
- if (OpIs16Bit != OtherOpIs16Bit)
+ auto OpIs16Bit =
+ TempOtherOp.getValueSizeInBits() == 16 || isExtendedFrom16Bits(TempOp);
+ if (!OpIs16Bit)
return true;
- // If both operands are 16 bit, return whether or not we cleanly address both
- if (is16BitScalarOp(Op) && is16BitScalarOp(OtherOp))
- return !addresses16Bits(Low16) || !addresses16Bits(Hi16);
+ auto OtherOpIs16Bit = TempOtherOp.getValueSizeInBits() == 16 ||
+ isExtendedFrom16Bits(TempOtherOp);
+ if (!OtherOpIs16Bit)
+ return true;
- // Both are 32 bit operands
- return true;
+ // Do we cleanly address both 16-bit halves?
+ return !addresses16Bits(Low16) || !addresses16Bits(Hi16);
+}
+
+static SDValue matchPERM(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+
+ if (VT != MVT::i32)
+ return SDValue();
+
+ // VT is known to be MVT::i32, so we need to provide 4 bytes.
+ SmallVector<ByteProvider<SDValue>, 8> PermNodes;
+ for (int i = 0; i < 4; i++) {
+ // Find the ByteProvider that provides the ith byte of the result of OR
+ std::optional<ByteProvider<SDValue>> P =
+ calculateByteProvider(SDValue(N, 0), i, 0, /*StartingIndex = */ i);
+ // TODO support constantZero
+ if (!P || P->isConstantZero())
+ return SDValue();
+
+ PermNodes.push_back(*P);
+ }
+ if (PermNodes.size() != 4)
+ return SDValue();
+
+ int FirstSrc = 0;
+ std::optional<int> SecondSrc;
+ uint64_t PermMask = 0x00000000;
+ for (size_t i = 0; i < PermNodes.size(); i++) {
+ auto PermOp = PermNodes[i];
+ // Since the mask is applied to Src1:Src2, Src1 bytes must be offset
+ // by sizeof(Src2) = 4
+ int SrcByteAdjust = 4;
+
+ if (!PermOp.hasSameSrc(PermNodes[FirstSrc])) {
+ if (SecondSrc.has_value())
+ if (!PermOp.hasSameSrc(PermNodes[*SecondSrc]))
+ return SDValue();
+
+ // Set the index of the second distinct Src node
+ SecondSrc = i;
+ assert(!(PermNodes[*SecondSrc].Src->getValueSizeInBits() % 8));
+ SrcByteAdjust = 0;
+ }
+ assert(PermOp.SrcOffset + SrcByteAdjust < 8);
+ assert(!DAG.getDataLayout().isBigEndian());
+ PermMask |= (PermOp.SrcOffset + SrcByteAdjust) << (i * 8);
+ }
+
+ SDValue Op = *PermNodes[FirstSrc].Src;
+ SDValue OtherOp = SecondSrc.has_value() ? *PermNodes[*SecondSrc].Src
+ : *PermNodes[FirstSrc].Src;
+
+ // Check that we haven't just recreated the same FSHR node.
+ if (N->getOpcode() == ISD::FSHR &&
+ (N->getOperand(0) == Op || N->getOperand(0) == OtherOp) &&
+ (N->getOperand(1) == Op || N->getOperand(1) == OtherOp))
+ return SDValue();
+
+ // Check that we are not just extracting the bytes in order from an op
+ if (Op == OtherOp && Op.getValueSizeInBits() == 32) {
+ int Low16 = PermMask & 0xffff;
+ int Hi16 = (PermMask & 0xffff0000) >> 16;
+
+ bool WellFormedLow = (Low16 == 0x0504) || (Low16 == 0x0100);
+ bool WellFormedHi = (Hi16 == 0x0706) || (Hi16 == 0x0302);
+
+ // The perm op would really just produce Op. So combine into Op
+ if (WellFormedLow && WellFormedHi)
+ return DAG.getBitcast(MVT::getIntegerVT(32), Op);
+ }
+
+ if (hasNon16BitAccesses(PermMask, Op, OtherOp)) {
+ SDLoc DL(N);
+ assert(Op.getValueType().isByteSized() &&
+ OtherOp.getValueType().isByteSized());
+
+ // If the ultimate src is less than 32 bits, then we will only be
+ // using bytes 0..Op.getValueSizeInBytes() - 1 of it in the OR.
+ // calculateByteProvider would not have returned Op as a source if we
+ // used a byte that is outside its ValueType. Thus, we are free to
+ // ANY_EXTEND as the extended bits are don't-cares.
+ Op = DAG.getBitcastedAnyExtOrTrunc(Op, DL, MVT::i32);
+ OtherOp = DAG.getBitcastedAnyExtOrTrunc(OtherOp, DL, MVT::i32);
+
+ return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, Op, OtherOp,
+ DAG.getConstant(PermMask, DL, MVT::i32));
+ }
+
+ return SDValue();
}
SDValue SITargetLowering::performOrCombine(SDNode *N,
@@ -10812,69 +11740,8 @@ SDValue SITargetLowering::performOrCombine(SDNode *N,
}
}
if (LHSMask == ~0u || RHSMask == ~0u) {
- SmallVector<ByteProvider<SDValue>, 8> PermNodes;
-
- // VT is known to be MVT::i32, so we need to provide 4 bytes.
- assert(VT == MVT::i32);
- for (int i = 0; i < 4; i++) {
- // Find the ByteProvider that provides the ith byte of the result of OR
- std::optional<ByteProvider<SDValue>> P =
- calculateByteProvider(SDValue(N, 0), i, 0, /*StartingIndex = */ i);
- // TODO support constantZero
- if (!P || P->isConstantZero())
- return SDValue();
-
- PermNodes.push_back(*P);
- }
- if (PermNodes.size() != 4)
- return SDValue();
-
- int FirstSrc = 0;
- std::optional<int> SecondSrc;
- uint64_t permMask = 0x00000000;
- for (size_t i = 0; i < PermNodes.size(); i++) {
- auto PermOp = PermNodes[i];
- // Since the mask is applied to Src1:Src2, Src1 bytes must be offset
- // by sizeof(Src2) = 4
- int SrcByteAdjust = 4;
-
- if (!PermOp.hasSameSrc(PermNodes[FirstSrc])) {
- if (SecondSrc.has_value())
- if (!PermOp.hasSameSrc(PermNodes[*SecondSrc]))
- return SDValue();
- // Set the index of the second distinct Src node
- SecondSrc = i;
- assert(PermNodes[*SecondSrc].Src->getValueType().getSizeInBits() ==
- 32);
- SrcByteAdjust = 0;
- }
- assert(PermOp.SrcOffset + SrcByteAdjust < 8);
- assert(!DAG.getDataLayout().isBigEndian());
- permMask |= (PermOp.SrcOffset + SrcByteAdjust) << (i * 8);
- }
-
- SDValue Op = *PermNodes[FirstSrc].Src;
- SDValue OtherOp = SecondSrc.has_value() ? *PermNodes[*SecondSrc].Src
- : *PermNodes[FirstSrc].Src;
-
- // Check that we are not just extracting the bytes in order from an op
- if (Op == OtherOp) {
- int Low16 = permMask & 0xffff;
- int Hi16 = (permMask & 0xffff0000) >> 16;
-
- bool WellFormedLow = (Low16 == 0x0504) || (Low16 == 0x0100);
- bool WellFormedHi = (Hi16 == 0x0706) || (Hi16 == 0x0302);
-
- // The perm op would really just produce Op. So combine into Op
- if (WellFormedLow && WellFormedHi)
- return Op;
- }
-
- if (hasEightBitAccesses(permMask, Op, OtherOp)) {
- SDLoc DL(N);
- return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, Op, OtherOp,
- DAG.getConstant(permMask, DL, MVT::i32));
- }
+ if (SDValue Perm = matchPERM(N, DCI))
+ return Perm;
}
}
@@ -11021,10 +11888,8 @@ SDValue SITargetLowering::performClassCombine(SDNode *N,
SDValue Mask = N->getOperand(1);
// fp_class x, 0 -> false
- if (const ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(Mask)) {
- if (CMask->isZero())
- return DAG.getConstant(0, SDLoc(N), MVT::i1);
- }
+ if (isNullConstant(Mask))
+ return DAG.getConstant(0, SDLoc(N), MVT::i1);
if (N->getOperand(0).isUndef())
return DAG.getUNDEF(MVT::i1);
@@ -11049,7 +11914,9 @@ SDValue SITargetLowering::performRcpCombine(SDNode *N,
N->getFlags());
}
- if ((VT == MVT::f32 || VT == MVT::f16) && N0.getOpcode() == ISD::FSQRT) {
+ // TODO: Could handle f32 + amdgcn.sqrt but probably never reaches here.
+ if ((VT == MVT::f16 && N0.getOpcode() == ISD::FSQRT) &&
+ N->getFlags().hasAllowContract() && N0->getFlags().hasAllowContract()) {
return DCI.DAG.getNode(AMDGPUISD::RSQ, SDLoc(N), VT,
N0.getOperand(0), N->getFlags());
}
@@ -11131,10 +11998,14 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
case ISD::FMAXNUM:
case ISD::FMINNUM_IEEE:
case ISD::FMAXNUM_IEEE:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
case AMDGPUISD::CLAMP:
case AMDGPUISD::FMED3:
case AMDGPUISD::FMAX3:
- case AMDGPUISD::FMIN3: {
+ case AMDGPUISD::FMIN3:
+ case AMDGPUISD::FMAXIMUM3:
+ case AMDGPUISD::FMINIMUM3: {
// FIXME: Shouldn't treat the generic operations different based these.
// However, we aren't really required to flush the result from
// minnum/maxnum..
@@ -11288,7 +12159,9 @@ bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF,
case AMDGPU::G_FMINNUM:
case AMDGPU::G_FMAXNUM:
case AMDGPU::G_FMINNUM_IEEE:
- case AMDGPU::G_FMAXNUM_IEEE: {
+ case AMDGPU::G_FMAXNUM_IEEE:
+ case AMDGPU::G_FMINIMUM:
+ case AMDGPU::G_FMAXIMUM: {
if (Subtarget->supportsMinMaxDenormModes() ||
// FIXME: denormalsEnabledForType is broken for dynamic
denormalsEnabledForType(MRI.getType(Reg), MF))
@@ -11302,7 +12175,8 @@ bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF,
return false;
return true;
case AMDGPU::G_INTRINSIC:
- switch (MI->getIntrinsicID()) {
+ case AMDGPU::G_INTRINSIC_CONVERGENT:
+ switch (cast<GIntrinsic>(MI)->getIntrinsicID()) {
case Intrinsic::amdgcn_fmul_legacy:
case Intrinsic::amdgcn_fmad_ftz:
case Intrinsic::amdgcn_sqrt:
@@ -11321,7 +12195,6 @@ bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF,
case Intrinsic::amdgcn_div_fmas:
case Intrinsic::amdgcn_div_fixup:
case Intrinsic::amdgcn_fract:
- case Intrinsic::amdgcn_ldexp:
case Intrinsic::amdgcn_cvt_pkrtz:
case Intrinsic::amdgcn_cubeid:
case Intrinsic::amdgcn_cubema:
@@ -11476,6 +12349,8 @@ static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
case ISD::FMAXNUM:
case ISD::FMAXNUM_IEEE:
return AMDGPUISD::FMAX3;
+ case ISD::FMAXIMUM:
+ return AMDGPUISD::FMAXIMUM3;
case ISD::SMAX:
return AMDGPUISD::SMAX3;
case ISD::UMAX:
@@ -11483,6 +12358,8 @@ static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
case ISD::FMINNUM:
case ISD::FMINNUM_IEEE:
return AMDGPUISD::FMIN3;
+ case ISD::FMINIMUM:
+ return AMDGPUISD::FMINIMUM3;
case ISD::SMIN:
return AMDGPUISD::SMIN3;
case ISD::UMIN:
@@ -11842,7 +12719,9 @@ SDValue SITargetLowering::performExtractVectorEltCombine(
case ISD::FMAXNUM:
case ISD::FMINNUM:
case ISD::FMAXNUM_IEEE:
- case ISD::FMINNUM_IEEE: {
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXIMUM:
+ case ISD::FMINIMUM: {
SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
Vec.getOperand(0), Idx);
SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
@@ -12203,6 +13082,256 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
return Accum;
}
+// Collect the ultimate src of each of the mul node's operands, and confirm
+// each operand only provides 8 bits (one byte).
+static std::optional<ByteProvider<SDValue>>
+handleMulOperand(const SDValue &MulOperand) {
+ auto Byte0 = calculateByteProvider(MulOperand, 0, 0);
+ if (!Byte0 || Byte0->isConstantZero()) {
+ return std::nullopt;
+ }
+ auto Byte1 = calculateByteProvider(MulOperand, 1, 0);
+ if (Byte1 && !Byte1->isConstantZero()) {
+ return std::nullopt;
+ }
+ return Byte0;
+}
+
+static unsigned addPermMasks(unsigned First, unsigned Second) {
+ unsigned FirstCs = First & 0x0c0c0c0c;
+ unsigned SecondCs = Second & 0x0c0c0c0c;
+ unsigned FirstNoCs = First & ~0x0c0c0c0c;
+ unsigned SecondNoCs = Second & ~0x0c0c0c0c;
+
+ assert((FirstCs & 0xFF) | (SecondCs & 0xFF));
+ assert((FirstCs & 0xFF00) | (SecondCs & 0xFF00));
+ assert((FirstCs & 0xFF0000) | (SecondCs & 0xFF0000));
+ assert((FirstCs & 0xFF000000) | (SecondCs & 0xFF000000));
+
+ return (FirstNoCs | SecondNoCs) | (FirstCs & SecondCs);
+}
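A byte in these masks is "unset" exactly when it is 0x0c, and the asserts require that no byte is set on both sides. Restating the merge so the rule can be checked on a worked case:

    // Same computation as addPermMasks, made constexpr for a spot check.
    constexpr unsigned addPermMasksModel(unsigned First, unsigned Second) {
      unsigned FirstCs = First & 0x0c0c0c0c, SecondCs = Second & 0x0c0c0c0c;
      return ((First & ~0x0c0c0c0c) | (Second & ~0x0c0c0c0c)) |
             (FirstCs & SecondCs);
    }
    // Byte 3 defined by First, byte 2 by Second, bytes 1..0 unset in both:
    static_assert(addPermMasksModel(0x050c0c0c, 0x0c060c0c) == 0x05060c0c);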
+
+static void placeSources(ByteProvider<SDValue> &Src0,
+ ByteProvider<SDValue> &Src1,
+ SmallVectorImpl<std::pair<SDValue, unsigned>> &Src0s,
+ SmallVectorImpl<std::pair<SDValue, unsigned>> &Src1s,
+ int Step) {
+
+ assert(Src0.Src.has_value() && Src1.Src.has_value());
+ // Src0s and Src1s are empty, just place arbitrarily.
+ if (Step == 0) {
+ Src0s.push_back({*Src0.Src, (Src0.SrcOffset << 24) + 0x0c0c0c});
+ Src1s.push_back({*Src1.Src, (Src1.SrcOffset << 24) + 0x0c0c0c});
+ return;
+ }
+
+ for (int BPI = 0; BPI < 2; BPI++) {
+ std::pair<ByteProvider<SDValue>, ByteProvider<SDValue>> BPP = {Src0, Src1};
+ if (BPI == 1) {
+ BPP = {Src1, Src0};
+ }
+ unsigned ZeroMask = 0x0c0c0c0c;
+ unsigned FMask = 0xFF << (8 * (3 - Step));
+
+ unsigned FirstMask =
+ BPP.first.SrcOffset << (8 * (3 - Step)) | (ZeroMask & ~FMask);
+ unsigned SecondMask =
+ BPP.second.SrcOffset << (8 * (3 - Step)) | (ZeroMask & ~FMask);
+ // Attempt to find Src vector which contains our SDValue, if so, add our
+ // perm mask to the existing one. If we are unable to find a match for the
+ // first SDValue, attempt to find match for the second.
+ int FirstGroup = -1;
+ for (int I = 0; I < 2; I++) {
+ SmallVectorImpl<std::pair<SDValue, unsigned>> &Srcs =
+ I == 0 ? Src0s : Src1s;
+ auto MatchesFirst = [&BPP](std::pair<SDValue, unsigned> IterElt) {
+ return IterElt.first == *BPP.first.Src;
+ };
+
+ auto Match = llvm::find_if(Srcs, MatchesFirst);
+ if (Match != Srcs.end()) {
+ Match->second = addPermMasks(FirstMask, Match->second);
+ FirstGroup = I;
+ break;
+ }
+ }
+ if (FirstGroup != -1) {
+ SmallVectorImpl<std::pair<SDValue, unsigned>> &Srcs =
+ FirstGroup == 1 ? Src0s : Src1s;
+ auto MatchesSecond = [&BPP](std::pair<SDValue, unsigned> IterElt) {
+ return IterElt.first == *BPP.second.Src;
+ };
+ auto Match = llvm::find_if(Srcs, MatchesSecond);
+ if (Match != Srcs.end()) {
+ Match->second = addPermMasks(SecondMask, Match->second);
+ } else
+ Srcs.push_back({*BPP.second.Src, SecondMask});
+ return;
+ }
+ }
+
+ // If we have made it here, then we could not find a match in Src0s or Src1s
+ // for either Src0 or Src1, so just place them arbitrarily.
+
+ unsigned ZeroMask = 0x0c0c0c0c;
+ unsigned FMask = 0xFF << (8 * (3 - Step));
+
+ Src0s.push_back(
+ {*Src0.Src, (Src0.SrcOffset << (8 * (3 - Step)) | (ZeroMask & ~FMask))});
+ Src1s.push_back(
+ {*Src1.Src, (Src1.SrcOffset << (8 * (3 - Step)) | (ZeroMask & ~FMask))});
+
+ return;
+}
+
+static SDValue
+resolveSources(SelectionDAG &DAG, SDLoc SL,
+ SmallVectorImpl<std::pair<SDValue, unsigned>> &Srcs,
+ bool IsSigned, bool IsAny) {
+
+ // If we just have one source, just permute it accordingly.
+ if (Srcs.size() == 1) {
+ auto Elt = Srcs.begin();
+ auto EltVal = DAG.getBitcastedAnyExtOrTrunc(Elt->first, SL, MVT::i32);
+
+ // v_perm will produce the original value.
+ if (Elt->second == 0x3020100)
+ return EltVal;
+
+ return DAG.getNode(AMDGPUISD::PERM, SL, MVT::i32, EltVal, EltVal,
+ DAG.getConstant(Elt->second, SL, MVT::i32));
+ }
+
+ auto FirstElt = Srcs.begin();
+ auto SecondElt = std::next(FirstElt);
+
+ SmallVector<SDValue, 2> Perms;
+
+ // If we have multiple sources in the chain, combine them via perms (using
+ // calculated perm mask) and Ors.
+ while (true) {
+ auto FirstMask = FirstElt->second;
+ auto SecondMask = SecondElt->second;
+
+ unsigned FirstCs = FirstMask & 0x0c0c0c0c;
+ unsigned FirstPlusFour = FirstMask | 0x04040404;
+ // 0x0c + 0x04 = 0x10, so ANDing with 0x0F will produce 0x00 for any
+ // original 0x0C.
+ FirstMask = (FirstPlusFour & 0x0F0F0F0F) | FirstCs;
+
+ auto PermMask = addPermMasks(FirstMask, SecondMask);
+ auto FirstVal =
+ DAG.getBitcastedAnyExtOrTrunc(FirstElt->first, SL, MVT::i32);
+ auto SecondVal =
+ DAG.getBitcastedAnyExtOrTrunc(SecondElt->first, SL, MVT::i32);
+
+ Perms.push_back(DAG.getNode(AMDGPUISD::PERM, SL, MVT::i32, FirstVal,
+ SecondVal,
+ DAG.getConstant(PermMask, SL, MVT::i32)));
+
+ FirstElt = std::next(SecondElt);
+ if (FirstElt == Srcs.end())
+ break;
+
+ SecondElt = std::next(FirstElt);
+ // If we only have a FirstElt, then just combine that into the cumulative
+ // source node.
+ if (SecondElt == Srcs.end()) {
+ auto EltVal =
+ DAG.getBitcastedAnyExtOrTrunc(FirstElt->first, SL, MVT::i32);
+
+ Perms.push_back(
+ DAG.getNode(AMDGPUISD::PERM, SL, MVT::i32, EltVal, EltVal,
+ DAG.getConstant(FirstElt->second, SL, MVT::i32)));
+ break;
+ }
+ }
+
+ assert(Perms.size() == 1 || Perms.size() == 2);
+ return Perms.size() == 2
+ ? DAG.getNode(ISD::OR, SL, MVT::i32, Perms[0], Perms[1])
+ : Perms[0];
+}
+
+static void fixMasks(SmallVectorImpl<std::pair<SDValue, unsigned>> &Srcs,
+ unsigned ChainLength) {
+ for (auto &[EntryVal, EntryMask] : Srcs) {
+ EntryMask = EntryMask >> ((4 - ChainLength) * 8);
+ auto ZeroMask = ChainLength == 2 ? 0x0c0c0000 : 0x0c000000;
+ EntryMask += ZeroMask;
+ }
+}
+
+static bool isMul(const SDValue Op) {
+ auto Opcode = Op.getOpcode();
+
+ return (Opcode == ISD::MUL || Opcode == AMDGPUISD::MUL_U24 ||
+ Opcode == AMDGPUISD::MUL_I24);
+}
+
+static std::optional<bool>
+checkDot4MulSignedness(const SDValue &N, ByteProvider<SDValue> &Src0,
+ ByteProvider<SDValue> &Src1, const SDValue &S0Op,
+ const SDValue &S1Op, const SelectionDAG &DAG) {
+ // If both ops are i8s (pre legalize-dag), then the signedness semantics
+ // of the dot4 are irrelevant.
+ if (S0Op.getValueSizeInBits() == 8 && S1Op.getValueSizeInBits() == 8)
+ return false;
+
+ auto Known0 = DAG.computeKnownBits(S0Op, 0);
+ bool S0IsUnsigned = Known0.countMinLeadingZeros() > 0;
+ bool S0IsSigned = Known0.countMinLeadingOnes() > 0;
+ auto Known1 = DAG.computeKnownBits(S1Op, 0);
+ bool S1IsUnsigned = Known1.countMinLeadingZeros() > 0;
+ bool S1IsSigned = Known1.countMinLeadingOnes() > 0;
+
+ assert(!(S0IsUnsigned && S0IsSigned));
+ assert(!(S1IsUnsigned && S1IsSigned));
+
+ // There are 9 possible permutations of
+ // {S0IsUnsigned, S0IsSigned, S1IsUnsigned, S1IsSigned}
+
+ // In two permutations, the sign bits are known to be the same for both Ops,
+ // so simply return Signed / Unsigned corresponding to the MSB
+
+ if ((S0IsUnsigned && S1IsUnsigned) || (S0IsSigned && S1IsSigned))
+ return S0IsSigned;
+
+ // In another two permutations, the sign bits are known to be opposite. In
+ // this case return std::nullopt to indicate a bad match.
+
+ if ((S0IsUnsigned && S1IsSigned) || (S0IsSigned && S1IsUnsigned))
+ return std::nullopt;
+
+ // In the remaining five permutations, we don't know the value of the sign
+ // bit for at least one Op. Since we have a valid ByteProvider, we know that
+ // the upper bits must be extension bits. Thus, the only ways for the sign
+ // bit to be unknown are if it was sign extended from an unknown value, or
+ // if it was any extended. In either case, it is correct to use the signed
+ // version of the dot4 signedness semantics.
+
+ // In two such permutations, we know the sign bit is set for one op and
+ // unknown for the other. It is okay to use the signed version of dot4.
+ if ((S0IsSigned && !(S1IsSigned || S1IsUnsigned)) ||
+ ((S1IsSigned && !(S0IsSigned || S0IsUnsigned))))
+ return true;
+
+ // In one such permutation, we don't know either of the sign bits. It is
+ // okay to use the signed version of dot4.
+ if ((!(S1IsSigned || S1IsUnsigned) && !(S0IsSigned || S0IsUnsigned)))
+ return true;
+
+ // In two such permutations, we know the sign bit is unset for one op and
+ // unknown for the other. Return std::nullopt to indicate a bad match.
+ if ((S0IsUnsigned && !(S1IsSigned || S1IsUnsigned)) ||
+ ((S1IsUnsigned && !(S0IsSigned || S0IsUnsigned))))
+ return std::nullopt;
+
+ llvm_unreachable("Fully covered condition");
+}
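The nine combinations the function enumerates, summarized (S = sign bit known set, U = sign bit known unset, ? = unknown; the order within each pair does not matter):

    (S,S) -> signed dot4        (U,U) -> unsigned dot4
    (S,U) -> std::nullopt, bad match
    (S,?) -> signed dot4
    (?,?) -> signed dot4
    (U,?) -> std::nullopt, bad match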
+
SDValue SITargetLowering::performAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -12216,14 +13345,146 @@ SDValue SITargetLowering::performAddCombine(SDNode *N,
if (SDValue Folded = tryFoldToMad64_32(N, DCI))
return Folded;
}
-
- return SDValue();
}
if (SDValue V = reassociateScalarOps(N, DAG)) {
return V;
}
+ if ((isMul(LHS) || isMul(RHS)) && Subtarget->hasDot7Insts() &&
+ (Subtarget->hasDot1Insts() || Subtarget->hasDot8Insts())) {
+ SDValue TempNode(N, 0);
+ std::optional<bool> IsSigned;
+ SmallVector<std::pair<SDValue, unsigned>, 4> Src0s;
+ SmallVector<std::pair<SDValue, unsigned>, 4> Src1s;
+ SmallVector<SDValue, 4> Src2s;
+
+ // Match the v_dot4 tree, while collecting src nodes.
+ int ChainLength = 0;
+ for (int I = 0; I < 4; I++) {
+ auto MulIdx = isMul(LHS) ? 0 : isMul(RHS) ? 1 : -1;
+ if (MulIdx == -1)
+ break;
+ auto Src0 = handleMulOperand(TempNode->getOperand(MulIdx)->getOperand(0));
+ if (!Src0)
+ break;
+ auto Src1 = handleMulOperand(TempNode->getOperand(MulIdx)->getOperand(1));
+ if (!Src1)
+ break;
+
+ auto IterIsSigned = checkDot4MulSignedness(
+ TempNode->getOperand(MulIdx), *Src0, *Src1,
+ TempNode->getOperand(MulIdx)->getOperand(0),
+ TempNode->getOperand(MulIdx)->getOperand(1), DAG);
+ if (!IterIsSigned)
+ break;
+ if (!IsSigned)
+ IsSigned = *IterIsSigned;
+ if (*IterIsSigned != *IsSigned)
+ break;
+ placeSources(*Src0, *Src1, Src0s, Src1s, I);
+ auto AddIdx = 1 - MulIdx;
+ // Allow the special case where add (add (mul24, 0), mul24) was folded
+ // into add (mul24, mul24).
+ if (I == 2 && isMul(TempNode->getOperand(AddIdx))) {
+ Src2s.push_back(TempNode->getOperand(AddIdx));
+ auto Src0 =
+ handleMulOperand(TempNode->getOperand(AddIdx)->getOperand(0));
+ if (!Src0)
+ break;
+ auto Src1 =
+ handleMulOperand(TempNode->getOperand(AddIdx)->getOperand(1));
+ if (!Src1)
+ break;
+ auto IterIsSigned = checkDot4MulSignedness(
+ TempNode->getOperand(AddIdx), *Src0, *Src1,
+ TempNode->getOperand(AddIdx)->getOperand(0),
+ TempNode->getOperand(AddIdx)->getOperand(1), DAG);
+ if (!IterIsSigned)
+ break;
+ assert(IsSigned);
+ if (*IterIsSigned != *IsSigned)
+ break;
+ placeSources(*Src0, *Src1, Src0s, Src1s, I + 1);
+ Src2s.push_back(DAG.getConstant(0, SL, MVT::i32));
+ ChainLength = I + 2;
+ break;
+ }
+
+ TempNode = TempNode->getOperand(AddIdx);
+ Src2s.push_back(TempNode);
+ ChainLength = I + 1;
+ if (TempNode->getNumOperands() < 2)
+ break;
+ LHS = TempNode->getOperand(0);
+ RHS = TempNode->getOperand(1);
+ }
+
+ if (ChainLength < 2)
+ return SDValue();
+
+ // Masks were constructed with the assumption that we would find a chain of
+ // length 4. If not, then we need to zero out the unused MSB bytes (via a
+ // perm mask of 0x0c) so they do not affect the dot calculation.
+ if (ChainLength < 4) {
+ fixMasks(Src0s, ChainLength);
+ fixMasks(Src1s, ChainLength);
+ }
+
+ SDValue Src0, Src1;
+
+ // If we are just using a single source for both, and have permuted the
+ // bytes consistently, we can just use the sources without permuting
+ // (commutation).
+ bool UseOriginalSrc = false;
+ if (ChainLength == 4 && Src0s.size() == 1 && Src1s.size() == 1 &&
+ Src0s.begin()->second == Src1s.begin()->second &&
+ Src0s.begin()->first.getValueSizeInBits() == 32 &&
+ Src1s.begin()->first.getValueSizeInBits() == 32) {
+ SmallVector<unsigned, 4> SrcBytes;
+ auto Src0Mask = Src0s.begin()->second;
+ SrcBytes.push_back(Src0Mask & 0xFF000000);
+ bool UniqueEntries = true;
+ for (auto I = 1; I < 4; I++) {
+ auto NextByte = Src0Mask & (0xFF << ((3 - I) * 8));
+
+ if (is_contained(SrcBytes, NextByte)) {
+ UniqueEntries = false;
+ break;
+ }
+ SrcBytes.push_back(NextByte);
+ }
+
+ if (UniqueEntries) {
+ UseOriginalSrc = true;
+ // Must be 32 bits to enter above conditional.
+ assert(Src0s.begin()->first.getValueSizeInBits() == 32);
+ assert(Src1s.begin()->first.getValueSizeInBits() == 32);
+ Src0 = DAG.getBitcast(MVT::getIntegerVT(32), Src0s.begin()->first);
+ Src1 = DAG.getBitcast(MVT::getIntegerVT(32), Src1s.begin()->first);
+ }
+ }
+
+ if (!UseOriginalSrc) {
+ Src0 = resolveSources(DAG, SL, Src0s, false, true);
+ Src1 = resolveSources(DAG, SL, Src1s, false, true);
+ }
+
+ assert(IsSigned);
+ SDValue Src2 =
+ DAG.getExtOrTrunc(*IsSigned, Src2s[ChainLength - 1], SL, MVT::i32);
+
+ SDValue IID = DAG.getTargetConstant(*IsSigned ? Intrinsic::amdgcn_sdot4
+ : Intrinsic::amdgcn_udot4,
+ SL, MVT::i64);
+
+ assert(!VT.isVector());
+ auto Dot = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SL, MVT::i32, IID, Src0,
+ Src1, Src2, DAG.getTargetConstant(0, SL, MVT::i1));
+
+ return DAG.getExtOrTrunc(*IsSigned, Dot, SL, VT);
+ }
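For orientation, the source-level shape this combine targets (hypothetical example): four byte-wise products accumulated through a chain of integer adds, which becomes a single llvm.amdgcn.sdot4 (or udot4 for uint8_t elements) feeding the i32 accumulator:

    #include <cstdint>

    int32_t dot4(int32_t Acc, const int8_t A[4], const int8_t B[4]) {
      for (int I = 0; I < 4; ++I)
        Acc += int32_t(A[I]) * int32_t(B[I]);
      return Acc;
    }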
+
if (VT != MVT::i32 || !DCI.isAfterLegalizeDAG())
return SDValue();
@@ -12295,8 +13556,7 @@ SDValue SITargetLowering::performSubCombine(SDNode *N,
if (LHS.getOpcode() == ISD::USUBO_CARRY) {
// sub (usubo_carry x, 0, cc), y => usubo_carry x, y, cc
- auto C = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
- if (!C || !C->isZero())
+ if (!isNullConstant(LHS.getOperand(1)))
return SDValue();
SDValue Args[] = { LHS.getOperand(0), RHS, LHS.getOperand(2) };
return DAG.getNode(ISD::USUBO_CARRY, SDLoc(N), LHS->getVTList(), Args);
@@ -12417,6 +13677,41 @@ SDValue SITargetLowering::performFSubCombine(SDNode *N,
return SDValue();
}
+SDValue SITargetLowering::performFDivCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc SL(N);
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::f16 || !Subtarget->has16BitInsts())
+ return SDValue();
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ SDNodeFlags Flags = N->getFlags();
+ SDNodeFlags RHSFlags = RHS->getFlags();
+ if (!Flags.hasAllowContract() || !RHSFlags.hasAllowContract() ||
+ !RHS->hasOneUse())
+ return SDValue();
+
+ if (const ConstantFPSDNode *CLHS = dyn_cast<ConstantFPSDNode>(LHS)) {
+ bool IsNegative = false;
+ if (CLHS->isExactlyValue(1.0) ||
+ (IsNegative = CLHS->isExactlyValue(-1.0))) {
+ // fdiv contract 1.0, (sqrt contract x) -> rsq for f16
+ // fdiv contract -1.0, (sqrt contract x) -> fneg(rsq) for f16
+ if (RHS.getOpcode() == ISD::FSQRT) {
+ // TODO: Or in RHS flags, somehow missing from SDNodeFlags
+ SDValue Rsq =
+ DAG.getNode(AMDGPUISD::RSQ, SL, VT, RHS.getOperand(0), Flags);
+ return IsNegative ? DAG.getNode(ISD::FNEG, SL, VT, Rsq, Flags) : Rsq;
+ }
+ }
+ }
+
+ return SDValue();
+}
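The shape of the f16 pattern the new combine targets, as source code (illustrative; assumes a frontend with _Float16 and __builtin_sqrtf16, built with contraction enabled so both nodes carry the contract flag):

    // 1.0 / sqrt(x) -> v_rsq_f16; -1.0 / sqrt(x) -> fneg of it.
    _Float16 rsq(_Float16 X) { return (_Float16)1.0 / __builtin_sqrtf16(X); }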
+
SDValue SITargetLowering::performFMACombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -12666,7 +13961,7 @@ SDValue SITargetLowering::performClampCombine(SDNode *N,
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
- if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+ if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None)
return SDValue();
switch (N->getOpcode()) {
case ISD::ADD:
@@ -12680,12 +13975,16 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return performFAddCombine(N, DCI);
case ISD::FSUB:
return performFSubCombine(N, DCI);
+ case ISD::FDIV:
+ return performFDivCombine(N, DCI);
case ISD::SETCC:
return performSetCCCombine(N, DCI);
case ISD::FMAXNUM:
case ISD::FMINNUM:
case ISD::FMAXNUM_IEEE:
case ISD::FMINNUM_IEEE:
+ case ISD::FMAXIMUM:
+ case ISD::FMINIMUM:
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
@@ -12699,6 +13998,14 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return performAndCombine(N, DCI);
case ISD::OR:
return performOrCombine(N, DCI);
+ case ISD::FSHR: {
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
+ if (N->getValueType(0) == MVT::i32 && N->isDivergent() &&
+ TII->pseudoToMCOpcode(AMDGPU::V_PERM_B32_e64) != -1) {
+ return matchPERM(N, DCI);
+ }
+ break;
+ }
case ISD::XOR:
return performXorCombine(N, DCI);
case ISD::ZERO_EXTEND:
@@ -12793,7 +14100,7 @@ static unsigned SubIdx2Lane(unsigned Idx) {
}
}
-/// Adjust the writemask of MIMG instructions
+/// Adjust the writemask of MIMG, VIMAGE or VSAMPLE instructions
SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
SelectionDAG &DAG) const {
unsigned Opcode = Node->getMachineOpcode();
@@ -12811,7 +14118,7 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
unsigned TFEIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::tfe) - 1;
unsigned LWEIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::lwe) - 1;
bool UsesTFC = ((int(TFEIdx) >= 0 && Node->getConstantOperandVal(TFEIdx)) ||
- Node->getConstantOperandVal(LWEIdx))
+ (int(LWEIdx) >= 0 && Node->getConstantOperandVal(LWEIdx)))
? true
: false;
unsigned TFCLane = 0;
@@ -12943,7 +14250,11 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
continue;
} else {
SDValue Op = DAG.getTargetConstant(Idx, SDLoc(User), MVT::i32);
- DAG.UpdateNodeOperands(User, SDValue(NewNode, 0), Op);
+ SDNode *NewUser = DAG.UpdateNodeOperands(User, SDValue(NewNode, 0), Op);
+ if (NewUser != User) {
+ DAG.ReplaceAllUsesWith(SDValue(User, 0), SDValue(NewUser, 0));
+ DAG.RemoveDeadNode(User);
+ }
}
switch (Idx) {
@@ -13019,7 +14330,7 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
unsigned Opcode = Node->getMachineOpcode();
- if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() &&
+ if (TII->isImage(Opcode) && !TII->get(Opcode).mayStore() &&
!TII->isGather4(Opcode) &&
AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::dmask)) {
return adjustWritemask(Node, DAG);
@@ -13106,7 +14417,7 @@ void SITargetLowering::AddIMGInit(MachineInstr &MI) const {
return;
unsigned TFEVal = TFE ? TFE->getImm() : 0;
- unsigned LWEVal = LWE->getImm();
+ unsigned LWEVal = LWE ? LWE->getImm() : 0;
unsigned D16Val = D16 ? D16->getImm() : 0;
if (!TFEVal && !LWEVal)
@@ -13183,7 +14494,9 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
SDNode *Node) const {
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
- MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ MachineFunction *MF = MI.getParent()->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
if (TII->isVOP3(MI.getOpcode())) {
// Make sure constant bus requirements are respected.
@@ -13194,11 +14507,16 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
// use between vgpr and agpr as agpr tuples tend to be big.
if (!MI.getDesc().operands().empty()) {
unsigned Opc = MI.getOpcode();
+ bool HasAGPRs = Info->mayNeedAGPRs();
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
- for (auto I : { AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) }) {
+ int16_t Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
+ for (auto I :
+ {AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1), Src2Idx}) {
if (I == -1)
break;
+ if ((I == Src2Idx) && (HasAGPRs))
+ break;
MachineOperand &Op = MI.getOperand(I);
if (!Op.isReg() || !Op.getReg().isVirtual())
continue;
@@ -13216,6 +14534,9 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
MRI.setRegClass(Op.getReg(), NewRC);
}
+ if (!HasAGPRs)
+ return;
+
// Resolve the rest of AV operands to AGPRs.
if (auto *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2)) {
if (Src2->isReg() && Src2->getReg().isVirtual()) {
@@ -13233,7 +14554,7 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
return;
}
- if (TII->isMIMG(MI)) {
+ if (TII->isImage(MI)) {
if (!MI.mayStore())
AddIMGInit(MI);
TII->enforceOperandRCAlignment(MI, AMDGPU::OpName::vaddr);
@@ -13377,7 +14698,7 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
return std::pair(0U, RC);
}
- if (Constraint.startswith("{") && Constraint.endswith("}")) {
+ if (Constraint.starts_with("{") && Constraint.ends_with("}")) {
StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
if (RegName.consume_front("v")) {
RC = &AMDGPU::VGPR_32RegClass;
@@ -13467,7 +14788,7 @@ static uint64_t clearUnusedBits(uint64_t Val, unsigned Size) {
}
void SITargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
+ StringRef Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
if (isImmConstraint(Constraint)) {
@@ -13516,8 +14837,7 @@ bool SITargetLowering::getAsmOperandConstVal(SDValue Op, uint64_t &Val) const {
return false;
}
-bool SITargetLowering::checkAsmConstraintVal(SDValue Op,
- const std::string &Constraint,
+bool SITargetLowering::checkAsmConstraintVal(SDValue Op, StringRef Constraint,
uint64_t Val) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
@@ -13735,8 +15055,9 @@ void SITargetLowering::computeKnownBitsForTargetInstr(
const MachineRegisterInfo &MRI, unsigned Depth) const {
const MachineInstr *MI = MRI.getVRegDef(R);
switch (MI->getOpcode()) {
- case AMDGPU::G_INTRINSIC: {
- switch (MI->getIntrinsicID()) {
+ case AMDGPU::G_INTRINSIC:
+ case AMDGPU::G_INTRINSIC_CONVERGENT: {
+ switch (cast<GIntrinsic>(MI)->getIntrinsicID()) {
case Intrinsic::amdgcn_workitem_id_x:
knownBitsForWorkitemID(*getSubtarget(), KB, Known, 0);
break;
@@ -13801,21 +15122,16 @@ Align SITargetLowering::computeKnownAlignForTargetInstr(
GISelKnownBits &KB, Register R, const MachineRegisterInfo &MRI,
unsigned Depth) const {
const MachineInstr *MI = MRI.getVRegDef(R);
- switch (MI->getOpcode()) {
- case AMDGPU::G_INTRINSIC:
- case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
+ if (auto *GI = dyn_cast<GIntrinsic>(MI)) {
// FIXME: Can this move to generic code? What about the case where the call
// site specifies a lower alignment?
- Intrinsic::ID IID = MI->getIntrinsicID();
+ Intrinsic::ID IID = GI->getIntrinsicID();
LLVMContext &Ctx = KB.getMachineFunction().getFunction().getContext();
AttributeList Attrs = Intrinsic::getAttributes(Ctx, IID);
if (MaybeAlign RetAlign = Attrs.getRetAlignment())
return *RetAlign;
- return Align(1);
- }
- default:
- return Align(1);
}
+ return Align(1);
}
Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 1745c0b9e88e..5bc091d6e84d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -109,6 +109,8 @@ private:
SDValue LowerFFREXP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFSQRTF16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFSQRTF32(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFSQRTF64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
@@ -212,13 +214,15 @@ private:
SDValue performSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performFDivCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFMACombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performCvtF32UByteNCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- bool isLegalFlatAddressingMode(const AddrMode &AM) const;
+ bool isLegalFlatAddressingMode(const AddrMode &AM, unsigned AddrSpace,
+ uint64_t FlatVariant) const;
bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
unsigned isCFIntrinsic(const SDNode *Intr) const;
@@ -409,6 +413,10 @@ public:
SDValue lowerDYNAMIC_STACKALLOCImpl(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
@@ -463,13 +471,11 @@ public:
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;
ConstraintType getConstraintType(StringRef Constraint) const override;
- void LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
+ void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
bool getAsmOperandConstVal(SDValue Op, uint64_t &Val) const;
- bool checkAsmConstraintVal(SDValue Op,
- const std::string &Constraint,
+ bool checkAsmConstraintVal(SDValue Op, StringRef Constraint,
uint64_t Val) const;
bool checkAsmConstraintValA(SDValue Op,
uint64_t Val,
@@ -543,6 +549,17 @@ public:
const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) const;
+ void allocatePreloadKernArgSGPRs(CCState &CCInfo,
+ SmallVectorImpl<CCValAssign> &ArgLocs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) const;
+
+ void allocateLDSKernelId(CCState &CCInfo, MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) const;
+
void allocateSystemSGPRs(CCState &CCInfo,
MachineFunction &MF,
SIMachineFunctionInfo &Info,
@@ -572,6 +589,10 @@ public:
getTargetMMOFlags(const Instruction &I) const override;
};
+// Returns true if the argument is a boolean value which is not serialized
+// into memory or as an argument and does not require v_cndmask_b32 to be
+// deserialized.
+bool isBoolSGPR(SDValue V);
+
} // End namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
index 50f8ad4433c6..442ae4dd7b34 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
@@ -232,7 +232,10 @@ public:
// scheduler it limits the size of the cluster to avoid increasing
// register pressure too much, but this pass runs after register
// allocation so there is no need for that kind of limit.
- !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) {
+ // We also lie about the Offset and OffsetIsScalable parameters,
+ // as they aren't used in the SIInstrInfo implementation.
+ !SII->shouldClusterMemOps(CI.BaseOps, 0, false, BaseOps, 0, false,
+ 2, 2)))) {
// Finish the current clause.
Changed |= emitClause(CI, SII);
CI = ClauseInfo();
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index a74b917f82bf..8415a3d77d3b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -292,6 +292,11 @@ public:
VgprVmemTypes[GprNo] = 0;
}
+ void setNonKernelFunctionInitialState() {
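+ // Conservatively assume that VMEM stores issued by the caller may still
+ // be outstanding on entry, so start with the store counter saturated.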
+ setScoreUB(VS_CNT, getWaitCountMax(VS_CNT));
+ PendingEvents |= WaitEventMaskForInst[VS_CNT];
+ }
+
void print(raw_ostream &);
void dump() { print(dbgs()); }
@@ -364,7 +369,6 @@ private:
const MachineRegisterInfo *MRI = nullptr;
AMDGPU::IsaVersion IV;
- DenseSet<MachineInstr *> TrackedWaitcntSet;
DenseMap<const Value *, MachineBasicBlock *> SLoadAddresses;
DenseMap<MachineBasicBlock *, bool> PreheadersToFlush;
MachineLoopInfo *MLI;
@@ -452,7 +456,9 @@ public:
// FLAT instruction.
WaitEventType getVmemWaitEventType(const MachineInstr &Inst) const {
assert(SIInstrInfo::isVMEM(Inst) || SIInstrInfo::isFLAT(Inst));
- if (!ST->hasVscnt())
+ // LDS DMA loads are also stores, but on the LDS side. On the VMEM side
+ // these should use VM_CNT.
+ if (!ST->hasVscnt() || SIInstrInfo::mayWriteLDSThroughDMA(Inst))
return VMEM_ACCESS;
if (Inst.mayStore() && !SIInstrInfo::isAtomicRet(Inst)) {
// FLAT and SCRATCH instructions may access scratch. Other VMEM
@@ -486,6 +492,9 @@ public:
MachineInstr &OldWaitcntInstr,
AMDGPU::Waitcnt &Wait,
MachineBasicBlock::instr_iterator It) const;
+
+ // Transform a soft waitcnt into a normal one.
+ bool promoteSoftWaitCnt(MachineInstr *Waitcnt) const;
};
} // end anonymous namespace
@@ -505,7 +514,8 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
RegInterval Result;
- unsigned Reg = TRI->getEncodingValue(AMDGPU::getMCReg(Op.getReg(), *ST));
+ unsigned Reg = TRI->getEncodingValue(AMDGPU::getMCReg(Op.getReg(), *ST)) &
+ AMDGPU::HWEncoding::REG_IDX_MASK;
if (TRI->isVectorRegister(*MRI, Op.getReg())) {
assert(Reg >= Encoding.VGPR0 && Reg <= Encoding.VGPRL);
@@ -543,14 +553,6 @@ void WaitcntBrackets::setExpScore(const MachineInstr *MI,
}
}
-// MUBUF and FLAT LDS DMA operations need a wait on vmcnt before LDS written
-// can be accessed. A load from LDS to VMEM does not need a wait.
-static bool mayWriteLDSThroughDMA(const MachineInstr &MI) {
- return SIInstrInfo::isVALU(MI) &&
- (SIInstrInfo::isMUBUF(MI) || SIInstrInfo::isFLAT(MI)) &&
- MI.getOpcode() != AMDGPU::BUFFER_STORE_LDS_DWORD;
-}
-
void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
const SIRegisterInfo *TRI,
const MachineRegisterInfo *MRI,
@@ -590,12 +592,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
AMDGPU::OpName::data1),
CurrScore);
}
- } else if (SIInstrInfo::isAtomicRet(Inst) &&
- Inst.getOpcode() != AMDGPU::DS_GWS_INIT &&
- Inst.getOpcode() != AMDGPU::DS_GWS_SEMA_V &&
- Inst.getOpcode() != AMDGPU::DS_GWS_SEMA_BR &&
- Inst.getOpcode() != AMDGPU::DS_GWS_SEMA_P &&
- Inst.getOpcode() != AMDGPU::DS_GWS_BARRIER &&
+ } else if (SIInstrInfo::isAtomicRet(Inst) && !SIInstrInfo::isGWS(Inst) &&
Inst.getOpcode() != AMDGPU::DS_APPEND &&
Inst.getOpcode() != AMDGPU::DS_CONSUME &&
Inst.getOpcode() != AMDGPU::DS_ORDERED_COUNT) {
@@ -683,7 +680,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
setRegScore(RegNo + NUM_ALL_VGPRS, t, CurrScore);
}
#endif
- } else {
+ } else /* LGKM_CNT || EXP_CNT || VS_CNT || NUM_INST_CNTS */ {
// Match the score to the destination registers.
for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
auto &Op = Inst.getOperand(I);
@@ -694,6 +691,10 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
if (Interval.first >= NUM_ALL_VGPRS)
continue;
if (updateVMCntOnly(Inst)) {
+ // updateVMCntOnly should only leave us with VGPRs.
+ // MUBUF, MTBUF, MIMG, FlatGlobal, and FlatScratch only have VGPR/AGPR
+ // defs, which is required for a sane index into `VgprVmemTypes` below.
+ assert(TRI->isVectorRegister(*MRI, Op.getReg()));
VmemType V = getVmemType(Inst);
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo)
VgprVmemTypes[RegNo] |= 1 << V;
@@ -703,7 +704,10 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
setRegScore(RegNo, T, CurrScore);
}
}
- if (Inst.mayStore() && (TII->isDS(Inst) || mayWriteLDSThroughDMA(Inst))) {
+ if (Inst.mayStore() &&
+ (TII->isDS(Inst) || TII->mayWriteLDSThroughDMA(Inst))) {
+ // MUBUF and FLAT LDS DMA operations need a wait on vmcnt before the LDS
+ // they write can be accessed. A load from LDS to VMEM does not need a wait.
setRegScore(SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS, T, CurrScore);
}
}
@@ -870,6 +874,15 @@ static bool updateOperandIfDifferent(MachineInstr &MI, uint16_t OpName,
return true;
}
+bool SIInsertWaitcnts::promoteSoftWaitCnt(MachineInstr *Waitcnt) const {
+ unsigned Opcode = Waitcnt->getOpcode();
+ if (!SIInstrInfo::isSoftWaitcnt(Opcode))
+ return false;
+
+ Waitcnt->setDesc(TII->get(SIInstrInfo::getNonSoftWaitcntOpcode(Opcode)));
+ return true;
+}
+
/// Combine consecutive waitcnt instructions that precede \p It and follow
/// \p OldWaitcntInstr and apply any extra wait from waitcnt that were added
/// by previous passes. Currently this pass conservatively assumes that these
@@ -886,86 +899,77 @@ bool SIInsertWaitcnts::applyPreexistingWaitcnt(
if (II.isMetaInstruction())
continue;
- if (II.getOpcode() == AMDGPU::S_WAITCNT) {
- // Conservatively update required wait if this waitcnt was added in an
- // earlier pass. In this case it will not exist in the tracked waitcnt
- // set.
- if (!TrackedWaitcntSet.count(&II)) {
- unsigned IEnc = II.getOperand(0).getImm();
- AMDGPU::Waitcnt OldWait = AMDGPU::decodeWaitcnt(IV, IEnc);
- Wait = Wait.combined(OldWait);
- }
+ unsigned Opcode = II.getOpcode();
+ bool IsSoft = SIInstrInfo::isSoftWaitcnt(Opcode);
+
+ if (SIInstrInfo::isWaitcnt(Opcode)) {
+ // Update required wait count. If this is a soft waitcnt (= it was added
+ // by an earlier pass), it may be entirely removed.
+ unsigned IEnc = II.getOperand(0).getImm();
+ AMDGPU::Waitcnt OldWait = AMDGPU::decodeWaitcnt(IV, IEnc);
+ if (IsSoft)
+ ScoreBrackets.simplifyWaitcnt(OldWait);
+ Wait = Wait.combined(OldWait);
// Merge consecutive waitcnt of the same type by erasing multiples.
- if (!WaitcntInstr) {
- WaitcntInstr = &II;
- } else {
+ if (WaitcntInstr || (!Wait.hasWaitExceptVsCnt() && IsSoft)) {
II.eraseFromParent();
Modified = true;
- }
+ } else
+ WaitcntInstr = &II;
} else {
- assert(II.getOpcode() == AMDGPU::S_WAITCNT_VSCNT);
+ assert(SIInstrInfo::isWaitcntVsCnt(Opcode));
assert(II.getOperand(0).getReg() == AMDGPU::SGPR_NULL);
- if (!TrackedWaitcntSet.count(&II)) {
- unsigned OldVSCnt =
- TII->getNamedOperand(II, AMDGPU::OpName::simm16)->getImm();
- Wait.VsCnt = std::min(Wait.VsCnt, OldVSCnt);
- }
- if (!WaitcntVsCntInstr) {
- WaitcntVsCntInstr = &II;
- } else {
+ unsigned OldVSCnt =
+ TII->getNamedOperand(II, AMDGPU::OpName::simm16)->getImm();
+ if (IsSoft)
+ ScoreBrackets.simplifyWaitcnt(InstCounterType::VS_CNT, OldVSCnt);
+ Wait.VsCnt = std::min(Wait.VsCnt, OldVSCnt);
+
+ if (WaitcntVsCntInstr || (!Wait.hasWaitVsCnt() && IsSoft)) {
II.eraseFromParent();
Modified = true;
- }
+ } else
+ WaitcntVsCntInstr = &II;
}
}
// Update the encoding of the merged waitcnt with the required wait.
if (WaitcntInstr) {
- if (Wait.hasWaitExceptVsCnt()) {
- Modified |=
- updateOperandIfDifferent(*WaitcntInstr, AMDGPU::OpName::simm16,
- AMDGPU::encodeWaitcnt(IV, Wait));
- ScoreBrackets.applyWaitcnt(Wait);
- Wait.VmCnt = ~0u;
- Wait.LgkmCnt = ~0u;
- Wait.ExpCnt = ~0u;
-
- LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end()
- ? dbgs() << "applyPreexistingWaitcnt\n"
- << "New Instr at block end: " << *WaitcntInstr
- << '\n'
- : dbgs() << "applyPreexistingWaitcnt\n"
- << "Old Instr: " << *It
- << "New Instr: " << *WaitcntInstr << '\n');
+ Modified |= updateOperandIfDifferent(*WaitcntInstr, AMDGPU::OpName::simm16,
+ AMDGPU::encodeWaitcnt(IV, Wait));
+ Modified |= promoteSoftWaitCnt(WaitcntInstr);
- } else {
- WaitcntInstr->eraseFromParent();
- Modified = true;
- }
+ ScoreBrackets.applyWaitcnt(Wait);
+ Wait.VmCnt = ~0u;
+ Wait.LgkmCnt = ~0u;
+ Wait.ExpCnt = ~0u;
+
+ LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end()
+ ? dbgs()
+ << "applyPreexistingWaitcnt\n"
+ << "New Instr at block end: " << *WaitcntInstr << '\n'
+ : dbgs() << "applyPreexistingWaitcnt\n"
+ << "Old Instr: " << *It
+ << "New Instr: " << *WaitcntInstr << '\n');
}
if (WaitcntVsCntInstr) {
- if (Wait.hasWaitVsCnt()) {
- assert(ST->hasVscnt());
- Modified |= updateOperandIfDifferent(*WaitcntVsCntInstr,
- AMDGPU::OpName::simm16, Wait.VsCnt);
- ScoreBrackets.applyWaitcnt(Wait);
- Wait.VsCnt = ~0u;
-
- LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end()
- ? dbgs() << "applyPreexistingWaitcnt\n"
- << "New Instr at block end: "
- << *WaitcntVsCntInstr << '\n'
- : dbgs() << "applyPreexistingWaitcnt\n"
- << "Old Instr: " << *It
- << "New Instr: " << *WaitcntVsCntInstr << '\n');
- } else {
- WaitcntVsCntInstr->eraseFromParent();
- Modified = true;
- }
+ Modified |= updateOperandIfDifferent(*WaitcntVsCntInstr,
+ AMDGPU::OpName::simm16, Wait.VsCnt);
+ Modified |= promoteSoftWaitCnt(WaitcntVsCntInstr);
+ ScoreBrackets.applyWaitcnt(Wait);
+ Wait.VsCnt = ~0u;
+
+ LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end()
+ ? dbgs() << "applyPreexistingWaitcnt\n"
+ << "New Instr at block end: " << *WaitcntVsCntInstr
+ << '\n'
+ : dbgs() << "applyPreexistingWaitcnt\n"
+ << "Old Instr: " << *It
+ << "New Instr: " << *WaitcntVsCntInstr << '\n');
}
return Modified;
@@ -1178,7 +1182,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::FLAT_ADDRESS)
continue;
// No need to wait before load from VMEM to LDS.
- if (mayWriteLDSThroughDMA(MI))
+ if (TII->mayWriteLDSThroughDMA(MI))
continue;
unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
// VM_CNT is only relevant to vgpr or LDS.
@@ -1315,9 +1319,8 @@ bool SIInsertWaitcnts::generateWaitcnt(AMDGPU::Waitcnt Wait,
// instruction was modified to handle the required wait.
if (Wait.hasWaitExceptVsCnt()) {
unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait);
- auto SWaitInst =
+ [[maybe_unused]] auto SWaitInst =
BuildMI(Block, It, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Enc);
- TrackedWaitcntSet.insert(SWaitInst);
Modified = true;
LLVM_DEBUG(dbgs() << "generateWaitcnt\n";
@@ -1328,10 +1331,9 @@ bool SIInsertWaitcnts::generateWaitcnt(AMDGPU::Waitcnt Wait,
if (Wait.hasWaitVsCnt()) {
assert(ST->hasVscnt());
- auto SWaitInst = BuildMI(Block, It, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT))
+ [[maybe_unused]] auto SWaitInst =
+ BuildMI(Block, It, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT))
.addReg(AMDGPU::SGPR_NULL, RegState::Undef)
.addImm(Wait.VsCnt);
- TrackedWaitcntSet.insert(SWaitInst);
Modified = true;
LLVM_DEBUG(dbgs() << "generateWaitcnt\n";
@@ -1504,6 +1506,11 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
break;
case AMDGPU::S_MEMTIME:
case AMDGPU::S_MEMREALTIME:
+ case AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0:
+ case AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM:
+ case AMDGPU::S_BARRIER_LEAVE:
+ case AMDGPU::S_GET_BARRIER_STATE_M0:
+ case AMDGPU::S_GET_BARRIER_STATE_IMM:
ScoreBrackets->updateByEvent(TII, TRI, MRI, SMEM_ACCESS, Inst);
break;
}
@@ -1574,9 +1581,9 @@ bool WaitcntBrackets::merge(const WaitcntBrackets &Other) {
}
static bool isWaitInstr(MachineInstr &Inst) {
- return Inst.getOpcode() == AMDGPU::S_WAITCNT ||
- (Inst.getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
- Inst.getOperand(0).isReg() &&
+ auto Opcode = Inst.getOpcode();
+ return SIInstrInfo::isWaitcnt(Opcode) ||
+ (SIInstrInfo::isWaitcntVsCnt(Opcode) && Inst.getOperand(0).isReg() &&
Inst.getOperand(0).getReg() == AMDGPU::SGPR_NULL);
}
@@ -1721,26 +1728,25 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
// which we want to flush the vmcnt counter, and false otherwise.
bool SIInsertWaitcnts::isPreheaderToFlush(MachineBasicBlock &MBB,
WaitcntBrackets &ScoreBrackets) {
- if (PreheadersToFlush.count(&MBB))
- return PreheadersToFlush[&MBB];
-
- auto UpdateCache = [&](bool val) {
- PreheadersToFlush[&MBB] = val;
- return val;
- };
+ auto [Iterator, IsInserted] = PreheadersToFlush.try_emplace(&MBB, false);
+ if (!IsInserted)
+ return Iterator->second;
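+ // A newly inserted entry defaults to false; it is flipped to true below
+ // only when this preheader must flush the vmcnt counter.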
MachineBasicBlock *Succ = MBB.getSingleSuccessor();
if (!Succ)
- return UpdateCache(false);
+ return false;
MachineLoop *Loop = MLI->getLoopFor(Succ);
if (!Loop)
- return UpdateCache(false);
+ return false;
- if (Loop->getLoopPreheader() == &MBB && shouldFlushVmCnt(Loop, ScoreBrackets))
- return UpdateCache(true);
+ if (Loop->getLoopPreheader() == &MBB &&
+ shouldFlushVmCnt(Loop, ScoreBrackets)) {
+ Iterator->second = true;
+ return true;
+ }
- return UpdateCache(false);
+ return false;
}
bool SIInsertWaitcnts::isVMEMOrFlatVMEM(const MachineInstr &MI) const {
@@ -1825,7 +1831,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
ForceEmitWaitcnt[T] = false;
OptNone = MF.getFunction().hasOptNone() ||
- MF.getTarget().getOptLevel() == CodeGenOpt::None;
+ MF.getTarget().getOptLevel() == CodeGenOptLevel::None;
HardwareLimits Limits = {};
Limits.VmcntMax = AMDGPU::getVmcntBitMask(IV);
@@ -1839,12 +1845,13 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
assert(NumSGPRsMax <= SQ_MAX_PGM_SGPRS);
RegisterEncoding Encoding = {};
- Encoding.VGPR0 = TRI->getEncodingValue(AMDGPU::VGPR0);
+ Encoding.VGPR0 =
+ TRI->getEncodingValue(AMDGPU::VGPR0) & AMDGPU::HWEncoding::REG_IDX_MASK;
Encoding.VGPRL = Encoding.VGPR0 + NumVGPRsMax - 1;
- Encoding.SGPR0 = TRI->getEncodingValue(AMDGPU::SGPR0);
+ Encoding.SGPR0 =
+ TRI->getEncodingValue(AMDGPU::SGPR0) & AMDGPU::HWEncoding::REG_IDX_MASK;
Encoding.SGPRL = Encoding.SGPR0 + NumSGPRsMax - 1;
- TrackedWaitcntSet.clear();
BlockInfos.clear();
bool Modified = false;
@@ -1862,6 +1869,11 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
;
BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0);
+ auto NonKernelInitialState =
+ std::make_unique<WaitcntBrackets>(ST, Limits, Encoding);
+ NonKernelInitialState->setNonKernelFunctionInitialState();
+ BlockInfos[&EntryBB].Incoming = std::move(NonKernelInitialState);
+
Modified = true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index f674777724eb..585a3eb78618 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -40,6 +40,8 @@ class InstSI <dag outs, dag ins, string asm = "",
field bit MTBUF = 0;
field bit SMRD = 0;
field bit MIMG = 0;
+ field bit VIMAGE = 0;
+ field bit VSAMPLE = 0;
field bit EXP = 0;
field bit FLAT = 0;
field bit DS = 0;
@@ -156,6 +158,9 @@ class InstSI <dag outs, dag ins, string asm = "",
// This bit indicates that the instruction is never-uniform/divergent
field bit IsNeverUniform = 0;
+ // ds_gws_* instructions.
+ field bit GWS = 0;
+
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = SALU;
let TSFlags{1} = VALU;
@@ -181,15 +186,17 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{18} = MTBUF;
let TSFlags{19} = SMRD;
let TSFlags{20} = MIMG;
- let TSFlags{21} = EXP;
- let TSFlags{22} = FLAT;
- let TSFlags{23} = DS;
+ let TSFlags{21} = VIMAGE;
+ let TSFlags{22} = VSAMPLE;
+ let TSFlags{23} = EXP;
+ let TSFlags{24} = FLAT;
+ let TSFlags{25} = DS;
- let TSFlags{24} = VGPRSpill;
- let TSFlags{25} = SGPRSpill;
+ let TSFlags{26} = VGPRSpill;
+ let TSFlags{27} = SGPRSpill;
- let TSFlags{26} = LDSDIR;
- let TSFlags{27} = VINTERP;
+ let TSFlags{28} = LDSDIR;
+ let TSFlags{29} = VINTERP;
let TSFlags{32} = VM_CNT;
let TSFlags{33} = EXP_CNT;
@@ -239,6 +246,8 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{61} = IsNeverUniform;
+ let TSFlags{62} = GWS;
+
let SchedRW = [Write32Bit];
let AsmVariantName = AMDGPUAsmVariants.Default;
@@ -299,6 +308,16 @@ def CPolBit {
class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
+def VOPDstOperand_t16 : VOPDstOperand <VGPR_16> {
+ let EncoderMethod = "getMachineOpValueT16";
+ let DecoderMethod = "DecodeVGPR_16RegisterClass";
+}
+
+def VOPDstOperand_t16Lo128 : VOPDstOperand <VGPR_16_Lo128> {
+ let EncoderMethod = "getMachineOpValueT16Lo128";
+ let DecoderMethod = "DecodeVGPR_16_Lo128RegisterClass";
+}
+
class VINTRPe <bits<2> op> : Enc32 {
bits<8> vdst;
bits<8> vsrc;
@@ -414,6 +433,57 @@ class MIMGe_gfx11 <bits<8> op> : Enc64 {
let Inst{62-58} = ssamp{6-2};
}
+class VIMAGE_VSAMPLE_Common <bits<8> op> : Enc96 {
+ bits<3> dim;
+ bits<1> tfe;
+ bits<1> r128;
+ bit d16;
+ bits<1> a16;
+ bits<4> dmask;
+ bits<8> vdata;
+ bits<9> rsrc;
+ bits<6> cpol;
+ bits<8> vaddr0;
+ bits<8> vaddr1;
+ bits<8> vaddr2;
+ bits<8> vaddr3;
+
+ let Inst{2-0} = dim;
+ let Inst{4} = r128;
+ let Inst{5} = d16;
+ let Inst{6} = a16;
+ let Inst{21-14} = op;
+ let Inst{25-22} = dmask;
+ let Inst{39-32} = vdata;
+ let Inst{49-41} = rsrc;
+ let Inst{51-50} = cpol{4-3}; // scope
+ let Inst{54-52} = cpol{2-0}; // th
+ let Inst{71-64} = vaddr0;
+ let Inst{79-72} = vaddr1;
+ let Inst{87-80} = vaddr2;
+ let Inst{95-88} = vaddr3;
+}
+
+class VSAMPLEe <bits<8> op> : VIMAGE_VSAMPLE_Common<op> {
+ bits<1> unorm;
+ bits<1> lwe;
+ bits<9> samp;
+
+ let Inst{3} = tfe;
+ let Inst{13} = unorm;
+ let Inst{31-26} = 0x39;
+ let Inst{40} = lwe;
+ let Inst{63-55} = samp;
+}
+
+class VIMAGEe <bits<8> op> : VIMAGE_VSAMPLE_Common<op> {
+ bits<8> vaddr4;
+
+ let Inst{31-26} = 0x34;
+ let Inst{55} = tfe;
+ let Inst{63-56} = vaddr4;
+}
+
class EXPe : Enc64 {
bits<4> en;
bits<6> tgt;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0f954732a5ee..70ef1fff274a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -17,7 +17,9 @@
#include "GCNHazardRecognizer.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -105,9 +107,27 @@ static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
}
+static bool canRemat(const MachineInstr &MI) {
+
+ if (SIInstrInfo::isVOP1(MI) || SIInstrInfo::isVOP2(MI) ||
+ SIInstrInfo::isVOP3(MI) || SIInstrInfo::isSDWA(MI) ||
+ SIInstrInfo::isSALU(MI))
+ return true;
+
+ if (SIInstrInfo::isSMRD(MI)) {
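+ // An SMRD is safe to rematerialize only if every memory operand is an
+ // invariant load.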
+ return !MI.memoperands_empty() &&
+ llvm::all_of(MI.memoperands(), [](const MachineMemOperand *MMO) {
+ return MMO->isLoad() && MMO->isInvariant();
+ });
+ }
+
+ return false;
+}
+
bool SIInstrInfo::isReallyTriviallyReMaterializable(
const MachineInstr &MI) const {
- if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isSDWA(MI) || isSALU(MI)) {
+
+ if (canRemat(MI)) {
// Normally VALU use of exec would block the rematerialization, but that
// is OK in this case to have an implicit exec read as all VALU do.
// We really want all of the generic logic for this except for this.
@@ -119,12 +139,13 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(
// There is difference to generic method which does not allow
// rematerialization if there are virtual register uses. We allow this,
// therefore this method includes SOP instructions as well.
- return !MI.hasImplicitDef() &&
- MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
- !MI.mayRaiseFPException();
+ if (!MI.hasImplicitDef() &&
+ MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
+ !MI.mayRaiseFPException())
+ return true;
}
- return false;
+ return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}
// Returns true if the scalar result of a VALU instruction depends on exec.
@@ -169,6 +190,48 @@ bool SIInstrInfo::isIgnorableUse(const MachineOperand &MO) const {
isVALU(*MO.getParent()) && !resultDependsOnExec(*MO.getParent());
}
+bool SIInstrInfo::isSafeToSink(MachineInstr &MI,
+ MachineBasicBlock *SuccToSinkTo,
+ MachineCycleInfo *CI) const {
+ // Allow sinking if MI edits lane mask (divergent i1 in sgpr).
+ if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)
+ return true;
+
+ MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+ // Check if sinking MI would create a temporal divergent use.
+ for (auto Op : MI.uses()) {
+ if (Op.isReg() && Op.getReg().isVirtual() &&
+ RI.isSGPRClass(MRI.getRegClass(Op.getReg()))) {
+ MachineInstr *SgprDef = MRI.getVRegDef(Op.getReg());
+
+ // SgprDef is defined inside a cycle.
+ MachineCycle *FromCycle = CI->getCycle(SgprDef->getParent());
+ if (FromCycle == nullptr)
+ continue;
+
+ MachineCycle *ToCycle = CI->getCycle(SuccToSinkTo);
+ // Check if there is a FromCycle that contains SgprDef's basic block but
+ // does not contain SuccToSinkTo and also has a divergent exit condition.
+ while (FromCycle && !FromCycle->contains(ToCycle)) {
+ // After structurize-cfg, there should be exactly one cycle exit.
+ SmallVector<MachineBasicBlock *, 1> ExitBlocks;
+ FromCycle->getExitBlocks(ExitBlocks);
+ assert(ExitBlocks.size() == 1);
+ assert(ExitBlocks[0]->getSinglePredecessor());
+
+ // FromCycle has divergent exit condition.
+ if (hasDivergentBranch(ExitBlocks[0]->getSinglePredecessor())) {
+ return false;
+ }
+
+ FromCycle = FromCycle->getParentCycle();
+ }
+ }
+ }
+
+ return true;
+}
+
bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
int64_t &Offset0,
int64_t &Offset1) const {
@@ -421,6 +484,8 @@ bool SIInstrInfo::getMemOperandsWithOffsetWidth(
Offset = OffsetOp ? OffsetOp->getImm() : 0;
// Get appropriate operand, and compute width accordingly.
DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
+ if (DataOpIdx == -1)
+ return false;
Width = getOpSize(LdSt, DataOpIdx);
return true;
}
@@ -479,8 +544,10 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
}
bool SIInstrInfo::shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+ int64_t Offset1, bool OffsetIsScalable1,
ArrayRef<const MachineOperand *> BaseOps2,
- unsigned NumLoads,
+ int64_t Offset2, bool OffsetIsScalable2,
+ unsigned ClusterSize,
unsigned NumBytes) const {
// If the mem ops (to be clustered) do not have the same base ptr, then they
// should not be clustered
@@ -506,8 +573,8 @@ bool SIInstrInfo::shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
// (3) 9 <= LoadSize <= 12: cluster at max 2 mem ops
// (4) 13 <= LoadSize <= 16: cluster at max 2 mem ops
// (5) LoadSize >= 17: do not cluster
- const unsigned LoadSize = NumBytes / NumLoads;
- const unsigned NumDWORDs = ((LoadSize + 3) / 4) * NumLoads;
+ const unsigned LoadSize = NumBytes / ClusterSize;
+ const unsigned NumDWORDs = ((LoadSize + 3) / 4) * ClusterSize;
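+ // E.g. two 12-byte loads: LoadSize = 12, NumDWORDs = ((12 + 3) / 4) * 2 = 6,
+ // so the pair is clustered (6 <= 8).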
return NumDWORDs <= 8;
}
@@ -619,7 +686,7 @@ static void indirectCopyToAGPR(const SIInstrInfo &TII,
}
RS.enterBasicBlockEnd(MBB);
- RS.backward(MI);
+ RS.backward(std::next(MI));
// Ideally we want to have three registers for a long reg_sequence copy
// to hide 2 waitstates between v_mov_b32 and accvgpr_write.
@@ -680,23 +747,27 @@ static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB,
for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
int16_t SubIdx = BaseIndices[Idx];
- Register Reg = RI.getSubReg(DestReg, SubIdx);
+ Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
+ Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
+ assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
unsigned Opcode = AMDGPU::S_MOV_B32;
// Is SGPR aligned? If so try to combine with next.
- Register Src = RI.getSubReg(SrcReg, SubIdx);
- bool AlignedDest = ((Reg - AMDGPU::SGPR0) % 2) == 0;
- bool AlignedSrc = ((Src - AMDGPU::SGPR0) % 2) == 0;
+ bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
+ bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
// Can use SGPR64 copy
unsigned Channel = RI.getChannelFromSubReg(SubIdx);
SubIdx = RI.getSubRegFromChannel(Channel, 2);
+ DestSubReg = RI.getSubReg(DestReg, SubIdx);
+ SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
+ assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
Opcode = AMDGPU::S_MOV_B64;
Idx++;
}
- LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), RI.getSubReg(DestReg, SubIdx))
- .addReg(RI.getSubReg(SrcReg, SubIdx))
+ LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), DestSubReg)
+ .addReg(SrcSubReg)
.addReg(SrcReg, RegState::Implicit);
if (!FirstMI)
@@ -722,24 +793,32 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
const DebugLoc &DL, MCRegister DestReg,
MCRegister SrcReg, bool KillSrc) const {
const TargetRegisterClass *RC = RI.getPhysRegBaseClass(DestReg);
-
- // FIXME: This is hack to resolve copies between 16 bit and 32 bit
- // registers until all patterns are fixed.
- if (Fix16BitCopies &&
- ((RI.getRegSizeInBits(*RC) == 16) ^
- (RI.getRegSizeInBits(*RI.getPhysRegBaseClass(SrcReg)) == 16))) {
- MCRegister &RegToFix = (RI.getRegSizeInBits(*RC) == 16) ? DestReg : SrcReg;
- MCRegister Super = RI.get32BitRegister(RegToFix);
- assert(RI.getSubReg(Super, AMDGPU::lo16) == RegToFix);
- RegToFix = Super;
-
- if (DestReg == SrcReg) {
- // Insert empty bundle since ExpandPostRA expects an instruction here.
- BuildMI(MBB, MI, DL, get(AMDGPU::BUNDLE));
- return;
+ unsigned Size = RI.getRegSizeInBits(*RC);
+ const TargetRegisterClass *SrcRC = RI.getPhysRegBaseClass(SrcReg);
+ unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);
+
+ // The rest of copyPhysReg assumes Src and Dst size are the same size.
+ // TODO-GFX11_16BIT: once all true 16-bit instruction patterns are complete,
+ // can we remove Fix16BitCopies and this code block?
+ if (Fix16BitCopies) {
+ if (((Size == 16) != (SrcSize == 16))) {
+ // Non-VGPR Src and Dst will later be expanded back to 32 bits.
+ assert(ST.hasTrue16BitInsts());
+ MCRegister &RegToFix = (Size == 32) ? DestReg : SrcReg;
+ MCRegister SubReg = RI.getSubReg(RegToFix, AMDGPU::lo16);
+ RegToFix = SubReg;
+
+ if (DestReg == SrcReg) {
+ // Identity copy. Insert empty bundle since ExpandPostRA expects an
+ // instruction here.
+ BuildMI(MBB, MI, DL, get(AMDGPU::BUNDLE));
+ return;
+ }
+ RC = RI.getPhysRegBaseClass(DestReg);
+ Size = RI.getRegSizeInBits(*RC);
+ SrcRC = RI.getPhysRegBaseClass(SrcReg);
+ SrcSize = RI.getRegSizeInBits(*SrcRC);
}
-
- RC = RI.getPhysRegBaseClass(DestReg);
}
if (RC == &AMDGPU::VGPR_32RegClass) {
@@ -863,10 +942,8 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- const unsigned Size = RI.getRegSizeInBits(*RC);
if (Size == 16) {
- assert(AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
- AMDGPU::VGPR_HI16RegClass.contains(SrcReg) ||
+ assert(AMDGPU::VGPR_16RegClass.contains(SrcReg) ||
AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
AMDGPU::AGPR_LO16RegClass.contains(SrcReg));
@@ -904,6 +981,25 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
+ if (ST.hasTrue16BitInsts()) {
+ if (IsSGPRSrc) {
+ assert(SrcLow);
+ SrcReg = NewSrcReg;
+ }
+ // Use the smaller instruction encoding if possible.
+ if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
+ (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B16_t16_e32), DestReg)
+ .addReg(SrcReg);
+ } else {
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B16_t16_e64), DestReg)
+ .addImm(0) // src0_modifiers
+ .addReg(SrcReg)
+ .addImm(0); // op_sel
+ }
+ return;
+ }
+
if (IsSGPRSrc && !ST.hasSDWAScalar()) {
if (!DstLow || !SrcLow) {
reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc,
@@ -930,14 +1026,13 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- const TargetRegisterClass *SrcRC = RI.getPhysRegBaseClass(SrcReg);
if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
if (ST.hasMovB64()) {
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_e32), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
- if (ST.hasPackedFP32Ops()) {
+ if (ST.hasPkMovB32()) {
BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DestReg)
.addImm(SISrcMods::OP_SEL_1)
.addReg(SrcReg)
@@ -984,7 +1079,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (ST.hasMovB64()) {
Opcode = AMDGPU::V_MOV_B64_e32;
EltSize = 8;
- } else if (ST.hasPackedFP32Ops()) {
+ } else if (ST.hasPkMovB32()) {
Opcode = AMDGPU::V_PK_MOV_B32;
EltSize = 8;
}
@@ -1012,6 +1107,9 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
SubIdx = SubIndices[Idx];
else
SubIdx = SubIndices[SubIndices.size() - Idx - 1];
+ Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
+ Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
+ assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
bool IsFirstSubreg = Idx == 0;
bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;
@@ -1019,30 +1117,26 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
Register ImpDefSuper = IsFirstSubreg ? Register(DestReg) : Register();
Register ImpUseSuper = SrcReg;
- indirectCopyToAGPR(*this, MBB, MI, DL, RI.getSubReg(DestReg, SubIdx),
- RI.getSubReg(SrcReg, SubIdx), UseKill, *RS, Overlap,
- ImpDefSuper, ImpUseSuper);
+ indirectCopyToAGPR(*this, MBB, MI, DL, DestSubReg, SrcSubReg, UseKill,
+ *RS, Overlap, ImpDefSuper, ImpUseSuper);
} else if (Opcode == AMDGPU::V_PK_MOV_B32) {
- Register DstSubReg = RI.getSubReg(DestReg, SubIdx);
- Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
MachineInstrBuilder MIB =
- BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DstSubReg)
- .addImm(SISrcMods::OP_SEL_1)
- .addReg(SrcSubReg)
- .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1)
- .addReg(SrcSubReg)
- .addImm(0) // op_sel_lo
- .addImm(0) // op_sel_hi
- .addImm(0) // neg_lo
- .addImm(0) // neg_hi
- .addImm(0) // clamp
- .addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DestSubReg)
+ .addImm(SISrcMods::OP_SEL_1)
+ .addReg(SrcSubReg)
+ .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1)
+ .addReg(SrcSubReg)
+ .addImm(0) // op_sel_lo
+ .addImm(0) // op_sel_hi
+ .addImm(0) // neg_lo
+ .addImm(0) // neg_hi
+ .addImm(0) // clamp
+ .addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit);
if (IsFirstSubreg)
MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
} else {
MachineInstrBuilder Builder =
- BuildMI(MBB, MI, DL, get(Opcode), RI.getSubReg(DestReg, SubIdx))
- .addReg(RI.getSubReg(SrcReg, SubIdx));
+ BuildMI(MBB, MI, DL, get(Opcode), DestSubReg).addReg(SrcSubReg);
if (IsFirstSubreg)
Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
@@ -1286,7 +1380,11 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
if (RI.isAGPRClass(DstRC))
return AMDGPU::COPY;
- if (RI.getRegSizeInBits(*DstRC) == 32) {
+ if (RI.getRegSizeInBits(*DstRC) == 16) {
+ // Assume hi bits are unneeded. Only _e64 true16 instructions are legal
+ // before RA.
+ return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
+ } else if (RI.getRegSizeInBits(*DstRC) == 32) {
return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
} else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) {
return AMDGPU::S_MOV_B64;
@@ -1587,11 +1685,15 @@ static unsigned getAVSpillSaveOpcode(unsigned Size) {
}
}
-static unsigned getWWMRegSpillSaveOpcode(unsigned Size) {
+static unsigned getWWMRegSpillSaveOpcode(unsigned Size,
+ bool IsVectorSuperClass) {
// Currently, only 32-bit WWM register spills are needed.
if (Size != 4)
llvm_unreachable("unknown wwm register spill size");
+ if (IsVectorSuperClass)
+ return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
+
return AMDGPU::SI_SPILL_WWM_V32_SAVE;
}
@@ -1600,11 +1702,13 @@ static unsigned getVectorRegSpillSaveOpcode(Register Reg,
unsigned Size,
const SIRegisterInfo &TRI,
const SIMachineFunctionInfo &MFI) {
+ bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);
+
// Choose the right opcode if spilling a WWM register.
if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG))
- return getWWMRegSpillSaveOpcode(Size);
+ return getWWMRegSpillSaveOpcode(Size, IsVectorSuperClass);
- if (TRI.isVectorSuperClass(RC))
+ if (IsVectorSuperClass)
return getAVSpillSaveOpcode(Size);
return TRI.isAGPRClass(RC) ? getAGPRSpillSaveOpcode(Size)
@@ -1807,11 +1911,15 @@ static unsigned getAVSpillRestoreOpcode(unsigned Size) {
}
}
-static unsigned getWWMRegSpillRestoreOpcode(unsigned Size) {
+static unsigned getWWMRegSpillRestoreOpcode(unsigned Size,
+ bool IsVectorSuperClass) {
// Currently, only 32-bit WWM register spills are needed.
if (Size != 4)
llvm_unreachable("unknown wwm register spill size");
+ if (IsVectorSuperClass)
+ return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
+
return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
}
@@ -1819,11 +1927,13 @@ static unsigned
getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC,
unsigned Size, const SIRegisterInfo &TRI,
const SIMachineFunctionInfo &MFI) {
+ bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);
+
// Choose the right opcode if restoring a WWM register.
if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG))
- return getWWMRegSpillRestoreOpcode(Size);
+ return getWWMRegSpillRestoreOpcode(Size, IsVectorSuperClass);
- if (TRI.isVectorSuperClass(RC))
+ if (IsVectorSuperClass)
return getAVSpillRestoreOpcode(Size);
return TRI.isAGPRClass(RC) ? getAGPRSpillRestoreOpcode(Size)
@@ -2006,6 +2116,14 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));
break;
+ case AMDGPU::SI_SPILL_S32_TO_VGPR:
+ MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
+ break;
+
+ case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
+ MI.setDesc(get(AMDGPU::V_READLANE_B32));
+ break;
+
case AMDGPU::V_MOV_B64_PSEUDO: {
Register Dst = MI.getOperand(0).getReg();
Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
@@ -2024,7 +2142,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
APInt Imm(64, SrcOp.getImm());
APInt Lo(32, Imm.getLoBits(32).getZExtValue());
APInt Hi(32, Imm.getHiBits(32).getZExtValue());
- if (ST.hasPackedFP32Ops() && Lo == Hi && isInlineConstant(Lo)) {
+ if (ST.hasPkMovB32() && Lo == Hi && isInlineConstant(Lo)) {
BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), Dst)
.addImm(SISrcMods::OP_SEL_1)
.addImm(Lo.getSExtValue())
@@ -2045,7 +2163,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}
} else {
assert(SrcOp.isReg());
- if (ST.hasPackedFP32Ops() &&
+ if (ST.hasPkMovB32() &&
!RI.isAGPR(MBB.getParent()->getRegInfo(), SrcOp.getReg())) {
BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), Dst)
.addImm(SISrcMods::OP_SEL_1) // src0_mod
@@ -2275,23 +2393,34 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
Register Reg = MI.getOperand(0).getReg();
Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
+ MachineOperand OpLo = MI.getOperand(1);
+ MachineOperand OpHi = MI.getOperand(2);
// Create a bundle so these instructions won't be re-ordered by the
// post-RA scheduler.
MIBundleBuilder Bundler(MBB, MI);
Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));
- // Add 32-bit offset from this instruction to the start of the
- // constant data.
- Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
- .addReg(RegLo)
- .add(MI.getOperand(1)));
+ // What we want here is an offset from the value returned by s_getpc (which
+ // is the address of the s_add_u32 instruction) to the global variable, but
+ // since the encoding of $symbol starts 4 bytes after the start of the
+ // s_add_u32 instruction, we end up with an offset that is 4 bytes too
+ // small. This requires us to add 4 to the global variable offset in order
+ // to compute the correct address. Similarly for the s_addc_u32 instruction,
+ // the encoding of $symbol starts 12 bytes after the start of the s_add_u32
+ // instruction.
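+ //
+ // Relative to the PC returned by s_getpc_b64 (the address of s_add_u32):
+ //   PC+0: s_add_u32 encoding, with the $symbol literal at PC+4
+ //   PC+8: s_addc_u32 encoding, with the $symbol literal at PC+12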
+
+ if (OpLo.isGlobal())
+ OpLo.setOffset(OpLo.getOffset() + 4);
+ Bundler.append(
+ BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));
+
+ if (OpHi.isGlobal())
+ OpHi.setOffset(OpHi.getOffset() + 12);
+ Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
+ .addReg(RegHi)
+ .add(OpHi));
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
- .addReg(RegHi);
- MIB.add(MI.getOperand(2));
-
- Bundler.append(MIB);
finalizeBundle(MBB, Bundler.begin());
MI.eraseFromParent();
@@ -2350,12 +2479,98 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
+void SIInstrInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, Register DestReg,
+ unsigned SubIdx, const MachineInstr &Orig,
+ const TargetRegisterInfo &RI) const {
+
+ // Try shrinking the instruction to remat only the part needed for current
+ // context.
+ // TODO: Handle more cases.
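+ // E.g. if only sub4_sub5_sub6_sub7 of an S_LOAD_DWORDX16 result is used,
+ // remat a single S_LOAD_DWORDX4 with the immediate offset advanced by 16
+ // bytes instead of the full 64-byte load.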
+ unsigned Opcode = Orig.getOpcode();
+ switch (Opcode) {
+ case AMDGPU::S_LOAD_DWORDX16_IMM:
+ case AMDGPU::S_LOAD_DWORDX8_IMM: {
+ if (SubIdx != 0)
+ break;
+
+ if (I == MBB.end())
+ break;
+
+ if (I->isBundled())
+ break;
+
+ // Look for a single use of the register that is also a subreg.
+ Register RegToFind = Orig.getOperand(0).getReg();
+ MachineOperand *UseMO = nullptr;
+ for (auto &CandMO : I->operands()) {
+ if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())
+ continue;
+ if (UseMO) {
+ UseMO = nullptr;
+ break;
+ }
+ UseMO = &CandMO;
+ }
+ if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)
+ break;
+
+ unsigned Offset = RI.getSubRegIdxOffset(UseMO->getSubReg());
+ unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg());
+
+ MachineFunction *MF = MBB.getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");
+
+ unsigned NewOpcode = -1;
+ if (SubregSize == 256)
+ NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
+ else if (SubregSize == 128)
+ NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;
+ else
+ break;
+
+ const MCInstrDesc &TID = get(NewOpcode);
+ const TargetRegisterClass *NewRC =
+ RI.getAllocatableClass(getRegClass(TID, 0, &RI, *MF));
+ MRI.setRegClass(DestReg, NewRC);
+
+ UseMO->setReg(DestReg);
+ UseMO->setSubReg(AMDGPU::NoSubRegister);
+
+ // Use a smaller load with the desired size, possibly with updated offset.
+ MachineInstr *MI = MF->CloneMachineInstr(&Orig);
+ MI->setDesc(TID);
+ MI->getOperand(0).setReg(DestReg);
+ MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);
+ if (Offset) {
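+ // getSubRegIdxOffset returns bits; the SMEM offset operand is in bytes.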
+ MachineOperand *OffsetMO = getNamedOperand(*MI, AMDGPU::OpName::offset);
+ int64_t FinalOffset = OffsetMO->getImm() + Offset / 8;
+ OffsetMO->setImm(FinalOffset);
+ }
+ SmallVector<MachineMemOperand *> NewMMOs;
+ for (const MachineMemOperand *MemOp : Orig.memoperands())
+ NewMMOs.push_back(MF->getMachineMemOperand(MemOp, MemOp->getPointerInfo(),
+ SubregSize / 8));
+ MI->setMemRefs(*MF, NewMMOs);
+
+ MBB.insert(I, MI);
+ return;
+ }
+
+ default:
+ break;
+ }
+
+ TargetInstrInfo::reMaterialize(MBB, I, DestReg, SubIdx, Orig, RI);
+}
+
std::pair<MachineInstr*, MachineInstr*>
SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
if (ST.hasMovB64() &&
- AMDGPU::isLegal64BitDPPControl(
+ AMDGPU::isLegalDPALU_DPPControl(
getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl)->getImm())) {
MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
return std::pair(&MI, nullptr);
@@ -2414,6 +2629,14 @@ SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
return std::pair(Split[0], Split[1]);
}
+std::optional<DestSourcePair>
+SIInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
+ if (MI.getOpcode() == AMDGPU::WWM_COPY)
+ return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
+
+ return std::nullopt;
+}
+
bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI,
MachineOperand &Src0,
unsigned Src0OpName,
@@ -2474,6 +2697,9 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
if (CommutedOpcode == -1)
return nullptr;
+ if (Src0Idx > Src1Idx)
+ std::swap(Src0Idx, Src1Idx);
+
assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
static_cast<int>(Src0Idx) &&
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
@@ -2556,14 +2782,8 @@ bool SIInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
return isIntN(BranchOffsetBits, BrOffset);
}
-MachineBasicBlock *SIInstrInfo::getBranchDestBlock(
- const MachineInstr &MI) const {
- if (MI.getOpcode() == AMDGPU::S_SETPC_B64) {
- // This would be a difficult analysis to perform, but can always be legal so
- // there's no need to analyze it.
- return nullptr;
- }
-
+MachineBasicBlock *
+SIInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
return MI.getOperand(0).getMBB();
}
@@ -2874,7 +3094,6 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
= getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));
if (!FBB) {
- Cond[1].isUndef();
MachineInstr *CondBr =
BuildMI(&MBB, DL, get(Opcode))
.addMBB(TBB);
@@ -3079,7 +3298,9 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
case AMDGPU::V_MOV_B64_e64:
case AMDGPU::S_MOV_B32:
case AMDGPU::S_MOV_B64:
+ case AMDGPU::S_MOV_B64_IMM_PSEUDO:
case AMDGPU::COPY:
+ case AMDGPU::WWM_COPY:
case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
case AMDGPU::V_ACCVGPR_READ_B32_e64:
case AMDGPU::V_ACCVGPR_MOV_B32:
@@ -3111,11 +3332,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
switch (DefMI.getOpcode()) {
default:
return false;
+ case AMDGPU::V_MOV_B64_e32:
case AMDGPU::S_MOV_B64:
- // TODO: We could fold 64-bit immediates, but this get complicated
- // when there are sub-registers.
- return false;
-
+ case AMDGPU::V_MOV_B64_PSEUDO:
+ case AMDGPU::S_MOV_B64_IMM_PSEUDO:
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::S_MOV_B32:
case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
@@ -3128,19 +3348,45 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (!ImmOp->isImm())
return false;
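+ // Pick out the piece of the folded immediate selected by the use's subreg.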
+ auto getImmFor = [ImmOp](const MachineOperand &UseOp) -> int64_t {
+ int64_t Imm = ImmOp->getImm();
+ switch (UseOp.getSubReg()) {
+ default:
+ return Imm;
+ case AMDGPU::sub0:
+ return Lo_32(Imm);
+ case AMDGPU::sub1:
+ return Hi_32(Imm);
+ case AMDGPU::lo16:
+ return APInt(16, Imm).getSExtValue();
+ case AMDGPU::hi16:
+ return APInt(32, Imm).ashr(16).getSExtValue();
+ case AMDGPU::sub1_lo16:
+ return APInt(16, Hi_32(Imm)).getSExtValue();
+ case AMDGPU::sub1_hi16:
+ return APInt(32, Hi_32(Imm)).ashr(16).getSExtValue();
+ }
+ };
+
+ assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");
+
unsigned Opc = UseMI.getOpcode();
if (Opc == AMDGPU::COPY) {
+ assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");
+
Register DstReg = UseMI.getOperand(0).getReg();
- bool Is16Bit = getOpSize(UseMI, 0) == 2;
+ unsigned OpSize = getOpSize(UseMI, 0);
+ bool Is16Bit = OpSize == 2;
+ bool Is64Bit = OpSize == 8;
bool isVGPRCopy = RI.isVGPR(*MRI, DstReg);
- unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
- APInt Imm(32, ImmOp->getImm());
-
- if (UseMI.getOperand(1).getSubReg() == AMDGPU::hi16)
- Imm = Imm.ashr(16);
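+ // Pick a 32- or 64-bit move matching the destination register bank.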
+ unsigned NewOpc = isVGPRCopy ? Is64Bit ? AMDGPU::V_MOV_B64_PSEUDO
+ : AMDGPU::V_MOV_B32_e32
+ : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
+ : AMDGPU::S_MOV_B32;
+ APInt Imm(Is64Bit ? 64 : 32, getImmFor(UseMI.getOperand(1)));
if (RI.isAGPR(*MRI, DstReg)) {
- if (!isInlineConstant(Imm))
+ if (Is64Bit || !isInlineConstant(Imm))
return false;
NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
}
@@ -3200,14 +3446,32 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
// Multiplied part is the constant: Use v_madmk_{f16, f32}.
- // We should only expect these to be on src0 due to canonicalization.
- if (Src0->isReg() && Src0->getReg() == Reg) {
- if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
+ if ((Src0->isReg() && Src0->getReg() == Reg) ||
+ (Src1->isReg() && Src1->getReg() == Reg)) {
+ MachineOperand *RegSrc =
+ Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
+ if (!RegSrc->isReg())
+ return false;
+ if (RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())) &&
+ ST.getConstantBusLimit(Opc) < 2)
return false;
if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))
return false;
+ // If src2 is also a literal constant then we have to choose which one to
+ // fold. In general it is better to choose madak so that the other literal
+ // can be materialized in an sgpr instead of a vgpr:
+ // s_mov_b32 s0, literal
+ // v_madak_f32 v0, s0, v0, literal
+ // Instead of:
+ // v_mov_b32 v1, literal
+ // v_madmk_f32 v0, v0, literal, v1
+ MachineInstr *Def = MRI->getUniqueVRegDef(Src2->getReg());
+ if (Def && Def->isMoveImmediate() &&
+ !isInlineConstant(Def->getOperand(1)))
+ return false;
+
unsigned NewOpc =
IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
: ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_t16
@@ -3216,18 +3480,22 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (pseudoToMCOpcode(NewOpc) == -1)
return false;
- // We need to swap operands 0 and 1 since madmk constant is at operand 1.
+ // V_FMAMK_F16_t16 takes VGPR_32_Lo128 operands, so the rewrite
+ // would also require restricting their register classes. For now
+ // just bail out.
+ if (NewOpc == AMDGPU::V_FMAMK_F16_t16)
+ return false;
- const int64_t Imm = ImmOp->getImm();
+ const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);
// FIXME: This would be a lot easier if we could return a new instruction
// instead of having to modify in place.
- Register Src1Reg = Src1->getReg();
- unsigned Src1SubReg = Src1->getSubReg();
- Src0->setReg(Src1Reg);
- Src0->setSubReg(Src1SubReg);
- Src0->setIsKill(Src1->isKill());
+ Register SrcReg = RegSrc->getReg();
+ unsigned SrcSubReg = RegSrc->getSubReg();
+ Src0->setReg(SrcReg);
+ Src0->setSubReg(SrcSubReg);
+ Src0->setIsKill(RegSrc->isKill());
if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
@@ -3249,43 +3517,38 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
// Added part is the constant: Use v_madak_{f16, f32}.
if (Src2->isReg() && Src2->getReg() == Reg) {
- // Not allowed to use constant bus for another operand.
- // We can however allow an inline immediate as src0.
- bool Src0Inlined = false;
- if (Src0->isReg()) {
- // Try to inline constant if possible.
- // If the Def moves immediate and the use is single
- // We are saving VGPR here.
- MachineInstr *Def = MRI->getUniqueVRegDef(Src0->getReg());
- if (Def && Def->isMoveImmediate() &&
- isInlineConstant(Def->getOperand(1)) &&
- MRI->hasOneUse(Src0->getReg())) {
- Src0->ChangeToImmediate(Def->getOperand(1).getImm());
- Src0Inlined = true;
- } else if ((Src0->getReg().isPhysical() &&
- (ST.getConstantBusLimit(Opc) <= 1 &&
- RI.isSGPRClass(RI.getPhysRegBaseClass(Src0->getReg())))) ||
- (Src0->getReg().isVirtual() &&
- (ST.getConstantBusLimit(Opc) <= 1 &&
- RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))))
- return false;
+ if (ST.getConstantBusLimit(Opc) < 2) {
+ // Not allowed to use constant bus for another operand.
+ // We can however allow an inline immediate as src0.
+ bool Src0Inlined = false;
+ if (Src0->isReg()) {
+ // Try to inline constant if possible.
+ // If the Def is a move of an immediate and this is its single use,
+ // we are saving a VGPR here.
+ MachineInstr *Def = MRI->getUniqueVRegDef(Src0->getReg());
+ if (Def && Def->isMoveImmediate() &&
+ isInlineConstant(Def->getOperand(1)) &&
+ MRI->hasOneUse(Src0->getReg())) {
+ Src0->ChangeToImmediate(Def->getOperand(1).getImm());
+ Src0Inlined = true;
+ } else if (ST.getConstantBusLimit(Opc) <= 1 &&
+ RI.isSGPRReg(*MRI, Src0->getReg())) {
+ return false;
+ }
// VGPR is okay as Src0 - fallthrough
- }
+ }
- if (Src1->isReg() && !Src0Inlined ) {
- // We have one slot for inlinable constant so far - try to fill it
- MachineInstr *Def = MRI->getUniqueVRegDef(Src1->getReg());
- if (Def && Def->isMoveImmediate() &&
- isInlineConstant(Def->getOperand(1)) &&
- MRI->hasOneUse(Src1->getReg()) &&
- commuteInstruction(UseMI)) {
+ if (Src1->isReg() && !Src0Inlined) {
+ // We have one slot for an inlinable constant so far - try to fill it.
+ MachineInstr *Def = MRI->getUniqueVRegDef(Src1->getReg());
+ if (Def && Def->isMoveImmediate() &&
+ isInlineConstant(Def->getOperand(1)) &&
+ MRI->hasOneUse(Src1->getReg()) && commuteInstruction(UseMI))
Src0->ChangeToImmediate(Def->getOperand(1).getImm());
- } else if ((Src1->getReg().isPhysical() &&
- RI.isSGPRClass(RI.getPhysRegBaseClass(Src1->getReg()))) ||
- (Src1->getReg().isVirtual() &&
- RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
- return false;
+ else if (RI.isSGPRReg(*MRI, Src1->getReg()))
+ return false;
// VGPR is okay as Src1 - fallthrough
+ }
}
unsigned NewOpc =
@@ -3296,7 +3559,11 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (pseudoToMCOpcode(NewOpc) == -1)
return false;
- const int64_t Imm = ImmOp->getImm();
+ // V_FMAAK_F16_t16 takes VGPR_32_Lo128 operands, so the rewrite
+ // would also require restricting their register classes. For now
+ // just bail out.
+ if (NewOpc == AMDGPU::V_FMAAK_F16_t16)
+ return false;
// FIXME: This would be a lot easier if we could return a new instruction
// instead of having to modify in place.
@@ -3308,7 +3575,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
// ChangingToImmediate adds Src2 back to the instruction.
- Src2->ChangeToImmediate(Imm);
+ Src2->ChangeToImmediate(getImmFor(*Src2));
// These come before src2.
removeModOperands(UseMI);
@@ -3403,19 +3670,30 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
if (isMUBUF(MIb) || isMTBUF(MIb))
return checkInstOffsetsDoNotOverlap(MIa, MIb);
- return !isFLAT(MIb) && !isSMRD(MIb);
+ if (isFLAT(MIb))
+ return isFLATScratch(MIb);
+
+ return !isSMRD(MIb);
}
if (isSMRD(MIa)) {
if (isSMRD(MIb))
return checkInstOffsetsDoNotOverlap(MIa, MIb);
- return !isFLAT(MIb) && !isMUBUF(MIb) && !isMTBUF(MIb);
+ if (isFLAT(MIb))
+ return isFLATScratch(MIb);
+
+ return !isMUBUF(MIb) && !isMTBUF(MIb);
}
if (isFLAT(MIa)) {
- if (isFLAT(MIb))
+ if (isFLAT(MIb)) {
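+ // FLAT scratch and FLAT global access disjoint address spaces (private
+ // vs. global), so they can never alias.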
+ if ((isFLATScratch(MIa) && isFLATGlobal(MIb)) ||
+ (isFLATGlobal(MIa) && isFLATScratch(MIb)))
+ return true;
+
return checkInstOffsetsDoNotOverlap(MIa, MIb);
+ }
return false;
}
@@ -3722,13 +4000,7 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
}
bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
- return Opcode == AMDGPU::DS_ORDERED_COUNT ||
- Opcode == AMDGPU::DS_GWS_INIT ||
- Opcode == AMDGPU::DS_GWS_SEMA_V ||
- Opcode == AMDGPU::DS_GWS_SEMA_BR ||
- Opcode == AMDGPU::DS_GWS_SEMA_P ||
- Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
- Opcode == AMDGPU::DS_GWS_BARRIER;
+ return Opcode == AMDGPU::DS_ORDERED_COUNT || isGWS(Opcode);
}
bool SIInstrInfo::modifiesModeRegister(const MachineInstr &MI) {
@@ -3773,7 +4045,9 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
// However, executing them with EXEC = 0 causes them to operate on undefined
// data, which we avoid by returning true here.
if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
- Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32)
+ Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
+ Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
+ Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
return true;
return false;
@@ -3827,9 +4101,7 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
uint8_t OperandType) const {
assert(!MO.isReg() && "isInlineConstant called on register operand!");
- if (!MO.isImm() ||
- OperandType < AMDGPU::OPERAND_SRC_FIRST ||
- OperandType > AMDGPU::OPERAND_SRC_LAST)
+ if (!MO.isImm())
return false;
// MachineOperand provides no way to tell the true operand size, since it only
@@ -3849,7 +4121,8 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
case AMDGPU::OPERAND_REG_IMM_V2INT32:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
- case AMDGPU::OPERAND_REG_INLINE_AC_FP32: {
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
+ case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: {
int32_t Trunc = static_cast<int32_t>(Imm);
return AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
}
@@ -3877,12 +4150,15 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
- // This suffers the same problem as the scalar 16-bit cases.
- return AMDGPU::isInlinableIntLiteralV216(Imm);
+ return (isInt<16>(Imm) || isUInt<16>(Imm)) &&
+ AMDGPU::isInlinableIntLiteral((int16_t)Imm);
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
- case AMDGPU::OPERAND_REG_INLINE_AC_FP16: {
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
if (isInt<16>(Imm) || isUInt<16>(Imm)) {
// A few special case instructions have 16-bit operands on subtargets
// where 16-bit instructions are not legal.
@@ -3895,17 +4171,26 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
return false;
}
- case AMDGPU::OPERAND_REG_IMM_V2FP16:
- case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
- case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
- uint32_t Trunc = static_cast<uint32_t>(Imm);
- return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm());
- }
case AMDGPU::OPERAND_KIMM32:
case AMDGPU::OPERAND_KIMM16:
return false;
+ case AMDGPU::OPERAND_INPUT_MODS:
+ case MCOI::OPERAND_IMMEDIATE:
+ // Always embedded in the instruction for free.
+ return true;
+ case MCOI::OPERAND_UNKNOWN:
+ case MCOI::OPERAND_REGISTER:
+ case MCOI::OPERAND_PCREL:
+ case MCOI::OPERAND_GENERIC_0:
+ case MCOI::OPERAND_GENERIC_1:
+ case MCOI::OPERAND_GENERIC_2:
+ case MCOI::OPERAND_GENERIC_3:
+ case MCOI::OPERAND_GENERIC_4:
+ case MCOI::OPERAND_GENERIC_5:
+ // Just ignore anything else.
+ return true;
default:
- llvm_unreachable("invalid bitwidth");
+ llvm_unreachable("invalid operand type");
}
}
@@ -4154,7 +4439,9 @@ static bool shouldReadExec(const MachineInstr &MI) {
if (SIInstrInfo::isVALU(MI)) {
switch (MI.getOpcode()) {
case AMDGPU::V_READLANE_B32:
+ case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
case AMDGPU::V_WRITELANE_B32:
+ case AMDGPU::SI_SPILL_S32_TO_VGPR:
return false;
}
@@ -4231,8 +4518,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
return true;
}
- if (isMIMG(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
- ErrInfo = "missing memory operand from MIMG instruction.";
+ if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
+ ErrInfo = "missing memory operand from image instruction.";
return false;
}
@@ -4276,6 +4563,12 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
break;
}
+ case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
+ if (!MI.getOperand(i).isImm() || !isInlineConstant(MI, i)) {
+ ErrInfo = "Expected inline constant for operand.";
+ return false;
+ }
+ break;
case MCOI::OPERAND_IMMEDIATE:
case AMDGPU::OPERAND_KIMM32:
// Check if this operand is an immediate.
@@ -4418,8 +4711,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
- // Verify MIMG
- if (isMIMG(MI.getOpcode()) && !MI.mayStore()) {
+ // Verify MIMG / VIMAGE / VSAMPLE
+ if (isImage(MI.getOpcode()) && !MI.mayStore()) {
// Ensure that the return type used is large enough for all the options
// being used TFE/LWE require an extra result register.
const MachineOperand *DMask = getNamedOperand(MI, AMDGPU::OpName::dmask);
@@ -4683,12 +4976,14 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
- if (isMIMG(MI)) {
+ if (isImage(MI)) {
const MachineOperand *DimOp = getNamedOperand(MI, AMDGPU::OpName::dim);
if (DimOp) {
int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
AMDGPU::OpName::vaddr0);
- int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
+ int RSrcOpName =
+ isMIMG(MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
+ int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opcode);
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
@@ -4709,16 +5004,17 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
IsA16 = A16->getImm() != 0;
}
- bool IsNSA = SRsrcIdx - VAddr0Idx > 1;
+ bool IsNSA = RsrcIdx - VAddr0Idx > 1;
unsigned AddrWords =
AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, ST.hasG16());
unsigned VAddrWords;
if (IsNSA) {
- VAddrWords = SRsrcIdx - VAddr0Idx;
- if (ST.hasPartialNSAEncoding() && AddrWords > ST.getNSAMaxSize()) {
- unsigned LastVAddrIdx = SRsrcIdx - 1;
+ VAddrWords = RsrcIdx - VAddr0Idx;
+ if (ST.hasPartialNSAEncoding() &&
+ AddrWords > ST.getNSAMaxSize(isVSAMPLE(MI))) {
+ unsigned LastVAddrIdx = RsrcIdx - 1;
VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;
}
} else {
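
A worked example of the partial-NSA accounting above (the concrete sizes are
illustrative assumptions): with 8 address dwords and an NSA limit of 5, the
first four addresses occupy vaddr0..vaddr3 and the remaining four are packed
into one 4-dword register as the last vaddr operand, so

    VAddrWords = RsrcIdx - VAddr0Idx              // 5 operands
               + getOpSize(MI, LastVAddrIdx) / 4  // + 4 dwords in the tail
               - 1                                // tail already counted once
               = 8                                // matches AddrWords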
@@ -4779,20 +5075,10 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
- int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
-
if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
- ((DstIdx >= 0 &&
- (Desc.operands()[DstIdx].RegClass == AMDGPU::VReg_64RegClassID ||
- Desc.operands()[DstIdx].RegClass ==
- AMDGPU::VReg_64_Align2RegClassID)) ||
- ((Src0Idx >= 0 &&
- (Desc.operands()[Src0Idx].RegClass == AMDGPU::VReg_64RegClassID ||
- Desc.operands()[Src0Idx].RegClass ==
- AMDGPU::VReg_64_Align2RegClassID)))) &&
- !AMDGPU::isLegal64BitDPPControl(DC)) {
+ !AMDGPU::isLegalDPALU_DPPControl(DC) && AMDGPU::isDPALU_DPP(Desc)) {
ErrInfo = "Invalid dpp_ctrl value: "
- "64 bit dpp only support row_newbcast";
+ "DP ALU dpp only support row_newbcast";
return false;
}
}
@@ -4884,6 +5170,9 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
return true;
}
+// It is more readable to list mapped opcodes on the same line.
+// clang-format off
+
unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
default: return AMDGPU::INSTRUCTION_LIST_END;
@@ -4960,16 +5249,91 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
+ case AMDGPU::S_CVT_F32_I32: return AMDGPU::V_CVT_F32_I32_e64;
+ case AMDGPU::S_CVT_F32_U32: return AMDGPU::V_CVT_F32_U32_e64;
+ case AMDGPU::S_CVT_I32_F32: return AMDGPU::V_CVT_I32_F32_e64;
+ case AMDGPU::S_CVT_U32_F32: return AMDGPU::V_CVT_U32_F32_e64;
+ case AMDGPU::S_CVT_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64;
+ case AMDGPU::S_CVT_HI_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64;
+ case AMDGPU::S_CVT_F16_F32: return AMDGPU::V_CVT_F16_F32_t16_e64;
+ case AMDGPU::S_CEIL_F32: return AMDGPU::V_CEIL_F32_e64;
+ case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64;
+ case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64;
+ case AMDGPU::S_RNDNE_F32: return AMDGPU::V_RNDNE_F32_e64;
+ case AMDGPU::S_CEIL_F16: return AMDGPU::V_CEIL_F16_t16_e64;
+ case AMDGPU::S_FLOOR_F16: return AMDGPU::V_FLOOR_F16_t16_e64;
+ case AMDGPU::S_TRUNC_F16: return AMDGPU::V_TRUNC_F16_t16_e64;
+ case AMDGPU::S_RNDNE_F16: return AMDGPU::V_RNDNE_F16_t16_e64;
+ case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64;
+ case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64;
+ case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64;
+ case AMDGPU::S_MAX_F32: return AMDGPU::V_MAX_F32_e64;
+ case AMDGPU::S_MINIMUM_F32: return AMDGPU::V_MINIMUM_F32_e64;
+ case AMDGPU::S_MAXIMUM_F32: return AMDGPU::V_MAXIMUM_F32_e64;
+ case AMDGPU::S_MUL_F32: return AMDGPU::V_MUL_F32_e64;
+ case AMDGPU::S_ADD_F16: return AMDGPU::V_ADD_F16_fake16_e64;
+ case AMDGPU::S_SUB_F16: return AMDGPU::V_SUB_F16_fake16_e64;
+ case AMDGPU::S_MIN_F16: return AMDGPU::V_MIN_F16_fake16_e64;
+ case AMDGPU::S_MAX_F16: return AMDGPU::V_MAX_F16_fake16_e64;
+ case AMDGPU::S_MINIMUM_F16: return AMDGPU::V_MINIMUM_F16_e64;
+ case AMDGPU::S_MAXIMUM_F16: return AMDGPU::V_MAXIMUM_F16_e64;
+ case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
+ case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
+ case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
+ case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_t16_e64;
+ case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
+ case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
+ case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;
+ case AMDGPU::S_CMP_EQ_F32: return AMDGPU::V_CMP_EQ_F32_e64;
+ case AMDGPU::S_CMP_LE_F32: return AMDGPU::V_CMP_LE_F32_e64;
+ case AMDGPU::S_CMP_GT_F32: return AMDGPU::V_CMP_GT_F32_e64;
+ case AMDGPU::S_CMP_LG_F32: return AMDGPU::V_CMP_LG_F32_e64;
+ case AMDGPU::S_CMP_GE_F32: return AMDGPU::V_CMP_GE_F32_e64;
+ case AMDGPU::S_CMP_O_F32: return AMDGPU::V_CMP_O_F32_e64;
+ case AMDGPU::S_CMP_U_F32: return AMDGPU::V_CMP_U_F32_e64;
+ case AMDGPU::S_CMP_NGE_F32: return AMDGPU::V_CMP_NGE_F32_e64;
+ case AMDGPU::S_CMP_NLG_F32: return AMDGPU::V_CMP_NLG_F32_e64;
+ case AMDGPU::S_CMP_NGT_F32: return AMDGPU::V_CMP_NGT_F32_e64;
+ case AMDGPU::S_CMP_NLE_F32: return AMDGPU::V_CMP_NLE_F32_e64;
+ case AMDGPU::S_CMP_NEQ_F32: return AMDGPU::V_CMP_NEQ_F32_e64;
+ case AMDGPU::S_CMP_NLT_F32: return AMDGPU::V_CMP_NLT_F32_e64;
+ case AMDGPU::S_CMP_LT_F16: return AMDGPU::V_CMP_LT_F16_t16_e64;
+ case AMDGPU::S_CMP_EQ_F16: return AMDGPU::V_CMP_EQ_F16_t16_e64;
+ case AMDGPU::S_CMP_LE_F16: return AMDGPU::V_CMP_LE_F16_t16_e64;
+ case AMDGPU::S_CMP_GT_F16: return AMDGPU::V_CMP_GT_F16_t16_e64;
+ case AMDGPU::S_CMP_LG_F16: return AMDGPU::V_CMP_LG_F16_t16_e64;
+ case AMDGPU::S_CMP_GE_F16: return AMDGPU::V_CMP_GE_F16_t16_e64;
+ case AMDGPU::S_CMP_O_F16: return AMDGPU::V_CMP_O_F16_t16_e64;
+ case AMDGPU::S_CMP_U_F16: return AMDGPU::V_CMP_U_F16_t16_e64;
+ case AMDGPU::S_CMP_NGE_F16: return AMDGPU::V_CMP_NGE_F16_t16_e64;
+ case AMDGPU::S_CMP_NLG_F16: return AMDGPU::V_CMP_NLG_F16_t16_e64;
+ case AMDGPU::S_CMP_NGT_F16: return AMDGPU::V_CMP_NGT_F16_t16_e64;
+ case AMDGPU::S_CMP_NLE_F16: return AMDGPU::V_CMP_NLE_F16_t16_e64;
+ case AMDGPU::S_CMP_NEQ_F16: return AMDGPU::V_CMP_NEQ_F16_t16_e64;
+ case AMDGPU::S_CMP_NLT_F16: return AMDGPU::V_CMP_NLT_F16_t16_e64;
+ case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
+ case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_t16_e64;
+ case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
+ case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_t16_e64;
+ case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64;
+ case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_t16_e64;
+ case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64;
+ case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_t16_e64;
+ case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64;
+ case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_t16_e64;
}
llvm_unreachable(
"Unexpected scalar opcode without corresponding vector one!");
}
+// clang-format on
+
void SIInstrInfo::insertScratchExecCopy(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register Reg,
- bool IsSCCLive) const {
+ bool IsSCCLive,
+ SlotIndexes *Indexes) const {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
bool IsWave32 = ST.isWave32();
@@ -4979,23 +5343,34 @@ void SIInstrInfo::insertScratchExecCopy(MachineFunction &MF,
// the single instruction S_OR_SAVEEXEC that clobbers SCC.
unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
MCRegister Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Reg).addReg(Exec, RegState::Kill);
- BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Exec).addImm(-1);
+ auto StoreExecMI = BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Reg)
+ .addReg(Exec, RegState::Kill);
+ auto FlipExecMI = BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Exec).addImm(-1);
+ if (Indexes) {
+ Indexes->insertMachineInstrInMaps(*StoreExecMI);
+ Indexes->insertMachineInstrInMaps(*FlipExecMI);
+ }
} else {
const unsigned OrSaveExec =
IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
auto SaveExec =
BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), Reg).addImm(-1);
SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.
+ if (Indexes)
+ Indexes->insertMachineInstrInMaps(*SaveExec);
}
}
void SIInstrInfo::restoreExec(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, Register Reg) const {
+ const DebugLoc &DL, Register Reg,
+ SlotIndexes *Indexes) const {
unsigned ExecMov = isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
MCRegister Exec = isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- BuildMI(MBB, MBBI, DL, get(ExecMov), Exec).addReg(Reg, RegState::Kill);
+ auto ExecRestoreMI =
+ BuildMI(MBB, MBBI, DL, get(ExecMov), Exec).addReg(Reg, RegState::Kill);
+ if (Indexes)
+ Indexes->insertMachineInstrInMaps(*ExecRestoreMI);
}
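
The new optional SlotIndexes parameter lets callers that run after slot-index
construction keep the maps consistent. A hypothetical caller (the names and
surrounding pass are assumptions, not upstream code):

    SlotIndexes *Indexes = &getAnalysis<SlotIndexes>();
    TII->insertScratchExecCopy(MF, MBB, MBBI, DL, TmpReg,
                               /*IsSCCLive=*/false, Indexes);
    // ... emit code that must run with all lanes enabled ...
    TII->restoreExec(MF, MBB, MBBI, DL, TmpReg, Indexes);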
static const TargetRegisterClass *
@@ -5102,13 +5477,10 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
MO.ChangeToRegister(Reg, false);
}
-unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
- MachineRegisterInfo &MRI,
- MachineOperand &SuperReg,
- const TargetRegisterClass *SuperRC,
- unsigned SubIdx,
- const TargetRegisterClass *SubRC)
- const {
+unsigned SIInstrInfo::buildExtractSubReg(
+ MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI,
+ const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC,
+ unsigned SubIdx, const TargetRegisterClass *SubRC) const {
MachineBasicBlock *MBB = MI->getParent();
DebugLoc DL = MI->getDebugLoc();
Register SubReg = MRI.createVirtualRegister(SubRC);
@@ -5135,12 +5507,9 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
}
MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
- MachineBasicBlock::iterator MII,
- MachineRegisterInfo &MRI,
- MachineOperand &Op,
- const TargetRegisterClass *SuperRC,
- unsigned SubIdx,
- const TargetRegisterClass *SubRC) const {
+ MachineBasicBlock::iterator MII, MachineRegisterInfo &MRI,
+ const MachineOperand &Op, const TargetRegisterClass *SuperRC,
+ unsigned SubIdx, const TargetRegisterClass *SubRC) const {
if (Op.isImm()) {
if (SubIdx == AMDGPU::sub0)
return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm()));
@@ -5235,9 +5604,8 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
return false;
SGPRsUsed.insert(SGPR);
}
- } else if (InstDesc.operands()[i].OperandType == AMDGPU::OPERAND_KIMM32 ||
- (AMDGPU::isSISrcOperand(InstDesc, i) &&
- !isInlineConstant(Op, InstDesc.operands()[i]))) {
+ } else if (AMDGPU::isSISrcOperand(InstDesc, i) &&
+ !isInlineConstant(Op, InstDesc.operands()[i])) {
if (!LiteralLimit--)
return false;
if (--ConstantBusLimit <= 0)
@@ -5285,6 +5653,27 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
return true;
}
+ if (MO->isImm()) {
+ uint64_t Imm = MO->getImm();
+ bool Is64BitFPOp = OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_FP64;
+ bool Is64BitOp = Is64BitFPOp ||
+ OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 ||
+ OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2INT32 ||
+ OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP32;
+ if (Is64BitOp &&
+ !AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm())) {
+ if (!AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp))
+ return false;
+
+ // FIXME: We can use sign extended 64-bit literals, but only for signed
+ // operands. At the moment we do not know if an operand is signed.
+ // Such an operand will be encoded as its low 32 bits and then either
+ // correctly sign extended or incorrectly zero extended by HW.
+ if (!Is64BitFPOp && (int32_t)Imm < 0)
+ return false;
+ }
+ }
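+
+ // A worked example of the rejection above (values illustrative): for
+ // Imm = -200 on an INT64 operand, the literal is not inlinable, and although
+ // it fits in 32 bits, (int32_t)Imm < 0, so hardware that zero-extends would
+ // produce 0x00000000FFFFFF38 instead of the intended 0xFFFFFFFFFFFFFF38;
+ // the operand is rejected. Imm = 1000 on the same operand is a non-negative
+ // 32-bit literal and is accepted.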
+
// Handle non-register types that are treated like immediates.
assert(MO->isImm() || MO->isTargetIndex() || MO->isFI() || MO->isGlobal());
@@ -5342,6 +5731,13 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
if (Src1.isReg() && RI.isAGPR(MRI, Src1.getReg()))
legalizeOpWithMove(MI, Src1Idx);
+ // Special case: V_FMAC_F32 and V_FMAC_F16 have src2.
+ if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
+ int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
+ if (!RI.isVGPR(MRI, MI.getOperand(Src2Idx).getReg()))
+ legalizeOpWithMove(MI, Src2Idx);
+ }
+
// VOP2 src0 instructions support all operand types, so we don't need to check
// their legality. If src1 is already legal, we don't need to do anything.
if (isLegalRegOperand(MRI, InstrDesc.operands()[Src1Idx], Src1))
@@ -5491,6 +5887,11 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
// legalize it.
legalizeOpWithMove(MI, Idx);
}
+
+ // Special case: V_FMAC_F32 and V_FMAC_F16 have src2 tied to vdst.
+ if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
+ !RI.isVGPR(MRI, MI.getOperand(VOP3Idx[2]).getReg()))
+ legalizeOpWithMove(MI, VOP3Idx[2]);
}
Register SIInstrInfo::readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
@@ -5862,6 +6263,17 @@ loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
+ // Save SCC. The waterfall loop may overwrite SCC.
+ Register SaveSCCReg;
+ bool SCCNotDead = (MBB.computeRegisterLiveness(TRI, AMDGPU::SCC, MI,
+                                                /*Neighborhood=*/30) !=
+                    MachineBasicBlock::LQR_Dead);
+ if (SCCNotDead) {
+ SaveSCCReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(MBB, Begin, DL, TII.get(AMDGPU::S_CSELECT_B32), SaveSCCReg)
+ .addImm(1)
+ .addImm(0);
+ }
+
Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
// Save the EXEC mask
@@ -5917,8 +6329,15 @@ loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
emitLoadScalarOpsFromVGPRLoop(TII, MRI, MBB, *LoopBB, *BodyBB, DL, ScalarOps);
- // Restore the EXEC mask
MachineBasicBlock::iterator First = RemainderBB->begin();
+ // Restore SCC
+ if (SCCNotDead) {
+ BuildMI(*RemainderBB, First, DL, TII.get(AMDGPU::S_CMP_LG_U32))
+ .addReg(SaveSCCReg, RegState::Kill)
+ .addImm(0);
+ }
+
+ // Restore the EXEC mask
BuildMI(*RemainderBB, First, DL, TII.get(MovExecOpc), Exec).addReg(SaveExec);
return BodyBB;
}
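
A minimal C++ model of the SCC save/restore trick used above, with SCC modeled
as a plain bool (an illustration, not target code):

    #include <cassert>
    #include <cstdint>

    int main() {
      bool SCC = true;                // live across the waterfall loop
      uint32_t Save = SCC ? 1u : 0u;  // S_CSELECT_B32 Save, 1, 0
      SCC = false;                    // the loop clobbers SCC
      SCC = (Save != 0);              // S_CMP_LG_U32 Save, 0
      assert(SCC);                    // original value recovered
    }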
@@ -6103,18 +6522,33 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
return CreatedBB;
}
- // Legalize MIMG and MUBUF/MTBUF for shaders.
+ // Legalize S_BITREPLICATE, S_QUADMASK and S_WQM
+ if (MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
+ MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
+ MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
+ MI.getOpcode() == AMDGPU::S_WQM_B32 ||
+ MI.getOpcode() == AMDGPU::S_WQM_B64) {
+ MachineOperand &Src = MI.getOperand(1);
+ if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
+ Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI));
+ return CreatedBB;
+ }
+
+ // Legalize MIMG/VIMAGE/VSAMPLE and MUBUF/MTBUF for shaders.
//
// Shaders only generate MUBUF/MTBUF instructions via intrinsics or via
// scratch memory access. In both cases, the legalization never involves
// conversion to the addr64 form.
- if (isMIMG(MI) || (AMDGPU::isGraphics(MF.getFunction().getCallingConv()) &&
- (isMUBUF(MI) || isMTBUF(MI)))) {
- MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc);
+ if (isImage(MI) || (AMDGPU::isGraphics(MF.getFunction().getCallingConv()) &&
+ (isMUBUF(MI) || isMTBUF(MI)))) {
+ int RSrcOpName = (isVIMAGE(MI) || isVSAMPLE(MI)) ? AMDGPU::OpName::rsrc
+ : AMDGPU::OpName::srsrc;
+ MachineOperand *SRsrc = getNamedOperand(MI, RSrcOpName);
if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg())))
CreatedBB = loadMBUFScalarOperandsFromVGPR(*this, MI, {SRsrc}, MDT);
- MachineOperand *SSamp = getNamedOperand(MI, AMDGPU::OpName::ssamp);
+ int SampOpName = isMIMG(MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
+ MachineOperand *SSamp = getNamedOperand(MI, SampOpName);
if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg())))
CreatedBB = loadMBUFScalarOperandsFromVGPR(*this, MI, {SSamp}, MDT);
@@ -6149,13 +6583,26 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
}
}
+ // Legalize s_sleep_var.
+ if (MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
+ const DebugLoc &DL = MI.getDebugLoc();
+ Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ int Src0Idx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
+ MachineOperand &Src0 = MI.getOperand(Src0Idx);
+ BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
+ .add(Src0);
+ Src0.ChangeToRegister(Reg, false);
+ return nullptr;
+ }
+
// Legalize MUBUF instructions.
bool isSoffsetLegal = true;
int SoffsetIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::soffset);
if (SoffsetIdx != -1) {
MachineOperand *Soffset = &MI.getOperand(SoffsetIdx);
- if (Soffset->isReg() &&
+ if (Soffset->isReg() && Soffset->getReg().isVirtual() &&
!RI.isSGPRClass(MRI.getRegClass(Soffset->getReg()))) {
isSoffsetLegal = false;
}
@@ -6370,10 +6817,11 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
default:
break;
case AMDGPU::S_ADD_U64_PSEUDO:
+ NewOpcode = AMDGPU::V_ADD_U64_PSEUDO;
+ break;
case AMDGPU::S_SUB_U64_PSEUDO:
- splitScalar64BitAddSub(Worklist, Inst, MDT);
- Inst.eraseFromParent();
- return;
+ NewOpcode = AMDGPU::V_SUB_U64_PSEUDO;
+ break;
case AMDGPU::S_ADD_I32:
case AMDGPU::S_SUB_I32: {
// FIXME: The u32 versions currently selected use the carry.
@@ -6469,7 +6917,9 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
break;
case AMDGPU::S_LSHL_B64:
if (ST.hasOnlyRevVALUShifts()) {
- NewOpcode = AMDGPU::V_LSHLREV_B64_e64;
+ NewOpcode = ST.getGeneration() >= AMDGPUSubtarget::GFX12
+ ? AMDGPU::V_LSHLREV_B64_pseudo_e64
+ : AMDGPU::V_LSHLREV_B64_e64;
swapOperands(Inst);
}
break;
@@ -6623,21 +7073,98 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
case AMDGPU::S_CMP_LT_U32:
case AMDGPU::S_CMP_LE_U32:
case AMDGPU::S_CMP_EQ_U64:
- case AMDGPU::S_CMP_LG_U64: {
- const MCInstrDesc &NewDesc = get(NewOpcode);
+ case AMDGPU::S_CMP_LG_U64:
+ case AMDGPU::S_CMP_LT_F32:
+ case AMDGPU::S_CMP_EQ_F32:
+ case AMDGPU::S_CMP_LE_F32:
+ case AMDGPU::S_CMP_GT_F32:
+ case AMDGPU::S_CMP_LG_F32:
+ case AMDGPU::S_CMP_GE_F32:
+ case AMDGPU::S_CMP_O_F32:
+ case AMDGPU::S_CMP_U_F32:
+ case AMDGPU::S_CMP_NGE_F32:
+ case AMDGPU::S_CMP_NLG_F32:
+ case AMDGPU::S_CMP_NGT_F32:
+ case AMDGPU::S_CMP_NLE_F32:
+ case AMDGPU::S_CMP_NEQ_F32:
+ case AMDGPU::S_CMP_NLT_F32:
+ case AMDGPU::S_CMP_LT_F16:
+ case AMDGPU::S_CMP_EQ_F16:
+ case AMDGPU::S_CMP_LE_F16:
+ case AMDGPU::S_CMP_GT_F16:
+ case AMDGPU::S_CMP_LG_F16:
+ case AMDGPU::S_CMP_GE_F16:
+ case AMDGPU::S_CMP_O_F16:
+ case AMDGPU::S_CMP_U_F16:
+ case AMDGPU::S_CMP_NGE_F16:
+ case AMDGPU::S_CMP_NLG_F16:
+ case AMDGPU::S_CMP_NGT_F16:
+ case AMDGPU::S_CMP_NLE_F16:
+ case AMDGPU::S_CMP_NEQ_F16:
+ case AMDGPU::S_CMP_NLT_F16: {
Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
- MachineInstr *NewInstr =
- BuildMI(*MBB, Inst, Inst.getDebugLoc(), NewDesc, CondReg)
- .add(Inst.getOperand(0))
- .add(Inst.getOperand(1));
+ auto NewInstr =
+ BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode), CondReg)
+ .setMIFlags(Inst.getFlags());
+ if (AMDGPU::getNamedOperandIdx(NewOpcode,
+ AMDGPU::OpName::src0_modifiers) >= 0) {
+ NewInstr
+ .addImm(0) // src0_modifiers
+ .add(Inst.getOperand(0)) // src0
+ .addImm(0) // src1_modifiers
+ .add(Inst.getOperand(1)) // src1
+ .addImm(0); // clamp
+ } else {
+ NewInstr
+ .add(Inst.getOperand(0))
+ .add(Inst.getOperand(1));
+ }
legalizeOperands(*NewInstr, MDT);
int SCCIdx = Inst.findRegisterDefOperandIdx(AMDGPU::SCC);
MachineOperand SCCOp = Inst.getOperand(SCCIdx);
addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
Inst.eraseFromParent();
+ return;
+ }
+ case AMDGPU::S_CVT_HI_F32_F16: {
+ const DebugLoc &DL = Inst.getDebugLoc();
+ Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
+ .addImm(16)
+ .add(Inst.getOperand(1));
+ BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst)
+ .addImm(0) // src0_modifiers
+ .addReg(TmpReg)
+ .addImm(0) // clamp
+ .addImm(0); // omod
+
+ MRI.replaceRegWith(Inst.getOperand(0).getReg(), NewDst);
+ addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
+ Inst.eraseFromParent();
+ return;
}
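
The expansion above in plain C++, to make the shift explicit; f16ToF32 is a
hypothetical stand-in for V_CVT_F32_F16 (a sketch, not target code):

    #include <cstdint>

    float cvtHiF32F16(uint32_t Packed, float (*f16ToF32)(uint16_t)) {
      uint16_t Hi = uint16_t(Packed >> 16); // V_LSHRREV_B32 Tmp, 16, Src
      return f16ToF32(Hi);                  // convert the now-low half
    }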
+ case AMDGPU::S_MINIMUM_F32:
+ case AMDGPU::S_MAXIMUM_F32:
+ case AMDGPU::S_MINIMUM_F16:
+ case AMDGPU::S_MAXIMUM_F16: {
+ const DebugLoc &DL = Inst.getDebugLoc();
+ Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ MachineInstr *NewInstr = BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst)
+ .addImm(0) // src0_modifiers
+ .add(Inst.getOperand(1))
+ .addImm(0) // src1_modifiers
+ .add(Inst.getOperand(2))
+ .addImm(0) // clamp
+ .addImm(0); // omod
+ MRI.replaceRegWith(Inst.getOperand(0).getReg(), NewDst);
+
+ legalizeOperands(*NewInstr, MDT);
+ addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
+ Inst.eraseFromParent();
return;
}
+ }
if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
// We cannot move this instruction to the VALU, so we should try to
@@ -6681,8 +7208,61 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
// Use the new VALU Opcode.
auto NewInstr = BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode))
.setMIFlags(Inst.getFlags());
- for (const MachineOperand &Op : Inst.explicit_operands())
- NewInstr->addOperand(Op);
+ if (isVOP3(NewOpcode) && !isVOP3(Opcode)) {
+ // Intersperse VOP3 modifiers among the SALU operands.
+ NewInstr->addOperand(Inst.getOperand(0));
+ if (AMDGPU::getNamedOperandIdx(NewOpcode,
+ AMDGPU::OpName::src0_modifiers) >= 0)
+ NewInstr.addImm(0);
+ if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0) >= 0)
+ NewInstr->addOperand(Inst.getOperand(1));
+
+ if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
+ // We are converting these to a BFE, so we need to add the missing
+ // operands for the size and offset.
+ unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
+ NewInstr.addImm(0);
+ NewInstr.addImm(Size);
+ } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
+ // The VALU version adds the second operand to the result, so insert an
+ // extra 0 operand.
+ NewInstr.addImm(0);
+ } else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
+ const MachineOperand &OffsetWidthOp = Inst.getOperand(2);
+ // If we need to move this to VGPRs, we need to unpack the second
+ // operand back into the 2 separate ones for bit offset and width.
+ assert(OffsetWidthOp.isImm() &&
+ "Scalar BFE is only implemented for constant width and offset");
+ uint32_t Imm = OffsetWidthOp.getImm();
+
+ uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
+ uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
+ NewInstr.addImm(Offset);
+ NewInstr.addImm(BitWidth);
+ } else {
+ if (AMDGPU::getNamedOperandIdx(NewOpcode,
+ AMDGPU::OpName::src1_modifiers) >= 0)
+ NewInstr.addImm(0);
+ if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
+ NewInstr->addOperand(Inst.getOperand(2));
+ if (AMDGPU::getNamedOperandIdx(NewOpcode,
+ AMDGPU::OpName::src2_modifiers) >= 0)
+ NewInstr.addImm(0);
+ if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
+ NewInstr->addOperand(Inst.getOperand(3));
+ if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
+ NewInstr.addImm(0);
+ if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
+ NewInstr.addImm(0);
+ if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
+ NewInstr.addImm(0);
+ }
+ } else {
+ // Just copy the SALU operands.
+ for (const MachineOperand &Op : Inst.explicit_operands())
+ NewInstr->addOperand(Op);
+ }
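+
+ // The S_BFE field layout unpacked above can be checked in a couple of lines
+ // (the masks are taken directly from the code):
+ //
+ //   uint32_t Offset = 8, Width = 4;
+ //   uint32_t Imm = Offset | (Width << 16);     // packed S_BFE operand
+ //   assert((Imm & 0x3f) == Offset);            // bits [5:0]
+ //   assert(((Imm & 0x7f0000) >> 16) == Width); // bits [22:16]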
+
// Remove any references to SCC. Vector instructions can't read from it, and
// we're just about to add the implicit use/defs of VCC; we don't want both.
@@ -6706,30 +7286,6 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
NewDstReg = MRI.createVirtualRegister(NewDstRC);
MRI.replaceRegWith(DstReg, NewDstReg);
}
- if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
- // We are converting these to a BFE, so we need to add the missing
- // operands for the size and offset.
- unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
- NewInstr.addImm(0);
- NewInstr.addImm(Size);
- } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
- // The VALU version adds the second operand to the result, so insert an
- // extra 0 operand.
- NewInstr.addImm(0);
- }
- if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
- const MachineOperand &OffsetWidthOp = NewInstr->getOperand(2);
- // If we need to move this to VGPRs, we need to unpack the second operand
- // back into the 2 separate ones for bit offset and width.
- assert(OffsetWidthOp.isImm() &&
- "Scalar BFE is only implemented for constant width and offset");
- uint32_t Imm = OffsetWidthOp.getImm();
- uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
- uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
- NewInstr->removeOperand(2);
- NewInstr.addImm(Offset);
- NewInstr.addImm(BitWidth);
- }
fixImplicitOperands(*NewInstr);
// Legalize the operands
legalizeOperands(*NewInstr, MDT);
@@ -6787,27 +7343,27 @@ void SIInstrInfo::lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
MachineOperand &Src1 = Inst.getOperand(2);
MachineOperand &Cond = Inst.getOperand(3);
- Register SCCSource = Cond.getReg();
- bool IsSCC = (SCCSource == AMDGPU::SCC);
+ Register CondReg = Cond.getReg();
+ bool IsSCC = (CondReg == AMDGPU::SCC);
// If this is a trivial select where the condition is effectively not SCC
- // (SCCSource is a source of copy to SCC), then the select is semantically
- // equivalent to copying SCCSource. Hence, there is no need to create
+ // (CondReg is a source of copy to SCC), then the select is semantically
+ // equivalent to copying CondReg. Hence, there is no need to create
// V_CNDMASK, we can just use that and bail out.
if (!IsSCC && Src0.isImm() && (Src0.getImm() == -1) && Src1.isImm() &&
(Src1.getImm() == 0)) {
- MRI.replaceRegWith(Dest.getReg(), SCCSource);
+ MRI.replaceRegWith(Dest.getReg(), CondReg);
return;
}
- const TargetRegisterClass *TC =
- RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
-
- Register CopySCC = MRI.createVirtualRegister(TC);
-
+ Register NewCondReg = CondReg;
if (IsSCC) {
+ const TargetRegisterClass *TC =
+ RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
+ NewCondReg = MRI.createVirtualRegister(TC);
+
// Now look for the closest SCC def if it is a copy
- // replacing the SCCSource with the COPY source register
+ // and, if so, replace CondReg with the COPY's source register
bool CopyFound = false;
for (MachineInstr &CandI :
make_range(std::next(MachineBasicBlock::reverse_iterator(Inst)),
@@ -6815,7 +7371,7 @@ void SIInstrInfo::lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) !=
-1) {
if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
- BuildMI(MBB, MII, DL, get(AMDGPU::COPY), CopySCC)
+ BuildMI(MBB, MII, DL, get(AMDGPU::COPY), NewCondReg)
.addReg(CandI.getOperand(1).getReg());
CopyFound = true;
}
@@ -6830,24 +7386,31 @@ void SIInstrInfo::lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
unsigned Opcode = (ST.getWavefrontSize() == 64) ? AMDGPU::S_CSELECT_B64
: AMDGPU::S_CSELECT_B32;
auto NewSelect =
- BuildMI(MBB, MII, DL, get(Opcode), CopySCC).addImm(-1).addImm(0);
+ BuildMI(MBB, MII, DL, get(Opcode), NewCondReg).addImm(-1).addImm(0);
NewSelect->getOperand(3).setIsUndef(Cond.isUndef());
}
}
- Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-
- auto UpdatedInst =
- BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B32_e64), ResultReg)
- .addImm(0)
- .add(Src1) // False
- .addImm(0)
- .add(Src0) // True
- .addReg(IsSCC ? CopySCC : SCCSource);
-
- MRI.replaceRegWith(Dest.getReg(), ResultReg);
- legalizeOperands(*UpdatedInst, MDT);
- addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
+ Register NewDestReg = MRI.createVirtualRegister(
+ RI.getEquivalentVGPRClass(MRI.getRegClass(Dest.getReg())));
+ MachineInstr *NewInst;
+ if (Inst.getOpcode() == AMDGPU::S_CSELECT_B32) {
+ NewInst = BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
+ .addImm(0)
+ .add(Src1) // False
+ .addImm(0)
+ .add(Src0) // True
+ .addReg(NewCondReg);
+ } else {
+ NewInst =
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B64_PSEUDO), NewDestReg)
+ .add(Src1) // False
+ .add(Src0) // True
+ .addReg(NewCondReg);
+ }
+ MRI.replaceRegWith(Dest.getReg(), NewDestReg);
+ legalizeOperands(*NewInst, MDT);
+ addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist);
}
void SIInstrInfo::lowerScalarAbs(SIInstrWorklist &Worklist,
@@ -7059,80 +7622,6 @@ void SIInstrInfo::splitScalar64BitUnaryOp(SIInstrWorklist &Worklist,
addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
-void SIInstrInfo::splitScalar64BitAddSub(SIInstrWorklist &Worklist,
- MachineInstr &Inst,
- MachineDominatorTree *MDT) const {
- bool IsAdd = (Inst.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
-
- MachineBasicBlock &MBB = *Inst.getParent();
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- const auto *CarryRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
-
- Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
- Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-
- Register CarryReg = MRI.createVirtualRegister(CarryRC);
- Register DeadCarryReg = MRI.createVirtualRegister(CarryRC);
-
- MachineOperand &Dest = Inst.getOperand(0);
- MachineOperand &Src0 = Inst.getOperand(1);
- MachineOperand &Src1 = Inst.getOperand(2);
- const DebugLoc &DL = Inst.getDebugLoc();
- MachineBasicBlock::iterator MII = Inst;
-
- const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
- const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
- const TargetRegisterClass *Src0SubRC =
- RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
- const TargetRegisterClass *Src1SubRC =
- RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
-
- MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
- AMDGPU::sub0, Src0SubRC);
- MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
- AMDGPU::sub0, Src1SubRC);
-
-
- MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
- AMDGPU::sub1, Src0SubRC);
- MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
- AMDGPU::sub1, Src1SubRC);
-
- unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
- MachineInstr *LoHalf =
- BuildMI(MBB, MII, DL, get(LoOpc), DestSub0)
- .addReg(CarryReg, RegState::Define)
- .add(SrcReg0Sub0)
- .add(SrcReg1Sub0)
- .addImm(0); // clamp bit
-
- unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
- MachineInstr *HiHalf =
- BuildMI(MBB, MII, DL, get(HiOpc), DestSub1)
- .addReg(DeadCarryReg, RegState::Define | RegState::Dead)
- .add(SrcReg0Sub1)
- .add(SrcReg1Sub1)
- .addReg(CarryReg, RegState::Kill)
- .addImm(0); // clamp bit
-
- BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
- .addReg(DestSub0)
- .addImm(AMDGPU::sub0)
- .addReg(DestSub1)
- .addImm(AMDGPU::sub1);
-
- MRI.replaceRegWith(Dest.getReg(), FullDestReg);
-
- // Try to legalize the operands in case we need to swap the order to keep it
- // valid.
- legalizeOperands(*LoHalf, MDT);
- legalizeOperands(*HiHalf, MDT);
-
- // Move all users of this moved value.
- addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
-}
-
void SIInstrInfo::splitScalar64BitBinaryOp(SIInstrWorklist &Worklist,
MachineInstr &Inst, unsigned Opcode,
MachineDominatorTree *MDT) const {
@@ -7980,9 +8469,36 @@ SIInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
return ArrayRef(TargetFlags);
}
-bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
- return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
- MI.modifiesRegister(AMDGPU::EXEC, &RI);
+unsigned SIInstrInfo::getLiveRangeSplitOpcode(Register SrcReg,
+ const MachineFunction &MF) const {
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ assert(SrcReg.isVirtual());
+ if (MFI->checkFlag(SrcReg, AMDGPU::VirtRegFlag::WWM_REG))
+ return AMDGPU::WWM_COPY;
+
+ return AMDGPU::COPY;
+}
+
+bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI,
+ Register Reg) const {
+ // We need to handle instructions that may be inserted during register
+ // allocation as part of the prolog. The initial prolog instruction may have
+ // been separated from the start of the block by spills and copies inserted
+ // for the prolog. However, the insertions for scalar registers can
+ // value.
+ bool IsNullOrVectorRegister = true;
+ if (Reg) {
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
+ }
+
+ uint16_t Opc = MI.getOpcode();
+ // FIXME: Copies inserted in the block prolog for live-range split should also
+ // be included.
+ return IsNullOrVectorRegister &&
+ (isSpillOpcode(Opc) || (!MI.isTerminator() && Opc != AMDGPU::COPY &&
+ MI.modifiesRegister(AMDGPU::EXEC, &RI)));
}
MachineInstrBuilder
@@ -8045,7 +8561,16 @@ const MCInstrDesc &SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) con
}
}
-unsigned SIInstrInfo::getMaxMUBUFImmOffset() { return (1 << 12) - 1; }
+bool SIInstrInfo::isLegalMUBUFImmOffset(unsigned Imm) const {
+ return Imm <= getMaxMUBUFImmOffset(ST);
+}
+
+unsigned SIInstrInfo::getMaxMUBUFImmOffset(const GCNSubtarget &ST) {
+ // GFX12 field is non-negative 24-bit signed byte offset.
+ const unsigned OffsetBits =
+ ST.getGeneration() >= AMDGPUSubtarget::GFX12 ? 23 : 12;
+ return (1 << OffsetBits) - 1;
+}
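
The resulting maxima, for reference (these follow directly from the code
above):

    constexpr unsigned MaxPreGFX12 = (1u << 12) - 1; // 4095
    constexpr unsigned MaxGFX12    = (1u << 23) - 1; // 8388607; 24-bit signed
                                                     // field, non-negative half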
void SIInstrInfo::fixImplicitOperands(MachineInstr &MI) const {
if (!ST.isWave32())
@@ -8082,7 +8607,7 @@ bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const {
// offsets within the given alignment can be added to the resulting ImmOffset.
bool SIInstrInfo::splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset,
uint32_t &ImmOffset, Align Alignment) const {
- const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
+ const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(ST);
const uint32_t MaxImm = alignDown(MaxOffset, Alignment.value());
uint32_t Overflow = 0;
@@ -8108,11 +8633,17 @@ bool SIInstrInfo::splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset,
}
}
- // There is a hardware bug in SI and CI which prevents address clamping in
- // MUBUF instructions from working correctly with SOffsets. The immediate
- // offset is unaffected.
- if (Overflow > 0 && ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
- return false;
+ if (Overflow > 0) {
+ // There is a hardware bug in SI and CI which prevents address clamping in
+ // MUBUF instructions from working correctly with SOffsets. The immediate
+ // offset is unaffected.
+ if (ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
+ return false;
+
+ // On some targets an immediate cannot be encoded in the SOffset field.
+ if (ST.hasRestrictedSOffset())
+ return false;
+ }
ImmOffset = Imm;
SOffset = Overflow;
@@ -8160,16 +8691,13 @@ bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
AddrSpace == AMDGPUAS::GLOBAL_ADDRESS))
return false;
- bool AllowNegative = FlatVariant != SIInstrFlags::FLAT;
- if (ST.hasNegativeScratchOffsetBug() &&
- FlatVariant == SIInstrFlags::FlatScratch)
- AllowNegative = false;
if (ST.hasNegativeUnalignedScratchOffsetBug() &&
FlatVariant == SIInstrFlags::FlatScratch && Offset < 0 &&
(Offset % 4) != 0) {
return false;
}
+ bool AllowNegative = allowNegativeFlatOffset(FlatVariant);
unsigned N = AMDGPU::getNumFlatOffsetBits(ST);
return isIntN(N, Offset) && (AllowNegative || Offset >= 0);
}
@@ -8180,12 +8708,10 @@ SIInstrInfo::splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace,
uint64_t FlatVariant) const {
int64_t RemainderOffset = COffsetVal;
int64_t ImmField = 0;
- bool AllowNegative = FlatVariant != SIInstrFlags::FLAT;
- if (ST.hasNegativeScratchOffsetBug() &&
- FlatVariant == SIInstrFlags::FlatScratch)
- AllowNegative = false;
+ bool AllowNegative = allowNegativeFlatOffset(FlatVariant);
const unsigned NumBits = AMDGPU::getNumFlatOffsetBits(ST) - 1;
+
if (AllowNegative) {
// Use signed division by a power of two to truncate towards 0.
int64_t D = 1LL << NumBits;
@@ -8209,6 +8735,14 @@ SIInstrInfo::splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace,
return {ImmField, RemainderOffset};
}
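
A worked example of the truncate-towards-zero split (assuming a 13-bit signed
immediate field, so NumBits = 12; the values are illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t COffsetVal = -5000;
      int64_t D = 1LL << 12;                     // 4096
      int64_t Remainder = (COffsetVal / D) * D;  // -4096: C++ integer division
                                                 // truncates towards zero
      int64_t ImmField = COffsetVal - Remainder; // -904, fits the field
      assert(ImmField > -D && ImmField < D);
    }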
+bool SIInstrInfo::allowNegativeFlatOffset(uint64_t FlatVariant) const {
+ if (ST.hasNegativeScratchOffsetBug() &&
+ FlatVariant == SIInstrFlags::FlatScratch)
+ return false;
+
+ return FlatVariant != SIInstrFlags::FLAT || AMDGPU::isGFX12Plus(ST);
+}
+
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST) {
switch (ST.getGeneration()) {
default:
@@ -8223,6 +8757,8 @@ static unsigned subtargetEncodingFamily(const GCNSubtarget &ST) {
return SIEncodingFamily::GFX10;
case AMDGPUSubtarget::GFX11:
return SIEncodingFamily::GFX11;
+ case AMDGPUSubtarget::GFX12:
+ return SIEncodingFamily::GFX12;
}
llvm_unreachable("Unknown subtarget generation!");
}
@@ -8248,6 +8784,9 @@ bool SIInstrInfo::isAsmOnlyOpcode(int MCOp) const {
}
int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
+ if (SIInstrInfo::isSoftWaitcnt(Opcode))
+ Opcode = SIInstrInfo::getNonSoftWaitcntOpcode(Opcode);
+
unsigned Gen = subtargetEncodingFamily(ST);
if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
@@ -8282,6 +8821,12 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
+ // TODO-GFX12: Remove this.
+ // Hack to allow some GFX12 codegen tests to run before all the encodings are
+ // implemented.
+ if (MCOp == (uint16_t)-1 && Gen == SIEncodingFamily::GFX12)
+ MCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX11);
+
// -1 means that Opcode is already a native instruction.
if (MCOp == -1)
return Opcode;
@@ -8531,7 +9076,7 @@ MachineInstr *SIInstrInfo::foldMemoryOperandImpl(
// A similar issue also exists with spilling and reloading $exec registers.
//
// To prevent that, constrain the %0 register class here.
- if (MI.isFullCopy()) {
+ if (isFullCopyInstr(MI)) {
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
if ((DstReg.isVirtual() || SrcReg.isVirtual()) &&
@@ -8572,9 +9117,8 @@ unsigned SIInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
InstructionUniformity
SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
unsigned opcode = MI.getOpcode();
- if (opcode == AMDGPU::G_INTRINSIC ||
- opcode == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS) {
- auto IID = static_cast<Intrinsic::ID>(MI.getIntrinsicID());
+ if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
+ auto IID = GI->getIntrinsicID();
if (AMDGPU::isIntrinsicSourceOfDivergence(IID))
return InstructionUniformity::NeverUniform;
if (AMDGPU::isIntrinsicAlwaysUniform(IID))
@@ -8612,7 +9156,8 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||
opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
- opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
+ opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
+ AMDGPU::isGenericAtomic(opcode)) {
return InstructionUniformity::NeverUniform;
}
return InstructionUniformity::Default;
@@ -8625,10 +9170,12 @@ SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
return InstructionUniformity::NeverUniform;
unsigned opcode = MI.getOpcode();
- if (opcode == AMDGPU::V_READLANE_B32 || opcode == AMDGPU::V_READFIRSTLANE_B32)
+ if (opcode == AMDGPU::V_READLANE_B32 ||
+ opcode == AMDGPU::V_READFIRSTLANE_B32 ||
+ opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
return InstructionUniformity::AlwaysUniform;
- if (MI.isCopy()) {
+ if (isCopyInstr(MI)) {
const MachineOperand &srcOp = MI.getOperand(1);
if (srcOp.isReg() && srcOp.getReg().isPhysical()) {
const TargetRegisterClass *regClass =
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 66f93e5640d6..affe52046752 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -43,7 +43,7 @@ static const MachineMemOperand::Flags MONoClobber =
/// Utility to store a worklist of machine instructions.
struct SIInstrWorklist {
- SIInstrWorklist() : InstrList() {}
+ SIInstrWorklist() = default;
void insert(MachineInstr *MI);
@@ -102,16 +102,15 @@ private:
public:
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
MachineRegisterInfo &MRI,
- MachineOperand &SuperReg,
+ const MachineOperand &SuperReg,
const TargetRegisterClass *SuperRC,
unsigned SubIdx,
const TargetRegisterClass *SubRC) const;
- MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI,
- MachineRegisterInfo &MRI,
- MachineOperand &SuperReg,
- const TargetRegisterClass *SuperRC,
- unsigned SubIdx,
- const TargetRegisterClass *SubRC) const;
+ MachineOperand buildExtractSubRegOrImm(
+ MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI,
+ const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC,
+ unsigned SubIdx, const TargetRegisterClass *SubRC) const;
+
private:
void swapOperands(MachineInstr &Inst) const;
@@ -135,9 +134,6 @@ private:
void splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
unsigned Opcode, bool Swap = false) const;
- void splitScalar64BitAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
- MachineDominatorTree *MDT = nullptr) const;
-
void splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
unsigned Opcode,
MachineDominatorTree *MDT = nullptr) const;
@@ -170,6 +166,12 @@ private:
Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
protected:
+ /// If the specific machine instruction is an instruction that moves/copies
+ /// a value from one register to another, return the destination and source
+ /// registers as machine operands.
+ std::optional<DestSourcePair>
+ isCopyInstrImpl(const MachineInstr &MI) const override;
+
bool swapSourceModifiers(MachineInstr &MI,
MachineOperand &Src0, unsigned Src0OpName,
MachineOperand &Src1, unsigned Src1OpName) const;
@@ -216,6 +218,9 @@ public:
bool isIgnorableUse(const MachineOperand &MO) const override;
+ bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo,
+ MachineCycleInfo *CI) const override;
+
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
int64_t &Offset1) const override;
@@ -226,8 +231,11 @@ public:
const TargetRegisterInfo *TRI) const final;
bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+ int64_t Offset1, bool OffsetIsScalable1,
ArrayRef<const MachineOperand *> BaseOps2,
- unsigned NumLoads, unsigned NumBytes) const override;
+ int64_t Offset2, bool OffsetIsScalable2,
+ unsigned ClusterSize,
+ unsigned NumBytes) const override;
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
int64_t Offset1, unsigned NumLoads) const override;
@@ -266,6 +274,11 @@ public:
bool expandPostRAPseudo(MachineInstr &MI) const override;
+ void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ Register DestReg, unsigned SubIdx,
+ const MachineInstr &Orig,
+ const TargetRegisterInfo &TRI) const override;
+
// Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
// instructions. Returns a pair of generated instructions.
// Can split either post-RA with physical registers or pre-RA with
@@ -395,12 +408,20 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::VALU;
}
+ static bool isImage(const MachineInstr &MI) {
+ return isMIMG(MI) || isVSAMPLE(MI) || isVIMAGE(MI);
+ }
+
+ bool isImage(uint16_t Opcode) const {
+ return isMIMG(Opcode) || isVSAMPLE(Opcode) || isVIMAGE(Opcode);
+ }
+
static bool isVMEM(const MachineInstr &MI) {
- return isMUBUF(MI) || isMTBUF(MI) || isMIMG(MI);
+ return isMUBUF(MI) || isMTBUF(MI) || isImage(MI);
}
bool isVMEM(uint16_t Opcode) const {
- return isMUBUF(Opcode) || isMTBUF(Opcode) || isMIMG(Opcode);
+ return isMUBUF(Opcode) || isMTBUF(Opcode) || isImage(Opcode);
}
static bool isSOP1(const MachineInstr &MI) {
@@ -525,6 +546,22 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::DS;
}
+ static bool isLDSDMA(const MachineInstr &MI) {
+ return isVALU(MI) && (isMUBUF(MI) || isFLAT(MI));
+ }
+
+ bool isLDSDMA(uint16_t Opcode) {
+ return isVALU(Opcode) && (isMUBUF(Opcode) || isFLAT(Opcode));
+ }
+
+ static bool isGWS(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::GWS;
+ }
+
+ bool isGWS(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::GWS;
+ }
+
bool isAlwaysGDS(uint16_t Opcode) const;
static bool isMIMG(const MachineInstr &MI) {
@@ -535,6 +572,22 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::MIMG;
}
+ static bool isVIMAGE(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::VIMAGE;
+ }
+
+ bool isVIMAGE(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VIMAGE;
+ }
+
+ static bool isVSAMPLE(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::VSAMPLE;
+ }
+
+ bool isVSAMPLE(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VSAMPLE;
+ }
+
static bool isGather4(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
}
@@ -622,6 +675,10 @@ public:
SIInstrFlags::IsAtomicNoRet);
}
+ static bool mayWriteLDSThroughDMA(const MachineInstr &MI) {
+ return isLDSDMA(MI) && MI.getOpcode() != AMDGPU::BUFFER_STORE_LDS_DWORD;
+ }
+
static bool isWQM(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::WQM;
}
@@ -654,9 +711,21 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
}
+ bool isSpillOpcode(uint16_t Opcode) const {
+ return get(Opcode).TSFlags &
+ (SIInstrFlags::SGPRSpill | SIInstrFlags::VGPRSpill);
+ }
+
static bool isWWMRegSpillOpcode(uint16_t Opcode) {
return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
- Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE;
+ Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE ||
+ Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE ||
+ Opcode == AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
+ }
+
+ static bool isChainCallOpcode(uint64_t Opcode) {
+ return Opcode == AMDGPU::SI_CS_CHAIN_TC_W32 ||
+ Opcode == AMDGPU::SI_CS_CHAIN_TC_W64;
}
static bool isDPP(const MachineInstr &MI) {
@@ -826,8 +895,34 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead;
}
+ static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) {
+ if (isWaitcnt(Opcode))
+ return AMDGPU::S_WAITCNT;
+
+ if (isWaitcntVsCnt(Opcode))
+ return AMDGPU::S_WAITCNT_VSCNT;
+
+ llvm_unreachable("Expected opcode S_WAITCNT/S_WAITCNT_VSCNT");
+ }
+
+ static bool isWaitcnt(unsigned Opcode) {
+ return Opcode == AMDGPU::S_WAITCNT || Opcode == AMDGPU::S_WAITCNT_soft;
+ }
+
+ static bool isWaitcntVsCnt(unsigned Opcode) {
+ return Opcode == AMDGPU::S_WAITCNT_VSCNT ||
+ Opcode == AMDGPU::S_WAITCNT_VSCNT_soft;
+ }
+
+ // "Soft" waitcnt instructions can be relaxed/optimized out by
+ // SIInsertWaitcnts.
+ static bool isSoftWaitcnt(unsigned Opcode) {
+ return Opcode == AMDGPU::S_WAITCNT_soft ||
+ Opcode == AMDGPU::S_WAITCNT_VSCNT_soft;
+ }
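+
+ // At encoding time the soft forms degrade to the hard ones; this mirrors the
+ // pseudoToMCOpcode change in the .cpp hunk above (a usage sketch, not new API):
+ //
+ //   unsigned Opc = AMDGPU::S_WAITCNT_soft;
+ //   if (SIInstrInfo::isSoftWaitcnt(Opc))
+ //     Opc = SIInstrInfo::getNonSoftWaitcntOpcode(Opc); // -> AMDGPU::S_WAITCNT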
+
bool isVGPRCopy(const MachineInstr &MI) const {
- assert(MI.isCopy());
+ assert(isCopyInstr(MI));
Register Dest = MI.getOperand(0).getReg();
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -897,7 +992,7 @@ public:
if (OpIdx >= MI.getDesc().NumOperands)
return false;
- if (MI.isCopy()) {
+ if (isCopyInstr(MI)) {
unsigned Size = getOpSize(MI, OpIdx);
assert(Size == 8 || Size == 4);
@@ -946,12 +1041,12 @@ public:
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, Register Reg,
- bool IsSCCLive) const;
+ const DebugLoc &DL, Register Reg, bool IsSCCLive,
+ SlotIndexes *Indexes = nullptr) const;
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
- Register Reg) const;
+ Register Reg, SlotIndexes *Indexes = nullptr) const;
/// Return the correct register class for \p OpNo. For target-specific
/// instructions, this will return the register class that has been defined
@@ -1143,7 +1238,11 @@ public:
CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAGMI *DAG) const override;
- bool isBasicBlockPrologue(const MachineInstr &MI) const override;
+ unsigned getLiveRangeSplitOpcode(Register Reg,
+ const MachineFunction &MF) const override;
+
+ bool isBasicBlockPrologue(const MachineInstr &MI,
+ Register Reg = Register()) const override;
MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsPt,
@@ -1176,11 +1275,9 @@ public:
static bool isKillTerminator(unsigned Opcode);
const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;
- static bool isLegalMUBUFImmOffset(unsigned Imm) {
- return isUInt<12>(Imm);
- }
+ bool isLegalMUBUFImmOffset(unsigned Imm) const;
- static unsigned getMaxMUBUFImmOffset();
+ static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST);
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
Align Alignment = Align(4)) const;
@@ -1197,6 +1294,9 @@ public:
unsigned AddrSpace,
uint64_t FlatVariant) const;
+ /// Returns true if negative offsets are allowed for the given \p FlatVariant.
+ bool allowNegativeFlatOffset(uint64_t FlatVariant) const;
+
/// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
/// Return -1 if the target-specific opcode for the pseudo instruction does
/// not exist. If Opcode is not a pseudo instruction, this is identity.
@@ -1378,6 +1478,13 @@ namespace AMDGPU {
} // end namespace AMDGPU
+namespace AMDGPU {
+enum AsmComments {
+ // For SGPR-to-VGPR spill instructions.
+ SGPR_SPILL = MachineInstr::TAsmComments
+};
+} // namespace AMDGPU
+
namespace SI {
namespace KernelInputOffsets {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 044bc4507d3a..173c877b8d29 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -32,6 +32,7 @@ def SIEncodingFamily {
int GFX90A = 8;
int GFX940 = 9;
int GFX11 = 10;
+ int GFX12 = 11;
}
//===----------------------------------------------------------------------===//
@@ -158,36 +159,18 @@ def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16",
SDTBufferStore,
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
-class SDBufferAtomic<string opcode> : SDNode <opcode,
- SDTypeProfile<1, 8,
- [SDTCisVT<2, v4i32>, // rsrc
- SDTCisVT<3, i32>, // vindex(VGPR)
- SDTCisVT<4, i32>, // voffset(VGPR)
- SDTCisVT<5, i32>, // soffset(SGPR)
- SDTCisVT<6, i32>, // offset(imm)
- SDTCisVT<7, i32>, // cachepolicy(imm)
- SDTCisVT<8, i1>]>, // idxen(imm)
- [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
->;
-
-def SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
-def SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">;
-def SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">;
-def SIbuffer_atomic_smin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMIN">;
-def SIbuffer_atomic_umin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMIN">;
-def SIbuffer_atomic_smax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMAX">;
-def SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
-def SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">;
-def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
-def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
-def SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">;
-def SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">;
-def SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">;
-def SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">;
-def SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">;
-def SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">;
-
-multiclass SDBufferAtomicNoRet {
+multiclass SDBufferAtomic<string opcode> {
+ def "" : SDNode <opcode,
+ SDTypeProfile<1, 8,
+ [SDTCisVT<2, v4i32>, // rsrc
+ SDTCisVT<3, i32>, // vindex(VGPR)
+ SDTCisVT<4, i32>, // voffset(VGPR)
+ SDTCisVT<5, i32>, // soffset(SGPR)
+ SDTCisVT<6, i32>, // offset(imm)
+ SDTCisVT<7, i32>, // cachepolicy(imm)
+ SDTCisVT<8, i1>]>, // idxen(imm)
+ [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
+ >;
def "_noret" : PatFrag<
(ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
node:$offset, node:$cachepolicy, node:$idxen),
@@ -198,28 +181,26 @@ multiclass SDBufferAtomicNoRet {
}
}
-defm SIbuffer_atomic_swap : SDBufferAtomicNoRet;
-defm SIbuffer_atomic_add : SDBufferAtomicNoRet;
-defm SIbuffer_atomic_sub : SDBufferAtomicNoRet;
-defm SIbuffer_atomic_smin : SDBufferAtomicNoRet;
-defm SIbuffer_atomic_umin : SDBufferAtomicNoRet;
-defm SIbuffer_atomic_smax : SDBufferAtomicNoRet;
-defm SIbuffer_atomic_umax : SDBufferAtomicNoRet;
-defm SIbuffer_atomic_and : SDBufferAtomicNoRet;
-defm SIbuffer_atomic_or : SDBufferAtomicNoRet;
-defm SIbuffer_atomic_xor : SDBufferAtomicNoRet;
-defm SIbuffer_atomic_inc : SDBufferAtomicNoRet;
-defm SIbuffer_atomic_dec : SDBufferAtomicNoRet;
-defm SIbuffer_atomic_fadd : SDBufferAtomicNoRet;
-defm SIbuffer_atomic_fmin : SDBufferAtomicNoRet;
-defm SIbuffer_atomic_fmax : SDBufferAtomicNoRet;
+defm SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
+defm SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">;
+defm SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">;
+defm SIbuffer_atomic_smin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMIN">;
+defm SIbuffer_atomic_umin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMIN">;
+defm SIbuffer_atomic_smax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMAX">;
+defm SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
+defm SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">;
+defm SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
+defm SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
+defm SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">;
+defm SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">;
+defm SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">;
+defm SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">;
+defm SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">;
+defm SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">;
def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
SDTypeProfile<1, 9,
- [SDTCisVT<0, i32>, // dst
- SDTCisVT<1, i32>, // src
- SDTCisVT<2, i32>, // cmp
- SDTCisVT<3, v4i32>, // rsrc
+ [SDTCisVT<3, v4i32>, // rsrc
SDTCisVT<4, i32>, // vindex(VGPR)
SDTCisVT<5, i32>, // voffset(VGPR)
SDTCisVT<6, i32>, // soffset(SGPR)
@@ -604,14 +585,14 @@ def atomic_store_64_glue : PatFrag <
}
let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
-def atomic_store_8_local_m0 : PatFrag<(ops node:$ptr, node:$val),
- (atomic_store_8_glue node:$ptr, node:$val)>;
-def atomic_store_16_local_m0 : PatFrag<(ops node:$ptr, node:$val),
- (atomic_store_16_glue node:$ptr, node:$val)>;
-def atomic_store_32_local_m0 : PatFrag<(ops node:$ptr, node:$val),
- (atomic_store_32_glue node:$ptr, node:$val)>;
-def atomic_store_64_local_m0 : PatFrag<(ops node:$ptr, node:$val),
- (atomic_store_64_glue node:$ptr, node:$val)>;
+def atomic_store_8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
+ (atomic_store_8_glue node:$val, node:$ptr)>;
+def atomic_store_16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
+ (atomic_store_16_glue node:$val, node:$ptr)>;
+def atomic_store_32_local_m0 : PatFrag<(ops node:$val, node:$ptr),
+ (atomic_store_32_glue node:$val, node:$ptr)>;
+def atomic_store_64_local_m0 : PatFrag<(ops node:$val, node:$ptr),
+ (atomic_store_64_glue node:$val, node:$ptr)>;
} // End let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces
@@ -906,11 +887,19 @@ def fp16_zeros_high_16bits : PatLeaf<(f16 VGPR_32:$src), [{
//===----------------------------------------------------------------------===//
def extract_cpol : SDNodeXForm<timm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() & AMDGPU::CPol::ALL, SDLoc(N), MVT::i8);
+ return CurDAG->getTargetConstant(
+ N->getZExtValue() & (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12
+ ? AMDGPU::CPol::ALL
+ : AMDGPU::CPol::ALL_pregfx12),
+ SDLoc(N), MVT::i8);
}]>;
def extract_swz : SDNodeXForm<timm, [{
- return CurDAG->getTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8);
+ const bool Swizzle =
+ N->getZExtValue() & (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12
+ ? AMDGPU::CPol::SWZ
+ : AMDGPU::CPol::SWZ_pregfx12);
+ return CurDAG->getTargetConstant(Swizzle, SDLoc(N), MVT::i8);
}]>;
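The two SDNodeXForms above reduce to a generation-dependent mask-and-test. A minimal standalone C++ sketch follows; the mask values are placeholders standing in for the real AMDGPU::CPol constants:

#include <cstdint>

// Placeholder bit layouts; the real values live in AMDGPU::CPol.
namespace cpol {
constexpr uint64_t ALL_pregfx12 = 0x7;      // glc | slc | dlc (assumed)
constexpr uint64_t ALL          = 0x3f;     // wider GFX12 field (assumed)
constexpr uint64_t SWZ_pregfx12 = 1u << 3;  // assumed bit position
constexpr uint64_t SWZ          = 1u << 4;  // assumed bit position
} // namespace cpol

// Mirrors extract_cpol: keep only the cache-policy bits valid on the target.
uint8_t extractCPol(uint64_t Imm, bool IsGFX12Plus) {
  return Imm & (IsGFX12Plus ? cpol::ALL : cpol::ALL_pregfx12);
}

// Mirrors extract_swz: test the generation's swizzle bit.
bool extractSwz(uint64_t Imm, bool IsGFX12Plus) {
  return Imm & (IsGFX12Plus ? cpol::SWZ : cpol::SWZ_pregfx12);
}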
def set_glc : SDNodeXForm<timm, [{
@@ -938,6 +927,13 @@ def InterpAttr : CustomOperand<i32>;
def InterpAttrChan : ImmOperand<i32>;
+def SplitBarrier : ImmOperand<i32> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_INLINE_SPLIT_BARRIER_INT32";
+ let DecoderMethod = "decodeSplitBarrier";
+ let PrintMethod = "printOperand";
+}
+
def VReg32OrOffClass : AsmOperandClass {
let Name = "VReg32OrOff";
let ParserMethod = "parseVReg32OrOff";
@@ -1044,6 +1040,7 @@ class NamedIntOperand<ValueType Type, string Prefix, string Name = NAME,
class NamedBitOperand<string Id, string Name = NAME>
: CustomOperand<i1, 1, Name> {
+ let PredicateMethod = "isImmTy<AMDGPUOperand::"#ImmTy#">";
let ParserMethod =
"[this](OperandVector &Operands) -> ParseStatus { "#
"return parseNamedBit(\""#Id#"\", Operands, AMDGPUOperand::"#ImmTy#"); }";
@@ -1054,8 +1051,8 @@ class NamedBitOperand<string Id, string Name = NAME>
class DefaultOperand<CustomOperand Op, int Value>
: OperandWithDefaultOps<Op.Type, (ops (Op.Type Value))>,
- CustomOperandProps<1, Op.ParserMatchClass.Name> {
- let ParserMethod = Op.ParserMatchClass.ParserMethod;
+ CustomOperandProps<1> {
+ let ParserMatchClass = Op.ParserMatchClass;
let PrintMethod = Op.PrintMethod;
}
@@ -1096,6 +1093,10 @@ def highmod : NamedBitOperand<"high", "High">;
def CPol : CustomOperand<i32, 1>;
def CPol_0 : DefaultOperand<CPol, 0>;
def CPol_GLC1 : DefaultOperand<CPol, 1>;
+def CPol_GLC : ValuePredicatedOperand<CPol, "Op.getImm() & CPol::GLC">;
+def CPol_NonGLC : ValuePredicatedOperand<CPol, "!(Op.getImm() & CPol::GLC)", 1>;
+def CPol_GLC_WithDefault : DefaultOperand<CPol_GLC, !shl(1, CPolBit.GLC)>;
+def CPol_NonGLC_WithDefault : DefaultOperand<CPol_NonGLC, 0>;
def TFE : NamedBitOperand<"tfe">;
def UNorm : NamedBitOperand<"unorm">;
@@ -1170,6 +1171,10 @@ class FPVCSrcInputModsMatchClass <int opSize> : FPInputModsMatchClass <opSize> {
}
def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
+def FPT16InputModsMatchClass : FPInputModsMatchClass<16> {
+ let Name = "RegOrImmWithFPT16InputMods";
+ let PredicateMethod = "isRegOrImmWithFPT16InputMods";
+}
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
@@ -1187,6 +1192,7 @@ class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
}
def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
+def FPT16InputMods : FPInputMods<FPT16InputModsMatchClass>;
def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;
@@ -1202,6 +1208,10 @@ class IntVCSrcInputModsMatchClass <int opSize> : IntInputModsMatchClass <opSize>
let Name = "RegOrInlineImmWithInt"#opSize#"InputMods";
let PredicateMethod = "isRegOrInlineImmWithInt"#opSize#"InputMods";
}
+def IntT16InputModsMatchClass : IntInputModsMatchClass<16> {
+ let Name = "RegOrImmWithIntT16InputMods";
+ let PredicateMethod = "isRegOrImmWithIntT16InputMods";
+}
def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>;
@@ -1209,6 +1219,7 @@ def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>;
class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
let PrintMethod = "printOperandAndIntInputMods";
}
+def IntT16InputMods : IntInputMods<IntT16InputModsMatchClass>;
def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
def Int32VCSrcInputMods : IntInputMods<Int32VCSrcInputModsMatchClass>;
@@ -1463,15 +1474,18 @@ class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
// Returns the register class to use for the destination of VOP[123C]
// instructions for the given VT.
-class getVALUDstForVT<ValueType VT> {
+class getVALUDstForVT<ValueType VT, bit IsTrue16 = 0, bit IsVOP3Encoding = 0> {
+ defvar op16 = !if(IsTrue16, !if (IsVOP3Encoding, VOPDstOperand_t16,
+ VOPDstOperand_t16Lo128),
+ VOPDstOperand<VGPR_32>);
RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
!if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
!if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
- !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>,
+ !if(!eq(VT.Size, 16), op16,
VOPDstS64orS32)))); // else VT == i1
}
-class getVALUDstForVT_t16<ValueType VT> {
+class getVALUDstForVT_fake16<ValueType VT> {
RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
!if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
!if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
@@ -1489,7 +1503,7 @@ class getSDWADstForVT<ValueType VT> {
// Returns the register class to use for source 0 of VOP[12C]
// instructions for the given VT.
-class getVOPSrc0ForVT<ValueType VT, bit IsTrue16> {
+class getVOPSrc0ForVT<ValueType VT, bit IsTrue16, bit IsFake16 = 1> {
bit isFP = isFloatType<VT>.ret;
RegisterOperand ret =
@@ -1498,7 +1512,7 @@ class getVOPSrc0ForVT<ValueType VT, bit IsTrue16> {
VSrc_f64,
!if(!eq(VT.Value, f16.Value),
!if(IsTrue16,
- VSrcT_f16_Lo128,
+ !if(IsFake16, VSrcFake16_f16_Lo128, VSrcT_f16_Lo128),
VSrc_f16
),
!if(!eq(VT.Value, v2f16.Value),
@@ -1514,7 +1528,7 @@ class getVOPSrc0ForVT<ValueType VT, bit IsTrue16> {
VSrc_b64,
!if(!eq(VT.Value, i16.Value),
!if(IsTrue16,
- VSrcT_b16_Lo128,
+ !if(IsFake16, VSrcFake16_b16_Lo128, VSrcT_b16_Lo128),
VSrc_b16
),
!if(!eq(VT.Value, v2i16.Value),
@@ -1539,13 +1553,17 @@ class getVregSrcForVT<ValueType VT> {
VGPR_32))));
}
-class getVregSrcForVT_t16<ValueType VT> {
+class getVregSrcForVT_t16<ValueType VT, bit IsFake16 = 1> {
RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128,
!if(!eq(VT.Size, 96), VReg_96,
!if(!eq(VT.Size, 64), VReg_64,
!if(!eq(VT.Size, 48), VReg_64,
- !if(!eq(VT.Size, 16), VGPR_32_Lo128,
+ !if(!eq(VT.Size, 16),
+ !if(IsFake16, VGPR_32_Lo128, VGPR_16_Lo128),
VGPR_32)))));
+
+ RegisterOperand op = !if (!and(!eq(VT.Size, 16), !not(IsFake16)),
+ VGPRSrc_16_Lo128, RegisterOperand<ret>);
}
class getSDWASrcForVT <ValueType VT> {
@@ -1557,7 +1575,7 @@ class getSDWASrcForVT <ValueType VT> {
// Returns the register class to use for sources of VOP3 instructions for the
// given VT.
-class getVOP3SrcForVT<ValueType VT> {
+class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> {
bit isFP = isFloatType<VT>.ret;
RegisterOperand ret =
!if(!eq(VT.Size, 128),
@@ -1574,7 +1592,7 @@ class getVOP3SrcForVT<ValueType VT> {
SSrc_i1,
!if(isFP,
!if(!eq(VT.Value, f16.Value),
- VSrc_f16,
+ !if(IsTrue16, VSrcT_f16, VSrc_f16),
!if(!eq(VT.Value, v2f16.Value),
VSrc_v2f16,
!if(!eq(VT.Value, v4f16.Value),
@@ -1584,7 +1602,7 @@ class getVOP3SrcForVT<ValueType VT> {
)
),
!if(!eq(VT.Value, i16.Value),
- VSrc_b16,
+ !if(IsTrue16, VSrcT_b16, VSrc_b16),
!if(!eq(VT.Value, v2i16.Value),
VSrc_v2b16,
VSrc_b32
@@ -1631,18 +1649,15 @@ class isModifierType<ValueType SrcVT> {
}
// Return type of input modifiers operand for specified input operand
-class getSrcMod <ValueType VT> {
+class getSrcMod <ValueType VT, bit IsTrue16 = 0> {
bit isFP = isFloatType<VT>.ret;
bit isPacked = isPackedType<VT>.ret;
Operand ret = !if(!eq(VT.Size, 64),
!if(isFP, FP64InputMods, Int64InputMods),
- !if(isFP,
- !if(!eq(VT.Value, f16.Value),
- FP16InputMods,
- FP32InputMods
- ),
- Int32InputMods)
- );
+ !if(!eq(VT.Size, 16),
+ !if(isFP, !if(IsTrue16, FPT16InputMods, FP16InputMods),
+ !if(IsTrue16, IntT16InputMods, IntOpSelMods)),
+ !if(isFP, FP32InputMods, Int32InputMods)));
}
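The nested !ifs are easier to read as a plain selection tree; a C++ sketch of the same logic, with enumerators standing in for the TableGen operand defs:

// Sketch of the decision tree encoded by getSrcMod after this change.
enum class SrcMods { FP64, Int64, FPT16, FP16, IntT16, IntOpSel, FP32, Int32 };

SrcMods getSrcModFor(unsigned SizeInBits, bool IsFP, bool IsTrue16) {
  if (SizeInBits == 64)
    return IsFP ? SrcMods::FP64 : SrcMods::Int64;
  if (SizeInBits == 16) {
    if (IsFP)
      return IsTrue16 ? SrcMods::FPT16 : SrcMods::FP16;
    return IsTrue16 ? SrcMods::IntT16 : SrcMods::IntOpSel;
  }
  return IsFP ? SrcMods::FP32 : SrcMods::Int32;
}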
class getOpSelMod <ValueType VT> {
@@ -2262,6 +2277,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
field list<ValueType> ArgVT = _ArgVT;
field bit EnableClamp = _EnableClamp;
field bit IsTrue16 = 0;
+ field bit IsRealTrue16 = 0;
field ValueType DstVT = ArgVT[0];
field ValueType Src0VT = ArgVT[1];
@@ -2281,7 +2297,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
field RegisterClass Src2DPP = getVregSrcForVT<Src2VT>.ret;
field RegisterOperand Src0VOP3DPP = VGPRSrc_32;
- field RegisterOperand Src1VOP3DPP = VRegSrc_32;
+ field RegisterOperand Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT>.ret;
field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret;
field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
@@ -2454,8 +2470,32 @@ class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.
// class, so copy changes to this class in those profiles
class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
let IsTrue16 = 1;
+ let IsRealTrue16 = 1;
+ // Most DstVT are 16-bit, but not all.
+ let DstRC = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 0 /*IsVOP3Encoding*/>.ret;
+ let DstRC64 = getVALUDstForVT<DstVT>.ret;
+ let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
+ let Src1RC32 = getVregSrcForVT_t16<Src1VT, 0 /*IsFake16*/>.op;
+ let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret;
+ let Src1DPP = getVregSrcForVT_t16<Src1VT>.ret;
+ let Src2DPP = getVregSrcForVT_t16<Src2VT>.ret;
+ let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
+ let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
+ let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+
+ let DstRC64 = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 1 /*IsVOP3Encoding*/>.ret;
+ let Src0RC64 = getVOP3SrcForVT<Src0VT, 1 /*IsTrue16*/>.ret;
+ let Src1RC64 = getVOP3SrcForVT<Src1VT, 1 /*IsTrue16*/>.ret;
+ let Src2RC64 = getVOP3SrcForVT<Src2VT, 1 /*IsTrue16*/>.ret;
+ let Src0Mod = getSrcMod<Src0VT, 1 /*IsTrue16*/>.ret;
+ let Src1Mod = getSrcMod<Src1VT, 1 /*IsTrue16*/>.ret;
+ let Src2Mod = getSrcMod<Src2VT, 1 /*IsTrue16*/>.ret;
+}
+
+class VOPProfile_Fake16<VOPProfile P> : VOPProfile<P.ArgVT> {
+ let IsTrue16 = 1;
// Most DstVT are 16-bit, but not all
- let DstRC = getVALUDstForVT_t16<DstVT>.ret;
+ let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
let DstRC64 = getVALUDstForVT<DstVT>.ret;
let Src1RC32 = RegisterOperand<getVregSrcForVT_t16<Src1VT>.ret>;
let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret;
@@ -2733,7 +2773,8 @@ def getMCOpcodeGen : InstrMapping {
[!cast<string>(SIEncodingFamily.SDWA10)],
[!cast<string>(SIEncodingFamily.GFX90A)],
[!cast<string>(SIEncodingFamily.GFX940)],
- [!cast<string>(SIEncodingFamily.GFX11)]];
+ [!cast<string>(SIEncodingFamily.GFX11)],
+ [!cast<string>(SIEncodingFamily.GFX12)]];
}
// Get equivalent SOPK instruction.
@@ -2872,14 +2913,14 @@ def getVOPDBaseFromComponent : SearchIndex {
def VOPDPairs : GenericTable {
let FilterClass = "VOPD_Base";
let CppTypeName = "VOPDInfo";
- let Fields = ["Opcode", "OpX", "OpY"];
+ let Fields = ["Opcode", "OpX", "OpY", "SubTgt"];
let PrimaryKey = ["Opcode"];
let PrimaryKeyName = "getVOPDOpcodeHelper";
}
def getVOPDInfoFromComponentOpcodes : SearchIndex {
let Table = VOPDPairs;
- let Key = ["OpX", "OpY"];
+ let Key = ["OpX", "OpY", "SubTgt"];
}
include "SIInstructions.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td
index 2edebccef7d8..f9bc623abcd0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -132,7 +132,7 @@ def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst),
let isAsCheapAsAMove = 1;
let isMoveImm = 1;
let SchedRW = [Write64Bit];
- let Size = 16; // Needs maximum 2 v_mov_b32 instructions 8 byte long each.
+ let Size = 4;
let UseNamedOperandTable = 1;
}
@@ -149,8 +149,9 @@ def S_MOV_B64_IMM_PSEUDO : SPseudoInstSI <(outs SReg_64:$sdst),
let isAsCheapAsAMove = 1;
let isMoveImm = 1;
let SchedRW = [WriteSALU, Write64Bit];
- let Size = 16; // Needs maximum 2 s_mov_b32 instructions 8 byte long each.
+ let Size = 4;
let Uses = [];
+ let UseNamedOperandTable = 1;
}
// Pseudoinstruction for @llvm.amdgcn.wqm. It is turned into a copy after the
@@ -172,6 +173,13 @@ def STRICT_WQM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>;
} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]
+def WWM_COPY : SPseudoInstSI <
+ (outs unknown:$dst), (ins unknown:$src)> {
+ let hasSideEffects = 0;
+ let isAsCheapAsAMove = 1;
+ let isConvergent = 1;
+}
+
def ENTER_STRICT_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins i64imm:$src0)> {
let Uses = [EXEC];
let Defs = [EXEC, SCC];
@@ -251,6 +259,12 @@ def V_SET_INACTIVE_B64 : VPseudoInstSI <(outs VReg_64:$vdst),
}
} // End Defs = [SCC]
+def : GCNPat<(i32 (int_amdgcn_set_inactive_chain_arg i32:$src, i32:$inactive)),
+ (V_SET_INACTIVE_B32 VGPR_32:$src, VGPR_32:$inactive)>;
+
+def : GCNPat<(i64 (int_amdgcn_set_inactive_chain_arg i64:$src, i64:$inactive)),
+ (V_SET_INACTIVE_B64 VReg_64:$src, VReg_64:$inactive)>;
+
let usesCustomInserter = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in {
def WAVE_REDUCE_UMIN_PSEUDO_U32 : VPseudoInstSI <(outs SGPR_32:$sdst),
(ins VSrc_b32: $src, VSrc_b32:$strategy),
@@ -263,7 +277,7 @@ let usesCustomInserter = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses
}
}
-let usesCustomInserter = 1, Defs = [VCC, EXEC] in {
+let usesCustomInserter = 1, Defs = [VCC] in {
def V_ADD_U64_PSEUDO : VPseudoInstSI <
(outs VReg_64:$vdst), (ins VSrc_b64:$src0, VSrc_b64:$src1),
[(set VReg_64:$vdst, (DivergentBinFrag<add> i64:$src0, i64:$src1))]
@@ -273,7 +287,7 @@ def V_SUB_U64_PSEUDO : VPseudoInstSI <
(outs VReg_64:$vdst), (ins VSrc_b64:$src0, VSrc_b64:$src1),
[(set VReg_64:$vdst, (DivergentBinFrag<sub> i64:$src0, i64:$src1))]
>;
-} // End usesCustomInserter = 1, Defs = [VCC, EXEC]
+} // End usesCustomInserter = 1, Defs = [VCC]
let usesCustomInserter = 1, Defs = [SCC] in {
def S_ADD_U64_PSEUDO : SPseudoInstSI <
@@ -657,6 +671,50 @@ def : GCNPat<
(SI_TCRETURN_GFX Gfx_CCR_SGPR_64:$src0, (i64 0), i32imm:$fpdiff)
>;
+// Pseudo for the llvm.amdgcn.cs.chain intrinsic.
+// This is essentially a tail call, but it also takes a mask to put in EXEC
+// right before jumping to the callee.
+class SI_CS_CHAIN_TC<
+ ValueType execvt, Predicate wavesizepred,
+ RegisterOperand execrc = getSOPSrcForVT<execvt>.ret>
+ : SPseudoInstSI <(outs),
+ (ins CCR_SGPR_64:$src0, unknown:$callee, i32imm:$fpdiff, execrc:$exec)> {
+ let FixedSize = 0;
+ let isCall = 1;
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let isReturn = 1;
+ let UseNamedOperandTable = 1;
+ let SchedRW = [WriteBranch];
+ let isConvergent = 1;
+
+ let WaveSizePredicate = wavesizepred;
+}
+
+def SI_CS_CHAIN_TC_W32 : SI_CS_CHAIN_TC<i32, isWave32>;
+def SI_CS_CHAIN_TC_W64 : SI_CS_CHAIN_TC<i64, isWave64>;
+
+// Handle selecting direct & indirect calls via SI_CS_CHAIN_TC_W32/64
+multiclass si_cs_chain_tc_pattern<
+ dag callee, ValueType execvt, RegisterOperand execrc, Instruction tc> {
+def : GCNPat<
+ (AMDGPUtc_return_chain i64:$src0, callee, (i32 timm:$fpdiff), execvt:$exec),
+ (tc CCR_SGPR_64:$src0, callee, i32imm:$fpdiff, execrc:$exec)
+>;
+}
+
+multiclass si_cs_chain_tc_patterns<
+ ValueType execvt,
+ RegisterOperand execrc = getSOPSrcForVT<execvt>.ret,
+ Instruction tc = !if(!eq(execvt, i32), SI_CS_CHAIN_TC_W32, SI_CS_CHAIN_TC_W64)
+ > {
+ defm direct: si_cs_chain_tc_pattern<(tglobaladdr:$callee), execvt, execrc, tc>;
+ defm indirect: si_cs_chain_tc_pattern<(i64 0), execvt, execrc, tc>;
+}
+
+defm : si_cs_chain_tc_patterns<i32>;
+defm : si_cs_chain_tc_patterns<i64>;
+
def ADJCALLSTACKUP : SPseudoInstSI<
(outs), (ins i32imm:$amt0, i32imm:$amt1),
[(callseq_start timm:$amt0, timm:$amt1)],
@@ -867,6 +925,28 @@ defm SI_SPILL_S384 : SI_SPILL_SGPR <SReg_384>;
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
defm SI_SPILL_S1024 : SI_SPILL_SGPR <SReg_1024>;
+let SGPRSpill = 1, VALU = 1, isConvergent = 1 in {
+def SI_SPILL_S32_TO_VGPR : PseudoInstSI <(outs VGPR_32:$vdst),
+ (ins SReg_32:$src0, i32imm:$src1, VGPR_32:$vdst_in)> {
+ let Size = 4;
+ let FixedSize = 1;
+ let IsNeverUniform = 1;
+ let hasSideEffects = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let Constraints = "$vdst = $vdst_in";
+}
+
+def SI_RESTORE_S32_FROM_VGPR : PseudoInstSI <(outs SReg_32:$sdst),
+ (ins VGPR_32:$src0, i32imm:$src1)> {
+ let Size = 4;
+ let FixedSize = 1;
+ let hasSideEffects = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+}
+} // End SGPRSpill = 1, VALU = 1, isConvergent = 1
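Assuming these pseudos are eventually expanded to v_writelane/v_readlane (which the tied $vdst_in operand suggests), their effect can be modeled on plain arrays:

#include <array>
#include <cstdint>

using VGPR = std::array<uint32_t, 32>; // one 32-bit value per lane (wave32)

// Model of SI_SPILL_S32_TO_VGPR: stash an SGPR value into one lane,
// passing the other lanes of $vdst_in through unchanged.
VGPR writeLane(VGPR VdstIn, uint32_t SVal, unsigned Lane) {
  VdstIn[Lane] = SVal;
  return VdstIn;
}

// Model of SI_RESTORE_S32_FROM_VGPR: read the stashed value back out.
uint32_t readLane(const VGPR &V, unsigned Lane) { return V[Lane]; }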
+
// VGPR or AGPR spill instructions. In case of AGPR spilling, a temp register
// needs to be used, plus an extra instruction to move between VGPR and AGPR.
// UsesTmp adds to the total size of an expanded spill in this case.
@@ -945,8 +1025,10 @@ defm SI_SPILL_AV384 : SI_SPILL_VGPR <AV_384, 1>;
defm SI_SPILL_AV512 : SI_SPILL_VGPR <AV_512, 1>;
defm SI_SPILL_AV1024 : SI_SPILL_VGPR <AV_1024, 1>;
-let isConvergent = 1 in
-defm SI_SPILL_WWM_V32 : SI_SPILL_VGPR <VGPR_32>;
+let isConvergent = 1 in {
+ defm SI_SPILL_WWM_V32 : SI_SPILL_VGPR <VGPR_32>;
+ defm SI_SPILL_WWM_AV32 : SI_SPILL_VGPR <AV_32, 1>;
+}
def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
(outs SReg_64:$dst),
@@ -1587,6 +1669,16 @@ def : BitConvert <v12i32, v12f32, VReg_384>;
def : BitConvert <v12f32, v12i32, VReg_384>;
// 512-bit bitcast
+def : BitConvert <v32f16, v32i16, VReg_512>;
+def : BitConvert <v32i16, v32f16, VReg_512>;
+def : BitConvert <v32f16, v16i32, VReg_512>;
+def : BitConvert <v32f16, v16f32, VReg_512>;
+def : BitConvert <v16f32, v32f16, VReg_512>;
+def : BitConvert <v16i32, v32f16, VReg_512>;
+def : BitConvert <v32i16, v16i32, VReg_512>;
+def : BitConvert <v32i16, v16f32, VReg_512>;
+def : BitConvert <v16f32, v32i16, VReg_512>;
+def : BitConvert <v16i32, v32i16, VReg_512>;
def : BitConvert <v16i32, v16f32, VReg_512>;
def : BitConvert <v16f32, v16i32, VReg_512>;
def : BitConvert <v8i64, v8f64, VReg_512>;
@@ -1632,8 +1724,10 @@ def : ClampPat<V_MAX_F32_e64, f32>;
def : ClampPat<V_MAX_F64_e64, f64>;
let SubtargetPredicate = NotHasTrue16BitInsts in
def : ClampPat<V_MAX_F16_e64, f16>;
-let SubtargetPredicate = HasTrue16BitInsts in
+let SubtargetPredicate = UseRealTrue16Insts in
def : ClampPat<V_MAX_F16_t16_e64, f16>;
+let SubtargetPredicate = UseFakeTrue16Insts in
+def : ClampPat<V_MAX_F16_fake16_e64, f16>;
let SubtargetPredicate = HasVOP3PInsts in {
def : GCNPat <
@@ -1922,6 +2016,29 @@ def : GCNPat <
(V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm)))
>;
+// V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit
+// immediate and will be expanded as needed, but we will only use these patterns
+// for values which can be encoded.
+def : GCNPat <
+ (VGPRImm<(i64 imm)>:$imm),
+ (V_MOV_B64_PSEUDO imm:$imm)
+>;
+
+def : GCNPat <
+ (VGPRImm<(f64 fpimm)>:$imm),
+ (V_MOV_B64_PSEUDO (f64 (bitcast_fpimm_to_i64 $imm)))
+>;
+
+def : GCNPat <
+ (i64 imm:$imm),
+ (S_MOV_B64_IMM_PSEUDO imm:$imm)
+>;
+
+def : GCNPat <
+ (f64 fpimm:$imm),
+ (S_MOV_B64_IMM_PSEUDO (i64 (bitcast_fpimm_to_i64 fpimm:$imm)))
+>;
+
def : GCNPat <
(f32 fpimm:$imm),
(S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm)))
@@ -2306,8 +2423,16 @@ class FPToI1Pat<Instruction Inst, int KOne, ValueType kone_type, ValueType vt, S
(i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE))
>;
-def : FPToI1Pat<V_CMP_EQ_F16_e64, CONST.FP16_ONE, i16, f16, fp_to_uint>;
-def : FPToI1Pat<V_CMP_EQ_F16_e64, CONST.FP16_NEG_ONE, i16, f16, fp_to_sint>;
+let OtherPredicates = [NotHasTrue16BitInsts] in {
+ def : FPToI1Pat<V_CMP_EQ_F16_e64, CONST.FP16_ONE, i16, f16, fp_to_uint>;
+ def : FPToI1Pat<V_CMP_EQ_F16_e64, CONST.FP16_NEG_ONE, i16, f16, fp_to_sint>;
+} // end OtherPredicates = [NotHasTrue16BitInsts]
+
+let OtherPredicates = [HasTrue16BitInsts] in {
+ def : FPToI1Pat<V_CMP_EQ_F16_t16_e64, CONST.FP16_ONE, i16, f16, fp_to_uint>;
+ def : FPToI1Pat<V_CMP_EQ_F16_t16_e64, CONST.FP16_NEG_ONE, i16, f16, fp_to_sint>;
+} // end OtherPredicates = [HasTrue16BitInsts]
+
def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, i32, f32, fp_to_uint>;
def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_NEG_ONE, i32, f32, fp_to_sint>;
def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_ONE, i64, f64, fp_to_uint>;
@@ -2679,12 +2804,12 @@ def : GCNPat<
let OtherPredicates = [HasTrue16BitInsts] in {
def : GCNPat<
(fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))),
- (V_MUL_F16_t16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src)
+ (V_MUL_F16_fake16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src)
>;
def : GCNPat<
(fcanonicalize (f16 (fneg (VOP3Mods f16:$src, i32:$src_mods)))),
- (V_MUL_F16_t16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src)
+ (V_MUL_F16_fake16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src)
>;
} // End OtherPredicates
@@ -2703,6 +2828,13 @@ def : GCNPat<
(V_MUL_F32_e64 0, (i32 CONST.FP32_NEG_ONE), $src_mods, $src)
>;
+let SubtargetPredicate = HasPackedFP32Ops in {
+def : GCNPat<
+ (fcanonicalize (v2f32 (VOP3PMods v2f32:$src, i32:$src_mods))),
+ (V_PK_MUL_F32 0, CONST.FP32_ONE, $src_mods, $src)
+>;
+}
+
// TODO: Handle fneg like other types.
def : GCNPat<
(fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))),
@@ -2734,7 +2866,7 @@ multiclass SelectCanonicalizeAsMax<
def : GCNPat<
(fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))),
- (V_MAX_F16_t16_e64 $src_mods, $src, $src_mods, $src, 0, 0)> {
+ (V_MAX_F16_fake16_e64 $src_mods, $src, $src_mods, $src, 0, 0)> {
let OtherPredicates = !listconcat(f16_preds, [Has16BitInsts, HasTrue16BitInsts]);
}
@@ -3309,6 +3441,81 @@ defm : Int16Med3Pat<V_MED3_I16_e64, smin, smax>;
defm : Int16Med3Pat<V_MED3_U16_e64, umin, umax>;
} // End Predicates = [isGFX9Plus]
+let OtherPredicates = [isGFX12Plus] in {
+def : FPMinMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
+def : FPMinMaxPat<V_MAXIMUMMINIMUM_F32_e64, f32, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
+def : FPMinMaxPat<V_MINIMUMMAXIMUM_F16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
+def : FPMinMaxPat<V_MAXIMUMMINIMUM_F16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
+}
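Here fminimum/fmaximum are the IEEE-754-2019 minimum/maximum operations, which propagate NaNs and order -0.0 below +0.0 (unlike fminnum/fmaxnum); a scalar model of the minimum side:

#include <cmath>
#include <limits>

// Illustrative scalar semantics of the fminimum node.
double ieeeMinimum(double A, double B) {
  if (std::isnan(A) || std::isnan(B))
    return std::numeric_limits<double>::quiet_NaN(); // NaN propagates
  if (A == 0.0 && B == 0.0)
    return std::signbit(A) ? A : B; // -0.0 is the minimum of (-0.0, +0.0)
  return A < B ? A : B;
}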
+
+// Convert a floating-point power of 2 to the integer exponent.
+def FPPow2ToExponentXForm : SDNodeXForm<fpimm, [{
+ const auto &APF = N->getValueAPF();
+ int Log2 = APF.getExactLog2Abs();
+ assert(Log2 != INT_MIN);
+ return CurDAG->getTargetConstant(Log2, SDLoc(N), MVT::i32);
+}]>;
+
+// Check if a floating point value is a power of 2 floating-point
+// immediate where it's preferable to emit a multiply by it as an
+// ldexp. We skip over 0.5 to 4.0 as those are inline immediates
+// anyway.
+def fpimm_pos_pow2_prefer_ldexp_f64 : FPImmLeaf<f64, [{
+ if (Imm.isNegative())
+ return false;
+
+ int Exp = Imm.getExactLog2Abs();
+ // Prefer leaving the FP inline immediates as they are.
+ // 0.5, 1.0, 2.0, 4.0
+
+ // For f64 ldexp is always better than materializing a 64-bit
+ // constant.
+ return Exp != INT_MIN && (Exp < -1 || Exp > 2);
+ }], FPPow2ToExponentXForm
+>;
+
+def fpimm_neg_pow2_prefer_ldexp_f64 : FPImmLeaf<f64, [{
+ if (!Imm.isNegative())
+ return false;
+ int Exp = Imm.getExactLog2Abs();
+ // Prefer leaving the FP inline immediates as they are.
+ // 0.5, 1.0, 2.0, 4.0
+
+ // For f64 ldexp is always better than materializing a 64-bit
+ // constant.
+ return Exp != INT_MIN && (Exp < -1 || Exp > 2);
+ }], FPPow2ToExponentXForm
+>;
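The same predicate can be sketched in portable C++ with std::frexp, assuming a finite input; this mirrors getExactLog2Abs plus the inline-immediate exclusion:

#include <climits>
#include <cmath>

// Return the exponent if |X| is an exact power of two outside the inline
// immediate range {0.5, 1.0, 2.0, 4.0}, otherwise INT_MIN (sketch).
int preferLdexpExponent(double X) {
  int Exp;
  double Mantissa = std::frexp(std::fabs(X), &Exp); // |X| = Mantissa * 2^Exp
  if (Mantissa != 0.5) // frexp yields exactly 0.5 only for powers of two
    return INT_MIN;
  int Log2 = Exp - 1; // |X| == 2^Log2
  return (Log2 < -1 || Log2 > 2) ? Log2 : INT_MIN;
}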
+
+// f64 is different because we also want to handle cases that may
+// require materialization of the exponent.
+// TODO: If we know f64 ops are fast, prefer add (ldexp x, N), y over fma
+// TODO: For f32/f16, it's not a clear win on code size to use ldexp
+// in place of mul since we have to use the vop3 form. Are there power
+// savings or some other reason to prefer ldexp over mul?
+def : GCNPat<
+ (any_fmul (f64 (VOP3Mods f64:$src0, i32:$src0_mods)),
+ fpimm_pos_pow2_prefer_ldexp_f64:$src1),
+ (V_LDEXP_F64_e64 i32:$src0_mods, VSrc_b64:$src0,
+ 0, (S_MOV_B32 (i32 (FPPow2ToExponentXForm $src1))))
+>;
+
+def : GCNPat<
+ (any_fmul f64:$src0, fpimm_neg_pow2_prefer_ldexp_f64:$src1),
+ (V_LDEXP_F64_e64 SRCMODS.NEG, VSrc_b64:$src0,
+ 0, (S_MOV_B32 (i32 (FPPow2ToExponentXForm $src1))))
+>;
+
+// We want to avoid using VOP3Mods which could pull in another fneg
+// which would then need to be re-negated (which should never happen in
+// practice). I don't see a way to apply an SDNodeXForm that accounts
+// for a second operand.
+def : GCNPat<
+ (any_fmul (fabs f64:$src0), fpimm_neg_pow2_prefer_ldexp_f64:$src1),
+ (V_LDEXP_F64_e64 SRCMODS.NEG_ABS, VSrc_b64:$src0,
+ 0, (S_MOV_B32 (i32 (FPPow2ToExponentXForm $src1))))
+>;
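As a sanity check on the transform these patterns perform, multiplying by a power of two is exactly ldexp (barring overflow/underflow), including the negated form handled via SRCMODS.NEG:

#include <cassert>
#include <cmath>

int main() {
  double X = 1.2345;
  assert(X * 8.0 == std::ldexp(X, 3));      // x * 2^3
  assert(X * -0.125 == std::ldexp(-X, -3)); // negated power: fold the fneg
  return 0;
}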
+
class AMDGPUGenericInstruction : GenericInstruction {
let Namespace = "AMDGPU";
}
@@ -3477,8 +3684,8 @@ def G_AMDGPU_ATOMIC_FMIN : G_ATOMICRMW_OP;
def G_AMDGPU_ATOMIC_FMAX : G_ATOMICRMW_OP;
}
-class BufferAtomicGenericInstruction<bit NoRtn = 0> : AMDGPUGenericInstruction {
- let OutOperandList = !if(NoRtn, (outs), (outs type0:$dst));
+class BufferAtomicGenericInstruction : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$vdata, type1:$rsrc, type2:$vindex, type2:$voffset,
type2:$soffset, untyped_imm_0:$offset,
untyped_imm_0:$cachepolicy, untyped_imm_0:$idxen);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
index df522a9099c0..abb72e8e63c3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
@@ -30,6 +30,7 @@ private:
const SIInstrInfo *TII = nullptr;
MachineDominatorTree *MDT = nullptr;
+ void expandChainCall(MachineInstr &MI);
void earlyTerm(MachineInstr &MI, MachineBasicBlock *EarlyExitBlock);
public:
@@ -116,6 +117,18 @@ static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI,
MDT->getBase().applyUpdates(DTUpdates);
}
+void SILateBranchLowering::expandChainCall(MachineInstr &MI) {
+ // This is a tail call that needs to be expanded into at least
+ // 2 instructions, one for setting EXEC and one for the actual tail call.
+ constexpr unsigned ExecIdx = 3;
+
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(MovOpc), ExecReg)
+ ->addOperand(MI.getOperand(ExecIdx));
+ MI.removeOperand(ExecIdx);
+
+ MI.setDesc(TII->get(AMDGPU::SI_TCRETURN));
+}
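A toy rendering of this rewrite on mnemonic strings rather than MachineInstrs (register and opcode names are illustrative only):

#include <string>
#include <vector>

// The chain-call pseudo becomes "set EXEC, then tail-call".
std::vector<std::string> expandChainCallSketch(bool IsWave32,
                                               const std::string &ExecMask) {
  std::string Mov =
      IsWave32 ? "s_mov_b32 exec_lo, " : "s_mov_b64 exec, ";
  return {Mov + ExecMask, "si_tcreturn <callee>"};
}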
+
void SILateBranchLowering::earlyTerm(MachineInstr &MI,
MachineBasicBlock *EarlyExitBlock) {
MachineBasicBlock &MBB = *MI.getParent();
@@ -158,6 +171,12 @@ bool SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) {
}
break;
+ case AMDGPU::SI_CS_CHAIN_TC_W32:
+ case AMDGPU::SI_CS_CHAIN_TC_W64:
+ expandChainCall(MI);
+ MadeChange = true;
+ break;
+
case AMDGPU::SI_EARLY_TERMINATE_SCC0:
EarlyTermInstrs.push_back(&MI);
break;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index c252d30e250e..9c85ff3c43e2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -161,8 +161,10 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
if (!AddrOp->isReg())
return false;
- // TODO: We should be able to merge physical reg addresses.
- if (AddrOp->getReg().isPhysical())
+ // TODO: We should be able to merge instructions with other physical reg
+ // addresses too.
+ if (AddrOp->getReg().isPhysical() &&
+ AddrOp->getReg() != AMDGPU::SGPR_NULL)
return false;
// If an address has only one use then there will be no other
@@ -320,7 +322,7 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
// FIXME: Handle d16 correctly
return AMDGPU::getMUBUFElements(Opc);
}
- if (TII.isMIMG(MI)) {
+ if (TII.isImage(MI)) {
uint64_t DMaskImm =
TII.getNamedOperand(MI, AMDGPU::OpName::dmask)->getImm();
return llvm::popcount(DMaskImm);
@@ -350,6 +352,9 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
case AMDGPU::FLAT_LOAD_DWORDX2:
case AMDGPU::FLAT_STORE_DWORDX2:
return 2;
+ case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM:
+ case AMDGPU::S_LOAD_DWORDX3_IMM:
case AMDGPU::GLOBAL_LOAD_DWORDX3:
case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR:
case AMDGPU::GLOBAL_STORE_DWORDX3:
@@ -398,15 +403,23 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::BUFFER_LOAD_DWORD_OFFEN_exact:
case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
case AMDGPU::BUFFER_LOAD_DWORD_OFFSET_exact:
+ case AMDGPU::BUFFER_LOAD_DWORD_VBUFFER_OFFEN:
+ case AMDGPU::BUFFER_LOAD_DWORD_VBUFFER_OFFEN_exact:
+ case AMDGPU::BUFFER_LOAD_DWORD_VBUFFER_OFFSET:
+ case AMDGPU::BUFFER_LOAD_DWORD_VBUFFER_OFFSET_exact:
return BUFFER_LOAD;
case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
case AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact:
case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
case AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact:
+ case AMDGPU::BUFFER_STORE_DWORD_VBUFFER_OFFEN:
+ case AMDGPU::BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact:
+ case AMDGPU::BUFFER_STORE_DWORD_VBUFFER_OFFSET:
+ case AMDGPU::BUFFER_STORE_DWORD_VBUFFER_OFFSET_exact:
return BUFFER_STORE;
}
}
- if (TII.isMIMG(Opc)) {
+ if (TII.isImage(Opc)) {
// Ignore instructions encoded without vaddr.
if (!AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr) &&
!AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr0))
@@ -424,35 +437,50 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
switch (AMDGPU::getMTBUFBaseOpcode(Opc)) {
default:
return UNKNOWN;
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_BOTHEN:
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_BOTHEN_exact:
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_IDXEN:
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_IDXEN_exact:
case AMDGPU::TBUFFER_LOAD_FORMAT_X_OFFEN:
case AMDGPU::TBUFFER_LOAD_FORMAT_X_OFFEN_exact:
case AMDGPU::TBUFFER_LOAD_FORMAT_X_OFFSET:
case AMDGPU::TBUFFER_LOAD_FORMAT_X_OFFSET_exact:
- case AMDGPU::TBUFFER_LOAD_FORMAT_X_IDXEN:
- case AMDGPU::TBUFFER_LOAD_FORMAT_X_IDXEN_exact:
- case AMDGPU::TBUFFER_LOAD_FORMAT_X_BOTHEN:
- case AMDGPU::TBUFFER_LOAD_FORMAT_X_BOTHEN_exact:
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN:
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN_exact:
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_VBUFFER_IDXEN:
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_VBUFFER_IDXEN_exact:
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN_exact:
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFSET:
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFSET_exact:
return TBUFFER_LOAD;
case AMDGPU::TBUFFER_STORE_FORMAT_X_OFFEN:
case AMDGPU::TBUFFER_STORE_FORMAT_X_OFFEN_exact:
case AMDGPU::TBUFFER_STORE_FORMAT_X_OFFSET:
case AMDGPU::TBUFFER_STORE_FORMAT_X_OFFSET_exact:
+ case AMDGPU::TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN:
+ case AMDGPU::TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact:
+ case AMDGPU::TBUFFER_STORE_FORMAT_X_VBUFFER_OFFSET:
+ case AMDGPU::TBUFFER_STORE_FORMAT_X_VBUFFER_OFFSET_exact:
return TBUFFER_STORE;
}
}
return UNKNOWN;
case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
return S_BUFFER_LOAD_IMM;
case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM:
return S_BUFFER_LOAD_SGPR_IMM;
case AMDGPU::S_LOAD_DWORD_IMM:
case AMDGPU::S_LOAD_DWORDX2_IMM:
+ case AMDGPU::S_LOAD_DWORDX3_IMM:
case AMDGPU::S_LOAD_DWORDX4_IMM:
case AMDGPU::S_LOAD_DWORDX8_IMM:
return S_LOAD_IMM;
@@ -505,7 +533,7 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
default:
if (TII.isMUBUF(Opc))
return AMDGPU::getMUBUFBaseOpcode(Opc);
- if (TII.isMIMG(Opc)) {
+ if (TII.isImage(Opc)) {
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
assert(Info);
return Info->BaseOpcode;
@@ -524,16 +552,19 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
return Opc;
case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
return AMDGPU::S_BUFFER_LOAD_DWORD_IMM;
case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM:
return AMDGPU::S_BUFFER_LOAD_DWORD_SGPR_IMM;
case AMDGPU::S_LOAD_DWORD_IMM:
case AMDGPU::S_LOAD_DWORDX2_IMM:
+ case AMDGPU::S_LOAD_DWORDX3_IMM:
case AMDGPU::S_LOAD_DWORDX4_IMM:
case AMDGPU::S_LOAD_DWORDX8_IMM:
return AMDGPU::S_LOAD_DWORD_IMM;
@@ -600,11 +631,13 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
return Result;
}
- if (TII.isMIMG(Opc)) {
+ if (TII.isImage(Opc)) {
int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
if (VAddr0Idx >= 0) {
- int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
- Result.NumVAddrs = SRsrcIdx - VAddr0Idx;
+ int RsrcName =
+ TII.isMIMG(Opc) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
+ int RsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RsrcName);
+ Result.NumVAddrs = RsrcIdx - VAddr0Idx;
} else {
Result.VAddr = true;
}
@@ -631,16 +664,19 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
return Result;
case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM:
Result.SOffset = true;
[[fallthrough]];
case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
case AMDGPU::S_LOAD_DWORD_IMM:
case AMDGPU::S_LOAD_DWORDX2_IMM:
+ case AMDGPU::S_LOAD_DWORDX3_IMM:
case AMDGPU::S_LOAD_DWORDX4_IMM:
case AMDGPU::S_LOAD_DWORDX8_IMM:
Result.SBase = true;
@@ -739,6 +775,7 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
}
AddressRegs Regs = getRegs(Opc, *LSO.TII);
+ bool isVIMAGEorVSAMPLE = LSO.TII->isVIMAGE(*I) || LSO.TII->isVSAMPLE(*I);
NumAddresses = 0;
for (unsigned J = 0; J < Regs.NumVAddrs; J++)
@@ -751,8 +788,8 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
AddrIdx[NumAddresses++] =
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sbase);
if (Regs.SRsrc)
- AddrIdx[NumAddresses++] =
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
+ AddrIdx[NumAddresses++] = AMDGPU::getNamedOperandIdx(
+ Opc, isVIMAGEorVSAMPLE ? AMDGPU::OpName::rsrc : AMDGPU::OpName::srsrc);
if (Regs.SOffset)
AddrIdx[NumAddresses++] =
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset);
@@ -763,8 +800,8 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
AddrIdx[NumAddresses++] =
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
if (Regs.SSamp)
- AddrIdx[NumAddresses++] =
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::ssamp);
+ AddrIdx[NumAddresses++] = AMDGPU::getNamedOperandIdx(
+ Opc, isVIMAGEorVSAMPLE ? AMDGPU::OpName::samp : AMDGPU::OpName::ssamp);
assert(NumAddresses <= MaxAddressRegs);
for (unsigned J = 0; J < NumAddresses; J++)
@@ -871,6 +908,9 @@ bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI,
unsigned MaxMask = std::max(CI.DMask, Paired.DMask);
unsigned MinMask = std::min(CI.DMask, Paired.DMask);
+ if (!MaxMask)
+ return false;
+
unsigned AllowedBitsForMin = llvm::countr_zero(MaxMask);
if ((1u << AllowedBitsForMin) <= MinMask)
return false;
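The new guard matters because the larger dmask feeds countr_zero; the whole test can be sketched as a self-contained C++20 predicate:

#include <algorithm>
#include <bit>

// Two image-op dmasks can merge only when the smaller mask lies entirely
// below the lowest set bit of the larger, nonzero mask (sketch).
bool dmasksCombinable(unsigned DMaskA, unsigned DMaskB) {
  unsigned MaxMask = std::max(DMaskA, DMaskB);
  unsigned MinMask = std::min(DMaskA, DMaskB);
  if (!MaxMask) // countr_zero(0) would report 32 allowed bits
    return false;
  unsigned AllowedBitsForMin = std::countr_zero(MaxMask);
  return (1u << AllowedBitsForMin) > MinMask;
}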
@@ -964,6 +1004,17 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI,
return false;
if (CI.CPol != Paired.CPol)
return false;
+ if (CI.InstClass == S_LOAD_IMM || CI.InstClass == S_BUFFER_LOAD_IMM ||
+ CI.InstClass == S_BUFFER_LOAD_SGPR_IMM) {
+ // Reject cases like:
+ // dword + dwordx2 -> dwordx3
+ // dword + dwordx3 -> dwordx4
+ // If we tried to combine these cases, we would fail to extract a subreg
+ // for the result of the second load due to SGPR alignment requirements.
+ if (CI.Width != Paired.Width &&
+ (CI.Width < Paired.Width) == (CI.Offset < Paired.Offset))
+ return false;
+ }
return true;
}
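In isolation, the rejected shapes reduce to a small predicate; a sketch (not the optimizer's actual helper), assuming widths in dwords and offsets in bytes:

// Unequal-width scalar loads merge cleanly only when the wider load sits at
// the lower offset, so the result subregs stay SGPR-aligned (sketch).
bool widthsMergeableForSGPRs(unsigned WidthA, int OffsetA,
                             unsigned WidthB, int OffsetB) {
  if (WidthA == WidthB)
    return true;
  // e.g. reject dword@0 + dwordx2@4 (-> dwordx3); accept dwordx2@0 + dword@8.
  return (WidthA < WidthB) != (OffsetA < OffsetB);
}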
@@ -1043,6 +1094,8 @@ bool SILoadStoreOptimizer::widthsFit(const GCNSubtarget &STM,
case 4:
case 8:
return true;
+ case 3:
+ return STM.hasScalarDwordx3Loads();
}
}
}
@@ -1671,6 +1724,8 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
return 0;
case 2:
return AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM;
+ case 3:
+ return AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM;
case 4:
return AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM;
case 8:
@@ -1682,6 +1737,8 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
return 0;
case 2:
return AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM;
+ case 3:
+ return AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM;
case 4:
return AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM;
case 8:
@@ -1693,6 +1750,8 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
return 0;
case 2:
return AMDGPU::S_LOAD_DWORDX2_IMM;
+ case 3:
+ return AMDGPU::S_LOAD_DWORDX3_IMM;
case 4:
return AMDGPU::S_LOAD_DWORDX4_IMM;
case 8:
@@ -1814,6 +1873,8 @@ SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI,
return nullptr;
case 2:
return &AMDGPU::SReg_64_XEXECRegClass;
+ case 3:
+ return &AMDGPU::SGPR_96RegClass;
case 4:
return &AMDGPU::SGPR_128RegClass;
case 8:
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 00cb5b2878f4..f178324dbbe2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -79,6 +79,7 @@ private:
SetVector<MachineInstr*> LoweredEndCf;
DenseSet<Register> LoweredIf;
SmallSet<MachineBasicBlock *, 4> KillBlocks;
+ SmallSet<Register, 8> RecomputeRegs;
const TargetRegisterClass *BoolRC = nullptr;
unsigned AndOpc;
@@ -297,8 +298,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
// FIXME: Is there a better way of adjusting the liveness? It shouldn't be
// hard to add another def here but I'm not sure how to correctly update the
// valno.
- LIS->removeInterval(SaveExecReg);
- LIS->createAndComputeVirtRegInterval(SaveExecReg);
+ RecomputeRegs.insert(SaveExecReg);
LIS->createAndComputeVirtRegInterval(Tmp);
if (!SimpleIf)
LIS->createAndComputeVirtRegInterval(CopyReg);
@@ -309,6 +309,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
const DebugLoc &DL = MI.getDebugLoc();
Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
MachineBasicBlock::iterator Start = MBB.begin();
@@ -319,7 +320,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
BuildMI(MBB, Start, DL, TII->get(OrSaveExecOpc), SaveReg)
.add(MI.getOperand(1)); // Saved EXEC
if (LV)
- LV->replaceKillInstruction(MI.getOperand(1).getReg(), MI, *OrSaveExec);
+ LV->replaceKillInstruction(SrcReg, MI, *OrSaveExec);
MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();
@@ -331,9 +332,6 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
.addReg(Exec)
.addReg(SaveReg);
- if (LIS)
- LIS->InsertMachineInstrInMaps(*And);
-
MachineInstr *Xor =
BuildMI(MBB, ElsePt, DL, TII->get(XorTermrOpc), Exec)
.addReg(Exec)
@@ -356,12 +354,13 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
MI.eraseFromParent();
LIS->InsertMachineInstrInMaps(*OrSaveExec);
+ LIS->InsertMachineInstrInMaps(*And);
LIS->InsertMachineInstrInMaps(*Xor);
LIS->InsertMachineInstrInMaps(*Branch);
- LIS->removeInterval(DstReg);
- LIS->createAndComputeVirtRegInterval(DstReg);
+ RecomputeRegs.insert(SrcReg);
+ RecomputeRegs.insert(DstReg);
LIS->createAndComputeVirtRegInterval(SaveReg);
// Let this be recomputed.
@@ -388,8 +387,9 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
// AND the break condition operand with exec, then OR that into the "loop
// exit" mask.
MachineInstr *And = nullptr, *Or = nullptr;
+ Register AndReg;
if (!SkipAnding) {
- Register AndReg = MRI->createVirtualRegister(BoolRC);
+ AndReg = MRI->createVirtualRegister(BoolRC);
And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), AndReg)
.addReg(Exec)
.add(MI.getOperand(1));
@@ -398,8 +398,6 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
.addReg(AndReg)
.add(MI.getOperand(2));
- if (LIS)
- LIS->createAndComputeVirtRegInterval(AndReg);
} else {
Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
.add(MI.getOperand(1))
@@ -411,9 +409,13 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
LV->replaceKillInstruction(MI.getOperand(2).getReg(), MI, *Or);
if (LIS) {
- if (And)
- LIS->InsertMachineInstrInMaps(*And);
LIS->ReplaceMachineInstrInMaps(MI, *Or);
+ if (And) {
+ // The read of original operand 1 is now on And, not Or.
+ RecomputeRegs.insert(And->getOperand(2).getReg());
+ LIS->InsertMachineInstrInMaps(*And);
+ LIS->createAndComputeVirtRegInterval(AndReg);
+ }
}
MI.eraseFromParent();
@@ -436,6 +438,7 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {
.add(MI.getOperand(1));
if (LIS) {
+ RecomputeRegs.insert(MI.getOperand(0).getReg());
LIS->ReplaceMachineInstrInMaps(MI, *AndN2);
LIS->InsertMachineInstrInMaps(*Branch);
}
@@ -714,11 +717,13 @@ void SILowerControlFlow::lowerInitExec(MachineBasicBlock *MBB,
if (MI.getOpcode() == AMDGPU::SI_INIT_EXEC) {
// This should be before all vector instructions.
- BuildMI(*MBB, MBB->begin(), MI.getDebugLoc(),
+ MachineInstr *InitMI = BuildMI(*MBB, MBB->begin(), MI.getDebugLoc(),
TII->get(IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64), Exec)
.addImm(MI.getOperand(0).getImm());
- if (LIS)
+ if (LIS) {
LIS->RemoveMachineInstrFromMaps(MI);
+ LIS->InsertMachineInstrInMaps(*InitMI);
+ }
MI.eraseFromParent();
return;
}
@@ -789,8 +794,7 @@ void SILowerControlFlow::lowerInitExec(MachineBasicBlock *MBB,
LIS->InsertMachineInstrInMaps(*CmpMI);
LIS->InsertMachineInstrInMaps(*CmovMI);
- LIS->removeInterval(InputReg);
- LIS->createAndComputeVirtRegInterval(InputReg);
+ RecomputeRegs.insert(InputReg);
LIS->createAndComputeVirtRegInterval(CountReg);
}
@@ -807,7 +811,7 @@ bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
while (!MBB.predecessors().empty()) {
MachineBasicBlock *P = *MBB.pred_begin();
- if (P->getFallThrough() == &MBB)
+ if (P->getFallThrough(false) == &MBB)
FallThrough = P;
P->ReplaceUsesOfBlockWith(&MBB, Succ);
}
@@ -828,14 +832,13 @@ bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
MBB.clear();
MBB.eraseFromParent();
if (FallThrough && !FallThrough->isLayoutSuccessor(Succ)) {
- if (!Succ->canFallThrough()) {
- MachineFunction *MF = FallThrough->getParent();
- MachineFunction::iterator FallThroughPos(FallThrough);
- MF->splice(std::next(FallThroughPos), Succ);
- } else
- BuildMI(*FallThrough, FallThrough->end(),
- FallThrough->findBranchDebugLoc(), TII->get(AMDGPU::S_BRANCH))
- .addMBB(Succ);
+ // Note: we cannot update block layout and preserve live intervals;
+ // hence we must insert a branch.
+ MachineInstr *BranchMI = BuildMI(*FallThrough, FallThrough->end(),
+ FallThrough->findBranchDebugLoc(), TII->get(AMDGPU::S_BRANCH))
+ .addMBB(Succ);
+ if (LIS)
+ LIS->InsertMachineInstrInMaps(*BranchMI);
}
return true;
@@ -845,8 +848,8 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
- EnableOptimizeEndCf =
- RemoveRedundantEndcf && MF.getTarget().getOptLevel() > CodeGenOpt::None;
+ EnableOptimizeEndCf = RemoveRedundantEndcf &&
+ MF.getTarget().getOptLevel() > CodeGenOptLevel::None;
// This doesn't actually need LiveIntervals, but we can preserve them.
LIS = getAnalysisIfAvailable<LiveIntervals>();
@@ -947,6 +950,14 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
optimizeEndCf();
+ if (LIS) {
+ for (Register Reg : RecomputeRegs) {
+ LIS->removeInterval(Reg);
+ LIS->createAndComputeVirtRegInterval(Reg);
+ }
+ }
+
+ RecomputeRegs.clear();
LoweredEndCf.clear();
LoweredIf.clear();
KillBlocks.clear();
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index d4f0906f020a..cfa0c21def79 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -21,21 +21,19 @@
//
//===----------------------------------------------------------------------===//
+#include "SILowerI1Copies.h"
#include "AMDGPU.h"
-#include "GCNSubtarget.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Target/CGPassBuilderOption.h"
#define DEBUG_TYPE "si-i1-copies"
using namespace llvm;
-static unsigned createLaneMaskReg(MachineFunction &MF);
-static unsigned insertUndefLaneMask(MachineBasicBlock &MBB);
+static Register insertUndefLaneMask(MachineBasicBlock *MBB,
+ MachineRegisterInfo *MRI,
+ Register LaneMaskRegAttrs);
namespace {
@@ -43,26 +41,6 @@ class SILowerI1Copies : public MachineFunctionPass {
public:
static char ID;
-private:
- bool IsWave32 = false;
- MachineFunction *MF = nullptr;
- MachineDominatorTree *DT = nullptr;
- MachinePostDominatorTree *PDT = nullptr;
- MachineRegisterInfo *MRI = nullptr;
- const GCNSubtarget *ST = nullptr;
- const SIInstrInfo *TII = nullptr;
-
- unsigned ExecReg;
- unsigned MovOp;
- unsigned AndOp;
- unsigned OrOp;
- unsigned XorOp;
- unsigned AndN2Op;
- unsigned OrN2Op;
-
- DenseSet<unsigned> ConstrainRegs;
-
-public:
SILowerI1Copies() : MachineFunctionPass(ID) {
initializeSILowerI1CopiesPass(*PassRegistry::getPassRegistry());
}
@@ -77,29 +55,53 @@ public:
AU.addRequired<MachinePostDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
+};
+
+class Vreg1LoweringHelper : public PhiLoweringHelper {
+public:
+ Vreg1LoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
+ MachinePostDominatorTree *PDT);
private:
- bool lowerCopiesFromI1();
- bool lowerPhis();
- bool lowerCopiesToI1();
- bool isConstantLaneMask(Register Reg, bool &Val) const;
+ DenseSet<Register> ConstrainRegs;
+
+public:
+ void markAsLaneMask(Register DstReg) const override;
+ void getCandidatesForLowering(
+ SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override;
+ void collectIncomingValuesFromPhi(
+ const MachineInstr *MI,
+ SmallVectorImpl<Incoming> &Incomings) const override;
+ void replaceDstReg(Register NewReg, Register OldReg,
+ MachineBasicBlock *MBB) override;
void buildMergeLaneMasks(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, const DebugLoc &DL,
- unsigned DstReg, unsigned PrevReg, unsigned CurReg);
- MachineBasicBlock::iterator
- getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
+ Register DstReg, Register PrevReg,
+ Register CurReg) override;
+ void constrainIncomingRegisterTakenAsIs(Incoming &In) override;
+ bool lowerCopiesFromI1();
+ bool lowerCopiesToI1();
+ bool cleanConstrainRegs(bool Changed);
bool isVreg1(Register Reg) const {
return Reg.isVirtual() && MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass;
}
-
- bool isLaneMaskReg(unsigned Reg) const {
- return TII->getRegisterInfo().isSGPRReg(*MRI, Reg) &&
- TII->getRegisterInfo().getRegSizeInBits(Reg, *MRI) ==
- ST->getWavefrontSize();
- }
};
+Vreg1LoweringHelper::Vreg1LoweringHelper(MachineFunction *MF,
+ MachineDominatorTree *DT,
+ MachinePostDominatorTree *PDT)
+ : PhiLoweringHelper(MF, DT, PDT) {}
+
+bool Vreg1LoweringHelper::cleanConstrainRegs(bool Changed) {
+ assert(Changed || ConstrainRegs.empty());
+ for (Register Reg : ConstrainRegs)
+ MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass);
+ ConstrainRegs.clear();
+
+ return Changed;
+}
+
/// Helper class that analyzes the relationship between incoming values of a
/// phi in the control flow graph to determine where an incoming value can
/// simply be taken as a scalar lane mask as-is, and where it needs to be
@@ -145,8 +147,7 @@ public:
ArrayRef<MachineBasicBlock *> predecessors() const { return Predecessors; }
- void analyze(MachineBasicBlock &DefBlock,
- ArrayRef<MachineBasicBlock *> IncomingBlocks) {
+ void analyze(MachineBasicBlock &DefBlock, ArrayRef<Incoming> Incomings) {
assert(Stack.empty());
ReachableMap.clear();
ReachableOrdered.clear();
@@ -157,7 +158,8 @@ public:
ReachableMap.try_emplace(&DefBlock, false);
ReachableOrdered.push_back(&DefBlock);
- for (MachineBasicBlock *MBB : IncomingBlocks) {
+ for (auto Incoming : Incomings) {
+ MachineBasicBlock *MBB = Incoming.Block;
if (MBB == &DefBlock) {
ReachableMap[&DefBlock] = true; // self-loop on DefBlock
continue;
@@ -302,34 +304,38 @@ public:
/// blocks, so that the SSA updater doesn't have to search all the way to the
/// function entry.
void addLoopEntries(unsigned LoopLevel, MachineSSAUpdater &SSAUpdater,
- ArrayRef<MachineBasicBlock *> Blocks = {}) {
+ MachineRegisterInfo &MRI, Register LaneMaskRegAttrs,
+ ArrayRef<Incoming> Incomings = {}) {
assert(LoopLevel < CommonDominators.size());
MachineBasicBlock *Dom = CommonDominators[LoopLevel];
- for (MachineBasicBlock *MBB : Blocks)
- Dom = DT.findNearestCommonDominator(Dom, MBB);
+ for (auto &Incoming : Incomings)
+ Dom = DT.findNearestCommonDominator(Dom, Incoming.Block);
- if (!inLoopLevel(*Dom, LoopLevel, Blocks)) {
- SSAUpdater.AddAvailableValue(Dom, insertUndefLaneMask(*Dom));
+ if (!inLoopLevel(*Dom, LoopLevel, Incomings)) {
+ SSAUpdater.AddAvailableValue(
+ Dom, insertUndefLaneMask(Dom, &MRI, LaneMaskRegAttrs));
} else {
// The dominator is part of the loop or the given blocks, so add the
// undef value to unreachable predecessors instead.
for (MachineBasicBlock *Pred : Dom->predecessors()) {
- if (!inLoopLevel(*Pred, LoopLevel, Blocks))
- SSAUpdater.AddAvailableValue(Pred, insertUndefLaneMask(*Pred));
+ if (!inLoopLevel(*Pred, LoopLevel, Incomings))
+ SSAUpdater.AddAvailableValue(
+ Pred, insertUndefLaneMask(Pred, &MRI, LaneMaskRegAttrs));
}
}
}
private:
bool inLoopLevel(MachineBasicBlock &MBB, unsigned LoopLevel,
- ArrayRef<MachineBasicBlock *> Blocks) const {
+ ArrayRef<Incoming> Incomings) const {
auto DomIt = Visited.find(&MBB);
if (DomIt != Visited.end() && DomIt->second <= LoopLevel)
return true;
- if (llvm::is_contained(Blocks, &MBB))
- return true;
+ for (auto &Incoming : Incomings)
+ if (Incoming.Block == &MBB)
+ return true;
return false;
}
@@ -405,19 +411,19 @@ FunctionPass *llvm::createSILowerI1CopiesPass() {
return new SILowerI1Copies();
}
-static unsigned createLaneMaskReg(MachineFunction &MF) {
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- return MRI.createVirtualRegister(ST.isWave32() ? &AMDGPU::SReg_32RegClass
- : &AMDGPU::SReg_64RegClass);
+Register llvm::createLaneMaskReg(MachineRegisterInfo *MRI,
+ Register LaneMaskRegAttrs) {
+ return MRI->cloneVirtualRegister(LaneMaskRegAttrs);
}
-static unsigned insertUndefLaneMask(MachineBasicBlock &MBB) {
- MachineFunction &MF = *MBB.getParent();
+static Register insertUndefLaneMask(MachineBasicBlock *MBB,
+ MachineRegisterInfo *MRI,
+ Register LaneMaskRegAttrs) {
+ MachineFunction &MF = *MBB->getParent();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
- unsigned UndefReg = createLaneMaskReg(MF);
- BuildMI(MBB, MBB.getFirstTerminator(), {}, TII->get(AMDGPU::IMPLICIT_DEF),
+ Register UndefReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
+ BuildMI(*MBB, MBB->getFirstTerminator(), {}, TII->get(AMDGPU::IMPLICIT_DEF),
UndefReg);
return UndefReg;
}
@@ -434,47 +440,17 @@ static unsigned insertUndefLaneMask(MachineBasicBlock &MBB) {
bool SILowerI1Copies::runOnMachineFunction(MachineFunction &TheMF) {
// Only need to run this in SelectionDAG path.
if (TheMF.getProperties().hasProperty(
- MachineFunctionProperties::Property::Selected))
+ MachineFunctionProperties::Property::Selected))
return false;
- MF = &TheMF;
- MRI = &MF->getRegInfo();
- DT = &getAnalysis<MachineDominatorTree>();
- PDT = &getAnalysis<MachinePostDominatorTree>();
-
- ST = &MF->getSubtarget<GCNSubtarget>();
- TII = ST->getInstrInfo();
- IsWave32 = ST->isWave32();
-
- if (IsWave32) {
- ExecReg = AMDGPU::EXEC_LO;
- MovOp = AMDGPU::S_MOV_B32;
- AndOp = AMDGPU::S_AND_B32;
- OrOp = AMDGPU::S_OR_B32;
- XorOp = AMDGPU::S_XOR_B32;
- AndN2Op = AMDGPU::S_ANDN2_B32;
- OrN2Op = AMDGPU::S_ORN2_B32;
- } else {
- ExecReg = AMDGPU::EXEC;
- MovOp = AMDGPU::S_MOV_B64;
- AndOp = AMDGPU::S_AND_B64;
- OrOp = AMDGPU::S_OR_B64;
- XorOp = AMDGPU::S_XOR_B64;
- AndN2Op = AMDGPU::S_ANDN2_B64;
- OrN2Op = AMDGPU::S_ORN2_B64;
- }
+ Vreg1LoweringHelper Helper(&TheMF, &getAnalysis<MachineDominatorTree>(),
+ &getAnalysis<MachinePostDominatorTree>());
bool Changed = false;
- Changed |= lowerCopiesFromI1();
- Changed |= lowerPhis();
- Changed |= lowerCopiesToI1();
-
- assert(Changed || ConstrainRegs.empty());
- for (unsigned Reg : ConstrainRegs)
- MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass);
- ConstrainRegs.clear();
-
- return Changed;
+ Changed |= Helper.lowerCopiesFromI1();
+ Changed |= Helper.lowerPhis();
+ Changed |= Helper.lowerCopiesToI1();
+ return Helper.cleanConstrainRegs(Changed);
}
#ifndef NDEBUG
@@ -486,7 +462,7 @@ static bool isVRegCompatibleReg(const SIRegisterInfo &TRI,
}
#endif
-bool SILowerI1Copies::lowerCopiesFromI1() {
+bool Vreg1LoweringHelper::lowerCopiesFromI1() {
bool Changed = false;
SmallVector<MachineInstr *, 4> DeadCopies;
@@ -529,27 +505,47 @@ bool SILowerI1Copies::lowerCopiesFromI1() {
return Changed;
}
-bool SILowerI1Copies::lowerPhis() {
+PhiLoweringHelper::PhiLoweringHelper(MachineFunction *MF,
+ MachineDominatorTree *DT,
+ MachinePostDominatorTree *PDT)
+ : MF(MF), DT(DT), PDT(PDT) {
+ MRI = &MF->getRegInfo();
+
+ ST = &MF->getSubtarget<GCNSubtarget>();
+ TII = ST->getInstrInfo();
+ IsWave32 = ST->isWave32();
+
+ if (IsWave32) {
+ ExecReg = AMDGPU::EXEC_LO;
+ MovOp = AMDGPU::S_MOV_B32;
+ AndOp = AMDGPU::S_AND_B32;
+ OrOp = AMDGPU::S_OR_B32;
+ XorOp = AMDGPU::S_XOR_B32;
+ AndN2Op = AMDGPU::S_ANDN2_B32;
+ OrN2Op = AMDGPU::S_ORN2_B32;
+ } else {
+ ExecReg = AMDGPU::EXEC;
+ MovOp = AMDGPU::S_MOV_B64;
+ AndOp = AMDGPU::S_AND_B64;
+ OrOp = AMDGPU::S_OR_B64;
+ XorOp = AMDGPU::S_XOR_B64;
+ AndN2Op = AMDGPU::S_ANDN2_B64;
+ OrN2Op = AMDGPU::S_ORN2_B64;
+ }
+}
+
+bool PhiLoweringHelper::lowerPhis() {
MachineSSAUpdater SSAUpdater(*MF);
LoopFinder LF(*DT, *PDT);
PhiIncomingAnalysis PIA(*PDT, TII);
SmallVector<MachineInstr *, 4> Vreg1Phis;
- SmallVector<MachineBasicBlock *, 4> IncomingBlocks;
- SmallVector<unsigned, 4> IncomingRegs;
- SmallVector<unsigned, 4> IncomingUpdated;
-#ifndef NDEBUG
- DenseSet<unsigned> PhiRegisters;
-#endif
+ SmallVector<Incoming, 4> Incomings;
- for (MachineBasicBlock &MBB : *MF) {
- for (MachineInstr &MI : MBB.phis()) {
- if (isVreg1(MI.getOperand(0).getReg()))
- Vreg1Phis.push_back(&MI);
- }
- }
+ getCandidatesForLowering(Vreg1Phis);
if (Vreg1Phis.empty())
return false;
+ DT->getBase().updateDFSNumbers();
MachineBasicBlock *PrevMBB = nullptr;
for (MachineInstr *MI : Vreg1Phis) {
MachineBasicBlock &MBB = *MI->getParent();
@@ -561,29 +557,19 @@ bool SILowerI1Copies::lowerPhis() {
LLVM_DEBUG(dbgs() << "Lower PHI: " << *MI);
Register DstReg = MI->getOperand(0).getReg();
- MRI->setRegClass(DstReg, IsWave32 ? &AMDGPU::SReg_32RegClass
- : &AMDGPU::SReg_64RegClass);
-
- // Collect incoming values.
- for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
- assert(i + 1 < MI->getNumOperands());
- Register IncomingReg = MI->getOperand(i).getReg();
- MachineBasicBlock *IncomingMBB = MI->getOperand(i + 1).getMBB();
- MachineInstr *IncomingDef = MRI->getUniqueVRegDef(IncomingReg);
-
- if (IncomingDef->getOpcode() == AMDGPU::COPY) {
- IncomingReg = IncomingDef->getOperand(1).getReg();
- assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
- assert(!IncomingDef->getOperand(1).getSubReg());
- } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
- continue;
- } else {
- assert(IncomingDef->isPHI() || PhiRegisters.count(IncomingReg));
- }
+ markAsLaneMask(DstReg);
+ initializeLaneMaskRegisterAttributes(DstReg);
- IncomingBlocks.push_back(IncomingMBB);
- IncomingRegs.push_back(IncomingReg);
- }
+ collectIncomingValuesFromPhi(MI, Incomings);
+
+ // Sort the incomings such that incoming values that dominate other incoming
+ // values are sorted earlier. This allows us to do some amount of on-the-fly
+ // constant folding.
+ // Incomings with a smaller DFSNumIn come first; DFSNumIn is 0 for the
+ // entry block.
+ llvm::sort(Incomings, [this](Incoming LHS, Incoming RHS) {
+ return DT->getNode(LHS.Block)->getDFSNumIn() <
+ DT->getNode(RHS.Block)->getDFSNumIn();
+ });
#ifndef NDEBUG
PhiRegisters.insert(DstReg);
@@ -607,64 +593,63 @@ bool SILowerI1Copies::lowerPhis() {
SSAUpdater.Initialize(DstReg);
if (FoundLoopLevel) {
- LF.addLoopEntries(FoundLoopLevel, SSAUpdater, IncomingBlocks);
+ LF.addLoopEntries(FoundLoopLevel, SSAUpdater, *MRI, LaneMaskRegAttrs,
+ Incomings);
- for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
- IncomingUpdated.push_back(createLaneMaskReg(*MF));
- SSAUpdater.AddAvailableValue(IncomingBlocks[i],
- IncomingUpdated.back());
+ for (auto &Incoming : Incomings) {
+ Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
+ SSAUpdater.AddAvailableValue(Incoming.Block, Incoming.UpdatedReg);
}
- for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
- MachineBasicBlock &IMBB = *IncomingBlocks[i];
+ for (auto &Incoming : Incomings) {
+ MachineBasicBlock &IMBB = *Incoming.Block;
buildMergeLaneMasks(
- IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[i],
- SSAUpdater.GetValueInMiddleOfBlock(&IMBB), IncomingRegs[i]);
+ IMBB, getSaluInsertionAtEnd(IMBB), {}, Incoming.UpdatedReg,
+ SSAUpdater.GetValueInMiddleOfBlock(&IMBB), Incoming.Reg);
}
} else {
// The phi is not observed from outside a loop. Use a more accurate
// lowering.
- PIA.analyze(MBB, IncomingBlocks);
+ PIA.analyze(MBB, Incomings);
for (MachineBasicBlock *MBB : PIA.predecessors())
- SSAUpdater.AddAvailableValue(MBB, insertUndefLaneMask(*MBB));
+ SSAUpdater.AddAvailableValue(
+ MBB, insertUndefLaneMask(MBB, MRI, LaneMaskRegAttrs));
- for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
- MachineBasicBlock &IMBB = *IncomingBlocks[i];
+ for (auto &Incoming : Incomings) {
+ MachineBasicBlock &IMBB = *Incoming.Block;
if (PIA.isSource(IMBB)) {
- IncomingUpdated.push_back(0);
- SSAUpdater.AddAvailableValue(&IMBB, IncomingRegs[i]);
+ constrainIncomingRegisterTakenAsIs(Incoming);
+ SSAUpdater.AddAvailableValue(&IMBB, Incoming.Reg);
} else {
- IncomingUpdated.push_back(createLaneMaskReg(*MF));
- SSAUpdater.AddAvailableValue(&IMBB, IncomingUpdated.back());
+ Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
+ SSAUpdater.AddAvailableValue(&IMBB, Incoming.UpdatedReg);
}
}
- for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
- if (!IncomingUpdated[i])
+ for (auto &Incoming : Incomings) {
+ if (!Incoming.UpdatedReg.isValid())
continue;
- MachineBasicBlock &IMBB = *IncomingBlocks[i];
+ MachineBasicBlock &IMBB = *Incoming.Block;
buildMergeLaneMasks(
- IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[i],
- SSAUpdater.GetValueInMiddleOfBlock(&IMBB), IncomingRegs[i]);
+ IMBB, getSaluInsertionAtEnd(IMBB), {}, Incoming.UpdatedReg,
+ SSAUpdater.GetValueInMiddleOfBlock(&IMBB), Incoming.Reg);
}
}
Register NewReg = SSAUpdater.GetValueInMiddleOfBlock(&MBB);
if (NewReg != DstReg) {
- MRI->replaceRegWith(NewReg, DstReg);
+ replaceDstReg(NewReg, DstReg, &MBB);
MI->eraseFromParent();
}
- IncomingBlocks.clear();
- IncomingRegs.clear();
- IncomingUpdated.clear();
+ Incomings.clear();
}
return true;
}
-bool SILowerI1Copies::lowerCopiesToI1() {
+bool Vreg1LoweringHelper::lowerCopiesToI1() {
bool Changed = false;
MachineSSAUpdater SSAUpdater(*MF);
LoopFinder LF(*DT, *PDT);
@@ -691,8 +676,9 @@ bool SILowerI1Copies::lowerCopiesToI1() {
LLVM_DEBUG(dbgs() << "Lower Other: " << MI);
- MRI->setRegClass(DstReg, IsWave32 ? &AMDGPU::SReg_32RegClass
- : &AMDGPU::SReg_64RegClass);
+ markAsLaneMask(DstReg);
+ initializeLaneMaskRegisterAttributes(DstReg);
+
if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF)
continue;
@@ -702,12 +688,15 @@ bool SILowerI1Copies::lowerCopiesToI1() {
if (!SrcReg.isVirtual() || (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
- unsigned TmpReg = createLaneMaskReg(*MF);
+ Register TmpReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64), TmpReg)
.addReg(SrcReg)
.addImm(0);
MI.getOperand(1).setReg(TmpReg);
SrcReg = TmpReg;
+ } else {
+ // SrcReg needs to be live beyond the copy.
+ MI.getOperand(1).setIsKill(false);
}
// Defs in a loop that are observed outside the loop must be transformed
@@ -722,7 +711,7 @@ bool SILowerI1Copies::lowerCopiesToI1() {
if (FoundLoopLevel) {
SSAUpdater.Initialize(DstReg);
SSAUpdater.AddAvailableValue(&MBB, DstReg);
- LF.addLoopEntries(FoundLoopLevel, SSAUpdater);
+ LF.addLoopEntries(FoundLoopLevel, SSAUpdater, *MRI, LaneMaskRegAttrs);
buildMergeLaneMasks(MBB, MI, DL, DstReg,
SSAUpdater.GetValueInMiddleOfBlock(&MBB), SrcReg);
@@ -737,7 +726,7 @@ bool SILowerI1Copies::lowerCopiesToI1() {
return Changed;
}
-bool SILowerI1Copies::isConstantLaneMask(Register Reg, bool &Val) const {
+bool PhiLoweringHelper::isConstantLaneMask(Register Reg, bool &Val) const {
const MachineInstr *MI;
for (;;) {
MI = MRI->getUniqueVRegDef(Reg);
@@ -790,7 +779,7 @@ static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
/// Return a point at the end of the given \p MBB to insert SALU instructions
/// for lane mask calculation. Take terminators and SCC into account.
MachineBasicBlock::iterator
-SILowerI1Copies::getSaluInsertionAtEnd(MachineBasicBlock &MBB) const {
+PhiLoweringHelper::getSaluInsertionAtEnd(MachineBasicBlock &MBB) const {
auto InsertionPt = MBB.getFirstTerminator();
bool TerminatorsUseSCC = false;
for (auto I = InsertionPt, E = MBB.end(); I != E; ++I) {
@@ -816,10 +805,53 @@ SILowerI1Copies::getSaluInsertionAtEnd(MachineBasicBlock &MBB) const {
llvm_unreachable("SCC used by terminator but no def in block");
}
-void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DstReg,
- unsigned PrevReg, unsigned CurReg) {
+// VReg_1 -> SReg_32 or SReg_64
+void Vreg1LoweringHelper::markAsLaneMask(Register DstReg) const {
+ MRI->setRegClass(DstReg, ST->getBoolRC());
+}
+
+void Vreg1LoweringHelper::getCandidatesForLowering(
+ SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB.phis()) {
+ if (isVreg1(MI.getOperand(0).getReg()))
+ Vreg1Phis.push_back(&MI);
+ }
+ }
+}
+
+void Vreg1LoweringHelper::collectIncomingValuesFromPhi(
+ const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
+ for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
+ assert(i + 1 < MI->getNumOperands());
+ Register IncomingReg = MI->getOperand(i).getReg();
+ MachineBasicBlock *IncomingMBB = MI->getOperand(i + 1).getMBB();
+ MachineInstr *IncomingDef = MRI->getUniqueVRegDef(IncomingReg);
+
+ if (IncomingDef->getOpcode() == AMDGPU::COPY) {
+ IncomingReg = IncomingDef->getOperand(1).getReg();
+ assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
+ assert(!IncomingDef->getOperand(1).getSubReg());
+ } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
+ continue;
+ } else {
+ assert(IncomingDef->isPHI() || PhiRegisters.count(IncomingReg));
+ }
+
+ Incomings.emplace_back(IncomingReg, IncomingMBB, Register());
+ }
+}
+
+void Vreg1LoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
+ MachineBasicBlock *MBB) {
+ MRI->replaceRegWith(NewReg, OldReg);
+}
+
+void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL,
+ Register DstReg, Register PrevReg,
+ Register CurReg) {
bool PrevVal = false;
bool PrevConstant = isConstantLaneMask(PrevReg, PrevVal);
bool CurVal = false;
@@ -838,13 +870,13 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
return;
}
- unsigned PrevMaskedReg = 0;
- unsigned CurMaskedReg = 0;
+ Register PrevMaskedReg;
+ Register CurMaskedReg;
if (!PrevConstant) {
if (CurConstant && CurVal) {
PrevMaskedReg = PrevReg;
} else {
- PrevMaskedReg = createLaneMaskReg(*MF);
+ PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
BuildMI(MBB, I, DL, TII->get(AndN2Op), PrevMaskedReg)
.addReg(PrevReg)
.addReg(ExecReg);
@@ -855,7 +887,7 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
if (PrevConstant && PrevVal) {
CurMaskedReg = CurReg;
} else {
- CurMaskedReg = createLaneMaskReg(*MF);
+ CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
BuildMI(MBB, I, DL, TII->get(AndOp), CurMaskedReg)
.addReg(CurReg)
.addReg(ExecReg);
@@ -878,3 +910,7 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
.addReg(CurMaskedReg ? CurMaskedReg : ExecReg);
}
}
+
+void Vreg1LoweringHelper::constrainIncomingRegisterTakenAsIs(Incoming &In) {
+ return;
+}
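
At its core, the lane mask merge built above is plain bit arithmetic: keep the previous value for inactive lanes and take the current value for active ones. A self-contained scalar model of that identity for wave64 (a sketch; the pass emits S_ANDN2/S_AND/S_OR and folds the constant-mask cases instead):

#include <cassert>
#include <cstdint>

// Scalar model of buildMergeLaneMasks: Dst = (Prev & ~exec) | (Cur & exec).
// S_ANDN2_B64 produces Prev & ~exec, S_AND_B64 produces Cur & exec, and
// S_OR_B64 combines the two.
uint64_t mergeLaneMasks(uint64_t Prev, uint64_t Cur, uint64_t Exec) {
  return (Prev & ~Exec) | (Cur & Exec);
}

int main() {
  // All lanes active: the merge is just the current mask.
  assert(mergeLaneMasks(0x0F, 0xF0, ~0ULL) == 0xF0);
  // No lanes active: the previous mask survives unchanged.
  assert(mergeLaneMasks(0x0F, 0xF0, 0) == 0x0F);
}
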
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
new file mode 100644
index 000000000000..5099d39c2d14
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
@@ -0,0 +1,97 @@
+//===-- SILowerI1Copies.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Interface definition of the PhiLoweringHelper class that implements the
+/// lane mask merging algorithm for divergent i1 phis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "GCNSubtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+
+namespace llvm {
+
+/// Incoming value for a lane mask phi: the incoming register \p Reg and the
+/// incoming block \p Block are taken from the machine instruction.
+/// \p UpdatedReg (if valid) is the \p Reg lane mask merged with another lane
+/// mask.
+struct Incoming {
+ Register Reg;
+ MachineBasicBlock *Block;
+ Register UpdatedReg;
+
+ Incoming(Register Reg, MachineBasicBlock *Block, Register UpdatedReg)
+ : Reg(Reg), Block(Block), UpdatedReg(UpdatedReg) {}
+};
+
+Register createLaneMaskReg(MachineRegisterInfo *MRI, Register LaneMaskRegAttrs);
+
+class PhiLoweringHelper {
+public:
+ PhiLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
+ MachinePostDominatorTree *PDT);
+ virtual ~PhiLoweringHelper() = default;
+
+protected:
+ bool IsWave32 = false;
+ MachineFunction *MF = nullptr;
+ MachineDominatorTree *DT = nullptr;
+ MachinePostDominatorTree *PDT = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ const GCNSubtarget *ST = nullptr;
+ const SIInstrInfo *TII = nullptr;
+ Register LaneMaskRegAttrs;
+
+#ifndef NDEBUG
+ DenseSet<Register> PhiRegisters;
+#endif
+
+ Register ExecReg;
+ unsigned MovOp;
+ unsigned AndOp;
+ unsigned OrOp;
+ unsigned XorOp;
+ unsigned AndN2Op;
+ unsigned OrN2Op;
+
+public:
+ bool lowerPhis();
+ bool isConstantLaneMask(Register Reg, bool &Val) const;
+ MachineBasicBlock::iterator
+ getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
+
+ void initializeLaneMaskRegisterAttributes(Register LaneMask) {
+ LaneMaskRegAttrs = LaneMask;
+ }
+
+ bool isLaneMaskReg(Register Reg) const {
+ return TII->getRegisterInfo().isSGPRReg(*MRI, Reg) &&
+ TII->getRegisterInfo().getRegSizeInBits(Reg, *MRI) ==
+ ST->getWavefrontSize();
+ }
+
+ // Helpers used by lowerPhis that differ between the SelectionDAG and
+ // GlobalISel paths.
+
+ virtual void markAsLaneMask(Register DstReg) const = 0;
+ virtual void getCandidatesForLowering(
+ SmallVectorImpl<MachineInstr *> &Vreg1Phis) const = 0;
+ virtual void
+ collectIncomingValuesFromPhi(const MachineInstr *MI,
+ SmallVectorImpl<Incoming> &Incomings) const = 0;
+ virtual void replaceDstReg(Register NewReg, Register OldReg,
+ MachineBasicBlock *MBB) = 0;
+ virtual void buildMergeLaneMasks(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, Register DstReg,
+ Register PrevReg, Register CurReg) = 0;
+ virtual void constrainIncomingRegisterTakenAsIs(Incoming &In) = 0;
+};
+
+} // end namespace llvm
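
The header separates the common driver (lowerPhis) from the hooks that differ between compilation paths; Vreg1LoweringHelper above implements them for the SelectionDAG path. A rough sketch of what another subclass would have to supply — the class name is hypothetical and the bodies are placeholders, not a real implementation:

// Hypothetical subclass; assumes "using namespace llvm;" and this header.
class ExampleLoweringHelper : public PhiLoweringHelper {
public:
  using PhiLoweringHelper::PhiLoweringHelper;

  // Turn the destination into a lane mask register (VReg_1 -> bool RC).
  void markAsLaneMask(Register DstReg) const override {
    MRI->setRegClass(DstReg, ST->getBoolRC());
  }
  // Gather the phis this path wants lowered; a real helper would filter.
  void getCandidatesForLowering(
      SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override {
    for (MachineBasicBlock &MBB : *MF)
      for (MachineInstr &MI : MBB.phis())
        Vreg1Phis.push_back(&MI);
  }
  // Record (register, block) pairs from the phi's operand list.
  void collectIncomingValuesFromPhi(
      const MachineInstr *MI,
      SmallVectorImpl<Incoming> &Incomings) const override {
    for (unsigned i = 1; i < MI->getNumOperands(); i += 2)
      Incomings.emplace_back(MI->getOperand(i).getReg(),
                             MI->getOperand(i + 1).getMBB(), Register());
  }
  void replaceDstReg(Register NewReg, Register OldReg,
                     MachineBasicBlock *) override {
    MRI->replaceRegWith(NewReg, OldReg);
  }
  // Would emit the lane mask merge, Dst = (Prev & ~exec) | (Cur & exec);
  // left empty in this sketch.
  void buildMergeLaneMasks(MachineBasicBlock &, MachineBasicBlock::iterator,
                           const DebugLoc &, Register, Register,
                           Register) override {}
  void constrainIncomingRegisterTakenAsIs(Incoming &) override {}
};
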
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index d21107c02ef7..0ba7792ac436 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -50,7 +50,9 @@ public:
SILowerSGPRSpills() : MachineFunctionPass(ID) {}
void calculateSaveRestoreBlocks(MachineFunction &MF);
- bool spillCalleeSavedRegs(MachineFunction &MF);
+ bool spillCalleeSavedRegs(MachineFunction &MF,
+ SmallVectorImpl<int> &CalleeSavedFIs);
+ void extendWWMVirtRegLiveness(MachineFunction &MF, LiveIntervals *LIS);
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -58,6 +60,13 @@ public:
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
+
+ MachineFunctionProperties getClearedProperties() const override {
+ // SILowerSGPRSpills introduces new virtual VGPRs for spilling SGPRs.
+ return MachineFunctionProperties()
+ .set(MachineFunctionProperties::Property::IsSSA)
+ .set(MachineFunctionProperties::Property::NoVRegs);
+ }
};
} // end anonymous namespace
@@ -197,7 +206,8 @@ static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) {
EntryBB.sortUniqueLiveIns();
}
-bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
+bool SILowerSGPRSpills::spillCalleeSavedRegs(
+ MachineFunction &MF, SmallVectorImpl<int> &CalleeSavedFIs) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const Function &F = MF.getFunction();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -228,6 +238,7 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
TRI->getSpillAlign(*RC), true);
CSI.push_back(CalleeSavedInfo(Reg, JunkFI));
+ CalleeSavedFIs.push_back(JunkFI);
}
}
@@ -248,6 +259,52 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
return false;
}
+void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF,
+ LiveIntervals *LIS) {
+ // TODO: This is a workaround to avoid the unmodelled liveness computed for
+ // whole-wave virtual registers when they are allocated together with the
+ // regular VGPR virtual registers. Presently, the liveness computed during
+ // regalloc is only uniform (i.e., single-lane aware) and doesn't take into
+ // account the divergent control flow that exists for our GPUs. Since the WWM
+ // registers can modify inactive lanes, wave-aware liveness should be computed
+ // for the virtual registers to accurately model their interferences. Without
+ // the divergent CFG for the function, it is difficult to implement such
+ // wave-aware liveness info. Until then, we conservatively extend the liveness
+ // of the wwm registers into the entire function so that they won't be reused
+ // without first spilling/splitting their live ranges.
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ // Insert the IMPLICIT_DEF for the wwm-registers in the entry blocks.
+ for (auto Reg : MFI->getSGPRSpillVGPRs()) {
+ for (MachineBasicBlock *SaveBlock : SaveBlocks) {
+ MachineBasicBlock::iterator InsertBefore = SaveBlock->begin();
+ auto MIB = BuildMI(*SaveBlock, *InsertBefore, InsertBefore->getDebugLoc(),
+ TII->get(AMDGPU::IMPLICIT_DEF), Reg);
+ MFI->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);
+ // Set SGPR_SPILL asm printer flag
+ MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL);
+ if (LIS) {
+ LIS->InsertMachineInstrInMaps(*MIB);
+ }
+ }
+ }
+
+ // Insert the KILL in the return blocks to extend their liveness until the
+ // end of the function. Insert a separate KILL for each VGPR.
+ for (MachineBasicBlock *RestoreBlock : RestoreBlocks) {
+ MachineBasicBlock::iterator InsertBefore =
+ RestoreBlock->getFirstTerminator();
+ for (auto Reg : MFI->getSGPRSpillVGPRs()) {
+ auto MIB =
+ BuildMI(*RestoreBlock, *InsertBefore, InsertBefore->getDebugLoc(),
+ TII->get(TargetOpcode::KILL));
+ MIB.addReg(Reg);
+ if (LIS)
+ LIS->InsertMachineInstrInMaps(*MIB);
+ }
+ }
+}
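
The shape of the workaround is simple: give each whole-wave register a def at the start of every save block and a kill at the end of every restore block, so its live range spans the whole function. A toy model of that bracketing (pure illustration, not LLVM API):

#include <string>
#include <vector>

struct ToyBlock { std::vector<std::string> Instrs; };

// Bracket each whole-wave register between an IMPLICIT_DEF in every save
// block and a KILL in every restore block, conservatively extending its
// live range across the entire function.
void extendWWMLiveness(std::vector<ToyBlock *> &SaveBlocks,
                       std::vector<ToyBlock *> &RestoreBlocks,
                       const std::vector<std::string> &WWMRegs) {
  for (ToyBlock *B : SaveBlocks)
    for (const std::string &R : WWMRegs)
      B->Instrs.insert(B->Instrs.begin(), "IMPLICIT_DEF " + R);
  for (ToyBlock *B : RestoreBlocks)
    for (const std::string &R : WWMRegs)
      B->Instrs.push_back("KILL " + R); // before the terminator in the pass
}
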
+
bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
@@ -261,7 +318,8 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
// First, expose any CSR SGPR spills. This is mostly the same as what PEI
// does, but somewhat simpler.
calculateSaveRestoreBlocks(MF);
- bool HasCSRs = spillCalleeSavedRegs(MF);
+ SmallVector<int> CalleeSavedFIs;
+ bool HasCSRs = spillCalleeSavedRegs(MF, CalleeSavedFIs);
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -275,6 +333,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
bool MadeChange = false;
bool NewReservedRegs = false;
+ bool SpilledToVirtVGPRLanes = false;
// TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
// handled as SpilledToReg in regular PrologEpilogInserter.
@@ -297,23 +356,51 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
- if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
- NewReservedRegs = true;
- bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
- MI, FI, nullptr, Indexes, LIS);
- (void)Spilled;
- assert(Spilled && "failed to spill SGPR to VGPR when allocated");
- SpillFIs.set(FI);
+
+ bool IsCalleeSaveSGPRSpill = llvm::is_contained(CalleeSavedFIs, FI);
+ if (IsCalleeSaveSGPRSpill) {
+ // Spill callee-saved SGPRs into physical VGPR lanes.
+
+ // TODO: This is to ensure the CFIs are static for efficient frame
+ // unwinding in the debugger. Spilling them into virtual VGPR lanes
+ // involves regalloc to allocate the physical VGPRs, and that might
+ // cause intermediate spills/splits of such live ranges for successful
+ // allocation. This would result in broken CFI encoding unless
+ // regalloc-aware CFI generation, which inserts new CFIs along with the
+ // intermediate spills, is implemented. No such support currently
+ // exists in the LLVM compiler.
+ if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI, true)) {
+ NewReservedRegs = true;
+ bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
+ MI, FI, nullptr, Indexes, LIS, true);
+ if (!Spilled)
+ llvm_unreachable(
+ "failed to spill SGPR to physical VGPR lane when allocated");
+ }
+ } else {
+ if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
+ bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
+ MI, FI, nullptr, Indexes, LIS);
+ if (!Spilled)
+ llvm_unreachable(
+ "failed to spill SGPR to virtual VGPR lane when allocated");
+ SpillFIs.set(FI);
+ SpilledToVirtVGPRLanes = true;
+ }
}
}
}
- // FIXME: Adding to live-ins redundant with reserving registers.
- for (MachineBasicBlock &MBB : MF) {
- for (auto Reg : FuncInfo->getSGPRSpillVGPRs())
- MBB.addLiveIn(Reg);
- MBB.sortUniqueLiveIns();
+ if (SpilledToVirtVGPRLanes) {
+ extendWWMVirtRegLiveness(MF, LIS);
+ if (LIS) {
+ // Compute the LiveInterval for the newly created virtual registers.
+ for (auto Reg : FuncInfo->getSGPRSpillVGPRs())
+ LIS->createAndComputeVirtRegInterval(Reg);
+ }
+ }
+ for (MachineBasicBlock &MBB : MF) {
// FIXME: The dead frame indices are replaced with a null register from
// the debug value instructions. We should instead update it with the
// correct register value. But not sure the register value alone is
@@ -334,6 +421,10 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
// lane".
FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false);
+ MadeChange = true;
+ }
+
+ if (SpilledToVirtVGPRLanes) {
const TargetRegisterClass *RC = TRI->getWaveMaskRegClass();
// Shift back the reserved SGPR for EXEC copy into the lowest range.
// This SGPR is reserved to handle the whole-wave spill/copy operations
@@ -342,20 +433,21 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
if (UnusedLowSGPR && TRI->getHWRegIndex(UnusedLowSGPR) <
TRI->getHWRegIndex(FuncInfo->getSGPRForEXECCopy()))
FuncInfo->setSGPRForEXECCopy(UnusedLowSGPR);
-
- MadeChange = true;
} else {
- // No SGPR spills and hence there won't be any WWM spills/copies. Reset the
- // SGPR reserved for EXEC copy.
+ // No SGPR spills to virtual VGPR lanes and hence there won't be any WWM
+ // spills/copies. Reset the SGPR reserved for EXEC copy.
FuncInfo->setSGPRForEXECCopy(AMDGPU::NoRegister);
}
SaveBlocks.clear();
RestoreBlocks.clear();
- // Updated the reserved registers with any VGPRs added for SGPR spills.
- if (NewReservedRegs)
- MRI.freezeReservedRegs(MF);
+ // Update the reserved registers with any physical VGPRs added for SGPR
+ // spills.
+ if (NewReservedRegs) {
+ for (Register Reg : FuncInfo->getWWMReservedRegs())
+ MRI.reserveReg(Reg, TRI);
+ }
return MadeChange;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp
new file mode 100644
index 000000000000..9c3cd1bbd6b0
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp
@@ -0,0 +1,141 @@
+//===-- SILowerWWMCopies.cpp - Lower Copies after regalloc ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Lowers the WWM_COPY instructions for various register classes.
+/// The AMDGPU target generates a WWM_COPY instruction to differentiate a WWM
+/// copy from a regular COPY. This pass inserts the necessary exec mask
+/// manipulation instructions to replicate 'Whole Wave Mode' and then lowers
+/// WWM_COPY back to COPY.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-lower-wwm-copies"
+
+namespace {
+
+class SILowerWWMCopies : public MachineFunctionPass {
+public:
+ static char ID;
+
+ SILowerWWMCopies() : MachineFunctionPass(ID) {
+ initializeSILowerWWMCopiesPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override { return "SI Lower WWM Copies"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ bool isSCCLiveAtMI(const MachineInstr &MI);
+ void addToWWMSpills(MachineFunction &MF, Register Reg);
+
+ LiveIntervals *LIS;
+ SlotIndexes *Indexes;
+ VirtRegMap *VRM;
+ const SIRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+ SIMachineFunctionInfo *MFI;
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies", false,
+ false)
+
+char SILowerWWMCopies::ID = 0;
+
+char &llvm::SILowerWWMCopiesID = SILowerWWMCopies::ID;
+
+bool SILowerWWMCopies::isSCCLiveAtMI(const MachineInstr &MI) {
+ // We can't determine the liveness info if LIS isn't available. Early return
+ // in that case and always assume SCC is live.
+ if (!LIS)
+ return true;
+
+ LiveRange &LR =
+ LIS->getRegUnit(*MCRegUnitIterator(MCRegister::from(AMDGPU::SCC), TRI));
+ SlotIndex Idx = LIS->getInstructionIndex(MI);
+ return LR.liveAt(Idx);
+}
+
+// If \p Reg is assigned a physical VGPR, add that VGPR to the wwm-spills so
+// that all of its lanes are preserved at the function prolog/epilog.
+void SILowerWWMCopies::addToWWMSpills(MachineFunction &MF, Register Reg) {
+ if (Reg.isPhysical())
+ return;
+
+ Register PhysReg = VRM->getPhys(Reg);
+ assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
+ "should have allocated a physical register");
+
+ MFI->allocateWWMSpill(MF, PhysReg);
+}
+
+bool SILowerWWMCopies::runOnMachineFunction(MachineFunction &MF) {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+
+ MFI = MF.getInfo<SIMachineFunctionInfo>();
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
+ Indexes = getAnalysisIfAvailable<SlotIndexes>();
+ VRM = getAnalysisIfAvailable<VirtRegMap>();
+ TRI = ST.getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ if (!MFI->hasVRegFlags())
+ return false;
+
+ bool Changed = false;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (MI.getOpcode() != AMDGPU::WWM_COPY)
+ continue;
+
+ // TODO: Combine adjacent WWM ops under the same exec save/restore.
+ assert(TII->isVGPRCopy(MI));
+
+ // For WWM vector copies, manipulate the exec mask around the copy
+ // instruction.
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineBasicBlock::iterator InsertPt = MI.getIterator();
+ Register RegForExecCopy = MFI->getSGPRForEXECCopy();
+ TII->insertScratchExecCopy(MF, MBB, InsertPt, DL, RegForExecCopy,
+ isSCCLiveAtMI(MI), Indexes);
+ TII->restoreExec(MF, MBB, ++InsertPt, DL, RegForExecCopy, Indexes);
+ addToWWMSpills(MF, MI.getOperand(0).getReg());
+ LLVM_DEBUG(dbgs() << "WWM copy manipulation for " << MI);
+
+ // Lower WWM_COPY back to COPY
+ MI.setDesc(TII->get(AMDGPU::COPY));
+ Changed |= true;
+ }
+ }
+
+ return Changed;
+}
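
The exec manipulation around each WWM_COPY follows the standard whole-wave pattern: save exec into a scratch SGPR, enable all lanes, run the copy, then restore exec (insertScratchExecCopy and restoreExec emit the real instruction sequence). A scalar model of that bracketing, assuming a wave64 all-ones mask:

#include <cassert>
#include <cstdint>
#include <functional>

// Run `Op` with every lane enabled, then restore the original exec mask.
void runInWholeWaveMode(uint64_t &Exec, const std::function<void()> &Op) {
  uint64_t SavedExec = Exec; // save exec to a scratch SGPR
  Exec = ~0ULL;              // enable all lanes
  Op();                      // the WWM copy executes here, touching all lanes
  Exec = SavedExec;          // put the original mask back
}

int main() {
  uint64_t Exec = 0x00FF;
  runInWholeWaveMode(Exec, [] { /* copy with inactive lanes live */ });
  assert(Exec == 0x00FF); // exec is unchanged after the bracketed copy
}
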
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index c9376d0ea653..e8142244b7db 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -7,17 +7,18 @@
//===----------------------------------------------------------------------===//
#include "SIMachineFunctionInfo.h"
-#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
-#include "SIRegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
@@ -36,28 +37,12 @@ const GCNTargetMachine &getTM(const GCNSubtarget *STI) {
SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
const GCNSubtarget *STI)
- : AMDGPUMachineFunction(F, *STI),
- Mode(F),
- GWSResourcePSV(getTM(STI)),
- PrivateSegmentBuffer(false),
- DispatchPtr(false),
- QueuePtr(false),
- KernargSegmentPtr(false),
- DispatchID(false),
- FlatScratchInit(false),
- WorkGroupIDX(false),
- WorkGroupIDY(false),
- WorkGroupIDZ(false),
- WorkGroupInfo(false),
- LDSKernelId(false),
- PrivateSegmentWaveByteOffset(false),
- WorkItemIDX(false),
- WorkItemIDY(false),
- WorkItemIDZ(false),
- ImplicitBufferPtr(false),
- ImplicitArgPtr(false),
- GITPtrHigh(0xffffffff),
- HighBitsOf32BitAddress(0) {
+ : AMDGPUMachineFunction(F, *STI), Mode(F, *STI), GWSResourcePSV(getTM(STI)),
+ UserSGPRInfo(F, *STI), WorkGroupIDX(false), WorkGroupIDY(false),
+ WorkGroupIDZ(false), WorkGroupInfo(false), LDSKernelId(false),
+ PrivateSegmentWaveByteOffset(false), WorkItemIDX(false),
+ WorkItemIDY(false), WorkItemIDZ(false), ImplicitArgPtr(false),
+ GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0) {
const GCNSubtarget &ST = *static_cast<const GCNSubtarget *>(STI);
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
WavesPerEU = ST.getWavesPerEU(F);
@@ -67,16 +52,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
VRegFlags.reserve(1024);
- // FIXME: Should have analysis or something rather than attribute to detect
- // calls.
- const bool HasCalls = F.hasFnAttribute("amdgpu-calls");
-
const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
CC == CallingConv::SPIR_KERNEL;
if (IsKernel) {
- if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
- KernargSegmentPtr = true;
WorkGroupIDX = true;
WorkItemIDX = true;
} else if (CC == CallingConv::AMDGPU_PS) {
@@ -85,7 +64,20 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
MayNeedAGPRs = ST.hasMAIInsts();
- if (!isEntryFunction()) {
+ if (AMDGPU::isChainCC(CC)) {
+ // Chain functions don't receive an SP from their caller, but are free to
+ // set one up. For now, we can use s32 to match what amdgpu_gfx functions
+ // would use if called, but this can be revisited.
+ // FIXME: Only reserve this if we actually need it.
+ StackPtrOffsetReg = AMDGPU::SGPR32;
+
+ ScratchRSrcReg = AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51;
+
+ ArgInfo.PrivateSegmentBuffer =
+ ArgDescriptor::createRegister(ScratchRSrcReg);
+
+ ImplicitArgPtr = false;
+ } else if (!isEntryFunction()) {
if (CC != CallingConv::AMDGPU_Gfx)
ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
@@ -115,12 +107,6 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
}
- bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
- if (isAmdHsaOrMesa && !ST.enableFlatScratch())
- PrivateSegmentBuffer = true;
- else if (ST.isMesaGfxShader(F))
- ImplicitBufferPtr = true;
-
if (!AMDGPU::isGraphics(CC) ||
(CC == CallingConv::AMDGPU_CS && ST.hasArchitectedSGPRs())) {
if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
@@ -145,33 +131,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
ST.getMaxWorkitemID(F, 2) != 0)
WorkItemIDZ = true;
- if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
- DispatchPtr = true;
-
- if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
- QueuePtr = true;
-
- if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
- DispatchID = true;
-
if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id"))
LDSKernelId = true;
}
- // FIXME: This attribute is a hack, we just need an analysis on the function
- // to look for allocas.
- bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
-
- // TODO: This could be refined a lot. The attribute is a poor way of
- // detecting calls or stack objects that may require it before argument
- // lowering.
- if (ST.hasFlatAddressSpace() && isEntryFunction() &&
- (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
- (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
- !ST.flatScratchIsArchitected()) {
- FlatScratchInit = true;
- }
-
if (isEntryFunction()) {
// X, XY, and XYZ are the only supported combinations, so make sure Y is
// enabled if Z is.
@@ -280,12 +243,47 @@ Register SIMachineFunctionInfo::addLDSKernelId() {
return ArgInfo.LDSKernelId.getRegister();
}
+SmallVectorImpl<MCRegister> *SIMachineFunctionInfo::addPreloadedKernArg(
+ const SIRegisterInfo &TRI, const TargetRegisterClass *RC,
+ unsigned AllocSizeDWord, int KernArgIdx, int PaddingSGPRs) {
+ assert(!ArgInfo.PreloadKernArgs.count(KernArgIdx) &&
+ "Preload kernel argument allocated twice.");
+ NumUserSGPRs += PaddingSGPRs;
+ // If the available register tuples are aligned with the kernarg to be
+ // preloaded use that register, otherwise we need to use a set of SGPRs and
+ // merge them.
+ Register PreloadReg =
+ TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0, RC);
+ if (PreloadReg &&
+ (RC == &AMDGPU::SReg_32RegClass || RC == &AMDGPU::SReg_64RegClass)) {
+ ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(PreloadReg);
+ NumUserSGPRs += AllocSizeDWord;
+ } else {
+ for (unsigned I = 0; I < AllocSizeDWord; ++I) {
+ ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(getNextUserSGPR());
+ NumUserSGPRs++;
+ }
+ }
+
+ // Track the actual number of SGPRs that HW will preload to.
+ UserSGPRInfo.allocKernargPreloadSGPRs(AllocSizeDWord + PaddingSGPRs);
+ return &ArgInfo.PreloadKernArgs[KernArgIdx].Regs;
+}
+
void SIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF, Register VGPR,
uint64_t Size, Align Alignment) {
// Skip if it is an entry function or the register is already added.
if (isEntryFunction() || WWMSpills.count(VGPR))
return;
+ // Skip if this is a function with the amdgpu_cs_chain or
+ // amdgpu_cs_chain_preserve calling convention and this is a scratch register.
+ // We never need to allocate a spill for these because we don't even need to
+ // restore the inactive lanes for them (they're scratchier than the usual
+ // scratch registers).
+ if (isChainFunction() && SIRegisterInfo::isChainScratchRegister(VGPR))
+ return;
+
WWMSpills.insert(std::make_pair(
VGPR, MF.getFrameInfo().CreateSpillStackObject(Size, Alignment)));
}
@@ -314,37 +312,23 @@ bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
return false;
}
-bool SIMachineFunctionInfo::allocateVGPRForSGPRSpills(MachineFunction &MF,
- int FI,
- unsigned LaneIndex) {
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
+bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(
+ MachineFunction &MF, int FI, unsigned LaneIndex) {
MachineRegisterInfo &MRI = MF.getRegInfo();
Register LaneVGPR;
if (!LaneIndex) {
- LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
- if (LaneVGPR == AMDGPU::NoRegister) {
- // We have no VGPRs left for spilling SGPRs. Reset because we will not
- // partially spill the SGPR to VGPRs.
- SGPRSpillToVGPRLanes.erase(FI);
- return false;
- }
-
+ LaneVGPR = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
SpillVGPRs.push_back(LaneVGPR);
- // Add this register as live-in to all blocks to avoid machine verifier
- // complaining about use of an undefined physical register.
- for (MachineBasicBlock &BB : MF)
- BB.addLiveIn(LaneVGPR);
} else {
LaneVGPR = SpillVGPRs.back();
}
- SGPRSpillToVGPRLanes[FI].push_back(
+ SGPRSpillsToVirtualVGPRLanes[FI].push_back(
SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex));
return true;
}
-bool SIMachineFunctionInfo::allocateVGPRForPrologEpilogSGPRSpills(
+bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
MachineFunction &MF, int FI, unsigned LaneIndex) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
@@ -355,16 +339,22 @@ bool SIMachineFunctionInfo::allocateVGPRForPrologEpilogSGPRSpills(
if (LaneVGPR == AMDGPU::NoRegister) {
// We have no VGPRs left for spilling SGPRs. Reset because we will not
// partially spill the SGPR to VGPRs.
- PrologEpilogSGPRSpillToVGPRLanes.erase(FI);
+ SGPRSpillsToPhysicalVGPRLanes.erase(FI);
return false;
}
allocateWWMSpill(MF, LaneVGPR);
+ reserveWWMRegister(LaneVGPR);
+ for (MachineBasicBlock &MBB : MF) {
+ MBB.addLiveIn(LaneVGPR);
+ MBB.sortUniqueLiveIns();
+ }
+ SpillPhysVGPRs.push_back(LaneVGPR);
} else {
- LaneVGPR = WWMSpills.back().first;
+ LaneVGPR = SpillPhysVGPRs.back();
}
- PrologEpilogSGPRSpillToVGPRLanes[FI].push_back(
+ SGPRSpillsToPhysicalVGPRLanes[FI].push_back(
SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex));
return true;
}
@@ -373,8 +363,8 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(MachineFunction &MF,
int FI,
bool IsPrologEpilog) {
std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =
- IsPrologEpilog ? PrologEpilogSGPRSpillToVGPRLanes[FI]
- : SGPRSpillToVGPRLanes[FI];
+ IsPrologEpilog ? SGPRSpillsToPhysicalVGPRLanes[FI]
+ : SGPRSpillsToVirtualVGPRLanes[FI];
// This has already been allocated.
if (!SpillLanes.empty())
@@ -395,15 +385,14 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(MachineFunction &MF,
"not spilling SGPRs to VGPRs");
unsigned &NumSpillLanes =
- IsPrologEpilog ? NumVGPRPrologEpilogSpillLanes : NumVGPRSpillLanes;
+ IsPrologEpilog ? NumPhysicalVGPRSpillLanes : NumVirtualVGPRSpillLanes;
for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) {
unsigned LaneIndex = (NumSpillLanes % WaveSize);
- bool Allocated =
- IsPrologEpilog
- ? allocateVGPRForPrologEpilogSGPRSpills(MF, FI, LaneIndex)
- : allocateVGPRForSGPRSpills(MF, FI, LaneIndex);
+ bool Allocated = IsPrologEpilog
+ ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex)
+ : allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex);
if (!Allocated) {
NumSpillLanes -= I;
return false;
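
The lane bookkeeping above packs spilled SGPRs into VGPR lanes: the lane index is the running spill count modulo the wave size, and a fresh VGPR is taken whenever the index wraps to zero. A minimal standalone model of that allocation (illustrative only; the real code distinguishes virtual from physical VGPRs and unwinds on failure):

#include <cassert>
#include <utility>

// Assign (VGPR ordinal, lane) for the Nth spilled SGPR lane in a wave of
// `WaveSize` lanes: lanes fill one VGPR before a new one is allocated.
std::pair<unsigned, unsigned> spillSlot(unsigned NumSpillLanes,
                                        unsigned WaveSize) {
  return {NumSpillLanes / WaveSize,   // which spill VGPR
          NumSpillLanes % WaveSize};  // which lane within it
}

int main() {
  // Wave32: the 33rd spilled lane starts a second VGPR at lane 0.
  assert(spillSlot(32, 32) == std::make_pair(1u, 0u));
  assert(spillSlot(5, 64).second == 5);
}
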
@@ -484,16 +473,25 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
bool SIMachineFunctionInfo::removeDeadFrameIndices(
MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
- // Remove dead frame indices from function frame. And also make sure to remove
- // the frame indices from `SGPRSpillToVGPRLanes` data structure, otherwise, it
- // could result in an unexpected side effect and bug, in case of any
- // re-mapping of freed frame indices by later pass(es) like "stack slot
+ // Remove dead frame indices from the function frame, but keep FP & BP since
+ // spills for them haven't been inserted yet. Also make sure to remove the
+ // frame indices from the `SGPRSpillsToVirtualVGPRLanes` data structure;
+ // otherwise, it could result in unexpected side effects and bugs if freed
+ // frame indices are re-mapped by later pass(es) like "stack slot
// coloring".
- for (auto &R : make_early_inc_range(SGPRSpillToVGPRLanes)) {
+ for (auto &R : make_early_inc_range(SGPRSpillsToVirtualVGPRLanes)) {
MFI.RemoveStackObject(R.first);
- SGPRSpillToVGPRLanes.erase(R.first);
+ SGPRSpillsToVirtualVGPRLanes.erase(R.first);
}
+ // Remove the dead frame indices of CSR SGPRs which are spilled to physical
+ // VGPR lanes during the SILowerSGPRSpills pass.
+ if (!ResetSGPRSpillStackIDs) {
+ for (auto &R : make_early_inc_range(SGPRSpillsToPhysicalVGPRLanes)) {
+ MFI.RemoveStackObject(R.first);
+ SGPRSpillsToPhysicalVGPRLanes.erase(R.first);
+ }
+ }
bool HaveSGPRToMemory = false;
if (ResetSGPRSpillStackIDs) {
@@ -522,7 +520,7 @@ int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
const SIRegisterInfo &TRI) {
if (ScavengeFI)
return *ScavengeFI;
- if (isEntryFunction()) {
+ if (isBottomOfStack()) {
ScavengeFI = MFI.CreateFixedObject(
TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
} else {
@@ -608,6 +606,7 @@ convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
return true;
};
+ // TODO: Need to serialize kernarg preloads.
bool Any = false;
Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
@@ -730,7 +729,7 @@ bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const {
for (const auto &CI : IA->ParseConstraints()) {
for (StringRef Code : CI.Codes) {
Code.consume_front("{");
- if (Code.startswith("a"))
+ if (Code.starts_with("a"))
return true;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 3b4747adf125..dc63ae44c528 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -16,10 +16,12 @@
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
#include "AMDGPUTargetMachine.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIModeRegisterDefaults.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/raw_ostream.h"
@@ -256,6 +258,7 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
uint32_t GDSSize = 0;
Align DynLDSAlign;
bool IsEntryFunction = false;
+ bool IsChainFunction = false;
bool NoSignedZerosFPMath = false;
bool MemoryBound = false;
bool WaveLimiter = false;
@@ -304,6 +307,7 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
YamlIO.mapOptional("gdsSize", MFI.GDSSize, 0u);
YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align());
YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
+ YamlIO.mapOptional("isChainFunction", MFI.IsChainFunction, false);
YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
@@ -434,13 +438,9 @@ private:
unsigned NumSpilledSGPRs = 0;
unsigned NumSpilledVGPRs = 0;
- // Feature bits required for inputs passed in user SGPRs.
- bool PrivateSegmentBuffer : 1;
- bool DispatchPtr : 1;
- bool QueuePtr : 1;
- bool KernargSegmentPtr : 1;
- bool DispatchID : 1;
- bool FlatScratchInit : 1;
+ // Tracks information about user SGPRs that will be set up by hardware and
+ // will apply to all wavefronts of the grid.
+ GCNUserSGPRUsageInfo UserSGPRInfo;
// Feature bits required for inputs passed in system SGPRs.
bool WorkGroupIDX : 1; // Always initialized.
@@ -454,11 +454,6 @@ private:
bool WorkItemIDY : 1;
bool WorkItemIDZ : 1;
- // Private memory buffer
- // Compute directly in sgpr[0:1]
- // Other shaders indirect 64-bits at sgpr[0:1]
- bool ImplicitBufferPtr : 1;
-
// Pointer to where the ABI inserts special kernel arguments separate from the
// user arguments. This is an offset from the KernargSegmentPtr.
bool ImplicitArgPtr : 1;
@@ -496,16 +491,18 @@ public:
};
private:
- // To track VGPR + lane index for each subregister of the SGPR spilled to
- // frameindex key during SILowerSGPRSpills pass.
- DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>> SGPRSpillToVGPRLanes;
- // To track VGPR + lane index for spilling special SGPRs like Frame Pointer
- // identified during PrologEpilogInserter.
+ // To track virtual VGPR + lane index for each subregister of the SGPR spilled
+ // to the frame index key during the SILowerSGPRSpills pass.
+ DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>>
+ SGPRSpillsToVirtualVGPRLanes;
+ // To track physical VGPR + lane index for CSR SGPR spills and special SGPRs
+ // like the Frame Pointer, identified during PrologEpilogInserter.
DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>>
- PrologEpilogSGPRSpillToVGPRLanes;
- unsigned NumVGPRSpillLanes = 0;
- unsigned NumVGPRPrologEpilogSpillLanes = 0;
+ SGPRSpillsToPhysicalVGPRLanes;
+ unsigned NumVirtualVGPRSpillLanes = 0;
+ unsigned NumPhysicalVGPRSpillLanes = 0;
SmallVector<Register, 2> SpillVGPRs;
+ SmallVector<Register, 2> SpillPhysVGPRs;
using WWMSpillsMap = MapVector<Register, int>;
// To track the registers used in instructions that can potentially modify the
// inactive lanes. The WWM instructions and the writelane instructions for
@@ -548,10 +545,10 @@ private:
private:
Register VGPRForAGPRCopy;
- bool allocateVGPRForSGPRSpills(MachineFunction &MF, int FI,
- unsigned LaneIndex);
- bool allocateVGPRForPrologEpilogSGPRSpills(MachineFunction &MF, int FI,
- unsigned LaneIndex);
+ bool allocateVirtualVGPRForSGPRSpills(MachineFunction &MF, int FI,
+ unsigned LaneIndex);
+ bool allocatePhysicalVGPRForSGPRSpills(MachineFunction &MF, int FI,
+ unsigned LaneIndex);
public:
Register getVGPRForAGPRCopy() const {
@@ -583,9 +580,9 @@ public:
SIModeRegisterDefaults getMode() const { return Mode; }
ArrayRef<SIRegisterInfo::SpilledReg>
- getSGPRSpillToVGPRLanes(int FrameIndex) const {
- auto I = SGPRSpillToVGPRLanes.find(FrameIndex);
- return (I == SGPRSpillToVGPRLanes.end())
+ getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const {
+ auto I = SGPRSpillsToVirtualVGPRLanes.find(FrameIndex);
+ return (I == SGPRSpillsToVirtualVGPRLanes.end())
? ArrayRef<SIRegisterInfo::SpilledReg>()
: ArrayRef(I->second);
}
@@ -598,6 +595,10 @@ public:
return PrologEpilogSGPRSpills;
}
+ GCNUserSGPRUsageInfo &getUserSGPRInfo() { return UserSGPRInfo; }
+
+ const GCNUserSGPRUsageInfo &getUserSGPRInfo() const { return UserSGPRInfo; }
+
void addToPrologEpilogSGPRSpills(Register Reg,
PrologEpilogSGPRSaveRestoreInfo SI) {
PrologEpilogSGPRSpills.insert(std::make_pair(Reg, SI));
@@ -647,9 +648,9 @@ public:
}
ArrayRef<SIRegisterInfo::SpilledReg>
- getPrologEpilogSGPRSpillToVGPRLanes(int FrameIndex) const {
- auto I = PrologEpilogSGPRSpillToVGPRLanes.find(FrameIndex);
- return (I == PrologEpilogSGPRSpillToVGPRLanes.end())
+ getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const {
+ auto I = SGPRSpillsToPhysicalVGPRLanes.find(FrameIndex);
+ return (I == SGPRSpillsToPhysicalVGPRLanes.end())
? ArrayRef<SIRegisterInfo::SpilledReg>()
: ArrayRef(I->second);
}
@@ -667,6 +668,8 @@ public:
return VRegFlags.inBounds(Reg) && VRegFlags[Reg] & Flag;
}
+ bool hasVRegFlags() { return VRegFlags.size(); }
+
void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size = 4,
Align Alignment = Align(4));
@@ -728,6 +731,10 @@ public:
Register addFlatScratchInit(const SIRegisterInfo &TRI);
Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
Register addLDSKernelId();
+ SmallVectorImpl<MCRegister> *
+ addPreloadedKernArg(const SIRegisterInfo &TRI, const TargetRegisterClass *RC,
+ unsigned AllocSizeDWord, int KernArgIdx,
+ int PaddingSGPRs);
/// Increment user SGPRs used for padding the argument list only.
Register addReservedUserSGPR() {
@@ -775,6 +782,8 @@ public:
return ArgInfo.WorkGroupInfo.getRegister();
}
+ bool hasLDSKernelId() const { return LDSKernelId; }
+
// Add special VGPR inputs
void setWorkItemIDX(ArgDescriptor Arg) {
ArgInfo.WorkItemIDX = Arg;
@@ -799,30 +808,6 @@ public:
ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
}
- bool hasPrivateSegmentBuffer() const {
- return PrivateSegmentBuffer;
- }
-
- bool hasDispatchPtr() const {
- return DispatchPtr;
- }
-
- bool hasQueuePtr() const {
- return QueuePtr;
- }
-
- bool hasKernargSegmentPtr() const {
- return KernargSegmentPtr;
- }
-
- bool hasDispatchID() const {
- return DispatchID;
- }
-
- bool hasFlatScratchInit() const {
- return FlatScratchInit;
- }
-
bool hasWorkGroupIDX() const {
return WorkGroupIDX;
}
@@ -839,8 +824,6 @@ public:
return WorkGroupInfo;
}
- bool hasLDSKernelId() const { return LDSKernelId; }
-
bool hasPrivateSegmentWaveByteOffset() const {
return PrivateSegmentWaveByteOffset;
}
@@ -861,10 +844,6 @@ public:
return ImplicitArgPtr;
}
- bool hasImplicitBufferPtr() const {
- return ImplicitBufferPtr;
- }
-
AMDGPUFunctionArgInfo &getArgInfo() {
return ArgInfo;
}
@@ -901,6 +880,10 @@ public:
return NumUserSGPRs + NumSystemSGPRs;
}
+ unsigned getNumKernargPreloadedSGPRs() const {
+ return UserSGPRInfo.getNumKernargPreloadSGPRs();
+ }
+
Register getPrivateSegmentWaveByteOffsetSystemSGPR() const {
return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index bc48f7b76c6d..10ec54d3317f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -1055,7 +1055,8 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
VMCnt ? 0 : getVmcntBitMask(IV),
getExpcntBitMask(IV),
LGKMCnt ? 0 : getLgkmcntBitMask(IV));
- BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
+ .addImm(WaitCntImmediate);
Changed = true;
}
@@ -1963,14 +1964,15 @@ bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
VMCnt ? 0 : getVmcntBitMask(IV),
getExpcntBitMask(IV),
LGKMCnt ? 0 : getLgkmcntBitMask(IV));
- BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
+ .addImm(WaitCntImmediate);
Changed = true;
}
if (VSCnt) {
- BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT))
- .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
- .addImm(0);
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT_soft))
+ .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
+ .addImm(0);
Changed = true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp
index 413ef5d162a7..2684a1e3c335 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp
@@ -7,20 +7,26 @@
//===----------------------------------------------------------------------===//
#include "SIModeRegisterDefaults.h"
+#include "GCNSubtarget.h"
using namespace llvm;
-SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
+SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
+ const GCNSubtarget &ST) {
*this = getDefaultForCallingConv(F.getCallingConv());
- StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
- if (!IEEEAttr.empty())
- IEEE = IEEEAttr == "true";
+ if (ST.hasIEEEMode()) {
+ StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
+ if (!IEEEAttr.empty())
+ IEEE = IEEEAttr == "true";
+ }
- StringRef DX10ClampAttr =
- F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
- if (!DX10ClampAttr.empty())
- DX10Clamp = DX10ClampAttr == "true";
+ if (ST.hasDX10ClampMode()) {
+ StringRef DX10ClampAttr =
+ F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
+ if (!DX10ClampAttr.empty())
+ DX10Clamp = DX10ClampAttr == "true";
+ }
StringRef DenormF32Attr =
F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
@@ -36,3 +42,135 @@ SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
FP64FP16Denormals = DenormMode;
}
}
+
+using namespace AMDGPU;
+
+/// Pack the f32 and f64 rounding modes into a combined rounding mode value.
+static constexpr uint32_t getModeRegisterRoundMode(uint32_t HWFP32Val,
+ uint32_t HWFP64Val) {
+ return HWFP32Val << F32FltRoundOffset | HWFP64Val << F64FltRoundOffset;
+}
+
+static constexpr uint64_t encodeFltRoundsTable(uint32_t FltRoundsVal,
+ uint32_t HWF32Val,
+ uint32_t HWF64Val) {
+ uint32_t ModeVal = getModeRegisterRoundMode(HWF32Val, HWF64Val);
+ if (FltRoundsVal > TowardNegative)
+ FltRoundsVal -= ExtendedFltRoundOffset;
+
+ uint32_t BitIndex = ModeVal << 2;
+ return static_cast<uint64_t>(FltRoundsVal) << BitIndex;
+}
+
+// Encode a FLT_ROUNDS value where the two rounding modes are the same and use
+// a standard value.
+static constexpr uint64_t
+encodeFltRoundsTableSame(AMDGPUFltRounds FltRoundsMode, uint32_t HWVal) {
+ return encodeFltRoundsTable(FltRoundsMode, HWVal, HWVal);
+}
+
+// Convert a mode-register-encoded rounding mode to AMDGPUFltRounds.
+static constexpr AMDGPUFltRounds
+decodeIndexFltRoundConversionTable(uint32_t HWMode) {
+ uint32_t TableRead = (FltRoundConversionTable >> (HWMode << 2)) & 0xf;
+ if (TableRead > TowardNegative)
+ TableRead += ExtendedFltRoundOffset;
+ return static_cast<AMDGPUFltRounds>(TableRead);
+}
+
+static constexpr uint32_t HWTowardZero = FP_ROUND_ROUND_TO_ZERO;
+static constexpr uint32_t HWNearestTiesToEven = FP_ROUND_ROUND_TO_NEAREST;
+static constexpr uint32_t HWTowardPositive = FP_ROUND_ROUND_TO_INF;
+static constexpr uint32_t HWTowardNegative = FP_ROUND_ROUND_TO_NEGINF;
+
+const uint64_t AMDGPU::FltRoundConversionTable =
+ encodeFltRoundsTableSame(TowardZeroF32_TowardZeroF64, HWTowardZero) |
+ encodeFltRoundsTableSame(NearestTiesToEvenF32_NearestTiesToEvenF64,
+ HWNearestTiesToEven) |
+ encodeFltRoundsTableSame(TowardPositiveF32_TowardPositiveF64,
+ HWTowardPositive) |
+ encodeFltRoundsTableSame(TowardNegativeF32_TowardNegativeF64,
+ HWTowardNegative) |
+
+ encodeFltRoundsTable(TowardZeroF32_NearestTiesToEvenF64, HWTowardZero,
+ HWNearestTiesToEven) |
+ encodeFltRoundsTable(TowardZeroF32_TowardPositiveF64, HWTowardZero,
+ HWTowardPositive) |
+ encodeFltRoundsTable(TowardZeroF32_TowardNegativeF64, HWTowardZero,
+ HWTowardNegative) |
+
+ encodeFltRoundsTable(NearestTiesToEvenF32_TowardZeroF64,
+ HWNearestTiesToEven, HWTowardZero) |
+ encodeFltRoundsTable(NearestTiesToEvenF32_TowardPositiveF64,
+ HWNearestTiesToEven, HWTowardPositive) |
+ encodeFltRoundsTable(NearestTiesToEvenF32_TowardNegativeF64,
+ HWNearestTiesToEven, HWTowardNegative) |
+
+ encodeFltRoundsTable(TowardPositiveF32_TowardZeroF64, HWTowardPositive,
+ HWTowardZero) |
+ encodeFltRoundsTable(TowardPositiveF32_NearestTiesToEvenF64,
+ HWTowardPositive, HWNearestTiesToEven) |
+ encodeFltRoundsTable(TowardPositiveF32_TowardNegativeF64, HWTowardPositive,
+ HWTowardNegative) |
+
+ encodeFltRoundsTable(TowardNegativeF32_TowardZeroF64, HWTowardNegative,
+ HWTowardZero) |
+ encodeFltRoundsTable(TowardNegativeF32_NearestTiesToEvenF64,
+ HWTowardNegative, HWNearestTiesToEven) |
+ encodeFltRoundsTable(TowardNegativeF32_TowardPositiveF64, HWTowardNegative,
+ HWTowardPositive);
+
+// Verify evaluation of FltRoundConversionTable
+
+// If both modes are the same, should return the standard values.
+static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
+ HWTowardZero, HWTowardZero)) == AMDGPUFltRounds::TowardZero);
+static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
+ HWNearestTiesToEven, HWNearestTiesToEven)) ==
+ AMDGPUFltRounds::NearestTiesToEven);
+static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
+ HWTowardPositive, HWTowardPositive)) ==
+ AMDGPUFltRounds::TowardPositive);
+static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
+ HWTowardNegative, HWTowardNegative)) ==
+ AMDGPUFltRounds::TowardNegative);
+
+static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
+ HWTowardZero, HWNearestTiesToEven)) ==
+ TowardZeroF32_NearestTiesToEvenF64);
+static_assert(decodeIndexFltRoundConversionTable(
+ getModeRegisterRoundMode(HWTowardZero, HWTowardPositive)) ==
+ TowardZeroF32_TowardPositiveF64);
+static_assert(decodeIndexFltRoundConversionTable(
+ getModeRegisterRoundMode(HWTowardZero, HWTowardNegative)) ==
+ TowardZeroF32_TowardNegativeF64);
+
+static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
+ HWNearestTiesToEven, HWTowardZero)) ==
+ NearestTiesToEvenF32_TowardZeroF64);
+static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
+ HWNearestTiesToEven, HWTowardPositive)) ==
+ NearestTiesToEvenF32_TowardPositiveF64);
+static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
+ HWNearestTiesToEven, HWTowardNegative)) ==
+ NearestTiesToEvenF32_TowardNegativeF64);
+
+static_assert(decodeIndexFltRoundConversionTable(
+ getModeRegisterRoundMode(HWTowardPositive, HWTowardZero)) ==
+ TowardPositiveF32_TowardZeroF64);
+static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
+ HWTowardPositive, HWNearestTiesToEven)) ==
+ TowardPositiveF32_NearestTiesToEvenF64);
+static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
+ HWTowardPositive, HWTowardNegative)) ==
+ TowardPositiveF32_TowardNegativeF64);
+
+static_assert(decodeIndexFltRoundConversionTable(
+ getModeRegisterRoundMode(HWTowardNegative, HWTowardZero)) ==
+ TowardNegativeF32_TowardZeroF64);
+static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
+ HWTowardNegative, HWNearestTiesToEven)) ==
+ TowardNegativeF32_NearestTiesToEvenF64);
+static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
+ HWTowardNegative, HWTowardPositive)) ==
+ TowardNegativeF32_TowardPositiveF64);
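The encodeFltRoundsTable/decodeIndexFltRoundConversionTable helpers exercised by these asserts are defined earlier in the file and not fully visible in this hunk. A minimal standalone sketch of the scheme they imply follows; it assumes each table entry is a 4-bit nibble indexed by the combined hardware mode (f32 in bits 0-1, f64/f16 in bits 2-3) and that extended results are biased down by ExtendedFltRoundOffset so they fit in a nibble. The table is passed as a parameter here for self-containment, whereas the committed code reads the global FltRoundConversionTable.

    #include <cstdint>

    // Combined MODE-register rounding field: f32 mode in bits 0-1, f64/f16
    // mode in bits 2-3 (matching F32FltRoundOffset/F64FltRoundOffset below).
    static constexpr uint32_t getModeRegisterRoundMode(uint32_t HWFP32Val,
                                                       uint32_t HWFP64Val) {
      return HWFP32Val | (HWFP64Val << 2);
    }

    static constexpr uint32_t ExtendedFltRoundOffset = 4;
    static constexpr uint32_t TowardNegativeVal = 3; // largest standard value

    // Pack one FLT_ROUNDS result into its 4-bit slot; extended values (>= 8)
    // are biased down so the whole table fits in a uint64_t.
    static constexpr uint64_t encodeFltRoundsTable(uint32_t FltRoundsVal,
                                                   uint32_t HWF32Val,
                                                   uint32_t HWF64Val) {
      uint32_t ModeVal = getModeRegisterRoundMode(HWF32Val, HWF64Val);
      if (FltRoundsVal > TowardNegativeVal)
        FltRoundsVal -= ExtendedFltRoundOffset;
      return static_cast<uint64_t>(FltRoundsVal) << (ModeVal << 2);
    }

    // Read the nibble back and undo the bias, reconstructing the values the
    // static_asserts above check against.
    static constexpr uint32_t
    decodeIndexFltRoundConversionTable(uint64_t Table, uint32_t HWMode) {
      uint32_t TableRead = (Table >> (HWMode << 2)) & 0xf;
      if (TableRead > TowardNegativeVal)
        TableRead += ExtendedFltRoundOffset;
      return TableRead;
    }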
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h
index df2e3f9bff32..9fbd74c3eede 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h
@@ -14,6 +14,8 @@
namespace llvm {
+class GCNSubtarget;
+
// Track defaults for fields in the MODE register.
struct SIModeRegisterDefaults {
/// Floating point opcodes that support exception flag gathering quiet and
@@ -40,7 +42,7 @@ struct SIModeRegisterDefaults {
FP32Denormals(DenormalMode::getIEEE()),
FP64FP16Denormals(DenormalMode::getIEEE()) {}
- SIModeRegisterDefaults(const Function &F);
+ SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST);
static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
SIModeRegisterDefaults Mode;
@@ -85,6 +87,65 @@ struct SIModeRegisterDefaults {
}
};
+namespace AMDGPU {
+
+/// Return values used for llvm.get.rounding
+///
+/// When both the F32 and F64/F16 modes are the same, returns the standard
+/// values. If they differ, returns an extended mode starting at 8.
+enum AMDGPUFltRounds : int8_t {
+ // Inherit everything from RoundingMode
+ TowardZero = static_cast<int8_t>(RoundingMode::TowardZero),
+ NearestTiesToEven = static_cast<int8_t>(RoundingMode::NearestTiesToEven),
+ TowardPositive = static_cast<int8_t>(RoundingMode::TowardPositive),
+ TowardNegative = static_cast<int8_t>(RoundingMode::TowardNegative),
+ NearestTiesToAwayUnsupported =
+ static_cast<int8_t>(RoundingMode::NearestTiesToAway),
+
+ Dynamic = static_cast<int8_t>(RoundingMode::Dynamic),
+
+ // Permute the mismatched rounding mode cases. If the modes are the same, use
+ // the standard values; otherwise, these values are sorted such that higher
+ // hardware encoded values have higher enum values.
+ NearestTiesToEvenF32_NearestTiesToEvenF64 = NearestTiesToEven,
+ NearestTiesToEvenF32_TowardPositiveF64 = 8,
+ NearestTiesToEvenF32_TowardNegativeF64 = 9,
+ NearestTiesToEvenF32_TowardZeroF64 = 10,
+
+ TowardPositiveF32_NearestTiesToEvenF64 = 11,
+ TowardPositiveF32_TowardPositiveF64 = TowardPositive,
+ TowardPositiveF32_TowardNegativeF64 = 12,
+ TowardPositiveF32_TowardZeroF64 = 13,
+
+ TowardNegativeF32_NearestTiesToEvenF64 = 14,
+ TowardNegativeF32_TowardPositiveF64 = 15,
+ TowardNegativeF32_TowardNegativeF64 = TowardNegative,
+ TowardNegativeF32_TowardZeroF64 = 16,
+
+ TowardZeroF32_NearestTiesToEvenF64 = 17,
+ TowardZeroF32_TowardPositiveF64 = 18,
+ TowardZeroF32_TowardNegativeF64 = 19,
+ TowardZeroF32_TowardZeroF64 = TowardZero,
+
+ Invalid = static_cast<int8_t>(RoundingMode::Invalid)
+};
+
+/// Offset of nonstandard values for llvm.get.rounding results from the largest
+/// supported mode.
+static constexpr uint32_t ExtendedFltRoundOffset = 4;
+
+/// Offset in mode register of f32 rounding mode.
+static constexpr uint32_t F32FltRoundOffset = 0;
+
+/// Offset in mode register of f64/f16 rounding mode.
+static constexpr uint32_t F64FltRoundOffset = 2;
+
+// Bit-indexed table to convert from hardware rounding mode values to FLT_ROUNDS
+// values.
+extern const uint64_t FltRoundConversionTable;
+
+} // end namespace AMDGPU
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_SIMODEREGISTERDEFAULTS_H
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index 04c9a6457944..e3f54d01eb22 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -10,6 +10,7 @@
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineOperand.h"
@@ -32,6 +33,7 @@ class SIOptimizeExecMasking : public MachineFunctionPass {
DenseMap<MachineInstr *, MachineInstr *> SaveExecVCmpMapping;
SmallVector<std::pair<MachineInstr *, MachineInstr *>, 1> OrXors;
+ SmallVector<MachineOperand *, 1> KillFlagCandidates;
Register isCopyFromExec(const MachineInstr &MI) const;
Register isCopyToExec(const MachineInstr &MI) const;
@@ -41,15 +43,16 @@ class SIOptimizeExecMasking : public MachineFunctionPass {
MachineBasicBlock::reverse_iterator
findExecCopy(MachineBasicBlock &MBB,
MachineBasicBlock::reverse_iterator I) const;
-
bool isRegisterInUseBetween(MachineInstr &Stop, MachineInstr &Start,
MCRegister Reg, bool UseLiveOuts = false,
bool IgnoreStart = false) const;
bool isRegisterInUseAfter(MachineInstr &Stop, MCRegister Reg) const;
- MachineInstr *findInstrBackwards(MachineInstr &Origin,
- std::function<bool(MachineInstr *)> Pred,
- ArrayRef<MCRegister> NonModifiableRegs,
- unsigned MaxInstructions = 20) const;
+ MachineInstr *findInstrBackwards(
+ MachineInstr &Origin, std::function<bool(MachineInstr *)> Pred,
+ ArrayRef<MCRegister> NonModifiableRegs,
+ MachineInstr *Terminator = nullptr,
+ SmallVectorImpl<MachineOperand *> *KillFlagCandidates = nullptr,
+ unsigned MaxInstructions = 20) const;
bool optimizeExecSequence();
void tryRecordVCmpxAndSaveexecSequence(MachineInstr &MI);
bool optimizeVCMPSaveExecSequence(MachineInstr &SaveExecInstr,
@@ -325,11 +328,13 @@ static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) {
// Backwards-iterate from Origin (for n=MaxInstructions iterations) until either
// the beginning of the BB is reached or Pred evaluates to true - which can be
// an arbitrary condition based on the current MachineInstr, for instance an
-// target instruction.  Breaks prematurely by returning nullptr if one of the
+// target instruction. Breaks prematurely by returning nullptr if one of the
// registers given in NonModifiableRegs is modified by the current instruction.
MachineInstr *SIOptimizeExecMasking::findInstrBackwards(
MachineInstr &Origin, std::function<bool(MachineInstr *)> Pred,
- ArrayRef<MCRegister> NonModifiableRegs, unsigned MaxInstructions) const {
+ ArrayRef<MCRegister> NonModifiableRegs, MachineInstr *Terminator,
+ SmallVectorImpl<MachineOperand *> *KillFlagCandidates,
+ unsigned MaxInstructions) const {
MachineBasicBlock::reverse_iterator A = Origin.getReverseIterator(),
E = Origin.getParent()->rend();
unsigned CurrentIteration = 0;
@@ -344,6 +349,21 @@ MachineInstr *SIOptimizeExecMasking::findInstrBackwards(
for (MCRegister Reg : NonModifiableRegs) {
if (A->modifiesRegister(Reg, TRI))
return nullptr;
+
+ // Check for kills that appear after the terminator instruction, which
+ // would not be detected by clearKillFlags. Such kills make the register
+ // dead at a later point, causing the verifier to fail. We record the
+ // candidates here and use them to clear the kill flags later.
+ if (Terminator && KillFlagCandidates && A != Terminator &&
+ A->killsRegister(Reg, TRI)) {
+ for (MachineOperand &MO : A->operands()) {
+ if (MO.isReg() && MO.isKill()) {
+ Register Candidate = MO.getReg();
+ if (Candidate != Reg && TRI->regsOverlap(Candidate, Reg))
+ KillFlagCandidates->push_back(&MO);
+ }
+ }
+ }
}
++CurrentIteration;
@@ -599,6 +619,9 @@ bool SIOptimizeExecMasking::optimizeVCMPSaveExecSequence(
if (Src1->isReg())
MRI->clearKillFlags(Src1->getReg());
+ for (MachineOperand *MO : KillFlagCandidates)
+ MO->setIsKill(false);
+
SaveExecInstr.eraseFromParent();
VCmp.eraseFromParent();
@@ -690,7 +713,8 @@ void SIOptimizeExecMasking::tryRecordVCmpxAndSaveexecSequence(
NonDefRegs.push_back(Src1->getReg());
if (!findInstrBackwards(
- MI, [&](MachineInstr *Check) { return Check == VCmp; }, NonDefRegs))
+ MI, [&](MachineInstr *Check) { return Check == VCmp; }, NonDefRegs,
+ VCmp, &KillFlagCandidates))
return;
if (VCmp)
@@ -777,6 +801,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
OrXors.clear();
SaveExecVCmpMapping.clear();
+ KillFlagCandidates.clear();
static unsigned SearchWindow = 10;
for (MachineBasicBlock &MBB : MF) {
unsigned SearchCount = 0;
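For context: hoisting the v_cmpx into the saveexec position extends the live ranges of the compare's sources past instructions that may still carry kill flags for them, and the machine verifier rejects a use after a kill. A hedged sketch of the record-then-clear pattern the pass uses (the helper name and the simplified overlap check are illustrative):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineOperand.h"
    #include "llvm/CodeGen/TargetRegisterInfo.h"

    using namespace llvm;

    // Record every kill flag on MI touching a register that overlaps Reg, so
    // the flags can be dropped later if the transform goes through.
    static void collectKillFlags(MachineInstr &MI, MCRegister Reg,
                                 const TargetRegisterInfo &TRI,
                                 SmallVectorImpl<MachineOperand *> &Candidates) {
      for (MachineOperand &MO : MI.operands())
        if (MO.isReg() && MO.isKill() && TRI.regsOverlap(MO.getReg(), Reg))
          Candidates.push_back(&MO);
    }

    // Once optimizeVCMPSaveExecSequence succeeds, the recorded flags are
    // simply cleared:
    //   for (MachineOperand *MO : Candidates)
    //     MO->setIsKill(false);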
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
index e95abae88d7a..8204a70e72d9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
@@ -522,9 +522,11 @@ void SIOptimizeVGPRLiveRange::optimizeLiveRange(
auto *UseBlock = UseMI->getParent();
// Replace uses in Endif block
if (UseBlock == Endif) {
- if (UseMI->isPHI()) {
+ if (UseMI->isPHI())
O.setReg(NewReg);
- } else {
+ else if (UseMI->isDebugInstr())
+ continue;
+ else {
// DetectDeadLanes may mark register uses as undef without removing
// them, in which case a non-phi instruction using the original register
// may exist in the Endif block even though the register is not live
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 97b3161c7f98..53fc2c068624 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -546,7 +546,8 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
- if (Src1->getReg().isPhysical() || Dst->getReg().isPhysical())
+ if (!Src1->isReg() || Src1->getReg().isPhysical() ||
+ Dst->getReg().isPhysical())
break;
if (Opcode == AMDGPU::V_LSHLREV_B32_e32 ||
@@ -584,7 +585,8 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
- if (Src1->getReg().isPhysical() || Dst->getReg().isPhysical())
+ if (!Src1->isReg() || Src1->getReg().isPhysical() ||
+ Dst->getReg().isPhysical())
break;
if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||
@@ -647,7 +649,8 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
- if (Src0->getReg().isPhysical() || Dst->getReg().isPhysical())
+ if (!Src0->isReg() || Src0->getReg().isPhysical() ||
+ Dst->getReg().isPhysical())
break;
return std::make_unique<SDWASrcOperand>(
@@ -675,7 +678,8 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
- if (ValSrc->getReg().isPhysical() || Dst->getReg().isPhysical())
+ if (!ValSrc->isReg() || ValSrc->getReg().isPhysical() ||
+ Dst->getReg().isPhysical())
break;
return std::make_unique<SDWASrcOperand>(
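All four hunks in this file add the same guard: getNamedOperand can return an operand that is not a register (for instance a folded immediate), and MachineOperand::getReg() asserts unless isReg() is true, so the kind check has to come first. A one-function sketch of the defensive pattern, mirroring the bail-outs above:

    #include "llvm/CodeGen/MachineOperand.h"

    // Safe to use as a virtual-register source only when this returns true.
    static bool isVirtualRegOperand(const llvm::MachineOperand &MO) {
      return MO.isReg() && !MO.getReg().isPhysical();
    }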
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
index c2ddfd7881ab..0c57110b4eb1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
@@ -28,6 +28,10 @@ using namespace llvm;
#define DEBUG_TYPE "si-pre-allocate-wwm-regs"
+static cl::opt<bool>
+ EnablePreallocateSGPRSpillVGPRs("amdgpu-prealloc-sgpr-spill-vgprs",
+ cl::init(false), cl::Hidden);
+
namespace {
class SIPreAllocateWWMRegs : public MachineFunctionPass {
@@ -56,11 +60,9 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LiveIntervals>();
- AU.addPreserved<LiveIntervals>();
AU.addRequired<VirtRegMap>();
AU.addRequired<LiveRegMatrix>();
- AU.addPreserved<SlotIndexes>();
- AU.setPreservesCFG();
+ AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -101,7 +103,7 @@ bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
LiveInterval &LI = LIS->getInterval(Reg);
for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
- if (!MRI->isPhysRegUsed(PhysReg) &&
+ if (!MRI->isPhysRegUsed(PhysReg, /*SkipRegMaskTest=*/true) &&
Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
Matrix->assign(LI, PhysReg);
assert(PhysReg != 0);
@@ -201,6 +203,10 @@ bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
RegClassInfo.runOnMachineFunction(MF);
+ bool PreallocateSGPRSpillVGPRs =
+ EnablePreallocateSGPRSpillVGPRs ||
+ MF.getFunction().hasFnAttribute("amdgpu-prealloc-sgpr-spill-vgprs");
+
bool RegsAssigned = false;
// We use a reverse post-order traversal of the control-flow graph to
@@ -217,6 +223,12 @@ bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
RegsAssigned |= processDef(MI.getOperand(0));
+ if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR) {
+ if (!PreallocateSGPRSpillVGPRs)
+ continue;
+ RegsAssigned |= processDef(MI.getOperand(0));
+ }
+
if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM ||
MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM ||
MI.getOpcode() == AMDGPU::ENTER_PSEUDO_WM) {
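Preallocation of the SGPR-spill VGPRs is therefore opt-in, via either the hidden command-line flag or a per-function string attribute. A self-contained sketch of that dual gate (the free-standing function is illustrative; the pass computes the same condition inline):

    #include "llvm/IR/Function.h"
    #include "llvm/Support/CommandLine.h"

    static llvm::cl::opt<bool>
        EnablePreallocateSGPRSpillVGPRs("amdgpu-prealloc-sgpr-spill-vgprs",
                                        llvm::cl::init(false), llvm::cl::Hidden);

    // The cl::opt enables the feature globally; the attribute per function.
    static bool shouldPreallocSGPRSpillVGPRs(const llvm::Function &F) {
      return EnablePreallocateSGPRSpillVGPRs ||
             F.hasFnAttribute("amdgpu-prealloc-sgpr-spill-vgprs");
    }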
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 61444b14a56b..87242a4740c8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -320,6 +320,9 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
if (MI.isConditionalBranch())
return true;
+ if (MI.isMetaInstruction())
+ continue;
+
if (TII->hasUnwantedEffectsWhenEXECEmpty(MI))
return true;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
index b6839c8308d8..9ed7aacc0538 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
@@ -15,27 +15,48 @@
//
#include "SIProgramInfo.h"
+#include "GCNSubtarget.h"
#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
using namespace llvm;
-uint64_t SIProgramInfo::getComputePGMRSrc1() const {
- return S_00B848_VGPRS(VGPRBlocks) | S_00B848_SGPRS(SGPRBlocks) |
- S_00B848_PRIORITY(Priority) | S_00B848_FLOAT_MODE(FloatMode) |
- S_00B848_PRIV(Priv) | S_00B848_DX10_CLAMP(DX10Clamp) |
- S_00B848_DEBUG_MODE(DebugMode) | S_00B848_IEEE_MODE(IEEEMode) |
- S_00B848_WGP_MODE(WgpMode) | S_00B848_MEM_ORDERED(MemOrdered);
+uint64_t SIProgramInfo::getComputePGMRSrc1(const GCNSubtarget &ST) const {
+ uint64_t Reg = S_00B848_VGPRS(VGPRBlocks) | S_00B848_SGPRS(SGPRBlocks) |
+ S_00B848_PRIORITY(Priority) | S_00B848_FLOAT_MODE(FloatMode) |
+ S_00B848_PRIV(Priv) | S_00B848_DEBUG_MODE(DebugMode) |
+ S_00B848_WGP_MODE(WgpMode) | S_00B848_MEM_ORDERED(MemOrdered);
+
+ if (ST.hasDX10ClampMode())
+ Reg |= S_00B848_DX10_CLAMP(DX10Clamp);
+
+ if (ST.hasIEEEMode())
+ Reg |= S_00B848_IEEE_MODE(IEEEMode);
+
+ if (ST.hasRrWGMode())
+ Reg |= S_00B848_RR_WG_MODE(RrWgMode);
+
+ return Reg;
}
-uint64_t SIProgramInfo::getPGMRSrc1(CallingConv::ID CC) const {
+uint64_t SIProgramInfo::getPGMRSrc1(CallingConv::ID CC,
+ const GCNSubtarget &ST) const {
if (AMDGPU::isCompute(CC)) {
- return getComputePGMRSrc1();
+ return getComputePGMRSrc1(ST);
}
uint64_t Reg = S_00B848_VGPRS(VGPRBlocks) | S_00B848_SGPRS(SGPRBlocks) |
S_00B848_PRIORITY(Priority) | S_00B848_FLOAT_MODE(FloatMode) |
- S_00B848_PRIV(Priv) | S_00B848_DX10_CLAMP(DX10Clamp) |
- S_00B848_DEBUG_MODE(DebugMode) | S_00B848_IEEE_MODE(IEEEMode);
+ S_00B848_PRIV(Priv) | S_00B848_DEBUG_MODE(DebugMode);
+
+ if (ST.hasDX10ClampMode())
+ Reg |= S_00B848_DX10_CLAMP(DX10Clamp);
+
+ if (ST.hasIEEEMode())
+ Reg |= S_00B848_IEEE_MODE(IEEEMode);
+
+ if (ST.hasRrWGMode())
+ Reg |= S_00B848_RR_WG_MODE(RrWgMode);
+
switch (CC) {
case CallingConv::AMDGPU_PS:
Reg |= S_00B028_MEM_ORDERED(MemOrdered);
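The reworked encoders OR in DX10_CLAMP, IEEE_MODE and RR_WG_MODE only when the subtarget actually has those fields, since newer chips repurpose the bits. Callers consequently have to thread the subtarget through; a hedged usage sketch (the wrapper function is illustrative, not part of the commit):

    #include "GCNSubtarget.h"
    #include "SIProgramInfo.h"
    #include "llvm/CodeGen/MachineFunction.h"

    using namespace llvm;

    // Compute the PGM_RSRC1 value for the function being emitted.
    static uint64_t emitRSrc1(const SIProgramInfo &PI,
                              const MachineFunction &MF) {
      const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
      return PI.getPGMRSrc1(MF.getFunction().getCallingConv(), ST);
    }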
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.h
index aab127e49463..8c26789f936c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.h
@@ -21,6 +21,8 @@
namespace llvm {
+class GCNSubtarget;
+
/// Track resource usage for kernels / entry functions.
struct SIProgramInfo {
// Fields set in PGM_RSRC1 pm4 packet.
@@ -34,6 +36,7 @@ struct SIProgramInfo {
uint32_t IEEEMode = 0;
uint32_t WgpMode = 0; // GFX10+
uint32_t MemOrdered = 0; // GFX10+
+ uint32_t RrWgMode = 0; // GFX12+
uint64_t ScratchSize = 0;
// State used to calculate fields set in PGM_RSRC2 pm4 packet.
@@ -85,8 +88,8 @@ struct SIProgramInfo {
SIProgramInfo() = default;
/// Compute the value of the ComputePGMRsrc1 register.
- uint64_t getComputePGMRSrc1() const;
- uint64_t getPGMRSrc1(CallingConv::ID CC) const;
+ uint64_t getComputePGMRSrc1(const GCNSubtarget &ST) const;
+ uint64_t getPGMRSrc1(CallingConv::ID CC, const GCNSubtarget &ST) const;
/// Compute the value of the ComputePGMRsrc2 register.
uint64_t getComputePGMRSrc2() const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index c2a272166241..021d797344c5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -19,7 +19,7 @@
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
@@ -397,6 +397,8 @@ const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
case CallingConv::AMDGPU_Gfx:
return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList
: CSR_AMDGPU_SI_Gfx_SaveList;
+ case CallingConv::AMDGPU_CS_ChainPreserve:
+ return CSR_AMDGPU_CS_ChainPreserve_SaveList;
default: {
// Dummy to not crash RegisterClassInfo.
static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
@@ -421,6 +423,11 @@ const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
case CallingConv::AMDGPU_Gfx:
return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
: CSR_AMDGPU_SI_Gfx_RegMask;
+ case CallingConv::AMDGPU_CS_Chain:
+ case CallingConv::AMDGPU_CS_ChainPreserve:
+ // Calls to these functions never return, so we can pretend everything is
+ // preserved.
+ return AMDGPU_AllVGPRs_RegMask;
default:
return nullptr;
}
@@ -430,6 +437,10 @@ const uint32_t *SIRegisterInfo::getNoPreservedMask() const {
return CSR_AMDGPU_NoRegs_RegMask;
}
+bool SIRegisterInfo::isChainScratchRegister(Register VGPR) {
+ return VGPR >= AMDGPU::VGPR0 && VGPR < AMDGPU::VGPR8;
+}
+
const TargetRegisterClass *
SIRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
const MachineFunction &MF) const {
@@ -488,11 +499,11 @@ SIRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const SIFrameLowering *TFI = ST.getFrameLowering();
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
- // During ISel lowering we always reserve the stack pointer in entry
+ // During ISel lowering we always reserve the stack pointer in entry and chain
// functions, but never actually want to reference it when accessing our own
// frame. If we need a frame pointer we use it, but otherwise we can just use
// an immediate "0" which we represent by returning NoRegister.
- if (FuncInfo->isEntryFunction()) {
+ if (FuncInfo->isBottomOfStack()) {
return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg() : Register();
}
return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()
@@ -712,9 +723,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
reserveRegisterTuples(Reserved, Reg);
- for (auto Reg : MFI->getSGPRSpillVGPRs())
- reserveRegisterTuples(Reserved, Reg);
-
return Reserved;
}
@@ -725,12 +733,12 @@ bool SIRegisterInfo::isAsmClobberable(const MachineFunction &MF,
bool SIRegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- // On entry, the base address is 0, so it can't possibly need any more
- // alignment.
+ // On entry or in chain functions, the base address is 0, so it can't possibly
+ // need any more alignment.
// FIXME: Should be able to specify the entry frame alignment per calling
// convention instead.
- if (Info->isEntryFunction())
+ if (Info->isBottomOfStack())
return false;
return TargetRegisterInfo::shouldRealignStack(MF);
@@ -796,10 +804,10 @@ bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
int64_t FullOffset = Offset + getScratchInstrOffset(MI);
+ const SIInstrInfo *TII = ST.getInstrInfo();
if (SIInstrInfo::isMUBUF(*MI))
- return !SIInstrInfo::isLegalMUBUFImmOffset(FullOffset);
+ return !TII->isLegalMUBUFImmOffset(FullOffset);
- const SIInstrInfo *TII = ST.getInstrInfo();
return !TII->isLegalFLATOffset(FullOffset, AMDGPUAS::PRIVATE_ADDRESS,
SIInstrFlags::FlatScratch);
}
@@ -897,8 +905,7 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
assert(SOffset->isImm() && SOffset->getImm() == 0);
#endif
- assert(SIInstrInfo::isLegalMUBUFImmOffset(NewOffset) &&
- "offset should be legal");
+ assert(TII->isLegalMUBUFImmOffset(NewOffset) && "offset should be legal");
FIOp->ChangeToRegister(BaseReg, false);
OffsetOp->setImm(NewOffset);
@@ -912,10 +919,10 @@ bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
int64_t NewOffset = Offset + getScratchInstrOffset(MI);
+ const SIInstrInfo *TII = ST.getInstrInfo();
if (SIInstrInfo::isMUBUF(*MI))
- return SIInstrInfo::isLegalMUBUFImmOffset(NewOffset);
+ return TII->isLegalMUBUFImmOffset(NewOffset);
- const SIInstrInfo *TII = ST.getInstrInfo();
return TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
SIInstrFlags::FlatScratch);
}
@@ -1068,6 +1075,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
case AMDGPU::SI_SPILL_AV32_RESTORE:
case AMDGPU::SI_SPILL_WWM_V32_SAVE:
case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
+ case AMDGPU::SI_SPILL_WWM_AV32_SAVE:
+ case AMDGPU::SI_SPILL_WWM_AV32_RESTORE:
return 1;
default: llvm_unreachable("Invalid spill opcode");
}
@@ -1310,8 +1319,8 @@ void SIRegisterInfo::buildSpillLoadStore(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL,
unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
- RegScavenger *RS, LivePhysRegs *LiveRegs) const {
- assert((!RS || !LiveRegs) && "Only RS or LiveRegs can be set but not both");
+ RegScavenger *RS, LiveRegUnits *LiveUnits) const {
+ assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both");
MachineFunction *MF = MBB.getParent();
const SIInstrInfo *TII = ST.getInstrInfo();
@@ -1394,12 +1403,12 @@ void SIRegisterInfo::buildSpillLoadStore(
bool IsOffsetLegal =
IsFlat ? TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
SIInstrFlags::FlatScratch)
- : SIInstrInfo::isLegalMUBUFImmOffset(MaxOffset);
+ : TII->isLegalMUBUFImmOffset(MaxOffset);
if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {
SOffset = MCRegister();
// We don't have access to the register scavenger if this function is called
- // during PEI::scavengeFrameVirtualRegs() so use LiveRegs in this case.
+ // during PEI::scavengeFrameVirtualRegs() so use LiveUnits in this case.
// TODO: Clobbering SCC is not necessary for scratch instructions in the
// entry.
if (RS) {
@@ -1407,10 +1416,10 @@ void SIRegisterInfo::buildSpillLoadStore(
// Piggy back on the liveness scan we just did see if SCC is dead.
CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
- } else if (LiveRegs) {
- CanClobberSCC = !LiveRegs->contains(AMDGPU::SCC);
+ } else if (LiveUnits) {
+ CanClobberSCC = LiveUnits->available(AMDGPU::SCC);
for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
- if (LiveRegs->available(MF->getRegInfo(), Reg)) {
+ if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
SOffset = Reg;
break;
}
@@ -1426,9 +1435,9 @@ void SIRegisterInfo::buildSpillLoadStore(
if (RS) {
TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
} else {
- assert(LiveRegs);
+ assert(LiveUnits);
for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
- if (LiveRegs->available(MF->getRegInfo(), Reg)) {
+ if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
TmpOffsetVGPR = Reg;
break;
}
@@ -1639,7 +1648,7 @@ void SIRegisterInfo::buildSpillLoadStore(
if (UseVGPROffset && ScratchOffsetReg) {
MIB.addReg(ScratchOffsetReg);
} else {
- assert(FuncInfo->isEntryFunction());
+ assert(FuncInfo->isBottomOfStack());
MIB.addImm(0);
}
}
@@ -1736,10 +1745,13 @@ void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
RegScavenger *RS, SlotIndexes *Indexes,
- LiveIntervals *LIS, bool OnlyToVGPR) const {
+ LiveIntervals *LIS, bool OnlyToVGPR,
+ bool SpillToPhysVGPRLane) const {
SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
- ArrayRef<SpilledReg> VGPRSpills = SB.MFI.getSGPRSpillToVGPRLanes(Index);
+ ArrayRef<SpilledReg> VGPRSpills =
+ SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index)
+ : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index);
bool SpillToVGPR = !VGPRSpills.empty();
if (OnlyToVGPR && !SpillToVGPR)
return false;
@@ -1767,7 +1779,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
// Mark the "old value of vgpr" input undef only if this is the first sgpr
// spill to this specific vgpr in the first basic block.
auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
- SB.TII.get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
+ SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), Spill.VGPR)
.addReg(SubReg, getKillRegState(UseKill))
.addImm(Spill.Lane)
.addReg(Spill.VGPR);
@@ -1813,8 +1825,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
: Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
MachineInstrBuilder WriteLane =
- BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
- SB.TmpVGPR)
+ BuildMI(*SB.MBB, MI, SB.DL,
+ SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), SB.TmpVGPR)
.addReg(SubReg, SubKillState)
.addImm(i % PVD.PerVGPR)
.addReg(SB.TmpVGPR, TmpVGPRFlags);
@@ -1856,10 +1868,13 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
RegScavenger *RS, SlotIndexes *Indexes,
- LiveIntervals *LIS, bool OnlyToVGPR) const {
+ LiveIntervals *LIS, bool OnlyToVGPR,
+ bool SpillToPhysVGPRLane) const {
SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
- ArrayRef<SpilledReg> VGPRSpills = SB.MFI.getSGPRSpillToVGPRLanes(Index);
+ ArrayRef<SpilledReg> VGPRSpills =
+ SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index)
+ : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index);
bool SpillToVGPR = !VGPRSpills.empty();
if (OnlyToVGPR && !SpillToVGPR)
return false;
@@ -1872,8 +1887,8 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
: Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
SpilledReg Spill = VGPRSpills[i];
- auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
- SubReg)
+ auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
+ SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
.addReg(Spill.VGPR)
.addImm(Spill.Lane);
if (SB.NumSubRegs > 1 && i == 0)
@@ -1906,7 +1921,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
bool LastSubReg = (i + 1 == e);
auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
- SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
+ SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
.addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
.addImm(i);
if (SB.NumSubRegs > 1 && i == 0)
@@ -2005,7 +2020,7 @@ bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
/// handled.
bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
- SlotIndexes *Indexes, LiveIntervals *LIS) const {
+ SlotIndexes *Indexes, LiveIntervals *LIS, bool SpillToPhysVGPRLane) const {
switch (MI->getOpcode()) {
case AMDGPU::SI_SPILL_S1024_SAVE:
case AMDGPU::SI_SPILL_S512_SAVE:
@@ -2021,7 +2036,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
case AMDGPU::SI_SPILL_S96_SAVE:
case AMDGPU::SI_SPILL_S64_SAVE:
case AMDGPU::SI_SPILL_S32_SAVE:
- return spillSGPR(MI, FI, RS, Indexes, LIS, true);
+ return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
case AMDGPU::SI_SPILL_S1024_RESTORE:
case AMDGPU::SI_SPILL_S512_RESTORE:
case AMDGPU::SI_SPILL_S384_RESTORE:
@@ -2036,7 +2051,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
case AMDGPU::SI_SPILL_S96_RESTORE:
case AMDGPU::SI_SPILL_S64_RESTORE:
case AMDGPU::SI_SPILL_S32_RESTORE:
- return restoreSGPR(MI, FI, RS, Indexes, LIS, true);
+ return restoreSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
default:
llvm_unreachable("not an SGPR spill instruction");
}
@@ -2141,7 +2156,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_AV96_SAVE:
case AMDGPU::SI_SPILL_AV64_SAVE:
case AMDGPU::SI_SPILL_AV32_SAVE:
- case AMDGPU::SI_SPILL_WWM_V32_SAVE: {
+ case AMDGPU::SI_SPILL_WWM_V32_SAVE:
+ case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
const MachineOperand *VData = TII->getNamedOperand(*MI,
AMDGPU::OpName::vdata);
assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
@@ -2208,7 +2224,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_AV384_RESTORE:
case AMDGPU::SI_SPILL_AV512_RESTORE:
case AMDGPU::SI_SPILL_AV1024_RESTORE:
- case AMDGPU::SI_SPILL_WWM_V32_RESTORE: {
+ case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
+ case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: {
const MachineOperand *VData = TII->getNamedOperand(*MI,
AMDGPU::OpName::vdata);
assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
@@ -2406,7 +2423,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
bool IsMUBUF = TII->isMUBUF(*MI);
- if (!IsMUBUF && !MFI->isEntryFunction()) {
+ if (!IsMUBUF && !MFI->isBottomOfStack()) {
// Convert to a swizzled stack address by scaling by the wave size.
// In an entry function/kernel the offset is already swizzled.
bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
@@ -2425,10 +2442,13 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
if (Offset == 0) {
unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
: AMDGPU::V_LSHRREV_B32_e64;
- // XXX - This never happens because of emergency scavenging slot at 0?
- auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), ResultReg)
- .addImm(ST.getWavefrontSizeLog2())
- .addReg(FrameReg);
+ auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), ResultReg);
+ if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
+ // For V_LSHRREV, the operands are reversed (the shift count goes
+ // first).
+ Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg);
+ else
+ Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2());
if (IsSALU && !LiveSCC)
Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
if (IsSALU && LiveSCC) {
@@ -2541,7 +2561,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
= TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
int64_t NewOffset = OldImm + Offset;
- if (SIInstrInfo::isLegalMUBUFImmOffset(NewOffset) &&
+ if (TII->isLegalMUBUFImmOffset(NewOffset) &&
buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
MI->eraseFromParent();
return true;
@@ -2568,6 +2588,10 @@ StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const {
return AMDGPUInstPrinter::getRegisterName(Reg);
}
+unsigned AMDGPU::getRegBitWidth(const TargetRegisterClass &RC) {
+ return getRegBitWidth(RC.getID());
+}
+
static const TargetRegisterClass *
getAnyVGPRClassForBitWidth(unsigned BitWidth) {
if (BitWidth == 64)
@@ -3059,7 +3083,8 @@ SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
const RegisterBank &RB) const {
switch (RB.getID()) {
case AMDGPU::VGPRRegBankID:
- return getVGPRClassForBitWidth(std::max(32u, Size));
+ return getVGPRClassForBitWidth(
+ std::max(ST.useRealTrue16Insts() ? 16u : 32u, Size));
case AMDGPU::VCCRegBankID:
assert(Size == 1);
return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
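A note on the LivePhysRegs -> LiveRegUnits migration visible throughout this file: LivePhysRegs::available(MRI, Reg) also rejected reserved registers, while LiveRegUnits::available(Reg) only checks liveness, which is why the isReserved() test now appears explicitly. A small sketch of the combined check:

    #include "llvm/CodeGen/LiveRegUnits.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"

    // A scratch register must be both free at this point and not reserved.
    static bool isFreeScratchReg(const llvm::LiveRegUnits &Units,
                                 const llvm::MachineRegisterInfo &MRI,
                                 llvm::MCRegister Reg) {
      return Units.available(Reg) && !MRI.isReserved(Reg);
    }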
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 2120b47c581e..88d568672098 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -14,6 +14,8 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
+#include "llvm/ADT/BitVector.h"
+
#define GET_REGINFO_HEADER
#include "AMDGPUGenRegisterInfo.inc"
@@ -23,7 +25,7 @@ namespace llvm {
class GCNSubtarget;
class LiveIntervals;
-class LivePhysRegs;
+class LiveRegUnits;
class RegisterBank;
struct SGPRSpillBuilder;
@@ -90,6 +92,11 @@ public:
CallingConv::ID) const override;
const uint32_t *getNoPreservedMask() const override;
+ // Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling
+ // conventions are free to use certain VGPRs without saving and restoring any
+ // lanes (not even inactive ones).
+ static bool isChainScratchRegister(Register VGPR);
+
// Stack access is very expensive. CSRs are also the high registers, and we
// want to minimize the number of used registers.
unsigned getCSRFirstUseCost() const override {
@@ -142,31 +149,30 @@ public:
void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset,
bool IsLoad, bool IsKill = true) const;
- /// If \p OnlyToVGPR is true, this will only succeed if this
+ /// If \p OnlyToVGPR is true, this will only succeed if this manages to find a
+ /// free VGPR lane to spill.
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
- bool OnlyToVGPR = false) const;
+ bool OnlyToVGPR = false,
+ bool SpillToPhysVGPRLane = false) const;
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
- bool OnlyToVGPR = false) const;
+ bool OnlyToVGPR = false,
+ bool SpillToPhysVGPRLane = false) const;
bool spillEmergencySGPR(MachineBasicBlock::iterator MI,
MachineBasicBlock &RestoreMBB, Register SGPR,
RegScavenger *RS) const;
- bool supportsBackwardScavenger() const override {
- return true;
- }
-
bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS) const override;
- bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI,
- int FI, RegScavenger *RS,
- SlotIndexes *Indexes = nullptr,
- LiveIntervals *LIS = nullptr) const;
+ bool eliminateSGPRToVGPRSpillFrameIndex(
+ MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
+ SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
+ bool SpillToPhysVGPRLane = false) const;
StringRef getRegAsmName(MCRegister Reg) const override;
@@ -416,14 +422,14 @@ public:
// Insert spill or restore instructions.
// When lowering spill pseudos, the RegScavenger should be set.
// For creating spill instructions during frame lowering, where no scavenger
- // is available, LiveRegs can be used.
+ // is available, LiveUnits can be used.
void buildSpillLoadStore(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, const DebugLoc &DL,
unsigned LoadStoreOp, int Index, Register ValueReg,
bool ValueIsKill, MCRegister ScratchOffsetReg,
int64_t InstrOffset, MachineMemOperand *MMO,
RegScavenger *RS,
- LivePhysRegs *LiveRegs = nullptr) const;
+ LiveRegUnits *LiveUnits = nullptr) const;
// Return alignment in register file of first register in a register tuple.
unsigned getRegClassAlignmentNumBits(const TargetRegisterClass *RC) const {
@@ -445,6 +451,11 @@ public:
unsigned SubReg) const;
};
+namespace AMDGPU {
+/// Get the size in bits of a register from the register class \p RC.
+unsigned getRegBitWidth(const TargetRegisterClass &RC);
+} // namespace AMDGPU
+
} // End namespace llvm
#endif
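The new AMDGPU::getRegBitWidth overload simply forwards to the existing ID-based query, saving the RC.getID() dance at call sites. A one-line usage sketch:

    #include "SIRegisterInfo.h"

    // Yields 32 for the VGPR_32 class.
    static unsigned vgpr32Width() {
      return llvm::AMDGPU::getRegBitWidth(llvm::AMDGPU::VGPR_32RegClass);
    }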
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index b2b1b458a63a..981da13fe089 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -122,10 +122,18 @@ class SIRegisterTuples<list<SubRegIndex> Indices, RegisterClass RC,
//===----------------------------------------------------------------------===//
// Declarations that describe the SI registers
//===----------------------------------------------------------------------===//
-class SIReg <string n, bits<16> regIdx = 0> :
- Register<n> {
+class SIReg <string n, bits<8> regIdx = 0, bit isAGPROrVGPR = 0,
+ bit isHi = 0> : Register<n> {
let Namespace = "AMDGPU";
- let HWEncoding = regIdx;
+
+ // These are generic helper values we use to form actual register
+ // codes. They should not be assumed to match any particular register
+ // encodings on any particular subtargets.
+ let HWEncoding{7-0} = regIdx;
+ let HWEncoding{8} = isAGPROrVGPR;
+ let HWEncoding{9} = isHi;
+
+ int Index = !cast<int>(regIdx);
}
// For register classes that use TSFlags.
@@ -148,28 +156,22 @@ class SIRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList>
let TSFlags{4} = HasSGPR;
}
-multiclass SIRegLoHi16 <string n, bits<16> regIdx, bit ArtificialHigh = 1,
- bit HWEncodingHigh = 0> {
- // There is no special encoding for 16 bit subregs, these are not real
- // registers but rather operands for instructions preserving other 16 bits
- // of the result or reading just 16 bits of a 32 bit VGPR.
- // It is encoded as a corresponding 32 bit register.
- // Non-VGPR register classes use it as we need to have matching subregisters
- // to move instructions and data between ALUs.
- def _LO16 : SIReg<n#".l", regIdx> {
- let HWEncoding{8} = HWEncodingHigh;
- }
- def _HI16 : SIReg<!if(ArtificialHigh, "", n#".h"), regIdx> {
+multiclass SIRegLoHi16 <string n, bits<8> regIdx, bit ArtificialHigh = 1,
+ bit isAGPROrVGPR = 0> {
+ def _LO16 : SIReg<n#".l", regIdx, isAGPROrVGPR>;
+ def _HI16 : SIReg<!if(ArtificialHigh, "", n#".h"), regIdx, isAGPROrVGPR,
+ /* isHi */ 1> {
let isArtificial = ArtificialHigh;
- let HWEncoding{8} = HWEncodingHigh;
}
def "" : RegisterWithSubRegs<n, [!cast<Register>(NAME#"_LO16"),
!cast<Register>(NAME#"_HI16")]> {
let Namespace = "AMDGPU";
let SubRegIndices = [lo16, hi16];
let CoveredBySubRegs = !not(ArtificialHigh);
- let HWEncoding = regIdx;
- let HWEncoding{8} = HWEncodingHigh;
+ let HWEncoding{7-0} = regIdx;
+ let HWEncoding{8} = isAGPROrVGPR;
+
+ int Index = !cast<int>(regIdx);
}
}
@@ -247,7 +249,7 @@ def SGPR_NULL64 :
// the high 32 bits. The lower 32 bits are always zero (for base) or
// -1 (for limit). Since we cannot access the high 32 bits, when we
// need them, we need to do a 64 bit load and extract the bits manually.
-multiclass ApertureRegister<string name, bits<16> regIdx> {
+multiclass ApertureRegister<string name, bits<8> regIdx> {
let isConstant = true in {
// FIXME: We shouldn't need to define subregisters for these (nor add them to any 16 bit
// register classes), but if we don't it seems to confuse the TableGen
@@ -315,7 +317,7 @@ foreach Index = 0...15 in {
defm TTMP#Index : SIRegLoHi16<"ttmp"#Index, 0>;
}
-multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
+multiclass FLAT_SCR_LOHI_m <string n, bits<8> ci_e, bits<8> vi_e> {
defm _ci : SIRegLoHi16<n, ci_e>;
defm _vi : SIRegLoHi16<n, vi_e>;
defm "" : SIRegLoHi16<n, 0>;
@@ -412,7 +414,7 @@ def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
// SGPR 64-bit registers
def SGPR_64Regs : SIRegisterTuples<getSubRegs<2>.ret, SGPR_32, 105, 2, 2, "s">;
-// SGPR 96-bit registers. No operations use these, but for symmetry with 96-bit VGPRs.
+// SGPR 96-bit registers.
def SGPR_96Regs : SIRegisterTuples<getSubRegs<3>.ret, SGPR_32, 105, 4, 3, "s">;
// SGPR 128-bit registers
@@ -591,7 +593,6 @@ def VGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
let AllocationPriority = 0;
let Size = 16;
let GeneratePressureSet = 0;
- let BaseClassOrder = 16;
}
def VGPR_HI16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
@@ -599,9 +600,34 @@ def VGPR_HI16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
let AllocationPriority = 0;
let Size = 16;
let GeneratePressureSet = 0;
+}
+
+// VOP3 and VINTERP can access 256 lo and 256 hi registers.
+def VGPR_16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
+ (add (interleave (sequence "VGPR%u_LO16", 0, 255),
+ (sequence "VGPR%u_HI16", 0, 255)))> {
+ let AllocationPriority = 2;
+ let Size = 16;
+ let GeneratePressureSet = 0;
+
+ // This is the base class for VGPR{128..255}_{LO16,HI16}.
let BaseClassOrder = 17;
}
+// VOP1/2/C can access the first 128 lo and 128 hi registers.
+// The order of registers in the class determines order of allocation, so it is
+// important to interleave lo and hi registers.
+def VGPR_16_Lo128 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
+ (add (interleave (sequence "VGPR%u_LO16", 0, 127),
+ (sequence "VGPR%u_HI16", 0, 127)))> {
+ let Size = 16;
+ let GeneratePressureSet = 0;
+ let isAllocatable = 0;
+
+ // This is the base class for VGPR{0..127}_{LO16,HI16}.
+ let BaseClassOrder = 16;
+}
+
// VGPR 32-bit registers
// i16/f16 only on VI+
def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
@@ -904,7 +930,7 @@ defm "" : SRegClass<11, [v11i32, v11f32], SGPR_352Regs, TTMP_352Regs>;
defm "" : SRegClass<12, [v12i32, v12f32], SGPR_384Regs, TTMP_384Regs>;
let GlobalPriority = true in {
-defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64], SGPR_512Regs, TTMP_512Regs>;
+defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16], SGPR_512Regs, TTMP_512Regs>;
defm "" : SRegClass<32, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>;
}
@@ -958,7 +984,7 @@ defm VReg_352 : VRegClass<11, [v11i32, v11f32], (add VGPR_352)>;
defm VReg_384 : VRegClass<12, [v12i32, v12f32], (add VGPR_384)>;
let GlobalPriority = true in {
-defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64], (add VGPR_512)>;
+defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16], (add VGPR_512)>;
defm VReg_1024 : VRegClass<32, [v32i32, v32f32, v16i64, v16f64], (add VGPR_1024)>;
}
@@ -1008,6 +1034,18 @@ def VReg_1 : SIRegisterClass<"AMDGPU", [i1], 32, (add)> {
let HasVGPR = 1;
}
+def VS_16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
+ (add VGPR_16, SReg_32, LDS_DIRECT_CLASS)> {
+ let isAllocatable = 0;
+ let HasVGPR = 1;
+}
+
+def VS_16_Lo128 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
+ (add VGPR_16_Lo128, SReg_32, LDS_DIRECT_CLASS)> {
+ let isAllocatable = 0;
+ let HasVGPR = 1;
+}
+
def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
@@ -1094,6 +1132,30 @@ class RegOrF16 <string RegisterClass, string OperandTypePrefix>
: RegOrImmOperand <RegisterClass, OperandTypePrefix # "_FP16",
!subst("_f16", "F16", NAME), "_Imm16">;
+class RegOrB16T <string RegisterClass, string OperandTypePrefix>
+ : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_INT16",
+ !subst("_b16", "B16", NAME), "_Imm16"> {
+ let EncoderMethod = "getMachineOpValueT16";
+}
+
+class RegOrF16T <string RegisterClass, string OperandTypePrefix>
+ : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_FP16",
+ !subst("_f16", "F16", NAME), "_Imm16"> {
+ let EncoderMethod = "getMachineOpValueT16";
+}
+
+class RegOrB16_Lo128T <string RegisterClass, string OperandTypePrefix>
+ : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_INT16",
+ !subst("_b16_Lo128", "B16_Lo128", NAME), "_Imm16"> {
+ let EncoderMethod = "getMachineOpValueT16Lo128";
+}
+
+class RegOrF16_Lo128T <string RegisterClass, string OperandTypePrefix>
+ : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_FP16",
+ !subst("_f16_Lo128", "F16_Lo128", NAME), "_Imm16"> {
+ let EncoderMethod = "getMachineOpValueT16Lo128";
+}
+
class RegOrB32 <string RegisterClass, string OperandTypePrefix>
: RegOrImmOperand <RegisterClass, OperandTypePrefix # "_INT32",
!subst("_b32", "B32", NAME), "_Imm32">;
@@ -1149,10 +1211,13 @@ class RegOrF16_Lo128_Deferred <string RegisterClass,
: RegOrImmOperand <RegisterClass, OperandTypePrefix # "_FP16_DEFERRED",
!subst("_f16_Lo128_Deferred", "F16_Lo128", NAME),
"_Deferred_Imm16">;
+
//===----------------------------------------------------------------------===//
// SSrc_* Operands with an SGPR or a 32-bit immediate
//===----------------------------------------------------------------------===//
+def SSrc_b16 : RegOrB16 <"SReg_32", "OPERAND_REG_IMM">;
+def SSrc_f16 : RegOrF16 <"SReg_32", "OPERAND_REG_IMM">;
def SSrc_b32 : RegOrB32 <"SReg_32", "OPERAND_REG_IMM">;
def SSrc_f32 : RegOrF32 <"SReg_32", "OPERAND_REG_IMM">;
def SSrc_b64 : RegOrB64 <"SReg_64", "OPERAND_REG_IMM">;
@@ -1160,6 +1225,13 @@ def SSrc_b64 : RegOrB64 <"SReg_64", "OPERAND_REG_IMM">;
def SSrcOrLds_b32 : RegOrB32 <"SRegOrLds_32", "OPERAND_REG_IMM">;
//===----------------------------------------------------------------------===//
+// SSrc_32_Deferred Operands with an SGPR or a 32-bit immediate for use with
+// FMAMK/FMAAK
+//===----------------------------------------------------------------------===//
+
+def SSrc_f32_Deferred : RegOrF32_Deferred<"SReg_32", "OPERAND_REG_IMM">;
+
+//===----------------------------------------------------------------------===//
// SCSrc_* Operands with an SGPR or a inline constant
//===----------------------------------------------------------------------===//
@@ -1170,20 +1242,41 @@ def SCSrc_b64 : RegOrB64 <"SReg_64", "OPERAND_REG_INLINE_C">;
// VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
//===----------------------------------------------------------------------===//
+// The current (and temporary future) default case for VOP3.
def VSrc_b16 : RegOrB16 <"VS_32", "OPERAND_REG_IMM">;
def VSrc_f16 : RegOrF16 <"VS_32", "OPERAND_REG_IMM">;
+
+// True16 VOP3 operands.
+def VSrcT_b16 : RegOrB16T <"VS_16", "OPERAND_REG_IMM"> {
+ let DecoderMethod = "decodeOperand_VSrcT16";
+}
+def VSrcT_f16 : RegOrF16T <"VS_16", "OPERAND_REG_IMM"> {
+ let DecoderMethod = "decodeOperand_VSrcT16";
+}
+
+// True16 VOP1/2/C operands.
+def VSrcT_b16_Lo128 : RegOrB16_Lo128T <"VS_16_Lo128", "OPERAND_REG_IMM"> {
+ let DecoderMethod = "decodeOperand_VSrcT16_Lo128";
+}
+def VSrcT_f16_Lo128 : RegOrF16_Lo128T <"VS_16_Lo128", "OPERAND_REG_IMM"> {
+ let DecoderMethod = "decodeOperand_VSrcT16_Lo128";
+}
+
+// The current (and temporary future) default case for fake VOP1/2/C.
+def VSrcFake16_b16_Lo128 : RegOrB16_Lo128 <"VS_32_Lo128", "OPERAND_REG_IMM">;
+def VSrcFake16_f16_Lo128 : RegOrF16_Lo128 <"VS_32_Lo128", "OPERAND_REG_IMM">;
+
def VSrc_b32 : RegOrB32 <"VS_32", "OPERAND_REG_IMM">;
def VSrc_f32 : RegOrF32 <"VS_32", "OPERAND_REG_IMM">;
def VSrc_v2b16 : RegOrV2B16 <"VS_32", "OPERAND_REG_IMM">;
def VSrc_v2f16 : RegOrV2F16 <"VS_32", "OPERAND_REG_IMM">;
def VSrc_b64 : RegOrB64 <"VS_64", "OPERAND_REG_IMM">;
-def VSrc_f64 : RegOrF64 <"VS_64", "OPERAND_REG_IMM">;
+def VSrc_f64 : RegOrF64 <"VS_64", "OPERAND_REG_IMM"> {
+ let DecoderMethod = "decodeOperand_VSrc_f64";
+}
def VSrc_v2b32 : RegOrV2B32 <"VS_64", "OPERAND_REG_IMM">;
def VSrc_v2f32 : RegOrV2F32 <"VS_64", "OPERAND_REG_IMM">;
-def VSrcT_b16_Lo128 : RegOrB16_Lo128 <"VS_32_Lo128", "OPERAND_REG_IMM">;
-def VSrcT_f16_Lo128 : RegOrF16_Lo128 <"VS_32_Lo128", "OPERAND_REG_IMM">;
-
//===----------------------------------------------------------------------===//
// VSrc_*_Deferred Operands with an SGPR, VGPR or a 32-bit immediate for use
// with FMAMK/FMAAK
@@ -1192,8 +1285,8 @@ def VSrcT_f16_Lo128 : RegOrF16_Lo128 <"VS_32_Lo128", "OPERAND_REG_IMM">;
def VSrc_f16_Deferred : RegOrF16_Deferred<"VS_32", "OPERAND_REG_IMM">;
def VSrc_f32_Deferred : RegOrF32_Deferred<"VS_32", "OPERAND_REG_IMM">;
-def VSrcT_f16_Lo128_Deferred : RegOrF16_Lo128_Deferred<"VS_32_Lo128",
- "OPERAND_REG_IMM">;
+def VSrcFake16_f16_Lo128_Deferred : RegOrF16_Lo128_Deferred<"VS_32_Lo128",
+ "OPERAND_REG_IMM">;
//===----------------------------------------------------------------------===//
// VRegSrc_* Operands with a VGPR
@@ -1233,6 +1326,11 @@ def VGPRSrc_32_Lo128 : RegisterOperand<VGPR_32_Lo128> {
let DecoderMethod = "DecodeVGPR_32RegisterClass";
}
+def VGPRSrc_16_Lo128 : RegisterOperand<VGPR_16_Lo128> {
+ let DecoderMethod = "DecodeVGPR_16_Lo128RegisterClass";
+ let EncoderMethod = "getMachineOpValueT16Lo128";
+}
+
//===----------------------------------------------------------------------===//
// ASrc_* Operands with an AccVGPR
//===----------------------------------------------------------------------===//
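The TableGen changes above shrink regIdx to 8 bits and move the AGPR/VGPR and hi-half markers into dedicated HWEncoding bits. A standalone sketch of the resulting layout, with field meanings taken from the comments above (the helper itself is illustrative and not part of the backend):

    #include <cstdint>

    // Bits 7-0 = regIdx, bit 8 = isAGPROrVGPR, bit 9 = isHi (the .h half).
    constexpr uint16_t makeHWEncoding(uint8_t RegIdx, bool IsAGPROrVGPR,
                                      bool IsHi) {
      return uint16_t(RegIdx) | uint16_t(IsAGPROrVGPR) << 8 |
             uint16_t(IsHi) << 9;
    }

    // v5.h would encode as regIdx 5, VGPR, high half:
    static_assert(makeHWEncoding(5, /*IsAGPROrVGPR=*/true, /*IsHi=*/true) ==
                  (5u | (1u << 8) | (1u << 9)));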
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SISchedule.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SISchedule.td
index 53441b5a4ced..b0e8e4112254 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -65,6 +65,12 @@ def Write16PassMAI : SchedWrite;
def Write4PassDGEMM : SchedWrite;
def Write8PassDGEMM : SchedWrite;
+// Scalar float instructions
+def WriteSFPU : SchedWrite;
+
+// F16 or F32 pseudo scalar transcendental instructions
+def WritePseudoScalarTrans : SchedWrite;
+
// FIXME: Should there be a class for instructions which are VALU
// instructions and have VALU rates, but write to the SALU (i.e. VOPC
// instructions)
@@ -90,6 +96,7 @@ def SIDPFullSpeedModel : SISchedMachineModel;
def SIDPGFX940FullSpeedModel : SISchedMachineModel;
def GFX10SpeedModel : SISchedMachineModel;
def GFX11SpeedModel : SISchedMachineModel;
+def GFX12SpeedModel : SISchedMachineModel;
// XXX: Are the resource counts correct?
def HWBranch : ProcResource<1> {
@@ -128,6 +135,10 @@ class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
class HWVALUWriteRes<SchedWrite write, int latency> :
HWWriteRes<write, [HWVALU], latency>;
+class UnsupportedWriteRes<SchedWrite write> : WriteRes<write, []> {
+ let Unsupported = 1;
+}
+
def PredMIReadVGPR : SchedPredicate<[{TII->hasVGPRUses(*MI)}]>;
def MIReadVGPR : SchedReadVariant<[
@@ -157,14 +168,17 @@ multiclass SICommonWriteRes {
def : HWVALUWriteRes<Write4PassDGEMM, 4>;
def : HWVALUWriteRes<Write8PassDGEMM, 16>;
- let ResourceCycles = [2] in
+ let ReleaseAtCycles = [2] in
def : HWWriteRes<Write2PassMAI, [HWXDL], 2>;
- let ResourceCycles = [4] in
+ let ReleaseAtCycles = [4] in
def : HWWriteRes<Write4PassMAI, [HWXDL], 4>;
- let ResourceCycles = [8] in
+ let ReleaseAtCycles = [8] in
def : HWWriteRes<Write8PassMAI, [HWXDL], 8>;
- let ResourceCycles = [16] in
+ let ReleaseAtCycles = [16] in
def : HWWriteRes<Write16PassMAI, [HWXDL], 16>;
+
+ def : UnsupportedWriteRes<WriteSFPU>;
+ def : UnsupportedWriteRes<WritePseudoScalarTrans>;
} // End RetireOOO = 1
def : ReadAdvance<MIVGPRRead, -2>;
@@ -307,6 +321,9 @@ def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 2>;
def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>;
def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
+
+def : UnsupportedWriteRes<WriteSFPU>;
+def : UnsupportedWriteRes<WritePseudoScalarTrans>;
} // End RetireOOO = 1
def : InstRW<[WriteCopy], (instrs COPY)>;
@@ -315,26 +332,61 @@ def : InstRW<[WriteCopy], (instrs COPY)>;
let SchedModel = GFX11SpeedModel in {
+// The latency values are 1 / (operations / cycle).
+// Add 1 stall cycle for VGPR read.
+let RetireOOO = 1 in { // llvm-mca specific flag
def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteFloatCvt, [HWVALU, HWRC], 5>;
def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 6>;
-def : HWWriteRes<WriteTrans32, [HWVALU, HWRC], 10>;
+def : HWWriteRes<WriteTrans32, [HWTransVALU, HWRC], 10>;
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 8>;
def : HWWriteRes<WriteFloatFMA, [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 38>;
def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 38>;
def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 38>;
def : HWWriteRes<WriteIntMul, [HWVALU, HWRC], 8>;
-def : HWWriteRes<WriteTrans64, [HWVALU, HWRC], 40>;
+def : HWWriteRes<WriteTrans64, [HWVALU, HWTransVALU, HWRC], 40>;
def : HWWriteRes<WriteBranch, [HWBranch], 32>;
def : HWWriteRes<WriteExport, [HWExport, HWRC], 16>;
def : HWWriteRes<WriteLDS, [HWLGKM, HWRC], 20>;
def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 2>;
+def : HWWriteRes<WriteSFPU, [HWSALU, HWRC], 4>;
def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>;
def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
+} // End RetireOOO = 1
+
+def : UnsupportedWriteRes<WritePseudoScalarTrans>;
def : InstRW<[WriteCopy], (instrs COPY)>;
} // End SchedModel = GFX11SpeedModel
+
+let SchedModel = GFX12SpeedModel in {
+
+def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
+def : HWWriteRes<WriteFloatCvt, [HWVALU, HWRC], 5>;
+def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 6>;
+def : HWWriteRes<WriteTrans32, [HWVALU, HWRC], 10>;
+def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 8>;
+def : HWWriteRes<WriteFloatFMA, [HWVALU, HWRC], 5>;
+def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 38>;
+def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 38>;
+def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 38>;
+def : HWWriteRes<WriteIntMul, [HWVALU, HWRC], 8>;
+def : HWWriteRes<WriteTrans64, [HWVALU, HWRC], 40>;
+def : HWWriteRes<WritePseudoScalarTrans, [HWVALU, HWRC], 7>;
+
+def : HWWriteRes<WriteBranch, [HWBranch], 32>;
+def : HWWriteRes<WriteExport, [HWExport, HWRC], 16>;
+def : HWWriteRes<WriteLDS, [HWLGKM, HWRC], 20>;
+def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 2>;
+def : HWWriteRes<WriteSFPU, [HWSALU, HWRC], 4>;
+def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>;
+def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
+def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
+
+def : InstRW<[WriteCopy], (instrs COPY)>;
+
+} // End SchedModel = GFX12SpeedModel
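Reading the GFX11 comment above as a formula: with r the sustained throughput in operations per cycle and one stall cycle for the VGPR read, the listed latency is

    L = \frac{1}{r} + 1, \quad\text{so}\quad r = \frac{1}{L - 1};\qquad L(\text{Write32Bit}) = 5 \implies r = \tfrac{1}{4}\ \text{ops/cycle}.

The 1/4 figure is inverted out of the table, not stated in the source.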
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 4159dc694c1e..d290dd82b760 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -104,8 +104,7 @@ bool SIShrinkInstructions::foldImmediates(MachineInstr &MI,
bool ConstantFolded = false;
if (TII->isOperandLegal(MI, Src0Idx, &MovSrc)) {
- if (MovSrc.isImm() &&
- (isInt<32>(MovSrc.getImm()) || isUInt<32>(MovSrc.getImm()))) {
+ if (MovSrc.isImm()) {
Src0.ChangeToImmediate(MovSrc.getImm());
ConstantFolded = true;
} else if (MovSrc.isFI()) {
@@ -160,7 +159,7 @@ bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const {
}
bool SIShrinkInstructions::isKImmOperand(const MachineOperand &Src) const {
- return isInt<16>(Src.getImm()) &&
+ return isInt<16>(SignExtend64(Src.getImm(), 32)) &&
!TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
}
@@ -171,7 +170,7 @@ bool SIShrinkInstructions::isKUImmOperand(const MachineOperand &Src) const {
bool SIShrinkInstructions::isKImmOrKUImmOperand(const MachineOperand &Src,
bool &IsUnsigned) const {
- if (isInt<16>(Src.getImm())) {
+ if (isInt<16>(SignExtend64(Src.getImm(), 32))) {
IsUnsigned = false;
return !TII->isInlineConstant(Src);
}
@@ -212,6 +211,9 @@ void SIShrinkInstructions::copyExtraImplicitOps(MachineInstr &NewMI,
}
void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
+ if (!ST->hasSCmpK())
+ return;
+
// cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
// get constants on the RHS.
if (!MI.getOperand(0).isReg())
@@ -222,7 +224,7 @@ void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
if (!Src0.isReg())
return;
- const MachineOperand &Src1 = MI.getOperand(1);
+ MachineOperand &Src1 = MI.getOperand(1);
if (!Src1.isImm())
return;
@@ -238,6 +240,7 @@ void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
if (!HasUImm) {
SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
+ Src1.setImm(SignExtend32(Src1.getImm(), 32));
}
MI.setDesc(TII->get(SOPKOpc));
@@ -250,6 +253,8 @@ void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
if ((TII->sopkIsZext(SOPKOpc) && isKUImmOperand(Src1)) ||
(!TII->sopkIsZext(SOPKOpc) && isKImmOperand(Src1))) {
+ if (!TII->sopkIsZext(SOPKOpc))
+ Src1.setImm(SignExtend64(Src1.getImm(), 32));
MI.setDesc(NewDesc);
}
}
@@ -839,6 +844,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;
+ Src1->setImm(SignExtend64(Src1->getImm(), 32));
MI.setDesc(TII->get(Opc));
MI.tieOperands(0, 1);
}
@@ -858,9 +864,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
if (Src.isImm() && Dst.getReg().isPhysical()) {
int32_t ReverseImm;
- if (isKImmOperand(Src))
+ if (isKImmOperand(Src)) {
MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
- else if (isReverseInlineImm(Src, ReverseImm)) {
+ Src.setImm(SignExtend64(Src.getImm(), 32));
+ } else if (isReverseInlineImm(Src, ReverseImm)) {
MI.setDesc(TII->get(AMDGPU::S_BREV_B32));
Src.setImm(ReverseImm);
}
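
[Editor's note] The SIShrinkInstructions changes above all follow one idea: an immediate for a 32-bit operation may sit in the 64-bit MachineOperand storage without sign extension, so a raw isInt<16> test can reject a value such as 0xFFFF8000 even though, read as a 32-bit integer, it is -32768. SignExtend64(Imm, 32) normalizes the value first, and the setImm calls write the normalized value back. A minimal standalone sketch; the two helpers mirror llvm::SignExtend64 and llvm::isInt from llvm/Support/MathExtras.h, reimplemented here so the example builds without LLVM:

#include <cassert>
#include <cstdint>

// Sign-extend the low B bits of X to a full 64-bit value.
constexpr int64_t SignExtend64(uint64_t X, unsigned B) {
  return int64_t(X << (64 - B)) >> (64 - B);
}

// True if x fits in an N-bit signed integer.
template <unsigned N> constexpr bool isInt(int64_t x) {
  return x >= -(INT64_C(1) << (N - 1)) && x < (INT64_C(1) << (N - 1));
}

int main() {
  uint64_t Imm = 0xFFFF8000; // bit 31 set, upper half zero: -32768 as i32
  assert(!isInt<16>(int64_t(Imm)));         // raw check wrongly rejects it
  assert(isInt<16>(SignExtend64(Imm, 32))); // normalized, it fits in s16
  return 0;
}
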
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 3143d437e370..59d6ccf513bb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -238,9 +238,7 @@ public:
AU.addRequired<LiveIntervals>();
AU.addPreserved<SlotIndexes>();
AU.addPreserved<LiveIntervals>();
- AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
- AU.addRequired<MachinePostDominatorTree>();
AU.addPreserved<MachinePostDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -1320,7 +1318,8 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, bool IsEntry) {
auto II = MBB.getFirstNonPHI(), IE = MBB.end();
if (IsEntry) {
// Skip the instruction that saves LiveMask
- if (II != IE && II->getOpcode() == AMDGPU::COPY)
+ if (II != IE && II->getOpcode() == AMDGPU::COPY &&
+ II->getOperand(1).getReg() == TRI->getExec())
++II;
}
@@ -1594,8 +1593,8 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();
- MDT = &getAnalysis<MachineDominatorTree>();
- PDT = &getAnalysis<MachinePostDominatorTree>();
+ MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+ PDT = getAnalysisIfAvailable<MachinePostDominatorTree>();
if (ST->isWave32()) {
AndOpc = AMDGPU::S_AND_B32;
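
[Editor's note] The SIWholeQuadMode change demotes the dominator trees from required inputs to opportunistic ones: getAnalysisIfAvailable may return null, so the pass must tolerate missing trees instead of forcing them to be computed. A hedged sketch of the pattern under the legacy pass manager; the pass name and body are hypothetical, but the accessors are the real Pass APIs:

#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;

namespace {
struct ExamplePass : MachineFunctionPass {
  static char ID;
  MachineDominatorTree *MDT = nullptr;
  ExamplePass() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // addPreserved only: keep the tree valid if it already exists, but do
    // not addRequired it, so it is never computed just for this pass.
    AU.addPreserved<MachineDominatorTree>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Unlike getAnalysis<>, this may return null; every use must be guarded.
    MDT = getAnalysisIfAvailable<MachineDominatorTree>();
    if (MDT) {
      // ... use dominance information only when it happens to be present ...
    }
    return false;
  }
};
} // end anonymous namespace
char ExamplePass::ID = 0;
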
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td
index 7ca685a0cc5d..3297847b0360 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -74,7 +74,7 @@ class SM_Real <SM_Pseudo ps, string opName = ps.Mnemonic>
bits<7> sdst;
bits<32> offset;
bits<8> soffset;
- bits<5> cpol;
+ bits<5> cpol;
}
class OffsetMode<bit hasOffset, bit hasSOffset, string variant,
@@ -211,6 +211,23 @@ class SM_WaveId_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo<
let has_sbase = 0;
}
+class SM_Prefetch_Pseudo <string opName, RegisterClass baseClass, bit hasSBase>
+ : SM_Pseudo<opName, (outs), !con(!if(hasSBase, (ins baseClass:$sbase), (ins)),
+ (ins smem_offset:$offset, SReg_32:$soffset, i8imm:$sdata)),
+ !if(hasSBase, " $sbase,", "") # " $offset, $soffset, $sdata"> {
+ // Mark prefetches as both load and store to prevent reordering with loads
+ // and stores. This is also needed for the pattern to match the prefetch
+ // intrinsic.
+ let mayLoad = 1;
+ let mayStore = 1;
+ let has_glc = 0;
+ let LGKM_CNT = 0;
+ let has_sbase = hasSBase;
+ let ScalarStore = 0;
+ let has_offset = 1;
+ let has_soffset = 1;
+ let PseudoInstr = opName;
+}
+
//===----------------------------------------------------------------------===//
// Scalar Atomic Memory Classes
//===----------------------------------------------------------------------===//
@@ -234,8 +251,6 @@ class SM_Atomic_Pseudo <string opName,
let IsAtomicNoRet = !not(isRet);
let IsAtomicRet = isRet;
-
- let AsmMatchConverter = "cvtSMEMAtomic";
}
class SM_Pseudo_Atomic<string opName,
@@ -245,7 +260,7 @@ class SM_Pseudo_Atomic<string opName,
bit isRet,
string opNameWithSuffix =
opName # offsets.Variant # !if(isRet, "_RTN", ""),
- Operand CPolTy = !if(isRet, CPol_GLC1, CPol)> :
+ Operand CPolTy = !if(isRet, CPol_GLC, CPol_NonGLC)> :
SM_Atomic_Pseudo<opName,
!if(isRet, (outs dataClass:$sdst), (outs)),
!con((ins dataClass:$sdata, baseClass:$sbase), offsets.Ins,
@@ -285,6 +300,8 @@ multiclass SM_Pseudo_Atomics<RegisterClass baseClass,
// does sdst for SMRD on SI/CI?
defm S_LOAD_DWORD : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
defm S_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_64, SReg_64_XEXEC>;
+let SubtargetPredicate = HasScalarDwordx3Loads in
+ defm S_LOAD_DWORDX3 : SM_Pseudo_Loads <SReg_64, SReg_96>;
defm S_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_64, SReg_128>;
defm S_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_64, SReg_256>;
defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_64, SReg_512>;
@@ -294,6 +311,8 @@ defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
// FIXME: exec_lo/exec_hi appear to be allowed for SMRD loads on
// SI/CI, but disallowed for SMEM on VI.
defm S_BUFFER_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_128, SReg_64_XEXEC>;
+let SubtargetPredicate = HasScalarDwordx3Loads in
+ defm S_BUFFER_LOAD_DWORDX3 : SM_Pseudo_Loads <SReg_128, SReg_96>;
defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_128, SReg_128>;
defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_128, SReg_256>;
defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_128, SReg_512>;
@@ -417,6 +436,16 @@ defm S_DCACHE_DISCARD : SM_Pseudo_Discards;
defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards;
}
+let SubtargetPredicate = isGFX12Plus in {
+def S_PREFETCH_INST : SM_Prefetch_Pseudo <"s_prefetch_inst", SReg_64, 1>;
+def S_PREFETCH_INST_PC_REL : SM_Prefetch_Pseudo <"s_prefetch_inst_pc_rel", SReg_64, 0>;
+def S_PREFETCH_DATA : SM_Prefetch_Pseudo <"s_prefetch_data", SReg_64, 1>;
+def S_PREFETCH_DATA_PC_REL : SM_Prefetch_Pseudo <"s_prefetch_data_pc_rel", SReg_64, 0>;
+def S_BUFFER_PREFETCH_DATA : SM_Prefetch_Pseudo <"s_buffer_prefetch_data", SReg_128, 1> {
+ let is_buffer = 1;
+}
+} // end let SubtargetPredicate = isGFX12Plus
+
//===----------------------------------------------------------------------===//
// Targets
//===----------------------------------------------------------------------===//
@@ -789,6 +818,14 @@ def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformL
}];
}
+def smrd_prefetch : PatFrag <(ops node:$ptr, node:$rw, node:$loc, node:$type),
+ (prefetch node:$ptr, node:$rw, node:$loc, node:$type),
+ [{ return !N->getOperand(1)->isDivergent(); }]> {
+ let GISelPredicateCode = [{
+ return isInstrUniform(MI);
+ }];
+}
+
def SMRDImm : ComplexPattern<iPTR, 2, "SelectSMRDImm">;
def SMRDImm32 : ComplexPattern<iPTR, 2, "SelectSMRDImm32">;
def SMRDSgpr : ComplexPattern<iPTR, 2, "SelectSMRDSgpr">;
@@ -797,7 +834,7 @@ def SMRDBufferImm : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm">;
def SMRDBufferImm32 : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">;
def SMRDBufferSgprImm : ComplexPattern<iPTR, 2, "SelectSMRDBufferSgprImm">;
-multiclass SMRD_Pattern <string Instr, ValueType vt> {
+multiclass SMRD_Pattern <string Instr, ValueType vt, bit immci = true> {
// 1. IMM offset
def : GCNPat <
@@ -806,7 +843,7 @@ multiclass SMRD_Pattern <string Instr, ValueType vt> {
>;
// 2. 32-bit IMM offset on CI
- def : GCNPat <
+ if immci then def : GCNPat <
(smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
(vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
let OtherPredicates = [isGFX7Only];
@@ -838,7 +875,7 @@ multiclass SMRD_Pattern <string Instr, ValueType vt> {
>;
}
-multiclass SMLoad_Pattern <string Instr, ValueType vt> {
+multiclass SMLoad_Pattern <string Instr, ValueType vt, bit immci = true> {
// 1. Offset as an immediate
def : GCNPat <
(SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy),
@@ -847,7 +884,7 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
}
// 2. 32-bit IMM offset on CI
- def : GCNPat <
+ if immci then def : GCNPat <
(vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)),
(!cast<InstSI>(Instr#"_IMM_ci") SReg_128:$sbase, smrd_literal_offset:$offset,
(extract_cpol $cachepolicy))> {
@@ -890,6 +927,10 @@ foreach vt = SReg_64.RegTypes in {
defm : SMRD_Pattern <"S_LOAD_DWORDX2", vt>;
}
+foreach vt = SReg_96.RegTypes in {
+defm : SMRD_Pattern <"S_LOAD_DWORDX3", vt, false>;
+}
+
foreach vt = SReg_128.RegTypes in {
defm : SMRD_Pattern <"S_LOAD_DWORDX4", vt>;
}
@@ -906,12 +947,14 @@ defm : SMRD_Pattern <"S_LOAD_DWORDX16", vt>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2i32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX3", v3i32, false>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4", v4i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2f32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX3", v3f32, false>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4", v4f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16f32>;
@@ -934,6 +977,21 @@ def : GCNPat <
}
} // let OtherPredicates = [HasShaderCyclesRegister]
+multiclass SMPrefetchPat<string type, int cache_type> {
+ def : GCNPat <
+ (smrd_prefetch (SMRDImm i64:$sbase, i32:$offset), timm, timm, (i32 cache_type)),
+ (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) $sbase, $offset, (i32 SGPR_NULL), (i8 0))
+ >;
+
+ def : GCNPat <
+ (smrd_prefetch (i64 SReg_64:$sbase), timm, timm, (i32 cache_type)),
+ (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) $sbase, 0, (i32 SGPR_NULL), (i8 0))
+ >;
+}
+
+defm : SMPrefetchPat<"INST", 0>;
+defm : SMPrefetchPat<"DATA", 1>;
+
//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//
@@ -1154,7 +1212,7 @@ def SMInfoTable : GenericTable {
class SMEM_Real_gfx11<bits<8> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
SMEM_Real_10Plus_common<op, ps, opName, SIEncodingFamily.GFX11,
SGPR_NULL_gfx11plus> {
- let AssemblerPredicate = isGFX11Plus;
+ let AssemblerPredicate = isGFX11Only;
let DecoderNamespace = "GFX11";
let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, 0);
@@ -1205,3 +1263,84 @@ multiclass SM_Real_Probe_gfx11<bits<8> op> {
defm S_ATC_PROBE : SM_Real_Probe_gfx11 <0x22>;
defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx11 <0x23>;
+
+//===----------------------------------------------------------------------===//
+// GFX12.
+//===----------------------------------------------------------------------===//
+
+class SMEM_Real_gfx12Plus<bits<6> op, SM_Pseudo ps, string opName,
+ int subtarget, RegisterWithSubRegs sgpr_null> :
+ SM_Real<ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>, Enc64 {
+
+ let Inst{18-13} = op;
+ let Inst{31-26} = 0x3d;
+
+ let Inst{55-32} = !if(ps.has_offset, offset{23-0}, !if(ps.has_soffset, 0, ?));
+ let Inst{63-57} = !if(ps.has_soffset, soffset{6-0},
+ !if(ps.has_offset, sgpr_null.HWEncoding{6-0}, ?));
+}
+
+class SMEM_Real_gfx12<bits<6> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
+ SMEM_Real_gfx12Plus<op, ps, opName, SIEncodingFamily.GFX12,
+ SGPR_NULL_gfx11plus> {
+ let AssemblerPredicate = isGFX12Plus;
+ let DecoderNamespace = "GFX12";
+
+ let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
+ let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
+}
+
+class SMEM_Real_Prefetch_gfx12<bits<6> op, SM_Pseudo ps> :
+ SMEM_Real_gfx12<op, ps> {
+ bits<7> sdata; // Only 5 bits of sdata are supported.
+
+ let sdst = ?;
+ let Inst{12-11} = 0; // Unused sdata bits.
+ let Inst{10-6} = !if(ps.has_sdst, sdata{4-0}, ?);
+}
+
+class SMEM_Real_Load_gfx12<bits<6> op, string ps, string opName, OffsetMode offsets> :
+ SMEM_Real_gfx12<op, !cast<SM_Pseudo>(ps # offsets.Variant), opName> {
+ RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
+ let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));
+
+ let Inst{22-21} = cpol{4-3}; // scope
+ let Inst{24-23} = cpol{1-0}; // th - only lower 2 bits are supported
+}
+
+multiclass SM_Real_Loads_gfx12<bits<6> op, string ps = NAME> {
+ defvar opName = !tolower(NAME);
+ def _IMM_gfx12 : SMEM_Real_Load_gfx12<op, ps, opName, IMM_Offset>;
+ def _SGPR_IMM_gfx12 : SMEM_Real_Load_gfx12<op, ps, opName, SGPR_IMM_Offset>;
+}
+
+defm S_LOAD_B32 : SM_Real_Loads_gfx12<0x00, "S_LOAD_DWORD">;
+defm S_LOAD_B64 : SM_Real_Loads_gfx12<0x01, "S_LOAD_DWORDX2">;
+defm S_LOAD_B96 : SM_Real_Loads_gfx12<0x05, "S_LOAD_DWORDX3">;
+defm S_LOAD_B128 : SM_Real_Loads_gfx12<0x02, "S_LOAD_DWORDX4">;
+defm S_LOAD_B256 : SM_Real_Loads_gfx12<0x03, "S_LOAD_DWORDX8">;
+defm S_LOAD_B512 : SM_Real_Loads_gfx12<0x04, "S_LOAD_DWORDX16">;
+
+defm S_BUFFER_LOAD_B32 : SM_Real_Loads_gfx12<0x10, "S_BUFFER_LOAD_DWORD">;
+defm S_BUFFER_LOAD_B64 : SM_Real_Loads_gfx12<0x11, "S_BUFFER_LOAD_DWORDX2">;
+defm S_BUFFER_LOAD_B96 : SM_Real_Loads_gfx12<0x15, "S_BUFFER_LOAD_DWORDX3">;
+defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx12<0x12, "S_BUFFER_LOAD_DWORDX4">;
+defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx12<0x13, "S_BUFFER_LOAD_DWORDX8">;
+defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx12<0x14, "S_BUFFER_LOAD_DWORDX16">;
+
+def S_DCACHE_INV_gfx12 : SMEM_Real_gfx12<0x021, S_DCACHE_INV>;
+
+def S_PREFETCH_INST_gfx12 : SMEM_Real_Prefetch_gfx12<0x24, S_PREFETCH_INST>;
+def S_PREFETCH_INST_PC_REL_gfx12 : SMEM_Real_Prefetch_gfx12<0x25, S_PREFETCH_INST_PC_REL>;
+def S_PREFETCH_DATA_gfx12 : SMEM_Real_Prefetch_gfx12<0x26, S_PREFETCH_DATA>;
+def S_BUFFER_PREFETCH_DATA_gfx12 : SMEM_Real_Prefetch_gfx12<0x27, S_BUFFER_PREFETCH_DATA>;
+def S_PREFETCH_DATA_PC_REL_gfx12 : SMEM_Real_Prefetch_gfx12<0x28, S_PREFETCH_DATA_PC_REL>;
+
+multiclass SMEM_Real_Probe_gfx12<bits<6> op> {
+ defvar ps = NAME;
+ def _IMM_gfx12 : SMEM_Real_Prefetch_gfx12<op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
+ def _SGPR_IMM_gfx12 : SMEM_Real_Prefetch_gfx12<op, !cast<SM_Probe_Pseudo>(ps#_SGPR_IMM)>;
+}
+
+defm S_ATC_PROBE : SMEM_Real_Probe_gfx12<0x22>;
+defm S_ATC_PROBE_BUFFER : SMEM_Real_Probe_gfx12<0x23>;
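
[Editor's note] Two details in the prefetch selection above are worth making concrete: smrd_prefetch only matches when the address operand is uniform (the !N->getOperand(1)->isDivergent() predicate), and SMPrefetchPat dispatches on the fourth prefetch operand, the cache type (0 = instruction, 1 = data), to pick S_PREFETCH_INST or S_PREFETCH_DATA. As a rough illustration, __builtin_prefetch in Clang lowers to the llvm.prefetch intrinsic with cache type 1, so it can only reach the DATA pattern; the loop below is a made-up example:

extern "C" void scan(const int *__restrict p, int *out, int n) {
  for (int i = 0; i < n; ++i) {
    // Roughly: call void @llvm.prefetch.p0(ptr %addr, i32 0, i32 3, i32 1),
    // where the trailing i32 1 is the cache type matched by
    // SMPrefetchPat<"DATA", 1>.
    __builtin_prefetch(p + i + 64, /*rw=*/0, /*locality=*/3);
    out[i] = p[i];
  }
}
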
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td
index bee996d1b0df..c9687ac368d3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -15,6 +15,7 @@ class SOP_Pseudo<string opName, dag outs, dag ins, string asmOps,
let isPseudo = 1;
let isCodeGenOnly = 1;
+ let Size = 4;
string Mnemonic = opName;
string AsmOperands = asmOps;
@@ -36,7 +37,6 @@ class SOP1_Pseudo <string opName, dag outs, dag ins,
let SALU = 1;
let SOP1 = 1;
let SchedRW = [WriteSALU];
- let Size = 4;
let UseNamedOperandTable = 1;
bits<1> has_src0 = 1;
@@ -216,8 +216,10 @@ let Defs = [SCC] in {
def S_NOT_B64 : SOP1_64 <"s_not_b64",
[(set i64:$sdst, (UniformUnaryFrag<not> i64:$src0))]
>;
- def S_WQM_B32 : SOP1_32 <"s_wqm_b32">;
- def S_WQM_B64 : SOP1_64 <"s_wqm_b64">;
+ def S_WQM_B32 : SOP1_32 <"s_wqm_b32",
+ [(set i32:$sdst, (int_amdgcn_s_wqm i32:$src0))]>;
+ def S_WQM_B64 : SOP1_64 <"s_wqm_b64",
+ [(set i64:$sdst, (int_amdgcn_s_wqm i64:$src0))]>;
} // End Defs = [SCC]
@@ -290,6 +292,7 @@ def S_BITSET0_B64 : SOP1_64_32 <"s_bitset0_b64", [], 1>;
def S_BITSET1_B32 : SOP1_32 <"s_bitset1_b32", [], 1>;
def S_BITSET1_B64 : SOP1_64_32 <"s_bitset1_b64", [], 1>;
+let isReMaterializable = 1 in
def S_GETPC_B64 : SOP1_64_0 <"s_getpc_b64",
[(set i64:$sdst, (int_amdgcn_s_getpc))]
>;
@@ -326,8 +329,10 @@ def S_XNOR_SAVEEXEC_B64 : SOP1_64 <"s_xnor_saveexec_b64">;
} // End hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC, SCC]
-def S_QUADMASK_B32 : SOP1_32 <"s_quadmask_b32">;
-def S_QUADMASK_B64 : SOP1_64 <"s_quadmask_b64">;
+def S_QUADMASK_B32 : SOP1_32 <"s_quadmask_b32",
+ [(set i32:$sdst, (int_amdgcn_s_quadmask i32:$src0))]>;
+def S_QUADMASK_B64 : SOP1_64 <"s_quadmask_b64",
+ [(set i64:$sdst, (int_amdgcn_s_quadmask i64:$src0))]>;
let Uses = [M0] in {
def S_MOVRELS_B32 : SOP1_32R <"s_movrels_b32">;
@@ -362,7 +367,8 @@ let SubtargetPredicate = isGFX9Plus in {
} // End hasSideEffects = 1, Defs = [EXEC, SCC], Uses = [EXEC]
let isReMaterializable = 1 in
- def S_BITREPLICATE_B64_B32 : SOP1_64_32<"s_bitreplicate_b64_b32">;
+ def S_BITREPLICATE_B64_B32 : SOP1_64_32<"s_bitreplicate_b64_b32",
+ [(set i64:$sdst, (int_amdgcn_s_bitreplicate i32:$src0))]>;
} // End SubtargetPredicate = isGFX9Plus
let SubtargetPredicate = isGFX10Plus in {
@@ -401,6 +407,120 @@ let SubtargetPredicate = isGFX11Plus in {
}
} // End SubtargetPredicate = isGFX11Plus
+class SOP1_F32_Inst<string opName, SDPatternOperator Op, ValueType vt0=f32,
+ ValueType vt1=vt0> :
+ SOP1_32<opName, [(set vt0:$sdst, (UniformUnaryFrag<Op> vt1:$src0))]>;
+
+let SubtargetPredicate = HasSALUFloatInsts, Uses = [MODE],
+ SchedRW = [WriteSFPU], isReMaterializable = 1 in {
+ def S_CVT_F32_I32 : SOP1_F32_Inst<"s_cvt_f32_i32", sint_to_fp, f32, i32>;
+ def S_CVT_F32_U32 : SOP1_F32_Inst<"s_cvt_f32_u32", uint_to_fp, f32, i32>;
+
+ let mayRaiseFPException = 1 in {
+ def S_CVT_I32_F32 : SOP1_F32_Inst<"s_cvt_i32_f32", fp_to_sint, i32, f32>;
+ def S_CVT_U32_F32 : SOP1_F32_Inst<"s_cvt_u32_f32", fp_to_uint, i32, f32>;
+ def S_CVT_F32_F16 : SOP1_F32_Inst<"s_cvt_f32_f16", fpextend, f32, f16>;
+ def S_CVT_HI_F32_F16 : SOP1_32<"s_cvt_hi_f32_f16">;
+
+ def S_CEIL_F32 : SOP1_F32_Inst<"s_ceil_f32", fceil>;
+ def S_FLOOR_F32 : SOP1_F32_Inst<"s_floor_f32", ffloor>;
+ def S_TRUNC_F32 : SOP1_F32_Inst<"s_trunc_f32", ftrunc>;
+ def S_RNDNE_F32 : SOP1_F32_Inst<"s_rndne_f32", froundeven>;
+
+ let FPDPRounding = 1 in
+ def S_CVT_F16_F32 : SOP1_F32_Inst<"s_cvt_f16_f32", fpround, f16, f32>;
+
+ def S_CEIL_F16 : SOP1_F32_Inst<"s_ceil_f16", fceil, f16>;
+ def S_FLOOR_F16 : SOP1_F32_Inst<"s_floor_f16", ffloor, f16>;
+ def S_TRUNC_F16 : SOP1_F32_Inst<"s_trunc_f16", ftrunc, f16>;
+ def S_RNDNE_F16 : SOP1_F32_Inst<"s_rndne_f16", froundeven, f16>;
+ } // End mayRaiseFPException = 1
+} // End SubtargetPredicate = HasSALUFloatInsts, Uses = [MODE]
+ // SchedRW = [WriteSFPU], isReMaterializable = 1
+
+let hasSideEffects = 1 in {
+let has_sdst = 0 in {
+let Uses = [M0] in {
+def S_BARRIER_SIGNAL_M0 : SOP1_Pseudo <"s_barrier_signal m0", (outs), (ins),
+ "", [(int_amdgcn_s_barrier_signal_var M0)]>{
+ let SchedRW = [WriteBarrier];
+ let isConvergent = 1;
+}
+
+def S_BARRIER_SIGNAL_ISFIRST_M0 : SOP1_Pseudo <"s_barrier_signal_isfirst m0", (outs), (ins),
+ "", [(set SCC, (int_amdgcn_s_barrier_signal_isfirst_var M0))]>{
+ let Defs = [SCC];
+ let SchedRW = [WriteBarrier];
+ let isConvergent = 1;
+}
+
+def S_BARRIER_INIT_M0 : SOP1_Pseudo <"s_barrier_init m0", (outs), (ins),
+ "", []>{
+ let SchedRW = [WriteBarrier];
+ let isConvergent = 1;
+}
+
+def S_BARRIER_INIT_IMM : SOP1_Pseudo <"s_barrier_init", (outs),
+ (ins SplitBarrier:$src0), "$src0", []>{
+ let SchedRW = [WriteBarrier];
+ let isConvergent = 1;
+}
+
+def S_BARRIER_JOIN_M0 : SOP1_Pseudo <"s_barrier_join m0", (outs), (ins),
+ "", []>{
+ let SchedRW = [WriteBarrier];
+ let isConvergent = 1;
+}
+
+def S_WAKEUP_BARRIER_M0 : SOP1_Pseudo <"s_wakeup_barrier m0", (outs), (ins),
+ "", []>{
+ let SchedRW = [WriteBarrier];
+ let isConvergent = 1;
+}
+} // End Uses = [M0]
+
+def S_BARRIER_SIGNAL_IMM : SOP1_Pseudo <"s_barrier_signal", (outs),
+ (ins SplitBarrier:$src0), "$src0", [(int_amdgcn_s_barrier_signal timm:$src0)]>{
+ let SchedRW = [WriteBarrier];
+ let isConvergent = 1;
+}
+
+def S_BARRIER_SIGNAL_ISFIRST_IMM : SOP1_Pseudo <"s_barrier_signal_isfirst", (outs),
+ (ins SplitBarrier:$src0), "$src0", [(set SCC, (int_amdgcn_s_barrier_signal_isfirst timm:$src0))]>{
+ let Defs = [SCC];
+ let SchedRW = [WriteBarrier];
+ let isConvergent = 1;
+}
+
+def S_BARRIER_JOIN_IMM : SOP1_Pseudo <"s_barrier_join", (outs),
+ (ins SplitBarrier:$src0), "$src0", []>{
+ let SchedRW = [WriteBarrier];
+ let isConvergent = 1;
+}
+
+def S_WAKEUP_BARRIER_IMM : SOP1_Pseudo <"s_wakeup_barrier", (outs),
+ (ins SplitBarrier:$src0), "$src0", []>{
+ let SchedRW = [WriteBarrier];
+ let isConvergent = 1;
+}
+} // End has_sdst = 0
+
+def S_GET_BARRIER_STATE_IMM : SOP1_Pseudo <"s_get_barrier_state", (outs SSrc_b32:$sdst),
+ (ins SplitBarrier:$src0), "$sdst, $src0", []>{
+ let SchedRW = [WriteBarrier];
+ let isConvergent = 1;
+}
+
+def S_GET_BARRIER_STATE_M0 : SOP1_Pseudo <"s_get_barrier_state $sdst, m0", (outs SSrc_b32:$sdst),
+ (ins), "", []>{
+ let Uses = [M0];
+ let SchedRW = [WriteBarrier];
+ let isConvergent = 1;
+}
+} // End hasSideEffects = 1
+
//===----------------------------------------------------------------------===//
// SOP2 Instructions
//===----------------------------------------------------------------------===//
@@ -424,13 +544,11 @@ class SOP2_Pseudo<string opName, dag outs, dag ins,
// let sdst = xxx in {
// for multiclasses that include both real and pseudo instructions.
// field bits<7> sdst = 0;
- // let Size = 4; // Do we need size here?
}
-class SOP2_Real<bits<7> op, SOP_Pseudo ps, string real_name = ps.Mnemonic> :
+class SOP2_Real<SOP_Pseudo ps, string real_name = ps.Mnemonic> :
InstSI <ps.OutOperandList, ps.InOperandList,
- real_name # ps.AsmOperands>,
- Enc32 {
+ real_name # ps.AsmOperands> {
let SALU = 1;
let SOP2 = 1;
let isPseudo = 0;
@@ -444,12 +562,18 @@ class SOP2_Real<bits<7> op, SOP_Pseudo ps, string real_name = ps.Mnemonic> :
let SchedRW = ps.SchedRW;
let mayLoad = ps.mayLoad;
let mayStore = ps.mayStore;
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
// encoding
bits<7> sdst;
bits<8> src0;
bits<8> src1;
+ bits<32> imm;
+}
+class SOP2_Real32<bits<7> op, SOP_Pseudo ps, string real_name = ps.Mnemonic> :
+ SOP2_Real<ps, real_name>, Enc32 {
let Inst{7-0} = src0;
let Inst{15-8} = src1;
let Inst{22-16} = !if(ps.has_sdst, sdst, ?);
@@ -457,12 +581,31 @@ class SOP2_Real<bits<7> op, SOP_Pseudo ps, string real_name = ps.Mnemonic> :
let Inst{31-30} = 0x2; // encoding
}
+class SOP2_Real64<bits<7> op, SOP_Pseudo ps, string real_name = ps.Mnemonic> :
+ SOP2_Real<ps, real_name>, Enc64 {
+ let Inst{7-0} = src0;
+ let Inst{15-8} = src1;
+ let Inst{22-16} = !if(ps.has_sdst, sdst, ?);
+ let Inst{29-23} = op;
+ let Inst{31-30} = 0x2; // encoding
+ let Inst{63-32} = imm;
+}
+
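
[Editor's note] SOP2_Real64 above is the 64-bit SOP2 encoding used by the FixedSize literal forms (s_fmaak_f32, s_fmamk_f32): the usual 32-bit SOP2 word plus the literal in bits 63:32. A hedged sketch of that packing; the field values below are invented for illustration:

#include <cassert>
#include <cstdint>

// Pack one 64-bit SOP2 word per the SOP2_Real64 layout: src0 in 7:0,
// src1 in 15:8, sdst in 22:16, opcode in 29:23, the fixed 0b10 SOP2
// encoding tag in 31:30, and the 32-bit literal in 63:32.
constexpr uint64_t encodeSOP2_64(uint8_t src0, uint8_t src1, uint8_t sdst,
                                 uint8_t op, uint32_t imm) {
  return (uint64_t(src0) << 0) | (uint64_t(src1) << 8) |
         (uint64_t(sdst & 0x7F) << 16) | (uint64_t(op & 0x7F) << 23) |
         (uint64_t(0b10) << 30) | (uint64_t(imm) << 32);
}

int main() {
  // Arbitrary register encodings; the literal is 1.0f's bit pattern.
  uint64_t W = encodeSOP2_64(1, 2, 3, 0x31, 0x3F800000);
  assert(((W >> 30) & 0x3) == 0b10); // SOP2 encoding tag
  assert((W >> 32) == 0x3F800000);   // literal rides in the high dword
  return 0;
}
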
+class SOP2_F16 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
+ opName, (outs SReg_32:$sdst), (ins SSrc_f16:$src0, SSrc_f16:$src1),
+ "$sdst, $src0, $src1", pattern
+>;
class SOP2_32 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
opName, (outs SReg_32:$sdst), (ins SSrc_b32:$src0, SSrc_b32:$src1),
"$sdst, $src0, $src1", pattern
>;
+class SOP2_F32 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
+ opName, (outs SReg_32:$sdst), (ins SSrc_f32:$src0, SSrc_f32:$src1),
+ "$sdst, $src0, $src1", pattern
+>;
+
class SOP2_64 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
opName, (outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1),
"$sdst, $src0, $src1", pattern
@@ -518,19 +661,22 @@ def S_MAX_U32 : SOP2_32 <"s_max_u32",
} // End isCommutable = 1
} // End Defs = [SCC]
-def SelectPat : PatFrag <
- (ops node:$src1, node:$src2),
- (select SCC, $src1, $src2),
- [{ return !N->isDivergent(); }]
->;
+let SubtargetPredicate = isGFX12Plus in {
+ def S_ADD_U64 : SOP2_64<"s_add_u64">{
+ let isCommutable = 1;
+ }
-let Uses = [SCC] in {
- let AddedComplexity = 20 in {
- def S_CSELECT_B32 : SOP2_32 <"s_cselect_b32",
- [(set i32:$sdst, (SelectPat i32:$src0, i32:$src1))]
- >;
+ def S_SUB_U64 : SOP2_64<"s_sub_u64">;
+
+ def S_MUL_U64 : SOP2_64 <"s_mul_u64",
+ [(set i64:$sdst, (UniformBinFrag<mul> i64:$src0, i64:$src1))]> {
+ let isCommutable = 1;
}
+} // End SubtargetPredicate = isGFX12Plus
+
+let Uses = [SCC] in {
+ def S_CSELECT_B32 : SOP2_32 <"s_cselect_b32">;
def S_CSELECT_B64 : SOP2_64 <"s_cselect_b64">;
} // End Uses = [SCC]
@@ -705,6 +851,83 @@ let SubtargetPredicate = isGFX11Plus in {
def S_PACK_HL_B32_B16 : SOP2_32<"s_pack_hl_b32_b16">;
} // End SubtargetPredicate = isGFX11Plus
+class SOP2_F32_Inst<string opName, SDPatternOperator Op, ValueType dstVt=f32> :
+ SOP2_F32<opName,
+ [(set dstVt:$sdst, (UniformBinFrag<Op> SSrc_f32:$src0, SSrc_f32:$src1))]>;
+
+class SOP2_F16_Inst<string opName, SDPatternOperator Op> :
+ SOP2_F16<opName,
+ [(set f16:$sdst, (UniformBinFrag<Op> SSrc_f16:$src0, SSrc_f16:$src1))]>;
+
+let SubtargetPredicate = HasSALUFloatInsts, mayRaiseFPException = 1,
+ Uses = [MODE], SchedRW = [WriteSFPU] in {
+ let isReMaterializable = 1 in {
+ let isCommutable = 1 in {
+ def S_ADD_F32 : SOP2_F32_Inst<"s_add_f32", any_fadd>;
+ def S_MIN_F32 : SOP2_F32_Inst<"s_min_f32", fminnum_like>;
+ def S_MAX_F32 : SOP2_F32_Inst<"s_max_f32", fmaxnum_like>;
+ def S_MUL_F32 : SOP2_F32_Inst<"s_mul_f32", any_fmul>;
+
+ let FixedSize = 1 in
+ def S_FMAAK_F32 : SOP2_Pseudo<
+ "s_fmaak_f32", (outs SReg_32:$sdst),
+ (ins SSrc_f32_Deferred:$src0, SSrc_f32_Deferred:$src1, KImmFP32:$imm),
+ "$sdst, $src0, $src1, $imm"
+ >;
+
+ let FPDPRounding = 1 in {
+ def S_ADD_F16 : SOP2_F16_Inst<"s_add_f16", any_fadd>;
+ def S_MUL_F16 : SOP2_F16_Inst<"s_mul_f16", any_fmul>;
+ } // End FPDPRounding
+
+ def S_MIN_F16 : SOP2_F16_Inst<"s_min_f16", fminnum_like>;
+ def S_MAX_F16 : SOP2_F16_Inst<"s_max_f16", fmaxnum_like>;
+ } // End isCommutable = 1
+
+ let FPDPRounding = 1 in
+ def S_SUB_F16 : SOP2_F16_Inst<"s_sub_f16", any_fsub>;
+
+ def S_SUB_F32 : SOP2_F32_Inst<"s_sub_f32", any_fsub>;
+ def S_CVT_PK_RTZ_F16_F32 : SOP2_F32_Inst<"s_cvt_pk_rtz_f16_f32",
+ AMDGPUpkrtz_f16_f32, v2f16>;
+
+ let FixedSize = 1 in
+ def S_FMAMK_F32 : SOP2_Pseudo<
+ "s_fmamk_f32", (outs SReg_32:$sdst),
+ (ins SSrc_f32_Deferred:$src0, KImmFP32:$imm, SSrc_f32_Deferred:$src1),
+ "$sdst, $src0, $imm, $src1"
+ >;
+ } // End isReMaterializable = 1
+
+ let Constraints = "$sdst = $src2", DisableEncoding="$src2",
+ isCommutable = 1, AddedComplexity = 20 in {
+ def S_FMAC_F32 : SOP2_Pseudo<
+ "s_fmac_f32", (outs SReg_32:$sdst),
+ (ins SSrc_f32:$src0, SSrc_f32:$src1, SReg_32:$src2),
+ "$sdst, $src0, $src1",
+ [(set f32:$sdst, (UniformTernaryFrag<any_fma> SSrc_f32:$src0, SSrc_f32:$src1, SReg_32:$src2))]
+ >;
+
+ def S_FMAC_F16 : SOP2_Pseudo<
+ "s_fmac_f16", (outs SReg_32:$sdst),
+ (ins SSrc_f16:$src0, SSrc_f16:$src1, SReg_32:$src2),
+ "$sdst, $src0, $src1",
+ [(set f16:$sdst, (UniformTernaryFrag<any_fma> SSrc_f16:$src0, SSrc_f16:$src1, SReg_32:$src2))]
+ >;
+ } // End Constraints = "$sdst = $src2", DisableEncoding="$src2",
+ // isCommutable = 1, AddedComplexity = 20
+} // End SubtargetPredicate = HasSALUFloatInsts, mayRaiseFPException = 1,
+ // Uses = [MODE], SchedRW = [WriteSFPU]
+
+// On GFX12 MIN/MAX instructions do not read MODE register.
+let SubtargetPredicate = isGFX12Plus, mayRaiseFPException = 1, isCommutable = 1,
+ isReMaterializable = 1, SchedRW = [WriteSFPU] in {
+ def S_MINIMUM_F32 : SOP2_F32_Inst<"s_minimum_f32", fminimum>;
+ def S_MAXIMUM_F32 : SOP2_F32_Inst<"s_maximum_f32", fmaximum>;
+ def S_MINIMUM_F16 : SOP2_F16_Inst<"s_minimum_f16", fminimum>;
+ def S_MAXIMUM_F16 : SOP2_F16_Inst<"s_maximum_f16", fmaximum>;
+}
+
//===----------------------------------------------------------------------===//
// SOPK Instructions
//===----------------------------------------------------------------------===//
@@ -724,9 +947,9 @@ class SOPK_Pseudo <string opName, dag outs, dag ins,
let has_sdst = 1;
}
-class SOPK_Real<SOPK_Pseudo ps> :
+class SOPK_Real<SOPK_Pseudo ps, string real_name = ps.Mnemonic> :
InstSI <ps.OutOperandList, ps.InOperandList,
- ps.Mnemonic # ps.AsmOperands> {
+ real_name # ps.AsmOperands> {
let SALU = 1;
let SOPK = 1;
let isPseudo = 0;
@@ -750,8 +973,8 @@ class SOPK_Real<SOPK_Pseudo ps> :
bits<32> imm;
}
-class SOPK_Real32<bits<5> op, SOPK_Pseudo ps> :
- SOPK_Real <ps>,
+class SOPK_Real32<bits<5> op, SOPK_Pseudo ps, string real_name = ps.Mnemonic> :
+ SOPK_Real <ps, real_name>,
Enc32 {
let Inst{15-0} = simm16;
let Inst{22-16} = !if(ps.has_sdst, sdst, ?);
@@ -870,6 +1093,8 @@ def S_CBRANCH_I_FORK : SOPK_Pseudo <
// This is hasSideEffects to allow its use in readcyclecounter selection.
// FIXME: Need to truncate immediate to 16 bits.
+// FIXME: Missing mode register use. Should have separate pseudos for the
+// known-may-read-MODE and only-read-MODE cases.
def S_GETREG_B32 : SOPK_Pseudo <
"s_getreg_b32",
(outs SReg_32:$sdst), (ins hwreg:$simm16),
@@ -956,10 +1181,14 @@ let SubtargetPredicate = isGFX10Plus in {
"$simm16"> {
let has_sdst = 0;
}
+} // End SubtargetPredicate = isGFX10Plus
+let SubtargetPredicate = isGFX10GFX11 in {
def S_SUBVECTOR_LOOP_BEGIN : SOPK_32_BR<"s_subvector_loop_begin">;
def S_SUBVECTOR_LOOP_END : SOPK_32_BR<"s_subvector_loop_end">;
+} // End SubtargetPredicate = isGFX10GFX11
+let SubtargetPredicate = isGFX10Plus in {
def S_WAITCNT_VSCNT : SOPK_WAITCNT<"s_waitcnt_vscnt">;
def S_WAITCNT_VMCNT : SOPK_WAITCNT<"s_waitcnt_vmcnt">;
def S_WAITCNT_EXPCNT : SOPK_WAITCNT<"s_waitcnt_expcnt">;
@@ -1033,6 +1262,30 @@ class SOPC_CMP_32<string opName,
let isCommutable = 1;
}
+class SOPC_CMP_F32<string opName,
+ SDPatternOperator cond = COND_NULL, string revOp = opName>
+ : SOPC_Helper<SSrc_b32, f32, opName, cond>,
+ Commutable_REV<revOp, !eq(revOp, opName)>,
+ SOPKInstTable<0, opName> {
+ let isCompare = 1;
+ let isCommutable = 1;
+ let mayRaiseFPException = 1;
+ let Uses = [MODE];
+ let SchedRW = [WriteSFPU];
+}
+
+class SOPC_CMP_F16<string opName,
+ SDPatternOperator cond = COND_NULL, string revOp = opName>
+ : SOPC_Helper<SSrc_b16, f16, opName, cond>,
+ Commutable_REV<revOp, !eq(revOp, opName)>,
+ SOPKInstTable<0, opName> {
+ let isCompare = 1;
+ let isCommutable = 1;
+ let mayRaiseFPException = 1;
+ let Uses = [MODE];
+ let SchedRW = [WriteSFPU];
+}
+
class SOPC_CMP_64<string opName,
SDPatternOperator cond = COND_NULL, string revOp = opName>
: SOPC_Helper<SSrc_b64, i64, opName, cond>,
@@ -1089,6 +1342,40 @@ def S_SET_GPR_IDX_ON : SOPC_Pseudo <
}
}
+let SubtargetPredicate = HasSALUFloatInsts in {
+
+def S_CMP_LT_F32 : SOPC_CMP_F32<"s_cmp_lt_f32", COND_OLT, "s_cmp_gt_f32">;
+def S_CMP_EQ_F32 : SOPC_CMP_F32<"s_cmp_eq_f32", COND_OEQ>;
+def S_CMP_LE_F32 : SOPC_CMP_F32<"s_cmp_le_f32", COND_OLE, "s_cmp_ge_f32">;
+def S_CMP_GT_F32 : SOPC_CMP_F32<"s_cmp_gt_f32", COND_OGT>;
+def S_CMP_LG_F32 : SOPC_CMP_F32<"s_cmp_lg_f32", COND_ONE>;
+def S_CMP_GE_F32 : SOPC_CMP_F32<"s_cmp_ge_f32", COND_OGE>;
+def S_CMP_O_F32 : SOPC_CMP_F32<"s_cmp_o_f32", COND_O>;
+def S_CMP_U_F32 : SOPC_CMP_F32<"s_cmp_u_f32", COND_UO>;
+def S_CMP_NGE_F32 : SOPC_CMP_F32<"s_cmp_nge_f32", COND_ULT, "s_cmp_nle_f32">;
+def S_CMP_NLG_F32 : SOPC_CMP_F32<"s_cmp_nlg_f32", COND_UEQ>;
+def S_CMP_NGT_F32 : SOPC_CMP_F32<"s_cmp_ngt_f32", COND_ULE, "s_cmp_nlt_f32">;
+def S_CMP_NLE_F32 : SOPC_CMP_F32<"s_cmp_nle_f32", COND_UGT>;
+def S_CMP_NEQ_F32 : SOPC_CMP_F32<"s_cmp_neq_f32", COND_UNE>;
+def S_CMP_NLT_F32 : SOPC_CMP_F32<"s_cmp_nlt_f32", COND_UGE>;
+
+def S_CMP_LT_F16 : SOPC_CMP_F16<"s_cmp_lt_f16", COND_OLT, "s_cmp_gt_f16">;
+def S_CMP_EQ_F16 : SOPC_CMP_F16<"s_cmp_eq_f16", COND_OEQ>;
+def S_CMP_LE_F16 : SOPC_CMP_F16<"s_cmp_le_f16", COND_OLE, "s_cmp_ge_f16">;
+def S_CMP_GT_F16 : SOPC_CMP_F16<"s_cmp_gt_f16", COND_OGT>;
+def S_CMP_LG_F16 : SOPC_CMP_F16<"s_cmp_lg_f16", COND_ONE>;
+def S_CMP_GE_F16 : SOPC_CMP_F16<"s_cmp_ge_f16", COND_OGE>;
+def S_CMP_O_F16 : SOPC_CMP_F16<"s_cmp_o_f16", COND_O>;
+def S_CMP_U_F16 : SOPC_CMP_F16<"s_cmp_u_f16", COND_UO>;
+def S_CMP_NGE_F16 : SOPC_CMP_F16<"s_cmp_nge_f16", COND_ULT, "s_cmp_nle_f16">;
+def S_CMP_NLG_F16 : SOPC_CMP_F16<"s_cmp_nlg_f16", COND_UEQ>;
+def S_CMP_NGT_F16 : SOPC_CMP_F16<"s_cmp_ngt_f16", COND_ULE, "s_cmp_nlt_f16">;
+def S_CMP_NLE_F16 : SOPC_CMP_F16<"s_cmp_nle_f16", COND_UGT>;
+def S_CMP_NEQ_F16 : SOPC_CMP_F16<"s_cmp_neq_f16", COND_UNE>;
+def S_CMP_NLT_F16 : SOPC_CMP_F16<"s_cmp_nlt_f16", COND_UGE>;
+
+} // End SubtargetPredicate = HasSALUFloatInsts
+
//===----------------------------------------------------------------------===//
// SOPP Instructions
//===----------------------------------------------------------------------===//
@@ -1161,7 +1448,10 @@ multiclass SOPP_With_Relaxation <string opName, dag ins,
def _pad_s_nop : SOPP_Pseudo <opName # "_pad_s_nop", ins, asmOps, pattern, " ", opName>;
}
-def S_NOP : SOPP_Pseudo<"s_nop" , (ins i16imm:$simm16), "$simm16">;
+def S_NOP : SOPP_Pseudo<"s_nop" , (ins i16imm:$simm16), "$simm16",
+ [(int_amdgcn_s_nop timm:$simm16)]> {
+ let hasSideEffects = 1;
+}
let isTerminator = 1 in {
def S_ENDPGM : SOPP_Pseudo<"s_endpgm", (ins Endpgm:$simm16), "$simm16", [], ""> {
@@ -1264,6 +1554,21 @@ def S_BARRIER : SOPP_Pseudo <"s_barrier", (ins), "",
let isConvergent = 1;
}
+def S_BARRIER_WAIT : SOPP_Pseudo <"s_barrier_wait", (ins i16imm:$simm16), "$simm16",
+ [(int_amdgcn_s_barrier_wait timm:$simm16)]> {
+ let SchedRW = [WriteBarrier];
+ let isConvergent = 1;
+}
+
+def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave", (ins), "",
+ [(set SCC, (int_amdgcn_s_barrier_leave))]> {
+ let SchedRW = [WriteBarrier];
+ let simm16 = 0;
+ let fixed_imm = 1;
+ let isConvergent = 1;
+ let Defs = [SCC];
+}
+
def S_WAKEUP : SOPP_Pseudo <"s_wakeup", (ins) > {
let SubtargetPredicate = isGFX8Plus;
let simm16 = 0;
@@ -1272,9 +1577,19 @@ def S_WAKEUP : SOPP_Pseudo <"s_wakeup", (ins) > {
let mayStore = 1;
}
-let hasSideEffects = 1 in
def S_WAITCNT : SOPP_Pseudo <"s_waitcnt" , (ins SWaitCnt:$simm16), "$simm16",
[(int_amdgcn_s_waitcnt timm:$simm16)]>;
+
+// "_soft" waitcnts are waitcnts that are either relaxed into their non-soft
+// counterpart, or completely removed.
+//
+// These are inserted by SIMemoryLegalizer to resolve memory dependencies
+// and later optimized by SIInsertWaitcnts.
+// For example, an S_WAITCNT_soft 0 can be completely removed in a function
+// that doesn't access memory.
+def S_WAITCNT_soft : SOPP_Pseudo <"s_soft_waitcnt" , (ins SWaitCnt:$simm16), "$simm16">;
+def S_WAITCNT_VSCNT_soft : SOPK_WAITCNT<"s_soft_waitcnt_vscnt">;
+
def S_SETHALT : SOPP_Pseudo <"s_sethalt" , (ins i32imm:$simm16), "$simm16",
[(int_amdgcn_s_sethalt timm:$simm16)]>;
def S_SETKILL : SOPP_Pseudo <"s_setkill" , (ins i16imm:$simm16), "$simm16">;
@@ -1285,23 +1600,23 @@ def S_SETKILL : SOPP_Pseudo <"s_setkill" , (ins i16imm:$simm16), "$simm16">;
// maximum really 15 on VI?
def S_SLEEP : SOPP_Pseudo <"s_sleep", (ins i32imm:$simm16),
"$simm16", [(int_amdgcn_s_sleep timm:$simm16)]> {
+}
+
+def S_SLEEP_VAR : SOP1_0_32 <"s_sleep_var", [(int_amdgcn_s_sleep_var SSrc_b32:$src0)]> {
let hasSideEffects = 1;
}
def S_SETPRIO : SOPP_Pseudo <"s_setprio", (ins i16imm:$simm16), "$simm16",
[(int_amdgcn_s_setprio timm:$simm16)]> {
- let hasSideEffects = 1;
}
let Uses = [EXEC, M0] in {
def S_SENDMSG : SOPP_Pseudo <"s_sendmsg" , (ins SendMsg:$simm16), "$simm16",
[(int_amdgcn_s_sendmsg (i32 timm:$simm16), M0)]> {
- let hasSideEffects = 1;
}
def S_SENDMSGHALT : SOPP_Pseudo <"s_sendmsghalt" , (ins SendMsg:$simm16), "$simm16",
[(int_amdgcn_s_sendmsghalt (i32 timm:$simm16), M0)]> {
- let hasSideEffects = 1;
}
} // End Uses = [EXEC, M0]
@@ -1316,13 +1631,14 @@ def S_ICACHE_INV : SOPP_Pseudo <"s_icache_inv", (ins)> {
}
def S_INCPERFLEVEL : SOPP_Pseudo <"s_incperflevel", (ins i32imm:$simm16), "$simm16",
[(int_amdgcn_s_incperflevel timm:$simm16)]> {
- let hasSideEffects = 1;
}
def S_DECPERFLEVEL : SOPP_Pseudo <"s_decperflevel", (ins i32imm:$simm16), "$simm16",
[(int_amdgcn_s_decperflevel timm:$simm16)]> {
- let hasSideEffects = 1;
}
-def S_TTRACEDATA : SOPP_Pseudo <"s_ttracedata", (ins)> {
+
+let Uses = [M0] in
+def S_TTRACEDATA : SOPP_Pseudo <"s_ttracedata", (ins), "",
+ [(int_amdgcn_s_ttracedata M0)]> {
let simm16 = 0;
let fixed_imm = 1;
}
@@ -1366,8 +1682,10 @@ let SubtargetPredicate = isGFX10Plus in {
[(SIdenorm_mode (i32 timm:$simm16))]>;
}
+ let hasSideEffects = 1 in
def S_TTRACEDATA_IMM :
- SOPP_Pseudo<"s_ttracedata_imm", (ins s16imm:$simm16), "$simm16">;
+ SOPP_Pseudo<"s_ttracedata_imm", (ins s16imm:$simm16), "$simm16",
+ [(int_amdgcn_s_ttracedata_imm timm:$simm16)]>;
} // End SubtargetPredicate = isGFX10Plus
let SubtargetPredicate = isGFX11Plus in {
@@ -1379,6 +1697,11 @@ let SubtargetPredicate = isGFX11Plus in {
"$simm16">;
} // End SubtargetPredicate = isGFX11Plus
+let SubtargetPredicate = HasVGPRSingleUseHintInsts in {
+ def S_SINGLEUSE_VDST :
+ SOPP_Pseudo<"s_singleuse_vdst", (ins s16imm:$simm16), "$simm16">;
+} // End SubtargetPredicate = HasVGPRSingleUseHintInsts
+
//===----------------------------------------------------------------------===//
// SOP1 Patterns
//===----------------------------------------------------------------------===//
@@ -1421,10 +1744,91 @@ def : GCNPat <
(S_WAIT_EVENT (i16 0))
>;
+// The first 10 bits of the mode register are the core FP mode on all
+// subtargets.
+//
+// The high bits include additional fields, intermixed with some
+// non-floating point environment information. We extract the full
+// register and clear non-relevant bits.
+//
+// EXCP_EN covers floating point exceptions, but also some other
+// non-FP exceptions.
+//
+// Bits 12-18 cover the relevant exception mask on all subtargets.
+//
+// FIXME: Bit 18 is int_div0, should this be in the FP environment? I
+// think the only source is v_rcp_iflag_i32.
+//
+// On GFX9+:
+// Bit 23 is the additional FP16_OVFL mode.
+//
+// Bits 19, 20, and 21 cover non-FP exceptions and differ between
+// gfx9/10/11, so we ignore them here.
+
+// TODO: Would it be cheaper to emit multiple s_getreg_b32 calls for
+// the ranges and combine the results?
+
+defvar fp_round_mask = !add(!shl(1, 4), -1);
+defvar fp_denorm_mask = !shl(!add(!shl(1, 4), -1), 4);
+defvar dx10_clamp_mask = !shl(1, 8);
+defvar ieee_mode_mask = !shl(1, 9);
+
+// Covers fp_round, fp_denorm, dx10_clamp, and IEEE bit.
+defvar fpmode_mask =
+ !or(fp_round_mask, fp_denorm_mask, dx10_clamp_mask, ieee_mode_mask);
+
+defvar fp_excp_en_mask = !shl(!add(!shl(1, 7), -1), 12);
+defvar fp16_ovfl = !shl(1, 23);
+defvar fpmode_mask_gfx6plus = !or(fpmode_mask, fp_excp_en_mask);
+defvar fpmode_mask_gfx9plus = !or(fpmode_mask_gfx6plus, fp16_ovfl);
+
+class GetFPModePat<int fpmode_mask> : GCNPat<
+ (i32 get_fpmode),
+ (S_AND_B32 (i32 fpmode_mask),
+ (S_GETREG_B32 getHwRegImm<
+ HWREG.MODE, 0,
+ !add(!logtwo(fpmode_mask), 1)>.ret))
+>;
+
+// TODO: Might be worth moving to custom lowering so the and is
+// exposed to demanded bits optimizations. Most users probably only
+// care about the rounding or denorm mode bits. We also can reduce the
+// demanded read from the getreg immediate.
+let SubtargetPredicate = isGFX9Plus in {
+// Last bit = FP16_OVFL
+def : GetFPModePat<fpmode_mask_gfx9plus>;
+}
+
+// Last bit = EXCP_EN.int_div0
+let SubtargetPredicate = isNotGFX9Plus in {
+def : GetFPModePat<fpmode_mask_gfx6plus>;
+}
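
[Editor's note] As a sanity check on the defvar arithmetic above, the masks evaluate as follows; this is plain C++ restating the TableGen !shl/!add/!or expressions, with __builtin_clz standing in for !logtwo:

#include <cstdint>

constexpr uint32_t fp_round_mask   = (1u << 4) - 1;         // 0x0000000F
constexpr uint32_t fp_denorm_mask  = ((1u << 4) - 1) << 4;  // 0x000000F0
constexpr uint32_t dx10_clamp_mask = 1u << 8;               // 0x00000100
constexpr uint32_t ieee_mode_mask  = 1u << 9;               // 0x00000200
constexpr uint32_t fpmode_mask =
    fp_round_mask | fp_denorm_mask | dx10_clamp_mask | ieee_mode_mask;

constexpr uint32_t fp_excp_en_mask = ((1u << 7) - 1) << 12; // bits 12-18
constexpr uint32_t fp16_ovfl       = 1u << 23;
constexpr uint32_t fpmode_mask_gfx6plus = fpmode_mask | fp_excp_en_mask;
constexpr uint32_t fpmode_mask_gfx9plus = fpmode_mask_gfx6plus | fp16_ovfl;

static_assert(fpmode_mask == 0x3FF, "round/denorm/dx10_clamp/ieee");
static_assert(fpmode_mask_gfx6plus == 0x0007F3FF, "plus EXCP_EN");
static_assert(fpmode_mask_gfx9plus == 0x0087F3FF, "plus FP16_OVFL");

// getHwRegImm width = !logtwo(mask) + 1: read up to the highest set bit,
// i.e. 19 bits of MODE on gfx6-8 and 24 bits on gfx9+.
static_assert(31 - __builtin_clz(fpmode_mask_gfx6plus) + 1 == 19, "");
static_assert(31 - __builtin_clz(fpmode_mask_gfx9plus) + 1 == 24, "");
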
+
//===----------------------------------------------------------------------===//
// SOP2 Patterns
//===----------------------------------------------------------------------===//
+def UniformSelect : PatFrag<
+ (ops node:$src0, node:$src1),
+ (select SCC, $src0, $src1),
+ [{ return !N->isDivergent(); }]
+>;
+
+let AddedComplexity = 20 in {
+ def : GCNPat<
+ (i32 (UniformSelect i32:$src0, i32:$src1)),
+ (S_CSELECT_B32 SSrc_b32:$src0, SSrc_b32:$src1)
+ >;
+
+ // TODO: The predicate should not be necessary, but enabling this pattern for
+ // all subtargets generates worse code in some cases.
+ let OtherPredicates = [HasPseudoScalarTrans] in
+ def : GCNPat<
+ (f32 (UniformSelect f32:$src0, f32:$src1)),
+ (S_CSELECT_B32 SSrc_b32:$src0, SSrc_b32:$src1)
+ >;
+}
+
// V_ADD_I32_e32/S_ADD_U32 produces carry in VCC/SCC. For the vector
// case, the sgpr-copies pass will fix this to use the vector version.
def : GCNPat <
@@ -1476,6 +1880,11 @@ def : ScalarNot2Pat<S_ORN2_B64, or, v2i32>;
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
+class Select_gfx12<string opName> : SIMCInstr<opName, SIEncodingFamily.GFX12> {
+ Predicate AssemblerPredicate = isGFX12Only;
+ string DecoderNamespace = "GFX12";
+}
+
class Select_gfx11<string opName> : SIMCInstr<opName, SIEncodingFamily.GFX11> {
Predicate AssemblerPredicate = isGFX11Only;
string DecoderNamespace = "GFX11";
@@ -1497,85 +1906,143 @@ class Select_gfx6_gfx7<string opName> : SIMCInstr<opName, SIEncodingFamily.SI> {
}
//===----------------------------------------------------------------------===//
-// GFX11.
+// SOP1 - GFX11, GFX12
//===----------------------------------------------------------------------===//
+multiclass SOP1_Real_gfx12<bits<8> op> {
+ def _gfx12 : SOP1_Real<op, !cast<SOP1_Pseudo>(NAME)>,
+ Select_gfx12<!cast<SOP1_Pseudo>(NAME).Mnemonic>;
+}
+
+multiclass SOP1_M0_Real_gfx12<bits<8> op> {
+ def _gfx12 : SOP1_Real<op, !cast<SOP1_Pseudo>(NAME)>,
+ Select_gfx12<!cast<SOP1_Pseudo>(NAME).Mnemonic> {
+ let Inst{7-0} = M0_gfx11plus.HWEncoding{7-0}; // Set Src0 encoding to M0
+ }
+}
+
multiclass SOP1_Real_gfx11<bits<8> op> {
def _gfx11 : SOP1_Real<op, !cast<SOP1_Pseudo>(NAME)>,
Select_gfx11<!cast<SOP1_Pseudo>(NAME).Mnemonic>;
}
+multiclass SOP1_Real_Renamed_gfx12<bits<8> op, SOP1_Pseudo backing_pseudo, string real_name> {
+ def _gfx12 : SOP1_Real<op, backing_pseudo, real_name>,
+ Select_gfx12<backing_pseudo.Mnemonic>,
+ MnemonicAlias<backing_pseudo.Mnemonic, real_name>, Requires<[isGFX12Plus]>;
+}
+
multiclass SOP1_Real_Renamed_gfx11<bits<8> op, SOP1_Pseudo backing_pseudo, string real_name> {
def _gfx11 : SOP1_Real<op, backing_pseudo, real_name>,
Select_gfx11<backing_pseudo.Mnemonic>,
- MnemonicAlias<backing_pseudo.Mnemonic, real_name>, Requires<[isGFX11Plus]>;
-}
-
-defm S_MOV_B32 : SOP1_Real_gfx11<0x000>;
-defm S_MOV_B64 : SOP1_Real_gfx11<0x001>;
-defm S_CMOV_B32 : SOP1_Real_gfx11<0x002>;
-defm S_CMOV_B64 : SOP1_Real_gfx11<0x003>;
-defm S_BREV_B32 : SOP1_Real_gfx11<0x004>;
-defm S_BREV_B64 : SOP1_Real_gfx11<0x005>;
-defm S_CTZ_I32_B32 : SOP1_Real_Renamed_gfx11<0x008, S_FF1_I32_B32, "s_ctz_i32_b32">;
-defm S_CTZ_I32_B64 : SOP1_Real_Renamed_gfx11<0x009, S_FF1_I32_B64, "s_ctz_i32_b64">;
-defm S_CLZ_I32_U32 : SOP1_Real_Renamed_gfx11<0x00a, S_FLBIT_I32_B32, "s_clz_i32_u32">;
-defm S_CLZ_I32_U64 : SOP1_Real_Renamed_gfx11<0x00b, S_FLBIT_I32_B64, "s_clz_i32_u64">;
-defm S_CLS_I32 : SOP1_Real_Renamed_gfx11<0x00c, S_FLBIT_I32, "s_cls_i32">;
-defm S_CLS_I32_I64 : SOP1_Real_Renamed_gfx11<0x00d, S_FLBIT_I32_I64, "s_cls_i32_i64">;
-defm S_SEXT_I32_I8 : SOP1_Real_gfx11<0x00e>;
-defm S_SEXT_I32_I16 : SOP1_Real_gfx11<0x00f>;
-defm S_BITSET0_B32 : SOP1_Real_gfx11<0x010>;
-defm S_BITSET0_B64 : SOP1_Real_gfx11<0x011>;
-defm S_BITSET1_B32 : SOP1_Real_gfx11<0x012>;
-defm S_BITSET1_B64 : SOP1_Real_gfx11<0x013>;
-defm S_BITREPLICATE_B64_B32 : SOP1_Real_gfx11<0x014>;
-defm S_ABS_I32 : SOP1_Real_gfx11<0x015>;
-defm S_BCNT0_I32_B32 : SOP1_Real_gfx11<0x016>;
-defm S_BCNT0_I32_B64 : SOP1_Real_gfx11<0x017>;
-defm S_BCNT1_I32_B32 : SOP1_Real_gfx11<0x018>;
-defm S_BCNT1_I32_B64 : SOP1_Real_gfx11<0x019>;
-defm S_QUADMASK_B32 : SOP1_Real_gfx11<0x01a>;
-defm S_QUADMASK_B64 : SOP1_Real_gfx11<0x01b>;
-defm S_WQM_B32 : SOP1_Real_gfx11<0x01c>;
-defm S_WQM_B64 : SOP1_Real_gfx11<0x01d>;
-defm S_NOT_B32 : SOP1_Real_gfx11<0x01e>;
-defm S_NOT_B64 : SOP1_Real_gfx11<0x01f>;
-defm S_AND_SAVEEXEC_B32 : SOP1_Real_gfx11<0x020>;
-defm S_AND_SAVEEXEC_B64 : SOP1_Real_gfx11<0x021>;
-defm S_OR_SAVEEXEC_B32 : SOP1_Real_gfx11<0x022>;
-defm S_OR_SAVEEXEC_B64 : SOP1_Real_gfx11<0x023>;
-defm S_XOR_SAVEEXEC_B32 : SOP1_Real_gfx11<0x024>;
-defm S_XOR_SAVEEXEC_B64 : SOP1_Real_gfx11<0x025>;
-defm S_NAND_SAVEEXEC_B32 : SOP1_Real_gfx11<0x026>;
-defm S_NAND_SAVEEXEC_B64 : SOP1_Real_gfx11<0x027>;
-defm S_NOR_SAVEEXEC_B32 : SOP1_Real_gfx11<0x028>;
-defm S_NOR_SAVEEXEC_B64 : SOP1_Real_gfx11<0x029>;
-defm S_XNOR_SAVEEXEC_B32 : SOP1_Real_gfx11<0x02a>;
-/*defm S_XNOR_SAVEEXEC_B64 : SOP1_Real_gfx11<0x02b>; //same as older arch, handled there*/
-defm S_AND_NOT0_SAVEEXEC_B32 : SOP1_Real_Renamed_gfx11<0x02c, S_ANDN1_SAVEEXEC_B32, "s_and_not0_saveexec_b32">;
-defm S_AND_NOT0_SAVEEXEC_B64 : SOP1_Real_Renamed_gfx11<0x02d, S_ANDN1_SAVEEXEC_B64, "s_and_not0_saveexec_b64">;
-defm S_OR_NOT0_SAVEEXEC_B32 : SOP1_Real_Renamed_gfx11<0x02e, S_ORN1_SAVEEXEC_B32, "s_or_not0_saveexec_b32">;
-defm S_OR_NOT0_SAVEEXEC_B64 : SOP1_Real_Renamed_gfx11<0x02f, S_ORN1_SAVEEXEC_B64, "s_or_not0_saveexec_b64">;
-defm S_AND_NOT1_SAVEEXEC_B32 : SOP1_Real_Renamed_gfx11<0x030, S_ANDN2_SAVEEXEC_B32, "s_and_not1_saveexec_b32">;
-defm S_AND_NOT1_SAVEEXEC_B64 : SOP1_Real_Renamed_gfx11<0x031, S_ANDN2_SAVEEXEC_B64, "s_and_not1_saveexec_b64">;
-defm S_OR_NOT1_SAVEEXEC_B32 : SOP1_Real_Renamed_gfx11<0x032, S_ORN2_SAVEEXEC_B32, "s_or_not1_saveexec_b32">;
-defm S_OR_NOT1_SAVEEXEC_B64 : SOP1_Real_Renamed_gfx11<0x033, S_ORN2_SAVEEXEC_B64, "s_or_not1_saveexec_b64">;
-defm S_AND_NOT0_WREXEC_B32 : SOP1_Real_Renamed_gfx11<0x034, S_ANDN1_WREXEC_B32, "s_and_not0_wrexec_b32">;
-defm S_AND_NOT0_WREXEC_B64 : SOP1_Real_Renamed_gfx11<0x035, S_ANDN1_WREXEC_B64, "s_and_not0_wrexec_b64">;
-defm S_AND_NOT1_WREXEC_B32 : SOP1_Real_Renamed_gfx11<0x036, S_ANDN2_WREXEC_B32, "s_and_not1_wrexec_b32">;
-defm S_AND_NOT1_WREXEC_B64 : SOP1_Real_Renamed_gfx11<0x037, S_ANDN2_WREXEC_B64, "s_and_not1_wrexec_b64">;
-defm S_MOVRELS_B32 : SOP1_Real_gfx11<0x040>;
-defm S_MOVRELS_B64 : SOP1_Real_gfx11<0x041>;
-defm S_MOVRELD_B32 : SOP1_Real_gfx11<0x042>;
-defm S_MOVRELD_B64 : SOP1_Real_gfx11<0x043>;
-defm S_MOVRELSD_2_B32 : SOP1_Real_gfx11<0x044>;
-defm S_GETPC_B64 : SOP1_Real_gfx11<0x047>;
-defm S_SETPC_B64 : SOP1_Real_gfx11<0x048>;
-defm S_SWAPPC_B64 : SOP1_Real_gfx11<0x049>;
-defm S_RFE_B64 : SOP1_Real_gfx11<0x04a>;
-defm S_SENDMSG_RTN_B32 : SOP1_Real_gfx11<0x04c>;
-defm S_SENDMSG_RTN_B64 : SOP1_Real_gfx11<0x04d>;
+ MnemonicAlias<backing_pseudo.Mnemonic, real_name>, Requires<[isGFX11Only]>;
+}
+
+multiclass SOP1_Real_gfx11_gfx12<bits<8> op> :
+ SOP1_Real_gfx11<op>, SOP1_Real_gfx12<op>;
+
+multiclass SOP1_Real_Renamed_gfx11_gfx12<bits<8> op, SOP1_Pseudo backing_pseudo, string real_name> :
+ SOP1_Real_Renamed_gfx11<op, backing_pseudo, real_name>,
+ SOP1_Real_Renamed_gfx12<op, backing_pseudo, real_name>;
+
+defm S_MOV_B32 : SOP1_Real_gfx11_gfx12<0x000>;
+defm S_MOV_B64 : SOP1_Real_gfx11_gfx12<0x001>;
+defm S_CMOV_B32 : SOP1_Real_gfx11_gfx12<0x002>;
+defm S_CMOV_B64 : SOP1_Real_gfx11_gfx12<0x003>;
+defm S_BREV_B32 : SOP1_Real_gfx11_gfx12<0x004>;
+defm S_BREV_B64 : SOP1_Real_gfx11_gfx12<0x005>;
+defm S_CTZ_I32_B32 : SOP1_Real_Renamed_gfx11_gfx12<0x008, S_FF1_I32_B32, "s_ctz_i32_b32">;
+defm S_CTZ_I32_B64 : SOP1_Real_Renamed_gfx11_gfx12<0x009, S_FF1_I32_B64, "s_ctz_i32_b64">;
+defm S_CLZ_I32_U32 : SOP1_Real_Renamed_gfx11_gfx12<0x00a, S_FLBIT_I32_B32, "s_clz_i32_u32">;
+defm S_CLZ_I32_U64 : SOP1_Real_Renamed_gfx11_gfx12<0x00b, S_FLBIT_I32_B64, "s_clz_i32_u64">;
+defm S_CLS_I32 : SOP1_Real_Renamed_gfx11_gfx12<0x00c, S_FLBIT_I32, "s_cls_i32">;
+defm S_CLS_I32_I64 : SOP1_Real_Renamed_gfx11_gfx12<0x00d, S_FLBIT_I32_I64, "s_cls_i32_i64">;
+defm S_SEXT_I32_I8 : SOP1_Real_gfx11_gfx12<0x00e>;
+defm S_SEXT_I32_I16 : SOP1_Real_gfx11_gfx12<0x00f>;
+defm S_BITSET0_B32 : SOP1_Real_gfx11_gfx12<0x010>;
+defm S_BITSET0_B64 : SOP1_Real_gfx11_gfx12<0x011>;
+defm S_BITSET1_B32 : SOP1_Real_gfx11_gfx12<0x012>;
+defm S_BITSET1_B64 : SOP1_Real_gfx11_gfx12<0x013>;
+defm S_BITREPLICATE_B64_B32 : SOP1_Real_gfx11_gfx12<0x014>;
+defm S_ABS_I32 : SOP1_Real_gfx11_gfx12<0x015>;
+defm S_BCNT0_I32_B32 : SOP1_Real_gfx11_gfx12<0x016>;
+defm S_BCNT0_I32_B64 : SOP1_Real_gfx11_gfx12<0x017>;
+defm S_BCNT1_I32_B32 : SOP1_Real_gfx11_gfx12<0x018>;
+defm S_BCNT1_I32_B64 : SOP1_Real_gfx11_gfx12<0x019>;
+defm S_QUADMASK_B32 : SOP1_Real_gfx11_gfx12<0x01a>;
+defm S_QUADMASK_B64 : SOP1_Real_gfx11_gfx12<0x01b>;
+defm S_WQM_B32 : SOP1_Real_gfx11_gfx12<0x01c>;
+defm S_WQM_B64 : SOP1_Real_gfx11_gfx12<0x01d>;
+defm S_NOT_B32 : SOP1_Real_gfx11_gfx12<0x01e>;
+defm S_NOT_B64 : SOP1_Real_gfx11_gfx12<0x01f>;
+defm S_AND_SAVEEXEC_B32 : SOP1_Real_gfx11_gfx12<0x020>;
+defm S_AND_SAVEEXEC_B64 : SOP1_Real_gfx11_gfx12<0x021>;
+defm S_OR_SAVEEXEC_B32 : SOP1_Real_gfx11_gfx12<0x022>;
+defm S_OR_SAVEEXEC_B64 : SOP1_Real_gfx11_gfx12<0x023>;
+defm S_XOR_SAVEEXEC_B32 : SOP1_Real_gfx11_gfx12<0x024>;
+defm S_XOR_SAVEEXEC_B64 : SOP1_Real_gfx11_gfx12<0x025>;
+defm S_NAND_SAVEEXEC_B32 : SOP1_Real_gfx11_gfx12<0x026>;
+defm S_NAND_SAVEEXEC_B64 : SOP1_Real_gfx11_gfx12<0x027>;
+defm S_NOR_SAVEEXEC_B32 : SOP1_Real_gfx11_gfx12<0x028>;
+defm S_NOR_SAVEEXEC_B64 : SOP1_Real_gfx11_gfx12<0x029>;
+defm S_XNOR_SAVEEXEC_B32 : SOP1_Real_gfx11_gfx12<0x02a>;
+/*defm S_XNOR_SAVEEXEC_B64 : SOP1_Real_gfx11_gfx12<0x02b>; //same as older arch, handled there*/
+defm S_AND_NOT0_SAVEEXEC_B32 : SOP1_Real_Renamed_gfx11_gfx12<0x02c, S_ANDN1_SAVEEXEC_B32, "s_and_not0_saveexec_b32">;
+defm S_AND_NOT0_SAVEEXEC_B64 : SOP1_Real_Renamed_gfx11_gfx12<0x02d, S_ANDN1_SAVEEXEC_B64, "s_and_not0_saveexec_b64">;
+defm S_OR_NOT0_SAVEEXEC_B32 : SOP1_Real_Renamed_gfx11_gfx12<0x02e, S_ORN1_SAVEEXEC_B32, "s_or_not0_saveexec_b32">;
+defm S_OR_NOT0_SAVEEXEC_B64 : SOP1_Real_Renamed_gfx11_gfx12<0x02f, S_ORN1_SAVEEXEC_B64, "s_or_not0_saveexec_b64">;
+defm S_AND_NOT1_SAVEEXEC_B32 : SOP1_Real_Renamed_gfx11_gfx12<0x030, S_ANDN2_SAVEEXEC_B32, "s_and_not1_saveexec_b32">;
+defm S_AND_NOT1_SAVEEXEC_B64 : SOP1_Real_Renamed_gfx11_gfx12<0x031, S_ANDN2_SAVEEXEC_B64, "s_and_not1_saveexec_b64">;
+defm S_OR_NOT1_SAVEEXEC_B32 : SOP1_Real_Renamed_gfx11_gfx12<0x032, S_ORN2_SAVEEXEC_B32, "s_or_not1_saveexec_b32">;
+defm S_OR_NOT1_SAVEEXEC_B64 : SOP1_Real_Renamed_gfx11_gfx12<0x033, S_ORN2_SAVEEXEC_B64, "s_or_not1_saveexec_b64">;
+defm S_AND_NOT0_WREXEC_B32 : SOP1_Real_Renamed_gfx11_gfx12<0x034, S_ANDN1_WREXEC_B32, "s_and_not0_wrexec_b32">;
+defm S_AND_NOT0_WREXEC_B64 : SOP1_Real_Renamed_gfx11_gfx12<0x035, S_ANDN1_WREXEC_B64, "s_and_not0_wrexec_b64">;
+defm S_AND_NOT1_WREXEC_B32 : SOP1_Real_Renamed_gfx11_gfx12<0x036, S_ANDN2_WREXEC_B32, "s_and_not1_wrexec_b32">;
+defm S_AND_NOT1_WREXEC_B64 : SOP1_Real_Renamed_gfx11_gfx12<0x037, S_ANDN2_WREXEC_B64, "s_and_not1_wrexec_b64">;
+defm S_MOVRELS_B32 : SOP1_Real_gfx11_gfx12<0x040>;
+defm S_MOVRELS_B64 : SOP1_Real_gfx11_gfx12<0x041>;
+defm S_MOVRELD_B32 : SOP1_Real_gfx11_gfx12<0x042>;
+defm S_MOVRELD_B64 : SOP1_Real_gfx11_gfx12<0x043>;
+defm S_MOVRELSD_2_B32 : SOP1_Real_gfx11_gfx12<0x044>;
+defm S_GETPC_B64 : SOP1_Real_gfx11_gfx12<0x047>;
+defm S_SETPC_B64 : SOP1_Real_gfx11_gfx12<0x048>;
+defm S_SWAPPC_B64 : SOP1_Real_gfx11_gfx12<0x049>;
+defm S_RFE_B64 : SOP1_Real_gfx11_gfx12<0x04a>;
+defm S_SENDMSG_RTN_B32 : SOP1_Real_gfx11_gfx12<0x04c>;
+defm S_SENDMSG_RTN_B64 : SOP1_Real_gfx11_gfx12<0x04d>;
+defm S_BARRIER_SIGNAL_M0 : SOP1_M0_Real_gfx12<0x04e>;
+defm S_BARRIER_SIGNAL_ISFIRST_M0 : SOP1_M0_Real_gfx12<0x04f>;
+defm S_GET_BARRIER_STATE_M0 : SOP1_M0_Real_gfx12<0x050>;
+defm S_BARRIER_INIT_M0 : SOP1_M0_Real_gfx12<0x051>;
+defm S_BARRIER_JOIN_M0 : SOP1_M0_Real_gfx12<0x052>;
+defm S_WAKEUP_BARRIER_M0 : SOP1_M0_Real_gfx12<0x057>;
+defm S_BARRIER_SIGNAL_IMM : SOP1_Real_gfx12<0x04e>;
+defm S_BARRIER_SIGNAL_ISFIRST_IMM : SOP1_Real_gfx12<0x04f>;
+defm S_GET_BARRIER_STATE_IMM : SOP1_Real_gfx12<0x050>;
+defm S_BARRIER_INIT_IMM : SOP1_Real_gfx12<0x051>;
+defm S_BARRIER_JOIN_IMM : SOP1_Real_gfx12<0x052>;
+defm S_WAKEUP_BARRIER_IMM : SOP1_Real_gfx12<0x057>;
+defm S_SLEEP_VAR : SOP1_Real_gfx12<0x058>;
+
+//===----------------------------------------------------------------------===//
+// SOP1 - GFX1150, GFX12
+//===----------------------------------------------------------------------===//
+
+defm S_CEIL_F32 : SOP1_Real_gfx11_gfx12<0x060>;
+defm S_FLOOR_F32 : SOP1_Real_gfx11_gfx12<0x061>;
+defm S_TRUNC_F32 : SOP1_Real_gfx11_gfx12<0x062>;
+defm S_RNDNE_F32 : SOP1_Real_gfx11_gfx12<0x063>;
+defm S_CVT_F32_I32 : SOP1_Real_gfx11_gfx12<0x064>;
+defm S_CVT_F32_U32 : SOP1_Real_gfx11_gfx12<0x065>;
+defm S_CVT_I32_F32 : SOP1_Real_gfx11_gfx12<0x066>;
+defm S_CVT_U32_F32 : SOP1_Real_gfx11_gfx12<0x067>;
+defm S_CVT_F16_F32 : SOP1_Real_gfx11_gfx12<0x068>;
+defm S_CVT_F32_F16 : SOP1_Real_gfx11_gfx12<0x069>;
+defm S_CVT_HI_F32_F16 : SOP1_Real_gfx11_gfx12<0x06a>;
+defm S_CEIL_F16 : SOP1_Real_gfx11_gfx12<0x06b>;
+defm S_FLOOR_F16 : SOP1_Real_gfx11_gfx12<0x06c>;
+defm S_TRUNC_F16 : SOP1_Real_gfx11_gfx12<0x06d>;
+defm S_RNDNE_F16 : SOP1_Real_gfx11_gfx12<0x06e>;
//===----------------------------------------------------------------------===//
// SOP1 - GFX10.
@@ -1587,8 +2054,8 @@ multiclass SOP1_Real_gfx10<bits<8> op> {
Select_gfx10<ps.Mnemonic>;
}
-multiclass SOP1_Real_gfx10_gfx11<bits<8> op> :
- SOP1_Real_gfx10<op>, SOP1_Real_gfx11<op>;
+multiclass SOP1_Real_gfx10_gfx11_gfx12<bits<8> op> :
+ SOP1_Real_gfx10<op>, SOP1_Real_gfx11_gfx12<op>;
defm S_ANDN1_SAVEEXEC_B64 : SOP1_Real_gfx10<0x037>;
defm S_ORN1_SAVEEXEC_B64 : SOP1_Real_gfx10<0x038>;
@@ -1623,8 +2090,8 @@ multiclass SOP1_Real_gfx6_gfx7<bits<8> op> {
multiclass SOP1_Real_gfx6_gfx7_gfx10<bits<8> op> :
SOP1_Real_gfx6_gfx7<op>, SOP1_Real_gfx10<op>;
-multiclass SOP1_Real_gfx6_gfx7_gfx10_gfx11<bits<8> op> :
- SOP1_Real_gfx6_gfx7<op>, SOP1_Real_gfx10_gfx11<op>;
+multiclass SOP1_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<8> op> :
+ SOP1_Real_gfx6_gfx7<op>, SOP1_Real_gfx10_gfx11_gfx12<op>;
defm S_CBRANCH_JOIN : SOP1_Real_gfx6_gfx7<0x032>;
@@ -1667,7 +2134,7 @@ defm S_ANDN2_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x027>;
defm S_ORN2_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x028>;
defm S_NAND_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x029>;
defm S_NOR_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x02a>;
-defm S_XNOR_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10_gfx11<0x02b>;
+defm S_XNOR_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02b>;
defm S_QUADMASK_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x02c>;
defm S_QUADMASK_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x02d>;
defm S_MOVRELS_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x02e>;
@@ -1677,63 +2144,142 @@ defm S_MOVRELD_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x031>;
defm S_ABS_I32 : SOP1_Real_gfx6_gfx7_gfx10<0x034>;
//===----------------------------------------------------------------------===//
-// SOP2 - GFX11.
+// SOP2 - GFX12
+//===----------------------------------------------------------------------===//
+
+multiclass SOP2_Real_gfx12<bits<7> op> {
+ def _gfx12 : SOP2_Real32<op, !cast<SOP2_Pseudo>(NAME)>,
+ Select_gfx12<!cast<SOP2_Pseudo>(NAME).Mnemonic>;
+}
+
+multiclass SOP2_Real_Renamed_gfx12<bits<7> op, SOP2_Pseudo backing_pseudo, string real_name> {
+ def _gfx12 : SOP2_Real32<op, backing_pseudo, real_name>,
+ Select_gfx12<backing_pseudo.Mnemonic>,
+ MnemonicAlias<backing_pseudo.Mnemonic, real_name>, Requires<[isGFX12Plus]>;
+}
+
+defm S_MIN_NUM_F32 : SOP2_Real_Renamed_gfx12<0x042, S_MIN_F32, "s_min_num_f32">;
+defm S_MAX_NUM_F32 : SOP2_Real_Renamed_gfx12<0x043, S_MAX_F32, "s_max_num_f32">;
+defm S_MIN_NUM_F16 : SOP2_Real_Renamed_gfx12<0x04b, S_MIN_F16, "s_min_num_f16">;
+defm S_MAX_NUM_F16 : SOP2_Real_Renamed_gfx12<0x04c, S_MAX_F16, "s_max_num_f16">;
+defm S_MINIMUM_F32 : SOP2_Real_gfx12<0x04f>;
+defm S_MAXIMUM_F32 : SOP2_Real_gfx12<0x050>;
+defm S_MINIMUM_F16 : SOP2_Real_gfx12<0x051>;
+defm S_MAXIMUM_F16 : SOP2_Real_gfx12<0x052>;
+
+defm S_ADD_CO_U32 : SOP2_Real_Renamed_gfx12<0x000, S_ADD_U32, "s_add_co_u32">;
+defm S_SUB_CO_U32 : SOP2_Real_Renamed_gfx12<0x001, S_SUB_U32, "s_sub_co_u32">;
+defm S_ADD_CO_I32 : SOP2_Real_Renamed_gfx12<0x002, S_ADD_I32, "s_add_co_i32">;
+defm S_SUB_CO_I32 : SOP2_Real_Renamed_gfx12<0x003, S_SUB_I32, "s_sub_co_i32">;
+defm S_ADD_CO_CI_U32 : SOP2_Real_Renamed_gfx12<0x004, S_ADDC_U32, "s_add_co_ci_u32">;
+defm S_SUB_CO_CI_U32 : SOP2_Real_Renamed_gfx12<0x005, S_SUBB_U32, "s_sub_co_ci_u32">;
+
+//===----------------------------------------------------------------------===//
+// SOP2 - GFX11, GFX12.
//===----------------------------------------------------------------------===//
multiclass SOP2_Real_gfx11<bits<7> op> {
- def _gfx11 : SOP2_Real<op, !cast<SOP2_Pseudo>(NAME)>,
+ def _gfx11 : SOP2_Real32<op, !cast<SOP2_Pseudo>(NAME)>,
Select_gfx11<!cast<SOP2_Pseudo>(NAME).Mnemonic>;
}
multiclass SOP2_Real_Renamed_gfx11<bits<7> op, SOP2_Pseudo backing_pseudo, string real_name> {
- def _gfx11 : SOP2_Real<op, backing_pseudo, real_name>,
+ def _gfx11 : SOP2_Real32<op, backing_pseudo, real_name>,
Select_gfx11<backing_pseudo.Mnemonic>,
- MnemonicAlias<backing_pseudo.Mnemonic, real_name>, Requires<[isGFX11Plus]>;
-}
-
-defm S_ABSDIFF_I32 : SOP2_Real_gfx11<0x006>;
-defm S_LSHL_B32 : SOP2_Real_gfx11<0x008>;
-defm S_LSHL_B64 : SOP2_Real_gfx11<0x009>;
-defm S_LSHR_B32 : SOP2_Real_gfx11<0x00a>;
-defm S_LSHR_B64 : SOP2_Real_gfx11<0x00b>;
-defm S_ASHR_I32 : SOP2_Real_gfx11<0x00c>;
-defm S_ASHR_I64 : SOP2_Real_gfx11<0x00d>;
-defm S_LSHL1_ADD_U32 : SOP2_Real_gfx11<0x00e>;
-defm S_LSHL2_ADD_U32 : SOP2_Real_gfx11<0x00f>;
-defm S_LSHL3_ADD_U32 : SOP2_Real_gfx11<0x010>;
-defm S_LSHL4_ADD_U32 : SOP2_Real_gfx11<0x011>;
-defm S_MIN_I32 : SOP2_Real_gfx11<0x012>;
-defm S_MIN_U32 : SOP2_Real_gfx11<0x013>;
-defm S_MAX_I32 : SOP2_Real_gfx11<0x014>;
-defm S_MAX_U32 : SOP2_Real_gfx11<0x015>;
-defm S_AND_B32 : SOP2_Real_gfx11<0x016>;
-defm S_AND_B64 : SOP2_Real_gfx11<0x017>;
-defm S_OR_B32 : SOP2_Real_gfx11<0x018>;
-defm S_OR_B64 : SOP2_Real_gfx11<0x019>;
-defm S_XOR_B32 : SOP2_Real_gfx11<0x01a>;
-defm S_XOR_B64 : SOP2_Real_gfx11<0x01b>;
-defm S_NAND_B32 : SOP2_Real_gfx11<0x01c>;
-defm S_NAND_B64 : SOP2_Real_gfx11<0x01d>;
-defm S_NOR_B32 : SOP2_Real_gfx11<0x01e>;
-defm S_NOR_B64 : SOP2_Real_gfx11<0x01f>;
-defm S_XNOR_B32 : SOP2_Real_gfx11<0x020>;
-defm S_XNOR_B64 : SOP2_Real_gfx11<0x021>;
-defm S_AND_NOT1_B32 : SOP2_Real_Renamed_gfx11<0x022, S_ANDN2_B32, "s_and_not1_b32">;
-defm S_AND_NOT1_B64 : SOP2_Real_Renamed_gfx11<0x023, S_ANDN2_B64, "s_and_not1_b64">;
-defm S_OR_NOT1_B32 : SOP2_Real_Renamed_gfx11<0x024, S_ORN2_B32, "s_or_not1_b32">;
-defm S_OR_NOT1_B64 : SOP2_Real_Renamed_gfx11<0x025, S_ORN2_B64, "s_or_not1_b64">;
-defm S_BFE_U32 : SOP2_Real_gfx11<0x026>;
-defm S_BFE_I32 : SOP2_Real_gfx11<0x027>;
-defm S_BFE_U64 : SOP2_Real_gfx11<0x028>;
-defm S_BFE_I64 : SOP2_Real_gfx11<0x029>;
-defm S_BFM_B32 : SOP2_Real_gfx11<0x02a>;
-defm S_BFM_B64 : SOP2_Real_gfx11<0x02b>;
-defm S_MUL_I32 : SOP2_Real_gfx11<0x02c>;
-defm S_MUL_HI_U32 : SOP2_Real_gfx11<0x02d>;
-defm S_MUL_HI_I32 : SOP2_Real_gfx11<0x02e>;
-defm S_CSELECT_B32 : SOP2_Real_gfx11<0x030>;
-defm S_CSELECT_B64 : SOP2_Real_gfx11<0x031>;
-defm S_PACK_HL_B32_B16 : SOP2_Real_gfx11<0x035>;
+ MnemonicAlias<backing_pseudo.Mnemonic, real_name>, Requires<[isGFX11Only]>;
+}
+
+multiclass SOP2_Real_gfx11_gfx12<bits<7> op> :
+ SOP2_Real_gfx11<op>, SOP2_Real_gfx12<op>;
+
+multiclass SOP2_Real_Renamed_gfx11_gfx12<bits<7> op, SOP2_Pseudo backing_pseudo, string real_name> :
+ SOP2_Real_Renamed_gfx11<op, backing_pseudo, real_name>,
+ SOP2_Real_Renamed_gfx12<op, backing_pseudo, real_name>;
+
+defm S_ABSDIFF_I32 : SOP2_Real_gfx11_gfx12<0x006>;
+defm S_LSHL_B32 : SOP2_Real_gfx11_gfx12<0x008>;
+defm S_LSHL_B64 : SOP2_Real_gfx11_gfx12<0x009>;
+defm S_LSHR_B32 : SOP2_Real_gfx11_gfx12<0x00a>;
+defm S_LSHR_B64 : SOP2_Real_gfx11_gfx12<0x00b>;
+defm S_ASHR_I32 : SOP2_Real_gfx11_gfx12<0x00c>;
+defm S_ASHR_I64 : SOP2_Real_gfx11_gfx12<0x00d>;
+defm S_LSHL1_ADD_U32 : SOP2_Real_gfx11_gfx12<0x00e>;
+defm S_LSHL2_ADD_U32 : SOP2_Real_gfx11_gfx12<0x00f>;
+defm S_LSHL3_ADD_U32 : SOP2_Real_gfx11_gfx12<0x010>;
+defm S_LSHL4_ADD_U32 : SOP2_Real_gfx11_gfx12<0x011>;
+defm S_MIN_I32 : SOP2_Real_gfx11_gfx12<0x012>;
+defm S_MIN_U32 : SOP2_Real_gfx11_gfx12<0x013>;
+defm S_MAX_I32 : SOP2_Real_gfx11_gfx12<0x014>;
+defm S_MAX_U32 : SOP2_Real_gfx11_gfx12<0x015>;
+defm S_AND_B32 : SOP2_Real_gfx11_gfx12<0x016>;
+defm S_AND_B64 : SOP2_Real_gfx11_gfx12<0x017>;
+defm S_OR_B32 : SOP2_Real_gfx11_gfx12<0x018>;
+defm S_OR_B64 : SOP2_Real_gfx11_gfx12<0x019>;
+defm S_XOR_B32 : SOP2_Real_gfx11_gfx12<0x01a>;
+defm S_XOR_B64 : SOP2_Real_gfx11_gfx12<0x01b>;
+defm S_NAND_B32 : SOP2_Real_gfx11_gfx12<0x01c>;
+defm S_NAND_B64 : SOP2_Real_gfx11_gfx12<0x01d>;
+defm S_NOR_B32 : SOP2_Real_gfx11_gfx12<0x01e>;
+defm S_NOR_B64 : SOP2_Real_gfx11_gfx12<0x01f>;
+defm S_XNOR_B32 : SOP2_Real_gfx11_gfx12<0x020>;
+defm S_XNOR_B64 : SOP2_Real_gfx11_gfx12<0x021>;
+defm S_AND_NOT1_B32 : SOP2_Real_Renamed_gfx11_gfx12<0x022, S_ANDN2_B32, "s_and_not1_b32">;
+defm S_AND_NOT1_B64 : SOP2_Real_Renamed_gfx11_gfx12<0x023, S_ANDN2_B64, "s_and_not1_b64">;
+defm S_OR_NOT1_B32 : SOP2_Real_Renamed_gfx11_gfx12<0x024, S_ORN2_B32, "s_or_not1_b32">;
+defm S_OR_NOT1_B64 : SOP2_Real_Renamed_gfx11_gfx12<0x025, S_ORN2_B64, "s_or_not1_b64">;
+defm S_BFE_U32 : SOP2_Real_gfx11_gfx12<0x026>;
+defm S_BFE_I32 : SOP2_Real_gfx11_gfx12<0x027>;
+defm S_BFE_U64 : SOP2_Real_gfx11_gfx12<0x028>;
+defm S_BFE_I64 : SOP2_Real_gfx11_gfx12<0x029>;
+defm S_BFM_B32 : SOP2_Real_gfx11_gfx12<0x02a>;
+defm S_BFM_B64 : SOP2_Real_gfx11_gfx12<0x02b>;
+defm S_MUL_I32 : SOP2_Real_gfx11_gfx12<0x02c>;
+defm S_MUL_HI_U32 : SOP2_Real_gfx11_gfx12<0x02d>;
+defm S_MUL_HI_I32 : SOP2_Real_gfx11_gfx12<0x02e>;
+defm S_CSELECT_B32 : SOP2_Real_gfx11_gfx12<0x030>;
+defm S_CSELECT_B64 : SOP2_Real_gfx11_gfx12<0x031>;
+defm S_PACK_HL_B32_B16 : SOP2_Real_gfx11_gfx12<0x035>;
+defm S_ADD_NC_U64 : SOP2_Real_Renamed_gfx12<0x053, S_ADD_U64, "s_add_nc_u64">;
+defm S_SUB_NC_U64 : SOP2_Real_Renamed_gfx12<0x054, S_SUB_U64, "s_sub_nc_u64">;
+defm S_MUL_U64 : SOP2_Real_gfx12<0x055>;
+
+//===----------------------------------------------------------------------===//
+// SOP2 - GFX1150, GFX12
+//===----------------------------------------------------------------------===//
+
+multiclass SOP2_Real_FMAK_gfx12<bits<7> op> {
+ def _gfx12 : SOP2_Real64<op, !cast<SOP2_Pseudo>(NAME)>,
+ Select_gfx12<!cast<SOP2_Pseudo>(NAME).Mnemonic>;
+}
+
+multiclass SOP2_Real_FMAK_gfx11<bits<7> op> {
+ def _gfx11 : SOP2_Real64<op, !cast<SOP2_Pseudo>(NAME)>,
+ Select_gfx11<!cast<SOP2_Pseudo>(NAME).Mnemonic>;
+}
+
+multiclass SOP2_Real_FMAK_gfx11_gfx12<bits<7> op> :
+ SOP2_Real_FMAK_gfx11<op>, SOP2_Real_FMAK_gfx12<op>;
+
+defm S_ADD_F32 : SOP2_Real_gfx11_gfx12<0x040>;
+defm S_SUB_F32 : SOP2_Real_gfx11_gfx12<0x041>;
+defm S_MUL_F32 : SOP2_Real_gfx11_gfx12<0x044>;
+defm S_FMAAK_F32 : SOP2_Real_FMAK_gfx11_gfx12<0x045>;
+defm S_FMAMK_F32 : SOP2_Real_FMAK_gfx11_gfx12<0x046>;
+defm S_FMAC_F32 : SOP2_Real_gfx11_gfx12<0x047>;
+defm S_CVT_PK_RTZ_F16_F32 : SOP2_Real_gfx11_gfx12<0x048>;
+defm S_ADD_F16 : SOP2_Real_gfx11_gfx12<0x049>;
+defm S_SUB_F16 : SOP2_Real_gfx11_gfx12<0x04a>;
+defm S_MUL_F16 : SOP2_Real_gfx11_gfx12<0x04d>;
+defm S_FMAC_F16 : SOP2_Real_gfx11_gfx12<0x04e>;
+
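The FMAAK/FMAMK records use SOP2_Real64 because each carries a 32-bit literal K, doubling the encoding to 64 bits. A hedged sketch of the scalar semantics as these ops are commonly documented (D = S0*S1 + K and D = S0*K + S1 respectively):

#include <cmath>
#include <cstdio>

// Scalar reference semantics; the literal K travels in the second
// dword of the 64-bit encoding.
static float s_fmaak_f32(float S0, float S1, float K) {
  return std::fma(S0, S1, K); // D = S0 * S1 + K
}
static float s_fmamk_f32(float S0, float K, float S1) {
  return std::fma(S0, K, S1); // D = S0 * K + S1
}

int main() {
  std::printf("%g %g\n", s_fmaak_f32(2, 3, 1), s_fmamk_f32(2, 3, 1)); // 7 7
}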
+//===----------------------------------------------------------------------===//
+// SOP2 - GFX1150
+//===----------------------------------------------------------------------===//
+
+defm S_MIN_F32 : SOP2_Real_gfx11<0x042>;
+defm S_MAX_F32 : SOP2_Real_gfx11<0x043>;
+defm S_MIN_F16 : SOP2_Real_gfx11<0x04b>;
+defm S_MAX_F16 : SOP2_Real_gfx11<0x04c>;
//===----------------------------------------------------------------------===//
// SOP2 - GFX10.
@@ -1741,20 +2287,20 @@ defm S_PACK_HL_B32_B16 : SOP2_Real_gfx11<0x035>;
multiclass SOP2_Real_gfx10<bits<7> op> {
defvar ps = !cast<SOP2_Pseudo>(NAME);
- def _gfx10 : SOP2_Real<op, ps>,
+ def _gfx10 : SOP2_Real32<op, ps>,
Select_gfx10<ps.Mnemonic>;
}
-multiclass SOP2_Real_gfx10_gfx11<bits<7> op> :
- SOP2_Real_gfx10<op>, SOP2_Real_gfx11<op>;
+multiclass SOP2_Real_gfx10_gfx11_gfx12<bits<7> op> :
+ SOP2_Real_gfx10<op>, SOP2_Real_gfx11_gfx12<op>;
defm S_LSHL1_ADD_U32 : SOP2_Real_gfx10<0x02e>;
defm S_LSHL2_ADD_U32 : SOP2_Real_gfx10<0x02f>;
defm S_LSHL3_ADD_U32 : SOP2_Real_gfx10<0x030>;
defm S_LSHL4_ADD_U32 : SOP2_Real_gfx10<0x031>;
-defm S_PACK_LL_B32_B16 : SOP2_Real_gfx10_gfx11<0x032>;
-defm S_PACK_LH_B32_B16 : SOP2_Real_gfx10_gfx11<0x033>;
-defm S_PACK_HH_B32_B16 : SOP2_Real_gfx10_gfx11<0x034>;
+defm S_PACK_LL_B32_B16 : SOP2_Real_gfx10_gfx11_gfx12<0x032>;
+defm S_PACK_LH_B32_B16 : SOP2_Real_gfx10_gfx11_gfx12<0x033>;
+defm S_PACK_HH_B32_B16 : SOP2_Real_gfx10_gfx11_gfx12<0x034>;
defm S_MUL_HI_U32 : SOP2_Real_gfx10<0x035>;
defm S_MUL_HI_I32 : SOP2_Real_gfx10<0x036>;
@@ -1764,7 +2310,7 @@ defm S_MUL_HI_I32 : SOP2_Real_gfx10<0x036>;
multiclass SOP2_Real_gfx6_gfx7<bits<7> op> {
defvar ps = !cast<SOP_Pseudo>(NAME);
- def _gfx6_gfx7 : SOP2_Real<op, ps>,
+ def _gfx6_gfx7 : SOP2_Real32<op, ps>,
Select_gfx6_gfx7<ps.Mnemonic>;
}
@@ -1772,7 +2318,10 @@ multiclass SOP2_Real_gfx6_gfx7_gfx10<bits<7> op> :
SOP2_Real_gfx6_gfx7<op>, SOP2_Real_gfx10<op>;
multiclass SOP2_Real_gfx6_gfx7_gfx10_gfx11<bits<7> op> :
- SOP2_Real_gfx6_gfx7<op>, SOP2_Real_gfx10_gfx11<op>;
+ SOP2_Real_gfx6_gfx7<op>, SOP2_Real_gfx10<op>, SOP2_Real_gfx11<op>;
+
+multiclass SOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<7> op> :
+ SOP2_Real_gfx6_gfx7<op>, SOP2_Real_gfx10_gfx11_gfx12<op>;
defm S_CBRANCH_G_FORK : SOP2_Real_gfx6_gfx7<0x02b>;
@@ -1820,29 +2369,52 @@ defm S_BFE_I64 : SOP2_Real_gfx6_gfx7_gfx10<0x02a>;
defm S_ABSDIFF_I32 : SOP2_Real_gfx6_gfx7_gfx10<0x02c>;
//===----------------------------------------------------------------------===//
-// SOPK - GFX11.
+// SOPK - GFX11, GFX12.
//===----------------------------------------------------------------------===//
+multiclass SOPK_Real32_gfx12<bits<5> op> {
+ def _gfx12 : SOPK_Real32<op, !cast<SOPK_Pseudo>(NAME)>,
+ Select_gfx12<!cast<SOPK_Pseudo>(NAME).Mnemonic>;
+}
+
+multiclass SOPK_Real32_Renamed_gfx12<bits<5> op, SOPK_Pseudo backing_pseudo, string real_name> {
+ def _gfx12 : SOPK_Real32<op, backing_pseudo, real_name>,
+ Select_gfx12<backing_pseudo.Mnemonic>,
+ MnemonicAlias<backing_pseudo.Mnemonic, real_name>, Requires<[isGFX12Plus]>;
+}
+
multiclass SOPK_Real32_gfx11<bits<5> op> {
def _gfx11 : SOPK_Real32<op, !cast<SOPK_Pseudo>(NAME)>,
Select_gfx11<!cast<SOPK_Pseudo>(NAME).Mnemonic>;
}
+multiclass SOPK_Real64_gfx12<bits<5> op> {
+ def _gfx12 : SOPK_Real64<op, !cast<SOPK_Pseudo>(NAME)>,
+ Select_gfx12<!cast<SOPK_Pseudo>(NAME).Mnemonic>;
+}
+
multiclass SOPK_Real64_gfx11<bits<5> op> {
def _gfx11 : SOPK_Real64<op, !cast<SOPK_Pseudo>(NAME)>,
Select_gfx11<!cast<SOPK_Pseudo>(NAME).Mnemonic>;
}
-defm S_GETREG_B32 : SOPK_Real32_gfx11<0x011>;
-defm S_SETREG_B32 : SOPK_Real32_gfx11<0x012>;
-defm S_SETREG_IMM32_B32 : SOPK_Real64_gfx11<0x013>;
-defm S_CALL_B64 : SOPK_Real32_gfx11<0x014>;
+multiclass SOPK_Real32_gfx11_gfx12<bits<5> op> :
+ SOPK_Real32_gfx11<op>, SOPK_Real32_gfx12<op>;
+
+multiclass SOPK_Real64_gfx11_gfx12<bits<5> op> :
+ SOPK_Real64_gfx11<op>, SOPK_Real64_gfx12<op>;
+
+defm S_ADDK_CO_I32 : SOPK_Real32_Renamed_gfx12<0x00f, S_ADDK_I32, "s_addk_co_i32">;
+defm S_GETREG_B32 : SOPK_Real32_gfx11_gfx12<0x011>;
+defm S_SETREG_B32 : SOPK_Real32_gfx11_gfx12<0x012>;
+defm S_SETREG_IMM32_B32 : SOPK_Real64_gfx11_gfx12<0x013>;
+defm S_CALL_B64 : SOPK_Real32_gfx11_gfx12<0x014>;
defm S_SUBVECTOR_LOOP_BEGIN : SOPK_Real32_gfx11<0x016>;
defm S_SUBVECTOR_LOOP_END : SOPK_Real32_gfx11<0x017>;
-defm S_WAITCNT_VSCNT : SOPK_Real32_gfx11<0x018>;
-defm S_WAITCNT_VMCNT : SOPK_Real32_gfx11<0x019>;
-defm S_WAITCNT_EXPCNT : SOPK_Real32_gfx11<0x01a>;
-defm S_WAITCNT_LGKMCNT : SOPK_Real32_gfx11<0x01b>;
+defm S_WAITCNT_VSCNT : SOPK_Real32_gfx11_gfx12<0x018>;
+defm S_WAITCNT_VMCNT : SOPK_Real32_gfx11_gfx12<0x019>;
+defm S_WAITCNT_EXPCNT : SOPK_Real32_gfx11_gfx12<0x01a>;
+defm S_WAITCNT_LGKMCNT : SOPK_Real32_gfx11_gfx12<0x01b>;
//===----------------------------------------------------------------------===//
// SOPK - GFX10.
@@ -1863,7 +2435,10 @@ multiclass SOPK_Real64_gfx10<bits<5> op> {
multiclass SOPK_Real32_gfx10_gfx11<bits<5> op> :
SOPK_Real32_gfx10<op>, SOPK_Real32_gfx11<op>;
-defm S_VERSION : SOPK_Real32_gfx10_gfx11<0x001>;
+multiclass SOPK_Real32_gfx10_gfx11_gfx12<bits<5> op> :
+ SOPK_Real32_gfx10<op>, SOPK_Real32_gfx11_gfx12<op>;
+
+defm S_VERSION : SOPK_Real32_gfx10_gfx11_gfx12<0x001>;
defm S_CALL_B64 : SOPK_Real32_gfx10<0x016>;
defm S_WAITCNT_VSCNT : SOPK_Real32_gfx10<0x017>;
defm S_WAITCNT_VMCNT : SOPK_Real32_gfx10<0x018>;
@@ -1897,10 +2472,13 @@ multiclass SOPK_Real64_gfx6_gfx7_gfx10<bits<5> op> :
multiclass SOPK_Real32_gfx6_gfx7_gfx10_gfx11<bits<5> op> :
SOPK_Real32_gfx6_gfx7<op>, SOPK_Real32_gfx10_gfx11<op>;
+multiclass SOPK_Real32_gfx6_gfx7_gfx10_gfx11_gfx12<bits<5> op> :
+ SOPK_Real32_gfx6_gfx7<op>, SOPK_Real32_gfx10_gfx11_gfx12<op>;
+
defm S_CBRANCH_I_FORK : SOPK_Real32_gfx6_gfx7<0x011>;
-defm S_MOVK_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x000>;
-defm S_CMOVK_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x002>;
+defm S_MOVK_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11_gfx12<0x000>;
+defm S_CMOVK_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11_gfx12<0x002>;
defm S_CMPK_EQ_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x003>;
defm S_CMPK_LG_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x004>;
defm S_CMPK_GT_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x005>;
@@ -1914,21 +2492,48 @@ defm S_CMPK_GE_U32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x00c>;
defm S_CMPK_LT_U32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x00d>;
defm S_CMPK_LE_U32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x00e>;
defm S_ADDK_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x00f>;
-defm S_MULK_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x010>;
+defm S_MULK_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11_gfx12<0x010>;
defm S_GETREG_B32 : SOPK_Real32_gfx6_gfx7_gfx10<0x012>;
defm S_SETREG_B32 : SOPK_Real32_gfx6_gfx7_gfx10<0x013>;
defm S_SETREG_IMM32_B32 : SOPK_Real64_gfx6_gfx7_gfx10<0x015>;
//===----------------------------------------------------------------------===//
-// SOPP - GFX11
+// SOPP - GFX12 only.
//===----------------------------------------------------------------------===//
+multiclass SOPP_Real_32_gfx12<bits<7> op> {
+ def _gfx12 : SOPP_Real_32<op, !cast<SOPP_Pseudo>(NAME), !cast<SOPP_Pseudo>(NAME).Mnemonic>,
+ Select_gfx12<!cast<SOPP_Pseudo>(NAME).Mnemonic>,
+ SOPPRelaxTable<0, !cast<SOPP_Pseudo>(NAME).KeyName, "_gfx12">;
+}
+
+multiclass SOPP_Real_32_Renamed_gfx12<bits<7> op, SOPP_Pseudo backing_pseudo, string real_name> {
+ def _gfx12 : SOPP_Real_32<op, backing_pseudo, real_name>,
+ Select_gfx12<backing_pseudo.Mnemonic>,
+ MnemonicAlias<backing_pseudo.Mnemonic, real_name>, Requires<[isGFX12Plus]>;
+}
+
+defm S_WAIT_ALU : SOPP_Real_32_Renamed_gfx12<0x008, S_WAITCNT_DEPCTR, "s_wait_alu">;
+defm S_BARRIER_WAIT : SOPP_Real_32_gfx12<0x014>;
+defm S_BARRIER_LEAVE : SOPP_Real_32_gfx12<0x015>;
+
+//===----------------------------------------------------------------------===//
+// SOPP - GFX11, GFX12.
+//===----------------------------------------------------------------------===//
+
+
multiclass SOPP_Real_32_gfx11<bits<7> op> {
def _gfx11 : SOPP_Real_32<op, !cast<SOPP_Pseudo>(NAME), !cast<SOPP_Pseudo>(NAME).Mnemonic>,
Select_gfx11<!cast<SOPP_Pseudo>(NAME).Mnemonic>,
SOPPRelaxTable<0, !cast<SOPP_Pseudo>(NAME).KeyName, "_gfx11">;
}
+multiclass SOPP_Real_64_gfx12<bits<7> op> {
+ def _gfx12 : SOPP_Real_64<op, !cast<SOPP_Pseudo>(NAME), !cast<SOPP_Pseudo>(NAME).Mnemonic>,
+ Select_gfx12<!cast<SOPP_Pseudo>(NAME).Mnemonic>,
+ SOPPRelaxTable<1, !cast<SOPP_Pseudo>(NAME).KeyName, "_gfx12">;
+}
+
multiclass SOPP_Real_64_gfx11<bits<7> op> {
def _gfx11 : SOPP_Real_64<op, !cast<SOPP_Pseudo>(NAME), !cast<SOPP_Pseudo>(NAME).Mnemonic>,
Select_gfx11<!cast<SOPP_Pseudo>(NAME).Mnemonic>,
@@ -1938,7 +2543,22 @@ multiclass SOPP_Real_64_gfx11<bits<7> op> {
multiclass SOPP_Real_32_Renamed_gfx11<bits<7> op, SOPP_Pseudo backing_pseudo, string real_name> {
def _gfx11 : SOPP_Real_32<op, backing_pseudo, real_name>,
Select_gfx11<backing_pseudo.Mnemonic>,
- MnemonicAlias<backing_pseudo.Mnemonic, real_name>, Requires<[isGFX11Plus]>;
+ MnemonicAlias<backing_pseudo.Mnemonic, real_name>, Requires<[isGFX11Only]>;
+}
+
+multiclass SOPP_Real_32_gfx11_gfx12<bits<7> op> :
+ SOPP_Real_32_gfx11<op>, SOPP_Real_32_gfx12<op>;
+
+multiclass SOPP_Real_64_gfx11_gfx12<bits<7> op> :
+ SOPP_Real_64_gfx11<op>, SOPP_Real_64_gfx12<op>;
+
+multiclass SOPP_Real_32_Renamed_gfx11_gfx12<bits<7> op, SOPP_Pseudo backing_pseudo, string real_name> :
+ SOPP_Real_32_Renamed_gfx11<op, backing_pseudo, real_name>,
+ SOPP_Real_32_Renamed_gfx12<op, backing_pseudo, real_name>;
+
+multiclass SOPP_Real_With_Relaxation_gfx12<bits<7> op> {
+ defm "" : SOPP_Real_32_gfx12<op>;
+ defm _pad_s_nop : SOPP_Real_64_gfx12<op>;
}
multiclass SOPP_Real_With_Relaxation_gfx11<bits<7> op> {
@@ -1946,42 +2566,51 @@ multiclass SOPP_Real_With_Relaxation_gfx11<bits<7> op> {
defm _pad_s_nop : SOPP_Real_64_gfx11<op>;
}
-defm S_SETKILL : SOPP_Real_32_gfx11<0x001>;
-defm S_SETHALT : SOPP_Real_32_gfx11<0x002>;
-defm S_SLEEP : SOPP_Real_32_gfx11<0x003>;
-defm S_SET_INST_PREFETCH_DISTANCE : SOPP_Real_32_Renamed_gfx11<0x004, S_INST_PREFETCH, "s_set_inst_prefetch_distance">;
-defm S_CLAUSE : SOPP_Real_32_gfx11<0x005>;
-defm S_DELAY_ALU : SOPP_Real_32_gfx11<0x007>;
+multiclass SOPP_Real_With_Relaxation_gfx11_gfx12<bits<7> op> :
+ SOPP_Real_With_Relaxation_gfx11<op>, SOPP_Real_With_Relaxation_gfx12<op>;
+
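The _pad_s_nop 64-bit forms back the MC branch-relaxation path: SOPP branches hold a 16-bit signed word offset, and an out-of-range target forces the longer form. A standalone sketch of the range test, assuming the usual dword-granular simm16 offset:

#include <cstdint>
#include <cstdio>

// Offsets are counted in 4-byte words relative to the next instruction
// and must fit in a signed 16-bit field.
static bool fitsSOPPBranch(int64_t ByteDistance) {
  if (ByteDistance % 4 != 0)
    return false;
  int64_t Words = ByteDistance / 4;
  return Words >= -32768 && Words <= 32767;
}

int main() {
  // 128 bytes fits; a 1 MiB displacement needs relaxation.
  std::printf("%d %d\n", fitsSOPPBranch(128), fitsSOPPBranch(1 << 20)); // 1 0
}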
+defm S_SETKILL : SOPP_Real_32_gfx11_gfx12<0x001>;
+defm S_SETHALT : SOPP_Real_32_gfx11_gfx12<0x002>;
+defm S_SLEEP : SOPP_Real_32_gfx11_gfx12<0x003>;
+defm S_SET_INST_PREFETCH_DISTANCE : SOPP_Real_32_Renamed_gfx11_gfx12<0x004, S_INST_PREFETCH, "s_set_inst_prefetch_distance">;
+defm S_CLAUSE : SOPP_Real_32_gfx11_gfx12<0x005>;
+defm S_DELAY_ALU : SOPP_Real_32_gfx11_gfx12<0x007>;
defm S_WAITCNT_DEPCTR : SOPP_Real_32_gfx11<0x008>;
-defm S_WAITCNT : SOPP_Real_32_gfx11<0x009>;
-defm S_WAIT_IDLE : SOPP_Real_32_gfx11<0x00a>;
-defm S_WAIT_EVENT : SOPP_Real_32_gfx11<0x00b>;
-defm S_TRAP : SOPP_Real_32_gfx11<0x010>;
-defm S_ROUND_MODE : SOPP_Real_32_gfx11<0x011>;
-defm S_DENORM_MODE : SOPP_Real_32_gfx11<0x012>;
-defm S_BRANCH : SOPP_Real_With_Relaxation_gfx11<0x020>;
-defm S_CBRANCH_SCC0 : SOPP_Real_With_Relaxation_gfx11<0x021>;
-defm S_CBRANCH_SCC1 : SOPP_Real_With_Relaxation_gfx11<0x022>;
-defm S_CBRANCH_VCCZ : SOPP_Real_With_Relaxation_gfx11<0x023>;
-defm S_CBRANCH_VCCNZ : SOPP_Real_With_Relaxation_gfx11<0x024>;
-defm S_CBRANCH_EXECZ : SOPP_Real_With_Relaxation_gfx11<0x025>;
-defm S_CBRANCH_EXECNZ : SOPP_Real_With_Relaxation_gfx11<0x026>;
+defm S_WAITCNT : SOPP_Real_32_gfx11_gfx12<0x009>;
+defm S_WAIT_IDLE : SOPP_Real_32_gfx11_gfx12<0x00a>;
+defm S_WAIT_EVENT : SOPP_Real_32_gfx11_gfx12<0x00b>;
+defm S_TRAP : SOPP_Real_32_gfx11_gfx12<0x010>;
+defm S_ROUND_MODE : SOPP_Real_32_gfx11_gfx12<0x011>;
+defm S_DENORM_MODE : SOPP_Real_32_gfx11_gfx12<0x012>;
+defm S_BRANCH : SOPP_Real_With_Relaxation_gfx11_gfx12<0x020>;
+defm S_CBRANCH_SCC0 : SOPP_Real_With_Relaxation_gfx11_gfx12<0x021>;
+defm S_CBRANCH_SCC1 : SOPP_Real_With_Relaxation_gfx11_gfx12<0x022>;
+defm S_CBRANCH_VCCZ : SOPP_Real_With_Relaxation_gfx11_gfx12<0x023>;
+defm S_CBRANCH_VCCNZ : SOPP_Real_With_Relaxation_gfx11_gfx12<0x024>;
+defm S_CBRANCH_EXECZ : SOPP_Real_With_Relaxation_gfx11_gfx12<0x025>;
+defm S_CBRANCH_EXECNZ : SOPP_Real_With_Relaxation_gfx11_gfx12<0x026>;
defm S_CBRANCH_CDBGSYS : SOPP_Real_With_Relaxation_gfx11<0x027>;
defm S_CBRANCH_CDBGUSER : SOPP_Real_With_Relaxation_gfx11<0x028>;
defm S_CBRANCH_CDBGSYS_OR_USER : SOPP_Real_With_Relaxation_gfx11<0x029>;
defm S_CBRANCH_CDBGSYS_AND_USER : SOPP_Real_With_Relaxation_gfx11<0x02a>;
-defm S_ENDPGM : SOPP_Real_32_gfx11<0x030>;
-defm S_ENDPGM_SAVED : SOPP_Real_32_gfx11<0x031>;
-defm S_WAKEUP : SOPP_Real_32_gfx11<0x034>;
-defm S_SETPRIO : SOPP_Real_32_gfx11<0x035>;
-defm S_SENDMSG : SOPP_Real_32_gfx11<0x036>;
-defm S_SENDMSGHALT : SOPP_Real_32_gfx11<0x037>;
-defm S_INCPERFLEVEL : SOPP_Real_32_gfx11<0x038>;
-defm S_DECPERFLEVEL : SOPP_Real_32_gfx11<0x039>;
-defm S_TTRACEDATA : SOPP_Real_32_gfx11<0x03a>;
-defm S_TTRACEDATA_IMM : SOPP_Real_32_gfx11<0x03b>;
-defm S_ICACHE_INV : SOPP_Real_32_gfx11<0x03c>;
-defm S_BARRIER : SOPP_Real_32_gfx11<0x03d>;
+defm S_ENDPGM : SOPP_Real_32_gfx11_gfx12<0x030>;
+defm S_ENDPGM_SAVED : SOPP_Real_32_gfx11_gfx12<0x031>;
+defm S_WAKEUP : SOPP_Real_32_gfx11_gfx12<0x034>;
+defm S_SETPRIO : SOPP_Real_32_gfx11_gfx12<0x035>;
+defm S_SENDMSG : SOPP_Real_32_gfx11_gfx12<0x036>;
+defm S_SENDMSGHALT : SOPP_Real_32_gfx11_gfx12<0x037>;
+defm S_INCPERFLEVEL : SOPP_Real_32_gfx11_gfx12<0x038>;
+defm S_DECPERFLEVEL : SOPP_Real_32_gfx11_gfx12<0x039>;
+defm S_TTRACEDATA : SOPP_Real_32_gfx11_gfx12<0x03a>;
+defm S_TTRACEDATA_IMM : SOPP_Real_32_gfx11_gfx12<0x03b>;
+defm S_ICACHE_INV : SOPP_Real_32_gfx11_gfx12<0x03c>;
+defm S_BARRIER : SOPP_Real_32_gfx11_gfx12<0x03d>;
+
+//===----------------------------------------------------------------------===//
+// SOPP - GFX1150, GFX12.
+//===----------------------------------------------------------------------===//
+
+defm S_SINGLEUSE_VDST : SOPP_Real_32_gfx11_gfx12<0x013>;
//===----------------------------------------------------------------------===//
// SOPP - GFX6, GFX7, GFX8, GFX9, GFX10
@@ -2017,11 +2646,11 @@ multiclass SOPP_Real_32_gfx6_gfx7_gfx8_gfx9<bits<7> op> :
multiclass SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<bits<7> op> :
SOPP_Real_32_gfx6_gfx7_gfx8_gfx9<op>, SOPP_Real_32_gfx10<op>;
-multiclass SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10_gfx11<bits<7> op> :
- SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<op>, SOPP_Real_32_gfx11<op>;
+multiclass SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10_gfx11_gfx12<bits<7> op> :
+ SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<op>, SOPP_Real_32_gfx11_gfx12<op>;
-multiclass SOPP_Real_32_gfx10_gfx11<bits<7> op> :
- SOPP_Real_32_gfx10<op>, SOPP_Real_32_gfx11<op>;
+multiclass SOPP_Real_32_gfx10_gfx11_gfx12<bits<7> op> :
+ SOPP_Real_32_gfx10<op>, SOPP_Real_32_gfx11_gfx12<op>;
// 64-bit encodings, for relaxation
multiclass SOPP_Real_64_gfx6_gfx7<bits<7> op> {
@@ -2054,8 +2683,8 @@ multiclass SOPP_Real_64_gfx6_gfx7_gfx8_gfx9<bits<7> op> :
multiclass SOPP_Real_64_gfx6_gfx7_gfx8_gfx9_gfx10<bits<7> op> :
SOPP_Real_64_gfx6_gfx7_gfx8_gfx9<op>, SOPP_Real_64_gfx10<op>;
-multiclass SOPP_Real_64_gfx6_gfx7_gfx8_gfx9_gfx10_gfx11<bits<7> op> :
- SOPP_Real_64_gfx6_gfx7_gfx8_gfx9_gfx10<op>, SOPP_Real_64_gfx11<op>;
+multiclass SOPP_Real_64_gfx6_gfx7_gfx8_gfx9_gfx10_gfx11_gfx12<bits<7> op> :
+ SOPP_Real_64_gfx6_gfx7_gfx8_gfx9_gfx10<op>, SOPP_Real_64_gfx11_gfx12<op>;
// Relaxation for instructions with no operands is not implemented.
multiclass SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<bits<7> op> {
@@ -2063,7 +2692,7 @@ multiclass SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<bits<7> op> {
defm _pad_s_nop : SOPP_Real_64_gfx6_gfx7_gfx8_gfx9_gfx10<op>;
}
-defm S_NOP : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10_gfx11<0x000>;
+defm S_NOP : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10_gfx11_gfx12<0x000>;
defm S_ENDPGM : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x001>;
defm S_WAKEUP : SOPP_Real_32_gfx8_gfx9_gfx10<0x003>;
defm S_BARRIER : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x00a>;
@@ -2083,7 +2712,7 @@ defm S_ENDPGM_SAVED : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x01B>;
defm S_SET_GPR_IDX_OFF : SOPP_Real_32_gfx8_gfx9<0x01c>;
defm S_SET_GPR_IDX_MODE : SOPP_Real_32_gfx8_gfx9<0x01d>;
defm S_ENDPGM_ORDERED_PS_DONE : SOPP_Real_32_gfx8_gfx9_gfx10<0x01e>;
-defm S_CODE_END : SOPP_Real_32_gfx10_gfx11<0x01f>;
+defm S_CODE_END : SOPP_Real_32_gfx10_gfx11_gfx12<0x01f>;
defm S_INST_PREFETCH : SOPP_Real_32_gfx10<0x020>;
defm S_CLAUSE : SOPP_Real_32_gfx10<0x021>;
defm S_WAIT_IDLE : SOPP_Real_32_gfx10<0x022>;
@@ -2107,32 +2736,74 @@ defm S_CBRANCH_CDBGSYS_AND_USER : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_
}
//===----------------------------------------------------------------------===//
-// SOPC - GFX11
+// SOPC - GFX11, GFX12.
//===----------------------------------------------------------------------===//
+multiclass SOPC_Real_gfx12<bits<7> op> {
+ def _gfx12 : SOPC_Real<op, !cast<SOPC_Pseudo>(NAME)>,
+ Select_gfx12<!cast<SOPC_Pseudo>(NAME).Mnemonic>;
+}
+
multiclass SOPC_Real_gfx11<bits<7> op> {
def _gfx11 : SOPC_Real<op, !cast<SOPC_Pseudo>(NAME)>,
Select_gfx11<!cast<SOPC_Pseudo>(NAME).Mnemonic>;
}
-defm S_CMP_EQ_I32 : SOPC_Real_gfx11<0x00>;
-defm S_CMP_LG_I32 : SOPC_Real_gfx11<0x01>;
-defm S_CMP_GT_I32 : SOPC_Real_gfx11<0x02>;
-defm S_CMP_GE_I32 : SOPC_Real_gfx11<0x03>;
-defm S_CMP_LT_I32 : SOPC_Real_gfx11<0x04>;
-defm S_CMP_LE_I32 : SOPC_Real_gfx11<0x05>;
-defm S_CMP_EQ_U32 : SOPC_Real_gfx11<0x06>;
-defm S_CMP_LG_U32 : SOPC_Real_gfx11<0x07>;
-defm S_CMP_GT_U32 : SOPC_Real_gfx11<0x08>;
-defm S_CMP_GE_U32 : SOPC_Real_gfx11<0x09>;
-defm S_CMP_LT_U32 : SOPC_Real_gfx11<0x0a>;
-defm S_CMP_LE_U32 : SOPC_Real_gfx11<0x0b>;
-defm S_BITCMP0_B32 : SOPC_Real_gfx11<0x0c>;
-defm S_BITCMP1_B32 : SOPC_Real_gfx11<0x0d>;
-defm S_BITCMP0_B64 : SOPC_Real_gfx11<0x0e>;
-defm S_BITCMP1_B64 : SOPC_Real_gfx11<0x0f>;
-defm S_CMP_EQ_U64 : SOPC_Real_gfx11<0x10>;
-defm S_CMP_LG_U64 : SOPC_Real_gfx11<0x11>;
+multiclass SOPC_Real_gfx11_gfx12<bits<7> op> :
+ SOPC_Real_gfx11<op>, SOPC_Real_gfx12<op>;
+
+defm S_CMP_EQ_I32 : SOPC_Real_gfx11_gfx12<0x00>;
+defm S_CMP_LG_I32 : SOPC_Real_gfx11_gfx12<0x01>;
+defm S_CMP_GT_I32 : SOPC_Real_gfx11_gfx12<0x02>;
+defm S_CMP_GE_I32 : SOPC_Real_gfx11_gfx12<0x03>;
+defm S_CMP_LT_I32 : SOPC_Real_gfx11_gfx12<0x04>;
+defm S_CMP_LE_I32 : SOPC_Real_gfx11_gfx12<0x05>;
+defm S_CMP_EQ_U32 : SOPC_Real_gfx11_gfx12<0x06>;
+defm S_CMP_LG_U32 : SOPC_Real_gfx11_gfx12<0x07>;
+defm S_CMP_GT_U32 : SOPC_Real_gfx11_gfx12<0x08>;
+defm S_CMP_GE_U32 : SOPC_Real_gfx11_gfx12<0x09>;
+defm S_CMP_LT_U32 : SOPC_Real_gfx11_gfx12<0x0a>;
+defm S_CMP_LE_U32 : SOPC_Real_gfx11_gfx12<0x0b>;
+defm S_BITCMP0_B32 : SOPC_Real_gfx11_gfx12<0x0c>;
+defm S_BITCMP1_B32 : SOPC_Real_gfx11_gfx12<0x0d>;
+defm S_BITCMP0_B64 : SOPC_Real_gfx11_gfx12<0x0e>;
+defm S_BITCMP1_B64 : SOPC_Real_gfx11_gfx12<0x0f>;
+defm S_CMP_EQ_U64 : SOPC_Real_gfx11_gfx12<0x10>;
+defm S_CMP_LG_U64 : SOPC_Real_gfx11_gfx12<0x11>;
+
+//===----------------------------------------------------------------------===//
+// SOPC - GFX1150, GFX12
+//===----------------------------------------------------------------------===//
+
+defm S_CMP_LT_F32 : SOPC_Real_gfx11_gfx12<0x41>;
+defm S_CMP_EQ_F32 : SOPC_Real_gfx11_gfx12<0x42>;
+defm S_CMP_LE_F32 : SOPC_Real_gfx11_gfx12<0x43>;
+defm S_CMP_GT_F32 : SOPC_Real_gfx11_gfx12<0x44>;
+defm S_CMP_LG_F32 : SOPC_Real_gfx11_gfx12<0x45>;
+defm S_CMP_GE_F32 : SOPC_Real_gfx11_gfx12<0x46>;
+defm S_CMP_O_F32 : SOPC_Real_gfx11_gfx12<0x47>;
+defm S_CMP_U_F32 : SOPC_Real_gfx11_gfx12<0x48>;
+defm S_CMP_NGE_F32 : SOPC_Real_gfx11_gfx12<0x49>;
+defm S_CMP_NLG_F32 : SOPC_Real_gfx11_gfx12<0x4a>;
+defm S_CMP_NGT_F32 : SOPC_Real_gfx11_gfx12<0x4b>;
+defm S_CMP_NLE_F32 : SOPC_Real_gfx11_gfx12<0x4c>;
+defm S_CMP_NEQ_F32 : SOPC_Real_gfx11_gfx12<0x4d>;
+defm S_CMP_NLT_F32 : SOPC_Real_gfx11_gfx12<0x4e>;
+
+defm S_CMP_LT_F16 : SOPC_Real_gfx11_gfx12<0x51>;
+defm S_CMP_EQ_F16 : SOPC_Real_gfx11_gfx12<0x52>;
+defm S_CMP_LE_F16 : SOPC_Real_gfx11_gfx12<0x53>;
+defm S_CMP_GT_F16 : SOPC_Real_gfx11_gfx12<0x54>;
+defm S_CMP_LG_F16 : SOPC_Real_gfx11_gfx12<0x55>;
+defm S_CMP_GE_F16 : SOPC_Real_gfx11_gfx12<0x56>;
+defm S_CMP_O_F16 : SOPC_Real_gfx11_gfx12<0x57>;
+defm S_CMP_U_F16 : SOPC_Real_gfx11_gfx12<0x58>;
+defm S_CMP_NGE_F16 : SOPC_Real_gfx11_gfx12<0x59>;
+defm S_CMP_NLG_F16 : SOPC_Real_gfx11_gfx12<0x5a>;
+defm S_CMP_NGT_F16 : SOPC_Real_gfx11_gfx12<0x5b>;
+defm S_CMP_NLE_F16 : SOPC_Real_gfx11_gfx12<0x5c>;
+defm S_CMP_NEQ_F16 : SOPC_Real_gfx11_gfx12<0x5d>;
+defm S_CMP_NLT_F16 : SOPC_Real_gfx11_gfx12<0x5e>;
//===----------------------------------------------------------------------===//
// SOPC - GFX6, GFX7, GFX8, GFX9, GFX10
@@ -2194,9 +2865,8 @@ class SOP1_Real_vi<bits<8> op, SOP1_Pseudo ps> :
SOP1_Real<op, ps>,
Select_vi<ps.Mnemonic>;
-
class SOP2_Real_vi<bits<7> op, SOP2_Pseudo ps> :
- SOP2_Real<op, ps>,
+ SOP2_Real32<op, ps>,
Select_vi<ps.Mnemonic>;
class SOPK_Real_vi<bits<5> op, SOPK_Pseudo ps> :
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index ce40d82021cf..23434d2de0fc 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -36,14 +36,15 @@ namespace SendMsg {
// Disable lint checking for this block since it makes the table unreadable.
// NOLINTBEGIN
+// clang-format off
const CustomOperand<const MCSubtargetInfo &> Msg[] = {
{{""}},
{{"MSG_INTERRUPT"}, ID_INTERRUPT},
{{"MSG_GS"}, ID_GS_PreGFX11, isNotGFX11Plus},
{{"MSG_GS_DONE"}, ID_GS_DONE_PreGFX11, isNotGFX11Plus},
{{"MSG_SAVEWAVE"}, ID_SAVEWAVE, isGFX8_GFX9_GFX10},
- {{"MSG_STALL_WAVE_GEN"}, ID_STALL_WAVE_GEN, isGFX9Plus},
- {{"MSG_HALT_WAVES"}, ID_HALT_WAVES, isGFX9Plus},
+ {{"MSG_STALL_WAVE_GEN"}, ID_STALL_WAVE_GEN, isGFX9_GFX10_GFX11},
+ {{"MSG_HALT_WAVES"}, ID_HALT_WAVES, isGFX9_GFX10_GFX11},
{{"MSG_ORDERED_PS_DONE"}, ID_ORDERED_PS_DONE, isGFX9_GFX10},
{{"MSG_EARLY_PRIM_DEALLOC"}, ID_EARLY_PRIM_DEALLOC, isGFX9_GFX10},
{{"MSG_GS_ALLOC_REQ"}, ID_GS_ALLOC_REQ, isGFX9Plus},
@@ -59,7 +60,9 @@ const CustomOperand<const MCSubtargetInfo &> Msg[] = {
{{"MSG_RTN_GET_REALTIME"}, ID_RTN_GET_REALTIME, isGFX11Plus},
{{"MSG_RTN_SAVE_WAVE"}, ID_RTN_SAVE_WAVE, isGFX11Plus},
{{"MSG_RTN_GET_TBA"}, ID_RTN_GET_TBA, isGFX11Plus},
+ {{"MSG_RTN_GET_SE_AID_ID"}, ID_RTN_GET_SE_AID_ID, isGFX12Plus},
};
+// clang-format on
// NOLINTEND
const int MSG_SIZE = static_cast<int>(
@@ -87,41 +90,56 @@ namespace Hwreg {
// Disable lint checking for this block since it makes the table unreadable.
// NOLINTBEGIN
+// clang-format off
const CustomOperand<const MCSubtargetInfo &> Opr[] = {
{{""}},
{{"HW_REG_MODE"}, ID_MODE},
{{"HW_REG_STATUS"}, ID_STATUS},
- {{"HW_REG_TRAPSTS"}, ID_TRAPSTS},
+ {{"HW_REG_TRAPSTS"}, ID_TRAPSTS, isNotGFX12Plus},
{{"HW_REG_HW_ID"}, ID_HW_ID, isNotGFX10Plus},
{{"HW_REG_GPR_ALLOC"}, ID_GPR_ALLOC},
{{"HW_REG_LDS_ALLOC"}, ID_LDS_ALLOC},
{{"HW_REG_IB_STS"}, ID_IB_STS},
{{""}},
{{""}},
+ {{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA_gfx12, isGFX12Plus},
+ {{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO_gfx12, isGFX12Plus},
+ {{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI_gfx12, isGFX12Plus},
{{""}},
{{""}},
- {{""}},
- {{""}},
- {{""}},
- {{"HW_REG_SH_MEM_BASES"}, ID_MEM_BASES, isGFX9Plus},
+ {{"HW_REG_SH_MEM_BASES"}, ID_MEM_BASES, isGFX9_GFX10_GFX11},
{{"HW_REG_TBA_LO"}, ID_TBA_LO, isGFX9_GFX10},
{{"HW_REG_TBA_HI"}, ID_TBA_HI, isGFX9_GFX10},
{{"HW_REG_TMA_LO"}, ID_TMA_LO, isGFX9_GFX10},
{{"HW_REG_TMA_HI"}, ID_TMA_HI, isGFX9_GFX10},
- {{"HW_REG_FLAT_SCR_LO"}, ID_FLAT_SCR_LO, isGFX10Plus},
- {{"HW_REG_FLAT_SCR_HI"}, ID_FLAT_SCR_HI, isGFX10Plus},
+ {{"HW_REG_FLAT_SCR_LO"}, ID_FLAT_SCR_LO, isGFX10_GFX11},
+ {{"HW_REG_FLAT_SCR_HI"}, ID_FLAT_SCR_HI, isGFX10_GFX11},
{{"HW_REG_XNACK_MASK"}, ID_XNACK_MASK, isGFX10Before1030},
{{"HW_REG_HW_ID1"}, ID_HW_ID1, isGFX10Plus},
{{"HW_REG_HW_ID2"}, ID_HW_ID2, isGFX10Plus},
{{"HW_REG_POPS_PACKER"}, ID_POPS_PACKER, isGFX10},
{{""}},
- {{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA, isGFX11Plus},
+ {{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA_gfx11, isGFX11},
{{""}},
- {{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_BEncoding},
-
- // Register numbers reused in GFX11+
- {{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO, isGFX11Plus},
- {{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI, isGFX11Plus},
+ {{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_3_GFX11},
+ {{"HW_REG_SHADER_CYCLES_HI"}, ID_SHADER_CYCLES_HI, isGFX12Plus},
+ {{"HW_REG_DVGPR_ALLOC_LO"}, ID_DVGPR_ALLOC_LO, isGFX12Plus},
+ {{"HW_REG_DVGPR_ALLOC_HI"}, ID_DVGPR_ALLOC_HI, isGFX12Plus},
+
+ // Register numbers reused in GFX11
+ {{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO_gfx11, isGFX11},
+ {{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI_gfx11, isGFX11},
+
+ // Register numbers reused in GFX12+
+ {{"HW_REG_STATE_PRIV"}, ID_STATE_PRIV, isGFX12Plus},
+ {{"HW_REG_PERF_SNAPSHOT_DATA1"}, ID_PERF_SNAPSHOT_DATA1, isGFX12Plus},
+ {{"HW_REG_PERF_SNAPSHOT_DATA2"}, ID_PERF_SNAPSHOT_DATA2, isGFX12Plus},
+ {{"HW_REG_EXCP_FLAG_PRIV"}, ID_EXCP_FLAG_PRIV, isGFX12Plus},
+ {{"HW_REG_EXCP_FLAG_USER"}, ID_EXCP_FLAG_USER, isGFX12Plus},
+ {{"HW_REG_TRAP_CTRL"}, ID_TRAP_CTRL, isGFX12Plus},
+ {{"HW_REG_SCRATCH_BASE_LO"}, ID_FLAT_SCR_LO, isGFX12Plus},
+ {{"HW_REG_SCRATCH_BASE_HI"}, ID_FLAT_SCR_HI, isGFX12Plus},
+ {{"HW_REG_SHADER_CYCLES_LO"}, ID_SHADER_CYCLES, isGFX12Plus},
// GFX940 specific registers
{{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX940},
@@ -133,6 +151,7 @@ const CustomOperand<const MCSubtargetInfo &> Opr[] = {
// Aliases
{{"HW_REG_HW_ID"}, ID_HW_ID1, isGFX10},
};
+// clang-format on
// NOLINTEND
const int OPR_SIZE = static_cast<int>(
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 296ea18b2a8d..0f92a56237ac 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -12,7 +12,6 @@
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -119,15 +118,16 @@ namespace llvm {
namespace AMDGPU {
+/// \returns True if \p STI is AMDHSA.
+bool isHsaAbi(const MCSubtargetInfo &STI) {
+ return STI.getTargetTriple().getOS() == Triple::AMDHSA;
+}
+
std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA)
return std::nullopt;
switch (AmdhsaCodeObjectVersion) {
- case 2:
- return ELF::ELFABIVERSION_AMDGPU_HSA_V2;
- case 3:
- return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
case 4:
return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
case 5:
@@ -138,18 +138,6 @@ std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
}
}
-bool isHsaAbiVersion2(const MCSubtargetInfo *STI) {
- if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
- return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V2;
- return false;
-}
-
-bool isHsaAbiVersion3(const MCSubtargetInfo *STI) {
- if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
- return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V3;
- return false;
-}
-
bool isHsaAbiVersion4(const MCSubtargetInfo *STI) {
if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V4;
@@ -162,11 +150,6 @@ bool isHsaAbiVersion5(const MCSubtargetInfo *STI) {
return false;
}
-bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI) {
- return isHsaAbiVersion3(STI) || isHsaAbiVersion4(STI) ||
- isHsaAbiVersion5(STI);
-}
-
unsigned getAmdhsaCodeObjectVersion() {
return AmdhsaCodeObjectVersion;
}
@@ -183,8 +166,6 @@ unsigned getCodeObjectVersion(const Module &M) {
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
switch (CodeObjectVersion) {
- case AMDHSA_COV2:
- case AMDHSA_COV3:
case AMDHSA_COV4:
return 48;
case AMDHSA_COV5:
@@ -198,8 +179,6 @@ unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
// central TD file.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
switch (CodeObjectVersion) {
- case AMDHSA_COV2:
- case AMDHSA_COV3:
case AMDHSA_COV4:
return 24;
case AMDHSA_COV5:
@@ -210,8 +189,6 @@ unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
switch (CodeObjectVersion) {
- case AMDHSA_COV2:
- case AMDHSA_COV3:
case AMDHSA_COV4:
return 32;
case AMDHSA_COV5:
@@ -222,8 +199,6 @@ unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
switch (CodeObjectVersion) {
- case AMDHSA_COV2:
- case AMDHSA_COV3:
case AMDHSA_COV4:
return 40;
case AMDHSA_COV5:
@@ -334,6 +309,7 @@ struct VOPDInfo {
uint16_t Opcode;
uint16_t OpX;
uint16_t OpY;
+ uint16_t Subtarget;
};
struct VOPTrue16Info {
@@ -468,6 +444,14 @@ bool getMAIIsGFX940XDL(unsigned Opc) {
return Info ? Info->is_gfx940_xdl : false;
}
+unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
+ if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
+ return SIEncodingFamily::GFX12;
+ if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
+ return SIEncodingFamily::GFX11;
+ llvm_unreachable("Subtarget generation does not support VOPD!");
+}
+
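getVOPDFull is re-keyed by encoding family below, so callers resolve the family from subtarget features first. A hedged standalone sketch of that dispatch (enum values illustrative, not the real SIEncodingFamily constants):

#include <cstdio>
#include <stdexcept>

enum EncodingFamily { GFX11 = 0, GFX12 = 1 };

// Later generations win: a GFX12 subtarget also reports GFX11 insts.
static EncodingFamily getVOPDEncodingFamily(bool HasGFX12Insts,
                                            bool HasGFX11Insts) {
  if (HasGFX12Insts)
    return GFX12;
  if (HasGFX11Insts)
    return GFX11;
  throw std::logic_error("subtarget generation does not support VOPD");
}

int main() {
  std::printf("%d\n", getVOPDEncodingFamily(true, true)); // 1 (GFX12)
}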
CanBeVOPD getCanBeVOPD(unsigned Opc) {
const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
if (Info)
@@ -495,11 +479,13 @@ bool isMAC(unsigned Opc) {
Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
+ Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
Opc == AMDGPU::V_FMAC_F32_e64_vi ||
Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
+ Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
@@ -510,7 +496,33 @@ bool isPermlane16(unsigned Opc) {
return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
- Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11;
+ Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
+ Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
+ Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
+ Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
+ Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
+}
+
+bool isGenericAtomic(unsigned Opc) {
+ return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN ||
+ Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX ||
+ Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
+ Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
+ Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
+ Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
+ Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
+ Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
+ Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
+ Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
+ Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
+ Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
+ Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
+ Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
+ Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
+ Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
+ Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
+ Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
+ Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
}
bool isTrue16Inst(unsigned Opc) {
@@ -535,8 +547,9 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen) {
return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}
-int getVOPDFull(unsigned OpX, unsigned OpY) {
- const VOPDInfo *Info = getVOPDInfoFromComponentOpcodes(OpX, OpY);
+int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) {
+ const VOPDInfo *Info =
+ getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily);
return Info ? Info->Opcode : -1;
}
@@ -588,13 +601,15 @@ unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
}
std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
- std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
+ std::function<unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc) const {
auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);
auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);
+ const unsigned CompOprNum =
+ SkipSrc ? Component::DST_NUM : Component::MAX_OPR_NUM;
unsigned CompOprIdx;
- for (CompOprIdx = 0; CompOprIdx < Component::MAX_OPR_NUM; ++CompOprIdx) {
+ for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) {
unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx];
if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
((OpXRegs[CompOprIdx] & BanksMasks) ==
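The loop above compares each X/Y operand pair under a per-operand bank mask and, with SkipSrc set, stops after the destinations. A standalone sketch of the conflict test with illustrative masks and register indices (the real VOPD_VGPR_BANK_MASKS values differ):

#include <cstdio>
#include <optional>

// Two component operands conflict when both are VGPRs (nonzero here)
// and their indices agree on the masked bank bits.
static std::optional<unsigned>
firstConflict(const unsigned (&OpX)[3], const unsigned (&OpY)[3],
              const unsigned (&BankMask)[3]) {
  for (unsigned I = 0; I < 3; ++I)
    if (OpX[I] && OpY[I] &&
        (OpX[I] & BankMask[I]) == (OpY[I] & BankMask[I]))
      return I;
  return std::nullopt;
}

int main() {
  unsigned X[3] = {1, 4, 0}, Y[3] = {2, 8, 0}, M[3] = {1, 3, 3};
  if (auto C = firstConflict(X, Y, M))
    std::printf("conflict at operand %u\n", *C); // conflict at operand 1
  else
    std::printf("no conflict\n");
}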
@@ -719,9 +734,9 @@ void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
static TargetIDSetting
getTargetIDSettingFromFeatureString(StringRef FeatureString) {
- if (FeatureString.endswith("-"))
+ if (FeatureString.ends_with("-"))
return TargetIDSetting::Off;
- if (FeatureString.endswith("+"))
+ if (FeatureString.ends_with("+"))
return TargetIDSetting::On;
llvm_unreachable("Malformed feature string");
@@ -732,9 +747,9 @@ void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
TargetID.split(TargetIDSplit, ':');
for (const auto &FeatureString : TargetIDSplit) {
- if (FeatureString.startswith("xnack"))
+ if (FeatureString.starts_with("xnack"))
XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
- if (FeatureString.startswith("sramecc"))
+ if (FeatureString.starts_with("sramecc"))
SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
}
}
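The starts_with/ends_with renames above are the StringRef API; the parsing itself splits a target ID on ':' and reads a trailing '+' or '-' per feature. A self-contained C++20 sketch of that scheme (std::string has the same member names):

#include <iostream>
#include <sstream>
#include <string>

int main() {
  std::string TargetID = "gfx90a:xnack+:sramecc-";
  std::istringstream In(TargetID);
  std::string Tok;
  while (std::getline(In, Tok, ':')) {
    // Each feature token ends in '+' (On) or '-' (Off).
    if (Tok.starts_with("xnack") || Tok.starts_with("sramecc"))
      std::cout << Tok.substr(0, Tok.size() - 1) << " = "
                << (Tok.ends_with("+") ? "On" : "Off") << '\n';
  }
}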
@@ -765,63 +780,6 @@ std::string AMDGPUTargetID::toString() const {
std::string Features;
if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
switch (CodeObjectVersion) {
- case AMDGPU::AMDHSA_COV2:
- // Code object V2 only supported specific processors and had fixed
- // settings for the XNACK.
- if (Processor == "gfx600") {
- } else if (Processor == "gfx601") {
- } else if (Processor == "gfx602") {
- } else if (Processor == "gfx700") {
- } else if (Processor == "gfx701") {
- } else if (Processor == "gfx702") {
- } else if (Processor == "gfx703") {
- } else if (Processor == "gfx704") {
- } else if (Processor == "gfx705") {
- } else if (Processor == "gfx801") {
- if (!isXnackOnOrAny())
- report_fatal_error(
- "AMD GPU code object V2 does not support processor " +
- Twine(Processor) + " without XNACK");
- } else if (Processor == "gfx802") {
- } else if (Processor == "gfx803") {
- } else if (Processor == "gfx805") {
- } else if (Processor == "gfx810") {
- if (!isXnackOnOrAny())
- report_fatal_error(
- "AMD GPU code object V2 does not support processor " +
- Twine(Processor) + " without XNACK");
- } else if (Processor == "gfx900") {
- if (isXnackOnOrAny())
- Processor = "gfx901";
- } else if (Processor == "gfx902") {
- if (isXnackOnOrAny())
- Processor = "gfx903";
- } else if (Processor == "gfx904") {
- if (isXnackOnOrAny())
- Processor = "gfx905";
- } else if (Processor == "gfx906") {
- if (isXnackOnOrAny())
- Processor = "gfx907";
- } else if (Processor == "gfx90c") {
- if (isXnackOnOrAny())
- report_fatal_error(
- "AMD GPU code object V2 does not support processor " +
- Twine(Processor) + " with XNACK being ON or ANY");
- } else {
- report_fatal_error(
- "AMD GPU code object V2 does not support processor " +
- Twine(Processor));
- }
- break;
- case AMDGPU::AMDHSA_COV3:
- // xnack.
- if (isXnackOnOrAny())
- Features += "+xnack";
- // In code object v2 and v3, "sramecc" feature was spelled with a
- // hyphen ("sram-ecc").
- if (isSramEccOnOrAny())
- Features += "+sram-ecc";
- break;
case AMDGPU::AMDHSA_COV4:
case AMDGPU::AMDHSA_COV5:
// sramecc.
@@ -1191,10 +1149,17 @@ amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
+ if (Version.Major >= 12) {
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, 0);
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_DISABLE_PERF, 0);
+ } else {
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, 1);
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, 1);
+ }
AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
if (Version.Major >= 10) {
@@ -1202,10 +1167,10 @@ amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 1);
}
if (AMDGPU::isGFX90A(*STI)) {
AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
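AMDHSA_BITS_SET writes a named bitfield inside a compute_pgm_rsrc word; the GFX12 branch above merely targets differently named fields. A sketch of the underlying clear-then-or operation, with an illustrative field position rather than the real rsrc1 layout:

#include <cstdint>
#include <cstdio>

// Clear the field given by Shift/Width, then OR in the new value.
static void setBits(uint32_t &Word, unsigned Shift, unsigned Width,
                    uint32_t Value) {
  uint32_t Mask = ((1u << Width) - 1) << Shift;
  Word = (Word & ~Mask) | ((Value << Shift) & Mask);
}

int main() {
  uint32_t Rsrc1 = 0;
  setBits(Rsrc1, 21, 1, 1); // e.g. some single-bit ENABLE_* field
  std::printf("0x%08x\n", Rsrc1); // 0x00200000
}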
@@ -1638,7 +1603,7 @@ unsigned getTgtId(const StringRef Name) {
if (Val.MaxIndex == 0 && Name == Val.Name)
return Val.Tgt;
- if (Val.MaxIndex > 0 && Name.startswith(Val.Name)) {
+ if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
StringRef Suffix = Name.drop_front(Val.Name.size());
unsigned Id;
@@ -1931,6 +1896,8 @@ bool isShader(CallingConv::ID cc) {
case CallingConv::AMDGPU_ES:
case CallingConv::AMDGPU_GS:
case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS_Chain:
+ case CallingConv::AMDGPU_CS_ChainPreserve:
case CallingConv::AMDGPU_CS:
return true;
default:
@@ -1968,7 +1935,17 @@ bool isModuleEntryFunctionCC(CallingConv::ID CC) {
case CallingConv::AMDGPU_Gfx:
return true;
default:
- return isEntryFunctionCC(CC);
+ return isEntryFunctionCC(CC) || isChainCC(CC);
+ }
+}
+
+bool isChainCC(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::AMDGPU_CS_Chain:
+ case CallingConv::AMDGPU_CS_ChainPreserve:
+ return true;
+ default:
+ return false;
}
}
@@ -2001,15 +1978,23 @@ bool hasPackedD16(const MCSubtargetInfo &STI) {
!isSI(STI);
}
-unsigned getNSAMaxSize(const MCSubtargetInfo &STI) {
+bool hasGDS(const MCSubtargetInfo &STI) {
+ return STI.hasFeature(AMDGPU::FeatureGDS);
+}
+
+unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
auto Version = getIsaVersion(STI.getCPU());
if (Version.Major == 10)
return Version.Minor >= 3 ? 13 : 5;
if (Version.Major == 11)
return 5;
+ if (Version.Major >= 12)
+ return HasSampler ? 4 : 5;
return 0;
}
+unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; }
+
bool isSI(const MCSubtargetInfo &STI) {
return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
}
@@ -2030,6 +2015,10 @@ bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
return isGFX9(STI) || isGFX10(STI);
}
+bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) {
+ return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
+}
+
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
return isVI(STI) || isGFX9(STI) || isGFX10(STI);
}
@@ -2046,6 +2035,10 @@ bool isGFX10(const MCSubtargetInfo &STI) {
return STI.hasFeature(AMDGPU::FeatureGFX10);
}
+bool isGFX10_GFX11(const MCSubtargetInfo &STI) {
+ return isGFX10(STI) || isGFX11(STI);
+}
+
bool isGFX10Plus(const MCSubtargetInfo &STI) {
return isGFX10(STI) || isGFX11Plus(STI);
}
@@ -2055,9 +2048,17 @@ bool isGFX11(const MCSubtargetInfo &STI) {
}
bool isGFX11Plus(const MCSubtargetInfo &STI) {
- return isGFX11(STI);
+ return isGFX11(STI) || isGFX12Plus(STI);
}
+bool isGFX12(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
+}
+
+bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); }
+
+bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
+
bool isNotGFX11Plus(const MCSubtargetInfo &STI) {
return !isGFX11Plus(STI);
}
@@ -2086,6 +2087,10 @@ bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
}
+bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) {
+ return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
+}
+
bool isGFX90A(const MCSubtargetInfo &STI) {
return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
}
@@ -2106,6 +2111,14 @@ bool hasVOPD(const MCSubtargetInfo &STI) {
return STI.hasFeature(AMDGPU::FeatureVOPD);
}
+bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) {
+ return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
+}
+
+unsigned hasKernargPreload(const MCSubtargetInfo &STI) {
+ return STI.hasFeature(AMDGPU::FeatureKernargPreload);
+}
+
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
int32_t ArgNumVGPR) {
if (has90AInsts && ArgNumAGPR)
@@ -2120,6 +2133,10 @@ bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
Reg == AMDGPU::SCC;
}
+bool isHi(unsigned Reg, const MCRegisterInfo &MRI) {
+ return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI;
+}
+
#define MAP_REG2REG \
using namespace AMDGPU; \
switch(Reg) { \
@@ -2250,16 +2267,13 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
- case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
- case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
- case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
case AMDGPU::OPERAND_REG_IMM_V2FP32:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
@@ -2272,8 +2286,10 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
assert(OpNo < Desc.NumOperands);
unsigned OpType = Desc.operands()[OpNo].OperandType;
- return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
- OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
+ return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
+ OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) ||
+ (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
+ OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST);
}
// Avoid using MCRegisterClass::getSize, since that function will go away
@@ -2423,10 +2439,6 @@ unsigned getRegBitWidth(const MCRegisterClass &RC) {
return getRegBitWidth(RC.getID());
}
-unsigned getRegBitWidth(const TargetRegisterClass &RC) {
- return getRegBitWidth(RC.getID());
-}
-
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
unsigned OpNo) {
assert(OpNo < Desc.NumOperands);
@@ -2522,6 +2534,16 @@ bool isInlinableIntLiteralV216(int32_t Literal) {
return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
}
+bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi, uint8_t OpType) {
+ switch (OpType) {
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ return isInlinableLiteralV216(Literal, HasInv2Pi);
+ default:
+ return isInlinableIntLiteralV216(Literal);
+ }
+}
+
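The new overload picks the FP inline-constant test for V2FP16 operand types and otherwise falls back to the integer test. A standalone sketch of that integer fallback, assuming the usual -16..64 inline-constant range and the "both halves equal" rule visible in the surrounding code:

#include <cstdint>
#include <cstdio>

static bool isInlinableIntLiteral(int16_t V) { return V >= -16 && V <= 64; }

// A packed v2i16 literal is inlinable when both halves are the same
// inlinable integer.
static bool isInlinableIntLiteralV216(int32_t Literal) {
  int16_t Lo = (int16_t)(Literal & 0xffff);
  int16_t Hi = (int16_t)(Literal >> 16);
  return Lo == Hi && isInlinableIntLiteral(Lo);
}

int main() {
  std::printf("%d %d\n",
              isInlinableIntLiteralV216(0x00400040),  // 1: both halves 64
              isInlinableIntLiteralV216(0x00400041)); // 0: halves differ
}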
bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) {
assert(HasInv2Pi);
@@ -2535,6 +2557,13 @@ bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) {
return Lo16 == Hi16;
}
+bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
+ if (IsFP64)
+ return !(Val & 0xffffffffu);
+
+ return isUInt<32>(Val) || isInt<32>(Val);
+}
+
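A standalone mirror of the isValid32BitLiteral just added: a 64-bit FP value can use a 32-bit literal only when its low dword is zero (the literal supplies the high dword), while integer values must fit in 32 bits, signed or unsigned:

#include <cstdint>
#include <cstdio>

static bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
  if (IsFP64)
    return (Val & 0xffffffffu) == 0; // low dword must be implied zero
  return Val <= UINT32_MAX ||
         ((int64_t)Val >= INT32_MIN && (int64_t)Val <= INT32_MAX);
}

int main() {
  std::printf("%d %d %d\n",
              isValid32BitLiteral(0x4000000000000000ull, true),  // 1 (2.0)
              isValid32BitLiteral(0xffffffff80000000ull, false), // 1 (INT32_MIN)
              isValid32BitLiteral(0x123456789aull, false));      // 0
}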
bool isArgPassedInSGPR(const Argument *A) {
const Function *F = A->getParent();
@@ -2552,13 +2581,15 @@ bool isArgPassedInSGPR(const Argument *A) {
case CallingConv::AMDGPU_PS:
case CallingConv::AMDGPU_CS:
case CallingConv::AMDGPU_Gfx:
+ case CallingConv::AMDGPU_CS_Chain:
+ case CallingConv::AMDGPU_CS_ChainPreserve:
// For non-compute shaders, SGPR inputs are marked with either inreg or
// byval. Everything else is in VGPRs.
return A->hasAttribute(Attribute::InReg) ||
A->hasAttribute(Attribute::ByVal);
default:
- // TODO: Should calls support inreg for SGPR inputs?
- return false;
+ // TODO: treat i1 as divergent?
+ return A->hasAttribute(Attribute::InReg);
}
}
@@ -2577,13 +2608,14 @@ bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
case CallingConv::AMDGPU_PS:
case CallingConv::AMDGPU_CS:
case CallingConv::AMDGPU_Gfx:
+ case CallingConv::AMDGPU_CS_Chain:
+ case CallingConv::AMDGPU_CS_ChainPreserve:
// For non-compute shaders, SGPR inputs are marked with either inreg or
// byval. Everything else is in VGPRs.
return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
CB->paramHasAttr(ArgNo, Attribute::ByVal);
default:
- // TODO: Should calls support inreg for SGPR inputs?
- return false;
+ return CB->paramHasAttr(ArgNo, Attribute::InReg);
}
}
@@ -2597,6 +2629,9 @@ static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
int64_t EncodedOffset) {
+ if (isGFX12Plus(ST))
+ return isUInt<23>(EncodedOffset);
+
return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
: isUInt<8>(EncodedOffset);
}
@@ -2604,6 +2639,9 @@ bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
int64_t EncodedOffset,
bool IsBuffer) {
+ if (isGFX12Plus(ST))
+ return isInt<24>(EncodedOffset);
+
return !IsBuffer &&
hasSMRDSignedImmOffset(ST) &&
isInt<21>(EncodedOffset);
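These hunks widen the SMEM immediate-offset checks for GFX12: unsigned encodings get 23 bits and signed encodings 24 bits, all in bytes, while pre-GFX12 paths keep the older 20-/8-/21-bit limits. A standalone sketch of the new bounds:

#include <cstdint>
#include <cstdio>

static bool fitsUInt(uint64_t V, unsigned N) { return V < (1ull << N); }
static bool fitsInt(int64_t V, unsigned N) {
  return V >= -(1ll << (N - 1)) && V < (1ll << (N - 1));
}

int main() {
  int64_t Off = -5000000; // about -4.8 MiB
  std::printf("gfx12 signed ok: %d\n", fitsInt(Off, 24));  // 1
  std::printf("gfx12 unsigned ok: %d\n",
              Off >= 0 && fitsUInt((uint64_t)Off, 23));    // 0
}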
@@ -2624,6 +2662,10 @@ uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
int64_t ByteOffset, bool IsBuffer) {
+ if (isGFX12Plus(ST)) // 24 bit signed offsets
+ return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
+ : std::nullopt;
+
// The signed version is always a byte offset.
if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
assert(hasSMEMByteOffset(ST));
@@ -2651,10 +2693,11 @@ std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
}
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
- // Address offset is 12-bit signed for GFX10, 13-bit for GFX9 and GFX11+.
if (AMDGPU::isGFX10(ST))
return 12;
+ if (AMDGPU::isGFX12(ST))
+ return 24;
return 13;
}
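Per the function above, the signed flat-offset field is 12 bits on GFX10, 24 bits on GFX12, and 13 bits otherwise (GFX9/GFX11). A small sketch of what those widths allow:

#include <cstdio>

static long long maxFlatOffset(unsigned Bits) {
  return (1ll << (Bits - 1)) - 1; // largest positive signed offset
}

int main() {
  std::printf("gfx10 %lld, gfx11 %lld, gfx12 %lld\n",
              maxFlatOffset(12), maxFlatOffset(13), maxFlatOffset(24));
  // gfx10 2047, gfx11 4095, gfx12 8388607
}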
@@ -2707,6 +2750,25 @@ const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
: getGfx9BufferFormatInfo(Format);
}
+bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
+ for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1,
+ OpName::src2 }) {
+ int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
+ if (Idx == -1)
+ continue;
+
+ if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID ||
+ OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID)
+ return true;
+ }
+
+ return false;
+}
+
+bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
+ return hasAny64BitVGPROperands(OpDesc);
+}
+
} // namespace AMDGPU
raw_ostream &operator<<(raw_ostream &OS,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index bdf7ccad9c76..3c9f330cbcde 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -31,7 +31,6 @@ class MCRegisterClass;
class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
-class TargetRegisterClass;
class Triple;
class raw_ostream;
@@ -43,30 +42,18 @@ namespace AMDGPU {
struct IsaVersion;
-enum {
- AMDHSA_COV2 = 2,
- AMDHSA_COV3 = 3,
- AMDHSA_COV4 = 4,
- AMDHSA_COV5 = 5
-};
+enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5 };
+/// \returns True if \p STI is AMDHSA.
+bool isHsaAbi(const MCSubtargetInfo &STI);
/// \returns HSA OS ABI Version identification.
std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
-/// \returns True if HSA OS ABI Version identification is 2,
-/// false otherwise.
-bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
-/// \returns True if HSA OS ABI Version identification is 3,
-/// false otherwise.
-bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 4,
/// false otherwise.
bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 5,
/// false otherwise.
bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
-/// \returns True if HSA OS ABI Version identification is 3 and above,
-/// false otherwise.
-bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI);
/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);
@@ -518,6 +505,10 @@ struct CanBeVOPD {
bool Y;
};
+/// \returns SIEncodingFamily used for VOPD encoding on a \p ST.
+LLVM_READONLY
+unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);
+
LLVM_READONLY
CanBeVOPD getCanBeVOPD(unsigned Opc);
@@ -537,7 +528,7 @@ LLVM_READONLY
unsigned getVOPDOpcode(unsigned Opc);
LLVM_READONLY
-int getVOPDFull(unsigned OpX, unsigned OpY);
+int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily);
LLVM_READONLY
bool isVOPD(unsigned Opc);
@@ -548,6 +539,9 @@ bool isMAC(unsigned Opc);
LLVM_READNONE
bool isPermlane16(unsigned Opc);
+LLVM_READNONE
+bool isGenericAtomic(unsigned Opc);
+
namespace VOPD {
enum Component : unsigned {
@@ -757,15 +751,20 @@ public:
// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
// for the specified component and MC operand. The callback must return 0
// if the operand is not a register or not a VGPR.
- bool hasInvalidOperand(
- std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
- return getInvalidCompOperandIndex(GetRegIdx).has_value();
+ // If \p SkipSrc is set to true then constraints for source operands are not
+ // checked.
+ bool hasInvalidOperand(std::function<unsigned(unsigned, unsigned)> GetRegIdx,
+ bool SkipSrc = false) const {
+ return getInvalidCompOperandIndex(GetRegIdx, SkipSrc).has_value();
}
// Check VOPD operands constraints.
// Return the index of an invalid component operand, if any.
+ // If \p SkipSrc is set to true then constraints for source operands are not
+ // checked.
std::optional<unsigned> getInvalidCompOperandIndex(
- std::function<unsigned(unsigned, unsigned)> GetRegIdx) const;
+ std::function<unsigned(unsigned, unsigned)> GetRegIdx,
+ bool SkipSrc = false) const;
private:
RegIndices
@@ -1121,6 +1120,9 @@ bool isEntryFunctionCC(CallingConv::ID CC);
LLVM_READNONE
bool isModuleEntryFunctionCC(CallingConv::ID CC);
+LLVM_READNONE
+bool isChainCC(CallingConv::ID CC);
+
bool isKernelCC(const Function *Func);
// FIXME: Remove this when calling conventions cleaned up
@@ -1141,37 +1143,51 @@ bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasA16(const MCSubtargetInfo &STI);
bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);
-unsigned getNSAMaxSize(const MCSubtargetInfo &STI);
+bool hasGDS(const MCSubtargetInfo &STI);
+unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false);
+unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI);
bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX9_GFX10(const MCSubtargetInfo &STI);
+bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI);
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
bool isGFX8Plus(const MCSubtargetInfo &STI);
bool isGFX9Plus(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
+bool isGFX10_GFX11(const MCSubtargetInfo &STI);
bool isGFX10Plus(const MCSubtargetInfo &STI);
bool isNotGFX10Plus(const MCSubtargetInfo &STI);
bool isGFX10Before1030(const MCSubtargetInfo &STI);
bool isGFX11(const MCSubtargetInfo &STI);
bool isGFX11Plus(const MCSubtargetInfo &STI);
+bool isGFX12(const MCSubtargetInfo &STI);
+bool isGFX12Plus(const MCSubtargetInfo &STI);
+bool isNotGFX12Plus(const MCSubtargetInfo &STI);
bool isNotGFX11Plus(const MCSubtargetInfo &STI);
bool isGCN3Encoding(const MCSubtargetInfo &STI);
bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
+bool isGFX10_3_GFX11(const MCSubtargetInfo &STI);
bool isGFX90A(const MCSubtargetInfo &STI);
bool isGFX940(const MCSubtargetInfo &STI);
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
bool hasMAIInsts(const MCSubtargetInfo &STI);
bool hasVOPD(const MCSubtargetInfo &STI);
+bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
+unsigned hasKernargPreload(const MCSubtargetInfo &STI);
/// \returns true if \p Reg is a scalar register.
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
+/// \returns true if \p Reg occupies the high 16 bits of a 32-bit register.
+/// The bit indicating isHi is the LSB of the encoding.
+bool isHi(unsigned Reg, const MCRegisterInfo &MRI);
+
/// If \p Reg is a pseudo reg, return the correct hardware register given
/// \p STI otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
@@ -1202,9 +1218,6 @@ unsigned getRegBitWidth(unsigned RCID);
/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);
-/// Get the size in bits of a register from the register class \p RC.
-unsigned getRegBitWidth(const TargetRegisterClass &RC);
-
/// Get size of register operand
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
unsigned OpNo);
@@ -1225,6 +1238,7 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
case AMDGPU::OPERAND_KIMM32:
case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
+ case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
return 4;
case AMDGPU::OPERAND_REG_IMM_INT64:
@@ -1283,8 +1297,14 @@ LLVM_READNONE
bool isInlinableIntLiteralV216(int32_t Literal);
LLVM_READNONE
+bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi, uint8_t OpType);
+
+LLVM_READNONE
bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);
+LLVM_READNONE
+bool isValid32BitLiteral(uint64_t Val, bool IsFP64);
+
bool isArgPassedInSGPR(const Argument *Arg);
bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);
@@ -1314,7 +1334,7 @@ std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
int64_t ByteOffset);
-/// For FLAT segment the offset must be positive;
+/// For pre-GFX12 FLAT instructions, the offset must be positive;
/// MSB is ignored and forced to zero.
///
/// \return The number of bits available for the signed offset field in flat
@@ -1328,10 +1348,16 @@ unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
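A caller typically validates a flat offset against that bit count before folding it. A sketch of the check implied by the reworded comment (the helper fitsFlatOffset is invented; the GFX12 branch assumes the offset field is fully signed there):

#include "llvm/Support/MathExtras.h"

// Illustrative only: does ByteOffset fit the flat-instruction offset field?
static bool fitsFlatOffset(const llvm::MCSubtargetInfo &ST, int64_t Off) {
  unsigned Bits = llvm::AMDGPU::getNumFlatOffsetBits(ST);
  if (llvm::AMDGPU::isGFX12Plus(ST))
    return llvm::isIntN(Bits, Off); // signed offsets allowed on GFX12
  // Pre-GFX12: offset must be non-negative and the MSB is forced to zero.
  return Off >= 0 && llvm::isUIntN(Bits - 1, uint64_t(Off));
}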
LLVM_READNONE
-inline bool isLegal64BitDPPControl(unsigned DC) {
+inline bool isLegalDPALU_DPPControl(unsigned DC) {
return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
}
+/// \returns true if an instruction may have a 64-bit VGPR operand.
+bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc);
+
+/// \returns true if an instruction is a DP ALU DPP.
+bool isDPALU_DPP(const MCInstrDesc &OpDesc);
+
/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
index cbdbf1c16f9f..25e628e5cbc5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -74,6 +74,16 @@ bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
switch (II->getIntrinsicID()) {
case Intrinsic::amdgcn_s_barrier:
+ case Intrinsic::amdgcn_s_barrier_signal:
+ case Intrinsic::amdgcn_s_barrier_signal_var:
+ case Intrinsic::amdgcn_s_barrier_signal_isfirst:
+ case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
+ case Intrinsic::amdgcn_s_barrier_init:
+ case Intrinsic::amdgcn_s_barrier_join:
+ case Intrinsic::amdgcn_s_barrier_wait:
+ case Intrinsic::amdgcn_s_barrier_leave:
+ case Intrinsic::amdgcn_s_get_barrier_state:
+ case Intrinsic::amdgcn_s_wakeup_barrier:
case Intrinsic::amdgcn_wave_barrier:
case Intrinsic::amdgcn_sched_barrier:
case Intrinsic::amdgcn_sched_group_barrier:
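The point of the widened switch is that every one of these barrier intrinsics orders execution without writing memory, so MemorySSA's conservative clobber answer can be refined away. A sketch of the intended query pattern (MSSA, AA and Load are assumed to be in scope; only isReallyAClobber comes from this file):

#include "llvm/Analysis/MemorySSA.h"

// Illustrative only: ask MemorySSA for the clobber of a load, then filter
// out "clobbers" that are really just barriers or asserting intrinsics.
llvm::MemoryAccess *MA =
    MSSA.getWalker()->getClobberingMemoryAccess(&Load);
if (auto *Def = llvm::dyn_cast<llvm::MemoryDef>(MA))
  if (!llvm::AMDGPU::isReallyAClobber(Load.getPointerOperand(), Def, &AA)) {
    // Safe to treat Load as unclobbered by Def, e.g. for LDS promotion.
  }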
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
index df37c420fa72..e42b27f8e09e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
@@ -9,19 +9,15 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H
-#include <vector>
-
namespace llvm {
struct Align;
class AAResults;
class DataLayout;
-class Function;
class GlobalVariable;
class LoadInst;
class MemoryDef;
class MemorySSA;
-class Module;
class Value;
namespace AMDGPU {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
index a92d574b1848..0fa67c559cb2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -18,7 +18,6 @@
#include "AMDGPUPTNote.h"
#include "SIDefines.h"
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/AMDGPUMetadata.h"
@@ -84,7 +83,6 @@ bool AMDGPUPALMetadata::setFromLegacyBlob(StringRef Blob) {
// Set PAL metadata from msgpack blob.
bool AMDGPUPALMetadata::setFromMsgPackBlob(StringRef Blob) {
- msgpack::Reader Reader(Blob);
return MsgPackDoc.readFromBlob(Blob, /*Multi=*/false);
}
@@ -242,30 +240,29 @@ void AMDGPUPALMetadata::setScratchSize(CallingConv::ID CC, unsigned Val) {
}
// Set the stack frame size of a function in the metadata.
-void AMDGPUPALMetadata::setFunctionScratchSize(const MachineFunction &MF,
- unsigned Val) {
- auto Node = getShaderFunction(MF.getFunction().getName());
+void AMDGPUPALMetadata::setFunctionScratchSize(StringRef FnName, unsigned Val) {
+ auto Node = getShaderFunction(FnName);
Node[".stack_frame_size_in_bytes"] = MsgPackDoc.getNode(Val);
+ Node[".backend_stack_size"] = MsgPackDoc.getNode(Val);
}
// Set the amount of LDS used in bytes in the metadata.
-void AMDGPUPALMetadata::setFunctionLdsSize(const MachineFunction &MF,
- unsigned Val) {
- auto Node = getShaderFunction(MF.getFunction().getName());
+void AMDGPUPALMetadata::setFunctionLdsSize(StringRef FnName, unsigned Val) {
+ auto Node = getShaderFunction(FnName);
Node[".lds_size"] = MsgPackDoc.getNode(Val);
}
// Set the number of used vgprs in the metadata.
-void AMDGPUPALMetadata::setFunctionNumUsedVgprs(const MachineFunction &MF,
+void AMDGPUPALMetadata::setFunctionNumUsedVgprs(StringRef FnName,
unsigned Val) {
- auto Node = getShaderFunction(MF.getFunction().getName());
+ auto Node = getShaderFunction(FnName);
Node[".vgpr_count"] = MsgPackDoc.getNode(Val);
}
// Set the number of used sgprs in the metadata.
-void AMDGPUPALMetadata::setFunctionNumUsedSgprs(const MachineFunction &MF,
+void AMDGPUPALMetadata::setFunctionNumUsedSgprs(StringRef FnName,
unsigned Val) {
- auto Node = getShaderFunction(MF.getFunction().getName());
+ auto Node = getShaderFunction(FnName);
Node[".sgpr_count"] = MsgPackDoc.getNode(Val);
}
@@ -726,7 +723,7 @@ void AMDGPUPALMetadata::toLegacyBlob(std::string &Blob) {
if (Registers.getMap().empty())
return;
raw_string_ostream OS(Blob);
- support::endian::Writer EW(OS, support::endianness::little);
+ support::endian::Writer EW(OS, llvm::endianness::little);
for (auto I : Registers.getMap()) {
EW.write(uint32_t(I.first.getUInt()));
EW.write(uint32_t(I.second.getUInt()));
@@ -911,6 +908,7 @@ void AMDGPUPALMetadata::reset() {
MsgPackDoc.clear();
Registers = MsgPackDoc.getEmptyNode();
HwStages = MsgPackDoc.getEmptyNode();
+ ShaderFunctions = MsgPackDoc.getEmptyNode();
}
unsigned AMDGPUPALMetadata::getPALVersion(unsigned idx) {
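For the legacy path the blob is nothing more than a flat run of little-endian (register, value) dword pairs. The framing, reproduced standalone with the same endian::Writer API (register address and value are invented):

#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <string>

int main() {
  std::string Blob;
  llvm::raw_string_ostream OS(Blob);
  llvm::support::endian::Writer EW(OS, llvm::endianness::little);
  EW.write(uint32_t(0x2C0A)); // register address (illustrative value)
  EW.write(uint32_t(0x1234)); // register value (illustrative value)
  // Blob now holds 8 bytes: two little-endian 32-bit words.
  return 0;
}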
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
index e477904cb81f..158f766d0485 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
@@ -17,7 +17,6 @@
namespace llvm {
-class MachineFunction;
class Module;
class StringRef;
@@ -87,22 +86,22 @@ public:
void setScratchSize(unsigned CC, unsigned Val);
// Set the stack frame size of a function in the metadata.
- void setFunctionScratchSize(const MachineFunction &MF, unsigned Val);
+ void setFunctionScratchSize(StringRef FnName, unsigned Val);
// Set the amount of LDS used in bytes in the metadata. This is an optional
// advisory record for logging etc; wave dispatch actually uses the rsrc1
// register for the shader stage to determine the amount of LDS to allocate.
- void setFunctionLdsSize(const MachineFunction &MF, unsigned Val);
+ void setFunctionLdsSize(StringRef FnName, unsigned Val);
// Set the number of used vgprs in the metadata. This is an optional advisory
// record for logging etc; wave dispatch actually uses the rsrc1 register for
// the shader stage to determine the number of vgprs to allocate.
- void setFunctionNumUsedVgprs(const MachineFunction &MF, unsigned Val);
+ void setFunctionNumUsedVgprs(StringRef FnName, unsigned Val);
// Set the number of used sgprs in the metadata. This is an optional advisory
// record for logging etc; wave dispatch actually uses the rsrc1 register for
// the shader stage to determine the number of sgprs to allocate.
- void setFunctionNumUsedSgprs(const MachineFunction &MF, unsigned Val);
+ void setFunctionNumUsedSgprs(StringRef FnName, unsigned Val);
// Set the hardware register bit in PAL metadata to enable wave32 on the
// shader of the given calling convention.
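With the MachineFunction dependency gone, the setters need nothing but a symbol name. A sketch of the call pattern (function name and values invented):

// Illustrative only: record per-function PAL metadata by name.
AMDGPUPALMetadata PM;
PM.setFunctionScratchSize("my_shader", /*Val=*/64); // stack bytes
PM.setFunctionNumUsedVgprs("my_shader", /*Val=*/32);
PM.setFunctionNumUsedSgprs("my_shader", /*Val=*/16);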
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VINTERPInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VINTERPInstructions.td
index 7d03150bf5b1..fc563b7493ad 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VINTERPInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VINTERPInstructions.td
@@ -10,7 +10,7 @@
// VINTERP encoding
//===----------------------------------------------------------------------===//
-class VINTERPe_gfx11 <bits<7> op, VOPProfile P> : Enc64 {
+class VINTERPe <VOPProfile P> : Enc64 {
bits<8> vdst;
bits<4> src0_modifiers;
bits<9> src0;
@@ -31,7 +31,6 @@ class VINTERPe_gfx11 <bits<7> op, VOPProfile P> : Enc64 {
let Inst{13} = !if(P.HasOpSel, src2_modifiers{2}, 0); // op_sel(2)
let Inst{14} = !if(P.HasOpSel, src0_modifiers{3}, 0); // op_sel(3)
let Inst{15} = clamp;
- let Inst{22-16} = op;
let Inst{40-32} = src0;
let Inst{49-41} = src1;
let Inst{58-50} = src2;
@@ -40,6 +39,14 @@ class VINTERPe_gfx11 <bits<7> op, VOPProfile P> : Enc64 {
let Inst{63} = src2_modifiers{0}; // neg(2)
}
+class VINTERPe_gfx11 <bits<7> op, VOPProfile P> : VINTERPe<P> {
+ let Inst{22-16} = op;
+}
+
+class VINTERPe_gfx12 <bits<7> op, VOPProfile P> : VINTERPe<P> {
+ let Inst{20-16} = op{4-0};
+}
+
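The two subclasses differ only in how the opcode is spliced into the shared 64-bit encoding. The same splice, restated in C++ as a checking sketch (not generated code):

#include <cassert>
#include <cstdint>

// Illustrative only: GFX11 places all 7 opcode bits at Inst[22:16], while
// GFX12 places just op[4:0] at Inst[20:16], leaving bits 22:21 free.
static uint64_t spliceVinterpOp(uint64_t Inst, unsigned Op, bool IsGFX12) {
  if (IsGFX12) {
    assert(Op < 0x20 && "GFX12 VINTERP opcodes carry only 5 bits");
    return (Inst & ~(0x1FULL << 16)) | (uint64_t(Op & 0x1F) << 16);
  }
  return (Inst & ~(0x7FULL << 16)) | (uint64_t(Op & 0x7F) << 16);
}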
//===----------------------------------------------------------------------===//
// VOP3 VINTERP
//===----------------------------------------------------------------------===//
@@ -171,17 +178,28 @@ defm : VInterpF16Pat<int_amdgcn_interp_inreg_p2_f16,
// VINTERP Real Instructions
//===----------------------------------------------------------------------===//
-let AssemblerPredicate = isGFX11Plus, DecoderNamespace = "GFX11" in {
- multiclass VINTERP_Real_gfx11 <bits<7> op> {
+multiclass VINTERP_Real_gfx11 <bits<7> op> {
+ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
def _gfx11 :
VINTERP_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX11>,
VINTERPe_gfx11<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
}
}
-defm V_INTERP_P10_F32_inreg : VINTERP_Real_gfx11<0x000>;
-defm V_INTERP_P2_F32_inreg : VINTERP_Real_gfx11<0x001>;
-defm V_INTERP_P10_F16_F32_inreg : VINTERP_Real_gfx11<0x002>;
-defm V_INTERP_P2_F16_F32_inreg : VINTERP_Real_gfx11<0x003>;
-defm V_INTERP_P10_RTZ_F16_F32_inreg : VINTERP_Real_gfx11<0x004>;
-defm V_INTERP_P2_RTZ_F16_F32_inreg : VINTERP_Real_gfx11<0x005>;
+multiclass VINTERP_Real_gfx12 <bits<7> op> {
+ let AssemblerPredicate = isGFX12Only, DecoderNamespace = "GFX12" in {
+ def _gfx12 :
+ VINTERP_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX12>,
+ VINTERPe_gfx12<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ }
+}
+
+multiclass VINTERP_Real_gfx11_gfx12 <bits<7> op> :
+ VINTERP_Real_gfx11<op>, VINTERP_Real_gfx12<op>;
+
+defm V_INTERP_P10_F32_inreg : VINTERP_Real_gfx11_gfx12<0x000>;
+defm V_INTERP_P2_F32_inreg : VINTERP_Real_gfx11_gfx12<0x001>;
+defm V_INTERP_P10_F16_F32_inreg : VINTERP_Real_gfx11_gfx12<0x002>;
+defm V_INTERP_P2_F16_F32_inreg : VINTERP_Real_gfx11_gfx12<0x003>;
+defm V_INTERP_P10_RTZ_F16_F32_inreg : VINTERP_Real_gfx11_gfx12<0x004>;
+defm V_INTERP_P2_RTZ_F16_F32_inreg : VINTERP_Real_gfx11_gfx12<0x005>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 1a8efc6e3df2..27a7c29cb1ac 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -88,6 +88,12 @@ class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemo
let TRANS = ps.TRANS;
}
+class VOP1_Real_Gen <VOP1_Pseudo ps, GFXGen Gen, string real_name = ps.Mnemonic> :
+ VOP1_Real <ps, Gen.Subtarget, real_name> {
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let DecoderNamespace = Gen.DecoderNamespace;
+}
+
class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
VOP_SDWA_Pseudo <OpName, P, pattern> {
let AsmMatchConverter = "cvtSdwaVOP1";
@@ -152,7 +158,7 @@ multiclass VOP1Inst_t16<string opName,
defm NAME : VOP1Inst<opName, P, node>;
}
let OtherPredicates = [HasTrue16BitInsts] in {
- defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_True16<P>, node>;
+ defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_Fake16<P>, node>;
}
}
@@ -170,7 +176,7 @@ class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
}
class VOPProfileI2F_True16<ValueType dstVt, ValueType srcVt> :
- VOPProfile_True16<VOPProfile<[dstVt, srcVt, untyped, untyped]>> {
+ VOPProfile_Fake16<VOPProfile<[dstVt, srcVt, untyped, untyped]>> {
let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
let InsVOP3Base = (ins Src0VOP3DPP:$src0, clampmod:$clamp, omod:$omod);
@@ -199,7 +205,7 @@ class VOP_SPECIAL_OMOD_PROF<ValueType dstVt, ValueType srcVt> :
def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f32>;
def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f64>;
def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>;
-def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_True16<VOP_I16_F16> {
+def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_Fake16<VOP_I16_F16> {
let HasOMod = 1;
}
@@ -221,7 +227,7 @@ def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
-let SubtargetPredicate = isGFX940Plus in
+let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
} // End isMoveImm = 1
@@ -292,13 +298,13 @@ let FPDPRounding = 1, isReMaterializable = 0 in {
let OtherPredicates = [NotHasTrue16BitInsts] in
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>;
let OtherPredicates = [HasTrue16BitInsts] in
- defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_True16<VOP_F16_F32>, any_fpround>;
+ defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>;
} // End FPDPRounding = 1, isReMaterializable = 0
let OtherPredicates = [NotHasTrue16BitInsts] in
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
let OtherPredicates = [HasTrue16BitInsts] in
-defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_True16<VOP_F32_F16>, any_fpextend>;
+defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
@@ -317,7 +323,7 @@ defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f3
defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
-defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>;
+defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, froundeven>;
defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;
let TRANS = 1, SchedRW = [WriteTrans32] in {
@@ -326,7 +332,7 @@ defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, AMDGPUlog>;
defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>;
defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
-defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, any_amdgcn_sqrt>;
+defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, int_amdgcn_sqrt>;
} // End TRANS = 1, SchedRW = [WriteTrans32]
let TRANS = 1, SchedRW = [WriteTrans64] in {
@@ -458,7 +464,7 @@ let SubtargetPredicate = isGFX7Plus in {
let SchedRW = [WriteDoubleAdd] in {
defm V_TRUNC_F64 : VOP1Inst<"v_trunc_f64", VOP_F64_F64, ftrunc>;
defm V_CEIL_F64 : VOP1Inst<"v_ceil_f64", VOP_F64_F64, fceil>;
- defm V_RNDNE_F64 : VOP1Inst<"v_rndne_f64", VOP_F64_F64, frint>;
+ defm V_RNDNE_F64 : VOP1Inst<"v_rndne_f64", VOP_F64_F64, froundeven>;
defm V_FLOOR_F64 : VOP1Inst<"v_floor_f64", VOP_F64_F64, ffloor>;
} // End SchedRW = [WriteDoubleAdd]
} // End SubtargetPredicate = isGFX7Plus
@@ -502,7 +508,7 @@ defm V_FREXP_EXP_I16_F16_t16 : VOP1Inst <"v_frexp_exp_i16_f16_t16", VOP_I16_F16_
defm V_FLOOR_F16 : VOP1Inst_t16 <"v_floor_f16", VOP_F16_F16, ffloor>;
defm V_CEIL_F16 : VOP1Inst_t16 <"v_ceil_f16", VOP_F16_F16, fceil>;
defm V_TRUNC_F16 : VOP1Inst_t16 <"v_trunc_f16", VOP_F16_F16, ftrunc>;
-defm V_RNDNE_F16 : VOP1Inst_t16 <"v_rndne_f16", VOP_F16_F16, frint>;
+defm V_RNDNE_F16 : VOP1Inst_t16 <"v_rndne_f16", VOP_F16_F16, froundeven>;
let FPDPRounding = 1 in {
defm V_FRACT_F16 : VOP1Inst_t16 <"v_fract_f16", VOP_F16_F16, AMDGPUfract>;
} // End FPDPRounding = 1
@@ -584,18 +590,28 @@ let SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0,
}
class Cvt_F32_F8_Pat<SDPatternOperator node, int index,
- VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
+ VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
(f32 (node i32:$src, index)),
- !if (index,
- (inst_sdwa 0, $src, 0, 0, index),
- (inst_e32 $src))
+ (inst_sdwa 0, $src, 0, 0, index)
>;
-foreach Index = [0, 1, 2, 3] in {
- def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index,
- V_CVT_F32_FP8_e32, V_CVT_F32_FP8_sdwa>;
- def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index,
- V_CVT_F32_BF8_e32, V_CVT_F32_BF8_sdwa>;
+let OtherPredicates = [HasCvtFP8VOP1Bug] in {
+ def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
+ (V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>;
+ def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
+ (V_CVT_F32_BF8_sdwa 0, $src, 0, 0, 0)>;
+}
+
+let OtherPredicates = [HasNoCvtFP8VOP1Bug] in {
+ def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
+ (V_CVT_F32_FP8_e32 $src)>;
+ def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
+ (V_CVT_F32_BF8_e32 $src)>;
+}
+
+foreach Index = [1, 2, 3] in {
+ def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, V_CVT_F32_FP8_sdwa>;
+ def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, V_CVT_F32_BF8_sdwa>;
}
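The Index operand of these intrinsics picks which byte of the 32-bit source holds the FP8/BF8 payload; only the byte-0 form has an e32 encoding, and even that must fall back to SDWA on parts with the VOP1 bug. The byte-select half of the semantics in plain C++ (the widening itself is hardware behavior, left abstract):

#include <cstdint>

// Illustrative only: 'decode' stands in for the hardware's FP8->F32
// widening; the byte selection is the part the patterns above encode.
template <typename DecodeFn>
float cvtF32FromFP8(uint32_t Src, unsigned Index /*0..3*/, DecodeFn decode) {
  return decode(uint8_t(Src >> (8 * Index)));
}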
class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
@@ -646,6 +662,7 @@ let SubtargetPredicate = isGFX11Plus in {
getVOP1Pat64<int_amdgcn_permlane64,
VOP_MOVRELS>.ret,
/*VOP1Only=*/ 1>;
+ defm V_MOV_B16_t16 : VOP1Inst<"v_mov_b16_t16", VOPProfile_True16<VOP_I16_I16>>;
defm V_NOT_B16 : VOP1Inst_t16<"v_not_b16", VOP_I16_I16>;
defm V_CVT_I32_I16 : VOP1Inst_t16<"v_cvt_i32_i16", VOP_I32_I16>;
defm V_CVT_U32_U16 : VOP1Inst_t16<"v_cvt_u32_u16", VOP_I32_I16>;
@@ -677,6 +694,13 @@ class VOP1_DPP16<bits<8> op, VOP1_DPP_Pseudo ps, int subtarget, VOPProfile p = p
let SubtargetPredicate = HasDPP16;
}
+class VOP1_DPP16_Gen<bits<8> op, VOP1_DPP_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pfl> :
+ VOP1_DPP16 <op, ps, Gen.Subtarget, p> {
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let DecoderNamespace = "DPP"#Gen.DecoderNamespace;
+}
+
+
class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
VOP_DPP8<ps.OpName, p> {
let hasSideEffects = ps.hasSideEffects;
@@ -691,137 +715,173 @@ class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
let Inst{31-25} = 0x3f;
}
+class VOP1_DPP8_Gen<bits<8> op, VOP1_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pfl> :
+ VOP1_DPP8<op, ps, p> {
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let DecoderNamespace = "DPP8"#Gen.DecoderNamespace;
+}
+
//===----------------------------------------------------------------------===//
-// GFX11.
+// GFX11, GFX12.
//===----------------------------------------------------------------------===//
-let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
- multiclass VOP1Only_Real_gfx11<bits<9> op> {
- let IsSingle = 1 in
- def _gfx11 :
- VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.GFX11>,
- VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
- }
- multiclass VOP1_Real_e32_gfx11<bits<9> op, string opName = NAME> {
- defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
- def _e32_gfx11 :
- VOP1_Real<ps, SIEncodingFamily.GFX11>,
- VOP1e<op{7-0}, ps.Pfl>;
- }
- multiclass VOP1_Real_e32_with_name_gfx11<bits<9> op, string opName,
- string asmName> {
- defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
- let AsmString = asmName # ps.AsmOperands in {
- defm NAME : VOP1_Real_e32_gfx11<op, opName>;
- }
- }
- multiclass VOP1_Real_e64_gfx11<bits<9> op> {
- def _e64_gfx11 :
- VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX11>,
- VOP3e_gfx11<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
- }
- multiclass VOP1_Real_dpp_gfx11<bits<9> op, string opName = NAME> {
- defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
- def _dpp_gfx11 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX11> {
- let DecoderNamespace = "DPPGFX11";
- }
- }
- multiclass VOP1_Real_dpp_with_name_gfx11<bits<9> op, string opName,
- string asmName> {
- defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
- let AsmString = asmName # ps.Pfl.AsmDPP16, DecoderNamespace = "DPPGFX11" in {
- defm NAME : VOP1_Real_dpp_gfx11<op, opName>;
- }
+multiclass VOP1Only_Real<GFXGen Gen, bits<9> op> {
+ let IsSingle = 1 in
+ def Gen.Suffix :
+ VOP1_Real_Gen<!cast<VOP1_Pseudo>(NAME), Gen>,
+ VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
+}
+
+multiclass VOP1_Real_e32<GFXGen Gen, bits<9> op, string opName = NAME> {
+ defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
+ def _e32#Gen.Suffix :
+ VOP1_Real_Gen<ps, Gen>,
+ VOP1e<op{7-0}, ps.Pfl>;
+}
+
+multiclass VOP1_Real_e32_with_name<GFXGen Gen, bits<9> op, string opName,
+ string asmName> {
+ defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
+ let AsmString = asmName # ps.AsmOperands in {
+ defm NAME : VOP1_Real_e32<Gen, op, opName>;
}
- multiclass VOP1_Real_dpp8_gfx11<bits<9> op, string opName = NAME> {
- defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
- def _dpp8_gfx11 : VOP1_DPP8<op{7-0}, ps> {
- let DecoderNamespace = "DPP8GFX11";
- }
+}
+
+multiclass VOP1_Real_e64<GFXGen Gen, bits<9> op> {
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<!cast<VOP3_Pseudo>(NAME#"_e64"), Gen>,
+ VOP3e_gfx11_gfx12<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+}
+
+multiclass VOP1_Real_dpp<GFXGen Gen, bits<9> op, string opName = NAME> {
+ defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
+ def _dpp#Gen.Suffix : VOP1_DPP16_Gen<op{7-0}, !cast<VOP1_DPP_Pseudo>(opName#"_dpp"), Gen>;
+}
+
+multiclass VOP1_Real_dpp_with_name<GFXGen Gen, bits<9> op, string opName,
+ string asmName> {
+ defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
+ let AsmString = asmName # ps.Pfl.AsmDPP16 in {
+ defm NAME : VOP1_Real_dpp<Gen, op, opName>;
}
- multiclass VOP1_Real_dpp8_with_name_gfx11<bits<9> op, string opName,
- string asmName> {
- defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
- let AsmString = asmName # ps.Pfl.AsmDPP8, DecoderNamespace = "DPP8GFX11" in {
- defm NAME : VOP1_Real_dpp8_gfx11<op, opName>;
- }
+}
+
+multiclass VOP1_Real_dpp8<GFXGen Gen, bits<9> op, string opName = NAME> {
+ defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
+ def _dpp8#Gen.Suffix : VOP1_DPP8_Gen<op{7-0}, ps, Gen>;
+}
+
+multiclass VOP1_Real_dpp8_with_name<GFXGen Gen, bits<9> op, string opName,
+ string asmName> {
+ defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
+ let AsmString = asmName # ps.Pfl.AsmDPP8 in {
+ defm NAME : VOP1_Real_dpp8<Gen, op, opName>;
}
-} // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11"
+}
-multiclass VOP1_Realtriple_e64_gfx11<bits<9> op> {
- defm NAME : VOP3_Realtriple_gfx11<{0, 1, 1, op{6-0}}, /*isSingle=*/ 0, NAME>;
+multiclass VOP1_Realtriple_e64<GFXGen Gen, bits<9> op> {
+ defm NAME : VOP3_Realtriple<Gen, {0, 1, 1, op{6-0}}, /*isSingle=*/ 0, NAME>;
}
-multiclass VOP1_Realtriple_e64_with_name_gfx11<bits<9> op, string opName,
+
+multiclass VOP1_Realtriple_e64_with_name<GFXGen Gen, bits<9> op, string opName,
string asmName> {
- defm NAME : VOP3_Realtriple_with_name_gfx11<{0, 1, 1, op{6-0}}, opName,
+ defm NAME : VOP3_Realtriple_with_name<Gen, {0, 1, 1, op{6-0}}, opName,
asmName>;
}
-multiclass VOP1_Real_FULL_gfx11<bits<9> op> :
- VOP1_Real_e32_gfx11<op>, VOP1_Realtriple_e64_gfx11<op>,
- VOP1_Real_dpp_gfx11<op>, VOP1_Real_dpp8_gfx11<op>;
+multiclass VOP1_Real_FULL<GFXGen Gen, bits<9> op> :
+ VOP1_Real_e32<Gen, op>, VOP1_Realtriple_e64<Gen, op>,
+ VOP1_Real_dpp<Gen, op>, VOP1_Real_dpp8<Gen, op>;
multiclass VOP1_Real_NO_VOP3_with_name_gfx11<bits<9> op, string opName,
- string asmName> {
- defm NAME : VOP1_Real_e32_with_name_gfx11<op, opName, asmName>,
- VOP1_Real_dpp_with_name_gfx11<op, opName, asmName>,
- VOP1_Real_dpp8_with_name_gfx11<op, opName, asmName>;
+ string asmName> {
+ defm NAME : VOP1_Real_e32_with_name<GFX11Gen, op, opName, asmName>,
+ VOP1_Real_dpp_with_name<GFX11Gen, op, opName, asmName>,
+ VOP1_Real_dpp8_with_name<GFX11Gen, op, opName, asmName>;
defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
def gfx11_alias : MnemonicAlias<ps.Mnemonic, asmName>,
Requires<[isGFX11Plus]>;
}
-multiclass VOP1_Real_FULL_with_name_gfx11<bits<9> op, string opName,
+multiclass VOP1_Real_NO_VOP3_with_name_gfx12<bits<9> op, string opName,
+ string asmName> {
+ defm NAME : VOP1_Real_e32_with_name<GFX12Gen, op, opName, asmName>,
+ VOP1_Real_dpp_with_name<GFX12Gen, op, opName, asmName>,
+ VOP1_Real_dpp8_with_name<GFX12Gen, op, opName, asmName>;
+}
+
+multiclass VOP1_Real_FULL_with_name<GFXGen Gen, bits<9> op, string opName,
string asmName> :
- VOP1_Real_NO_VOP3_with_name_gfx11<op, opName, asmName>,
- VOP1_Realtriple_e64_with_name_gfx11<op, opName, asmName>;
+ VOP1_Real_e32_with_name<Gen, op, opName, asmName>,
+ VOP1_Real_dpp_with_name<Gen, op, opName, asmName>,
+ VOP1_Real_dpp8_with_name<Gen, op, opName, asmName>,
+ VOP1_Realtriple_e64_with_name<Gen, op, opName, asmName>;
-multiclass VOP1_Real_FULL_t16_gfx11<bits<9> op, string asmName,
- string opName = NAME> :
- VOP1_Real_FULL_with_name_gfx11<op, opName, asmName>;
+multiclass VOP1_Real_NO_DPP<GFXGen Gen, bits<9> op> :
+ VOP1_Real_e32<Gen, op>, VOP1_Real_e64<Gen, op>;
-multiclass VOP1_Real_NO_DPP_gfx11<bits<9> op> :
- VOP1_Real_e32_gfx11<op>, VOP1_Real_e64_gfx11<op>;
+multiclass VOP1_Real_FULL_t16_gfx11_gfx12<bits<9> op, string asmName,
+ string opName = NAME> :
+ VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
+ VOP1_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
-defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11<0x00c,
+multiclass VOP1_Real_FULL_with_name_gfx11_gfx12<bits<9> op, string opName,
+ string asmName> :
+ VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
+ VOP1_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
+
+multiclass VOP1Only_Real_gfx11_gfx12<bits<9> op> :
+ VOP1Only_Real<GFX11Gen, op>, VOP1Only_Real<GFX12Gen, op>;
+
+multiclass VOP1_Real_FULL_gfx11_gfx12<bits<9> op> :
+ VOP1_Real_FULL<GFX11Gen, op>, VOP1_Real_FULL<GFX12Gen, op>;
+
+multiclass VOP1_Real_NO_DPP_OP_SEL_with_name<GFXGen Gen, bits<9> op,
+ string opName, string asmName> :
+ VOP1_Real_e32_with_name<Gen, op, opName, asmName>,
+ VOP3_Real_with_name<Gen, {0, 1, 1, op{6-0}}, opName, asmName>;
+
+
+defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00c,
"V_CVT_RPI_I32_F32", "v_cvt_nearest_i32_f32">;
-defm V_CVT_FLOOR_I32_F32 : VOP1_Real_FULL_with_name_gfx11<0x00d,
+defm V_CVT_FLOOR_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00d,
"V_CVT_FLR_I32_F32", "v_cvt_floor_i32_f32">;
-defm V_CLZ_I32_U32 : VOP1_Real_FULL_with_name_gfx11<0x039,
+defm V_CLZ_I32_U32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x039,
"V_FFBH_U32", "v_clz_i32_u32">;
-defm V_CTZ_I32_B32 : VOP1_Real_FULL_with_name_gfx11<0x03a,
+defm V_CTZ_I32_B32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03a,
"V_FFBL_B32", "v_ctz_i32_b32">;
-defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11<0x03b,
+defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03b,
"V_FFBH_I32", "v_cls_i32">;
-defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11<0x067>;
-defm V_NOT_B16_t16 : VOP1_Real_FULL_t16_gfx11<0x069, "v_not_b16">;
-defm V_CVT_I32_I16_t16 : VOP1_Real_FULL_t16_gfx11<0x06a, "v_cvt_i32_i16">;
-defm V_CVT_U32_U16_t16 : VOP1_Real_FULL_t16_gfx11<0x06b, "v_cvt_u32_u16">;
-
-defm V_CVT_F16_U16_t16 : VOP1_Real_FULL_t16_gfx11<0x050, "v_cvt_f16_u16">;
-defm V_CVT_F16_I16_t16 : VOP1_Real_FULL_t16_gfx11<0x051, "v_cvt_f16_i16">;
-defm V_CVT_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x052, "v_cvt_u16_f16">;
-defm V_CVT_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x053, "v_cvt_i16_f16">;
-defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x054, "v_rcp_f16">;
-defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x055, "v_sqrt_f16">;
-defm V_RSQ_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x056, "v_rsq_f16">;
-defm V_LOG_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x057, "v_log_f16">;
-defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x058, "v_exp_f16">;
-defm V_FREXP_MANT_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x059, "v_frexp_mant_f16">;
-defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05a, "v_frexp_exp_i16_f16">;
-defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05b, "v_floor_f16">;
-defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05c, "v_ceil_f16">;
-defm V_TRUNC_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05d, "v_trunc_f16">;
-defm V_RNDNE_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05e, "v_rndne_f16">;
-defm V_FRACT_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05f, "v_fract_f16">;
-defm V_SIN_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x060, "v_sin_f16">;
-defm V_COS_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x061, "v_cos_f16">;
-defm V_SAT_PK_U8_I16_t16 : VOP1_Real_FULL_t16_gfx11<0x062, "v_sat_pk_u8_i16">;
-defm V_CVT_NORM_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x063, "v_cvt_norm_i16_f16">;
-defm V_CVT_NORM_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x064, "v_cvt_norm_u16_f16">;
-
-defm V_CVT_F16_F32_t16 : VOP1_Real_FULL_t16_gfx11<0x00a, "v_cvt_f16_f32">;
-defm V_CVT_F32_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x00b, "v_cvt_f32_f16">;
+defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11_gfx12<0x067>;
+defm V_MOV_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x01c, "v_mov_b16">;
+defm V_NOT_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16">;
+defm V_CVT_I32_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
+defm V_CVT_U32_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;
+
+defm V_CVT_F16_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x050, "v_cvt_f16_u16">;
+defm V_CVT_F16_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x051, "v_cvt_f16_i16">;
+defm V_CVT_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x052, "v_cvt_u16_f16">;
+defm V_CVT_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x053, "v_cvt_i16_f16">;
+defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
+defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">;
+defm V_RSQ_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, "v_rsq_f16">;
+defm V_LOG_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">;
+defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
+defm V_FREXP_MANT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
+defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
+defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
+defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
+defm V_TRUNC_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05d, "v_trunc_f16">;
+defm V_RNDNE_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05e, "v_rndne_f16">;
+defm V_FRACT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f16">;
+defm V_SIN_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">;
+defm V_COS_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">;
+defm V_SAT_PK_U8_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
+defm V_CVT_NORM_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">;
+defm V_CVT_NORM_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">;
+
+defm V_CVT_F16_F32_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
+defm V_CVT_F32_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;
//===----------------------------------------------------------------------===//
// GFX10.
@@ -870,17 +930,23 @@ multiclass VOP1_Real_gfx10<bits<9> op> :
VOP1_Real_sdwa_gfx10<op>, VOP1_Real_dpp_gfx10<op>,
VOP1_Real_dpp8_gfx10<op>;
-multiclass VOP1_Real_gfx10_FULL_gfx11<bits<9> op> :
- VOP1_Real_gfx10<op>, VOP1_Real_FULL_gfx11<op>;
+multiclass VOP1_Real_gfx10_FULL_gfx11_gfx12<bits<9> op> :
+ VOP1_Real_gfx10<op>,
+ VOP1_Real_FULL<GFX11Gen, op>,
+ VOP1_Real_FULL<GFX12Gen, op>;
-multiclass VOP1_Real_gfx10_NO_DPP_gfx11<bits<9> op> :
- VOP1_Real_gfx10<op>, VOP1_Real_NO_DPP_gfx11<op>;
+multiclass VOP1_Real_gfx10_NO_DPP_gfx11_gfx12<bits<9> op> :
+ VOP1_Real_gfx10<op>,
+ VOP1_Real_NO_DPP<GFX11Gen, op>,
+ VOP1_Real_NO_DPP<GFX12Gen, op>;
-multiclass VOP1Only_Real_gfx10_gfx11<bits<9> op> :
- VOP1Only_Real_gfx10<op>, VOP1Only_Real_gfx11<op>;
+multiclass VOP1Only_Real_gfx10_gfx11_gfx12<bits<9> op> :
+ VOP1Only_Real_gfx10<op>,
+ VOP1Only_Real<GFX11Gen, op>,
+ VOP1Only_Real<GFX12Gen, op>;
-defm V_PIPEFLUSH : VOP1_Real_gfx10_NO_DPP_gfx11<0x01b>;
-defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10_FULL_gfx11<0x048>;
+defm V_PIPEFLUSH : VOP1_Real_gfx10_NO_DPP_gfx11_gfx12<0x01b>;
+defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10_FULL_gfx11_gfx12<0x048>;
defm V_CVT_F16_U16 : VOP1_Real_gfx10<0x050>;
defm V_CVT_F16_I16 : VOP1_Real_gfx10<0x051>;
defm V_CVT_U16_F16 : VOP1_Real_gfx10<0x052>;
@@ -903,11 +969,11 @@ defm V_SAT_PK_U8_I16 : VOP1_Real_gfx10<0x062>;
defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10<0x063>;
defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10<0x064>;
-defm V_SWAP_B32 : VOP1Only_Real_gfx10_gfx11<0x065>;
-defm V_SWAPREL_B32 : VOP1Only_Real_gfx10_gfx11<0x068>;
+defm V_SWAP_B32 : VOP1Only_Real_gfx10_gfx11_gfx12<0x065>;
+defm V_SWAPREL_B32 : VOP1Only_Real_gfx10_gfx11_gfx12<0x068>;
//===----------------------------------------------------------------------===//
-// GFX7, GFX10.
+// GFX7, GFX10, GFX11, GFX12.
//===----------------------------------------------------------------------===//
let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
@@ -926,22 +992,20 @@ let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
multiclass VOP1_Real_gfx7<bits<9> op> :
VOP1_Real_e32_gfx7<op>, VOP1_Real_e64_gfx7<op>;
-multiclass VOP1_Real_gfx7_gfx10<bits<9> op> :
- VOP1_Real_gfx7<op>, VOP1_Real_gfx10<op>;
-
-multiclass VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<bits<9> op> :
- VOP1_Real_gfx7_gfx10<op>, VOP1_Real_NO_DPP_gfx11<op>;
+multiclass VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<bits<9> op> :
+ VOP1_Real_gfx7<op>, VOP1_Real_gfx10<op>, VOP1_Real_NO_DPP<GFX11Gen, op>,
+ VOP1_Real_NO_DPP<GFX12Gen, op>;
defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>;
defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>;
-defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x017>;
-defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x018>;
-defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x019>;
-defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x01a>;
+defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x017>;
+defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x018>;
+defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x019>;
+defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x01a>;
//===----------------------------------------------------------------------===//
-// GFX6, GFX7, GFX10, GFX11.
+// GFX6, GFX7, GFX10, GFX11, GFX12.
//===----------------------------------------------------------------------===//
let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
@@ -963,11 +1027,13 @@ multiclass VOP1_Real_gfx6_gfx7<bits<9> op> :
multiclass VOP1_Real_gfx6_gfx7_gfx10<bits<9> op> :
VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10<op>;
-multiclass VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<bits<9> op> :
- VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_FULL_gfx11<op>;
+multiclass VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<bits<9> op> :
+ VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_FULL<GFX11Gen, op>,
+ VOP1_Real_FULL<GFX12Gen, op>;
-multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<bits<9> op> :
- VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_NO_DPP_gfx11<op>;
+multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<bits<9> op> :
+ VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_NO_DPP<GFX11Gen, op>,
+ VOP1_Real_NO_DPP<GFX12Gen, op>;
defm V_LOG_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x026>;
defm V_RCP_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x028>;
@@ -977,57 +1043,57 @@ defm V_RSQ_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x02d>;
defm V_RCP_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x030>;
defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>;
-defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x000>;
-defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x001>;
-defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x003>;
-defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x004>;
-defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x005>;
-defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x006>;
-defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x007>;
-defm V_CVT_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x008>;
+defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x000>;
+defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x001>;
+defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x003>;
+defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x004>;
+defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x005>;
+defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x006>;
+defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x007>;
+defm V_CVT_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x008>;
defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00a>;
defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>;
defm V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>;
defm V_CVT_FLR_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00d>;
-defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x00e>;
-defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x00f>;
-defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x010>;
-defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x011>;
-defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x012>;
-defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x013>;
-defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x014>;
-defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x015>;
-defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x016>;
-defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x020>;
-defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x021>;
-defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x022>;
-defm V_RNDNE_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x023>;
-defm V_FLOOR_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x024>;
-defm V_EXP_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x025>;
-defm V_LOG_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x027>;
-defm V_RCP_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x02a>;
-defm V_RCP_IFLAG_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x02b>;
-defm V_RSQ_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x02e>;
-defm V_RCP_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x02f>;
-defm V_RSQ_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x031>;
-defm V_SQRT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x033>;
-defm V_SQRT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x034>;
-defm V_SIN_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x035>;
-defm V_COS_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x036>;
-defm V_NOT_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x037>;
-defm V_BFREV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x038>;
+defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x00e>;
+defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x00f>;
+defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x010>;
+defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x011>;
+defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x012>;
+defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x013>;
+defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x014>;
+defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x015>;
+defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x016>;
+defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x020>;
+defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x021>;
+defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x022>;
+defm V_RNDNE_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x023>;
+defm V_FLOOR_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x024>;
+defm V_EXP_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x025>;
+defm V_LOG_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x027>;
+defm V_RCP_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x02a>;
+defm V_RCP_IFLAG_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x02b>;
+defm V_RSQ_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x02e>;
+defm V_RCP_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x02f>;
+defm V_RSQ_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x031>;
+defm V_SQRT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x033>;
+defm V_SQRT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x034>;
+defm V_SIN_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x035>;
+defm V_COS_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x036>;
+defm V_NOT_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x037>;
+defm V_BFREV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x038>;
defm V_FFBH_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x039>;
defm V_FFBL_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x03a>;
defm V_FFBH_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x03b>;
-defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x03c>;
-defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x03d>;
-defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x03e>;
-defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x03f>;
-defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x040>;
+defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03c>;
+defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03d>;
+defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03e>;
+defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x03f>;
+defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x040>;
defm V_CLREXCP : VOP1_Real_gfx6_gfx7_gfx10<0x041>;
-defm V_MOVRELD_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x042>;
-defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x043>;
-defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x044>;
+defm V_MOVRELD_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x042>;
+defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x043>;
+defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x044>;
//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
@@ -1163,7 +1229,7 @@ defm V_CVT_NORM_U16_F16 : VOP1_Real_vi<0x4e>;
defm V_ACCVGPR_MOV_B32 : VOP1Only_Real_vi<0x52>;
-let VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [EXEC, M0] in {
+let VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [EXEC, M0], Size = V_MOV_B32_e32.Size in {
// Copy of v_mov_b32 with $vdst as a use operand for use with VGPR
// indexing mode. vdst can't be treated as a def for codegen purposes,
@@ -1193,8 +1259,8 @@ def : GCNPat <
(as_i1timm $bound_ctrl))
>;
-def : GCNPat <
- (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl,
+class UpdateDPPPat<ValueType vt> : GCNPat <
+ (vt (int_amdgcn_update_dpp vt:$old, vt:$src, timm:$dpp_ctrl,
timm:$row_mask, timm:$bank_mask,
timm:$bound_ctrl)),
(V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl),
@@ -1202,6 +1268,11 @@ def : GCNPat <
(as_i1timm $bound_ctrl))
>;
+def : UpdateDPPPat<i32>;
+def : UpdateDPPPat<f32>;
+def : UpdateDPPPat<v2i16>;
+def : UpdateDPPPat<v2f16>;
+
} // End OtherPredicates = [isGFX8Plus]
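Generalizing the pattern over the value type funnels i32, f32, v2i16 and v2f16 updates into the one V_MOV_B32_dpp instruction. From source level this is reachable through the clang builtin; a device-code sketch (check the builtin's exact signature for your clang version; all control operands must be integer constants):

// Illustrative only: rotate each row of lanes right by one.
// dpp_ctrl 0x111 encodes row_shr:1; compile for a DPP-capable gfx target.
int shiftRowRightByOne(int Old, int Src) {
  return __builtin_amdgcn_update_dpp(Old, Src, /*dpp_ctrl=*/0x111,
                                     /*row_mask=*/0xf, /*bank_mask=*/0xf,
                                     /*bound_ctrl=*/false);
}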
let OtherPredicates = [isGFX8Plus] in {
@@ -1303,3 +1374,15 @@ def : GCNPat <
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX11Only]
+
+//===----------------------------------------------------------------------===//
+// GFX12.
+//===----------------------------------------------------------------------===//
+
+let OtherPredicates = [isGFX12Only] in {
+def : GCNPat <
+ (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
+ (V_MOV_B32_dpp8_gfx12 VGPR_32:$src, VGPR_32:$src,
+ (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
+>;
+} // End OtherPredicates = [isGFX12Only]
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 481a162748e6..0aa62ea77b11 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -109,6 +109,14 @@ class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemo
let mayStore = ps.mayStore;
}
+class VOP2_Real_Gen <VOP2_Pseudo ps, GFXGen Gen, string real_name = ps.Mnemonic> :
+ VOP2_Real <ps, Gen.Subtarget, real_name> {
+ let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts,
+ Gen.AssemblerPredicate);
+ let DecoderNamespace = Gen.DecoderNamespace#
+ !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
+}
+
class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
VOP_SDWA_Pseudo <OpName, P, pattern> {
let AsmMatchConverter = "cvtSdwaVOP2";
@@ -194,9 +202,12 @@ multiclass VOP2Inst_t16<string opName,
let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in {
defm NAME : VOP2Inst<opName, P, node, revOp, GFX9Renamed>;
}
- let SubtargetPredicate = HasTrue16BitInsts in {
+ let SubtargetPredicate = UseRealTrue16Insts in {
defm _t16 : VOP2Inst<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16", GFX9Renamed>;
}
+ let SubtargetPredicate = UseFakeTrue16Insts in {
+ defm _fake16 : VOP2Inst<opName#"_fake16", VOPProfile_Fake16<P>, node, revOp#"_fake16", GFX9Renamed>;
+ }
}
// Creating a _t16_e32 pseudo when there is no corresponding real instruction on
@@ -212,7 +223,7 @@ multiclass VOP2Inst_e64_t16<string opName,
defm NAME : VOP2Inst<opName, P, node, revOp, GFX9Renamed>;
}
let SubtargetPredicate = HasTrue16BitInsts in {
- defm _t16 : VOP2Inst_e64<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16", GFX9Renamed>;
+ defm _t16 : VOP2Inst_e64<opName#"_t16", VOPProfile_Fake16<P>, node, revOp#"_t16", GFX9Renamed>;
}
}
@@ -378,7 +389,7 @@ def VOP_MADAK_F16 : VOP_MADAK <f16>;
def VOP_MADAK_F16_t16 : VOP_MADAK <f16> {
let IsTrue16 = 1;
let DstRC = VOPDstOperand<VGPR_32_Lo128>;
- let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, VGPR_32_Lo128:$src1, ImmOpType:$imm);
+ let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0, VGPR_32_Lo128:$src1, ImmOpType:$imm);
}
def VOP_MADAK_F32 : VOP_MADAK <f32>;
@@ -403,7 +414,7 @@ def VOP_MADMK_F16 : VOP_MADMK <f16>;
def VOP_MADMK_F16_t16 : VOP_MADMK <f16> {
let IsTrue16 = 1;
let DstRC = VOPDstOperand<VGPR_32_Lo128>;
- let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPR_32_Lo128:$src1);
+ let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPR_32_Lo128:$src1);
}
def VOP_MADMK_F32 : VOP_MADMK <f32>;
@@ -859,6 +870,17 @@ def : divergent_i64_BinOp <and, V_AND_B32_e64>;
def : divergent_i64_BinOp <or, V_OR_B32_e64>;
def : divergent_i64_BinOp <xor, V_XOR_B32_e64>;
+// mul24 w/ 64 bit output.
+class mul24_64_Pat<SDPatternOperator Op, Instruction InstLo, Instruction InstHi> : GCNPat<
+ (i64 (Op i32:$src0, i32:$src1)),
+ (REG_SEQUENCE VReg_64,
+ (InstLo $src0, $src1), sub0,
+ (InstHi $src0, $src1), sub1)
+>;
+
+def : mul24_64_Pat<AMDGPUmul_i24, V_MUL_I32_I24_e64, V_MUL_HI_I32_I24_e64>;
+def : mul24_64_Pat<AMDGPUmul_u24, V_MUL_U32_U24_e64, V_MUL_HI_U32_U24_e64>;
+
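The REG_SEQUENCE stitches the independent low and high 32-bit products into one 64-bit value. Reference semantics in plain C++ (this mirrors what the pattern selects; it is not the codegen itself):

#include <cstdint>

// Both operands are taken mod 2^24, so the full product fits in 48 bits;
// the lo half is V_MUL_U32_U24 and the hi half V_MUL_HI_U32_U24.
uint64_t mulU24(uint32_t A, uint32_t B) {
  uint64_t Full = uint64_t(A & 0xFFFFFF) * (B & 0xFFFFFF);
  uint32_t Lo = uint32_t(Full);
  uint32_t Hi = uint32_t(Full >> 32);
  return (uint64_t(Hi) << 32) | Lo; // == REG_SEQUENCE(sub0=Lo, sub1=Hi)
}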
//===----------------------------------------------------------------------===//
// 16-Bit Operand Instructions
//===----------------------------------------------------------------------===//
@@ -874,7 +896,7 @@ def LDEXP_F16_VOPProfile : VOPProfile <[f16, f16, f16, untyped]> {
let HasSrc1FloatMods = 0;
let Src1ModSDWA = Int16SDWAInputMods;
}
-def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_F16> {
+def LDEXP_F16_VOPProfile_True16 : VOPProfile_Fake16<VOP_F16_F16_F16> {
let Src1RC32 = RegisterOperand<VGPR_32_Lo128>;
let Src1DPP = VGPR_32_Lo128;
let Src1ModDPP = IntT16VRegInputMods;
@@ -925,13 +947,13 @@ def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_t16_e64>;
let SubtargetPredicate = isGFX11Plus in {
let isCommutable = 1 in {
- defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, and>;
- defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, or>;
- defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, xor>;
+ defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, and>;
+ defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, or>;
+ defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, xor>;
} // End isCommutable = 1
} // End SubtargetPredicate = isGFX11Plus
-let FPDPRounding = 1, isReMaterializable = 1 in {
+let FPDPRounding = 1, isReMaterializable = 1, FixedSize = 1 in {
let SubtargetPredicate = isGFX10Plus, OtherPredicates = [NotHasTrue16BitInsts] in {
def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">;
}
@@ -947,7 +969,7 @@ let SubtargetPredicate = HasTrue16BitInsts in {
def V_FMAAK_F16_t16 : VOP2_Pseudo <"v_fmaak_f16_t16", VOP_MADAK_F16_t16, [], "">;
}
} // End isCommutable = 1
-} // End FPDPRounding = 1, isReMaterializable = 1
+} // End FPDPRounding = 1, isReMaterializable = 1, FixedSize = 1
let Constraints = "$vdst = $src2",
DisableEncoding="$src2",
@@ -1089,12 +1111,12 @@ let AddedComplexity = 30 in {
}
} // End AddedComplexity = 30
-let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1 in {
+let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, FixedSize = 1 in {
def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">, VOPD_Component<0x2, "v_fmamk_f32">;
let isCommutable = 1 in
def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">;
-}
+} // End SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, FixedSize = 1
let SubtargetPredicate = HasPkFmacF16Inst in {
defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;
@@ -1201,6 +1223,20 @@ def : VOPBinOpClampPat<uaddsat, V_ADD_U16_e64, i16>;
def : VOPBinOpClampPat<usubsat, V_SUB_U16_e64, i16>;
}
+let SubtargetPredicate = isGFX12Plus, isReMaterializable = 1 in {
+ let SchedRW = [WriteDoubleAdd], isCommutable = 1 in {
+ let FPDPRounding = 1 in {
+ defm V_ADD_F64_pseudo : VOP2Inst <"v_add_f64_pseudo", VOP_F64_F64_F64, any_fadd>;
+ defm V_MUL_F64_pseudo : VOP2Inst <"v_mul_f64_pseudo", VOP_F64_F64_F64, fmul>;
+ } // End FPDPRounding = 1
+ defm V_MIN_NUM_F64 : VOP2Inst <"v_min_num_f64", VOP_F64_F64_F64, fminnum_like>;
+ defm V_MAX_NUM_F64 : VOP2Inst <"v_max_num_f64", VOP_F64_F64_F64, fmaxnum_like>;
+ } // End SchedRW = [WriteDoubleAdd], isCommutable = 1
+ let SchedRW = [Write64Bit] in {
+ defm V_LSHLREV_B64_pseudo : VOP2Inst <"v_lshlrev_b64_pseudo", VOP_I64_I32_I64, clshl_rev_64>;
+ } // End SchedRW = [Write64Bit]
+} // End SubtargetPredicate = isGFX12Plus, isReMaterializable = 1
+
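The *_num_f64 pair follows the IEEE-754 minNum/maxNum convention that the fminnum_like selector implies: a quiet NaN loses to a numeric operand. In C terms (reference behavior only, not the instruction definition):

#include <cmath>

// std::fmin/std::fmax return the non-NaN operand when exactly one input
// is a quiet NaN, which is the behavior v_min_num/v_max_num provide.
double minNum(double A, double B) { return std::fmin(A, B); }
double maxNum(double A, double B) { return std::fmax(A, B); }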
//===----------------------------------------------------------------------===//
// DPP Encodings
//===----------------------------------------------------------------------===//
@@ -1236,6 +1272,15 @@ class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, int subtarget,
Base_VOP2_DPP16<op, ps, opName, p>,
SIMCInstr <ps.PseudoInstr, subtarget>;
+class VOP2_DPP16_Gen<bits<6> op, VOP2_DPP_Pseudo ps, GFXGen Gen,
+ string opName = ps.OpName, VOPProfile p = ps.Pfl> :
+ VOP2_DPP16<op, ps, Gen.Subtarget, opName, p> {
+ let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts,
+ Gen.AssemblerPredicate);
+ let DecoderNamespace = "DPP"#Gen.DecoderNamespace#
+ !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
+}
+
class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
VOPProfile p = ps.Pfl> :
VOP_DPP8<ps.OpName, p> {
@@ -1255,230 +1300,362 @@ class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
let OtherPredicates = ps.OtherPredicates;
}
+
+class VOP2_DPP8_Gen<bits<6> op, VOP2_Pseudo ps, GFXGen Gen,
+ VOPProfile p = ps.Pfl> :
+ VOP2_DPP8<op, ps, p> {
+ let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts,
+ Gen.AssemblerPredicate);
+ let DecoderNamespace = "DPP8"#Gen.DecoderNamespace#
+ !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
+}
//===----------------------------------------------------------------------===//
-// GFX11.
+// GFX11, GFX12.
//===----------------------------------------------------------------------===//
-let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
- //===------------------------------- VOP2 -------------------------------===//
- multiclass VOP2Only_Real_MADK_gfx11<bits<6> op> {
- def _gfx11 :
- VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX11>,
- VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
+//===------------------------------- VOP2 -------------------------------===//
+multiclass VOP2Only_Real_MADK<GFXGen Gen, bits<6> op> {
+ def Gen.Suffix :
+ VOP2_Real_Gen<!cast<VOP2_Pseudo>(NAME), Gen>,
+ VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
+}
+
+multiclass VOP2Only_Real_MADK_with_name<GFXGen Gen, bits<6> op, string asmName,
+ string opName = NAME> {
+ def Gen.Suffix :
+ VOP2_Real_Gen<!cast<VOP2_Pseudo>(opName), Gen>,
+ VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(opName).Pfl> {
+ VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName);
+ let AsmString = asmName # ps.AsmOperands;
}
- multiclass VOP2Only_Real_MADK_gfx11_with_name<bits<6> op, string asmName,
- string opName = NAME> {
- def _gfx11 :
- VOP2_Real<!cast<VOP2_Pseudo>(opName), SIEncodingFamily.GFX11>,
- VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(opName).Pfl> {
- VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName);
+}
+
+multiclass VOP2_Real_e32<GFXGen Gen, bits<6> op> {
+ def _e32#Gen.Suffix :
+ VOP2_Real_Gen<!cast<VOP2_Pseudo>(NAME#"_e32"), Gen>,
+ VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
+}
+
+multiclass VOP2Only_Real_e32<GFXGen Gen, bits<6> op> {
+ let IsSingle = 1 in
+ defm NAME: VOP2_Real_e32<Gen, op>;
+}
+
+multiclass VOP2_Real_e64<GFXGen Gen, bits<6> op> {
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<!cast<VOP3_Pseudo>(NAME#"_e64"), Gen>,
+ VOP3e_gfx11_gfx12<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+}
+
+multiclass VOP2_Real_dpp<GFXGen Gen, bits<6> op> {
+ if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
+ def _dpp#Gen.Suffix : VOP2_DPP16_Gen<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), Gen>;
+}
+
+multiclass VOP2_Real_dpp8<GFXGen Gen, bits<6> op> {
+ if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
+ def _dpp8#Gen.Suffix : VOP2_DPP8_Gen<op, !cast<VOP2_Pseudo>(NAME#"_e32"), Gen>;
+}
+
+//===------------------------- VOP2 (with name) -------------------------===//
+multiclass VOP2_Real_e32_with_name<GFXGen Gen, bits<6> op, string opName,
+ string asmName, bit single = 0> {
+ defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
+ def _e32#Gen.Suffix :
+ VOP2_Real_Gen<ps, Gen, asmName>,
+ VOP2e<op{5-0}, ps.Pfl> {
let AsmString = asmName # ps.AsmOperands;
+ let IsSingle = single;
}
- }
- multiclass VOP2_Real_e32_gfx11<bits<6> op> {
- def _e32_gfx11 :
- VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX11>,
- VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
- }
- multiclass VOP2Only_Real_e32_gfx11<bits<6> op> {
- let IsSingle = 1 in
- defm NAME: VOP2_Real_e32_gfx11<op>;
- }
- multiclass VOP2_Real_e64_gfx11<bits<6> op> {
- def _e64_gfx11 :
- VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX11>,
- VOP3e_gfx11<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
- }
- multiclass VOP2_Real_dpp_gfx11<bits<6> op> {
- if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
- def _dpp_gfx11 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX11> {
- let DecoderNamespace = "DPPGFX11";
- }
- }
- multiclass VOP2_Real_dpp8_gfx11<bits<6> op> {
- if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
- def _dpp8_gfx11 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
- let DecoderNamespace = "DPP8GFX11";
+}
+multiclass VOP2_Real_e64_with_name<GFXGen Gen, bits<6> op, string opName,
+ string asmName> {
+ defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<ps, Gen>,
+ VOP3e_gfx11_gfx12<{0, 1, 0, 0, op{5-0}}, ps.Pfl> {
+ let AsmString = asmName # ps.AsmOperands;
}
- }
+}
- //===------------------------- VOP2 (with name) -------------------------===//
- multiclass VOP2_Real_e32_with_name_gfx11<bits<6> op, string opName,
- string asmName, bit single = 0> {
- defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
- def _e32_gfx11 :
- VOP2_Real<ps, SIEncodingFamily.GFX11, asmName>,
- VOP2e<op{5-0}, ps.Pfl> {
- let AsmString = asmName # ps.AsmOperands;
- let IsSingle = single;
- }
+multiclass VOP2_Real_dpp_with_name<GFXGen Gen, bits<6> op, string opName,
+ string asmName> {
+ defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
+ if ps.Pfl.HasExtDPP then
+ def _dpp#Gen.Suffix : VOP2_DPP16_Gen<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), Gen> {
+ let AsmString = asmName # ps.Pfl.AsmDPP16;
}
- multiclass VOP2_Real_e64_with_name_gfx11<bits<6> op, string opName,
- string asmName> {
- defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
- def _e64_gfx11 :
- VOP3_Real<ps, SIEncodingFamily.GFX11>,
- VOP3e_gfx11<{0, 1, 0, 0, op{5-0}}, ps.Pfl> {
- let AsmString = asmName # ps.AsmOperands;
- }
+}
+multiclass VOP2_Real_dpp8_with_name<GFXGen Gen, bits<6> op, string opName,
+ string asmName> {
+ defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
+ if ps.Pfl.HasExtDPP then
+ def _dpp8#Gen.Suffix : VOP2_DPP8_Gen<op, ps, Gen> {
+ let AsmString = asmName # ps.Pfl.AsmDPP8;
}
+}
- multiclass VOP2_Real_dpp_with_name_gfx11<bits<6> op, string opName,
- string asmName> {
- defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
- if ps.Pfl.HasExtDPP then
- def _dpp_gfx11 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"),
- SIEncodingFamily.GFX11> {
- let AsmString = asmName # ps.Pfl.AsmDPP16;
- let DecoderNamespace = "DPPGFX11";
+//===------------------------------ VOP2be ------------------------------===//
+multiclass VOP2be_Real_e32<GFXGen Gen, bits<6> op, string opName, string asmName> {
+ defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
+ def _e32#Gen.Suffix :
+ VOP2_Real_Gen<ps, Gen>,
+ VOP2e<op{5-0}, ps.Pfl> {
+ let AsmString = asmName # !subst(", vcc", "", ps.AsmOperands);
}
- }
- multiclass VOP2_Real_dpp8_with_name_gfx11<bits<6> op, string opName,
- string asmName> {
- defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
- if ps.Pfl.HasExtDPP then
- def _dpp8_gfx11 : VOP2_DPP8<op, ps> {
- let AsmString = asmName # ps.Pfl.AsmDPP8;
- let DecoderNamespace = "DPP8GFX11";
+}
+multiclass VOP2be_Real_dpp<GFXGen Gen, bits<6> op, string opName, string asmName> {
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
+ def _dpp#Gen.Suffix :
+ VOP2_DPP16_Gen<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), Gen, asmName> {
+ string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
+ let AsmString = asmName # !subst(", vcc", "", AsmDPP);
}
- }
-
- //===------------------------------ VOP2be ------------------------------===//
- multiclass VOP2be_Real_e32_gfx11<bits<6> op, string opName, string asmName> {
- defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
- def _e32_gfx11 :
- VOP2_Real<ps, SIEncodingFamily.GFX11>,
- VOP2e<op{5-0}, ps.Pfl> {
- let AsmString = asmName # !subst(", vcc", "", ps.AsmOperands);
- }
- }
- multiclass VOP2be_Real_dpp_gfx11<bits<6> op, string opName, string asmName> {
- if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
- def _dpp_gfx11 :
- VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX11, asmName> {
- string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
- let AsmString = asmName # !subst(", vcc", "", AsmDPP);
- let DecoderNamespace = "DPPGFX11";
- }
- if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
- def _dpp_w32_gfx11 :
- Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
- string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
- let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
- let isAsmParserOnly = 1;
- let WaveSizePredicate = isWave32;
- }
- if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
- def _dpp_w64_gfx11 :
- Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
- string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
- let AsmString = asmName # AsmDPP;
- let isAsmParserOnly = 1;
- let WaveSizePredicate = isWave64;
- }
- }
- multiclass VOP2be_Real_dpp8_gfx11<bits<6> op, string opName, string asmName> {
- if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
- def _dpp8_gfx11 :
- VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
- string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
- let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
- let DecoderNamespace = "DPP8GFX11";
- }
- if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
- def _dpp8_w32_gfx11 :
- VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
- string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
- let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
- let isAsmParserOnly = 1;
- let WaveSizePredicate = isWave32;
- }
- if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
- def _dpp8_w64_gfx11 :
- VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
- string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
- let AsmString = asmName # AsmDPP8;
- let isAsmParserOnly = 1;
- let WaveSizePredicate = isWave64;
- }
- }
-
-} // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11"
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
+ def _dpp_w32#Gen.Suffix :
+ Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
+ string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
+ let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave32;
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let DecoderNamespace = Gen.DecoderNamespace;
+ }
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
+ def _dpp_w64#Gen.Suffix :
+ Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
+ string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
+ let AsmString = asmName # AsmDPP;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave64;
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let DecoderNamespace = Gen.DecoderNamespace;
+ }
+}
+multiclass VOP2be_Real_dpp8<GFXGen Gen, bits<6> op, string opName, string asmName> {
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
+ def _dpp8#Gen.Suffix :
+ VOP2_DPP8_Gen<op, !cast<VOP2_Pseudo>(opName#"_e32"), Gen> {
+ string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
+ let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
+ }
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
+ def _dpp8_w32#Gen.Suffix :
+ VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
+ string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
+ let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave32;
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let DecoderNamespace = Gen.DecoderNamespace;
+ }
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
+ def _dpp8_w64#Gen.Suffix :
+ VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
+ string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
+ let AsmString = asmName # AsmDPP8;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave64;
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let DecoderNamespace = Gen.DecoderNamespace;
+ }
+}
// We don't want to override the separate DecoderNamespaces already set within these multiclasses.
-multiclass VOP2_Realtriple_e64_gfx11<bits<6> op> {
- defm NAME : VOP3_Realtriple_gfx11<{0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, NAME> ;
+multiclass VOP2_Realtriple_e64<GFXGen Gen, bits<6> op> {
+ defm NAME : VOP3_Realtriple<Gen, {0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, NAME> ;
}
-multiclass VOP2_Realtriple_e64_with_name_gfx11<bits<6> op, string opName,
+
+multiclass VOP2_Realtriple_e64_with_name<GFXGen Gen, bits<6> op, string opName,
string asmName> {
- defm NAME : VOP3_Realtriple_with_name_gfx11<{0, 1, 0, 0, op{5-0}}, opName, asmName> ;
+ defm NAME : VOP3_Realtriple_with_name<Gen, {0, 1, 0, 0, op{5-0}}, opName, asmName> ;
}
-multiclass VOP2be_Real_gfx11<bits<6> op, string opName, string asmName> :
- VOP2be_Real_e32_gfx11<op, opName, asmName>,
- VOP3be_Realtriple_gfx11<{0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, opName, asmName>,
- VOP2be_Real_dpp_gfx11<op, opName, asmName>,
- VOP2be_Real_dpp8_gfx11<op, opName, asmName>;
+multiclass VOP2be_Real<GFXGen Gen, bits<6> op, string opName, string asmName> :
+ VOP2be_Real_e32<Gen, op, opName, asmName>,
+ VOP3be_Realtriple<Gen, {0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, opName, asmName>,
+ VOP2be_Real_dpp<Gen, op, opName, asmName>,
+ VOP2be_Real_dpp8<Gen, op, opName, asmName>;
// Only for CNDMASK
-multiclass VOP2e_Real_gfx11<bits<6> op, string opName, string asmName> :
- VOP2_Real_e32_gfx11<op>,
- VOP2_Realtriple_e64_gfx11<op>,
- VOP2be_Real_dpp_gfx11<op, opName, asmName>,
- VOP2be_Real_dpp8_gfx11<op, opName, asmName>;
+multiclass VOP2e_Real<GFXGen Gen, bits<6> op, string opName, string asmName> :
+ VOP2_Real_e32<Gen, op>,
+ VOP2_Realtriple_e64<Gen, op>,
+ VOP2be_Real_dpp<Gen, op, opName, asmName>,
+ VOP2be_Real_dpp8<Gen, op, opName, asmName>;
+
+multiclass VOP2Only_Real<GFXGen Gen, bits<6> op> :
+ VOP2Only_Real_e32<Gen, op>,
+ VOP2_Real_dpp<Gen, op>,
+ VOP2_Real_dpp8<Gen, op>;
+
+multiclass VOP2_Real_FULL<GFXGen Gen, bits<6> op> :
+ VOP2_Realtriple_e64<Gen, op>,
+ VOP2_Real_e32<Gen, op>,
+ VOP2_Real_dpp<Gen, op>,
+ VOP2_Real_dpp8<Gen, op>;
+
+multiclass VOP2_Real_NO_VOP3_with_name<GFXGen Gen, bits<6> op, string opName,
+ string asmName, bit isSingle = 0> {
+ defm NAME : VOP2_Real_e32_with_name<Gen, op, opName, asmName, isSingle>,
+ VOP2_Real_dpp_with_name<Gen, op, opName, asmName>,
+ VOP2_Real_dpp8_with_name<Gen, op, opName, asmName>;
+ defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
+ def Gen.Suffix#"_alias" : MnemonicAlias<ps.Mnemonic, asmName>, Requires<[Gen.AssemblerPredicate]>;
+}
-multiclass VOP2Only_Real_gfx11<bits<6> op> :
- VOP2Only_Real_e32_gfx11<op>,
- VOP2_Real_dpp_gfx11<op>,
- VOP2_Real_dpp8_gfx11<op>;
+multiclass VOP2_Real_FULL_with_name<GFXGen Gen, bits<6> op, string opName,
+ string asmName> :
+ VOP2_Realtriple_e64_with_name<Gen, op, opName, asmName>,
+ VOP2_Real_NO_VOP3_with_name<Gen, op, opName, asmName>;
-multiclass VOP2_Real_NO_VOP3_gfx11<bits<6> op> :
- VOP2_Real_e32_gfx11<op>, VOP2_Real_dpp_gfx11<op>, VOP2_Real_dpp8_gfx11<op>;
+multiclass VOP2_Real_NO_DPP_with_name<GFXGen Gen, bits<6> op, string opName,
+ string asmName> {
+ defm NAME : VOP2_Real_e32_with_name<Gen, op, opName, asmName>,
+ VOP2_Real_e64_with_name<Gen, op, opName, asmName>;
+ defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
+ def Gen.Suffix#"_alias" : MnemonicAlias<ps.Mnemonic, asmName>, Requires<[Gen.AssemblerPredicate]>;
+}
-multiclass VOP2_Real_FULL_gfx11<bits<6> op> :
- VOP2_Realtriple_e64_gfx11<op>, VOP2_Real_NO_VOP3_gfx11<op>;
+multiclass VOP2_Real_NO_DPP_with_alias<GFXGen Gen, bits<6> op, string alias> {
+ defm NAME : VOP2_Real_e32<Gen, op>,
+ VOP2_Real_e64<Gen, op>;
+ def Gen.Suffix#"_alias" : MnemonicAlias<alias, NAME>, Requires<[Gen.AssemblerPredicate]>;
+}
-multiclass VOP2_Real_NO_VOP3_with_name_gfx11<bits<6> op, string opName,
- string asmName, bit isSingle = 0> {
+//===----------------------------------------------------------------------===//
+// GFX12.
+//===----------------------------------------------------------------------===//
- defm NAME : VOP2_Real_e32_with_name_gfx11<op, opName, asmName, isSingle>,
- VOP2_Real_dpp_with_name_gfx11<op, opName, asmName>,
- VOP2_Real_dpp8_with_name_gfx11<op, opName, asmName>;
- defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
- def _gfx11_alias : MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Plus]>;
+multiclass VOP2be_Real_gfx12<bits<6> op, string opName, string asmName> :
+ VOP2be_Real<GFX12Gen, op, opName, asmName>;
+
+// Only for CNDMASK
+multiclass VOP2e_Real_gfx12<bits<6> op, string opName, string asmName> :
+ VOP2e_Real<GFX12Gen, op, opName, asmName>;
+
+multiclass VOP2_Real_FULL_with_name_gfx12<bits<6> op, string opName,
+ string asmName> :
+ VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
+
+multiclass VOP2_Real_FULL_t16_with_name_gfx12<bits<6> op, string opName,
+ string asmName, string alias> {
+ defm NAME : VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
+ def _gfx12_2nd_alias : MnemonicAlias<alias, asmName>, Requires<[isGFX12Only]>;
}
-multiclass VOP2_Real_FULL_with_name_gfx11<bits<6> op, string opName,
- string asmName> :
- VOP2_Realtriple_e64_with_name_gfx11<op, opName, asmName>,
- VOP2_Real_NO_VOP3_with_name_gfx11<op, opName, asmName>;
+multiclass VOP2_Real_NO_DPP_with_name_gfx12<bits<6> op, string opName,
+ string asmName> :
+ VOP2_Real_NO_DPP_with_name<GFX12Gen, op, opName, asmName>;
-multiclass VOP2_Real_FULL_t16_gfx11<bits<6> op, string asmName, string opName = NAME>
- : VOP2_Real_FULL_with_name_gfx11<op, opName, asmName>;
+multiclass VOP2_Real_NO_DPP_with_alias_gfx12<bits<6> op, string alias> :
+ VOP2_Real_NO_DPP_with_alias<GFX12Gen, op, alias>;
-multiclass VOP2_Real_NO_DPP_gfx11<bits<6> op> :
- VOP2_Real_e32_gfx11<op>, VOP2_Real_e64_gfx11<op>;
+defm V_ADD_F64 : VOP2_Real_NO_DPP_with_name_gfx12<0x002, "V_ADD_F64_pseudo", "v_add_f64">;
+defm V_MUL_F64 : VOP2_Real_NO_DPP_with_name_gfx12<0x006, "V_MUL_F64_pseudo", "v_mul_f64">;
+defm V_LSHLREV_B64 : VOP2_Real_NO_DPP_with_name_gfx12<0x01f, "V_LSHLREV_B64_pseudo", "v_lshlrev_b64">;
+defm V_MIN_NUM_F64 : VOP2_Real_NO_DPP_with_alias_gfx12<0x00d, "v_min_f64">;
+defm V_MAX_NUM_F64 : VOP2_Real_NO_DPP_with_alias_gfx12<0x00e, "v_max_f64">;
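+// VOP2_Real_NO_DPP_with_alias also emits a MnemonicAlias record, so the old
+// v_min_f64/v_max_f64 spellings are still accepted on GFX12 and assemble to
+// the renamed v_min_num_f64/v_max_num_f64 instructions.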
-multiclass VOP2_Real_NO_DPP_with_name_gfx11<bits<6> op, string opName,
- string asmName> {
- defm NAME : VOP2_Real_e32_with_name_gfx11<op, opName, asmName>,
- VOP2_Real_e64_with_name_gfx11<op, opName, asmName>;
+defm V_CNDMASK_B32 : VOP2e_Real_gfx12<0x001, "V_CNDMASK_B32", "v_cndmask_b32">;
+defm V_ADD_CO_CI_U32 :
+ VOP2be_Real_gfx12<0x020, "V_ADDC_U32", "v_add_co_ci_u32">;
+defm V_SUB_CO_CI_U32 :
+ VOP2be_Real_gfx12<0x021, "V_SUBB_U32", "v_sub_co_ci_u32">;
+defm V_SUBREV_CO_CI_U32 :
+ VOP2be_Real_gfx12<0x022, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;
+
+defm V_MIN_NUM_F32 : VOP2_Real_FULL_with_name_gfx12<0x015, "V_MIN_F32", "v_min_num_f32">;
+defm V_MAX_NUM_F32 : VOP2_Real_FULL_with_name_gfx12<0x016, "V_MAX_F32", "v_max_num_f32">;
+defm V_MIN_NUM_F16 : VOP2_Real_FULL_t16_with_name_gfx12<0x030, "V_MIN_F16_t16", "v_min_num_f16", "v_min_f16">;
+defm V_MIN_NUM_F16_fake16 : VOP2_Real_FULL_t16_with_name_gfx12<0x030, "V_MIN_F16_fake16", "v_min_num_f16", "v_min_f16">;
+defm V_MAX_NUM_F16 : VOP2_Real_FULL_t16_with_name_gfx12<0x031, "V_MAX_F16_t16", "v_max_num_f16", "v_max_f16">;
+defm V_MAX_NUM_F16_fake16 : VOP2_Real_FULL_t16_with_name_gfx12<0x031, "V_MAX_F16_fake16", "v_max_num_f16", "v_max_f16">;
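+// Each 16-bit op above is realized twice at the same opcode: a _t16 variant
+// with true 16-bit register operands and a _fake16 variant that presumably
+// keeps 32-bit VGPRs; the profile's IsRealTrue16 bit picks the assembler
+// predicate and decoder namespace for each (see VOP2_DPP16_Gen above).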
+
+let SubtargetPredicate = isGFX12Plus in {
+ defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx12>;
+
+ defm : VOP2bInstAliases<
+ V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx12, "v_add_co_ci_u32">;
+ defm : VOP2bInstAliases<
+ V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx12, "v_sub_co_ci_u32">;
+ defm : VOP2bInstAliases<
+ V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx12, "v_subrev_co_ci_u32">;
+} // End SubtargetPredicate = isGFX12Plus
+
+//===----------------------------------------------------------------------===//
+// GFX11.
+//===----------------------------------------------------------------------===//
+
+multiclass VOP2be_Real_gfx11<bits<6> op, string opName, string asmName> :
+ VOP2be_Real<GFX11Gen, op, opName, asmName>;
+
+// Only for CNDMASK
+multiclass VOP2e_Real_gfx11<bits<6> op, string opName, string asmName> :
+ VOP2e_Real<GFX11Gen, op, opName, asmName>;
+
+multiclass VOP2_Real_NO_VOP3_with_name_gfx11<bits<6> op, string opName,
+ string asmName, bit isSingle = 0> {
+ defm NAME : VOP2_Real_e32_with_name<GFX11Gen, op, opName, asmName, isSingle>,
+ VOP2_Real_dpp_with_name<GFX11Gen, op, opName, asmName>,
+ VOP2_Real_dpp8_with_name<GFX11Gen, op, opName, asmName>;
defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
- def _gfx11_alias : MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Plus]>;
+ def _gfx11_alias : MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Only]>;
}
+multiclass VOP2_Real_NO_DPP_with_name_gfx11<bits<6> op, string opName,
+ string asmName> :
+ VOP2_Real_NO_DPP_with_name<GFX11Gen, op, opName, asmName>;
+
+multiclass VOP2_Real_FULL_gfx11_gfx12<bits<6> op> :
+ VOP2_Real_FULL<GFX11Gen, op>, VOP2_Real_FULL<GFX12Gen, op>;
+
+multiclass VOP2_Real_FULL_with_name_gfx11_gfx12<bits<6> op, string opName,
+ string asmName> :
+ VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
+ VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
+
+multiclass VOP2_Real_e32_gfx11_gfx12<bits<6> op> :
+ VOP2Only_Real<GFX11Gen, op>, VOP2Only_Real<GFX12Gen, op>;
+
+multiclass VOP3Only_Realtriple_gfx11_gfx12<bits<10> op> :
+ VOP3Only_Realtriple<GFX11Gen, op>, VOP3Only_Realtriple<GFX12Gen, op>;
+
+multiclass VOP3Only_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName> :
+ VOP3Only_Realtriple_t16<GFX11Gen, op, asmName>,
+ VOP3Only_Realtriple_t16<GFX12Gen, op, asmName>;
+
+multiclass VOP3beOnly_Realtriple_gfx11_gfx12<bits<10> op> :
+ VOP3beOnly_Realtriple<GFX11Gen, op>, VOP3beOnly_Realtriple<GFX12Gen, op>;
+
+multiclass VOP2Only_Real_MADK_with_name_gfx11_gfx12<bits<6> op, string asmName,
+ string opName = NAME> :
+ VOP2Only_Real_MADK_with_name<GFX11Gen, op, asmName, opName>,
+ VOP2Only_Real_MADK_with_name<GFX12Gen, op, asmName, opName>;
+
+multiclass VOP2_Real_FULL_t16_gfx11<bits<6> op, string asmName,
+ string opName = NAME> :
+ VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>;
+
+multiclass VOP2_Real_FULL_t16_gfx11_gfx12<bits<6> op, string asmName,
+ string opName = NAME> :
+ VOP2_Real_FULL_with_name_gfx11_gfx12<op, opName, asmName>;
+
+multiclass VOP2_Real_FULL_gfx11<bits<6> op> :
+ VOP2_Real_FULL<GFX11Gen, op>;
+
defm V_CNDMASK_B32 : VOP2e_Real_gfx11<0x001, "V_CNDMASK_B32",
"v_cndmask_b32">;
defm V_DOT2ACC_F32_F16 : VOP2_Real_NO_VOP3_with_name_gfx11<0x002,
"V_DOT2C_F32_F16", "v_dot2acc_f32_f16", 1>;
defm V_FMAC_DX9_ZERO_F32 : VOP2_Real_NO_DPP_with_name_gfx11<0x006,
"V_FMAC_LEGACY_F32", "v_fmac_dx9_zero_f32">;
-defm V_MUL_DX9_ZERO_F32 : VOP2_Real_FULL_with_name_gfx11<0x007,
+defm V_MUL_DX9_ZERO_F32 : VOP2_Real_FULL_with_name_gfx11_gfx12<0x007,
"V_MUL_LEGACY_F32", "v_mul_dx9_zero_f32">;
-defm V_LSHLREV_B32 : VOP2_Real_FULL_gfx11<0x018>;
-defm V_LSHRREV_B32 : VOP2_Real_FULL_gfx11<0x019>;
-defm V_ASHRREV_I32 : VOP2_Real_FULL_gfx11<0x01a>;
+defm V_LSHLREV_B32 : VOP2_Real_FULL_gfx11_gfx12<0x018>;
+defm V_LSHRREV_B32 : VOP2_Real_FULL_gfx11_gfx12<0x019>;
+defm V_ASHRREV_I32 : VOP2_Real_FULL_gfx11_gfx12<0x01a>;
defm V_ADD_CO_CI_U32 :
VOP2be_Real_gfx11<0x020, "V_ADDC_U32", "v_add_co_ci_u32">;
defm V_SUB_CO_CI_U32 :
@@ -1486,37 +1663,43 @@ defm V_SUB_CO_CI_U32 :
defm V_SUBREV_CO_CI_U32 :
VOP2be_Real_gfx11<0x022, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;
-defm V_CVT_PK_RTZ_F16_F32 : VOP2_Real_FULL_with_name_gfx11<0x02f,
+defm V_CVT_PK_RTZ_F16_F32 : VOP2_Real_FULL_with_name_gfx11_gfx12<0x02f,
"V_CVT_PKRTZ_F16_F32", "v_cvt_pk_rtz_f16_f32">;
-defm V_PK_FMAC_F16 : VOP2Only_Real_gfx11<0x03c>;
-
-defm V_ADD_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x032, "v_add_f16">;
-defm V_SUB_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x033, "v_sub_f16">;
-defm V_SUBREV_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x034, "v_subrev_f16">;
-defm V_MUL_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x035, "v_mul_f16">;
-defm V_FMAC_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x036, "v_fmac_f16">;
-defm V_LDEXP_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03b, "v_ldexp_f16">;
+defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx11_gfx12<0x03c>;
+
+defm V_ADD_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x032, "v_add_f16">;
+defm V_ADD_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x032, "v_add_f16">;
+defm V_SUB_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x033, "v_sub_f16">;
+defm V_SUB_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x033, "v_sub_f16">;
+defm V_SUBREV_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x034, "v_subrev_f16">;
+defm V_SUBREV_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x034, "v_subrev_f16">;
+defm V_MUL_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">;
+defm V_MUL_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">;
+defm V_FMAC_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x036, "v_fmac_f16">;
+defm V_LDEXP_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x03b, "v_ldexp_f16">;
defm V_MAX_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
+defm V_MAX_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
defm V_MIN_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;
-defm V_FMAMK_F16_t16 : VOP2Only_Real_MADK_gfx11_with_name<0x037, "v_fmamk_f16">;
-defm V_FMAAK_F16_t16 : VOP2Only_Real_MADK_gfx11_with_name<0x038, "v_fmaak_f16">;
+defm V_MIN_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;
+defm V_FMAMK_F16_t16 : VOP2Only_Real_MADK_with_name_gfx11_gfx12<0x037, "v_fmamk_f16">;
+defm V_FMAAK_F16_t16 : VOP2Only_Real_MADK_with_name_gfx11_gfx12<0x038, "v_fmaak_f16">;
// VOP3 only.
-defm V_CNDMASK_B16 : VOP3Only_Realtriple_gfx11<0x25d>;
-defm V_LDEXP_F32 : VOP3Only_Realtriple_gfx11<0x31c>;
-defm V_BFM_B32 : VOP3Only_Realtriple_gfx11<0x31d>;
-defm V_BCNT_U32_B32 : VOP3Only_Realtriple_gfx11<0x31e>;
-defm V_MBCNT_LO_U32_B32 : VOP3Only_Realtriple_gfx11<0x31f>;
-defm V_MBCNT_HI_U32_B32 : VOP3Only_Realtriple_gfx11<0x320>;
-defm V_CVT_PK_NORM_I16_F32 : VOP3Only_Realtriple_with_name_gfx11<0x321, "V_CVT_PKNORM_I16_F32", "v_cvt_pk_norm_i16_f32">;
-defm V_CVT_PK_NORM_U16_F32 : VOP3Only_Realtriple_with_name_gfx11<0x322, "V_CVT_PKNORM_U16_F32", "v_cvt_pk_norm_u16_f32">;
-defm V_CVT_PK_U16_U32 : VOP3Only_Realtriple_gfx11<0x323>;
-defm V_CVT_PK_I16_I32 : VOP3Only_Realtriple_gfx11<0x324>;
-defm V_ADD_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x300>;
-defm V_SUB_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x301>;
-defm V_SUBREV_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x302>;
-
-let SubtargetPredicate = isGFX11Plus in {
+defm V_CNDMASK_B16 : VOP3Only_Realtriple_gfx11_gfx12<0x25d>;
+defm V_LDEXP_F32 : VOP3Only_Realtriple_gfx11_gfx12<0x31c>;
+defm V_BFM_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x31d>;
+defm V_BCNT_U32_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x31e>;
+defm V_MBCNT_LO_U32_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x31f>;
+defm V_MBCNT_HI_U32_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x320>;
+defm V_CVT_PK_NORM_I16_F32 : VOP3Only_Realtriple_with_name_gfx11_gfx12<0x321, "V_CVT_PKNORM_I16_F32", "v_cvt_pk_norm_i16_f32">;
+defm V_CVT_PK_NORM_U16_F32 : VOP3Only_Realtriple_with_name_gfx11_gfx12<0x322, "V_CVT_PKNORM_U16_F32", "v_cvt_pk_norm_u16_f32">;
+defm V_CVT_PK_U16_U32 : VOP3Only_Realtriple_gfx11_gfx12<0x323>;
+defm V_CVT_PK_I16_I32 : VOP3Only_Realtriple_gfx11_gfx12<0x324>;
+defm V_ADD_CO_U32 : VOP3beOnly_Realtriple_gfx11_gfx12<0x300>;
+defm V_SUB_CO_U32 : VOP3beOnly_Realtriple_gfx11_gfx12<0x301>;
+defm V_SUBREV_CO_U32 : VOP3beOnly_Realtriple_gfx11_gfx12<0x302>;
+
+let SubtargetPredicate = isGFX11Only in {
defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx11>;
defm : VOP2bInstAliases<
@@ -1525,7 +1708,7 @@ let SubtargetPredicate = isGFX11Plus in {
V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx11, "v_sub_co_ci_u32">;
defm : VOP2bInstAliases<
V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx11, "v_subrev_co_ci_u32">;
-} // End SubtargetPredicate = isGFX11Plus
+} // End SubtargetPredicate = isGFX11Only
//===----------------------------------------------------------------------===//
// GFX10.
@@ -1747,7 +1930,10 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
multiclass VOP2Only_Real_MADK_gfx10_gfx11<bits<6> op> :
- VOP2Only_Real_MADK_gfx10<op>, VOP2Only_Real_MADK_gfx11<op>;
+ VOP2Only_Real_MADK_gfx10<op>, VOP2Only_Real_MADK<GFX11Gen, op>;
+
+multiclass VOP2Only_Real_MADK_gfx10_gfx11_gfx12<bits<6> op> :
+ VOP2Only_Real_MADK_gfx10_gfx11<op>, VOP2Only_Real_MADK<GFX12Gen, op>;
multiclass VOP2be_Real_gfx10<bits<6> op, string opName, string asmName> :
VOP2be_Real_e32_gfx10<op, opName, asmName>,
@@ -1768,7 +1954,10 @@ multiclass VOP2_Real_gfx10<bits<6> op> :
VOP2_Real_sdwa_gfx10<op>, VOP2_Real_dpp_gfx10<op>, VOP2_Real_dpp8_gfx10<op>;
multiclass VOP2_Real_gfx10_gfx11<bits<6> op> :
- VOP2_Real_gfx10<op>, VOP2_Real_FULL_gfx11<op>;
+ VOP2_Real_gfx10<op>, VOP2_Real_FULL<GFX11Gen, op>;
+
+multiclass VOP2_Real_gfx10_gfx11_gfx12<bits<6> op> :
+ VOP2_Real_gfx10_gfx11<op>, VOP2_Real_FULL<GFX12Gen, op>;
multiclass VOP2_Real_with_name_gfx10<bits<6> op, string opName,
string asmName> :
@@ -1778,19 +1967,20 @@ multiclass VOP2_Real_with_name_gfx10<bits<6> op, string opName,
VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>,
VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>;
-multiclass VOP2_Real_with_name_gfx10_gfx11<bits<6> op, string opName,
- string asmName> :
+multiclass VOP2_Real_with_name_gfx10_gfx11_gfx12<bits<6> op, string opName,
+ string asmName> :
VOP2_Real_with_name_gfx10<op, opName, asmName>,
- VOP2_Real_FULL_with_name_gfx11<op, opName, asmName>;
+ VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
+ VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
// NB: Same opcode as v_mac_legacy_f32
let DecoderNamespace = "GFX10_B" in
defm V_FMAC_LEGACY_F32 : VOP2_Real_gfx10<0x006>;
-defm V_XNOR_B32 : VOP2_Real_gfx10_gfx11<0x01e>;
-defm V_FMAC_F32 : VOP2_Real_gfx10_gfx11<0x02b>;
-defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10_gfx11<0x02c>;
-defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10_gfx11<0x02d>;
+defm V_XNOR_B32 : VOP2_Real_gfx10_gfx11_gfx12<0x01e>;
+defm V_FMAC_F32 : VOP2_Real_gfx10_gfx11_gfx12<0x02b>;
+defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10_gfx11_gfx12<0x02c>;
+defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10_gfx11_gfx12<0x02d>;
defm V_ADD_F16 : VOP2_Real_gfx10<0x032>;
defm V_SUB_F16 : VOP2_Real_gfx10<0x033>;
defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>;
@@ -1808,11 +1998,11 @@ let IsSingle = 1 in {
// VOP2 no carry-in, carry-out.
defm V_ADD_NC_U32 :
- VOP2_Real_with_name_gfx10_gfx11<0x025, "V_ADD_U32", "v_add_nc_u32">;
+ VOP2_Real_with_name_gfx10_gfx11_gfx12<0x025, "V_ADD_U32", "v_add_nc_u32">;
defm V_SUB_NC_U32 :
- VOP2_Real_with_name_gfx10_gfx11<0x026, "V_SUB_U32", "v_sub_nc_u32">;
+ VOP2_Real_with_name_gfx10_gfx11_gfx12<0x026, "V_SUB_U32", "v_sub_nc_u32">;
defm V_SUBREV_NC_U32 :
- VOP2_Real_with_name_gfx10_gfx11<0x027, "V_SUBREV_U32", "v_subrev_nc_u32">;
+ VOP2_Real_with_name_gfx10_gfx11_gfx12<0x027, "V_SUBREV_U32", "v_subrev_nc_u32">;
// VOP2 carry-in, carry-out.
defm V_ADD_CO_CI_U32 :
@@ -1905,7 +2095,10 @@ multiclass VOP2_Real_gfx6_gfx7_gfx10<bits<6> op> :
VOP2_Real_gfx6_gfx7<op>, VOP2_Real_gfx10<op>;
multiclass VOP2_Real_gfx6_gfx7_gfx10_gfx11<bits<6> op> :
- VOP2_Real_gfx6_gfx7_gfx10<op>, VOP2_Real_FULL_gfx11<op>;
+ VOP2_Real_gfx6_gfx7_gfx10<op>, VOP2_Real_FULL<GFX11Gen, op>;
+
+multiclass VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<6> op> :
+ VOP2_Real_gfx6_gfx7_gfx10_gfx11<op>, VOP2_Real_FULL<GFX12Gen, op>;
multiclass VOP2be_Real_gfx6_gfx7<bits<6> op> :
VOP2_Real_e32_gfx6_gfx7<op>, VOP2be_Real_e64_gfx6_gfx7<op>;
@@ -1967,28 +2160,28 @@ let SubtargetPredicate = isGFX6GFX7 in {
def : VOP2e64InstAlias<V_SUBREV_CO_U32_e64, V_SUBREV_I32_e64_gfx6_gfx7>;
} // End SubtargetPredicate = isGFX6GFX7
-defm V_ADD_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x003>;
-defm V_SUB_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x004>;
-defm V_SUBREV_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x005>;
+defm V_ADD_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x003>;
+defm V_SUB_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x004>;
+defm V_SUBREV_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x005>;
defm V_MAC_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x006>;
defm V_MUL_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x007>;
-defm V_MUL_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x008>;
-defm V_MUL_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x009>;
-defm V_MUL_HI_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00a>;
-defm V_MUL_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00b>;
-defm V_MUL_HI_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00c>;
+defm V_MUL_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x008>;
+defm V_MUL_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x009>;
+defm V_MUL_HI_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00a>;
+defm V_MUL_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00b>;
+defm V_MUL_HI_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00c>;
defm V_MIN_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00f>;
defm V_MAX_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x010>;
-defm V_MIN_I32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x011>;
-defm V_MAX_I32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x012>;
-defm V_MIN_U32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x013>;
-defm V_MAX_U32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x014>;
+defm V_MIN_I32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x011>;
+defm V_MAX_I32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x012>;
+defm V_MIN_U32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x013>;
+defm V_MAX_U32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x014>;
defm V_LSHRREV_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x016>;
defm V_ASHRREV_I32 : VOP2_Real_gfx6_gfx7_gfx10<0x018>;
defm V_LSHLREV_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x01a>;
-defm V_AND_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x01b>;
-defm V_OR_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x01c>;
-defm V_XOR_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x01d>;
+defm V_AND_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x01b>;
+defm V_OR_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x01c>;
+defm V_XOR_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x01d>;
defm V_MAC_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x01f>;
defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x02f>;
defm V_MADMK_F32 : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x020>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index c0e0ac1b4ec8..eebd323210f9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -144,11 +144,15 @@ defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_a
let SchedRW = [WriteDoubleAdd] in {
let FPDPRounding = 1 in {
defm V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, any_fma>;
+let SubtargetPredicate = isNotGFX12Plus in {
defm V_ADD_F64 : VOP3Inst <"v_add_f64", VOP3_Profile<VOP_F64_F64_F64>, any_fadd>;
defm V_MUL_F64 : VOP3Inst <"v_mul_f64", VOP3_Profile<VOP_F64_F64_F64>, any_fmul>;
+} // End SubtargetPredicate = isNotGFX12Plus
} // End FPDPRounding = 1
+let SubtargetPredicate = isNotGFX12Plus in {
defm V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile<VOP_F64_F64_F64>, fminnum_like>;
defm V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum_like>;
+} // End SubtargetPredicate = isNotGFX12Plus
} // End SchedRW = [WriteDoubleAdd]
let SchedRW = [WriteIntMul] in {
@@ -157,6 +161,19 @@ defm V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", V_MUL_PROF<VOP_I32_I32_I32>, mulhu
defm V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", V_MUL_PROF<VOP_I32_I32_I32>>;
defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF<VOP_I32_I32_I32>, mulhs>;
} // End SchedRW = [WriteIntMul]
+
+let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
+defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fminimum>>;
+defm V_MAXIMUM_F32 : VOP3Inst <"v_maximum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fmaximum>>;
+defm V_MINIMUM_F16 : VOP3Inst <"v_minimum_f16", VOP3_Profile<VOP_F16_F16_F16>, DivergentBinFrag<fminimum>>;
+defm V_MAXIMUM_F16 : VOP3Inst <"v_maximum_f16", VOP3_Profile<VOP_F16_F16_F16>, DivergentBinFrag<fmaximum>>;
+
+let SchedRW = [WriteDoubleAdd] in {
+defm V_MINIMUM_F64 : VOP3Inst <"v_minimum_f64", VOP3_Profile<VOP_F64_F64_F64>, fminimum>;
+defm V_MAXIMUM_F64 : VOP3Inst <"v_maximum_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaximum>;
+} // End SchedRW = [WriteDoubleAdd]
+} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
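+// Unlike the fminnum_like/fmaxnum_like operations above, fminimum/fmaximum
+// follow IEEE-754-2019 minimum/maximum semantics: a NaN input propagates to
+// the result and -0.0 orders below +0.0. ReadsModeReg = 0 presumably because
+// the result does not depend on the MODE register's rounding controls.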
+
} // End isReMaterializable = 1
let Uses = [MODE, VCC, EXEC] in {
@@ -207,6 +224,11 @@ let mayRaiseFPException = 0 in {
defm V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmed3>;
} // End mayRaiseFPException = 0
+let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
+ defm V_MINIMUM3_F32 : VOP3Inst <"v_minimum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfminimum3>;
+ defm V_MAXIMUM3_F32 : VOP3Inst <"v_maximum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmaximum3>;
+} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
+
let isCommutable = 1 in {
defm V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
@@ -254,10 +276,13 @@ let SchedRW = [Write64Bit] in {
} // End SubtargetPredicate = isGFX6GFX7
let SubtargetPredicate = isGFX8Plus in {
- defm V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>, clshl_rev_64>;
defm V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>, clshr_rev_64>;
defm V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>, cashr_rev_64>;
} // End SubtargetPredicate = isGFX8Plus
+
+ let SubtargetPredicate = isGFX8GFX9GFX10GFX11 in {
+ defm V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>, clshl_rev_64>;
+ } // End SubtargetPredicate = isGFX8GFX9GFX10GFX11
} // End SchedRW = [Write64Bit]
} // End isReMaterializable = 1
@@ -515,6 +540,16 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
let HasExtVOP3DPP = 0;
}
+def IsPow2Plus1: PatLeaf<(i32 imm), [{
+ uint32_t V = N->getZExtValue();
+ return isPowerOf2_32(V - 1);
+}]>;
+
+def Log2_32: SDNodeXForm<imm, [{
+ uint32_t V = N->getZExtValue();
+ return CurDAG->getTargetConstant(Log2_32(V - 1), SDLoc(N), MVT::i32);
+}]>;
+
let SubtargetPredicate = isGFX9Plus in {
let isCommutable = 1, isReMaterializable = 1 in {
defm V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@@ -538,6 +573,11 @@ defm V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3
defm V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmax3>;
defm V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumax3>;
+let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
+ defm V_MINIMUM3_F16 : VOP3Inst <"v_minimum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfminimum3>;
+ defm V_MAXIMUM3_F16 : VOP3Inst <"v_maximum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmaximum3>;
+} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
+
defm V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
defm V_SUB_I16 : VOP3Inst <"v_sub_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
@@ -612,6 +652,10 @@ def : ThreeOp_i32_Pats<and, or, V_AND_OR_B32_e64>;
def : ThreeOp_i32_Pats<or, or, V_OR3_B32_e64>;
def : ThreeOp_i32_Pats<xor, add, V_XAD_U32_e64>;
+def : GCNPat<
+ (DivergentBinFrag<mul> i32:$src0, IsPow2Plus1:$src1),
+ (V_LSHL_ADD_U32_e64 i32:$src0, (i32 (Log2_32 imm:$src1)), i32:$src0)>;
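+// i.e. a divergent multiply by an immediate of the form 2^n + 1 is folded
+// into shift-and-add: x * (2^n + 1) = (x << n) + x. For example, x * 9
+// selects v_lshl_add_u32 x, 3, x; IsPow2Plus1 matches the immediate and
+// Log2_32 extracts the shift amount n.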
+
let SubtargetPredicate = isGFX940Plus in
def : GCNPat<
(ThreeOpFrag<shl_0_to_4, add> i64:$src0, i32:$src1, i64:$src2),
@@ -664,11 +708,22 @@ multiclass IMAD32_Pats <VOP3_Pseudo inst> {
>;
}
+// Handle cases where amdgpu-codegenprepare-mul24 made a mul24 instead of a normal mul.
+// We need to keep this separate because otherwise OtherPredicates would be overridden.
+class IMAD32_Mul24_Pat<VOP3_Pseudo inst>: GCNPat <
+ (i64 (add (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)), i64:$src2)),
+ (inst $src0, $src1, $src2, 0 /* clamp */)
+ >;
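+// This is presumably safe because mul24 is only formed when both operands
+// are known to fit in 24 bits, in which case V_MAD_U64_U32's full 32x32-bit
+// multiply produces the same product.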
+
// exclude pre-GFX9 where it was slow
-let OtherPredicates = [HasNotMADIntraFwdBug], SubtargetPredicate = isGFX9Plus in
+let OtherPredicates = [HasNotMADIntraFwdBug], SubtargetPredicate = isGFX9Plus in {
defm : IMAD32_Pats<V_MAD_U64_U32_e64>;
-let OtherPredicates = [HasMADIntraFwdBug], SubtargetPredicate = isGFX11Only in
+ def : IMAD32_Mul24_Pat<V_MAD_U64_U32_e64>;
+}
+let OtherPredicates = [HasMADIntraFwdBug], SubtargetPredicate = isGFX11Only in {
defm : IMAD32_Pats<V_MAD_U64_U32_gfx11_e64>;
+ def : IMAD32_Mul24_Pat<V_MAD_U64_U32_gfx11_e64>;
+}
def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3_OPSEL> {
let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0,
@@ -680,6 +735,15 @@ def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3
let HasExtDPP = 0;
}
+def VOP3_PERMLANE_VAR_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, untyped]>, VOP3_OPSEL> {
+ let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0,
+ IntOpSelMods:$src1_modifiers, VRegSrc_32:$src1,
+ VGPR_32:$vdst_in, op_sel0:$op_sel);
+ let HasClamp = 0;
+ let HasExtVOP3DPP = 0;
+ let HasExtDPP = 0;
+}
+
def opsel_i1timm : SDNodeXForm<timm, [{
return CurDAG->getTargetConstant(
N->getZExtValue() ? SISrcMods::OP_SEL_0 : SISrcMods::NONE,
@@ -696,6 +760,13 @@ class PermlanePat<SDPatternOperator permlane,
SCSrc_b32:$src1, 0, SCSrc_b32:$src2, VGPR_32:$vdst_in)
>;
+class PermlaneVarPat<SDPatternOperator permlane,
+ Instruction inst> : GCNPat<
+ (permlane i32:$vdst_in, i32:$src0, i32:$src1,
+ timm:$fi, timm:$bc),
+ (inst (opsel_i1timm $fi), VGPR_32:$src0, (opsel_i1timm $bc),
+ VGPR_32:$src1, VGPR_32:$vdst_in)
+>;
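+// The FI and BOUND_CTRL immediates of the permlane*_var intrinsics have no
+// operand slots of their own in VOP3_PERMLANE_VAR_Profile; opsel_i1timm maps
+// each boolean onto the corresponding operand's OP_SEL_0 source-modifier bit.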
let SubtargetPredicate = isGFX10Plus in {
let isCommutable = 1, isReMaterializable = 1 in {
@@ -726,6 +797,17 @@ let SubtargetPredicate = isGFX10Plus in {
} // End SubtargetPredicate = isGFX10Plus
+let SubtargetPredicate = isGFX12Plus in {
+ let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
+ defm V_PERMLANE16_VAR_B32 : VOP3Inst<"v_permlane16_var_b32", VOP3_PERMLANE_VAR_Profile>;
+ defm V_PERMLANEX16_VAR_B32 : VOP3Inst<"v_permlanex16_var_b32", VOP3_PERMLANE_VAR_Profile>;
+  } // End Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in"
+
+ def : PermlaneVarPat<int_amdgcn_permlane16_var, V_PERMLANE16_VAR_B32_e64>;
+ def : PermlaneVarPat<int_amdgcn_permlanex16_var, V_PERMLANEX16_VAR_B32_e64>;
+
+} // End SubtargetPredicate = isGFX12Plus
+
class DivFmasPat<ValueType vt, Instruction inst, Register CondReg> : GCNPat<
(AMDGPUdiv_fmas (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
(vt (VOP3Mods vt:$src1, i32:$src1_modifiers)),
@@ -773,11 +855,61 @@ let SubtargetPredicate = isGFX11Plus in {
defm V_CVT_PK_U16_F32 : VOP3Inst<"v_cvt_pk_u16_f32", VOP3_Profile<VOP_V2I16_F32_F32>>;
} // End SubtargetPredicate = isGFX11Plus
+let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
+ defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
+ defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
+ defm V_MAXIMUMMINIMUM_F16 : VOP3Inst<"v_maximumminimum_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
+ defm V_MINIMUMMAXIMUM_F16 : VOP3Inst<"v_minimummaximum_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
+} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
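+// NaN-propagating (IEEE-754-2019 minimum/maximum) counterparts of the
+// v_maxmin_*/v_minmax_* fused min/max operations defined for GFX11 above.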
+
let SubtargetPredicate = HasDot9Insts, IsDOT=1 in {
defm V_DOT2_F16_F16 : VOP3Inst<"v_dot2_f16_f16", VOP3_DOT_Profile<VOP_F16_V2F16_V2F16_F16>, int_amdgcn_fdot2_f16_f16>;
defm V_DOT2_BF16_BF16 : VOP3Inst<"v_dot2_bf16_bf16", VOP3_DOT_Profile<VOP_I16_V2I16_V2I16_I16>, int_amdgcn_fdot2_bf16_bf16>;
}
+class VOP_Pseudo_Scalar<RegisterClass Dst, RegisterOperand SrcOp,
+ ValueType dstVt, ValueType srcVt = dstVt>
+ : VOPProfile<[dstVt, srcVt, untyped, untyped]> {
+ let DstRC = VOPDstOperand<Dst>;
+ let Src0RC64 = SrcOp;
+
+ let HasOMod = 1;
+ let HasModifiers = 1;
+}
+
+def VOP_Pseudo_Scalar_F32 : VOP_Pseudo_Scalar<SReg_32_XEXEC, SSrc_f32, f32>;
+def VOP_Pseudo_Scalar_F16 : VOP_Pseudo_Scalar<SReg_32_XEXEC, SSrc_f16, f32, f16>;
+
+let SubtargetPredicate = HasPseudoScalarTrans, TRANS = 1,
+ isReMaterializable = 1, SchedRW = [WritePseudoScalarTrans] in {
+ defm V_S_EXP_F32 : VOP3PseudoScalarInst<"v_s_exp_f32", VOP_Pseudo_Scalar_F32, AMDGPUexp>;
+ defm V_S_EXP_F16 : VOP3PseudoScalarInst<"v_s_exp_f16", VOP_Pseudo_Scalar_F16>;
+ defm V_S_LOG_F32 : VOP3PseudoScalarInst<"v_s_log_f32", VOP_Pseudo_Scalar_F32, AMDGPUlog>;
+ defm V_S_LOG_F16 : VOP3PseudoScalarInst<"v_s_log_f16", VOP_Pseudo_Scalar_F16>;
+ defm V_S_RCP_F32 : VOP3PseudoScalarInst<"v_s_rcp_f32", VOP_Pseudo_Scalar_F32, AMDGPUrcp>;
+ defm V_S_RCP_F16 : VOP3PseudoScalarInst<"v_s_rcp_f16", VOP_Pseudo_Scalar_F16>;
+ defm V_S_RSQ_F32 : VOP3PseudoScalarInst<"v_s_rsq_f32", VOP_Pseudo_Scalar_F32, AMDGPUrsq>;
+ defm V_S_RSQ_F16 : VOP3PseudoScalarInst<"v_s_rsq_f16", VOP_Pseudo_Scalar_F16>;
+ defm V_S_SQRT_F32 : VOP3PseudoScalarInst<"v_s_sqrt_f32", VOP_Pseudo_Scalar_F32, any_amdgcn_sqrt>;
+ defm V_S_SQRT_F16 : VOP3PseudoScalarInst<"v_s_sqrt_f16", VOP_Pseudo_Scalar_F16>;
+}
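+// These "pseudo scalar" transcendentals take SGPR sources and write SGPR
+// results (SReg_32_XEXEC), while presumably still issuing to the VALU
+// transcendental pipe, hence TRANS = 1 and SchedRW = [WritePseudoScalarTrans].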
+
+class PseudoScalarPatF16<SDPatternOperator node, VOP3_Pseudo inst> : GCNPat <
+ (f16 (UniformUnaryFrag<node> (f16 (VOP3Mods0 f16:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)))),
+ (f16 (COPY_TO_REGCLASS (f32 (inst i32:$src0_modifiers, f16:$src0, i1:$clamp,
+ i32:$omod)),
+ SReg_32_XEXEC))
+>;
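+// The f16 pseudo scalar ops type their 32-bit scalar destination as f32
+// (see VOP_Pseudo_Scalar_F16 above), so this pattern selects the instruction
+// and then re-types the result register as f16 via COPY_TO_REGCLASS.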
+
+let SubtargetPredicate = HasPseudoScalarTrans in {
+ def : PseudoScalarPatF16<AMDGPUexpf16, V_S_EXP_F16_e64>;
+ def : PseudoScalarPatF16<AMDGPUlogf16, V_S_LOG_F16_e64>;
+ def : PseudoScalarPatF16<AMDGPUrcp, V_S_RCP_F16_e64>;
+ def : PseudoScalarPatF16<AMDGPUrsq, V_S_RSQ_F16_e64>;
+ def : PseudoScalarPatF16<any_amdgcn_sqrt, V_S_SQRT_F16_e64>;
+}
+
//===----------------------------------------------------------------------===//
// Integer Clamp Patterns
//===----------------------------------------------------------------------===//
@@ -823,125 +955,195 @@ def : IntClampPat<V_MQSAD_U32_U8_e64, int_amdgcn_mqsad_u32_u8>;
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// GFX11.
+// GFX12.
+//===----------------------------------------------------------------------===//
+
+defm V_MIN3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x229, "V_MIN3_F32", "v_min3_num_f32">;
+defm V_MAX3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x22a, "V_MAX3_F32", "v_max3_num_f32">;
+defm V_MIN3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x22b, "V_MIN3_F16", "v_min3_num_f16">;
+defm V_MAX3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x22c, "V_MAX3_F16", "v_max3_num_f16">;
+defm V_MINIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22d>;
+defm V_MAXIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22e>;
+defm V_MINIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12<0x22f>;
+defm V_MAXIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12<0x230>;
+defm V_MED3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x231, "V_MED3_F32", "v_med3_num_f32">;
+defm V_MED3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x232, "V_MED3_F16", "v_med3_num_f16">;
+defm V_MINMAX_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x268, "V_MINMAX_F32", "v_minmax_num_f32">;
+defm V_MAXMIN_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x269, "V_MAXMIN_F32", "v_maxmin_num_f32">;
+defm V_MINMAX_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x26a, "V_MINMAX_F16", "v_minmax_num_f16">;
+defm V_MAXMIN_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x26b, "V_MAXMIN_F16", "v_maxmin_num_f16">;
+defm V_MINIMUMMAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26c>;
+defm V_MAXIMUMMINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26d>;
+defm V_MINIMUMMAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x26e>;
+defm V_MAXIMUMMINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x26f>;
+defm V_S_EXP_F32 : VOP3Only_Real_Base_gfx12<0x280>;
+defm V_S_EXP_F16 : VOP3Only_Real_Base_gfx12<0x281>;
+defm V_S_LOG_F32 : VOP3Only_Real_Base_gfx12<0x282>;
+defm V_S_LOG_F16 : VOP3Only_Real_Base_gfx12<0x283>;
+defm V_S_RCP_F32 : VOP3Only_Real_Base_gfx12<0x284>;
+defm V_S_RCP_F16 : VOP3Only_Real_Base_gfx12<0x285>;
+defm V_S_RSQ_F32 : VOP3Only_Real_Base_gfx12<0x286>;
+defm V_S_RSQ_F16 : VOP3Only_Real_Base_gfx12<0x287>;
+defm V_S_SQRT_F32 : VOP3Only_Real_Base_gfx12<0x288>;
+defm V_S_SQRT_F16 : VOP3Only_Real_Base_gfx12<0x289>;
+defm V_MAD_CO_U64_U32 : VOP3be_Real_with_name_gfx12<0x2fe, "V_MAD_U64_U32", "v_mad_co_u64_u32">;
+defm V_MAD_CO_I64_I32 : VOP3be_Real_with_name_gfx12<0x2ff, "V_MAD_I64_I32", "v_mad_co_i64_i32">;
+defm V_MINIMUM_F64 : VOP3Only_Real_Base_gfx12<0x341>;
+defm V_MAXIMUM_F64 : VOP3Only_Real_Base_gfx12<0x342>;
+defm V_MINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x365>;
+defm V_MAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x366>;
+defm V_MINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x367>;
+defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x368>;
+
+defm V_PERMLANE16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x30f>;
+defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>;
+
+//===----------------------------------------------------------------------===//
+// GFX11, GFX12
//===----------------------------------------------------------------------===//
-defm V_FMA_DX9_ZERO_F32 : VOP3_Real_with_name_gfx11<0x209, "V_FMA_LEGACY_F32", "v_fma_dx9_zero_f32">;
-defm V_MAD_I32_I24 : VOP3_Realtriple_gfx11<0x20a>;
-defm V_MAD_U32_U24 : VOP3_Realtriple_gfx11<0x20b>;
-defm V_CUBEID_F32 : VOP3_Realtriple_gfx11<0x20c>;
-defm V_CUBESC_F32 : VOP3_Realtriple_gfx11<0x20d>;
-defm V_CUBETC_F32 : VOP3_Realtriple_gfx11<0x20e>;
-defm V_CUBEMA_F32 : VOP3_Realtriple_gfx11<0x20f>;
-defm V_BFE_U32 : VOP3_Realtriple_gfx11<0x210>;
-defm V_BFE_I32 : VOP3_Realtriple_gfx11<0x211>;
-defm V_BFI_B32 : VOP3_Realtriple_gfx11<0x212>;
-defm V_FMA_F32 : VOP3_Realtriple_gfx11<0x213>;
-defm V_FMA_F64 : VOP3_Real_Base_gfx11<0x214>;
-defm V_LERP_U8 : VOP3_Realtriple_gfx11<0x215>;
-defm V_ALIGNBIT_B32 : VOP3_Realtriple_gfx11<0x216>;
-defm V_ALIGNBYTE_B32 : VOP3_Realtriple_gfx11<0x217>;
-defm V_MULLIT_F32 : VOP3_Realtriple_gfx11<0x218>;
+multiclass VOP3_Real_with_name_gfx11_gfx12<bits<10> op, string opName,
+ string asmName> :
+ VOP3_Real_with_name<GFX11Gen, op, opName, asmName>,
+ VOP3_Real_with_name<GFX12Gen, op, opName, asmName>;
+
+multiclass VOP3_Realtriple_gfx11_gfx12<bits<10> op> :
+ VOP3_Realtriple<GFX11Gen, op>, VOP3_Realtriple<GFX12Gen, op>;
+
+multiclass VOP3_Real_Base_gfx11_gfx12<bits<10> op> :
+ VOP3_Real_Base<GFX11Gen, op>, VOP3_Real_Base<GFX12Gen, op>;
+
+multiclass VOP3_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
+ string asmName> :
+ VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
+ VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName>;
+
+multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op> :
+ VOP3Dot_Realtriple<GFX11Gen, op>, VOP3Dot_Realtriple<GFX12Gen, op>;
+
+multiclass VOP3be_Real_gfx11_gfx12<bits<10> op, string opName, string asmName> :
+ VOP3be_Real<GFX11Gen, op, opName, asmName>,
+ VOP3be_Real<GFX12Gen, op, opName, asmName>;
+
+multiclass VOP3_Real_No_Suffix_gfx11_gfx12<bits<10> op> :
+ VOP3_Real_No_Suffix<GFX11Gen, op>, VOP3_Real_No_Suffix<GFX12Gen, op>;
+
+defm V_FMA_DX9_ZERO_F32 : VOP3_Real_with_name_gfx11_gfx12<0x209, "V_FMA_LEGACY_F32", "v_fma_dx9_zero_f32">;
+defm V_MAD_I32_I24 : VOP3_Realtriple_gfx11_gfx12<0x20a>;
+defm V_MAD_U32_U24 : VOP3_Realtriple_gfx11_gfx12<0x20b>;
+defm V_CUBEID_F32 : VOP3_Realtriple_gfx11_gfx12<0x20c>;
+defm V_CUBESC_F32 : VOP3_Realtriple_gfx11_gfx12<0x20d>;
+defm V_CUBETC_F32 : VOP3_Realtriple_gfx11_gfx12<0x20e>;
+defm V_CUBEMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x20f>;
+defm V_BFE_U32 : VOP3_Realtriple_gfx11_gfx12<0x210>;
+defm V_BFE_I32 : VOP3_Realtriple_gfx11_gfx12<0x211>;
+defm V_BFI_B32 : VOP3_Realtriple_gfx11_gfx12<0x212>;
+defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>;
+defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>;
+defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>;
+defm V_ALIGNBIT_B32 : VOP3_Realtriple_gfx11_gfx12<0x216>;
+defm V_ALIGNBYTE_B32 : VOP3_Realtriple_gfx11_gfx12<0x217>;
+defm V_MULLIT_F32 : VOP3_Realtriple_gfx11_gfx12<0x218>;
defm V_MIN3_F32 : VOP3_Realtriple_gfx11<0x219>;
-defm V_MIN3_I32 : VOP3_Realtriple_gfx11<0x21a>;
-defm V_MIN3_U32 : VOP3_Realtriple_gfx11<0x21b>;
+defm V_MIN3_I32 : VOP3_Realtriple_gfx11_gfx12<0x21a>;
+defm V_MIN3_U32 : VOP3_Realtriple_gfx11_gfx12<0x21b>;
defm V_MAX3_F32 : VOP3_Realtriple_gfx11<0x21c>;
-defm V_MAX3_I32 : VOP3_Realtriple_gfx11<0x21d>;
-defm V_MAX3_U32 : VOP3_Realtriple_gfx11<0x21e>;
+defm V_MAX3_I32 : VOP3_Realtriple_gfx11_gfx12<0x21d>;
+defm V_MAX3_U32 : VOP3_Realtriple_gfx11_gfx12<0x21e>;
defm V_MED3_F32 : VOP3_Realtriple_gfx11<0x21f>;
-defm V_MED3_I32 : VOP3_Realtriple_gfx11<0x220>;
-defm V_MED3_U32 : VOP3_Realtriple_gfx11<0x221>;
-defm V_SAD_U8 : VOP3_Realtriple_gfx11<0x222>;
-defm V_SAD_HI_U8 : VOP3_Realtriple_gfx11<0x223>;
-defm V_SAD_U16 : VOP3_Realtriple_gfx11<0x224>;
-defm V_SAD_U32 : VOP3_Realtriple_gfx11<0x225>;
-defm V_CVT_PK_U8_F32 : VOP3_Realtriple_gfx11<0x226>;
-defm V_DIV_FIXUP_F32 : VOP3_Real_Base_gfx11<0x227>;
-defm V_DIV_FIXUP_F64 : VOP3_Real_Base_gfx11<0x228>;
-defm V_DIV_FMAS_F32 : VOP3_Real_Base_gfx11<0x237>;
-defm V_DIV_FMAS_F64 : VOP3_Real_Base_gfx11<0x238>;
-defm V_MSAD_U8 : VOP3_Realtriple_gfx11<0x239>;
-defm V_QSAD_PK_U16_U8 : VOP3_Real_Base_gfx11<0x23a>;
-defm V_MQSAD_PK_U16_U8 : VOP3_Real_Base_gfx11<0x23b>;
-defm V_MQSAD_U32_U8 : VOP3_Real_Base_gfx11<0x23d>;
-defm V_XOR3_B32 : VOP3_Realtriple_gfx11<0x240>;
-defm V_MAD_U16 : VOP3_Realtriple_with_name_gfx11<0x241, "V_MAD_U16_gfx9", "v_mad_u16">;
-defm V_PERM_B32 : VOP3_Realtriple_gfx11<0x244>;
-defm V_XAD_U32 : VOP3_Realtriple_gfx11<0x245>;
-defm V_LSHL_ADD_U32 : VOP3_Realtriple_gfx11<0x246>;
-defm V_ADD_LSHL_U32 : VOP3_Realtriple_gfx11<0x247>;
-defm V_FMA_F16 : VOP3_Realtriple_with_name_gfx11<0x248, "V_FMA_F16_gfx9", "v_fma_f16">;
+defm V_MED3_I32 : VOP3_Realtriple_gfx11_gfx12<0x220>;
+defm V_MED3_U32 : VOP3_Realtriple_gfx11_gfx12<0x221>;
+defm V_SAD_U8 : VOP3_Realtriple_gfx11_gfx12<0x222>;
+defm V_SAD_HI_U8 : VOP3_Realtriple_gfx11_gfx12<0x223>;
+defm V_SAD_U16 : VOP3_Realtriple_gfx11_gfx12<0x224>;
+defm V_SAD_U32 : VOP3_Realtriple_gfx11_gfx12<0x225>;
+defm V_CVT_PK_U8_F32 : VOP3_Realtriple_gfx11_gfx12<0x226>;
+defm V_DIV_FIXUP_F32 : VOP3_Real_Base_gfx11_gfx12<0x227>;
+defm V_DIV_FIXUP_F64 : VOP3_Real_Base_gfx11_gfx12<0x228>;
+defm V_DIV_FMAS_F32 : VOP3_Real_Base_gfx11_gfx12<0x237>;
+defm V_DIV_FMAS_F64 : VOP3_Real_Base_gfx11_gfx12<0x238>;
+defm V_MSAD_U8 : VOP3_Realtriple_gfx11_gfx12<0x239>;
+defm V_QSAD_PK_U16_U8 : VOP3_Real_Base_gfx11_gfx12<0x23a>;
+defm V_MQSAD_PK_U16_U8 : VOP3_Real_Base_gfx11_gfx12<0x23b>;
+defm V_MQSAD_U32_U8 : VOP3_Real_Base_gfx11_gfx12<0x23d>;
+defm V_XOR3_B32 : VOP3_Realtriple_gfx11_gfx12<0x240>;
+defm V_MAD_U16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x241, "V_MAD_U16_gfx9", "v_mad_u16">;
+defm V_PERM_B32 : VOP3_Realtriple_gfx11_gfx12<0x244>;
+defm V_XAD_U32 : VOP3_Realtriple_gfx11_gfx12<0x245>;
+defm V_LSHL_ADD_U32 : VOP3_Realtriple_gfx11_gfx12<0x246>;
+defm V_ADD_LSHL_U32 : VOP3_Realtriple_gfx11_gfx12<0x247>;
+defm V_FMA_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x248, "V_FMA_F16_gfx9", "v_fma_f16">;
defm V_MIN3_F16 : VOP3_Realtriple_gfx11<0x249>;
-defm V_MIN3_I16 : VOP3_Realtriple_gfx11<0x24a>;
-defm V_MIN3_U16 : VOP3_Realtriple_gfx11<0x24b>;
+defm V_MIN3_I16 : VOP3_Realtriple_gfx11_gfx12<0x24a>;
+defm V_MIN3_U16 : VOP3_Realtriple_gfx11_gfx12<0x24b>;
defm V_MAX3_F16 : VOP3_Realtriple_gfx11<0x24c>;
-defm V_MAX3_I16 : VOP3_Realtriple_gfx11<0x24d>;
-defm V_MAX3_U16 : VOP3_Realtriple_gfx11<0x24e>;
+defm V_MAX3_I16 : VOP3_Realtriple_gfx11_gfx12<0x24d>;
+defm V_MAX3_U16 : VOP3_Realtriple_gfx11_gfx12<0x24e>;
defm V_MED3_F16 : VOP3_Realtriple_gfx11<0x24f>;
-defm V_MED3_I16 : VOP3_Realtriple_gfx11<0x250>;
-defm V_MED3_U16 : VOP3_Realtriple_gfx11<0x251>;
-defm V_MAD_I16 : VOP3_Realtriple_with_name_gfx11<0x253, "V_MAD_I16_gfx9", "v_mad_i16">;
-defm V_DIV_FIXUP_F16 : VOP3_Realtriple_with_name_gfx11<0x254, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">;
-defm V_ADD3_U32 : VOP3_Realtriple_gfx11<0x255>;
-defm V_LSHL_OR_B32 : VOP3_Realtriple_gfx11<0x256>;
-defm V_AND_OR_B32 : VOP3_Realtriple_gfx11<0x257>;
-defm V_OR3_B32 : VOP3_Realtriple_gfx11<0x258>;
-defm V_MAD_U32_U16 : VOP3_Realtriple_gfx11<0x259>;
-defm V_MAD_I32_I16 : VOP3_Realtriple_gfx11<0x25a>;
-defm V_PERMLANE16_B32 : VOP3_Real_Base_gfx11<0x25b>;
-defm V_PERMLANEX16_B32 : VOP3_Real_Base_gfx11<0x25c>;
+defm V_MED3_I16 : VOP3_Realtriple_gfx11_gfx12<0x250>;
+defm V_MED3_U16 : VOP3_Realtriple_gfx11_gfx12<0x251>;
+defm V_MAD_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x253, "V_MAD_I16_gfx9", "v_mad_i16">;
+defm V_DIV_FIXUP_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x254, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">;
+defm V_ADD3_U32 : VOP3_Realtriple_gfx11_gfx12<0x255>;
+defm V_LSHL_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x256>;
+defm V_AND_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x257>;
+defm V_OR3_B32 : VOP3_Realtriple_gfx11_gfx12<0x258>;
+defm V_MAD_U32_U16 : VOP3_Realtriple_gfx11_gfx12<0x259>;
+defm V_MAD_I32_I16 : VOP3_Realtriple_gfx11_gfx12<0x25a>;
+defm V_PERMLANE16_B32 : VOP3_Real_Base_gfx11_gfx12<0x25b>;
+defm V_PERMLANEX16_B32 : VOP3_Real_Base_gfx11_gfx12<0x25c>;
defm V_MAXMIN_F32 : VOP3_Realtriple_gfx11<0x25e>;
defm V_MINMAX_F32 : VOP3_Realtriple_gfx11<0x25f>;
defm V_MAXMIN_F16 : VOP3_Realtriple_gfx11<0x260>;
defm V_MINMAX_F16 : VOP3_Realtriple_gfx11<0x261>;
-defm V_MAXMIN_U32 : VOP3_Realtriple_gfx11<0x262>;
-defm V_MINMAX_U32 : VOP3_Realtriple_gfx11<0x263>;
-defm V_MAXMIN_I32 : VOP3_Realtriple_gfx11<0x264>;
-defm V_MINMAX_I32 : VOP3_Realtriple_gfx11<0x265>;
-defm V_DOT2_F16_F16 : VOP3Dot_Realtriple_gfx11<0x266>;
-defm V_DOT2_BF16_BF16 : VOP3Dot_Realtriple_gfx11<0x267>;
-defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">;
-defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
+defm V_MAXMIN_U32 : VOP3_Realtriple_gfx11_gfx12<0x262>;
+defm V_MINMAX_U32 : VOP3_Realtriple_gfx11_gfx12<0x263>;
+defm V_MAXMIN_I32 : VOP3_Realtriple_gfx11_gfx12<0x264>;
+defm V_MINMAX_I32 : VOP3_Realtriple_gfx11_gfx12<0x265>;
+defm V_DOT2_F16_F16 : VOP3Dot_Realtriple_gfx11_gfx12<0x266>;
+defm V_DOT2_BF16_BF16 : VOP3Dot_Realtriple_gfx11_gfx12<0x267>;
+defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11_gfx12<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">;
+defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11_gfx12<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">;
defm V_MAD_I64_I32_gfx11 : VOP3be_Real_gfx11<0x2ff, "V_MAD_I64_I32_gfx11", "v_mad_i64_i32">;
-defm V_ADD_NC_U16 : VOP3Only_Realtriple_gfx11<0x303>;
-defm V_SUB_NC_U16 : VOP3Only_Realtriple_gfx11<0x304>;
-defm V_MUL_LO_U16_t16 : VOP3Only_Realtriple_t16_gfx11<0x305, "v_mul_lo_u16">;
-defm V_CVT_PK_I16_F32 : VOP3_Realtriple_gfx11<0x306>;
-defm V_CVT_PK_U16_F32 : VOP3_Realtriple_gfx11<0x307>;
-defm V_MAX_U16_t16 : VOP3Only_Realtriple_t16_gfx11<0x309, "v_max_u16">;
-defm V_MAX_I16_t16 : VOP3Only_Realtriple_t16_gfx11<0x30a, "v_max_i16">;
-defm V_MIN_U16_t16 : VOP3Only_Realtriple_t16_gfx11<0x30b, "v_min_u16">;
-defm V_MIN_I16_t16 : VOP3Only_Realtriple_t16_gfx11<0x30c, "v_min_i16">;
-defm V_ADD_NC_I16 : VOP3_Realtriple_with_name_gfx11<0x30d, "V_ADD_I16", "v_add_nc_i16">;
-defm V_SUB_NC_I16 : VOP3_Realtriple_with_name_gfx11<0x30e, "V_SUB_I16", "v_sub_nc_i16">;
-defm V_PACK_B32_F16 : VOP3_Realtriple_gfx11<0x311>;
-defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_with_name_gfx11<0x312, "V_CVT_PKNORM_I16_F16" , "v_cvt_pk_norm_i16_f16" >;
-defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_with_name_gfx11<0x313, "V_CVT_PKNORM_U16_F16" , "v_cvt_pk_norm_u16_f16" >;
-defm V_SUB_NC_I32 : VOP3_Realtriple_with_name_gfx11<0x325, "V_SUB_I32", "v_sub_nc_i32">;
-defm V_ADD_NC_I32 : VOP3_Realtriple_with_name_gfx11<0x326, "V_ADD_I32", "v_add_nc_i32">;
+defm V_ADD_NC_U16 : VOP3Only_Realtriple_gfx11_gfx12<0x303>;
+defm V_SUB_NC_U16 : VOP3Only_Realtriple_gfx11_gfx12<0x304>;
+defm V_MUL_LO_U16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x305, "v_mul_lo_u16">;
+defm V_CVT_PK_I16_F32 : VOP3_Realtriple_gfx11_gfx12<0x306>;
+defm V_CVT_PK_U16_F32 : VOP3_Realtriple_gfx11_gfx12<0x307>;
+defm V_MAX_U16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x309, "v_max_u16">;
+defm V_MAX_I16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x30a, "v_max_i16">;
+defm V_MIN_U16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x30b, "v_min_u16">;
+defm V_MIN_I16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x30c, "v_min_i16">;
+defm V_ADD_NC_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x30d, "V_ADD_I16", "v_add_nc_i16">;
+defm V_SUB_NC_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x30e, "V_SUB_I16", "v_sub_nc_i16">;
+defm V_PACK_B32_F16 : VOP3_Realtriple_gfx11_gfx12<0x311>;
+defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x312, "V_CVT_PKNORM_I16_F16", "v_cvt_pk_norm_i16_f16">;
+defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x313, "V_CVT_PKNORM_U16_F16", "v_cvt_pk_norm_u16_f16">;
+defm V_SUB_NC_I32 : VOP3_Realtriple_with_name_gfx11_gfx12<0x325, "V_SUB_I32", "v_sub_nc_i32">;
+defm V_ADD_NC_I32 : VOP3_Realtriple_with_name_gfx11_gfx12<0x326, "V_ADD_I32", "v_add_nc_i32">;
defm V_ADD_F64 : VOP3_Real_Base_gfx11<0x327>;
defm V_MUL_F64 : VOP3_Real_Base_gfx11<0x328>;
defm V_MIN_F64 : VOP3_Real_Base_gfx11<0x329>;
defm V_MAX_F64 : VOP3_Real_Base_gfx11<0x32a>;
-defm V_LDEXP_F64 : VOP3_Real_Base_gfx11<0x32b>;
-defm V_MUL_LO_U32 : VOP3_Real_Base_gfx11<0x32c>;
-defm V_MUL_HI_U32 : VOP3_Real_Base_gfx11<0x32d>;
-defm V_MUL_HI_I32 : VOP3_Real_Base_gfx11<0x32e>;
-defm V_TRIG_PREOP_F64 : VOP3_Real_Base_gfx11<0x32f>;
-defm V_LSHLREV_B16_t16 : VOP3Only_Realtriple_t16_gfx11<0x338, "v_lshlrev_b16">;
-defm V_LSHRREV_B16_t16 : VOP3Only_Realtriple_t16_gfx11<0x339, "v_lshrrev_b16">;
-defm V_ASHRREV_I16_t16 : VOP3Only_Realtriple_t16_gfx11<0x33a, "v_ashrrev_i16">;
+defm V_LDEXP_F64 : VOP3_Real_Base_gfx11_gfx12<0x32b>;
+defm V_MUL_LO_U32 : VOP3_Real_Base_gfx11_gfx12<0x32c>;
+defm V_MUL_HI_U32 : VOP3_Real_Base_gfx11_gfx12<0x32d>;
+defm V_MUL_HI_I32 : VOP3_Real_Base_gfx11_gfx12<0x32e>;
+defm V_TRIG_PREOP_F64 : VOP3_Real_Base_gfx11_gfx12<0x32f>;
+defm V_LSHLREV_B16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x338, "v_lshlrev_b16">;
+defm V_LSHRREV_B16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x339, "v_lshrrev_b16">;
+defm V_ASHRREV_I16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x33a, "v_ashrrev_i16">;
defm V_LSHLREV_B64 : VOP3_Real_Base_gfx11<0x33c>;
-defm V_LSHRREV_B64 : VOP3_Real_Base_gfx11<0x33d>;
-defm V_ASHRREV_I64 : VOP3_Real_Base_gfx11<0x33e>;
-defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx11<0x360>; // Pseudo in VOP2
+defm V_LSHRREV_B64 : VOP3_Real_Base_gfx11_gfx12<0x33d>;
+defm V_ASHRREV_I64 : VOP3_Real_Base_gfx11_gfx12<0x33e>;
+defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx11_gfx12<0x360>; // Pseudo in VOP2
let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
- defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx11<0x361>; // Pseudo in VOP2
+ defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx11_gfx12<0x361>; // Pseudo in VOP2
} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)
-defm V_AND_B16_t16 : VOP3Only_Realtriple_t16_gfx11<0x362, "v_and_b16">;
-defm V_OR_B16_t16 : VOP3Only_Realtriple_t16_gfx11<0x363, "v_or_b16">;
-defm V_XOR_B16_t16 : VOP3Only_Realtriple_t16_gfx11<0x364, "v_xor_b16">;
+defm V_AND_B16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x362, "v_and_b16">;
+defm V_OR_B16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x363, "v_or_b16">;
+defm V_XOR_B16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x364, "v_xor_b16">;
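// Note on the *_gfx11_gfx12 conversions above: each such Real multiclass
// emits encodings for both generations from a single defm, while opcodes
// whose mnemonic changed on gfx12 (e.g. the min3/max3/med3 and maxmin/minmax
// float forms, renamed to *_num_* variants) stay on the gfx11-only
// multiclasses and get separate gfx12 definitions. A usage sketch, with
// V_EXAMPLE as a placeholder pseudo rather than a real opcode:
//
//   defm V_EXAMPLE : VOP3_Realtriple_gfx11_gfx12<0x2aa>;
//
// produces the VOP3, DPP16 and DPP8 Real encodings suffixed _gfx11 and
// _gfx12.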
//===----------------------------------------------------------------------===//
// GFX10.
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 71e09611e74e..d3cefb339d9e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -108,6 +108,11 @@ defm V_PK_MIN_I16 : VOP3PInst<"v_pk_min_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I1
defm V_PK_MIN_U16 : VOP3PInst<"v_pk_min_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, umin>;
defm V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, smax>;
defm V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, umax>;
+
+let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
+defm V_PK_MAXIMUM_F16 : VOP3PInst<"v_pk_maximum_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16>, fmaximum>;
+defm V_PK_MINIMUM_F16 : VOP3PInst<"v_pk_minimum_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16>, fminimum>;
+} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
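// fmaximum/fminimum are the IEEE-754-2019 maximum/minimum operations, which
// propagate NaN inputs (unlike the fmaxnum/fminnum used by v_pk_max_f16);
// ReadsModeReg = 0 presumably reflects that their result does not depend on
// the MODE register's rounding or denormal settings.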
}
defm V_PK_SUB_U16 : VOP3PInst<"v_pk_sub_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>>;
@@ -353,56 +358,51 @@ foreach Type = ["I", "U"] in
(!cast<Extract>(Type#Index#"_4bit") node:$src1))>;
}
-class UDot2Pat<Instruction Inst> : GCNPat <
+class UDot2Pat<VOP_Pseudo Inst> : GCNPat <
(add (add_oneuse (AMDGPUmul_u24_oneuse (srl i32:$src0, (i32 16)),
(srl i32:$src1, (i32 16))), i32:$src2),
(AMDGPUmul_u24_oneuse (and i32:$src0, (i32 65535)),
(and i32:$src1, (i32 65535)))
),
(Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> {
- let SubtargetPredicate = !cast<VOP_Pseudo>(Inst).SubtargetPredicate;
+ let Predicates = Inst.Predicates;
}
-class SDot2Pat<Instruction Inst> : GCNPat <
+class SDot2Pat<VOP_Pseudo Inst> : GCNPat <
(add (add_oneuse (AMDGPUmul_i24_oneuse (sra i32:$src0, (i32 16)),
(sra i32:$src1, (i32 16))), i32:$src2),
(AMDGPUmul_i24_oneuse (sext_inreg i32:$src0, i16),
(sext_inreg i32:$src1, i16))),
(Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> {
- let SubtargetPredicate = !cast<VOP_Pseudo>(Inst).SubtargetPredicate;
+ let Predicates = Inst.Predicates;
}
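// Forwarding the instruction's whole Predicates list (instead of copying the
// single SubtargetPredicate field) keeps these patterns legal exactly where
// the instruction is, including any OtherPredicates. A minimal sketch of the
// same idiom, with ExamplePat/ExampleInst as placeholders:
//
//   class ExamplePat<VOP_Pseudo Inst> : GCNPat<
//     (add i32:$src0, i32:$src1), (Inst $src0, $src1)> {
//     let Predicates = Inst.Predicates;
//   }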
let IsDOT = 1 in {
-let SubtargetPredicate = HasDot2Insts in {
-
+let OtherPredicates = [HasDot2Insts] in {
defm V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16",
VOP3P_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_sdot2, 1>;
defm V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16",
VOP3P_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_udot2, 1>;
+} // End OtherPredicates = [HasDot2Insts]
-} // End SubtargetPredicate = HasDot2Insts
-
-let SubtargetPredicate = HasDot10Insts in
+let OtherPredicates = [HasDot10Insts] in
defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
VOP3P_Profile<VOP_F32_V2F16_V2F16_F32, VOP3_REGULAR, /*HasDPP*/ 1>,
AMDGPUfdot2, 1/*ExplicitClamp*/>;
-let SubtargetPredicate = HasDot7Insts in {
+let OtherPredicates = [HasDot7Insts] in {
defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
+} // End OtherPredicates = [HasDot7Insts]
-} // End SubtargetPredicate = HasDot7Insts
-
-let SubtargetPredicate = HasDot1Insts in {
-
+let OtherPredicates = [HasDot1Insts] in {
defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8",
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4, 1>;
defm V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4",
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8, 1>;
-
-} // End SubtargetPredicate = HasDot1Insts
+} // End OtherPredicates = [HasDot1Insts]
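// Throughout this block the feature checks move from SubtargetPredicate to
// OtherPredicates. Both lists are concatenated into the final Predicates by
// PredicateControl, so the effective gating is unchanged; the point is to
// leave SubtargetPredicate free for the generation predicate applied by
// enclosing let blocks.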
def DOT2_BF16_Profile
: VOP3P_Profile<VOP_F32_V2I16_V2I16_F32, VOP3_REGULAR, /*HasDPP*/ 1> {
@@ -436,20 +436,34 @@ multiclass VOP3PDOTIUInst <string OpName, SDPatternOperator intrinsic_node> {
let SubtargetPredicate = HasDot8Insts in {
defm V_DOT4_I32_IU8 : VOP3PDOTIUInst<"v_dot4_i32_iu8", int_amdgcn_sudot4>;
defm V_DOT8_I32_IU4 : VOP3PDOTIUInst<"v_dot8_i32_iu4", int_amdgcn_sudot8>;
+
+def : GCNPat < (int_amdgcn_sdot8 i32:$src0,
+ i32:$src1,
+ i32:$src2, (i1 timm:$clamp)),
+ (V_DOT8_I32_IU4 (i32 9), i32:$src0,
+ (i32 9), i32:$src1, (i32 8), i32:$src2, i1:$clamp)
+>;
+
+def : GCNPat < (int_amdgcn_sdot4 i32:$src0,
+ i32:$src1,
+ i32:$src2, (i1 timm:$clamp)),
+ (V_DOT4_I32_IU8 (i32 9), i32:$src0,
+ (i32 9), i32:$src1, (i32 8), i32:$src2, i1:$clamp)
+>;
} // End SubtargetPredicate = HasDot8Insts
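// In the two patterns above, the (i32 9) immediates are VOP3P source-modifier
// encodings: OP_SEL_1 (8) with the neg bit (1) set, the neg bit apparently
// being repurposed on the mixed-signedness dot instructions to mark a source
// as signed, while src2 keeps the plain OP_SEL_1 (8) default. This steers the
// plain sdot4/sdot8 intrinsics onto the IU variants on targets that only
// provide those.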
def : UDot2Pat<V_DOT2_U32_U16>;
def : SDot2Pat<V_DOT2_I32_I16>;
foreach Type = ["U", "I"] in
- let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT4_"#Type#"32_"#Type#8).SubtargetPredicate in
+ let Predicates = !cast<VOP_Pseudo>("V_DOT4_"#Type#"32_"#Type#8).Predicates in
def : GCNPat <
!cast<dag>(!foldl((i32 i32:$src2), [0, 1, 2, 3], lhs, y,
(add_oneuse lhs, (!cast<PatFrag>("Mul"#Type#"_Elt"#y) i32:$src0, i32:$src1)))),
(!cast<VOP3P_Pseudo>("V_DOT4_"#Type#"32_"#Type#8) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
foreach Type = ["U", "I"] in
- let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).SubtargetPredicate in
+ let Predicates = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).Predicates in
def : GCNPat <
!cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
[1, 2, 3, 4, 5, 6, 7], lhs, y,
@@ -459,7 +473,7 @@ foreach Type = ["U", "I"] in
// Different variants of dot8 code-gen dag patterns are not generated through table-gen due to a huge increase
// in the compile time. Directly handle the pattern generated by the FE here.
foreach Type = ["U", "I"] in
- let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).SubtargetPredicate in
+ let Predicates = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).Predicates in
def : GCNPat <
!cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
[7, 1, 2, 3, 4, 5, 6], lhs, y,
@@ -596,7 +610,7 @@ let GISelPredicateCode = [{ return !MF.getInfo<SIMachineFunctionInfo>()->mayNeed
class VgprMAIFrag<SDPatternOperator Op> :
MAIFrag<Op, [{ return !MF->getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs(); }]>;
-let Predicates = [HasMAIInsts] in {
+let SubtargetPredicate = HasMAIInsts in {
let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
defm V_ACCVGPR_READ_B32 : VOP3Inst<"v_accvgpr_read_b32", VOPProfileAccRead>;
@@ -687,7 +701,7 @@ let Predicates = [isGFX90APlus] in {
}
} // End Predicates = [isGFX90APlus]
-let Predicates = [isGFX940Plus], is_gfx940_xdl = 1 in {
+let SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1 in {
defm V_MFMA_I32_32X32X16I8 : MAIInst<"v_mfma_i32_32x32x16i8", "I32_I64_X32", int_amdgcn_mfma_i32_32x32x16_i8>;
defm V_MFMA_I32_16X16X32I8 : MAIInst<"v_mfma_i32_16x16x32i8", "I32_I64_X16", int_amdgcn_mfma_i32_16x16x32_i8>;
defm V_MFMA_F32_16X16X8XF32 : MAIInst<"v_mfma_f32_16x16x8xf32", "F32_V2F32_X16", int_amdgcn_mfma_f32_16x16x8_xf32>;
@@ -700,7 +714,7 @@ let Predicates = [isGFX940Plus], is_gfx940_xdl = 1 in {
defm V_MFMA_F32_32X32X16_BF8_FP8 : MAIInst<"v_mfma_f32_32x32x16_bf8_fp8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_bf8_fp8>;
defm V_MFMA_F32_32X32X16_FP8_BF8 : MAIInst<"v_mfma_f32_32x32x16_fp8_bf8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_fp8_bf8>;
defm V_MFMA_F32_32X32X16_FP8_FP8 : MAIInst<"v_mfma_f32_32x32x16_fp8_fp8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_fp8_fp8>;
-} // End Predicates = [isGFX940Plus], is_gfx940_xdl = 1
+} // End SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1
multiclass SMFMACInst<string OpName, string P, SDPatternOperator node> {
let Constraints = "$vdst = $src2", DisableEncoding = "$src2",
@@ -737,12 +751,16 @@ def MAIInstInfoTable : GenericTable {
let PrimaryKeyName = "getMAIInstInfoHelper";
}
-let SubtargetPredicate = HasPackedFP32Ops, isCommutable = 1, isReMaterializable = 1 in {
- defm V_PK_FMA_F32 : VOP3PInst<"v_pk_fma_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fma>;
- defm V_PK_MUL_F32 : VOP3PInst<"v_pk_mul_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fmul>;
- defm V_PK_ADD_F32 : VOP3PInst<"v_pk_add_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fadd>;
+let isCommutable = 1, isReMaterializable = 1 in {
+ let SubtargetPredicate = HasPackedFP32Ops in {
+ defm V_PK_FMA_F32 : VOP3PInst<"v_pk_fma_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fma>;
+ defm V_PK_MUL_F32 : VOP3PInst<"v_pk_mul_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fmul>;
+ defm V_PK_ADD_F32 : VOP3PInst<"v_pk_add_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fadd>;
+ } // End SubtargetPredicate = HasPackedFP32Ops
+
+ let SubtargetPredicate = HasPkMovB32 in
defm V_PK_MOV_B32 : VOP3PInst<"v_pk_mov_b32", VOP3P_Profile<VOP_V2I32_V2I32_V2I32, VOP3_PACKED>>;
-} // End SubtargetPredicate = HasPackedFP32Ops, isCommutable = 1
+} // End isCommutable = 1, isReMaterializable = 1
def : MnemonicAlias<"v_accvgpr_read", "v_accvgpr_read_b32">;
def : MnemonicAlias<"v_accvgpr_write", "v_accvgpr_write_b32">;
@@ -847,34 +865,25 @@ def WMMAOpcode3AddrMappingTable : WMMAMappingTable {
// it converts the default pseudo to the pseudo where src2 is not the same as vdst.
// 3) @earlyclobber on the destination satisfies the constraint during RA.
-multiclass WMMAInst<string Suffix, string Instr, VOPProfile P, SDPatternOperator node = null_frag, RegisterOperand _Src01RC64 = VRegSrc_256, WMMAType Type> {
+multiclass WMMAInst<string Suffix, string Instr, VOPProfile P, SDPatternOperator node = null_frag, RegisterOperand _Src01RC64 = VRegSrc_256, WMMAType Type, bit convertibleTo3Addr> {
defvar WMMAConstraints2Addr = "@earlyclobber $vdst,$vdst = $src2";
defvar WMMAConstraints3Addr = "@earlyclobber $vdst";
defvar WMMAProfile = VOPProfileWMMA<P, Suffix, _Src01RC64, Type.hasClamp, Type.hasOpsel>;
- if !eq(Suffix, "_w32") then {
- let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
- let Constraints = WMMAConstraints2Addr, isConvertibleToThreeAddress = 1 in {
- def _twoaddr_w32 : VOP3P_Pseudo<Instr # Suffix, WMMAProfile>;
- }
- let Constraints = WMMAConstraints3Addr, SchedRW = [Write32Bit, Write32Bit] in {
- def _threeaddr_w32 : VOP3P_Pseudo<Instr # Suffix, WMMAProfile>;
- }
+ let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
+ let Constraints = WMMAConstraints2Addr, isConvertibleToThreeAddress = convertibleTo3Addr in {
+ def _twoaddr # Suffix : VOP3P_Pseudo<Instr # Suffix, WMMAProfile>;
}
- def : WMMAOpcodeMapping<!cast<Instruction>(NAME # _twoaddr_w32),
- !cast<Instruction>(NAME # _threeaddr_w32)>;
- } else if !eq(Suffix, "_w64") then {
+ }
+ if convertibleTo3Addr then {
let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
- let Constraints = WMMAConstraints2Addr, isConvertibleToThreeAddress = 1 in {
- def _twoaddr_w64 : VOP3P_Pseudo<Instr # Suffix, WMMAProfile>;
- }
let Constraints = WMMAConstraints3Addr, SchedRW = [Write32Bit, Write32Bit] in {
- def _threeaddr_w64 : VOP3P_Pseudo<Instr # Suffix, WMMAProfile>;
+ def _threeaddr # Suffix : VOP3P_Pseudo<Instr # Suffix, WMMAProfile>;
}
}
- def : WMMAOpcodeMapping<!cast<Instruction>(NAME # _twoaddr_w64),
- !cast<Instruction>(NAME # _threeaddr_w64)>;
+ def : WMMAOpcodeMapping<!cast<Instruction>(NAME # _twoaddr # Suffix),
+ !cast<Instruction>(NAME # _threeaddr # Suffix)>;
}
if !eq(Type, WMMAOpSel) then {
@@ -888,21 +897,25 @@ multiclass WMMAInst<string Suffix, string Instr, VOPProfile P, SDPatternOperator
let WaveSizePredicate = isWave32 in {
- defm V_WMMA_F32_16X16X16_F16 : WMMAInst<"_w32", "v_wmma_f32_16x16x16_f16", VOP_V8F32_V16F16_V16F16_V8F32, int_amdgcn_wmma_f32_16x16x16_f16, VRegSrc_256, WMMARegular>;
- defm V_WMMA_F32_16X16X16_BF16 : WMMAInst<"_w32", "v_wmma_f32_16x16x16_bf16", VOP_V8F32_V16I16_V16I16_V8F32, int_amdgcn_wmma_f32_16x16x16_bf16, VRegSrc_256, WMMARegular>;
- defm V_WMMA_F16_16X16X16_F16 : WMMAInst<"_w32", "v_wmma_f16_16x16x16_f16", VOP_V16F16_V16F16_V16F16_V16F16, int_amdgcn_wmma_f16_16x16x16_f16, VRegSrc_256, WMMAOpSel>;
- defm V_WMMA_BF16_16X16X16_BF16 : WMMAInst<"_w32", "v_wmma_bf16_16x16x16_bf16", VOP_V16I16_V16I16_V16I16_V16I16, int_amdgcn_wmma_bf16_16x16x16_bf16, VRegSrc_256, WMMAOpSel>;
- defm V_WMMA_I32_16X16X16_IU8 : WMMAInst<"_w32", "v_wmma_i32_16x16x16_iu8", VOP_V8I32_V4I32_V4I32_V8I32, int_amdgcn_wmma_i32_16x16x16_iu8, VRegSrc_128, WMMAUIClamp>;
- defm V_WMMA_I32_16X16X16_IU4 : WMMAInst<"_w32", "v_wmma_i32_16x16x16_iu4", VOP_V8I32_V2I32_V2I32_V8I32, int_amdgcn_wmma_i32_16x16x16_iu4, VRegSrc_64, WMMAUIClamp>;
+ defm V_WMMA_F32_16X16X16_F16 : WMMAInst<"_w32", "v_wmma_f32_16x16x16_f16", VOP_V8F32_V16F16_V16F16_V8F32, int_amdgcn_wmma_f32_16x16x16_f16, VRegSrc_256, WMMARegular, 1>;
+ defm V_WMMA_F32_16X16X16_BF16 : WMMAInst<"_w32", "v_wmma_f32_16x16x16_bf16", VOP_V8F32_V16I16_V16I16_V8F32, int_amdgcn_wmma_f32_16x16x16_bf16, VRegSrc_256, WMMARegular, 1>;
+ defm V_WMMA_F16_16X16X16_F16 : WMMAInst<"_w32", "v_wmma_f16_16x16x16_f16", VOP_V16F16_V16F16_V16F16_V16F16, int_amdgcn_wmma_f16_16x16x16_f16, VRegSrc_256, WMMAOpSel, 1>;
+ defm V_WMMA_BF16_16X16X16_BF16 : WMMAInst<"_w32", "v_wmma_bf16_16x16x16_bf16", VOP_V16I16_V16I16_V16I16_V16I16, int_amdgcn_wmma_bf16_16x16x16_bf16, VRegSrc_256, WMMAOpSel, 1>;
+ defm V_WMMA_F16_16X16X16_F16_TIED : WMMAInst<"_w32", "v_wmma_f16_16x16x16_f16", VOP_V16F16_V16F16_V16F16_V16F16, int_amdgcn_wmma_f16_16x16x16_f16_tied, VRegSrc_256, WMMAOpSel, 0>;
+ defm V_WMMA_BF16_16X16X16_BF16_TIED : WMMAInst<"_w32", "v_wmma_bf16_16x16x16_bf16", VOP_V16I16_V16I16_V16I16_V16I16, int_amdgcn_wmma_bf16_16x16x16_bf16_tied, VRegSrc_256, WMMAOpSel, 0>;
+ defm V_WMMA_I32_16X16X16_IU8 : WMMAInst<"_w32", "v_wmma_i32_16x16x16_iu8", VOP_V8I32_V4I32_V4I32_V8I32, int_amdgcn_wmma_i32_16x16x16_iu8, VRegSrc_128, WMMAUIClamp, 1>;
+ defm V_WMMA_I32_16X16X16_IU4 : WMMAInst<"_w32", "v_wmma_i32_16x16x16_iu4", VOP_V8I32_V2I32_V2I32_V8I32, int_amdgcn_wmma_i32_16x16x16_iu4, VRegSrc_64, WMMAUIClamp, 1>;
}
let WaveSizePredicate = isWave64 in {
- defm V_WMMA_F32_16X16X16_F16 : WMMAInst<"_w64", "v_wmma_f32_16x16x16_f16", VOP_V4F32_V16F16_V16F16_V4F32, int_amdgcn_wmma_f32_16x16x16_f16, VRegSrc_256, WMMARegular>;
- defm V_WMMA_F32_16X16X16_BF16 : WMMAInst<"_w64", "v_wmma_f32_16x16x16_bf16", VOP_V4F32_V16I16_V16I16_V4F32, int_amdgcn_wmma_f32_16x16x16_bf16, VRegSrc_256, WMMARegular>;
- defm V_WMMA_F16_16X16X16_F16 : WMMAInst<"_w64", "v_wmma_f16_16x16x16_f16", VOP_V8F16_V16F16_V16F16_V8F16, int_amdgcn_wmma_f16_16x16x16_f16, VRegSrc_256, WMMAOpSel>;
- defm V_WMMA_BF16_16X16X16_BF16 : WMMAInst<"_w64", "v_wmma_bf16_16x16x16_bf16", VOP_V8I16_V16I16_V16I16_V8I16, int_amdgcn_wmma_bf16_16x16x16_bf16, VRegSrc_256, WMMAOpSel>;
- defm V_WMMA_I32_16X16X16_IU8 : WMMAInst<"_w64", "v_wmma_i32_16x16x16_iu8", VOP_V4I32_V4I32_V4I32_V4I32, int_amdgcn_wmma_i32_16x16x16_iu8, VRegSrc_128, WMMAUIClamp>;
- defm V_WMMA_I32_16X16X16_IU4 : WMMAInst<"_w64", "v_wmma_i32_16x16x16_iu4", VOP_V4I32_V2I32_V2I32_V4I32, int_amdgcn_wmma_i32_16x16x16_iu4, VRegSrc_64, WMMAUIClamp>;
+ defm V_WMMA_F32_16X16X16_F16 : WMMAInst<"_w64", "v_wmma_f32_16x16x16_f16", VOP_V4F32_V16F16_V16F16_V4F32, int_amdgcn_wmma_f32_16x16x16_f16, VRegSrc_256, WMMARegular, 1>;
+ defm V_WMMA_F32_16X16X16_BF16 : WMMAInst<"_w64", "v_wmma_f32_16x16x16_bf16", VOP_V4F32_V16I16_V16I16_V4F32, int_amdgcn_wmma_f32_16x16x16_bf16, VRegSrc_256, WMMARegular, 1>;
+ defm V_WMMA_F16_16X16X16_F16 : WMMAInst<"_w64", "v_wmma_f16_16x16x16_f16", VOP_V8F16_V16F16_V16F16_V8F16, int_amdgcn_wmma_f16_16x16x16_f16, VRegSrc_256, WMMAOpSel, 1>;
+ defm V_WMMA_BF16_16X16X16_BF16 : WMMAInst<"_w64", "v_wmma_bf16_16x16x16_bf16", VOP_V8I16_V16I16_V16I16_V8I16, int_amdgcn_wmma_bf16_16x16x16_bf16, VRegSrc_256, WMMAOpSel, 1>;
+ defm V_WMMA_F16_16X16X16_F16_TIED : WMMAInst<"_w64", "v_wmma_f16_16x16x16_f16", VOP_V8F16_V16F16_V16F16_V8F16, int_amdgcn_wmma_f16_16x16x16_f16_tied, VRegSrc_256, WMMAOpSel, 0>;
+ defm V_WMMA_BF16_16X16X16_BF16_TIED : WMMAInst<"_w64", "v_wmma_bf16_16x16x16_bf16", VOP_V8I16_V16I16_V16I16_V8I16, int_amdgcn_wmma_bf16_16x16x16_bf16_tied, VRegSrc_256, WMMAOpSel, 0>;
+ defm V_WMMA_I32_16X16X16_IU8 : WMMAInst<"_w64", "v_wmma_i32_16x16x16_iu8", VOP_V4I32_V4I32_V4I32_V4I32, int_amdgcn_wmma_i32_16x16x16_iu8, VRegSrc_128, WMMAUIClamp, 1>;
+ defm V_WMMA_I32_16X16X16_IU4 : WMMAInst<"_w64", "v_wmma_i32_16x16x16_iu4", VOP_V4I32_V2I32_V2I32_V4I32, int_amdgcn_wmma_i32_16x16x16_iu4, VRegSrc_64, WMMAUIClamp, 1>;
}
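// The new convertibleTo3Addr template bit decides whether the _threeaddr
// pseudo and its WMMAOpcodeMapping entry are generated at all. The _TIED
// variants keep $vdst tied to $src2 so the destination halves not written by
// the result are taken from the accumulator, hence they are instantiated
// with convertibleTo3Addr = 0 and exist only in two-address form.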
@@ -932,56 +945,89 @@ class VOP3P_DPP8_Base<bits<7> op, VOP_Pseudo ps, string opName = ps.OpName>
}
//===----------------------------------------------------------------------===//
-// GFX11.
+// GFX11, GFX12
//===----------------------------------------------------------------------===//
-let AssemblerPredicate = isGFX11Plus,
- DecoderNamespace = "GFX11" in {
+multiclass VOP3P_Real_Base<GFXGen Gen, bits<7> op, string backing_ps_name = NAME,
+ string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
+ def Gen.Suffix :
+ VOP3P_Real_Gen<!cast<VOP3P_Pseudo>(backing_ps_name), Gen, asmName>,
+ VOP3Pe_gfx11_gfx12<op, !cast<VOP3P_Pseudo>(backing_ps_name).Pfl>;
+}
- multiclass VOP3P_Real_gfx11<bits<7> op, string backing_ps_name = NAME,
- string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
- def _gfx11 : VOP3P_Real<!cast<VOP3P_Pseudo>(backing_ps_name),
- SIEncodingFamily.GFX11, asmName>,
- VOP3Pe_gfx11<op, !cast<VOP3P_Pseudo>(backing_ps_name).Pfl>;
- }
+multiclass VOP3P_Real_with_name<GFXGen Gen, bits<7> op,
+ string backing_ps_name = NAME,
+ string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
+ defvar ps = !cast<VOP3P_Pseudo>(backing_ps_name);
+ let AsmString = asmName # ps.AsmOperands in
+ def Gen.Suffix :
+ VOP3P_Real_Gen<!cast<VOP3P_Pseudo>(backing_ps_name), Gen, asmName>,
+ VOP3Pe_gfx11_gfx12<op, !cast<VOP3P_Pseudo>(backing_ps_name).Pfl>,
+ MnemonicAlias<ps.Mnemonic, asmName>, Requires<[Gen.AssemblerPredicate]>;
+}
- multiclass VOP3P_Real_dpp_gfx11<bits<7> op, string backing_ps_name = NAME,
- string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
- defvar ps = !cast<VOP3P_Pseudo>(backing_ps_name);
- def _dpp_gfx11
- : VOP3P_DPP16<op, !cast<VOP_DPP_Pseudo>(backing_ps_name #"_dpp"),
- SIEncodingFamily.GFX11> {
- let AsmString = asmName #ps.Pfl.AsmVOP3DPP16;
- let DecoderNamespace = "DPPGFX11";
- }
+multiclass VOP3P_Real_dpp<GFXGen Gen, bits<7> op, string backing_ps_name = NAME,
+ string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
+ defvar ps = !cast<VOP3P_Pseudo>(backing_ps_name);
+ def _dpp#Gen.Suffix
+ : VOP3P_DPP16<op, !cast<VOP_DPP_Pseudo>(backing_ps_name #"_dpp"),
+ Gen.Subtarget> {
+ let AsmString = asmName #ps.Pfl.AsmVOP3DPP16;
+ let DecoderNamespace = "DPP"#Gen.DecoderNamespace;
+ let AssemblerPredicate = Gen.AssemblerPredicate;
}
+}
- multiclass VOP3P_Real_dpp8_gfx11<bits<7> op, string backing_ps_name = NAME,
- string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
- defvar ps = !cast<VOP3P_Pseudo>(backing_ps_name);
- def _dpp8_gfx11 : VOP3P_DPP8_Base<op, ps> {
- let AsmString = asmName #ps.Pfl.AsmVOP3DPP8;
- let DecoderNamespace = "DPP8GFX11";
- }
+multiclass VOP3P_Real_dpp8<GFXGen Gen, bits<7> op, string backing_ps_name = NAME,
+ string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
+ defvar ps = !cast<VOP3P_Pseudo>(backing_ps_name);
+ def _dpp8#Gen.Suffix : VOP3P_DPP8_Base<op, ps> {
+ let AsmString = asmName #ps.Pfl.AsmVOP3DPP8;
+ let DecoderNamespace = "DPP8"#Gen.DecoderNamespace;
+ let AssemblerPredicate = Gen.AssemblerPredicate;
}
+}
- multiclass VOP3P_Realtriple_gfx11<bits<7> op, string backing_ps_name = NAME,
- string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic>
- : VOP3P_Real_gfx11<op, backing_ps_name, asmName>,
- VOP3P_Real_dpp_gfx11<op, backing_ps_name, asmName>,
- VOP3P_Real_dpp8_gfx11<op, backing_ps_name, asmName>;
-} // End AssemblerPredicate = isGFX11Plus, DecoderNamespace = "GFX11"
+multiclass VOP3P_Realtriple<GFXGen Gen, bits<7> op, string backing_ps_name = NAME,
+ string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic>
+ : VOP3P_Real_Base<Gen, op, backing_ps_name, asmName>,
+ VOP3P_Real_dpp<Gen, op, backing_ps_name, asmName>,
+ VOP3P_Real_dpp8<Gen, op, backing_ps_name, asmName>;
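// These Gen-parameterized multiclasses replace the former gfx11-only copies:
// a GFXGen record bundles a generation's AssemblerPredicate, decoder
// namespace, subtarget encoding family and "_gfxN" suffix. A usage sketch,
// with V_EXAMPLE as a placeholder pseudo:
//
//   defm V_EXAMPLE : VOP3P_Realtriple<GFX12Gen, 0x2f>;
//
// defines _gfx12, _dpp_gfx12 and _dpp8_gfx12 Reals in the GFX12, DPPGFX12
// and DPP8GFX12 decoder namespaces respectively.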
-defm V_DOT4_I32_IU8 : VOP3P_Real_gfx11 <0x16>;
-defm V_DOT8_I32_IU4 : VOP3P_Real_gfx11 <0x18>;
-defm V_DOT2_F32_BF16 : VOP3P_Real_gfx11 <0x1a>;
+//===----------------------------------------------------------------------===//
+// GFX12
+//===----------------------------------------------------------------------===//
+
+multiclass VOP3P_Real_gfx12<bits<7> op> : VOP3P_Real_Base<GFX12Gen, op>;
+
+multiclass VOP3P_Real_with_name_gfx12<bits<7> op,
+ string backing_ps_name = NAME,
+ string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> :
+ VOP3P_Real_with_name<GFX12Gen, op, backing_ps_name, asmName>;
+
+defm V_PK_MIN_NUM_F16 : VOP3P_Real_with_name_gfx12<0x1b, "V_PK_MIN_F16", "v_pk_min_num_f16">;
+defm V_PK_MAX_NUM_F16 : VOP3P_Real_with_name_gfx12<0x1c, "V_PK_MAX_F16", "v_pk_max_num_f16">;
+
+defm V_PK_MINIMUM_F16 : VOP3P_Real_gfx12<0x1d>;
+defm V_PK_MAXIMUM_F16 : VOP3P_Real_gfx12<0x1e>;
+
+//===----------------------------------------------------------------------===//
+// GFX11
+//===----------------------------------------------------------------------===//
+
+multiclass VOP3P_Real_gfx11_gfx12<bits<7> op> :
+ VOP3P_Real_Base<GFX11Gen, op>, VOP3P_Real_Base<GFX12Gen, op>;
+
+defm V_DOT4_I32_IU8 : VOP3P_Real_gfx11_gfx12<0x16>;
+defm V_DOT8_I32_IU4 : VOP3P_Real_gfx11_gfx12<0x18>;
+defm V_DOT2_F32_BF16 : VOP3P_Real_gfx11_gfx12<0x1a>;
multiclass VOP3P_Real_WMMA <bits<7> op> {
let WaveSizePredicate = isWave32, DecoderNamespace = "GFX11" in {
- defm _twoaddr_w32 : VOP3P_Real_gfx11 <op>;
+ defm _twoaddr_w32 : VOP3P_Real_Base <GFX11Gen, op>;
}
let WaveSizePredicate = isWave64, DecoderNamespace = "WMMAGFX11" in {
- defm _twoaddr_w64 : VOP3P_Real_gfx11 <op>;
+ defm _twoaddr_w64 : VOP3P_Real_Base <GFX11Gen, op>;
}
}
@@ -1034,25 +1080,23 @@ multiclass VOP3P_Real_MFMA_gfx940_aliases<string NameFrom, string NameTo, string
VOP3_Pseudo PS_VCD = !cast<VOP3_Pseudo>(Op # "_vgprcd" # "_e64"),
VOPProfile Pfl_ACD = PS_ACD.Pfl,
VOPProfile Pfl_VCD = PS_VCD.Pfl> {
- let Predicates = [isGFX940Plus] in {
- if !ne(NameFrom, NameTo) then {
- def : InstAlias <NameTo # " " # PS_ACD.AsmOperands,
- (!cast<VOP3P_Real>(Op # "_gfx940_acd") Pfl_ACD.DstRC:$vdst,
- Pfl_ACD.Src0RC64:$src0, Pfl_ACD.Src1RC64:$src1, Pfl_ACD.Src2RC64:$src2,
- cbsz:$cbsz, abid:$abid, blgp:$blgp)>, PredicateControl;
- def : InstAlias <NameTo # " " # PS_VCD.AsmOperands,
- (!cast<VOP3P_Real>(Op # "_gfx940_vcd") Pfl_VCD.DstRC:$vdst,
- Pfl_VCD.Src0RC64:$src0, Pfl_VCD.Src1RC64:$src1, Pfl_VCD.Src2RC64:$src2,
- cbsz:$cbsz, abid:$abid, blgp:$blgp)>, PredicateControl;
- }
- } // End Predicates = [isGFX940Plus]
+ if !ne(NameFrom, NameTo) then {
+ def : InstAlias <NameTo # " " # PS_ACD.AsmOperands,
+ (!cast<VOP3P_Real>(Op # "_gfx940_acd") Pfl_ACD.DstRC:$vdst,
+ Pfl_ACD.Src0RC64:$src0, Pfl_ACD.Src1RC64:$src1, Pfl_ACD.Src2RC64:$src2,
+ cbsz:$cbsz, abid:$abid, blgp:$blgp)>, PredicateControl;
+ def : InstAlias <NameTo # " " # PS_VCD.AsmOperands,
+ (!cast<VOP3P_Real>(Op # "_gfx940_vcd") Pfl_VCD.DstRC:$vdst,
+ Pfl_VCD.Src0RC64:$src0, Pfl_VCD.Src1RC64:$src1, Pfl_VCD.Src2RC64:$src2,
+ cbsz:$cbsz, abid:$abid, blgp:$blgp)>, PredicateControl;
+ }
}
multiclass VOP3P_Real_MFMA_gfx940<bits<7> op, string Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic,
VOP3_Pseudo PS_ACD = !cast<VOP3_Pseudo>(NAME # "_e64"),
VOP3_Pseudo PS_VCD = !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64")> {
let SubtargetPredicate = isGFX940Plus,
- AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX940",
+ DecoderNamespace = "GFX940",
AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in {
def _gfx940_acd : VOP3P_Real<PS_ACD, SIEncodingFamily.GFX940>,
VOP3Pe_MAI <op, PS_ACD.Pfl, 1>;
@@ -1061,23 +1105,32 @@ multiclass VOP3P_Real_MFMA_gfx940<bits<7> op, string Name = !cast<VOP3_Pseudo>(N
VOP3Pe_MAI <op, PS_VCD.Pfl, 0>;
  } // End SubtargetPredicate = isGFX940Plus, DecoderNamespace = "GFX940"
- defm : VOP3P_Real_MFMA_gfx940_aliases<Name, PS_ACD.Mnemonic, NAME>;
+ let SubtargetPredicate = isGFX940Plus in {
+ defm : VOP3P_Real_MFMA_gfx940_aliases<Name, PS_ACD.Mnemonic, NAME>;
- if !ne(!subst("_1k", "", PS_ACD.Mnemonic), PS_ACD.Mnemonic) then
- defm : VOP3P_Real_MFMA_gfx940_aliases<Name, !subst("_1k", "", PS_ACD.Mnemonic), NAME>;
+ if !ne(!subst("_1k", "", PS_ACD.Mnemonic), PS_ACD.Mnemonic) then
+ defm : VOP3P_Real_MFMA_gfx940_aliases<Name, !subst("_1k", "", PS_ACD.Mnemonic), NAME>;
+ }
}
-multiclass VOP3P_Real_MFMA<bits<7> op, string GFX940Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic> :
- VOP3P_Real_MFMA_gfx90a <op>,
- VOP3P_Real_MFMA_gfx940 <op, GFX940Name> {
+multiclass VOP3P_Real_MFMA_vi<bits<7> op> {
def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl, ?> {
+ let SubtargetPredicate = isGFX8GFX9NotGFX90A;
let AssemblerPredicate = HasMAIInsts;
let DecoderNamespace = "GFX8";
let Constraints = "";
}
}
+multiclass VOP3P_Real_MFMA_vi_gfx90a<bits<7> op> :
+ VOP3P_Real_MFMA_gfx90a <op>,
+ VOP3P_Real_MFMA_vi <op>;
+
+multiclass VOP3P_Real_MFMA<bits<7> op, string GFX940Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic> :
+ VOP3P_Real_MFMA_vi_gfx90a <op>,
+ VOP3P_Real_MFMA_gfx940 <op, GFX940Name>;
+
multiclass VOP3P_Real_SMFMAC<bits<7> op, string alias> {
def _gfx940 : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
VOP3Pe_SMFMAC <op> {
@@ -1087,6 +1140,7 @@ multiclass VOP3P_Real_SMFMAC<bits<7> op, string alias> {
def : MnemonicAlias<alias, !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic>;
}
+let SubtargetPredicate = isGFX8GFX9 in {
defm V_PK_MAD_I16 : VOP3P_Real_vi <0x00>;
defm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x01>;
defm V_PK_ADD_I16 : VOP3P_Real_vi <0x02>;
@@ -1108,15 +1162,14 @@ defm V_PK_MUL_F16 : VOP3P_Real_vi <0x10>;
defm V_PK_MIN_F16 : VOP3P_Real_vi <0x11>;
defm V_PK_MAX_F16 : VOP3P_Real_vi <0x12>;
-
-let SubtargetPredicate = HasMadMixInsts in {
+let OtherPredicates = [HasMadMixInsts] in {
defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x20>;
defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x21>;
defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x22>;
}
-let SubtargetPredicate = HasFmaMixInsts in {
-let DecoderNamespace = "GFX9_DL" in {
+let OtherPredicates = [HasFmaMixInsts],
+ DecoderNamespace = "GFX9_DL" in {
// The mad_mix instructions were renamed and their behaviors changed,
// but the opcode stayed the same so we need to put these in a
// different DecoderNamespace to avoid the ambiguity.
@@ -1124,8 +1177,6 @@ defm V_FMA_MIX_F32 : VOP3P_Real_vi <0x20>;
defm V_FMA_MIXLO_F16 : VOP3P_Real_vi <0x21>;
defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x22>;
}
-}
-
defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x26>;
defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x27>;
@@ -1136,8 +1187,9 @@ defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x2b>;
defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x28>;
defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x2a>;
+} // End SubtargetPredicate = isGFX8GFX9
-let SubtargetPredicate = HasMAIInsts in {
+let OtherPredicates = [HasMAIInsts] in {
defm V_ACCVGPR_READ_B32 : VOP3P_Real_MAI <0x58>;
defm V_ACCVGPR_WRITE_B32 : VOP3P_Real_MAI <0x59>;
@@ -1155,17 +1207,15 @@ defm V_MFMA_I32_32X32X4I8 : VOP3P_Real_MFMA <0x50, "v_mfma_i32_32x32x4_2b_i8">
defm V_MFMA_I32_16X16X4I8 : VOP3P_Real_MFMA <0x51, "v_mfma_i32_16x16x4_4b_i8">;
defm V_MFMA_I32_4X4X4I8 : VOP3P_Real_MFMA <0x52, "v_mfma_i32_4x4x4_16b_i8">;
-let SubtargetPredicate = isGFX908orGFX90A in {
-defm V_MFMA_I32_16X16X16I8 : VOP3P_Real_MFMA <0x55>;
-defm V_MFMA_I32_32X32X8I8 : VOP3P_Real_MFMA <0x54>;
-defm V_MFMA_F32_32X32X2BF16 : VOP3P_Real_MFMA <0x68>;
-defm V_MFMA_F32_16X16X2BF16 : VOP3P_Real_MFMA <0x69>;
-defm V_MFMA_F32_4X4X2BF16 : VOP3P_Real_MFMA <0x6b>;
-defm V_MFMA_F32_32X32X4BF16 : VOP3P_Real_MFMA <0x6c>;
-defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MFMA <0x6d>;
-}
+defm V_MFMA_I32_16X16X16I8 : VOP3P_Real_MFMA_vi_gfx90a <0x55>;
+defm V_MFMA_I32_32X32X8I8 : VOP3P_Real_MFMA_vi_gfx90a <0x54>;
+defm V_MFMA_F32_32X32X2BF16 : VOP3P_Real_MFMA_vi_gfx90a <0x68>;
+defm V_MFMA_F32_16X16X2BF16 : VOP3P_Real_MFMA_vi_gfx90a <0x69>;
+defm V_MFMA_F32_4X4X2BF16 : VOP3P_Real_MFMA_vi_gfx90a <0x6b>;
+defm V_MFMA_F32_32X32X4BF16 : VOP3P_Real_MFMA_vi_gfx90a <0x6c>;
+defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MFMA_vi_gfx90a <0x6d>;
-} // End SubtargetPredicate = HasMAIInsts
+} // End OtherPredicates = [HasMAIInsts]
defm V_MFMA_F32_32X32X4BF16_1K : VOP3P_Real_MFMA_gfx90a <0x63>;
defm V_MFMA_F32_16X16X4BF16_1K : VOP3P_Real_MFMA_gfx90a <0x64>;
@@ -1212,12 +1262,10 @@ defm V_SMFMAC_F32_32X32X32_BF8_FP8 : VOP3P_Real_SMFMAC <0x7d, "v_smfmac_f32_32x3
defm V_SMFMAC_F32_32X32X32_FP8_BF8 : VOP3P_Real_SMFMAC <0x7e, "v_smfmac_f32_32x32x32fp8bf8">;
defm V_SMFMAC_F32_32X32X32_FP8_FP8 : VOP3P_Real_SMFMAC <0x7f, "v_smfmac_f32_32x32x32fp8fp8">;
-let SubtargetPredicate = HasPackedFP32Ops in {
- defm V_PK_FMA_F32 : VOP3P_Real_vi <0x30>;
- defm V_PK_MUL_F32 : VOP3P_Real_vi <0x31>;
- defm V_PK_ADD_F32 : VOP3P_Real_vi <0x32>;
- defm V_PK_MOV_B32 : VOP3P_Real_vi <0x33>;
-} // End SubtargetPredicate = HasPackedFP32Ops
+defm V_PK_FMA_F32 : VOP3P_Real_vi <0x30>;
+defm V_PK_MUL_F32 : VOP3P_Real_vi <0x31>;
+defm V_PK_ADD_F32 : VOP3P_Real_vi <0x32>;
+defm V_PK_MOV_B32 : VOP3P_Real_vi <0x33>;
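// The HasPackedFP32Ops guard on these Reals is now redundant: the pseudos
// carry it (or HasPkMovB32, for V_PK_MOV_B32) as their SubtargetPredicate,
// and Real definitions inherit their predicates from the pseudo.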
//===----------------------------------------------------------------------===//
// GFX10.
@@ -1230,41 +1278,45 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10", VOP3P = 1 in {
}
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10", VOP3P = 1
-multiclass VOP3P_Real_gfx10_gfx11<bits<7> op>
- : VOP3P_Real_gfx10<op>, VOP3P_Real_gfx11<op>;
-
-multiclass VOP3P_Real_gfx10_gfx11_Triple<bits<7> op>
- : VOP3P_Real_gfx10<op>, VOP3P_Realtriple_gfx11<op>;
-
-defm V_PK_MAD_I16 : VOP3P_Real_gfx10_gfx11<0x00>;
-defm V_PK_MUL_LO_U16 : VOP3P_Real_gfx10_gfx11<0x01>;
-defm V_PK_ADD_I16 : VOP3P_Real_gfx10_gfx11<0x02>;
-defm V_PK_SUB_I16 : VOP3P_Real_gfx10_gfx11<0x03>;
-defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10_gfx11<0x04>;
-defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10_gfx11<0x05>;
-defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10_gfx11<0x06>;
-defm V_PK_MAX_I16 : VOP3P_Real_gfx10_gfx11<0x07>;
-defm V_PK_MIN_I16 : VOP3P_Real_gfx10_gfx11<0x08>;
-defm V_PK_MAD_U16 : VOP3P_Real_gfx10_gfx11<0x09>;
-defm V_PK_ADD_U16 : VOP3P_Real_gfx10_gfx11<0x0a>;
-defm V_PK_SUB_U16 : VOP3P_Real_gfx10_gfx11<0x0b>;
-defm V_PK_MAX_U16 : VOP3P_Real_gfx10_gfx11<0x0c>;
-defm V_PK_MIN_U16 : VOP3P_Real_gfx10_gfx11<0x0d>;
-defm V_PK_FMA_F16 : VOP3P_Real_gfx10_gfx11<0x0e>;
-defm V_PK_ADD_F16 : VOP3P_Real_gfx10_gfx11<0x0f>;
-defm V_PK_MUL_F16 : VOP3P_Real_gfx10_gfx11<0x10>;
+multiclass VOP3P_Real_gfx10_gfx11<bits<7> op> :
+ VOP3P_Real_gfx10<op>, VOP3P_Real_Base<GFX11Gen, op>;
+
+multiclass VOP3P_Real_gfx10_gfx11_gfx12<bits<7> op> :
+ VOP3P_Real_gfx10_gfx11<op>, VOP3P_Real_Base<GFX12Gen, op>;
+
+multiclass VOP3P_Real_gfx10_gfx11_gfx12_Triple<bits<7> op> :
+ VOP3P_Real_gfx10<op>, VOP3P_Realtriple<GFX11Gen, op>,
+ VOP3P_Realtriple<GFX12Gen, op>;
+
+defm V_PK_MAD_I16 : VOP3P_Real_gfx10_gfx11_gfx12<0x00>;
+defm V_PK_MUL_LO_U16 : VOP3P_Real_gfx10_gfx11_gfx12<0x01>;
+defm V_PK_ADD_I16 : VOP3P_Real_gfx10_gfx11_gfx12<0x02>;
+defm V_PK_SUB_I16 : VOP3P_Real_gfx10_gfx11_gfx12<0x03>;
+defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10_gfx11_gfx12<0x04>;
+defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10_gfx11_gfx12<0x05>;
+defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10_gfx11_gfx12<0x06>;
+defm V_PK_MAX_I16 : VOP3P_Real_gfx10_gfx11_gfx12<0x07>;
+defm V_PK_MIN_I16 : VOP3P_Real_gfx10_gfx11_gfx12<0x08>;
+defm V_PK_MAD_U16 : VOP3P_Real_gfx10_gfx11_gfx12<0x09>;
+defm V_PK_ADD_U16 : VOP3P_Real_gfx10_gfx11_gfx12<0x0a>;
+defm V_PK_SUB_U16 : VOP3P_Real_gfx10_gfx11_gfx12<0x0b>;
+defm V_PK_MAX_U16 : VOP3P_Real_gfx10_gfx11_gfx12<0x0c>;
+defm V_PK_MIN_U16 : VOP3P_Real_gfx10_gfx11_gfx12<0x0d>;
+defm V_PK_FMA_F16 : VOP3P_Real_gfx10_gfx11_gfx12<0x0e>;
+defm V_PK_ADD_F16 : VOP3P_Real_gfx10_gfx11_gfx12<0x0f>;
+defm V_PK_MUL_F16 : VOP3P_Real_gfx10_gfx11_gfx12<0x10>;
defm V_PK_MIN_F16 : VOP3P_Real_gfx10_gfx11<0x11>;
defm V_PK_MAX_F16 : VOP3P_Real_gfx10_gfx11<0x12>;
-defm V_FMA_MIX_F32 : VOP3P_Real_gfx10_gfx11_Triple <0x20>;
-defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10_gfx11_Triple <0x21>;
-defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10_gfx11_Triple <0x22>;
+defm V_FMA_MIX_F32 : VOP3P_Real_gfx10_gfx11_gfx12_Triple<0x20>;
+defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10_gfx11_gfx12_Triple<0x21>;
+defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10_gfx11_gfx12_Triple<0x22>;
defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x14>;
defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x15>;
-defm V_DOT2_F32_F16 : VOP3P_Real_gfx10_gfx11_Triple <0x13>;
-defm V_DOT4_U32_U8 : VOP3P_Real_gfx10_gfx11 <0x17>;
-defm V_DOT8_U32_U4 : VOP3P_Real_gfx10_gfx11 <0x19>;
+defm V_DOT2_F32_F16 : VOP3P_Real_gfx10_gfx11_gfx12_Triple<0x13>;
+defm V_DOT4_U32_U8 : VOP3P_Real_gfx10_gfx11_gfx12<0x17>;
+defm V_DOT8_U32_U4 : VOP3P_Real_gfx10_gfx11_gfx12<0x19>;
defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x16>;
defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x18>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index 6fc3d0957dce..e5b801048e6d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -1081,6 +1081,8 @@ multiclass FCMP_Pattern <PatFrags cond, Instruction inst, ValueType vt> {
}
}
+defm : FCMP_Pattern <COND_O, V_CMP_O_F32_e64, f32>;
+defm : FCMP_Pattern <COND_UO, V_CMP_U_F32_e64, f32>;
defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F32_e64, f32>;
defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F32_e64, f32>;
defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F32_e64, f32>;
@@ -1088,6 +1090,8 @@ defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F32_e64, f32>;
defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F32_e64, f32>;
defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F32_e64, f32>;
+defm : FCMP_Pattern <COND_O, V_CMP_O_F64_e64, f64>;
+defm : FCMP_Pattern <COND_UO, V_CMP_U_F64_e64, f64>;
defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F64_e64, f64>;
defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F64_e64, f64>;
defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F64_e64, f64>;
@@ -1110,6 +1114,8 @@ defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
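// COND_O ("ordered") is true iff neither operand is NaN, COND_UO
// ("unordered") iff at least one is; the COND_O/COND_UO patterns added here
// map them directly onto the hardware v_cmp_o_*/v_cmp_u_* compares, so e.g.
// IR "fcmp ord float %a, %b" can select straight to v_cmp_o_f32_e64 instead
// of going through a multi-instruction expansion.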
let OtherPredicates = [HasTrue16BitInsts] in {
+defm : FCMP_Pattern <COND_O, V_CMP_O_F16_t16_e64, f16>;
+defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_t16_e64, f16>;
defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_t16_e64, f16>;
defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_t16_e64, f16>;
defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_t16_e64, f16>;
@@ -1126,6 +1132,8 @@ defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_t16_e64, f16>;
} // End OtherPredicates = [HasTrue16BitInsts]
let OtherPredicates = [NotHasTrue16BitInsts] in {
+defm : FCMP_Pattern <COND_O, V_CMP_O_F16_e64, f16>;
+defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_e64, f16>;
defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_e64, f16>;
defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_e64, f16>;
defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_e64, f16>;
@@ -1315,53 +1323,52 @@ class VOPC64_DPP8_NoDst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// GFX11.
+// GFX11, GFX12
//===----------------------------------------------------------------------===//
-let AssemblerPredicate = isGFX11Only in {
- multiclass VOPC_Real_gfx11<bits<9> op> {
+multiclass VOPC_Real_Base<GFXGen Gen, bits<9> op> {
+ let AssemblerPredicate = Gen.AssemblerPredicate in {
defvar ps32 = !cast<VOPC_Pseudo>(NAME#"_e32");
defvar ps64 = !cast<VOP3_Pseudo>(NAME#"_e64");
- let DecoderNamespace = "GFX11" in {
- def _e32_gfx11 : VOPC_Real<ps32, SIEncodingFamily.GFX11>,
- VOPCe<op{7-0}>;
- def _e64_gfx11 : VOP3_Real<ps64, SIEncodingFamily.GFX11>,
- VOP3a_gfx11<{0, op}, ps64.Pfl> {
+ let DecoderNamespace = Gen.DecoderNamespace in {
+ def _e32#Gen.Suffix : VOPC_Real<ps32, Gen.Subtarget>,
+ VOPCe<op{7-0}>;
+ def _e64#Gen.Suffix : VOP3_Real<ps64, Gen.Subtarget>,
+ VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
// Encoding used for VOPC instructions encoded as VOP3 differs from
// VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
bits<8> sdst;
let Inst{7-0} = sdst;
}
- } // End DecoderNamespace = "GFX11"
+ } // End DecoderNamespace = Gen.DecoderNamespace
- defm : VOPCInstAliases<NAME, "gfx11">;
+  defm : VOPCInstAliases<NAME, !substr(Gen.Suffix, 1)>;
if ps32.Pfl.HasExtDPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(NAME #"_e32" #"_dpp");
defvar AsmDPP = ps32.Pfl.AsmDPP16;
- let DecoderNamespace = "DPPGFX11" in {
- def _e32_dpp_gfx11 : VOPC_DPP16_SIMC<op{7-0}, psDPP,
- SIEncodingFamily.GFX11>;
- def _e32_dpp_w32_gfx11 : VOPC_DPP16<op{7-0}, psDPP> {
+ let DecoderNamespace = "DPP"#Gen.DecoderNamespace in {
+ def _e32_dpp#Gen.Suffix : VOPC_DPP16_SIMC<op{7-0}, psDPP, Gen.Subtarget>;
+ def _e32_dpp_w32#Gen.Suffix : VOPC_DPP16<op{7-0}, psDPP> {
let AsmString = psDPP.OpName # " vcc_lo, " # AsmDPP;
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave32;
}
- def _e32_dpp_w64_gfx11 : VOPC_DPP16<op{7-0}, psDPP> {
+ def _e32_dpp_w64#Gen.Suffix : VOPC_DPP16<op{7-0}, psDPP> {
let AsmString = psDPP.OpName # " vcc, " # AsmDPP;
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave64;
}
}
defvar AsmDPP8 = ps32.Pfl.AsmDPP8;
- let DecoderNamespace = "DPP8GFX11" in {
- def _e32_dpp8_gfx11 : VOPC_DPP8<op{7-0}, ps32>;
- def _e32_dpp8_w32_gfx11 : VOPC_DPP8<op{7-0}, ps32> {
+ let DecoderNamespace = "DPP8"#Gen.DecoderNamespace in {
+ def _e32_dpp8#Gen.Suffix : VOPC_DPP8<op{7-0}, ps32>;
+ def _e32_dpp8_w32#Gen.Suffix : VOPC_DPP8<op{7-0}, ps32> {
let AsmString = ps32.OpName # " vcc_lo, " # AsmDPP8;
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave32;
}
- def _e32_dpp8_w64_gfx11 : VOPC_DPP8<op{7-0}, ps32> {
+ def _e32_dpp8_w64#Gen.Suffix : VOPC_DPP8<op{7-0}, ps32> {
let AsmString = ps32.OpName # " vcc, " # AsmDPP8;
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave64;
@@ -1371,83 +1378,84 @@ let AssemblerPredicate = isGFX11Only in {
if ps64.Pfl.HasExtVOP3DPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(NAME #"_e64" #"_dpp");
defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
- let DecoderNamespace = "DPPGFX11" in {
- def _e64_dpp_gfx11 : VOPC64_DPP16_Dst<{0, op}, psDPP>,
- SIMCInstr<psDPP.PseudoInstr, SIEncodingFamily.GFX11>;
- def _e64_dpp_w32_gfx11 : VOPC64_DPP16_Dst<{0, op}, psDPP> {
+ let DecoderNamespace = "DPP"#Gen.DecoderNamespace in {
+ def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP>,
+ SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget>;
+ def _e64_dpp_w32#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP> {
let AsmString = psDPP.OpName # " vcc_lo, " # AsmDPP;
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave32;
}
- def _e64_dpp_w64_gfx11 : VOPC64_DPP16_Dst<{0, op}, psDPP> {
+ def _e64_dpp_w64#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP> {
let AsmString = psDPP.OpName # " vcc, " # AsmDPP;
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave64;
}
}
defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8;
- let DecoderNamespace = "DPP8GFX11" in {
- def _e64_dpp8_gfx11 : VOPC64_DPP8_Dst<{0, op}, ps64>;
- def _e64_dpp8_w32_gfx11 : VOPC64_DPP8_Dst<{0, op}, ps64> {
+ let DecoderNamespace = "DPP8"#Gen.DecoderNamespace in {
+ def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64>;
+ def _e64_dpp8_w32#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64> {
let AsmString = ps32.OpName # " vcc_lo, " # AsmDPP8;
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave32;
}
- def _e64_dpp8_w64_gfx11 : VOPC64_DPP8_Dst<{0, op}, ps64> {
+ def _e64_dpp8_w64#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64> {
let AsmString = ps32.OpName # " vcc, " # AsmDPP8;
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave64;
}
}
}
+  } // End AssemblerPredicate = Gen.AssemblerPredicate
+}
- }
-
- multiclass VOPC_Real_with_name_gfx11<bits<9> op, string OpName,
- string asm_name, string pseudo_mnemonic = ""> {
+multiclass VOPC_Real_with_name<GFXGen Gen, bits<9> op, string OpName,
+ string asm_name, string pseudo_mnemonic = ""> {
+ let AssemblerPredicate = Gen.AssemblerPredicate in {
defvar ps32 = !cast<VOPC_Pseudo>(OpName#"_e32");
defvar ps64 = !cast<VOP3_Pseudo>(OpName#"_e64");
- let DecoderNamespace = "GFX11" in {
- def _e32_gfx11 :
+ let DecoderNamespace = Gen.DecoderNamespace in {
+ def _e32#Gen.Suffix :
// 32 and 64 bit forms of the instruction have _e32 and _e64
// respectively appended to their assembly mnemonic.
// _e64 is printed as part of the VOPDstS64orS32 operand, whereas
// the destination-less 32bit forms add it to the asmString here.
- VOPC_Real<ps32, SIEncodingFamily.GFX11, asm_name#"_e32">,
+ VOPC_Real<ps32, Gen.Subtarget, asm_name#"_e32">,
VOPCe<op{7-0}>,
MnemonicAlias<!if(!empty(pseudo_mnemonic), ps32.Mnemonic,
pseudo_mnemonic),
asm_name, ps32.AsmVariantName>,
- Requires<[isGFX11Plus]>;
- def _e64_gfx11 :
- VOP3_Real<ps64, SIEncodingFamily.GFX11, asm_name>,
- VOP3a_gfx11<{0, op}, ps64.Pfl>,
+ Requires<[Gen.AssemblerPredicate]>;
+ def _e64#Gen.Suffix :
+ VOP3_Real<ps64, Gen.Subtarget, asm_name>,
+ VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl>,
MnemonicAlias<!if(!empty(pseudo_mnemonic), ps64.Mnemonic,
pseudo_mnemonic),
asm_name, ps64.AsmVariantName>,
- Requires<[isGFX11Plus]> {
+ Requires<[Gen.AssemblerPredicate]> {
// Encoding used for VOPC instructions encoded as VOP3 differs from
// VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
bits<8> sdst;
let Inst{7-0} = sdst;
}
- } // End DecoderNamespace = "GFX11"
+ } // End DecoderNamespace = Gen.DecoderNamespace
- defm : VOPCInstAliases<OpName, "gfx11", NAME, asm_name>;
+ defm : VOPCInstAliases<OpName, !substr(Gen.Suffix, 1), NAME, asm_name>;
if ps32.Pfl.HasExtDPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName #"_e32" #"_dpp");
defvar AsmDPP = ps32.Pfl.AsmDPP16;
- let DecoderNamespace = "DPPGFX11" in {
- def _e32_dpp_gfx11 : VOPC_DPP16_SIMC<op{7-0}, psDPP,
- SIEncodingFamily.GFX11, asm_name>;
- def _e32_dpp_w32_gfx11
+ let DecoderNamespace = "DPP"#Gen.DecoderNamespace in {
+ def _e32_dpp#Gen.Suffix : VOPC_DPP16_SIMC<op{7-0}, psDPP,
+ Gen.Subtarget, asm_name>;
+ def _e32_dpp_w32#Gen.Suffix
: VOPC_DPP16<op{7-0}, psDPP, asm_name> {
let AsmString = asm_name # " vcc_lo, " # AsmDPP;
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave32;
}
- def _e32_dpp_w64_gfx11
+ def _e32_dpp_w64#Gen.Suffix
: VOPC_DPP16<op{7-0}, psDPP, asm_name> {
let AsmString = asm_name # " vcc, " # AsmDPP;
let isAsmParserOnly = 1;
@@ -1455,15 +1463,15 @@ let AssemblerPredicate = isGFX11Only in {
}
}
defvar AsmDPP8 = ps32.Pfl.AsmDPP8;
- let DecoderNamespace = "DPP8GFX11" in {
- def _e32_dpp8_gfx11 : VOPC_DPP8<op{7-0}, ps32, asm_name>;
- def _e32_dpp8_w32_gfx11
+ let DecoderNamespace = "DPP8"#Gen.DecoderNamespace in {
+ def _e32_dpp8#Gen.Suffix : VOPC_DPP8<op{7-0}, ps32, asm_name>;
+ def _e32_dpp8_w32#Gen.Suffix
: VOPC_DPP8<op{7-0}, ps32, asm_name> {
let AsmString = asm_name # " vcc_lo, " # AsmDPP8;
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave32;
}
- def _e32_dpp8_w64_gfx11
+ def _e32_dpp8_w64#Gen.Suffix
: VOPC_DPP8<op{7-0}, ps32, asm_name> {
let AsmString = asm_name # " vcc, " # AsmDPP8;
let isAsmParserOnly = 1;
@@ -1475,16 +1483,16 @@ let AssemblerPredicate = isGFX11Only in {
if ps64.Pfl.HasExtVOP3DPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName #"_e64" #"_dpp");
defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
- let DecoderNamespace = "DPPGFX11" in {
- def _e64_dpp_gfx11 : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name>,
- SIMCInstr<psDPP.PseudoInstr, SIEncodingFamily.GFX11>;
- def _e64_dpp_w32_gfx11
+ let DecoderNamespace = "DPP"#Gen.DecoderNamespace in {
+ def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name>,
+ SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget>;
+ def _e64_dpp_w32#Gen.Suffix
: VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name> {
let AsmString = asm_name # " vcc_lo, " # AsmDPP;
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave32;
}
- def _e64_dpp_w64_gfx11
+ def _e64_dpp_w64#Gen.Suffix
: VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name> {
let AsmString = asm_name # " vcc, " # AsmDPP;
let isAsmParserOnly = 1;
@@ -1492,15 +1500,15 @@ let AssemblerPredicate = isGFX11Only in {
}
}
defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8;
- let DecoderNamespace = "DPP8GFX11" in {
- def _e64_dpp8_gfx11 : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name>;
- def _e64_dpp8_w32_gfx11
+ let DecoderNamespace = "DPP8"#Gen.DecoderNamespace in {
+ def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name>;
+ def _e64_dpp8_w32#Gen.Suffix
: VOPC64_DPP8_Dst<{0, op}, ps64, asm_name> {
let AsmString = asm_name # " vcc_lo, " # AsmDPP8;
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave32;
}
- def _e64_dpp8_w64_gfx11
+ def _e64_dpp8_w64#Gen.Suffix
: VOPC64_DPP8_Dst<{0, op}, ps64, asm_name> {
let AsmString = asm_name # " vcc, " # AsmDPP8;
let isAsmParserOnly = 1;
@@ -1508,44 +1516,47 @@ let AssemblerPredicate = isGFX11Only in {
}
}
}
- }
+  } // End AssemblerPredicate = Gen.AssemblerPredicate
+}
- multiclass VOPC_Real_t16_gfx11<bits<9> op, string asm_name,
- string OpName = NAME> : VOPC_Real_with_name_gfx11<op, OpName, asm_name>;
+multiclass VOPC_Real_t16<GFXGen Gen, bits<9> op, string asm_name,
+ string OpName = NAME, string pseudo_mnemonic = ""> :
+ VOPC_Real_with_name<Gen, op, OpName, asm_name, pseudo_mnemonic>;
- multiclass VOPCX_Real_gfx11<bits<9> op> {
+multiclass VOPCX_Real<GFXGen Gen, bits<9> op> {
+ let AssemblerPredicate = Gen.AssemblerPredicate in {
defvar ps32 = !cast<VOPC_Pseudo>(NAME#"_nosdst_e32");
defvar ps64 = !cast<VOP3_Pseudo>(NAME#"_nosdst_e64");
- let DecoderNamespace = "GFX11" in {
- def _e32_gfx11 :
- VOPC_Real<ps32, SIEncodingFamily.GFX11>,
+ let DecoderNamespace = Gen.DecoderNamespace in {
+ def _e32#Gen.Suffix :
+ VOPC_Real<ps32, Gen.Subtarget>,
VOPCe<op{7-0}> {
let AsmString = !subst("_nosdst", "", ps32.PseudoInstr)
# " " # ps32.AsmOperands;
}
- def _e64_gfx11 :
- VOP3_Real<ps64, SIEncodingFamily.GFX11>,
- VOP3a_gfx11<{0, op}, ps64.Pfl> {
+ def _e64#Gen.Suffix :
+ VOP3_Real<ps64, Gen.Subtarget>,
+ VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
let Inst{7-0} = ?; // sdst
let AsmString = !subst("_nosdst", "", ps64.Mnemonic)
# "{_e64} " # ps64.AsmOperands;
}
- } // End DecoderNamespace = "GFX11"
+ } // End DecoderNamespace = Gen.DecoderNamespace
- defm : VOPCXInstAliases<NAME, "gfx11">;
+ defm : VOPCXInstAliases<NAME, !substr(Gen.Suffix, 1)>;
if ps32.Pfl.HasExtDPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(NAME #"_nosdst_e32" #"_dpp");
defvar AsmDPP = ps32.Pfl.AsmDPP16;
- let DecoderNamespace = "DPPGFX11" in {
- def _e32_dpp_gfx11
- : VOPC_DPP16_SIMC<op{7-0}, psDPP, SIEncodingFamily.GFX11> {
+ let DecoderNamespace = "DPP"#Gen.DecoderNamespace in {
+ def _e32_dpp#Gen.Suffix
+ : VOPC_DPP16_SIMC<op{7-0}, psDPP, Gen.Subtarget> {
let AsmString = !subst("_nosdst", "", psDPP.OpName) # " " # AsmDPP;
}
}
defvar AsmDPP8 = ps32.Pfl.AsmDPP8;
- let DecoderNamespace = "DPP8GFX11" in {
- def _e32_dpp8_gfx11 : VOPC_DPP8<op{7-0}, ps32> {
+ let DecoderNamespace = "DPP8"#Gen.DecoderNamespace in {
+ def _e32_dpp8#Gen.Suffix : VOPC_DPP8<op{7-0}, ps32> {
let AsmString = !subst("_nosdst", "", ps32.OpName) # " " # AsmDPP8;
}
}
@@ -1554,268 +1565,305 @@ let AssemblerPredicate = isGFX11Only in {
if ps64.Pfl.HasExtVOP3DPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(NAME #"_nosdst_e64" #"_dpp");
defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
- let DecoderNamespace = "DPPGFX11" in {
- def _e64_dpp_gfx11
+ let DecoderNamespace = "DPP"#Gen.DecoderNamespace in {
+ def _e64_dpp#Gen.Suffix
: VOPC64_DPP16_NoDst<{0, op}, psDPP>,
- SIMCInstr<psDPP.PseudoInstr, SIEncodingFamily.GFX11> {
+ SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget> {
let AsmString = !subst("_nosdst", "", psDPP.OpName)
# "{_e64_dpp} " # AsmDPP;
}
}
defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8;
- let DecoderNamespace = "DPP8GFX11" in {
- def _e64_dpp8_gfx11 : VOPC64_DPP8_NoDst<{0, op}, ps64> {
+ let DecoderNamespace = "DPP8"#Gen.DecoderNamespace in {
+ def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_NoDst<{0, op}, ps64> {
let AsmString = !subst("_nosdst", "", ps64.OpName)
# "{_e64_dpp} " # AsmDPP8;
}
}
}
- }
+  } // End AssemblerPredicate = Gen.AssemblerPredicate
+}
- multiclass VOPCX_Real_with_name_gfx11<bits<9> op, string OpName,
- string asm_name, string pseudo_mnemonic = ""> {
+multiclass VOPCX_Real_with_name<GFXGen Gen, bits<9> op, string OpName,
+ string asm_name, string pseudo_mnemonic = ""> {
+ let AssemblerPredicate = Gen.AssemblerPredicate in {
defvar ps32 = !cast<VOPC_Pseudo>(OpName#"_nosdst_e32");
defvar ps64 = !cast<VOP3_Pseudo>(OpName#"_nosdst_e64");
- let DecoderNamespace = "GFX11" in {
- def _e32_gfx11
- : VOPC_Real<ps32, SIEncodingFamily.GFX11, asm_name>,
+ let DecoderNamespace = Gen.DecoderNamespace in {
+ def _e32#Gen.Suffix
+ : VOPC_Real<ps32, Gen.Subtarget, asm_name>,
MnemonicAlias<!if(!empty(pseudo_mnemonic), !subst("_nosdst", "", ps32.Mnemonic),
pseudo_mnemonic),
asm_name, ps32.AsmVariantName>,
- Requires<[isGFX11Plus]>,
+ Requires<[Gen.AssemblerPredicate]>,
VOPCe<op{7-0}> {
let AsmString = asm_name # "{_e32} " # ps32.AsmOperands;
}
- def _e64_gfx11
- : VOP3_Real<ps64, SIEncodingFamily.GFX11, asm_name>,
+ def _e64#Gen.Suffix
+ : VOP3_Real<ps64, Gen.Subtarget, asm_name>,
MnemonicAlias<!if(!empty(pseudo_mnemonic), !subst("_nosdst", "", ps64.Mnemonic),
pseudo_mnemonic),
asm_name, ps64.AsmVariantName>,
- Requires<[isGFX11Plus]>,
- VOP3a_gfx11<{0, op}, ps64.Pfl> {
+ Requires<[Gen.AssemblerPredicate]>,
+ VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
let Inst{7-0} = ? ; // sdst
let AsmString = asm_name # "{_e64} " # ps64.AsmOperands;
}
- } // End DecoderNamespace = "GFX11"
+ } // End DecoderNamespace = Gen.DecoderNamespace
- defm : VOPCXInstAliases<OpName, "gfx11", NAME, asm_name>;
+ defm : VOPCXInstAliases<OpName, !substr(Gen.Suffix, 1), NAME, asm_name>;
if ps32.Pfl.HasExtDPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName#"_nosdst_e32"#"_dpp");
- let DecoderNamespace = "DPPGFX11" in {
- def _e32_dpp_gfx11 : VOPC_DPP16_SIMC<op{7-0}, psDPP,
- SIEncodingFamily.GFX11, asm_name>;
+ let DecoderNamespace = "DPP"#Gen.DecoderNamespace in {
+ def _e32_dpp#Gen.Suffix : VOPC_DPP16_SIMC<op{7-0}, psDPP,
+ Gen.Subtarget, asm_name>;
}
- let DecoderNamespace = "DPP8GFX11" in {
- def _e32_dpp8_gfx11 : VOPC_DPP8<op{7-0}, ps32, asm_name>;
+ let DecoderNamespace = "DPP8"#Gen.DecoderNamespace in {
+ def _e32_dpp8#Gen.Suffix : VOPC_DPP8<op{7-0}, ps32, asm_name>;
}
}
if ps64.Pfl.HasExtVOP3DPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName#"_nosdst_e64"#"_dpp");
defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
- let DecoderNamespace = "DPPGFX11" in {
- def _e64_dpp_gfx11
+ let DecoderNamespace = "DPP"#Gen.DecoderNamespace in {
+ def _e64_dpp#Gen.Suffix
: VOPC64_DPP16_NoDst<{0, op}, psDPP, asm_name>,
- SIMCInstr<psDPP.PseudoInstr, SIEncodingFamily.GFX11> {
+ SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget> {
let AsmString = asm_name # "{_e64_dpp} " # AsmDPP;
}
}
defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8;
- let DecoderNamespace = "DPP8GFX11" in {
- def _e64_dpp8_gfx11 : VOPC64_DPP8_NoDst<{0, op}, ps64, asm_name> {
+ let DecoderNamespace = "DPP8"#Gen.DecoderNamespace in {
+ def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_NoDst<{0, op}, ps64, asm_name> {
let AsmString = asm_name # "{_e64_dpp} " # AsmDPP8;
}
}
}
- }
+  } // End AssemblerPredicate = Gen.AssemblerPredicate
+}
- multiclass VOPCX_Real_t16_gfx11<bits<9> op, string asm_name,
- string OpName = NAME> : VOPCX_Real_with_name_gfx11<op, OpName, asm_name>;
+multiclass VOPCX_Real_t16<GFXGen Gen, bits<9> op, string asm_name,
+ string OpName = NAME, string pseudo_mnemonic = ""> :
+ VOPCX_Real_with_name<Gen, op, OpName, asm_name, pseudo_mnemonic>;
+multiclass VOPC_Real_gfx11<bits<9> op> : VOPC_Real_Base<GFX11Gen, op>;
-} // End AssemblerPredicate = isGFX11Only
+multiclass VOPC_Real_with_name_gfx11<bits<9> op, string OpName, string asm_name,
+ string pseudo_mnemonic = "">
+ : VOPC_Real_with_name<GFX11Gen, op, OpName, asm_name, pseudo_mnemonic>;
+
+multiclass VOPCX_Real_gfx11<bits<9> op> : VOPCX_Real<GFX11Gen, op>;
+
+multiclass VOPCX_Real_with_name_gfx11<bits<9> op, string OpName,
+ string asm_name, string pseudo_mnemonic = ""> :
+ VOPCX_Real_with_name<GFX11Gen, op, OpName, asm_name, pseudo_mnemonic>;
+
+multiclass VOPC_Real_gfx11_gfx12<bits<9> op> :
+ VOPC_Real_Base<GFX11Gen, op>, VOPC_Real_Base<GFX12Gen, op>;
+
+multiclass VOPCX_Real_gfx11_gfx12<bits<9> op> :
+ VOPCX_Real<GFX11Gen, op>, VOPCX_Real<GFX12Gen, op>;
+
+multiclass VOPC_Real_t16_gfx11<bits<9> op, string asm_name,
+ string OpName = NAME, string pseudo_mnemonic = ""> :
+ VOPC_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>;
+
+multiclass VOPC_Real_t16_gfx11_gfx12<bits<9> op, string asm_name,
+ string OpName = NAME, string pseudo_mnemonic = ""> :
+ VOPC_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>,
+ VOPC_Real_t16<GFX12Gen, op, asm_name, OpName, pseudo_mnemonic>;
+
+multiclass VOPCX_Real_t16_gfx11<bits<9> op, string asm_name,
+ string OpName = NAME, string pseudo_mnemonic = ""> :
+ VOPCX_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>;
+
+multiclass VOPCX_Real_t16_gfx11_gfx12<bits<9> op, string asm_name,
+ string OpName = NAME, string pseudo_mnemonic = ""> :
+ VOPCX_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>,
+ VOPCX_Real_t16<GFX12Gen, op, asm_name, OpName, pseudo_mnemonic>;
defm V_CMP_F_F16_t16 : VOPC_Real_t16_gfx11<0x000, "v_cmp_f_f16">;
-defm V_CMP_LT_F16_t16 : VOPC_Real_t16_gfx11<0x001, "v_cmp_lt_f16">;
-defm V_CMP_EQ_F16_t16 : VOPC_Real_t16_gfx11<0x002, "v_cmp_eq_f16">;
-defm V_CMP_LE_F16_t16 : VOPC_Real_t16_gfx11<0x003, "v_cmp_le_f16">;
-defm V_CMP_GT_F16_t16 : VOPC_Real_t16_gfx11<0x004, "v_cmp_gt_f16">;
-defm V_CMP_LG_F16_t16 : VOPC_Real_t16_gfx11<0x005, "v_cmp_lg_f16">;
-defm V_CMP_GE_F16_t16 : VOPC_Real_t16_gfx11<0x006, "v_cmp_ge_f16">;
-defm V_CMP_O_F16_t16 : VOPC_Real_t16_gfx11<0x007, "v_cmp_o_f16">;
-defm V_CMP_U_F16_t16 : VOPC_Real_t16_gfx11<0x008, "v_cmp_u_f16">;
-defm V_CMP_NGE_F16_t16 : VOPC_Real_t16_gfx11<0x009, "v_cmp_nge_f16">;
-defm V_CMP_NLG_F16_t16 : VOPC_Real_t16_gfx11<0x00a, "v_cmp_nlg_f16">;
-defm V_CMP_NGT_F16_t16 : VOPC_Real_t16_gfx11<0x00b, "v_cmp_ngt_f16">;
-defm V_CMP_NLE_F16_t16 : VOPC_Real_t16_gfx11<0x00c, "v_cmp_nle_f16">;
-defm V_CMP_NEQ_F16_t16 : VOPC_Real_t16_gfx11<0x00d, "v_cmp_neq_f16">;
-defm V_CMP_NLT_F16_t16 : VOPC_Real_t16_gfx11<0x00e, "v_cmp_nlt_f16">;
+defm V_CMP_LT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x001, "v_cmp_lt_f16">;
+defm V_CMP_EQ_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x002, "v_cmp_eq_f16">;
+defm V_CMP_LE_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x003, "v_cmp_le_f16">;
+defm V_CMP_GT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x004, "v_cmp_gt_f16">;
+defm V_CMP_LG_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x005, "v_cmp_lg_f16">;
+defm V_CMP_GE_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x006, "v_cmp_ge_f16">;
+defm V_CMP_O_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x007, "v_cmp_o_f16">;
+defm V_CMP_U_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x008, "v_cmp_u_f16">;
+defm V_CMP_NGE_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x009, "v_cmp_nge_f16">;
+defm V_CMP_NLG_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00a, "v_cmp_nlg_f16">;
+defm V_CMP_NGT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00b, "v_cmp_ngt_f16">;
+defm V_CMP_NLE_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00c, "v_cmp_nle_f16">;
+defm V_CMP_NEQ_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00d, "v_cmp_neq_f16">;
+defm V_CMP_NLT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00e, "v_cmp_nlt_f16">;
defm V_CMP_T_F16_t16 : VOPC_Real_with_name_gfx11<0x00f, "V_CMP_TRU_F16_t16", "v_cmp_t_f16", "v_cmp_tru_f16">;
defm V_CMP_F_F32 : VOPC_Real_gfx11<0x010>;
-defm V_CMP_LT_F32 : VOPC_Real_gfx11<0x011>;
-defm V_CMP_EQ_F32 : VOPC_Real_gfx11<0x012>;
-defm V_CMP_LE_F32 : VOPC_Real_gfx11<0x013>;
-defm V_CMP_GT_F32 : VOPC_Real_gfx11<0x014>;
-defm V_CMP_LG_F32 : VOPC_Real_gfx11<0x015>;
-defm V_CMP_GE_F32 : VOPC_Real_gfx11<0x016>;
-defm V_CMP_O_F32 : VOPC_Real_gfx11<0x017>;
-defm V_CMP_U_F32 : VOPC_Real_gfx11<0x018>;
-defm V_CMP_NGE_F32 : VOPC_Real_gfx11<0x019>;
-defm V_CMP_NLG_F32 : VOPC_Real_gfx11<0x01a>;
-defm V_CMP_NGT_F32 : VOPC_Real_gfx11<0x01b>;
-defm V_CMP_NLE_F32 : VOPC_Real_gfx11<0x01c>;
-defm V_CMP_NEQ_F32 : VOPC_Real_gfx11<0x01d>;
-defm V_CMP_NLT_F32 : VOPC_Real_gfx11<0x01e>;
+defm V_CMP_LT_F32 : VOPC_Real_gfx11_gfx12<0x011>;
+defm V_CMP_EQ_F32 : VOPC_Real_gfx11_gfx12<0x012>;
+defm V_CMP_LE_F32 : VOPC_Real_gfx11_gfx12<0x013>;
+defm V_CMP_GT_F32 : VOPC_Real_gfx11_gfx12<0x014>;
+defm V_CMP_LG_F32 : VOPC_Real_gfx11_gfx12<0x015>;
+defm V_CMP_GE_F32 : VOPC_Real_gfx11_gfx12<0x016>;
+defm V_CMP_O_F32 : VOPC_Real_gfx11_gfx12<0x017>;
+defm V_CMP_U_F32 : VOPC_Real_gfx11_gfx12<0x018>;
+defm V_CMP_NGE_F32 : VOPC_Real_gfx11_gfx12<0x019>;
+defm V_CMP_NLG_F32 : VOPC_Real_gfx11_gfx12<0x01a>;
+defm V_CMP_NGT_F32 : VOPC_Real_gfx11_gfx12<0x01b>;
+defm V_CMP_NLE_F32 : VOPC_Real_gfx11_gfx12<0x01c>;
+defm V_CMP_NEQ_F32 : VOPC_Real_gfx11_gfx12<0x01d>;
+defm V_CMP_NLT_F32 : VOPC_Real_gfx11_gfx12<0x01e>;
defm V_CMP_T_F32 : VOPC_Real_with_name_gfx11<0x01f, "V_CMP_TRU_F32", "v_cmp_t_f32">;
defm V_CMP_T_F64 : VOPC_Real_with_name_gfx11<0x02f, "V_CMP_TRU_F64", "v_cmp_t_f64">;
-defm V_CMP_LT_I16_t16 : VOPC_Real_t16_gfx11<0x031, "v_cmp_lt_i16">;
-defm V_CMP_EQ_I16_t16 : VOPC_Real_t16_gfx11<0x032, "v_cmp_eq_i16">;
-defm V_CMP_LE_I16_t16 : VOPC_Real_t16_gfx11<0x033, "v_cmp_le_i16">;
-defm V_CMP_GT_I16_t16 : VOPC_Real_t16_gfx11<0x034, "v_cmp_gt_i16">;
-defm V_CMP_NE_I16_t16 : VOPC_Real_t16_gfx11<0x035, "v_cmp_ne_i16">;
-defm V_CMP_GE_I16_t16 : VOPC_Real_t16_gfx11<0x036, "v_cmp_ge_i16">;
-defm V_CMP_LT_U16_t16 : VOPC_Real_t16_gfx11<0x039, "v_cmp_lt_u16">;
-defm V_CMP_EQ_U16_t16 : VOPC_Real_t16_gfx11<0x03a, "v_cmp_eq_u16">;
-defm V_CMP_LE_U16_t16 : VOPC_Real_t16_gfx11<0x03b, "v_cmp_le_u16">;
-defm V_CMP_GT_U16_t16 : VOPC_Real_t16_gfx11<0x03c, "v_cmp_gt_u16">;
-defm V_CMP_NE_U16_t16 : VOPC_Real_t16_gfx11<0x03d, "v_cmp_ne_u16">;
-defm V_CMP_GE_U16_t16 : VOPC_Real_t16_gfx11<0x03e, "v_cmp_ge_u16">;
+defm V_CMP_LT_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x031, "v_cmp_lt_i16">;
+defm V_CMP_EQ_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x032, "v_cmp_eq_i16">;
+defm V_CMP_LE_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x033, "v_cmp_le_i16">;
+defm V_CMP_GT_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x034, "v_cmp_gt_i16">;
+defm V_CMP_NE_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x035, "v_cmp_ne_i16">;
+defm V_CMP_GE_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x036, "v_cmp_ge_i16">;
+defm V_CMP_LT_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x039, "v_cmp_lt_u16">;
+defm V_CMP_EQ_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03a, "v_cmp_eq_u16">;
+defm V_CMP_LE_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03b, "v_cmp_le_u16">;
+defm V_CMP_GT_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03c, "v_cmp_gt_u16">;
+defm V_CMP_NE_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03d, "v_cmp_ne_u16">;
+defm V_CMP_GE_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03e, "v_cmp_ge_u16">;
defm V_CMP_F_I32 : VOPC_Real_gfx11<0x040>;
-defm V_CMP_LT_I32 : VOPC_Real_gfx11<0x041>;
-defm V_CMP_EQ_I32 : VOPC_Real_gfx11<0x042>;
-defm V_CMP_LE_I32 : VOPC_Real_gfx11<0x043>;
-defm V_CMP_GT_I32 : VOPC_Real_gfx11<0x044>;
-defm V_CMP_NE_I32 : VOPC_Real_gfx11<0x045>;
-defm V_CMP_GE_I32 : VOPC_Real_gfx11<0x046>;
+defm V_CMP_LT_I32 : VOPC_Real_gfx11_gfx12<0x041>;
+defm V_CMP_EQ_I32 : VOPC_Real_gfx11_gfx12<0x042>;
+defm V_CMP_LE_I32 : VOPC_Real_gfx11_gfx12<0x043>;
+defm V_CMP_GT_I32 : VOPC_Real_gfx11_gfx12<0x044>;
+defm V_CMP_NE_I32 : VOPC_Real_gfx11_gfx12<0x045>;
+defm V_CMP_GE_I32 : VOPC_Real_gfx11_gfx12<0x046>;
defm V_CMP_T_I32 : VOPC_Real_gfx11<0x047>;
defm V_CMP_F_U32 : VOPC_Real_gfx11<0x048>;
-defm V_CMP_LT_U32 : VOPC_Real_gfx11<0x049>;
-defm V_CMP_EQ_U32 : VOPC_Real_gfx11<0x04a>;
-defm V_CMP_LE_U32 : VOPC_Real_gfx11<0x04b>;
-defm V_CMP_GT_U32 : VOPC_Real_gfx11<0x04c>;
-defm V_CMP_NE_U32 : VOPC_Real_gfx11<0x04d>;
-defm V_CMP_GE_U32 : VOPC_Real_gfx11<0x04e>;
+defm V_CMP_LT_U32 : VOPC_Real_gfx11_gfx12<0x049>;
+defm V_CMP_EQ_U32 : VOPC_Real_gfx11_gfx12<0x04a>;
+defm V_CMP_LE_U32 : VOPC_Real_gfx11_gfx12<0x04b>;
+defm V_CMP_GT_U32 : VOPC_Real_gfx11_gfx12<0x04c>;
+defm V_CMP_NE_U32 : VOPC_Real_gfx11_gfx12<0x04d>;
+defm V_CMP_GE_U32 : VOPC_Real_gfx11_gfx12<0x04e>;
defm V_CMP_T_U32 : VOPC_Real_gfx11<0x04f>;
defm V_CMP_F_I64 : VOPC_Real_gfx11<0x050>;
-defm V_CMP_LT_I64 : VOPC_Real_gfx11<0x051>;
-defm V_CMP_EQ_I64 : VOPC_Real_gfx11<0x052>;
-defm V_CMP_LE_I64 : VOPC_Real_gfx11<0x053>;
-defm V_CMP_GT_I64 : VOPC_Real_gfx11<0x054>;
-defm V_CMP_NE_I64 : VOPC_Real_gfx11<0x055>;
-defm V_CMP_GE_I64 : VOPC_Real_gfx11<0x056>;
+defm V_CMP_LT_I64 : VOPC_Real_gfx11_gfx12<0x051>;
+defm V_CMP_EQ_I64 : VOPC_Real_gfx11_gfx12<0x052>;
+defm V_CMP_LE_I64 : VOPC_Real_gfx11_gfx12<0x053>;
+defm V_CMP_GT_I64 : VOPC_Real_gfx11_gfx12<0x054>;
+defm V_CMP_NE_I64 : VOPC_Real_gfx11_gfx12<0x055>;
+defm V_CMP_GE_I64 : VOPC_Real_gfx11_gfx12<0x056>;
defm V_CMP_T_I64 : VOPC_Real_gfx11<0x057>;
defm V_CMP_F_U64 : VOPC_Real_gfx11<0x058>;
-defm V_CMP_LT_U64 : VOPC_Real_gfx11<0x059>;
-defm V_CMP_EQ_U64 : VOPC_Real_gfx11<0x05a>;
-defm V_CMP_LE_U64 : VOPC_Real_gfx11<0x05b>;
-defm V_CMP_GT_U64 : VOPC_Real_gfx11<0x05c>;
-defm V_CMP_NE_U64 : VOPC_Real_gfx11<0x05d>;
-defm V_CMP_GE_U64 : VOPC_Real_gfx11<0x05e>;
+defm V_CMP_LT_U64 : VOPC_Real_gfx11_gfx12<0x059>;
+defm V_CMP_EQ_U64 : VOPC_Real_gfx11_gfx12<0x05a>;
+defm V_CMP_LE_U64 : VOPC_Real_gfx11_gfx12<0x05b>;
+defm V_CMP_GT_U64 : VOPC_Real_gfx11_gfx12<0x05c>;
+defm V_CMP_NE_U64 : VOPC_Real_gfx11_gfx12<0x05d>;
+defm V_CMP_GE_U64 : VOPC_Real_gfx11_gfx12<0x05e>;
defm V_CMP_T_U64 : VOPC_Real_gfx11<0x05f>;
-defm V_CMP_CLASS_F16_t16 : VOPC_Real_t16_gfx11<0x07d, "v_cmp_class_f16">;
-defm V_CMP_CLASS_F32 : VOPC_Real_gfx11<0x07e>;
-defm V_CMP_CLASS_F64 : VOPC_Real_gfx11<0x07f>;
+defm V_CMP_CLASS_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x07d, "v_cmp_class_f16">;
+defm V_CMP_CLASS_F32 : VOPC_Real_gfx11_gfx12<0x07e>;
+defm V_CMP_CLASS_F64 : VOPC_Real_gfx11_gfx12<0x07f>;
defm V_CMPX_F_F16_t16 : VOPCX_Real_t16_gfx11<0x080, "v_cmpx_f_f16">;
-defm V_CMPX_LT_F16_t16 : VOPCX_Real_t16_gfx11<0x081, "v_cmpx_lt_f16">;
-defm V_CMPX_EQ_F16_t16 : VOPCX_Real_t16_gfx11<0x082, "v_cmpx_eq_f16">;
-defm V_CMPX_LE_F16_t16 : VOPCX_Real_t16_gfx11<0x083, "v_cmpx_le_f16">;
-defm V_CMPX_GT_F16_t16 : VOPCX_Real_t16_gfx11<0x084, "v_cmpx_gt_f16">;
-defm V_CMPX_LG_F16_t16 : VOPCX_Real_t16_gfx11<0x085, "v_cmpx_lg_f16">;
-defm V_CMPX_GE_F16_t16 : VOPCX_Real_t16_gfx11<0x086, "v_cmpx_ge_f16">;
-defm V_CMPX_O_F16_t16 : VOPCX_Real_t16_gfx11<0x087, "v_cmpx_o_f16">;
-defm V_CMPX_U_F16_t16 : VOPCX_Real_t16_gfx11<0x088, "v_cmpx_u_f16">;
-defm V_CMPX_NGE_F16_t16 : VOPCX_Real_t16_gfx11<0x089, "v_cmpx_nge_f16">;
-defm V_CMPX_NLG_F16_t16 : VOPCX_Real_t16_gfx11<0x08a, "v_cmpx_nlg_f16">;
-defm V_CMPX_NGT_F16_t16 : VOPCX_Real_t16_gfx11<0x08b, "v_cmpx_ngt_f16">;
-defm V_CMPX_NLE_F16_t16 : VOPCX_Real_t16_gfx11<0x08c, "v_cmpx_nle_f16">;
-defm V_CMPX_NEQ_F16_t16 : VOPCX_Real_t16_gfx11<0x08d, "v_cmpx_neq_f16">;
-defm V_CMPX_NLT_F16_t16 : VOPCX_Real_t16_gfx11<0x08e, "v_cmpx_nlt_f16">;
+defm V_CMPX_LT_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x081, "v_cmpx_lt_f16">;
+defm V_CMPX_EQ_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x082, "v_cmpx_eq_f16">;
+defm V_CMPX_LE_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x083, "v_cmpx_le_f16">;
+defm V_CMPX_GT_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x084, "v_cmpx_gt_f16">;
+defm V_CMPX_LG_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x085, "v_cmpx_lg_f16">;
+defm V_CMPX_GE_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x086, "v_cmpx_ge_f16">;
+defm V_CMPX_O_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x087, "v_cmpx_o_f16">;
+defm V_CMPX_U_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x088, "v_cmpx_u_f16">;
+defm V_CMPX_NGE_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x089, "v_cmpx_nge_f16">;
+defm V_CMPX_NLG_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08a, "v_cmpx_nlg_f16">;
+defm V_CMPX_NGT_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08b, "v_cmpx_ngt_f16">;
+defm V_CMPX_NLE_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08c, "v_cmpx_nle_f16">;
+defm V_CMPX_NEQ_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08d, "v_cmpx_neq_f16">;
+defm V_CMPX_NLT_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08e, "v_cmpx_nlt_f16">;
defm V_CMPX_T_F16_t16 : VOPCX_Real_with_name_gfx11<0x08f, "V_CMPX_TRU_F16_t16", "v_cmpx_t_f16", "v_cmpx_tru_f16">;
defm V_CMPX_F_F32 : VOPCX_Real_gfx11<0x090>;
-defm V_CMPX_LT_F32 : VOPCX_Real_gfx11<0x091>;
-defm V_CMPX_EQ_F32 : VOPCX_Real_gfx11<0x092>;
-defm V_CMPX_LE_F32 : VOPCX_Real_gfx11<0x093>;
-defm V_CMPX_GT_F32 : VOPCX_Real_gfx11<0x094>;
-defm V_CMPX_LG_F32 : VOPCX_Real_gfx11<0x095>;
-defm V_CMPX_GE_F32 : VOPCX_Real_gfx11<0x096>;
-defm V_CMPX_O_F32 : VOPCX_Real_gfx11<0x097>;
-defm V_CMPX_U_F32 : VOPCX_Real_gfx11<0x098>;
-defm V_CMPX_NGE_F32 : VOPCX_Real_gfx11<0x099>;
-defm V_CMPX_NLG_F32 : VOPCX_Real_gfx11<0x09a>;
-defm V_CMPX_NGT_F32 : VOPCX_Real_gfx11<0x09b>;
-defm V_CMPX_NLE_F32 : VOPCX_Real_gfx11<0x09c>;
-defm V_CMPX_NEQ_F32 : VOPCX_Real_gfx11<0x09d>;
-defm V_CMPX_NLT_F32 : VOPCX_Real_gfx11<0x09e>;
+defm V_CMPX_LT_F32 : VOPCX_Real_gfx11_gfx12<0x091>;
+defm V_CMPX_EQ_F32 : VOPCX_Real_gfx11_gfx12<0x092>;
+defm V_CMPX_LE_F32 : VOPCX_Real_gfx11_gfx12<0x093>;
+defm V_CMPX_GT_F32 : VOPCX_Real_gfx11_gfx12<0x094>;
+defm V_CMPX_LG_F32 : VOPCX_Real_gfx11_gfx12<0x095>;
+defm V_CMPX_GE_F32 : VOPCX_Real_gfx11_gfx12<0x096>;
+defm V_CMPX_O_F32 : VOPCX_Real_gfx11_gfx12<0x097>;
+defm V_CMPX_U_F32 : VOPCX_Real_gfx11_gfx12<0x098>;
+defm V_CMPX_NGE_F32 : VOPCX_Real_gfx11_gfx12<0x099>;
+defm V_CMPX_NLG_F32 : VOPCX_Real_gfx11_gfx12<0x09a>;
+defm V_CMPX_NGT_F32 : VOPCX_Real_gfx11_gfx12<0x09b>;
+defm V_CMPX_NLE_F32 : VOPCX_Real_gfx11_gfx12<0x09c>;
+defm V_CMPX_NEQ_F32 : VOPCX_Real_gfx11_gfx12<0x09d>;
+defm V_CMPX_NLT_F32 : VOPCX_Real_gfx11_gfx12<0x09e>;
defm V_CMPX_T_F32 : VOPCX_Real_with_name_gfx11<0x09f, "V_CMPX_TRU_F32", "v_cmpx_t_f32">;
defm V_CMPX_F_F64 : VOPCX_Real_gfx11<0x0a0>;
-defm V_CMPX_LT_F64 : VOPCX_Real_gfx11<0x0a1>;
-defm V_CMPX_EQ_F64 : VOPCX_Real_gfx11<0x0a2>;
-defm V_CMPX_LE_F64 : VOPCX_Real_gfx11<0x0a3>;
-defm V_CMPX_GT_F64 : VOPCX_Real_gfx11<0x0a4>;
-defm V_CMPX_LG_F64 : VOPCX_Real_gfx11<0x0a5>;
-defm V_CMPX_GE_F64 : VOPCX_Real_gfx11<0x0a6>;
-defm V_CMPX_O_F64 : VOPCX_Real_gfx11<0x0a7>;
-defm V_CMPX_U_F64 : VOPCX_Real_gfx11<0x0a8>;
-defm V_CMPX_NGE_F64 : VOPCX_Real_gfx11<0x0a9>;
-defm V_CMPX_NLG_F64 : VOPCX_Real_gfx11<0x0aa>;
-defm V_CMPX_NGT_F64 : VOPCX_Real_gfx11<0x0ab>;
-defm V_CMPX_NLE_F64 : VOPCX_Real_gfx11<0x0ac>;
-defm V_CMPX_NEQ_F64 : VOPCX_Real_gfx11<0x0ad>;
-defm V_CMPX_NLT_F64 : VOPCX_Real_gfx11<0x0ae>;
+defm V_CMPX_LT_F64 : VOPCX_Real_gfx11_gfx12<0x0a1>;
+defm V_CMPX_EQ_F64 : VOPCX_Real_gfx11_gfx12<0x0a2>;
+defm V_CMPX_LE_F64 : VOPCX_Real_gfx11_gfx12<0x0a3>;
+defm V_CMPX_GT_F64 : VOPCX_Real_gfx11_gfx12<0x0a4>;
+defm V_CMPX_LG_F64 : VOPCX_Real_gfx11_gfx12<0x0a5>;
+defm V_CMPX_GE_F64 : VOPCX_Real_gfx11_gfx12<0x0a6>;
+defm V_CMPX_O_F64 : VOPCX_Real_gfx11_gfx12<0x0a7>;
+defm V_CMPX_U_F64 : VOPCX_Real_gfx11_gfx12<0x0a8>;
+defm V_CMPX_NGE_F64 : VOPCX_Real_gfx11_gfx12<0x0a9>;
+defm V_CMPX_NLG_F64 : VOPCX_Real_gfx11_gfx12<0x0aa>;
+defm V_CMPX_NGT_F64 : VOPCX_Real_gfx11_gfx12<0x0ab>;
+defm V_CMPX_NLE_F64 : VOPCX_Real_gfx11_gfx12<0x0ac>;
+defm V_CMPX_NEQ_F64 : VOPCX_Real_gfx11_gfx12<0x0ad>;
+defm V_CMPX_NLT_F64 : VOPCX_Real_gfx11_gfx12<0x0ae>;
defm V_CMPX_T_F64 : VOPCX_Real_with_name_gfx11<0x0af, "V_CMPX_TRU_F64", "v_cmpx_t_f64">;
-defm V_CMPX_LT_I16_t16 : VOPCX_Real_t16_gfx11<0x0b1, "v_cmpx_lt_i16">;
-defm V_CMPX_EQ_I16_t16 : VOPCX_Real_t16_gfx11<0x0b2, "v_cmpx_eq_i16">;
-defm V_CMPX_LE_I16_t16 : VOPCX_Real_t16_gfx11<0x0b3, "v_cmpx_le_i16">;
-defm V_CMPX_GT_I16_t16 : VOPCX_Real_t16_gfx11<0x0b4, "v_cmpx_gt_i16">;
-defm V_CMPX_NE_I16_t16 : VOPCX_Real_t16_gfx11<0x0b5, "v_cmpx_ne_i16">;
-defm V_CMPX_GE_I16_t16 : VOPCX_Real_t16_gfx11<0x0b6, "v_cmpx_ge_i16">;
-defm V_CMPX_LT_U16_t16 : VOPCX_Real_t16_gfx11<0x0b9, "v_cmpx_lt_u16">;
-defm V_CMPX_EQ_U16_t16 : VOPCX_Real_t16_gfx11<0x0ba, "v_cmpx_eq_u16">;
-defm V_CMPX_LE_U16_t16 : VOPCX_Real_t16_gfx11<0x0bb, "v_cmpx_le_u16">;
-defm V_CMPX_GT_U16_t16 : VOPCX_Real_t16_gfx11<0x0bc, "v_cmpx_gt_u16">;
-defm V_CMPX_NE_U16_t16 : VOPCX_Real_t16_gfx11<0x0bd, "v_cmpx_ne_u16">;
-defm V_CMPX_GE_U16_t16 : VOPCX_Real_t16_gfx11<0x0be, "v_cmpx_ge_u16">;
+defm V_CMPX_LT_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b1, "v_cmpx_lt_i16">;
+defm V_CMPX_EQ_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b2, "v_cmpx_eq_i16">;
+defm V_CMPX_LE_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b3, "v_cmpx_le_i16">;
+defm V_CMPX_GT_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b4, "v_cmpx_gt_i16">;
+defm V_CMPX_NE_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b5, "v_cmpx_ne_i16">;
+defm V_CMPX_GE_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b6, "v_cmpx_ge_i16">;
+defm V_CMPX_LT_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b9, "v_cmpx_lt_u16">;
+defm V_CMPX_EQ_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0ba, "v_cmpx_eq_u16">;
+defm V_CMPX_LE_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0bb, "v_cmpx_le_u16">;
+defm V_CMPX_GT_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0bc, "v_cmpx_gt_u16">;
+defm V_CMPX_NE_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0bd, "v_cmpx_ne_u16">;
+defm V_CMPX_GE_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0be, "v_cmpx_ge_u16">;
defm V_CMPX_F_I32 : VOPCX_Real_gfx11<0x0c0>;
-defm V_CMPX_LT_I32 : VOPCX_Real_gfx11<0x0c1>;
-defm V_CMPX_EQ_I32 : VOPCX_Real_gfx11<0x0c2>;
-defm V_CMPX_LE_I32 : VOPCX_Real_gfx11<0x0c3>;
-defm V_CMPX_GT_I32 : VOPCX_Real_gfx11<0x0c4>;
-defm V_CMPX_NE_I32 : VOPCX_Real_gfx11<0x0c5>;
-defm V_CMPX_GE_I32 : VOPCX_Real_gfx11<0x0c6>;
+defm V_CMPX_LT_I32 : VOPCX_Real_gfx11_gfx12<0x0c1>;
+defm V_CMPX_EQ_I32 : VOPCX_Real_gfx11_gfx12<0x0c2>;
+defm V_CMPX_LE_I32 : VOPCX_Real_gfx11_gfx12<0x0c3>;
+defm V_CMPX_GT_I32 : VOPCX_Real_gfx11_gfx12<0x0c4>;
+defm V_CMPX_NE_I32 : VOPCX_Real_gfx11_gfx12<0x0c5>;
+defm V_CMPX_GE_I32 : VOPCX_Real_gfx11_gfx12<0x0c6>;
defm V_CMPX_T_I32 : VOPCX_Real_gfx11<0x0c7>;
defm V_CMPX_F_U32 : VOPCX_Real_gfx11<0x0c8>;
-defm V_CMPX_LT_U32 : VOPCX_Real_gfx11<0x0c9>;
-defm V_CMPX_EQ_U32 : VOPCX_Real_gfx11<0x0ca>;
-defm V_CMPX_LE_U32 : VOPCX_Real_gfx11<0x0cb>;
-defm V_CMPX_GT_U32 : VOPCX_Real_gfx11<0x0cc>;
-defm V_CMPX_NE_U32 : VOPCX_Real_gfx11<0x0cd>;
-defm V_CMPX_GE_U32 : VOPCX_Real_gfx11<0x0ce>;
+defm V_CMPX_LT_U32 : VOPCX_Real_gfx11_gfx12<0x0c9>;
+defm V_CMPX_EQ_U32 : VOPCX_Real_gfx11_gfx12<0x0ca>;
+defm V_CMPX_LE_U32 : VOPCX_Real_gfx11_gfx12<0x0cb>;
+defm V_CMPX_GT_U32 : VOPCX_Real_gfx11_gfx12<0x0cc>;
+defm V_CMPX_NE_U32 : VOPCX_Real_gfx11_gfx12<0x0cd>;
+defm V_CMPX_GE_U32 : VOPCX_Real_gfx11_gfx12<0x0ce>;
defm V_CMPX_T_U32 : VOPCX_Real_gfx11<0x0cf>;
defm V_CMPX_F_I64 : VOPCX_Real_gfx11<0x0d0>;
-defm V_CMPX_LT_I64 : VOPCX_Real_gfx11<0x0d1>;
-defm V_CMPX_EQ_I64 : VOPCX_Real_gfx11<0x0d2>;
-defm V_CMPX_LE_I64 : VOPCX_Real_gfx11<0x0d3>;
-defm V_CMPX_GT_I64 : VOPCX_Real_gfx11<0x0d4>;
-defm V_CMPX_NE_I64 : VOPCX_Real_gfx11<0x0d5>;
-defm V_CMPX_GE_I64 : VOPCX_Real_gfx11<0x0d6>;
+defm V_CMPX_LT_I64 : VOPCX_Real_gfx11_gfx12<0x0d1>;
+defm V_CMPX_EQ_I64 : VOPCX_Real_gfx11_gfx12<0x0d2>;
+defm V_CMPX_LE_I64 : VOPCX_Real_gfx11_gfx12<0x0d3>;
+defm V_CMPX_GT_I64 : VOPCX_Real_gfx11_gfx12<0x0d4>;
+defm V_CMPX_NE_I64 : VOPCX_Real_gfx11_gfx12<0x0d5>;
+defm V_CMPX_GE_I64 : VOPCX_Real_gfx11_gfx12<0x0d6>;
defm V_CMPX_T_I64 : VOPCX_Real_gfx11<0x0d7>;
defm V_CMPX_F_U64 : VOPCX_Real_gfx11<0x0d8>;
-defm V_CMPX_LT_U64 : VOPCX_Real_gfx11<0x0d9>;
-defm V_CMPX_EQ_U64 : VOPCX_Real_gfx11<0x0da>;
-defm V_CMPX_LE_U64 : VOPCX_Real_gfx11<0x0db>;
-defm V_CMPX_GT_U64 : VOPCX_Real_gfx11<0x0dc>;
-defm V_CMPX_NE_U64 : VOPCX_Real_gfx11<0x0dd>;
-defm V_CMPX_GE_U64 : VOPCX_Real_gfx11<0x0de>;
+defm V_CMPX_LT_U64 : VOPCX_Real_gfx11_gfx12<0x0d9>;
+defm V_CMPX_EQ_U64 : VOPCX_Real_gfx11_gfx12<0x0da>;
+defm V_CMPX_LE_U64 : VOPCX_Real_gfx11_gfx12<0x0db>;
+defm V_CMPX_GT_U64 : VOPCX_Real_gfx11_gfx12<0x0dc>;
+defm V_CMPX_NE_U64 : VOPCX_Real_gfx11_gfx12<0x0dd>;
+defm V_CMPX_GE_U64 : VOPCX_Real_gfx11_gfx12<0x0de>;
defm V_CMPX_T_U64 : VOPCX_Real_gfx11<0x0df>;
-defm V_CMPX_CLASS_F16_t16 : VOPCX_Real_t16_gfx11<0x0fd, "v_cmpx_class_f16">;
-defm V_CMPX_CLASS_F32 : VOPCX_Real_gfx11<0x0fe>;
-defm V_CMPX_CLASS_F64 : VOPCX_Real_gfx11<0x0ff>;
+defm V_CMPX_CLASS_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0fd, "v_cmpx_class_f16">;
+defm V_CMPX_CLASS_F32 : VOPCX_Real_gfx11_gfx12<0x0fe>;
+defm V_CMPX_CLASS_F64 : VOPCX_Real_gfx11_gfx12<0x0ff>;
//===----------------------------------------------------------------------===//
// GFX10.
@@ -1968,10 +2016,13 @@ multiclass VOPCX_Real_gfx6_gfx7_gfx10 <bits<9> op> :
VOPC_Real_gfx6_gfx7<op>, VOPCX_Real_gfx10<op>;
multiclass VOPC_Real_gfx6_gfx7_gfx10_gfx11<bits<9> op> :
- VOPC_Real_gfx6_gfx7_gfx10<op>, VOPC_Real_gfx11<op>;
+ VOPC_Real_gfx6_gfx7_gfx10<op>, VOPC_Real_Base<GFX11Gen, op>;
multiclass VOPCX_Real_gfx6_gfx7_gfx10_gfx11<bits<9> op> :
- VOPCX_Real_gfx6_gfx7_gfx10<op>, VOPCX_Real_gfx11<op>;
+ VOPCX_Real_gfx6_gfx7_gfx10<op>, VOPCX_Real<GFX11Gen, op>;
+
+multiclass VOPC_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<9> op> :
+ VOPC_Real_gfx6_gfx7_gfx10_gfx11<op>, VOPC_Real_Base<GFX12Gen, op>;
defm V_CMP_F_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x000>;
defm V_CMP_LT_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x001>;
@@ -2006,20 +2057,20 @@ defm V_CMPX_NEQ_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x01d>;
defm V_CMPX_NLT_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x01e>;
defm V_CMPX_TRU_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x01f>;
defm V_CMP_F_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x020>;
-defm V_CMP_LT_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x021>;
-defm V_CMP_EQ_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x022>;
-defm V_CMP_LE_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x023>;
-defm V_CMP_GT_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x024>;
-defm V_CMP_LG_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x025>;
-defm V_CMP_GE_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x026>;
-defm V_CMP_O_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x027>;
-defm V_CMP_U_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x028>;
-defm V_CMP_NGE_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x029>;
-defm V_CMP_NLG_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x02a>;
-defm V_CMP_NGT_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x02b>;
-defm V_CMP_NLE_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x02c>;
-defm V_CMP_NEQ_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x02d>;
-defm V_CMP_NLT_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x02e>;
+defm V_CMP_LT_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x021>;
+defm V_CMP_EQ_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x022>;
+defm V_CMP_LE_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x023>;
+defm V_CMP_GT_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x024>;
+defm V_CMP_LG_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x025>;
+defm V_CMP_GE_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x026>;
+defm V_CMP_O_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x027>;
+defm V_CMP_U_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x028>;
+defm V_CMP_NGE_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x029>;
+defm V_CMP_NLG_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02a>;
+defm V_CMP_NGT_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02b>;
+defm V_CMP_NLE_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02c>;
+defm V_CMP_NEQ_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02d>;
+defm V_CMP_NLT_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02e>;
defm V_CMP_TRU_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x02f>;
defm V_CMPX_F_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x030>;
defm V_CMPX_LT_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x031>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPDInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPDInstructions.td
index eb2e9f04022e..c6af3d67c560 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPDInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPDInstructions.td
@@ -54,23 +54,34 @@ class VOPD_MADKe<bits<4> opX, bits<5> opY> : Enc96 {
// VOPD classes
//===----------------------------------------------------------------------===//
+
+class GFXGenD<GFXGen Gen, list<string> DXPseudos, list<string> DYPseudos,
+ Predicate subtargetPred = Gen.AssemblerPredicate> :
+ GFXGen<Gen.AssemblerPredicate, Gen.DecoderNamespace, Gen.Suffix,
+ Gen.Subtarget> {
+ list<string> VOPDXPseudos = DXPseudos;
+ list<string> VOPDYPseudos = DYPseudos;
+ Predicate SubtargetPredicate = subtargetPred;
+}
+
class VOPD_Base<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
- VOPD_Component XasVC, VOPD_Component YasVC>
+ VOPD_Component XasVC, VOPD_Component YasVC, GFXGenD Gen>
: VOPAnyCommon<outs, ins, asm, []>,
VOP<NAME>,
- SIMCInstr<NAME, SIEncodingFamily.GFX11> {
+ SIMCInstr<NAME, Gen.Subtarget> {
// Fields for table indexing
Instruction Opcode = !cast<Instruction>(NAME);
bits<5> OpX = XasVC.VOPDOp;
bits<5> OpY = YasVC.VOPDOp;
+ bits<4> SubTgt = Gen.Subtarget;
let VALU = 1;
- let DecoderNamespace = "GFX11";
- let AssemblerPredicate = isGFX11Plus;
+ let DecoderNamespace = Gen.DecoderNamespace;
+ let AssemblerPredicate = Gen.AssemblerPredicate;
let WaveSizePredicate = isWave32;
let isCodeGenOnly = 0;
- let SubtargetPredicate = isGFX11Plus;
+ let SubtargetPredicate = Gen.SubtargetPredicate;
let AsmMatchConverter = "cvtVOPD";
let Size = 8;
let ReadsModeReg = !or(VDX.ReadsModeReg, VDY.ReadsModeReg);
@@ -97,77 +108,103 @@ class VOPD_Base<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
}
class VOPD<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
- VOPD_Component XasVC, VOPD_Component YasVC>
- : VOPD_Base<outs, ins, asm, VDX, VDY, XasVC, YasVC>,
+ VOPD_Component XasVC, VOPD_Component YasVC, GFXGenD Gen>
+ : VOPD_Base<outs, ins, asm, VDX, VDY, XasVC, YasVC, Gen>,
VOPDe<XasVC.VOPDOp{3-0}, YasVC.VOPDOp> {
let Inst{16-9} = !if (!eq(VDX.Mnemonic, "v_mov_b32"), 0x0, vsrc1X);
let Inst{48-41} = !if (!eq(VDY.Mnemonic, "v_mov_b32"), 0x0, vsrc1Y);
}
class VOPD_MADK<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
- VOPD_Component XasVC, VOPD_Component YasVC>
- : VOPD_Base<outs, ins, asm, VDX, VDY, XasVC, YasVC>,
+ VOPD_Component XasVC, VOPD_Component YasVC, GFXGenD Gen>
+ : VOPD_Base<outs, ins, asm, VDX, VDY, XasVC, YasVC, Gen>,
VOPD_MADKe<XasVC.VOPDOp{3-0}, YasVC.VOPDOp> {
let Inst{16-9} = !if (!eq(VDX.Mnemonic, "v_mov_b32"), 0x0, vsrc1X);
let Inst{48-41} = !if (!eq(VDY.Mnemonic, "v_mov_b32"), 0x0, vsrc1Y);
let Size = 12;
+ let FixedSize = 1;
}
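// Note: the MADK encoding (Enc96) is the 64-bit VOPD word plus one mandatory
// 32-bit literal, so a MADK pair is always exactly 12 bytes; FixedSize = 1
// records that the size cannot vary with the operands.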
// V_DUAL_DOT2ACC_F32_BF16 is a legal instruction, but V_DOT2ACC_F32_BF16 is
-// not. Since we generate the DUAL form by converting from the normal form we
-// will never generate it.
-defvar VOPDYPseudos = [
+// not. V_DUAL_DOT2C_F32_BF16 is a legal instruction on GFX12, but
+// V_DOT2C_F32_F16_e32 is not. Since we generate the DUAL form by converting
+// from the normal form we will never generate them.
+defvar VOPDPseudosCommon = [
"V_FMAC_F32_e32", "V_FMAAK_F32", "V_FMAMK_F32", "V_MUL_F32_e32",
"V_ADD_F32_e32", "V_SUB_F32_e32", "V_SUBREV_F32_e32", "V_MUL_LEGACY_F32_e32",
- "V_MOV_B32_e32", "V_CNDMASK_B32_e32", "V_MAX_F32_e32", "V_MIN_F32_e32",
- "V_DOT2C_F32_F16_e32", "V_ADD_U32_e32", "V_LSHLREV_B32_e32", "V_AND_B32_e32"
+ "V_MOV_B32_e32", "V_CNDMASK_B32_e32", "V_MAX_F32_e32", "V_MIN_F32_e32"
];
-defvar VOPDXPseudos = VOPDYPseudos[0...VOPDX_Max_Index];
+defvar VOPDPseudosGFX11 = ["V_DOT2C_F32_F16_e32"];
+defvar VOPDYOnlyPseudosCommon = ["V_ADD_U32_e32", "V_LSHLREV_B32_e32",
+ "V_AND_B32_e32"];
+
+defvar VOPDXPseudosGFX11 = !listconcat(VOPDPseudosCommon, VOPDPseudosGFX11);
+defvar VOPDXPseudosGFX12 = VOPDPseudosCommon;
+defvar VOPDYPseudosGFX11 = !listconcat(VOPDXPseudosGFX11, VOPDYOnlyPseudosCommon);
+defvar VOPDYPseudosGFX12 = !listconcat(VOPDXPseudosGFX12, VOPDYOnlyPseudosCommon);
+
+def GFX11GenD : GFXGenD<GFX11Gen, VOPDXPseudosGFX11, VOPDYPseudosGFX11>;
+def GFX12GenD : GFXGenD<GFX12Gen, VOPDXPseudosGFX12, VOPDYPseudosGFX12>;
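// Net effect of the lists above: V_DOT2C_F32_F16_e32 is allowed in both the X
// and Y slots on GFX11 and in neither on GFX12, while V_ADD_U32_e32,
// V_LSHLREV_B32_e32 and V_AND_B32_e32 remain Y-only on both generations.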
+
def VOPDDstYOperand : RegisterOperand<VGPR_32, "printRegularOperand"> {
let DecoderMethod = "decodeOperandVOPDDstY";
}
-foreach x = VOPDXPseudos in {
- foreach y = VOPDYPseudos in {
- defvar xInst = !cast<VOP_Pseudo>(x);
- defvar yInst = !cast<VOP_Pseudo>(y);
- defvar XasVC = !cast<VOPD_Component>(x);
- defvar YasVC = !cast<VOPD_Component>(y);
- defvar isMADK = !or(!eq(x, "V_FMAAK_F32"), !eq(x, "V_FMAMK_F32"),
- !eq(y, "V_FMAAK_F32"), !eq(y, "V_FMAMK_F32"));
- // If X or Y is MADK (have a mandatory immediate), all src operands which
- // may contain an optional literal must use the VSrc_*_Deferred operand
- // type. Optional literal operands in MADK VOPD components always use this
- // operand form. If Both X and Y are MADK, the mandatory literal of X
- // additionally must use an alternate operand format which defers to the
- // 'real' Y literal
- defvar isOpXMADK = !or(!eq(x, "V_FMAAK_F32"), !eq(x, "V_FMAMK_F32"));
- defvar isOpYMADK = !or(!eq(y, "V_FMAAK_F32"), !eq(y, "V_FMAMK_F32"));
- defvar OpName = "V_DUAL_" # !substr(x,2) # "_X_" # !substr(y,2);
- defvar outs = (outs VGPRSrc_32:$vdstX, VOPDDstYOperand:$vdstY);
- if !or(isOpXMADK, isOpYMADK) then {
- if !and(isOpXMADK, isOpYMADK) then {
- defvar X_MADK_Pfl = !cast<VOP_MADK_Base>(xInst.Pfl);
- defvar ins = !con(xInst.Pfl.InsVOPDXDeferred, yInst.Pfl.InsVOPDY);
- defvar asm = XasVC.VOPDName #" "# X_MADK_Pfl.AsmVOPDXDeferred #" :: "# YasVC.VOPDName #" "# yInst.Pfl.AsmVOPDY;
- def OpName : VOPD_MADK<outs, ins, asm, xInst, yInst, XasVC, YasVC>;
- } else {
- defvar asm = XasVC.VOPDName #" "# xInst.Pfl.AsmVOPDX #" :: "# YasVC.VOPDName #" "# yInst.Pfl.AsmVOPDY;
- if isOpXMADK then {
- assert !not(isOpYMADK), "Expected only OpX as MADK";
- defvar ins = !con(xInst.Pfl.InsVOPDX, yInst.Pfl.InsVOPDYDeferred);
- def OpName : VOPD_MADK<outs, ins, asm, xInst, yInst, XasVC, YasVC>;
- } else {
- assert !not(isOpXMADK), "Expected only OpY as MADK";
+class getRenamed<string VOPDName, GFXGen Gen> {
+ string ret = !if(!eq(Gen.Subtarget, GFX12Gen.Subtarget),
+ !if(!eq(VOPDName, "v_dual_max_f32"),
+ "v_dual_max_num_f32",
+ !if(!eq(VOPDName, "v_dual_min_f32"),
+ "v_dual_min_num_f32",
+ VOPDName)),
+ VOPDName);
+}
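// Worked example (values follow directly from the !if chain above):
//   getRenamed<"v_dual_max_f32", GFX12GenD>.ret  yields "v_dual_max_num_f32"
//   getRenamed<"v_dual_min_f32", GFX11GenD>.ret  yields "v_dual_min_f32"
// All other names, on either generation, pass through unchanged.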
+
+foreach Gen = [GFX11GenD, GFX12GenD] in {
+ foreach x = Gen.VOPDXPseudos in {
+ foreach y = Gen.VOPDYPseudos in {
+ defvar xInst = !cast<VOP_Pseudo>(x);
+ defvar yInst = !cast<VOP_Pseudo>(y);
+ defvar XasVC = !cast<VOPD_Component>(x);
+ defvar YasVC = !cast<VOPD_Component>(y);
+ defvar xAsmName = getRenamed<XasVC.VOPDName, Gen>.ret;
+ defvar yAsmName = getRenamed<YasVC.VOPDName, Gen>.ret;
+ defvar isMADK = !or(!eq(x, "V_FMAAK_F32"), !eq(x, "V_FMAMK_F32"),
+ !eq(y, "V_FMAAK_F32"), !eq(y, "V_FMAMK_F32"));
+ // If X or Y is MADK (has a mandatory immediate), all src operands which
+ // may contain an optional literal must use the VSrc_*_Deferred operand
+ // type. Optional literal operands in MADK VOPD components always use this
+ // operand form. If both X and Y are MADK, the mandatory literal of X
+ // additionally must use an alternate operand format which defers to the
+ // 'real' Y literal.
+ defvar isOpXMADK = !or(!eq(x, "V_FMAAK_F32"), !eq(x, "V_FMAMK_F32"));
+ defvar isOpYMADK = !or(!eq(y, "V_FMAAK_F32"), !eq(y, "V_FMAMK_F32"));
+ defvar OpName = "V_DUAL_" # !substr(x,2) # "_X_" # !substr(y,2) # Gen.Suffix;
+ defvar outs = (outs VGPRSrc_32:$vdstX, VOPDDstYOperand:$vdstY);
+ if !or(isOpXMADK, isOpYMADK) then {
+ if !and(isOpXMADK, isOpYMADK) then {
+ defvar X_MADK_Pfl = !cast<VOP_MADK_Base>(xInst.Pfl);
defvar ins = !con(xInst.Pfl.InsVOPDXDeferred, yInst.Pfl.InsVOPDY);
- def OpName : VOPD_MADK<outs, ins, asm, xInst, yInst, XasVC, YasVC>;
+ defvar asm = xAsmName #" "# X_MADK_Pfl.AsmVOPDXDeferred #" :: "# yAsmName #" "# yInst.Pfl.AsmVOPDY;
+ def OpName : VOPD_MADK<outs, ins, asm, xInst, yInst, XasVC, YasVC, Gen>;
+ } else {
+ defvar asm = xAsmName #" "# xInst.Pfl.AsmVOPDX #" :: "# yAsmName #" "# yInst.Pfl.AsmVOPDY;
+ if isOpXMADK then {
+ assert !not(isOpYMADK), "Expected only OpX as MADK";
+ defvar ins = !con(xInst.Pfl.InsVOPDX, yInst.Pfl.InsVOPDYDeferred);
+ def OpName : VOPD_MADK<outs, ins, asm, xInst, yInst, XasVC, YasVC, Gen>;
+ } else {
+ assert !not(isOpXMADK), "Expected only OpY as MADK";
+ defvar ins = !con(xInst.Pfl.InsVOPDXDeferred, yInst.Pfl.InsVOPDY);
+ def OpName : VOPD_MADK<outs, ins, asm, xInst, yInst, XasVC, YasVC, Gen>;
+ }
}
+ } else {
+ defvar ins = !con(xInst.Pfl.InsVOPDX, yInst.Pfl.InsVOPDY);
+ defvar asm = xAsmName #" "# xInst.Pfl.AsmVOPDX #" :: "# yAsmName #" "# yInst.Pfl.AsmVOPDY;
+ def OpName : VOPD<outs, ins, asm, xInst, yInst, XasVC, YasVC, Gen>;
}
- } else {
- defvar ins = !con(xInst.Pfl.InsVOPDX, yInst.Pfl.InsVOPDY);
- defvar asm = XasVC.VOPDName #" "# xInst.Pfl.AsmVOPDX #" :: "# YasVC.VOPDName #" "# yInst.Pfl.AsmVOPDY;
- def OpName : VOPD<outs, ins, asm, xInst, yInst, XasVC, YasVC>;
}
}
}
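// Sketch of a single iteration (names derived from the rules above): for
// Gen = GFX12GenD, x = "V_ADD_F32_e32" and y = "V_MOV_B32_e32", the loop
// defines V_DUAL_ADD_F32_e32_X_MOV_B32_e32_gfx12 with AsmString
// "v_dual_add_f32 ... :: v_dual_mov_b32 ...", tied to SIEncodingFamily.GFX12.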
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 3755daf4f9b1..fd4626d902ac 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -29,6 +29,22 @@ class LetDummies {
string DecoderNamespace;
}
+//===----------------------------------------------------------------------===//
+// VOP Subtarget info
+//===----------------------------------------------------------------------===//
+
+class GFXGen<Predicate pred, string dn, string suffix, int sub> {
+ Predicate AssemblerPredicate = pred;
+ string DecoderNamespace = dn;
+ string Suffix = suffix;
+ int Subtarget = sub;
+}
+
+def GFX12Gen : GFXGen<isGFX12Only, "GFX12", "_gfx12", SIEncodingFamily.GFX12>;
+def GFX11Gen : GFXGen<isGFX11Only, "GFX11", "_gfx11", SIEncodingFamily.GFX11>;
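// A later generation would be expected to slot in the same way, e.g.
//   def GFX13Gen : GFXGen<isGFX13Only, "GFX13", "_gfx13", SIEncodingFamily.GFX13>;
// (hypothetical predicate and encoding-family names); the Gen-parameterized
// Real multiclasses below would then cover it without further changes.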
+
+//===----------------------------------------------------------------------===//
+
class VOP <string opName> {
string OpName = opName;
}
@@ -190,6 +206,14 @@ class VOP3_Real <VOP_Pseudo ps, int EncodingFamily, string asm_name = ps.Mnemoni
VOPProfile Pfl = ps.Pfl;
}
+class VOP3_Real_Gen <VOP_Pseudo ps, GFXGen Gen, string asm_name = ps.Mnemonic> :
+ VOP3_Real <ps, Gen.Subtarget, asm_name> {
+ let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts,
+ Gen.AssemblerPredicate);
+ let DecoderNamespace = Gen.DecoderNamespace#
+ !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
+}
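// In other words: a True16 real keeps the plain Gen.DecoderNamespace (e.g.
// "GFX12") and asm-matches under UseRealTrue16Insts, while its fake16
// counterpart decodes under the "_FAKE16"-suffixed namespace with the plain
// generation predicate.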
+
// XXX - Is there any reason to distinguish this from regular VOP3
// here?
class VOP3P_Real<VOP_Pseudo ps, int EncodingFamily, string asm_name = ps.Mnemonic> :
@@ -199,6 +223,12 @@ class VOP3P_Real<VOP_Pseudo ps, int EncodingFamily, string asm_name = ps.Mnemoni
let Constraints = !if(!eq(!substr(ps.Mnemonic,0,6), "v_wmma"), "", ps.Constraints);
}
+class VOP3P_Real_Gen<VOP_Pseudo ps, GFXGen Gen, string asm_name = ps.Mnemonic> :
+ VOP3P_Real<ps, Gen.Subtarget, asm_name> {
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let DecoderNamespace = Gen.DecoderNamespace;
+}
+
class VOP3a<VOPProfile P> : Enc64 {
bits<4> src0_modifiers;
bits<9> src0;
@@ -234,7 +264,7 @@ class VOP3a_gfx10<bits<10> op, VOPProfile p> : VOP3a<p> {
let Inst{31-26} = 0x35;
}
-class VOP3a_gfx11<bits<10> op, VOPProfile p> : VOP3a_gfx10<op, p>;
+class VOP3a_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3a_gfx10<op, p>;
class VOP3a_vi <bits<10> op, VOPProfile P> : VOP3a<P> {
let Inst{25-16} = op;
@@ -251,7 +281,7 @@ class VOP3e_gfx10<bits<10> op, VOPProfile p> : VOP3a_gfx10<op, p> {
let Inst{7-0} = !if(p.EmitDst, vdst{7-0}, 0);
}
-class VOP3e_gfx11<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p>;
+class VOP3e_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p>;
class VOP3e_vi <bits<10> op, VOPProfile P> : VOP3a_vi <op, P> {
bits<8> vdst;
@@ -272,9 +302,9 @@ class VOP3OpSel_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> {
let Inst{14} = !if(p.HasDst, src0_modifiers{3}, 0);
}
-class VOP3OpSel_gfx11<bits<10> op, VOPProfile p> : VOP3OpSel_gfx10<op, p>;
+class VOP3OpSel_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3OpSel_gfx10<op, p>;
-class VOP3DotOpSel_gfx11<bits<10> op, VOPProfile p> : VOP3OpSel_gfx11<op, p>{
+class VOP3DotOpSel_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3OpSel_gfx11_gfx12<op, p> {
let Inst{11} = ?;
let Inst{12} = ?;
}
@@ -435,7 +465,7 @@ class VOP3Pe_gfx10 <bits<7> op, VOPProfile P> : VOP3Pe<op, P> {
let Inst{31-23} = 0x198; //encoding
}
-class VOP3Pe_gfx11<bits<7> op, VOPProfile P> : VOP3Pe_gfx10<op, P>;
+class VOP3Pe_gfx11_gfx12<bits<7> op, VOPProfile P> : VOP3Pe_gfx10<op, P>;
class VOP3be_gfx6_gfx7<bits<9> op, VOPProfile p> : VOP3be<p> {
let Inst{25-17} = op;
@@ -448,7 +478,7 @@ class VOP3be_gfx10<bits<10> op, VOPProfile p> : VOP3be<p> {
let Inst{31-26} = 0x35;
}
-class VOP3be_gfx11<bits<10> op, VOPProfile p> : VOP3be_gfx10<op, p>;
+class VOP3be_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3be_gfx10<op, p>;
class VOP3be_vi <bits<10> op, VOPProfile P> : VOP3be<P> {
bits<1> clamp;
@@ -791,8 +821,8 @@ class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[],
string AsmOperands = asmOps;
let AsmMatchConverter = !if(P.HasModifiers, "cvtDPP", "");
- let SubtargetPredicate = !if(P.HasExt64BitDPP, Has64BitDPP, HasDPP);
- let AssemblerPredicate = !if(P.HasExt64BitDPP, Has64BitDPP, HasDPP);
+ let SubtargetPredicate = !if(P.HasExt64BitDPP, HasDPALU_DPP, HasDPP);
+ let AssemblerPredicate = !if(P.HasExt64BitDPP, HasDPALU_DPP, HasDPP);
let AsmVariantName = !if(P.HasExtDPP, AMDGPUAsmVariants.DPP,
AMDGPUAsmVariants.Disable);
let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", "");
@@ -862,8 +892,8 @@ class VOP_DPP_Base <string OpName, VOPProfile P,
let Size = 8;
let AsmMatchConverter = !if(P.HasModifiers, "cvtDPP", "");
- let SubtargetPredicate = !if(P.HasExt64BitDPP, Has64BitDPP, HasDPP);
- let AssemblerPredicate = !if(P.HasExt64BitDPP, Has64BitDPP, HasDPP);
+ let SubtargetPredicate = !if(P.HasExt64BitDPP, HasDPALU_DPP, HasDPP);
+ let AssemblerPredicate = !if(P.HasExt64BitDPP, HasDPALU_DPP, HasDPP);
let AsmVariantName = !if(P.HasExtDPP, AMDGPUAsmVariants.DPP,
AMDGPUAsmVariants.Disable);
let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", "");
@@ -1273,6 +1303,19 @@ multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_f
} // end SubtargetPredicate = isGFX11Plus
}
+class UniformUnaryFragOrOp<SDPatternOperator Op> {
+ SDPatternOperator ret = !if(!or(!isa<SDNode>(Op), !isa<PatFrags>(Op)),
+ UniformUnaryFrag<Op>, Op);
+}
+
+multiclass VOP3PseudoScalarInst<string OpName, VOPProfile P,
+ SDPatternOperator node = null_frag> {
+ def _e64 : VOP3_Pseudo<OpName, P, [(set P.DstVT:$vdst,
+ (UniformUnaryFragOrOp<node>.ret
+ (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp,
+ i32:$omod))))]>;
+}
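// Usage sketch (hypothetical op name; VOP_F32_F32 is an existing profile):
//   defm V_S_SQRT_F32 : VOP3PseudoScalarInst<"v_s_sqrt_f32", VOP_F32_F32, fsqrt>;
// fsqrt is an SDNode, so it is wrapped as UniformUnaryFrag<fsqrt> and the
// pattern matches only uniform values; a plain SDPatternOperator that is
// neither an SDNode nor a PatFrags is used as given.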
+
//===----------------------------------------------------------------------===//
// VOP3 DPP
//===----------------------------------------------------------------------===//
@@ -1294,6 +1337,15 @@ class VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, int subtarget,
string opName = ps.OpName>
: Base_VOP3_DPP16<op, ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>;
+class VOP3_DPP16_Gen<bits<10> op, VOP_DPP_Pseudo ps, GFXGen Gen,
+ string opName = ps.OpName> :
+ VOP3_DPP16 <op, ps, Gen.Subtarget, opName> {
+ let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts,
+ Gen.AssemblerPredicate);
+ let DecoderNamespace = "DPP"#Gen.DecoderNamespace#
+ !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
+}
+
class Base_VOP3_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
: VOP3_DPP8<op, opName, ps.Pfl> {
let VOP3_OPSEL = ps.Pfl.HasOpSel;
@@ -1320,164 +1372,240 @@ class VOP3b_DPP8_Base<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
}
//===----------------------------------------------------------------------===//
-// VOP3 GFX11
+// VOP3 GFX11, GFX12
//===----------------------------------------------------------------------===//
-let AssemblerPredicate = isGFX11Only,
- DecoderNamespace = "GFX11" in {
- multiclass VOP3_Real_Base_gfx11<bits<10> op, string opName = NAME,
- bit isSingle = 0> {
- defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
- let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
- if ps.Pfl.HasOpSel then
- def _e64_gfx11 :
- VOP3_Real<ps, SIEncodingFamily.GFX11>,
- VOP3OpSel_gfx11<op, ps.Pfl>;
- if !not(ps.Pfl.HasOpSel) then
- def _e64_gfx11 :
- VOP3_Real<ps, SIEncodingFamily.GFX11>,
- VOP3e_gfx11<op, ps.Pfl>;
- }
- }
- multiclass VOP3Dot_Real_Base_gfx11<bits<10> op, string opName = NAME,
- bit isSingle = 0> {
- defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
- let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
- def _e64_gfx11 :
- VOP3_Real<ps, SIEncodingFamily.GFX11>,
- VOP3DotOpSel_gfx11<op, ps.Pfl>;
- }
- }
- multiclass VOP3_Real_with_name_gfx11<bits<10> op, string opName,
- string asmName, bit isSingle = 0> {
- defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
- let AsmString = asmName # ps.AsmOperands,
- IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
- if ps.Pfl.HasOpSel then
- def _e64_gfx11 :
- VOP3_Real<ps, SIEncodingFamily.GFX11>,
- VOP3OpSel_gfx11<op, ps.Pfl>;
- if !not(ps.Pfl.HasOpSel) then
- def _e64_gfx11 :
- VOP3_Real<ps, SIEncodingFamily.GFX11>,
- VOP3e_gfx11<op, ps.Pfl>;
- }
- def _gfx11_VOP3_alias : MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Plus]>, LetDummies;
- }
- // for READLANE/WRITELANE
- multiclass VOP3_Real_No_Suffix_gfx11<bits<10> op, string opName = NAME> {
- defvar ps = !cast<VOP_Pseudo>(opName);
- def _e64_gfx11 :
- VOP3_Real<ps, SIEncodingFamily.GFX11>,
- VOP3e_gfx11<op, ps.Pfl>;
- }
- multiclass VOP3_Real_dpp_Base_gfx11<bits<10> op, string opName = NAME> {
- def _e64_dpp_gfx11 : VOP3_DPP16<op, !cast<VOP_DPP_Pseudo>(opName#"_e64"#"_dpp"), SIEncodingFamily.GFX11> {
- let DecoderNamespace = "DPPGFX11";
- }
+multiclass VOP3_Real_Base<GFXGen Gen, bits<10> op, string opName = NAME,
+ bit isSingle = 0> {
+ defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
+ let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
+ if ps.Pfl.HasOpSel then
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<ps, Gen>,
+ VOP3OpSel_gfx11_gfx12<op, ps.Pfl>;
+ if !not(ps.Pfl.HasOpSel) then
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<ps, Gen>,
+ VOP3e_gfx11_gfx12<op, ps.Pfl>;
}
+}
- multiclass VOP3Dot_Real_dpp_Base_gfx11<bits<10> op, string opName = NAME> {
- def _e64_dpp_gfx11 : VOP3_DPP16<op, !cast<VOP_DPP_Pseudo>(opName#"_e64"#"_dpp"), SIEncodingFamily.GFX11> {
- let Inst{11} = ?;
- let Inst{12} = ?;
- let DecoderNamespace = "DPPGFX11";
- }
+multiclass VOP3Dot_Real_Base<GFXGen Gen, bits<10> op, string opName = NAME,
+ bit isSingle = 0> {
+ defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
+ let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<ps, Gen>,
+ VOP3DotOpSel_gfx11_gfx12<op, ps.Pfl>;
}
+}
- multiclass VOP3_Real_dpp_with_name_gfx11<bits<10> op, string opName,
- string asmName> {
- defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
- let AsmString = asmName # ps.Pfl.AsmVOP3DPP16, DecoderNamespace = "DPPGFX11" in {
- defm NAME : VOP3_Real_dpp_Base_gfx11<op, opName>;
- }
- }
- multiclass VOP3_Real_dpp8_Base_gfx11<bits<10> op, string opName = NAME> {
- defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
- def _e64_dpp8_gfx11 : Base_VOP3_DPP8<op, ps> {
- let DecoderNamespace = "DPP8GFX11";
- }
+multiclass VOP3_Real_with_name<GFXGen Gen, bits<10> op, string opName,
+ string asmName, bit isSingle = 0> {
+ defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
+ let AsmString = asmName # ps.AsmOperands,
+ IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
+ if ps.Pfl.HasOpSel then
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<ps, Gen>,
+ VOP3OpSel_gfx11_gfx12<op, ps.Pfl>;
+ if !not(ps.Pfl.HasOpSel) then
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<ps, Gen>,
+ VOP3e_gfx11_gfx12<op, ps.Pfl>;
}
+ def Gen.Suffix#"_VOP3_alias" : MnemonicAlias<ps.Mnemonic, asmName>, Requires<[Gen.AssemblerPredicate]>, LetDummies;
+}
+
+// for READLANE/WRITELANE
+multiclass VOP3_Real_No_Suffix<GFXGen Gen, bits<10> op, string opName = NAME> {
+ defvar ps = !cast<VOP_Pseudo>(opName);
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<ps, Gen>,
+ VOP3e_gfx11_gfx12<op, ps.Pfl>;
+}
+
+multiclass VOP3_Real_dpp_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
+ def _e64_dpp#Gen.Suffix :
+ VOP3_DPP16_Gen<op, !cast<VOP_DPP_Pseudo>(opName#"_e64"#"_dpp"), Gen>;
+}
- multiclass VOP3Dot_Real_dpp8_Base_gfx11<bits<10> op, string opName = NAME> {
- defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
- def _e64_dpp8_gfx11 : Base_VOP3_DPP8<op, ps> {
+multiclass VOP3Dot_Real_dpp_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
+ def _e64_dpp#Gen.Suffix :
+ VOP3_DPP16_Gen<op, !cast<VOP_DPP_Pseudo>(opName#"_e64"#"_dpp"), Gen> {
let Inst{11} = ?;
let Inst{12} = ?;
- let DecoderNamespace = "DPP8GFX11";
}
+}
+
+multiclass VOP3_Real_dpp_with_name<GFXGen Gen, bits<10> op, string opName,
+ string asmName> {
+ defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
+ let AsmString = asmName # ps.Pfl.AsmVOP3DPP16 in {
+ defm NAME : VOP3_Real_dpp_Base<Gen, op, opName>;
}
+}
- multiclass VOP3_Real_dpp8_with_name_gfx11<bits<10> op, string opName,
- string asmName> {
- defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
- let AsmString = asmName # ps.Pfl.AsmVOP3DPP8, DecoderNamespace = "DPP8GFX11" in {
- defm NAME : VOP3_Real_dpp8_Base_gfx11<op, opName>;
- }
+multiclass VOP3_Real_dpp8_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
+ defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
+ def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8<op, ps> {
+ let DecoderNamespace = "DPP8"#Gen.DecoderNamespace;
+ let AssemblerPredicate = Gen.AssemblerPredicate;
}
- multiclass VOP3be_Real_gfx11<bits<10> op, string opName, string asmName,
- bit isSingle = 0> {
- defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
- let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in
- def _e64_gfx11 :
- VOP3_Real<ps, SIEncodingFamily.GFX11, asmName>,
- VOP3be_gfx11<op, ps.Pfl> ;
+}
+
+multiclass VOP3Dot_Real_dpp8_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
+ defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
+ def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8<op, ps> {
+ let Inst{11} = ?;
+ let Inst{12} = ?;
+ let DecoderNamespace = "DPP8"#Gen.DecoderNamespace;
+ let AssemblerPredicate = Gen.AssemblerPredicate;
}
- multiclass VOP3be_Real_dpp_gfx11<bits<10> op, string opName, string asmName> {
- defvar ps = !cast<VOP3_Pseudo>(opName #"_e64");
- defvar dpp_ps = !cast<VOP_DPP_Pseudo>(opName #"_e64" #"_dpp");
- def _e64_dpp_gfx11 : Base_VOP3b_DPP16<op, dpp_ps, asmName>,
- SIMCInstr<dpp_ps.PseudoInstr, SIEncodingFamily.GFX11> {
- let DecoderNamespace = "DPPGFX11";
- }
+}
+
+multiclass VOP3_Real_dpp8_with_name<GFXGen Gen, bits<10> op, string opName,
+ string asmName> {
+ defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
+ let AsmString = asmName # ps.Pfl.AsmVOP3DPP8,
+ DecoderNamespace = "DPP8"#Gen.DecoderNamespace#
+ !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"),
+ AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts,
+ Gen.AssemblerPredicate) in {
+
+ defm NAME : VOP3_Real_dpp8_Base<Gen, op, opName>;
}
- multiclass VOP3be_Real_dpp8_gfx11<bits<10> op, string opName, string asmName> {
- defvar ps = !cast<VOP3_Pseudo>(opName #"_e64");
- def _e64_dpp8_gfx11 : VOP3b_DPP8_Base<op, ps, asmName> {
- let DecoderNamespace = "DPP8GFX11";
- }
+}
+
+multiclass VOP3be_Real<GFXGen Gen, bits<10> op, string opName, string asmName,
+ bit isSingle = 0> {
+ defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
+ let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<ps, Gen, asmName>,
+ VOP3be_gfx11_gfx12<op, ps.Pfl>;
+}
+
+multiclass VOP3be_Real_dpp<GFXGen Gen, bits<10> op, string opName,
+ string asmName> {
+ defvar ps = !cast<VOP3_Pseudo>(opName #"_e64");
+ defvar dpp_ps = !cast<VOP_DPP_Pseudo>(opName #"_e64" #"_dpp");
+ def _e64_dpp#Gen.Suffix : Base_VOP3b_DPP16<op, dpp_ps, asmName>,
+ SIMCInstr<dpp_ps.PseudoInstr, Gen.Subtarget> {
+ let DecoderNamespace = "DPP"#Gen.DecoderNamespace;
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ }
+}
+
+multiclass VOP3be_Real_dpp8<GFXGen Gen, bits<10> op, string opName,
+ string asmName> {
+ defvar ps = !cast<VOP3_Pseudo>(opName #"_e64");
+ def _e64_dpp8#Gen.Suffix : VOP3b_DPP8_Base<op, ps, asmName> {
+ let DecoderNamespace = "DPP8"#Gen.DecoderNamespace;
+ let AssemblerPredicate = Gen.AssemblerPredicate;
}
-} // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11"
+}
// VOP1 and VOP2 depend on these triple defs
-multiclass VOP3_Realtriple_gfx11<bits<10> op,
- bit isSingle = 0, string opName = NAME> :
- VOP3_Real_Base_gfx11<op, opName, isSingle>,
- VOP3_Real_dpp_Base_gfx11<op, opName>,
- VOP3_Real_dpp8_Base_gfx11<op, opName>;
-
-multiclass VOP3Dot_Realtriple_gfx11<bits<10> op,
- bit isSingle = 0, string opName = NAME> :
- VOP3Dot_Real_Base_gfx11<op, opName, isSingle>,
- VOP3Dot_Real_dpp_Base_gfx11<op, opName>,
- VOP3Dot_Real_dpp8_Base_gfx11<op, opName>;
-
-multiclass VOP3Only_Realtriple_gfx11<bits<10> op> :
- VOP3_Realtriple_gfx11<op, 1>;
-
-multiclass VOP3_Realtriple_with_name_gfx11<bits<10> op, string opName,
- string asmName, bit isSingle = 0> :
- VOP3_Real_with_name_gfx11<op, opName, asmName, isSingle>,
- VOP3_Real_dpp_with_name_gfx11<op, opName, asmName>,
- VOP3_Real_dpp8_with_name_gfx11<op, opName, asmName>;
+multiclass VOP3_Realtriple<GFXGen Gen, bits<10> op, bit isSingle = 0,
+ string opName = NAME> :
+ VOP3_Real_Base<Gen, op, opName, isSingle>,
+ VOP3_Real_dpp_Base<Gen, op, opName>,
+ VOP3_Real_dpp8_Base<Gen, op, opName>;
+
+multiclass VOP3Dot_Realtriple<GFXGen Gen, bits<10> op, bit isSingle = 0,
+ string opName = NAME> :
+ VOP3Dot_Real_Base<Gen, op, opName, isSingle>,
+ VOP3Dot_Real_dpp_Base<Gen, op, opName>,
+ VOP3Dot_Real_dpp8_Base<Gen, op, opName>;
+
+multiclass VOP3Only_Realtriple<GFXGen Gen, bits<10> op> :
+ VOP3_Realtriple<Gen, op, 1>;
+
+multiclass VOP3_Realtriple_with_name<GFXGen Gen, bits<10> op, string opName,
+ string asmName, bit isSingle = 0> :
+ VOP3_Real_with_name<Gen, op, opName, asmName, isSingle>,
+ VOP3_Real_dpp_with_name<Gen, op, opName, asmName>,
+ VOP3_Real_dpp8_with_name<Gen, op, opName, asmName>;
+
+multiclass VOP3Only_Realtriple_with_name<GFXGen Gen, bits<10> op, string opName,
+ string asmName> :
+ VOP3_Realtriple_with_name<Gen, op, opName, asmName, 1>;
+
+multiclass VOP3Only_Realtriple_t16<GFXGen Gen, bits<10> op, string asmName,
+ string opName = NAME>
+ : VOP3Only_Realtriple_with_name<Gen, op, opName, asmName>;
+
+multiclass VOP3be_Realtriple<
+ GFXGen Gen, bits<10> op, bit isSingle = 0, string opName = NAME,
+ string asmName = !cast<VOP_Pseudo>(opName#"_e64").Mnemonic> :
+ VOP3be_Real<Gen, op, opName, asmName, isSingle>,
+ VOP3be_Real_dpp<Gen, op, opName, asmName>,
+ VOP3be_Real_dpp8<Gen, op, opName, asmName>;
-multiclass VOP3Only_Realtriple_with_name_gfx11<bits<10> op, string opName,
- string asmName> :
- VOP3_Realtriple_with_name_gfx11<op, opName, asmName, 1>;
+multiclass VOP3beOnly_Realtriple<GFXGen Gen, bits<10> op> :
+ VOP3be_Realtriple<Gen, op, 1>;
+
+//===----------------------------------------------------------------------===//
+// VOP3 GFX11
+//===----------------------------------------------------------------------===//
+
+multiclass VOP3be_Real_gfx11<bits<10> op, string opName, string asmName,
+ bit isSingle = 0> :
+ VOP3be_Real<GFX11Gen, op, opName, asmName, isSingle>;
+
+multiclass VOP3_Real_Base_gfx11<bits<10> op, string opName = NAME,
+ bit isSingle = 0> :
+ VOP3_Real_Base<GFX11Gen, op, opName, isSingle>;
+
+multiclass VOP3_Realtriple_gfx11<bits<10> op, bit isSingle = 0,
+ string opName = NAME> :
+ VOP3_Realtriple<GFX11Gen, op, isSingle, opName>;
multiclass VOP3Only_Realtriple_t16_gfx11<bits<10> op, string asmName,
string opName = NAME>
- : VOP3Only_Realtriple_with_name_gfx11<op, opName, asmName>;
+ : VOP3Only_Realtriple_with_name<GFX11Gen, op, opName, asmName>;
-multiclass VOP3be_Realtriple_gfx11<
- bits<10> op, bit isSingle = 0, string opName = NAME,
- string asmName = !cast<VOP_Pseudo>(opName#"_e64").Mnemonic> :
- VOP3be_Real_gfx11<op, opName, asmName, isSingle>,
- VOP3be_Real_dpp_gfx11<op, opName, asmName>,
- VOP3be_Real_dpp8_gfx11<op, opName, asmName>;
+//===----------------------------------------------------------------------===//
+// VOP3 GFX12
+//===----------------------------------------------------------------------===//
+
+multiclass VOP3Only_Realtriple_gfx12<bits<10> op, bit isSingle = 0> :
+ VOP3_Realtriple<GFX12Gen, op, isSingle>;
+
+// IsSingle is captured from the VOPProfile for these instructions, but the
+// following alternative is more explicit.
+multiclass VOP3Only_Real_Base_gfx12<bits<10> op> :
+ VOP3_Real_Base<GFX12Gen, op, NAME, 1/*IsSingle*/>;
-multiclass VOP3beOnly_Realtriple_gfx11<bits<10> op> :
- VOP3be_Realtriple_gfx11<op, 1>;
+multiclass VOP3Only_Realtriple_t16_gfx12<bits<10> op> :
+ VOP3Only_Realtriple<GFX12Gen, op>;
+
+multiclass VOP3be_Real_with_name_gfx12<bits<10> op, string opName,
+ string asmName, bit isSingle = 0> {
+ defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
+ let AsmString = asmName # ps.AsmOperands,
+ IsSingle = !or(isSingle, ps.Pfl.IsSingle) in
+ def _e64_gfx12 :
+ VOP3_Real_Gen<ps, GFX12Gen, asmName>,
+ VOP3be_gfx11_gfx12<op, ps.Pfl>,
+ MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX12Only]>;
+}
+
+multiclass VOP3_Realtriple_with_name_gfx12<bits<10> op, string opName,
+ string asmName, bit isSingle = 0> :
+ VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, isSingle>;
+
+multiclass VOP3Only_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
+ string asmName> :
+ VOP3Only_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
+ VOP3Only_Realtriple_with_name<GFX12Gen, op, opName, asmName>;
+
+multiclass VOP3Only_Realtriple_with_name_t16_gfx12<bits<10> op, string asmName,
+ string opName = NAME>
+ : VOP3Only_Realtriple_with_name<GFX12Gen, op, opName, asmName>;
+
+//===----------------------------------------------------------------------===//
include "VOPCInstructions.td"
include "VOP1Instructions.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARC.h b/contrib/llvm-project/llvm/lib/Target/ARC/ARC.h
index d8ccc47b89af..b81016d0cee4 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARC.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARC.h
@@ -23,8 +23,7 @@ class ARCTargetMachine;
class FunctionPass;
class PassRegistry;
-FunctionPass *createARCISelDag(ARCTargetMachine &TM,
- CodeGenOpt::Level OptLevel);
+FunctionPass *createARCISelDag(ARCTargetMachine &TM, CodeGenOptLevel OptLevel);
FunctionPass *createARCExpandPseudosPass();
FunctionPass *createARCOptAddrMode();
FunctionPass *createARCBranchFinalizePass();
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelDAGToDAG.cpp
index 2a66cf8fcd22..28e35f8f2a54 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelDAGToDAG.cpp
@@ -45,7 +45,7 @@ public:
ARCDAGToDAGISel() = delete;
- ARCDAGToDAGISel(ARCTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ ARCDAGToDAGISel(ARCTargetMachine &TM, CodeGenOptLevel OptLevel)
: SelectionDAGISel(ID, TM, OptLevel) {}
void Select(SDNode *N) override;
@@ -69,7 +69,7 @@ INITIALIZE_PASS(ARCDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
/// This pass converts a legalized DAG into a ARC-specific DAG, ready for
/// instruction scheduling.
FunctionPass *llvm::createARCISelDag(ARCTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
return new ARCDAGToDAGISel(TM, OptLevel);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.h
index fce4b6980450..ea82289022eb 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.h
@@ -39,8 +39,6 @@ public:
bool useFPForScavengingIndex(const MachineFunction &MF) const override;
- bool supportsBackwardScavenger() const override { return true; }
-
bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.cpp
index 2527d6aad9ca..d4ae3255b32a 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.cpp
@@ -32,7 +32,7 @@ ARCTargetMachine::ARCTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: LLVMTargetMachine(T,
"e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-"
"f32:32:32-i64:32-f64:32-a:0:32-n32",
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.h
index 26d9111502d4..0fc4243ab44a 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.h
@@ -29,7 +29,7 @@ public:
ARCTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
~ARCTargetMachine() override;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.h
index 2013bfd5d093..b96e01822985 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.h
@@ -17,7 +17,6 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/CodeGen.h"
#include <functional>
-#include <vector>
namespace llvm {
@@ -37,7 +36,7 @@ FunctionPass *createARMLowOverheadLoopsPass();
FunctionPass *createARMBlockPlacementPass();
Pass *createARMParallelDSPPass();
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
- CodeGenOpt::Level OptLevel);
+ CodeGenOptLevel OptLevel);
FunctionPass *createA15SDOptimizerPass();
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td
index 4bb20271d0f2..97d1444a553e 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td
@@ -1227,6 +1227,7 @@ include "ARMScheduleA57.td"
include "ARMScheduleM4.td"
include "ARMScheduleM55.td"
include "ARMScheduleM7.td"
+include "ARMScheduleM85.td"
//===----------------------------------------------------------------------===//
// ARM processors
@@ -1511,13 +1512,24 @@ def : ProcessorModel<"cortex-m55", CortexM55Model, [ARMv81mMainline,
HasMVEFloatOps,
FeatureFixCMSE_CVE_2021_35465]>;
-def : ProcessorModel<"cortex-m85", CortexM7Model, [ARMv81mMainline,
+def : ProcessorModel<"cortex-m85", CortexM85Model, [ARMv81mMainline,
FeatureDSP,
FeatureFPARMv8_D16,
FeaturePACBTI,
FeatureUseMISched,
HasMVEFloatOps]>;
+def : ProcessorModel<"cortex-m52", CortexM55Model, [ARMv81mMainline,
+ FeatureDSP,
+ FeatureFPARMv8_D16,
+ FeatureHasNoBranchPredictor,
+ FeaturePACBTI,
+ FeatureUseMISched,
+ FeaturePrefLoopAlign32,
+ FeatureHasSlowFPVMLx,
+ FeatureMVEVectorCostFactor1,
+ HasMVEFloatOps]>;
+
def : ProcNoItin<"cortex-a32", [ARMv8a,
FeatureHWDivThumb,
FeatureHWDivARM,
@@ -1650,7 +1662,7 @@ def : ProcNoItin<"neoverse-n1", [ARMv82a,
FeatureCRC,
FeatureDotProd]>;
-def : ProcNoItin<"neoverse-n2", [ARMv85a,
+def : ProcNoItin<"neoverse-n2", [ARMv9a,
FeatureBF16,
FeatureMatMulInt8]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 69df1d12aa8e..15cda9b9432d 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -136,13 +136,13 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
else if (F.hasOptSize())
// For small size, but speed and debugging illusion preserved
OptimizationGoal = 3;
- else if (TM.getOptLevel() == CodeGenOpt::Aggressive)
+ else if (TM.getOptLevel() == CodeGenOptLevel::Aggressive)
// Aggressively for speed, small size and debug illusion sacrificed
OptimizationGoal = 2;
- else if (TM.getOptLevel() > CodeGenOpt::None)
+ else if (TM.getOptLevel() > CodeGenOptLevel::None)
// For speed, but small size and good debug illusion preserved
OptimizationGoal = 1;
- else // TM.getOptLevel() == CodeGenOpt::None
+ else // TM.getOptLevel() == CodeGenOptLevel::None
// For good debugging, but speed and small size preserved
OptimizationGoal = 5;
@@ -361,25 +361,26 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
const MachineOperand &FlagsOP = MI->getOperand(OpNum - 1);
if (!FlagsOP.isImm())
return true;
- unsigned Flags = FlagsOP.getImm();
+ InlineAsm::Flag F(FlagsOP.getImm());
// This operand may not be the one that actually provides the register. If
// it's tied to a previous one then we should refer instead to that one
// for registers and their classes.
unsigned TiedIdx;
- if (InlineAsm::isUseOperandTiedToDef(Flags, TiedIdx)) {
+ if (F.isUseOperandTiedToDef(TiedIdx)) {
for (OpNum = InlineAsm::MIOp_FirstOperand; TiedIdx; --TiedIdx) {
unsigned OpFlags = MI->getOperand(OpNum).getImm();
- OpNum += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ const InlineAsm::Flag F(OpFlags);
+ OpNum += F.getNumOperandRegisters() + 1;
}
- Flags = MI->getOperand(OpNum).getImm();
+ F = InlineAsm::Flag(MI->getOperand(OpNum).getImm());
// Later code expects OpNum to be pointing at the register rather than
// the flags.
OpNum += 1;
}
- unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ const unsigned NumVals = F.getNumOperandRegisters();
unsigned RC;
bool FirstHalf;
const ARMBaseTargetMachine &ATM =
@@ -394,7 +395,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
// ExtraCode[0] == 'R'.
FirstHalf = !ATM.isLittleEndian();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- if (InlineAsm::hasRegClassConstraint(Flags, RC) &&
+ if (F.hasRegClassConstraint(RC) &&
ARM::GPRPairRegClass.hasSubClassEq(TRI->getRegClass(RC))) {
if (NumVals != 1)
return true;
@@ -1117,6 +1118,50 @@ void ARMAsmPrinter::emitJumpTableTBInst(const MachineInstr *MI,
emitAlignment(Align(2));
}
+std::tuple<const MCSymbol *, uint64_t, const MCSymbol *,
+ codeview::JumpTableEntrySize>
+ARMAsmPrinter::getCodeViewJumpTableInfo(int JTI,
+ const MachineInstr *BranchInstr,
+ const MCSymbol *BranchLabel) const {
+ codeview::JumpTableEntrySize EntrySize;
+ const MCSymbol *BaseLabel;
+ uint64_t BaseOffset = 0;
+ switch (BranchInstr->getOpcode()) {
+ case ARM::BR_JTadd:
+ case ARM::BR_JTr:
+ case ARM::tBR_JTr:
+ // Word relative to the jump table address.
+ EntrySize = codeview::JumpTableEntrySize::UInt32;
+ BaseLabel = GetARMJTIPICJumpTableLabel(JTI);
+ break;
+ case ARM::tTBH_JT:
+ case ARM::t2TBH_JT:
+ // half-word shifted left, relative to *after* the branch instruction.
+ EntrySize = codeview::JumpTableEntrySize::UInt16ShiftLeft;
+ BranchLabel = GetCPISymbol(BranchInstr->getOperand(3).getImm());
+ BaseLabel = BranchLabel;
+ BaseOffset = 4;
+ break;
+ case ARM::tTBB_JT:
+ case ARM::t2TBB_JT:
+ // byte shifted left, relative to *after* the branch instruction.
+ EntrySize = codeview::JumpTableEntrySize::UInt8ShiftLeft;
+ BranchLabel = GetCPISymbol(BranchInstr->getOperand(3).getImm());
+ BaseLabel = BranchLabel;
+ BaseOffset = 4;
+ break;
+ case ARM::t2BR_JT:
+ // Direct jump.
+ BaseLabel = nullptr;
+ EntrySize = codeview::JumpTableEntrySize::Pointer;
+ break;
+ default:
+ llvm_unreachable("Unknown jump table instruction");
+ }
+
+ return std::make_tuple(BaseLabel, BaseOffset, BranchLabel, EntrySize);
+}
+
void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
assert(MI->getFlag(MachineInstr::FrameSetup) &&
"Only instruction which are involved into frame setup code are allowed");
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.h
index bd2d9c762119..33b4417aa9b8 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -89,6 +89,10 @@ public:
void emitJumpTableTBInst(const MachineInstr *MI, unsigned OffsetWidth);
void emitInstruction(const MachineInstr *MI) override;
bool runOnMachineFunction(MachineFunction &F) override;
+ std::tuple<const MCSymbol *, uint64_t, const MCSymbol *,
+ codeview::JumpTableEntrySize>
+ getCodeViewJumpTableInfo(int JTI, const MachineInstr *BranchInstr,
+ const MCSymbol *BranchLabel) const override;
void emitConstantPool() override {
// we emit constant pools ourselves!
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index f903d583d7c6..a0776296b8eb 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1953,46 +1953,32 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
return false;
- switch (Load1->getMachineOpcode()) {
- default:
- return false;
- case ARM::LDRi12:
- case ARM::LDRBi12:
- case ARM::LDRD:
- case ARM::LDRH:
- case ARM::LDRSB:
- case ARM::LDRSH:
- case ARM::VLDRD:
- case ARM::VLDRS:
- case ARM::t2LDRi8:
- case ARM::t2LDRBi8:
- case ARM::t2LDRDi8:
- case ARM::t2LDRSHi8:
- case ARM::t2LDRi12:
- case ARM::t2LDRBi12:
- case ARM::t2LDRSHi12:
- break;
- }
+ auto IsLoadOpcode = [&](unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::LDRi12:
+ case ARM::LDRBi12:
+ case ARM::LDRD:
+ case ARM::LDRH:
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRi12:
+ case ARM::t2LDRBi12:
+ case ARM::t2LDRSHi12:
+ return true;
+ }
+ };
- switch (Load2->getMachineOpcode()) {
- default:
+ if (!IsLoadOpcode(Load1->getMachineOpcode()) ||
+ !IsLoadOpcode(Load2->getMachineOpcode()))
return false;
- case ARM::LDRi12:
- case ARM::LDRBi12:
- case ARM::LDRD:
- case ARM::LDRH:
- case ARM::LDRSB:
- case ARM::LDRSH:
- case ARM::VLDRD:
- case ARM::VLDRS:
- case ARM::t2LDRi8:
- case ARM::t2LDRBi8:
- case ARM::t2LDRSHi8:
- case ARM::t2LDRi12:
- case ARM::t2LDRBi12:
- case ARM::t2LDRSHi12:
- break;
- }
// Check if base addresses and chain operands match.
if (Load1->getOperand(0) != Load2->getOperand(0) ||
@@ -3886,17 +3872,16 @@ unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
llvm_unreachable("Didn't find the number of microops");
}
-int
+std::optional<unsigned>
ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
- const MCInstrDesc &DefMCID,
- unsigned DefClass,
+ const MCInstrDesc &DefMCID, unsigned DefClass,
unsigned DefIdx, unsigned DefAlign) const {
int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
if (RegNo <= 0)
// Def is the address writeback.
return ItinData->getOperandCycle(DefClass, DefIdx);
- int DefCycle;
+ unsigned DefCycle;
if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
// (regno / 2) + (regno % 2) + 1
DefCycle = RegNo / 2 + 1;
@@ -3927,17 +3912,16 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
return DefCycle;
}
-int
+std::optional<unsigned>
ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
- const MCInstrDesc &DefMCID,
- unsigned DefClass,
+ const MCInstrDesc &DefMCID, unsigned DefClass,
unsigned DefIdx, unsigned DefAlign) const {
int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
if (RegNo <= 0)
// Def is the address writeback.
return ItinData->getOperandCycle(DefClass, DefIdx);
- int DefCycle;
+ unsigned DefCycle;
if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
// 4 registers would be issued: 1, 2, 1.
// 5 registers would be issued: 1, 2, 2.
@@ -3962,16 +3946,15 @@ ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
return DefCycle;
}
-int
+std::optional<unsigned>
ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
- const MCInstrDesc &UseMCID,
- unsigned UseClass,
+ const MCInstrDesc &UseMCID, unsigned UseClass,
unsigned UseIdx, unsigned UseAlign) const {
int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
if (RegNo <= 0)
return ItinData->getOperandCycle(UseClass, UseIdx);
- int UseCycle;
+ unsigned UseCycle;
if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
// (regno / 2) + (regno % 2) + 1
UseCycle = RegNo / 2 + 1;
@@ -4002,16 +3985,15 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
return UseCycle;
}
-int
+std::optional<unsigned>
ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
- const MCInstrDesc &UseMCID,
- unsigned UseClass,
+ const MCInstrDesc &UseMCID, unsigned UseClass,
unsigned UseIdx, unsigned UseAlign) const {
int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
if (RegNo <= 0)
return ItinData->getOperandCycle(UseClass, UseIdx);
- int UseCycle;
+ unsigned UseCycle;
if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
UseCycle = RegNo / 2;
if (UseCycle < 2)
@@ -4031,12 +4013,10 @@ ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
return UseCycle;
}
-int
-ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
- const MCInstrDesc &DefMCID,
- unsigned DefIdx, unsigned DefAlign,
- const MCInstrDesc &UseMCID,
- unsigned UseIdx, unsigned UseAlign) const {
+std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
+ const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID,
+ unsigned DefIdx, unsigned DefAlign, const MCInstrDesc &UseMCID,
+ unsigned UseIdx, unsigned UseAlign) const {
unsigned DefClass = DefMCID.getSchedClass();
unsigned UseClass = UseMCID.getSchedClass();
@@ -4046,7 +4026,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// This may be a def / use of a variable_ops instruction, the operand
// latency might be determinable dynamically. Let the target try to
// figure it out.
- int DefCycle = -1;
+ std::optional<unsigned> DefCycle;
bool LdmBypass = false;
switch (DefMCID.getOpcode()) {
default:
@@ -4084,11 +4064,11 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
break;
}
- if (DefCycle == -1)
+ if (!DefCycle)
// We can't seem to determine the result latency of the def, assume it's 2.
DefCycle = 2;
- int UseCycle = -1;
+ std::optional<unsigned> UseCycle;
switch (UseMCID.getOpcode()) {
default:
UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
@@ -4122,21 +4102,24 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
break;
}
- if (UseCycle == -1)
+ if (!UseCycle)
// Assume it's read in the first stage.
UseCycle = 1;
- UseCycle = DefCycle - UseCycle + 1;
- if (UseCycle > 0) {
+ if (UseCycle > *DefCycle + 1)
+ return std::nullopt;
+
+ UseCycle = *DefCycle - *UseCycle + 1;
+ if (UseCycle > 0u) {
if (LdmBypass) {
// It's a variable_ops instruction so we can't use DefIdx here. Just use
// first def operand.
if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
UseClass, UseIdx))
- --UseCycle;
+ UseCycle = *UseCycle - 1;
} else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
UseClass, UseIdx)) {
- --UseCycle;
+ UseCycle = *UseCycle - 1;
}
}
@@ -4376,14 +4359,12 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget,
return Adjust;
}
-int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr &DefMI,
- unsigned DefIdx,
- const MachineInstr &UseMI,
- unsigned UseIdx) const {
+std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
+ const InstrItineraryData *ItinData, const MachineInstr &DefMI,
+ unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
// No operand latency. The caller may fall back to getInstrLatency.
if (!ItinData || ItinData->isEmpty())
- return -1;
+ return std::nullopt;
const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
Register Reg = DefMO.getReg();
@@ -4404,7 +4385,7 @@ int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
ResolvedUseMI =
getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
if (!ResolvedUseMI)
- return -1;
+ return std::nullopt;
}
return getOperandLatencyImpl(
@@ -4412,7 +4393,7 @@ int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
}
-int ARMBaseInstrInfo::getOperandLatencyImpl(
+std::optional<unsigned> ARMBaseInstrInfo::getOperandLatencyImpl(
const InstrItineraryData *ItinData, const MachineInstr &DefMI,
unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
@@ -4444,7 +4425,7 @@ int ARMBaseInstrInfo::getOperandLatencyImpl(
}
if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
- return -1;
+ return std::nullopt;
unsigned DefAlign = DefMI.hasOneMemOperand()
? (*DefMI.memoperands_begin())->getAlign().value()
@@ -4454,25 +4435,25 @@ int ARMBaseInstrInfo::getOperandLatencyImpl(
: 0;
// Get the itinerary's latency if possible, and handle variable_ops.
- int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID,
- UseIdx, UseAlign);
+ std::optional<unsigned> Latency = getOperandLatency(
+ ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
// Unable to find operand latency. The caller may resort to getInstrLatency.
- if (Latency < 0)
- return Latency;
+ if (!Latency)
+ return std::nullopt;
// Adjust for IT block position.
int Adj = DefAdj + UseAdj;
// Adjust for dynamic def-side opcode variants not captured by the itinerary.
Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
- if (Adj >= 0 || (int)Latency > -Adj) {
- return Latency + Adj;
+ if (Adj >= 0 || (int)*Latency > -Adj) {
+ return *Latency + Adj;
}
// Return the itinerary latency, which may be zero but not less than zero.
return Latency;
}
-int
+std::optional<unsigned>
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const {
@@ -4488,10 +4469,11 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return DefMCID.mayLoad() ? 3 : 1;
if (!UseNode->isMachineOpcode()) {
- int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
+ std::optional<unsigned> Latency =
+ ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
int Threshold = 1 + Adj;
- return Latency <= Threshold ? 1 : Latency - Adj;
+ return !Latency || Latency <= (unsigned)Threshold ? 1 : *Latency - Adj;
}
const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
@@ -4503,10 +4485,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
unsigned UseAlign = !UseMN->memoperands_empty()
? (*UseMN->memoperands_begin())->getAlign().value()
: 0;
- int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
- UseMCID, UseIdx, UseAlign);
+ std::optional<unsigned> Latency = getOperandLatency(
+ ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
+ if (!Latency)
+ return std::nullopt;
- if (Latency > 1 &&
+ if (Latency > 1U &&
(Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
Subtarget.isCortexA7())) {
// FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
@@ -4520,7 +4504,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (ShImm == 0 ||
(ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
- --Latency;
+ Latency = *Latency - 1;
break;
}
case ARM::t2LDRs:
@@ -4531,11 +4515,11 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
unsigned ShAmt =
cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
if (ShAmt == 0 || ShAmt == 2)
- --Latency;
+ Latency = *Latency - 1;
break;
}
}
- } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
+ } else if (DefIdx == 0 && Latency > 2U && Subtarget.isSwift()) {
// FIXME: Properly handle all of the latency adjustments for address
// writeback.
switch (DefMCID.getOpcode()) {
@@ -4548,9 +4532,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
if (ShImm == 0 ||
((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
- Latency -= 2;
+ Latency = *Latency - 2;
else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
- --Latency;
+ Latency = *Latency - 1;
break;
}
case ARM::t2LDRs:
@@ -4558,7 +4542,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::t2LDRHs:
case ARM::t2LDRSHs:
// Thumb2 mode: lsl 0-3 only.
- Latency -= 2;
+ Latency = *Latency - 2;
break;
}
}
@@ -4724,7 +4708,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD4LNq32Pseudo_UPD:
// If the address is not 64-bit aligned, the latencies of these
// instructions increases by one.
- ++Latency;
+ Latency = *Latency + 1;
break;
}
@@ -4801,8 +4785,8 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
return Latency;
}
-int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
- SDNode *Node) const {
+unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const {
if (!Node->isMachineOpcode())
return 1;
@@ -4850,8 +4834,9 @@ bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
if (DDomain == ARMII::DomainGeneral) {
unsigned DefClass = DefMI.getDesc().getSchedClass();
- int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
- return (DefCycle != -1 && DefCycle <= 2);
+ std::optional<unsigned> DefCycle =
+ ItinData->getOperandCycle(DefClass, DefIdx);
+ return DefCycle && DefCycle <= 2U;
}
return false;
}
@@ -4978,12 +4963,27 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
TargetFlags |= ARMII::MO_DLLIMPORT;
else if (IsIndirect)
TargetFlags |= ARMII::MO_COFFSTUB;
- } else if (Subtarget.isGVInGOT(GV)) {
+ } else if (IsIndirect) {
TargetFlags |= ARMII::MO_GOT;
}
- BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
- .addGlobalAddress(GV, 0, TargetFlags);
+ if (LoadImmOpc == ARM::tMOVi32imm) { // Thumb-1 execute-only
+ Register CPSRSaveReg = ARM::R12; // Use R12 as scratch register
+ auto APSREncoding =
+ ARMSysReg::lookupMClassSysRegByName("apsr_nzcvq")->Encoding;
+ BuildMI(MBB, MI, DL, get(ARM::t2MRS_M), CPSRSaveReg)
+ .addImm(APSREncoding)
+ .add(predOps(ARMCC::AL));
+ BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
+ .addGlobalAddress(GV, 0, TargetFlags);
+ BuildMI(MBB, MI, DL, get(ARM::t2MSR_M))
+ .addImm(APSREncoding)
+ .addReg(CPSRSaveReg, RegState::Kill)
+ .add(predOps(ARMCC::AL));
+ } else {
+ BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
+ .addGlobalAddress(GV, 0, TargetFlags);
+ }
if (IsIndirect) {
MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
@@ -6435,20 +6435,20 @@ void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB,
MachineBasicBlock::iterator It, bool CFI,
bool Auth) const {
int Align = std::max(Subtarget.getStackAlignment().value(), uint64_t(8));
+ unsigned MIFlags = CFI ? MachineInstr::FrameSetup : 0;
assert(Align >= 8 && Align <= 256);
if (Auth) {
assert(Subtarget.isThumb2());
// Compute PAC in R12. Outlining ensures R12 is dead across the outlined
// sequence.
- BuildMI(MBB, It, DebugLoc(), get(ARM::t2PAC))
- .setMIFlags(MachineInstr::FrameSetup);
+ BuildMI(MBB, It, DebugLoc(), get(ARM::t2PAC)).setMIFlags(MIFlags);
BuildMI(MBB, It, DebugLoc(), get(ARM::t2STRD_PRE), ARM::SP)
.addReg(ARM::R12, RegState::Kill)
.addReg(ARM::LR, RegState::Kill)
.addReg(ARM::SP)
.addImm(-Align)
.add(predOps(ARMCC::AL))
- .setMIFlags(MachineInstr::FrameSetup);
+ .setMIFlags(MIFlags);
} else {
unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM;
BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP)
@@ -6456,7 +6456,7 @@ void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB,
.addReg(ARM::SP)
.addImm(-Align)
.add(predOps(ARMCC::AL))
- .setMIFlags(MachineInstr::FrameSetup);
+ .setMIFlags(MIFlags);
}
if (!CFI)
@@ -6511,6 +6511,7 @@ void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB,
MachineBasicBlock::iterator It,
bool CFI, bool Auth) const {
int Align = Subtarget.getStackAlignment().value();
+ unsigned MIFlags = CFI ? MachineInstr::FrameDestroy : 0;
if (Auth) {
assert(Subtarget.isThumb2());
// Restore return address PAC and LR.
@@ -6521,7 +6522,7 @@ void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB,
.addReg(ARM::SP)
.addImm(Align)
.add(predOps(ARMCC::AL))
- .setMIFlags(MachineInstr::FrameDestroy);
+ .setMIFlags(MIFlags);
// LR authentication is after the CFI instructions, below.
} else {
unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
@@ -6532,7 +6533,7 @@ void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB,
MIB.addReg(0);
MIB.addImm(Subtarget.getStackAlignment().value())
.add(predOps(ARMCC::AL))
- .setMIFlags(MachineInstr::FrameDestroy);
+ .setMIFlags(MIFlags);
}
if (CFI) {
@@ -6731,7 +6732,8 @@ bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(
// the tail predication conversion. This means that the element count
// register has to be live for longer, but that has to be better than
// spill/restore and VPT predication.
- return isVCTP(&MI) && !isPredicated(MI);
+ return (isVCTP(&MI) && !isPredicated(MI)) ||
+ TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}
unsigned llvm::getBLXOpcode(const MachineFunction &MF) {
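
Most of the churn in this file replaces the -1 "unknown latency" sentinel with std::optional<unsigned>, so an absent answer is a distinct state instead of a negative value every caller must remember to test before use. A reduced sketch of the before/after calling convention (the query below is a stand-in, not the LLVM itinerary API):

#include <cassert>
#include <optional>

// Stand-in for an itinerary query that may not know the answer.
static std::optional<unsigned> getOperandCycle(bool Known) {
  if (!Known)
    return std::nullopt; // previously: return -1;
  return 3u;
}

static unsigned latencyWithDefault(bool Known) {
  std::optional<unsigned> Cycle = getOperandCycle(Known);
  if (!Cycle)    // previously: if (Cycle == -1)
    return 2;    // fall back to a default, as the ARM code does for defs
  return *Cycle; // only dereferenced once known to hold a value
}

int main() {
  assert(latencyWithDefault(true) == 3);
  assert(latencyWithDefault(false) == 2);
  return 0;
}
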
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 5efcc1a0d9fc..6aebf3b64e8d 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -316,13 +316,15 @@ public:
unsigned getNumMicroOps(const InstrItineraryData *ItinData,
const MachineInstr &MI) const override;
- int getOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr &DefMI, unsigned DefIdx,
- const MachineInstr &UseMI,
- unsigned UseIdx) const override;
- int getOperandLatency(const InstrItineraryData *ItinData,
- SDNode *DefNode, unsigned DefIdx,
- SDNode *UseNode, unsigned UseIdx) const override;
+ std::optional<unsigned> getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr &DefMI,
+ unsigned DefIdx,
+ const MachineInstr &UseMI,
+ unsigned UseIdx) const override;
+ std::optional<unsigned> getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode,
+ unsigned UseIdx) const override;
/// VFP/NEON execution domains.
std::pair<uint16_t, uint16_t>
@@ -421,34 +423,34 @@ private:
unsigned getInstBundleLength(const MachineInstr &MI) const;
- int getVLDMDefCycle(const InstrItineraryData *ItinData,
- const MCInstrDesc &DefMCID,
- unsigned DefClass,
- unsigned DefIdx, unsigned DefAlign) const;
- int getLDMDefCycle(const InstrItineraryData *ItinData,
- const MCInstrDesc &DefMCID,
- unsigned DefClass,
- unsigned DefIdx, unsigned DefAlign) const;
- int getVSTMUseCycle(const InstrItineraryData *ItinData,
- const MCInstrDesc &UseMCID,
- unsigned UseClass,
- unsigned UseIdx, unsigned UseAlign) const;
- int getSTMUseCycle(const InstrItineraryData *ItinData,
- const MCInstrDesc &UseMCID,
- unsigned UseClass,
- unsigned UseIdx, unsigned UseAlign) const;
- int getOperandLatency(const InstrItineraryData *ItinData,
- const MCInstrDesc &DefMCID,
- unsigned DefIdx, unsigned DefAlign,
- const MCInstrDesc &UseMCID,
- unsigned UseIdx, unsigned UseAlign) const;
-
- int getOperandLatencyImpl(const InstrItineraryData *ItinData,
- const MachineInstr &DefMI, unsigned DefIdx,
- const MCInstrDesc &DefMCID, unsigned DefAdj,
- const MachineOperand &DefMO, unsigned Reg,
- const MachineInstr &UseMI, unsigned UseIdx,
- const MCInstrDesc &UseMCID, unsigned UseAdj) const;
+ std::optional<unsigned> getVLDMDefCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefClass, unsigned DefIdx,
+ unsigned DefAlign) const;
+ std::optional<unsigned> getLDMDefCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefClass, unsigned DefIdx,
+ unsigned DefAlign) const;
+ std::optional<unsigned> getVSTMUseCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &UseMCID,
+ unsigned UseClass, unsigned UseIdx,
+ unsigned UseAlign) const;
+ std::optional<unsigned> getSTMUseCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &UseMCID,
+ unsigned UseClass, unsigned UseIdx,
+ unsigned UseAlign) const;
+ std::optional<unsigned> getOperandLatency(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefIdx, unsigned DefAlign,
+ const MCInstrDesc &UseMCID,
+ unsigned UseIdx,
+ unsigned UseAlign) const;
+
+ std::optional<unsigned> getOperandLatencyImpl(
+ const InstrItineraryData *ItinData, const MachineInstr &DefMI,
+ unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
+ const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
+ unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const;
unsigned getPredicationCost(const MachineInstr &MI) const override;
@@ -456,8 +458,8 @@ private:
const MachineInstr &MI,
unsigned *PredCost = nullptr) const override;
- int getInstrLatency(const InstrItineraryData *ItinData,
- SDNode *Node) const override;
+ unsigned getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const override;
bool hasHighOperandLatency(const TargetSchedModel &SchedModel,
const MachineRegisterInfo *MRI,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBasicBlockInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBasicBlockInfo.cpp
index 6d389cc82730..208d79bf6d70 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBasicBlockInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBasicBlockInfo.cpp
@@ -16,7 +16,6 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/Support/Debug.h"
-#include <vector>
#define DEBUG_TYPE "arm-bb-utils"
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp
index 0383145afdb0..f9d9930cec6d 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -110,7 +110,7 @@ struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) override {
+ const CCValAssign &VA) override {
assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
@@ -123,7 +123,8 @@ struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
}
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
- MachinePointerInfo &MPO, CCValAssign &VA) override {
+ const MachinePointerInfo &MPO,
+ const CCValAssign &VA) override {
Register ExtReg = extendRegister(ValVReg, VA);
auto MMO = MIRBuilder.getMF().getMachineMemOperand(
MPO, MachineMemOperand::MOStore, MemTy, Align(1));
@@ -135,14 +136,14 @@ struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
std::function<void()> *Thunk) override {
assert(Arg.Regs.size() == 1 && "Can't handle multiple regs yet");
- CCValAssign VA = VAs[0];
+ const CCValAssign &VA = VAs[0];
assert(VA.needsCustom() && "Value doesn't need custom handling");
// Custom lowering for other types, such as f16, is currently not supported
if (VA.getValVT() != MVT::f64)
return 0;
- CCValAssign NextVA = VAs[1];
+ const CCValAssign &NextVA = VAs[1];
assert(NextVA.needsCustom() && "Value doesn't need custom handling");
assert(NextVA.getValVT() == MVT::f64 && "Unsupported type");
@@ -255,7 +256,8 @@ struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler {
}
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
- MachinePointerInfo &MPO, CCValAssign &VA) override {
+ const MachinePointerInfo &MPO,
+ const CCValAssign &VA) override {
if (VA.getLocInfo() == CCValAssign::SExt ||
VA.getLocInfo() == CCValAssign::ZExt) {
// If the value is zero- or sign-extended, its size becomes 4 bytes, so
@@ -272,7 +274,7 @@ struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler {
}
MachineInstrBuilder buildLoad(const DstOp &Res, Register Addr, LLT MemTy,
- MachinePointerInfo &MPO) {
+ const MachinePointerInfo &MPO) {
MachineFunction &MF = MIRBuilder.getMF();
auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, MemTy,
@@ -281,7 +283,7 @@ struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) override {
+ const CCValAssign &VA) override {
assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
@@ -310,14 +312,14 @@ struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler {
std::function<void()> *Thunk) override {
assert(Arg.Regs.size() == 1 && "Can't handle multiple regs yet");
- CCValAssign VA = VAs[0];
+ const CCValAssign &VA = VAs[0];
assert(VA.needsCustom() && "Value doesn't need custom handling");
// Custom lowering for other types, such as f16, is currently not supported
if (VA.getValVT() != MVT::f64)
return 0;
- CCValAssign NextVA = VAs[1];
+ const CCValAssign &NextVA = VAs[1];
assert(NextVA.needsCustom() && "Value doesn't need custom handling");
assert(NextVA.getValVT() == MVT::f64 && "Unsupported type");
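
The handler overrides above now take CCValAssign and MachinePointerInfo by const reference, tracking a signature change in the CallLowering base class; the records are read-only here, so copying them per assigned value bought nothing. The override keyword is what makes such a migration safe, as this toy model shows (all names below are invented):

#include <cassert>

struct ValueAssignment { int Reg; };

struct HandlerBase {
  // The base class now takes the assignment record by const reference.
  virtual void assignValueToReg(int VReg, const ValueAssignment &VA) = 0;
  virtual ~HandlerBase() = default;
};

struct MyHandler : HandlerBase {
  int LastReg = -1;
  // If this still took ValueAssignment by value it would no longer match
  // the base method, and 'override' would turn the silent mismatch into a
  // compile error; that is what keeps this kind of migration safe.
  void assignValueToReg(int VReg, const ValueAssignment &VA) override {
    LastReg = VA.Reg + VReg;
  }
};

int main() {
  MyHandler H;
  ValueAssignment VA{40};
  static_cast<HandlerBase &>(H).assignValueToReg(2, VA);
  assert(H.LastReg == 42);
  return 0;
}
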
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index a6682f0ca162..7a3ba5870bc6 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -343,9 +343,9 @@ LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {
// Align blocks where the previous block does not fall through. This may add
// extra NOPs but they will not be executed. It uses the PrefLoopAlignment as a
-// measure of how much to align, and only runs at CodeGenOpt::Aggressive.
+// measure of how much to align, and only runs at CodeGenOptLevel::Aggressive.
static bool AlignBlocks(MachineFunction *MF, const ARMSubtarget *STI) {
- if (MF->getTarget().getOptLevel() != CodeGenOpt::Aggressive ||
+ if (MF->getTarget().getOptLevel() != CodeGenOptLevel::Aggressive ||
MF->getFunction().hasOptSize())
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFastISel.cpp
index 0b35f134ec7b..1d6aaeb7433b 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -2180,7 +2180,7 @@ unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
// Manually compute the global's type to avoid building it when unnecessary.
- Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0);
+ Type *GVTy = PointerType::get(*Context, /*AS=*/0);
EVT LCREVT = TLI.getValueType(DL, GVTy);
if (!LCREVT.isSimple()) return 0;
@@ -2964,7 +2964,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, MVT VT) {
/*AddCurrentAddress=*/UseGOT_PREL);
Align ConstAlign =
- MF->getDataLayout().getPrefTypeAlign(Type::getInt32PtrTy(*Context));
+ MF->getDataLayout().getPrefTypeAlign(PointerType::get(*Context, 0));
unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(CPV, ConstAlign);
MachineMemOperand *CPMMO =
MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
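
Replacing Type::getInt32PtrTy with PointerType::get(Context, AS) is fallout from LLVM's opaque-pointer transition: a pointer type no longer carries a pointee type, only an address space, so nothing about the type being built is specifically "i32". A minimal sketch of the replacement call, assuming a recent LLVM development install to build against:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// With opaque pointers, every pointer in a given address space is the
// same 'ptr' type, so the typed-pointer helpers are gone from new code.
static PointerType *makePtr(LLVMContext &Ctx) {
  // Old: Type::getInt32PtrTy(Ctx, /*AS=*/0);
  return PointerType::get(Ctx, /*AddressSpace=*/0);
}

int main() {
  LLVMContext Ctx;
  return makePtr(Ctx)->isPointerTy() ? 0 : 1;
}
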
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 4496d4928ebe..a3a71a8ec09a 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -2238,18 +2238,20 @@ static bool requiresAAPCSFrameRecord(const MachineFunction &MF) {
(Subtarget.createAAPCSFrameChain() && MF.getFrameInfo().hasCalls());
}
-// Thumb1 may require a spill when storing to a frame index through FP, for
-// cases where FP is a high register (R11). This scans the function for cases
-// where this may happen.
+// Thumb1 may require a spill when storing to a frame index through FP (or any
+// access with execute-only), for cases where FP is a high register (R11). This
+// scans the function for cases where this may happen.
static bool canSpillOnFrameIndexAccess(const MachineFunction &MF,
const TargetFrameLowering &TFI) {
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
if (!AFI->isThumb1OnlyFunction())
return false;
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
for (const auto &MBB : MF)
for (const auto &MI : MBB)
- if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi)
+ if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi ||
+ STI.genExecuteOnly())
for (const auto &Op : MI.operands())
if (Op.isFI()) {
Register Reg;
@@ -2333,6 +2335,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
CanEliminateFrame = false;
+ // When return address signing is enabled R12 is treated as callee-saved.
+ if (AFI->shouldSignReturnAddress())
+ CanEliminateFrame = false;
+
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is modified.
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
@@ -2532,18 +2538,19 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
CS1Spilled = true;
}
- // This is true when we inserted a spill for a callee-save GPR which is
- // not otherwise used by the function. This guarantees it is possible
- // to scavenge a register to hold the address of a stack slot. On Thumb1,
- // the register must be a valid operand to tSTRi, i.e. r4-r7. For other
- // subtargets, this is any GPR, i.e. r4-r11 or lr.
+ // This is the number of extra spills inserted for callee-save GPRs which
+ // would not otherwise be used by the function. When greater than zero it
+ // guarantees that it is possible to scavenge a register to hold the
+ // address of a stack slot. On Thumb1, the register must be a valid operand
+ // to tSTRi, i.e. r4-r7. For other subtargets, this is any GPR, i.e. r4-r11
+ // or lr.
//
// If we don't insert a spill, we instead allocate an emergency spill
// slot, which can be used by scavenging to spill an arbitrary register.
//
// We currently don't try to figure out whether any specific instruction
// requires scavenging an additional register.
- bool ExtraCSSpill = false;
+ unsigned NumExtraCSSpill = 0;
if (AFI->isThumb1OnlyFunction()) {
// For Thumb1-only targets, we need some low registers when we save and
@@ -2652,7 +2659,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
CS1Spilled = true;
assert(!MRI.isReserved(Reg) && "Should not be reserved");
if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
- ExtraCSSpill = true;
+ NumExtraCSSpill++;
UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
if (Reg == ARM::LR)
LRSpilled = true;
@@ -2678,7 +2685,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
ForceLRSpill = false;
if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
!AFI->isThumb1OnlyFunction())
- ExtraCSSpill = true;
+ NumExtraCSSpill++;
}
// If stack and double are 8-byte aligned and we are spilling an odd number
@@ -2701,7 +2708,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
<< " to make up alignment\n");
if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
!(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
- ExtraCSSpill = true;
+ NumExtraCSSpill++;
break;
}
}
@@ -2711,18 +2718,26 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
<< " to make up alignment\n");
if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
- ExtraCSSpill = true;
+ NumExtraCSSpill++;
}
}
- // Estimate if we might need to scavenge a register at some point in order
+ // Estimate if we might need to scavenge registers at some point in order
// to materialize a stack offset. If so, either spill one additional
// callee-saved register or reserve a special spill slot to facilitate
// register scavenging. Thumb1 needs a spill slot for stack pointer
// adjustments and for frame index accesses when FP is a high register,
// even when the frame itself is small.
- if (!ExtraCSSpill &&
- (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this))) {
+ unsigned RegsNeeded = 0;
+ if (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this)) {
+ RegsNeeded++;
+ // With Thumb1 execute-only we may need an additional register for saving
+ // and restoring the CPSR.
+ if (AFI->isThumb1OnlyFunction() && STI.genExecuteOnly() && !STI.useMovt())
+ RegsNeeded++;
+ }
+
+ if (RegsNeeded > NumExtraCSSpill) {
// If any non-reserved CS register isn't spilled, just spill one or two
// extra. That should take care of it!
unsigned NumExtras = TargetAlign.value() / 4;
@@ -2749,10 +2764,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
for (unsigned Reg : Extras) {
SavedRegs.set(Reg);
if (!MRI.isPhysRegUsed(Reg))
- ExtraCSSpill = true;
+ NumExtraCSSpill++;
}
}
- if (!ExtraCSSpill && RS) {
+ while ((RegsNeeded > NumExtraCSSpill) && RS) {
// Reserve a slot closest to SP or frame pointer.
LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
const TargetRegisterClass &RC = ARM::GPRRegClass;
@@ -2760,6 +2775,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
Align Alignment = TRI->getSpillAlign(RC);
RS->addScavengingFrameIndex(
MFI.CreateStackObject(Size, Alignment, false));
+ --RegsNeeded;
}
}
}
@@ -2966,6 +2982,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// We save R4 and R5 before use and restore them before leaving the function.
unsigned ScratchReg0 = ARM::R4;
unsigned ScratchReg1 = ARM::R5;
+ unsigned MovOp = ST->useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
uint64_t AlignedStackSize;
MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
@@ -3083,8 +3100,8 @@ void ARMFrameLowering::adjustForSegmentedStacks(
.addImm(AlignedStackSize)
.add(predOps(ARMCC::AL));
} else {
- if (Thumb2) {
- BuildMI(McrMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg0)
+ if (Thumb2 || ST->genExecuteOnly()) {
+ BuildMI(McrMBB, DL, TII.get(MovOp), ScratchReg0)
.addImm(AlignedStackSize);
} else {
auto MBBI = McrMBB->end();
@@ -3119,16 +3136,21 @@ void ARMFrameLowering::adjustForSegmentedStacks(
}
if (Thumb && ST->isThumb1Only()) {
- unsigned PCLabelId = ARMFI->createPICLabelUId();
- ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
- MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
- MachineConstantPool *MCP = MF.getConstantPool();
- unsigned CPI = MCP->getConstantPoolIndex(NewCPV, Align(4));
-
- // ldr SR0, [pc, offset(STACK_LIMIT)]
- BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
- .addConstantPoolIndex(CPI)
- .add(predOps(ARMCC::AL));
+ if (ST->genExecuteOnly()) {
+ BuildMI(GetMBB, DL, TII.get(MovOp), ScratchReg0)
+ .addExternalSymbol("__STACK_LIMIT");
+ } else {
+ unsigned PCLabelId = ARMFI->createPICLabelUId();
+ ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
+ MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
+ MachineConstantPool *MCP = MF.getConstantPool();
+ unsigned CPI = MCP->getConstantPoolIndex(NewCPV, Align(4));
+
+ // ldr SR0, [pc, offset(STACK_LIMIT)]
+ BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
+ .addConstantPoolIndex(CPI)
+ .add(predOps(ARMCC::AL));
+ }
// ldr SR0, [SR0]
BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
@@ -3188,8 +3210,8 @@ void ARMFrameLowering::adjustForSegmentedStacks(
.addImm(AlignedStackSize)
.add(predOps(ARMCC::AL));
} else {
- if (Thumb2) {
- BuildMI(AllocMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg0)
+ if (Thumb2 || ST->genExecuteOnly()) {
+ BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg0)
.addImm(AlignedStackSize);
} else {
auto MBBI = AllocMBB->end();
@@ -3221,8 +3243,8 @@ void ARMFrameLowering::adjustForSegmentedStacks(
.addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
.add(predOps(ARMCC::AL));
} else {
- if (Thumb2) {
- BuildMI(AllocMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg1)
+ if (Thumb2 || ST->genExecuteOnly()) {
+ BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg1)
.addImm(alignToARMConstant(ARMFI->getArgumentStackSize()));
} else {
auto MBBI = AllocMBB->end();
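
determineCalleeSaves used to track a single bit, ExtraCSSpill, meaning "at least one otherwise-unused callee-save was spilled, so the scavenger has somewhere to put a register". Thumb1 execute-only can need two scratch registers at once (one for the address, one to preserve CPSR around tMOVi32imm), so the flag becomes a count compared against the number of registers the function may need. A stripped-down model of the accounting (the inputs are invented):

#include <cassert>

// Decide how many emergency spill slots to reserve once the extra
// callee-save spills are counted. Mirrors the RegsNeeded /
// NumExtraCSSpill comparison above.
static unsigned slotsToReserve(bool BigFrame, bool Thumb1ExecOnlyNoMovt,
                               unsigned NumExtraCSSpill) {
  unsigned RegsNeeded = 0;
  if (BigFrame)
    RegsNeeded++;  // one scratch reg to materialize stack offsets
  if (BigFrame && Thumb1ExecOnlyNoMovt)
    RegsNeeded++;  // second scratch reg to save/restore CPSR
  return RegsNeeded > NumExtraCSSpill ? RegsNeeded - NumExtraCSSpill : 0;
}

int main() {
  assert(slotsToReserve(true, false, 0) == 1);  // old boolean behaviour
  assert(slotsToReserve(true, true, 0) == 2);   // execute-only needs two
  assert(slotsToReserve(true, true, 1) == 1);   // one extra spill helps
  assert(slotsToReserve(false, false, 0) == 0); // small frame: nothing
  return 0;
}
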
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMHazardRecognizer.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMHazardRecognizer.h
index 66a1477e5e08..b9ac3555c2bc 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMHazardRecognizer.h
@@ -18,7 +18,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Support/DataTypes.h"
-#include <array>
#include <initializer_list>
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index a0607cb5662e..984d8d3e0b08 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -63,7 +63,7 @@ public:
ARMDAGToDAGISel() = delete;
- explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
+ explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
: SelectionDAGISel(ID, tm, OptLevel) {}
bool runOnMachineFunction(MachineFunction &MF) override {
@@ -331,7 +331,8 @@ private:
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) override;
// Form pairs of consecutive R, S, D, or Q registers.
@@ -498,7 +499,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
- if (OptLevel == CodeGenOpt::None)
+ if (OptLevel == CodeGenOptLevel::None)
return true;
if (!Subtarget->hasVMLxHazards())
@@ -1130,8 +1131,7 @@ static bool shouldUseZeroOffsetLdSt(SDValue N) {
bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
SDValue &Offset) {
if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
- ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
- if (!NC || !NC->isZero())
+ if (!isNullConstant(N))
return false;
Base = Offset = N;
@@ -3559,8 +3559,7 @@ void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
return;
SDValue Zero = N->getOperand(1);
- if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isZero() ||
- And->getOpcode() != ISD::AND)
+ if (!isNullConstant(Zero) || And->getOpcode() != ISD::AND)
return;
SDValue X = And.getOperand(0);
auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
@@ -5709,7 +5708,7 @@ bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
std::vector<SDValue> AsmNodeOperands;
- unsigned Flag, Kind;
+ InlineAsm::Flag Flag;
bool Changed = false;
unsigned NumOps = N->getNumOperands();
@@ -5733,24 +5732,22 @@ bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
if (i < InlineAsm::Op_FirstOperand)
continue;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
- Flag = C->getZExtValue();
- Kind = InlineAsm::getKind(Flag);
- }
+ if (const auto *C = dyn_cast<ConstantSDNode>(N->getOperand(i)))
+ Flag = InlineAsm::Flag(C->getZExtValue());
else
continue;
// Immediate operands to inline asm in the SelectionDAG are modeled with
- // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
+ // two operands. The first is a constant of value InlineAsm::Kind::Imm, and
// the second is a constant with the value of the immediate. If we get here
- // and we have a Kind_Imm, skip the next operand, and continue.
- if (Kind == InlineAsm::Kind_Imm) {
+ // and we have a Kind::Imm, skip the next operand, and continue.
+ if (Flag.isImmKind()) {
SDValue op = N->getOperand(++i);
AsmNodeOperands.push_back(op);
continue;
}
- unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
+ const unsigned NumRegs = Flag.getNumOperandRegisters();
if (NumRegs)
OpChanged.push_back(false);
@@ -5758,26 +5755,26 @@ bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
bool IsTiedToChangedOp = false;
// If it's a use that is tied with a previous def, it has no
// reg class constraint.
- if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
+ if (Changed && Flag.isUseOperandTiedToDef(DefIdx))
IsTiedToChangedOp = OpChanged[DefIdx];
// Memory operands to inline asm in the SelectionDAG are modeled with two
- // operands: a constant of value InlineAsm::Kind_Mem followed by the input
- // operand. If we get here and we have a Kind_Mem, skip the next operand (so
- // it doesn't get misinterpreted), and continue. We do this here because
+ // operands: a constant of value InlineAsm::Kind::Mem followed by the input
+ // operand. If we get here and we have a Kind::Mem, skip the next operand
+ // (so it doesn't get misinterpreted), and continue. We do this here because
// it's important to update the OpChanged array correctly before moving on.
- if (Kind == InlineAsm::Kind_Mem) {
+ if (Flag.isMemKind()) {
SDValue op = N->getOperand(++i);
AsmNodeOperands.push_back(op);
continue;
}
- if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
- && Kind != InlineAsm::Kind_RegDefEarlyClobber)
+ if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
+ !Flag.isRegDefEarlyClobberKind())
continue;
unsigned RC;
- bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
+ const bool HasRC = Flag.hasRegClassConstraint(RC);
if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
|| NumRegs != 2)
continue;
@@ -5790,8 +5787,7 @@ bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
SDValue PairedReg;
MachineRegisterInfo &MRI = MF->getRegInfo();
- if (Kind == InlineAsm::Kind_RegDef ||
- Kind == InlineAsm::Kind_RegDefEarlyClobber) {
+ if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
// Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
// the original GPRs.
@@ -5816,9 +5812,8 @@ bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
Ops.push_back(T1.getValue(1));
CurDAG->UpdateNodeOperands(GU, Ops);
- }
- else {
- // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
+ } else {
+ // For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a
// GPRPair and then pass the GPRPair to the inline asm.
SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
@@ -5843,11 +5838,11 @@ bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
if(PairedReg.getNode()) {
OpChanged[OpChanged.size() -1 ] = true;
- Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
+ Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum*/);
if (IsTiedToChangedOp)
- Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
+ Flag.setMatchingOp(DefIdx);
else
- Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
+ Flag.setRegClass(ARM::GPRPairRegClassID);
// Replace the current flag.
AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
Flag, dl, MVT::i32);
@@ -5870,23 +5865,22 @@ bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
return true;
}
-
-bool ARMDAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
- std::vector<SDValue> &OutOps) {
+bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand(
+ const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
+ std::vector<SDValue> &OutOps) {
switch(ConstraintID) {
default:
llvm_unreachable("Unexpected asm memory constraint");
- case InlineAsm::Constraint_m:
- case InlineAsm::Constraint_o:
- case InlineAsm::Constraint_Q:
- case InlineAsm::Constraint_Um:
- case InlineAsm::Constraint_Un:
- case InlineAsm::Constraint_Uq:
- case InlineAsm::Constraint_Us:
- case InlineAsm::Constraint_Ut:
- case InlineAsm::Constraint_Uv:
- case InlineAsm::Constraint_Uy:
+ case InlineAsm::ConstraintCode::m:
+ case InlineAsm::ConstraintCode::o:
+ case InlineAsm::ConstraintCode::Q:
+ case InlineAsm::ConstraintCode::Um:
+ case InlineAsm::ConstraintCode::Un:
+ case InlineAsm::ConstraintCode::Uq:
+ case InlineAsm::ConstraintCode::Us:
+ case InlineAsm::ConstraintCode::Ut:
+ case InlineAsm::ConstraintCode::Uv:
+ case InlineAsm::ConstraintCode::Uy:
// Require the address to be in a register. That is safe for all ARM
// variants and it is hard to do anything much smarter without knowing
// how the operand is used.
@@ -5900,6 +5894,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
/// ARM-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
return new ARMDAGToDAGISel(TM, OptLevel);
}
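
The inline-asm rewrites in this file and in ARMAsmPrinter.cpp swap the loose unsigned flag word plus free functions (InlineAsm::getKind, getNumOperandRegisters, getFlagWordForRegClass, and so on) for the InlineAsm::Flag value class, whose accessors appear verbatim in the hunks above. A standalone model of why a thin wrapper over a bitfield reads better than the free-function style (the field layout here is invented, not LLVM's encoding):

#include <cassert>
#include <cstdint>

// Toy flag word: kind in bits 0-2, register count in bits 3-15.
// The real InlineAsm::Flag packs more, but the shape is the same.
class Flag {
  uint32_t Storage;

public:
  enum class Kind : uint32_t { Imm = 1, Mem = 2, RegUse = 3 };

  explicit Flag(uint32_t Raw) : Storage(Raw) {}
  Flag(Kind K, unsigned NumRegs)
      : Storage(uint32_t(K) | (uint32_t(NumRegs) << 3)) {}

  Kind getKind() const { return Kind(Storage & 0x7); }
  bool isImmKind() const { return getKind() == Kind::Imm; }
  bool isMemKind() const { return getKind() == Kind::Mem; }
  unsigned getNumOperandRegisters() const { return (Storage >> 3) & 0x1fff; }
  uint32_t raw() const { return Storage; }
};

int main() {
  Flag F(Flag::Kind::RegUse, 2);
  assert(!F.isImmKind() && !F.isMemKind());
  assert(F.getNumOperandRegisters() == 2);
  // Round-trips through the raw word, as the DAG stores it in a constant.
  assert(Flag(F.raw()).getNumOperandRegisters() == 2);
  return 0;
}
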
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 5239e5c4d91b..db63facca870 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -40,6 +40,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/ComplexDeinterleavingPass.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -111,7 +112,6 @@
#include <iterator>
#include <limits>
#include <optional>
-#include <string>
#include <tuple>
#include <utility>
#include <vector>
@@ -371,6 +371,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::FLOG10, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FEXP10, VT, Expand);
setOperationAction(ISD::FNEARBYINT, VT, Expand);
}
}
@@ -880,6 +881,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
+ setOperationAction(ISD::FEXP10, MVT::v2f64, Expand);
// FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
@@ -901,6 +903,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP10, MVT::v4f32, Expand);
setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
@@ -917,6 +920,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
+ setOperationAction(ISD::FEXP10, MVT::v2f32, Expand);
setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
@@ -1058,6 +1062,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FLOG10, MVT::f64, Expand);
setOperationAction(ISD::FEXP, MVT::f64, Expand);
setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+ setOperationAction(ISD::FEXP10, MVT::f64, Expand);
setOperationAction(ISD::FCEIL, MVT::f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
setOperationAction(ISD::FRINT, MVT::f64, Expand);
@@ -1330,7 +1335,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// On v8, we have particularly efficient implementations of atomic fences
// if they can be combined with nearby atomic loads and stores.
if (!Subtarget->hasAcquireRelease() ||
- getTargetMachine().getOptLevel() == 0) {
+ getTargetMachine().getOptLevel() == CodeGenOptLevel::None) {
// Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
InsertFencesForAtomic = true;
}
@@ -1344,19 +1349,19 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
Subtarget->hasAnyDataBarrier() ? Custom : Expand);
- // Set them all for expansion, which will force libcalls.
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+ // Set them all for libcall, which will force libcalls.
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, LibCall);
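
In practice LibCall means these atomics become runtime calls; a hedged user-level illustration (the exact callee is runtime-specific, e.g. libatomic's __atomic_fetch_add_4 or a platform __sync_* helper):

    #include <atomic>
    // With ATOMIC_LOAD_ADD marked LibCall, this fetch_add cannot be selected
    // to an LL/SC sequence and is emitted as a call into the atomic runtime.
    int bump(std::atomic<int> &Counter) {
      return Counter.fetch_add(1, std::memory_order_seq_cst);
    }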
// Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
// Unordered/Monotonic case.
if (!InsertFencesForAtomic) {
@@ -1407,6 +1412,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
+ setOperationAction(ISD::GET_FPENV, MVT::i32, Legal);
+ setOperationAction(ISD::SET_FPENV, MVT::i32, Legal);
+ setOperationAction(ISD::RESET_FPENV, MVT::Other, Legal);
}
// We want to custom lower some of our intrinsics.
@@ -1534,6 +1542,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FPOW, MVT::f16, Promote);
setOperationAction(ISD::FEXP, MVT::f16, Promote);
setOperationAction(ISD::FEXP2, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP10, MVT::f16, Promote);
setOperationAction(ISD::FLOG, MVT::f16, Promote);
setOperationAction(ISD::FLOG10, MVT::f16, Promote);
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
@@ -1612,6 +1621,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
+ setPrefFunctionAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
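
Both alignment calls use the same log2-to-bytes conversion; a one-line sketch:

    #include "llvm/Support/Alignment.h"
    // Align(1ULL << N) turns a log2 alignment into a byte alignment,
    // e.g. a preferred log-alignment of 2 becomes Align(4).
    static llvm::Align alignFromLog2(unsigned Log2N) {
      return llvm::Align(1ULL << Log2N);
    }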
@@ -1971,7 +1981,7 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
if (MCID.getNumDefs() == 0)
return Sched::RegPressure;
if (!Itins->isEmpty() &&
- Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
+ Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2U)
return Sched::ILP;
return Sched::RegPressure;
@@ -8369,7 +8379,7 @@ bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
unsigned EltSize = VT.getScalarSizeInBits();
if (EltSize >= 32 ||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
- ShuffleVectorInst::isIdentityMask(M) ||
+ ShuffleVectorInst::isIdentityMask(M, M.size()) ||
isVREVMask(M, VT, 64) ||
isVREVMask(M, VT, 32) ||
isVREVMask(M, VT, 16))
@@ -9076,6 +9086,8 @@ static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
EVT Op1VT = V1.getValueType();
EVT Op2VT = V2.getValueType();
assert(Op1VT == Op2VT && "Operand types don't match!");
+ assert((Op1VT == MVT::v2i1 || Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) &&
+ "Unexpected i1 concat operations!");
EVT VT = Op1VT.getDoubleNumVectorElementsVT(*DAG.getContext());
SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
@@ -9087,37 +9099,44 @@ static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
unsigned NumElts = 2 * Op1VT.getVectorNumElements();
+ EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
+ if (Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) {
+ // Use MVETRUNC to truncate the combined NewV1::NewV2 into the smaller
+ // ConcatVT.
+ SDValue ConVec =
+ DAG.getNode(ARMISD::MVETRUNC, dl, ConcatVT, NewV1, NewV2);
+ return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
+ DAG.getConstant(ARMCC::NE, dl, MVT::i32));
+ }
+
// Extract the vector elements from Op1 and Op2 one by one and truncate them
// to be the right size for the destination. For example, if Op1 is v4i1
// then the promoted vector is v4i32. The result of concatenation gives a
// v8i1, which when promoted is v8i16. That means each i32 element from Op1
// needs truncating to i16 and inserting in the result.
- EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
- SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
auto ExtractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
EVT NewVT = NewV.getValueType();
EVT ConcatVT = ConVec.getValueType();
+ unsigned ExtScale = 1;
+ if (NewVT == MVT::v2f64) {
+ NewV = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, NewV);
+ ExtScale = 2;
+ }
for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,
- DAG.getIntPtrConstant(i, dl));
+ DAG.getIntPtrConstant(i * ExtScale, dl));
ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,
DAG.getConstant(j, dl, MVT::i32));
}
return ConVec;
};
unsigned j = 0;
+ SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
ConVec = ExtractInto(NewV1, ConVec, j);
ConVec = ExtractInto(NewV2, ConVec, j);
// Now return the result of comparing the subvector with zero, which will
- // generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1 we
- // convert to a v4i1 compare to fill in the two halves of the i64 as i32s.
- if (VT == MVT::v2i1) {
- SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, ConVec);
- SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,
- DAG.getConstant(ARMCC::NE, dl, MVT::i32));
- return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
- }
+ // generate a real predicate, i.e. v4i1, v8i1 or v16i1.
return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
DAG.getConstant(ARMCC::NE, dl, MVT::i32));
};
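
For the v2f64 operands handled by ExtScale above, the promoted half is reg-cast to v4i32 and only every other lane is read. A sketch of the scaled extract (hypothetical excerpt, not a complete lowering; assumes little-endian lane numbering, so lane i*2 carries one i32 per original f64 element):

    SDValue Cast = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, NewV);
    SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Cast,
                             DAG.getIntPtrConstant(0, dl)); // f64 element 0
    SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Cast,
                             DAG.getIntPtrConstant(2, dl)); // f64 element 1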
@@ -9869,7 +9888,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
ArgListEntry Entry;
Entry.Node = SRet;
- Entry.Ty = RetTy->getPointerTo();
+ Entry.Ty = PointerType::getUnqual(RetTy->getContext());
Entry.IsSExt = false;
Entry.IsZExt = false;
Entry.IsSRet = true;
@@ -10383,10 +10402,7 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
SDLoc dl(V.getNode());
- SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
- SDValue VHi = DAG.getAnyExtOrTrunc(
- DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
- dl, MVT::i32);
+ auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32);
bool isBigEndian = DAG.getDataLayout().isBigEndian();
if (isBigEndian)
std::swap (VLo, VHi);
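
DAG.SplitScalar is a direct replacement for the open-coded split removed above; per the deleted lines, it is equivalent to:

    // Low and high i32 halves of the i64 value V.
    SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
    SDValue VHi = DAG.getAnyExtOrTrunc(
        DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
        dl, MVT::i32);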
@@ -11485,6 +11501,11 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
MF->insert(It, loopMBB);
MF->insert(It, exitMBB);
+ // Set the call frame size on entry to the new basic blocks.
+ unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
+ loopMBB->setCallFrameSize(CallFrameSize);
+ exitMBB->setCallFrameSize(CallFrameSize);
+
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
@@ -11999,7 +12020,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
TpLoopBody->moveAfter(TpEntry);
TpExit->moveAfter(TpLoopBody);
- // Finally, remove the memcpy Psuedo Instruction
+ // Finally, remove the memcpy Pseudo Instruction
MI.eraseFromParent();
// Return the exit block as it may contain other instructions requiring a
@@ -12081,6 +12102,11 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
+ // Set the call frame size on entry to the new basic blocks.
+ unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
+ copy0MBB->setCallFrameSize(CallFrameSize);
+ sinkMBB->setCallFrameSize(CallFrameSize);
+
// Check whether CPSR is live past the tMOVCCr_pseudo.
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
if (!MI.killsRegister(ARM::CPSR) &&
@@ -16616,7 +16642,7 @@ static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8;
SDValue NewPtr =
- DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
+ DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
SDValue Extract =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0),
@@ -16665,7 +16691,7 @@ static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St,
unsigned NewOffset =
i * FromVT.getVectorNumElements() * ToVT.getScalarSizeInBits() / 8;
SDValue NewPtr =
- DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
+ DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
SDValue Extract = Trunc.getOperand(i);
SDValue Store = DAG.getTruncStore(
@@ -17745,7 +17771,7 @@ static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) {
for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
SDValue NewPtr =
- DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
+ DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
SDValue NewLoad =
DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
@@ -18582,7 +18608,7 @@ SDValue ARMTargetLowering::PerformMVETruncCombine(
if (!DCI.isAfterLegalizeDAG())
return SDValue();
- SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::Fixed(16), Align(4));
+ SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
int NumIns = N->getNumOperands();
assert((NumIns == 2 || NumIns == 4) &&
@@ -18659,7 +18685,7 @@ static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N,
for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
SDValue NewPtr =
- DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
+ DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
SDValue NewLoad =
DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
@@ -18750,7 +18776,7 @@ SDValue ARMTargetLowering::PerformMVEExtCombine(
// Lower to a stack store and reload:
// VSTRW.32 a, stack; VLDRH.32 stack; VLDRH.32 stack+8;
- SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::Fixed(16), Align(4));
+ SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
int NumOuts = N->getNumValues();
assert((NumOuts == 2 || NumOuts == 4) &&
@@ -20227,14 +20253,14 @@ bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
return false;
InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
- std::string AsmStr = IA->getAsmString();
+ StringRef AsmStr = IA->getAsmString();
SmallVector<StringRef, 4> AsmPieces;
SplitString(AsmStr, AsmPieces, ";\n");
switch (AsmPieces.size()) {
default: return false;
case 1:
- AsmStr = std::string(AsmPieces[0]);
+ AsmStr = AsmPieces[0];
AsmPieces.clear();
SplitString(AsmStr, AsmPieces, " \t,");
@@ -20414,13 +20440,14 @@ RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
- std::vector<SDValue>&Ops,
+ StringRef Constraint,
+ std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
SDValue Result;
// Currently only support length 1 constraints.
- if (Constraint.length() != 1) return;
+ if (Constraint.size() != 1)
+ return;
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
@@ -21299,7 +21326,7 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
// the stack and close enough to the spill slot, this can lead to a
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 lower this operation to a CAS loop.
- if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+ if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None)
return AtomicExpansionKind::CmpXChg;
return AtomicExpansionKind::LLSC;
}
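
The -O0 special case restated as standalone logic (hypothetical free function; AtomicExpansionKind is the real enum):

    // At -O0 a spill inserted between LDREX and STREX can sit close enough
    // to the atomic's stack slot to clear the exclusive monitor on every
    // iteration, so an LL/SC loop may never succeed; a CAS loop does not
    // depend on a live monitor across the spill.
    TargetLowering::AtomicExpansionKind chooseRMWExpansion(bool OptNone) {
      return OptNone ? TargetLowering::AtomicExpansionKind::CmpXChg
                     : TargetLowering::AtomicExpansionKind::LLSC;
    }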
@@ -21323,8 +21350,8 @@ ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
HasAtomicCmpXchg = Subtarget->hasV7Ops();
else
HasAtomicCmpXchg = Subtarget->hasV6Ops();
- if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg &&
- Size <= (Subtarget->isMClass() ? 32U : 64U))
+ if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None &&
+ HasAtomicCmpXchg && Size <= (Subtarget->isMClass() ? 32U : 64U))
return AtomicExpansionKind::LLSC;
return AtomicExpansionKind::None;
}
@@ -21345,12 +21372,12 @@ void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
// MSVC CRT has a global variable holding security cookie.
M.getOrInsertGlobal("__security_cookie",
- Type::getInt8PtrTy(M.getContext()));
+ PointerType::getUnqual(M.getContext()));
// MSVC CRT has a function to validate security cookie.
FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
"__security_check_cookie", Type::getVoidTy(M.getContext()),
- Type::getInt8PtrTy(M.getContext()));
+ PointerType::getUnqual(M.getContext()));
if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
F->addParamAttr(0, Attribute::AttrKind::InReg);
}
@@ -21444,7 +21471,6 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
Function *Ldrex = Intrinsic::getDeclaration(M, Int);
- Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
@@ -21494,7 +21520,6 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
if (!Subtarget->isLittle())
std::swap(Lo, Hi);
- Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
return Builder.CreateCall(Strex, {Lo, Hi, Addr});
}
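
The bitcasts removed in the last two hunks fall out of the opaque-pointer migration: all pointers in an address space share the single type ptr, so casting Addr to i8* no longer does anything. A minimal sketch (assumes address space 0):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;
    // Under opaque pointers both types below are the same "ptr", and
    // IRBuilder::CreateBitCast returns Addr unchanged when source and
    // destination types match - hence the deleted calls were dead.
    Value *noOpCast(IRBuilder<> &Builder, Value *Addr) {
      return Builder.CreateBitCast(
          Addr, PointerType::getUnqual(Builder.getContext()));
    }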
@@ -21617,21 +21642,14 @@ bool ARMTargetLowering::lowerInterleavedLoad(
// to something legal.
VecTy = FixedVectorType::get(VecTy->getElementType(),
VecTy->getNumElements() / NumLoads);
-
- // We will compute the pointer operand of each load from the original base
- // address using GEPs. Cast the base address to a pointer to the scalar
- // element type.
- BaseAddr = Builder.CreateBitCast(
- BaseAddr,
- VecTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
}
assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
auto createLoadIntrinsic = [&](Value *BaseAddr) {
if (Subtarget->hasNEON()) {
- Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
- Type *Tys[] = {VecTy, Int8Ptr};
+ Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
+ Type *Tys[] = {VecTy, PtrTy};
static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
Intrinsic::arm_neon_vld3,
Intrinsic::arm_neon_vld4};
@@ -21639,7 +21657,7 @@ bool ARMTargetLowering::lowerInterleavedLoad(
Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
SmallVector<Value *, 2> Ops;
- Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
+ Ops.push_back(BaseAddr);
Ops.push_back(Builder.getInt32(LI->getAlign().value()));
return Builder.CreateCall(VldnFunc, Ops, "vldN");
@@ -21648,14 +21666,13 @@ bool ARMTargetLowering::lowerInterleavedLoad(
"expected interleave factor of 2 or 4 for MVE");
Intrinsic::ID LoadInts =
Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q;
- Type *VecEltTy =
- VecTy->getElementType()->getPointerTo(LI->getPointerAddressSpace());
- Type *Tys[] = {VecTy, VecEltTy};
+ Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
+ Type *Tys[] = {VecTy, PtrTy};
Function *VldnFunc =
Intrinsic::getDeclaration(LI->getModule(), LoadInts, Tys);
SmallVector<Value *, 2> Ops;
- Ops.push_back(Builder.CreateBitCast(BaseAddr, VecEltTy));
+ Ops.push_back(BaseAddr);
return Builder.CreateCall(VldnFunc, Ops, "vldN");
}
};
@@ -21782,13 +21799,6 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
// and sub-vector type to something legal.
LaneLen /= NumStores;
SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
-
- // We will compute the pointer operand of each store from the original base
- // address using GEPs. Cast the base address to a pointer to the scalar
- // element type.
- BaseAddr = Builder.CreateBitCast(
- BaseAddr,
- SubVecTy->getElementType()->getPointerTo(SI->getPointerAddressSpace()));
}
assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
@@ -21801,14 +21811,14 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
Intrinsic::arm_neon_vst3,
Intrinsic::arm_neon_vst4};
- Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
- Type *Tys[] = {Int8Ptr, SubVecTy};
+ Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
+ Type *Tys[] = {PtrTy, SubVecTy};
Function *VstNFunc = Intrinsic::getDeclaration(
SI->getModule(), StoreInts[Factor - 2], Tys);
SmallVector<Value *, 6> Ops;
- Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
+ Ops.push_back(BaseAddr);
append_range(Ops, Shuffles);
Ops.push_back(Builder.getInt32(SI->getAlign().value()));
Builder.CreateCall(VstNFunc, Ops);
@@ -21817,14 +21827,13 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
"expected interleave factor of 2 or 4 for MVE");
Intrinsic::ID StoreInts =
Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q;
- Type *EltPtrTy = SubVecTy->getElementType()->getPointerTo(
- SI->getPointerAddressSpace());
- Type *Tys[] = {EltPtrTy, SubVecTy};
+ Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
+ Type *Tys[] = {PtrTy, SubVecTy};
Function *VstNFunc =
Intrinsic::getDeclaration(SI->getModule(), StoreInts, Tys);
SmallVector<Value *, 6> Ops;
- Ops.push_back(Builder.CreateBitCast(BaseAddr, EltPtrTy));
+ Ops.push_back(BaseAddr);
append_range(Ops, Shuffles);
for (unsigned F = 0; F < Factor; F++) {
Ops.push_back(Builder.getInt32(F));
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h
index 2dd54602ef61..6c2b92de7a1d 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -530,33 +530,33 @@ class VectorType;
/// vector. If it is invalid, don't add anything to Ops. If hasMemory is
/// true it means one of the asm constraint of the inline asm instruction
/// being processed is 'm'.
- void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- unsigned
+ InlineAsm::ConstraintCode
getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
if (ConstraintCode == "Q")
- return InlineAsm::Constraint_Q;
- else if (ConstraintCode.size() == 2) {
+ return InlineAsm::ConstraintCode::Q;
+ if (ConstraintCode.size() == 2) {
if (ConstraintCode[0] == 'U') {
switch(ConstraintCode[1]) {
default:
break;
case 'm':
- return InlineAsm::Constraint_Um;
+ return InlineAsm::ConstraintCode::Um;
case 'n':
- return InlineAsm::Constraint_Un;
+ return InlineAsm::ConstraintCode::Un;
case 'q':
- return InlineAsm::Constraint_Uq;
+ return InlineAsm::ConstraintCode::Uq;
case 's':
- return InlineAsm::Constraint_Us;
+ return InlineAsm::ConstraintCode::Us;
case 't':
- return InlineAsm::Constraint_Ut;
+ return InlineAsm::ConstraintCode::Ut;
case 'v':
- return InlineAsm::Constraint_Uv;
+ return InlineAsm::ConstraintCode::Uv;
case 'y':
- return InlineAsm::Constraint_Uy;
+ return InlineAsm::ConstraintCode::Uy;
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index 00db13f2eb52..ccc883f646a6 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -104,8 +104,11 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
const GlobalValue *GV =
cast<GlobalValue>((*MI->memoperands_begin())->getValue());
- if (!Subtarget.useMovt() || Subtarget.isGVInGOT(GV)) {
- if (TM.isPositionIndependent())
+ bool ForceELFGOTPIC = Subtarget.isTargetELF() && !GV->isDSOLocal();
+ if (!Subtarget.useMovt() || ForceELFGOTPIC) {
+ // For ELF non-PIC, use GOT PIC code sequence as well because R_ARM_GOT_ABS
+ // does not have assembler support.
+ if (TM.isPositionIndependent() || ForceELFGOTPIC)
expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12);
else
expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_abs, ARM::LDRi12);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td
index fde386188cd8..812b5730875d 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -5357,7 +5357,7 @@ def atomic_load_acquire_16 : acquiring_load<atomic_load_16>;
def atomic_load_acquire_32 : acquiring_load<atomic_load_32>;
class releasing_store<PatFrag base>
- : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$val, node:$ptr), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getSuccessOrdering();
return isReleaseOrStronger(Ordering);
}]>;
@@ -6184,15 +6184,15 @@ def : ARMPat<(atomic_load_32 ldst_so_reg:$src),
(LDRrs ldst_so_reg:$src)>;
def : ARMPat<(atomic_load_32 addrmode_imm12:$src),
(LDRi12 addrmode_imm12:$src)>;
-def : ARMPat<(atomic_store_8 ldst_so_reg:$ptr, GPR:$val),
+def : ARMPat<(atomic_store_8 GPR:$val, ldst_so_reg:$ptr),
(STRBrs GPR:$val, ldst_so_reg:$ptr)>;
-def : ARMPat<(atomic_store_8 addrmode_imm12:$ptr, GPR:$val),
+def : ARMPat<(atomic_store_8 GPR:$val, addrmode_imm12:$ptr),
(STRBi12 GPR:$val, addrmode_imm12:$ptr)>;
-def : ARMPat<(atomic_store_16 addrmode3:$ptr, GPR:$val),
+def : ARMPat<(atomic_store_16 GPR:$val, addrmode3:$ptr),
(STRH GPR:$val, addrmode3:$ptr)>;
-def : ARMPat<(atomic_store_32 ldst_so_reg:$ptr, GPR:$val),
+def : ARMPat<(atomic_store_32 GPR:$val, ldst_so_reg:$ptr),
(STRrs GPR:$val, ldst_so_reg:$ptr)>;
-def : ARMPat<(atomic_store_32 addrmode_imm12:$ptr, GPR:$val),
+def : ARMPat<(atomic_store_32 GPR:$val, addrmode_imm12:$ptr),
(STRi12 GPR:$val, addrmode_imm12:$ptr)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrNEON.td
index 32c6843026dd..f31e1e9f9789 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -5711,19 +5711,19 @@ let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" i
def NEON_VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
N3RegFrm, NoItinerary, "vmaxnm", "f32",
v2f32, v2f32, fmaxnum, 1>,
- Requires<[HasV8, HasNEON]>;
+ Requires<[HasFPARMv8, HasNEON]>;
def NEON_VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
N3RegFrm, NoItinerary, "vmaxnm", "f32",
v4f32, v4f32, fmaxnum, 1>,
- Requires<[HasV8, HasNEON]>;
+ Requires<[HasFPARMv8, HasNEON]>;
def NEON_VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
N3RegFrm, NoItinerary, "vmaxnm", "f16",
v4f16, v4f16, fmaxnum, 1>,
- Requires<[HasV8, HasNEON, HasFullFP16]>;
+ Requires<[HasFPARMv8, HasNEON, HasFullFP16]>;
def NEON_VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
N3RegFrm, NoItinerary, "vmaxnm", "f16",
v8f16, v8f16, fmaxnum, 1>,
- Requires<[HasV8, HasNEON, HasFullFP16]>;
+ Requires<[HasFPARMv8, HasNEON, HasFullFP16]>;
}
// VMIN : Vector Minimum
@@ -5753,19 +5753,19 @@ let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" i
def NEON_VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
N3RegFrm, NoItinerary, "vminnm", "f32",
v2f32, v2f32, fminnum, 1>,
- Requires<[HasV8, HasNEON]>;
+ Requires<[HasFPARMv8, HasNEON]>;
def NEON_VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
N3RegFrm, NoItinerary, "vminnm", "f32",
v4f32, v4f32, fminnum, 1>,
- Requires<[HasV8, HasNEON]>;
+ Requires<[HasFPARMv8, HasNEON]>;
def NEON_VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
N3RegFrm, NoItinerary, "vminnm", "f16",
v4f16, v4f16, fminnum, 1>,
- Requires<[HasV8, HasNEON, HasFullFP16]>;
+ Requires<[HasFPARMv8, HasNEON, HasFullFP16]>;
def NEON_VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
N3RegFrm, NoItinerary, "vminnm", "f16",
v8f16, v8f16, fminnum, 1>,
- Requires<[HasV8, HasNEON, HasFullFP16]>;
+ Requires<[HasFPARMv8, HasNEON, HasFullFP16]>;
}
// Vector Pairwise Operations.
@@ -6396,6 +6396,10 @@ def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
Requires<[HasFPRegs, HasFastVGETLNi32]> {
let Inst{21} = lane{0};
}
+// VGETLNi32 is also legal as just vmov r0,d0[0] without the .32 suffix
+def : InstAlias<"vmov${p} $R, $V$lane",
+ (VGETLNi32 GPR:$R, DPR:$V, VectorIndex32:$lane, pred:$p), 0>,
+ Requires<VGETLNi32.Predicates>;
let Predicates = [HasNEON] in {
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
def : Pat<(ARMvgetlanes (v16i8 QPR:$src), imm:$lane),
@@ -6538,6 +6542,10 @@ def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
let isInsertSubreg = 1;
}
}
+// VSETLNi32 is also legal as just vmov d0[0],r0 without the .32 suffix
+def : InstAlias<"vmov${p} $V$lane, $R",
+ (VSETLNi32 DPR:$V, GPR:$R, VectorIndex32:$lane, pred:$p), 0>,
+ Requires<VSETLNi32.Predicates>;
// TODO: for odd lanes we could optimize this a bit by using the VINS
// FullFP16 instruction when it is available
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td
index df6c129a1857..be0ca964d3f9 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -1713,17 +1713,17 @@ def : T1Pat<(atomic_load_32 t_addrmode_is4:$src),
(tLDRi t_addrmode_is4:$src)>;
def : T1Pat<(atomic_load_32 t_addrmode_rr:$src),
(tLDRr t_addrmode_rr:$src)>;
-def : T1Pat<(atomic_store_8 t_addrmode_is1:$ptr, tGPR:$val),
+def : T1Pat<(atomic_store_8 tGPR:$val, t_addrmode_is1:$ptr),
(tSTRBi tGPR:$val, t_addrmode_is1:$ptr)>;
-def : T1Pat<(atomic_store_8 t_addrmode_rr:$ptr, tGPR:$val),
+def : T1Pat<(atomic_store_8 tGPR:$val, t_addrmode_rr:$ptr),
(tSTRBr tGPR:$val, t_addrmode_rr:$ptr)>;
-def : T1Pat<(atomic_store_16 t_addrmode_is2:$ptr, tGPR:$val),
+def : T1Pat<(atomic_store_16 tGPR:$val, t_addrmode_is2:$ptr),
(tSTRHi tGPR:$val, t_addrmode_is2:$ptr)>;
-def : T1Pat<(atomic_store_16 t_addrmode_rr:$ptr, tGPR:$val),
+def : T1Pat<(atomic_store_16 tGPR:$val, t_addrmode_rr:$ptr),
(tSTRHr tGPR:$val, t_addrmode_rr:$ptr)>;
-def : T1Pat<(atomic_store_32 t_addrmode_is4:$ptr, tGPR:$val),
+def : T1Pat<(atomic_store_32 tGPR:$val, t_addrmode_is4:$ptr),
(tSTRi tGPR:$val, t_addrmode_is4:$ptr)>;
-def : T1Pat<(atomic_store_32 t_addrmode_rr:$ptr, tGPR:$val),
+def : T1Pat<(atomic_store_32 tGPR:$val, t_addrmode_rr:$ptr),
(tSTRr tGPR:$val, t_addrmode_rr:$ptr)>;
// Large immediate handling.
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td
index f68f73523ba1..acd46e8093aa 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -4893,23 +4893,23 @@ def : T2Pat<(atomic_load_32 t2addrmode_negimm8:$addr),
(t2LDRi8 t2addrmode_negimm8:$addr)>;
def : T2Pat<(atomic_load_32 t2addrmode_so_reg:$addr),
(t2LDRs t2addrmode_so_reg:$addr)>;
-def : T2Pat<(atomic_store_8 t2addrmode_imm12:$addr, GPR:$val),
+def : T2Pat<(atomic_store_8 GPR:$val, t2addrmode_imm12:$addr),
(t2STRBi12 GPR:$val, t2addrmode_imm12:$addr)>;
-def : T2Pat<(atomic_store_8 t2addrmode_negimm8:$addr, GPR:$val),
+def : T2Pat<(atomic_store_8 GPR:$val, t2addrmode_negimm8:$addr),
(t2STRBi8 GPR:$val, t2addrmode_negimm8:$addr)>;
-def : T2Pat<(atomic_store_8 t2addrmode_so_reg:$addr, GPR:$val),
+def : T2Pat<(atomic_store_8 GPR:$val, t2addrmode_so_reg:$addr),
(t2STRBs GPR:$val, t2addrmode_so_reg:$addr)>;
-def : T2Pat<(atomic_store_16 t2addrmode_imm12:$addr, GPR:$val),
+def : T2Pat<(atomic_store_16 GPR:$val, t2addrmode_imm12:$addr),
(t2STRHi12 GPR:$val, t2addrmode_imm12:$addr)>;
-def : T2Pat<(atomic_store_16 t2addrmode_negimm8:$addr, GPR:$val),
+def : T2Pat<(atomic_store_16 GPR:$val, t2addrmode_negimm8:$addr),
(t2STRHi8 GPR:$val, t2addrmode_negimm8:$addr)>;
-def : T2Pat<(atomic_store_16 t2addrmode_so_reg:$addr, GPR:$val),
+def : T2Pat<(atomic_store_16 GPR:$val, t2addrmode_so_reg:$addr),
(t2STRHs GPR:$val, t2addrmode_so_reg:$addr)>;
-def : T2Pat<(atomic_store_32 t2addrmode_imm12:$addr, GPR:$val),
+def : T2Pat<(atomic_store_32 GPR:$val,t2addrmode_imm12:$addr),
(t2STRi12 GPR:$val, t2addrmode_imm12:$addr)>;
-def : T2Pat<(atomic_store_32 t2addrmode_negimm8:$addr, GPR:$val),
+def : T2Pat<(atomic_store_32 GPR:$val, t2addrmode_negimm8:$addr),
(t2STRi8 GPR:$val, t2addrmode_negimm8:$addr)>;
-def : T2Pat<(atomic_store_32 t2addrmode_so_reg:$addr, GPR:$val),
+def : T2Pat<(atomic_store_32 GPR:$val, t2addrmode_so_reg:$addr),
(t2STRs GPR:$val, t2addrmode_so_reg:$addr)>;
let AddedComplexity = 8, Predicates = [IsThumb, HasAcquireRelease, HasV7Clrex] in {
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td
index 5d940cc29af8..800527bcf756 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -2670,6 +2670,12 @@ def : Pat<(f32 (vfp_f32f16imm:$imm)),
let Predicates = [HasFullFP16];
}
+// Floating-point environment management.
+def : Pat<(get_fpenv), (VMRS)>;
+def : Pat<(set_fpenv GPRnopc:$Rt), (VMSR GPRnopc:$Rt)>;
+def : Pat<(reset_fpenv), (VMSR (MOVi 0))>, Requires<[IsARM]>;
+def : Pat<(reset_fpenv), (VMSR (tMOVi8 0))>, Requires<[IsThumb]>;
+
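
These patterns close the loop on the GET_FPENV/SET_FPENV/RESET_FPENV nodes marked Legal in the ARMISelLowering.cpp hunk earlier in this diff: the FP environment on this target is just FPSCR, read and written with VMRS/VMSR. A hedged user-level illustration via clang's ARM builtins (assumes clang targeting ARM with VFP):

    // These builtins compile to the same VMRS/VMSR moves the patterns
    // above select for the fpenv nodes.
    unsigned readFPEnv() { return __builtin_arm_get_fpscr(); }  // VMRS
    void writeFPEnv(unsigned V) { __builtin_arm_set_fpscr(V); } // VMSR
    void resetFPEnv() { __builtin_arm_set_fpscr(0); }           // VMSR of 0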
//===----------------------------------------------------------------------===//
// Assembler aliases.
//
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 93db983b92c0..a679699a66c7 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -2829,9 +2829,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
return Var == DbgVar;
};
- InstrVec.erase(
- std::remove_if(InstrVec.begin(), InstrVec.end(), IsDbgVar),
- InstrVec.end());
+ llvm::erase_if(InstrVec, IsDbgVar);
}
forEachDbgRegOperand(Instr,
[&](MachineOperand &Op) { Op.setReg(0); });
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index 247730c7b9ae..5c1c7046fdbf 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -60,7 +60,6 @@
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index aa9d8b54d963..a364992fab3e 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -13,6 +13,18 @@ using namespace llvm;
void ARMFunctionInfo::anchor() {}
+yaml::ARMFunctionInfo::ARMFunctionInfo(const llvm::ARMFunctionInfo &MFI)
+ : LRSpilled(MFI.isLRSpilled()) {}
+
+void yaml::ARMFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
+ MappingTraits<ARMFunctionInfo>::mapping(YamlIO, *this);
+}
+
+void ARMFunctionInfo::initializeBaseYamlFields(
+ const yaml::ARMFunctionInfo &YamlMFI) {
+ LRSpilled = YamlMFI.LRSpilled;
+}
+
static bool GetBranchTargetEnforcement(const Function &F,
const ARMSubtarget *Subtarget) {
if (!Subtarget->isMClass() || !Subtarget->hasV7Ops())
@@ -27,9 +39,8 @@ static bool GetBranchTargetEnforcement(const Function &F,
const StringRef BTIEnable =
F.getFnAttribute("branch-target-enforcement").getValueAsString();
- assert(BTIEnable.equals_insensitive("true") ||
- BTIEnable.equals_insensitive("false"));
- return BTIEnable.equals_insensitive("true");
+ assert(BTIEnable == "true" || BTIEnable == "false");
+ return BTIEnable == "true";
}
// The pair returns values for the ARMFunctionInfo members
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index f7531ce78cca..b9ff3a08f998 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -15,6 +15,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/Support/ErrorHandling.h"
@@ -22,6 +23,10 @@
namespace llvm {
+namespace yaml {
+struct ARMFunctionInfo;
+} // end namespace yaml
+
class ARMSubtarget;
/// ARMFunctionInfo - This class is derived from MachineFunctionInfo and
@@ -293,8 +298,29 @@ public:
}
bool branchTargetEnforcement() const { return BranchTargetEnforcement; }
+
+ void initializeBaseYamlFields(const yaml::ARMFunctionInfo &YamlMFI);
};
+namespace yaml {
+struct ARMFunctionInfo final : public yaml::MachineFunctionInfo {
+ bool LRSpilled;
+
+ ARMFunctionInfo() = default;
+ ARMFunctionInfo(const llvm::ARMFunctionInfo &MFI);
+
+ void mappingImpl(yaml::IO &YamlIO) override;
+ ~ARMFunctionInfo() = default;
+};
+
+template <> struct MappingTraits<ARMFunctionInfo> {
+ static void mapping(IO &YamlIO, ARMFunctionInfo &MFI) {
+ YamlIO.mapOptional("isLRSpilled", MFI.LRSpilled);
+ }
+};
+
+} // end namespace yaml
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H
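
A hedged round-trip sketch for the new mapping (assumes the traits above are in scope; llvm::yaml::Output is the generic YAML serializer):

    #include "ARMMachineFunctionInfo.h"
    #include "llvm/Support/YAMLTraits.h"
    #include "llvm/Support/raw_ostream.h"
    // Serializing the MFI emits the optional key added above, e.g.
    //   isLRSpilled: true
    // and the MIR parser feeds the same key back in through
    // initializeBaseYamlFields().
    void printMFI(llvm::yaml::ARMFunctionInfo &MFI) {
      llvm::yaml::Output Out(llvm::outs());
      Out << MFI;
    }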
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMMacroFusion.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMMacroFusion.cpp
index 38bf28ba8219..5aeb7abe92a3 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMMacroFusion.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMMacroFusion.cpp
@@ -62,7 +62,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
return false;
}
-std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation () {
+std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation() {
return createMacroFusionDAGMutation(shouldScheduleAdjacent);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
index f7977941e895..746a8715df0a 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -35,7 +35,7 @@ enum PartialMappingIdx {
PMI_Min = PMI_GPR,
};
-RegisterBankInfo::PartialMapping PartMappings[]{
+const RegisterBankInfo::PartialMapping PartMappings[]{
// GPR Partial Mapping
{0, 32, GPRRegBank},
// SPR Partial Mapping
@@ -72,7 +72,7 @@ enum ValueMappingIdx {
DPR3OpsIdx = 7,
};
-RegisterBankInfo::ValueMapping ValueMappings[] = {
+const RegisterBankInfo::ValueMapping ValueMappings[] = {
// invalid
{nullptr, 0},
// 3 ops in GPRs
@@ -89,8 +89,9 @@ RegisterBankInfo::ValueMapping ValueMappings[] = {
{&PartMappings[PMI_DPR - PMI_Min], 1}};
#ifndef NDEBUG
-static bool checkValueMapping(const RegisterBankInfo::ValueMapping &VM,
- RegisterBankInfo::PartialMapping *BreakDown) {
+static bool
+checkValueMapping(const RegisterBankInfo::ValueMapping &VM,
+ const RegisterBankInfo::PartialMapping *BreakDown) {
return VM.NumBreakDowns == 1 && VM.BreakDown == BreakDown;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSLSHardening.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSLSHardening.cpp
index 09357ae2e3a3..23d72b34902d 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSLSHardening.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSLSHardening.cpp
@@ -210,7 +210,7 @@ ArmInsertedThunks SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI,
void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) {
// FIXME: How to better communicate Register number, rather than through
// name and lookup table?
- assert(MF.getName().startswith(getThunkPrefix()));
+ assert(MF.getName().starts_with(getThunkPrefix()));
auto ThunkIt = llvm::find_if(
SLSBLRThunks, [&MF](auto T) { return T.Name == MF.getName(); });
assert(ThunkIt != std::end(SLSBLRThunks));
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSchedule.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSchedule.td
index 53a2a6fec51e..b28de7873b30 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSchedule.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSchedule.td
@@ -49,7 +49,7 @@
// NumMicroOps = 2; // Dispatch 2 micro-ops.
// // The two instances of resource P01 are occupied for one cycle. It is one
// // cycle because these resources happen to be pipelined.
-// ResourceCycles = [1, 1];
+// ReleaseAtCycles = [1, 1];
// }
// def : ReadAdvance<ReadAdvanceALUsr, 3>;
@@ -195,7 +195,7 @@ class BranchWriteRes<int lat, int uops, list<ProcResourceKind> resl,
list<int> rcl, SchedWriteRes wr> :
SchedWriteRes<!listconcat(wr.ProcResources, resl)> {
let Latency = !add(wr.Latency, lat);
- let ResourceCycles = !listconcat(wr.ResourceCycles, rcl);
+ let ReleaseAtCycles = !listconcat(wr.ReleaseAtCycles, rcl);
let NumMicroOps = !add(wr.NumMicroOps, uops);
SchedWriteRes BaseWr = wr;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td
index 531b10bc5cfd..025023c5f41a 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td
@@ -28,30 +28,30 @@ def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; }
def A57Write_17cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 17;
- let ResourceCycles = [17]; }
+ let ReleaseAtCycles = [17]; }
def A57Write_18cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 18;
- let ResourceCycles = [18]; }
+ let ReleaseAtCycles = [18]; }
def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19;
- let ResourceCycles = [19]; }
+ let ReleaseAtCycles = [19]; }
def A57Write_20cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 20;
- let ResourceCycles = [20]; }
+ let ReleaseAtCycles = [20]; }
def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; }
def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1;
- let ResourceCycles = [1]; }
+ let ReleaseAtCycles = [1]; }
def A57Write_2cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 2;
- let ResourceCycles = [1]; }
+ let ReleaseAtCycles = [1]; }
def A57Write_3cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 3; }
def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; }
def A57Write_2cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 2; }
def A57Write_3cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 3; }
def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2;
- let ResourceCycles = [1]; }
+ let ReleaseAtCycles = [1]; }
def A57Write_32cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 32;
- let ResourceCycles = [32]; }
+ let ReleaseAtCycles = [32]; }
def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32;
- let ResourceCycles = [32]; }
+ let ReleaseAtCycles = [32]; }
def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35;
- let ResourceCycles = [35]; }
+ let ReleaseAtCycles = [35]; }
def A57Write_3cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
def A57Write_3cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 3; }
def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; }
@@ -89,7 +89,7 @@ def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; }
def A57Write_64cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
let Latency = 64;
let NumMicroOps = 2;
- let ResourceCycles = [32, 32];
+ let ReleaseAtCycles = [32, 32];
}
def A57Write_6cyc_1I_1L : SchedWriteRes<[A57UnitI,
A57UnitL]> {
@@ -224,7 +224,7 @@ def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
def A57Write_36cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
let Latency = 36;
let NumMicroOps = 2;
- let ResourceCycles = [18, 18];
+ let ReleaseAtCycles = [18, 18];
}
def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI,
A57UnitM]> {
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA9.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA9.td
index 8b375d3602c2..a0f56a69b2bb 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA9.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA9.td
@@ -1995,15 +1995,15 @@ def : WriteRes<WriteVST4, []>;
// Reserve A9UnitFP for 2 consecutive cycles.
def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
let Latency = 4;
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
}
def A9Write2V7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
let Latency = 7;
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
}
def A9Write2V9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
let Latency = 9;
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
}
// Branches don't have a def operand but still consume resources.
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleM55.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleM55.td
index f24f97b26f0a..ff05936e8ba4 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleM55.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleM55.td
@@ -46,7 +46,7 @@
//
// For this schedule, we currently model latencies and pipelines well for each
// instruction. MVE instructions take two beats, modelled using
-// ResourceCycles=[2].
+// ReleaseAtCycles=[2].
//
//
// Dual Issue
@@ -245,7 +245,7 @@ def : ReadAdvance<ReadMAC, 0>;
// MVE and VFP //
//=============//
-// The Writes that take ResourceCycles=[2] are MVE instruction, the others VFP.
+// The Writes that take ReleaseAtCycles=[2] are MVE instructions, the others VFP.
let SingleIssue = 1, Latency = 1 in {
def M55WriteLSE2 : SchedWriteRes<[M55UnitLoadStore]>;
@@ -253,10 +253,10 @@ let SingleIssue = 1, Latency = 1 in {
def M55WriteFloatE2 : SchedWriteRes<[M55UnitVecFPALU]>;
def M55WriteSysE2 : SchedWriteRes<[M55UnitVecSys]>;
- def M55Write2LSE2 : SchedWriteRes<[M55UnitLoadStore]> { let ResourceCycles=[2]; }
- def M55Write2IntE2 : SchedWriteRes<[M55UnitVecALU]> { let ResourceCycles=[2]; }
- def M55Write2FloatE2 : SchedWriteRes<[M55UnitVecFPALU]> { let ResourceCycles=[2]; }
- def M55Write2IntFPE2 : SchedWriteRes<[M55UnitVecIntFP]> { let ResourceCycles=[2]; }
+ def M55Write2LSE2 : SchedWriteRes<[M55UnitLoadStore]> { let ReleaseAtCycles=[2]; }
+ def M55Write2IntE2 : SchedWriteRes<[M55UnitVecALU]> { let ReleaseAtCycles=[2]; }
+ def M55Write2FloatE2 : SchedWriteRes<[M55UnitVecFPALU]> { let ReleaseAtCycles=[2]; }
+ def M55Write2IntFPE2 : SchedWriteRes<[M55UnitVecIntFP]> { let ReleaseAtCycles=[2]; }
}
let SingleIssue = 1, Latency = 2 in {
@@ -264,20 +264,20 @@ let SingleIssue = 1, Latency = 2 in {
def M55WriteIntE3 : SchedWriteRes<[M55UnitVecALU]>;
def M55WriteFloatE3 : SchedWriteRes<[M55UnitVecFPALU]>;
- def M55Write2LSE3 : SchedWriteRes<[M55UnitLoadStore]> { let ResourceCycles=[2]; }
- def M55Write2IntE3 : SchedWriteRes<[M55UnitVecALU]> { let ResourceCycles=[2]; }
- def M55Write2FloatE3 : SchedWriteRes<[M55UnitVecFPALU]> { let ResourceCycles=[2]; }
+ def M55Write2LSE3 : SchedWriteRes<[M55UnitLoadStore]> { let ReleaseAtCycles=[2]; }
+ def M55Write2IntE3 : SchedWriteRes<[M55UnitVecALU]> { let ReleaseAtCycles=[2]; }
+ def M55Write2FloatE3 : SchedWriteRes<[M55UnitVecFPALU]> { let ReleaseAtCycles=[2]; }
}
let SingleIssue = 1, Latency = 3 in {
- def M55Write2IntE3Plus1 : SchedWriteRes<[M55UnitVecALU]> { let ResourceCycles=[2]; }
+ def M55Write2IntE3Plus1 : SchedWriteRes<[M55UnitVecALU]> { let ReleaseAtCycles=[2]; }
// Same as M55Write2IntE3/M55Write2FloatE3 above, but longer latency and no forwarding into stores
- def M55Write2IntE4NoFwd : SchedWriteRes<[M55UnitVecALU]> { let ResourceCycles=[2]; }
- def M55Write2FloatE4NoFwd : SchedWriteRes<[M55UnitVecFPALU]> { let ResourceCycles=[2]; }
+ def M55Write2IntE4NoFwd : SchedWriteRes<[M55UnitVecALU]> { let ReleaseAtCycles=[2]; }
+ def M55Write2FloatE4NoFwd : SchedWriteRes<[M55UnitVecFPALU]> { let ReleaseAtCycles=[2]; }
}
let SingleIssue = 1, Latency = 4 in {
- def M55Write2IntE3Plus2 : SchedWriteRes<[M55UnitVecALU]> { let ResourceCycles=[2]; }
+ def M55Write2IntE3Plus2 : SchedWriteRes<[M55UnitVecALU]> { let ReleaseAtCycles=[2]; }
def M55WriteFloatE3Plus2 : SchedWriteRes<[M55UnitVecFPALU]>;
}
let SingleIssue = 1, Latency = 9 in {
@@ -353,9 +353,9 @@ def : InstRW<[M55Write2IntE2], (instregex "MVE_VHADD")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VHCADD")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VHSUB")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_V(MAX|MIN)A?(s|u)")>;
-def : InstRW<[M55Write2IntE3], (instregex "MVE_V(MAX|MIN)A?V(s|u)8")>;
+def : InstRW<[M55Write2IntE3Plus2], (instregex "MVE_V(MAX|MIN)A?V(s|u)8")>;
def : InstRW<[M55Write2IntE3Plus1], (instregex "MVE_V(MAX|MIN)A?V(s|u)16")>;
-def : InstRW<[M55Write2IntE3Plus2], (instregex "MVE_V(MAX|MIN)A?V(s|u)32")>;
+def : InstRW<[M55Write2IntE3], (instregex "MVE_V(MAX|MIN)A?V(s|u)32")>;
def : InstRW<[M55Write2IntE4NoFwd], (instregex "MVE_VMOVN")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VMOVL")>;
def : InstRW<[M55Write2IntE3], (instregex "MVE_VMULL[BT]p")>;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleM85.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleM85.td
new file mode 100644
index 000000000000..cd375a16305e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleM85.td
@@ -0,0 +1,981 @@
+//=- ARMScheduleM85.td - ARM Cortex-M85 Scheduling Definitions -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for the ARM Cortex-M85 processor.
+//
+// All timing is referenced to EX2. Thus, operands which are needed at EX1 are
+// stated to have a ReadAdvance of -1. The FP/MVE pipe actually begins at EX3
+// but is described as if it were in EX2 to avoid having unnaturally long latencies
+// with delayed inputs on every instruction. Instead, whenever an FP instruction
+// must access a GP register or a non-FP instruction (which includes loads/stores)
+// must access an FP register, the operand timing is adjusted:
+// FP accessing GPR: read one cycle later, write one cycle later
+// NOTE: absolute spec timing already includes this if
+// referenced to EX2
+// non-FP accessing FPR: read one cycle earlier, write one cycle earlier
+//===----------------------------------------------------------------------===//
+
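
The negative-ReadAdvance convention described above is plain cycle arithmetic: the effective operand latency is the producer's Latency minus the consumer operand's ReadAdvance, so operands read earlier in the pipe (negative advance) lengthen the dependence. A worked sketch (just the arithmetic, not scheduler API):

    // Example: M85SlowLoad below writes with Latency 2; feeding its result
    // into an address operand read at EX1 (M85Read_ISS, SchedReadAdvance<-1>)
    // behaves as a 2 - (-1) = 3 cycle dependence. Clamped to stay
    // non-negative.
    static int effectiveOperandLatency(int WriteLatency, int ReadAdvance) {
      int Cycles = WriteLatency - ReadAdvance;
      return Cycles < 0 ? 0 : Cycles;
    }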
+def CortexM85Model : SchedMachineModel {
+ let IssueWidth = 2; // Dual issue for most instructions.
+ let MicroOpBufferSize = 0; // M85 is in-order.
+ let LoadLatency = 2; // Best case for load-use case.
+ let MispredictPenalty = 4; // Mispredict cost for forward branches is 7,
+ // but 4 works better
+ let CompleteModel = 0;
+}
+
+let SchedModel = CortexM85Model in {
+
+//===--------------------------------------------------------------------===//
+// CortexM85 has two ALU, two LOAD, two STORE, a MAC, a BRANCH and two VFP
+// pipes (with three units). There are three shifters available: one per
+// stage.
+
+def M85UnitLoadL : ProcResource<1> { let BufferSize = 0; }
+def M85UnitLoadH : ProcResource<1> { let BufferSize = 0; }
+def M85UnitLoad : ProcResGroup<[M85UnitLoadL,M85UnitLoadH]> { let BufferSize = 0; }
+def M85UnitStoreL : ProcResource<1> { let BufferSize = 0; }
+def M85UnitStoreH : ProcResource<1> { let BufferSize = 0; }
+def M85UnitStore : ProcResGroup<[M85UnitStoreL,M85UnitStoreH]> { let BufferSize = 0; }
+def M85UnitALU : ProcResource<2> { let BufferSize = 0; }
+def M85UnitShift1 : ProcResource<1> { let BufferSize = 0; }
+def M85UnitShift2 : ProcResource<1> { let BufferSize = 0; }
+def M85UnitMAC : ProcResource<1> { let BufferSize = 0; }
+def M85UnitBranch : ProcResource<1> { let BufferSize = 0; }
+def M85UnitVFPAL : ProcResource<1> { let BufferSize = 0; }
+def M85UnitVFPAH : ProcResource<1> { let BufferSize = 0; }
+def M85UnitVFPA : ProcResGroup<[M85UnitVFPAL,M85UnitVFPAH]> { let BufferSize = 0; }
+def M85UnitVFPBL : ProcResource<1> { let BufferSize = 0; }
+def M85UnitVFPBH : ProcResource<1> { let BufferSize = 0; }
+def M85UnitVFPB : ProcResGroup<[M85UnitVFPBL,M85UnitVFPBH]> { let BufferSize = 0; }
+def M85UnitVFPCL : ProcResource<1> { let BufferSize = 0; }
+def M85UnitVFPCH : ProcResource<1> { let BufferSize = 0; }
+def M85UnitVFPC : ProcResGroup<[M85UnitVFPCL,M85UnitVFPCH]> { let BufferSize = 0; }
+def M85UnitVFPD : ProcResource<1> { let BufferSize = 0; }
+def M85UnitVPortL : ProcResource<1> { let BufferSize = 0; }
+def M85UnitVPortH : ProcResource<1> { let BufferSize = 0; }
+def M85UnitVPort : ProcResGroup<[M85UnitVPortL,M85UnitVPortH]> { let BufferSize = 0; }
+def M85UnitSIMD : ProcResource<1> { let BufferSize = 0; }
+def M85UnitLShift : ProcResource<1> { let BufferSize = 0; }
+def M85UnitDiv : ProcResource<1> { let BufferSize = 0; }
+
+def M85UnitSlot0 : ProcResource<1> { let BufferSize = 0; }
+
+//===---------------------------------------------------------------------===//
+// Subtarget-specific SchedWrite types that map ProcResources and set latency.
+
+def : WriteRes<WriteALU, [M85UnitALU]> { let Latency = 1; }
+
+// Basic ALU with shifts.
+let Latency = 1 in {
+ def : WriteRes<WriteALUsi, [M85UnitALU, M85UnitShift1]>;
+ def : WriteRes<WriteALUsr, [M85UnitALU, M85UnitShift1]>;
+ def : WriteRes<WriteALUSsr, [M85UnitALU, M85UnitShift1]>;
+}
+
+// Compares.
+def : WriteRes<WriteCMP, [M85UnitALU]> { let Latency = 1; }
+def : WriteRes<WriteCMPsi, [M85UnitALU, M85UnitShift1]> { let Latency = 2; }
+def : WriteRes<WriteCMPsr, [M85UnitALU, M85UnitShift1]> { let Latency = 2; }
+
+// Multiplies.
+let Latency = 2 in {
+ def : WriteRes<WriteMUL16, [M85UnitMAC]>;
+ def : WriteRes<WriteMUL32, [M85UnitMAC]>;
+ def : WriteRes<WriteMUL64Lo, [M85UnitMAC]>;
+ def : WriteRes<WriteMUL64Hi, []> { let NumMicroOps = 0; }
+}
+
+// Multiply-accumulates.
+let Latency = 2 in {
+def : WriteRes<WriteMAC16, [M85UnitMAC]>;
+def : WriteRes<WriteMAC32, [M85UnitMAC]>;
+def : WriteRes<WriteMAC64Lo, [M85UnitMAC]>;
+def : WriteRes<WriteMAC64Hi, []> { let NumMicroOps = 0; }
+}
+
+// Divisions.
+def : WriteRes<WriteDIV, [M85UnitDiv]> {
+ let Latency = 7;
+}
+
+// Loads/Stores.
+def : WriteRes<WriteLd, [M85UnitLoad]> { let Latency = 1; }
+def : WriteRes<WritePreLd, [M85UnitLoad]> { let Latency = 2; }
+def : WriteRes<WriteST, [M85UnitStore]> { let Latency = 2; }
+def M85WriteLdWide : SchedWriteRes<[M85UnitLoadL, M85UnitLoadH]> { let Latency = 1; }
+def M85WriteStWide : SchedWriteRes<[M85UnitStoreL, M85UnitStoreH]> { let Latency = 2; }
+
+// Branches.
+def : WriteRes<WriteBr, [M85UnitBranch]> { let Latency = 2; }
+def : WriteRes<WriteBrL, [M85UnitBranch]> { let Latency = 2; }
+def : WriteRes<WriteBrTbl, [M85UnitBranch]> { let Latency = 2; }
+
+// Noop.
+def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
+
+//===---------------------------------------------------------------------===//
+// Sched definitions for floating-point instructions
+//
+// Floating point conversions.
+def : WriteRes<WriteFPCVT, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
+ let Latency = 2;
+}
+def : WriteRes<WriteFPMOV, [M85UnitVPort, M85UnitSlot0]> { let Latency = 1; }
+def M85WriteFPMOV64 : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> { let Latency = 1; }
+
+// ALU operations (32/64-bit). These go down the FP pipeline.
+def : WriteRes<WriteFPALU32, [M85UnitVFPA, M85UnitVPort, M85UnitSlot0]> {
+ let Latency = 2;
+}
+def : WriteRes<WriteFPALU64, [M85UnitVFPAL, M85UnitVFPAH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
+ let Latency = 6;
+}
+
+// Multiplication
+def : WriteRes<WriteFPMUL32, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
+ let Latency = 3;
+}
+def : WriteRes<WriteFPMUL64, [M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
+ let Latency = 8;
+}
+
+// Multiply-accumulate. FPMAC goes down the FP Pipeline.
+def : WriteRes<WriteFPMAC32, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
+ let Latency = 5;
+}
+def : WriteRes<WriteFPMAC64, [M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
+ let Latency = 14;
+}
+
+// Division. Effective scheduling latency is 3, though real latency is larger
+def : WriteRes<WriteFPDIV32, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
+ let Latency = 14;
+}
+def : WriteRes<WriteFPDIV64, [M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
+ let Latency = 29;
+}
+
+// Square-root. Effective scheduling latency is 3, though real latency is larger
+def : WriteRes<WriteFPSQRT32, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
+ let Latency = 14;
+}
+def : WriteRes<WriteFPSQRT64, [M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
+ let Latency = 29;
+}
+
+let NumMicroOps = 0 in {
+ def M85SingleIssue : SchedWriteRes<[]> { let SingleIssue = 1; }
+ def M85Slot0Only : SchedWriteRes<[M85UnitSlot0]> { }
+}
+
+// What pipeline stage operands need to be ready for depending on
+// where they come from.
+def : ReadAdvance<ReadALUsr, 0>;
+def : ReadAdvance<ReadMUL, 0>;
+def : ReadAdvance<ReadMAC, 1>;
+def : ReadAdvance<ReadALU, 0>;
+def : ReadAdvance<ReadFPMUL, 0>;
+def : ReadAdvance<ReadFPMAC, 3>;
+def M85Read_ISSm1 : SchedReadAdvance<-2>; // operands needed at ISS
+def M85Read_ISS : SchedReadAdvance<-1>; // operands needed at EX1
+def M85Read_EX1 : SchedReadAdvance<0>; // operands needed at EX2
+def M85Read_EX2 : SchedReadAdvance<1>; // operands needed at EX3
+def M85Read_EX3 : SchedReadAdvance<2>; // operands needed at EX4
+def M85Read_EX4 : SchedReadAdvance<3>; // operands needed at EX5
+def M85Write1 : SchedWriteRes<[]> {
+ let Latency = 1;
+ let NumMicroOps = 0;
+}
+def M85Write2 : SchedWriteRes<[]> {
+ let Latency = 2;
+ let NumMicroOps = 0;
+}
+def M85WriteShift2 : SchedWriteRes<[M85UnitALU, M85UnitShift2]> {}
+
+// Non general purpose instructions may not be dual issued. These
+// use both issue units.
+def M85NonGeneralPurpose : SchedWriteRes<[]> {
+ // Assume that these will go down the main ALU pipeline.
+ // In reality, many look likely to stall the whole pipeline.
+ let Latency = 3;
+ let SingleIssue = 1;
+}
+
+// List the non general purpose instructions.
+def : InstRW<[M85NonGeneralPurpose],
+ (instregex "t2MRS", "tSVC", "tBKPT", "t2MSR", "t2DMB", "t2DSB",
+ "t2ISB", "t2HVC", "t2SMC", "t2UDF", "ERET", "tHINT",
+ "t2HINT", "t2CLREX", "t2CLRM", "BUNDLE")>;
+
+//===---------------------------------------------------------------------===//
+// Sched definitions for load/store
+//
+// Mark whether the loads/stores must be single-issue
+// Address operands are needed earlier
+// Data operands are needed later
+
+let NumMicroOps = 0 in {
+ def M85BaseUpdate : SchedWriteRes<[]> {
+ // Update is bypassable out of EX1
+ let Latency = 0;
+ }
+ def M85MVERBaseUpdate : SchedWriteRes<[]> { let Latency = 1; }
+ // Q register base update is available in EX3 to bypass into EX2/ISS.
+ // Latency=2 matches what we want for ISS, Latency=1 for EX2. Going
+ // with 2, as base update into another load/store is most likely. Could
+ // change later in an override.
+ def M85MVEQBaseUpdate : SchedWriteRes<[]> { let Latency = 2; }
+ def M85LoadLatency1 : SchedWriteRes<[]> { let Latency = 1; }
+}
+def M85SlowLoad : SchedWriteRes<[M85UnitLoad]> { let Latency = 2; }
+
+// Byte and half-word loads should have greater latency than other loads.
+// So should load exclusive?
+
+def : InstRW<[M85SlowLoad],
+ (instregex "t2LDR(B|H|SB|SH)pc")>;
+def : InstRW<[M85SlowLoad, M85Read_ISS],
+ (instregex "t2LDR(B|H|SB|SH)T", "t2LDR(B|H|SB|SH)i",
+ "tLDRspi", "tLDR(B|H)i")>;
+def : InstRW<[M85SlowLoad, M85Read_ISS, M85Read_ISS],
+ (instregex "t2LDR(B|H|SB|SH)s")>;
+def : InstRW<[M85SlowLoad, M85Read_ISS, M85Read_ISS],
+ (instregex "tLDR(B|H)r", "tLDR(SB|SH)")>;
+def : InstRW<[M85SlowLoad, M85BaseUpdate, M85Read_ISS],
+ (instregex "t2LDR(B|H|SB|SH)_(POST|PRE)")>;
+
+// Exclusive/acquire/release loads/stores cannot be dual-issued
+def : InstRW<[WriteLd, M85SingleIssue, M85Read_ISS],
+ (instregex "t2LDREX$", "t2LDA(EX)?$")>;
+def : InstRW<[M85WriteLdWide, M85LoadLatency1, M85SingleIssue, M85Read_ISS],
+ (instregex "t2LDAEXD$")>;
+def : InstRW<[M85SlowLoad, M85SingleIssue, M85Read_ISS],
+ (instregex "t2LDREX(B|H)", "t2LDA(EX)?(B|H)$")>;
+def : InstRW<[WriteST, M85SingleIssue, M85Read_EX2, M85Read_ISS],
+ (instregex "t2STREX(B|H)?$", "t2STL(EX)?(B|H)?$")>;
+def : InstRW<[M85WriteStWide, M85SingleIssue, M85Read_EX2, M85Read_EX2, M85Read_ISS],
+ (instregex "t2STLEXD$")>;
+
+// Load/store multiples end issue groups.
+
+def : InstRW<[M85WriteLdWide, M85SingleIssue, M85Read_ISS],
+ (instregex "(t|t2)LDM(DB|IA)$")>;
+def : InstRW<[M85WriteStWide, M85SingleIssue, M85Read_ISS],
+ (instregex "(t|t2)STM(DB|IA)$")>;
+def : InstRW<[M85BaseUpdate, M85WriteLdWide, M85SingleIssue, M85Read_ISS],
+ (instregex "(t|t2)LDM(DB|IA)_UPD$", "tPOP")>;
+def : InstRW<[M85BaseUpdate, M85WriteStWide, M85SingleIssue, M85Read_ISS],
+ (instregex "(t|t2)STM(DB|IA)_UPD$", "tPUSH")>;
+
+// Load/store doubles
+
+def : InstRW<[M85BaseUpdate, M85WriteStWide,
+ M85Read_EX2, M85Read_EX2, M85Read_ISS],
+ (instregex "t2STRD_(PRE|POST)")>;
+def : InstRW<[M85WriteStWide, M85Read_EX2, M85Read_EX2, M85Read_ISS],
+ (instregex "t2STRDi")>;
+def : InstRW<[M85WriteLdWide, M85LoadLatency1, M85BaseUpdate, M85Read_ISS],
+ (instregex "t2LDRD_(PRE|POST)")>;
+def : InstRW<[M85WriteLdWide, M85LoadLatency1, M85Read_ISS],
+ (instregex "t2LDRDi")>;
+
+// Word load / preload
+def : InstRW<[WriteLd],
+ (instregex "t2LDRpc", "t2PL[DI]pci", "tLDRpci")>;
+def : InstRW<[WriteLd, M85Read_ISS],
+ (instregex "t2LDR(i|T)", "t2PL[DI](W)?i", "tLDRi")>;
+def : InstRW<[WriteLd, M85Read_ISS, M85Read_ISS],
+ (instregex "t2LDRs", "t2PL[DI](w)?s", "tLDRr")>;
+def : InstRW<[WriteLd, M85BaseUpdate, M85Read_ISS],
+ (instregex "t2LDR_(POST|PRE)")>;
+
+// Stores
+def : InstRW<[M85BaseUpdate, WriteST, M85Read_EX2, M85Read_ISS],
+ (instregex "t2STR(B|H)?_(POST|PRE)")>;
+def : InstRW<[WriteST, M85Read_EX2, M85Read_ISS, M85Read_ISS],
+ (instregex "t2STR(B|H)?s$", "tSTR(B|H)?r$")>;
+def : InstRW<[WriteST, M85Read_EX2, M85Read_ISS],
+ (instregex "t2STR(B|H)?(i|T)", "tSTR(B|H)?i$", "tSTRspi")>;
+
+// TBB/TBH - single-issue only
+
+def M85TableLoad : SchedWriteRes<[M85UnitLoad]> { let SingleIssue = 1; }
+
+def : InstRW<[M85TableLoad, M85Read_ISS, M85Read_ISS],
+ (instregex "t2TB")>;
+
+// VFP/MVE loads and stores
+// Note: timing for the special-register VLDR/VSTR forms has not been broken
+// out.
+// Note 2: see the notes at the top of the file for why load latency is 1 and
+// store data is read in EX3.
+
+def M85LoadSP : SchedWriteRes<[M85UnitLoad, M85UnitVPort]>;
+def M85LoadDP : SchedWriteRes<[M85UnitLoadL, M85UnitLoadH,
+ M85UnitVPortL, M85UnitVPortH]>;
+def M85LoadSys : SchedWriteRes<[M85UnitLoad, M85UnitVPort,
+ M85UnitVFPA, M85UnitVFPB, M85UnitVFPC, M85UnitVFPD]> {
+ let Latency = 4;
+}
+def M85StoreSP : SchedWriteRes<[M85UnitStore, M85UnitVPort]>;
+def M85StoreDP : SchedWriteRes<[M85UnitStoreL, M85UnitStoreH,
+ M85UnitVPortL, M85UnitVPortH]>;
+def M85StoreSys : SchedWriteRes<[M85UnitStore, M85UnitVPort,
+ M85UnitVFPA, M85UnitVFPB, M85UnitVFPC, M85UnitVFPD]>;
+let ReleaseAtCycles = [2,2,1,1], EndGroup = 1 in {
+ def M85LoadMVE : SchedWriteRes<[M85UnitLoadL, M85UnitLoadH,
+ M85UnitVPortL, M85UnitVPortH]>;
+ def M85LoadMVELate : SchedWriteRes<[M85UnitLoadL, M85UnitLoadH,
+ M85UnitVPortL, M85UnitVPortH]> {
+ let Latency = 4; // 3 cycles later
+ }
+ def M85StoreMVE : SchedWriteRes<[M85UnitStoreL, M85UnitStoreH,
+ M85UnitVPortL, M85UnitVPortH]>;
+}
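+
+// Illustrative note: EndGroup = 1 makes an MVE load/store the last
+// instruction issued in its cycle, and the doubled ReleaseAtCycles on the
+// load/store unit halves model the two beats of a 128-bit access over the
+// 64-bit datapath.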
+
+def : InstRW<[M85LoadSP, M85Read_ISS], (instregex "VLDR(S|H)$")>;
+def : InstRW<[M85LoadSys, M85Read_ISS], (instregex "VLDR_")>;
+def : InstRW<[M85LoadDP, M85Read_ISS], (instregex "VLDRD$")>;
+def : InstRW<[M85StoreSP, M85Read_EX3, M85Read_ISS], (instregex "VSTR(S|H)$")>;
+def : InstRW<[M85StoreSys, M85Read_EX1, M85Read_ISS], (instregex "VSTR_")>;
+def : InstRW<[M85StoreDP, M85Read_EX3, M85Read_ISS], (instregex "VSTRD$")>;
+
+def : InstRW<[M85LoadMVELate, M85Read_ISS],
+ (instregex "MVE_VLD[24]._[0-9]+$")>;
+def : InstRW<[M85LoadMVELate, M85MVERBaseUpdate, M85Read_ISS],
+ (instregex "MVE_VLD[24].*wb")>;
+def : InstRW<[M85LoadMVE, M85Read_ISS],
+ (instregex "MVE_VLDR.*(8|16|32|64)$")>;
+def : InstRW<[M85LoadMVE, M85SingleIssue, M85Read_ISS, M85Read_ISS],
+ (instregex "MVE_VLDR.*(_rq|_rq|_rq_u)$")>;
+def : InstRW<[M85LoadMVE, M85SingleIssue, M85Read_ISS],
+ (instregex "MVE_VLDR.*_qi$")>;
+def : InstRW<[M85MVERBaseUpdate, M85LoadMVE, M85Read_ISS],
+ (instregex "MVE_VLDR.*(_post|[^i]_pre)$")>;
+def : InstRW<[M85MVEQBaseUpdate, M85SingleIssue, M85LoadMVE, M85Read_ISS],
+ (instregex "MVE_VLDR.*(qi_pre)$")>;
+
+def : InstRW<[M85StoreMVE, M85Read_EX3, M85Read_ISS],
+ (instregex "MVE_VST[24]._[0-9]+$")>;
+def : InstRW<[M85StoreMVE, M85Read_EX3, M85MVERBaseUpdate, M85Read_ISS],
+ (instregex "MVE_VST[24].*wb")>;
+def : InstRW<[M85StoreMVE, M85Read_EX3, M85Read_ISS],
+ (instregex "MVE_VSTR.*(8|16|32|64)$")>;
+def : InstRW<[M85StoreMVE, M85SingleIssue, M85Read_EX3, M85Read_ISS, M85Read_ISS],
+ (instregex "MVE_VSTR.*(_rq|_rq|_rq_u)$")>;
+def : InstRW<[M85StoreMVE, M85SingleIssue, M85Read_EX3, M85Read_ISS],
+ (instregex "MVE_VSTR.*_qi$")>;
+def : InstRW<[M85MVERBaseUpdate, M85StoreMVE, M85Read_EX3, M85Read_ISS],
+ (instregex "MVE_VSTR.*(_post|[^i]_pre)$")>;
+def : InstRW<[M85MVEQBaseUpdate, M85SingleIssue, M85StoreMVE,
+ M85Read_EX3, M85Read_ISS],
+ (instregex "MVE_VSTR.*(qi_pre)$")>;
+
+// Load/store multiples end issue groups.
+
+def : InstRW<[M85WriteLdWide, M85SingleIssue, M85Read_ISS],
+ (instregex "VLDM(S|D|Q)(DB|IA)$")>;
+def : InstRW<[M85WriteStWide, M85SingleIssue, M85Read_ISS, M85Read_EX3],
+ (instregex "VSTM(S|D|Q)(DB|IA)$")>;
+def : InstRW<[M85BaseUpdate, M85WriteLdWide, M85SingleIssue, M85Read_ISS],
+ (instregex "VLDM(S|D|Q)(DB|IA)_UPD$", "VLLDM")>;
+def : InstRW<[M85BaseUpdate, M85WriteStWide, M85SingleIssue,
+ M85Read_ISS, M85Read_EX3],
+ (instregex "VSTM(S|D|Q)(DB|IA)_UPD$", "VLSTM")>;
+
+//===---------------------------------------------------------------------===//
+// Sched definitions for ALU
+//
+
+// Non-small shifted ALU operands are read a cycle early; small LSLs
+// aren't, as they don't require the shifter.
+
+def M85NonsmallShiftWrite : SchedWriteRes<[M85UnitALU,M85UnitShift1]> {
+ let Latency = 1;
+}
+
+def M85WriteALUsi : SchedWriteVariant<[
+ SchedVar<NoSchedPred, [M85NonsmallShiftWrite]>
+]>;
+def M85Ex1ReadNoFastBypass : SchedReadAdvance<-1,
+ [WriteLd, M85WriteLdWide, M85LoadLatency1]>;
+def M85ReadALUsi : SchedReadVariant<[
+ SchedVar<NoSchedPred, [M85Read_ISS]>
+]>;
+
+def : InstRW<[M85WriteALUsi, M85Read_EX1, M85ReadALUsi],
+ (instregex "t2(ADC|ADDS|BIC|EOR|ORN|ORR|RSBS|RSB|SBC|"
+ "SUBS|CMP|CMNz|TEQ|TST)rs$")>;
+def : InstRW<[M85WriteALUsi, M85ReadALUsi],
+ (instregex "t2MVNs")>;
+
+// Cortex-M85 treats LSL #0 as needing a shifter. In practice the throughput
+// reliably seems to be 2 when run on a cycle model, so we don't require a
+// shift resource.
+def : InstRW<[M85WriteALUsi, M85Read_EX1, M85ReadALUsi],
+ (instregex "t2(ADC|ADDS|BIC|EOR|ORN|ORR|RSBS|RSB|SBC|"
+ "SUBS|CMP|CMNz|TEQ|TST)rr$")>;
+def : InstRW<[M85WriteALUsi, M85ReadALUsi],
+ (instregex "t2MVNr")>;
+
+// Shift instructions: most pure shifts (i.e. MOV with shift) will use
+// whichever shifter is free, so they can be dual-issued freely with anything
+// else. As a result, they are not modeled as needing a shifter.
+// RRX is odd because it must use the EX2 shifter, so it cannot dual-issue with
+// itself.
+//
+// Note that pure shifts which use the EX1 shifter would need their operands
+// a cycle earlier. However, they are only forced to use the EX1 shifter
+// when issuing against an RRX instruction, which should be rare.
+
+def : InstRW<[M85WriteShift2],
+ (instregex "t2RRX$")>;
+def : InstRW<[WriteALU],
+ (instregex "(t|t2)(LSL|LSR|ASR|ROR|SBFX|UBFX)", "t2MOVsr(a|l)")>;
+
+// Instructions that use the shifter, but have normal timing
+
+def : InstRW<[WriteALUsi, M85Slot0Only], (instregex "t2(BFC|BFI)$")>;
+
+// Stack pointer add/sub happens in EX1 with checks in EX2
+
+def M85WritesToSPPred : MCSchedPredicate<CheckRegOperand<0, SP>>;
+
+def M85ReadForSP : SchedReadVariant<[
+ SchedVar<M85WritesToSPPred, [M85Read_ISS]>,
+ SchedVar<NoSchedPred, [M85Read_EX1]>
+]>;
+def M85ReadForSPShift : SchedReadVariant<[
+ SchedVar<M85WritesToSPPred, [M85Read_ISS]>,
+ SchedVar<NoSchedPred, [M85Read_ISS]>
+]>;
+
+def : InstRW<[WriteALU, M85Read_ISS],
+ (instregex "tADDspi", "tSUBspi")>;
+def : InstRW<[WriteALU, M85ReadForSP],
+ (instregex "t2(ADD|SUB)ri", "t2MOVr", "tMOVr")>;
+def : InstRW<[WriteALU, M85ReadForSP, M85ReadForSP],
+ (instregex "tADDrSP", "tADDspr", "tADDhirr")>;
+def : InstRW<[M85WriteALUsi, M85ReadForSP, M85ReadForSPShift],
+ (instregex "t2(ADD|SUB)rs")>;
+
+def : InstRW<[WriteALU, M85Slot0Only], (instregex "t2CLZ")>;
+
+// MAC operations that don't have SchedRW set
+
+def : InstRW<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC], (instregex "t2SML[AS]D")>;
+
+// Divides are special because they stall for their latency, and so look like
+// two cycles as far as scheduling opportunities go. By putting M85Write2
+// first, we make the operand latency 2, but keep the instruction latency 7.
+// Divide operands are read early.
+
+def : InstRW<[M85Write2, WriteDIV, M85Read_ISS, M85Read_ISS, WriteALU],
+ (instregex "t2(S|U)DIV")>;
+
+// DSP extension operations
+
+def M85WriteSIMD1 : SchedWriteRes<[M85UnitSIMD, M85UnitALU, M85UnitSlot0]> {
+ let Latency = 1;
+}
+def M85WriteSIMD2 : SchedWriteRes<[M85UnitSIMD, M85UnitALU, M85UnitSlot0]> {
+ let Latency = 2;
+}
+def M85WriteShSIMD0 : SchedWriteRes<[M85UnitSIMD, M85UnitALU,
+ M85UnitShift1, M85UnitSlot0]> {
+ let Latency = 0; // Finishes at EX1
+}
+def M85WriteShSIMD1 : SchedWriteRes<[M85UnitSIMD, M85UnitALU,
+ M85UnitShift1, M85UnitSlot0]> {
+ let Latency = 1;
+}
+def M85WriteShSIMD2 : SchedWriteRes<[M85UnitSIMD, M85UnitALU,
+ M85UnitShift1, M85UnitSlot0]> {
+ let Latency = 2;
+}
+
+def : InstRW<[M85WriteShSIMD2, M85Read_ISS],
+ (instregex "t2(S|U)SAT")>;
+def : InstRW<[M85WriteSIMD1, ReadALU],
+ (instregex "(t|t2)(S|U)XT(B|H)")>;
+def : InstRW<[M85WriteSIMD1, ReadALU, ReadALU],
+ (instregex "t2(S|SH|U|UH)(ADD16|ADD8|ASX|SAX|SUB16|SUB8)",
+ "t2SEL")>;
+def : InstRW<[M85WriteSIMD2, ReadALU, ReadALU],
+ (instregex "t2(Q|UQ)(ADD|ASX|SAX|SUB)", "t2USAD8")>;
+def : InstRW<[M85WriteShSIMD2, M85Read_ISS, M85Read_ISS],
+ (instregex "t2QD(ADD|SUB)")>;
+def : InstRW<[M85WriteShSIMD0, M85Read_ISS],
+ (instregex "t2(RBIT|REV)", "tREV")>;
+def : InstRW<[M85WriteShSIMD1, ReadALU, M85Read_ISS],
+ (instregex "t2PKH(BT|TB)", "t2(S|U)XTA")>;
+def : InstRW<[M85WriteSIMD2, ReadALU, ReadALU, M85Read_EX2],
+ (instregex "t2USADA8")>;
+
+// MSR/MRS
+def : InstRW<[M85NonGeneralPurpose], (instregex "MSR", "MRS")>;
+
+// 64-bit shift operations in EX3
+
+def M85WriteLShift : SchedWriteRes<[M85UnitLShift, M85UnitALU]> {
+ let Latency = 2;
+}
+def M85WriteLat2 : SchedWriteRes<[]> { let Latency = 2; let NumMicroOps = 0; }
+
+def : InstRW<[M85WriteLShift, M85WriteLat2, M85Read_EX2, M85Read_EX2],
+ (instregex "MVE_(ASRLi|LSLLi|LSRL|SQSHLL|SRSHRL|UQSHLL|URSHRL)$")>;
+def : InstRW<[M85WriteLShift, M85WriteLat2,
+ M85Read_EX2, M85Read_EX2, M85Read_EX2],
+ (instregex "MVE_(ASRLr|LSLLr|SQRSHRL|UQRSHLL)$")>;
+def : InstRW<[M85WriteLShift, M85Read_EX2, M85Read_EX2],
+ (instregex "MVE_(SQRSHR|UQRSHL)$")>;
+def : InstRW<[M85WriteLShift, M85Read_EX2],
+ (instregex "MVE_(SQSHL|SRSHR|UQSHL|URSHR)$")>;
+
+// Loop control/branch future instructions
+
+def M85LE : SchedWriteRes<[]> { let NumMicroOps = 0; let Latency = -2; }
+
+def : InstRW<[WriteALU], (instregex "t2BF(_|Lr|i|Li|r)")>;
+
+def : InstRW<[WriteALU], (instregex "MVE_LCTP")>;
+def : InstRW<[WriteALU],
+ (instregex "t2DLS", "t2WLS", "MVE_DLSTP", "MVE_WLSTP")>;
+def : InstRW<[M85LE], (instregex "t2LE$")>;
+def : InstRW<[M85LE, M85Read_ISSm1],
+ (instregex "t2LEUpdate", "MVE_LETP")>; // LE is executed at ISS
+
+// Conditional selects
+
+def : InstRW<[M85WriteLShift, M85Read_EX2, M85Read_EX2, M85Read_EX2],
+ (instregex "t2(CSEL|CSINC|CSINV|CSNEG)")>;
+
+//===---------------------------------------------------------------------===//
+// Sched definitions for FP and MVE operations
+
+let NumMicroOps = 0 in {
+ def M85OverrideVFPLat5 : SchedWriteRes<[]> { let Latency = 5; }
+ def M85OverrideVFPLat4 : SchedWriteRes<[]> { let Latency = 4; }
+ def M85OverrideVFPLat3 : SchedWriteRes<[]> { let Latency = 3; }
+ def M85OverrideVFPLat2 : SchedWriteRes<[]> { let Latency = 2; }
+}
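+
+// Illustrative note: these zero-uop writes are listed ahead of the real
+// WriteRes in an InstRW, e.g. [M85OverrideVFPLat2, WriteFPDIV32] below, so
+// the defined register takes the override latency while the second write
+// still reserves the functional units.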
+
+let Latency = 1 in {
+ def M85GroupALat1S : SchedWriteRes<[M85UnitVFPA, M85UnitVPort, M85UnitSlot0]>;
+ def M85GroupBLat1S : SchedWriteRes<[M85UnitVFPB, M85UnitVPort, M85UnitSlot0]>;
+ def M85GroupCLat1S : SchedWriteRes<[M85UnitVFPC, M85UnitVPort, M85UnitSlot0]>;
+ def M85GroupALat1D : SchedWriteRes<[M85UnitVFPAL, M85UnitVFPAH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
+ def M85GroupBLat1D : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
+ def M85GroupCLat1D : SchedWriteRes<[M85UnitVFPCL, M85UnitVFPCH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
+ def M85GroupABLat1S : SchedWriteRes<[M85UnitVPort, M85UnitSlot0]>;
+}
+let Latency = 2 in {
+ def M85GroupBLat2S : SchedWriteRes<[M85UnitVFPB, M85UnitVPort, M85UnitSlot0]>;
+ def M85GroupBLat2D : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
+ def M85GroupABLat2S : SchedWriteRes<[M85UnitVPort, M85UnitSlot0]>;
+ def M85GroupABLat2D : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
+}
+
+// Instructions which are missing default schedules
+def : InstRW<[M85GroupALat1S], (instregex "V(FP_VMAXNM|FP_VMINNM)(H|S)$")>;
+def : InstRW<[M85GroupALat1D], (instregex "V(FP_VMAXNM|FP_VMINNM)D$")>;
+def : InstRW<[M85GroupCLat1S], (instregex "VCMPE?Z?(H|S)$")>;
+def : InstRW<[M85GroupCLat1D], (instregex "VCMPE?Z?D$")>;
+def : InstRW<[M85GroupBLat2S],
+ (instregex "VCVT(A|M|N|P|R|X|Z)(S|U)(H|S)",
+ "VRINT(A|M|N|P|R|X|Z)(H|S)")>;
+def : InstRW<[M85GroupBLat2D],
+ (instregex "VCVT(B|T)(DH|HD)", "VCVT(A|M|N|P|R|X|Z)(S|U)D",
+ "V.*TOD", "VTO.*D", "VCVTDS", "VCVTSD",
+ "VRINT(A|M|N|P|R|X|Z)D")>;
+def : InstRW<[M85GroupABLat1S], (instregex "VINSH")>;
+def : InstRW<[M85GroupBLat1S], (instregex "V(ABS|NEG)(H|S)$")>;
+def : InstRW<[M85GroupBLat1D], (instregex "V(ABS|NEG)D$")>;
+
+// VMRS/VMSR
+let SingleIssue = 1 in {
+ def M85VMRSEarly : SchedWriteRes<[M85UnitVPort]> { let Latency = 2;}
+ def M85VMRSLate : SchedWriteRes<[M85UnitVPort]> { let Latency = 4; }
+ def M85VMSREarly : SchedWriteRes<[M85UnitVPort]> { let Latency = 1; }
+ def M85VMSRLate : SchedWriteRes<[M85UnitVPort]> { let Latency = 3; }
+}
+
+def M85FPSCRFlagPred : MCSchedPredicate<
+ CheckAll<[CheckIsRegOperand<0>,
+ CheckRegOperand<0, PC>]>>;
+
+def M85VMRSFPSCR : SchedWriteVariant<[
+ SchedVar<M85FPSCRFlagPred, [M85VMRSEarly]>,
+ SchedVar<NoSchedPred, [M85VMRSLate]>
+]>;
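+
+// Illustrative note: operand 0 being PC corresponds to "VMRS APSR_nzcv,
+// FPSCR" (the flags-only transfer), which this model treats as completing
+// earlier than a full FPSCR read into a general-purpose register.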
+
+def : InstRW<[M85VMSREarly, M85Read_EX2],
+ (instregex "VMSR$", "VMSR_FPSCR_NZCVQC", "VMSR_P0", "VMSR_VPR")>;
+def : InstRW<[M85VMRSEarly], (instregex "VMRS_P0", "VMRS_VPR", "FMSTAT")>;
+def : InstRW<[M85VMRSLate], (instregex "VMRS_FPSCR_NZCVQC")>;
+def : InstRW<[M85VMRSFPSCR], (instregex "VMRS$")>;
+// Not matching properly
+//def : InstRW<[M85VMSRLate, M85Read_EX2], (instregex "VMSR_FPCTX(NS|S)")>;
+//def : InstRW<[M85VMRSLate], (instregex "VMRS_FPCTX(NS|S)")>;
+
+// VSEL cannot bypass in its implied $cpsr operand; model as earlier read
+def : InstRW<[M85GroupBLat1S, ReadALU, ReadALU, M85Read_ISS],
+ (instregex "VSEL.*(S|H)$")>;
+def : InstRW<[M85GroupBLat1D, ReadALU, ReadALU, M85Read_ISS],
+ (instregex "VSEL.*D$")>;
+
+// VMOV
+def : InstRW<[WriteFPMOV],
+ (instregex "VMOV(H|S)$", "FCONST(H|S)")>;
+def : InstRW<[WriteFPMOV, M85Read_EX2],
+ (instregex "VMOVHR$", "VMOVSR$")>;
+def : InstRW<[M85GroupABLat2S],
+ (instregex "VMOVRH$", "VMOVRS$")>;
+def : InstRW<[M85WriteFPMOV64],
+ (instregex "VMOVD$")>;
+def : InstRW<[M85WriteFPMOV64],
+ (instregex "FCONSTD")>;
+def : InstRW<[M85WriteFPMOV64, M85Read_EX2, M85Read_EX2],
+ (instregex "VMOVDRR")>;
+def : InstRW<[M85WriteFPMOV64, M85Write1, M85Read_EX2, M85Read_EX2],
+ (instregex "VMOVSRR")>;
+def : InstRW<[M85GroupABLat2D, M85Write2],
+ (instregex "VMOV(RRD|RRS)")>;
+
+// These shouldn't even exist, but Cortex-M55 defines them, so here they are.
+def : InstRW<[WriteFPMOV, M85Read_EX2],
+ (instregex "VGETLNi32$")>;
+def : InstRW<[M85GroupABLat2S],
+ (instregex "VSETLNi32")>;
+
+// Larger-latency overrides
+
+def M85FPDIV16 : SchedWriteRes<[M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
+ let Latency = 8;
+}
+def : InstRW<[M85OverrideVFPLat2, M85FPDIV16], (instregex "VDIVH")>;
+def : InstRW<[M85OverrideVFPLat2, WriteFPDIV32], (instregex "VDIVS")>;
+def : InstRW<[M85OverrideVFPLat2, WriteFPDIV64], (instregex "VDIVD")>;
+def : InstRW<[M85OverrideVFPLat2, M85FPDIV16], (instregex "VSQRTH")>;
+def : InstRW<[M85OverrideVFPLat2, WriteFPSQRT32], (instregex "VSQRTS")>;
+def : InstRW<[M85OverrideVFPLat2, WriteFPSQRT64], (instregex "VSQRTD")>;
+def : InstRW<[M85OverrideVFPLat3, WriteFPMUL64], (instregex "V(MUL|NMUL)D")>;
+def : InstRW<[M85OverrideVFPLat2, WriteFPALU64], (instregex "V(ADD|SUB)D")>;
+
+// Multiply-accumulate. Chained SP timing is correct; the rest need overrides.
+// Double-precision chained MAC should also be seen as having a latency of 5,
+// as a stall there stalls everything.
+
+def : InstRW<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL],
+ (instregex "VN?ML(A|S)H")>;
+
+def : InstRW<[M85OverrideVFPLat5, WriteFPMAC64,
+ ReadFPMUL, ReadFPMUL, ReadFPMUL],
+ (instregex "VN?ML(A|S)D$")>;
+
+// Single-precision fused MACs look like latency 4 with advance of 2.
+
+def M85ReadFPMAC2 : SchedReadAdvance<2>;
+
+def : InstRW<[M85OverrideVFPLat4, WriteFPMAC32,
+ M85ReadFPMAC2, ReadFPMUL, ReadFPMUL],
+ (instregex "VF(N)?M(A|S)(H|S)$")>;
+
+// Double-precision fused MAC looks like latency 4.
+
+def : InstRW<[M85OverrideVFPLat4, WriteFPMAC64,
+ ReadFPMUL, ReadFPMUL, ReadFPMUL],
+ (instregex "VF(N)?M(A|S)D$")>;
+
+// MVE beatwise instructions
+// NOTE: Q-register timing for the 2nd beat is off by a cycle and needs
+// DAG overrides to correctly set latencies.
+// NOTE2: MVE integer MAC->MAC accumulate latencies are set as if the
+// accumulate value arrives from a non-matching MAC instruction;
+// matching ones are handled via DAG mutation. These are marked as
+// "limited accumulate bypass".
+
+let Latency = 4, EndGroup = 1 in {
+ def M85GrpALat2MveR : SchedWriteRes<[M85UnitVFPAL, M85UnitVFPAH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
+ let ReleaseAtCycles = [2,2,1,1,1];
+ }
+ def M85GrpABLat2MveR : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
+ def M85GrpBLat2MveR : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
+ let ReleaseAtCycles = [2,2,1,1,1];
+ }
+ def M85Lat2MveR : SchedWriteRes<[]> { let NumMicroOps = 0; }
+ def M85GrpBLat4Mve : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
+ let ReleaseAtCycles = [2,2,1,1,1];
+ }
+}
+let Latency = 3, EndGroup = 1 in {
+ def M85GrpBLat3Mve : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
+ let ReleaseAtCycles = [2,2,1,1,1];
+ }
+ def M85GrpBLat1MveR : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
+ let ReleaseAtCycles = [2,2,1,1,1];
+ }
+ def M85Lat1MveR : SchedWriteRes<[]> { let NumMicroOps = 0; }
+}
+let Latency = 2, EndGroup = 1 in {
+ def M85GrpALat2Mve : SchedWriteRes<[M85UnitVFPAL, M85UnitVFPAH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
+ let ReleaseAtCycles = [2,2,1,1,1];
+ }
+ def M85GrpABLat2Mve : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
+ def M85GrpBLat2Mve : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
+ let ReleaseAtCycles = [2,2,1,1,1];
+ }
+ def M85Lat2Mve : SchedWriteRes<[]> { let NumMicroOps = 0; }
+}
+let Latency = 1, EndGroup = 1 in {
+ def M85GrpALat1Mve : SchedWriteRes<[M85UnitVFPAL, M85UnitVFPAH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
+ let ReleaseAtCycles = [2,2,1,1,1];
+ }
+ def M85GrpABLat1Mve : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
+ def M85GrpBLat1Mve : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
+ let ReleaseAtCycles = [2,2,1,1,1];
+ }
+ def M85GrpCLat1Mve : SchedWriteRes<[M85UnitVFPCL, M85UnitVFPCH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
+ let ReleaseAtCycles = [2,2,1,1,1];
+ }
+ def M85GrpDLat1Mve : SchedWriteRes<[M85UnitVFPD, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
+ let ReleaseAtCycles = [2,1,1,1];
+ }
+}
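+
+// Illustrative note: in the groups above, ReleaseAtCycles = [2,2,1,1,1]
+// holds each VFP unit half for two cycles, modelling one beat per 64-bit
+// half of a 128-bit MVE operation, so back-to-back ops in the same group
+// achieve at best one issue every other cycle.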
+
+def : InstRW<[M85GrpABLat1Mve, M85Read_EX1, M85Read_EX2, M85Read_EX2],
+ (instregex "MVE_VMOV_q_rr")>;
+
+def : InstRW<[M85GrpABLat1Mve, M85Read_EX2],
+ (instregex "MVE_VMOV_to_lane_(8|16|32)")>;
+
+def : InstRW<[M85GrpABLat1Mve],
+ (instregex "MVE_VAND$",
+ "MVE_VBIC$", "MVE_VBICimm",
+ "MVE_VCLSs(8|16|32)",
+ "MVE_VCLZs(8|16|32)",
+ "MVE_VEOR",
+ "MVE_VMOVimmf32", "MVE_VMOVimmi(8|16|32|64)",
+ "MVE_VMVN$", "MVE_VMVNimmi(16|32)",
+ "MVE_VORN$",
+ "MVE_VORR$", "MVE_VORRimm", "MQPRCopy",
+ "MVE_VPSEL",
+ "MVE_VREV(16|32|64)_(8|16|32)"
+ )>;
+
+def : InstRW<[M85GrpABLat2MveR, M85Lat2MveR],
+ (instregex "MVE_VMOV_rr_q")>;
+
+def : InstRW<[M85GrpABLat2MveR],
+ (instregex "MVE_VMOV_from_lane_(32|u8|s8|u16|s16)")>;
+
+def : InstRW<[M85GrpALat1Mve, M85Lat1MveR,
+ M85Read_EX1, M85Read_EX1, M85Read_EX2],
+ (instregex "MVE_VADC$")>;
+
+def : InstRW<[M85GrpALat1Mve, M85Lat1MveR],
+ (instregex "MVE_VADCI")>;
+
+def : InstRW<[M85GrpALat1Mve, M85Read_EX1, M85Read_EX2],
+ (instregex "MVE_VADD_qr_i(8|16|32)",
+ "MVE_VBRSR(16|32|8)",
+ "MVE_VHADD_qr_[su](8|16|32)",
+ "MVE_VHSUB_qr_[su](8|16|32)",
+ "MVE_VQADD_qr_[su](8|16|32)",
+ "MVE_VQSUB_qr_[su](8|16|32)",
+ "MVE_VSHL_qr[su](8|16|32)",
+ "MVE_VSUB_qr_i(8|16|32)"
+ )>;
+
+def : InstRW<[M85GrpALat1Mve],
+ (instregex "MVE_VABD(s|u)(8|16|32)",
+ "MVE_VABS(s|u)(8|16|32)",
+ "MVE_V(MAX|MIN)A?[us](8|16|32)",
+ "MVE_VADDi(8|16|32)",
+ "MVE_VCADDi(8|16|32)",
+ "MVE_VHCADDs(8|16|32)",
+ "MVE_VHSUB[su](8|16|32)",
+ "MVE_VMOVL[su](8|16)[tb]h",
+ "MVE_VMOVNi(16|32)[tb]h",
+ "MVE_VMULL[BT]?[p](8|16|32)(bh|th)?",
+ "MVE_VNEGs(8|16|32)",
+ "MVE_VQABSs(8|16|32)",
+ "MVE_VQADD[su](8|16|32)",
+ "MVE_VQNEGs(8|16|32)",
+ "MVE_VQSUB[su](8|16|32)",
+ "MVE_VR?HADD[su](8|16|32)",
+ "MVE_VSBC$", "MVE_VSBCI",
+ "MVE_VSHL_by_vec[su](8|16|32)",
+ "MVE_VSHL_immi(8|16|32)",
+ "MVE_VSHLL_imm[su](8|16)[bt]h",
+ "MVE_VSHLL_lw[su](8|16)[bt]h",
+ "MVE_VSHRNi(16|32)[bt]h",
+ "MVE_VSHR_imm[su](8|16|32)",
+ "MVE_VSLIimm[su]?(8|16|32)",
+ "MVE_VSRIimm[su]?(8|16|32)",
+ "MVE_VSUBi(8|16|32)"
+ )>;
+
+def : InstRW<[M85GrpALat2Mve, M85Lat2MveR, M85Read_EX2, M85Read_EX2],
+ (instregex "MVE_V(D|I)WDUPu(8|16|32)")>;
+
+def : InstRW<[M85GrpALat2Mve, M85Lat2MveR, M85Read_EX2],
+ (instregex "MVE_V(D|I)DUPu(8|16|32)")>;
+
+def : InstRW<[M85GrpALat2Mve, M85Read_EX1, M85Read_EX2],
+ (instregex "MVE_V(Q|R|QR)SHL_qr[su](8|16|32)",
+ "MVE_VADD_qr_f(16|32)",
+ "MVE_VSUB_qr_f(16|32)"
+ )>;
+
+def : InstRW<[M85GrpALat1Mve, M85Read_EX2],
+ (instregex "MVE_VDUP(8|16|32)")>;
+
+def : InstRW<[M85GrpBLat1Mve],
+ (instregex "MVE_VABSf(16|32)",
+ "MVE_V(MAX|MIN)NMA?f(16|32)",
+ "MVE_VNEGf(16|32)"
+ )>;
+
+def : InstRW<[M85GrpBLat2MveR, M85Lat2MveR, M85Read_EX3, M85Read_EX3],
+ (instregex "MVE_VADDLV[us]32acc")>;
+
+def : InstRW<[M85GrpBLat2MveR, M85Lat2MveR],
+ (instregex "MVE_VADDLV[us]32no_acc")>;
+
+def : InstRW<[M85GrpBLat2MveR, M85Read_EX3],
+ (instregex "MVE_VADDV[us](8|16|32)acc"
+ )>;
+
+def : InstRW<[M85GrpALat2MveR, M85Read_EX3],
+ (instregex "MVE_V(MAX|MIN)A?V[us](8|16|32)",
+ "MVE_VABAV(s|u)(8|16|32)"
+ )>;
+
+def : InstRW<[M85GrpALat2MveR],
+ (instregex "MVE_VADDV[us](8|16|32)no_acc")>;
+
+def : InstRW<[M85GrpALat2Mve],
+ (instregex "MVE_V(Q|R|QR)SHL_by_vec[su](8|16|32)",
+ "MVE_VABDf(16|32)",
+ "MVE_VADDf(16|32)",
+ "MVE_VCADDf(16|32)",
+ "MVE_VQMOVU?N[su](8|16|32)[tb]h",
+ "MVE_VQR?SHL(U_)?imm[su](8|16|32)",
+ "MVE_VQR?SHRN[bt]h[su](16|32)",
+ "MVE_VQR?SHRUNs(16|32)[bt]h",
+ "MVE_VRSHR_imm[su](8|16|32)",
+ "MVE_VRSHRNi(16|32)[bt]h",
+ "MVE_VSUBf(16|32)"
+ )>;
+
+def : InstRW<[M85GrpBLat2MveR, M85Read_EX2],
+ (instregex "MVE_V(MAX|MIN)NMA?Vf(16|32)")>;
+
+def : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX2],
+ (instregex "MVE_VMUL_qr_i(8|16|32)")>;
+
+def : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX2],
+ (instregex "MVE_VQDMULL_qr_s(16|32)[tb]h")>;
+
+def : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX2],
+ (instregex "MVE_VQR?DMULH_qr_s(8|16|32)")>;
+
+def : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX1, M85Read_EX3],
+ // limited accumulate bypass
+ (instregex "MVE_VMLAS?_qr_i(8|16|32)")>;
+
+def : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX1, M85Read_EX2],
+ // limited accumulate bypass
+ (instregex "MVE_VQR?DMLAS?H_qrs(8|16|32)")>;
+
+def : InstRW<[M85GrpBLat2Mve],
+ // limited accumulate bypass
+ (instregex "MVE_VQR?DML[AS]DHX?s(8|16|32)")>;
+
+def : InstRW<[M85GrpBLat2MveR, M85Lat2MveR, M85Read_EX3, M85Read_EX3],
+ (instregex "MVE_VR?ML[AS]LDAVH?ax?[su](8|16|32)")>;
+
+def : InstRW<[M85GrpBLat2MveR, M85Lat2MveR],
+ (instregex "MVE_VR?ML[AS]LDAVH?x?[su](8|16|32)")>;
+
+def : InstRW<[M85GrpBLat2MveR, M85Read_EX3],
+ (instregex "MVE_VML[AS]DAVax?[su](8|16|32)")>;
+
+def : InstRW<[M85GrpBLat2MveR],
+ (instregex "MVE_VML[AS]DAVx?[su](8|16|32)")>;
+
+def : InstRW<[M85GrpBLat2Mve],
+ (instregex "MVE_VCVTf16(u|s)16", "MVE_VCVTf32(u|s)32",
+ "MVE_VCVT(u|s)16f16", "MVE_VCVT(u|s)32f32",
+ "MVE_VCVTf16f32", "MVE_VCVTf32f16",
+ "MVE_VMULL[BT]?[su](8|16|32)(bh|th)?",
+ "MVE_VMUL(t1)*i(8|16|32)",
+ "MVE_VQDMULLs(16|32)[tb]h",
+ "MVE_VQR?DMULHi(8|16|32)",
+ "MVE_VR?MULH[su](8|16|32)",
+ "MVE_VRINTf(16|32)"
+ )>;
+
+def : InstRW<[M85GrpBLat3Mve, M85Read_EX1, M85Read_EX2],
+ (instregex "MVE_VMUL_qr_f(16|32)")>;
+
+def : InstRW<[M85GrpBLat3Mve],
+ (instregex "MVE_VCMULf(16|32)",
+ "MVE_VMULf(16|32)"
+ )>;
+
+def : InstRW<[M85GrpBLat4Mve, M85Read_EX3, M85Read_EX1, M85Read_EX2],
+ (instregex "MVE_VFMA_qr_Sf(16|32)", // VFMAS
+ "MVE_VFMA_qr_f(16|32)" // VFMA
+ )>;
+
+def : InstRW<[M85GrpBLat4Mve, M85Read_EX3],
+ (instregex "MVE_VCMLAf(16|32)")>;
+
+def : InstRW<[M85GrpBLat4Mve, M85Read_EX3],
+ (instregex "MVE_VFM(A|S)f(16|32)")>;
+
+def : InstRW<[M85GrpCLat1Mve, M85Read_EX1, M85Read_EX1, M85Read_EX2],
+ (instregex "MVE_VPTv(4|8)f(16|32)r")>;
+
+def : InstRW<[M85GrpCLat1Mve, M85Read_EX1, M85Read_EX1, M85Read_EX2],
+ (instregex "MVE_VPTv(4|8|16)(i|s|u)(8|16|32)r")>;
+
+def : InstRW<[M85GrpCLat1Mve, M85Read_EX1, M85Read_EX2],
+ (instregex "MVE_VCMP[isu](8|16|32)r$", "MVE_VCMPf(16|32)r$")>;
+
+def : InstRW<[M85GrpDLat1Mve, M85Read_EX2],
+ (instregex "MVE_VCTP(8|16|32|64)")>;
+
+def : InstRW<[M85GrpCLat1Mve],
+ (instregex "MVE_VCMPf(16|32)$", "MVE_VCMP[isu](8|16|32)$",
+ "MVE_VPTv(4|8)f(16|32)$",
+ "MVE_VPTv(4|8|16)(i|s|u)(8|16|32)$"
+ )>;
+
+def : InstRW<[M85GrpDLat1Mve],
+ (instregex "MVE_VPNOT",
+ "MVE_VPST"
+ )>;
+
+def : InstRW<[M85Lat2MveR, M85GrpALat2Mve, M85Read_EX1, M85Read_EX2],
+ (instregex "MVE_VSHLC")>;
+
+// NEON load/store writes; unused on this core, so given empty resources.
+
+def : WriteRes<WriteVLD1, []>;
+def : WriteRes<WriteVLD2, []>;
+def : WriteRes<WriteVLD3, []>;
+def : WriteRes<WriteVLD4, []>;
+def : WriteRes<WriteVST1, []>;
+def : WriteRes<WriteVST2, []>;
+def : WriteRes<WriteVST3, []>;
+def : WriteRes<WriteVST4, []>;
+
+} // SchedModel = CortexM85Model
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleR52.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleR52.td
index 466acec6f76a..e85646915117 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleR52.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleR52.td
@@ -72,7 +72,7 @@ def : WriteRes<WriteCMPsr, [R52UnitALU]> { let Latency = 0; }
// Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
def : WriteRes<WriteDIV, [R52UnitDiv]> {
- let Latency = 8; let ResourceCycles = [8]; // non-pipelined
+ let Latency = 8; let ReleaseAtCycles = [8]; // non-pipelined
}
// Branches - LR written in Late EX2
@@ -107,12 +107,12 @@ def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
def : WriteRes<WriteFPDIV32, [R52UnitDiv]> {
let Latency = 7; // FP div takes fixed #cycles
- let ResourceCycles = [7]; // is not pipelined
+ let ReleaseAtCycles = [7]; // is not pipelined
}
def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
let Latency = 17;
- let ResourceCycles = [17];
+ let ReleaseAtCycles = [17];
}
def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> { let Latency = 7; }
@@ -145,7 +145,7 @@ def R52WriteMACHi : SchedWriteRes<[R52UnitMAC]> {
let Latency = 4; let NumMicroOps = 0;
}
def R52WriteDIV : SchedWriteRes<[R52UnitDiv]> {
- let Latency = 8; let ResourceCycles = [8]; // not pipelined
+ let Latency = 8; let ReleaseAtCycles = [8]; // not pipelined
}
def R52WriteLd : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
def R52WriteST : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
@@ -552,7 +552,7 @@ foreach Num = 1-32 in { // reserve LdSt resource, no dual-issue
def R52ReserveLd#Num#Cy : SchedWriteRes<[R52UnitLd]> {
let Latency = 0;
let NumMicroOps = Num;
- let ResourceCycles = [Num];
+ let ReleaseAtCycles = [Num];
}
}
def R52WriteVLDM : SchedWriteVariant<[
@@ -639,57 +639,57 @@ def R52WriteVLDM : SchedWriteVariant<[
def R52WriteSTM5 : SchedWriteRes<[R52UnitLd]> {
let Latency = 5;
let NumMicroOps = 2;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def R52WriteSTM6 : SchedWriteRes<[R52UnitLd]> {
let Latency = 6;
let NumMicroOps = 4;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def R52WriteSTM7 : SchedWriteRes<[R52UnitLd]> {
let Latency = 7;
let NumMicroOps = 6;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def R52WriteSTM8 : SchedWriteRes<[R52UnitLd]> {
let Latency = 8;
let NumMicroOps = 8;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
}
def R52WriteSTM9 : SchedWriteRes<[R52UnitLd]> {
let Latency = 9;
let NumMicroOps = 10;
- let ResourceCycles = [5];
+ let ReleaseAtCycles = [5];
}
def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> {
let Latency = 10;
let NumMicroOps = 12;
- let ResourceCycles = [6];
+ let ReleaseAtCycles = [6];
}
def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> {
let Latency = 11;
let NumMicroOps = 14;
- let ResourceCycles = [7];
+ let ReleaseAtCycles = [7];
}
def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> {
let Latency = 12;
let NumMicroOps = 16;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
}
def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> {
let Latency = 13;
let NumMicroOps = 18;
- let ResourceCycles = [9];
+ let ReleaseAtCycles = [9];
}
def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> {
let Latency = 14;
let NumMicroOps = 20;
- let ResourceCycles = [10];
+ let ReleaseAtCycles = [10];
}
def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> {
let Latency = 15;
let NumMicroOps = 22;
- let ResourceCycles = [11];
+ let ReleaseAtCycles = [11];
}
def R52WriteSTM : SchedWriteVariant<[
@@ -719,45 +719,45 @@ def : WriteRes<WriteVLD1, [R52UnitLd]> { let Latency = 5;}
def : WriteRes<WriteVLD2, [R52UnitLd]> {
let Latency = 6;
let NumMicroOps = 3;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let SingleIssue = 1;
}
def : WriteRes<WriteVLD3, [R52UnitLd]> {
let Latency = 7;
let NumMicroOps = 5;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
let SingleIssue = 1;
}
def : WriteRes<WriteVLD4, [R52UnitLd]> {
let Latency = 8;
let NumMicroOps = 7;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
let SingleIssue = 1;
}
def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> {
let Latency = 5;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def R52WriteVST2Mem : SchedWriteRes<[R52UnitLd]> {
let Latency = 6;
let NumMicroOps = 3;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def R52WriteVST3Mem : SchedWriteRes<[R52UnitLd]> {
let Latency = 7;
let NumMicroOps = 5;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def R52WriteVST4Mem : SchedWriteRes<[R52UnitLd]> {
let Latency = 8;
let NumMicroOps = 7;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
}
def R52WriteVST5Mem : SchedWriteRes<[R52UnitLd]> {
let Latency = 9;
let NumMicroOps = 9;
- let ResourceCycles = [5];
+ let ReleaseAtCycles = [5];
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleSwift.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleSwift.td
index d66b3065c7b7..88682f5c0d2c 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleSwift.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleSwift.td
@@ -88,7 +88,7 @@ let SchedModel = SwiftModel in {
def SwiftWriteP0ThreeCycleThreeUops : SchedWriteRes<[SwiftUnitP0]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
// Plain load without writeback.
def SwiftWriteP2ThreeCycle : SchedWriteRes<[SwiftUnitP2]> {
@@ -261,7 +261,7 @@ let SchedModel = SwiftModel in {
def SwiftP0P0P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
let Latency = 5;
let NumMicroOps = 3;
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
}
def SwiftWrite1Cycle : SchedWriteRes<[]> {
let Latency = 1;
@@ -283,7 +283,7 @@ let SchedModel = SwiftModel in {
def Swift2P03P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
let Latency = 7;
let NumMicroOps = 5;
- let ResourceCycles = [2, 3];
+ let ReleaseAtCycles = [2, 3];
}
// Aliasing sub-target specific WriteRes to generic ones
@@ -313,7 +313,7 @@ let SchedModel = SwiftModel in {
def SwiftDiv : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
let NumMicroOps = 1;
let Latency = 14;
- let ResourceCycles = [1, 14];
+ let ReleaseAtCycles = [1, 14];
}
// 4.2.18 Integer Divide
def : WriteRes<WriteDIV, [SwiftUnitDiv]>; // Workaround.
@@ -653,15 +653,15 @@ let SchedModel = SwiftModel in {
// Serializing instructions.
def SwiftWaitP0For15Cy : SchedWriteRes<[SwiftUnitP0]> {
let Latency = 15;
- let ResourceCycles = [15];
+ let ReleaseAtCycles = [15];
}
def SwiftWaitP1For15Cy : SchedWriteRes<[SwiftUnitP1]> {
let Latency = 15;
- let ResourceCycles = [15];
+ let ReleaseAtCycles = [15];
}
def SwiftWaitP2For15Cy : SchedWriteRes<[SwiftUnitP2]> {
let Latency = 15;
- let ResourceCycles = [15];
+ let ReleaseAtCycles = [15];
}
def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy],
(instregex "VMRS")>;
@@ -684,7 +684,7 @@ let SchedModel = SwiftModel in {
def SwiftVLDMPerm#Num : SchedWriteRes<[SwiftUnitP1]> {
let Latency = 0;
let NumMicroOps = Num;
- let ResourceCycles = [Num];
+ let ReleaseAtCycles = [Num];
}
}
@@ -860,17 +860,17 @@ let SchedModel = SwiftModel in {
// 4.2.43 Advanced SIMD, Element or Structure Load and Store
def SwiftWrite2xP2FourCy : SchedWriteRes<[SwiftUnitP2]> {
let Latency = 4;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def SwiftWrite3xP2FourCy : SchedWriteRes<[SwiftUnitP2]> {
let Latency = 4;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
foreach Num = 1-2 in {
def SwiftExt#Num#xP0 : SchedWriteRes<[SwiftUnitP0]> {
let Latency = 0;
let NumMicroOps = Num;
- let ResourceCycles = [Num];
+ let ReleaseAtCycles = [Num];
}
}
// VLDx
@@ -1038,12 +1038,12 @@ let SchedModel = SwiftModel in {
def SwiftDiv17 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
let NumMicroOps = 1;
let Latency = 17;
- let ResourceCycles = [1, 15];
+ let ReleaseAtCycles = [1, 15];
}
def SwiftDiv32 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
let NumMicroOps = 1;
let Latency = 32;
- let ResourceCycles = [1, 30];
+ let ReleaseAtCycles = [1, 30];
}
def : InstRW<[SwiftDiv17], (instregex "VDIVS", "VSQRTS")>;
def : InstRW<[SwiftDiv32], (instregex "VDIVD", "VSQRTD")>;
@@ -1086,7 +1086,7 @@ let SchedModel = SwiftModel in {
def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
// Preload.
def : WriteRes<WritePreLd, [SwiftUnitP2]> { let Latency = 0;
- let ResourceCycles = [0];
+ let ReleaseAtCycles = [0];
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 1505e9214050..922fa93226f2 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -298,6 +298,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
case CortexM3:
case CortexM7:
case CortexR52:
+ case CortexM52:
case CortexX1:
case CortexX1C:
break;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h
index 715b5bee6dc6..43b4123a1b55 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -32,6 +32,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/TargetParser/Triple.h"
+#include <bitset>
#include <memory>
#include <string>
@@ -71,6 +72,7 @@ protected:
CortexA9,
CortexM3,
CortexM7,
+ CortexM52,
CortexR4,
CortexR4F,
CortexR5,
@@ -198,7 +200,7 @@ protected:
/// operand cycle returned by the itinerary data for pre-ISel operands.
int PreISelOperandLatencyAdjustment = 2;
- /// What alignment is preferred for loop bodies, in log2(bytes).
+ /// What alignment is preferred for loop bodies and functions, in log2(bytes).
unsigned PrefLoopLogAlignment = 0;
/// The cost factor for MVE instructions, representing the multiple beats an
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 39d8607818f7..a99773691df1 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
@@ -130,9 +131,9 @@ computeTargetABI(const Triple &TT, StringRef CPU,
if (ABIName == "aapcs16")
return ARMBaseTargetMachine::ARM_ABI_AAPCS16;
- else if (ABIName.startswith("aapcs"))
+ else if (ABIName.starts_with("aapcs"))
return ARMBaseTargetMachine::ARM_ABI_AAPCS;
- else if (ABIName.startswith("apcs"))
+ else if (ABIName.starts_with("apcs"))
return ARMBaseTargetMachine::ARM_ABI_APCS;
llvm_unreachable("Unhandled/unknown ABI Name!");
@@ -220,7 +221,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool isLittle)
+ CodeGenOptLevel OL, bool isLittle)
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
CPU, FS, Options, getEffectiveRelocModel(TT, RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
@@ -328,7 +329,7 @@ ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT,
@@ -336,7 +337,7 @@ ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
namespace {
@@ -422,7 +423,7 @@ void ARMPassConfig::addIRPasses() {
// Cmpxchg instructions are often used with a subsequent comparison to
// determine whether it succeeded. We can exploit existing control-flow in
// ldrex/strex loops to simplify this, but it needs tidying up.
- if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
+ if (TM->getOptLevel() != CodeGenOptLevel::None && EnableAtomicTidy)
addPass(createCFGSimplificationPass(
SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true),
[this](const Function &F) {
@@ -436,15 +437,15 @@ void ARMPassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
// Run the parallel DSP pass.
- if (getOptLevel() == CodeGenOpt::Aggressive)
+ if (getOptLevel() == CodeGenOptLevel::Aggressive)
addPass(createARMParallelDSPPass());
// Match complex arithmetic patterns
- if (TM->getOptLevel() >= CodeGenOpt::Default)
+ if (TM->getOptLevel() >= CodeGenOptLevel::Default)
addPass(createComplexDeinterleavingPass(TM));
// Match interleaved memory accesses to ldN/stN intrinsics.
- if (TM->getOptLevel() != CodeGenOpt::None)
+ if (TM->getOptLevel() != CodeGenOptLevel::None)
addPass(createInterleavedAccessPass());
// Add Control Flow Guard checks.
@@ -456,13 +457,13 @@ void ARMPassConfig::addIRPasses() {
}
void ARMPassConfig::addCodeGenPrepare() {
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
addPass(createTypePromotionLegacyPass());
TargetPassConfig::addCodeGenPrepare();
}
bool ARMPassConfig::addPreISel() {
- if ((TM->getOptLevel() != CodeGenOpt::None &&
+ if ((TM->getOptLevel() != CodeGenOptLevel::None &&
EnableGlobalMerge == cl::BOU_UNSET) ||
EnableGlobalMerge == cl::BOU_TRUE) {
// FIXME: This is using the thumb1 only constant value for
@@ -470,8 +471,9 @@ bool ARMPassConfig::addPreISel() {
// to look into using the old value for non-thumb1 code of
// 4095 based on the TargetMachine, but this starts to become
// tricky when doing code gen per function.
- bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
- (EnableGlobalMerge == cl::BOU_UNSET);
+ bool OnlyOptimizeForSize =
+ (TM->getOptLevel() < CodeGenOptLevel::Aggressive) &&
+ (EnableGlobalMerge == cl::BOU_UNSET);
// Merging of extern globals is enabled by default on non-Mach-O as we
// expect it to be generally either beneficial or harmless. On Mach-O it
// is disabled as we emit the .subsections_via_symbols directive which
@@ -481,7 +483,7 @@ bool ARMPassConfig::addPreISel() {
MergeExternalByDefault));
}
- if (TM->getOptLevel() != CodeGenOpt::None) {
+ if (TM->getOptLevel() != CodeGenOptLevel::None) {
addPass(createHardwareLoopsLegacyPass());
addPass(createMVETailPredicationPass());
// FIXME: IR passes can delete address-taken basic blocks, deleting
@@ -523,8 +525,8 @@ bool ARMPassConfig::addGlobalInstructionSelect() {
}
void ARMPassConfig::addPreRegAlloc() {
- if (getOptLevel() != CodeGenOpt::None) {
- if (getOptLevel() == CodeGenOpt::Aggressive)
+ if (getOptLevel() != CodeGenOptLevel::None) {
+ if (getOptLevel() == CodeGenOptLevel::Aggressive)
addPass(&MachinePipelinerID);
addPass(createMVETPAndVPTOptimisationsPass());
@@ -540,7 +542,7 @@ void ARMPassConfig::addPreRegAlloc() {
}
void ARMPassConfig::addPreSched2() {
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOptLevel::None) {
if (EnableARMLoadStoreOpt)
addPass(createARMLoadStoreOptimizationPass());
@@ -552,7 +554,7 @@ void ARMPassConfig::addPreSched2() {
// proper scheduling.
addPass(createARMExpandPseudoPass());
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOptLevel::None) {
// When optimising for size, always run the Thumb2SizeReduction pass before
// IfConversion. Otherwise, check whether IT blocks are restricted
// (e.g. in v8, IfConversion depends on Thumb instruction widths)
@@ -569,7 +571,7 @@ void ARMPassConfig::addPreSched2() {
// Add both scheduling passes to give the subtarget an opportunity to pick
// between them.
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOptLevel::None) {
addPass(&PostMachineSchedulerID);
addPass(&PostRASchedulerID);
}
@@ -588,7 +590,7 @@ void ARMPassConfig::addPreEmitPass() {
}));
// Don't optimize barriers or block placement at -O0.
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOptLevel::None) {
addPass(createARMBlockPlacementPass());
addPass(createARMOptimizeBarriersPass());
}
@@ -618,3 +620,23 @@ void ARMPassConfig::addPreEmitPass2() {
addPass(createEHContGuardCatchretPass());
}
}
+
+yaml::MachineFunctionInfo *
+ARMBaseTargetMachine::createDefaultFuncInfoYAML() const {
+ return new yaml::ARMFunctionInfo();
+}
+
+yaml::MachineFunctionInfo *
+ARMBaseTargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const {
+ const auto *MFI = MF.getInfo<ARMFunctionInfo>();
+ return new yaml::ARMFunctionInfo(*MFI);
+}
+
+bool ARMBaseTargetMachine::parseMachineFunctionInfo(
+ const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS,
+ SMDiagnostic &Error, SMRange &SourceRange) const {
+ const auto &YamlMFI = static_cast<const yaml::ARMFunctionInfo &>(MFI);
+ MachineFunction &MF = PFS.MF;
+ MF.getInfo<ARMFunctionInfo>()->initializeBaseYamlFields(YamlMFI);
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.h
index fb04433ec522..69d8fa8ada64 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -42,7 +42,7 @@ public:
ARMBaseTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool isLittle);
~ARMBaseTargetMachine() override;
@@ -83,6 +83,14 @@ public:
// Addrspacecasts are always noops.
return true;
}
+
+ yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override;
+ yaml::MachineFunctionInfo *
+ convertFuncInfoToYAML(const MachineFunction &MF) const override;
+ bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &,
+ PerFunctionMIParsingState &PFS,
+ SMDiagnostic &Error,
+ SMRange &SourceRange) const override;
};
/// ARM/Thumb little endian target machine.
@@ -92,7 +100,7 @@ public:
ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
};
@@ -103,7 +111,7 @@ public:
ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
};
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 444ee2efb6d2..cbc5e5210865 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -272,8 +272,8 @@ std::optional<Value *> ARMTTIImpl::simplifyDemandedVectorEltsIntrinsic(
: APInt::getHighBitsSet(2, 1));
SimplifyAndSetOp(&II, 0, OrigDemandedElts & DemandedElts, UndefElts);
// The other lanes will be defined from the inserted elements.
- UndefElts &= APInt::getSplat(NumElts, !IsTop ? APInt::getLowBitsSet(2, 1)
- : APInt::getHighBitsSet(2, 1));
+ UndefElts &= APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
+ : APInt::getHighBitsSet(2, 1));
return std::nullopt;
};
@@ -1213,7 +1213,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
- Kind = improveShuffleKindFromMask(Kind, Mask);
+ Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp);
if (ST->hasNEON()) {
if (Kind == TTI::SK_Broadcast) {
static const CostTblEntry NEONDupTbl[] = {
@@ -1670,12 +1670,73 @@ InstructionCost
ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) {
- if (TTI::requiresOrderedReduction(FMF))
- return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
EVT ValVT = TLI->getValueType(DL, ValTy);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
- if (!ST->hasMVEIntegerOps() || !ValVT.isSimple() || ISD != ISD::ADD)
+ unsigned EltSize = ValVT.getScalarSizeInBits();
+
+ // In general floating point reductions are a series of elementwise
+ // operations, with free extracts on each step. These are either in-order or
+ // treewise depending on whether that is allowed by the fast math flags.
+ if ((ISD == ISD::FADD || ISD == ISD::FMUL) &&
+ ((EltSize == 32 && ST->hasVFP2Base()) ||
+ (EltSize == 64 && ST->hasFP64()) ||
+ (EltSize == 16 && ST->hasFullFP16()))) {
+ unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
+ unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);
+ InstructionCost VecCost = 0;
+ while (!TTI::requiresOrderedReduction(FMF) && isPowerOf2_32(NumElts) &&
+ NumElts * EltSize > VecLimit) {
+ Type *VecTy = FixedVectorType::get(ValTy->getElementType(), NumElts / 2);
+ VecCost += getArithmeticInstrCost(Opcode, VecTy, CostKind);
+ NumElts /= 2;
+ }
+
+ // For fp16 we need to extract the upper lane elements. MVE can add a
+ // VREV+FMIN/MAX to perform another vector step instead.
+ InstructionCost ExtractCost = 0;
+ if (!TTI::requiresOrderedReduction(FMF) && ST->hasMVEFloatOps() &&
+ ValVT.getVectorElementType() == MVT::f16 && NumElts == 8) {
+ VecCost += ST->getMVEVectorCostFactor(CostKind) * 2;
+ NumElts /= 2;
+ } else if (ValVT.getVectorElementType() == MVT::f16)
+ ExtractCost = NumElts / 2;
+
+ return VecCost + ExtractCost +
+ NumElts *
+ getArithmeticInstrCost(Opcode, ValTy->getElementType(), CostKind);
+ }
+
+ if ((ISD == ISD::AND || ISD == ISD::OR || ISD == ISD::XOR) &&
+ (EltSize == 64 || EltSize == 32 || EltSize == 16 || EltSize == 8)) {
+ unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
+ unsigned VecLimit =
+ ST->hasMVEIntegerOps() ? 128 : (ST->hasNEON() ? 64 : -1);
+ InstructionCost VecCost = 0;
+ while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {
+ Type *VecTy = FixedVectorType::get(ValTy->getElementType(), NumElts / 2);
+ VecCost += getArithmeticInstrCost(Opcode, VecTy, CostKind);
+ NumElts /= 2;
+ }
+ // For i16/i8, MVE will perform a VREV + VORR/VAND/VEOR for the 64bit vector
+ // step.
+ if (ST->hasMVEIntegerOps() && ValVT.getScalarSizeInBits() <= 16 &&
+ NumElts * EltSize == 64) {
+ Type *VecTy = FixedVectorType::get(ValTy->getElementType(), NumElts);
+ VecCost += ST->getMVEVectorCostFactor(CostKind) +
+ getArithmeticInstrCost(Opcode, VecTy, CostKind);
+ NumElts /= 2;
+ }
+
+ // From here we extract the elements and perform the and/or/xor.
+ InstructionCost ExtractCost = NumElts;
+ return VecCost + ExtractCost +
+ (NumElts - 1) * getArithmeticInstrCost(
+ Opcode, ValTy->getElementType(), CostKind);
+ }
+
+ if (!ST->hasMVEIntegerOps() || !ValVT.isSimple() || ISD != ISD::ADD ||
+ TTI::requiresOrderedReduction(FMF))
return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
@@ -1753,6 +1814,66 @@ ARMTTIImpl::getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
}
InstructionCost
+ARMTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
+ FastMathFlags FMF,
+ TTI::TargetCostKind CostKind) {
+ EVT ValVT = TLI->getValueType(DL, Ty);
+
+ // In general floating point reductions are a series of elementwise
+ // operations, with free extracts on each step. These are either in-order or
+ // treewise depending on whether that is allowed by the fast math flags.
+ if ((IID == Intrinsic::minnum || IID == Intrinsic::maxnum) &&
+ ((ValVT.getVectorElementType() == MVT::f32 && ST->hasVFP2Base()) ||
+ (ValVT.getVectorElementType() == MVT::f64 && ST->hasFP64()) ||
+ (ValVT.getVectorElementType() == MVT::f16 && ST->hasFullFP16()))) {
+ unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
+ unsigned EltSize = ValVT.getScalarSizeInBits();
+ unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);
+ InstructionCost VecCost;
+ while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {
+ Type *VecTy = FixedVectorType::get(Ty->getElementType(), NumElts/2);
+ IntrinsicCostAttributes ICA(IID, VecTy, {VecTy, VecTy}, FMF);
+ VecCost += getIntrinsicInstrCost(ICA, CostKind);
+ NumElts /= 2;
+ }
+
+ // For fp16 we need to extract the upper lane elements. MVE can add a
+ // VREV+FMIN/MAX to perform another vector step instead.
+ InstructionCost ExtractCost = 0;
+ if (ST->hasMVEFloatOps() && ValVT.getVectorElementType() == MVT::f16 &&
+ NumElts == 8) {
+ VecCost += ST->getMVEVectorCostFactor(CostKind) * 2;
+ NumElts /= 2;
+ } else if (ValVT.getVectorElementType() == MVT::f16)
+ ExtractCost = cast<FixedVectorType>(Ty)->getNumElements() / 2;
+
+ IntrinsicCostAttributes ICA(IID, Ty->getElementType(),
+ {Ty->getElementType(), Ty->getElementType()},
+ FMF);
+ return VecCost + ExtractCost +
+ (NumElts - 1) * getIntrinsicInstrCost(ICA, CostKind);
+ }
+
+ if (IID == Intrinsic::smin || IID == Intrinsic::smax ||
+ IID == Intrinsic::umin || IID == Intrinsic::umax) {
+ std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
+
+ // All costs are the same for u/s min/max. These lower to vminv, which are
+ // given a slightly higher cost as they tend to take multiple cycles for
+ // smaller type sizes.
+ static const CostTblEntry CostTblAdd[]{
+ {ISD::SMIN, MVT::v16i8, 4},
+ {ISD::SMIN, MVT::v8i16, 3},
+ {ISD::SMIN, MVT::v4i32, 2},
+ };
+ if (const auto *Entry = CostTableLookup(CostTblAdd, ISD::SMIN, LT.second))
+ return Entry->Cost * ST->getMVEVectorCostFactor(CostKind) * LT.first;
+ }
+
+ return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
+}
+
+InstructionCost
ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
switch (ICA.getID()) {
@@ -1863,7 +1984,7 @@ bool ARMTTIImpl::isLoweredToCall(const Function *F) {
return BaseT::isLoweredToCall(F);
// Assume all Arm-specific intrinsics map to an instruction.
- if (F->getName().startswith("llvm.arm"))
+ if (F->getName().starts_with("llvm.arm"))
return false;
switch (F->getIntrinsicID()) {
@@ -2309,9 +2430,15 @@ ARMTTIImpl::getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const {
void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE) {
- // Enable Upper bound unrolling universally, not dependant upon the conditions
- // below.
- UP.UpperBound = true;
+ // Enable Upper bound unrolling universally, providing that we do not see an
+ // active lane mask, which will be better kept as a loop to become tail
+ // predicated than to be conditionally unrolled.
+ UP.UpperBound =
+ !ST->hasMVEIntegerOps() || !any_of(*L->getHeader(), [](Instruction &I) {
+ return isa<IntrinsicInst>(I) &&
+ cast<IntrinsicInst>(I).getIntrinsicID() ==
+ Intrinsic::get_active_lane_mask;
+ });
// Only currently enable these preferences for M-Class cores.
if (!ST->isMClass())
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 588704d5b7e5..bb4b321b5300 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -290,6 +290,10 @@ public:
VectorType *ValTy,
TTI::TargetCostKind CostKind);
+ InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
+ FastMathFlags FMF,
+ TTI::TargetCostKind CostKind);
+
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index ef4c70916eeb..18dccb26b877 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -191,7 +191,7 @@ public:
/// Returns true iff a given mnemonic is a CDE instruction
bool isCDEInstr(StringRef Mnemonic) {
// Quick check before searching the set
- if (!Mnemonic.startswith("cx") && !Mnemonic.startswith("vcx"))
+ if (!Mnemonic.starts_with("cx") && !Mnemonic.starts_with("vcx"))
return false;
return CDE.count(Mnemonic);
}
@@ -199,7 +199,7 @@ public:
/// Returns true iff a given mnemonic is a VPT-predicable CDE instruction
/// (possibly with a predication suffix "e" or "t")
bool isVPTPredicableCDEInstr(StringRef Mnemonic) {
- if (!Mnemonic.startswith("vcx"))
+ if (!Mnemonic.starts_with("vcx"))
return false;
return CDEWithVPTSuffix.count(Mnemonic);
}
@@ -207,17 +207,17 @@ public:
/// Returns true iff a given mnemonic is an IT-predicable CDE instruction
/// (possibly with a condition suffix)
bool isITPredicableCDEInstr(StringRef Mnemonic) {
- if (!Mnemonic.startswith("cx"))
+ if (!Mnemonic.starts_with("cx"))
return false;
- return Mnemonic.startswith("cx1a") || Mnemonic.startswith("cx1da") ||
- Mnemonic.startswith("cx2a") || Mnemonic.startswith("cx2da") ||
- Mnemonic.startswith("cx3a") || Mnemonic.startswith("cx3da");
+ return Mnemonic.starts_with("cx1a") || Mnemonic.starts_with("cx1da") ||
+ Mnemonic.starts_with("cx2a") || Mnemonic.starts_with("cx2da") ||
+ Mnemonic.starts_with("cx3a") || Mnemonic.starts_with("cx3da");
}
/// Return true iff a given mnemonic is an integer CDE instruction with
/// dual-register destination
bool isCDEDualRegInstr(StringRef Mnemonic) {
- if (!Mnemonic.startswith("cx"))
+ if (!Mnemonic.starts_with("cx"))
return false;
return Mnemonic == "cx1d" || Mnemonic == "cx1da" ||
Mnemonic == "cx2d" || Mnemonic == "cx2da" ||
@@ -704,10 +704,9 @@ public:
}
// Implementation of the MCTargetAsmParser interface:
- bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
- OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
@@ -4050,22 +4049,21 @@ static unsigned MatchRegisterName(StringRef Name);
/// }
-bool ARMAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
+bool ARMAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) {
const AsmToken &Tok = getParser().getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
- RegNo = tryParseRegister();
+ Reg = tryParseRegister();
- return (RegNo == (unsigned)-1);
+ return Reg == (unsigned)-1;
}
-OperandMatchResultTy ARMAsmParser::tryParseRegister(MCRegister &RegNo,
- SMLoc &StartLoc,
- SMLoc &EndLoc) {
- if (parseRegister(RegNo, StartLoc, EndLoc))
- return MatchOperand_NoMatch;
- return MatchOperand_Success;
+ParseStatus ARMAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ if (parseRegister(Reg, StartLoc, EndLoc))
+ return ParseStatus::NoMatch;
+ return ParseStatus::Success;
}
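
For callers, the OperandMatchResultTy-to-ParseStatus migration is mechanical; a hedged sketch of the new call-site shape, assuming ParseStatus's named predicates:

// Illustrative caller only.
MCRegister Reg;
SMLoc Start, End;
ParseStatus S = tryParseRegister(Reg, Start, End);
if (S.isNoMatch()) {
  // Not a register; try another operand form.
} else if (S.isSuccess()) {
  // Reg, Start and End are valid.
}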
/// Try to parse a register name. The token must be an Identifier when called,
@@ -6085,7 +6083,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
case AsmToken::LBrac:
return parseMemory(Operands);
case AsmToken::LCurly:
- return parseRegisterList(Operands, !Mnemonic.startswith("clr"));
+ return parseRegisterList(Operands, !Mnemonic.starts_with("clr"));
case AsmToken::Dollar:
case AsmToken::Hash: {
// #42 -> immediate
@@ -6297,32 +6295,29 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
// Ignore some mnemonics we know aren't predicated forms.
//
// FIXME: Would be nice to autogen this.
- if ((Mnemonic == "movs" && isThumb()) ||
- Mnemonic == "teq" || Mnemonic == "vceq" || Mnemonic == "svc" ||
- Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" ||
- Mnemonic == "vmls" || Mnemonic == "vnmls" || Mnemonic == "vacge" ||
- Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" ||
- Mnemonic == "vaclt" || Mnemonic == "vacle" || Mnemonic == "hlt" ||
- Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" ||
- Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" ||
- Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" ||
- Mnemonic == "fmuls" || Mnemonic == "vmaxnm" || Mnemonic == "vminnm" ||
- Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" ||
- Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" ||
- Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" ||
- Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" ||
- Mnemonic == "bxns" || Mnemonic == "blxns" ||
- Mnemonic == "vdot" || Mnemonic == "vmmla" ||
- Mnemonic == "vudot" || Mnemonic == "vsdot" ||
- Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
- Mnemonic == "vfmal" || Mnemonic == "vfmsl" ||
- Mnemonic == "wls" || Mnemonic == "le" || Mnemonic == "dls" ||
- Mnemonic == "csel" || Mnemonic == "csinc" ||
- Mnemonic == "csinv" || Mnemonic == "csneg" || Mnemonic == "cinc" ||
- Mnemonic == "cinv" || Mnemonic == "cneg" || Mnemonic == "cset" ||
- Mnemonic == "csetm" ||
- Mnemonic == "aut" || Mnemonic == "pac" || Mnemonic == "pacbti" ||
- Mnemonic == "bti")
+ if ((Mnemonic == "movs" && isThumb()) || Mnemonic == "teq" ||
+ Mnemonic == "vceq" || Mnemonic == "svc" || Mnemonic == "mls" ||
+ Mnemonic == "smmls" || Mnemonic == "vcls" || Mnemonic == "vmls" ||
+ Mnemonic == "vnmls" || Mnemonic == "vacge" || Mnemonic == "vcge" ||
+ Mnemonic == "vclt" || Mnemonic == "vacgt" || Mnemonic == "vaclt" ||
+ Mnemonic == "vacle" || Mnemonic == "hlt" || Mnemonic == "vcgt" ||
+ Mnemonic == "vcle" || Mnemonic == "smlal" || Mnemonic == "umaal" ||
+ Mnemonic == "umlal" || Mnemonic == "vabal" || Mnemonic == "vmlal" ||
+ Mnemonic == "vpadal" || Mnemonic == "vqdmlal" || Mnemonic == "fmuls" ||
+ Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" ||
+ Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" ||
+ Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" ||
+ Mnemonic == "vrintm" || Mnemonic == "hvc" ||
+ Mnemonic.starts_with("vsel") || Mnemonic == "vins" ||
+ Mnemonic == "vmovx" || Mnemonic == "bxns" || Mnemonic == "blxns" ||
+ Mnemonic == "vdot" || Mnemonic == "vmmla" || Mnemonic == "vudot" ||
+ Mnemonic == "vsdot" || Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
+ Mnemonic == "vfmal" || Mnemonic == "vfmsl" || Mnemonic == "wls" ||
+ Mnemonic == "le" || Mnemonic == "dls" || Mnemonic == "csel" ||
+ Mnemonic == "csinc" || Mnemonic == "csinv" || Mnemonic == "csneg" ||
+ Mnemonic == "cinc" || Mnemonic == "cinv" || Mnemonic == "cneg" ||
+ Mnemonic == "cset" || Mnemonic == "csetm" || Mnemonic == "aut" ||
+ Mnemonic == "pac" || Mnemonic == "pacbti" || Mnemonic == "bti")
return Mnemonic;
// First, split out any predication code. Ignore mnemonics we know aren't
@@ -6332,16 +6327,13 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic != "umlals" && Mnemonic != "umulls" && Mnemonic != "lsls" &&
Mnemonic != "sbcs" && Mnemonic != "rscs" &&
!(hasMVE() &&
- (Mnemonic == "vmine" ||
- Mnemonic == "vshle" || Mnemonic == "vshlt" || Mnemonic == "vshllt" ||
- Mnemonic == "vrshle" || Mnemonic == "vrshlt" ||
- Mnemonic == "vmvne" || Mnemonic == "vorne" ||
- Mnemonic == "vnege" || Mnemonic == "vnegt" ||
- Mnemonic == "vmule" || Mnemonic == "vmult" ||
- Mnemonic == "vrintne" ||
- Mnemonic == "vcmult" || Mnemonic == "vcmule" ||
- Mnemonic == "vpsele" || Mnemonic == "vpselt" ||
- Mnemonic.startswith("vq")))) {
+ (Mnemonic == "vmine" || Mnemonic == "vshle" || Mnemonic == "vshlt" ||
+ Mnemonic == "vshllt" || Mnemonic == "vrshle" || Mnemonic == "vrshlt" ||
+ Mnemonic == "vmvne" || Mnemonic == "vorne" || Mnemonic == "vnege" ||
+ Mnemonic == "vnegt" || Mnemonic == "vmule" || Mnemonic == "vmult" ||
+ Mnemonic == "vrintne" || Mnemonic == "vcmult" ||
+ Mnemonic == "vcmule" || Mnemonic == "vpsele" || Mnemonic == "vpselt" ||
+ Mnemonic.starts_with("vq")))) {
unsigned CC = ARMCondCodeFromString(Mnemonic.substr(Mnemonic.size()-2));
if (CC != ~0U) {
Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 2);
@@ -6351,18 +6343,17 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
// Next, determine if we have a carry setting bit. We explicitly ignore all
// the instructions we know end in 's'.
- if (Mnemonic.endswith("s") &&
- !(Mnemonic == "cps" || Mnemonic == "mls" ||
- Mnemonic == "mrs" || Mnemonic == "smmls" || Mnemonic == "vabs" ||
- Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vmrs" ||
- Mnemonic == "vnmls" || Mnemonic == "vqabs" || Mnemonic == "vrecps" ||
- Mnemonic == "vrsqrts" || Mnemonic == "srs" || Mnemonic == "flds" ||
- Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" ||
- Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" ||
- Mnemonic == "fmuls" || Mnemonic == "fcmps" || Mnemonic == "fcmpzs" ||
- Mnemonic == "vfms" || Mnemonic == "vfnms" || Mnemonic == "fconsts" ||
- Mnemonic == "bxns" || Mnemonic == "blxns" || Mnemonic == "vfmas" ||
- Mnemonic == "vmlas" ||
+ if (Mnemonic.ends_with("s") &&
+ !(Mnemonic == "cps" || Mnemonic == "mls" || Mnemonic == "mrs" ||
+ Mnemonic == "smmls" || Mnemonic == "vabs" || Mnemonic == "vcls" ||
+ Mnemonic == "vmls" || Mnemonic == "vmrs" || Mnemonic == "vnmls" ||
+ Mnemonic == "vqabs" || Mnemonic == "vrecps" || Mnemonic == "vrsqrts" ||
+ Mnemonic == "srs" || Mnemonic == "flds" || Mnemonic == "fmrs" ||
+ Mnemonic == "fsqrts" || Mnemonic == "fsubs" || Mnemonic == "fsts" ||
+ Mnemonic == "fcpys" || Mnemonic == "fdivs" || Mnemonic == "fmuls" ||
+ Mnemonic == "fcmps" || Mnemonic == "fcmpzs" || Mnemonic == "vfms" ||
+ Mnemonic == "vfnms" || Mnemonic == "fconsts" || Mnemonic == "bxns" ||
+ Mnemonic == "blxns" || Mnemonic == "vfmas" || Mnemonic == "vmlas" ||
(Mnemonic == "movs" && isThumb()))) {
Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
CarrySetting = true;
@@ -6370,7 +6361,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
// The "cps" instruction can have a interrupt mode operand which is glued into
// the mnemonic. Check if this is the case, split it and parse the imod op
- if (Mnemonic.startswith("cps")) {
+ if (Mnemonic.starts_with("cps")) {
// Split out any imod code.
unsigned IMod =
StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2, 2))
@@ -6399,16 +6390,15 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
}
// The "it" instruction has the condition mask on the end of the mnemonic.
- if (Mnemonic.startswith("it")) {
+ if (Mnemonic.starts_with("it")) {
ITMask = Mnemonic.slice(2, Mnemonic.size());
Mnemonic = Mnemonic.slice(0, 2);
}
- if (Mnemonic.startswith("vpst")) {
+ if (Mnemonic.starts_with("vpst")) {
ITMask = Mnemonic.slice(4, Mnemonic.size());
Mnemonic = Mnemonic.slice(0, 4);
- }
- else if (Mnemonic.startswith("vpt")) {
+ } else if (Mnemonic.starts_with("vpt")) {
ITMask = Mnemonic.slice(3, Mnemonic.size());
Mnemonic = Mnemonic.slice(0, 3);
}
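
Concretely, splitMnemonic peels suffixes from right to left; a hypothetical trace (simplified, since the exception lists above can suppress each step):

// "subseq": condition code first, then the carry-setting 's'.
StringRef M = "subseq";
unsigned CC = ARMCondCodeFromString(M.substr(M.size() - 2)); // "eq" -> ARMCC::EQ
M = M.slice(0, M.size() - 2);                                // "subs"
bool CarrySetting = M.ends_with("s");                        // true
M = M.slice(0, M.size() - 1);                                // "sub"
// "iteet"  -> Mnemonic "it",   ITMask "eet"
// "vpstee" -> Mnemonic "vpst", ITMask "ee"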
@@ -6443,39 +6433,36 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic,
if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" ||
Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" ||
Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic == "udf" ||
- Mnemonic.startswith("crc32") || Mnemonic.startswith("cps") ||
- Mnemonic.startswith("vsel") || Mnemonic == "vmaxnm" ||
+ Mnemonic.starts_with("crc32") || Mnemonic.starts_with("cps") ||
+ Mnemonic.starts_with("vsel") || Mnemonic == "vmaxnm" ||
Mnemonic == "vminnm" || Mnemonic == "vcvta" || Mnemonic == "vcvtn" ||
Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || Mnemonic == "vrinta" ||
Mnemonic == "vrintn" || Mnemonic == "vrintp" || Mnemonic == "vrintm" ||
- Mnemonic.startswith("aes") || Mnemonic == "hvc" || Mnemonic == "setpan" ||
- Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") ||
- (FullInst.startswith("vmull") && FullInst.endswith(".p64")) ||
- Mnemonic == "vmovx" || Mnemonic == "vins" ||
- Mnemonic == "vudot" || Mnemonic == "vsdot" ||
- Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
- Mnemonic == "vfmal" || Mnemonic == "vfmsl" ||
- Mnemonic == "vfmat" || Mnemonic == "vfmab" ||
- Mnemonic == "vdot" || Mnemonic == "vmmla" ||
- Mnemonic == "sb" || Mnemonic == "ssbb" ||
- Mnemonic == "pssbb" || Mnemonic == "vsmmla" ||
- Mnemonic == "vummla" || Mnemonic == "vusmmla" ||
- Mnemonic == "vusdot" || Mnemonic == "vsudot" ||
- Mnemonic == "bfcsel" || Mnemonic == "wls" ||
- Mnemonic == "dls" || Mnemonic == "le" || Mnemonic == "csel" ||
- Mnemonic == "csinc" || Mnemonic == "csinv" || Mnemonic == "csneg" ||
- Mnemonic == "cinc" || Mnemonic == "cinv" || Mnemonic == "cneg" ||
- Mnemonic == "cset" || Mnemonic == "csetm" ||
+ Mnemonic.starts_with("aes") || Mnemonic == "hvc" ||
+ Mnemonic == "setpan" || Mnemonic.starts_with("sha1") ||
+ Mnemonic.starts_with("sha256") ||
+ (FullInst.starts_with("vmull") && FullInst.ends_with(".p64")) ||
+ Mnemonic == "vmovx" || Mnemonic == "vins" || Mnemonic == "vudot" ||
+ Mnemonic == "vsdot" || Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
+ Mnemonic == "vfmal" || Mnemonic == "vfmsl" || Mnemonic == "vfmat" ||
+ Mnemonic == "vfmab" || Mnemonic == "vdot" || Mnemonic == "vmmla" ||
+ Mnemonic == "sb" || Mnemonic == "ssbb" || Mnemonic == "pssbb" ||
+ Mnemonic == "vsmmla" || Mnemonic == "vummla" || Mnemonic == "vusmmla" ||
+ Mnemonic == "vusdot" || Mnemonic == "vsudot" || Mnemonic == "bfcsel" ||
+ Mnemonic == "wls" || Mnemonic == "dls" || Mnemonic == "le" ||
+ Mnemonic == "csel" || Mnemonic == "csinc" || Mnemonic == "csinv" ||
+ Mnemonic == "csneg" || Mnemonic == "cinc" || Mnemonic == "cinv" ||
+ Mnemonic == "cneg" || Mnemonic == "cset" || Mnemonic == "csetm" ||
(hasCDE() && MS.isCDEInstr(Mnemonic) &&
!MS.isITPredicableCDEInstr(Mnemonic)) ||
- Mnemonic.startswith("vpt") || Mnemonic.startswith("vpst") ||
+ Mnemonic.starts_with("vpt") || Mnemonic.starts_with("vpst") ||
Mnemonic == "pac" || Mnemonic == "pacbti" || Mnemonic == "aut" ||
Mnemonic == "bti" ||
(hasMVE() &&
- (Mnemonic.startswith("vst2") || Mnemonic.startswith("vld2") ||
- Mnemonic.startswith("vst4") || Mnemonic.startswith("vld4") ||
- Mnemonic.startswith("wlstp") || Mnemonic.startswith("dlstp") ||
- Mnemonic.startswith("letp")))) {
+ (Mnemonic.starts_with("vst2") || Mnemonic.starts_with("vld2") ||
+ Mnemonic.starts_with("vst4") || Mnemonic.starts_with("vld4") ||
+ Mnemonic.starts_with("wlstp") || Mnemonic.starts_with("dlstp") ||
+ Mnemonic.starts_with("letp")))) {
// These mnemonics are never predicable
CanAcceptPredicationCode = false;
} else if (!isThumb()) {
@@ -6486,9 +6473,8 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic,
Mnemonic != "dmb" && Mnemonic != "dfb" && Mnemonic != "dsb" &&
Mnemonic != "isb" && Mnemonic != "pld" && Mnemonic != "pli" &&
Mnemonic != "pldw" && Mnemonic != "ldc2" && Mnemonic != "ldc2l" &&
- Mnemonic != "stc2" && Mnemonic != "stc2l" &&
- Mnemonic != "tsb" &&
- !Mnemonic.startswith("rfe") && !Mnemonic.startswith("srs");
+ Mnemonic != "stc2" && Mnemonic != "stc2l" && Mnemonic != "tsb" &&
+ !Mnemonic.starts_with("rfe") && !Mnemonic.starts_with("srs");
} else if (isThumbOne()) {
if (hasV6MOps())
CanAcceptPredicationCode = Mnemonic != "movs";
@@ -6784,16 +6770,16 @@ bool ARMAsmParser::shouldOmitVectorPredicateOperand(StringRef Mnemonic,
if (!hasMVE() || Operands.size() < 3)
return true;
- if (Mnemonic.startswith("vld2") || Mnemonic.startswith("vld4") ||
- Mnemonic.startswith("vst2") || Mnemonic.startswith("vst4"))
+ if (Mnemonic.starts_with("vld2") || Mnemonic.starts_with("vld4") ||
+ Mnemonic.starts_with("vst2") || Mnemonic.starts_with("vst4"))
return true;
- if (Mnemonic.startswith("vctp") || Mnemonic.startswith("vpnot"))
+ if (Mnemonic.starts_with("vctp") || Mnemonic.starts_with("vpnot"))
return false;
- if (Mnemonic.startswith("vmov") &&
- !(Mnemonic.startswith("vmovl") || Mnemonic.startswith("vmovn") ||
- Mnemonic.startswith("vmovx"))) {
+ if (Mnemonic.starts_with("vmov") &&
+ !(Mnemonic.starts_with("vmovl") || Mnemonic.starts_with("vmovn") ||
+ Mnemonic.starts_with("vmovx"))) {
for (auto &Operand : Operands) {
if (static_cast<ARMOperand &>(*Operand).isVectorIndex() ||
((*Operand).isReg() &&
@@ -6833,7 +6819,7 @@ static bool isDataTypeToken(StringRef Tok) {
// in the .td files that matches the suffix instead of having it be
// a literal string token the way it is now.
static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
- return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm");
+ return Mnemonic.starts_with("vldm") || Mnemonic.starts_with("vstm");
}
static void applyMnemonicAliases(StringRef &Mnemonic,
@@ -7003,8 +6989,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// ITxyz -> xyz1 (e.g. ITEET -> 1101)
// Note: See the ARM::PredBlockMask enum in
// /lib/Target/ARM/Utils/ARMBaseInfo.h
- if (Mnemonic == "it" || Mnemonic.startswith("vpt") ||
- Mnemonic.startswith("vpst")) {
+ if (Mnemonic == "it" || Mnemonic.starts_with("vpt") ||
+ Mnemonic.starts_with("vpst")) {
SMLoc Loc = Mnemonic == "it" ? SMLoc::getFromPointer(NameLoc.getPointer() + 2) :
Mnemonic == "vpt" ? SMLoc::getFromPointer(NameLoc.getPointer() + 3) :
SMLoc::getFromPointer(NameLoc.getPointer() + 4);
@@ -7082,8 +7068,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// scalar predication operand we do not add the vector one and leave until
// now to fix it up.
if (CanAcceptVPTPredicationCode && Mnemonic != "vmov" &&
- !Mnemonic.startswith("vcmp") &&
- !(Mnemonic.startswith("vcvt") && Mnemonic != "vcvta" &&
+ !Mnemonic.starts_with("vcmp") &&
+ !(Mnemonic.starts_with("vcvt") && Mnemonic != "vcvta" &&
Mnemonic != "vcvtn" && Mnemonic != "vcvtp" && Mnemonic != "vcvtm")) {
SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size() +
CarrySetting);
@@ -7228,10 +7214,11 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// special parsing. So now we have to see if they require vector
// predication and replace the scalar one with the vector predication
// operand if that is the case.
- else if (Mnemonic == "vmov" || Mnemonic.startswith("vcmp") ||
- (Mnemonic.startswith("vcvt") && !Mnemonic.startswith("vcvta") &&
- !Mnemonic.startswith("vcvtn") && !Mnemonic.startswith("vcvtp") &&
- !Mnemonic.startswith("vcvtm"))) {
+ else if (Mnemonic == "vmov" || Mnemonic.starts_with("vcmp") ||
+ (Mnemonic.starts_with("vcvt") && !Mnemonic.starts_with("vcvta") &&
+ !Mnemonic.starts_with("vcvtn") &&
+ !Mnemonic.starts_with("vcvtp") &&
+ !Mnemonic.starts_with("vcvtm"))) {
if (!shouldOmitVectorPredicateOperand(Mnemonic, Operands)) {
// We could not split the vector predicate off vcvt because it might
// have been the scalar vcvtt instruction. Now we know it's a vector
@@ -7239,11 +7226,11 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// predicated vcvt with 'Then' predication or the vector vcvtt. We can
// distinguish the two based on the suffixes: if it is any of
// ".f16.f32", ".f32.f16", ".f16.f64" or ".f64.f16" then it is the vcvtt.
- if (Mnemonic.startswith("vcvtt") && Operands.size() >= 4) {
+ if (Mnemonic.starts_with("vcvtt") && Operands.size() >= 4) {
auto Sz1 = static_cast<ARMOperand &>(*Operands[2]);
auto Sz2 = static_cast<ARMOperand &>(*Operands[3]);
- if (!(Sz1.isToken() && Sz1.getToken().startswith(".f") &&
- Sz2.isToken() && Sz2.getToken().startswith(".f"))) {
+ if (!(Sz1.isToken() && Sz1.getToken().starts_with(".f") &&
+ Sz2.isToken() && Sz2.getToken().starts_with(".f"))) {
Operands.erase(Operands.begin());
SMLoc MLoc = SMLoc::getFromPointer(NameLoc.getPointer());
VPTPredicationCode = ARMVCC::Then;
@@ -11983,6 +11970,8 @@ bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) {
}
getTargetStreamer().emitInst(Value->getValue(), CurSuffix);
+ forwardITPosition();
+ forwardVPTPosition();
return false;
};
@@ -12648,6 +12637,9 @@ bool ARMAsmParser::enableArchExtFeature(StringRef Name, SMLoc &ExtLoc) {
{ARM::AEK_CRYPTO,
{Feature_HasV8Bit},
{ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8}},
+ {(ARM::AEK_DSP | ARM::AEK_SIMD | ARM::AEK_FP),
+ {Feature_HasV8_1MMainlineBit},
+ {ARM::HasMVEFloatOps}},
{ARM::AEK_FP,
{Feature_HasV8Bit},
{ARM::FeatureVFP2_SP, ARM::FeatureFPARMv8}},
@@ -12795,12 +12787,12 @@ bool ARMAsmParser::isMnemonicVPTPredicable(StringRef Mnemonic,
return false;
if (MS.isVPTPredicableCDEInstr(Mnemonic) ||
- (Mnemonic.startswith("vldrh") && Mnemonic != "vldrhi") ||
- (Mnemonic.startswith("vmov") &&
+ (Mnemonic.starts_with("vldrh") && Mnemonic != "vldrhi") ||
+ (Mnemonic.starts_with("vmov") &&
!(ExtraToken == ".f16" || ExtraToken == ".32" || ExtraToken == ".16" ||
ExtraToken == ".8")) ||
- (Mnemonic.startswith("vrint") && Mnemonic != "vrintr") ||
- (Mnemonic.startswith("vstrh") && Mnemonic != "vstrhi"))
+ (Mnemonic.starts_with("vrint") && Mnemonic != "vrintr") ||
+ (Mnemonic.starts_with("vstrh") && Mnemonic != "vstrhi"))
return true;
const char *predicable_prefixes[] = {
@@ -12830,5 +12822,5 @@ bool ARMAsmParser::isMnemonicVPTPredicable(StringRef Mnemonic,
return std::any_of(
std::begin(predicable_prefixes), std::end(predicable_prefixes),
- [&Mnemonic](const char *prefix) { return Mnemonic.startswith(prefix); });
+ [&Mnemonic](const char *prefix) { return Mnemonic.starts_with(prefix); });
}
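
The std::any_of over the raw array could equally use LLVM's range helper; an equivalent sketch:

// Illustrative only: llvm::any_of accepts the array as a range directly.
return llvm::any_of(predicable_prefixes, [&Mnemonic](const char *Prefix) {
  return Mnemonic.starts_with(Prefix);
});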
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index ee81bfa65c6b..604f22d71119 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -135,9 +135,9 @@ public:
ARMDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
const MCInstrInfo *MCII)
: MCDisassembler(STI, Ctx), MCII(MCII) {
- InstructionEndianness = STI.hasFeature(ARM::ModeBigEndianInstructions)
- ? llvm::support::big
- : llvm::support::little;
+ InstructionEndianness = STI.hasFeature(ARM::ModeBigEndianInstructions)
+ ? llvm::endianness::big
+ : llvm::endianness::little;
}
~ARMDisassembler() override = default;
@@ -166,7 +166,7 @@ private:
DecodeStatus AddThumbPredicate(MCInst&) const;
void UpdateThumbVFPPredicate(DecodeStatus &, MCInst&) const;
- llvm::support::endianness InstructionEndianness;
+ llvm::endianness InstructionEndianness;
};
} // end anonymous namespace
@@ -6204,7 +6204,7 @@ static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val,
// We have to check if the instruction is MRRC2
// or MCRR2 when constructing the operands for
// Inst. Reason is because MRRC2 stores to two
- // registers so it's tablegen desc has has two
+ // registers so its tablegen desc has two
// outputs whereas MCRR doesn't store to any
// registers so all of its operands are listed
// as inputs, therefore the operand order for
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 701691804620..534434fef5ac 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -74,10 +74,11 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
// ARMFixupKinds.h.
//
// Name Offset (bits) Size (bits) Flags
- {"fixup_arm_ldst_pcrel_12", 0, 32, IsPCRelConstant},
+ {"fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_t2_ldst_pcrel_12", 0, 32,
- IsPCRelConstant | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
- {"fixup_arm_pcrel_10_unscaled", 0, 32, IsPCRelConstant},
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_arm_pcrel_10", 0, 32, IsPCRelConstant},
{"fixup_t2_pcrel_10", 0, 32,
MCFixupKindInfo::FKF_IsPCRel |
@@ -87,10 +88,12 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
IsPCRelConstant | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{"fixup_arm_ldst_abs_12", 0, 32, 0},
{"fixup_thumb_adr_pcrel_10", 0, 8,
- IsPCRelConstant | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
- {"fixup_arm_adr_pcrel_12", 0, 32, IsPCRelConstant},
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_t2_adr_pcrel_12", 0, 32,
- IsPCRelConstant | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{"fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_arm_uncondbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
@@ -132,10 +135,11 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
// ARMFixupKinds.h.
//
// Name Offset (bits) Size (bits) Flags
- {"fixup_arm_ldst_pcrel_12", 0, 32, IsPCRelConstant},
+ {"fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_t2_ldst_pcrel_12", 0, 32,
- IsPCRelConstant | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
- {"fixup_arm_pcrel_10_unscaled", 0, 32, IsPCRelConstant},
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_arm_pcrel_10", 0, 32, IsPCRelConstant},
{"fixup_t2_pcrel_10", 0, 32,
MCFixupKindInfo::FKF_IsPCRel |
@@ -196,8 +200,9 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
"Invalid kind!");
- return (Endian == support::little ? InfosLE
- : InfosBE)[Kind - FirstTargetFixupKind];
+ return (Endian == llvm::endianness::little
+ ? InfosLE
+ : InfosBE)[Kind - FirstTargetFixupKind];
}
void ARMAsmBackend::handleAssemblerFlag(MCAssemblerFlag Flag) {
@@ -493,7 +498,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
// inst{14-12} = Mid3;
// inst{7-0} = Lo8;
Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8);
- return swapHalfWords(Value, Endian == support::little);
+ return swapHalfWords(Value, Endian == llvm::endianness::little);
}
case ARM::fixup_arm_thumb_upper_8_15:
if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF())
@@ -532,7 +537,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
// Same addressing mode as fixup_arm_pcrel_10,
// but with 16-bit halfwords swapped.
if (Kind == ARM::fixup_t2_ldst_pcrel_12)
- return swapHalfWords(Value, Endian == support::little);
+ return swapHalfWords(Value, Endian == llvm::endianness::little);
return Value;
}
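
The halfword swap recurs throughout this function because Thumb2 stores a 32-bit instruction as two consecutive 16-bit units; a sketch of the assumed helper semantics:

// Sketch of swapHalfWords: on little-endian targets each Thumb2 halfword is
// emitted little-endian on its own, so the two 16-bit halves of the encoded
// value are exchanged before the fixup is applied.
static unsigned swapHalfWordsSketch(unsigned Value, bool IsLittleEndian) {
  if (!IsLittleEndian)
    return Value;
  return (Value << 16) | (Value >> 16);
}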
@@ -565,7 +570,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
out |= (Value & 0x700) << 4;
out |= (Value & 0x0FF);
- return swapHalfWords(out, Endian == support::little);
+ return swapHalfWords(out, Endian == llvm::endianness::little);
}
case ARM::fixup_arm_condbranch:
@@ -602,7 +607,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
out |= (Value & 0x1FF800) << 5; // imm6 field
out |= (Value & 0x0007FF); // imm11 field
- return swapHalfWords(out, Endian == support::little);
+ return swapHalfWords(out, Endian == llvm::endianness::little);
}
case ARM::fixup_t2_condbranch: {
Value = Value - 4;
@@ -620,7 +625,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
out |= (Value & 0x1F800) << 5; // imm6 field
out |= (Value & 0x007FF); // imm11 field
- return swapHalfWords(out, Endian == support::little);
+ return swapHalfWords(out, Endian == llvm::endianness::little);
}
case ARM::fixup_arm_thumb_bl: {
if (!isInt<25>(Value - 4) ||
@@ -656,7 +661,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits);
uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
(uint16_t)imm11Bits);
- return joinHalfWords(FirstHalf, SecondHalf, Endian == support::little);
+ return joinHalfWords(FirstHalf, SecondHalf,
+ Endian == llvm::endianness::little);
}
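
The J1/J2 terms above follow the architectural BL encoding, where I1 = NOT(J1 XOR S) and I2 = NOT(J2 XOR S); inverted, the bits stored in the second halfword are derived as (sketch, with signBit the offset's sign bit S):

uint32_t J1Bit = (~I1Bit & 1) ^ signBit; // J1 = NOT(I1) XOR S
uint32_t J2Bit = (~I2Bit & 1) ^ signBit; // J2 = NOT(I2) XOR S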
case ARM::fixup_arm_thumb_blx: {
// The value doesn't encode the low two bits (always zero) and is offset by
@@ -692,7 +698,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits);
uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
((uint16_t)imm10LBits) << 1);
- return joinHalfWords(FirstHalf, SecondHalf, Endian == support::little);
+ return joinHalfWords(FirstHalf, SecondHalf,
+ Endian == llvm::endianness::little);
}
case ARM::fixup_thumb_adr_pcrel_10:
case ARM::fixup_arm_thumb_cp:
@@ -783,7 +790,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
// Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords
// swapped.
if (Kind == ARM::fixup_t2_pcrel_10)
- return swapHalfWords(Value, Endian == support::little);
+ return swapHalfWords(Value, Endian == llvm::endianness::little);
return Value;
}
@@ -814,7 +821,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
// Same addressing mode as fixup_arm_pcrel_9, but with 16-bit halfwords
// swapped.
if (Kind == ARM::fixup_t2_pcrel_9)
- return swapHalfWords(Value, Endian == support::little);
+ return swapHalfWords(Value, Endian == llvm::endianness::little);
return Value;
}
@@ -840,7 +847,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
EncValue |= (Value & 0x800) << 15;
EncValue |= (Value & 0x700) << 4;
EncValue |= (Value & 0xff);
- return swapHalfWords(EncValue, Endian == support::little);
+ return swapHalfWords(EncValue, Endian == llvm::endianness::little);
}
case ARM::fixup_bf_branch: {
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
@@ -849,7 +856,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
return 0;
}
uint32_t out = (((Value - 4) >> 1) & 0xf) << 23;
- return swapHalfWords(out, Endian == support::little);
+ return swapHalfWords(out, Endian == llvm::endianness::little);
}
case ARM::fixup_bf_target:
case ARM::fixup_bfl_target:
@@ -865,7 +872,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
out |= (((Value - 4) >> 1) & 0x1) << 11;
out |= (((Value - 4) >> 1) & 0x7fe);
out |= (((Value - 4) >> 1) & HighBitMask) << 5;
- return swapHalfWords(out, Endian == support::little);
+ return swapHalfWords(out, Endian == llvm::endianness::little);
}
case ARM::fixup_bfcsel_else_target: {
// If this is a fixup of a branch future's else target then it should be a
@@ -879,7 +886,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
return 0;
}
uint32_t out = ((Value >> 2) & 1) << 17;
- return swapHalfWords(out, Endian == support::little);
+ return swapHalfWords(out, Endian == llvm::endianness::little);
}
case ARM::fixup_wls:
case ARM::fixup_le: {
@@ -894,14 +901,15 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
real_value = -real_value;
out |= ((real_value >> 1) & 0x1) << 11;
out |= ((real_value >> 1) & 0x7fe);
- return swapHalfWords(out, Endian == support::little);
+ return swapHalfWords(out, Endian == llvm::endianness::little);
}
}
}
bool ARMAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
const MCFixup &Fixup,
- const MCValue &Target) {
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) {
const MCSymbolRefExpr *A = Target.getSymA();
const MCSymbol *Sym = A ? &A->getSymbol() : nullptr;
const unsigned FixupKind = Fixup.getKind();
@@ -1089,7 +1097,7 @@ void ARMAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
// Used to point to big endian bytes.
unsigned FullSizeBytes;
- if (Endian == support::big) {
+ if (Endian == llvm::endianness::big) {
FullSizeBytes = getFixupKindContainerSizeBytes(Kind);
assert((Offset + FullSizeBytes) <= Data.size() && "Invalid fixup size!");
assert(NumBytes <= FullSizeBytes && "Invalid fixup size!");
@@ -1099,7 +1107,8 @@ void ARMAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
// the fixup value. The Value has been "split up" into the appropriate
// bitfields above.
for (unsigned i = 0; i != NumBytes; ++i) {
- unsigned Idx = Endian == support::little ? i : (FullSizeBytes - 1 - i);
+ unsigned Idx =
+ Endian == llvm::endianness::little ? i : (FullSizeBytes - 1 - i);
Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
}
}
@@ -1326,7 +1335,7 @@ static MCAsmBackend *createARMAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options,
- support::endianness Endian) {
+ llvm::endianness Endian) {
const Triple &TheTriple = STI.getTargetTriple();
switch (TheTriple.getObjectFormat()) {
default:
@@ -1348,12 +1357,12 @@ MCAsmBackend *llvm::createARMLEAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
- return createARMAsmBackend(T, STI, MRI, Options, support::little);
+ return createARMAsmBackend(T, STI, MRI, Options, llvm::endianness::little);
}
MCAsmBackend *llvm::createARMBEAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
- return createARMAsmBackend(T, STI, MRI, Options, support::big);
+ return createARMAsmBackend(T, STI, MRI, Options, llvm::endianness::big);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 64c78d352895..328eed9b0ec4 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -20,7 +20,7 @@ namespace llvm {
class ARMAsmBackend : public MCAsmBackend {
bool isThumbMode; // Currently emitting Thumb code.
public:
- ARMAsmBackend(const Target &T, bool isThumb, support::endianness Endian)
+ ARMAsmBackend(const Target &T, bool isThumb, llvm::endianness Endian)
: MCAsmBackend(Endian), isThumbMode(isThumb) {}
unsigned getNumFixupKinds() const override {
@@ -36,7 +36,8 @@ public:
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target) override;
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) override;
unsigned adjustFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, uint64_t Value,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index ace573c8fa96..ac0c9b101cae 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -22,7 +22,8 @@ public:
const MachO::CPUSubTypeARM Subtype;
ARMAsmBackendDarwin(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI)
- : ARMAsmBackend(T, STI.getTargetTriple().isThumb(), support::little),
+ : ARMAsmBackend(T, STI.getTargetTriple().isThumb(),
+ llvm::endianness::little),
MRI(MRI), TT(STI.getTargetTriple()),
Subtype((MachO::CPUSubTypeARM)cantFail(
MachO::getCPUSubType(STI.getTargetTriple()))) {}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
index 37afdf439e9a..abbe73e336f5 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
@@ -20,7 +20,7 @@ class ARMAsmBackendELF : public ARMAsmBackend {
public:
uint8_t OSABI;
ARMAsmBackendELF(const Target &T, bool isThumb, uint8_t OSABI,
- support::endianness Endian)
+ llvm::endianness Endian)
: ARMAsmBackend(T, isThumb, Endian), OSABI(OSABI) {}
std::unique_ptr<MCObjectTargetWriter>
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
index 6e447df9e4cb..86ce6efe662a 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
@@ -17,7 +17,7 @@ namespace {
class ARMAsmBackendWinCOFF : public ARMAsmBackend {
public:
ARMAsmBackendWinCOFF(const Target &T, bool isThumb)
- : ARMAsmBackend(T, isThumb, support::little) {}
+ : ARMAsmBackend(T, isThumb, llvm::endianness::little) {}
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override {
return createARMWinCOFFObjectWriter();
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index caebace2eb78..44695a86c4e3 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -38,7 +38,7 @@ namespace {
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
- bool needsRelocateWithSymbol(const MCSymbol &Sym,
+ bool needsRelocateWithSymbol(const MCValue &Val, const MCSymbol &Sym,
unsigned Type) const override;
void addTargetSectionFlags(MCContext &Ctx, MCSectionELF &Sec) override;
@@ -51,7 +51,8 @@ ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI)
ELF::EM_ARM,
/*HasRelocationAddend*/ false) {}
-bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
+bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCValue &,
+ const MCSymbol &,
unsigned Type) const {
// FIXME: This is extremely conservative. This really needs to use an
// explicit list with a clear explanation for why each relocation needs to
@@ -157,6 +158,18 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
default:
return ELF::R_ARM_THM_CALL;
}
+ case ARM::fixup_arm_ldst_pcrel_12:
+ return ELF::R_ARM_LDR_PC_G0;
+ case ARM::fixup_arm_pcrel_10_unscaled:
+ return ELF::R_ARM_LDRS_PC_G0;
+ case ARM::fixup_t2_ldst_pcrel_12:
+ return ELF::R_ARM_THM_PC12;
+ case ARM::fixup_arm_adr_pcrel_12:
+ return ELF::R_ARM_ALU_PC_G0;
+ case ARM::fixup_thumb_adr_pcrel_10:
+ return ELF::R_ARM_THM_PC8;
+ case ARM::fixup_t2_adr_pcrel_12:
+ return ELF::R_ARM_THM_ALU_PREL_11_0;
case ARM::fixup_bf_target:
return ELF::R_ARM_THM_BF16;
case ARM::fixup_bfc_target:
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 25bbc4ee7eb5..9c9af6068079 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -12,8 +12,9 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMRegisterInfo.h"
+#include "ARMMCTargetDesc.h"
#include "ARMUnwindOpAsm.h"
+#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
index 4f5c067abb0c..fbd067d79af0 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
@@ -49,9 +49,8 @@ static unsigned translateShiftImm(unsigned imm) {
return imm;
}
-/// Prints the shift value with an immediate value.
static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc,
- unsigned ShImm, bool UseMarkup) {
+ unsigned ShImm, const ARMInstPrinter &printer) {
if (ShOpc == ARM_AM::no_shift || (ShOpc == ARM_AM::lsl && !ShImm))
return;
O << ", ";
@@ -61,11 +60,8 @@ static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc,
if (ShOpc != ARM_AM::rrx) {
O << " ";
- if (UseMarkup)
- O << "<imm:";
- O << "#" << translateShiftImm(ShImm);
- if (UseMarkup)
- O << ">";
+ printer.markup(O, llvm::MCInstPrinter::Markup::Immediate)
+ << "#" << translateShiftImm(ShImm);
}
}
@@ -86,7 +82,7 @@ bool ARMInstPrinter::applyTargetSpecificCLOption(StringRef Opt) {
}
void ARMInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
- OS << markup("<reg:") << getRegisterName(Reg, DefaultAltIdx) << markup(">");
+ markup(OS, Markup::Register) << getRegisterName(Reg, DefaultAltIdx);
}
void ARMInstPrinter::printInst(const MCInst *MI, uint64_t Address,
@@ -139,8 +135,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, uint64_t Address,
return;
}
- O << ", " << markup("<imm:") << "#"
- << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())) << markup(">");
+ O << ", ";
+ markup(O, Markup::Immediate)
+ << "#" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()));
printAnnotation(O, Annot);
return;
}
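
markup() now returns an RAII helper that closes the tag when it goes out of scope, which is what lets the bracketing "<imm:...>"/"<mem:...>" pairs disappear from the call sites below. A minimal sketch of the assumed shape (the real class is MCInstPrinter::WithMarkup and also handles the no-markup and color cases):

class WithMarkupSketch {
  raw_ostream &OS;
public:
  WithMarkupSketch(raw_ostream &OS, StringRef Open) : OS(OS) { OS << Open; }
  ~WithMarkupSketch() { OS << ">"; } // closes the tag on scope exit
  template <typename T> WithMarkupSketch &operator<<(const T &V) {
    OS << V;
    return *this;
  }
};
// Usage: markup(O, Markup::Immediate) << "#" << Imm; // emits "<imm:#42>"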
@@ -316,7 +313,7 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
unsigned Reg = Op.getReg();
printRegName(O, Reg);
} else if (Op.isImm()) {
- O << markup("<imm:") << '#' << formatImm(Op.getImm()) << markup(">");
+ markup(O, Markup::Immediate) << '#' << formatImm(Op.getImm());
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
const MCExpr *Expr = Op.getExpr();
@@ -372,7 +369,8 @@ void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
return;
}
- O << markup("<mem:") << "[pc, ";
+ WithMarkup ScopedMarkup = markup(O, Markup::Memory);
+ O << "[pc, ";
int32_t OffImm = (int32_t)MO1.getImm();
bool isSub = OffImm < 0;
@@ -381,11 +379,11 @@ void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
if (OffImm == INT32_MIN)
OffImm = 0;
if (isSub) {
- O << markup("<imm:") << "#-" << formatImm(-OffImm) << markup(">");
+ markup(O, Markup::Immediate) << "#-" << formatImm(-OffImm);
} else {
- O << markup("<imm:") << "#" << formatImm(OffImm) << markup(">");
+ markup(O, Markup::Immediate) << "#" << formatImm(OffImm);
}
- O << "]" << markup(">");
+ O << "]";
}
// so_reg is a 4-operand unit corresponding to register forms of the A5.1
@@ -423,7 +421,7 @@ void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum,
// Print the shift opc.
printRegImmShift(O, ARM_AM::getSORegShOp(MO2.getImm()),
- ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup);
+ ARM_AM::getSORegOffset(MO2.getImm()), *this);
}
//===--------------------------------------------------------------------===//
@@ -437,16 +435,18 @@ void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
const MCOperand &MO2 = MI->getOperand(Op + 1);
const MCOperand &MO3 = MI->getOperand(Op + 2);
- O << markup("<mem:") << "[";
+ WithMarkup ScopedMarkup = markup(O, Markup::Memory);
+ O << "[";
printRegName(O, MO1.getReg());
if (!MO2.getReg()) {
if (ARM_AM::getAM2Offset(MO3.getImm())) { // Don't print +0.
- O << ", " << markup("<imm:") << "#"
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
- << ARM_AM::getAM2Offset(MO3.getImm()) << markup(">");
+ O << ", ";
+ markup(O, Markup::Immediate)
+ << "#" << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+ << ARM_AM::getAM2Offset(MO3.getImm());
}
- O << "]" << markup(">");
+ O << "]";
return;
}
@@ -455,8 +455,8 @@ void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
printRegName(O, MO2.getReg());
printRegImmShift(O, ARM_AM::getAM2ShiftOpc(MO3.getImm()),
- ARM_AM::getAM2Offset(MO3.getImm()), UseMarkup);
- O << "]" << markup(">");
+ ARM_AM::getAM2Offset(MO3.getImm()), *this);
+ O << "]";
}
void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op,
@@ -464,11 +464,13 @@ void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
const MCOperand &MO2 = MI->getOperand(Op + 1);
- O << markup("<mem:") << "[";
+
+ WithMarkup ScopedMarkup = markup(O, Markup::Memory);
+ O << "[";
printRegName(O, MO1.getReg());
O << ", ";
printRegName(O, MO2.getReg());
- O << "]" << markup(">");
+ O << "]";
}
void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op,
@@ -476,11 +478,14 @@ void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
const MCOperand &MO2 = MI->getOperand(Op + 1);
- O << markup("<mem:") << "[";
+ WithMarkup ScopedMarkup = markup(O, Markup::Memory);
+ O << "[";
printRegName(O, MO1.getReg());
O << ", ";
printRegName(O, MO2.getReg());
- O << ", lsl " << markup("<imm:") << "#1" << markup(">") << "]" << markup(">");
+ O << ", lsl ";
+ markup(O, Markup::Immediate) << "#1";
+ O << "]";
}
void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
@@ -511,9 +516,9 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
if (!MO1.getReg()) {
unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
- O << markup("<imm:") << '#'
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << ImmOffs
- << markup(">");
+ markup(O, Markup::Immediate)
+ << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
+ << ImmOffs;
return;
}
@@ -521,7 +526,7 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
printRegName(O, MO1.getReg());
printRegImmShift(O, ARM_AM::getAM2ShiftOpc(MO2.getImm()),
- ARM_AM::getAM2Offset(MO2.getImm()), UseMarkup);
+ ARM_AM::getAM2Offset(MO2.getImm()), *this);
}
//===--------------------------------------------------------------------===//
@@ -535,13 +540,14 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
const MCOperand &MO2 = MI->getOperand(Op + 1);
const MCOperand &MO3 = MI->getOperand(Op + 2);
- O << markup("<mem:") << '[';
+ WithMarkup ScopedMarkup = markup(O, Markup::Memory);
+ O << '[';
printRegName(O, MO1.getReg());
if (MO2.getReg()) {
O << ", " << getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()));
printRegName(O, MO2.getReg());
- O << ']' << markup(">");
+ O << ']';
return;
}
@@ -550,10 +556,10 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm());
if (AlwaysPrintImm0 || ImmOffs || (op == ARM_AM::sub)) {
- O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(op) << ImmOffs
- << markup(">");
+ O << ", ";
+ markup(O, Markup::Immediate) << "#" << ARM_AM::getAddrOpcStr(op) << ImmOffs;
}
- O << ']' << markup(">");
+ O << ']';
}
template <bool AlwaysPrintImm0>
@@ -586,9 +592,9 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
}
unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
- O << markup("<imm:") << '#'
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs
- << markup(">");
+ markup(O, Markup::Immediate)
+ << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
+ << ImmOffs;
}
void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
@@ -596,8 +602,8 @@ void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
unsigned Imm = MO.getImm();
- O << markup("<imm:") << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff)
- << markup(">");
+ markup(O, Markup::Immediate)
+ << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff);
}
void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum,
@@ -615,8 +621,8 @@ void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
unsigned Imm = MO.getImm();
- O << markup("<imm:") << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2)
- << markup(">");
+ markup(O, Markup::Immediate)
+ << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2);
}
template<int shift>
@@ -626,15 +632,16 @@ void ARMInstPrinter::printMveAddrModeRQOperand(const MCInst *MI, unsigned OpNum,
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum + 1);
- O << markup("<mem:") << "[";
+ WithMarkup ScopedMarkup = markup(O, Markup::Memory);
+ O << "[";
printRegName(O, MO1.getReg());
O << ", ";
printRegName(O, MO2.getReg());
if (shift > 0)
- printRegImmShift(O, ARM_AM::uxtw, shift, UseMarkup);
+ printRegImmShift(O, ARM_AM::uxtw, shift, *this);
- O << "]" << markup(">");
+ O << "]";
}
void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
@@ -657,16 +664,18 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
return;
}
- O << markup("<mem:") << "[";
+ WithMarkup ScopedMarkup = markup(O, Markup::Memory);
+ O << "[";
printRegName(O, MO1.getReg());
unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
ARM_AM::AddrOpc Op = ARM_AM::getAM5Op(MO2.getImm());
if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) {
- O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(Op)
- << ImmOffs * 4 << markup(">");
+ O << ", ";
+ markup(O, Markup::Immediate)
+ << "#" << ARM_AM::getAddrOpcStr(Op) << ImmOffs * 4;
}
- O << "]" << markup(">");
+ O << "]";
}
template <bool AlwaysPrintImm0>
@@ -681,20 +690,19 @@ void ARMInstPrinter::printAddrMode5FP16Operand(const MCInst *MI, unsigned OpNum,
return;
}
- O << markup("<mem:") << "[";
+ WithMarkup ScopedMarkup = markup(O, Markup::Memory);
+ O << "[";
printRegName(O, MO1.getReg());
unsigned ImmOffs = ARM_AM::getAM5FP16Offset(MO2.getImm());
unsigned Op = ARM_AM::getAM5FP16Op(MO2.getImm());
if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) {
- O << ", "
- << markup("<imm:")
- << "#"
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM5FP16Op(MO2.getImm()))
- << ImmOffs * 2
- << markup(">");
+ O << ", ";
+ markup(O, Markup::Immediate)
+ << "#" << ARM_AM::getAddrOpcStr(ARM_AM::getAM5FP16Op(MO2.getImm()))
+ << ImmOffs * 2;
}
- O << "]" << markup(">");
+ O << "]";
}
void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
@@ -703,21 +711,23 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum + 1);
- O << markup("<mem:") << "[";
+ WithMarkup ScopedMarkup = markup(O, Markup::Memory);
+ O << "[";
printRegName(O, MO1.getReg());
if (MO2.getImm()) {
O << ":" << (MO2.getImm() << 3);
}
- O << "]" << markup(">");
+ O << "]";
}
void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- O << markup("<mem:") << "[";
+ WithMarkup ScopedMarkup = markup(O, Markup::Memory);
+ O << "[";
printRegName(O, MO1.getReg());
- O << "]" << markup(">");
+ O << "]";
}
void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
@@ -742,8 +752,9 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
int32_t lsb = llvm::countr_zero(v);
int32_t width = llvm::bit_width(v) - lsb;
assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
- O << markup("<imm:") << '#' << lsb << markup(">") << ", " << markup("<imm:")
- << '#' << width << markup(">");
+ markup(O, Markup::Immediate) << '#' << lsb;
+ O << ", ";
+ markup(O, Markup::Immediate) << '#' << width;
}
void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
@@ -774,10 +785,11 @@ void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
bool isASR = (ShiftOp & (1 << 5)) != 0;
unsigned Amt = ShiftOp & 0x1f;
if (isASR) {
- O << ", asr " << markup("<imm:") << "#" << (Amt == 0 ? 32 : Amt)
- << markup(">");
+ O << ", asr ";
+ markup(O, Markup::Immediate) << "#" << (Amt == 0 ? 32 : Amt);
} else if (Amt) {
- O << ", lsl " << markup("<imm:") << "#" << Amt << markup(">");
+ O << ", lsl ";
+ markup(O, Markup::Immediate) << "#" << Amt;
}
}
@@ -788,7 +800,8 @@ void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
if (Imm == 0)
return;
assert(Imm > 0 && Imm < 32 && "Invalid PKH shift immediate value!");
- O << ", lsl " << markup("<imm:") << "#" << Imm << markup(">");
+ O << ", lsl ";
+ markup(O, Markup::Immediate) << "#" << Imm;
}
void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
@@ -799,7 +812,8 @@ void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
if (Imm == 0)
Imm = 32;
assert(Imm > 0 && Imm <= 32 && "Invalid PKH shift immediate value!");
- O << ", asr " << markup("<imm:") << "#" << Imm << markup(">");
+ O << ", asr ";
+ markup(O, Markup::Immediate) << "#" << Imm;
}
void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
@@ -1043,29 +1057,27 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
int32_t OffImm = (int32_t)MO.getImm() << scale;
- O << markup("<imm:");
+ WithMarkup ScopedMarkup = markup(O, Markup::Immediate);
if (OffImm == INT32_MIN)
O << "#-0";
else if (OffImm < 0)
O << "#-" << -OffImm;
else
O << "#" << OffImm;
- O << markup(">");
}
void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- O << markup("<imm:") << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4)
- << markup(">");
+ markup(O, Markup::Immediate)
+ << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4);
}
void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
- O << markup("<imm:") << "#" << formatImm((Imm == 0 ? 32 : Imm))
- << markup(">");
+ markup(O, Markup::Immediate) << "#" << formatImm((Imm == 0 ? 32 : Imm));
}
void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
@@ -1094,13 +1106,14 @@ void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op,
return;
}
- O << markup("<mem:") << "[";
+ WithMarkup ScopedMarkup = markup(O, Markup::Memory);
+ O << "[";
printRegName(O, MO1.getReg());
if (unsigned RegNum = MO2.getReg()) {
O << ", ";
printRegName(O, RegNum);
}
- O << "]" << markup(">");
+ O << "]";
}
void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
@@ -1116,13 +1129,14 @@ void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
return;
}
- O << markup("<mem:") << "[";
+ WithMarkup ScopedMarkup = markup(O, Markup::Memory);
+ O << "[";
printRegName(O, MO1.getReg());
if (unsigned ImmOffs = MO2.getImm()) {
- O << ", " << markup("<imm:") << "#" << formatImm(ImmOffs * Scale)
- << markup(">");
+ O << ", ";
+ markup(O, Markup::Immediate) << "#" << formatImm(ImmOffs * Scale);
}
- O << "]" << markup(">");
+ O << "]";
}
void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI,
@@ -1168,7 +1182,7 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
// Print the shift opc.
assert(MO2.isImm() && "Not a valid t2_so_reg value!");
printRegImmShift(O, ARM_AM::getSORegShOp(MO2.getImm()),
- ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup);
+ ARM_AM::getSORegOffset(MO2.getImm()), *this);
}
template <bool AlwaysPrintImm0>
@@ -1183,7 +1197,8 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
return;
}
- O << markup("<mem:") << "[";
+ WithMarkup ScopedMarkup = markup(O, Markup::Memory);
+ O << "[";
printRegName(O, MO1.getReg());
int32_t OffImm = (int32_t)MO2.getImm();
@@ -1192,11 +1207,13 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
if (OffImm == INT32_MIN)
OffImm = 0;
if (isSub) {
- O << ", " << markup("<imm:") << "#-" << formatImm(-OffImm) << markup(">");
+ O << ", ";
+ markup(O, Markup::Immediate) << "#-" << formatImm(-OffImm);
} else if (AlwaysPrintImm0 || OffImm > 0) {
- O << ", " << markup("<imm:") << "#" << formatImm(OffImm) << markup(">");
+ O << ", ";
+ markup(O, Markup::Immediate) << "#" << formatImm(OffImm);
}
- O << "]" << markup(">");
+ O << "]";
}
template <bool AlwaysPrintImm0>
@@ -1207,7 +1224,8 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum + 1);
- O << markup("<mem:") << "[";
+ WithMarkup ScopedMarkup = markup(O, Markup::Memory);
+ O << "[";
printRegName(O, MO1.getReg());
int32_t OffImm = (int32_t)MO2.getImm();
@@ -1216,11 +1234,13 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
if (OffImm == INT32_MIN)
OffImm = 0;
if (isSub) {
- O << ", " << markup("<imm:") << "#-" << -OffImm << markup(">");
+ O << ", ";
+ markup(O, Markup::Immediate) << "#-" << -OffImm;
} else if (AlwaysPrintImm0 || OffImm > 0) {
- O << ", " << markup("<imm:") << "#" << OffImm << markup(">");
+ O << ", ";
+ markup(O, Markup::Immediate) << "#" << OffImm;
}
- O << "]" << markup(">");
+ O << "]";
}
template <bool AlwaysPrintImm0>
@@ -1236,7 +1256,8 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
return;
}
- O << markup("<mem:") << "[";
+ WithMarkup ScopedMarkup = markup(O, Markup::Memory);
+ O << "[";
printRegName(O, MO1.getReg());
int32_t OffImm = (int32_t)MO2.getImm();
@@ -1248,11 +1269,13 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
if (OffImm == INT32_MIN)
OffImm = 0;
if (isSub) {
- O << ", " << markup("<imm:") << "#-" << -OffImm << markup(">");
+ O << ", ";
+ markup(O, Markup::Immediate) << "#-" << -OffImm;
} else if (AlwaysPrintImm0 || OffImm > 0) {
- O << ", " << markup("<imm:") << "#" << OffImm << markup(">");
+ O << ", ";
+ markup(O, Markup::Immediate) << "#" << OffImm;
}
- O << "]" << markup(">");
+ O << "]";
}
void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(
@@ -1261,13 +1284,14 @@ void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum + 1);
- O << markup("<mem:") << "[";
+ WithMarkup ScopedMarkup = markup(O, Markup::Memory);
+ O << "[";
printRegName(O, MO1.getReg());
if (MO2.getImm()) {
- O << ", " << markup("<imm:") << "#" << formatImm(MO2.getImm() * 4)
- << markup(">");
+ O << ", ";
+ markup(O, Markup::Immediate) << "#" << formatImm(MO2.getImm() * 4);
}
- O << "]" << markup(">");
+ O << "]";
}
void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(
@@ -1275,14 +1299,14 @@ void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
int32_t OffImm = (int32_t)MO1.getImm();
- O << ", " << markup("<imm:");
+ O << ", ";
+ WithMarkup ScopedMarkup = markup(O, Markup::Immediate);
if (OffImm == INT32_MIN)
O << "#-0";
else if (OffImm < 0)
O << "#-" << -OffImm;
else
O << "#" << OffImm;
- O << markup(">");
}
void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(
@@ -1293,14 +1317,14 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(
assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
- O << ", " << markup("<imm:");
+ O << ", ";
+ WithMarkup ScopedMarkup = markup(O, Markup::Immediate);
if (OffImm == INT32_MIN)
O << "#-0";
else if (OffImm < 0)
O << "#-" << -OffImm;
else
O << "#" << OffImm;
- O << markup(">");
}
void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
@@ -1311,7 +1335,8 @@ void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
const MCOperand &MO2 = MI->getOperand(OpNum + 1);
const MCOperand &MO3 = MI->getOperand(OpNum + 2);
- O << markup("<mem:") << "[";
+ WithMarkup ScopedMarkup = markup(O, Markup::Memory);
+ O << "[";
printRegName(O, MO1.getReg());
assert(MO2.getReg() && "Invalid so_reg load / store address!");
@@ -1321,17 +1346,17 @@ void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
unsigned ShAmt = MO3.getImm();
if (ShAmt) {
assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
- O << ", lsl " << markup("<imm:") << "#" << ShAmt << markup(">");
+ O << ", lsl ";
+ markup(O, Markup::Immediate) << "#" << ShAmt;
}
- O << "]" << markup(">");
+ O << "]";
}
void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
- O << markup("<imm:") << '#' << ARM_AM::getFPImmFloat(MO.getImm())
- << markup(">");
+ markup(O, Markup::Immediate) << '#' << ARM_AM::getFPImmFloat(MO.getImm());
}
void ARMInstPrinter::printVMOVModImmOperand(const MCInst *MI, unsigned OpNum,
@@ -1340,16 +1365,17 @@ void ARMInstPrinter::printVMOVModImmOperand(const MCInst *MI, unsigned OpNum,
unsigned EncodedImm = MI->getOperand(OpNum).getImm();
unsigned EltBits;
uint64_t Val = ARM_AM::decodeVMOVModImm(EncodedImm, EltBits);
- O << markup("<imm:") << "#0x";
+
+ WithMarkup ScopedMarkup = markup(O, Markup::Immediate);
+ O << "#0x";
O.write_hex(Val);
- O << markup(">");
}
void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
- O << markup("<imm:") << "#" << formatImm(Imm + 1) << markup(">");
+ markup(O, Markup::Immediate) << "#" << formatImm(Imm + 1);
}
void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
@@ -1359,7 +1385,8 @@ void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
if (Imm == 0)
return;
assert(Imm <= 3 && "illegal ror immediate!");
- O << ", ror " << markup("<imm:") << "#" << 8 * Imm << markup(">");
+ O << ", ror ";
+ markup(O, Markup::Immediate) << "#" << 8 * Imm;
}
void ARMInstPrinter::printModImmOperand(const MCInst *MI, unsigned OpNum,
@@ -1389,30 +1416,29 @@ void ARMInstPrinter::printModImmOperand(const MCInst *MI, unsigned OpNum,
int32_t Rotated = llvm::rotr<uint32_t>(Bits, Rot);
if (ARM_AM::getSOImmVal(Rotated) == Op.getImm()) {
// #rot has the least possible value
- O << "#" << markup("<imm:");
+ O << "#";
if (PrintUnsigned)
- O << static_cast<uint32_t>(Rotated);
+ markup(O, Markup::Immediate) << static_cast<uint32_t>(Rotated);
else
- O << Rotated;
- O << markup(">");
+ markup(O, Markup::Immediate) << Rotated;
return;
}
// Explicit #bits, #rot implied
- O << "#" << markup("<imm:") << Bits << markup(">") << ", #" << markup("<imm:")
- << Rot << markup(">");
+ O << "#";
+ markup(O, Markup::Immediate) << Bits;
+ O << ", #";
+ markup(O, Markup::Immediate) << Rot;
}
void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O) {
- O << markup("<imm:") << "#" << 16 - MI->getOperand(OpNum).getImm()
- << markup(">");
+ markup(O, Markup::Immediate) << "#" << 16 - MI->getOperand(OpNum).getImm();
}
void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O) {
- O << markup("<imm:") << "#" << 32 - MI->getOperand(OpNum).getImm()
- << markup(">");
+ markup(O, Markup::Immediate) << "#" << 32 - MI->getOperand(OpNum).getImm();
}
void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
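Every ARMInstPrinter hunk above applies the same mechanical change: the old string-token API, which required a matching markup("<mem:") and markup(">") pair on every path, becomes a scoped object whose destructor emits the closing tag, so an early return can no longer leave a marker unbalanced. A minimal stand-alone sketch of the idea; the real WithMarkup/Markup types live in MCInstPrinter and their exact interface is an assumption here:

    #include <iostream>
    #include <ostream>

    enum class Markup { Immediate, Memory };

    class WithMarkup {
      std::ostream &OS;
      bool Enabled;

    public:
      // Emit the opening tag immediately.
      WithMarkup(std::ostream &OS, Markup M, bool Enabled)
          : OS(OS), Enabled(Enabled) {
        if (Enabled)
          OS << (M == Markup::Memory ? "<mem:" : "<imm:");
      }
      // The closing tag is emitted when the object dies, so a printer
      // cannot forget it or mismatch it.
      ~WithMarkup() {
        if (Enabled)
          OS << ">";
      }
      template <typename T> WithMarkup &operator<<(const T &V) {
        OS << V;
        return *this;
      }
    };

    int main() {
      // Temporary form: the '>' lands at the end of the full expression.
      WithMarkup(std::cout, Markup::Immediate, true) << "#" << 42; // <imm:#42>
      std::cout << '\n';
      {
        // Named form: the memory bracket stays open across several prints.
        WithMarkup Scoped(std::cout, Markup::Memory, true);
        std::cout << "[r0, #4]";
      } // '>' emitted here, giving <mem:[r0, #4]>
      std::cout << '\n';
    }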
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 8c642f61019a..3f37acff292b 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -29,6 +29,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -436,19 +437,7 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- void EmitByte(unsigned char C, raw_ostream &OS) const {
- OS << (char)C;
- }
-
- void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const {
- // Output the constant in little endian byte order.
- for (unsigned i = 0; i != Size; ++i) {
- unsigned Shift = IsLittleEndian ? i * 8 : (Size - 1 - i) * 8;
- EmitByte((Val >> Shift) & 0xff, OS);
- }
- }
-
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -1894,10 +1883,10 @@ getShiftRight64Imm(const MCInst &MI, unsigned Op,
return 64 - MI.getOperand(Op).getImm();
}
-void ARMMCCodeEmitter::
-encodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+void ARMMCCodeEmitter::encodeInstruction(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
// Pseudo instructions don't get encoded.
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
uint64_t TSFlags = Desc.TSFlags;
@@ -1910,14 +1899,19 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
else
llvm_unreachable("Unexpected instruction size!");
+ auto Endian =
+ IsLittleEndian ? llvm::endianness::little : llvm::endianness::big;
uint32_t Binary = getBinaryCodeForInstr(MI, Fixups, STI);
- // Thumb 32-bit wide instructions need to emit the high order halfword
- // first.
- if (isThumb(STI) && Size == 4) {
- EmitConstant(Binary >> 16, 2, OS);
- EmitConstant(Binary & 0xffff, 2, OS);
- } else
- EmitConstant(Binary, Size, OS);
+ if (Size == 2) {
+ support::endian::write<uint16_t>(CB, Binary, Endian);
+ } else if (isThumb(STI)) {
+ // Thumb 32-bit wide instructions need to emit the high order halfword
+ // first.
+ support::endian::write<uint16_t>(CB, Binary >> 16, Endian);
+ support::endian::write<uint16_t>(CB, Binary & 0xffff, Endian);
+ } else {
+ support::endian::write<uint32_t>(CB, Binary, Endian);
+ }
++MCNumEmitted; // Keep track of the # of mi's emitted.
}
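Dropping the hand-rolled EmitByte/EmitConstant helpers means byte order is now handled by EndianStream, writing into the SmallVector code buffer that encodeInstruction receives. A plain-C++ stand-in for the write calls, showing the byte sequence produced for a hypothetical 32-bit Thumb2 encoding:

    #include <cstdint>
    #include <vector>

    enum class endianness { little, big };

    // Stand-in for support::endian::write<uint16_t>(CB, V, Endian).
    static void write16(std::vector<char> &CB, uint16_t V, endianness E) {
      if (E == endianness::little) {
        CB.push_back(char(V & 0xff)); // low byte first
        CB.push_back(char(V >> 8));
      } else {
        CB.push_back(char(V >> 8));   // high byte first
        CB.push_back(char(V & 0xff));
      }
    }

    int main() {
      std::vector<char> CB;
      uint32_t Binary = 0xAABBCCDD; // hypothetical 32-bit Thumb2 encoding
      // High-order halfword first, each halfword in target byte order:
      write16(CB, uint16_t(Binary >> 16), endianness::little);    // BB AA
      write16(CB, uint16_t(Binary & 0xffff), endianness::little); // DD CC
      // CB now holds BB AA DD CC, matching the old EmitConstant pair.
    }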
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index b65d1b24e63d..1237e50c22fd 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -177,7 +177,7 @@ void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
switchVendor("aeabi");
const StringRef CPUString = STI.getCPU();
- if (!CPUString.empty() && !CPUString.startswith("generic")) {
+ if (!CPUString.empty() && !CPUString.starts_with("generic")) {
// FIXME: remove krait check when GNU tools support krait cpu
if (STI.hasFeature(ARM::ProcKrait)) {
emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9");
@@ -238,14 +238,18 @@ void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
? ARMBuildAttrs::AllowNeonARMv8_1a
: ARMBuildAttrs::AllowNeonARMv8);
} else {
- if (STI.hasFeature(ARM::FeatureFPARMv8_D16_SP))
+ if (STI.hasFeature(ARM::FeatureFPARMv8_D16_SP)) {
// FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
// FPU, but there are two different names for it depending on the CPU.
- emitFPU(STI.hasFeature(ARM::FeatureD32)
- ? ARM::FK_FP_ARMV8
- : (STI.hasFeature(ARM::FeatureFP64) ? ARM::FK_FPV5_D16
- : ARM::FK_FPV5_SP_D16));
- else if (STI.hasFeature(ARM::FeatureVFP4_D16_SP))
+ if (STI.hasFeature(ARM::FeatureD32))
+ emitFPU(ARM::FK_FP_ARMV8);
+ else {
+ emitFPU(STI.hasFeature(ARM::FeatureFP64) ? ARM::FK_FPV5_D16
+ : ARM::FK_FPV5_SP_D16);
+ if (STI.hasFeature(ARM::HasMVEFloatOps))
+ emitArchExtension(ARM::AEK_SIMD | ARM::AEK_DSP | ARM::AEK_FP);
+ }
+ } else if (STI.hasFeature(ARM::FeatureVFP4_D16_SP))
emitFPU(STI.hasFeature(ARM::FeatureD32)
? ARM::FK_VFPV4
: (STI.hasFeature(ARM::FeatureFP64) ? ARM::FK_VFPV4_D16
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
index 62404f7add48..c62d17fd427a 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "ARMUnwindOpAsm.h"
+#include "llvm/ADT/bit.h"
#include "llvm/Support/ARMEHABI.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index 31a814900ca5..c4427948d3b8 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -19,7 +19,6 @@
#include "llvm/MC/MCWinCOFFObjectWriter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include <cassert>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
index 5ac6d481e3d9..48e63e04b119 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
@@ -244,7 +244,7 @@ Value *MVEGatherScatterLowering::decomposePtr(Value *Ptr, Value *&Offsets,
if (PtrTy->getNumElements() != 4 || MemoryTy->getScalarSizeInBits() == 32)
return nullptr;
Value *Zero = ConstantInt::get(Builder.getInt32Ty(), 0);
- Value *BasePtr = Builder.CreateIntToPtr(Zero, Builder.getInt8PtrTy());
+ Value *BasePtr = Builder.CreateIntToPtr(Zero, Builder.getPtrTy());
Offsets = Builder.CreatePtrToInt(
Ptr, FixedVectorType::get(Builder.getInt32Ty(), 4));
Scale = 0;
@@ -1224,7 +1224,7 @@ bool MVEGatherScatterLowering::optimiseAddress(Value *Address, BasicBlock *BB,
// pointer.
if (Offsets && Base && Base != GEP) {
assert(Scale == 1 && "Expected to fold GEP to a scale of 1");
- Type *BaseTy = Builder.getInt8PtrTy();
+ Type *BaseTy = Builder.getPtrTy();
if (auto *VecTy = dyn_cast<FixedVectorType>(Base->getType()))
BaseTy = FixedVectorType::get(BaseTy, VecTy);
GetElementPtrInst *NewAddress = GetElementPtrInst::Create(
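Both MVEGatherScatterLowering changes are opaque-pointer fallout: there is no i8* type to request any more, only the one `ptr` type per address space, so getInt8PtrTy() gives way to getPtrTy(). A short sketch, assuming only the usual IRBuilder headers:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/LLVMContext.h"

    using namespace llvm;

    void ptrTypeDemo(LLVMContext &Ctx) {
      IRBuilder<> Builder(Ctx);
      // With opaque pointers these denote the single `ptr` type in
      // their address space; the pointee type no longer exists.
      Type *P0 = Builder.getPtrTy();  // address space 0
      Type *P1 = Builder.getPtrTy(1); // address space 1
      (void)P0;
      (void)P1;
    }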
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp
index 9e5488313770..fe97d4f75899 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp
@@ -381,7 +381,7 @@ void MVETailPredication::InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask,
cast<FixedVectorType>(ActiveLaneMask->getType())->getNumElements();
// Insert a phi to count the number of elements processed by the loop.
- Builder.SetInsertPoint(L->getHeader()->getFirstNonPHI());
+ Builder.SetInsertPoint(L->getHeader(), L->getHeader()->getFirstNonPHIIt());
PHINode *Processed = Builder.CreatePHI(Ty, 2);
Processed->addIncoming(Start, L->getLoopPreheader());
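getFirstNonPHI() returns a raw Instruction*, which drops the position information an iterator carries; the block-plus-iterator overload of SetInsertPoint() keeps it, which the newer debug-info representation is understood to depend on. A sketch under that assumption:

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/IRBuilder.h"

    using namespace llvm;

    void insertAfterPHIs(Function &F) {
      BasicBlock &BB = F.getEntryBlock();
      IRBuilder<> Builder(F.getContext());
      // Block + iterator overload: insert before the first non-PHI slot.
      Builder.SetInsertPoint(&BB, BB.getFirstNonPHIIt());
      Builder.CreateAlloca(Builder.getInt32Ty()); // placeholder insertion
    }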
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
index d6d43b9143d6..d2b0bcf1250f 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
@@ -11,7 +11,6 @@
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "Thumb2InstrInfo.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index c2962c4857c3..0f4ece64bff5 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -40,7 +40,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include <bitset>
#include <cassert>
#include <iterator>
#include <vector>
@@ -538,18 +537,30 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
AFI->getDPRCalleeSavedAreaSize() +
ArgRegsSaveSize);
+ // We are likely to need a scratch register and we know all callee-save
+ // registers are free at this point in the epilogue, so pick one.
+ unsigned ScratchRegister = ARM::NoRegister;
+ bool HasFP = hasFP(MF);
+ for (auto &I : MFI.getCalleeSavedInfo()) {
+ Register Reg = I.getReg();
+ if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) {
+ ScratchRegister = Reg;
+ break;
+ }
+ }
+
if (AFI->shouldRestoreSPFromFP()) {
NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
// Reset SP based on frame pointer only if the stack frame extends beyond
// frame pointer stack slot, the target is ELF and the function has FP, or
// the target uses var sized objects.
if (NumBytes) {
- assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
+ assert(ScratchRegister != ARM::NoRegister &&
"No scratch register to restore SP from FP!");
- emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, ScratchRegister, FramePtr, -NumBytes,
TII, *RegInfo, MachineInstr::FrameDestroy);
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
- .addReg(ARM::R4)
+ .addReg(ScratchRegister)
.add(predOps(ARMCC::AL))
.setMIFlag(MachineInstr::FrameDestroy);
} else
@@ -558,18 +569,6 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
.add(predOps(ARMCC::AL))
.setMIFlag(MachineInstr::FrameDestroy);
} else {
- // For a large stack frame, we might need a scratch register to store
- // the size of the frame. We know all callee-save registers are free
- // at this point in the epilogue, so pick one.
- unsigned ScratchRegister = ARM::NoRegister;
- bool HasFP = hasFP(MF);
- for (auto &I : MFI.getCalleeSavedInfo()) {
- Register Reg = I.getReg();
- if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) {
- ScratchRegister = Reg;
- break;
- }
- }
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
&MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
MachineBasicBlock::iterator PMBBI = std::prev(MBBI);
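This hunk hoists the scratch-register search above both branches so the shouldRestoreSPFromFP() path stops hard-coding R4; the old assert only verified that R4 was not pristine, not that it was a valid choice. The selection logic in isolation, with enum values standing in for ARM register numbers:

    #include <vector>

    enum Reg { NoRegister = 0, R4 = 4, R5, R6, R7, R11 = 11 };

    // Any callee-saved low register that is not serving as the frame
    // pointer is free this late in the epilogue.
    Reg pickScratch(const std::vector<Reg> &CalleeSaved, bool HasFP,
                    Reg FramePtr) {
      for (Reg R : CalleeSaved) {
        bool IsLow = (R >= R4 && R <= R7); // stand-in for isARMLowRegister
        if (IsLow && !(HasFP && R == FramePtr))
          return R; // first suitable register wins
      }
      return NoRegister; // caller asserts this cannot happen when needed
    }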
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
index 0c010ed1eb34..6e88c9378e9b 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -128,6 +128,19 @@ static void emitThumbRegPlusImmInReg(
const ARMBaseRegisterInfo &MRI, unsigned MIFlags = MachineInstr::NoFlags) {
MachineFunction &MF = *MBB.getParent();
const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>();
+
+ // Use a single sp-relative add if the immediate is small enough.
+ if (BaseReg == ARM::SP &&
+ (DestReg.isVirtual() || isARMLowRegister(DestReg)) && NumBytes >= 0 &&
+ NumBytes <= 1020 && (NumBytes % 4) == 0) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), DestReg)
+ .addReg(ARM::SP)
+ .addImm(NumBytes / 4)
+ .add(predOps(ARMCC::AL))
+ .setMIFlags(MIFlags);
+ return;
+ }
+
bool isHigh = !isARMLowRegister(DestReg) ||
(BaseReg != 0 && !isARMLowRegister(BaseReg));
bool isSub = false;
@@ -160,9 +173,60 @@ static void emitThumbRegPlusImmInReg(
.addReg(LdReg, RegState::Kill)
.setMIFlags(MIFlags);
} else if (ST.genExecuteOnly()) {
- unsigned XOInstr = ST.useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
- BuildMI(MBB, MBBI, dl, TII.get(XOInstr), LdReg)
- .addImm(NumBytes).setMIFlags(MIFlags);
+ if (ST.useMovt()) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), LdReg)
+ .addImm(NumBytes)
+ .setMIFlags(MIFlags);
+ } else if (!CanChangeCC) {
+ // tMOVi32imm is lowered to a sequence of flag-setting instructions, so
+ // if CPSR is live we need to save and restore CPSR around it.
+ // TODO Try inserting the tMOVi32imm at an earlier point, where CPSR is
+ // dead.
+ bool LiveCpsr = false, CpsrWrite = false;
+ auto isCpsr = [](auto &MO) { return MO.getReg() == ARM::CPSR; };
+ for (auto Iter = MBBI; Iter != MBB.instr_end(); ++Iter) {
+ // If CPSR is used after this instruction (and there's not a def before
+ // that) then CPSR is live.
+ if (any_of(Iter->all_uses(), isCpsr)) {
+ LiveCpsr = true;
+ break;
+ }
+ if (any_of(Iter->all_defs(), isCpsr)) {
+ CpsrWrite = true;
+ break;
+ }
+ }
+ // If there's no use or def of CPSR then it may be live if it's a
+ // live-out value.
+ auto liveOutIsCpsr = [](auto &Out) { return Out.PhysReg == ARM::CPSR; };
+ if (!LiveCpsr && !CpsrWrite)
+ LiveCpsr = any_of(MBB.liveouts(), liveOutIsCpsr);
+
+ Register CPSRSaveReg;
+ unsigned APSREncoding;
+ if (LiveCpsr) {
+ CPSRSaveReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
+ APSREncoding =
+ ARMSysReg::lookupMClassSysRegByName("apsr_nzcvq")->Encoding;
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MRS_M), CPSRSaveReg)
+ .addImm(APSREncoding)
+ .add(predOps(ARMCC::AL))
+ .addReg(ARM::CPSR, RegState::Implicit);
+ }
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi32imm), LdReg)
+ .addImm(NumBytes)
+ .setMIFlags(MIFlags);
+ if (LiveCpsr) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MSR_M))
+ .addImm(APSREncoding)
+ .addReg(CPSRSaveReg, RegState::Kill)
+ .add(predOps(ARMCC::AL));
+ }
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi32imm), LdReg)
+ .addImm(NumBytes)
+ .setMIFlags(MIFlags);
+ }
} else
MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes, ARMCC::AL, 0,
MIFlags);
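The early exit added at the top of this function is valid because the Thumb1 ADD <Rd>, sp, #imm encoding (tADDrSPi) stores an 8-bit immediate counted in words, giving byte offsets 0 to 1020 in steps of 4. A sketch of just the guard and the /4 encoding, separate from the MC layer:

    #include <cstdint>
    #include <optional>

    // Returns the encoded immediate field if one tADDrSPi suffices.
    std::optional<uint8_t> encodeAddSpImm(int64_t NumBytes) {
      if (NumBytes < 0 || NumBytes > 1020 || (NumBytes % 4) != 0)
        return std::nullopt;        // fall through to the long sequence
      return uint8_t(NumBytes / 4); // immediate field stored in words
    }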
@@ -422,19 +486,33 @@ bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II,
return true;
}
+ // The offset doesn't fit, but we may be able to put some of the offset into
+ // the ldr to simplify the generation of the rest of it.
NumBits = 5;
Mask = (1 << NumBits) - 1;
-
- // If this is a thumb spill / restore, we will be using a constpool load to
- // materialize the offset.
- if (Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
- ImmOp.ChangeToImmediate(0);
- } else {
- // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
- ImmedOffset = ImmedOffset & Mask;
- ImmOp.ChangeToImmediate(ImmedOffset);
- Offset &= ~(Mask * Scale);
+ InstrOffs = 0;
+ auto &ST = MF.getSubtarget<ARMSubtarget>();
+ // If using the maximum ldr offset will put the rest into the range of a
+ // single sp-relative add then do so.
+ if (FrameReg == ARM::SP && Offset - (Mask * Scale) <= 1020) {
+ InstrOffs = Mask;
+ } else if (ST.genExecuteOnly()) {
+ // With execute-only the offset is generated either with movw+movt or an
+ // add+lsl sequence. If subtracting an offset will make the top half zero
+ // then that saves a movt or lsl+add. Otherwise if we don't have movw then
+ // we may be able to subtract a value such that it makes the bottom byte
+ // zero, saving an add.
+ unsigned BottomBits = (Offset / Scale) & Mask;
+ bool CanMakeBottomByteZero = ((Offset - BottomBits * Scale) & 0xff) == 0;
+ bool TopHalfZero = (Offset & 0xffff0000) == 0;
+ bool CanMakeTopHalfZero = ((Offset - Mask * Scale) & 0xffff0000) == 0;
+ if (!TopHalfZero && CanMakeTopHalfZero)
+ InstrOffs = Mask;
+ else if (!ST.useMovt() && CanMakeBottomByteZero)
+ InstrOffs = BottomBits;
}
+ ImmOp.ChangeToImmediate(InstrOffs);
+ Offset -= InstrOffs * Scale;
}
return Offset == 0;
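The rewritten tail splits a too-large frame offset between the load's own immediate field and a follow-up add, rather than always zeroing the field and materializing everything separately. A worked sketch with illustrative numbers, mirroring the NumBits = 5, Scale = 4 path above:

    #include <cstdio>

    int main() {
      unsigned Scale = 4, NumBits = 5;
      unsigned Mask = (1u << NumBits) - 1; // 31 words, 124 bytes max
      int Offset = 1132;                   // hypothetical frame offset
      // Soaking up the full 124 bytes leaves 1132 - 124 = 1008, which a
      // single sp-relative add (limit 1020) can cover.
      unsigned InstrOffs = (Offset - int(Mask * Scale) <= 1020) ? Mask : 0;
      Offset -= int(InstrOffs * Scale);
      std::printf("ldr imm field = %u (words), remaining add = #%d\n",
                  InstrOffs, Offset);
    }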
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVR.h b/contrib/llvm-project/llvm/lib/Target/AVR/AVR.h
index 020c3d4ec6c7..4b1336ecd661 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVR.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVR.h
@@ -26,8 +26,7 @@ class FunctionPass;
class PassRegistry;
Pass *createAVRShiftExpandPass();
-FunctionPass *createAVRISelDag(AVRTargetMachine &TM,
- CodeGenOpt::Level OptLevel);
+FunctionPass *createAVRISelDag(AVRTargetMachine &TM, CodeGenOptLevel OptLevel);
FunctionPass *createAVRExpandPseudoPass();
FunctionPass *createAVRFrameAnalyzerPass();
FunctionPass *createAVRBranchSelectionPass();
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
index ceee44ec0f20..1c8213b668f7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
@@ -118,8 +118,8 @@ bool AVRAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
Register Reg = MO.getReg();
unsigned ByteNumber = ExtraCode[0] - 'A';
- unsigned OpFlags = MI->getOperand(OpNum - 1).getImm();
- unsigned NumOpRegs = InlineAsm::getNumOperandRegisters(OpFlags);
+ const InlineAsm::Flag OpFlags(MI->getOperand(OpNum - 1).getImm());
+ const unsigned NumOpRegs = OpFlags.getNumOperandRegisters();
const AVRSubtarget &STI = MF->getSubtarget<AVRSubtarget>();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
@@ -176,8 +176,8 @@ bool AVRAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
// If NumOpRegs == 2, then we assume it is product of a FrameIndex expansion
// and the second operand is an Imm.
- unsigned OpFlags = MI->getOperand(OpNum - 1).getImm();
- unsigned NumOpRegs = InlineAsm::getNumOperandRegisters(OpFlags);
+ const InlineAsm::Flag OpFlags(MI->getOperand(OpNum - 1).getImm());
+ const unsigned NumOpRegs = OpFlags.getNumOperandRegisters();
if (NumOpRegs == 2) {
assert(MI->getOperand(OpNum).getReg() != AVR::R27R26 &&
@@ -251,13 +251,13 @@ bool AVRAsmPrinter::doFinalization(Module &M) {
}
auto *Section = cast<MCSectionELF>(TLOF.SectionForGlobal(&GO, TM));
- if (Section->getName().startswith(".data"))
+ if (Section->getName().starts_with(".data"))
NeedsCopyData = true;
- else if (Section->getName().startswith(".rodata") && SubTM->hasLPM())
+ else if (Section->getName().starts_with(".rodata") && SubTM->hasLPM())
// AVRs that have a separate program memory (that's most AVRs) store
// .rodata sections in RAM.
NeedsCopyData = true;
- else if (Section->getName().startswith(".bss"))
+ else if (Section->getName().starts_with(".bss"))
NeedsClearBSS = true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRDevices.td b/contrib/llvm-project/llvm/lib/Target/AVR/AVRDevices.td
index f6b36dba7733..5eca92ab4b6c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRDevices.td
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRDevices.td
@@ -13,7 +13,7 @@
// this by simply setting the same dummy member for all feature sets, which is
// then ignored.
class FeatureSet<string name, string desc, list<SubtargetFeature> i>
- : SubtargetFeature<name, "m_FeatureSetDummy", "true", desc, i>;
+ : SubtargetFeature<name, "HasFeatureSet"#NAME, "true", desc, i>;
// A family of microcontrollers, defining a set of supported features.
class Family<string name, list<SubtargetFeature> i>
@@ -31,103 +31,103 @@ class Family<string name, list<SubtargetFeature> i>
// `LDS Rd, K`
// `STS k, Rr`
// `PUSH`/`POP`
-def FeatureSRAM : SubtargetFeature<"sram", "m_hasSRAM", "true",
+def FeatureSRAM : SubtargetFeature<"sram", "HasSRAM", "true",
"The device has random access memory">;
// The device supports the `JMP k` and `CALL k` instructions.
-def FeatureJMPCALL : SubtargetFeature<"jmpcall", "m_hasJMPCALL", "true",
+def FeatureJMPCALL : SubtargetFeature<"jmpcall", "HasJMPCALL", "true",
"The device supports the `JMP` and "
"`CALL` instructions">;
// The device supports the indirect branches `IJMP` and `ICALL`.
-def FeatureIJMPCALL : SubtargetFeature<"ijmpcall", "m_hasIJMPCALL", "true",
+def FeatureIJMPCALL : SubtargetFeature<"ijmpcall", "HasSRAMIJMPCALL", "true",
"The device supports `IJMP`/`ICALL`"
"instructions">;
// The device supports the extended indirect branches `EIJMP` and `EICALL`.
-def FeatureEIJMPCALL : SubtargetFeature<"eijmpcall", "m_hasEIJMPCALL", "true",
+def FeatureEIJMPCALL : SubtargetFeature<"eijmpcall", "HasEIJMPCALL", "true",
"The device supports the "
"`EIJMP`/`EICALL` instructions">;
// The device supports `ADDI Rd, K`, `SUBI Rd, K`.
-def FeatureADDSUBIW : SubtargetFeature<"addsubiw", "m_hasADDSUBIW", "true",
+def FeatureADDSUBIW : SubtargetFeature<"addsubiw", "HasADDSUBIW", "true",
"Enable 16-bit register-immediate "
"addition and subtraction instructions">;
// The device has an 8-bit stack pointer (SP) register.
def FeatureSmallStack
- : SubtargetFeature<"smallstack", "m_hasSmallStack", "true",
+ : SubtargetFeature<"smallstack", "HasSmallStack", "true",
"The device has an 8-bit "
"stack pointer">;
// The device supports the 16-bit GPR pair MOVW instruction.
-def FeatureMOVW : SubtargetFeature<"movw", "m_hasMOVW", "true",
+def FeatureMOVW : SubtargetFeature<"movw", "HasMOVW", "true",
"The device supports the 16-bit MOVW "
"instruction">;
// The device supports the `LPM` instruction, with implied destination being r0.
-def FeatureLPM : SubtargetFeature<"lpm", "m_hasLPM", "true",
+def FeatureLPM : SubtargetFeature<"lpm", "HasLPM", "true",
"The device supports the `LPM` instruction">;
// The device supports the `LPM Rd, Z[+]` instruction.
-def FeatureLPMX : SubtargetFeature<"lpmx", "m_hasLPMX", "true",
+def FeatureLPMX : SubtargetFeature<"lpmx", "HasLPMX", "true",
"The device supports the `LPM Rd, Z[+]` "
"instruction">;
// The device supports the `ELPM` instruction.
-def FeatureELPM : SubtargetFeature<"elpm", "m_hasELPM", "true",
+def FeatureELPM : SubtargetFeature<"elpm", "HasELPM", "true",
"The device supports the ELPM instruction">;
// The device supports the `ELPM Rd, Z[+]` instructions.
-def FeatureELPMX : SubtargetFeature<"elpmx", "m_hasELPMX", "true",
+def FeatureELPMX : SubtargetFeature<"elpmx", "HasELPMX", "true",
"The device supports the `ELPM Rd, Z[+]` "
"instructions">;
// The device supports the `SPM` instruction.
-def FeatureSPM : SubtargetFeature<"spm", "m_hasSPM", "true",
+def FeatureSPM : SubtargetFeature<"spm", "HasSPM", "true",
"The device supports the `SPM` instruction">;
// The device supports the `SPM Z+` instruction.
-def FeatureSPMX : SubtargetFeature<"spmx", "m_hasSPMX", "true",
+def FeatureSPMX : SubtargetFeature<"spmx", "HasSPMX", "true",
"The device supports the `SPM Z+` "
"instruction">;
// The device supports the `DES k` instruction.
-def FeatureDES : SubtargetFeature<"des", "m_hasDES", "true",
+def FeatureDES : SubtargetFeature<"des", "HasDES", "true",
"The device supports the `DES k` encryption "
"instruction">;
// The device supports the Read-Write-Modify instructions
// XCH, LAS, LAC, and LAT.
-def FeatureRMW : SubtargetFeature<"rmw", "m_supportsRMW", "true",
+def FeatureRMW : SubtargetFeature<"rmw", "SupportsRMW", "true",
"The device supports the read-write-modify "
"instructions: XCH, LAS, LAC, LAT">;
// The device supports the `[F]MUL[S][U]` family of instructions.
def FeatureMultiplication
- : SubtargetFeature<"mul", "m_supportsMultiplication", "true",
+ : SubtargetFeature<"mul", "SupportsMultiplication", "true",
"The device supports the "
"multiplication instructions">;
// The device supports the `BREAK` instruction.
-def FeatureBREAK : SubtargetFeature<"break", "m_hasBREAK", "true",
+def FeatureBREAK : SubtargetFeature<"break", "HasBREAK", "true",
"The device supports the `BREAK` debugging "
"instruction">;
// The device has instruction encodings specific to the Tiny core.
def FeatureTinyEncoding
- : SubtargetFeature<"tinyencoding", "m_hasTinyEncoding", "true",
+ : SubtargetFeature<"tinyencoding", "HasTinyEncoding", "true",
"The device has Tiny core specific "
"instruction encodings">;
// When writing a 16-bit port or storing a 16-bit word, do the low byte first.
def FeatureLowByteFirst
- : SubtargetFeature<"lowbytefirst", "m_hasLowByteFirst", "true",
+ : SubtargetFeature<"lowbytefirst", "HasLowByteFirst", "true",
"Do the low byte first when writing a 16-bit port or "
"storing a 16-bit word">;
// The device has CPU registers mapped in data address space
-def FeatureMMR : SubtargetFeature<"memmappedregs", "m_hasMemMappedGPR", "true",
+def FeatureMMR : SubtargetFeature<"memmappedregs", "HasMemMappedGPR", "true",
"The device has CPU registers "
"mapped in data address space">;
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRFrameLowering.cpp
index aff2d5ed7b12..64dd0338bf60 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRFrameLowering.cpp
@@ -25,8 +25,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
-#include <vector>
-
namespace llvm {
AVRFrameLowering::AVRFrameLowering()
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
index bbb1de40be63..196122e45ab8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -33,7 +33,7 @@ public:
AVRDAGToDAGISel() = delete;
- AVRDAGToDAGISel(AVRTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ AVRDAGToDAGISel(AVRTargetMachine &TM, CodeGenOptLevel OptLevel)
: SelectionDAGISel(ID, TM, OptLevel), Subtarget(nullptr) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -43,7 +43,8 @@ public:
bool selectIndexedLoad(SDNode *N);
unsigned selectIndexedProgMemLoad(const LoadSDNode *LD, MVT VT, int Bank);
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintCode,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ InlineAsm::ConstraintCode ConstraintCode,
std::vector<SDValue> &OutOps) override;
// Include the pieces autogenerated from the target description.
@@ -200,9 +201,10 @@ unsigned AVRDAGToDAGISel::selectIndexedProgMemLoad(const LoadSDNode *LD, MVT VT,
}
bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, unsigned ConstraintCode, std::vector<SDValue> &OutOps) {
- assert((ConstraintCode == InlineAsm::Constraint_m ||
- ConstraintCode == InlineAsm::Constraint_Q) &&
+ const SDValue &Op, InlineAsm::ConstraintCode ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ assert((ConstraintCode == InlineAsm::ConstraintCode::m ||
+ ConstraintCode == InlineAsm::ConstraintCode::Q) &&
"Unexpected asm memory constraint");
MachineRegisterInfo &RI = MF->getRegInfo();
@@ -583,6 +585,6 @@ bool AVRDAGToDAGISel::trySelect(SDNode *N) {
}
FunctionPass *llvm::createAVRISelDag(AVRTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
return new AVRDAGToDAGISel(TM, OptLevel);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp
index ee0693cd0103..cd1dcfaea0eb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -976,7 +976,7 @@ SDValue AVRTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
Ops.push_back(Operand);
}
}
- unsigned Flags = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, 1);
+ InlineAsm::Flag Flags(InlineAsm::Kind::RegUse, 1);
Ops.push_back(DAG.getTargetConstant(Flags, dl, MVT::i32));
Ops.push_back(ZeroReg);
if (Glue) {
@@ -2440,6 +2440,11 @@ AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MF->insert(I, trueMBB);
MF->insert(I, falseMBB);
+ // Set the call frame size on entry to the new basic blocks.
+ unsigned CallFrameSize = TII.getCallFrameSizeAt(MI);
+ trueMBB->setCallFrameSize(CallFrameSize);
+ falseMBB->setCallFrameSize(CallFrameSize);
+
// Transfer remaining instructions and all successors of the current
// block to the block which will contain the Phi node for the
// select.
@@ -2516,13 +2521,13 @@ AVRTargetLowering::getConstraintType(StringRef Constraint) const {
return TargetLowering::getConstraintType(Constraint);
}
-unsigned
+InlineAsm::ConstraintCode
AVRTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
// Not sure if this is actually the right thing to do, but we got to do
// *something* [agnat]
switch (ConstraintCode[0]) {
case 'Q':
- return InlineAsm::Constraint_Q;
+ return InlineAsm::ConstraintCode::Q;
}
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
@@ -2717,7 +2722,7 @@ AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
+ StringRef Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
SDValue Result;
@@ -2725,7 +2730,7 @@ void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
EVT Ty = Op.getValueType();
// Currently only support length 1 constraints.
- if (Constraint.length() != 1) {
+ if (Constraint.size() != 1) {
return;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.h
index b696bebe7136..f60579593453 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.h
@@ -133,9 +133,10 @@ public:
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;
- unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override;
+ InlineAsm::ConstraintCode
+ getInlineAsmMemConstraint(StringRef ConstraintCode) const override;
- void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrFormats.td
index 653c7276ba7f..06d14a2fb47f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrFormats.td
@@ -75,17 +75,6 @@ class FRdRr<bits<4> opcode, bits<2> f, dag outs, dag ins, string asmstr,
let Inst{3 - 0} = rr{3 - 0};
}
-class FTST<bits<4> opcode, bits<2> f, dag outs, dag ins, string asmstr,
- list<dag> pattern> : AVRInst16<outs, ins, asmstr, pattern> {
- bits<5> rd;
-
- let Inst{15 - 12} = opcode;
- let Inst{11 - 10} = f;
- let Inst{9} = rd{4};
- let Inst{8 - 4} = rd;
- let Inst{3 - 0} = rd{3 - 0};
-}
-
//===----------------------------------------------------------------------===//
// Instruction of the format `<mnemonic> Z, Rd`
// <|1001|001r|rrrr|0ttt>
@@ -445,11 +434,6 @@ class F16<bits<16> opcode, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst = opcode;
}
-class F32<bits<32> opcode, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst32<outs, ins, asmstr, pattern> {
- let Inst = opcode;
-}
-
//===----------------------------------------------------------------------===//
// Branching instructions with immediate12: <|110f|kkkk|kkkk|kkkk|>
// f = secondary opcode = 1 bit
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.cpp
index b9d27c78ce8e..2640ad9e3626 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.cpp
@@ -43,7 +43,6 @@ void AVRInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const DebugLoc &DL, MCRegister DestReg,
MCRegister SrcReg, bool KillSrc) const {
- const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
const AVRRegisterInfo &TRI = *STI.getRegisterInfo();
unsigned Opc;
@@ -496,9 +495,7 @@ unsigned AVRInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getParent()->getParent();
const AVRTargetMachine &TM =
static_cast<const AVRTargetMachine &>(MF.getTarget());
- const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
const TargetInstrInfo &TII = *STI.getInstrInfo();
-
return TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
*TM.getMCAsmInfo());
}
@@ -542,7 +539,7 @@ bool AVRInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
llvm_unreachable("unexpected opcode!");
case AVR::JMPk:
case AVR::CALLk:
- return true;
+ return STI.hasJMPCALL();
case AVR::RCALLk:
case AVR::RJMPk:
return isIntN(13, BrOffset);
@@ -573,7 +570,10 @@ void AVRInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
if (STI.hasJMPCALL())
BuildMI(&MBB, DL, get(AVR::JMPk)).addMBB(&NewDestBB);
else
- report_fatal_error("cannot create long jump without FeatureJMPCALL");
+ // The RJMP may jump to a target beyond its legal range. We let the
+ // linker report 'out of range' rather than crashing or silently
+ // emitting incorrect assembly code.
+ BuildMI(&MBB, DL, get(AVR::RJMPk)).addMBB(&NewDestBB);
}
} // end of namespace llvm
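The new fallback leans on the range facts from isBranchOffsetInRange() above: JMPk/CALLk are only usable when the device actually has FeatureJMPCALL, while RJMPk/RCALLk carry a 12-bit signed word displacement, which is exactly the 13-bit signed byte offset that isIntN(13, BrOffset) tests. A plain stand-in for that test:

    #include <cstdint>

    // Stand-in for llvm::isIntN: does X fit in N signed bits?
    static bool fitsSigned(unsigned N, int64_t X) {
      return X >= -(INT64_C(1) << (N - 1)) && X < (INT64_C(1) << (N - 1));
    }

    bool rjmpInRange(int64_t ByteOffset) {
      // Byte offsets -4096 .. 4095; real targets are 2-byte aligned.
      return fitsSigned(13, ByteOffset);
    }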
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.td
index f93248b4940c..efaaec32ee6b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -1468,9 +1468,7 @@ class AtomicStore<PatFrag Op, RegisterClass DRC, RegisterClass PTRRC>
(ins PTRRC
: $rd, DRC
: $rr),
- "atomic_op", [(Op i16
- : $rd, DRC
- : $rr)]>;
+ "atomic_op", [(Op DRC:$rr, i16:$rd)]>;
class AtomicLoadOp<PatFrag Op, RegisterClass DRC, RegisterClass PTRRC>
: Pseudo<(outs DRC:$rd),
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.h b/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.h
index 5c7c600ebbf1..17505ce1f225 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.h
@@ -61,27 +61,9 @@ public:
const TargetMachine &TM);
// Subtarget feature getters.
- // See AVR.td for details.
- bool hasSRAM() const { return m_hasSRAM; }
- bool hasJMPCALL() const { return m_hasJMPCALL; }
- bool hasIJMPCALL() const { return m_hasIJMPCALL; }
- bool hasEIJMPCALL() const { return m_hasEIJMPCALL; }
- bool hasADDSUBIW() const { return m_hasADDSUBIW; }
- bool hasSmallStack() const { return m_hasSmallStack; }
- bool hasMOVW() const { return m_hasMOVW; }
- bool hasLPM() const { return m_hasLPM; }
- bool hasLPMX() const { return m_hasLPMX; }
- bool hasELPM() const { return m_hasELPM; }
- bool hasELPMX() const { return m_hasELPMX; }
- bool hasSPM() const { return m_hasSPM; }
- bool hasSPMX() const { return m_hasSPMX; }
- bool hasDES() const { return m_hasDES; }
- bool supportsRMW() const { return m_supportsRMW; }
- bool supportsMultiplication() const { return m_supportsMultiplication; }
- bool hasBREAK() const { return m_hasBREAK; }
- bool hasTinyEncoding() const { return m_hasTinyEncoding; }
- bool hasMemMappedGPR() const { return m_hasMemMappedGPR; }
- bool hasLowByteFirst() const { return m_hasLowByteFirst; }
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
+ bool GETTER() const { return ATTRIBUTE; }
+#include "AVRGenSubtargetInfo.inc"
uint8_t getIORegisterOffset() const { return hasMemMappedGPR() ? 0x20 : 0x0; }
@@ -118,31 +100,9 @@ private:
unsigned ELFArch = 0;
// Subtarget feature settings
- // See AVR.td for details.
- bool m_hasSRAM = false;
- bool m_hasJMPCALL = false;
- bool m_hasIJMPCALL = false;
- bool m_hasEIJMPCALL = false;
- bool m_hasADDSUBIW = false;
- bool m_hasSmallStack = false;
- bool m_hasMOVW = false;
- bool m_hasLPM = false;
- bool m_hasLPMX = false;
- bool m_hasELPM = false;
- bool m_hasELPMX = false;
- bool m_hasSPM = false;
- bool m_hasSPMX = false;
- bool m_hasDES = false;
- bool m_supportsRMW = false;
- bool m_supportsMultiplication = false;
- bool m_hasBREAK = false;
- bool m_hasTinyEncoding = false;
- bool m_hasLowByteFirst = false;
- bool m_hasMemMappedGPR = false;
-
- // Dummy member, used by FeatureSet's. We cannot have a SubtargetFeature with
- // no variable, so we instead bind pseudo features to this variable.
- bool m_FeatureSetDummy = false;
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
+ bool ATTRIBUTE = DEFAULT;
+#include "AVRGenSubtargetInfo.inc"
AVRInstrInfo InstrInfo;
AVRFrameLowering FrameLowering;
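Both deleted blocks are now regenerated from AVRDevices.td: TableGen emits one GET_SUBTARGETINFO_MACRO(attribute, default, getter) invocation per feature into AVRGenSubtargetInfo.inc, and each #include site supplies its own definition of the macro. The exact .inc contents are an assumption here; a compilable sketch of the expansion with a hand-written feature list in place of the generated one:

    // Hand-written stand-in for the generated AVRGenSubtargetInfo.inc.
    #define FEATURES(X) \
      X(HasSRAM, false, hasSRAM) \
      X(HasJMPCALL, false, hasJMPCALL)

    class DemoSubtarget {
    public:
      // First include site: one getter per feature.
    #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
      bool GETTER() const { return ATTRIBUTE; }
      FEATURES(GET_SUBTARGETINFO_MACRO)
    #undef GET_SUBTARGETINFO_MACRO

    private:
      // Second include site: one default-initialized member per feature.
    #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
      bool ATTRIBUTE = DEFAULT;
      FEATURES(GET_SUBTARGETINFO_MACRO)
    #undef GET_SUBTARGETINFO_MACRO
    };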
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.cpp
index e0c0514f62c4..e0776a6cab43 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.cpp
@@ -48,7 +48,7 @@ AVRTargetMachine::AVRTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: LLVMTargetMachine(T, AVRDataLayout, TT, getCPU(CPU), FS, Options,
getEffectiveRelocModel(RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.h
index 0fee27dc52f3..c19df2bc301e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.h
@@ -32,7 +32,7 @@ public:
AVRTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
const AVRSubtarget *getSubtargetImpl() const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
index 6c328ffc58a4..bb9710d13a9d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -56,17 +56,16 @@ class AVRAsmParser : public MCTargetAsmParser {
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
- bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
- OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
ParseStatus parseDirective(AsmToken DirectiveID) override;
- OperandMatchResultTy parseMemriOperand(OperandVector &Operands);
+ ParseStatus parseMemriOperand(OperandVector &Operands);
bool parseOperand(OperandVector &Operands, bool maybeReg);
int parseRegisterName(unsigned (*matchFn)(StringRef));
@@ -559,7 +558,7 @@ bool AVRAsmParser::parseOperand(OperandVector &Operands, bool maybeReg) {
return true;
}
-OperandMatchResultTy AVRAsmParser::parseMemriOperand(OperandVector &Operands) {
+ParseStatus AVRAsmParser::parseMemriOperand(OperandVector &Operands) {
LLVM_DEBUG(dbgs() << "parseMemriOperand()\n");
SMLoc E, S;
@@ -571,7 +570,7 @@ OperandMatchResultTy AVRAsmParser::parseMemriOperand(OperandVector &Operands) {
RegNo = parseRegister();
if (RegNo == AVR::NoRegister)
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
S = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Parser.Lex(); // Eat register token.
@@ -580,35 +579,34 @@ OperandMatchResultTy AVRAsmParser::parseMemriOperand(OperandVector &Operands) {
// Parse immediate;
{
if (getParser().parseExpression(Expression))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
}
Operands.push_back(AVROperand::CreateMemri(RegNo, Expression, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-bool AVRAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
+bool AVRAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) {
StartLoc = Parser.getTok().getLoc();
- RegNo = parseRegister(/*RestoreOnFailure=*/false);
+ Reg = parseRegister(/*RestoreOnFailure=*/false);
EndLoc = Parser.getTok().getLoc();
- return (RegNo == AVR::NoRegister);
+ return Reg == AVR::NoRegister;
}
-OperandMatchResultTy AVRAsmParser::tryParseRegister(MCRegister &RegNo,
- SMLoc &StartLoc,
- SMLoc &EndLoc) {
+ParseStatus AVRAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
StartLoc = Parser.getTok().getLoc();
- RegNo = parseRegister(/*RestoreOnFailure=*/true);
+ Reg = parseRegister(/*RestoreOnFailure=*/true);
EndLoc = Parser.getTok().getLoc();
- if (RegNo == AVR::NoRegister)
- return MatchOperand_NoMatch;
- return MatchOperand_Success;
+ if (Reg == AVR::NoRegister)
+ return ParseStatus::NoMatch;
+ return ParseStatus::Success;
}
void AVRAsmParser::eatComma() {
@@ -630,13 +628,12 @@ bool AVRAsmParser::ParseInstruction(ParseInstructionInfo &Info,
if (OperandNum > 0)
eatComma();
- auto MatchResult = MatchOperandParserImpl(Operands, Mnemonic);
+ ParseStatus ParseRes = MatchOperandParserImpl(Operands, Mnemonic);
- if (MatchResult == MatchOperand_Success) {
+ if (ParseRes.isSuccess())
continue;
- }
- if (MatchResult == MatchOperand_ParseFail) {
+ if (ParseRes.isFailure()) {
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
index c94469c8d9f3..d520880d73bb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
@@ -507,7 +507,8 @@ bool AVRAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
bool AVRAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
const MCFixup &Fixup,
- const MCValue &Target) {
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) {
switch ((unsigned)Fixup.getKind()) {
default:
return Fixup.getKind() >= FirstLiteralRelocationKind;
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
index d6a30e4dfa22..023660f0ff14 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
@@ -29,7 +29,7 @@ struct MCFixupKindInfo;
class AVRAsmBackend : public MCAsmBackend {
public:
AVRAsmBackend(Triple::OSType OSType)
- : MCAsmBackend(support::little), OSType(OSType) {}
+ : MCAsmBackend(llvm::endianness::little), OSType(OSType) {}
void adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
uint64_t &Value, MCContext *Ctx = nullptr) const;
@@ -60,7 +60,8 @@ public:
const MCSubtargetInfo *STI) const override;
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target) override;
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) override;
private:
Triple::OSType OSType;
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h
index 1f7a926edb5c..4f13a26334b7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h
@@ -68,10 +68,10 @@ enum Fixups {
/// with the upper 8 bits of a negated 16-bit value (bits 8-15).
fixup_hi8_ldi_neg,
/// Replaces the immediate operand of a 16-bit `Rd, K` instruction
- /// with the upper 8 bits of a negated negated 24-bit value (bits 16-23).
+ /// with the upper 8 bits of a negated 24-bit value (bits 16-23).
fixup_hh8_ldi_neg,
/// Replaces the immediate operand of a 16-bit `Rd, K` instruction
- /// with the upper 8 bits of a negated negated 32-bit value (bits 24-31).
+ /// with the upper 8 bits of a negated 32-bit value (bits 24-31).
fixup_ms8_ldi_neg,
/// Replaces the immediate operand of a 16-bit `Rd, K` instruction
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
index c08e293d0437..7682394e8392 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
@@ -285,7 +285,7 @@ void AVRMCCodeEmitter::encodeInstruction(const MCInst &MI,
for (int64_t i = Size / 2 - 1; i >= 0; --i) {
uint16_t Word = (BinaryOpCode >> (i * 16)) & 0xFFFF;
- support::endian::write(CB, Word, support::endianness::little);
+ support::endian::write(CB, Word, llvm::endianness::little);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 43edcaace322..90697c6645be 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -39,10 +39,9 @@ class BPFAsmParser : public MCTargetAsmParser {
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
- bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
- OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
@@ -57,9 +56,9 @@ class BPFAsmParser : public MCTargetAsmParser {
#define GET_ASSEMBLER_HEADER
#include "BPFGenAsmMatcher.inc"
- OperandMatchResultTy parseImmediate(OperandVector &Operands);
- OperandMatchResultTy parseRegister(OperandVector &Operands);
- OperandMatchResultTy parseOperandAsOperator(OperandVector &Operands);
+ ParseStatus parseImmediate(OperandVector &Operands);
+ ParseStatus parseRegister(OperandVector &Operands);
+ ParseStatus parseOperandAsOperator(OperandVector &Operands);
public:
enum BPFMatchResultTy {
@@ -136,10 +135,14 @@ public:
return static_cast<const MCConstantExpr *>(Val)->getValue();
}
- bool isSImm12() const {
- return (isConstantImm() && isInt<12>(getConstantImm()));
+ bool isSImm16() const {
+ return (isConstantImm() && isInt<16>(getConstantImm()));
}
+ bool isSymbolRef() const { return isImm() && isa<MCSymbolRefExpr>(getImm()); }
+
+ bool isBrTarget() const { return isSymbolRef() || isSImm16(); }
+
/// getStartLoc - Gets location of the first token of this operand
SMLoc getStartLoc() const override { return StartLoc; }
/// getEndLoc - Gets location of the last token of this operand
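The operand-class change replaces the old 12-bit immediate predicate: a branch target is now accepted either as a symbol reference or as a 16-bit signed literal, matching the Match_InvalidBrTarget diagnostic added below. The predicates in isolation, with the MC types reduced to stand-ins:

    #include <cstdint>

    static bool isSImm16(int64_t V) { return V >= -32768 && V <= 32767; }

    struct DemoOperand {
      bool IsSymbol; // e.g. a label operand such as LBB0_1
      int64_t Imm;   // meaningful only when !IsSymbol
      bool isBrTarget() const { return IsSymbol || isSImm16(Imm); }
    };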
@@ -227,6 +230,7 @@ public:
.Case("if", true)
.Case("call", true)
.Case("goto", true)
+ .Case("gotol", true)
.Case("*", true)
.Case("exit", true)
.Case("lock", true)
@@ -241,13 +245,20 @@ public:
.Case("u32", true)
.Case("u16", true)
.Case("u8", true)
+ .Case("s32", true)
+ .Case("s16", true)
+ .Case("s8", true)
.Case("be64", true)
.Case("be32", true)
.Case("be16", true)
.Case("le64", true)
.Case("le32", true)
.Case("le16", true)
+ .Case("bswap16", true)
+ .Case("bswap32", true)
+ .Case("bswap64", true)
.Case("goto", true)
+ .Case("gotol", true)
.Case("ll", true)
.Case("skb", true)
.Case("s", true)
@@ -325,37 +336,41 @@ bool BPFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
return Error(ErrorLoc, "invalid operand for instruction");
+ case Match_InvalidBrTarget:
+ return Error(Operands[ErrorInfo]->getStartLoc(),
+ "operand is not an identifier or 16-bit signed integer");
+ case Match_InvalidSImm16:
+ return Error(Operands[ErrorInfo]->getStartLoc(),
+ "operand is not a 16-bit signed integer");
}
llvm_unreachable("Unknown match type detected!");
}
-bool BPFAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
+bool BPFAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) {
- if (tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success)
+ if (!tryParseRegister(Reg, StartLoc, EndLoc).isSuccess())
return Error(StartLoc, "invalid register name");
return false;
}
-OperandMatchResultTy BPFAsmParser::tryParseRegister(MCRegister &RegNo,
- SMLoc &StartLoc,
- SMLoc &EndLoc) {
+ParseStatus BPFAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
const AsmToken &Tok = getParser().getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
- RegNo = 0;
+ Reg = BPF::NoRegister;
StringRef Name = getLexer().getTok().getIdentifier();
if (!MatchRegisterName(Name)) {
getParser().Lex(); // Eat identifier token.
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
-OperandMatchResultTy
-BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
+ParseStatus BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
SMLoc S = getLoc();
if (getLexer().getKind() == AsmToken::Identifier) {
@@ -364,17 +379,17 @@ BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
if (BPFOperand::isValidIdInMiddle(Name)) {
getLexer().Lex();
Operands.push_back(BPFOperand::createToken(Name, S));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
switch (getLexer().getKind()) {
case AsmToken::Minus:
case AsmToken::Plus: {
if (getLexer().peekTok().is(AsmToken::Integer))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
[[fallthrough]];
}
@@ -395,7 +410,7 @@ BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
getLexer().Lex();
Operands.push_back(BPFOperand::createToken(Name, S));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
case AsmToken::EqualEqual:
@@ -410,40 +425,40 @@ BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
getLexer().getTok().getString().substr(1, 1), S));
getLexer().Lex();
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
default:
break;
}
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
-OperandMatchResultTy BPFAsmParser::parseRegister(OperandVector &Operands) {
+ParseStatus BPFAsmParser::parseRegister(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
switch (getLexer().getKind()) {
default:
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
case AsmToken::Identifier:
StringRef Name = getLexer().getTok().getIdentifier();
unsigned RegNo = MatchRegisterName(Name);
if (RegNo == 0)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
getLexer().Lex();
Operands.push_back(BPFOperand::createReg(RegNo, S, E));
}
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy BPFAsmParser::parseImmediate(OperandVector &Operands) {
+ParseStatus BPFAsmParser::parseImmediate(OperandVector &Operands) {
switch (getLexer().getKind()) {
default:
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
case AsmToken::LParen:
case AsmToken::Minus:
case AsmToken::Plus:
@@ -457,12 +472,12 @@ OperandMatchResultTy BPFAsmParser::parseImmediate(OperandVector &Operands) {
SMLoc S = getLoc();
if (getParser().parseExpression(IdVal))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
Operands.push_back(BPFOperand::createImm(IdVal, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// ParseInstruction - Parse a BPF instruction which is in BPF verifier
@@ -482,11 +497,11 @@ bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
while (!getLexer().is(AsmToken::EndOfStatement)) {
// Attempt to parse token as operator
- if (parseOperandAsOperator(Operands) == MatchOperand_Success)
+ if (parseOperandAsOperator(Operands).isSuccess())
continue;
// Attempt to parse token as register
- if (parseRegister(Operands) == MatchOperand_Success)
+ if (parseRegister(Operands).isSuccess())
continue;
if (getLexer().is(AsmToken::Comma)) {
@@ -495,7 +510,7 @@ bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
// Attempt to parse token as an immediate
- if (parseImmediate(Operands) != MatchOperand_Success) {
+ if (!parseImmediate(Operands).isSuccess()) {
SMLoc Loc = getLexer().getLoc();
return Error(Loc, "unexpected token");
}
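
The hunks above migrate BPFAsmParser from OperandMatchResultTy to the newer ParseStatus API. A minimal sketch of the convention, using a hypothetical MyAsmParser/MyOperand pair (illustrative names, not part of this patch):

    #include "llvm/MC/MCParser/MCTargetAsmParser.h"

    // Sketch only: NoMatch lets the caller try other operand forms, Failure
    // signals a hard error whose diagnostic was already emitted, Success
    // means tokens were consumed and an operand was pushed.
    ParseStatus MyAsmParser::parseMyImm(OperandVector &Operands) {
      SMLoc S = getLexer().getLoc();
      if (getLexer().isNot(AsmToken::Integer))
        return ParseStatus::NoMatch;
      const MCExpr *Val;
      if (getParser().parseExpression(Val))
        return ParseStatus::Failure;
      Operands.push_back(MyOperand::createImm(Val, S, getLexer().getLoc()));
      return ParseStatus::Success;
    }

Call sites then test .isSuccess() instead of comparing against MatchOperand_* enumerators, as ParseInstruction does above.
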
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPF.h b/contrib/llvm-project/llvm/lib/Target/BPF/BPF.h
index 9b7bab785ee9..5c77d183e1ef 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPF.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPF.h
@@ -10,12 +10,16 @@
#define LLVM_LIB_TARGET_BPF_BPF_H
#include "MCTargetDesc/BPFMCTargetDesc.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
+class BPFRegisterBankInfo;
+class BPFSubtarget;
class BPFTargetMachine;
+class InstructionSelector;
class PassRegistry;
ModulePass *createBPFCheckAndAdjustIR();
@@ -23,14 +27,16 @@ ModulePass *createBPFCheckAndAdjustIR();
FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
FunctionPass *createBPFMISimplifyPatchablePass();
FunctionPass *createBPFMIPeepholePass();
-FunctionPass *createBPFMIPeepholeTruncElimPass();
FunctionPass *createBPFMIPreEmitPeepholePass();
FunctionPass *createBPFMIPreEmitCheckingPass();
+InstructionSelector *createBPFInstructionSelector(const BPFTargetMachine &,
+ const BPFSubtarget &,
+ const BPFRegisterBankInfo &);
+
void initializeBPFCheckAndAdjustIRPass(PassRegistry&);
void initializeBPFDAGToDAGISelPass(PassRegistry &);
-void initializeBPFMIPeepholePass(PassRegistry&);
-void initializeBPFMIPeepholeTruncElimPass(PassRegistry &);
+void initializeBPFMIPeepholePass(PassRegistry &);
void initializeBPFMIPreEmitCheckingPass(PassRegistry&);
void initializeBPFMIPreEmitPeepholePass(PassRegistry &);
void initializeBPFMISimplifyPatchablePass(PassRegistry &);
@@ -64,6 +70,24 @@ class BPFAdjustOptPass : public PassInfoMixin<BPFAdjustOptPass> {
public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
+
+class BPFPreserveStaticOffsetPass
+ : public PassInfoMixin<BPFPreserveStaticOffsetPass> {
+ bool AllowPartial;
+
+public:
+ BPFPreserveStaticOffsetPass(bool AllowPartial) : AllowPartial(AllowPartial) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+ static bool isRequired() { return true; }
+
+ static std::pair<GetElementPtrInst *, LoadInst *>
+ reconstructLoad(CallInst *Call);
+
+ static std::pair<GetElementPtrInst *, StoreInst *>
+ reconstructStore(CallInst *Call);
+};
+
} // namespace llvm
#endif
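
BPFPreserveStaticOffsetPass is a new-PM function pass. A minimal sketch of scheduling it from a pipeline; the wrapper function and the two-phase AllowPartial=true/false ordering are assumptions for illustration, not taken from this header:

    #include "BPF.h"
    #include "llvm/IR/PassManager.h"

    using namespace llvm;

    // Sketch: run once early, tolerating partially matched patterns, then
    // once late, requiring complete rewrites.
    static void addBPFStaticOffsetPasses(FunctionPassManager &FPM) {
      FPM.addPass(BPFPreserveStaticOffsetPass(/*AllowPartial=*/true));
      // ...other IR passes would run here...
      FPM.addPass(BPFPreserveStaticOffsetPass(/*AllowPartial=*/false));
    }

The static reconstructLoad/reconstructStore helpers are consumed by BPFCheckAndAdjustIR further below to expand the matching intrinsics back into a plain GEP plus load/store.
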
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPF.td b/contrib/llvm-project/llvm/lib/Target/BPF/BPF.td
index 0cc409dfcee1..dff76ca07af5 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPF.td
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPF.td
@@ -11,6 +11,7 @@ include "llvm/Target/Target.td"
include "BPFRegisterInfo.td"
include "BPFCallingConv.td"
include "BPFInstrInfo.td"
+include "GISel/BPFRegisterBanks.td"
def BPFInstrInfo : InstrInfo;
@@ -30,6 +31,7 @@ def : Proc<"generic", []>;
def : Proc<"v1", []>;
def : Proc<"v2", []>;
def : Proc<"v3", [ALU32]>;
+def : Proc<"v4", [ALU32]>;
def : Proc<"probe", []>;
def BPFInstPrinter : AsmWriter {
@@ -45,7 +47,7 @@ def BPFAsmParserVariant : AsmParserVariant {
int Variant = 0;
string Name = "BPF";
string BreakCharacters = ".";
- string TokenizingCharacters = "#()[]=:.<>!+*";
+ string TokenizingCharacters = "#()[]=:.<>!+*%/";
}
def BPF : Target {
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
index 9c99765b60c0..f2d1206d0231 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
@@ -78,6 +78,7 @@
#include "BPFCORE.h"
#include "BPFTargetMachine.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/BTF/BTF.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
@@ -88,6 +89,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <stack>
@@ -170,8 +172,6 @@ private:
bool IsValidAIChain(const MDNode *ParentMeta, uint32_t ParentAI,
const MDNode *ChildMeta);
bool removePreserveAccessIndexIntrinsic(Function &F);
- void replaceWithGEP(std::vector<CallInst *> &CallList,
- uint32_t NumOfZerosIndex, uint32_t DIIndex);
bool HasPreserveFieldInfoCall(CallInfoStack &CallStack);
void GetStorageBitRange(DIDerivedType *MemberTy, Align RecordAlignment,
uint32_t &StartBitOffset, uint32_t &EndBitOffset);
@@ -183,7 +183,6 @@ private:
std::string &AccessKey, MDNode *&BaseMeta);
MDNode *computeAccessKey(CallInst *Call, CallInfo &CInfo,
std::string &AccessKey, bool &IsInt32Ret);
- uint64_t getConstant(const Value *IndexValue);
bool transformGEPChain(CallInst *Call, CallInfo &CInfo);
};
@@ -324,6 +323,12 @@ static Type *getBaseElementType(const CallInst *Call) {
return Call->getParamElementType(0);
}
+static uint64_t getConstant(const Value *IndexValue) {
+ const ConstantInt *CV = dyn_cast<ConstantInt>(IndexValue);
+ assert(CV);
+ return CV->getValue().getZExtValue();
+}
+
/// Check whether a call is a preserve_*_access_index intrinsic call or not.
bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
CallInfo &CInfo) {
@@ -333,7 +338,7 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
const auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand());
if (!GV)
return false;
- if (GV->getName().startswith("llvm.preserve.array.access.index")) {
+ if (GV->getName().starts_with("llvm.preserve.array.access.index")) {
CInfo.Kind = BPFPreserveArrayAI;
CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index);
if (!CInfo.Metadata)
@@ -343,7 +348,7 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
CInfo.RecordAlignment = DL->getABITypeAlign(getBaseElementType(Call));
return true;
}
- if (GV->getName().startswith("llvm.preserve.union.access.index")) {
+ if (GV->getName().starts_with("llvm.preserve.union.access.index")) {
CInfo.Kind = BPFPreserveUnionAI;
CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index);
if (!CInfo.Metadata)
@@ -353,7 +358,7 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
CInfo.Base = Call->getArgOperand(0);
return true;
}
- if (GV->getName().startswith("llvm.preserve.struct.access.index")) {
+ if (GV->getName().starts_with("llvm.preserve.struct.access.index")) {
CInfo.Kind = BPFPreserveStructAI;
CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index);
if (!CInfo.Metadata)
@@ -364,17 +369,17 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
CInfo.RecordAlignment = DL->getABITypeAlign(getBaseElementType(Call));
return true;
}
- if (GV->getName().startswith("llvm.bpf.preserve.field.info")) {
+ if (GV->getName().starts_with("llvm.bpf.preserve.field.info")) {
CInfo.Kind = BPFPreserveFieldInfoAI;
CInfo.Metadata = nullptr;
// Check validity of info_kind as clang did not check this.
uint64_t InfoKind = getConstant(Call->getArgOperand(1));
- if (InfoKind >= BPFCoreSharedInfo::MAX_FIELD_RELOC_KIND)
+ if (InfoKind >= BTF::MAX_FIELD_RELOC_KIND)
report_fatal_error("Incorrect info_kind for llvm.bpf.preserve.field.info intrinsic");
CInfo.AccessIndex = InfoKind;
return true;
}
- if (GV->getName().startswith("llvm.bpf.preserve.type.info")) {
+ if (GV->getName().starts_with("llvm.bpf.preserve.type.info")) {
CInfo.Kind = BPFPreserveFieldInfoAI;
CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index);
if (!CInfo.Metadata)
@@ -383,14 +388,14 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
if (Flag >= BPFCoreSharedInfo::MAX_PRESERVE_TYPE_INFO_FLAG)
report_fatal_error("Incorrect flag for llvm.bpf.preserve.type.info intrinsic");
if (Flag == BPFCoreSharedInfo::PRESERVE_TYPE_INFO_EXISTENCE)
- CInfo.AccessIndex = BPFCoreSharedInfo::TYPE_EXISTENCE;
+ CInfo.AccessIndex = BTF::TYPE_EXISTENCE;
else if (Flag == BPFCoreSharedInfo::PRESERVE_TYPE_INFO_MATCH)
- CInfo.AccessIndex = BPFCoreSharedInfo::TYPE_MATCH;
+ CInfo.AccessIndex = BTF::TYPE_MATCH;
else
- CInfo.AccessIndex = BPFCoreSharedInfo::TYPE_SIZE;
+ CInfo.AccessIndex = BTF::TYPE_SIZE;
return true;
}
- if (GV->getName().startswith("llvm.bpf.preserve.enum.value")) {
+ if (GV->getName().starts_with("llvm.bpf.preserve.enum.value")) {
CInfo.Kind = BPFPreserveFieldInfoAI;
CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index);
if (!CInfo.Metadata)
@@ -399,35 +404,45 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
if (Flag >= BPFCoreSharedInfo::MAX_PRESERVE_ENUM_VALUE_FLAG)
report_fatal_error("Incorrect flag for llvm.bpf.preserve.enum.value intrinsic");
if (Flag == BPFCoreSharedInfo::PRESERVE_ENUM_VALUE_EXISTENCE)
- CInfo.AccessIndex = BPFCoreSharedInfo::ENUM_VALUE_EXISTENCE;
+ CInfo.AccessIndex = BTF::ENUM_VALUE_EXISTENCE;
else
- CInfo.AccessIndex = BPFCoreSharedInfo::ENUM_VALUE;
+ CInfo.AccessIndex = BTF::ENUM_VALUE;
return true;
}
return false;
}
-void BPFAbstractMemberAccess::replaceWithGEP(std::vector<CallInst *> &CallList,
- uint32_t DimensionIndex,
- uint32_t GEPIndex) {
- for (auto *Call : CallList) {
- uint32_t Dimension = 1;
- if (DimensionIndex > 0)
- Dimension = getConstant(Call->getArgOperand(DimensionIndex));
-
- Constant *Zero =
- ConstantInt::get(Type::getInt32Ty(Call->getParent()->getContext()), 0);
- SmallVector<Value *, 4> IdxList;
- for (unsigned I = 0; I < Dimension; ++I)
- IdxList.push_back(Zero);
- IdxList.push_back(Call->getArgOperand(GEPIndex));
-
- auto *GEP = GetElementPtrInst::CreateInBounds(
- getBaseElementType(Call), Call->getArgOperand(0), IdxList, "", Call);
- Call->replaceAllUsesWith(GEP);
- Call->eraseFromParent();
- }
+static void replaceWithGEP(CallInst *Call, uint32_t DimensionIndex,
+ uint32_t GEPIndex) {
+ uint32_t Dimension = 1;
+ if (DimensionIndex > 0)
+ Dimension = getConstant(Call->getArgOperand(DimensionIndex));
+
+ Constant *Zero =
+ ConstantInt::get(Type::getInt32Ty(Call->getParent()->getContext()), 0);
+ SmallVector<Value *, 4> IdxList;
+ for (unsigned I = 0; I < Dimension; ++I)
+ IdxList.push_back(Zero);
+ IdxList.push_back(Call->getArgOperand(GEPIndex));
+
+ auto *GEP = GetElementPtrInst::CreateInBounds(
+ getBaseElementType(Call), Call->getArgOperand(0), IdxList, "", Call);
+ Call->replaceAllUsesWith(GEP);
+ Call->eraseFromParent();
+}
+
+void BPFCoreSharedInfo::removeArrayAccessCall(CallInst *Call) {
+ replaceWithGEP(Call, 1, 2);
+}
+
+void BPFCoreSharedInfo::removeStructAccessCall(CallInst *Call) {
+ replaceWithGEP(Call, 0, 1);
+}
+
+void BPFCoreSharedInfo::removeUnionAccessCall(CallInst *Call) {
+ Call->replaceAllUsesWith(Call->getArgOperand(0));
+ Call->eraseFromParent();
}
bool BPFAbstractMemberAccess::removePreserveAccessIndexIntrinsic(Function &F) {
@@ -462,12 +477,12 @@ bool BPFAbstractMemberAccess::removePreserveAccessIndexIntrinsic(Function &F) {
// . addr = preserve_struct_access_index(base, gep_index, di_index)
// is transformed to
// addr = GEP(base, 0, gep_index)
- replaceWithGEP(PreserveArrayIndexCalls, 1, 2);
- replaceWithGEP(PreserveStructIndexCalls, 0, 1);
- for (auto *Call : PreserveUnionIndexCalls) {
- Call->replaceAllUsesWith(Call->getArgOperand(0));
- Call->eraseFromParent();
- }
+ for (CallInst *Call : PreserveArrayIndexCalls)
+ BPFCoreSharedInfo::removeArrayAccessCall(Call);
+ for (CallInst *Call : PreserveStructIndexCalls)
+ BPFCoreSharedInfo::removeStructAccessCall(Call);
+ for (CallInst *Call : PreserveUnionIndexCalls)
+ BPFCoreSharedInfo::removeUnionAccessCall(Call);
return Found;
}
@@ -632,12 +647,6 @@ void BPFAbstractMemberAccess::collectAICallChains(Function &F) {
}
}
-uint64_t BPFAbstractMemberAccess::getConstant(const Value *IndexValue) {
- const ConstantInt *CV = dyn_cast<ConstantInt>(IndexValue);
- assert(CV);
- return CV->getValue().getZExtValue();
-}
-
/// Get the start and the end of storage offset for \p MemberTy.
void BPFAbstractMemberAccess::GetStorageBitRange(DIDerivedType *MemberTy,
Align RecordAlignment,
@@ -672,11 +681,11 @@ uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind,
uint32_t AccessIndex,
uint32_t PatchImm,
MaybeAlign RecordAlignment) {
- if (InfoKind == BPFCoreSharedInfo::FIELD_EXISTENCE)
- return 1;
+ if (InfoKind == BTF::FIELD_EXISTENCE)
+ return 1;
uint32_t Tag = CTy->getTag();
- if (InfoKind == BPFCoreSharedInfo::FIELD_BYTE_OFFSET) {
+ if (InfoKind == BTF::FIELD_BYTE_OFFSET) {
if (Tag == dwarf::DW_TAG_array_type) {
auto *EltTy = stripQualifiers(CTy->getBaseType());
PatchImm += AccessIndex * calcArraySize(CTy, 1) *
@@ -695,7 +704,7 @@ uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind,
return PatchImm;
}
- if (InfoKind == BPFCoreSharedInfo::FIELD_BYTE_SIZE) {
+ if (InfoKind == BTF::FIELD_BYTE_SIZE) {
if (Tag == dwarf::DW_TAG_array_type) {
auto *EltTy = stripQualifiers(CTy->getBaseType());
return calcArraySize(CTy, 1) * (EltTy->getSizeInBits() >> 3);
@@ -715,7 +724,7 @@ uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind,
}
}
- if (InfoKind == BPFCoreSharedInfo::FIELD_SIGNEDNESS) {
+ if (InfoKind == BTF::FIELD_SIGNEDNESS) {
const DIType *BaseTy;
if (Tag == dwarf::DW_TAG_array_type) {
// Signedness only checked when final array elements are accessed.
@@ -741,7 +750,7 @@ uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind,
return (Encoding == dwarf::DW_ATE_signed || Encoding == dwarf::DW_ATE_signed_char);
}
- if (InfoKind == BPFCoreSharedInfo::FIELD_LSHIFT_U64) {
+ if (InfoKind == BTF::FIELD_LSHIFT_U64) {
// The value is loaded into a value with FIELD_BYTE_SIZE size,
// and then zero or sign extended to U64.
// FIELD_LSHIFT_U64 and FIELD_RSHIFT_U64 are operations
@@ -778,7 +787,7 @@ uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind,
return OffsetInBits + 64 - NextSBitOffset;
}
- if (InfoKind == BPFCoreSharedInfo::FIELD_RSHIFT_U64) {
+ if (InfoKind == BTF::FIELD_RSHIFT_U64) {
DIDerivedType *MemberTy = nullptr;
bool IsBitField = false;
uint32_t SizeInBits;
@@ -849,7 +858,7 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call,
// we will skip them.
uint32_t FirstIndex = 0;
uint32_t PatchImm = 0; // AccessOffset or the requested field info
- uint32_t InfoKind = BPFCoreSharedInfo::FIELD_BYTE_OFFSET;
+ uint32_t InfoKind = BTF::FIELD_BYTE_OFFSET;
while (CallStack.size()) {
auto StackElem = CallStack.top();
Call = StackElem.first;
@@ -939,7 +948,7 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call,
if (CInfo.Kind == BPFPreserveFieldInfoAI) {
InfoKind = CInfo.AccessIndex;
- if (InfoKind == BPFCoreSharedInfo::FIELD_EXISTENCE)
+ if (InfoKind == BTF::FIELD_EXISTENCE)
PatchImm = 1;
break;
}
@@ -987,10 +996,10 @@ MDNode *BPFAbstractMemberAccess::computeAccessKey(CallInst *Call,
int64_t PatchImm;
std::string AccessStr("0");
- if (CInfo.AccessIndex == BPFCoreSharedInfo::TYPE_EXISTENCE ||
- CInfo.AccessIndex == BPFCoreSharedInfo::TYPE_MATCH) {
+ if (CInfo.AccessIndex == BTF::TYPE_EXISTENCE ||
+ CInfo.AccessIndex == BTF::TYPE_MATCH) {
PatchImm = 1;
- } else if (CInfo.AccessIndex == BPFCoreSharedInfo::TYPE_SIZE) {
+ } else if (CInfo.AccessIndex == BTF::TYPE_SIZE) {
// typedef debuginfo type has size 0, get the eventual base type.
DIType *BaseTy = stripQualifiers(Ty, true);
PatchImm = BaseTy->getSizeInBits() / 8;
@@ -1026,7 +1035,7 @@ MDNode *BPFAbstractMemberAccess::computeAccessKey(CallInst *Call,
EnumIndex++;
}
- if (CInfo.AccessIndex == BPFCoreSharedInfo::ENUM_VALUE) {
+ if (CInfo.AccessIndex == BTF::ENUM_VALUE) {
StringRef EValueStr = ValueStr.substr(Separator + 1);
PatchImm = std::stoll(std::string(EValueStr));
} else {
@@ -1107,7 +1116,8 @@ bool BPFAbstractMemberAccess::transformGEPChain(CallInst *Call,
auto *LDInst = new LoadInst(Type::getInt64Ty(BB->getContext()), GV, "", Call);
// Generate a BitCast
- auto *BCInst = new BitCastInst(Base, Type::getInt8PtrTy(BB->getContext()));
+ auto *BCInst =
+ new BitCastInst(Base, PointerType::getUnqual(BB->getContext()));
BCInst->insertBefore(Call);
// Generate a GetElementPtr
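
Hoisting getConstant and replaceWithGEP to file scope and re-exporting the removal helpers through BPFCoreSharedInfo lets other BPF passes drop an individual preserve_*_access_index call without duplicating the GEP reconstruction. A sketch of such a caller (the function itself is hypothetical):

    #include "BPFCORE.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/InstIterator.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/Support/Casting.h"

    using namespace llvm;

    // Sketch: collect first, then erase, since removal invalidates iterators.
    static void stripArrayAccessIndexCalls(Function &F) {
      SmallVector<CallInst *> Calls;
      for (Instruction &I : instructions(F))
        if (auto *Call = dyn_cast<CallInst>(&I))
          if (Call->getIntrinsicID() == Intrinsic::preserve_array_access_index)
            Calls.push_back(Call);
      for (CallInst *Call : Calls)
        BPFCoreSharedInfo::removeArrayAccessCall(Call); // GEP(base, 0..., index)
    }
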
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFCORE.h b/contrib/llvm-project/llvm/lib/Target/BPF/BPFCORE.h
index c9aa135232c1..f46a8ef62a7f 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFCORE.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFCORE.h
@@ -10,6 +10,7 @@
#define LLVM_LIB_TARGET_BPF_BPFCORE_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Instructions.h"
namespace llvm {
@@ -19,24 +20,6 @@ class Module;
class BPFCoreSharedInfo {
public:
- enum PatchableRelocKind : uint32_t {
- FIELD_BYTE_OFFSET = 0,
- FIELD_BYTE_SIZE,
- FIELD_EXISTENCE,
- FIELD_SIGNEDNESS,
- FIELD_LSHIFT_U64,
- FIELD_RSHIFT_U64,
- BTF_TYPE_ID_LOCAL,
- BTF_TYPE_ID_REMOTE,
- TYPE_EXISTENCE,
- TYPE_SIZE,
- ENUM_VALUE_EXISTENCE,
- ENUM_VALUE,
- TYPE_MATCH,
-
- MAX_FIELD_RELOC_KIND,
- };
-
enum BTFTypeIdFlag : uint32_t {
BTF_TYPE_ID_LOCAL_RELOC = 0,
BTF_TYPE_ID_REMOTE_RELOC,
@@ -71,6 +54,9 @@ public:
static Instruction *insertPassThrough(Module *M, BasicBlock *BB,
Instruction *Input,
Instruction *Before);
+ static void removeArrayAccessCall(CallInst *Call);
+ static void removeStructAccessCall(CallInst *Call);
+ static void removeUnionAccessCall(CallInst *Call);
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
index a3616ae7ebab..81effc9b1db4 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
@@ -12,6 +12,8 @@
// The following are done for IR adjustment:
// - remove __builtin_bpf_passthrough builtins. Target independent IR
// optimizations are done and those builtins can be removed.
+// - remove llvm.bpf.getelementptr.and.load builtins.
+// - remove llvm.bpf.getelementptr.and.store builtins.
//
//===----------------------------------------------------------------------===//
@@ -24,6 +26,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
@@ -51,6 +54,7 @@ private:
bool removePassThroughBuiltin(Module &M);
bool removeCompareBuiltin(Module &M);
bool sinkMinMax(Module &M);
+ bool removeGEPBuiltins(Module &M);
};
} // End anonymous namespace
@@ -115,7 +119,7 @@ bool BPFCheckAndAdjustIR::removePassThroughBuiltin(Module &M) {
auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand());
if (!GV)
continue;
- if (!GV->getName().startswith("llvm.bpf.passthrough"))
+ if (!GV->getName().starts_with("llvm.bpf.passthrough"))
continue;
Changed = true;
Value *Arg = Call->getArgOperand(1);
@@ -145,7 +149,7 @@ bool BPFCheckAndAdjustIR::removeCompareBuiltin(Module &M) {
auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand());
if (!GV)
continue;
- if (!GV->getName().startswith("llvm.bpf.compare"))
+ if (!GV->getName().starts_with("llvm.bpf.compare"))
continue;
Changed = true;
@@ -361,10 +365,62 @@ void BPFCheckAndAdjustIR::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LoopInfoWrapperPass>();
}
+static void unrollGEPLoad(CallInst *Call) {
+ auto [GEP, Load] = BPFPreserveStaticOffsetPass::reconstructLoad(Call);
+ GEP->insertBefore(Call);
+ Load->insertBefore(Call);
+ Call->replaceAllUsesWith(Load);
+ Call->eraseFromParent();
+}
+
+static void unrollGEPStore(CallInst *Call) {
+ auto [GEP, Store] = BPFPreserveStaticOffsetPass::reconstructStore(Call);
+ GEP->insertBefore(Call);
+ Store->insertBefore(Call);
+ Call->eraseFromParent();
+}
+
+static bool removeGEPBuiltinsInFunc(Function &F) {
+ SmallVector<CallInst *> GEPLoads;
+ SmallVector<CallInst *> GEPStores;
+ for (auto &BB : F)
+ for (auto &Insn : BB)
+ if (auto *Call = dyn_cast<CallInst>(&Insn))
+ if (auto *Called = Call->getCalledFunction())
+ switch (Called->getIntrinsicID()) {
+ case Intrinsic::bpf_getelementptr_and_load:
+ GEPLoads.push_back(Call);
+ break;
+ case Intrinsic::bpf_getelementptr_and_store:
+ GEPStores.push_back(Call);
+ break;
+ }
+
+ if (GEPLoads.empty() && GEPStores.empty())
+ return false;
+
+ for_each(GEPLoads, unrollGEPLoad);
+ for_each(GEPStores, unrollGEPStore);
+
+ return true;
+}
+
+// Rewrites the following builtins:
+// - llvm.bpf.getelementptr.and.load
+// - llvm.bpf.getelementptr.and.store
+// as (load (getelementptr ...)) or (store (getelementptr ...)).
+bool BPFCheckAndAdjustIR::removeGEPBuiltins(Module &M) {
+ bool Changed = false;
+ for (auto &F : M)
+ Changed = removeGEPBuiltinsInFunc(F) || Changed;
+ return Changed;
+}
+
bool BPFCheckAndAdjustIR::adjustIR(Module &M) {
bool Changed = removePassThroughBuiltin(M);
Changed = removeCompareBuiltin(M) || Changed;
Changed = sinkMinMax(M) || Changed;
+ Changed = removeGEPBuiltins(M) || Changed;
return Changed;
}
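
For reference, the IR-level shape of the rewrite performed by unrollGEPLoad/unrollGEPStore, with operand lists simplified (the exact intrinsic signatures are not shown in this hunk):

    // Before (assumed, simplified):
    //   %v = call i32 @llvm.bpf.getelementptr.and.load...(ptr %base, <indices>, ...)
    // After reconstructLoad + insertBefore + replaceAllUsesWith:
    //   %p = getelementptr <type>, ptr %base, i32 0, i32 <idx>
    //   %v = load i32, ptr %p
    // Stores are symmetric: the call's value operand becomes the value
    // operand of a plain store through the reconstructed GEP.
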
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
index fa626a775c83..909c7c005735 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -61,10 +61,10 @@ public:
void PreprocessISelDAG() override;
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintCode,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ InlineAsm::ConstraintCode ConstraintCode,
std::vector<SDValue> &OutOps) override;
-
private:
// Include the pieces autogenerated from the target description.
#include "BPFGenDAGISel.inc"
@@ -159,12 +159,13 @@ bool BPFDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base,
}
bool BPFDAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, unsigned ConstraintCode, std::vector<SDValue> &OutOps) {
+ const SDValue &Op, InlineAsm::ConstraintCode ConstraintCode,
+ std::vector<SDValue> &OutOps) {
SDValue Op0, Op1;
switch (ConstraintCode) {
default:
return true;
- case InlineAsm::Constraint_m: // memory
+ case InlineAsm::ConstraintCode::m: // memory
if (!SelectAddr(Op, Op0, Op1))
return true;
break;
@@ -191,18 +192,6 @@ void BPFDAGToDAGISel::Select(SDNode *Node) {
switch (Opcode) {
default:
break;
- case ISD::SDIV: {
- DebugLoc Empty;
- const DebugLoc &DL = Node->getDebugLoc();
- if (DL != Empty)
- errs() << "Error at line " << DL.getLine() << ": ";
- else
- errs() << "Error: ";
- errs() << "Unsupport signed division for DAG: ";
- Node->print(errs(), CurDAG);
- errs() << "Please convert to unsigned div/mod.\n";
- break;
- }
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
switch (IntNo) {
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 83a4bfb2f758..2fe86e75ddae 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -26,7 +26,9 @@
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
#define DEBUG_TYPE "bpf-lower"
@@ -35,22 +37,17 @@ static cl::opt<bool> BPFExpandMemcpyInOrder("bpf-expand-memcpy-in-order",
cl::Hidden, cl::init(false),
cl::desc("Expand memcpy into load/store pairs in order"));
-static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg) {
- MachineFunction &MF = DAG.getMachineFunction();
- DAG.getContext()->diagnose(
- DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
-}
-
-static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg,
- SDValue Val) {
- MachineFunction &MF = DAG.getMachineFunction();
+static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg,
+ SDValue Val = {}) {
std::string Str;
- raw_string_ostream OS(Str);
- OS << Msg;
- Val->print(OS);
- OS.flush();
- DAG.getContext()->diagnose(
- DiagnosticInfoUnsupported(MF.getFunction(), Str, DL.getDebugLoc()));
+ if (Val) {
+ raw_string_ostream OS(Str);
+ Val->print(OS);
+ OS << ' ';
+ }
+ MachineFunction &MF = DAG.getMachineFunction();
+ DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
+ MF.getFunction(), Twine(Str).concat(Msg), DL.getDebugLoc()));
}
BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
@@ -102,7 +99,10 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
- setOperationAction(ISD::SREM, VT, Expand);
+ if (!STI.hasSdivSmod()) {
+ setOperationAction(ISD::SDIV, VT, Custom);
+ setOperationAction(ISD::SREM, VT, Custom);
+ }
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
@@ -131,9 +131,11 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
+ if (!STI.hasMovsx()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
+ }
// Extended load operations for i1 types must be promoted
for (MVT VT : MVT::integer_valuetypes()) {
@@ -141,9 +143,11 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
+ if (!STI.hasLdsx()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
+ }
}
setBooleanContents(ZeroOrOneBooleanContent);
@@ -183,6 +187,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
HasAlu32 = STI.getHasAlu32();
HasJmp32 = STI.getHasJmp32();
HasJmpExt = STI.getHasJmpExt();
+ HasMovsx = STI.hasMovsx();
}
bool BPFTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
@@ -221,6 +226,18 @@ bool BPFTargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
return NumBits1 == 32 && NumBits2 == 64;
}
+bool BPFTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ EVT VT1 = Val.getValueType();
+ if (Val.getOpcode() == ISD::LOAD && VT1.isSimple() && VT2.isSimple()) {
+ MVT MT1 = VT1.getSimpleVT().SimpleTy;
+ MVT MT2 = VT2.getSimpleVT().SimpleTy;
+ if ((MT1 == MVT::i8 || MT1 == MVT::i16 || MT1 == MVT::i32) &&
+ (MT2 == MVT::i32 || MT2 == MVT::i64))
+ return true;
+ }
+ return TargetLoweringBase::isZExtFree(Val, VT2);
+}
+
BPFTargetLowering::ConstraintType
BPFTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
@@ -239,7 +256,7 @@ std::pair<unsigned, const TargetRegisterClass *>
BPFTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
MVT VT) const {
- if (Constraint.size() == 1)
+ if (Constraint.size() == 1) {
// GCC Constraint Letters
switch (Constraint[0]) {
case 'r': // GENERAL_REGS
@@ -251,17 +268,18 @@ BPFTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
default:
break;
}
+ }
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
void BPFTargetLowering::ReplaceNodeResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
- const char *err_msg;
+ const char *Msg;
uint32_t Opcode = N->getOpcode();
switch (Opcode) {
default:
- report_fatal_error("Unhandled custom legalization");
+ report_fatal_error("unhandled custom legalization: " + Twine(Opcode));
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_AND:
case ISD::ATOMIC_LOAD_OR:
@@ -269,28 +287,33 @@ void BPFTargetLowering::ReplaceNodeResults(
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
if (HasAlu32 || Opcode == ISD::ATOMIC_LOAD_ADD)
- err_msg = "Unsupported atomic operations, please use 32/64 bit version";
+ Msg = "unsupported atomic operation, please use 32/64 bit version";
else
- err_msg = "Unsupported atomic operations, please use 64 bit version";
+ Msg = "unsupported atomic operation, please use 64 bit version";
break;
}
SDLoc DL(N);
- fail(DL, DAG, err_msg);
+ // We'll still produce a fatal error downstream, but this diagnostic is more
+ // user-friendly.
+ fail(DL, DAG, Msg);
}
SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
+ default:
+ report_fatal_error("unimplemented opcode: " + Twine(Op.getOpcode()));
case ISD::BR_CC:
return LowerBR_CC(Op, DAG);
case ISD::GlobalAddress:
return LowerGlobalAddress(Op, DAG);
case ISD::SELECT_CC:
return LowerSELECT_CC(Op, DAG);
+ case ISD::SDIV:
+ case ISD::SREM:
+ return LowerSDIVSREM(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
- report_fatal_error("Unsupported dynamic stack allocation");
- default:
- llvm_unreachable("unimplemented operand");
+ return LowerDYNAMIC_STACKALLOC(Op, DAG);
}
}
@@ -303,7 +326,7 @@ SDValue BPFTargetLowering::LowerFormalArguments(
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
switch (CallConv) {
default:
- report_fatal_error("Unsupported calling convention");
+ report_fatal_error("unimplemented calling convention: " + Twine(CallConv));
case CallingConv::C:
case CallingConv::Fast:
break;
@@ -317,16 +340,22 @@ SDValue BPFTargetLowering::LowerFormalArguments(
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, getHasAlu32() ? CC_BPF32 : CC_BPF64);
- for (auto &VA : ArgLocs) {
+ bool HasMemArgs = false;
+ for (size_t I = 0; I < ArgLocs.size(); ++I) {
+ auto &VA = ArgLocs[I];
+
if (VA.isRegLoc()) {
// Arguments passed in registers
EVT RegVT = VA.getLocVT();
MVT::SimpleValueType SimpleTy = RegVT.getSimpleVT().SimpleTy;
switch (SimpleTy) {
default: {
- errs() << "LowerFormalArguments Unhandled argument type: "
- << RegVT << '\n';
- llvm_unreachable(nullptr);
+ std::string Str;
+ {
+ raw_string_ostream OS(Str);
+ RegVT.print(OS);
+ }
+ report_fatal_error("unhandled argument type: " + Twine(Str));
}
case MVT::i32:
case MVT::i64:
@@ -349,22 +378,27 @@ SDValue BPFTargetLowering::LowerFormalArguments(
InVals.push_back(ArgValue);
- break;
+ break;
}
} else {
- fail(DL, DAG, "defined with too many args");
+ if (VA.isMemLoc())
+ HasMemArgs = true;
+ else
+ report_fatal_error("unhandled argument location");
InVals.push_back(DAG.getConstant(0, DL, VA.getLocVT()));
}
}
-
- if (IsVarArg || MF.getFunction().hasStructRetAttr()) {
- fail(DL, DAG, "functions with VarArgs or StructRet are not supported");
- }
+ if (HasMemArgs)
+ fail(DL, DAG, "stack arguments are not supported");
+ if (IsVarArg)
+ fail(DL, DAG, "variadic functions are not supported");
+ if (MF.getFunction().hasStructRetAttr())
+ fail(DL, DAG, "aggregate returns are not supported");
return Chain;
}
-const unsigned BPFTargetLowering::MaxArgs = 5;
+const size_t BPFTargetLowering::MaxArgs = 5;
SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
@@ -384,7 +418,7 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
switch (CallConv) {
default:
- report_fatal_error("Unsupported calling convention");
+ report_fatal_error("unsupported calling convention: " + Twine(CallConv));
case CallingConv::Fast:
case CallingConv::C:
break;
@@ -399,14 +433,14 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned NumBytes = CCInfo.getStackSize();
if (Outs.size() > MaxArgs)
- fail(CLI.DL, DAG, "too many args to ", Callee);
+ fail(CLI.DL, DAG, "too many arguments", Callee);
for (auto &Arg : Outs) {
ISD::ArgFlagsTy Flags = Arg.Flags;
if (!Flags.isByVal())
continue;
-
- fail(CLI.DL, DAG, "pass by value not supported ", Callee);
+ fail(CLI.DL, DAG, "pass by value not supported", Callee);
+ break;
}
auto PtrVT = getPointerTy(MF.getDataLayout());
@@ -415,16 +449,14 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<std::pair<unsigned, SDValue>, MaxArgs> RegsToPass;
// Walk arg assignments
- for (unsigned i = 0,
- e = std::min(static_cast<unsigned>(ArgLocs.size()), MaxArgs);
- i != e; ++i) {
+ for (size_t i = 0; i < std::min(ArgLocs.size(), MaxArgs); ++i) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = OutVals[i];
+ SDValue &Arg = OutVals[i];
// Promote the value if needed.
switch (VA.getLocInfo()) {
default:
- llvm_unreachable("Unknown loc info");
+ report_fatal_error("unhandled location info: " + Twine(VA.getLocInfo()));
case CCValAssign::Full:
break;
case CCValAssign::SExt:
@@ -442,7 +474,7 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (VA.isRegLoc())
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
else
- llvm_unreachable("call arg pass bug");
+ report_fatal_error("stack arguments are not supported");
}
SDValue InGlue;
@@ -463,9 +495,9 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
G->getOffset(), 0);
} else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
- fail(CLI.DL, DAG, Twine("A call to built-in function '"
- + StringRef(E->getSymbol())
- + "' is not supported."));
+ fail(CLI.DL, DAG,
+ Twine("A call to built-in function '" + StringRef(E->getSymbol()) +
+ "' is not supported."));
}
// Returns a chain & a flag for retval copy to use.
@@ -513,7 +545,7 @@ BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
if (MF.getFunction().getReturnType()->isAggregateType()) {
- fail(DL, DAG, "only integer returns supported");
+ fail(DL, DAG, "aggregate returns are not supported");
return DAG.getNode(Opc, DL, MVT::Other, Chain);
}
@@ -524,9 +556,10 @@ BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ for (size_t i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
- assert(VA.isRegLoc() && "Can only return in registers!");
+ if (!VA.isRegLoc())
+ report_fatal_error("stack return values are not supported");
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Glue);
@@ -555,10 +588,10 @@ SDValue BPFTargetLowering::LowerCallResult(
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
- if (Ins.size() >= 2) {
+ if (Ins.size() > 1) {
fail(DL, DAG, "only small returns supported");
- for (unsigned i = 0, e = Ins.size(); i != e; ++i)
- InVals.push_back(DAG.getConstant(0, DL, Ins[i].VT));
+ for (auto &In : Ins)
+ InVals.push_back(DAG.getConstant(0, DL, In.VT));
return DAG.getCopyFromReg(Chain, DL, 1, Ins[0].VT, InGlue).getValue(1);
}
@@ -589,6 +622,21 @@ static void NegateCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
}
}
+SDValue BPFTargetLowering::LowerSDIVSREM(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ fail(DL, DAG,
+ "unsupported signed division, please convert to unsigned div/mod.");
+ return DAG.getUNDEF(Op->getValueType(0));
+}
+
+SDValue BPFTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ fail(DL, DAG, "unsupported dynamic stack allocation");
+ auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)};
+ return DAG.getMergeValues(Ops, SDLoc());
+}
+
SDValue BPFTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
@@ -644,8 +692,10 @@ const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
- auto N = cast<GlobalAddressSDNode>(Op);
- assert(N->getOffset() == 0 && "Invalid offset for global address");
+ auto *N = cast<GlobalAddressSDNode>(Op);
+ if (N->getOffset() != 0)
+ report_fatal_error("invalid offset for global address: " +
+ Twine(N->getOffset()));
SDLoc DL(Op);
const GlobalValue *GV = N->getGlobal();
@@ -673,11 +723,15 @@ BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
Register PromotedReg1 = RegInfo.createVirtualRegister(RC);
Register PromotedReg2 = RegInfo.createVirtualRegister(RC);
- BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
- BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1)
- .addReg(PromotedReg0).addImm(32);
- BuildMI(BB, DL, TII.get(RShiftOp), PromotedReg2)
- .addReg(PromotedReg1).addImm(32);
+ if (HasMovsx) {
+ BuildMI(BB, DL, TII.get(BPF::MOVSX_rr_32), PromotedReg0).addReg(Reg);
+ } else {
+ BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
+ BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1)
+ .addReg(PromotedReg0).addImm(32);
+ BuildMI(BB, DL, TII.get(RShiftOp), PromotedReg2)
+ .addReg(PromotedReg1).addImm(32);
+ }
return PromotedReg2;
}
@@ -732,9 +786,8 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
Opc == BPF::Select_Ri_32 ||
Opc == BPF::Select_Ri_32_64);
-
- assert((isSelectRROp || isSelectRIOp || isMemcpyOp) &&
- "Unexpected instr type to insert");
+ if (!(isSelectRROp || isSelectRIOp || isMemcpyOp))
+ report_fatal_error("unhandled instruction type: " + Twine(Opc));
#endif
if (isMemcpyOp)
@@ -824,7 +877,8 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
} else {
int64_t imm32 = MI.getOperand(2).getImm();
// Check before we build J*_ri instruction.
- assert (isInt<32>(imm32));
+ if (!isInt<32>(imm32))
+ report_fatal_error("immediate overflows 32 bits: " + Twine(imm32));
BuildMI(BB, DL, TII.get(NewCC))
.addReg(LHS).addImm(imm32).addMBB(Copy1MBB);
}
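
The two fail() overloads collapse into a single helper with a defaulted SDValue; SDValue's explicit operator bool selects between the message-only and value-printing forms, so call sites stay uniform:

    fail(DL, DAG, "stack arguments are not supported");   // message only
    fail(CLI.DL, DAG, "too many arguments", Callee);      // prints Callee first

Unsupported SDIV/SREM and dynamic stack allocation likewise now reach LowerOperation, which reports through fail() and returns placeholder values instead of aborting instruction selection outright.
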
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.h b/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.h
index 9b6fe8531443..819711b650c1 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -71,7 +71,10 @@ private:
bool HasAlu32;
bool HasJmp32;
bool HasJmpExt;
+ bool HasMovsx;
+ SDValue LowerSDIVSREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -84,7 +87,7 @@ private:
SmallVectorImpl<SDValue> &InVals) const;
// Maximum number of arguments to a call
- static const unsigned MaxArgs;
+ static const size_t MaxArgs;
// Lower a call into CALLSEQ_START - BPFISD:CALL - CALLSEQ_END chain
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
@@ -143,6 +146,7 @@ private:
// For 32bit ALU result zext to 64bit is free.
bool isZExtFree(Type *Ty1, Type *Ty2) const override;
bool isZExtFree(EVT VT1, EVT VT2) const override;
+ bool isZExtFree(SDValue Val, EVT VT2) const override;
unsigned EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB, unsigned Reg,
bool isSigned) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrFormats.td
index 27db0be080ae..6ed83d877ac0 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrFormats.td
@@ -63,6 +63,7 @@ def BPF_JA : BPFJumpOp<0x0>;
def BPF_JEQ : BPFJumpOp<0x1>;
def BPF_JGT : BPFJumpOp<0x2>;
def BPF_JGE : BPFJumpOp<0x3>;
+def BPF_JSET : BPFJumpOp<0x4>;
def BPF_JNE : BPFJumpOp<0x5>;
def BPF_JSGT : BPFJumpOp<0x6>;
def BPF_JSGE : BPFJumpOp<0x7>;
@@ -90,6 +91,7 @@ def BPF_IMM : BPFModeModifer<0x0>;
def BPF_ABS : BPFModeModifer<0x1>;
def BPF_IND : BPFModeModifer<0x2>;
def BPF_MEM : BPFModeModifer<0x3>;
+def BPF_MEMSX : BPFModeModifer<0x4>;
def BPF_ATOMIC : BPFModeModifer<0x6>;
class BPFAtomicFlag<bits<4> val> {
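
BPF_MEMSX (mode value 0x4) is the sign-extending load mode added by the BPF v4 ISA. Assuming the standard kernel opcode layout (mode in bits 5-7, size in bits 3-4, class in bits 0-2; the layout itself is not shown in this hunk), a sign-extending word load encodes as:

    // Assumed encoding constants, mirroring the kernel's uapi values:
    enum : unsigned char {
      ClassLDX  = 0x01, // register load
      SizeW     = 0x00, // 32-bit access
      ModeMEMSX = 0x80  // mode 0x4 placed in bits 5-7
    };
    // "r0 = *(s32 *)(r1 + 0)" carries opcode 0x81:
    constexpr unsigned char Opc = ClassLDX | SizeW | ModeMEMSX;
    static_assert(Opc == 0x81, "BPF_LDX | BPF_W | BPF_MEMSX");
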
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.td
index 27bd87667b84..7d443a344901 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -49,13 +49,29 @@ def BPFWrapper : SDNode<"BPFISD::Wrapper", SDT_BPFWrapper>;
def BPFmemcpy : SDNode<"BPFISD::MEMCPY", SDT_BPFMEMCPY,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue,
SDNPMayStore, SDNPMayLoad]>;
-def BPFIsLittleEndian : Predicate<"CurDAG->getDataLayout().isLittleEndian()">;
-def BPFIsBigEndian : Predicate<"!CurDAG->getDataLayout().isLittleEndian()">;
+def BPFIsLittleEndian : Predicate<"Subtarget->isLittleEndian()">;
+def BPFIsBigEndian : Predicate<"!Subtarget->isLittleEndian()">;
def BPFHasALU32 : Predicate<"Subtarget->getHasAlu32()">;
def BPFNoALU32 : Predicate<"!Subtarget->getHasAlu32()">;
+def BPFHasLdsx : Predicate<"Subtarget->hasLdsx()">;
+def BPFHasMovsx : Predicate<"Subtarget->hasMovsx()">;
+def BPFHasBswap : Predicate<"Subtarget->hasBswap()">;
+def BPFHasSdivSmod : Predicate<"Subtarget->hasSdivSmod()">;
+def BPFNoMovsx : Predicate<"!Subtarget->hasMovsx()">;
+def BPFNoBswap : Predicate<"!Subtarget->hasBswap()">;
+def BPFHasStoreImm : Predicate<"Subtarget->hasStoreImm()">;
+
+class ImmediateAsmOperand<string name> : AsmOperandClass {
+ let Name = name;
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = !strconcat("Invalid", name);
+}
+
+def SImm16AsmOperand : ImmediateAsmOperand<"SImm16">;
def brtarget : Operand<OtherVT> {
let PrintMethod = "printBrTargetOperand";
+ let ParserMatchClass = ImmediateAsmOperand<"BrTarget">;
}
def calltarget : Operand<i64>;
@@ -63,10 +79,22 @@ def u64imm : Operand<i64> {
let PrintMethod = "printImm64Operand";
}
+def s16imm : Operand<i16> {
+ let ParserMatchClass = SImm16AsmOperand;
+}
+
+def gpr_or_imm : Operand<i64>;
+
def i64immSExt32 : PatLeaf<(i64 imm),
[{return isInt<32>(N->getSExtValue()); }]>;
def i32immSExt32 : PatLeaf<(i32 imm),
[{return isInt<32>(N->getSExtValue()); }]>;
+def i64immZExt32 : PatLeaf<(i64 imm),
+ [{return isUInt<32>(N->getZExtValue()); }]>;
+
+def imm_to_i64 : SDNodeXForm<timm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i64);
+}]>;
// Addressing modes.
def ADDRri : ComplexPattern<i64, 2, "SelectAddr", [], []>;
@@ -77,7 +105,7 @@ def MEMri : Operand<i64> {
let PrintMethod = "printMemOperand";
let EncoderMethod = "getMemoryOpValue";
let DecoderMethod = "decodeMemoryOpValue";
- let MIOperandInfo = (ops GPR, i16imm);
+ let MIOperandInfo = (ops GPR, s16imm);
}
// Conditional code predicates - used for pattern matching for jump instructions
@@ -121,6 +149,7 @@ def BPF_CC_LTU_32 : PatLeaf<(i32 imm),
[{return (N->getZExtValue() == ISD::SETULT);}]>;
def BPF_CC_LEU_32 : PatLeaf<(i32 imm),
[{return (N->getZExtValue() == ISD::SETULE);}]>;
+def NoCond : PatLeaf<(vt)> {}
// For arithmetic and jump instructions the 8-bit 'code'
// field is divided into three parts:
@@ -237,21 +266,23 @@ defm JULT : J<BPF_JLT, "<", BPF_CC_LTU, BPF_CC_LTU_32>;
defm JULE : J<BPF_JLE, "<=", BPF_CC_LEU, BPF_CC_LEU_32>;
defm JSLT : J<BPF_JSLT, "s<", BPF_CC_LT, BPF_CC_LT_32>;
defm JSLE : J<BPF_JSLE, "s<=", BPF_CC_LE, BPF_CC_LE_32>;
+defm JSET : J<BPF_JSET, "&", NoCond, NoCond>;
}
// ALU instructions
-class ALU_RI<BPFOpClass Class, BPFArithOp Opc,
+class ALU_RI<BPFOpClass Class, BPFArithOp Opc, int off,
dag outs, dag ins, string asmstr, list<dag> pattern>
: TYPE_ALU_JMP<Opc.Value, BPF_K.Value, outs, ins, asmstr, pattern> {
bits<4> dst;
bits<32> imm;
let Inst{51-48} = dst;
+ let Inst{47-32} = off;
let Inst{31-0} = imm;
let BPFClass = Class;
}
-class ALU_RR<BPFOpClass Class, BPFArithOp Opc,
+class ALU_RR<BPFOpClass Class, BPFArithOp Opc, int off,
dag outs, dag ins, string asmstr, list<dag> pattern>
: TYPE_ALU_JMP<Opc.Value, BPF_X.Value, outs, ins, asmstr, pattern> {
bits<4> dst;
@@ -259,26 +290,27 @@ class ALU_RR<BPFOpClass Class, BPFArithOp Opc,
let Inst{55-52} = src;
let Inst{51-48} = dst;
+ let Inst{47-32} = off;
let BPFClass = Class;
}
-multiclass ALU<BPFArithOp Opc, string OpcodeStr, SDNode OpNode> {
- def _rr : ALU_RR<BPF_ALU64, Opc,
+multiclass ALU<BPFArithOp Opc, int off, string OpcodeStr, SDNode OpNode> {
+ def _rr : ALU_RR<BPF_ALU64, Opc, off,
(outs GPR:$dst),
(ins GPR:$src2, GPR:$src),
"$dst "#OpcodeStr#" $src",
[(set GPR:$dst, (OpNode i64:$src2, i64:$src))]>;
- def _ri : ALU_RI<BPF_ALU64, Opc,
+ def _ri : ALU_RI<BPF_ALU64, Opc, off,
(outs GPR:$dst),
(ins GPR:$src2, i64imm:$imm),
"$dst "#OpcodeStr#" $imm",
[(set GPR:$dst, (OpNode GPR:$src2, i64immSExt32:$imm))]>;
- def _rr_32 : ALU_RR<BPF_ALU, Opc,
+ def _rr_32 : ALU_RR<BPF_ALU, Opc, off,
(outs GPR32:$dst),
(ins GPR32:$src2, GPR32:$src),
"$dst "#OpcodeStr#" $src",
[(set GPR32:$dst, (OpNode i32:$src2, i32:$src))]>;
- def _ri_32 : ALU_RI<BPF_ALU, Opc,
+ def _ri_32 : ALU_RI<BPF_ALU, Opc, off,
(outs GPR32:$dst),
(ins GPR32:$src2, i32imm:$imm),
"$dst "#OpcodeStr#" $imm",
@@ -287,18 +319,23 @@ multiclass ALU<BPFArithOp Opc, string OpcodeStr, SDNode OpNode> {
let Constraints = "$dst = $src2" in {
let isAsCheapAsAMove = 1 in {
- defm ADD : ALU<BPF_ADD, "+=", add>;
- defm SUB : ALU<BPF_SUB, "-=", sub>;
- defm OR : ALU<BPF_OR, "|=", or>;
- defm AND : ALU<BPF_AND, "&=", and>;
- defm SLL : ALU<BPF_LSH, "<<=", shl>;
- defm SRL : ALU<BPF_RSH, ">>=", srl>;
- defm XOR : ALU<BPF_XOR, "^=", xor>;
- defm SRA : ALU<BPF_ARSH, "s>>=", sra>;
-}
- defm MUL : ALU<BPF_MUL, "*=", mul>;
- defm DIV : ALU<BPF_DIV, "/=", udiv>;
- defm MOD : ALU<BPF_MOD, "%=", urem>;
+ defm ADD : ALU<BPF_ADD, 0, "+=", add>;
+ defm SUB : ALU<BPF_SUB, 0, "-=", sub>;
+ defm OR : ALU<BPF_OR, 0, "|=", or>;
+ defm AND : ALU<BPF_AND, 0, "&=", and>;
+ defm SLL : ALU<BPF_LSH, 0, "<<=", shl>;
+ defm SRL : ALU<BPF_RSH, 0, ">>=", srl>;
+ defm XOR : ALU<BPF_XOR, 0, "^=", xor>;
+ defm SRA : ALU<BPF_ARSH, 0, "s>>=", sra>;
+}
+ defm MUL : ALU<BPF_MUL, 0, "*=", mul>;
+ defm DIV : ALU<BPF_DIV, 0, "/=", udiv>;
+ defm MOD : ALU<BPF_MOD, 0, "%=", urem>;
+
+ let Predicates = [BPFHasSdivSmod] in {
+ defm SDIV : ALU<BPF_DIV, 1, "s/=", sdiv>;
+ defm SMOD : ALU<BPF_MOD, 1, "s%=", srem>;
+ }
}
class NEG_RR<BPFOpClass Class, BPFArithOp Opc,
@@ -338,26 +375,49 @@ class LD_IMM64<bits<4> Pseudo, string OpcodeStr>
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def LD_imm64 : LD_IMM64<0, "=">;
-def MOV_rr : ALU_RR<BPF_ALU64, BPF_MOV,
+def MOV_rr : ALU_RR<BPF_ALU64, BPF_MOV, 0,
(outs GPR:$dst),
(ins GPR:$src),
"$dst = $src",
[]>;
-def MOV_ri : ALU_RI<BPF_ALU64, BPF_MOV,
+def MOV_ri : ALU_RI<BPF_ALU64, BPF_MOV, 0,
(outs GPR:$dst),
(ins i64imm:$imm),
"$dst = $imm",
[(set GPR:$dst, (i64 i64immSExt32:$imm))]>;
-def MOV_rr_32 : ALU_RR<BPF_ALU, BPF_MOV,
+def MOV_rr_32 : ALU_RR<BPF_ALU, BPF_MOV, 0,
(outs GPR32:$dst),
(ins GPR32:$src),
"$dst = $src",
[]>;
-def MOV_ri_32 : ALU_RI<BPF_ALU, BPF_MOV,
+def MOV_ri_32 : ALU_RI<BPF_ALU, BPF_MOV, 0,
(outs GPR32:$dst),
(ins i32imm:$imm),
"$dst = $imm",
[(set GPR32:$dst, (i32 i32immSExt32:$imm))]>;
+
+let Predicates = [BPFHasMovsx] in {
+ def MOVSX_rr_8 : ALU_RR<BPF_ALU64, BPF_MOV, 8,
+ (outs GPR:$dst), (ins GPR:$src),
+ "$dst = (s8)$src",
+ [(set GPR:$dst, (sext_inreg GPR:$src, i8))]>;
+ def MOVSX_rr_16 : ALU_RR<BPF_ALU64, BPF_MOV, 16,
+ (outs GPR:$dst), (ins GPR:$src),
+ "$dst = (s16)$src",
+ [(set GPR:$dst, (sext_inreg GPR:$src, i16))]>;
+ def MOVSX_rr_32 : ALU_RR<BPF_ALU64, BPF_MOV, 32,
+ (outs GPR:$dst), (ins GPR:$src),
+ "$dst = (s32)$src",
+ [(set GPR:$dst, (sext_inreg GPR:$src, i32))]>;
+ def MOVSX_rr_32_8 : ALU_RR<BPF_ALU, BPF_MOV, 8,
+ (outs GPR32:$dst), (ins GPR32:$src),
+ "$dst = (s8)$src",
+ [(set GPR32:$dst, (sext_inreg GPR32:$src, i8))]>;
+ def MOVSX_rr_32_16 : ALU_RR<BPF_ALU, BPF_MOV, 16,
+ (outs GPR32:$dst), (ins GPR32:$src),
+ "$dst = (s16)$src",
+ [(set GPR32:$dst, (sext_inreg GPR32:$src, i16))]>;
+}
}
def FI_ri
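
Per the asm strings above, the MOVSX_* definitions print as single sign-extending moves (the rN/wN register spellings follow the existing BPF assembly convention):

    // 64-bit destination (BPF_ALU64):
    //   r1 = (s8)r2    ; sign-extend bits 0-7 of r2 into all 64 bits of r1
    //   r1 = (s16)r2
    //   r1 = (s32)r2
    // 32-bit destination (BPF_ALU, ALU32):
    //   w1 = (s8)w2
    //   w1 = (s16)w2
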
@@ -411,7 +471,7 @@ class STORE<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
}
class STOREi64<BPFWidthModifer Opc, string OpcodeStr, PatFrag OpNode>
- : STORE<Opc, OpcodeStr, [(OpNode i64:$src, ADDRri:$addr)]>;
+ : STORE<Opc, OpcodeStr, [(OpNode GPR:$src, ADDRri:$addr)]>;
let Predicates = [BPFNoALU32] in {
def STW : STOREi64<BPF_W, "u32", truncstorei32>;
@@ -420,9 +480,53 @@ let Predicates = [BPFNoALU32] in {
}
def STD : STOREi64<BPF_DW, "u64", store>;
-// LOAD instructions
-class LOAD<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
+class STORE_imm<BPFWidthModifer SizeOp,
+ string OpcodeStr, dag Pattern>
: TYPE_LD_ST<BPF_MEM.Value, SizeOp.Value,
+ (outs),
+ (ins i64imm:$imm, MEMri:$addr),
+ "*("#OpcodeStr#" *)($addr) = $imm",
+ [Pattern]> {
+ bits<20> addr;
+ bits<32> imm;
+
+ let Inst{51-48} = addr{19-16}; // base reg
+ let Inst{47-32} = addr{15-0}; // offset
+ let Inst{31-0} = imm;
+ let BPFClass = BPF_ST;
+}
+
+let Predicates = [BPFHasStoreImm] in {
+  // Opcode (BPF_ST | BPF_MEM | BPF_DW) implies sign extension for the
+  // value stored to memory:
+  // - it is fine to generate such a write when the immediate is -1
+  // - it is incorrect to generate such a write when the immediate is
+  //   +0xffff_ffff.
+ //
+ // In the latter case two instructions would be generated instead of
+ // one BPF_ST:
+ // rA = 0xffffffff ll ; LD_imm64
+ // *(u64 *)(rB + 0) = rA ; STX
+ //
+  // For BPF_{B,H,W} the size of the value stored matches the size of the immediate.
+ def STD_imm : STORE_imm<BPF_DW, "u64", (store (i64 i64immSExt32:$imm), ADDRri:$addr)>;
+ def STW_imm : STORE_imm<BPF_W, "u32", (truncstorei32 (i64 i64immZExt32:$imm), ADDRri:$addr)>;
+ def STH_imm : STORE_imm<BPF_H, "u16", (truncstorei16 (i64 i64immZExt32:$imm), ADDRri:$addr)>;
+ def STB_imm : STORE_imm<BPF_B, "u8", (truncstorei8 (i64 i64immZExt32:$imm), ADDRri:$addr)>;
+}
+
+let Predicates = [BPFHasALU32, BPFHasStoreImm] in {
+ def : Pat<(store (i32 imm:$src), ADDRri:$dst),
+ (STW_imm (imm_to_i64 $src), ADDRri:$dst)>;
+ def : Pat<(truncstorei16 (i32 imm:$src), ADDRri:$dst),
+ (STH_imm (imm_to_i64 imm:$src), ADDRri:$dst)>;
+ def : Pat<(truncstorei8 (i32 imm:$src), ADDRri:$dst),
+ (STB_imm (imm_to_i64 imm:$src), ADDRri:$dst)>;
+}
+
+// LOAD instructions
+class LOAD<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, list<dag> Pattern>
+ : TYPE_LD_ST<ModOp.Value, SizeOp.Value,
(outs GPR:$dst),
(ins MEMri:$addr),
"$dst = *("#OpcodeStr#" *)($addr)",
@@ -436,22 +540,25 @@ class LOAD<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
let BPFClass = BPF_LDX;
}
-class LOADi64<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
- : LOAD<SizeOp, OpcodeStr, [(set i64:$dst, (OpNode ADDRri:$addr))]>;
+class LOADi64<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, PatFrag OpNode>
+ : LOAD<SizeOp, ModOp, OpcodeStr, [(set i64:$dst, (OpNode ADDRri:$addr))]>;
let isCodeGenOnly = 1 in {
- def CORE_MEM : TYPE_LD_ST<BPF_MEM.Value, BPF_W.Value,
- (outs GPR:$dst),
- (ins u64imm:$opcode, GPR:$src, u64imm:$offset),
- "$dst = core_mem($opcode, $src, $offset)",
- []>;
- def CORE_ALU32_MEM : TYPE_LD_ST<BPF_MEM.Value, BPF_W.Value,
- (outs GPR32:$dst),
- (ins u64imm:$opcode, GPR:$src, u64imm:$offset),
- "$dst = core_alu32_mem($opcode, $src, $offset)",
- []>;
+ class CORE_LD<RegisterClass RegClass, string Sz>
+ : TYPE_LD_ST<BPF_MEM.Value, BPF_W.Value,
+ (outs RegClass:$dst),
+ (ins u64imm:$opcode, GPR:$src, u64imm:$offset),
+ "$dst = core_ld"#Sz#"($opcode, $src, $offset)",
+ []>;
+ def CORE_LD64 : CORE_LD<GPR, "64">;
+ def CORE_LD32 : CORE_LD<GPR32, "32">;
+ def CORE_ST : TYPE_LD_ST<BPF_MEM.Value, BPF_W.Value,
+ (outs),
+ (ins gpr_or_imm:$src, u64imm:$opcode, GPR:$ptr, u64imm:$offset),
+ "core_st($src, $opcode, $ptr, $offset)",
+ []>;
let Constraints = "$dst = $src" in {
- def CORE_SHIFT : ALU_RR<BPF_ALU64, BPF_LSH,
+ def CORE_SHIFT : ALU_RR<BPF_ALU64, BPF_LSH, 0,
(outs GPR:$dst),
(ins u64imm:$opcode, GPR:$src, u64imm:$offset),
"$dst = core_shift($opcode, $src, $offset)",
@@ -460,12 +567,18 @@ let isCodeGenOnly = 1 in {
}
let Predicates = [BPFNoALU32] in {
- def LDW : LOADi64<BPF_W, "u32", zextloadi32>;
- def LDH : LOADi64<BPF_H, "u16", zextloadi16>;
- def LDB : LOADi64<BPF_B, "u8", zextloadi8>;
+ def LDW : LOADi64<BPF_W, BPF_MEM, "u32", zextloadi32>;
+ def LDH : LOADi64<BPF_H, BPF_MEM, "u16", zextloadi16>;
+ def LDB : LOADi64<BPF_B, BPF_MEM, "u8", zextloadi8>;
}
-def LDD : LOADi64<BPF_DW, "u64", load>;
+let Predicates = [BPFHasLdsx] in {
+ def LDWSX : LOADi64<BPF_W, BPF_MEMSX, "s32", sextloadi32>;
+ def LDHSX : LOADi64<BPF_H, BPF_MEMSX, "s16", sextloadi16>;
+ def LDBSX : LOADi64<BPF_B, BPF_MEMSX, "s8", sextloadi8>;
+}
+
+def LDD : LOADi64<BPF_DW, BPF_MEM, "u64", load>;
class BRANCH<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
: TYPE_ALU_JMP<Opc.Value, BPF_K.Value,
@@ -479,6 +592,18 @@ class BRANCH<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
let BPFClass = BPF_JMP;
}
+class BRANCH_LONG<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
+ : TYPE_ALU_JMP<Opc.Value, BPF_K.Value,
+ (outs),
+ (ins brtarget:$BrDst),
+ !strconcat(OpcodeStr, " $BrDst"),
+ Pattern> {
+ bits<32> BrDst;
+
+ let Inst{31-0} = BrDst;
+ let BPFClass = BPF_JMP32;
+}
+
class CALL<string OpcodeStr>
: TYPE_ALU_JMP<BPF_CALL.Value, BPF_K.Value,
(outs),
@@ -506,6 +631,7 @@ class CALLX<string OpcodeStr>
// Jump always
let isBranch = 1, isTerminator = 1, hasDelaySlot=0, isBarrier = 1 in {
def JMP : BRANCH<BPF_JA, "goto", [(br bb:$BrDst)]>;
+ def JMPL : BRANCH_LONG<BPF_JA, "gotol", []>;
}
// Jump and link
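
BRANCH_LONG widens the branch target to the full 32-bit immediate field of the BPF_JMP32 class, giving the assembler a form for displacements that overflow goto's 16-bit offset:

    // Per the definitions above:
    //   goto  LBB0_10   ; BRANCH, 16-bit signed displacement
    //   gotol LBB0_10   ; BRANCH_LONG, 32-bit signed displacement
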
@@ -835,7 +961,7 @@ let Defs = [R0], Uses = [R0] in {
}
// bswap16, bswap32, bswap64
-class BSWAP<bits<32> SizeOp, string OpcodeStr, BPFSrcType SrcType, list<dag> Pattern>
+class BSWAP<BPFOpClass Class, bits<32> SizeOp, string OpcodeStr, BPFSrcType SrcType, list<dag> Pattern>
: TYPE_ALU_JMP<BPF_END.Value, SrcType.Value,
(outs GPR:$dst),
(ins GPR:$src),
@@ -845,21 +971,29 @@ class BSWAP<bits<32> SizeOp, string OpcodeStr, BPFSrcType SrcType, list<dag> Pat
let Inst{51-48} = dst;
let Inst{31-0} = SizeOp;
- let BPFClass = BPF_ALU;
+ let BPFClass = Class;
}
let Constraints = "$dst = $src" in {
+ let Predicates = [BPFHasBswap] in {
+ def BSWAP16 : BSWAP<BPF_ALU64, 16, "bswap16", BPF_TO_LE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>;
+ def BSWAP32 : BSWAP<BPF_ALU64, 32, "bswap32", BPF_TO_LE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>;
+ def BSWAP64 : BSWAP<BPF_ALU64, 64, "bswap64", BPF_TO_LE, [(set GPR:$dst, (bswap GPR:$src))]>;
+ }
+
+ let Predicates = [BPFNoBswap] in {
let Predicates = [BPFIsLittleEndian] in {
- def BE16 : BSWAP<16, "be16", BPF_TO_BE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>;
- def BE32 : BSWAP<32, "be32", BPF_TO_BE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>;
- def BE64 : BSWAP<64, "be64", BPF_TO_BE, [(set GPR:$dst, (bswap GPR:$src))]>;
+ def BE16 : BSWAP<BPF_ALU, 16, "be16", BPF_TO_BE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>;
+ def BE32 : BSWAP<BPF_ALU, 32, "be32", BPF_TO_BE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>;
+ def BE64 : BSWAP<BPF_ALU, 64, "be64", BPF_TO_BE, [(set GPR:$dst, (bswap GPR:$src))]>;
}
let Predicates = [BPFIsBigEndian] in {
- def LE16 : BSWAP<16, "le16", BPF_TO_LE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>;
- def LE32 : BSWAP<32, "le32", BPF_TO_LE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>;
- def LE64 : BSWAP<64, "le64", BPF_TO_LE, [(set GPR:$dst, (bswap GPR:$src))]>;
+ def LE16 : BSWAP<BPF_ALU, 16, "le16", BPF_TO_LE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>;
+ def LE32 : BSWAP<BPF_ALU, 32, "le32", BPF_TO_LE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>;
+ def LE64 : BSWAP<BPF_ALU, 64, "le64", BPF_TO_LE, [(set GPR:$dst, (bswap GPR:$src))]>;
}
+ }
}
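// The srl-after-bswap patterns above express the narrow swaps through a
// full 64-bit swap; a standalone C++ sketch of the bswap16 case:
//
//   uint64_t bswap16_via_bswap64(uint64_t x) {
//     // bswap64 moves the swapped low two bytes to the top of the
//     // register; the logical shift right by 48 brings them back down.
//     return __builtin_bswap64(x) >> 48;  // 0x1234 -> 0x3412
//   }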
let Defs = [R0, R1, R2, R3, R4, R5], Uses = [R6], hasSideEffects = 1,
@@ -898,13 +1032,20 @@ def LD_IND_H : LOAD_IND<BPF_H, "u16", int_bpf_load_half>;
def LD_IND_W : LOAD_IND<BPF_W, "u32", int_bpf_load_word>;
let isCodeGenOnly = 1 in {
- def MOV_32_64 : ALU_RR<BPF_ALU, BPF_MOV,
+ def MOV_32_64 : ALU_RR<BPF_ALU, BPF_MOV, 0,
(outs GPR:$dst), (ins GPR32:$src),
"$dst = $src", []>;
}
-def : Pat<(i64 (sext GPR32:$src)),
- (SRA_ri (SLL_ri (MOV_32_64 GPR32:$src), 32), 32)>;
+let Predicates = [BPFNoMovsx] in {
+ def : Pat<(i64 (sext GPR32:$src)),
+ (SRA_ri (SLL_ri (MOV_32_64 GPR32:$src), 32), 32)>;
+}
+
+let Predicates = [BPFHasMovsx] in {
+ def : Pat<(i64 (sext GPR32:$src)),
+ (MOVSX_rr_32 (MOV_32_64 GPR32:$src))>;
+}
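// The BPFNoMovsx fallback above is the classic shift-pair idiom; a
// standalone sketch of the equivalence it relies on:
//
//   uint64_t payload = 0xfffffff6;                   // -10 as 32 bits
//   int64_t  sext = (int64_t)(payload << 32) >> 32;  // == -10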
def : Pat<(i64 (zext GPR32:$src)), (MOV_32_64 GPR32:$src)>;
@@ -932,7 +1073,7 @@ class STORE32<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
}
class STOREi32<BPFWidthModifer Opc, string OpcodeStr, PatFrag OpNode>
- : STORE32<Opc, OpcodeStr, [(OpNode i32:$src, ADDRri:$addr)]>;
+ : STORE32<Opc, OpcodeStr, [(OpNode GPR32:$src, ADDRri:$addr)]>;
let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
def STW32 : STOREi32<BPF_W, "u32", store>;
@@ -940,8 +1081,8 @@ let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
def STB32 : STOREi32<BPF_B, "u8", truncstorei8>;
}
-class LOAD32<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
- : TYPE_LD_ST<BPF_MEM.Value, SizeOp.Value,
+class LOAD32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, list<dag> Pattern>
+ : TYPE_LD_ST<ModOp.Value, SizeOp.Value,
(outs GPR32:$dst),
(ins MEMri:$addr),
"$dst = *("#OpcodeStr#" *)($addr)",
@@ -955,13 +1096,13 @@ class LOAD32<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
let BPFClass = BPF_LDX;
}
-class LOADi32<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
- : LOAD32<SizeOp, OpcodeStr, [(set i32:$dst, (OpNode ADDRri:$addr))]>;
+class LOADi32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, PatFrag OpNode>
+ : LOAD32<SizeOp, ModOp, OpcodeStr, [(set i32:$dst, (OpNode ADDRri:$addr))]>;
let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
- def LDW32 : LOADi32<BPF_W, "u32", load>;
- def LDH32 : LOADi32<BPF_H, "u16", zextloadi16>;
- def LDB32 : LOADi32<BPF_B, "u8", zextloadi8>;
+ def LDW32 : LOADi32<BPF_W, BPF_MEM, "u32", load>;
+ def LDH32 : LOADi32<BPF_H, BPF_MEM, "u16", zextloadi16>;
+ def LDB32 : LOADi32<BPF_B, BPF_MEM, "u8", zextloadi8>;
}
let Predicates = [BPFHasALU32] in {
@@ -973,6 +1114,12 @@ let Predicates = [BPFHasALU32] in {
(STW32 (EXTRACT_SUBREG GPR:$src, sub_32), ADDRri:$dst)>;
def : Pat<(i32 (extloadi8 ADDRri:$src)), (i32 (LDB32 ADDRri:$src))>;
def : Pat<(i32 (extloadi16 ADDRri:$src)), (i32 (LDH32 ADDRri:$src))>;
+
+ let Predicates = [BPFHasLdsx] in {
+ def : Pat<(i32 (sextloadi8 ADDRri:$src)), (EXTRACT_SUBREG (LDBSX ADDRri:$src), sub_32)>;
+ def : Pat<(i32 (sextloadi16 ADDRri:$src)), (EXTRACT_SUBREG (LDHSX ADDRri:$src), sub_32)>;
+ }
+
def : Pat<(i64 (zextloadi8 ADDRri:$src)),
(SUBREG_TO_REG (i64 0), (LDB32 ADDRri:$src), sub_32)>;
def : Pat<(i64 (zextloadi16 ADDRri:$src)),
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp
index d0272bd97bed..f0edf706bd8f 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp
@@ -34,6 +34,9 @@ using namespace llvm;
#define DEBUG_TYPE "bpf-mi-zext-elim"
+static cl::opt<int> GotolAbsLowBound("gotol-abs-low-bound", cl::Hidden,
+ cl::init(INT16_MAX >> 1), cl::desc("Specify gotol lower bound"));
+
STATISTIC(ZExtElemNum, "Number of zero extension shifts eliminated");
namespace {
@@ -302,6 +305,8 @@ struct BPFMIPreEmitPeephole : public MachineFunctionPass {
static char ID;
MachineFunction *MF;
const TargetRegisterInfo *TRI;
+ const BPFInstrInfo *TII;
+ bool SupportGotol;
BPFMIPreEmitPeephole() : MachineFunctionPass(ID) {
initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
@@ -311,7 +316,9 @@ private:
// Initialize class variables.
void initialize(MachineFunction &MFParm);
+ bool in16BitRange(int Num);
bool eliminateRedundantMov();
+ bool adjustBranch();
public:
@@ -322,14 +329,20 @@ public:
initialize(MF);
- return eliminateRedundantMov();
+ bool Changed;
+ Changed = eliminateRedundantMov();
+ if (SupportGotol)
+ Changed = adjustBranch() || Changed;
+ return Changed;
}
};
// Initialize class variables.
void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) {
MF = &MFParm;
+ TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
+ SupportGotol = MF->getSubtarget<BPFSubtarget>().hasGotol();
LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n");
}
@@ -374,190 +387,222 @@ bool BPFMIPreEmitPeephole::eliminateRedundantMov() {
return Eliminated;
}
-} // end anonymous namespace
-
-INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",
- "BPF PreEmit Peephole Optimization", false, false)
-
-char BPFMIPreEmitPeephole::ID = 0;
-FunctionPass* llvm::createBPFMIPreEmitPeepholePass()
-{
- return new BPFMIPreEmitPeephole();
+bool BPFMIPreEmitPeephole::in16BitRange(int Num) {
+ // Well, the cut-off is not precisely at the 16bit range since
+ // new insns are added during the transformation, so let us be
+ // a little bit conservative.
+ return Num >= -GotolAbsLowBound && Num <= GotolAbsLowBound;
}
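// With the default -gotol-abs-low-bound, the cut-off is INT16_MAX >> 1
// == 16383, roughly half of the encodable 16-bit span; a standalone
// sketch of the same check:
//
//   static const int Bound = INT16_MAX >> 1;  // 16383 by default
//   bool inRange(int Num) { return Num >= -Bound && Num <= Bound; }
//   // inRange(16383) -> true, inRange(20000) -> false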
-STATISTIC(TruncElemNum, "Number of truncation eliminated");
-
-namespace {
-
-struct BPFMIPeepholeTruncElim : public MachineFunctionPass {
-
- static char ID;
- const BPFInstrInfo *TII;
- MachineFunction *MF;
- MachineRegisterInfo *MRI;
-
- BPFMIPeepholeTruncElim() : MachineFunctionPass(ID) {
- initializeBPFMIPeepholeTruncElimPass(*PassRegistry::getPassRegistry());
- }
-
-private:
- // Initialize class variables.
- void initialize(MachineFunction &MFParm);
-
- bool eliminateTruncSeq();
-
-public:
-
- // Main entry point for this pass.
- bool runOnMachineFunction(MachineFunction &MF) override {
- if (skipFunction(MF.getFunction()))
- return false;
-
- initialize(MF);
-
- return eliminateTruncSeq();
- }
-};
-
-static bool TruncSizeCompatible(int TruncSize, unsigned opcode)
-{
- if (TruncSize == 1)
- return opcode == BPF::LDB || opcode == BPF::LDB32;
-
- if (TruncSize == 2)
- return opcode == BPF::LDH || opcode == BPF::LDH32;
-
- if (TruncSize == 4)
- return opcode == BPF::LDW || opcode == BPF::LDW32;
-
- return false;
-}
-
-// Initialize class variables.
-void BPFMIPeepholeTruncElim::initialize(MachineFunction &MFParm) {
- MF = &MFParm;
- MRI = &MF->getRegInfo();
- TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
- LLVM_DEBUG(dbgs() << "*** BPF MachineSSA TRUNC Elim peephole pass ***\n\n");
-}
-
-// Reg truncating is often the result of 8/16/32bit->64bit or
-// 8/16bit->32bit conversion. If the reg value is loaded with
-// masked byte width, the AND operation can be removed since
-// BPF LOAD already has zero extension.
+// Before cpu=v4, only a 16bit branch target offset (-0x8000 to 0x7fff)
+// is supported for both unconditional (JMP) and conditional (JEQ, JSGT,
+// etc.) branches. In certain cases, e.g., full unrolling, the branch
+// target offset might exceed the 16bit range. If this happens, LLVM
+// will generate incorrect code as the offset is truncated to 16bit.
//
-// This also solved a correctness issue.
-// In BPF socket-related program, e.g., __sk_buff->{data, data_end}
-// are 32-bit registers, but later on, kernel verifier will rewrite
-// it with 64-bit value. Therefore, truncating the value after the
-// load will result in incorrect code.
-bool BPFMIPeepholeTruncElim::eliminateTruncSeq() {
- MachineInstr* ToErase = nullptr;
- bool Eliminated = false;
-
+// To fix this rare case, a new insn JMPL is introduced. This new
+// insn supports a 32bit branch target offset. The compiler
+// does not use this insn during insn selection. Rather, the BPF backend
+// will estimate the branch target offset and do JMP -> JMPL and
+// JEQ -> JEQ + JMPL conversion if the estimated branch target offset
+// is beyond 16bit.
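// A standalone illustration of the hazard: an offset that needs more
// than 16 bits silently wraps when squeezed into a 16-bit field.
//
//   int32_t off = 40000;             // needs 17 bits
//   int16_t encoded = (int16_t)off;  // holds -25536, a bogus target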
+bool BPFMIPreEmitPeephole::adjustBranch() {
+ bool Changed = false;
+ int CurrNumInsns = 0;
+ DenseMap<MachineBasicBlock *, int> SoFarNumInsns;
+ DenseMap<MachineBasicBlock *, MachineBasicBlock *> FollowThroughBB;
+ std::vector<MachineBasicBlock *> MBBs;
+
+ MachineBasicBlock *PrevBB = nullptr;
for (MachineBasicBlock &MBB : *MF) {
- for (MachineInstr &MI : MBB) {
- // The second insn to remove if the eliminate candidate is a pair.
- MachineInstr *MI2 = nullptr;
- Register DstReg, SrcReg;
- MachineInstr *DefMI;
- int TruncSize = -1;
-
- // If the previous instruction was marked for elimination, remove it now.
- if (ToErase) {
- ToErase->eraseFromParent();
- ToErase = nullptr;
+ // MBB.size() is the number of insns in this basic block, including some
+ // debug info, e.g., DEBUG_VALUE, so we may over-count a little bit.
+ // Typically we have way more normal insns than DEBUG_VALUE insns.
+ // Also, if we indeed need to convert a conditional branch like JEQ to
+ // JEQ + JMPL, we will actually introduce some new insns, as shown below.
+ CurrNumInsns += (int)MBB.size();
+ SoFarNumInsns[&MBB] = CurrNumInsns;
+ if (PrevBB != nullptr)
+ FollowThroughBB[PrevBB] = &MBB;
+ PrevBB = &MBB;
+ // A list of original BBs to make later traversal easier.
+ MBBs.push_back(&MBB);
+ }
+ FollowThroughBB[PrevBB] = nullptr;
+
+ for (unsigned i = 0; i < MBBs.size(); i++) {
+ // We have four cases here:
+ // (1). no terminator, simple follow through.
+ // (2). jmp to another bb.
+ // (3). conditional jmp to another bb or follow through.
+ // (4). conditional jmp followed by an unconditional jmp.
+ MachineInstr *CondJmp = nullptr, *UncondJmp = nullptr;
+
+ MachineBasicBlock *MBB = MBBs[i];
+ for (MachineInstr &Term : MBB->terminators()) {
+ if (Term.isConditionalBranch()) {
+ assert(CondJmp == nullptr);
+ CondJmp = &Term;
+ } else if (Term.isUnconditionalBranch()) {
+ assert(UncondJmp == nullptr);
+ UncondJmp = &Term;
}
+ }
- // AND A, 0xFFFFFFFF will be turned into SLL/SRL pair due to immediate
- // for BPF ANDI is i32, and this case only happens on ALU64.
- if (MI.getOpcode() == BPF::SRL_ri &&
- MI.getOperand(2).getImm() == 32) {
- SrcReg = MI.getOperand(1).getReg();
- if (!MRI->hasOneNonDBGUse(SrcReg))
- continue;
-
- MI2 = MRI->getVRegDef(SrcReg);
- DstReg = MI.getOperand(0).getReg();
-
- if (!MI2 ||
- MI2->getOpcode() != BPF::SLL_ri ||
- MI2->getOperand(2).getImm() != 32)
- continue;
-
- // Update SrcReg.
- SrcReg = MI2->getOperand(1).getReg();
- DefMI = MRI->getVRegDef(SrcReg);
- if (DefMI)
- TruncSize = 4;
- } else if (MI.getOpcode() == BPF::AND_ri ||
- MI.getOpcode() == BPF::AND_ri_32) {
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- DefMI = MRI->getVRegDef(SrcReg);
-
- if (!DefMI)
- continue;
+ // (1). no terminator, simple follow through.
+ if (!CondJmp && !UncondJmp)
+ continue;
- int64_t imm = MI.getOperand(2).getImm();
- if (imm == 0xff)
- TruncSize = 1;
- else if (imm == 0xffff)
- TruncSize = 2;
- }
+ MachineBasicBlock *CondTargetBB, *JmpBB;
+ CurrNumInsns = SoFarNumInsns[MBB];
- if (TruncSize == -1)
+ // (2). jmp to another bb.
+ if (!CondJmp && UncondJmp) {
+ JmpBB = UncondJmp->getOperand(0).getMBB();
+ if (in16BitRange(SoFarNumInsns[JmpBB] - JmpBB->size() - CurrNumInsns))
continue;
- // The definition is PHI node, check all inputs.
- if (DefMI->isPHI()) {
- bool CheckFail = false;
-
- for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) {
- MachineOperand &opnd = DefMI->getOperand(i);
- if (!opnd.isReg()) {
- CheckFail = true;
- break;
- }
-
- MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
- if (!PhiDef || PhiDef->isPHI() ||
- !TruncSizeCompatible(TruncSize, PhiDef->getOpcode())) {
- CheckFail = true;
- break;
- }
- }
+ // Replace this insn with a JMPL.
+ BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB);
+ UncondJmp->eraseFromParent();
+ Changed = true;
+ continue;
+ }
- if (CheckFail)
- continue;
- } else if (!TruncSizeCompatible(TruncSize, DefMI->getOpcode())) {
+ const BasicBlock *TermBB = MBB->getBasicBlock();
+ int Dist;
+
+ // (3). conditional jmp to another bb or follow through.
+ if (!UncondJmp) {
+ CondTargetBB = CondJmp->getOperand(2).getMBB();
+ MachineBasicBlock *FollowBB = FollowThroughBB[MBB];
+ Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
+ if (in16BitRange(Dist))
continue;
- }
- BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::MOV_rr), DstReg)
- .addReg(SrcReg);
+ // We have
+ // B2: ...
+ // if (cond) goto B5
+ // B3: ...
+ // where B2 -> B5 is beyond 16bit range.
+ //
+ // We do not have a 32bit cond jmp insn. So we try to do
+ // the following.
+ // B2: ...
+ // if (cond) goto New_B1
+ // New_B0 goto B3
+ // New_B1: gotol B5
+ // B3: ...
+ // Basically two new basic blocks are created.
+ MachineBasicBlock *New_B0 = MF->CreateMachineBasicBlock(TermBB);
+ MachineBasicBlock *New_B1 = MF->CreateMachineBasicBlock(TermBB);
+
+ // Insert New_B0 and New_B1 into function block list.
+ MachineFunction::iterator MBB_I = ++MBB->getIterator();
+ MF->insert(MBB_I, New_B0);
+ MF->insert(MBB_I, New_B1);
+
+ // replace B2 cond jump
+ if (CondJmp->getOperand(1).isReg())
+ BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
+ .addReg(CondJmp->getOperand(0).getReg())
+ .addReg(CondJmp->getOperand(1).getReg())
+ .addMBB(New_B1);
+ else
+ BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
+ .addReg(CondJmp->getOperand(0).getReg())
+ .addImm(CondJmp->getOperand(1).getImm())
+ .addMBB(New_B1);
+
+ // It is possible that CondTargetBB and FollowBB are the same, but the
+ // above Dist check should have already filtered out this case.
+ MBB->removeSuccessor(CondTargetBB);
+ MBB->removeSuccessor(FollowBB);
+ MBB->addSuccessor(New_B0);
+ MBB->addSuccessor(New_B1);
+
+ // Populate insns in New_B0 and New_B1.
+ BuildMI(New_B0, CondJmp->getDebugLoc(), TII->get(BPF::JMP)).addMBB(FollowBB);
+ BuildMI(New_B1, CondJmp->getDebugLoc(), TII->get(BPF::JMPL))
+ .addMBB(CondTargetBB);
+
+ New_B0->addSuccessor(FollowBB);
+ New_B1->addSuccessor(CondTargetBB);
+ CondJmp->eraseFromParent();
+ Changed = true;
+ continue;
+ }
- if (MI2)
- MI2->eraseFromParent();
+ // (4). conditional jmp followed by an unconditional jmp.
+ CondTargetBB = CondJmp->getOperand(2).getMBB();
+ JmpBB = UncondJmp->getOperand(0).getMBB();
+
+ // We have
+ // B2: ...
+ // if (cond) goto B5
+ // JMP B7
+ // B3: ...
+ //
+ // If only B2->B5 is out of 16bit range, we can do
+ // B2: ...
+ // if (cond) goto new_B
+ // JMP B7
+ // New_B: gotol B5
+ // B3: ...
+ //
+ // If only 'JMP B7' is out of 16bit range, we can replace
+ // 'JMP B7' with 'JMPL B7'.
+ //
+ // If both B2->B5 and 'JMP B7' are out of range, just do
+ // both the above transformations.
+ Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
+ if (!in16BitRange(Dist)) {
+ MachineBasicBlock *New_B = MF->CreateMachineBasicBlock(TermBB);
+
+ // Insert New_B into the function block list.
+ MF->insert(++MBB->getIterator(), New_B);
+
+ // replace B2 cond jump
+ if (CondJmp->getOperand(1).isReg())
+ BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
+ .addReg(CondJmp->getOperand(0).getReg())
+ .addReg(CondJmp->getOperand(1).getReg())
+ .addMBB(New_B);
+ else
+ BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
+ .addReg(CondJmp->getOperand(0).getReg())
+ .addImm(CondJmp->getOperand(1).getImm())
+ .addMBB(New_B);
+
+ if (CondTargetBB != JmpBB)
+ MBB->removeSuccessor(CondTargetBB);
+ MBB->addSuccessor(New_B);
+
+ // Populate insn in New_B.
+ BuildMI(New_B, CondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(CondTargetBB);
+
+ New_B->addSuccessor(CondTargetBB);
+ CondJmp->eraseFromParent();
+ Changed = true;
+ }
- // Mark it to ToErase, and erase in the next iteration.
- ToErase = &MI;
- TruncElemNum++;
- Eliminated = true;
+ if (!in16BitRange(SoFarNumInsns[JmpBB] - CurrNumInsns)) {
+ BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB);
+ UncondJmp->eraseFromParent();
+ Changed = true;
}
}
- return Eliminated;
+ return Changed;
}
} // end anonymous namespace
-INITIALIZE_PASS(BPFMIPeepholeTruncElim, "bpf-mi-trunc-elim",
- "BPF MachineSSA Peephole Optimization For TRUNC Eliminate",
- false, false)
+INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",
+ "BPF PreEmit Peephole Optimization", false, false)
-char BPFMIPeepholeTruncElim::ID = 0;
-FunctionPass* llvm::createBPFMIPeepholeTruncElimPass()
+char BPFMIPreEmitPeephole::ID = 0;
+FunctionPass* llvm::createBPFMIPreEmitPeepholePass()
{
- return new BPFMIPeepholeTruncElim();
+ return new BPFMIPreEmitPeephole();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
index 67574403ca83..2af150ad45c2 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
@@ -93,10 +93,35 @@ void BPFMISimplifyPatchable::initialize(MachineFunction &MFParm) {
LLVM_DEBUG(dbgs() << "*** BPF simplify patchable insts pass ***\n\n");
}
+static bool isST(unsigned Opcode) {
+ return Opcode == BPF::STB_imm || Opcode == BPF::STH_imm ||
+ Opcode == BPF::STW_imm || Opcode == BPF::STD_imm;
+}
+
+static bool isSTX32(unsigned Opcode) {
+ return Opcode == BPF::STB32 || Opcode == BPF::STH32 || Opcode == BPF::STW32;
+}
+
+static bool isSTX64(unsigned Opcode) {
+ return Opcode == BPF::STB || Opcode == BPF::STH || Opcode == BPF::STW ||
+ Opcode == BPF::STD;
+}
+
+static bool isLDX32(unsigned Opcode) {
+ return Opcode == BPF::LDB32 || Opcode == BPF::LDH32 || Opcode == BPF::LDW32;
+}
+
+static bool isLDX64(unsigned Opcode) {
+ return Opcode == BPF::LDB || Opcode == BPF::LDH || Opcode == BPF::LDW ||
+ Opcode == BPF::LDD;
+}
+
+static bool isLDSX(unsigned Opcode) {
+ return Opcode == BPF::LDBSX || Opcode == BPF::LDHSX || Opcode == BPF::LDWSX;
+}
+
bool BPFMISimplifyPatchable::isLoadInst(unsigned Opcode) {
- return Opcode == BPF::LDD || Opcode == BPF::LDW || Opcode == BPF::LDH ||
- Opcode == BPF::LDB || Opcode == BPF::LDW32 || Opcode == BPF::LDH32 ||
- Opcode == BPF::LDB32;
+ return isLDX32(Opcode) || isLDX64(Opcode) || isLDSX(Opcode);
}
void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
@@ -117,14 +142,12 @@ void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
MachineInstr *DefInst = MO.getParent();
unsigned Opcode = DefInst->getOpcode();
unsigned COREOp;
- if (Opcode == BPF::LDB || Opcode == BPF::LDH || Opcode == BPF::LDW ||
- Opcode == BPF::LDD || Opcode == BPF::STB || Opcode == BPF::STH ||
- Opcode == BPF::STW || Opcode == BPF::STD)
- COREOp = BPF::CORE_MEM;
- else if (Opcode == BPF::LDB32 || Opcode == BPF::LDH32 ||
- Opcode == BPF::LDW32 || Opcode == BPF::STB32 ||
- Opcode == BPF::STH32 || Opcode == BPF::STW32)
- COREOp = BPF::CORE_ALU32_MEM;
+ if (isLDX64(Opcode) || isLDSX(Opcode))
+ COREOp = BPF::CORE_LD64;
+ else if (isLDX32(Opcode))
+ COREOp = BPF::CORE_LD32;
+ else if (isSTX64(Opcode) || isSTX32(Opcode) || isST(Opcode))
+ COREOp = BPF::CORE_ST;
else
continue;
@@ -136,9 +159,7 @@ void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
// Reject the form:
// %1 = ADD_rr %2, %3
// *(type *)(%2 + 0) = %1
- if (Opcode == BPF::STB || Opcode == BPF::STH || Opcode == BPF::STW ||
- Opcode == BPF::STD || Opcode == BPF::STB32 || Opcode == BPF::STH32 ||
- Opcode == BPF::STW32) {
+ if (isSTX64(Opcode) || isSTX32(Opcode)) {
const MachineOperand &Opnd = DefInst->getOperand(0);
if (Opnd.isReg() && Opnd.getReg() == MO.getReg())
continue;
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFPreserveDIType.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
index ec770eecb2e5..dae1aeea3521 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
@@ -13,6 +13,7 @@
#include "BPF.h"
#include "BPFCORE.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/BTF/BTF.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
@@ -56,7 +57,7 @@ static bool BPFPreserveDITypeImpl(Function &F) {
if (!GV)
continue;
- if (GV->getName().startswith("llvm.bpf.btf.type.id")) {
+ if (GV->getName().starts_with("llvm.bpf.btf.type.id")) {
if (!Call->getMetadata(LLVMContext::MD_preserve_access_index))
report_fatal_error(
"Missing metadata for llvm.bpf.btf.type.id intrinsic");
@@ -82,18 +83,19 @@ static bool BPFPreserveDITypeImpl(Function &F) {
uint32_t Reloc;
if (FlagValue == BPFCoreSharedInfo::BTF_TYPE_ID_LOCAL_RELOC) {
- Reloc = BPFCoreSharedInfo::BTF_TYPE_ID_LOCAL;
+ Reloc = BTF::BTF_TYPE_ID_LOCAL;
} else {
- Reloc = BPFCoreSharedInfo::BTF_TYPE_ID_REMOTE;
- DIType *Ty = cast<DIType>(MD);
- while (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
- unsigned Tag = DTy->getTag();
- if (Tag != dwarf::DW_TAG_const_type &&
- Tag != dwarf::DW_TAG_volatile_type)
- break;
- Ty = DTy->getBaseType();
- }
+ Reloc = BTF::BTF_TYPE_ID_REMOTE;
+ }
+ DIType *Ty = cast<DIType>(MD);
+ while (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
+ unsigned Tag = DTy->getTag();
+ if (Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type)
+ break;
+ Ty = DTy->getBaseType();
+ }
+ if (Reloc == BTF::BTF_TYPE_ID_REMOTE) {
if (Ty->getName().empty()) {
if (isa<DISubroutineType>(Ty))
report_fatal_error(
@@ -101,8 +103,8 @@ static bool BPFPreserveDITypeImpl(Function &F) {
else
report_fatal_error("Empty type name for BTF_TYPE_ID_REMOTE reloc");
}
- MD = Ty;
}
+ MD = Ty;
BasicBlock *BB = Call->getParent();
IntegerType *VarType = Type::getInt64Ty(BB->getContext());
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFPreserveStaticOffset.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFPreserveStaticOffset.cpp
new file mode 100644
index 000000000000..c64fe00beaef
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFPreserveStaticOffset.cpp
@@ -0,0 +1,680 @@
+//===------ BPFPreserveStaticOffset.cpp -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// TLDR: replaces llvm.preserve.static.offset + GEP + load / store
+// with llvm.bpf.getelementptr.and.load / store
+//
+// This file implements BPFPreserveStaticOffsetPass transformation.
+// This transformation addresses two BPF verifier specific issues:
+//
+// (a) Access to the fields of some structural types is allowed only
+// using load and store instructions with static immediate offsets.
+//
+// Examples of such types are `struct __sk_buff` and `struct
+// bpf_sock_ops`. This is so because offsets of the fields of
+// these structures do not match real offsets in the running
+//     kernel. During BPF program load, LDX and STX instructions
+//     referring to the fields of these types are rewritten so that
+//     offsets match the real ones. For this rewrite to happen, field
+//     offsets have to be encoded as immediate operands of the
+// instructions.
+//
+// See kernel/bpf/verifier.c:convert_ctx_access function in the
+// Linux kernel source tree for details.
+//
+// (b) Pointers to context parameters of BPF programs must not be
+// modified before access.
+//
+// During BPF program verification a tag PTR_TO_CTX is tracked for
+//     register values. If a register with such a tag is modified, the
+//     BPF program is not allowed to read or write memory using this
+// register. See kernel/bpf/verifier.c:check_mem_access function
+// in the Linux kernel source tree for details.
+//
+// The following sequence of IR instructions:
+//
+// %x = getelementptr %ptr, %constant_offset
+// %y = load %x
+//
+// is translated to a single machine instruction:
+//
+// LDW %ptr, %constant_offset
+//
+// In order for cases (a) and (b) to work the sequence %x-%y above has
+// to be preserved by the IR passes.
+//
+// However, several optimization passes might sink `load` instruction
+// or hoist `getelementptr` instruction so that the instructions are
+// no longer in sequence. Examples of such passes are:
+// SimplifyCFGPass, InstCombinePass, GVNPass.
+// After such modification the verifier would reject the BPF program.
+//
+// To avoid this issue the patterns like (load/store (getelementptr ...))
+// are replaced by calls to BPF specific intrinsic functions:
+// - llvm.bpf.getelementptr.and.load
+// - llvm.bpf.getelementptr.and.store
+//
+// These calls are lowered back to (load/store (getelementptr ...))
+// by BPFCheckAndAdjustIR pass right before the translation from IR to
+// machine instructions.
+//
+// The transformation is split into the following steps:
+// - When IR is generated from the AST, calls to the intrinsic function
+// llvm.preserve.static.offset are inserted.
+// - BPFPreserveStaticOffsetPass is executed as early as possible
+//   with AllowPartial set to true; this handles marked GEP chains
+// with constant offsets.
+// - BPFPreserveStaticOffsetPass is executed at ScalarOptimizerLateEPCallback
+//   with AllowPartial set to false; this handles marked GEP chains
+// with offsets that became constant after loop unrolling, e.g.
+// to handle the following code:
+//
+// struct context { int x[4]; } __attribute__((preserve_static_offset));
+//
+// struct context *ctx = ...;
+// #pragma clang loop unroll(full)
+// for (int i = 0; i < 4; ++i)
+// foo(ctx->x[i]);
+//
+// The early BPFPreserveStaticOffsetPass run is necessary to allow
+// additional GVN / CSE opportunities after function inlining.
+// The relative order of optimizations applied to a function:
+// - early stage (1)
+// - ...
+// - function inlining (2)
+// - ...
+// - loop unrolling
+// - ...
+// - ScalarOptimizerLateEPCallback (3)
+//
+// When function A is inlined into function B, all optimizations for A
+// are already done, while some passes remain for B. If
+// BPFPreserveStaticOffsetPass were done at (3) but not at (1),
+// the code after (2) would contain a mix of
+// (load (gep %p)) and (get.and.load %p) usages:
+// - the (load (gep %p)) would come from the calling function;
+// - the (get.and.load %p) would come from the callee function.
+// This would clobber the CSE / GVN passes done after inlining.
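//
// For orientation: judging from fillCommonArgs below, the replacement
// load call has roughly the following shape (a sketch only; the exact
// intrinsic mangling and attributes may differ):
//
//   %y = call i32 @llvm.bpf.getelementptr.and.load.i32(
//          ptr %p,       ; base pointer
//          i1 false,     ; volatile
//          i8 0, i8 0,   ; atomic ordering, sync scope id
//          i8 2,         ; log2 of the alignment
//          i1 true,      ; inbounds
//          i32 0, i32 2) ; gep indices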
+
+#include "BPF.h"
+#include "BPFCORE.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsBPF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define DEBUG_TYPE "bpf-preserve-static-offset"
+
+using namespace llvm;
+
+static const unsigned GepAndLoadFirstIdxArg = 6;
+static const unsigned GepAndStoreFirstIdxArg = 7;
+
+static bool isIntrinsicCall(Value *I, Intrinsic::ID Id) {
+ if (auto *Call = dyn_cast<CallInst>(I))
+ if (Function *Func = Call->getCalledFunction())
+ return Func->getIntrinsicID() == Id;
+ return false;
+}
+
+static bool isPreserveStaticOffsetCall(Value *I) {
+ return isIntrinsicCall(I, Intrinsic::preserve_static_offset);
+}
+
+static CallInst *isGEPAndLoad(Value *I) {
+ if (isIntrinsicCall(I, Intrinsic::bpf_getelementptr_and_load))
+ return cast<CallInst>(I);
+ return nullptr;
+}
+
+static CallInst *isGEPAndStore(Value *I) {
+ if (isIntrinsicCall(I, Intrinsic::bpf_getelementptr_and_store))
+ return cast<CallInst>(I);
+ return nullptr;
+}
+
+template <class T = Instruction>
+static DILocation *mergeDILocations(SmallVector<T *> &Insns) {
+ DILocation *Merged = (*Insns.begin())->getDebugLoc();
+ for (T *I : Insns)
+ Merged = DILocation::getMergedLocation(Merged, I->getDebugLoc());
+ return Merged;
+}
+
+static CallInst *makeIntrinsicCall(Module *M,
+ Intrinsic::BPFIntrinsics Intrinsic,
+ ArrayRef<Type *> Types,
+ ArrayRef<Value *> Args) {
+
+ Function *Fn = Intrinsic::getDeclaration(M, Intrinsic, Types);
+ return CallInst::Create(Fn, Args);
+}
+
+static void setParamElementType(CallInst *Call, unsigned ArgNo, Type *Type) {
+ LLVMContext &C = Call->getContext();
+ Call->addParamAttr(ArgNo, Attribute::get(C, Attribute::ElementType, Type));
+}
+
+static void setParamReadNone(CallInst *Call, unsigned ArgNo) {
+ LLVMContext &C = Call->getContext();
+ Call->addParamAttr(ArgNo, Attribute::get(C, Attribute::ReadNone));
+}
+
+static void setParamReadOnly(CallInst *Call, unsigned ArgNo) {
+ LLVMContext &C = Call->getContext();
+ Call->addParamAttr(ArgNo, Attribute::get(C, Attribute::ReadOnly));
+}
+
+static void setParamWriteOnly(CallInst *Call, unsigned ArgNo) {
+ LLVMContext &C = Call->getContext();
+ Call->addParamAttr(ArgNo, Attribute::get(C, Attribute::WriteOnly));
+}
+
+namespace {
+struct GEPChainInfo {
+ bool InBounds;
+ Type *SourceElementType;
+ SmallVector<Value *> Indices;
+ SmallVector<GetElementPtrInst *> Members;
+
+ GEPChainInfo() { reset(); }
+
+ void reset() {
+ InBounds = true;
+ SourceElementType = nullptr;
+ Indices.clear();
+ Members.clear();
+ }
+};
+} // Anonymous namespace
+
+template <class T = std::disjunction<LoadInst, StoreInst>>
+static void fillCommonArgs(LLVMContext &C, SmallVector<Value *> &Args,
+ GEPChainInfo &GEP, T *Insn) {
+ Type *Int8Ty = Type::getInt8Ty(C);
+ Type *Int1Ty = Type::getInt1Ty(C);
+ // Implementation of Align guarantees that ShiftValue < 64
+ unsigned AlignShiftValue = Log2_64(Insn->getAlign().value());
+ Args.push_back(GEP.Members[0]->getPointerOperand());
+ Args.push_back(ConstantInt::get(Int1Ty, Insn->isVolatile()));
+ Args.push_back(ConstantInt::get(Int8Ty, (unsigned)Insn->getOrdering()));
+ Args.push_back(ConstantInt::get(Int8Ty, (unsigned)Insn->getSyncScopeID()));
+ Args.push_back(ConstantInt::get(Int8Ty, AlignShiftValue));
+ Args.push_back(ConstantInt::get(Int1Ty, GEP.InBounds));
+ Args.append(GEP.Indices.begin(), GEP.Indices.end());
+}
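// A standalone sketch of the alignment round-trip between this function
// and reconstructCommon below: the alignment is a power of two, packed
// as its log2 and restored as 1ULL << shift.
//
//   uint64_t align = 8;
//   unsigned shift = Log2_64(align);    // 3
//   uint64_t restored = 1ULL << shift;  // 8 again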
+
+static Instruction *makeGEPAndLoad(Module *M, GEPChainInfo &GEP,
+ LoadInst *Load) {
+ SmallVector<Value *> Args;
+ fillCommonArgs(M->getContext(), Args, GEP, Load);
+ CallInst *Call = makeIntrinsicCall(M, Intrinsic::bpf_getelementptr_and_load,
+ {Load->getType()}, Args);
+ setParamElementType(Call, 0, GEP.SourceElementType);
+ Call->applyMergedLocation(mergeDILocations(GEP.Members), Load->getDebugLoc());
+ Call->setName((*GEP.Members.rbegin())->getName());
+ if (Load->isUnordered()) {
+ Call->setOnlyReadsMemory();
+ Call->setOnlyAccessesArgMemory();
+ setParamReadOnly(Call, 0);
+ }
+ for (unsigned I = GepAndLoadFirstIdxArg; I < Args.size(); ++I)
+ Call->addParamAttr(I, Attribute::ImmArg);
+ Call->setAAMetadata(Load->getAAMetadata());
+ return Call;
+}
+
+static Instruction *makeGEPAndStore(Module *M, GEPChainInfo &GEP,
+ StoreInst *Store) {
+ SmallVector<Value *> Args;
+ Args.push_back(Store->getValueOperand());
+ fillCommonArgs(M->getContext(), Args, GEP, Store);
+ CallInst *Call =
+ makeIntrinsicCall(M, Intrinsic::bpf_getelementptr_and_store,
+ {Store->getValueOperand()->getType()}, Args);
+ setParamElementType(Call, 1, GEP.SourceElementType);
+ if (Store->getValueOperand()->getType()->isPointerTy())
+ setParamReadNone(Call, 0);
+ Call->applyMergedLocation(mergeDILocations(GEP.Members),
+ Store->getDebugLoc());
+ if (Store->isUnordered()) {
+ Call->setOnlyWritesMemory();
+ Call->setOnlyAccessesArgMemory();
+ setParamWriteOnly(Call, 1);
+ }
+ for (unsigned I = GepAndStoreFirstIdxArg; I < Args.size(); ++I)
+ Call->addParamAttr(I, Attribute::ImmArg);
+ Call->setAAMetadata(Store->getAAMetadata());
+ return Call;
+}
+
+static unsigned getOperandAsUnsigned(CallInst *Call, unsigned ArgNo) {
+ if (auto *Int = dyn_cast<ConstantInt>(Call->getOperand(ArgNo)))
+ return Int->getValue().getZExtValue();
+ std::string Report;
+ raw_string_ostream ReportS(Report);
+ ReportS << "Expecting ConstantInt as argument #" << ArgNo << " of " << *Call
+ << "\n";
+ report_fatal_error(StringRef(Report));
+}
+
+static GetElementPtrInst *reconstructGEP(CallInst *Call, int Delta) {
+ SmallVector<Value *> Indices;
+ Indices.append(Call->data_operands_begin() + 6 + Delta,
+ Call->data_operands_end());
+ Type *GEPPointeeType = Call->getParamElementType(Delta);
+ auto *GEP =
+ GetElementPtrInst::Create(GEPPointeeType, Call->getOperand(Delta),
+ ArrayRef<Value *>(Indices), Call->getName());
+ GEP->setIsInBounds(getOperandAsUnsigned(Call, 5 + Delta));
+ return GEP;
+}
+
+template <class T = std::disjunction<LoadInst, StoreInst>>
+static void reconstructCommon(CallInst *Call, GetElementPtrInst *GEP, T *Insn,
+ int Delta) {
+ Insn->setVolatile(getOperandAsUnsigned(Call, 1 + Delta));
+ Insn->setOrdering((AtomicOrdering)getOperandAsUnsigned(Call, 2 + Delta));
+ Insn->setSyncScopeID(getOperandAsUnsigned(Call, 3 + Delta));
+ unsigned AlignShiftValue = getOperandAsUnsigned(Call, 4 + Delta);
+ Insn->setAlignment(Align(1ULL << AlignShiftValue));
+ GEP->setDebugLoc(Call->getDebugLoc());
+ Insn->setDebugLoc(Call->getDebugLoc());
+ Insn->setAAMetadata(Call->getAAMetadata());
+}
+
+std::pair<GetElementPtrInst *, LoadInst *>
+BPFPreserveStaticOffsetPass::reconstructLoad(CallInst *Call) {
+ GetElementPtrInst *GEP = reconstructGEP(Call, 0);
+ Type *ReturnType = Call->getFunctionType()->getReturnType();
+ auto *Load = new LoadInst(ReturnType, GEP, "",
+ /* These would be set in reconstructCommon */
+ false, Align(1));
+ reconstructCommon(Call, GEP, Load, 0);
+ return std::pair{GEP, Load};
+}
+
+std::pair<GetElementPtrInst *, StoreInst *>
+BPFPreserveStaticOffsetPass::reconstructStore(CallInst *Call) {
+ GetElementPtrInst *GEP = reconstructGEP(Call, 1);
+ auto *Store = new StoreInst(Call->getOperand(0), GEP,
+ /* These would be set in reconstructCommon */
+ false, Align(1));
+ reconstructCommon(Call, GEP, Store, 1);
+ return std::pair{GEP, Store};
+}
+
+static bool isZero(Value *V) {
+ auto *CI = dyn_cast<ConstantInt>(V);
+ return CI && CI->isZero();
+}
+
+// Given a chain of GEP instructions, collect the information necessary
+// to merge the chain into a single GEP instruction of the form:
+// getelementptr %<type>, ptr %p, i32 0, <field_idx1>, <field_idx2>, ...
+static bool foldGEPChainAsStructAccess(SmallVector<GetElementPtrInst *> &GEPs,
+ GEPChainInfo &Info) {
+ if (GEPs.empty())
+ return false;
+
+ if (!all_of(GEPs, [=](GetElementPtrInst *GEP) {
+ return GEP->hasAllConstantIndices();
+ }))
+ return false;
+
+ GetElementPtrInst *First = GEPs[0];
+ Info.InBounds = First->isInBounds();
+ Info.SourceElementType = First->getSourceElementType();
+ Type *ResultElementType = First->getResultElementType();
+ Info.Indices.append(First->idx_begin(), First->idx_end());
+ Info.Members.push_back(First);
+
+ for (auto *Iter = GEPs.begin() + 1; Iter != GEPs.end(); ++Iter) {
+ GetElementPtrInst *GEP = *Iter;
+ if (!isZero(*GEP->idx_begin())) {
+ Info.reset();
+ return false;
+ }
+ if (!GEP->getSourceElementType() ||
+ GEP->getSourceElementType() != ResultElementType) {
+ Info.reset();
+ return false;
+ }
+ Info.InBounds &= GEP->isInBounds();
+ Info.Indices.append(GEP->idx_begin() + 1, GEP->idx_end());
+ Info.Members.push_back(GEP);
+ ResultElementType = GEP->getResultElementType();
+ }
+
+ return true;
+}
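// For illustration (a sketch with hypothetical types): a chain such as
//
//   %a = getelementptr inbounds %struct.ctx, ptr %p, i32 0, i32 1
//   %b = getelementptr inbounds %struct.inner, ptr %a, i32 0, i32 2
//
// folds, when %struct.inner is the result element type of the first
// GEP, into
//
//   %m = getelementptr inbounds %struct.ctx, ptr %p, i32 0, i32 1, i32 2
//
// with the leading zero index of every GEP after the first dropped.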
+
+// Given a chain of GEP instructions, collect the information necessary
+// to merge the chain into a single GEP instruction of the form:
+// getelementptr i8, ptr %p, i64 %offset
+static bool foldGEPChainAsU8Access(SmallVector<GetElementPtrInst *> &GEPs,
+ GEPChainInfo &Info) {
+ if (GEPs.empty())
+ return false;
+
+ GetElementPtrInst *First = GEPs[0];
+ const DataLayout &DL = First->getModule()->getDataLayout();
+ LLVMContext &C = First->getContext();
+ Type *PtrTy = First->getType()->getScalarType();
+ APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
+ for (GetElementPtrInst *GEP : GEPs) {
+ if (!GEP->accumulateConstantOffset(DL, Offset)) {
+ Info.reset();
+ return false;
+ }
+ Info.InBounds &= GEP->isInBounds();
+ Info.Members.push_back(GEP);
+ }
+ Info.SourceElementType = Type::getInt8Ty(C);
+ Info.Indices.push_back(ConstantInt::get(C, Offset));
+
+ return true;
+}
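// For illustration (sketch): if the chain's constant offsets accumulate
// to 20 bytes, the fold instead produces a byte-wise access:
//
//   %m = getelementptr i8, ptr %p, i64 20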
+
+static void reportNonStaticGEPChain(Instruction *Insn) {
+ auto Msg = DiagnosticInfoUnsupported(
+ *Insn->getFunction(),
+ Twine("Non-constant offset in access to a field of a type marked "
+ "with preserve_static_offset might be rejected by BPF verifier")
+ .concat(Insn->getDebugLoc()
+ ? ""
+ : " (pass -g option to get exact location)"),
+ Insn->getDebugLoc(), DS_Warning);
+ Insn->getContext().diagnose(Msg);
+}
+
+static bool allZeroIndices(SmallVector<GetElementPtrInst *> &GEPs) {
+ return GEPs.empty() || all_of(GEPs, [=](GetElementPtrInst *GEP) {
+ return GEP->hasAllZeroIndices();
+ });
+}
+
+static bool tryToReplaceWithGEPBuiltin(Instruction *LoadOrStoreTemplate,
+ SmallVector<GetElementPtrInst *> &GEPs,
+ Instruction *InsnToReplace) {
+ GEPChainInfo GEPChain;
+ if (!foldGEPChainAsStructAccess(GEPs, GEPChain) &&
+ !foldGEPChainAsU8Access(GEPs, GEPChain)) {
+ return false;
+ }
+ Module *M = InsnToReplace->getModule();
+ if (auto *Load = dyn_cast<LoadInst>(LoadOrStoreTemplate)) {
+ Instruction *Replacement = makeGEPAndLoad(M, GEPChain, Load);
+ Replacement->insertBefore(InsnToReplace);
+ InsnToReplace->replaceAllUsesWith(Replacement);
+ }
+ if (auto *Store = dyn_cast<StoreInst>(LoadOrStoreTemplate)) {
+ Instruction *Replacement = makeGEPAndStore(M, GEPChain, Store);
+ Replacement->insertBefore(InsnToReplace);
+ }
+ return true;
+}
+
+// Check if U->getPointerOperand() == I
+static bool isPointerOperand(Value *I, User *U) {
+ if (auto *L = dyn_cast<LoadInst>(U))
+ return L->getPointerOperand() == I;
+ if (auto *S = dyn_cast<StoreInst>(U))
+ return S->getPointerOperand() == I;
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
+ return GEP->getPointerOperand() == I;
+ if (auto *Call = isGEPAndLoad(U))
+ return Call->getArgOperand(0) == I;
+ if (auto *Call = isGEPAndStore(U))
+ return Call->getArgOperand(1) == I;
+ return false;
+}
+
+static bool isInlineableCall(User *U) {
+ if (auto *Call = dyn_cast<CallInst>(U))
+ return Call->hasFnAttr(Attribute::InlineHint);
+ return false;
+}
+
+static void rewriteAccessChain(Instruction *Insn,
+ SmallVector<GetElementPtrInst *> &GEPs,
+ SmallVector<Instruction *> &Visited,
+ bool AllowPartial, bool &StillUsed);
+
+static void rewriteUses(Instruction *Insn,
+ SmallVector<GetElementPtrInst *> &GEPs,
+ SmallVector<Instruction *> &Visited, bool AllowPartial,
+ bool &StillUsed) {
+ for (User *U : Insn->users()) {
+ auto *UI = dyn_cast<Instruction>(U);
+ if (UI && (isPointerOperand(Insn, UI) || isPreserveStaticOffsetCall(UI) ||
+ isInlineableCall(UI)))
+ rewriteAccessChain(UI, GEPs, Visited, AllowPartial, StillUsed);
+ else
+ LLVM_DEBUG({
+ llvm::dbgs() << "unsupported usage in BPFPreserveStaticOffsetPass:\n";
+ llvm::dbgs() << " Insn: " << *Insn << "\n";
+ llvm::dbgs() << " User: " << *U << "\n";
+ });
+ }
+}
+
+// A DFS traversal of GEP chain trees starting from Root.
+//
+// Recursion descends through GEP instructions and
+// llvm.preserve.static.offset calls. Recursion stops at any other
+// instruction. If load or store instruction is reached it is replaced
+// by a call to `llvm.bpf.getelementptr.and.load` or
+// `llvm.bpf.getelementptr.and.store` intrinsic.
+// If `llvm.bpf.getelementptr.and.load/store` is reached the accumulated
+// GEPs are merged into the intrinsic call.
+// If nested calls to `llvm.preserve.static.offset` are encountered these
+// calls are marked for deletion.
+//
+// Parameters description:
+// - Insn - current position in the tree
+// - GEPs - GEP instructions for the current branch
+// - Visited - a list of visited instructions in DFS order,
+// order is important for unused instruction deletion.
+// - AllowPartial - when true, GEP chains that can't be folded are
+//   not reported; otherwise a diagnostic message is shown for such chains.
+// - StillUsed - set to true if one of the GEP chains could not be
+//   folded; meaningful when AllowPartial is false, and means that the
+//   root preserve.static.offset call is still in use and should remain
+// until the next run of this pass.
+static void rewriteAccessChain(Instruction *Insn,
+ SmallVector<GetElementPtrInst *> &GEPs,
+ SmallVector<Instruction *> &Visited,
+ bool AllowPartial, bool &StillUsed) {
+ auto MarkAndTraverseUses = [&]() {
+ Visited.push_back(Insn);
+ rewriteUses(Insn, GEPs, Visited, AllowPartial, StillUsed);
+ };
+ auto TryToReplace = [&](Instruction *LoadOrStore) {
+ // Do nothing for (preserve.static.offset (load/store ..)) or for
+ // GEPs with zero indices. Such constructs lead to zero offset and
+ // are simplified by other passes.
+ if (allZeroIndices(GEPs))
+ return;
+ if (tryToReplaceWithGEPBuiltin(LoadOrStore, GEPs, Insn)) {
+ Visited.push_back(Insn);
+ return;
+ }
+ if (!AllowPartial)
+ reportNonStaticGEPChain(Insn);
+ StillUsed = true;
+ };
+ if (isa<LoadInst>(Insn) || isa<StoreInst>(Insn)) {
+ TryToReplace(Insn);
+ } else if (isGEPAndLoad(Insn)) {
+ auto [GEP, Load] =
+ BPFPreserveStaticOffsetPass::reconstructLoad(cast<CallInst>(Insn));
+ GEPs.push_back(GEP);
+ TryToReplace(Load);
+ GEPs.pop_back();
+ delete Load;
+ delete GEP;
+ } else if (isGEPAndStore(Insn)) {
+ // This case can't be merged with the above because
+ // `delete Load` / `delete Store` want a concrete type, and the
+ // destructor of Instruction is protected.
+ auto [GEP, Store] =
+ BPFPreserveStaticOffsetPass::reconstructStore(cast<CallInst>(Insn));
+ GEPs.push_back(GEP);
+ TryToReplace(Store);
+ GEPs.pop_back();
+ delete Store;
+ delete GEP;
+ } else if (auto *GEP = dyn_cast<GetElementPtrInst>(Insn)) {
+ GEPs.push_back(GEP);
+ MarkAndTraverseUses();
+ GEPs.pop_back();
+ } else if (isPreserveStaticOffsetCall(Insn)) {
+ MarkAndTraverseUses();
+ } else if (isInlineableCall(Insn)) {
+ // Preserve preserve.static.offset call for parameters of
+ // functions that might be inlined. These would be removed on a
+ // second pass after inlining.
+ // This might happen when a pointer to a preserve_static_offset
+ // structure is passed as a parameter of a function that is
+ // inlined inside a loop that is later unrolled.
+ if (AllowPartial)
+ StillUsed = true;
+ } else {
+ SmallString<128> Buf;
+ raw_svector_ostream BufStream(Buf);
+ BufStream << *Insn;
+ report_fatal_error(
+ Twine("Unexpected rewriteAccessChain Insn = ").concat(Buf));
+ }
+}
+
+static void removeMarkerCall(Instruction *Marker) {
+ Marker->replaceAllUsesWith(Marker->getOperand(0));
+ Marker->eraseFromParent();
+}
+
+static bool rewriteAccessChain(Instruction *Marker, bool AllowPartial,
+ SmallPtrSetImpl<Instruction *> &RemovedMarkers) {
+ SmallVector<GetElementPtrInst *> GEPs;
+ SmallVector<Instruction *> Visited;
+ bool StillUsed = false;
+ rewriteUses(Marker, GEPs, Visited, AllowPartial, StillUsed);
+ // Check if Visited instructions could be removed, iterate in
+ // reverse to unblock instructions higher in the chain.
+ for (auto V = Visited.rbegin(); V != Visited.rend(); ++V) {
+ if (isPreserveStaticOffsetCall(*V)) {
+ removeMarkerCall(*V);
+ RemovedMarkers.insert(*V);
+ } else if ((*V)->use_empty()) {
+ (*V)->eraseFromParent();
+ }
+ }
+ return StillUsed;
+}
+
+static std::vector<Instruction *>
+collectPreserveStaticOffsetCalls(Function &F) {
+ std::vector<Instruction *> Calls;
+ for (Instruction &Insn : instructions(F))
+ if (isPreserveStaticOffsetCall(&Insn))
+ Calls.push_back(&Insn);
+ return Calls;
+}
+
+bool isPreserveArrayIndex(Value *V) {
+ return isIntrinsicCall(V, Intrinsic::preserve_array_access_index);
+}
+
+bool isPreserveStructIndex(Value *V) {
+ return isIntrinsicCall(V, Intrinsic::preserve_struct_access_index);
+}
+
+bool isPreserveUnionIndex(Value *V) {
+ return isIntrinsicCall(V, Intrinsic::preserve_union_access_index);
+}
+
+static void removePAICalls(Instruction *Marker) {
+ auto IsPointerOperand = [](Value *Op, User *U) {
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
+ return GEP->getPointerOperand() == Op;
+ if (isPreserveStaticOffsetCall(U) || isPreserveArrayIndex(U) ||
+ isPreserveStructIndex(U) || isPreserveUnionIndex(U))
+ return cast<CallInst>(U)->getArgOperand(0) == Op;
+ return false;
+ };
+
+ SmallVector<Value *, 32> WorkList;
+ WorkList.push_back(Marker);
+ do {
+ Value *V = WorkList.pop_back_val();
+ for (User *U : V->users())
+ if (IsPointerOperand(V, U))
+ WorkList.push_back(U);
+ auto *Call = dyn_cast<CallInst>(V);
+ if (!Call)
+ continue;
+ if (isPreserveArrayIndex(V))
+ BPFCoreSharedInfo::removeArrayAccessCall(Call);
+ else if (isPreserveStructIndex(V))
+ BPFCoreSharedInfo::removeStructAccessCall(Call);
+ else if (isPreserveUnionIndex(V))
+ BPFCoreSharedInfo::removeUnionAccessCall(Call);
+ } while (!WorkList.empty());
+}
+
+// Look for sequences:
+// - llvm.preserve.static.offset -> getelementptr... -> load
+// - llvm.preserve.static.offset -> getelementptr... -> store
+// And replace those with calls to intrinsics:
+// - llvm.bpf.getelementptr.and.load
+// - llvm.bpf.getelementptr.and.store
+static bool rewriteFunction(Function &F, bool AllowPartial) {
+ LLVM_DEBUG(dbgs() << "********** BPFPreserveStaticOffsetPass (AllowPartial="
+ << AllowPartial << ") ************\n");
+
+ auto MarkerCalls = collectPreserveStaticOffsetCalls(F);
+ SmallPtrSet<Instruction *, 16> RemovedMarkers;
+
+ LLVM_DEBUG(dbgs() << "There are " << MarkerCalls.size()
+ << " preserve.static.offset calls\n");
+
+ if (MarkerCalls.empty())
+ return false;
+
+ for (auto *Call : MarkerCalls)
+ removePAICalls(Call);
+
+ for (auto *Call : MarkerCalls) {
+ if (RemovedMarkers.contains(Call))
+ continue;
+ bool StillUsed = rewriteAccessChain(Call, AllowPartial, RemovedMarkers);
+ if (!StillUsed || !AllowPartial)
+ removeMarkerCall(Call);
+ }
+
+ return true;
+}
+
+PreservedAnalyses
+llvm::BPFPreserveStaticOffsetPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ return rewriteFunction(F, AllowPartial) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.cpp
index d66933fef72d..9a8e42f32371 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.cpp
@@ -12,6 +12,10 @@
#include "BPFSubtarget.h"
#include "BPF.h"
+#include "BPFTargetMachine.h"
+#include "GISel/BPFCallLowering.h"
+#include "GISel/BPFLegalizerInfo.h"
+#include "GISel/BPFRegisterBankInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/TargetParser/Host.h"
@@ -23,6 +27,20 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "BPFGenSubtargetInfo.inc"
+static cl::opt<bool> Disable_ldsx("disable-ldsx", cl::Hidden, cl::init(false),
+ cl::desc("Disable ldsx insns"));
+static cl::opt<bool> Disable_movsx("disable-movsx", cl::Hidden, cl::init(false),
+ cl::desc("Disable movsx insns"));
+static cl::opt<bool> Disable_bswap("disable-bswap", cl::Hidden, cl::init(false),
+ cl::desc("Disable bswap insns"));
+static cl::opt<bool> Disable_sdiv_smod("disable-sdiv-smod", cl::Hidden,
+ cl::init(false), cl::desc("Disable sdiv/smod insns"));
+static cl::opt<bool> Disable_gotol("disable-gotol", cl::Hidden, cl::init(false),
+ cl::desc("Disable gotol insn"));
+static cl::opt<bool>
+ Disable_StoreImm("disable-storeimm", cl::Hidden, cl::init(false),
+ cl::desc("Disable BPF_ST (immediate store) insn"));
+
void BPFSubtarget::anchor() {}
BPFSubtarget &BPFSubtarget::initializeSubtargetDependencies(StringRef CPU,
@@ -38,6 +56,12 @@ void BPFSubtarget::initializeEnvironment() {
HasJmp32 = false;
HasAlu32 = false;
UseDwarfRIS = false;
+ HasLdsx = false;
+ HasMovsx = false;
+ HasBswap = false;
+ HasSdivSmod = false;
+ HasGotol = false;
+ HasStoreImm = false;
}
void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
@@ -55,10 +79,48 @@ void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
HasAlu32 = true;
return;
}
+ if (CPU == "v4") {
+ HasJmpExt = true;
+ HasJmp32 = true;
+ HasAlu32 = true;
+ HasLdsx = !Disable_ldsx;
+ HasMovsx = !Disable_movsx;
+ HasBswap = !Disable_bswap;
+ HasSdivSmod = !Disable_sdiv_smod;
+ HasGotol = !Disable_gotol;
+ HasStoreImm = !Disable_StoreImm;
+ return;
+ }
}
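// With the flags above, individual v4 features can be toggled from the
// command line; for example (a sketch, assuming a standard llc build):
//
//   llc -march=bpf -mcpu=v4 -disable-gotol -disable-ldsx -o out.s in.ll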
BPFSubtarget::BPFSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
: BPFGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
FrameLowering(initializeSubtargetDependencies(CPU, FS)),
- TLInfo(TM, *this) {}
+ TLInfo(TM, *this) {
+ IsLittleEndian = TT.isLittleEndian();
+
+ CallLoweringInfo.reset(new BPFCallLowering(*getTargetLowering()));
+ Legalizer.reset(new BPFLegalizerInfo(*this));
+ auto *RBI = new BPFRegisterBankInfo(*getRegisterInfo());
+ RegBankInfo.reset(RBI);
+
+ InstSelector.reset(createBPFInstructionSelector(
+ *static_cast<const BPFTargetMachine *>(&TM), *this, *RBI));
+}
+
+const CallLowering *BPFSubtarget::getCallLowering() const {
+ return CallLoweringInfo.get();
+}
+
+InstructionSelector *BPFSubtarget::getInstructionSelector() const {
+ return InstSelector.get();
+}
+
+const LegalizerInfo *BPFSubtarget::getLegalizerInfo() const {
+ return Legalizer.get();
+}
+
+const RegisterBankInfo *BPFSubtarget::getRegBankInfo() const {
+ return RegBankInfo.get();
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.h b/contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.h
index 8f833b3c75d0..33747546eadc 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.h
@@ -16,7 +16,12 @@
#include "BPFFrameLowering.h"
#include "BPFISelLowering.h"
#include "BPFInstrInfo.h"
+#include "BPFRegisterInfo.h"
#include "BPFSelectionDAGInfo.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
@@ -43,6 +48,8 @@ protected:
// unused
bool isDummyMode;
+ bool IsLittleEndian;
+
// whether the cpu supports jmp ext
bool HasJmpExt;
@@ -56,6 +63,14 @@ protected:
// whether we should enable MCAsmInfo DwarfUsesRelocationsAcrossSections
bool UseDwarfRIS;
+ // whether cpu v4 insns are enabled.
+ bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol, HasStoreImm;
+
+ std::unique_ptr<CallLowering> CallLoweringInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
+
public:
// This constructor initializes the data members to match that
// of the specified triple.
@@ -71,6 +86,14 @@ public:
bool getHasJmp32() const { return HasJmp32; }
bool getHasAlu32() const { return HasAlu32; }
bool getUseDwarfRIS() const { return UseDwarfRIS; }
+ bool hasLdsx() const { return HasLdsx; }
+ bool hasMovsx() const { return HasMovsx; }
+ bool hasBswap() const { return HasBswap; }
+ bool hasSdivSmod() const { return HasSdivSmod; }
+ bool hasGotol() const { return HasGotol; }
+ bool hasStoreImm() const { return HasStoreImm; }
+
+ bool isLittleEndian() const { return IsLittleEndian; }
const BPFInstrInfo *getInstrInfo() const override { return &InstrInfo; }
const BPFFrameLowering *getFrameLowering() const override {
@@ -82,9 +105,14 @@ public:
const BPFSelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
- const TargetRegisterInfo *getRegisterInfo() const override {
+ const BPFRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
+
+ const CallLowering *getCallLowering() const override;
+ InstructionSelector *getInstructionSelector() const override;
+ const LegalizerInfo *getLegalizerInfo() const override;
+ const RegisterBankInfo *getRegBankInfo() const override;
};
} // End llvm namespace
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index c47e8274b2e2..ab0db576f7f7 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -15,10 +15,15 @@
#include "BPFTargetTransformInfo.h"
#include "MCTargetDesc/BPFMCAsmInfo.h"
#include "TargetInfo/BPFTargetInfo.h"
+#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/FormattedStream.h"
@@ -40,9 +45,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFTarget() {
RegisterTargetMachine<BPFTargetMachine> Z(getTheBPFTarget());
PassRegistry &PR = *PassRegistry::getPassRegistry();
+ initializeGlobalISel(PR);
initializeBPFCheckAndAdjustIRPass(PR);
initializeBPFMIPeepholePass(PR);
- initializeBPFMIPeepholeTruncElimPass(PR);
initializeBPFDAGToDAGISelPass(PR);
}
@@ -63,7 +68,7 @@ BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
@@ -91,6 +96,11 @@ public:
bool addInstSelector() override;
void addMachineSSAOptimization() override;
void addPreEmitPass() override;
+
+ bool addIRTranslator() override;
+ bool addLegalizeMachineIR() override;
+ bool addRegBankSelect() override;
+ bool addGlobalInstructionSelect() override;
};
}
@@ -106,11 +116,16 @@ void BPFTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
FPM.addPass(BPFIRPeepholePass());
return true;
}
+ if (PassName == "bpf-preserve-static-offset") {
+ FPM.addPass(BPFPreserveStaticOffsetPass(false));
+ return true;
+ }
return false;
});
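// Registering the textual pass name makes the pass reachable from the
// new pass manager's pipeline parser; e.g. (a sketch, flags may vary):
//
//   opt -mtriple=bpf -passes=bpf-preserve-static-offset -S in.ll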
PB.registerPipelineStartEPCallback(
[=](ModulePassManager &MPM, OptimizationLevel) {
FunctionPassManager FPM;
+ FPM.addPass(BPFPreserveStaticOffsetPass(true));
FPM.addPass(BPFAbstractMemberAccessPass(this));
FPM.addPass(BPFPreserveDITypePass());
FPM.addPass(BPFIRPeepholePass());
@@ -120,6 +135,12 @@ void BPFTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
OptimizationLevel Level) {
FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true)));
});
+ PB.registerScalarOptimizerLateEPCallback(
+ [=](FunctionPassManager &FPM, OptimizationLevel Level) {
+ // Run this after loop unrolling but before
+ // SimplifyCFGPass(... .sinkCommonInsts(true))
+ FPM.addPass(BPFPreserveStaticOffsetPass(false));
+ });
PB.registerPipelineEarlySimplificationEPCallback(
[=](ModulePassManager &MPM, OptimizationLevel) {
MPM.addPass(BPFAdjustOptPass());
@@ -155,13 +176,32 @@ void BPFPassConfig::addMachineSSAOptimization() {
if (!DisableMIPeephole) {
if (Subtarget->getHasAlu32())
addPass(createBPFMIPeepholePass());
- addPass(createBPFMIPeepholeTruncElimPass());
}
}
void BPFPassConfig::addPreEmitPass() {
addPass(createBPFMIPreEmitCheckingPass());
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
if (!DisableMIPeephole)
addPass(createBPFMIPreEmitPeepholePass());
}
+
+bool BPFPassConfig::addIRTranslator() {
+ addPass(new IRTranslator());
+ return false;
+}
+
+bool BPFPassConfig::addLegalizeMachineIR() {
+ addPass(new Legalizer());
+ return false;
+}
+
+bool BPFPassConfig::addRegBankSelect() {
+ addPass(new RegBankSelect());
+ return false;
+}
+
+bool BPFPassConfig::addGlobalInstructionSelect() {
+ addPass(new InstructionSelect(getOptLevel()));
+ return false;
+}
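The registerPipelineParsingCallback hook used above is what makes a textual name like bpf-preserve-static-offset reachable from opt's -passes= syntax. A minimal sketch of that pattern, with MyPeepholePass a hypothetical placeholder pass (not part of this diff):

#include "llvm/Passes/PassBuilder.h"
using namespace llvm;

// Hypothetical pass, for illustration only.
struct MyPeepholePass : PassInfoMixin<MyPeepholePass> {
  PreservedAnalyses run(Function &, FunctionAnalysisManager &) {
    return PreservedAnalyses::all();
  }
};

void registerMyCallbacks(PassBuilder &PB) {
  PB.registerPipelineParsingCallback(
      [](StringRef Name, FunctionPassManager &FPM,
         ArrayRef<PassBuilder::PipelineElement>) {
        if (Name == "my-peephole") { // reachable as -passes=my-peephole
          FPM.addPass(MyPeepholePass());
          return true;
        }
        return false;
      });
}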
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.h
index 1f22fccbfe2d..4e6adc722e76 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.h
@@ -26,7 +26,7 @@ public:
BPFTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
const BPFSubtarget *getSubtargetImpl() const { return &Subtarget; }
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp
index 3c1422b0e1a2..ebd8447eba85 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -17,6 +17,7 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionELF.h"
@@ -976,7 +977,7 @@ std::string BTFDebug::populateFileContent(const DISubprogram *SP) {
auto File = SP->getFile();
std::string FileName;
- if (!File->getFilename().startswith("/") && File->getDirectory().size())
+ if (!File->getFilename().starts_with("/") && File->getDirectory().size())
FileName = File->getDirectory().str() + "/" + File->getFilename().str();
else
FileName = std::string(File->getFilename());
@@ -1318,14 +1319,18 @@ void BTFDebug::beginInstruction(const MachineInstr *MI) {
if (MI->isInlineAsm()) {
// Count the number of register definitions to find the asm string.
unsigned NumDefs = 0;
- for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
- ++NumDefs)
- ;
-
- // Skip this inline asm instruction if the asmstr is empty.
- const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
- if (AsmStr[0] == 0)
- return;
+ while (true) {
+ const MachineOperand &MO = MI->getOperand(NumDefs);
+ if (MO.isReg() && MO.isDef()) {
+ ++NumDefs;
+ continue;
+ }
+ // Skip this inline asm instruction if the asmstr is empty.
+ const char *AsmStr = MO.getSymbolName();
+ if (AsmStr[0] == 0)
+ return;
+ break;
+ }
}
if (MI->getOpcode() == BPF::LD_imm64) {
@@ -1344,8 +1349,9 @@ void BTFDebug::beginInstruction(const MachineInstr *MI) {
// If the insn is "r2 = LD_imm64 @<an TypeIdAttr global>",
// The LD_imm64 result will be replaced with a btf type id.
processGlobalValue(MI->getOperand(1));
- } else if (MI->getOpcode() == BPF::CORE_MEM ||
- MI->getOpcode() == BPF::CORE_ALU32_MEM ||
+ } else if (MI->getOpcode() == BPF::CORE_LD64 ||
+ MI->getOpcode() == BPF::CORE_LD32 ||
+ MI->getOpcode() == BPF::CORE_ST ||
MI->getOpcode() == BPF::CORE_SHIFT) {
// The relocation insn is a load, store, or shift insn.
processGlobalValue(MI->getOperand(3));
@@ -1411,7 +1417,7 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
SecName = Sec->getName();
}
- if (ProcessingMapDef != SecName.startswith(".maps"))
+ if (ProcessingMapDef != SecName.starts_with(".maps"))
continue;
// Create a .rodata datasec if the global variable is an initialized
@@ -1437,7 +1443,7 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
DIGlobalVariable *DIGlobal = nullptr;
for (auto *GVE : GVs) {
DIGlobal = GVE->getVariable();
- if (SecName.startswith(".maps"))
+ if (SecName.starts_with(".maps"))
visitMapDefType(DIGlobal->getType(), GVTypeId);
else
visitTypeEntry(DIGlobal->getType(), GVTypeId, false, false);
@@ -1512,10 +1518,8 @@ bool BTFDebug::InstLower(const MachineInstr *MI, MCInst &OutMI) {
return false;
}
- if (Reloc == BPFCoreSharedInfo::ENUM_VALUE_EXISTENCE ||
- Reloc == BPFCoreSharedInfo::ENUM_VALUE ||
- Reloc == BPFCoreSharedInfo::BTF_TYPE_ID_LOCAL ||
- Reloc == BPFCoreSharedInfo::BTF_TYPE_ID_REMOTE)
+ if (Reloc == BTF::ENUM_VALUE_EXISTENCE || Reloc == BTF::ENUM_VALUE ||
+ Reloc == BTF::BTF_TYPE_ID_LOCAL || Reloc == BTF::BTF_TYPE_ID_REMOTE)
OutMI.setOpcode(BPF::LD_imm64);
else
OutMI.setOpcode(BPF::MOV_ri);
@@ -1524,8 +1528,9 @@ bool BTFDebug::InstLower(const MachineInstr *MI, MCInst &OutMI) {
return true;
}
}
- } else if (MI->getOpcode() == BPF::CORE_MEM ||
- MI->getOpcode() == BPF::CORE_ALU32_MEM ||
+ } else if (MI->getOpcode() == BPF::CORE_LD64 ||
+ MI->getOpcode() == BPF::CORE_LD32 ||
+ MI->getOpcode() == BPF::CORE_ST ||
MI->getOpcode() == BPF::CORE_SHIFT) {
const MachineOperand &MO = MI->getOperand(3);
if (MO.isGlobal()) {
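A note on the startswith -> starts_with changes scattered through this file: LLVM 18 renames StringRef's prefix/suffix predicates to the std::string_view-style spellings (the old names remain, at this point, as deprecated aliases). A minimal sketch of the new spelling:

#include "llvm/ADT/StringRef.h"

// LLVM 18 spelling of the prefix check used for the .maps sections.
static bool isMapsSection(llvm::StringRef SecName) {
  return SecName.starts_with(".maps");
}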
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
index 2565d8a0d763..536bee539384 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
@@ -57,8 +57,7 @@ public:
BPF_ABS = 0x1,
BPF_IND = 0x2,
BPF_MEM = 0x3,
- BPF_LEN = 0x4,
- BPF_MSH = 0x5,
+ BPF_MEMSX = 0x4,
BPF_ATOMIC = 0x6
};
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFCallLowering.cpp
new file mode 100644
index 000000000000..3829a1a3151f
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFCallLowering.cpp
@@ -0,0 +1,46 @@
+//===-- BPFCallLowering.cpp - Call lowering for GlobalISel ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the lowering of LLVM calls to machine code calls for
+/// GlobalISel.
+///
+//===----------------------------------------------------------------------===//
+
+#include "BPFCallLowering.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "bpf-call-lowering"
+
+using namespace llvm;
+
+BPFCallLowering::BPFCallLowering(const BPFTargetLowering &TLI)
+ : CallLowering(&TLI) {}
+
+bool BPFCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
+ const Value *Val, ArrayRef<Register> VRegs,
+ FunctionLoweringInfo &FLI,
+ Register SwiftErrorVReg) const {
+ if (!VRegs.empty())
+ return false;
+ MIRBuilder.buildInstr(BPF::RET);
+ return true;
+}
+
+bool BPFCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
+ const Function &F,
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const {
+ return VRegs.empty();
+}
+
+bool BPFCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
+ CallLoweringInfo &Info) const {
+ return false;
+}
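The three overrides above succeed only for the trivial cases (no formal arguments, no return values), so anything else reports failure and, with fallback enabled, is retried through SelectionDAG. Assuming mainline llc flag spellings, the new path can be exercised with `llc -mtriple=bpfel -global-isel -global-isel-abort=0`.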
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFCallLowering.h b/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFCallLowering.h
new file mode 100644
index 000000000000..0099d2048fe5
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFCallLowering.h
@@ -0,0 +1,39 @@
+//===-- BPFCallLowering.h - Call lowering for GlobalISel --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file describes how to lower LLVM calls to machine code calls.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_GISEL_BPFCALLLOWERING_H
+#define LLVM_LIB_TARGET_BPF_GISEL_BPFCALLLOWERING_H
+
+#include "BPFISelLowering.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/IR/CallingConv.h"
+
+namespace llvm {
+
+class BPFTargetLowering;
+
+class BPFCallLowering : public CallLowering {
+public:
+ BPFCallLowering(const BPFTargetLowering &TLI);
+ bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<Register> VRegs, FunctionLoweringInfo &FLI,
+ Register SwiftErrorVReg) const override;
+ bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const override;
+ bool lowerCall(MachineIRBuilder &MIRBuilder,
+ CallLoweringInfo &Info) const override;
+};
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFInstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFInstructionSelector.cpp
new file mode 100644
index 000000000000..c7db93a260c4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFInstructionSelector.cpp
@@ -0,0 +1,93 @@
+//===- BPFInstructionSelector.cpp --------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the InstructionSelector class for BPF.
+//===----------------------------------------------------------------------===//
+
+#include "BPFInstrInfo.h"
+#include "BPFRegisterBankInfo.h"
+#include "BPFSubtarget.h"
+#include "BPFTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/IntrinsicsBPF.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "bpf-gisel"
+
+using namespace llvm;
+
+namespace {
+
+#define GET_GLOBALISEL_PREDICATE_BITSET
+#include "BPFGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATE_BITSET
+
+class BPFInstructionSelector : public InstructionSelector {
+public:
+ BPFInstructionSelector(const BPFTargetMachine &TM, const BPFSubtarget &STI,
+ const BPFRegisterBankInfo &RBI);
+
+ bool select(MachineInstr &I) override;
+ static const char *getName() { return DEBUG_TYPE; }
+
+private:
+ /// tblgen generated 'select' implementation that is used as the initial
+ /// selector for the patterns that do not require complex C++.
+ bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
+
+ const BPFInstrInfo &TII;
+ const BPFRegisterInfo &TRI;
+ const BPFRegisterBankInfo &RBI;
+
+#define GET_GLOBALISEL_PREDICATES_DECL
+#include "BPFGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_DECL
+
+#define GET_GLOBALISEL_TEMPORARIES_DECL
+#include "BPFGenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_DECL
+};
+
+} // namespace
+
+#define GET_GLOBALISEL_IMPL
+#include "BPFGenGlobalISel.inc"
+#undef GET_GLOBALISEL_IMPL
+
+BPFInstructionSelector::BPFInstructionSelector(const BPFTargetMachine &TM,
+ const BPFSubtarget &STI,
+ const BPFRegisterBankInfo &RBI)
+ : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI),
+#define GET_GLOBALISEL_PREDICATES_INIT
+#include "BPFGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_INIT
+#define GET_GLOBALISEL_TEMPORARIES_INIT
+#include "BPFGenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_INIT
+{
+}
+
+bool BPFInstructionSelector::select(MachineInstr &I) {
+ if (!isPreISelGenericOpcode(I.getOpcode()))
+ return true;
+ if (selectImpl(I, *CoverageInfo))
+ return true;
+ return false;
+}
+
+namespace llvm {
+InstructionSelector *
+createBPFInstructionSelector(const BPFTargetMachine &TM,
+ const BPFSubtarget &Subtarget,
+ const BPFRegisterBankInfo &RBI) {
+ return new BPFInstructionSelector(TM, Subtarget, RBI);
+}
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFLegalizerInfo.cpp
new file mode 100644
index 000000000000..04220c176376
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFLegalizerInfo.cpp
@@ -0,0 +1,22 @@
+//===- BPFLegalizerInfo.cpp --------------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the MachineLegalizer class for BPF.
+//===----------------------------------------------------------------------===//
+
+#include "BPFLegalizerInfo.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "bpf-legalinfo"
+
+using namespace llvm;
+using namespace LegalizeActions;
+
+BPFLegalizerInfo::BPFLegalizerInfo(const BPFSubtarget &ST) {
+ getLegacyLegalizerInfo().computeTables();
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFLegalizerInfo.h b/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFLegalizerInfo.h
new file mode 100644
index 000000000000..1704bc03144c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFLegalizerInfo.h
@@ -0,0 +1,28 @@
+//===- BPFLegalizerInfo.h ----------------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the MachineLegalizer class for BPF.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_GISEL_BPFMACHINELEGALIZER_H
+#define LLVM_LIB_TARGET_BPF_GISEL_BPFMACHINELEGALIZER_H
+
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+
+namespace llvm {
+
+class BPFSubtarget;
+
+/// This class provides the information for the BPF target legalizer for
+/// GlobalISel.
+class BPFLegalizerInfo : public LegalizerInfo {
+public:
+ BPFLegalizerInfo(const BPFSubtarget &ST);
+};
+} // namespace llvm
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFRegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFRegisterBankInfo.cpp
new file mode 100644
index 000000000000..f50e8f524a87
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFRegisterBankInfo.cpp
@@ -0,0 +1,25 @@
+//===- BPFRegisterBankInfo.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the RegisterBankInfo class for BPF.
+//===----------------------------------------------------------------------===//
+
+#include "BPFRegisterBankInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "bpf-reg-bank-info"
+
+#define GET_TARGET_REGBANK_IMPL
+#include "BPFGenRegisterBank.inc"
+
+using namespace llvm;
+
+BPFRegisterBankInfo::BPFRegisterBankInfo(const TargetRegisterInfo &TRI)
+ : BPFGenRegisterBankInfo() {}
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFRegisterBankInfo.h b/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFRegisterBankInfo.h
new file mode 100644
index 000000000000..82421916ca5e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFRegisterBankInfo.h
@@ -0,0 +1,39 @@
+//===-- BPFRegisterBankInfo.h -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares the targeting of the RegisterBankInfo class for BPF.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_GISEL_BPFREGISTERBANKINFO_H
+#define LLVM_LIB_TARGET_BPF_GISEL_BPFREGISTERBANKINFO_H
+
+#include "MCTargetDesc/BPFMCTargetDesc.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+
+#define GET_REGBANK_DECLARATIONS
+#include "BPFGenRegisterBank.inc"
+
+namespace llvm {
+class TargetRegisterInfo;
+
+class BPFGenRegisterBankInfo : public RegisterBankInfo {
+protected:
+#define GET_TARGET_REGBANK_CLASS
+#include "BPFGenRegisterBank.inc"
+};
+
+class BPFRegisterBankInfo final : public BPFGenRegisterBankInfo {
+public:
+ BPFRegisterBankInfo(const TargetRegisterInfo &TRI);
+};
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFRegisterBanks.td b/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFRegisterBanks.td
new file mode 100644
index 000000000000..af4af40a2537
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/GISel/BPFRegisterBanks.td
@@ -0,0 +1,15 @@
+//===-- BPFRegisterBanks.td - Describe the BPF Banks -------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Define the BPF register banks used for GlobalISel.
+///
+//===----------------------------------------------------------------------===//
+
+/// General Purpose Registers
+def GPRRegBank : RegisterBank<"GPRB", [GPR]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index 56fdd6766132..fccc4ee9f74a 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -6,12 +6,14 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/BPFMCFixups.h"
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/Support/EndianStream.h"
#include <cassert>
@@ -23,7 +25,7 @@ namespace {
class BPFAsmBackend : public MCAsmBackend {
public:
- BPFAsmBackend(support::endianness Endian) : MCAsmBackend(Endian) {}
+ BPFAsmBackend(llvm::endianness Endian) : MCAsmBackend(Endian) {}
~BPFAsmBackend() override = default;
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
@@ -41,7 +43,10 @@ public:
return false;
}
- unsigned getNumFixupKinds() const override { return 1; }
+ unsigned getNumFixupKinds() const override {
+ return BPF::NumTargetFixupKinds;
+ }
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
bool writeNopData(raw_ostream &OS, uint64_t Count,
const MCSubtargetInfo *STI) const override;
@@ -49,6 +54,20 @@ public:
} // end anonymous namespace
+const MCFixupKindInfo &
+BPFAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[BPF::NumTargetFixupKinds] = {
+ { "FK_BPF_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+}
+
bool BPFAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
const MCSubtargetInfo *STI) const {
if ((Count % 8) != 0)
@@ -78,13 +97,18 @@ void BPFAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
support::endian::write<uint64_t>(&Data[Fixup.getOffset()], Value, Endian);
} else if (Fixup.getKind() == FK_PCRel_4) {
Value = (uint32_t)((Value - 8) / 8);
- if (Endian == support::little) {
+ if (Endian == llvm::endianness::little) {
Data[Fixup.getOffset() + 1] = 0x10;
support::endian::write32le(&Data[Fixup.getOffset() + 4], Value);
} else {
Data[Fixup.getOffset() + 1] = 0x1;
support::endian::write32be(&Data[Fixup.getOffset() + 4], Value);
}
+ } else if (Fixup.getTargetKind() == BPF::FK_BPF_PCRel_4) {
+ // The input Value represents the number of bytes.
+ Value = (uint32_t)((Value - 8) / 8);
+ support::endian::write<uint32_t>(&Data[Fixup.getOffset() + 4], Value,
+ Endian);
} else {
assert(Fixup.getKind() == FK_PCRel_2);
@@ -107,12 +131,12 @@ MCAsmBackend *llvm::createBPFAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &) {
- return new BPFAsmBackend(support::little);
+ return new BPFAsmBackend(llvm::endianness::little);
}
MCAsmBackend *llvm::createBPFbeAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &) {
- return new BPFAsmBackend(support::big);
+ return new BPFAsmBackend(llvm::endianness::big);
}
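The (Value - 8) / 8 conversion above (used for both FK_PCRel_4 and the new FK_BPF_PCRel_4) encodes BPF's branch-offset convention: offsets count 8-byte instructions relative to the instruction after the branch. A host-side sketch of that arithmetic:

#include <cassert>
#include <cstdint>

// Convert a byte distance from the branch to its target into the
// offset BPF encodes: 8-byte instruction units, relative to the
// *next* instruction.
static uint32_t bpfBranchOffset(uint64_t ByteDelta) {
  return static_cast<uint32_t>((ByteDelta - 8) / 8);
}

int main() {
  assert(bpfBranchOffset(8) == 0);  // target is the next instruction
  assert(bpfBranchOffset(24) == 2); // two instructions ahead
  return 0;
}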
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
index 0761681c115b..c266538bec73 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
@@ -10,6 +10,8 @@
//
//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
#include "MCTargetDesc/BPFInstPrinter.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
@@ -34,15 +36,16 @@ void BPFInstPrinter::printInst(const MCInst *MI, uint64_t Address,
}
static void printExpr(const MCExpr *Expr, raw_ostream &O) {
-#ifndef NDEBUG
const MCSymbolRefExpr *SRE;
if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr))
SRE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
else
SRE = dyn_cast<MCSymbolRefExpr>(Expr);
- assert(SRE && "Unexpected MCExpr type.");
+ if (!SRE)
+ report_fatal_error("Unexpected MCExpr type.");
+#ifndef NDEBUG
MCSymbolRefExpr::VariantKind Kind = SRE->getKind();
assert(Kind == MCSymbolRefExpr::VK_None);
@@ -100,8 +103,13 @@ void BPFInstPrinter::printBrTargetOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isImm()) {
- int16_t Imm = Op.getImm();
- O << ((Imm >= 0) ? "+" : "") << formatImm(Imm);
+ if (MI->getOpcode() == BPF::JMPL) {
+ int32_t Imm = Op.getImm();
+ O << ((Imm >= 0) ? "+" : "") << formatImm(Imm);
+ } else {
+ int16_t Imm = Op.getImm();
+ O << ((Imm >= 0) ? "+" : "") << formatImm(Imm);
+ }
} else if (Op.isExpr()) {
printExpr(Op.getExpr(), O);
} else {
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
index 4bc74b54a11d..b807d6904004 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/BPFMCFixups.h"
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -95,6 +96,8 @@ unsigned BPFMCCodeEmitter::getMachineOpValue(const MCInst &MI,
Fixups.push_back(MCFixup::create(0, Expr, FK_PCRel_4));
else if (MI.getOpcode() == BPF::LD_imm64)
Fixups.push_back(MCFixup::create(0, Expr, FK_SecRel_8));
+ else if (MI.getOpcode() == BPF::JMPL)
+ Fixups.push_back(MCFixup::create(0, Expr, (MCFixupKind)BPF::FK_BPF_PCRel_4));
else
// bb label
Fixups.push_back(MCFixup::create(0, Expr, FK_PCRel_2));
@@ -113,8 +116,8 @@ void BPFMCCodeEmitter::encodeInstruction(const MCInst &MI,
const MCSubtargetInfo &STI) const {
unsigned Opcode = MI.getOpcode();
raw_svector_ostream OS(CB);
- support::endian::Writer OSE(OS,
- IsLittleEndian ? support::little : support::big);
+ support::endian::Writer OSE(OS, IsLittleEndian ? llvm::endianness::little
+ : llvm::endianness::big);
if (Opcode == BPF::LD_imm64 || Opcode == BPF::LD_pseudo) {
uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI);
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCFixups.h b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCFixups.h
new file mode 100644
index 000000000000..55bc8f90f126
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCFixups.h
@@ -0,0 +1,27 @@
+//=======-- BPFMCFixups.h - BPF-specific fixup entries ------*- C++ -*-=======//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCFIXUPS_H
+#define LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCFIXUPS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace BPF {
+enum FixupKind {
+ // BPF specific relocations.
+ FK_BPF_PCRel_4 = FirstTargetFixupKind,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+} // end namespace BPF
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
index e687650ab886..7dad40803d47 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -79,12 +79,15 @@ public:
bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
uint64_t &Target) const override {
// The target is the 3rd operand of cond inst and the 1st of uncond inst.
- int16_t Imm;
+ int32_t Imm;
if (isConditionalBranch(Inst)) {
- Imm = Inst.getOperand(2).getImm();
- } else if (isUnconditionalBranch(Inst))
- Imm = Inst.getOperand(0).getImm();
- else
+ Imm = (short)Inst.getOperand(2).getImm();
+ } else if (isUnconditionalBranch(Inst)) {
+ if (Inst.getOpcode() == BPF::JMP)
+ Imm = (short)Inst.getOperand(0).getImm();
+ else
+ Imm = (int)Inst.getOperand(0).getImm();
+ } else
return false;
Target = Addr + Size + Imm * Size;
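The casts introduced above distinguish the two immediate widths: the 16-bit offsets of JMP and the conditional branches are sign-extended via (short), while the new 32-bit JMPL offset is taken as-is; the target is then computed in next-instruction-relative 8-byte units. A small sketch of the sign handling:

#include <cassert>
#include <cstdint>

// Sign-extend a raw branch immediate according to its encoded width.
static int32_t branchImm(uint64_t Raw, bool Is16Bit) {
  return Is16Bit ? static_cast<int16_t>(Raw) : static_cast<int32_t>(Raw);
}

int main() {
  assert(branchImm(0xFFFF, /*Is16Bit=*/true) == -1);      // 16-bit: -1
  assert(branchImm(0xFFFF, /*Is16Bit=*/false) == 0xFFFF); // 32-bit: +65535
  return 0;
}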
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
index ea30e714a5b7..f12b79586baf 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
@@ -30,8 +30,7 @@ class MCSubtargetInfo;
class MCTargetOptions;
class Target;
-MCCodeEmitter *createBPFMCCodeEmitter(const MCInstrInfo &MCII,
- MCContext &Ctx);
+MCCodeEmitter *createBPFMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx);
MCCodeEmitter *createBPFbeMCCodeEmitter(const MCInstrInfo &MCII,
MCContext &Ctx);
@@ -43,7 +42,7 @@ MCAsmBackend *createBPFbeAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCTargetOptions &Options);
std::unique_ptr<MCObjectTargetWriter> createBPFELFObjectWriter(uint8_t OSABI);
-}
+} // namespace llvm
// Defines symbolic names for BPF registers. This defines a mapping from
// register name to register number.
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
index 19f33f38cbfd..4711e58bbed6 100644
--- a/contrib/llvm-project/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
@@ -72,8 +72,7 @@ class CSKYAsmParser : public MCTargetAsmParser {
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
- bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
@@ -84,8 +83,8 @@ class CSKYAsmParser : public MCTargetAsmParser {
// possible, compression of the instruction is performed.
void emitToStreamer(MCStreamer &S, const MCInst &Inst);
- OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool processInstruction(MCInst &Inst, SMLoc IDLoc, OperandVector &Operands,
MCStreamer &Out);
@@ -1001,24 +1000,24 @@ bool CSKYAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
// Attempts to match Name as a register (either using the default name or
// alternative ABI names), setting Reg to the matching register. Upon
// failure, returns true and sets Reg to CSKY::NoRegister (0).
-static bool matchRegisterNameHelper(const MCSubtargetInfo &STI,
- MCRegister &RegNo, StringRef Name) {
- RegNo = MatchRegisterName(Name);
+static bool matchRegisterNameHelper(const MCSubtargetInfo &STI, MCRegister &Reg,
+ StringRef Name) {
+ Reg = MatchRegisterName(Name);
- if (RegNo == CSKY::NoRegister)
- RegNo = MatchRegisterAltName(Name);
+ if (Reg == CSKY::NoRegister)
+ Reg = MatchRegisterAltName(Name);
- return RegNo == CSKY::NoRegister;
+ return Reg == CSKY::NoRegister;
}
-bool CSKYAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
+bool CSKYAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) {
const AsmToken &Tok = getParser().getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
StringRef Name = getLexer().getTok().getIdentifier();
- if (!matchRegisterNameHelper(getSTI(), (MCRegister &)RegNo, Name)) {
+ if (!matchRegisterNameHelper(getSTI(), Reg, Name)) {
getParser().Lex(); // Eat identifier token.
return false;
}
@@ -1035,13 +1034,13 @@ ParseStatus CSKYAsmParser::parseRegister(OperandVector &Operands) {
return ParseStatus::NoMatch;
case AsmToken::Identifier: {
StringRef Name = getLexer().getTok().getIdentifier();
- MCRegister RegNo;
+ MCRegister Reg;
- if (matchRegisterNameHelper(getSTI(), (MCRegister &)RegNo, Name))
+ if (matchRegisterNameHelper(getSTI(), Reg, Name))
return ParseStatus::NoMatch;
getLexer().Lex();
- Operands.push_back(CSKYOperand::createReg(RegNo, S, E));
+ Operands.push_back(CSKYOperand::createReg(Reg, S, E));
return ParseStatus::Success;
}
@@ -1514,20 +1513,19 @@ bool CSKYAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
return false;
}
-OperandMatchResultTy CSKYAsmParser::tryParseRegister(MCRegister &RegNo,
- SMLoc &StartLoc,
- SMLoc &EndLoc) {
+ParseStatus CSKYAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
const AsmToken &Tok = getParser().getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
StringRef Name = getLexer().getTok().getIdentifier();
- if (matchRegisterNameHelper(getSTI(), (MCRegister &)RegNo, Name))
- return MatchOperand_NoMatch;
+ if (matchRegisterNameHelper(getSTI(), Reg, Name))
+ return ParseStatus::NoMatch;
getParser().Lex(); // Eat identifier token.
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
ParseStatus CSKYAsmParser::parseDirective(AsmToken DirectiveID) {
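The OperandMatchResultTy -> ParseStatus migration above changes only the return-convention spelling, not the logic. A hedged sketch of the three outcomes (header placement assumed from the parser sources; the two bool parameters stand in for real lexer checks):

#include "llvm/MC/MCParser/MCTargetAsmParser.h"
using llvm::ParseStatus;

ParseStatus tryParseThing(bool LooksLikeRegister, bool IsMalformed) {
  if (!LooksLikeRegister)
    return ParseStatus::NoMatch; // not ours; let other parsers try
  if (IsMalformed)
    return ParseStatus::Failure; // ours, but broken: hard error
  return ParseStatus::Success;   // consumed and recorded
}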
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKY.h b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKY.h
index 871a7d7a2a07..7ca630c9abaa 100644
--- a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKY.h
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKY.h
@@ -23,7 +23,7 @@ class FunctionPass;
class PassRegistry;
FunctionPass *createCSKYISelDag(CSKYTargetMachine &TM,
- CodeGenOpt::Level OptLevel);
+ CodeGenOptLevel OptLevel);
FunctionPass *createCSKYConstantIslandPass();
void initializeCSKYConstantIslandsPass(PassRegistry &);
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp
index 702053e02332..c0c23a45d155 100644
--- a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp
@@ -30,7 +30,7 @@ class CSKYDAGToDAGISel : public SelectionDAGISel {
public:
static char ID;
- explicit CSKYDAGToDAGISel(CSKYTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ explicit CSKYDAGToDAGISel(CSKYTargetMachine &TM, CodeGenOptLevel OptLevel)
: SelectionDAGISel(ID, TM, OptLevel) {}
bool runOnMachineFunction(MachineFunction &MF) override {
@@ -48,7 +48,8 @@ public:
SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) override;
#include "CSKYGenDAGISel.inc"
@@ -116,7 +117,7 @@ void CSKYDAGToDAGISel::Select(SDNode *N) {
bool CSKYDAGToDAGISel::selectInlineAsm(SDNode *N) {
std::vector<SDValue> AsmNodeOperands;
- unsigned Flag, Kind;
+ InlineAsm::Flag Flag;
bool Changed = false;
unsigned NumOps = N->getNumOperands();
@@ -139,23 +140,22 @@ bool CSKYDAGToDAGISel::selectInlineAsm(SDNode *N) {
if (i < InlineAsm::Op_FirstOperand)
continue;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
- Flag = C->getZExtValue();
- Kind = InlineAsm::getKind(Flag);
- } else
+ if (const auto *C = dyn_cast<ConstantSDNode>(N->getOperand(i)))
+ Flag = InlineAsm::Flag(C->getZExtValue());
+ else
continue;
// Immediate operands to inline asm in the SelectionDAG are modeled with
- // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
+ // two operands. The first is a constant of value InlineAsm::Kind::Imm, and
// the second is a constant with the value of the immediate. If we get here
- // and we have a Kind_Imm, skip the next operand, and continue.
- if (Kind == InlineAsm::Kind_Imm) {
+ // and we have a Kind::Imm, skip the next operand, and continue.
+ if (Flag.isImmKind()) {
SDValue op = N->getOperand(++i);
AsmNodeOperands.push_back(op);
continue;
}
- unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
+ const unsigned NumRegs = Flag.getNumOperandRegisters();
if (NumRegs)
OpChanged.push_back(false);
@@ -163,26 +163,26 @@ bool CSKYDAGToDAGISel::selectInlineAsm(SDNode *N) {
bool IsTiedToChangedOp = false;
// If it's a use that is tied with a previous def, it has no
// reg class constraint.
- if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
+ if (Changed && Flag.isUseOperandTiedToDef(DefIdx))
IsTiedToChangedOp = OpChanged[DefIdx];
// Memory operands to inline asm in the SelectionDAG are modeled with two
- // operands: a constant of value InlineAsm::Kind_Mem followed by the input
- // operand. If we get here and we have a Kind_Mem, skip the next operand (so
- // it doesn't get misinterpreted), and continue. We do this here because
+ // operands: a constant of value InlineAsm::Kind::Mem followed by the input
+ // operand. If we get here and we have a Kind::Mem, skip the next operand
+ // (so it doesn't get misinterpreted), and continue. We do this here because
// it's important to update the OpChanged array correctly before moving on.
- if (Kind == InlineAsm::Kind_Mem) {
+ if (Flag.isMemKind()) {
SDValue op = N->getOperand(++i);
AsmNodeOperands.push_back(op);
continue;
}
- if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef &&
- Kind != InlineAsm::Kind_RegDefEarlyClobber)
+ if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
+ !Flag.isRegDefEarlyClobberKind())
continue;
unsigned RC;
- bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
+ const bool HasRC = Flag.hasRegClassConstraint(RC);
if ((!IsTiedToChangedOp && (!HasRC || RC != CSKY::GPRRegClassID)) ||
NumRegs != 2)
continue;
@@ -195,8 +195,7 @@ bool CSKYDAGToDAGISel::selectInlineAsm(SDNode *N) {
SDValue PairedReg;
MachineRegisterInfo &MRI = MF->getRegInfo();
- if (Kind == InlineAsm::Kind_RegDef ||
- Kind == InlineAsm::Kind_RegDefEarlyClobber) {
+ if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
// Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
// the original GPRs.
@@ -222,7 +221,7 @@ bool CSKYDAGToDAGISel::selectInlineAsm(SDNode *N) {
Ops.push_back(T1.getValue(1));
CurDAG->UpdateNodeOperands(GU, Ops);
} else {
- // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
+ // For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a
// GPRPair and then pass the GPRPair to the inline asm.
SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
@@ -247,11 +246,12 @@ bool CSKYDAGToDAGISel::selectInlineAsm(SDNode *N) {
if (PairedReg.getNode()) {
OpChanged[OpChanged.size() - 1] = true;
- Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
+ // TODO: maybe a setter for getNumOperandRegisters?
+ Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum*/);
if (IsTiedToChangedOp)
- Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
+ Flag.setMatchingOp(DefIdx);
else
- Flag = InlineAsm::getFlagWordForRegClass(Flag, CSKY::GPRPairRegClassID);
+ Flag.setRegClass(CSKY::GPRPairRegClassID);
// Replace the current flag.
AsmNodeOperands[AsmNodeOperands.size() - 1] =
CurDAG->getTargetConstant(Flag, dl, MVT::i32);
@@ -384,9 +384,10 @@ SDNode *CSKYDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
}
bool CSKYDAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+ const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
+ std::vector<SDValue> &OutOps) {
switch (ConstraintID) {
- case InlineAsm::Constraint_m:
+ case InlineAsm::ConstraintCode::m:
// We just support simple memory operands that have a single address
// operand and need no special handling.
OutOps.push_back(Op);
@@ -399,6 +400,6 @@ bool CSKYDAGToDAGISel::SelectInlineAsmMemoryOperand(
}
FunctionPass *llvm::createCSKYISelDag(CSKYTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
return new CSKYDAGToDAGISel(TM, OptLevel);
}
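The raw flag word plus InlineAsm::getKind() pairs above all become methods on the new InlineAsm::Flag wrapper. A sketch of decoding one flag word with the same API this diff uses:

#include "llvm/IR/InlineAsm.h"

// Decode one inline-asm operand flag word with the LLVM 18 wrapper.
unsigned countConstrainedRegs(unsigned RawFlagWord) {
  llvm::InlineAsm::Flag Flag(RawFlagWord);
  if (Flag.isImmKind() || Flag.isMemKind())
    return 0; // not a register operand
  unsigned RC = 0;
  if (Flag.hasRegClassConstraint(RC))
    ; // RC now holds the required register class ID
  return Flag.getNumOperandRegisters();
}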
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
index 5d21aab513dd..e3b4a2dc048a 100644
--- a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
@@ -59,7 +59,6 @@ CSKYTargetLowering::CSKYTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::CTTZ, MVT::i32, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::ROTR, MVT::i32, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
@@ -103,6 +102,7 @@ CSKYTargetLowering::CSKYTargetLowering(const TargetMachine &TM,
if (!Subtarget.has2E3()) {
setOperationAction(ISD::ABS, MVT::i32, Expand);
setOperationAction(ISD::BITREVERSE, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
setOperationAction(ISD::SDIV, MVT::i32, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
}
@@ -1397,7 +1397,22 @@ bool CSKYTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
// unchanged on sub targets with MULT32, since not sure it is better.
if (!Subtarget.hasE2() && (-1 - Imm).isPowerOf2())
return true;
+ // Break (MULT x, imm) into ([IXH32|IXW32|IXD32] (LSLI32 x, i0), x) when
+ // imm = (1<<i0) + [2|4|8] and imm would otherwise need a MOVIH32/ORI32 pair.
+ if (Imm.ugt(0xffff) && ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2()) &&
+ Subtarget.hasE2())
+ return true;
+ if (Imm.ugt(0xffff) && (Imm - 8).isPowerOf2() && Subtarget.has2E3())
+ return true;
}
return false;
}
+
+bool CSKYTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
+ return Subtarget.has2E3();
+}
+
+bool CSKYTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
+ return Subtarget.hasE2();
+}
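A quick arithmetic check of the new decomposeMulByConstant cases, assuming (as the comment above implies) that IXH32/IXW32/IXD32 compute rz = rx + (ry << 1|2|3):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t X = 12345, K = 16;
  // Imm = (1 << K) + 2 needs a MOVIH32/ORI32 pair to materialize, but
  // X * Imm folds to (X << K) + (X << 1), i.e. LSLI32 then IXH32.
  const uint32_t Imm = (1u << K) + 2;
  assert(X * Imm == (X << K) + (X << 1));
  return 0;
}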
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelLowering.h b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelLowering.h
index c724882c6042..d59481af3c5b 100644
--- a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelLowering.h
@@ -176,6 +176,8 @@ private:
bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
SDValue C) const override;
+ bool isCheapToSpeculateCttz(Type *Ty) const override;
+ bool isCheapToSpeculateCtlz(Type *Ty) const override;
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfo.td
index 549c883c34a7..c6bfc2495ae2 100644
--- a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfo.td
@@ -158,6 +158,30 @@ def uimm_shift : Operand<i32>, ImmLeaf<i32, "return isUInt<2>(Imm);"> {
let DecoderMethod = "decodeImmShiftOpValue";
}
+// Optimize (or x, imm) to (BSETI x, log2(imm)). We should exclude the
+// cases that can be optimized to (ORI32/ORI16 x, imm).
+def imm32_1_pop_bit_XFORM : SDNodeXForm<imm, [{
+ uint32_t I = N->getZExtValue();
+ return CurDAG->getTargetConstant(llvm::Log2_32(I), SDLoc(N),
+ N->getValueType(0));
+}]>;
+def imm32_1_pop_bit : PatLeaf<(imm), [{
+ uint32_t I = N->getZExtValue();
+ return llvm::popcount(I) == 1 && I > 0xfff;
+}]>;
+
+// Optimize (and x, imm) to (BCLRI x, log2(~imm)). We should exclude the
+// cases that can be optimized to (ANDNI x, ~imm).
+def imm32_1_zero_bit_XFORM : SDNodeXForm<imm, [{
+ uint32_t I = ~N->getZExtValue();
+ return CurDAG->getTargetConstant(llvm::Log2_32(I), SDLoc(N),
+ N->getValueType(0));
+}]>;
+def imm32_1_zero_bit : PatLeaf<(imm), [{
+ uint32_t I = ~N->getZExtValue();
+ return llvm::popcount(I) == 1 && I > 0xfff;
+}]>;
+
def CSKYSymbol : AsmOperandClass {
let Name = "CSKYSymbol";
let RenderMethod = "addImmOperands";
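A host-side sketch of the two predicates just defined: a single set bit above the small ORI immediate range qualifies for BSETI, and the XFORM emits its bit index (for a power of two, Log2_32 equals the trailing-zero count):

#include <bit>
#include <cstdint>

// Mirrors imm32_1_pop_bit: exactly one set bit, out of ORI range.
static bool isPopBitImm(uint32_t I) {
  return std::popcount(I) == 1 && I > 0xfff;
}
// Mirrors imm32_1_pop_bit_XFORM's Log2_32 on a power of two.
static unsigned bitIndex(uint32_t I) { return std::countr_zero(I); }
// imm32_1_zero_bit applies the same test to ~I for the BCLRI case.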
@@ -1178,6 +1202,13 @@ multiclass BTF32Pat0<PatFrag cond0, PatFrag cond1, ImmLeaf imm_ty, Instruction i
defm : BTF32Pat0<setne, seteq, uimm16, CMPNEI32>;
defm : BTF32Pat0<setuge, setult, oimm16, CMPHSI32>;
defm : BTF32Pat0<setlt, setge, oimm16, CMPLTI32>;
+
+def : Pat<(brcond (i32 (setne (and GPR:$rs, imm32_1_pop_bit:$im), 0)), bb:$imm16),
+ (BT32 (BTSTI32 GPR:$rs, (imm32_1_pop_bit_XFORM imm32_1_pop_bit:$im)),
+ bb:$imm16)>;
+def : Pat<(brcond (i32 (seteq (and GPR:$rs, imm32_1_pop_bit:$im), 0)), bb:$imm16),
+ (BF32 (BTSTI32 GPR:$rs, (imm32_1_pop_bit_XFORM imm32_1_pop_bit:$im)),
+ bb:$imm16)>;
}
let Predicates = [iHas2E3] in {
@@ -1226,6 +1257,8 @@ def : Pat<(brcond (i32 (setle GPR:$rs1, (i32 -1))), bb:$imm16),
let Predicates = [iHas2E3] in {
def : Pat<(setne GPR:$rs1, GPR:$rs2),
(CMPNE32 GPR:$rs1, GPR:$rs2)>;
+ def : Pat<(setne (and GPR:$rs, imm32_1_pop_bit:$im), 0),
+ (BTSTI32 GPR:$rs, (imm32_1_pop_bit_XFORM imm32_1_pop_bit:$im))>;
def : Pat<(i32 (seteq GPR:$rs1, GPR:$rs2)),
(MVCV32 (CMPNE32 GPR:$rs1, GPR:$rs2))>;
def : Pat<(setuge GPR:$rs1, GPR:$rs2),
@@ -1342,8 +1375,16 @@ def : Pat<(select CARRY:$ca, GPR:$rx, GPR:$false),
(ISEL32 CARRY:$ca, GPR:$rx, GPR:$false)>;
def : Pat<(select (and CARRY:$ca, 1), GPR:$rx, GPR:$false),
(ISEL32 CARRY:$ca, GPR:$rx, GPR:$false)>;
-}
+def : Pat<(select (i32 (setne (and GPR:$rs, imm32_1_pop_bit:$im), 0)),
+ GPR:$true, GPR:$false),
+ (MOVT32 (BTSTI32 GPR:$rs, (imm32_1_pop_bit_XFORM imm32_1_pop_bit:$im)),
+ GPR:$true, GPR:$false)>;
+def : Pat<(select (i32 (seteq (and GPR:$rs, imm32_1_pop_bit:$im), 0)),
+ GPR:$true, GPR:$false),
+ (MOVF32 (BTSTI32 GPR:$rs, (imm32_1_pop_bit_XFORM imm32_1_pop_bit:$im)),
+ GPR:$true, GPR:$false)>;
+}
let Predicates = [iHas2E3] in {
def : Pat<(select (i32 (setne GPR:$rs1, GPR:$rs2)), (add GPR:$rx, uimm5:$imm), GPR:$false),
@@ -1422,6 +1463,14 @@ let Predicates = [iHasE2] in
def : Pat<(i32 imm:$imm),
(ORI32 (MOVIH32 (uimm32_hi16 imm:$imm)), (uimm32_lo16 imm:$imm))>;
+// Bit operations.
+let Predicates = [iHasE2] in {
+ def : Pat<(or GPR:$rs, imm32_1_pop_bit:$imm),
+ (BSETI32 GPR:$rs, (imm32_1_pop_bit_XFORM imm32_1_pop_bit:$imm))>;
+ def : Pat<(and GPR:$rs, imm32_1_zero_bit:$imm),
+ (BCLRI32 GPR:$rs, (imm32_1_zero_bit_XFORM imm32_1_zero_bit:$imm))>;
+}
+
// Other operations.
let Predicates = [iHasE2] in {
def : Pat<(rotl GPR:$rs1, GPR:$rs2),
@@ -1429,6 +1478,7 @@ let Predicates = [iHasE2] in {
let Predicates = [iHas2E3] in {
def : Pat<(bitreverse GPR:$rx), (BREV32 GPR:$rx)>;
def : Pat<(bswap GPR:$rx), (REVB32 GPR:$rx)>;
+ def : Pat<(i32 (cttz GPR:$rx)), (FF1 (BREV32 GPR:$rx))>;
}
def : Pat<(i32 (ctlz GPR:$rx)), (FF1 GPR:$rx)>;
}
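The new (cttz x) -> (FF1 (BREV32 x)) pattern leans on the identity cttz(x) == ctlz(bitreverse(x)); FF1 counts from the most significant bit, as the ctlz pattern just above shows. A host-side check:

#include <bit>
#include <cassert>
#include <cstdint>

static uint32_t brev32(uint32_t V) {
  uint32_t R = 0;
  for (int I = 0; I < 32; ++I)
    R |= ((V >> I) & 1u) << (31 - I);
  return R;
}

int main() {
  for (uint32_t V : {1u, 0x80u, 0xdeadbeefu})
    assert(std::countr_zero(V) == std::countl_zero(brev32(V)));
  return 0;
}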
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfoF1.td b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfoF1.td
index 30cef024f35a..2b6ad9f170b0 100644
--- a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfoF1.td
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfoF1.td
@@ -141,6 +141,16 @@ defm FCMPZUO : FT_CMPZX<0b001011, "fcmpzuo">;
defm FRECIP : FT_MOV<0b011001, "frecip">;
+// multiplication
+let Predicates = [HasFPUv2_SF] in {
+ def : Pat<(f32 (fmul (fneg sFPR32Op:$vrx), sFPR32Op:$vry)),
+ (FNMUL_S sFPR32Op:$vrx, sFPR32Op:$vry)>;
+}
+let Predicates = [HasFPUv2_DF] in {
+ def : Pat<(f64 (fmul (fneg sFPR64Op:$vrx), sFPR64Op:$vry)),
+ (FNMUL_D sFPR64Op:$vrx, sFPR64Op:$vry)>;
+}
+
//fmov, fmtvr, fmfvr
defm FMOV : FT_MOV<0b000100, "fmov">;
def FMFVRL : F_XZ_GF<3, 0b011001, (outs GPR:$rz), (ins sFPR32Op:$vrx),
@@ -298,13 +308,19 @@ def : Pat<(f32 fpimm:$imm), (COPY_TO_REGCLASS (ORI32 (MOVIH32 (fpimm32_hi16 fpim
def : Pat<(f64(CSKY_BITCAST_FROM_LOHI GPR:$rs1, GPR:$rs2)), (FMTVRH_D(FMTVRL_D GPR:$rs1), GPR:$rs2)>,
Requires<[HasFPUv2_DF]>;
-multiclass BRCond_Bin<CondCode CC, string Instr, Instruction Br, Instruction MV> {
+multiclass BRCond_Bin<CondCode CC, string Instr, Instruction Br0, Instruction Br1, Instruction MV> {
let Predicates = [HasFPUv2_SF] in
def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, sFPR32Op:$rs2, CC)), bb:$imm16),
- (Br (!cast<Instruction>(Instr#_S) sFPR32Op:$rs1, sFPR32Op:$rs2), bb:$imm16)>;
+ (Br0 (!cast<Instruction>(Instr#_S) sFPR32Op:$rs1, sFPR32Op:$rs2), bb:$imm16)>;
+ let Predicates = [HasFPUv2_SF] in
+ def : Pat<(brcond (xor (i32 (setcc sFPR32Op:$rs1, sFPR32Op:$rs2, CC)), 1), bb:$imm16),
+ (Br1 (!cast<Instruction>(Instr#_S) sFPR32Op:$rs1, sFPR32Op:$rs2), bb:$imm16)>;
let Predicates = [HasFPUv2_DF] in
def : Pat<(brcond (i32 (setcc sFPR64Op:$rs1, sFPR64Op:$rs2, CC)), bb:$imm16),
- (Br (!cast<Instruction>(Instr#_D) sFPR64Op:$rs1, sFPR64Op:$rs2), bb:$imm16)>;
+ (Br0 (!cast<Instruction>(Instr#_D) sFPR64Op:$rs1, sFPR64Op:$rs2), bb:$imm16)>;
+ let Predicates = [HasFPUv2_DF] in
+ def : Pat<(brcond (xor (i32 (setcc sFPR64Op:$rs1, sFPR64Op:$rs2, CC)), 1), bb:$imm16),
+ (Br1 (!cast<Instruction>(Instr#_D) sFPR64Op:$rs1, sFPR64Op:$rs2), bb:$imm16)>;
let Predicates = [HasFPUv2_SF] in
def : Pat<(i32 (setcc sFPR32Op:$rs1, sFPR32Op:$rs2, CC)),
@@ -314,13 +330,19 @@ multiclass BRCond_Bin<CondCode CC, string Instr, Instruction Br, Instruction MV>
(MV (!cast<Instruction>(Instr#_D) sFPR64Op:$rs1, sFPR64Op:$rs2))>;
}
-multiclass BRCond_Bin_SWAP<CondCode CC, string Instr, Instruction Br, Instruction MV> {
+multiclass BRCond_Bin_SWAP<CondCode CC, string Instr, Instruction Br0, Instruction Br1, Instruction MV> {
let Predicates = [HasFPUv2_SF] in
def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, sFPR32Op:$rs2, CC)), bb:$imm16),
- (Br (!cast<Instruction>(Instr#_S) sFPR32Op:$rs2, sFPR32Op:$rs1), bb:$imm16)>;
+ (Br0 (!cast<Instruction>(Instr#_S) sFPR32Op:$rs2, sFPR32Op:$rs1), bb:$imm16)>;
+ let Predicates = [HasFPUv2_SF] in
+ def : Pat<(brcond (xor (i32 (setcc sFPR32Op:$rs1, sFPR32Op:$rs2, CC)), 1), bb:$imm16),
+ (Br1 (!cast<Instruction>(Instr#_S) sFPR32Op:$rs2, sFPR32Op:$rs1), bb:$imm16)>;
let Predicates = [HasFPUv2_DF] in
def : Pat<(brcond (i32 (setcc sFPR64Op:$rs1, sFPR64Op:$rs2, CC)), bb:$imm16),
- (Br (!cast<Instruction>(Instr#_D) sFPR64Op:$rs2, sFPR64Op:$rs1), bb:$imm16)>;
+ (Br0 (!cast<Instruction>(Instr#_D) sFPR64Op:$rs2, sFPR64Op:$rs1), bb:$imm16)>;
+ let Predicates = [HasFPUv2_DF] in
+ def : Pat<(brcond (xor (i32 (setcc sFPR64Op:$rs1, sFPR64Op:$rs2, CC)), 1), bb:$imm16),
+ (Br1 (!cast<Instruction>(Instr#_D) sFPR64Op:$rs2, sFPR64Op:$rs1), bb:$imm16)>;
let Predicates = [HasFPUv2_SF] in
def : Pat<(i32 (setcc sFPR32Op:$rs1, sFPR32Op:$rs2, CC)),
@@ -332,21 +354,21 @@ multiclass BRCond_Bin_SWAP<CondCode CC, string Instr, Instruction Br, Instructio
// inverse (order && compare) to (unorder || inverse(compare))
-defm : BRCond_Bin<SETUNE, "FCMPNE", BT32, MVC32>;
-defm : BRCond_Bin<SETOEQ, "FCMPNE", BF32, MVCV32>;
-defm : BRCond_Bin<SETOGE, "FCMPHS", BT32, MVC32>;
-defm : BRCond_Bin<SETOLT, "FCMPLT", BT32, MVC32>;
-defm : BRCond_Bin<SETUO, "FCMPUO", BT32, MVC32>;
-defm : BRCond_Bin<SETO, "FCMPUO", BF32, MVCV32>;
-defm : BRCond_Bin_SWAP<SETOGT, "FCMPLT", BT32, MVC32>;
-defm : BRCond_Bin_SWAP<SETOLE, "FCMPHS", BT32, MVC32>;
-
-defm : BRCond_Bin<SETNE, "FCMPNE", BT32, MVC32>;
-defm : BRCond_Bin<SETEQ, "FCMPNE", BF32, MVCV32>;
-defm : BRCond_Bin<SETGE, "FCMPHS", BT32, MVC32>;
-defm : BRCond_Bin<SETLT, "FCMPLT", BT32, MVC32>;
-defm : BRCond_Bin_SWAP<SETGT, "FCMPLT", BT32, MVC32>;
-defm : BRCond_Bin_SWAP<SETLE, "FCMPHS", BT32, MVC32>;
+defm : BRCond_Bin<SETUNE, "FCMPNE", BT32, BF32, MVC32>;
+defm : BRCond_Bin<SETOEQ, "FCMPNE", BF32, BT32, MVCV32>;
+defm : BRCond_Bin<SETOGE, "FCMPHS", BT32, BF32, MVC32>;
+defm : BRCond_Bin<SETOLT, "FCMPLT", BT32, BF32, MVC32>;
+defm : BRCond_Bin<SETUO, "FCMPUO", BT32, BF32, MVC32>;
+defm : BRCond_Bin<SETO, "FCMPUO", BF32, BT32, MVCV32>;
+defm : BRCond_Bin_SWAP<SETOGT, "FCMPLT", BT32, BF32, MVC32>;
+defm : BRCond_Bin_SWAP<SETOLE, "FCMPHS", BT32, BF32, MVC32>;
+
+defm : BRCond_Bin<SETNE, "FCMPNE", BT32, BF32, MVC32>;
+defm : BRCond_Bin<SETEQ, "FCMPNE", BF32, BT32, MVCV32>;
+defm : BRCond_Bin<SETGE, "FCMPHS", BT32, BF32, MVC32>;
+defm : BRCond_Bin<SETLT, "FCMPLT", BT32, BF32, MVC32>;
+defm : BRCond_Bin_SWAP<SETGT, "FCMPLT", BT32, BF32, MVC32>;
+defm : BRCond_Bin_SWAP<SETLE, "FCMPHS", BT32, BF32, MVC32>;
// -----------
@@ -417,4 +439,4 @@ let usesCustomInserter = 1 in {
let Predicates = [HasFPUv2_DF] in
def FSELD : CSKYPseudo<(outs sFPR64Op:$dst), (ins CARRY:$cond, sFPR64Op:$src1, sFPR64Op:$src2),
"!fseld\t$dst, $src1, src2", [(set sFPR64Op:$dst, (select CARRY:$cond, sFPR64Op:$src1, sFPR64Op:$src2))]>;
-}
\ No newline at end of file
+}
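The new FNMUL patterns fold an explicit fneg on one multiplicand into the negated multiply, relying on the IEEE-754 identity (-x) * y == -(x * y), which holds exactly for binary floating point. A spot check:

#include <cassert>

int main() {
  float X = 1.5f, Y = -2.25f;
  assert((-X) * Y == -(X * Y)); // sign symmetry of IEEE multiply
  return 0;
}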
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfoF2.td b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfoF2.td
index 8a00e7d9af3a..a9f8d5479bd2 100644
--- a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfoF2.td
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfoF2.td
@@ -208,6 +208,16 @@ defm f2FNMULS : F2_XYZZ_T<0b010101, "fnmuls",
defm f2FNMUL : F2_XYZ_T<0b010001, "fnmul",
BinOpFrag<(fneg (fmul node:$LHS, node:$RHS))>>;
+// multiplication
+let Predicates = [HasFPUv3_SF] in {
+ def : Pat<(f32 (fmul (fneg FPR32Op:$vrx), FPR32Op:$vry)),
+ (f2FNMUL_S FPR32Op:$vrx, FPR32Op:$vry)>;
+}
+let Predicates = [HasFPUv3_DF] in {
+ def : Pat<(f64 (fmul (fneg FPR64Op:$vrx), FPR64Op:$vry)),
+ (f2FNMUL_D FPR64Op:$vrx, FPR64Op:$vry)>;
+}
+
// fcvt
def f2FFTOS32_S : F2_XZ_P<0b01000, 0b011011, "fftoi.f32.s32", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
def f2FFTOU32_S : F2_XZ_P<0b01000, 0b011010, "fftoi.f32.u32", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
@@ -284,13 +294,19 @@ def : Pat<(f32 fpimm:$imm),(COPY_TO_REGCLASS (ORI32 (MOVIH32 (fpimm32_hi16 fpimm
Requires<[HasFPUv3_SF]>;
-multiclass BRCond_Bin_F2<CondCode CC, string Instr, Instruction Br, Instruction MV, bit IsSelectSwap = 0> {
+multiclass BRCond_Bin_F2<CondCode CC, string Instr, Instruction Br0, Instruction Br1, Instruction MV, bit IsSelectSwap = 0> {
let Predicates = [HasFPUv3_SF] in
def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, FPR32Op:$rs2, CC)), bb:$imm16),
- (Br (!cast<Instruction>(Instr#_S) FPR32Op:$rs1, FPR32Op:$rs2), bb:$imm16)>;
+ (Br0 (!cast<Instruction>(Instr#_S) FPR32Op:$rs1, FPR32Op:$rs2), bb:$imm16)>;
+ let Predicates = [HasFPUv3_SF] in
+ def : Pat<(brcond (xor (i32 (setcc FPR32Op:$rs1, FPR32Op:$rs2, CC)), 1), bb:$imm16),
+ (Br1 (!cast<Instruction>(Instr#_S) FPR32Op:$rs1, FPR32Op:$rs2), bb:$imm16)>;
let Predicates = [HasFPUv3_DF] in
def : Pat<(brcond (i32 (setcc FPR64Op:$rs1, FPR64Op:$rs2, CC)), bb:$imm16),
- (Br (!cast<Instruction>(Instr#_D) FPR64Op:$rs1, FPR64Op:$rs2), bb:$imm16)>;
+ (Br0 (!cast<Instruction>(Instr#_D) FPR64Op:$rs1, FPR64Op:$rs2), bb:$imm16)>;
+ let Predicates = [HasFPUv3_DF] in
+ def : Pat<(brcond (xor (i32 (setcc FPR64Op:$rs1, FPR64Op:$rs2, CC)), 1), bb:$imm16),
+ (Br1 (!cast<Instruction>(Instr#_D) FPR64Op:$rs1, FPR64Op:$rs2), bb:$imm16)>;
let Predicates = [HasFPUv3_SF] in
def : Pat<(i32 (setcc FPR32Op:$rs1, FPR32Op:$rs2, CC)),
@@ -317,13 +333,19 @@ multiclass BRCond_Bin_F2<CondCode CC, string Instr, Instruction Br, Instruction
}
}
-multiclass BRCond_Bin_SWAP_F2<CondCode CC, string Instr, Instruction Br, Instruction MV, bit IsSelectSwap = 0> {
+multiclass BRCond_Bin_SWAP_F2<CondCode CC, string Instr, Instruction Br0, Instruction Br1, Instruction MV, bit IsSelectSwap = 0> {
let Predicates = [HasFPUv3_SF] in
def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, FPR32Op:$rs2, CC)), bb:$imm16),
- (Br (!cast<Instruction>(Instr#_S) FPR32Op:$rs2, FPR32Op:$rs1), bb:$imm16)>;
+ (Br0 (!cast<Instruction>(Instr#_S) FPR32Op:$rs2, FPR32Op:$rs1), bb:$imm16)>;
+ let Predicates = [HasFPUv3_SF] in
+ def : Pat<(brcond (xor (i32 (setcc FPR32Op:$rs1, FPR32Op:$rs2, CC)), 1), bb:$imm16),
+ (Br1 (!cast<Instruction>(Instr#_S) FPR32Op:$rs2, FPR32Op:$rs1), bb:$imm16)>;
let Predicates = [HasFPUv3_DF] in
def : Pat<(brcond (i32 (setcc FPR64Op:$rs1, FPR64Op:$rs2, CC)), bb:$imm16),
- (Br (!cast<Instruction>(Instr#_D) FPR64Op:$rs2, FPR64Op:$rs1), bb:$imm16)>;
+ (Br0 (!cast<Instruction>(Instr#_D) FPR64Op:$rs2, FPR64Op:$rs1), bb:$imm16)>;
+ let Predicates = [HasFPUv3_DF] in
+ def : Pat<(brcond (xor (i32 (setcc FPR64Op:$rs1, FPR64Op:$rs2, CC)), 1), bb:$imm16),
+ (Br1 (!cast<Instruction>(Instr#_D) FPR64Op:$rs2, FPR64Op:$rs1), bb:$imm16)>;
let Predicates = [HasFPUv3_SF] in
def : Pat<(i32 (setcc FPR32Op:$rs1, FPR32Op:$rs2, CC)),
@@ -352,21 +374,21 @@ multiclass BRCond_Bin_SWAP_F2<CondCode CC, string Instr, Instruction Br, Instruc
// inverse (order && compare) to (unorder || inverse(compare))
-defm : BRCond_Bin_F2<SETUNE, "f2FCMPNE", BT32, MVC32>;
-defm : BRCond_Bin_F2<SETOEQ, "f2FCMPNE", BF32, MVCV32, 1>;
-defm : BRCond_Bin_F2<SETOGE, "f2FCMPHS", BT32, MVC32>;
-defm : BRCond_Bin_F2<SETOLT, "f2FCMPLT", BT32, MVC32>;
-defm : BRCond_Bin_F2<SETUO, "f2FCMPUO", BT32, MVC32>;
-defm : BRCond_Bin_F2<SETO, "f2FCMPUO", BF32, MVCV32, 1>;
-defm : BRCond_Bin_SWAP_F2<SETOGT, "f2FCMPLT", BT32, MVC32>;
-defm : BRCond_Bin_SWAP_F2<SETOLE, "f2FCMPHS", BT32, MVC32>;
-
-defm : BRCond_Bin_F2<SETNE, "f2FCMPNE", BT32, MVC32>;
-defm : BRCond_Bin_F2<SETEQ, "f2FCMPNE", BF32, MVCV32, 1>;
-defm : BRCond_Bin_F2<SETGE, "f2FCMPHS", BT32, MVC32>;
-defm : BRCond_Bin_F2<SETLT, "f2FCMPLT", BT32, MVC32>;
-defm : BRCond_Bin_SWAP_F2<SETGT, "f2FCMPLT", BT32, MVC32>;
-defm : BRCond_Bin_SWAP_F2<SETLE, "f2FCMPHS", BT32, MVC32>;
+defm : BRCond_Bin_F2<SETUNE, "f2FCMPNE", BT32, BF32, MVC32>;
+defm : BRCond_Bin_F2<SETOEQ, "f2FCMPNE", BF32, BT32, MVCV32, 1>;
+defm : BRCond_Bin_F2<SETOGE, "f2FCMPHS", BT32, BF32, MVC32>;
+defm : BRCond_Bin_F2<SETOLT, "f2FCMPLT", BT32, BF32, MVC32>;
+defm : BRCond_Bin_F2<SETUO, "f2FCMPUO", BT32, BF32, MVC32>;
+defm : BRCond_Bin_F2<SETO, "f2FCMPUO", BF32, BT32, MVCV32, 1>;
+defm : BRCond_Bin_SWAP_F2<SETOGT, "f2FCMPLT", BT32, BF32, MVC32>;
+defm : BRCond_Bin_SWAP_F2<SETOLE, "f2FCMPHS", BT32, BF32, MVC32>;
+
+defm : BRCond_Bin_F2<SETNE, "f2FCMPNE", BT32, BF32, MVC32>;
+defm : BRCond_Bin_F2<SETEQ, "f2FCMPNE", BF32, BT32, MVCV32, 1>;
+defm : BRCond_Bin_F2<SETGE, "f2FCMPHS", BT32, BF32, MVC32>;
+defm : BRCond_Bin_F2<SETLT, "f2FCMPLT", BT32, BF32, MVC32>;
+defm : BRCond_Bin_SWAP_F2<SETGT, "f2FCMPLT", BT32, BF32, MVC32>;
+defm : BRCond_Bin_SWAP_F2<SETLE, "f2FCMPHS", BT32, BF32, MVC32>;
// ------
@@ -459,4 +481,4 @@ def : Pat<(select CARRY:$ca, FPR32Op:$rx, FPR32Op:$false),
(f2FSEL_S CARRY:$ca, FPR32Op:$rx, FPR32Op:$false)>;
let Predicates = [HasFPUv3_DF] in
def : Pat<(select CARRY:$ca, FPR64Op:$rx, FPR64Op:$false),
-  (f2FSEL_D CARRY:$ca, FPR64Op:$rx, FPR64Op:$false)>;
\ No newline at end of file
+ (f2FSEL_D CARRY:$ca, FPR64Op:$rx, FPR64Op:$false)>;
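As in CSKYInstrInfoF1.td, the added Br1 patterns match brcond(xor(setcc, 1)), i.e. a branch on the negated compare, and select the complementary BT32/BF32 instruction. The rewrite is sound because xor with 1 is logical negation on an i1 value:

#include <cassert>

int main() {
  for (int C : {0, 1})
    assert((C ^ 1) == !C); // xor-with-1 flips an i1
  return 0;
}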
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp
index c5a57f32e29a..8c268dc31614 100644
--- a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp
@@ -53,7 +53,7 @@ CSKYTargetMachine::CSKYTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
RM.value_or(Reloc::Static),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYTargetMachine.h
index 13d4212c79aa..e47b514ae9dd 100644
--- a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYTargetMachine.h
@@ -28,7 +28,7 @@ public:
CSKYTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
index d53d2e9e00e9..bceb41a26745 100644
--- a/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
@@ -223,7 +223,7 @@ void CSKYAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
- bool IsLittleEndian = (Endian == support::little);
+ bool IsLittleEndian = (Endian == llvm::endianness::little);
bool IsInstFixup = (Kind >= FirstTargetFixupKind);
if (IsLittleEndian && IsInstFixup && (NumBytes == 4)) {
@@ -262,7 +262,8 @@ bool CSKYAsmBackend::mayNeedRelaxation(const MCInst &Inst,
bool CSKYAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
const MCFixup &Fixup,
- const MCValue &Target) {
+ const MCValue &Target,
+ const MCSubtargetInfo * /*STI*/) {
if (Fixup.getKind() >= FirstLiteralRelocationKind)
return true;
switch (Fixup.getTargetKind()) {
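Throughout this merge the free-standing support::little/big constants are replaced by members of the scoped llvm::endianness enum; comparisons such as Endian == llvm::endianness::little are otherwise unchanged. A self-contained toy of the same migration (the real enum ships with LLVM's endian utilities; the names below are illustrative only):

    #include <cstdio>

    // Toy model: the unscoped support::little constant becomes a member
    // of a scoped enum, referenced as endianness::little.
    enum class endianness { big, little, native };

    static const char *name(endianness E) {
      return E == endianness::little ? "little" : "big/native";
    }

    int main() {
      std::puts(name(endianness::little));
      return 0;
    }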
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
index 09b3ce6cc82b..5fa0c8c01185 100644
--- a/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
@@ -20,7 +20,7 @@ class CSKYAsmBackend : public MCAsmBackend {
public:
CSKYAsmBackend(const MCSubtargetInfo &STI, const MCTargetOptions &OP)
- : MCAsmBackend(support::little) {}
+ : MCAsmBackend(llvm::endianness::little) {}
unsigned int getNumFixupKinds() const override {
return CSKY::NumTargetFixupKinds;
@@ -53,7 +53,8 @@ public:
const MCSubtargetInfo *STI) const override;
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target) override;
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) override;
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp
index ea41d53ef30f..fc5ddde07f62 100644
--- a/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp
@@ -57,17 +57,15 @@ CSKYMCCodeEmitter::getImmOpValueMSBSize(const MCInst &MI, unsigned Idx,
return MSB.getImm() - LSB.getImm();
}
-static void writeData(uint32_t Bin, unsigned Size, raw_ostream &OS) {
- uint16_t LO16 = static_cast<uint16_t>(Bin);
- uint16_t HI16 = static_cast<uint16_t>(Bin >> 16);
-
+static void writeData(uint32_t Bin, unsigned Size, SmallVectorImpl<char> &CB) {
if (Size == 4)
- support::endian::write<uint16_t>(OS, HI16, support::little);
-
- support::endian::write<uint16_t>(OS, LO16, support::little);
+ support::endian::write(CB, static_cast<uint16_t>(Bin >> 16),
+ llvm::endianness::little);
+ support::endian::write(CB, static_cast<uint16_t>(Bin),
+ llvm::endianness::little);
}
-void CSKYMCCodeEmitter::expandJBTF(const MCInst &MI, raw_ostream &OS,
+void CSKYMCCodeEmitter::expandJBTF(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -80,7 +78,7 @@ void CSKYMCCodeEmitter::expandJBTF(const MCInst &MI, raw_ostream &OS,
.addOperand(MI.getOperand(0))
.addImm(6);
Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
- writeData(Binary, 2, OS);
+ writeData(Binary, 2, CB);
if (!STI.hasFeature(CSKY::Has2E3))
TmpInst = MCInstBuilder(CSKY::BR32)
@@ -90,10 +88,10 @@ void CSKYMCCodeEmitter::expandJBTF(const MCInst &MI, raw_ostream &OS,
TmpInst = MCInstBuilder(CSKY::JMPI32).addOperand(MI.getOperand(2));
Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
Fixups[Fixups.size() - 1].setOffset(2);
- writeData(Binary, 4, OS);
+ writeData(Binary, 4, CB);
}
-void CSKYMCCodeEmitter::expandNEG(const MCInst &MI, raw_ostream &OS,
+void CSKYMCCodeEmitter::expandNEG(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -105,17 +103,17 @@ void CSKYMCCodeEmitter::expandNEG(const MCInst &MI, raw_ostream &OS,
.addOperand(MI.getOperand(0))
.addOperand(MI.getOperand(1));
Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
- writeData(Binary, Size, OS);
+ writeData(Binary, Size, CB);
TmpInst = MCInstBuilder(Size == 4 ? CSKY::ADDI32 : CSKY::ADDI16)
.addOperand(MI.getOperand(0))
.addOperand(MI.getOperand(0))
.addImm(1);
Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
- writeData(Binary, Size, OS);
+ writeData(Binary, Size, CB);
}
-void CSKYMCCodeEmitter::expandRSUBI(const MCInst &MI, raw_ostream &OS,
+void CSKYMCCodeEmitter::expandRSUBI(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -127,17 +125,18 @@ void CSKYMCCodeEmitter::expandRSUBI(const MCInst &MI, raw_ostream &OS,
.addOperand(MI.getOperand(0))
.addOperand(MI.getOperand(1));
Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
- writeData(Binary, Size, OS);
+ writeData(Binary, Size, CB);
TmpInst = MCInstBuilder(Size == 4 ? CSKY::ADDI32 : CSKY::ADDI16)
.addOperand(MI.getOperand(0))
.addOperand(MI.getOperand(0))
.addImm(MI.getOperand(2).getImm() + 1);
Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
- writeData(Binary, Size, OS);
+ writeData(Binary, Size, CB);
}
-void CSKYMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+void CSKYMCCodeEmitter::encodeInstruction(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCInstrDesc &Desc = MII.get(MI.getOpcode());
@@ -151,17 +150,17 @@ void CSKYMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
break;
case CSKY::JBT_E:
case CSKY::JBF_E:
- expandJBTF(MI, OS, Fixups, STI);
+ expandJBTF(MI, CB, Fixups, STI);
MCNumEmitted += 2;
return;
case CSKY::NEG32:
case CSKY::NEG16:
- expandNEG(MI, OS, Fixups, STI);
+ expandNEG(MI, CB, Fixups, STI);
MCNumEmitted += 2;
return;
case CSKY::RSUBI32:
case CSKY::RSUBI16:
- expandRSUBI(MI, OS, Fixups, STI);
+ expandRSUBI(MI, CB, Fixups, STI);
MCNumEmitted += 2;
return;
case CSKY::JBSR32:
@@ -229,16 +228,7 @@ void CSKYMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
}
++MCNumEmitted;
-
- uint32_t Bin = getBinaryCodeForInstr(TmpInst, Fixups, STI);
-
- uint16_t LO16 = static_cast<uint16_t>(Bin);
- uint16_t HI16 = static_cast<uint16_t>(Bin >> 16);
-
- if (Size == 4)
- support::endian::write<uint16_t>(OS, HI16, support::little);
-
- support::endian::write<uint16_t>(OS, LO16, support::little);
+ writeData(getBinaryCodeForInstr(TmpInst, Fixups, STI), Size, CB);
}
unsigned
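The code emitter now appends bytes to a SmallVectorImpl<char> instead of streaming through a raw_ostream, and the duplicated halfword splitting collapses into writeData(): a 4-byte CSKY instruction is emitted high halfword first, each half in little-endian byte order. A minimal sketch with std::vector standing in for SmallVectorImpl<char>:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    static void writeLE16(std::vector<char> &CB, uint16_t V) {
      CB.push_back(static_cast<char>(V & 0xff)); // low byte first
      CB.push_back(static_cast<char>(V >> 8));
    }

    // Mirrors the patched writeData(): 4-byte instructions emit the high
    // halfword first, then the low halfword; 2-byte ones emit only the low.
    static void writeData(uint32_t Bin, unsigned Size, std::vector<char> &CB) {
      if (Size == 4)
        writeLE16(CB, static_cast<uint16_t>(Bin >> 16));
      writeLE16(CB, static_cast<uint16_t>(Bin));
    }

    int main() {
      std::vector<char> CB;
      writeData(0x11223344u, 4, CB); // prints: 22 11 44 33
      for (char C : CB)
        std::printf("%02x ", static_cast<unsigned char>(C));
      std::puts("");
      return 0;
    }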
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h
index 128430197cc5..0e47d259d43f 100644
--- a/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h
@@ -32,7 +32,7 @@ public:
~CSKYMCCodeEmitter() {}
- void encodeInstruction(const MCInst &Inst, raw_ostream &OS,
+ void encodeInstruction(const MCInst &Inst, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -172,13 +172,13 @@ public:
return 0;
}
- void expandJBTF(const MCInst &MI, raw_ostream &OS,
+ void expandJBTF(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- void expandNEG(const MCInst &MI, raw_ostream &OS,
+ void expandNEG(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- void expandRSUBI(const MCInst &MI, raw_ostream &OS,
+ void expandRSUBI(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.cpp
index 37030d977714..00942414d484 100644
--- a/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "CSKYTargetStreamer.h"
-#include "CSKYSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCContext.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/DirectX/CBufferDataLayout.cpp b/contrib/llvm-project/llvm/lib/Target/DirectX/CBufferDataLayout.cpp
index 41bb69b3d79c..3ebd7a2b54d2 100644
--- a/contrib/llvm-project/llvm/lib/Target/DirectX/CBufferDataLayout.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/DirectX/CBufferDataLayout.cpp
@@ -95,7 +95,7 @@ LegacyCBufferLayout::getStructLayout(StructType *ST) {
if (it != StructLayouts.end())
return it->second;
- TypeSize Offset = TypeSize::Fixed(0);
+ TypeSize Offset = TypeSize::getFixed(0);
LegacyStructLayout Layout;
Layout.ST = ST;
for (Type *EltTy : ST->elements()) {
diff --git a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
index 905756469902..59fe6d45757a 100644
--- a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -183,7 +183,8 @@ static StructType *getResRetType(Type *OverloadTy, LLVMContext &Ctx) {
}
static StructType *getHandleType(LLVMContext &Ctx) {
- return getOrCreateStructType("dx.types.Handle", Type::getInt8PtrTy(Ctx), Ctx);
+ return getOrCreateStructType("dx.types.Handle", PointerType::getUnqual(Ctx),
+ Ctx);
}
static Type *getTypeFromParameterKind(ParameterKind Kind, Type *OverloadTy) {
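getHandleType() drops the typed i8* request: with opaque pointers there is a single ptr type per address space, and PointerType::getUnqual(Ctx) returns it directly. A short sketch assuming LLVM 18-era headers (build against LLVMCore):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"

    int main() {
      llvm::LLVMContext Ctx;
      // Every unqualified pointer is now the same opaque `ptr` type, so the
      // old Type::getInt8PtrTy(Ctx) request and this call yield one type.
      llvm::PointerType *P = llvm::PointerType::getUnqual(Ctx);
      return P->getAddressSpace(); // address space 0 -> exit code 0
    }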
diff --git a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILPrepare.cpp b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILPrepare.cpp
index 660ca415b1a4..026911946b47 100644
--- a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILPrepare.cpp
@@ -98,7 +98,7 @@ class DXILPrepareModule : public ModulePass {
PointerType *PtrTy = cast<PointerType>(Operand->getType());
return Builder.Insert(
CastInst::Create(Instruction::BitCast, Operand,
- Builder.getInt8PtrTy(PtrTy->getAddressSpace())));
+ Builder.getPtrTy(PtrTy->getAddressSpace())));
}
public:
@@ -154,7 +154,7 @@ public:
if (auto GEP = dyn_cast<GetElementPtrInst>(&I)) {
if (Value *NoOpBitcast = maybeGenerateBitcast(
Builder, PointerTypes, I, GEP->getPointerOperand(),
- GEP->getResultElementType()))
+ GEP->getSourceElementType()))
GEP->setOperand(0, NoOpBitcast);
continue;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.cpp b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.cpp
index dde7255e0425..d3ff12a1f7b3 100644
--- a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILResource.cpp
@@ -233,9 +233,8 @@ void ResourceBase::print(raw_ostream &OS, StringRef IDPrefix,
}
UAVResource::UAVResource(uint32_t I, FrontendResource R)
- : ResourceBase(I, R),
- Shape(static_cast<ResourceBase::Kinds>(R.getResourceKind())),
- GloballyCoherent(false), HasCounter(false), IsROV(false), ExtProps() {
+ : ResourceBase(I, R), Shape(R.getResourceKind()), GloballyCoherent(false),
+ HasCounter(false), IsROV(R.getIsROV()), ExtProps() {
parseSourceType(R.getSourceType());
}
@@ -259,26 +258,10 @@ void UAVResource::print(raw_ostream &OS) const {
// information we need to remove the source type string from here (See issue:
// https://github.com/llvm/llvm-project/issues/57991).
void UAVResource::parseSourceType(StringRef S) {
- IsROV = S.startswith("RasterizerOrdered");
- if (IsROV)
- S = S.substr(strlen("RasterizerOrdered"));
- if (S.startswith("RW"))
- S = S.substr(strlen("RW"));
-
- // Note: I'm deliberately not handling any of the Texture buffer types at the
- // moment. I want to resolve the issue above before adding Texture or Sampler
- // support.
- Shape = StringSwitch<ResourceBase::Kinds>(S)
- .StartsWith("Buffer<", Kinds::TypedBuffer)
- .StartsWith("ByteAddressBuffer<", Kinds::RawBuffer)
- .StartsWith("StructuredBuffer<", Kinds::StructuredBuffer)
- .Default(Kinds::Invalid);
- assert(Shape != Kinds::Invalid && "Unsupported buffer type");
-
S = S.substr(S.find("<") + 1);
constexpr size_t PrefixLen = StringRef("vector<").size();
- if (S.startswith("vector<"))
+ if (S.starts_with("vector<"))
S = S.substr(PrefixLen, S.find(",") - PrefixLen);
else
S = S.substr(0, S.find(">"));
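"));">
parseSourceType() also picks up the StringRef::startswith -> starts_with rename, which follows the C++20 std::string_view spelling with identical semantics, as the standard type shows:

    #include <cstdio>
    #include <string_view>

    int main() { // requires -std=c++20
      std::string_view S = "vector<float, 4>";
      // Same check as StringRef::starts_with("vector<") after the rename.
      std::printf("%d\n", S.starts_with("vector<") ? 1 : 0);
      return 0;
    }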
diff --git a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
index 2c321f4a79af..e2d0aeee092e 100644
--- a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
@@ -1392,17 +1392,23 @@ static uint64_t rotateSign(APInt Val) {
return I < 0 ? ~(U << 1) : U << 1;
}
-static uint64_t rotateSign(DISubrange::BoundType Val) {
- return rotateSign(Val.get<ConstantInt *>()->getValue());
-}
-
void DXILBitcodeWriter::writeDISubrange(const DISubrange *N,
SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev) {
Record.push_back(N->isDistinct());
+
+ // TODO: Do we need to handle DIExpression here? What about cases where Count
+ // isn't specified but UpperBound and such are?
+ ConstantInt *Count = N->getCount().dyn_cast<ConstantInt *>();
+ assert(Count && "Count is missing or not ConstantInt");
+ Record.push_back(Count->getValue().getSExtValue());
+
+ // TODO: Similarly, DIExpression is allowed here now
+ DISubrange::BoundType LowerBound = N->getLowerBound();
+ assert((LowerBound.isNull() || LowerBound.is<ConstantInt *>()) &&
+ "Lower bound provided but not ConstantInt");
Record.push_back(
- N->getCount().get<ConstantInt *>()->getValue().getSExtValue());
- Record.push_back(rotateSign(N->getLowerBound()));
+ LowerBound ? rotateSign(LowerBound.get<ConstantInt *>()->getValue()) : 0);
Stream.EmitRecord(bitc::METADATA_SUBRANGE, Record, Abbrev);
Record.clear();
@@ -1766,14 +1772,18 @@ unsigned DXILBitcodeWriter::createMetadataStringsAbbrev() {
void DXILBitcodeWriter::writeMetadataStrings(
ArrayRef<const Metadata *> Strings, SmallVectorImpl<uint64_t> &Record) {
+ if (Strings.empty())
+ return;
+
+ unsigned MDSAbbrev = createMetadataStringsAbbrev();
+
for (const Metadata *MD : Strings) {
const MDString *MDS = cast<MDString>(MD);
// Code: [strchar x N]
Record.append(MDS->bytes_begin(), MDS->bytes_end());
// Emit the finished record.
- Stream.EmitRecord(bitc::METADATA_STRING_OLD, Record,
- createMetadataStringsAbbrev());
+ Stream.EmitRecord(bitc::METADATA_STRING_OLD, Record, MDSAbbrev);
Record.clear();
}
}
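The writeMetadataStrings() change is a loop-invariant hoist: the abbreviation was being recreated on every iteration, so the patch computes it once up front and returns early when there are no strings. A generic sketch of the shape of that fix, with hypothetical names:

    #include <cstdio>
    #include <vector>

    static int createAbbrev() {
      std::puts("createAbbrev()"); // result does not depend on the loop
      return 42;
    }

    static void emitAll(const std::vector<const char *> &Strings) {
      if (Strings.empty())
        return;                    // early exit: no abbrev, no work
      int Abbrev = createAbbrev(); // hoisted out of the loop
      for (const char *S : Strings)
        std::printf("emit %s with abbrev %d\n", S, Abbrev);
    }

    int main() {
      emitAll({"a", "b", "c"}); // createAbbrev() runs once, not three times
      return 0;
    }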
diff --git a/contrib/llvm-project/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index 59e6fcb44d5a..d5cb488f2fde 100644
--- a/contrib/llvm-project/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -42,6 +42,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
initializeDXILPrepareModulePass(*PR);
initializeEmbedDXILPassPass(*PR);
initializeWriteDXILPassPass(*PR);
+ initializeDXContainerGlobalsPass(*PR);
initializeDXILOpLoweringLegacyPass(*PR);
initializeDXILTranslateMetadataPass(*PR);
initializeDXILResourceWrapperPass(*PR);
@@ -86,7 +87,7 @@ DirectXTargetMachine::DirectXTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: LLVMTargetMachine(T,
"e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-"
"f32:32-f64:64-n8:16:32:64",
@@ -127,19 +128,18 @@ bool DirectXTargetMachine::addPassesToEmitFile(
TargetPassConfig *PassConfig = createPassConfig(PM);
PassConfig->addCodeGenPrepare();
- if (TargetPassConfig::willCompleteCodeGenPipeline()) {
- PM.add(createDXILEmbedderPass());
- // We embed the other DXContainer globals after embedding DXIL so that the
- // globals don't pollute the DXIL.
- PM.add(createDXContainerGlobalsPass());
- }
switch (FileType) {
- case CGFT_AssemblyFile:
+ case CodeGenFileType::AssemblyFile:
PM.add(createDXILPrettyPrinterPass(Out));
PM.add(createPrintModulePass(Out, "", true));
break;
- case CGFT_ObjectFile:
+ case CodeGenFileType::ObjectFile:
if (TargetPassConfig::willCompleteCodeGenPipeline()) {
+ PM.add(createDXILEmbedderPass());
+ // We embed the other DXContainer globals after embedding DXIL so that the
+ // globals don't pollute the DXIL.
+ PM.add(createDXContainerGlobalsPass());
+
if (!MMIWP)
MMIWP = new MachineModuleInfoWrapperPass(this);
PM.add(MMIWP);
@@ -149,7 +149,7 @@ bool DirectXTargetMachine::addPassesToEmitFile(
} else
PM.add(createDXILWriterPass(Out));
break;
- case CGFT_Null:
+ case CodeGenFileType::Null:
break;
}
return false;
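addPassesToEmitFile() now switches over scoped CodeGenFileType enumerators instead of the unscoped CGFT_* names, and the DXIL embedding passes move under the ObjectFile case so they run only when a full object is produced. A toy of the enum side of the change (enumerator names as in LLVM's CodeGen support header):

    // Scoped replacement for the old unscoped CGFT_* constants.
    enum class CodeGenFileType { AssemblyFile, ObjectFile, Null };

    static const char *describe(CodeGenFileType FT) {
      switch (FT) {
      case CodeGenFileType::AssemblyFile: return "assembly";
      case CodeGenFileType::ObjectFile:   return "object";
      case CodeGenFileType::Null:         return "null";
      }
      return "unknown";
    }

    int main() { return describe(CodeGenFileType::ObjectFile)[0] == 'o' ? 0 : 1; }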
diff --git a/contrib/llvm-project/llvm/lib/Target/DirectX/DirectXTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/DirectX/DirectXTargetMachine.h
index a6a1b3ef045b..d04c375b2736 100644
--- a/contrib/llvm-project/llvm/lib/Target/DirectX/DirectXTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/DirectX/DirectXTargetMachine.h
@@ -25,7 +25,7 @@ public:
DirectXTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
~DirectXTargetMachine() override;
diff --git a/contrib/llvm-project/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp
index cb6d4c5cd0a3..4a73cbbea3fc 100644
--- a/contrib/llvm-project/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp
@@ -64,7 +64,7 @@ class DXILMCCodeEmitter : public MCCodeEmitter {
public:
DXILMCCodeEmitter() {}
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &Inst, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override {}
};
@@ -72,7 +72,8 @@ public:
class DXILAsmBackend : public MCAsmBackend {
public:
- DXILAsmBackend(const MCSubtargetInfo &STI) : MCAsmBackend(support::little) {}
+ DXILAsmBackend(const MCSubtargetInfo &STI)
+ : MCAsmBackend(llvm::endianness::little) {}
~DXILAsmBackend() override = default;
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index ce93715d6c42..fd7d25fa16d1 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -115,10 +115,9 @@ class HexagonAsmParser : public MCTargetAsmParser {
bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
bool ParseDirectiveFalign(unsigned Size, SMLoc L);
- bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
- OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseDirectiveSubsection(SMLoc L);
bool ParseDirectiveComm(bool IsLocal, SMLoc L);
bool RegisterMatchesArch(unsigned MatchNum) const;
@@ -963,14 +962,13 @@ bool HexagonAsmParser::handleNoncontigiousRegister(bool Contigious,
return false;
}
-bool HexagonAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
+bool HexagonAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) {
- return tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success;
+ return !tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
}
-OperandMatchResultTy HexagonAsmParser::tryParseRegister(MCRegister &RegNo,
- SMLoc &StartLoc,
- SMLoc &EndLoc) {
+ParseStatus HexagonAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
MCAsmLexer &Lexer = getLexer();
StartLoc = getLexer().getLoc();
SmallVector<AsmToken, 5> Lookahead;
@@ -1002,20 +1000,20 @@ OperandMatchResultTy HexagonAsmParser::tryParseRegister(MCRegister &RegNo,
unsigned DotReg = matchRegister(DotSplit.first.lower());
if (DotReg != Hexagon::NoRegister && RegisterMatchesArch(DotReg)) {
if (DotSplit.second.empty()) {
- RegNo = DotReg;
+ Reg = DotReg;
EndLoc = Lexer.getLoc();
if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc))
- return MatchOperand_NoMatch;
- return MatchOperand_Success;
+ return ParseStatus::NoMatch;
+ return ParseStatus::Success;
} else {
- RegNo = DotReg;
+ Reg = DotReg;
size_t First = RawString.find('.');
StringRef DotString (RawString.data() + First, RawString.size() - First);
Lexer.UnLex(AsmToken(AsmToken::Identifier, DotString));
EndLoc = Lexer.getLoc();
if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc))
- return MatchOperand_NoMatch;
- return MatchOperand_Success;
+ return ParseStatus::NoMatch;
+ return ParseStatus::Success;
}
}
std::pair<StringRef, StringRef> ColonSplit = StringRef(FullString).split(':');
@@ -1024,16 +1022,16 @@ OperandMatchResultTy HexagonAsmParser::tryParseRegister(MCRegister &RegNo,
do {
Lexer.UnLex(Lookahead.pop_back_val());
} while (!Lookahead.empty() && !Lexer.is(AsmToken::Colon));
- RegNo = ColonReg;
+ Reg = ColonReg;
EndLoc = Lexer.getLoc();
if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc))
- return MatchOperand_NoMatch;
- return MatchOperand_Success;
+ return ParseStatus::NoMatch;
+ return ParseStatus::Success;
}
while (!Lookahead.empty()) {
Lexer.UnLex(Lookahead.pop_back_val());
}
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
bool HexagonAsmParser::implicitExpressionLocation(OperandVector &Operands) {
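tryParseRegister() moves from OperandMatchResultTy to ParseStatus, whose query methods let the caller write !tryParseRegister(...).isSuccess() rather than compare enumerators. A simplified stand-in with the same usage shape (not the real LLVM class; requires C++17):

    #include <cstdio>

    class ParseStatus {
      enum class S { Success, Failure, NoMatch } Val;

    public:
      static constexpr S Success = S::Success;
      static constexpr S NoMatch = S::NoMatch;

      constexpr ParseStatus(S V) : Val(V) {}
      constexpr bool isSuccess() const { return Val == S::Success; }
    };

    static ParseStatus tryParseRegister(bool Found) {
      return Found ? ParseStatus::Success : ParseStatus::NoMatch;
    }

    static bool parseRegister(bool Found) {
      // Old style compared against MatchOperand_Success; new style queries.
      return !tryParseRegister(Found).isSuccess(); // true means "not parsed"
    }

    int main() {
      std::printf("%d %d\n", parseRegister(true), parseRegister(false)); // 0 1
      return 0;
    }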
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index 033e6737f8bb..310993662b67 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -467,7 +467,7 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) {
// Combine aggressively (for code size)
ShouldCombineAggressively =
- MF.getTarget().getOptLevel() <= CodeGenOpt::Default;
+ MF.getTarget().getOptLevel() <= CodeGenOptLevel::Default;
// Disable CONST64 for tiny core since it takes a LD resource.
if (!OptForSize && ST->isTinyCore())
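The aggressive-combining check works because scoped enums keep the built-in relational operators, so getOptLevel() <= CodeGenOptLevel::Default reads the same after the CodeGenOpt::Level rename. A toy sketch; the enumerator values are assumed to mirror LLVM's (None=0 through Aggressive=3):

    enum class CodeGenOptLevel { None = 0, Less = 1, Default = 2, Aggressive = 3 };

    int main() {
      CodeGenOptLevel OL = CodeGenOptLevel::Less;
      // Relational operators work between values of the same scoped enum.
      return (OL <= CodeGenOptLevel::Default) ? 0 : 1; // exits 0
    }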
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
index 736839bb015b..257ca203426e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
@@ -15,6 +15,7 @@ def tc_02fe1c65 : InstrItinClass;
def tc_0655b949 : InstrItinClass;
def tc_075c8dd8 : InstrItinClass;
def tc_0a195f2c : InstrItinClass;
+def tc_0a43be35 : InstrItinClass;
def tc_0a6c20ae : InstrItinClass;
def tc_0ba0d5da : InstrItinClass;
def tc_0dfac0a7 : InstrItinClass;
@@ -22,6 +23,7 @@ def tc_0fac1eb8 : InstrItinClass;
def tc_112d30d6 : InstrItinClass;
def tc_1242dc2a : InstrItinClass;
def tc_1248597c : InstrItinClass;
+def tc_139ef484 : InstrItinClass;
def tc_14ab4f41 : InstrItinClass;
def tc_151bf368 : InstrItinClass;
def tc_158aa3f7 : InstrItinClass;
@@ -62,13 +64,16 @@ def tc_44d5a428 : InstrItinClass;
def tc_44fffc58 : InstrItinClass;
def tc_45791fb8 : InstrItinClass;
def tc_45f9d1be : InstrItinClass;
+def tc_46c18ecf : InstrItinClass;
def tc_49fdfd4b : InstrItinClass;
def tc_4a55d03c : InstrItinClass;
def tc_4abdbdc6 : InstrItinClass;
def tc_4ac61d92 : InstrItinClass;
def tc_4bf903b0 : InstrItinClass;
def tc_503ce0f3 : InstrItinClass;
+def tc_512b1653 : InstrItinClass;
def tc_53c851ab : InstrItinClass;
+def tc_54f0cee2 : InstrItinClass;
def tc_5502c366 : InstrItinClass;
def tc_55255f2b : InstrItinClass;
def tc_556f6577 : InstrItinClass;
@@ -78,6 +83,7 @@ def tc_56a124a7 : InstrItinClass;
def tc_57a55b54 : InstrItinClass;
def tc_5944960d : InstrItinClass;
def tc_59a7822c : InstrItinClass;
+def tc_5a222e89 : InstrItinClass;
def tc_5a4b5e58 : InstrItinClass;
def tc_5b347363 : InstrItinClass;
def tc_5ceb2f9e : InstrItinClass;
@@ -92,24 +98,31 @@ def tc_651cbe02 : InstrItinClass;
def tc_65279839 : InstrItinClass;
def tc_65cbd974 : InstrItinClass;
def tc_69bfb303 : InstrItinClass;
+def tc_6aa823ab : InstrItinClass;
def tc_6ae3426b : InstrItinClass;
def tc_6d861a95 : InstrItinClass;
def tc_6e20402a : InstrItinClass;
def tc_6f42bc60 : InstrItinClass;
+def tc_6fb52018 : InstrItinClass;
def tc_6fc5dbea : InstrItinClass;
def tc_711c805f : InstrItinClass;
def tc_713b66bf : InstrItinClass;
def tc_7401744f : InstrItinClass;
def tc_7476d766 : InstrItinClass;
def tc_74a42bda : InstrItinClass;
+def tc_759e57be : InstrItinClass;
def tc_76bb5435 : InstrItinClass;
def tc_77f94a5e : InstrItinClass;
def tc_788b1d09 : InstrItinClass;
+def tc_78f87ed3 : InstrItinClass;
def tc_7af3a37e : InstrItinClass;
def tc_7b9187d3 : InstrItinClass;
+def tc_7c28bd7e : InstrItinClass;
def tc_7c31e19a : InstrItinClass;
def tc_7c6d32e4 : InstrItinClass;
+def tc_7d6a2568 : InstrItinClass;
def tc_7dc63b5c : InstrItinClass;
+def tc_7f58404a : InstrItinClass;
def tc_7f7f45f5 : InstrItinClass;
def tc_7f8ae742 : InstrItinClass;
def tc_8035e91f : InstrItinClass;
@@ -134,6 +147,7 @@ def tc_95f43c5e : InstrItinClass;
def tc_96ef76ef : InstrItinClass;
def tc_975a4e54 : InstrItinClass;
def tc_9783714b : InstrItinClass;
+def tc_9b20a062 : InstrItinClass;
def tc_9b34f5e0 : InstrItinClass;
def tc_9b3c0462 : InstrItinClass;
def tc_9bcfb2ee : InstrItinClass;
@@ -152,6 +166,7 @@ def tc_a32e03e7 : InstrItinClass;
def tc_a38c45dc : InstrItinClass;
def tc_a4e22bbd : InstrItinClass;
def tc_a4ee89db : InstrItinClass;
+def tc_a724463d : InstrItinClass;
def tc_a7a13fac : InstrItinClass;
def tc_a7bdb22c : InstrItinClass;
def tc_a9edeffa : InstrItinClass;
@@ -162,11 +177,15 @@ def tc_ae5babd7 : InstrItinClass;
def tc_aee6250c : InstrItinClass;
def tc_af6af259 : InstrItinClass;
def tc_b1ae5f67 : InstrItinClass;
+def tc_b2196a3f : InstrItinClass;
+def tc_b3d46584 : InstrItinClass;
def tc_b4dc7630 : InstrItinClass;
def tc_b7c4062a : InstrItinClass;
def tc_b837298f : InstrItinClass;
+def tc_b9bec29e : InstrItinClass;
def tc_ba9255a6 : InstrItinClass;
def tc_bb07f2c5 : InstrItinClass;
+def tc_bb78483e : InstrItinClass;
def tc_bb831a7c : InstrItinClass;
def tc_bf2ffc0f : InstrItinClass;
def tc_c20701f0 : InstrItinClass;
@@ -176,12 +195,14 @@ def tc_c818ff7f : InstrItinClass;
def tc_ce59038e : InstrItinClass;
def tc_cfa0e29b : InstrItinClass;
def tc_d03278fd : InstrItinClass;
+def tc_d234b61a : InstrItinClass;
def tc_d33e5eee : InstrItinClass;
def tc_d3632d88 : InstrItinClass;
def tc_d45ba9cd : InstrItinClass;
def tc_d57d649c : InstrItinClass;
def tc_d61dfdc3 : InstrItinClass;
def tc_d68dca5c : InstrItinClass;
+def tc_d71ea8fa : InstrItinClass;
def tc_d7718fbe : InstrItinClass;
def tc_db596beb : InstrItinClass;
def tc_db96aa6b : InstrItinClass;
@@ -192,6 +213,7 @@ def tc_e3d699e3 : InstrItinClass;
def tc_e60def48 : InstrItinClass;
def tc_e9170fb7 : InstrItinClass;
def tc_ed03645c : InstrItinClass;
+def tc_ed3f8d2a : InstrItinClass;
def tc_eed07714 : InstrItinClass;
def tc_eeda4109 : InstrItinClass;
def tc_ef921005 : InstrItinClass;
@@ -469,6 +491,10 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_139ef484, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_14ab4f41, /*tc_3stall*/
[InstrStage<1, [SLOT0]>], [4, 3, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -629,6 +655,10 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
+ InstrItinData <tc_46c18ecf, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_49fdfd4b, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -653,10 +683,18 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_512b1653, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_53c851ab, /*tc_2early*/
[InstrStage<1, [SLOT2]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_54f0cee2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_5502c366, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -693,6 +731,10 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5a222e89, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_5a4b5e58, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -749,6 +791,10 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6aa823ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6ae3426b, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -765,6 +811,10 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6fb52018, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6fc5dbea, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -789,10 +839,18 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_759e57be, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_76bb5435, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7d6a2568, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_77f94a5e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [],
[]>,
@@ -801,6 +859,10 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_78f87ed3, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
InstrItinData <tc_7af3a37e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [1, 3],
[Hex_FWD, Hex_FWD]>,
@@ -809,6 +871,10 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7c28bd7e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
InstrItinData <tc_7c31e19a, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -821,6 +887,10 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7f58404a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
InstrItinData <tc_7f7f45f5, /*tc_3x*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -917,6 +987,10 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9b20a062, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_9b34f5e0, /*tc_2early*/
[InstrStage<1, [SLOT2]>], [],
[]>,
@@ -989,6 +1063,10 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT0]>], [],
[]>,
+ InstrItinData <tc_a724463d, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_a7a13fac, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1041,6 +1119,10 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
+ InstrItinData <tc_b9bec29e, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
InstrItinData <tc_ba9255a6, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1049,6 +1131,10 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bb78483e, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_bb831a7c, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1085,6 +1171,10 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d234b61a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_d33e5eee, /*tc_2early*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1109,6 +1199,10 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d71ea8fa, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_d7718fbe, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
@@ -1149,6 +1243,10 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT2]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_ed3f8d2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_eed07714, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1269,6 +1367,10 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_139ef484, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_14ab4f41, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1429,6 +1531,10 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
+ InstrItinData <tc_46c18ecf, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_49fdfd4b, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -1453,10 +1559,18 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_512b1653, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_53c851ab, /*tc_2early*/
[InstrStage<1, [SLOT2]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_54f0cee2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_5502c366, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1493,6 +1607,10 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5a222e89, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_5a4b5e58, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1549,6 +1667,10 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6aa823ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6ae3426b, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -1565,6 +1687,10 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6fb52018, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6fc5dbea, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1589,10 +1715,18 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_759e57be, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_76bb5435, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7d6a2568, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_77f94a5e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [],
[]>,
@@ -1601,6 +1735,10 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_78f87ed3, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
InstrItinData <tc_7af3a37e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [1, 3],
[Hex_FWD, Hex_FWD]>,
@@ -1609,6 +1747,10 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7c28bd7e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
InstrItinData <tc_7c31e19a, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1621,6 +1763,10 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7f58404a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
InstrItinData <tc_7f7f45f5, /*tc_4x*/
[InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1717,6 +1863,10 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9b20a062, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_9b34f5e0, /*tc_2early*/
[InstrStage<1, [SLOT2]>], [],
[]>,
@@ -1789,6 +1939,10 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT0]>], [],
[]>,
+ InstrItinData <tc_a724463d, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_a7a13fac, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1841,6 +1995,10 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
+ InstrItinData <tc_b9bec29e, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
InstrItinData <tc_ba9255a6, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1849,6 +2007,10 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bb78483e, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_bb831a7c, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1885,6 +2047,10 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d234b61a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_d33e5eee, /*tc_2early*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1909,6 +2075,10 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d71ea8fa, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_d7718fbe, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
@@ -2069,6 +2239,10 @@ class DepScalarItinV60se {
[InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_139ef484, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_14ab4f41, /*tc_newvjump*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST]>], [3, 3, 2],
@@ -2241,6 +2415,10 @@ class DepScalarItinV60se {
InstrStage<1, [CVI_ST]>], [2],
[Hex_FWD]>,
+ InstrItinData <tc_46c18ecf, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_49fdfd4b, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -2265,11 +2443,19 @@ class DepScalarItinV60se {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_512b1653, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_53c851ab, /*tc_2early*/
[InstrStage<1, [SLOT2], 0>,
InstrStage<1, [CVI_ST]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_54f0cee2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_5502c366, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_ST]>], [3, 2, 2],
@@ -2309,6 +2495,10 @@ class DepScalarItinV60se {
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5a222e89, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_5a4b5e58, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -2367,6 +2557,10 @@ class DepScalarItinV60se {
InstrStage<1, [CVI_ST]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6aa823ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6ae3426b, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -2383,6 +2577,10 @@ class DepScalarItinV60se {
[InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6fb52018, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6fc5dbea, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -2408,10 +2606,18 @@ class DepScalarItinV60se {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_759e57be, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_76bb5435, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7d6a2568, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_77f94a5e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [],
[]>,
@@ -2420,6 +2626,10 @@ class DepScalarItinV60se {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_78f87ed3, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
InstrItinData <tc_7af3a37e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [1, 3],
[Hex_FWD, Hex_FWD]>,
@@ -2429,6 +2639,10 @@ class DepScalarItinV60se {
InstrStage<1, [CVI_ST]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7c28bd7e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
InstrItinData <tc_7c31e19a, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -2441,6 +2655,10 @@ class DepScalarItinV60se {
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7f58404a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
InstrItinData <tc_7f7f45f5, /*tc_4x*/
[InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -2539,6 +2757,10 @@ class DepScalarItinV60se {
[InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9b20a062, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_9b34f5e0, /*tc_2early*/
[InstrStage<1, [SLOT2]>], [],
[]>,
@@ -2613,6 +2835,10 @@ class DepScalarItinV60se {
InstrStage<1, [CVI_ST]>], [],
[]>,
+ InstrItinData <tc_a724463d, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_a7a13fac, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -2665,6 +2891,10 @@ class DepScalarItinV60se {
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
+ InstrItinData <tc_b9bec29e, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
InstrItinData <tc_ba9255a6, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -2673,6 +2903,10 @@ class DepScalarItinV60se {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bb78483e, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_bb831a7c, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -2710,6 +2944,10 @@ class DepScalarItinV60se {
[InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d234b61a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_d33e5eee, /*tc_2early*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -2735,6 +2973,10 @@ class DepScalarItinV60se {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d71ea8fa, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_d7718fbe, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
@@ -2780,6 +3022,10 @@ class DepScalarItinV60se {
InstrStage<1, [CVI_ST]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_ed3f8d2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_eed07714, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -2905,6 +3151,10 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_139ef484, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_14ab4f41, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3065,6 +3315,10 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
+ InstrItinData <tc_46c18ecf, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_49fdfd4b, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -3089,10 +3343,18 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_512b1653, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_53c851ab, /*tc_2early*/
[InstrStage<1, [SLOT2]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_54f0cee2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_5502c366, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3129,6 +3391,10 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5a222e89, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_5a4b5e58, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3185,6 +3451,10 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6aa823ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6ae3426b, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -3201,6 +3471,10 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6fb52018, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6fc5dbea, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3225,10 +3499,18 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_759e57be, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_76bb5435, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7d6a2568, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_77f94a5e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [],
[]>,
@@ -3237,6 +3519,10 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_78f87ed3, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
InstrItinData <tc_7af3a37e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [1, 3],
[Hex_FWD, Hex_FWD]>,
@@ -3245,6 +3531,10 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7c28bd7e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
InstrItinData <tc_7c31e19a, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3257,6 +3547,10 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7f58404a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
InstrItinData <tc_7f7f45f5, /*tc_4x*/
[InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3353,6 +3647,10 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9b20a062, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_9b34f5e0, /*tc_2early*/
[InstrStage<1, [SLOT2]>], [],
[]>,
@@ -3425,6 +3723,10 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT0]>], [],
[]>,
+ InstrItinData <tc_a724463d, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_a7a13fac, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3477,6 +3779,10 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
+ InstrItinData <tc_b9bec29e, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
InstrItinData <tc_ba9255a6, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3485,6 +3791,10 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bb78483e, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_bb831a7c, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3521,6 +3831,10 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d234b61a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_d33e5eee, /*tc_2early*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3545,6 +3859,10 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d71ea8fa, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_d7718fbe, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
@@ -3585,6 +3903,10 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT2]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_ed3f8d2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_eed07714, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3677,6 +3999,10 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0a43be35, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_0a6c20ae, /*tc_st*/
[InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3705,6 +4031,10 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_139ef484, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_14ab4f41, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3, 3, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3865,6 +4195,10 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
+ InstrItinData <tc_46c18ecf, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_49fdfd4b, /*tc_3stall*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -3889,10 +4223,18 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_512b1653, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_53c851ab, /*tc_3stall*/
[InstrStage<1, [SLOT2]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_54f0cee2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_5502c366, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3929,6 +4271,10 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5a222e89, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_5a4b5e58, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3985,6 +4331,10 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6aa823ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6ae3426b, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -4001,6 +4351,10 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6fb52018, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6fc5dbea, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -4025,10 +4379,18 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_759e57be, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_76bb5435, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7d6a2568, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_77f94a5e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [],
[]>,
@@ -4037,6 +4399,10 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_78f87ed3, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
InstrItinData <tc_7af3a37e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [1, 3],
[Hex_FWD, Hex_FWD]>,
@@ -4045,6 +4411,10 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7c28bd7e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
InstrItinData <tc_7c31e19a, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -4057,6 +4427,10 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7f58404a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
InstrItinData <tc_7f7f45f5, /*tc_4x*/
[InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -4153,6 +4527,10 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9b20a062, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_9b34f5e0, /*tc_3stall*/
[InstrStage<1, [SLOT2]>], [],
[]>,
@@ -4225,6 +4603,10 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT0]>], [],
[]>,
+ InstrItinData <tc_a724463d, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_a7a13fac, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -4265,6 +4647,14 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
+ InstrItinData <tc_b2196a3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b3d46584, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
InstrItinData <tc_b4dc7630, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -4277,6 +4667,10 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
+ InstrItinData <tc_b9bec29e, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
InstrItinData <tc_ba9255a6, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -4285,6 +4679,10 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bb78483e, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_bb831a7c, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -4321,6 +4719,10 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d234b61a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_d33e5eee, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -4345,6 +4747,10 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d71ea8fa, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_d7718fbe, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
@@ -4385,6 +4791,10 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT2]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_ed3f8d2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_eed07714, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -4477,6 +4887,10 @@ class DepScalarItinV66 {
[InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0a43be35, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_0a6c20ae, /*tc_st*/
[InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -4505,6 +4919,10 @@ class DepScalarItinV66 {
[InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_139ef484, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_14ab4f41, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3, 3, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -4665,6 +5083,10 @@ class DepScalarItinV66 {
[InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
+ InstrItinData <tc_46c18ecf, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_49fdfd4b, /*tc_3stall*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -4689,10 +5111,18 @@ class DepScalarItinV66 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_512b1653, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_53c851ab, /*tc_3stall*/
[InstrStage<1, [SLOT2]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_54f0cee2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_5502c366, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -4729,6 +5159,10 @@ class DepScalarItinV66 {
[InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5a222e89, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_5a4b5e58, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -4785,6 +5219,10 @@ class DepScalarItinV66 {
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6aa823ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6ae3426b, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -4801,6 +5239,10 @@ class DepScalarItinV66 {
[InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6fb52018, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6fc5dbea, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -4825,10 +5267,18 @@ class DepScalarItinV66 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_759e57be, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_76bb5435, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7d6a2568, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_77f94a5e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [],
[]>,
@@ -4837,6 +5287,10 @@ class DepScalarItinV66 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_78f87ed3, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
InstrItinData <tc_7af3a37e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [1, 3],
[Hex_FWD, Hex_FWD]>,
@@ -4845,6 +5299,10 @@ class DepScalarItinV66 {
[InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7c28bd7e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
InstrItinData <tc_7c31e19a, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -4857,6 +5315,10 @@ class DepScalarItinV66 {
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7f58404a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
InstrItinData <tc_7f7f45f5, /*tc_4x*/
[InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -4953,6 +5415,10 @@ class DepScalarItinV66 {
[InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9b20a062, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_9b34f5e0, /*tc_3stall*/
[InstrStage<1, [SLOT2]>], [],
[]>,
@@ -5025,6 +5491,10 @@ class DepScalarItinV66 {
[InstrStage<1, [SLOT0]>], [],
[]>,
+ InstrItinData <tc_a724463d, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_a7a13fac, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -5065,6 +5535,14 @@ class DepScalarItinV66 {
[InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
+ InstrItinData <tc_b2196a3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b3d46584, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
InstrItinData <tc_b4dc7630, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -5077,6 +5555,10 @@ class DepScalarItinV66 {
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
+ InstrItinData <tc_b9bec29e, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
InstrItinData <tc_ba9255a6, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -5085,6 +5567,10 @@ class DepScalarItinV66 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bb78483e, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_bb831a7c, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -5121,6 +5607,10 @@ class DepScalarItinV66 {
[InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d234b61a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_d33e5eee, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -5145,6 +5635,10 @@ class DepScalarItinV66 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d71ea8fa, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_d7718fbe, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
@@ -5185,6 +5679,10 @@ class DepScalarItinV66 {
[InstrStage<1, [SLOT2]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_ed3f8d2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_eed07714, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -5277,6 +5775,10 @@ class DepScalarItinV67 {
[InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0a43be35, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_0a6c20ae, /*tc_st*/
[InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -5305,6 +5807,10 @@ class DepScalarItinV67 {
[InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_139ef484, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_14ab4f41, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3, 3, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -5465,6 +5971,10 @@ class DepScalarItinV67 {
[InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
+ InstrItinData <tc_46c18ecf, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_49fdfd4b, /*tc_3stall*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -5489,10 +5999,18 @@ class DepScalarItinV67 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_512b1653, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_53c851ab, /*tc_3stall*/
[InstrStage<1, [SLOT2]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_54f0cee2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_5502c366, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -5529,6 +6047,10 @@ class DepScalarItinV67 {
[InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5a222e89, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_5a4b5e58, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -5585,6 +6107,10 @@ class DepScalarItinV67 {
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6aa823ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6ae3426b, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -5601,6 +6127,10 @@ class DepScalarItinV67 {
[InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6fb52018, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6fc5dbea, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -5625,10 +6155,18 @@ class DepScalarItinV67 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_759e57be, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_76bb5435, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7d6a2568, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_77f94a5e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [],
[]>,
@@ -5637,6 +6175,10 @@ class DepScalarItinV67 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_78f87ed3, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
InstrItinData <tc_7af3a37e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [1, 3],
[Hex_FWD, Hex_FWD]>,
@@ -5645,6 +6187,10 @@ class DepScalarItinV67 {
[InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7c28bd7e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
InstrItinData <tc_7c31e19a, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -5657,6 +6203,10 @@ class DepScalarItinV67 {
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7f58404a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
InstrItinData <tc_7f7f45f5, /*tc_4x*/
[InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -5753,6 +6303,10 @@ class DepScalarItinV67 {
[InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9b20a062, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_9b34f5e0, /*tc_3stall*/
[InstrStage<1, [SLOT2]>], [],
[]>,
@@ -5825,6 +6379,10 @@ class DepScalarItinV67 {
[InstrStage<1, [SLOT0]>], [],
[]>,
+ InstrItinData <tc_a724463d, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_a7a13fac, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -5865,6 +6423,14 @@ class DepScalarItinV67 {
[InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
+ InstrItinData <tc_b2196a3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b3d46584, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
InstrItinData <tc_b4dc7630, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -5877,6 +6447,10 @@ class DepScalarItinV67 {
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
+ InstrItinData <tc_b9bec29e, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
InstrItinData <tc_ba9255a6, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -5885,6 +6459,10 @@ class DepScalarItinV67 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bb78483e, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_bb831a7c, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -5921,6 +6499,10 @@ class DepScalarItinV67 {
[InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d234b61a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_d33e5eee, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -5945,6 +6527,10 @@ class DepScalarItinV67 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d71ea8fa, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_d7718fbe, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
@@ -5985,6 +6571,10 @@ class DepScalarItinV67 {
[InstrStage<1, [SLOT2]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_ed3f8d2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_eed07714, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -6077,6 +6667,10 @@ class DepScalarItinV67T {
[InstrStage<1, [SLOT3]>], [5, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0a43be35, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_0a6c20ae, /*tc_st*/
[InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -6105,6 +6699,10 @@ class DepScalarItinV67T {
[InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_139ef484, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_14ab4f41, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3, 3, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -6265,6 +6863,10 @@ class DepScalarItinV67T {
[InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
+ InstrItinData <tc_46c18ecf, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_49fdfd4b, /*tc_3stall*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -6289,10 +6891,18 @@ class DepScalarItinV67T {
[InstrStage<1, [SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_512b1653, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_53c851ab, /*tc_3stall*/
[InstrStage<1, [SLOT2]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_54f0cee2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_5502c366, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -6329,6 +6939,10 @@ class DepScalarItinV67T {
[InstrStage<1, [SLOT0]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5a222e89, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_5a4b5e58, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -6385,6 +6999,10 @@ class DepScalarItinV67T {
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6aa823ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6ae3426b, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -6401,6 +7019,10 @@ class DepScalarItinV67T {
[InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6fb52018, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6fc5dbea, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -6425,10 +7047,18 @@ class DepScalarItinV67T {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_759e57be, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_76bb5435, /*tc_ld*/
[InstrStage<1, [SLOT0]>], [4, 3, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7d6a2568, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_77f94a5e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [],
[]>,
@@ -6437,6 +7067,10 @@ class DepScalarItinV67T {
[InstrStage<1, [SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_78f87ed3, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
InstrItinData <tc_7af3a37e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [1, 3],
[Hex_FWD, Hex_FWD]>,
@@ -6445,6 +7079,10 @@ class DepScalarItinV67T {
[InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7c28bd7e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
InstrItinData <tc_7c31e19a, /*tc_st*/
[InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -6457,6 +7095,10 @@ class DepScalarItinV67T {
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7f58404a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
InstrItinData <tc_7f7f45f5, /*tc_4x*/
[InstrStage<1, [SLOT3]>], [5, 5, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -6553,6 +7195,10 @@ class DepScalarItinV67T {
[InstrStage<1, [SLOT3]>], [5, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9b20a062, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_9b34f5e0, /*tc_3stall*/
[InstrStage<1, [SLOT2]>], [],
[]>,
@@ -6625,6 +7271,10 @@ class DepScalarItinV67T {
[InstrStage<1, [SLOT0]>], [],
[]>,
+ InstrItinData <tc_a724463d, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_a7a13fac, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -6665,6 +7315,14 @@ class DepScalarItinV67T {
[InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
+ InstrItinData <tc_b2196a3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b3d46584, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
InstrItinData <tc_b4dc7630, /*tc_st*/
[InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -6677,6 +7335,10 @@ class DepScalarItinV67T {
[InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [],
[]>,
+ InstrItinData <tc_b9bec29e, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
InstrItinData <tc_ba9255a6, /*tc_st*/
[InstrStage<1, [SLOT0]>], [2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -6685,6 +7347,10 @@ class DepScalarItinV67T {
[InstrStage<1, [SLOT0]>], [3, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bb78483e, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_bb831a7c, /*tc_2*/
[InstrStage<1, [SLOT3]>], [4, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -6721,6 +7387,10 @@ class DepScalarItinV67T {
[InstrStage<1, [SLOT0]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d234b61a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_d33e5eee, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -6745,6 +7415,10 @@ class DepScalarItinV67T {
[InstrStage<1, [SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d71ea8fa, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_d7718fbe, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
@@ -6785,6 +7459,10 @@ class DepScalarItinV67T {
[InstrStage<1, [SLOT2]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_ed3f8d2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_eed07714, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -6877,6 +7555,10 @@ class DepScalarItinV68 {
[InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0a43be35, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_0a6c20ae, /*tc_st*/
[InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -6905,6 +7587,10 @@ class DepScalarItinV68 {
[InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_139ef484, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_14ab4f41, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3, 3, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -7065,6 +7751,10 @@ class DepScalarItinV68 {
[InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
+ InstrItinData <tc_46c18ecf, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_49fdfd4b, /*tc_3stall*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -7089,10 +7779,18 @@ class DepScalarItinV68 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_512b1653, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_53c851ab, /*tc_3stall*/
[InstrStage<1, [SLOT2]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_54f0cee2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_5502c366, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -7129,6 +7827,10 @@ class DepScalarItinV68 {
[InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5a222e89, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_5a4b5e58, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -7185,6 +7887,10 @@ class DepScalarItinV68 {
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6aa823ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6ae3426b, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -7201,6 +7907,10 @@ class DepScalarItinV68 {
[InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6fb52018, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6fc5dbea, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -7225,10 +7935,18 @@ class DepScalarItinV68 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_759e57be, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_76bb5435, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7d6a2568, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_77f94a5e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [],
[]>,
@@ -7237,6 +7955,10 @@ class DepScalarItinV68 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_78f87ed3, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
InstrItinData <tc_7af3a37e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [1, 3],
[Hex_FWD, Hex_FWD]>,
@@ -7245,6 +7967,10 @@ class DepScalarItinV68 {
[InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7c28bd7e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
InstrItinData <tc_7c31e19a, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -7257,6 +7983,10 @@ class DepScalarItinV68 {
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7f58404a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
InstrItinData <tc_7f7f45f5, /*tc_4x*/
[InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -7353,6 +8083,10 @@ class DepScalarItinV68 {
[InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9b20a062, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_9b34f5e0, /*tc_3stall*/
[InstrStage<1, [SLOT2]>], [],
[]>,
@@ -7425,6 +8159,10 @@ class DepScalarItinV68 {
[InstrStage<1, [SLOT0]>], [],
[]>,
+ InstrItinData <tc_a724463d, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_a7a13fac, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -7465,6 +8203,14 @@ class DepScalarItinV68 {
[InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
+ InstrItinData <tc_b2196a3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b3d46584, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
InstrItinData <tc_b4dc7630, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -7477,6 +8223,10 @@ class DepScalarItinV68 {
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
+ InstrItinData <tc_b9bec29e, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
InstrItinData <tc_ba9255a6, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -7485,6 +8235,10 @@ class DepScalarItinV68 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bb78483e, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_bb831a7c, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -7521,6 +8275,10 @@ class DepScalarItinV68 {
[InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d234b61a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_d33e5eee, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -7545,6 +8303,10 @@ class DepScalarItinV68 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d71ea8fa, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_d7718fbe, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
@@ -7585,6 +8347,10 @@ class DepScalarItinV68 {
[InstrStage<1, [SLOT2]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_ed3f8d2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_eed07714, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -7677,6 +8443,10 @@ class DepScalarItinV69 {
[InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0a43be35, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_0a6c20ae, /*tc_st*/
[InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -7705,6 +8475,10 @@ class DepScalarItinV69 {
[InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_139ef484, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_14ab4f41, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3, 3, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -7865,6 +8639,10 @@ class DepScalarItinV69 {
[InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
+ InstrItinData <tc_46c18ecf, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_49fdfd4b, /*tc_3stall*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -7889,10 +8667,18 @@ class DepScalarItinV69 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_512b1653, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_53c851ab, /*tc_3stall*/
[InstrStage<1, [SLOT2]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_54f0cee2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_5502c366, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -7929,6 +8715,10 @@ class DepScalarItinV69 {
[InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5a222e89, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_5a4b5e58, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -7985,6 +8775,10 @@ class DepScalarItinV69 {
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6aa823ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6ae3426b, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -8001,6 +8795,10 @@ class DepScalarItinV69 {
[InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6fb52018, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6fc5dbea, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -8025,10 +8823,18 @@ class DepScalarItinV69 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_759e57be, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_76bb5435, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7d6a2568, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_77f94a5e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [],
[]>,
@@ -8037,6 +8843,10 @@ class DepScalarItinV69 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_78f87ed3, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
InstrItinData <tc_7af3a37e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [1, 3],
[Hex_FWD, Hex_FWD]>,
@@ -8045,6 +8855,10 @@ class DepScalarItinV69 {
[InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7c28bd7e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
InstrItinData <tc_7c31e19a, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -8057,6 +8871,10 @@ class DepScalarItinV69 {
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7f58404a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
InstrItinData <tc_7f7f45f5, /*tc_4x*/
[InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -8153,6 +8971,10 @@ class DepScalarItinV69 {
[InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9b20a062, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_9b34f5e0, /*tc_3stall*/
[InstrStage<1, [SLOT2]>], [],
[]>,
@@ -8225,6 +9047,10 @@ class DepScalarItinV69 {
[InstrStage<1, [SLOT0]>], [],
[]>,
+ InstrItinData <tc_a724463d, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_a7a13fac, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -8265,6 +9091,14 @@ class DepScalarItinV69 {
[InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
+ InstrItinData <tc_b2196a3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b3d46584, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
InstrItinData <tc_b4dc7630, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -8277,6 +9111,10 @@ class DepScalarItinV69 {
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
+ InstrItinData <tc_b9bec29e, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
InstrItinData <tc_ba9255a6, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -8285,6 +9123,10 @@ class DepScalarItinV69 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bb78483e, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_bb831a7c, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -8321,6 +9163,10 @@ class DepScalarItinV69 {
[InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d234b61a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_d33e5eee, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -8345,6 +9191,10 @@ class DepScalarItinV69 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d71ea8fa, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_d7718fbe, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
@@ -8385,6 +9235,10 @@ class DepScalarItinV69 {
[InstrStage<1, [SLOT2]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_ed3f8d2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_eed07714, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -8477,6 +9331,10 @@ class DepScalarItinV71 {
[InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0a43be35, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_0a6c20ae, /*tc_st*/
[InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -8505,6 +9363,10 @@ class DepScalarItinV71 {
[InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_139ef484, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_14ab4f41, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3, 3, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -8665,6 +9527,10 @@ class DepScalarItinV71 {
[InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
+ InstrItinData <tc_46c18ecf, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_49fdfd4b, /*tc_3stall*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -8689,10 +9555,18 @@ class DepScalarItinV71 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_512b1653, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_53c851ab, /*tc_3stall*/
[InstrStage<1, [SLOT2]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_54f0cee2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_5502c366, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -8729,6 +9603,10 @@ class DepScalarItinV71 {
[InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5a222e89, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_5a4b5e58, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -8785,6 +9663,10 @@ class DepScalarItinV71 {
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6aa823ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6ae3426b, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -8801,6 +9683,10 @@ class DepScalarItinV71 {
[InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6fb52018, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6fc5dbea, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -8825,10 +9711,18 @@ class DepScalarItinV71 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_759e57be, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_76bb5435, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7d6a2568, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_77f94a5e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [],
[]>,
@@ -8837,6 +9731,10 @@ class DepScalarItinV71 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_78f87ed3, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
InstrItinData <tc_7af3a37e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [1, 3],
[Hex_FWD, Hex_FWD]>,
@@ -8845,6 +9743,10 @@ class DepScalarItinV71 {
[InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7c28bd7e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
InstrItinData <tc_7c31e19a, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -8857,6 +9759,10 @@ class DepScalarItinV71 {
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7f58404a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
InstrItinData <tc_7f7f45f5, /*tc_4x*/
[InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -8953,6 +9859,10 @@ class DepScalarItinV71 {
[InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9b20a062, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_9b34f5e0, /*tc_3stall*/
[InstrStage<1, [SLOT2]>], [],
[]>,
@@ -9025,6 +9935,10 @@ class DepScalarItinV71 {
[InstrStage<1, [SLOT0]>], [],
[]>,
+ InstrItinData <tc_a724463d, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_a7a13fac, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -9065,6 +9979,14 @@ class DepScalarItinV71 {
[InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
+ InstrItinData <tc_b2196a3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b3d46584, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
InstrItinData <tc_b4dc7630, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -9077,6 +9999,10 @@ class DepScalarItinV71 {
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
+ InstrItinData <tc_b9bec29e, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
InstrItinData <tc_ba9255a6, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -9085,6 +10011,10 @@ class DepScalarItinV71 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bb78483e, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_bb831a7c, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -9121,6 +10051,10 @@ class DepScalarItinV71 {
[InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d234b61a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_d33e5eee, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -9145,6 +10079,10 @@ class DepScalarItinV71 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d71ea8fa, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_d7718fbe, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
@@ -9185,6 +10123,10 @@ class DepScalarItinV71 {
[InstrStage<1, [SLOT2]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_ed3f8d2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_eed07714, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -9277,6 +10219,10 @@ class DepScalarItinV71T {
[InstrStage<1, [SLOT3]>], [5, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0a43be35, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_0a6c20ae, /*tc_st*/
[InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -9305,6 +10251,10 @@ class DepScalarItinV71T {
[InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_139ef484, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_14ab4f41, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3, 3, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -9465,6 +10415,10 @@ class DepScalarItinV71T {
[InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
+ InstrItinData <tc_46c18ecf, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_49fdfd4b, /*tc_3stall*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -9489,10 +10443,18 @@ class DepScalarItinV71T {
[InstrStage<1, [SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_512b1653, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_53c851ab, /*tc_3stall*/
[InstrStage<1, [SLOT2]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_54f0cee2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_5502c366, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -9529,6 +10491,10 @@ class DepScalarItinV71T {
[InstrStage<1, [SLOT0]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5a222e89, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_5a4b5e58, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -9585,6 +10551,10 @@ class DepScalarItinV71T {
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6aa823ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6ae3426b, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -9601,6 +10571,10 @@ class DepScalarItinV71T {
[InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6fb52018, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6fc5dbea, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -9625,10 +10599,18 @@ class DepScalarItinV71T {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_759e57be, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_76bb5435, /*tc_ld*/
[InstrStage<1, [SLOT0]>], [4, 3, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7d6a2568, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_77f94a5e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [],
[]>,
@@ -9637,6 +10619,10 @@ class DepScalarItinV71T {
[InstrStage<1, [SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_78f87ed3, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
InstrItinData <tc_7af3a37e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [1, 3],
[Hex_FWD, Hex_FWD]>,
@@ -9645,6 +10631,10 @@ class DepScalarItinV71T {
[InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7c28bd7e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
InstrItinData <tc_7c31e19a, /*tc_st*/
[InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -9657,6 +10647,10 @@ class DepScalarItinV71T {
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7f58404a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
InstrItinData <tc_7f7f45f5, /*tc_4x*/
[InstrStage<1, [SLOT3]>], [5, 5, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -9753,6 +10747,10 @@ class DepScalarItinV71T {
[InstrStage<1, [SLOT3]>], [5, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9b20a062, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_9b34f5e0, /*tc_3stall*/
[InstrStage<1, [SLOT2]>], [],
[]>,
@@ -9825,6 +10823,10 @@ class DepScalarItinV71T {
[InstrStage<1, [SLOT0]>], [],
[]>,
+ InstrItinData <tc_a724463d, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_a7a13fac, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -9865,6 +10867,14 @@ class DepScalarItinV71T {
[InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
+ InstrItinData <tc_b2196a3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b3d46584, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
InstrItinData <tc_b4dc7630, /*tc_st*/
[InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -9877,6 +10887,10 @@ class DepScalarItinV71T {
[InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [],
[]>,
+ InstrItinData <tc_b9bec29e, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
InstrItinData <tc_ba9255a6, /*tc_st*/
[InstrStage<1, [SLOT0]>], [2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -9885,6 +10899,10 @@ class DepScalarItinV71T {
[InstrStage<1, [SLOT0]>], [3, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bb78483e, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_bb831a7c, /*tc_2*/
[InstrStage<1, [SLOT3]>], [4, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -9921,6 +10939,10 @@ class DepScalarItinV71T {
[InstrStage<1, [SLOT0]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d234b61a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_d33e5eee, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -9945,6 +10967,10 @@ class DepScalarItinV71T {
[InstrStage<1, [SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d71ea8fa, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_d7718fbe, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
@@ -9985,6 +11011,10 @@ class DepScalarItinV71T {
[InstrStage<1, [SLOT2]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_ed3f8d2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_eed07714, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -10077,6 +11107,10 @@ class DepScalarItinV73 {
[InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0a43be35, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_0a6c20ae, /*tc_st*/
[InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -10105,6 +11139,10 @@ class DepScalarItinV73 {
[InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_139ef484, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_14ab4f41, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3, 3, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -10265,6 +11303,10 @@ class DepScalarItinV73 {
[InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
+ InstrItinData <tc_46c18ecf, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_49fdfd4b, /*tc_3stall*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -10289,10 +11331,18 @@ class DepScalarItinV73 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_512b1653, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_53c851ab, /*tc_3stall*/
[InstrStage<1, [SLOT2]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_54f0cee2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_5502c366, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -10329,6 +11379,10 @@ class DepScalarItinV73 {
[InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5a222e89, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_5a4b5e58, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -10385,6 +11439,10 @@ class DepScalarItinV73 {
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6aa823ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6ae3426b, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
@@ -10401,6 +11459,10 @@ class DepScalarItinV73 {
[InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6fb52018, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_6fc5dbea, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -10425,10 +11487,18 @@ class DepScalarItinV73 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_759e57be, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_76bb5435, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7d6a2568, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_77f94a5e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [],
[]>,
@@ -10437,6 +11507,10 @@ class DepScalarItinV73 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_78f87ed3, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
InstrItinData <tc_7af3a37e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [1, 3],
[Hex_FWD, Hex_FWD]>,
@@ -10445,6 +11519,10 @@ class DepScalarItinV73 {
[InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7c28bd7e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
InstrItinData <tc_7c31e19a, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -10457,6 +11535,10 @@ class DepScalarItinV73 {
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7f58404a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
InstrItinData <tc_7f7f45f5, /*tc_4x*/
[InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -10553,6 +11635,10 @@ class DepScalarItinV73 {
[InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9b20a062, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_9b34f5e0, /*tc_3stall*/
[InstrStage<1, [SLOT2]>], [],
[]>,
@@ -10625,6 +11711,10 @@ class DepScalarItinV73 {
[InstrStage<1, [SLOT0]>], [],
[]>,
+ InstrItinData <tc_a724463d, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_a7a13fac, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -10665,6 +11755,14 @@ class DepScalarItinV73 {
[InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
+ InstrItinData <tc_b2196a3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b3d46584, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
InstrItinData <tc_b4dc7630, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -10677,6 +11775,10 @@ class DepScalarItinV73 {
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
+ InstrItinData <tc_b9bec29e, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
InstrItinData <tc_ba9255a6, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -10685,6 +11787,10 @@ class DepScalarItinV73 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bb78483e, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_bb831a7c, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -10721,6 +11827,10 @@ class DepScalarItinV73 {
[InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d234b61a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
InstrItinData <tc_d33e5eee, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -10745,6 +11855,10 @@ class DepScalarItinV73 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d71ea8fa, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_d7718fbe, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
@@ -10785,6 +11899,10 @@ class DepScalarItinV73 {
[InstrStage<1, [SLOT2]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_ed3f8d2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_eed07714, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
index 2ea6f7941afb..75e87c95f2c4 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
@@ -1005,6 +1005,12 @@ class Enc_46c951 : OpcodeHexagon {
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
}
+class Enc_46f33d : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
class Enc_47ee5e : OpcodeHexagon {
bits <2> Ii;
let Inst{13-13} = Ii{1-1};
@@ -1249,6 +1255,10 @@ class Enc_58a8bf : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
+class Enc_598f6c : OpcodeHexagon {
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+}
class Enc_5a18b3 : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -2240,6 +2250,12 @@ class Enc_9e4c3f : OpcodeHexagon {
bits <4> Rd16;
let Inst{19-16} = Rd16{3-0};
}
+class Enc_9e9047 : OpcodeHexagon {
+ bits <2> Pt4;
+ let Inst{9-8} = Pt4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
class Enc_9ea4cf : OpcodeHexagon {
bits <2> Ii;
let Inst{13-13} = Ii{1-1};
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
index 15f5e4407f92..0351217f441d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
@@ -5825,6 +5825,16 @@ let isExtentSigned = 1;
let opExtentBits = 9;
let opExtentAlign = 2;
}
+def J2_rte : HInst<
+(outs),
+(ins),
+"rte",
+tc_b9bec29e, TypeJ>, Enc_e3b0c4 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-16} = 0b0101011111100000;
+let Uses = [ELR];
+let Defs = [PC];
+}
def J2_trap0 : HInst<
(outs),
(ins u8_0Imm:$Ii),
@@ -12445,6 +12455,20 @@ let isExtentSigned = 0;
let opExtentBits = 6;
let opExtentAlign = 0;
}
+def L4_loadw_phys : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = memw_phys($Rs32,$Rt32)",
+tc_ed3f8d2a, TypeLD>, Enc_5ab2be {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10010010000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let isSolo = 1;
+}
def L4_or_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
@@ -40389,6 +40413,15 @@ let Inst{13-0} = 0b00000000000000;
let Inst{31-16} = 0b0110110000100000;
let isSolo = 1;
}
+def Y2_ciad : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"ciad($Rs32)",
+tc_0a43be35, TypeCR>, Enc_ecbcc8 {
+let Inst{13-0} = 0b00000001100000;
+let Inst{31-21} = 0b01100100000;
+let isSoloAX = 1;
+}
def Y2_crswap0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in),
@@ -40413,6 +40446,15 @@ let isPseudo = 1;
let isCodeGenOnly = 1;
let Constraints = "$Rx32 = $Rx32in";
}
+def Y2_cswi : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"cswi($Rs32)",
+tc_0a43be35, TypeCR>, Enc_ecbcc8 {
+let Inst{13-0} = 0b00000000100000;
+let Inst{31-21} = 0b01100100000;
+let isSoloAX = 1;
+}
def Y2_dccleana : HInst<
(outs),
(ins IntRegs:$Rs32),
@@ -40423,6 +40465,15 @@ let Inst{31-21} = 0b10100000000;
let isRestrictSlot1AOK = 1;
let hasSideEffects = 1;
}
+def Y2_dccleanidx : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"dccleanidx($Rs32)",
+tc_d234b61a, TypeST>, Enc_ecbcc8 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b10100010001;
+let isSoloAX = 1;
+}
def Y2_dccleaninva : HInst<
(outs),
(ins IntRegs:$Rs32),
@@ -40433,6 +40484,15 @@ let Inst{31-21} = 0b10100000010;
let isRestrictSlot1AOK = 1;
let hasSideEffects = 1;
}
+def Y2_dccleaninvidx : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"dccleaninvidx($Rs32)",
+tc_d234b61a, TypeST>, Enc_ecbcc8 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b10100010011;
+let isSoloAX = 1;
+}
def Y2_dcfetch : HInst<
(outs),
(ins IntRegs:$Rs32),
@@ -40463,6 +40523,45 @@ let Inst{31-21} = 0b10100000001;
let isRestrictSlot1AOK = 1;
let hasSideEffects = 1;
}
+def Y2_dcinvidx : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"dcinvidx($Rs32)",
+tc_d234b61a, TypeST>, Enc_ecbcc8 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b10100010010;
+let isSoloAX = 1;
+}
+def Y2_dckill : HInst<
+(outs),
+(ins),
+"dckill",
+tc_78f87ed3, TypeST>, Enc_e3b0c4 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-16} = 0b1010001000000000;
+let isSolo = 1;
+}
+def Y2_dctagr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = dctagr($Rs32)",
+tc_a724463d, TypeST>, Enc_5e2823 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10100100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isSoloAX = 1;
+}
+def Y2_dctagw : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"dctagw($Rs32,$Rt32)",
+tc_6fb52018, TypeST>, Enc_ca3887 {
+let Inst{7-0} = 0b00000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10100100000;
+let isSolo = 1;
+}
def Y2_dczeroa : HInst<
(outs),
(ins IntRegs:$Rs32),
@@ -40474,6 +40573,58 @@ let isRestrictSlot1AOK = 1;
let mayStore = 1;
let hasSideEffects = 1;
}
+def Y2_getimask : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = getimask($Rs32)",
+tc_46c18ecf, TypeCR>, Enc_5e2823 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01100110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isSoloAX = 1;
+}
+def Y2_iassignr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = iassignr($Rs32)",
+tc_46c18ecf, TypeCR>, Enc_5e2823 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01100110011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isSoloAX = 1;
+}
+def Y2_iassignw : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"iassignw($Rs32)",
+tc_0a43be35, TypeCR>, Enc_ecbcc8 {
+let Inst{13-0} = 0b00000001000000;
+let Inst{31-21} = 0b01100100000;
+let isSoloAX = 1;
+}
+def Y2_icdatar : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = icdatar($Rs32)",
+tc_9b20a062, TypeJ>, Enc_5e2823 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01010101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isSolo = 1;
+}
+def Y2_icdataw : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"icdataw($Rs32,$Rt32)",
+tc_5a222e89, TypeJ>, Enc_ca3887, Requires<[HasV66]> {
+let Inst{7-0} = 0b00000000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b01010101110;
+let isSolo = 1;
+}
def Y2_icinva : HInst<
(outs),
(ins IntRegs:$Rs32),
@@ -40483,6 +40634,45 @@ let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01010110110;
let isSolo = 1;
}
+def Y2_icinvidx : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"icinvidx($Rs32)",
+tc_7d6a2568, TypeJ>, Enc_ecbcc8 {
+let Inst{13-0} = 0b00100000000000;
+let Inst{31-21} = 0b01010110110;
+let isSolo = 1;
+}
+def Y2_ickill : HInst<
+(outs),
+(ins),
+"ickill",
+tc_b9bec29e, TypeJ>, Enc_e3b0c4 {
+let Inst{13-0} = 0b01000000000000;
+let Inst{31-16} = 0b0101011011000000;
+let isSolo = 1;
+}
+def Y2_ictagr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = ictagr($Rs32)",
+tc_759e57be, TypeJ>, Enc_5e2823 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01010101111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isSolo = 1;
+}
+def Y2_ictagw : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"ictagw($Rs32,$Rt32)",
+tc_139ef484, TypeJ>, Enc_ca3887 {
+let Inst{7-0} = 0b00000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01010101110;
+let isSolo = 1;
+}
def Y2_isync : HInst<
(outs),
(ins),
@@ -40492,6 +40682,24 @@ let Inst{13-0} = 0b00000000000010;
let Inst{31-16} = 0b0101011111000000;
let isSolo = 1;
}
+def Y2_k0lock : HInst<
+(outs),
+(ins),
+"k0lock",
+tc_7f58404a, TypeCR>, Enc_e3b0c4 {
+let Inst{13-0} = 0b00000001100000;
+let Inst{31-16} = 0b0110110000100000;
+let isSolo = 1;
+}
+def Y2_k0unlock : HInst<
+(outs),
+(ins),
+"k0unlock",
+tc_7f58404a, TypeCR>, Enc_e3b0c4 {
+let Inst{13-0} = 0b00000010000000;
+let Inst{31-16} = 0b0110110000100000;
+let isSolo = 1;
+}
def Y2_k1lock_map : HInst<
(outs),
(ins),
@@ -40508,6 +40716,79 @@ PSEUDO, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
+def Y2_l2cleaninvidx : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"l2cleaninvidx($Rs32)",
+tc_d234b61a, TypeST>, Enc_ecbcc8 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b10101000011;
+let isSoloAX = 1;
+}
+def Y2_l2kill : HInst<
+(outs),
+(ins),
+"l2kill",
+tc_b3d46584, TypeST>, Enc_e3b0c4 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-16} = 0b1010100000100000;
+let isSolo = 1;
+}
+def Y2_resume : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"resume($Rs32)",
+tc_0a43be35, TypeCR>, Enc_ecbcc8 {
+let Inst{13-0} = 0b00000000100000;
+let Inst{31-21} = 0b01100100010;
+let isSolo = 1;
+}
+def Y2_setimask : HInst<
+(outs),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"setimask($Pt4,$Rs32)",
+tc_d71ea8fa, TypeCR>, Enc_9e9047 {
+let Inst{7-0} = 0b00000000;
+let Inst{13-10} = 0b0000;
+let Inst{31-21} = 0b01100100100;
+let isSoloAX = 1;
+}
+def Y2_setprio : HInst<
+(outs),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"setprio($Pt4,$Rs32)",
+tc_d71ea8fa, TypeCR>, Enc_9e9047, Requires<[HasV66]> {
+let Inst{7-0} = 0b00100000;
+let Inst{13-10} = 0b0000;
+let Inst{31-21} = 0b01100100100;
+}
+def Y2_start : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"start($Rs32)",
+tc_0a43be35, TypeCR>, Enc_ecbcc8 {
+let Inst{13-0} = 0b00000000100000;
+let Inst{31-21} = 0b01100100011;
+let isSolo = 1;
+}
+def Y2_stop : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"stop($Rs32)",
+tc_0a43be35, TypeCR>, Enc_ecbcc8 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b01100100011;
+let isSolo = 1;
+}
+def Y2_swi : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"swi($Rs32)",
+tc_0a43be35, TypeCR>, Enc_ecbcc8 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b01100100000;
+let isSoloAX = 1;
+}
def Y2_syncht : HInst<
(outs),
(ins),
@@ -40537,6 +40818,54 @@ let Inst{31-21} = 0b01100111000;
let hasNewValue = 1;
let opNewValue = 0;
}
+def Y2_tlblock : HInst<
+(outs),
+(ins),
+"tlblock",
+tc_7f58404a, TypeCR>, Enc_e3b0c4 {
+let Inst{13-0} = 0b00000000100000;
+let Inst{31-16} = 0b0110110000100000;
+let isSolo = 1;
+}
+def Y2_tlbp : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = tlbp($Rs32)",
+tc_6aa823ab, TypeCR>, Enc_5e2823 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01101100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isSolo = 1;
+}
+def Y2_tlbr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = tlbr($Rs32)",
+tc_6aa823ab, TypeCR>, Enc_3a3d62 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01101100010;
+let isSolo = 1;
+}
+def Y2_tlbunlock : HInst<
+(outs),
+(ins),
+"tlbunlock",
+tc_7f58404a, TypeCR>, Enc_e3b0c4 {
+let Inst{13-0} = 0b00000001000000;
+let Inst{31-16} = 0b0110110000100000;
+let isSolo = 1;
+}
+def Y2_tlbw : HInst<
+(outs),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"tlbw($Rss32,$Rt32)",
+tc_b2196a3f, TypeCR>, Enc_46f33d {
+let Inst{7-0} = 0b00000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01101100000;
+let isSolo = 1;
+}
def Y2_wait : HInst<
(outs),
(ins IntRegs:$Rs32),
@@ -40582,6 +40911,45 @@ let isSoloAX = 1;
let hasSideEffects = 1;
let mayStore = 1;
}
+def Y4_l2tagr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = l2tagr($Rs32)",
+tc_a724463d, TypeST>, Enc_5e2823 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10100100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isSoloAX = 1;
+}
+def Y4_l2tagw : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"l2tagw($Rs32,$Rt32)",
+tc_512b1653, TypeST>, Enc_ca3887 {
+let Inst{7-0} = 0b00000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10100100010;
+let isSolo = 1;
+}
+def Y4_nmi : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"nmi($Rs32)",
+tc_0a43be35, TypeCR>, Enc_ecbcc8 {
+let Inst{13-0} = 0b00000001000000;
+let Inst{31-21} = 0b01100100011;
+let isSolo = 1;
+}
+def Y4_siad : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"siad($Rs32)",
+tc_0a43be35, TypeCR>, Enc_ecbcc8 {
+let Inst{13-0} = 0b00000001100000;
+let Inst{31-21} = 0b01100100100;
+let isSoloAX = 1;
+}
def Y4_tfrscpp : HInst<
(outs DoubleRegs:$Rdd32),
(ins SysRegs64:$Sss128),
@@ -40609,6 +40977,27 @@ let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01100010010;
let isSoloAX = 1;
}
+def Y5_ctlbw : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rd32 = ctlbw($Rss32,$Rt32)",
+tc_bb78483e, TypeCR>, Enc_3d5b28 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01101100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isSolo = 1;
+}
+def Y5_l2cleanidx : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"l2cleanidx($Rs32)",
+tc_d234b61a, TypeST>, Enc_ecbcc8 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b10100110001;
+let isSoloAX = 1;
+}
def Y5_l2fetch : HInst<
(outs),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
@@ -40621,6 +41010,81 @@ let isSoloAX = 1;
let hasSideEffects = 1;
let mayStore = 1;
}
+def Y5_l2gclean : HInst<
+(outs),
+(ins),
+"l2gclean",
+tc_b3d46584, TypeST>, Enc_e3b0c4 {
+let Inst{13-0} = 0b01000000000000;
+let Inst{31-16} = 0b1010100000100000;
+let isSolo = 1;
+}
+def Y5_l2gcleaninv : HInst<
+(outs),
+(ins),
+"l2gcleaninv",
+tc_b3d46584, TypeST>, Enc_e3b0c4 {
+let Inst{13-0} = 0b01100000000000;
+let Inst{31-16} = 0b1010100000100000;
+let isSolo = 1;
+}
+def Y5_l2gunlock : HInst<
+(outs),
+(ins),
+"l2gunlock",
+tc_b3d46584, TypeST>, Enc_e3b0c4 {
+let Inst{13-0} = 0b00100000000000;
+let Inst{31-16} = 0b1010100000100000;
+let isSolo = 1;
+}
+def Y5_l2invidx : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"l2invidx($Rs32)",
+tc_d234b61a, TypeST>, Enc_ecbcc8 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b10100110010;
+let isSoloAX = 1;
+}
+def Y5_l2locka : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32),
+"$Pd4 = l2locka($Rs32)",
+tc_a724463d, TypeST>, Enc_48b75f {
+let Inst{13-2} = 0b100000000000;
+let Inst{31-21} = 0b10100000111;
+let isPredicateLate = 1;
+let isSoloAX = 1;
+}
+def Y5_l2unlocka : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"l2unlocka($Rs32)",
+tc_d234b61a, TypeST>, Enc_ecbcc8 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b10100110011;
+let isSoloAX = 1;
+}
+def Y5_tlbasidi : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"tlbinvasid($Rs32)",
+tc_54f0cee2, TypeCR>, Enc_ecbcc8 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b01101100101;
+let isSolo = 1;
+}
+def Y5_tlboc : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = tlboc($Rss32)",
+tc_6aa823ab, TypeCR>, Enc_90cd8b {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01101100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isSolo = 1;
+}
def Y6_diag : HInst<
(outs),
(ins IntRegs:$Rs32),
@@ -40715,6 +41179,26 @@ let opNewValue = 0;
let hasSideEffects = 1;
let isSolo = 1;
}
+def Y6_l2gcleaninvpa : HInst<
+(outs),
+(ins DoubleRegs:$Rtt32),
+"l2gcleaninv($Rtt32)",
+tc_7c28bd7e, TypeST>, Enc_598f6c {
+let Inst{7-0} = 0b00000000;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b1010011011000000;
+let isSolo = 1;
+}
+def Y6_l2gcleanpa : HInst<
+(outs),
+(ins DoubleRegs:$Rtt32),
+"l2gclean($Rtt32)",
+tc_7c28bd7e, TypeST>, Enc_598f6c {
+let Inst{7-0} = 0b00000000;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b1010011010100000;
+let isSolo = 1;
+}
def dep_A2_addsat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
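
The new system-instruction defs above fix most opcode bits with `let Inst{...}` patterns; for a zero-operand instruction such as J2_rte the whole word is determined up front. A small sketch of the resulting encoding (a hypothetical helper; it assumes bits 15-14, Hexagon's packet parse bits, are filled in later when the packet is finalized):

#include <cstdint>

// J2_rte: Inst{31-16} = 0b0101011111100000 and Inst{13-0} = 0, so the
// raw word is 0x57E00000 before the parse bits in Inst{15-14} are set.
uint32_t encodeJ2rte() {
  uint32_t Inst = 0;
  Inst |= 0b0101011111100000u << 16; // Inst{31-16}
  // Inst{13-0} is already all zeros.
  return Inst;
}
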
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index 7dc154aaaea1..e1005296d637 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -115,6 +115,7 @@
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <iterator>
+#include <map>
#include <set>
#include <utility>
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 231ac0825ee1..812e5f7ad930 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -381,7 +381,7 @@ static bool isRestoreCall(unsigned Opc) {
static inline bool isOptNone(const MachineFunction &MF) {
return MF.getFunction().hasOptNone() ||
- MF.getTarget().getOptLevel() == CodeGenOpt::None;
+ MF.getTarget().getOptLevel() == CodeGenOptLevel::None;
}
static inline bool isOptSize(const MachineFunction &MF) {
@@ -1156,7 +1156,7 @@ bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
// gdb can't break at the start of the function without it. Will remove if
// this turns out to be a gdb bug.
//
- if (MF.getTarget().getOptLevel() == CodeGenOpt::None)
+ if (MF.getTarget().getOptLevel() == CodeGenOptLevel::None)
return true;
// By default we want to use SP (since it's always there). FP requires
@@ -1269,7 +1269,7 @@ HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
int Offset = MFI.getObjectOffset(FI);
bool HasAlloca = MFI.hasVarSizedObjects();
bool HasExtraAlign = HRI.hasStackRealignment(MF);
- bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None;
+ bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOptLevel::None;
auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
unsigned FrameSize = MFI.getStackSize();
@@ -2584,7 +2584,7 @@ bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF,
if (!hasFP(MF))
return true;
if (!isOptSize(MF) && !isMinSize(MF))
- if (MF.getTarget().getOptLevel() > CodeGenOpt::Default)
+ if (MF.getTarget().getOptLevel() > CodeGenOptLevel::Default)
return true;
// Check if CSI only has double registers, and if the registers form
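
The edits in this file are part of the mechanical rename from the unscoped CodeGenOpt::Level enum to the scoped CodeGenOptLevel enum class. A minimal sketch of the new spelling (assuming upstream's enumerator order None < Less < Default < Aggressive; not code from the patch):

#include "llvm/Support/CodeGen.h"

// Scoped enumerators need full qualification, but relational comparisons
// such as the "> Default" check in shouldInlineCSR still work directly.
static bool isOptimizing(llvm::CodeGenOptLevel OL) {
  return OL != llvm::CodeGenOptLevel::None;
}
static bool isAggressive(llvm::CodeGenOptLevel OL) {
  return OL > llvm::CodeGenOptLevel::Default;
}
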
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 5aad71a0a1c9..f930015026a5 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -59,7 +59,7 @@ namespace llvm {
/// createHexagonISelDag - This pass converts a legalized DAG into a
/// Hexagon-specific DAG, ready for instruction scheduling.
FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
return new HexagonDAGToDAGISel(TM, OptLevel);
}
}
@@ -955,17 +955,17 @@ void HexagonDAGToDAGISel::Select(SDNode *N) {
SelectCode(N);
}
-bool HexagonDAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
- std::vector<SDValue> &OutOps) {
+bool HexagonDAGToDAGISel::SelectInlineAsmMemoryOperand(
+ const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
+ std::vector<SDValue> &OutOps) {
SDValue Inp = Op, Res;
switch (ConstraintID) {
default:
return true;
- case InlineAsm::Constraint_o: // Offsetable.
- case InlineAsm::Constraint_v: // Not offsetable.
- case InlineAsm::Constraint_m: // Memory.
+ case InlineAsm::ConstraintCode::o: // Offsetable.
+ case InlineAsm::ConstraintCode::v: // Not offsetable.
+ case InlineAsm::ConstraintCode::m: // Memory.
if (SelectAddrFI(Inp, Res))
OutOps.push_back(Res);
else
@@ -977,7 +977,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
return false;
}
-
static bool isMemOPCandidate(SDNode *I, SDNode *U) {
// I is an operand of U. Check if U is an arithmetic (binary) operation
// usable in a memop, where the other operand is a loaded value, and the
@@ -1028,15 +1027,11 @@ void HexagonDAGToDAGISel::ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes) {
if (I->getOpcode() != ISD::OR)
continue;
- auto IsZero = [] (const SDValue &V) -> bool {
- if (ConstantSDNode *SC = dyn_cast<ConstantSDNode>(V.getNode()))
- return SC->isZero();
- return false;
- };
- auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool {
+ auto IsSelect0 = [](const SDValue &Op) -> bool {
if (Op.getOpcode() != ISD::SELECT)
return false;
- return IsZero(Op.getOperand(1)) || IsZero(Op.getOperand(2));
+ return isNullConstant(Op.getOperand(1)) ||
+ isNullConstant(Op.getOperand(2));
};
SDValue N0 = I->getOperand(0), N1 = I->getOperand(1);
@@ -1050,11 +1045,11 @@ void HexagonDAGToDAGISel::ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes) {
SDValue SX = SOp.getOperand(1);
SDValue SY = SOp.getOperand(2);
SDLoc DLS = SOp;
- if (IsZero(SY)) {
+ if (isNullConstant(SY)) {
SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp);
SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp);
DAG.ReplaceAllUsesWith(I, NewSel.getNode());
- } else if (IsZero(SX)) {
+ } else if (isNullConstant(SX)) {
SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp);
SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr);
DAG.ReplaceAllUsesWith(I, NewSel.getNode());
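
The hand-rolled IsZero lambda removed above is replaced by llvm::isNullConstant from SelectionDAGNodes.h, which performs the same check. A sketch of the equivalent logic, mirroring the deleted code:

#include "llvm/CodeGen/SelectionDAGNodes.h"

// What the deleted lambda did, and what isNullConstant(V) checks:
// the value is a ConstantSDNode whose constant is zero.
static bool isZeroNode(llvm::SDValue V) {
  if (auto *C = llvm::dyn_cast<llvm::ConstantSDNode>(V.getNode()))
    return C->isZero();
  return false;
}
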
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
index 061da2a69ba4..8fb1760936e8 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
@@ -14,7 +14,6 @@
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/CodeGen.h"
@@ -36,7 +35,7 @@ public:
HexagonDAGToDAGISel() = delete;
explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm,
- CodeGenOpt::Level OptLevel)
+ CodeGenOptLevel OptLevel)
: SelectionDAGISel(ID, tm, OptLevel), HST(nullptr), HII(nullptr),
HRI(nullptr) {}
@@ -85,7 +84,7 @@ public:
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- unsigned ConstraintID,
+ InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) override;
bool tryLoadOfLoadIntrinsic(LoadSDNode *N);
bool SelectBrevLdIntrinsic(SDNode *IntN);
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index 57b5f9a28794..efb0d405fef2 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -2003,10 +2003,10 @@ SmallVector<uint32_t, 8> HvxSelector::getPerfectCompletions(ShuffleMask SM,
if ((unsigned)llvm::popcount(P) < Count) {
// Reset all occurrences of P, if there are more occurrences of P
// than there are bits in P.
- for_each(Worklist, [P](unsigned &Q) {
+ for (unsigned &Q : Worklist) {
if (Q == P)
Q = 0;
- });
+ }
}
}
@@ -2341,7 +2341,7 @@ OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
}
auto Comps = getPerfectCompletions(SM, LogLen);
- if (llvm::any_of(Comps, [](uint32_t P) { return P == 0; }))
+ if (llvm::is_contained(Comps, 0))
return OpRef::fail();
auto Pick = completeToPerfect(Comps, LogLen);
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index db2d2eb9813c..a7d452e7227d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -669,31 +669,32 @@ HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
--NumOps; // Ignore the flag operand.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
- unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
- unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ const InlineAsm::Flag Flags(
+ cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue());
+ unsigned NumVals = Flags.getNumOperandRegisters();
++i; // Skip the ID value.
- switch (InlineAsm::getKind(Flags)) {
- default:
- llvm_unreachable("Bad flags!");
- case InlineAsm::Kind_RegUse:
- case InlineAsm::Kind_Imm:
- case InlineAsm::Kind_Mem:
- i += NumVals;
- break;
- case InlineAsm::Kind_Clobber:
- case InlineAsm::Kind_RegDef:
- case InlineAsm::Kind_RegDefEarlyClobber: {
- for (; NumVals; --NumVals, ++i) {
- Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
- if (Reg != LR)
- continue;
- HMFI.setHasClobberLR(true);
- return Op;
- }
- break;
+ switch (Flags.getKind()) {
+ default:
+ llvm_unreachable("Bad flags!");
+ case InlineAsm::Kind::RegUse:
+ case InlineAsm::Kind::Imm:
+ case InlineAsm::Kind::Mem:
+ i += NumVals;
+ break;
+ case InlineAsm::Kind::Clobber:
+ case InlineAsm::Kind::RegDef:
+ case InlineAsm::Kind::RegDefEarlyClobber: {
+ for (; NumVals; --NumVals, ++i) {
+ Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
+ if (Reg != LR)
+ continue;
+ HMFI.setHasClobberLR(true);
+ return Op;
+ }
+ break;
+ }
}
- }
}
return Op;
@@ -2717,12 +2718,11 @@ HexagonTargetLowering::extractVectorPred(SDValue VecV, SDValue IdxV,
assert(VecWidth == 8 || VecWidth == 4 || VecWidth == 2);
// Check if this is an extract of the lowest bit.
- if (auto *IdxN = dyn_cast<ConstantSDNode>(IdxV)) {
+ if (isNullConstant(IdxV) && ValTy.getSizeInBits() == 1) {
// Extracting the lowest bit is a no-op, but it changes the type,
// so it must be kept as an operation to avoid errors related to
// type mismatches.
- if (IdxN->isZero() && ValTy.getSizeInBits() == 1)
- return DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, VecV);
+ return DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, VecV);
}
// If the value extracted is a single bit, use tstbit.
@@ -3214,9 +3214,9 @@ HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
DAG.getConstant(NeedAlign, dl, MVT::i32))
: BO.first;
SDValue Base0 =
- DAG.getMemBasePlusOffset(BaseNoOff, TypeSize::Fixed(BO.second), dl);
+ DAG.getMemBasePlusOffset(BaseNoOff, TypeSize::getFixed(BO.second), dl);
SDValue Base1 = DAG.getMemBasePlusOffset(
- BaseNoOff, TypeSize::Fixed(BO.second + LoadLen), dl);
+ BaseNoOff, TypeSize::getFixed(BO.second + LoadLen), dl);
MachineMemOperand *WideMMO = nullptr;
if (MachineMemOperand *MMO = LN->getMemOperand()) {
@@ -3847,11 +3847,6 @@ Value *HexagonTargetLowering::emitLoadLinked(IRBuilderBase &Builder,
: Intrinsic::hexagon_L4_loadd_locked;
Function *Fn = Intrinsic::getDeclaration(M, IntID);
- auto PtrTy = cast<PointerType>(Addr->getType());
- PointerType *NewPtrTy =
- Builder.getIntNTy(SZ)->getPointerTo(PtrTy->getAddressSpace());
- Addr = Builder.CreateBitCast(Addr, NewPtrTy);
-
Value *Call = Builder.CreateCall(Fn, Addr, "larx");
return Builder.CreateBitCast(Call, ValueTy);
@@ -3873,8 +3868,6 @@ Value *HexagonTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
: Intrinsic::hexagon_S4_stored_locked;
Function *Fn = Intrinsic::getDeclaration(M, IntID);
- unsigned AS = Addr->getType()->getPointerAddressSpace();
- Addr = Builder.CreateBitCast(Addr, CastTy->getPointerTo(AS));
Val = Builder.CreateBitCast(Val, CastTy);
Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx");
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 659997036170..db416a500f59 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -829,8 +829,7 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
return DAG.getUNDEF(VecTy);
if (IsSplat) {
assert(SplatV.getNode());
- auto *IdxN = dyn_cast<ConstantSDNode>(SplatV.getNode());
- if (IdxN && IdxN->isZero())
+ if (isNullConstant(SplatV))
return getZero(dl, VecTy, DAG);
MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
@@ -2975,7 +2974,8 @@ HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
MVT SingleTy = typeSplit(MemTy).first;
SDValue Chain = MemN->getChain();
SDValue Base0 = MemN->getBasePtr();
- SDValue Base1 = DAG.getMemBasePlusOffset(Base0, TypeSize::Fixed(HwLen), dl);
+ SDValue Base1 =
+ DAG.getMemBasePlusOffset(Base0, TypeSize::getFixed(HwLen), dl);
unsigned MemOpc = MemN->getOpcode();
MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 6f0210763bc5..1689b8f1e132 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -4295,11 +4295,9 @@ unsigned HexagonInstrInfo::getInstrTimingClassLatency(
///
/// This is a raw interface to the itinerary that may be directly overridden by
/// a target. Use computeOperandLatency to get the best estimate of latency.
-int HexagonInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr &DefMI,
- unsigned DefIdx,
- const MachineInstr &UseMI,
- unsigned UseIdx) const {
+std::optional<unsigned> HexagonInstrInfo::getOperandLatency(
+ const InstrItineraryData *ItinData, const MachineInstr &DefMI,
+ unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
// Get DefIdx and UseIdx for super registers.
@@ -4328,9 +4326,9 @@ int HexagonInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
}
- int Latency = TargetInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx,
- UseMI, UseIdx);
- if (!Latency)
+ std::optional<unsigned> Latency = TargetInstrInfo::getOperandLatency(
+ ItinData, DefMI, DefIdx, UseMI, UseIdx);
+ if (Latency == 0)
// We should never have 0 cycle latency between two instructions unless
// they can be packetized together. However, this decision can't be made
// here.
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index 0bc0877f6e70..645b57f4664d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -309,10 +309,11 @@ public:
///
/// This is a raw interface to the itinerary that may be directly overridden by
/// a target. Use computeOperandLatency to get the best estimate of latency.
- int getOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr &DefMI, unsigned DefIdx,
- const MachineInstr &UseMI,
- unsigned UseIdx) const override;
+ std::optional<unsigned> getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr &DefMI,
+ unsigned DefIdx,
+ const MachineInstr &UseMI,
+ unsigned UseIdx) const override;
/// Decompose the machine operand's target flags into two values - the direct
/// target flag value and any of bit flags that are applied.
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index d3d12664228b..51ef72b873a5 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -2054,7 +2054,7 @@ bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop,
// includes the load that feeds the stores. Check for an alias by generating
// the base address and checking everything.
Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(),
- Builder.getInt8PtrTy(SI->getPointerAddressSpace()), ExpPt);
+ Builder.getPtrTy(SI->getPointerAddressSpace()), ExpPt);
Value *LoadBasePtr = nullptr;
bool Overlap = false;
@@ -2125,7 +2125,7 @@ CleanupAndExit:
// For a memcpy, we have to make sure that the input array is not being
// mutated by the loop.
LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(),
- Builder.getInt8PtrTy(LI->getPointerAddressSpace()), ExpPt);
+ Builder.getPtrTy(LI->getPointerAddressSpace()), ExpPt);
SmallPtrSet<Instruction*, 2> Ignore2;
Ignore2.insert(SI);
@@ -2263,11 +2263,11 @@ CleanupAndExit:
if (DestVolatile) {
Type *Int32Ty = Type::getInt32Ty(Ctx);
- Type *Int32PtrTy = Type::getInt32PtrTy(Ctx);
+ Type *PtrTy = PointerType::get(Ctx, 0);
Type *VoidTy = Type::getVoidTy(Ctx);
Module *M = Func->getParent();
FunctionCallee Fn = M->getOrInsertFunction(
- HexagonVolatileMemcpyName, VoidTy, Int32PtrTy, Int32PtrTy, Int32Ty);
+ HexagonVolatileMemcpyName, VoidTy, PtrTy, PtrTy, Int32Ty);
const SCEV *OneS = SE->getConstant(Int32Ty, 1);
const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty);
@@ -2278,13 +2278,8 @@ CleanupAndExit:
if (Value *Simp = simplifyInstruction(In, {*DL, TLI, DT}))
NumWords = Simp;
- Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy)
- ? StoreBasePtr
- : CondBuilder.CreateBitCast(StoreBasePtr, Int32PtrTy);
- Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy)
- ? LoadBasePtr
- : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy);
- NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords});
+ NewCall = CondBuilder.CreateCall(Fn,
+ {StoreBasePtr, LoadBasePtr, NumWords});
} else {
NewCall = CondBuilder.CreateMemMove(
StoreBasePtr, SI->getAlign(), LoadBasePtr, LI->getAlign(), NumBytes);
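
These hunks follow LLVM's opaque-pointer model: typed-pointer helpers such as getInt8PtrTy and Type::getInt32PtrTy give way to a single pointer type per address space, and the pointer bitcasts before the call become unnecessary. A minimal sketch of the new idiom (illustrative only):

#include "llvm/IR/DerivedTypes.h"

// With opaque pointers there is one pointer type per address space
// ("ptr addrspace(AS)") regardless of pointee type, so any pointer
// value can be passed to the callee without a bitcast.
llvm::PointerType *pointerTy(llvm::LLVMContext &Ctx, unsigned AddrSpace) {
  return llvm::PointerType::get(Ctx, AddrSpace);
}
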
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td
index d03c39d949ff..9de50b405445 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -2593,19 +2593,6 @@ class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod,
: Pat<(Store Value:$val, Addr:$addr),
(MI Addr:$addr, (ValueMod Value:$val))>;
-// Regular stores in the DAG have two operands: value and address.
-// Atomic stores also have two, but they are reversed: address, value.
-// To use atomic stores with the patterns, they need to have their operands
-// swapped. This relies on the knowledge that the F.Fragment uses names
-// "ptr" and "val".
-class AtomSt<PatFrag F>
- : PatFrag<(ops node:$val, node:$ptr), !head(F.Fragments), F.PredicateCode,
- F.OperandTransform> {
- let IsAtomic = F.IsAtomic;
- let MemoryVT = F.MemoryVT;
-}
-
-
def IMM_BYTE : SDNodeXForm<imm, [{
// -1 can be represented as 255, etc.
// assigning to a byte restores our desired signed value.
@@ -2726,15 +2713,15 @@ let AddedComplexity = 120 in {
def: Storea_pat<store, V2I32, addrgp, S2_storerdgp>;
def: Storea_pat<store, F32, addrgp, S2_storerigp>;
def: Storea_pat<store, F64, addrgp, S2_storerdgp>;
- def: Storea_pat<AtomSt<atomic_store_8>, I32, addrgp, S2_storerbgp>;
- def: Storea_pat<AtomSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
- def: Storea_pat<AtomSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
- def: Storea_pat<AtomSt<atomic_store_32>, V4I8, addrgp, S2_storerigp>;
- def: Storea_pat<AtomSt<atomic_store_32>, V2I16, addrgp, S2_storerigp>;
- def: Storea_pat<AtomSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
- def: Storea_pat<AtomSt<atomic_store_64>, V8I8, addrgp, S2_storerdgp>;
- def: Storea_pat<AtomSt<atomic_store_64>, V4I16, addrgp, S2_storerdgp>;
- def: Storea_pat<AtomSt<atomic_store_64>, V2I32, addrgp, S2_storerdgp>;
+ def: Storea_pat<atomic_store_8, I32, addrgp, S2_storerbgp>;
+ def: Storea_pat<atomic_store_16, I32, addrgp, S2_storerhgp>;
+ def: Storea_pat<atomic_store_32, I32, addrgp, S2_storerigp>;
+ def: Storea_pat<atomic_store_32, V4I8, addrgp, S2_storerigp>;
+ def: Storea_pat<atomic_store_32, V2I16, addrgp, S2_storerigp>;
+ def: Storea_pat<atomic_store_64, I64, addrgp, S2_storerdgp>;
+ def: Storea_pat<atomic_store_64, V8I8, addrgp, S2_storerdgp>;
+ def: Storea_pat<atomic_store_64, V4I16, addrgp, S2_storerdgp>;
+ def: Storea_pat<atomic_store_64, V2I32, addrgp, S2_storerdgp>;
def: Stoream_pat<truncstorei8, I64, addrgp, LoReg, S2_storerbgp>;
def: Stoream_pat<truncstorei16, I64, addrgp, LoReg, S2_storerhgp>;
@@ -2755,15 +2742,15 @@ let AddedComplexity = 110 in {
def: Storea_pat<store, V2I32, anyimm3, PS_storerdabs>;
def: Storea_pat<store, F32, anyimm2, PS_storeriabs>;
def: Storea_pat<store, F64, anyimm3, PS_storerdabs>;
- def: Storea_pat<AtomSt<atomic_store_8>, I32, anyimm0, PS_storerbabs>;
- def: Storea_pat<AtomSt<atomic_store_16>, I32, anyimm1, PS_storerhabs>;
- def: Storea_pat<AtomSt<atomic_store_32>, I32, anyimm2, PS_storeriabs>;
- def: Storea_pat<AtomSt<atomic_store_32>, V4I8, anyimm2, PS_storeriabs>;
- def: Storea_pat<AtomSt<atomic_store_32>, V2I16, anyimm2, PS_storeriabs>;
- def: Storea_pat<AtomSt<atomic_store_64>, I64, anyimm3, PS_storerdabs>;
- def: Storea_pat<AtomSt<atomic_store_64>, V8I8, anyimm3, PS_storerdabs>;
- def: Storea_pat<AtomSt<atomic_store_64>, V4I16, anyimm3, PS_storerdabs>;
- def: Storea_pat<AtomSt<atomic_store_64>, V2I32, anyimm3, PS_storerdabs>;
+ def: Storea_pat<atomic_store_8, I32, anyimm0, PS_storerbabs>;
+ def: Storea_pat<atomic_store_16, I32, anyimm1, PS_storerhabs>;
+ def: Storea_pat<atomic_store_32, I32, anyimm2, PS_storeriabs>;
+ def: Storea_pat<atomic_store_32, V4I8, anyimm2, PS_storeriabs>;
+ def: Storea_pat<atomic_store_32, V2I16, anyimm2, PS_storeriabs>;
+ def: Storea_pat<atomic_store_64, I64, anyimm3, PS_storerdabs>;
+ def: Storea_pat<atomic_store_64, V8I8, anyimm3, PS_storerdabs>;
+ def: Storea_pat<atomic_store_64, V4I16, anyimm3, PS_storerdabs>;
+ def: Storea_pat<atomic_store_64, V2I32, anyimm3, PS_storerdabs>;
def: Stoream_pat<truncstorei8, I64, anyimm0, LoReg, PS_storerbabs>;
def: Stoream_pat<truncstorei16, I64, anyimm1, LoReg, PS_storerhabs>;
@@ -2918,15 +2905,15 @@ let AddedComplexity = 40 in {
defm: Storexim_pat<truncstorei32, I64, anyimm2, LoReg, S2_storeri_io>;
defm: Storexim_pat<store, I1, anyimm0, I1toI32, S2_storerb_io>;
- defm: Storexi_pat<AtomSt<atomic_store_8>, I32, anyimm0, S2_storerb_io>;
- defm: Storexi_pat<AtomSt<atomic_store_16>, I32, anyimm1, S2_storerh_io>;
- defm: Storexi_pat<AtomSt<atomic_store_32>, I32, anyimm2, S2_storeri_io>;
- defm: Storexi_pat<AtomSt<atomic_store_32>, V4I8, anyimm2, S2_storeri_io>;
- defm: Storexi_pat<AtomSt<atomic_store_32>, V2I16, anyimm2, S2_storeri_io>;
- defm: Storexi_pat<AtomSt<atomic_store_64>, I64, anyimm3, S2_storerd_io>;
- defm: Storexi_pat<AtomSt<atomic_store_64>, V8I8, anyimm3, S2_storerd_io>;
- defm: Storexi_pat<AtomSt<atomic_store_64>, V4I16, anyimm3, S2_storerd_io>;
- defm: Storexi_pat<AtomSt<atomic_store_64>, V2I32, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<atomic_store_8, I32, anyimm0, S2_storerb_io>;
+ defm: Storexi_pat<atomic_store_16, I32, anyimm1, S2_storerh_io>;
+ defm: Storexi_pat<atomic_store_32, I32, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<atomic_store_32, V4I8, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<atomic_store_32, V2I16, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<atomic_store_64, I64, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<atomic_store_64, V8I8, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<atomic_store_64, V4I16, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<atomic_store_64, V2I32, anyimm3, S2_storerd_io>;
}
// Reg+Reg
@@ -2977,15 +2964,15 @@ let AddedComplexity = 10 in {
def: Storexim_base_pat<truncstorei32, I64, LoReg, S2_storeri_io>;
def: Storexim_base_pat<store, I1, I1toI32, S2_storerb_io>;
- def: Storexi_base_pat<AtomSt<atomic_store_8>, I32, S2_storerb_io>;
- def: Storexi_base_pat<AtomSt<atomic_store_16>, I32, S2_storerh_io>;
- def: Storexi_base_pat<AtomSt<atomic_store_32>, I32, S2_storeri_io>;
- def: Storexi_base_pat<AtomSt<atomic_store_32>, V4I8, S2_storeri_io>;
- def: Storexi_base_pat<AtomSt<atomic_store_32>, V2I16, S2_storeri_io>;
- def: Storexi_base_pat<AtomSt<atomic_store_64>, I64, S2_storerd_io>;
- def: Storexi_base_pat<AtomSt<atomic_store_64>, V8I8, S2_storerd_io>;
- def: Storexi_base_pat<AtomSt<atomic_store_64>, V4I16, S2_storerd_io>;
- def: Storexi_base_pat<AtomSt<atomic_store_64>, V2I32, S2_storerd_io>;
+ def: Storexi_base_pat<atomic_store_8, I32, S2_storerb_io>;
+ def: Storexi_base_pat<atomic_store_16, I32, S2_storerh_io>;
+ def: Storexi_base_pat<atomic_store_32, I32, S2_storeri_io>;
+ def: Storexi_base_pat<atomic_store_32, V4I8, S2_storeri_io>;
+ def: Storexi_base_pat<atomic_store_32, V2I16, S2_storeri_io>;
+ def: Storexi_base_pat<atomic_store_64, I64, S2_storerd_io>;
+ def: Storexi_base_pat<atomic_store_64, V8I8, S2_storerd_io>;
+ def: Storexi_base_pat<atomic_store_64, V4I16, S2_storerd_io>;
+ def: Storexi_base_pat<atomic_store_64, V2I32, S2_storerd_io>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index 8917be1b5626..4df811f188df 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -116,13 +116,13 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
if (!llvm::count_if(Features.getFeatures(), IsQFloatFS)) {
auto getHvxVersion = [&Features](StringRef FS) -> StringRef {
for (StringRef F : llvm::reverse(Features.getFeatures())) {
- if (F.startswith("+hvxv"))
+ if (F.starts_with("+hvxv"))
return F;
}
for (StringRef F : llvm::reverse(Features.getFeatures())) {
if (F == "-hvx")
return StringRef();
- if (F.startswith("+hvx") || F == "-hvx")
+ if (F.starts_with("+hvx") || F == "-hvx")
return F.take_front(4); // Return "+hvx" or "-hvx".
}
return StringRef();
@@ -130,7 +130,7 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
bool AddQFloat = false;
StringRef HvxVer = getHvxVersion(FS);
- if (HvxVer.startswith("+hvxv")) {
+ if (HvxVer.starts_with("+hvxv")) {
int Ver = 0;
if (!HvxVer.drop_front(5).consumeInteger(10, Ver) && Ver >= 68)
AddQFloat = true;
@@ -429,7 +429,7 @@ void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) {
/// Enable use of alias analysis during code generation (during MI
/// scheduling, DAGCombine, etc.).
bool HexagonSubtarget::useAA() const {
- if (OptLevel != CodeGenOpt::None)
+ if (OptLevel != CodeGenOptLevel::None)
return true;
return false;
}
@@ -467,7 +467,7 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, int SrcOpIdx,
// default.
if ((DstInst->isRegSequence() || DstInst->isCopy())) {
Register DReg = DstInst->getOperand(0).getReg();
- int DLatency = -1;
+ std::optional<unsigned> DLatency;
for (const auto &DDep : Dst->Succs) {
MachineInstr *DDst = DDep.getSUnit()->getInstr();
int UseIdx = -1;
@@ -482,21 +482,21 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, int SrcOpIdx,
if (UseIdx == -1)
continue;
- int Latency = (InstrInfo.getOperandLatency(&InstrItins, *SrcInst, 0,
- *DDst, UseIdx));
+ std::optional<unsigned> Latency =
+ InstrInfo.getOperandLatency(&InstrItins, *SrcInst, 0, *DDst, UseIdx);
+
// Set DLatency for the first time.
- DLatency = (DLatency == -1) ? Latency : DLatency;
+ if (!DLatency)
+ DLatency = Latency;
// For multiple uses, if the Latency is different across uses, reset
// DLatency.
if (DLatency != Latency) {
- DLatency = -1;
+ DLatency = std::nullopt;
break;
}
}
-
- DLatency = std::max(DLatency, 0);
- Dep.setLatency((unsigned)DLatency);
+ Dep.setLatency(DLatency ? *DLatency : 0);
}
// Try to schedule uses near definitions to generate .cur.
@@ -581,15 +581,16 @@ void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const {
for (unsigned OpNum = 0; OpNum < DstI->getNumOperands(); OpNum++) {
const MachineOperand &MO = DstI->getOperand(OpNum);
if (MO.isReg() && MO.isUse() && MO.getReg() == DepR) {
- int Latency = (InstrInfo.getOperandLatency(&InstrItins, *SrcI,
- DefIdx, *DstI, OpNum));
+ std::optional<unsigned> Latency = InstrInfo.getOperandLatency(
+ &InstrItins, *SrcI, DefIdx, *DstI, OpNum);
// For some instructions (ex: COPY), we might end up with < 0 latency
// as they don't have any Itinerary class associated with them.
- Latency = std::max(Latency, 0);
+ if (!Latency)
+ Latency = 0;
bool IsArtificial = I.isArtificial();
- Latency = updateLatency(*SrcI, *DstI, IsArtificial, Latency);
- I.setLatency(Latency);
+ Latency = updateLatency(*SrcI, *DstI, IsArtificial, *Latency);
+ I.setLatency(*Latency);
}
}
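
Both hunks above track getOperandLatency's new return type: std::optional<unsigned> replaces an int that used -1 (and sometimes 0) as a sentinel. A minimal sketch of the consumer-side pattern (a hypothetical helper, not from the patch):

#include <optional>

// "No itinerary entry" is now std::nullopt rather than a negative int,
// so callers that previously clamped with std::max(Latency, 0) can do:
static unsigned latencyOrZero(std::optional<unsigned> Latency) {
  return Latency.value_or(0);
}
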
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index f5b4461a5d24..e56007ee21e7 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -70,7 +70,7 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {
public:
Hexagon::ArchEnum HexagonArchVersion;
Hexagon::ArchEnum HexagonHVXVersion = Hexagon::ArchEnum::NoArch;
- CodeGenOpt::Level OptLevel;
+ CodeGenOptLevel OptLevel;
/// True if the target should use Back-Skip-Back scheduling. This is the
/// default for V60.
bool UseBSBScheduling;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 9654c9be303f..590e464e1653 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -177,7 +177,7 @@ namespace llvm {
FunctionPass *createHexagonGenPredicate();
FunctionPass *createHexagonHardwareLoops();
FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
- CodeGenOpt::Level OptLevel);
+ CodeGenOptLevel OptLevel);
FunctionPass *createHexagonLoopRescheduling();
FunctionPass *createHexagonNewValueJump();
FunctionPass *createHexagonOptAddrMode();
@@ -226,7 +226,7 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
// Specify the vector alignment explicitly. For v512x1, the calculated
// alignment would be 512*alignment(i1), which is 512 bytes, instead of
// the required minimum of 64 bytes.
@@ -237,7 +237,7 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
"v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048",
TT, CPU, FS, Options, getEffectiveRelocModel(RM),
getEffectiveCodeModel(CM, CodeModel::Small),
- (HexagonNoOpt ? CodeGenOpt::None : OL)),
+ (HexagonNoOpt ? CodeGenOptLevel::None : OL)),
TLOF(std::make_unique<HexagonTargetObjectFile>()) {
initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry());
initAsmInfo();
@@ -330,7 +330,7 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) {
void HexagonPassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
- bool NoOpt = (getOptLevel() == CodeGenOpt::None);
+ bool NoOpt = (getOptLevel() == CodeGenOptLevel::None);
if (!NoOpt) {
if (EnableInstSimplify)
@@ -363,7 +363,7 @@ void HexagonPassConfig::addIRPasses() {
bool HexagonPassConfig::addInstSelector() {
HexagonTargetMachine &TM = getHexagonTargetMachine();
- bool NoOpt = (getOptLevel() == CodeGenOpt::None);
+ bool NoOpt = (getOptLevel() == CodeGenOptLevel::None);
if (!NoOpt)
addPass(createHexagonOptimizeSZextends());
@@ -401,7 +401,7 @@ bool HexagonPassConfig::addInstSelector() {
}
void HexagonPassConfig::addPreRegAlloc() {
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOptLevel::None) {
if (EnableCExtOpt)
addPass(createHexagonConstExtenders());
if (EnableExpandCondsets)
@@ -411,12 +411,12 @@ void HexagonPassConfig::addPreRegAlloc() {
if (!DisableHardwareLoops)
addPass(createHexagonHardwareLoops());
}
- if (TM->getOptLevel() >= CodeGenOpt::Default)
+ if (TM->getOptLevel() >= CodeGenOptLevel::Default)
addPass(&MachinePipelinerID);
}
void HexagonPassConfig::addPostRegAlloc() {
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOptLevel::None) {
if (EnableRDFOpt)
addPass(createHexagonRDFOpt());
if (!DisableHexagonCFGOpt)
@@ -428,13 +428,13 @@ void HexagonPassConfig::addPostRegAlloc() {
void HexagonPassConfig::addPreSched2() {
addPass(createHexagonCopyToCombine());
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
addPass(&IfConverterID);
addPass(createHexagonSplitConst32AndConst64());
}
void HexagonPassConfig::addPreEmitPass() {
- bool NoOpt = (getOptLevel() == CodeGenOpt::None);
+ bool NoOpt = (getOptLevel() == CodeGenOptLevel::None);
if (!NoOpt)
addPass(createHexagonNewValueJump());
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
index 208b47d765c7..dddd79ad1fcf 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -21,8 +21,6 @@
namespace llvm {
-class Module;
-
class HexagonTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
mutable StringMap<std::unique_ptr<HexagonSubtarget>> SubtargetMap;
@@ -31,7 +29,7 @@ public:
HexagonTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
~HexagonTargetMachine() override;
const HexagonSubtarget *getSubtargetImpl(const Function &F) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index 8355de4cfe96..a7ac24e25a5f 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -140,7 +140,7 @@ MCSection *HexagonTargetObjectFile::SelectSectionForGlobal(
// If the lookup table is used by more than one function, do not place
// it in text section.
- if (EmitLutInText && GO->getName().startswith("switch.table")) {
+ if (EmitLutInText && GO->getName().starts_with("switch.table")) {
if (const Function *Fn = getLutUsedFunction(GO))
return selectSectionForLookupTable(GO, TM, Fn);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index b2a55219df06..aa12e9d513d4 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -706,7 +706,8 @@ auto AlignVectors::createAlignedPointer(IRBuilderBase &Builder, Value *Ptr,
Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy(), "pti");
Value *Mask = HVC.getConstInt(-Alignment);
Value *And = Builder.CreateAnd(remap(AsInt), Mask, "and");
- return Builder.CreateIntToPtr(And, ValTy->getPointerTo(), "itp");
+ return Builder.CreateIntToPtr(
+ And, PointerType::getUnqual(ValTy->getContext()), "itp");
}
auto AlignVectors::createLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
index 2b004a9c5ad4..f4f966e772b5 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
@@ -552,7 +552,7 @@ void HexagonVectorLoopCarriedReuse::reuseValue() {
}
BasicBlock *BB = BEInst->getParent();
IRBuilder<> IRB(BB);
- IRB.SetInsertPoint(BB->getFirstNonPHI());
+ IRB.SetInsertPoint(BB, BB->getFirstNonPHIIt());
Value *BEVal = BEInst;
PHINode *NewPhi;
for (int i = Iterations-1; i >=0 ; --i) {
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 12c84ceb5fd2..f9a0ba3608e6 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -62,10 +62,9 @@ class HexagonAsmBackend : public MCAsmBackend {
public:
HexagonAsmBackend(const Target &T, const Triple &TT, uint8_t OSABI,
StringRef CPU)
- : MCAsmBackend(support::little), OSABI(OSABI), CPU(CPU), relaxedCnt(0),
- MCII(T.createMCInstrInfo()), RelaxTarget(new MCInst *),
- Extender(nullptr), MaxPacketSize(HexagonMCInstrInfo::packetSize(CPU))
- {}
+ : MCAsmBackend(llvm::endianness::little), OSABI(OSABI), CPU(CPU),
+ relaxedCnt(0), MCII(T.createMCInstrInfo()), RelaxTarget(new MCInst *),
+ Extender(nullptr), MaxPacketSize(HexagonMCInstrInfo::packetSize(CPU)) {}
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override {
@@ -203,7 +202,8 @@ public:
}
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target) override {
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) override {
switch(Fixup.getTargetKind()) {
default:
llvm_unreachable("Unknown Fixup Kind!");
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
index b83931eb88ac..42d91f559f51 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
@@ -17,7 +17,6 @@
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/SMLoc.h"
#include <set>
#include <utility>
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 8bf4d0a41298..96ec81cd86ab 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -443,7 +443,7 @@ void HexagonMCCodeEmitter::encodeSingleInstruction(
Binary |= SubBits0 | (SubBits1 << 16);
}
- support::endian::write<uint32_t>(CB, Binary, support::little);
+ support::endian::write<uint32_t>(CB, Binary, llvm::endianness::little);
++MCNumEmitted;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index cf6fa78a2005..fffd5abd9f8b 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -253,7 +253,7 @@ public:
if (!Duplex.second.empty()) {
OS << Indent << Duplex.first << Separator;
InstTxt = Duplex.second;
- } else if (!HeadTail.first.trim().startswith("immext")) {
+ } else if (!HeadTail.first.trim().starts_with("immext")) {
InstTxt = Duplex.first;
}
if (!InstTxt.empty())
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 3932077c08f1..ffb81bca208d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -16,7 +16,6 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include <cstdint>
-#include <string>
#define Hexagon_POINTER_SIZE 4
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index 2bbc2f644f58..c54c9070bfea 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -31,7 +31,6 @@
#include <cassert>
#include <optional>
#include <utility>
-#include <vector>
#define DEBUG_TYPE "hexagon-shuffle"
@@ -653,7 +652,7 @@ HexagonShuffler::tryAuction(HexagonPacketSummary const &Summary) {
bool HexagonShuffler::shuffle() {
if (size() > HEXAGON_PACKET_SIZE) {
- // Ignore a packet with with more than what a packet can hold
+ // Ignore a packet with more than what a packet can hold
// or with compound or duplex insns for now.
reportError("invalid instruction packet");
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index 10b01e75ace6..838672d34741 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -16,7 +16,6 @@
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index 6b74423f9bc5..d0a89ad72b4c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -65,10 +65,9 @@ class LanaiAsmParser : public MCTargetAsmParser {
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
- bool parseRegister(MCRegister &RegNum, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
- OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool MatchAndEmitInstruction(SMLoc IdLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
@@ -79,10 +78,9 @@ class LanaiAsmParser : public MCTargetAsmParser {
#define GET_ASSEMBLER_HEADER
#include "LanaiGenAsmMatcher.inc"
- OperandMatchResultTy parseOperand(OperandVector *Operands,
- StringRef Mnemonic);
+ ParseStatus parseOperand(OperandVector *Operands, StringRef Mnemonic);
- OperandMatchResultTy parseMemoryOperand(OperandVector &Operands);
+ ParseStatus parseMemoryOperand(OperandVector &Operands);
public:
LanaiAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
@@ -725,17 +723,16 @@ bool LanaiAsmParser::parseRegister(MCRegister &RegNum, SMLoc &StartLoc,
return (Op == nullptr);
}
-OperandMatchResultTy LanaiAsmParser::tryParseRegister(MCRegister &RegNum,
- SMLoc &StartLoc,
- SMLoc &EndLoc) {
+ParseStatus LanaiAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
const AsmToken &Tok = getParser().getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
std::unique_ptr<LanaiOperand> Op = parseRegister(/*RestoreOnFailure=*/true);
if (Op == nullptr)
- return MatchOperand_NoMatch;
- RegNum = Op->getReg();
- return MatchOperand_Success;
+ return ParseStatus::NoMatch;
+ Reg = Op->getReg();
+ return ParseStatus::Success;
}
std::unique_ptr<LanaiOperand> LanaiAsmParser::parseIdentifier() {
@@ -885,8 +882,7 @@ bool shouldBeSls(const LanaiOperand &Op) {
}
// Matches a memory operand. Returns ParseStatus::Failure if an error is
// encountered.
-OperandMatchResultTy
-LanaiAsmParser::parseMemoryOperand(OperandVector &Operands) {
+ParseStatus LanaiAsmParser::parseMemoryOperand(OperandVector &Operands) {
// Try to match a memory operand.
// The memory operands are of the form:
// (1) Register|Immediate|'' '[' '*'? Register '*'? ']' or
@@ -916,13 +912,13 @@ LanaiAsmParser::parseMemoryOperand(OperandVector &Operands) {
// Only continue if next token is '['
if (Lexer.isNot(AsmToken::LBrac)) {
if (!Op)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
// The start of this custom parsing overlaps with register/immediate so
// consider this as a successful match of an operand of that type as the
// token stream can't be rewound to allow them to match separately.
Operands.push_back(std::move(Op));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
Parser.Lex(); // Eat the '['.
@@ -944,22 +940,19 @@ LanaiAsmParser::parseMemoryOperand(OperandVector &Operands) {
if (shouldBeSls(*Op)) {
Operands.push_back(LanaiOperand::MorphToMemImm(std::move(Op)));
} else {
- if (!Op->isLoImm16Signed()) {
- Error(Parser.getTok().getLoc(),
- "Memory address is not word "
- "aligned and larger than class RM can handle");
- return MatchOperand_ParseFail;
- }
+ if (!Op->isLoImm16Signed())
+ return Error(Parser.getTok().getLoc(),
+ "Memory address is not word aligned and larger than "
+ "class RM can handle");
Operands.push_back(LanaiOperand::MorphToMemRegImm(
Lanai::R0, std::move(Op), LPAC::ADD));
}
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
}
- Error(Parser.getTok().getLoc(),
- "Unknown operand, expected register or immediate");
- return MatchOperand_ParseFail;
+ return Error(Parser.getTok().getLoc(),
+ "Unknown operand, expected register or immediate");
}
BaseReg = Op->getReg();
@@ -979,20 +972,16 @@ LanaiAsmParser::parseMemoryOperand(OperandVector &Operands) {
Offset = LanaiOperand::createImm(OffsetConstExpr, Start, End);
}
} else {
- if (Offset || OffsetValue != 0) {
- Error(Parser.getTok().getLoc(), "Expected ']'");
- return MatchOperand_ParseFail;
- }
+ if (Offset || OffsetValue != 0)
+ return Error(Parser.getTok().getLoc(), "Expected ']'");
// Parse operator
AluOp = parseAluOperator(PreOp, PostOp);
// Second form requires offset register
Offset = parseRegister();
- if (!BaseReg || Lexer.isNot(AsmToken::RBrac)) {
- Error(Parser.getTok().getLoc(), "Expected ']'");
- return MatchOperand_ParseFail;
- }
+ if (!BaseReg || Lexer.isNot(AsmToken::RBrac))
+ return Error(Parser.getTok().getLoc(), "Expected ']'");
Parser.Lex(); // Eat the ']'.
}
@@ -1001,33 +990,31 @@ LanaiAsmParser::parseMemoryOperand(OperandVector &Operands) {
AluOp = AluWithPrePost(AluOp, PreOp, PostOp);
// Ensure immediate offset is not too large
- if (Offset->isImm() && !Offset->isLoImm16Signed()) {
- Error(Parser.getTok().getLoc(),
- "Memory address is not word "
- "aligned and larger than class RM can handle");
- return MatchOperand_ParseFail;
- }
+ if (Offset->isImm() && !Offset->isLoImm16Signed())
+ return Error(Parser.getTok().getLoc(),
+ "Memory address is not word aligned and larger than class RM "
+ "can handle");
Operands.push_back(
Offset->isImm()
? LanaiOperand::MorphToMemRegImm(BaseReg, std::move(Offset), AluOp)
: LanaiOperand::MorphToMemRegReg(BaseReg, std::move(Offset), AluOp));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
// Looks at the current token type and creates the relevant operand from
// that information, adding it to Operands.
// Returns ParseStatus::Success if an operand was parsed, otherwise
// ParseStatus::NoMatch or ParseStatus::Failure.
-OperandMatchResultTy
-LanaiAsmParser::parseOperand(OperandVector *Operands, StringRef Mnemonic) {
+ParseStatus LanaiAsmParser::parseOperand(OperandVector *Operands,
+ StringRef Mnemonic) {
// Check if the current operand has a custom associated parser, if so, try to
// custom parse the operand, or fallback to the general approach.
- OperandMatchResultTy Result = MatchOperandParserImpl(*Operands, Mnemonic);
+ ParseStatus Result = MatchOperandParserImpl(*Operands, Mnemonic);
- if (Result == MatchOperand_Success)
+ if (Result.isSuccess())
return Result;
- if (Result == MatchOperand_ParseFail) {
+ if (Result.isFailure()) {
Parser.eatToEndOfStatement();
return Result;
}
@@ -1043,13 +1030,13 @@ LanaiAsmParser::parseOperand(OperandVector *Operands, StringRef Mnemonic) {
if (!Op) {
Error(Parser.getTok().getLoc(), "Unknown operand");
Parser.eatToEndOfStatement();
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
}
// Push back parsed operand into list of operands
Operands->push_back(std::move(Op));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
// Split the mnemonic into ASM operand, conditional code and instruction
@@ -1061,15 +1048,15 @@ StringRef LanaiAsmParser::splitMnemonic(StringRef Name, SMLoc NameLoc,
StringRef Mnemonic = Name;
bool IsBRR = false;
- if (Name.endswith(".r")) {
+ if (Name.ends_with(".r")) {
Mnemonic = Name.substr(0, Name.size() - 2);
IsBRR = true;
}
// Match b?? and s?? (BR, BRR, and SCC instruction classes).
if (Mnemonic[0] == 'b' ||
- (Mnemonic[0] == 's' && !Mnemonic.startswith("sel") &&
- !Mnemonic.startswith("st"))) {
+ (Mnemonic[0] == 's' && !Mnemonic.starts_with("sel") &&
+ !Mnemonic.starts_with("st"))) {
// Parse instructions with a conditional code. For example, 'bne' is
// converted into two operands 'b' and 'ne'.
LPCC::CondCode CondCode =
@@ -1090,8 +1077,8 @@ StringRef LanaiAsmParser::splitMnemonic(StringRef Name, SMLoc NameLoc,
// We ignore .f here and assume they are flag-setting operations, not
// conditional codes (except for select instructions where flag-setting
// variants are not yet implemented).
- if (Mnemonic.startswith("sel") ||
- (!Mnemonic.endswith(".f") && !Mnemonic.startswith("st"))) {
+ if (Mnemonic.starts_with("sel") ||
+ (!Mnemonic.ends_with(".f") && !Mnemonic.starts_with("st"))) {
LPCC::CondCode CondCode = LPCC::suffixToLanaiCondCode(Mnemonic);
if (CondCode != LPCC::UNKNOWN) {
size_t Next = Mnemonic.rfind('.', Name.size());
@@ -1100,7 +1087,7 @@ StringRef LanaiAsmParser::splitMnemonic(StringRef Name, SMLoc NameLoc,
// expected by the generated matcher). If the mnemonic starts with 'sel'
// then include the period as part of the mnemonic, else don't include it
// as part of the mnemonic.
- if (Mnemonic.startswith("sel")) {
+ if (Mnemonic.starts_with("sel")) {
Mnemonic = Mnemonic.substr(0, Next + 1);
} else {
Mnemonic = Mnemonic.substr(0, Next);
@@ -1189,7 +1176,7 @@ bool LanaiAsmParser::ParseInstruction(ParseInstructionInfo & /*Info*/,
return false;
// Parse first operand
- if (parseOperand(&Operands, Mnemonic) != MatchOperand_Success)
+ if (!parseOperand(&Operands, Mnemonic).isSuccess())
return true;
// If it is a st instruction with one 1 operand then it is a "store true".
@@ -1207,7 +1194,7 @@ bool LanaiAsmParser::ParseInstruction(ParseInstructionInfo & /*Info*/,
// If the instruction is a bt instruction with 1 operand (in assembly) then it
// is an unconditional branch instruction and the first two elements of
// operands need to be merged.
- if (Lexer.is(AsmToken::EndOfStatement) && Name.startswith("bt") &&
+ if (Lexer.is(AsmToken::EndOfStatement) && Name.starts_with("bt") &&
Operands.size() == 3) {
Operands.erase(Operands.begin(), Operands.begin() + 2);
Operands.insert(Operands.begin(), LanaiOperand::CreateToken("bt", NameLoc));
@@ -1219,7 +1206,7 @@ bool LanaiAsmParser::ParseInstruction(ParseInstructionInfo & /*Info*/,
Lex();
// Parse next operand
- if (parseOperand(&Operands, Mnemonic) != MatchOperand_Success)
+ if (!parseOperand(&Operands, Mnemonic).isSuccess())
return true;
}
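The Lanai hunks above belong to the tree-wide migration from OperandMatchResultTy to ParseStatus. ParseStatus keeps the three-way NoMatch/Success/Failure result but is also implicitly constructible from bool (true meaning Failure), which is what lets the old Error(...); return MatchOperand_ParseFail; pairs collapse into a single return Error(...). A standalone toy modeling that shape; the real class lives in llvm/MC/MCParser/MCTargetAsmParser.h, and the bool conversion is stated here as an assumption about that header, consistent with how the hunks use it:

    #include <cstdio>

    // Toy stand-in for llvm::ParseStatus, only to show the control flow.
    struct ParseStatusToy {
      enum Kind { Success, Failure, NoMatch } K;
      ParseStatusToy(Kind K) : K(K) {}
      ParseStatusToy(bool Error) : K(Error ? Failure : Success) {}
      bool isSuccess() const { return K == Success; }
      bool isFailure() const { return K == Failure; }
    };

    // Mimics MCAsmParser::Error, which reports and returns true.
    static bool reportError(const char *Msg) { std::puts(Msg); return true; }

    static ParseStatusToy parseToy(int Token) {
      if (Token < 0)
        return ParseStatusToy::NoMatch;             // not ours, try others
      if (Token > 99)
        return reportError("operand out of range"); // bool -> Failure
      return ParseStatusToy::Success;
    }

Callers then branch on isSuccess()/isFailure(), exactly as ParseInstruction does above.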
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiAluCode.h b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiAluCode.h
index 69be05542723..0494dda459bd 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiAluCode.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiAluCode.h
@@ -14,7 +14,6 @@
#define LLVM_LIB_TARGET_LANAI_LANAIALUCODE_H
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/Support/ErrorHandling.h"
namespace llvm {
@@ -114,33 +113,6 @@ inline static AluCode stringToLanaiAluCode(StringRef S) {
.Case("sha", SRA)
.Default(UNKNOWN);
}
-
-inline static AluCode isdToLanaiAluCode(ISD::NodeType Node_type) {
- switch (Node_type) {
- case ISD::ADD:
- return AluCode::ADD;
- case ISD::ADDE:
- return AluCode::ADDC;
- case ISD::SUB:
- return AluCode::SUB;
- case ISD::SUBE:
- return AluCode::SUBB;
- case ISD::AND:
- return AluCode::AND;
- case ISD::OR:
- return AluCode::OR;
- case ISD::XOR:
- return AluCode::XOR;
- case ISD::SHL:
- return AluCode::SHL;
- case ISD::SRL:
- return AluCode::SRL;
- case ISD::SRA:
- return AluCode::SRA;
- default:
- return AluCode::UNKNOWN;
- }
-}
} // namespace LPAC
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
index d142fd3a414f..c66d9166828c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
@@ -123,8 +123,8 @@ bool LanaiAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
const MachineOperand &FlagsOP = MI->getOperand(OpNo - 1);
if (!FlagsOP.isImm())
return true;
- unsigned Flags = FlagsOP.getImm();
- unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ const InlineAsm::Flag Flags(FlagsOP.getImm());
+ const unsigned NumVals = Flags.getNumOperandRegisters();
if (NumVals != 2)
return true;
unsigned RegOp = OpNo + 1;
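This hunk replaces open-coded flag-word decoding with the InlineAsm::Flag wrapper, hiding the packed bits behind named accessors such as getNumOperandRegisters(). A standalone toy of the same idea; the bit layout below (kind in the low 3 bits, register count in bits 15:3) mirrors the legacy helper this code used, but treat it as illustrative rather than a spec of the real header:

    #include <cstdint>

    // Toy flag word wrapper: named accessors instead of shifts at call sites.
    class FlagToy {
      uint32_t Bits;
    public:
      explicit FlagToy(uint32_t Bits) : Bits(Bits) {}
      unsigned getNumOperandRegisters() const { return (Bits & 0xffff) >> 3; }
    };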
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp
index b1ecebe24b18..6f5495ac00e1 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp
@@ -59,7 +59,8 @@ public:
return SelectionDAGISel::runOnMachineFunction(MF);
}
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintCode,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ InlineAsm::ConstraintCode ConstraintCode,
std::vector<SDValue> &OutOps) override;
private:
@@ -212,6 +213,37 @@ bool LanaiDAGToDAGISel::selectAddrSpls(SDValue Addr, SDValue &Base,
return selectAddrRiSpls(Addr, Base, Offset, AluOp, /*RiMode=*/false);
}
+namespace llvm {
+namespace LPAC {
+static AluCode isdToLanaiAluCode(ISD::NodeType Node_type) {
+ switch (Node_type) {
+ case ISD::ADD:
+ return AluCode::ADD;
+ case ISD::ADDE:
+ return AluCode::ADDC;
+ case ISD::SUB:
+ return AluCode::SUB;
+ case ISD::SUBE:
+ return AluCode::SUBB;
+ case ISD::AND:
+ return AluCode::AND;
+ case ISD::OR:
+ return AluCode::OR;
+ case ISD::XOR:
+ return AluCode::XOR;
+ case ISD::SHL:
+ return AluCode::SHL;
+ case ISD::SRL:
+ return AluCode::SRL;
+ case ISD::SRA:
+ return AluCode::SRA;
+ default:
+ return AluCode::UNKNOWN;
+ }
+}
+} // namespace LPAC
+} // namespace llvm
+
bool LanaiDAGToDAGISel::selectAddrRr(SDValue Addr, SDValue &R1, SDValue &R2,
SDValue &AluOp) {
// if Address is FI, get the TargetFrameIndex.
@@ -253,12 +285,13 @@ bool LanaiDAGToDAGISel::selectAddrRr(SDValue Addr, SDValue &R1, SDValue &R2,
}
bool LanaiDAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, unsigned ConstraintCode, std::vector<SDValue> &OutOps) {
+ const SDValue &Op, InlineAsm::ConstraintCode ConstraintCode,
+ std::vector<SDValue> &OutOps) {
SDValue Op0, Op1, AluOp;
switch (ConstraintCode) {
default:
return true;
- case InlineAsm::Constraint_m: // memory
+ case InlineAsm::ConstraintCode::m: // memory
if (!selectAddrRr(Op, Op0, Op1, AluOp) &&
!selectAddrRi(Op, Op0, Op1, AluOp))
return true;
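SelectInlineAsmMemoryOperand now receives the scoped InlineAsm::ConstraintCode enum instead of a bare unsigned, so the compiler rejects mixing constraint codes with other integers. A minimal sketch using only the enumerator this hunk itself uses:

    #include "llvm/IR/InlineAsm.h"

    // Scoped enum: no implicit conversion from arbitrary unsigned values.
    static bool isPlainMemoryConstraint(llvm::InlineAsm::ConstraintCode CC) {
      return CC == llvm::InlineAsm::ConstraintCode::m;
    }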
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
index 157f86027433..cbb5c2b998e2 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
@@ -278,12 +278,12 @@ LanaiTargetLowering::getSingleConstraintMatchWeight(
// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
// vector. If it is invalid, don't add anything to Ops.
void LanaiTargetLowering::LowerAsmOperandForConstraint(
- SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
+ SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
SDValue Result;
// Only support length 1 constraints for now.
- if (Constraint.length() > 1)
+ if (Constraint.size() > 1)
return;
char ConstraintLetter = Constraint[0];
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.h b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.h
index ea1159db9e59..5fa5444b5161 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.h
@@ -103,7 +103,7 @@ public:
ConstraintWeight
getSingleConstraintMatchWeight(AsmOperandInfo &Info,
const char *Constraint) const override;
- void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp
index ce79bdafc425..2442d7ee923f 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp
@@ -24,7 +24,6 @@
#include "LanaiAluCode.h"
#include "LanaiTargetMachine.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiRegisterInfo.h
index 89d9eba7f891..5168dddd9301 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiRegisterInfo.h
@@ -34,8 +34,6 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
- bool supportsBackwardScavenger() const override { return true; }
-
bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.cpp
index 37a4843e1bc4..11cd7f53505e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.cpp
@@ -40,7 +40,7 @@ LanaiSubtarget::LanaiSubtarget(const Triple &TargetTriple, StringRef Cpu,
StringRef FeatureString, const TargetMachine &TM,
const TargetOptions & /*Options*/,
CodeModel::Model /*CodeModel*/,
- CodeGenOpt::Level /*OptLevel*/)
+ CodeGenOptLevel /*OptLevel*/)
: LanaiGenSubtargetInfo(TargetTriple, Cpu, /*TuneCPU*/ Cpu, FeatureString),
FrameLowering(initializeSubtargetDependencies(Cpu, FeatureString)),
TLInfo(TM, *this) {}
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.h b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.h
index 7955bfe0d8b9..0a229063ab7b 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.h
@@ -33,7 +33,7 @@ public:
LanaiSubtarget(const Triple &TargetTriple, StringRef Cpu,
StringRef FeatureString, const TargetMachine &TM,
const TargetOptions &Options, CodeModel::Model CodeModel,
- CodeGenOpt::Level OptLevel);
+ CodeGenOptLevel OptLevel);
// ParseSubtargetFeatures - Parses features string setting specified
// subtarget options. Definition of function is auto generated by tblgen.
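CodeGenOpt::Level becomes the scoped enum llvm::CodeGenOptLevel throughout this merge; the Lanai signatures here just pick up the new spelling. A one-function sketch of the new usage (the enumerators None/Less/Default/Aggressive carry over, only the scope changes):

    #include "llvm/Support/CodeGen.h"

    // The scoped enum forbids the silent int conversions the old type allowed.
    static bool isOptimizing(llvm::CodeGenOptLevel Level) {
      return Level != llvm::CodeGenOptLevel::None;
    }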
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
index 80a60955c48b..039182b3ffe6 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
@@ -58,7 +58,7 @@ static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
LanaiTargetMachine::LanaiTargetMachine(
const Target &T, const Triple &TT, StringRef Cpu, StringRef FeatureString,
const TargetOptions &Options, std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CodeModel, CodeGenOpt::Level OptLevel,
+ std::optional<CodeModel::Model> CodeModel, CodeGenOptLevel OptLevel,
bool JIT)
: LLVMTargetMachine(T, computeDataLayout(), TT, Cpu, FeatureString, Options,
getEffectiveRelocModel(RM),
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.h
index 85e3b3f261fe..c5c351b36b31 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.h
@@ -32,7 +32,7 @@ public:
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CodeModel,
- CodeGenOpt::Level OptLevel, bool JIT);
+ CodeGenOptLevel OptLevel, bool JIT);
const LanaiSubtarget *
getSubtargetImpl(const llvm::Function & /*Fn*/) const override {
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetObjectFile.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetObjectFile.cpp
index a421f3156153..a366a89af863 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetObjectFile.cpp
@@ -80,7 +80,7 @@ bool LanaiTargetObjectFile::isGlobalInSmallSectionImpl(
// Global values placed in sections starting with .ldata do not fit in
// 21-bits, so always use large memory access for them. FIXME: This is a
// workaround for a tool limitation.
- if (GVA->getSection().startswith(".ldata"))
+ if (GVA->getSection().starts_with(".ldata"))
return false;
if (TM.getCodeModel() == CodeModel::Small)
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
index 3c2a3ac69224..08ca577a4785 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
@@ -46,7 +46,7 @@ class LanaiAsmBackend : public MCAsmBackend {
public:
LanaiAsmBackend(const Target &T, Triple::OSType OST)
- : MCAsmBackend(support::big), OSType(OST) {}
+ : MCAsmBackend(llvm::endianness::big), OSType(OST) {}
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
index 919d43ad9b9b..a21518e44116 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
@@ -26,7 +26,7 @@ public:
protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
- bool needsRelocateWithSymbol(const MCSymbol &SD,
+ bool needsRelocateWithSymbol(const MCValue &Val, const MCSymbol &Sym,
unsigned Type) const override;
};
@@ -72,7 +72,8 @@ unsigned LanaiELFObjectWriter::getRelocType(MCContext & /*Ctx*/,
return Type;
}
-bool LanaiELFObjectWriter::needsRelocateWithSymbol(const MCSymbol & /*SD*/,
+bool LanaiELFObjectWriter::needsRelocateWithSymbol(const MCValue &,
+ const MCSymbol &,
unsigned Type) const {
switch (Type) {
case ELF::R_LANAI_21:
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
index 5f9c2a100223..d09966e3695c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
@@ -178,7 +178,7 @@ void LanaiMCCodeEmitter::encodeInstruction(
unsigned Value = getBinaryCodeForInstr(Inst, Fixups, SubtargetInfo);
++MCNumEmitted; // Keep track of the number of emitted insns.
- support::endian::write<uint32_t>(CB, Value, support::big);
+ support::endian::write<uint32_t>(CB, Value, llvm::endianness::big);
}
// Encode Lanai Memory Operand
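Both Lanai MC hunks swap the old llvm::support::endianness values (support::big) for the top-level scoped enum llvm::endianness. A short sketch of the writer call as used above, assuming the EndianStream.h overload that appends to a SmallVectorImpl<char>, which is the overload encodeInstruction calls:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Support/EndianStream.h"
    #include <cstdint>

    // Append one big-endian 32-bit word to an encoding buffer.
    static void emitBEWord(llvm::SmallVectorImpl<char> &CB, uint32_t Value) {
      llvm::support::endian::write<uint32_t>(CB, Value, llvm::endianness::big);
    }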
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
index 94d530306536..276374afee38 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
@@ -43,10 +43,9 @@ class LoongArchAsmParser : public MCTargetAsmParser {
using InstSeq = SmallVector<Inst>;
/// Parse a register as used in CFI directives.
- bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
- OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
@@ -549,14 +548,14 @@ static bool matchRegisterNameHelper(MCRegister &RegNo, StringRef Name) {
return RegNo == LoongArch::NoRegister;
}
-bool LoongArchAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
+bool LoongArchAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) {
return Error(getLoc(), "invalid register number");
}
-OperandMatchResultTy LoongArchAsmParser::tryParseRegister(MCRegister &RegNo,
- SMLoc &StartLoc,
- SMLoc &EndLoc) {
+ParseStatus LoongArchAsmParser::tryParseRegister(MCRegister &Reg,
+ SMLoc &StartLoc,
+ SMLoc &EndLoc) {
llvm_unreachable("Unimplemented function.");
}
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArch.td b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArch.td
index 0675caa3b601..75b65fe69f26 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArch.td
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArch.td
@@ -102,6 +102,10 @@ def FeatureUAL
: SubtargetFeature<"ual", "HasUAL", "true",
"Allow memory accesses to be unaligned">;
+def FeatureRelax
+ : SubtargetFeature<"relax", "HasLinkerRelax", "true",
+ "Enable Linker relaxation">;
+
//===----------------------------------------------------------------------===//
// Registers, instruction descriptions ...
//===----------------------------------------------------------------------===//
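FeatureRelax adds a HasLinkerRelax subtarget bit, spelled +relax on the command line (for example -mattr=+relax), to gate emission of linker-relaxation relocations. A hedged sketch of how such a bit is typically consumed; the hasLinkerRelax() getter is an assumption that LoongArchSubtarget follows the usual naming convention, since only the .td definition appears in this diff:

    #include "LoongArchSubtarget.h"

    // Assumption: the HasLinkerRelax field defined above is exposed through a
    // conventionally named getter; that getter is not part of this hunk.
    static bool shouldEmitRelaxRelocs(const llvm::LoongArchSubtarget &ST) {
      return ST.hasLinkerRelax();
    }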
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
index 51df0463e235..18a532b55ee5 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
@@ -153,18 +153,12 @@ static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII,
Register ScratchReg = MI.getOperand(1).getReg();
Register AddrReg = MI.getOperand(2).getReg();
Register IncrReg = MI.getOperand(3).getReg();
- AtomicOrdering Ordering =
- static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
// .loop:
- // if(Ordering != AtomicOrdering::Monotonic)
- // dbar 0
// ll.[w|d] dest, (addr)
// binop scratch, dest, val
// sc.[w|d] scratch, scratch, (addr)
// beqz scratch, loop
- if (Ordering != AtomicOrdering::Monotonic)
- BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
BuildMI(LoopMBB, DL,
TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
.addReg(AddrReg)
@@ -251,12 +245,8 @@ static void doMaskedAtomicBinOpExpansion(
Register AddrReg = MI.getOperand(2).getReg();
Register IncrReg = MI.getOperand(3).getReg();
Register MaskReg = MI.getOperand(4).getReg();
- AtomicOrdering Ordering =
- static_cast<AtomicOrdering>(MI.getOperand(5).getImm());
// .loop:
- // if(Ordering != AtomicOrdering::Monotonic)
- // dbar 0
// ll.w destreg, (alignedaddr)
// binop scratch, destreg, incr
// xor scratch, destreg, scratch
@@ -264,8 +254,6 @@ static void doMaskedAtomicBinOpExpansion(
// xor scratch, destreg, scratch
// sc.w scratch, scratch, (alignedaddr)
// beqz scratch, loop
- if (Ordering != AtomicOrdering::Monotonic)
- BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg)
.addReg(AddrReg)
.addImm(0);
@@ -372,23 +360,20 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
- auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
// Insert new MBBs.
MF->insert(++MBB.getIterator(), LoopHeadMBB);
MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
- MF->insert(++LoopTailMBB->getIterator(), TailMBB);
- MF->insert(++TailMBB->getIterator(), DoneMBB);
+ MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
// Set up successors and transfer remaining instructions to DoneMBB.
LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
LoopHeadMBB->addSuccessor(LoopTailMBB);
LoopIfBodyMBB->addSuccessor(LoopTailMBB);
LoopTailMBB->addSuccessor(LoopHeadMBB);
- LoopTailMBB->addSuccessor(TailMBB);
- TailMBB->addSuccessor(DoneMBB);
+ LoopTailMBB->addSuccessor(DoneMBB);
DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
DoneMBB->transferSuccessors(&MBB);
MBB.addSuccessor(LoopHeadMBB);
@@ -402,11 +387,9 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
//
// .loophead:
- // dbar 0
// ll.w destreg, (alignedaddr)
// and scratch2, destreg, mask
// move scratch1, destreg
- BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg)
.addReg(AddrReg)
.addImm(0);
@@ -463,7 +446,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
// .looptail:
// sc.w scratch1, scratch1, (addr)
// beqz scratch1, loop
- // dbar 0x700
BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), Scratch1Reg)
.addReg(Scratch1Reg)
.addReg(AddrReg)
@@ -472,10 +454,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
.addReg(Scratch1Reg)
.addMBB(LoopHeadMBB);
- // .tail:
- // dbar 0x700
- BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700);
-
NextMBBI = MBB.end();
MI.eraseFromParent();
@@ -483,7 +461,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
- computeAndAddLiveIns(LiveRegs, *TailMBB);
computeAndAddLiveIns(LiveRegs, *DoneMBB);
return true;
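These hunks drop the unconditional dbar 0 that used to head every LL/SC retry loop, and the extra dbar 0x700 tail in the min/max expansion, apparently relying on the ordering the ll/sc pair itself provides. What the expansion implements is still an ordinary read-modify-write retry loop; a standalone C++ sketch of that shape, not the backend's actual interface:

    #include <atomic>

    // Moral equivalent of doAtomicBinOpExpansion above: load, compute,
    // store-conditionally, retry until the conditional store succeeds.
    static int fetchAddLike(std::atomic<int> &A, int Incr) {
      int Dest = A.load(std::memory_order_relaxed);
      while (!A.compare_exchange_weak(Dest, Dest + Incr,
                                      std::memory_order_seq_cst,
                                      std::memory_order_relaxed)) {
        // compare_exchange_weak reloads Dest on failure, like re-running ll.w.
      }
      return Dest;
    }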
@@ -535,12 +512,10 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
.addReg(CmpValReg)
.addMBB(TailMBB);
// .looptail:
- // dbar 0
// move scratch, newval
// sc.[w|d] scratch, scratch, (addr)
// beqz scratch, loophead
// b done
- BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
.addReg(NewValReg)
.addReg(LoongArch::R0);
@@ -573,13 +548,11 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
.addMBB(TailMBB);
// .looptail:
- // dbar 0
// andn scratch, dest, mask
// or scratch, scratch, newval
// sc.[w|d] scratch, scratch, (addr)
// beqz scratch, loophead
// b done
- BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
BuildMI(LoopTailMBB, DL, TII->get(LoongArch::ANDN), ScratchReg)
.addReg(DestReg)
.addReg(MaskReg);
@@ -598,9 +571,24 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
}
+ AtomicOrdering FailureOrdering =
+ static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
+ int hint;
+
+ switch (FailureOrdering) {
+ case AtomicOrdering::Acquire:
+ case AtomicOrdering::AcquireRelease:
+ case AtomicOrdering::SequentiallyConsistent:
+ // acquire
+ hint = 0b10100;
+ break;
+ default:
+ hint = 0x700;
+ }
+
// .tail:
- // dbar 0x700
- BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700);
+ // dbar 0x700 | acquire
+ BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(hint);
NextMBBI = MBB.end();
MI.eraseFromParent();
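The new tail keys the dbar hint off the cmpxchg failure ordering (immediate operand 5, or 6 in the masked form): acquire, acq_rel, and seq_cst failure orderings select hint 0b10100 (an acquire barrier), while anything weaker keeps the old 0x700 hint. In source terms the failure ordering is the second ordering of a compare-exchange; a standalone illustration:

    #include <atomic>

    // The failure ordering (last argument) is what picks the tail-barrier
    // hint in expandAtomicCmpXchg above: memory_order_acquire here maps to
    // hint 0b10100, memory_order_relaxed would keep hint 0x700.
    static bool tryUpdate(std::atomic<int> &A, int Expected, int Desired) {
      return A.compare_exchange_strong(Expected, Desired,
                                       std::memory_order_acq_rel,
                                       std::memory_order_acquire);
    }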
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
index 826db54febd3..808d73958ff9 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -170,10 +170,11 @@ def : PatFprFpr<fminnum_ieee, FMIN_S, FPR32>;
def : PatFpr<fneg, FNEG_S, FPR32>;
def : PatFpr<fabs, FABS_S, FPR32>;
def : PatFpr<fsqrt, FSQRT_S, FPR32>;
-
def : Pat<(fdiv fpimm1, (fsqrt FPR32:$fj)), (FRSQRT_S FPR32:$fj)>;
-
def : Pat<(fcanonicalize FPR32:$fj), (FMAX_S $fj, $fj)>;
+def : Pat<(is_fpclass FPR32:$fj, (i32 timm:$mask)),
+ (SLTU R0, (ANDI (MOVFR2GR_S (FCLASS_S FPR32:$fj)),
+ (to_fclass_mask timm:$mask)))>;
/// Setcc
@@ -294,8 +295,12 @@ def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, FPR32:$fa)),
def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, (fneg FPR32:$fa)),
(FNMADD_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
-// fnmsub.s: -fj * fk + fa
-def : Pat<(fma (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa),
+// fnmsub.s: -(fj * fk - fa)
+def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, (fneg FPR32:$fa))),
+ (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
+
+// fnmsub.s: -fj * fk + fa (the nsz flag on the FMA)
+def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa),
(FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
} // Predicates = [HasBasicF]
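The new is_fpclass pattern decomposes the test into: FCLASS_S computes a class bitmask in an FPR, MOVFR2GR_S moves it to a GPR, ANDI intersects it with the requested class set (to_fclass_mask presumably translates the generic is_fpclass mask into fclass's bit layout; its definition is not in this hunk), and SLTU against R0 materializes x != 0 as 0 or 1. The scalar logic, as a standalone sketch:

    #include <cstdint>

    // ClassBits stands for the fclass.s result, Mask for the translated
    // test mask; sltu r0, x computes the unsigned test 0 < x, i.e. x != 0.
    static bool isFPClassLowered(uint32_t ClassBits, uint32_t Mask) {
      uint32_t And = ClassBits & Mask; // andi
      return 0u < And;                 // sltu
    }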
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
index 5118474725b6..6e0ac286e8f4 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -142,15 +142,22 @@ def : PatFprFpr<fminnum_ieee, FMIN_D, FPR64>;
def : PatFpr<fneg, FNEG_D, FPR64>;
def : PatFpr<fabs, FABS_D, FPR64>;
def : PatFpr<fsqrt, FSQRT_D, FPR64>;
-
def : Pat<(fdiv fpimm1, (fsqrt FPR64:$fj)), (FRSQRT_D FPR64:$fj)>;
-
def : Pat<(fcopysign FPR64:$fj, FPR32:$fk),
(FCOPYSIGN_D FPR64:$fj, (FCVT_D_S FPR32:$fk))>;
def : Pat<(fcopysign FPR32:$fj, FPR64:$fk),
(FCOPYSIGN_S FPR32:$fj, (FCVT_S_D FPR64:$fk))>;
-
def : Pat<(fcanonicalize FPR64:$fj), (FMAX_D $fj, $fj)>;
+let Predicates = [IsLA32] in {
+def : Pat<(is_fpclass FPR64:$fj, (i32 timm:$mask)),
+ (SLTU R0, (ANDI (MOVFR2GR_S_64 (FCLASS_D FPR64:$fj)),
+ (to_fclass_mask timm:$mask)))>;
+} // Predicates = [IsLA32]
+let Predicates = [IsLA64] in {
+def : Pat<(is_fpclass FPR64:$fj, (i32 timm:$mask)),
+ (SLTU R0, (ANDI (MOVFR2GR_D (FCLASS_D FPR64:$fj)),
+ (to_fclass_mask timm:$mask)))>;
+} // Predicates = [IsLA64]
/// Setcc
@@ -256,7 +263,11 @@ def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, (fneg FPR64:$fa)),
(FNMADD_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>;
// fnmsub.d: -(fj * fk - fa)
-def : Pat<(fma (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa),
+def : Pat<(fneg (fma FPR64:$fj, FPR64:$fk, (fneg FPR64:$fa))),
+ (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>;
+
+// fnmsub.d: -fj * fk + fa (the nsz flag on the FMA)
+def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa),
(FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>;
} // Predicates = [HasBasicD]
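Both the f32 and f64 patterns get the same correction: fnmsub computes -(fj * fk - fa), which equals (-fj) * fk + fa only when the sign of zero may be ignored, so the exact shape fneg(fma(fj, fk, fneg(fa))) now matches unconditionally and the operand-negated form is kept only under fma_nsz. The sign-of-zero discrepancy is easy to demonstrate:

    #include <cmath>
    #include <cstdio>

    int main() {
      float fj = 1.0f, fk = 2.0f, fa = 2.0f;
      float a = -std::fma(fj, fk, -fa); // -(fj*fk - fa)  == -0.0f
      float b = std::fma(-fj, fk, fa);  // (-fj)*fk + fa  == +0.0f
      // Same value, opposite zero signs: exactly what fma_nsz waives.
      std::printf("signbit(a)=%d signbit(b)=%d\n",
                  (int)std::signbit(a), (int)std::signbit(b));
      return 0;
    }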
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
index 0d78e39b3828..dc2d61a6e474 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -291,18 +291,15 @@ void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
if (hasFP(MF)) {
// Realign stack.
if (RI->hasStackRealignment(MF)) {
- unsigned ShiftAmount = Log2(MFI.getMaxAlign());
- Register VR =
- MF.getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
+ unsigned Align = Log2(MFI.getMaxAlign());
+ assert(Align > 0 && "The stack realignment size is invalid!");
BuildMI(MBB, MBBI, DL,
- TII->get(IsLA64 ? LoongArch::SRLI_D : LoongArch::SRLI_W), VR)
+ TII->get(IsLA64 ? LoongArch::BSTRINS_D : LoongArch::BSTRINS_W),
+ SPReg)
.addReg(SPReg)
- .addImm(ShiftAmount)
- .setMIFlag(MachineInstr::FrameSetup);
- BuildMI(MBB, MBBI, DL,
- TII->get(IsLA64 ? LoongArch::SLLI_D : LoongArch::SLLI_W), SPReg)
- .addReg(VR)
- .addImm(ShiftAmount)
+ .addReg(LoongArch::R0)
+ .addImm(Align - 1)
+ .addImm(0)
.setMIFlag(MachineInstr::FrameSetup);
// FP will be used to restore the frame in the epilogue, so we need
// another base register BP to record SP after re-alignment. SP will
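Realignment previously shifted SP right and then left through a scratch virtual register; the replacement writes zeros into bits [Align-1:0] of SP with a single BSTRINS of R0, saving an instruction and the scratch register. Both forms compute SP & ~(MaxAlign - 1); a standalone equivalence check:

    #include <cassert>
    #include <cstdint>

    // Log2Align corresponds to Log2(MFI.getMaxAlign()) in the hunk above.
    static uint64_t realignOld(uint64_t SP, unsigned Log2Align) {
      return (SP >> Log2Align) << Log2Align;         // srli + slli
    }
    static uint64_t realignNew(uint64_t SP, unsigned Log2Align) {
      return SP & ~((uint64_t(1) << Log2Align) - 1); // bstrins sp, r0, n-1, 0
    }

    int main() {
      for (uint64_t SP = 0; SP < 4096; SP += 7)
        assert(realignOld(SP, 4) == realignNew(SP, 4));
      return 0;
    }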
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
index ae7167cb5ce7..726856bda5dc 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -15,6 +15,7 @@
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "MCTargetDesc/LoongArchMatInt.h"
#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -75,7 +76,64 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, CurDAG->getMachineNode(ADDIOp, DL, VT, TFI, Imm));
return;
}
- // TODO: Add selection nodes needed later.
+ case ISD::BITCAST: {
+ if (VT.is128BitVector() || VT.is256BitVector()) {
+ ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+ break;
+ }
+ case ISD::BUILD_VECTOR: {
+ // Select appropriate [x]vrepli.[bhwd] instructions for constant splats of
+ // 128/256-bit when LSX/LASX is enabled.
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Node);
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ unsigned Op;
+ EVT ViaVecTy;
+ bool Is128Vec = BVN->getValueType(0).is128BitVector();
+ bool Is256Vec = BVN->getValueType(0).is256BitVector();
+
+ if (!Subtarget->hasExtLSX() || (!Is128Vec && !Is256Vec))
+ break;
+ if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+ HasAnyUndefs, 8))
+ break;
+
+ switch (SplatBitSize) {
+ default:
+ break;
+ case 8:
+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
+ ViaVecTy = Is256Vec ? MVT::v32i8 : MVT::v16i8;
+ break;
+ case 16:
+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
+ ViaVecTy = Is256Vec ? MVT::v16i16 : MVT::v8i16;
+ break;
+ case 32:
+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
+ ViaVecTy = Is256Vec ? MVT::v8i32 : MVT::v4i32;
+ break;
+ case 64:
+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
+ ViaVecTy = Is256Vec ? MVT::v4i64 : MVT::v2i64;
+ break;
+ }
+
+ SDNode *Res;
+ // If we have a signed 10 bit integer, we can splat it directly.
+ if (SplatValue.isSignedIntN(10)) {
+ SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL,
+ ViaVecTy.getVectorElementType());
+ Res = CurDAG->getMachineNode(Op, DL, ViaVecTy, Imm);
+ ReplaceNode(Node, Res);
+ return;
+ }
+ break;
+ }
}
// Select the default instruction.
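The BUILD_VECTOR case selects an [x]vrepli.[bhwd] directly whenever the node is a constant splat whose value fits the instruction's signed 10-bit immediate, that is, the range [-512, 511]; anything wider falls through to generic selection. A standalone restatement of the guard used above:

    #include "llvm/ADT/APInt.h"

    // Mirrors SplatValue.isSignedIntN(10) in the BUILD_VECTOR case: true
    // exactly when the splat fits a signed 10-bit [x]vrepli immediate.
    static bool fitsVrepliImm(const llvm::APInt &Splat) {
      return Splat.isSignedIntN(10); // [-512, 511]
    }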
@@ -83,7 +141,8 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
}
bool LoongArchDAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+ const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
+ std::vector<SDValue> &OutOps) {
SDValue Base = Op;
SDValue Offset =
CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getGRLenVT());
@@ -91,12 +150,12 @@ bool LoongArchDAGToDAGISel::SelectInlineAsmMemoryOperand(
default:
llvm_unreachable("unexpected asm memory constraint");
// Reg+Reg addressing.
- case InlineAsm::Constraint_k:
+ case InlineAsm::ConstraintCode::k:
Base = Op.getOperand(0);
Offset = Op.getOperand(1);
break;
// Reg+simm12 addressing.
- case InlineAsm::Constraint_m:
+ case InlineAsm::ConstraintCode::m:
if (CurDAG->isBaseWithConstantOffset(Op)) {
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (isIntN(12, CN->getSExtValue())) {
@@ -107,10 +166,10 @@ bool LoongArchDAGToDAGISel::SelectInlineAsmMemoryOperand(
}
break;
// Reg+0 addressing.
- case InlineAsm::Constraint_ZB:
+ case InlineAsm::ConstraintCode::ZB:
break;
// Reg+(simm14<<2) addressing.
- case InlineAsm::Constraint_ZC:
+ case InlineAsm::ConstraintCode::ZC:
if (CurDAG->isBaseWithConstantOffset(Op)) {
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (isIntN(16, CN->getSExtValue()) &&
@@ -262,6 +321,96 @@ bool LoongArchDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
return false;
}
+bool LoongArchDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm,
+ unsigned MinSizeInBits) const {
+ if (!Subtarget->hasExtLSX())
+ return false;
+
+ BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N);
+
+ if (!Node)
+ return false;
+
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
+ MinSizeInBits, /*IsBigEndian=*/false))
+ return false;
+
+ Imm = SplatValue;
+
+ return true;
+}
+
+template <unsigned ImmBitSize, bool IsSigned>
+bool LoongArchDAGToDAGISel::selectVSplatImm(SDValue N, SDValue &SplatVal) {
+ APInt ImmValue;
+ EVT EltTy = N->getValueType(0).getVectorElementType();
+
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0);
+
+ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
+ ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+ if (IsSigned && ImmValue.isSignedIntN(ImmBitSize)) {
+ SplatVal = CurDAG->getTargetConstant(ImmValue.getSExtValue(), SDLoc(N),
+ Subtarget->getGRLenVT());
+ return true;
+ }
+ if (!IsSigned && ImmValue.isIntN(ImmBitSize)) {
+ SplatVal = CurDAG->getTargetConstant(ImmValue.getZExtValue(), SDLoc(N),
+ Subtarget->getGRLenVT());
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool LoongArchDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N,
+ SDValue &SplatImm) const {
+ APInt ImmValue;
+ EVT EltTy = N->getValueType(0).getVectorElementType();
+
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0);
+
+ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
+ ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+ int32_t Log2 = (~ImmValue).exactLogBase2();
+
+ if (Log2 != -1) {
+ SplatImm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool LoongArchDAGToDAGISel::selectVSplatUimmPow2(SDValue N,
+ SDValue &SplatImm) const {
+ APInt ImmValue;
+ EVT EltTy = N->getValueType(0).getVectorElementType();
+
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0);
+
+ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
+ ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+ int32_t Log2 = ImmValue.exactLogBase2();
+
+ if (Log2 != -1) {
+ SplatImm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy);
+ return true;
+ }
+ }
+
+ return false;
+}
+
// This pass converts a legalized DAG into a LoongArch-specific DAG, ready
// for instruction scheduling.
FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM) {
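selectVSplatUimmPow2 and selectVSplatUimmInvPow2 accept a splat only when the value, or respectively its complement, is an exact power of two, and they encode the bit index rather than the value; APInt::exactLogBase2 returns -1 otherwise, which is the rejection test in both helpers. A compact sketch:

    #include "llvm/ADT/APInt.h"

    // Bit index when Splat (or its complement, for the InvPow2 variant) is a
    // power of two; -1 means the splat matches neither helper above.
    static int32_t splatPow2Index(const llvm::APInt &Splat, bool Inverted) {
      return Inverted ? (~Splat).exactLogBase2() : Splat.exactLogBase2();
    }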
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
index 3099407aea3e..48a178bfeb95 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
@@ -38,7 +38,8 @@ public:
void Select(SDNode *Node) override;
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) override;
bool SelectBaseAddr(SDValue Addr, SDValue &Base);
@@ -56,6 +57,14 @@ public:
bool selectSExti32(SDValue N, SDValue &Val);
bool selectZExti32(SDValue N, SDValue &Val);
+ bool selectVSplat(SDNode *N, APInt &Imm, unsigned MinSizeInBits) const;
+
+ template <unsigned ImmSize, bool IsSigned = false>
+ bool selectVSplatImm(SDValue N, SDValue &SplatVal);
+
+ bool selectVSplatUimmInvPow2(SDValue N, SDValue &SplatImm) const;
+ bool selectVSplatUimmPow2(SDValue N, SDValue &SplatImm) const;
+
// Include the pieces autogenerated from the target description.
#include "LoongArchGenDAGISel.inc"
};
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index db5961fc501a..4794a131edae 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -47,53 +47,79 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
: TargetLowering(TM), Subtarget(STI) {
MVT GRLenVT = Subtarget.getGRLenVT();
+
// Set up the register classes.
+
addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
if (Subtarget.hasBasicF())
addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
if (Subtarget.hasBasicD())
addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
+
+ static const MVT::SimpleValueType LSXVTs[] = {
+ MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
+ static const MVT::SimpleValueType LASXVTs[] = {
+ MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
+
if (Subtarget.hasExtLSX())
- for (auto VT : {MVT::v4f32, MVT::v2f64, MVT::v16i8, MVT::v8i16, MVT::v4i32,
- MVT::v2i64})
+ for (MVT VT : LSXVTs)
addRegisterClass(VT, &LoongArch::LSX128RegClass);
+
if (Subtarget.hasExtLASX())
- for (auto VT : {MVT::v8f32, MVT::v4f64, MVT::v32i8, MVT::v16i16, MVT::v8i32,
- MVT::v4i64})
+ for (MVT VT : LASXVTs)
addRegisterClass(VT, &LoongArch::LASX256RegClass);
+ // Set operations for LA32 and LA64.
+
setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
MVT::i1, Promote);
- // TODO: add necessary setOperationAction calls later.
setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
setOperationAction(ISD::ROTL, GRLenVT, Expand);
setOperationAction(ISD::CTPOP, GRLenVT, Expand);
- setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
- setOperationAction(ISD::TRAP, MVT::Other, Legal);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
- ISD::JumpTable},
+ ISD::JumpTable, ISD::GlobalTLSAddress},
GRLenVT, Custom);
- setOperationAction(ISD::GlobalTLSAddress, GRLenVT, Custom);
-
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
-
- setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
- if (Subtarget.is64Bit())
- setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
+ setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ // Expand bitreverse.i16 with native-width bitrev and shift for now, before
+ // we get to know which of sll and revb.2h is faster.
+ setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
+ setOperationAction(ISD::BITREVERSE, GRLenVT, Legal);
+
+ // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
+ // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
+ // and i32 could still be byte-swapped relatively cheaply.
+ setOperationAction(ISD::BSWAP, MVT::i16, Custom);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, GRLenVT, Expand);
+ setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
+
+ setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
+ setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);
+
+ // Set operations for LA64 only.
+
if (Subtarget.is64Bit()) {
setOperationAction(ISD::SHL, MVT::i32, Custom);
setOperationAction(ISD::SRA, MVT::i32, Custom);
@@ -104,48 +130,46 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ROTL, MVT::i32, Custom);
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
setOperationAction(ISD::CTLZ, MVT::i32, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
- if (Subtarget.hasBasicF() && !Subtarget.hasBasicD())
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- if (Subtarget.hasBasicF())
- setOperationAction(ISD::FRINT, MVT::f32, Legal);
- if (Subtarget.hasBasicD())
- setOperationAction(ISD::FRINT, MVT::f64, Legal);
- }
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
- // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
- // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
- // and i32 could still be byte-swapped relatively cheaply.
- setOperationAction(ISD::BSWAP, MVT::i16, Custom);
- if (Subtarget.is64Bit()) {
+ setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
setOperationAction(ISD::BSWAP, MVT::i32, Custom);
}
- // Expand bitreverse.i16 with native-width bitrev and shift for now, before
- // we get to know which of sll and revb.2h is faster.
- setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
- if (Subtarget.is64Bit()) {
- setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
- setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
- } else {
- setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
+ // Set operations for LA32 only.
+
+ if (!Subtarget.is64Bit()) {
setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
+
+ // Set libcalls.
+ setLibcallName(RTLIB::MUL_I128, nullptr);
+ // The MULO libcall is not part of libgcc, only compiler-rt.
+ setLibcallName(RTLIB::MULO_I64, nullptr);
}
+ // The MULO libcall is not part of libgcc, only compiler-rt.
+ setLibcallName(RTLIB::MULO_I128, nullptr);
+
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
static const ISD::CondCode FPCCToExpand[] = {
ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
ISD::SETGE, ISD::SETNE, ISD::SETGT};
+ // Set operations for 'F' feature.
+
if (Subtarget.hasBasicF()) {
setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
+
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
setOperationAction(ISD::FMA, MVT::f32, Legal);
@@ -153,53 +177,166 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
+ setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
+
+ if (Subtarget.is64Bit())
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+
+ if (!Subtarget.hasBasicD()) {
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ if (Subtarget.is64Bit()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ }
+ }
}
+
+ // Set operations for 'D' feature.
+
if (Subtarget.hasBasicD()) {
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
+
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
- setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setOperationAction(ISD::FMA, MVT::f64, Legal);
setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
+ setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ if (Subtarget.is64Bit())
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
}
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ // Set operations for 'LSX' feature.
- setOperationAction(ISD::BR_CC, GRLenVT, Expand);
- setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
- if (!Subtarget.is64Bit())
- setLibcallName(RTLIB::MUL_I128, nullptr);
+ if (Subtarget.hasExtLSX()) {
+ for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
+ // Expand all truncating stores and extending loads.
+ for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
+ setTruncStoreAction(VT, InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
+ }
+ // By default everything must be expanded. Then we will selectively turn
+ // on ones that can be effectively codegen'd.
+ for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
+ setOperationAction(Op, VT, Expand);
+ }
- setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
- setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);
- if ((Subtarget.is64Bit() && Subtarget.hasBasicF() &&
- !Subtarget.hasBasicD())) {
- setOperationAction(ISD::SINT_TO_FP, GRLenVT, Custom);
- setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom);
+ for (MVT VT : LSXVTs) {
+ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
+ setOperationAction(ISD::BITCAST, VT, Legal);
+ setOperationAction(ISD::UNDEF, VT, Legal);
+
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+
+ setOperationAction(ISD::SETCC, VT, Legal);
+ setOperationAction(ISD::VSELECT, VT, Legal);
+ }
+ for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
+ setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
+ Legal);
+ setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
+ VT, Legal);
+ setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
+ setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
+ setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
+ setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
+ setCondCodeAction(
+ {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
+ Expand);
+ }
+ for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
+ setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
+ setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
+ setOperationAction(ISD::FMA, VT, Legal);
+ setOperationAction(ISD::FSQRT, VT, Legal);
+ setOperationAction(ISD::FNEG, VT, Legal);
+ setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
+ ISD::SETUGE, ISD::SETUGT},
+ VT, Expand);
+ }
}
+ // Set operations for 'LASX' feature.
+
+ if (Subtarget.hasExtLASX()) {
+ for (MVT VT : LASXVTs) {
+ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
+ setOperationAction(ISD::BITCAST, VT, Legal);
+ setOperationAction(ISD::UNDEF, VT, Legal);
+
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+
+ setOperationAction(ISD::SETCC, VT, Legal);
+ setOperationAction(ISD::VSELECT, VT, Legal);
+ }
+ for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
+ setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
+ Legal);
+ setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
+ VT, Legal);
+ setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
+ setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
+ setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
+ setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
+ setCondCodeAction(
+ {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
+ Expand);
+ }
+ for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
+ setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
+ setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
+ setOperationAction(ISD::FMA, VT, Legal);
+ setOperationAction(ISD::FSQRT, VT, Legal);
+ setOperationAction(ISD::FNEG, VT, Legal);
+ setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
+ ISD::SETUGE, ISD::SETUGT},
+ VT, Expand);
+ }
+ }
+
+ // Set DAG combine for LA32 and LA64.
+
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::SRL);
+
+ // Set DAG combine for 'LSX' feature.
+
+ if (Subtarget.hasExtLSX())
+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+
// Compute derived properties from the register classes.
computeRegisterProperties(Subtarget.getRegisterInfo());
setStackPointerRegisterToSaveRestore(LoongArch::R3);
setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
@@ -211,10 +348,6 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
-
- setTargetDAGCombine(ISD::AND);
- setTargetDAGCombine(ISD::OR);
- setTargetDAGCombine(ISD::SRL);
}
bool LoongArchTargetLowering::isOffsetFoldingLegal(
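Most of the constructor churn above regroups existing setOperationAction calls by feature (LA32/LA64 common, LA64-only, LA32-only, F, D, LSX, LASX) and then marks every fixed-length vector (opcode, type) pair Expand before selectively re-legalizing what LSX/LASX can actually do. One genuinely new setting is setBooleanVectorContents(ZeroOrNegativeOneBooleanContent): vector compares produce all-ones lanes for true, unlike the 0/1 scalar booleans, so the mask can drive a bitwise select directly. A standalone toy of that lane convention:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // A 'true' lane is all ones (-1), so (mask & a) | (~mask & b) performs
      // the per-lane select that the vselect legality above relies on.
      int32_t a = 3, b = 7;
      int32_t mask = -(a < b);                // 0xffffffff when true
      int32_t sel = (mask & a) | (~mask & b);
      std::printf("%d\n", sel);               // prints 3
      return 0;
    }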
@@ -229,6 +362,8 @@ bool LoongArchTargetLowering::isOffsetFoldingLegal(
SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
+ case ISD::ATOMIC_FENCE:
+ return lowerATOMIC_FENCE(Op, DAG);
case ISD::EH_DWARF_CFA:
return lowerEH_DWARF_CFA(Op, DAG);
case ISD::GlobalAddress:
@@ -269,10 +404,139 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
return lowerRETURNADDR(Op, DAG);
case ISD::WRITE_REGISTER:
return lowerWRITE_REGISTER(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT:
+ return lowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::BUILD_VECTOR:
+ return lowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE:
+ return lowerVECTOR_SHUFFLE(Op, DAG);
+ }
+ return SDValue();
+}
+
+SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ // TODO: custom shuffle.
+ return SDValue();
+}
+
+static bool isConstantOrUndef(const SDValue Op) {
+ if (Op->isUndef())
+ return true;
+ if (isa<ConstantSDNode>(Op))
+ return true;
+ if (isa<ConstantFPSDNode>(Op))
+ return true;
+ return false;
+}
+
+// Returns true if at least one operand of \p Op is a constant or undef. The
+// BUILD_VECTOR lowering below only falls back to INSERT_VECTOR_ELT when no
+// operand is.
+static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
+ for (unsigned i = 0; i < Op->getNumOperands(); ++i)
+ if (isConstantOrUndef(Op->getOperand(i)))
+ return true;
+ return false;
+}
+
+SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
+ EVT ResTy = Op->getValueType(0);
+ SDLoc DL(Op);
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ bool Is128Vec = ResTy.is128BitVector();
+ bool Is256Vec = ResTy.is256BitVector();
+
+ if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
+ (!Subtarget.hasExtLASX() || !Is256Vec))
+ return SDValue();
+
+ if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
+ /*MinSplatBits=*/8) &&
+ SplatBitSize <= 64) {
+ // We can only cope with 8, 16, 32, or 64-bit elements.
+ if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
+ SplatBitSize != 64)
+ return SDValue();
+
+ EVT ViaVecTy;
+
+ switch (SplatBitSize) {
+ default:
+ return SDValue();
+ case 8:
+ ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
+ break;
+ case 16:
+ ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
+ break;
+ case 32:
+ ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
+ break;
+ case 64:
+ ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
+ break;
+ }
+
+ // SelectionDAG::getConstant will promote SplatValue appropriately.
+ SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
+
+ // Bitcast to the type we originally wanted.
+ if (ViaVecTy != ResTy)
+ Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
+
+ return Result;
+ }
+
+ if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
+ return Op;
+
+ if (!isConstantOrUndefBUILD_VECTOR(Node)) {
+ // Use INSERT_VECTOR_ELT operations rather than expand to stores.
+ // The resulting code is the same length as the expansion, but it doesn't
+ // use memory operations.
+ EVT ResTy = Node->getValueType(0);
+
+ assert(ResTy.isVector());
+
+ unsigned NumElts = ResTy.getVectorNumElements();
+ SDValue Vector = DAG.getUNDEF(ResTy);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
+ Node->getOperand(i),
+ DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
+ }
+ return Vector;
}
+
+ return SDValue();
+}
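+
+// A hedged sketch of the constant-splat path above (values illustrative):
+//
+//   BUILD_VECTOR <8 x i16> <16, 16, ..., 16>
+//     -> isConstantSplat() reports SplatBitSize == 16
+//     -> materialized as DAG.getConstant(16, DL, MVT::v8i16)
+//     -> bitcast back to ResTy only if the requested type differed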
+
+SDValue
+LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (isa<ConstantSDNode>(Op->getOperand(2)))
+ return Op;
return SDValue();
}
+SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SyncScope::ID FenceSSID =
+ static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
+
+ // singlethread fences only synchronize with signal handlers on the same
+ // thread and thus only need to preserve instruction order, not actually
+ // enforce memory ordering.
+ if (FenceSSID == SyncScope::SingleThread)
+ // MEMBARRIER is a compiler barrier; it codegens to a no-op.
+ return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
+
+ return Op;
+}
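+
+// Usage sketch for the fence lowering above: given IR such as
+//
+//   fence syncscope("singlethread") seq_cst   ; compiler barrier only
+//   fence seq_cst                             ; cross-thread fence
+//
+// the first form becomes ISD::MEMBARRIER and emits no machine instruction,
+// while the second is returned unchanged and is expected to select to the
+// target's ordinary barrier instruction (dbar on LoongArch).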
+
SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
SelectionDAG &DAG) const {
@@ -652,9 +916,24 @@ LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
return Addr;
}
+template <unsigned N>
+static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
+ SelectionDAG &DAG, bool IsSigned = false) {
+ auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
+ // Check the ImmArg.
+ if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
+ (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
+ DAG.getContext()->emitError(Op->getOperationName(0) +
+ ": argument out of range.");
+ return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
+ }
+ return SDValue();
+}
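+
+// Illustrative use of the checker above: vreplvei.d indexes one of the two
+// 64-bit lanes of a 128-bit vector, so its immediate must satisfy isUInt<1>.
+// As a sketch (builtin names assumed from lsxintrin.h):
+//
+//   __lsx_vreplvei_d(v, 1); // ok: 1 fits in uimm1
+//   __lsx_vreplvei_d(v, 2); // "argument out of range" is emitted, UNDEF used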
+
SDValue
LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
+ SDLoc DL(Op);
switch (Op.getConstantOperandVal(0)) {
default:
return SDValue(); // Don't custom lower most intrinsics.
@@ -662,6 +941,271 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getRegister(LoongArch::R2, PtrVT);
}
+ case Intrinsic::loongarch_lsx_vpickve2gr_d:
+ case Intrinsic::loongarch_lsx_vpickve2gr_du:
+ case Intrinsic::loongarch_lsx_vreplvei_d:
+ case Intrinsic::loongarch_lasx_xvrepl128vei_d:
+ return checkIntrinsicImmArg<1>(Op, 2, DAG);
+ case Intrinsic::loongarch_lsx_vreplvei_w:
+ case Intrinsic::loongarch_lasx_xvrepl128vei_w:
+ case Intrinsic::loongarch_lasx_xvpickve2gr_d:
+ case Intrinsic::loongarch_lasx_xvpickve2gr_du:
+ case Intrinsic::loongarch_lasx_xvpickve_d:
+ case Intrinsic::loongarch_lasx_xvpickve_d_f:
+ return checkIntrinsicImmArg<2>(Op, 2, DAG);
+ case Intrinsic::loongarch_lasx_xvinsve0_d:
+ return checkIntrinsicImmArg<2>(Op, 3, DAG);
+ case Intrinsic::loongarch_lsx_vsat_b:
+ case Intrinsic::loongarch_lsx_vsat_bu:
+ case Intrinsic::loongarch_lsx_vrotri_b:
+ case Intrinsic::loongarch_lsx_vsllwil_h_b:
+ case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
+ case Intrinsic::loongarch_lsx_vsrlri_b:
+ case Intrinsic::loongarch_lsx_vsrari_b:
+ case Intrinsic::loongarch_lsx_vreplvei_h:
+ case Intrinsic::loongarch_lasx_xvsat_b:
+ case Intrinsic::loongarch_lasx_xvsat_bu:
+ case Intrinsic::loongarch_lasx_xvrotri_b:
+ case Intrinsic::loongarch_lasx_xvsllwil_h_b:
+ case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
+ case Intrinsic::loongarch_lasx_xvsrlri_b:
+ case Intrinsic::loongarch_lasx_xvsrari_b:
+ case Intrinsic::loongarch_lasx_xvrepl128vei_h:
+ case Intrinsic::loongarch_lasx_xvpickve_w:
+ case Intrinsic::loongarch_lasx_xvpickve_w_f:
+ return checkIntrinsicImmArg<3>(Op, 2, DAG);
+ case Intrinsic::loongarch_lasx_xvinsve0_w:
+ return checkIntrinsicImmArg<3>(Op, 3, DAG);
+ case Intrinsic::loongarch_lsx_vsat_h:
+ case Intrinsic::loongarch_lsx_vsat_hu:
+ case Intrinsic::loongarch_lsx_vrotri_h:
+ case Intrinsic::loongarch_lsx_vsllwil_w_h:
+ case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
+ case Intrinsic::loongarch_lsx_vsrlri_h:
+ case Intrinsic::loongarch_lsx_vsrari_h:
+ case Intrinsic::loongarch_lsx_vreplvei_b:
+ case Intrinsic::loongarch_lasx_xvsat_h:
+ case Intrinsic::loongarch_lasx_xvsat_hu:
+ case Intrinsic::loongarch_lasx_xvrotri_h:
+ case Intrinsic::loongarch_lasx_xvsllwil_w_h:
+ case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
+ case Intrinsic::loongarch_lasx_xvsrlri_h:
+ case Intrinsic::loongarch_lasx_xvsrari_h:
+ case Intrinsic::loongarch_lasx_xvrepl128vei_b:
+ return checkIntrinsicImmArg<4>(Op, 2, DAG);
+ case Intrinsic::loongarch_lsx_vsrlni_b_h:
+ case Intrinsic::loongarch_lsx_vsrani_b_h:
+ case Intrinsic::loongarch_lsx_vsrlrni_b_h:
+ case Intrinsic::loongarch_lsx_vsrarni_b_h:
+ case Intrinsic::loongarch_lsx_vssrlni_b_h:
+ case Intrinsic::loongarch_lsx_vssrani_b_h:
+ case Intrinsic::loongarch_lsx_vssrlni_bu_h:
+ case Intrinsic::loongarch_lsx_vssrani_bu_h:
+ case Intrinsic::loongarch_lsx_vssrlrni_b_h:
+ case Intrinsic::loongarch_lsx_vssrarni_b_h:
+ case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
+ case Intrinsic::loongarch_lsx_vssrarni_bu_h:
+ case Intrinsic::loongarch_lasx_xvsrlni_b_h:
+ case Intrinsic::loongarch_lasx_xvsrani_b_h:
+ case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
+ case Intrinsic::loongarch_lasx_xvsrarni_b_h:
+ case Intrinsic::loongarch_lasx_xvssrlni_b_h:
+ case Intrinsic::loongarch_lasx_xvssrani_b_h:
+ case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
+ case Intrinsic::loongarch_lasx_xvssrani_bu_h:
+ case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
+ case Intrinsic::loongarch_lasx_xvssrarni_b_h:
+ case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
+ case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
+ return checkIntrinsicImmArg<4>(Op, 3, DAG);
+ case Intrinsic::loongarch_lsx_vsat_w:
+ case Intrinsic::loongarch_lsx_vsat_wu:
+ case Intrinsic::loongarch_lsx_vrotri_w:
+ case Intrinsic::loongarch_lsx_vsllwil_d_w:
+ case Intrinsic::loongarch_lsx_vsllwil_du_wu:
+ case Intrinsic::loongarch_lsx_vsrlri_w:
+ case Intrinsic::loongarch_lsx_vsrari_w:
+ case Intrinsic::loongarch_lsx_vslei_bu:
+ case Intrinsic::loongarch_lsx_vslei_hu:
+ case Intrinsic::loongarch_lsx_vslei_wu:
+ case Intrinsic::loongarch_lsx_vslei_du:
+ case Intrinsic::loongarch_lsx_vslti_bu:
+ case Intrinsic::loongarch_lsx_vslti_hu:
+ case Intrinsic::loongarch_lsx_vslti_wu:
+ case Intrinsic::loongarch_lsx_vslti_du:
+ case Intrinsic::loongarch_lsx_vbsll_v:
+ case Intrinsic::loongarch_lsx_vbsrl_v:
+ case Intrinsic::loongarch_lasx_xvsat_w:
+ case Intrinsic::loongarch_lasx_xvsat_wu:
+ case Intrinsic::loongarch_lasx_xvrotri_w:
+ case Intrinsic::loongarch_lasx_xvsllwil_d_w:
+ case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
+ case Intrinsic::loongarch_lasx_xvsrlri_w:
+ case Intrinsic::loongarch_lasx_xvsrari_w:
+ case Intrinsic::loongarch_lasx_xvslei_bu:
+ case Intrinsic::loongarch_lasx_xvslei_hu:
+ case Intrinsic::loongarch_lasx_xvslei_wu:
+ case Intrinsic::loongarch_lasx_xvslei_du:
+ case Intrinsic::loongarch_lasx_xvslti_bu:
+ case Intrinsic::loongarch_lasx_xvslti_hu:
+ case Intrinsic::loongarch_lasx_xvslti_wu:
+ case Intrinsic::loongarch_lasx_xvslti_du:
+ case Intrinsic::loongarch_lasx_xvbsll_v:
+ case Intrinsic::loongarch_lasx_xvbsrl_v:
+ return checkIntrinsicImmArg<5>(Op, 2, DAG);
+ case Intrinsic::loongarch_lsx_vseqi_b:
+ case Intrinsic::loongarch_lsx_vseqi_h:
+ case Intrinsic::loongarch_lsx_vseqi_w:
+ case Intrinsic::loongarch_lsx_vseqi_d:
+ case Intrinsic::loongarch_lsx_vslei_b:
+ case Intrinsic::loongarch_lsx_vslei_h:
+ case Intrinsic::loongarch_lsx_vslei_w:
+ case Intrinsic::loongarch_lsx_vslei_d:
+ case Intrinsic::loongarch_lsx_vslti_b:
+ case Intrinsic::loongarch_lsx_vslti_h:
+ case Intrinsic::loongarch_lsx_vslti_w:
+ case Intrinsic::loongarch_lsx_vslti_d:
+ case Intrinsic::loongarch_lasx_xvseqi_b:
+ case Intrinsic::loongarch_lasx_xvseqi_h:
+ case Intrinsic::loongarch_lasx_xvseqi_w:
+ case Intrinsic::loongarch_lasx_xvseqi_d:
+ case Intrinsic::loongarch_lasx_xvslei_b:
+ case Intrinsic::loongarch_lasx_xvslei_h:
+ case Intrinsic::loongarch_lasx_xvslei_w:
+ case Intrinsic::loongarch_lasx_xvslei_d:
+ case Intrinsic::loongarch_lasx_xvslti_b:
+ case Intrinsic::loongarch_lasx_xvslti_h:
+ case Intrinsic::loongarch_lasx_xvslti_w:
+ case Intrinsic::loongarch_lasx_xvslti_d:
+ return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
+ case Intrinsic::loongarch_lsx_vsrlni_h_w:
+ case Intrinsic::loongarch_lsx_vsrani_h_w:
+ case Intrinsic::loongarch_lsx_vsrlrni_h_w:
+ case Intrinsic::loongarch_lsx_vsrarni_h_w:
+ case Intrinsic::loongarch_lsx_vssrlni_h_w:
+ case Intrinsic::loongarch_lsx_vssrani_h_w:
+ case Intrinsic::loongarch_lsx_vssrlni_hu_w:
+ case Intrinsic::loongarch_lsx_vssrani_hu_w:
+ case Intrinsic::loongarch_lsx_vssrlrni_h_w:
+ case Intrinsic::loongarch_lsx_vssrarni_h_w:
+ case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
+ case Intrinsic::loongarch_lsx_vssrarni_hu_w:
+ case Intrinsic::loongarch_lsx_vfrstpi_b:
+ case Intrinsic::loongarch_lsx_vfrstpi_h:
+ case Intrinsic::loongarch_lasx_xvsrlni_h_w:
+ case Intrinsic::loongarch_lasx_xvsrani_h_w:
+ case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
+ case Intrinsic::loongarch_lasx_xvsrarni_h_w:
+ case Intrinsic::loongarch_lasx_xvssrlni_h_w:
+ case Intrinsic::loongarch_lasx_xvssrani_h_w:
+ case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
+ case Intrinsic::loongarch_lasx_xvssrani_hu_w:
+ case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
+ case Intrinsic::loongarch_lasx_xvssrarni_h_w:
+ case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
+ case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
+ case Intrinsic::loongarch_lasx_xvfrstpi_b:
+ case Intrinsic::loongarch_lasx_xvfrstpi_h:
+ return checkIntrinsicImmArg<5>(Op, 3, DAG);
+ case Intrinsic::loongarch_lsx_vsat_d:
+ case Intrinsic::loongarch_lsx_vsat_du:
+ case Intrinsic::loongarch_lsx_vrotri_d:
+ case Intrinsic::loongarch_lsx_vsrlri_d:
+ case Intrinsic::loongarch_lsx_vsrari_d:
+ case Intrinsic::loongarch_lasx_xvsat_d:
+ case Intrinsic::loongarch_lasx_xvsat_du:
+ case Intrinsic::loongarch_lasx_xvrotri_d:
+ case Intrinsic::loongarch_lasx_xvsrlri_d:
+ case Intrinsic::loongarch_lasx_xvsrari_d:
+ return checkIntrinsicImmArg<6>(Op, 2, DAG);
+ case Intrinsic::loongarch_lsx_vsrlni_w_d:
+ case Intrinsic::loongarch_lsx_vsrani_w_d:
+ case Intrinsic::loongarch_lsx_vsrlrni_w_d:
+ case Intrinsic::loongarch_lsx_vsrarni_w_d:
+ case Intrinsic::loongarch_lsx_vssrlni_w_d:
+ case Intrinsic::loongarch_lsx_vssrani_w_d:
+ case Intrinsic::loongarch_lsx_vssrlni_wu_d:
+ case Intrinsic::loongarch_lsx_vssrani_wu_d:
+ case Intrinsic::loongarch_lsx_vssrlrni_w_d:
+ case Intrinsic::loongarch_lsx_vssrarni_w_d:
+ case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
+ case Intrinsic::loongarch_lsx_vssrarni_wu_d:
+ case Intrinsic::loongarch_lasx_xvsrlni_w_d:
+ case Intrinsic::loongarch_lasx_xvsrani_w_d:
+ case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
+ case Intrinsic::loongarch_lasx_xvsrarni_w_d:
+ case Intrinsic::loongarch_lasx_xvssrlni_w_d:
+ case Intrinsic::loongarch_lasx_xvssrani_w_d:
+ case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
+ case Intrinsic::loongarch_lasx_xvssrani_wu_d:
+ case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
+ case Intrinsic::loongarch_lasx_xvssrarni_w_d:
+ case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
+ case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
+ return checkIntrinsicImmArg<6>(Op, 3, DAG);
+ case Intrinsic::loongarch_lsx_vsrlni_d_q:
+ case Intrinsic::loongarch_lsx_vsrani_d_q:
+ case Intrinsic::loongarch_lsx_vsrlrni_d_q:
+ case Intrinsic::loongarch_lsx_vsrarni_d_q:
+ case Intrinsic::loongarch_lsx_vssrlni_d_q:
+ case Intrinsic::loongarch_lsx_vssrani_d_q:
+ case Intrinsic::loongarch_lsx_vssrlni_du_q:
+ case Intrinsic::loongarch_lsx_vssrani_du_q:
+ case Intrinsic::loongarch_lsx_vssrlrni_d_q:
+ case Intrinsic::loongarch_lsx_vssrarni_d_q:
+ case Intrinsic::loongarch_lsx_vssrlrni_du_q:
+ case Intrinsic::loongarch_lsx_vssrarni_du_q:
+ case Intrinsic::loongarch_lasx_xvsrlni_d_q:
+ case Intrinsic::loongarch_lasx_xvsrani_d_q:
+ case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
+ case Intrinsic::loongarch_lasx_xvsrarni_d_q:
+ case Intrinsic::loongarch_lasx_xvssrlni_d_q:
+ case Intrinsic::loongarch_lasx_xvssrani_d_q:
+ case Intrinsic::loongarch_lasx_xvssrlni_du_q:
+ case Intrinsic::loongarch_lasx_xvssrani_du_q:
+ case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
+ case Intrinsic::loongarch_lasx_xvssrarni_d_q:
+ case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
+ case Intrinsic::loongarch_lasx_xvssrarni_du_q:
+ return checkIntrinsicImmArg<7>(Op, 3, DAG);
+ case Intrinsic::loongarch_lsx_vnori_b:
+ case Intrinsic::loongarch_lsx_vshuf4i_b:
+ case Intrinsic::loongarch_lsx_vshuf4i_h:
+ case Intrinsic::loongarch_lsx_vshuf4i_w:
+ case Intrinsic::loongarch_lasx_xvnori_b:
+ case Intrinsic::loongarch_lasx_xvshuf4i_b:
+ case Intrinsic::loongarch_lasx_xvshuf4i_h:
+ case Intrinsic::loongarch_lasx_xvshuf4i_w:
+ case Intrinsic::loongarch_lasx_xvpermi_d:
+ return checkIntrinsicImmArg<8>(Op, 2, DAG);
+ case Intrinsic::loongarch_lsx_vshuf4i_d:
+ case Intrinsic::loongarch_lsx_vpermi_w:
+ case Intrinsic::loongarch_lsx_vbitseli_b:
+ case Intrinsic::loongarch_lsx_vextrins_b:
+ case Intrinsic::loongarch_lsx_vextrins_h:
+ case Intrinsic::loongarch_lsx_vextrins_w:
+ case Intrinsic::loongarch_lsx_vextrins_d:
+ case Intrinsic::loongarch_lasx_xvshuf4i_d:
+ case Intrinsic::loongarch_lasx_xvpermi_w:
+ case Intrinsic::loongarch_lasx_xvpermi_q:
+ case Intrinsic::loongarch_lasx_xvbitseli_b:
+ case Intrinsic::loongarch_lasx_xvextrins_b:
+ case Intrinsic::loongarch_lasx_xvextrins_h:
+ case Intrinsic::loongarch_lasx_xvextrins_w:
+ case Intrinsic::loongarch_lasx_xvextrins_d:
+ return checkIntrinsicImmArg<8>(Op, 3, DAG);
+ case Intrinsic::loongarch_lsx_vrepli_b:
+ case Intrinsic::loongarch_lsx_vrepli_h:
+ case Intrinsic::loongarch_lsx_vrepli_w:
+ case Intrinsic::loongarch_lsx_vrepli_d:
+ case Intrinsic::loongarch_lasx_xvrepli_b:
+ case Intrinsic::loongarch_lasx_xvrepli_h:
+ case Intrinsic::loongarch_lasx_xvrepli_w:
+ case Intrinsic::loongarch_lasx_xvrepli_d:
+ return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
+ case Intrinsic::loongarch_lsx_vldi:
+ case Intrinsic::loongarch_lasx_xvldi:
+ return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
}
}
@@ -757,6 +1301,34 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
: DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
{Chain, DAG.getConstant(Imm, DL, GRLenVT)});
}
+ case Intrinsic::loongarch_lsx_vld:
+ case Intrinsic::loongarch_lsx_vldrepl_b:
+ case Intrinsic::loongarch_lasx_xvld:
+ case Intrinsic::loongarch_lasx_xvldrepl_b:
+ return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
+ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
+ : SDValue();
+ case Intrinsic::loongarch_lsx_vldrepl_h:
+ case Intrinsic::loongarch_lasx_xvldrepl_h:
+ return !isShiftedInt<11, 1>(
+ cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
+ ? emitIntrinsicWithChainErrorMessage(
+ Op, "argument out of range or not a multiple of 2", DAG)
+ : SDValue();
+ case Intrinsic::loongarch_lsx_vldrepl_w:
+ case Intrinsic::loongarch_lasx_xvldrepl_w:
+ return !isShiftedInt<10, 2>(
+ cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
+ ? emitIntrinsicWithChainErrorMessage(
+ Op, "argument out of range or not a multiple of 4", DAG)
+ : SDValue();
+ case Intrinsic::loongarch_lsx_vldrepl_d:
+ case Intrinsic::loongarch_lasx_xvldrepl_d:
+ return !isShiftedInt<9, 3>(
+ cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
+ ? emitIntrinsicWithChainErrorMessage(
+ Op, "argument out of range or not a multiple of 8", DAG)
+ : SDValue();
}
}
@@ -875,6 +1447,63 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
: !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
: Op;
}
+ case Intrinsic::loongarch_lsx_vst:
+ case Intrinsic::loongarch_lasx_xvst:
+ return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
+ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
+ : SDValue();
+ case Intrinsic::loongarch_lasx_xvstelm_b:
+ return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
+ !isUInt<5>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
+ : SDValue();
+ case Intrinsic::loongarch_lsx_vstelm_b:
+ return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
+ !isUInt<4>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
+ : SDValue();
+ case Intrinsic::loongarch_lasx_xvstelm_h:
+ return (!isShiftedInt<8, 1>(
+ cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
+ !isUInt<4>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+ ? emitIntrinsicErrorMessage(
+ Op, "argument out of range or not a multiple of 2", DAG)
+ : SDValue();
+ case Intrinsic::loongarch_lsx_vstelm_h:
+ return (!isShiftedInt<8, 1>(
+ cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
+ !isUInt<3>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+ ? emitIntrinsicErrorMessage(
+ Op, "argument out of range or not a multiple of 2", DAG)
+ : SDValue();
+ case Intrinsic::loongarch_lasx_xvstelm_w:
+ return (!isShiftedInt<8, 2>(
+ cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
+ !isUInt<3>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+ ? emitIntrinsicErrorMessage(
+ Op, "argument out of range or not a multiple of 4", DAG)
+ : SDValue();
+ case Intrinsic::loongarch_lsx_vstelm_w:
+ return (!isShiftedInt<8, 2>(
+ cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
+ !isUInt<2>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+ ? emitIntrinsicErrorMessage(
+ Op, "argument out of range or not a multiple of 4", DAG)
+ : SDValue();
+ case Intrinsic::loongarch_lasx_xvstelm_d:
+ return (!isShiftedInt<8, 3>(
+ cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
+ !isUInt<2>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+ ? emitIntrinsicErrorMessage(
+ Op, "argument out of range or not a multiple of 8", DAG)
+ : SDValue();
+ case Intrinsic::loongarch_lsx_vstelm_d:
+ return (!isShiftedInt<8, 3>(
+ cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
+ !isUInt<1>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+ ? emitIntrinsicErrorMessage(
+ Op, "argument out of range or not a multiple of 8", DAG)
+ : SDValue();
}
}
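+
+// Worked example of the vstelm checks above (a sketch; builtin names assumed
+// from lsxintrin.h): for vstelm.w the byte offset must be a multiple of 4 in
+// [-512, 508] (isShiftedInt<8, 2>) and the lane index must satisfy isUInt<2>:
+//
+//   __lsx_vstelm_w(v, p, 8, 3); // ok
+//   __lsx_vstelm_w(v, p, 6, 3); // error: offset not a multiple of 4
+//   __lsx_vstelm_w(v, p, 8, 4); // error: index out of range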
@@ -1026,16 +1655,122 @@ static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}
-// Helper function that emits error message for intrinsics with chain and return
-// a UNDEF and the chain as the results.
-static void emitErrorAndReplaceIntrinsicWithChainResults(
+// Helper function that emits an error message for intrinsics with or without
+// a chain, and returns UNDEF (plus the chain, when present) as the results.
+static void emitErrorAndReplaceIntrinsicResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
- StringRef ErrorMsg) {
+ StringRef ErrorMsg, bool WithChain = true) {
DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
Results.push_back(DAG.getUNDEF(N->getValueType(0)));
+ if (!WithChain)
+ return;
Results.push_back(N->getOperand(0));
}
+template <unsigned N>
+static void
+replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
+ unsigned ResOp) {
+ const StringRef ErrorMsgOOR = "argument out of range";
+ unsigned Imm = cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue();
+ if (!isUInt<N>(Imm)) {
+ emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
+ /*WithChain=*/false);
+ return;
+ }
+ SDLoc DL(Node);
+ SDValue Vec = Node->getOperand(1);
+
+ SDValue PickElt =
+ DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
+ DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
+ DAG.getValueType(Vec.getValueType().getVectorElementType()));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
+ PickElt.getValue(0)));
+}
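+
+// Sketch of the replacement above: vpickve2gr.b reads lane Imm (a uimm4 over
+// 16 byte lanes) into a GRLen-wide value, the recorded element type tells
+// VPICK_SEXT_ELT/VPICK_ZEXT_ELT how the lane was extended, and the TRUNCATE
+// narrows the result back to the node's illegal (e.g. i32 on LA64) type:
+//
+//   i32 res = trunc (VPICK_SEXT_ELT v16i8:$vec, Imm, /*EltVT=*/i8 : i64)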
+
+static void replaceVecCondBranchResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG,
+ const LoongArchSubtarget &Subtarget,
+ unsigned ResOp) {
+ SDLoc DL(N);
+ SDValue Vec = N->getOperand(1);
+
+ SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
+ Results.push_back(
+ DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
+}
+
+static void
+replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG,
+ const LoongArchSubtarget &Subtarget) {
+ switch (N->getConstantOperandVal(0)) {
+ default:
+ llvm_unreachable("Unexpected Intrinsic.");
+ case Intrinsic::loongarch_lsx_vpickve2gr_b:
+ replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
+ LoongArchISD::VPICK_SEXT_ELT);
+ break;
+ case Intrinsic::loongarch_lsx_vpickve2gr_h:
+ case Intrinsic::loongarch_lasx_xvpickve2gr_w:
+ replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
+ LoongArchISD::VPICK_SEXT_ELT);
+ break;
+ case Intrinsic::loongarch_lsx_vpickve2gr_w:
+ replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
+ LoongArchISD::VPICK_SEXT_ELT);
+ break;
+ case Intrinsic::loongarch_lsx_vpickve2gr_bu:
+ replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
+ LoongArchISD::VPICK_ZEXT_ELT);
+ break;
+ case Intrinsic::loongarch_lsx_vpickve2gr_hu:
+ case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
+ replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
+ LoongArchISD::VPICK_ZEXT_ELT);
+ break;
+ case Intrinsic::loongarch_lsx_vpickve2gr_wu:
+ replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
+ LoongArchISD::VPICK_ZEXT_ELT);
+ break;
+ case Intrinsic::loongarch_lsx_bz_b:
+ case Intrinsic::loongarch_lsx_bz_h:
+ case Intrinsic::loongarch_lsx_bz_w:
+ case Intrinsic::loongarch_lsx_bz_d:
+ case Intrinsic::loongarch_lasx_xbz_b:
+ case Intrinsic::loongarch_lasx_xbz_h:
+ case Intrinsic::loongarch_lasx_xbz_w:
+ case Intrinsic::loongarch_lasx_xbz_d:
+ replaceVecCondBranchResults(N, Results, DAG, Subtarget,
+ LoongArchISD::VALL_ZERO);
+ break;
+ case Intrinsic::loongarch_lsx_bz_v:
+ case Intrinsic::loongarch_lasx_xbz_v:
+ replaceVecCondBranchResults(N, Results, DAG, Subtarget,
+ LoongArchISD::VANY_ZERO);
+ break;
+ case Intrinsic::loongarch_lsx_bnz_b:
+ case Intrinsic::loongarch_lsx_bnz_h:
+ case Intrinsic::loongarch_lsx_bnz_w:
+ case Intrinsic::loongarch_lsx_bnz_d:
+ case Intrinsic::loongarch_lasx_xbnz_b:
+ case Intrinsic::loongarch_lasx_xbnz_h:
+ case Intrinsic::loongarch_lasx_xbnz_w:
+ case Intrinsic::loongarch_lasx_xbnz_d:
+ replaceVecCondBranchResults(N, Results, DAG, Subtarget,
+ LoongArchISD::VALL_NONZERO);
+ break;
+ case Intrinsic::loongarch_lsx_bnz_v:
+ case Intrinsic::loongarch_lasx_xbnz_v:
+ replaceVecCondBranchResults(N, Results, DAG, Subtarget,
+ LoongArchISD::VANY_NONZERO);
+ break;
+ }
+}
+
void LoongArchTargetLowering::ReplaceNodeResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
SDLoc DL(N);
@@ -1168,14 +1903,12 @@ void LoongArchTargetLowering::ReplaceNodeResults(
llvm_unreachable("Unexpected Intrinsic.");
case Intrinsic::loongarch_movfcsr2gr: {
if (!Subtarget.hasBasicF()) {
- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
- ErrorMsgReqF);
+ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
return;
}
unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
if (!isUInt<2>(Imm)) {
- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
- ErrorMsgOOR);
+ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
return;
}
SDValue MOVFCSR2GRResults = DAG.getNode(
@@ -1211,7 +1944,7 @@ void LoongArchTargetLowering::ReplaceNodeResults(
{Chain, Op2, \
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
- Results.push_back(NODE.getValue(1)); \
+ Results.push_back(NODE.getValue(1)); \
break; \
}
CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
@@ -1220,8 +1953,7 @@ void LoongArchTargetLowering::ReplaceNodeResults(
#define CSR_CASE(ID) \
case Intrinsic::loongarch_##ID: { \
if (!Subtarget.is64Bit()) \
- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, \
- ErrorMsgReqLA64); \
+ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
break; \
}
CSR_CASE(csrrd_d);
@@ -1232,8 +1964,7 @@ void LoongArchTargetLowering::ReplaceNodeResults(
case Intrinsic::loongarch_csrrd_w: {
unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
if (!isUInt<14>(Imm)) {
- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
- ErrorMsgOOR);
+ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
return;
}
SDValue CSRRDResults =
@@ -1247,8 +1978,7 @@ void LoongArchTargetLowering::ReplaceNodeResults(
case Intrinsic::loongarch_csrwr_w: {
unsigned Imm = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
if (!isUInt<14>(Imm)) {
- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
- ErrorMsgOOR);
+ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
return;
}
SDValue CSRWRResults =
@@ -1263,8 +1993,7 @@ void LoongArchTargetLowering::ReplaceNodeResults(
case Intrinsic::loongarch_csrxchg_w: {
unsigned Imm = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
if (!isUInt<14>(Imm)) {
- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
- ErrorMsgOOR);
+ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
return;
}
SDValue CSRXCHGResults = DAG.getNode(
@@ -1302,8 +2031,7 @@ void LoongArchTargetLowering::ReplaceNodeResults(
}
case Intrinsic::loongarch_lddir_d: {
if (!Subtarget.is64Bit()) {
- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
- ErrorMsgReqLA64);
+ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
return;
}
break;
@@ -1322,6 +2050,10 @@ void LoongArchTargetLowering::ReplaceNodeResults(
Results.push_back(N->getOperand(0));
break;
}
+ case ISD::INTRINSIC_WO_CHAIN: {
+ replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
+ break;
+ }
}
}
@@ -1685,6 +2417,608 @@ static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
Src.getOperand(0));
}
+template <unsigned N>
+static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
+ SelectionDAG &DAG,
+ const LoongArchSubtarget &Subtarget,
+ bool IsSigned = false) {
+ SDLoc DL(Node);
+ auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
+ // Check the ImmArg.
+ if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
+ (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
+ DAG.getContext()->emitError(Node->getOperationName(0) +
+ ": argument out of range.");
+ return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
+ }
+ return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
+}
+
+template <unsigned N>
+static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
+ SelectionDAG &DAG, bool IsSigned = false) {
+ SDLoc DL(Node);
+ EVT ResTy = Node->getValueType(0);
+ auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
+
+ // Check the ImmArg.
+ if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
+ (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
+ DAG.getContext()->emitError(Node->getOperationName(0) +
+ ": argument out of range.");
+ return DAG.getNode(ISD::UNDEF, DL, ResTy);
+ }
+ return DAG.getConstant(
+ APInt(ResTy.getScalarType().getSizeInBits(),
+ IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
+ DL, ResTy);
+}
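+
+// Hedged example for the splat-immediate helper above: combining vaddi.bu
+// with immediate 5 (see performINTRINSIC_WO_CHAINCombine below) turns
+//
+//   @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %a, i32 5)
+//
+// into a generic ISD::ADD of %a and the splat constant <16 x i8> <5, ..., 5>
+// produced here.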
+
+// Mask the shift/bit-position operand (operand 2) with EltBits - 1 so each
+// lane's value is interpreted modulo the element width.
+static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
+ SDLoc DL(Node);
+ EVT ResTy = Node->getValueType(0);
+ SDValue Vec = Node->getOperand(2);
+ SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
+ return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
+}
+
+static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
+ SDLoc DL(Node);
+ EVT ResTy = Node->getValueType(0);
+ SDValue One = DAG.getConstant(1, DL, ResTy);
+ SDValue Bit =
+ DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
+
+ return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
+ DAG.getNOT(DL, Bit, ResTy));
+}
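+
+// Per-lane sketch of the variable form above: truncateVecElts masks the bit
+// position modulo the element width, so for 32-bit lanes
+//
+//   vbitclr(a, b) == a & ~(1u << (b & 31))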
+
+template <unsigned N>
+static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
+ SDLoc DL(Node);
+ EVT ResTy = Node->getValueType(0);
+ auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
+ // Check the unsigned ImmArg.
+ if (!isUInt<N>(CImm->getZExtValue())) {
+ DAG.getContext()->emitError(Node->getOperationName(0) +
+ ": argument out of range.");
+ return DAG.getNode(ISD::UNDEF, DL, ResTy);
+ }
+
+ APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
+ SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
+
+ return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
+}
+
+template <unsigned N>
+static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
+ SDLoc DL(Node);
+ EVT ResTy = Node->getValueType(0);
+ auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
+ // Check the unsigned ImmArg.
+ if (!isUInt<N>(CImm->getZExtValue())) {
+ DAG.getContext()->emitError(Node->getOperationName(0) +
+ ": argument out of range.");
+ return DAG.getNode(ISD::UNDEF, DL, ResTy);
+ }
+
+ APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
+ SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
+ return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
+}
+
+template <unsigned N>
+static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
+ SDLoc DL(Node);
+ EVT ResTy = Node->getValueType(0);
+ auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
+ // Check the unsigned ImmArg.
+ if (!isUInt<N>(CImm->getZExtValue())) {
+ DAG.getContext()->emitError(Node->getOperationName(0) +
+ ": argument out of range.");
+ return DAG.getNode(ISD::UNDEF, DL, ResTy);
+ }
+
+ APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
+ SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
+ return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
+}
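+
+// The three immediate helpers above encode simple per-lane identities
+// (sketch; imm is pre-checked to be < EltBits):
+//
+//   vbitclri: a & ~(1u << imm) // clear bit imm
+//   vbitseti: a |  (1u << imm) // set bit imm
+//   vbitrevi: a ^  (1u << imm) // flip bit imm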
+
+static SDValue
+performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const LoongArchSubtarget &Subtarget) {
+ SDLoc DL(N);
+ switch (N->getConstantOperandVal(0)) {
+ default:
+ break;
+ case Intrinsic::loongarch_lsx_vadd_b:
+ case Intrinsic::loongarch_lsx_vadd_h:
+ case Intrinsic::loongarch_lsx_vadd_w:
+ case Intrinsic::loongarch_lsx_vadd_d:
+ case Intrinsic::loongarch_lasx_xvadd_b:
+ case Intrinsic::loongarch_lasx_xvadd_h:
+ case Intrinsic::loongarch_lasx_xvadd_w:
+ case Intrinsic::loongarch_lasx_xvadd_d:
+ return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
+ N->getOperand(2));
+ case Intrinsic::loongarch_lsx_vaddi_bu:
+ case Intrinsic::loongarch_lsx_vaddi_hu:
+ case Intrinsic::loongarch_lsx_vaddi_wu:
+ case Intrinsic::loongarch_lsx_vaddi_du:
+ case Intrinsic::loongarch_lasx_xvaddi_bu:
+ case Intrinsic::loongarch_lasx_xvaddi_hu:
+ case Intrinsic::loongarch_lasx_xvaddi_wu:
+ case Intrinsic::loongarch_lasx_xvaddi_du:
+ return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
+ lowerVectorSplatImm<5>(N, 2, DAG));
+ case Intrinsic::loongarch_lsx_vsub_b:
+ case Intrinsic::loongarch_lsx_vsub_h:
+ case Intrinsic::loongarch_lsx_vsub_w:
+ case Intrinsic::loongarch_lsx_vsub_d:
+ case Intrinsic::loongarch_lasx_xvsub_b:
+ case Intrinsic::loongarch_lasx_xvsub_h:
+ case Intrinsic::loongarch_lasx_xvsub_w:
+ case Intrinsic::loongarch_lasx_xvsub_d:
+ return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
+ N->getOperand(2));
+ case Intrinsic::loongarch_lsx_vsubi_bu:
+ case Intrinsic::loongarch_lsx_vsubi_hu:
+ case Intrinsic::loongarch_lsx_vsubi_wu:
+ case Intrinsic::loongarch_lsx_vsubi_du:
+ case Intrinsic::loongarch_lasx_xvsubi_bu:
+ case Intrinsic::loongarch_lasx_xvsubi_hu:
+ case Intrinsic::loongarch_lasx_xvsubi_wu:
+ case Intrinsic::loongarch_lasx_xvsubi_du:
+ return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
+ lowerVectorSplatImm<5>(N, 2, DAG));
+ case Intrinsic::loongarch_lsx_vneg_b:
+ case Intrinsic::loongarch_lsx_vneg_h:
+ case Intrinsic::loongarch_lsx_vneg_w:
+ case Intrinsic::loongarch_lsx_vneg_d:
+ case Intrinsic::loongarch_lasx_xvneg_b:
+ case Intrinsic::loongarch_lasx_xvneg_h:
+ case Intrinsic::loongarch_lasx_xvneg_w:
+ case Intrinsic::loongarch_lasx_xvneg_d:
+ return DAG.getNode(
+ ISD::SUB, DL, N->getValueType(0),
+ DAG.getConstant(
+ APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
+ /*isSigned=*/true),
+ SDLoc(N), N->getValueType(0)),
+ N->getOperand(1));
+ case Intrinsic::loongarch_lsx_vmax_b:
+ case Intrinsic::loongarch_lsx_vmax_h:
+ case Intrinsic::loongarch_lsx_vmax_w:
+ case Intrinsic::loongarch_lsx_vmax_d:
+ case Intrinsic::loongarch_lasx_xvmax_b:
+ case Intrinsic::loongarch_lasx_xvmax_h:
+ case Intrinsic::loongarch_lasx_xvmax_w:
+ case Intrinsic::loongarch_lasx_xvmax_d:
+ return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
+ N->getOperand(2));
+ case Intrinsic::loongarch_lsx_vmax_bu:
+ case Intrinsic::loongarch_lsx_vmax_hu:
+ case Intrinsic::loongarch_lsx_vmax_wu:
+ case Intrinsic::loongarch_lsx_vmax_du:
+ case Intrinsic::loongarch_lasx_xvmax_bu:
+ case Intrinsic::loongarch_lasx_xvmax_hu:
+ case Intrinsic::loongarch_lasx_xvmax_wu:
+ case Intrinsic::loongarch_lasx_xvmax_du:
+ return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
+ N->getOperand(2));
+ case Intrinsic::loongarch_lsx_vmaxi_b:
+ case Intrinsic::loongarch_lsx_vmaxi_h:
+ case Intrinsic::loongarch_lsx_vmaxi_w:
+ case Intrinsic::loongarch_lsx_vmaxi_d:
+ case Intrinsic::loongarch_lasx_xvmaxi_b:
+ case Intrinsic::loongarch_lasx_xvmaxi_h:
+ case Intrinsic::loongarch_lasx_xvmaxi_w:
+ case Intrinsic::loongarch_lasx_xvmaxi_d:
+ return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
+ lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
+ case Intrinsic::loongarch_lsx_vmaxi_bu:
+ case Intrinsic::loongarch_lsx_vmaxi_hu:
+ case Intrinsic::loongarch_lsx_vmaxi_wu:
+ case Intrinsic::loongarch_lsx_vmaxi_du:
+ case Intrinsic::loongarch_lasx_xvmaxi_bu:
+ case Intrinsic::loongarch_lasx_xvmaxi_hu:
+ case Intrinsic::loongarch_lasx_xvmaxi_wu:
+ case Intrinsic::loongarch_lasx_xvmaxi_du:
+ return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
+ lowerVectorSplatImm<5>(N, 2, DAG));
+ case Intrinsic::loongarch_lsx_vmin_b:
+ case Intrinsic::loongarch_lsx_vmin_h:
+ case Intrinsic::loongarch_lsx_vmin_w:
+ case Intrinsic::loongarch_lsx_vmin_d:
+ case Intrinsic::loongarch_lasx_xvmin_b:
+ case Intrinsic::loongarch_lasx_xvmin_h:
+ case Intrinsic::loongarch_lasx_xvmin_w:
+ case Intrinsic::loongarch_lasx_xvmin_d:
+ return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
+ N->getOperand(2));
+ case Intrinsic::loongarch_lsx_vmin_bu:
+ case Intrinsic::loongarch_lsx_vmin_hu:
+ case Intrinsic::loongarch_lsx_vmin_wu:
+ case Intrinsic::loongarch_lsx_vmin_du:
+ case Intrinsic::loongarch_lasx_xvmin_bu:
+ case Intrinsic::loongarch_lasx_xvmin_hu:
+ case Intrinsic::loongarch_lasx_xvmin_wu:
+ case Intrinsic::loongarch_lasx_xvmin_du:
+ return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
+ N->getOperand(2));
+ case Intrinsic::loongarch_lsx_vmini_b:
+ case Intrinsic::loongarch_lsx_vmini_h:
+ case Intrinsic::loongarch_lsx_vmini_w:
+ case Intrinsic::loongarch_lsx_vmini_d:
+ case Intrinsic::loongarch_lasx_xvmini_b:
+ case Intrinsic::loongarch_lasx_xvmini_h:
+ case Intrinsic::loongarch_lasx_xvmini_w:
+ case Intrinsic::loongarch_lasx_xvmini_d:
+ return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
+ lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
+ case Intrinsic::loongarch_lsx_vmini_bu:
+ case Intrinsic::loongarch_lsx_vmini_hu:
+ case Intrinsic::loongarch_lsx_vmini_wu:
+ case Intrinsic::loongarch_lsx_vmini_du:
+ case Intrinsic::loongarch_lasx_xvmini_bu:
+ case Intrinsic::loongarch_lasx_xvmini_hu:
+ case Intrinsic::loongarch_lasx_xvmini_wu:
+ case Intrinsic::loongarch_lasx_xvmini_du:
+ return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
+ lowerVectorSplatImm<5>(N, 2, DAG));
+ case Intrinsic::loongarch_lsx_vmul_b:
+ case Intrinsic::loongarch_lsx_vmul_h:
+ case Intrinsic::loongarch_lsx_vmul_w:
+ case Intrinsic::loongarch_lsx_vmul_d:
+ case Intrinsic::loongarch_lasx_xvmul_b:
+ case Intrinsic::loongarch_lasx_xvmul_h:
+ case Intrinsic::loongarch_lasx_xvmul_w:
+ case Intrinsic::loongarch_lasx_xvmul_d:
+ return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
+ N->getOperand(2));
+ case Intrinsic::loongarch_lsx_vmadd_b:
+ case Intrinsic::loongarch_lsx_vmadd_h:
+ case Intrinsic::loongarch_lsx_vmadd_w:
+ case Intrinsic::loongarch_lsx_vmadd_d:
+ case Intrinsic::loongarch_lasx_xvmadd_b:
+ case Intrinsic::loongarch_lasx_xvmadd_h:
+ case Intrinsic::loongarch_lasx_xvmadd_w:
+ case Intrinsic::loongarch_lasx_xvmadd_d: {
+ EVT ResTy = N->getValueType(0);
+ return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
+ DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
+ N->getOperand(3)));
+ }
+ case Intrinsic::loongarch_lsx_vmsub_b:
+ case Intrinsic::loongarch_lsx_vmsub_h:
+ case Intrinsic::loongarch_lsx_vmsub_w:
+ case Intrinsic::loongarch_lsx_vmsub_d:
+ case Intrinsic::loongarch_lasx_xvmsub_b:
+ case Intrinsic::loongarch_lasx_xvmsub_h:
+ case Intrinsic::loongarch_lasx_xvmsub_w:
+ case Intrinsic::loongarch_lasx_xvmsub_d: {
+ EVT ResTy = N->getValueType(0);
+ return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
+ DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
+ N->getOperand(3)));
+ }
+ case Intrinsic::loongarch_lsx_vdiv_b:
+ case Intrinsic::loongarch_lsx_vdiv_h:
+ case Intrinsic::loongarch_lsx_vdiv_w:
+ case Intrinsic::loongarch_lsx_vdiv_d:
+ case Intrinsic::loongarch_lasx_xvdiv_b:
+ case Intrinsic::loongarch_lasx_xvdiv_h:
+ case Intrinsic::loongarch_lasx_xvdiv_w:
+ case Intrinsic::loongarch_lasx_xvdiv_d:
+ return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
+ N->getOperand(2));
+ case Intrinsic::loongarch_lsx_vdiv_bu:
+ case Intrinsic::loongarch_lsx_vdiv_hu:
+ case Intrinsic::loongarch_lsx_vdiv_wu:
+ case Intrinsic::loongarch_lsx_vdiv_du:
+ case Intrinsic::loongarch_lasx_xvdiv_bu:
+ case Intrinsic::loongarch_lasx_xvdiv_hu:
+ case Intrinsic::loongarch_lasx_xvdiv_wu:
+ case Intrinsic::loongarch_lasx_xvdiv_du:
+ return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
+ N->getOperand(2));
+ case Intrinsic::loongarch_lsx_vmod_b:
+ case Intrinsic::loongarch_lsx_vmod_h:
+ case Intrinsic::loongarch_lsx_vmod_w:
+ case Intrinsic::loongarch_lsx_vmod_d:
+ case Intrinsic::loongarch_lasx_xvmod_b:
+ case Intrinsic::loongarch_lasx_xvmod_h:
+ case Intrinsic::loongarch_lasx_xvmod_w:
+ case Intrinsic::loongarch_lasx_xvmod_d:
+ return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
+ N->getOperand(2));
+ case Intrinsic::loongarch_lsx_vmod_bu:
+ case Intrinsic::loongarch_lsx_vmod_hu:
+ case Intrinsic::loongarch_lsx_vmod_wu:
+ case Intrinsic::loongarch_lsx_vmod_du:
+ case Intrinsic::loongarch_lasx_xvmod_bu:
+ case Intrinsic::loongarch_lasx_xvmod_hu:
+ case Intrinsic::loongarch_lasx_xvmod_wu:
+ case Intrinsic::loongarch_lasx_xvmod_du:
+ return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
+ N->getOperand(2));
+ case Intrinsic::loongarch_lsx_vand_v:
+ case Intrinsic::loongarch_lasx_xvand_v:
+ return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
+ N->getOperand(2));
+ case Intrinsic::loongarch_lsx_vor_v:
+ case Intrinsic::loongarch_lasx_xvor_v:
+ return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
+ N->getOperand(2));
+ case Intrinsic::loongarch_lsx_vxor_v:
+ case Intrinsic::loongarch_lasx_xvxor_v:
+ return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
+ N->getOperand(2));
+ case Intrinsic::loongarch_lsx_vnor_v:
+ case Intrinsic::loongarch_lasx_xvnor_v: {
+ SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
+ N->getOperand(2));
+ return DAG.getNOT(DL, Res, Res->getValueType(0));
+ }
+ case Intrinsic::loongarch_lsx_vandi_b:
+ case Intrinsic::loongarch_lasx_xvandi_b:
+ return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
+ lowerVectorSplatImm<8>(N, 2, DAG));
+ case Intrinsic::loongarch_lsx_vori_b:
+ case Intrinsic::loongarch_lasx_xvori_b:
+ return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
+ lowerVectorSplatImm<8>(N, 2, DAG));
+ case Intrinsic::loongarch_lsx_vxori_b:
+ case Intrinsic::loongarch_lasx_xvxori_b:
+ return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
+ lowerVectorSplatImm<8>(N, 2, DAG));
+ case Intrinsic::loongarch_lsx_vsll_b:
+ case Intrinsic::loongarch_lsx_vsll_h:
+ case Intrinsic::loongarch_lsx_vsll_w:
+ case Intrinsic::loongarch_lsx_vsll_d:
+ case Intrinsic::loongarch_lasx_xvsll_b:
+ case Intrinsic::loongarch_lasx_xvsll_h:
+ case Intrinsic::loongarch_lasx_xvsll_w:
+ case Intrinsic::loongarch_lasx_xvsll_d:
+ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
+ truncateVecElts(N, DAG));
+ case Intrinsic::loongarch_lsx_vslli_b:
+ case Intrinsic::loongarch_lasx_xvslli_b:
+ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
+ lowerVectorSplatImm<3>(N, 2, DAG));
+ case Intrinsic::loongarch_lsx_vslli_h:
+ case Intrinsic::loongarch_lasx_xvslli_h:
+ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
+ lowerVectorSplatImm<4>(N, 2, DAG));
+ case Intrinsic::loongarch_lsx_vslli_w:
+ case Intrinsic::loongarch_lasx_xvslli_w:
+ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
+ lowerVectorSplatImm<5>(N, 2, DAG));
+ case Intrinsic::loongarch_lsx_vslli_d:
+ case Intrinsic::loongarch_lasx_xvslli_d:
+ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
+ lowerVectorSplatImm<6>(N, 2, DAG));
+ case Intrinsic::loongarch_lsx_vsrl_b:
+ case Intrinsic::loongarch_lsx_vsrl_h:
+ case Intrinsic::loongarch_lsx_vsrl_w:
+ case Intrinsic::loongarch_lsx_vsrl_d:
+ case Intrinsic::loongarch_lasx_xvsrl_b:
+ case Intrinsic::loongarch_lasx_xvsrl_h:
+ case Intrinsic::loongarch_lasx_xvsrl_w:
+ case Intrinsic::loongarch_lasx_xvsrl_d:
+ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
+ truncateVecElts(N, DAG));
+ case Intrinsic::loongarch_lsx_vsrli_b:
+ case Intrinsic::loongarch_lasx_xvsrli_b:
+ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
+ lowerVectorSplatImm<3>(N, 2, DAG));
+ case Intrinsic::loongarch_lsx_vsrli_h:
+ case Intrinsic::loongarch_lasx_xvsrli_h:
+ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
+ lowerVectorSplatImm<4>(N, 2, DAG));
+ case Intrinsic::loongarch_lsx_vsrli_w:
+ case Intrinsic::loongarch_lasx_xvsrli_w:
+ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
+ lowerVectorSplatImm<5>(N, 2, DAG));
+ case Intrinsic::loongarch_lsx_vsrli_d:
+ case Intrinsic::loongarch_lasx_xvsrli_d:
+ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
+ lowerVectorSplatImm<6>(N, 2, DAG));
+ case Intrinsic::loongarch_lsx_vsra_b:
+ case Intrinsic::loongarch_lsx_vsra_h:
+ case Intrinsic::loongarch_lsx_vsra_w:
+ case Intrinsic::loongarch_lsx_vsra_d:
+ case Intrinsic::loongarch_lasx_xvsra_b:
+ case Intrinsic::loongarch_lasx_xvsra_h:
+ case Intrinsic::loongarch_lasx_xvsra_w:
+ case Intrinsic::loongarch_lasx_xvsra_d:
+ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
+ truncateVecElts(N, DAG));
+ case Intrinsic::loongarch_lsx_vsrai_b:
+ case Intrinsic::loongarch_lasx_xvsrai_b:
+ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
+ lowerVectorSplatImm<3>(N, 2, DAG));
+ case Intrinsic::loongarch_lsx_vsrai_h:
+ case Intrinsic::loongarch_lasx_xvsrai_h:
+ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
+ lowerVectorSplatImm<4>(N, 2, DAG));
+ case Intrinsic::loongarch_lsx_vsrai_w:
+ case Intrinsic::loongarch_lasx_xvsrai_w:
+ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
+ lowerVectorSplatImm<5>(N, 2, DAG));
+ case Intrinsic::loongarch_lsx_vsrai_d:
+ case Intrinsic::loongarch_lasx_xvsrai_d:
+ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
+ lowerVectorSplatImm<6>(N, 2, DAG));
+ case Intrinsic::loongarch_lsx_vclz_b:
+ case Intrinsic::loongarch_lsx_vclz_h:
+ case Intrinsic::loongarch_lsx_vclz_w:
+ case Intrinsic::loongarch_lsx_vclz_d:
+ case Intrinsic::loongarch_lasx_xvclz_b:
+ case Intrinsic::loongarch_lasx_xvclz_h:
+ case Intrinsic::loongarch_lasx_xvclz_w:
+ case Intrinsic::loongarch_lasx_xvclz_d:
+ return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
+ case Intrinsic::loongarch_lsx_vpcnt_b:
+ case Intrinsic::loongarch_lsx_vpcnt_h:
+ case Intrinsic::loongarch_lsx_vpcnt_w:
+ case Intrinsic::loongarch_lsx_vpcnt_d:
+ case Intrinsic::loongarch_lasx_xvpcnt_b:
+ case Intrinsic::loongarch_lasx_xvpcnt_h:
+ case Intrinsic::loongarch_lasx_xvpcnt_w:
+ case Intrinsic::loongarch_lasx_xvpcnt_d:
+ return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
+ case Intrinsic::loongarch_lsx_vbitclr_b:
+ case Intrinsic::loongarch_lsx_vbitclr_h:
+ case Intrinsic::loongarch_lsx_vbitclr_w:
+ case Intrinsic::loongarch_lsx_vbitclr_d:
+ case Intrinsic::loongarch_lasx_xvbitclr_b:
+ case Intrinsic::loongarch_lasx_xvbitclr_h:
+ case Intrinsic::loongarch_lasx_xvbitclr_w:
+ case Intrinsic::loongarch_lasx_xvbitclr_d:
+ return lowerVectorBitClear(N, DAG);
+ case Intrinsic::loongarch_lsx_vbitclri_b:
+ case Intrinsic::loongarch_lasx_xvbitclri_b:
+ return lowerVectorBitClearImm<3>(N, DAG);
+ case Intrinsic::loongarch_lsx_vbitclri_h:
+ case Intrinsic::loongarch_lasx_xvbitclri_h:
+ return lowerVectorBitClearImm<4>(N, DAG);
+ case Intrinsic::loongarch_lsx_vbitclri_w:
+ case Intrinsic::loongarch_lasx_xvbitclri_w:
+ return lowerVectorBitClearImm<5>(N, DAG);
+ case Intrinsic::loongarch_lsx_vbitclri_d:
+ case Intrinsic::loongarch_lasx_xvbitclri_d:
+ return lowerVectorBitClearImm<6>(N, DAG);
+ case Intrinsic::loongarch_lsx_vbitset_b:
+ case Intrinsic::loongarch_lsx_vbitset_h:
+ case Intrinsic::loongarch_lsx_vbitset_w:
+ case Intrinsic::loongarch_lsx_vbitset_d:
+ case Intrinsic::loongarch_lasx_xvbitset_b:
+ case Intrinsic::loongarch_lasx_xvbitset_h:
+ case Intrinsic::loongarch_lasx_xvbitset_w:
+ case Intrinsic::loongarch_lasx_xvbitset_d: {
+ EVT VecTy = N->getValueType(0);
+ SDValue One = DAG.getConstant(1, DL, VecTy);
+ return DAG.getNode(
+ ISD::OR, DL, VecTy, N->getOperand(1),
+ DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
+ }
+ case Intrinsic::loongarch_lsx_vbitseti_b:
+ case Intrinsic::loongarch_lasx_xvbitseti_b:
+ return lowerVectorBitSetImm<3>(N, DAG);
+ case Intrinsic::loongarch_lsx_vbitseti_h:
+ case Intrinsic::loongarch_lasx_xvbitseti_h:
+ return lowerVectorBitSetImm<4>(N, DAG);
+ case Intrinsic::loongarch_lsx_vbitseti_w:
+ case Intrinsic::loongarch_lasx_xvbitseti_w:
+ return lowerVectorBitSetImm<5>(N, DAG);
+ case Intrinsic::loongarch_lsx_vbitseti_d:
+ case Intrinsic::loongarch_lasx_xvbitseti_d:
+ return lowerVectorBitSetImm<6>(N, DAG);
+ case Intrinsic::loongarch_lsx_vbitrev_b:
+ case Intrinsic::loongarch_lsx_vbitrev_h:
+ case Intrinsic::loongarch_lsx_vbitrev_w:
+ case Intrinsic::loongarch_lsx_vbitrev_d:
+ case Intrinsic::loongarch_lasx_xvbitrev_b:
+ case Intrinsic::loongarch_lasx_xvbitrev_h:
+ case Intrinsic::loongarch_lasx_xvbitrev_w:
+ case Intrinsic::loongarch_lasx_xvbitrev_d: {
+ EVT VecTy = N->getValueType(0);
+ SDValue One = DAG.getConstant(1, DL, VecTy);
+ return DAG.getNode(
+ ISD::XOR, DL, VecTy, N->getOperand(1),
+ DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
+ }
+ case Intrinsic::loongarch_lsx_vbitrevi_b:
+ case Intrinsic::loongarch_lasx_xvbitrevi_b:
+ return lowerVectorBitRevImm<3>(N, DAG);
+ case Intrinsic::loongarch_lsx_vbitrevi_h:
+ case Intrinsic::loongarch_lasx_xvbitrevi_h:
+ return lowerVectorBitRevImm<4>(N, DAG);
+ case Intrinsic::loongarch_lsx_vbitrevi_w:
+ case Intrinsic::loongarch_lasx_xvbitrevi_w:
+ return lowerVectorBitRevImm<5>(N, DAG);
+ case Intrinsic::loongarch_lsx_vbitrevi_d:
+ case Intrinsic::loongarch_lasx_xvbitrevi_d:
+ return lowerVectorBitRevImm<6>(N, DAG);
+ case Intrinsic::loongarch_lsx_vfadd_s:
+ case Intrinsic::loongarch_lsx_vfadd_d:
+ case Intrinsic::loongarch_lasx_xvfadd_s:
+ case Intrinsic::loongarch_lasx_xvfadd_d:
+ return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
+ N->getOperand(2));
+ case Intrinsic::loongarch_lsx_vfsub_s:
+ case Intrinsic::loongarch_lsx_vfsub_d:
+ case Intrinsic::loongarch_lasx_xvfsub_s:
+ case Intrinsic::loongarch_lasx_xvfsub_d:
+ return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
+ N->getOperand(2));
+ case Intrinsic::loongarch_lsx_vfmul_s:
+ case Intrinsic::loongarch_lsx_vfmul_d:
+ case Intrinsic::loongarch_lasx_xvfmul_s:
+ case Intrinsic::loongarch_lasx_xvfmul_d:
+ return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
+ N->getOperand(2));
+ case Intrinsic::loongarch_lsx_vfdiv_s:
+ case Intrinsic::loongarch_lsx_vfdiv_d:
+ case Intrinsic::loongarch_lasx_xvfdiv_s:
+ case Intrinsic::loongarch_lasx_xvfdiv_d:
+ return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
+ N->getOperand(2));
+ case Intrinsic::loongarch_lsx_vfmadd_s:
+ case Intrinsic::loongarch_lsx_vfmadd_d:
+ case Intrinsic::loongarch_lasx_xvfmadd_s:
+ case Intrinsic::loongarch_lasx_xvfmadd_d:
+ return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3));
+ case Intrinsic::loongarch_lsx_vinsgr2vr_b:
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2),
+ legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
+ case Intrinsic::loongarch_lsx_vinsgr2vr_h:
+ case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2),
+ legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
+ case Intrinsic::loongarch_lsx_vinsgr2vr_w:
+ case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2),
+ legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
+ case Intrinsic::loongarch_lsx_vinsgr2vr_d:
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2),
+ legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
+ case Intrinsic::loongarch_lsx_vreplgr2vr_b:
+ case Intrinsic::loongarch_lsx_vreplgr2vr_h:
+ case Intrinsic::loongarch_lsx_vreplgr2vr_w:
+ case Intrinsic::loongarch_lsx_vreplgr2vr_d:
+ case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
+ case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
+ case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
+ case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
+ EVT ResTy = N->getValueType(0);
+ SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
+ return DAG.getBuildVector(ResTy, DL, Ops);
+ }
+ case Intrinsic::loongarch_lsx_vreplve_b:
+ case Intrinsic::loongarch_lsx_vreplve_h:
+ case Intrinsic::loongarch_lsx_vreplve_w:
+ case Intrinsic::loongarch_lsx_vreplve_d:
+ case Intrinsic::loongarch_lasx_xvreplve_b:
+ case Intrinsic::loongarch_lasx_xvreplve_h:
+ case Intrinsic::loongarch_lasx_xvreplve_w:
+ case Intrinsic::loongarch_lasx_xvreplve_d:
+ return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
+ N->getOperand(1),
+ DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
+ N->getOperand(2)));
+ }
+ return SDValue();
+}
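+
+// Usage sketch for the combine above: an intrinsic call such as
+//
+//   %r = call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> %a, <16 x i8> %b)
+//
+// is rewritten to a generic (add %a, %b) node, so later DAG combines and
+// instruction selection treat it like ordinary vector addition.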
+
SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -1699,6 +3033,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
return performSRLCombine(N, DAG, DCI, Subtarget);
case LoongArchISD::BITREV_W:
return performBITREV_WCombine(N, DAG, DCI, Subtarget);
+ case ISD::INTRINSIC_WO_CHAIN:
+ return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
}
@@ -1752,6 +3088,196 @@ static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
return SinkMBB;
}
+static MachineBasicBlock *
+emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
+ const LoongArchSubtarget &Subtarget) {
+ unsigned CondOpc;
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case LoongArch::PseudoVBZ:
+ CondOpc = LoongArch::VSETEQZ_V;
+ break;
+ case LoongArch::PseudoVBZ_B:
+ CondOpc = LoongArch::VSETANYEQZ_B;
+ break;
+ case LoongArch::PseudoVBZ_H:
+ CondOpc = LoongArch::VSETANYEQZ_H;
+ break;
+ case LoongArch::PseudoVBZ_W:
+ CondOpc = LoongArch::VSETANYEQZ_W;
+ break;
+ case LoongArch::PseudoVBZ_D:
+ CondOpc = LoongArch::VSETANYEQZ_D;
+ break;
+ case LoongArch::PseudoVBNZ:
+ CondOpc = LoongArch::VSETNEZ_V;
+ break;
+ case LoongArch::PseudoVBNZ_B:
+ CondOpc = LoongArch::VSETALLNEZ_B;
+ break;
+ case LoongArch::PseudoVBNZ_H:
+ CondOpc = LoongArch::VSETALLNEZ_H;
+ break;
+ case LoongArch::PseudoVBNZ_W:
+ CondOpc = LoongArch::VSETALLNEZ_W;
+ break;
+ case LoongArch::PseudoVBNZ_D:
+ CondOpc = LoongArch::VSETALLNEZ_D;
+ break;
+ case LoongArch::PseudoXVBZ:
+ CondOpc = LoongArch::XVSETEQZ_V;
+ break;
+ case LoongArch::PseudoXVBZ_B:
+ CondOpc = LoongArch::XVSETANYEQZ_B;
+ break;
+ case LoongArch::PseudoXVBZ_H:
+ CondOpc = LoongArch::XVSETANYEQZ_H;
+ break;
+ case LoongArch::PseudoXVBZ_W:
+ CondOpc = LoongArch::XVSETANYEQZ_W;
+ break;
+ case LoongArch::PseudoXVBZ_D:
+ CondOpc = LoongArch::XVSETANYEQZ_D;
+ break;
+ case LoongArch::PseudoXVBNZ:
+ CondOpc = LoongArch::XVSETNEZ_V;
+ break;
+ case LoongArch::PseudoXVBNZ_B:
+ CondOpc = LoongArch::XVSETALLNEZ_B;
+ break;
+ case LoongArch::PseudoXVBNZ_H:
+ CondOpc = LoongArch::XVSETALLNEZ_H;
+ break;
+ case LoongArch::PseudoXVBNZ_W:
+ CondOpc = LoongArch::XVSETALLNEZ_W;
+ break;
+ case LoongArch::PseudoXVBNZ_D:
+ CondOpc = LoongArch::XVSETALLNEZ_D;
+ break;
+ }
+
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ DebugLoc DL = MI.getDebugLoc();
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(It, FalseBB);
+ F->insert(It, TrueBB);
+ F->insert(It, SinkBB);
+
+ // Transfer the remainder of BB and its successor edges to SinkBB.
+ SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
+ SinkBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Insert the vector-set condition instruction into BB.
+ Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
+ BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
+
+ // Insert branch.
+ BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
+ BB->addSuccessor(FalseBB);
+ BB->addSuccessor(TrueBB);
+
+ // FalseBB: materialize 0.
+ Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
+ BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
+ .addReg(LoongArch::R0)
+ .addImm(0);
+ BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
+ FalseBB->addSuccessor(SinkBB);
+
+ // TrueBB: materialize 1.
+ Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
+ BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
+ .addReg(LoongArch::R0)
+ .addImm(1);
+ TrueBB->addSuccessor(SinkBB);
+
+ // SinkBB: merge the results.
+ BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
+ MI.getOperand(0).getReg())
+ .addReg(RD1)
+ .addMBB(FalseBB)
+ .addReg(RD2)
+ .addMBB(TrueBB);
+
+ // The pseudo instruction is gone now.
+ MI.eraseFromParent();
+ return SinkBB;
+}
+
+static MachineBasicBlock *
+emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
+ const LoongArchSubtarget &Subtarget) {
+ unsigned InsOp;
+ unsigned HalfSize;
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case LoongArch::PseudoXVINSGR2VR_B:
+ HalfSize = 16;
+ InsOp = LoongArch::VINSGR2VR_B;
+ break;
+ case LoongArch::PseudoXVINSGR2VR_H:
+ HalfSize = 8;
+ InsOp = LoongArch::VINSGR2VR_H;
+ break;
+ }
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
+ const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ // XDst = vector_insert XSrc, Elt, Idx
+ Register XDst = MI.getOperand(0).getReg();
+ Register XSrc = MI.getOperand(1).getReg();
+ Register Elt = MI.getOperand(2).getReg();
+ unsigned Idx = MI.getOperand(3).getImm();
+
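+ // Only the low 128 bits are reachable with VINSGR2VR, so when the index
+ // selects the high half, first bring that half down with XVPERMI_Q, insert
+ // within the 128-bit subregister, then merge the updated half back into the
+ // destination with a second XVPERMI_Q.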
+ Register ScratchReg1 = XSrc;
+ if (Idx >= HalfSize) {
+ ScratchReg1 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
+ .addReg(XSrc)
+ .addReg(XSrc)
+ .addImm(1);
+ }
+
+ Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
+ Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
+ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
+ .addReg(ScratchReg1, 0, LoongArch::sub_128);
+ BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
+ .addReg(ScratchSubReg1)
+ .addReg(Elt)
+ .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
+
+ Register ScratchReg2 = XDst;
+ if (Idx >= HalfSize)
+ ScratchReg2 = MRI.createVirtualRegister(RC);
+
+ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
+ .addImm(0)
+ .addReg(ScratchSubReg2)
+ .addImm(LoongArch::sub_128);
+
+ if (Idx >= HalfSize)
+ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
+ .addReg(XSrc)
+ .addReg(ScratchReg2)
+ .addImm(2);
+
+ MI.eraseFromParent();
+ return BB;
+}
+
MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
@@ -1786,6 +3312,30 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
MI.eraseFromParent();
return BB;
}
+ case LoongArch::PseudoVBZ:
+ case LoongArch::PseudoVBZ_B:
+ case LoongArch::PseudoVBZ_H:
+ case LoongArch::PseudoVBZ_W:
+ case LoongArch::PseudoVBZ_D:
+ case LoongArch::PseudoVBNZ:
+ case LoongArch::PseudoVBNZ_B:
+ case LoongArch::PseudoVBNZ_H:
+ case LoongArch::PseudoVBNZ_W:
+ case LoongArch::PseudoVBNZ_D:
+ case LoongArch::PseudoXVBZ:
+ case LoongArch::PseudoXVBZ_B:
+ case LoongArch::PseudoXVBZ_H:
+ case LoongArch::PseudoXVBZ_W:
+ case LoongArch::PseudoXVBZ_D:
+ case LoongArch::PseudoXVBNZ:
+ case LoongArch::PseudoXVBNZ_B:
+ case LoongArch::PseudoXVBNZ_H:
+ case LoongArch::PseudoXVBNZ_W:
+ case LoongArch::PseudoXVBNZ_D:
+ return emitVecCondBranchPseudo(MI, BB, Subtarget);
+ case LoongArch::PseudoXVINSGR2VR_B:
+ case LoongArch::PseudoXVINSGR2VR_H:
+ return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
}
}
@@ -1858,6 +3408,13 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(MOVFCSR2GR)
NODE_NAME_CASE(CACOP_D)
NODE_NAME_CASE(CACOP_W)
+ NODE_NAME_CASE(VPICK_SEXT_ELT)
+ NODE_NAME_CASE(VPICK_ZEXT_ELT)
+ NODE_NAME_CASE(VREPLVE)
+ NODE_NAME_CASE(VALL_ZERO)
+ NODE_NAME_CASE(VANY_ZERO)
+ NODE_NAME_CASE(VALL_NONZERO)
+ NODE_NAME_CASE(VANY_NONZERO)
}
#undef NODE_NAME_CASE
return nullptr;
@@ -1884,6 +3441,14 @@ const MCPhysReg ArgFPR64s[] = {
LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
+const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
+ LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
+ LoongArch::VR6, LoongArch::VR7};
+
+const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
+ LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
+ LoongArch::XR6, LoongArch::XR7};
+
// Pass a 2*GRLen argument that has been split into two GRLen values through
// registers or the stack as necessary.
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
@@ -2030,6 +3595,10 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
Reg = State.AllocateReg(ArgFPR32s);
else if (ValVT == MVT::f64 && !UseGPRForFloat)
Reg = State.AllocateReg(ArgFPR64s);
+ else if (ValVT.is128BitVector())
+ Reg = State.AllocateReg(ArgVRs);
+ else if (ValVT.is256BitVector())
+ Reg = State.AllocateReg(ArgXRs);
else
Reg = State.AllocateReg(ArgGPRs);
@@ -2896,8 +4465,9 @@ LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
- Value *Ordering =
- Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(Ord));
+ AtomicOrdering FailOrd = CI->getFailureOrdering();
+ Value *FailureOrdering =
+ Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
// TODO: Support cmpxchg on LA32.
Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
@@ -2908,7 +4478,7 @@ Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
Function *MaskedCmpXchg =
Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
Value *Result = Builder.CreateCall(
- MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
+ MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
return Result;
}
@@ -2916,6 +4486,22 @@ Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
+ // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
+ // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
+ // mask, as this produces better code than the LL/SC loop emitted by
+ // int_loongarch_masked_atomicrmw_xchg.
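+ // For instance (illustrative IR): a masked subword `atomicrmw xchg ptr %p,
+ // i8 0` becomes an atomic AND with ~Mask, and `atomicrmw xchg ptr %p, i8 -1`
+ // becomes an atomic OR with Mask, touching only the addressed byte.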
+ if (AI->getOperation() == AtomicRMWInst::Xchg &&
+ isa<ConstantInt>(AI->getValOperand())) {
+ ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
+ if (CVal->isZero())
+ return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
+ Builder.CreateNot(Mask, "Inv_Mask"),
+ AI->getAlign(), Ord);
+ if (CVal->isMinusOne())
+ return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
+ AI->getAlign(), Ord);
+ }
+
unsigned GRLen = Subtarget.getGRLen();
Value *Ordering =
Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
@@ -3030,12 +4616,12 @@ LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
return TargetLowering::getConstraintType(Constraint);
}
-unsigned LoongArchTargetLowering::getInlineAsmMemConstraint(
+InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
StringRef ConstraintCode) const {
- return StringSwitch<unsigned>(ConstraintCode)
- .Case("k", InlineAsm::Constraint_k)
- .Case("ZB", InlineAsm::Constraint_ZB)
- .Case("ZC", InlineAsm::Constraint_ZC)
+ return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
+ .Case("k", InlineAsm::ConstraintCode::k)
+ .Case("ZB", InlineAsm::ConstraintCode::ZB)
+ .Case("ZC", InlineAsm::ConstraintCode::ZC)
.Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
}
@@ -3079,8 +4665,8 @@ LoongArchTargetLowering::getRegForInlineAsmConstraint(
// decode the usage of register name aliases into their official names. And
// AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
// official register names.
- if (Constraint.startswith("{$r") || Constraint.startswith("{$f") ||
- Constraint.startswith("{$vr") || Constraint.startswith("{$xr")) {
+ if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
+ Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
bool IsFP = Constraint[2] == 'f';
std::pair<StringRef, StringRef> Temp = Constraint.split('$');
std::pair<unsigned, const TargetRegisterClass *> R;
@@ -3103,10 +4689,10 @@ LoongArchTargetLowering::getRegForInlineAsmConstraint(
}
void LoongArchTargetLowering::LowerAsmOperandForConstraint(
- SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
+ SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
// Currently only support length 1 constraints.
- if (Constraint.length() == 1) {
+ if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'l':
// Validate & create a 16-bit signed immediate operand.
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 500407493fe5..2d73a7394946 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -21,7 +21,6 @@
namespace llvm {
class LoongArchSubtarget;
-struct LoongArchRegisterInfo;
namespace LoongArchISD {
enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
@@ -110,6 +109,20 @@ enum NodeType : unsigned {
// Read CPU configuration information operation
CPUCFG,
+
+ // Vector Shuffle
+ VREPLVE,
+
+ // Extended vector element extraction
+ VPICK_SEXT_ELT,
+ VPICK_ZEXT_ELT,
+
+ // Vector comparisons
+ VALL_ZERO,
+ VANY_ZERO,
+ VALL_NONZERO,
+ VANY_NONZERO,
+
// Intrinsic operations end =============================================
};
} // end namespace LoongArchISD
@@ -216,6 +229,10 @@ public:
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
unsigned *Fast = nullptr) const override;
+ bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override {
+ return false;
+ }
+
private:
/// Target-specific function used to lower LoongArch calling conventions.
typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI,
@@ -248,6 +265,7 @@ private:
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const override;
+ SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
@@ -261,6 +279,9 @@ private:
SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
@@ -269,13 +290,14 @@ private:
ConstraintType getConstraintType(StringRef Constraint) const override;
- unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override;
+ InlineAsm::ConstraintCode
+ getInlineAsmMemConstraint(StringRef ConstraintCode) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;
- void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index ef79b8a0dcd3..6576100d3b32 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -47,6 +47,22 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
+ // VR->VR copies.
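+ // (vori.b/xvori.b with a zero immediate copy the source register unchanged.)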
+ if (LoongArch::LSX128RegClass.contains(DstReg, SrcReg)) {
+ BuildMI(MBB, MBBI, DL, get(LoongArch::VORI_B), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
+ return;
+ }
+
+ // XR->XR copies.
+ if (LoongArch::LASX256RegClass.contains(DstReg, SrcReg)) {
+ BuildMI(MBB, MBBI, DL, get(LoongArch::XVORI_B), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
+ return;
+ }
+
// GPR->CFR copy.
if (LoongArch::CFRRegClass.contains(DstReg) &&
LoongArch::GPRRegClass.contains(SrcReg)) {
@@ -74,6 +90,14 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = LoongArch::FMOV_S;
} else if (LoongArch::FPR64RegClass.contains(DstReg, SrcReg)) {
Opc = LoongArch::FMOV_D;
+ } else if (LoongArch::GPRRegClass.contains(DstReg) &&
+ LoongArch::FPR32RegClass.contains(SrcReg)) {
+ // FPR32 -> GPR copies
+ Opc = LoongArch::MOVFR2GR_S;
+ } else if (LoongArch::GPRRegClass.contains(DstReg) &&
+ LoongArch::FPR64RegClass.contains(SrcReg)) {
+ // FPR64 -> GPR copies
+ Opc = LoongArch::MOVFR2GR_D;
} else {
// TODO: support other copies.
llvm_unreachable("Impossible reg-to-reg copy");
@@ -99,6 +123,10 @@ void LoongArchInstrInfo::storeRegToStackSlot(
Opcode = LoongArch::FST_S;
else if (LoongArch::FPR64RegClass.hasSubClassEq(RC))
Opcode = LoongArch::FST_D;
+ else if (LoongArch::LSX128RegClass.hasSubClassEq(RC))
+ Opcode = LoongArch::VST;
+ else if (LoongArch::LASX256RegClass.hasSubClassEq(RC))
+ Opcode = LoongArch::XVST;
else if (LoongArch::CFRRegClass.hasSubClassEq(RC))
Opcode = LoongArch::PseudoST_CFR;
else
@@ -133,6 +161,10 @@ void LoongArchInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
Opcode = LoongArch::FLD_S;
else if (LoongArch::FPR64RegClass.hasSubClassEq(RC))
Opcode = LoongArch::FLD_D;
+ else if (LoongArch::LSX128RegClass.hasSubClassEq(RC))
+ Opcode = LoongArch::VLD;
+ else if (LoongArch::LASX256RegClass.hasSubClassEq(RC))
+ Opcode = LoongArch::XVLD;
else if (LoongArch::CFRRegClass.hasSubClassEq(RC))
Opcode = LoongArch::PseudoLD_CFR;
else
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
index cf83abf27a1e..4b145d0baa41 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
@@ -88,5 +88,20 @@ protected:
const LoongArchSubtarget &STI;
};
+namespace LoongArch {
+
+// Bit masks for the fclass.{s/d} result, one bit per floating-point class.
+static constexpr unsigned FClassMaskSignalingNaN = 0x001;
+static constexpr unsigned FClassMaskQuietNaN = 0x002;
+static constexpr unsigned FClassMaskNegativeInfinity = 0x004;
+static constexpr unsigned FClassMaskNegativeNormal = 0x008;
+static constexpr unsigned FClassMaskNegativeSubnormal = 0x010;
+static constexpr unsigned FClassMaskNegativeZero = 0x020;
+static constexpr unsigned FClassMaskPositiveInfinity = 0x040;
+static constexpr unsigned FClassMaskPositiveNormal = 0x080;
+static constexpr unsigned FClassMaskPositiveSubnormal = 0x100;
+static constexpr unsigned FClassMaskPositiveZero = 0x200;
+} // namespace LoongArch
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index ac391ef471b1..2fea0f33e9eb 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -143,6 +143,32 @@ def loongarch_iocsrwr_d : SDNode<"LoongArchISD::IOCSRWR_D",
def loongarch_cpucfg : SDNode<"LoongArchISD::CPUCFG", SDTUnaryOp,
[SDNPHasChain]>;
+def to_fclass_mask: SDNodeXForm<timm, [{
+ uint64_t Check = N->getZExtValue();
+ unsigned Mask = 0;
+ if (Check & fcSNan)
+ Mask |= LoongArch::FClassMaskSignalingNaN;
+ if (Check & fcQNan)
+ Mask |= LoongArch::FClassMaskQuietNaN;
+ if (Check & fcPosInf)
+ Mask |= LoongArch::FClassMaskPositiveInfinity;
+ if (Check & fcNegInf)
+ Mask |= LoongArch::FClassMaskNegativeInfinity;
+ if (Check & fcPosNormal)
+ Mask |= LoongArch::FClassMaskPositiveNormal;
+ if (Check & fcNegNormal)
+ Mask |= LoongArch::FClassMaskNegativeNormal;
+ if (Check & fcPosSubnormal)
+ Mask |= LoongArch::FClassMaskPositiveSubnormal;
+ if (Check & fcNegSubnormal)
+ Mask |= LoongArch::FClassMaskNegativeSubnormal;
+ if (Check & fcPosZero)
+ Mask |= LoongArch::FClassMaskPositiveZero;
+ if (Check & fcNegZero)
+ Mask |= LoongArch::FClassMaskNegativeZero;
+ return CurDAG->getTargetConstant(Mask, SDLoc(N), Subtarget->getGRLenVT());
+}]>;
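+// For example, an is_fpclass test for any NaN (fcSNan | fcQNan) maps to
+// FClassMaskSignalingNaN | FClassMaskQuietNaN = 0x003.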
+
//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
@@ -182,7 +208,7 @@ def imm32 : Operand<GRLenVT> {
let ParserMatchClass = ImmAsmOperand<"", 32, "">;
}
-def uimm1 : Operand<GRLenVT> {
+def uimm1 : Operand<GRLenVT>, ImmLeaf<GRLenVT, [{return isUInt<1>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<1>;
}
@@ -197,11 +223,11 @@ def uimm2_plus1 : Operand<GRLenVT>,
let DecoderMethod = "decodeUImmOperand<2, 1>";
}
-def uimm3 : Operand<GRLenVT> {
+def uimm3 : Operand<GRLenVT>, ImmLeaf<GRLenVT, [{return isUInt<3>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<3>;
}
-def uimm4 : Operand<GRLenVT> {
+def uimm4 : Operand<GRLenVT>, ImmLeaf<GRLenVT, [{return isUInt<4>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<4>;
}
@@ -519,6 +545,40 @@ def AlslSlliImmI0 : SDNodeXForm<imm, [{
N->getValueType(0));
}]>;
+// Check if (and r, imm) can be optimized to (BSTRINS r, R0, msb, lsb),
+// in which imm = ~((2^(msb-lsb+1) - 1) << lsb).
+def BstrinsImm : PatLeaf<(imm), [{
+ if (!N->hasOneUse())
+ return false;
+ uint64_t Imm = N->getZExtValue();
+ // andi can be used instead if Imm <= 0xfff.
+ if (Imm <= 0xfff)
+ return false;
+ unsigned MaskIdx, MaskLen;
+ return N->getValueType(0).getSizeInBits() == 32
+ ? llvm::isShiftedMask_32(~Imm, MaskIdx, MaskLen)
+ : llvm::isShiftedMask_64(~Imm, MaskIdx, MaskLen);
+}]>;
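+// Worked example (illustrative values): imm = 0xffff00ff on an i32 gives
+// ~imm = 0x0000ff00, a shifted mask with MaskIdx = 8 and MaskLen = 8, so the
+// `and` becomes (BSTRINS_W r, R0, 15, 8), zeroing bits 15..8.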
+
+def BstrinsMsb: SDNodeXForm<imm, [{
+ uint64_t Imm = N->getZExtValue();
+ unsigned MaskIdx, MaskLen;
+ N->getValueType(0).getSizeInBits() == 32
+ ? llvm::isShiftedMask_32(~Imm, MaskIdx, MaskLen)
+ : llvm::isShiftedMask_64(~Imm, MaskIdx, MaskLen);
+ return CurDAG->getTargetConstant(MaskIdx + MaskLen - 1, SDLoc(N),
+ N->getValueType(0));
+}]>;
+
+def BstrinsLsb: SDNodeXForm<imm, [{
+ uint64_t Imm = N->getZExtValue();
+ unsigned MaskIdx, MaskLen;
+ N->getValueType(0).getSizeInBits() == 32
+ ? llvm::isShiftedMask_32(~Imm, MaskIdx, MaskLen)
+ : llvm::isShiftedMask_64(~Imm, MaskIdx, MaskLen);
+ return CurDAG->getTargetConstant(MaskIdx, SDLoc(N), N->getValueType(0));
+}]>;
+
//===----------------------------------------------------------------------===//
// Instruction Formats
//===----------------------------------------------------------------------===//
@@ -586,6 +646,7 @@ class Br_I26<bits<32> op>
: FmtI26<op, (outs), (ins simm26_b:$imm26), "$imm26"> {
let isBranch = 1;
let isTerminator = 1;
+ let isBarrier = 1;
}
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
@@ -1142,6 +1203,18 @@ def : Pat<(not (or GPR:$rj, GPR:$rk)), (NOR GPR:$rj, GPR:$rk)>;
def : Pat<(or GPR:$rj, (not GPR:$rk)), (ORN GPR:$rj, GPR:$rk)>;
def : Pat<(and GPR:$rj, (not GPR:$rk)), (ANDN GPR:$rj, GPR:$rk)>;
+let Predicates = [IsLA32] in {
+def : Pat<(and GPR:$rj, BstrinsImm:$imm),
+ (BSTRINS_W GPR:$rj, R0, (BstrinsMsb BstrinsImm:$imm),
+ (BstrinsLsb BstrinsImm:$imm))>;
+} // Predicates = [IsLA32]
+
+let Predicates = [IsLA64] in {
+def : Pat<(and GPR:$rj, BstrinsImm:$imm),
+ (BSTRINS_D GPR:$rj, R0, (BstrinsMsb BstrinsImm:$imm),
+ (BstrinsLsb BstrinsImm:$imm))>;
+} // Predicates = [IsLA64]
+
/// Traps
// We lower `trap` to `amswap.w rd:$r0, rk:$r1, rj:$r0`, as this is guaranteed
@@ -1589,20 +1662,42 @@ def : RegRegStPat<store, STX_D, GPR, i64>;
/// Atomic loads and stores
-def : Pat<(atomic_fence timm, timm), (DBAR 0)>;
+// DBAR hint encoding for LA664 and later micro-architectures, paraphrased from
+// the Linux patch revealing it [1]:
+//
+// - Bit 4: kind of constraint (0: completion, 1: ordering)
+// - Bit 3: barrier for previous read (0: true, 1: false)
+// - Bit 2: barrier for previous write (0: true, 1: false)
+// - Bit 1: barrier for succeeding read (0: true, 1: false)
+// - Bit 0: barrier for succeeding write (0: true, 1: false)
+//
+// Hint 0x700: barrier for "read after read" from the same address, which is
+// needed e.g. by LL-SC loops on older models. (On newer models where such
+// reordering is disabled, DBAR 0x700 behaves the same as a nop.)
+//
+// [1]: https://lore.kernel.org/loongarch/20230516124536.535343-1-chenhuacai@loongson.cn/
+//
+// Implementations without support for the finer-granularity hints simply
+// treat them all as the full barrier (DBAR 0), so we can unconditionally
+// start emitting the more precise hints right away.
+
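+// Decoding the hints used below: acquire (0b10100) sets bit 4 (ordering
+// constraint) and clears bits 3, 1 and 0, ordering previous reads against all
+// succeeding accesses; release (0b10010) clears bits 3, 2 and 0, ordering all
+// previous accesses against succeeding writes; acqrel/seqcst (0b10000) clear
+// all four barrier bits, giving a full ordering barrier.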
+def : Pat<(atomic_fence 4, timm), (DBAR 0b10100)>; // acquire
+def : Pat<(atomic_fence 5, timm), (DBAR 0b10010)>; // release
+def : Pat<(atomic_fence 6, timm), (DBAR 0b10000)>; // acqrel
+def : Pat<(atomic_fence 7, timm), (DBAR 0b10000)>; // seqcst
defm : LdPat<atomic_load_8, LD_B>;
defm : LdPat<atomic_load_16, LD_H>;
defm : LdPat<atomic_load_32, LD_W>;
class release_seqcst_store<PatFrag base>
- : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+ : PatFrag<(ops node:$val, node:$ptr), (base node:$val, node:$ptr), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getSuccessOrdering();
return isReleaseOrStronger(Ordering);
}]>;
class unordered_monotonic_store<PatFrag base>
- : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+ : PatFrag<(ops node:$val, node:$ptr), (base node:$val, node:$ptr), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getSuccessOrdering();
return !isReleaseOrStronger(Ordering);
}]>;
@@ -1614,23 +1709,13 @@ def atomic_store_unordered_monotonic_32
def atomic_store_unordered_monotonic_64
: unordered_monotonic_store<atomic_store_64>;
-/// AtomicStores
-
-multiclass AtomicStPat<PatFrag StoreOp, LAInst Inst, RegisterClass StTy,
- ValueType vt> {
- def : Pat<(StoreOp BaseAddr:$ptr, (vt StTy:$val)),
- (Inst StTy:$val, BaseAddr:$ptr, 0)>;
- def : Pat<(StoreOp (AddLike BaseAddr:$ptr, simm12:$imm12), (vt StTy:$val)),
- (Inst StTy:$val, BaseAddr:$ptr, simm12:$imm12)>;
-}
-
-defm : AtomicStPat<atomic_store_8, ST_B, GPR, GRLenVT>;
-defm : AtomicStPat<atomic_store_16, ST_H, GPR, GRLenVT>;
-defm : AtomicStPat<atomic_store_unordered_monotonic_32, ST_W, GPR, i32>,
+defm : StPat<atomic_store_8, ST_B, GPR, GRLenVT>;
+defm : StPat<atomic_store_16, ST_H, GPR, GRLenVT>;
+defm : StPat<atomic_store_unordered_monotonic_32, ST_W, GPR, i32>,
Requires<[IsLA32]>;
def PseudoAtomicStoreW
- : Pseudo<(outs GPR:$dst), (ins GPR:$rj, GPR:$rk)>,
+ : Pseudo<(outs GPR:$dst), (ins GPR:$rk, GPR:$rj)>,
PseudoInstExpansion<(AMSWAP__DB_W R0, GPR:$rk, GPRMemAtomic:$rj)>;
def : Pat<(atomic_store_release_seqcst_32 GPR:$rj, GPR:$rk),
@@ -1638,15 +1723,15 @@ def : Pat<(atomic_store_release_seqcst_32 GPR:$rj, GPR:$rk),
let Predicates = [IsLA64] in {
def PseudoAtomicStoreD
- : Pseudo<(outs GPR:$dst), (ins GPR:$rj, GPR:$rk)>,
+ : Pseudo<(outs GPR:$dst), (ins GPR:$rk, GPR:$rj)>,
PseudoInstExpansion<(AMSWAP__DB_D R0, GPR:$rk, GPRMemAtomic:$rj)>;
def : Pat<(atomic_store_release_seqcst_64 GPR:$rj, GPR:$rk),
(PseudoAtomicStoreD GPR:$rj, GPR:$rk)>;
defm : LdPat<atomic_load_64, LD_D>;
-defm : AtomicStPat<atomic_store_unordered_monotonic_32, ST_W, GPR, i64>;
-defm : AtomicStPat<atomic_store_unordered_monotonic_64, ST_D, GPR, i64>;
+defm : StPat<atomic_store_unordered_monotonic_32, ST_W, GPR, i64>;
+defm : StPat<atomic_store_unordered_monotonic_64, ST_D, GPR, i64>;
} // Predicates = [IsLA64]
/// Atomic Ops
@@ -1730,7 +1815,7 @@ def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax;
class PseudoCmpXchg
: Pseudo<(outs GPR:$res, GPR:$scratch),
- (ins GPR:$addr, GPR:$cmpval, GPR:$newval)> {
+ (ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$fail_order)> {
let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
let mayLoad = 1;
let mayStore = 1;
@@ -1744,7 +1829,7 @@ def PseudoCmpXchg64 : PseudoCmpXchg;
def PseudoMaskedCmpXchg32
: Pseudo<(outs GPR:$res, GPR:$scratch),
(ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask,
- grlenimm:$ordering)> {
+ grlenimm:$fail_order)> {
let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
let mayLoad = 1;
let mayStore = 1;
@@ -1762,6 +1847,43 @@ class AtomicPat<Intrinsic intrin, Pseudo AMInst>
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering),
(AMInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>;
+// These atomic cmpxchg PatFrags only care about the failure ordering.
+// The PatFrags defined by multiclass `ternary_atomic_op_ord` in
+// TargetSelectionDAG.td instead use the merged memory ordering, i.e. the
+// stronger of the success and failure orderings. But for LoongArch LL-SC we
+// only need to care about the failure ordering, as explained in PR #67391,
+// so we define these PatFrags and use them for the cmpxchg patterns below.
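+// For example, a cmpxchg with success = seq_cst and failure = monotonic
+// selects the _failure_monotonic fragment here, even though its merged
+// ordering would be seq_cst.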
+multiclass ternary_atomic_op_failure_ord {
+ def NAME#_failure_monotonic : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+ (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering();
+ return Ordering == AtomicOrdering::Monotonic;
+ }]>;
+ def NAME#_failure_acquire : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+ (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering();
+ return Ordering == AtomicOrdering::Acquire;
+ }]>;
+ def NAME#_failure_release : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+ (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering();
+ return Ordering == AtomicOrdering::Release;
+ }]>;
+ def NAME#_failure_acq_rel : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+ (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering();
+ return Ordering == AtomicOrdering::AcquireRelease;
+ }]>;
+ def NAME#_failure_seq_cst : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+ (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering();
+ return Ordering == AtomicOrdering::SequentiallyConsistent;
+ }]>;
+}
+
+defm atomic_cmp_swap_32 : ternary_atomic_op_failure_ord;
+defm atomic_cmp_swap_64 : ternary_atomic_op_failure_ord;
+
let Predicates = [IsLA64] in {
def : AtomicPat<int_loongarch_masked_atomicrmw_xchg_i64,
PseudoMaskedAtomicSwap32>;
@@ -1820,14 +1942,28 @@ def : AtomicPat<int_loongarch_masked_atomicrmw_umax_i64,
def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i64,
PseudoMaskedAtomicLoadUMin32>;
-def : Pat<(atomic_cmp_swap_64 GPR:$addr, GPR:$cmp, GPR:$new),
- (PseudoCmpXchg64 GPR:$addr, GPR:$cmp, GPR:$new)>;
+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
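+// (NotAtomic = 0, Unordered = 1, Monotonic = 2, Consume = 3, Acquire = 4,
+// Release = 5, AcquireRelease = 6, SequentiallyConsistent = 7.)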
+multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
+ ValueType vt = GRLenVT> {
+ def : Pat<(vt (!cast<PatFrag>(Op#"_failure_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>;
+ def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acquire") GPR:$addr, GPR:$cmp, GPR:$new)),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>;
+ def : Pat<(vt (!cast<PatFrag>(Op#"_failure_release") GPR:$addr, GPR:$cmp, GPR:$new)),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>;
+ def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>;
+ def : Pat<(vt (!cast<PatFrag>(Op#"_failure_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
+}
+
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>;
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>;
def : Pat<(int_loongarch_masked_cmpxchg_i64
- GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order),
(PseudoMaskedCmpXchg32
- GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
-def : Pat<(atomic_cmp_swap_32 GPR:$addr, GPR:$cmp, GPR:$new),
- (PseudoCmpXchg32 GPR:$addr, GPR:$cmp, GPR:$new)>;
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order)>;
def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_max_i64,
PseudoMaskedAtomicLoadMax32>;
@@ -1857,9 +1993,9 @@ defm : PseudoBinPat<"atomic_load_xor_32", PseudoAtomicLoadXor32>;
/// Intrinsics
def : Pat<(int_loongarch_cacop_d timm:$op, i64:$rj, timm:$imm12),
- (CACOP uimm5:$op, GPR:$rj, simm12:$imm12)>;
+ (CACOP timm:$op, GPR:$rj, timm:$imm12)>;
def : Pat<(int_loongarch_cacop_w i32:$op, i32:$rj, i32:$imm12),
- (CACOP uimm5:$op, GPR:$rj, simm12:$imm12)>;
+ (CACOP timm:$op, GPR:$rj, timm:$imm12)>;
def : Pat<(loongarch_dbar uimm15:$imm15), (DBAR uimm15:$imm15)>;
def : Pat<(loongarch_ibar uimm15:$imm15), (IBAR uimm15:$imm15)>;
def : Pat<(loongarch_break uimm15:$imm15), (BREAK uimm15:$imm15)>;
@@ -2023,9 +2159,9 @@ def : Pat<(int_loongarch_asrtle_d GPR:$rj, GPR:$rk),
def : Pat<(int_loongarch_asrtgt_d GPR:$rj, GPR:$rk),
(ASRTGT_D GPR:$rj, GPR:$rk)>;
def : Pat<(int_loongarch_lddir_d GPR:$rj, timm:$imm8),
- (LDDIR GPR:$rj, uimm8:$imm8)>;
+ (LDDIR GPR:$rj, timm:$imm8)>;
def : Pat<(int_loongarch_ldpte_d GPR:$rj, timm:$imm8),
- (LDPTE GPR:$rj, uimm8:$imm8)>;
+ (LDPTE GPR:$rj, timm:$imm8)>;
} // Predicates = [IsLA64]
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index dc37b37b2186..ec6983d0f487 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -10,6 +10,37 @@
//
//===----------------------------------------------------------------------===//
+def lasxsplati8
+ : PatFrag<(ops node:$e0),
+ (v32i8 (build_vector node:$e0, node:$e0, node:$e0, node:$e0,
+ node:$e0, node:$e0, node:$e0, node:$e0,
+ node:$e0, node:$e0, node:$e0, node:$e0,
+ node:$e0, node:$e0, node:$e0, node:$e0,
+ node:$e0, node:$e0, node:$e0, node:$e0,
+ node:$e0, node:$e0, node:$e0, node:$e0,
+ node:$e0, node:$e0, node:$e0, node:$e0,
+ node:$e0, node:$e0, node:$e0, node:$e0))>;
+def lasxsplati16
+ : PatFrag<(ops node:$e0),
+ (v16i16 (build_vector node:$e0, node:$e0, node:$e0, node:$e0,
+ node:$e0, node:$e0, node:$e0, node:$e0,
+ node:$e0, node:$e0, node:$e0, node:$e0,
+ node:$e0, node:$e0, node:$e0, node:$e0))>;
+def lasxsplati32
+ : PatFrag<(ops node:$e0),
+ (v8i32 (build_vector node:$e0, node:$e0, node:$e0, node:$e0,
+ node:$e0, node:$e0, node:$e0, node:$e0))>;
+def lasxsplati64
+ : PatFrag<(ops node:$e0),
+ (v4i64 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>;
+def lasxsplatf32
+ : PatFrag<(ops node:$e0),
+ (v8f32 (build_vector node:$e0, node:$e0, node:$e0, node:$e0,
+ node:$e0, node:$e0, node:$e0, node:$e0))>;
+def lasxsplatf64
+ : PatFrag<(ops node:$e0),
+ (v4f64 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>;
+
//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
@@ -1029,4 +1060,909 @@ def PseudoXVREPLI_D : Pseudo<(outs LASX256:$xd), (ins simm10:$imm), [],
"xvrepli.d", "$xd, $imm">;
}
+def PseudoXVBNZ_B : VecCond<loongarch_vall_nonzero, v32i8, LASX256>;
+def PseudoXVBNZ_H : VecCond<loongarch_vall_nonzero, v16i16, LASX256>;
+def PseudoXVBNZ_W : VecCond<loongarch_vall_nonzero, v8i32, LASX256>;
+def PseudoXVBNZ_D : VecCond<loongarch_vall_nonzero, v4i64, LASX256>;
+def PseudoXVBNZ : VecCond<loongarch_vany_nonzero, v32i8, LASX256>;
+
+def PseudoXVBZ_B : VecCond<loongarch_vall_zero, v32i8, LASX256>;
+def PseudoXVBZ_H : VecCond<loongarch_vall_zero, v16i16, LASX256>;
+def PseudoXVBZ_W : VecCond<loongarch_vall_zero, v8i32, LASX256>;
+def PseudoXVBZ_D : VecCond<loongarch_vall_zero, v4i64, LASX256>;
+def PseudoXVBZ : VecCond<loongarch_vany_zero, v32i8, LASX256>;
+
+let usesCustomInserter = 1, Constraints = "$xd = $dst" in {
+def PseudoXVINSGR2VR_B
+ : Pseudo<(outs LASX256:$dst), (ins LASX256:$xd, GPR:$rj, uimm5:$imm)>;
+def PseudoXVINSGR2VR_H
+ : Pseudo<(outs LASX256:$dst), (ins LASX256:$xd, GPR:$rj, uimm4:$imm)>;
+} // usesCustomInserter = 1, Constraints = "$xd = $dst"
+
+} // Predicates = [HasExtLASX]
+
+multiclass PatXr<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(v32i8 (OpNode (v32i8 LASX256:$xj))),
+ (!cast<LAInst>(Inst#"_B") LASX256:$xj)>;
+ def : Pat<(v16i16 (OpNode (v16i16 LASX256:$xj))),
+ (!cast<LAInst>(Inst#"_H") LASX256:$xj)>;
+ def : Pat<(v8i32 (OpNode (v8i32 LASX256:$xj))),
+ (!cast<LAInst>(Inst#"_W") LASX256:$xj)>;
+ def : Pat<(v4i64 (OpNode (v4i64 LASX256:$xj))),
+ (!cast<LAInst>(Inst#"_D") LASX256:$xj)>;
+}
+
+multiclass PatXrF<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(v8f32 (OpNode (v8f32 LASX256:$xj))),
+ (!cast<LAInst>(Inst#"_S") LASX256:$xj)>;
+ def : Pat<(v4f64 (OpNode (v4f64 LASX256:$xj))),
+ (!cast<LAInst>(Inst#"_D") LASX256:$xj)>;
+}
+
+multiclass PatXrXr<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_B") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_H") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_W") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_D") LASX256:$xj, LASX256:$xk)>;
+}
+
+multiclass PatXrXrF<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v8f32 LASX256:$xj), (v8f32 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_S") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v4f64 LASX256:$xj), (v4f64 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_D") LASX256:$xj, LASX256:$xk)>;
+}
+
+multiclass PatXrXrU<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_BU") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_HU") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_WU") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_DU") LASX256:$xj, LASX256:$xk)>;
+}
+
+multiclass PatXrSimm5<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_simm5 simm5:$imm))),
+ (!cast<LAInst>(Inst#"_B") LASX256:$xj, simm5:$imm)>;
+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_simm5 simm5:$imm))),
+ (!cast<LAInst>(Inst#"_H") LASX256:$xj, simm5:$imm)>;
+ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 (SplatPat_simm5 simm5:$imm))),
+ (!cast<LAInst>(Inst#"_W") LASX256:$xj, simm5:$imm)>;
+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_simm5 simm5:$imm))),
+ (!cast<LAInst>(Inst#"_D") LASX256:$xj, simm5:$imm)>;
+}
+
+multiclass PatXrUimm5<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm5 uimm5:$imm))),
+ (!cast<LAInst>(Inst#"_BU") LASX256:$xj, uimm5:$imm)>;
+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_uimm5 uimm5:$imm))),
+ (!cast<LAInst>(Inst#"_HU") LASX256:$xj, uimm5:$imm)>;
+ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 (SplatPat_uimm5 uimm5:$imm))),
+ (!cast<LAInst>(Inst#"_WU") LASX256:$xj, uimm5:$imm)>;
+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_uimm5 uimm5:$imm))),
+ (!cast<LAInst>(Inst#"_DU") LASX256:$xj, uimm5:$imm)>;
+}
+
+multiclass PatXrXrXr<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v32i8 LASX256:$xd), (v32i8 LASX256:$xj),
+ (v32i8 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_B") LASX256:$xd, LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v16i16 LASX256:$xd), (v16i16 LASX256:$xj),
+ (v16i16 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_H") LASX256:$xd, LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v8i32 LASX256:$xd), (v8i32 LASX256:$xj),
+ (v8i32 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_W") LASX256:$xd, LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v4i64 LASX256:$xd), (v4i64 LASX256:$xj),
+ (v4i64 LASX256:$xk)),
+ (!cast<LAInst>(Inst#"_D") LASX256:$xd, LASX256:$xj, LASX256:$xk)>;
+}
+
+multiclass PatShiftXrXr<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v32i8 LASX256:$xj), (and vsplati8_imm_eq_7,
+ (v32i8 LASX256:$xk))),
+ (!cast<LAInst>(Inst#"_B") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v16i16 LASX256:$xj), (and vsplati16_imm_eq_15,
+ (v16i16 LASX256:$xk))),
+ (!cast<LAInst>(Inst#"_H") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v8i32 LASX256:$xj), (and vsplati32_imm_eq_31,
+ (v8i32 LASX256:$xk))),
+ (!cast<LAInst>(Inst#"_W") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(OpNode (v4i64 LASX256:$xj), (and vsplati64_imm_eq_63,
+ (v4i64 LASX256:$xk))),
+ (!cast<LAInst>(Inst#"_D") LASX256:$xj, LASX256:$xk)>;
+}
+
+multiclass PatShiftXrUimm<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm3 uimm3:$imm))),
+ (!cast<LAInst>(Inst#"_B") LASX256:$xj, uimm3:$imm)>;
+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_uimm4 uimm4:$imm))),
+ (!cast<LAInst>(Inst#"_H") LASX256:$xj, uimm4:$imm)>;
+ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 (SplatPat_uimm5 uimm5:$imm))),
+ (!cast<LAInst>(Inst#"_W") LASX256:$xj, uimm5:$imm)>;
+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_uimm6 uimm6:$imm))),
+ (!cast<LAInst>(Inst#"_D") LASX256:$xj, uimm6:$imm)>;
+}
+
+multiclass PatCCXrSimm5<CondCode CC, string Inst> {
+ def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj),
+ (v32i8 (SplatPat_simm5 simm5:$imm)), CC)),
+ (!cast<LAInst>(Inst#"_B") LASX256:$xj, simm5:$imm)>;
+ def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj),
+ (v16i16 (SplatPat_simm5 simm5:$imm)), CC)),
+ (!cast<LAInst>(Inst#"_H") LASX256:$xj, simm5:$imm)>;
+ def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj),
+ (v8i32 (SplatPat_simm5 simm5:$imm)), CC)),
+ (!cast<LAInst>(Inst#"_W") LASX256:$xj, simm5:$imm)>;
+ def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj),
+ (v4i64 (SplatPat_simm5 simm5:$imm)), CC)),
+ (!cast<LAInst>(Inst#"_D") LASX256:$xj, simm5:$imm)>;
+}
+
+multiclass PatCCXrUimm5<CondCode CC, string Inst> {
+ def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj),
+ (v32i8 (SplatPat_uimm5 uimm5:$imm)), CC)),
+ (!cast<LAInst>(Inst#"_BU") LASX256:$xj, uimm5:$imm)>;
+ def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj),
+ (v16i16 (SplatPat_uimm5 uimm5:$imm)), CC)),
+ (!cast<LAInst>(Inst#"_HU") LASX256:$xj, uimm5:$imm)>;
+ def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj),
+ (v8i32 (SplatPat_uimm5 uimm5:$imm)), CC)),
+ (!cast<LAInst>(Inst#"_WU") LASX256:$xj, uimm5:$imm)>;
+ def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj),
+ (v4i64 (SplatPat_uimm5 uimm5:$imm)), CC)),
+ (!cast<LAInst>(Inst#"_DU") LASX256:$xj, uimm5:$imm)>;
+}
+
+multiclass PatCCXrXr<CondCode CC, string Inst> {
+ def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), (v32i8 LASX256:$xk), CC)),
+ (!cast<LAInst>(Inst#"_B") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), (v16i16 LASX256:$xk), CC)),
+ (!cast<LAInst>(Inst#"_H") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), (v8i32 LASX256:$xk), CC)),
+ (!cast<LAInst>(Inst#"_W") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), (v4i64 LASX256:$xk), CC)),
+ (!cast<LAInst>(Inst#"_D") LASX256:$xj, LASX256:$xk)>;
+}
+
+multiclass PatCCXrXrU<CondCode CC, string Inst> {
+ def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), (v32i8 LASX256:$xk), CC)),
+ (!cast<LAInst>(Inst#"_BU") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), (v16i16 LASX256:$xk), CC)),
+ (!cast<LAInst>(Inst#"_HU") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), (v8i32 LASX256:$xk), CC)),
+ (!cast<LAInst>(Inst#"_WU") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), (v4i64 LASX256:$xk), CC)),
+ (!cast<LAInst>(Inst#"_DU") LASX256:$xj, LASX256:$xk)>;
+}
+
+multiclass PatCCXrXrF<CondCode CC, string Inst> {
+ def : Pat<(v8i32 (setcc (v8f32 LASX256:$xj), (v8f32 LASX256:$xk), CC)),
+ (!cast<LAInst>(Inst#"_S") LASX256:$xj, LASX256:$xk)>;
+ def : Pat<(v4i64 (setcc (v4f64 LASX256:$xj), (v4f64 LASX256:$xk), CC)),
+ (!cast<LAInst>(Inst#"_D") LASX256:$xj, LASX256:$xk)>;
+}
+
+let Predicates = [HasExtLASX] in {
+
+// XVADD_{B/H/W/D}
+defm : PatXrXr<add, "XVADD">;
+// XVSUB_{B/H/W/D}
+defm : PatXrXr<sub, "XVSUB">;
+
+// XVADDI_{B/H/W/D}U
+defm : PatXrUimm5<add, "XVADDI">;
+// XVSUBI_{B/H/W/D}U
+defm : PatXrUimm5<sub, "XVSUBI">;
+
+// XVNEG_{B/H/W/D}
+def : Pat<(sub immAllZerosV, (v32i8 LASX256:$xj)), (XVNEG_B LASX256:$xj)>;
+def : Pat<(sub immAllZerosV, (v16i16 LASX256:$xj)), (XVNEG_H LASX256:$xj)>;
+def : Pat<(sub immAllZerosV, (v8i32 LASX256:$xj)), (XVNEG_W LASX256:$xj)>;
+def : Pat<(sub immAllZerosV, (v4i64 LASX256:$xj)), (XVNEG_D LASX256:$xj)>;
+
+// XVMAX[I]_{B/H/W/D}[U]
+defm : PatXrXr<smax, "XVMAX">;
+defm : PatXrXrU<umax, "XVMAX">;
+defm : PatXrSimm5<smax, "XVMAXI">;
+defm : PatXrUimm5<umax, "XVMAXI">;
+
+// XVMIN[I]_{B/H/W/D}[U]
+defm : PatXrXr<smin, "XVMIN">;
+defm : PatXrXrU<umin, "XVMIN">;
+defm : PatXrSimm5<smin, "XVMINI">;
+defm : PatXrUimm5<umin, "XVMINI">;
+
+// XVMUL_{B/H/W/D}
+defm : PatXrXr<mul, "XVMUL">;
+
+// XVMUH_{B/H/W/D}[U]
+defm : PatXrXr<mulhs, "XVMUH">;
+defm : PatXrXrU<mulhu, "XVMUH">;
+
+// XVMADD_{B/H/W/D}
+defm : PatXrXrXr<muladd, "XVMADD">;
+// XVMSUB_{B/H/W/D}
+defm : PatXrXrXr<mulsub, "XVMSUB">;
+
+// XVDIV_{B/H/W/D}[U]
+defm : PatXrXr<sdiv, "XVDIV">;
+defm : PatXrXrU<udiv, "XVDIV">;
+
+// XVMOD_{B/H/W/D}[U]
+defm : PatXrXr<srem, "XVMOD">;
+defm : PatXrXrU<urem, "XVMOD">;
+
+// XVAND_V
+foreach vt = [v32i8, v16i16, v8i32, v4i64] in
+def : Pat<(and (vt LASX256:$xj), (vt LASX256:$xk)),
+ (XVAND_V LASX256:$xj, LASX256:$xk)>;
+// XVOR_V
+foreach vt = [v32i8, v16i16, v8i32, v4i64] in
+def : Pat<(or (vt LASX256:$xj), (vt LASX256:$xk)),
+ (XVOR_V LASX256:$xj, LASX256:$xk)>;
+// XVXOR_V
+foreach vt = [v32i8, v16i16, v8i32, v4i64] in
+def : Pat<(xor (vt LASX256:$xj), (vt LASX256:$xk)),
+ (XVXOR_V LASX256:$xj, LASX256:$xk)>;
+// XVNOR_V
+foreach vt = [v32i8, v16i16, v8i32, v4i64] in
+def : Pat<(vnot (or (vt LASX256:$xj), (vt LASX256:$xk))),
+ (XVNOR_V LASX256:$xj, LASX256:$xk)>;
+
+// XVANDI_B
+def : Pat<(and (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))),
+ (XVANDI_B LASX256:$xj, uimm8:$imm)>;
+// XVORI_B
+def : Pat<(or (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))),
+ (XVORI_B LASX256:$xj, uimm8:$imm)>;
+
+// XVXORI_B
+def : Pat<(xor (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))),
+ (XVXORI_B LASX256:$xj, uimm8:$imm)>;
+
+// XVSLL[I]_{B/H/W/D}
+defm : PatXrXr<shl, "XVSLL">;
+defm : PatShiftXrXr<shl, "XVSLL">;
+defm : PatShiftXrUimm<shl, "XVSLLI">;
+
+// XVSRL[I]_{B/H/W/D}
+defm : PatXrXr<srl, "XVSRL">;
+defm : PatShiftXrXr<srl, "XVSRL">;
+defm : PatShiftXrUimm<srl, "XVSRLI">;
+
+// XVSRA[I]_{B/H/W/D}
+defm : PatXrXr<sra, "XVSRA">;
+defm : PatShiftXrXr<sra, "XVSRA">;
+defm : PatShiftXrUimm<sra, "XVSRAI">;
+
+// XVCLZ_{B/H/W/D}
+defm : PatXr<ctlz, "XVCLZ">;
+
+// XVPCNT_{B/H/W/D}
+defm : PatXr<ctpop, "XVPCNT">;
+
+// XVBITCLR_{B/H/W/D}
+def : Pat<(and v32i8:$xj, (vnot (shl vsplat_imm_eq_1, v32i8:$xk))),
+ (v32i8 (XVBITCLR_B v32i8:$xj, v32i8:$xk))>;
+def : Pat<(and v16i16:$xj, (vnot (shl vsplat_imm_eq_1, v16i16:$xk))),
+ (v16i16 (XVBITCLR_H v16i16:$xj, v16i16:$xk))>;
+def : Pat<(and v8i32:$xj, (vnot (shl vsplat_imm_eq_1, v8i32:$xk))),
+ (v8i32 (XVBITCLR_W v8i32:$xj, v8i32:$xk))>;
+def : Pat<(and v4i64:$xj, (vnot (shl vsplat_imm_eq_1, v4i64:$xk))),
+ (v4i64 (XVBITCLR_D v4i64:$xj, v4i64:$xk))>;
+def : Pat<(and v32i8:$xj, (vnot (shl vsplat_imm_eq_1,
+ (vsplati8imm7 v32i8:$xk)))),
+ (v32i8 (XVBITCLR_B v32i8:$xj, v32i8:$xk))>;
+def : Pat<(and v16i16:$xj, (vnot (shl vsplat_imm_eq_1,
+ (vsplati16imm15 v16i16:$xk)))),
+ (v16i16 (XVBITCLR_H v16i16:$xj, v16i16:$xk))>;
+def : Pat<(and v8i32:$xj, (vnot (shl vsplat_imm_eq_1,
+ (vsplati32imm31 v8i32:$xk)))),
+ (v8i32 (XVBITCLR_W v8i32:$xj, v8i32:$xk))>;
+def : Pat<(and v4i64:$xj, (vnot (shl vsplat_imm_eq_1,
+ (vsplati64imm63 v4i64:$xk)))),
+ (v4i64 (XVBITCLR_D v4i64:$xj, v4i64:$xk))>;
+
+// XVBITCLRI_{B/H/W/D}
+def : Pat<(and (v32i8 LASX256:$xj), (v32i8 (vsplat_uimm_inv_pow2 uimm3:$imm))),
+ (XVBITCLRI_B LASX256:$xj, uimm3:$imm)>;
+def : Pat<(and (v16i16 LASX256:$xj), (v16i16 (vsplat_uimm_inv_pow2 uimm4:$imm))),
+ (XVBITCLRI_H LASX256:$xj, uimm4:$imm)>;
+def : Pat<(and (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_inv_pow2 uimm5:$imm))),
+ (XVBITCLRI_W LASX256:$xj, uimm5:$imm)>;
+def : Pat<(and (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_inv_pow2 uimm6:$imm))),
+ (XVBITCLRI_D LASX256:$xj, uimm6:$imm)>;
+
+// XVBITSET_{B/H/W/D}
+def : Pat<(or v32i8:$xj, (shl vsplat_imm_eq_1, v32i8:$xk)),
+ (v32i8 (XVBITSET_B v32i8:$xj, v32i8:$xk))>;
+def : Pat<(or v16i16:$xj, (shl vsplat_imm_eq_1, v16i16:$xk)),
+ (v16i16 (XVBITSET_H v16i16:$xj, v16i16:$xk))>;
+def : Pat<(or v8i32:$xj, (shl vsplat_imm_eq_1, v8i32:$xk)),
+ (v8i32 (XVBITSET_W v8i32:$xj, v8i32:$xk))>;
+def : Pat<(or v4i64:$xj, (shl vsplat_imm_eq_1, v4i64:$xk)),
+ (v4i64 (XVBITSET_D v4i64:$xj, v4i64:$xk))>;
+def : Pat<(or v32i8:$xj, (shl vsplat_imm_eq_1, (vsplati8imm7 v32i8:$xk))),
+ (v32i8 (XVBITSET_B v32i8:$xj, v32i8:$xk))>;
+def : Pat<(or v16i16:$xj, (shl vsplat_imm_eq_1, (vsplati16imm15 v16i16:$xk))),
+ (v16i16 (XVBITSET_H v16i16:$xj, v16i16:$xk))>;
+def : Pat<(or v8i32:$xj, (shl vsplat_imm_eq_1, (vsplati32imm31 v8i32:$xk))),
+ (v8i32 (XVBITSET_W v8i32:$xj, v8i32:$xk))>;
+def : Pat<(or v4i64:$xj, (shl vsplat_imm_eq_1, (vsplati64imm63 v4i64:$xk))),
+ (v4i64 (XVBITSET_D v4i64:$xj, v4i64:$xk))>;
+
+// XVBITSETI_{B/H/W/D}
+def : Pat<(or (v32i8 LASX256:$xj), (v32i8 (vsplat_uimm_pow2 uimm3:$imm))),
+ (XVBITSETI_B LASX256:$xj, uimm3:$imm)>;
+def : Pat<(or (v16i16 LASX256:$xj), (v16i16 (vsplat_uimm_pow2 uimm4:$imm))),
+ (XVBITSETI_H LASX256:$xj, uimm4:$imm)>;
+def : Pat<(or (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_pow2 uimm5:$imm))),
+ (XVBITSETI_W LASX256:$xj, uimm5:$imm)>;
+def : Pat<(or (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_pow2 uimm6:$imm))),
+ (XVBITSETI_D LASX256:$xj, uimm6:$imm)>;
+
+// XVBITREV_{B/H/W/D}
+def : Pat<(xor v32i8:$xj, (shl vsplat_imm_eq_1, v32i8:$xk)),
+ (v32i8 (XVBITREV_B v32i8:$xj, v32i8:$xk))>;
+def : Pat<(xor v16i16:$xj, (shl vsplat_imm_eq_1, v16i16:$xk)),
+ (v16i16 (XVBITREV_H v16i16:$xj, v16i16:$xk))>;
+def : Pat<(xor v8i32:$xj, (shl vsplat_imm_eq_1, v8i32:$xk)),
+ (v8i32 (XVBITREV_W v8i32:$xj, v8i32:$xk))>;
+def : Pat<(xor v4i64:$xj, (shl vsplat_imm_eq_1, v4i64:$xk)),
+ (v4i64 (XVBITREV_D v4i64:$xj, v4i64:$xk))>;
+def : Pat<(xor v32i8:$xj, (shl vsplat_imm_eq_1, (vsplati8imm7 v32i8:$xk))),
+ (v32i8 (XVBITREV_B v32i8:$xj, v32i8:$xk))>;
+def : Pat<(xor v16i16:$xj, (shl vsplat_imm_eq_1, (vsplati16imm15 v16i16:$xk))),
+ (v16i16 (XVBITREV_H v16i16:$xj, v16i16:$xk))>;
+def : Pat<(xor v8i32:$xj, (shl vsplat_imm_eq_1, (vsplati32imm31 v8i32:$xk))),
+ (v8i32 (XVBITREV_W v8i32:$xj, v8i32:$xk))>;
+def : Pat<(xor v4i64:$xj, (shl vsplat_imm_eq_1, (vsplati64imm63 v4i64:$xk))),
+ (v4i64 (XVBITREV_D v4i64:$xj, v4i64:$xk))>;
+
+// XVBITREVI_{B/H/W/D}
+def : Pat<(xor (v32i8 LASX256:$xj), (v32i8 (vsplat_uimm_pow2 uimm3:$imm))),
+ (XVBITREVI_B LASX256:$xj, uimm3:$imm)>;
+def : Pat<(xor (v16i16 LASX256:$xj), (v16i16 (vsplat_uimm_pow2 uimm4:$imm))),
+ (XVBITREVI_H LASX256:$xj, uimm4:$imm)>;
+def : Pat<(xor (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_pow2 uimm5:$imm))),
+ (XVBITREVI_W LASX256:$xj, uimm5:$imm)>;
+def : Pat<(xor (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_pow2 uimm6:$imm))),
+ (XVBITREVI_D LASX256:$xj, uimm6:$imm)>;
+
+// XVFADD_{S/D}
+defm : PatXrXrF<fadd, "XVFADD">;
+
+// XVFSUB_{S/D}
+defm : PatXrXrF<fsub, "XVFSUB">;
+
+// XVFMUL_{S/D}
+defm : PatXrXrF<fmul, "XVFMUL">;
+
+// XVFDIV_{S/D}
+defm : PatXrXrF<fdiv, "XVFDIV">;
+
+// XVFMADD_{S/D}
+def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa),
+ (XVFMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
+def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa),
+ (XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
+
+// XVFMSUB_{S/D}
+def : Pat<(fma v8f32:$xj, v8f32:$xk, (fneg v8f32:$xa)),
+ (XVFMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
+def : Pat<(fma v4f64:$xj, v4f64:$xk, (fneg v4f64:$xa)),
+ (XVFMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
+
+// XVFNMADD_{S/D}
+def : Pat<(fneg (fma v8f32:$xj, v8f32:$xk, v8f32:$xa)),
+ (XVFNMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
+def : Pat<(fneg (fma v4f64:$xj, v4f64:$xk, v4f64:$xa)),
+ (XVFNMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
+def : Pat<(fma_nsz (fneg v8f32:$xj), v8f32:$xk, (fneg v8f32:$xa)),
+ (XVFNMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
+def : Pat<(fma_nsz (fneg v4f64:$xj), v4f64:$xk, (fneg v4f64:$xa)),
+ (XVFNMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
+
+// XVFNMSUB_{S/D}
+def : Pat<(fneg (fma v8f32:$xj, v8f32:$xk, (fneg v8f32:$xa))),
+ (XVFNMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
+def : Pat<(fneg (fma v4f64:$xj, v4f64:$xk, (fneg v4f64:$xa))),
+ (XVFNMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
+def : Pat<(fma_nsz (fneg v8f32:$xj), v8f32:$xk, v8f32:$xa),
+ (XVFNMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
+def : Pat<(fma_nsz (fneg v4f64:$xj), v4f64:$xk, v4f64:$xa),
+ (XVFNMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
+
+// XVFSQRT_{S/D}
+defm : PatXrF<fsqrt, "XVFSQRT">;
+
+// XVRECIP_{S/D}
+def : Pat<(fdiv vsplatf32_fpimm_eq_1, v8f32:$xj),
+ (XVFRECIP_S v8f32:$xj)>;
+def : Pat<(fdiv vsplatf64_fpimm_eq_1, v4f64:$xj),
+ (XVFRECIP_D v4f64:$xj)>;
+
+// XVFRSQRT_{S/D}
+def : Pat<(fdiv vsplatf32_fpimm_eq_1, (fsqrt v8f32:$xj)),
+ (XVFRSQRT_S v8f32:$xj)>;
+def : Pat<(fdiv vsplatf64_fpimm_eq_1, (fsqrt v4f64:$xj)),
+ (XVFRSQRT_D v4f64:$xj)>;
+
+// XVSEQ[I]_{B/H/W/D}
+defm : PatCCXrSimm5<SETEQ, "XVSEQI">;
+defm : PatCCXrXr<SETEQ, "XVSEQ">;
+
+// XVSLE[I]_{B/H/W/D}[U]
+defm : PatCCXrSimm5<SETLE, "XVSLEI">;
+defm : PatCCXrUimm5<SETULE, "XVSLEI">;
+defm : PatCCXrXr<SETLE, "XVSLE">;
+defm : PatCCXrXrU<SETULE, "XVSLE">;
+
+// XVSLT[I]_{B/H/W/D}[U]
+defm : PatCCXrSimm5<SETLT, "XVSLTI">;
+defm : PatCCXrUimm5<SETULT, "XVSLTI">;
+defm : PatCCXrXr<SETLT, "XVSLT">;
+defm : PatCCXrXrU<SETULT, "XVSLT">;
+
+// XVFCMP.cond.{S/D}
+defm : PatCCXrXrF<SETEQ, "XVFCMP_CEQ">;
+defm : PatCCXrXrF<SETOEQ, "XVFCMP_CEQ">;
+defm : PatCCXrXrF<SETUEQ, "XVFCMP_CUEQ">;
+
+defm : PatCCXrXrF<SETLE, "XVFCMP_CLE">;
+defm : PatCCXrXrF<SETOLE, "XVFCMP_CLE">;
+defm : PatCCXrXrF<SETULE, "XVFCMP_CULE">;
+
+defm : PatCCXrXrF<SETLT, "XVFCMP_CLT">;
+defm : PatCCXrXrF<SETOLT, "XVFCMP_CLT">;
+defm : PatCCXrXrF<SETULT, "XVFCMP_CULT">;
+
+defm : PatCCXrXrF<SETNE, "XVFCMP_CNE">;
+defm : PatCCXrXrF<SETONE, "XVFCMP_CNE">;
+defm : PatCCXrXrF<SETUNE, "XVFCMP_CUNE">;
+
+defm : PatCCXrXrF<SETO, "XVFCMP_COR">;
+defm : PatCCXrXrF<SETUO, "XVFCMP_CUN">;
+
+// PseudoXVINSGR2VR_{B/H}
+def : Pat<(vector_insert v32i8:$xd, GRLenVT:$rj, uimm5:$imm),
+ (PseudoXVINSGR2VR_B v32i8:$xd, GRLenVT:$rj, uimm5:$imm)>;
+def : Pat<(vector_insert v16i16:$xd, GRLenVT:$rj, uimm4:$imm),
+ (PseudoXVINSGR2VR_H v16i16:$xd, GRLenVT:$rj, uimm4:$imm)>;
+
+// XVINSGR2VR_{W/D}
+def : Pat<(vector_insert v8i32:$xd, GRLenVT:$rj, uimm3:$imm),
+ (XVINSGR2VR_W v8i32:$xd, GRLenVT:$rj, uimm3:$imm)>;
+def : Pat<(vector_insert v4i64:$xd, GRLenVT:$rj, uimm2:$imm),
+ (XVINSGR2VR_D v4i64:$xd, GRLenVT:$rj, uimm2:$imm)>;
+
+def : Pat<(vector_insert v8f32:$vd, FPR32:$fj, uimm3:$imm),
+ (XVINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm3:$imm)>;
+def : Pat<(vector_insert v4f64:$vd, FPR64:$fj, uimm2:$imm),
+ (XVINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm2:$imm)>;
+
+// XVPICKVE2GR_W[U]
+def : Pat<(loongarch_vpick_sext_elt v8i32:$xd, uimm3:$imm, i32),
+ (XVPICKVE2GR_W v8i32:$xd, uimm3:$imm)>;
+def : Pat<(loongarch_vpick_zext_elt v8i32:$xd, uimm3:$imm, i32),
+ (XVPICKVE2GR_WU v8i32:$xd, uimm3:$imm)>;
+
+// XVREPLGR2VR_{B/H/W/D}
+def : Pat<(lasxsplati8 GPR:$rj), (XVREPLGR2VR_B GPR:$rj)>;
+def : Pat<(lasxsplati16 GPR:$rj), (XVREPLGR2VR_H GPR:$rj)>;
+def : Pat<(lasxsplati32 GPR:$rj), (XVREPLGR2VR_W GPR:$rj)>;
+def : Pat<(lasxsplati64 GPR:$rj), (XVREPLGR2VR_D GPR:$rj)>;
+
+// XVREPLVE_{B/H/W/D}
+def : Pat<(loongarch_vreplve v32i8:$xj, GRLenVT:$rk),
+ (XVREPLVE_B v32i8:$xj, GRLenVT:$rk)>;
+def : Pat<(loongarch_vreplve v16i16:$xj, GRLenVT:$rk),
+ (XVREPLVE_H v16i16:$xj, GRLenVT:$rk)>;
+def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk),
+ (XVREPLVE_W v8i32:$xj, GRLenVT:$rk)>;
+def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk),
+ (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>;
+
+// XVREPL128VEI_{W/D}
+def : Pat<(lasxsplatf32 FPR32:$fj),
+ (XVREPL128VEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>;
+def : Pat<(lasxsplatf64 FPR64:$fj),
+ (XVREPL128VEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>;
+
+// Loads/Stores
+foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in {
+ defm : LdPat<load, XVLD, vt>;
+ def : RegRegLdPat<load, XVLDX, vt>;
+ defm : StPat<store, XVST, LASX256, vt>;
+ def : RegRegStPat<store, XVSTX, LASX256, vt>;
+}
+
+// Vector extraction with constant index.
+def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)),
+ (VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>;
+def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)),
+ (VPICKVE2GR_H (EXTRACT_SUBREG v16i16:$xj, sub_128), uimm3:$imm)>;
+def : Pat<(i64 (vector_extract v8i32:$xj, uimm2:$imm)),
+ (VPICKVE2GR_W (EXTRACT_SUBREG v8i32:$xj, sub_128), uimm2:$imm)>;
+def : Pat<(i64 (vector_extract v4i64:$xj, uimm1:$imm)),
+ (VPICKVE2GR_D (EXTRACT_SUBREG v4i64:$xj, sub_128), uimm1:$imm)>;
+def : Pat<(f32 (vector_extract v8f32:$xj, uimm2:$imm)),
+ (f32 (EXTRACT_SUBREG (XVREPL128VEI_W v8f32:$xj, uimm2:$imm), sub_32))>;
+def : Pat<(f64 (vector_extract v4f64:$xj, uimm1:$imm)),
+ (f64 (EXTRACT_SUBREG (XVREPL128VEI_D v4f64:$xj, uimm1:$imm), sub_64))>;
+
+// Vector extraction with variable index.
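+// XVREPLVE replicates the selected element into every lane, so lane 0 of the
+// 128-bit subregister already holds it; for i8/i16 elements the element also
+// occupies the top bits of the low word, letting SRAI_W by 24/16 extract and
+// sign-extend it in one step.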
+def : Pat<(i64 (vector_extract v32i8:$xj, i64:$rk)),
+          (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_B v32i8:$xj,
+                                                                     i64:$rk),
+                                                          sub_32)),
+                                    GPR), (i64 24))>;
+def : Pat<(i64 (vector_extract v16i16:$xj, i64:$rk)),
+          (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_H v16i16:$xj,
+                                                                     i64:$rk),
+                                                          sub_32)),
+                                    GPR), (i64 16))>;
+def : Pat<(i64 (vector_extract v8i32:$xj, i64:$rk)),
+          (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_W v8i32:$xj, i64:$rk),
+                                                 sub_32)),
+                            GPR)>;
+def : Pat<(i64 (vector_extract v4i64:$xj, i64:$rk)),
+          (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (XVREPLVE_D v4i64:$xj, i64:$rk),
+                                                 sub_64)),
+                            GPR)>;
+def : Pat<(f32 (vector_extract v8f32:$xj, i64:$rk)),
+          (f32 (EXTRACT_SUBREG (XVREPLVE_W v8f32:$xj, i64:$rk), sub_32))>;
+def : Pat<(f64 (vector_extract v4f64:$xj, i64:$rk)),
+          (f64 (EXTRACT_SUBREG (XVREPLVE_D v4f64:$xj, i64:$rk), sub_64))>;
+
+// vselect
+def : Pat<(v32i8 (vselect LASX256:$xj, LASX256:$xd,
+ (v32i8 (SplatPat_uimm8 uimm8:$imm)))),
+ (XVBITSELI_B LASX256:$xd, LASX256:$xj, uimm8:$imm)>;
+foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in
+ def : Pat<(vt (vselect LASX256:$xa, LASX256:$xk, LASX256:$xj)),
+ (XVBITSEL_V LASX256:$xj, LASX256:$xk, LASX256:$xa)>;
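+// Operand order: mask bits set in $xa select from $xk (the "true" operand of
+// the vselect), cleared bits select from $xj.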
+
+// fneg
+def : Pat<(fneg (v8f32 LASX256:$xj)), (XVBITREVI_W LASX256:$xj, 31)>;
+def : Pat<(fneg (v4f64 LASX256:$xj)), (XVBITREVI_D LASX256:$xj, 63)>;
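+// Negation is a sign-bit flip: XVBITREVI with immediate 31 (f32) or 63 (f64).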
+
+} // Predicates = [HasExtLASX]
+
+/// Intrinsic pattern
+
+class deriveLASXIntrinsic<string Inst> {
+ Intrinsic ret = !cast<Intrinsic>(!tolower("int_loongarch_lasx_"#Inst));
+}
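+// e.g. deriveLASXIntrinsic<"XVADD_Q">.ret is int_loongarch_lasx_xvadd_q.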
+
+let Predicates = [HasExtLASX] in {
+
+// vty: v32i8/v16i16/v8i32/v4i64
+// Pat<(Intrinsic vty:$xj, vty:$xk),
+// (LAInst vty:$xj, vty:$xk)>;
+foreach Inst = ["XVSADD_B", "XVSADD_BU", "XVSSUB_B", "XVSSUB_BU",
+ "XVHADDW_H_B", "XVHADDW_HU_BU", "XVHSUBW_H_B", "XVHSUBW_HU_BU",
+ "XVADDWEV_H_B", "XVADDWOD_H_B", "XVSUBWEV_H_B", "XVSUBWOD_H_B",
+ "XVADDWEV_H_BU", "XVADDWOD_H_BU", "XVSUBWEV_H_BU", "XVSUBWOD_H_BU",
+ "XVADDWEV_H_BU_B", "XVADDWOD_H_BU_B",
+ "XVAVG_B", "XVAVG_BU", "XVAVGR_B", "XVAVGR_BU",
+ "XVABSD_B", "XVABSD_BU", "XVADDA_B", "XVMUH_B", "XVMUH_BU",
+ "XVMULWEV_H_B", "XVMULWOD_H_B", "XVMULWEV_H_BU", "XVMULWOD_H_BU",
+ "XVMULWEV_H_BU_B", "XVMULWOD_H_BU_B", "XVSIGNCOV_B",
+ "XVANDN_V", "XVORN_V", "XVROTR_B", "XVSRLR_B", "XVSRAR_B",
+ "XVSEQ_B", "XVSLE_B", "XVSLE_BU", "XVSLT_B", "XVSLT_BU",
+ "XVPACKEV_B", "XVPACKOD_B", "XVPICKEV_B", "XVPICKOD_B",
+ "XVILVL_B", "XVILVH_B"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret
+ (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)),
+ (!cast<LAInst>(Inst) LASX256:$xj, LASX256:$xk)>;
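+// For instance, Inst = "XVSADD_B" above expands to:
+//   def : Pat<(int_loongarch_lasx_xvsadd_b (v32i8 LASX256:$xj),
+//                                          (v32i8 LASX256:$xk)),
+//             (XVSADD_B LASX256:$xj, LASX256:$xk)>;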
+foreach Inst = ["XVSADD_H", "XVSADD_HU", "XVSSUB_H", "XVSSUB_HU",
+ "XVHADDW_W_H", "XVHADDW_WU_HU", "XVHSUBW_W_H", "XVHSUBW_WU_HU",
+ "XVADDWEV_W_H", "XVADDWOD_W_H", "XVSUBWEV_W_H", "XVSUBWOD_W_H",
+ "XVADDWEV_W_HU", "XVADDWOD_W_HU", "XVSUBWEV_W_HU", "XVSUBWOD_W_HU",
+ "XVADDWEV_W_HU_H", "XVADDWOD_W_HU_H",
+ "XVAVG_H", "XVAVG_HU", "XVAVGR_H", "XVAVGR_HU",
+ "XVABSD_H", "XVABSD_HU", "XVADDA_H", "XVMUH_H", "XVMUH_HU",
+ "XVMULWEV_W_H", "XVMULWOD_W_H", "XVMULWEV_W_HU", "XVMULWOD_W_HU",
+ "XVMULWEV_W_HU_H", "XVMULWOD_W_HU_H", "XVSIGNCOV_H", "XVROTR_H",
+ "XVSRLR_H", "XVSRAR_H", "XVSRLN_B_H", "XVSRAN_B_H", "XVSRLRN_B_H",
+ "XVSRARN_B_H", "XVSSRLN_B_H", "XVSSRAN_B_H", "XVSSRLN_BU_H",
+ "XVSSRAN_BU_H", "XVSSRLRN_B_H", "XVSSRARN_B_H", "XVSSRLRN_BU_H",
+ "XVSSRARN_BU_H",
+ "XVSEQ_H", "XVSLE_H", "XVSLE_HU", "XVSLT_H", "XVSLT_HU",
+ "XVPACKEV_H", "XVPACKOD_H", "XVPICKEV_H", "XVPICKOD_H",
+ "XVILVL_H", "XVILVH_H"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret
+ (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)),
+ (!cast<LAInst>(Inst) LASX256:$xj, LASX256:$xk)>;
+foreach Inst = ["XVSADD_W", "XVSADD_WU", "XVSSUB_W", "XVSSUB_WU",
+ "XVHADDW_D_W", "XVHADDW_DU_WU", "XVHSUBW_D_W", "XVHSUBW_DU_WU",
+ "XVADDWEV_D_W", "XVADDWOD_D_W", "XVSUBWEV_D_W", "XVSUBWOD_D_W",
+ "XVADDWEV_D_WU", "XVADDWOD_D_WU", "XVSUBWEV_D_WU", "XVSUBWOD_D_WU",
+ "XVADDWEV_D_WU_W", "XVADDWOD_D_WU_W",
+ "XVAVG_W", "XVAVG_WU", "XVAVGR_W", "XVAVGR_WU",
+ "XVABSD_W", "XVABSD_WU", "XVADDA_W", "XVMUH_W", "XVMUH_WU",
+ "XVMULWEV_D_W", "XVMULWOD_D_W", "XVMULWEV_D_WU", "XVMULWOD_D_WU",
+ "XVMULWEV_D_WU_W", "XVMULWOD_D_WU_W", "XVSIGNCOV_W", "XVROTR_W",
+ "XVSRLR_W", "XVSRAR_W", "XVSRLN_H_W", "XVSRAN_H_W", "XVSRLRN_H_W",
+ "XVSRARN_H_W", "XVSSRLN_H_W", "XVSSRAN_H_W", "XVSSRLN_HU_W",
+ "XVSSRAN_HU_W", "XVSSRLRN_H_W", "XVSSRARN_H_W", "XVSSRLRN_HU_W",
+ "XVSSRARN_HU_W",
+ "XVSEQ_W", "XVSLE_W", "XVSLE_WU", "XVSLT_W", "XVSLT_WU",
+ "XVPACKEV_W", "XVPACKOD_W", "XVPICKEV_W", "XVPICKOD_W",
+ "XVILVL_W", "XVILVH_W", "XVPERM_W"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret
+ (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)),
+ (!cast<LAInst>(Inst) LASX256:$xj, LASX256:$xk)>;
+foreach Inst = ["XVADD_Q", "XVSUB_Q",
+ "XVSADD_D", "XVSADD_DU", "XVSSUB_D", "XVSSUB_DU",
+ "XVHADDW_Q_D", "XVHADDW_QU_DU", "XVHSUBW_Q_D", "XVHSUBW_QU_DU",
+ "XVADDWEV_Q_D", "XVADDWOD_Q_D", "XVSUBWEV_Q_D", "XVSUBWOD_Q_D",
+ "XVADDWEV_Q_DU", "XVADDWOD_Q_DU", "XVSUBWEV_Q_DU", "XVSUBWOD_Q_DU",
+ "XVADDWEV_Q_DU_D", "XVADDWOD_Q_DU_D",
+ "XVAVG_D", "XVAVG_DU", "XVAVGR_D", "XVAVGR_DU",
+ "XVABSD_D", "XVABSD_DU", "XVADDA_D", "XVMUH_D", "XVMUH_DU",
+ "XVMULWEV_Q_D", "XVMULWOD_Q_D", "XVMULWEV_Q_DU", "XVMULWOD_Q_DU",
+ "XVMULWEV_Q_DU_D", "XVMULWOD_Q_DU_D", "XVSIGNCOV_D", "XVROTR_D",
+ "XVSRLR_D", "XVSRAR_D", "XVSRLN_W_D", "XVSRAN_W_D", "XVSRLRN_W_D",
+ "XVSRARN_W_D", "XVSSRLN_W_D", "XVSSRAN_W_D", "XVSSRLN_WU_D",
+ "XVSSRAN_WU_D", "XVSSRLRN_W_D", "XVSSRARN_W_D", "XVSSRLRN_WU_D",
+ "XVSSRARN_WU_D", "XVFFINT_S_L",
+ "XVSEQ_D", "XVSLE_D", "XVSLE_DU", "XVSLT_D", "XVSLT_DU",
+ "XVPACKEV_D", "XVPACKOD_D", "XVPICKEV_D", "XVPICKOD_D",
+ "XVILVL_D", "XVILVH_D"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret
+ (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)),
+ (!cast<LAInst>(Inst) LASX256:$xj, LASX256:$xk)>;
+
+// vty: v32i8/v16i16/v8i32/v4i64
+// Pat<(Intrinsic vty:$xd, vty:$xj, vty:$xk),
+// (LAInst vty:$xd, vty:$xj, vty:$xk)>;
+foreach Inst = ["XVMADDWEV_H_B", "XVMADDWOD_H_B", "XVMADDWEV_H_BU",
+ "XVMADDWOD_H_BU", "XVMADDWEV_H_BU_B", "XVMADDWOD_H_BU_B"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret
+ (v16i16 LASX256:$xd), (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)),
+ (!cast<LAInst>(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>;
+foreach Inst = ["XVMADDWEV_W_H", "XVMADDWOD_W_H", "XVMADDWEV_W_HU",
+ "XVMADDWOD_W_HU", "XVMADDWEV_W_HU_H", "XVMADDWOD_W_HU_H"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret
+ (v8i32 LASX256:$xd), (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)),
+ (!cast<LAInst>(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>;
+foreach Inst = ["XVMADDWEV_D_W", "XVMADDWOD_D_W", "XVMADDWEV_D_WU",
+ "XVMADDWOD_D_WU", "XVMADDWEV_D_WU_W", "XVMADDWOD_D_WU_W"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret
+ (v4i64 LASX256:$xd), (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)),
+ (!cast<LAInst>(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>;
+foreach Inst = ["XVMADDWEV_Q_D", "XVMADDWOD_Q_D", "XVMADDWEV_Q_DU",
+ "XVMADDWOD_Q_DU", "XVMADDWEV_Q_DU_D", "XVMADDWOD_Q_DU_D"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret
+ (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)),
+ (!cast<LAInst>(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>;
+
+// vty: v32i8/v16i16/v8i32/v4i64
+// Pat<(Intrinsic vty:$xj),
+// (LAInst vty:$xj)>;
+foreach Inst = ["XVEXTH_H_B", "XVEXTH_HU_BU",
+ "XVMSKLTZ_B", "XVMSKGEZ_B", "XVMSKNZ_B",
+ "XVCLO_B", "VEXT2XV_H_B", "VEXT2XV_HU_BU",
+ "VEXT2XV_W_B", "VEXT2XV_WU_BU", "VEXT2XV_D_B",
+ "VEXT2XV_DU_BU", "XVREPLVE0_B", "XVREPLVE0_Q"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret (v32i8 LASX256:$xj)),
+ (!cast<LAInst>(Inst) LASX256:$xj)>;
+foreach Inst = ["XVEXTH_W_H", "XVEXTH_WU_HU", "XVMSKLTZ_H",
+ "XVCLO_H", "XVFCVTL_S_H", "XVFCVTH_S_H",
+ "VEXT2XV_W_H", "VEXT2XV_WU_HU", "VEXT2XV_D_H",
+ "VEXT2XV_DU_HU", "XVREPLVE0_H"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret (v16i16 LASX256:$xj)),
+ (!cast<LAInst>(Inst) LASX256:$xj)>;
+foreach Inst = ["XVEXTH_D_W", "XVEXTH_DU_WU", "XVMSKLTZ_W",
+ "XVCLO_W", "XVFFINT_S_W", "XVFFINT_S_WU",
+ "XVFFINTL_D_W", "XVFFINTH_D_W",
+ "VEXT2XV_D_W", "VEXT2XV_DU_WU", "XVREPLVE0_W"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret (v8i32 LASX256:$xj)),
+ (!cast<LAInst>(Inst) LASX256:$xj)>;
+foreach Inst = ["XVEXTH_Q_D", "XVEXTH_QU_DU", "XVMSKLTZ_D",
+ "XVEXTL_Q_D", "XVEXTL_QU_DU",
+ "XVCLO_D", "XVFFINT_D_L", "XVFFINT_D_LU",
+ "XVREPLVE0_D"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret (v4i64 LASX256:$xj)),
+ (!cast<LAInst>(Inst) LASX256:$xj)>;
+
+// Pat<(Intrinsic timm:$imm)
+// (LAInst timm:$imm)>;
+def : Pat<(int_loongarch_lasx_xvldi timm:$imm),
+ (XVLDI (to_valid_timm timm:$imm))>;
+foreach Inst = ["XVREPLI_B", "XVREPLI_H", "XVREPLI_W", "XVREPLI_D"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret timm:$imm),
+ (!cast<LAInst>("Pseudo"#Inst) (to_valid_timm timm:$imm))>;
+
+// vty: v32i8/v16i16/v8i32/v4i64
+// Pat<(Intrinsic vty:$xj, timm:$imm)
+// (LAInst vty:$xj, timm:$imm)>;
+foreach Inst = ["XVSAT_B", "XVSAT_BU", "XVNORI_B", "XVROTRI_B", "XVSLLWIL_H_B",
+ "XVSLLWIL_HU_BU", "XVSRLRI_B", "XVSRARI_B",
+ "XVSEQI_B", "XVSLEI_B", "XVSLEI_BU", "XVSLTI_B", "XVSLTI_BU",
+ "XVREPL128VEI_B", "XVBSLL_V", "XVBSRL_V", "XVSHUF4I_B"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret (v32i8 LASX256:$xj), timm:$imm),
+ (!cast<LAInst>(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>;
+foreach Inst = ["XVSAT_H", "XVSAT_HU", "XVROTRI_H", "XVSLLWIL_W_H",
+ "XVSLLWIL_WU_HU", "XVSRLRI_H", "XVSRARI_H",
+ "XVSEQI_H", "XVSLEI_H", "XVSLEI_HU", "XVSLTI_H", "XVSLTI_HU",
+ "XVREPL128VEI_H", "XVSHUF4I_H"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret (v16i16 LASX256:$xj), timm:$imm),
+ (!cast<LAInst>(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>;
+foreach Inst = ["XVSAT_W", "XVSAT_WU", "XVROTRI_W", "XVSLLWIL_D_W",
+ "XVSLLWIL_DU_WU", "XVSRLRI_W", "XVSRARI_W",
+ "XVSEQI_W", "XVSLEI_W", "XVSLEI_WU", "XVSLTI_W", "XVSLTI_WU",
+ "XVREPL128VEI_W", "XVSHUF4I_W", "XVPICKVE_W"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret (v8i32 LASX256:$xj), timm:$imm),
+ (!cast<LAInst>(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>;
+foreach Inst = ["XVSAT_D", "XVSAT_DU", "XVROTRI_D", "XVSRLRI_D", "XVSRARI_D",
+ "XVSEQI_D", "XVSLEI_D", "XVSLEI_DU", "XVSLTI_D", "XVSLTI_DU",
+ "XVPICKVE2GR_D", "XVPICKVE2GR_DU",
+ "XVREPL128VEI_D", "XVPERMI_D", "XVPICKVE_D"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret (v4i64 LASX256:$xj), timm:$imm),
+ (!cast<LAInst>(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>;
+
+// vty: v32i8/v16i16/v8i32/v4i64
+// Pat<(Intrinsic vty:$xd, vty:$xj, timm:$imm)
+// (LAInst vty:$xd, vty:$xj, timm:$imm)>;
+foreach Inst = ["XVSRLNI_B_H", "XVSRANI_B_H", "XVSRLRNI_B_H", "XVSRARNI_B_H",
+ "XVSSRLNI_B_H", "XVSSRANI_B_H", "XVSSRLNI_BU_H", "XVSSRANI_BU_H",
+ "XVSSRLRNI_B_H", "XVSSRARNI_B_H", "XVSSRLRNI_BU_H", "XVSSRARNI_BU_H",
+ "XVFRSTPI_B", "XVBITSELI_B", "XVEXTRINS_B", "XVPERMI_Q"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret
+ (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), timm:$imm),
+ (!cast<LAInst>(Inst) LASX256:$xd, LASX256:$xj,
+ (to_valid_timm timm:$imm))>;
+foreach Inst = ["XVSRLNI_H_W", "XVSRANI_H_W", "XVSRLRNI_H_W", "XVSRARNI_H_W",
+ "XVSSRLNI_H_W", "XVSSRANI_H_W", "XVSSRLNI_HU_W", "XVSSRANI_HU_W",
+ "XVSSRLRNI_H_W", "XVSSRARNI_H_W", "XVSSRLRNI_HU_W", "XVSSRARNI_HU_W",
+ "XVFRSTPI_H", "XVEXTRINS_H"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret
+ (v16i16 LASX256:$xd), (v16i16 LASX256:$xj), timm:$imm),
+ (!cast<LAInst>(Inst) LASX256:$xd, LASX256:$xj,
+ (to_valid_timm timm:$imm))>;
+foreach Inst = ["XVSRLNI_W_D", "XVSRANI_W_D", "XVSRLRNI_W_D", "XVSRARNI_W_D",
+ "XVSSRLNI_W_D", "XVSSRANI_W_D", "XVSSRLNI_WU_D", "XVSSRANI_WU_D",
+ "XVSSRLRNI_W_D", "XVSSRARNI_W_D", "XVSSRLRNI_WU_D", "XVSSRARNI_WU_D",
+ "XVPERMI_W", "XVEXTRINS_W", "XVINSVE0_W"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret
+ (v8i32 LASX256:$xd), (v8i32 LASX256:$xj), timm:$imm),
+ (!cast<LAInst>(Inst) LASX256:$xd, LASX256:$xj,
+ (to_valid_timm timm:$imm))>;
+foreach Inst = ["XVSRLNI_D_Q", "XVSRANI_D_Q", "XVSRLRNI_D_Q", "XVSRARNI_D_Q",
+ "XVSSRLNI_D_Q", "XVSSRANI_D_Q", "XVSSRLNI_DU_Q", "XVSSRANI_DU_Q",
+ "XVSSRLRNI_D_Q", "XVSSRARNI_D_Q", "XVSSRLRNI_DU_Q", "XVSSRARNI_DU_Q",
+ "XVSHUF4I_D", "XVEXTRINS_D", "XVINSVE0_D"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret
+ (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), timm:$imm),
+ (!cast<LAInst>(Inst) LASX256:$xd, LASX256:$xj,
+ (to_valid_timm timm:$imm))>;
+
+// vty: v32i8/v16i16/v8i32/v4i64
+// Pat<(Intrinsic vty:$xd, vty:$xj, vty:$xk),
+// (LAInst vty:$xd, vty:$xj, vty:$xk)>;
+foreach Inst = ["XVFRSTP_B", "XVBITSEL_V", "XVSHUF_B"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret
+ (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)),
+ (!cast<LAInst>(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>;
+foreach Inst = ["XVFRSTP_H", "XVSHUF_H"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret
+ (v16i16 LASX256:$xd), (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)),
+ (!cast<LAInst>(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>;
+def : Pat<(int_loongarch_lasx_xvshuf_w (v8i32 LASX256:$xd), (v8i32 LASX256:$xj),
+ (v8i32 LASX256:$xk)),
+ (XVSHUF_W LASX256:$xd, LASX256:$xj, LASX256:$xk)>;
+def : Pat<(int_loongarch_lasx_xvshuf_d (v4i64 LASX256:$xd), (v4i64 LASX256:$xj),
+ (v4i64 LASX256:$xk)),
+ (XVSHUF_D LASX256:$xd, LASX256:$xj, LASX256:$xk)>;
+
+// vty: v8f32/v4f64
+// Pat<(Intrinsic vty:$xj, vty:$xk, vty:$xa),
+// (LAInst vty:$xj, vty:$xk, vty:$xa)>;
+foreach Inst = ["XVFMSUB_S", "XVFNMADD_S", "XVFNMSUB_S"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret
+ (v8f32 LASX256:$xj), (v8f32 LASX256:$xk), (v8f32 LASX256:$xa)),
+ (!cast<LAInst>(Inst) LASX256:$xj, LASX256:$xk, LASX256:$xa)>;
+foreach Inst = ["XVFMSUB_D", "XVFNMADD_D", "XVFNMSUB_D"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret
+ (v4f64 LASX256:$xj), (v4f64 LASX256:$xk), (v4f64 LASX256:$xa)),
+ (!cast<LAInst>(Inst) LASX256:$xj, LASX256:$xk, LASX256:$xa)>;
+
+// vty: v8f32/v4f64
+// Pat<(Intrinsic vty:$xj, vty:$xk),
+// (LAInst vty:$xj, vty:$xk)>;
+foreach Inst = ["XVFMAX_S", "XVFMIN_S", "XVFMAXA_S", "XVFMINA_S", "XVFCVT_H_S",
+ "XVFCMP_CAF_S", "XVFCMP_CUN_S", "XVFCMP_CEQ_S", "XVFCMP_CUEQ_S",
+ "XVFCMP_CLT_S", "XVFCMP_CULT_S", "XVFCMP_CLE_S", "XVFCMP_CULE_S",
+ "XVFCMP_CNE_S", "XVFCMP_COR_S", "XVFCMP_CUNE_S",
+ "XVFCMP_SAF_S", "XVFCMP_SUN_S", "XVFCMP_SEQ_S", "XVFCMP_SUEQ_S",
+ "XVFCMP_SLT_S", "XVFCMP_SULT_S", "XVFCMP_SLE_S", "XVFCMP_SULE_S",
+ "XVFCMP_SNE_S", "XVFCMP_SOR_S", "XVFCMP_SUNE_S"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret
+ (v8f32 LASX256:$xj), (v8f32 LASX256:$xk)),
+ (!cast<LAInst>(Inst) LASX256:$xj, LASX256:$xk)>;
+foreach Inst = ["XVFMAX_D", "XVFMIN_D", "XVFMAXA_D", "XVFMINA_D", "XVFCVT_S_D",
+ "XVFTINTRNE_W_D", "XVFTINTRZ_W_D", "XVFTINTRP_W_D", "XVFTINTRM_W_D",
+ "XVFTINT_W_D",
+ "XVFCMP_CAF_D", "XVFCMP_CUN_D", "XVFCMP_CEQ_D", "XVFCMP_CUEQ_D",
+ "XVFCMP_CLT_D", "XVFCMP_CULT_D", "XVFCMP_CLE_D", "XVFCMP_CULE_D",
+ "XVFCMP_CNE_D", "XVFCMP_COR_D", "XVFCMP_CUNE_D",
+ "XVFCMP_SAF_D", "XVFCMP_SUN_D", "XVFCMP_SEQ_D", "XVFCMP_SUEQ_D",
+ "XVFCMP_SLT_D", "XVFCMP_SULT_D", "XVFCMP_SLE_D", "XVFCMP_SULE_D",
+ "XVFCMP_SNE_D", "XVFCMP_SOR_D", "XVFCMP_SUNE_D"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret
+ (v4f64 LASX256:$xj), (v4f64 LASX256:$xk)),
+ (!cast<LAInst>(Inst) LASX256:$xj, LASX256:$xk)>;
+
+// vty: v8f32/v4f64
+// Pat<(Intrinsic vty:$xj),
+// (LAInst vty:$xj)>;
+foreach Inst = ["XVFLOGB_S", "XVFCLASS_S", "XVFSQRT_S", "XVFRECIP_S", "XVFRSQRT_S",
+ "XVFRINT_S", "XVFCVTL_D_S", "XVFCVTH_D_S",
+ "XVFRINTRNE_S", "XVFRINTRZ_S", "XVFRINTRP_S", "XVFRINTRM_S",
+ "XVFTINTRNE_W_S", "XVFTINTRZ_W_S", "XVFTINTRP_W_S", "XVFTINTRM_W_S",
+ "XVFTINT_W_S", "XVFTINTRZ_WU_S", "XVFTINT_WU_S",
+ "XVFTINTRNEL_L_S", "XVFTINTRNEH_L_S", "XVFTINTRZL_L_S",
+ "XVFTINTRZH_L_S", "XVFTINTRPL_L_S", "XVFTINTRPH_L_S",
+ "XVFTINTRML_L_S", "XVFTINTRMH_L_S", "XVFTINTL_L_S",
+ "XVFTINTH_L_S"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret (v8f32 LASX256:$xj)),
+ (!cast<LAInst>(Inst) LASX256:$xj)>;
+foreach Inst = ["XVFLOGB_D", "XVFCLASS_D", "XVFSQRT_D", "XVFRECIP_D", "XVFRSQRT_D",
+ "XVFRINT_D",
+ "XVFRINTRNE_D", "XVFRINTRZ_D", "XVFRINTRP_D", "XVFRINTRM_D",
+ "XVFTINTRNE_L_D", "XVFTINTRZ_L_D", "XVFTINTRP_L_D", "XVFTINTRM_L_D",
+ "XVFTINT_L_D", "XVFTINTRZ_LU_D", "XVFTINT_LU_D"] in
+ def : Pat<(deriveLASXIntrinsic<Inst>.ret (v4f64 LASX256:$xj)),
+ (!cast<LAInst>(Inst) LASX256:$xj)>;
+
+def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm),
+ (XVPICKVE_W v8f32:$xj, (to_valid_timm timm:$imm))>;
+def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm),
+ (XVPICKVE_D v4f64:$xj, (to_valid_timm timm:$imm))>;
+
+// load
+def : Pat<(int_loongarch_lasx_xvld GPR:$rj, timm:$imm),
+ (XVLD GPR:$rj, (to_valid_timm timm:$imm))>;
+def : Pat<(int_loongarch_lasx_xvldx GPR:$rj, GPR:$rk),
+ (XVLDX GPR:$rj, GPR:$rk)>;
+
+def : Pat<(int_loongarch_lasx_xvldrepl_b GPR:$rj, timm:$imm),
+ (XVLDREPL_B GPR:$rj, (to_valid_timm timm:$imm))>;
+def : Pat<(int_loongarch_lasx_xvldrepl_h GPR:$rj, timm:$imm),
+ (XVLDREPL_H GPR:$rj, (to_valid_timm timm:$imm))>;
+def : Pat<(int_loongarch_lasx_xvldrepl_w GPR:$rj, timm:$imm),
+ (XVLDREPL_W GPR:$rj, (to_valid_timm timm:$imm))>;
+def : Pat<(int_loongarch_lasx_xvldrepl_d GPR:$rj, timm:$imm),
+ (XVLDREPL_D GPR:$rj, (to_valid_timm timm:$imm))>;
+
+// store
+def : Pat<(int_loongarch_lasx_xvst LASX256:$xd, GPR:$rj, timm:$imm),
+ (XVST LASX256:$xd, GPR:$rj, (to_valid_timm timm:$imm))>;
+def : Pat<(int_loongarch_lasx_xvstx LASX256:$xd, GPR:$rj, GPR:$rk),
+ (XVSTX LASX256:$xd, GPR:$rj, GPR:$rk)>;
+
+def : Pat<(int_loongarch_lasx_xvstelm_b v32i8:$xd, GPR:$rj, timm:$imm, timm:$idx),
+ (XVSTELM_B v32i8:$xd, GPR:$rj, (to_valid_timm timm:$imm),
+ (to_valid_timm timm:$idx))>;
+def : Pat<(int_loongarch_lasx_xvstelm_h v16i16:$xd, GPR:$rj, timm:$imm, timm:$idx),
+ (XVSTELM_H v16i16:$xd, GPR:$rj, (to_valid_timm timm:$imm),
+ (to_valid_timm timm:$idx))>;
+def : Pat<(int_loongarch_lasx_xvstelm_w v8i32:$xd, GPR:$rj, timm:$imm, timm:$idx),
+ (XVSTELM_W v8i32:$xd, GPR:$rj, (to_valid_timm timm:$imm),
+ (to_valid_timm timm:$idx))>;
+def : Pat<(int_loongarch_lasx_xvstelm_d v4i64:$xd, GPR:$rj, timm:$imm, timm:$idx),
+ (XVSTELM_D v4i64:$xd, GPR:$rj, (to_valid_timm timm:$imm),
+ (to_valid_timm timm:$idx))>;
+
 } // Predicates = [HasExtLASX]
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index a8ed285a37cf..e468176885d7 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -10,6 +10,173 @@
 //
 //===----------------------------------------------------------------------===//
+def SDT_LoongArchVreplve : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>,
+                                                SDTCisInt<1>, SDTCisVec<1>,
+                                                SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
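+// i.e. SDT_LoongArchVreplve has one result and two operands: the result and
+// the first (vector) operand share an integer vector type, and the second
+// operand is the integer lane index.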
+def SDT_LoongArchVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>;
+
+// Target nodes.
+def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>;
+def loongarch_vall_nonzero : SDNode<"LoongArchISD::VALL_NONZERO",
+                                    SDT_LoongArchVecCond>;
+def loongarch_vany_nonzero : SDNode<"LoongArchISD::VANY_NONZERO",
+                                    SDT_LoongArchVecCond>;
+def loongarch_vall_zero : SDNode<"LoongArchISD::VALL_ZERO",
+                                 SDT_LoongArchVecCond>;
+def loongarch_vany_zero : SDNode<"LoongArchISD::VANY_ZERO",
+                                 SDT_LoongArchVecCond>;
+
+def loongarch_vpick_sext_elt : SDNode<"LoongArchISD::VPICK_SEXT_ELT",
+                                      SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>;
+def loongarch_vpick_zext_elt : SDNode<"LoongArchISD::VPICK_ZEXT_ELT",
+                                      SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>;
+
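+// VecCond wraps a whole-vector test into a GPR-producing pseudo;
+// usesCustomInserter means it is lowered in EmitInstrWithCustomInserter
+// rather than by a plain selection pattern.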
+class VecCond<SDPatternOperator OpNode, ValueType TyNode,
+              RegisterClass RC = LSX128>
+    : Pseudo<(outs GPR:$rd), (ins RC:$vj),
+             [(set GPR:$rd, (OpNode (TyNode RC:$vj)))]> {
+  let hasSideEffects = 0;
+  let mayLoad = 0;
+  let mayStore = 0;
+  let usesCustomInserter = 1;
+}
+
+def vsplat_imm_eq_1 : PatFrags<(ops), [(build_vector),
+                                       (bitconvert (v4i32 (build_vector)))], [{
+  APInt Imm;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0).getNode();
+
+  return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1;
+}]>;
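+// vsplat_imm_eq_1 matches a build_vector (optionally seen through a bitcast
+// from v4i32) whose elements are all the constant 1, as used by the
+// `1 << vk` bit idioms below.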
+
+def vsplati8_imm_eq_7 : PatFrags<(ops), [(build_vector)], [{
+  APInt Imm;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0).getNode();
+
+  return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 7;
+}]>;
+def vsplati16_imm_eq_15 : PatFrags<(ops), [(build_vector)], [{
+  APInt Imm;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0).getNode();
+
+  return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 15;
+}]>;
+def vsplati32_imm_eq_31 : PatFrags<(ops), [(build_vector)], [{
+  APInt Imm;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0).getNode();
+
+  return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 31;
+}]>;
+def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector),
+                                           (bitconvert (v4i32 (build_vector)))], [{
+  APInt Imm;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0).getNode();
+
+  return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63;
+}]>;
+
+def vsplatf32_fpimm_eq_1
+    : PatFrags<(ops), [(bitconvert (v4i32 (build_vector))),
+                       (bitconvert (v8i32 (build_vector)))], [{
+  APInt Imm;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+  N = N->getOperand(0).getNode();
+
+  return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() &&
+         Imm == APFloat(+1.0f).bitcastToAPInt();
+}]>;
+def vsplatf64_fpimm_eq_1
+    : PatFrags<(ops), [(bitconvert (v2i64 (build_vector))),
+                       (bitconvert (v4i64 (build_vector)))], [{
+  APInt Imm;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+  N = N->getOperand(0).getNode();
+
+  return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() &&
+         Imm == APFloat(+1.0).bitcastToAPInt();
+}]>;
+
+def vsplati8imm7 : PatFrag<(ops node:$reg),
+                           (and node:$reg, vsplati8_imm_eq_7)>;
+def vsplati16imm15 : PatFrag<(ops node:$reg),
+                             (and node:$reg, vsplati16_imm_eq_15)>;
+def vsplati32imm31 : PatFrag<(ops node:$reg),
+                             (and node:$reg, vsplati32_imm_eq_31)>;
+def vsplati64imm63 : PatFrag<(ops node:$reg),
+                             (and node:$reg, vsplati64_imm_eq_63)>;
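+// vsplati*imm* match (and $reg, splat(EltBits - 1)), i.e. a shift amount that
+// was already masked to the element width; the VBIT* patterns below drop the
+// redundant mask when selecting the instruction.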
+
+foreach N = [3, 4, 5, 6, 8] in
+  def SplatPat_uimm#N : ComplexPattern<vAny, 1, "selectVSplatImm<"#N#">",
+                                       [build_vector, bitconvert], [], 2>;
+
+foreach N = [5] in
+  def SplatPat_simm#N : ComplexPattern<vAny, 1, "selectVSplatImm<"#N#", true>",
+                                       [build_vector, bitconvert]>;
+
+def vsplat_uimm_inv_pow2 : ComplexPattern<vAny, 1, "selectVSplatUimmInvPow2",
+                                          [build_vector, bitconvert]>;
+
+def vsplat_uimm_pow2 : ComplexPattern<vAny, 1, "selectVSplatUimmPow2",
+                                      [build_vector, bitconvert]>;
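+// e.g. SplatPat_uimm5 matches a splatted constant representable as a 5-bit
+// unsigned immediate; vsplat_uimm_pow2 and vsplat_uimm_inv_pow2 appear to
+// match splats of (1 << imm) and ~(1 << imm) for the VBITSETI_*/VBITCLRI_*
+// patterns further down.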
+
+def muladd : PatFrag<(ops node:$vd, node:$vj, node:$vk),
+ (add node:$vd, (mul node:$vj, node:$vk))>;
+
+def mulsub : PatFrag<(ops node:$vd, node:$vj, node:$vk),
+ (sub node:$vd, (mul node:$vj, node:$vk))>;
+
+def lsxsplati8 : PatFrag<(ops node:$e0),
+                         (v16i8 (build_vector node:$e0, node:$e0,
+                                              node:$e0, node:$e0,
+                                              node:$e0, node:$e0,
+                                              node:$e0, node:$e0,
+                                              node:$e0, node:$e0,
+                                              node:$e0, node:$e0,
+                                              node:$e0, node:$e0,
+                                              node:$e0, node:$e0))>;
+def lsxsplati16 : PatFrag<(ops node:$e0),
+                          (v8i16 (build_vector node:$e0, node:$e0,
+                                               node:$e0, node:$e0,
+                                               node:$e0, node:$e0,
+                                               node:$e0, node:$e0))>;
+def lsxsplati32 : PatFrag<(ops node:$e0),
+                          (v4i32 (build_vector node:$e0, node:$e0,
+                                               node:$e0, node:$e0))>;
+def lsxsplati64 : PatFrag<(ops node:$e0),
+                          (v2i64 (build_vector node:$e0, node:$e0))>;
+def lsxsplatf32 : PatFrag<(ops node:$e0),
+                          (v4f32 (build_vector node:$e0, node:$e0,
+                                               node:$e0, node:$e0))>;
+def lsxsplatf64 : PatFrag<(ops node:$e0),
+                          (v2f64 (build_vector node:$e0, node:$e0))>;
+
+def to_valid_timm : SDNodeXForm<timm, [{
+  auto CN = cast<ConstantSDNode>(N);
+  return CurDAG->getTargetConstant(CN->getSExtValue(), SDLoc(N),
+                                   Subtarget->getGRLenVT());
+}]>;
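+// to_valid_timm rebuilds the immediate as a target constant of GRLen width so
+// it can be used directly as a machine operand.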
+
//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
@@ -1004,4 +1171,894 @@ def PseudoVREPLI_D : Pseudo<(outs LSX128:$vd), (ins simm10:$imm), [],
"vrepli.d", "$vd, $imm">;
}
+def PseudoVBNZ_B : VecCond<loongarch_vall_nonzero, v16i8>;
+def PseudoVBNZ_H : VecCond<loongarch_vall_nonzero, v8i16>;
+def PseudoVBNZ_W : VecCond<loongarch_vall_nonzero, v4i32>;
+def PseudoVBNZ_D : VecCond<loongarch_vall_nonzero, v2i64>;
+def PseudoVBNZ : VecCond<loongarch_vany_nonzero, v16i8>;
+
+def PseudoVBZ_B : VecCond<loongarch_vall_zero, v16i8>;
+def PseudoVBZ_H : VecCond<loongarch_vall_zero, v8i16>;
+def PseudoVBZ_W : VecCond<loongarch_vall_zero, v4i32>;
+def PseudoVBZ_D : VecCond<loongarch_vall_zero, v2i64>;
+def PseudoVBZ : VecCond<loongarch_vany_zero, v16i8>;
+
+} // Predicates = [HasExtLSX]
+
+multiclass PatVr<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(v16i8 (OpNode (v16i8 LSX128:$vj))),
+ (!cast<LAInst>(Inst#"_B") LSX128:$vj)>;
+ def : Pat<(v8i16 (OpNode (v8i16 LSX128:$vj))),
+ (!cast<LAInst>(Inst#"_H") LSX128:$vj)>;
+ def : Pat<(v4i32 (OpNode (v4i32 LSX128:$vj))),
+ (!cast<LAInst>(Inst#"_W") LSX128:$vj)>;
+ def : Pat<(v2i64 (OpNode (v2i64 LSX128:$vj))),
+ (!cast<LAInst>(Inst#"_D") LSX128:$vj)>;
+}
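+// e.g. `defm : PatVr<ctlz, "VCLZ">` below produces four patterns mapping ctlz
+// on v16i8/v8i16/v4i32/v2i64 to VCLZ_B/VCLZ_H/VCLZ_W/VCLZ_D.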
+
+multiclass PatVrF<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(v4f32 (OpNode (v4f32 LSX128:$vj))),
+ (!cast<LAInst>(Inst#"_S") LSX128:$vj)>;
+ def : Pat<(v2f64 (OpNode (v2f64 LSX128:$vj))),
+ (!cast<LAInst>(Inst#"_D") LSX128:$vj)>;
+}
+
+multiclass PatVrVr<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_B") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_H") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_W") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_D") LSX128:$vj, LSX128:$vk)>;
+}
+
+multiclass PatVrVrF<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v4f32 LSX128:$vj), (v4f32 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_S") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v2f64 LSX128:$vj), (v2f64 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_D") LSX128:$vj, LSX128:$vk)>;
+}
+
+multiclass PatVrVrU<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_BU") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_HU") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_WU") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_DU") LSX128:$vj, LSX128:$vk)>;
+}
+
+multiclass PatVrSimm5<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_simm5 simm5:$imm))),
+ (!cast<LAInst>(Inst#"_B") LSX128:$vj, simm5:$imm)>;
+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_simm5 simm5:$imm))),
+ (!cast<LAInst>(Inst#"_H") LSX128:$vj, simm5:$imm)>;
+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_simm5 simm5:$imm))),
+ (!cast<LAInst>(Inst#"_W") LSX128:$vj, simm5:$imm)>;
+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_simm5 simm5:$imm))),
+ (!cast<LAInst>(Inst#"_D") LSX128:$vj, simm5:$imm)>;
+}
+
+multiclass PatVrUimm5<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm5 uimm5:$imm))),
+ (!cast<LAInst>(Inst#"_BU") LSX128:$vj, uimm5:$imm)>;
+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_uimm5 uimm5:$imm))),
+ (!cast<LAInst>(Inst#"_HU") LSX128:$vj, uimm5:$imm)>;
+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_uimm5 uimm5:$imm))),
+ (!cast<LAInst>(Inst#"_WU") LSX128:$vj, uimm5:$imm)>;
+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_uimm5 uimm5:$imm))),
+ (!cast<LAInst>(Inst#"_DU") LSX128:$vj, uimm5:$imm)>;
+}
+
+multiclass PatVrVrVr<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_B") LSX128:$vd, LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_H") LSX128:$vd, LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_W") LSX128:$vd, LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)),
+ (!cast<LAInst>(Inst#"_D") LSX128:$vd, LSX128:$vj, LSX128:$vk)>;
+}
+
+multiclass PatShiftVrVr<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v16i8 LSX128:$vj), (and vsplati8_imm_eq_7,
+ (v16i8 LSX128:$vk))),
+ (!cast<LAInst>(Inst#"_B") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v8i16 LSX128:$vj), (and vsplati16_imm_eq_15,
+ (v8i16 LSX128:$vk))),
+ (!cast<LAInst>(Inst#"_H") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v4i32 LSX128:$vj), (and vsplati32_imm_eq_31,
+ (v4i32 LSX128:$vk))),
+ (!cast<LAInst>(Inst#"_W") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(OpNode (v2i64 LSX128:$vj), (and vsplati64_imm_eq_63,
+ (v2i64 LSX128:$vk))),
+ (!cast<LAInst>(Inst#"_D") LSX128:$vj, LSX128:$vk)>;
+}
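+// PatShiftVrVr covers the same shapes as PatVrVr, but the shift amount
+// arrives pre-masked with EltBits - 1 (see vsplati*_imm_eq_* above); the mask
+// is dropped when selecting the instruction.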
+
+multiclass PatShiftVrUimm<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm3 uimm3:$imm))),
+ (!cast<LAInst>(Inst#"_B") LSX128:$vj, uimm3:$imm)>;
+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_uimm4 uimm4:$imm))),
+ (!cast<LAInst>(Inst#"_H") LSX128:$vj, uimm4:$imm)>;
+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_uimm5 uimm5:$imm))),
+ (!cast<LAInst>(Inst#"_W") LSX128:$vj, uimm5:$imm)>;
+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_uimm6 uimm6:$imm))),
+ (!cast<LAInst>(Inst#"_D") LSX128:$vj, uimm6:$imm)>;
+}
+
+multiclass PatCCVrSimm5<CondCode CC, string Inst> {
+ def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj),
+ (v16i8 (SplatPat_simm5 simm5:$imm)), CC)),
+ (!cast<LAInst>(Inst#"_B") LSX128:$vj, simm5:$imm)>;
+ def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj),
+ (v8i16 (SplatPat_simm5 simm5:$imm)), CC)),
+ (!cast<LAInst>(Inst#"_H") LSX128:$vj, simm5:$imm)>;
+ def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj),
+ (v4i32 (SplatPat_simm5 simm5:$imm)), CC)),
+ (!cast<LAInst>(Inst#"_W") LSX128:$vj, simm5:$imm)>;
+ def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj),
+ (v2i64 (SplatPat_simm5 simm5:$imm)), CC)),
+ (!cast<LAInst>(Inst#"_D") LSX128:$vj, simm5:$imm)>;
+}
+
+multiclass PatCCVrUimm5<CondCode CC, string Inst> {
+ def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj),
+ (v16i8 (SplatPat_uimm5 uimm5:$imm)), CC)),
+ (!cast<LAInst>(Inst#"_BU") LSX128:$vj, uimm5:$imm)>;
+ def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj),
+ (v8i16 (SplatPat_uimm5 uimm5:$imm)), CC)),
+ (!cast<LAInst>(Inst#"_HU") LSX128:$vj, uimm5:$imm)>;
+ def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj),
+ (v4i32 (SplatPat_uimm5 uimm5:$imm)), CC)),
+ (!cast<LAInst>(Inst#"_WU") LSX128:$vj, uimm5:$imm)>;
+ def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj),
+ (v2i64 (SplatPat_uimm5 uimm5:$imm)), CC)),
+ (!cast<LAInst>(Inst#"_DU") LSX128:$vj, uimm5:$imm)>;
+}
+
+multiclass PatCCVrVr<CondCode CC, string Inst> {
+ def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), (v16i8 LSX128:$vk), CC)),
+ (!cast<LAInst>(Inst#"_B") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), (v8i16 LSX128:$vk), CC)),
+ (!cast<LAInst>(Inst#"_H") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), (v4i32 LSX128:$vk), CC)),
+ (!cast<LAInst>(Inst#"_W") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), (v2i64 LSX128:$vk), CC)),
+ (!cast<LAInst>(Inst#"_D") LSX128:$vj, LSX128:$vk)>;
+}
+
+multiclass PatCCVrVrU<CondCode CC, string Inst> {
+ def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), (v16i8 LSX128:$vk), CC)),
+ (!cast<LAInst>(Inst#"_BU") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), (v8i16 LSX128:$vk), CC)),
+ (!cast<LAInst>(Inst#"_HU") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), (v4i32 LSX128:$vk), CC)),
+ (!cast<LAInst>(Inst#"_WU") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), (v2i64 LSX128:$vk), CC)),
+ (!cast<LAInst>(Inst#"_DU") LSX128:$vj, LSX128:$vk)>;
+}
+
+multiclass PatCCVrVrF<CondCode CC, string Inst> {
+ def : Pat<(v4i32 (setcc (v4f32 LSX128:$vj), (v4f32 LSX128:$vk), CC)),
+ (!cast<LAInst>(Inst#"_S") LSX128:$vj, LSX128:$vk)>;
+ def : Pat<(v2i64 (setcc (v2f64 LSX128:$vj), (v2f64 LSX128:$vk), CC)),
+ (!cast<LAInst>(Inst#"_D") LSX128:$vj, LSX128:$vk)>;
+}
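+// PatCCVrVrF: vector float compares produce integer lane masks, hence the
+// v4i32/v2i64 results for v4f32/v2f64 inputs.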
+
+let Predicates = [HasExtLSX] in {
+
+// VADD_{B/H/W/D}
+defm : PatVrVr<add, "VADD">;
+// VSUB_{B/H/W/D}
+defm : PatVrVr<sub, "VSUB">;
+
+// VADDI_{B/H/W/D}U
+defm : PatVrUimm5<add, "VADDI">;
+// VSUBI_{B/H/W/D}U
+defm : PatVrUimm5<sub, "VSUBI">;
+
+// VNEG_{B/H/W/D}
+def : Pat<(sub immAllZerosV, (v16i8 LSX128:$vj)), (VNEG_B LSX128:$vj)>;
+def : Pat<(sub immAllZerosV, (v8i16 LSX128:$vj)), (VNEG_H LSX128:$vj)>;
+def : Pat<(sub immAllZerosV, (v4i32 LSX128:$vj)), (VNEG_W LSX128:$vj)>;
+def : Pat<(sub immAllZerosV, (v2i64 LSX128:$vj)), (VNEG_D LSX128:$vj)>;
+
+// VMAX[I]_{B/H/W/D}[U]
+defm : PatVrVr<smax, "VMAX">;
+defm : PatVrVrU<umax, "VMAX">;
+defm : PatVrSimm5<smax, "VMAXI">;
+defm : PatVrUimm5<umax, "VMAXI">;
+
+// VMIN[I]_{B/H/W/D}[U]
+defm : PatVrVr<smin, "VMIN">;
+defm : PatVrVrU<umin, "VMIN">;
+defm : PatVrSimm5<smin, "VMINI">;
+defm : PatVrUimm5<umin, "VMINI">;
+
+// VMUL_{B/H/W/D}
+defm : PatVrVr<mul, "VMUL">;
+
+// VMUH_{B/H/W/D}[U]
+defm : PatVrVr<mulhs, "VMUH">;
+defm : PatVrVrU<mulhu, "VMUH">;
+
+// VMADD_{B/H/W/D}
+defm : PatVrVrVr<muladd, "VMADD">;
+// VMSUB_{B/H/W/D}
+defm : PatVrVrVr<mulsub, "VMSUB">;
+
+// VDIV_{B/H/W/D}[U]
+defm : PatVrVr<sdiv, "VDIV">;
+defm : PatVrVrU<udiv, "VDIV">;
+
+// VMOD_{B/H/W/D}[U]
+defm : PatVrVr<srem, "VMOD">;
+defm : PatVrVrU<urem, "VMOD">;
+
+// VAND_V
+foreach vt = [v16i8, v8i16, v4i32, v2i64] in
+def : Pat<(and (vt LSX128:$vj), (vt LSX128:$vk)),
+ (VAND_V LSX128:$vj, LSX128:$vk)>;
+// VOR_V
+foreach vt = [v16i8, v8i16, v4i32, v2i64] in
+def : Pat<(or (vt LSX128:$vj), (vt LSX128:$vk)),
+ (VOR_V LSX128:$vj, LSX128:$vk)>;
+// VXOR_V
+foreach vt = [v16i8, v8i16, v4i32, v2i64] in
+def : Pat<(xor (vt LSX128:$vj), (vt LSX128:$vk)),
+ (VXOR_V LSX128:$vj, LSX128:$vk)>;
+// VNOR_V
+foreach vt = [v16i8, v8i16, v4i32, v2i64] in
+def : Pat<(vnot (or (vt LSX128:$vj), (vt LSX128:$vk))),
+ (VNOR_V LSX128:$vj, LSX128:$vk)>;
+
+// VANDI_B
+def : Pat<(and (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))),
+ (VANDI_B LSX128:$vj, uimm8:$imm)>;
+// VORI_B
+def : Pat<(or (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))),
+ (VORI_B LSX128:$vj, uimm8:$imm)>;
+
+// VXORI_B
+def : Pat<(xor (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))),
+ (VXORI_B LSX128:$vj, uimm8:$imm)>;
+
+// VSLL[I]_{B/H/W/D}
+defm : PatVrVr<shl, "VSLL">;
+defm : PatShiftVrVr<shl, "VSLL">;
+defm : PatShiftVrUimm<shl, "VSLLI">;
+
+// VSRL[I]_{B/H/W/D}
+defm : PatVrVr<srl, "VSRL">;
+defm : PatShiftVrVr<srl, "VSRL">;
+defm : PatShiftVrUimm<srl, "VSRLI">;
+
+// VSRA[I]_{B/H/W/D}
+defm : PatVrVr<sra, "VSRA">;
+defm : PatShiftVrVr<sra, "VSRA">;
+defm : PatShiftVrUimm<sra, "VSRAI">;
+
+// VCLZ_{B/H/W/D}
+defm : PatVr<ctlz, "VCLZ">;
+
+// VPCNT_{B/H/W/D}
+defm : PatVr<ctpop, "VPCNT">;
+
+// VBITCLR_{B/H/W/D}
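+// Recognizes the per-element `vj & ~(1 << vk)` idiom (clear bit vk), with or
+// without an explicit EltBits - 1 mask on the bit index; the VBITSET_* and
+// VBITREV_* blocks below match the analogous `|` and `^` forms.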
+def : Pat<(and v16i8:$vj, (vnot (shl vsplat_imm_eq_1, v16i8:$vk))),
+ (v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>;
+def : Pat<(and v8i16:$vj, (vnot (shl vsplat_imm_eq_1, v8i16:$vk))),
+ (v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>;
+def : Pat<(and v4i32:$vj, (vnot (shl vsplat_imm_eq_1, v4i32:$vk))),
+ (v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>;
+def : Pat<(and v2i64:$vj, (vnot (shl vsplat_imm_eq_1, v2i64:$vk))),
+ (v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>;
+def : Pat<(and v16i8:$vj, (vnot (shl vsplat_imm_eq_1,
+ (vsplati8imm7 v16i8:$vk)))),
+ (v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>;
+def : Pat<(and v8i16:$vj, (vnot (shl vsplat_imm_eq_1,
+ (vsplati16imm15 v8i16:$vk)))),
+ (v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>;
+def : Pat<(and v4i32:$vj, (vnot (shl vsplat_imm_eq_1,
+ (vsplati32imm31 v4i32:$vk)))),
+ (v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>;
+def : Pat<(and v2i64:$vj, (vnot (shl vsplat_imm_eq_1,
+ (vsplati64imm63 v2i64:$vk)))),
+ (v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>;
+
+// VBITCLRI_{B/H/W/D}
+def : Pat<(and (v16i8 LSX128:$vj), (v16i8 (vsplat_uimm_inv_pow2 uimm3:$imm))),
+ (VBITCLRI_B LSX128:$vj, uimm3:$imm)>;
+def : Pat<(and (v8i16 LSX128:$vj), (v8i16 (vsplat_uimm_inv_pow2 uimm4:$imm))),
+ (VBITCLRI_H LSX128:$vj, uimm4:$imm)>;
+def : Pat<(and (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_inv_pow2 uimm5:$imm))),
+ (VBITCLRI_W LSX128:$vj, uimm5:$imm)>;
+def : Pat<(and (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_inv_pow2 uimm6:$imm))),
+ (VBITCLRI_D LSX128:$vj, uimm6:$imm)>;
+
+// VBITSET_{B/H/W/D}
+def : Pat<(or v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)),
+ (v16i8 (VBITSET_B v16i8:$vj, v16i8:$vk))>;
+def : Pat<(or v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)),
+ (v8i16 (VBITSET_H v8i16:$vj, v8i16:$vk))>;
+def : Pat<(or v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)),
+ (v4i32 (VBITSET_W v4i32:$vj, v4i32:$vk))>;
+def : Pat<(or v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)),
+ (v2i64 (VBITSET_D v2i64:$vj, v2i64:$vk))>;
+def : Pat<(or v16i8:$vj, (shl vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))),
+ (v16i8 (VBITSET_B v16i8:$vj, v16i8:$vk))>;
+def : Pat<(or v8i16:$vj, (shl vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))),
+ (v8i16 (VBITSET_H v8i16:$vj, v8i16:$vk))>;
+def : Pat<(or v4i32:$vj, (shl vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))),
+ (v4i32 (VBITSET_W v4i32:$vj, v4i32:$vk))>;
+def : Pat<(or v2i64:$vj, (shl vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))),
+ (v2i64 (VBITSET_D v2i64:$vj, v2i64:$vk))>;
+
+// VBITSETI_{B/H/W/D}
+def : Pat<(or (v16i8 LSX128:$vj), (v16i8 (vsplat_uimm_pow2 uimm3:$imm))),
+ (VBITSETI_B LSX128:$vj, uimm3:$imm)>;
+def : Pat<(or (v8i16 LSX128:$vj), (v8i16 (vsplat_uimm_pow2 uimm4:$imm))),
+ (VBITSETI_H LSX128:$vj, uimm4:$imm)>;
+def : Pat<(or (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_pow2 uimm5:$imm))),
+ (VBITSETI_W LSX128:$vj, uimm5:$imm)>;
+def : Pat<(or (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_pow2 uimm6:$imm))),
+ (VBITSETI_D LSX128:$vj, uimm6:$imm)>;
+
+// VBITREV_{B/H/W/D}
+def : Pat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)),
+ (v16i8 (VBITREV_B v16i8:$vj, v16i8:$vk))>;
+def : Pat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)),
+ (v8i16 (VBITREV_H v8i16:$vj, v8i16:$vk))>;
+def : Pat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)),
+ (v4i32 (VBITREV_W v4i32:$vj, v4i32:$vk))>;
+def : Pat<(xor v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)),
+ (v2i64 (VBITREV_D v2i64:$vj, v2i64:$vk))>;
+def : Pat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))),
+ (v16i8 (VBITREV_B v16i8:$vj, v16i8:$vk))>;
+def : Pat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))),
+ (v8i16 (VBITREV_H v8i16:$vj, v8i16:$vk))>;
+def : Pat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))),
+ (v4i32 (VBITREV_W v4i32:$vj, v4i32:$vk))>;
+def : Pat<(xor v2i64:$vj, (shl vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))),
+ (v2i64 (VBITREV_D v2i64:$vj, v2i64:$vk))>;
+
+// VBITREVI_{B/H/W/D}
+def : Pat<(xor (v16i8 LSX128:$vj), (v16i8 (vsplat_uimm_pow2 uimm3:$imm))),
+ (VBITREVI_B LSX128:$vj, uimm3:$imm)>;
+def : Pat<(xor (v8i16 LSX128:$vj), (v8i16 (vsplat_uimm_pow2 uimm4:$imm))),
+ (VBITREVI_H LSX128:$vj, uimm4:$imm)>;
+def : Pat<(xor (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_pow2 uimm5:$imm))),
+ (VBITREVI_W LSX128:$vj, uimm5:$imm)>;
+def : Pat<(xor (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_pow2 uimm6:$imm))),
+ (VBITREVI_D LSX128:$vj, uimm6:$imm)>;
+
+// VFADD_{S/D}
+defm : PatVrVrF<fadd, "VFADD">;
+
+// VFSUB_{S/D}
+defm : PatVrVrF<fsub, "VFSUB">;
+
+// VFMUL_{S/D}
+defm : PatVrVrF<fmul, "VFMUL">;
+
+// VFDIV_{S/D}
+defm : PatVrVrF<fdiv, "VFDIV">;
+
+// VFMADD_{S/D}
+def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va),
+ (VFMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>;
+def : Pat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va),
+ (VFMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>;
+
+// VFMSUB_{S/D}
+def : Pat<(fma v4f32:$vj, v4f32:$vk, (fneg v4f32:$va)),
+ (VFMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>;
+def : Pat<(fma v2f64:$vj, v2f64:$vk, (fneg v2f64:$va)),
+ (VFMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>;
+
+// VFNMADD_{S/D}
+def : Pat<(fneg (fma v4f32:$vj, v4f32:$vk, v4f32:$va)),
+ (VFNMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>;
+def : Pat<(fneg (fma v2f64:$vj, v2f64:$vk, v2f64:$va)),
+ (VFNMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>;
+def : Pat<(fma_nsz (fneg v4f32:$vj), v4f32:$vk, (fneg v4f32:$va)),
+ (VFNMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>;
+def : Pat<(fma_nsz (fneg v2f64:$vj), v2f64:$vk, (fneg v2f64:$va)),
+ (VFNMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>;
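+// -(vj * vk + va) and (-vj) * vk + (-va) only differ in the sign of a zero
+// result, so the second pair of patterns is guarded by fma_nsz.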
+
+// VFNMSUB_{S/D}
+def : Pat<(fneg (fma v4f32:$vj, v4f32:$vk, (fneg v4f32:$va))),
+ (VFNMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>;
+def : Pat<(fneg (fma v2f64:$vj, v2f64:$vk, (fneg v2f64:$va))),
+ (VFNMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>;
+def : Pat<(fma_nsz (fneg v4f32:$vj), v4f32:$vk, v4f32:$va),
+ (VFNMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>;
+def : Pat<(fma_nsz (fneg v2f64:$vj), v2f64:$vk, v2f64:$va),
+ (VFNMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>;
+
+// VFSQRT_{S/D}
+defm : PatVrF<fsqrt, "VFSQRT">;
+
+// VFRECIP_{S/D}
+def : Pat<(fdiv vsplatf32_fpimm_eq_1, v4f32:$vj),
+ (VFRECIP_S v4f32:$vj)>;
+def : Pat<(fdiv vsplatf64_fpimm_eq_1, v2f64:$vj),
+ (VFRECIP_D v2f64:$vj)>;
+
+// VFRSQRT_{S/D}
+def : Pat<(fdiv vsplatf32_fpimm_eq_1, (fsqrt v4f32:$vj)),
+ (VFRSQRT_S v4f32:$vj)>;
+def : Pat<(fdiv vsplatf64_fpimm_eq_1, (fsqrt v2f64:$vj)),
+ (VFRSQRT_D v2f64:$vj)>;
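+// A reciprocal is a division with a splatted +1.0 numerator (see
+// vsplatf32_fpimm_eq_1 above); VFRSQRT_* additionally folds in the fsqrt.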
+
+// VSEQ[I]_{B/H/W/D}
+defm : PatCCVrSimm5<SETEQ, "VSEQI">;
+defm : PatCCVrVr<SETEQ, "VSEQ">;
+
+// VSLE[I]_{B/H/W/D}[U]
+defm : PatCCVrSimm5<SETLE, "VSLEI">;
+defm : PatCCVrUimm5<SETULE, "VSLEI">;
+defm : PatCCVrVr<SETLE, "VSLE">;
+defm : PatCCVrVrU<SETULE, "VSLE">;
+
+// VSLT[I]_{B/H/W/D}[U]
+defm : PatCCVrSimm5<SETLT, "VSLTI">;
+defm : PatCCVrUimm5<SETULT, "VSLTI">;
+defm : PatCCVrVr<SETLT, "VSLT">;
+defm : PatCCVrVrU<SETULT, "VSLT">;
+
+// VFCMP.cond.{S/D}
+defm : PatCCVrVrF<SETEQ, "VFCMP_CEQ">;
+defm : PatCCVrVrF<SETOEQ, "VFCMP_CEQ">;
+defm : PatCCVrVrF<SETUEQ, "VFCMP_CUEQ">;
+
+defm : PatCCVrVrF<SETLE, "VFCMP_CLE">;
+defm : PatCCVrVrF<SETOLE, "VFCMP_CLE">;
+defm : PatCCVrVrF<SETULE, "VFCMP_CULE">;
+
+defm : PatCCVrVrF<SETLT, "VFCMP_CLT">;
+defm : PatCCVrVrF<SETOLT, "VFCMP_CLT">;
+defm : PatCCVrVrF<SETULT, "VFCMP_CULT">;
+
+defm : PatCCVrVrF<SETNE, "VFCMP_CNE">;
+defm : PatCCVrVrF<SETONE, "VFCMP_CNE">;
+defm : PatCCVrVrF<SETUNE, "VFCMP_CUNE">;
+
+defm : PatCCVrVrF<SETO, "VFCMP_COR">;
+defm : PatCCVrVrF<SETUO, "VFCMP_CUN">;
+
+// VINSGR2VR_{B/H/W/D}
+def : Pat<(vector_insert v16i8:$vd, GRLenVT:$rj, uimm4:$imm),
+ (VINSGR2VR_B v16i8:$vd, GRLenVT:$rj, uimm4:$imm)>;
+def : Pat<(vector_insert v8i16:$vd, GRLenVT:$rj, uimm3:$imm),
+ (VINSGR2VR_H v8i16:$vd, GRLenVT:$rj, uimm3:$imm)>;
+def : Pat<(vector_insert v4i32:$vd, GRLenVT:$rj, uimm2:$imm),
+ (VINSGR2VR_W v4i32:$vd, GRLenVT:$rj, uimm2:$imm)>;
+def : Pat<(vector_insert v2i64:$vd, GRLenVT:$rj, uimm1:$imm),
+ (VINSGR2VR_D v2i64:$vd, GRLenVT:$rj, uimm1:$imm)>;
+
+def : Pat<(vector_insert v4f32:$vd, FPR32:$fj, uimm2:$imm),
+ (VINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm2:$imm)>;
+def : Pat<(vector_insert v2f64:$vd, FPR64:$fj, uimm1:$imm),
+ (VINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm1:$imm)>;
+
+// VPICKVE2GR_{B/H/W}[U]
+def : Pat<(loongarch_vpick_sext_elt v16i8:$vd, uimm4:$imm, i8),
+ (VPICKVE2GR_B v16i8:$vd, uimm4:$imm)>;
+def : Pat<(loongarch_vpick_sext_elt v8i16:$vd, uimm3:$imm, i16),
+ (VPICKVE2GR_H v8i16:$vd, uimm3:$imm)>;
+def : Pat<(loongarch_vpick_sext_elt v4i32:$vd, uimm2:$imm, i32),
+ (VPICKVE2GR_W v4i32:$vd, uimm2:$imm)>;
+
+def : Pat<(loongarch_vpick_zext_elt v16i8:$vd, uimm4:$imm, i8),
+ (VPICKVE2GR_BU v16i8:$vd, uimm4:$imm)>;
+def : Pat<(loongarch_vpick_zext_elt v8i16:$vd, uimm3:$imm, i16),
+ (VPICKVE2GR_HU v8i16:$vd, uimm3:$imm)>;
+def : Pat<(loongarch_vpick_zext_elt v4i32:$vd, uimm2:$imm, i32),
+ (VPICKVE2GR_WU v4i32:$vd, uimm2:$imm)>;
+
+// VREPLGR2VR_{B/H/W/D}
+def : Pat<(lsxsplati8 GPR:$rj), (VREPLGR2VR_B GPR:$rj)>;
+def : Pat<(lsxsplati16 GPR:$rj), (VREPLGR2VR_H GPR:$rj)>;
+def : Pat<(lsxsplati32 GPR:$rj), (VREPLGR2VR_W GPR:$rj)>;
+def : Pat<(lsxsplati64 GPR:$rj), (VREPLGR2VR_D GPR:$rj)>;
+
+// VREPLVE_{B/H/W/D}
+def : Pat<(loongarch_vreplve v16i8:$vj, GRLenVT:$rk),
+ (VREPLVE_B v16i8:$vj, GRLenVT:$rk)>;
+def : Pat<(loongarch_vreplve v8i16:$vj, GRLenVT:$rk),
+ (VREPLVE_H v8i16:$vj, GRLenVT:$rk)>;
+def : Pat<(loongarch_vreplve v4i32:$vj, GRLenVT:$rk),
+ (VREPLVE_W v4i32:$vj, GRLenVT:$rk)>;
+def : Pat<(loongarch_vreplve v2i64:$vj, GRLenVT:$rk),
+ (VREPLVE_D v2i64:$vj, GRLenVT:$rk)>;
+
+// VREPLVEI_{W/D}
+def : Pat<(lsxsplatf32 FPR32:$fj),
+ (VREPLVEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>;
+def : Pat<(lsxsplatf64 FPR64:$fj),
+ (VREPLVEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>;
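+// An FPR32/FPR64 value already sits in lane 0 of the overlapping 128-bit
+// register, so a floating-point splat is VREPLVEI with index 0 after
+// SUBREG_TO_REG.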
+
+// Loads/Stores
+foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
+ defm : LdPat<load, VLD, vt>;
+ def : RegRegLdPat<load, VLDX, vt>;
+ defm : StPat<store, VST, LSX128, vt>;
+ def : RegRegStPat<store, VSTX, LSX128, vt>;
+}
+
+// Vector extraction with constant index.
+def : Pat<(i64 (vector_extract v16i8:$vj, uimm4:$imm)),
+ (VPICKVE2GR_B v16i8:$vj, uimm4:$imm)>;
+def : Pat<(i64 (vector_extract v8i16:$vj, uimm3:$imm)),
+ (VPICKVE2GR_H v8i16:$vj, uimm3:$imm)>;
+def : Pat<(i64 (vector_extract v4i32:$vj, uimm2:$imm)),
+ (VPICKVE2GR_W v4i32:$vj, uimm2:$imm)>;
+def : Pat<(i64 (vector_extract v2i64:$vj, uimm1:$imm)),
+ (VPICKVE2GR_D v2i64:$vj, uimm1:$imm)>;
+def : Pat<(f32 (vector_extract v4f32:$vj, uimm2:$imm)),
+ (f32 (EXTRACT_SUBREG (VREPLVEI_W v4f32:$vj, uimm2:$imm), sub_32))>;
+def : Pat<(f64 (vector_extract v2f64:$vj, uimm1:$imm)),
+ (f64 (EXTRACT_SUBREG (VREPLVEI_D v2f64:$vj, uimm1:$imm), sub_64))>;
+
+// Vector extraction with variable index.
+def : Pat<(i64 (vector_extract v16i8:$vj, i64:$rk)),
+          (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj,
+                                                                    i64:$rk),
+                                                          sub_32)),
+                                    GPR), (i64 24))>;
+def : Pat<(i64 (vector_extract v8i16:$vj, i64:$rk)),
+          (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj,
+                                                                    i64:$rk),
+                                                          sub_32)),
+                                    GPR), (i64 16))>;
+def : Pat<(i64 (vector_extract v4i32:$vj, i64:$rk)),
+          (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, i64:$rk),
+                                                 sub_32)),
+                            GPR)>;
+def : Pat<(i64 (vector_extract v2i64:$vj, i64:$rk)),
+          (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, i64:$rk),
+                                                 sub_64)),
+                            GPR)>;
+def : Pat<(f32 (vector_extract v4f32:$vj, i64:$rk)),
+          (f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj, i64:$rk), sub_32))>;
+def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)),
+          (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>;
+
+// vselect
+def : Pat<(v16i8 (vselect LSX128:$vj, LSX128:$vd,
+ (v16i8 (SplatPat_uimm8 uimm8:$imm)))),
+ (VBITSELI_B LSX128:$vd, LSX128:$vj, uimm8:$imm)>;
+foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
+ def : Pat<(vt (vselect LSX128:$va, LSX128:$vk, LSX128:$vj)),
+ (VBITSEL_V LSX128:$vj, LSX128:$vk, LSX128:$va)>;
+
+// fneg
+def : Pat<(fneg (v4f32 LSX128:$vj)), (VBITREVI_W LSX128:$vj, 31)>;
+def : Pat<(fneg (v2f64 LSX128:$vj)), (VBITREVI_D LSX128:$vj, 63)>;
+
+} // Predicates = [HasExtLSX]
+
+/// Intrinsic pattern
+
+class deriveLSXIntrinsic<string Inst> {
+ Intrinsic ret = !cast<Intrinsic>(!tolower("int_loongarch_lsx_"#Inst));
+}
+
+let Predicates = [HasExtLSX] in {
+
+// vty: v16i8/v8i16/v4i32/v2i64
+// Pat<(Intrinsic vty:$vj, vty:$vk),
+// (LAInst vty:$vj, vty:$vk)>;
+foreach Inst = ["VSADD_B", "VSADD_BU", "VSSUB_B", "VSSUB_BU",
+ "VHADDW_H_B", "VHADDW_HU_BU", "VHSUBW_H_B", "VHSUBW_HU_BU",
+ "VADDWEV_H_B", "VADDWOD_H_B", "VSUBWEV_H_B", "VSUBWOD_H_B",
+ "VADDWEV_H_BU", "VADDWOD_H_BU", "VSUBWEV_H_BU", "VSUBWOD_H_BU",
+ "VADDWEV_H_BU_B", "VADDWOD_H_BU_B",
+ "VAVG_B", "VAVG_BU", "VAVGR_B", "VAVGR_BU",
+ "VABSD_B", "VABSD_BU", "VADDA_B", "VMUH_B", "VMUH_BU",
+ "VMULWEV_H_B", "VMULWOD_H_B", "VMULWEV_H_BU", "VMULWOD_H_BU",
+ "VMULWEV_H_BU_B", "VMULWOD_H_BU_B", "VSIGNCOV_B",
+ "VANDN_V", "VORN_V", "VROTR_B", "VSRLR_B", "VSRAR_B",
+ "VSEQ_B", "VSLE_B", "VSLE_BU", "VSLT_B", "VSLT_BU",
+ "VPACKEV_B", "VPACKOD_B", "VPICKEV_B", "VPICKOD_B",
+ "VILVL_B", "VILVH_B"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret
+ (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)),
+ (!cast<LAInst>(Inst) LSX128:$vj, LSX128:$vk)>;
+foreach Inst = ["VSADD_H", "VSADD_HU", "VSSUB_H", "VSSUB_HU",
+ "VHADDW_W_H", "VHADDW_WU_HU", "VHSUBW_W_H", "VHSUBW_WU_HU",
+ "VADDWEV_W_H", "VADDWOD_W_H", "VSUBWEV_W_H", "VSUBWOD_W_H",
+ "VADDWEV_W_HU", "VADDWOD_W_HU", "VSUBWEV_W_HU", "VSUBWOD_W_HU",
+ "VADDWEV_W_HU_H", "VADDWOD_W_HU_H",
+ "VAVG_H", "VAVG_HU", "VAVGR_H", "VAVGR_HU",
+ "VABSD_H", "VABSD_HU", "VADDA_H", "VMUH_H", "VMUH_HU",
+ "VMULWEV_W_H", "VMULWOD_W_H", "VMULWEV_W_HU", "VMULWOD_W_HU",
+ "VMULWEV_W_HU_H", "VMULWOD_W_HU_H", "VSIGNCOV_H", "VROTR_H",
+ "VSRLR_H", "VSRAR_H", "VSRLN_B_H", "VSRAN_B_H", "VSRLRN_B_H",
+ "VSRARN_B_H", "VSSRLN_B_H", "VSSRAN_B_H", "VSSRLN_BU_H",
+ "VSSRAN_BU_H", "VSSRLRN_B_H", "VSSRARN_B_H", "VSSRLRN_BU_H",
+ "VSSRARN_BU_H",
+ "VSEQ_H", "VSLE_H", "VSLE_HU", "VSLT_H", "VSLT_HU",
+ "VPACKEV_H", "VPACKOD_H", "VPICKEV_H", "VPICKOD_H",
+ "VILVL_H", "VILVH_H"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret
+ (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)),
+ (!cast<LAInst>(Inst) LSX128:$vj, LSX128:$vk)>;
+foreach Inst = ["VSADD_W", "VSADD_WU", "VSSUB_W", "VSSUB_WU",
+ "VHADDW_D_W", "VHADDW_DU_WU", "VHSUBW_D_W", "VHSUBW_DU_WU",
+ "VADDWEV_D_W", "VADDWOD_D_W", "VSUBWEV_D_W", "VSUBWOD_D_W",
+ "VADDWEV_D_WU", "VADDWOD_D_WU", "VSUBWEV_D_WU", "VSUBWOD_D_WU",
+ "VADDWEV_D_WU_W", "VADDWOD_D_WU_W",
+ "VAVG_W", "VAVG_WU", "VAVGR_W", "VAVGR_WU",
+ "VABSD_W", "VABSD_WU", "VADDA_W", "VMUH_W", "VMUH_WU",
+ "VMULWEV_D_W", "VMULWOD_D_W", "VMULWEV_D_WU", "VMULWOD_D_WU",
+ "VMULWEV_D_WU_W", "VMULWOD_D_WU_W", "VSIGNCOV_W", "VROTR_W",
+ "VSRLR_W", "VSRAR_W", "VSRLN_H_W", "VSRAN_H_W", "VSRLRN_H_W",
+ "VSRARN_H_W", "VSSRLN_H_W", "VSSRAN_H_W", "VSSRLN_HU_W",
+ "VSSRAN_HU_W", "VSSRLRN_H_W", "VSSRARN_H_W", "VSSRLRN_HU_W",
+ "VSSRARN_HU_W",
+ "VSEQ_W", "VSLE_W", "VSLE_WU", "VSLT_W", "VSLT_WU",
+ "VPACKEV_W", "VPACKOD_W", "VPICKEV_W", "VPICKOD_W",
+ "VILVL_W", "VILVH_W"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret
+ (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)),
+ (!cast<LAInst>(Inst) LSX128:$vj, LSX128:$vk)>;
+foreach Inst = ["VADD_Q", "VSUB_Q",
+ "VSADD_D", "VSADD_DU", "VSSUB_D", "VSSUB_DU",
+ "VHADDW_Q_D", "VHADDW_QU_DU", "VHSUBW_Q_D", "VHSUBW_QU_DU",
+ "VADDWEV_Q_D", "VADDWOD_Q_D", "VSUBWEV_Q_D", "VSUBWOD_Q_D",
+ "VADDWEV_Q_DU", "VADDWOD_Q_DU", "VSUBWEV_Q_DU", "VSUBWOD_Q_DU",
+ "VADDWEV_Q_DU_D", "VADDWOD_Q_DU_D",
+ "VAVG_D", "VAVG_DU", "VAVGR_D", "VAVGR_DU",
+ "VABSD_D", "VABSD_DU", "VADDA_D", "VMUH_D", "VMUH_DU",
+ "VMULWEV_Q_D", "VMULWOD_Q_D", "VMULWEV_Q_DU", "VMULWOD_Q_DU",
+ "VMULWEV_Q_DU_D", "VMULWOD_Q_DU_D", "VSIGNCOV_D", "VROTR_D",
+ "VSRLR_D", "VSRAR_D", "VSRLN_W_D", "VSRAN_W_D", "VSRLRN_W_D",
+ "VSRARN_W_D", "VSSRLN_W_D", "VSSRAN_W_D", "VSSRLN_WU_D",
+ "VSSRAN_WU_D", "VSSRLRN_W_D", "VSSRARN_W_D", "VSSRLRN_WU_D",
+ "VSSRARN_WU_D", "VFFINT_S_L",
+ "VSEQ_D", "VSLE_D", "VSLE_DU", "VSLT_D", "VSLT_DU",
+ "VPACKEV_D", "VPACKOD_D", "VPICKEV_D", "VPICKOD_D",
+ "VILVL_D", "VILVH_D"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret
+ (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)),
+ (!cast<LAInst>(Inst) LSX128:$vj, LSX128:$vk)>;
+
+// vty: v16i8/v8i16/v4i32/v2i64
+// Pat<(Intrinsic vty:$vd, vty:$vj, vty:$vk),
+// (LAInst vty:$vd, vty:$vj, vty:$vk)>;
+foreach Inst = ["VMADDWEV_H_B", "VMADDWOD_H_B", "VMADDWEV_H_BU",
+ "VMADDWOD_H_BU", "VMADDWEV_H_BU_B", "VMADDWOD_H_BU_B"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret
+ (v8i16 LSX128:$vd), (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)),
+ (!cast<LAInst>(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>;
+foreach Inst = ["VMADDWEV_W_H", "VMADDWOD_W_H", "VMADDWEV_W_HU",
+ "VMADDWOD_W_HU", "VMADDWEV_W_HU_H", "VMADDWOD_W_HU_H"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret
+ (v4i32 LSX128:$vd), (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)),
+ (!cast<LAInst>(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>;
+foreach Inst = ["VMADDWEV_D_W", "VMADDWOD_D_W", "VMADDWEV_D_WU",
+ "VMADDWOD_D_WU", "VMADDWEV_D_WU_W", "VMADDWOD_D_WU_W"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret
+ (v2i64 LSX128:$vd), (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)),
+ (!cast<LAInst>(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>;
+foreach Inst = ["VMADDWEV_Q_D", "VMADDWOD_Q_D", "VMADDWEV_Q_DU",
+ "VMADDWOD_Q_DU", "VMADDWEV_Q_DU_D", "VMADDWOD_Q_DU_D"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret
+ (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)),
+ (!cast<LAInst>(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>;
+
+// vty: v16i8/v8i16/v4i32/v2i64
+// Pat<(Intrinsic vty:$vj),
+// (LAInst vty:$vj)>;
+foreach Inst = ["VEXTH_H_B", "VEXTH_HU_BU",
+ "VMSKLTZ_B", "VMSKGEZ_B", "VMSKNZ_B",
+ "VCLO_B"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret (v16i8 LSX128:$vj)),
+ (!cast<LAInst>(Inst) LSX128:$vj)>;
+foreach Inst = ["VEXTH_W_H", "VEXTH_WU_HU", "VMSKLTZ_H",
+ "VCLO_H", "VFCVTL_S_H", "VFCVTH_S_H"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret (v8i16 LSX128:$vj)),
+ (!cast<LAInst>(Inst) LSX128:$vj)>;
+foreach Inst = ["VEXTH_D_W", "VEXTH_DU_WU", "VMSKLTZ_W",
+ "VCLO_W", "VFFINT_S_W", "VFFINT_S_WU",
+ "VFFINTL_D_W", "VFFINTH_D_W"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret (v4i32 LSX128:$vj)),
+ (!cast<LAInst>(Inst) LSX128:$vj)>;
+foreach Inst = ["VEXTH_Q_D", "VEXTH_QU_DU", "VMSKLTZ_D",
+ "VEXTL_Q_D", "VEXTL_QU_DU",
+ "VCLO_D", "VFFINT_D_L", "VFFINT_D_LU"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret (v2i64 LSX128:$vj)),
+ (!cast<LAInst>(Inst) LSX128:$vj)>;
+
+// Pat<(Intrinsic timm:$imm)
+// (LAInst timm:$imm)>;
+def : Pat<(int_loongarch_lsx_vldi timm:$imm),
+ (VLDI (to_valid_timm timm:$imm))>;
+foreach Inst = ["VREPLI_B", "VREPLI_H", "VREPLI_W", "VREPLI_D"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret timm:$imm),
+ (!cast<LAInst>("Pseudo"#Inst) (to_valid_timm timm:$imm))>;
+
+// vty: v16i8/v8i16/v4i32/v2i64
+// Pat<(Intrinsic vty:$vj, timm:$imm)
+// (LAInst vty:$vj, timm:$imm)>;
+foreach Inst = ["VSAT_B", "VSAT_BU", "VNORI_B", "VROTRI_B", "VSLLWIL_H_B",
+ "VSLLWIL_HU_BU", "VSRLRI_B", "VSRARI_B",
+ "VSEQI_B", "VSLEI_B", "VSLEI_BU", "VSLTI_B", "VSLTI_BU",
+ "VREPLVEI_B", "VBSLL_V", "VBSRL_V", "VSHUF4I_B"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret (v16i8 LSX128:$vj), timm:$imm),
+ (!cast<LAInst>(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>;
+foreach Inst = ["VSAT_H", "VSAT_HU", "VROTRI_H", "VSLLWIL_W_H",
+ "VSLLWIL_WU_HU", "VSRLRI_H", "VSRARI_H",
+ "VSEQI_H", "VSLEI_H", "VSLEI_HU", "VSLTI_H", "VSLTI_HU",
+ "VREPLVEI_H", "VSHUF4I_H"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret (v8i16 LSX128:$vj), timm:$imm),
+ (!cast<LAInst>(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>;
+foreach Inst = ["VSAT_W", "VSAT_WU", "VROTRI_W", "VSLLWIL_D_W",
+ "VSLLWIL_DU_WU", "VSRLRI_W", "VSRARI_W",
+ "VSEQI_W", "VSLEI_W", "VSLEI_WU", "VSLTI_W", "VSLTI_WU",
+ "VREPLVEI_W", "VSHUF4I_W"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret (v4i32 LSX128:$vj), timm:$imm),
+ (!cast<LAInst>(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>;
+foreach Inst = ["VSAT_D", "VSAT_DU", "VROTRI_D", "VSRLRI_D", "VSRARI_D",
+ "VSEQI_D", "VSLEI_D", "VSLEI_DU", "VSLTI_D", "VSLTI_DU",
+ "VPICKVE2GR_D", "VPICKVE2GR_DU",
+ "VREPLVEI_D"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret (v2i64 LSX128:$vj), timm:$imm),
+ (!cast<LAInst>(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>;
+
+// vty: v16i8/v8i16/v4i32/v2i64
+// Pat<(Intrinsic vty:$vd, vty:$vj, timm:$imm)
+// (LAInst vty:$vd, vty:$vj, timm:$imm)>;
+foreach Inst = ["VSRLNI_B_H", "VSRANI_B_H", "VSRLRNI_B_H", "VSRARNI_B_H",
+ "VSSRLNI_B_H", "VSSRANI_B_H", "VSSRLNI_BU_H", "VSSRANI_BU_H",
+ "VSSRLRNI_B_H", "VSSRARNI_B_H", "VSSRLRNI_BU_H", "VSSRARNI_BU_H",
+ "VFRSTPI_B", "VBITSELI_B", "VEXTRINS_B"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret
+ (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), timm:$imm),
+ (!cast<LAInst>(Inst) LSX128:$vd, LSX128:$vj,
+ (to_valid_timm timm:$imm))>;
+foreach Inst = ["VSRLNI_H_W", "VSRANI_H_W", "VSRLRNI_H_W", "VSRARNI_H_W",
+ "VSSRLNI_H_W", "VSSRANI_H_W", "VSSRLNI_HU_W", "VSSRANI_HU_W",
+ "VSSRLRNI_H_W", "VSSRARNI_H_W", "VSSRLRNI_HU_W", "VSSRARNI_HU_W",
+ "VFRSTPI_H", "VEXTRINS_H"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret
+ (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), timm:$imm),
+ (!cast<LAInst>(Inst) LSX128:$vd, LSX128:$vj,
+ (to_valid_timm timm:$imm))>;
+foreach Inst = ["VSRLNI_W_D", "VSRANI_W_D", "VSRLRNI_W_D", "VSRARNI_W_D",
+ "VSSRLNI_W_D", "VSSRANI_W_D", "VSSRLNI_WU_D", "VSSRANI_WU_D",
+ "VSSRLRNI_W_D", "VSSRARNI_W_D", "VSSRLRNI_WU_D", "VSSRARNI_WU_D",
+ "VPERMI_W", "VEXTRINS_W"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret
+ (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), timm:$imm),
+ (!cast<LAInst>(Inst) LSX128:$vd, LSX128:$vj,
+ (to_valid_timm timm:$imm))>;
+foreach Inst = ["VSRLNI_D_Q", "VSRANI_D_Q", "VSRLRNI_D_Q", "VSRARNI_D_Q",
+ "VSSRLNI_D_Q", "VSSRANI_D_Q", "VSSRLNI_DU_Q", "VSSRANI_DU_Q",
+ "VSSRLRNI_D_Q", "VSSRARNI_D_Q", "VSSRLRNI_DU_Q", "VSSRARNI_DU_Q",
+ "VSHUF4I_D", "VEXTRINS_D"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret
+ (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), timm:$imm),
+ (!cast<LAInst>(Inst) LSX128:$vd, LSX128:$vj,
+ (to_valid_timm timm:$imm))>;
+
+// vty: v16i8/v8i16/v4i32/v2i64
+// Pat<(Intrinsic vty:$vd, vty:$vj, vty:$vk),
+// (LAInst vty:$vd, vty:$vj, vty:$vk)>;
+foreach Inst = ["VFRSTP_B", "VBITSEL_V", "VSHUF_B"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret
+ (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)),
+ (!cast<LAInst>(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>;
+foreach Inst = ["VFRSTP_H", "VSHUF_H"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret
+ (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)),
+ (!cast<LAInst>(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>;
+def : Pat<(int_loongarch_lsx_vshuf_w (v4i32 LSX128:$vd), (v4i32 LSX128:$vj),
+ (v4i32 LSX128:$vk)),
+ (VSHUF_W LSX128:$vd, LSX128:$vj, LSX128:$vk)>;
+def : Pat<(int_loongarch_lsx_vshuf_d (v2i64 LSX128:$vd), (v2i64 LSX128:$vj),
+ (v2i64 LSX128:$vk)),
+ (VSHUF_D LSX128:$vd, LSX128:$vj, LSX128:$vk)>;
+
+// vty: v4f32/v2f64
+// Pat<(Intrinsic vty:$vj, vty:$vk, vty:$va),
+// (LAInst vty:$vj, vty:$vk, vty:$va)>;
+foreach Inst = ["VFMSUB_S", "VFNMADD_S", "VFNMSUB_S"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret
+ (v4f32 LSX128:$vj), (v4f32 LSX128:$vk), (v4f32 LSX128:$va)),
+ (!cast<LAInst>(Inst) LSX128:$vj, LSX128:$vk, LSX128:$va)>;
+foreach Inst = ["VFMSUB_D", "VFNMADD_D", "VFNMSUB_D"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret
+ (v2f64 LSX128:$vj), (v2f64 LSX128:$vk), (v2f64 LSX128:$va)),
+ (!cast<LAInst>(Inst) LSX128:$vj, LSX128:$vk, LSX128:$va)>;
+
+// vty: v4f32/v2f64
+// Pat<(Intrinsic vty:$vj, vty:$vk),
+// (LAInst vty:$vj, vty:$vk)>;
+foreach Inst = ["VFMAX_S", "VFMIN_S", "VFMAXA_S", "VFMINA_S", "VFCVT_H_S",
+ "VFCMP_CAF_S", "VFCMP_CUN_S", "VFCMP_CEQ_S", "VFCMP_CUEQ_S",
+ "VFCMP_CLT_S", "VFCMP_CULT_S", "VFCMP_CLE_S", "VFCMP_CULE_S",
+ "VFCMP_CNE_S", "VFCMP_COR_S", "VFCMP_CUNE_S",
+ "VFCMP_SAF_S", "VFCMP_SUN_S", "VFCMP_SEQ_S", "VFCMP_SUEQ_S",
+ "VFCMP_SLT_S", "VFCMP_SULT_S", "VFCMP_SLE_S", "VFCMP_SULE_S",
+ "VFCMP_SNE_S", "VFCMP_SOR_S", "VFCMP_SUNE_S"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret
+ (v4f32 LSX128:$vj), (v4f32 LSX128:$vk)),
+ (!cast<LAInst>(Inst) LSX128:$vj, LSX128:$vk)>;
+foreach Inst = ["VFMAX_D", "VFMIN_D", "VFMAXA_D", "VFMINA_D", "VFCVT_S_D",
+ "VFTINTRNE_W_D", "VFTINTRZ_W_D", "VFTINTRP_W_D", "VFTINTRM_W_D",
+ "VFTINT_W_D",
+ "VFCMP_CAF_D", "VFCMP_CUN_D", "VFCMP_CEQ_D", "VFCMP_CUEQ_D",
+ "VFCMP_CLT_D", "VFCMP_CULT_D", "VFCMP_CLE_D", "VFCMP_CULE_D",
+ "VFCMP_CNE_D", "VFCMP_COR_D", "VFCMP_CUNE_D",
+ "VFCMP_SAF_D", "VFCMP_SUN_D", "VFCMP_SEQ_D", "VFCMP_SUEQ_D",
+ "VFCMP_SLT_D", "VFCMP_SULT_D", "VFCMP_SLE_D", "VFCMP_SULE_D",
+ "VFCMP_SNE_D", "VFCMP_SOR_D", "VFCMP_SUNE_D"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret
+ (v2f64 LSX128:$vj), (v2f64 LSX128:$vk)),
+ (!cast<LAInst>(Inst) LSX128:$vj, LSX128:$vk)>;
+
+// vty: v4f32/v2f64
+// Pat<(Intrinsic vty:$vj),
+// (LAInst vty:$vj)>;
+foreach Inst = ["VFLOGB_S", "VFCLASS_S", "VFSQRT_S", "VFRECIP_S", "VFRSQRT_S",
+ "VFRINT_S", "VFCVTL_D_S", "VFCVTH_D_S",
+ "VFRINTRNE_S", "VFRINTRZ_S", "VFRINTRP_S", "VFRINTRM_S",
+ "VFTINTRNE_W_S", "VFTINTRZ_W_S", "VFTINTRP_W_S", "VFTINTRM_W_S",
+ "VFTINT_W_S", "VFTINTRZ_WU_S", "VFTINT_WU_S",
+ "VFTINTRNEL_L_S", "VFTINTRNEH_L_S", "VFTINTRZL_L_S",
+ "VFTINTRZH_L_S", "VFTINTRPL_L_S", "VFTINTRPH_L_S",
+ "VFTINTRML_L_S", "VFTINTRMH_L_S", "VFTINTL_L_S",
+ "VFTINTH_L_S"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret (v4f32 LSX128:$vj)),
+ (!cast<LAInst>(Inst) LSX128:$vj)>;
+foreach Inst = ["VFLOGB_D", "VFCLASS_D", "VFSQRT_D", "VFRECIP_D", "VFRSQRT_D",
+ "VFRINT_D",
+ "VFRINTRNE_D", "VFRINTRZ_D", "VFRINTRP_D", "VFRINTRM_D",
+ "VFTINTRNE_L_D", "VFTINTRZ_L_D", "VFTINTRP_L_D", "VFTINTRM_L_D",
+ "VFTINT_L_D", "VFTINTRZ_LU_D", "VFTINT_LU_D"] in
+ def : Pat<(deriveLSXIntrinsic<Inst>.ret (v2f64 LSX128:$vj)),
+ (!cast<LAInst>(Inst) LSX128:$vj)>;
+
+// load
+def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm),
+ (VLD GPR:$rj, (to_valid_timm timm:$imm))>;
+def : Pat<(int_loongarch_lsx_vldx GPR:$rj, GPR:$rk),
+ (VLDX GPR:$rj, GPR:$rk)>;
+
+def : Pat<(int_loongarch_lsx_vldrepl_b GPR:$rj, timm:$imm),
+ (VLDREPL_B GPR:$rj, (to_valid_timm timm:$imm))>;
+def : Pat<(int_loongarch_lsx_vldrepl_h GPR:$rj, timm:$imm),
+ (VLDREPL_H GPR:$rj, (to_valid_timm timm:$imm))>;
+def : Pat<(int_loongarch_lsx_vldrepl_w GPR:$rj, timm:$imm),
+ (VLDREPL_W GPR:$rj, (to_valid_timm timm:$imm))>;
+def : Pat<(int_loongarch_lsx_vldrepl_d GPR:$rj, timm:$imm),
+ (VLDREPL_D GPR:$rj, (to_valid_timm timm:$imm))>;
+
+// store
+def : Pat<(int_loongarch_lsx_vst LSX128:$vd, GPR:$rj, timm:$imm),
+ (VST LSX128:$vd, GPR:$rj, (to_valid_timm timm:$imm))>;
+def : Pat<(int_loongarch_lsx_vstx LSX128:$vd, GPR:$rj, GPR:$rk),
+ (VSTX LSX128:$vd, GPR:$rj, GPR:$rk)>;
+
+def : Pat<(int_loongarch_lsx_vstelm_b v16i8:$vd, GPR:$rj, timm:$imm, timm:$idx),
+ (VSTELM_B v16i8:$vd, GPR:$rj, (to_valid_timm timm:$imm),
+ (to_valid_timm timm:$idx))>;
+def : Pat<(int_loongarch_lsx_vstelm_h v8i16:$vd, GPR:$rj, timm:$imm, timm:$idx),
+ (VSTELM_H v8i16:$vd, GPR:$rj, (to_valid_timm timm:$imm),
+ (to_valid_timm timm:$idx))>;
+def : Pat<(int_loongarch_lsx_vstelm_w v4i32:$vd, GPR:$rj, timm:$imm, timm:$idx),
+ (VSTELM_W v4i32:$vd, GPR:$rj, (to_valid_timm timm:$imm),
+ (to_valid_timm timm:$idx))>;
+def : Pat<(int_loongarch_lsx_vstelm_d v2i64:$vd, GPR:$rj, timm:$imm, timm:$idx),
+ (VSTELM_D v2i64:$vd, GPR:$rj, (to_valid_timm timm:$imm),
+ (to_valid_timm timm:$idx))>;
+
} // Predicates = [HasExtLSX]
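
These patterns give each `int_loongarch_lsx_*` intrinsic a one-to-one lowering onto an LSX machine instruction, so a single builtin call becomes a single vector instruction. A minimal sketch of source that exercises the load/ldi/store patterns above, assuming clang's `lsxintrin.h` builtin spellings (`__lsx_vld`, `__lsx_vldi`, `__lsx_vadd_b`, `__lsx_vst`), which are not part of this diff:

// Sketch only: each builtin maps onto one of the LSX intrinsic patterns
// in this file; builtin names assume clang's lsxintrin.h.
#include <lsxintrin.h>

void add_one_bytes(void *dst, const void *src) {
  __m128i v = __lsx_vld(src, 0);  // int_loongarch_lsx_vld    -> VLD
  __m128i k = __lsx_vldi(1);      // int_loongarch_lsx_vldi   -> VLDI
  v = __lsx_vadd_b(v, k);         // int_loongarch_lsx_vadd_b -> VADD.B
  __lsx_vst(v, dst, 0);           // int_loongarch_lsx_vst    -> VST
}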
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
index 0fbe23f2f62d..5c173675cca4 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
@@ -43,6 +43,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
bool HasLaGlobalWithAbs = false;
bool HasLaLocalWithAbs = false;
bool HasUAL = false;
+ bool HasLinkerRelax = false;
unsigned GRLen = 32;
MVT GRLenVT = MVT::i32;
LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown;
@@ -100,6 +101,7 @@ public:
bool hasLaGlobalWithAbs() const { return HasLaGlobalWithAbs; }
bool hasLaLocalWithAbs() const { return HasLaLocalWithAbs; }
bool hasUAL() const { return HasUAL; }
+ bool hasLinkerRelax() const { return HasLinkerRelax; }
MVT getGRLenVT() const { return GRLenVT; }
unsigned getGRLen() const { return GRLen; }
LoongArchABI::ABI getTargetABI() const { return TargetABI; }
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index d0a4e9375048..a5a4d78aceee 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -78,7 +78,7 @@ getEffectiveLoongArchCodeModel(const Triple &TT,
LoongArchTargetMachine::LoongArchTargetMachine(
const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
const TargetOptions &Options, std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(TT, RM),
getEffectiveLoongArchCodeModel(TT, CM), OL),
@@ -159,7 +159,7 @@ void LoongArchPassConfig::addIRPasses() {
//
// Run this before LSR to remove the multiplies involved in computing the
// pointer values N iterations ahead.
- if (TM->getOptLevel() != CodeGenOpt::None && EnableLoopDataPrefetch)
+ if (TM->getOptLevel() != CodeGenOptLevel::None && EnableLoopDataPrefetch)
addPass(createLoopDataPrefetchPass());
addPass(createAtomicExpandPass());
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h
index 06fcec838ea4..7d39d47e86b3 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h
@@ -27,8 +27,8 @@ public:
LoongArchTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT);
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
+ bool JIT);
~LoongArchTargetMachine() override;
TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
index ecb68ff401e9..14bcef7c7d26 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -17,7 +17,6 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#define DEBUG_TYPE "loongarch-asmbackend"
@@ -163,12 +162,13 @@ void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm,
bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
const MCFixup &Fixup,
- const MCValue &Target) {
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) {
if (Fixup.getKind() >= FirstLiteralRelocationKind)
return true;
switch (Fixup.getTargetKind()) {
default:
- return false;
+ return STI->hasFeature(LoongArch::FeatureRelax);
case FK_Data_1:
case FK_Data_2:
case FK_Data_4:
@@ -193,7 +193,8 @@ bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
std::unique_ptr<MCObjectTargetWriter>
LoongArchAsmBackend::createObjectTargetWriter() const {
- return createLoongArchELFObjectWriter(OSABI, Is64Bit);
+ return createLoongArchELFObjectWriter(
+ OSABI, Is64Bit, STI.hasFeature(LoongArch::FeatureRelax));
}
MCAsmBackend *llvm::createLoongArchAsmBackend(const Target &T,
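
With linker relaxation enabled, distances between code and its targets are not final at assembly time, so the backend now keeps relocations it could previously have resolved in place. A condensed sketch of the new decision (the `FK_Data_*` cases continue past this hunk and are not reproduced):

// Sketch: the default path of shouldForceRelocation after this change.
// Under FeatureRelax, the linker may delete bytes between a fixup and its
// target, so even resolvable fixups must survive as relocations.
bool shouldForce(bool IsLiteralReloc, bool HasRelax) {
  if (IsLiteralReloc)
    return true;   // explicit .reloc directives always become relocations
  return HasRelax; // default case: previously an unconditional `return false`
}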
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
index ae9bb8af0419..d1fbf788e8a8 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
@@ -31,8 +31,8 @@ class LoongArchAsmBackend : public MCAsmBackend {
public:
LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit,
const MCTargetOptions &Options)
- : MCAsmBackend(support::little), STI(STI), OSABI(OSABI), Is64Bit(Is64Bit),
- TargetOptions(Options) {}
+ : MCAsmBackend(llvm::endianness::little), STI(STI), OSABI(OSABI),
+ Is64Bit(Is64Bit), TargetOptions(Options) {}
~LoongArchAsmBackend() override {}
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
@@ -41,7 +41,8 @@ public:
const MCSubtargetInfo *STI) const override;
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target) override;
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) override;
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
index a6b9c0652639..fe19a4f2d3c8 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
@@ -20,19 +20,27 @@ using namespace llvm;
namespace {
class LoongArchELFObjectWriter : public MCELFObjectTargetWriter {
public:
- LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit);
+ LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit, bool EnableRelax);
~LoongArchELFObjectWriter() override;
+ bool needsRelocateWithSymbol(const MCValue &Val, const MCSymbol &Sym,
+ unsigned Type) const override {
+ return EnableRelax;
+ }
+
protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
+ bool EnableRelax;
};
} // end namespace
-LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit)
+LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit,
+ bool EnableRelax)
: MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_LOONGARCH,
- /*HasRelocationAddend*/ true) {}
+ /*HasRelocationAddend=*/true),
+ EnableRelax(EnableRelax) {}
LoongArchELFObjectWriter::~LoongArchELFObjectWriter() {}
@@ -87,6 +95,6 @@ unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx,
}
std::unique_ptr<MCObjectTargetWriter>
-llvm::createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit) {
- return std::make_unique<LoongArchELFObjectWriter>(OSABI, Is64Bit);
+llvm::createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit, bool Relax) {
+ return std::make_unique<LoongArchELFObjectWriter>(OSABI, Is64Bit, Relax);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
index 03fb9e008ae9..45169becca37 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
@@ -259,6 +259,7 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO,
FixupKind = LoongArch::fixup_loongarch_b21;
break;
case LoongArch::B:
+ case LoongArch::BL:
FixupKind = LoongArch::fixup_loongarch_b26;
break;
}
@@ -296,7 +297,7 @@ void LoongArchMCCodeEmitter::expandToVectorLDI(
}
MCInst TmpInst = MCInstBuilder(Opc).addOperand(MI.getOperand(0)).addImm(Imm);
uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
- support::endian::write(CB, Binary, support::little);
+ support::endian::write(CB, Binary, llvm::endianness::little);
}
void LoongArchMCCodeEmitter::encodeInstruction(
@@ -326,7 +327,7 @@ void LoongArchMCCodeEmitter::encodeInstruction(
llvm_unreachable("Unhandled encodeInstruction length!");
case 4: {
uint32_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
- support::endian::write(CB, Bits, support::little);
+ support::endian::write(CB, Bits, llvm::endianness::little);
break;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
index 942e667bc261..a4e6a09863e6 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
@@ -97,13 +97,90 @@ public:
bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
uint64_t &Target) const override {
unsigned NumOps = Inst.getNumOperands();
- if (isBranch(Inst) || Inst.getOpcode() == LoongArch::BL) {
+ if ((isBranch(Inst) && !isIndirectBranch(Inst)) ||
+ Inst.getOpcode() == LoongArch::BL) {
Target = Addr + Inst.getOperand(NumOps - 1).getImm();
return true;
}
return false;
}
+
+ bool isTerminator(const MCInst &Inst) const override {
+ if (MCInstrAnalysis::isTerminator(Inst))
+ return true;
+
+ switch (Inst.getOpcode()) {
+ default:
+ return false;
+ case LoongArch::JIRL:
+ return Inst.getOperand(0).getReg() == LoongArch::R0;
+ }
+ }
+
+ bool isCall(const MCInst &Inst) const override {
+ if (MCInstrAnalysis::isCall(Inst))
+ return true;
+
+ switch (Inst.getOpcode()) {
+ default:
+ return false;
+ case LoongArch::JIRL:
+ return Inst.getOperand(0).getReg() != LoongArch::R0;
+ }
+ }
+
+ bool isReturn(const MCInst &Inst) const override {
+ if (MCInstrAnalysis::isReturn(Inst))
+ return true;
+
+ switch (Inst.getOpcode()) {
+ default:
+ return false;
+ case LoongArch::JIRL:
+ return Inst.getOperand(0).getReg() == LoongArch::R0 &&
+ Inst.getOperand(1).getReg() == LoongArch::R1;
+ }
+ }
+
+ bool isBranch(const MCInst &Inst) const override {
+ if (MCInstrAnalysis::isBranch(Inst))
+ return true;
+
+ switch (Inst.getOpcode()) {
+ default:
+ return false;
+ case LoongArch::JIRL:
+ return Inst.getOperand(0).getReg() == LoongArch::R0 &&
+ Inst.getOperand(1).getReg() != LoongArch::R1;
+ }
+ }
+
+ bool isUnconditionalBranch(const MCInst &Inst) const override {
+ if (MCInstrAnalysis::isUnconditionalBranch(Inst))
+ return true;
+
+ switch (Inst.getOpcode()) {
+ default:
+ return false;
+ case LoongArch::JIRL:
+ return Inst.getOperand(0).getReg() == LoongArch::R0 &&
+ Inst.getOperand(1).getReg() != LoongArch::R1;
+ }
+ }
+
+ bool isIndirectBranch(const MCInst &Inst) const override {
+ if (MCInstrAnalysis::isIndirectBranch(Inst))
+ return true;
+
+ switch (Inst.getOpcode()) {
+ default:
+ return false;
+ case LoongArch::JIRL:
+ return Inst.getOperand(0).getReg() == LoongArch::R0 &&
+ Inst.getOperand(1).getReg() != LoongArch::R1;
+ }
+ }
};
} // end namespace
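
At the MC level LoongArch has no distinct return or indirect-call opcodes: `jirl $rd, $rj, imm` jumps to `$rj + imm` and writes the return address into `$rd`, so the overrides above recover the intent from the two register operands. A sketch restating the classification they encode:

// Sketch: how the overrides above classify `jirl $rd, $rj, imm`.
//   jirl $zero, $ra, 0  -> return           (rd == R0, rj == R1)
//   jirl $zero, $rj, 0  -> indirect branch  (rd == R0, rj != R1)
//   jirl $rd,   $rj, 0  -> call             (rd != R0)
enum class JirlKind { Return, IndirectBranch, Call };
JirlKind classifyJIRL(unsigned Rd, unsigned Rj) {
  if (Rd != LoongArch::R0)
    return JirlKind::Call;
  return Rj == LoongArch::R1 ? JirlKind::Return : JirlKind::IndirectBranch;
}

Note that `isBranch`, `isUnconditionalBranch`, and `isIndirectBranch` all test the same condition, and `evaluateBranch` now excludes the indirect case because the target cannot be computed from the immediate alone.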
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h
index ab35a0096c8a..bb05baa9b717 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h
@@ -36,7 +36,7 @@ MCAsmBackend *createLoongArchAsmBackend(const Target &T,
const MCTargetOptions &Options);
std::unique_ptr<MCObjectTargetWriter>
-createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit);
+createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit, bool Relax);
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
index 7a0a033c55ad..b2c0fda1ccc2 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
@@ -66,10 +66,9 @@ public:
unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
- bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
- OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -157,6 +156,7 @@ public:
bool isAReg() const;
bool isDReg() const;
bool isFPDReg() const;
+ bool isFPCReg() const;
unsigned getReg() const override;
void addRegOperands(MCInst &Inst, unsigned N) const;
@@ -255,9 +255,13 @@ static inline unsigned getRegisterIndex(unsigned Register) {
// SP is sadly not contiguous with the rest of the An registers
return 15;
+ // We don't care about the indices of these registers.
case M68k::PC:
case M68k::CCR:
- return 16;
+ case M68k::FPC:
+ case M68k::FPS:
+ case M68k::FPIAR:
+ return UINT_MAX;
default:
llvm_unreachable("unexpected register number");
@@ -489,7 +493,8 @@ void M68kOperand::addPCIOperands(MCInst &Inst, unsigned N) const {
}
static inline bool checkRegisterClass(unsigned RegNo, bool Data, bool Address,
- bool SP, bool FPDR = false) {
+ bool SP, bool FPDR = false,
+ bool FPCR = false) {
switch (RegNo) {
case M68k::A0:
case M68k::A1:
@@ -527,6 +532,11 @@ static inline bool checkRegisterClass(unsigned RegNo, bool Data, bool Address,
case M68k::FP7:
return FPDR;
+ case M68k::FPC:
+ case M68k::FPS:
+ case M68k::FPIAR:
+ return FPCR;
+
default:
llvm_unreachable("unexpected register type");
return false;
@@ -552,6 +562,13 @@ bool M68kOperand::isFPDReg() const {
/*FPDR=*/true);
}
+bool M68kOperand::isFPCReg() const {
+ return isReg() && checkRegisterClass(getReg(),
+ /*Data=*/false,
+ /*Address=*/false, /*SP=*/false,
+ /*FPDR=*/false, /*FPCR=*/true);
+}
+
unsigned M68kAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) {
M68kOperand &Operand = (M68kOperand &)Op;
@@ -661,12 +678,22 @@ bool M68kAsmParser::parseRegisterName(MCRegister &RegNo, SMLoc Loc,
}
} else if (StringRef(RegisterNameLower).starts_with("fp") &&
RegisterNameLower.size() > 2) {
- // Floating point data register.
auto RegIndex = unsigned(RegisterNameLower[2] - '0');
- if (RegIndex >= 8 || RegisterNameLower.size() > 3)
- return false;
- RegNo = getRegisterByIndex(16 + RegIndex);
- return true;
+ if (RegIndex < 8 && RegisterNameLower.size() == 3) {
+ // Floating point data register.
+ RegNo = getRegisterByIndex(16 + RegIndex);
+ return true;
+ } else {
+ // Floating point control register.
+ RegNo = StringSwitch<unsigned>(RegisterNameLower)
+ .Cases("fpc", "fpcr", M68k::FPC)
+ .Cases("fps", "fpsr", M68k::FPS)
+ .Cases("fpi", "fpiar", M68k::FPIAR)
+ .Default(M68k::NoRegister);
+ assert(RegNo != M68k::NoRegister &&
+ "Unrecognized FP control register name");
+ return true;
+ }
}
return false;
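
The rewritten branch accepts `fp0`–`fp7` only when the name is exactly three characters, and otherwise treats the name as a control-register alias with a short and a long spelling each. A standalone sketch of the alias table, using llvm::StringSwitch as the parser does:

// Sketch: accepted spellings for the M68k FP control registers.
#include "llvm/ADT/StringSwitch.h"
unsigned lookupFPControlReg(llvm::StringRef Name) {
  return llvm::StringSwitch<unsigned>(Name)
      .Cases("fpc", "fpcr", M68k::FPC)
      .Cases("fps", "fpsr", M68k::FPS)
      .Cases("fpi", "fpiar", M68k::FPIAR)
      .Default(M68k::NoRegister);
}

Note that the parser asserts rather than returning failure when none of these match, so a spelling such as `fp9` reaches the assert in debug builds and returns `NoRegister` as a success in release builds.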
@@ -704,21 +731,19 @@ ParseStatus M68kAsmParser::parseRegister(MCRegister &RegNo) {
return ParseStatus::Success;
}
-bool M68kAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
+bool M68kAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) {
- auto Result = tryParseRegister(RegNo, StartLoc, EndLoc);
- if (Result != MatchOperand_Success) {
+ ParseStatus Result = tryParseRegister(Reg, StartLoc, EndLoc);
+ if (!Result.isSuccess())
return Error(StartLoc, "expected register");
- }
return false;
}
-OperandMatchResultTy M68kAsmParser::tryParseRegister(MCRegister &RegNo,
- SMLoc &StartLoc,
- SMLoc &EndLoc) {
+ParseStatus M68kAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
StartLoc = getLexer().getLoc();
- ParseStatus Result = parseRegister(RegNo);
+ ParseStatus Result = parseRegister(Reg);
EndLoc = getLexer().getLoc();
return Result;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
index 2124a35cc65a..7f0f737faccd 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
@@ -33,10 +33,11 @@ using namespace llvm;
typedef MCDisassembler::DecodeStatus DecodeStatus;
static const unsigned RegisterDecode[] = {
- M68k::D0, M68k::D1, M68k::D2, M68k::D3, M68k::D4, M68k::D5,
- M68k::D6, M68k::D7, M68k::A0, M68k::A1, M68k::A2, M68k::A3,
- M68k::A4, M68k::A5, M68k::A6, M68k::SP, M68k::FP0, M68k::FP1,
- M68k::FP2, M68k::FP3, M68k::FP4, M68k::FP5, M68k::FP6, M68k::FP7};
+ M68k::D0, M68k::D1, M68k::D2, M68k::D3, M68k::D4, M68k::D5,
+ M68k::D6, M68k::D7, M68k::A0, M68k::A1, M68k::A2, M68k::A3,
+ M68k::A4, M68k::A5, M68k::A6, M68k::SP, M68k::FP0, M68k::FP1,
+ M68k::FP2, M68k::FP3, M68k::FP4, M68k::FP5, M68k::FP6, M68k::FP7,
+ M68k::FPIAR, M68k::FPS, M68k::FPC};
static DecodeStatus DecodeRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address, const void *Decoder) {
@@ -97,6 +98,13 @@ static DecodeStatus DecodeFPDRRegisterClass(MCInst &Inst, uint64_t RegNo,
#define DecodeFPDR64RegisterClass DecodeFPDRRegisterClass
#define DecodeFPDR80RegisterClass DecodeFPDRRegisterClass
+static DecodeStatus DecodeFPCSCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeRegisterClass(Inst, (RegNo >> 1) + 24, Address, Decoder);
+}
+#define DecodeFPICRegisterClass DecodeFPCSCRegisterClass
+
static DecodeStatus DecodeCCRCRegisterClass(MCInst &Inst, APInt &Insn,
uint64_t Address,
const void *Decoder) {
@@ -114,6 +122,7 @@ static DecodeStatus DecodeImm32(MCInst &Inst, uint64_t Imm, uint64_t Address,
#undef DecodeFPDR32RegisterClass
#undef DecodeFPDR64RegisterClass
#undef DecodeFPDR80RegisterClass
+#undef DecodeFPICRegisterClass
/// A disassembler class for M68k.
struct M68kDisassembler : public MCDisassembler {
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp
index b0ada29d1cea..e7e629516494 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp
@@ -25,6 +25,27 @@
using namespace llvm;
+namespace {
+
+struct M68kFormalArgHandler : public M68kIncomingValueHandler {
+ M68kFormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
+ : M68kIncomingValueHandler(MIRBuilder, MRI) {}
+};
+
+struct CallReturnHandler : public M68kIncomingValueHandler {
+ CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ MachineInstrBuilder &MIB)
+ : M68kIncomingValueHandler(MIRBuilder, MRI), MIB(MIB) {}
+
+private:
+ void assignValueToReg(Register ValVReg, Register PhysReg,
+ const CCValAssign &VA) override;
+
+ MachineInstrBuilder &MIB;
+};
+
+} // end anonymous namespace
+
M68kCallLowering::M68kCallLowering(const M68kTargetLowering &TLI)
: CallLowering(&TLI) {}
@@ -36,14 +57,15 @@ struct M68kOutgoingArgHandler : public CallLowering::OutgoingValueHandler {
STI(MIRBuilder.getMF().getSubtarget<M68kSubtarget>()) {}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) override {
+ const CCValAssign &VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
}
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
- MachinePointerInfo &MPO, CCValAssign &VA) override {
+ const MachinePointerInfo &MPO,
+ const CCValAssign &VA) override {
MachineFunction &MF = MIRBuilder.getMF();
Register ExtReg = extendRegister(ValVReg, VA);
@@ -118,7 +140,7 @@ bool M68kCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
CCAssignFn *AssignFn =
TLI.getCCAssignFn(F.getCallingConv(), false, F.isVarArg());
IncomingValueAssigner ArgAssigner(AssignFn);
- FormalArgHandler ArgHandler(MIRBuilder, MRI);
+ M68kFormalArgHandler ArgHandler(MIRBuilder, MRI);
return determineAndHandleAssignments(ArgHandler, ArgAssigner, SplitArgs,
MIRBuilder, F.getCallingConv(),
F.isVarArg());
@@ -126,17 +148,15 @@ bool M68kCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
void M68kIncomingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
- CCValAssign VA) {
+ const CCValAssign &VA) {
MIRBuilder.getMRI()->addLiveIn(PhysReg);
MIRBuilder.getMBB().addLiveIn(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
-void M68kIncomingValueHandler::assignValueToAddress(Register ValVReg,
- Register Addr,
- LLT MemTy,
- MachinePointerInfo &MPO,
- CCValAssign &VA) {
+void M68kIncomingValueHandler::assignValueToAddress(
+ Register ValVReg, Register Addr, LLT MemTy, const MachinePointerInfo &MPO,
+ const CCValAssign &VA) {
MachineFunction &MF = MIRBuilder.getMF();
auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, MemTy,
inferAlignFromPtrInfo(MF, MPO));
@@ -161,7 +181,7 @@ Register M68kIncomingValueHandler::getStackAddress(uint64_t Size,
}
void CallReturnHandler::assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) {
+ const CCValAssign &VA) {
MIB.addDef(PhysReg, RegState::Implicit);
MIRBuilder.buildCopy(ValVReg, PhysReg);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/GISel/M68kCallLowering.h b/contrib/llvm-project/llvm/lib/Target/M68k/GISel/M68kCallLowering.h
index a1589e96aa3d..53696df21794 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/GISel/M68kCallLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/GISel/M68kCallLowering.h
@@ -53,32 +53,16 @@ struct M68kIncomingValueHandler : public CallLowering::IncomingValueHandler {
private:
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) override;
+ const CCValAssign &VA) override;
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
- MachinePointerInfo &MPO, CCValAssign &VA) override;
+ const MachinePointerInfo &MPO,
+ const CCValAssign &VA) override;
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO,
ISD::ArgFlagsTy Flags) override;
};
-
-struct FormalArgHandler : public M68kIncomingValueHandler {
- FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
- : M68kIncomingValueHandler(MIRBuilder, MRI) {}
-};
-
-struct CallReturnHandler : public M68kIncomingValueHandler {
- CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
- MachineInstrBuilder &MIB)
- : M68kIncomingValueHandler(MIRBuilder, MRI), MIB(MIB) {}
-
-private:
- void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) override;
-
- MachineInstrBuilder &MIB;
-};
} // end namespace llvm
#endif // LLVM_LIB_TARGET_M68K_GLSEL_M68KCALLLOWERING_H
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp
index f833eb2d19d4..e7e5bb19c3a0 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp
@@ -33,7 +33,7 @@ enum PartialMappingIdx {
PMI_Min = PMI_GPR,
};
-RegisterBankInfo::PartialMapping PartMappings[]{
+const RegisterBankInfo::PartialMapping PartMappings[]{
// GPR Partial Mapping
{0, 32, GPRRegBank},
};
@@ -43,7 +43,7 @@ enum ValueMappingIdx {
GPR3OpsIdx = 1,
};
-RegisterBankInfo::ValueMapping ValueMappings[] = {
+const RegisterBankInfo::ValueMapping ValueMappings[] = {
// invalid
{nullptr, 0},
// 3 operands in GPRs
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kExpandPseudo.cpp b/contrib/llvm-project/llvm/lib/Target/M68k/M68kExpandPseudo.cpp
index 2f60fc834a18..7bd382107773 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kExpandPseudo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kExpandPseudo.cpp
@@ -252,38 +252,27 @@ bool M68kExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
return true;
}
case M68k::RET: {
- // Adjust stack to erase error code
- int64_t StackAdj = MBBI->getOperand(0).getImm();
- MachineInstrBuilder MIB;
-
- if (StackAdj == 0) {
- MIB = BuildMI(MBB, MBBI, DL, TII->get(M68k::RTS));
- } else if (isUInt<16>(StackAdj)) {
-
- if (STI->atLeastM68020()) {
- llvm_unreachable("RTD is not implemented");
- } else {
- // Copy PC from stack to a free address(A0 or A1) register
- // TODO check if pseudo expand uses free address register
- BuildMI(MBB, MBBI, DL, TII->get(M68k::MOV32aj), M68k::A1)
- .addReg(M68k::SP);
+ if (MBB.getParent()->getFunction().getCallingConv() ==
+ CallingConv::M68k_INTR) {
+ BuildMI(MBB, MBBI, DL, TII->get(M68k::RTE));
+ } else if (int64_t StackAdj = MBBI->getOperand(0).getImm(); StackAdj == 0) {
+ BuildMI(MBB, MBBI, DL, TII->get(M68k::RTS));
+ } else {
+ // Copy return address from stack to a free address (A0 or A1) register
+ // TODO check if pseudo expand uses free address register
+ BuildMI(MBB, MBBI, DL, TII->get(M68k::MOV32aj), M68k::A1)
+ .addReg(M68k::SP);
- // Adjust SP
- FL->emitSPUpdate(MBB, MBBI, StackAdj, /*InEpilogue=*/true);
+ // Adjust SP
+ FL->emitSPUpdate(MBB, MBBI, StackAdj, /*InEpilogue=*/true);
- // Put the return address on stack
- BuildMI(MBB, MBBI, DL, TII->get(M68k::MOV32ja))
- .addReg(M68k::SP)
- .addReg(M68k::A1);
+ // Put the return address on stack
+ BuildMI(MBB, MBBI, DL, TII->get(M68k::MOV32ja))
+ .addReg(M68k::SP)
+ .addReg(M68k::A1);
- // RTS
- BuildMI(MBB, MBBI, DL, TII->get(M68k::RTS));
- }
- } else {
- // TODO: RTD can only handle immediates as big as 2**16-1.
- // If we need to pop off bytes before the return address, we
- // must do it manually.
- llvm_unreachable("Stack adjustment size not supported");
+ // RTS
+ BuildMI(MBB, MBBI, DL, TII->get(M68k::RTS));
}
// FIXME: Can the rest of the operands be ignored, if there are any?
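
After this rewrite the `RET` pseudo expands to one of three shapes, and the old `isUInt<16>`/RTD guard is gone, so every nonzero adjustment now takes the manual sequence. A sketch of the selection plus the code emitted in the last case:

// Sketch: the three expansions of the RET pseudo after this change.
//   M68k_INTR:      rte
//   StackAdj == 0:  rts
//   otherwise:      move.l (%sp),%a1   ; save return address
//                   <adjust %sp by StackAdj>
//                   move.l %a1,(%sp)   ; put it back on top
//                   rts
enum class RetShape { Rte, Rts, PopThenRts };
RetShape classifyRet(bool IsInterruptCC, int64_t StackAdj) {
  if (IsInterruptCC)
    return RetShape::Rte;
  return StackAdj == 0 ? RetShape::Rts : RetShape::PopThenRts;
}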
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp
index e33654ea3f18..e3aa9cb50847 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp
@@ -227,7 +227,8 @@ private:
bool SelectPCD(SDNode *Parent, SDValue N, SDValue &Imm);
bool SelectPCI(SDNode *Parent, SDValue N, SDValue &Imm, SDValue &Index);
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) override;
// If Address Mode represents Frame Index store FI in Disp and
@@ -323,7 +324,7 @@ INITIALIZE_PASS(M68kDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
bool M68kDAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
SDNode *Root) const {
- if (OptLevel == CodeGenOpt::None)
+ if (OptLevel == CodeGenOptLevel::None)
return false;
if (U == Root) {
@@ -953,7 +954,8 @@ bool M68kDAGToDAGISel::SelectARI(SDNode *Parent, SDValue N, SDValue &Base) {
}
bool M68kDAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+ const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
+ std::vector<SDValue> &OutOps) {
// In order to tell AsmPrinter the exact addressing mode we select here, which
// might comprise multiple SDValues (hence MachineOperands), a 32-bit
// immediate value is prepended to the list of selected SDValues to indicate
@@ -966,7 +968,7 @@ bool M68kDAGToDAGISel::SelectInlineAsmMemoryOperand(
switch (ConstraintID) {
// Generic memory operand.
- case InlineAsm::Constraint_m: {
+ case InlineAsm::ConstraintCode::m: {
// Try every supported (memory) addressing modes.
SDValue Operands[4];
@@ -997,7 +999,7 @@ bool M68kDAGToDAGISel::SelectInlineAsmMemoryOperand(
return true;
}
// 'Q': Address register indirect addressing.
- case InlineAsm::Constraint_Q: {
+ case InlineAsm::ConstraintCode::Q: {
SDValue AMKind, Base;
// 'j' addressing mode.
// TODO: Add support for 'o' and 'e' after their
@@ -1009,7 +1011,7 @@ bool M68kDAGToDAGISel::SelectInlineAsmMemoryOperand(
return true;
}
// 'U': Address register indirect w/ constant offset addressing.
- case InlineAsm::Constraint_Um: {
+ case InlineAsm::ConstraintCode::Um: {
SDValue AMKind, Base, Offset;
// 'p' addressing mode.
if (SelectARID(nullptr, Op, Offset, Base) && addKind(AMKind, AMK::p)) {
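
The memory constraints handled here now use the typed `InlineAsm::ConstraintCode` enumeration: 'm' tries every supported addressing mode, 'Q' is restricted to plain address-register-indirect, and 'U' to base-plus-offset. A hedged usage sketch, assuming the GCC-compatible m68k constraint spellings this code mirrors:

// Sketch (assumed GCC-style m68k inline asm; not taken from this diff).
int load_q(int *p) {
  int v;
  asm("move.l %1, %0" : "=d"(v) : "Q"(*p));   // (%aN) addressing only
  return v;
}
int load_u(int *p) {
  int v;
  asm("move.l %1, %0" : "=d"(v) : "U"(p[2])); // d16(%aN) also allowed
  return v;
}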
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.cpp
index af3af6760ae1..0830cc7feb22 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.cpp
@@ -204,11 +204,12 @@ M68kTargetLowering::getExceptionSelectorRegister(const Constant *) const {
return M68k::D1;
}
-unsigned
+InlineAsm::ConstraintCode
M68kTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
- return StringSwitch<unsigned>(ConstraintCode)
- .Case("Q", InlineAsm::Constraint_Q)
- .Case("U", InlineAsm::Constraint_Um) // We borrow Constraint_Um for 'U'.
+ return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
+ .Case("Q", InlineAsm::ConstraintCode::Q)
+ // We borrow ConstraintCode::Um for 'U'.
+ .Case("U", InlineAsm::ConstraintCode::Um)
.Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
}
@@ -2896,7 +2897,7 @@ M68kTargetLowering::getConstraintType(StringRef Constraint) const {
}
void M68kTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
+ StringRef Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
SDValue Result;
@@ -3049,9 +3050,8 @@ M68kTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
/// Determines whether the callee is required to pop its own arguments.
/// Callee pop is necessary to support tail calls.
-bool M68k::isCalleePop(CallingConv::ID CallingConv, bool IsVarArg,
- bool GuaranteeTCO) {
- return false;
+bool M68k::isCalleePop(CallingConv::ID CC, bool IsVarArg, bool GuaranteeTCO) {
+ return CC == CallingConv::M68k_RTD && !IsVarArg;
}
// Return true if it is OK for this CMOV pseudo-opcode to be cascaded
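
`isCalleePop` now returns true for the `M68k_RTD` convention unless the function is variadic, in which case the callee cannot know how many bytes to pop. A hedged sketch, assuming clang exposes the convention as `__attribute__((m68k_rtd))`:

// Sketch: a callee-pop function under M68k_RTD (attribute name assumed).
// The callee is expected to return with `rtd #8`, popping both int
// arguments, instead of a plain `rts`.
__attribute__((m68k_rtd)) int sum(int a, int b) { return a + b; }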
@@ -3204,6 +3204,11 @@ M68kTargetLowering::EmitLoweredSelect(MachineInstr &MI,
F->insert(It, Copy0MBB);
F->insert(It, SinkMBB);
+ // Set the call frame size on entry to the new basic blocks.
+ unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
+ Copy0MBB->setCallFrameSize(CallFrameSize);
+ SinkMBB->setCallFrameSize(CallFrameSize);
+
// If the CCR register isn't dead in the terminator, then claim that it's
// live into the sink and copy blocks.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.h b/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.h
index 5f279b3dcbd3..02427a4e749e 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.h
@@ -163,7 +163,7 @@ public:
StringRef Constraint, MVT VT) const override;
// Lower operand with C_Immediate and C_Other constraint type
- void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
@@ -187,7 +187,8 @@ public:
Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
- unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override;
+ InlineAsm::ConstraintCode
+ getInlineAsmMemConstraint(StringRef ConstraintCode) const override;
private:
unsigned GetAlignedArgumentStackSize(unsigned StackSize,
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrAtomics.td b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrAtomics.td
index 6be53d469bbc..40c6593e2cfa 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrAtomics.td
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrAtomics.td
@@ -10,8 +10,7 @@ foreach size = [8, 16, 32] in {
def : Pat<(!cast<SDPatternOperator>("atomic_load_"#size) MxCP_ARI:$ptr),
(!cast<MxInst>("MOV"#size#"dj") !cast<MxMemOp>("MxARI"#size):$ptr)>;
- def : Pat<(!cast<SDPatternOperator>("atomic_store_"#size) MxCP_ARI:$ptr,
- !cast<MxRegOp>("MxDRD"#size):$val),
+ def : Pat<(!cast<SDPatternOperator>("atomic_store_"#size) !cast<MxRegOp>("MxDRD"#size):$val, MxCP_ARI:$ptr),
(!cast<MxInst>("MOV"#size#"jd") !cast<MxMemOp>("MxARI"#size):$ptr,
!cast<MxRegOp>("MxDRD"#size):$val)>;
}
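
The operand flip tracks an upstream change to the `atomic_store` SDNode, which now takes the value first and the pointer second; the emitted `MOV`s are unchanged. For reference, the kind of source these patterns select (a sketch):

// Sketch: sources exercising the 32-bit atomic patterns above.
#include <atomic>
int load32(std::atomic<int> &a) {
  return a.load(std::memory_order_relaxed);    // atomic_load_32  -> MOV32dj
}
void store32(std::atomic<int> &a, int v) {
  a.store(v, std::memory_order_relaxed);       // atomic_store_32 -> MOV32jd
}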
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrBits.td b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrBits.td
index abd2ab3cf012..55c95e467096 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrBits.td
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrBits.td
@@ -12,7 +12,7 @@
///
/// Machine:
///
-/// BCHG [ ] BCLR [ ] BSET [ ] BTST [~]
+/// BCHG [~] BCLR [~] BSET [~] BTST [~]
///
/// Map:
///
@@ -30,92 +30,134 @@
/// ------------+---------+---------+---------+---------
/// F E D C | B A 9 | 8 7 6 | 5 4 3 | 2 1 0
/// ------------+---------+---------+---------+---------
-/// 0 0 0 0 | REG | 1 0 0 | MODE | REG
+/// 0 0 0 0 | REG | OP MODE | MODE | REG
/// ------------+---------+---------+---------+---------
-class MxBTSTEnc_R<MxEncMemOp dst_enc, string bitno_name> {
+class MxBITEnc_R<bits<3> opmode, MxEncMemOp dst_enc, string bitno_name> {
dag Value = (ascend
(descend 0b0000,
(operand "$"#bitno_name, 3),
- 0b100, dst_enc.EA
+ opmode, dst_enc.EA
),
dst_enc.Supplement
);
}
-/// -------------------------------+---------+---------
-/// F E D C B A 9 8 . 7 6 | 5 4 3 | 2 1 0
-/// -------------------------------+---------+---------
-/// 0 0 0 0 1 0 0 0 . 0 0 | MODE | REG
-/// ------------------------+------+---------+---------
+/// ---------------------+---------+---------+---------
+/// F E D C B A 9 | 8 7 6 | 5 4 3 | 2 1 0
+/// ---------------------+---------+---------+---------
+/// 0 0 0 0 1 0 0 | OP MODE | MODE | REG
+/// ---------------------+--+------+---------+---------
/// 0 0 0 0 0 0 0 0 | BIT NUMBER
/// ------------------------+--------------------------
-class MxBTSTEnc_I<MxEncMemOp dst_enc, string bitno_name> {
+class MxBITEnc_I<bits<3> opmode, MxEncMemOp dst_enc, string bitno_name> {
dag Value = (ascend
- (descend 0b0000100000, dst_enc.EA),
+ (descend 0b0000100, opmode, dst_enc.EA),
(descend 0b00000000, (operand "$"#bitno_name, 8)),
dst_enc.Supplement
);
}
let Defs = [CCR] in {
-class MxBTST_RR<MxType TYPE>
- : MxInst<(outs), (ins TYPE.ROp:$dst, TYPE.ROp:$bitno), "btst\t$bitno, $dst",
- [(set CCR, (MxBtst TYPE.VT:$dst, TYPE.VT:$bitno))]> {
- let Inst = MxBTSTEnc_R<MxEncAddrMode_r<"dst">, "bitno">.Value;
+class MxBIT_RR<string MN, bits<3> OPMODE, MxType TYPE>
+ : MxInst<(outs), (ins TYPE.ROp:$dst, TYPE.ROp:$bitno),
+ MN#"\t$bitno, $dst"> {
+ let Inst = MxBITEnc_R<OPMODE, MxEncAddrMode_r<"dst">, "bitno">.Value;
}
-class MxBTST_RI<MxType TYPE>
- : MxInst<(outs), (ins TYPE.ROp:$dst, TYPE.IOp:$bitno), "btst\t$bitno, $dst",
- [(set CCR, (MxBtst TYPE.VT:$dst, TYPE.IPat:$bitno))]> {
- let Inst = MxBTSTEnc_I<MxEncAddrMode_r<"dst">, "bitno">.Value;
+class MxBIT_RI<string MN, bits<3> OPMODE, MxType TYPE>
+ : MxInst<(outs), (ins TYPE.ROp:$dst, TYPE.IOp:$bitno),
+ MN#"\t$bitno, $dst"> {
+ let Inst = MxBITEnc_I<OPMODE, MxEncAddrMode_r<"dst">, "bitno">.Value;
}
-class MxBTST_MR<MxType TYPE, MxOperand MEMOpd, ComplexPattern MEMPat,
- MxEncMemOp DST_ENC>
- : MxInst<(outs), (ins MEMOpd:$dst, TYPE.ROp:$bitno), "btst\t$bitno, $dst",
- [(set CCR, (MxBtst (TYPE.Load MEMPat:$dst), TYPE.VT:$bitno))]> {
- let Inst = MxBTSTEnc_R<DST_ENC, "bitno">.Value;
+class MxBIT_MR<string MN, bits<3> OPMODE, MxType TYPE,
+ MxOperand MEMOpd, MxEncMemOp DST_ENC>
+ : MxInst<(outs), (ins MEMOpd:$dst, TYPE.ROp:$bitno),
+ MN#"\t$bitno, $dst"> {
+ let Inst = MxBITEnc_R<OPMODE, DST_ENC, "bitno">.Value;
}
-class MxBTST_MI<MxType TYPE, MxOperand MEMOpd, ComplexPattern MEMPat,
- MxEncMemOp DST_ENC>
- : MxInst<(outs), (ins MEMOpd:$dst, TYPE.IOp:$bitno), "btst\t$bitno, $dst",
- [(set CCR, (MxBtst (TYPE.Load MEMPat:$dst), TYPE.IPat:$bitno))]> {
- let Inst = MxBTSTEnc_I<DST_ENC, "bitno">.Value;
+class MxBIT_MI<string MN, bits<3> OPMODE, MxType TYPE,
+ MxOperand MEMOpd, MxEncMemOp DST_ENC>
+ : MxInst<(outs), (ins MEMOpd:$dst, TYPE.IOp:$bitno),
+ MN#"\t$bitno, $dst"> {
+ let Inst = MxBITEnc_I<OPMODE, DST_ENC, "bitno">.Value;
}
} // Defs = [CCR]
-// Register BTST limited to 32 bits only
-def BTST32dd : MxBTST_RR<MxType32d>;
-def BTST32di : MxBTST_RI<MxType32d>;
-
-// Memory BTST limited to 8 bits only
-def BTST8jd : MxBTST_MR<MxType8d, MxType8.JOp, MxType8.JPat,
- MxEncAddrMode_j<"dst">>;
-def BTST8od : MxBTST_MR<MxType8d, MxType8.OOp, MxType8.OPat,
- MxEncAddrMode_o<"dst">>;
-def BTST8ed : MxBTST_MR<MxType8d, MxType8.EOp, MxType8.EPat,
- MxEncAddrMode_e<"dst">>;
-def BTST8pd : MxBTST_MR<MxType8d, MxType8.POp, MxType8.PPat,
- MxEncAddrMode_p<"dst">>;
-def BTST8fd : MxBTST_MR<MxType8d, MxType8.FOp, MxType8.FPat,
- MxEncAddrMode_f<"dst">>;
-def BTST8qd : MxBTST_MR<MxType8d, MxType8.QOp, MxType8.QPat,
+def BTST8qd : MxBIT_MR<"btst", 0b100, MxType8d, MxType8.QOp,
MxEncAddrMode_q<"dst">>;
-def BTST8kd : MxBTST_MR<MxType8d, MxType8.KOp, MxType8.KPat,
+def BTST8kd : MxBIT_MR<"btst", 0b100, MxType8d, MxType8.KOp,
MxEncAddrMode_k<"dst">>;
-
-def BTST8ji : MxBTST_MI<MxType8d, MxType8.JOp, MxType8.JPat,
- MxEncAddrMode_j<"dst">>;
-def BTST8oi : MxBTST_MI<MxType8d, MxType8.OOp, MxType8.OPat,
- MxEncAddrMode_o<"dst">>;
-def BTST8ei : MxBTST_MI<MxType8d, MxType8.EOp, MxType8.EPat,
- MxEncAddrMode_e<"dst">>;
-def BTST8pi : MxBTST_MI<MxType8d, MxType8.POp, MxType8.PPat,
- MxEncAddrMode_p<"dst">>;
-def BTST8fi : MxBTST_MI<MxType8d, MxType8.FOp, MxType8.FPat,
- MxEncAddrMode_f<"dst">>;
-def BTST8qi : MxBTST_MI<MxType8d, MxType8.QOp, MxType8.QPat,
+def BTST8qi : MxBIT_MI<"btst", 0b000, MxType8d, MxType8.QOp,
MxEncAddrMode_q<"dst">>;
-def BTST8ki : MxBTST_MI<MxType8d, MxType8.KOp, MxType8.KPat,
+def BTST8ki : MxBIT_MI<"btst", 0b000, MxType8d, MxType8.KOp,
MxEncAddrMode_k<"dst">>;
+
+multiclass MxBIT<string MN, bits<3> OP, bits<3> OPI> {
+ // Register Bit manipulation limited to 32 bits only
+ def NAME#32dd : MxBIT_RR<MN, OP, MxType32d>;
+ def NAME#32di : MxBIT_RI<MN, OPI, MxType32d>;
+
+ // Memory Bit manipulation limited to 8 bits only
+ def NAME#8jd : MxBIT_MR<MN, OP, MxType8d,
+ MxType8.JOp, MxEncAddrMode_j<"dst">>;
+ def NAME#8od : MxBIT_MR<MN, OP, MxType8d,
+ MxType8.OOp, MxEncAddrMode_o<"dst">>;
+ def NAME#8ed : MxBIT_MR<MN, OP, MxType8d,
+ MxType8.EOp, MxEncAddrMode_e<"dst">>;
+ def NAME#8pd : MxBIT_MR<MN, OP, MxType8d,
+ MxType8.POp, MxEncAddrMode_p<"dst">>;
+ def NAME#8fd : MxBIT_MR<MN, OP, MxType8d,
+ MxType8.FOp, MxEncAddrMode_f<"dst">>;
+
+ def NAME#8ji : MxBIT_MI<MN, OPI, MxType8d,
+ MxType8.JOp, MxEncAddrMode_j<"dst">>;
+ def NAME#8oi : MxBIT_MI<MN, OPI, MxType8d,
+ MxType8.OOp, MxEncAddrMode_o<"dst">>;
+ def NAME#8ei : MxBIT_MI<MN, OPI, MxType8d,
+ MxType8.EOp, MxEncAddrMode_e<"dst">>;
+ def NAME#8pi : MxBIT_MI<MN, OPI, MxType8d,
+ MxType8.POp, MxEncAddrMode_p<"dst">>;
+ def NAME#8fi : MxBIT_MI<MN, OPI, MxType8d,
+ MxType8.FOp, MxEncAddrMode_f<"dst">>;
+}
+
+defm BCHG : MxBIT<"bchg", 0b101, 0b001>;
+defm BCLR : MxBIT<"bclr", 0b110, 0b010>;
+defm BSET : MxBIT<"bset", 0b111, 0b011>;
+defm BTST : MxBIT<"btst", 0b100, 0b000>;
+
+// Codegen patterns
+
+multiclass MxBITPatR<MxInst INSTd, MxInst INSTi, SDNode NODE> {
+def : Pat<(NODE MxType32d.VT:$dst, MxType32d.VT:$bitno),
+ (INSTd MxType32d.ROp:$dst, MxType32d.ROp:$bitno)>;
+def : Pat<(NODE MxType32d.VT:$dst, MxType32d.IPat:$bitno),
+ (INSTi MxType32d.ROp:$dst, MxType32d.IOp:$bitno)>;
+}
+
+defm : MxBITPatR<BTST32dd, BTST32di, MxBtst>;
+
+multiclass MxBITPatM<MxInst INSTd, MxInst INSTi, SDNode NODE, MxType TYPE,
+ MxOperand MEMOpd, ComplexPattern MEMPat> {
+def : Pat<(NODE (TYPE.Load MEMPat:$dst), TYPE.VT:$bitno),
+ (INSTd MEMOpd:$dst, TYPE.ROp:$bitno)>;
+def : Pat<(NODE (TYPE.Load MEMPat:$dst), TYPE.IPat:$bitno),
+ (INSTi MEMOpd:$dst, TYPE.IOp:$bitno)>;
+}
+
+defm : MxBITPatM<BTST8qd, BTST8qi, MxBtst,
+ MxType8d, MxType8.QOp, MxType8.QPat>;
+defm : MxBITPatM<BTST8kd, BTST8ki, MxBtst,
+ MxType8d, MxType8.KOp, MxType8.KPat>;
+defm : MxBITPatM<BTST8jd, BTST8ji, MxBtst,
+ MxType8d, MxType8.JOp, MxType8.JPat>;
+defm : MxBITPatM<BTST8od, BTST8oi, MxBtst,
+ MxType8d, MxType8.OOp, MxType8.OPat>;
+defm : MxBITPatM<BTST8ed, BTST8ei, MxBtst,
+ MxType8d, MxType8.EOp, MxType8.EPat>;
+defm : MxBITPatM<BTST8pd, BTST8pi, MxBtst,
+ MxType8d, MxType8.POp, MxType8.PPat>;
+defm : MxBITPatM<BTST8fd, BTST8fi, MxBtst,
+ MxType8d, MxType8.FOp, MxType8.FPat>;
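
With the opmode lifted into a parameter, all four bit instructions share the two templates: the register form is `0000 rrr OPMODE ea` with opmodes 101/110/111/100 for BCHG/BCLR/BSET/BTST, and the immediate form is `0000100 OPMODE ea` with 001/010/011/000. A worked check of the first instruction word against the classic 68000 encodings:

// Sketch: assemble the first word of two BTST forms by hand.
// btst %d2, %d1 : 0000 | 010 (d2) | 100 (opmode) | 000001 (EA = d1) = 0x0541
// btst #n, %d1  : 0000100 | 000 (opmode) | 000001 (EA = d1) = 0x0801
//                 (the bit number goes into the second word)
unsigned btstRegWord(unsigned Dn, unsigned EA) {
  return (Dn << 9) | (0b100 << 6) | EA;        // Dn=2, EA=1 -> 0x0541
}
unsigned btstImmWord(unsigned EA) {
  return (0b0000100 << 9) | (0b000 << 6) | EA; // EA=1 -> 0x0801
}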
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrControl.td b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrControl.td
index 225f932f3316..6e116d7cfe40 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrControl.td
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrControl.td
@@ -327,6 +327,10 @@ def RTS : MxInst<(outs), (ins), "rts", []> {
let Inst = (descend 0b0100, 0b1110, 0b0111, 0b0101);
}
+def RTE : MxInst<(outs), (ins), "rte", []> {
+ let Inst = (descend 0b0100, 0b1110, 0b0111, 0b0011);
+}
+
let isCodeGenOnly = 1 in
def RET : MxPseudo<(outs), (ins i32imm:$adj, variable_ops),
[(MxRet timm:$adj)]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrData.td b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrData.td
index e6d4471f7aab..624093661d19 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrData.td
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrData.td
@@ -672,3 +672,49 @@ foreach rounding = ["", "s", "d"] in {
foreach size = [32, 64] in
def F # !toupper(rounding) # MOV # size # fp_fp : MxFMove_FF<rounding, size>;
}
+// Direction
+defvar MxFMove_FP_EA = false;
+defvar MxFMove_EA_FP = true;
+
+// Encoding scheme for FPSYS <-> R/M
+class MxEncFSysMove<bit dir, MxEncMemOp EAEnc, string fsys_reg> {
+ dag Value = (ascend
+ (descend 0b1111,
+ /*COPROCESSOR ID*/0b001,
+ 0b000,
+ /*MODE + REGISTER*/
+ EAEnc.EA
+ ),
+ (descend 0b10, /*dir*/ dir,
+ /*REGISTER SELECT*/
+ (operand "$"#fsys_reg, 3, (encoder "encodeFPSYSSelect")),
+ 0b0000000000
+ )
+ );
+}
+
+// FPSYS <-> R
+class MxFMove_FSYS_R<string src_reg,
+ MxOpBundle SrcOpnd = !cast<MxOpBundle>("MxOp32AddrMode_"#src_reg),
+ MxOpBundle DstOpnd = !cond(!eq(src_reg, "d"): MxOp32AddrMode_fpcs,
+ !eq(src_reg, "a"): MxOp32AddrMode_fpi),
+ MxEncMemOp SrcEnc = !cast<MxEncMemOp>("MxMoveSrcOpEnc_"#src_reg)>
+ : MxFMove<"l", (outs DstOpnd.Op:$dst), (ins SrcOpnd.Op:$src),
+ [(null_frag)]> {
+ let Inst = MxEncFSysMove<MxFMove_FP_EA, SrcEnc, "dst">.Value;
+}
+
+class MxFMove_R_FSYS<string dst_reg,
+ MxOpBundle SrcOpnd = !cond(!eq(dst_reg, "d"): MxOp32AddrMode_fpcs,
+ !eq(dst_reg, "a"): MxOp32AddrMode_fpi),
+ MxOpBundle DstOpnd = !cast<MxOpBundle>("MxOp32AddrMode_"#dst_reg),
+ MxEncMemOp DstEnc = !cast<MxEncMemOp>("MxMoveDstOpEnc_"#dst_reg)>
+ : MxFMove<"l", (outs DstOpnd.Op:$dst), (ins SrcOpnd.Op:$src),
+ [(null_frag)]> {
+ let Inst = MxEncFSysMove<MxFMove_EA_FP, DstEnc, "src">.Value;
+}
+
+def FMOVE32fpcs_d : MxFMove_FSYS_R<"d">;
+def FMOVE32d_fpcs : MxFMove_R_FSYS<"d">;
+def FMOVE32fpi_a : MxFMove_FSYS_R<"a">;
+def FMOVE32a_fpi : MxFMove_R_FSYS<"a">;
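
These four definitions add the longword moves between the integer register files and the FP system registers: FPCR/FPSR pair with data registers (the FPCSC class) and FPIAR with address registers (FPIC). In assembler they correspond to forms like the following sketch (GAS Motorola syntax assumed):

// Sketch: the asm forms covered by FMOVE32fpcs_d / FMOVE32d_fpcs.
unsigned readFPCR() {
  unsigned v;
  asm volatile("fmove.l %%fpcr, %0" : "=d"(v)); // FPCR -> Dn
  return v;
}
void writeFPCR(unsigned v) {
  asm volatile("fmove.l %0, %%fpcr" :: "d"(v)); // Dn -> FPCR
}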
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.cpp
index 1803a936701f..d56fef9e9029 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.cpp
@@ -361,6 +361,7 @@ bool M68kInstrInfo::ExpandMOVX_RR(MachineInstrBuilder &MIB, MVT MVTDst,
assert(RCDst && RCSrc && "Wrong use of MOVX_RR");
assert(RCDst != RCSrc && "You cannot use the same Reg Classes with MOVX_RR");
+ (void)RCSrc;
// We need to find the super source register that matches the size of Dst
unsigned SSrc = RI.getMatchingMegaReg(Src, RCDst);
@@ -407,6 +408,7 @@ bool M68kInstrInfo::ExpandMOVSZX_RR(MachineInstrBuilder &MIB, bool IsSigned,
assert(RCDst && RCSrc && "Wrong use of MOVSX_RR");
assert(RCDst != RCSrc && "You cannot use the same Reg Classes with MOVSX_RR");
+ (void)RCSrc;
// We need to find the super source register that matches the size of Dst
unsigned SSrc = RI.getMatchingMegaReg(Src, RCDst);
@@ -746,6 +748,7 @@ void M68kInstrInfo::storeRegToStackSlot(
const MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo();
assert(MFI.getObjectSize(FrameIndex) >= TRI->getSpillSize(*RC) &&
"Stack slot is too small to store");
+ (void)MFI;
unsigned Opc = getStoreRegOpcode(SrcReg, RC, TRI, Subtarget);
DebugLoc DL = MBB.findDebugLoc(MI);
@@ -763,13 +766,14 @@ void M68kInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo();
assert(MFI.getObjectSize(FrameIndex) >= TRI->getSpillSize(*RC) &&
"Stack slot is too small to load");
+ (void)MFI;
unsigned Opc = getLoadRegOpcode(DstReg, RC, TRI, Subtarget);
DebugLoc DL = MBB.findDebugLoc(MI);
M68k::addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DstReg), FrameIndex);
}
-/// Return a virtual register initialized with the the global base register
+/// Return a virtual register initialized with the global base register
/// value. Output instructions required to initialize the register in the
/// function entry block, if necessary.
///
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.h
index b6057a39bc82..577967f2fdfc 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.h
@@ -324,7 +324,7 @@ public:
bool ExpandMOVEM(MachineInstrBuilder &MIB, const MCInstrDesc &Desc,
bool IsRM) const;
- /// Return a virtual register initialized with the the global base register
+ /// Return a virtual register initialized with the global base register
/// value. Output instructions required to initialize the register in the
/// function entry block, if necessary.
unsigned getGlobalBaseReg(MachineFunction *MF) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.td
index 6d3370d5ee90..dc66e103361a 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.td
@@ -186,6 +186,7 @@ let RenderMethod = "addRegOperands", SuperClasses = [MxRegClass]in {
def MxDRegClass : MxOpClass<"DReg">;
def MxFPDRegClass : MxOpClass<"FPDReg">;
+ def MxFPCRegClass : MxOpClass<"FPCReg">;
}
class MxOperand<ValueType vt, MxSize size, string letter, RegisterClass rc, dag pat = (null_frag)> {
@@ -242,6 +243,12 @@ let ParserMatchClass = MxFPDRegClass in {
def MxFPR80 : MxRegOp<f80, FPDR80, MxSizeF80, "fp">;
}
+// FLOATING POINT SYSTEM CONTROL REGISTER
+let ParserMatchClass = MxFPCRegClass in {
+ def MxFPCSR : MxRegOp<i32, FPCSC, MxSize32, "fpcs">;
+ def MxFPIR : MxRegOp<i32, FPIC, MxSize32, "fpi">;
+}
+
class MxMemOp<dag ops, MxSize size, string letter,
string printMethod = "printOperand",
AsmOperandClass parserMatchClass = ImmAsmOperand>
@@ -727,6 +734,9 @@ foreach size = [32, 64, 80] in
def MxOp#size#AddrMode_fpr
: MxOpBundle<size, !cast<MxOperand>("MxFPR"#size), ?>;
+def MxOp32AddrMode_fpcs : MxOpBundle<32, MxFPCSR, ?>;
+def MxOp32AddrMode_fpi : MxOpBundle<32, MxFPIR, ?>;
+
class MxType8Class<string rLet, MxOperand reg>
: MxType<i8, "b", "", rLet, reg,
MxARI8, MxCP_ARI,
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/M68k/M68kRegisterInfo.td
index 1567bcbb7319..45b492eba4ec 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kRegisterInfo.td
@@ -125,7 +125,10 @@ let CopyCost = -1 in {
def CCRC : MxRegClass<[i8], 16, (add CCR)>;
def SRC : MxRegClass<[i16], 16, (add SR)>;
- def FPCR : MxRegClass<[i32], 32, (add FPC, FPS, FPIAR)>;
+ // Floating Point System Control Registers
+ def FPIC : MxRegClass<[i32], 32, (add FPIAR)>;
+ def FPCSC : MxRegClass<[i32], 32, (add FPC, FPS)>;
+ def FPSYSC : MxRegClass<[i32], 32, (add FPCSC, FPIC)>;
}
let isAllocatable = 0 in {
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/M68k/M68kTargetMachine.cpp
index 4e59e27bef8c..af8cb9a83a05 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kTargetMachine.cpp
@@ -101,7 +101,7 @@ M68kTargetMachine::M68kTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, CPU, FS,
Options, getEffectiveRelocModel(TT, RM),
::getEffectiveCodeModel(CM, JIT), OL),
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/M68k/M68kTargetMachine.h
index e204f639c4f1..4ff4c4cb46b8 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kTargetMachine.h
@@ -38,7 +38,7 @@ public:
M68kTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
~M68kTargetMachine() override;
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
index b66557ec6c3a..1b85e6df379c 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
@@ -41,7 +41,7 @@ namespace {
class M68kAsmBackend : public MCAsmBackend {
public:
- M68kAsmBackend(const Target &T) : MCAsmBackend(support::big) {}
+ M68kAsmBackend(const Target &T) : MCAsmBackend(llvm::endianness::big) {}
unsigned getNumFixupKinds() const override { return 0; }
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h
index e52b4961e3c8..1376b06bef6f 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h
@@ -82,11 +82,11 @@ template <typename value_t> value_t swapWord(value_t Val) {
const unsigned NumWords = sizeof(Val) / 2;
if (NumWords <= 1)
return Val;
- Val = support::endian::byte_swap(Val, support::big);
+ Val = support::endian::byte_swap(Val, llvm::endianness::big);
value_t NewVal = 0;
for (unsigned i = 0U; i != NumWords; ++i) {
uint16_t Part = (Val >> (i * 16)) & 0xFFFF;
- Part = support::endian::byte_swap(Part, support::big);
+ Part = support::endian::byte_swap(Part, llvm::endianness::big);
NewVal |= (Part << (i * 16));
}
return NewVal;
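
Similarly, support::big and support::little were folded into the scoped llvm::endianness enum; helpers such as byte_swap in Support/Endian.h kept their behavior and only changed the parameter type. A small sketch under LLVM 18 headers:

    #include "llvm/Support/Endian.h"

    // byte_swap reorders bytes only when the requested endianness differs
    // from the host's; on a big-endian host this call is the identity.
    uint32_t toBigEndian(uint32_t Val) {
      return llvm::support::endian::byte_swap(Val, llvm::endianness::big);
    }
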
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp
index 7fc5395671cf..e6bc3af6e191 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp
@@ -59,6 +59,10 @@ class M68kMCCodeEmitter : public MCCodeEmitter {
APInt &Value, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ void encodeFPSYSSelect(const MCInst &MI, unsigned OpIdx, unsigned InsertPos,
+ APInt &Value, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
public:
M68kMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
: MCII(mcii), Ctx(ctx) {}
@@ -172,6 +176,26 @@ void M68kMCCodeEmitter::encodePCRelImm(const MCInst &MI, unsigned OpIdx,
}
}
+void M68kMCCodeEmitter::encodeFPSYSSelect(const MCInst &MI, unsigned OpIdx,
+ unsigned InsertPos, APInt &Value,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ MCRegister FPSysReg = MI.getOperand(OpIdx).getReg();
+ switch (FPSysReg) {
+ case M68k::FPC:
+ Value = 0b100;
+ break;
+ case M68k::FPS:
+ Value = 0b010;
+ break;
+ case M68k::FPIAR:
+ Value = 0b001;
+ break;
+ default:
+ llvm_unreachable("Unrecognized FPSYS register");
+ }
+}
+
void M68kMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &Op,
unsigned InsertPos, APInt &Value,
SmallVectorImpl<MCFixup> &Fixups,
@@ -203,14 +227,13 @@ void M68kMCCodeEmitter::encodeInstruction(const MCInst &MI,
SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- unsigned Opcode = MI.getOpcode();
-
- LLVM_DEBUG(dbgs() << "EncodeInstruction: " << MCII.getName(Opcode) << "("
- << Opcode << ")\n");
+ LLVM_DEBUG(dbgs() << "EncodeInstruction: " << MCII.getName(MI.getOpcode())
+ << "(" << MI.getOpcode() << ")\n");
+ (void)MCII;
// Try using the new method first.
APInt EncodedInst(16, 0U);
- APInt Scratch(16, 0U);
+ APInt Scratch(64, 0U); // One APInt word is enough.
getBinaryCodeForInstr(MI, Fixups, EncodedInst, Scratch, STI);
ArrayRef<uint64_t> Data(EncodedInst.getRawData(), EncodedInst.getNumWords());
@@ -218,7 +241,7 @@ void M68kMCCodeEmitter::encodeInstruction(const MCInst &MI,
for (uint64_t Word : Data) {
for (int i = 0; i < 4 && InstSize > 0; ++i, InstSize -= 16) {
support::endian::write<uint16_t>(CB, static_cast<uint16_t>(Word),
- support::big);
+ llvm::endianness::big);
Word >>= 16;
}
}
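
The widened Scratch buffer and the emission loop above go together: getBinaryCodeForInstr can now produce encodings longer than 16 bits, and each 64-bit APInt word is cut into 16-bit chunks, lowest chunk first, with every chunk written big-endian to match the M68k instruction stream. A self-contained sketch of that inner loop, assuming Support/EndianStream.h:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Support/EndianStream.h"

    // Emit up to four 16-bit words from one 64-bit encoding word, stopping
    // once InstBits is exhausted; each word is written big-endian.
    void emitWords(llvm::SmallVectorImpl<char> &CB, uint64_t Word,
                   int InstBits) {
      for (int i = 0; i < 4 && InstBits > 0; ++i, InstBits -= 16) {
        llvm::support::endian::write<uint16_t>(
            CB, static_cast<uint16_t>(Word), llvm::endianness::big);
        Word >>= 16;
      }
    }
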
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
index f2c90f565863..f3213ee72d9a 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
@@ -45,10 +45,9 @@ class MSP430AsmParser : public MCTargetAsmParser {
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
- bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
- OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
@@ -290,30 +289,28 @@ bool MSP430AsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode,
static unsigned MatchRegisterName(StringRef Name);
static unsigned MatchRegisterAltName(StringRef Name);
-bool MSP430AsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
+bool MSP430AsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) {
- switch (tryParseRegister(RegNo, StartLoc, EndLoc)) {
- case MatchOperand_ParseFail:
+ ParseStatus Res = tryParseRegister(Reg, StartLoc, EndLoc);
+ if (Res.isFailure())
return Error(StartLoc, "invalid register name");
- case MatchOperand_Success:
+ if (Res.isSuccess())
return false;
- case MatchOperand_NoMatch:
+ if (Res.isNoMatch())
return true;
- }
- llvm_unreachable("unknown match result type");
+ llvm_unreachable("unknown parse status");
}
-OperandMatchResultTy MSP430AsmParser::tryParseRegister(MCRegister &RegNo,
- SMLoc &StartLoc,
- SMLoc &EndLoc) {
+ParseStatus MSP430AsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
if (getLexer().getKind() == AsmToken::Identifier) {
auto Name = getLexer().getTok().getIdentifier().lower();
- RegNo = MatchRegisterName(Name);
- if (RegNo == MSP430::NoRegister) {
- RegNo = MatchRegisterAltName(Name);
- if (RegNo == MSP430::NoRegister)
- return MatchOperand_NoMatch;
+ Reg = MatchRegisterName(Name);
+ if (Reg == MSP430::NoRegister) {
+ Reg = MatchRegisterAltName(Name);
+ if (Reg == MSP430::NoRegister)
+ return ParseStatus::NoMatch;
}
AsmToken const &T = getParser().getTok();
@@ -321,10 +318,10 @@ OperandMatchResultTy MSP430AsmParser::tryParseRegister(MCRegister &RegNo,
EndLoc = T.getEndLoc();
getLexer().Lex(); // eat register token
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
}
bool MSP430AsmParser::parseJccInstruction(ParseInstructionInfo &Info,
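
ParseStatus replaces OperandMatchResultTy with the same three outcomes behind named predicates rather than a switch-friendly enum. Roughly, in a hypothetical tryParse hook (the two helper functions are invented for illustration):

    // NoMatch: "not my operand, let another parser try". Failure: "mine,
    // but malformed, diagnostics already emitted". Success: consumed.
    ParseStatus tryParseThing(MCRegister &Reg, SMLoc &Start, SMLoc &End) {
      if (!lexerLooksLikeThing())         // hypothetical predicate
        return ParseStatus::NoMatch;
      if (!consumeThing(Reg, Start, End)) // hypothetical helper
        return ParseStatus::Failure;
      return ParseStatus::Success;
    }
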
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
index a667f457bd03..bd9f6279445a 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
@@ -34,7 +34,7 @@ class MSP430AsmBackend : public MCAsmBackend {
public:
MSP430AsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI)
- : MCAsmBackend(support::little), OSABI(OSABI) {}
+ : MCAsmBackend(llvm::endianness::little), OSABI(OSABI) {}
~MSP430AsmBackend() override = default;
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp
index 985906a35331..51428552d8af 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp
@@ -94,7 +94,8 @@ void MSP430MCCodeEmitter::encodeInstruction(const MCInst &MI,
size_t WordCount = Size / 2;
while (WordCount--) {
- support::endian::write(CB, (uint16_t)BinaryOpCode, support::little);
+ support::endian::write(CB, (uint16_t)BinaryOpCode,
+ llvm::endianness::little);
BinaryOpCode >>= 16;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430.h b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430.h
index 75fa398adc02..60685b6704ba 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430.h
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430.h
@@ -39,7 +39,7 @@ class MSP430TargetMachine;
class PassRegistry;
FunctionPass *createMSP430ISelDag(MSP430TargetMachine &TM,
- CodeGenOpt::Level OptLevel);
+ CodeGenOptLevel OptLevel);
FunctionPass *createMSP430BranchSelectionPass();
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 88f072c78036..660861a5d521 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -95,7 +95,7 @@ namespace {
MSP430DAGToDAGISel() = delete;
- MSP430DAGToDAGISel(MSP430TargetMachine &TM, CodeGenOpt::Level OptLevel)
+ MSP430DAGToDAGISel(MSP430TargetMachine &TM, CodeGenOptLevel OptLevel)
: SelectionDAGISel(ID, TM, OptLevel) {}
private:
@@ -103,7 +103,8 @@ namespace {
bool MatchWrapper(SDValue N, MSP430ISelAddressMode &AM);
bool MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM);
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) override;
// Include the pieces autogenerated from the target description.
@@ -128,11 +129,10 @@ INITIALIZE_PASS(MSP430DAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
/// MSP430-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createMSP430ISelDag(MSP430TargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
- return new MSP430DAGToDAGISel(TM, OptLevel);
+ CodeGenOptLevel OptLevel) {
+ return new MSP430DAGToDAGISel(TM, OptLevel);
}
-
/// MatchWrapper - Try to match MSP430ISD::Wrapper node into an addressing mode.
/// These wrap things that will resolve down into a symbol reference. If no
/// match is possible, this returns true, otherwise it returns false.
@@ -282,13 +282,13 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue N,
return true;
}
-bool MSP430DAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
- std::vector<SDValue> &OutOps) {
+bool MSP430DAGToDAGISel::SelectInlineAsmMemoryOperand(
+ const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
+ std::vector<SDValue> &OutOps) {
SDValue Op0, Op1;
switch (ConstraintID) {
default: return true;
- case InlineAsm::Constraint_m: // memory
+ case InlineAsm::ConstraintCode::m: // memory
if (!SelectAddr(Op, Op0, Op1))
return true;
break;
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.td
index 0ff9763e4c93..714a5d4f5116 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.td
@@ -305,12 +305,13 @@ def POP16r : IForm16<0b0100, DstReg, SrcPostInc, 2,
let rs = 1;
}
-let mayStore = 1 in
+let mayStore = 1 in {
def PUSH8r : II8r<0b100, (outs), (ins GR8:$rs), "push.b\t$rs", []>;
def PUSH16r : II16r<0b100, (outs), (ins GR16:$rs), "push\t$rs", []>;
def PUSH16c : II16c<0b100, (outs), (ins cg16imm:$imm), "push\t$imm", []>;
def PUSH16i : II16i<0b100, (outs), (ins i16imm:$imm), "push\t$imm", []>;
}
+}
//===----------------------------------------------------------------------===//
// Move Instructions
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 4ee40f47b8c1..9cbc20e45179 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -135,6 +135,9 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.setDesc(TII.get(MSP430::MOV16rr));
MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
+ // Remove the now unused Offset operand.
+ MI.removeOperand(FIOperandNum + 1);
+
if (Offset == 0)
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
index 2efeeb5ee63d..39e0658eb70d 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -43,7 +43,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, CPU, FS,
Options, getEffectiveRelocModel(RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.h b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.h
index 30e1f7f4a80d..f9af9a7e26f6 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.h
@@ -31,7 +31,7 @@ public:
MSP430TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
~MSP430TargetMachine() override;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index b4f99788410b..3c673ae938fd 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -179,10 +179,9 @@ class MipsAsmParser : public MCTargetAsmParser {
bool MatchingInlineAsm) override;
/// Parse a register as used in CFI directives
- bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
- OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool parseParenSuffix(StringRef Name, OperandVector &Operands);
@@ -6250,7 +6249,7 @@ int MipsAsmParser::matchFPURegisterName(StringRef Name) {
}
int MipsAsmParser::matchFCCRegisterName(StringRef Name) {
- if (Name.startswith("fcc")) {
+ if (Name.starts_with("fcc")) {
StringRef NumString = Name.substr(3);
unsigned IntVal;
if (NumString.getAsInteger(10, IntVal))
@@ -6263,7 +6262,7 @@ int MipsAsmParser::matchFCCRegisterName(StringRef Name) {
}
int MipsAsmParser::matchACRegisterName(StringRef Name) {
- if (Name.startswith("ac")) {
+ if (Name.starts_with("ac")) {
StringRef NumString = Name.substr(2);
unsigned IntVal;
if (NumString.getAsInteger(10, IntVal))
@@ -6385,14 +6384,13 @@ bool MipsAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
return true;
}
-bool MipsAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
+bool MipsAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) {
- return tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success;
+ return !tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
}
-OperandMatchResultTy MipsAsmParser::tryParseRegister(MCRegister &RegNo,
- SMLoc &StartLoc,
- SMLoc &EndLoc) {
+ParseStatus MipsAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
ParseStatus Res = parseAnyRegister(Operands);
if (Res.isSuccess()) {
@@ -6407,15 +6405,14 @@ OperandMatchResultTy MipsAsmParser::tryParseRegister(MCRegister &RegNo,
// register is a parse error.
if (Operand.isGPRAsmReg()) {
// Resolve to GPR32 or GPR64 appropriately.
- RegNo = isGP64bit() ? Operand.getGPR64Reg() : Operand.getGPR32Reg();
+ Reg = isGP64bit() ? Operand.getGPR64Reg() : Operand.getGPR32Reg();
}
- return (RegNo == (unsigned)-1) ? MatchOperand_NoMatch
- : MatchOperand_Success;
+ return (Reg == (unsigned)-1) ? ParseStatus::NoMatch : ParseStatus::Success;
}
assert(Operands.size() == 0);
- return (RegNo == (unsigned)-1) ? MatchOperand_NoMatch : MatchOperand_Success;
+ return (Reg == (unsigned)-1) ? ParseStatus::NoMatch : ParseStatus::Success;
}
bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) {
@@ -6570,7 +6567,7 @@ bool MipsAsmParser::searchSymbolAlias(OperandVector &Operands) {
if (Expr->getKind() == MCExpr::SymbolRef) {
const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr *>(Expr);
StringRef DefSymbol = Ref->getSymbol().getName();
- if (DefSymbol.startswith("$")) {
+ if (DefSymbol.starts_with("$")) {
ParseStatus Res =
matchAnyRegisterNameWithoutDollar(Operands, DefSymbol.substr(1), S);
if (Res.isSuccess()) {
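
The startswith -> starts_with renames scattered through this file (and MipsABIInfo.cpp below) track the std::string_view spelling adopted by StringRef; behavior is unchanged. For instance:

    #include "llvm/ADT/StringRef.h"

    // Same predicate as before, new name matching std::string_view.
    bool isFCCRegName(llvm::StringRef Name) {
      return Name.starts_with("fcc");
    }
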
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
index 3e0d51ef887c..94e3d28a4186 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
@@ -7,10 +7,9 @@
//===----------------------------------------------------------------------===//
#include "MipsABIInfo.h"
-#include "MipsRegisterInfo.h"
+#include "Mips.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/LowLevelType.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/CommandLine.h"
@@ -57,11 +56,11 @@ unsigned MipsABIInfo::GetCalleeAllocdArgSizeInBytes(CallingConv::ID CC) const {
MipsABIInfo MipsABIInfo::computeTargetABI(const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
- if (Options.getABIName().startswith("o32"))
+ if (Options.getABIName().starts_with("o32"))
return MipsABIInfo::O32();
- if (Options.getABIName().startswith("n32"))
+ if (Options.getABIName().starts_with("n32"))
return MipsABIInfo::N32();
- if (Options.getABIName().startswith("n64"))
+ if (Options.getABIName().starts_with("n64"))
return MipsABIInfo::N64();
if (TT.getEnvironment() == llvm::Triple::GNUABIN32)
return MipsABIInfo::N32();
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 71f333d0d0c3..fc95b61fd4df 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -281,7 +281,7 @@ void MipsAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
bool microMipsLEByteOrder = needsMMLEByteOrder((unsigned) Kind);
for (unsigned i = 0; i != NumBytes; ++i) {
- unsigned Idx = Endian == support::little
+ unsigned Idx = Endian == llvm::endianness::little
? (microMipsLEByteOrder ? calculateMMLEIndex(i) : i)
: (FullSize - 1 - i);
CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i*8);
@@ -293,7 +293,7 @@ void MipsAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
// Write out the fixed up bytes back to the code/data bits.
for (unsigned i = 0; i != NumBytes; ++i) {
- unsigned Idx = Endian == support::little
+ unsigned Idx = Endian == llvm::endianness::little
? (microMipsLEByteOrder ? calculateMMLEIndex(i) : i)
: (FullSize - 1 - i);
Data[Offset + Idx] = (uint8_t)((CurVal >> (i*8)) & 0xff);
@@ -519,7 +519,7 @@ getFixupKindInfo(MCFixupKind Kind) const {
assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
"Invalid kind!");
- if (Endian == support::little)
+ if (Endian == llvm::endianness::little)
return LittleEndianInfos[Kind - FirstTargetFixupKind];
return BigEndianInfos[Kind - FirstTargetFixupKind];
}
@@ -544,7 +544,8 @@ bool MipsAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
bool MipsAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
const MCFixup &Fixup,
- const MCValue &Target) {
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) {
if (Fixup.getKind() >= FirstLiteralRelocationKind)
return true;
const unsigned FixupKind = Fixup.getKind();
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index 749223a6d01b..2dd68b601238 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -32,7 +32,8 @@ class MipsAsmBackend : public MCAsmBackend {
public:
MipsAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TT,
StringRef CPU, bool N32)
- : MCAsmBackend(TT.isLittleEndian() ? support::little : support::big),
+ : MCAsmBackend(TT.isLittleEndian() ? llvm::endianness::little
+ : llvm::endianness::big),
TheTriple(TT), IsN32(N32) {}
std::unique_ptr<MCObjectTargetWriter>
@@ -67,7 +68,8 @@ public:
const MCSubtargetInfo *STI) const override;
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target) override;
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) override;
bool isMicroMips(const MCSymbol *Sym) const override;
}; // class MipsAsmBackend
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 84e8c9f071fb..181b82f14bfe 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -62,7 +62,7 @@ public:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
- bool needsRelocateWithSymbol(const MCSymbol &Sym,
+ bool needsRelocateWithSymbol(const MCValue &Val, const MCSymbol &Sym,
unsigned Type) const override;
void sortRelocs(const MCAssembler &Asm,
std::vector<ELFRelocationEntry> &Relocs) override;
@@ -505,14 +505,15 @@ void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
Relocs[CopyTo++] = R.R;
}
-bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
+bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCValue &Val,
+ const MCSymbol &Sym,
unsigned Type) const {
// If it's a compound relocation for N64 then we need the relocation if any
// sub-relocation needs it.
if (!isUInt<8>(Type))
- return needsRelocateWithSymbol(Sym, Type & 0xff) ||
- needsRelocateWithSymbol(Sym, (Type >> 8) & 0xff) ||
- needsRelocateWithSymbol(Sym, (Type >> 16) & 0xff);
+ return needsRelocateWithSymbol(Val, Sym, Type & 0xff) ||
+ needsRelocateWithSymbol(Val, Sym, (Type >> 8) & 0xff) ||
+ needsRelocateWithSymbol(Val, Sym, (Type >> 16) & 0xff);
switch (Type) {
default:
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
index 72590ab81a3e..1518a539782e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
@@ -11,12 +11,13 @@
//===----------------------------------------------------------------------===//
#include "MipsInstPrinter.h"
-#include "MipsInstrInfo.h"
+#include "Mips.h"
#include "MipsMCExpr.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -72,8 +73,8 @@ const char* Mips::MipsFCCToString(Mips::CondCode CC) {
}
void MipsInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
- OS << markup("<reg:") << '$' << StringRef(getRegisterName(Reg)).lower()
- << markup(">");
+ markup(OS, Markup::Register)
+ << '$' << StringRef(getRegisterName(Reg)).lower();
}
void MipsInstPrinter::printInst(const MCInst *MI, uint64_t Address,
@@ -133,7 +134,7 @@ void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
if (Op.isImm()) {
- O << markup("<imm:") << formatImm(Op.getImm()) << markup(">");
+ markup(O, Markup::Immediate) << formatImm(Op.getImm());
return;
}
@@ -149,9 +150,9 @@ void MipsInstPrinter::printJumpOperand(const MCInst *MI, unsigned OpNo,
return printOperand(MI, OpNo, STI, O);
if (PrintBranchImmAsAddress)
- O << markup("<imm:") << formatHex(Op.getImm()) << markup(">");
+ markup(O, Markup::Immediate) << formatHex(Op.getImm());
else
- O << markup("<imm:") << formatImm(Op.getImm()) << markup(">");
+ markup(O, Markup::Immediate) << formatImm(Op.getImm());
}
void MipsInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address,
@@ -168,9 +169,9 @@ void MipsInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address,
Target &= 0xffffffff;
else if (STI.hasFeature(Mips::FeatureMips16))
Target &= 0xffff;
- O << markup("<imm:") << formatHex(Target) << markup(">");
+ markup(O, Markup::Immediate) << formatHex(Target);
} else {
- O << markup("<imm:") << formatImm(Op.getImm()) << markup(">");
+ markup(O, Markup::Immediate) << formatImm(Op.getImm());
}
}
@@ -183,7 +184,7 @@ void MipsInstPrinter::printUImm(const MCInst *MI, int opNum,
Imm -= Offset;
Imm &= (1 << Bits) - 1;
Imm += Offset;
- O << markup("<imm:") << formatImm(Imm) << markup(">");
+ markup(O, Markup::Immediate) << formatImm(Imm);
return;
}
@@ -212,12 +213,11 @@ void MipsInstPrinter::printMemOperand(const MCInst *MI, int opNum,
break;
}
- O << markup("<mem:");
+ WithMarkup M = markup(O, Markup::Memory);
printOperand(MI, opNum + 1, STI, O);
O << "(";
printOperand(MI, opNum, STI, O);
O << ")";
- O << markup(">");
}
void MipsInstPrinter::printMemOperandEA(const MCInst *MI, int opNum,
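
The printer no longer brackets operands with literal markup("<reg:") ... markup(">") strings: markup(OS, Markup::Register) returns a WithMarkup RAII object that emits the opening tag immediately and the closing tag when the temporary is destroyed at the end of the statement. A sketch of the pattern, assuming it runs inside an MCInstPrinter subclass where markup() is a member:

    // The WithMarkup temporary closes the tag automatically, so early
    // returns can no longer leave an unbalanced "<reg:" ... ">" pair.
    void printRegName(llvm::raw_ostream &OS, llvm::StringRef Name) {
      markup(OS, Markup::Register) << '$' << Name;
    }
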
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 1c7440dfbe91..73ee44eec22c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -26,6 +26,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -127,31 +128,12 @@ void MipsMCCodeEmitter::EmitByte(unsigned char C, raw_ostream &OS) const {
OS << (char)C;
}
-void MipsMCCodeEmitter::emitInstruction(uint64_t Val, unsigned Size,
- const MCSubtargetInfo &STI,
- raw_ostream &OS) const {
- // Output the instruction encoding in little endian byte order.
- // Little-endian byte ordering:
- // mips32r2: 4 | 3 | 2 | 1
- // microMIPS: 2 | 1 | 4 | 3
- if (IsLittleEndian && Size == 4 && isMicroMips(STI)) {
- emitInstruction(Val >> 16, 2, STI, OS);
- emitInstruction(Val, 2, STI, OS);
- } else {
- for (unsigned i = 0; i < Size; ++i) {
- unsigned Shift = IsLittleEndian ? i * 8 : (Size - 1 - i) * 8;
- EmitByte((Val >> Shift) & 0xff, OS);
- }
- }
-}
-
/// encodeInstruction - Emit the instruction.
/// Size the instruction with Desc.getSize().
-void MipsMCCodeEmitter::
-encodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const
-{
+void MipsMCCodeEmitter::encodeInstruction(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
// Non-pseudo instructions that get changed for direct object
// only based on operand values.
// If this list of instructions gets much longer we will move
@@ -224,7 +206,16 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
if (!Size)
llvm_unreachable("Desc.getSize() returns 0");
- emitInstruction(Binary, Size, STI, OS);
+ auto Endian =
+ IsLittleEndian ? llvm::endianness::little : llvm::endianness::big;
+ if (Size == 2) {
+ support::endian::write<uint16_t>(CB, Binary, Endian);
+ } else if (IsLittleEndian && isMicroMips(STI)) {
+ support::endian::write<uint16_t>(CB, Binary >> 16, Endian);
+ support::endian::write<uint16_t>(CB, Binary & 0xffff, Endian);
+ } else {
+ support::endian::write<uint32_t>(CB, Binary, Endian);
+ }
}
/// getBranchTargetOpValue - Return binary encoding of the branch
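
With the streamer interface gone, encodeInstruction appends straight into the byte buffer via support::endian::write; the special case keeps the historical microMIPS layout, where a little-endian 32-bit instruction is stored as two 16-bit halves, high half first. A condensed sketch of just that dispatch:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Support/EndianStream.h"
    using namespace llvm;

    // mips32 LE byte order: 4|3|2|1    microMIPS LE byte order: 2|1|4|3
    void emit32(SmallVectorImpl<char> &CB, uint32_t Binary,
                bool IsLittleEndian, bool IsMicroMips) {
      auto E = IsLittleEndian ? endianness::little : endianness::big;
      if (IsLittleEndian && IsMicroMips) {
        support::endian::write<uint16_t>(CB, Binary >> 16, E);
        support::endian::write<uint16_t>(CB, Binary & 0xffff, E);
      } else {
        support::endian::write<uint32_t>(CB, Binary, E);
      }
    }
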
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
index 16e94c723b34..871afd9eb958 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
@@ -44,10 +44,7 @@ public:
void EmitByte(unsigned char C, raw_ostream &OS) const;
- void emitInstruction(uint64_t Val, unsigned Size, const MCSubtargetInfo &STI,
- raw_ostream &OS) const;
-
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index d0aa14a1b724..27d7f0f261d1 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -16,10 +16,10 @@
#include "MipsInstPrinter.h"
#include "MipsMCExpr.h"
#include "MipsMCTargetDesc.h"
-#include "MipsTargetObjectFile.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
index 4f4e3f3f2ed7..10c953bb344a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -30,7 +30,6 @@
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
-#include <cassert>
#include <cstdint>
#include <vector>
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index c8c9612b75e7..0be9b94187a2 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -220,6 +220,6 @@ bool Mips16DAGToDAGISel::trySelect(SDNode *Node) {
}
FunctionPass *llvm::createMips16ISelDag(MipsTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
return new Mips16DAGToDAGISel(TM, OptLevel);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h
index 1ef194029f50..c6d3bde68806 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h
@@ -19,7 +19,7 @@ namespace llvm {
class Mips16DAGToDAGISel : public MipsDAGToDAGISel {
public:
- explicit Mips16DAGToDAGISel(MipsTargetMachine &TM, CodeGenOpt::Level OL)
+ explicit Mips16DAGToDAGISel(MipsTargetMachine &TM, CodeGenOptLevel OL)
: MipsDAGToDAGISel(TM, OL) {}
private:
@@ -48,7 +48,7 @@ private:
};
FunctionPass *createMips16ISelDag(MipsTargetMachine &TM,
- CodeGenOpt::Level OptLevel);
+ CodeGenOptLevel OptLevel);
}
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
index ea35608e6a7b..d97f59b5b2c7 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -127,19 +127,19 @@ Mips16TargetLowering::Mips16TargetLowering(const MipsTargetMachine &TM,
if (!Subtarget.useSoftFloat())
setMips16HardFloatLibCalls();
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, LibCall);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, LibCall);
setOperationAction(ISD::ROTR, MVT::i32, Expand);
setOperationAction(ISD::ROTR, MVT::i64, Expand);
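
The Expand -> LibCall flip above is a semantic fix, not a rename: Expand lets the legalizer synthesize the atomic inline, but Mips16 mode has no ll/sc encodings, so the only correct lowering is an out-of-line runtime call, which LibCall requests. Illustratively (exact libcall name chosen by the legalizer; __sync_fetch_and_add_4 is the usual i32 case, stated here as an assumption):

    // In a TargetLowering constructor: route i32 atomic RMW to the runtime.
    // With LibCall, (atomicrmw add i32) becomes a __sync_*-style call
    // instead of an inline expansion the core cannot execute.
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
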
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
index 20185e83286d..a834188e3bcc 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -341,7 +341,7 @@ unsigned Mips16InstrInfo::loadImmediate(unsigned FrameReg, int64_t Imm,
int SpReg = 0;
rs.enterBasicBlockEnd(MBB);
- rs.backward(II);
+ rs.backward(std::next(II));
//
// We need to know which registers can be used, in the case where there
// are not enough free registers. We exclude all registers that
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td
index bd62a56d3008..ac679c4c01bc 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td
@@ -894,13 +894,13 @@ def : MipsPat<(atomic_load_32 addr:$a), (LW64 addr:$a)>, ISA_MIPS3, GPR_64;
def : MipsPat<(atomic_load_64 addr:$a), (LD addr:$a)>, ISA_MIPS3, GPR_64;
// Atomic store patterns.
-def : MipsPat<(atomic_store_8 addr:$a, GPR64:$v), (SB64 GPR64:$v, addr:$a)>,
+def : MipsPat<(atomic_store_8 GPR64:$v, addr:$a), (SB64 GPR64:$v, addr:$a)>,
ISA_MIPS3, GPR_64;
-def : MipsPat<(atomic_store_16 addr:$a, GPR64:$v), (SH64 GPR64:$v, addr:$a)>,
+def : MipsPat<(atomic_store_16 GPR64:$v, addr:$a), (SH64 GPR64:$v, addr:$a)>,
ISA_MIPS3, GPR_64;
-def : MipsPat<(atomic_store_32 addr:$a, GPR64:$v), (SW64 GPR64:$v, addr:$a)>,
+def : MipsPat<(atomic_store_32 GPR64:$v, addr:$a), (SW64 GPR64:$v, addr:$a)>,
ISA_MIPS3, GPR_64;
-def : MipsPat<(atomic_store_64 addr:$a, GPR64:$v), (SD GPR64:$v, addr:$a)>,
+def : MipsPat<(atomic_store_64 GPR64:$v, addr:$a), (SD GPR64:$v, addr:$a)>,
ISA_MIPS3, GPR_64;
// Patterns used for matching away redundant sign extensions.
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 26df40e3b13c..30ff82dd911c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -569,8 +569,8 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
const MachineOperand &FlagsOP = MI->getOperand(OpNum - 1);
if (!FlagsOP.isImm())
return true;
- unsigned Flags = FlagsOP.getImm();
- unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ const InlineAsm::Flag Flags(FlagsOP.getImm());
+ const unsigned NumVals = Flags.getNumOperandRegisters();
// Number of registers represented by this operand. We are looking
// for 2 for 32 bit mode and 1 for 64 bit mode.
if (NumVals != 2) {
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.cpp
index 4d6ca5ac2bcc..1cd360fe30f8 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.cpp
@@ -93,13 +93,14 @@ public:
private:
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) override;
+ const CCValAssign &VA) override;
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO,
ISD::ArgFlagsTy Flags) override;
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
- MachinePointerInfo &MPO, CCValAssign &VA) override;
+ const MachinePointerInfo &MPO,
+ const CCValAssign &VA) override;
unsigned assignCustomValue(CallLowering::ArgInfo &Arg,
ArrayRef<CCValAssign> VAs,
@@ -129,7 +130,7 @@ private:
void MipsIncomingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
- CCValAssign VA) {
+ const CCValAssign &VA) {
markPhysRegUsed(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
@@ -149,10 +150,9 @@ Register MipsIncomingValueHandler::getStackAddress(uint64_t Size,
return MIRBuilder.buildFrameIndex(LLT::pointer(0, 32), FI).getReg(0);
}
-void MipsIncomingValueHandler::assignValueToAddress(Register ValVReg,
- Register Addr, LLT MemTy,
- MachinePointerInfo &MPO,
- CCValAssign &VA) {
+void MipsIncomingValueHandler::assignValueToAddress(
+ Register ValVReg, Register Addr, LLT MemTy, const MachinePointerInfo &MPO,
+ const CCValAssign &VA) {
MachineFunction &MF = MIRBuilder.getMF();
auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, MemTy,
inferAlignFromPtrInfo(MF, MPO));
@@ -185,7 +185,7 @@ MipsIncomingValueHandler::assignCustomValue(CallLowering::ArgInfo &Arg,
markPhysRegUsed(VALo.getLocReg());
markPhysRegUsed(VAHi.getLocReg());
- return 2;
+ return 1;
}
namespace {
@@ -200,14 +200,15 @@ public:
private:
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) override;
+ const CCValAssign &VA) override;
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO,
ISD::ArgFlagsTy Flags) override;
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
- MachinePointerInfo &MPO, CCValAssign &VA) override;
+ const MachinePointerInfo &MPO,
+ const CCValAssign &VA) override;
unsigned assignCustomValue(CallLowering::ArgInfo &Arg,
ArrayRef<CCValAssign> VAs,
std::function<void()> *Thunk) override;
@@ -218,7 +219,7 @@ private:
void MipsOutgoingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
- CCValAssign VA) {
+ const CCValAssign &VA) {
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
MIB.addUse(PhysReg, RegState::Implicit);
@@ -240,10 +241,9 @@ Register MipsOutgoingValueHandler::getStackAddress(uint64_t Size,
return AddrReg.getReg(0);
}
-void MipsOutgoingValueHandler::assignValueToAddress(Register ValVReg,
- Register Addr, LLT MemTy,
- MachinePointerInfo &MPO,
- CCValAssign &VA) {
+void MipsOutgoingValueHandler::assignValueToAddress(
+ Register ValVReg, Register Addr, LLT MemTy, const MachinePointerInfo &MPO,
+ const CCValAssign &VA) {
MachineFunction &MF = MIRBuilder.getMF();
uint64_t LocMemOffset = VA.getLocMemOffset();
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsCombine.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsCombine.td
index cb1594421cc5..d757ef9ee0f0 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsCombine.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsCombine.td
@@ -8,6 +8,6 @@
include "llvm/Target/GlobalISel/Combine.td"
-def MipsPostLegalizerCombiner: GICombinerHelper<
+def MipsPostLegalizerCombiner: GICombiner<
"MipsPostLegalizerCombinerImpl", []> {
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 8aa5f769c903..cb98c04ff4e5 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -610,7 +610,8 @@ bool MipsDelaySlotFiller::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
continue;
// Delay slot filling is disabled at -O0, or in microMIPS32R6.
- if (!DisableDelaySlotFiller && (TM->getOptLevel() != CodeGenOpt::None) &&
+ if (!DisableDelaySlotFiller &&
+ (TM->getOptLevel() != CodeGenOptLevel::None) &&
!(InMicroMipsMode && STI.hasMips32r6())) {
bool Filled = false;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 7266dfb206a8..77ce8ba890a8 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -308,16 +308,16 @@ void MipsDAGToDAGISel::Select(SDNode *Node) {
SelectCode(Node);
}
-bool MipsDAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
- std::vector<SDValue> &OutOps) {
+bool MipsDAGToDAGISel::SelectInlineAsmMemoryOperand(
+ const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
+ std::vector<SDValue> &OutOps) {
// All memory constraints can at least accept raw pointers.
switch(ConstraintID) {
default:
llvm_unreachable("Unexpected asm memory constraint");
- case InlineAsm::Constraint_m:
- case InlineAsm::Constraint_R:
- case InlineAsm::Constraint_ZC:
+ case InlineAsm::ConstraintCode::m:
+ case InlineAsm::ConstraintCode::R:
+ case InlineAsm::ConstraintCode::ZC:
OutOps.push_back(Op);
return false;
}
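
InlineAsm::ConstraintCode is now a scoped enum rather than a raw unsigned, so the memory-constraint switches here and in MSP430ISelDAGToDAG.cpp must spell out the enum class, and arbitrary integers can no longer be passed in by accident. Shape of a typical hook after the change, as a sketch:

    #include "llvm/IR/InlineAsm.h"

    // A target hook deciding which inline-asm memory constraints it accepts.
    bool acceptsMemConstraint(llvm::InlineAsm::ConstraintCode CC) {
      switch (CC) {
      case llvm::InlineAsm::ConstraintCode::m:
      case llvm::InlineAsm::ConstraintCode::R:
        return true;
      default:
        return false;
      }
    }
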
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
index d13efdaab2b6..e41cb08712ca 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -34,7 +34,7 @@ public:
MipsDAGToDAGISel() = delete;
- explicit MipsDAGToDAGISel(MipsTargetMachine &TM, CodeGenOpt::Level OL)
+ explicit MipsDAGToDAGISel(MipsTargetMachine &TM, CodeGenOptLevel OL)
: SelectionDAGISel(ID, TM, OL), Subtarget(nullptr) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -141,7 +141,7 @@ private:
virtual void processFunctionAfterISel(MachineFunction &MF) = 0;
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- unsigned ConstraintID,
+ InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) override;
};
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 3c69ec4912b1..a0cab8024386 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -2950,8 +2950,6 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
Reg = State.AllocateReg(IntRegs);
LocVT = MVT::i32;
} else if (ValVT == MVT::f64 && AllocateFloatsInIntReg) {
- LocVT = MVT::i32;
-
// Allocate int register and shadow next int register. If first
// available register is Mips::A1 or Mips::A3, shadow it too.
Reg = State.AllocateReg(IntRegs);
@@ -2959,6 +2957,8 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
Reg = State.AllocateReg(IntRegs);
if (Reg) {
+ LocVT = MVT::i32;
+
State.addLoc(
CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
MCRegister HiReg = State.AllocateReg(IntRegs);
@@ -3725,15 +3725,6 @@ SDValue MipsTargetLowering::LowerFormalArguments(
assert(!VA.needsCustom() && "unexpected custom memory argument");
- if (ABI.IsO32()) {
- // We ought to be able to use LocVT directly but O32 sets it to i32
- // when allocating floating point values to integer registers.
- // This shouldn't influence how we load the value into registers unless
- // we are targeting softfloat.
- if (VA.getValVT().isFloatingPoint() && !Subtarget.useSoftFloat())
- LocVT = VA.getValVT();
- }
-
// Only arguments passed on the stack should make it here.
assert(VA.isMemLoc());
@@ -4073,7 +4064,7 @@ parseRegForInlineAsmConstraint(StringRef C, MVT VT) const {
RC = TRI->getRegClass(Prefix == "hi" ?
Mips::HI32RegClassID : Mips::LO32RegClassID);
return std::make_pair(*(RC->begin()), RC);
- } else if (Prefix.startswith("$msa")) {
+ } else if (Prefix.starts_with("$msa")) {
// Parse $msa(ir|csr|access|save|modify|request|map|unmap)
// No numeric characters follow the name.
@@ -4200,14 +4191,15 @@ MipsTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
- std::vector<SDValue>&Ops,
- SelectionDAG &DAG) const {
+ StringRef Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Result;
// Only support length 1 constraints for now.
- if (Constraint.length() > 1) return;
+ if (Constraint.size() > 1)
+ return;
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.h
index 8614c4d3abe5..c17e51f44651 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -31,9 +31,7 @@
#include "llvm/IR/Type.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
-#include <cassert>
#include <deque>
-#include <string>
#include <utility>
#include <vector>
@@ -641,19 +639,18 @@ class TargetRegisterClass;
/// vector. If it is invalid, don't add anything to Ops. If hasMemory is
/// true it means one of the asm constraint of the inline asm instruction
/// being processed is 'm'.
- void LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
+ void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- unsigned
+ InlineAsm::ConstraintCode
getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
if (ConstraintCode == "o")
- return InlineAsm::Constraint_o;
+ return InlineAsm::ConstraintCode::o;
if (ConstraintCode == "R")
- return InlineAsm::Constraint_R;
+ return InlineAsm::ConstraintCode::R;
if (ConstraintCode == "ZC")
- return InlineAsm::Constraint_ZC;
+ return InlineAsm::ConstraintCode::ZC;
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td
index 973f40a21dee..75270857ea13 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -3357,11 +3357,11 @@ let AdditionalPredicates = [NotInMicroMips] in {
def : MipsPat<(atomic_load_32 addr:$a), (LW addr:$a)>, ISA_MIPS1;
// Atomic store patterns.
- def : MipsPat<(atomic_store_8 addr:$a, GPR32:$v), (SB GPR32:$v, addr:$a)>,
+ def : MipsPat<(atomic_store_8 GPR32:$v, addr:$a), (SB GPR32:$v, addr:$a)>,
ISA_MIPS1;
- def : MipsPat<(atomic_store_16 addr:$a, GPR32:$v), (SH GPR32:$v, addr:$a)>,
+ def : MipsPat<(atomic_store_16 GPR32:$v, addr:$a), (SH GPR32:$v, addr:$a)>,
ISA_MIPS1;
- def : MipsPat<(atomic_store_32 addr:$a, GPR32:$v), (SW GPR32:$v, addr:$a)>,
+ def : MipsPat<(atomic_store_32 GPR32:$v, addr:$a), (SW GPR32:$v, addr:$a)>,
ISA_MIPS1;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
index 2738a78e4a86..14f26201e6c0 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -12,6 +12,7 @@
#include "MipsLegalizerInfo.h"
#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/IR/IntrinsicsMips.h"
@@ -510,7 +511,7 @@ bool MipsLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
const MipsRegisterInfo &TRI = *ST.getRegisterInfo();
const RegisterBankInfo &RBI = *ST.getRegBankInfo();
- switch (MI.getIntrinsicID()) {
+ switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
case Intrinsic::trap: {
MachineInstr *Trap = MIRBuilder.buildInstr(Mips::TRAP);
MI.eraseFromParent();
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsMCInstLower.cpp
index 7b58cb90ab87..b0642f3d1ff2 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsMCInstLower.cpp
@@ -21,7 +21,6 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/ErrorHandling.h"
-#include <cassert>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsMachineFunction.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsMachineFunction.cpp
index 7d9824aaf8ec..194b467fb1d8 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsMachineFunction.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/PseudoSourceValueManager.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp
index ef847adbebc1..f23d1934684f 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp
@@ -37,7 +37,6 @@
#include "llvm/Support/RecyclingAllocator.h"
#include <cassert>
#include <utility>
-#include <vector>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsPostLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsPostLegalizerCombiner.cpp
index 4247bf9a4e3a..0578655f0443 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsPostLegalizerCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsPostLegalizerCombiner.cpp
@@ -18,7 +18,6 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
-#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
@@ -40,27 +39,24 @@ namespace {
#include "MipsGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES
-class MipsPostLegalizerCombinerImpl : public GIMatchTableExecutor {
+class MipsPostLegalizerCombinerImpl : public Combiner {
protected:
- CombinerHelper &Helper;
const MipsPostLegalizerCombinerImplRuleConfig &RuleConfig;
-
const MipsSubtarget &STI;
- GISelChangeObserver &Observer;
- MachineIRBuilder &B;
- MachineFunction &MF;
-
- MachineRegisterInfo &MRI;
+ // TODO: Make CombinerHelper methods const.
+ mutable CombinerHelper Helper;
public:
MipsPostLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
const MipsPostLegalizerCombinerImplRuleConfig &RuleConfig,
- const MipsSubtarget &STI, GISelChangeObserver &Observer,
- MachineIRBuilder &B, CombinerHelper &Helper);
+ const MipsSubtarget &STI, MachineDominatorTree *MDT,
+ const LegalizerInfo *LI);
static const char *getName() { return "MipsPostLegalizerCombiner"; }
- bool tryCombineAll(MachineInstr &I) const;
+ bool tryCombineAll(MachineInstr &I) const override;
private:
#define GET_GICOMBINER_CLASS_MEMBERS
@@ -73,47 +69,19 @@ private:
#undef GET_GICOMBINER_IMPL
MipsPostLegalizerCombinerImpl::MipsPostLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
const MipsPostLegalizerCombinerImplRuleConfig &RuleConfig,
- const MipsSubtarget &STI, GISelChangeObserver &Observer,
- MachineIRBuilder &B, CombinerHelper &Helper)
- : Helper(Helper), RuleConfig(RuleConfig), STI(STI), Observer(Observer),
- B(B), MF(B.getMF()), MRI(*B.getMRI()),
+ const MipsSubtarget &STI, MachineDominatorTree *MDT,
+ const LegalizerInfo *LI)
+ : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
+ Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "MipsGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}
-class MipsPostLegalizerCombinerInfo final : public CombinerInfo {
- GISelKnownBits *KB;
-
-public:
- MipsPostLegalizerCombinerImplRuleConfig RuleConfig;
-
- MipsPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
- GISelKnownBits *KB, const MipsLegalizerInfo *LI)
- : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
- /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
- KB(KB) {
- if (!RuleConfig.parseCommandLineOption())
- report_fatal_error("Invalid rule identifier");
- }
-
- bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
- MachineIRBuilder &B) const override;
-};
-
-bool MipsPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
- MachineInstr &MI,
- MachineIRBuilder &B) const {
- const auto &STI = MI.getMF()->getSubtarget<MipsSubtarget>();
- CombinerHelper Helper(Observer, B, /* IsPreLegalize*/ false, KB,
- /*DominatorTree*/ nullptr, LInfo);
- MipsPostLegalizerCombinerImpl Impl(RuleConfig, STI, Observer, B, Helper);
- Impl.setupMF(*MI.getMF(), KB);
- return Impl.tryCombineAll(MI);
-}
-
// Pass boilerplate
// ================
@@ -123,9 +91,7 @@ public:
MipsPostLegalizerCombiner(bool IsOptNone = false);
- StringRef getPassName() const override {
- return "MipsPostLegalizerCombiner";
- }
+ StringRef getPassName() const override { return "MipsPostLegalizerCombiner"; }
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -133,6 +99,7 @@ public:
private:
bool IsOptNone;
+ MipsPostLegalizerCombinerImplRuleConfig RuleConfig;
};
} // end anonymous namespace
@@ -152,6 +119,9 @@ void MipsPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
MipsPostLegalizerCombiner::MipsPostLegalizerCombiner(bool IsOptNone)
: MachineFunctionPass(ID), IsOptNone(IsOptNone) {
initializeMipsPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+
+ if (!RuleConfig.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
}
bool MipsPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
@@ -161,17 +131,20 @@ bool MipsPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
auto *TPC = &getAnalysis<TargetPassConfig>();
const Function &F = MF.getFunction();
bool EnableOpt =
- MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
+ MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
const MipsSubtarget &ST = MF.getSubtarget<MipsSubtarget>();
const MipsLegalizerInfo *LI =
static_cast<const MipsLegalizerInfo *>(ST.getLegalizerInfo());
GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
- MipsPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
- F.hasMinSize(), KB, LI);
- Combiner C(PCInfo, TPC);
- return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
+ MachineDominatorTree *MDT =
+ IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
+ CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
+ LI, EnableOpt, F.hasOptSize(), F.hasMinSize());
+ MipsPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, /*CSEInfo*/ nullptr,
+ RuleConfig, ST, MDT, LI);
+ return Impl.combineMachineInstrs();
}
char MipsPostLegalizerCombiner::ID = 0;
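
This rewrite follows the GlobalISel-wide move away from CombinerInfo::combine() callbacks: the generated Impl class now derives from Combiner itself, the rule config is parsed once in the pass constructor, and runOnMachineFunction just instantiates the Impl and calls combineMachineInstrs(). A skeleton of the new pattern, mirroring this diff (Observer and B are inherited Combiner members; the load guard stands in for real rules):

    // Sketch: the target Impl *is* the Combiner and overrides tryCombineAll;
    // there is no separate CombinerInfo subclass anymore.
    class MyCombinerImpl : public Combiner {
      mutable CombinerHelper Helper; // CombinerHelper methods not yet const
    public:
      MyCombinerImpl(MachineFunction &MF, CombinerInfo &CInfo,
                     const TargetPassConfig *TPC, GISelKnownBits &KB,
                     GISelCSEInfo *CSEInfo, MachineDominatorTree *MDT,
                     const LegalizerInfo *LI)
          : Combiner(MF, CInfo, TPC, &KB, CSEInfo),
            Helper(Observer, B, /*IsPreLegalize=*/false, &KB, MDT, LI) {}
      bool tryCombineAll(MachineInstr &MI) const override {
        if (MI.getOpcode() == TargetOpcode::G_LOAD)
          return Helper.tryCombineExtendingLoads(MI); // example rule
        return false;
      }
    };
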
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
index 237495a28f62..acf0d6312ef5 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
@@ -11,10 +11,12 @@
//
//===----------------------------------------------------------------------===//
+#include "MipsLegalizerInfo.h"
#include "MipsTargetMachine.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -25,45 +27,62 @@
using namespace llvm;
namespace {
-class MipsPreLegalizerCombinerInfo : public CombinerInfo {
+struct MipsPreLegalizerCombinerInfo : public CombinerInfo {
public:
MipsPreLegalizerCombinerInfo()
: CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
/*LegalizerInfo*/ nullptr, /*EnableOpt*/ false,
/*EnableOptSize*/ false, /*EnableMinSize*/ false) {}
- bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
- MachineIRBuilder &B) const override;
};
-bool MipsPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
- MachineInstr &MI,
- MachineIRBuilder &B) const {
- CombinerHelper Helper(Observer, B, /*IsPreLegalize*/ true);
+class MipsPreLegalizerCombinerImpl : public Combiner {
+protected:
+ const MipsSubtarget &STI;
+ // TODO: Make CombinerHelper methods const.
+ mutable CombinerHelper Helper;
- switch (MI.getOpcode()) {
- default:
- return false;
- case TargetOpcode::G_MEMCPY_INLINE:
- return Helper.tryEmitMemcpyInline(MI);
- case TargetOpcode::G_LOAD:
- case TargetOpcode::G_SEXTLOAD:
- case TargetOpcode::G_ZEXTLOAD: {
- // Don't attempt to combine non power of 2 loads or unaligned loads when
- // subtarget doesn't support them.
- auto MMO = *MI.memoperands_begin();
- const MipsSubtarget &STI = MI.getMF()->getSubtarget<MipsSubtarget>();
- if (!isPowerOf2_64(MMO->getSize()))
- return false;
- bool isUnaligned = MMO->getAlign() < MMO->getSize();
- if (!STI.systemSupportsUnalignedAccess() && isUnaligned)
+public:
+ MipsPreLegalizerCombinerImpl(MachineFunction &MF, CombinerInfo &CInfo,
+ const TargetPassConfig *TPC, GISelKnownBits &KB,
+ GISelCSEInfo *CSEInfo, const MipsSubtarget &STI,
+ MachineDominatorTree *MDT,
+ const LegalizerInfo *LI)
+ : Combiner(MF, CInfo, TPC, &KB, CSEInfo), STI(STI),
+ Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI) {}
+
+ static const char *getName() { return "MipsPreLegalizerCombiner"; }
+
+ void setupGeneratedPerFunctionState(MachineFunction &MF) override {
+ // TODO: TableGen-erate this class' impl.
+ }
+
+ bool tryCombineAll(MachineInstr &MI) const override {
+
+ switch (MI.getOpcode()) {
+ default:
return false;
+ case TargetOpcode::G_MEMCPY_INLINE:
+ return Helper.tryEmitMemcpyInline(MI);
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_SEXTLOAD:
+ case TargetOpcode::G_ZEXTLOAD: {
+ // Don't attempt to combine non-power-of-2 loads or unaligned loads when
+ // the subtarget doesn't support them.
+ auto MMO = *MI.memoperands_begin();
+ const MipsSubtarget &STI = MI.getMF()->getSubtarget<MipsSubtarget>();
+ if (!isPowerOf2_64(MMO->getSize()))
+ return false;
+ bool isUnaligned = MMO->getAlign() < MMO->getSize();
+ if (!STI.systemSupportsUnalignedAccess() && isUnaligned)
+ return false;
+
+ return Helper.tryCombineExtendingLoads(MI);
+ }
+ }
- return Helper.tryCombineExtendingLoads(MI);
- }
+ return false;
}
-
- return false;
-}
+};
// Pass boilerplate
// ================
@@ -84,6 +103,8 @@ public:
void MipsPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
+ AU.addRequired<GISelKnownBitsAnalysis>();
+ AU.addPreserved<GISelKnownBitsAnalysis>();
AU.setPreservesCFG();
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
@@ -97,10 +118,17 @@ bool MipsPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel))
return false;
+
auto *TPC = &getAnalysis<TargetPassConfig>();
+ const MipsSubtarget &ST = MF.getSubtarget<MipsSubtarget>();
+ const MipsLegalizerInfo *LI =
+ static_cast<const MipsLegalizerInfo *>(ST.getLegalizerInfo());
+
+ GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
MipsPreLegalizerCombinerInfo PCInfo;
- Combiner C(PCInfo, TPC);
- return C.combineMachineInstrs(MF, nullptr);
+ MipsPreLegalizerCombinerImpl Impl(MF, PCInfo, TPC, *KB, /*CSEInfo*/ nullptr,
+ ST, /*MDT*/ nullptr, LI);
+ return Impl.combineMachineInstrs();
}
char MipsPreLegalizerCombiner::ID = 0;
@@ -108,6 +136,7 @@ INITIALIZE_PASS_BEGIN(MipsPreLegalizerCombiner, DEBUG_TYPE,
"Combine Mips machine instrs before legalization", false,
false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(MipsPreLegalizerCombiner, DEBUG_TYPE,
"Combine Mips machine instrs before legalization", false,
false)
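
Spelled out, the guard that the new tryCombineAll() carries over from the old code is: extending-load combines run only for power-of-2 access sizes and, on subtargets without hardware unaligned access, only when the load's alignment covers its size. A standalone restatement of that predicate (a sketch; the helper name is illustrative):

    #include "llvm/Support/Alignment.h"
    #include "llvm/Support/MathExtras.h"

    // True if the extending-load combine may fire for this memory access.
    // SizeInBytes/Alignment mirror MMO->getSize()/MMO->getAlign() above.
    static bool mayCombineLoad(uint64_t SizeInBytes, llvm::Align Alignment,
                               bool SystemSupportsUnalignedAccess) {
      if (!llvm::isPowerOf2_64(SizeInBytes))
        return false;                          // non-power-of-2 loads: skip
      bool IsUnaligned = Alignment.value() < SizeInBytes;
      return SystemSupportsUnalignedAccess || !IsUnaligned;
    }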
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
index b4f4f3007c69..b38ca3f09ffb 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -32,7 +32,7 @@ enum PartialMappingIdx {
PMI_Min = PMI_GPR,
};
-RegisterBankInfo::PartialMapping PartMappings[]{
+const RegisterBankInfo::PartialMapping PartMappings[]{
{0, 32, GPRBRegBank},
{0, 32, FPRBRegBank},
{0, 64, FPRBRegBank},
@@ -47,7 +47,7 @@ enum ValueMappingIdx {
MSAIdx = 10
};
-RegisterBankInfo::ValueMapping ValueMappings[] = {
+const RegisterBankInfo::ValueMapping ValueMappings[] = {
// invalid
{nullptr, 0},
// up to 3 operands in GPRs
@@ -675,9 +675,15 @@ using InstListTy = GISelWorkList<4>;
namespace {
class InstManager : public GISelChangeObserver {
InstListTy &InstList;
+ MachineIRBuilder &B;
public:
- InstManager(InstListTy &Insts) : InstList(Insts) {}
+ InstManager(MachineIRBuilder &B, InstListTy &Insts) : InstList(Insts), B(B) {
+ assert(!B.isObservingChanges());
+ B.setChangeObserver(*this);
+ }
+
+ ~InstManager() { B.stopObservingChanges(); }
void createdInstr(MachineInstr &MI) override { InstList.insert(&MI); }
void erasingInstr(MachineInstr &MI) override {}
@@ -724,17 +730,18 @@ combineAwayG_UNMERGE_VALUES(LegalizationArtifactCombiner &ArtCombiner,
}
void MipsRegisterBankInfo::applyMappingImpl(
- const OperandsMapper &OpdMapper) const {
+ MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
MachineInstr &MI = OpdMapper.getMI();
+ Builder.setInstrAndDebugLoc(MI);
+
InstListTy NewInstrs;
MachineFunction *MF = MI.getMF();
MachineRegisterInfo &MRI = OpdMapper.getMRI();
const LegalizerInfo &LegInfo = *MF->getSubtarget().getLegalizerInfo();
- InstManager NewInstrObserver(NewInstrs);
- MachineIRBuilder B(MI, NewInstrObserver);
- LegalizerHelper Helper(*MF, NewInstrObserver, B);
- LegalizationArtifactCombiner ArtCombiner(B, MF->getRegInfo(), LegInfo);
+ InstManager NewInstrObserver(Builder, NewInstrs);
+ LegalizerHelper Helper(*MF, NewInstrObserver, Builder);
+ LegalizationArtifactCombiner ArtCombiner(Builder, MF->getRegInfo(), LegInfo);
switch (MI.getOpcode()) {
case TargetOpcode::G_LOAD:
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.h
index 9eca4fdab3d6..bc424b93f605 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.h
@@ -42,7 +42,8 @@ public:
/// G_UNMERGE and erase instructions that became dead in the process. We
/// manually assign bank to def operand of all new instructions that were
/// created in the process since they will not end up in RegBankSelect loop.
- void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
+ void applyMappingImpl(MachineIRBuilder &Builder,
+ const OperandsMapper &OpdMapper) const override;
/// RegBankSelect determined that s64 operand is better to be split into two
/// s32 operands in gprb. Here we manually set register banks of def operands
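
The InstManager rewrite above is an RAII change: previously a MachineIRBuilder was constructed around the observer, whereas now the observer attaches itself to a caller-owned builder on construction and detaches in its destructor, so applyMappingImpl() can reuse the builder that RegBankSelect hands it. The idiom in isolation (hypothetical class name):

    class ScopedObserver : public llvm::GISelChangeObserver {
      llvm::MachineIRBuilder &B;
    public:
      ScopedObserver(llvm::MachineIRBuilder &B) : B(B) {
        assert(!B.isObservingChanges() && "builder already has an observer");
        B.setChangeObserver(*this);   // attached for this object's lifetime
      }
      ~ScopedObserver() { B.stopObservingChanges(); }
      void createdInstr(llvm::MachineInstr &MI) override {}
      void erasingInstr(llvm::MachineInstr &MI) override {}
      void changingInstr(llvm::MachineInstr &MI) override {}
      void changedInstr(llvm::MachineInstr &MI) override {}
    };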
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterInfo.h
index 1463304d35ce..b002f4cf3ae7 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterInfo.h
@@ -70,8 +70,6 @@ public:
/// Return GPR register class.
virtual const TargetRegisterClass *intRegClass(unsigned Size) const = 0;
- bool supportsBackwardScavenger() const override { return true; }
-
private:
virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
int FrameIndex, uint64_t StackSize,
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 138735d44df6..8c865afd4207 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -1377,17 +1377,17 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
return false;
}
-bool MipsSEDAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
- std::vector<SDValue> &OutOps) {
+bool MipsSEDAGToDAGISel::SelectInlineAsmMemoryOperand(
+ const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
+ std::vector<SDValue> &OutOps) {
SDValue Base, Offset;
switch(ConstraintID) {
default:
llvm_unreachable("Unexpected asm memory constraint");
// All memory constraints can at least accept raw pointers.
- case InlineAsm::Constraint_m:
- case InlineAsm::Constraint_o:
+ case InlineAsm::ConstraintCode::m:
+ case InlineAsm::ConstraintCode::o:
if (selectAddrRegImm16(Op, Base, Offset)) {
OutOps.push_back(Base);
OutOps.push_back(Offset);
@@ -1396,7 +1396,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
OutOps.push_back(Op);
OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
return false;
- case InlineAsm::Constraint_R:
+ case InlineAsm::ConstraintCode::R:
// The 'R' constraint is supposed to be much more complicated than this.
// However, it's becoming less useful due to architectural changes and
// ought to be replaced by other constraints such as 'ZC'.
@@ -1410,7 +1410,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
OutOps.push_back(Op);
OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
return false;
- case InlineAsm::Constraint_ZC:
+ case InlineAsm::ConstraintCode::ZC:
// ZC matches whatever the pref, ll, and sc instructions can handle for the
// given subtarget.
if (Subtarget->inMicroMipsMode()) {
@@ -1442,6 +1442,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
}
FunctionPass *llvm::createMipsSEISelDag(MipsTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
return new MipsSEDAGToDAGISel(TM, OptLevel);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
index 39f665be571e..96dc876cb753 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -20,7 +20,7 @@ namespace llvm {
class MipsSEDAGToDAGISel : public MipsDAGToDAGISel {
public:
- explicit MipsSEDAGToDAGISel(MipsTargetMachine &TM, CodeGenOpt::Level OL)
+ explicit MipsSEDAGToDAGISel(MipsTargetMachine &TM, CodeGenOptLevel OL)
: MipsDAGToDAGISel(TM, OL) {}
private:
@@ -135,12 +135,12 @@ private:
void processFunctionAfterISel(MachineFunction &MF) override;
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- unsigned ConstraintID,
+ InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) override;
};
FunctionPass *createMipsSEISelDag(MipsTargetMachine &TM,
- CodeGenOpt::Level OptLevel);
+ CodeGenOptLevel OptLevel);
}
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index b84f304373f6..5c34067c8888 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -15,7 +15,6 @@
#include "MipsRegisterInfo.h"
#include "MipsSubtarget.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
index c86666cc40b6..61471050c08f 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -101,9 +101,9 @@ static inline unsigned getLoadStoreOffsetSizeInBits(const unsigned Opcode,
case Mips::SC_MMR6:
return 9;
case Mips::INLINEASM: {
- unsigned ConstraintID = InlineAsm::getMemoryConstraintID(MO.getImm());
- switch (ConstraintID) {
- case InlineAsm::Constraint_ZC: {
+ const InlineAsm::Flag F(MO.getImm());
+ switch (F.getMemoryConstraintID()) {
+ case InlineAsm::ConstraintCode::ZC: {
const MipsSubtarget &Subtarget = MO.getParent()
->getParent()
->getParent()
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleGeneric.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleGeneric.td
index 931412cb261e..a3df88a93cfb 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleGeneric.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleGeneric.td
@@ -160,12 +160,12 @@ def GenericWriteMDUtoGPR : SchedWriteRes<[GenericIssueMDU]> {
def GenericWriteDIV : SchedWriteRes<[GenericIssueDIV]> {
// Estimated worst case
let Latency = 33;
- let ResourceCycles = [33];
+ let ReleaseAtCycles = [33];
}
def GenericWriteDIVU : SchedWriteRes<[GenericIssueDIV]> {
// Estimated worst case
let Latency = 31;
- let ResourceCycles = [31];
+ let ReleaseAtCycles = [31];
}
// mul
@@ -761,35 +761,35 @@ def GenericWriteFPUMoveGPRFPU : SchedWriteRes<[GenericIssueFPUMove]> {
}
def GenericWriteFPUDivS : SchedWriteRes<[GenericFPUDivSqrt]> {
let Latency = 17;
- let ResourceCycles = [ 14 ];
+ let ReleaseAtCycles = [ 14 ];
}
def GenericWriteFPUDivD : SchedWriteRes<[GenericFPUDivSqrt]> {
let Latency = 32;
- let ResourceCycles = [ 29 ];
+ let ReleaseAtCycles = [ 29 ];
}
def GenericWriteFPURcpS : SchedWriteRes<[GenericFPUDivSqrt]> {
let Latency = 13;
- let ResourceCycles = [ 10 ];
+ let ReleaseAtCycles = [ 10 ];
}
def GenericWriteFPURcpD : SchedWriteRes<[GenericFPUDivSqrt]> {
let Latency = 25;
- let ResourceCycles = [ 21 ];
+ let ReleaseAtCycles = [ 21 ];
}
def GenericWriteFPURsqrtS : SchedWriteRes<[GenericFPUDivSqrt]> {
let Latency = 17;
- let ResourceCycles = [ 14 ];
+ let ReleaseAtCycles = [ 14 ];
}
def GenericWriteFPURsqrtD : SchedWriteRes<[GenericFPUDivSqrt]> {
let Latency = 32;
- let ResourceCycles = [ 29 ];
+ let ReleaseAtCycles = [ 29 ];
}
def GenericWriteFPUSqrtS : SchedWriteRes<[GenericFPUDivSqrt]> {
let Latency = 17;
- let ResourceCycles = [ 14 ];
+ let ReleaseAtCycles = [ 14 ];
}
def GenericWriteFPUSqrtD : SchedWriteRes<[GenericFPUDivSqrt]> {
let Latency = 29;
- let ResourceCycles = [ 29 ];
+ let ReleaseAtCycles = [ 29 ];
}
// Floating point compare and branch
@@ -1405,7 +1405,7 @@ let Latency = 5;
}
def GenericWriteFPUDivI : SchedWriteRes<[GenericFPQ]> {
let Latency = 33;
- let ResourceCycles = [ 33 ];
+ let ReleaseAtCycles = [ 33 ];
}
// FPUS is also used in moves from floating point and MSA registers to general
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleP5600.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleP5600.td
index 466b5c6af696..c79cd876596c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleP5600.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleP5600.td
@@ -119,7 +119,7 @@ def P5600WriteCache : SchedWriteRes<[P5600IssueLDST]>;
def P5600WriteStore : SchedWriteRes<[P5600IssueLDST, P5600CTISTD]> {
// FIXME: This is a bit pessimistic. P5600CTISTD is only used during cycle 2
// not during 0, 1, and 2.
- let ResourceCycles = [ 1, 3 ];
+ let ReleaseAtCycles = [ 1, 3 ];
}
def P5600WriteGPRFromBypass : SchedWriteRes<[P5600IssueLDST]> {
@@ -165,12 +165,12 @@ def P5600WriteAL2CondMov : SchedWriteRes<[P5600IssueAL2, P5600CTISTD]> {
def P5600WriteAL2Div : SchedWriteRes<[P5600IssueAL2, P5600AL2Div]> {
// Estimated worst case
let Latency = 34;
- let ResourceCycles = [1, 34];
+ let ReleaseAtCycles = [1, 34];
}
def P5600WriteAL2DivU : SchedWriteRes<[P5600IssueAL2, P5600AL2Div]> {
// Estimated worst case
let Latency = 34;
- let ResourceCycles = [1, 34];
+ let ReleaseAtCycles = [1, 34];
}
def P5600WriteAL2Mul : SchedWriteRes<[P5600IssueAL2]> { let Latency = 3; }
def P5600WriteAL2Mult: SchedWriteRes<[P5600IssueAL2]> { let Latency = 5; }
@@ -241,47 +241,47 @@ def P5600WriteFPUL_MADDSUB : SchedWriteRes<[P5600IssueFPUL]> { let Latency = 6;
def P5600WriteFPUDivI : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
// Best/Common/Worst case = 7 / 23 / 27
let Latency = 23; // Using common case
- let ResourceCycles = [ 1, 23 ];
+ let ReleaseAtCycles = [ 1, 23 ];
}
def P5600WriteFPUDivS : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
// Best/Common/Worst case = 7 / 23 / 27
let Latency = 23; // Using common case
- let ResourceCycles = [ 1, 23 ];
+ let ReleaseAtCycles = [ 1, 23 ];
}
def P5600WriteFPUDivD : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
// Best/Common/Worst case = 7 / 31 / 35
let Latency = 31; // Using common case
- let ResourceCycles = [ 1, 31 ];
+ let ReleaseAtCycles = [ 1, 31 ];
}
def P5600WriteFPURcpS : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
// Best/Common/Worst case = 7 / 19 / 23
let Latency = 19; // Using common case
- let ResourceCycles = [ 1, 19 ];
+ let ReleaseAtCycles = [ 1, 19 ];
}
def P5600WriteFPURcpD : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
// Best/Common/Worst case = 7 / 27 / 31
let Latency = 27; // Using common case
- let ResourceCycles = [ 1, 27 ];
+ let ReleaseAtCycles = [ 1, 27 ];
}
def P5600WriteFPURsqrtS : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
// Best/Common/Worst case = 7 / 27 / 27
let Latency = 27; // Using common case
- let ResourceCycles = [ 1, 27 ];
+ let ReleaseAtCycles = [ 1, 27 ];
}
def P5600WriteFPURsqrtD : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
// Best/Common/Worst case = 7 / 27 / 31
let Latency = 27; // Using common case
- let ResourceCycles = [ 1, 27 ];
+ let ReleaseAtCycles = [ 1, 27 ];
}
def P5600WriteFPUSqrtS : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
// Best/Common/Worst case = 7 / 27 / 31
let Latency = 27; // Using common case
- let ResourceCycles = [ 1, 27 ];
+ let ReleaseAtCycles = [ 1, 27 ];
}
def P5600WriteFPUSqrtD : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
// Best/Common/Worst case = 7 / 35 / 39
let Latency = 35; // Using common case
- let ResourceCycles = [ 1, 35 ];
+ let ReleaseAtCycles = [ 1, 35 ];
}
def P5600WriteMSAShortLogic : SchedWriteRes<[P5600IssueFPUS]>;
def P5600WriteMSAShortInt : SchedWriteRes<[P5600IssueFPUS]> { let Latency = 2; }
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.cpp
index 323e611207a2..0134fcb341f1 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.cpp
@@ -234,8 +234,8 @@ void MipsSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
: &Mips::GPR32RegClass);
}
-CodeGenOpt::Level MipsSubtarget::getOptLevelToEnablePostRAScheduler() const {
- return CodeGenOpt::Aggressive;
+CodeGenOptLevel MipsSubtarget::getOptLevelToEnablePostRAScheduler() const {
+ return CodeGenOptLevel::Aggressive;
}
MipsSubtarget &
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.h
index ec8ca64c8ce8..225ee139d036 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.h
@@ -26,7 +26,6 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/ErrorHandling.h"
-#include <string>
#define GET_SUBTARGETINFO_HEADER
#include "MipsGenSubtargetInfo.inc"
@@ -228,7 +227,7 @@ public:
/// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
bool enablePostRAScheduler() const override;
void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override;
- CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const override;
+ CodeGenOptLevel getOptLevelToEnablePostRAScheduler() const override;
bool isABI_N64() const;
bool isABI_N32() const;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.cpp
index fe31ab91d0ea..074222836929 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -123,7 +123,7 @@ MipsTargetMachine::MipsTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT,
+ CodeGenOptLevel OL, bool JIT,
bool isLittle)
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
CPU, FS, Options, getEffectiveRelocModel(JIT, RM),
@@ -152,7 +152,7 @@ MipsebTargetMachine::MipsebTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, false) {}
void MipselTargetMachine::anchor() {}
@@ -162,7 +162,7 @@ MipselTargetMachine::MipselTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, true) {}
const MipsSubtarget *
@@ -347,7 +347,7 @@ bool MipsPassConfig::addLegalizeMachineIR() {
}
void MipsPassConfig::addPreRegBankSelect() {
- bool IsOptNone = getOptLevel() == CodeGenOpt::None;
+ bool IsOptNone = getOptLevel() == CodeGenOptLevel::None;
addPass(createMipsPostLegalizeCombiner(IsOptNone));
}
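
The CodeGenOpt::Level to CodeGenOptLevel renames running through these files are more than cosmetic: the new type is a scoped enum, so implicit conversions to and from integers no longer compile and every comparison must name an enumerator. Roughly:

    #include "llvm/Support/CodeGen.h"

    // Before (unscoped enum): integers converted implicitly.
    //   bool DoMulWide = OptLevel > 0;
    // After (enum class): compare against the named enumerators.
    llvm::CodeGenOptLevel OptLevel = llvm::CodeGenOptLevel::Default;
    bool EnableOpt = OptLevel != llvm::CodeGenOptLevel::None;
    bool DoMulWide = OptLevel > llvm::CodeGenOptLevel::None;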
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.h
index d07e5bb2dfbc..0ad239e3bed1 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.h
@@ -40,7 +40,7 @@ public:
MipsTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT, bool isLittle);
~MipsTargetMachine() override;
@@ -89,7 +89,7 @@ public:
MipsebTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
};
@@ -102,7 +102,7 @@ public:
MipselTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
};
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index 07c56ac79a63..b7a20c351f5f 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -82,7 +82,7 @@ void NVPTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
unsigned Reg = Op.getReg();
printRegName(O, Reg);
} else if (Op.isImm()) {
- O << markup("<imm:") << formatImm(Op.getImm()) << markup(">");
+ markup(O, Markup::Immediate) << formatImm(Op.getImm());
} else {
assert(Op.isExpr() && "Unknown operand kind in printOperand");
Op.getExpr()->print(O, &MAI);
@@ -309,3 +309,34 @@ void NVPTXInstPrinter::printProtoIdent(const MCInst *MI, int OpNum,
const MCSymbol &Sym = cast<MCSymbolRefExpr>(Expr)->getSymbol();
O << Sym.getName();
}
+
+void NVPTXInstPrinter::printPrmtMode(const MCInst *MI, int OpNum,
+ raw_ostream &O, const char *Modifier) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ int64_t Imm = MO.getImm();
+
+ switch (Imm) {
+ default:
+ return;
+ case NVPTX::PTXPrmtMode::NONE:
+ break;
+ case NVPTX::PTXPrmtMode::F4E:
+ O << ".f4e";
+ break;
+ case NVPTX::PTXPrmtMode::B4E:
+ O << ".b4e";
+ break;
+ case NVPTX::PTXPrmtMode::RC8:
+ O << ".rc8";
+ break;
+ case NVPTX::PTXPrmtMode::ECL:
+ O << ".ecl";
+ break;
+ case NVPTX::PTXPrmtMode::ECR:
+ O << ".ecr";
+ break;
+ case NVPTX::PTXPrmtMode::RC16:
+ O << ".rc16";
+ break;
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
index 49ad3f269229..e6954f861cd1 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
@@ -47,6 +47,8 @@ public:
raw_ostream &O, const char *Modifier = nullptr);
void printProtoIdent(const MCInst *MI, int OpNum,
raw_ostream &O, const char *Modifier = nullptr);
+ void printPrmtMode(const MCInst *MI, int OpNum, raw_ostream &O,
+ const char *Modifier = nullptr);
};
}
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.h
index ec32a95dea90..07ee34968b02 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.h
@@ -36,7 +36,7 @@ enum CondCodes {
}
FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
- llvm::CodeGenOpt::Level OptLevel);
+ llvm::CodeGenOptLevel OptLevel);
ModulePass *createNVPTXAssignValidGlobalNamesPass();
ModulePass *createGenericToNVVMLegacyPass();
ModulePass *createNVPTXCtorDtorLoweringLegacyPass();
@@ -47,7 +47,8 @@ MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
FunctionPass *createNVPTXLowerArgsPass();
FunctionPass *createNVPTXLowerAllocaPass();
-FunctionPass *createNVPTXLowerUnreachablePass();
+FunctionPass *createNVPTXLowerUnreachablePass(bool TrapUnreachable,
+ bool NoTrapAfterNoreturn);
MachineFunctionPass *createNVPTXPeephole();
MachineFunctionPass *createNVPTXProxyRegErasurePass();
@@ -180,6 +181,18 @@ enum CmpMode {
FTZ_FLAG = 0x100
};
}
+
+namespace PTXPrmtMode {
+enum PrmtMode {
+ NONE,
+ F4E,
+ B4E,
+ RC8,
+ ECL,
+ ECR,
+ RC16,
+};
+}
}
void initializeNVPTXDAGToDAGISelPass(PassRegistry &);
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.td b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.td
index 02fa2a4ee81e..f2a4ce381b40 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.td
@@ -24,23 +24,24 @@ include "NVPTXInstrInfo.td"
// TableGen in NVPTXGenSubtarget.inc.
//===----------------------------------------------------------------------===//
-class FeatureSM<int version>:
- SubtargetFeature<"sm_"# version, "SmVersion",
- "" # version,
- "Target SM " # version>;
-def SM90a: FeatureSM<90>;
+class FeatureSM<string sm, int value>:
+ SubtargetFeature<"sm_"# sm, "FullSmVersion",
+ "" # value,
+ "Target SM " # sm>;
class FeaturePTX<int version>:
SubtargetFeature<"ptx"# version, "PTXVersion",
"" # version,
"Use PTX version " # version>;
-foreach version = [20, 21, 30, 32, 35, 37, 50, 52, 53,
- 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90] in
- def SM#version: FeatureSM<version>;
+foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
+ 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90] in
+ def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>;
+
+def SM90a: FeatureSM<"90a", 901>;
foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 63, 64, 65,
- 70, 71, 72, 73, 74, 75, 76, 77, 78, 80, 81] in
+ 70, 71, 72, 73, 74, 75, 76, 77, 78, 80, 81, 82, 83] in
def PTX#version: FeaturePTX<version>;
//===----------------------------------------------------------------------===//
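
The FeatureSM rework above widens the subtarget field from SmVersion to FullSmVersion so architecture-variant targets fit the encoding: plain sm_NN features store NN*10 (sm_90 becomes 900) while sm_90a stores 901. Presumably the subtarget decodes it along these lines (a sketch; the real accessors live in NVPTXSubtarget, which is not part of this hunk):

    // FullSmVersion encoding: sm_80 -> 800, sm_90 -> 900, sm_90a -> 901.
    unsigned SmVersion = FullSmVersion / 10;                   // 901 -> 90
    bool HasArchAcceleratedFeatures = FullSmVersion % 10 != 0; // "a" suffix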
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp
index b4f7e78cb107..4f106584eb0a 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp
@@ -94,5 +94,5 @@ ModRefInfo NVPTXAAResult::getModRefInfoMask(const MemoryLocation &Loc,
if (isConstOrParam(Base->getType()->getPointerAddressSpace()))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfoMask(Loc, AAQI, IgnoreLocals);
+ return ModRefInfo::ModRef;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 71b70766bf9e..6a03c7b0abc3 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -537,59 +537,50 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
raw_ostream &O) const {
// If the NVVM IR has some of reqntid* specified, then output
// the reqntid directive, and set the unspecified ones to 1.
- // If none of reqntid* is specified, don't output reqntid directive.
- unsigned reqntidx, reqntidy, reqntidz;
- bool specified = false;
- if (!getReqNTIDx(F, reqntidx))
- reqntidx = 1;
- else
- specified = true;
- if (!getReqNTIDy(F, reqntidy))
- reqntidy = 1;
- else
- specified = true;
- if (!getReqNTIDz(F, reqntidz))
- reqntidz = 1;
- else
- specified = true;
-
- if (specified)
- O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
+ // If none of Reqntid* is specified, don't output the reqntid directive.
+ unsigned Reqntidx, Reqntidy, Reqntidz;
+ Reqntidx = Reqntidy = Reqntidz = 1;
+ bool ReqSpecified = false;
+ ReqSpecified |= getReqNTIDx(F, Reqntidx);
+ ReqSpecified |= getReqNTIDy(F, Reqntidy);
+ ReqSpecified |= getReqNTIDz(F, Reqntidz);
+
+ if (ReqSpecified)
+ O << ".reqntid " << Reqntidx << ", " << Reqntidy << ", " << Reqntidz
<< "\n";
// If the NVVM IR has some of maxntid* specified, then output
// the maxntid directive, and set the unspecified ones to 1.
// If none of maxntid* is specified, don't output maxntid directive.
- unsigned maxntidx, maxntidy, maxntidz;
- specified = false;
- if (!getMaxNTIDx(F, maxntidx))
- maxntidx = 1;
- else
- specified = true;
- if (!getMaxNTIDy(F, maxntidy))
- maxntidy = 1;
- else
- specified = true;
- if (!getMaxNTIDz(F, maxntidz))
- maxntidz = 1;
- else
- specified = true;
-
- if (specified)
- O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
+ unsigned Maxntidx, Maxntidy, Maxntidz;
+ Maxntidx = Maxntidy = Maxntidz = 1;
+ bool MaxSpecified = false;
+ MaxSpecified |= getMaxNTIDx(F, Maxntidx);
+ MaxSpecified |= getMaxNTIDy(F, Maxntidy);
+ MaxSpecified |= getMaxNTIDz(F, Maxntidz);
+
+ if (MaxSpecified)
+ O << ".maxntid " << Maxntidx << ", " << Maxntidy << ", " << Maxntidz
<< "\n";
- unsigned mincta;
- if (getMinCTASm(F, mincta))
- O << ".minnctapersm " << mincta << "\n";
+ unsigned Mincta = 0;
+ if (getMinCTASm(F, Mincta))
+ O << ".minnctapersm " << Mincta << "\n";
- unsigned maxnreg;
- if (getMaxNReg(F, maxnreg))
- O << ".maxnreg " << maxnreg << "\n";
+ unsigned Maxnreg = 0;
+ if (getMaxNReg(F, Maxnreg))
+ O << ".maxnreg " << Maxnreg << "\n";
+
+ // The .maxclusterrank directive requires SM_90 or higher; make sure that we
+ // filter it out for lower SM versions, as it causes a hard ptxas crash.
+ const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
+ const auto *STI = static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
+ unsigned Maxclusterrank = 0;
+ if (getMaxClusterRank(F, Maxclusterrank) && STI->getSmVersion() >= 90)
+ O << ".maxclusterrank " << Maxclusterrank << "\n";
}
-std::string
-NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
+std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
std::string Name;
@@ -674,11 +665,11 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
* Currently, this is valid for CUDA shared variables, which have local
* scope and global lifetime. So the conditions to check are :
* 1. Is the global variable in shared address space?
- * 2. Does it have internal linkage?
+ * 2. Does it have local linkage?
* 3. Is the global variable referenced only in one function?
*/
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
- if (!gv->hasInternalLinkage())
+ if (!gv->hasLocalLinkage())
return false;
PointerType *Pty = gv->getType();
if (Pty->getAddressSpace() != ADDRESS_SPACE_SHARED)
@@ -798,14 +789,17 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30))
report_fatal_error(".alias requires PTX version >= 6.3 and sm_30");
+ // OpenMP supports NVPTX global constructors and destructors.
+ bool IsOpenMP = M.getModuleFlag("openmp") != nullptr;
+
if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors")) &&
- !LowerCtorDtor) {
+ !LowerCtorDtor && !IsOpenMP) {
report_fatal_error(
"Module has a nontrivial global ctor, which NVPTX does not support.");
return true; // error
}
if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors")) &&
- !LowerCtorDtor) {
+ !LowerCtorDtor && !IsOpenMP) {
report_fatal_error(
"Module has a nontrivial global dtor, which NVPTX does not support.");
return true; // error
@@ -1022,8 +1016,8 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
}
// Skip LLVM intrinsic global variables
- if (GVar->getName().startswith("llvm.") ||
- GVar->getName().startswith("nvvm."))
+ if (GVar->getName().starts_with("llvm.") ||
+ GVar->getName().starts_with("nvvm."))
return;
const DataLayout &DL = getDataLayout();
@@ -1989,35 +1983,16 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
}
switch (CE->getOpcode()) {
- default: {
- // If the code isn't optimized, there may be outstanding folding
- // opportunities. Attempt to fold the expression using DataLayout as a
- // last resort before giving up.
- Constant *C = ConstantFoldConstant(CE, getDataLayout());
- if (C != CE)
- return lowerConstantForGV(C, ProcessingGeneric);
-
- // Otherwise report the problem to the user.
- std::string S;
- raw_string_ostream OS(S);
- OS << "Unsupported expression in static initializer: ";
- CE->printAsOperand(OS, /*PrintType=*/false,
- !MF ? nullptr : MF->getFunction().getParent());
- report_fatal_error(Twine(OS.str()));
- }
+ default:
+ break; // Error
case Instruction::AddrSpaceCast: {
// Strip the addrspacecast and pass along the operand
PointerType *DstTy = cast<PointerType>(CE->getType());
- if (DstTy->getAddressSpace() == 0) {
+ if (DstTy->getAddressSpace() == 0)
return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
- }
- std::string S;
- raw_string_ostream OS(S);
- OS << "Unsupported expression in static initializer: ";
- CE->printAsOperand(OS, /*PrintType=*/ false,
- !MF ? nullptr : MF->getFunction().getParent());
- report_fatal_error(Twine(OS.str()));
+
+ break; // Error
}
case Instruction::GetElementPtr: {
@@ -2052,9 +2027,12 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
// Handle casts to pointers by changing them into casts to the appropriate
// integer type. This promotes constant folding and simplifies this code.
Constant *Op = CE->getOperand(0);
- Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
- false/*ZExt*/);
- return lowerConstantForGV(Op, ProcessingGeneric);
+ Op = ConstantFoldIntegerCast(Op, DL.getIntPtrType(CV->getType()),
+ /*IsSigned*/ false, DL);
+ if (Op)
+ return lowerConstantForGV(Op, ProcessingGeneric);
+
+ break; // Error
}
case Instruction::PtrToInt: {
@@ -2091,6 +2069,21 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
}
}
}
+
+ // If the code isn't optimized, there may be outstanding folding
+ // opportunities. Attempt to fold the expression using DataLayout as a
+ // last resort before giving up.
+ Constant *C = ConstantFoldConstant(CE, getDataLayout());
+ if (C != CE)
+ return lowerConstantForGV(C, ProcessingGeneric);
+
+ // Otherwise report the problem to the user.
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ CE->printAsOperand(OS, /*PrintType=*/false,
+ !MF ? nullptr : MF->getFunction().getParent());
+ report_fatal_error(Twine(OS.str()));
}
// Copy of MCExpr::print customized for NVPTX
@@ -2200,9 +2193,9 @@ bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
-void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(opNum);
+ const MachineOperand &MO = MI->getOperand(OpNum);
switch (MO.getType()) {
case MachineOperand::MO_Register:
if (MO.getReg().isPhysical()) {
@@ -2236,19 +2229,19 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
}
}
-void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, unsigned OpNum,
raw_ostream &O, const char *Modifier) {
- printOperand(MI, opNum, O);
+ printOperand(MI, OpNum, O);
if (Modifier && strcmp(Modifier, "add") == 0) {
O << ", ";
- printOperand(MI, opNum + 1, O);
+ printOperand(MI, OpNum + 1, O);
} else {
- if (MI->getOperand(opNum + 1).isImm() &&
- MI->getOperand(opNum + 1).getImm() == 0)
+ if (MI->getOperand(OpNum + 1).isImm() &&
+ MI->getOperand(OpNum + 1).getImm() == 0)
return; // don't print ',0' or '+0'
O << "+";
- printOperand(MI, opNum + 1, O);
+ printOperand(MI, OpNum + 1, O);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 2bd40116e63c..7f0f37e7207d 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -169,7 +169,7 @@ private:
MCOperand GetSymbolRef(const MCSymbol *Symbol);
unsigned encodeVirtualRegister(unsigned Reg);
- void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ void printMemOperand(const MachineInstr *MI, unsigned OpNum, raw_ostream &O,
const char *Modifier = nullptr);
void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O,
bool processDemoted, const NVPTXSubtarget &STI);
@@ -184,7 +184,7 @@ private:
void printReturnValStr(const MachineFunction &MF, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode, raw_ostream &) override;
- void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printOperand(const MachineInstr *MI, unsigned OpNum, raw_ostream &O);
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode, raw_ostream &) override;
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
index ed7839cafe3a..f77a1f0272c8 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "NVPTXCtorDtorLowering.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Constants.h"
@@ -32,6 +33,11 @@ static cl::opt<std::string>
cl::desc("Override unique ID of ctor/dtor globals."),
cl::init(""), cl::Hidden);
+static cl::opt<bool>
+ CreateKernels("nvptx-emit-init-fini-kernel",
+ cl::desc("Emit kernels to call ctor/dtor globals."),
+ cl::init(true), cl::Hidden);
+
namespace {
static std::string getHash(StringRef Str) {
@@ -42,11 +48,163 @@ static std::string getHash(StringRef Str) {
return llvm::utohexstr(Hash.low(), /*LowerCase=*/true);
}
-static bool createInitOrFiniGlobls(Module &M, StringRef GlobalName,
- bool IsCtor) {
- GlobalVariable *GV = M.getGlobalVariable(GlobalName);
- if (!GV || !GV->hasInitializer())
- return false;
+static void addKernelMetadata(Module &M, GlobalValue *GV) {
+ llvm::LLVMContext &Ctx = M.getContext();
+
+ // Get "nvvm.annotations" metadata node.
+ llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
+
+ llvm::Metadata *KernelMDVals[] = {
+ llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "kernel"),
+ llvm::ConstantAsMetadata::get(
+ llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
+
+ // This kernel is only to be called single-threaded.
+ llvm::Metadata *ThreadXMDVals[] = {
+ llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidx"),
+ llvm::ConstantAsMetadata::get(
+ llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
+ llvm::Metadata *ThreadYMDVals[] = {
+ llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidy"),
+ llvm::ConstantAsMetadata::get(
+ llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
+ llvm::Metadata *ThreadZMDVals[] = {
+ llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidz"),
+ llvm::ConstantAsMetadata::get(
+ llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
+
+ llvm::Metadata *BlockMDVals[] = {
+ llvm::ConstantAsMetadata::get(GV),
+ llvm::MDString::get(Ctx, "maxclusterrank"),
+ llvm::ConstantAsMetadata::get(
+ llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
+
+ // Append metadata to nvvm.annotations.
+ MD->addOperand(llvm::MDNode::get(Ctx, KernelMDVals));
+ MD->addOperand(llvm::MDNode::get(Ctx, ThreadXMDVals));
+ MD->addOperand(llvm::MDNode::get(Ctx, ThreadYMDVals));
+ MD->addOperand(llvm::MDNode::get(Ctx, ThreadZMDVals));
+ MD->addOperand(llvm::MDNode::get(Ctx, BlockMDVals));
+}
+
+static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) {
+ StringRef InitOrFiniKernelName =
+ IsCtor ? "nvptx$device$init" : "nvptx$device$fini";
+ if (M.getFunction(InitOrFiniKernelName))
+ return nullptr;
+
+ Function *InitOrFiniKernel = Function::createWithDefaultAttr(
+ FunctionType::get(Type::getVoidTy(M.getContext()), false),
+ GlobalValue::WeakODRLinkage, 0, InitOrFiniKernelName, &M);
+ addKernelMetadata(M, InitOrFiniKernel);
+
+ return InitOrFiniKernel;
+}
+
+// We create the IR required to call each callback in this section. This is
+// equivalent to the following code. Normally, the linker would provide us with
+// the definitions of the init and fini array sections. The 'nvlink' linker does
+// not do this, so initializing these values is done by the runtime.
+//
+// extern "C" void **__init_array_start = nullptr;
+// extern "C" void **__init_array_end = nullptr;
+// extern "C" void **__fini_array_start = nullptr;
+// extern "C" void **__fini_array_end = nullptr;
+//
+// using InitCallback = void();
+// using FiniCallback = void();
+//
+// void call_init_array_callbacks() {
+// for (auto start = __init_array_start; start != __init_array_end; ++start)
+// reinterpret_cast<InitCallback *>(*start)();
+// }
+//
+// void call_fini_array_callbacks() {
+// size_t fini_array_size = __fini_array_end - __fini_array_start;
+// for (size_t i = fini_array_size; i > 0; --i)
+// reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])();
+// }
+static void createInitOrFiniCalls(Function &F, bool IsCtor) {
+ Module &M = *F.getParent();
+ LLVMContext &C = M.getContext();
+
+ IRBuilder<> IRB(BasicBlock::Create(C, "entry", &F));
+ auto *LoopBB = BasicBlock::Create(C, "while.entry", &F);
+ auto *ExitBB = BasicBlock::Create(C, "while.end", &F);
+ Type *PtrTy = IRB.getPtrTy(llvm::ADDRESS_SPACE_GLOBAL);
+
+ auto *Begin = M.getOrInsertGlobal(
+ IsCtor ? "__init_array_start" : "__fini_array_start",
+ PointerType::get(C, 0), [&]() {
+ auto *GV = new GlobalVariable(
+ M, PointerType::get(C, 0),
+ /*isConstant=*/false, GlobalValue::WeakAnyLinkage,
+ Constant::getNullValue(PointerType::get(C, 0)),
+ IsCtor ? "__init_array_start" : "__fini_array_start",
+ /*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal,
+ /*AddressSpace=*/llvm::ADDRESS_SPACE_GLOBAL);
+ GV->setVisibility(GlobalVariable::ProtectedVisibility);
+ return GV;
+ });
+ auto *End = M.getOrInsertGlobal(
+ IsCtor ? "__init_array_end" : "__fini_array_end", PointerType::get(C, 0),
+ [&]() {
+ auto *GV = new GlobalVariable(
+ M, PointerType::get(C, 0),
+ /*isConstant=*/false, GlobalValue::WeakAnyLinkage,
+ Constant::getNullValue(PointerType::get(C, 0)),
+ IsCtor ? "__init_array_end" : "__fini_array_end",
+ /*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal,
+ /*AddressSpace=*/llvm::ADDRESS_SPACE_GLOBAL);
+ GV->setVisibility(GlobalVariable::ProtectedVisibility);
+ return GV;
+ });
+
+ // The constructor type is supposed to allow using the argument vectors, but
+ // for now we just call them with no arguments.
+ auto *CallBackTy = FunctionType::get(IRB.getVoidTy(), {});
+
+ // The destructor array must be called in reverse order. Get an expression to
+ // the end of the array and iterate backwards in that case.
+ Value *BeginVal = IRB.CreateLoad(Begin->getType(), Begin, "begin");
+ Value *EndVal = IRB.CreateLoad(Begin->getType(), End, "stop");
+ if (!IsCtor) {
+ auto *BeginInt = IRB.CreatePtrToInt(BeginVal, IntegerType::getInt64Ty(C));
+ auto *EndInt = IRB.CreatePtrToInt(EndVal, IntegerType::getInt64Ty(C));
+ auto *SubInst = IRB.CreateSub(EndInt, BeginInt);
+ auto *Offset = IRB.CreateAShr(
+ SubInst, ConstantInt::get(IntegerType::getInt64Ty(C), 3), "offset",
+ /*IsExact=*/true);
+ auto *ValuePtr = IRB.CreateGEP(PointerType::get(C, 0), BeginVal,
+ ArrayRef<Value *>({Offset}));
+ EndVal = BeginVal;
+ BeginVal = IRB.CreateInBoundsGEP(
+ PointerType::get(C, 0), ValuePtr,
+ ArrayRef<Value *>(ConstantInt::get(IntegerType::getInt64Ty(C), -1)),
+ "start");
+ }
+ IRB.CreateCondBr(
+ IRB.CreateCmp(IsCtor ? ICmpInst::ICMP_NE : ICmpInst::ICMP_UGT, BeginVal,
+ EndVal),
+ LoopBB, ExitBB);
+ IRB.SetInsertPoint(LoopBB);
+ auto *CallBackPHI = IRB.CreatePHI(PtrTy, 2, "ptr");
+ auto *CallBack = IRB.CreateLoad(IRB.getPtrTy(F.getAddressSpace()),
+ CallBackPHI, "callback");
+ IRB.CreateCall(CallBackTy, CallBack);
+ auto *NewCallBack =
+ IRB.CreateConstGEP1_64(PtrTy, CallBackPHI, IsCtor ? 1 : -1, "next");
+ auto *EndCmp = IRB.CreateCmp(IsCtor ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_ULT,
+ NewCallBack, EndVal, "end");
+ CallBackPHI->addIncoming(BeginVal, &F.getEntryBlock());
+ CallBackPHI->addIncoming(NewCallBack, LoopBB);
+ IRB.CreateCondBr(EndCmp, ExitBB, LoopBB);
+ IRB.SetInsertPoint(ExitBB);
+ IRB.CreateRetVoid();
+}
+
+static bool createInitOrFiniGlobals(Module &M, GlobalVariable *GV,
+ bool IsCtor) {
ConstantArray *GA = dyn_cast<ConstantArray>(GV->getInitializer());
if (!GA || GA->getNumOperands() == 0)
return false;
@@ -81,14 +239,35 @@ static bool createInitOrFiniGlobls(Module &M, StringRef GlobalName,
appendToUsed(M, {GV});
}
+ return true;
+}
+
+static bool createInitOrFiniKernel(Module &M, StringRef GlobalName,
+ bool IsCtor) {
+ GlobalVariable *GV = M.getGlobalVariable(GlobalName);
+ if (!GV || !GV->hasInitializer())
+ return false;
+
+ if (!createInitOrFiniGlobals(M, GV, IsCtor))
+ return false;
+
+ if (!CreateKernels)
+ return true;
+
+ Function *InitOrFiniKernel = createInitOrFiniKernelFunction(M, IsCtor);
+ if (!InitOrFiniKernel)
+ return false;
+
+ createInitOrFiniCalls(*InitOrFiniKernel, IsCtor);
+
GV->eraseFromParent();
return true;
}
static bool lowerCtorsAndDtors(Module &M) {
bool Modified = false;
- Modified |= createInitOrFiniGlobls(M, "llvm.global_ctors", /*IsCtor =*/true);
- Modified |= createInitOrFiniGlobls(M, "llvm.global_dtors", /*IsCtor =*/false);
+ Modified |= createInitOrFiniKernel(M, "llvm.global_ctors", /*IsCtor =*/true);
+ Modified |= createInitOrFiniKernel(M, "llvm.global_dtors", /*IsCtor =*/false);
return Modified;
}
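
The subtle part of createInitOrFiniCalls() above is the destructor path: the element count is derived as (end - begin) shifted right by 3 (pointers in the global space are 8 bytes, and the shift is marked exact), and the loop then walks from the last slot down. In plain C++ the generated IR corresponds to roughly:

    typedef void (*FiniCallback)();
    extern void **__fini_array_start; // filled in by the runtime, not nvlink
    extern void **__fini_array_end;

    static void callFiniArrayCallbacks() {
      // Same arithmetic as the IR: byte difference, exact shift right by 3.
      long Count =
          ((char *)__fini_array_end - (char *)__fini_array_start) >> 3;
      for (long I = Count; I > 0; --I)
        ((FiniCallback)__fini_array_start[I - 1])();
    }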
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 4f03e474edb4..3d6bd1d8ad06 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -62,7 +62,7 @@ bool GenericToNVVM::runOnModule(Module &M) {
for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
if (GV.getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC &&
!llvm::isTexture(GV) && !llvm::isSurface(GV) && !llvm::isSampler(GV) &&
- !GV.getName().startswith("llvm.")) {
+ !GV.getName().starts_with("llvm.")) {
GlobalVariable *NewGV = new GlobalVariable(
M, GV.getValueType(), GV.isConstant(), GV.getLinkage(),
GV.hasInitializer() ? GV.getInitializer() : nullptr, "", &GV,
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 99a7fdb9d1e2..894a8636f458 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -14,6 +14,7 @@
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTXUtilities.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
@@ -32,7 +33,7 @@ using namespace llvm;
/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
- llvm::CodeGenOpt::Level OptLevel) {
+ llvm::CodeGenOptLevel OptLevel) {
return new NVPTXDAGToDAGISel(TM, OptLevel);
}
@@ -41,9 +42,9 @@ char NVPTXDAGToDAGISel::ID = 0;
INITIALIZE_PASS(NVPTXDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
- CodeGenOpt::Level OptLevel)
+ CodeGenOptLevel OptLevel)
: SelectionDAGISel(ID, tm, OptLevel), TM(tm) {
- doMulWide = (OptLevel > 0);
+ doMulWide = (OptLevel > CodeGenOptLevel::None);
}
bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
@@ -104,7 +105,9 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
case NVPTXISD::SETP_F16X2:
SelectSETP_F16X2(N);
return;
-
+ case NVPTXISD::SETP_BF16X2:
+ SelectSETP_BF16X2(N);
+ return;
case NVPTXISD::LoadV2:
case NVPTXISD::LoadV4:
if (tryLoadVector(N))
@@ -607,15 +610,26 @@ bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
return true;
}
+bool NVPTXDAGToDAGISel::SelectSETP_BF16X2(SDNode *N) {
+ unsigned PTXCmpMode =
+ getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
+ SDLoc DL(N);
+ SDNode *SetP = CurDAG->getMachineNode(
+ NVPTX::SETP_bf16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
+ N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
+ ReplaceNode(N, SetP);
+ return true;
+}
+
// Find all instances of extract_vector_elt that use this v2f16 vector
// and coalesce them into a scattering move instruction.
bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
SDValue Vector = N->getOperand(0);
- // We only care about f16x2 as it's the only real vector type we
+ // We only care about 16x2 as it's the only real vector type we
// need to deal with.
MVT VT = Vector.getSimpleValueType();
- if (!(VT == MVT::v2f16 || VT == MVT::v2bf16))
+ if (!Isv2x16VT(VT))
return false;
// Find and record all uses of this vector that extract element 0 or 1.
SmallVector<SDNode *, 4> E0, E1;
@@ -828,6 +842,8 @@ pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8,
return Opcode_i16;
case MVT::v2f16:
case MVT::v2bf16:
+ case MVT::v2i16:
+ case MVT::v4i8:
return Opcode_i32;
case MVT::f32:
return Opcode_f32;
@@ -909,9 +925,9 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
// Vector Setting
unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
if (SimpleVT.isVector()) {
- assert((LoadedVT == MVT::v2f16 || LoadedVT == MVT::v2bf16) &&
+ assert((Isv2x16VT(LoadedVT) || LoadedVT == MVT::v4i8) &&
"Unexpected vector type");
- // v2f16/v2bf16 is loaded using ld.b32
+ // v2f16/v2bf16/v2i16 is loaded using ld.b32
fromTypeWidth = 32;
}
@@ -1061,10 +1077,10 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
EVT EltVT = N->getValueType(0);
- // v8f16 is a special case. PTX doesn't have ld.v8.f16
- // instruction. Instead, we split the vector into v2f16 chunks and
+ // v8x16 is a special case. PTX doesn't have an ld.v8.16
+ // instruction. Instead, we split the vector into v2x16 chunks and
// load them with ld.v4.b32.
- if (EltVT == MVT::v2f16 || EltVT == MVT::v2bf16) {
+ if (Isv2x16VT(EltVT)) {
assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
EltVT = MVT::i32;
FromType = NVPTX::PTXLdStInstCode::Untyped;
@@ -1254,18 +1270,23 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
SDLoc DL(N);
SDNode *LD;
SDValue Base, Offset, Addr;
+ EVT OrigType = N->getValueType(0);
EVT EltVT = Mem->getMemoryVT();
unsigned NumElts = 1;
if (EltVT.isVector()) {
NumElts = EltVT.getVectorNumElements();
EltVT = EltVT.getVectorElementType();
- // vectors of f16 are loaded/stored as multiples of v2f16 elements.
- if ((EltVT == MVT::f16 && N->getValueType(0) == MVT::v2f16) ||
- (EltVT == MVT::bf16 && N->getValueType(0) == MVT::v2bf16)) {
- assert(NumElts % 2 == 0 && "Vector must have even number of elements");
- EltVT = N->getValueType(0);
- NumElts /= 2;
+ // vectors of 16-bit types are loaded/stored as multiples of v2x16 elements.
+ if ((EltVT == MVT::f16 && OrigType == MVT::v2f16) ||
+ (EltVT == MVT::bf16 && OrigType == MVT::v2bf16) ||
+ (EltVT == MVT::i16 && OrigType == MVT::v2i16)) {
+ assert(NumElts % 2 == 0 && "Vector must have even number of elements");
+ EltVT = OrigType;
+ NumElts /= 2;
+ } else if (OrigType == MVT::v4i8) {
+ EltVT = OrigType;
+ NumElts = 1;
}
}
@@ -1600,7 +1621,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
// concept of sign-/zero-extension, so emulate it here by adding an explicit
// CVT instruction. Ptxas should clean up any redundancies here.
- EVT OrigType = N->getValueType(0);
LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
if (OrigType != EltVT &&
@@ -1678,9 +1698,9 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
MVT ScalarVT = SimpleVT.getScalarType();
unsigned toTypeWidth = ScalarVT.getSizeInBits();
if (SimpleVT.isVector()) {
- assert((StoreVT == MVT::v2f16 || StoreVT == MVT::v2bf16) &&
+ assert((Isv2x16VT(StoreVT) || StoreVT == MVT::v4i8) &&
"Unexpected vector type");
- // v2f16 is stored using st.b32
+ // v2x16 is stored using st.b32
toTypeWidth = 32;
}
@@ -1844,10 +1864,10 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
return false;
}
- // v8f16 is a special case. PTX doesn't have st.v8.f16
- // instruction. Instead, we split the vector into v2f16 chunks and
+ // v8x16 is a special case. PTX doesn't have an st.v8.x16
+ // instruction. Instead, we split the vector into v2x16 chunks and
// store them with st.v4.b32.
- if (EltVT == MVT::v2f16 || EltVT == MVT::v2bf16) {
+ if (Isv2x16VT(EltVT)) {
assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected load opcode.");
EltVT = MVT::i32;
ToType = NVPTX::PTXLdStInstCode::Untyped;
@@ -3581,12 +3601,13 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+ const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
+ std::vector<SDValue> &OutOps) {
SDValue Op0, Op1;
switch (ConstraintID) {
default:
return true;
- case InlineAsm::Constraint_m: // memory
+ case InlineAsm::ConstraintCode::m: // memory
if (SelectDirectAddr(Op, Op0)) {
OutOps.push_back(Op0);
OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
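
Several of the NVPTX hunks above funnel v2f16, v2bf16, and v2i16 through a single 32-bit path via Isv2x16VT(), whose body this diff doesn't show; judging from its uses it is presumably equivalent to:

    // All 2 x 16-bit vector types are treated as one 32-bit value: loaded
    // and stored with .b32 and selected with the _i32 opcode variants.
    static bool Isv2x16VT(llvm::EVT VT) {
      return VT == llvm::MVT::v2f16 || VT == llvm::MVT::v2bf16 ||
             VT == llvm::MVT::v2i16;
    }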
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 25bb73cd5536..84c8432047ca 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -13,12 +13,13 @@
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELDAGTODAG_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXISELDAGTODAG_H
+#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
#include "NVPTXISelLowering.h"
#include "NVPTXRegisterInfo.h"
#include "NVPTXTargetMachine.h"
-#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Compiler.h"
@@ -42,15 +43,15 @@ public:
NVPTXDAGToDAGISel() = delete;
- explicit NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
- CodeGenOpt::Level OptLevel);
+ explicit NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, CodeGenOptLevel OptLevel);
bool runOnMachineFunction(MachineFunction &MF) override;
const NVPTXSubtarget *Subtarget = nullptr;
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- unsigned ConstraintID,
+ InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) override;
+
private:
// Include the pieces autogenerated from the target description.
#include "NVPTXGenDAGISel.inc"
@@ -73,6 +74,7 @@ private:
bool tryBFE(SDNode *N);
bool tryConstantFP(SDNode *N);
bool SelectSETP_F16X2(SDNode *N);
+ bool SelectSETP_BF16X2(SDNode *N);
bool tryEXTRACT_VECTOR_ELEMENT(SDNode *N);
inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 7823e12d6270..e8f36bf50a1b 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineValueType.h"
@@ -36,6 +37,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/FPEnv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
@@ -133,6 +135,7 @@ static bool IsPTXVectorType(MVT VT) {
case MVT::v4i8:
case MVT::v2i16:
case MVT::v4i16:
+ case MVT::v8i16: // <4 x i16x2>
case MVT::v2i32:
case MVT::v4i32:
case MVT::v2i64:
@@ -149,12 +152,9 @@ static bool IsPTXVectorType(MVT VT) {
}
}
-static bool Isv2f16Orv2bf16Type(EVT VT) {
- return (VT == MVT::v2f16 || VT == MVT::v2bf16);
-}
-
-static bool Isf16Orbf16Type(MVT VT) {
- return (VT.SimpleTy == MVT::f16 || VT.SimpleTy == MVT::bf16);
+static bool Is16bitsType(MVT VT) {
+ return (VT.SimpleTy == MVT::f16 || VT.SimpleTy == MVT::bf16 ||
+ VT.SimpleTy == MVT::i16);
}
/// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
@@ -207,9 +207,26 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
// Vectors with an even number of 16-bit elements will be passed to
// us as an array of v2f16/v2bf16/v2i16 elements. We must match this so we
// stay in sync with Ins/Outs.
- if ((Isf16Orbf16Type(EltVT.getSimpleVT())) && NumElts % 2 == 0) {
- EltVT = EltVT == MVT::f16 ? MVT::v2f16 : MVT::v2bf16;
+ if ((Is16bitsType(EltVT.getSimpleVT())) && NumElts % 2 == 0) {
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ case MVT::f16:
+ EltVT = MVT::v2f16;
+ break;
+ case MVT::bf16:
+ EltVT = MVT::v2bf16;
+ break;
+ case MVT::i16:
+ EltVT = MVT::v2i16;
+ break;
+ default:
+ llvm_unreachable("Unexpected type");
+ }
NumElts /= 2;
+ } else if (EltVT.getSimpleVT() == MVT::i8 &&
+ (NumElts % 4 == 0 || NumElts == 3)) {
+ // v*i8 are formally lowered as v4i8
+ EltVT = MVT::v4i8;
+ NumElts = (NumElts + 3) / 4;
}
for (unsigned j = 0; j != NumElts; ++j) {
ValueVTs.push_back(EltVT);
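As a minimal host-side sketch of the regrouping arithmetic above (plain C++, not the LLVM API; the helper name numChunks is illustrative): pairs of 16-bit elements fold into v2x16 chunks, and i8 elements fold into v4i8 chunks, with v3i8 rounding up to a single v4i8.

#include <cstdio>

// Sketch: how many packed chunks the rules above produce for a vector of
// NumElts elements of EltBits bits each.
static unsigned numChunks(unsigned EltBits, unsigned NumElts) {
  if (EltBits == 16 && NumElts % 2 == 0)
    return NumElts / 2; // e.g. v8i16 -> 4 x v2i16
  if (EltBits == 8 && (NumElts % 4 == 0 || NumElts == 3))
    return (NumElts + 3) / 4; // e.g. v16i8 -> 4 x v4i8, v3i8 -> 1 x v4i8
  return NumElts; // anything else stays element-by-element
}

int main() {
  printf("v8i16 -> %u chunks, v3i8 -> %u chunk(s)\n",
         numChunks(16, 8), numChunks(8, 3)); // prints 4 and 1
}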
@@ -386,9 +403,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// always lower memset, memcpy, and memmove intrinsics to load/store
// instructions, rather
// than generating calls to memset, memcpy or memmove.
- MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
- MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
- MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;
+ MaxStoresPerMemset = MaxStoresPerMemsetOptSize = (unsigned)0xFFFFFFFF;
+ MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = (unsigned) 0xFFFFFFFF;
+ MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = (unsigned) 0xFFFFFFFF;
setBooleanContents(ZeroOrNegativeOneBooleanContent);
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
@@ -420,6 +437,15 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
case ISD::FADD:
case ISD::FMUL:
case ISD::FSUB:
+ case ISD::SELECT:
+ case ISD::SELECT_CC:
+ case ISD::SETCC:
+ case ISD::FEXP2:
+ case ISD::FCEIL:
+ case ISD::FFLOOR:
+ case ISD::FNEARBYINT:
+ case ISD::FRINT:
+ case ISD::FTRUNC:
IsOpSupported = STI.getSmVersion() >= 90 && STI.getPTXVersion() >= 78;
break;
}
@@ -427,8 +453,27 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
Op, VT, IsOpSupported ? Action : NoBF16Action);
};
+ auto setI16x2OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action,
+ LegalizeAction NoI16x2Action) {
+ bool IsOpSupported = false;
+ // These instructions are available only on sm_90 with PTX 8.0 or later.
+ switch (Op) {
+ case ISD::ADD:
+ case ISD::SMAX:
+ case ISD::SMIN:
+ case ISD::UMIN:
+ case ISD::UMAX:
+ case ISD::SUB:
+ IsOpSupported = STI.getSmVersion() >= 90 && STI.getPTXVersion() >= 80;
+ break;
+ }
+ setOperationAction(Op, VT, IsOpSupported ? Action : NoI16x2Action);
+ };
+
addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
+ addRegisterClass(MVT::v2i16, &NVPTX::Int32RegsRegClass);
+ addRegisterClass(MVT::v4i8, &NVPTX::Int32RegsRegClass);
addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
@@ -439,8 +484,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
addRegisterClass(MVT::v2bf16, &NVPTX::Int32RegsRegClass);
// Conversion to/from FP16/FP16x2 is always legal.
- setOperationAction(ISD::SINT_TO_FP, MVT::f16, Legal);
- setOperationAction(ISD::FP_TO_SINT, MVT::f16, Legal);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f16, Expand);
@@ -450,18 +493,50 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand);
// Conversion to/from BF16/BF16x2 is always legal.
- setOperationAction(ISD::SINT_TO_FP, MVT::bf16, Legal);
- setOperationAction(ISD::FP_TO_SINT, MVT::bf16, Legal);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2bf16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2bf16, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2bf16, Expand);
- setBF16OperationAction(ISD::SETCC, MVT::bf16, Legal, Promote);
setBF16OperationAction(ISD::SETCC, MVT::v2bf16, Legal, Expand);
+ setBF16OperationAction(ISD::SETCC, MVT::bf16, Legal, Promote);
+ if (getOperationAction(ISD::SETCC, MVT::bf16) == Promote)
+ AddPromotedToType(ISD::SETCC, MVT::bf16, MVT::f32);
+
+ // Conversion to/from i16/i16x2 is always legal.
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i16, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i16, Expand);
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i8, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i8, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom);
+ // Only logical ops can be done on v4i8 directly; all others must be done
+ // elementwise.
+ setOperationAction(
+ {ISD::ABS, ISD::ADD, ISD::ADDC, ISD::ADDE,
+ ISD::BITREVERSE, ISD::CTLZ, ISD::CTPOP, ISD::CTTZ,
+ ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FSHL, ISD::FSHR,
+ ISD::MUL, ISD::MULHS, ISD::MULHU, ISD::PARITY,
+ ISD::ROTL, ISD::ROTR, ISD::SADDO, ISD::SADDO_CARRY,
+ ISD::SADDSAT, ISD::SDIV, ISD::SDIVREM, ISD::SELECT_CC,
+ ISD::SETCC, ISD::SHL, ISD::SINT_TO_FP, ISD::SMAX,
+ ISD::SMIN, ISD::SMULO, ISD::SMUL_LOHI, ISD::SRA,
+ ISD::SREM, ISD::SRL, ISD::SSHLSAT, ISD::SSUBO,
+ ISD::SSUBO_CARRY, ISD::SSUBSAT, ISD::SUB, ISD::SUBC,
+ ISD::SUBE, ISD::UADDO, ISD::UADDO_CARRY, ISD::UADDSAT,
+ ISD::UDIV, ISD::UDIVREM, ISD::UINT_TO_FP, ISD::UMAX,
+ ISD::UMIN, ISD::UMULO, ISD::UMUL_LOHI, ISD::UREM,
+ ISD::USHLSAT, ISD::USUBO, ISD::USUBO_CARRY, ISD::VSELECT,
+ ISD::USUBSAT},
+ MVT::v4i8, Expand);
+
// Operations not directly supported by NVPTX.
for (MVT VT : {MVT::bf16, MVT::f16, MVT::v2bf16, MVT::v2f16, MVT::f32,
- MVT::f64, MVT::i1, MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
+ MVT::f64, MVT::i1, MVT::i8, MVT::i16, MVT::v2i16, MVT::v4i8,
+ MVT::i32, MVT::i64}) {
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::BR_CC, VT, Expand);
}
@@ -473,6 +548,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i32 , Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i32 , Custom);
@@ -493,10 +569,13 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(ISD::ROTR, MVT::i32, Legal);
setOperationAction(ISD::ROTL, MVT::i16, Expand);
+ setOperationAction(ISD::ROTL, MVT::v2i16, Expand);
setOperationAction(ISD::ROTR, MVT::i16, Expand);
+ setOperationAction(ISD::ROTR, MVT::v2i16, Expand);
setOperationAction(ISD::ROTL, MVT::i8, Expand);
setOperationAction(ISD::ROTR, MVT::i8, Expand);
setOperationAction(ISD::BSWAP, MVT::i16, Expand);
+ setOperationAction(ISD::BSWAP, MVT::v2i16, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i64, Expand);
@@ -528,6 +607,10 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8bf16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8bf16, Expand);
// Turn FP truncstore into trunc + store.
// FIXME: vector types should also be expanded
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
@@ -546,12 +629,22 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setTruncStoreAction(VT, MVT::i1, Expand);
}
+ // Expand extloads of integer vectors.
+ setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i16,
+ MVT::v2i8, Expand);
+ setTruncStoreAction(MVT::v2i16, MVT::v2i8, Expand);
+
// This is legal in NVPTX
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
setOperationAction(ISD::ConstantFP, MVT::bf16, Legal);
+ // Lowering of DYNAMIC_STACKALLOC is unsupported.
+ // Custom lower to produce an error.
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
+
// TRAP can be lowered to PTX trap
setOperationAction(ISD::TRAP, MVT::Other, Legal);
@@ -584,6 +677,27 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(ISD::CTLZ, Ty, Legal);
}
+ setI16x2OperationAction(ISD::ABS, MVT::v2i16, Legal, Custom);
+ setI16x2OperationAction(ISD::SMIN, MVT::v2i16, Legal, Custom);
+ setI16x2OperationAction(ISD::SMAX, MVT::v2i16, Legal, Custom);
+ setI16x2OperationAction(ISD::UMIN, MVT::v2i16, Legal, Custom);
+ setI16x2OperationAction(ISD::UMAX, MVT::v2i16, Legal, Custom);
+ setI16x2OperationAction(ISD::CTPOP, MVT::v2i16, Legal, Expand);
+ setI16x2OperationAction(ISD::CTLZ, MVT::v2i16, Legal, Expand);
+
+ setI16x2OperationAction(ISD::ADD, MVT::v2i16, Legal, Custom);
+ setI16x2OperationAction(ISD::SUB, MVT::v2i16, Legal, Custom);
+ setI16x2OperationAction(ISD::MUL, MVT::v2i16, Legal, Custom);
+ setI16x2OperationAction(ISD::SHL, MVT::v2i16, Legal, Custom);
+ setI16x2OperationAction(ISD::SREM, MVT::v2i16, Legal, Custom);
+ setI16x2OperationAction(ISD::UREM, MVT::v2i16, Legal, Custom);
+
+ // Other arithmetic and logic ops are unsupported.
+ setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SRA, ISD::SRL, ISD::MULHS,
+ ISD::MULHU, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
+ ISD::SINT_TO_FP, ISD::UINT_TO_FP},
+ MVT::v2i16, Expand);
+
setOperationAction(ISD::ADDC, MVT::i32, Legal);
setOperationAction(ISD::ADDE, MVT::i32, Legal);
setOperationAction(ISD::SUBC, MVT::i32, Legal);
@@ -596,6 +710,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
}
setOperationAction(ISD::CTTZ, MVT::i16, Expand);
+ setOperationAction(ISD::CTTZ, MVT::v2i16, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
setOperationAction(ISD::CTTZ, MVT::i64, Expand);
@@ -607,8 +722,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
// We have some custom DAG combine patterns for these nodes
- setTargetDAGCombine({ISD::ADD, ISD::AND, ISD::FADD, ISD::MUL, ISD::SHL,
- ISD::SREM, ISD::UREM});
+ setTargetDAGCombine({ISD::ADD, ISD::AND, ISD::EXTRACT_VECTOR_ELT, ISD::FADD,
+ ISD::LOAD, ISD::MUL, ISD::SHL, ISD::SREM, ISD::UREM,
+ ISD::VSELECT});
// setcc for f16x2 and bf16x2 needs special handling to prevent
// legalizer's attempt to scalarize it due to v2i1 not being legal.
@@ -624,9 +740,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
for (const auto &Op : {ISD::FADD, ISD::FMUL, ISD::FSUB, ISD::FMA}) {
setFP16OperationAction(Op, MVT::f16, Legal, Promote);
setFP16OperationAction(Op, MVT::v2f16, Legal, Expand);
- setBF16OperationAction(Op, MVT::bf16, Legal, Promote);
setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand);
// bf16 must be promoted to f32.
+ setBF16OperationAction(Op, MVT::bf16, Legal, Promote);
if (getOperationAction(Op, MVT::bf16) == Promote)
AddPromotedToType(Op, MVT::bf16, MVT::f32);
}
@@ -646,21 +762,34 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// These map to conversion instructions for scalar FP types.
for (const auto &Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT,
ISD::FROUNDEVEN, ISD::FTRUNC}) {
- setOperationAction(Op, MVT::bf16, Legal);
setOperationAction(Op, MVT::f16, Legal);
setOperationAction(Op, MVT::f32, Legal);
setOperationAction(Op, MVT::f64, Legal);
setOperationAction(Op, MVT::v2f16, Expand);
setOperationAction(Op, MVT::v2bf16, Expand);
+ setBF16OperationAction(Op, MVT::bf16, Legal, Promote);
+ if (getOperationAction(Op, MVT::bf16) == Promote)
+ AddPromotedToType(Op, MVT::bf16, MVT::f32);
+ }
+
+ // sm_80 only has conversions between f32 and bf16. Custom lower all other
+ // bf16 conversions.
+ if (STI.hasBF16Math() &&
+ (STI.getSmVersion() < 90 || STI.getPTXVersion() < 78)) {
+ for (MVT VT : {MVT::i1, MVT::i16, MVT::i32, MVT::i64}) {
+ setOperationAction(
+ {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
+ VT, Custom);
+ }
}
setOperationAction(ISD::FROUND, MVT::f16, Promote);
setOperationAction(ISD::FROUND, MVT::v2f16, Expand);
- setOperationAction(ISD::FROUND, MVT::bf16, Promote);
setOperationAction(ISD::FROUND, MVT::v2bf16, Expand);
setOperationAction(ISD::FROUND, MVT::f32, Custom);
setOperationAction(ISD::FROUND, MVT::f64, Custom);
-
+ setOperationAction(ISD::FROUND, MVT::bf16, Promote);
+ AddPromotedToType(ISD::FROUND, MVT::bf16, MVT::f32);
// 'Expand' implements FCOPYSIGN without calling an external library.
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
@@ -674,14 +803,26 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// promoted to f32. v2f16 is expanded to f16, which is then promoted
// to f32.
for (const auto &Op :
- {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FABS}) {
+ {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS}) {
setOperationAction(Op, MVT::f16, Promote);
- setOperationAction(Op, MVT::bf16, Promote);
setOperationAction(Op, MVT::f32, Legal);
setOperationAction(Op, MVT::f64, Legal);
setOperationAction(Op, MVT::v2f16, Expand);
setOperationAction(Op, MVT::v2bf16, Expand);
+ setOperationAction(Op, MVT::bf16, Promote);
+ AddPromotedToType(Op, MVT::bf16, MVT::f32);
+ }
+ for (const auto &Op : {ISD::FABS}) {
+ setOperationAction(Op, MVT::f16, Promote);
+ setOperationAction(Op, MVT::f32, Legal);
+ setOperationAction(Op, MVT::f64, Legal);
+ setOperationAction(Op, MVT::v2f16, Expand);
+ setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand);
+ setBF16OperationAction(Op, MVT::bf16, Legal, Promote);
+ if (getOperationAction(Op, MVT::bf16) == Promote)
+ AddPromotedToType(Op, MVT::bf16, MVT::f32);
}
+
// max.f16, max.f16x2 and max.NaN are supported on sm_80+.
auto GetMinMaxAction = [&](LegalizeAction NotSm80Action) {
bool IsAtLeastSm80 = STI.getSmVersion() >= 80 && STI.getPTXVersion() >= 70;
@@ -689,11 +830,13 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
};
for (const auto &Op : {ISD::FMINNUM, ISD::FMAXNUM}) {
setFP16OperationAction(Op, MVT::f16, GetMinMaxAction(Promote), Promote);
- setBF16OperationAction(Op, MVT::bf16, Legal, Promote);
setOperationAction(Op, MVT::f32, Legal);
setOperationAction(Op, MVT::f64, Legal);
setFP16OperationAction(Op, MVT::v2f16, GetMinMaxAction(Expand), Expand);
setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand);
+ setBF16OperationAction(Op, MVT::bf16, Legal, Promote);
+ if (getOperationAction(Op, MVT::bf16) == Promote)
+ AddPromotedToType(Op, MVT::bf16, MVT::f32);
}
for (const auto &Op : {ISD::FMINIMUM, ISD::FMAXIMUM}) {
setFP16OperationAction(Op, MVT::f16, GetMinMaxAction(Expand), Expand);
@@ -817,8 +960,16 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "NVPTXISD::FUN_SHFR_CLAMP";
case NVPTXISD::IMAD:
return "NVPTXISD::IMAD";
+ case NVPTXISD::BFE:
+ return "NVPTXISD::BFE";
+ case NVPTXISD::BFI:
+ return "NVPTXISD::BFI";
+ case NVPTXISD::PRMT:
+ return "NVPTXISD::PRMT";
case NVPTXISD::SETP_F16X2:
return "NVPTXISD::SETP_F16X2";
+ case NVPTXISD::SETP_BF16X2:
+ return "NVPTXISD::SETP_BF16X2";
case NVPTXISD::Dummy:
return "NVPTXISD::Dummy";
case NVPTXISD::MUL_WIDE_SIGNED:
@@ -1318,7 +1469,7 @@ NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const {
if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
VT.getScalarType() == MVT::i1)
return TypeSplitVector;
- if (Isv2f16Orv2bf16Type(VT))
+ if (Isv2x16VT(VT))
return TypeLegal;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
@@ -2064,6 +2215,18 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
return Chain;
}
+SDValue NVPTXTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ const Function &Fn = DAG.getMachineFunction().getFunction();
+
+ DiagnosticInfoUnsupported NoDynamicAlloca(
+ Fn, "dynamic alloca unsupported by NVPTX backend",
+ SDLoc(Op).getDebugLoc());
+ DAG.getContext()->diagnose(NoDynamicAlloca);
+ auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)};
+ return DAG.getMergeValues(Ops, SDLoc());
+}
+
// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
// (see LegalizeDAG.cpp). This is slow and uses local memory.
// We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5
@@ -2086,43 +2249,99 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
return DAG.getBuildVector(Node->getValueType(0), dl, Ops);
}
-// We can init constant f16x2 with a single .b32 move. Normally it
+// We can init constant f16x2/v2i16/v4i8 with a single .b32 move. Normally it
// would get lowered as two constant loads and vector-packing move.
-// mov.b16 %h1, 0x4000;
-// mov.b16 %h2, 0x3C00;
-// mov.b32 %hh2, {%h2, %h1};
// Instead we want just a constant move:
-// mov.b32 %hh2, 0x40003C00
-//
-// This results in better SASS code with CUDA 7.x. Ptxas in CUDA 8.0
-// generates good SASS in both cases.
+// mov.b32 %r2, 0x40003C00
SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
- if (!(Isv2f16Orv2bf16Type(Op->getValueType(0)) &&
- isa<ConstantFPSDNode>(Op->getOperand(0)) &&
- isa<ConstantFPSDNode>(Op->getOperand(1))))
+ EVT VT = Op->getValueType(0);
+ if (!(Isv2x16VT(VT) || VT == MVT::v4i8))
return Op;
- APInt E0 =
- cast<ConstantFPSDNode>(Op->getOperand(0))->getValueAPF().bitcastToAPInt();
- APInt E1 =
- cast<ConstantFPSDNode>(Op->getOperand(1))->getValueAPF().bitcastToAPInt();
- SDValue Const =
- DAG.getConstant(E1.zext(32).shl(16) | E0.zext(32), SDLoc(Op), MVT::i32);
+ SDLoc DL(Op);
+
+ if (!llvm::all_of(Op->ops(), [](SDValue Operand) {
+ return Operand->isUndef() || isa<ConstantSDNode>(Operand) ||
+ isa<ConstantFPSDNode>(Operand);
+ })) {
+ // Lower a non-constant v4i8 vector as a byte-wise constructed i32, which
+ // allows us to optimize the computation of its constant parts.
+ if (VT == MVT::v4i8) {
+ SDValue C8 = DAG.getConstant(8, DL, MVT::i32);
+ SDValue E01 = DAG.getNode(
+ NVPTXISD::BFI, DL, MVT::i32,
+ DAG.getAnyExtOrTrunc(Op->getOperand(1), DL, MVT::i32),
+ DAG.getAnyExtOrTrunc(Op->getOperand(0), DL, MVT::i32), C8, C8);
+ SDValue E012 =
+ DAG.getNode(NVPTXISD::BFI, DL, MVT::i32,
+ DAG.getAnyExtOrTrunc(Op->getOperand(2), DL, MVT::i32),
+ E01, DAG.getConstant(16, DL, MVT::i32), C8);
+ SDValue E0123 =
+ DAG.getNode(NVPTXISD::BFI, DL, MVT::i32,
+ DAG.getAnyExtOrTrunc(Op->getOperand(3), DL, MVT::i32),
+ E012, DAG.getConstant(24, DL, MVT::i32), C8);
+ return DAG.getNode(ISD::BITCAST, DL, VT, E0123);
+ }
+ return Op;
+ }
+
+ // Get the Nth operand as an APInt(32). Undef values are treated as 0.
+ auto GetOperand = [](SDValue Op, int N) -> APInt {
+ const SDValue &Operand = Op->getOperand(N);
+ EVT VT = Op->getValueType(0);
+ if (Operand->isUndef())
+ return APInt(32, 0);
+ APInt Value;
+ if (VT == MVT::v2f16 || VT == MVT::v2bf16)
+ Value = cast<ConstantFPSDNode>(Operand)->getValueAPF().bitcastToAPInt();
+ else if (VT == MVT::v2i16 || VT == MVT::v4i8)
+ Value = cast<ConstantSDNode>(Operand)->getAPIntValue();
+ else
+ llvm_unreachable("Unsupported type");
+ // i8 values are carried around as i16, so we need to zero out the upper
+ // bits so they do not get in the way when combining individual byte values.
+ if (VT == MVT::v4i8)
+ Value = Value.trunc(8);
+ return Value.zext(32);
+ };
+ APInt Value;
+ if (Isv2x16VT(VT)) {
+ Value = GetOperand(Op, 0) | GetOperand(Op, 1).shl(16);
+ } else if (VT == MVT::v4i8) {
+ Value = GetOperand(Op, 0) | GetOperand(Op, 1).shl(8) |
+ GetOperand(Op, 2).shl(16) | GetOperand(Op, 3).shl(24);
+ } else {
+ llvm_unreachable("Unsupported type");
+ }
+ SDValue Const = DAG.getConstant(Value, SDLoc(Op), MVT::i32);
return DAG.getNode(ISD::BITCAST, SDLoc(Op), Op->getValueType(0), Const);
}
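As a worked example of the constant path above (a minimal sketch in plain C++, not the DAG API): the GetOperand/shl folding packs the v4i8 constant <1, 2, 3, 4> into the single immediate 0x04030201, fit for one mov.b32.

#include <cstdint>
#include <cstdio>

// Sketch of the constant folding above: four i8 lanes (or two 16-bit
// lanes) collapse into one 32-bit immediate.
static uint32_t packV4I8(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3) {
  return uint32_t(b0) | uint32_t(b1) << 8 | uint32_t(b2) << 16 |
         uint32_t(b3) << 24;
}

int main() {
  printf("0x%08x\n", packV4I8(1, 2, 3, 4)); // prints 0x04030201
}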
SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
SDValue Index = Op->getOperand(1);
+ SDValue Vector = Op->getOperand(0);
+ SDLoc DL(Op);
+ EVT VectorVT = Vector.getValueType();
+
+ if (VectorVT == MVT::v4i8) {
+ SDValue BFE =
+ DAG.getNode(NVPTXISD::BFE, DL, MVT::i32,
+ {Vector,
+ DAG.getNode(ISD::MUL, DL, MVT::i32,
+ DAG.getZExtOrTrunc(Index, DL, MVT::i32),
+ DAG.getConstant(8, DL, MVT::i32)),
+ DAG.getConstant(8, DL, MVT::i32)});
+ return DAG.getAnyExtOrTrunc(BFE, DL, Op->getValueType(0));
+ }
+
// Constant index will be matched by tablegen.
if (isa<ConstantSDNode>(Index.getNode()))
return Op;
// Extract individual elements and select one of them.
- SDValue Vector = Op->getOperand(0);
- EVT VectorVT = Vector.getValueType();
- assert(VectorVT == MVT::v2f16 && "Unexpected vector type.");
+ assert(Isv2x16VT(VectorVT) && "Unexpected vector type.");
EVT EltVT = VectorVT.getVectorElementType();
SDLoc dl(Op.getNode());
@@ -2134,6 +2353,49 @@ SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
ISD::CondCode::SETEQ);
}
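The v4i8 path above lowers the extract to NVPTXISD::BFE with pos = index * 8 and width 8. A plain-C++ model of that unsigned bit-field extract (a sketch, not the full PTX semantics):

#include <cstdint>
#include <cstdio>

// Model of an unsigned bit-field extract: take `len` bits of `val`
// starting at bit `pos`. For a v4i8 lane, pos = index * 8 and len = 8.
static uint32_t bfe(uint32_t val, uint32_t pos, uint32_t len) {
  return (val >> pos) & ((1u << len) - 1);
}

int main() {
  uint32_t v = 0x04030201; // <1, 2, 3, 4> packed as v4i8
  printf("%u\n", bfe(v, 2 * 8, 8)); // lane 2 -> prints 3
}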
+SDValue NVPTXTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Vector = Op->getOperand(0);
+ EVT VectorVT = Vector.getValueType();
+
+ if (VectorVT != MVT::v4i8)
+ return Op;
+ SDLoc DL(Op);
+ SDValue Value = Op->getOperand(1);
+ if (Value->isUndef())
+ return Vector;
+
+ SDValue Index = Op->getOperand(2);
+
+ SDValue BFI =
+ DAG.getNode(NVPTXISD::BFI, DL, MVT::i32,
+ {DAG.getZExtOrTrunc(Value, DL, MVT::i32), Vector,
+ DAG.getNode(ISD::MUL, DL, MVT::i32,
+ DAG.getZExtOrTrunc(Index, DL, MVT::i32),
+ DAG.getConstant(8, DL, MVT::i32)),
+ DAG.getConstant(8, DL, MVT::i32)});
+ return DAG.getNode(ISD::BITCAST, DL, Op->getValueType(0), BFI);
+}
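The insert counterpart above uses NVPTXISD::BFI to splice the new byte in. A matching plain-C++ model (a sketch; the argument order mirrors the node's operands: value, base, pos, width):

#include <cstdint>
#include <cstdio>

// Model of a bit-field insert: place the low `len` bits of `val` into
// `base` at bit `pos`. For a v4i8 lane, pos = index * 8 and len = 8.
static uint32_t bfi(uint32_t val, uint32_t base, uint32_t pos, uint32_t len) {
  uint32_t mask = ((1u << len) - 1) << pos;
  return (base & ~mask) | ((val << pos) & mask);
}

int main() {
  uint32_t v = 0x04030201; // <1, 2, 3, 4>
  printf("0x%08x\n", bfi(9, v, 1 * 8, 8)); // lane 1 := 9 -> 0x04030901
}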
+
+SDValue NVPTXTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue V1 = Op.getOperand(0);
+ EVT VectorVT = V1.getValueType();
+ if (VectorVT != MVT::v4i8 || Op.getValueType() != MVT::v4i8)
+ return Op;
+
+ // Lower shuffle to PRMT instruction.
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
+ SDValue V2 = Op.getOperand(1);
+ uint32_t Selector = 0;
+ for (auto I : llvm::enumerate(SVN->getMask()))
+ Selector |= (I.value() << (I.index() * 4));
+
+ SDLoc DL(Op);
+ return DAG.getNode(NVPTXISD::PRMT, DL, MVT::v4i8, V1, V2,
+ DAG.getConstant(Selector, DL, MVT::i32),
+ DAG.getConstant(NVPTX::PTXPrmtMode::NONE, DL, MVT::i32));
+}
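The selector built above packs one 4-bit source-byte index per result lane. A simplified plain-C++ model of default-mode prmt (a sketch that ignores the sign-replicate bit of each nibble):

#include <cstdint>
#include <cstdio>

// Simplified model of prmt.b32 in default mode: the 8 bytes of {lo, hi}
// form a pool; selector nibble i picks the pool byte for result lane i.
static uint32_t prmt(uint32_t lo, uint32_t hi, uint32_t selector) {
  uint64_t pool = (uint64_t(hi) << 32) | lo;
  uint32_t result = 0;
  for (int i = 0; i < 4; ++i) {
    uint32_t idx = (selector >> (i * 4)) & 0x7; // pool byte index 0..7
    result |= uint32_t((pool >> (idx * 8)) & 0xFF) << (i * 8);
  }
  return result;
}

int main() {
  // Shuffle mask <3, 2, 1, 0> packs into selector 0x0123 and reverses V1.
  printf("0x%08x\n", prmt(0x04030201, 0, 0x0123)); // prints 0x01020304
}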
/// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which
/// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
/// amount, or
@@ -2347,7 +2609,56 @@ SDValue NVPTXTargetLowering::LowerFROUND64(SDValue Op,
return DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA);
}
+SDValue NVPTXTargetLowering::LowerINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(STI.getSmVersion() < 90 || STI.getPTXVersion() < 78);
+
+ if (Op.getValueType() == MVT::bf16) {
+ SDLoc Loc(Op);
+ return DAG.getNode(
+ ISD::FP_ROUND, Loc, MVT::bf16,
+ DAG.getNode(Op.getOpcode(), Loc, MVT::f32, Op.getOperand(0)),
+ DAG.getIntPtrConstant(0, Loc));
+ }
+
+ // Everything else is considered legal.
+ return Op;
+}
+SDValue NVPTXTargetLowering::LowerFP_TO_INT(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(STI.getSmVersion() < 90 || STI.getPTXVersion() < 78);
+
+ if (Op.getOperand(0).getValueType() == MVT::bf16) {
+ SDLoc Loc(Op);
+ return DAG.getNode(
+ Op.getOpcode(), Loc, Op.getValueType(),
+ DAG.getNode(ISD::FP_EXTEND, Loc, MVT::f32, Op.getOperand(0)));
+ }
+
+ // Everything else is considered legal.
+ return Op;
+}
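Both hooks above take the same detour: bf16 has no direct integer conversions before sm_90/PTX 7.8, so the value goes through f32. A host-side sketch of the int -> f32 -> bf16 leg, modeling FP_ROUND as round-to-nearest-even on the f32 bits (an illustration, not the exact hardware path):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Sketch: convert an int to bf16 by way of f32, rounding the f32 bit
// pattern to nearest-even before keeping the top 16 bits.
static uint16_t f32ToBf16(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  uint32_t rounding = 0x7FFF + ((bits >> 16) & 1);
  return uint16_t((bits + rounding) >> 16);
}

int main() {
  printf("0x%04x\n", f32ToBf16(float(1))); // prints 0x3f80 (bf16 1.0)
}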
+
+static SDValue LowerVectorArith(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ if (Op.getValueType() != MVT::v2i16)
+ return Op;
+ EVT EltVT = Op.getValueType().getVectorElementType();
+ SmallVector<SDValue> VecElements;
+ for (int I = 0, E = Op.getValueType().getVectorNumElements(); I < E; I++) {
+ SmallVector<SDValue> ScalarArgs;
+ llvm::transform(Op->ops(), std::back_inserter(ScalarArgs),
+ [&](const SDUse &O) {
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
+ O.get(), DAG.getIntPtrConstant(I, DL));
+ });
+ VecElements.push_back(DAG.getNode(Op.getOpcode(), DL, EltVT, ScalarArgs));
+ }
+ SDValue V =
+ DAG.getNode(ISD::BUILD_VECTOR, DL, Op.getValueType(), VecElements);
+ return V;
+}
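A plain-C++ analogue of the scalarization above (a sketch; the types and names are illustrative): extract each lane of every operand, apply the scalar op, and rebuild the vector.

#include <array>
#include <cstdint>
#include <cstdio>

// Analogue of LowerVectorArith: unroll a v2i16 op into two i16 ops and
// reassemble the result vector.
using V2I16 = std::array<uint16_t, 2>;

template <typename Op>
static V2I16 scalarize(const V2I16 &a, const V2I16 &b, Op op) {
  V2I16 r{};
  for (int i = 0; i < 2; ++i)
    r[i] = op(a[i], b[i]); // per-lane EXTRACT_VECTOR_ELT + scalar op
  return r;
}

int main() {
  V2I16 r = scalarize({10, 7}, {3, 5},
                      [](uint16_t x, uint16_t y) { return uint16_t(x % y); });
  printf("<%u, %u>\n", r[0], r[1]); // prints <1, 2>
}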
SDValue
NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -2366,6 +2677,10 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return Op;
case ISD::EXTRACT_VECTOR_ELT:
return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT:
+ return LowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::VECTOR_SHUFFLE:
+ return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::CONCAT_VECTORS:
return LowerCONCAT_VECTORS(Op, DAG);
case ISD::STORE:
@@ -2381,10 +2696,30 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerSelect(Op, DAG);
case ISD::FROUND:
return LowerFROUND(Op, DAG);
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ return LowerINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ return LowerFP_TO_INT(Op, DAG);
case ISD::VAARG:
return LowerVAARG(Op, DAG);
case ISD::VASTART:
return LowerVASTART(Op, DAG);
+ case ISD::ABS:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::SHL:
+ case ISD::SREM:
+ case ISD::UREM:
+ return LowerVectorArith(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC:
+ return LowerDYNAMIC_STACKALLOC(Op, DAG);
default:
llvm_unreachable("Custom lowering not defined for operation");
}
@@ -2468,9 +2803,10 @@ SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType() == MVT::i1)
return LowerLOADi1(Op, DAG);
- // v2f16 is legal, so we can't rely on legalizer to handle unaligned
- // loads and have to handle it here.
- if (Isv2f16Orv2bf16Type(Op.getValueType())) {
+ // v2f16/v2bf16/v2i16/v4i8 are legal, so we can't rely on legalizer to handle
+ // unaligned loads and have to handle it here.
+ EVT VT = Op.getValueType();
+ if (Isv2x16VT(VT) || VT == MVT::v4i8) {
LoadSDNode *Load = cast<LoadSDNode>(Op);
EVT MemVT = Load->getMemoryVT();
if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
@@ -2515,13 +2851,13 @@ SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// v2x16 and v4i8 are legal, so we can't rely on the legalizer to handle
// unaligned stores and have to handle it here.
- if (Isv2f16Orv2bf16Type(VT) &&
+ if ((Isv2x16VT(VT) || VT == MVT::v4i8) &&
!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
VT, *Store->getMemOperand()))
return expandUnalignedStore(Store, DAG);
- // v2f16 and v2bf16 don't need special handling.
- if (VT == MVT::v2f16 || VT == MVT::v2bf16)
+ // v2f16, v2bf16, v2i16 and v4i8 don't need special handling.
+ if (Isv2x16VT(VT) || VT == MVT::v4i8)
return SDValue();
if (VT.isVector())
@@ -2562,6 +2898,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
case MVT::v4f32:
case MVT::v8f16: // <4 x f16x2>
case MVT::v8bf16: // <4 x bf16x2>
+ case MVT::v8i16: // <4 x i16x2>
// This is a "native" vector type
break;
}
@@ -2606,8 +2943,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
// v8x16 is a special case. PTX doesn't have an st.v8.x16
// instruction. Instead, we split the vector into v2x16 chunks and
// store them with st.v4.b32.
- assert(Isf16Orbf16Type(EltVT.getSimpleVT()) &&
- "Wrong type for the vector.");
+ assert(Is16bitsType(EltVT.getSimpleVT()) && "Wrong type for the vector.");
Opcode = NVPTXISD::StoreV4;
StoreF16x2 = true;
break;
@@ -2793,7 +3129,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
EVT LoadVT = EltVT;
if (EltVT == MVT::i1)
LoadVT = MVT::i8;
- else if (Isv2f16Orv2bf16Type(EltVT))
+ else if (Isv2x16VT(EltVT) || EltVT == MVT::v4i8)
// getLoad needs a vector type, but it can't handle
// vectors which contain v2x16 or v4i8 elements. So we must load
// using i32 here and then bitcast back.
@@ -2819,7 +3155,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
if (EltVT == MVT::i1)
Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Elt);
// v2x16/v4i8 was loaded as an i32. Now we must bitcast it back.
- else if (Isv2f16Orv2bf16Type(EltVT))
+ else if (EltVT != LoadVT)
Elt = DAG.getNode(ISD::BITCAST, dl, EltVT, Elt);
// If a promoted integer type is used, truncate down to the original
@@ -2978,12 +3314,11 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
}
void NVPTXTargetLowering::LowerAsmOperandForConstraint(
- SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
+ SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
- if (Constraint.length() > 1)
+ if (Constraint.size() > 1)
return;
- else
- TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
@@ -4694,13 +5029,13 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
//===----------------------------------------------------------------------===//
bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
- CodeGenOpt::Level OptLevel) const {
+ CodeGenOptLevel OptLevel) const {
// Always honor command-line argument
if (FMAContractLevelOpt.getNumOccurrences() > 0)
return FMAContractLevelOpt > 0;
// Do not contract if we're not optimizing the code.
- if (OptLevel == 0)
+ if (OptLevel == CodeGenOptLevel::None)
return false;
// Honor TargetOptions flags that explicitly say fusion is okay.
@@ -4724,10 +5059,9 @@ bool NVPTXTargetLowering::allowUnsafeFPMath(MachineFunction &MF) const {
/// operands N0 and N1. This is a helper for PerformADDCombine that is
/// called with the default operands, and if that fails, with commuted
/// operands.
-static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
- TargetLowering::DAGCombinerInfo &DCI,
- const NVPTXSubtarget &Subtarget,
- CodeGenOpt::Level OptLevel) {
+static SDValue PerformADDCombineWithOperands(
+ SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI,
+ const NVPTXSubtarget &Subtarget, CodeGenOptLevel OptLevel) {
SelectionDAG &DAG = DCI.DAG;
// Skip non-integer, non-scalar case
EVT VT=N0.getValueType();
@@ -4742,7 +5076,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
// Since integer multiply-add costs the same as integer multiply
// but is more costly than integer add, do the fusion only when
// the mul is only used in the add.
- if (OptLevel==CodeGenOpt::None || VT != MVT::i32 ||
+ if (OptLevel == CodeGenOptLevel::None || VT != MVT::i32 ||
!N0.getNode()->hasOneUse())
return SDValue();
@@ -4839,7 +5173,7 @@ static SDValue PerformStoreRetvalCombine(SDNode *N) {
static SDValue PerformADDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const NVPTXSubtarget &Subtarget,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4867,6 +5201,32 @@ static SDValue PerformANDCombine(SDNode *N,
}
SDValue AExt;
+
+ // Convert BFE -> truncate i16 -> and 255
+ // to just BFE -> truncate i16, as the value already has all the bits in
+ // the right places.
+ if (Val.getOpcode() == ISD::TRUNCATE) {
+ SDValue BFE = Val.getOperand(0);
+ if (BFE.getOpcode() != NVPTXISD::BFE)
+ return SDValue();
+
+ ConstantSDNode *BFEBits = dyn_cast<ConstantSDNode>(BFE.getOperand(0));
+ if (!BFEBits)
+ return SDValue();
+ uint64_t BFEBitsVal = BFEBits->getZExtValue();
+
+ ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask);
+ if (!MaskCnst) {
+ // Not an AND with a constant
+ return SDValue();
+ }
+ uint64_t MaskVal = MaskCnst->getZExtValue();
+
+ if (MaskVal != (uint64_t(1) << BFEBitsVal) - 1)
+ return SDValue();
+ // If we get here, the AND is unnecessary. Just replace it with the trunc.
+ DCI.CombineTo(N, Val, false);
+ }
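  // A minimal sketch of why the mask is redundant, assuming an unsigned
  // BFE of width W (here W == BFEBitsVal):
  //   bfe(x, pos, W)              == (x >> pos) & ((1 << W) - 1)
  //   trunc(bfe(x, pos, W))       already has zeros above bit W-1
  //   trunc(...) & ((1 << W) - 1) == trunc(...)   -- the AND is a no-op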
// Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and
if (Val.getOpcode() == ISD::ANY_EXTEND) {
AExt = Val;
@@ -4929,11 +5289,11 @@ static SDValue PerformANDCombine(SDNode *N,
static SDValue PerformREMCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
assert(N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM);
// Don't do anything at less than -O2.
- if (OptLevel < CodeGenOpt::Default)
+ if (OptLevel < CodeGenOptLevel::Default)
return SDValue();
SelectionDAG &DAG = DCI.DAG;
@@ -5099,8 +5459,8 @@ static SDValue TryMULWIDECombine(SDNode *N,
/// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
static SDValue PerformMULCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
- CodeGenOpt::Level OptLevel) {
- if (OptLevel > 0) {
+ CodeGenOptLevel OptLevel) {
+ if (OptLevel > CodeGenOptLevel::None) {
// Try mul.wide combining at OptLevel > 0
if (SDValue Ret = TryMULWIDECombine(N, DCI))
return Ret;
@@ -5112,8 +5472,8 @@ static SDValue PerformMULCombine(SDNode *N,
/// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
static SDValue PerformSHLCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
- CodeGenOpt::Level OptLevel) {
- if (OptLevel > 0) {
+ CodeGenOptLevel OptLevel) {
+ if (OptLevel > CodeGenOptLevel::None) {
// Try mul.wide combining at OptLevel > 0
if (SDValue Ret = TryMULWIDECombine(N, DCI))
return Ret;
@@ -5123,12 +5483,17 @@ static SDValue PerformSHLCombine(SDNode *N,
}
static SDValue PerformSETCCCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ unsigned int SmVersion) {
EVT CCType = N->getValueType(0);
SDValue A = N->getOperand(0);
SDValue B = N->getOperand(1);
- if (CCType != MVT::v2i1 || A.getValueType() != MVT::v2f16)
+ EVT AType = A.getValueType();
+ if (!(CCType == MVT::v2i1 && (AType == MVT::v2f16 || AType == MVT::v2bf16)))
+ return SDValue();
+
+ if (A.getValueType() == MVT::v2bf16 && SmVersion < 90)
return SDValue();
SDLoc DL(N);
@@ -5136,16 +5501,133 @@ static SDValue PerformSETCCCombine(SDNode *N,
// convert back to v2i1. The returned result will be scalarized by
// the legalizer, but the comparison will remain a single vector
// instruction.
- SDValue CCNode = DCI.DAG.getNode(NVPTXISD::SETP_F16X2, DL,
- DCI.DAG.getVTList(MVT::i1, MVT::i1),
- {A, B, N->getOperand(2)});
+ SDValue CCNode = DCI.DAG.getNode(
+ A.getValueType() == MVT::v2f16 ? NVPTXISD::SETP_F16X2
+ : NVPTXISD::SETP_BF16X2,
+ DL, DCI.DAG.getVTList(MVT::i1, MVT::i1), {A, B, N->getOperand(2)});
return DCI.DAG.getNode(ISD::BUILD_VECTOR, DL, CCType, CCNode.getValue(0),
CCNode.getValue(1));
}
+static SDValue PerformEXTRACTCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue Vector = N->getOperand(0);
+ SDLoc DL(N);
+ EVT VectorVT = Vector.getValueType();
+ if (Vector->getOpcode() == ISD::LOAD && VectorVT.isSimple() &&
+ IsPTXVectorType(VectorVT.getSimpleVT()))
+ return SDValue(); // Native vector loads already combine nicely with
+ // extract_vector_elt, except for v4i8.
+ // Don't mess with singletons or v2*16 types; we already handle them OK.
+ if (VectorVT.getVectorNumElements() == 1 || Isv2x16VT(VectorVT) ||
+ VectorVT == MVT::v4i8)
+ return SDValue();
+
+ uint64_t VectorBits = VectorVT.getSizeInBits();
+ // We only handle the types we can extract in-register.
+ if (!(VectorBits == 16 || VectorBits == 32 || VectorBits == 64))
+ return SDValue();
+
+ ConstantSDNode *Index = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ // Index == 0 is handled by generic DAG combiner.
+ if (!Index || Index->getZExtValue() == 0)
+ return SDValue();
+
+ MVT IVT = MVT::getIntegerVT(VectorBits);
+ EVT EltVT = VectorVT.getVectorElementType();
+ EVT EltIVT = EltVT.changeTypeToInteger();
+ uint64_t EltBits = EltVT.getScalarSizeInBits();
+
+ SDValue Result = DCI.DAG.getNode(
+ ISD::TRUNCATE, DL, EltIVT,
+ DCI.DAG.getNode(
+ ISD::SRA, DL, IVT, DCI.DAG.getNode(ISD::BITCAST, DL, IVT, Vector),
+ DCI.DAG.getConstant(Index->getZExtValue() * EltBits, DL, IVT)));
+
+ // If element has non-integer type, bitcast it back to the expected type.
+ if (EltVT != EltIVT)
+ Result = DCI.DAG.getNode(ISD::BITCAST, DL, EltVT, Result);
+ // Past the legalizer, we may need to extend i8 -> i16 to match the register type.
+ if (EltVT != N->getValueType(0))
+ Result = DCI.DAG.getNode(ISD::ANY_EXTEND, DL, N->getValueType(0), Result);
+
+ return Result;
+}
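A plain-C++ model of the combine above (a sketch; an unsigned shift stands in for the SRA, since only the low element bits survive the truncate): a vector held in one integer register yields lane i by shifting i * EltBits and truncating, with no memory round-trip.

#include <cstdint>
#include <cstdio>

// Model: extract 16-bit lane `index` from a v4i16 held in a 64-bit
// register via shift + truncate.
static uint16_t extractLane16(uint64_t bits, unsigned index) {
  return uint16_t(bits >> (index * 16)); // truncation keeps the low 16 bits
}

int main() {
  uint64_t v = 0x0004000300020001ull; // <1, 2, 3, 4> as v4i16
  printf("%u\n", extractLane16(v, 2)); // prints 3
}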
+
+static SDValue PerformVSELECTCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue VA = N->getOperand(1);
+ EVT VectorVT = VA.getValueType();
+ if (VectorVT != MVT::v4i8)
+ return SDValue();
+
+ // We need to split vselect into individual per-element operations. Because
+ // we use BFE/BFI instructions for byte extraction/insertion, we end up with
+ // 32-bit values anyway, so we may as well do the comparison as i32 to avoid
+ // the conversions to/from i16 normally used for i8 values.
+ SmallVector<SDValue, 4> E;
+ SDLoc DL(N);
+ SDValue VCond = N->getOperand(0);
+ SDValue VB = N->getOperand(2);
+ for (int I = 0; I < 4; ++I) {
+ SDValue C = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i1, VCond,
+ DCI.DAG.getConstant(I, DL, MVT::i32));
+ SDValue EA = DCI.DAG.getAnyExtOrTrunc(
+ DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i8, VA,
+ DCI.DAG.getConstant(I, DL, MVT::i32)),
+ DL, MVT::i32);
+ SDValue EB = DCI.DAG.getAnyExtOrTrunc(
+ DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i8, VB,
+ DCI.DAG.getConstant(I, DL, MVT::i32)),
+ DL, MVT::i32);
+ E.push_back(DCI.DAG.getAnyExtOrTrunc(
+ DCI.DAG.getNode(ISD::SELECT, DL, MVT::i32, C, EA, EB), DL, MVT::i8));
+ }
+ return DCI.DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i8, E);
+}
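A plain-C++ analogue of the per-lane split above (a sketch): each result byte is selected by its own condition bit, with the work done at i32 width.

#include <cstdint>
#include <cstdio>

// Analogue of the v4i8 vselect split: pick each byte lane from `a` or `b`
// according to its condition bit, operating on 32-bit values throughout.
static uint32_t selectV4I8(const bool cond[4], uint32_t a, uint32_t b) {
  uint32_t result = 0;
  for (int i = 0; i < 4; ++i) {
    uint32_t ea = (a >> (i * 8)) & 0xFF; // extract lane i of each operand
    uint32_t eb = (b >> (i * 8)) & 0xFF;
    result |= (cond[i] ? ea : eb) << (i * 8);
  }
  return result;
}

int main() {
  bool c[4] = {true, false, true, false};
  printf("0x%08x\n", selectV4I8(c, 0x44332211, 0x88776655)); // 0x88336611
}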
+
+static SDValue PerformLOADCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+
+ // Lower a v16i8 load into a LoadV4 operation with i32 results instead of
+ // letting ReplaceLoadVector split it into smaller loads during legalization.
+ // This is done at dag-combine1 time, so that vector operations with i8
+ // elements can be optimised away instead of being needlessly split during
+ // legalization, which involves storing to the stack and loading it back.
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v16i8)
+ return SDValue();
+
+ SDLoc DL(N);
+
+ // Create a v4i32 vector load operation, effectively <4 x v4i8>.
+ unsigned Opc = NVPTXISD::LoadV4;
+ EVT NewVT = MVT::v4i32;
+ EVT EltVT = NewVT.getVectorElementType();
+ unsigned NumElts = NewVT.getVectorNumElements();
+ EVT RetVTs[] = {EltVT, EltVT, EltVT, EltVT, MVT::Other};
+ SDVTList RetVTList = DAG.getVTList(RetVTs);
+ SmallVector<SDValue, 8> Ops(N->ops());
+ Ops.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL));
+ SDValue NewLoad = DAG.getMemIntrinsicNode(Opc, DL, RetVTList, Ops, NewVT,
+ LD->getMemOperand());
+ SDValue NewChain = NewLoad.getValue(NumElts);
+
+ // Create a vector of the same type returned by the original load.
+ SmallVector<SDValue, 4> Elts;
+ for (unsigned i = 0; i < NumElts; i++)
+ Elts.push_back(NewLoad.getValue(i));
+ return DCI.DAG.getMergeValues(
+ {DCI.DAG.getBitcast(VT, DCI.DAG.getBuildVector(NewVT, DL, Elts)),
+ NewChain},
+ DL);
+}
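The widened load above is bit-preserving: the four i32 results of one ld.v4.b32 carry exactly the bytes a v16i8 load would. A host-side sketch of the reinterpretation (assumes little-endian byte order, as on NVPTX):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Sketch: viewing 16 loaded bytes as four 32-bit words loses nothing;
// the final bitcast back to v16i8 is a pure reinterpretation.
int main() {
  uint8_t bytes[16];
  for (int i = 0; i < 16; ++i)
    bytes[i] = uint8_t(i);

  uint32_t words[4];
  std::memcpy(words, bytes, sizeof(words)); // the "<4 x i32>" view
  printf("word[0] = 0x%08x\n", words[0]);   // prints 0x03020100
}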
+
SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
- CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel();
+ CodeGenOptLevel OptLevel = getTargetMachine().getOptLevel();
switch (N->getOpcode()) {
default: break;
case ISD::ADD:
@@ -5161,11 +5643,17 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SREM:
return PerformREMCombine(N, DCI, OptLevel);
case ISD::SETCC:
- return PerformSETCCCombine(N, DCI);
+ return PerformSETCCCombine(N, DCI, STI.getSmVersion());
+ case ISD::LOAD:
+ return PerformLOADCombine(N, DCI);
case NVPTXISD::StoreRetval:
case NVPTXISD::StoreRetvalV2:
case NVPTXISD::StoreRetvalV4:
return PerformStoreRetvalCombine(N);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return PerformEXTRACTCombine(N, DCI);
+ case ISD::VSELECT:
+ return PerformVSELECTCombine(N, DCI);
}
return SDValue();
}
@@ -5197,7 +5685,9 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
case MVT::v4i32:
case MVT::v4f16:
case MVT::v4f32:
- case MVT::v8f16: // <4 x f16x2>
+ case MVT::v8f16: // <4 x f16x2>
+ case MVT::v8bf16: // <4 x bf16x2>
+ case MVT::v8i16: // <4 x i16x2>
// This is a "native" vector type
break;
}
@@ -5231,7 +5721,7 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
unsigned Opcode = 0;
SDVTList LdResVTs;
- bool LoadF16x2 = false;
+ bool Load16x2 = false;
switch (NumElts) {
default:
@@ -5250,11 +5740,23 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
// v8x16 is a special case. PTX doesn't have an ld.v8.x16
// instruction. Instead, we split the vector into v2x16 chunks and
// load them with ld.v4.b32.
- assert(Isf16Orbf16Type(EltVT.getSimpleVT()) &&
- "Unsupported v8 vector type.");
- LoadF16x2 = true;
+ assert(Is16bitsType(EltVT.getSimpleVT()) && "Unsupported v8 vector type.");
+ Load16x2 = true;
Opcode = NVPTXISD::LoadV4;
- EVT VVT = (EltVT == MVT::f16) ? MVT::v2f16 : MVT::v2bf16;
+ EVT VVT;
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ case MVT::f16:
+ VVT = MVT::v2f16;
+ break;
+ case MVT::bf16:
+ VVT = MVT::v2bf16;
+ break;
+ case MVT::i16:
+ VVT = MVT::v2i16;
+ break;
+ default:
+ llvm_unreachable("Unsupported v8 vector type.");
+ }
EVT ListVTs[] = {VVT, VVT, VVT, VVT, MVT::Other};
LdResVTs = DAG.getVTList(ListVTs);
break;
@@ -5273,7 +5775,7 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
LD->getMemOperand());
SmallVector<SDValue, 8> ScalarRes;
- if (LoadF16x2) {
+ if (Load16x2) {
// Split v2x16 subvectors back into individual elements.
NumElts /= 2;
for (unsigned i = 0; i < NumElts; ++i) {
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index ccd80359bf80..06adc0c47f05 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -57,6 +57,10 @@ enum NodeType : unsigned {
MUL_WIDE_UNSIGNED,
IMAD,
SETP_F16X2,
+ SETP_BF16X2,
+ BFE,
+ BFI,
+ PRMT,
Dummy,
LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
@@ -509,6 +513,8 @@ public:
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+
std::string
getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
const SmallVectorImpl<ISD::OutputArg> &, MaybeAlign retAlignment,
@@ -520,7 +526,7 @@ public:
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
SelectionDAG &DAG) const override;
- void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
@@ -556,7 +562,7 @@ public:
unsigned combineRepeatedFPDivisors() const override { return 2; }
- bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;
+ bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const;
bool allowUnsafeFPMath(MachineFunction &MF) const;
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
@@ -583,6 +589,12 @@ public:
AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override {
+ // There's rarely any point of packing something into a vector type if we
+ // already have the source data.
+ return true;
+ }
+
private:
const NVPTXSubtarget &STI; // cache the subtarget here
SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
@@ -590,11 +602,16 @@ private:
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
+
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
@@ -617,6 +634,7 @@ private:
Align getArgumentAlignment(SDValue Callee, const CallBase *CB, Type *Ty,
unsigned Idx, const DataLayout &DL) const;
};
+
} // namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index b98f76ed4b38..13665985f52e 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -12,11 +12,6 @@
include "NVPTXInstrFormats.td"
-// A NOP instruction
-let hasSideEffects = false in {
- def NOP : NVPTXInst<(outs), (ins), "", []>;
-}
-
let OperandType = "OPERAND_IMMEDIATE" in {
def f16imm : Operand<f16>;
def bf16imm : Operand<bf16>;
@@ -81,6 +76,10 @@ def CmpLT : PatLeaf<(i32 2)>;
def CmpLE : PatLeaf<(i32 3)>;
def CmpGT : PatLeaf<(i32 4)>;
def CmpGE : PatLeaf<(i32 5)>;
+def CmpLO : PatLeaf<(i32 6)>;
+def CmpLS : PatLeaf<(i32 7)>;
+def CmpHI : PatLeaf<(i32 8)>;
+def CmpHS : PatLeaf<(i32 9)>;
def CmpEQU : PatLeaf<(i32 10)>;
def CmpNEU : PatLeaf<(i32 11)>;
def CmpLTU : PatLeaf<(i32 12)>;
@@ -112,6 +111,21 @@ def VecElement : Operand<i32> {
let PrintMethod = "printVecElement";
}
+// PRMT modes
+// These must match the enum in NVPTX.h
+def PrmtNONE : PatLeaf<(i32 0x0)>;
+def PrmtF4E : PatLeaf<(i32 0x1)>;
+def PrmtB4E : PatLeaf<(i32 0x2)>;
+def PrmtRC8 : PatLeaf<(i32 0x3)>;
+def PrmtECL : PatLeaf<(i32 0x4)>;
+def PrmtECR : PatLeaf<(i32 0x5)>;
+def PrmtRC16 : PatLeaf<(i32 0x6)>;
+
+def PrmtMode : Operand<i32> {
+ let PrintMethod = "printPrmtMode";
+}
+
+
//===----------------------------------------------------------------------===//
// NVPTX Instruction Predicate Definitions
//===----------------------------------------------------------------------===//
@@ -165,6 +179,7 @@ class ValueToRegClass<ValueType T> {
NVPTXRegClass ret = !cond(
!eq(name, "i1"): Int1Regs,
!eq(name, "i16"): Int16Regs,
+ !eq(name, "v2i16"): Int32Regs,
!eq(name, "i32"): Int32Regs,
!eq(name, "i64"): Int64Regs,
!eq(name, "f16"): Int16Regs,
@@ -199,11 +214,11 @@ multiclass I3<string OpcStr, SDNode OpNode> {
def i32rr :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, "32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
+ [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), (i32 Int32Regs:$b)))]>;
def i32ri :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
!strconcat(OpcStr, "32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), imm:$b))]>;
def i16rr :
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
!strconcat(OpcStr, "16 \t$dst, $a, $b;"),
@@ -214,6 +229,12 @@ multiclass I3<string OpcStr, SDNode OpNode> {
[(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
}
+class I16x2<string OpcStr, SDNode OpNode> :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, "16x2 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode (v2i16 Int32Regs:$a), (v2i16 Int32Regs:$b)))]>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+
// Template for instructions which take 3 int args. The instructions are
// named "<OpcStr>.s32" (e.g. "addc.cc.s32").
multiclass ADD_SUB_INT_CARRY<string OpcStr, SDNode OpNode> {
@@ -221,11 +242,11 @@ multiclass ADD_SUB_INT_CARRY<string OpcStr, SDNode OpNode> {
def i32rr :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
+ [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), (i32 Int32Regs:$b)))]>;
def i32ri :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
!strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), imm:$b))]>;
def i64rr :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
!strconcat(OpcStr, ".s64 \t$dst, $a, $b;"),
@@ -534,6 +555,34 @@ multiclass F2<string OpcStr, SDNode OpNode> {
[(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
}
+multiclass F2_Support_Half<string OpcStr, SDNode OpNode> {
+ def bf16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a),
+ !strconcat(OpcStr, ".bf16 \t$dst, $a;"),
+ [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a)))]>,
+ Requires<[hasSM<80>, hasPTX<70>]>;
+ def bf16x2 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a),
+ !strconcat(OpcStr, ".bf16x2 \t$dst, $a;"),
+ [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a)))]>,
+ Requires<[hasSM<80>, hasPTX<70>]>;
+ def f16_ftz : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a),
+ !strconcat(OpcStr, ".ftz.f16 \t$dst, $a;"),
+ [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a)))]>,
+ Requires<[hasSM<53>, hasPTX<65>, doF32FTZ]>;
+ def f16x2_ftz : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a),
+ !strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a;"),
+ [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a)))]>,
+ Requires<[hasSM<53>, hasPTX<65>, doF32FTZ]>;
+ def f16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a),
+ !strconcat(OpcStr, ".f16 \t$dst, $a;"),
+ [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a)))]>,
+ Requires<[hasSM<53>, hasPTX<65>]>;
+ def f16x2 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a),
+ !strconcat(OpcStr, ".f16x2 \t$dst, $a;"),
+ [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a)))]>,
+ Requires<[hasSM<53>, hasPTX<65>]>;
+}
+
//===----------------------------------------------------------------------===//
// NVPTX Instructions.
//===----------------------------------------------------------------------===//
@@ -740,12 +789,10 @@ defm SELP_f64 : SELP_PATTERN<"f64", f64, Float64Regs, f64imm, fpimm>;
// def v2f16imm : Operand<v2f16>;
// defm SELP_f16x2 : SELP_PATTERN<"b32", v2f16, Int32Regs, v2f16imm, imm>;
-def SELP_f16x2rr :
- NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$a, Int32Regs:$b, Int1Regs:$p),
- "selp.b32 \t$dst, $a, $b, $p;",
- [(set Int32Regs:$dst,
- (select Int1Regs:$p, (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>;
+foreach vt = [v2f16, v2bf16, v2i16, v4i8] in {
+def : Pat<(vt (select Int1Regs:$p, (vt Int32Regs:$a), (vt Int32Regs:$b))),
+ (SELP_b32rr Int32Regs:$a, Int32Regs:$b, Int1Regs:$p)>;
+}
//-----------------------------------
// Test Instructions
@@ -787,6 +834,9 @@ defm SUB_i1 : ADD_SUB_i1<sub>;
defm ADD : I3<"add.s", add>;
defm SUB : I3<"sub.s", sub>;
+def ADD16x2 : I16x2<"add.s", add>;
+def SUB16x2 : I16x2<"sub.s", sub>;
+
// int32 and int64 addition and subtraction with carry-out.
defm ADDCC : ADD_SUB_INT_CARRY<"add.cc", addc>;
defm SUBCC : ADD_SUB_INT_CARRY<"sub.cc", subc>;
@@ -811,14 +861,14 @@ defm UREM : I3<"rem.u", urem>;
// Integer absolute value, implemented with the PTX abs instruction
// (abs.s16 / abs.s32 / abs.s64).
-multiclass ABS<RegisterClass RC, string SizeName> {
+multiclass ABS<ValueType T, RegisterClass RC, string SizeName> {
def : NVPTXInst<(outs RC:$dst), (ins RC:$a),
!strconcat("abs", SizeName, " \t$dst, $a;"),
- [(set RC:$dst, (abs RC:$a))]>;
+ [(set (T RC:$dst), (abs (T RC:$a)))]>;
}
-defm ABS_16 : ABS<Int16Regs, ".s16">;
-defm ABS_32 : ABS<Int32Regs, ".s32">;
-defm ABS_64 : ABS<Int64Regs, ".s64">;
+defm ABS_16 : ABS<i16, Int16Regs, ".s16">;
+defm ABS_32 : ABS<i32, Int32Regs, ".s32">;
+defm ABS_64 : ABS<i64, Int64Regs, ".s64">;
// Integer min/max.
defm SMAX : I3<"max.s", smax>;
@@ -826,6 +876,12 @@ defm UMAX : I3<"max.u", umax>;
defm SMIN : I3<"min.s", smin>;
defm UMIN : I3<"min.u", umin>;
+def SMAX16x2 : I16x2<"max.s", smax>;
+def UMAX16x2 : I16x2<"max.u", umax>;
+def SMIN16x2 : I16x2<"min.s", smin>;
+def UMIN16x2 : I16x2<"min.u", umin>;
+
+
//
// Wide multiplication
//
@@ -890,13 +946,13 @@ def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)),
def : Pat<(i64 (mul_wide_signed i32:$a, i32:$b)),
(MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
Requires<[doMulWide]>;
-def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)),
+def : Pat<(i64 (mul_wide_signed (i32 Int32Regs:$a), imm:$b)),
(MULWIDES64Imm Int32Regs:$a, imm:$b)>,
Requires<[doMulWide]>;
def : Pat<(i64 (mul_wide_unsigned i32:$a, i32:$b)),
(MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
Requires<[doMulWide]>;
-def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)),
+def : Pat<(i64 (mul_wide_unsigned (i32 Int32Regs:$a), imm:$b)),
(MULWIDEU64Imm Int32Regs:$a, imm:$b)>,
Requires<[doMulWide]>;
@@ -1022,22 +1078,22 @@ def MAD32rrr :
NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
"mad.lo.s32 \t$dst, $a, $b, $c;",
- [(set Int32Regs:$dst, (imad Int32Regs:$a, Int32Regs:$b, Int32Regs:$c))]>;
+ [(set (i32 Int32Regs:$dst), (imad (i32 Int32Regs:$a), (i32 Int32Regs:$b), (i32 Int32Regs:$c)))]>;
def MAD32rri :
NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
"mad.lo.s32 \t$dst, $a, $b, $c;",
- [(set Int32Regs:$dst, (imad Int32Regs:$a, Int32Regs:$b, imm:$c))]>;
+ [(set (i32 Int32Regs:$dst), (imad (i32 Int32Regs:$a), (i32 Int32Regs:$b), imm:$c))]>;
def MAD32rir :
NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
"mad.lo.s32 \t$dst, $a, $b, $c;",
- [(set Int32Regs:$dst, (imad Int32Regs:$a, imm:$b, Int32Regs:$c))]>;
+ [(set (i32 Int32Regs:$dst), (imad (i32 Int32Regs:$a), imm:$b, (i32 Int32Regs:$c)))]>;
def MAD32rii :
NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$a, i32imm:$b, i32imm:$c),
"mad.lo.s32 \t$dst, $a, $b, $c;",
- [(set Int32Regs:$dst, (imad Int32Regs:$a, imm:$b, imm:$c))]>;
+ [(set (i32 Int32Regs:$dst), (imad (i32 Int32Regs:$a), imm:$b, imm:$c))]>;
def MAD64rrr :
NVPTXInst<(outs Int64Regs:$dst),
@@ -1067,7 +1123,7 @@ def INEG16 :
def INEG32 :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
"neg.s32 \t$dst, $src;",
- [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
+ [(set (i32 Int32Regs:$dst), (ineg (i32 Int32Regs:$src)))]>;
def INEG64 :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
"neg.s64 \t$dst, $src;",
@@ -1111,6 +1167,9 @@ defm FMAXNAN : F3<"max.NaN", fmaximum>;
defm FABS : F2<"abs", fabs>;
defm FNEG : F2<"neg", fneg>;
+defm FABS_H: F2_Support_Half<"abs", fabs>;
+defm FNEG_H: F2_Support_Half<"neg", fneg>;
+
defm FSQRT : F2<"sqrt.rn", fsqrt>;
//
@@ -1458,11 +1517,11 @@ multiclass BITWISE<string OpcStr, SDNode OpNode> {
def b32rr :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
+ [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), (i32 Int32Regs:$b)))]>;
def b32ri :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
!strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), imm:$b))]>;
def b64rr :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
!strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
@@ -1477,6 +1536,25 @@ defm OR : BITWISE<"or", or>;
defm AND : BITWISE<"and", and>;
defm XOR : BITWISE<"xor", xor>;
+// Lower logical v2i16/v4i8 ops as bitwise ops on b32.
+foreach vt = [v2i16, v4i8] in {
+ def: Pat<(or (vt Int32Regs:$a), (vt Int32Regs:$b)),
+ (ORb32rr Int32Regs:$a, Int32Regs:$b)>;
+ def: Pat<(xor (vt Int32Regs:$a), (vt Int32Regs:$b)),
+ (XORb32rr Int32Regs:$a, Int32Regs:$b)>;
+ def: Pat<(and (vt Int32Regs:$a), (vt Int32Regs:$b)),
+ (ANDb32rr Int32Regs:$a, Int32Regs:$b)>;
+
+ // The constants get legalized into a bitcast from i32, so that's what we need
+ // to match here.
+ def: Pat<(or Int32Regs:$a, (vt (bitconvert (i32 imm:$b)))),
+ (ORb32ri Int32Regs:$a, imm:$b)>;
+ def: Pat<(xor Int32Regs:$a, (vt (bitconvert (i32 imm:$b)))),
+ (XORb32ri Int32Regs:$a, imm:$b)>;
+ def: Pat<(and Int32Regs:$a, (vt (bitconvert (i32 imm:$b)))),
+ (ANDb32ri Int32Regs:$a, imm:$b)>;
+}
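
These patterns work because bitwise AND/OR/XOR never carry across bit boundaries, so applying them to the packed 32-bit word is identical to applying them per 16-bit or 8-bit lane. A small C++ sketch (illustrative helper name):

#include <cstdint>

// Lane-wise OR over two 16-bit lanes equals OR over the packed word;
// the identical argument covers AND, XOR, and the 4 x i8 packing.
uint32_t or_v2i16(uint32_t a, uint32_t b) {
  uint16_t lo = (uint16_t)a | (uint16_t)b;
  uint16_t hi = (uint16_t)(a >> 16) | (uint16_t)(b >> 16);
  return (uint32_t)lo | ((uint32_t)hi << 16);  // same bits as a | b
}
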
+
def NOT1 : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
"not.pred \t$dst, $src;",
[(set Int1Regs:$dst, (not Int1Regs:$src))]>;
@@ -1485,7 +1563,7 @@ def NOT16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
[(set Int16Regs:$dst, (not Int16Regs:$src))]>;
def NOT32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
"not.b32 \t$dst, $src;",
- [(set Int32Regs:$dst, (not Int32Regs:$src))]>;
+ [(set (i32 Int32Regs:$dst), (not (i32 Int32Regs:$src)))]>;
def NOT64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
"not.b64 \t$dst, $src;",
[(set Int64Regs:$dst, (not Int64Regs:$src))]>;
@@ -1499,7 +1577,7 @@ multiclass SHIFT<string OpcStr, SDNode OpNode> {
def i64rr :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, "64 \t$dst, $a, $b;"),
- [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int32Regs:$b))]>;
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a, (i32 Int32Regs:$b)))]>;
def i64ri :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
!strconcat(OpcStr, "64 \t$dst, $a, $b;"),
@@ -1507,11 +1585,11 @@ multiclass SHIFT<string OpcStr, SDNode OpNode> {
def i32rr :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, "32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
+ [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), (i32 Int32Regs:$b)))]>;
def i32ri :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
!strconcat(OpcStr, "32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, (i32 imm:$b)))]>;
+ [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), (i32 imm:$b)))]>;
def i32ii :
NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
!strconcat(OpcStr, "32 \t$dst, $a, $b;"),
@@ -1519,7 +1597,7 @@ multiclass SHIFT<string OpcStr, SDNode OpNode> {
def i16rr :
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, "16 \t$dst, $a, $b;"),
- [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int32Regs:$b))]>;
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (i32 Int32Regs:$b)))]>;
def i16ri :
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
!strconcat(OpcStr, "16 \t$dst, $a, $b;"),
@@ -1534,7 +1612,7 @@ defm SRL : SHIFT<"shr.u", srl>;
def BREV32 :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a),
"brev.b32 \t$dst, $a;",
- [(set Int32Regs:$dst, (bitreverse Int32Regs:$a))]>;
+ [(set Int32Regs:$dst, (bitreverse (i32 Int32Regs:$a)))]>;
def BREV64 :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a),
"brev.b64 \t$dst, $a;",
@@ -1550,13 +1628,13 @@ def BREV64 :
def ROTL32imm_hw :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
"shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
- [(set Int32Regs:$dst, (rotl Int32Regs:$src, (i32 imm:$amt)))]>,
+ [(set Int32Regs:$dst, (rotl (i32 Int32Regs:$src), (i32 imm:$amt)))]>,
Requires<[hasHWROT32]>;
def ROTL32reg_hw :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
"shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
- [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
+ [(set Int32Regs:$dst, (rotl (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>,
Requires<[hasHWROT32]>;
// 32 bit r2 = rotr r1, n
@@ -1565,13 +1643,13 @@ def ROTL32reg_hw :
def ROTR32imm_hw :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
"shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
- [(set Int32Regs:$dst, (rotr Int32Regs:$src, (i32 imm:$amt)))]>,
+ [(set Int32Regs:$dst, (rotr (i32 Int32Regs:$src), (i32 imm:$amt)))]>,
Requires<[hasHWROT32]>;
def ROTR32reg_hw :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
"shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
- [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
+ [(set Int32Regs:$dst, (rotr (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>,
Requires<[hasHWROT32]>;
// 32-bit software rotate by immediate. $amt2 should equal 32 - $amt1.
@@ -1591,10 +1669,10 @@ def SUB_FRM_32 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(32 - N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
-def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
+def : Pat<(rotl (i32 Int32Regs:$src), (i32 imm:$amt)),
(ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
Requires<[noHWROT32]>;
-def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
+def : Pat<(rotr (i32 Int32Regs:$src), (i32 imm:$amt)),
(ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>,
Requires<[noHWROT32]>;
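
On targets without the hardware shf rotate (noHWROT32), a rotate by a constant becomes two shifts whose counts sum to 32, combined with add.u32 (equivalent to OR here, since the shifted halves occupy disjoint bits); SUB_FRM_32 precomputes the complementary count at selection time. A C++ sketch, assuming 1 <= amt <= 31 so neither shift is by the full width:

#include <cstdint>

// Software rotate-left by a compile-time amount, as ROT32imm_sw does:
// shift left by amt, shift right by 32 - amt, combine the halves.
uint32_t rotl32_sw(uint32_t x, unsigned amt) {  // requires 1 <= amt <= 31
  return (x << amt) | (x >> (32 - amt));
}
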
@@ -1610,7 +1688,7 @@ def ROTL32reg_sw :
"shr.b32 \t%rhs, $src, %amt2;\n\t"
"add.u32 \t$dst, %lhs, %rhs;\n\t"
"}}",
- [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
+ [(set Int32Regs:$dst, (rotl (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>,
Requires<[noHWROT32]>;
// 32-bit software rotate right by register.
@@ -1625,7 +1703,7 @@ def ROTR32reg_sw :
"shl.b32 \t%rhs, $src, %amt2;\n\t"
"add.u32 \t$dst, %lhs, %rhs;\n\t"
"}}",
- [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
+ [(set Int32Regs:$dst, (rotr (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>,
Requires<[noHWROT32]>;
// 64-bit software rotate by immediate. $amt2 should equal 64 - $amt1.
@@ -1662,7 +1740,7 @@ def ROTL64reg_sw :
"shr.b64 \t%rhs, $src, %amt2;\n\t"
"add.u64 \t$dst, %lhs, %rhs;\n\t"
"}}",
- [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;
+ [(set Int64Regs:$dst, (rotl Int64Regs:$src, (i32 Int32Regs:$amt)))]>;
def ROTR64reg_sw :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt),
@@ -1675,7 +1753,7 @@ def ROTR64reg_sw :
"shl.b64 \t%rhs, $src, %amt2;\n\t"
"add.u64 \t$dst, %lhs, %rhs;\n\t"
"}}",
- [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
+ [(set Int64Regs:$dst, (rotr Int64Regs:$src, (i32 Int32Regs:$amt)))]>;
//
// Funnel shift in clamp mode
@@ -1691,47 +1769,153 @@ def FUNSHFLCLAMP :
(ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
"shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;",
[(set Int32Regs:$dst,
- (FUN_SHFL_CLAMP Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt))]>;
+ (FUN_SHFL_CLAMP (i32 Int32Regs:$lo), (i32 Int32Regs:$hi), (i32 Int32Regs:$amt)))]>;
def FUNSHFRCLAMP :
NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
"shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;",
[(set Int32Regs:$dst,
- (FUN_SHFR_CLAMP Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt))]>;
+ (FUN_SHFR_CLAMP (i32 Int32Regs:$lo), (i32 Int32Regs:$hi), (i32 Int32Regs:$amt)))]>;
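
The clamp-mode funnel shift concatenates the two 32-bit inputs into a 64-bit value, shifts by the amount saturated to 32, and keeps one 32-bit half. A C++ sketch of shf.l.clamp.b32 (helper name illustrative):

#include <cstdint>

// shf.l.clamp.b32 d, lo, hi, amt: left-funnel-shift {hi:lo} by
// min(amt, 32) and return the upper 32 bits of the result.
uint32_t shf_l_clamp(uint32_t lo, uint32_t hi, uint32_t amt) {
  uint64_t wide = ((uint64_t)hi << 32) | lo;
  uint32_t n = amt < 32 ? amt : 32;  // clamp mode saturates at 32
  return (uint32_t)((wide << n) >> 32);
}
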
//
// BFE - bit-field extract
//
-// Template for BFE instructions. Takes four args,
-// [dest (reg), src (reg), start (reg or imm), end (reg or imm)].
+// Template for BFE/BFI instructions.
+// BFE args: [dest (reg), src (reg), bit position (reg or imm), field length (reg or imm)].
// The position may be an imm only if the length is also an imm. FIXME: Is this
// a restriction in PTX?
//
// dest and src may be int32 or int64, but the position and length are always int32.
-multiclass BFE<string TyStr, RegisterClass RC> {
+def SDTBFE :
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+def bfe : SDNode<"NVPTXISD::BFE", SDTBFE>;
+
+def SDTBFI :
+ SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
+def bfi : SDNode<"NVPTXISD::BFI", SDTBFI>;
+
+def SDTPRMT :
+ SDTypeProfile<1, 4, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>, SDTCisVT<4, i32>,]>;
+def prmt : SDNode<"NVPTXISD::PRMT", SDTPRMT>;
+
+multiclass BFE<string Instr, ValueType T, RegisterClass RC> {
def rrr
: NVPTXInst<(outs RC:$d),
(ins RC:$a, Int32Regs:$b, Int32Regs:$c),
- !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
+ !strconcat(Instr, " \t$d, $a, $b, $c;"),
+ [(set (T RC:$d), (bfe (T RC:$a), (i32 Int32Regs:$b), (i32 Int32Regs:$c)))]>;
def rri
: NVPTXInst<(outs RC:$d),
(ins RC:$a, Int32Regs:$b, i32imm:$c),
- !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
+ !strconcat(Instr, " \t$d, $a, $b, $c;"),
+ [(set (T RC:$d), (bfe (T RC:$a), (i32 Int32Regs:$b), (i32 imm:$c)))]>;
def rii
: NVPTXInst<(outs RC:$d),
(ins RC:$a, i32imm:$b, i32imm:$c),
- !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
+ !strconcat(Instr, " \t$d, $a, $b, $c;"),
+ [(set (T RC:$d), (bfe (T RC:$a), (i32 imm:$b), (i32 imm:$c)))]>;
+}
+
+multiclass BFI<string Instr, ValueType T, RegisterClass RC, Operand ImmCls> {
+ def rrrr
+ : NVPTXInst<(outs RC:$f),
+ (ins RC:$a, RC:$b, Int32Regs:$c, Int32Regs:$d),
+ !strconcat(Instr, " \t$f, $a, $b, $c, $d;"),
+ [(set (T RC:$f), (bfi (T RC:$a), (T RC:$b), (i32 Int32Regs:$c), (i32 Int32Regs:$d)))]>;
+ def rrri
+ : NVPTXInst<(outs RC:$f),
+ (ins RC:$a, RC:$b, Int32Regs:$c, i32imm:$d),
+ !strconcat(Instr, " \t$f, $a, $b, $c, $d;"),
+ [(set (T RC:$f), (bfi (T RC:$a), (T RC:$b), (i32 Int32Regs:$c), (i32 imm:$d)))]>;
+ def rrii
+ : NVPTXInst<(outs RC:$f),
+ (ins RC:$a, RC:$b, i32imm:$c, i32imm:$d),
+ !strconcat(Instr, " \t$f, $a, $b, $c, $d;"),
+ [(set (T RC:$f), (bfi (T RC:$a), (T RC:$b), (i32 imm:$c), (i32 imm:$d)))]>;
+ def irrr
+ : NVPTXInst<(outs RC:$f),
+ (ins ImmCls:$a, RC:$b, Int32Regs:$c, Int32Regs:$d),
+ !strconcat(Instr, " \t$f, $a, $b, $c, $d;"),
+ [(set (T RC:$f), (bfi (T imm:$a), (T RC:$b), (i32 Int32Regs:$c), (i32 Int32Regs:$d)))]>;
+ def irri
+ : NVPTXInst<(outs RC:$f),
+ (ins ImmCls:$a, RC:$b, Int32Regs:$c, i32imm:$d),
+ !strconcat(Instr, " \t$f, $a, $b, $c, $d;"),
+ [(set (T RC:$f), (bfi (T imm:$a), (T RC:$b), (i32 Int32Regs:$c), (i32 imm:$d)))]>;
+ def irii
+ : NVPTXInst<(outs RC:$f),
+ (ins ImmCls:$a, RC:$b, i32imm:$c, i32imm:$d),
+ !strconcat(Instr, " \t$f, $a, $b, $c, $d;"),
+ [(set (T RC:$f), (bfi (T imm:$a), (T RC:$b), (i32 imm:$c), (i32 imm:$d)))]>;
+}
+
+multiclass PRMT<ValueType T, RegisterClass RC> {
+ def rrr
+ : NVPTXInst<(outs RC:$d),
+ (ins RC:$a, Int32Regs:$b, Int32Regs:$c, PrmtMode:$mode),
+ !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"),
+ [(set (T RC:$d), (prmt (T RC:$a), (T RC:$b), (i32 Int32Regs:$c), imm:$mode))]>;
+ def rri
+ : NVPTXInst<(outs RC:$d),
+ (ins RC:$a, Int32Regs:$b, i32imm:$c, PrmtMode:$mode),
+ !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"),
+ [(set (T RC:$d), (prmt (T RC:$a), (T RC:$b), (i32 imm:$c), imm:$mode))]>;
+ def rii
+ : NVPTXInst<(outs RC:$d),
+ (ins RC:$a, i32imm:$b, i32imm:$c, PrmtMode:$mode),
+ !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"),
+ [(set (T RC:$d), (prmt (T RC:$a), (T imm:$b), (i32 imm:$c), imm:$mode))]>;
}
let hasSideEffects = false in {
- defm BFE_S32 : BFE<"s32", Int32Regs>;
- defm BFE_U32 : BFE<"u32", Int32Regs>;
- defm BFE_S64 : BFE<"s64", Int64Regs>;
- defm BFE_U64 : BFE<"u64", Int64Regs>;
+ defm BFE_S32 : BFE<"bfe.s32", i32, Int32Regs>;
+ defm BFE_U32 : BFE<"bfe.u32", i32, Int32Regs>;
+ defm BFE_S64 : BFE<"bfe.s64", i64, Int64Regs>;
+ defm BFE_U64 : BFE<"bfe.u64", i64, Int64Regs>;
+
+ defm BFI_B32 : BFI<"bfi.b32", i32, Int32Regs, i32imm>;
+ defm BFI_B64 : BFI<"bfi.b64", i64, Int64Regs, i64imm>;
+
+ defm PRMT_B32 : PRMT<i32, Int32Regs>;
}
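
For readers unfamiliar with these PTX instructions, here is a C++ sketch of their semantics, simplified by assuming pos + len <= 32 (the hardware also handles out-of-range fields, which is not modeled here); bfe.s32 additionally sign-extends from the top bit of the extracted field:

#include <cstdint>

// bfe.u32 d, a, pos, len: extract a len-bit field starting at bit pos,
// zero-extended into d.
uint32_t bfe_u32(uint32_t a, uint32_t pos, uint32_t len) {
  uint32_t mask = len < 32 ? (1u << len) - 1 : ~0u;
  return (a >> pos) & mask;
}

// bfi.b32 f, a, b, pos, len: insert the low len bits of a into b at
// bit position pos; all other bits of b pass through unchanged.
uint32_t bfi_b32(uint32_t a, uint32_t b, uint32_t pos, uint32_t len) {
  uint32_t mask = (len < 32 ? (1u << len) - 1 : ~0u) << pos;
  return (b & ~mask) | ((a << pos) & mask);
}

// prmt.b32 (default mode): each selector nibble picks one of the eight
// bytes of {b:a}; if the nibble's msb is set, the chosen byte's sign
// bit is replicated instead of copying the byte.
uint32_t prmt_b32(uint32_t a, uint32_t b, uint32_t sel) {
  uint64_t bytes = ((uint64_t)b << 32) | a;  // byte 0 is the LSB of a
  uint32_t d = 0;
  for (int i = 0; i < 4; ++i) {
    uint32_t s = (sel >> (4 * i)) & 0xF;
    uint8_t byte = (uint8_t)(bytes >> (8 * (s & 7)));
    if (s & 8)
      byte = (byte & 0x80) ? 0xFF : 0x00;  // sign-replicate
    d |= (uint32_t)byte << (8 * i);
  }
  return d;
}
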
+
+// byte extraction + signed/unsigned extension to i32.
+def : Pat<(i32 (sext_inreg (bfe (i32 Int32Regs:$s), (i32 Int32Regs:$o), 8), i8)),
+ (BFE_S32rri Int32Regs:$s, Int32Regs:$o, 8)>;
+def : Pat<(i32 (sext_inreg (bfe (i32 Int32Regs:$s), (i32 imm:$o), 8), i8)),
+ (BFE_S32rii Int32Regs:$s, imm:$o, 8)>;
+def : Pat<(i32 (and (bfe (i32 Int32Regs:$s), (i32 Int32Regs:$o), 8), 255)),
+ (BFE_U32rri Int32Regs:$s, Int32Regs:$o, 8)>;
+def : Pat<(i32 (and (bfe (i32 Int32Regs:$s), (i32 imm:$o), 8), 255)),
+ (BFE_U32rii Int32Regs:$s, imm:$o, 8)>;
+
+// byte extraction + signed extension to i16
+def : Pat<(i16 (sext_inreg (trunc (bfe (i32 Int32Regs:$s), (i32 imm:$o), 8)), i8)),
+ (CVT_s8_s32 (BFE_S32rii Int32Regs:$s, imm:$o, 8), CvtNONE)>;
+
+// Byte extraction via shift/trunc/sext
+def : Pat<(i16 (sext_inreg (trunc Int32Regs:$s), i8)),
+ (CVT_s8_s32 Int32Regs:$s, CvtNONE)>;
+def : Pat<(i16 (sext_inreg (trunc (srl (i32 Int32Regs:$s), (i32 imm:$o))), i8)),
+ (CVT_s8_s32 (BFE_S32rii Int32Regs:$s, imm:$o, 8), CvtNONE)>;
+def : Pat<(sext_inreg (srl (i32 Int32Regs:$s), (i32 imm:$o)), i8),
+ (BFE_S32rii Int32Regs:$s, imm:$o, 8)>;
+def : Pat<(i16 (sra (i16 (trunc Int32Regs:$s)), (i32 8))),
+ (CVT_s8_s32 (BFE_S32rii Int32Regs:$s, 8, 8), CvtNONE)>;
+def : Pat<(sext_inreg (srl (i64 Int64Regs:$s), (i32 imm:$o)), i8),
+ (BFE_S64rii Int64Regs:$s, imm:$o, 8)>;
+def : Pat<(i16 (sext_inreg (trunc Int64Regs:$s), i8)),
+ (CVT_s8_s64 Int64Regs:$s, CvtNONE)>;
+def : Pat<(i16 (sext_inreg (trunc (srl (i64 Int64Regs:$s), (i32 imm:$o))), i8)),
+ (CVT_s8_s64 (BFE_S64rii Int64Regs:$s, imm:$o, 8), CvtNONE)>;
+
//-----------------------------------
// Comparison instructions (setp, set)
//-----------------------------------
@@ -1783,14 +1967,14 @@ def SETP_bf16rr :
NVPTXInst<(outs Int1Regs:$dst),
(ins Int16Regs:$a, Int16Regs:$b, CmpMode:$cmp),
"setp${cmp:base}${cmp:ftz}.bf16 \t$dst, $a, $b;",
- []>, Requires<[hasBF16Math]>;
+ []>, Requires<[hasBF16Math, hasPTX<78>, hasSM<90>]>;
def SETP_bf16x2rr :
NVPTXInst<(outs Int1Regs:$p, Int1Regs:$q),
(ins Int32Regs:$a, Int32Regs:$b, CmpMode:$cmp),
"setp${cmp:base}${cmp:ftz}.bf16x2 \t$p|$q, $a, $b;",
[]>,
- Requires<[hasBF16Math]>;
+ Requires<[hasBF16Math, hasPTX<78>, hasSM<90>]>;
// FIXME: This doesn't appear to be correct. The "set" mnemonic has the form
@@ -1821,7 +2005,7 @@ defm SET_b64 : SET<"b64", Int64Regs, i64imm>;
defm SET_s64 : SET<"s64", Int64Regs, i64imm>;
defm SET_u64 : SET<"u64", Int64Regs, i64imm>;
defm SET_f16 : SET<"f16", Int16Regs, f16imm>;
-defm SET_bf16 : SET<"bf16", Int16Regs, bf16imm>;
+defm SET_bf16 : SET<"bf16", Int16Regs, bf16imm>, Requires<[hasPTX<78>, hasSM<90>]>;
defm SET_f32 : SET<"f32", Float32Regs, f32imm>;
defm SET_f64 : SET<"f64", Float64Regs, f64imm>;
@@ -1915,7 +2099,7 @@ def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
[(set Int16Regs:$dst, imm:$src)]>;
def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
"mov.u32 \t$dst, $src;",
- [(set Int32Regs:$dst, imm:$src)]>;
+ [(set (i32 Int32Regs:$dst), imm:$src)]>;
def IMOV64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
"mov.u64 \t$dst, $src;",
[(set Int64Regs:$dst, imm:$src)]>;
@@ -1978,9 +2162,9 @@ multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
// i32 -> pred
def : Pat<(i1 (OpNode i32:$a, i32:$b)),
(setp_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
- def : Pat<(i1 (OpNode Int32Regs:$a, imm:$b)),
+ def : Pat<(i1 (OpNode (i32 Int32Regs:$a), imm:$b)),
(setp_32ri Int32Regs:$a, imm:$b, Mode)>;
- def : Pat<(i1 (OpNode imm:$a, Int32Regs:$b)),
+ def : Pat<(i1 (OpNode imm:$a, (i32 Int32Regs:$b))),
(setp_32ir imm:$a, Int32Regs:$b, Mode)>;
// i64 -> pred
def : Pat<(i1 (OpNode Int64Regs:$a, Int64Regs:$b)),
@@ -2000,9 +2184,9 @@ multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
// i32 -> i32
def : Pat<(i32 (OpNode i32:$a, i32:$b)),
(set_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
- def : Pat<(i32 (OpNode Int32Regs:$a, imm:$b)),
+ def : Pat<(i32 (OpNode (i32 Int32Regs:$a), imm:$b)),
(set_32ri Int32Regs:$a, imm:$b, Mode)>;
- def : Pat<(i32 (OpNode imm:$a, Int32Regs:$b)),
+ def : Pat<(i32 (OpNode imm:$a, (i32 Int32Regs:$b))),
(set_32ir imm:$a, Int32Regs:$b, Mode)>;
// i64 -> i32
def : Pat<(i32 (OpNode Int64Regs:$a, Int64Regs:$b)),
@@ -2061,6 +2245,29 @@ def : Pat<(seteq Int1Regs:$a, Int1Regs:$b),
def : Pat<(setueq Int1Regs:$a, Int1Regs:$b),
(NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
+// comparisons of i8 extracted with BFE as i32
+def: Pat<(setgt (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)),
+ (SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpGT)>;
+def: Pat<(setge (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)),
+ (SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpGE)>;
+def: Pat<(setlt (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)),
+ (SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpLT)>;
+def: Pat<(setle (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)),
+ (SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpLE)>;
+
+def: Pat<(setugt (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
+ (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpHI)>;
+def: Pat<(setuge (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
+ (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpHS)>;
+def: Pat<(setult (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
+ (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpLO)>;
+def: Pat<(setule (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
+ (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpLS)>;
+def: Pat<(seteq (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
+ (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpEQ)>;
+def: Pat<(setne (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
+ (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpNE)>;
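
These folds rely on the precondition stated in the comment above: the 32-bit registers already hold bytes that were sign- or zero-extended by BFE, so comparing the full registers gives the same answer as comparing the bytes. A C++ sketch of the signed case, under that assumption:

#include <cstdint>

// Assuming a == (int32_t)(int8_t)a and likewise for b (e.g. both came
// out of bfe.s32), the sext_inreg/trunc wrappers are no-ops and the i8
// compare can be a single setp.s32 on the full registers.
bool sgt_bytes(int32_t a, int32_t b) {
  return a > b;  // == SETP_s32rr ... CmpGT
}
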
+
// i1 compare -> i32
def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)),
(SELP_u32ii -1, 0, (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
@@ -2633,7 +2840,7 @@ foreach vt = [f16, bf16] in {
def: Pat<(vt (ProxyReg vt:$src)), (ProxyRegI16 Int16Regs:$src)>;
}
-foreach vt = [v2f16, v2bf16] in {
+foreach vt = [v2f16, v2bf16, v2i16, v4i8] in {
def: Pat<(vt (ProxyReg vt:$src)), (ProxyRegI32 Int32Regs:$src)>;
}
@@ -2929,15 +3136,11 @@ def BITCONVERT_32_F2I : F_BITCONVERT<"32", f32, i32>;
def BITCONVERT_64_I2F : F_BITCONVERT<"64", i64, f64>;
def BITCONVERT_64_F2I : F_BITCONVERT<"64", f64, i64>;
-foreach vt = [v2f16, v2bf16] in {
-def: Pat<(vt (bitconvert (i32 UInt32Const:$a))),
- (IMOVB32ri UInt32Const:$a)>;
-def: Pat<(vt (bitconvert (i32 Int32Regs:$a))),
- (ProxyRegI32 Int32Regs:$a)>;
-def: Pat<(i32 (bitconvert (vt Int32Regs:$a))),
- (ProxyRegI32 Int32Regs:$a)>;
+foreach vt = [v2f16, v2bf16, v2i16, v4i8] in {
def: Pat<(vt (bitconvert (f32 Float32Regs:$a))),
(BITCONVERT_32_F2I Float32Regs:$a)>;
+def: Pat<(f32 (bitconvert (vt Int32Regs:$a))),
+ (BITCONVERT_32_I2F Int32Regs:$a)>;
}
foreach vt = [f16, bf16] in {
def: Pat<(vt (bitconvert (i16 UInt16Const:$a))),
@@ -2948,6 +3151,17 @@ def: Pat<(i16 (bitconvert (vt Int16Regs:$a))),
(ProxyRegI16 Int16Regs:$a)>;
}
+foreach ta = [v2f16, v2bf16, v2i16, v4i8, i32] in {
+ def: Pat<(ta (bitconvert (i32 UInt32Const:$a))),
+ (IMOVB32ri UInt32Const:$a)>;
+ foreach tb = [v2f16, v2bf16, v2i16, v4i8, i32] in {
+ if !ne(ta, tb) then {
+ def: Pat<(ta (bitconvert (tb Int32Regs:$a))),
+ (ProxyRegI32 Int32Regs:$a)>;
+ }
+ }
+}
+
// NOTE: pred->fp conversions are currently sub-optimal due to an issue in TableGen where
// we cannot specify floating-point literals in isel patterns. Therefore, we
// use an integer selp to select either 1 or 0 and then cvt to floating-point.
@@ -3207,25 +3421,25 @@ def : Pat<(sext_inreg Int64Regs:$a, i32), (CVT_INREG_s64_s32 Int64Regs:$a)>;
// Select instructions with 32-bit predicates
-def : Pat<(select Int32Regs:$pred, i16:$a, i16:$b),
+def : Pat<(select (i32 Int32Regs:$pred), i16:$a, i16:$b),
(SELP_b16rr Int16Regs:$a, Int16Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
-def : Pat<(select Int32Regs:$pred, i32:$a, i32:$b),
+def : Pat<(select (i32 Int32Regs:$pred), i32:$a, i32:$b),
(SELP_b32rr Int32Regs:$a, Int32Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
-def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
+def : Pat<(select (i32 Int32Regs:$pred), Int64Regs:$a, Int64Regs:$b),
(SELP_b64rr Int64Regs:$a, Int64Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
-def : Pat<(select Int32Regs:$pred, (f16 Int16Regs:$a), (f16 Int16Regs:$b)),
+def : Pat<(select (i32 Int32Regs:$pred), (f16 Int16Regs:$a), (f16 Int16Regs:$b)),
(SELP_f16rr Int16Regs:$a, Int16Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
-def : Pat<(select Int32Regs:$pred, (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)),
+def : Pat<(select (i32 Int32Regs:$pred), (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)),
(SELP_bf16rr Int16Regs:$a, Int16Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
-def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
+def : Pat<(select (i32 Int32Regs:$pred), Float32Regs:$a, Float32Regs:$b),
(SELP_f32rr Float32Regs:$a, Float32Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
-def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
+def : Pat<(select (i32 Int32Regs:$pred), Float64Regs:$a, Float64Regs:$b),
(SELP_f64rr Float64Regs:$a, Float64Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
@@ -3286,19 +3500,24 @@ def : Pat<(i32 (trunc (srl Int64Regs:$s, (i32 32)))),
def : Pat<(i32 (trunc (sra Int64Regs:$s, (i32 32)))),
(I64toI32H Int64Regs:$s)>;
-def : Pat<(f16 (extractelt (v2f16 Int32Regs:$src), 0)),
+def: Pat<(i32 (sext (extractelt (v2i16 Int32Regs:$src), 0))),
+ (CVT_INREG_s32_s16 Int32Regs:$src)>;
+
+foreach vt = [v2f16, v2bf16, v2i16] in {
+def : Pat<(extractelt (vt Int32Regs:$src), 0),
(I32toI16L Int32Regs:$src)>;
-def : Pat<(f16 (extractelt (v2f16 Int32Regs:$src), 1)),
+def : Pat<(extractelt (vt Int32Regs:$src), 1),
(I32toI16H Int32Regs:$src)>;
+}
def : Pat<(v2f16 (build_vector (f16 Int16Regs:$a), (f16 Int16Regs:$b))),
(V2I16toI32 Int16Regs:$a, Int16Regs:$b)>;
-
-def : Pat<(bf16 (extractelt (v2bf16 Int32Regs:$src), 0)),
- (I32toI16L Int32Regs:$src)>;
-def : Pat<(bf16 (extractelt (v2bf16 Int32Regs:$src), 1)),
- (I32toI16H Int32Regs:$src)>;
def : Pat<(v2bf16 (build_vector (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))),
(V2I16toI32 Int16Regs:$a, Int16Regs:$b)>;
+def : Pat<(v2i16 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b))),
+ (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>;
+
+def: Pat<(v2i16 (scalar_to_vector (i16 Int16Regs:$a))),
+ (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
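
All of the 2 x 16-bit vector cases reduce to half-register moves: lane 0 is the low 16 bits, lane 1 the high 16 bits, and scalar_to_vector only needs to define lane 0 (lane 1 is undef, so a zero-extending move suffices). A C++ sketch:

#include <cstdint>

// v2i16 packed in one 32-bit register.
uint16_t extract_lane(uint32_t v, int i) {  // I32toI16L / I32toI16H
  return (uint16_t)(i == 0 ? v : v >> 16);
}
uint32_t build_v2i16(uint16_t lane0, uint16_t lane1) {  // V2I16toI32
  return (uint32_t)lane0 | ((uint32_t)lane1 << 16);
}
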
// Count leading zeros
let hasSideEffects = false in {
@@ -3309,7 +3528,7 @@ let hasSideEffects = false in {
}
// 32-bit has a direct PTX instruction
-def : Pat<(ctlz Int32Regs:$a), (CLZr32 Int32Regs:$a)>;
+def : Pat<(i32 (ctlz (i32 Int32Regs:$a))), (CLZr32 Int32Regs:$a)>;
// The return type of the ctlz ISD node is the same as its input, but the PTX
// clz instruction always returns a 32-bit value. For ctlz.i64, convert the
@@ -3347,7 +3566,7 @@ let hasSideEffects = false in {
}
// 32-bit has a direct PTX instruction
-def : Pat<(ctpop Int32Regs:$a), (POPCr32 Int32Regs:$a)>;
+def : Pat<(i32 (ctpop (i32 Int32Regs:$a))), (POPCr32 Int32Regs:$a)>;
// For 64-bit, the result in PTX is actually 32-bit so we zero-extend to 64-bit
// to match the LLVM semantics. Just as with ctlz.i64, we provide a second
@@ -3460,7 +3679,7 @@ let isTerminator=1 in {
"bra.uni \t$target;", [(br bb:$target)]>;
}
-def : Pat<(brcond Int32Regs:$a, bb:$target),
+def : Pat<(brcond (i32 Int32Regs:$a), bb:$target),
(CBranch (SETP_u32ri Int32Regs:$a, 0, CmpNE), bb:$target)>;
// SelectionDAGBuilder::visitSWitchCase() will invert the condition of a
@@ -3508,7 +3727,9 @@ def Callseq_End :
[(callseq_end timm:$amt1, timm:$amt2)]>;
// trap instruction
-def trapinst : NVPTXInst<(outs), (ins), "trap;", [(trap)]>;
+// Emit an `exit` as well to convey to ptxas that `trap` exits the CFG.
+// This won't be necessary in a future version of ptxas.
+def trapinst : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>;
// Call prototype wrapper
def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index f0de0144d410..85eae44f349a 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1460,29 +1460,31 @@ class ATOMIC_SHARED_CHK <dag ops, dag frag>
class ATOMIC_GENERIC_CHK <dag ops, dag frag>
: PatFrag<ops, frag, AS_match.generic>;
-multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
+multiclass F_ATOMIC_2_imp<ValueType ptrT, NVPTXRegClass ptrclass,
+ ValueType regT, NVPTXRegClass regclass,
string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
Operand IMMType, SDNode IMM, list<Predicate> Pred> {
def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
!strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
- [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
Requires<Pred>;
def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
!strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
- [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), IMM:$b))]>,
Requires<Pred>;
}
-multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
+multiclass F_ATOMIC_2<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr,
string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
list<Predicate> Pred = []> {
- defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ defm p32 : F_ATOMIC_2_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
IntOp, IMMType, IMM, Pred>;
- defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ defm p64 : F_ATOMIC_2_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
IntOp, IMMType, IMM, Pred>;
}
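
F_ATOMIC_2 now carries explicit ValueTypes for both the pointer and the value so TableGen can type the patterns; each instantiation expands to register and immediate operand forms for 32- and 64-bit pointers, with the asm string assembled by concatenation (e.g. INT_PTX_ATOM_ADD_G_32 yields "atom.global.add.u32"). In portable C++ terms, the operation every atom.add variant implements is a fetch-and-add returning the old value:

#include <atomic>
#include <cstdint>

// Semantics of atom.{space}.add.u32: atomically add b to *addr and
// return the value that was there before the add.
uint32_t atom_add_u32(std::atomic<uint32_t> &addr, uint32_t b) {
  return addr.fetch_add(b);
}
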
// has 2 operands, neg the second one
-multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
+multiclass F_ATOMIC_2_NEG_imp<ValueType ptrT, NVPTXRegClass ptrclass,
+ ValueType regT, NVPTXRegClass regclass,
string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
list<Predicate> Pred> {
def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
@@ -1492,50 +1494,51 @@ multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
"neg.s", TypeStr, " \ttemp, $b; \n\t",
"atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
"}}"),
- [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
Requires<Pred>;
}
-multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
+multiclass F_ATOMIC_2_NEG<ValueType regT, NVPTXRegClass regclass, string SpaceStr,
string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred = []> {
- defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ defm p32: F_ATOMIC_2_NEG_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
IntOp, Pred> ;
- defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ defm p64: F_ATOMIC_2_NEG_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
IntOp, Pred> ;
}
// has 3 operands
-multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
+multiclass F_ATOMIC_3_imp<ValueType ptrT, NVPTXRegClass ptrclass,
+ ValueType regT, NVPTXRegClass regclass,
string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
Operand IMMType, list<Predicate> Pred> {
def reg : NVPTXInst<(outs regclass:$dst),
(ins ptrclass:$addr, regclass:$b, regclass:$c),
!strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
- [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), (regT regclass:$c)))]>,
Requires<Pred>;
def imm1 : NVPTXInst<(outs regclass:$dst),
(ins ptrclass:$addr, IMMType:$b, regclass:$c),
!strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
- [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, (regT regclass:$c)))]>,
Requires<Pred>;
def imm2 : NVPTXInst<(outs regclass:$dst),
(ins ptrclass:$addr, regclass:$b, IMMType:$c),
!strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
- [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), imm:$c))]>,
Requires<Pred>;
def imm3 : NVPTXInst<(outs regclass:$dst),
(ins ptrclass:$addr, IMMType:$b, IMMType:$c),
!strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
- [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>,
Requires<Pred>;
}
-multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
+multiclass F_ATOMIC_3<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr,
string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
- defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ defm p32 : F_ATOMIC_3_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
IntOp, IMMType, Pred>;
- defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ defm p64 : F_ATOMIC_3_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
IntOp, IMMType, Pred>;
}
@@ -1560,36 +1563,36 @@ def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_fadd node:$a, node:$b)>;
-defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
+defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".add",
atomic_load_add_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
+defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".add",
atomic_load_add_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
+defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".add",
atomic_load_add_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
+defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
".add", atomic_load_add_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
+defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".add",
atomic_load_add_64_g, i64imm, imm>;
-defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
+defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64", ".add",
atomic_load_add_64_s, i64imm, imm>;
-defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
+defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".add",
atomic_load_add_64_gen, i64imm, imm>;
-defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
+defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
".add", atomic_load_add_64_gen, i64imm, imm>;
-defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
+defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<f32, Float32Regs, ".global", ".f32", ".add",
atomic_load_add_g, f32imm, fpimm>;
-defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
+defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<f32, Float32Regs, ".shared", ".f32", ".add",
atomic_load_add_s, f32imm, fpimm>;
-defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
+defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<f32, Float32Regs, "", ".f32", ".add",
atomic_load_add_gen, f32imm, fpimm>;
-defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
+defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<f64, Float64Regs, ".global", ".f64", ".add",
atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
-defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
+defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<f64, Float64Regs, ".shared", ".f64", ".add",
atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
-defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
+defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<f64, Float64Regs, "", ".f64", ".add",
atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
// atom_sub
@@ -1607,21 +1610,21 @@ def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_sub_64 node:$a, node:$b)>;
-defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
+defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32", ".add",
atomic_load_sub_32_g>;
-defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
+defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64", ".add",
atomic_load_sub_64_g>;
-defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
+defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<i32, Int32Regs, "", "32", ".add",
atomic_load_sub_32_gen>;
-defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
+defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32",
".add", atomic_load_sub_32_gen>;
-defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
+defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<i32, Int32Regs, ".shared", "32", ".add",
atomic_load_sub_32_s>;
-defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
+defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<i64, Int64Regs, ".shared", "64", ".add",
atomic_load_sub_64_s>;
-defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
+defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<i64, Int64Regs, "", "64", ".add",
atomic_load_sub_64_gen>;
-defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
+defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64",
".add", atomic_load_sub_64_gen>;
// atom_swap
@@ -1639,21 +1642,21 @@ def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_swap_64 node:$a, node:$b)>;
-defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
+defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".exch",
atomic_swap_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
+defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".exch",
atomic_swap_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
+defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".exch",
atomic_swap_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
+defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
".exch", atomic_swap_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
+defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".exch",
atomic_swap_64_g, i64imm, imm>;
-defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
+defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".exch",
atomic_swap_64_s, i64imm, imm>;
-defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
+defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".exch",
atomic_swap_64_gen, i64imm, imm>;
-defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
+defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
".exch", atomic_swap_64_gen, i64imm, imm>;
// atom_max
@@ -1683,37 +1686,37 @@ def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_umax_64 node:$a, node:$b)>;
-defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
+defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32",
".max", atomic_load_max_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
+defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32",
".max", atomic_load_max_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
+defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".max",
atomic_load_max_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
+defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
+defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64",
".max", atomic_load_max_64_g, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
+defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64",
".max", atomic_load_max_64_s, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
+defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".max",
atomic_load_max_64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
+defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
".s64", ".max", atomic_load_max_64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
+defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
".max", atomic_load_umax_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
+defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32",
".max", atomic_load_umax_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
+defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".max",
atomic_load_umax_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
+defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
+defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
".max", atomic_load_umax_64_g, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
+defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64",
".max", atomic_load_umax_64_s, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
+defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".max",
atomic_load_umax_64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
+defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, [hasSM<32>]>;
// atom_min
@@ -1743,37 +1746,37 @@ def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_umin_64 node:$a, node:$b)>;
-defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
+defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32",
".min", atomic_load_min_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
+defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32",
".min", atomic_load_min_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
+defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".min",
atomic_load_min_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
+defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
+defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64",
".min", atomic_load_min_64_g, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
+defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64",
".min", atomic_load_min_64_s, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
+defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".min",
atomic_load_min_64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
+defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
".s64", ".min", atomic_load_min_64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
+defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
".min", atomic_load_umin_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
+defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32",
".min", atomic_load_umin_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
+defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".min",
atomic_load_umin_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
+defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
+defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
".min", atomic_load_umin_64_g, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
+defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64",
".min", atomic_load_umin_64_s, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
+defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".min",
atomic_load_umin_64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
+defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, [hasSM<32>]>;
// atom_inc atom_dec
@@ -1791,21 +1794,21 @@ def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
-defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
+defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".inc",
atomic_load_inc_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
+defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".inc",
atomic_load_inc_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
+defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".inc",
atomic_load_inc_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
+defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
".inc", atomic_load_inc_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
+defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".dec",
atomic_load_dec_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
+defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".dec",
atomic_load_dec_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
+defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".dec",
atomic_load_dec_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
+defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
".dec", atomic_load_dec_32_gen, i32imm, imm>;
// atom_and
@@ -1823,21 +1826,21 @@ def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_and_64 node:$a, node:$b)>;
-defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
+defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".and",
atomic_load_and_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
+defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".and",
atomic_load_and_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
+defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".and",
atomic_load_and_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
+defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
".and", atomic_load_and_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
+defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".and",
atomic_load_and_64_g, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
+defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".and",
atomic_load_and_64_s, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
+defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".and",
atomic_load_and_64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
+defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
".and", atomic_load_and_64_gen, i64imm, imm, [hasSM<32>]>;
// atom_or
@@ -1855,21 +1858,21 @@ def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_or_64 node:$a, node:$b)>;
-defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
+defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".or",
atomic_load_or_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
+defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".or",
atomic_load_or_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
+defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
".or", atomic_load_or_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
+defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".or",
atomic_load_or_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
+defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".or",
atomic_load_or_64_g, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
+defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".or",
atomic_load_or_64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
+defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
".or", atomic_load_or_64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
+defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".or",
atomic_load_or_64_s, i64imm, imm, [hasSM<32>]>;
// atom_xor
@@ -1887,21 +1890,21 @@ def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_xor_64 node:$a, node:$b)>;
-defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
+defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".xor",
atomic_load_xor_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
+defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".xor",
atomic_load_xor_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
+defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".xor",
atomic_load_xor_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
+defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
".xor", atomic_load_xor_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
+defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".xor",
atomic_load_xor_64_g, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
+defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".xor",
atomic_load_xor_64_s, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
+defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".xor",
atomic_load_xor_64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
+defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
".xor", atomic_load_xor_64_gen, i64imm, imm, [hasSM<32>]>;
// atom_cas
@@ -1919,21 +1922,21 @@ def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
(atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
-defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
+defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas",
atomic_cmp_swap_32_g, i32imm>;
-defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
+defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<i32, Int32Regs, ".shared", ".b32", ".cas",
atomic_cmp_swap_32_s, i32imm>;
-defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
+defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<i32, Int32Regs, "", ".b32", ".cas",
atomic_cmp_swap_32_gen, i32imm>;
-defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
+defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32",
".cas", atomic_cmp_swap_32_gen, i32imm>;
-defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
+defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64", ".cas",
atomic_cmp_swap_64_g, i64imm>;
-defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
+defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<i64, Int64Regs, ".shared", ".b64", ".cas",
atomic_cmp_swap_64_s, i64imm>;
-defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
+defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<i64, Int64Regs, "", ".b64", ".cas",
atomic_cmp_swap_64_gen, i64imm>;
-defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
+defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64",
".cas", atomic_cmp_swap_64_gen, i64imm>;
// Support for scoped atomic operations. Matches
@@ -1942,76 +1945,76 @@ defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
// NOTE: not all possible combinations are implemented
// 'space' is limited to generic as it's the only one needed to support CUDA.
// 'scope' = 'gpu' is default and is handled by regular atomic instructions.
-class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
+class ATOM23_impl<string AsmStr, ValueType regT, NVPTXRegClass regclass, list<Predicate> Preds,
dag ins, dag Operands>
: NVPTXInst<(outs regclass:$result), ins,
AsmStr,
- [(set regclass:$result, Operands)]>,
+ [(set (regT regclass:$result), Operands)]>,
Requires<Preds>;
// Define instruction variants for all addressing modes.
multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
- NVPTXRegClass regclass, Operand ImmType,
+ ValueType regT, NVPTXRegClass regclass, Operand ImmType,
SDNode Imm, ValueType ImmTy,
list<Predicate> Preds> {
let AddedComplexity = 1 in {
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int32Regs:$src, regclass:$b),
- (Intr Int32Regs:$src, regclass:$b)>;
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ (Intr (i32 Int32Regs:$src), (regT regclass:$b))>;
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int64Regs:$src, regclass:$b),
- (Intr Int64Regs:$src, regclass:$b)>;
+ (Intr (i64 Int64Regs:$src), (regT regclass:$b))>;
}
// tablegen can't infer argument types from Intrinsic (though it can
// from Instruction) so we have to enforce specific type on
// immediates via explicit cast to ImmTy.
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int32Regs:$src, ImmType:$b),
- (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b))>;
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int64Regs:$src, ImmType:$b),
- (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
+ (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b))>;
}
multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
- NVPTXRegClass regclass, Operand ImmType,
- SDNode Imm, ValueType ImmTy,
+ ValueType regT, NVPTXRegClass regclass,
+ Operand ImmType, SDNode Imm, ValueType ImmTy,
list<Predicate> Preds> {
// Variants for register/immediate permutations of $b and $c
let AddedComplexity = 2 in {
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int32Regs:$src, regclass:$b, regclass:$c),
- (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ (Intr (i32 Int32Regs:$src), (regT regclass:$b), (regT regclass:$c))>;
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int64Regs:$src, regclass:$b, regclass:$c),
- (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
+ (Intr (i64 Int64Regs:$src), (regT regclass:$b), (regT regclass:$c))>;
}
let AddedComplexity = 1 in {
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int32Regs:$src, ImmType:$b, regclass:$c),
- (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>;
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int64Regs:$src, ImmType:$b, regclass:$c),
- (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>;
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int32Regs:$src, regclass:$b, ImmType:$c),
- (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ (Intr (i32 Int32Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>;
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int64Regs:$src, regclass:$b, ImmType:$c),
- (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
+ (Intr (i64 Int64Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>;
}
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int32Regs:$src, ImmType:$b, ImmType:$c),
- (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int64Regs:$src, ImmType:$b, ImmType:$c),
- (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
+ (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}
// Constructs intrinsic name and instruction asm strings.
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
string ScopeStr, string SpaceStr,
- NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
+ ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
ValueType ImmTy, list<Predicate> Preds> {
defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
# !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
@@ -2021,11 +2024,11 @@ multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
"int_nvvm_atomic_" # OpStr
# "_" # SpaceStr # "_" # IntTypeStr
# !if(!empty(ScopeStr), "", "_" # ScopeStr)),
- regclass, ImmType, Imm, ImmTy, Preds>;
+ regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
string ScopeStr, string SpaceStr,
- NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
+ ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
ValueType ImmTy, list<Predicate> Preds> {
defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
# !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
@@ -2035,93 +2038,93 @@ multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
"int_nvvm_atomic_" # OpStr
# "_" # SpaceStr # "_" # IntTypeStr
# !if(!empty(ScopeStr), "", "_" # ScopeStr)),
- regclass, ImmType, Imm, ImmTy, Preds>;
+ regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
// Constructs variants for different address spaces.
// For now we only need variants for generic space pointers.
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
- string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
+ string ScopeStr, ValueType regT, NVPTXRegClass regclass, Operand ImmType,
SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
- regclass, ImmType, Imm, ImmTy, Preds>;
+ regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
- string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
+ string ScopeStr, ValueType regT, NVPTXRegClass regclass, Operand ImmType,
SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
- regclass, ImmType, Imm, ImmTy, Preds>;
+ regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
// Constructs variants for different scopes of atomic op.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
- NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
+ ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
ValueType ImmTy, list<Predicate> Preds> {
// .gpu scope is default and is currently covered by existing
// atomics w/o explicitly specified scope.
defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
- regclass, ImmType, Imm, ImmTy,
+ regT, regclass, ImmType, Imm, ImmTy,
!listconcat(Preds,[hasAtomScope])>;
defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
- regclass, ImmType, Imm, ImmTy,
+ regT, regclass, ImmType, Imm, ImmTy,
!listconcat(Preds,[hasAtomScope])>;
}
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
- NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
+ ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
list<Predicate> Preds> {
// No need to define ".gpu"-scoped atomics. They do the same thing
// as the regular, non-scoped atomics defined elsewhere.
defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
- regclass, ImmType, Imm, ImmTy,
+ regT, regclass, ImmType, Imm, ImmTy,
!listconcat(Preds,[hasAtomScope])>;
defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
- regclass, ImmType, Imm, ImmTy,
+ regT, regclass, ImmType, Imm, ImmTy,
!listconcat(Preds,[hasAtomScope])>;
}
// atom.add
multiclass ATOM2_add_impl<string OpStr> {
- defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
- defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
- defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
- defm _f32 : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
+ defm _s32 : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
+ defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
+ defm _u64 : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64, []>;
+ defm _f32 : ATOM2S_impl<OpStr, "f", "f32", f32, Float32Regs, f32imm, fpimm, f32,
[]>;
- defm _f64 : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
+ defm _f64 : ATOM2S_impl<OpStr, "f", "f64", f64, Float64Regs, f64imm, fpimm, f64,
[hasAtomAddF64]>;
}
// atom.{and,or,xor}
multiclass ATOM2_bitwise_impl<string OpStr> {
- defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
- defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
+ defm _b32 : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
+ defm _b64 : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64,
[hasAtomBitwise64]>;
}
// atom.exch
multiclass ATOM2_exch_impl<string OpStr> {
- defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
- defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
+ defm _b32 : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
+ defm _b64 : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64, []>;
}
// atom.{min,max}
multiclass ATOM2_minmax_impl<string OpStr> {
- defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
- defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
- defm _s64 : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
+ defm _s32 : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
+ defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
+ defm _s64 : ATOM2S_impl<OpStr, "i", "s64", i64, Int64Regs, i64imm, imm, i64,
[hasAtomMinMax64]>;
- defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
+ defm _u64 : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64,
[hasAtomMinMax64]>;
}
// atom.{inc,dec}
multiclass ATOM2_incdec_impl<string OpStr> {
- defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
+ defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
}
// atom.cas
multiclass ATOM3_cas_impl<string OpStr> {
- defm _b32 : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
- defm _b64 : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
+ defm _b32 : ATOM3S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
+ defm _b64 : ATOM3S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64, []>;
}
defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 5cd41cc39fd0..9e06f4689304 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -427,7 +427,8 @@ bool NVPTXLowerArgs::runOnKernelFunction(const NVPTXTargetMachine &TM,
auto HandleIntToPtr = [this](Value &V) {
if (llvm::all_of(V.users(), [](User *U) { return isa<IntToPtrInst>(U); })) {
SmallVector<User *, 16> UsersToUpdate(V.users());
- llvm::for_each(UsersToUpdate, [&](User *U) { markPointerAsGlobal(U); });
+ for (User *U : UsersToUpdate)
+ markPointerAsGlobal(U);
}
};
if (TM.getDrvInterface() == NVPTX::CUDA) {
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerUnreachable.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerUnreachable.cpp
index 1d312f82e6c0..34f06b548db2 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerUnreachable.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerUnreachable.cpp
@@ -63,8 +63,9 @@
// `bar.sync` instruction happen divergently.
//
// To work around this, we add an `exit` instruction before every `unreachable`,
-// as `ptxas` understands that exit terminates the CFG. Note that `trap` is not
-// equivalent, and only future versions of `ptxas` will model it like `exit`.
+// as `ptxas` understands that exit terminates the CFG. We only do this if
+// `unreachable` is not lowered to `trap`, which has the same effect (although
+// with current versions of `ptxas` only because it is emitted as `trap; exit;`).
//
//===----------------------------------------------------------------------===//
@@ -83,14 +84,19 @@ void initializeNVPTXLowerUnreachablePass(PassRegistry &);
namespace {
class NVPTXLowerUnreachable : public FunctionPass {
+ StringRef getPassName() const override;
bool runOnFunction(Function &F) override;
+ bool isLoweredToTrap(const UnreachableInst &I) const;
public:
static char ID; // Pass identification, replacement for typeid
- NVPTXLowerUnreachable() : FunctionPass(ID) {}
- StringRef getPassName() const override {
- return "add an exit instruction before every unreachable";
- }
+ NVPTXLowerUnreachable(bool TrapUnreachable, bool NoTrapAfterNoreturn)
+ : FunctionPass(ID), TrapUnreachable(TrapUnreachable),
+ NoTrapAfterNoreturn(NoTrapAfterNoreturn) {}
+
+private:
+ bool TrapUnreachable;
+ bool NoTrapAfterNoreturn;
};
} // namespace
@@ -99,12 +105,33 @@ char NVPTXLowerUnreachable::ID = 1;
INITIALIZE_PASS(NVPTXLowerUnreachable, "nvptx-lower-unreachable",
"Lower Unreachable", false, false)
+StringRef NVPTXLowerUnreachable::getPassName() const {
+ return "add an exit instruction before every unreachable";
+}
+
+// =============================================================================
+// Returns whether a `trap` intrinsic should be emitted before I.
+//
+// This is a copy of the logic in SelectionDAGBuilder::visitUnreachable().
+// =============================================================================
+bool NVPTXLowerUnreachable::isLoweredToTrap(const UnreachableInst &I) const {
+ if (!TrapUnreachable)
+ return false;
+ if (!NoTrapAfterNoreturn)
+ return true;
+ const CallInst *Call = dyn_cast_or_null<CallInst>(I.getPrevNode());
+ return Call && Call->doesNotReturn();
+}
+
// =============================================================================
// Main function for this pass.
// =============================================================================
bool NVPTXLowerUnreachable::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
+ // Early out iff isLoweredToTrap() always returns true.
+ if (TrapUnreachable && !NoTrapAfterNoreturn)
+ return false;
LLVMContext &C = F.getContext();
FunctionType *ExitFTy = FunctionType::get(Type::getVoidTy(C), false);
@@ -114,13 +141,16 @@ bool NVPTXLowerUnreachable::runOnFunction(Function &F) {
for (auto &BB : F)
for (auto &I : BB) {
if (auto unreachableInst = dyn_cast<UnreachableInst>(&I)) {
- Changed = true;
+ if (isLoweredToTrap(*unreachableInst))
+ continue; // trap is emitted as `trap; exit;`.
CallInst::Create(ExitFTy, Exit, "", unreachableInst);
+ Changed = true;
}
}
return Changed;
}
-FunctionPass *llvm::createNVPTXLowerUnreachablePass() {
- return new NVPTXLowerUnreachable();
+FunctionPass *llvm::createNVPTXLowerUnreachablePass(bool TrapUnreachable,
+ bool NoTrapAfterNoreturn) {
+ return new NVPTXLowerUnreachable(TrapUnreachable, NoTrapAfterNoreturn);
}
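
As a concrete illustration, the pass's new behavior reduces to a small predicate over the two TargetOptions flags. A minimal self-contained sketch of the same decision, with invented names (this is not LLVM code):

    #include <cstdio>

    // Mirrors NVPTXLowerUnreachable::isLoweredToTrap() above: an `exit` is
    // inserted only when the `unreachable` is not already lowered to `trap`.
    static bool loweredToTrap(bool TrapUnreachable, bool NoTrapAfterNoreturn,
                              bool AfterNoreturnCall) {
      if (!TrapUnreachable)
        return false;
      if (!NoTrapAfterNoreturn)
        return true;
      return AfterNoreturnCall; // trap survives only right after a noreturn call
    }

    int main() {
      std::printf("%d\n", loweredToTrap(true, true, false));  // 0 -> insert exit
      std::printf("%d\n", loweredToTrap(true, true, true));   // 1 -> keep trap
      std::printf("%d\n", loweredToTrap(true, false, false)); // 1 -> pass early-outs
      return 0;
    }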
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
index b62460e8cd31..b5231a9cf67f 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -58,7 +58,7 @@ foreach i = 0...31 in {
//===----------------------------------------------------------------------===//
def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 4))>;
def Int16Regs : NVPTXRegClass<[i16, f16, bf16], 16, (add (sequence "RS%u", 0, 4))>;
-def Int32Regs : NVPTXRegClass<[i32, v2f16, v2bf16], 32,
+def Int32Regs : NVPTXRegClass<[i32, v2f16, v2bf16, v2i16, v4i8], 32,
(add (sequence "R%u", 0, 4),
VRFrame32, VRFrameLocal32)>;
def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 4), VRFrame64, VRFrameLocal64)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index 4bd820e98f05..85f75df39c0d 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -1812,7 +1812,7 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
StringRef Sym = TexHandleDef.getOperand(6).getSymbolName();
std::string ParamBaseName = std::string(MF.getName());
ParamBaseName += "_param_";
- assert(Sym.startswith(ParamBaseName) && "Invalid symbol reference");
+ assert(Sym.starts_with(ParamBaseName) && "Invalid symbol reference");
unsigned Param = atoi(Sym.data()+ParamBaseName.size());
std::string NewSym;
raw_string_ostream NewSymStr(NewSym);
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 7fa64af196b9..420065585b38 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -36,6 +36,11 @@ NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
ParseSubtargetFeatures(TargetName, /*TuneCPU*/ TargetName, FS);
+  // Re-map SM version numbers: SmVersion carries the regular SM versions,
+  // which do have a relative order, while FullSmVersion allows distinguishing
+  // sm_90 from sm_90a, which would *not* be a subset of sm_91.
+ SmVersion = getSmVersion();
+
// Set default to PTX 6.0 (CUDA 9.0)
if (PTXVersion == 0) {
PTXVersion = 60;
@@ -48,7 +53,7 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const NVPTXTargetMachine &TM)
: NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),
- SmVersion(20), TM(TM),
+ FullSmVersion(200), SmVersion(getSmVersion()), TM(TM),
TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) {}
bool NVPTXSubtarget::hasImageHandles() const {
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 93af11c258b4..3ca4c1a24c79 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -35,7 +35,12 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
// PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31
unsigned PTXVersion;
- // SM version x.y is represented as 10*x+y, e.g. 3.1 == 31
+  // Full SM version x.y is represented as 100*x+10*y+feature, e.g. 3.1 == 310,
+  // sm_90a == 901.
+ unsigned int FullSmVersion;
+
+ // SM version x.y is represented as 10*x+y, e.g. 3.1 == 31. Derived from
+ // FullSmVersion.
unsigned int SmVersion;
const NVPTXTargetMachine &TM;
@@ -80,7 +85,15 @@ public:
bool allowFP16Math() const;
bool hasMaskOperator() const { return PTXVersion >= 71; }
bool hasNoReturn() const { return SmVersion >= 30 && PTXVersion >= 64; }
- unsigned int getSmVersion() const { return SmVersion; }
+ unsigned int getFullSmVersion() const { return FullSmVersion; }
+ unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
+  // GPUs with an "a" suffix include architecture-accelerated features that
+  // are supported on the specified architecture only, hence such targets do
+  // not follow the onion layer model. hasAAFeatures() allows distinguishing
+  // such GPU variants from the base GPU architecture:
+  // - 0 represents the base GPU model,
+  // - a non-zero value identifies a particular architecture-accelerated variant.
+ bool hasAAFeatures() const { return getFullSmVersion() % 10; }
std::string getTargetName() const { return TargetName; }
// Get maximum value of required alignments among the supported data types.
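
To make the encoding concrete, here is a minimal sketch of the FullSmVersion arithmetic described above (plain standalone C++, not LLVM code; the sm_90a == 901 value follows the comment in this header):

    #include <cassert>

    // sm_X.Y with an optional architecture-accelerated suffix is encoded as
    // 100*X + 10*Y + feature, so sm_31 -> 310, sm_90 -> 900, sm_90a -> 901.
    static unsigned smVersion(unsigned FullSmVersion) { return FullSmVersion / 10; }
    static bool hasAAFeatures(unsigned FullSmVersion) { return FullSmVersion % 10 != 0; }

    int main() {
      assert(smVersion(901) == 90); // sm_90a orders like sm_90,
      assert(hasAAFeatures(901));   // but is flagged as an "a" variant;
      assert(!hasAAFeatures(900));  // base sm_90 carries no such features,
      assert(smVersion(910) == 91); // and sm_91 is *not* a superset of sm_90a.
      return 0;
    }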
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 1892f951ee83..8d895762fbe1 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -63,13 +63,6 @@ static cl::opt<bool> UseShortPointersOpt(
"Use 32-bit pointers for accessing const/local/shared address spaces."),
cl::init(false), cl::Hidden);
-// FIXME: intended as a temporary debugging aid. Should be removed before it
-// makes it into the LLVM-17 release.
-static cl::opt<bool>
- ExitOnUnreachable("nvptx-exit-on-unreachable",
- cl::desc("Lower 'unreachable' as 'exit' instruction."),
- cl::init(true), cl::Hidden);
-
namespace llvm {
void initializeGenericToNVVMLegacyPassPass(PassRegistry &);
@@ -133,7 +126,7 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool is64bit)
+ CodeGenOptLevel OL, bool is64bit)
// The pic relocation model is used regardless of what the client has
// specified, as it is the only relocation model currently supported.
: LLVMTargetMachine(T, computeDataLayout(is64bit, UseShortPointersOpt), TT,
@@ -161,7 +154,7 @@ NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
void NVPTXTargetMachine64::anchor() {}
@@ -171,7 +164,7 @@ NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
namespace {
@@ -310,7 +303,7 @@ NVPTXTargetMachine::getPredicatedAddrSpace(const Value *V) const {
}
void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
- if (getOptLevel() == CodeGenOpt::Aggressive)
+ if (getOptLevel() == CodeGenOptLevel::Aggressive)
addPass(createGVNPass());
else
addPass(createEarlyCSEPass());
@@ -373,7 +366,7 @@ void NVPTXPassConfig::addIRPasses() {
const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
addPass(createNVVMReflectPass(ST.getSmVersion()));
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
addPass(createNVPTXImageOptimizerPass());
addPass(createNVPTXAssignValidGlobalNamesPass());
addPass(createGenericToNVVMLegacyPass());
@@ -381,7 +374,7 @@ void NVPTXPassConfig::addIRPasses() {
// NVPTXLowerArgs is required for correctness and should be run right
// before the address space inference passes.
addPass(createNVPTXLowerArgsPass());
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOptLevel::None) {
addAddressSpaceInferencePasses();
addStraightLineScalarOptimizationPasses();
}
@@ -403,15 +396,16 @@ void NVPTXPassConfig::addIRPasses() {
// %1 = shl %a, 2
//
// but EarlyCSE can do neither of them.
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOptLevel::None) {
addEarlyCSEOrGVNPass();
if (!DisableLoadStoreVectorizer)
addPass(createLoadStoreVectorizerPass());
addPass(createSROAPass());
}
- if (ExitOnUnreachable)
- addPass(createNVPTXLowerUnreachablePass());
+ const auto &Options = getNVPTXTargetMachine().Options;
+ addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
+ Options.NoTrapAfterNoreturn));
}
bool NVPTXPassConfig::addInstSelector() {
@@ -434,7 +428,7 @@ void NVPTXPassConfig::addPreRegAlloc() {
void NVPTXPassConfig::addPostRegAlloc() {
addPass(createNVPTXPrologEpilogPass());
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOptLevel::None) {
    // NVPTXPrologEpilogPass calculates frame object offsets and replaces frame
    // indices with the VRFrame register. NVPTXPeephole needs to be run after
    // that and will replace VRFrame with VRFrameLocal when possible.
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
index 25dfea11aabc..cfdd8da9b765 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -38,7 +38,7 @@ public:
NVPTXTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OP,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OP,
bool is64bit);
~NVPTXTargetMachine() override;
const NVPTXSubtarget *getSubtargetImpl(const Function &) const override {
@@ -88,7 +88,7 @@ public:
NVPTXTargetMachine32(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
};
@@ -99,7 +99,7 @@ public:
NVPTXTargetMachine64(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
};
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index 988910810da6..35302889095f 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -262,6 +262,10 @@ bool getMaxNTIDz(const Function &F, unsigned &z) {
return findOneNVVMAnnotation(&F, "maxntidz", z);
}
+bool getMaxClusterRank(const Function &F, unsigned &x) {
+ return findOneNVVMAnnotation(&F, "maxclusterrank", x);
+}
+
bool getReqNTIDx(const Function &F, unsigned &x) {
return findOneNVVMAnnotation(&F, "reqntidx", x);
}
@@ -348,4 +352,8 @@ bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM) {
!isKernelFunction(*F);
}
+bool Isv2x16VT(EVT VT) {
+ return (VT == MVT::v2f16 || VT == MVT::v2bf16 || VT == MVT::v2i16);
+}
+
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXUtilities.h
index f980ea3dec0b..449973bb53de 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXUtilities.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXUTILITIES_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXUTILITIES_H
+#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -54,6 +55,7 @@ bool getReqNTIDx(const Function &, unsigned &);
bool getReqNTIDy(const Function &, unsigned &);
bool getReqNTIDz(const Function &, unsigned &);
+bool getMaxClusterRank(const Function &, unsigned &);
bool getMinCTASm(const Function &, unsigned &);
bool getMaxNReg(const Function &, unsigned &);
bool isKernelFunction(const Function &);
@@ -74,6 +76,8 @@ inline unsigned promoteScalarArgumentSize(unsigned size) {
}
bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM);
+
+bool Isv2x16VT(EVT VT);
}
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index 7ff5a5eb791d..7d2678ae5927 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -20,7 +20,6 @@
#include "NVPTX.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 4f93cdaaa137..8108cfa521c8 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -105,10 +105,9 @@ class PPCAsmParser : public MCTargetAsmParser {
bool MatchRegisterName(MCRegister &RegNo, int64_t &IntVal);
- bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
- OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
const MCExpr *ExtractModifierFromExpr(const MCExpr *E,
PPCMCExpr::VariantKind &Variant);
@@ -887,6 +886,26 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
Inst = TmpInst;
break;
}
+ case PPC::PLA8:
+ case PPC::PLA: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode == PPC::PLA ? PPC::PADDI : PPC::PADDI8);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::PLA8pc:
+ case PPC::PLApc: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode == PPC::PLApc ? PPC::PADDIpc : PPC::PADDI8pc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ Inst = TmpInst;
+ break;
+ }
case PPC::SUBI: {
MCInst TmpInst;
TmpInst.setOpcode(PPC::ADDI);
@@ -896,6 +915,15 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
Inst = TmpInst;
break;
}
+ case PPC::PSUBI: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(PPC::PADDI);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ addNegOperand(TmpInst, Inst.getOperand(2), getContext());
+ Inst = TmpInst;
+ break;
+ }
case PPC::SUBIS: {
MCInst TmpInst;
TmpInst.setOpcode(PPC::ADDIS);
@@ -1320,24 +1348,23 @@ bool PPCAsmParser::MatchRegisterName(MCRegister &RegNo, int64_t &IntVal) {
return false;
}
-bool PPCAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
+bool PPCAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) {
- if (tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success)
+ if (!tryParseRegister(Reg, StartLoc, EndLoc).isSuccess())
return TokError("invalid register name");
return false;
}
-OperandMatchResultTy PPCAsmParser::tryParseRegister(MCRegister &RegNo,
- SMLoc &StartLoc,
- SMLoc &EndLoc) {
+ParseStatus PPCAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
const AsmToken &Tok = getParser().getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
- RegNo = 0;
+ Reg = PPC::NoRegister;
int64_t IntVal;
- if (MatchRegisterName(RegNo, IntVal))
- return MatchOperand_NoMatch;
- return MatchOperand_Success;
+ if (MatchRegisterName(Reg, IntVal))
+ return ParseStatus::NoMatch;
+ return ParseStatus::Success;
}
/// Extract \code @l/@ha \endcode modifier from expression. Recursively scan
@@ -1717,7 +1744,7 @@ bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) {
ParseDirectiveAbiVersion(DirectiveID.getLoc());
else if (IDVal == ".localentry")
ParseDirectiveLocalEntry(DirectiveID.getLoc());
- else if (IDVal.startswith(".gnu_attribute"))
+ else if (IDVal.starts_with(".gnu_attribute"))
ParseGNUAttribute(DirectiveID.getLoc());
else
return true;
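
The new psubi handling mirrors the existing subi path: the parser accepts the extended mnemonic and re-emits it as the base prefixed add with a negated immediate. A reduced sketch with MCInst collapsed to a plain struct (not LLVM code; the real addNegOperand() also handles expression operands):

    #include <cassert>
    #include <cstdint>

    struct Inst {
      const char *Opcode;
      unsigned RT, RA;
      int64_t Imm;
    };

    // psubi RT, RA, SI  ->  paddi RT, RA, -SI
    static Inst expandPSUBI(const Inst &I) {
      return {"PADDI", I.RT, I.RA, -I.Imm};
    }

    int main() {
      Inst Out = expandPSUBI({"PSUBI", /*RT=*/3, /*RA=*/4, /*SI=*/100});
      assert(Out.Imm == -100); // same registers, negated immediate
      return 0;
    }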
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp
index 3913ede3dc18..99ecc3fe360d 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp
@@ -36,9 +36,10 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
: OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB) {}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) override;
+ const CCValAssign &VA) override;
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
- MachinePointerInfo &MPO, CCValAssign &VA) override;
+ const MachinePointerInfo &MPO,
+ const CCValAssign &VA) override;
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO,
ISD::ArgFlagsTy Flags) override;
@@ -48,7 +49,7 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
} // namespace
void OutgoingArgHandler::assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) {
+ const CCValAssign &VA) {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
@@ -56,8 +57,8 @@ void OutgoingArgHandler::assignValueToReg(Register ValVReg, Register PhysReg,
void OutgoingArgHandler::assignValueToAddress(Register ValVReg, Register Addr,
LLT MemTy,
- MachinePointerInfo &MPO,
- CCValAssign &VA) {
+ const MachinePointerInfo &MPO,
+ const CCValAssign &VA) {
llvm_unreachable("unimplemented");
}
@@ -143,18 +144,18 @@ bool PPCCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
void PPCIncomingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
- CCValAssign VA) {
+ const CCValAssign &VA) {
markPhysRegUsed(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
-void PPCIncomingValueHandler::assignValueToAddress(Register ValVReg,
- Register Addr, LLT MemTy,
- MachinePointerInfo &MPO,
- CCValAssign &VA) {
+void PPCIncomingValueHandler::assignValueToAddress(
+ Register ValVReg, Register Addr, LLT MemTy, const MachinePointerInfo &MPO,
+ const CCValAssign &VA) {
  // define a lambda expression to load the value
- auto BuildLoad = [](MachineIRBuilder &MIRBuilder, MachinePointerInfo &MPO,
- LLT MemTy, const DstOp &Res, Register Addr) {
+ auto BuildLoad = [](MachineIRBuilder &MIRBuilder,
+ const MachinePointerInfo &MPO, LLT MemTy,
+ const DstOp &Res, Register Addr) {
MachineFunction &MF = MIRBuilder.getMF();
auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, MemTy,
inferAlignFromPtrInfo(MF, MPO));
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h
index cc2cb7b26e84..17e8c57e563f 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h
@@ -46,10 +46,11 @@ public:
private:
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) override;
+ const CCValAssign &VA) override;
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
- MachinePointerInfo &MPO, CCValAssign &VA) override;
+ const MachinePointerInfo &MPO,
+ const CCValAssign &VA) override;
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO,
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
index 25587b39b97f..6aeef145e307 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
@@ -12,6 +12,7 @@
#include "PPCRegisterBankInfo.h"
#include "PPCRegisterInfo.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
@@ -289,8 +290,11 @@ bool PPCRegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
const TargetRegisterInfo &TRI,
unsigned Depth) const {
unsigned Op = MI.getOpcode();
- if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MI.getIntrinsicID()))
- return true;
+
+ if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
+ if (isFPIntrinsic(GI->getIntrinsicID()))
+ return true;
+ }
// Do we have an explicit floating point instruction?
if (isPreISelGenericFloatingPointOpcode(Op))
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
index c2a16c92ba85..1477fdca917d 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
@@ -37,9 +37,9 @@ protected:
PMI_Min = PMI_GPR32,
};
- static RegisterBankInfo::PartialMapping PartMappings[];
- static RegisterBankInfo::ValueMapping ValMappings[];
- static PartialMappingIdx BankIDToCopyMapIdx[];
+ static const RegisterBankInfo::PartialMapping PartMappings[];
+ static const RegisterBankInfo::ValueMapping ValMappings[];
+ static const PartialMappingIdx BankIDToCopyMapIdx[];
/// Get the pointer to the ValueMapping representing the RegisterBank
/// at \p RBIdx.
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 89d04dbe378e..251737ed1275 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -87,7 +87,8 @@ protected:
Triple TT;
public:
PPCAsmBackend(const Target &T, const Triple &TT)
- : MCAsmBackend(TT.isLittleEndian() ? support::little : support::big),
+ : MCAsmBackend(TT.isLittleEndian() ? llvm::endianness::little
+ : llvm::endianness::big),
TT(TT) {}
unsigned getNumFixupKinds() const override {
@@ -132,7 +133,7 @@ public:
assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
"Invalid kind!");
- return (Endian == support::little
+ return (Endian == llvm::endianness::little
? InfosLE
: InfosBE)[Kind - FirstTargetFixupKind];
}
@@ -154,13 +155,15 @@ public:
// from the fixup value. The Value has been "split up" into the appropriate
// bitfields above.
for (unsigned i = 0; i != NumBytes; ++i) {
- unsigned Idx = Endian == support::little ? i : (NumBytes - 1 - i);
+ unsigned Idx =
+ Endian == llvm::endianness::little ? i : (NumBytes - 1 - i);
Data[Offset + i] |= uint8_t((Value >> (Idx * 8)) & 0xff);
}
}
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target) override {
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) override {
MCFixupKind Kind = Fixup.getKind();
switch ((unsigned)Kind) {
default:
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 1e58039582c2..6a72b7b9ad05 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -28,7 +28,7 @@ namespace {
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
- bool needsRelocateWithSymbol(const MCSymbol &Sym,
+ bool needsRelocateWithSymbol(const MCValue &Val, const MCSymbol &Sym,
unsigned Type) const override;
};
}
@@ -456,7 +456,13 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
}
break;
case FK_Data_4:
- Type = ELF::R_PPC_ADDR32;
+ switch (Modifier) {
+ case MCSymbolRefExpr::VK_DTPREL:
+ Type = ELF::R_PPC_DTPREL32;
+ break;
+ default:
+ Type = ELF::R_PPC_ADDR32;
+ }
break;
case FK_Data_2:
Type = ELF::R_PPC_ADDR16;
@@ -466,7 +472,8 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
return Type;
}
-bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
+bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCValue &,
+ const MCSymbol &Sym,
unsigned Type) const {
switch (Type) {
default:
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
index 2f03aa37745f..1eaa57e16260 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
@@ -17,11 +17,10 @@
//
//===----------------------------------------------------------------------===//
-
#include "PPCELFStreamer.h"
#include "PPCFixupKinds.h"
-#include "PPCInstrInfo.h"
#include "PPCMCCodeEmitter.h"
+#include "PPCMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
index 7d786ac13bb9..10204b184a49 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
@@ -13,7 +13,6 @@
#ifndef LLVM_LIB_TARGET_PPC_MCELFSTREAMER_PPCELFSTREAMER_H
#define LLVM_LIB_TARGET_PPC_MCELFSTREAMER_PPCELFSTREAMER_H
-#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCELFStreamer.h"
#include <memory>
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
index 13480da4e731..9a4291c90408 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
@@ -13,14 +13,14 @@
#include "MCTargetDesc/PPCInstPrinter.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
-#include "PPCInstrInfo.h"
-#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -484,7 +484,10 @@ void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo,
if (!MI->getOperand(OpNo).isImm())
return printOperand(MI, OpNo, STI, O);
- O << SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2);
+ uint64_t Imm = static_cast<uint64_t>(MI->getOperand(OpNo).getImm()) << 2;
+ if (!TT.isPPC64())
+ Imm = static_cast<uint32_t>(Imm);
+ O << formatHex(Imm);
}
void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo,
@@ -597,7 +600,8 @@ void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
/// showRegistersWithPercentPrefix - Check if this register name should be
/// printed with a percentage symbol as prefix.
bool PPCInstPrinter::showRegistersWithPercentPrefix(const char *RegName) const {
- if (!FullRegNamesWithPercent || TT.getOS() == Triple::AIX)
+ if ((!FullRegNamesWithPercent && !MAI.useFullRegisterNames()) ||
+ TT.getOS() == Triple::AIX)
return false;
switch (RegName[0]) {
@@ -614,10 +618,10 @@ bool PPCInstPrinter::showRegistersWithPercentPrefix(const char *RegName) const {
/// getVerboseConditionRegName - This method expands the condition register
/// when requested explicitly or when targeting Darwin.
-const char *PPCInstPrinter::getVerboseConditionRegName(unsigned RegNum,
- unsigned RegEncoding)
- const {
- if (!FullRegNames)
+const char *
+PPCInstPrinter::getVerboseConditionRegName(unsigned RegNum,
+ unsigned RegEncoding) const {
+ if (!FullRegNames && !MAI.useFullRegisterNames())
return nullptr;
if (RegNum < PPC::CR0EQ || RegNum > PPC::CR7UN)
return nullptr;
@@ -637,7 +641,7 @@ const char *PPCInstPrinter::getVerboseConditionRegName(unsigned RegNum,
// showRegistersWithPrefix - This method determines whether registers
// should be number-only or include the prefix.
bool PPCInstPrinter::showRegistersWithPrefix() const {
- return FullRegNamesWithPercent || FullRegNames;
+ return FullRegNamesWithPercent || FullRegNames || MAI.useFullRegisterNames();
}
void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
@@ -646,8 +650,7 @@ void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (Op.isReg()) {
unsigned Reg = Op.getReg();
if (!ShowVSRNumsAsVR)
- Reg = PPCInstrInfo::getRegNumForOperand(MII.get(MI->getOpcode()),
- Reg, OpNo);
+ Reg = PPC::getRegNumForOperand(MII.get(MI->getOpcode()), Reg, OpNo);
const char *RegName;
RegName = getVerboseConditionRegName(Reg, MRI.getEncodingValue(Reg));
@@ -656,7 +659,7 @@ void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (showRegistersWithPercentPrefix(RegName))
O << "%";
if (!showRegistersWithPrefix())
- RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
+ RegName = PPC::stripRegisterPrefix(RegName);
O << RegName;
return;
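
For the printAbsBranchOperand() change above, the observable effect is that absolute branch targets are now printed in hex and, on 32-bit triples, truncated so the 64-bit sign extension of a negative immediate cannot leak into the output. A standalone sketch of the computation (not LLVM code):

    #include <cassert>
    #include <cstdint>

    // The branch immediate is a word offset: shift left by 2, truncate to
    // 32 bits on ppc32, then print via formatHex().
    static uint64_t absBranchTarget(int64_t Imm, bool IsPPC64) {
      uint64_t Target = static_cast<uint64_t>(Imm) << 2;
      if (!IsPPC64)
        Target = static_cast<uint32_t>(Target);
      return Target;
    }

    int main() {
      assert(absBranchTarget(0x800, /*IsPPC64=*/true) == 0x2000);
      assert(absBranchTarget(-4, /*IsPPC64=*/false) == 0xfffffff0u);
      return 0;
    }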
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index a5dc0b45b13c..4716e37b3443 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -12,7 +12,6 @@
#include "PPCMCAsmInfo.h"
#include "llvm/TargetParser/Triple.h"
-#include <cassert>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index da0174ce1982..910b5892d033 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -12,12 +12,14 @@
#include "PPCMCCodeEmitter.h"
#include "MCTargetDesc/PPCFixupKinds.h"
-#include "PPCInstrInfo.h"
+#include "PPCMCTargetDesc.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
@@ -47,16 +49,108 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
if (MO.isReg() || MO.isImm())
return getMachineOpValue(MI, MO, Fixups, STI);
- const PPCInstrInfo *InstrInfo = static_cast<const PPCInstrInfo *>(&MCII);
- unsigned Opcode = MI.getOpcode();
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::create(0, MO.getExpr(),
- (InstrInfo->isNoTOCCallInstr(Opcode)
+ (isNoTOCCallInstr(MI)
? (MCFixupKind)PPC::fixup_ppc_br24_notoc
: (MCFixupKind)PPC::fixup_ppc_br24)));
return 0;
}
+/// Check if Opcode corresponds to a call instruction that should be marked
+/// with the NOTOC relocation.
+bool PPCMCCodeEmitter::isNoTOCCallInstr(const MCInst &MI) const {
+ unsigned Opcode = MI.getOpcode();
+ if (!MCII.get(Opcode).isCall())
+ return false;
+
+ switch (Opcode) {
+ default:
+#ifndef NDEBUG
+ llvm_unreachable("Unknown call opcode");
+#endif
+ return false;
+ case PPC::BL8_NOTOC:
+ case PPC::BL8_NOTOC_TLS:
+ case PPC::BL8_NOTOC_RM:
+ return true;
+#ifndef NDEBUG
+ case PPC::BL8:
+ case PPC::BL:
+ case PPC::BL8_TLS:
+ case PPC::BL_TLS:
+ case PPC::BLA8:
+ case PPC::BLA:
+ case PPC::BCCL:
+ case PPC::BCCLA:
+ case PPC::BCL:
+ case PPC::BCLn:
+ case PPC::BL8_NOP:
+ case PPC::BL_NOP:
+ case PPC::BL8_NOP_TLS:
+ case PPC::BLA8_NOP:
+ case PPC::BCTRL8:
+ case PPC::BCTRL:
+ case PPC::BCCCTRL8:
+ case PPC::BCCCTRL:
+ case PPC::BCCTRL8:
+ case PPC::BCCTRL:
+ case PPC::BCCTRL8n:
+ case PPC::BCCTRLn:
+ case PPC::BL8_RM:
+ case PPC::BLA8_RM:
+ case PPC::BL8_NOP_RM:
+ case PPC::BLA8_NOP_RM:
+ case PPC::BCTRL8_RM:
+ case PPC::BCTRL8_LDinto_toc:
+ case PPC::BCTRL8_LDinto_toc_RM:
+ case PPC::BL8_TLS_:
+ case PPC::TCRETURNdi8:
+ case PPC::TCRETURNai8:
+ case PPC::TCRETURNri8:
+ case PPC::TAILBCTR8:
+ case PPC::TAILB8:
+ case PPC::TAILBA8:
+ case PPC::BCLalways:
+ case PPC::BLRL:
+ case PPC::BCCLRL:
+ case PPC::BCLRL:
+ case PPC::BCLRLn:
+ case PPC::BDZL:
+ case PPC::BDNZL:
+ case PPC::BDZLA:
+ case PPC::BDNZLA:
+ case PPC::BDZLp:
+ case PPC::BDNZLp:
+ case PPC::BDZLAp:
+ case PPC::BDNZLAp:
+ case PPC::BDZLm:
+ case PPC::BDNZLm:
+ case PPC::BDZLAm:
+ case PPC::BDNZLAm:
+ case PPC::BDZLRL:
+ case PPC::BDNZLRL:
+ case PPC::BDZLRLp:
+ case PPC::BDNZLRLp:
+ case PPC::BDZLRLm:
+ case PPC::BDNZLRLm:
+ case PPC::BL_RM:
+ case PPC::BLA_RM:
+ case PPC::BL_NOP_RM:
+ case PPC::BCTRL_RM:
+ case PPC::TCRETURNdi:
+ case PPC::TCRETURNai:
+ case PPC::TCRETURNri:
+ case PPC::BCTRL_LWZinto_toc:
+ case PPC::BCTRL_LWZinto_toc_RM:
+ case PPC::TAILBCTR:
+ case PPC::TAILB:
+ case PPC::TAILBA:
+ return false;
+#endif
+ }
+}
+
unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -372,7 +466,7 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
}
// Get the index for this operand in this instruction. This is needed for
-// computing the register number in PPCInstrInfo::getRegNumForOperand() for
+// computing the register number in PPC::getRegNumForOperand() for
// any instructions that use a different numbering scheme for registers in
// different operands.
static unsigned getOpIdxForMO(const MCInst &MI, const MCOperand &MO) {
@@ -397,8 +491,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
unsigned OpNo = getOpIdxForMO(MI, MO);
unsigned Reg =
- PPCInstrInfo::getRegNumForOperand(MCII.get(MI.getOpcode()),
- MO.getReg(), OpNo);
+ PPC::getRegNumForOperand(MCII.get(MI.getOpcode()), MO.getReg(), OpNo);
return CTX.getRegisterInfo()->getEncodingValue(Reg);
}
@@ -415,7 +508,8 @@ void PPCMCCodeEmitter::encodeInstruction(const MCInst &MI,
// Output the constant in big/little endian byte order.
unsigned Size = getInstSizeInBytes(MI);
- support::endianness E = IsLittleEndian ? support::little : support::big;
+ llvm::endianness E =
+ IsLittleEndian ? llvm::endianness::little : llvm::endianness::big;
switch (Size) {
case 0:
break;
@@ -443,9 +537,7 @@ unsigned PPCMCCodeEmitter::getInstSizeInBytes(const MCInst &MI) const {
}
bool PPCMCCodeEmitter::isPrefixedInstruction(const MCInst &MI) const {
- unsigned Opcode = MI.getOpcode();
- const PPCInstrInfo *InstrInfo = static_cast<const PPCInstrInfo*>(&MCII);
- return InstrInfo->isPrefixed(Opcode);
+ return MCII.get(MI.getOpcode()).TSFlags & PPCII::Prefixed;
}
#include "PPCGenMCCodeEmitter.inc"
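
The guarded default in isNoTOCCallInstr() is a deliberate pattern: debug builds abort on any call opcode that has not been classified, so new call opcodes must be added to the list, while release builds compile the long run of explicit "false" cases away and simply fall back to the default. A stripped-down sketch of the same pattern (not LLVM code; opcodes invented):

    #include <cassert>

    enum Opcode { BL, BL_NOTOC, BCTRL };

    static bool isNoTOCCall(Opcode Op) {
      switch (Op) {
      default:
    #ifndef NDEBUG
        assert(false && "Unknown call opcode");
    #endif
        return false; // release builds: unknown calls are not NOTOC
      case BL_NOTOC:
        return true;
    #ifndef NDEBUG
      case BL: // listed only so debug builds know they were considered
      case BCTRL:
        return false;
    #endif
      }
    }

    int main() { return isNoTOCCall(BL_NOTOC) ? 0 : 1; }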
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
index 17a15ef18cb7..b57455718319 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
@@ -121,6 +121,10 @@ public:
// Is this instruction a prefixed instruction.
bool isPrefixedInstruction(const MCInst &MI) const;
+
+ /// Check if Opcode corresponds to a call instruction that should be marked
+ /// with the NOTOC relocation.
+ bool isNoTOCCallInstr(const MCInst &MI) const;
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 271f7ab757e1..a804dd823daa 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -57,6 +57,90 @@ using namespace llvm;
#define GET_REGINFO_MC_DESC
#include "PPCGenRegisterInfo.inc"
+/// stripRegisterPrefix - This method strips the character prefix from a
+/// register name so that only the number is left. Used for Linux asm.
+const char *PPC::stripRegisterPrefix(const char *RegName) {
+ switch (RegName[0]) {
+ case 'a':
+ if (RegName[1] == 'c' && RegName[2] == 'c')
+ return RegName + 3;
+ break;
+ case 'f':
+ if (RegName[1] == 'p')
+ return RegName + 2;
+ [[fallthrough]];
+ case 'r':
+ case 'v':
+ if (RegName[1] == 's') {
+ if (RegName[2] == 'p')
+ return RegName + 3;
+ return RegName + 2;
+ }
+ return RegName + 1;
+ case 'c':
+ if (RegName[1] == 'r')
+ return RegName + 2;
+ break;
+ case 'w':
+ // For wacc and wacc_hi
+ if (RegName[1] == 'a' && RegName[2] == 'c' && RegName[3] == 'c') {
+ if (RegName[4] == '_')
+ return RegName + 7;
+ else
+ return RegName + 4;
+ }
+ break;
+ case 'd':
+ // For dmr, dmrp, dmrrow, dmrrowp
+ if (RegName[1] == 'm' && RegName[2] == 'r') {
+ if (RegName[3] == 'r' && RegName[4] == 'o' && RegName[5] == 'w' &&
+ RegName[6] == 'p')
+ return RegName + 7;
+ else if (RegName[3] == 'r' && RegName[4] == 'o' && RegName[5] == 'w')
+ return RegName + 6;
+ else if (RegName[3] == 'p')
+ return RegName + 4;
+ else
+ return RegName + 3;
+ }
+ break;
+ }
+
+ return RegName;
+}
+
+/// getRegNumForOperand - some operands use different numbering schemes
+/// for the same registers. For example, a VSX instruction may have any of
+/// vs0-vs63 allocated whereas an Altivec instruction could only have
+/// vs32-vs63 allocated (numbered as v0-v31). This function returns the actual
+/// register number needed for the opcode/operand number combination.
+/// The operand number argument will be useful when we need to extend this
+/// to instructions that use both Altivec and VSX numbering (for different
+/// operands).
+unsigned PPC::getRegNumForOperand(const MCInstrDesc &Desc, unsigned Reg,
+ unsigned OpNo) {
+ int16_t regClass = Desc.operands()[OpNo].RegClass;
+ switch (regClass) {
+ // We store F0-F31, VF0-VF31 in MCOperand and it should be F0-F31,
+ // VSX32-VSX63 during encoding/disassembling
+ case PPC::VSSRCRegClassID:
+ case PPC::VSFRCRegClassID:
+ if (PPC::isVFRegister(Reg))
+ return PPC::VSX32 + (Reg - PPC::VF0);
+ break;
+ // We store VSL0-VSL31, V0-V31 in MCOperand and it should be VSL0-VSL31,
+ // VSX32-VSX63 during encoding/disassembling
+ case PPC::VSRCRegClassID:
+ if (PPC::isVRRegister(Reg))
+ return PPC::VSX32 + (Reg - PPC::V0);
+ break;
+  // Other register classes don't need mapping
+ default:
+ break;
+ }
+ return Reg;
+}
+
PPCTargetStreamer::PPCTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
// Pin the vtable to this file.
@@ -148,9 +232,10 @@ public:
cast<MCSectionXCOFF>(Streamer.getCurrentSectionOnly())
->getQualNameSymbol();
// On AIX, we have a region handle (symbol@m) and the variable offset
- // (symbol@{gd|le}) for TLS variables, depending on the TLS model.
+ // (symbol@{gd|ie|le}) for TLS variables, depending on the TLS model.
if (Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD ||
Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM ||
+ Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE ||
Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE)
OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << "@"
<< MCSymbolRefExpr::getVariantKindName(Kind) << '\n';
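
The prefix stripping above is purely lexical. A condensed re-implementation shows the intended mapping from assembler names to bare numbers (not LLVM code; only the r/v/vs/cr/f prefixes are covered, whereas the real function also handles vsp, acc, wacc/wacc_hi and the dmr* families):

    #include <cassert>
    #include <cstring>

    static const char *stripPrefix(const char *Name) {
      if (std::strncmp(Name, "vs", 2) == 0 || std::strncmp(Name, "cr", 2) == 0)
        return Name + 2; // "vs52" -> "52", "cr7" -> "7"
      if (Name[0] == 'r' || Name[0] == 'v' || Name[0] == 'f')
        return Name + 1; // "r3" -> "3", "v31" -> "31", "f1" -> "1"
      return Name;
    }

    int main() {
      assert(std::strcmp(stripPrefix("r3"), "3") == 0);
      assert(std::strcmp(stripPrefix("vs52"), "52") == 0);
      assert(std::strcmp(stripPrefix("cr7"), "7") == 0);
      return 0;
    }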
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 86ca1386fed9..16777725990a 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -26,6 +26,7 @@ namespace llvm {
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
+class MCInstrDesc;
class MCInstrInfo;
class MCObjectTargetWriter;
class MCRegisterInfo;
@@ -33,6 +34,24 @@ class MCSubtargetInfo;
class MCTargetOptions;
class Target;
+namespace PPC {
+/// stripRegisterPrefix - This method strips the character prefix from a
+/// register name so that only the number is left. Used for Linux asm.
+const char *stripRegisterPrefix(const char *RegName);
+
+/// getRegNumForOperand - some operands use different numbering schemes
+/// for the same registers. For example, a VSX instruction may have any of
+/// vs0-vs63 allocated whereas an Altivec instruction could only have
+/// vs32-vs63 allocated (numbered as v0-v31). This function returns the actual
+/// register number needed for the opcode/operand number combination.
+/// The operand number argument will be useful when we need to extend this
+/// to instructions that use both Altivec and VSX numbering (for different
+/// operands).
+unsigned getRegNumForOperand(const MCInstrDesc &Desc, unsigned Reg,
+ unsigned OpNo);
+
+} // namespace PPC
+
MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
MCContext &Ctx);
@@ -102,11 +121,61 @@ static inline bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME) {
return false;
}
-} // end namespace llvm
+/// PPCII - This namespace holds all of the PowerPC target-specific
+/// per-instruction flags. These must match the corresponding definitions in
+/// PPC.td and PPCInstrFormats.td.
+namespace PPCII {
+enum {
+ // PPC970 Instruction Flags. These flags describe the characteristics of the
+ // PowerPC 970 (aka G5) dispatch groups and how they are formed out of
+ // raw machine instructions.
-// Generated files will use "namespace PPC". To avoid symbol clash,
-// undefine PPC here. PPC may be predefined on some hosts.
-#undef PPC
+ /// PPC970_First - This instruction starts a new dispatch group, so it will
+ /// always be the first one in the group.
+ PPC970_First = 0x1,
+
+ /// PPC970_Single - This instruction starts a new dispatch group and
+ /// terminates it, so it will be the sole instruction in the group.
+ PPC970_Single = 0x2,
+
+ /// PPC970_Cracked - This instruction is cracked into two pieces, requiring
+ /// two dispatch pipes to be available to issue.
+ PPC970_Cracked = 0x4,
+
+ /// PPC970_Mask/Shift - This is a bitmask that selects the pipeline type that
+ /// an instruction is issued to.
+ PPC970_Shift = 3,
+ PPC970_Mask = 0x07 << PPC970_Shift
+};
+enum PPC970_Unit {
+ /// These are the various PPC970 execution unit pipelines. Each instruction
+ /// is one of these.
+ PPC970_Pseudo = 0 << PPC970_Shift, // Pseudo instruction
+ PPC970_FXU = 1 << PPC970_Shift, // Fixed Point (aka Integer/ALU) Unit
+ PPC970_LSU = 2 << PPC970_Shift, // Load Store Unit
+ PPC970_FPU = 3 << PPC970_Shift, // Floating Point Unit
+ PPC970_CRU = 4 << PPC970_Shift, // Control Register Unit
+ PPC970_VALU = 5 << PPC970_Shift, // Vector ALU
+ PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit
+ PPC970_BRU = 7 << PPC970_Shift // Branch Unit
+};
+
+enum {
+ /// Shift count to bypass PPC970 flags
+ NewDef_Shift = 6,
+
+ /// This instruction is an X-Form memory operation.
+ XFormMemOp = 0x1 << NewDef_Shift,
+ /// This instruction is prefixed.
+ Prefixed = 0x1 << (NewDef_Shift + 1),
+  /// This instruction produces a sign-extended result.
+ SExt32To64 = 0x1 << (NewDef_Shift + 2),
+ /// This instruction produced a zero extended result.
+ ZExt32To64 = 0x1 << (NewDef_Shift + 3)
+};
+} // end namespace PPCII
+
+} // end namespace llvm
// Defines symbolic names for PowerPC registers. This defines a mapping from
// register name to register number.
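A hedged sketch of how these packed fields are read back from an instruction's TSFlags word; the enum values above are assumed to be in scope, and the authoritative accessors live elsewhere in the target:

    #include <cstdint>

    static bool startsDispatchGroup(uint64_t TSFlags) {
      return TSFlags & (PPCII::PPC970_First | PPCII::PPC970_Single);
    }

    static PPCII::PPC970_Unit dispatchUnit(uint64_t TSFlags) {
      // PPC970_Mask selects the 3-bit pipeline field; the PPC970_Unit values
      // are pre-shifted, so the masked value compares directly against them.
      return static_cast<PPCII::PPC970_Unit>(TSFlags & PPCII::PPC970_Mask);
    }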
@@ -214,4 +283,16 @@ using llvm::MCPhysReg;
static const MCPhysReg DMRRegs[8] = PPC_REGS0_7(PPC::DMR); \
static const MCPhysReg DMRpRegs[4] = PPC_REGS0_3(PPC::DMRp);
+namespace llvm {
+namespace PPC {
+static inline bool isVFRegister(unsigned Reg) {
+ return Reg >= PPC::VF0 && Reg <= PPC::VF31;
+}
+
+static inline bool isVRRegister(unsigned Reg) {
+ return Reg >= PPC::V0 && Reg <= PPC::V31;
+}
+} // namespace PPC
+} // namespace llvm
+
#endif // LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H
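The two new predicates replace PPCInstrInfo statics of the same name. A sketch of the renumbering they support, mirroring the PPCAsmPrinter change further down (Altivec v0-v31 and their VF aliases occupy vs32-vs63 of the VSX file):

    static unsigned toVSXNumbering(unsigned Reg) {
      if (PPC::isVRRegister(Reg))
        return PPC::VSX32 + (Reg - PPC::V0);
      if (PPC::isVFRegister(Reg))
        return PPC::VSX32 + (Reg - PPC::VF0);
      return Reg; // already VSX-numbered
    }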
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
index 284e52c298a2..80c37f82bf29 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
@@ -12,7 +12,6 @@
#include "PPCPredicates.h"
#include "llvm/Support/ErrorHandling.h"
-#include <cassert>
using namespace llvm;
PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
index df671f53cbd8..065daf42fe6e 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
@@ -69,6 +69,8 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
return {XCOFF::RelocationType::R_TOCU, SignAndSizeForHalf16};
case MCSymbolRefExpr::VK_PPC_L:
return {XCOFF::RelocationType::R_TOCL, SignAndSizeForHalf16};
+ case MCSymbolRefExpr::VK_PPC_AIX_TLSLE:
+ return {XCOFF::RelocationType::R_TLS_LE, SignAndSizeForHalf16};
}
} break;
case PPC::fixup_ppc_half16ds:
@@ -82,6 +84,8 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
return {XCOFF::RelocationType::R_TOC, 15};
case MCSymbolRefExpr::VK_PPC_L:
return {XCOFF::RelocationType::R_TOCL, 15};
+ case MCSymbolRefExpr::VK_PPC_AIX_TLSLE:
+ return {XCOFF::RelocationType::R_TLS_LE, 15};
}
} break;
case PPC::fixup_ppc_br24:
@@ -108,6 +112,8 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
return {XCOFF::RelocationType::R_TLS, SignAndSizeForFKData};
case MCSymbolRefExpr::VK_PPC_AIX_TLSGDM:
return {XCOFF::RelocationType::R_TLSM, SignAndSizeForFKData};
+ case MCSymbolRefExpr::VK_PPC_AIX_TLSIE:
+ return {XCOFF::RelocationType::R_TLS_IE, SignAndSizeForFKData};
case MCSymbolRefExpr::VK_PPC_AIX_TLSLE:
return {XCOFF::RelocationType::R_TLS_LE, SignAndSizeForFKData};
case MCSymbolRefExpr::VK_None:
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/P10InstrResources.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/P10InstrResources.td
index 0827e528a80f..3bbc5a63ca7a 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -825,9 +825,7 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read],
(instrs
SRADI_rec,
- SRAWI_rec,
- TABORTDCI,
- TABORTWCI
+ SRAWI_rec
)>;
// Single crack instructions
@@ -835,9 +833,7 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read],
def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
(instrs
SRAD_rec,
- SRAW_rec,
- TABORTDC,
- TABORTWC
+ SRAW_rec
)>;
// 2-way crack instructions
@@ -879,7 +875,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY],
MCRXRX,
MFCTR, MFCTR8,
MFLR, MFLR8,
- WAIT
+ WAIT, WAITP10
)>;
// 3 Cycles ALU operations, 1 input operands
@@ -1130,10 +1126,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
MFFSCRNI,
MFFSL,
MFVSCR,
- MTFSB0,
- TBEGIN,
- TRECHKPT,
- TSR
+ MTFSB0
)>;
// Single crack instructions
@@ -1153,9 +1146,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10FX_Read],
SUBFME8_rec, SUBFME_rec,
SUBFME8O_rec, SUBFMEO_rec,
SUBFZE8_rec, SUBFZE_rec,
- SUBFZE8O_rec, SUBFZEO_rec,
- TABORT,
- TRECLAIM
+ SUBFZE8O_rec, SUBFZEO_rec
)>;
// Single crack instructions
@@ -1862,8 +1853,6 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
EnforceIEIO,
MSGSYNC,
SLBSYNC,
- TCHECK,
- TEND,
TLBSYNC
)>;
@@ -1895,6 +1884,7 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read,
def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
(instrs
ISYNC,
+ SYNCP10,
SYNC
)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.h
index 0d3d71742bfb..3d9ea5608193 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.h
@@ -46,13 +46,14 @@ class ModulePass;
FunctionPass *createPPCMIPeepholePass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCBranchCoalescingPass();
- FunctionPass *createPPCISelDag(PPCTargetMachine &TM, CodeGenOpt::Level OL);
+ FunctionPass *createPPCISelDag(PPCTargetMachine &TM, CodeGenOptLevel OL);
FunctionPass *createPPCTLSDynamicCallPass();
FunctionPass *createPPCBoolRetToIntPass();
FunctionPass *createPPCExpandISELPass();
FunctionPass *createPPCPreEmitPeepholePass();
FunctionPass *createPPCExpandAtomicPseudoPass();
FunctionPass *createPPCCTRLoopsPass();
+ ModulePass *createPPCMergeStringPoolPass();
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP);
bool LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
@@ -78,6 +79,7 @@ class ModulePass;
void initializePPCExpandAtomicPseudoPass(PassRegistry &);
void initializePPCCTRLoopsPass(PassRegistry &);
void initializePPCDAGToDAGISelPass(PassRegistry &);
+ void initializePPCMergeStringPoolPass(PassRegistry &);
extern char &PPCVSXFMAMutateID;
@@ -100,79 +102,99 @@ class ModulePass;
// PPC Specific MachineOperand flags.
MO_NO_FLAG,
+  /// On PPC, 12 bits are not enough for all target operand flags.
+  /// Treat all PPC target flags as direct flags. To define a new flag that
+  /// is a combination of other flags, add a new enum entry instead of
+  /// combining existing flags. See MO_GOT_TPREL_PCREL_FLAG for an example.
+
/// On a symbol operand "FOO", this indicates that the reference is actually
/// to "FOO@plt". This is used for calls and jumps to external functions
/// and for PIC calls on 32-bit ELF systems.
- MO_PLT = 1,
+ MO_PLT,
/// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
/// the function's picbase, e.g. lo16(symbol-picbase).
- MO_PIC_FLAG = 2,
+ MO_PIC_FLAG,
/// MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to
/// the current instruction address(pc), e.g., var@pcrel. Fixup is VK_PCREL.
- MO_PCREL_FLAG = 4,
+ MO_PCREL_FLAG,
/// MO_GOT_FLAG - If this bit is set the symbol reference is to be computed
/// via the GOT. For example when combined with the MO_PCREL_FLAG it should
/// produce the relocation @got@pcrel. Fixup is VK_PPC_GOT_PCREL.
- MO_GOT_FLAG = 8,
+ MO_GOT_FLAG,
- // MO_PCREL_OPT_FLAG - If this bit is set the operand is part of a
- // PC Relative linker optimization.
- MO_PCREL_OPT_FLAG = 16,
+ /// MO_PCREL_OPT_FLAG - If this bit is set the operand is part of a
+ /// PC Relative linker optimization.
+ MO_PCREL_OPT_FLAG,
/// MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to
/// TLS General Dynamic model for Linux and the variable offset of TLS
/// General Dynamic model for AIX.
- MO_TLSGD_FLAG = 32,
+ MO_TLSGD_FLAG,
/// MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to
/// the thread pointer and the symbol can be used for the TLS Initial Exec
/// and Local Exec models.
- MO_TPREL_FLAG = 64,
+ MO_TPREL_FLAG,
/// MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to
/// TLS Local Dynamic model.
- MO_TLSLD_FLAG = 128,
+ MO_TLSLD_FLAG,
/// MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative
/// to the region handle of TLS General Dynamic model for AIX.
- MO_TLSGDM_FLAG = 256,
+ MO_TLSGDM_FLAG,
  /// MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set
/// they should produce the relocation @got@tlsgd@pcrel.
/// Fix up is VK_PPC_GOT_TLSGD_PCREL
- MO_GOT_TLSGD_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG | MO_TLSGD_FLAG,
+ /// MO_GOT_TLSGD_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG | MO_TLSGD_FLAG,
+ MO_GOT_TLSGD_PCREL_FLAG,
  /// MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set
/// they should produce the relocation @got@tlsld@pcrel.
/// Fix up is VK_PPC_GOT_TLSLD_PCREL
- MO_GOT_TLSLD_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG | MO_TLSLD_FLAG,
+ /// MO_GOT_TLSLD_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG | MO_TLSLD_FLAG,
+ MO_GOT_TLSLD_PCREL_FLAG,
  /// MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set
/// they should produce the relocation @got@tprel@pcrel.
/// Fix up is VK_PPC_GOT_TPREL_PCREL
- MO_GOT_TPREL_PCREL_FLAG = MO_GOT_FLAG | MO_TPREL_FLAG | MO_PCREL_FLAG,
-
- /// The next are not flags but distinct values.
- MO_ACCESS_MASK = 0xf00,
+ /// MO_GOT_TPREL_PCREL_FLAG = MO_GOT_FLAG | MO_TPREL_FLAG | MO_PCREL_FLAG,
+ MO_GOT_TPREL_PCREL_FLAG,
/// MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
- MO_LO = 1 << 8,
- MO_HA = 2 << 8,
+ MO_LO,
+ MO_HA,
- MO_TPREL_LO = 4 << 8,
- MO_TPREL_HA = 3 << 8,
+ MO_TPREL_LO,
+ MO_TPREL_HA,
/// These values identify relocations on immediates folded
/// into memory operations.
- MO_DTPREL_LO = 5 << 8,
- MO_TLSLD_LO = 6 << 8,
- MO_TOC_LO = 7 << 8,
+ MO_DTPREL_LO,
+ MO_TLSLD_LO,
+ MO_TOC_LO,
+
+ /// Symbol for VK_PPC_TLS fixup attached to an ADD instruction
+ MO_TLS,
+
+ /// MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA
+ MO_PIC_HA_FLAG,
+
+ /// MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO
+ MO_PIC_LO_FLAG,
+
+ /// MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG
+ MO_TPREL_PCREL_FLAG,
+
+  /// MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS
+ MO_TLS_PCREL_FLAG,
- // Symbol for VK_PPC_TLS fixup attached to an ADD instruction
- MO_TLS = 8 << 8
+ /// MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG
+ MO_GOT_PCREL_FLAG,
};
} // end namespace PPCII
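Because each enumerator is now a distinct value rather than an OR-able bit pattern, clients match combined flags with equality instead of masking. A minimal sketch, where MO stands in for any MachineOperand:

    unsigned Flag = MO.getTargetFlags();
    // Pre-patch: (Flag & MO_GOT_FLAG) && (Flag & MO_TPREL_FLAG) &&
    //            (Flag & MO_PCREL_FLAG)
    if (Flag == PPCII::MO_GOT_TPREL_PCREL_FLAG)
      ; // handle @got@tprel@pcrel, as in the GetVKForMO lambda below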
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td
index 3ba36f4f01e1..535616d33a80 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td
@@ -318,6 +318,17 @@ def FeaturePrivileged :
SubtargetFeature<"privileged", "HasPrivileged", "true",
"Add privileged instructions">;
+// Specifies that local-exec TLS accesses in any function with this target
+// attribute should use the optimized TOC-free sequence (where the offset is an
+// immediate off of R13 for which the linker might add fix-up code if the
+// immediate is too large).
+// Clearly, this isn't really a feature of the subtarget, but is used as a
+// convenient way to affect code generation for individual functions.
+def FeatureAIXLocalExecTLS :
+ SubtargetFeature<"aix-small-local-exec-tls", "HasAIXSmallLocalExecTLS", "true",
+ "Produce a TOC-free local-exec TLS sequence for this function "
+ "for 64-bit AIX">;
+
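In C++ the feature surfaces as a subtarget predicate; a hedged sketch of the gating pattern the lowering changes below rely on:

    // Sketch only; mirrors the checks added in PPCISelLowering.cpp.
    if (Subtarget.hasAIXSmallLocalExecTLS() && Model == TLSModel::LocalExec) {
      // Fold the offset into an immediate off R13 instead of loading the
      // variable offset from the TOC.
    }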
def FeaturePredictableSelectIsExpensive :
SubtargetFeature<"predictable-select-expensive",
"PredictableSelectIsExpensive",
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 5c10d6307c76..780b22b4fbe6 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -68,6 +68,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Threading.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -316,7 +317,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
// Linux assembler (Others?) does not take register mnemonics.
// FIXME - What about special registers used in mfspr/mtspr?
- O << PPCRegisterInfo::stripRegisterPrefix(RegName);
+ O << PPC::stripRegisterPrefix(RegName);
return;
}
case MachineOperand::MO_Immediate:
@@ -376,13 +377,13 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
// This operand uses VSX numbering.
// If the operand is a VMX register, convert it to a VSX register.
Register Reg = MI->getOperand(OpNo).getReg();
- if (PPCInstrInfo::isVRRegister(Reg))
+ if (PPC::isVRRegister(Reg))
Reg = PPC::VSX32 + (Reg - PPC::V0);
- else if (PPCInstrInfo::isVFRegister(Reg))
+ else if (PPC::isVFRegister(Reg))
Reg = PPC::VSX32 + (Reg - PPC::VF0);
const char *RegName;
RegName = PPCInstPrinter::getRegisterName(Reg);
- RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
+ RegName = PPC::stripRegisterPrefix(RegName);
O << RegName;
return false;
}
@@ -632,7 +633,6 @@ void PPCAsmPrinter::EmitAIXTlsCallHelper(const MachineInstr *MI) {
const MCExpr *TlsRef =
MCSymbolRefExpr::create(TlsCall, MCSymbolRefExpr::VK_None, OutContext);
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BLA).addExpr(TlsRef));
- return;
}
/// EmitTlsCall -- Given a GETtls[ld]ADDR[32] instruction, print a
@@ -715,25 +715,11 @@ static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO,
}
}
-static bool hasTLSFlag(const MachineOperand &MO) {
- unsigned Flags = MO.getTargetFlags();
- if (Flags & PPCII::MO_TLSGD_FLAG || Flags & PPCII::MO_TPREL_FLAG ||
- Flags & PPCII::MO_TLSLD_FLAG || Flags & PPCII::MO_TLSGDM_FLAG)
- return true;
-
- if (Flags == PPCII::MO_TPREL_LO || Flags == PPCII::MO_TPREL_HA ||
- Flags == PPCII::MO_DTPREL_LO || Flags == PPCII::MO_TLSLD_LO ||
- Flags == PPCII::MO_TLS)
- return true;
-
- return false;
-}
-
static PPCAsmPrinter::TOCEntryType
getTOCEntryTypeForMO(const MachineOperand &MO) {
// Use the target flags to determine if this MO is Thread Local.
// If we don't do this it comes out as Global.
- if (hasTLSFlag(MO))
+ if (PPCInstrInfo::hasTLSFlag(MO.getTargetFlags()))
return PPCAsmPrinter::TOCType_ThreadLocal;
switch (MO.getType()) {
@@ -827,24 +813,27 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
return Expr;
};
auto GetVKForMO = [&](const MachineOperand &MO) {
- // For TLS local-exec accesses on AIX, we have one TOC entry for the symbol
- // (with the variable offset), which is differentiated by MO_TPREL_FLAG.
- if (MO.getTargetFlags() & PPCII::MO_TPREL_FLAG) {
- // TODO: Update the query and the comment above to add a check for initial
- // exec when this TLS model is supported on AIX in the future, as both
- // local-exec and initial-exec can use MO_TPREL_FLAG.
+ // For TLS initial-exec and local-exec accesses on AIX, we have one TOC
+ // entry for the symbol (with the variable offset), which is differentiated
+ // by MO_TPREL_FLAG.
+ unsigned Flag = MO.getTargetFlags();
+ if (Flag == PPCII::MO_TPREL_FLAG ||
+ Flag == PPCII::MO_GOT_TPREL_PCREL_FLAG ||
+ Flag == PPCII::MO_TPREL_PCREL_FLAG) {
assert(MO.isGlobal() && "Only expecting a global MachineOperand here!\n");
TLSModel::Model Model = TM.getTLSModel(MO.getGlobal());
if (Model == TLSModel::LocalExec)
return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE;
- llvm_unreachable("Only expecting local-exec accesses!");
+ if (Model == TLSModel::InitialExec)
+ return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE;
+ llvm_unreachable("Only expecting local-exec or initial-exec accesses!");
}
// For GD TLS access on AIX, we have two TOC entries for the symbol (one for
// the variable offset and the other for the region handle). They are
// differentiated by MO_TLSGD_FLAG and MO_TLSGDM_FLAG.
- if (MO.getTargetFlags() & PPCII::MO_TLSGDM_FLAG)
+ if (Flag == PPCII::MO_TLSGDM_FLAG)
return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM;
- if (MO.getTargetFlags() & PPCII::MO_TLSGD_FLAG)
+ if (Flag == PPCII::MO_TLSGD_FLAG || Flag == PPCII::MO_GOT_TLSGD_PCREL_FLAG)
return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD;
return MCSymbolRefExpr::VariantKind::VK_None;
};
@@ -1534,6 +1523,24 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::EnforceIEIO));
return;
}
+ case PPC::ADDI8: {
+ // The faster non-TOC-based local-exec sequence is represented by `addi`
+ // with an immediate operand having the MO_TPREL_FLAG. Such an instruction
+ // does not otherwise arise.
+ unsigned Flag = MI->getOperand(2).getTargetFlags();
+ if (Flag == PPCII::MO_TPREL_FLAG ||
+ Flag == PPCII::MO_GOT_TPREL_PCREL_FLAG ||
+ Flag == PPCII::MO_TPREL_PCREL_FLAG) {
+ assert(
+ Subtarget->hasAIXSmallLocalExecTLS() &&
+ "addi with thread-pointer only expected with local-exec small TLS");
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
+ TmpInst.setOpcode(PPC::LA8);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ return;
+ }
+ break;
+ }
}
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
@@ -2512,7 +2519,7 @@ void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
}
void PPCAIXAsmPrinter::emitGlobalVariableHelper(const GlobalVariable *GV) {
- assert(!GV->getName().startswith("llvm.") &&
+ assert(!GV->getName().starts_with("llvm.") &&
"Unhandled intrinsic global variable.");
if (GV->hasComdat())
@@ -2759,14 +2766,18 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) {
// and add a format indicator as a part of function name in case we
// will support more than one format.
FormatIndicatorAndUniqueModId = "clang_" + UniqueModuleId.substr(1);
- else
- // Use the Pid and current time as the unique module id when we cannot
- // generate one based on a module's strong external symbols.
- // FIXME: Adjust the comment accordingly after we use source file full
- // path instead.
+ else {
+ // Use threadId, Pid, and current time as the unique module id when we
+ // cannot generate one based on a module's strong external symbols.
+ auto CurTime =
+ std::chrono::duration_cast<std::chrono::nanoseconds>(
+ std::chrono::steady_clock::now().time_since_epoch())
+ .count();
FormatIndicatorAndUniqueModId =
- "clangPidTime_" + llvm::itostr(sys::Process::getProcessId()) +
- "_" + llvm::itostr(time(nullptr));
+ "clangPidTidTime_" + llvm::itostr(sys::Process::getProcessId()) +
+ "_" + llvm::itostr(llvm::get_threadid()) + "_" +
+ llvm::itostr(CurTime);
+ }
}
emitSpecialLLVMGlobal(&G);
@@ -2781,11 +2792,21 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) {
// Construct an aliasing list for each GlobalObject.
for (const auto &Alias : M.aliases()) {
- const GlobalObject *Base = Alias.getAliaseeObject();
- if (!Base)
+ const GlobalObject *Aliasee = Alias.getAliaseeObject();
+ if (!Aliasee)
report_fatal_error(
"alias without a base object is not yet supported on AIX");
- GOAliasMap[Base].push_back(&Alias);
+
+ if (Aliasee->hasCommonLinkage()) {
+ report_fatal_error("Aliases to common variables are not allowed on AIX:"
+ "\n\tAlias attribute for " +
+ Alias.getGlobalIdentifier() +
+ " is invalid because " + Aliasee->getName() +
+ " is common.",
+ false);
+ }
+
+ GOAliasMap[Aliasee].push_back(&Alias);
}
return Result;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
index 5d97d187b296..8bbe315a2bb9 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
@@ -958,6 +958,7 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1,
V_SET0B,
V_SET0H,
WAIT,
+ WAITP10,
XOR,
XOR8,
XOR8_rec,
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
index 3c6b1f84b821..20f53bd4badf 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
@@ -45,6 +45,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
@@ -95,8 +96,6 @@ class PPCBoolRetToInt : public FunctionPass {
Type *IntTy = ST->isPPC64() ? Type::getInt64Ty(V->getContext())
: Type::getInt32Ty(V->getContext());
- if (auto *C = dyn_cast<Constant>(V))
- return ConstantExpr::getZExt(C, IntTy);
if (auto *P = dyn_cast<PHINode>(V)) {
// Temporarily set the operands to 0. We'll fix this later in
// runOnUse.
@@ -108,13 +107,12 @@ class PPCBoolRetToInt : public FunctionPass {
return Q;
}
- auto *A = dyn_cast<Argument>(V);
- auto *I = dyn_cast<Instruction>(V);
- assert((A || I) && "Unknown value type");
-
- auto InstPt =
- A ? &*A->getParent()->getEntryBlock().begin() : I->getNextNode();
- return new ZExtInst(V, IntTy, "", InstPt);
+ IRBuilder IRB(V->getContext());
+ if (auto *I = dyn_cast<Instruction>(V))
+ IRB.SetInsertPoint(I->getNextNode());
+ else
+ IRB.SetInsertPoint(&Func->getEntryBlock(), Func->getEntryBlock().begin());
+ return IRB.CreateZExt(V, IntTy);
}
typedef SmallPtrSet<const PHINode *, 8> PHINodeSet;
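The dropped ConstantExpr::getZExt special case is subsumed: IRBuilder's default folder already folds a zext of a Constant, so no instruction (and no insert point) is needed for that case. A small sketch against the LLVM C++ API:

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    Value *zextTrueToI64(LLVMContext &Ctx) {
      IRBuilder<> IRB(Ctx);
      // Returns a folded ConstantInt; nothing is inserted anywhere.
      return IRB.CreateZExt(ConstantInt::getTrue(Ctx), IRB.getInt64Ty());
    }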
@@ -196,6 +194,7 @@ class PPCBoolRetToInt : public FunctionPass {
auto &TM = TPC->getTM<PPCTargetMachine>();
ST = TM.getSubtargetImpl(F);
+ Func = &F;
PHINodeSet PromotablePHINodes = getPromotablePHINodes(F);
B2IMap Bool2IntMap;
@@ -277,6 +276,7 @@ class PPCBoolRetToInt : public FunctionPass {
private:
const PPCSubtarget *ST;
+ Function *Func;
};
} // end anonymous namespace
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
index 9d580ff57471..799890928577 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "PPC.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -702,6 +701,7 @@ bool PPCBranchCoalescing::mergeCandidates(CoalescingCandidateInfo &SourceRegion,
TargetRegion.FallThroughBlock->transferSuccessorsAndUpdatePHIs(
SourceRegion.FallThroughBlock);
TargetRegion.FallThroughBlock->removeSuccessor(SourceRegion.BranchBlock);
+ TargetRegion.FallThroughBlock->normalizeSuccProbs();
// Remove the blocks from the function.
assert(SourceRegion.BranchBlock->empty() &&
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoopsVerify.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoopsVerify.cpp
index b1f5bdd885cd..1f9947f6f327 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoopsVerify.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoopsVerify.cpp
@@ -16,8 +16,6 @@
// something that needs to be run (or even defined) for Release builds so the
// entire file is guarded by NDEBUG.
#ifndef NDEBUG
-#include <vector>
-
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "PPC.h"
#include "llvm/ADT/SmallSet.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
index a9794ddd0566..aee57a5075ff 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
@@ -239,23 +239,18 @@ bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128(
// loop:
// old = lqarx ptr
// <compare old, cmp>
- // bne 0, fail
+ // bne 0, exit
// succ:
// stqcx new ptr
// bne 0, loop
- // b exit
- // fail:
- // stqcx old ptr
// exit:
// ....
MachineFunction::iterator MFI = ++MBB.getIterator();
MachineBasicBlock *LoopCmpMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *CmpSuccMBB = MF->CreateMachineBasicBlock(BB);
- MachineBasicBlock *CmpFailMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(MFI, LoopCmpMBB);
MF->insert(MFI, CmpSuccMBB);
- MF->insert(MFI, CmpFailMBB);
MF->insert(MFI, ExitMBB);
ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()),
MBB.end());
@@ -276,9 +271,9 @@ bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128(
BuildMI(CurrentMBB, DL, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE)
.addReg(PPC::CR0)
- .addMBB(CmpFailMBB);
+ .addMBB(ExitMBB);
CurrentMBB->addSuccessor(CmpSuccMBB);
- CurrentMBB->addSuccessor(CmpFailMBB);
+ CurrentMBB->addSuccessor(ExitMBB);
// Build succ.
CurrentMBB = CmpSuccMBB;
PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo,
@@ -288,16 +283,11 @@ bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128(
.addImm(PPC::PRED_NE)
.addReg(PPC::CR0)
.addMBB(LoopCmpMBB);
- BuildMI(CurrentMBB, DL, TII->get(PPC::B)).addMBB(ExitMBB);
CurrentMBB->addSuccessor(LoopCmpMBB);
CurrentMBB->addSuccessor(ExitMBB);
- CurrentMBB = CmpFailMBB;
- BuildMI(CurrentMBB, DL, SC).addReg(Old).addReg(RA).addReg(RB);
- CurrentMBB->addSuccessor(ExitMBB);
recomputeLiveIns(*LoopCmpMBB);
recomputeLiveIns(*CmpSuccMBB);
- recomputeLiveIns(*CmpFailMBB);
recomputeLiveIns(*ExitMBB);
NMBBI = MBB.end();
MI.eraseFromParent();
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index d5e4ae34dde7..245e78641ed6 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -466,7 +466,7 @@ PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
RS.enterBasicBlock(*MBB);
} else {
RS.enterBasicBlockEnd(*MBB);
- RS.backward(std::prev(MBBI));
+ RS.backward(MBBI);
}
} else {
// The scratch register will be used at the start of the block.
@@ -2334,24 +2334,16 @@ bool PPCFrameLowering::assignCalleeSavedSpillSlots(
// In case of SPE we only have SuperRegs and CRs
// in our CalleSaveInfo vector.
- unsigned Idx = 0;
for (auto &CalleeSaveReg : CSI) {
- const MCPhysReg &Reg = CalleeSaveReg.getReg();
- const MCPhysReg &Lower = RegInfo->getSubReg(Reg, 1);
- const MCPhysReg &Higher = RegInfo->getSubReg(Reg, 2);
-
- // Check only for SuperRegs.
- if (Lower) {
- if (MRI.isPhysRegModified(Higher)) {
- Idx++;
- continue;
- } else {
+ MCPhysReg Reg = CalleeSaveReg.getReg();
+ MCPhysReg Lower = RegInfo->getSubReg(Reg, 1);
+ MCPhysReg Higher = RegInfo->getSubReg(Reg, 2);
+
+ if ( // Check only for SuperRegs.
+ Lower &&
// Replace Reg if only lower-32 bits modified
- CSI.erase(CSI.begin() + Idx);
- CSI.insert(CSI.begin() + Idx, CalleeSavedInfo(Lower));
- }
- }
- Idx++;
+ !MRI.isPhysRegModified(Higher))
+ CalleeSaveReg = CalleeSavedInfo(Lower);
}
}
@@ -2740,3 +2732,17 @@ bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
return false;
return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI();
}
+
+uint64_t PPCFrameLowering::getStackThreshold() const {
+ // On PPC64, we use `stux r1, r1, <scratch_reg>` to extend the stack;
+ // use `add r1, r1, <scratch_reg>` to release the stack frame.
+ // Scratch register contains a signed 64-bit number, which is negative
+ // when extending the stack and is positive when releasing the stack frame.
+ // To make `stux` and `add` paired, the absolute value of the number contained
+ // in the scratch register should be the same. Thus the maximum stack size
+ // is (2^63)-1, i.e., LONG_MAX.
+ if (Subtarget.isPPC64())
+ return LONG_MAX;
+
+ return TargetFrameLowering::getStackThreshold();
+}
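Restating the bound (hedged): the same scratch value is used with opposite signs by the paired instructions, so the frame size must fit as a signed 64-bit magnitude:

    #include <climits>
    #include <cstdint>

    // scratch = -FrameSize for `stux r1, r1, scratch`,
    // scratch = +FrameSize for `add r1, r1, scratch`.
    static bool fitsPPC64StackThreshold(uint64_t FrameSize) {
      return FrameSize <= static_cast<uint64_t>(LONG_MAX);
    }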
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index 21883b19a575..e19087ce0e18 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -173,6 +173,8 @@ public:
/// function prologue/epilogue.
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override;
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
+
+ uint64_t getStackThreshold() const override;
};
} // End llvm namespace
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCGenRegisterBankInfo.def b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCGenRegisterBankInfo.def
index eff4432206e1..b42f9247f691 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCGenRegisterBankInfo.def
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCGenRegisterBankInfo.def
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
namespace llvm {
-RegisterBankInfo::PartialMapping PPCGenRegisterBankInfo::PartMappings[]{
+const RegisterBankInfo::PartialMapping PPCGenRegisterBankInfo::PartMappings[]{
/* StartIdx, Length, RegBank */
// 0: GPR 32-bit value.
{0, 32, PPC::GPRRegBank},
@@ -39,7 +39,7 @@ RegisterBankInfo::PartialMapping PPCGenRegisterBankInfo::PartMappings[]{
// 3-operands instructions.
// - Last, mappings for cross-register bank moves follow. Since COPY has only
// 2 operands, a mapping consists of 2 entries.
-RegisterBankInfo::ValueMapping PPCGenRegisterBankInfo::ValMappings[]{
+const RegisterBankInfo::ValueMapping PPCGenRegisterBankInfo::ValMappings[]{
/* BreakDown, NumBreakDowns */
// 0: invalid
{nullptr, 0},
@@ -77,7 +77,7 @@ PPCGenRegisterBankInfo::getValueMapping(PartialMappingIdx RBIdx) {
return &ValMappings[1 + 3 * ValMappingIdx];
}
-PPCGenRegisterBankInfo::PartialMappingIdx
+const PPCGenRegisterBankInfo::PartialMappingIdx
PPCGenRegisterBankInfo::BankIDToCopyMapIdx[]{
PMI_None,
PMI_FPR64, // FPR
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 96fd83ab6a7b..b57d185bb638 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -151,7 +151,7 @@ namespace {
PPCDAGToDAGISel() = delete;
- explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
+ explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOptLevel OptLevel)
: SelectionDAGISel(ID, tm, OptLevel), TM(tm) {}
bool runOnMachineFunction(MachineFunction &MF) override {
@@ -387,18 +387,19 @@ namespace {
/// register can be improved, but it is wrong to substitute Reg+Reg for
/// Reg in an asm, because the load or store opcode would have to change.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- unsigned ConstraintID,
+ InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) override {
switch(ConstraintID) {
default:
- errs() << "ConstraintID: " << ConstraintID << "\n";
+ errs() << "ConstraintID: "
+ << InlineAsm::getMemConstraintName(ConstraintID) << "\n";
llvm_unreachable("Unexpected asm memory constraint");
- case InlineAsm::Constraint_es:
- case InlineAsm::Constraint_m:
- case InlineAsm::Constraint_o:
- case InlineAsm::Constraint_Q:
- case InlineAsm::Constraint_Z:
- case InlineAsm::Constraint_Zy:
+ case InlineAsm::ConstraintCode::es:
+ case InlineAsm::ConstraintCode::m:
+ case InlineAsm::ConstraintCode::o:
+ case InlineAsm::ConstraintCode::Q:
+ case InlineAsm::ConstraintCode::Z:
+ case InlineAsm::ConstraintCode::Zy:
// We need to make sure that this one operand does not end up in r0
// (because we might end up lowering this as 0(%op)).
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
@@ -424,6 +425,7 @@ private:
bool tryFoldSWTestBRCC(SDNode *N);
bool trySelectLoopCountIntrinsic(SDNode *N);
bool tryAsSingleRLDICL(SDNode *N);
+ bool tryAsSingleRLDCL(SDNode *N);
bool tryAsSingleRLDICR(SDNode *N);
bool tryAsSingleRLWINM(SDNode *N);
bool tryAsSingleRLWINM8(SDNode *N);
@@ -754,8 +756,8 @@ static bool isThreadPointerAcquisitionNode(SDValue Base, SelectionDAG *CurDAG) {
static bool canOptimizeTLSDFormToXForm(SelectionDAG *CurDAG, SDValue Base) {
// Do not do this transformation at -O0.
- if (CurDAG->getTarget().getOptLevel() == CodeGenOpt::None)
- return false;
+ if (CurDAG->getTarget().getOptLevel() == CodeGenOptLevel::None)
+ return false;
// In order to perform this optimization inside tryTLSXForm[Load|Store],
// Base is expected to be an ADD_TLS node.
@@ -1165,6 +1167,31 @@ static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
getI32Imm(Shift), getI32Imm(0));
}
+ // 2-7) Patterns : High word == Low word
+ // This may require 2 to 3 instructions, depending on whether Lo32 can be
+ // materialized in 1 instruction.
+ if (Hi32 == Lo32) {
+ // Handle the first 32 bits.
+ uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
+ uint64_t ImmLo16 = Lo32 & 0xffff;
+ if (isInt<16>(Lo32))
+ Result =
+ CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(ImmLo16));
+ else if (!ImmLo16)
+ Result =
+ CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16));
+ else {
+ InstCnt = 3;
+ Result =
+ CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16));
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(ImmLo16));
+ }
+ // Use rldimi to insert the Low word into High word.
+ SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
+ getI32Imm(0)};
+ return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
+ }
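A worked check of the rldimi step (standalone C++, illustrative only): rotating the register left by 32 and inserting over the high word duplicates the low word, which is why at most one li/lis plus ori is needed first:

    #include <cstdint>

    // rldimi reg, reg, 32, 0: rotate left 32, insert over bits 0-31 (IBM
    // numbering), i.e. the high word mirrors the low word afterwards.
    constexpr uint64_t duplicateLowWord(uint32_t Lo32) {
      return (static_cast<uint64_t>(Lo32) << 32) | Lo32;
    }
    static_assert(duplicateLowWord(0x12345678u) == 0x1234567812345678ULL,
                  "Hi32 == Lo32 after the insert");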
// Following patterns use 3 instructions to materialize the Imm.
InstCnt = 3;
@@ -1215,20 +1242,7 @@ static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
getI32Imm(TO), getI32Imm(LZ));
}
- // 3-4) Patterns : High word == Low word
- if (Hi32 == Lo32) {
- // Handle the first 32 bits.
- uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
- unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
- Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
- Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
- getI32Imm(Lo32 & 0xffff));
- // Use rldimi to insert the Low word into High word.
- SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
- getI32Imm(0)};
- return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
- }
- // 3-5) Patterns : {******}{33 zeros}{******}
+ // 3-4) Patterns : {******}{33 zeros}{******}
// {******}{33 ones}{******}
// If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
// bits remain on both sides. Rotate right the Imm to construct an int<32>
@@ -4041,7 +4055,7 @@ bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
// This optimization will emit code that assumes 64-bit registers
// so we don't want to run it in 32-bit mode. Also don't run it
// on functions that are not to be optimized.
- if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
+ if (TM.getOptLevel() == CodeGenOptLevel::None || !TM.isPPC64())
return false;
// For POWER10, it is more profitable to use the set boolean extension
@@ -4835,8 +4849,7 @@ bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
return false;
SDValue CmpRHS = N->getOperand(3);
- if (!isa<ConstantSDNode>(CmpRHS) ||
- cast<ConstantSDNode>(CmpRHS)->getSExtValue() != 0)
+ if (!isNullConstant(CmpRHS))
return false;
SDValue CmpLHS = N->getOperand(2);
@@ -5085,6 +5098,35 @@ bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
return false;
}
+bool PPCDAGToDAGISel::tryAsSingleRLDCL(SDNode *N) {
+ assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
+
+ uint64_t Imm64;
+ if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
+ return false;
+
+ SDValue Val = N->getOperand(0);
+
+ if (Val.getOpcode() != ISD::ROTL)
+ return false;
+
+  // Avoid a situation like this one:
+  //   %2 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 23)
+  //   %and1 = and i64 %2, 9223372036854775807
+  // This function tries to match RLDCL. However, a DAG like the one above,
+  // with a constant rotate amount, is better matched by RLDICL, which is
+  // not what we are looking for here.
+ SDValue RotateAmt = Val.getOperand(1);
+ if (RotateAmt.getOpcode() == ISD::Constant)
+ return false;
+
+ unsigned MB = 64 - llvm::countr_one(Imm64);
+ SDLoc dl(N);
+ SDValue Ops[] = {Val.getOperand(0), RotateAmt, getI32Imm(MB, dl)};
+ CurDAG->SelectNodeTo(N, PPC::RLDCL, MVT::i64, Ops);
+ return true;
+}
+
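A quick check of the MB computation for the motivating mask (C++20 for std::countr_one; illustrative only). rldcl clears the MB most-significant bits of the rotated value, and a mask of 63 trailing ones leaves exactly one bit to clear:

    #include <bit>

    static_assert(64 - std::countr_one(0x7fffffffffffffffULL) == 1,
                  "MB == 1 for the and-mask in the example above");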
bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
uint64_t Imm64;
@@ -5605,8 +5647,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
case ISD::AND:
// If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
- if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||
- tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N))
+ if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDCL(N) ||
+ tryAsSingleRLDICL(N) || tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) ||
+ tryAsPairOfRLDICL(N))
return;
// Other cases are autogenerated.
@@ -6624,7 +6667,7 @@ void PPCDAGToDAGISel::PreprocessISelDAG() {
/// on the DAG representation.
void PPCDAGToDAGISel::PostprocessISelDAG() {
// Skip peepholes at -O0.
- if (TM.getOptLevel() == CodeGenOpt::None)
+ if (TM.getOptLevel() == CodeGenOptLevel::None)
return;
PeepholePPC64();
@@ -6659,11 +6702,7 @@ bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
Op2->getMachineOpcode() != PPC::LI8)
return false;
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0));
- if (!C)
- return false;
-
- if (!C->isZero())
+ if (!isNullConstant(Op2->getOperand(0)))
return false;
}
@@ -7616,13 +7655,6 @@ void PPCDAGToDAGISel::PeepholePPC64() {
// is already in place on the operand, so copying the operand
// is sufficient.
ReplaceFlags = false;
- // For these cases, the immediate may not be divisible by 4, in
- // which case the fold is illegal for DS-form instructions. (The
- // other cases provide aligned addresses and are always safe.)
- if (RequiresMod4Offset &&
- (!isa<ConstantSDNode>(Base.getOperand(1)) ||
- Base.getConstantOperandVal(1) % 4 != 0))
- continue;
break;
case PPC::ADDIdtprelL:
Flags = PPCII::MO_DTPREL_LO;
@@ -7674,6 +7706,18 @@ void PPCDAGToDAGISel::PeepholePPC64() {
UpdateHBase = true;
}
} else {
+ // Global addresses can be folded, but only if they are sufficiently
+ // aligned.
+ if (RequiresMod4Offset) {
+ if (GlobalAddressSDNode *GA =
+ dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
+ const GlobalValue *GV = GA->getGlobal();
+ Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
+ if (Alignment < 4)
+ continue;
+ }
+ }
+
// If we're directly folding the addend from an addi instruction, then:
// 1. In general, the offset on the memory access must be zero.
// 2. If the addend is a constant, then it can be combined with a
@@ -7748,6 +7792,6 @@ void PPCDAGToDAGISel::PeepholePPC64() {
/// PowerPC-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
return new PPCDAGToDAGISel(TM, OptLevel);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 547b71a6101a..22c662a79d87 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -132,6 +132,10 @@ cl::opt<bool> DisableAutoPairedVecSt(
cl::desc("disable automatically generated 32byte paired vector stores"),
cl::init(true), cl::Hidden);
+static cl::opt<unsigned> PPCMinimumJumpTableEntries(
+ "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
+ cl::desc("Set minimum number of entries to use a jump table on PPC"));
+
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM,
@@ -144,6 +148,12 @@ static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
+// A faster local-exec TLS access sequence (enabled with the
+// -maix-small-local-exec-tls option) can be produced for TLS variables;
+// consistent with the IBM XL compiler, we apply a max size of slightly under
+// 32KB.
+constexpr uint64_t AIXSmallTlsPolicySizeLimit = 32751;
+
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
@@ -389,7 +399,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// MASS transformation for LLVM intrinsics with replicating fast-math flag
// to be consistent to PPCGenScalarMASSEntries pass
- if (TM.getOptLevel() == CodeGenOpt::Aggressive) {
+ if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
setOperationAction(ISD::FSIN , MVT::f64, Custom);
setOperationAction(ISD::FCOS , MVT::f64, Custom);
setOperationAction(ISD::FPOW , MVT::f64, Custom);
@@ -1419,6 +1429,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
setLibcallName(RTLIB::FMA_F128, "fmaf128");
+ setLibcallName(RTLIB::FREXP_F128, "frexpf128");
if (Subtarget.isAIXABI()) {
setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
@@ -1434,6 +1445,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setJumpIsExpensive();
}
+  // TODO: The default entry number is set to 64. This stops most jump table
+  // generation on PPC, but it is good for current PPC hardware because the
+  // indirect branch through mtctr to the jump table may lead to poor branch
+  // prediction. Re-evaluate this value on future hardware that handles mtctr better.
+ setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);
+
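Since the threshold is a plain cl::opt, it can be overridden per invocation; a hedged example (the triple and input file are placeholders):

    llc -mtriple=powerpc64le-unknown-linux-gnu \
        -ppc-min-jump-table-entries=16 switch.ll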
setMinFunctionAlignment(Align(4));
switch (Subtarget.getCPUDirective()) {
@@ -1627,6 +1644,27 @@ bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
return VT.isScalarInteger();
}
+bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore(
+ Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
+ if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
+ return false;
+
+ if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
+ if (VTy->getScalarType()->isIntegerTy()) {
+ // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
+ if (ElemSizeInBits == 32) {
+ Index = Subtarget.isLittleEndian() ? 2 : 1;
+ return true;
+ }
+ if (ElemSizeInBits == 64) {
+ Index = Subtarget.isLittleEndian() ? 1 : 0;
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((PPCISD::NodeType)Opcode) {
case PPCISD::FIRST_NUMBER: break;
@@ -2936,7 +2974,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
template <typename Ty> static bool isValidPCRelNode(SDValue N) {
Ty *PCRelCand = dyn_cast<Ty>(N);
- return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
+ return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
}
/// Returns true if this address is a PC Relative address.
@@ -3097,8 +3135,8 @@ static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
// Don't use the pic base if not in PIC relocation model.
if (IsPIC) {
- HiOpFlags |= PPCII::MO_PIC_FLAG;
- LoOpFlags |= PPCII::MO_PIC_FLAG;
+ HiOpFlags = PPCII::MO_PIC_HA_FLAG;
+ LoOpFlags = PPCII::MO_PIC_LO_FLAG;
}
}
@@ -3326,36 +3364,60 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
bool Is64Bit = Subtarget.isPPC64();
+ bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
+ bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
- if (Model == TLSModel::LocalExec) {
+ if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
SDValue VariableOffsetTGA =
DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
SDValue TLSReg;
- if (Is64Bit)
- // For local-exec on AIX (64-bit), the sequence that is generated involves
- // a load of the variable offset (from the TOC), followed by an add of the
- // loaded variable offset to R13 (the thread pointer).
+ if (Is64Bit) {
+ // For local-exec and initial-exec on AIX (64-bit), the sequence generated
+ // involves a load of the variable offset (from the TOC), followed by an
+ // add of the loaded variable offset to R13 (the thread pointer).
// This code sequence looks like:
// ld reg1,var[TC](2)
// add reg2, reg1, r13 // r13 contains the thread pointer
TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
- else
- // For local-exec on AIX (32-bit), the sequence that is generated involves
- // loading the variable offset from the TOC, generating a call to
+
+ // With the -maix-small-local-exec-tls option, produce a faster access
+ // sequence for local-exec TLS variables where the offset from the TLS
+ // base is encoded as an immediate operand.
+ //
+ // We only utilize the faster local-exec access sequence when the TLS
+ // variable has a size within the policy limit. We treat types that are
+ // not sized or are empty as being over the policy size limit.
+ if (HasAIXSmallLocalExecTLS && IsTLSLocalExecModel) {
+ Type *GVType = GV->getValueType();
+ if (GVType->isSized() && !GVType->isEmptyTy() &&
+ GV->getParent()->getDataLayout().getTypeAllocSize(GVType) <=
+ AIXSmallTlsPolicySizeLimit)
+ return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
+ }
+ } else {
+ // For local-exec and initial-exec on AIX (32-bit), the sequence generated
+ // involves loading the variable offset from the TOC, generating a call to
// .__get_tpointer to get the thread pointer (which will be in R3), and
// adding the two together:
// lwz reg1,var[TC](2)
// bla .__get_tpointer
// add reg2, reg1, r3
TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
+
+ // We do not implement the 32-bit version of the faster access sequence
+ // for local-exec that is controlled by -maix-small-local-exec-tls.
+ if (HasAIXSmallLocalExecTLS)
+ report_fatal_error("The small-local-exec TLS access sequence is "
+ "currently only supported on AIX (64-bit mode).");
+ }
return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
}
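Hedged summary of the 64-bit sequences this branch now produces, in the notation of the inline comments above (the displacement form for the small local-exec case is inferred from the ADDI8 -> LA8 lowering in PPCAsmPrinter):

    ld  reg1, var[TC](2)     ; local-/initial-exec: TOC load of the offset
    add reg2, reg1, r13      ; then add the thread pointer
    la  reg2, var@le(r13)    ; small local-exec: one instruction, no TOC entry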
- // The Local-Exec and General-Dynamic TLS models are currently the only
- // supported access models. If Local-exec is not possible or specified, all
- // GlobalTLSAddress nodes are lowered using the general-dynamic model.
+ // Only Local-Exec, Initial-Exec and General-Dynamic TLS models are currently
+ // supported models. If Local- or Initial-exec are not possible or specified,
+ // all GlobalTLSAddress nodes are lowered using the general-dynamic model.
// We need to generate two TOC entries, one for the variable offset, one for
// the region handle. The global address for the TOC entry of the region
// handle is created with the MO_TLSGDM_FLAG flag and the global address
@@ -3393,8 +3455,8 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
if (Model == TLSModel::LocalExec) {
if (Subtarget.isUsingPCRelativeCalls()) {
SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
- SDValue TGA = DAG.getTargetGlobalAddress(
- GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TPREL_PCREL_FLAG);
SDValue MatAddr =
DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
@@ -3416,8 +3478,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
SDValue TGA = DAG.getTargetGlobalAddress(
GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
SDValue TGATLS = DAG.getTargetGlobalAddress(
- GV, dl, PtrVT, 0,
- IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS);
+ GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
SDValue TPOffset;
if (IsPCRel) {
SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
@@ -3513,8 +3574,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
EVT Ty = getPointerTy(DAG.getDataLayout());
if (isAccessedAsGotIndirect(Op)) {
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
- PPCII::MO_PCREL_FLAG |
- PPCII::MO_GOT_FLAG);
+ PPCII::MO_GOT_PCREL_FLAG);
SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
MachinePointerInfo());
@@ -3764,21 +3824,22 @@ SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
// Check all operands that may contain the LR.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
- unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
- unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ const InlineAsm::Flag Flags(
+ cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue());
+ unsigned NumVals = Flags.getNumOperandRegisters();
++i; // Skip the ID value.
- switch (InlineAsm::getKind(Flags)) {
+ switch (Flags.getKind()) {
default:
llvm_unreachable("Bad flags!");
- case InlineAsm::Kind_RegUse:
- case InlineAsm::Kind_Imm:
- case InlineAsm::Kind_Mem:
+ case InlineAsm::Kind::RegUse:
+ case InlineAsm::Kind::Imm:
+ case InlineAsm::Kind::Mem:
i += NumVals;
break;
- case InlineAsm::Kind_Clobber:
- case InlineAsm::Kind_RegDef:
- case InlineAsm::Kind_RegDefEarlyClobber: {
+ case InlineAsm::Kind::Clobber:
+ case InlineAsm::Kind::RegDef:
+ case InlineAsm::Kind::RegDefEarlyClobber: {
for (; NumVals; --NumVals, ++i) {
Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
if (Reg != PPC::LR && Reg != PPC::LR8)
@@ -5278,7 +5339,7 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
// inserted into the DAG as part of call lowering. The restore of the TOC
// pointer is modeled by using a pseudo instruction for the call opcode that
// represents the 2 instruction sequence of an indirect branch and link,
- // immediately followed by a load of the TOC pointer from the the stack save
+ // immediately followed by a load of the TOC pointer from the stack save
// slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
// as it is not saved or used.
RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
@@ -7193,7 +7254,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
// be future work.
SDValue Store = DAG.getStore(
CopyFrom.getValue(1), dl, CopyFrom,
- DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),
+ DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
MachinePointerInfo::getFixedStack(MF, FI, Offset));
MemOps.push_back(Store);
@@ -7373,12 +7434,12 @@ SDValue PPCTargetLowering::LowerCall_AIX(
}
auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
- return DAG.getExtLoad(
- ISD::ZEXTLOAD, dl, PtrVT, Chain,
- (LoadOffset != 0)
- ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
- : Arg,
- MachinePointerInfo(), VT);
+ return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
+ (LoadOffset != 0)
+ ? DAG.getObjectPtrOffset(
+ dl, Arg, TypeSize::getFixed(LoadOffset))
+ : Arg,
+ MachinePointerInfo(), VT);
};
unsigned LoadOffset = 0;
@@ -7408,11 +7469,11 @@ SDValue PPCTargetLowering::LowerCall_AIX(
// Only memcpy the bytes that don't pass in register.
MemcpyFlags.setByValSize(ByValSize - LoadOffset);
Chain = CallSeqStart = createMemcpyOutsideCallSeq(
- (LoadOffset != 0)
- ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
- : Arg,
- DAG.getObjectPtrOffset(dl, StackPtr,
- TypeSize::Fixed(ByValVA.getLocMemOffset())),
+ (LoadOffset != 0) ? DAG.getObjectPtrOffset(
+ dl, Arg, TypeSize::getFixed(LoadOffset))
+ : Arg,
+ DAG.getObjectPtrOffset(
+ dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
CallSeqStart, MemcpyFlags, DAG, dl);
continue;
}
@@ -8020,7 +8081,8 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// For more information, see section F.3 of the 2.06 ISA specification.
// With ISA 3.0
if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
- (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
+ (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
+ ResVT == MVT::f128)
return Op;
// If the RHS of the comparison is a 0.0, we don't need to do the
@@ -10254,11 +10316,6 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
bool isLittleEndian = Subtarget.isLittleEndian();
bool isPPC64 = Subtarget.isPPC64();
- // Only need to place items backwards in LE,
- // the mask will be properly calculated.
- if (isLittleEndian)
- std::swap(V1, V2);
-
if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
(V1->hasOneUse() || V2->hasOneUse())) {
LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
@@ -10268,7 +10325,8 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
// The second input to XXPERM is also an output so if the second input has
// multiple uses then copying is necessary, as a result we want the
// single-use operand to be used as the second input to prevent copying.
- if (!V2->hasOneUse() && V1->hasOneUse()) {
+ if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
+ (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
std::swap(V1, V2);
NeedSwap = !NeedSwap;
}
@@ -10307,27 +10365,24 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
- if (Opcode == PPCISD::XXPERM) {
- if (V1HasXXSWAPD) {
- if (SrcElt < 8)
- SrcElt += 8;
- else if (SrcElt < 16)
- SrcElt -= 8;
- }
- if (V2HasXXSWAPD) {
- if (SrcElt > 23)
- SrcElt -= 8;
- else if (SrcElt > 15)
- SrcElt += 8;
- }
- if (NeedSwap) {
- if (SrcElt < 16)
- SrcElt += 16;
- else
- SrcElt -= 16;
- }
+ if (V1HasXXSWAPD) {
+ if (SrcElt < 8)
+ SrcElt += 8;
+ else if (SrcElt < 16)
+ SrcElt -= 8;
+ }
+ if (V2HasXXSWAPD) {
+ if (SrcElt > 23)
+ SrcElt -= 8;
+ else if (SrcElt > 15)
+ SrcElt += 8;
+ }
+ if (NeedSwap) {
+ if (SrcElt < 16)
+ SrcElt += 16;
+ else
+ SrcElt -= 16;
}
-
for (unsigned j = 0; j != BytesPerElement; ++j)
if (isLittleEndian)
ResultMask.push_back(
@@ -10337,18 +10392,19 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
}
- if (Opcode == PPCISD::XXPERM && (V1HasXXSWAPD || V2HasXXSWAPD)) {
- if (V1HasXXSWAPD) {
- dl = SDLoc(V1->getOperand(0));
- V1 = V1->getOperand(0)->getOperand(1);
- }
- if (V2HasXXSWAPD) {
- dl = SDLoc(V2->getOperand(0));
- V2 = V2->getOperand(0)->getOperand(1);
- }
- if (isPPC64 && ValType != MVT::v2f64)
+ if (V1HasXXSWAPD) {
+ dl = SDLoc(V1->getOperand(0));
+ V1 = V1->getOperand(0)->getOperand(1);
+ }
+ if (V2HasXXSWAPD) {
+ dl = SDLoc(V2->getOperand(0));
+ V2 = V2->getOperand(0)->getOperand(1);
+ }
+
+ if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
+ if (ValType != MVT::v2f64)
V1 = DAG.getBitcast(MVT::v2f64, V1);
- if (isPPC64 && V2.getValueType() != MVT::v2f64)
+ if (V2.getValueType() != MVT::v2f64)
V2 = DAG.getBitcast(MVT::v2f64, V2);
}
@@ -10369,6 +10425,11 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
if (Opcode == PPCISD::XXPERM)
VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
+  // Only need to place the items backwards in LE;
+  // the mask was already calculated accordingly.
+ if (isLittleEndian)
+ std::swap(V1, V2);
+
SDValue VPERMNode =
DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
@@ -11037,14 +11098,14 @@ SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
SmallVector<SDValue, 4> Ops{
N->getOperand(0),
DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
- SDValue Val = N->getOperand(2);
+ SDValue Val = N->getOperand(1);
SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
DAG.getConstant(64, dl, MVT::i32));
ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
Ops.push_back(ValLo);
Ops.push_back(ValHi);
- Ops.push_back(N->getOperand(1));
+ Ops.push_back(N->getOperand(2));
return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
N->getMemOperand());
}
@@ -16659,13 +16720,14 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
- std::vector<SDValue>&Ops,
+ StringRef Constraint,
+ std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
SDValue Result;
// Only support length 1 constraints.
- if (Constraint.length() > 1) return;
+ if (Constraint.size() > 1)
+ return;
char Letter = Constraint[0];
switch (Letter) {
@@ -17075,13 +17137,23 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
- if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
+ if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
// We should use Altivec/VSX loads and stores when available. For unaligned
// addresses, unaligned VSX loads are only fast starting with the P8.
- if (Subtarget.hasAltivec() && Op.size() >= 16 &&
- (Op.isAligned(Align(16)) ||
- ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
- return MVT::v4i32;
+ if (Subtarget.hasAltivec() && Op.size() >= 16) {
+ if (Op.isMemset() && Subtarget.hasVSX()) {
+ uint64_t TailSize = Op.size() % 16;
+ // For memset lowering, EXTRACT_VECTOR_ELT tries to return a constant
+ // element if the vector element type matches the tail store. For tail
+ // size 3 or 4, the tail store is i32, so v4i32 cannot be used and a legal alternative is needed.
+ if (TailSize > 2 && TailSize <= 4) {
+ return MVT::v8i16;
+ }
+ return MVT::v4i32;
+ }
+ if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
+ return MVT::v4i32;
+ }
}
if (Subtarget.isPPC64()) {
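A worked example of the tail-size rule above (values are illustrative): a 20-byte memset leaves a 4-byte tail (20 % 16 == 4), which lands in the (2, 4] window, so v8i16 is chosen; a 32-byte memset has no tail and keeps v4i32. As a sketch:

    #include <cstdint>
    // Hypothetical mirror of the type choice; strings stand in for MVTs.
    static const char *memsetVectorType(uint64_t Size) {
      uint64_t TailSize = Size % 16;
      return (TailSize > 2 && TailSize <= 4) ? "v8i16" : "v4i32";
    }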
@@ -17227,7 +17299,7 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
Type *Ty) const {
- if (Subtarget.hasSPE())
+ if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
return false;
switch (Ty->getScalarType()->getTypeID()) {
case Type::FloatTyID:
@@ -17431,7 +17503,7 @@ bool PPCTargetLowering::useLoadStackGuardNode() const {
void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
if (Subtarget.isAIXABI()) {
M.getOrInsertGlobal(AIXSSPCanaryWordName,
- Type::getInt8PtrTy(M.getContext()));
+ PointerType::getUnqual(M.getContext()));
return;
}
if (!Subtarget.isTargetLinux())
@@ -18539,9 +18611,7 @@ Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
Value *IncrHi =
Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
- Value *Addr =
- Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext()));
- Value *LoHi = Builder.CreateCall(RMW, {Addr, IncrLo, IncrHi});
+ Value *LoHi = Builder.CreateCall(RMW, {AlignedAddr, IncrLo, IncrHi});
Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
@@ -18566,11 +18636,9 @@ Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
Value *NewHi =
Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
- Value *Addr =
- Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext()));
emitLeadingFence(Builder, CI, Ord);
Value *LoHi =
- Builder.CreateCall(IntCmpXchg, {Addr, CmpLo, CmpHi, NewLo, NewHi});
+ Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
emitTrailingFence(Builder, CI, Ord);
Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
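Both bitcast deletions above are enabled by opaque pointer types: every pointer now has type ptr, so the i8* bitcast of AlignedAddr was an identity and the address can feed the intrinsic call directly. A sketch of the before/after shape (hypothetical function; CreateBitCast is the real IRBuilder API):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;
    static Value *addrForAtomicIntrinsic(IRBuilder<> &B, Value *AlignedAddr) {
      // Typed-pointer era: return B.CreateBitCast(AlignedAddr, <i8*>);
      // with opaque pointers that cast folds to its operand, so:
      (void)B;
      return AlignedAddr;
    }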
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
index e6ebc68008fb..d8679dcf4018 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -356,9 +356,9 @@ namespace llvm {
/// ADDIS_TLSGD_L_ADDR until after register assignment.
GET_TLS_ADDR,
- /// %x3 = GET_TPOINTER - Used for the local-exec TLS model on 32-bit AIX,
- /// produces a call to .__get_tpointer to retrieve the thread pointer
- /// At the end of the call, the thread pointer is found in R3.
+ /// %x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on
+ /// 32-bit AIX, produces a call to .__get_tpointer to retrieve the thread
+ /// pointer. At the end of the call, the thread pointer is found in R3.
GET_TPOINTER,
/// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
@@ -791,6 +791,11 @@ namespace llvm {
return true;
}
+ bool
+ shallExtractConstSplatVectorElementToStore(Type *VectorTy,
+ unsigned ElemSizeInBits,
+ unsigned &Index) const override;
+
bool isCtlzFast() const override {
return true;
}
@@ -970,21 +975,20 @@ namespace llvm {
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
- void LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
+ void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- unsigned
+ InlineAsm::ConstraintCode
getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
if (ConstraintCode == "es")
- return InlineAsm::Constraint_es;
+ return InlineAsm::ConstraintCode::es;
else if (ConstraintCode == "Q")
- return InlineAsm::Constraint_Q;
+ return InlineAsm::ConstraintCode::Q;
else if (ConstraintCode == "Z")
- return InlineAsm::Constraint_Z;
+ return InlineAsm::ConstraintCode::Z;
else if (ConstraintCode == "Zy")
- return InlineAsm::Constraint_Zy;
+ return InlineAsm::ConstraintCode::Zy;
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index fd44efa1b3f4..0322bb37b1fd 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -380,7 +380,7 @@ let mayStore = 1, mayLoad = 1,
Defs = [CR0],
Constraints = "@earlyclobber $scratch,@earlyclobber $RTp" in {
// Atomic pseudo instructions expanded post-ra.
-def ATOMIC_SWAP_I128 : AtomicRMW128<"#ATOMIC_SWAP_I128">;
+def ATOMIC_SWAP_I128 : AtomicRMW128<"#ATOMIC_SWAP_I128">;
def ATOMIC_LOAD_ADD_I128 : AtomicRMW128<"#ATOMIC_LOAD_ADD_I128">;
def ATOMIC_LOAD_SUB_I128 : AtomicRMW128<"#ATOMIC_LOAD_SUB_I128">;
def ATOMIC_LOAD_AND_I128 : AtomicRMW128<"#ATOMIC_LOAD_AND_I128">;
@@ -395,48 +395,21 @@ def ATOMIC_CMP_SWAP_I128 : PPCPostRAExpPseudo<
"#ATOMIC_CMP_SWAP_I128", []>;
}
-def : Pat<(int_ppc_atomicrmw_add_i128 ForceXForm:$ptr,
- i64:$incr_lo,
- i64:$incr_hi),
- (SPLIT_QUADWORD (ATOMIC_LOAD_ADD_I128 memrr:$ptr,
- g8rc:$incr_lo,
- g8rc:$incr_hi))>;
-def : Pat<(int_ppc_atomicrmw_sub_i128 ForceXForm:$ptr,
- i64:$incr_lo,
- i64:$incr_hi),
- (SPLIT_QUADWORD (ATOMIC_LOAD_SUB_I128 memrr:$ptr,
- g8rc:$incr_lo,
- g8rc:$incr_hi))>;
-def : Pat<(int_ppc_atomicrmw_xor_i128 ForceXForm:$ptr,
- i64:$incr_lo,
- i64:$incr_hi),
- (SPLIT_QUADWORD (ATOMIC_LOAD_XOR_I128 memrr:$ptr,
- g8rc:$incr_lo,
- g8rc:$incr_hi))>;
-def : Pat<(int_ppc_atomicrmw_and_i128 ForceXForm:$ptr,
- i64:$incr_lo,
- i64:$incr_hi),
- (SPLIT_QUADWORD (ATOMIC_LOAD_AND_I128 memrr:$ptr,
- g8rc:$incr_lo,
- g8rc:$incr_hi))>;
-def : Pat<(int_ppc_atomicrmw_nand_i128 ForceXForm:$ptr,
- i64:$incr_lo,
- i64:$incr_hi),
- (SPLIT_QUADWORD (ATOMIC_LOAD_NAND_I128 memrr:$ptr,
- g8rc:$incr_lo,
- g8rc:$incr_hi))>;
-def : Pat<(int_ppc_atomicrmw_or_i128 ForceXForm:$ptr,
- i64:$incr_lo,
- i64:$incr_hi),
- (SPLIT_QUADWORD (ATOMIC_LOAD_OR_I128 memrr:$ptr,
- g8rc:$incr_lo,
- g8rc:$incr_hi))>;
-def : Pat<(int_ppc_atomicrmw_xchg_i128 ForceXForm:$ptr,
- i64:$incr_lo,
- i64:$incr_hi),
- (SPLIT_QUADWORD (ATOMIC_SWAP_I128 memrr:$ptr,
- g8rc:$incr_lo,
- g8rc:$incr_hi))>;
+class PatAtomicRMWI128<SDPatternOperator OpNode, AtomicRMW128 Inst> :
+ Pat<(OpNode ForceXForm:$ptr,
+ i64:$incr_lo,
+ i64:$incr_hi),
+ (SPLIT_QUADWORD (Inst memrr:$ptr,
+ g8rc:$incr_lo,
+ g8rc:$incr_hi))>;
+
+def : PatAtomicRMWI128<int_ppc_atomicrmw_add_i128, ATOMIC_LOAD_ADD_I128>;
+def : PatAtomicRMWI128<int_ppc_atomicrmw_sub_i128, ATOMIC_LOAD_SUB_I128>;
+def : PatAtomicRMWI128<int_ppc_atomicrmw_xor_i128, ATOMIC_LOAD_XOR_I128>;
+def : PatAtomicRMWI128<int_ppc_atomicrmw_and_i128, ATOMIC_LOAD_AND_I128>;
+def : PatAtomicRMWI128<int_ppc_atomicrmw_nand_i128, ATOMIC_LOAD_NAND_I128>;
+def : PatAtomicRMWI128<int_ppc_atomicrmw_or_i128, ATOMIC_LOAD_OR_I128>;
+def : PatAtomicRMWI128<int_ppc_atomicrmw_xchg_i128, ATOMIC_SWAP_I128>;
def : Pat<(int_ppc_cmpxchg_i128 ForceXForm:$ptr,
i64:$cmp_lo,
i64:$cmp_hi,
@@ -1958,8 +1931,8 @@ def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)),
def : Pat<(i64 (PPCtoc_entry tglobaltlsaddr:$disp, i64:$reg)),
(i64 (LDtoc tglobaltlsaddr:$disp, i64:$reg))>;
-// The following pattern matches 64-bit local-exec TLS accesses on AIX.
-// PPCaddTls is used in local-exec accesses in order to:
+// The following pattern matches 64-bit local- and initial-exec TLS accesses on AIX.
+// PPCaddTls is used in local- and initial-exec accesses in order to:
// - Get the address of a variable (adding the variable offset to the thread
// pointer in r13).
// - Create an opportunity to optimize the user of the loaded address.
@@ -1970,8 +1943,8 @@ def : Pat<(PPCaddTls i64:$in, i64:$addr),
def : Pat<(atomic_load_64 DSForm:$src), (LD memrix:$src)>;
def : Pat<(atomic_load_64 XForm:$src), (LDX memrr:$src)>;
-def : Pat<(atomic_store_64 DSForm:$ptr, i64:$val), (STD g8rc:$val, memrix:$ptr)>;
-def : Pat<(atomic_store_64 XForm:$ptr, i64:$val), (STDX g8rc:$val, memrr:$ptr)>;
+def : Pat<(atomic_store_64 i64:$val, DSForm:$ptr), (STD g8rc:$val, memrix:$ptr)>;
+def : Pat<(atomic_store_64 i64:$val, XForm:$ptr), (STDX g8rc:$val, memrr:$ptr)>;
let Predicates = [IsISA3_0, In64BitMode] in {
def : Pat<(i64 (int_ppc_cmpeqb g8rc:$a, g8rc:$b)),
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index 224c7b281ac4..5389f42a325c 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -220,7 +220,7 @@ BForm_4<bits<6> opcode, bits<5> bo, bit aa, bit lk,
}
// 1.7.3 SC-Form
-class SCForm<bits<6> opcode, bits<1> xo,
+class SCForm<bits<6> opcode, bits<1> xo1, bits<1> xo2,
dag OOL, dag IOL, string asmstr, InstrItinClass itin,
list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
@@ -229,7 +229,8 @@ class SCForm<bits<6> opcode, bits<1> xo,
let Pattern = pattern;
let Inst{20-26} = LEV;
- let Inst{30} = xo;
+ let Inst{30} = xo1;
+ let Inst{31} = xo2;
}
// 1.7.4 D-Form
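To make the widened SC form concrete: bit 30 now carries xo1 and bit 31 carries xo2, so sc (SCForm<17, 1, 0, ...>) and scv (SCForm<17, 0, 1, ...>, added in PPCInstrInfo.td below) differ only in these two bits. A sketch, assuming PPC's big-endian bit numbering where Inst{31} is the least significant word bit:

    #include <cstdint>
    // Returns the low two bits of the instruction word: Inst{30} is word
    // bit 1 and Inst{31} is word bit 0.
    static uint32_t scFormXoBits(uint32_t Xo1, uint32_t Xo2) {
      return ((Xo1 & 1) << 1) | (Xo2 & 1);
    }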
@@ -724,6 +725,38 @@ class XForm_24_sync<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
let Inst{31} = 0;
}
+class XForm_IMM2_IMM2<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<2> L;
+ bits<2> PL;
+
+ let Pattern = pattern;
+ let Inst{6-8} = 0;
+ let Inst{9-10} = L;
+ let Inst{11-13} = 0;
+ let Inst{14-15} = PL;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_IMM3_IMM2<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> L;
+ bits<2> SC;
+
+ let Pattern = pattern;
+ let Inst{6-7} = 0;
+ let Inst{8-10} = L;
+ let Inst{11-13} = 0;
+ let Inst{14-15} = SC;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
class XForm_24_eieio<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
string asmstr, InstrItinClass itin, list<dag> pattern>
: XForm_24_sync<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
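A compact sketch of the field placement the two new formats above define, assuming the primary opcode occupies Inst{0-5} as in the base I format; in PPC's big-endian numbering a field whose last bit is Hi is shifted left by 31 - Hi:

    #include <cstdint>
    static uint32_t encodeXFormImm2Imm2(uint32_t Opcode, uint32_t L,
                                        uint32_t PL, uint32_t Xo) {
      auto Field = [](uint32_t V, unsigned Hi) { return V << (31 - Hi); };
      // Inst{0-5}=opcode, Inst{9-10}=L, Inst{14-15}=PL, Inst{21-30}=xo;
      // all other bits, including Inst{31}, are zero.
      return Field(Opcode & 0x3F, 5) | Field(L & 3, 10) |
             Field(PL & 3, 15) | Field(Xo & 0x3FF, 30);
    }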
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 784953dbc847..d0a6cced1b19 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -17,11 +17,11 @@
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
-#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -155,22 +155,21 @@ unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
if (!MO.isReg() || !MO.isDef() || MO.isImplicit())
continue;
- int Cycle = ItinData->getOperandCycle(DefClass, i);
- if (Cycle < 0)
+ std::optional<unsigned> Cycle = ItinData->getOperandCycle(DefClass, i);
+ if (!Cycle)
continue;
- Latency = std::max(Latency, (unsigned) Cycle);
+ Latency = std::max(Latency, *Cycle);
}
return Latency;
}
-int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr &DefMI, unsigned DefIdx,
- const MachineInstr &UseMI,
- unsigned UseIdx) const {
- int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx,
- UseMI, UseIdx);
+std::optional<unsigned> PPCInstrInfo::getOperandLatency(
+ const InstrItineraryData *ItinData, const MachineInstr &DefMI,
+ unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
+ std::optional<unsigned> Latency = PPCGenInstrInfo::getOperandLatency(
+ ItinData, DefMI, DefIdx, UseMI, UseIdx);
if (!DefMI.getParent())
return Latency;
@@ -190,7 +189,7 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
if (UseMI.isBranch() && IsRegCR) {
- if (Latency < 0)
+ if (!Latency)
Latency = getInstrLatency(ItinData, DefMI);
// On some cores, there is an additional delay between writing to a condition
@@ -210,34 +209,14 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case PPC::DIR_PWR7:
case PPC::DIR_PWR8:
// FIXME: Is this needed for POWER9?
- Latency += 2;
- break;
+ Latency = *Latency + 2;
+ break;
}
}
return Latency;
}
-/// This is an architecture-specific helper function of reassociateOps.
-/// Set special operand attributes for new instructions after reassociation.
-void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
- MachineInstr &OldMI2,
- MachineInstr &NewMI1,
- MachineInstr &NewMI2) const {
- // Propagate FP flags from the original instructions.
- // But clear poison-generating flags because those may not be valid now.
- uint32_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
- NewMI1.setFlags(IntersectedFlags);
- NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap);
- NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap);
- NewMI1.clearFlag(MachineInstr::MIFlag::IsExact);
-
- NewMI2.setFlags(IntersectedFlags);
- NewMI2.clearFlag(MachineInstr::MIFlag::NoSWrap);
- NewMI2.clearFlag(MachineInstr::MIFlag::NoUWrap);
- NewMI2.clearFlag(MachineInstr::MIFlag::IsExact);
-}
-
void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &MI,
uint32_t Flags) const {
MI.setFlags(Flags);
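The latency hunks above replace the old convention of returning -1 for an unknown latency with std::optional<unsigned>. A simplified sketch of the new control flow (the real code falls back to getInstrLatency rather than giving up):

    #include <optional>
    static std::optional<unsigned> addCrDelay(std::optional<unsigned> Latency) {
      if (!Latency)          // previously: if (Latency < 0)
        return std::nullopt; // real code recomputes a latency here
      return *Latency + 2;   // previously: Latency += 2
    }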
@@ -763,7 +742,7 @@ bool PPCInstrInfo::getMachineCombinerPatterns(
bool DoRegPressureReduce) const {
// Using the machine combiner in this way is potentially expensive, so
// restrict to when aggressive optimizations are desired.
- if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive)
+ if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOptLevel::Aggressive)
return false;
if (getFMAPatterns(Root, Patterns, DoRegPressureReduce))
@@ -1121,7 +1100,7 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(
case PPC::XXSETACCZW:
return true;
}
- return false;
+ return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}
unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
@@ -1174,8 +1153,8 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
// If machine instrs are no longer in two-address forms, update
// destination register as well.
if (Reg0 == Reg1) {
- // Must be two address instruction!
- assert(MI.getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
+ // Must be a two-address instruction (i.e. op1 is tied to op0).
+ assert(MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) == 0 &&
"Expecting a two-address instruction!");
assert(MI.getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch");
Reg2IsKill = false;
@@ -1530,6 +1509,9 @@ bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
Register DstReg, Register TrueReg,
Register FalseReg, int &CondCycles,
int &TrueCycles, int &FalseCycles) const {
+ if (!Subtarget.hasISEL())
+ return false;
+
if (Cond.size() != 2)
return false;
@@ -2833,10 +2815,6 @@ bool PPCInstrInfo::optimizeCmpPostRA(MachineInstr &CmpMI) const {
.addReg(CRReg, RegState::ImplicitDefine);
SrcMI->clearRegisterDeads(CRReg);
- // Fix up killed/dead flag for SrcReg after transformation.
- if (SrcRegHasOtherUse || CmpMI.getOperand(1).isKill())
- fixupIsDeadOrKill(SrcMI, &CmpMI, SrcReg);
-
assert(SrcMI->definesRegister(PPC::CR0) &&
"Record-form instruction does not define cr0?");
@@ -2899,8 +2877,9 @@ static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
}
bool PPCInstrInfo::shouldClusterMemOps(
- ArrayRef<const MachineOperand *> BaseOps1,
- ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
+ ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
+ bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
+ int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
unsigned NumBytes) const {
assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
@@ -2909,9 +2888,10 @@ bool PPCInstrInfo::shouldClusterMemOps(
assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
"Only base registers and frame indices are supported.");
- // The NumLoads means the number of loads that has been clustered.
+ // ClusterSize means the number of memory operations that will have been
+ // clustered if this hook returns true.
// Don't cluster this memory op if at least two ops are already clustered.
- if (NumLoads > 2)
+ if (ClusterSize > 2)
return false;
// Cluster the load/store only when they have the same base
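Put differently, ClusterSize already counts the candidate being added, so the guard above caps clusters at two memory operations. As a one-line sketch:

    static bool clusterSizeOK(unsigned ClusterSize) {
      return ClusterSize <= 2; // reject would-be clusters of three or more
    }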
@@ -2976,41 +2956,40 @@ unsigned PPCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
std::pair<unsigned, unsigned>
PPCInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
- const unsigned Mask = PPCII::MO_ACCESS_MASK;
- return std::make_pair(TF & Mask, TF & ~Mask);
+ // All PPC target flags are direct (there is no bitmask component).
+ return std::make_pair(TF, 0u);
}
ArrayRef<std::pair<unsigned, const char *>>
PPCInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
using namespace PPCII;
static const std::pair<unsigned, const char *> TargetFlags[] = {
- {MO_LO, "ppc-lo"},
- {MO_HA, "ppc-ha"},
- {MO_TPREL_LO, "ppc-tprel-lo"},
- {MO_TPREL_HA, "ppc-tprel-ha"},
- {MO_DTPREL_LO, "ppc-dtprel-lo"},
- {MO_TLSLD_LO, "ppc-tlsld-lo"},
- {MO_TOC_LO, "ppc-toc-lo"},
- {MO_TLS, "ppc-tls"}};
- return ArrayRef(TargetFlags);
-}
-
-ArrayRef<std::pair<unsigned, const char *>>
-PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
- using namespace PPCII;
- static const std::pair<unsigned, const char *> TargetFlags[] = {
{MO_PLT, "ppc-plt"},
{MO_PIC_FLAG, "ppc-pic"},
{MO_PCREL_FLAG, "ppc-pcrel"},
{MO_GOT_FLAG, "ppc-got"},
{MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"},
{MO_TLSGD_FLAG, "ppc-tlsgd"},
- {MO_TLSLD_FLAG, "ppc-tlsld"},
{MO_TPREL_FLAG, "ppc-tprel"},
+ {MO_TLSLD_FLAG, "ppc-tlsld"},
{MO_TLSGDM_FLAG, "ppc-tlsgdm"},
{MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"},
{MO_GOT_TLSLD_PCREL_FLAG, "ppc-got-tlsld-pcrel"},
- {MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"}};
+ {MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"},
+ {MO_LO, "ppc-lo"},
+ {MO_HA, "ppc-ha"},
+ {MO_TPREL_LO, "ppc-tprel-lo"},
+ {MO_TPREL_HA, "ppc-tprel-ha"},
+ {MO_DTPREL_LO, "ppc-dtprel-lo"},
+ {MO_TLSLD_LO, "ppc-tlsld-lo"},
+ {MO_TOC_LO, "ppc-toc-lo"},
+ {MO_TLS, "ppc-tls"},
+ {MO_PIC_HA_FLAG, "ppc-ha-pic"},
+ {MO_PIC_LO_FLAG, "ppc-lo-pic"},
+ {MO_TPREL_PCREL_FLAG, "ppc-tprel-pcrel"},
+ {MO_TLS_PCREL_FLAG, "ppc-tls-pcrel"},
+ {MO_GOT_PCREL_FLAG, "ppc-got-pcrel"},
+ };
return ArrayRef(TargetFlags);
}
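Because every entry is now serialized as a direct flag, a machine operand carries exactly one PPCII::MO_* value rather than an OR of mask bits, which is why decomposeMachineOperandsTargetFlags above can return the whole value as the direct part. A sketch of that invariant:

    #include <cassert>
    #include <utility>
    static void checkDirectDecomposition(unsigned TF) {
      std::pair<unsigned, unsigned> P{TF, 0u}; // what the hook now returns
      assert(P.first == TF && P.second == 0u && "PPC target flags are direct");
    }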
@@ -3412,7 +3391,7 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
Opc == PPC::RLWINM || Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8 ||
Opc == PPC::RLWINM8_rec;
bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg())
- ? isVFRegister(MI.getOperand(0).getReg())
+ ? PPC::isVFRegister(MI.getOperand(0).getReg())
: false;
if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, true))
return nullptr;
@@ -3467,101 +3446,6 @@ ArrayRef<unsigned> PPCInstrInfo::getLoadOpcodesForSpillArray() const {
return {LoadSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
}
-void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr *StartMI, MachineInstr *EndMI,
- unsigned RegNo) const {
- // Conservatively clear kill flag for the register if the instructions are in
- // different basic blocks and in SSA form, because the kill flag may no longer
- // be right. There is no need to bother with dead flags since defs with no
- // uses will be handled by DCE.
- MachineRegisterInfo &MRI = StartMI->getParent()->getParent()->getRegInfo();
- if (MRI.isSSA() && (StartMI->getParent() != EndMI->getParent())) {
- MRI.clearKillFlags(RegNo);
- return;
- }
-
- // Instructions between [StartMI, EndMI] should be in same basic block.
- assert((StartMI->getParent() == EndMI->getParent()) &&
- "Instructions are not in same basic block");
-
- // If before RA, StartMI may be def through COPY, we need to adjust it to the
- // real def. See function getForwardingDefMI.
- if (MRI.isSSA()) {
- bool Reads, Writes;
- std::tie(Reads, Writes) = StartMI->readsWritesVirtualRegister(RegNo);
- if (!Reads && !Writes) {
- assert(Register::isVirtualRegister(RegNo) &&
- "Must be a virtual register");
- // Get real def and ignore copies.
- StartMI = MRI.getVRegDef(RegNo);
- }
- }
-
- bool IsKillSet = false;
-
- auto clearOperandKillInfo = [=] (MachineInstr &MI, unsigned Index) {
- MachineOperand &MO = MI.getOperand(Index);
- if (MO.isReg() && MO.isUse() && MO.isKill() &&
- getRegisterInfo().regsOverlap(MO.getReg(), RegNo))
- MO.setIsKill(false);
- };
-
- // Set killed flag for EndMI.
- // No need to do anything if EndMI defines RegNo.
- int UseIndex =
- EndMI->findRegisterUseOperandIdx(RegNo, false, &getRegisterInfo());
- if (UseIndex != -1) {
- EndMI->getOperand(UseIndex).setIsKill(true);
- IsKillSet = true;
- // Clear killed flag for other EndMI operands related to RegNo. In some
- // upexpected cases, killed may be set multiple times for same register
- // operand in same MI.
- for (int i = 0, e = EndMI->getNumOperands(); i != e; ++i)
- if (i != UseIndex)
- clearOperandKillInfo(*EndMI, i);
- }
-
- // Walking the inst in reverse order (EndMI -> StartMI].
- MachineBasicBlock::reverse_iterator It = *EndMI;
- MachineBasicBlock::reverse_iterator E = EndMI->getParent()->rend();
- // EndMI has been handled above, skip it here.
- It++;
- MachineOperand *MO = nullptr;
- for (; It != E; ++It) {
- // Skip insturctions which could not be a def/use of RegNo.
- if (It->isDebugInstr() || It->isPosition())
- continue;
-
- // Clear killed flag for all It operands related to RegNo. In some
- // upexpected cases, killed may be set multiple times for same register
- // operand in same MI.
- for (int i = 0, e = It->getNumOperands(); i != e; ++i)
- clearOperandKillInfo(*It, i);
-
- // If killed is not set, set killed for its last use or set dead for its def
- // if no use found.
- if (!IsKillSet) {
- if ((MO = It->findRegisterUseOperand(RegNo, false, &getRegisterInfo()))) {
- // Use found, set it killed.
- IsKillSet = true;
- MO->setIsKill(true);
- continue;
- } else if ((MO = It->findRegisterDefOperand(RegNo, false, true,
- &getRegisterInfo()))) {
- // No use found, set dead for its def.
- assert(&*It == StartMI && "No new def between StartMI and EndMI.");
- MO->setIsDead(true);
- break;
- }
- }
-
- if ((&*It) == StartMI)
- break;
- }
- // Ensure RegMo liveness is killed after EndMI.
- assert((IsKillSet || (MO && MO->isDead())) &&
- "RegNo should be killed or dead");
-}
-
// This opt tries to convert the following imm form to an index form to save an
// add for stack variables.
// Return false if no such pattern found.
@@ -3725,8 +3609,8 @@ bool PPCInstrInfo::isImmInstrEligibleForFolding(MachineInstr &MI,
return false;
// TODO: sync the logic between instrHasImmForm() and ImmToIdxMap.
- if (!instrHasImmForm(XFormOpcode, isVFRegister(MI.getOperand(0).getReg()),
- III, true))
+ if (!instrHasImmForm(XFormOpcode,
+ PPC::isVFRegister(MI.getOperand(0).getReg()), III, true))
return false;
if (!III.IsSummingOperands)
@@ -3796,6 +3680,7 @@ bool PPCInstrInfo::isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index,
// result of a load-immediate or an add-immediate, convert it to
// the immediate form if the constant is in range.
bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
+ SmallSet<Register, 4> &RegsToUpdate,
MachineInstr **KilledDef) const {
MachineFunction *MF = MI.getParent()->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
@@ -3813,6 +3698,15 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
if (KilledDef && KillFwdDefMI)
*KilledDef = DefMI;
+ // Conservatively add defs from DefMI and defs/uses from MI to the set of
+ // registers that need their kill flags updated.
+ for (const MachineOperand &MO : DefMI->operands())
+ if (MO.isReg() && MO.isDef())
+ RegsToUpdate.insert(MO.getReg());
+ for (const MachineOperand &MO : MI.operands())
+ if (MO.isReg())
+ RegsToUpdate.insert(MO.getReg());
+
// If this is an imm instruction and its register operand is produced by ADDI,
// put the imm into the imm inst directly.
if (RI.getMappedIdxOpcForImmOpc(MI.getOpcode()) !=
@@ -3822,7 +3716,7 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
ImmInstrInfo III;
bool IsVFReg = MI.getOperand(0).isReg()
- ? isVFRegister(MI.getOperand(0).getReg())
+ ? PPC::isVFRegister(MI.getOperand(0).getReg())
: false;
bool HasImmForm = instrHasImmForm(MI.getOpcode(), IsVFReg, III, PostRA);
// If this is a reg+reg instruction that has a reg+imm form,
@@ -4615,9 +4509,6 @@ bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
// Sign-extend to 64-bits.
int64_t SExtImm = SignExtend64<16>(Immediate);
- bool IsForwardingOperandKilled = MI.getOperand(OpNoForForwarding).isKill();
- Register ForwardingOperandReg = MI.getOperand(OpNoForForwarding).getReg();
-
bool ReplaceWithLI = false;
bool Is64BitLI = false;
int64_t NewImm = 0;
@@ -4829,12 +4720,8 @@ bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
*KilledDef = nullptr;
replaceInstrWithLI(MI, LII);
- // Fixup killed/dead flag after transformation.
- // Pattern:
- // ForwardingOperandReg = LI imm1
- // y = op2 imm2, ForwardingOperandReg(killed)
- if (IsForwardingOperandKilled)
- fixupIsDeadOrKill(&DefMI, &MI, ForwardingOperandReg);
+ if (PostRA)
+ recomputeLivenessFlags(*MI.getParent());
LLVM_DEBUG(dbgs() << "With:\n");
LLVM_DEBUG(MI.dump());
@@ -4864,7 +4751,7 @@ bool PPCInstrInfo::transformToNewImmFormFedByAdd(
// get Imm Form info.
ImmInstrInfo III;
bool IsVFReg = MI.getOperand(0).isReg()
- ? isVFRegister(MI.getOperand(0).getReg())
+ ? PPC::isVFRegister(MI.getOperand(0).getReg())
: false;
if (!instrHasImmForm(XFormOpcode, IsVFReg, III, PostRA))
@@ -4895,11 +4782,6 @@ bool PPCInstrInfo::transformToNewImmFormFedByAdd(
if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm, ImmBase))
return false;
- // Get killed info in case fixup needed after transformation.
- unsigned ForwardKilledOperandReg = ~0U;
- if (MI.getOperand(III.OpNoForForwarding).isKill())
- ForwardKilledOperandReg = MI.getOperand(III.OpNoForForwarding).getReg();
-
// Do the transform
LLVM_DEBUG(dbgs() << "Replacing existing reg+imm instruction:\n");
LLVM_DEBUG(MI.dump());
@@ -4907,35 +4789,8 @@ bool PPCInstrInfo::transformToNewImmFormFedByAdd(
LLVM_DEBUG(DefMI.dump());
MI.getOperand(III.OpNoForForwarding).setReg(RegMO->getReg());
- if (RegMO->isKill()) {
- MI.getOperand(III.OpNoForForwarding).setIsKill(true);
- // Clear the killed flag in RegMO. Doing this here can handle some cases
- // that DefMI and MI are not in same basic block.
- RegMO->setIsKill(false);
- }
MI.getOperand(III.ImmOpNo).setImm(Imm);
- // FIXME: fix kill/dead flag if MI and DefMI are not in same basic block.
- if (DefMI.getParent() == MI.getParent()) {
- // Check if reg is killed between MI and DefMI.
- auto IsKilledFor = [&](unsigned Reg) {
- MachineBasicBlock::const_reverse_iterator It = MI;
- MachineBasicBlock::const_reverse_iterator E = DefMI;
- It++;
- for (; It != E; ++It) {
- if (It->killsRegister(Reg))
- return true;
- }
- return false;
- };
-
- // Update kill flag
- if (RegMO->isKill() || IsKilledFor(RegMO->getReg()))
- fixupIsDeadOrKill(&DefMI, &MI, RegMO->getReg());
- if (ForwardKilledOperandReg != ~0U)
- fixupIsDeadOrKill(&DefMI, &MI, ForwardKilledOperandReg);
- }
-
LLVM_DEBUG(dbgs() << "With:\n");
LLVM_DEBUG(MI.dump());
return true;
@@ -4979,12 +4834,8 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(
IsFwdFeederRegKilled, SeenIntermediateUse))
return false;
- // Get killed info in case fixup needed after transformation.
- unsigned ForwardKilledOperandReg = ~0U;
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
bool PostRA = !MRI.isSSA();
- if (PostRA && MI.getOperand(OpNoForForwarding).isKill())
- ForwardKilledOperandReg = MI.getOperand(OpNoForForwarding).getReg();
// We know that, the MI and DefMI both meet the pattern, and
// the Imm also meet the requirement with the new Imm-form.
@@ -5036,22 +4887,8 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(
// Update the opcode.
MI.setDesc(get(III.ImmOpcode));
- // Fix up killed/dead flag after transformation.
- // Pattern 1:
- // x = ADD KilledFwdFeederReg, imm
- // n = opn KilledFwdFeederReg(killed), regn
- // y = XOP 0, x
- // Pattern 2:
- // x = ADD reg(killed), imm
- // y = XOP 0, x
- if (IsFwdFeederRegKilled || RegMO->isKill())
- fixupIsDeadOrKill(&DefMI, &MI, RegMO->getReg());
- // Pattern 3:
- // ForwardKilledOperandReg = ADD reg, imm
- // y = XOP 0, ForwardKilledOperandReg(killed)
- if (ForwardKilledOperandReg != ~0U)
- fixupIsDeadOrKill(&DefMI, &MI, ForwardKilledOperandReg);
-
+ if (PostRA)
+ recomputeLivenessFlags(*MI.getParent());
LLVM_DEBUG(dbgs() << "With:\n");
LLVM_DEBUG(MI.dump());
@@ -5107,11 +4944,6 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
return false;
}
- // Get killed info in case fixup needed after transformation.
- unsigned ForwardKilledOperandReg = ~0U;
- if (PostRA && MI.getOperand(ConstantOpNo).isKill())
- ForwardKilledOperandReg = MI.getOperand(ConstantOpNo).getReg();
-
unsigned Opc = MI.getOpcode();
bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLW_rec ||
Opc == PPC::SRW || Opc == PPC::SRW_rec ||
@@ -5205,12 +5037,8 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
}
}
- // Fix up killed/dead flag after transformation.
- // Pattern:
- // ForwardKilledOperandReg = LI imm
- // y = XOP reg, ForwardKilledOperandReg(killed)
- if (ForwardKilledOperandReg != ~0U)
- fixupIsDeadOrKill(&DefMI, &MI, ForwardKilledOperandReg);
+ if (PostRA)
+ recomputeLivenessFlags(*MI.getParent());
LLVM_DEBUG(dbgs() << "With: ");
LLVM_DEBUG(MI.dump());
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 3dc5e2680c61..75f9cd1c206d 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -13,7 +13,10 @@
#ifndef LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
#define LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "PPC.h"
#include "PPCRegisterInfo.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
@@ -21,60 +24,6 @@
namespace llvm {
-/// PPCII - This namespace holds all of the PowerPC target-specific
-/// per-instruction flags. These must match the corresponding definitions in
-/// PPC.td and PPCInstrFormats.td.
-namespace PPCII {
-enum {
- // PPC970 Instruction Flags. These flags describe the characteristics of the
- // PowerPC 970 (aka G5) dispatch groups and how they are formed out of
- // raw machine instructions.
-
- /// PPC970_First - This instruction starts a new dispatch group, so it will
- /// always be the first one in the group.
- PPC970_First = 0x1,
-
- /// PPC970_Single - This instruction starts a new dispatch group and
- /// terminates it, so it will be the sole instruction in the group.
- PPC970_Single = 0x2,
-
- /// PPC970_Cracked - This instruction is cracked into two pieces, requiring
- /// two dispatch pipes to be available to issue.
- PPC970_Cracked = 0x4,
-
- /// PPC970_Mask/Shift - This is a bitmask that selects the pipeline type that
- /// an instruction is issued to.
- PPC970_Shift = 3,
- PPC970_Mask = 0x07 << PPC970_Shift
-};
-enum PPC970_Unit {
- /// These are the various PPC970 execution unit pipelines. Each instruction
- /// is one of these.
- PPC970_Pseudo = 0 << PPC970_Shift, // Pseudo instruction
- PPC970_FXU = 1 << PPC970_Shift, // Fixed Point (aka Integer/ALU) Unit
- PPC970_LSU = 2 << PPC970_Shift, // Load Store Unit
- PPC970_FPU = 3 << PPC970_Shift, // Floating Point Unit
- PPC970_CRU = 4 << PPC970_Shift, // Control Register Unit
- PPC970_VALU = 5 << PPC970_Shift, // Vector ALU
- PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit
- PPC970_BRU = 7 << PPC970_Shift // Branch Unit
-};
-
-enum {
- /// Shift count to bypass PPC970 flags
- NewDef_Shift = 6,
-
- /// This instruction is an X-Form memory operation.
- XFormMemOp = 0x1 << NewDef_Shift,
- /// This instruction is prefixed.
- Prefixed = 0x1 << (NewDef_Shift + 1),
- /// This instruction produced a sign extended result.
- SExt32To64 = 0x1 << (NewDef_Shift + 2),
- /// This instruction produced a zero extended result.
- ZExt32To64 = 0x1 << (NewDef_Shift + 3)
-};
-} // end namespace PPCII
-
// Instructions that have an immediate form might be convertible to that
// form if the correct input is a result of a load immediate. In order to
// know whether the transformation is special, we might need to know some
@@ -324,99 +273,6 @@ public:
return get(Opcode).TSFlags & PPCII::ZExt32To64;
}
- /// Check if Opcode corresponds to a call instruction that should be marked
- /// with the NOTOC relocation.
- bool isNoTOCCallInstr(unsigned Opcode) const {
- if (!get(Opcode).isCall())
- return false;
-
- switch (Opcode) {
- default:
-#ifndef NDEBUG
- llvm_unreachable("Unknown call opcode");
-#endif
- return false;
- case PPC::BL8_NOTOC:
- case PPC::BL8_NOTOC_TLS:
- case PPC::BL8_NOTOC_RM:
- return true;
-#ifndef NDEBUG
- case PPC::BL8:
- case PPC::BL:
- case PPC::BL8_TLS:
- case PPC::BL_TLS:
- case PPC::BLA8:
- case PPC::BLA:
- case PPC::BCCL:
- case PPC::BCCLA:
- case PPC::BCL:
- case PPC::BCLn:
- case PPC::BL8_NOP:
- case PPC::BL_NOP:
- case PPC::BL8_NOP_TLS:
- case PPC::BLA8_NOP:
- case PPC::BCTRL8:
- case PPC::BCTRL:
- case PPC::BCCCTRL8:
- case PPC::BCCCTRL:
- case PPC::BCCTRL8:
- case PPC::BCCTRL:
- case PPC::BCCTRL8n:
- case PPC::BCCTRLn:
- case PPC::BL8_RM:
- case PPC::BLA8_RM:
- case PPC::BL8_NOP_RM:
- case PPC::BLA8_NOP_RM:
- case PPC::BCTRL8_RM:
- case PPC::BCTRL8_LDinto_toc:
- case PPC::BCTRL8_LDinto_toc_RM:
- case PPC::BL8_TLS_:
- case PPC::TCRETURNdi8:
- case PPC::TCRETURNai8:
- case PPC::TCRETURNri8:
- case PPC::TAILBCTR8:
- case PPC::TAILB8:
- case PPC::TAILBA8:
- case PPC::BCLalways:
- case PPC::BLRL:
- case PPC::BCCLRL:
- case PPC::BCLRL:
- case PPC::BCLRLn:
- case PPC::BDZL:
- case PPC::BDNZL:
- case PPC::BDZLA:
- case PPC::BDNZLA:
- case PPC::BDZLp:
- case PPC::BDNZLp:
- case PPC::BDZLAp:
- case PPC::BDNZLAp:
- case PPC::BDZLm:
- case PPC::BDNZLm:
- case PPC::BDZLAm:
- case PPC::BDNZLAm:
- case PPC::BDZLRL:
- case PPC::BDNZLRL:
- case PPC::BDZLRLp:
- case PPC::BDNZLRLp:
- case PPC::BDZLRLm:
- case PPC::BDNZLRLm:
- case PPC::BL_RM:
- case PPC::BLA_RM:
- case PPC::BL_NOP_RM:
- case PPC::BCTRL_RM:
- case PPC::TCRETURNdi:
- case PPC::TCRETURNai:
- case PPC::TCRETURNri:
- case PPC::BCTRL_LWZinto_toc:
- case PPC::BCTRL_LWZinto_toc_RM:
- case PPC::TAILBCTR:
- case PPC::TAILB:
- case PPC::TAILBA:
- return false;
-#endif
- }
- }
-
static bool isSameClassPhysRegCopy(unsigned Opcode) {
unsigned CopyOpcodes[] = {PPC::OR, PPC::OR8, PPC::FMR,
PPC::VOR, PPC::XXLOR, PPC::XXLORf,
@@ -428,6 +284,32 @@ public:
return false;
}
+ static bool hasPCRelFlag(unsigned TF) {
+ return TF == PPCII::MO_PCREL_FLAG || TF == PPCII::MO_GOT_TLSGD_PCREL_FLAG ||
+ TF == PPCII::MO_GOT_TLSLD_PCREL_FLAG ||
+ TF == PPCII::MO_GOT_TPREL_PCREL_FLAG ||
+ TF == PPCII::MO_TPREL_PCREL_FLAG || TF == PPCII::MO_TLS_PCREL_FLAG ||
+ TF == PPCII::MO_GOT_PCREL_FLAG;
+ }
+
+ static bool hasGOTFlag(unsigned TF) {
+ return TF == PPCII::MO_GOT_FLAG || TF == PPCII::MO_GOT_TLSGD_PCREL_FLAG ||
+ TF == PPCII::MO_GOT_TLSLD_PCREL_FLAG ||
+ TF == PPCII::MO_GOT_TPREL_PCREL_FLAG ||
+ TF == PPCII::MO_GOT_PCREL_FLAG;
+ }
+
+ static bool hasTLSFlag(unsigned TF) {
+ return TF == PPCII::MO_TLSGD_FLAG || TF == PPCII::MO_TPREL_FLAG ||
+ TF == PPCII::MO_TLSLD_FLAG || TF == PPCII::MO_TLSGDM_FLAG ||
+ TF == PPCII::MO_GOT_TLSGD_PCREL_FLAG ||
+ TF == PPCII::MO_GOT_TLSLD_PCREL_FLAG ||
+ TF == PPCII::MO_GOT_TPREL_PCREL_FLAG || TF == PPCII::MO_TPREL_LO ||
+ TF == PPCII::MO_TPREL_HA || TF == PPCII::MO_DTPREL_LO ||
+ TF == PPCII::MO_TLSLD_LO || TF == PPCII::MO_TLS ||
+ TF == PPCII::MO_TPREL_PCREL_FLAG || TF == PPCII::MO_TLS_PCREL_FLAG;
+ }
+
ScheduleHazardRecognizer *
CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
const ScheduleDAG *DAG) const override;
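A hypothetical use of the new classification helpers (the predicate name is illustrative; the helpers are the ones defined above): with direct flags, classifying an operand reduces to a handful of equality tests instead of a bitmask AND.

    #include "PPCInstrInfo.h"
    static bool isPCRelGOTAccess(unsigned TF) {
      return llvm::PPCInstrInfo::hasGOTFlag(TF) &&
             llvm::PPCInstrInfo::hasPCRelFlag(TF);
    }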
@@ -439,13 +321,15 @@ public:
const MachineInstr &MI,
unsigned *PredCost = nullptr) const override;
- int getOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr &DefMI, unsigned DefIdx,
- const MachineInstr &UseMI,
- unsigned UseIdx) const override;
- int getOperandLatency(const InstrItineraryData *ItinData,
- SDNode *DefNode, unsigned DefIdx,
- SDNode *UseNode, unsigned UseIdx) const override {
+ std::optional<unsigned> getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr &DefMI,
+ unsigned DefIdx,
+ const MachineInstr &UseMI,
+ unsigned UseIdx) const override;
+ std::optional<unsigned> getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode,
+ unsigned UseIdx) const override {
return PPCGenInstrInfo::getOperandLatency(ItinData, DefNode, DefIdx,
UseNode, UseIdx);
}
@@ -512,12 +396,9 @@ public:
/// preserved for more FMA chain reassociations on PowerPC.
int getExtendResourceLenLimit() const override { return 1; }
- void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2,
- MachineInstr &NewMI1,
- MachineInstr &NewMI2) const override;
-
// PowerPC specific version of setSpecialOperandAttr that copies Flags to MI
// and clears nuw, nsw, and exact flags.
+ using TargetInstrInfo::setSpecialOperandAttr;
void setSpecialOperandAttr(MachineInstr &MI, uint32_t Flags) const;
bool isCoalescableExtInstr(const MachineInstr &MI,
@@ -678,8 +559,11 @@ public:
/// Returns true if the two given memory operations should be scheduled
/// adjacent.
bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+ int64_t Offset1, bool OffsetIsScalable1,
ArrayRef<const MachineOperand *> BaseOps2,
- unsigned NumLoads, unsigned NumBytes) const override;
+ int64_t Offset2, bool OffsetIsScalable2,
+ unsigned ClusterSize,
+ unsigned NumBytes) const override;
/// Return true if two MIs access different memory addresses and false
/// otherwise
@@ -700,21 +584,12 @@ public:
ArrayRef<std::pair<unsigned, const char *>>
getSerializableDirectMachineOperandTargetFlags() const override;
- ArrayRef<std::pair<unsigned, const char *>>
- getSerializableBitmaskMachineOperandTargetFlags() const override;
-
// Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction.
bool expandVSXMemPseudo(MachineInstr &MI) const;
// Lower pseudo instructions after register allocation.
bool expandPostRAPseudo(MachineInstr &MI) const override;
- static bool isVFRegister(unsigned Reg) {
- return Reg >= PPC::VF0 && Reg <= PPC::VF31;
- }
- static bool isVRRegister(unsigned Reg) {
- return Reg >= PPC::V0 && Reg <= PPC::V31;
- }
const TargetRegisterClass *updatedRC(const TargetRegisterClass *RC) const;
static int getRecordFormOpcode(unsigned Opcode);
@@ -737,6 +612,7 @@ public:
}
bool convertToImmediateForm(MachineInstr &MI,
+ SmallSet<Register, 4> &RegsToUpdate,
MachineInstr **KilledDef = nullptr) const;
bool foldFrameOffset(MachineInstr &MI) const;
bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase = nullptr) const;
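A hypothetical caller pattern for the new RegsToUpdate parameter (a sketch of the shape only, assuming the surrounding CodeGen types are in scope): registers touched by each transformed instruction are accumulated, and their kill/dead flags are recomputed once per block instead of being patched per transformation by the deleted fixupIsDeadOrKill.

    static void convertAllToImmForm(const llvm::PPCInstrInfo &TII,
                                    llvm::MachineBasicBlock &MBB) {
      llvm::SmallSet<llvm::Register, 4> RegsToUpdate;
      for (llvm::MachineInstr &MI : MBB)
        (void)TII.convertToImmediateForm(MI, RegsToUpdate);
      // Flags for every register in RegsToUpdate are then rebuilt once.
    }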
@@ -750,23 +626,6 @@ public:
MachineInstr *&ADDIMI, int64_t &OffsetAddi,
int64_t OffsetImm) const;
- /// Fixup killed/dead flag for register \p RegNo between instructions [\p
- /// StartMI, \p EndMI]. Some pre-RA or post-RA transformations may violate
- /// register killed/dead flags semantics, this function can be called to fix
- /// up. Before calling this function,
- /// 1. Ensure that \p RegNo liveness is killed after instruction \p EndMI.
- /// 2. Ensure that there is no new definition between (\p StartMI, \p EndMI)
- /// and possible definition for \p RegNo is \p StartMI or \p EndMI. For
- /// pre-RA cases, definition may be \p StartMI through COPY, \p StartMI
- /// will be adjust to true definition.
- /// 3. We can do accurate fixup for the case when all instructions between
- /// [\p StartMI, \p EndMI] are in same basic block.
- /// 4. For the case when \p StartMI and \p EndMI are not in same basic block,
- /// we conservatively clear kill flag for all uses of \p RegNo for pre-RA
- /// and for post-RA, we give an assertion as without reaching definition
- /// analysis post-RA, \p StartMI and \p EndMI are hard to keep right.
- void fixupIsDeadOrKill(MachineInstr *StartMI, MachineInstr *EndMI,
- unsigned RegNo) const;
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const;
void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo,
int64_t Imm) const;
@@ -785,38 +644,6 @@ public:
const DebugLoc &DL, Register Reg,
int64_t Imm) const;
- /// getRegNumForOperand - some operands use different numbering schemes
- /// for the same registers. For example, a VSX instruction may have any of
- /// vs0-vs63 allocated whereas an Altivec instruction could only have
- /// vs32-vs63 allocated (numbered as v0-v31). This function returns the actual
- /// register number needed for the opcode/operand number combination.
- /// The operand number argument will be useful when we need to extend this
- /// to instructions that use both Altivec and VSX numbering (for different
- /// operands).
- static unsigned getRegNumForOperand(const MCInstrDesc &Desc, unsigned Reg,
- unsigned OpNo) {
- int16_t regClass = Desc.operands()[OpNo].RegClass;
- switch (regClass) {
- // We store F0-F31, VF0-VF31 in MCOperand and it should be F0-F31,
- // VSX32-VSX63 during encoding/disassembling
- case PPC::VSSRCRegClassID:
- case PPC::VSFRCRegClassID:
- if (isVFRegister(Reg))
- return PPC::VSX32 + (Reg - PPC::VF0);
- break;
- // We store VSL0-VSL31, V0-V31 in MCOperand and it should be VSL0-VSL31,
- // VSX32-VSX63 during encoding/disassembling
- case PPC::VSRCRegClassID:
- if (isVRRegister(Reg))
- return PPC::VSX32 + (Reg - PPC::V0);
- break;
- // Other RegClass doesn't need mapping
- default:
- break;
- }
- return Reg;
- }
-
/// Check \p Opcode is BDNZ (Decrement CTR and branch if it is still nonzero).
bool isBDNZ(unsigned Opcode) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 2992f78aa38a..6199785206b2 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1641,10 +1641,18 @@ let isBranch = 1, isTerminator = 1, Size = 0 in {
// System call.
let PPC970_Unit = 7 in {
- def SC : SCForm<17, 1, (outs), (ins i32imm:$LEV),
+ def SC : SCForm<17, 1, 0, (outs), (ins i32imm:$LEV),
"sc $LEV", IIC_BrB, [(PPCsc (i32 imm:$LEV))]>;
}
+// We mark SCV as having no scheduling model since it is only meant to be used
+// as inline assembly. If we implement a builtin pattern for it we will need to
+// add it to the P9 and P10 scheduling models.
+let Predicates = [IsISA3_0], hasNoSchedulingInfo = 1 in {
+ def SCV : SCForm<17, 0, 1, (outs), (ins i32imm:$LEV),
+ "scv $LEV", IIC_BrB, []>;
+}
+
// Branch history rolling buffer.
def CLRBHRB : XForm_0<31, 430, (outs), (ins), "clrbhrb", IIC_BrB,
[(PPCclrbhrb)]>,
@@ -1713,11 +1721,11 @@ def : Pat<(int_ppc_dcbf xoaddr:$dst),
def : Pat<(int_ppc_icbt xoaddr:$dst),
(ICBT 0, xoaddr:$dst)>;
-def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)),
+def : Pat<(prefetch xoaddr:$dst, (i32 0), timm, (i32 1)),
(DCBT 0, xoaddr:$dst)>; // data prefetch for loads
-def : Pat<(prefetch xoaddr:$dst, (i32 1), imm, (i32 1)),
+def : Pat<(prefetch xoaddr:$dst, (i32 1), timm, (i32 1)),
(DCBTST 0, xoaddr:$dst)>; // data prefetch for stores
-def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 0)),
+def : Pat<(prefetch xoaddr:$dst, (i32 0), timm, (i32 0)),
(ICBT 0, xoaddr:$dst)>, Requires<[HasICBT]>; // inst prefetch (for read)
def : Pat<(int_ppc_dcbt_with_hint xoaddr:$dst, i32:$TH),
@@ -3180,6 +3188,7 @@ def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
(TCRETURNri CTRRC:$dst, imm:$imm)>;
def : Pat<(int_ppc_readflm), (MFFS)>;
+def : Pat<(int_ppc_mffsl), (MFFSL)>;
// Hi and Lo for Darwin Global Addresses.
def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>;
@@ -3251,8 +3260,8 @@ def GETtlsTpointer32AIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins),
"GETtlsTpointer32AIX",
[(set i32:$rD, (PPCgetTpointer))]>;
-// The following pattern matches local-exec TLS accesses on 32-bit AIX.
-// PPCaddTls is used in local-exec accesses in order to:
+// The following pattern matches local- and initial-exec TLS accesses on 32-bit AIX.
+// PPCaddTls is used in local- and initial-exec accesses in order to:
// - Get the address of a variable (add the variable offset to the thread
// pointer, retrieved by calling .__get_tpointer).
// - Create an opportunity to optimize the user of the loaded address.
@@ -3428,6 +3437,23 @@ def crnot : OutPatFrag<(ops node:$in),
def : Pat<(not i1:$in),
(crnot $in)>;
+// Pseudo-instructions for alternate assembly syntax (never used by codegen).
+// These are aliases that require C++ handling to convert to the target
+// instruction, while InstAliases can be handled directly by tblgen.
+class PPCAsmPseudo<string asm, dag iops>
+ : Instruction {
+ let Namespace = "PPC";
+ bit PPC64 = 0; // Default value, override with isPPC64
+
+ let OutOperandList = (outs);
+ let InOperandList = iops;
+ let Pattern = [];
+ let AsmString = asm;
+ let isAsmParserOnly = 1;
+ let isPseudo = 1;
+ let hasNoSchedulingInfo = 1;
+}
+
// Prefixed instructions may require access to the above defs at a later
// time so we include this after the def.
include "PPCInstrP10.td"
@@ -4449,23 +4475,6 @@ def ICBIEP : XForm_1a<31, 991, (outs), (ins (memrr $RA, $RB):$addr), "icbiep $a
// PowerPC Assembler Instruction Aliases
//
-// Pseudo-instructions for alternate assembly syntax (never used by codegen).
-// These are aliases that require C++ handling to convert to the target
-// instruction, while InstAliases can be handled directly by tblgen.
-class PPCAsmPseudo<string asm, dag iops>
- : Instruction {
- let Namespace = "PPC";
- bit PPC64 = 0; // Default value, override with isPPC64
-
- let OutOperandList = (outs);
- let InOperandList = iops;
- let Pattern = [];
- let AsmString = asm;
- let isAsmParserOnly = 1;
- let isPseudo = 1;
- let hasNoSchedulingInfo = 1;
-}
-
def : InstAlias<"sc", (SC 0)>;
def : InstAlias<"sync", (SYNC 0)>, Requires<[HasSYNC]>;
@@ -5027,12 +5036,12 @@ def : Pat<(atomic_load_16 XForm:$src), (LHZX memrr:$src)>;
def : Pat<(atomic_load_32 XForm:$src), (LWZX memrr:$src)>;
// Atomic stores
-def : Pat<(atomic_store_8 DForm:$ptr, i32:$val), (STB gprc:$val, memri:$ptr)>;
-def : Pat<(atomic_store_16 DForm:$ptr, i32:$val), (STH gprc:$val, memri:$ptr)>;
-def : Pat<(atomic_store_32 DForm:$ptr, i32:$val), (STW gprc:$val, memri:$ptr)>;
-def : Pat<(atomic_store_8 XForm:$ptr, i32:$val), (STBX gprc:$val, memrr:$ptr)>;
-def : Pat<(atomic_store_16 XForm:$ptr, i32:$val), (STHX gprc:$val, memrr:$ptr)>;
-def : Pat<(atomic_store_32 XForm:$ptr, i32:$val), (STWX gprc:$val, memrr:$ptr)>;
+def : Pat<(atomic_store_8 i32:$val, DForm:$ptr), (STB gprc:$val, memri:$ptr)>;
+def : Pat<(atomic_store_16 i32:$val, DForm:$ptr), (STH gprc:$val, memri:$ptr)>;
+def : Pat<(atomic_store_32 i32:$val, DForm:$ptr), (STW gprc:$val, memri:$ptr)>;
+def : Pat<(atomic_store_8 i32:$val, XForm:$ptr), (STBX gprc:$val, memrr:$ptr)>;
+def : Pat<(atomic_store_16 i32:$val, XForm:$ptr), (STHX gprc:$val, memrr:$ptr)>;
+def : Pat<(atomic_store_32 i32:$val, XForm:$ptr), (STWX gprc:$val, memrr:$ptr)>;
let Predicates = [IsISA3_0] in {
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrP10.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrP10.td
index 8cb8e4d91db2..d5a372e4dc10 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrP10.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -575,33 +575,54 @@ class XForm_XT5_BI5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
}
multiclass MLS_DForm_R_SI34_RTA5_MEM_p<bits<6> opcode, dag OOL, dag IOL,
- dag PCRel_IOL, string asmstr,
+ dag PCRel_IOL, dag PCRelOnly_IOL,
+ string asmstr, string asmstr_pcext,
InstrItinClass itin> {
def NAME : MLS_DForm_R_SI34_RTA5_MEM<opcode, OOL, IOL,
!strconcat(asmstr, ", 0"), itin, []>;
def pc : MLS_DForm_R_SI34_RTA5_MEM<opcode, OOL, PCRel_IOL,
!strconcat(asmstr, ", 1"), itin, []>,
isPCRel;
+ let isAsmParserOnly = 1, hasNoSchedulingInfo = 1 in {
+ def nopc : MLS_DForm_R_SI34_RTA5_MEM<opcode, OOL, IOL, asmstr, itin, []>;
+ let RA = 0 in
+ def onlypc : MLS_DForm_R_SI34_RTA5_MEM<opcode, OOL, PCRelOnly_IOL,
+ asmstr_pcext, itin, []>, isPCRel;
+ }
}
multiclass 8LS_DForm_R_SI34_RTA5_MEM_p<bits<6> opcode, dag OOL, dag IOL,
- dag PCRel_IOL, string asmstr,
+ dag PCRel_IOL, dag PCRelOnly_IOL,
+ string asmstr, string asmstr_pcext,
InstrItinClass itin> {
def NAME : 8LS_DForm_R_SI34_RTA5_MEM<opcode, OOL, IOL,
!strconcat(asmstr, ", 0"), itin, []>;
def pc : 8LS_DForm_R_SI34_RTA5_MEM<opcode, OOL, PCRel_IOL,
!strconcat(asmstr, ", 1"), itin, []>,
isPCRel;
+ let isAsmParserOnly = 1, hasNoSchedulingInfo = 1 in {
+ def nopc : 8LS_DForm_R_SI34_RTA5_MEM<opcode, OOL, IOL, asmstr, itin, []>;
+ let RA = 0 in
+ def onlypc : 8LS_DForm_R_SI34_RTA5_MEM<opcode, OOL, PCRelOnly_IOL,
+ asmstr_pcext, itin, []>, isPCRel;
+ }
}
multiclass 8LS_DForm_R_SI34_XT6_RA5_MEM_p<bits<5> opcode, dag OOL, dag IOL,
- dag PCRel_IOL, string asmstr,
+ dag PCRel_IOL, dag PCRelOnly_IOL,
+ string asmstr, string asmstr_pcext,
InstrItinClass itin> {
def NAME : 8LS_DForm_R_SI34_XT6_RA5_MEM<opcode, OOL, IOL,
!strconcat(asmstr, ", 0"), itin, []>;
def pc : 8LS_DForm_R_SI34_XT6_RA5_MEM<opcode, OOL, PCRel_IOL,
!strconcat(asmstr, ", 1"), itin, []>,
isPCRel;
+ let isAsmParserOnly = 1, hasNoSchedulingInfo = 1 in {
+ def nopc : 8LS_DForm_R_SI34_XT6_RA5_MEM<opcode, OOL, IOL, asmstr, itin, []>;
+ let RA = 0 in
+ def onlypc : 8LS_DForm_R_SI34_XT6_RA5_MEM<opcode, OOL, PCRelOnly_IOL,
+ asmstr_pcext, itin, []>, isPCRel;
+ }
}
def PrefixInstrs : Predicate<"Subtarget->hasPrefixInstrs()">;
@@ -615,7 +636,7 @@ def RCCp {
let Predicates = [PrefixInstrs] in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
defm PADDI8 :
- MLS_DForm_R_SI34_RTA5_p<14, (outs g8rc:$RT), (ins g8rc:$RA, s34imm:$SI),
+ MLS_DForm_R_SI34_RTA5_p<14, (outs g8rc:$RT), (ins g8rc_nox0:$RA, s34imm:$SI),
(ins immZero:$RA, s34imm_pcrel:$SI),
"paddi $RT, $RA, $SI", IIC_LdStLFD>;
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
@@ -625,7 +646,7 @@ let Predicates = [PrefixInstrs] in {
}
}
defm PADDI :
- MLS_DForm_R_SI34_RTA5_p<14, (outs gprc:$RT), (ins gprc:$RA, s34imm:$SI),
+ MLS_DForm_R_SI34_RTA5_p<14, (outs gprc:$RT), (ins gprc_nor0:$RA, s34imm:$SI),
(ins immZero:$RA, s34imm_pcrel:$SI),
"paddi $RT, $RA, $SI", IIC_LdStLFD>;
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
@@ -638,68 +659,88 @@ let Predicates = [PrefixInstrs] in {
defm PLXV :
8LS_DForm_R_SI34_XT6_RA5_MEM_p<25, (outs vsrc:$XST), (ins (memri34 $D, $RA):$addr),
(ins (memri34_pcrel $D, $RA):$addr),
- "plxv $XST, $addr", IIC_LdStLFD>;
+ (ins s34imm_pcrel:$D),
+ "plxv $XST, $addr", "plxv $XST, $D", IIC_LdStLFD>;
defm PLFS :
MLS_DForm_R_SI34_RTA5_MEM_p<48, (outs f4rc:$RST), (ins (memri34 $D, $RA):$addr),
- (ins (memri34_pcrel $D, $RA):$addr), "plfs $RST, $addr",
- IIC_LdStLFD>;
+ (ins (memri34_pcrel $D, $RA):$addr),
+ (ins s34imm_pcrel:$D), "plfs $RST, $addr",
+ "plfs $RST, $D", IIC_LdStLFD>;
defm PLFD :
MLS_DForm_R_SI34_RTA5_MEM_p<50, (outs f8rc:$RST), (ins (memri34 $D, $RA):$addr),
- (ins (memri34_pcrel $D, $RA):$addr), "plfd $RST, $addr",
- IIC_LdStLFD>;
+ (ins (memri34_pcrel $D, $RA):$addr),
+ (ins s34imm_pcrel:$D), "plfd $RST, $addr",
+ "plfd $RST, $D", IIC_LdStLFD>;
defm PLXSSP :
8LS_DForm_R_SI34_RTA5_MEM_p<43, (outs vfrc:$RST), (ins (memri34 $D, $RA):$addr),
(ins (memri34_pcrel $D, $RA):$addr),
- "plxssp $RST, $addr", IIC_LdStLFD>;
+ (ins s34imm_pcrel:$D),
+ "plxssp $RST, $addr", "plxssp $RST, $D",
+ IIC_LdStLFD>;
defm PLXSD :
8LS_DForm_R_SI34_RTA5_MEM_p<42, (outs vfrc:$RST), (ins (memri34 $D, $RA):$addr),
(ins (memri34_pcrel $D, $RA):$addr),
- "plxsd $RST, $addr", IIC_LdStLFD>;
+ (ins s34imm_pcrel:$D),
+ "plxsd $RST, $addr", "plxsd $RST, $D",
+ IIC_LdStLFD>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
defm PLBZ8 :
MLS_DForm_R_SI34_RTA5_MEM_p<34, (outs g8rc:$RST), (ins (memri34 $D, $RA):$addr),
- (ins (memri34_pcrel $D, $RA):$addr), "plbz $RST, $addr",
- IIC_LdStLFD>;
+ (ins (memri34_pcrel $D, $RA):$addr),
+ (ins s34imm_pcrel:$D), "plbz $RST, $addr",
+ "plbz $RST, $D", IIC_LdStLFD>;
defm PLHZ8 :
MLS_DForm_R_SI34_RTA5_MEM_p<40, (outs g8rc:$RST), (ins (memri34 $D, $RA):$addr),
- (ins (memri34_pcrel $D, $RA):$addr), "plhz $RST, $addr",
- IIC_LdStLFD>;
+ (ins (memri34_pcrel $D, $RA):$addr),
+ (ins s34imm_pcrel:$D), "plhz $RST, $addr",
+ "plhz $RST, $D", IIC_LdStLFD>;
defm PLHA8 :
MLS_DForm_R_SI34_RTA5_MEM_p<42, (outs g8rc:$RST), (ins (memri34 $D, $RA):$addr),
- (ins (memri34_pcrel $D, $RA):$addr), "plha $RST, $addr",
- IIC_LdStLFD>;
+ (ins (memri34_pcrel $D, $RA):$addr),
+ (ins s34imm_pcrel:$D), "plha $RST, $addr",
+ "plha $RST, $D", IIC_LdStLFD>;
defm PLWA8 :
8LS_DForm_R_SI34_RTA5_MEM_p<41, (outs g8rc:$RST), (ins (memri34 $D, $RA):$addr),
(ins (memri34_pcrel $D, $RA):$addr),
- "plwa $RST, $addr", IIC_LdStLFD>;
+ (ins s34imm_pcrel:$D),
+ "plwa $RST, $addr", "plwa $RST, $D", IIC_LdStLFD>;
defm PLWZ8 :
MLS_DForm_R_SI34_RTA5_MEM_p<32, (outs g8rc:$RST), (ins (memri34 $D, $RA):$addr),
- (ins (memri34_pcrel $D, $RA):$addr), "plwz $RST, $addr",
- IIC_LdStLFD>;
+ (ins (memri34_pcrel $D, $RA):$addr),
+ (ins s34imm_pcrel:$D), "plwz $RST, $addr",
+ "plwz $RST, $D", IIC_LdStLFD>;
}
defm PLBZ :
MLS_DForm_R_SI34_RTA5_MEM_p<34, (outs gprc:$RST), (ins (memri34 $D, $RA):$addr),
- (ins (memri34_pcrel $D, $RA):$addr), "plbz $RST, $addr",
- IIC_LdStLFD>;
+ (ins (memri34_pcrel $D, $RA):$addr),
+ (ins s34imm_pcrel:$D), "plbz $RST, $addr",
+ "plbz $RST, $D", IIC_LdStLFD>;
defm PLHZ :
MLS_DForm_R_SI34_RTA5_MEM_p<40, (outs gprc:$RST), (ins (memri34 $D, $RA):$addr),
- (ins (memri34_pcrel $D, $RA):$addr), "plhz $RST, $addr",
- IIC_LdStLFD>;
+ (ins (memri34_pcrel $D, $RA):$addr),
+ (ins s34imm_pcrel:$D), "plhz $RST, $addr",
+ "plhz $RST, $D", IIC_LdStLFD>;
defm PLHA :
MLS_DForm_R_SI34_RTA5_MEM_p<42, (outs gprc:$RST), (ins (memri34 $D, $RA):$addr),
- (ins (memri34_pcrel $D, $RA):$addr), "plha $RST, $addr",
- IIC_LdStLFD>;
+ (ins (memri34_pcrel $D, $RA):$addr),
+ (ins s34imm_pcrel:$D), "plha $RST, $addr",
+ "plha $RST, $D", IIC_LdStLFD>;
defm PLWZ :
MLS_DForm_R_SI34_RTA5_MEM_p<32, (outs gprc:$RST), (ins (memri34 $D, $RA):$addr),
- (ins (memri34_pcrel $D, $RA):$addr), "plwz $RST, $addr",
- IIC_LdStLFD>;
+ (ins (memri34_pcrel $D, $RA):$addr),
+ (ins s34imm_pcrel:$D), "plwz $RST, $addr",
+ "plwz $RST, $D", IIC_LdStLFD>;
defm PLWA :
8LS_DForm_R_SI34_RTA5_MEM_p<41, (outs gprc:$RST), (ins (memri34 $D, $RA):$addr),
- (ins (memri34_pcrel $D, $RA):$addr), "plwa $RST, $addr",
+ (ins (memri34_pcrel $D, $RA):$addr),
+ (ins s34imm_pcrel:$D),
+ "plwa $RST, $addr", "plwa $RST, $D",
IIC_LdStLFD>;
defm PLD :
8LS_DForm_R_SI34_RTA5_MEM_p<57, (outs g8rc:$RST), (ins (memri34 $D, $RA):$addr),
- (ins (memri34_pcrel $D, $RA):$addr), "pld $RST, $addr",
+ (ins (memri34_pcrel $D, $RA):$addr),
+ (ins s34imm_pcrel:$D),
+ "pld $RST, $addr", "pld $RST, $D",
IIC_LdStLFD>;
}
@@ -707,53 +748,65 @@ let Predicates = [PrefixInstrs] in {
defm PSTXV :
8LS_DForm_R_SI34_XT6_RA5_MEM_p<27, (outs), (ins vsrc:$XST, (memri34 $D, $RA):$addr),
(ins vsrc:$XST, (memri34_pcrel $D, $RA):$addr),
- "pstxv $XST, $addr", IIC_LdStLFD>;
+ (ins vsrc:$XST, s34imm_pcrel:$D),
+ "pstxv $XST, $addr", "pstxv $XST, $D", IIC_LdStLFD>;
defm PSTFS :
MLS_DForm_R_SI34_RTA5_MEM_p<52, (outs), (ins f4rc:$RST, (memri34 $D, $RA):$addr),
(ins f4rc:$RST, (memri34_pcrel $D, $RA):$addr),
- "pstfs $RST, $addr", IIC_LdStLFD>;
+ (ins f4rc:$RST, s34imm_pcrel:$D),
+ "pstfs $RST, $addr", "pstfs $RST, $D", IIC_LdStLFD>;
defm PSTFD :
MLS_DForm_R_SI34_RTA5_MEM_p<54, (outs), (ins f8rc:$RST, (memri34 $D, $RA):$addr),
(ins f8rc:$RST, (memri34_pcrel $D, $RA):$addr),
- "pstfd $RST, $addr", IIC_LdStLFD>;
+ (ins f8rc:$RST, s34imm_pcrel:$D),
+ "pstfd $RST, $addr", "pstfd $RST, $D", IIC_LdStLFD>;
defm PSTXSSP :
8LS_DForm_R_SI34_RTA5_MEM_p<47, (outs), (ins vfrc:$RST, (memri34 $D, $RA):$addr),
(ins vfrc:$RST, (memri34_pcrel $D, $RA):$addr),
- "pstxssp $RST, $addr", IIC_LdStLFD>;
+ (ins vfrc:$RST, s34imm_pcrel:$D),
+ "pstxssp $RST, $addr", "pstxssp $RST, $D", IIC_LdStLFD>;
defm PSTXSD :
8LS_DForm_R_SI34_RTA5_MEM_p<46, (outs), (ins vfrc:$RST, (memri34 $D, $RA):$addr),
(ins vfrc:$RST, (memri34_pcrel $D, $RA):$addr),
- "pstxsd $RST, $addr", IIC_LdStLFD>;
+ (ins vfrc:$RST, s34imm_pcrel:$D),
+ "pstxsd $RST, $addr", "pstxsd $RST, $D", IIC_LdStLFD>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
defm PSTB8 :
MLS_DForm_R_SI34_RTA5_MEM_p<38, (outs), (ins g8rc:$RST, (memri34 $D, $RA):$addr),
(ins g8rc:$RST, (memri34_pcrel $D, $RA):$addr),
- "pstb $RST, $addr", IIC_LdStLFD>;
+ (ins g8rc:$RST, s34imm_pcrel:$D),
+ "pstb $RST, $addr", "pstb $RST, $D", IIC_LdStLFD>;
defm PSTH8 :
MLS_DForm_R_SI34_RTA5_MEM_p<44, (outs), (ins g8rc:$RST, (memri34 $D, $RA):$addr),
(ins g8rc:$RST, (memri34_pcrel $D, $RA):$addr),
- "psth $RST, $addr", IIC_LdStLFD>;
+ (ins g8rc:$RST, s34imm_pcrel:$D),
+ "psth $RST, $addr", "psth $RST, $D", IIC_LdStLFD>;
defm PSTW8 :
MLS_DForm_R_SI34_RTA5_MEM_p<36, (outs), (ins g8rc:$RST, (memri34 $D, $RA):$addr),
(ins g8rc:$RST, (memri34_pcrel $D, $RA):$addr),
- "pstw $RST, $addr", IIC_LdStLFD>;
+ (ins g8rc:$RST, s34imm_pcrel:$D),
+ "pstw $RST, $addr", "pstw $RST, $D", IIC_LdStLFD>;
}
defm PSTB :
MLS_DForm_R_SI34_RTA5_MEM_p<38, (outs), (ins gprc:$RST, (memri34 $D, $RA):$addr),
(ins gprc:$RST, (memri34_pcrel $D, $RA):$addr),
- "pstb $RST, $addr", IIC_LdStLFD>;
+ (ins gprc:$RST, s34imm_pcrel:$D),
+ "pstb $RST, $addr", "pstb $RST, $D", IIC_LdStLFD>;
defm PSTH :
MLS_DForm_R_SI34_RTA5_MEM_p<44, (outs), (ins gprc:$RST, (memri34 $D, $RA):$addr),
(ins gprc:$RST, (memri34_pcrel $D, $RA):$addr),
- "psth $RST, $addr", IIC_LdStLFD>;
+ (ins gprc:$RST, s34imm_pcrel:$D),
+ "psth $RST, $addr", "psth $RST, $D", IIC_LdStLFD>;
defm PSTW :
MLS_DForm_R_SI34_RTA5_MEM_p<36, (outs), (ins gprc:$RST, (memri34 $D, $RA):$addr),
(ins gprc:$RST, (memri34_pcrel $D, $RA):$addr),
- "pstw $RST, $addr", IIC_LdStLFD>;
+ (ins gprc:$RST, s34imm_pcrel:$D),
+ "pstw $RST, $addr", "pstw $RST, $D", IIC_LdStLFD>;
defm PSTD :
8LS_DForm_R_SI34_RTA5_MEM_p<61, (outs), (ins g8rc:$RST, (memri34 $D, $RA):$addr),
(ins g8rc:$RST, (memri34_pcrel $D, $RA):$addr),
- "pstd $RST, $addr", IIC_LdStLFD>;
+ (ins g8rc:$RST, s34imm_pcrel:$D),
+ "pstd $RST, $addr", "pstd $RST, $D", IIC_LdStLFD>;
}
}
@@ -812,13 +865,20 @@ class 8LS_DForm_R_XTp5_SI34_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,
}
multiclass 8LS_DForm_R_XTp5_SI34_MEM_p<bits<6> opcode, dag OOL,
- dag IOL, dag PCRel_IOL,
- string asmstr, InstrItinClass itin> {
+ dag IOL, dag PCRel_IOL, dag PCRelOnly_IOL,
+ string asmstr, string asmstr_pcext,
+ InstrItinClass itin> {
def NAME : 8LS_DForm_R_XTp5_SI34_MEM<opcode, OOL, IOL,
!strconcat(asmstr, ", 0"), itin, []>;
def pc : 8LS_DForm_R_XTp5_SI34_MEM<opcode, OOL, PCRel_IOL,
!strconcat(asmstr, ", 1"), itin, []>,
isPCRel;
+ let isAsmParserOnly = 1, hasNoSchedulingInfo = 1 in {
+ def nopc : 8LS_DForm_R_XTp5_SI34_MEM<opcode, OOL, IOL, asmstr, itin, []>;
+ let RA = 0 in
+ def onlypc : 8LS_DForm_R_XTp5_SI34_MEM<opcode, OOL, PCRelOnly_IOL,
+ asmstr_pcext, itin, []>, isPCRel;
+ }
}
@@ -1079,7 +1139,9 @@ let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops] in {
let mayLoad = 1, mayStore = 0, Predicates = [PairedVectorMemops, PrefixInstrs] in {
defm PLXVP :
8LS_DForm_R_XTp5_SI34_MEM_p<58, (outs vsrprc:$XTp), (ins (memri34 $D, $RA):$addr),
- (ins (memri34_pcrel $D, $RA):$addr), "plxvp $XTp, $addr",
+ (ins (memri34_pcrel $D, $RA):$addr),
+ (ins s34imm_pcrel:$D),
+ "plxvp $XTp, $addr", "plxvp $XTp, $D",
IIC_LdStLFD>;
}
@@ -1087,7 +1149,8 @@ let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops, PrefixInstrs] i
defm PSTXVP :
8LS_DForm_R_XTp5_SI34_MEM_p<62, (outs), (ins vsrprc:$XTp, (memri34 $D, $RA):$addr),
(ins vsrprc:$XTp, (memri34_pcrel $D, $RA):$addr),
- "pstxvp $XTp, $addr", IIC_LdStLFD>;
+ (ins vsrprc:$XTp, s34imm_pcrel:$D),
+ "pstxvp $XTp, $addr", "pstxvp $XTp, $D", IIC_LdStLFD>;
}
let Predicates = [PairedVectorMemops] in {
@@ -1236,19 +1299,19 @@ let Predicates = [PCRelativeMemops] in {
(PLDpc $ga, 0)>;
// Atomic Store
- def : Pat<(atomic_store_8 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS),
+ def : Pat<(atomic_store_8 i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTBpc $RS, $ga, 0)>;
- def : Pat<(atomic_store_16 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS),
+ def : Pat<(atomic_store_16 i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTHpc $RS, $ga, 0)>;
- def : Pat<(atomic_store_32 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS),
+ def : Pat<(atomic_store_32 i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTWpc $RS, $ga, 0)>;
- def : Pat<(atomic_store_8 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
+ def : Pat<(atomic_store_8 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTB8pc $RS, $ga, 0)>;
- def : Pat<(atomic_store_16 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
+ def : Pat<(atomic_store_16 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTH8pc $RS, $ga, 0)>;
- def : Pat<(atomic_store_32 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
+ def : Pat<(atomic_store_32 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTW8pc $RS, $ga, 0)>;
- def : Pat<(atomic_store_64 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
+ def : Pat<(atomic_store_64 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTDpc $RS, $ga, 0)>;
// Special Cases For PPCstore_scal_int_from_vsr
@@ -1855,6 +1918,13 @@ let Predicates = [IsISA3_1, HasVSX] in {
[(set f128:$RST, (PPCxsminc f128:$RA, f128:$RB))]>;
}
+let Predicates = [IsISA3_1] in {
+ def WAITP10 : XForm_IMM2_IMM2<31, 30, (outs), (ins u2imm:$L, u2imm:$PL),
+ "wait $L $PL", IIC_LdStLoad, []>;
+ def SYNCP10 : XForm_IMM3_IMM2<31, 598, (outs), (ins u3imm:$L, u2imm:$SC),
+ "sync $L, $SC", IIC_LdStSync, []>;
+}
+
// Multiclass defining patterns for Set Boolean Extension Reverse Instructions.
// This is analogous to the CRNotPat multiclass but specifically for Power10
// and newer subtargets since the extended forms use Set Boolean instructions.
@@ -2031,8 +2101,15 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, IsLittleEndian] in {
(v8i16 (COPY_TO_REGCLASS (LXVRHX ForceXForm:$src), VSRC))>;
def : Pat<(v16i8 (scalar_to_vector (i32 (extloadi8 ForceXForm:$src)))),
(v16i8 (COPY_TO_REGCLASS (LXVRBX ForceXForm:$src), VSRC))>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), ForceXForm:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
}
+let Predicates = [IsISA3_1, IsBigEndian] in {
+ def : Pat<(store (i64 (extractelt v2i64:$A, 0)), ForceXForm:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
+}
+
// FIXME: The swap is overkill when the shift amount is a constant.
// We should just fix the constant in the DAG.
let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in {
@@ -2276,10 +2353,10 @@ let Predicates = [PrefixInstrs] in {
def : Pat<(atomic_load_64 PDForm:$src), (PLD memri34:$src)>;
// Atomic Store
- def : Pat<(atomic_store_8 PDForm:$dst, i32:$RS), (PSTB $RS, memri34:$dst)>;
- def : Pat<(atomic_store_16 PDForm:$dst, i32:$RS), (PSTH $RS, memri34:$dst)>;
- def : Pat<(atomic_store_32 PDForm:$dst, i32:$RS), (PSTW $RS, memri34:$dst)>;
- def : Pat<(atomic_store_64 PDForm:$dst, i64:$RS), (PSTD $RS, memri34:$dst)>;
+ def : Pat<(atomic_store_8 i32:$RS, PDForm:$dst), (PSTB $RS, memri34:$dst)>;
+ def : Pat<(atomic_store_16 i32:$RS, PDForm:$dst), (PSTH $RS, memri34:$dst)>;
+ def : Pat<(atomic_store_32 i32:$RS, PDForm:$dst), (PSTW $RS, memri34:$dst)>;
+ def : Pat<(atomic_store_64 i64:$RS, PDForm:$dst), (PSTD $RS, memri34:$dst)>;
// Prefixed fpext to v2f64
def : Pat<(v4f32 (PPCldvsxlh PDForm:$src)),
@@ -2379,3 +2456,50 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX, IsBigEndian] in {
def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, Idx)),
(VINSD $vDi, !mul(Idx, 8), $rA)>;
}
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC ISA 3.1 Extended Mnemonics.
+//
+
+let Predicates = [IsISA3_1] in {
+ def : InstAlias<"wait", (WAITP10 0, 0)>;
+ def : InstAlias<"wait 0", (WAITP10 0, 0), 0>;
+ def : InstAlias<"wait 1", (WAITP10 1, 0), 0>;
+ def : InstAlias<"waitrsv", (WAITP10 1, 0)>;
+ def : InstAlias<"pause_short", (WAITP10 2, 0), 0>;
+
+ def : InstAlias<"sync", (SYNCP10 0, 0)>;
+ def : InstAlias<"hwsync", (SYNCP10 0, 0), 0>;
+ def : InstAlias<"wsync", (SYNCP10 1, 0), 0>;
+ def : InstAlias<"ptesync", (SYNCP10 2, 0)>;
+ def : InstAlias<"phwsync", (SYNCP10 4, 0)>;
+ def : InstAlias<"plwsync", (SYNCP10 5, 0)>;
+ def : InstAlias<"sync $L", (SYNCP10 u3imm:$L, 0)>;
+ def : InstAlias<"stncisync", (SYNCP10 1, 1)>;
+ def : InstAlias<"stcisync", (SYNCP10 0, 2)>;
+ def : InstAlias<"stsync", (SYNCP10 0, 3)>;
+
+ def : InstAlias<"paddi $RT, $RA, $SI", (PADDI8 g8rc:$RT, g8rc_nox0:$RA, s34imm:$SI)>;
+}
+
+let Predicates = [IsISA3_1, PrefixInstrs], isAsmParserOnly = 1, hasNoSchedulingInfo = 1 in {
+ let Interpretation64Bit = 1 in {
+ def PLA8 : MLS_DForm_SI34_RT5<14, (outs g8rc:$RT),
+ (ins g8rc_nox0:$RA, s34imm:$SI),
+ "pla $RT, ${SI} ${RA}", IIC_IntSimple, []>;
+ def PLA8pc : MLS_DForm_SI34_RT5<14, (outs g8rc:$RT),
+ (ins s34imm_pcrel:$SI),
+ "pla $RT, $SI", IIC_IntSimple, []>, isPCRel;
+ }
+
+ def PSUBI : PPCAsmPseudo<"psubi $RT, $RA, $SI",
+ (ins g8rc:$RT, g8rc_nox0:$RA, s34imm:$SI)>;
+
+ def PLA : MLS_DForm_SI34_RT5<14, (outs gprc:$RT),
+ (ins gprc_nor0:$RA, s34imm:$SI),
+ "pla $RT, ${SI} ${RA}", IIC_IntSimple, []>;
+ def PLApc : MLS_DForm_SI34_RT5<14, (outs gprc:$RT),
+ (ins s34imm_pcrel:$SI),
+ "pla $RT, $SI", IIC_IntSimple, []>, isPCRel;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
index f29a7af1bdf1..dc739a2c7a4d 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
@@ -660,8 +660,8 @@ PPCLoopInstrFormPrep::rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV,
Type *I8Ty = Type::getInt8Ty(BaseMemI->getParent()->getContext());
Type *I8PtrTy =
- Type::getInt8PtrTy(BaseMemI->getParent()->getContext(),
- BasePtr->getType()->getPointerAddressSpace());
+ PointerType::get(BaseMemI->getParent()->getContext(),
+ BasePtr->getType()->getPointerAddressSpace());
bool IsConstantInc = false;
const SCEV *BasePtrIncSCEV = BasePtrSCEV->getStepRecurrence(*SE);
@@ -707,8 +707,8 @@ PPCLoopInstrFormPrep::rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV,
BasicBlock *LoopPredecessor = L->getLoopPredecessor();
PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount,
- getInstrName(BaseMemI, PHINodeNameSuffix),
- Header->getFirstNonPHI());
+ getInstrName(BaseMemI, PHINodeNameSuffix));
+ NewPHI->insertBefore(Header->getFirstNonPHIIt());
Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
LoopPredecessor->getTerminator());
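Both PPCLoopInstrFormPrep hunks above follow the same LLVM API migration: opaque pointers drop the pointee type from pointer construction, and PHI creation is now decoupled from insertion. A minimal sketch of the new idiom (makeBasePHI and its arguments are illustrative names, not code from this patch):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Build an opaque pointer type, then create the PHI detached and
    // insert it before the first non-PHI instruction of the block,
    // mirroring the two-step pattern in the rewritten code above.
    static PHINode *makeBasePHI(LLVMContext &Ctx, BasicBlock *Header,
                                unsigned NumPreds) {
      PointerType *PtrTy = PointerType::get(Ctx, /*AddressSpace=*/0);
      PHINode *Phi = PHINode::Create(PtrTy, NumPreds, "base.phi");
      Phi->insertBefore(Header->getFirstNonPHIIt());
      return Phi;
    }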
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index 976effb96adc..9a3ca5a78293 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -31,22 +31,21 @@ using namespace llvm;
static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO,
AsmPrinter &AP) {
- const TargetMachine &TM = AP.TM;
- Mangler &Mang = TM.getObjFileLowering()->getMangler();
- const DataLayout &DL = AP.getDataLayout();
- MCContext &Ctx = AP.OutContext;
-
- SmallString<128> Name;
- if (!MO.isGlobal()) {
- assert(MO.isSymbol() && "Isn't a symbol reference");
- Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
- } else {
+ if (MO.isGlobal()) {
+ // Get the symbol from the global, accounting for XCOFF-specific
+ // intricacies (see TargetLoweringObjectFileXCOFF::getTargetSymbol).
const GlobalValue *GV = MO.getGlobal();
- TM.getNameWithPrefix(Name, GV, Mang);
+ return AP.getSymbol(GV);
}
- MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
+ assert(MO.isSymbol() && "Isn't a symbol reference");
+
+ SmallString<128> Name;
+ const DataLayout &DL = AP.getDataLayout();
+ Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
+ MCContext &Ctx = AP.OutContext;
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
return Sym;
}
@@ -55,7 +54,7 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
MCContext &Ctx = Printer.OutContext;
MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
- unsigned access = MO.getTargetFlags() & PPCII::MO_ACCESS_MASK;
+ unsigned access = MO.getTargetFlags();
switch (access) {
case PPCII::MO_TPREL_LO:
@@ -74,19 +73,22 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
RefKind = MCSymbolRefExpr::VK_PPC_TOC_LO;
break;
case PPCII::MO_TLS:
- bool IsPCRel = (MO.getTargetFlags() & ~access) == PPCII::MO_PCREL_FLAG;
- RefKind = IsPCRel ? MCSymbolRefExpr::VK_PPC_TLS_PCREL
- : MCSymbolRefExpr::VK_PPC_TLS;
+ RefKind = MCSymbolRefExpr::VK_PPC_TLS;
+ break;
+ case PPCII::MO_TLS_PCREL_FLAG:
+ RefKind = MCSymbolRefExpr::VK_PPC_TLS_PCREL;
break;
}
+ const TargetMachine &TM = Printer.TM;
+
if (MO.getTargetFlags() == PPCII::MO_PLT)
RefKind = MCSymbolRefExpr::VK_PLT;
else if (MO.getTargetFlags() == PPCII::MO_PCREL_FLAG)
RefKind = MCSymbolRefExpr::VK_PCREL;
- else if (MO.getTargetFlags() == (PPCII::MO_PCREL_FLAG | PPCII::MO_GOT_FLAG))
+ else if (MO.getTargetFlags() == PPCII::MO_GOT_PCREL_FLAG)
RefKind = MCSymbolRefExpr::VK_PPC_GOT_PCREL;
- else if (MO.getTargetFlags() == (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG))
+ else if (MO.getTargetFlags() == PPCII::MO_TPREL_PCREL_FLAG)
RefKind = MCSymbolRefExpr::VK_TPREL;
else if (MO.getTargetFlags() == PPCII::MO_GOT_TLSGD_PCREL_FLAG)
RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL;
@@ -94,12 +96,21 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL;
else if (MO.getTargetFlags() == PPCII::MO_GOT_TPREL_PCREL_FLAG)
RefKind = MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL;
+ else if (MO.getTargetFlags() == PPCII::MO_TPREL_FLAG) {
+ assert(MO.isGlobal() && "Only expecting a global MachineOperand here!");
+ TLSModel::Model Model = TM.getTLSModel(MO.getGlobal());
+ // For the local-exec TLS model, we may generate the offset from the TLS
+ // base as an immediate operand (instead of using a TOC entry).
+ // Set the relocation type in case the result is used for purposes other
+ // than a TOC reference. In TOC reference cases, this result is discarded.
+ if (Model == TLSModel::LocalExec)
+ RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSLE;
+ }
const MachineInstr *MI = MO.getParent();
const MachineFunction *MF = MI->getMF();
const Module *M = MF->getFunction().getParent();
const PPCSubtarget *Subtarget = &(MF->getSubtarget<PPCSubtarget>());
- const TargetMachine &TM = Printer.TM;
unsigned MIOpcode = MI->getOpcode();
assert((Subtarget->isUsingPCRelativeCalls() || MIOpcode != PPC::BL8_NOTOC) &&
@@ -128,7 +139,9 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
Ctx);
// Subtract off the PIC base if required.
- if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG) {
+ if (MO.getTargetFlags() == PPCII::MO_PIC_FLAG ||
+ MO.getTargetFlags() == PPCII::MO_PIC_HA_FLAG ||
+ MO.getTargetFlags() == PPCII::MO_PIC_LO_FLAG) {
const MachineFunction *MF = MO.getParent()->getParent()->getParent();
const MCExpr *PB = MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
@@ -138,9 +151,11 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
// Add ha16() / lo16() markers if required.
switch (access) {
case PPCII::MO_LO:
+ case PPCII::MO_PIC_LO_FLAG:
Expr = PPCMCExpr::createLo(Expr, Ctx);
break;
case PPCII::MO_HA:
+ case PPCII::MO_PIC_HA_FLAG:
Expr = PPCMCExpr::createHa(Expr, Ctx);
break;
}
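With MO_ACCESS_MASK gone, each machine operand carries exactly one whole-valued target flag, so GetSymbolRef maps flags to relocation variants by exact comparison instead of bit tests. A schematic subset of that mapping (variantFor is a hypothetical helper; only a few of the cases handled above are shown):

    // Exact-match lookup from a PPC target flag to the symbol-reference
    // variant kind; combined flags such as MO_PCREL_FLAG | MO_GOT_FLAG
    // are now single enumerators like MO_GOT_PCREL_FLAG.
    static MCSymbolRefExpr::VariantKind variantFor(unsigned Flags) {
      switch (Flags) {
      case PPCII::MO_TLS:            return MCSymbolRefExpr::VK_PPC_TLS;
      case PPCII::MO_TLS_PCREL_FLAG: return MCSymbolRefExpr::VK_PPC_TLS_PCREL;
      case PPCII::MO_GOT_PCREL_FLAG: return MCSymbolRefExpr::VK_PPC_GOT_PCREL;
      case PPCII::MO_PCREL_FLAG:     return MCSymbolRefExpr::VK_PCREL;
      default:                       return MCSymbolRefExpr::VK_None;
      }
    }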
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 410f4cba97c6..494e4b52a5b5 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -16,6 +16,15 @@
// removal, and it would miss cleanups made possible following VSX
// swap removal.
//
+// NOTE: We run the verifier after this pass in Asserts/Debug builds so it
+// is important to keep the code valid after transformations.
+// Common causes of errors stem from violating the contract specified
+// by kill flags. Whenever a transformation changes the live range of
+// a register, that register should be added to the work list using
+// addRegToUpdate(RegsToUpdate, <Reg>). Furthermore, if a transformation
+// is changing the definition of a register (i.e. removing the single
+// definition of the original vreg), it needs to provide a dummy
+// definition of that register using addDummyDef(<MBB>, <Reg>).
//===---------------------------------------------------------------------===//
#include "MCTargetDesc/PPCMCTargetDesc.h"
@@ -26,6 +35,7 @@
#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -93,6 +103,7 @@ struct PPCMIPeephole : public MachineFunctionPass {
const PPCInstrInfo *TII;
MachineFunction *MF;
MachineRegisterInfo *MRI;
+ LiveVariables *LV;
PPCMIPeephole() : MachineFunctionPass(ID) {
initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
@@ -102,7 +113,8 @@ private:
MachineDominatorTree *MDT;
MachinePostDominatorTree *MPDT;
MachineBlockFrequencyInfo *MBFI;
- uint64_t EntryFreq;
+ BlockFrequency EntryFreq;
+ SmallSet<Register, 16> RegsToUpdate;
// Initialize class variables.
void initialize(MachineFunction &MFParm);
@@ -114,16 +126,32 @@ private:
bool eliminateRedundantCompare();
bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
- bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI);
+ bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
+ MachineInstr *&ToErase);
void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
MachineInstr *MI);
+ // A number of transformations will eliminate the definition of a register
+ // as all of its uses will be removed. However, this leaves a register
+ // without a definition for LiveVariables. Such transformations should
+ // use this function to provide a dummy definition of the register that
+ // will simply be removed by DCE.
+ void addDummyDef(MachineBasicBlock &MBB, MachineInstr *At, Register Reg) {
+ BuildMI(MBB, At, At->getDebugLoc(), TII->get(PPC::IMPLICIT_DEF), Reg);
+ }
+ void addRegToUpdateWithLine(Register Reg, int Line);
+ void convertUnprimedAccPHIs(const PPCInstrInfo *TII, MachineRegisterInfo *MRI,
+ SmallVectorImpl<MachineInstr *> &PHIs,
+ Register Dst);
+
public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveVariables>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addPreserved<LiveVariables>();
AU.addPreserved<MachineDominatorTree>();
AU.addPreserved<MachinePostDominatorTree>();
AU.addPreserved<MachineBlockFrequencyInfo>();
@@ -140,10 +168,26 @@ public:
"TOC pointer used in a function using PC-Relative addressing!");
if (skipFunction(MF.getFunction()))
return false;
- return simplifyCode();
+ bool Changed = simplifyCode();
+#ifndef NDEBUG
+ if (Changed)
+ MF.verify(this, "Error in PowerPC MI Peephole optimization, compile with "
+ "-mllvm -disable-ppc-peephole");
+#endif
+ return Changed;
}
};
+#define addRegToUpdate(R) addRegToUpdateWithLine(R, __LINE__)
+void PPCMIPeephole::addRegToUpdateWithLine(Register Reg, int Line) {
+ if (!Register::isVirtualRegister(Reg))
+ return;
+ if (RegsToUpdate.insert(Reg).second)
+ LLVM_DEBUG(dbgs() << "Adding register: " << Register::virtReg2Index(Reg)
+ << " on line " << Line
+ << " for re-computation of kill flags\n");
+}
+
// Initialize class variables.
void PPCMIPeephole::initialize(MachineFunction &MFParm) {
MF = &MFParm;
@@ -151,8 +195,10 @@ void PPCMIPeephole::initialize(MachineFunction &MFParm) {
MDT = &getAnalysis<MachineDominatorTree>();
MPDT = &getAnalysis<MachinePostDominatorTree>();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ LV = &getAnalysis<LiveVariables>();
EntryFreq = MBFI->getEntryFreq();
TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
+ RegsToUpdate.clear();
LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
LLVM_DEBUG(MF->dump());
}
@@ -254,7 +300,7 @@ void PPCMIPeephole::UpdateTOCSaves(
PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
MachineBasicBlock *Entry = &MF->front();
- uint64_t CurrBlockFreq = MBFI->getBlockFreq(MI->getParent()).getFrequency();
+ BlockFrequency CurrBlockFreq = MBFI->getBlockFreq(MI->getParent());
// If the block in which the TOC save resides is in a block that
// post-dominates Entry, or a block that is hotter than entry (keep in mind
@@ -338,10 +384,9 @@ static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI,
// primed accumulator PHI nodes. The list is traversed in reverse order to
// change all the PHI operands of a PHI node before changing the node itself.
// We keep a map to associate each changed PHI node to its non-changed form.
-static void convertUnprimedAccPHIs(const PPCInstrInfo *TII,
- MachineRegisterInfo *MRI,
- SmallVectorImpl<MachineInstr *> &PHIs,
- Register Dst) {
+void PPCMIPeephole::convertUnprimedAccPHIs(
+ const PPCInstrInfo *TII, MachineRegisterInfo *MRI,
+ SmallVectorImpl<MachineInstr *> &PHIs, Register Dst) {
DenseMap<MachineInstr *, MachineInstr *> ChangedPHIMap;
for (MachineInstr *PHI : llvm::reverse(PHIs)) {
SmallVector<std::pair<MachineOperand, MachineOperand>, 4> PHIOps;
@@ -390,8 +435,11 @@ static void convertUnprimedAccPHIs(const PPCInstrInfo *TII,
AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
MachineInstrBuilder NewPHI = BuildMI(
*PHI->getParent(), PHI, PHI->getDebugLoc(), TII->get(PPC::PHI), AccReg);
- for (auto RegMBB : PHIOps)
+ for (auto RegMBB : PHIOps) {
NewPHI.add(RegMBB.first).add(RegMBB.second);
+ if (MRI->isSSA())
+ addRegToUpdate(RegMBB.first.getReg());
+ }
ChangedPHIMap[PHI] = NewPHI.getInstr();
LLVM_DEBUG(dbgs() << "Converting PHI: ");
LLVM_DEBUG(PHI->dump());
@@ -421,21 +469,51 @@ bool PPCMIPeephole::simplifyCode() {
if (MI.isDebugInstr())
continue;
- if (TII->convertToImmediateForm(MI)) {
- // We don't erase anything in case the def has other uses. Let DCE
- // remove it if it can be removed.
- LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
- LLVM_DEBUG(MI.dump());
- NumConvertedToImmediateForm++;
- SomethingChanged = true;
- Simplified = true;
+ SmallSet<Register, 4> RRToRIRegsToUpdate;
+ if (!TII->convertToImmediateForm(MI, RRToRIRegsToUpdate))
continue;
- }
+ for (Register R : RRToRIRegsToUpdate)
+ addRegToUpdate(R);
+ // The updated instruction may now have new register operands.
+ // Conservatively add them to recompute the flags as well.
+ for (const MachineOperand &MO : MI.operands())
+ if (MO.isReg())
+ addRegToUpdate(MO.getReg());
+ // We don't erase anything in case the def has other uses. Let DCE
+ // remove it if it can be removed.
+ LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
+ LLVM_DEBUG(MI.dump());
+ NumConvertedToImmediateForm++;
+ SomethingChanged = true;
+ Simplified = true;
+ continue;
}
}
} while (SomethingChanged && FixedPointRegToImm);
}
+ // Since we are deleting this instruction, we need to run LiveVariables
+ // on any of its definitions that are marked as needing an update since
+ // we can't run LiveVariables on a deleted register. This only needs
+ // to be done for defs since uses will have their own defining
+ // instructions so we won't be running LiveVariables on a deleted reg.
+ auto recomputeLVForDyingInstr = [&]() {
+ if (RegsToUpdate.empty())
+ return;
+ for (MachineOperand &MO : ToErase->operands()) {
+ if (!MO.isReg() || !MO.isDef() || !RegsToUpdate.count(MO.getReg()))
+ continue;
+ Register RegToUpdate = MO.getReg();
+ RegsToUpdate.erase(RegToUpdate);
+ // If some transformation has introduced an additional definition of
+ // this register (breaking SSA), we can safely convert this def to
+ // a def of an invalid register as the instruction is going away.
+ if (!MRI->getUniqueVRegDef(RegToUpdate))
+ MO.setReg(PPC::NoRegister);
+ LV->recomputeForSingleDefVirtReg(RegToUpdate);
+ }
+ };
+
for (MachineBasicBlock &MBB : *MF) {
for (MachineInstr &MI : MBB) {
@@ -444,6 +522,7 @@ bool PPCMIPeephole::simplifyCode() {
if (ToErase) {
LLVM_DEBUG(dbgs() << "Deleting instruction: ");
LLVM_DEBUG(ToErase->dump());
+ recomputeLVForDyingInstr();
ToErase->eraseFromParent();
ToErase = nullptr;
}
@@ -503,12 +582,16 @@ bool PPCMIPeephole::simplifyCode() {
if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != 0)
break;
Register MIDestReg = MI.getOperand(0).getReg();
+ bool Folded = false;
for (MachineInstr& UseMI : MRI->use_instructions(MIDestReg))
- Simplified |= TII->onlyFoldImmediate(UseMI, MI, MIDestReg);
+ Folded |= TII->onlyFoldImmediate(UseMI, MI, MIDestReg);
if (MRI->use_nodbg_empty(MIDestReg)) {
++NumLoadImmZeroFoldedAndRemoved;
ToErase = &MI;
}
+ if (Folded)
+ addRegToUpdate(MIDestReg);
+ Simplified |= Folded;
break;
}
case PPC::STW:
@@ -579,6 +662,7 @@ bool PPCMIPeephole::simplifyCode() {
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
MI.getOperand(0).getReg())
.add(MI.getOperand(1));
+ addRegToUpdate(MI.getOperand(1).getReg());
ToErase = &MI;
Simplified = true;
}
@@ -608,6 +692,7 @@ bool PPCMIPeephole::simplifyCode() {
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
MI.getOperand(0).getReg())
.add(MI.getOperand(1));
+ addRegToUpdate(MI.getOperand(1).getReg());
ToErase = &MI;
Simplified = true;
}
@@ -618,9 +703,13 @@ bool PPCMIPeephole::simplifyCode() {
else if ((Immed == 0 || Immed == 3) && DefImmed == 2) {
LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
LLVM_DEBUG(MI.dump());
+ addRegToUpdate(MI.getOperand(1).getReg());
+ addRegToUpdate(MI.getOperand(2).getReg());
MI.getOperand(1).setReg(DefReg1);
MI.getOperand(2).setReg(DefReg2);
MI.getOperand(3).setImm(3 - Immed);
+ addRegToUpdate(DefReg1);
+ addRegToUpdate(DefReg2);
Simplified = true;
}
@@ -629,9 +718,12 @@ bool PPCMIPeephole::simplifyCode() {
else if (Immed == 2 && DefImmed == 2) {
LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
LLVM_DEBUG(MI.dump());
+ addRegToUpdate(MI.getOperand(1).getReg());
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
MI.getOperand(0).getReg())
.add(DefMI->getOperand(1));
+ addRegToUpdate(DefMI->getOperand(0).getReg());
+ addRegToUpdate(DefMI->getOperand(1).getReg());
ToErase = &MI;
Simplified = true;
}
@@ -648,6 +740,7 @@ bool PPCMIPeephole::simplifyCode() {
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
MI.getOperand(0).getReg())
.add(MI.getOperand(1));
+ addRegToUpdate(MI.getOperand(1).getReg());
break;
}
// Splat fed by another splat - switch the output of the first
@@ -669,6 +762,7 @@ bool PPCMIPeephole::simplifyCode() {
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
MI.getOperand(0).getReg())
.add(MI.getOperand(1));
+ addRegToUpdate(MI.getOperand(1).getReg());
} else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
TII->isLoadFromConstantPool(DefMI)) {
const Constant *C = TII->getConstantFromConstantPool(DefMI);
@@ -682,6 +776,7 @@ bool PPCMIPeephole::simplifyCode() {
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
MI.getOperand(0).getReg())
.add(MI.getOperand(1));
+ addRegToUpdate(MI.getOperand(1).getReg());
}
}
break;
@@ -724,6 +819,7 @@ bool PPCMIPeephole::simplifyCode() {
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
MI.getOperand(0).getReg())
.add(MI.getOperand(OpNo));
+ addRegToUpdate(MI.getOperand(OpNo).getReg());
ToErase = &MI;
Simplified = true;
}
@@ -747,7 +843,9 @@ bool PPCMIPeephole::simplifyCode() {
LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
<< " to " << NewElem << " in instruction: ");
LLVM_DEBUG(MI.dump());
- MI.getOperand(1).setReg(ShiftOp1);
+ addRegToUpdate(MI.getOperand(OpNo).getReg());
+ addRegToUpdate(ShiftOp1);
+ MI.getOperand(OpNo).setReg(ShiftOp1);
MI.getOperand(2).setImm(NewElem);
}
}
@@ -797,7 +895,9 @@ bool PPCMIPeephole::simplifyCode() {
LLVM_DEBUG(MI.dump());
LLVM_DEBUG(dbgs() << "Through instruction:\n");
LLVM_DEBUG(DefMI->dump());
- RoundInstr->eraseFromParent();
+ addRegToUpdate(ConvReg1);
+ addRegToUpdate(FRSPDefines);
+ ToErase = RoundInstr;
}
};
@@ -844,6 +944,13 @@ bool PPCMIPeephole::simplifyCode() {
else if (MIIs64Bit)
Opc = PPC::LHA8;
+ addRegToUpdate(NarrowReg);
+ addRegToUpdate(MI.getOperand(0).getReg());
+
+ // We are removing a definition of NarrowReg which will cause
+ // problems in AliveBlocks. Add an implicit def that will be
+ // removed so that AliveBlocks are updated correctly.
+ addDummyDef(MBB, &MI, NarrowReg);
LLVM_DEBUG(dbgs() << "Zero-extending load\n");
LLVM_DEBUG(SrcMI->dump());
LLVM_DEBUG(dbgs() << "and sign-extension\n");
@@ -909,6 +1016,13 @@ bool PPCMIPeephole::simplifyCode() {
if (!IsWordAligned && (Opc == PPC::LWA || Opc == PPC::LWA_32))
break;
+ addRegToUpdate(NarrowReg);
+ addRegToUpdate(MI.getOperand(0).getReg());
+
+ // We are removing a definition of NarrowReg which will cause
+ // problems in AliveBlocks. Add an implicit def that will be
+ // removed so that AliveBlocks are updated correctly.
+ addDummyDef(MBB, &MI, NarrowReg);
LLVM_DEBUG(dbgs() << "Zero-extending load\n");
LLVM_DEBUG(SrcMI->dump());
LLVM_DEBUG(dbgs() << "and sign-extension\n");
@@ -981,6 +1095,7 @@ bool PPCMIPeephole::simplifyCode() {
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
MI.getOperand(0).getReg())
.addReg(SrcReg);
+ addRegToUpdate(SrcReg);
ToErase = &MI;
Simplified = true;
NumEliminatedZExt++;
@@ -1071,23 +1186,96 @@ bool PPCMIPeephole::simplifyCode() {
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
MI.getOperand(0).getReg())
.add(Op1);
+ addRegToUpdate(Op1.getReg());
+ addRegToUpdate(Op2.getReg());
ToErase = &MI;
Simplified = true;
NumOptADDLIs++;
break;
}
case PPC::RLDICR: {
- Simplified |= emitRLDICWhenLoweringJumpTables(MI) ||
+ Simplified |= emitRLDICWhenLoweringJumpTables(MI, ToErase) ||
combineSEXTAndSHL(MI, ToErase);
break;
}
+ case PPC::ANDI_rec:
+ case PPC::ANDI8_rec:
+ case PPC::ANDIS_rec:
+ case PPC::ANDIS8_rec: {
+ Register TrueReg =
+ TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
+ if (!TrueReg.isVirtual() || !MRI->hasOneNonDBGUse(TrueReg))
+ break;
+
+ MachineInstr *SrcMI = MRI->getVRegDef(TrueReg);
+ if (!SrcMI)
+ break;
+
+ unsigned SrcOpCode = SrcMI->getOpcode();
+ if (SrcOpCode != PPC::RLDICL && SrcOpCode != PPC::RLDICR)
+ break;
+
+ Register SrcReg, DstReg;
+ SrcReg = SrcMI->getOperand(1).getReg();
+ DstReg = MI.getOperand(1).getReg();
+ const TargetRegisterClass *SrcRC = MRI->getRegClassOrNull(SrcReg);
+ const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(DstReg);
+ if (DstRC != SrcRC)
+ break;
+
+ uint64_t AndImm = MI.getOperand(2).getImm();
+ if (MI.getOpcode() == PPC::ANDIS_rec ||
+ MI.getOpcode() == PPC::ANDIS8_rec)
+ AndImm <<= 16;
+ uint64_t LZeroAndImm = llvm::countl_zero<uint64_t>(AndImm);
+ uint64_t RZeroAndImm = llvm::countr_zero<uint64_t>(AndImm);
+ uint64_t ImmSrc = SrcMI->getOperand(3).getImm();
+
+ // We can transform `RLDICL/RLDICR + ANDI_rec/ANDIS_rec` into `ANDI_rec 0`
+ // if all bits to AND are already zero in the input.
+ bool PatternResultZero =
+ (SrcOpCode == PPC::RLDICL && (RZeroAndImm + ImmSrc > 63)) ||
+ (SrcOpCode == PPC::RLDICR && LZeroAndImm > ImmSrc);
+
+ // We can eliminate RLDICL/RLDICR if it's used to clear bits and all
+ // bits cleared will be ANDed with 0 by ANDI_rec/ANDIS_rec.
+ bool PatternRemoveRotate =
+ SrcMI->getOperand(2).getImm() == 0 &&
+ ((SrcOpCode == PPC::RLDICL && LZeroAndImm >= ImmSrc) ||
+ (SrcOpCode == PPC::RLDICR && (RZeroAndImm + ImmSrc > 63)));
+
+ if (!PatternResultZero && !PatternRemoveRotate)
+ break;
+
+ LLVM_DEBUG(dbgs() << "Combining pair: ");
+ LLVM_DEBUG(SrcMI->dump());
+ LLVM_DEBUG(MI.dump());
+ if (PatternResultZero)
+ MI.getOperand(2).setImm(0);
+ MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+ LLVM_DEBUG(dbgs() << "To: ");
+ LLVM_DEBUG(MI.dump());
+ addRegToUpdate(MI.getOperand(1).getReg());
+ addRegToUpdate(SrcMI->getOperand(0).getReg());
+ Simplified = true;
+ break;
+ }
case PPC::RLWINM:
case PPC::RLWINM_rec:
case PPC::RLWINM8:
case PPC::RLWINM8_rec: {
+ // We might replace operand 1 of the instruction which will
+ // require we recompute kill flags for it.
+ Register OrigOp1Reg = MI.getOperand(1).isReg()
+ ? MI.getOperand(1).getReg()
+ : PPC::NoRegister;
Simplified = TII->combineRLWINM(MI, &ToErase);
- if (Simplified)
+ if (Simplified) {
+ addRegToUpdate(OrigOp1Reg);
+ if (MI.getOperand(1).isReg())
+ addRegToUpdate(MI.getOperand(1).getReg());
++NumRotatesCollapsed;
+ }
break;
}
// We will replace TD/TW/TDI/TWI with an unconditional trap if it will
@@ -1141,6 +1329,7 @@ bool PPCMIPeephole::simplifyCode() {
// If the last instruction was marked for elimination,
// remove it now.
if (ToErase) {
+ recomputeLVForDyingInstr();
ToErase->eraseFromParent();
ToErase = nullptr;
}
@@ -1158,6 +1347,13 @@ bool PPCMIPeephole::simplifyCode() {
// We try to eliminate redundant compare instruction.
Simplified |= eliminateRedundantCompare();
+ // If we have made any modifications and added any registers to the set of
+ // registers for which we need to update the kill flags, do so by recomputing
+ // LiveVariables for those registers.
+ for (Register Reg : RegsToUpdate) {
+ if (!MRI->reg_empty(Reg))
+ LV->recomputeForSingleDefVirtReg(Reg);
+ }
return Simplified;
}
@@ -1586,6 +1782,12 @@ bool PPCMIPeephole::eliminateRedundantCompare() {
LLVM_DEBUG(BI1->dump());
LLVM_DEBUG(CMPI2->dump());
LLVM_DEBUG(BI2->dump());
+ for (const MachineOperand &MO : CMPI1->operands())
+ if (MO.isReg())
+ addRegToUpdate(MO.getReg());
+ for (const MachineOperand &MO : CMPI2->operands())
+ if (MO.isReg())
+ addRegToUpdate(MO.getReg());
// We adjust opcode, predicates and immediate as we determined above.
if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
@@ -1623,6 +1825,7 @@ bool PPCMIPeephole::eliminateRedundantCompare() {
"We cannot support if an operand comes from this BB.");
unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
CMPI2->getOperand(I).setReg(SrcReg);
+ addRegToUpdate(SrcReg);
}
}
auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
@@ -1635,14 +1838,20 @@ bool PPCMIPeephole::eliminateRedundantCompare() {
.addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
.addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
BI2->getOperand(1).setReg(NewVReg);
+ addRegToUpdate(NewVReg);
}
else {
// We finally eliminate compare instruction in MBB2.
+ // We do not need to treat CMPI2 specially here in terms of re-computing
+ // live variables even though it is being deleted because:
+ // - It defines a register that has a single use (already checked in
+ // eligibleForCompareElimination())
+ // - The only user (BI2) is no longer using it so the register is dead (no
+ // def, no uses)
+ // - We do not attempt to recompute live variables for dead registers
BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
CMPI2->eraseFromParent();
}
- BI2->getOperand(1).setIsKill(true);
- BI1->getOperand(1).setIsKill(false);
LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
LLVM_DEBUG(CMPI1->dump());
@@ -1654,7 +1863,6 @@ bool PPCMIPeephole::eliminateRedundantCompare() {
<< " to handle partial redundancy.\n");
LLVM_DEBUG(CMPI2->dump());
}
-
Simplified = true;
}
@@ -1664,7 +1872,8 @@ bool PPCMIPeephole::eliminateRedundantCompare() {
// We miss the opportunity to emit an RLDIC when lowering jump tables
// since ISEL sees only a single basic block. When selecting, the clear
// and shift left will be in different blocks.
-bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) {
+bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
+ MachineInstr *&ToErase) {
if (MI.getOpcode() != PPC::RLDICR)
return false;
@@ -1710,8 +1919,8 @@ bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) {
MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
MI.getOperand(2).setImm(NewSH);
MI.getOperand(3).setImm(NewMB);
- MI.getOperand(1).setIsKill(SrcMI->getOperand(1).isKill());
- SrcMI->getOperand(1).setIsKill(false);
+ addRegToUpdate(MI.getOperand(1).getReg());
+ addRegToUpdate(SrcMI->getOperand(0).getReg());
LLVM_DEBUG(dbgs() << "To: ");
LLVM_DEBUG(MI.dump());
@@ -1720,7 +1929,7 @@ bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) {
if (MRI->use_nodbg_empty(SrcReg)) {
assert(!SrcMI->hasImplicitDef() &&
"Not expecting an implicit def with this instr.");
- SrcMI->eraseFromParent();
+ ToErase = SrcMI;
}
return true;
}
@@ -1793,8 +2002,11 @@ bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
LLVM_DEBUG(NewInstr->dump());
++NumEXTSWAndSLDICombined;
ToErase = &MI;
- // SrcMI, which is extsw, is of no use now, erase it.
- SrcMI->eraseFromParent();
+ // SrcMI, which is extsw, is of no use now, but we don't erase it here so we
+ // can recompute its kill flags. We run DCE immediately after this pass
+ // to clean up dead instructions such as this.
+ addRegToUpdate(NewInstr->getOperand(1).getReg());
+ addRegToUpdate(SrcMI->getOperand(0).getReg());
return true;
}
@@ -1805,6 +2017,7 @@ INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
"PowerPC MI Peephole Optimization", false, false)
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
index bf1c39a3a3a2..7ad6ef8c3928 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
@@ -286,7 +286,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
namespace llvm {
-std::unique_ptr<ScheduleDAGMutation> createPowerPCMacroFusionDAGMutation () {
+std::unique_ptr<ScheduleDAGMutation> createPowerPCMacroFusionDAGMutation() {
return createMacroFusionDAGMutation(shouldScheduleAdjacent);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMergeStringPool.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMergeStringPool.cpp
new file mode 100644
index 000000000000..d9465e86d896
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMergeStringPool.cpp
@@ -0,0 +1,365 @@
+//===-- PPCMergeStringPool.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation tries to merge the strings in the module into one pool
+// of strings. The idea is to reduce the number of TOC entries in the module so
+// that instead of having one TOC entry for each string there is only one global
+// TOC entry and all of the strings are referenced off of that one entry plus
+// an offset.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+
+#define DEBUG_TYPE "ppc-merge-strings"
+
+STATISTIC(NumPooledStrings, "Number of Strings Pooled");
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+ MaxStringsPooled("ppc-max-strings-pooled", cl::Hidden, cl::init(-1),
+ cl::desc("Maximum Number of Strings to Pool."));
+
+static cl::opt<unsigned>
+ MinStringsBeforePool("ppc-min-strings-before-pool", cl::Hidden, cl::init(2),
+ cl::desc("Minimum number of string candidates before "
+ "pooling is considered."));
+
+namespace {
+struct {
+ bool operator()(const GlobalVariable *LHS, const GlobalVariable *RHS) const {
+ // First priority is alignment.
+ // If elements are sorted in terms of alignment then there won't be an
+ // issue with incorrect alignment that would require padding.
+ Align LHSAlign = LHS->getAlign().valueOrOne();
+ Align RHSAlign = RHS->getAlign().valueOrOne();
+ if (LHSAlign > RHSAlign)
+ return true;
+ else if (LHSAlign < RHSAlign)
+ return false;
+
+ // Next priority is the number of uses.
+ // Smaller offsets are easier to materialize because materializing a large
+ // offset may require more than one instruction (i.e., addis, addi).
+ if (LHS->getNumUses() > RHS->getNumUses())
+ return true;
+ else if (LHS->getNumUses() < RHS->getNumUses())
+ return false;
+
+ const Constant *ConstLHS = LHS->getInitializer();
+ const ConstantDataSequential *ConstDataLHS =
+ dyn_cast<ConstantDataSequential>(ConstLHS);
+ unsigned LHSSize =
+ ConstDataLHS->getNumElements() * ConstDataLHS->getElementByteSize();
+ const Constant *ConstRHS = RHS->getInitializer();
+ const ConstantDataSequential *ConstDataRHS =
+ dyn_cast<ConstantDataSequential>(ConstRHS);
+ unsigned RHSSize =
+ ConstDataRHS->getNumElements() * ConstDataRHS->getElementByteSize();
+
+ // Finally, smaller constants should go first. This is, again, trying to
+ // minimize the offsets into the final struct.
+ return LHSSize < RHSSize;
+ }
+} CompareConstants;
+
+class PPCMergeStringPool : public ModulePass {
+public:
+ static char ID;
+ PPCMergeStringPool() : ModulePass(ID) {}
+
+ bool runOnModule(Module &M) override { return mergeModuleStringPool(M); }
+
+ StringRef getPassName() const override { return "PPC Merge String Pool"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
+ }
+
+private:
+ // Globals in a Module are already unique so a set is not required and a
+ // vector will do.
+ std::vector<GlobalVariable *> MergeableStrings;
+ Align MaxAlignment;
+ Type *PooledStructType;
+ LLVMContext *Context;
+ void collectCandidateConstants(Module &M);
+ bool mergeModuleStringPool(Module &M);
+ void replaceUsesWithGEP(GlobalVariable *GlobalToReplace, GlobalVariable *GPool,
+ unsigned ElementIndex);
+};
+
+
+// In order for a constant to be pooled we need to be able to replace all of
+// the uses for that constant. This function checks all of the uses to make
+// sure that they can be replaced.
+static bool hasReplaceableUsers(GlobalVariable &GV) {
+ for (User *CurrentUser : GV.users()) {
+ // Instruction users are always valid.
+ if (isa<Instruction>(CurrentUser))
+ continue;
+
+ // We cannot replace GlobalValue users because they are not just nodes
+ // in IR. To replace a user like this we would need to create a new
+ // GlobalValue with the replacement and then try to delete the original
+ // GlobalValue. Deleting the original would only happen if it has no other
+ // uses.
+ if (isa<GlobalValue>(CurrentUser))
+ return false;
+
+ // We only support Instruction and Constant users.
+ if (!isa<Constant>(CurrentUser))
+ return false;
+ }
+
+ return true;
+}
+
+// Run through all of the constants in the module and determine if they are
+// valid candidates to be merged into the string pool. Valid candidates will
+// be added to MergeableStrings.
+void PPCMergeStringPool::collectCandidateConstants(Module &M) {
+ SmallVector<GlobalValue *, 4> UsedV;
+ collectUsedGlobalVariables(M, UsedV, /*CompilerUsed=*/false);
+ SmallVector<GlobalValue *, 4> UsedVCompiler;
+ collectUsedGlobalVariables(M, UsedVCompiler, /*CompilerUsed=*/true);
+ // Combine all of the Global Variables marked as used into a SmallPtrSet for
+ // faster lookup inside the loop.
+ SmallPtrSet<GlobalValue *, 8> AllUsedGlobals;
+ AllUsedGlobals.insert(UsedV.begin(), UsedV.end());
+ AllUsedGlobals.insert(UsedVCompiler.begin(), UsedVCompiler.end());
+
+ for (GlobalVariable &Global : M.globals()) {
+ LLVM_DEBUG(dbgs() << "Looking at global:");
+ LLVM_DEBUG(Global.dump());
+ LLVM_DEBUG(dbgs() << "isConstant() " << Global.isConstant() << "\n");
+ LLVM_DEBUG(dbgs() << "hasInitializer() " << Global.hasInitializer()
+ << "\n");
+
+ // We can only pool constants.
+ if (!Global.isConstant() || !Global.hasInitializer())
+ continue;
+
+ // If a global constant has a section we do not try to pool it because
+ // there is no guarantee that other constants will also be in the same
+ // section. Trying to pool constants from different sections (or no
+ // section) means that the pool has to be in multiple sections at the same
+ // time.
+ if (Global.hasSection())
+ continue;
+
+ // Do not pool constants with metadata because we should not add metadata
+ // to the pool when that metadata refers to a single constant in the pool.
+ if (Global.hasMetadata())
+ continue;
+
+ ConstantDataSequential *ConstData =
+ dyn_cast<ConstantDataSequential>(Global.getInitializer());
+
+ // If the constant is undef then ConstData will be null.
+ if (!ConstData)
+ continue;
+
+ // Do not pool globals that are part of llvm.used or llvm.compiler.used.
+ if (AllUsedGlobals.contains(&Global))
+ continue;
+
+ if (!hasReplaceableUsers(Global))
+ continue;
+
+ Align AlignOfGlobal = Global.getAlign().valueOrOne();
+
+ // TODO: At this point do not allow over-aligned types. Adding a type
+ // with larger alignment may lose the larger alignment once it is
+ // added to the struct.
+ // Fix this in a future patch.
+ if (AlignOfGlobal.value() > ConstData->getElementByteSize())
+ continue;
+
+ // Make sure that the global is only visible inside the compilation unit.
+ if (Global.getLinkage() != GlobalValue::PrivateLinkage &&
+ Global.getLinkage() != GlobalValue::InternalLinkage)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Constant data of Global: ");
+ LLVM_DEBUG(ConstData->dump());
+ LLVM_DEBUG(dbgs() << "\n\n");
+
+ MergeableStrings.push_back(&Global);
+ if (MaxAlignment < AlignOfGlobal)
+ MaxAlignment = AlignOfGlobal;
+
+ // If we have already reached the maximum number of pooled strings then
+ // there is no point in looking for more.
+ if (MergeableStrings.size() >= MaxStringsPooled)
+ break;
+ }
+}
+
+bool PPCMergeStringPool::mergeModuleStringPool(Module &M) {
+
+ LLVM_DEBUG(dbgs() << "Merging string pool for module: " << M.getName()
+ << "\n");
+ LLVM_DEBUG(dbgs() << "Number of globals is: " << M.global_size() << "\n");
+
+ collectCandidateConstants(M);
+
+ // If the module has too few merge-candidate constants, skip the merging.
+ if (MergeableStrings.size() < MinStringsBeforePool)
+ return false;
+
+ // Sort the global constants to make access more efficient.
+ std::sort(MergeableStrings.begin(), MergeableStrings.end(), CompareConstants);
+
+ SmallVector<Constant *> ConstantsInStruct;
+ for (GlobalVariable *GV : MergeableStrings)
+ ConstantsInStruct.push_back(GV->getInitializer());
+
+ // Use an anonymous struct to pool the strings.
+ // TODO: This pass uses a single anonymous struct for all of the pooled
+ // entries. This may cause a performance issue in the situation where
+ // computing the offset requires two instructions (addis, addi). For the
+ // future we may want to split this into multiple structs.
+ Constant *ConstantPool = ConstantStruct::getAnon(ConstantsInStruct);
+ PooledStructType = ConstantPool->getType();
+
+ // The GlobalVariable constructor calls
+ // MM->insertGlobalVariable(PooledGlobal).
+ GlobalVariable *PooledGlobal =
+ new GlobalVariable(M, PooledStructType,
+ /* isConstant */ true, GlobalValue::PrivateLinkage,
+ ConstantPool, "__ModuleStringPool");
+ PooledGlobal->setAlignment(MaxAlignment);
+
+ LLVM_DEBUG(dbgs() << "Constructing global variable for string pool: ");
+ LLVM_DEBUG(PooledGlobal->dump());
+
+ Context = &M.getContext();
+ size_t ElementIndex = 0;
+ for (GlobalVariable *GV : MergeableStrings) {
+
+ LLVM_DEBUG(dbgs() << "The global:\n");
+ LLVM_DEBUG(GV->dump());
+ LLVM_DEBUG(dbgs() << "Has " << GV->getNumUses() << " uses.\n");
+
+ // Access to the pooled constant strings requires an offset. Add a GEP
+ // before every use in order to compute this offset.
+ replaceUsesWithGEP(GV, PooledGlobal, ElementIndex);
+
+ // This GV has no more uses so we can erase it.
+ if (GV->use_empty())
+ GV->eraseFromParent();
+
+ NumPooledStrings++;
+ ElementIndex++;
+ }
+ return true;
+}
+
+static bool userHasOperand(User *TheUser, GlobalVariable *GVOperand) {
+ for (Value *Op : TheUser->operands())
+ if (Op == GVOperand)
+ return true;
+ return false;
+}
+
+// For pooled strings we need to add the offset into the pool for each string.
+// This is done by adding a Get Element Pointer (GEP) before each user. This
+// function adds the GEP.
+void PPCMergeStringPool::replaceUsesWithGEP(GlobalVariable *GlobalToReplace,
+ GlobalVariable *GPool,
+ unsigned ElementIndex) {
+ SmallVector<Value *, 2> Indices;
+ Indices.push_back(ConstantInt::get(Type::getInt32Ty(*Context), 0));
+ Indices.push_back(ConstantInt::get(Type::getInt32Ty(*Context), ElementIndex));
+
+ // Need to save a temporary copy of each user list because we remove uses
+ // as we replace them.
+ SmallVector<User *> Users;
+ for (User *CurrentUser : GlobalToReplace->users())
+ Users.push_back(CurrentUser);
+
+ for (User *CurrentUser : Users) {
+ Instruction *UserInstruction = dyn_cast<Instruction>(CurrentUser);
+ Constant *UserConstant = dyn_cast<Constant>(CurrentUser);
+
+ // At this point we expect that the user is either an instruction or a
+ // constant.
+ assert((UserConstant || UserInstruction) &&
+ "Expected the user to be an instruction or a constant.");
+
+ // The user was not found so it must have been replaced earlier.
+ if (!userHasOperand(CurrentUser, GlobalToReplace))
+ continue;
+
+ // We cannot replace operands in globals so we ignore those.
+ if (isa<GlobalValue>(CurrentUser))
+ continue;
+
+ if (!UserInstruction) {
+ // User is a constant type.
+ Constant *ConstGEP = ConstantExpr::getInBoundsGetElementPtr(
+ PooledStructType, GPool, Indices);
+ UserConstant->handleOperandChange(GlobalToReplace, ConstGEP);
+ continue;
+ }
+
+ if (PHINode *UserPHI = dyn_cast<PHINode>(UserInstruction)) {
+ // GEP instructions cannot be inserted before PHI nodes, so for PHI
+ // users we create the GEP as a constant expression with
+ // getInBoundsGetElementPtr and substitute it directly into the PHI.
+ Constant *ConstGEP = ConstantExpr::getInBoundsGetElementPtr(
+ PooledStructType, GPool, Indices);
+ UserPHI->replaceUsesOfWith(GlobalToReplace, ConstGEP);
+ continue;
+ }
+ // The user is a valid instruction that is not a PHINode.
+ GetElementPtrInst *GEPInst =
+ GetElementPtrInst::Create(PooledStructType, GPool, Indices);
+ GEPInst->insertBefore(UserInstruction);
+
+ LLVM_DEBUG(dbgs() << "Inserting GEP before:\n");
+ LLVM_DEBUG(UserInstruction->dump());
+
+ LLVM_DEBUG(dbgs() << "Replacing this global:\n");
+ LLVM_DEBUG(GlobalToReplace->dump());
+ LLVM_DEBUG(dbgs() << "with this:\n");
+ LLVM_DEBUG(GEPInst->dump());
+
+ // After the GEP is inserted the GV can be replaced.
+ CurrentUser->replaceUsesOfWith(GlobalToReplace, GEPInst);
+ }
+}
+
+} // namespace
+
+char PPCMergeStringPool::ID = 0;
+
+INITIALIZE_PASS(PPCMergeStringPool, DEBUG_TYPE, "PPC Merge String Pool", false,
+ false)
+
+ModulePass *llvm::createPPCMergeStringPoolPass() {
+ return new PPCMergeStringPool();
+}
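The net effect of the new PPCMergeStringPool pass, as a hand-written C++ analogy: several private string globals collapse into one anonymous aggregate, so PowerPC needs a single TOC entry plus constant offsets instead of one entry per string (Hello, World, and Pool are illustrative names; the pass itself emits constant GEPs into __ModuleStringPool):

    // Before pooling: two constant strings, each with its own symbol,
    // and on PowerPC each symbol costs a TOC entry.
    static const char Hello[6] = "hello";
    static const char World[6] = "world";

    // After pooling: one aggregate symbol; every user addresses its
    // string as pool base + constant offset.
    static const struct {
      char A[6]; // offset 0, stands in for Hello
      char B[6]; // offset 6, stands in for World
    } Pool = {"hello", "world"};
    static const char *HelloPooled = Pool.A;
    static const char *WorldPooled = Pool.B;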
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index 6f1b34843343..6e3bf26a598a 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -14,7 +14,6 @@
#include "PPC.h"
#include "PPCInstrInfo.h"
#include "PPCSubtarget.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -240,7 +239,7 @@ static bool hasPCRelativeForm(MachineInstr &Use) {
return false;
// Finally return true only if the GOT flag is present.
- return (SymbolOp.getTargetFlags() & PPCII::MO_GOT_FLAG);
+ return PPCInstrInfo::hasGOTFlag(SymbolOp.getTargetFlags());
}
bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) {
@@ -495,7 +494,8 @@ static bool hasPCRelativeForm(MachineInstr &Use) {
}
}
MachineInstr *DefMIToErase = nullptr;
- if (TII->convertToImmediateForm(MI, &DefMIToErase)) {
+ SmallSet<Register, 4> UpdatedRegs;
+ if (TII->convertToImmediateForm(MI, UpdatedRegs, &DefMIToErase)) {
Changed = true;
NumRRConvertedInPreEmit++;
LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 11dbbce42f61..36b8a24ba502 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -172,58 +172,6 @@ public:
Register getBaseRegister(const MachineFunction &MF) const;
bool hasBasePointer(const MachineFunction &MF) const;
- /// stripRegisterPrefix - This method strips the character prefix from a
- /// register name so that only the number is left. Used by for linux asm.
- static const char *stripRegisterPrefix(const char *RegName) {
- switch (RegName[0]) {
- case 'a':
- if (RegName[1] == 'c' && RegName[2] == 'c')
- return RegName + 3;
- break;
- case 'f':
- if (RegName[1] == 'p')
- return RegName + 2;
- [[fallthrough]];
- case 'r':
- case 'v':
- if (RegName[1] == 's') {
- if (RegName[2] == 'p')
- return RegName + 3;
- return RegName + 2;
- }
- return RegName + 1;
- case 'c':
- if (RegName[1] == 'r')
- return RegName + 2;
- break;
- case 'w':
- // For wacc and wacc_hi
- if (RegName[1] == 'a' && RegName[2] == 'c' && RegName[3] == 'c') {
- if (RegName[4] == '_')
- return RegName + 7;
- else
- return RegName + 4;
- }
- break;
- case 'd':
- // For dmr, dmrp, dmrrow, dmrrowp
- if (RegName[1] == 'm' && RegName[2] == 'r') {
- if (RegName[3] == 'r' && RegName[4] == 'o' && RegName[5] == 'w' &&
- RegName[6] == 'p')
- return RegName + 7;
- else if (RegName[3] == 'r' && RegName[4] == 'o' && RegName[5] == 'w')
- return RegName + 6;
- else if (RegName[3] == 'p')
- return RegName + 4;
- else
- return RegName + 3;
- }
- break;
- }
-
- return RegName;
- }
-
bool isNonallocatableRegisterCalleeSave(MCRegister Reg) const override {
return Reg == PPC::LR || Reg == PPC::LR8;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 6151faf403aa..375e63654db1 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -798,6 +798,7 @@ def directbrtarget : Operand<OtherVT> {
def absdirectbrtarget : Operand<OtherVT> {
let PrintMethod = "printAbsBranchOperand";
let EncoderMethod = "getAbsDirectBrEncoding";
+ let DecoderMethod = "decodeDirectBrTarget";
let ParserMatchClass = PPCDirectBrAsmOperand;
}
def PPCCondBrAsmOperand : AsmOperandClass {
@@ -814,6 +815,7 @@ def condbrtarget : Operand<OtherVT> {
def abscondbrtarget : Operand<OtherVT> {
let PrintMethod = "printAbsBranchOperand";
let EncoderMethod = "getAbsCondBrEncoding";
+ let DecoderMethod = "decodeCondBrTarget";
let ParserMatchClass = PPCCondBrAsmOperand;
}
def calltarget : Operand<iPTR> {
@@ -826,6 +828,7 @@ def calltarget : Operand<iPTR> {
def abscalltarget : Operand<iPTR> {
let PrintMethod = "printAbsBranchOperand";
let EncoderMethod = "getAbsDirectBrEncoding";
+ let DecoderMethod = "decodeDirectBrTarget";
let ParserMatchClass = PPCDirectBrAsmOperand;
}
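
Each DecoderMethod string added above names a hook the generated PPC disassembler will call for that operand. Such hooks conventionally take the shape sketched below in LLVM disassemblers; the body is an illustrative guess (a 24-bit sign-extended branch field is assumed), not the actual decodeDirectBrTarget:

    // Assumed shape of a decoder hook; only the signature pattern is
    // conventional, the field width and body are illustrative.
    static DecodeStatus decodeDirectBrTarget(MCInst &Inst, uint64_t Imm,
                                             uint64_t /*Address*/,
                                             const MCDisassembler * /*Decoder*/) {
      Inst.addOperand(MCOperand::createImm(SignExtend64<24>(Imm)));
      return MCDisassembler::Success;
    }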
def PPCCRBitMaskOperand : AsmOperandClass {
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP10.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP10.td
index 25be37718af2..f922f8a7d985 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP10.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP10.td
@@ -29,8 +29,8 @@ def P10Model : SchedMachineModel {
let LoopMicroOpBufferSize = 60;
let CompleteModel = 1;
-  // Do not support SPE (Signal Processing Engine) on Power 10.
- let UnsupportedFeatures = [HasSPE, IsE500, IsBookE, IsISAFuture];
+ // Power 10 does not support instructions from SPE, Book E and HTM.
+ let UnsupportedFeatures = [HasSPE, IsE500, IsBookE, IsISAFuture, HasHTM];
}
let SchedModel = P10Model in {
@@ -87,27 +87,27 @@ let SchedModel = P10Model in {
}
def P10W_BF_22C : SchedWriteRes<[P10_BF]> {
- let ResourceCycles = [ 5 ];
+ let ReleaseAtCycles = [ 5 ];
let Latency = 22;
}
def P10W_BF_24C : SchedWriteRes<[P10_BF]> {
- let ResourceCycles = [ 8 ];
+ let ReleaseAtCycles = [ 8 ];
let Latency = 24;
}
def P10W_BF_26C : SchedWriteRes<[P10_BF]> {
- let ResourceCycles = [ 5 ];
+ let ReleaseAtCycles = [ 5 ];
let Latency = 26;
}
def P10W_BF_27C : SchedWriteRes<[P10_BF]> {
- let ResourceCycles = [ 7 ];
+ let ReleaseAtCycles = [ 7 ];
let Latency = 27;
}
def P10W_BF_36C : SchedWriteRes<[P10_BF]> {
- let ResourceCycles = [ 10 ];
+ let ReleaseAtCycles = [ 10 ];
let Latency = 36;
}
@@ -128,134 +128,134 @@ let SchedModel = P10Model in {
}
def P10W_DF_24C : SchedWriteRes<[P10_DF]> {
- let ResourceCycles = [ 16 ];
+ let ReleaseAtCycles = [ 16 ];
let Latency = 24;
}
def P10W_DF_25C : SchedWriteRes<[P10_DF]> {
- let ResourceCycles = [ 17 ];
+ let ReleaseAtCycles = [ 17 ];
let Latency = 25;
}
def P10W_DF_26C : SchedWriteRes<[P10_DF]> {
- let ResourceCycles = [ 18 ];
+ let ReleaseAtCycles = [ 18 ];
let Latency = 26;
}
def P10W_DF_32C : SchedWriteRes<[P10_DF]> {
- let ResourceCycles = [ 22 ];
+ let ReleaseAtCycles = [ 22 ];
let Latency = 32;
}
def P10W_DF_33C : SchedWriteRes<[P10_DF]> {
- let ResourceCycles = [ 25 ];
+ let ReleaseAtCycles = [ 25 ];
let Latency = 33;
}
def P10W_DF_34C : SchedWriteRes<[P10_DF]> {
- let ResourceCycles = [ 25 ];
+ let ReleaseAtCycles = [ 25 ];
let Latency = 34;
}
def P10W_DF_38C : SchedWriteRes<[P10_DF]> {
- let ResourceCycles = [ 30 ];
+ let ReleaseAtCycles = [ 30 ];
let Latency = 38;
}
def P10W_DF_40C : SchedWriteRes<[P10_DF]> {
- let ResourceCycles = [ 17 ];
+ let ReleaseAtCycles = [ 17 ];
let Latency = 40;
}
def P10W_DF_43C : SchedWriteRes<[P10_DF]> {
- let ResourceCycles = [ 34 ];
+ let ReleaseAtCycles = [ 34 ];
let Latency = 43;
}
def P10W_DF_59C : SchedWriteRes<[P10_DF]> {
- let ResourceCycles = [ 49 ];
+ let ReleaseAtCycles = [ 49 ];
let Latency = 59;
}
def P10W_DF_61C : SchedWriteRes<[P10_DF]> {
- let ResourceCycles = [ 12 ];
+ let ReleaseAtCycles = [ 12 ];
let Latency = 61;
}
def P10W_DF_68C : SchedWriteRes<[P10_DF]> {
- let ResourceCycles = [ 15 ];
+ let ReleaseAtCycles = [ 15 ];
let Latency = 68;
}
def P10W_DF_77C : SchedWriteRes<[P10_DF]> {
- let ResourceCycles = [ 67 ];
+ let ReleaseAtCycles = [ 67 ];
let Latency = 77;
}
def P10W_DF_87C : SchedWriteRes<[P10_DF]> {
- let ResourceCycles = [ 12 ];
+ let ReleaseAtCycles = [ 12 ];
let Latency = 87;
}
def P10W_DF_100C : SchedWriteRes<[P10_DF]> {
- let ResourceCycles = [ 32 ];
+ let ReleaseAtCycles = [ 32 ];
let Latency = 100;
}
def P10W_DF_174C : SchedWriteRes<[P10_DF]> {
- let ResourceCycles = [ 33 ];
+ let ReleaseAtCycles = [ 33 ];
let Latency = 174;
}
// A DV pipeline may take from 20 to 83 cycles to complete.
// Some DV operations may keep the pipeline busy for up to 33 cycles.
def P10W_DV_20C : SchedWriteRes<[P10_DV]> {
- let ResourceCycles = [ 10 ];
+ let ReleaseAtCycles = [ 10 ];
let Latency = 20;
}
def P10W_DV_25C : SchedWriteRes<[P10_DV]> {
- let ResourceCycles = [ 10 ];
+ let ReleaseAtCycles = [ 10 ];
let Latency = 25;
}
def P10W_DV_27C : SchedWriteRes<[P10_DV]> {
- let ResourceCycles = [ 10 ];
+ let ReleaseAtCycles = [ 10 ];
let Latency = 27;
}
def P10W_DV_41C : SchedWriteRes<[P10_DV]> {
- let ResourceCycles = [ 10 ];
+ let ReleaseAtCycles = [ 10 ];
let Latency = 41;
}
def P10W_DV_43C : SchedWriteRes<[P10_DV]> {
- let ResourceCycles = [ 21 ];
+ let ReleaseAtCycles = [ 21 ];
let Latency = 43;
}
def P10W_DV_47C : SchedWriteRes<[P10_DV]> {
- let ResourceCycles = [ 21 ];
+ let ReleaseAtCycles = [ 21 ];
let Latency = 47;
}
def P10W_DV_54C : SchedWriteRes<[P10_DV]> {
- let ResourceCycles = [ 33 ];
+ let ReleaseAtCycles = [ 33 ];
let Latency = 54;
}
def P10W_DV_60C : SchedWriteRes<[P10_DV]> {
- let ResourceCycles = [ 33 ];
+ let ReleaseAtCycles = [ 33 ];
let Latency = 60;
}
def P10W_DV_75C : SchedWriteRes<[P10_DV]> {
- let ResourceCycles = [ 21 ];
+ let ReleaseAtCycles = [ 21 ];
let Latency = 75;
}
def P10W_DV_83C : SchedWriteRes<[P10_DV]> {
- let ResourceCycles = [ 33 ];
+ let ReleaseAtCycles = [ 33 ];
let Latency = 83;
}
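
Throughout this file ResourceCycles is renamed to ReleaseAtCycles: the value states how long the write holds its unit, while Latency states when the result becomes readable. A small standalone check using the P10W_DV_83C numbers above (unit held 33 cycles, result after 83):

    #include <cstdio>

    int main() {
      const int ReleaseAtCycles = 33, Latency = 83; // from P10W_DV_83C above
      // An independent op is throughput-bound: it can claim the DV unit as
      // soon as the first op releases it.
      int independentIssue = 0 + ReleaseAtCycles; // cycle 33
      // A dependent op must additionally wait for the first result.
      int dependentIssue = 0 + Latency;           // cycle 83
      printf("independent: %d, dependent: %d\n", independentIssue, dependentIssue);
      return 0;
    }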
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP7.td
index 5a8c1eb2b837..93399e5ddbca 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP7.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP7.td
@@ -6,383 +6,10 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the itinerary class data for the POWER7 processor.
+// This file defines the SchedModel for the POWER7 processor.
//
//===----------------------------------------------------------------------===//
-// Primary reference:
-// IBM POWER7 multicore server processor
-// B. Sinharoy, et al.
-// IBM J. Res. & Dev. (55) 3. May/June 2011.
-
-// Scheduling for the P7 involves tracking two types of resources:
-// 1. The dispatch bundle slots
-// 2. The functional unit resources
-
-// Dispatch units:
-def P7_DU1 : FuncUnit;
-def P7_DU2 : FuncUnit;
-def P7_DU3 : FuncUnit;
-def P7_DU4 : FuncUnit;
-def P7_DU5 : FuncUnit;
-def P7_DU6 : FuncUnit;
-
-def P7_LS1 : FuncUnit; // Load/Store pipeline 1
-def P7_LS2 : FuncUnit; // Load/Store pipeline 2
-
-def P7_FX1 : FuncUnit; // FX pipeline 1
-def P7_FX2 : FuncUnit; // FX pipeline 2
-
-// VS pipeline 1 (vector integer ops. always here)
-def P7_VS1 : FuncUnit; // VS pipeline 1
-// VS pipeline 2 (128-bit stores and perms. here)
-def P7_VS2 : FuncUnit; // VS pipeline 2
-
-def P7_CRU : FuncUnit; // CR unit (CR logicals and move-from-SPRs)
-def P7_BRU : FuncUnit; // BR unit
-
-// Notes:
-// Each LSU pipeline can also execute FX add and logical instructions.
-// Each LSU pipeline can complete a load or store in one cycle.
-//
-// Each store is broken into two parts, AGEN goes to the LSU while a
-// "data steering" op. goes to the FXU or VSU.
-//
-// FX loads have a two-cycle load-to-use latency (so one "bubble" cycle).
-// VSU loads have a three-cycle load-to-use latency (so two "bubble" cycles).
-//
-// Frequent FX ops. take only one cycle and results can be used again in the
-// next cycle (there is a self-bypass). Getting results from the other FX
-// pipeline takes an additional cycle.
-//
-// The VSU XS is similar to the POWER6, but with a pipeline length of 2 cycles
-// (instead of 3 cycles on the POWER6). VSU XS handles vector FX-style ops.
-// Dispatch of an instruction to VS1 that uses four single prec. inputs
-// (either to a float or XC op) prevents dispatch in that cycle to VS2 of any
-// floating point instruction.
-//
-// The VSU PM is similar to the POWER6, but with a pipeline length of 3 cycles
-// (instead of 4 cycles on the POWER6). vsel is handled by the PM pipeline
-// (unlike on the POWER6).
-//
-// FMA from the VSUs can forward results in 6 cycles. VS1 XS and vector FP
-// share the same write-back, and have a 5-cycle latency difference, so the
-// IFU/IDU will not dispatch an XS instruction 5 cycles after a vector FP
-// op. has been dispatched to VS1.
-//
-// Three cycles after an L1 cache hit, a dependent VSU instruction can issue.
-//
-// Instruction dispatch groups have (at most) four non-branch instructions, and
-// two branches. Unlike on the POWER4/5, a branch does not automatically
-// end the dispatch group, but a second branch must be the last in the group.
-
-def P7Itineraries : ProcessorItineraries<
- [P7_DU1, P7_DU2, P7_DU3, P7_DU4, P7_DU5, P7_DU6,
- P7_LS1, P7_LS2, P7_FX1, P7_FX2, P7_VS1, P7_VS2, P7_CRU, P7_BRU], [], [
- InstrItinData<IIC_IntSimple , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_FX1, P7_FX2,
- P7_LS1, P7_LS2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_IntGeneral , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_IntISEL, [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_FX1, P7_FX2], 0>,
- InstrStage<1, [P7_BRU]>],
- [1, 1, 1, 1]>,
- InstrItinData<IIC_IntCompare , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [1, 1, 1]>,
- // FIXME: Add record-form itinerary data.
- InstrItinData<IIC_IntDivW , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_DU2], 0>,
- InstrStage<36, [P7_FX1, P7_FX2]>],
- [36, 1, 1]>,
- InstrItinData<IIC_IntDivD , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_DU2], 0>,
- InstrStage<68, [P7_FX1, P7_FX2]>],
- [68, 1, 1]>,
- InstrItinData<IIC_IntMulHW , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [4, 1, 1]>,
- InstrItinData<IIC_IntMulHWU , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [4, 1, 1]>,
- InstrItinData<IIC_IntMulHD , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [4, 1, 1]>,
- InstrItinData<IIC_IntMulLI , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [4, 1, 1]>,
- InstrItinData<IIC_IntRotate , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_IntRotateD , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_IntRotateDI , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_IntShift , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_IntTrapW , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [1, 1]>,
- InstrItinData<IIC_IntTrapD , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [1, 1]>,
- InstrItinData<IIC_BrB , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
- InstrStage<1, [P7_BRU]>],
- [3, 1, 1]>,
- InstrItinData<IIC_BrCR , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_CRU]>],
- [3, 1, 1]>,
- InstrItinData<IIC_BrMCR , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
- InstrStage<1, [P7_BRU]>],
- [3, 1, 1]>,
- InstrItinData<IIC_BrMCRX , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
- InstrStage<1, [P7_BRU]>],
- [3, 1, 1]>,
- InstrItinData<IIC_LdStLoad , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_LS1, P7_LS2]>],
- [2, 1, 1]>,
- InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_DU2], 0>,
- InstrStage<1, [P7_LS1, P7_LS2], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [2, 2, 1, 1]>,
- InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_DU2], 0>,
- InstrStage<1, [P7_DU3], 0>,
- InstrStage<1, [P7_DU4], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>,
- InstrStage<1, [P7_LS1, P7_LS2], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [3, 3, 1, 1]>,
- InstrItinData<IIC_LdStLD , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_LS1, P7_LS2]>],
- [2, 1, 1]>,
- InstrItinData<IIC_LdStLDU , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_DU2], 0>,
- InstrStage<1, [P7_LS1, P7_LS2], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [2, 2, 1, 1]>,
- InstrItinData<IIC_LdStLDUX , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_DU2], 0>,
- InstrStage<1, [P7_DU3], 0>,
- InstrStage<1, [P7_DU4], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>,
- InstrStage<1, [P7_LS1, P7_LS2], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [3, 3, 1, 1]>,
- InstrItinData<IIC_LdStLFD , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_LS1, P7_LS2]>],
- [3, 1, 1]>,
- InstrItinData<IIC_LdStLVecX , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_LS1, P7_LS2]>],
- [3, 1, 1]>,
- InstrItinData<IIC_LdStLFDU , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_DU2], 0>,
- InstrStage<1, [P7_LS1, P7_LS2], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [3, 3, 1, 1]>,
- InstrItinData<IIC_LdStLFDUX , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_DU2], 0>,
- InstrStage<1, [P7_LS1, P7_LS2], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [3, 3, 1, 1]>,
- InstrItinData<IIC_LdStLHA , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_DU2], 0>,
- InstrStage<1, [P7_LS1, P7_LS2]>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [3, 1, 1]>,
- InstrItinData<IIC_LdStLHAU , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_DU2], 0>,
- InstrStage<1, [P7_LS1, P7_LS2], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [4, 4, 1, 1]>,
- InstrItinData<IIC_LdStLHAUX , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_DU2], 0>,
- InstrStage<1, [P7_DU3], 0>,
- InstrStage<1, [P7_DU4], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>,
- InstrStage<1, [P7_LS1, P7_LS2], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [4, 4, 1, 1]>,
- InstrItinData<IIC_LdStLWA , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_DU2], 0>,
- InstrStage<1, [P7_LS1, P7_LS2]>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [3, 1, 1]>,
- InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_DU2], 0>,
- InstrStage<1, [P7_DU3], 0>,
- InstrStage<1, [P7_DU4], 0>,
- InstrStage<1, [P7_LS1, P7_LS2]>],
- [3, 1, 1]>,
- InstrItinData<IIC_LdStLDARX, [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_DU2], 0>,
- InstrStage<1, [P7_DU3], 0>,
- InstrStage<1, [P7_DU4], 0>,
- InstrStage<1, [P7_LS1, P7_LS2]>],
- [3, 1, 1]>,
- InstrItinData<IIC_LdStLMW , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_LS1, P7_LS2]>],
- [2, 1, 1]>,
- InstrItinData<IIC_LdStStore , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_LS1, P7_LS2], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_LdStSTD , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_LS1, P7_LS2], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_LdStSTU , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_DU2], 0>,
- InstrStage<1, [P7_LS1, P7_LS2], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [2, 1, 1, 1]>,
- InstrItinData<IIC_LdStSTUX , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_DU2], 0>,
- InstrStage<1, [P7_DU3], 0>,
- InstrStage<1, [P7_DU4], 0>,
- InstrStage<1, [P7_LS1, P7_LS2], 0>,
- InstrStage<1, [P7_FX1, P7_FX2]>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [2, 1, 1, 1]>,
- InstrItinData<IIC_LdStSTFD , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_LS1, P7_LS2], 0>,
- InstrStage<1, [P7_VS1, P7_VS2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_LdStSTFDU , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_DU2], 0>,
- InstrStage<1, [P7_LS1, P7_LS2], 0>,
- InstrStage<1, [P7_FX1, P7_FX2], 0>,
- InstrStage<1, [P7_VS1, P7_VS2]>],
- [2, 1, 1, 1]>,
- InstrItinData<IIC_LdStSTVEBX , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_LS1, P7_LS2], 0>,
- InstrStage<1, [P7_VS2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_LdStSTDCX , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_DU2], 0>,
- InstrStage<1, [P7_DU3], 0>,
- InstrStage<1, [P7_DU4], 0>,
- InstrStage<1, [P7_LS1, P7_LS2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_LdStSTWCX , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_DU2], 0>,
- InstrStage<1, [P7_DU3], 0>,
- InstrStage<1, [P7_DU4], 0>,
- InstrStage<1, [P7_LS1, P7_LS2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_BrMCRX , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_DU2], 0>,
- InstrStage<1, [P7_DU3], 0>,
- InstrStage<1, [P7_DU4], 0>,
- InstrStage<1, [P7_CRU]>,
- InstrStage<1, [P7_FX1, P7_FX2]>],
- [3, 1]>, // mtcr
- InstrItinData<IIC_SprMFCR , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_CRU]>],
- [6, 1]>,
- InstrItinData<IIC_SprMFCRF , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_CRU]>],
- [3, 1]>,
- InstrItinData<IIC_SprMTSPR , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_FX1]>],
- [4, 1]>, // mtctr
- InstrItinData<IIC_FPGeneral , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_VS1, P7_VS2]>],
- [5, 1, 1]>,
- InstrItinData<IIC_FPAddSub , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_VS1, P7_VS2]>],
- [5, 1, 1]>,
- InstrItinData<IIC_FPCompare , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_VS1, P7_VS2]>],
- [8, 1, 1]>,
- InstrItinData<IIC_FPDivD , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_VS1, P7_VS2]>],
- [33, 1, 1]>,
- InstrItinData<IIC_FPDivS , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_VS1, P7_VS2]>],
- [27, 1, 1]>,
- InstrItinData<IIC_FPSqrtD , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_VS1, P7_VS2]>],
- [44, 1, 1]>,
- InstrItinData<IIC_FPSqrtS , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_VS1, P7_VS2]>],
- [32, 1, 1]>,
- InstrItinData<IIC_FPFused , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_VS1, P7_VS2]>],
- [5, 1, 1, 1]>,
- InstrItinData<IIC_FPRes , [InstrStage<1, [P7_DU1, P7_DU2,
- P7_DU3, P7_DU4], 0>,
- InstrStage<1, [P7_VS1, P7_VS2]>],
- [5, 1, 1]>,
- InstrItinData<IIC_VecGeneral , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_VS1]>],
- [2, 1, 1]>,
- InstrItinData<IIC_VecVSL , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_VS1]>],
- [2, 1, 1]>,
- InstrItinData<IIC_VecVSR , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_VS1]>],
- [2, 1, 1]>,
- InstrItinData<IIC_VecFP , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_VS1, P7_VS2]>],
- [6, 1, 1]>,
- InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_VS1, P7_VS2]>],
- [6, 1, 1]>,
- InstrItinData<IIC_VecFPRound , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_VS1, P7_VS2]>],
- [6, 1, 1]>,
- InstrItinData<IIC_VecComplex , [InstrStage<1, [P7_DU1], 0>,
- InstrStage<1, [P7_VS1]>],
- [7, 1, 1]>,
- InstrItinData<IIC_VecPerm , [InstrStage<1, [P7_DU1, P7_DU2], 0>,
- InstrStage<1, [P7_VS2]>],
- [3, 1, 1]>
-]>;
-
-// ===---------------------------------------------------------------------===//
-// P7 machine model for scheduling and other instruction cost heuristics.
-
def P7Model : SchedMachineModel {
let IssueWidth = 6; // 4 (non-branch) instructions are dispatched per cycle.
// Note that the dispatch bundle size is 6 (including
@@ -394,11 +21,295 @@ def P7Model : SchedMachineModel {
// Itineraries are queried instead.
let MispredictPenalty = 16;
+ let MicroOpBufferSize = 44;
+
// Try to make sure we have at least 10 dispatch groups in a loop.
let LoopMicroOpBufferSize = 40;
let CompleteModel = 0;
- let Itineraries = P7Itineraries;
+ let UnsupportedFeatures = [HasSPE, PrefixInstrs, MMA,
+ PairedVectorMemops, IsISA3_0, IsISA2_07,
+ PCRelativeMemops, IsISA3_1, IsISAFuture];
}
+let SchedModel = P7Model in {
+ def P7_LSU_FXU: ProcResource<4>;
+ def P7_LSU: ProcResource<2> {
+ let Super = P7_LSU_FXU;
+ }
+ def P7_FXU: ProcResource<2> {
+ let Super = P7_LSU_FXU;
+ }
+ // Implemented as two 2-way SIMD operations for double- and single-precision.
+ def P7_FPU: ProcResource<4>;
+ // Scalar binary floating point instructions can only use two FPUs.
+ def P7_ScalarFPU: ProcResource<2> {
+ let Super = P7_FPU;
+ }
+ def P7_VectorFPU: ProcResource<2> {
+ let Super = P7_FPU;
+ }
+  // Executes simple FX, complex FX, permute, and 4-way SIMD single-precision FP ops.
+ def P7_VMX: ProcResource<1>;
+ def P7_VPM: ProcResource<1> {
+ let Super = P7_VMX;
+ let BufferSize = 1;
+ }
+ def P7_VXS: ProcResource<1> {
+ let Super = P7_VMX;
+ }
+ def P7_DFU: ProcResource<1>;
+ def P7_BRU: ProcResource<1>;
+ def P7_CRU: ProcResource<1>;
+
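
The Super fields above arrange P7's units as nested pools: an op issued to P7_LSU or P7_FXU also consumes one of the four P7_LSU_FXU units, so the "either pipe" writes defined later compete with dedicated loads and FX ops for the same pool. A toy model of that accounting with hypothetical per-cycle demand:

    #include <algorithm>
    #include <cstdio>

    int main() {
      // Capacities from the defs above: 2 LSU + 2 FXU = 4 shared units.
      const int LSU = 2, FXU = 2, SharedPool = LSU + FXU;
      // Hypothetical demand in one cycle: 2 loads, 2 FX ops, 1 either-pipe op.
      int loads = 2, fxOps = 2, eitherOps = 1;
      int poolUsed = std::min(loads, LSU) + std::min(fxOps, FXU);
      int issuedEither = std::min(eitherOps, SharedPool - poolUsed);
      printf("either-pipe ops issued: %d\n", issuedEither); // 0: pool exhausted
      return 0;
    }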
+ def P7_PORT_LS : ProcResource<2>;
+ def P7_PORT_FX : ProcResource<2>;
+ def P7_PORT_FP : ProcResource<2>;
+ def P7_PORT_BR : ProcResource<1>;
+ def P7_PORT_CR : ProcResource<1>;
+
+ def P7_DISP_LS : SchedWriteRes<[P7_PORT_LS]>;
+ def P7_DISP_FX : SchedWriteRes<[P7_PORT_FX]>;
+ def P7_DISP_FP : SchedWriteRes<[P7_PORT_FP]>;
+ def P7_DISP_BR : SchedWriteRes<[P7_PORT_BR]>;
+ def P7_DISP_CR : SchedWriteRes<[P7_PORT_CR]>;
+
+ def P7_BRU_NONE : SchedWriteRes<[P7_BRU]>;
+ def P7_BRU_3C : SchedWriteRes<[P7_BRU]> { let Latency = 3; }
+ def P7_BRU_4C : SchedWriteRes<[P7_BRU]> { let Latency = 4; }
+ def P7_CRU_NONE : SchedWriteRes<[P7_CRU]>;
+ def P7_CRU_3C : SchedWriteRes<[P7_CRU]> { let Latency = 3; }
+ def P7_CRU_6C : SchedWriteRes<[P7_CRU]> { let Latency = 6; }
+ def P7_LSU_NONE : SchedWriteRes<[P7_LSU]>;
+ def P7_LSU_2C : SchedWriteRes<[P7_LSU]> { let Latency = 2; }
+ def P7_LSU_3C : SchedWriteRes<[P7_LSU]> { let Latency = 3; }
+ def P7_LSU_4C : SchedWriteRes<[P7_LSU]> { let Latency = 4; }
+ def P7_FXU_NONE : SchedWriteRes<[P7_FXU]>;
+ def P7_FXU_2C : SchedWriteRes<[P7_FXU]> { let Latency = 2; }
+ def P7_FXU_3C : SchedWriteRes<[P7_FXU]> { let Latency = 3; }
+ def P7_FXU_4C : SchedWriteRes<[P7_FXU]> { let Latency = 4; }
+ def P7_FXU_5C : SchedWriteRes<[P7_FXU]> { let Latency = 5; }
+ def P7_FXU_38C : SchedWriteRes<[P7_FXU]> { let Latency = 38; }
+ def P7_FXU_69C : SchedWriteRes<[P7_FXU]> { let Latency = 69; }
+ def P7_LSU_FXU_2C : SchedWriteRes<[P7_LSU_FXU]> { let Latency = 2; }
+ def P7_FPU_NONE : SchedWriteRes<[P7_FPU]>;
+ def P7_VectorFPU_6C : SchedWriteRes<[P7_VectorFPU]> { let Latency = 6; }
+ def P7_VectorFPU_25C : SchedWriteRes<[P7_VectorFPU]> { let Latency = 25; }
+ def P7_VectorFPU_30C : SchedWriteRes<[P7_VectorFPU]> { let Latency = 30; }
+ def P7_VectorFPU_31C : SchedWriteRes<[P7_VectorFPU]> { let Latency = 31; }
+ def P7_VectorFPU_42C : SchedWriteRes<[P7_VectorFPU]> { let Latency = 42; }
+ def P7_ScalarFPU_6C : SchedWriteRes<[P7_ScalarFPU]> { let Latency = 6; }
+ def P7_ScalarFPU_8C : SchedWriteRes<[P7_ScalarFPU]> { let Latency = 8; }
+ def P7_ScalarFPU_27C : SchedWriteRes<[P7_ScalarFPU]> { let Latency = 27; }
+ def P7_ScalarFPU_31C : SchedWriteRes<[P7_ScalarFPU]> { let Latency = 31; }
+ def P7_ScalarFPU_32C : SchedWriteRes<[P7_ScalarFPU]> { let Latency = 32; }
+ def P7_ScalarFPU_33C : SchedWriteRes<[P7_ScalarFPU]> { let Latency = 33; }
+ def P7_ScalarFPU_42C : SchedWriteRes<[P7_ScalarFPU]> { let Latency = 42; }
+ def P7_ScalarFPU_44C : SchedWriteRes<[P7_ScalarFPU]> { let Latency = 44; }
+ def P7_VXS_2C : SchedWriteRes<[P7_VXS]> { let Latency = 2; }
+ def P7_VPM_3C : SchedWriteRes<[P7_VPM]> { let Latency = 3; }
+
+  // Instructions of BRU pipeline
+
+ def : InstRW<[P7_BRU_NONE, P7_DISP_BR],
+ (instregex "^B(L)?(A)?(8)?(_NOP|_NOTOC)?(_TLS|_RM)?(_)?$")>;
+
+ def : InstRW<[P7_BRU_3C, P7_DISP_BR], (instrs
+ BDZLRLp, BDZLRm, BDZLRp, BDZLm, BDZLp, BDZm, BDZp,
+ BDNZ, BDNZ8, BDNZA, BDNZAm, BDNZAp, BDNZL, BDNZLA, BDNZLAm, BDNZLAp, BDNZLR,
+ BDNZLR8, BDNZLRL, BDNZLRLm, BDNZLRLp, BDNZLRm, BDNZLRp, BDNZLm, BDNZLp,
+ BDNZm, BDNZp, BDZ, BDZ8, BDZA, BDZAm, BDZAp, BDZL, BDZLA, BDZLAm, BDZLAp,
+ BDZLR, BDZLR8, BDZLRL, BDZLRLm, BLR, BLR8, BLRL, BCL, BCLR, BCLRL, BCLRLn,
+ BCLRn, BCLalways, BCLn, BCTR, BCTR8, BCTRL, BCTRL8, BCTRL8_LDinto_toc,
+ BCTRL8_LDinto_toc_RM, BCTRL8_RM, BCTRL_LWZinto_toc, BCTRL_LWZinto_toc_RM,
+    BCTRL_RM, BCn, BC, BCC, BCCA, BCCCTR, BCCCTR8, BCCCTRL, BCCCTRL8, BCCL,
+    BCCLA, BCCLR, BCCLRL, BCCTR, BCCTR8, BCCTR8n, BCCTRL, BCCTRL8, BCCTRL8n,
+    BCCTRLn, BCCTRn, gBC, gBCA,
+ gBCAat, gBCCTR, gBCCTRL, gBCL, gBCLA, gBCLAat, gBCLR, gBCLRL, gBCLat, gBCat,
+ MFCTR, MFCTR8, MFLR, MFLR8
+ )>;
+
+ def : InstRW<[P7_BRU_4C], (instrs MTLR, MTLR8, MTCTR, MTCTR8, MTCTR8loop, MTCTRloop)>;
+
+ // Instructions of CRU pipeline
+
+ def : InstRW<[P7_CRU_NONE], (instrs MFCR, MFCR8)>;
+ def : InstRW<[P7_CRU_3C], (instrs MCRF)>;
+ def : InstRW<[P7_CRU_6C, P7_DISP_CR], (instrs
+ CR6SET, CR6UNSET, CRSET, CRUNSET,
+ CRAND, CRANDC, CREQV, CRNAND, CRNOR, CRNOT, CROR, CRORC
+ )>;
+
+ // Instructions of LSU and FXU pipelines
+
+ def : InstRW<[P7_LSU_NONE, P7_DISP_LS], (instrs LMW, LWARX, LWARXL, LDARX, LDARXL)>;
+ def : InstRW<[P7_LSU_2C, P7_DISP_LS], (instrs LHBRX, LHBRX8, LWBRX, LWBRX8)>;
+ def : InstRW<[P7_LSU_3C], (instrs MFSR, MFSRIN)>;
+
+ def : InstRW<[P7_LSU_3C, P7_DISP_LS], (instrs
+ LFS, LFSX, LFSXTLS, LFSXTLS_, LFD, LFDX, LFDXTLS, LFDXTLS_, LXSDX, LXVD2X,
+ LXVW4X, LXVDSX
+ )>;
+
+ def : InstRW<[P7_LSU_3C, P7_FXU_3C, P7_DISP_LS], (instrs
+ LFSU, LFSUX, LFDU, LFDUX)>;
+
+ def : InstRW<[P7_LSU_NONE, P7_FPU_NONE, P7_DISP_LS], (instrs
+ STXSDX, STXVD2X, STXVW4X)>;
+
+ def : InstRW<[P7_LSU_4C, P7_FXU_4C, P7_DISP_LS], (instrs
+ LBARX, LBZCIX, LDBRX, LDCIX, LFIWAX, LFIWZX, LHARX, LHZCIX, LSWI, LVEBX,
+ LVEHX, LVEWX, LVSL, LVSR, LVX, LVXL, LWZCIX,
+ STFD, STFDU, STFDUX, STFDX, STFIWX, STFS, STFSU, STFSUX, STFSX,
+ STHCIX, STSWI, STVEBX, STVEHX, STVEWX, STVX, STVXL, STWCIX,
+ LHA, LHA8, LHAX, LHAX8, LWA, LWAX, LWAX_32, LWA_32, LHAU, LHAU8,
+ LHAUX, LHAUX8, LWAUX
+ )>;
+
+ def : InstRW<[P7_LSU_NONE, P7_FXU_NONE, P7_DISP_LS], (instrs
+ STB, STB8, STH, STH8, STW, STW8, STD, STBX, STBX8, STHX, STHX8, STWX,
+ STWX8, STDX, STHBRX, STWBRX, STMW, STWCX, STDCX, STDU, STHU, STHU8,
+ STBU, STBU8, STWU, STWU8, STDUX, STWUX, STWUX8, STHUX, STHUX8, STBUX, STBUX8
+ )>;
+
+ def : InstRW<[P7_LSU_2C, P7_FXU_2C, P7_DISP_LS], (instrs
+ LWZU, LWZU8, LHZU, LHZU8, LBZU, LBZU8, LDU,
+ LWZUX, LWZUX8, LHZUX, LHZUX8, LBZUX, LBZUX8, LDUX
+ )>;
+
+ def : InstRW<[P7_LSU_FXU_2C, P7_DISP_FX], (instrs
+ (instregex "^(ADD|L)I(S)?(8)?$"),
+ (instregex "^(ADD|SUBF)(4|8)(TLS)?(_)?(_rec)?$"),
+ (instregex "^(X)?ORI(S)?(8)?$"),
+    (instregex "^(X)?OR(8)?(_rec)?$"),
+ ADDIC, ADDIC8, SUBFIC, SUBFIC8, SUBFZE, SUBFZE8,
+ ADDE, ADDE8, ADDME, ADDME8, SUBFME, SUBFME8,
+ NEG, NEG8, NEG8_rec, NEG_rec, NEG8O, NEGO,
+ ANDI_rec, ANDIS_rec, AND, AND8, AND_rec, AND8_rec,
+ NAND, NAND8, NAND_rec, NAND8_rec, NOR, NOR8, NOR_rec, NOR8_rec,
+ EQV, EQV8, EQV_rec, EQV8_rec, ANDC, ANDC8, ANDC_rec, ANDC8_rec,
+ ORC, ORC8, ORC_rec, ORC8_rec
+ )>;
+
+ def : InstRW<[P7_FXU_2C, P7_DISP_FX], (instrs
+ CMPD, CMPDI, CMPLD, CMPLDI, CMPLW, CMPLWI, CMPW, CMPWI,
+ EXTSB8_32_64, EXTSB8_rec, EXTSH8_32_64, EXTSH8_rec, EXTSW_32,
+ EXTSW_32_64, EXTSW_32_64_rec, POPCNTB, POPCNTB8, POPCNTD, POPCNTW,
+ ADDPCIS, ANDI8_rec, ANDIS8_rec, SUBFUS, SUBFUS_rec,
+ ADD4O, ADD8O, ADDC, ADDC8, SUBFO, SUBF8O, SUBFC, SUBFC8,
+ ADDIC_rec, ADDE8_rec, ADDE_rec, SUBFE8_rec, SUBFE_rec,
+ ADDME8_rec, ADDME_rec, SUBFME8_rec, SUBFME_rec, ADDZE8_rec, ADDZE_rec,
+    SUBFZE_rec, SUBFZE8_rec, ADD4O_rec, ADD8O_rec, SUBFO_rec, SUBF8O_rec,
+    ADDE8O, ADDEO, SUBFE8O, SUBFEO, ADDME8O, ADDMEO, SUBFME8O, SUBFMEO,
+    ADDZE8O, ADDZEO, SUBFZE8O, SUBFZEO, NEG8O_rec, NEGO_rec,
+ ADDE8O_rec, ADDEO_rec, ADDMEO_rec, ADDME8O_rec, SUBFMEO_rec, SUBFME8O_rec,
+ ADDZEO_rec, ADDZE8O_rec, SUBFZEO_rec, SUBFZE8O_rec,
+ ADDC8_rec, ADDC_rec, ADDCO, ADDCO_rec, ADDC8O, ADDC8O_rec,
+ SUBFC8_rec, SUBFC_rec, SUBFCO, SUBFC8O, SUBFCO_rec, SUBFC8O_rec,
+ EXTSB, EXTSB8, EXTSB_rec, EXTSH, EXTSH8, EXTSH_rec, EXTSW, EXTSW_rec,
+ RLDICL, RLDICL_rec, RLDICR, RLDICR_rec, RLDIC, RLDIC_rec,
+ RLWINM, RLWINM8, RLWINM_rec, RLDCL, RLDCL_rec, RLDCR, RLDCR_rec,
+ RLWNM, RLWNM8, RLWNM_rec, RLDIMI, RLDIMI_rec,
+ RLDICL_32, RLDICL_32_64, RLDICL_32_rec, RLDICR_32, RLWINM8_rec, RLWNM8_rec,
+ SLD, SLD_rec, SLW, SLW8, SLW_rec, SLW8_rec, SRD, SRD_rec, SRW, SRW8, SRW_rec,
+ SRW8_rec, SRADI, SRADI_rec, SRAWI, SRAWI_rec, SRAD, SRAD_rec, SRAW, SRAW_rec,
+ SRADI_32, SUBFE, SUBFE8, SUBFE8O_rec, SUBFEO_rec
+ )>;
+
+ def : InstRW<[P7_FXU_3C, P7_DISP_FX], (instregex "^CNT(L|T)Z(D|W)(8)?(M)?(_rec)?$")>;
+
+ def : InstRW<[P7_FXU_5C, P7_DISP_FX], (instrs
+ MULLI, MULLI8, MULLW, MULHW, MULHWU, MULLD, MULHD, MULHDU, MULLWO, MULLDO,
+ MULLW_rec, MULLD_rec, MULHD_rec, MULHW_rec, MULHDU_rec, MULHWU_rec, MULLWO_rec,
+ MULLDO_rec
+ )>;
+
+ def : InstRW<[P7_FXU_38C, P7_DISP_FX], (instrs
+ DIVDE, DIVDEO, DIVDEO_rec, DIVDEU, DIVDEUO, DIVDEUO_rec, DIVDEU_rec, DIVDE_rec,
+ DIVWE, DIVWEO, DIVWEO_rec, DIVWEU, DIVWEUO, DIVWEUO_rec, DIVWEU_rec, DIVWE_rec,
+ DIVW, DIVWU, DIVWU_rec, DIVWO, DIVWO_rec, DIVWUO, DIVWUO_rec, DIVW_rec
+ )>;
+
+ def : InstRW<[P7_FXU_69C, P7_DISP_FX], (instrs
+ DIVD, DIVDU, DIVDO, DIVDO_rec, DIVDUO, DIVDUO_rec, DIVDU_rec, DIVD_rec)>;
+
+  // Instructions of FPU and VMX pipelines
+
+ def : InstRW<[P7_ScalarFPU_6C, P7_DISP_FP], (instrs
+ (instregex "^F(N)?(M)?(R|ADD|SUB|ABS|NEG|NABS|UL)(D|S)?(_rec)?$"),
+ (instregex "^FC(T|F)I(D|W)(U)?(S)?(Z)?(_rec)?$"),
+ (instregex "^XS(N)?M(SUB|ADD)(A|M)(D|S)P$"),
+ (instregex "^XS(NEG|ABS|NABS|ADD|SUB|MUL)(D|S)P(s)?$"),
+ FRE, FRES_rec, FRE_rec, FRSP_rec, FTDIV, FTSQRT,
+ FRSP, FRES, FRSQRTE, FRSQRTES, FRSQRTES_rec, FRSQRTE_rec, FSELD, FSELS,
+ FSELD_rec, FSELS_rec, FCPSGND, FCPSGND_rec, FCPSGNS, FCPSGNS_rec,
+ FRIMD, FRIMD_rec, FRIMS, FRIMS_rec, FRIND, FRIND_rec, FRINS, FRINS_rec,
+ FRIPD, FRIPD_rec, FRIPS, FRIPS_rec, FRIZD, FRIZD_rec, FRIZS, FRIZS_rec,
+ XSCPSGNDP, XSCVDPSP, XSCVDPSXDS, XSCVDPSXDSs, XSCVDPSXWS, XSCVDPSXWSs,
+ XSCVDPUXDS, XSCVDPUXDSs, XSCVDPUXWS, XSCVDPUXWSs, XSCVSPDP, XSCVSXDDP,
+ XSCVUXDDP, XSMAXDP, XSMINDP, XSRDPI, XSRDPIC, XSRDPIM, XSRDPIP, XSRDPIZ,
+ XSREDP, XSRSQRTEDP, XSTDIVDP, XSTSQRTDP, XSCMPODP, XSCMPUDP
+ )>;
+
+ def : InstRW<[P7_VectorFPU_6C, P7_DISP_FP], (instrs
+ (instregex "^XV(N)?(M)?(ADD|SUB)(A|M)?(D|S)P$"),
+ (instregex "^XV(MAX|MIN|MUL|NEG|ABS|ADD|NABS)(D|S)P$"),
+ XVCMPEQDP, XVCMPEQDP_rec, XVCMPGEDP, XVCMPGEDP_rec, XVCMPGTDP, XVCMPGTDP_rec,
+ XVCPSGNDP, XVCVDPSXDS, XVCVDPSXWS, XVCVDPUXDS, XVCVDPUXWS, XVCVSPSXDS,
+ XVCVSPSXWS, XVCVSPUXDS, XVCVSPUXWS, XVCVSXDDP, XVCVSXWDP, XVCVUXDDP,
+ XVCVUXWDP, XVRDPI, XVRDPIC, XVRDPIM, XVRDPIP, XVRDPIZ, XVREDP,
+ XVRSPI, XVRSPIC, XVRSPIM, XVRSPIP, XVRSPIZ, XVRSQRTEDP, XVTDIVDP,
+ XVTSQRTDP
+ )>;
+
+ // TODO: Altivec instructions are not listed in Book IV.
+ def : InstRW<[P7_VPM_3C, P7_DISP_FP], (instrs
+ (instregex "^VPK(S|U)(H|W)(S|U)(S|M)$"),
+ (instregex "^VUPK(H|L)(S|P)(X|B|H)$"),
+ VPERM, XXMRGHW, XXMRGLW, XXPERMDI, XXPERMDIs, XXSLDWI, XXSLDWIs,
+ VSPLTB, VSPLTBs, VSPLTH, VSPLTHs, VSPLTISB, VSPLTISH, VSPLTISW, VSPLTW,
+ XXSPLTW, XXSPLTWs, VSEL, XXSEL, VPKPX
+ )>;
+
+ def : InstRW<[P7_VXS_2C, P7_DISP_FP], (instrs
+ (instregex "^VADD(U|S)(B|H|W)(S|M)$"),
+ (instregex "^V(MAX|MIN)(S|U)(B|H|W)$"),
+ (instregex "^V(MRG)(L|H)(B|H|W)$"),
+ XXLORf, XXLXORdpz, XXLXORspz, XXLXORz, XVRSQRTESP, XVRESP,
+ XVTDIVSP, XVTSQRTSP, XVCMPEQSP, XVCMPEQSP_rec, XVCMPGESP, XVCMPGESP_rec,
+ XVCMPGTSP, XVCMPGTSP_rec, XVCVSXDSP, XVCVSXWSP, XVCVUXDSP, XVCVUXWSP,
+ XVCPSGNSP, XVCVDPSP, VADDCUW, VADDFP, VAND, VANDC, VAVGSB, VAVGSH,
+ VAVGSW, VAVGUB, VAVGUH, VAVGUW, VCFSX, VCFUX, VCMPBFP, VCMPBFP_rec,
+ VCMPEQFP, VCMPEQFP_rec, VCMPEQUB, VCMPEQUB_rec, VCMPEQUH, VCMPEQUH_rec,
+ VCMPEQUW, VCMPEQUW_rec, VCMPGEFP, VCMPGEFP_rec, VCMPGTFP, VCMPGTFP_rec,
+ VCMPGTSB, VCMPGTSB_rec, VCMPGTSH, VCMPGTSH_rec, VCMPGTSW, VCMPGTSW_rec,
+ VCMPGTUB, VCMPGTUB_rec, VCMPGTUH, VCMPGTUH_rec, VCMPGTUW, VCMPGTUW_rec,
+ VCTSXS, VCTUXS, VEXPTEFP, VLOGEFP, VNOR, VOR,
+ VMADDFP, VMHADDSHS, VMHRADDSHS, VMLADDUHM, VNMSUBFP, VMAXFP, VMINFP,
+ VMSUMMBM, VMSUMSHM, VMSUMSHS, VMSUMUBM, VMSUMUDM, VMSUMUHM, VMSUMUHS,
+ VMULESB, VMULESH, VMULEUB, VMULEUH, VMULOSB, VMULOSH, VMULOUB, VMULOUH,
+ VREFP, VRFIM, VRFIN, VRFIP, VRFIZ, VRLB, VRLH, VRLW, VRSQRTEFP,
+ VSR, VSRAB, VSRAH, VSRAW, VSRB, VSRH, VSRO, VSRW, VSUBCUW, VSL, VSLB,
+ VSLDOI, VSLH, VSLO, VSLW, VSUBFP, VSUBSBS, VSUBSHS, VSUBSWS, VSUBUBM,
+ VSUBUBS, VSUBUHM, VSUBUHS, VSUBUWM, VSUBUWS, VSUM2SWS, VSUM4SBS, VSUM4SHS,
+ VSUM4UBS, VSUMSWS, VXOR, XXLAND, XXLANDC, XXLNOR, XXLOR, XXLXOR
+ )>;
+
+ def : InstRW<[P7_ScalarFPU_8C, P7_DISP_FP],
+ (instrs FCMPOD, FCMPOS, FCMPUD, FCMPUS)>;
+ def : InstRW<[P7_ScalarFPU_27C, P7_DISP_FP], (instrs FDIVS, FDIVS_rec)>;
+ def : InstRW<[P7_ScalarFPU_31C, P7_DISP_FP], (instrs XSDIVDP)>;
+ def : InstRW<[P7_ScalarFPU_32C, P7_DISP_FP], (instrs FSQRTS, XSSQRTSP, FSQRTS_rec)>;
+ def : InstRW<[P7_ScalarFPU_33C, P7_DISP_FP], (instrs FDIV, FDIV_rec)>;
+ def : InstRW<[P7_ScalarFPU_42C, P7_DISP_FP], (instrs XSSQRTDP)>;
+ def : InstRW<[P7_ScalarFPU_44C, P7_DISP_FP], (instrs FSQRT, FSQRT_rec)>;
+
+ def : InstRW<[P7_VectorFPU_25C, P7_DISP_FP], (instrs XVDIVSP)>;
+ def : InstRW<[P7_VectorFPU_30C, P7_DISP_FP], (instrs XVSQRTSP)>;
+ def : InstRW<[P7_VectorFPU_31C, P7_DISP_FP], (instrs XVDIVDP)>;
+ def : InstRW<[P7_VectorFPU_42C, P7_DISP_FP], (instrs XVSQRTDP)>;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP8.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP8.td
index 70a58f42a98a..3a2d9d9b3bc1 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP8.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP8.td
@@ -6,408 +6,332 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the itinerary class data for the POWER8 processor.
+// This file defines the SchedModel for the POWER8 processor.
//
//===----------------------------------------------------------------------===//
-// Scheduling for the P8 involves tracking two types of resources:
-// 1. The dispatch bundle slots
-// 2. The functional unit resources
-
-// Dispatch units:
-def P8_DU1 : FuncUnit;
-def P8_DU2 : FuncUnit;
-def P8_DU3 : FuncUnit;
-def P8_DU4 : FuncUnit;
-def P8_DU5 : FuncUnit;
-def P8_DU6 : FuncUnit;
-def P8_DU7 : FuncUnit; // Only branch instructions will use DU7,DU8
-def P8_DU8 : FuncUnit;
-
-// 10 insns per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).
-
-def P8_LU1 : FuncUnit; // Loads or fixed-point operations 1
-def P8_LU2 : FuncUnit; // Loads or fixed-point operations 2
-
-// Load/Store pipelines can handle Stores, fixed-point loads, and simple
-// fixed-point operations.
-def P8_LSU1 : FuncUnit; // Load/Store pipeline 1
-def P8_LSU2 : FuncUnit; // Load/Store pipeline 2
-
-// Fixed Point unit
-def P8_FXU1 : FuncUnit; // FX pipeline 1
-def P8_FXU2 : FuncUnit; // FX pipeline 2
-
-// The Floating-Point Unit (FPU) and Vector Media Extension (VMX) units
-// The P8 instruction latency documents still refer to the unit as the
-// The P8 Instruction latency documents still refers to the unit as the
-// FPU, so keep in mind that FPU==VSU.
-// In contrast to the P7, the VMX units on P8 are symmetric, so no need to
-// split vector integer ops or 128-bit load/store/perms to the specific units.
-def P8_FPU1 : FuncUnit; // VS pipeline 1
-def P8_FPU2 : FuncUnit; // VS pipeline 2
-
-def P8_CRU : FuncUnit; // CR unit (CR logicals and move-from-SPRs)
-def P8_BRU : FuncUnit; // BR unit
-
-def P8Itineraries : ProcessorItineraries<
- [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6, P8_DU7, P8_DU8,
- P8_LU1, P8_LU2, P8_LSU1, P8_LSU2, P8_FXU1, P8_FXU2,
- P8_FPU1, P8_FPU2, P8_CRU, P8_BRU], [], [
- InstrItinData<IIC_IntSimple , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2,
- P8_LU1, P8_LU2,
- P8_LSU1, P8_LSU2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_IntGeneral , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2, P8_LU1,
- P8_LU2, P8_LSU1, P8_LSU2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_IntISEL, [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
- InstrStage<1, [P8_BRU]>],
- [1, 1, 1, 1]>,
- InstrItinData<IIC_IntCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_IntDivW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<15, [P8_FXU1, P8_FXU2]>],
- [15, 1, 1]>,
- InstrItinData<IIC_IntDivD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<23, [P8_FXU1, P8_FXU2]>],
- [23, 1, 1]>,
- InstrItinData<IIC_IntMulHW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [4, 1, 1]>,
- InstrItinData<IIC_IntMulHWU , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [4, 1, 1]>,
- InstrItinData<IIC_IntMulHD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [4, 1, 1]>,
- InstrItinData<IIC_IntMulLI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [4, 1, 1]>,
- InstrItinData<IIC_IntRotate , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_IntRotateD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_IntRotateDI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_IntShift , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_IntTrapW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [1, 1]>,
- InstrItinData<IIC_IntTrapD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [1, 1]>,
- InstrItinData<IIC_BrB , [InstrStage<1, [P8_DU7, P8_DU8], 0>,
- InstrStage<1, [P8_BRU]>],
- [3, 1, 1]>,
- // FIXME - the Br* groups below are not branch related, so should probably
- // be renamed.
- // IIC_BrCR consists of the cr* instructions. (crand,crnor,creqv, etc).
- // and should be 'First' in dispatch.
- InstrItinData<IIC_BrCR , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_CRU]>],
- [3, 1, 1]>,
- // IIC_BrMCR consists of the mcrf instruction.
- InstrItinData<IIC_BrMCR , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_CRU]>],
- [3, 1, 1]>,
- // IIC_BrMCRX consists of mcrxr (obsolete instruction) and mtcrf, which
- // should be first in the dispatch group.
- InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [3, 1, 1]>,
- InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [3, 1]>,
- InstrItinData<IIC_LdStLoad , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_LSU1, P8_LSU2,
- P8_LU1, P8_LU2]>],
- [2, 1, 1]>,
- InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_DU2], 0>,
- InstrStage<1, [P8_LSU1, P8_LSU2,
- P8_LU1, P8_LU2 ], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [2, 2, 1, 1]>,
- // Update-Indexed form loads/stores are no longer first and last in the
- // dispatch group. They are simply cracked, so require DU1,DU2.
- InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_DU2], 0>,
- InstrStage<1, [P8_LSU1, P8_LSU2,
- P8_LU1, P8_LU2], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [3, 3, 1, 1]>,
- InstrItinData<IIC_LdStLD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_LSU1, P8_LSU2,
- P8_LU1, P8_LU2]>],
- [2, 1, 1]>,
- InstrItinData<IIC_LdStLDU , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_DU2], 0>,
- InstrStage<1, [P8_LSU1, P8_LSU2,
- P8_LU1, P8_LU2], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [2, 2, 1, 1]>,
- InstrItinData<IIC_LdStLDUX , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_DU2], 0>,
- InstrStage<1, [P8_LSU1, P8_LSU2,
- P8_LU1, P8_LU2], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [3, 3, 1, 1]>,
- InstrItinData<IIC_LdStLFD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_LU1, P8_LU2]>],
- [3, 1, 1]>,
- InstrItinData<IIC_LdStLVecX , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_LU1, P8_LU2]>],
- [3, 1, 1]>,
- InstrItinData<IIC_LdStLFDU , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_DU2], 0>,
- InstrStage<1, [P8_LU1, P8_LU2], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [3, 3, 1, 1]>,
- InstrItinData<IIC_LdStLFDUX , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_DU2], 0>,
- InstrStage<1, [P8_LU1, P8_LU2], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [3, 3, 1, 1]>,
- InstrItinData<IIC_LdStLHA , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_DU2], 0>,
- InstrStage<1, [P8_LSU1, P8_LSU2,
- P8_LU1, P8_LU2], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2,
- P8_LU1, P8_LU2]>],
- [3, 1, 1]>,
- InstrItinData<IIC_LdStLHAU , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_DU2], 0>,
- InstrStage<1, [P8_LSU1, P8_LSU2,
- P8_LU1, P8_LU2], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [4, 4, 1, 1]>,
- // first+last in dispatch group.
- InstrItinData<IIC_LdStLHAUX , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_DU2], 0>,
- InstrStage<1, [P8_DU3], 0>,
- InstrStage<1, [P8_DU4], 0>,
- InstrStage<1, [P8_DU5], 0>,
- InstrStage<1, [P8_DU6], 0>,
- InstrStage<1, [P8_LSU1, P8_LSU2,
- P8_LU1, P8_LU2], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [4, 4, 1, 1]>,
- InstrItinData<IIC_LdStLWA , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_DU2], 0>,
- InstrStage<1, [P8_LSU1, P8_LSU2,
- P8_LU1, P8_LU2]>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [3, 1, 1]>,
- InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_DU2], 0>,
- InstrStage<1, [P8_DU3], 0>,
- InstrStage<1, [P8_DU4], 0>,
- InstrStage<1, [P8_LSU1, P8_LSU2,
- P8_LU1, P8_LU2]>],
- [3, 1, 1]>,
- // first+last
- InstrItinData<IIC_LdStLDARX, [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_DU2], 0>,
- InstrStage<1, [P8_DU3], 0>,
- InstrStage<1, [P8_DU4], 0>,
- InstrStage<1, [P8_DU5], 0>,
- InstrStage<1, [P8_DU6], 0>,
- InstrStage<1, [P8_LSU1, P8_LSU2,
- P8_LU1, P8_LU2]>],
- [3, 1, 1]>,
- InstrItinData<IIC_LdStLMW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_LSU1, P8_LSU2,
- P8_LU1, P8_LU2]>],
- [2, 1, 1]>,
-// Stores are dual-issued from the issue queue, so may only take up one
-// dispatch slot. The instruction will be broken into two IOPS. The agen
-// op is issued to the LSU, and the data op (register fetch) is issued
-// to either the LU (GPR store) or the VSU (FPR store).
- InstrItinData<IIC_LdStStore , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_LSU1, P8_LSU2]>,
- InstrStage<1, [P8_LU1, P8_LU2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_LdStSTD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_LU1, P8_LU2,
-                                             P8_LSU1, P8_LSU2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_LdStSTU , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_DU2], 0>,
- InstrStage<1, [P8_LU1, P8_LU2,
- P8_LSU1, P8_LSU2], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [2, 1, 1, 1]>,
- // First+last
- InstrItinData<IIC_LdStSTUX , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_DU2], 0>,
- InstrStage<1, [P8_DU3], 0>,
- InstrStage<1, [P8_DU4], 0>,
- InstrStage<1, [P8_DU5], 0>,
- InstrStage<1, [P8_DU6], 0>,
- InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [2, 1, 1, 1]>,
- InstrItinData<IIC_LdStSTFD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
- InstrStage<1, [P8_FPU1, P8_FPU2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_LdStSTFDU , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_DU2], 0>,
- InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
- InstrStage<1, [P8_FPU1, P8_FPU2]>],
- [2, 1, 1, 1]>,
- InstrItinData<IIC_LdStSTVEBX , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
- InstrStage<1, [P8_FPU1, P8_FPU2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_LdStSTDCX , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_DU2], 0>,
- InstrStage<1, [P8_DU3], 0>,
- InstrStage<1, [P8_DU4], 0>,
- InstrStage<1, [P8_DU5], 0>,
- InstrStage<1, [P8_DU6], 0>,
- InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
- InstrStage<1, [P8_LU1, P8_LU2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_LdStSTWCX , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_DU2], 0>,
- InstrStage<1, [P8_DU3], 0>,
- InstrStage<1, [P8_DU4], 0>,
- InstrStage<1, [P8_DU5], 0>,
- InstrStage<1, [P8_DU6], 0>,
- InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
- InstrStage<1, [P8_LU1, P8_LU2]>],
- [1, 1, 1]>,
- InstrItinData<IIC_SprMFCR , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_CRU]>],
- [6, 1]>,
- InstrItinData<IIC_SprMFCRF , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_CRU]>],
- [3, 1]>,
- InstrItinData<IIC_SprMTSPR , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_FXU1, P8_FXU2]>],
- [4, 1]>, // mtctr
- InstrItinData<IIC_FPGeneral , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_FPU1, P8_FPU2]>],
- [5, 1, 1]>,
- InstrItinData<IIC_FPAddSub , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_FPU1, P8_FPU2]>],
- [5, 1, 1]>,
- InstrItinData<IIC_FPCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_FPU1, P8_FPU2]>],
- [8, 1, 1]>,
- InstrItinData<IIC_FPDivD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_FPU1, P8_FPU2]>],
- [33, 1, 1]>,
- InstrItinData<IIC_FPDivS , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_FPU1, P8_FPU2]>],
- [27, 1, 1]>,
- InstrItinData<IIC_FPSqrtD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_FPU1, P8_FPU2]>],
- [44, 1, 1]>,
- InstrItinData<IIC_FPSqrtS , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_FPU1, P8_FPU2]>],
- [32, 1, 1]>,
- InstrItinData<IIC_FPFused , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_FPU1, P8_FPU2]>],
- [5, 1, 1, 1]>,
- InstrItinData<IIC_FPRes , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
- P8_DU4, P8_DU5, P8_DU6], 0>,
- InstrStage<1, [P8_FPU1, P8_FPU2]>],
- [5, 1, 1]>,
- InstrItinData<IIC_VecGeneral , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_FPU1, P8_FPU2]>],
- [2, 1, 1]>,
- InstrItinData<IIC_VecVSL , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_FPU1, P8_FPU2]>],
- [2, 1, 1]>,
- InstrItinData<IIC_VecVSR , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_FPU1, P8_FPU2]>],
- [2, 1, 1]>,
- InstrItinData<IIC_VecFP , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_FPU1, P8_FPU2]>],
- [6, 1, 1]>,
- InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_FPU1, P8_FPU2]>],
- [6, 1, 1]>,
- InstrItinData<IIC_VecFPRound , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_FPU1, P8_FPU2]>],
- [6, 1, 1]>,
- InstrItinData<IIC_VecComplex , [InstrStage<1, [P8_DU1], 0>,
- InstrStage<1, [P8_FPU1, P8_FPU2]>],
- [7, 1, 1]>,
- InstrItinData<IIC_VecPerm , [InstrStage<1, [P8_DU1, P8_DU2], 0>,
- InstrStage<1, [P8_FPU1, P8_FPU2]>],
- [3, 1, 1]>
-]>;
-
-// ===---------------------------------------------------------------------===//
-// P8 machine model for scheduling and other instruction cost heuristics.
-// P8 has an 8 insn dispatch group (6 non-branch, 2 branch) and can issue up
-// to 10 insns per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).
-
def P8Model : SchedMachineModel {
- let IssueWidth = 8; // up to 8 instructions dispatched per cycle.
- // up to six non-branch instructions.
- // up to two branches in a dispatch group.
-
- let LoadLatency = 3; // Optimistic load latency assuming bypass.
- // This is overriden by OperandCycles if the
- // Itineraries are queried instead.
+ let IssueWidth = 8;
+ let LoadLatency = 3;
let MispredictPenalty = 16;
-
- // Try to make sure we have at least 10 dispatch groups in a loop.
let LoopMicroOpBufferSize = 60;
-
+ let MicroOpBufferSize = 64;
+  // TODO: Due to limitations in the instruction definitions, non-P8
+  // instructions must be listed here. Change this once that is fixed.
let CompleteModel = 0;
-
- let Itineraries = P8Itineraries;
+ let UnsupportedFeatures = [HasSPE, PrefixInstrs, MMA,
+ PairedVectorMemops, PCRelativeMemops,
+ IsISA3_0, IsISA3_1, IsISAFuture];
}
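
The deleted comments explained where 60 comes from: roughly ten dispatch groups' worth of micro-ops, at six non-branch slots per P8 dispatch group. The retained LoopMicroOpBufferSize still matches that arithmetic:

    #include <cassert>

    int main() {
      const int NonBranchSlotsPerGroup = 6; // from the deleted IssueWidth note
      const int TargetDispatchGroups = 10;  // "at least 10 dispatch groups"
      assert(NonBranchSlotsPerGroup * TargetDispatchGroups == 60);
      return 0; // matches LoopMicroOpBufferSize = 60 above
    }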
+let SchedModel = P8Model in {
+ // Power8 Pipeline Units:
+
+ def P8_LU_LS_FX : ProcResource<6>;
+ def P8_LU_LS : ProcResource<4> { let Super = P8_LU_LS_FX; }
+ def P8_LS : ProcResource<2> { let Super = P8_LU_LS; }
+ def P8_LU : ProcResource<2> { let Super = P8_LU_LS; }
+ def P8_FX : ProcResource<2> { let Super = P8_LU_LS_FX; }
+ def P8_DFU : ProcResource<1>;
+ def P8_BR : ProcResource<1> { let BufferSize = 16; }
+ def P8_CY : ProcResource<1>;
+ def P8_CRL : ProcResource<1>;
+ def P8_VMX : ProcResource<2>;
+ def P8_PM : ProcResource<2> {
+    // This is a workaround so the scheduler respects the latency of long permute chains.
+ let BufferSize = 1;
+ let Super = P8_VMX;
+ }
+ def P8_XS : ProcResource<2> { let Super = P8_VMX; }
+ def P8_VX : ProcResource<2> { let Super = P8_VMX; }
+ def P8_FPU : ProcResource<4>;
+ // Units for scalar, 2xDouble and 4xSingle
+ def P8_FP_Scal : ProcResource<2> { let Super = P8_FPU; }
+ def P8_FP_2x64 : ProcResource<2> { let Super = P8_FPU; }
+ def P8_FP_4x32 : ProcResource<2> { let Super = P8_FPU; }
+
+ // Power8 Dispatch Ports:
+ // Two ports to do loads or fixed-point operations.
+ // Two ports to do stores, fixed-point loads, or fixed-point operations.
+ // Two ports for fixed-point operations.
+  // Two issue ports shared by 2 FP/2 VSX/2 VMX/1 CY/1 DFP operations.
+ // One for branch operations.
+ // One for condition register operations.
+
+ // TODO: Model dispatch of cracked instructions.
+
+ // Six ports in total are available for fixed-point operations.
+ def P8_PORT_ALLFX : ProcResource<6>;
+ // Four ports in total are available for fixed-point load operations.
+ def P8_PORT_FXLD : ProcResource<4> { let Super = P8_PORT_ALLFX; }
+ // Two ports to do loads or fixed-point operations.
+ def P8_PORT_LD_FX : ProcResource<2> { let Super = P8_PORT_FXLD; }
+ // Two ports to do stores, fixed-point loads, or fixed-point operations.
+ def P8_PORT_ST_FXLD_FX : ProcResource<2> { let Super = P8_PORT_FXLD; }
+  // Two issue ports shared by two floating-point, two VSX, two VMX, one
+  // crypto, and one DFP pipeline.
+ def P8_PORT_VMX_FP : ProcResource<2>;
+ // One port for branch operation.
+ def P8_PORT_BR : ProcResource<1>;
+ // One port for condition register operation.
+ def P8_PORT_CR : ProcResource<1>;
+
+ def P8_ISSUE_FX : SchedWriteRes<[P8_PORT_ALLFX]>;
+ def P8_ISSUE_FXLD : SchedWriteRes<[P8_PORT_FXLD]>;
+ def P8_ISSUE_LD : SchedWriteRes<[P8_PORT_LD_FX]>;
+ def P8_ISSUE_ST : SchedWriteRes<[P8_PORT_ST_FXLD_FX]>;
+ def P8_ISSUE_VSX : SchedWriteRes<[P8_PORT_VMX_FP]>;
+ def P8_ISSUE_BR : SchedWriteRes<[P8_PORT_BR]>;
+ def P8_ISSUE_CR : SchedWriteRes<[P8_PORT_CR]>;
+
+ // Power8 Instruction Latency & Port Groups:
+
+ def P8_LS_LU_NONE : SchedWriteRes<[P8_LU, P8_LS]>;
+ def P8_LS_FP_NONE : SchedWriteRes<[P8_LS, P8_FPU]>;
+ def P8_LU_or_LS_3C : SchedWriteRes<[P8_LU_LS]> { let Latency = 3; }
+ def P8_LS_FX_3C : SchedWriteRes<[P8_LS, P8_FX]> { let Latency = 3; }
+ def P8_LU_or_LS_or_FX_2C : SchedWriteRes<[P8_LU_LS_FX]> { let Latency = 2; }
+ def P8_LU_or_LS_FX_3C : SchedWriteRes<[P8_LU_LS, P8_FX]> { let Latency = 3; }
+ def P8_FX_NONE : SchedWriteRes<[P8_FX]>;
+ def P8_FX_1C : SchedWriteRes<[P8_FX]> { let Latency = 1; }
+ def P8_FX_2C : SchedWriteRes<[P8_FX]> { let Latency = 2; }
+ def P8_FX_3C : SchedWriteRes<[P8_FX]> { let Latency = 3; }
+ def P8_FX_5C : SchedWriteRes<[P8_FX]> { let Latency = 5; }
+ def P8_FX_10C : SchedWriteRes<[P8_FX]> { let Latency = 10; }
+ def P8_FX_23C : SchedWriteRes<[P8_FX]> { let Latency = 23; }
+ def P8_FX_15C : SchedWriteRes<[P8_FX]> { let Latency = 15; }
+ def P8_FX_41C : SchedWriteRes<[P8_FX]> { let Latency = 41; }
+ def P8_BR_2C : SchedWriteRes<[P8_BR]> { let Latency = 2; }
+ def P8_CR_NONE : SchedWriteRes<[P8_CRL]>;
+ def P8_CR_3C : SchedWriteRes<[P8_CRL]> { let Latency = 3; }
+ def P8_CR_5C : SchedWriteRes<[P8_CRL]> { let Latency = 5; }
+ def P8_LU_5C : SchedWriteRes<[P8_LU]> { let Latency = 5; }
+ def P8_LU_FX_5C : SchedWriteRes<[P8_LU, P8_FX]> { let Latency = 5; }
+ def P8_LS_FP_FX_2C : SchedWriteRes<[P8_LS, P8_FPU, P8_FX]> { let Latency = 2; }
+ def P8_LS_FP_FX_3C : SchedWriteRes<[P8_LS, P8_FPU, P8_FX]> { let Latency = 3; }
+ def P8_LS_3C : SchedWriteRes<[P8_LS]> { let Latency = 3; }
+ def P8_FP_3C : SchedWriteRes<[P8_FPU]> { let Latency = 3; }
+ def P8_FP_Scal_6C : SchedWriteRes<[P8_FP_Scal]> { let Latency = 6; }
+ def P8_FP_4x32_6C : SchedWriteRes<[P8_FP_4x32]> { let Latency = 6; }
+ def P8_FP_2x64_6C : SchedWriteRes<[P8_FP_2x64]> { let Latency = 6; }
+ def P8_FP_26C : SchedWriteRes<[P8_FP_Scal]> { let Latency = 26; }
+ def P8_FP_28C : SchedWriteRes<[P8_FP_4x32]> { let Latency = 28; }
+ def P8_FP_31C : SchedWriteRes<[P8_FP_Scal]> { let Latency = 31; }
+ def P8_FP_Scal_32C : SchedWriteRes<[P8_FP_Scal]> { let Latency = 32; }
+ def P8_FP_2x64_32C : SchedWriteRes<[P8_FP_2x64]> { let Latency = 32; }
+ def P8_FP_4x32_32C : SchedWriteRes<[P8_FP_4x32]> { let Latency = 32; }
+ def P8_FP_Scal_43C : SchedWriteRes<[P8_FP_Scal]> { let Latency = 43; }
+ def P8_FP_2x64_43C : SchedWriteRes<[P8_FP_2x64]> { let Latency = 43; }
+ def P8_XS_2C : SchedWriteRes<[P8_XS]> { let Latency = 2; }
+ def P8_PM_2C : SchedWriteRes<[P8_PM]> { let Latency = 2; }
+ def P8_XS_4C : SchedWriteRes<[P8_XS]> { let Latency = 4; }
+ def P8_VX_7C : SchedWriteRes<[P8_VX]> { let Latency = 7; }
+ def P8_XS_9C : SchedWriteRes<[P8_XS]> { let Latency = 9; }
+ def P8_CY_6C : SchedWriteRes<[P8_CY]> { let Latency = 6; }
+ def P8_DFU_13C : SchedWriteRes<[P8_DFU]> { let Latency = 13; }
+ def P8_DFU_15C : SchedWriteRes<[P8_DFU]> { let Latency = 15; }
+ def P8_DFU_17C : SchedWriteRes<[P8_DFU]> { let Latency = 17; }
+ def P8_DFU_25C : SchedWriteRes<[P8_DFU]> { let Latency = 25; }
+ def P8_DFU_32C : SchedWriteRes<[P8_DFU]> { let Latency = 32; }
+ def P8_DFU_34C : SchedWriteRes<[P8_DFU]> { let Latency = 34; }
+ def P8_DFU_40C : SchedWriteRes<[P8_DFU]> { let Latency = 40; }
+ def P8_DFU_90C : SchedWriteRes<[P8_DFU]> { let Latency = 90; }
+ def P8_DFU_96C : SchedWriteRes<[P8_DFU]> { let Latency = 96; }
+ def P8_DFU_172C : SchedWriteRes<[P8_DFU]> { let Latency = 172; }
+ // Direct move instructions
+ def P8_DM_5C : SchedWriteRes<[]> { let Latency = 5; }
+
+ // Instructions of CR pipeline
+
+ def : InstRW<[P8_CR_NONE, P8_ISSUE_CR], (instrs MFCR, MFCR8)>;
+ def : InstRW<[P8_CR_3C, P8_ISSUE_CR], (instrs MFOCRF, MFOCRF8)>;
+ def : InstRW<[P8_CR_5C, P8_ISSUE_CR], (instrs MFLR, MFLR8, MFCTR, MFCTR8)>;
+
+ // Instructions of CY pipeline
+
+ def : InstRW<[P8_CY_6C, P8_ISSUE_VSX], (instrs
+ VCIPHER, VCIPHERLAST, VNCIPHER, VNCIPHERLAST, VPMSUMB, VPMSUMD, VPMSUMH, VPMSUMW, VSBOX)>;
+
+ // Instructions of FPU pipeline
+
+ def : InstRW<[P8_FP_26C, P8_ISSUE_VSX], (instrs (instregex "^FDIVS(_rec)?$"), XSDIVSP)>;
+ def : InstRW<[P8_FP_28C, P8_ISSUE_VSX], (instrs XVDIVSP)>;
+ def : InstRW<[P8_FP_31C, P8_ISSUE_VSX], (instregex "^FSQRTS(_rec)?$")>;
+ def : InstRW<[P8_FP_Scal_32C, P8_ISSUE_VSX], (instrs FDIV, FDIV_rec, XSDIVDP)>;
+ def : InstRW<[P8_FP_2x64_32C, P8_ISSUE_VSX], (instrs XVDIVDP)>;
+ def : InstRW<[P8_FP_4x32_32C, P8_ISSUE_VSX], (instrs XVSQRTSP)>;
+ def : InstRW<[P8_FP_Scal_43C, P8_ISSUE_VSX], (instrs FSQRT, FSQRT_rec, XSSQRTDP)>;
+ def : InstRW<[P8_FP_2x64_43C, P8_ISSUE_VSX], (instrs XVSQRTDP)>;
+
+ def : InstRW<[P8_FP_3C, P8_ISSUE_VSX], (instrs
+ MTFSFI_rec, MTFSF_rec, MTFSFI, MTFSFIb, MTFSF, MTFSFb, MTFSB0, MTFSB1)>;
+
+ def : InstRW<[P8_FP_Scal_6C, P8_ISSUE_VSX], (instrs
+ (instregex "^F(N)?M(ADD|SUB)(S)?(_rec)?$"),
+ (instregex "^XS(N)?M(ADD|SUB)(A|M)(D|S)P$"),
+ (instregex "^FC(F|T)I(D|W)(U)?(S|Z)?(_rec)?$"),
+ (instregex "^(F|XS)(ABS|CPSGN|ADD|MUL|NABS|RE|NEG|SUB|SEL|RSQRTE)(D|S)?(P)?(s)?(_rec)?$"),
+ (instregex "^FRI(M|N|P|Z)(D|S)(_rec)?$"),
+ (instregex "^XSCVDP(S|U)X(W|D)S(s)?$"),
+ (instregex "^XSCV(S|U)XD(D|S)P$"),
+ (instregex "^XSCV(D|S)P(S|D)P(N)?$"),
+ (instregex "^XSRDPI(C|M|P|Z)?$"),
+ FMR, FRSP, FMR_rec, FRSP_rec, XSRSP)>;
+
+ def : InstRW<[P8_FP_4x32_6C, P8_ISSUE_VSX], (instrs
+ (instregex "^XV(N)?M(ADD|SUB)(A|M)SP$"),
+ (instregex "^VRFI(M|N|P|Z)$"),
+ XVRSQRTESP, XVSUBSP, VADDFP, VEXPTEFP, VLOGEFP, VMADDFP, VNMSUBFP, VREFP,
+ VRSQRTEFP, VSUBFP, XVCVSXWSP, XVCVUXWSP, XVMULSP, XVNABSSP, XVNEGSP, XVRESP,
+ XVCVDPSP, XVCVSXDSP, XVCVUXDSP, XVABSSP, XVADDSP, XVCPSGNSP)>;
+
+ def : InstRW<[P8_FP_2x64_6C, P8_ISSUE_VSX], (instrs
+ (instregex "^XVR(D|S)PI(C|M|P|Z)?$"),
+ (instregex "^XVCV(S|U)X(D|W)DP$"),
+ (instregex "^XVCV(D|W|S)P(S|U)X(D|W)S$"),
+ (instregex "^XV(N)?(M)?(RSQRTE|CPSGN|SUB|ADD|ABS|UL|NEG|RE)(A|M)?DP$"),
+ XVCVSPDP)>;
+
+ // Instructions of FX, LU or LS pipeline
+
+ def : InstRW<[P8_FX_NONE, P8_ISSUE_FX], (instrs TDI, TWI, TD, TW, MTCRF, MTCRF8, MTOCRF, MTOCRF8)>;
+ def : InstRW<[P8_FX_1C, P8_ISSUE_FX], (instregex "^RLWIMI(8)?$")>;
+ // TODO: Pipeline of logical instructions might be LS or FX
+ def : InstRW<[P8_FX_2C, P8_ISSUE_FX], (instrs
+ (instregex "^(N|X)?(EQV|AND|OR)(I)?(S|C)?(8)?(_rec)?$"),
+ (instregex "^EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
+ (instregex "^RL(D|W)(I)?(NM|C)(L|R)?(8)?(_32)?(_64)?(_rec)?$"),
+ (instregex "^S(L|R)(A)?(W|D)(I)?(8)?(_rec|_32)?$"),
+ (instregex "^(ADD|SUBF)(M|Z)?(C|E)?(4|8)?O(_rec)?$"),
+ (instregex "^(ADD|SUBF)(M|Z)?E(8)?_rec$"),
+ (instregex "^(ADD|SUBF|NEG)(4|8)?_rec$"),
+ NOP, ADDG6S, ADDG6S8, ADDZE, ADDZE8, ADDIC_rec, NEGO_rec, ADDC, ADDC8, SUBFC, SUBFC8,
+ ADDC_rec, ADDC8_rec, SUBFC_rec, SUBFC8_rec, COPY, NEG8O_rec,
+ RLDIMI, RLDIMI_rec, RLWIMI8_rec, RLWIMI_rec)>;
+
+ def : InstRW<[P8_FX_3C], (instregex "^(POP)?CNT(LZ)?(B|W|D)(8)?(_rec)?$")>;
+ def : InstRW<[P8_FX_5C, P8_ISSUE_FX], (instrs
+ (instregex "^MUL(H|L)(I|W|D)(8)?(U|O)?(_rec)?$"),
+ CMPDI,CMPWI,CMPD,CMPW,CMPLDI,CMPLWI,CMPLD,CMPLW,
+ ISEL, ISEL8, MTLR, MTLR8, MTCTR, MTCTR8, MTCTR8loop, MTCTRloop)>;
+
+ def : InstRW<[P8_FX_10C, P8_ISSUE_VSX], (instregex "^MFTB(8)?$")>;
+ def : InstRW<[P8_FX_15C, P8_ISSUE_FX], (instregex "^DIVW(U)?$")>;
+
+ def : InstRW<[P8_FX_23C, P8_ISSUE_FX], (instregex "^DIV(D|WE)(U)?$")>;
+ def : InstRW<[P8_FX_41C], (instrs
+ (instregex "^DIV(D|W)(E)?(U)?O(_rec)?$"),
+ (instregex "^DIV(D|W)(E)?(U)?_rec$"),
+ DIVDE, DIVDEU)>;
+
+ def : InstRW<[P8_LS_3C, P8_ISSUE_FX], (instrs MFSR, MFSRIN)>;
+
+ def : InstRW<[P8_LU_5C, P8_ISSUE_LD], (instrs
+ LFS, LFSX, LFD, LFDX, LFDXTLS, LFDXTLS_, LXVD2X, LXVW4X, LXVDSX, LVEBX, LVEHX, LVEWX,
+ LVX, LVXL, LXSDX, LFIWAX, LFIWZX, LFSXTLS, LFSXTLS_, LXVB16X, LXVD2X, LXSIWZX,
+ DFLOADf64, XFLOADf64, LIWZX)>;
+
+ def : InstRW<[P8_LS_FX_3C, P8_ISSUE_FXLD], (instrs LQ)>;
+ def : InstRW<[P8_LU_FX_5C, P8_ISSUE_LD], (instregex "^LF(D|S)U(X)?$")>;
+
+ def : InstRW<[P8_LS_FP_NONE, P8_ISSUE_ST], (instrs
+ STXSDX, STXVD2X, STXVW4X, STFIWX, STFS, STFSX, STFD, STFDX,
+ STFDEPX, STFDXTLS, STFDXTLS_, STFSXTLS, STFSXTLS_, STXSIWX, STXSSP, STXSSPX)>;
+
+ def : InstRW<[P8_LS_FP_FX_2C, P8_ISSUE_ST], (instrs STVEBX, STVEHX, STVEWX, STVX, STVXL)>;
+ def : InstRW<[P8_LS_FP_FX_3C, P8_ISSUE_ST], (instregex "^STF(D|S)U(X)?$")>;
+
+ def : InstRW<[P8_LS_LU_NONE, P8_ISSUE_ST], (instrs
+ (instregex "^ST(B|H|W|D)(U)?(X)?(8|TLS)?(_)?(32)?$"),
+ STBCIX, STBCX, STBEPX, STDBRX, STDCIX, STDCX, STHBRX, STHCIX, STHCX, STHEPX,
+ STMW, STSWI, STWBRX, STWCIX, STWCX, STWEPX)>;
+
+ def : InstRW<[P8_LU_or_LS_FX_3C, P8_ISSUE_FXLD],
+ (instregex "^L(B|H|W|D)(A|Z)?(U)?(X)?(8|TLS)?(_)?(32)?$")>;
+
+ def : InstRW<[P8_LU_or_LS_3C, P8_ISSUE_FXLD], (instrs
+ LBARX, LBARXL, LBEPX, LBZCIX, LDARX, LDARXL, LDBRX, LDCIX, LFDEPX, LHARX, LHARXL, LHBRX, LXSIWAX,
+ LHBRX8, LHEPX, LHZCIX, LMW, LSWI, LVSL, LVSR, LWARX, LWARXL, LWBRX, LWBRX8, LWEPX, LWZCIX)>;
+
+ def : InstRW<[P8_LU_or_LS_or_FX_2C, P8_ISSUE_FX], (instrs
+ (instregex "^ADDI(C)?(dtprel|tlsgd|toc)?(L)?(ADDR)?(32|8)?$"),
+ (instregex "^ADDIS(dtprel|tlsgd|toc|gotTprel)?(HA)?(32|8)?$"),
+ (instregex "^LI(S)?(8)?$"),
+ (instregex "^ADD(M)?(E)?(4|8)?(TLS)?(_)?$"),
+ (instregex "^SUBF(M|Z)?(E)?(IC)?(4|8)?$"),
+ (instregex "^NEG(8)?(O)?$"))>;
+
+ // Instructions of PM pipeline
+
+ def : InstRW<[P8_PM_2C, P8_ISSUE_VSX], (instrs
+ (instregex "^VPK(S|U)(H|W|D)(S|U)(M|S)$"),
+ (instregex "^VUPK(H|L)(P|S)(H|B|W|X)$"),
+ (instregex "^VSPLT(IS)?(B|H|W)(s)?$"),
+ (instregex "^(XX|V)MRG(E|O|H|L)(B|H|W)$"),
+ XXPERMDI, XXPERMDIs, XXSEL, XXSLDWI, XXSLDWIs, XXSPLTW, XXSPLTWs, VPERMXOR,
+ VPKPX, VPERM, VBPERMQ, VGBBD, VSEL, VSL, VSLDOI, VSLO, VSR, VSRO)>;
+
+ def : InstRW<[P8_XS_2C, P8_ISSUE_VSX], (instrs
+ (instregex "^V(ADD|SUB)(S|U)(B|H|W|D)(M|S)$"),
+ (instregex "^X(S|V)(MAX|MIN)(D|S)P$"),
+ (instregex "^V(S)?(R)?(L)?(A)?(B|D|H|W)$"),
+ (instregex "^VAVG(S|U)(B|H|W)$"),
+ (instregex "^VM(AX|IN)(S|U)(B|H|W|D)$"),
+ (instregex "^(XX|V)(L)?(N)?(X)?(AND|OR|EQV)(C)?$"),
+ (instregex "^(X)?VCMP(EQ|GT|GE|B)(F|S|U)?(B|H|W|D|P|S)(P)?(_rec)?$"),
+ (instregex "^VCLZ(B|H|W|D)$"),
+ (instregex "^VPOPCNT(B|H|W)$"),
+ XXLORf, XXLXORdpz, XXLXORspz, XXLXORz, VEQV, VMAXFP, VMINFP,
+ VSHASIGMAD, VSHASIGMAW, VSUBCUW, VADDCUW, MFVSCR, MTVSCR)>;
+
+ def : InstRW<[P8_XS_4C, P8_ISSUE_VSX], (instrs
+ (instregex "^V(ADD|SUB)(E)?(C)?UQ(M)?$"),
+ VPOPCNTD)>;
+
+ def : InstRW<[P8_XS_9C, P8_ISSUE_CR], (instrs
+ (instregex "^(F|XS)CMP(O|U)(D|S)(P)?$"),
+ (instregex "^(F|XS|XV)T(DIV|SQRT)((D|S)P)?$"))>;
+
+ // Instructions of VX pipeline
+
+ def : InstRW<[P8_VX_7C, P8_ISSUE_VSX], (instrs
+ (instregex "^V(M)?SUM(2|4)?(M|S|U)(B|H|W)(M|S)$"),
+ (instregex "^VMUL(E|O)?(S|U)(B|H|W)(M)?$"),
+ VMHADDSHS, VMHRADDSHS, VMLADDUHM)>;
+
+ // Instructions of BR pipeline
+
+ def : InstRW<[P8_BR_2C, P8_ISSUE_BR], (instrs
+ (instregex "^(g)?B(C)?(C)?(CTR)?(L)?(A)?(R)?(L)?(8)?(_LD|_LWZ)?(always|into_toc|at)?(_RM)?(n)?$"),
+ (instregex "^BD(N)?Z(L)?(R|A)?(L)?(m|p|8)?$"),
+ (instregex "^BL(R|A)?(8)?(_NOP)?(_TLS)?(_)?(RM)?$"))>;
+
+ // Instructions of DFP pipeline
+ // DFP operations also use float/vector/crypto issue ports.
+ def : InstRW<[P8_DFU_13C, P8_ISSUE_VSX], (instrs
+ (instregex "^DTST(D|S)(C|F|G)(Q)?$"),
+ (instregex "^D(Q|X)EX(Q)?(_rec)?$"),
+ (instregex "^D(ADD|SUB|IEX|QUA|RRND|RINTX|RINTN|CTDP|DEDPD|ENBCD)(_rec)?$"),
+ (instregex "^DSC(L|R)I(_rec)?$"),
+ BCDADD_rec, BCDSUB_rec, DCMPO, DCMPU, DTSTEX, DQUAI)>;
+
+ def : InstRW<[P8_DFU_15C, P8_ISSUE_VSX], (instrs
+ (instregex "^DRINT(N|X)Q(_rec)?$"),
+ DCMPOQ, DCMPUQ, DRRNDQ, DRRNDQ_rec, DIEXQ, DIEXQ_rec, DQUAIQ, DQUAIQ_rec,
+ DTSTEXQ, DDEDPDQ, DDEDPDQ_rec, DENBCDQ, DENBCDQ_rec, DSCLIQ, DSCLIQ_rec,
+ DSCRIQ, DSCRIQ_rec, DCTQPQ, DCTQPQ_rec)>;
+
+ def : InstRW<[P8_DFU_17C, P8_ISSUE_VSX], (instregex "^D(ADD|SUB|QUA)Q(_rec)?$")>;
+ def : InstRW<[P8_DFU_25C, P8_ISSUE_VSX], (instrs DRSP, DRSP_rec, DCTFIX, DCTFIX_rec)>;
+ def : InstRW<[P8_DFU_32C, P8_ISSUE_VSX], (instrs DCFFIX, DCFFIX_rec)>;
+ def : InstRW<[P8_DFU_34C, P8_ISSUE_VSX], (instrs DCFFIXQ, DCFFIXQ_rec)>;
+ def : InstRW<[P8_DFU_40C, P8_ISSUE_VSX], (instrs DMUL, DMUL_rec)>;
+ def : InstRW<[P8_DFU_90C, P8_ISSUE_VSX], (instrs DMULQ, DMULQ_rec)>;
+ def : InstRW<[P8_DFU_96C, P8_ISSUE_VSX], (instrs DDIV, DDIV_rec)>;
+ def : InstRW<[P8_DFU_172C, P8_ISSUE_VSX], (instrs DDIVQ, DDIVQ_rec)>;
+
+ // Direct move instructions
+
+ def : InstRW<[P8_DM_5C, P8_ISSUE_VSX], (instrs
+ MFVRD, MFVSRD, MFVRWZ, MFVSRWZ, MTVRD, MTVSRD, MTVRWA, MTVSRWA, MTVRWZ, MTVSRWZ)>;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP9.td
index b763191d980e..36befceef4ac 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP9.td
@@ -227,17 +227,17 @@ let SchedModel = P9Model in {
}
def P9_DIV_16C_8 : SchedWriteRes<[DIV]> {
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let Latency = 16;
}
def P9_DIV_24C_8 : SchedWriteRes<[DIV]> {
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let Latency = 24;
}
def P9_DIV_40C_8 : SchedWriteRes<[DIV]> {
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let Latency = 40;
}
@@ -261,62 +261,62 @@ let SchedModel = P9Model in {
}
def P9_DP_22C_5 : SchedWriteRes<[DP]> {
- let ResourceCycles = [5];
+ let ReleaseAtCycles = [5];
let Latency = 22;
}
def P9_DPO_24C_8 : SchedWriteRes<[DPO]> {
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let Latency = 24;
}
def P9_DPE_24C_8 : SchedWriteRes<[DPE]> {
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let Latency = 24;
}
def P9_DP_26C_5 : SchedWriteRes<[DP]> {
- let ResourceCycles = [5];
+ let ReleaseAtCycles = [5];
let Latency = 22;
}
def P9_DPE_27C_10 : SchedWriteRes<[DP]> {
- let ResourceCycles = [10];
+ let ReleaseAtCycles = [10];
let Latency = 27;
}
def P9_DPO_27C_10 : SchedWriteRes<[DP]> {
- let ResourceCycles = [10];
+ let ReleaseAtCycles = [10];
let Latency = 27;
}
def P9_DP_33C_8 : SchedWriteRes<[DP]> {
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let Latency = 33;
}
def P9_DPE_33C_8 : SchedWriteRes<[DPE]> {
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let Latency = 33;
}
def P9_DPO_33C_8 : SchedWriteRes<[DPO]> {
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let Latency = 33;
}
def P9_DP_36C_10 : SchedWriteRes<[DP]> {
- let ResourceCycles = [10];
+ let ReleaseAtCycles = [10];
let Latency = 36;
}
def P9_DPE_36C_10 : SchedWriteRes<[DP]> {
- let ResourceCycles = [10];
+ let ReleaseAtCycles = [10];
let Latency = 36;
}
def P9_DPO_36C_10 : SchedWriteRes<[DP]> {
- let ResourceCycles = [10];
+ let ReleaseAtCycles = [10];
let Latency = 36;
}
@@ -358,27 +358,27 @@ let SchedModel = P9Model in {
def P9_DFU_23C : SchedWriteRes<[DFU]> {
let Latency = 23;
- let ResourceCycles = [11];
+ let ReleaseAtCycles = [11];
}
def P9_DFU_24C : SchedWriteRes<[DFU]> {
let Latency = 24;
- let ResourceCycles = [12];
+ let ReleaseAtCycles = [12];
}
def P9_DFU_37C : SchedWriteRes<[DFU]> {
let Latency = 37;
- let ResourceCycles = [25];
+ let ReleaseAtCycles = [25];
}
def P9_DFU_58C : SchedWriteRes<[DFU]> {
let Latency = 58;
- let ResourceCycles = [44];
+ let ReleaseAtCycles = [44];
}
def P9_DFU_76C : SchedWriteRes<[DFU]> {
let Latency = 76;
- let ResourceCycles = [62];
+ let ReleaseAtCycles = [62];
}
// 2 or 5 cycle latencies for the branch unit.
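
These P9 hunks are a mechanical rename: LLVM 18 renamed ResourceCycles to ReleaseAtCycles in the scheduling machinery, with unchanged semantics. Latency says when a result becomes available; ReleaseAtCycles says how long the unit stays occupied, i.e. the issue cadence for back-to-back operations. A small worked sketch, not part of the patch, using P9_DIV_16C_8 (latency 16, unit held 8 cycles):

#include <cstdio>

int main() {
  const unsigned Latency = 16, ReleaseAtCycles = 8;
  // The DIV unit is released after 8 cycles, so a new divide can start
  // every 8 cycles even though each result takes 16 cycles to appear.
  for (unsigned I = 0; I < 4; ++I)
    std::printf("div %u: issue cycle %2u, result cycle %2u\n", I,
                I * ReleaseAtCycles, I * ReleaseAtCycles + Latency);
}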
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 49400eefe4a9..c9740818c9bf 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -123,6 +123,11 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU,
// Determine endianness.
IsLittleEndian = TM.isLittleEndian();
+
+ if (HasAIXSmallLocalExecTLS && (!TargetTriple.isOSAIX() || !IsPPC64))
+ report_fatal_error(
+ "The aix-small-local-exec-tls attribute is only supported on AIX in "
+ "64-bit mode.\n", false);
}
bool PPCSubtarget::enableMachineScheduler() const { return true; }
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
index 8120975c4fb2..81f078ab246e 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
@@ -100,7 +100,7 @@ protected:
return true;
for (const MachineOperand &MO : MI.operands()) {
- if ((MO.getTargetFlags() & PPCII::MO_ACCESS_MASK) == PPCII::MO_TOC_LO)
+ if (MO.getTargetFlags() == PPCII::MO_TOC_LO)
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 3858d44e5099..d676fa86a10e 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -100,6 +100,11 @@ static cl::opt<bool>
cl::desc("Expand eligible cr-logical binary ops to branches"),
cl::init(true), cl::Hidden);
+static cl::opt<bool> MergeStringPool(
+ "ppc-merge-string-pool",
+ cl::desc("Merge all of the strings in a module into one pool"),
+ cl::init(true), cl::Hidden);
+
static cl::opt<bool> EnablePPCGenScalarMASSEntries(
"enable-ppc-gen-scalar-mass", cl::init(false),
cl::desc("Enable lowering math functions to their corresponding MASS "
@@ -137,6 +142,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() {
initializeGlobalISel(PR);
initializePPCCTRLoopsPass(PR);
initializePPCDAGToDAGISelPass(PR);
+ initializePPCMergeStringPoolPass(PR);
}
static bool isLittleEndianTriple(const Triple &T) {
@@ -191,7 +197,7 @@ static std::string getDataLayoutString(const Triple &T) {
return Ret;
}
-static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
+static std::string computeFSAdditions(StringRef FS, CodeGenOptLevel OL,
const Triple &TT) {
std::string FullFS = std::string(FS);
@@ -203,14 +209,14 @@ static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
FullFS = "+64bit";
}
- if (OL >= CodeGenOpt::Default) {
+ if (OL >= CodeGenOptLevel::Default) {
if (!FullFS.empty())
FullFS = "+crbits," + FullFS;
else
FullFS = "+crbits";
}
- if (OL != CodeGenOpt::None) {
+ if (OL != CodeGenOptLevel::None) {
if (!FullFS.empty())
FullFS = "+invariant-function-descriptors," + FullFS;
else
@@ -236,9 +242,9 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
const TargetOptions &Options) {
- if (Options.MCOptions.getABIName().startswith("elfv1"))
+ if (Options.MCOptions.getABIName().starts_with("elfv1"))
return PPCTargetMachine::PPC_ABI_ELFv1;
- else if (Options.MCOptions.getABIName().startswith("elfv2"))
+ else if (Options.MCOptions.getABIName().starts_with("elfv2"))
return PPCTargetMachine::PPC_ABI_ELFv2;
assert(Options.MCOptions.getABIName().empty() &&
@@ -259,8 +265,9 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
static Reloc::Model getEffectiveRelocModel(const Triple &TT,
std::optional<Reloc::Model> RM) {
- assert((!TT.isOSAIX() || !RM || *RM == Reloc::PIC_) &&
- "Invalid relocation model for AIX.");
+ if (TT.isOSAIX() && RM && *RM != Reloc::PIC_)
+ report_fatal_error("invalid relocation model, AIX only supports PIC",
+ false);
if (RM)
return *RM;
@@ -339,7 +346,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU,
computeFSAdditions(FS, OL, TT), Options,
getEffectiveRelocModel(TT, RM),
@@ -408,7 +415,7 @@ public:
: TargetPassConfig(TM, PM) {
// At any optimization level above -O0 we use the Machine Scheduler and not
// the default Post RA List Scheduler.
- if (TM.getOptLevel() != CodeGenOpt::None)
+ if (TM.getOptLevel() != CodeGenOptLevel::None)
substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
}
@@ -448,7 +455,7 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
}
void PPCPassConfig::addIRPasses() {
- if (TM->getOptLevel() != CodeGenOpt::None)
+ if (TM->getOptLevel() != CodeGenOptLevel::None)
addPass(createPPCBoolRetToIntPass());
addPass(createAtomicExpandPass());
@@ -457,7 +464,7 @@ void PPCPassConfig::addIRPasses() {
// Generate PowerPC target-specific entries for scalar math functions
// that are available in IBM MASS (scalar) library.
- if (TM->getOptLevel() == CodeGenOpt::Aggressive &&
+ if (TM->getOptLevel() == CodeGenOptLevel::Aggressive &&
EnablePPCGenScalarMASSEntries) {
TM->Options.PPCGenScalarMASSEntries = EnablePPCGenScalarMASSEntries;
addPass(createPPCGenScalarMASSEntriesPass());
@@ -467,7 +474,7 @@ void PPCPassConfig::addIRPasses() {
if (EnablePrefetch.getNumOccurrences() > 0)
addPass(createLoopDataPrefetchPass());
- if (TM->getOptLevel() >= CodeGenOpt::Default && EnableGEPOpt) {
+ if (TM->getOptLevel() >= CodeGenOptLevel::Default && EnableGEPOpt) {
// Call SeparateConstOffsetFromGEP pass to extract constants within indices
// and lower a GEP with multiple indices to either arithmetic operations or
// multiple GEPs with single index.
@@ -484,10 +491,13 @@ void PPCPassConfig::addIRPasses() {
}
bool PPCPassConfig::addPreISel() {
- if (!DisableInstrFormPrep && getOptLevel() != CodeGenOpt::None)
+ if (MergeStringPool && getOptLevel() != CodeGenOptLevel::None)
+ addPass(createPPCMergeStringPoolPass());
+
+ if (!DisableInstrFormPrep && getOptLevel() != CodeGenOptLevel::None)
addPass(createPPCLoopInstrFormPrepPass(getPPCTargetMachine()));
- if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
+ if (!DisableCTRLoops && getOptLevel() != CodeGenOptLevel::None)
addPass(createHardwareLoopsLegacyPass());
return false;
@@ -507,7 +517,7 @@ bool PPCPassConfig::addInstSelector() {
addPass(createPPCISelDag(getPPCTargetMachine(), getOptLevel()));
#ifndef NDEBUG
- if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
+ if (!DisableCTRLoops && getOptLevel() != CodeGenOptLevel::None)
addPass(createPPCCTRLoopsVerify());
#endif
@@ -518,12 +528,12 @@ bool PPCPassConfig::addInstSelector() {
void PPCPassConfig::addMachineSSAOptimization() {
// Run CTR loops pass before any cfg modification pass to prevent the
// canonical form of hardware loop from being destroyed.
- if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
+ if (!DisableCTRLoops && getOptLevel() != CodeGenOptLevel::None)
addPass(createPPCCTRLoopsPass());
// PPCBranchCoalescingPass needs to be done before machine sinking
// since it merges empty blocks.
- if (EnableBranchCoalescing && getOptLevel() != CodeGenOpt::None)
+ if (EnableBranchCoalescing && getOptLevel() != CodeGenOptLevel::None)
addPass(createPPCBranchCoalescingPass());
TargetPassConfig::addMachineSSAOptimization();
// For little endian, remove where possible the vector swap instructions
@@ -532,7 +542,7 @@ void PPCPassConfig::addMachineSSAOptimization() {
!DisableVSXSwapRemoval)
addPass(createPPCVSXSwapRemovalPass());
// Reduce the number of cr-logical ops.
- if (ReduceCRLogical && getOptLevel() != CodeGenOpt::None)
+ if (ReduceCRLogical && getOptLevel() != CodeGenOptLevel::None)
addPass(createPPCReduceCRLogicalsPass());
// Target-specific peephole cleanups performed after instruction
// selection.
@@ -543,7 +553,7 @@ void PPCPassConfig::addMachineSSAOptimization() {
}
void PPCPassConfig::addPreRegAlloc() {
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOptLevel::None) {
initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
&PPCVSXFMAMutateID);
@@ -561,12 +571,12 @@ void PPCPassConfig::addPreRegAlloc() {
if (EnableExtraTOCRegDeps)
addPass(createPPCTOCRegDepsPass());
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
addPass(&MachinePipelinerID);
}
void PPCPassConfig::addPreSched2() {
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
addPass(&IfConverterID);
}
@@ -574,7 +584,7 @@ void PPCPassConfig::addPreEmitPass() {
addPass(createPPCPreEmitPeepholePass());
addPass(createPPCExpandISELPass());
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
addPass(createPPCEarlyReturnPass());
}
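
Most of this file's churn is two mechanical migrations: the unscoped llvm::CodeGenOpt::Level enum became the scoped enum class CodeGenOptLevel, and StringRef::startswith became starts_with. A minimal sketch, not part of the patch, of why the enum change forces every comparison to be respelled:

#include <cstdio>

namespace CodeGenOpt { enum Level { None, Less, Default, Aggressive }; }
enum class CodeGenOptLevel { None, Less, Default, Aggressive };

int main() {
  CodeGenOpt::Level Old = CodeGenOpt::Default;     // old unscoped spelling
  CodeGenOptLevel New = CodeGenOptLevel::Default;  // new scoped spelling
  // The scoped enum never converts implicitly, so passes now compare
  // against CodeGenOptLevel::None instead of CodeGenOpt::None.
  if (Old != CodeGenOpt::None && New != CodeGenOptLevel::None)
    std::printf("optimizations enabled under both spellings\n");
}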
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.h
index 5d4571b7323a..56145a2eb39c 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -39,7 +39,7 @@ public:
PPCTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
~PPCTargetMachine() override;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 8137b61f4982..062b53e24a0d 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -18,7 +18,6 @@
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"
#include <optional>
@@ -27,6 +26,9 @@ using namespace llvm;
#define DEBUG_TYPE "ppctti"
+static cl::opt<bool> VecMaskCost("ppc-vec-mask-cost",
+cl::desc("add masking cost for i1 vectors"), cl::init(true), cl::Hidden);
+
static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
@@ -73,16 +75,14 @@ PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
if (getOrEnforceKnownAlignment(
II.getArgOperand(0), Align(16), IC.getDataLayout(), &II,
&IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
- Value *Ptr = IC.Builder.CreateBitCast(
- II.getArgOperand(0), PointerType::getUnqual(II.getType()));
+ Value *Ptr = II.getArgOperand(0);
return new LoadInst(II.getType(), Ptr, "", false, Align(16));
}
break;
case Intrinsic::ppc_vsx_lxvw4x:
case Intrinsic::ppc_vsx_lxvd2x: {
// Turn PPC VSX loads into normal loads.
- Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(0),
- PointerType::getUnqual(II.getType()));
+ Value *Ptr = II.getArgOperand(0);
return new LoadInst(II.getType(), Ptr, Twine(""), false, Align(1));
}
case Intrinsic::ppc_altivec_stvx:
@@ -91,16 +91,14 @@ PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
if (getOrEnforceKnownAlignment(
II.getArgOperand(1), Align(16), IC.getDataLayout(), &II,
&IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
- Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
- Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
+ Value *Ptr = II.getArgOperand(1);
return new StoreInst(II.getArgOperand(0), Ptr, false, Align(16));
}
break;
case Intrinsic::ppc_vsx_stxvw4x:
case Intrinsic::ppc_vsx_stxvd2x: {
// Turn PPC VSX stores into normal stores.
- Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
- Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
+ Value *Ptr = II.getArgOperand(1);
return new StoreInst(II.getArgOperand(0), Ptr, false, Align(1));
}
case Intrinsic::ppc_altivec_vperm:
@@ -700,6 +698,9 @@ InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
return Cost;
} else if (Val->getScalarType()->isIntegerTy() && Index != -1U) {
+ unsigned EltSize = Val->getScalarSizeInBits();
+ // Computing on 1 bit values requires extra mask or compare operations.
+ unsigned MaskCost = VecMaskCost && EltSize == 1 ? 1 : 0;
if (ST->hasP9Altivec()) {
if (ISD == ISD::INSERT_VECTOR_ELT)
// A move-to VSR and a permute/insert. Assume vector operation cost
@@ -721,12 +722,15 @@ InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
// We need a vector extract (or mfvsrld). Assume vector operation cost.
// The cost of the load constant for a vector extract is disregarded
// (invariant, easily schedulable).
- return CostFactor;
+ return CostFactor + MaskCost;
- } else if (ST->hasDirectMove())
+ } else if (ST->hasDirectMove()) {
// Assume permute has standard cost.
// Assume move-to/move-from VSR have 2x standard cost.
- return 3;
+ if (ISD == ISD::INSERT_VECTOR_ELT)
+ return 3;
+ return 3 + MaskCost;
+ }
}
// Estimated cost of a load-hit-store delay. This was obtained
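
The dropped CreateBitCast calls in the hunks above are an opaque-pointer cleanup: once pointers carry no pointee type, casting the intrinsic's pointer argument to PointerType::getUnqual(VecTy) is a no-op, and the argument can feed the replacement load or store directly. A hedged before/after sketch, not part of the patch, assuming the same LLVM C++ API the hunk uses:

#include "llvm/IR/Instructions.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;

// Mirrors the lxvd2x lowering above: build an unattached LoadInst that
// InstCombine inserts in place of the intrinsic call.
static LoadInst *lowerVSXLoad(Value *PtrArg, Type *VecTy) {
  // Before: PtrArg = Builder.CreateBitCast(
  //             PtrArg, PointerType::getUnqual(VecTy));
  // After: with opaque pointers the cast is redundant.
  return new LoadInst(VecTy, PtrArg, "", /*isVolatile=*/false, Align(1));
}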
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 046a208921ae..f3ea0f597eec 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -16,7 +16,6 @@
#include "TargetInfo/RISCVTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
@@ -108,10 +107,9 @@ class RISCVAsmParser : public MCTargetAsmParser {
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
- bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
- OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
@@ -204,6 +202,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
ParseStatus parseFRMArg(OperandVector &Operands);
ParseStatus parseFenceArg(OperandVector &Operands);
ParseStatus parseReglist(OperandVector &Operands);
+ ParseStatus parseRegReg(OperandVector &Operands);
ParseStatus parseRetval(OperandVector &Operands);
ParseStatus parseZcmpSpimm(OperandVector &Operands);
@@ -260,6 +259,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
std::unique_ptr<RISCVOperand> defaultMaskRegOp() const;
std::unique_ptr<RISCVOperand> defaultFRMArgOp() const;
+ std::unique_ptr<RISCVOperand> defaultFRMArgLegacyOp() const;
public:
enum RISCVMatchResultTy {
@@ -286,11 +286,11 @@ public:
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
auto ABIName = StringRef(Options.ABIName);
- if (ABIName.endswith("f") && !getSTI().hasFeature(RISCV::FeatureStdExtF)) {
+ if (ABIName.ends_with("f") && !getSTI().hasFeature(RISCV::FeatureStdExtF)) {
errs() << "Hard-float 'f' ABI can't be used for a target that "
"doesn't support the F instruction set extension (ignoring "
"target-abi)\n";
- } else if (ABIName.endswith("d") &&
+ } else if (ABIName.ends_with("d") &&
!getSTI().hasFeature(RISCV::FeatureStdExtD)) {
errs() << "Hard-float 'd' ABI can't be used for a target that "
"doesn't support the D instruction set extension (ignoring "
@@ -325,6 +325,7 @@ struct RISCVOperand final : public MCParsedAsmOperand {
Fence,
Rlist,
Spimm,
+ RegReg,
} Kind;
struct RegOp {
@@ -369,6 +370,11 @@ struct RISCVOperand final : public MCParsedAsmOperand {
unsigned Val;
};
+ struct RegRegOp {
+ MCRegister Reg1;
+ MCRegister Reg2;
+ };
+
SMLoc StartLoc, EndLoc;
union {
StringRef Tok;
@@ -381,6 +387,7 @@ struct RISCVOperand final : public MCParsedAsmOperand {
struct FenceOp Fence;
struct RlistOp Rlist;
struct SpimmOp Spimm;
+ struct RegRegOp RegReg;
};
RISCVOperand(KindTy K) : Kind(K) {}
@@ -421,6 +428,9 @@ public:
case KindTy::Spimm:
Spimm = o.Spimm;
break;
+ case KindTy::RegReg:
+ RegReg = o.RegReg;
+ break;
}
}
@@ -445,6 +455,7 @@ public:
bool isImm() const override { return Kind == KindTy::Immediate; }
bool isMem() const override { return false; }
bool isSystemRegister() const { return Kind == KindTy::SystemRegister; }
+ bool isRegReg() const { return Kind == KindTy::RegReg; }
bool isRlist() const { return Kind == KindTy::Rlist; }
bool isSpimm() const { return Kind == KindTy::Spimm; }
@@ -564,6 +575,7 @@ public:
/// Return true if the operand is a valid floating point rounding mode.
bool isFRMArg() const { return Kind == KindTy::FRM; }
+ bool isFRMArgLegacy() const { return Kind == KindTy::FRM; }
bool isRTZArg() const { return isFRMArg() && FRM.FRM == RISCVFPRndMode::RTZ; }
/// Return true if the operand is a valid fli.s floating-point immediate.
@@ -660,6 +672,7 @@ public:
bool isUImm6() const { return IsUImm<6>(); }
bool isUImm7() const { return IsUImm<7>(); }
bool isUImm8() const { return IsUImm<8>(); }
+ bool isUImm20() const { return IsUImm<20>(); }
bool isUImm8GE32() const {
int64_t Imm;
@@ -1024,6 +1037,10 @@ public:
RISCVZC::printSpimm(Spimm.Val, OS);
OS << '>';
break;
+ case KindTy::RegReg:
+ OS << "<RegReg: Reg1 " << RegName(RegReg.Reg1);
+ OS << " Reg2 " << RegName(RegReg.Reg2);
+ break;
}
}
@@ -1107,6 +1124,16 @@ public:
return Op;
}
+ static std::unique_ptr<RISCVOperand> createRegReg(unsigned Reg1No,
+ unsigned Reg2No, SMLoc S) {
+ auto Op = std::make_unique<RISCVOperand>(KindTy::RegReg);
+ Op->RegReg.Reg1 = Reg1No;
+ Op->RegReg.Reg2 = Reg2No;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
static std::unique_ptr<RISCVOperand> createSpimm(unsigned Spimm, SMLoc S) {
auto Op = std::make_unique<RISCVOperand>(KindTy::Spimm);
Op->Spimm.Val = Spimm;
@@ -1182,6 +1209,12 @@ public:
Inst.addOperand(MCOperand::createImm(Rlist.Val));
}
+ void addRegRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(RegReg.Reg1));
+ Inst.addOperand(MCOperand::createReg(RegReg.Reg2));
+ }
+
void addSpimmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createImm(Spimm.Val));
@@ -1478,6 +1511,8 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
"operand must be a symbol with "
"%hi/%tprel_hi modifier or an integer in "
"the range");
+ case Match_InvalidUImm20:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 20) - 1);
case Match_InvalidUImm20AUIPC:
return generateImmOutOfRangeError(
Operands, ErrorInfo, 0, (1 << 20) - 1,
@@ -1546,6 +1581,10 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidRnumArg: {
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, 10);
}
+ case Match_InvalidRegReg: {
+ SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc, "operands must be register and register");
+ }
}
llvm_unreachable("Unknown match type detected!");
@@ -1571,27 +1610,26 @@ static MCRegister matchRegisterNameHelper(bool IsRVE, StringRef Name) {
return Reg;
}
-bool RISCVAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
+bool RISCVAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) {
- if (tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success)
+ if (!tryParseRegister(Reg, StartLoc, EndLoc).isSuccess())
return Error(StartLoc, "invalid register name");
return false;
}
-OperandMatchResultTy RISCVAsmParser::tryParseRegister(MCRegister &RegNo,
- SMLoc &StartLoc,
- SMLoc &EndLoc) {
+ParseStatus RISCVAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
const AsmToken &Tok = getParser().getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
StringRef Name = getLexer().getTok().getIdentifier();
- RegNo = matchRegisterNameHelper(isRVE(), Name);
- if (!RegNo)
- return MatchOperand_NoMatch;
+ Reg = matchRegisterNameHelper(isRVE(), Name);
+ if (!Reg)
+ return ParseStatus::NoMatch;
getParser().Lex(); // Eat identifier token.
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
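
tryParseRegister now returns ParseStatus rather than OperandMatchResultTy. The tri-state is the point: NoMatch tells the generated matcher to try the next operand parser, Failure means a diagnostic was already emitted, Success means the operand was consumed. A toy stand-in, not part of the patch, with deliberately simplified names (LLVM's ParseStatus is a small class, not a bare enum):

#include <cstdio>

enum class ParseStatus { Success, Failure, NoMatch };  // simplified stand-in

static ParseStatus tryParseRegister(const char *Tok) {
  if (Tok[0] != 'x')
    return ParseStatus::NoMatch;   // not register syntax; try other parsers
  if (Tok[1] == '\0')
    return ParseStatus::Failure;   // looked like a register, but malformed
  return ParseStatus::Success;
}

int main() {
  const char *Toks[] = {"x10", "x", "42"};
  for (const char *T : Toks)
    std::printf("%-3s -> %d\n", T, static_cast<int>(tryParseRegister(T)));
}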
ParseStatus RISCVAsmParser::parseRegister(OperandVector &Operands,
@@ -2379,6 +2417,37 @@ ParseStatus RISCVAsmParser::parseZeroOffsetMemOp(OperandVector &Operands) {
return ParseStatus::Success;
}
+ParseStatus RISCVAsmParser::parseRegReg(OperandVector &Operands) {
+ // RR : a2(a1)
+ if (getLexer().getKind() != AsmToken::Identifier)
+ return ParseStatus::NoMatch;
+
+ StringRef RegName = getLexer().getTok().getIdentifier();
+ MCRegister Reg = matchRegisterNameHelper(isRVE(), RegName);
+ if (!Reg)
+ return Error(getLoc(), "invalid register");
+ getLexer().Lex();
+
+ if (parseToken(AsmToken::LParen, "expected '(' or invalid operand"))
+ return ParseStatus::Failure;
+
+ if (getLexer().getKind() != AsmToken::Identifier)
+ return Error(getLoc(), "expected register");
+
+ StringRef Reg2Name = getLexer().getTok().getIdentifier();
+ MCRegister Reg2 = matchRegisterNameHelper(isRVE(), Reg2Name);
+ if (!Reg2)
+ return Error(getLoc(), "invalid register");
+ getLexer().Lex();
+
+ if (parseToken(AsmToken::RParen, "expected ')'"))
+ return ParseStatus::Failure;
+
+ Operands.push_back(RISCVOperand::createRegReg(Reg, Reg2, getLoc()));
+
+ return ParseStatus::Success;
+}
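+
+// Editorial aside, not part of the patch: parseRegReg accepts the paired
+// register operand syntax noted in its comment, a first register followed
+// by a second in parentheses, e.g. a2(a1). A toy illustration of that
+// grammar follows (real register validation happens via
+// matchRegisterNameHelper):

#include <cstdio>
#include <cstring>

// Split "reg1(reg2)" into its two register names; returns false on
// malformed input. Purely illustrative.
static bool parseRegReg(const char *S, char *Reg1, char *Reg2) {
  const char *L = std::strchr(S, '(');
  const char *R = std::strchr(S, ')');
  if (!L || !R || R < L || R[1] != '\0')
    return false;
  std::snprintf(Reg1, 8, "%.*s", (int)(L - S), S);
  std::snprintf(Reg2, 8, "%.*s", (int)(R - L - 1), L + 1);
  return true;
}

int main() {
  char A[8], B[8];
  if (parseRegReg("a2(a1)", A, B))
    std::printf("Reg1=%s Reg2=%s\n", A, B);  // Reg1=a2 Reg2=a1
}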
+
ParseStatus RISCVAsmParser::parseReglist(OperandVector &Operands) {
// Rlist: {ra [, s0[-sN]]}
// XRlist: {x1 [, x8[-x9][, x18[-xN]]]}
@@ -2974,8 +3043,7 @@ void RISCVAsmParser::emitToStreamer(MCStreamer &S, const MCInst &Inst) {
void RISCVAsmParser::emitLoadImm(MCRegister DestReg, int64_t Value,
MCStreamer &Out) {
- RISCVMatInt::InstSeq Seq =
- RISCVMatInt::generateInstSeq(Value, getSTI().getFeatureBits());
+ RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Value, getSTI());
MCRegister SrcReg = RISCV::X0;
for (const RISCVMatInt::Inst &Inst : Seq) {
@@ -3200,7 +3268,7 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
.addOperand(Inst.getOperand(1))
.addOperand(Inst.getOperand(2))
.addOperand(Inst.getOperand(3))
- .addOperand(Inst.getOperand(4)));
+ .addReg(RISCV::NoRegister));
emitToStreamer(Out, MCInstBuilder(RISCV::VMANDN_MM)
.addOperand(Inst.getOperand(0))
.addOperand(Inst.getOperand(0))
@@ -3209,8 +3277,8 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
// masked va >= x, any vd
//
// pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt
- // expansion: vmslt{u}.vx vt, va, x; vmandn.mm vt, v0, vt; vmandn.mm vd,
- // vd, v0; vmor.mm vd, vt, vd
+ // expansion: vmslt{u}.vx vt, va, x; vmandn.mm vt, v0, vt;
+ // vmandn.mm vd, vd, v0; vmor.mm vd, vt, vd
assert(Inst.getOperand(1).getReg() != RISCV::V0 &&
"The temporary vector register should not be V0.");
emitToStreamer(Out, MCInstBuilder(Opcode)
@@ -3256,6 +3324,11 @@ std::unique_ptr<RISCVOperand> RISCVAsmParser::defaultFRMArgOp() const {
llvm::SMLoc());
}
+std::unique_ptr<RISCVOperand> RISCVAsmParser::defaultFRMArgLegacyOp() const {
+ return RISCVOperand::createFRMArg(RISCVFPRndMode::RoundingMode::RNE,
+ llvm::SMLoc());
+}
+
bool RISCVAsmParser::validateInstruction(MCInst &Inst,
OperandVector &Operands) {
unsigned Opcode = Inst.getOpcode();
@@ -3352,16 +3425,21 @@ bool RISCVAsmParser::validateInstruction(MCInst &Inst,
}
unsigned DestReg = Inst.getOperand(0).getReg();
+ unsigned Offset = 0;
+ int TiedOp = MCID.getOperandConstraint(1, MCOI::TIED_TO);
+ if (TiedOp == 0)
+ Offset = 1;
+
// Operands[1] will be the first operand, DestReg.
SMLoc Loc = Operands[1]->getStartLoc();
if (MCID.TSFlags & RISCVII::VS2Constraint) {
- unsigned CheckReg = Inst.getOperand(1).getReg();
+ unsigned CheckReg = Inst.getOperand(Offset + 1).getReg();
if (DestReg == CheckReg)
return Error(Loc, "The destination vector register group cannot overlap"
" the source vector register group.");
}
- if ((MCID.TSFlags & RISCVII::VS1Constraint) && (Inst.getOperand(2).isReg())) {
- unsigned CheckReg = Inst.getOperand(2).getReg();
+ if ((MCID.TSFlags & RISCVII::VS1Constraint) && Inst.getOperand(Offset + 2).isReg()) {
+ unsigned CheckReg = Inst.getOperand(Offset + 2).getReg();
if (DestReg == CheckReg)
return Error(Loc, "The destination vector register group cannot overlap"
" the source vector register group.");
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index e6ea6baa72ff..53e2b6b4d94e 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -196,10 +196,7 @@ static DecodeStatus DecodeVRRegisterClass(MCInst &Inst, uint32_t RegNo,
static DecodeStatus DecodeVRM2RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
- if (RegNo >= 32)
- return MCDisassembler::Fail;
-
- if (RegNo % 2)
+ if (RegNo >= 32 || RegNo % 2)
return MCDisassembler::Fail;
const RISCVDisassembler *Dis =
@@ -216,10 +213,7 @@ static DecodeStatus DecodeVRM2RegisterClass(MCInst &Inst, uint32_t RegNo,
static DecodeStatus DecodeVRM4RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
- if (RegNo >= 32)
- return MCDisassembler::Fail;
-
- if (RegNo % 4)
+ if (RegNo >= 32 || RegNo % 4)
return MCDisassembler::Fail;
const RISCVDisassembler *Dis =
@@ -236,10 +230,7 @@ static DecodeStatus DecodeVRM4RegisterClass(MCInst &Inst, uint32_t RegNo,
static DecodeStatus DecodeVRM8RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
- if (RegNo >= 32)
- return MCDisassembler::Fail;
-
- if (RegNo % 8)
+ if (RegNo >= 32 || RegNo % 8)
return MCDisassembler::Fail;
const RISCVDisassembler *Dis =
@@ -256,16 +247,11 @@ static DecodeStatus DecodeVRM8RegisterClass(MCInst &Inst, uint32_t RegNo,
static DecodeStatus decodeVMaskReg(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
- MCRegister Reg = RISCV::NoRegister;
- switch (RegNo) {
- default:
+ if (RegNo > 2) {
return MCDisassembler::Fail;
- case 0:
- Reg = RISCV::V0;
- break;
- case 1:
- break;
}
+ MCRegister Reg = (RegNo == 0) ? RISCV::V0 : RISCV::NoRegister;
+
Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -367,6 +353,9 @@ static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn,
static DecodeStatus decodeZcmpRlist(MCInst &Inst, unsigned Imm,
uint64_t Address, const void *Decoder);
+static DecodeStatus decodeRegReg(MCInst &Inst, uint32_t Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+
static DecodeStatus decodeZcmpSpimm(MCInst &Inst, unsigned Imm,
uint64_t Address, const void *Decoder);
@@ -464,6 +453,15 @@ static DecodeStatus decodeZcmpRlist(MCInst &Inst, unsigned Imm,
return MCDisassembler::Success;
}
+static DecodeStatus decodeRegReg(MCInst &Inst, uint32_t Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
+ uint32_t Rs1 = fieldFromInstruction(Insn, 0, 5);
+ uint32_t Rs2 = fieldFromInstruction(Insn, 5, 5);
+ DecodeGPRRegisterClass(Inst, Rs1, Address, Decoder);
+ DecodeGPRRegisterClass(Inst, Rs2, Address, Decoder);
+ return MCDisassembler::Success;
+}
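+
+// Editorial aside, not part of the patch: decodeRegReg splits the operand
+// field at bit 5, rs1 in bits [4:0] and rs2 in bits [9:5]. A self-contained
+// sketch of the extraction follows, with a local reimplementation of the
+// fieldFromInstruction helper:

#include <cstdint>
#include <cstdio>

static uint32_t fieldFromInstruction(uint32_t Insn, unsigned Start,
                                     unsigned Width) {
  return (Insn >> Start) & ((1u << Width) - 1);
}

int main() {
  uint32_t Insn = (11u << 5) | 10u;  // rs2 = x11, rs1 = x10
  std::printf("rs1 = x%u, rs2 = x%u\n",
              fieldFromInstruction(Insn, 0, 5),
              fieldFromInstruction(Insn, 5, 5));
}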
+
// spimm is based on rlist now.
static DecodeStatus decodeZcmpSpimm(MCInst &Inst, unsigned Imm,
uint64_t Address, const void *Decoder) {
@@ -528,43 +526,65 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZfinx, DecoderTableRVZfinx32,
"RVZfinx table (Float in Integer)");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXVentanaCondOps,
- DecoderTableVentana32, "Ventana custom opcode table");
- TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBa, DecoderTableTHeadBa32,
+ DecoderTableXVentana32, "Ventana custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBa, DecoderTableXTHeadBa32,
"XTHeadBa custom opcode table");
- TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBb, DecoderTableTHeadBb32,
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBb, DecoderTableXTHeadBb32,
"XTHeadBb custom opcode table");
- TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBs, DecoderTableTHeadBs32,
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBs, DecoderTableXTHeadBs32,
"XTHeadBs custom opcode table");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadCondMov,
- DecoderTableTHeadCondMov32,
+ DecoderTableXTHeadCondMov32,
"XTHeadCondMov custom opcode table");
- TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadCmo, DecoderTableTHeadCmo32,
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadCmo, DecoderTableXTHeadCmo32,
"XTHeadCmo custom opcode table");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadFMemIdx,
- DecoderTableTHeadFMemIdx32,
+ DecoderTableXTHeadFMemIdx32,
"XTHeadFMemIdx custom opcode table");
- TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadMac, DecoderTableTHeadMac32,
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadMac, DecoderTableXTHeadMac32,
"XTHeadMac custom opcode table");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadMemIdx,
- DecoderTableTHeadMemIdx32,
+ DecoderTableXTHeadMemIdx32,
"XTHeadMemIdx custom opcode table");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadMemPair,
- DecoderTableTHeadMemPair32,
+ DecoderTableXTHeadMemPair32,
"XTHeadMemPair custom opcode table");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadSync,
- DecoderTableTHeadSync32,
+ DecoderTableXTHeadSync32,
"XTHeadSync custom opcode table");
- TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadVdot, DecoderTableTHeadV32,
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadVdot, DecoderTableXTHeadVdot32,
"XTHeadVdot custom opcode table");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXSfvcp, DecoderTableXSfvcp32,
"SiFive VCIX custom opcode table");
+ TRY_TO_DECODE_FEATURE(
+ RISCV::FeatureVendorXSfvqmaccdod, DecoderTableXSfvqmaccdod32,
+ "SiFive Matrix Multiplication (2x8 and 8x2) Instruction opcode table");
+ TRY_TO_DECODE_FEATURE(
+ RISCV::FeatureVendorXSfvqmaccqoq, DecoderTableXSfvqmaccqoq32,
+ "SiFive Matrix Multiplication (4x8 and 8x4) Instruction opcode table");
+ TRY_TO_DECODE_FEATURE(
+ RISCV::FeatureVendorXSfvfwmaccqqq, DecoderTableXSfvfwmaccqqq32,
+ "SiFive Matrix Multiplication Instruction opcode table");
+ TRY_TO_DECODE_FEATURE(
+ RISCV::FeatureVendorXSfvfnrclipxfqf, DecoderTableXSfvfnrclipxfqf32,
+ "SiFive FP32-to-int8 Ranged Clip Instructions opcode table");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXSfcie, DecoderTableXSfcie32,
"Sifive CIE custom opcode table");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVbitmanip,
DecoderTableXCVbitmanip32,
"CORE-V Bit Manipulation custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVelw, DecoderTableXCVelw32,
+ "CORE-V Event load custom opcode table");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVmac, DecoderTableXCVmac32,
"CORE-V MAC custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVmem, DecoderTableXCVmem32,
+ "CORE-V MEM custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCValu, DecoderTableXCValu32,
+ "CORE-V ALU custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVsimd, DecoderTableXCVsimd32,
+ "CORE-V SIMD extensions custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVbi, DecoderTableXCVbi32,
+ "CORE-V Immediate Branching custom opcode table");
TRY_TO_DECODE(true, DecoderTable32, "RISCV32 table");
return MCDisassembler::Fail;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index 5505f89a32f2..50ed85acdec0 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -14,9 +14,11 @@
#include "RISCVCallLowering.h"
#include "RISCVISelLowering.h"
+#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
using namespace llvm;
@@ -46,37 +48,118 @@ public:
const DataLayout &DL = MF.getDataLayout();
const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
- return RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT,
- LocInfo, Flags, State, /*IsFixed=*/true, IsRet,
- Info.Ty, *Subtarget.getTargetLowering(),
- /*FirstMaskArgument=*/std::nullopt);
+ if (RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT,
+ LocInfo, Flags, State, Info.IsFixed, IsRet, Info.Ty,
+ *Subtarget.getTargetLowering(),
+ /*FirstMaskArgument=*/std::nullopt))
+ return true;
+
+ StackSize = State.getStackSize();
+ return false;
}
};
struct RISCVOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
RISCVOutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
MachineInstrBuilder MIB)
- : OutgoingValueHandler(B, MRI), MIB(MIB) {}
-
- MachineInstrBuilder MIB;
-
+ : OutgoingValueHandler(B, MRI), MIB(MIB),
+ Subtarget(MIRBuilder.getMF().getSubtarget<RISCVSubtarget>()) {}
Register getStackAddress(uint64_t MemSize, int64_t Offset,
MachinePointerInfo &MPO,
ISD::ArgFlagsTy Flags) override {
- llvm_unreachable("not implemented");
+ MachineFunction &MF = MIRBuilder.getMF();
+ LLT p0 = LLT::pointer(0, Subtarget.getXLen());
+ LLT sXLen = LLT::scalar(Subtarget.getXLen());
+
+ if (!SPReg)
+ SPReg = MIRBuilder.buildCopy(p0, Register(RISCV::X2)).getReg(0);
+
+ auto OffsetReg = MIRBuilder.buildConstant(sXLen, Offset);
+
+ auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg);
+
+ MPO = MachinePointerInfo::getStack(MF, Offset);
+ return AddrReg.getReg(0);
}
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
- MachinePointerInfo &MPO, CCValAssign &VA) override {
- llvm_unreachable("not implemented");
+ const MachinePointerInfo &MPO,
+ const CCValAssign &VA) override {
+ MachineFunction &MF = MIRBuilder.getMF();
+ uint64_t LocMemOffset = VA.getLocMemOffset();
+
+ // TODO: Move StackAlignment to subtarget and share with FrameLowering.
+ auto MMO =
+ MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
+ commonAlignment(Align(16), LocMemOffset));
+
+ Register ExtReg = extendRegister(ValVReg, VA);
+ MIRBuilder.buildStore(ExtReg, Addr, *MMO);
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) override {
+ const CCValAssign &VA) override {
+ // If we're passing an f32 value into an i64, anyextend before copying.
+ if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
+ ValVReg = MIRBuilder.buildAnyExt(LLT::scalar(64), ValVReg).getReg(0);
+
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
MIB.addUse(PhysReg, RegState::Implicit);
}
+
+ unsigned assignCustomValue(CallLowering::ArgInfo &Arg,
+ ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk) override {
+ assert(VAs.size() >= 2 && "Expected at least 2 VAs.");
+ const CCValAssign &VALo = VAs[0];
+ const CCValAssign &VAHi = VAs[1];
+
+ assert(VAHi.needsCustom() && "Value doesn't need custom handling");
+ assert(VALo.getValNo() == VAHi.getValNo() &&
+ "Values belong to different arguments");
+
+ assert(VALo.getLocVT() == MVT::i32 && VAHi.getLocVT() == MVT::i32 &&
+ VALo.getValVT() == MVT::f64 && VAHi.getValVT() == MVT::f64 &&
+ "unexpected custom value");
+
+ Register NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
+ MRI.createGenericVirtualRegister(LLT::scalar(32))};
+ MIRBuilder.buildUnmerge(NewRegs, Arg.Regs[0]);
+
+ if (VAHi.isMemLoc()) {
+ LLT MemTy(VAHi.getLocVT());
+
+ MachinePointerInfo MPO;
+ Register StackAddr = getStackAddress(
+ MemTy.getSizeInBytes(), VAHi.getLocMemOffset(), MPO, Arg.Flags[0]);
+
+ assignValueToAddress(NewRegs[1], StackAddr, MemTy, MPO,
+ const_cast<CCValAssign &>(VAHi));
+ }
+
+ auto assignFunc = [=]() {
+ assignValueToReg(NewRegs[0], VALo.getLocReg(), VALo);
+ if (VAHi.isRegLoc())
+ assignValueToReg(NewRegs[1], VAHi.getLocReg(), VAHi);
+ };
+
+ if (Thunk) {
+ *Thunk = assignFunc;
+ return 1;
+ }
+
+ assignFunc();
+ return 1;
+ }
+
+private:
+ MachineInstrBuilder MIB;
+
+ // Cache the SP register vreg if we need it more than once in this call site.
+ Register SPReg;
+
+ const RISCVSubtarget &Subtarget;
};
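
The new assignCustomValue covers the RV32 case where an f64 argument was assigned to two i32 locations: the value is unmerged into 32-bit halves, the low half going to a GPR and the high half to a GPR or a stack slot. A standalone sketch, not part of the patch, of the bit-level split:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  double D = 3.141592653589793;
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));    // f64 as raw 64-bit pattern
  uint32_t Lo = (uint32_t)Bits;            // goes in the first GPR
  uint32_t Hi = (uint32_t)(Bits >> 32);    // second GPR or stack slot
  std::printf("lo = 0x%08x, hi = 0x%08x\n", (unsigned)Lo, (unsigned)Hi);
}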
struct RISCVIncomingValueAssigner : public CallLowering::IncomingValueAssigner {
@@ -103,33 +186,105 @@ public:
const DataLayout &DL = MF.getDataLayout();
const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
- return RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT,
- LocInfo, Flags, State, /*IsFixed=*/true, IsRet,
- Info.Ty, *Subtarget.getTargetLowering(),
- /*FirstMaskArgument=*/std::nullopt);
+ if (LocVT.isScalableVector())
+ MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
+
+ if (RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT,
+ LocInfo, Flags, State, /*IsFixed=*/true, IsRet, Info.Ty,
+ *Subtarget.getTargetLowering(),
+ /*FirstMaskArgument=*/std::nullopt))
+ return true;
+
+ StackSize = State.getStackSize();
+ return false;
}
};
struct RISCVIncomingValueHandler : public CallLowering::IncomingValueHandler {
RISCVIncomingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
- : IncomingValueHandler(B, MRI) {}
+ : IncomingValueHandler(B, MRI),
+ Subtarget(MIRBuilder.getMF().getSubtarget<RISCVSubtarget>()) {}
Register getStackAddress(uint64_t MemSize, int64_t Offset,
MachinePointerInfo &MPO,
ISD::ArgFlagsTy Flags) override {
- llvm_unreachable("not implemented");
+ MachineFrameInfo &MFI = MIRBuilder.getMF().getFrameInfo();
+
+ int FI = MFI.CreateFixedObject(MemSize, Offset, /*Immutable=*/true);
+ MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
+ return MIRBuilder.buildFrameIndex(LLT::pointer(0, Subtarget.getXLen()), FI)
+ .getReg(0);
}
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
- MachinePointerInfo &MPO, CCValAssign &VA) override {
- llvm_unreachable("not implemented");
+ const MachinePointerInfo &MPO,
+ const CCValAssign &VA) override {
+ MachineFunction &MF = MIRBuilder.getMF();
+ auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, MemTy,
+ inferAlignFromPtrInfo(MF, MPO));
+ MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) override {
- // Copy argument received in physical register to desired VReg.
+ const CCValAssign &VA) override {
+ markPhysRegUsed(PhysReg);
+ IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
+ }
+
+ unsigned assignCustomValue(CallLowering::ArgInfo &Arg,
+ ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk) override {
+ assert(VAs.size() >= 2 && "Expected at least 2 VAs.");
+ const CCValAssign &VALo = VAs[0];
+ const CCValAssign &VAHi = VAs[1];
+
+ assert(VAHi.needsCustom() && "Value doesn't need custom handling");
+ assert(VALo.getValNo() == VAHi.getValNo() &&
+ "Values belong to different arguments");
+
+ assert(VALo.getLocVT() == MVT::i32 && VAHi.getLocVT() == MVT::i32 &&
+ VALo.getValVT() == MVT::f64 && VAHi.getValVT() == MVT::f64 &&
+ "unexpected custom value");
+
+ Register NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
+ MRI.createGenericVirtualRegister(LLT::scalar(32))};
+
+ if (VAHi.isMemLoc()) {
+ LLT MemTy(VAHi.getLocVT());
+
+ MachinePointerInfo MPO;
+ Register StackAddr = getStackAddress(
+ MemTy.getSizeInBytes(), VAHi.getLocMemOffset(), MPO, Arg.Flags[0]);
+
+ assignValueToAddress(NewRegs[1], StackAddr, MemTy, MPO,
+ const_cast<CCValAssign &>(VAHi));
+ }
+
+ assignValueToReg(NewRegs[0], VALo.getLocReg(), VALo);
+ if (VAHi.isRegLoc())
+ assignValueToReg(NewRegs[1], VAHi.getLocReg(), VAHi);
+
+ MIRBuilder.buildMergeLikeInstr(Arg.Regs[0], NewRegs);
+
+ return 1;
+ }
+
+ /// How the physical register gets marked varies between formal
+ /// parameters (it's a basic-block live-in), and a call instruction
+ /// (it's an implicit-def of the BL).
+ virtual void markPhysRegUsed(MCRegister PhysReg) = 0;
+
+private:
+ const RISCVSubtarget &Subtarget;
+};
+
+struct RISCVFormalArgHandler : public RISCVIncomingValueHandler {
+ RISCVFormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
+ : RISCVIncomingValueHandler(B, MRI) {}
+
+ void markPhysRegUsed(MCRegister PhysReg) override {
+ MIRBuilder.getMRI()->addLiveIn(PhysReg);
MIRBuilder.getMBB().addLiveIn(PhysReg);
- MIRBuilder.buildCopy(ValVReg, PhysReg);
}
};
@@ -138,14 +293,11 @@ struct RISCVCallReturnHandler : public RISCVIncomingValueHandler {
MachineInstrBuilder &MIB)
: RISCVIncomingValueHandler(B, MRI), MIB(MIB) {}
- MachineInstrBuilder MIB;
-
- void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) override {
- // Copy argument received in physical register to desired VReg.
+ void markPhysRegUsed(MCRegister PhysReg) override {
MIB.addDef(PhysReg, RegState::Implicit);
- MIRBuilder.buildCopy(ValVReg, PhysReg);
}
+
+ MachineInstrBuilder MIB;
};
} // namespace
@@ -153,6 +305,80 @@ struct RISCVCallReturnHandler : public RISCVIncomingValueHandler {
RISCVCallLowering::RISCVCallLowering(const RISCVTargetLowering &TLI)
: CallLowering(&TLI) {}
+/// Return true if scalable vector with ScalarTy is legal for lowering.
+static bool isLegalElementTypeForRVV(Type *EltTy,
+ const RISCVSubtarget &Subtarget) {
+ if (EltTy->isPointerTy())
+ return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
+ if (EltTy->isIntegerTy(1) || EltTy->isIntegerTy(8) ||
+ EltTy->isIntegerTy(16) || EltTy->isIntegerTy(32))
+ return true;
+ if (EltTy->isIntegerTy(64))
+ return Subtarget.hasVInstructionsI64();
+ if (EltTy->isHalfTy())
+ return Subtarget.hasVInstructionsF16();
+ if (EltTy->isBFloatTy())
+ return Subtarget.hasVInstructionsBF16();
+ if (EltTy->isFloatTy())
+ return Subtarget.hasVInstructionsF32();
+ if (EltTy->isDoubleTy())
+ return Subtarget.hasVInstructionsF64();
+ return false;
+}
+
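+// Editorial aside, not part of the patch: isLegalElementTypeForRVV gates
+// scalable-vector lowering on the V-extension capabilities. i1/i8/i16/i32
+// elements are always acceptable once vector instructions exist, pointer
+// elements additionally depend on XLen, and i64, half, bfloat, float and
+// double each require the corresponding hasVInstructions* feature. A
+// condensed sketch follows, with hypothetical feature flags standing in
+// for the subtarget queries:

#include <cstdio>
#include <string>

struct Features { bool I64, F16, BF16, F32, F64; };  // hypothetical flags

static bool legalEltType(const std::string &Ty, const Features &F) {
  if (Ty == "i1" || Ty == "i8" || Ty == "i16" || Ty == "i32")
    return true;                      // always legal with any V config
  if (Ty == "i64")    return F.I64;   // hasVInstructionsI64
  if (Ty == "half")   return F.F16;   // hasVInstructionsF16
  if (Ty == "bfloat") return F.BF16;  // hasVInstructionsBF16
  if (Ty == "float")  return F.F32;   // hasVInstructionsF32
  if (Ty == "double") return F.F64;   // hasVInstructionsF64
  return false;
}

int main() {
  Features Zve32x{false, false, false, false, false};  // minimal profile
  std::printf("i32: %d  i64: %d  float: %d\n", legalEltType("i32", Zve32x),
              legalEltType("i64", Zve32x), legalEltType("float", Zve32x));
}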
+// TODO: Support all argument types.
+// TODO: Remove IsLowerArgs argument by adding support for vectors in lowerCall.
+static bool isSupportedArgumentType(Type *T, const RISCVSubtarget &Subtarget,
+ bool IsLowerArgs = false) {
+ // TODO: Integers larger than 2*XLen are passed indirectly which is not
+ // supported yet.
+ if (T->isIntegerTy())
+ return T->getIntegerBitWidth() <= Subtarget.getXLen() * 2;
+ if (T->isFloatTy() || T->isDoubleTy())
+ return true;
+ if (T->isPointerTy())
+ return true;
+ // TODO: Support fixed vector types.
+ if (IsLowerArgs && T->isVectorTy() && Subtarget.hasVInstructions() &&
+ T->isScalableTy() &&
+ isLegalElementTypeForRVV(T->getScalarType(), Subtarget))
+ return true;
+ return false;
+}
+
+// TODO: Only integer, pointer and aggregate types are supported now.
+// TODO: Remove IsLowerRetVal argument by adding support for vectors in
+// lowerCall.
+static bool isSupportedReturnType(Type *T, const RISCVSubtarget &Subtarget,
+ bool IsLowerRetVal = false) {
+ // TODO: Integers larger than 2*XLen are passed indirectly which is not
+ // supported yet.
+ if (T->isIntegerTy())
+ return T->getIntegerBitWidth() <= Subtarget.getXLen() * 2;
+ if (T->isFloatTy() || T->isDoubleTy())
+ return true;
+ if (T->isPointerTy())
+ return true;
+
+ if (T->isArrayTy())
+ return isSupportedReturnType(T->getArrayElementType(), Subtarget);
+
+ if (T->isStructTy()) {
+ auto StructT = cast<StructType>(T);
+ for (unsigned i = 0, e = StructT->getNumElements(); i != e; ++i)
+ if (!isSupportedReturnType(StructT->getElementType(i), Subtarget))
+ return false;
+ return true;
+ }
+
+ if (IsLowerRetVal && T->isVectorTy() && Subtarget.hasVInstructions() &&
+ T->isScalableTy() &&
+ isLegalElementTypeForRVV(T->getScalarType(), Subtarget))
+ return true;
+
+ return false;
+}
+
bool RISCVCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
const Value *Val,
ArrayRef<Register> VRegs,
@@ -160,8 +386,9 @@ bool RISCVCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
if (!Val)
return true;
- // TODO: Only integer, pointer and aggregate types are supported now.
- if (!Val->getType()->isIntOrPtrTy() && !Val->getType()->isAggregateType())
+ const RISCVSubtarget &Subtarget =
+ MIRBuilder.getMF().getSubtarget<RISCVSubtarget>();
+ if (!isSupportedReturnType(Val->getType(), Subtarget, /*IsLowerRetVal=*/true))
return false;
MachineFunction &MF = MIRBuilder.getMF();
@@ -196,25 +423,89 @@ bool RISCVCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
return true;
}
+/// If there are varargs that were passed in a0-a7, the data in those registers
+/// must be copied to the varargs save area on the stack.
+void RISCVCallLowering::saveVarArgRegisters(
+ MachineIRBuilder &MIRBuilder, CallLowering::IncomingValueHandler &Handler,
+ IncomingValueAssigner &Assigner, CCState &CCInfo) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
+ unsigned XLenInBytes = Subtarget.getXLen() / 8;
+ ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
+
+ // Size of the vararg save area. For now, the varargs save area is either
+ // zero or large enough to hold a0-a7.
+ int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
+ int FI;
+
+ // If all registers are allocated, then all varargs must be passed on the
+ // stack and we don't need to save any argregs.
+ if (VarArgsSaveSize == 0) {
+ int VaArgOffset = Assigner.StackSize;
+ FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
+ } else {
+ int VaArgOffset = -VarArgsSaveSize;
+ FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
+
+ // If saving an odd number of registers then create an extra stack slot to
+ // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
+ // offsets to even-numbered registered remain 2*XLEN-aligned.
+      // offsets to even-numbered registers remain 2*XLEN-aligned.
+ MFI.CreateFixedObject(XLenInBytes,
+ VaArgOffset - static_cast<int>(XLenInBytes), true);
+ VarArgsSaveSize += XLenInBytes;
+ }
+
+ const LLT p0 = LLT::pointer(MF.getDataLayout().getAllocaAddrSpace(),
+ Subtarget.getXLen());
+ const LLT sXLen = LLT::scalar(Subtarget.getXLen());
+
+ auto FIN = MIRBuilder.buildFrameIndex(p0, FI);
+ auto Offset = MIRBuilder.buildConstant(
+ MRI.createGenericVirtualRegister(sXLen), XLenInBytes);
+
+ // Copy the integer registers that may have been used for passing varargs
+ // to the vararg save area.
+ const MVT XLenVT = Subtarget.getXLenVT();
+ for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
+ const Register VReg = MRI.createGenericVirtualRegister(sXLen);
+ Handler.assignValueToReg(
+ VReg, ArgRegs[I],
+ CCValAssign::getReg(I + MF.getFunction().getNumOperands(), XLenVT,
+ ArgRegs[I], XLenVT, CCValAssign::Full));
+ auto MPO =
+ MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes);
+ MIRBuilder.buildStore(VReg, FIN, MPO, inferAlignFromPtrInfo(MF, MPO));
+ FIN = MIRBuilder.buildPtrAdd(MRI.createGenericVirtualRegister(p0),
+ FIN.getReg(0), Offset);
+ }
+ }
+
+ // Record the frame index of the first variable argument, which is the
+ // value needed by G_VASTART.
+ RVFI->setVarArgsFrameIndex(FI);
+ RVFI->setVarArgsSaveSize(VarArgsSaveSize);
+}
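
The sizing logic above depends only on XLEN and on how many of the eight argument GPRs a0-a7 are still unallocated. A minimal standalone sketch of the arithmetic (the helper name is ours):

    // Standalone sketch of the vararg save-area sizing; varArgsSaveSize is
    // a hypothetical helper mirroring the code above.
    #include <cassert>

    static int varArgsSaveSize(unsigned XLen, unsigned FirstUnallocated) {
      assert(FirstUnallocated <= 8 && "only a0-a7 hold arguments");
      unsigned XLenInBytes = XLen / 8;
      int Size = XLenInBytes * (8 - FirstUnallocated);
      // An odd number of saved registers gets one extra slot of padding so
      // the save area stays 2*XLEN-aligned.
      if (Size != 0 && (FirstUnallocated % 2))
        Size += XLenInBytes;
      return Size;
    }
    // e.g. RV64 with 3 named args in registers: (8 - 3) * 8 = 40, padded
    // to 48 because an odd number of registers (5) is being saved.
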
+
bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
const Function &F,
ArrayRef<ArrayRef<Register>> VRegs,
FunctionLoweringInfo &FLI) const {
- // Early exit if there are no arguments.
- if (F.arg_empty())
+ // Early exit if there are no arguments. Varargs are not part of F.args()
+ // but must still be lowered.
+ if (F.arg_empty() && !F.isVarArg())
return true;
- // TODO: Support vararg functions.
- if (F.isVarArg())
- return false;
-
- // TODO: Support all argument types.
+ const RISCVSubtarget &Subtarget =
+ MIRBuilder.getMF().getSubtarget<RISCVSubtarget>();
for (auto &Arg : F.args()) {
- if (Arg.getType()->isIntegerTy())
- continue;
- if (Arg.getType()->isPointerTy())
- continue;
- return false;
+ if (!isSupportedArgumentType(Arg.getType(), Subtarget,
+ /*IsLowerArgs=*/true))
+ return false;
}
MachineFunction &MF = MIRBuilder.getMF();
@@ -239,10 +530,18 @@ bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
RISCVIncomingValueAssigner Assigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
/*IsRet=*/false);
- RISCVIncomingValueHandler Handler(MIRBuilder, MF.getRegInfo());
+ RISCVFormalArgHandler Handler(MIRBuilder, MF.getRegInfo());
- return determineAndHandleAssignments(Handler, Assigner, SplitArgInfos,
- MIRBuilder, CC, F.isVarArg());
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, F.isVarArg(), MIRBuilder.getMF(), ArgLocs, F.getContext());
+ if (!determineAssignments(Assigner, SplitArgInfos, CCInfo) ||
+ !handleAssignments(Handler, SplitArgInfos, CCInfo, ArgLocs, MIRBuilder))
+ return false;
+
+ if (F.isVarArg())
+ saveVarArgRegisters(MIRBuilder, Handler, Assigner, CCInfo);
+
+ return true;
}
bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
@@ -252,21 +551,20 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
const Function &F = MF.getFunction();
CallingConv::ID CC = F.getCallingConv();
- // TODO: Support vararg functions.
- if (Info.IsVarArg)
- return false;
-
- // TODO: Support all argument types.
+ const RISCVSubtarget &Subtarget =
+ MIRBuilder.getMF().getSubtarget<RISCVSubtarget>();
for (auto &AInfo : Info.OrigArgs) {
- if (AInfo.Ty->isIntegerTy())
- continue;
- if (AInfo.Ty->isPointerTy())
- continue;
- if (AInfo.Ty->isFloatingPointTy())
- continue;
- return false;
+ if (!isSupportedArgumentType(AInfo.Ty, Subtarget))
+ return false;
}
+ if (!Info.OrigRet.Ty->isVoidTy() &&
+ !isSupportedReturnType(Info.OrigRet.Ty, Subtarget))
+ return false;
+
+ MachineInstrBuilder CallSeqStart =
+ MIRBuilder.buildInstr(RISCV::ADJCALLSTACKDOWN);
+
SmallVector<ArgInfo, 32> SplitArgInfos;
SmallVector<ISD::OutputArg, 8> Outs;
for (auto &AInfo : Info.OrigArgs) {
@@ -279,14 +577,17 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// TODO: Support tail calls.
Info.IsTailCall = false;
+ // Select the recommended relocation type R_RISCV_CALL_PLT.
if (!Info.Callee.isReg())
- Info.Callee.setTargetFlags(RISCVII::MO_CALL);
+ Info.Callee.setTargetFlags(RISCVII::MO_PLT);
MachineInstrBuilder Call =
MIRBuilder
.buildInstrNoInsert(Info.Callee.isReg() ? RISCV::PseudoCALLIndirect
: RISCV::PseudoCALL)
.add(Info.Callee);
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ Call.addRegMask(TRI->getCallPreservedMask(MF, Info.CallConv));
RISCVOutgoingValueAssigner ArgAssigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
@@ -298,22 +599,26 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
MIRBuilder.insertInstr(Call);
+ CallSeqStart.addImm(ArgAssigner.StackSize).addImm(0);
+ MIRBuilder.buildInstr(RISCV::ADJCALLSTACKUP)
+ .addImm(ArgAssigner.StackSize)
+ .addImm(0);
+
+ // If Callee is a reg, it is used by a target-specific instruction and
+ // must therefore have a register class matching that instruction's
+ // constraint.
+ if (Call->getOperand(0).isReg())
+ constrainOperandRegClass(MF, *TRI, MF.getRegInfo(),
+ *Subtarget.getInstrInfo(),
+ *Subtarget.getRegBankInfo(), *Call,
+ Call->getDesc(), Call->getOperand(0), 0);
+
if (Info.OrigRet.Ty->isVoidTy())
return true;
- // TODO: Only integer, pointer and aggregate types are supported now.
- if (!Info.OrigRet.Ty->isIntOrPtrTy() && !Info.OrigRet.Ty->isAggregateType())
- return false;
-
SmallVector<ArgInfo, 4> SplitRetInfos;
splitToValueTypes(Info.OrigRet, SplitRetInfos, DL, CC);
- // Assignments should be handled *before* the merging of values takes place.
- // To ensure this, the insert point is temporarily adjusted to just after the
- // call instruction.
- MachineBasicBlock::iterator CallInsertPt = Call;
- MIRBuilder.setInsertPt(MIRBuilder.getMBB(), std::next(CallInsertPt));
-
RISCVIncomingValueAssigner RetAssigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
/*IsRet=*/true);
@@ -322,8 +627,5 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
MIRBuilder, CC, Info.IsVarArg))
return false;
- // Readjust insert point to end of basic block.
- MIRBuilder.setMBB(MIRBuilder.getMBB());
-
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.h
index d80a666f3489..abe704b4a645 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.h
@@ -42,6 +42,11 @@ public:
private:
bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val,
ArrayRef<Register> VRegs, MachineInstrBuilder &Ret) const;
+
+ void saveVarArgRegisters(MachineIRBuilder &MIRBuilder,
+ CallLowering::IncomingValueHandler &Handler,
+ IncomingValueAssigner &Assigner,
+ CCState &CCInfo) const;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 691439b3a18b..61bdbfc47d94 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -11,17 +11,23 @@
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCVRegisterBankInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "riscv-isel"
using namespace llvm;
+using namespace MIPatternMatch;
#define GET_GLOBALISEL_PREDICATE_BITSET
#include "RISCVGenGlobalISel.inc"
@@ -35,16 +41,86 @@ public:
const RISCVSubtarget &STI,
const RISCVRegisterBankInfo &RBI);
- bool select(MachineInstr &I) override;
+ bool select(MachineInstr &MI) override;
static const char *getName() { return DEBUG_TYPE; }
private:
+ const TargetRegisterClass *
+ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) const;
+
+ bool isRegInGprb(Register Reg, MachineRegisterInfo &MRI) const;
+ bool isRegInFprb(Register Reg, MachineRegisterInfo &MRI) const;
+
+ // tblgen-erated 'select' implementation, used as the initial selector for
+ // the patterns that don't require complex C++.
bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
+ // A lowering phase that runs before any selection attempts.
+ // It may modify the instruction in place.
+ void preISelLower(MachineInstr &MI, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI);
+
+ bool replacePtrWithInt(MachineOperand &Op, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI);
+
+ // Custom selection methods
+ bool selectCopy(MachineInstr &MI, MachineRegisterInfo &MRI) const;
+ bool selectImplicitDef(MachineInstr &MI, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) const;
+ bool materializeImm(Register Reg, int64_t Imm, MachineIRBuilder &MIB) const;
+ bool selectAddr(MachineInstr &MI, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI, bool IsLocal = true,
+ bool IsExternWeak = false) const;
+ bool selectSExtInreg(MachineInstr &MI, MachineIRBuilder &MIB) const;
+ bool selectSelect(MachineInstr &MI, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) const;
+ bool selectFPCompare(MachineInstr &MI, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) const;
+ bool selectIntrinsicWithSideEffects(MachineInstr &MI, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) const;
+ void emitFence(AtomicOrdering FenceOrdering, SyncScope::ID FenceSSID,
+ MachineIRBuilder &MIB) const;
+ bool selectMergeValues(MachineInstr &MI, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) const;
+ bool selectUnmergeValues(MachineInstr &MI, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) const;
+
+ ComplexRendererFns selectShiftMask(MachineOperand &Root) const;
+ ComplexRendererFns selectAddrRegImm(MachineOperand &Root) const;
+
+ ComplexRendererFns selectSHXADDOp(MachineOperand &Root, unsigned ShAmt) const;
+ template <unsigned ShAmt>
+ ComplexRendererFns selectSHXADDOp(MachineOperand &Root) const {
+ return selectSHXADDOp(Root, ShAmt);
+ }
+
+ ComplexRendererFns selectSHXADD_UWOp(MachineOperand &Root,
+ unsigned ShAmt) const;
+ template <unsigned ShAmt>
+ ComplexRendererFns selectSHXADD_UWOp(MachineOperand &Root) const {
+ return selectSHXADD_UWOp(Root, ShAmt);
+ }
+
+ // Custom renderers for tablegen
+ void renderNegImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+ void renderImmSubFromXLen(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+ void renderImmSubFrom32(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+ void renderImmPlus1(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+ void renderImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+
+ void renderTrailingZeros(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+
const RISCVSubtarget &STI;
const RISCVInstrInfo &TII;
const RISCVRegisterInfo &TRI;
const RISCVRegisterBankInfo &RBI;
+ const RISCVTargetMachine &TM;
// FIXME: This is necessary because DAGISel uses "Subtarget->" and GlobalISel
// uses "STI." in the code generated by TableGen. We need to unify the name of
@@ -70,6 +146,7 @@ RISCVInstructionSelector::RISCVInstructionSelector(
const RISCVTargetMachine &TM, const RISCVSubtarget &STI,
const RISCVRegisterBankInfo &RBI)
: STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI),
+ TM(TM),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "RISCVGenGlobalISel.inc"
@@ -80,19 +157,1111 @@ RISCVInstructionSelector::RISCVInstructionSelector(
{
}
-bool RISCVInstructionSelector::select(MachineInstr &I) {
+InstructionSelector::ComplexRendererFns
+RISCVInstructionSelector::selectShiftMask(MachineOperand &Root) const {
+ // TODO: Also check if we are seeing the result of an AND operation which
+ // could be bypassed since we only check the lower log2(xlen) bits.
+ return {{[=](MachineInstrBuilder &MIB) { MIB.add(Root); }}};
+}
+
+InstructionSelector::ComplexRendererFns
+RISCVInstructionSelector::selectSHXADDOp(MachineOperand &Root,
+ unsigned ShAmt) const {
+ using namespace llvm::MIPatternMatch;
+ MachineFunction &MF = *Root.getParent()->getParent()->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ if (!Root.isReg())
+ return std::nullopt;
+ Register RootReg = Root.getReg();
+
+ const unsigned XLen = STI.getXLen();
+ APInt Mask, C2;
+ Register RegY;
+ std::optional<bool> LeftShift;
+ // (and (shl y, c2), mask)
+ if (mi_match(RootReg, MRI,
+ m_GAnd(m_GShl(m_Reg(RegY), m_ICst(C2)), m_ICst(Mask))))
+ LeftShift = true;
+ // (and (lshr y, c2), mask)
+ else if (mi_match(RootReg, MRI,
+ m_GAnd(m_GLShr(m_Reg(RegY), m_ICst(C2)), m_ICst(Mask))))
+ LeftShift = false;
+
+ if (LeftShift.has_value()) {
+ if (*LeftShift)
+ Mask &= maskTrailingZeros<uint64_t>(C2.getLimitedValue());
+ else
+ Mask &= maskTrailingOnes<uint64_t>(XLen - C2.getLimitedValue());
+
+ if (Mask.isShiftedMask()) {
+ unsigned Leading = XLen - Mask.getActiveBits();
+ unsigned Trailing = Mask.countr_zero();
+ // Given (and (shl y, c2), mask) where mask has no leading zeros and c3
+ // trailing zeros, we can use an SRLI by c3 - c2 followed by a SHXADD.
+ if (*LeftShift && Leading == 0 && C2.ult(Trailing) && Trailing == ShAmt) {
+ Register DstReg =
+ MRI.createGenericVirtualRegister(MRI.getType(RootReg));
+ return {{[=](MachineInstrBuilder &MIB) {
+ MachineIRBuilder(*MIB.getInstr())
+ .buildInstr(RISCV::SRLI, {DstReg}, {RegY})
+ .addImm(Trailing - C2.getLimitedValue());
+ MIB.addReg(DstReg);
+ }}};
+ }
+
+ // Given (and (lshr y, c2), mask) where mask has c2 leading zeros and c3
+ // trailing zeros, we can use an SRLI by c2 + c3 followed by a SHXADD.
+ if (!*LeftShift && Leading == C2 && Trailing == ShAmt) {
+ Register DstReg =
+ MRI.createGenericVirtualRegister(MRI.getType(RootReg));
+ return {{[=](MachineInstrBuilder &MIB) {
+ MachineIRBuilder(*MIB.getInstr())
+ .buildInstr(RISCV::SRLI, {DstReg}, {RegY})
+ .addImm(Leading + Trailing);
+ MIB.addReg(DstReg);
+ }}};
+ }
+ }
+ }
+
+ LeftShift.reset();
+
+ // (shl (and y, mask), c2)
+ if (mi_match(RootReg, MRI,
+ m_GShl(m_OneNonDBGUse(m_GAnd(m_Reg(RegY), m_ICst(Mask))),
+ m_ICst(C2))))
+ LeftShift = true;
+ // (lshr (and y, mask), c2)
+ else if (mi_match(RootReg, MRI,
+ m_GLShr(m_OneNonDBGUse(m_GAnd(m_Reg(RegY), m_ICst(Mask))),
+ m_ICst(C2))))
+ LeftShift = false;
+
+ if (LeftShift.has_value() && Mask.isShiftedMask()) {
+ unsigned Leading = XLen - Mask.getActiveBits();
+ unsigned Trailing = Mask.countr_zero();
+
+ // Given (shl (and y, mask), c2) where mask has 32 leading zeros and c3
+ // trailing zeros: if c2 + c3 == ShAmt, we can emit SRLIW + SHXADD.
+ bool Cond = *LeftShift && Leading == 32 && Trailing > 0 &&
+ (Trailing + C2.getLimitedValue()) == ShAmt;
+ if (!Cond)
+ // Given (lshr (and y, mask), c2) where mask has 32 leading zeros and c3
+ // trailing zeros: if c3 - c2 == ShAmt, we can emit SRLIW + SHXADD.
+ Cond = !*LeftShift && Leading == 32 && C2.ult(Trailing) &&
+ (Trailing - C2.getLimitedValue()) == ShAmt;
+
+ if (Cond) {
+ Register DstReg = MRI.createGenericVirtualRegister(MRI.getType(RootReg));
+ return {{[=](MachineInstrBuilder &MIB) {
+ MachineIRBuilder(*MIB.getInstr())
+ .buildInstr(RISCV::SRLIW, {DstReg}, {RegY})
+ .addImm(Trailing);
+ MIB.addReg(DstReg);
+ }}};
+ }
+ }
+
+ return std::nullopt;
+}
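
The mask tests above are plain bit arithmetic on a shifted mask's leading and trailing zero counts. A standalone sketch of the first (and (shl y, c2), mask) case, using 64-bit integers in place of APInt and assuming XLen == 64:

    // Standalone sketch of the first SHXADD pattern check above; plain
    // uint64_t stands in for APInt, XLen is assumed to be 64.
    #include <bit>
    #include <cassert>
    #include <cstdint>

    static bool matchesShlAndCase(uint64_t Mask, unsigned C2,
                                  unsigned ShAmt) {
      assert(C2 < 64 && ShAmt >= 1 && ShAmt <= 3);
      Mask &= ~uint64_t(0) << C2; // maskTrailingZeros(C2)
      if (Mask == 0)
        return false;
      unsigned Trailing = std::countr_zero(Mask);
      uint64_t Run = Mask >> Trailing;
      if ((Run & (Run + 1)) != 0) // ones are not contiguous,
        return false;             // i.e. not a shifted mask
      unsigned Leading = std::countl_zero(Mask);
      // SRLI by (Trailing - C2) then SHXADD is valid when the mask has no
      // leading zeros and its trailing-zero count equals the shift amount.
      return Leading == 0 && C2 < Trailing && Trailing == ShAmt;
    }
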
+
+InstructionSelector::ComplexRendererFns
+RISCVInstructionSelector::selectSHXADD_UWOp(MachineOperand &Root,
+ unsigned ShAmt) const {
+ using namespace llvm::MIPatternMatch;
+ MachineFunction &MF = *Root.getParent()->getParent()->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ if (!Root.isReg())
+ return std::nullopt;
+ Register RootReg = Root.getReg();
+
+ // Given (and (shl x, c2), mask) where mask is a shifted mask with
+ // 32 - ShAmt leading zeros and c2 trailing zeros, we can use an SLLI by
+ // c2 - ShAmt followed by a SHXADD_UW with ShAmt as the shift amount.
+ APInt Mask, C2;
+ Register RegX;
+ if (mi_match(
+ RootReg, MRI,
+ m_OneNonDBGUse(m_GAnd(m_OneNonDBGUse(m_GShl(m_Reg(RegX), m_ICst(C2))),
+ m_ICst(Mask))))) {
+ Mask &= maskTrailingZeros<uint64_t>(C2.getLimitedValue());
+
+ if (Mask.isShiftedMask()) {
+ unsigned Leading = Mask.countl_zero();
+ unsigned Trailing = Mask.countr_zero();
+ if (Leading == 32 - ShAmt && C2 == Trailing && Trailing > ShAmt) {
+ Register DstReg =
+ MRI.createGenericVirtualRegister(MRI.getType(RootReg));
+ return {{[=](MachineInstrBuilder &MIB) {
+ MachineIRBuilder(*MIB.getInstr())
+ .buildInstr(RISCV::SLLI, {DstReg}, {RegX})
+ .addImm(C2.getLimitedValue() - ShAmt);
+ MIB.addReg(DstReg);
+ }}};
+ }
+ }
+ }
+
+ return std::nullopt;
+}
+
+InstructionSelector::ComplexRendererFns
+RISCVInstructionSelector::selectAddrRegImm(MachineOperand &Root) const {
+ MachineFunction &MF = *Root.getParent()->getParent()->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ if (!Root.isReg())
+ return std::nullopt;
+
+ MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
+ if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
+ }};
+ }
+
+ if (isBaseWithConstantOffset(Root, MRI)) {
+ MachineOperand &LHS = RootDef->getOperand(1);
+ MachineOperand &RHS = RootDef->getOperand(2);
+ MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
+ MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
+
+ int64_t RHSC = RHSDef->getOperand(1).getCImm()->getSExtValue();
+ if (isInt<12>(RHSC)) {
+ if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
+ }};
+
+ return {{[=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); }}};
+ }
+ }
+
+ // TODO: Need to get the immediate from a G_PTR_ADD. Should this be done in
+ // the combiner?
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }}};
+}
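
The fold above hinges on RISC-V loads and stores taking a signed 12-bit displacement. A minimal standalone sketch of the decision (helper names are ours):

    // Standalone sketch of the reg+imm folding rule above: fold a constant
    // offset only if it fits the signed 12-bit load/store immediate,
    // otherwise keep the computed pointer as the base with offset 0.
    #include <cstdint>
    #include <utility>

    static bool isInt12(int64_t X) { return X >= -2048 && X <= 2047; }

    static std::pair<bool, int64_t> foldAddrRegImm(int64_t Offset) {
      if (isInt12(Offset))
        return {true, Offset};  // use [base, Offset] directly
      return {false, 0};        // materialize the add, then [sum, 0]
    }
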
+
+/// Returns the RISCVCC::CondCode that corresponds to the CmpInst::Predicate CC.
+/// CC must be an ICMP predicate.
+static RISCVCC::CondCode getRISCVCCFromICmp(CmpInst::Predicate CC) {
+ switch (CC) {
+ default:
+ llvm_unreachable("Expected ICMP CmpInst::Predicate.");
+ case CmpInst::Predicate::ICMP_EQ:
+ return RISCVCC::COND_EQ;
+ case CmpInst::Predicate::ICMP_NE:
+ return RISCVCC::COND_NE;
+ case CmpInst::Predicate::ICMP_ULT:
+ return RISCVCC::COND_LTU;
+ case CmpInst::Predicate::ICMP_SLT:
+ return RISCVCC::COND_LT;
+ case CmpInst::Predicate::ICMP_UGE:
+ return RISCVCC::COND_GEU;
+ case CmpInst::Predicate::ICMP_SGE:
+ return RISCVCC::COND_GE;
+ }
+}
+
+static void getOperandsForBranch(Register CondReg, MachineRegisterInfo &MRI,
+ RISCVCC::CondCode &CC, Register &LHS,
+ Register &RHS) {
+ // Try to fold an ICmp. If that fails, use a NE compare with X0.
+ CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
+ if (!mi_match(CondReg, MRI, m_GICmp(m_Pred(Pred), m_Reg(LHS), m_Reg(RHS)))) {
+ LHS = CondReg;
+ RHS = RISCV::X0;
+ CC = RISCVCC::COND_NE;
+ return;
+ }
+
+ // We found an ICmp; do some canonicalizations.
+
+ // Adjust comparisons to use comparison with 0 if possible.
+ if (auto Constant = getIConstantVRegSExtVal(RHS, MRI)) {
+ switch (Pred) {
+ case CmpInst::Predicate::ICMP_SGT:
+ // Convert X > -1 to X >= 0
+ if (*Constant == -1) {
+ CC = RISCVCC::COND_GE;
+ RHS = RISCV::X0;
+ return;
+ }
+ break;
+ case CmpInst::Predicate::ICMP_SLT:
+ // Convert X < 1 to 0 >= X
+ if (*Constant == 1) {
+ CC = RISCVCC::COND_GE;
+ RHS = LHS;
+ LHS = RISCV::X0;
+ return;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ switch (Pred) {
+ default:
+ llvm_unreachable("Expected ICMP CmpInst::Predicate.");
+ case CmpInst::Predicate::ICMP_EQ:
+ case CmpInst::Predicate::ICMP_NE:
+ case CmpInst::Predicate::ICMP_ULT:
+ case CmpInst::Predicate::ICMP_SLT:
+ case CmpInst::Predicate::ICMP_UGE:
+ case CmpInst::Predicate::ICMP_SGE:
+ // These CCs are supported directly by RISC-V branches.
+ break;
+ case CmpInst::Predicate::ICMP_SGT:
+ case CmpInst::Predicate::ICMP_SLE:
+ case CmpInst::Predicate::ICMP_UGT:
+ case CmpInst::Predicate::ICMP_ULE:
+ // These CCs are not supported directly by RISC-V branches; reverse the
+ // direction of the CC and swap LHS and RHS to get one that is.
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ std::swap(LHS, RHS);
+ break;
+ }
+
+ CC = getRISCVCCFromICmp(Pred);
+ return;
+}
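
As a standalone illustration of the swap rule above (register operands reduced to plain ints; the predicate enum here is ours, not RISCVCC):

    // Standalone sketch: RISC-V only has BEQ/BNE/BLT/BGE/BLTU/BGEU, so the
    // four remaining predicates are handled by swapping the operands and
    // reversing the comparison direction.
    #include <utility>

    enum class Pred { EQ, NE, ULT, SLT, UGE, SGE, SGT, SLE, UGT, ULE };

    static Pred canonicalizeForBranch(Pred P, int &LHS, int &RHS) {
      switch (P) {
      case Pred::SGT: std::swap(LHS, RHS); return Pred::SLT; // a>b == b<a
      case Pred::SLE: std::swap(LHS, RHS); return Pred::SGE; // a<=b == b>=a
      case Pred::UGT: std::swap(LHS, RHS); return Pred::ULT;
      case Pred::ULE: std::swap(LHS, RHS); return Pred::UGE;
      default:        return P; // already directly supported
      }
    }
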
+
+bool RISCVInstructionSelector::select(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineIRBuilder MIB(MI);
+
+ preISelLower(MI, MIB, MRI);
+ const unsigned Opc = MI.getOpcode();
+
+ if (!MI.isPreISelOpcode() || Opc == TargetOpcode::G_PHI) {
+ if (Opc == TargetOpcode::PHI || Opc == TargetOpcode::G_PHI) {
+ const Register DefReg = MI.getOperand(0).getReg();
+ const LLT DefTy = MRI.getType(DefReg);
+
+ const RegClassOrRegBank &RegClassOrBank =
+ MRI.getRegClassOrRegBank(DefReg);
+
+ const TargetRegisterClass *DefRC =
+ RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
+ if (!DefRC) {
+ if (!DefTy.isValid()) {
+ LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
+ return false;
+ }
+
+ const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
+ DefRC = getRegClassForTypeOnBank(DefTy, RB);
+ if (!DefRC) {
+ LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
+ return false;
+ }
+ }
+
+ MI.setDesc(TII.get(TargetOpcode::PHI));
+ return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
+ }
- if (!isPreISelGenericOpcode(I.getOpcode())) {
// Certain non-generic instructions also need some special handling.
+ if (MI.isCopy())
+ return selectCopy(MI, MRI);
+
+ return true;
+ }
+
+ if (selectImpl(MI, *CoverageInfo))
+ return true;
+
+ switch (Opc) {
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_PTRTOINT:
+ case TargetOpcode::G_INTTOPTR:
+ case TargetOpcode::G_TRUNC:
+ return selectCopy(MI, MRI);
+ case TargetOpcode::G_CONSTANT: {
+ Register DstReg = MI.getOperand(0).getReg();
+ int64_t Imm = MI.getOperand(1).getCImm()->getSExtValue();
+
+ if (!materializeImm(DstReg, Imm, MIB))
+ return false;
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case TargetOpcode::G_FCONSTANT: {
+ // TODO: Use a constant pool for complex constants.
+ // TODO: Optimize +0.0 to use fcvt.d.w for s64 on rv32.
+ Register DstReg = MI.getOperand(0).getReg();
+ const APFloat &FPimm = MI.getOperand(1).getFPImm()->getValueAPF();
+ APInt Imm = FPimm.bitcastToAPInt();
+ unsigned Size = MRI.getType(DstReg).getSizeInBits();
+ if (Size == 32 || (Size == 64 && Subtarget->is64Bit())) {
+ Register GPRReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ if (!materializeImm(GPRReg, Imm.getSExtValue(), MIB))
+ return false;
+
+ unsigned Opcode = Size == 64 ? RISCV::FMV_D_X : RISCV::FMV_W_X;
+ auto FMV = MIB.buildInstr(Opcode, {DstReg}, {GPRReg});
+ if (!FMV.constrainAllUses(TII, TRI, RBI))
+ return false;
+ } else {
+ assert(Size == 64 && !Subtarget->is64Bit() &&
+ "Unexpected size or subtarget");
+ // Split into two pieces and build through the stack.
+ Register GPRRegHigh = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ Register GPRRegLow = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ if (!materializeImm(GPRRegHigh, Imm.extractBits(32, 32).getSExtValue(),
+ MIB))
+ return false;
+ if (!materializeImm(GPRRegLow, Imm.trunc(32).getSExtValue(), MIB))
+ return false;
+ MachineInstrBuilder PairF64 = MIB.buildInstr(
+ RISCV::BuildPairF64Pseudo, {DstReg}, {GPRRegLow, GPRRegHigh});
+ if (!PairF64.constrainAllUses(TII, TRI, RBI))
+ return false;
+ }
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case TargetOpcode::G_GLOBAL_VALUE: {
+ auto *GV = MI.getOperand(1).getGlobal();
+ if (GV->isThreadLocal()) {
+ // TODO: implement this case.
+ return false;
+ }
+
+ return selectAddr(MI, MIB, MRI, GV->isDSOLocal(),
+ GV->hasExternalWeakLinkage());
+ }
+ case TargetOpcode::G_JUMP_TABLE:
+ case TargetOpcode::G_CONSTANT_POOL:
+ return selectAddr(MI, MIB, MRI);
+ case TargetOpcode::G_BRCOND: {
+ Register LHS, RHS;
+ RISCVCC::CondCode CC;
+ getOperandsForBranch(MI.getOperand(0).getReg(), MRI, CC, LHS, RHS);
+
+ auto Bcc = MIB.buildInstr(RISCVCC::getBrCond(CC), {}, {LHS, RHS})
+ .addMBB(MI.getOperand(1).getMBB());
+ MI.eraseFromParent();
+ return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
+ }
+ case TargetOpcode::G_BRJT: {
+ // FIXME: Move to legalization?
+ const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ unsigned EntrySize = MJTI->getEntrySize(MF.getDataLayout());
+ assert((EntrySize == 4 || (Subtarget->is64Bit() && EntrySize == 8)) &&
+ "Unsupported jump-table entry size");
+ assert(
+ (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 ||
+ MJTI->getEntryKind() == MachineJumpTableInfo::EK_Custom32 ||
+ MJTI->getEntryKind() == MachineJumpTableInfo::EK_BlockAddress) &&
+ "Unexpected jump-table entry kind");
+
+ auto SLL =
+ MIB.buildInstr(RISCV::SLLI, {&RISCV::GPRRegClass}, {MI.getOperand(2)})
+ .addImm(Log2_32(EntrySize));
+ if (!SLL.constrainAllUses(TII, TRI, RBI))
+ return false;
+
+ // TODO: Use SHXADD. Moving to legalization would fix this automatically.
+ auto ADD = MIB.buildInstr(RISCV::ADD, {&RISCV::GPRRegClass},
+ {MI.getOperand(0), SLL.getReg(0)});
+ if (!ADD.constrainAllUses(TII, TRI, RBI))
+ return false;
+
+ unsigned LdOpc = EntrySize == 8 ? RISCV::LD : RISCV::LW;
+ auto Dest =
+ MIB.buildInstr(LdOpc, {&RISCV::GPRRegClass}, {ADD.getReg(0)})
+ .addImm(0)
+ .addMemOperand(MF.getMachineMemOperand(
+ MachinePointerInfo::getJumpTable(MF), MachineMemOperand::MOLoad,
+ EntrySize, Align(MJTI->getEntryAlignment(MF.getDataLayout()))));
+ if (!Dest.constrainAllUses(TII, TRI, RBI))
+ return false;
+
+ // If the Kind is EK_LabelDifference32, the table stores an offset from
+ // the location of the table. Add the table address to get an absolute
+ // address.
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32) {
+ Dest = MIB.buildInstr(RISCV::ADD, {&RISCV::GPRRegClass},
+ {Dest.getReg(0), MI.getOperand(0)});
+ if (!Dest.constrainAllUses(TII, TRI, RBI))
+ return false;
+ }
+
+ auto Branch =
+ MIB.buildInstr(RISCV::PseudoBRIND, {}, {Dest.getReg(0)}).addImm(0);
+ if (!Branch.constrainAllUses(TII, TRI, RBI))
+ return false;
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case TargetOpcode::G_BRINDIRECT:
+ MI.setDesc(TII.get(RISCV::PseudoBRIND));
+ MI.addOperand(MachineOperand::CreateImm(0));
+ return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+ case TargetOpcode::G_SEXT_INREG:
+ return selectSExtInreg(MI, MIB);
+ case TargetOpcode::G_FRAME_INDEX: {
+ // TODO: We may want to replace this code with the SelectionDAG patterns,
+ // which fail to get imported because they use FrameAddrRegImm, which is a
+ // ComplexPattern.
+ MI.setDesc(TII.get(RISCV::ADDI));
+ MI.addOperand(MachineOperand::CreateImm(0));
+ return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+ }
+ case TargetOpcode::G_SELECT:
+ return selectSelect(MI, MIB, MRI);
+ case TargetOpcode::G_FCMP:
+ return selectFPCompare(MI, MIB, MRI);
+ case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ return selectIntrinsicWithSideEffects(MI, MIB, MRI);
+ case TargetOpcode::G_FENCE: {
+ AtomicOrdering FenceOrdering =
+ static_cast<AtomicOrdering>(MI.getOperand(0).getImm());
+ SyncScope::ID FenceSSID =
+ static_cast<SyncScope::ID>(MI.getOperand(1).getImm());
+ emitFence(FenceOrdering, FenceSSID, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+ case TargetOpcode::G_IMPLICIT_DEF:
+ return selectImplicitDef(MI, MIB, MRI);
+ case TargetOpcode::G_MERGE_VALUES:
+ return selectMergeValues(MI, MIB, MRI);
+ case TargetOpcode::G_UNMERGE_VALUES:
+ return selectUnmergeValues(MI, MIB, MRI);
+ default:
+ return false;
+ }
+}
+
+bool RISCVInstructionSelector::selectMergeValues(
+ MachineInstr &MI, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) const {
+ assert(MI.getOpcode() == TargetOpcode::G_MERGE_VALUES);
+
+ // Build an F64 pair from the operands.
+ if (MI.getNumOperands() != 3)
+ return false;
+ Register Dst = MI.getOperand(0).getReg();
+ Register Lo = MI.getOperand(1).getReg();
+ Register Hi = MI.getOperand(2).getReg();
+ if (!isRegInFprb(Dst, MRI) || !isRegInGprb(Lo, MRI) || !isRegInGprb(Hi, MRI))
+ return false;
+ MI.setDesc(TII.get(RISCV::BuildPairF64Pseudo));
+ return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+}
+
+bool RISCVInstructionSelector::selectUnmergeValues(
+ MachineInstr &MI, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) const {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
+
+ // Split F64 Src into two s32 parts
+ if (MI.getNumOperands() != 3)
+ return false;
+ Register Src = MI.getOperand(2).getReg();
+ Register Lo = MI.getOperand(0).getReg();
+ Register Hi = MI.getOperand(1).getReg();
+ if (!isRegInFprb(Src, MRI) || !isRegInGprb(Lo, MRI) || !isRegInGprb(Hi, MRI))
+ return false;
+ MI.setDesc(TII.get(RISCV::SplitF64Pseudo));
+ return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+}
+
+bool RISCVInstructionSelector::replacePtrWithInt(MachineOperand &Op,
+ MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) {
+ Register PtrReg = Op.getReg();
+ assert(MRI.getType(PtrReg).isPointer() && "Operand is not a pointer!");
+
+ const LLT sXLen = LLT::scalar(STI.getXLen());
+ auto PtrToInt = MIB.buildPtrToInt(sXLen, PtrReg);
+ MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(RISCV::GPRBRegBankID));
+ Op.setReg(PtrToInt.getReg(0));
+ return select(*PtrToInt);
+}
+
+void RISCVInstructionSelector::preISelLower(MachineInstr &MI,
+ MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_PTR_ADD: {
+ Register DstReg = MI.getOperand(0).getReg();
+ const LLT sXLen = LLT::scalar(STI.getXLen());
+
+ replacePtrWithInt(MI.getOperand(1), MIB, MRI);
+ MI.setDesc(TII.get(TargetOpcode::G_ADD));
+ MRI.setType(DstReg, sXLen);
+ break;
+ }
+ case TargetOpcode::G_PTRMASK: {
+ Register DstReg = MI.getOperand(0).getReg();
+ const LLT sXLen = LLT::scalar(STI.getXLen());
+ replacePtrWithInt(MI.getOperand(1), MIB, MRI);
+ MI.setDesc(TII.get(TargetOpcode::G_AND));
+ MRI.setType(DstReg, sXLen);
+ }
+ }
+}
+
+void RISCVInstructionSelector::renderNegImm(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+ "Expected G_CONSTANT");
+ int64_t CstVal = MI.getOperand(1).getCImm()->getSExtValue();
+ MIB.addImm(-CstVal);
+}
+
+void RISCVInstructionSelector::renderImmSubFromXLen(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+ "Expected G_CONSTANT");
+ uint64_t CstVal = MI.getOperand(1).getCImm()->getZExtValue();
+ MIB.addImm(STI.getXLen() - CstVal);
+}
+
+void RISCVInstructionSelector::renderImmSubFrom32(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+ "Expected G_CONSTANT");
+ uint64_t CstVal = MI.getOperand(1).getCImm()->getZExtValue();
+ MIB.addImm(32 - CstVal);
+}
+
+void RISCVInstructionSelector::renderImmPlus1(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+ "Expected G_CONSTANT");
+ int64_t CstVal = MI.getOperand(1).getCImm()->getSExtValue();
+ MIB.addImm(CstVal + 1);
+}
+
+void RISCVInstructionSelector::renderImm(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+ "Expected G_CONSTANT");
+ int64_t CstVal = MI.getOperand(1).getCImm()->getSExtValue();
+ MIB.addImm(CstVal);
+}
+
+void RISCVInstructionSelector::renderTrailingZeros(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+ "Expected G_CONSTANT");
+ uint64_t C = MI.getOperand(1).getCImm()->getZExtValue();
+ MIB.addImm(llvm::countr_zero(C));
+}
+
+const TargetRegisterClass *RISCVInstructionSelector::getRegClassForTypeOnBank(
+ LLT Ty, const RegisterBank &RB) const {
+ if (RB.getID() == RISCV::GPRBRegBankID) {
+ if (Ty.getSizeInBits() <= 32 || (STI.is64Bit() && Ty.getSizeInBits() == 64))
+ return &RISCV::GPRRegClass;
+ }
+
+ if (RB.getID() == RISCV::FPRBRegBankID) {
+ if (Ty.getSizeInBits() == 32)
+ return &RISCV::FPR32RegClass;
+ if (Ty.getSizeInBits() == 64)
+ return &RISCV::FPR64RegClass;
+ }
+
+ // TODO: Non-GPR register classes.
+ return nullptr;
+}
+
+bool RISCVInstructionSelector::isRegInGprb(Register Reg,
+ MachineRegisterInfo &MRI) const {
+ return RBI.getRegBank(Reg, MRI, TRI)->getID() == RISCV::GPRBRegBankID;
+}
+
+bool RISCVInstructionSelector::isRegInFprb(Register Reg,
+ MachineRegisterInfo &MRI) const {
+ return RBI.getRegBank(Reg, MRI, TRI)->getID() == RISCV::FPRBRegBankID;
+}
+
+bool RISCVInstructionSelector::selectCopy(MachineInstr &MI,
+ MachineRegisterInfo &MRI) const {
+ Register DstReg = MI.getOperand(0).getReg();
+
+ if (DstReg.isPhysical())
+ return true;
+
+ const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(
+ MRI.getType(DstReg), *RBI.getRegBank(DstReg, MRI, TRI));
+ assert(DstRC &&
+ "Register class not available for LLT, register bank combination");
+
+ // No need to constrain SrcReg. It will get constrained when
+ // we hit another of its uses or its defs.
+ // Copies do not have constraints.
+ if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
+ LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(MI.getOpcode())
+ << " operand\n");
+ return false;
+ }
+
+ MI.setDesc(TII.get(RISCV::COPY));
+ return true;
+}
+
+bool RISCVInstructionSelector::selectImplicitDef(
+ MachineInstr &MI, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) const {
+ assert(MI.getOpcode() == TargetOpcode::G_IMPLICIT_DEF);
+
+ const Register DstReg = MI.getOperand(0).getReg();
+ const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(
+ MRI.getType(DstReg), *RBI.getRegBank(DstReg, MRI, TRI));
+
+ assert(DstRC &&
+ "Register class not available for LLT, register bank combination");
+
+ if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
+ LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(MI.getOpcode())
+ << " operand\n");
+ }
+ MI.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
+ return true;
+}
+
+bool RISCVInstructionSelector::materializeImm(Register DstReg, int64_t Imm,
+ MachineIRBuilder &MIB) const {
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+
+ if (Imm == 0) {
+ MIB.buildCopy(DstReg, Register(RISCV::X0));
+ RBI.constrainGenericRegister(DstReg, RISCV::GPRRegClass, MRI);
+ return true;
+ }
+
+ RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, *Subtarget);
+ unsigned NumInsts = Seq.size();
+ Register SrcReg = RISCV::X0;
+
+ for (unsigned i = 0; i < NumInsts; i++) {
+ Register TmpReg = i < NumInsts - 1
+ ? MRI.createVirtualRegister(&RISCV::GPRRegClass)
+ : DstReg;
+ const RISCVMatInt::Inst &I = Seq[i];
+ MachineInstr *Result;
+
+ switch (I.getOpndKind()) {
+ case RISCVMatInt::Imm:
+ // clang-format off
+ Result = MIB.buildInstr(I.getOpcode(), {TmpReg}, {})
+ .addImm(I.getImm());
+ // clang-format on
+ break;
+ case RISCVMatInt::RegX0:
+ Result = MIB.buildInstr(I.getOpcode(), {TmpReg},
+ {SrcReg, Register(RISCV::X0)});
+ break;
+ case RISCVMatInt::RegReg:
+ Result = MIB.buildInstr(I.getOpcode(), {TmpReg}, {SrcReg, SrcReg});
+ break;
+ case RISCVMatInt::RegImm:
+ Result =
+ MIB.buildInstr(I.getOpcode(), {TmpReg}, {SrcReg}).addImm(I.getImm());
+ break;
+ }
+
+ if (!constrainSelectedInstRegOperands(*Result, TII, TRI, RBI))
+ return false;
+
+ SrcReg = TmpReg;
+ }
+
+ return true;
+}
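
RISCVMatInt does the real work here. Its simplest output is the classic LUI+ADDI split of a 32-bit value, sketched standalone below; the +0x800 rounding compensates for ADDI sign-extending its 12-bit immediate.

    // Standalone sketch of the simplest materialization RISCVMatInt can
    // produce: LUI (upper 20 bits) followed by ADDI (signed low 12 bits).
    #include <cassert>
    #include <cstdint>

    struct LuiAddi { int64_t Hi20, Lo12; };

    static LuiAddi splitImm32(int32_t Imm) {
      int64_t Hi = ((int64_t)Imm + 0x800) >> 12; // rounds up when Lo12 < 0
      int64_t Lo = (int64_t)Imm - (Hi << 12);
      assert(Lo >= -2048 && Lo <= 2047);
      return {Hi & 0xFFFFF, Lo};
    }
    // e.g. 0x12345FFF -> lui 0x12346; addi -1 (0x12346000 - 1). Values
    // needing more than 32 bits take longer sequences on RV64, which
    // RISCVMatInt::generateInstSeq computes.
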
+
+bool RISCVInstructionSelector::selectAddr(MachineInstr &MI,
+ MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI,
+ bool IsLocal,
+ bool IsExternWeak) const {
+ assert((MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE ||
+ MI.getOpcode() == TargetOpcode::G_JUMP_TABLE ||
+ MI.getOpcode() == TargetOpcode::G_CONSTANT_POOL) &&
+ "Unexpected opcode");
+
+ const MachineOperand &DispMO = MI.getOperand(1);
+
+ Register DefReg = MI.getOperand(0).getReg();
+ const LLT DefTy = MRI.getType(DefReg);
+
+ // When HWASAN is used and tagging of global variables is enabled, they
+ // should be accessed via the GOT, since the tagged address of a global
+ // is incompatible with existing code models. This also applies to non-pic
+ // mode.
+ if (TM.isPositionIndependent() || Subtarget->allowTaggedGlobals()) {
+ if (IsLocal && !Subtarget->allowTaggedGlobals()) {
+ // Use PC-relative addressing to access the symbol. This generates the
+ // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
+ // %pcrel_lo(auipc)).
+ MI.setDesc(TII.get(RISCV::PseudoLLA));
+ return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+ }
+
+ // Use PC-relative addressing to access the GOT for this symbol, then
+ // load the address from the GOT. This generates the pattern (PseudoLGA
+ // sym), which expands to (ld (addi (auipc %got_pcrel_hi(sym))
+ // %pcrel_lo(auipc))).
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineMemOperand *MemOp = MF.getMachineMemOperand(
+ MachinePointerInfo::getGOT(MF),
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ DefTy, Align(DefTy.getSizeInBits() / 8));
+
+ auto Result = MIB.buildInstr(RISCV::PseudoLGA, {DefReg}, {})
+ .addDisp(DispMO, 0)
+ .addMemOperand(MemOp);
+
+ if (!constrainSelectedInstRegOperands(*Result, TII, TRI, RBI))
+ return false;
+
+ MI.eraseFromParent();
+ return true;
+ }
+
+ switch (TM.getCodeModel()) {
+ default: {
+ reportGISelFailure(const_cast<MachineFunction &>(*MF), *TPC, *MORE,
+ getName(), "Unsupported code model for lowering", MI);
+ return false;
+ }
+ case CodeModel::Small: {
+ // The code must lie within a single 2 GiB address range, between the
+ // absolute addresses -2 GiB and +2 GiB. This generates the pattern (addi
+ // (lui %hi(sym)) %lo(sym)).
+ Register AddrHiDest = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ MachineInstr *AddrHi = MIB.buildInstr(RISCV::LUI, {AddrHiDest}, {})
+ .addDisp(DispMO, 0, RISCVII::MO_HI);
+
+ if (!constrainSelectedInstRegOperands(*AddrHi, TII, TRI, RBI))
+ return false;
+
+ auto Result = MIB.buildInstr(RISCV::ADDI, {DefReg}, {AddrHiDest})
+ .addDisp(DispMO, 0, RISCVII::MO_LO);
+
+ if (!constrainSelectedInstRegOperands(*Result, TII, TRI, RBI))
+ return false;
+
+ MI.eraseFromParent();
return true;
}
+ case CodeModel::Medium:
+ // Emit LGA/LLA instead of the sequence it expands to because the pcrel_lo
+ // relocation needs to reference a label that points to the auipc
+ // instruction itself, not the global. This cannot be done inside the
+ // instruction selector.
+ if (IsExternWeak) {
+ // An extern weak symbol may be undefined, i.e. have value 0, which may
+ // not be within 2GiB of PC, so use GOT-indirect addressing to access the
+ // symbol. This generates the pattern (PseudoLGA sym), which expands to
+ // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineMemOperand *MemOp = MF.getMachineMemOperand(
+ MachinePointerInfo::getGOT(MF),
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ DefTy, Align(DefTy.getSizeInBits() / 8));
+
+ auto Result = MIB.buildInstr(RISCV::PseudoLGA, {DefReg}, {})
+ .addDisp(DispMO, 0)
+ .addMemOperand(MemOp);
+
+ if (!constrainSelectedInstRegOperands(*Result, TII, TRI, RBI))
+ return false;
+
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // Generate a sequence for accessing addresses within any 2GiB range
+ // within the address space. This generates the pattern (PseudoLLA sym),
+ // which expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
+ MI.setDesc(TII.get(RISCV::PseudoLLA));
+ return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+ }
+
+ return false;
+}
+
+bool RISCVInstructionSelector::selectSExtInreg(MachineInstr &MI,
+ MachineIRBuilder &MIB) const {
+ if (!STI.isRV64())
+ return false;
+
+ const MachineOperand &Size = MI.getOperand(2);
+ // Only Size == 32 (i.e. a sign extension from 32 bits) is acceptable here.
+ if (!Size.isImm() || Size.getImm() != 32)
+ return false;
+
+ const MachineOperand &Src = MI.getOperand(1);
+ const MachineOperand &Dst = MI.getOperand(0);
+ // addiw rd, rs, 0 (i.e. sext.w rd, rs)
+ MachineInstr *NewMI =
+ MIB.buildInstr(RISCV::ADDIW, {Dst.getReg()}, {Src.getReg()}).addImm(0U);
+
+ if (!constrainSelectedInstRegOperands(*NewMI, TII, TRI, RBI))
+ return false;
+
+ MI.eraseFromParent();
+ return true;
+}
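
What the selected ADDIW computes, as a one-line standalone sketch: adding zero in 32 bits and sign-extending the result is exactly a 32-to-64-bit sign extension.

    // Standalone sketch of sext.w (addiw rd, rs, 0) semantics.
    #include <cstdint>

    static int64_t sextW(int64_t X) {
      return (int64_t)(int32_t)X; // e.g. sextW(0xFFFFFFFF) == -1
    }
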
+
+bool RISCVInstructionSelector::selectSelect(MachineInstr &MI,
+ MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) const {
+ auto &SelectMI = cast<GSelect>(MI);
+
+ Register LHS, RHS;
+ RISCVCC::CondCode CC;
+ getOperandsForBranch(SelectMI.getCondReg(), MRI, CC, LHS, RHS);
+
+ Register DstReg = SelectMI.getReg(0);
+
+ unsigned Opc = RISCV::Select_GPR_Using_CC_GPR;
+ if (RBI.getRegBank(DstReg, MRI, TRI)->getID() == RISCV::FPRBRegBankID) {
+ unsigned Size = MRI.getType(DstReg).getSizeInBits();
+ Opc = Size == 32 ? RISCV::Select_FPR32_Using_CC_GPR
+ : RISCV::Select_FPR64_Using_CC_GPR;
+ }
+
+ MachineInstr *Result = MIB.buildInstr(Opc)
+ .addDef(DstReg)
+ .addReg(LHS)
+ .addReg(RHS)
+ .addImm(CC)
+ .addReg(SelectMI.getTrueReg())
+ .addReg(SelectMI.getFalseReg());
+ MI.eraseFromParent();
+ return constrainSelectedInstRegOperands(*Result, TII, TRI, RBI);
+}
+
+// Convert an FCMP predicate to one of the supported F or D instructions.
+static unsigned getFCmpOpcode(CmpInst::Predicate Pred, unsigned Size) {
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ switch (Pred) {
+ default:
+ llvm_unreachable("Unsupported predicate");
+ case CmpInst::FCMP_OLT:
+ return Size == 32 ? RISCV::FLT_S : RISCV::FLT_D;
+ case CmpInst::FCMP_OLE:
+ return Size == 32 ? RISCV::FLE_S : RISCV::FLE_D;
+ case CmpInst::FCMP_OEQ:
+ return Size == 32 ? RISCV::FEQ_S : RISCV::FEQ_D;
+ }
+}
+
+// Try legalizing an FCMP by swapping or inverting the predicate to one that
+// is supported.
+static bool legalizeFCmpPredicate(Register &LHS, Register &RHS,
+ CmpInst::Predicate &Pred, bool &NeedInvert) {
+ auto isLegalFCmpPredicate = [](CmpInst::Predicate Pred) {
+ return Pred == CmpInst::FCMP_OLT || Pred == CmpInst::FCMP_OLE ||
+ Pred == CmpInst::FCMP_OEQ;
+ };
+
+ assert(!isLegalFCmpPredicate(Pred) && "Predicate already legal?");
- if (selectImpl(I, *CoverageInfo))
+ CmpInst::Predicate InvPred = CmpInst::getSwappedPredicate(Pred);
+ if (isLegalFCmpPredicate(InvPred)) {
+ Pred = InvPred;
+ std::swap(LHS, RHS);
return true;
+ }
+
+ InvPred = CmpInst::getInversePredicate(Pred);
+ NeedInvert = true;
+ if (isLegalFCmpPredicate(InvPred)) {
+ Pred = InvPred;
+ return true;
+ }
+ InvPred = CmpInst::getSwappedPredicate(InvPred);
+ if (isLegalFCmpPredicate(InvPred)) {
+ Pred = InvPred;
+ std::swap(LHS, RHS);
+ return true;
+ }
return false;
}
+// Emit a sequence of instructions to compare LHS and RHS using Pred. Return
+// the result in DstReg.
+// FIXME: Maybe we should expand this earlier.
+bool RISCVInstructionSelector::selectFPCompare(MachineInstr &MI,
+ MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) const {
+ auto &CmpMI = cast<GFCmp>(MI);
+ CmpInst::Predicate Pred = CmpMI.getCond();
+
+ Register DstReg = CmpMI.getReg(0);
+ Register LHS = CmpMI.getLHSReg();
+ Register RHS = CmpMI.getRHSReg();
+
+ unsigned Size = MRI.getType(LHS).getSizeInBits();
+ assert((Size == 32 || Size == 64) && "Unexpected size");
+
+ Register TmpReg = DstReg;
+
+ bool NeedInvert = false;
+ // First try swapping operands or inverting.
+ if (legalizeFCmpPredicate(LHS, RHS, Pred, NeedInvert)) {
+ if (NeedInvert)
+ TmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ auto Cmp = MIB.buildInstr(getFCmpOpcode(Pred, Size), {TmpReg}, {LHS, RHS});
+ if (!Cmp.constrainAllUses(TII, TRI, RBI))
+ return false;
+ } else if (Pred == CmpInst::FCMP_ONE || Pred == CmpInst::FCMP_UEQ) {
+ // fcmp one LHS, RHS => (OR (FLT LHS, RHS), (FLT RHS, LHS))
+ NeedInvert = Pred == CmpInst::FCMP_UEQ;
+ auto Cmp1 = MIB.buildInstr(getFCmpOpcode(CmpInst::FCMP_OLT, Size),
+ {&RISCV::GPRRegClass}, {LHS, RHS});
+ if (!Cmp1.constrainAllUses(TII, TRI, RBI))
+ return false;
+ auto Cmp2 = MIB.buildInstr(getFCmpOpcode(CmpInst::FCMP_OLT, Size),
+ {&RISCV::GPRRegClass}, {RHS, LHS});
+ if (!Cmp2.constrainAllUses(TII, TRI, RBI))
+ return false;
+ if (NeedInvert)
+ TmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ auto Or =
+ MIB.buildInstr(RISCV::OR, {TmpReg}, {Cmp1.getReg(0), Cmp2.getReg(0)});
+ if (!Or.constrainAllUses(TII, TRI, RBI))
+ return false;
+ } else if (Pred == CmpInst::FCMP_ORD || Pred == CmpInst::FCMP_UNO) {
+ // fcmp ord LHS, RHS => (AND (FEQ LHS, LHS), (FEQ RHS, RHS))
+ // FIXME: If LHS and RHS are the same we can use a single FEQ.
+ NeedInvert = Pred == CmpInst::FCMP_UNO;
+ auto Cmp1 = MIB.buildInstr(getFCmpOpcode(CmpInst::FCMP_OEQ, Size),
+ {&RISCV::GPRRegClass}, {LHS, LHS});
+ if (!Cmp1.constrainAllUses(TII, TRI, RBI))
+ return false;
+ auto Cmp2 = MIB.buildInstr(getFCmpOpcode(CmpInst::FCMP_OEQ, Size),
+ {&RISCV::GPRRegClass}, {RHS, RHS});
+ if (!Cmp2.constrainAllUses(TII, TRI, RBI))
+ return false;
+ if (NeedInvert)
+ TmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ auto And =
+ MIB.buildInstr(RISCV::AND, {TmpReg}, {Cmp1.getReg(0), Cmp2.getReg(0)});
+ if (!And.constrainAllUses(TII, TRI, RBI))
+ return false;
+ } else
+ llvm_unreachable("Unhandled predicate");
+
+ // Emit an XORI to invert the result if needed.
+ if (NeedInvert) {
+ auto Xor = MIB.buildInstr(RISCV::XORI, {DstReg}, {TmpReg}).addImm(1);
+ if (!Xor.constrainAllUses(TII, TRI, RBI))
+ return false;
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
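
The ORD/UNO and ONE/UEQ rewrites above rely on quiet FP comparisons evaluating to false when either input is NaN. A standalone sketch with plain doubles:

    // Standalone sketch of the FCMP expansions above; C++'s ==/< on
    // doubles behave like FEQ/FLT here (false on NaN).
    #include <cassert>
    #include <cmath>

    static bool fcmpOrd(double A, double B) { return (A == A) & (B == B); }
    static bool fcmpUno(double A, double B) { return !fcmpOrd(A, B); }
    static bool fcmpOne(double A, double B) { return (A < B) | (B < A); }
    static bool fcmpUeq(double A, double B) { return !fcmpOne(A, B); }

    int main() {
      double NaN = std::nan("");
      assert(!fcmpOrd(NaN, 1.0) && fcmpUno(NaN, 1.0));
      assert(!fcmpOne(NaN, 1.0) && fcmpUeq(NaN, 1.0)); // UEQ true on NaN
      assert(fcmpOne(1.0, 2.0) && !fcmpUeq(1.0, 2.0));
    }
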
+
+bool RISCVInstructionSelector::selectIntrinsicWithSideEffects(
+ MachineInstr &MI, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) const {
+ assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS &&
+ "Unexpected opcode");
+ // Find the intrinsic ID.
+ unsigned IntrinID = cast<GIntrinsic>(MI).getIntrinsicID();
+
+ // Select the instruction.
+ switch (IntrinID) {
+ default:
+ return false;
+ case Intrinsic::trap:
+ MIB.buildInstr(RISCV::UNIMP, {}, {});
+ break;
+ case Intrinsic::debugtrap:
+ MIB.buildInstr(RISCV::EBREAK, {}, {});
+ break;
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
+
+void RISCVInstructionSelector::emitFence(AtomicOrdering FenceOrdering,
+ SyncScope::ID FenceSSID,
+ MachineIRBuilder &MIB) const {
+ if (STI.hasStdExtZtso()) {
+ // The only fence that needs an instruction is a sequentially-consistent
+ // cross-thread fence.
+ if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
+ FenceSSID == SyncScope::System) {
+ // fence rw, rw
+ MIB.buildInstr(RISCV::FENCE, {}, {})
+ .addImm(RISCVFenceField::R | RISCVFenceField::W)
+ .addImm(RISCVFenceField::R | RISCVFenceField::W);
+ return;
+ }
+
+ // MEMBARRIER is a compiler barrier; it codegens to a no-op.
+ MIB.buildInstr(TargetOpcode::MEMBARRIER, {}, {});
+ return;
+ }
+
+ // Single-thread fences only synchronize with signal handlers on the same
+ // thread and thus only need to preserve instruction order, not actually
+ // enforce memory ordering.
+ if (FenceSSID == SyncScope::SingleThread) {
+ MIB.buildInstr(TargetOpcode::MEMBARRIER, {}, {});
+ return;
+ }
+
+ // Refer to Table A.6 in the version 2.3 draft of the RISC-V Instruction Set
+ // Manual: Volume I.
+ unsigned Pred, Succ;
+ switch (FenceOrdering) {
+ default:
+ llvm_unreachable("Unexpected ordering");
+ case AtomicOrdering::AcquireRelease:
+ // fence acq_rel -> fence.tso
+ MIB.buildInstr(RISCV::FENCE_TSO, {}, {});
+ return;
+ case AtomicOrdering::Acquire:
+ // fence acquire -> fence r, rw
+ Pred = RISCVFenceField::R;
+ Succ = RISCVFenceField::R | RISCVFenceField::W;
+ break;
+ case AtomicOrdering::Release:
+ // fence release -> fence rw, w
+ Pred = RISCVFenceField::R | RISCVFenceField::W;
+ Succ = RISCVFenceField::W;
+ break;
+ case AtomicOrdering::SequentiallyConsistent:
+ // fence seq_cst -> fence rw, rw
+ Pred = RISCVFenceField::R | RISCVFenceField::W;
+ Succ = RISCVFenceField::R | RISCVFenceField::W;
+ break;
+ }
+ MIB.buildInstr(RISCV::FENCE, {}, {}).addImm(Pred).addImm(Succ);
+}
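
For reference, the pred/succ immediates built above are 4-bit sets. A standalone sketch assuming the ISA encoding I=8, O=4, R=2, W=1 (device input/output, memory read/write), which is what LLVM's RISCVFenceField uses:

    // Standalone sketch of the fence operand encodings used above.
    #include <cstdint>

    namespace FenceField { enum { I = 8, O = 4, R = 2, W = 1 }; }

    struct Fence { uint8_t Pred, Succ; };

    static constexpr Fence Acquire = // fence acquire -> fence r, rw
        {FenceField::R, FenceField::R | FenceField::W};
    static constexpr Fence Release = // fence release -> fence rw, w
        {FenceField::R | FenceField::W, FenceField::W};
    static constexpr Fence SeqCst =  // fence seq_cst -> fence rw, rw
        {FenceField::R | FenceField::W, FenceField::R | FenceField::W};
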
+
namespace llvm {
InstructionSelector *
createRISCVInstructionSelector(const RISCVTargetMachine &TM,
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 3f829cc2e677..8f03a7ac41d3 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -11,23 +11,452 @@
//===----------------------------------------------------------------------===//
#include "RISCVLegalizerInfo.h"
+#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
using namespace llvm;
+using namespace LegalityPredicates;
+using namespace LegalizeMutations;
-RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) {
- const unsigned XLen = ST.getXLen();
- const LLT XLenLLT = LLT::scalar(XLen);
+// Returns true if this type is supported by the scalar FP arithmetic
+// operations available on the current subtarget.
+static LegalityPredicate typeIsScalarFPArith(unsigned TypeIdx,
+ const RISCVSubtarget &ST) {
+ return [=, &ST](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx].isScalar() &&
+ ((ST.hasStdExtF() && Query.Types[TypeIdx].getSizeInBits() == 32) ||
+ (ST.hasStdExtD() && Query.Types[TypeIdx].getSizeInBits() == 64));
+ };
+}
+
+RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
+ : STI(ST), XLen(STI.getXLen()), sXLen(LLT::scalar(XLen)) {
+ const LLT sDoubleXLen = LLT::scalar(2 * XLen);
+ const LLT p0 = LLT::pointer(0, XLen);
+ const LLT s1 = LLT::scalar(1);
+ const LLT s8 = LLT::scalar(8);
+ const LLT s16 = LLT::scalar(16);
+ const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
using namespace TargetOpcode;
getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
- .legalFor({XLenLLT})
- .clampScalar(0, XLenLLT, XLenLLT);
+ .legalFor({s32, sXLen})
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, s32, sXLen);
+
+ getActionDefinitionsBuilder(
+ {G_UADDE, G_UADDO, G_USUBE, G_USUBO}).lower();
+
+ getActionDefinitionsBuilder({G_SADDO, G_SSUBO}).minScalar(0, sXLen).lower();
+
+ auto &ShiftActions = getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL});
+ if (ST.is64Bit())
+ ShiftActions.customFor({{s32, s32}});
+ ShiftActions.legalFor({{s32, s32}, {s32, sXLen}, {sXLen, sXLen}})
+ .widenScalarToNextPow2(0)
+ .clampScalar(1, s32, sXLen)
+ .clampScalar(0, s32, sXLen)
+ .minScalarSameAs(1, 0);
+
+ if (ST.is64Bit()) {
+ getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
+ .legalFor({{sXLen, s32}})
+ .maxScalar(0, sXLen);
+
+ getActionDefinitionsBuilder(G_SEXT_INREG)
+ .customFor({sXLen})
+ .maxScalar(0, sXLen)
+ .lower();
+ } else {
+ getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT}).maxScalar(0, sXLen);
+
+ getActionDefinitionsBuilder(G_SEXT_INREG).maxScalar(0, sXLen).lower();
+ }
+
+ // Merge/Unmerge
+ for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
+ unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
+ unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
+ auto &MergeUnmergeActions = getActionDefinitionsBuilder(Op);
+ if (XLen == 32 && ST.hasStdExtD()) {
+ LLT IdxZeroTy = Op == G_MERGE_VALUES ? s64 : s32;
+ LLT IdxOneTy = Op == G_MERGE_VALUES ? s32 : s64;
+ MergeUnmergeActions.legalFor({{IdxZeroTy, IdxOneTy}});
+ }
+ MergeUnmergeActions.widenScalarToNextPow2(LitTyIdx, XLen)
+ .widenScalarToNextPow2(BigTyIdx, XLen)
+ .clampScalar(LitTyIdx, sXLen, sXLen)
+ .clampScalar(BigTyIdx, sXLen, sXLen);
+ }
+
+ getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();
+
+ auto &RotateActions = getActionDefinitionsBuilder({G_ROTL, G_ROTR});
+ if (ST.hasStdExtZbb()) {
+ RotateActions.legalFor({{s32, sXLen}, {sXLen, sXLen}});
+ // Widen s32 rotate amount to s64 so SDAG patterns will match.
+ if (ST.is64Bit())
+ RotateActions.widenScalarIf(all(typeIs(0, s32), typeIs(1, s32)),
+ changeTo(1, sXLen));
+ }
+ RotateActions.lower();
+
+ getActionDefinitionsBuilder(G_BITREVERSE).maxScalar(0, sXLen).lower();
+
+ auto &BSWAPActions = getActionDefinitionsBuilder(G_BSWAP);
+ if (ST.hasStdExtZbb())
+ BSWAPActions.legalFor({sXLen}).clampScalar(0, sXLen, sXLen);
+ else
+ BSWAPActions.maxScalar(0, sXLen).lower();
+
+ auto &CountZerosActions = getActionDefinitionsBuilder({G_CTLZ, G_CTTZ});
+ auto &CountZerosUndefActions =
+ getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF});
+ if (ST.hasStdExtZbb()) {
+ CountZerosActions.legalFor({{s32, s32}, {sXLen, sXLen}})
+ .clampScalar(0, s32, sXLen)
+ .widenScalarToNextPow2(0)
+ .scalarSameSizeAs(1, 0);
+ } else {
+ CountZerosActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
+ CountZerosUndefActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0);
+ }
+ CountZerosUndefActions.lower();
+
+ auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
+ if (ST.hasStdExtZbb()) {
+ CTPOPActions.legalFor({{s32, s32}, {sXLen, sXLen}})
+ .clampScalar(0, s32, sXLen)
+ .widenScalarToNextPow2(0)
+ .scalarSameSizeAs(1, 0);
+ } else {
+ CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
+ }
+
+ getActionDefinitionsBuilder({G_CONSTANT, G_IMPLICIT_DEF})
+ .legalFor({s32, sXLen, p0})
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, s32, sXLen);
+
+ getActionDefinitionsBuilder(G_ICMP)
+ .legalFor({{sXLen, sXLen}, {sXLen, p0}})
+ .widenScalarToNextPow2(1)
+ .clampScalar(1, sXLen, sXLen)
+ .clampScalar(0, sXLen, sXLen);
+
+ auto &SelectActions = getActionDefinitionsBuilder(G_SELECT).legalFor(
+ {{s32, sXLen}, {p0, sXLen}});
+ if (XLen == 64 || ST.hasStdExtD())
+ SelectActions.legalFor({{s64, sXLen}});
+ SelectActions.widenScalarToNextPow2(0)
+ .clampScalar(0, s32, (XLen == 64 || ST.hasStdExtD()) ? s64 : s32)
+ .clampScalar(1, sXLen, sXLen);
+
+ auto &LoadStoreActions =
+ getActionDefinitionsBuilder({G_LOAD, G_STORE})
+ .legalForTypesWithMemDesc({{s32, p0, s8, 8},
+ {s32, p0, s16, 16},
+ {s32, p0, s32, 32},
+ {p0, p0, sXLen, XLen}});
+ auto &ExtLoadActions =
+ getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
+ .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 16}});
+ if (XLen == 64) {
+ LoadStoreActions.legalForTypesWithMemDesc({{s64, p0, s8, 8},
+ {s64, p0, s16, 16},
+ {s64, p0, s32, 32},
+ {s64, p0, s64, 64}});
+ ExtLoadActions.legalForTypesWithMemDesc(
+ {{s64, p0, s8, 8}, {s64, p0, s16, 16}, {s64, p0, s32, 32}});
+ } else if (ST.hasStdExtD()) {
+ LoadStoreActions.legalForTypesWithMemDesc({{s64, p0, s64, 64}});
+ }
+ LoadStoreActions.clampScalar(0, s32, sXLen).lower();
+ ExtLoadActions.widenScalarToNextPow2(0).clampScalar(0, s32, sXLen).lower();
+
+ getActionDefinitionsBuilder({G_PTR_ADD, G_PTRMASK}).legalFor({{p0, sXLen}});
+
+ getActionDefinitionsBuilder(G_PTRTOINT)
+ .legalFor({{sXLen, p0}})
+ .clampScalar(0, sXLen, sXLen);
+
+ getActionDefinitionsBuilder(G_INTTOPTR)
+ .legalFor({{p0, sXLen}})
+ .clampScalar(1, sXLen, sXLen);
+
+ getActionDefinitionsBuilder(G_BRCOND).legalFor({sXLen}).minScalar(0, sXLen);
+
+ getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, sXLen}});
+
+ getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
+
+ getActionDefinitionsBuilder(G_PHI)
+ .legalFor({p0, sXLen})
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, sXLen, sXLen);
+
+ getActionDefinitionsBuilder({G_GLOBAL_VALUE, G_JUMP_TABLE, G_CONSTANT_POOL})
+ .legalFor({p0});
+
+ if (ST.hasStdExtM() || ST.hasStdExtZmmul()) {
+ getActionDefinitionsBuilder(G_MUL)
+ .legalFor({s32, sXLen})
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, s32, sXLen);
+
+ // clang-format off
+ getActionDefinitionsBuilder({G_SMULH, G_UMULH})
+ .legalFor({sXLen})
+ .lower();
+ // clang-format on
+
+ getActionDefinitionsBuilder({G_SMULO, G_UMULO}).minScalar(0, sXLen).lower();
+ } else {
+ getActionDefinitionsBuilder(G_MUL)
+ .libcallFor({sXLen, sDoubleXLen})
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, sXLen, sDoubleXLen);
+
+ getActionDefinitionsBuilder({G_SMULH, G_UMULH}).lowerFor({sXLen});
+
+ getActionDefinitionsBuilder({G_SMULO, G_UMULO})
+ .minScalar(0, sXLen)
+ // Widen sXLen to sDoubleXLen so we can use a single libcall to get
+ // the low bits for the mul result and high bits to do the overflow
+ // check.
+ .widenScalarIf(typeIs(0, sXLen),
+ LegalizeMutations::changeTo(0, sDoubleXLen))
+ .lower();
+ }
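
The comment above describes the overflow strategy on RV32 without M: widen to sDoubleXLen, multiply once, and split the result. A scalar C++ model (illustrative only; the real lowering operates on virtual registers and a __muldi3-style libcall):

    #include <cstdint>
    // Model of the widened G_UMULO lowering: one wide multiply yields both
    // the low half (the mul result) and the high half (the overflow check).
    bool umulo32(uint32_t A, uint32_t B, uint32_t &Lo) {
      uint64_t Wide = (uint64_t)A * (uint64_t)B; // single sDoubleXLen multiply
      Lo = (uint32_t)Wide;                       // low bits: the result
      return (Wide >> 32) != 0;                  // high bits nonzero => overflow
    }
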
+
+ if (ST.hasStdExtM()) {
+ getActionDefinitionsBuilder({G_UDIV, G_SDIV, G_UREM, G_SREM})
+ .legalFor({s32, sXLen})
+ .libcallFor({sDoubleXLen})
+ .clampScalar(0, s32, sDoubleXLen)
+ .widenScalarToNextPow2(0);
+ } else {
+ getActionDefinitionsBuilder({G_UDIV, G_SDIV, G_UREM, G_SREM})
+ .libcallFor({sXLen, sDoubleXLen})
+ .clampScalar(0, sXLen, sDoubleXLen)
+ .widenScalarToNextPow2(0);
+ }
+
+ auto &AbsActions = getActionDefinitionsBuilder(G_ABS);
+ if (ST.hasStdExtZbb())
+ AbsActions.customFor({s32, sXLen}).minScalar(0, sXLen);
+ AbsActions.lower();
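
With Zbb, G_ABS is custom-legalized (see legalizeCustom below) via lowerAbsToMaxNeg: abs(x) becomes max(x, -x). A scalar C++ model, using an unsigned negate to keep the sketch free of signed-overflow UB:

    #include <cstdint>
    int32_t absModel(int32_t X) {
      int32_t NegX = (int32_t)(0u - (uint32_t)X); // two's-complement negate
      return X > NegX ? X : NegX;                 // smax(x, -x); INT32_MIN maps to itself
    }
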
+
+ auto &MinMaxActions =
+ getActionDefinitionsBuilder({G_UMAX, G_UMIN, G_SMAX, G_SMIN});
+ if (ST.hasStdExtZbb())
+ MinMaxActions.legalFor({sXLen}).minScalar(0, sXLen);
+ MinMaxActions.lower();
+
+ getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
+
+ getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
+
+ getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
+
+ // FP Operations
+
+ getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
+ G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM})
+ .legalIf(typeIsScalarFPArith(0, ST));
+
+ getActionDefinitionsBuilder(G_FCOPYSIGN)
+ .legalIf(all(typeIsScalarFPArith(0, ST), typeIsScalarFPArith(1, ST)));
+
+ getActionDefinitionsBuilder(G_FPTRUNC).legalIf(
+ [=, &ST](const LegalityQuery &Query) -> bool {
+ return (ST.hasStdExtD() && typeIs(0, s32)(Query) &&
+ typeIs(1, s64)(Query));
+ });
+ getActionDefinitionsBuilder(G_FPEXT).legalIf(
+ [=, &ST](const LegalityQuery &Query) -> bool {
+ return (ST.hasStdExtD() && typeIs(0, s64)(Query) &&
+ typeIs(1, s32)(Query));
+ });
+
+ getActionDefinitionsBuilder(G_FCMP)
+ .legalIf(all(typeIs(0, sXLen), typeIsScalarFPArith(1, ST)))
+ .clampScalar(0, sXLen, sXLen);
+
+ // TODO: Support vector version of G_IS_FPCLASS.
+ getActionDefinitionsBuilder(G_IS_FPCLASS)
+ .customIf(all(typeIs(0, s1), typeIsScalarFPArith(1, ST)));
+
+ getActionDefinitionsBuilder(G_FCONSTANT)
+ .legalIf(typeIsScalarFPArith(0, ST))
+ .lowerFor({s32, s64});
+
+ getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
+ .legalIf(all(typeInSet(0, {s32, sXLen}), typeIsScalarFPArith(1, ST)))
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, s32, sXLen);
+
+ getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
+ .legalIf(all(typeIsScalarFPArith(0, ST), typeInSet(1, {s32, sXLen})))
+ .widenScalarToNextPow2(1)
+ .clampScalar(1, s32, sXLen);
+
+ // FIXME: We can do custom inline expansion like SelectionDAG.
+ // FIXME: Legal with Zfa.
+ getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR})
+ .libcallFor({s32, s64});
+
+ getActionDefinitionsBuilder(G_VASTART).customFor({p0});
+
+ // The va_list operand must be a pointer, but most sized types are easy to
+ // handle as the destination.
+ getActionDefinitionsBuilder(G_VAARG)
+ // TODO: Implement narrowScalar and widenScalar for G_VAARG for types
+ // outside the [s32, sXLen] range.
+ .clampScalar(0, s32, sXLen)
+ .lowerForCartesianProduct({s32, sXLen, p0}, {p0});
getLegacyLegalizerInfo().computeTables();
}
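
Most integer rules above chain widenScalarToNextPow2 with clampScalar. As a rough model of the scalar width those two mutations choose on RV64 (illustrative; a type above the clamp maximum is really narrowed by splitting the operation, not by truncation):

    // Width picked by widenScalarToNextPow2(0) + clampScalar(0, s32, s64).
    unsigned legalizedWidth(unsigned Bits) {
      unsigned Pow2 = 1;
      while (Pow2 < Bits) Pow2 <<= 1; // widen to the next power of two
      if (Pow2 < 32) return 32;       // clamp lower bound (s32)
      if (Pow2 > 64) return 64;       // clamp upper bound (sXLen on RV64)
      return Pow2;
    }
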
+
+static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
+ if (Ty.isVector())
+ return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
+ Ty.getNumElements());
+ return IntegerType::get(C, Ty.getSizeInBits());
+}
+
+bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
+ MachineInstr &MI) const {
+ Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
+ switch (IntrinsicID) {
+ default:
+ return false;
+ case Intrinsic::vacopy: {
+ // vacopy arguments must be legal because of the intrinsic signature.
+ // No need to check here.
+
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+ MachineFunction &MF = *MI.getMF();
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ LLVMContext &Ctx = MF.getFunction().getContext();
+
+ Register DstLst = MI.getOperand(1).getReg();
+ LLT PtrTy = MRI.getType(DstLst);
+
+ // Load the source va_list
+ Align Alignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
+ MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
+ MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, Alignment);
+ auto Tmp = MIRBuilder.buildLoad(PtrTy, MI.getOperand(2), *LoadMMO);
+
+ // Store the result in the destination va_list
+ MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
+ MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, Alignment);
+ MIRBuilder.buildStore(DstLst, Tmp, *StoreMMO);
+
+ MI.eraseFromParent();
+ return true;
+ }
+ }
+}
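
Since a RISC-V va_list is a single pointer into the argument save area, the vacopy lowering above amounts to one pointer-sized load plus one store; in C++ terms:

    // C-level model of the vacopy intrinsic lowering above.
    void vacopyModel(void **Dst, void **Src) { *Dst = *Src; }
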
+
+bool RISCVLegalizerInfo::legalizeShlAshrLshr(
+ MachineInstr &MI, MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const {
+ assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
+ MI.getOpcode() == TargetOpcode::G_LSHR ||
+ MI.getOpcode() == TargetOpcode::G_SHL);
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+ // If the shift amount is a G_CONSTANT, promote it to a 64-bit type so the
+ // imported patterns can select it later. Either way, it will be legal.
+ Register AmtReg = MI.getOperand(2).getReg();
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
+ if (!VRegAndVal)
+ return true;
+ // Check the shift amount is in range for an immediate form.
+ uint64_t Amount = VRegAndVal->Value.getZExtValue();
+ if (Amount > 31)
+ return true; // This will have to remain a register variant.
+ auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
+ Observer.changingInstr(MI);
+ MI.getOperand(2).setReg(ExtCst.getReg(0));
+ Observer.changedInstr(MI);
+ return true;
+}
+
+bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI,
+ MachineIRBuilder &MIRBuilder) const {
+ // Store the address of the VarArgsFrameIndex slot into the memory location
+ // pointed to by the va_list operand.
+ assert(MI.getOpcode() == TargetOpcode::G_VASTART);
+ MachineFunction *MF = MI.getParent()->getParent();
+ RISCVMachineFunctionInfo *FuncInfo = MF->getInfo<RISCVMachineFunctionInfo>();
+ int FI = FuncInfo->getVarArgsFrameIndex();
+ LLT AddrTy = MIRBuilder.getMRI()->getType(MI.getOperand(0).getReg());
+ auto FINAddr = MIRBuilder.buildFrameIndex(AddrTy, FI);
+ assert(MI.hasOneMemOperand());
+ MIRBuilder.buildStore(FINAddr, MI.getOperand(0).getReg(),
+ *MI.memoperands()[0]);
+ MI.eraseFromParent();
+ return true;
+}
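
legalizeVAStart writes the address of the varargs frame slot into the va_list object; modeled in C++ (the frame slot is shown as an opaque pointer):

    // Model of the single store emitted by legalizeVAStart above.
    void vastartModel(void **VaList, void *VarArgsFrameSlot) {
      *VaList = VarArgsFrameSlot;
    }
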
+
+bool RISCVLegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
+ MachineInstr &MI) const {
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ GISelChangeObserver &Observer = Helper.Observer;
+ switch (MI.getOpcode()) {
+ default:
+ // No idea what to do.
+ return false;
+ case TargetOpcode::G_ABS:
+ return Helper.lowerAbsToMaxNeg(MI);
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ return legalizeShlAshrLshr(MI, MIRBuilder, Observer);
+ case TargetOpcode::G_SEXT_INREG: {
+ // Source size of 32 is sext.w.
+ int64_t SizeInBits = MI.getOperand(2).getImm();
+ if (SizeInBits == 32)
+ return true;
+
+ return Helper.lower(MI, 0, /* Unused hint type */ LLT()) ==
+ LegalizerHelper::Legalized;
+ }
+ case TargetOpcode::G_IS_FPCLASS: {
+ Register GISFPCLASS = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ const MachineOperand &ImmOp = MI.getOperand(2);
+ MachineIRBuilder MIB(MI);
+
+ // Map LLVM IR's floating-point classes to RISC-V's fclass bits by
+ // rotating the 10-bit immediate right by two bits.
+ APInt GFpClassImm(10, static_cast<uint64_t>(ImmOp.getImm()));
+ auto FClassMask = MIB.buildConstant(sXLen, GFpClassImm.rotr(2).zext(XLen));
+ auto ConstZero = MIB.buildConstant(sXLen, 0);
+
+ auto GFClass = MIB.buildInstr(RISCV::G_FCLASS, {sXLen}, {Src});
+ auto And = MIB.buildAnd(sXLen, GFClass, FClassMask);
+ MIB.buildICmp(CmpInst::ICMP_NE, GISFPCLASS, And, ConstZero);
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case TargetOpcode::G_VASTART:
+ return legalizeVAStart(MI, MIRBuilder);
+ }
+
+ llvm_unreachable("expected switch to return");
+}
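
A scalar C++ model of the G_IS_FPCLASS case above: the 10-bit LLVM class mask is rotated right by two bits to line up with the RISC-V fclass result, then ANDed and tested against zero:

    #include <cstdint>
    bool isFPClassModel(uint64_t FClassBits, uint16_t LLVMClassMask) {
      // rotr(2) within a 10-bit field: bits [1:0] wrap around to bits [9:8].
      uint16_t Mask = ((LLVMClassMask >> 2) | (LLVMClassMask << 8)) & 0x3FF;
      return (FClassBits & Mask) != 0;
    }
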
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
index 960410ead62c..48c36976501f 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
@@ -17,12 +17,29 @@
namespace llvm {
+class GISelChangeObserver;
+class MachineIRBuilder;
class RISCVSubtarget;
/// This class provides the information for the target register banks.
class RISCVLegalizerInfo : public LegalizerInfo {
+ const RISCVSubtarget &STI;
+ const unsigned XLen;
+ const LLT sXLen;
+
public:
RISCVLegalizerInfo(const RISCVSubtarget &ST);
+
+ bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override;
+
+ bool legalizeIntrinsic(LegalizerHelper &Helper,
+ MachineInstr &MI) const override;
+
+private:
+ bool legalizeShlAshrLshr(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const;
+
+ bool legalizeVAStart(MachineInstr &MI, MachineIRBuilder &MIRBuilder) const;
};
} // end namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVO0PreLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVO0PreLegalizerCombiner.cpp
new file mode 100644
index 000000000000..be77979512e0
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVO0PreLegalizerCombiner.cpp
@@ -0,0 +1,155 @@
+//=== RISCVO0PreLegalizerCombiner.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass does combining of machine instructions at the generic MI level,
+// before the legalizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+
+#define GET_GICOMBINER_DEPS
+#include "RISCVGenO0PreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_DEPS
+
+#define DEBUG_TYPE "riscv-O0-prelegalizer-combiner"
+
+using namespace llvm;
+
+namespace {
+#define GET_GICOMBINER_TYPES
+#include "RISCVGenO0PreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_TYPES
+
+class RISCVO0PreLegalizerCombinerImpl : public Combiner {
+protected:
+ // TODO: Make CombinerHelper methods const.
+ mutable CombinerHelper Helper;
+ const RISCVO0PreLegalizerCombinerImplRuleConfig &RuleConfig;
+ const RISCVSubtarget &STI;
+
+public:
+ RISCVO0PreLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
+ const RISCVO0PreLegalizerCombinerImplRuleConfig &RuleConfig,
+ const RISCVSubtarget &STI);
+
+ static const char *getName() { return "RISCVO0PreLegalizerCombiner"; }
+
+ bool tryCombineAll(MachineInstr &I) const override;
+
+private:
+#define GET_GICOMBINER_CLASS_MEMBERS
+#include "RISCVGenO0PreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_CLASS_MEMBERS
+};
+
+#define GET_GICOMBINER_IMPL
+#include "RISCVGenO0PreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_IMPL
+
+RISCVO0PreLegalizerCombinerImpl::RISCVO0PreLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
+ const RISCVO0PreLegalizerCombinerImplRuleConfig &RuleConfig,
+ const RISCVSubtarget &STI)
+ : Combiner(MF, CInfo, TPC, &KB, CSEInfo),
+ Helper(Observer, B, /*IsPreLegalize*/ true, &KB), RuleConfig(RuleConfig),
+ STI(STI),
+#define GET_GICOMBINER_CONSTRUCTOR_INITS
+#include "RISCVGenO0PreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_CONSTRUCTOR_INITS
+{
+}
+
+// Pass boilerplate
+// ================
+
+class RISCVO0PreLegalizerCombiner : public MachineFunctionPass {
+public:
+ static char ID;
+
+ RISCVO0PreLegalizerCombiner();
+
+ StringRef getPassName() const override {
+ return "RISCVO0PreLegalizerCombiner";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ RISCVO0PreLegalizerCombinerImplRuleConfig RuleConfig;
+};
+} // end anonymous namespace
+
+void RISCVO0PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ AU.addRequired<GISelKnownBitsAnalysis>();
+ AU.addPreserved<GISelKnownBitsAnalysis>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+RISCVO0PreLegalizerCombiner::RISCVO0PreLegalizerCombiner()
+ : MachineFunctionPass(ID) {
+ initializeRISCVO0PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+
+ if (!RuleConfig.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
+}
+
+bool RISCVO0PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ auto &TPC = getAnalysis<TargetPassConfig>();
+
+ const Function &F = MF.getFunction();
+ GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+
+ const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+
+ CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ /*LegalizerInfo*/ nullptr, /*EnableOpt*/ false,
+ F.hasOptSize(), F.hasMinSize());
+ RISCVO0PreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *KB,
+ /*CSEInfo*/ nullptr, RuleConfig, ST);
+ return Impl.combineMachineInstrs();
+}
+
+char RISCVO0PreLegalizerCombiner::ID = 0;
+INITIALIZE_PASS_BEGIN(RISCVO0PreLegalizerCombiner, DEBUG_TYPE,
+ "Combine RISC-V machine instrs before legalization", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
+INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
+INITIALIZE_PASS_END(RISCVO0PreLegalizerCombiner, DEBUG_TYPE,
+ "Combine RISC-V machine instrs before legalization", false,
+ false)
+
+namespace llvm {
+FunctionPass *createRISCVO0PreLegalizerCombiner() {
+ return new RISCVO0PreLegalizerCombiner();
+}
+} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp
new file mode 100644
index 000000000000..9c28944abc76
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp
@@ -0,0 +1,173 @@
+//=== RISCVPostLegalizerCombiner.cpp --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Post-legalization combines on generic MachineInstrs.
+///
+/// The combines here must preserve instruction legality.
+///
+/// Combines which don't rely on instruction legality should go in the
+/// RISCVPreLegalizerCombiner.
+///
+//===----------------------------------------------------------------------===//
+
+#include "RISCVTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+
+#define GET_GICOMBINER_DEPS
+#include "RISCVGenPostLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_DEPS
+
+#define DEBUG_TYPE "riscv-postlegalizer-combiner"
+
+using namespace llvm;
+
+namespace {
+
+#define GET_GICOMBINER_TYPES
+#include "RISCVGenPostLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_TYPES
+
+class RISCVPostLegalizerCombinerImpl : public Combiner {
+protected:
+ // TODO: Make CombinerHelper methods const.
+ mutable CombinerHelper Helper;
+ const RISCVPostLegalizerCombinerImplRuleConfig &RuleConfig;
+ const RISCVSubtarget &STI;
+
+public:
+ RISCVPostLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
+ const RISCVPostLegalizerCombinerImplRuleConfig &RuleConfig,
+ const RISCVSubtarget &STI, MachineDominatorTree *MDT,
+ const LegalizerInfo *LI);
+
+ static const char *getName() { return "RISCVPostLegalizerCombiner"; }
+
+ bool tryCombineAll(MachineInstr &I) const override;
+
+private:
+#define GET_GICOMBINER_CLASS_MEMBERS
+#include "RISCVGenPostLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_CLASS_MEMBERS
+};
+
+#define GET_GICOMBINER_IMPL
+#include "RISCVGenPostLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_IMPL
+
+RISCVPostLegalizerCombinerImpl::RISCVPostLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
+ const RISCVPostLegalizerCombinerImplRuleConfig &RuleConfig,
+ const RISCVSubtarget &STI, MachineDominatorTree *MDT,
+ const LegalizerInfo *LI)
+ : Combiner(MF, CInfo, TPC, &KB, CSEInfo),
+ Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
+ RuleConfig(RuleConfig), STI(STI),
+#define GET_GICOMBINER_CONSTRUCTOR_INITS
+#include "RISCVGenPostLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_CONSTRUCTOR_INITS
+{
+}
+
+class RISCVPostLegalizerCombiner : public MachineFunctionPass {
+public:
+ static char ID;
+
+ RISCVPostLegalizerCombiner();
+
+ StringRef getPassName() const override {
+ return "RISCVPostLegalizerCombiner";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ RISCVPostLegalizerCombinerImplRuleConfig RuleConfig;
+};
+} // end anonymous namespace
+
+void RISCVPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ AU.addRequired<GISelKnownBitsAnalysis>();
+ AU.addPreserved<GISelKnownBitsAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<GISelCSEAnalysisWrapperPass>();
+ AU.addPreserved<GISelCSEAnalysisWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+RISCVPostLegalizerCombiner::RISCVPostLegalizerCombiner()
+ : MachineFunctionPass(ID) {
+ initializeRISCVPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+
+ if (!RuleConfig.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
+}
+
+bool RISCVPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ assert(MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Legalized) &&
+ "Expected a legalized function?");
+ auto *TPC = &getAnalysis<TargetPassConfig>();
+ const Function &F = MF.getFunction();
+ bool EnableOpt =
+ MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
+
+ const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+ const auto *LI = ST.getLegalizerInfo();
+
+ GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+ MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
+ GISelCSEAnalysisWrapper &Wrapper =
+ getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
+ auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
+
+ CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ /*LegalizerInfo*/ nullptr, EnableOpt, F.hasOptSize(),
+ F.hasMinSize());
+ RISCVPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo,
+ RuleConfig, ST, MDT, LI);
+ return Impl.combineMachineInstrs();
+}
+
+char RISCVPostLegalizerCombiner::ID = 0;
+INITIALIZE_PASS_BEGIN(RISCVPostLegalizerCombiner, DEBUG_TYPE,
+ "Combine RISC-V MachineInstrs after legalization", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
+INITIALIZE_PASS_END(RISCVPostLegalizerCombiner, DEBUG_TYPE,
+ "Combine RISC-V MachineInstrs after legalization", false,
+ false)
+
+namespace llvm {
+FunctionPass *createRISCVPostLegalizerCombiner() {
+ return new RISCVPostLegalizerCombiner();
+}
+} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVPreLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVPreLegalizerCombiner.cpp
new file mode 100644
index 000000000000..9a35fffae058
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVPreLegalizerCombiner.cpp
@@ -0,0 +1,169 @@
+//=== RISCVPreLegalizerCombiner.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass does combining of machine instructions at the generic MI level,
+// before the legalizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+
+#define GET_GICOMBINER_DEPS
+#include "RISCVGenPreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_DEPS
+
+#define DEBUG_TYPE "riscv-prelegalizer-combiner"
+
+using namespace llvm;
+
+namespace {
+
+#define GET_GICOMBINER_TYPES
+#include "RISCVGenPreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_TYPES
+
+class RISCVPreLegalizerCombinerImpl : public Combiner {
+protected:
+ // TODO: Make CombinerHelper methods const.
+ mutable CombinerHelper Helper;
+ const RISCVPreLegalizerCombinerImplRuleConfig &RuleConfig;
+ const RISCVSubtarget &STI;
+
+public:
+ RISCVPreLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
+ const RISCVPreLegalizerCombinerImplRuleConfig &RuleConfig,
+ const RISCVSubtarget &STI, MachineDominatorTree *MDT,
+ const LegalizerInfo *LI);
+
+ static const char *getName() { return "RISCV00PreLegalizerCombiner"; }
+
+ bool tryCombineAll(MachineInstr &I) const override;
+
+private:
+#define GET_GICOMBINER_CLASS_MEMBERS
+#include "RISCVGenPreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_CLASS_MEMBERS
+};
+
+#define GET_GICOMBINER_IMPL
+#include "RISCVGenPreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_IMPL
+
+RISCVPreLegalizerCombinerImpl::RISCVPreLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
+ const RISCVPreLegalizerCombinerImplRuleConfig &RuleConfig,
+ const RISCVSubtarget &STI, MachineDominatorTree *MDT,
+ const LegalizerInfo *LI)
+ : Combiner(MF, CInfo, TPC, &KB, CSEInfo),
+ Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI),
+ RuleConfig(RuleConfig), STI(STI),
+#define GET_GICOMBINER_CONSTRUCTOR_INITS
+#include "RISCVGenPreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_CONSTRUCTOR_INITS
+{
+}
+
+// Pass boilerplate
+// ================
+
+class RISCVPreLegalizerCombiner : public MachineFunctionPass {
+public:
+ static char ID;
+
+ RISCVPreLegalizerCombiner();
+
+ StringRef getPassName() const override { return "RISCVPreLegalizerCombiner"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ RISCVPreLegalizerCombinerImplRuleConfig RuleConfig;
+};
+} // end anonymous namespace
+
+void RISCVPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ AU.addRequired<GISelKnownBitsAnalysis>();
+ AU.addPreserved<GISelKnownBitsAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<GISelCSEAnalysisWrapperPass>();
+ AU.addPreserved<GISelCSEAnalysisWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+RISCVPreLegalizerCombiner::RISCVPreLegalizerCombiner()
+ : MachineFunctionPass(ID) {
+ initializeRISCVPreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+
+ if (!RuleConfig.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
+}
+
+bool RISCVPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ auto &TPC = getAnalysis<TargetPassConfig>();
+
+ // Enable CSE.
+ GISelCSEAnalysisWrapper &Wrapper =
+ getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
+ auto *CSEInfo = &Wrapper.get(TPC.getCSEConfig());
+
+ const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+ const auto *LI = ST.getLegalizerInfo();
+
+ const Function &F = MF.getFunction();
+ bool EnableOpt =
+ MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
+ GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+ MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
+ CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ /*LegalizerInfo*/ nullptr, EnableOpt, F.hasOptSize(),
+ F.hasMinSize());
+ RISCVPreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *KB, CSEInfo, RuleConfig,
+ ST, MDT, LI);
+ return Impl.combineMachineInstrs();
+}
+
+char RISCVPreLegalizerCombiner::ID = 0;
+INITIALIZE_PASS_BEGIN(RISCVPreLegalizerCombiner, DEBUG_TYPE,
+ "Combine RISC-V machine instrs before legalization", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
+INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
+INITIALIZE_PASS_END(RISCVPreLegalizerCombiner, DEBUG_TYPE,
+ "Combine RISC-V machine instrs before legalization", false,
+ false)
+
+namespace llvm {
+FunctionPass *createRISCVPreLegalizerCombiner() {
+ return new RISCVPreLegalizerCombiner();
+}
+} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
index 9b601902ad20..cf0ff63a5e51 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
@@ -12,6 +12,7 @@
#include "RISCVRegisterBankInfo.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "RISCVSubtarget.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
@@ -20,7 +21,448 @@
#define GET_TARGET_REGBANK_IMPL
#include "RISCVGenRegisterBank.inc"
+namespace llvm {
+namespace RISCV {
+
+const RegisterBankInfo::PartialMapping PartMappings[] = {
+ {0, 32, GPRBRegBank},
+ {0, 64, GPRBRegBank},
+ {0, 32, FPRBRegBank},
+ {0, 64, FPRBRegBank},
+};
+
+enum PartialMappingIdx {
+ PMI_GPRB32 = 0,
+ PMI_GPRB64 = 1,
+ PMI_FPRB32 = 2,
+ PMI_FPRB64 = 3,
+};
+
+const RegisterBankInfo::ValueMapping ValueMappings[] = {
+ // Invalid value mapping.
+ {nullptr, 0},
+ // Maximum 3 GPR operands; 32 bit.
+ {&PartMappings[PMI_GPRB32], 1},
+ {&PartMappings[PMI_GPRB32], 1},
+ {&PartMappings[PMI_GPRB32], 1},
+ // Maximum 3 GPR operands; 64 bit.
+ {&PartMappings[PMI_GPRB64], 1},
+ {&PartMappings[PMI_GPRB64], 1},
+ {&PartMappings[PMI_GPRB64], 1},
+ // Maximum 3 FPR operands; 32 bit.
+ {&PartMappings[PMI_FPRB32], 1},
+ {&PartMappings[PMI_FPRB32], 1},
+ {&PartMappings[PMI_FPRB32], 1},
+ // Maximum 3 FPR operands; 64 bit.
+ {&PartMappings[PMI_FPRB64], 1},
+ {&PartMappings[PMI_FPRB64], 1},
+ {&PartMappings[PMI_FPRB64], 1},
+};
+
+enum ValueMappingIdx {
+ InvalidIdx = 0,
+ GPRB32Idx = 1,
+ GPRB64Idx = 4,
+ FPRB32Idx = 7,
+ FPRB64Idx = 10,
+};
+} // namespace RISCV
+} // namespace llvm
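
The index constants above encode the table layout: slot 0 is the invalid mapping, and each (bank, width) pair then owns three consecutive slots, one per operand, so an index is 1 + 3 * PartialMappingIdx. A compile-time check of that relationship (illustrative):

    constexpr unsigned valueMappingIdx(unsigned PartialMappingIdx) {
      return 1 + 3 * PartialMappingIdx;
    }
    static_assert(valueMappingIdx(0) == 1, "PMI_GPRB32 -> GPRB32Idx");
    static_assert(valueMappingIdx(3) == 10, "PMI_FPRB64 -> FPRB64Idx");
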
+
using namespace llvm;
RISCVRegisterBankInfo::RISCVRegisterBankInfo(unsigned HwMode)
: RISCVGenRegisterBankInfo(HwMode) {}
+
+const RegisterBank &
+RISCVRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
+ LLT Ty) const {
+ switch (RC.getID()) {
+ default:
+ llvm_unreachable("Register class not supported");
+ case RISCV::GPRRegClassID:
+ case RISCV::GPRF16RegClassID:
+ case RISCV::GPRF32RegClassID:
+ case RISCV::GPRNoX0RegClassID:
+ case RISCV::GPRNoX0X2RegClassID:
+ case RISCV::GPRJALRRegClassID:
+ case RISCV::GPRTCRegClassID:
+ case RISCV::GPRC_and_GPRTCRegClassID:
+ case RISCV::GPRCRegClassID:
+ case RISCV::GPRC_and_SR07RegClassID:
+ case RISCV::SR07RegClassID:
+ case RISCV::SPRegClassID:
+ case RISCV::GPRX0RegClassID:
+ return getRegBank(RISCV::GPRBRegBankID);
+ case RISCV::FPR64RegClassID:
+ case RISCV::FPR16RegClassID:
+ case RISCV::FPR32RegClassID:
+ case RISCV::FPR64CRegClassID:
+ case RISCV::FPR32CRegClassID:
+ return getRegBank(RISCV::FPRBRegBankID);
+ case RISCV::VMRegClassID:
+ case RISCV::VRRegClassID:
+ case RISCV::VRNoV0RegClassID:
+ case RISCV::VRM2RegClassID:
+ case RISCV::VRM2NoV0RegClassID:
+ case RISCV::VRM4RegClassID:
+ case RISCV::VRM4NoV0RegClassID:
+ case RISCV::VMV0RegClassID:
+ case RISCV::VRM2_with_sub_vrm1_0_in_VMV0RegClassID:
+ case RISCV::VRM4_with_sub_vrm1_0_in_VMV0RegClassID:
+ case RISCV::VRM8RegClassID:
+ case RISCV::VRM8NoV0RegClassID:
+ case RISCV::VRM8_with_sub_vrm1_0_in_VMV0RegClassID:
+ return getRegBank(RISCV::VRBRegBankID);
+ }
+}
+
+static const RegisterBankInfo::ValueMapping *getFPValueMapping(unsigned Size) {
+ assert(Size == 32 || Size == 64);
+ unsigned Idx = Size == 64 ? RISCV::FPRB64Idx : RISCV::FPRB32Idx;
+ return &RISCV::ValueMappings[Idx];
+}
+
+/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
+/// having only floating-point operands.
+/// FIXME: this is copied from the AArch64 target; it should be refactored
+/// into GlobalISel/Utils.cpp.
+static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
+ switch (Opc) {
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FMA:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FCONSTANT:
+ case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPTRUNC:
+ case TargetOpcode::G_FCEIL:
+ case TargetOpcode::G_FFLOOR:
+ case TargetOpcode::G_FNEARBYINT:
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_FCOPYSIGN:
+ case TargetOpcode::G_FCOS:
+ case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FLOG10:
+ case TargetOpcode::G_FLOG:
+ case TargetOpcode::G_FLOG2:
+ case TargetOpcode::G_FSQRT:
+ case TargetOpcode::G_FABS:
+ case TargetOpcode::G_FEXP:
+ case TargetOpcode::G_FRINT:
+ case TargetOpcode::G_INTRINSIC_TRUNC:
+ case TargetOpcode::G_INTRINSIC_ROUND:
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXIMUM:
+ case TargetOpcode::G_FMINIMUM:
+ return true;
+ }
+ return false;
+}
+
+// TODO: Make this more like AArch64?
+bool RISCVRegisterBankInfo::hasFPConstraints(
+ const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const {
+ if (isPreISelGenericFloatingPointOpcode(MI.getOpcode()))
+ return true;
+
+ // If we have a copy instruction, we could be feeding floating point
+ // instructions.
+ if (MI.getOpcode() != TargetOpcode::COPY)
+ return false;
+
+ return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) == &RISCV::FPRBRegBank;
+}
+
+bool RISCVRegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_FCMP:
+ return true;
+ default:
+ break;
+ }
+
+ return hasFPConstraints(MI, MRI, TRI);
+}
+
+bool RISCVRegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_UITOFP:
+ return true;
+ default:
+ break;
+ }
+
+ return hasFPConstraints(MI, MRI, TRI);
+}
+
+bool RISCVRegisterBankInfo::anyUseOnlyUseFP(
+ Register Def, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const {
+ return any_of(
+ MRI.use_nodbg_instructions(Def),
+ [&](const MachineInstr &UseMI) { return onlyUsesFP(UseMI, MRI, TRI); });
+}
+
+const RegisterBankInfo::InstructionMapping &
+RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
+ const unsigned Opc = MI.getOpcode();
+
+ // Try the default logic for non-generic instructions that are either copies
+ // or already have some operands assigned to banks.
+ if (!isPreISelGenericOpcode(Opc) || Opc == TargetOpcode::G_PHI) {
+ const InstructionMapping &Mapping = getInstrMappingImpl(MI);
+ if (Mapping.isValid())
+ return Mapping;
+ }
+
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+
+ unsigned GPRSize = getMaximumSize(RISCV::GPRBRegBankID);
+ assert((GPRSize == 32 || GPRSize == 64) && "Unexpected GPR size");
+
+ unsigned NumOperands = MI.getNumOperands();
+ const ValueMapping *GPRValueMapping =
+ &RISCV::ValueMappings[GPRSize == 64 ? RISCV::GPRB64Idx
+ : RISCV::GPRB32Idx];
+
+ switch (Opc) {
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_SMULH:
+ case TargetOpcode::G_SMAX:
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_UREM:
+ case TargetOpcode::G_UMULH:
+ case TargetOpcode::G_UMAX:
+ case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_PTR_ADD:
+ case TargetOpcode::G_PTRTOINT:
+ case TargetOpcode::G_INTTOPTR:
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_SEXTLOAD:
+ case TargetOpcode::G_ZEXTLOAD:
+ return getInstructionMapping(DefaultMappingID, /*Cost=*/1, GPRValueMapping,
+ NumOperands);
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FABS:
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_FSQRT:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINNUM: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ return getInstructionMapping(DefaultMappingID, /*Cost=*/1,
+ getFPValueMapping(Ty.getSizeInBits()),
+ NumOperands);
+ }
+ case TargetOpcode::G_IMPLICIT_DEF: {
+ Register Dst = MI.getOperand(0).getReg();
+ auto Mapping = GPRValueMapping;
+ // FIXME: May need to do a better job determining when to use FPRB.
+ // For example, the look through COPY case:
+ // %0:_(s32) = G_IMPLICIT_DEF
+ // %1:_(s32) = COPY %0
+ // $f10_d = COPY %1(s32)
+ if (anyUseOnlyUseFP(Dst, MRI, TRI))
+ Mapping = getFPValueMapping(MRI.getType(Dst).getSizeInBits());
+ return getInstructionMapping(DefaultMappingID, /*Cost=*/1, Mapping,
+ NumOperands);
+ }
+ }
+
+ SmallVector<const ValueMapping *, 4> OpdsMapping(NumOperands);
+
+ switch (Opc) {
+ case TargetOpcode::G_LOAD: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ OpdsMapping[0] = GPRValueMapping;
+ OpdsMapping[1] = GPRValueMapping;
+ // Use FPR64 for s64 loads on rv32.
+ if (GPRSize == 32 && Ty.getSizeInBits() == 64) {
+ assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD());
+ OpdsMapping[0] = getFPValueMapping(Ty.getSizeInBits());
+ break;
+ }
+
+ // Check if that load feeds fp instructions.
+ // In that case, we want the default mapping to be on FPR
+    // instead of blindly mapping every scalar to GPR.
+ if (anyUseOnlyUseFP(MI.getOperand(0).getReg(), MRI, TRI))
+ // If we have at least one direct use in a FP instruction,
+ // assume this was a floating point load in the IR. If it was
+ // not, we would have had a bitcast before reaching that
+ // instruction.
+ OpdsMapping[0] = getFPValueMapping(Ty.getSizeInBits());
+
+ break;
+ }
+ case TargetOpcode::G_STORE: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ OpdsMapping[0] = GPRValueMapping;
+ OpdsMapping[1] = GPRValueMapping;
+ // Use FPR64 for s64 stores on rv32.
+ if (GPRSize == 32 && Ty.getSizeInBits() == 64) {
+ assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD());
+ OpdsMapping[0] = getFPValueMapping(Ty.getSizeInBits());
+ break;
+ }
+
+ MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(0).getReg());
+ if (onlyDefinesFP(*DefMI, MRI, TRI))
+ OpdsMapping[0] = getFPValueMapping(Ty.getSizeInBits());
+ break;
+ }
+ case TargetOpcode::G_SELECT: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+
+ // Try to minimize the number of copies. If we have more floating point
+ // constrained values than not, then we'll put everything on FPR. Otherwise,
+ // everything has to be on GPR.
+ unsigned NumFP = 0;
+
+ // Use FPR64 for s64 select on rv32.
+ if (GPRSize == 32 && Ty.getSizeInBits() == 64) {
+ NumFP = 3;
+ } else {
+ // Check if the uses of the result always produce floating point values.
+ //
+ // For example:
+ //
+ // %z = G_SELECT %cond %x %y
+ // fpr = G_FOO %z ...
+ if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
+ [&](const MachineInstr &UseMI) {
+ return onlyUsesFP(UseMI, MRI, TRI);
+ }))
+ ++NumFP;
+
+ // Check if the defs of the source values always produce floating point
+ // values.
+ //
+ // For example:
+ //
+ // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
+ // %z = G_SELECT %cond %x %y
+ //
+ // Also check whether or not the sources have already been decided to be
+ // FPR. Keep track of this.
+ //
+ // This doesn't check the condition, since the condition is always an
+ // integer.
+ for (unsigned Idx = 2; Idx < 4; ++Idx) {
+ Register VReg = MI.getOperand(Idx).getReg();
+ MachineInstr *DefMI = MRI.getVRegDef(VReg);
+ if (getRegBank(VReg, MRI, TRI) == &RISCV::FPRBRegBank ||
+ onlyDefinesFP(*DefMI, MRI, TRI))
+ ++NumFP;
+ }
+ }
+
+ // Condition operand is always GPR.
+ OpdsMapping[1] = GPRValueMapping;
+
+ const ValueMapping *Mapping = GPRValueMapping;
+ if (NumFP >= 2)
+ Mapping = getFPValueMapping(Ty.getSizeInBits());
+
+ OpdsMapping[0] = OpdsMapping[2] = OpdsMapping[3] = Mapping;
+ break;
+ }
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI:
+ case RISCV::G_FCLASS: {
+ LLT Ty = MRI.getType(MI.getOperand(1).getReg());
+ OpdsMapping[0] = GPRValueMapping;
+ OpdsMapping[1] = getFPValueMapping(Ty.getSizeInBits());
+ break;
+ }
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_UITOFP: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ OpdsMapping[0] = getFPValueMapping(Ty.getSizeInBits());
+ OpdsMapping[1] = GPRValueMapping;
+ break;
+ }
+ case TargetOpcode::G_FCMP: {
+ LLT Ty = MRI.getType(MI.getOperand(2).getReg());
+
+ unsigned Size = Ty.getSizeInBits();
+ assert((Size == 32 || Size == 64) && "Unsupported size for G_FCMP");
+
+ OpdsMapping[0] = GPRValueMapping;
+ OpdsMapping[2] = OpdsMapping[3] = getFPValueMapping(Size);
+ break;
+ }
+ case TargetOpcode::G_MERGE_VALUES: {
+ // Use FPR64 for s64 merge on rv32.
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (GPRSize == 32 && Ty.getSizeInBits() == 64) {
+ assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD());
+ OpdsMapping[0] = getFPValueMapping(Ty.getSizeInBits());
+ OpdsMapping[1] = GPRValueMapping;
+ OpdsMapping[2] = GPRValueMapping;
+ }
+ break;
+ }
+ case TargetOpcode::G_UNMERGE_VALUES: {
+ // Use FPR64 for s64 unmerge on rv32.
+ LLT Ty = MRI.getType(MI.getOperand(2).getReg());
+ if (GPRSize == 32 && Ty.getSizeInBits() == 64) {
+ assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD());
+ OpdsMapping[0] = GPRValueMapping;
+ OpdsMapping[1] = GPRValueMapping;
+ OpdsMapping[2] = getFPValueMapping(Ty.getSizeInBits());
+ }
+ break;
+ }
+ default:
+ // By default map all scalars to GPR.
+ for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
+ auto &MO = MI.getOperand(Idx);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ LLT Ty = MRI.getType(MO.getReg());
+ if (!Ty.isValid())
+ continue;
+
+ if (isPreISelGenericFloatingPointOpcode(Opc))
+ OpdsMapping[Idx] = getFPValueMapping(Ty.getSizeInBits());
+ else
+ OpdsMapping[Idx] = GPRValueMapping;
+ }
+ break;
+ }
+
+ return getInstructionMapping(DefaultMappingID, /*Cost=*/1,
+ getOperandsMapping(OpdsMapping), NumOperands);
+}
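
The G_SELECT case above behaves like a three-way vote: one vote if the result feeds FP users, and one per data operand whose definition is FP-only (or already assigned to FPRB); two or more votes put the result and data operands on the FP bank. A standalone C++ model (the helper name is hypothetical):

    enum class Bank { GPR, FPR };
    Bank selectBankModel(bool ResultFeedsFP, bool TrueDefIsFP, bool FalseDefIsFP) {
      unsigned NumFP = ResultFeedsFP + TrueDefIsFP + FalseDefIsFP;
      // The condition operand stays on GPR regardless of the outcome.
      return NumFP >= 2 ? Bank::FPR : Bank::GPR;
    }
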
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.h
index ee6d4db27880..abd0837395f6 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.h
@@ -32,6 +32,29 @@ protected:
class RISCVRegisterBankInfo final : public RISCVGenRegisterBankInfo {
public:
RISCVRegisterBankInfo(unsigned HwMode);
+
+ const RegisterBank &getRegBankFromRegClass(const TargetRegisterClass &RC,
+ LLT Ty) const override;
+
+ const InstructionMapping &
+ getInstrMapping(const MachineInstr &MI) const override;
+
+private:
+ /// \returns true if \p MI only uses and defines FPRs.
+ bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const;
+
+ /// \returns true if \p MI only uses FPRs.
+ bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const;
+
+  /// \returns true if any user of \p Def only uses FPRs.
+ bool anyUseOnlyUseFP(Register Def, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const;
+
+ /// \returns true if \p MI only defines FPRs.
+ bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const;
};
} // end namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBanks.td b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBanks.td
index b49f8259e382..b1ef815fe373 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBanks.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBanks.td
@@ -10,4 +10,11 @@
//===----------------------------------------------------------------------===//
/// General Purpose Registers: X.
-def GPRRegBank : RegisterBank<"GPRB", [GPR]>;
+def GPRBRegBank : RegisterBank<"GPRB", [GPR]>;
+
+/// Floating Point Registers: F.
+def FPRBRegBank : RegisterBank<"FPRB", [FPR64]>;
+
+/// Vector Registers : V.
+def VRBRegBank : RegisterBank<"VRB", [VRM8]>;
+
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
index 8f8684e30b3a..aba2511959af 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
@@ -14,7 +14,6 @@
#include "RISCVCustomBehaviour.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "RISCV.h"
-#include "RISCVInstrInfo.h"
#include "TargetInfo/RISCVTargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
@@ -64,9 +63,9 @@ uint8_t RISCVLMULInstrument::getLMUL() const {
.Case("M2", 0b001)
.Case("M4", 0b010)
.Case("M8", 0b011)
- .Case("MF2", 0b101)
+ .Case("MF2", 0b111)
.Case("MF4", 0b110)
- .Case("MF8", 0b111);
+ .Case("MF8", 0b101);
}
const llvm::StringRef RISCVSEWInstrument::DESC_NAME = "RISCV-SEW";
@@ -186,13 +185,46 @@ RISCVInstrumentManager::createInstruments(const MCInst &Inst) {
return SmallVector<UniqueInstrument>();
}
+static std::pair<uint8_t, uint8_t>
+getEEWAndEMULForUnitStrideLoadStore(unsigned Opcode, RISCVII::VLMUL LMUL,
+ uint8_t SEW) {
+ uint8_t EEW;
+ switch (Opcode) {
+ case RISCV::VLM_V:
+ case RISCV::VSM_V:
+ case RISCV::VLE8_V:
+ case RISCV::VSE8_V:
+ EEW = 8;
+ break;
+ case RISCV::VLE16_V:
+ case RISCV::VSE16_V:
+ EEW = 16;
+ break;
+ case RISCV::VLE32_V:
+ case RISCV::VSE32_V:
+ EEW = 32;
+ break;
+ case RISCV::VLE64_V:
+ case RISCV::VSE64_V:
+ EEW = 64;
+ break;
+ default:
+ llvm_unreachable("Opcode is not a vector unit stride load nor store");
+ }
+
+ auto EMUL = RISCVVType::getSameRatioLMUL(SEW, LMUL, EEW);
+  if (!EMUL)
+ llvm_unreachable("Invalid SEW or LMUL for new ratio");
+ return std::make_pair(EEW, *EMUL);
+}
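
getSameRatioLMUL picks EMUL so that the EEW:EMUL ratio equals the SEW:LMUL ratio, i.e. EMUL = EEW * LMUL / SEW. A floating-point sketch of that relation (the real API returns the encoded VLMUL and fails for unrepresentable ratios):

    // E.g. SEW=32, LMUL=1, EEW=8 gives EMUL=1/4; SEW=16, LMUL=2, EEW=64 gives EMUL=8.
    double sameRatioEMUL(unsigned SEW, double LMUL, unsigned EEW) {
      return LMUL * double(EEW) / double(SEW);
    }
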
+
unsigned RISCVInstrumentManager::getSchedClassID(
const MCInstrInfo &MCII, const MCInst &MCI,
const llvm::SmallVector<Instrument *> &IVec) const {
unsigned short Opcode = MCI.getOpcode();
unsigned SchedClassID = MCII.get(Opcode).getSchedClass();
- // Unpack all possible RISCV instruments from IVec.
+ // Unpack all possible RISC-V instruments from IVec.
RISCVLMULInstrument *LI = nullptr;
RISCVSEWInstrument *SI = nullptr;
for (auto &I : IVec) {
@@ -215,12 +247,23 @@ unsigned RISCVInstrumentManager::getSchedClassID(
// or (Opcode, LMUL, SEW) if SEW instrument is active, and depends on LMUL
  // and SEW, or (Opcode, LMUL, 0) if it does not depend on SEW.
uint8_t SEW = SI ? SI->getSEW() : 0;
- // Check if it depends on LMUL and SEW
- const RISCVVInversePseudosTable::PseudoInfo *RVV =
- RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL, SEW);
- // Check if it depends only on LMUL
- if (!RVV)
- RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL, 0);
+
+ const RISCVVInversePseudosTable::PseudoInfo *RVV = nullptr;
+ if (Opcode == RISCV::VLM_V || Opcode == RISCV::VSM_V ||
+ Opcode == RISCV::VLE8_V || Opcode == RISCV::VSE8_V ||
+ Opcode == RISCV::VLE16_V || Opcode == RISCV::VSE16_V ||
+ Opcode == RISCV::VLE32_V || Opcode == RISCV::VSE32_V ||
+ Opcode == RISCV::VLE64_V || Opcode == RISCV::VSE64_V) {
+ RISCVII::VLMUL VLMUL = static_cast<RISCVII::VLMUL>(LMUL);
+ auto [EEW, EMUL] = getEEWAndEMULForUnitStrideLoadStore(Opcode, VLMUL, SEW);
+ RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, EMUL, EEW);
+ } else {
+ // Check if it depends on LMUL and SEW
+ RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL, SEW);
+ // Check if it depends only on LMUL
+ if (!RVV)
+ RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL, 0);
+ }
  // Not an RVV instr
if (!RVV) {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 1b890fbe041a..716fb67c5824 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVAsmBackend.cpp - RISCV Assembler Backend ---------------------===//
+//===-- RISCVAsmBackend.cpp - RISC-V Assembler Backend --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -19,6 +19,7 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
@@ -27,6 +28,15 @@
using namespace llvm;
+static cl::opt<bool> RelaxBranches("riscv-asm-relax-branches", cl::init(true),
+ cl::Hidden);
+// Temporary workaround for old linkers that do not support ULEB128 relocations,
+// which Clang/LLVM's implementation of the DWARF v5
+// DW_LLE_offset_pair/DW_RLE_offset_pair encodings relies on.
+static cl::opt<bool> ULEB128Reloc(
+ "riscv-uleb128-reloc", cl::init(true), cl::Hidden,
+ cl::desc("Emit R_RISCV_SET_ULEB128/E_RISCV_SUB_ULEB128 if appropriate"));
+
std::optional<MCFixupKind> RISCVAsmBackend::getFixupKind(StringRef Name) const {
if (STI.getTargetTriple().isOSBinFormatELF()) {
unsigned Type;
@@ -76,24 +86,6 @@ RISCVAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"fixup_riscv_call_plt", 0, 64, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_riscv_relax", 0, 0, 0},
{"fixup_riscv_align", 0, 0, 0},
-
- {"fixup_riscv_set_8", 0, 8, 0},
- {"fixup_riscv_add_8", 0, 8, 0},
- {"fixup_riscv_sub_8", 0, 8, 0},
-
- {"fixup_riscv_set_16", 0, 16, 0},
- {"fixup_riscv_add_16", 0, 16, 0},
- {"fixup_riscv_sub_16", 0, 16, 0},
-
- {"fixup_riscv_set_32", 0, 32, 0},
- {"fixup_riscv_add_32", 0, 32, 0},
- {"fixup_riscv_sub_32", 0, 32, 0},
-
- {"fixup_riscv_add_64", 0, 64, 0},
- {"fixup_riscv_sub_64", 0, 64, 0},
-
- {"fixup_riscv_set_6b", 2, 6, 0},
- {"fixup_riscv_sub_6b", 2, 6, 0},
};
static_assert((std::size(Infos)) == RISCV::NumTargetFixupKinds,
"Not all fixup kinds added to Infos array");
@@ -116,7 +108,8 @@ RISCVAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
// necessary for correctness as offsets may change during relaxation.
bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
const MCFixup &Fixup,
- const MCValue &Target) {
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) {
if (Fixup.getKind() >= FirstLiteralRelocationKind)
return true;
switch (Fixup.getTargetKind()) {
@@ -126,6 +119,7 @@ bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
case FK_Data_2:
case FK_Data_4:
case FK_Data_8:
+ case FK_Data_leb128:
if (Target.isAbsolute())
return false;
break;
@@ -135,7 +129,7 @@ bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
return true;
}
- return STI.hasFeature(RISCV::FeatureRelax) || ForceRelocs;
+ return STI->hasFeature(RISCV::FeatureRelax) || ForceRelocs;
}
bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup,
@@ -144,15 +138,12 @@ bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup,
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout,
const bool WasForced) const {
+ if (!RelaxBranches)
+ return false;
+
int64_t Offset = int64_t(Value);
unsigned Kind = Fixup.getTargetKind();
- // We only do conditional branch relaxation when the symbol is resolved.
- // For conditional branch, the immediate must be in the range
- // [-4096, 4094].
- if (Kind == RISCV::fixup_riscv_branch)
- return Resolved && !isInt<13>(Offset);
-
// Return true if the symbol is actually unresolved.
  // Resolved can always be false when shouldForceRelocation returns true.
// We use !WasForced to indicate that the symbol is unresolved and not forced
@@ -171,6 +162,10 @@ bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup,
// For compressed jump instructions the immediate must be
// in the range [-2048, 2046].
return Offset > 2046 || Offset < -2048;
+ case RISCV::fixup_riscv_branch:
+ // For conditional branch instructions the immediate must be
+      // in the range [-4096, 4094].
+ return !isInt<13>(Offset);
}
}
@@ -251,7 +246,7 @@ bool RISCVAsmBackend::relaxDwarfLineAddr(MCDwarfLineAddrFragment &DF,
OS << uint8_t(dwarf::DW_LNS_fixed_advance_pc);
Offset = OS.tell();
Fixup = RISCV::getRelocPairForSize(2);
- support::endian::write<uint16_t>(OS, 0, support::little);
+ support::endian::write<uint16_t>(OS, 0, llvm::endianness::little);
}
const MCBinaryExpr &MBE = cast<MCBinaryExpr>(AddrDelta);
@@ -301,27 +296,31 @@ bool RISCVAsmBackend::relaxDwarfCFA(MCDwarfCallFrameFragment &DF,
auto AddFixups = [&Fixups, &AddrDelta](unsigned Offset,
std::pair<unsigned, unsigned> Fixup) {
const MCBinaryExpr &MBE = cast<MCBinaryExpr>(AddrDelta);
- Fixups.push_back(MCFixup::create(
- Offset, MBE.getLHS(), static_cast<MCFixupKind>(std::get<0>(Fixup))));
- Fixups.push_back(MCFixup::create(
- Offset, MBE.getRHS(), static_cast<MCFixupKind>(std::get<1>(Fixup))));
+ Fixups.push_back(
+ MCFixup::create(Offset, MBE.getLHS(),
+ static_cast<MCFixupKind>(FirstLiteralRelocationKind +
+ std::get<0>(Fixup))));
+ Fixups.push_back(
+ MCFixup::create(Offset, MBE.getRHS(),
+ static_cast<MCFixupKind>(FirstLiteralRelocationKind +
+ std::get<1>(Fixup))));
};
if (isUIntN(6, Value)) {
OS << uint8_t(dwarf::DW_CFA_advance_loc);
- AddFixups(0, {RISCV::fixup_riscv_set_6b, RISCV::fixup_riscv_sub_6b});
+ AddFixups(0, {ELF::R_RISCV_SET6, ELF::R_RISCV_SUB6});
} else if (isUInt<8>(Value)) {
OS << uint8_t(dwarf::DW_CFA_advance_loc1);
- support::endian::write<uint8_t>(OS, 0, support::little);
- AddFixups(1, {RISCV::fixup_riscv_set_8, RISCV::fixup_riscv_sub_8});
+ support::endian::write<uint8_t>(OS, 0, llvm::endianness::little);
+ AddFixups(1, {ELF::R_RISCV_SET8, ELF::R_RISCV_SUB8});
} else if (isUInt<16>(Value)) {
OS << uint8_t(dwarf::DW_CFA_advance_loc2);
- support::endian::write<uint16_t>(OS, 0, support::little);
- AddFixups(1, {RISCV::fixup_riscv_set_16, RISCV::fixup_riscv_sub_16});
+ support::endian::write<uint16_t>(OS, 0, llvm::endianness::little);
+ AddFixups(1, {ELF::R_RISCV_SET16, ELF::R_RISCV_SUB16});
} else if (isUInt<32>(Value)) {
OS << uint8_t(dwarf::DW_CFA_advance_loc4);
- support::endian::write<uint32_t>(OS, 0, support::little);
- AddFixups(1, {RISCV::fixup_riscv_set_32, RISCV::fixup_riscv_sub_32});
+ support::endian::write<uint32_t>(OS, 0, llvm::endianness::little);
+ AddFixups(1, {ELF::R_RISCV_SET32, ELF::R_RISCV_SUB32});
} else {
llvm_unreachable("unsupported CFA encoding");
}
@@ -330,6 +329,18 @@ bool RISCVAsmBackend::relaxDwarfCFA(MCDwarfCallFrameFragment &DF,
return true;
}
+bool RISCVAsmBackend::relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout,
+ int64_t &Value) const {
+ if (LF.isSigned())
+ return false;
+ const MCExpr &Expr = LF.getValue();
+ if (ULEB128Reloc) {
+ LF.getFixups().push_back(
+ MCFixup::create(0, &Expr, FK_Data_leb128, Expr.getLoc()));
+ }
+ return Expr.evaluateKnownAbsolute(Value, Layout);
+}
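
relaxLEB128 attaches an FK_Data_leb128 fixup because a ULEB128 value is variable-length: it cannot be blindly patched in place once linker relaxation may change the distance, hence the paired SET/SUB relocations. For reference, the standard encoding:

    #include <cstdint>
    #include <vector>
    std::vector<uint8_t> encodeULEB128(uint64_t Value) {
      std::vector<uint8_t> Out;
      do {
        uint8_t Byte = Value & 0x7F; // low seven bits
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // continuation flag: more bytes follow
        Out.push_back(Byte);
      } while (Value != 0);
      return Out;
    }
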
+
// Given a compressed control flow instruction this function returns
// the expanded instruction.
unsigned RISCVAsmBackend::getRelaxedOpcode(unsigned Op) const {
@@ -400,25 +411,12 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case RISCV::fixup_riscv_tls_got_hi20:
case RISCV::fixup_riscv_tls_gd_hi20:
llvm_unreachable("Relocation should be unconditionally forced\n");
- case RISCV::fixup_riscv_set_8:
- case RISCV::fixup_riscv_add_8:
- case RISCV::fixup_riscv_sub_8:
- case RISCV::fixup_riscv_set_16:
- case RISCV::fixup_riscv_add_16:
- case RISCV::fixup_riscv_sub_16:
- case RISCV::fixup_riscv_set_32:
- case RISCV::fixup_riscv_add_32:
- case RISCV::fixup_riscv_sub_32:
- case RISCV::fixup_riscv_add_64:
- case RISCV::fixup_riscv_sub_64:
case FK_Data_1:
case FK_Data_2:
case FK_Data_4:
case FK_Data_8:
- case FK_Data_6b:
+ case FK_Data_leb128:
return Value;
- case RISCV::fixup_riscv_set_6b:
- return Value & 0x03;
case RISCV::fixup_riscv_lo12_i:
case RISCV::fixup_riscv_pcrel_lo12_i:
case RISCV::fixup_riscv_tprel_lo12_i:
@@ -483,6 +481,8 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
return UpperImm | ((LowerImm << 20) << 32);
}
case RISCV::fixup_riscv_rvc_jump: {
+ if (!isInt<12>(Value))
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
// Need to produce offset[11|4|9:8|10|6|7|3:1|5] from the 11-bit Value.
unsigned Bit11 = (Value >> 11) & 0x1;
unsigned Bit4 = (Value >> 4) & 0x1;
@@ -497,6 +497,8 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
return Value;
}
case RISCV::fixup_riscv_rvc_branch: {
+ if (!isInt<9>(Value))
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
// Need to produce offset[8|4:3], [reg 3 bit], offset[7:6|2:1|5]
unsigned Bit8 = (Value >> 8) & 0x1;
unsigned Bit7_6 = (Value >> 6) & 0x3;
@@ -513,8 +515,8 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
bool RISCVAsmBackend::evaluateTargetFixup(
const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFixup &Fixup,
- const MCFragment *DF, const MCValue &Target, uint64_t &Value,
- bool &WasForced) {
+ const MCFragment *DF, const MCValue &Target, const MCSubtargetInfo *STI,
+ uint64_t &Value, bool &WasForced) {
const MCFixup *AUIPCFixup;
const MCFragment *AUIPCDF;
MCValue AUIPCTarget;
@@ -564,7 +566,7 @@ bool RISCVAsmBackend::evaluateTargetFixup(
Value = Layout.getSymbolOffset(SA) + AUIPCTarget.getConstant();
Value -= Layout.getFragmentOffset(AUIPCDF) + AUIPCFixup->getOffset();
- if (shouldForceRelocation(Asm, *AUIPCFixup, AUIPCTarget)) {
+ if (shouldForceRelocation(Asm, *AUIPCFixup, AUIPCTarget, STI)) {
WasForced = true;
return false;
}
@@ -596,6 +598,10 @@ bool RISCVAsmBackend::handleAddSubRelocations(const MCAsmLayout &Layout,
TA = ELF::R_RISCV_ADD64;
TB = ELF::R_RISCV_SUB64;
break;
+ case llvm::FK_Data_leb128:
+ TA = ELF::R_RISCV_SET_ULEB128;
+ TB = ELF::R_RISCV_SUB_ULEB128;
+ break;
default:
llvm_unreachable("unsupported fixup size");
}
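
A toy model of how a consumer resolves such a pair (semantics deliberately simplified; the struct and function are invented for illustration): the ADD record adds one symbol's value into the patched word and the SUB record subtracts the other's, leaving SymA - SymB even after relaxation has moved both symbols.

```cpp
#include <cstdint>

// Toy model of paired ADD/SUB relocation application over a 32-bit word.
struct Reloc { uint64_t Offset; uint64_t SymValue; int64_t Addend; bool IsSub; };

static void applyReloc(uint32_t *Section, const Reloc &R) {
  uint32_t &Word = Section[R.Offset / 4];
  int64_t Delta = (int64_t)R.SymValue + R.Addend;
  Word = R.IsSub ? Word - (uint32_t)Delta : Word + (uint32_t)Delta;
}

int main() {
  uint32_t Section[1] = {0};
  applyReloc(Section, {0, /*SymA=*/0x1040, 0, /*IsSub=*/false}); // R_RISCV_ADD32
  applyReloc(Section, {0, /*SymB=*/0x1000, 0, /*IsSub=*/true});  // R_RISCV_SUB32
  return Section[0] == 0x40 ? 0 : 1; // word now holds SymA - SymB
}
```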
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
index 0ea1f32e8296..2ad6534ac8bc 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
@@ -31,8 +31,8 @@ class RISCVAsmBackend : public MCAsmBackend {
public:
RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit,
const MCTargetOptions &Options)
- : MCAsmBackend(support::little, RISCV::fixup_riscv_relax), STI(STI),
- OSABI(OSABI), Is64Bit(Is64Bit), TargetOptions(Options) {
+ : MCAsmBackend(llvm::endianness::little, RISCV::fixup_riscv_relax),
+ STI(STI), OSABI(OSABI), Is64Bit(Is64Bit), TargetOptions(Options) {
RISCVFeatures::validate(STI.getTargetTriple(), STI.getFeatureBits());
}
~RISCVAsmBackend() override = default;
@@ -50,8 +50,8 @@ public:
bool evaluateTargetFixup(const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &WasForced) override;
+ const MCValue &Target, const MCSubtargetInfo *STI,
+ uint64_t &Value, bool &WasForced) override;
bool handleAddSubRelocations(const MCAsmLayout &Layout, const MCFragment &F,
const MCFixup &Fixup, const MCValue &Target,
@@ -66,7 +66,8 @@ public:
createObjectTargetWriter() const override;
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target) override;
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) override;
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
@@ -99,6 +100,8 @@ public:
bool &WasRelaxed) const override;
bool relaxDwarfCFA(MCDwarfCallFrameFragment &DF, MCAsmLayout &Layout,
bool &WasRelaxed) const override;
+ bool relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout,
+ int64_t &Value) const override;
bool writeNopData(raw_ostream &OS, uint64_t Count,
const MCSubtargetInfo *STI) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
index 0a42c6faee29..66a46a485f53 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
@@ -47,11 +47,11 @@ ABI computeTargetABI(const Triple &TT, const FeatureBitset &FeatureBits,
errs()
<< "'" << ABIName
<< "' is not a recognized ABI for this target (ignoring target-abi)\n";
- } else if (ABIName.startswith("ilp32") && IsRV64) {
+ } else if (ABIName.starts_with("ilp32") && IsRV64) {
errs() << "32-bit ABIs are not supported for 64-bit targets (ignoring "
"target-abi)\n";
TargetABI = ABI_Unknown;
- } else if (ABIName.startswith("lp64") && !IsRV64) {
+ } else if (ABIName.starts_with("lp64") && !IsRV64) {
errs() << "64-bit ABIs are not supported for 32-bit targets (ignoring "
"target-abi)\n";
TargetABI = ABI_Unknown;
@@ -206,6 +206,17 @@ unsigned RISCVVType::getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
return (SEW * 8) / LMul;
}
+std::optional<RISCVII::VLMUL>
+RISCVVType::getSameRatioLMUL(unsigned SEW, RISCVII::VLMUL VLMUL, unsigned EEW) {
+ unsigned Ratio = RISCVVType::getSEWLMULRatio(SEW, VLMUL);
+ unsigned EMULFixedPoint = (EEW * 8) / Ratio;
+ bool Fractional = EMULFixedPoint < 8;
+ unsigned EMUL = Fractional ? 8 / EMULFixedPoint : EMULFixedPoint / 8;
+ if (!isValidLMUL(EMUL, Fractional))
+ return std::nullopt;
+ return RISCVVType::encodeLMUL(EMUL, Fractional);
+}
+
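
A worked instance of the ratio arithmetic behind getSameRatioLMUL, as a standalone sketch (not the LLVM helpers) with LMUL kept as a fixed-point value in eighths, the same convention the surrounding code uses:

```cpp
#include <cstdio>

// SEW=32 with LMUL=1 gives ratio 32; an element width of 16 must then use
// EMUL=1/2 to keep the same SEW/LMUL ratio.
int main() {
  unsigned SEW = 32, LMulEighths = 8; // LMUL=1 -> 8, LMUL=1/2 -> 4, ...
  unsigned Ratio = (SEW * 8) / LMulEighths; // 32

  unsigned EEW = 16;
  unsigned EMULFixedPoint = (EEW * 8) / Ratio; // 4, i.e. 4/8 of a register
  bool Fractional = EMULFixedPoint < 8;
  unsigned EMUL = Fractional ? 8 / EMULFixedPoint : EMULFixedPoint / 8;
  std::printf("EMUL = %s%u\n", Fractional ? "1/" : "", EMUL); // prints "1/2"
}
```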
// Include the auto-generated portion of the compress emitter.
#define GEN_UNCOMPRESS_INSTR
#define GEN_COMPRESS_INSTR
@@ -242,7 +253,7 @@ int RISCVLoadFPImm::getLoadFPImm(APFloat FPImm) {
"Unexpected semantics");
// Handle the minimum normalized value which is different for each type.
- if (FPImm.isSmallestNormalized())
+ if (FPImm.isSmallestNormalized() && !FPImm.isNegative())
return 1;
// Convert to single precision to use its lookup table.
@@ -273,7 +284,7 @@ int RISCVLoadFPImm::getLoadFPImm(APFloat FPImm) {
if (Sign) {
if (Entry == 16)
return 0;
- return false;
+ return -1;
}
return Entry;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index f86419319dd3..00b4751905f6 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -203,6 +203,35 @@ static inline unsigned getVecPolicyOpNum(const MCInstrDesc &Desc) {
return Desc.getNumOperands() - 1;
}
+/// \returns the index to the rounding mode immediate value if any, otherwise
+/// returns -1.
+static inline int getFRMOpNum(const MCInstrDesc &Desc) {
+ const uint64_t TSFlags = Desc.TSFlags;
+ if (!hasRoundModeOp(TSFlags) || usesVXRM(TSFlags))
+ return -1;
+
+ // The operand order
+ // --------------------------------------
+ // | n-1 (if any) | n-2 | n-3 | n-4 |
+ // | policy | sew | vl | frm |
+ // --------------------------------------
+ return getVLOpNum(Desc) - 1;
+}
+
+/// \returns the index to the rounding mode immediate value if any, otherwise
+/// returns -1.
+static inline int getVXRMOpNum(const MCInstrDesc &Desc) {
+ const uint64_t TSFlags = Desc.TSFlags;
+ if (!hasRoundModeOp(TSFlags) || !usesVXRM(TSFlags))
+ return -1;
+ // The operand order
+ // --------------------------------------
+ // | n-1 (if any) | n-2 | n-3 | n-4 |
+ // | policy | sew | vl | vxrm |
+ // --------------------------------------
+ return getVLOpNum(Desc) - 1;
+}
+
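
Both lookups above share the same tail-walking index math. A standalone sketch under the stated operand layout (the helper and its parameters are invented for illustration; the real code derives the VL index from TSFlags):

```cpp
#include <cassert>

// Vector pseudo operand lists end with [frm/vxrm][vl][sew][policy?], so the
// rounding-mode index is found by walking back from the operand count.
static int roundModeOpIndex(unsigned NumOperands, bool HasPolicy) {
  unsigned VLIndex = NumOperands - (HasPolicy ? 3 : 2); // vl sits before sew
  return (int)VLIndex - 1;                              // frm/vxrm precedes vl
}

int main() {
  // e.g. 7 operands with a policy op: indices 3=frm, 4=vl, 5=sew, 6=policy
  assert(roundModeOpIndex(7, /*HasPolicy=*/true) == 3);
  assert(roundModeOpIndex(6, /*HasPolicy=*/false) == 3);
  return 0;
}
```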
// Is the first def operand tied to the first use operand. This is true for
// vector pseudo instructions that have a merge operand for tail/mask
// undisturbed. It's also true for vector FMA instructions where one of the
@@ -506,6 +535,8 @@ void printVType(unsigned VType, raw_ostream &OS);
unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul);
+std::optional<RISCVII::VLMUL>
+getSameRatioLMUL(unsigned SEW, RISCVII::VLMUL VLMUL, unsigned EEW);
} // namespace RISCVVType
namespace RISCVRVC {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
index db7dc1aed7fc..0799267eaf7c 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
@@ -27,7 +27,7 @@ public:
// Return true if the given relocation must be with a symbol rather than
// section plus offset.
- bool needsRelocateWithSymbol(const MCSymbol &Sym,
+ bool needsRelocateWithSymbol(const MCValue &Val, const MCSymbol &Sym,
unsigned Type) const override {
// TODO: this is very conservative, update once RISC-V psABI requirements
// are clarified.
@@ -89,22 +89,6 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_RISCV_CALL_PLT;
case RISCV::fixup_riscv_call_plt:
return ELF::R_RISCV_CALL_PLT;
- case RISCV::fixup_riscv_add_8:
- return ELF::R_RISCV_ADD8;
- case RISCV::fixup_riscv_sub_8:
- return ELF::R_RISCV_SUB8;
- case RISCV::fixup_riscv_add_16:
- return ELF::R_RISCV_ADD16;
- case RISCV::fixup_riscv_sub_16:
- return ELF::R_RISCV_SUB16;
- case RISCV::fixup_riscv_add_32:
- return ELF::R_RISCV_ADD32;
- case RISCV::fixup_riscv_sub_32:
- return ELF::R_RISCV_SUB32;
- case RISCV::fixup_riscv_add_64:
- return ELF::R_RISCV_ADD64;
- case RISCV::fixup_riscv_sub_64:
- return ELF::R_RISCV_SUB64;
}
}
@@ -143,32 +127,6 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_RISCV_RELAX;
case RISCV::fixup_riscv_align:
return ELF::R_RISCV_ALIGN;
- case RISCV::fixup_riscv_set_6b:
- return ELF::R_RISCV_SET6;
- case RISCV::fixup_riscv_sub_6b:
- return ELF::R_RISCV_SUB6;
- case RISCV::fixup_riscv_add_8:
- return ELF::R_RISCV_ADD8;
- case RISCV::fixup_riscv_set_8:
- return ELF::R_RISCV_SET8;
- case RISCV::fixup_riscv_sub_8:
- return ELF::R_RISCV_SUB8;
- case RISCV::fixup_riscv_set_16:
- return ELF::R_RISCV_SET16;
- case RISCV::fixup_riscv_add_16:
- return ELF::R_RISCV_ADD16;
- case RISCV::fixup_riscv_sub_16:
- return ELF::R_RISCV_SUB16;
- case RISCV::fixup_riscv_set_32:
- return ELF::R_RISCV_SET32;
- case RISCV::fixup_riscv_add_32:
- return ELF::R_RISCV_ADD32;
- case RISCV::fixup_riscv_sub_32:
- return ELF::R_RISCV_SUB32;
- case RISCV::fixup_riscv_add_64:
- return ELF::R_RISCV_ADD64;
- case RISCV::fixup_riscv_sub_64:
- return ELF::R_RISCV_SUB64;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
index e43cb8b40d83..9db5148208b3 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
@@ -125,6 +125,65 @@ void RISCVTargetELFStreamer::emitDirectiveVariantCC(MCSymbol &Symbol) {
void RISCVELFStreamer::reset() {
static_cast<RISCVTargetStreamer *>(getTargetStreamer())->reset();
MCELFStreamer::reset();
+ MappingSymbolCounter = 0;
+ LastMappingSymbols.clear();
+ LastEMS = EMS_None;
+}
+
+void RISCVELFStreamer::emitDataMappingSymbol() {
+ if (LastEMS == EMS_Data)
+ return;
+ emitMappingSymbol("$d");
+ LastEMS = EMS_Data;
+}
+
+void RISCVELFStreamer::emitInstructionsMappingSymbol() {
+ if (LastEMS == EMS_Instructions)
+ return;
+ emitMappingSymbol("$x");
+ LastEMS = EMS_Instructions;
+}
+
+void RISCVELFStreamer::emitMappingSymbol(StringRef Name) {
+ auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol(
+ Name + "." + Twine(MappingSymbolCounter++)));
+ emitLabel(Symbol);
+ Symbol->setType(ELF::STT_NOTYPE);
+ Symbol->setBinding(ELF::STB_LOCAL);
+}
+
+void RISCVELFStreamer::changeSection(MCSection *Section,
+ const MCExpr *Subsection) {
+ // We have to keep track of the mapping symbol state of any sections we
+  // use. Each one should start off as EMS_None, which is what
+  // DenseMap::lookup returns for sections that have no entry yet.
+ LastMappingSymbols[getPreviousSection().first] = LastEMS;
+ LastEMS = LastMappingSymbols.lookup(Section);
+
+ MCELFStreamer::changeSection(Section, Subsection);
+}
+
+void RISCVELFStreamer::emitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI) {
+ emitInstructionsMappingSymbol();
+ MCELFStreamer::emitInstruction(Inst, STI);
+}
+
+void RISCVELFStreamer::emitBytes(StringRef Data) {
+ emitDataMappingSymbol();
+ MCELFStreamer::emitBytes(Data);
+}
+
+void RISCVELFStreamer::emitFill(const MCExpr &NumBytes, uint64_t FillValue,
+ SMLoc Loc) {
+ emitDataMappingSymbol();
+ MCELFStreamer::emitFill(NumBytes, FillValue, Loc);
+}
+
+void RISCVELFStreamer::emitValueImpl(const MCExpr *Value, unsigned Size,
+ SMLoc Loc) {
+ emitDataMappingSymbol();
+ MCELFStreamer::emitValueImpl(Value, Size, Loc);
}
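
A minimal model of the mapping-symbol logic above (standalone sketch, not the streamer API): a $x or $d symbol is emitted only when the stream transitions between code and data, so consecutive emissions of the same kind stay quiet.

```cpp
#include <cstdio>

// State machine: emit a mapping symbol only on a code<->data transition.
enum ElfMappingSymbol { EMS_None, EMS_Instructions, EMS_Data };

struct Stream {
  ElfMappingSymbol LastEMS = EMS_None;
  int Counter = 0;
  void emit(ElfMappingSymbol Kind, const char *Name) {
    if (LastEMS == Kind)
      return; // same kind as last time: no new symbol
    std::printf("%s.%d\n", Name, Counter++);
    LastEMS = Kind;
  }
  void instruction() { emit(EMS_Instructions, "$x"); }
  void bytes() { emit(EMS_Data, "$d"); }
};

int main() {
  Stream S;
  S.instruction(); // emits $x.0
  S.instruction(); // suppressed
  S.bytes();       // emits $d.1
  S.instruction(); // emits $x.2
}
```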
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
index e68f70261146..a6f54bf67b5d 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
@@ -16,12 +16,27 @@ using namespace llvm;
class RISCVELFStreamer : public MCELFStreamer {
void reset() override;
+ void emitDataMappingSymbol();
+ void emitInstructionsMappingSymbol();
+ void emitMappingSymbol(StringRef Name);
+
+ enum ElfMappingSymbol { EMS_None, EMS_Instructions, EMS_Data };
+
+ int64_t MappingSymbolCounter = 0;
+ DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
+ ElfMappingSymbol LastEMS = EMS_None;
public:
RISCVELFStreamer(MCContext &C, std::unique_ptr<MCAsmBackend> MAB,
std::unique_ptr<MCObjectWriter> MOW,
std::unique_ptr<MCCodeEmitter> MCE)
: MCELFStreamer(C, std::move(MAB), std::move(MOW), std::move(MCE)) {}
+
+ void changeSection(MCSection *Section, const MCExpr *Subsection) override;
+ void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
+ void emitBytes(StringRef Data) override;
+ void emitFill(const MCExpr &NumBytes, uint64_t FillValue, SMLoc Loc) override;
+ void emitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override;
};
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
index 5727aab3cd4c..74bd9398a9ef 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
@@ -1,4 +1,4 @@
-//===-- RISCVFixupKinds.h - RISCV Specific Fixup Entries --------*- C++ -*-===//
+//===-- RISCVFixupKinds.h - RISC-V Specific Fixup Entries -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -9,6 +9,7 @@
#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVFIXUPKINDS_H
#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVFIXUPKINDS_H
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCFixup.h"
#include <utility>
@@ -70,42 +71,6 @@ enum Fixups {
// Used to generate an R_RISCV_ALIGN relocation, which indicates the linker
// should fixup the alignment after linker relaxation.
fixup_riscv_align,
- // 8-bit fixup corresponding to R_RISCV_SET8 for local label assignment.
- fixup_riscv_set_8,
- // 8-bit fixup corresponding to R_RISCV_ADD8 for 8-bit symbolic difference
- // paired relocations.
- fixup_riscv_add_8,
- // 8-bit fixup corresponding to R_RISCV_SUB8 for 8-bit symbolic difference
- // paired relocations.
- fixup_riscv_sub_8,
- // 16-bit fixup corresponding to R_RISCV_SET16 for local label assignment.
- fixup_riscv_set_16,
- // 16-bit fixup corresponding to R_RISCV_ADD16 for 16-bit symbolic difference
- // paired relocations.
- fixup_riscv_add_16,
- // 16-bit fixup corresponding to R_RISCV_SUB16 for 16-bit symbolic difference
- // paired relocations.
- fixup_riscv_sub_16,
- // 32-bit fixup corresponding to R_RISCV_SET32 for local label assignment.
- fixup_riscv_set_32,
- // 32-bit fixup corresponding to R_RISCV_ADD32 for 32-bit symbolic difference
- // paired relocations.
- fixup_riscv_add_32,
- // 32-bit fixup corresponding to R_RISCV_SUB32 for 32-bit symbolic difference
- // paired relocations.
- fixup_riscv_sub_32,
- // 64-bit fixup corresponding to R_RISCV_ADD64 for 64-bit symbolic difference
- // paired relocations.
- fixup_riscv_add_64,
- // 64-bit fixup corresponding to R_RISCV_SUB64 for 64-bit symbolic difference
- // paired relocations.
- fixup_riscv_sub_64,
- // 6-bit fixup corresponding to R_RISCV_SET6 for local label assignment in
- // DWARF CFA.
- fixup_riscv_set_6b,
- // 6-bit fixup corresponding to R_RISCV_SUB6 for local label assignment in
- // DWARF CFA.
- fixup_riscv_sub_6b,
// Used as a sentinel, must be the last
fixup_riscv_invalid,
@@ -118,17 +83,21 @@ getRelocPairForSize(unsigned Size) {
default:
llvm_unreachable("unsupported fixup size");
case 1:
- return std::make_pair(MCFixupKind(RISCV::fixup_riscv_add_8),
- MCFixupKind(RISCV::fixup_riscv_sub_8));
+ return std::make_pair(
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_ADD8),
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_SUB8));
case 2:
- return std::make_pair(MCFixupKind(RISCV::fixup_riscv_add_16),
- MCFixupKind(RISCV::fixup_riscv_sub_16));
+ return std::make_pair(
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_ADD16),
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_SUB16));
case 4:
- return std::make_pair(MCFixupKind(RISCV::fixup_riscv_add_32),
- MCFixupKind(RISCV::fixup_riscv_sub_32));
+ return std::make_pair(
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_ADD32),
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_SUB32));
case 8:
- return std::make_pair(MCFixupKind(RISCV::fixup_riscv_add_64),
- MCFixupKind(RISCV::fixup_riscv_sub_64));
+ return std::make_pair(
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_ADD64),
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_SUB64));
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index 8e98abd65aab..195dda0b8b14 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
@@ -75,7 +76,7 @@ void RISCVInstPrinter::printInst(const MCInst *MI, uint64_t Address,
}
void RISCVInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const {
- O << getRegisterName(Reg);
+ markup(O, Markup::Register) << getRegisterName(Reg);
}
void RISCVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
@@ -90,7 +91,7 @@ void RISCVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
if (MO.isImm()) {
- O << MO.getImm();
+ markup(O, Markup::Immediate) << formatImm(MO.getImm());
return;
}
@@ -110,9 +111,9 @@ void RISCVInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address,
uint64_t Target = Address + MO.getImm();
if (!STI.hasFeature(RISCV::Feature64Bit))
Target &= 0xffffffff;
- O << formatHex(Target);
+ markup(O, Markup::Target) << formatHex(Target);
} else {
- O << MO.getImm();
+ markup(O, Markup::Target) << formatImm(MO.getImm());
}
}
@@ -123,11 +124,11 @@ void RISCVInstPrinter::printCSRSystemRegister(const MCInst *MI, unsigned OpNo,
auto SiFiveReg = RISCVSysReg::lookupSiFiveRegByEncoding(Imm);
auto SysReg = RISCVSysReg::lookupSysRegByEncoding(Imm);
if (SiFiveReg && SiFiveReg->haveVendorRequiredFeatures(STI.getFeatureBits()))
- O << SiFiveReg->Name;
+ markup(O, Markup::Register) << SiFiveReg->Name;
else if (SysReg && SysReg->haveRequiredFeatures(STI.getFeatureBits()))
- O << SysReg->Name;
+ markup(O, Markup::Register) << SysReg->Name;
else
- O << Imm;
+ markup(O, Markup::Register) << formatImm(Imm);
}
void RISCVInstPrinter::printFenceArg(const MCInst *MI, unsigned OpNo,
@@ -157,16 +158,29 @@ void RISCVInstPrinter::printFRMArg(const MCInst *MI, unsigned OpNo,
O << ", " << RISCVFPRndMode::roundingModeToString(FRMArg);
}
+void RISCVInstPrinter::printFRMArgLegacy(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ auto FRMArg =
+ static_cast<RISCVFPRndMode::RoundingMode>(MI->getOperand(OpNo).getImm());
+ // Never print rounding mode if it's the default 'rne'. This ensures the
+ // output can still be parsed by older tools that erroneously failed to
+ // accept a rounding mode.
+ if (FRMArg == RISCVFPRndMode::RoundingMode::RNE)
+ return;
+ O << ", " << RISCVFPRndMode::roundingModeToString(FRMArg);
+}
+
void RISCVInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNo).getImm();
if (Imm == 1) {
- O << "min";
+ markup(O, Markup::Immediate) << "min";
} else if (Imm == 30) {
- O << "inf";
+ markup(O, Markup::Immediate) << "inf";
} else if (Imm == 31) {
- O << "nan";
+ markup(O, Markup::Immediate) << "nan";
} else {
float FPVal = RISCVLoadFPImm::getFPImm(Imm);
// If the value is an integer, print a .0 fraction. Otherwise, use %g to
@@ -174,9 +188,9 @@ void RISCVInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNo,
// if it is shorter than printing as a decimal. The smallest value requires
// 12 digits of precision including the decimal.
if (FPVal == (int)(FPVal))
- O << format("%.1f", FPVal);
+ markup(O, Markup::Immediate) << format("%.1f", FPVal);
else
- O << format("%.12g", FPVal);
+ markup(O, Markup::Immediate) << format("%.12g", FPVal);
}
}
@@ -198,7 +212,7 @@ void RISCVInstPrinter::printVTypeI(const MCInst *MI, unsigned OpNo,
// or non-zero in bits 8 and above.
if (RISCVVType::getVLMUL(Imm) == RISCVII::VLMUL::LMUL_RESERVED ||
RISCVVType::getSEW(Imm) > 64 || (Imm >> 8) != 0) {
- O << Imm;
+ O << formatImm(Imm);
return;
}
// Print the text form.
@@ -211,16 +225,30 @@ void RISCVInstPrinter::printRlist(const MCInst *MI, unsigned OpNo,
O << "{";
switch (Imm) {
case RISCVZC::RLISTENCODE::RA:
- O << (ArchRegNames ? "x1" : "ra");
+ markup(O, Markup::Register) << (ArchRegNames ? "x1" : "ra");
break;
case RISCVZC::RLISTENCODE::RA_S0:
- O << (ArchRegNames ? "x1, x8" : "ra, s0");
+ markup(O, Markup::Register) << (ArchRegNames ? "x1" : "ra");
+ O << ", ";
+ markup(O, Markup::Register) << (ArchRegNames ? "x8" : "s0");
break;
case RISCVZC::RLISTENCODE::RA_S0_S1:
- O << (ArchRegNames ? "x1, x8-x9" : "ra, s0-s1");
+ markup(O, Markup::Register) << (ArchRegNames ? "x1" : "ra");
+ O << ", ";
+ markup(O, Markup::Register) << (ArchRegNames ? "x8" : "s0");
+ O << '-';
+ markup(O, Markup::Register) << (ArchRegNames ? "x9" : "s1");
break;
case RISCVZC::RLISTENCODE::RA_S0_S2:
- O << (ArchRegNames ? "x1, x8-x9, x18" : "ra, s0-s2");
+ markup(O, Markup::Register) << (ArchRegNames ? "x1" : "ra");
+ O << ", ";
+ markup(O, Markup::Register) << (ArchRegNames ? "x8" : "s0");
+ O << '-';
+ markup(O, Markup::Register) << (ArchRegNames ? "x9" : "s2");
+ if (ArchRegNames) {
+ O << ", ";
+ markup(O, Markup::Register) << "x18";
+ }
break;
case RISCVZC::RLISTENCODE::RA_S0_S3:
case RISCVZC::RLISTENCODE::RA_S0_S4:
@@ -229,11 +257,21 @@ void RISCVInstPrinter::printRlist(const MCInst *MI, unsigned OpNo,
case RISCVZC::RLISTENCODE::RA_S0_S7:
case RISCVZC::RLISTENCODE::RA_S0_S8:
case RISCVZC::RLISTENCODE::RA_S0_S9:
- O << (ArchRegNames ? "x1, x8-x9, x18-" : "ra, s0-")
- << getRegisterName(RISCV::X19 + (Imm - RISCVZC::RLISTENCODE::RA_S0_S3));
- break;
case RISCVZC::RLISTENCODE::RA_S0_S11:
- O << (ArchRegNames ? "x1, x8-x9, x18-x27" : "ra, s0-s11");
+ markup(O, Markup::Register) << (ArchRegNames ? "x1" : "ra");
+ O << ", ";
+ markup(O, Markup::Register) << (ArchRegNames ? "x8" : "s0");
+ O << '-';
+ if (ArchRegNames) {
+ markup(O, Markup::Register) << "x9";
+ O << ", ";
+ markup(O, Markup::Register) << "x18";
+ O << '-';
+ }
+ markup(O, Markup::Register) << getRegisterName(
+ RISCV::X19 + (Imm == RISCVZC::RLISTENCODE::RA_S0_S11
+ ? 8
+ : Imm - RISCVZC::RLISTENCODE::RA_S0_S3));
break;
default:
llvm_unreachable("invalid register list");
@@ -241,6 +279,22 @@ void RISCVInstPrinter::printRlist(const MCInst *MI, unsigned OpNo,
O << "}";
}
+void RISCVInstPrinter::printRegReg(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+
+ assert(MO.isReg() && "printRegReg can only print register operands");
+ if (MO.getReg() == RISCV::NoRegister)
+ return;
+ printRegName(O, MO.getReg());
+
+ O << "(";
+ const MCOperand &MO1 = MI->getOperand(OpNo + 1);
+ assert(MO1.isReg() && "printRegReg can only print register operands");
+ printRegName(O, MO1.getReg());
+ O << ")";
+}
+
void RISCVInstPrinter::printSpimm(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
int64_t Imm = MI->getOperand(OpNo).getImm();
@@ -256,6 +310,8 @@ void RISCVInstPrinter::printSpimm(const MCInst *MI, unsigned OpNo,
if (Opcode == RISCV::CM_PUSH)
Spimm = -Spimm;
+  // RAII guard that marks everything printed in this scope as an immediate.
+ WithMarkup ScopedMarkup = markup(O, Markup::Immediate);
RISCVZC::printSpimm(Spimm, O);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
index 20f12af13008..4512bd5f4c4b 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
@@ -40,6 +40,8 @@ public:
const MCSubtargetInfo &STI, raw_ostream &O);
void printFRMArg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printFRMArgLegacy(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printFPImmOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printZeroOffsetMemOp(const MCInst *MI, unsigned OpNo,
@@ -52,7 +54,8 @@ public:
raw_ostream &O);
void printSpimm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
-
+ void printRegReg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
// Autogenerated by tblgen.
std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address,
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index b63a5cea823e..82fed50bce75 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -92,6 +92,10 @@ public:
unsigned getRlistOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+
+ unsigned getRegReg(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
};
} // end anonymous namespace
@@ -137,7 +141,7 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI,
// Emit AUIPC Ra, Func with R_RISCV_CALL relocation type.
TmpInst = MCInstBuilder(RISCV::AUIPC).addReg(Ra).addExpr(CallExpr);
Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
- support::endian::write(CB, Binary, support::little);
+ support::endian::write(CB, Binary, llvm::endianness::little);
if (MI.getOpcode() == RISCV::PseudoTAIL ||
MI.getOpcode() == RISCV::PseudoJump)
@@ -147,7 +151,7 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI,
// Emit JALR Ra, Ra, 0
TmpInst = MCInstBuilder(RISCV::JALR).addReg(Ra).addReg(Ra).addImm(0);
Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
- support::endian::write(CB, Binary, support::little);
+ support::endian::write(CB, Binary, llvm::endianness::little);
}
// Expand PseudoAddTPRel to a simple ADD with the correct relocation.
@@ -186,7 +190,7 @@ void RISCVMCCodeEmitter::expandAddTPRel(const MCInst &MI,
.addOperand(SrcReg)
.addOperand(TPReg);
uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
- support::endian::write(CB, Binary, support::little);
+ support::endian::write(CB, Binary, llvm::endianness::little);
}
static unsigned getInvertedBranchOp(unsigned BrOp) {
@@ -240,14 +244,14 @@ void RISCVMCCodeEmitter::expandLongCondBr(const MCInst &MI,
Opcode == RISCV::PseudoLongBNE ? RISCV::C_BEQZ : RISCV::C_BNEZ;
MCInst TmpInst = MCInstBuilder(InvOpc).addReg(SrcReg1).addImm(6);
uint16_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
- support::endian::write<uint16_t>(CB, Binary, support::little);
+ support::endian::write<uint16_t>(CB, Binary, llvm::endianness::little);
Offset = 2;
} else {
unsigned InvOpc = getInvertedBranchOp(Opcode);
MCInst TmpInst =
MCInstBuilder(InvOpc).addReg(SrcReg1).addReg(SrcReg2).addImm(8);
uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
- support::endian::write(CB, Binary, support::little);
+ support::endian::write(CB, Binary, llvm::endianness::little);
Offset = 4;
}
@@ -255,7 +259,7 @@ void RISCVMCCodeEmitter::expandLongCondBr(const MCInst &MI,
MCInst TmpInst =
MCInstBuilder(RISCV::JAL).addReg(RISCV::X0).addOperand(SrcSymbol);
uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
- support::endian::write(CB, Binary, support::little);
+ support::endian::write(CB, Binary, llvm::endianness::little);
Fixups.clear();
if (SrcSymbol.isExpr()) {
@@ -306,12 +310,12 @@ void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI,
llvm_unreachable("Unhandled encodeInstruction length!");
case 2: {
uint16_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
- support::endian::write<uint16_t>(CB, Bits, support::little);
+ support::endian::write<uint16_t>(CB, Bits, llvm::endianness::little);
break;
}
case 4: {
uint32_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
- support::endian::write(CB, Bits, support::little);
+ support::endian::write(CB, Bits, llvm::endianness::little);
break;
}
}
@@ -442,8 +446,11 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
RelaxCandidate = true;
break;
}
- } else if (Kind == MCExpr::SymbolRef &&
- cast<MCSymbolRefExpr>(Expr)->getKind() == MCSymbolRefExpr::VK_None) {
+ } else if ((Kind == MCExpr::SymbolRef &&
+ cast<MCSymbolRefExpr>(Expr)->getKind() ==
+ MCSymbolRefExpr::VK_None) ||
+ Kind == MCExpr::Binary) {
+    // FIXME: Sub-kind binary exprs have a chance of underflow.
if (MIFrm == RISCVII::InstFormatJ) {
FixupKind = RISCV::fixup_riscv_jal;
} else if (MIFrm == RISCVII::InstFormatB) {
@@ -503,4 +510,17 @@ unsigned RISCVMCCodeEmitter::getRlistOpValue(const MCInst &MI, unsigned OpNo,
return Imm;
}
+unsigned RISCVMCCodeEmitter::getRegReg(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ const MCOperand &MO1 = MI.getOperand(OpNo + 1);
+ assert(MO.isReg() && MO1.isReg() && "Expected registers.");
+
+ unsigned Op = Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
+ unsigned Op1 = Ctx.getRegisterInfo()->getEncodingValue(MO1.getReg());
+
+ return Op | Op1 << 5;
+}
+
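
The reg-reg operand packs two 5-bit register encodings into a single 10-bit field, first register in bits [4:0] and second in bits [9:5]; a tiny standalone check of that packing:

```cpp
#include <cassert>

// Mirror of "Op | Op1 << 5" above, with range checks made explicit.
static unsigned packRegReg(unsigned Op, unsigned Op1) {
  assert(Op < 32 && Op1 < 32 && "GPR encodings are 5 bits");
  return Op | Op1 << 5;
}

int main() {
  // e.g. encodings 10 and 11 -> 0b01011'01010
  return packRegReg(10, 11) == ((11u << 5) | 10u) ? 0 : 1;
}
```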
#include "RISCVGenMCCodeEmitter.inc"
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index 75af5c2de094..79e56a7a6d03 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -31,6 +31,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+#include <bitset>
#define GET_INSTRINFO_MC_DESC
#define ENABLE_INSTR_PREDICATE_VERIFIER
@@ -114,10 +115,79 @@ static MCTargetStreamer *createRISCVNullTargetStreamer(MCStreamer &S) {
namespace {
class RISCVMCInstrAnalysis : public MCInstrAnalysis {
+ int64_t GPRState[31] = {};
+ std::bitset<31> GPRValidMask;
+
+ static bool isGPR(unsigned Reg) {
+ return Reg >= RISCV::X0 && Reg <= RISCV::X31;
+ }
+
+ static unsigned getRegIndex(unsigned Reg) {
+ assert(isGPR(Reg) && Reg != RISCV::X0 && "Invalid GPR reg");
+ return Reg - RISCV::X1;
+ }
+
+ void setGPRState(unsigned Reg, std::optional<int64_t> Value) {
+ if (Reg == RISCV::X0)
+ return;
+
+ auto Index = getRegIndex(Reg);
+
+ if (Value) {
+ GPRState[Index] = *Value;
+ GPRValidMask.set(Index);
+ } else {
+ GPRValidMask.reset(Index);
+ }
+ }
+
+ std::optional<int64_t> getGPRState(unsigned Reg) const {
+ if (Reg == RISCV::X0)
+ return 0;
+
+ auto Index = getRegIndex(Reg);
+
+ if (GPRValidMask.test(Index))
+ return GPRState[Index];
+ return std::nullopt;
+ }
+
public:
explicit RISCVMCInstrAnalysis(const MCInstrInfo *Info)
: MCInstrAnalysis(Info) {}
+ void resetState() override { GPRValidMask.reset(); }
+
+ void updateState(const MCInst &Inst, uint64_t Addr) override {
+ // Terminators mark the end of a basic block which means the sequentially
+ // next instruction will be the first of another basic block and the current
+ // state will typically not be valid anymore. For calls, we assume all
+ // registers may be clobbered by the callee (TODO: should we take the
+ // calling convention into account?).
+ if (isTerminator(Inst) || isCall(Inst)) {
+ resetState();
+ return;
+ }
+
+ switch (Inst.getOpcode()) {
+ default: {
+ // Clear the state of all defined registers for instructions that we don't
+ // explicitly support.
+ auto NumDefs = Info->get(Inst.getOpcode()).getNumDefs();
+ for (unsigned I = 0; I < NumDefs; ++I) {
+ auto DefReg = Inst.getOperand(I).getReg();
+ if (isGPR(DefReg))
+ setGPRState(DefReg, std::nullopt);
+ }
+ break;
+ }
+ case RISCV::AUIPC:
+ setGPRState(Inst.getOperand(0).getReg(),
+ Addr + (Inst.getOperand(1).getImm() << 12));
+ break;
+ }
+ }
+
bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
uint64_t &Target) const override {
if (isConditionalBranch(Inst)) {
@@ -140,6 +210,15 @@ public:
return true;
}
+ if (Inst.getOpcode() == RISCV::JALR) {
+ if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
+ Target = *TargetRegState + Inst.getOperand(2).getImm();
+ return true;
+ }
+
+ return false;
+ }
+
return false;
}
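
A standalone model (invented struct, simplified semantics: no instruction decoding, and hardware's clearing of the target's low bit is ignored) of why tracking AUIPC results lets evaluateBranch resolve register-indirect JALR targets:

```cpp
#include <cstdint>
#include <optional>

// AUIPC records pc + (imm20 << 12) for its destination register; a later
// JALR through that register resolves to the recorded value plus its
// 12-bit offset, as long as nothing clobbered the register in between.
struct GPRTracker {
  std::optional<int64_t> State[32];
  void auipc(unsigned Rd, uint64_t Addr, int64_t Imm20) {
    if (Rd != 0) // x0 is hard-wired to zero and never tracked
      State[Rd] = (int64_t)Addr + (Imm20 << 12);
  }
  std::optional<uint64_t> jalrTarget(unsigned Rs1, int64_t Imm12) const {
    if (Rs1 == 0)
      return (uint64_t)Imm12; // base register is constant zero
    if (State[Rs1])
      return (uint64_t)(*State[Rs1] + Imm12);
    return std::nullopt; // unknown register contents
  }
};

int main() {
  GPRTracker T;
  T.auipc(/*Rd=*/1, /*Addr=*/0x10000, /*Imm20=*/0x2);
  auto Target = T.jalrTarget(/*Rs1=*/1, /*Imm12=*/-4);
  return (Target && *Target == 0x10000 + 0x2000 - 4) ? 0 : 1;
}
```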
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index f659779e9772..4358a5b878e6 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -45,13 +45,12 @@ static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) {
}
// Recursively generate a sequence for materializing an integer.
-static void generateInstSeqImpl(int64_t Val,
- const FeatureBitset &ActiveFeatures,
+static void generateInstSeqImpl(int64_t Val, const MCSubtargetInfo &STI,
RISCVMatInt::InstSeq &Res) {
- bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
+ bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit);
// Use BSETI for a single bit that can't be expressed by a single LUI or ADDI.
- if (ActiveFeatures[RISCV::FeatureStdExtZbs] && isPowerOf2_64(Val) &&
+ if (STI.hasFeature(RISCV::FeatureStdExtZbs) && isPowerOf2_64(Val) &&
(!isInt<32>(Val) || Val == 0x800)) {
Res.emplace_back(RISCV::BSETI, Log2_64(Val));
return;
@@ -122,7 +121,7 @@ static void generateInstSeqImpl(int64_t Val,
ShiftAmount -= 12;
Val = (uint64_t)Val << 12;
} else if (isUInt<32>((uint64_t)Val << 12) &&
- ActiveFeatures[RISCV::FeatureStdExtZba]) {
+ STI.hasFeature(RISCV::FeatureStdExtZba)) {
// Reduce the shift amount and add zeros to the LSBs so it will match
// LUI, then shift left with SLLI.UW to clear the upper 32 set bits.
ShiftAmount -= 12;
@@ -133,7 +132,7 @@ static void generateInstSeqImpl(int64_t Val,
// Try to use SLLI_UW for Val when it is uint32 but not int32.
if (isUInt<32>((uint64_t)Val) && !isInt<32>((uint64_t)Val) &&
- ActiveFeatures[RISCV::FeatureStdExtZba]) {
+ STI.hasFeature(RISCV::FeatureStdExtZba)) {
// Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with
// SLLI_UW.
Val = ((uint64_t)Val) | (0xffffffffull << 32);
@@ -141,7 +140,7 @@ static void generateInstSeqImpl(int64_t Val,
}
}
- generateInstSeqImpl(Val, ActiveFeatures, Res);
+ generateInstSeqImpl(Val, STI, Res);
// Skip shift if we were able to use LUI directly.
if (ShiftAmount) {
@@ -171,10 +170,60 @@ static unsigned extractRotateInfo(int64_t Val) {
return 0;
}
+static void generateInstSeqLeadingZeros(int64_t Val, const MCSubtargetInfo &STI,
+ RISCVMatInt::InstSeq &Res) {
+  assert(Val > 0 && "Expected positive val");
+
+ unsigned LeadingZeros = llvm::countl_zero((uint64_t)Val);
+ uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
+ // Fill in the bits that will be shifted out with 1s. An example where this
+ // helps is trailing one masks with 32 or more ones. This will generate
+ // ADDI -1 and an SRLI.
+ ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);
+
+ RISCVMatInt::InstSeq TmpSeq;
+ generateInstSeqImpl(ShiftedVal, STI, TmpSeq);
+
+ // Keep the new sequence if it is an improvement or the original is empty.
+ if ((TmpSeq.size() + 1) < Res.size() ||
+ (Res.empty() && TmpSeq.size() < 8)) {
+ TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
+ Res = TmpSeq;
+ }
+
+ // Some cases can benefit from filling the lower bits with zeros instead.
+ ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
+ TmpSeq.clear();
+ generateInstSeqImpl(ShiftedVal, STI, TmpSeq);
+
+ // Keep the new sequence if it is an improvement or the original is empty.
+ if ((TmpSeq.size() + 1) < Res.size() ||
+ (Res.empty() && TmpSeq.size() < 8)) {
+ TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
+ Res = TmpSeq;
+ }
+
+ // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
+ // the end of the sequence.
+ if (LeadingZeros == 32 && STI.hasFeature(RISCV::FeatureStdExtZba)) {
+ // Try replacing upper bits with 1.
+ uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
+ TmpSeq.clear();
+ generateInstSeqImpl(LeadingOnesVal, STI, TmpSeq);
+
+ // Keep the new sequence if it is an improvement.
+ if ((TmpSeq.size() + 1) < Res.size() ||
+ (Res.empty() && TmpSeq.size() < 8)) {
+ TmpSeq.emplace_back(RISCV::ADD_UW, 0);
+ Res = TmpSeq;
+ }
+ }
+}
+
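
A worked instance of the leading-zeros strategy, assuming GCC/Clang builtins in place of llvm::countl_zero: a 32-bit all-ones mask becomes a single ADDI of -1 followed by an SRLI.

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  int64_t Val = 0xffffffff; // 32 trailing ones, 32 leading zeros
  unsigned LeadingZeros = __builtin_clzll((uint64_t)Val); // 32
  uint64_t ShiftedVal = ((uint64_t)Val << LeadingZeros) |
                        (((uint64_t)1 << LeadingZeros) - 1); // fill with 1s
  // ShiftedVal is now 0xffffffffffffffff, i.e. "ADDI rd, x0, -1"; then
  // "SRLI rd, rd, 32" shifts the ones back down and recreates the mask.
  std::printf("%u leading zeros, shifted value 0x%llx\n", LeadingZeros,
              (unsigned long long)ShiftedVal);
}
```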
namespace llvm::RISCVMatInt {
-InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
+InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI) {
RISCVMatInt::InstSeq Res;
- generateInstSeqImpl(Val, ActiveFeatures, Res);
+ generateInstSeqImpl(Val, STI, Res);
// If the low 12 bits are non-zero, the first expansion may end with an ADDI
// or ADDIW. If there are trailing zeros, try generating a sign extended
@@ -187,9 +236,9 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
// NOTE: We don't check for C extension to minimize differences in generated
// code.
bool IsShiftedCompressible =
- isInt<6>(ShiftedVal) && !ActiveFeatures[RISCV::TuneLUIADDIFusion];
+ isInt<6>(ShiftedVal) && !STI.hasFeature(RISCV::TuneLUIADDIFusion);
RISCVMatInt::InstSeq TmpSeq;
- generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+ generateInstSeqImpl(ShiftedVal, STI, TmpSeq);
// Keep the new sequence if it is an improvement.
if ((TmpSeq.size() + 1) < Res.size() || IsShiftedCompressible) {
@@ -203,65 +252,56 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
if (Res.size() <= 2)
return Res;
- assert(ActiveFeatures[RISCV::Feature64Bit] &&
+ assert(STI.hasFeature(RISCV::Feature64Bit) &&
"Expected RV32 to only need 2 instructions");
- // If the constant is positive we might be able to generate a shifted constant
- // with no leading zeros and use a final SRLI to restore them.
- if (Val > 0) {
- assert(Res.size() > 2 && "Expected longer sequence");
- unsigned LeadingZeros = llvm::countl_zero((uint64_t)Val);
- uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
- // Fill in the bits that will be shifted out with 1s. An example where this
- // helps is trailing one masks with 32 or more ones. This will generate
- // ADDI -1 and an SRLI.
- ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);
-
+ // If the lower 13 bits are something like 0x17ff, try to add 1 to change the
+ // lower 13 bits to 0x1800. We can restore this with an ADDI of -1 at the end
+ // of the sequence. Call generateInstSeqImpl on the new constant which may
+ // subtract 0xfffffffffffff800 to create another ADDI. This will leave a
+ // constant with more than 12 trailing zeros for the next recursive step.
+ if ((Val & 0xfff) != 0 && (Val & 0x1800) == 0x1000) {
+ int64_t Imm12 = -(0x800 - (Val & 0xfff));
+ int64_t AdjustedVal = Val - Imm12;
RISCVMatInt::InstSeq TmpSeq;
- generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+ generateInstSeqImpl(AdjustedVal, STI, TmpSeq);
// Keep the new sequence if it is an improvement.
if ((TmpSeq.size() + 1) < Res.size()) {
- TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
+ TmpSeq.emplace_back(RISCV::ADDI, Imm12);
Res = TmpSeq;
}
+ }
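
A worked instance of the low-13-bit adjustment above, with a value chosen to hit the (Val & 0x1800) == 0x1000 case:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  int64_t Val = 0x1234567817ffLL;
  assert((Val & 0xfff) != 0 && (Val & 0x1800) == 0x1000);
  int64_t Imm12 = -(0x800 - (Val & 0xfff)); // -1 here
  int64_t AdjustedVal = Val - Imm12;        // low 13 bits become 0x1800
  assert((AdjustedVal & 0x1fff) == 0x1800);
  // Materialize AdjustedVal (whose low bits are now easier to peel off in
  // the recursive step), then a trailing "ADDI rd, rd, -1" restores Val.
  assert(AdjustedVal + Imm12 == Val);
  return 0;
}
```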
- // Some cases can benefit from filling the lower bits with zeros instead.
- ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
- TmpSeq.clear();
- generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
-
- // Keep the new sequence if it is an improvement.
- if ((TmpSeq.size() + 1) < Res.size()) {
- TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
- Res = TmpSeq;
- }
+ // If the constant is positive we might be able to generate a shifted constant
+ // with no leading zeros and use a final SRLI to restore them.
+ if (Val > 0 && Res.size() > 2) {
+ generateInstSeqLeadingZeros(Val, STI, Res);
+ }
- // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
- // the end of the sequence.
- if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
- // Try replacing upper bits with 1.
- uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
- TmpSeq.clear();
- generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
+  // If the constant is negative, try inverting and using our trailing zero
+ // optimizations. Use an xori to invert the final value.
+ if (Val < 0 && Res.size() > 3) {
+ uint64_t InvertedVal = ~(uint64_t)Val;
+ RISCVMatInt::InstSeq TmpSeq;
+ generateInstSeqLeadingZeros(InvertedVal, STI, TmpSeq);
- // Keep the new sequence if it is an improvement.
- if ((TmpSeq.size() + 1) < Res.size()) {
- TmpSeq.emplace_back(RISCV::ADD_UW, 0);
- Res = TmpSeq;
- }
+ // Keep it if we found a sequence that is smaller after inverting.
+ if (!TmpSeq.empty() && (TmpSeq.size() + 1) < Res.size()) {
+ TmpSeq.emplace_back(RISCV::XORI, -1);
+ Res = TmpSeq;
}
}
// If the Low and High halves are the same, use pack. The pack instruction
// packs the XLEN/2-bit lower halves of rs1 and rs2 into rd, with rs1 in the
// lower half and rs2 in the upper half.
- if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbkb]) {
+ if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZbkb)) {
int64_t LoVal = SignExtend64<32>(Val);
int64_t HiVal = SignExtend64<32>(Val >> 32);
if (LoVal == HiVal) {
RISCVMatInt::InstSeq TmpSeq;
- generateInstSeqImpl(LoVal, ActiveFeatures, TmpSeq);
+ generateInstSeqImpl(LoVal, STI, TmpSeq);
if ((TmpSeq.size() + 1) < Res.size()) {
TmpSeq.emplace_back(RISCV::PACK, 0);
Res = TmpSeq;
@@ -270,7 +310,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
}
// Perform optimization with BCLRI/BSETI in the Zbs extension.
- if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbs]) {
+ if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZbs)) {
// 1. For values in range 0xffffffff 7fffffff ~ 0xffffffff 00000000,
// call generateInstSeqImpl with Val|0x80000000 (which is expected be
// an int32), then emit (BCLRI r, 31).
@@ -288,7 +328,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
}
if (isInt<32>(NewVal)) {
RISCVMatInt::InstSeq TmpSeq;
- generateInstSeqImpl(NewVal, ActiveFeatures, TmpSeq);
+ generateInstSeqImpl(NewVal, STI, TmpSeq);
if ((TmpSeq.size() + 1) < Res.size()) {
TmpSeq.emplace_back(Opc, 31);
Res = TmpSeq;
@@ -302,7 +342,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
uint32_t Hi = Hi_32(Val);
Opc = 0;
RISCVMatInt::InstSeq TmpSeq;
- generateInstSeqImpl(Lo, ActiveFeatures, TmpSeq);
+ generateInstSeqImpl(Lo, STI, TmpSeq);
// Check if it is profitable to use BCLRI/BSETI.
if (Lo > 0 && TmpSeq.size() + llvm::popcount(Hi) < Res.size()) {
Opc = RISCV::BSETI;
@@ -323,7 +363,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
}
// Perform optimization with SH*ADD in the Zba extension.
- if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
+ if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZba)) {
int64_t Div = 0;
unsigned Opc = 0;
RISCVMatInt::InstSeq TmpSeq;
@@ -340,7 +380,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
}
// Build the new instruction sequence.
if (Div > 0) {
- generateInstSeqImpl(Val / Div, ActiveFeatures, TmpSeq);
+ generateInstSeqImpl(Val / Div, STI, TmpSeq);
if ((TmpSeq.size() + 1) < Res.size()) {
TmpSeq.emplace_back(Opc, 0);
Res = TmpSeq;
@@ -367,7 +407,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
assert(Lo12 != 0 &&
"unexpected instruction sequence for immediate materialisation");
assert(TmpSeq.empty() && "Expected empty TmpSeq");
- generateInstSeqImpl(Hi52 / Div, ActiveFeatures, TmpSeq);
+ generateInstSeqImpl(Hi52 / Div, STI, TmpSeq);
if ((TmpSeq.size() + 2) < Res.size()) {
TmpSeq.emplace_back(Opc, 0);
TmpSeq.emplace_back(RISCV::ADDI, Lo12);
@@ -379,14 +419,14 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
// Perform optimization with rori in the Zbb and th.srri in the XTheadBb
// extension.
- if (Res.size() > 2 && (ActiveFeatures[RISCV::FeatureStdExtZbb] ||
- ActiveFeatures[RISCV::FeatureVendorXTHeadBb])) {
+ if (Res.size() > 2 && (STI.hasFeature(RISCV::FeatureStdExtZbb) ||
+ STI.hasFeature(RISCV::FeatureVendorXTHeadBb))) {
if (unsigned Rotate = extractRotateInfo(Val)) {
RISCVMatInt::InstSeq TmpSeq;
uint64_t NegImm12 = llvm::rotl<uint64_t>(Val, Rotate);
assert(isInt<12>(NegImm12));
TmpSeq.emplace_back(RISCV::ADDI, NegImm12);
- TmpSeq.emplace_back(ActiveFeatures[RISCV::FeatureStdExtZbb]
+ TmpSeq.emplace_back(STI.hasFeature(RISCV::FeatureStdExtZbb)
? RISCV::RORI
: RISCV::TH_SRRI,
Rotate);
@@ -396,11 +436,44 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
return Res;
}
-int getIntMatCost(const APInt &Val, unsigned Size,
- const FeatureBitset &ActiveFeatures, bool CompressionCost) {
- bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
- bool HasRVC = CompressionCost && (ActiveFeatures[RISCV::FeatureStdExtC] ||
- ActiveFeatures[RISCV::FeatureStdExtZca]);
+InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI,
+ unsigned &ShiftAmt, unsigned &AddOpc) {
+ int64_t LoVal = SignExtend64<32>(Val);
+ if (LoVal == 0)
+ return RISCVMatInt::InstSeq();
+
+ // Subtract the LoVal to emulate the effect of the final ADD.
+ uint64_t Tmp = (uint64_t)Val - (uint64_t)LoVal;
+ assert(Tmp != 0);
+
+  // Use trailing zero counts to figure out how far we need to shift LoVal to
+  // line up with the remaining constant.
+ // TODO: This algorithm assumes all non-zero bits in the low 32 bits of the
+ // final constant come from LoVal.
+ unsigned TzLo = llvm::countr_zero((uint64_t)LoVal);
+ unsigned TzHi = llvm::countr_zero(Tmp);
+ assert(TzLo < 32 && TzHi >= 32);
+ ShiftAmt = TzHi - TzLo;
+ AddOpc = RISCV::ADD;
+
+ if (Tmp == ((uint64_t)LoVal << ShiftAmt))
+ return RISCVMatInt::generateInstSeq(LoVal, STI);
+
+ // If we have Zba, we can use (ADD_UW X, (SLLI X, 32)).
+ if (STI.hasFeature(RISCV::FeatureStdExtZba) && Lo_32(Val) == Hi_32(Val)) {
+ ShiftAmt = 32;
+ AddOpc = RISCV::ADD_UW;
+ return RISCVMatInt::generateInstSeq(LoVal, STI);
+ }
+
+ return RISCVMatInt::InstSeq();
+}
+
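
A worked instance of the two-register strategy, using GCC/Clang builtins in place of llvm::countr_zero: when the constant's high part is its low half shifted up, one materialization plus an SLLI and an ADD suffices.

```cpp
#include <cassert>
#include <cstdint>

int main() {
  int64_t Val = 0x1234567812345678LL;
  int64_t LoVal = (int32_t)Val;                     // SignExtend64<32>(Val)
  uint64_t Tmp = (uint64_t)Val - (uint64_t)LoVal;   // what the ADD must supply
  unsigned TzLo = __builtin_ctzll((uint64_t)LoVal); // 3
  unsigned TzHi = __builtin_ctzll(Tmp);             // 35
  unsigned ShiftAmt = TzHi - TzLo;                  // 32
  assert(Tmp == ((uint64_t)LoVal << ShiftAmt));
  // So: materialize 0x12345678 once, SLLI a copy by 32, then ADD them.
  assert((uint64_t)LoVal + ((uint64_t)LoVal << ShiftAmt) == (uint64_t)Val);
  return 0;
}
```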
+int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI,
+ bool CompressionCost) {
+ bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit);
+ bool HasRVC = CompressionCost && (STI.hasFeature(RISCV::FeatureStdExtC) ||
+ STI.hasFeature(RISCV::FeatureStdExtZca));
int PlatRegSize = IsRV64 ? 64 : 32;
// Split the constant into platform register sized chunks, and calculate cost
@@ -408,7 +481,7 @@ int getIntMatCost(const APInt &Val, unsigned Size,
int Cost = 0;
for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) {
APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize);
- InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), ActiveFeatures);
+ InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), STI);
Cost += getInstSeqCost(MatSeq, HasRVC);
}
return std::max(1, Cost);
@@ -429,6 +502,7 @@ OpndKind Inst::getOpndKind() const {
return RISCVMatInt::RegReg;
case RISCV::ADDI:
case RISCV::ADDIW:
+ case RISCV::XORI:
case RISCV::SLLI:
case RISCV::SRLI:
case RISCV::SLLI_UW:
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
index ae7b8d402184..780f685463f3 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
@@ -10,7 +10,7 @@
#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_MATINT_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/TargetParser/SubtargetFeature.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include <cstdint>
namespace llvm {
@@ -46,7 +46,15 @@ using InstSeq = SmallVector<Inst, 8>;
// simple struct is produced rather than directly emitting the instructions in
// order to allow this helper to be used from both the MC layer and during
// instruction selection.
-InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures);
+InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI);
+
+// Helper to generate an instruction sequence that can materialize the given
+// immediate value into a register using an additional temporary register. This
+// handles cases where the constant can be generated by (ADD (SLLI X, C), X) or
+// (ADD_UW (SLLI X, C), X). The sequence to generate X is returned. ShiftAmt
+// provides the shift amount for the SLLI, and AddOpc indicates ADD or ADD_UW.
+InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI,
+ unsigned &ShiftAmt, unsigned &AddOpc);
// Helper to estimate the number of instructions required to materialise the
// given immediate value into a register. This estimate does not account for
@@ -58,8 +66,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures);
// If CompressionCost is true it will use a different cost calculation if RVC is
// enabled. This should be used to compare two different sequences to determine
// which is more compressible.
-int getIntMatCost(const APInt &Val, unsigned Size,
- const FeatureBitset &ActiveFeatures,
+int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI,
bool CompressionCost = false);
} // namespace RISCVMatInt
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h
index 107ca51520b7..9eb18099894b 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h
@@ -18,13 +18,8 @@
#include "llvm/Target/TargetMachine.h"
namespace llvm {
-class AsmPrinter;
class FunctionPass;
class InstructionSelector;
-class MCInst;
-class MCOperand;
-class MachineInstr;
-class MachineOperand;
class PassRegistry;
class RISCVRegisterBankInfo;
class RISCVSubtarget;
@@ -33,8 +28,11 @@ class RISCVTargetMachine;
FunctionPass *createRISCVCodeGenPreparePass();
void initializeRISCVCodeGenPreparePass(PassRegistry &);
+FunctionPass *createRISCVDeadRegisterDefinitionsPass();
+void initializeRISCVDeadRegisterDefinitionsPass(PassRegistry &);
+
FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM,
- CodeGenOpt::Level OptLevel);
+ CodeGenOptLevel OptLevel);
FunctionPass *createRISCVMakeCompressibleOptPass();
void initializeRISCVMakeCompressibleOptPass(PassRegistry &);
@@ -42,6 +40,9 @@ void initializeRISCVMakeCompressibleOptPass(PassRegistry &);
FunctionPass *createRISCVGatherScatterLoweringPass();
void initializeRISCVGatherScatterLoweringPass(PassRegistry &);
+FunctionPass *createRISCVFoldMasksPass();
+void initializeRISCVFoldMasksPass(PassRegistry &);
+
FunctionPass *createRISCVOptWInstrsPass();
void initializeRISCVOptWInstrsPass(PassRegistry &);
@@ -60,9 +61,14 @@ void initializeRISCVExpandAtomicPseudoPass(PassRegistry &);
FunctionPass *createRISCVInsertVSETVLIPass();
void initializeRISCVInsertVSETVLIPass(PassRegistry &);
+FunctionPass *createRISCVPostRAExpandPseudoPass();
+void initializeRISCVPostRAExpandPseudoPass(PassRegistry &);
FunctionPass *createRISCVInsertReadWriteCSRPass();
void initializeRISCVInsertReadWriteCSRPass(PassRegistry &);
+FunctionPass *createRISCVInsertWriteVXRMPass();
+void initializeRISCVInsertWriteVXRMPass(PassRegistry &);
+
FunctionPass *createRISCVRedundantCopyEliminationPass();
void initializeRISCVRedundantCopyEliminationPass(PassRegistry &);
@@ -80,6 +86,15 @@ InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
RISCVSubtarget &,
RISCVRegisterBankInfo &);
void initializeRISCVDAGToDAGISelPass(PassRegistry &);
+
+FunctionPass *createRISCVPostLegalizerCombiner();
+void initializeRISCVPostLegalizerCombinerPass(PassRegistry &);
+
+FunctionPass *createRISCVO0PreLegalizerCombiner();
+void initializeRISCVO0PreLegalizerCombinerPass(PassRegistry &);
+
+FunctionPass *createRISCVPreLegalizerCombiner();
+void initializeRISCVPreLegalizerCombinerPass(PassRegistry &);
} // namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index d2520d932ddf..0fd514fa87cd 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -36,6 +36,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/RISCVISAInfo.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
@@ -46,6 +47,10 @@ using namespace llvm;
STATISTIC(RISCVNumInstrsCompressed,
"Number of RISC-V Compressed instructions emitted");
+namespace llvm {
+extern const SubtargetFeatureKV RISCVFeatureKV[RISCV::NumSubtargetFeatures];
+} // namespace llvm
+
namespace {
class RISCVAsmPrinter : public AsmPrinter {
const RISCVSubtarget *STI;
@@ -57,6 +62,15 @@ public:
StringRef getPassName() const override { return "RISC-V Assembly Printer"; }
+ void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI);
+
+ void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI);
+
+ void LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI);
+
bool runOnMachineFunction(MachineFunction &MF) override;
void emitInstruction(const MachineInstr *MI) override;
@@ -83,6 +97,7 @@ public:
void emitEndOfAsmFile(Module &M) override;
void emitFunctionEntryLabel() override;
+ bool emitDirectiveOptionArch();
private:
void emitAttributes();
@@ -93,6 +108,78 @@ private:
};
}
+void RISCVAsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI) {
+ unsigned NOPBytes = STI->getFeatureBits()[RISCV::FeatureStdExtC] ? 2 : 4;
+ unsigned NumNOPBytes = StackMapOpers(&MI).getNumPatchBytes();
+
+ auto &Ctx = OutStreamer.getContext();
+ MCSymbol *MILabel = Ctx.createTempSymbol();
+ OutStreamer.emitLabel(MILabel);
+
+ SM.recordStackMap(*MILabel, MI);
+ assert(NumNOPBytes % NOPBytes == 0 &&
+ "Invalid number of NOP bytes requested!");
+
+ // Scan ahead to trim the shadow.
+ const MachineBasicBlock &MBB = *MI.getParent();
+ MachineBasicBlock::const_iterator MII(MI);
+ ++MII;
+ while (NumNOPBytes > 0) {
+ if (MII == MBB.end() || MII->isCall() ||
+ MII->getOpcode() == RISCV::DBG_VALUE ||
+ MII->getOpcode() == TargetOpcode::PATCHPOINT ||
+ MII->getOpcode() == TargetOpcode::STACKMAP)
+ break;
+ ++MII;
+ NumNOPBytes -= 4;
+ }
+
+ // Emit nops.
+ emitNops(NumNOPBytes / NOPBytes);
+}
+
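
The NOP-padding arithmetic shared by these lowerings, as a standalone check (helper name invented): the padding unit is a 2-byte c.nop when the compressed (C) extension is enabled, otherwise a 4-byte nop.

```cpp
#include <cassert>

// Convert a requested byte count into a number of NOP instructions.
static unsigned nopCount(unsigned NumNOPBytes, bool HasStdExtC) {
  unsigned NOPBytes = HasStdExtC ? 2 : 4;
  assert(NumNOPBytes % NOPBytes == 0 && "Invalid number of NOP bytes");
  return NumNOPBytes / NOPBytes;
}

int main() {
  assert(nopCount(8, /*HasStdExtC=*/true) == 4);  // eight bytes -> 4 c.nops
  assert(nopCount(8, /*HasStdExtC=*/false) == 2); // eight bytes -> 2 nops
  return 0;
}
```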
+// Lower a patchpoint of the form:
+// [<def>], <id>, <numBytes>, <target>, <numArgs>
+void RISCVAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI) {
+ unsigned NOPBytes = STI->getFeatureBits()[RISCV::FeatureStdExtC] ? 2 : 4;
+
+ auto &Ctx = OutStreamer.getContext();
+ MCSymbol *MILabel = Ctx.createTempSymbol();
+ OutStreamer.emitLabel(MILabel);
+ SM.recordPatchPoint(*MILabel, MI);
+
+ PatchPointOpers Opers(&MI);
+
+ unsigned EncodedBytes = 0;
+
+ // Emit padding.
+ unsigned NumBytes = Opers.getNumPatchBytes();
+ assert(NumBytes >= EncodedBytes &&
+ "Patchpoint can't request size less than the length of a call.");
+ assert((NumBytes - EncodedBytes) % NOPBytes == 0 &&
+ "Invalid number of NOP bytes requested!");
+ emitNops((NumBytes - EncodedBytes) / NOPBytes);
+}
+
+void RISCVAsmPrinter::LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI) {
+ unsigned NOPBytes = STI->getFeatureBits()[RISCV::FeatureStdExtC] ? 2 : 4;
+
+ StatepointOpers SOpers(&MI);
+ if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
+ assert(PatchBytes % NOPBytes == 0 &&
+ "Invalid number of NOP bytes requested!");
+ emitNops(PatchBytes / NOPBytes);
+ }
+
+ auto &Ctx = OutStreamer.getContext();
+ MCSymbol *MILabel = Ctx.createTempSymbol();
+ OutStreamer.emitLabel(MILabel);
+ SM.recordStatepoint(*MILabel, MI);
+}
+
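
The three lowerings above share the same NOP-size arithmetic: padding is emitted as whole NOPs, 2 bytes each when the compressed (C) extension is enabled and 4 otherwise. A minimal standalone sketch of that calculation (nopCountForPatch is an invented name, not part of the patch):

    #include <cassert>

    // c.nop is 2 bytes when the C extension is enabled; nop is 4 bytes.
    unsigned nopCountForPatch(unsigned NumPatchBytes, bool HasCompressed) {
      unsigned NOPBytes = HasCompressed ? 2 : 4;
      assert(NumPatchBytes % NOPBytes == 0 && "Invalid number of NOP bytes");
      return NumPatchBytes / NOPBytes;
    }

    int main() {
      assert(nopCountForPatch(16, true) == 8);  // 8 x c.nop
      assert(nopCountForPatch(16, false) == 4); // 4 x nop
    }
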
void RISCVAsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
MCInst CInst;
bool Res = RISCVRVC::compress(CInst, Inst, *STI);
@@ -160,6 +247,12 @@ void RISCVAsmPrinter::emitInstruction(const MachineInstr *MI) {
case RISCV::PseudoRVVInitUndefM4:
case RISCV::PseudoRVVInitUndefM8:
return;
+ case TargetOpcode::STACKMAP:
+ return LowerSTACKMAP(*OutStreamer, SM, *MI);
+ case TargetOpcode::PATCHPOINT:
+ return LowerPATCHPOINT(*OutStreamer, SM, *MI);
+ case TargetOpcode::STATEPOINT:
+ return LowerSTATEPOINT(*OutStreamer, SM, *MI);
}
MCInst OutInst;
@@ -230,7 +323,7 @@ bool RISCVAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
// RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand).
if (!AddrReg.isReg())
return true;
- if (!Offset.isImm() && !Offset.isGlobal())
+ if (!Offset.isImm() && !Offset.isGlobal() && !Offset.isBlockAddress())
return true;
MCOperand MCO;
@@ -239,17 +332,49 @@ bool RISCVAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
if (Offset.isImm())
OS << MCO.getImm();
- else if (Offset.isGlobal())
+ else if (Offset.isGlobal() || Offset.isBlockAddress())
OS << *MCO.getExpr();
OS << "(" << RISCVInstPrinter::getRegisterName(AddrReg.getReg()) << ")";
return false;
}
+bool RISCVAsmPrinter::emitDirectiveOptionArch() {
+ RISCVTargetStreamer &RTS =
+ static_cast<RISCVTargetStreamer &>(*OutStreamer->getTargetStreamer());
+ SmallVector<RISCVOptionArchArg> NeedEmitStdOptionArgs;
+ const MCSubtargetInfo &MCSTI = *TM.getMCSubtargetInfo();
+ for (const auto &Feature : RISCVFeatureKV) {
+ if (STI->hasFeature(Feature.Value) == MCSTI.hasFeature(Feature.Value))
+ continue;
+
+ if (!llvm::RISCVISAInfo::isSupportedExtensionFeature(Feature.Key))
+ continue;
+
+ auto Delta = STI->hasFeature(Feature.Value) ? RISCVOptionArchArgType::Plus
+ : RISCVOptionArchArgType::Minus;
+ NeedEmitStdOptionArgs.emplace_back(Delta, Feature.Key);
+ }
+ if (!NeedEmitStdOptionArgs.empty()) {
+ RTS.emitDirectiveOptionPush();
+ RTS.emitDirectiveOptionArch(NeedEmitStdOptionArgs);
+ return true;
+ }
+
+ return false;
+}
+
bool RISCVAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
STI = &MF.getSubtarget<RISCVSubtarget>();
+ RISCVTargetStreamer &RTS =
+ static_cast<RISCVTargetStreamer &>(*OutStreamer->getTargetStreamer());
+
+ bool EmittedOptionArch = emitDirectiveOptionArch();
SetupMachineFunction(MF);
emitFunctionBody();
+
+ if (EmittedOptionArch)
+ RTS.emitDirectiveOptionPop();
return false;
}
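
What emitDirectiveOptionArch boils down to is a set difference between the module-level MC subtarget and the per-function subtarget; a non-empty difference causes the function body to be bracketed by .option push / .option arch +ext,-ext,... / .option pop. A rough sketch with plain STL types standing in for SubtargetFeatureKV and RISCVOptionArchArg (the real API differs):

    #include <cstddef>
    #include <string>
    #include <utility>
    #include <vector>

    // Returns "+ext" / "-ext" strings for every extension whose setting in
    // the function-level feature set differs from the module-level one.
    std::vector<std::string>
    archDeltas(const std::vector<std::pair<std::string, bool>> &ModuleHas,
               const std::vector<std::pair<std::string, bool>> &FuncHas) {
      std::vector<std::string> Out;
      for (std::size_t I = 0; I != ModuleHas.size(); ++I)
        if (ModuleHas[I].second != FuncHas[I].second)
          Out.push_back((FuncHas[I].second ? "+" : "-") + FuncHas[I].first);
      return Out; // non-empty => wrap the function in .option push/arch/pop
    }
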
@@ -738,13 +863,14 @@ static bool lowerRISCVVMachineInstrToMCInst(const MachineInstr *MI,
uint64_t TSFlags = MCID.TSFlags;
unsigned NumOps = MI->getNumExplicitOperands();
- // Skip policy, VL and SEW operands which are the last operands if present.
+ // Skip policy, SEW, VL, VXRM/FRM operands which are the last operands if
+ // present.
if (RISCVII::hasVecPolicyOp(TSFlags))
--NumOps;
- if (RISCVII::hasVLOp(TSFlags))
- --NumOps;
if (RISCVII::hasSEWOp(TSFlags))
--NumOps;
+ if (RISCVII::hasVLOp(TSFlags))
+ --NumOps;
if (RISCVII::hasRoundModeOp(TSFlags))
--NumOps;
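
The swap of the SEW and VL checks above matters because the trailing operands are peeled off back-to-front, so the order of the checks must mirror the operand layout. A toy restatement with invented boolean parameters in place of the TSFlags queries:

    // Peel from the back: [..., vxrm/frm, vl, sew, policy]
    unsigned explicitOpsToLower(unsigned NumOps, bool HasPolicy, bool HasSEW,
                                bool HasVL, bool HasRoundMode) {
      if (HasPolicy)
        --NumOps;
      if (HasSEW)
        --NumOps;
      if (HasVL)
        --NumOps;
      if (HasRoundMode)
        --NumOps;
      return NumOps;
    }
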
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index 2fcd9a40588a..f9d8401bab7b 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -28,8 +28,6 @@ using namespace llvm;
#define DEBUG_TYPE "riscv-codegenprepare"
#define PASS_NAME "RISC-V CodeGenPrepare"
-STATISTIC(NumZExtToSExt, "Number of SExt instructions converted to ZExt");
-
namespace {
class RISCVCodeGenPrepare : public FunctionPass,
@@ -52,59 +50,14 @@ public:
}
bool visitInstruction(Instruction &I) { return false; }
- bool visitZExtInst(ZExtInst &I);
bool visitAnd(BinaryOperator &BO);
};
} // end anonymous namespace
-bool RISCVCodeGenPrepare::visitZExtInst(ZExtInst &ZExt) {
- if (!ST->is64Bit())
- return false;
-
- Value *Src = ZExt.getOperand(0);
-
- // We only care about ZExt from i32 to i64.
- if (!ZExt.getType()->isIntegerTy(64) || !Src->getType()->isIntegerTy(32))
- return false;
-
- // Look for an opportunity to replace (i64 (zext (i32 X))) with a sext if we
- // can determine that the sign bit of X is zero via a dominating condition.
- // This often occurs with widened induction variables.
- if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src,
- Constant::getNullValue(Src->getType()), &ZExt,
- *DL).value_or(false)) {
- auto *SExt = new SExtInst(Src, ZExt.getType(), "", &ZExt);
- SExt->takeName(&ZExt);
- SExt->setDebugLoc(ZExt.getDebugLoc());
-
- ZExt.replaceAllUsesWith(SExt);
- ZExt.eraseFromParent();
- ++NumZExtToSExt;
- return true;
- }
-
- // Convert (zext (abs(i32 X, i1 1))) -> (sext (abs(i32 X, i1 1))). If abs of
- // INT_MIN is poison, the sign bit is zero.
- using namespace PatternMatch;
- if (match(Src, m_Intrinsic<Intrinsic::abs>(m_Value(), m_One()))) {
- auto *SExt = new SExtInst(Src, ZExt.getType(), "", &ZExt);
- SExt->takeName(&ZExt);
- SExt->setDebugLoc(ZExt.getDebugLoc());
-
- ZExt.replaceAllUsesWith(SExt);
- ZExt.eraseFromParent();
- ++NumZExtToSExt;
- return true;
- }
-
- return false;
-}
-
// Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set,
-// but bits 63:32 are zero. If we can prove that bit 31 of X is 0, we can fill
-// the upper 32 bits with ones. A separate transform will turn (zext X) into
-// (sext X) for the same condition.
+// but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill
+// the upper 32 bits with ones.
bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
if (!ST->is64Bit())
return false;
@@ -112,9 +65,17 @@ bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
if (!BO.getType()->isIntegerTy(64))
return false;
- // Left hand side should be sext or zext.
+ auto canBeSignExtend = [](Instruction *I) {
+ if (isa<SExtInst>(I))
+ return true;
+ if (isa<ZExtInst>(I))
+ return I->hasNonNeg();
+ return false;
+ };
+
+ // Left hand side should be a sext or zext nneg.
Instruction *LHS = dyn_cast<Instruction>(BO.getOperand(0));
- if (!LHS || (!isa<SExtInst>(LHS) && !isa<ZExtInst>(LHS)))
+ if (!LHS || !canBeSignExtend(LHS))
return false;
Value *LHSSrc = LHS->getOperand(0);
@@ -135,13 +96,6 @@ bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
if (!isUInt<32>(C) || isInt<12>(C) || !isInt<12>(SignExtend64<32>(C)))
return false;
- // If we can determine the sign bit of the input is 0, we can replace the
- // And mask constant.
- if (!isImpliedByDomCondition(ICmpInst::ICMP_SGE, LHSSrc,
- Constant::getNullValue(LHSSrc->getType()),
- LHS, *DL).value_or(false))
- return false;
-
// Sign extend the constant and replace the And operand.
C = SignExtend64<32>(C);
BO.setOperand(1, ConstantInt::get(LHS->getType(), C));
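
The constant test above is the crux of the transform: C must sit in the low 32 bits with bit 31 set, must not already be a valid 12-bit immediate, and its 32-to-64-bit sign extension must be one, so the AND can become an ANDI once bits 63:32 are known to be copies of bit 31. A self-contained sketch of that test with a worked example (isProfitableAndMask is an invented name):

    #include <cassert>
    #include <cstdint>

    bool isProfitableAndMask(std::uint64_t C) {
      auto IsInt12 = [](std::int64_t V) { return V >= -2048 && V <= 2047; };
      bool IsUInt32 = (C >> 32) == 0;                          // isUInt<32>(C)
      std::int64_t SExt = (std::int64_t)(std::int32_t)C;       // SignExtend64<32>(C)
      return IsUInt32 && !IsInt12((std::int64_t)C) && IsInt12(SExt);
    }

    int main() {
      assert(isProfitableAndMask(0xFFFFFFF0));  // becomes andi rd, rs, -16
      assert(!isProfitableAndMask(0x7FFFFFF0)); // bit 31 clear: no benefit
    }
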
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCombine.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCombine.td
new file mode 100644
index 000000000000..3a5afb1b075c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCombine.td
@@ -0,0 +1,27 @@
+//=- RISCVCombine.td - Define RISC-V Combine Rules -----------*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/GlobalISel/Combine.td"
+
+def RISCVPreLegalizerCombiner: GICombiner<
+ "RISCVPreLegalizerCombinerImpl", [all_combines]> {
+}
+
+def RISCVO0PreLegalizerCombiner: GICombiner<
+ "RISCVO0PreLegalizerCombinerImpl", [optnone_combines]> {
+}
+
+// Post-legalization combines which are primarily optimizations.
+// TODO: Add more combines.
+def RISCVPostLegalizerCombiner
+ : GICombiner<"RISCVPostLegalizerCombinerImpl",
+ [redundant_and, identity_combines]> {
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
new file mode 100644
index 000000000000..df607236f7d5
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
@@ -0,0 +1,103 @@
+//===- RISCVDeadRegisterDefinitions.cpp - Replace dead defs w/ zero reg --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+//
+// This pass rewrites Rd to x0 for instrs whose return values are unused.
+//
+//===---------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+#define DEBUG_TYPE "riscv-dead-defs"
+#define RISCV_DEAD_REG_DEF_NAME "RISC-V Dead register definitions"
+
+STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced");
+
+namespace {
+class RISCVDeadRegisterDefinitions : public MachineFunctionPass {
+public:
+ static char ID;
+
+ RISCVDeadRegisterDefinitions() : MachineFunctionPass(ID) {}
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override { return RISCV_DEAD_REG_DEF_NAME; }
+};
+} // end anonymous namespace
+
+char RISCVDeadRegisterDefinitions::ID = 0;
+INITIALIZE_PASS(RISCVDeadRegisterDefinitions, DEBUG_TYPE,
+ RISCV_DEAD_REG_DEF_NAME, false, false)
+
+FunctionPass *llvm::createRISCVDeadRegisterDefinitionsPass() {
+ return new RISCVDeadRegisterDefinitions();
+}
+
+bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ LLVM_DEBUG(dbgs() << "***** RISCVDeadRegisterDefinitions *****\n");
+
+ bool MadeChange = false;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ // We only handle non-computational instructions since some NOP encodings
+ // are reserved for HINT instructions.
+ const MCInstrDesc &Desc = MI.getDesc();
+ if (!Desc.mayLoad() && !Desc.mayStore() &&
+ !Desc.hasUnmodeledSideEffects())
+ continue;
+ // For PseudoVSETVLIX0, Rd = X0 has special meaning.
+ if (MI.getOpcode() == RISCV::PseudoVSETVLIX0)
+ continue;
+ for (int I = 0, E = Desc.getNumDefs(); I != E; ++I) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isDef() || MO.isEarlyClobber())
+ continue;
+ // Be careful not to change the register if it's a tied operand.
+ if (MI.isRegTiedToUseOperand(I)) {
+ LLVM_DEBUG(dbgs() << " Ignoring, def is tied operand.\n");
+ continue;
+ }
+ // We should not have any relevant physreg defs that are replaceable by
+ // zero before register allocation. So we just check for dead vreg defs.
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual() || (!MO.isDead() && !MRI->use_nodbg_empty(Reg)))
+ continue;
+ LLVM_DEBUG(dbgs() << " Dead def operand #" << I << " in:\n ";
+ MI.print(dbgs()));
+ const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF);
+ if (!(RC && RC->contains(RISCV::X0))) {
+ LLVM_DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
+ continue;
+ }
+ MO.setReg(RISCV::X0);
+ MO.setIsDead();
+ LLVM_DEBUG(dbgs() << " Replacing with zero register. New:\n ";
+ MI.print(dbgs()));
+ ++NumDeadDefsReplaced;
+ MadeChange = true;
+ }
+ }
+ }
+
+ return MadeChange;
+}
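
Condensing the chain of guards in the loop above: a definition is rewritten to x0 only when it is a register def that is not early-clobber, not tied to a use, virtual, dead (or without non-debug uses), and of a class containing x0. A schematic predicate with an invented DefInfo stand-in for the MachineOperand/MRI queries:

    struct DefInfo {
      bool IsReg, IsDef, IsEarlyClobber, IsTiedToUse, IsVirtualReg,
           IsDeadOrUnused, ClassContainsX0;
    };

    bool canRewriteToX0(const DefInfo &D) {
      return D.IsReg && D.IsDef && !D.IsEarlyClobber && !D.IsTiedToUse &&
             D.IsVirtualReg && D.IsDeadOrUnused && D.ClassContainsX0;
    }
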
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
index d10bba26023f..bb772fc5da92 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
@@ -30,6 +30,7 @@ namespace {
class RISCVExpandAtomicPseudo : public MachineFunctionPass {
public:
+ const RISCVSubtarget *STI;
const RISCVInstrInfo *TII;
static char ID;
@@ -72,7 +73,8 @@ private:
char RISCVExpandAtomicPseudo::ID = 0;
bool RISCVExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
- TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
+ STI = &MF.getSubtarget<RISCVSubtarget>();
+ TII = STI->getInstrInfo();
#ifndef NDEBUG
const unsigned OldSize = getInstSizeInBytes(MF);
@@ -105,8 +107,8 @@ bool RISCVExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
- // RISCVInstrInfo::getInstSizeInBytes expects that the total size of the
- // expanded instructions for each pseudo is correct in the Size field of the
+ // RISCVInstrInfo::getInstSizeInBytes expects that the total size of the
+ // expanded instructions for each pseudo is correct in the Size field of the
// tablegen definition for the pseudo.
switch (MBBI->getOpcode()) {
case RISCV::PseudoAtomicLoadNand32:
@@ -148,24 +150,30 @@ bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
return false;
}
-static unsigned getLRForRMW32(AtomicOrdering Ordering) {
+static unsigned getLRForRMW32(AtomicOrdering Ordering,
+ const RISCVSubtarget *Subtarget) {
switch (Ordering) {
default:
llvm_unreachable("Unexpected AtomicOrdering");
case AtomicOrdering::Monotonic:
return RISCV::LR_W;
case AtomicOrdering::Acquire:
+ if (Subtarget->hasStdExtZtso())
+ return RISCV::LR_W;
return RISCV::LR_W_AQ;
case AtomicOrdering::Release:
return RISCV::LR_W;
case AtomicOrdering::AcquireRelease:
+ if (Subtarget->hasStdExtZtso())
+ return RISCV::LR_W;
return RISCV::LR_W_AQ;
case AtomicOrdering::SequentiallyConsistent:
return RISCV::LR_W_AQ_RL;
}
}
-static unsigned getSCForRMW32(AtomicOrdering Ordering) {
+static unsigned getSCForRMW32(AtomicOrdering Ordering,
+ const RISCVSubtarget *Subtarget) {
switch (Ordering) {
default:
llvm_unreachable("Unexpected AtomicOrdering");
@@ -174,32 +182,42 @@ static unsigned getSCForRMW32(AtomicOrdering Ordering) {
case AtomicOrdering::Acquire:
return RISCV::SC_W;
case AtomicOrdering::Release:
+ if (Subtarget->hasStdExtZtso())
+ return RISCV::SC_W;
return RISCV::SC_W_RL;
case AtomicOrdering::AcquireRelease:
+ if (Subtarget->hasStdExtZtso())
+ return RISCV::SC_W;
return RISCV::SC_W_RL;
case AtomicOrdering::SequentiallyConsistent:
return RISCV::SC_W_RL;
}
}
-static unsigned getLRForRMW64(AtomicOrdering Ordering) {
+static unsigned getLRForRMW64(AtomicOrdering Ordering,
+ const RISCVSubtarget *Subtarget) {
switch (Ordering) {
default:
llvm_unreachable("Unexpected AtomicOrdering");
case AtomicOrdering::Monotonic:
return RISCV::LR_D;
case AtomicOrdering::Acquire:
+ if (Subtarget->hasStdExtZtso())
+ return RISCV::LR_D;
return RISCV::LR_D_AQ;
case AtomicOrdering::Release:
return RISCV::LR_D;
case AtomicOrdering::AcquireRelease:
+ if (Subtarget->hasStdExtZtso())
+ return RISCV::LR_D;
return RISCV::LR_D_AQ;
case AtomicOrdering::SequentiallyConsistent:
return RISCV::LR_D_AQ_RL;
}
}
-static unsigned getSCForRMW64(AtomicOrdering Ordering) {
+static unsigned getSCForRMW64(AtomicOrdering Ordering,
+ const RISCVSubtarget *Subtarget) {
switch (Ordering) {
default:
llvm_unreachable("Unexpected AtomicOrdering");
@@ -208,27 +226,33 @@ static unsigned getSCForRMW64(AtomicOrdering Ordering) {
case AtomicOrdering::Acquire:
return RISCV::SC_D;
case AtomicOrdering::Release:
+ if (Subtarget->hasStdExtZtso())
+ return RISCV::SC_D;
return RISCV::SC_D_RL;
case AtomicOrdering::AcquireRelease:
+ if (Subtarget->hasStdExtZtso())
+ return RISCV::SC_D;
return RISCV::SC_D_RL;
case AtomicOrdering::SequentiallyConsistent:
return RISCV::SC_D_RL;
}
}
-static unsigned getLRForRMW(AtomicOrdering Ordering, int Width) {
+static unsigned getLRForRMW(AtomicOrdering Ordering, int Width,
+ const RISCVSubtarget *Subtarget) {
if (Width == 32)
- return getLRForRMW32(Ordering);
+ return getLRForRMW32(Ordering, Subtarget);
if (Width == 64)
- return getLRForRMW64(Ordering);
+ return getLRForRMW64(Ordering, Subtarget);
llvm_unreachable("Unexpected LR width\n");
}
-static unsigned getSCForRMW(AtomicOrdering Ordering, int Width) {
+static unsigned getSCForRMW(AtomicOrdering Ordering, int Width,
+ const RISCVSubtarget *Subtarget) {
if (Width == 32)
- return getSCForRMW32(Ordering);
+ return getSCForRMW32(Ordering, Subtarget);
if (Width == 64)
- return getSCForRMW64(Ordering);
+ return getSCForRMW64(Ordering, Subtarget);
llvm_unreachable("Unexpected SC width\n");
}
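
The pattern of the Ztso guards above is uniform: under total store order, acquire and release orderings come for free, so the aq/rl annotated LR/SC variants are kept only for seq_cst. A compact sketch of the effective 32-bit LR mapping (the enum and mnemonic strings are illustrative):

    #include <cassert>
    #include <cstring>

    enum class Ord { Monotonic, Acquire, Release, AcqRel, SeqCst };

    const char *lrW(Ord O, bool HasZtso) {
      switch (O) {
      case Ord::Monotonic:
      case Ord::Release:
        return "lr.w";
      case Ord::Acquire:
      case Ord::AcqRel:
        return HasZtso ? "lr.w" : "lr.w.aq"; // Ztso makes acquire implicit
      case Ord::SeqCst:
        return "lr.w.aqrl"; // seq_cst keeps aq/rl even under Ztso
      }
      return nullptr; // unreachable
    }

    int main() {
      assert(!std::strcmp(lrW(Ord::Acquire, true), "lr.w"));
      assert(!std::strcmp(lrW(Ord::Acquire, false), "lr.w.aq"));
      assert(!std::strcmp(lrW(Ord::SeqCst, true), "lr.w.aqrl"));
    }
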
@@ -236,7 +260,8 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
DebugLoc DL, MachineBasicBlock *ThisMBB,
MachineBasicBlock *LoopMBB,
MachineBasicBlock *DoneMBB,
- AtomicRMWInst::BinOp BinOp, int Width) {
+ AtomicRMWInst::BinOp BinOp, int Width,
+ const RISCVSubtarget *STI) {
Register DestReg = MI.getOperand(0).getReg();
Register ScratchReg = MI.getOperand(1).getReg();
Register AddrReg = MI.getOperand(2).getReg();
@@ -249,7 +274,7 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
// binop scratch, dest, val
// sc.[w|d] scratch, scratch, (addr)
// bnez scratch, loop
- BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
+ BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg)
.addReg(AddrReg);
switch (BinOp) {
default:
@@ -263,7 +288,7 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
.addImm(-1);
break;
}
- BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
+ BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg)
.addReg(AddrReg)
.addReg(ScratchReg);
BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
@@ -294,10 +319,13 @@ static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
.addReg(ScratchReg);
}
-static void doMaskedAtomicBinOpExpansion(
- const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
- MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
- MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
+static void doMaskedAtomicBinOpExpansion(const RISCVInstrInfo *TII,
+ MachineInstr &MI, DebugLoc DL,
+ MachineBasicBlock *ThisMBB,
+ MachineBasicBlock *LoopMBB,
+ MachineBasicBlock *DoneMBB,
+ AtomicRMWInst::BinOp BinOp, int Width,
+ const RISCVSubtarget *STI) {
assert(Width == 32 && "Should never need to expand masked 64-bit operations");
Register DestReg = MI.getOperand(0).getReg();
Register ScratchReg = MI.getOperand(1).getReg();
@@ -315,7 +343,7 @@ static void doMaskedAtomicBinOpExpansion(
// xor scratch, destreg, scratch
// sc.w scratch, scratch, (alignedaddr)
// bnez scratch, loop
- BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
.addReg(AddrReg);
switch (BinOp) {
default:
@@ -348,7 +376,7 @@ static void doMaskedAtomicBinOpExpansion(
insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
ScratchReg);
- BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), ScratchReg)
.addReg(AddrReg)
.addReg(ScratchReg);
BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
@@ -380,10 +408,11 @@ bool RISCVExpandAtomicPseudo::expandAtomicBinOp(
MBB.addSuccessor(LoopMBB);
if (!IsMasked)
- doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width);
+ doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width,
+ STI);
else
doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
- Width);
+ Width, STI);
NextMBBI = MBB.end();
MI.eraseFromParent();
@@ -455,7 +484,7 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
// mv scratch1, destreg
// [sext scratch2 if signed min/max]
// ifnochangeneeded scratch2, incr, .looptail
- BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
.addReg(AddrReg);
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
.addReg(DestReg)
@@ -507,7 +536,7 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
// .looptail:
// sc.w scratch1, scratch1, (addr)
// bnez scratch1, loop
- BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), Scratch1Reg)
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), Scratch1Reg)
.addReg(AddrReg)
.addReg(Scratch1Reg);
BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
@@ -635,7 +664,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
// .loophead:
// lr.[w|d] dest, (addr)
// bne dest, cmpval, done
- BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
+ DestReg)
.addReg(AddrReg);
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
.addReg(DestReg)
@@ -644,7 +674,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
// .looptail:
// sc.[w|d] scratch, newval, (addr)
// bnez scratch, loophead
- BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
+ ScratchReg)
.addReg(AddrReg)
.addReg(NewValReg);
BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
@@ -657,7 +688,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
// and scratch, dest, mask
// bne scratch, cmpval, done
Register MaskReg = MI.getOperand(5).getReg();
- BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
+ DestReg)
.addReg(AddrReg);
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
.addReg(DestReg)
@@ -675,7 +707,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
// bnez scratch, loophead
insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
MaskReg, ScratchReg);
- BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
+ ScratchReg)
.addReg(AddrReg)
.addReg(ScratchReg);
BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
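
The cmpxchg hunks above assemble the classic LR/SC retry loop. A single-threaded toy of that control flow, with the reservation modeled by a flag purely so the example runs (loadReserved/storeConditional are invented stand-ins for lr.w/sc.w):

    #include <cassert>

    static bool ReservationValid = false;

    static int loadReserved(int *A) {             // stands in for lr.w
      ReservationValid = true;
      return *A;
    }

    static bool storeConditional(int *A, int V) { // stands in for sc.w
      if (!ReservationValid)
        return false;
      *A = V;
      ReservationValid = false;
      return true;
    }

    int cmpxchg(int *Addr, int Cmp, int New) {
      int Dest;
      do {                                      // .loophead:
        Dest = loadReserved(Addr);              //   lr.w dest, (addr)
        if (Dest != Cmp)                        //   bne dest, cmpval, .done
          return Dest;
      } while (!storeConditional(Addr, New));   //   sc.w scratch, newval, (addr)
      return Dest;                              //   bnez scratch, .loophead
    }

    int main() {
      int X = 5;
      assert(cmpxchg(&X, 5, 7) == 5 && X == 7); // success path
      assert(cmpxchg(&X, 5, 9) == 7 && X == 7); // compare fails, no store
    }
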
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 58896ee1b388..24a13f93af88 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -34,9 +34,7 @@ public:
const RISCVInstrInfo *TII;
static char ID;
- RISCVExpandPseudo() : MachineFunctionPass(ID) {
- initializeRISCVExpandPseudoPass(*PassRegistry::getPassRegistry());
- }
+ RISCVExpandPseudo() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -119,6 +117,23 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
case RISCV::PseudoCCXOR:
case RISCV::PseudoCCADDW:
case RISCV::PseudoCCSUBW:
+ case RISCV::PseudoCCSLL:
+ case RISCV::PseudoCCSRL:
+ case RISCV::PseudoCCSRA:
+ case RISCV::PseudoCCADDI:
+ case RISCV::PseudoCCSLLI:
+ case RISCV::PseudoCCSRLI:
+ case RISCV::PseudoCCSRAI:
+ case RISCV::PseudoCCANDI:
+ case RISCV::PseudoCCORI:
+ case RISCV::PseudoCCXORI:
+ case RISCV::PseudoCCSLLW:
+ case RISCV::PseudoCCSRLW:
+ case RISCV::PseudoCCSRAW:
+ case RISCV::PseudoCCADDIW:
+ case RISCV::PseudoCCSLLIW:
+ case RISCV::PseudoCCSRLIW:
+ case RISCV::PseudoCCSRAIW:
return expandCCOp(MBB, MBBI, NextMBBI);
case RISCV::PseudoVSETVLI:
case RISCV::PseudoVSETVLIX0:
@@ -188,11 +203,28 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
llvm_unreachable("Unexpected opcode!");
case RISCV::PseudoCCADD: NewOpc = RISCV::ADD; break;
case RISCV::PseudoCCSUB: NewOpc = RISCV::SUB; break;
+ case RISCV::PseudoCCSLL: NewOpc = RISCV::SLL; break;
+ case RISCV::PseudoCCSRL: NewOpc = RISCV::SRL; break;
+ case RISCV::PseudoCCSRA: NewOpc = RISCV::SRA; break;
case RISCV::PseudoCCAND: NewOpc = RISCV::AND; break;
case RISCV::PseudoCCOR: NewOpc = RISCV::OR; break;
case RISCV::PseudoCCXOR: NewOpc = RISCV::XOR; break;
+ case RISCV::PseudoCCADDI: NewOpc = RISCV::ADDI; break;
+ case RISCV::PseudoCCSLLI: NewOpc = RISCV::SLLI; break;
+ case RISCV::PseudoCCSRLI: NewOpc = RISCV::SRLI; break;
+ case RISCV::PseudoCCSRAI: NewOpc = RISCV::SRAI; break;
+ case RISCV::PseudoCCANDI: NewOpc = RISCV::ANDI; break;
+ case RISCV::PseudoCCORI: NewOpc = RISCV::ORI; break;
+ case RISCV::PseudoCCXORI: NewOpc = RISCV::XORI; break;
case RISCV::PseudoCCADDW: NewOpc = RISCV::ADDW; break;
case RISCV::PseudoCCSUBW: NewOpc = RISCV::SUBW; break;
+ case RISCV::PseudoCCSLLW: NewOpc = RISCV::SLLW; break;
+ case RISCV::PseudoCCSRLW: NewOpc = RISCV::SRLW; break;
+ case RISCV::PseudoCCSRAW: NewOpc = RISCV::SRAW; break;
+ case RISCV::PseudoCCADDIW: NewOpc = RISCV::ADDIW; break;
+ case RISCV::PseudoCCSLLIW: NewOpc = RISCV::SLLIW; break;
+ case RISCV::PseudoCCSRLIW: NewOpc = RISCV::SRLIW; break;
+ case RISCV::PseudoCCSRAIW: NewOpc = RISCV::SRAIW; break;
}
BuildMI(TrueBB, DL, TII->get(NewOpc), DestReg)
.add(MI.getOperand(5))
@@ -275,8 +307,8 @@ bool RISCVExpandPseudo::expandRV32ZdinxStore(MachineBasicBlock &MBB,
.addReg(MBBI->getOperand(1).getReg())
.add(MBBI->getOperand(2));
if (MBBI->getOperand(2).isGlobal() || MBBI->getOperand(2).isCPI()) {
- // FIXME: Zdinx RV32 can not work on unaligned scalar memory.
- assert(!STI->enableUnalignedScalarMem());
+ // FIXME: Zdinx RV32 cannot work on unaligned memory.
+ assert(!STI->hasFastUnalignedAccess());
assert(MBBI->getOperand(2).getOffset() % 8 == 0);
MBBI->getOperand(2).setOffset(MBBI->getOperand(2).getOffset() + 4);
@@ -347,9 +379,7 @@ public:
const RISCVInstrInfo *TII;
static char ID;
- RISCVPreRAExpandPseudo() : MachineFunctionPass(ID) {
- initializeRISCVPreRAExpandPseudoPass(*PassRegistry::getPassRegistry());
- }
+ RISCVPreRAExpandPseudo() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td
index 4ce9c41eaf5c..294927aecb94 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -73,7 +73,7 @@ def HasStdExtZihintpause : Predicate<"Subtarget->hasStdExtZihintpause()">,
"'Zihintpause' (Pause Hint)">;
def FeatureStdExtZihintntl
- : SubtargetFeature<"experimental-zihintntl", "HasStdExtZihintntl", "true",
+ : SubtargetFeature<"zihintntl", "HasStdExtZihintntl", "true",
"'Zihintntl' (Non-Temporal Locality Hints)">;
def HasStdExtZihintntl : Predicate<"Subtarget->hasStdExtZihintntl()">,
AssemblerPredicate<(all_of FeatureStdExtZihintntl),
@@ -159,7 +159,7 @@ def HasStdExtZhinxOrZhinxmin
"'Zhinxmin' (Half Float in Integer Minimal)">;
def FeatureStdExtZfa
- : SubtargetFeature<"experimental-zfa", "HasStdExtZfa", "true",
+ : SubtargetFeature<"zfa", "HasStdExtZfa", "true",
"'Zfa' (Additional Floating-Point)",
[FeatureStdExtF]>;
def HasStdExtZfa : Predicate<"Subtarget->hasStdExtZfa()">,
@@ -444,8 +444,8 @@ def FeatureStdExtV
def HasVInstructions : Predicate<"Subtarget->hasVInstructions()">,
AssemblerPredicate<
(any_of FeatureStdExtZve32x),
- "'V' (Vector Extension for Application Processors), 'Zve32x' or "
- "'Zve64x' (Vector Extensions for Embedded Processors)">;
+ "'V' (Vector Extension for Application Processors), 'Zve32x' "
+ "(Vector Extensions for Embedded Processors)">;
def HasVInstructionsI64 : Predicate<"Subtarget->hasVInstructionsI64()">,
AssemblerPredicate<
(any_of FeatureStdExtZve64x),
@@ -454,17 +454,25 @@ def HasVInstructionsI64 : Predicate<"Subtarget->hasVInstructionsI64()">,
def HasVInstructionsAnyF : Predicate<"Subtarget->hasVInstructionsAnyF()">,
AssemblerPredicate<
(any_of FeatureStdExtZve32f),
- "'V' (Vector Extension for Application Processors), 'Zve32f', "
- "'Zve64f' or 'Zve64d' (Vector Extensions for Embedded Processors)">;
+ "'V' (Vector Extension for Application Processors), 'Zve32f' "
+ "(Vector Extensions for Embedded Processors)">;
def HasVInstructionsF64 : Predicate<"Subtarget->hasVInstructionsF64()">;
def HasVInstructionsFullMultiply : Predicate<"Subtarget->hasVInstructionsFullMultiply()">;
+def FeatureStdExtZfbfmin
+ : SubtargetFeature<"experimental-zfbfmin", "HasStdExtZfbfmin", "true",
+ "'Zfbfmin' (Scalar BF16 Converts)",
+ [FeatureStdExtF]>;
+def HasStdExtZfbfmin : Predicate<"Subtarget->hasStdExtZfbfmin()">,
+ AssemblerPredicate<(all_of FeatureStdExtZfbfmin),
+ "'Zfbfmin' (Scalar BF16 Converts)">;
+
def FeatureStdExtZvfbfmin
: SubtargetFeature<"experimental-zvfbfmin", "HasStdExtZvfbfmin", "true",
"'Zvbfmin' (Vector BF16 Converts)",
- [FeatureStdExtZve32f]>;
+ [FeatureStdExtZve32f, FeatureStdExtZfbfmin]>;
def HasStdExtZvfbfmin : Predicate<"Subtarget->hasStdExtZvfbfmin()">,
AssemblerPredicate<(all_of FeatureStdExtZvfbfmin),
"'Zvfbfmin' (Vector BF16 Converts)">;
@@ -472,18 +480,30 @@ def HasStdExtZvfbfmin : Predicate<"Subtarget->hasStdExtZvfbfmin()">,
def FeatureStdExtZvfbfwma
: SubtargetFeature<"experimental-zvfbfwma", "HasStdExtZvfbfwma", "true",
"'Zvfbfwma' (Vector BF16 widening mul-add)",
- [FeatureStdExtZve32f]>;
+ [FeatureStdExtZvfbfmin]>;
def HasStdExtZvfbfwma : Predicate<"Subtarget->hasStdExtZvfbfwma()">,
AssemblerPredicate<(all_of FeatureStdExtZvfbfwma),
"'Zvfbfwma' (Vector BF16 widening mul-add)">;
+def HasVInstructionsBF16 : Predicate<"Subtarget->hasVInstructionsBF16()">;
+
def FeatureStdExtZvfh
: SubtargetFeature<"zvfh", "HasStdExtZvfh", "true",
"'Zvfh' (Vector Half-Precision Floating-Point)",
[FeatureStdExtZve32f, FeatureStdExtZfhmin]>;
+def FeatureStdExtZvfhmin
+ : SubtargetFeature<"zvfhmin", "HasStdExtZvfhmin", "true",
+ "'Zvfhmin' (Vector Half-Precision Floating-Point Minimal)",
+ [FeatureStdExtZve32f]>;
+
def HasVInstructionsF16 : Predicate<"Subtarget->hasVInstructionsF16()">;
+def HasVInstructionsF16Minimal : Predicate<"Subtarget->hasVInstructionsF16Minimal()">,
+ AssemblerPredicate<(any_of FeatureStdExtZvfhmin, FeatureStdExtZvfh),
+ "'Zvfhmin' (Vector Half-Precision Floating-Point Minimal) or "
+ "'Zvfh' (Vector Half-Precision Floating-Point)">;
+
def HasStdExtZfhOrZvfh
: Predicate<"Subtarget->hasStdExtZfh() || Subtarget->hasStdExtZvfh()">,
AssemblerPredicate<(any_of FeatureStdExtZfh, FeatureStdExtZvfh),
@@ -529,9 +549,10 @@ def HasStdExtSvinval : Predicate<"Subtarget->hasStdExtSvinval()">,
def FeatureStdExtZtso
: SubtargetFeature<"experimental-ztso", "HasStdExtZtso", "true",
"'Ztso' (Memory Model - Total Store Order)">;
-def HasStdExtZtso : Predicate<"Subtarget->hasStdExtZTso()">,
+def HasStdExtZtso : Predicate<"Subtarget->hasStdExtZtso()">,
AssemblerPredicate<(all_of FeatureStdExtZtso),
"'Ztso' (Memory Model - Total Store Order)">;
+def NotHasStdExtZtso : Predicate<"!Subtarget->hasStdExtZtso()">;
def FeatureStdExtZawrs : SubtargetFeature<"zawrs", "HasStdExtZawrs", "true",
"'Zawrs' (Wait on Reservation Set)">;
@@ -539,12 +560,20 @@ def HasStdExtZawrs : Predicate<"Subtarget->hasStdExtZawrs()">,
AssemblerPredicate<(all_of FeatureStdExtZawrs),
"'Zawrs' (Wait on Reservation Set)">;
+def FeatureStdExtZvkb
+ : SubtargetFeature<"experimental-zvkb", "HasStdExtZvkb", "true",
+ "'Zvkb' (Vector Bit-manipulation used in Cryptography)">;
+def HasStdExtZvkb : Predicate<"Subtarget->hasStdExtZvkb()">,
+ AssemblerPredicate<(all_of FeatureStdExtZvkb),
+ "'Zvkb' (Vector Bit-manipulation used in Cryptography)">;
+
def FeatureStdExtZvbb
: SubtargetFeature<"experimental-zvbb", "HasStdExtZvbb", "true",
- "'Zvbb' (Vector Bit-manipulation used in Cryptography)">;
+ "'Zvbb' (Vector basic bit-manipulation instructions.)",
+ [FeatureStdExtZvkb]>;
def HasStdExtZvbb : Predicate<"Subtarget->hasStdExtZvbb()">,
AssemblerPredicate<(all_of FeatureStdExtZvbb),
- "'Zvbb' (Vector Bit-manipulation used in Cryptography)">;
+ "'Zvbb' (Vector basic bit-manipulation instructions.)">;
def FeatureStdExtZvbc
: SubtargetFeature<"experimental-zvbc", "HasStdExtZvbc", "true",
@@ -560,16 +589,6 @@ def HasStdExtZvkg : Predicate<"Subtarget->hasStdExtZvkg()">,
AssemblerPredicate<(all_of FeatureStdExtZvkg),
"'Zvkg' (Vector GCM instructions for Cryptography)">;
-def FeatureStdExtZvkn
- : SubtargetFeature<"experimental-zvkn", "HasStdExtZvkn", "true",
- "This extension is shorthand for the following set of "
- "other extensions: Zvkned, Zvknhb, Zvbb, Zvbc, and Zvkt.">;
-
-def FeatureStdExtZvknc
- : SubtargetFeature<"experimental-zvknc", "HasStdExtZvknc", "true",
- "This extension is shorthand for the following set of "
- "other extensions: Zvkn and Zvbc.">;
-
def FeatureStdExtZvkned
: SubtargetFeature<"experimental-zvkned", "HasStdExtZvkned", "true",
"'Zvkned' (Vector AES Encryption & Decryption (Single Round))">;
@@ -577,32 +596,24 @@ def HasStdExtZvkned : Predicate<"Subtarget->hasStdExtZvkned()">,
AssemblerPredicate<(all_of FeatureStdExtZvkned),
"'Zvkned' (Vector AES Encryption & Decryption (Single Round))">;
-def FeatureStdExtZvkng
- : SubtargetFeature<"experimental-zvkng", "HasStdExtZvkng", "true",
- "This extension is shorthand for the following set of "
- "other extensions: Zvkn and Zvkg.">;
-
def FeatureStdExtZvknha
: SubtargetFeature<"experimental-zvknha", "HasStdExtZvknha", "true",
"'Zvknha' (Vector SHA-2 (SHA-256 only))">;
-
-def FeatureStdExtZvknhb
- : SubtargetFeature<"experimental-zvknhb", "HasStdExtZvknhb", "true",
- "'Zvknhb' (Vector SHA-2 (SHA-256 and SHA-512))",
- [FeatureStdExtZvknha]>;
def HasStdExtZvknha : Predicate<"Subtarget->hasStdExtZvknha()">,
AssemblerPredicate<(all_of FeatureStdExtZvknha),
"'Zvknha' (Vector SHA-2 (SHA-256 only))">;
-def FeatureStdExtZvks
- : SubtargetFeature<"experimental-zvks", "HasStdExtZvks", "true",
- "This extension is shorthand for the following set of "
- "other extensions: Zvksed, Zvksh, Zvbb, Zvbc, and Zvkt.">;
+def FeatureStdExtZvknhb
+ : SubtargetFeature<"experimental-zvknhb", "HasStdExtZvknhb", "true",
+ "'Zvknhb' (Vector SHA-2 (SHA-256 and SHA-512))",
+ [FeatureStdExtZve64x]>;
+def HasStdExtZvknhb : Predicate<"Subtarget->hasStdExtZvknhb()">,
+ AssemblerPredicate<(all_of FeatureStdExtZvknhb),
+ "'Zvknhb' (Vector SHA-2 (SHA-256 and SHA-512))">;
-def FeatureStdExtZvksc
- : SubtargetFeature<"experimental-zvksc", "HasStdExtZvksc", "true",
- "This extension is shorthand for the following set of "
- "other extensions: Zvks and Zvbc.">;
+def HasStdExtZvknhaOrZvknhb : Predicate<"Subtarget->hasStdExtZvknha() || Subtarget->hasStdExtZvknhb()">,
+ AssemblerPredicate<(any_of FeatureStdExtZvknha, FeatureStdExtZvknhb),
+ "'Zvknha' or 'Zvknhb' (Vector SHA-2)">;
def FeatureStdExtZvksed
: SubtargetFeature<"experimental-zvksed", "HasStdExtZvksed", "true",
@@ -611,11 +622,6 @@ def HasStdExtZvksed : Predicate<"Subtarget->hasStdExtZvksed()">,
AssemblerPredicate<(all_of FeatureStdExtZvksed),
"'Zvksed' (SM4 Block Cipher Instructions)">;
-def FeatureStdExtZvksg
- : SubtargetFeature<"experimental-zvksg", "HasStdExtZvksg", "true",
- "This extension is shorthand for the following set of "
- "other extensions: Zvks and Zvkg.">;
-
def FeatureStdExtZvksh
: SubtargetFeature<"experimental-zvksh", "HasStdExtZvksh", "true",
"'Zvksh' (SM3 Hash Function Instructions)">;
@@ -627,6 +633,53 @@ def FeatureStdExtZvkt
: SubtargetFeature<"experimental-zvkt", "HasStdExtZvkt", "true",
"'Zvkt' (Vector Data-Independent Execution Latency)">;
+// Zvk short-hand extensions
+
+def FeatureStdExtZvkn
+ : SubtargetFeature<"experimental-zvkn", "HasStdExtZvkn", "true",
+ "This extension is shorthand for the following set of "
+ "other extensions: Zvkned, Zvknhb, Zvkb and Zvkt.",
+ [FeatureStdExtZvkned, FeatureStdExtZvknhb,
+ FeatureStdExtZvkb, FeatureStdExtZvkt]>;
+
+def FeatureStdExtZvknc
+ : SubtargetFeature<"experimental-zvknc", "HasStdExtZvknc", "true",
+ "This extension is shorthand for the following set of "
+ "other extensions: Zvkn and Zvbc.",
+ [FeatureStdExtZvkn, FeatureStdExtZvbc]>;
+
+def FeatureStdExtZvkng
+ : SubtargetFeature<"experimental-zvkng", "HasStdExtZvkng", "true",
+ "This extension is shorthand for the following set of "
+ "other extensions: Zvkn and Zvkg.",
+ [FeatureStdExtZvkn, FeatureStdExtZvkg]>;
+
+def FeatureStdExtZvks
+ : SubtargetFeature<"experimental-zvks", "HasStdExtZvks", "true",
+ "This extension is shorthand for the following set of "
+ "other extensions: Zvksed, Zvksh, Zvkb and Zvkt.",
+ [FeatureStdExtZvksed, FeatureStdExtZvksh,
+ FeatureStdExtZvkb, FeatureStdExtZvkt]>;
+
+def FeatureStdExtZvksc
+ : SubtargetFeature<"experimental-zvksc", "HasStdExtZvksc", "true",
+ "This extension is shorthand for the following set of "
+ "other extensions: Zvks and Zvbc.",
+ [FeatureStdExtZvks, FeatureStdExtZvbc]>;
+
+def FeatureStdExtZvksg
+ : SubtargetFeature<"experimental-zvksg", "HasStdExtZvksg", "true",
+ "This extension is shorthand for the following set of "
+ "other extensions: Zvks and Zvkg.",
+ [FeatureStdExtZvks, FeatureStdExtZvkg]>;
+
+def FeatureStdExtZicfilp
+ : SubtargetFeature<"experimental-zicfilp", "HasStdExtZicfilp", "true",
+ "'Zicfilp' (Landing pad)">;
+def HasStdExtZicfilp : Predicate<"Subtarget->hasStdExtZicfilp()">,
+ AssemblerPredicate<(all_of FeatureStdExtZicfilp),
+ "'Zicfilp' (Landing pad)">;
+
def FeatureStdExtZicond
: SubtargetFeature<"experimental-zicond", "HasStdExtZicond", "true",
"'Zicond' (Integer Conditional Operations)">;
@@ -635,34 +688,25 @@ def HasStdExtZicond : Predicate<"Subtarget->hasStdExtZicond()">,
"'Zicond' (Integer Conditional Operations)">;
def FeatureStdExtSmaia
- : SubtargetFeature<"experimental-smaia", "HasStdExtSmaia", "true",
+ : SubtargetFeature<"smaia", "HasStdExtSmaia", "true",
"'Smaia' (Smaia encompasses all added CSRs and all "
"modifications to interrupt response behavior that the "
"AIA specifies for a hart, over all privilege levels.)",
[]>;
def FeatureStdExtSsaia
- : SubtargetFeature<"experimental-ssaia", "HasStdExtSsaia", "true",
+ : SubtargetFeature<"ssaia", "HasStdExtSsaia", "true",
"'Ssaia' (Ssaia is essentially the same as Smaia except "
"excluding the machine-level CSRs and behavior not "
"directly visible to supervisor level.)", []>;
-def FeatureStdExtZfbfmin
- : SubtargetFeature<"experimental-zfbfmin", "HasStdExtZfbfmin", "true",
- "'Zfbfmin' (Scalar BF16 Converts)",
- [FeatureStdExtF]>;
-def HasStdExtZfbfmin : Predicate<"Subtarget->hasStdExtZfbfmin()">,
- AssemblerPredicate<(all_of FeatureStdExtZfbfmin),
- "'Zfbfmin' (Scalar BF16 Converts)">;
-
def HasHalfFPLoadStoreMove
: Predicate<"Subtarget->hasHalfFPLoadStoreMove()">,
AssemblerPredicate<(any_of FeatureStdExtZfh, FeatureStdExtZfhmin,
- FeatureStdExtZfbfmin, FeatureStdExtZvfbfwma),
+ FeatureStdExtZfbfmin),
"'Zfh' (Half-Precision Floating-Point) or "
"'Zfhmin' (Half-Precision Floating-Point Minimal) or "
- "'Zfbfmin' (Scalar BF16 Converts) or "
- "'Zvfbfwma' (Vector BF16 widening mul-add)">;
+ "'Zfbfmin' (Scalar BF16 Converts)">;
def FeatureStdExtZacas
: SubtargetFeature<"experimental-zacas", "HasStdExtZacas", "true",
@@ -776,6 +820,45 @@ def HasVendorXSfcie : Predicate<"Subtarget->hasVendorXSfcie()">,
AssemblerPredicate<(all_of FeatureVendorXSfcie),
"'XSfcie' (SiFive Custom Instruction Extension SCIE.)">;
+def FeatureVendorXSfvqmaccdod
+ : SubtargetFeature<"xsfvqmaccdod", "HasVendorXSfvqmaccdod", "true",
+ "'XSfvqmaccdod' (SiFive Int8 Matrix Multiplication Instructions (2-by-8 and 8-by-2))",
+ [FeatureStdExtZve32x]>;
+def HasVendorXSfvqmaccdod : Predicate<"Subtarget->hasVendorXSfvqmaccdod()">,
+ AssemblerPredicate<(all_of FeatureVendorXSfvqmaccdod),
+ "'XSfvqmaccdod' (SiFive Int8 Matrix Multiplication Instructions (2-by-8 and 8-by-2))">;
+
+def FeatureVendorXSfvqmaccqoq
+ : SubtargetFeature<"xsfvqmaccqoq", "HasVendorXSfvqmaccqoq", "true",
+ "'XSfvqmaccqoq' (SiFive Int8 Matrix Multiplication Instructions (4-by-8 and 8-by-4))",
+ [FeatureStdExtZve32x]>;
+def HasVendorXSfvqmaccqoq : Predicate<"Subtarget->hasVendorXSfvqmaccqoq()">,
+ AssemblerPredicate<(all_of FeatureVendorXSfvqmaccqoq),
+ "'XSfvqmaccqoq' (SiFive Int8 Matrix Multiplication Instructions (4-by-8 and 8-by-4))">;
+
+def FeatureVendorXSfvfwmaccqqq
+ : SubtargetFeature<"xsfvfwmaccqqq", "HasVendorXSfvfwmaccqqq", "true",
+ "'XSfvfwmaccqqq' (SiFive Matrix Multiply Accumulate Instruction and 4-by-4))",
+ [FeatureStdExtZve32f, FeatureStdExtZvfbfmin]>;
+def HasVendorXSfvfwmaccqqq : Predicate<"Subtarget->hasVendorXSfvfwmaccqqq()">,
+ AssemblerPredicate<(all_of FeatureVendorXSfvfwmaccqqq),
+ "'XSfvfwmaccqqq' (SiFive Matrix Multiply Accumulate Instruction and 4-by-4))">;
+
+def FeatureVendorXSfvfnrclipxfqf
+ : SubtargetFeature<"xsfvfnrclipxfqf", "HasVendorXSfvfnrclipxfqf", "true",
+ "'XSfvfnrclipxfqf' (SiFive FP32-to-int8 Ranged Clip Instructions)",
+ [FeatureStdExtZve32f]>;
+def HasVendorXSfvfnrclipxfqf : Predicate<"Subtarget->hasVendorXSfvfnrclipxfqf()">,
+ AssemblerPredicate<(all_of FeatureVendorXSfvfnrclipxfqf),
+ "'XSfvfnrclipxfqf' (SiFive FP32-to-int8 Ranged Clip Instructions)">;
+def FeatureVendorXCVelw
+ : SubtargetFeature<"xcvelw", "HasVendorXCVelw", "true",
+ "'XCVelw' (CORE-V Event Load Word)">;
+def HasVendorXCVelw
+ : Predicate<"Subtarget->hasVendorXCVelw()">,
+ AssemblerPredicate<(any_of FeatureVendorXCVelw),
+ "'XCVelw' (CORE-V Event Load Word)">;
+
def FeatureVendorXCVbitmanip
: SubtargetFeature<"xcvbitmanip", "HasVendorXCVbitmanip", "true",
"'XCVbitmanip' (CORE-V Bit Manipulation)">;
@@ -790,6 +873,36 @@ def HasVendorXCVmac : Predicate<"Subtarget->hasVendorXCVmac()">,
AssemblerPredicate<(all_of FeatureVendorXCVmac),
"'XCVmac' (CORE-V Multiply-Accumulate)">;
+def FeatureVendorXCVmem
+ : SubtargetFeature<"xcvmem", "HasVendorXCVmem", "true",
+ "'XCVmem' (CORE-V Post-incrementing Load & Store)">;
+def HasVendorXCVmem
+ : Predicate<"Subtarget->hasVendorXCVmem()">,
+ AssemblerPredicate<(any_of FeatureVendorXCVmem),
+ "'XCVmem' (CORE-V Post-incrementing Load & Store)">;
+
+def FeatureVendorXCValu
+ : SubtargetFeature<"xcvalu", "HasVendorXCValu", "true",
+ "'XCValu' (CORE-V ALU Operations)">;
+def HasVendorXCValu : Predicate<"Subtarget->hasVendorXCValu()">,
+ AssemblerPredicate<(all_of FeatureVendorXCValu),
+ "'XCValu' (CORE-V ALU Operations)">;
+
+def FeatureVendorXCVsimd
+ : SubtargetFeature<"xcvsimd", "HasVendorXCvsimd", "true",
+ "'XCVsimd' (CORE-V SIMD ALU)">;
+def HasVendorXCVsimd
+ : Predicate<"Subtarget->hasVendorXCVsimd()">,
+ AssemblerPredicate<(any_of FeatureVendorXCVsimd),
+ "'XCVsimd' (CORE-V SIMD ALU)">;
+
+def FeatureVendorXCVbi
+ : SubtargetFeature<"xcvbi", "HasVendorXCVbi", "true",
+ "'XCVbi' (CORE-V Immediate Branching)">;
+def HasVendorXCVbi : Predicate<"Subtarget->hasVendorXCVbi()">,
+ AssemblerPredicate<(all_of FeatureVendorXCVbi),
+ "'XCVbi' (CORE-V Immediate Branching)">;
+
//===----------------------------------------------------------------------===//
// LLVM specific features and extensions
//===----------------------------------------------------------------------===//
@@ -833,15 +946,13 @@ def FeatureTrailingSeqCstFence : SubtargetFeature<"seq-cst-trailing-fence",
"true",
"Enable trailing fence for seq-cst store.">;
-def FeatureUnalignedScalarMem
- : SubtargetFeature<"unaligned-scalar-mem", "EnableUnalignedScalarMem",
- "true", "Has reasonably performant unaligned scalar "
- "loads and stores">;
+def FeatureFastUnalignedAccess
+ : SubtargetFeature<"fast-unaligned-access", "HasFastUnalignedAccess",
+ "true", "Has reasonably performant unaligned "
+ "loads and stores (both scalar and vector)">;
-def FeatureUnalignedVectorMem
- : SubtargetFeature<"unaligned-vector-mem", "EnableUnalignedVectorMem",
- "true", "Has reasonably performant unaligned vector "
- "loads and stores">;
+def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
+ "UsePostRAScheduler", "true", "Schedule again after register allocation">;
def TuneNoOptimizedZeroStrideLoad
: SubtargetFeature<"no-optimized-zero-stride-load", "HasOptimizedZeroStrideLoad",
@@ -859,6 +970,16 @@ def TuneLUIADDIFusion
: SubtargetFeature<"lui-addi-fusion", "HasLUIADDIFusion",
"true", "Enable LUI+ADDI macrofusion">;
+def TuneAUIPCADDIFusion
+ : SubtargetFeature<"auipc-addi-fusion", "HasAUIPCADDIFusion",
+ "true", "Enable AUIPC+ADDI macrofusion">;
+def TuneShiftedZExtFusion
+ : SubtargetFeature<"shifted-zext-fusion", "HasShiftedZExtFusion",
+ "true", "Enable SLLI+SRLI to be fused when computing (shifted) zero extension">;
+def TuneLDADDFusion
+ : SubtargetFeature<"ld-add-fusion", "HasLDADDFusion",
+ "true", "Enable LD+ADD macrofusion.">;
+
def TuneNoDefaultUnroll
: SubtargetFeature<"no-default-unroll", "EnableDefaultUnroll", "false",
"Disable default unroll preference.">;
@@ -876,6 +997,13 @@ def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
[TuneNoDefaultUnroll,
TuneShortForwardBranchOpt]>;
+def TuneVeyronFusions : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron",
+ "Ventana Veyron-Series processors",
+ [TuneLUIADDIFusion,
+ TuneAUIPCADDIFusion,
+ TuneShiftedZExtFusion,
+ TuneLDADDFusion]>;
+
// Assume that lock-free native-width atomics are available, even if the target
// and operating system combination would not usually provide them. The user
// is responsible for providing any necessary __sync implementations. Code
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
new file mode 100644
index 000000000000..6ee006525df5
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
@@ -0,0 +1,216 @@
+//===- RISCVFoldMasks.cpp - MI Vector Pseudo Mask Peepholes ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+//
+// This pass performs various peephole optimisations that fold masks into vector
+// pseudo instructions after instruction selection.
+//
+// Currently it converts
+// PseudoVMERGE_VVM %false, %false, %true, %allonesmask, %vl, %sew
+// ->
+// PseudoVMV_V_V %false, %true, %vl, %sew
+//
+//===---------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVISelDAGToDAG.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-fold-masks"
+
+namespace {
+
+class RISCVFoldMasks : public MachineFunctionPass {
+public:
+ static char ID;
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ const TargetRegisterInfo *TRI;
+ RISCVFoldMasks() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+
+ StringRef getPassName() const override { return "RISC-V Fold Masks"; }
+
+private:
+ bool convertToUnmasked(MachineInstr &MI, MachineInstr *MaskDef);
+ bool convertVMergeToVMv(MachineInstr &MI, MachineInstr *MaskDef);
+
+ bool isAllOnesMask(MachineInstr *MaskDef);
+};
+
+} // namespace
+
+char RISCVFoldMasks::ID = 0;
+
+INITIALIZE_PASS(RISCVFoldMasks, DEBUG_TYPE, "RISC-V Fold Masks", false, false)
+
+bool RISCVFoldMasks::isAllOnesMask(MachineInstr *MaskDef) {
+ if (!MaskDef)
+ return false;
+ assert(MaskDef->isCopy() && MaskDef->getOperand(0).getReg() == RISCV::V0);
+ Register SrcReg = TRI->lookThruCopyLike(MaskDef->getOperand(1).getReg(), MRI);
+ if (!SrcReg.isVirtual())
+ return false;
+ MaskDef = MRI->getVRegDef(SrcReg);
+ if (!MaskDef)
+ return false;
+
+ // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
+ // undefined behaviour if it's the wrong bitwidth, so we could choose to
+ // assume that it's all-ones? Same applies to its VL.
+ switch (MaskDef->getOpcode()) {
+ case RISCV::PseudoVMSET_M_B1:
+ case RISCV::PseudoVMSET_M_B2:
+ case RISCV::PseudoVMSET_M_B4:
+ case RISCV::PseudoVMSET_M_B8:
+ case RISCV::PseudoVMSET_M_B16:
+ case RISCV::PseudoVMSET_M_B32:
+ case RISCV::PseudoVMSET_M_B64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// Transform (VMERGE_VVM_<LMUL> false, false, true, allones, vl, sew) to
+// (VMV_V_V_<LMUL> false, true, vl, sew). It may decrease uses of VMSET.
+bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI, MachineInstr *V0Def) {
+#define CASE_VMERGE_TO_VMV(lmul) \
+ case RISCV::PseudoVMERGE_VVM_##lmul: \
+ NewOpc = RISCV::PseudoVMV_V_V_##lmul; \
+ break;
+ unsigned NewOpc;
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ CASE_VMERGE_TO_VMV(MF8)
+ CASE_VMERGE_TO_VMV(MF4)
+ CASE_VMERGE_TO_VMV(MF2)
+ CASE_VMERGE_TO_VMV(M1)
+ CASE_VMERGE_TO_VMV(M2)
+ CASE_VMERGE_TO_VMV(M4)
+ CASE_VMERGE_TO_VMV(M8)
+ }
+
+ Register MergeReg = MI.getOperand(1).getReg();
+ Register FalseReg = MI.getOperand(2).getReg();
+ // Check merge == false (or merge == undef)
+ if (MergeReg != RISCV::NoRegister && TRI->lookThruCopyLike(MergeReg, MRI) !=
+ TRI->lookThruCopyLike(FalseReg, MRI))
+ return false;
+
+ assert(MI.getOperand(4).isReg() && MI.getOperand(4).getReg() == RISCV::V0);
+ if (!isAllOnesMask(V0Def))
+ return false;
+
+ MI.setDesc(TII->get(NewOpc));
+ MI.removeOperand(1); // Merge operand
+ MI.tieOperands(0, 1); // Tie false to dest
+ MI.removeOperand(3); // Mask operand
+ MI.addOperand(
+ MachineOperand::CreateImm(RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED));
+
+ // vmv.v.v doesn't have a mask operand, so we may be able to inflate the
+ // register class for the destination and merge operands e.g. VRNoV0 -> VR
+ MRI->recomputeRegClass(MI.getOperand(0).getReg());
+ MRI->recomputeRegClass(MI.getOperand(1).getReg());
+ return true;
+}
+
+bool RISCVFoldMasks::convertToUnmasked(MachineInstr &MI,
+ MachineInstr *MaskDef) {
+ const RISCV::RISCVMaskedPseudoInfo *I =
+ RISCV::getMaskedPseudoInfo(MI.getOpcode());
+ if (!I)
+ return false;
+
+ if (!isAllOnesMask(MaskDef))
+ return false;
+
+ // There are two classes of pseudos in the table - compares and
+ // everything else. See the comment on RISCVMaskedPseudo for details.
+ const unsigned Opc = I->UnmaskedPseudo;
+ const MCInstrDesc &MCID = TII->get(Opc);
+ const bool HasPolicyOp = RISCVII::hasVecPolicyOp(MCID.TSFlags);
+ const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);
+#ifndef NDEBUG
+ const MCInstrDesc &MaskedMCID = TII->get(MI.getOpcode());
+ assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ==
+ RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
+ "Masked and unmasked pseudos are inconsistent");
+ assert(HasPolicyOp == HasPassthru && "Unexpected pseudo structure");
+#endif
+ (void)HasPolicyOp;
+
+ MI.setDesc(MCID);
+
+ // TODO: Increment all MaskOpIdxs in tablegen by num of explicit defs?
+ unsigned MaskOpIdx = I->MaskOpIdx + MI.getNumExplicitDefs();
+ MI.removeOperand(MaskOpIdx);
+
+ // The unmasked pseudo will no longer be constrained to the vrnov0 reg class,
+ // so try and relax it to vr.
+ MRI->recomputeRegClass(MI.getOperand(0).getReg());
+ unsigned PassthruOpIdx = MI.getNumExplicitDefs();
+ if (HasPassthru) {
+ if (MI.getOperand(PassthruOpIdx).getReg() != RISCV::NoRegister)
+ MRI->recomputeRegClass(MI.getOperand(PassthruOpIdx).getReg());
+ } else
+ MI.removeOperand(PassthruOpIdx);
+
+ return true;
+}
+
+bool RISCVFoldMasks::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ // Skip if the vector extension is not enabled.
+ const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+ if (!ST.hasVInstructions())
+ return false;
+
+ TII = ST.getInstrInfo();
+ MRI = &MF.getRegInfo();
+ TRI = MRI->getTargetRegisterInfo();
+
+ bool Changed = false;
+
+ // Masked pseudos coming out of isel will have their mask operand in the form:
+ //
+ // $v0:vr = COPY %mask:vr
+ // %x:vr = Pseudo_MASK %a:vr, %b:vr, $v0:vr
+ //
+ // Because $v0 isn't in SSA, keep track of it so we can check the mask operand
+ // on each pseudo.
+ MachineInstr *CurrentV0Def;
+ for (MachineBasicBlock &MBB : MF) {
+ CurrentV0Def = nullptr;
+ for (MachineInstr &MI : MBB) {
+ Changed |= convertToUnmasked(MI, CurrentV0Def);
+ Changed |= convertVMergeToVMv(MI, CurrentV0Def);
+
+ if (MI.definesRegister(RISCV::V0, TRI))
+ CurrentV0Def = &MI;
+ }
+ }
+
+ return Changed;
+}
+
+FunctionPass *llvm::createRISCVFoldMasksPass() { return new RISCVFoldMasks(); }
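
Since $v0 is a physical register and thus outside SSA, the walk above re-derives the live mask definition per block and hands it to each peephole. A toy rendering of that bookkeeping (Instr and walkBlock are invented):

    #include <vector>

    struct Instr { bool WritesV0; };

    template <typename Peephole>
    void walkBlock(std::vector<Instr> &Block, Peephole Visit) {
      Instr *CurrentV0Def = nullptr; // reset at every block boundary
      for (Instr &I : Block) {
        Visit(I, CurrentV0Def);      // convertToUnmasked / convertVMergeToVMv
        if (I.WritesV0)
          CurrentV0Def = &I;
      }
    }
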
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index f312cc8129dd..8dfea6d38620 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -226,37 +226,38 @@ getRestoreLibCallName(const MachineFunction &MF,
return RestoreLibCalls[LibCallID];
}
-// Return encoded value for PUSH/POP instruction, representing
-// registers to store/load.
-static unsigned getPushPopEncoding(const Register MaxReg) {
+// Return encoded value and register count for PUSH/POP instruction,
+// representing registers to store/load.
+static std::pair<unsigned, unsigned>
+getPushPopEncodingAndNum(const Register MaxReg) {
switch (MaxReg) {
default:
llvm_unreachable("Unexpected Reg for Push/Pop Inst");
case RISCV::X27: /*s11*/
case RISCV::X26: /*s10*/
- return llvm::RISCVZC::RLISTENCODE::RA_S0_S11;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S11, 13);
case RISCV::X25: /*s9*/
- return llvm::RISCVZC::RLISTENCODE::RA_S0_S9;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S9, 11);
case RISCV::X24: /*s8*/
- return llvm::RISCVZC::RLISTENCODE::RA_S0_S8;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S8, 10);
case RISCV::X23: /*s7*/
- return llvm::RISCVZC::RLISTENCODE::RA_S0_S7;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S7, 9);
case RISCV::X22: /*s6*/
- return llvm::RISCVZC::RLISTENCODE::RA_S0_S6;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S6, 8);
case RISCV::X21: /*s5*/
- return llvm::RISCVZC::RLISTENCODE::RA_S0_S5;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S5, 7);
case RISCV::X20: /*s4*/
- return llvm::RISCVZC::RLISTENCODE::RA_S0_S4;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S4, 6);
case RISCV::X19: /*s3*/
- return llvm::RISCVZC::RLISTENCODE::RA_S0_S3;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S3, 5);
case RISCV::X18: /*s2*/
- return llvm::RISCVZC::RLISTENCODE::RA_S0_S2;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S2, 4);
case RISCV::X9: /*s1*/
- return llvm::RISCVZC::RLISTENCODE::RA_S0_S1;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S1, 3);
case RISCV::X8: /*s0*/
- return llvm::RISCVZC::RLISTENCODE::RA_S0;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0, 2);
case RISCV::X1: /*ra*/
- return llvm::RISCVZC::RLISTENCODE::RA;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA, 1);
}
}
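
The register counts paired with each RLISTENCODE entry above follow a simple rule: rlist {ra, s0..sN} holds N + 2 registers ({ra} alone holds 1), and s10 has no encoding of its own, so it is folded into the s11 entry. A quick check of that rule (rlistRegCount is an invented helper):

    #include <cassert>

    unsigned rlistRegCount(int MaxSReg /* -1 = ra only, 0..11 = s0..s11 */) {
      if (MaxSReg == 10)
        MaxSReg = 11; // s10 shares the s11 encoding, as in the switch above
      return MaxSReg < 0 ? 1u : (unsigned)MaxSReg + 2;
    }

    int main() {
      assert(rlistRegCount(-1) == 1);  // {ra}
      assert(rlistRegCount(3) == 5);   // {ra, s0-s3}
      assert(rlistRegCount(10) == 13); // {ra, s0-s11}: s10 promotes to s11
    }
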
@@ -265,9 +266,10 @@ static Register getMaxPushPopReg(const MachineFunction &MF,
const std::vector<CalleeSavedInfo> &CSI) {
Register MaxPushPopReg = RISCV::NoRegister;
for (auto &CS : CSI) {
- Register Reg = CS.getReg();
- if (RISCV::PGPRRegClass.contains(Reg))
- MaxPushPopReg = std::max(MaxPushPopReg.id(), Reg.id());
+ // RISCVRegisterInfo::hasReservedSpillSlot assigns negative frame indices to
+ // registers which can be saved by Zcmp Push.
+ if (CS.getFrameIdx() < 0)
+ MaxPushPopReg = std::max(MaxPushPopReg.id(), CS.getReg().id());
}
// if rlist is {rs, s0-s10}, then s11 will also be included
if (MaxPushPopReg == RISCV::X26)
@@ -275,16 +277,6 @@ static Register getMaxPushPopReg(const MachineFunction &MF,
return MaxPushPopReg;
}
-static uint64_t adjSPInPushPop(MachineBasicBlock::iterator MBBI,
- unsigned RequiredStack, unsigned FreePushStack,
- bool IsPop) {
- if (FreePushStack > RequiredStack)
- RequiredStack = 0;
- unsigned Spimm = std::min(RequiredStack, 48u);
- MBBI->getOperand(1).setImm(Spimm);
- return alignTo(RequiredStack - Spimm, 16);
-}
-
// Return true if the specified function should have a dedicated frame
// pointer register. This is true if frame pointer elimination is
// disabled, if it needs dynamic stack realignment, if the function has
@@ -514,8 +506,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// FIXME (note copied from Lanai): This appears to be overallocating. Needs
// investigation. Get the number of bytes to allocate from the FrameInfo.
uint64_t StackSize = getStackSizeWithRVVPadding(MF);
- uint64_t RealStackSize =
- StackSize + RVFI->getLibCallStackSize() + RVFI->getRVPushStackSize();
+ uint64_t RealStackSize = StackSize + RVFI->getReservedSpillsSize();
uint64_t RVVStackSize = RVFI->getRVVStackSize();
// Early exit if there is no need to allocate on the stack
@@ -535,13 +526,13 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
RealStackSize = FirstSPAdjustAmount;
}
- if (RVFI->isPushable(MF) && FirstFrameSetup->getOpcode() == RISCV::CM_PUSH) {
+ if (RVFI->isPushable(MF) && FirstFrameSetup != MBB.end() &&
+ FirstFrameSetup->getOpcode() == RISCV::CM_PUSH) {
// Use available stack adjustment in push instruction to allocate additional
// stack space.
- unsigned PushStack = RVFI->getRVPushRegs() * (STI.getXLen() / 8);
- unsigned SpImmBase = RVFI->getRVPushStackSize();
- StackSize = adjSPInPushPop(FirstFrameSetup, StackSize,
- (SpImmBase - PushStack), true);
+ uint64_t Spimm = std::min(StackSize, (uint64_t)48);
+ FirstFrameSetup->getOperand(1).setImm(Spimm);
+ StackSize -= Spimm;
}
if (StackSize != 0) {
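To make the prologue change concrete, here is a small self-contained replay of the new spimm folding that replaces adjSPInPushPop (illustrative numbers; cm.push can absorb at most 48 extra bytes of adjustment, and the stack size at this point is already 16-byte aligned):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t StackSize = 112;  // bytes still to allocate after the push
      uint64_t Spimm = std::min(StackSize, uint64_t(48)); // folded into cm.push
      StackSize -= Spimm;
      assert(Spimm == 48 && StackSize == 64); // remaining 64 via a later addi
    }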
@@ -584,8 +575,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
Offset = FrameIdx * (int64_t)STI.getXLen() / 8;
}
} else {
- Offset = MFI.getObjectOffset(Entry.getFrameIdx()) -
- RVFI->getLibCallStackSize();
+ Offset = MFI.getObjectOffset(FrameIdx) - RVFI->getReservedSpillsSize();
}
Register Reg = Entry.getReg();
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
@@ -730,8 +720,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
LastFrameDestroy = std::prev(MBBI, CSI.size());
uint64_t StackSize = getStackSizeWithRVVPadding(MF);
- uint64_t RealStackSize =
- StackSize + RVFI->getLibCallStackSize() + RVFI->getRVPushStackSize();
+ uint64_t RealStackSize = StackSize + RVFI->getReservedSpillsSize();
uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize();
uint64_t RVVStackSize = RVFI->getRVVStackSize();
@@ -776,9 +765,9 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
MBBI->getOpcode() == RISCV::CM_POP) {
// Use available stack adjustment in pop instruction to deallocate stack
// space.
- unsigned PushStack = RVFI->getRVPushRegs() * (STI.getXLen() / 8);
- unsigned SpImmBase = RVFI->getRVPushStackSize();
- StackSize = adjSPInPushPop(MBBI, StackSize, (SpImmBase - PushStack), true);
+ uint64_t Spimm = std::min(StackSize, (uint64_t)48);
+ MBBI->getOperand(1).setImm(Spimm);
+ StackSize -= Spimm;
}
// Deallocate stack
@@ -882,7 +871,7 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
if (FrameReg == getFPReg(STI)) {
Offset += StackOffset::getFixed(RVFI->getVarArgsSaveSize());
if (FI >= 0)
- Offset -= StackOffset::getFixed(RVFI->getLibCallStackSize());
+ Offset -= StackOffset::getFixed(RVFI->getReservedSpillsSize());
// When using FP to access scalable vector objects, we need to minus
// the frame size.
//
@@ -950,8 +939,7 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
assert(!RI->hasStackRealignment(MF) &&
"Can't index across variable sized realign");
Offset += StackOffset::get(getStackSizeWithRVVPadding(MF) +
- RVFI->getLibCallStackSize() +
- RVFI->getRVPushStackSize(),
+ RVFI->getReservedSpillsSize(),
RVFI->getRVVStackSize());
} else {
Offset += StackOffset::getFixed(MFI.getStackSize());
@@ -993,11 +981,11 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
RISCV::X5, RISCV::X6, RISCV::X7, /* t0-t2 */
RISCV::X10, RISCV::X11, /* a0-a1, a2-a7 */
RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17,
- RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31, 0 /* t3-t6 */
+ RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31 /* t3-t6 */
};
- for (unsigned i = 0; CSRegs[i]; ++i)
- SavedRegs.set(CSRegs[i]);
+ for (auto Reg : CSRegs)
+ SavedRegs.set(Reg);
if (MF.getSubtarget<RISCVSubtarget>().hasStdExtF()) {
@@ -1277,7 +1265,8 @@ MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
// We would like to split the SP adjustment to reduce prologue/epilogue
// as following instructions. In this way, the offset of the callee saved
-// register could fit in a single store.
+// register could fit in a single store. Suppose the first SP adjustment
+// amount is 2032.
// add sp,sp,-2032
// sw ra,2028(sp)
// sw s0,2024(sp)
@@ -1295,19 +1284,60 @@ RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const {
// Disable SplitSPAdjust if save-restore libcall is used. The callee-saved
// registers will be pushed by the save-restore libcalls, so we don't have to
// split the SP adjustment in this case.
- if (RVFI->getLibCallStackSize() || RVFI->getRVPushStackSize())
+ if (RVFI->getReservedSpillsSize())
return 0;
// Return the FirstSPAdjustAmount if the StackSize can not fit in a signed
// 12-bit and there exists a callee-saved register needing to be pushed.
if (!isInt<12>(StackSize) && (CSI.size() > 0)) {
- // FirstSPAdjustAmount is chosen as (2048 - StackAlign) because 2048 will
- // cause sp = sp + 2048 in the epilogue to be split into multiple
+ // FirstSPAdjustAmount is chosen at most as (2048 - StackAlign) because
+ // 2048 will cause sp = sp + 2048 in the epilogue to be split into multiple
// instructions. Offsets smaller than 2048 can fit in a single load/store
// instruction, and we have to stick with the stack alignment. 2048 has
// 16-byte alignment. The stack alignment for RV32 and RV64 is 16 and for
// RV32E it is 4. So (2048 - StackAlign) will satisfy the stack alignment.
- return 2048 - getStackAlign().value();
+ const uint64_t StackAlign = getStackAlign().value();
+
+    // An amount of (2048 - StackAlign) will prevent the callee-save and
+    // restore instructions from being compressed, so try to adjust the
+    // amount to the largest offset that the compressed stack-relative
+    // load/store instructions accept when the target supports compressed
+    // instructions.
+ if (STI.hasStdExtCOrZca()) {
+ // The compression extensions may support the following instructions:
+ // riscv32: c.lwsp rd, offset[7:2] => 2^(6 + 2)
+ // c.swsp rs2, offset[7:2] => 2^(6 + 2)
+ // c.flwsp rd, offset[7:2] => 2^(6 + 2)
+ // c.fswsp rs2, offset[7:2] => 2^(6 + 2)
+ // riscv64: c.ldsp rd, offset[8:3] => 2^(6 + 3)
+ // c.sdsp rs2, offset[8:3] => 2^(6 + 3)
+ // c.fldsp rd, offset[8:3] => 2^(6 + 3)
+ // c.fsdsp rs2, offset[8:3] => 2^(6 + 3)
+ const uint64_t RVCompressLen = STI.getXLen() * 8;
+      // Compared with the amount (2048 - StackAlign), StackSize needs to
+      // satisfy the conditions checked below so that the smaller amount does
+      // not cost extra instructions to adjust the sp afterwards. For example,
+      // when StackSize <= 2047 + RVCompressLen, both choices take the same
+      // number of instructions:
+      // case 1: amount is 2048 - StackAlign: use addi + addi to adjust sp.
+      // case 2: amount is RVCompressLen: use addi + addi to adjust sp.
+ auto CanCompress = [&](uint64_t CompressLen) -> bool {
+ if (StackSize <= 2047 + CompressLen ||
+ (StackSize > 2048 * 2 - StackAlign &&
+ StackSize <= 2047 * 2 + CompressLen) ||
+ StackSize > 2048 * 3 - StackAlign)
+ return true;
+
+ return false;
+ };
+      // In the epilogue, addi sp, sp, 496 is used to restore the sp and it
+      // can be compressed (c.addi16sp accepts offsets in [-512, 496]), but
+      // addi sp, sp, 512 cannot be compressed. So try 496 first.
+ const uint64_t ADDI16SPCompressLen = 496;
+ if (STI.is64Bit() && CanCompress(ADDI16SPCompressLen))
+ return ADDI16SPCompressLen;
+ if (CanCompress(RVCompressLen))
+ return RVCompressLen;
+ }
+ return 2048 - StackAlign;
}
return 0;
}
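A worked example may help here. The following standalone sketch (an illustration, assuming RV64 with StackAlign = 16) replays the CanCompress windows above for StackSize = 2500 and shows why 496 is picked as the first adjustment:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t StackAlign = 16, RVCompressLen = 64 * 8; // XLen * 8 = 512
      const uint64_t StackSize = 2500; // > 2047, so a split is considered
      auto CanCompress = [&](uint64_t CompressLen) {
        return StackSize <= 2047 + CompressLen ||
               (StackSize > 2048 * 2 - StackAlign &&
                StackSize <= 2047 * 2 + CompressLen) ||
               StackSize > 2048 * 3 - StackAlign;
      };
      assert(CanCompress(496));          // ADDI16SPCompressLen, tried first on RV64
      assert(CanCompress(RVCompressLen)); // 512 would also qualify
      assert(2500 - 496 <= 2047);        // the remainder still fits one addi
    }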
@@ -1328,14 +1358,12 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
if (RVFI->isPushable(*MF)) {
Register MaxReg = getMaxPushPopReg(*MF, CSI);
- unsigned PushedRegNum =
- getPushPopEncoding(MaxReg) - llvm::RISCVZC::RLISTENCODE::RA + 1;
- RVFI->setRVPushRegs(PushedRegNum);
- RVFI->setRVPushStackSize(alignTo((STI.getXLen() / 8) * PushedRegNum, 16));
-
if (MaxReg != RISCV::NoRegister) {
+ auto [RegEnc, PushedRegNum] = getPushPopEncodingAndNum(MaxReg);
+ RVFI->setRVPushRegs(PushedRegNum);
+ RVFI->setRVPushStackSize(alignTo((STI.getXLen() / 8) * PushedRegNum, 16));
+
// Use encoded number to represent registers to spill.
- unsigned RegEnc = getPushPopEncoding(MaxReg);
RVFI->setRVPushRlist(RegEnc);
MachineInstrBuilder PushBuilder =
BuildMI(MBB, MI, DL, TII.get(RISCV::CM_PUSH))
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index 79adc83e8d65..9bc100981f2f 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -61,7 +61,7 @@ public:
const TargetRegisterInfo *TRI) const override;
// Get the first stack adjustment amount for SplitSPAdjust.
- // Return 0 if we don't want to to split the SP adjustment in prologue and
+ // Return 0 if we don't want to split the SP adjustment in prologue and
// epilogue.
uint64_t getFirstSPAdjustAmount(const MachineFunction &MF) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGISel.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGISel.td
new file mode 100644
index 000000000000..5f16ffb0a024
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGISel.td
@@ -0,0 +1,159 @@
+//===-- RISCVGISel.td - RISC-V GlobalISel Patterns ---------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file contains patterns that are relevant to GlobalISel, including
+/// GIComplexOperandMatcher definitions for equivalent SelectionDAG
+/// ComplexPatterns.
+//
+//===----------------------------------------------------------------------===//
+
+include "RISCV.td"
+include "RISCVCombine.td"
+
+def simm12Plus1 : ImmLeaf<XLenVT, [{
+ return (isInt<12>(Imm) && Imm != -2048) || Imm == 2048;}]>;
+def simm12Plus1i32 : ImmLeaf<i32, [{
+ return (isInt<12>(Imm) && Imm != -2048) || Imm == 2048;}]>;
+
+// FIXME: This doesn't check that the G_CONSTANT we're deriving the immediate
+// from is only used once
+def simm12Minus1Nonzero : ImmLeaf<XLenVT, [{
+ return (Imm >= -2049 && Imm < 0) || (Imm > 0 && Imm <= 2046);}]>;
+
+def simm12Minus1NonzeroNonNeg1 : ImmLeaf<XLenVT, [{
+ return (Imm >= -2049 && Imm < -1) || (Imm > 0 && Imm <= 2046);}]>;
+
+// Return an immediate value plus 1.
+def ImmPlus1 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getSExtValue() + 1, SDLoc(N),
+                                   N->getValueType(0));}]>;
+
+def GINegImm : GICustomOperandRenderer<"renderNegImm">,
+ GISDNodeXFormEquiv<NegImm>;
+
+def GIImmSubFromXLen : GICustomOperandRenderer<"renderImmSubFromXLen">,
+ GISDNodeXFormEquiv<ImmSubFromXLen>;
+def GIImmSubFrom32 : GICustomOperandRenderer<"renderImmSubFrom32">,
+ GISDNodeXFormEquiv<ImmSubFrom32>;
+
+def GIImmPlus1 :
+ GICustomOperandRenderer<"renderImmPlus1">,
+ GISDNodeXFormEquiv<ImmPlus1>;
+
+def GIAddrRegImm :
+ GIComplexOperandMatcher<s32, "selectAddrRegImm">,
+ GIComplexPatternEquiv<AddrRegImm>;
+
+def gi_as_i64imm : GICustomOperandRenderer<"renderImm">,
+ GISDNodeXFormEquiv<as_i64imm>;
+
+def gi_trailing_zero : GICustomOperandRenderer<"renderTrailingZeros">,
+ GISDNodeXFormEquiv<TrailingZeros>;
+
+// FIXME: This is labelled as handling 's32', however the ComplexPattern it
+// refers to handles both i32 and i64 based on the HwMode. Currently this LLT
+// parameter appears to be ignored so this pattern works for both, however we
+// should add a LowLevelTypeByHwMode, and use that to define our XLenLLT instead
+// here.
+def GIShiftMaskXLen :
+ GIComplexOperandMatcher<s32, "selectShiftMask">,
+ GIComplexPatternEquiv<shiftMaskXLen>;
+def GIShiftMask32 :
+ GIComplexOperandMatcher<s32, "selectShiftMask">,
+ GIComplexPatternEquiv<shiftMask32>;
+
+def gi_sh1add_op : GIComplexOperandMatcher<s32, "selectSHXADDOp<1>">,
+ GIComplexPatternEquiv<sh1add_op>;
+def gi_sh2add_op : GIComplexOperandMatcher<s32, "selectSHXADDOp<2>">,
+ GIComplexPatternEquiv<sh2add_op>;
+def gi_sh3add_op : GIComplexOperandMatcher<s32, "selectSHXADDOp<3>">,
+ GIComplexPatternEquiv<sh3add_op>;
+
+def gi_sh1add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<1>">,
+ GIComplexPatternEquiv<sh1add_uw_op>;
+def gi_sh2add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<2>">,
+ GIComplexPatternEquiv<sh2add_uw_op>;
+def gi_sh3add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<3>">,
+ GIComplexPatternEquiv<sh3add_uw_op>;
+
+// FIXME: Canonicalize (sub X, C) -> (add X, -C) earlier.
+def : Pat<(XLenVT (sub GPR:$rs1, simm12Plus1:$imm)),
+ (ADDI GPR:$rs1, (NegImm simm12Plus1:$imm))>;
+
+let Predicates = [IsRV64] in {
+def : Pat<(i32 (sub GPR:$rs1, simm12Plus1i32:$imm)),
+ (ADDIW GPR:$rs1, (i64 (NegImm $imm)))>;
+
+def : Pat<(i32 (shl GPR:$rs1, (i32 GPR:$rs2))), (SLLW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i32 (sra GPR:$rs1, (i32 GPR:$rs2))), (SRAW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i32 (srl GPR:$rs1, (i32 GPR:$rs2))), (SRLW GPR:$rs1, GPR:$rs2)>;
+}
+
+// Ptr type used in patterns with GlobalISelEmitter
+def PtrVT : PtrValueTypeByHwMode<XLenVT, 0>;
+
+// Define pattern expansions for pointer ult/slt conditional codes
+def : Pat<(XLenVT (setult (PtrVT GPR:$rs1), simm12:$imm12)),
+ (SLTIU GPR:$rs1, simm12:$imm12)>;
+def : Pat<(XLenVT (setult (PtrVT GPR:$rs1), (PtrVT GPR:$rs2))),
+ (SLTU GPR:$rs1, GPR:$rs2)>;
+def : Pat<(XLenVT (setlt (PtrVT GPR:$rs1), simm12:$imm12)),
+ (SLTI GPR:$rs1, simm12:$imm12)>;
+def : Pat<(XLenVT (setlt (PtrVT GPR:$rs1), (PtrVT GPR:$rs2))),
+ (SLT GPR:$rs1, GPR:$rs2)>;
+
+// Define pattern expansions for setcc operations that aren't directly
+// handled by a RISC-V instruction.
+foreach Ty = [PtrVT, XLenVT] in {
+def : Pat<(XLenVT (seteq (Ty GPR:$rs1), (Ty 0))), (SLTIU GPR:$rs1, 1)>;
+def : Pat<(XLenVT (seteq (Ty GPR:$rs1), (Ty simm12Plus1:$imm12))),
+ (SLTIU (ADDI GPR:$rs1, (NegImm simm12Plus1:$imm12)), 1)>;
+def : Pat<(XLenVT (seteq (Ty GPR:$rs1), (Ty GPR:$rs2))),
+ (SLTIU (XOR GPR:$rs1, GPR:$rs2), 1)>;
+def : Pat<(XLenVT (setne (Ty GPR:$rs1), (Ty 0))), (SLTU (XLenVT X0), GPR:$rs1)>;
+def : Pat<(XLenVT (setne (Ty GPR:$rs1), (Ty simm12Plus1:$imm12))),
+ (SLTU (XLenVT X0), (ADDI GPR:$rs1, (NegImm simm12Plus1:$imm12)))>;
+def : Pat<(XLenVT (setne (Ty GPR:$rs1), (Ty GPR:$rs2))),
+ (SLTU (XLenVT X0), (XOR GPR:$rs1, GPR:$rs2))>;
+def : Pat<(XLenVT (setugt (Ty GPR:$rs1), (Ty simm12Minus1NonzeroNonNeg1:$imm))),
+ (XORI (SLTIU GPR:$rs1,
+ (ImmPlus1 simm12Minus1NonzeroNonNeg1:$imm)), 1)>;
+def : Pat<(XLenVT (setugt (Ty GPR:$rs1), (Ty GPR:$rs2))),
+ (SLTU GPR:$rs2, GPR:$rs1)>;
+def : Pat<(XLenVT (setgt (Ty GPR:$rs1), (Ty simm12Minus1Nonzero:$imm))),
+ (XORI (SLTI GPR:$rs1, (ImmPlus1 simm12Minus1Nonzero:$imm)), 1)>;
+def : Pat<(XLenVT (setgt (Ty GPR:$rs1), (Ty GPR:$rs2))),
+ (SLT GPR:$rs2, GPR:$rs1)>;
+def : Pat<(XLenVT (setuge (Ty GPR:$rs1), (Ty simm12:$imm))),
+ (XORI (SLTIU GPR:$rs1, simm12:$imm), 1)>;
+def : Pat<(XLenVT (setuge (Ty GPR:$rs1), (Ty GPR:$rs2))),
+ (XORI (SLTU GPR:$rs1, GPR:$rs2), 1)>;
+def : Pat<(XLenVT (setge (Ty GPR:$rs1), (Ty simm12:$imm))),
+ (XORI (SLTI GPR:$rs1, simm12:$imm), 1)>;
+def : Pat<(XLenVT (setge (Ty GPR:$rs1), (Ty GPR:$rs2))),
+ (XORI (SLT GPR:$rs1, GPR:$rs2), 1)>;
+def : Pat<(XLenVT (setule (Ty GPR:$rs1), (Ty simm12Minus1NonzeroNonNeg1:$imm))),
+ (SLTIU GPR:$rs1, (ImmPlus1 simm12Minus1NonzeroNonNeg1:$imm))>;
+def : Pat<(XLenVT (setule (Ty GPR:$rs1), (Ty GPR:$rs2))),
+ (XORI (SLTU GPR:$rs2, GPR:$rs1), 1)>;
+def : Pat<(XLenVT (setle (Ty GPR:$rs1), (Ty simm12Minus1Nonzero:$imm))),
+ (SLTI GPR:$rs1, (ImmPlus1 simm12Minus1Nonzero:$imm))>;
+def : Pat<(XLenVT (setle (Ty GPR:$rs1), (Ty GPR:$rs2))),
+ (XORI (SLT GPR:$rs2, GPR:$rs1), 1)>;
+}
+
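The expansions above lean on a handful of boolean identities; a small illustrative check, with ordinary C++ integers standing in for XLenVT values:

    #include <cassert>

    int main() {
      long a = -3, b = 5;
      unsigned long x = 7;
      assert((a >= b) == !(a < b));               // setge -> xori(slt, 1)
      assert((a > b) == (b < a));                 // setgt -> slt, operands swapped
      assert((a == 0) == ((unsigned long)a < 1)); // seteq 0 -> sltiu rs1, 1
      assert((x <= 7UL) == (x < 8UL));            // setule -> sltiu via ImmPlus1
    }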
+let Predicates = [IsRV32] in {
+def : LdPat<load, LW, PtrVT>;
+def : StPat<store, SW, GPR, PtrVT>;
+}
+
+let Predicates = [IsRV64] in {
+def : LdPat<load, LD, PtrVT>;
+def : StPat<store, SD, GPR, PtrVT>;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
index b9c69a966b4a..5ad1e082344e 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
@@ -67,7 +67,7 @@ private:
bool tryCreateStridedLoadStore(IntrinsicInst *II, Type *DataType, Value *Ptr,
Value *AlignOp);
- std::pair<Value *, Value *> determineBaseAndStride(GetElementPtrInst *GEP,
+ std::pair<Value *, Value *> determineBaseAndStride(Instruction *Ptr,
IRBuilderBase &Builder);
bool matchStridedRecurrence(Value *Index, Loop *L, Value *&Stride,
@@ -321,9 +321,19 @@ bool RISCVGatherScatterLowering::matchStridedRecurrence(Value *Index, Loop *L,
}
std::pair<Value *, Value *>
-RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP,
+RISCVGatherScatterLowering::determineBaseAndStride(Instruction *Ptr,
IRBuilderBase &Builder) {
+ // A gather/scatter of a splat is a zero strided load/store.
+ if (auto *BasePtr = getSplatValue(Ptr)) {
+ Type *IntPtrTy = DL->getIntPtrType(BasePtr->getType());
+ return std::make_pair(BasePtr, ConstantInt::get(IntPtrTy, 0));
+ }
+
+ auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP)
+ return std::make_pair(nullptr, nullptr);
+
auto I = StridedAddrs.find(GEP);
if (I != StridedAddrs.end())
return I->second;
@@ -331,8 +341,12 @@ RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP,
SmallVector<Value *, 2> Ops(GEP->operands());
// Base pointer needs to be a scalar.
- if (Ops[0]->getType()->isVectorTy())
- return std::make_pair(nullptr, nullptr);
+ Value *ScalarBase = Ops[0];
+ if (ScalarBase->getType()->isVectorTy()) {
+ ScalarBase = getSplatValue(ScalarBase);
+ if (!ScalarBase)
+ return std::make_pair(nullptr, nullptr);
+ }
std::optional<unsigned> VecOperand;
unsigned TypeScale = 0;
@@ -362,11 +376,19 @@ RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP,
// We can't extract the stride if the arithmetic is done at a different size
// than the pointer type. Adding the stride later may not wrap correctly.
// Technically we could handle wider indices, but I don't expect that in
- // practice.
+  // practice. Handle one special case here: constants. This simplifies
+  // writing test cases.
Value *VecIndex = Ops[*VecOperand];
Type *VecIntPtrTy = DL->getIntPtrType(GEP->getType());
- if (VecIndex->getType() != VecIntPtrTy)
- return std::make_pair(nullptr, nullptr);
+ if (VecIndex->getType() != VecIntPtrTy) {
+ auto *VecIndexC = dyn_cast<Constant>(VecIndex);
+ if (!VecIndexC)
+ return std::make_pair(nullptr, nullptr);
+    if (VecIndex->getType()->getScalarSizeInBits() >
+        VecIntPtrTy->getScalarSizeInBits())
+      VecIndex = ConstantFoldCastInstruction(Instruction::Trunc, VecIndexC,
+                                             VecIntPtrTy);
+    else
+      VecIndex = ConstantFoldCastInstruction(Instruction::SExt, VecIndexC,
+                                             VecIntPtrTy);
+ }
// Handle the non-recursive case. This is what we see if the vectorizer
// decides to use a scalar IV + vid on demand instead of a vector IV.
@@ -379,7 +401,7 @@ RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP,
Ops[*VecOperand] = Start;
Type *SourceTy = GEP->getSourceElementType();
Value *BasePtr =
- Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
+ Builder.CreateGEP(SourceTy, ScalarBase, ArrayRef(Ops).drop_front());
// Convert stride to pointer size if needed.
Type *IntPtrTy = DL->getIntPtrType(BasePtr->getType());
@@ -415,7 +437,7 @@ RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP,
Ops[*VecOperand] = BasePhi;
Type *SourceTy = GEP->getSourceElementType();
Value *BasePtr =
- Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
+ Builder.CreateGEP(SourceTy, ScalarBase, ArrayRef(Ops).drop_front());
// Final adjustments to stride should go in the start block.
Builder.SetInsertPoint(
@@ -448,17 +470,17 @@ bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II,
if (!TLI->isTypeLegal(DataTypeVT))
return false;
- // Pointer should be a GEP.
- auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
- if (!GEP)
+ // Pointer should be an instruction.
+ auto *PtrI = dyn_cast<Instruction>(Ptr);
+ if (!PtrI)
return false;
- LLVMContext &Ctx = GEP->getContext();
+ LLVMContext &Ctx = PtrI->getContext();
IRBuilder<InstSimplifyFolder> Builder(Ctx, *DL);
- Builder.SetInsertPoint(GEP);
+ Builder.SetInsertPoint(PtrI);
Value *BasePtr, *Stride;
- std::tie(BasePtr, Stride) = determineBaseAndStride(GEP, Builder);
+ std::tie(BasePtr, Stride) = determineBaseAndStride(PtrI, Builder);
if (!BasePtr)
return false;
assert(Stride != nullptr);
@@ -481,8 +503,8 @@ bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II,
II->replaceAllUsesWith(Call);
II->eraseFromParent();
- if (GEP->use_empty())
- RecursivelyDeleteTriviallyDeadInstructions(GEP);
+ if (PtrI->use_empty())
+ RecursivelyDeleteTriviallyDeadInstructions(PtrI);
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 901204043b3c..09b3ab96974c 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -22,13 +22,18 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include <optional>
using namespace llvm;
#define DEBUG_TYPE "riscv-isel"
#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
+static cl::opt<bool> UsePseudoMovImm(
+ "riscv-use-rematerializable-movimm", cl::Hidden,
+ cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
+ "constant materialization"),
+ cl::init(false));
+
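Being a hidden cl::opt, this knob is reached through the usual LLVM option plumbing, e.g. llc -mtriple=riscv64 -riscv-use-rematerializable-movimm test.ll, or through clang's -mllvm; the invocation here is illustrative rather than taken from the commit.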
namespace llvm::RISCV {
#define GET_RISCVVSSEGTable_IMPL
#define GET_RISCVVLSEGTable_IMPL
@@ -61,8 +66,11 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
SDLoc DL(N);
SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
- Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
- N->getOperand(0), VL);
+ SDValue Src = N->getOperand(0);
+ if (VT.isInteger())
+ Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
+ N->getOperand(0));
+ Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
break;
}
case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
@@ -83,7 +91,7 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
// Create temporary stack for each expanding node.
SDValue StackSlot =
- CurDAG->CreateStackTemporary(TypeSize::Fixed(8), Align(4));
+ CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
@@ -91,7 +99,7 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
SDValue OffsetSlot =
- CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
+ CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
Align(8));
@@ -142,13 +150,25 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
continue;
MadeChange |= doPeepholeSExtW(N);
- MadeChange |= doPeepholeMaskedRVV(N);
+
+ // FIXME: This is here only because the VMerge transform doesn't
+ // know how to handle masked true inputs. Once that has been moved
+ // to post-ISEL, this can be deleted as well.
+ MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
}
CurDAG->setRoot(Dummy.getValue());
MadeChange |= doPeepholeMergeVVMFold();
+ // After we're done with everything else, convert IMPLICIT_DEF
+ // passthru operands to NoRegister. This is required to workaround
+ // an optimization deficiency in MachineCSE. This really should
+ // be merged back into each of the patterns (i.e. there's no good
+ // reason not to go directly to NoReg), but is being done this way
+ // to allow easy backporting.
+ MadeChange |= doPeepholeNoRegPassThru();
+
if (MadeChange)
CurDAG->RemoveDeadNodes();
}
@@ -184,28 +204,32 @@ static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
int64_t Imm, const RISCVSubtarget &Subtarget) {
- RISCVMatInt::InstSeq Seq =
- RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
+ RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
- // See if we can create this constant as (ADD (SLLI X, 32), X) where X is at
+ // Use a rematerializable pseudo instruction for short sequences if enabled.
+ if (Seq.size() == 2 && UsePseudoMovImm)
+ return SDValue(
+ CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
+ CurDAG->getTargetConstant(Imm, DL, VT)),
+ 0);
+
+ // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
// worst an LUI+ADDIW. This will require an extra register, but avoids a
// constant pool.
+ // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
+ // low and high 32 bits are the same and bit 31 and 63 are set.
if (Seq.size() > 3) {
- int64_t LoVal = SignExtend64<32>(Imm);
- int64_t HiVal = SignExtend64<32>(((uint64_t)Imm - (uint64_t)LoVal) >> 32);
- if (LoVal == HiVal) {
- RISCVMatInt::InstSeq SeqLo =
- RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits());
- if ((SeqLo.size() + 2) < Seq.size()) {
- SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
-
- SDValue SLLI = SDValue(
- CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
- CurDAG->getTargetConstant(32, DL, VT)),
- 0);
- return SDValue(CurDAG->getMachineNode(RISCV::ADD, DL, VT, Lo, SLLI),
- 0);
- }
+ unsigned ShiftAmt, AddOpc;
+ RISCVMatInt::InstSeq SeqLo =
+ RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
+ if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
+ SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
+
+ SDValue SLLI = SDValue(
+ CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
+ CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
+ 0);
+ return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
}
}
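The classic case that generateTwoRegInstSeq generalizes is a 64-bit constant whose low and high 32-bit halves match; a minimal sketch of the arithmetic, with an illustrative constant and ShiftAmt = 32, AddOpc = ADD:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t Imm = 0x1234567812345678; // low and high 32 bits match
      int64_t Lo = (int32_t)Imm;               // materialized via LUI+ADDIW
      // add rd, lo, (slli lo, 32) rebuilds the full constant:
      assert(((uint64_t)Lo << 32) + (uint64_t)Lo == Imm);
    }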
@@ -552,6 +576,12 @@ void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
SDValue VLOperand;
unsigned Opcode = RISCV::PseudoVSETVLI;
+ if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
+ const unsigned VLEN = Subtarget->getRealMinVLen();
+ if (VLEN == Subtarget->getRealMaxVLen())
+ if (VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
+ VLMax = true;
+ }
if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
Opcode = RISCV::PseudoVSETVLIX0;
@@ -808,7 +838,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
switch (Opcode) {
case ISD::Constant: {
- assert(VT == Subtarget->getXLenVT() && "Unexpected VT");
+ assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
auto *ConstNode = cast<ConstantSDNode>(Node);
if (ConstNode->isZero()) {
SDValue New =
@@ -832,26 +862,34 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}
case ISD::ConstantFP: {
const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
- int FPImm = static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(
- APF, VT);
+ auto [FPImm, NeedsFNeg] =
+ static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
+ VT);
if (FPImm >= 0) {
unsigned Opc;
+ unsigned FNegOpc;
switch (VT.SimpleTy) {
default:
llvm_unreachable("Unexpected size");
case MVT::f16:
Opc = RISCV::FLI_H;
+ FNegOpc = RISCV::FSGNJN_H;
break;
case MVT::f32:
Opc = RISCV::FLI_S;
+ FNegOpc = RISCV::FSGNJN_S;
break;
case MVT::f64:
Opc = RISCV::FLI_D;
+ FNegOpc = RISCV::FSGNJN_D;
break;
}
-
SDNode *Res = CurDAG->getMachineNode(
Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
+ if (NeedsFNeg)
+ Res = CurDAG->getMachineNode(FNegOpc, DL, VT, SDValue(Res, 0),
+ SDValue(Res, 0));
+
ReplaceNode(Node, Res);
return;
}
@@ -866,10 +904,16 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
*Subtarget);
+ bool HasZdinx = Subtarget->hasStdExtZdinx();
+ bool Is64Bit = Subtarget->is64Bit();
unsigned Opc;
switch (VT.SimpleTy) {
default:
llvm_unreachable("Unexpected size");
+ case MVT::bf16:
+ assert(Subtarget->hasStdExtZfbfmin());
+ Opc = RISCV::FMV_H_X;
+ break;
case MVT::f16:
Opc =
Subtarget->hasStdExtZhinxOrZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
@@ -881,20 +925,29 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// For RV32, we can't move from a GPR, we need to convert instead. This
// should only happen for +0.0 and -0.0.
assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
- bool HasZdinx = Subtarget->hasStdExtZdinx();
- if (Subtarget->is64Bit())
+ if (Is64Bit)
Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
else
Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
break;
}
- SDNode *Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
+ SDNode *Res;
+ if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
+ Res = CurDAG->getMachineNode(
+ Opc, DL, VT, Imm,
+ CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
+ else
+ Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
// For f64 -0.0, we need to insert a fneg.d idiom.
- if (NegZeroF64)
- Res = CurDAG->getMachineNode(RISCV::FSGNJN_D, DL, VT, SDValue(Res, 0),
- SDValue(Res, 0));
+ if (NegZeroF64) {
+ Opc = RISCV::FSGNJN_D;
+ if (HasZdinx)
+ Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
+ Res =
+ CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
+ }
ReplaceNode(Node, Res);
return;
@@ -2082,8 +2135,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
break;
- SmallVector<SDValue> Operands =
- {CurDAG->getUNDEF(VT), Ld->getBasePtr()};
+ SmallVector<SDValue> Operands = {
+ SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
+ Ld->getBasePtr()};
if (IsStrided)
Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC;
@@ -2141,12 +2195,13 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}
bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+ const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
+ std::vector<SDValue> &OutOps) {
// Always produce a register and immediate operand, as expected by
// RISCVAsmPrinter::PrintAsmMemoryOperand.
switch (ConstraintID) {
- case InlineAsm::Constraint_o:
- case InlineAsm::Constraint_m: {
+ case InlineAsm::ConstraintCode::o:
+ case InlineAsm::ConstraintCode::m: {
SDValue Op0, Op1;
bool Found = SelectAddrRegImm(Op, Op0, Op1);
assert(Found && "SelectAddrRegImm should always succeed");
@@ -2155,7 +2210,7 @@ bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
OutOps.push_back(Op1);
return false;
}
- case InlineAsm::Constraint_A:
+ case InlineAsm::ConstraintCode::A:
OutOps.push_back(Op);
OutOps.push_back(
CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
@@ -2205,7 +2260,8 @@ bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
// Fold constant addresses.
static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
const MVT VT, const RISCVSubtarget *Subtarget,
- SDValue Addr, SDValue &Base, SDValue &Offset) {
+ SDValue Addr, SDValue &Base, SDValue &Offset,
+ bool IsPrefetch = false) {
if (!isa<ConstantSDNode>(Addr))
return false;
@@ -2217,6 +2273,9 @@ static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
int64_t Lo12 = SignExtend64<12>(CVal);
int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
+ if (IsPrefetch && (Lo12 & 0b11111) != 0)
+ return false;
+
if (Hi) {
int64_t Hi20 = (Hi >> 12) & 0xfffff;
Base = SDValue(
@@ -2231,14 +2290,15 @@ static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
}
// Ask how constant materialization would handle this constant.
- RISCVMatInt::InstSeq Seq =
- RISCVMatInt::generateInstSeq(CVal, Subtarget->getFeatureBits());
+ RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
// If the last instruction would be an ADDI, we can fold its immediate and
// emit the rest of the sequence as the base.
if (Seq.back().getOpcode() != RISCV::ADDI)
return false;
Lo12 = Seq.back().getImm();
+ if (IsPrefetch && (Lo12 & 0b11111) != 0)
+ return false;
// Drop the last instruction.
Seq.pop_back();
@@ -2419,14 +2479,85 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
return true;
}
+/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
+/// Offset should be all zeros.
+bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (SelectAddrFrameIndex(Addr, Base, Offset))
+ return true;
+
+ SDLoc DL(Addr);
+ MVT VT = Addr.getSimpleValueType();
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
+ if (isInt<12>(CVal)) {
+ Base = Addr.getOperand(0);
+
+ // Early-out if not a valid offset.
+ if ((CVal & 0b11111) != 0) {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, DL, VT);
+ return true;
+ }
+
+ if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
+ Offset = CurDAG->getTargetConstant(CVal, DL, VT);
+ return true;
+ }
+ }
+
+ // Handle ADD with large immediates.
+ if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
+ int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
+    assert(!isInt<12>(CVal) && "simm12 not already handled?");
+
+    // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can
+    // save one instruction by folding an adjustment (-2048 or 2016) into the
+    // address.
+ if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
+ int64_t Adj = CVal < 0 ? -2048 : 2016;
+ int64_t AdjustedOffset = CVal - Adj;
+ Base = SDValue(CurDAG->getMachineNode(
+ RISCV::ADDI, DL, VT, Addr.getOperand(0),
+ CurDAG->getTargetConstant(AdjustedOffset, DL, VT)),
+ 0);
+ Offset = CurDAG->getTargetConstant(Adj, DL, VT);
+ return true;
+ }
+
+ if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
+ Offset, true)) {
+ // Insert an ADD instruction with the materialized Hi52 bits.
+ Base = SDValue(
+ CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
+ 0);
+ return true;
+ }
+ }
+
+ if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true))
+ return true;
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, DL, VT);
+ return true;
+}
+
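To see why the -2048/2016 adjustments are safe for the prefetch-style addressing this helper serves, here is a small illustrative replay (values chosen for exposition: -2048 is used for negative offsets, 2016 for positive ones, and both have their low 5 bits clear):

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t CVal = 3000;                 // in [2017, 4065]
      int64_t Adj = CVal < 0 ? -2048 : 2016;
      int64_t AdjustedOffset = CVal - Adj; // folded into an ADDI on the base
      assert(AdjustedOffset == 984);
      assert((Adj & 0b11111) == 0);        // offset keeps low 5 bits clear
      assert(AdjustedOffset >= -2048 && AdjustedOffset <= 2047); // still simm12
    }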
bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
SDValue &ShAmt) {
ShAmt = N;
+ // Peek through zext.
+ if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
+ ShAmt = ShAmt.getOperand(0);
+
// Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
// amount. If there is an AND on the shift amount, we can bypass it if it
// doesn't affect any of those bits.
- if (ShAmt.getOpcode() == ISD::AND && isa<ConstantSDNode>(ShAmt.getOperand(1))) {
+ if (ShAmt.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(ShAmt.getOperand(1))) {
const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
// Since the max shift amount is a power of 2 we can subtract 1 to make a
@@ -2729,6 +2860,36 @@ bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
return false;
}
+static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
+ unsigned Bits,
+ const TargetInstrInfo *TII) {
+ unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
+
+ if (!MCOpcode)
+ return false;
+
+ const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
+ const uint64_t TSFlags = MCID.TSFlags;
+ if (!RISCVII::hasSEWOp(TSFlags))
+ return false;
+ assert(RISCVII::hasVLOp(TSFlags));
+
+ bool HasGlueOp = User->getGluedNode() != nullptr;
+ unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
+ bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
+ bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
+ unsigned VLIdx =
+ User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
+ const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
+
+ if (UserOpNo == VLIdx)
+ return false;
+
+ auto NumDemandedBits =
+ RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
+ return NumDemandedBits && Bits >= *NumDemandedBits;
+}
+
// Return true if all users of this SDNode* only consume the lower \p Bits.
// This can be used to form W instructions for add/sub/mul/shl even when the
// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
@@ -2751,6 +2912,11 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
if (Depth >= SelectionDAG::MaxRecursionDepth)
return false;
+ // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
+ // the VT. Ensure the type is scalar to avoid wasting time on vectors.
+ if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
+ return false;
+
for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
// Users of this node should have already been instruction selected
@@ -2760,6 +2926,8 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
// TODO: Add more opcodes?
switch (User->getMachineOpcode()) {
default:
+ if (vectorPseudoHasAllNBitUsers(User, UI.getOperandNo(), Bits, TII))
+ break;
return false;
case RISCV::ADDW:
case RISCV::ADDIW:
@@ -2937,27 +3105,41 @@ bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
return true;
}
+static SDValue findVSplat(SDValue N) {
+ if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
+ if (!N.getOperand(0).isUndef())
+ return SDValue();
+ N = N.getOperand(1);
+ }
+ SDValue Splat = N;
+ if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
+ Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
+ !Splat.getOperand(0).isUndef())
+ return SDValue();
+ assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
+ return Splat;
+}
+
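In DAG shape terms, the matcher accepts either a bare VMV_V_X_VL / VMV_S_X_VL whose passthru operand is undef, or that same splat wrapped as (insert_subvector undef, splat, 0); anything else yields an empty SDValue.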
bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
- if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef())
+ SDValue Splat = findVSplat(N);
+ if (!Splat)
return false;
- assert(N.getNumOperands() == 3 && "Unexpected number of operands");
- SplatVal = N.getOperand(1);
+
+ SplatVal = Splat.getOperand(1);
return true;
}
-using ValidateFn = bool (*)(int64_t);
-
-static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal,
- SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget,
- ValidateFn ValidateImm) {
- if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
- !isa<ConstantSDNode>(N.getOperand(1)))
+static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
+ SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget,
+ std::function<bool(int64_t)> ValidateImm) {
+ SDValue Splat = findVSplat(N);
+ if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
return false;
- assert(N.getNumOperands() == 3 && "Unexpected number of operands");
- int64_t SplatImm =
- cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
+ const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
+ assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
+ "Unexpected splat operand type");
// The semantics of RISCVISD::VMV_V_X_VL is that when the operand
// type is wider than the resulting vector element type: an implicit
@@ -2966,34 +3148,31 @@ static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal,
// any zero-extended immediate.
// For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
// sign-extending to (XLenVT -1).
- MVT XLenVT = Subtarget.getXLenVT();
- assert(XLenVT == N.getOperand(1).getSimpleValueType() &&
- "Unexpected splat operand type");
- MVT EltVT = N.getSimpleValueType().getVectorElementType();
- if (EltVT.bitsLT(XLenVT))
- SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits());
+ APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
+
+ int64_t SplatImm = SplatConst.getSExtValue();
if (!ValidateImm(SplatImm))
return false;
- SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT);
+ SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
return true;
}
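The sign-extension semantics described in the comment above can be replayed on plain integers; a minimal illustration of the (i8 -1) -> 255 case:

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t SplatImm = 255;          // XLenVT-typed splat of (i8 -1)
      int64_t Sext = (int8_t)SplatImm; // sextOrTrunc to the 8-bit element size
      assert(Sext == -1);              // now passes isInt<5>(-1) for simm5
    }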
bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
- return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget,
- [](int64_t Imm) { return isInt<5>(Imm); });
+ return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
+ [](int64_t Imm) { return isInt<5>(Imm); });
}
bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
- return selectVSplatSimmHelper(
+ return selectVSplatImmHelper(
N, SplatVal, *CurDAG, *Subtarget,
[](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
}
bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
SDValue &SplatVal) {
- return selectVSplatSimmHelper(
+ return selectVSplatImmHelper(
N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
});
@@ -3001,29 +3180,34 @@ bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
SDValue &SplatVal) {
- if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
- !isa<ConstantSDNode>(N.getOperand(1)))
- return false;
-
- int64_t SplatImm =
- cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
-
- if (!isUIntN(Bits, SplatImm))
- return false;
-
- SplatVal =
- CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT());
-
- return true;
+ return selectVSplatImmHelper(
+ N, SplatVal, *CurDAG, *Subtarget,
+ [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
}
-bool RISCVDAGToDAGISel::selectExtOneUseVSplat(SDValue N, SDValue &SplatVal) {
- if (N->getOpcode() == ISD::SIGN_EXTEND ||
- N->getOpcode() == ISD::ZERO_EXTEND) {
- if (!N.hasOneUse())
+bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
+ // Truncates are custom lowered during legalization.
+ auto IsTrunc = [this](SDValue N) {
+ if (N->getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
+ return false;
+ SDValue VL;
+ selectVLOp(N->getOperand(2), VL);
+ // Any vmset_vl is ok, since any bits past VL are undefined and we can
+ // assume they are set.
+ return N->getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
+ isa<ConstantSDNode>(VL) &&
+ cast<ConstantSDNode>(VL)->getSExtValue() == RISCV::VLMaxSentinel;
+ };
+
+ // We can have multiple nested truncates, so unravel them all if needed.
+ while (N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND || IsTrunc(N)) {
+    if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
return false;
N = N->getOperand(0);
}
+
return selectVSplat(N, SplatVal);
}
@@ -3038,8 +3222,12 @@ bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
MVT VT = CFP->getSimpleValueType(0);
- if (static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
- VT) >= 0)
+  // Even if this FPImm requires an additional FNEG (i.e. the second element
+  // of the returned pair is true), we still prefer FLI + FNEG over immediate
+  // materialization, as the latter might generate a longer instruction
+  // sequence.
+ if (static_cast<const RISCVTargetLowering *>(TLI)
+ ->getLegalZfaFPImm(APF, VT)
+ .first >= 0)
return false;
MVT XLenVT = Subtarget->getXLenVT();
@@ -3125,6 +3313,9 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
case RISCV::TH_MULAH:
case RISCV::TH_MULSW:
case RISCV::TH_MULSH:
+ if (N0.getValueType() == MVT::i32)
+ break;
+
// Result is already sign extended just remove the sext.w.
// NOTE: We only handle the nodes that are selected with hasAllWUsers.
ReplaceUses(N, N0.getNode());
@@ -3154,6 +3345,12 @@ static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
// Check the instruction defining V0; it needs to be a VMSET pseudo.
SDValue MaskSetter = Glued->getOperand(2);
+ // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
+ // from an extract_subvector or insert_subvector.
+ if (MaskSetter->isMachineOpcode() &&
+ MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
+ MaskSetter = MaskSetter->getOperand(0);
+
const auto IsVMSet = [](unsigned Opc) {
return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
@@ -3183,7 +3380,7 @@ static bool isImplicitDef(SDValue V) {
// corresponding "unmasked" pseudo versions. The mask we're interested in will
// take the form of a V0 physical register operand, with a glued
// register-setting instruction.
-bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
+bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
const RISCV::RISCVMaskedPseudoInfo *I =
RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
if (!I)
@@ -3222,7 +3419,12 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
if (auto *TGlued = Glued->getGluedNode())
Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
- SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
+ MachineSDNode *Result =
+ CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
+
+ if (!N->memoperands_empty())
+ CurDAG->setNodeMemRefs(Result, N->memoperands());
+
Result->setFlags(N->getFlags());
ReplaceUses(N, Result);
@@ -3230,21 +3432,11 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
}
static bool IsVMerge(SDNode *N) {
- unsigned Opc = N->getMachineOpcode();
- return Opc == RISCV::PseudoVMERGE_VVM_MF8 ||
- Opc == RISCV::PseudoVMERGE_VVM_MF4 ||
- Opc == RISCV::PseudoVMERGE_VVM_MF2 ||
- Opc == RISCV::PseudoVMERGE_VVM_M1 ||
- Opc == RISCV::PseudoVMERGE_VVM_M2 ||
- Opc == RISCV::PseudoVMERGE_VVM_M4 || Opc == RISCV::PseudoVMERGE_VVM_M8;
+ return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
}
static bool IsVMv(SDNode *N) {
- unsigned Opc = N->getMachineOpcode();
- return Opc == RISCV::PseudoVMV_V_V_MF8 || Opc == RISCV::PseudoVMV_V_V_MF4 ||
- Opc == RISCV::PseudoVMV_V_V_MF2 || Opc == RISCV::PseudoVMV_V_V_M1 ||
- Opc == RISCV::PseudoVMV_V_V_M2 || Opc == RISCV::PseudoVMV_V_V_M4 ||
- Opc == RISCV::PseudoVMV_V_V_M8;
+ return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V;
}
static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
@@ -3336,6 +3528,11 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
if (!Info)
return false;
+ // When Mask is not a true mask, this transformation is illegal for some
+ // operations whose results are affected by mask, like viota.m.
+ if (Info->MaskAffectsResult && Mask && !usesAllOnesMask(Mask, Glue))
+ return false;
+
if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
// The vmerge instruction must be TU.
// FIXME: This could be relaxed, but we need to handle the policy for the
@@ -3503,10 +3700,13 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
// Add the glue for the CopyToReg of mask->v0.
Ops.push_back(Glue);
- SDNode *Result =
+ MachineSDNode *Result =
CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
Result->setFlags(True->getFlags());
+ if (!cast<MachineSDNode>(True)->memoperands_empty())
+ CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
+
// Replace vmerge.vvm node by Result.
ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
@@ -3514,46 +3714,30 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
- // Try to transform Result to unmasked intrinsic.
- doPeepholeMaskedRVV(Result);
return true;
}
-// Transform (VMERGE_VVM_<LMUL> false, false, true, allones, vl, sew) to
-// (VMV_V_V_<LMUL> false, true, vl, sew). It may decrease uses of VMSET.
-bool RISCVDAGToDAGISel::performVMergeToVMv(SDNode *N) {
-#define CASE_VMERGE_TO_VMV(lmul) \
- case RISCV::PseudoVMERGE_VVM_##lmul: \
- NewOpc = RISCV::PseudoVMV_V_V_##lmul; \
- break;
- unsigned NewOpc;
- switch (N->getMachineOpcode()) {
- default:
- llvm_unreachable("Expected VMERGE_VVM_<LMUL> instruction.");
- CASE_VMERGE_TO_VMV(MF8)
- CASE_VMERGE_TO_VMV(MF4)
- CASE_VMERGE_TO_VMV(MF2)
- CASE_VMERGE_TO_VMV(M1)
- CASE_VMERGE_TO_VMV(M2)
- CASE_VMERGE_TO_VMV(M4)
- CASE_VMERGE_TO_VMV(M8)
- }
+bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
+ bool MadeChange = false;
+ SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
- if (!usesAllOnesMask(N, /* MaskOpIdx */ 3))
- return false;
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = &*--Position;
+ if (N->use_empty() || !N->isMachineOpcode())
+ continue;
- SDLoc DL(N);
- SDValue PolicyOp =
- CurDAG->getTargetConstant(/*TUMU*/ 0, DL, Subtarget->getXLenVT());
- SDNode *Result = CurDAG->getMachineNode(
- NewOpc, DL, N->getValueType(0),
- {N->getOperand(1), N->getOperand(2), N->getOperand(4), N->getOperand(5),
- PolicyOp});
- ReplaceUses(N, Result);
- return true;
+ if (IsVMerge(N) || IsVMv(N))
+ MadeChange |= performCombineVMergeAndVOps(N);
+ }
+ return MadeChange;
}
-bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
+/// If our passthru is an implicit_def, use noreg instead. This side
+/// steps issues with MachineCSE not being able to CSE expressions with
+/// IMPLICIT_DEF operands while preserving the semantic intent. See
+/// pr64282 for context. Note that this transform is the last one
+/// performed at ISEL DAG to DAG.
+bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
bool MadeChange = false;
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
@@ -3562,18 +3746,34 @@ bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
if (N->use_empty() || !N->isMachineOpcode())
continue;
- if (IsVMerge(N) || IsVMv(N))
- MadeChange |= performCombineVMergeAndVOps(N);
- if (IsVMerge(N) && N->getOperand(0) == N->getOperand(1))
- MadeChange |= performVMergeToVMv(N);
+ const unsigned Opc = N->getMachineOpcode();
+ if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
+ !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
+ !isImplicitDef(N->getOperand(0)))
+ continue;
+
+ SmallVector<SDValue> Ops;
+ Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
+ for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
+ SDValue Op = N->getOperand(I);
+ Ops.push_back(Op);
+ }
+
+ MachineSDNode *Result =
+ CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
+ Result->setFlags(N->getFlags());
+ CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
+ ReplaceUses(N, Result);
+ MadeChange = true;
}
return MadeChange;
}
+
// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
return new RISCVDAGToDAGISel(TM, OptLevel);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 281719c12e70..77e174135a59 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -30,7 +30,7 @@ public:
RISCVDAGToDAGISel() = delete;
explicit RISCVDAGToDAGISel(RISCVTargetMachine &TargetMachine,
- CodeGenOpt::Level OptLevel)
+ CodeGenOptLevel OptLevel)
: SelectionDAGISel(ID, TargetMachine, OptLevel) {}
bool runOnMachineFunction(MachineFunction &MF) override {
@@ -43,7 +43,8 @@ public:
void Select(SDNode *Node) override;
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) override;
bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset);
@@ -53,6 +54,7 @@ public:
bool SelectAddrRegImmINX(SDValue Addr, SDValue &Base, SDValue &Offset) {
return SelectAddrRegImm(Addr, Base, Offset, true);
}
+ bool SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, SDValue &Offset);
bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount,
SDValue &Base, SDValue &Index, SDValue &Scale);
@@ -134,7 +136,9 @@ public:
}
bool selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal);
bool selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal);
- bool selectExtOneUseVSplat(SDValue N, SDValue &SplatVal);
+ // Matches the splat of a value which can be extended or truncated, such that
+ // only the bottom 8 bits are preserved.
+ bool selectLow8BitsVSplat(SDValue N, SDValue &SplatVal);
bool selectFPImm(SDValue N, SDValue &Imm);
bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm);
@@ -183,9 +187,9 @@ public:
private:
bool doPeepholeSExtW(SDNode *Node);
- bool doPeepholeMaskedRVV(SDNode *Node);
+ bool doPeepholeMaskedRVV(MachineSDNode *Node);
bool doPeepholeMergeVVMFold();
- bool performVMergeToVMv(SDNode *N);
+ bool doPeepholeNoRegPassThru();
bool performCombineVMergeAndVOps(SDNode *N);
};
@@ -259,6 +263,7 @@ struct RISCVMaskedPseudoInfo {
uint16_t MaskedPseudo;
uint16_t UnmaskedPseudo;
uint8_t MaskOpIdx;
+ uint8_t MaskAffectsResult : 1;
};
#define GET_RISCVVSSEGTable_DECL
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f030982cb815..03e994586d0c 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -38,6 +39,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -73,6 +75,10 @@ static cl::opt<int>
"use for creating a floating-point immediate value"),
cl::init(2));
+static cl::opt<bool>
+ RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
+ cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
+
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
@@ -113,6 +119,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Set up the register classes.
addRegisterClass(XLenVT, &RISCV::GPRRegClass);
+ if (Subtarget.is64Bit() && RV64LegalI32)
+ addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
if (Subtarget.hasStdExtZfhOrZfhmin())
addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
@@ -145,6 +153,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
static const MVT::SimpleValueType F16VecVTs[] = {
MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
+ static const MVT::SimpleValueType BF16VecVTs[] = {
+ MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
+ MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
static const MVT::SimpleValueType F32VecVTs[] = {
MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
static const MVT::SimpleValueType F64VecVTs[] = {
@@ -154,7 +165,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
auto addRegClassForRVV = [this](MVT VT) {
// Disable the smallest fractional LMUL types if ELEN is less than
// RVVBitsPerBlock.
- unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELEN();
+ unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
if (VT.getVectorMinNumElements() < MinElts)
return;
@@ -183,10 +194,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
addRegClassForRVV(VT);
}
- if (Subtarget.hasVInstructionsF16())
+ if (Subtarget.hasVInstructionsF16Minimal())
for (MVT VT : F16VecVTs)
addRegClassForRVV(VT);
+ if (Subtarget.hasVInstructionsBF16())
+ for (MVT VT : BF16VecVTs)
+ addRegClassForRVV(VT);
+
if (Subtarget.hasVInstructionsF32())
for (MVT VT : F32VecVTs)
addRegClassForRVV(VT);
@@ -228,8 +243,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, XLenVT, Expand);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
setCondCodeAction(ISD::SETLE, XLenVT, Expand);
setCondCodeAction(ISD::SETGT, XLenVT, Custom);
@@ -238,6 +257,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setCondCodeAction(ISD::SETUGT, XLenVT, Custom);
setCondCodeAction(ISD::SETUGE, XLenVT, Expand);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction(ISD::SETCC, MVT::i32, Promote);
+
setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
@@ -253,14 +275,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.is64Bit()) {
setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
- setOperationAction(ISD::LOAD, MVT::i32, Custom);
-
- setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
- MVT::i32, Custom);
-
- setOperationAction(ISD::SADDO, MVT::i32, Custom);
- setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
- MVT::i32, Custom);
+ if (!RV64LegalI32) {
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
+ MVT::i32, Custom);
+ setOperationAction(ISD::SADDO, MVT::i32, Custom);
+ setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
+ MVT::i32, Custom);
+ }
} else {
setLibcallName(
{RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
@@ -268,19 +290,36 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::MULO_I64, nullptr);
}
- if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul())
+ if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
- else if (Subtarget.is64Bit())
- setOperationAction(ISD::MUL, {MVT::i32, MVT::i128}, Custom);
- else
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction(ISD::MUL, MVT::i32, Promote);
+ } else if (Subtarget.is64Bit()) {
+ setOperationAction(ISD::MUL, MVT::i128, Custom);
+ if (!RV64LegalI32)
+ setOperationAction(ISD::MUL, MVT::i32, Custom);
+ } else {
setOperationAction(ISD::MUL, MVT::i64, Custom);
+ }
- if (!Subtarget.hasStdExtM())
+ if (!Subtarget.hasStdExtM()) {
setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},
XLenVT, Expand);
- else if (Subtarget.is64Bit())
- setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
- {MVT::i8, MVT::i16, MVT::i32}, Custom);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
+ Promote);
+ } else if (Subtarget.is64Bit()) {
+ if (!RV64LegalI32)
+ setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
+ {MVT::i8, MVT::i16, MVT::i32}, Custom);
+ }
+
+ if (RV64LegalI32 && Subtarget.is64Bit()) {
+ setOperationAction({ISD::MULHS, ISD::MULHU}, MVT::i32, Expand);
+ setOperationAction(
+ {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32,
+ Expand);
+ }
setOperationAction(
{ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
@@ -290,14 +329,18 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
Custom);
if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
- if (Subtarget.is64Bit())
+ if (!RV64LegalI32 && Subtarget.is64Bit())
setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
} else if (Subtarget.hasVendorXTHeadBb()) {
if (Subtarget.is64Bit())
setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
+ } else if (Subtarget.hasVendorXCVbitmanip()) {
+ setOperationAction(ISD::ROTL, XLenVT, Expand);
} else {
setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand);
}
// With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
@@ -307,37 +350,74 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
Subtarget.hasVendorXTHeadBb())
? Legal
: Expand);
- // Zbkb can use rev8+brev8 to implement bitreverse.
- setOperationAction(ISD::BITREVERSE, XLenVT,
- Subtarget.hasStdExtZbkb() ? Custom : Expand);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction(ISD::BSWAP, MVT::i32,
+ (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
+ Subtarget.hasVendorXTHeadBb())
+ ? Promote
+ : Expand);
+
+
+ if (Subtarget.hasVendorXCVbitmanip()) {
+ setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
+ } else {
+ // Zbkb can use rev8+brev8 to implement bitreverse.
+ setOperationAction(ISD::BITREVERSE, XLenVT,
+ Subtarget.hasStdExtZbkb() ? Custom : Expand);
+ }
if (Subtarget.hasStdExtZbb()) {
setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
Legal);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, MVT::i32,
+ Promote);
- if (Subtarget.is64Bit())
- setOperationAction(
- {ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF},
- MVT::i32, Custom);
- } else {
- setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP}, XLenVT, Expand);
+ if (Subtarget.is64Bit()) {
+ if (RV64LegalI32)
+ setOperationAction(ISD::CTTZ, MVT::i32, Legal);
+ else
+ setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
+ }
+ } else if (!Subtarget.hasVendorXCVbitmanip()) {
+ setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand);
}
- if (Subtarget.hasVendorXTHeadBb()) {
- setOperationAction(ISD::CTLZ, XLenVT, Legal);
-
+ if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
+ Subtarget.hasVendorXCVbitmanip()) {
// We need the custom lowering to make sure that the resulting sequence
// for the 32bit case is efficient on 64bit targets.
- if (Subtarget.is64Bit())
- setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
+ if (Subtarget.is64Bit()) {
+ if (RV64LegalI32) {
+ setOperationAction(ISD::CTLZ, MVT::i32,
+ Subtarget.hasStdExtZbb() ? Legal : Promote);
+ if (!Subtarget.hasStdExtZbb())
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
+ } else
+ setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
+ }
+ } else {
+ setOperationAction(ISD::CTLZ, XLenVT, Expand);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
}
- if (Subtarget.is64Bit())
+ if (!RV64LegalI32 && Subtarget.is64Bit() &&
+ !Subtarget.hasShortForwardBranchOpt())
setOperationAction(ISD::ABS, MVT::i32, Custom);
+ // We can use PseudoCCSUB to implement ABS.
+ if (Subtarget.hasShortForwardBranchOpt())
+ setOperationAction(ISD::ABS, XLenVT, Legal);
+
if (!Subtarget.hasVendorXTHeadCondMov())
setOperationAction(ISD::SELECT, XLenVT, Custom);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction(ISD::SELECT, MVT::i32, Promote);
+
static const unsigned FPLegalNodeTypes[] = {
ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT,
ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
@@ -361,7 +441,18 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
-
+
+ static const unsigned ZfhminZfbfminPromoteOps[] = {
+ ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,
+ ISD::FSUB, ISD::FMUL, ISD::FMA,
+ ISD::FDIV, ISD::FSQRT, ISD::FABS,
+ ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD,
+ ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
+ ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
+ ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
+ ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
+ ISD::FROUNDEVEN, ISD::SELECT};
+
if (Subtarget.hasStdExtZfbfmin()) {
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
@@ -369,6 +460,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
+ setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
+ setOperationAction(ISD::FREM, MVT::bf16, Promote);
+ // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
+ // DAGCombiner::visitFP_ROUND probably needs improvements first.
+ setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
}
if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
@@ -379,18 +477,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
} else {
- static const unsigned ZfhminPromoteOps[] = {
- ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,
- ISD::FSUB, ISD::FMUL, ISD::FMA,
- ISD::FDIV, ISD::FSQRT, ISD::FABS,
- ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD,
- ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
- ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
- ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
- ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
- ISD::FROUNDEVEN, ISD::SELECT};
-
- setOperationAction(ZfhminPromoteOps, MVT::f16, Promote);
+ setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,
ISD::STRICT_LROUND, ISD::STRICT_LLROUND},
MVT::f16, Legal);
@@ -409,7 +496,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
Subtarget.hasStdExtZfa() ? Legal : Promote);
setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
- ISD::FEXP2, ISD::FLOG, ISD::FLOG2, ISD::FLOG10},
+ ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
+ ISD::FLOG10},
MVT::f16, Promote);
// FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
@@ -439,6 +527,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(FPOpToExpand, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
+ setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
setOperationAction(ISD::FP_TO_BF16, MVT::f32,
@@ -481,6 +571,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(FPOpToExpand, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_BF16, MVT::f64,
@@ -504,6 +596,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
XLenVT, Legal);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
+ ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
+ MVT::i32, Legal);
+
setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom);
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
}
@@ -548,6 +645,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setBooleanVectorContents(ZeroOrOneBooleanContent);
setOperationAction(ISD::VSCALE, XLenVT, Custom);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction(ISD::VSCALE, MVT::i32, Custom);
// RVV intrinsics may have illegal operands.
// We also need to custom legalize vmv.x.s.
@@ -576,7 +675,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
- ISD::VP_ABS};
+ ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE};
static const unsigned FloatingPointVPOps[] = {
ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
@@ -588,7 +687,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
- ISD::VP_FRINT, ISD::VP_FNEARBYINT};
+ ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
+ ISD::EXPERIMENTAL_VP_REVERSE};
static const unsigned IntegerVecReduceOps[] = {
ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
@@ -659,9 +759,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Expand all extending loads to types larger than this, and truncating
// stores from types larger than this.
for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
- setTruncStoreAction(OtherVT, VT, Expand);
- setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
- VT, Expand);
+ setTruncStoreAction(VT, OtherVT, Expand);
+ setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
+ OtherVT, Expand);
}
setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
@@ -673,6 +773,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
+ setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
+
setOperationPromotedToType(
ISD::VECTOR_SPLICE, VT,
MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
@@ -695,8 +797,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
Legal);
- setOperationAction({ISD::VP_FSHL, ISD::VP_FSHR}, VT, Expand);
-
// Custom-lower extensions and truncations from/to mask types.
setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
VT, Custom);
@@ -712,7 +812,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
VT, Custom);
setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
Custom);
-
+ setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
setOperationAction(
{ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal);
@@ -751,8 +851,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
setTruncStoreAction(VT, OtherVT, Expand);
- setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
- VT, Expand);
+ setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
+ OtherVT, Expand);
}
setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
@@ -761,15 +861,22 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Splice
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
+ if (Subtarget.hasStdExtZvkb()) {
+ setOperationAction(ISD::BSWAP, VT, Legal);
+ setOperationAction(ISD::VP_BSWAP, VT, Custom);
+ } else {
+ setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
+ setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
+ }
+
if (Subtarget.hasStdExtZvbb()) {
- setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, VT, Legal);
- setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Custom);
+ setOperationAction(ISD::BITREVERSE, VT, Legal);
+ setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
VT, Custom);
} else {
- setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, VT, Expand);
- setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Expand);
+ setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
@@ -784,8 +891,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
VT, Custom);
}
-
- setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
}
}
@@ -802,6 +907,27 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
};
+ // TODO: support more ops.
+ static const unsigned ZvfhminPromoteOps[] = {
+ ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
+ ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
+ ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
+ ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
+ ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM,
+ ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
+ ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};
+
+ // TODO: support more vp ops.
+ static const unsigned ZvfhminPromoteVPOps[] = {
+ ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
+ ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
+ ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
+ ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
+ ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
+ ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
+ ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
+ ISD::VP_FNEARBYINT, ISD::VP_SETCC};
+
// Sets common operation actions on RVV floating-point vector types.
const auto SetCommonVFPActions = [&](MVT VT) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
@@ -817,6 +943,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setCondCodeAction(VFPCCToExpand, VT, Expand);
setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
+ setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);
setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
@@ -833,6 +960,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FEXP10, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
@@ -891,6 +1019,38 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
continue;
SetCommonVFPActions(VT);
}
+ } else if (Subtarget.hasVInstructionsF16Minimal()) {
+ for (MVT VT : F16VecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
+ setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
+ Custom);
+ setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
+ Custom);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
+ ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
+ VT, Custom);
+ setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+ ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
+ VT, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ // load/store
+ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+
+ // Custom split nxv32f16 since nxv32f32 is not legal.
+ if (VT == MVT::nxv32f16) {
+ setOperationAction(ZvfhminPromoteOps, VT, Custom);
+ setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
+ continue;
+ }
+ // Add more promote ops.
+ MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+ setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
+ setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
+ }
}
if (Subtarget.hasVInstructionsF32()) {
@@ -922,8 +1082,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(Op, VT, Expand);
for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, OtherVT, Expand);
- setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD},
- OtherVT, VT, Expand);
+ setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
+ OtherVT, Expand);
}
// Custom lower fixed vector undefs to scalable vector undefs to avoid
@@ -986,6 +1146,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
ISD::VP_SETCC, ISD::VP_TRUNCATE},
VT, Custom);
+
+ setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
continue;
}
@@ -1039,13 +1201,22 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(IntegerVPOps, VT, Custom);
- // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
- // range of f32.
- EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
- if (isTypeLegal(FloatVT))
- setOperationAction(
- {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
- Custom);
+ if (Subtarget.hasStdExtZvkb())
+ setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom);
+
+ if (Subtarget.hasStdExtZvbb()) {
+ setOperationAction({ISD::BITREVERSE, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
+ ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP},
+ VT, Custom);
+ } else {
+ // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
+ // range of f32.
+ EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+ if (isTypeLegal(FloatVT))
+ setOperationAction(
+ {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
+ Custom);
+ }
}
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
@@ -1066,6 +1237,34 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// expansion to a build_vector of 0s.
setOperationAction(ISD::UNDEF, VT, Custom);
+ if (VT.getVectorElementType() == MVT::f16 &&
+ !Subtarget.hasVInstructionsF16()) {
+ setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
+ Custom);
+ setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+ setOperationAction(
+ {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
+ Custom);
+ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
+ ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
+ VT, Custom);
+ setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+ ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
+ VT, Custom);
+ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+ // Don't promote f16 vector operations to f32 if f32 vector type is
+ // not legal.
+ // TODO: could split the f16 vector into two vectors and do promotion.
+ if (!isTypeLegal(F32VecVT))
+ continue;
+ setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
+ setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
+ continue;
+ }
+
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
Custom);
@@ -1088,7 +1287,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
- ISD::IS_FPCLASS},
+ ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM},
VT, Custom);
setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
@@ -1132,14 +1331,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
}
+ if (Subtarget.hasStdExtA()) {
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+ }
+
if (Subtarget.hasForcedAtomics()) {
- // Set atomic rmw/cas operations to expand to force __sync libcalls.
+ // Force __sync libcalls to be emitted for atomic rmw/cas operations.
setOperationAction(
{ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
- XLenVT, Expand);
+ XLenVT, LibCall);
}
if (Subtarget.hasVendorXTHeadMemIdx()) {
@@ -1166,11 +1371,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
- setMinimumJumpTableEntries(5);
-
- // Jumps are expensive, compared to logic
- setJumpIsExpensive();
-
setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
@@ -1197,7 +1397,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
- ISD::CONCAT_VECTORS});
+ ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
+ ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
+ ISD::INSERT_VECTOR_ELT});
if (Subtarget.hasVendorXTHeadMemPair())
setTargetDAGCombine({ISD::LOAD, ISD::STORE});
if (Subtarget.useRVVForFixedLengthVectors())
@@ -1239,7 +1441,7 @@ bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
return true;
// Don't allow VF=1 if those types aren't legal.
- if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELEN())
+ if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
return true;
// VLEN=32 support is incomplete.
@@ -1602,11 +1804,12 @@ bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
}
bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
- return Subtarget.hasStdExtZbb();
+ return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
}
bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
- return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb();
+ return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
+ Subtarget.hasVendorXCVbitmanip();
}
bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
@@ -1677,7 +1880,7 @@ bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
// replace. If we don't support unaligned scalar mem, prefer the constant
// pool.
// TODO: Can the caller pass down the alignment?
- if (!Subtarget.enableUnalignedScalarMem())
+ if (!Subtarget.hasFastUnalignedAccess())
return true;
// Prefer to keep the load if it would require many instructions.
@@ -1686,8 +1889,7 @@ bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
// TODO: Should we keep the load only when we're definitely going to emit a
// constant pool?
- RISCVMatInt::InstSeq Seq =
- RISCVMatInt::generateInstSeq(Val, Subtarget.getFeatureBits());
+ RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget);
return Seq.size() <= Subtarget.getMaxBuildIntsCost();
}
@@ -1844,8 +2046,11 @@ bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
// If the vector op is supported, but the scalar op is not, the transform may
// not be worthwhile.
+ // Also permit the transform when the vector binary operation can be
+ // converted to a scalar binary operation that is custom lowered for an
+ // illegal type.
EVT ScalarVT = VecVT.getScalarType();
- return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
+ return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
+ isOperationCustom(Opc, ScalarVT);
}
bool RISCVTargetLowering::isOffsetFoldingLegal(
@@ -1857,11 +2062,17 @@ bool RISCVTargetLowering::isOffsetFoldingLegal(
return false;
}
-// Returns 0-31 if the fli instruction is available for the type and this is
-// legal FP immediate for the type. Returns -1 otherwise.
-int RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, EVT VT) const {
+// Return one of the following:
+// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
+// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
+// positive counterpart, which will be materialized from the first returned
+// element. The second returned element indicates that an FNEG must follow.
+// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
+std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
+ EVT VT) const {
if (!Subtarget.hasStdExtZfa())
- return -1;
+ return std::make_pair(-1, false);
bool IsSupportedVT = false;
if (VT == MVT::f16) {
@@ -1874,9 +2085,14 @@ int RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, EVT VT) const {
}
if (!IsSupportedVT)
- return -1;
+ return std::make_pair(-1, false);
- return RISCVLoadFPImm::getLoadFPImm(Imm);
+ int Index = RISCVLoadFPImm::getLoadFPImm(Imm);
+ if (Index < 0 && Imm.isNegative())
+ // Try the combination of its positive counterpart + FNEG.
+ return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
+ else
+ return std::make_pair(Index, false);
}
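// A hedged consumption sketch for the new pair API (the caller shape is
// ours, not part of this patch). Zfa's fli constant table contains +2.0 but
// its only negative entry is -1.0, so materializing -2.0 goes through the
// positive-counterpart path:
//
//   auto [Index, NeedsFNeg] = getLegalZfaFPImm(APFloat(-2.0f), MVT::f32);
//   if (Index >= 0) {
//     // fli.s fa0, 2.0; then, because NeedsFNeg is true, fneg.s fa0, fa0
//   }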
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
@@ -1888,11 +2104,13 @@ bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
IsLegalVT = Subtarget.hasStdExtFOrZfinx();
else if (VT == MVT::f64)
IsLegalVT = Subtarget.hasStdExtDOrZdinx();
+ else if (VT == MVT::bf16)
+ IsLegalVT = Subtarget.hasStdExtZfbfmin();
if (!IsLegalVT)
return false;
- if (getLegalZfaFPImm(Imm, VT) >= 0)
+ if (getLegalZfaFPImm(Imm, VT).first >= 0)
return true;
// Cannot create a 64 bit floating-point immediate value for rv32.
@@ -1901,14 +2119,17 @@ bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
// -0.0 can be created by fmv + fneg.
return Imm.isZero();
}
- // Special case: the cost for -0.0 is 1.
- int Cost = Imm.isNegZero()
- ? 1
- : RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
- Subtarget.getXLen(),
- Subtarget.getFeatureBits());
- // If the constantpool data is already in cache, only Cost 1 is cheaper.
- return Cost < FPImmCost;
+
+ // Special case: fmv + fneg
+ if (Imm.isNegZero())
+ return true;
+
+ // Building an integer and then converting requires a fmv at the end of
+ // the integer sequence.
+ const int Cost =
+ 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
+ Subtarget);
+ return Cost <= FPImmCost;
}
// TODO: This is very conservative.
@@ -1953,7 +2174,12 @@ MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
!Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
return MVT::f32;
- return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+ MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+
+ if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
+ return MVT::i64;
+
+ return PartVT;
}
unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
@@ -1968,6 +2194,21 @@ unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
+unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
+ LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
+ unsigned &NumIntermediates, MVT &RegisterVT) const {
+ unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
+ Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
+
+ if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
+ IntermediateVT = MVT::i64;
+
+ if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
+ RegisterVT = MVT::i64;
+
+ return NumRegs;
+}
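// Rationale sketch for the overrides above (our reading, not spelled out in
// the patch): with RV64LegalI32 the type breakdown would naturally produce
// MVT::i32 parts, but the LP64 calling conventions still pass and return
// such values in full 64-bit GPRs, so both the intermediate and register
// types are widened back to MVT::i64 to keep the ABI unchanged.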
+
// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
@@ -2010,7 +2251,7 @@ static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
}
break;
case ISD::SETLT:
- // Convert X < 1 to 0 <= X.
+ // Convert X < 1 to 0 >= X.
if (C == 1) {
RHS = LHS;
LHS = DAG.getConstant(0, DL, RHS.getValueType());
@@ -2228,7 +2469,7 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
return false;
break;
case MVT::f16:
- if (!Subtarget.hasVInstructionsF16())
+ if (!Subtarget.hasVInstructionsF16Minimal())
return false;
break;
case MVT::f32:
@@ -2242,7 +2483,7 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
}
// Reject elements larger than ELEN.
- if (EltVT.getSizeInBits() > Subtarget.getELEN())
+ if (EltVT.getSizeInBits() > Subtarget.getELen())
return false;
unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
@@ -2271,7 +2512,7 @@ static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
"Expected legal fixed length vector!");
unsigned MinVLen = Subtarget.getRealMinVLen();
- unsigned MaxELen = Subtarget.getELEN();
+ unsigned MaxELen = Subtarget.getELen();
MVT EltVT = VT.getVectorElementType();
switch (EltVT.SimpleTy) {
@@ -2348,16 +2589,32 @@ static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
}
-static SDValue getVLOp(uint64_t NumElts, const SDLoc &DL, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
+static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
+ SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
+ // If we know the exact VLEN, our VL is exactly equal to VLMAX, and
+ // we can't encode the AVL as an immediate, use the VLMAX encoding.
+ const auto [MinVLMAX, MaxVLMAX] =
+ RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
+ if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX && NumElts > 31)
+ return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
+
return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
}
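// Illustration of the new early-out (numbers assumed): on a subtarget whose
// real minimum and maximum VLEN are both 512, ContainerVT = nxv16i32 gives
// MinVLMAX == MaxVLMAX == 512 * 8 / 32 == 128. An AVL of 128 does not fit
// vsetivli's 5-bit immediate (maximum 31), so returning X0 selects the
// cheaper "VL = VLMAX" encoding instead of materializing 128 in a GPR.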
static std::pair<SDValue, SDValue>
+getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert(VecVT.isScalableVector() && "Expecting a scalable vector");
+ SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
+ SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
+ return {Mask, VL};
+}
+
+static std::pair<SDValue, SDValue>
getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
- SDValue VL = getVLOp(NumElts, DL, DAG, Subtarget);
+ SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget);
SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
return {Mask, VL};
}
@@ -2373,18 +2630,7 @@ getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
Subtarget);
assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
- MVT XLenVT = Subtarget.getXLenVT();
- SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
- SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
- return {Mask, VL};
-}
-
-// As above but assuming the given type is a scalable vector type.
-static std::pair<SDValue, SDValue>
-getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
- assert(VecVT.isScalableVector() && "Expecting a scalable vector");
- return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
+ return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
}
SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
@@ -2394,6 +2640,25 @@ SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
VecVT.getVectorElementCount());
}
+std::pair<unsigned, unsigned>
+RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,
+ const RISCVSubtarget &Subtarget) {
+ assert(VecVT.isScalableVector() && "Expected scalable vector");
+
+ unsigned EltSize = VecVT.getScalarSizeInBits();
+ unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
+
+ unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
+ unsigned MaxVLMAX =
+ RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
+
+ unsigned VectorBitsMin = Subtarget.getRealMinVLen();
+ unsigned MinVLMAX =
+ RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
+
+ return std::make_pair(MinVLMAX, MaxVLMAX);
+}
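// Worked example for the bounds helper (values ours): VecVT = nxv2i32 has
// EltSize = 32 and a known-min size of 64 bits, i.e. an LMUL=1 type. On a
// subtarget whose real VLEN is known to lie in [128, 512], this returns
//   {MinVLMAX, MaxVLMAX} = {128 / 32, 512 / 32} = {4, 16}.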
+
// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// of either is (currently) supported. This can get us into an infinite loop
// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
@@ -2407,6 +2672,51 @@ bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
return false;
}
+InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
+ // TODO: Here we assume the reciprocal throughput of an LMUL_1 operation
+ // is 1; in reality it is implementation-defined.
+ if (!VT.isVector())
+ return InstructionCost::getInvalid();
+ unsigned DLenFactor = Subtarget.getDLenFactor();
+ unsigned Cost;
+ if (VT.isScalableVector()) {
+ unsigned LMul;
+ bool Fractional;
+ std::tie(LMul, Fractional) =
+ RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
+ if (Fractional)
+ Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
+ else
+ Cost = (LMul * DLenFactor);
+ } else {
+ Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
+ }
+ return Cost;
+}
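// Worked examples for the heuristic above, assuming DLenFactor == 2
// (i.e. DLEN = VLEN / 2):
//   m4  (integral, LMul = 4):   Cost = 4 * 2 = 8
//   m1  (integral, LMul = 1):   Cost = 1 * 2 = 2
//   mf2 (fractional, LMul = 2): 2 <= 2, so Cost = 2 / 2 = 1
//   mf8 (fractional, LMul = 8): 8 > 2,  so Cost = 1
// Fixed-length types instead take the divideCeil path, i.e. the number of
// DLEN-sized chunks needed to cover the vector at the minimum VLEN.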
+
+
+/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
+/// is generally quadratic in the number of vregs implied by LMUL. Note that
+/// the other operands (index and possibly mask) are handled separately.
+InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
+ return getLMULCost(VT) * getLMULCost(VT);
+}
+
+/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
+/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
+/// or may track the vrgather.vv cost. It is implementation-dependent.
+InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
+ return getLMULCost(VT);
+}
+
+/// Return the cost of a vslidedown.vi/vx or vslideup.vi/vx instruction
+/// for the type VT. (This does not cover the vslide1up or vslide1down
+/// variants.) Slides may be linear in the number of vregs implied by LMUL,
+/// or may track the vrgather.vv cost. It is implementation-dependent.
+InstructionCost RISCVTargetLowering::getVSlideCost(MVT VT) const {
+ return getLMULCost(VT);
+}
+
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
// RISC-V FP-to-int conversions saturate to the destination register size, but
@@ -2420,9 +2730,10 @@ static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
if (!DstVT.isVector()) {
- // In absence of Zfh, promote f16 to f32, then saturate the result.
- if (Src.getSimpleValueType() == MVT::f16 &&
- !Subtarget.hasStdExtZfhOrZhinx()) {
+ // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
+ // the result.
+ if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
+ Src.getValueType() == MVT::bf16) {
Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
}
@@ -2778,6 +3089,31 @@ lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
}
+// Expand vector LRINT and LLRINT by converting to the integer domain.
+static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ MVT VT = Op.getSimpleValueType();
+ assert(VT.isVector() && "Unexpected type");
+
+ SDLoc DL(Op);
+ SDValue Src = Op.getOperand(0);
+ MVT ContainerVT = VT;
+
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+ Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
+ }
+
+ auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+ SDValue Truncated =
+ DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
+
+ if (!VT.isFixedLengthVector())
+ return Truncated;
+
+ return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
+}
+
static SDValue
getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
@@ -2802,6 +3138,14 @@ getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
}
+static MVT getLMUL1VT(MVT VT) {
+ assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
+ "Unexpected vector MVT");
+ return MVT::getScalableVectorVT(
+ VT.getVectorElementType(),
+ RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
+}
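// Example: with RISCV::RVVBitsPerBlock == 64, getLMUL1VT(MVT::nxv8i32)
// returns MVT::nxv2i32 (64 / 32 == 2 elements per block), i.e. the LMUL=1
// register type with the same element type.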
+
struct VIDSequence {
int64_t StepNumerator;
unsigned StepDenominator;
@@ -2975,8 +3319,124 @@ static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
return convertFromScalableVector(VT, Gather, DAG, Subtarget);
}
-static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
+
+/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
+/// which constitute a large proportion of the elements. In such cases we can
+/// splat a vector with the dominant element and make up the shortfall with
+/// INSERT_VECTOR_ELTs. Returns SDValue() if not profitable.
+/// Note that this includes vectors of 2 elements by association. The
+/// upper-most element is the "dominant" one, allowing us to use a splat to
+/// "insert" the upper element, and an insert of the lower element at position
+/// 0, which improves codegen.
+static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ MVT VT = Op.getSimpleValueType();
+ assert(VT.isFixedLengthVector() && "Unexpected vector!");
+
+ MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+
+ SDLoc DL(Op);
+ auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+ MVT XLenVT = Subtarget.getXLenVT();
+ unsigned NumElts = Op.getNumOperands();
+
+ SDValue DominantValue;
+ unsigned MostCommonCount = 0;
+ DenseMap<SDValue, unsigned> ValueCounts;
+ unsigned NumUndefElts =
+ count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
+
+ // Track the number of scalar loads we know we'd be inserting, estimated as
+ // any non-zero floating-point constant. Other kinds of element are either
+ // already in registers or are materialized on demand. The threshold at which
+ // a vector load is more desirable than several scalar materialization and
+ // vector-insertion instructions is not known.
+ unsigned NumScalarLoads = 0;
+
+ for (SDValue V : Op->op_values()) {
+ if (V.isUndef())
+ continue;
+
+ ValueCounts.insert(std::make_pair(V, 0));
+ unsigned &Count = ValueCounts[V];
+ if (0 == Count)
+ if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
+ NumScalarLoads += !CFP->isExactlyValue(+0.0);
+
+ // Is this value dominant? In case of a tie, prefer the highest element as
+ // it's cheaper to insert near the beginning of a vector than it is at the
+ // end.
+ if (++Count >= MostCommonCount) {
+ DominantValue = V;
+ MostCommonCount = Count;
+ }
+ }
+
+ assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
+ unsigned NumDefElts = NumElts - NumUndefElts;
+ unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
+
+ // Don't perform this optimization when optimizing for size, since
+ // materializing elements and inserting them tends to cause code bloat.
+ if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
+ (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
+ ((MostCommonCount > DominantValueCountThreshold) ||
+ (ValueCounts.size() <= Log2_32(NumDefElts)))) {
+ // Start by splatting the most common element.
+ SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
+
+ DenseSet<SDValue> Processed{DominantValue};
+
+ // We can handle an insert into the last element (of a splat) via
+ // v(f)slide1down. This is slightly better than the vslideup insert
+ // lowering as it avoids the need for a vector group temporary. It
+ // is also better than using vmerge.vx as it avoids the need to
+ // materialize the mask in a vector register.
+ if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
+ !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
+ LastOp != DominantValue) {
+ Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+ auto OpCode =
+ VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
+ if (!VT.isFloatingPoint())
+ LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
+ Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
+ LastOp, Mask, VL);
+ Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
+ Processed.insert(LastOp);
+ }
+
+ MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
+ for (const auto &OpIdx : enumerate(Op->ops())) {
+ const SDValue &V = OpIdx.value();
+ if (V.isUndef() || !Processed.insert(V).second)
+ continue;
+ if (ValueCounts[V] == 1) {
+ Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
+ DAG.getConstant(OpIdx.index(), DL, XLenVT));
+ } else {
+ // Blend in all instances of this value using a VSELECT, using a
+ // mask where each bit signals whether that element is the one
+ // we're after.
+ SmallVector<SDValue> Ops;
+ transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
+ return DAG.getConstant(V == V1, DL, XLenVT);
+ });
+ Vec = DAG.getNode(ISD::VSELECT, DL, VT,
+ DAG.getBuildVector(SelMaskTy, DL, Ops),
+ DAG.getSplatBuildVector(VT, DL, V), Vec);
+ }
+ }
+
+ return Vec;
+ }
+
+ return SDValue();
+}
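// Illustrative walk-through (values ours): for v4f32 <1.0, 1.0, 1.0, 2.0>,
// DominantValue is 1.0 with MostCommonCount = 3, which beats
// DominantValueCountThreshold = NumDefElts - 2 = 2. The lowering splats 1.0
// and, because the last operand 2.0 occurs exactly once, inserts it into
// the top lane with a single vfslide1down rather than a vslideup or a
// vmerge with a materialized mask register.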
+
+static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
assert(VT.isFixedLengthVector() && "Unexpected vector!");
@@ -3008,94 +3468,68 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// XLenVT if we're producing a v8i1. This results in more consistent
// codegen across RV32 and RV64.
unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
- NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELEN());
- if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
- // If we have to use more than one INSERT_VECTOR_ELT then this
- // optimization is likely to increase code size; avoid performing it in
- // such a case. We can use a load from a constant pool in this case.
- if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
- return SDValue();
- // Now we can create our integer vector type. Note that it may be larger
- // than the resulting mask type: v4i1 would use v1i8 as its integer type.
- unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
- MVT IntegerViaVecVT =
- MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
- IntegerViaVecElts);
-
- uint64_t Bits = 0;
- unsigned BitPos = 0, IntegerEltIdx = 0;
- SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
-
- for (unsigned I = 0; I < NumElts;) {
- SDValue V = Op.getOperand(I);
- bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
- Bits |= ((uint64_t)BitValue << BitPos);
- ++BitPos;
- ++I;
-
- // Once we accumulate enough bits to fill our scalar type or process the
- // last element, insert into our vector and clear our accumulated data.
- if (I % NumViaIntegerBits == 0 || I == NumElts) {
- if (NumViaIntegerBits <= 32)
- Bits = SignExtend64<32>(Bits);
- SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
- Elts[IntegerEltIdx] = Elt;
- Bits = 0;
- BitPos = 0;
- IntegerEltIdx++;
- }
- }
-
- SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
-
- if (NumElts < NumViaIntegerBits) {
- // If we're producing a smaller vector than our minimum legal integer
- // type, bitcast to the equivalent (known-legal) mask type, and extract
- // our final mask.
- assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
- Vec = DAG.getBitcast(MVT::v8i1, Vec);
- Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
- DAG.getConstant(0, DL, XLenVT));
- } else {
- // Else we must have produced an integer type with the same size as the
- // mask type; bitcast for the final result.
- assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
- Vec = DAG.getBitcast(VT, Vec);
+ NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
+ // If we have to use more than one INSERT_VECTOR_ELT then this
+ // optimization is likely to increase code size; avoid performing it in
+ // such a case. We can use a load from a constant pool in this case.
+ if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
+ return SDValue();
+ // Now we can create our integer vector type. Note that it may be larger
+ // than the resulting mask type: v4i1 would use v1i8 as its integer type.
+ unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
+ MVT IntegerViaVecVT =
+ MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
+ IntegerViaVecElts);
+
+ uint64_t Bits = 0;
+ unsigned BitPos = 0, IntegerEltIdx = 0;
+ SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
+
+ for (unsigned I = 0; I < NumElts;) {
+ SDValue V = Op.getOperand(I);
+ bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
+ Bits |= ((uint64_t)BitValue << BitPos);
+ ++BitPos;
+ ++I;
+
+ // Once we accumulate enough bits to fill our scalar type or process the
+ // last element, insert into our vector and clear our accumulated data.
+ if (I % NumViaIntegerBits == 0 || I == NumElts) {
+ if (NumViaIntegerBits <= 32)
+ Bits = SignExtend64<32>(Bits);
+ SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
+ Elts[IntegerEltIdx] = Elt;
+ Bits = 0;
+ BitPos = 0;
+ IntegerEltIdx++;
}
-
- return Vec;
}
- // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
- // vector type, we have a legal equivalently-sized i8 type, so we can use
- // that.
- MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
- SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
+ SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
- SDValue WideVec;
- if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
- // For a splat, perform a scalar truncate before creating the wider
- // vector.
- assert(Splat.getValueType() == XLenVT &&
- "Unexpected type for i1 splat value");
- Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
- DAG.getConstant(1, DL, XLenVT));
- WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
+ if (NumElts < NumViaIntegerBits) {
+ // If we're producing a smaller vector than our minimum legal integer
+ // type, bitcast to the equivalent (known-legal) mask type, and extract
+ // our final mask.
+ assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
+ Vec = DAG.getBitcast(MVT::v8i1, Vec);
+ Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
+ DAG.getConstant(0, DL, XLenVT));
} else {
- SmallVector<SDValue, 8> Ops(Op->op_values());
- WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
- SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
- WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
+ // Else we must have produced an integer type with the same size as the
+ // mask type; bitcast for the final result.
+ assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
+ Vec = DAG.getBitcast(VT, Vec);
}
- return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
+ return Vec;
}
if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
- if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
- return Gather;
unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
: RISCVISD::VMV_V_X_VL;
+ if (!VT.isFloatingPoint())
+ Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
Splat =
DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
return convertFromScalableVector(VT, Splat, DAG, Subtarget);
@@ -3142,18 +3576,16 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
(StepOpcode == ISD::SHL && SplatStepVal != 0)) {
- SDValue SplatStep = DAG.getSplatBuildVector(
- VIDVT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
+ SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
}
if (StepDenominator != 1) {
- SDValue SplatStep = DAG.getSplatBuildVector(
- VIDVT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
+ SDValue SplatStep =
+ DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
}
if (Addend != 0 || Negate) {
- SDValue SplatAddend = DAG.getSplatBuildVector(
- VIDVT, DL, DAG.getConstant(Addend, DL, XLenVT));
+ SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
VID);
}
@@ -3165,6 +3597,48 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
}
+ // For very small build_vectors, use a single scalar insert of a constant.
+ // TODO: Base this on constant rematerialization cost, not size.
+ const unsigned EltBitSize = VT.getScalarSizeInBits();
+ if (VT.getSizeInBits() <= 32 &&
+ ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
+ MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
+ assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
+ "Unexpected sequence type");
+ // If we can use the original VL with the modified element type, this
+ // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
+ // be moved into InsertVSETVLI?
+ unsigned ViaVecLen =
+ (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
+ MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
+
+ uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
+ uint64_t SplatValue = 0;
+ // Construct the amalgamated value at this larger vector type.
+ for (const auto &OpIdx : enumerate(Op->op_values())) {
+ const auto &SeqV = OpIdx.value();
+ if (!SeqV.isUndef())
+ SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
+ << (OpIdx.index() * EltBitSize));
+ }
+
+ // On RV64, sign-extend from 32 to 64 bits where possible in order to
+ // achieve better constant materialization.
+ if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
+ SplatValue = SignExtend64<32>(SplatValue);
+
+ SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
+ DAG.getUNDEF(ViaVecVT),
+ DAG.getConstant(SplatValue, DL, XLenVT),
+ DAG.getConstant(0, DL, XLenVT));
+ if (ViaVecLen != 1)
+ Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
+ MVT::getVectorVT(ViaIntVT, 1), Vec,
+ DAG.getConstant(0, DL, XLenVT));
+ return DAG.getBitcast(VT, Vec);
+ }
+
+
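// Worked example for the single-scalar-insert path above (values ours):
// v4i8 <1, 2, 3, 4> packs little-endian into SplatValue = 0x04030201 (the
// whole vector is 32 bits). On RV64 the value is sign-extended from bit 31
// (unchanged here), inserted as element 0 of a v1i32, or of a v4i32 when
// the minimum VLEN covers 128 bits so the original VL can be kept, and the
// result is bitcast back to v4i8.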
// Attempt to detect "hidden" splats, which only reveal themselves as splats
// when re-interpreted as a vector with a larger element type. For example,
// v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
@@ -3173,7 +3647,6 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// TODO: This optimization could also work on non-constant splats, but it
// would require bit-manipulation instructions to construct the splat value.
SmallVector<SDValue> Sequence;
- unsigned EltBitSize = VT.getScalarSizeInBits();
const auto *BV = cast<BuildVectorSDNode>(Op);
if (VT.isInteger() && EltBitSize < 64 &&
ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
@@ -3181,11 +3654,19 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
(Sequence.size() * EltBitSize) <= 64) {
unsigned SeqLen = Sequence.size();
MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
- MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
ViaIntVT == MVT::i64) &&
"Unexpected sequence type");
+ // If we can use the original VL with the modified element type, this
+ // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
+ // be moved into InsertVSETVLI?
+ const unsigned RequiredVL = NumElts / SeqLen;
+ const unsigned ViaVecLen =
+ (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
+ NumElts : RequiredVL;
+ MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
+
unsigned EltIdx = 0;
uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
uint64_t SplatValue = 0;
@@ -3219,94 +3700,171 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
DAG.getUNDEF(ViaContainerVT),
DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
+ if (ViaVecLen != RequiredVL)
+ Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
+ MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
+ DAG.getConstant(0, DL, XLenVT));
return DAG.getBitcast(VT, Splat);
}
}
- // Try and optimize BUILD_VECTORs with "dominant values" - these are values
- // which constitute a large proportion of the elements. In such cases we can
- // splat a vector with the dominant element and make up the shortfall with
- // INSERT_VECTOR_ELTs.
- // Note that this includes vectors of 2 elements by association. The
- // upper-most element is the "dominant" one, allowing us to use a splat to
- // "insert" the upper element, and an insert of the lower element at position
- // 0, which improves codegen.
- SDValue DominantValue;
- unsigned MostCommonCount = 0;
- DenseMap<SDValue, unsigned> ValueCounts;
- unsigned NumUndefElts =
- count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
+  // If the number of sign bits allows, see if we can lower this as an <N x i8>.
+ // Our main goal here is to reduce LMUL (and thus work) required to
+ // build the constant, but we will also narrow if the resulting
+ // narrow vector is known to materialize cheaply.
+ // TODO: We really should be costing the smaller vector. There are
+ // profitable cases this misses.
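+  // For example, a v4i32 build_vector of {7, -8, 0, 1} has every element
+  // representable as an i8, so ComputeNumSignBits reports at least 25 sign
+  // bits and EltBitSize - SignBits = 7 < 8 holds; the vector is rebuilt as
+  // v4i8 and widened back with a single vsext.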
+ if (EltBitSize > 8 && VT.isInteger() &&
+ (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
+ unsigned SignBits = DAG.ComputeNumSignBits(Op);
+ if (EltBitSize - SignBits < 8) {
+ SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
+ DL, Op->ops());
+ Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
+ Source, DAG, Subtarget);
+ SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
+ return convertFromScalableVector(VT, Res, DAG, Subtarget);
+ }
+ }
- // Track the number of scalar loads we know we'd be inserting, estimated as
- // any non-zero floating-point constant. Other kinds of element are either
- // already in registers or are materialized on demand. The threshold at which
- // a vector load is more desirable than several scalar materializion and
- // vector-insertion instructions is not known.
- unsigned NumScalarLoads = 0;
+ if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
+ return Res;
- for (SDValue V : Op->op_values()) {
- if (V.isUndef())
- continue;
+ // For constant vectors, use generic constant pool lowering. Otherwise,
+ // we'd have to materialize constants in GPRs just to move them into the
+ // vector.
+ return SDValue();
+}
- ValueCounts.insert(std::make_pair(V, 0));
- unsigned &Count = ValueCounts[V];
- if (0 == Count)
- if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
- NumScalarLoads += !CFP->isExactlyValue(+0.0);
+static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ MVT VT = Op.getSimpleValueType();
+ assert(VT.isFixedLengthVector() && "Unexpected vector!");
- // Is this value dominant? In case of a tie, prefer the highest element as
- // it's cheaper to insert near the beginning of a vector than it is at the
- // end.
- if (++Count >= MostCommonCount) {
- DominantValue = V;
- MostCommonCount = Count;
+ if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
+ return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
+
+ MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+
+ SDLoc DL(Op);
+ auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ if (VT.getVectorElementType() == MVT::i1) {
+ // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
+ // vector type, we have a legal equivalently-sized i8 type, so we can use
+ // that.
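+    // For example, a v8i1 mask becomes a v8i8 build_vector; each lane is
+    // masked down to bit 0 and the i1 result is recovered with a setne
+    // against the all-zeros vector.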
+ MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
+ SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
+
+ SDValue WideVec;
+ if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
+ // For a splat, perform a scalar truncate before creating the wider
+ // vector.
+ Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
+ DAG.getConstant(1, DL, Splat.getValueType()));
+ WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
+ } else {
+ SmallVector<SDValue, 8> Ops(Op->op_values());
+ WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
+ SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
+ WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
}
+
+ return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
}
- assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
- unsigned NumDefElts = NumElts - NumUndefElts;
- unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
+ if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
+ if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
+ return Gather;
+ unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
+ : RISCVISD::VMV_V_X_VL;
+ if (!VT.isFloatingPoint())
+ Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
+ Splat =
+ DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
+ return convertFromScalableVector(VT, Splat, DAG, Subtarget);
+ }
- // Don't perform this optimization when optimizing for size, since
- // materializing elements and inserting them tends to cause code bloat.
- if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
- (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
- ((MostCommonCount > DominantValueCountThreshold) ||
- (ValueCounts.size() <= Log2_32(NumDefElts)))) {
- // Start by splatting the most common element.
- SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
+ if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
+ return Res;
- DenseSet<SDValue> Processed{DominantValue};
- MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
- for (const auto &OpIdx : enumerate(Op->ops())) {
- const SDValue &V = OpIdx.value();
- if (V.isUndef() || !Processed.insert(V).second)
- continue;
- if (ValueCounts[V] == 1) {
- Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
- DAG.getConstant(OpIdx.index(), DL, XLenVT));
- } else {
- // Blend in all instances of this value using a VSELECT, using a
- // mask where each bit signals whether that element is the one
- // we're after.
- SmallVector<SDValue> Ops;
- transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
- return DAG.getConstant(V == V1, DL, XLenVT);
- });
- Vec = DAG.getNode(ISD::VSELECT, DL, VT,
- DAG.getBuildVector(SelMaskTy, DL, Ops),
- DAG.getSplatBuildVector(VT, DL, V), Vec);
- }
+ // If we're compiling for an exact VLEN value, we can split our work per
+ // register in the register group.
+ const unsigned MinVLen = Subtarget.getRealMinVLen();
+ const unsigned MaxVLen = Subtarget.getRealMaxVLen();
+ if (MinVLen == MaxVLen && VT.getSizeInBits().getKnownMinValue() > MinVLen) {
+ MVT ElemVT = VT.getVectorElementType();
+ unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+ MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
+ MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
+ assert(M1VT == getLMUL1VT(M1VT));
+
+ // The following semantically builds up a fixed length concat_vector
+ // of the component build_vectors. We eagerly lower to scalable and
+ // insert_subvector here to avoid DAG combining it back to a large
+ // build_vector.
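+    // For example, with an exact VLEN of 128, a v8i64 build_vector (four
+    // registers worth) is assembled from four v2i64 build_vectors, each
+    // converted to a scalable m1 vector and inserted one register apart.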
+ SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
+ unsigned NumOpElts = M1VT.getVectorMinNumElements();
+ SDValue Vec = DAG.getUNDEF(ContainerVT);
+ for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
+ auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
+ SDValue SubBV =
+ DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
+ SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
+ unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
+ DAG.getVectorIdxConstant(InsertIdx, DL));
}
+ return convertFromScalableVector(VT, Vec, DAG, Subtarget);
+ }
- return Vec;
+  // Cap the cost at a value linear in the number of elements in the vector.
+  // The default lowering is to use the stack; the vector store + scalar loads
+  // approach is linear in VL. However, at high LMULs vslide1down and
+  // vslidedown end up being (at least) linear in LMUL. As a result, using the
+  // vslidedown lowering for every element ends up costing VL*LMUL.
+ // TODO: Should we be directly costing the stack alternative? Doing so might
+ // give us a more accurate upper bound.
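+  // For example, a fully-defined v8i32 lowered at LMUL_2 gets LinearBudget =
+  // 16 and spends PerSlideCost = 2 per element (8 * 2 = 16), landing exactly
+  // on budget; the same element count at LMUL_4 (a narrower VLEN) would cost
+  // 8 * 4 = 32 and be rejected.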
+ InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
+
+ // TODO: unify with TTI getSlideCost.
+ InstructionCost PerSlideCost = 1;
+ switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
+ default: break;
+ case RISCVII::VLMUL::LMUL_2:
+ PerSlideCost = 2;
+ break;
+ case RISCVII::VLMUL::LMUL_4:
+ PerSlideCost = 4;
+ break;
+ case RISCVII::VLMUL::LMUL_8:
+ PerSlideCost = 8;
+ break;
}
- // For constant vectors, use generic constant pool lowering. Otherwise,
- // we'd have to materialize constants in GPRs just to move them into the
- // vector.
- if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
- ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
+ // TODO: Should we be using the build instseq then cost + evaluate scheme
+ // we use for integer constants here?
+ unsigned UndefCount = 0;
+ for (const SDValue &V : Op->ops()) {
+ if (V.isUndef()) {
+ UndefCount++;
+ continue;
+ }
+ if (UndefCount) {
+ LinearBudget -= PerSlideCost;
+ UndefCount = 0;
+ }
+ LinearBudget -= PerSlideCost;
+ }
+ if (UndefCount) {
+ LinearBudget -= PerSlideCost;
+ }
+
+ if (LinearBudget < 0)
return SDValue();
assert((!VT.isFloatingPoint() ||
@@ -3315,13 +3873,24 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
- SDValue Vec = DAG.getUNDEF(ContainerVT);
- unsigned UndefCount = 0;
- for (const SDValue &V : Op->ops()) {
+ SDValue Vec;
+ UndefCount = 0;
+ for (SDValue V : Op->ops()) {
if (V.isUndef()) {
UndefCount++;
continue;
}
+
+ // Start our sequence with a TA splat in the hopes that hardware is able to
+ // recognize there's no dependency on the prior value of our temporary
+ // register.
+ if (!Vec) {
+ Vec = DAG.getSplatVector(VT, DL, V);
+ Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+ UndefCount = 0;
+ continue;
+ }
+
if (UndefCount) {
const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
@@ -3330,6 +3899,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
auto OpCode =
VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
+ if (!VT.isFloatingPoint())
+ V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
V, Mask, VL);
}
@@ -3354,19 +3925,43 @@ static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
if ((LoC >> 31) == HiC)
return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
- // If vl is equal to XLEN_MAX and Hi constant is equal to Lo, we could use
- // vmv.v.x whose EEW = 32 to lower it.
- if (LoC == HiC && isAllOnesConstant(VL)) {
- MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
- // TODO: if vl <= min(VLMAX), we can also do this. But we could not
- // access the subtarget here now.
- auto InterVec = DAG.getNode(
- RISCVISD::VMV_V_X_VL, DL, InterVT, DAG.getUNDEF(InterVT), Lo,
- DAG.getRegister(RISCV::X0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
+ // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
+ // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
+ // vlmax vsetvli or vsetivli to change the VL.
+ // FIXME: Support larger constants?
+ // FIXME: Support non-constant VLs by saturating?
+ if (LoC == HiC) {
+ SDValue NewVL;
+ if (isAllOnesConstant(VL) ||
+ (isa<RegisterSDNode>(VL) &&
+ cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
+ NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
+ else if (isa<ConstantSDNode>(VL) &&
+ isUInt<4>(cast<ConstantSDNode>(VL)->getZExtValue()))
+ NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
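+      // The i32 view has twice as many elements, so double the VL; the
+      // isUInt<4> guard keeps the doubled VL within vsetivli's 5-bit uimm.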
+
+ if (NewVL) {
+ MVT InterVT =
+ MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
+ auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
+ DAG.getUNDEF(InterVT), Lo,
+ DAG.getRegister(RISCV::X0, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
+ }
}
}
+ // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
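+  // For example, splatting (i64 (sext i32 %x)) on RV32 arrives here as
+  // Lo = %x and Hi = (SRA %x, 31); vmv.v.x already sign-extends its scalar
+  // to SEW, so splatting Lo alone produces the correct 64-bit lanes.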
+ if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
+ isa<ConstantSDNode>(Hi.getOperand(1)) &&
+ Hi.getConstantOperandVal(1) == 31)
+ return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
+
+ // If the hi bits of the splat are undefined, then it's fine to just splat Lo
+ // even if it might be sign extended.
+ if (Hi.isUndef())
+ return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
+
// Fall back to a stack store and stride x0 vector load.
return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
Hi, VL);
@@ -3393,12 +3988,8 @@ static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
bool HasPassthru = Passthru && !Passthru.isUndef();
if (!HasPassthru && !Passthru)
Passthru = DAG.getUNDEF(VT);
- if (VT.isFloatingPoint()) {
- // If VL is 1, we could use vfmv.s.f.
- if (isOneConstant(VL))
- return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
+ if (VT.isFloatingPoint())
return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
- }
MVT XLenVT = Subtarget.getXLenVT();
@@ -3411,12 +4002,6 @@ static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
unsigned ExtOpc =
isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
- // If VL is 1 and the scalar value won't benefit from immediate, we could
- // use vmv.s.x.
- if (isOneConstant(VL) &&
- (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
- return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
}
@@ -3431,14 +4016,6 @@ static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
}
-static MVT getLMUL1VT(MVT VT) {
- assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
- "Unexpected vector MVT");
- return MVT::getScalableVectorVT(
- VT.getVectorElementType(),
- RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
-}
-
// This function lowers an insert of a scalar operand Scalar into lane
// 0 of the vector regardless of the value of VL. The contents of the
// remaining lanes of the result vector are unspecified. VL is assumed
@@ -3446,24 +4023,34 @@ static MVT getLMUL1VT(MVT VT) {
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
const SDLoc &DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
- const MVT XLenVT = Subtarget.getXLenVT();
+ assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
+ const MVT XLenVT = Subtarget.getXLenVT();
SDValue Passthru = DAG.getUNDEF(VT);
- if (VT.isFloatingPoint()) {
- // TODO: Use vmv.v.i for appropriate constants
- // Use M1 or smaller to avoid over constraining register allocation
- const MVT M1VT = getLMUL1VT(VT);
- auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
- SDValue Result = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, InnerVT,
- DAG.getUNDEF(InnerVT), Scalar, VL);
- if (VT != InnerVT)
- Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
- DAG.getUNDEF(VT),
- Result, DAG.getConstant(0, DL, XLenVT));
- return Result;
+
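+  // If the scalar is itself lane 0 of some vector, skip the scalar move and
+  // reuse that vector directly via a subvector insert (or extract, when the
+  // source container is wider than VT).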
+ if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ isNullConstant(Scalar.getOperand(1))) {
+ SDValue ExtractedVal = Scalar.getOperand(0);
+ MVT ExtractedVT = ExtractedVal.getSimpleValueType();
+ MVT ExtractedContainerVT = ExtractedVT;
+ if (ExtractedContainerVT.isFixedLengthVector()) {
+ ExtractedContainerVT = getContainerForFixedLengthVector(
+ DAG, ExtractedContainerVT, Subtarget);
+ ExtractedVal = convertToScalableVector(ExtractedContainerVT, ExtractedVal,
+ DAG, Subtarget);
+ }
+ if (ExtractedContainerVT.bitsLE(VT))
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, ExtractedVal,
+ DAG.getConstant(0, DL, XLenVT));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
+ DAG.getConstant(0, DL, XLenVT));
}
+ if (VT.isFloatingPoint())
+ return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
+ DAG.getUNDEF(VT), Scalar, VL);
+
// Avoid the tricky legalization cases by falling back to using the
// splat code which already handles it gracefully.
if (!Scalar.getValueType().bitsLE(XLenVT))
@@ -3478,24 +4065,8 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
unsigned ExtOpc =
isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
- // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
- // higher would involve overly constraining the register allocator for
- // no purpose.
- if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar)) {
- if (!isNullConstant(Scalar) && isInt<5>(Const->getSExtValue()) &&
- VT.bitsLE(getLMUL1VT(VT)))
- return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
- }
- // Use M1 or smaller to avoid over constraining register allocation
- const MVT M1VT = getLMUL1VT(VT);
- auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
- SDValue Result = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, InnerVT,
- DAG.getUNDEF(InnerVT), Scalar, VL);
- if (VT != InnerVT)
- Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
- DAG.getUNDEF(VT),
- Result, DAG.getConstant(0, DL, XLenVT));
- return Result;
+ return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
+ DAG.getUNDEF(VT), Scalar, VL);
}
// Is this a shuffle that extracts either the even or odd elements of a vector?
@@ -3509,7 +4080,7 @@ static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const RISCVSubtarget &Subtarget) {
// Need to be able to widen the vector.
- if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
+ if (VT.getScalarSizeInBits() >= Subtarget.getELen())
return false;
// Both input must be extracts.
@@ -3553,7 +4124,7 @@ static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
int &OddSrc, const RISCVSubtarget &Subtarget) {
// We need to be able to widen elements to the next larger integer type.
- if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
+ if (VT.getScalarSizeInBits() >= Subtarget.getELen())
return false;
int Size = Mask.size();
@@ -3882,6 +4453,8 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
auto OpCode = IsVSlidedown ?
(VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
(VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
+ if (!VT.isFloatingPoint())
+ Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
DAG.getUNDEF(ContainerVT),
convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
@@ -3904,7 +4477,7 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
}
- assert(VecVT.getScalarSizeInBits() < Subtarget.getELEN());
+ assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
// We're working with a vector of the same size as the resulting
// interleaved vector, but with half the number of elements and
@@ -3925,24 +4498,37 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
SDValue Passthru = DAG.getUNDEF(WideContainerVT);
- // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
- // vwaddu.vv
- SDValue Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT,
- EvenV, OddV, Passthru, Mask, VL);
-
- // Then get OddV * by 2^(VecVT.getScalarSizeInBits() - 1)
- SDValue AllOnesVec = DAG.getSplatVector(
- VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
- SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT, OddV,
- AllOnesVec, Passthru, Mask, VL);
-
- // Add the two together so we get
- // (OddV * 0xff...ff) + (OddV + EvenV)
- // = (OddV * 0x100...00) + EvenV
- // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
- // Note the ADD_VL and VLMULU_VL should get selected as vwmaccu.vx
- Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT, Interleaved,
- OddsMul, Passthru, Mask, VL);
+ SDValue Interleaved;
+ if (Subtarget.hasStdExtZvbb()) {
+ // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
+ SDValue OffsetVec =
+ DAG.getSplatVector(VecContainerVT, DL,
+ DAG.getConstant(VecVT.getScalarSizeInBits(), DL,
+ Subtarget.getXLenVT()));
+ Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
+ OffsetVec, Passthru, Mask, VL);
+ Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
+ Interleaved, EvenV, Passthru, Mask, VL);
+ } else {
+ // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
+ // vwaddu.vv
+ Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
+ OddV, Passthru, Mask, VL);
+
+    // Then multiply OddV by 2^VecVT.getScalarSizeInBits() - 1 (all-ones)
+ SDValue AllOnesVec = DAG.getSplatVector(
+ VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
+ SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
+ OddV, AllOnesVec, Passthru, Mask, VL);
+
+ // Add the two together so we get
+ // (OddV * 0xff...ff) + (OddV + EvenV)
+ // = (OddV * 0x100...00) + EvenV
+ // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
+    // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
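+    // For example, with 8-bit elements, an OddV lane of 0x12 and an EvenV
+    // lane of 0x34 widen and combine as 0x12 * 0xFF + (0x12 + 0x34) =
+    // 0x11EE + 0x46 = 0x1234, the interleaved 16-bit lane.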
+ Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
+ Interleaved, OddsMul, Passthru, Mask, VL);
+ }
// Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
MVT ResultContainerVT = MVT::getVectorVT(
@@ -3961,6 +4547,96 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
return Interleaved;
}
+// If we have a vector of bits that we want to reverse, we can use a vbrev on a
+// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
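+// When the mask doesn't fill the larger element, the bits land in the high
+// end after the reverse; e.g. reversing v4i1 inserts the 4 bits into a v8i1,
+// bitreverses it as a v1i8, then shifts right by 8 - 4 = 4 to realign.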
+static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ SDLoc DL(SVN);
+ MVT VT = SVN->getSimpleValueType(0);
+ SDValue V = SVN->getOperand(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ assert(VT.getVectorElementType() == MVT::i1);
+
+ if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
+ SVN->getMask().size()) ||
+ !SVN->getOperand(1).isUndef())
+ return SDValue();
+
+ unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
+ EVT ViaVT = EVT::getVectorVT(
+ *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
+ EVT ViaBitVT =
+ EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
+
+  // If we don't have zvbb or the larger element type is wider than ELEN, the
+  // operation will be illegal.
+ if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
+ ViaVT) ||
+ !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
+ return SDValue();
+
+ // If the bit vector doesn't fit exactly into the larger element type, we need
+ // to insert it into the larger vector and then shift up the reversed bits
+ // afterwards to get rid of the gap introduced.
+ if (ViaEltSize > NumElts)
+ V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
+ V, DAG.getVectorIdxConstant(0, DL));
+
+ SDValue Res =
+ DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
+
+ // Shift up the reversed bits if the vector didn't exactly fit into the larger
+ // element type.
+ if (ViaEltSize > NumElts)
+ Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
+ DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
+
+ Res = DAG.getBitcast(ViaBitVT, Res);
+
+ if (ViaEltSize > NumElts)
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+ DAG.getVectorIdxConstant(0, DL));
+ return Res;
+}
+
+// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
+// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
+// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
+static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ SDLoc DL(SVN);
+
+ EVT VT = SVN->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ unsigned NumSubElts, RotateAmt;
+ if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
+ NumElts, NumSubElts, RotateAmt))
+ return SDValue();
+ MVT RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
+ NumElts / NumSubElts);
+
+ // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
+ if (!Subtarget.getTargetLowering()->isTypeLegal(RotateVT))
+ return SDValue();
+
+ SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
+
+ SDValue Rotate;
+  // A rotate of an i16 by 8 bits in either direction is equivalent to a byteswap,
+ // so canonicalize to vrev8.
+ if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
+ Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
+ else
+ Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
+ DAG.getConstant(RotateAmt, DL, RotateVT));
+
+ return DAG.getBitcast(VT, Rotate);
+}
+
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
SDValue V1 = Op.getOperand(0);
@@ -3971,8 +4647,15 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
unsigned NumElts = VT.getVectorNumElements();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
- // Promote i1 shuffle to i8 shuffle.
if (VT.getVectorElementType() == MVT::i1) {
+ // Lower to a vror.vi of a larger element type if possible before we promote
+ // i1s to i8s.
+ if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
+ return V;
+ if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
+ return V;
+
+ // Promote i1 shuffle to i8 shuffle.
MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
@@ -4008,8 +4691,8 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
auto *Ld = cast<LoadSDNode>(V);
Offset *= SVT.getStoreSize();
- SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
- TypeSize::Fixed(Offset), DL);
+ SDValue NewAddr = DAG.getMemBasePlusOffset(
+ Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
// If this is SEW=64 on RV32, use a strided load with a stride of x0.
if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
@@ -4071,6 +4754,12 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
return V;
+ // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
+ // available.
+ if (Subtarget.hasStdExtZvkb())
+ if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
+ return V;
+
// Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
// be undef which can be handled with a single SLIDEDOWN/UP.
int LoSrc, HiSrc;
@@ -4197,6 +4886,12 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
if (IsSelect)
return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
+ // We might be able to express the shuffle as a bitrotate. But even if we
+ // don't have Zvkb and have to expand, the expanded sequence of approx. 2
+ // shifts and a vor will have a higher throughput than a vrgather.
+ if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
+ return V;
+
if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
// On such a large vector we're unable to use i8 as the index type.
// FIXME: We could promote the index to i16 and use vrgatherei16, but that
@@ -4216,6 +4911,15 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
IndexVT = IndexVT.changeVectorElementType(MVT::i16);
}
+ // If the mask allows, we can do all the index computation in 16 bits. This
+ // requires less work and less register pressure at high LMUL, and creates
+ // smaller constants which may be cheaper to materialize.
+ if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
+ (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
+ GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
+ IndexVT = IndexVT.changeVectorElementType(MVT::i16);
+ }
+
MVT IndexContainerVT =
ContainerVT.changeVectorElementType(IndexVT.getScalarType());
@@ -4490,26 +5194,26 @@ static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
if (!Subtarget.useConstantPoolForLargeInts())
return Op;
- RISCVMatInt::InstSeq Seq =
- RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
+ RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
return Op;
- // Special case. See if we can build the constant as (ADD (SLLI X, 32), X) do
+ // Optimizations below are disabled for opt size. If we're optimizing for
+ // size, use a constant pool.
+ if (DAG.shouldOptForSize())
+ return SDValue();
+
+ // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do
// that if it will avoid a constant pool.
// It will require an extra temporary register though.
- if (!DAG.shouldOptForSize()) {
- int64_t LoVal = SignExtend64<32>(Imm);
- int64_t HiVal = SignExtend64<32>(((uint64_t)Imm - (uint64_t)LoVal) >> 32);
- if (LoVal == HiVal) {
- RISCVMatInt::InstSeq SeqLo =
- RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits());
- if ((SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
- return Op;
- }
- }
+ // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
+ // low and high 32 bits are the same and bit 31 and 63 are set.
+ unsigned ShiftAmt, AddOpc;
+ RISCVMatInt::InstSeq SeqLo =
+ RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
+ if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
+ return Op;
- // Expand to a constant pool using the default expansion code.
return SDValue();
}
@@ -4547,8 +5251,7 @@ SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
MVT XLenVT = Subtarget.getXLenVT();
- auto CNode = cast<ConstantSDNode>(Op.getOperand(1));
- unsigned Check = CNode->getZExtValue();
+ unsigned Check = Op.getConstantOperandVal(1);
unsigned TDCMask = 0;
if (Check & fcSNan)
TDCMask |= RISCV::FPMASK_Signaling_NaN;
@@ -4582,6 +5285,10 @@ SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
if (VT.isScalableVector()) {
MVT DstVT = VT0.changeVectorElementTypeToInteger();
auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
+ if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
+ Mask = Op.getOperand(2);
+ VL = Op.getOperand(3);
+ }
SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
VL, Op->getFlags());
if (IsOneBitMask)
@@ -4598,7 +5305,13 @@ SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
MVT ContainerVT = getContainerForFixedLengthVector(VT);
MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
-
+ if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
+ Mask = Op.getOperand(2);
+ MVT MaskContainerVT =
+ getContainerForFixedLengthVector(Mask.getSimpleValueType());
+ Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
+ VL = Op.getOperand(3);
+ }
Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
@@ -4616,7 +5329,7 @@ SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
- SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
+ SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
@@ -4626,10 +5339,11 @@ SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
}
- SDValue FPCLASS = DAG.getNode(RISCVISD::FPCLASS, DL, VT, Op.getOperand(0));
- SDValue AND = DAG.getNode(ISD::AND, DL, VT, FPCLASS, TDCMaskV);
- return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, XLenVT),
- ISD::CondCode::SETNE);
+ SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
+ SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
+ SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
+ ISD::CondCode::SETNE);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
}
// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
@@ -4637,38 +5351,88 @@ SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
SDLoc DL(Op);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);
- MVT XLenVT = Subtarget.getXLenVT();
+ if (!VT.isVector()) {
+ MVT XLenVT = Subtarget.getXLenVT();
- // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
- // ensures that when one input is a nan, the other will also be a nan allowing
- // the nan to propagate. If both inputs are nan, this will swap the inputs
- // which is harmless.
- // FIXME: Handle nonans FMF and use isKnownNeverNaN.
- SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
- SDValue NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
+ // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
+ // ensures that when one input is a nan, the other will also be a nan
+ // allowing the nan to propagate. If both inputs are nan, this will swap the
+ // inputs which is harmless.
- SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
- SDValue NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
+ SDValue NewY = Y;
+ if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
+ SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
+ NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
+ }
+
+ SDValue NewX = X;
+ if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
+ SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
+ NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
+ }
+
+ unsigned Opc =
+ Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
+ return DAG.getNode(Opc, DL, VT, NewX, NewY);
+ }
+
+  // Check whether the inputs are known NaN-free before converting the
+  // fixed-length vectors to scalable form.
+ bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
+ bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
+
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+ X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
+ Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
+ }
+
+ auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+ SDValue NewY = Y;
+ if (!XIsNeverNan) {
+ SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
+ {X, X, DAG.getCondCode(ISD::SETOEQ),
+ DAG.getUNDEF(ContainerVT), Mask, VL});
+ NewY =
+ DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, XIsNonNan, Y, X, VL);
+ }
+
+ SDValue NewX = X;
+ if (!YIsNeverNan) {
+ SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
+ {Y, Y, DAG.getCondCode(ISD::SETOEQ),
+ DAG.getUNDEF(ContainerVT), Mask, VL});
+ NewX =
+ DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, YIsNonNan, X, Y, VL);
+ }
unsigned Opc =
- Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
- return DAG.getNode(Opc, DL, VT, NewX, NewY);
+ Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::VFMAX_VL : RISCVISD::VFMIN_VL;
+ SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
+ DAG.getUNDEF(ContainerVT), Mask, VL);
+ if (VT.isFixedLengthVector())
+ Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
+ return Res;
}
-/// Get a RISCV target specified VL op for a given SDNode.
+/// Get a RISC-V target specified VL op for a given SDNode.
static unsigned getRISCVVLOp(SDValue Op) {
#define OP_CASE(NODE) \
case ISD::NODE: \
return RISCVISD::NODE##_VL;
+#define VP_CASE(NODE) \
+ case ISD::VP_##NODE: \
+ return RISCVISD::NODE##_VL;
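+  // e.g. VP_CASE(ADD) expands to: case ISD::VP_ADD: return RISCVISD::ADD_VL;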
+ // clang-format off
switch (Op.getOpcode()) {
default:
llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
- // clang-format off
OP_CASE(ADD)
OP_CASE(SUB)
OP_CASE(MUL)
@@ -4681,6 +5445,13 @@ static unsigned getRISCVVLOp(SDValue Op) {
OP_CASE(SHL)
OP_CASE(SRA)
OP_CASE(SRL)
+ OP_CASE(ROTL)
+ OP_CASE(ROTR)
+ OP_CASE(BSWAP)
+ OP_CASE(CTTZ)
+ OP_CASE(CTLZ)
+ OP_CASE(CTPOP)
+ OP_CASE(BITREVERSE)
OP_CASE(SADDSAT)
OP_CASE(UADDSAT)
OP_CASE(SSUBSAT)
@@ -4696,47 +5467,113 @@ static unsigned getRISCVVLOp(SDValue Op) {
OP_CASE(SMAX)
OP_CASE(UMIN)
OP_CASE(UMAX)
- OP_CASE(FMINNUM)
- OP_CASE(FMAXNUM)
OP_CASE(STRICT_FADD)
OP_CASE(STRICT_FSUB)
OP_CASE(STRICT_FMUL)
OP_CASE(STRICT_FDIV)
OP_CASE(STRICT_FSQRT)
- // clang-format on
-#undef OP_CASE
+ VP_CASE(ADD) // VP_ADD
+ VP_CASE(SUB) // VP_SUB
+ VP_CASE(MUL) // VP_MUL
+ VP_CASE(SDIV) // VP_SDIV
+ VP_CASE(SREM) // VP_SREM
+ VP_CASE(UDIV) // VP_UDIV
+ VP_CASE(UREM) // VP_UREM
+ VP_CASE(SHL) // VP_SHL
+ VP_CASE(FADD) // VP_FADD
+ VP_CASE(FSUB) // VP_FSUB
+ VP_CASE(FMUL) // VP_FMUL
+ VP_CASE(FDIV) // VP_FDIV
+ VP_CASE(FNEG) // VP_FNEG
+ VP_CASE(FABS) // VP_FABS
+ VP_CASE(SMIN) // VP_SMIN
+ VP_CASE(SMAX) // VP_SMAX
+ VP_CASE(UMIN) // VP_UMIN
+ VP_CASE(UMAX) // VP_UMAX
+ VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
+ VP_CASE(SETCC) // VP_SETCC
+ VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
+ VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
+ VP_CASE(BITREVERSE) // VP_BITREVERSE
+ VP_CASE(BSWAP) // VP_BSWAP
+ VP_CASE(CTLZ) // VP_CTLZ
+ VP_CASE(CTTZ) // VP_CTTZ
+ VP_CASE(CTPOP) // VP_CTPOP
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::VP_CTLZ_ZERO_UNDEF:
+ return RISCVISD::CTLZ_VL;
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::VP_CTTZ_ZERO_UNDEF:
+ return RISCVISD::CTTZ_VL;
case ISD::FMA:
+ case ISD::VP_FMA:
return RISCVISD::VFMADD_VL;
case ISD::STRICT_FMA:
return RISCVISD::STRICT_VFMADD_VL;
case ISD::AND:
+ case ISD::VP_AND:
if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
return RISCVISD::VMAND_VL;
return RISCVISD::AND_VL;
case ISD::OR:
+ case ISD::VP_OR:
if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
return RISCVISD::VMOR_VL;
return RISCVISD::OR_VL;
case ISD::XOR:
+ case ISD::VP_XOR:
if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
return RISCVISD::VMXOR_VL;
return RISCVISD::XOR_VL;
+ case ISD::VP_SELECT:
+ return RISCVISD::VSELECT_VL;
+ case ISD::VP_MERGE:
+ return RISCVISD::VP_MERGE_VL;
+ case ISD::VP_ASHR:
+ return RISCVISD::SRA_VL;
+ case ISD::VP_LSHR:
+ return RISCVISD::SRL_VL;
+ case ISD::VP_SQRT:
+ return RISCVISD::FSQRT_VL;
+ case ISD::VP_SIGN_EXTEND:
+ return RISCVISD::VSEXT_VL;
+ case ISD::VP_ZERO_EXTEND:
+ return RISCVISD::VZEXT_VL;
+ case ISD::VP_FP_TO_SINT:
+ return RISCVISD::VFCVT_RTZ_X_F_VL;
+ case ISD::VP_FP_TO_UINT:
+ return RISCVISD::VFCVT_RTZ_XU_F_VL;
+ case ISD::FMINNUM:
+ case ISD::VP_FMINNUM:
+ return RISCVISD::VFMIN_VL;
+ case ISD::FMAXNUM:
+ case ISD::VP_FMAXNUM:
+ return RISCVISD::VFMAX_VL;
}
+ // clang-format on
+#undef OP_CASE
+#undef VP_CASE
}
/// Return true if a RISC-V target specified op has a merge operand.
static bool hasMergeOp(unsigned Opcode) {
assert(Opcode > RISCVISD::FIRST_NUMBER &&
- Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL &&
+ Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
- assert(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL - RISCVISD::FIRST_NUMBER == 421 &&
- "adding target specific op should update this function");
- if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::FMAXNUM_VL)
+ static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
+ 125 &&
+ RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
+ ISD::FIRST_TARGET_STRICTFP_OPCODE ==
+ 21 &&
+ "adding target specific op should update this function");
+ if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
return true;
if (Opcode == RISCVISD::FCOPYSIGN_VL)
return true;
if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
return true;
+ if (Opcode == RISCVISD::SETCC_VL)
+ return true;
if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
return true;
return false;
@@ -4745,10 +5582,14 @@ static bool hasMergeOp(unsigned Opcode) {
/// Return true if a RISC-V target specified op has a mask operand.
static bool hasMaskOp(unsigned Opcode) {
assert(Opcode > RISCVISD::FIRST_NUMBER &&
- Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL &&
+ Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
- assert(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL - RISCVISD::FIRST_NUMBER == 421 &&
- "adding target specific op should update this function");
+ static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
+ 125 &&
+ RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
+ ISD::FIRST_TARGET_STRICTFP_OPCODE ==
+ 21 &&
+ "adding target specific op should update this function");
if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
return true;
if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
@@ -4759,6 +5600,112 @@ static bool hasMaskOp(unsigned Opcode) {
return false;
}
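+// Split Op into ops on the low and high halves of its result type: scalar
+// operands are duplicated into both halves, vector operands are split with
+// SplitVector, and the partial results are rejoined with CONCAT_VECTORS.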
+static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
+ auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
+ SDLoc DL(Op);
+
+ SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
+ SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
+
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (!Op.getOperand(j).getValueType().isVector()) {
+ LoOperands[j] = Op.getOperand(j);
+ HiOperands[j] = Op.getOperand(j);
+ continue;
+ }
+ std::tie(LoOperands[j], HiOperands[j]) =
+ DAG.SplitVector(Op.getOperand(j), DL);
+ }
+
+ SDValue LoRes =
+ DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
+ SDValue HiRes =
+ DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
+}
+
+static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
+ assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
+ auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
+ SDLoc DL(Op);
+
+ SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
+ SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
+
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
+ std::tie(LoOperands[j], HiOperands[j]) =
+ DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
+ continue;
+ }
+ if (!Op.getOperand(j).getValueType().isVector()) {
+ LoOperands[j] = Op.getOperand(j);
+ HiOperands[j] = Op.getOperand(j);
+ continue;
+ }
+ std::tie(LoOperands[j], HiOperands[j]) =
+ DAG.SplitVector(Op.getOperand(j), DL);
+ }
+
+ SDValue LoRes =
+ DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
+ SDValue HiRes =
+ DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
+}
+
+static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+
+ auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
+ auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
+ auto [EVLLo, EVLHi] =
+ DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
+
+ SDValue ResLo =
+ DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
+ {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
+ return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
+ {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
+}
+
+static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
+
+ assert(Op->isStrictFPOpcode());
+
+ auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
+
+ SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
+ SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
+
+ SDLoc DL(Op);
+
+ SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
+ SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
+
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (!Op.getOperand(j).getValueType().isVector()) {
+ LoOperands[j] = Op.getOperand(j);
+ HiOperands[j] = Op.getOperand(j);
+ continue;
+ }
+ std::tie(LoOperands[j], HiOperands[j]) =
+ DAG.SplitVector(Op.getOperand(j), DL);
+ }
+
+ SDValue LoRes =
+ DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
+ HiOperands[0] = LoRes.getValue(1);
+ SDValue HiRes =
+ DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
+
+ SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
+ LoRes.getValue(0), HiRes.getValue(0));
+ return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
+}
+
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -4796,6 +5743,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerShiftRightParts(Op, DAG, false);
case ISD::ROTL:
case ISD::ROTR:
+ if (Op.getValueType().isFixedLengthVector()) {
+ assert(Subtarget.hasStdExtZvkb());
+ return lowerToScalableOp(Op, DAG);
+ }
assert(Subtarget.hasVendorXTHeadBb() &&
!(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
"Unexpected custom legalization");
@@ -4889,6 +5840,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return LowerIS_FPCLASS(Op, DAG);
case ISD::BITREVERSE: {
MVT VT = Op.getSimpleValueType();
+ if (VT.isFixedLengthVector()) {
+ assert(Subtarget.hasStdExtZvbb());
+ return lowerToScalableOp(Op, DAG);
+ }
SDLoc DL(Op);
assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
@@ -4931,6 +5886,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
if (VT.isFixedLengthVector())
ContainerVT = getContainerForFixedLengthVector(VT);
SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+ Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), Scalar, VL);
if (VT.isFixedLengthVector())
@@ -4938,9 +5894,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return V;
}
case ISD::VSCALE: {
+ MVT XLenVT = Subtarget.getXLenVT();
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
- SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
+ SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
// We define our scalable vector types for lmul=1 to use a 64 bit known
// minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
// vscale as VLENB / 8.
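+  // For example, vscale * 2 lowers to VLENB >> 2 (Log2 = 1 < 3, shift by
+  // 3 - 1) and vscale * 24 takes the multiple-of-8 path as VLENB * 3.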
@@ -4953,22 +5910,23 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
if (isPowerOf2_64(Val)) {
uint64_t Log2 = Log2_64(Val);
if (Log2 < 3)
- return DAG.getNode(ISD::SRL, DL, VT, VLENB,
- DAG.getConstant(3 - Log2, DL, VT));
- if (Log2 > 3)
- return DAG.getNode(ISD::SHL, DL, VT, VLENB,
- DAG.getConstant(Log2 - 3, DL, VT));
- return VLENB;
- }
- // If the multiplier is a multiple of 8, scale it down to avoid needing
- // to shift the VLENB value.
- if ((Val % 8) == 0)
- return DAG.getNode(ISD::MUL, DL, VT, VLENB,
- DAG.getConstant(Val / 8, DL, VT));
-
- SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
- DAG.getConstant(3, DL, VT));
- return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
+ Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
+ DAG.getConstant(3 - Log2, DL, VT));
+ else if (Log2 > 3)
+ Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
+ DAG.getConstant(Log2 - 3, DL, XLenVT));
+ } else if ((Val % 8) == 0) {
+ // If the multiplier is a multiple of 8, scale it down to avoid needing
+ // to shift the VLENB value.
+ Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
+ DAG.getConstant(Val / 8, DL, XLenVT));
+ } else {
+ SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
+ DAG.getConstant(3, DL, XLenVT));
+ Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
+ DAG.getConstant(Val, DL, XLenVT));
+ }
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
}
case ISD::FPOWI: {
// Custom promote f16 powi with illegal i32 integer type on RV64. Once
@@ -4986,6 +5944,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
}
case ISD::FMAXIMUM:
case ISD::FMINIMUM:
+ if (Op.getValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitVectorOp(Op, DAG);
return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
case ISD::FP_EXTEND: {
SDLoc DL(Op);
@@ -5026,10 +5988,42 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::STRICT_FP_ROUND:
case ISD::STRICT_FP_EXTEND:
return lowerStrictFPExtendOrRoundLike(Op, DAG);
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
+ if (Op.getValueType().isVector() &&
+ Op.getValueType().getScalarType() == MVT::f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16())) {
+ if (Op.getValueType() == MVT::nxv32f16)
+ return SplitVectorOp(Op, DAG);
+ // int -> f32
+ SDLoc DL(Op);
+ MVT NVT =
+ MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
+ SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
+ // f32 -> f16
+ return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
+ DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
+ }
+ [[fallthrough]];
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ if (SDValue Op1 = Op.getOperand(0);
+ Op1.getValueType().isVector() &&
+ Op1.getValueType().getScalarType() == MVT::f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16())) {
+ if (Op1.getValueType() == MVT::nxv32f16)
+ return SplitVectorOp(Op, DAG);
+ // f16 -> f32
+ SDLoc DL(Op);
+ MVT NVT = MVT::getVectorVT(MVT::f32,
+ Op1.getValueType().getVectorElementCount());
+ SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
+ // f32 -> int
+ return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
+ }
+ [[fallthrough]];
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::STRICT_SINT_TO_FP:
@@ -5180,7 +6174,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
SDValue Res =
makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
- if (Subtarget.is64Bit())
+ if (Subtarget.is64Bit() && !RV64LegalI32)
return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
return DAG.getBitcast(MVT::i32, Res);
}
@@ -5209,7 +6203,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
SDValue Res =
makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
- if (Subtarget.is64Bit())
+ if (Subtarget.is64Bit() && !RV64LegalI32)
return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
return DAG.getBitcast(MVT::i32, Res);
}
@@ -5236,6 +6230,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::FROUND:
case ISD::FROUNDEVEN:
return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
+ case ISD::LRINT:
+ case ISD::LLRINT:
+ return lowerVectorXRINT(Op, DAG, Subtarget);
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_SMAX:
@@ -5262,6 +6259,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VP_REDUCE_SEQ_FADD:
case ISD::VP_REDUCE_FMIN:
case ISD::VP_REDUCE_FMAX:
+ if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitVectorReductionOp(Op, DAG);
return lowerVPREDUCE(Op, DAG);
case ISD::VP_REDUCE_AND:
case ISD::VP_REDUCE_OR:
@@ -5291,6 +6292,21 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::BUILD_VECTOR:
return lowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::SPLAT_VECTOR:
+ if (Op.getValueType().getScalarType() == MVT::f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16())) {
+ if (Op.getValueType() == MVT::nxv32f16)
+ return SplitVectorOp(Op, DAG);
+ SDLoc DL(Op);
+ SDValue NewScalar =
+ DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
+ SDValue NewSplat = DAG.getNode(
+ ISD::SPLAT_VECTOR, DL,
+ MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
+ NewScalar);
+ return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat,
+ DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
+ }
if (Op.getValueType().getVectorElementType() == MVT::i1)
return lowerVectorMaskSplat(Op, DAG);
return SDValue();
@@ -5387,6 +6403,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
}
+ if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitVectorOp(Op, DAG);
+
return lowerFixedLengthVectorSetccToRVV(Op, DAG);
}
case ISD::ADD:
@@ -5401,6 +6422,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::SREM:
case ISD::UDIV:
case ISD::UREM:
+ case ISD::BSWAP:
+ case ISD::CTPOP:
return lowerToScalableOp(Op, DAG);
case ISD::SHL:
case ISD::SRA:
@@ -5411,10 +6434,6 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
return SDValue();
- case ISD::SADDSAT:
- case ISD::UADDSAT:
- case ISD::SSUBSAT:
- case ISD::USUBSAT:
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
@@ -5423,23 +6442,40 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::FABS:
case ISD::FSQRT:
case ISD::FMA:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ if (Op.getValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitVectorOp(Op, DAG);
+ [[fallthrough]];
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
- case ISD::FMINNUM:
- case ISD::FMAXNUM:
return lowerToScalableOp(Op, DAG);
case ISD::ABS:
case ISD::VP_ABS:
return lowerABS(Op, DAG);
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
+ if (Subtarget.hasStdExtZvbb())
+ return lowerToScalableOp(Op, DAG);
+ assert(Op.getOpcode() != ISD::CTTZ);
return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
case ISD::VSELECT:
return lowerFixedLengthVectorSelectToRVV(Op, DAG);
case ISD::FCOPYSIGN:
+ if (Op.getValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitVectorOp(Op, DAG);
return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
@@ -5447,6 +6483,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::STRICT_FDIV:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FMA:
+ if (Op.getValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitStrictFPVectorOp(Op, DAG);
return lowerToScalableOp(Op, DAG);
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
@@ -5472,106 +6512,115 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::EH_DWARF_CFA:
return lowerEH_DWARF_CFA(Op, DAG);
case ISD::VP_SELECT:
- return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL);
case ISD::VP_MERGE:
- return lowerVPOp(Op, DAG, RISCVISD::VP_MERGE_VL);
case ISD::VP_ADD:
- return lowerVPOp(Op, DAG, RISCVISD::ADD_VL, /*HasMergeOp*/ true);
case ISD::VP_SUB:
- return lowerVPOp(Op, DAG, RISCVISD::SUB_VL, /*HasMergeOp*/ true);
case ISD::VP_MUL:
- return lowerVPOp(Op, DAG, RISCVISD::MUL_VL, /*HasMergeOp*/ true);
case ISD::VP_SDIV:
- return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL, /*HasMergeOp*/ true);
case ISD::VP_UDIV:
- return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL, /*HasMergeOp*/ true);
case ISD::VP_SREM:
- return lowerVPOp(Op, DAG, RISCVISD::SREM_VL, /*HasMergeOp*/ true);
case ISD::VP_UREM:
- return lowerVPOp(Op, DAG, RISCVISD::UREM_VL, /*HasMergeOp*/ true);
+ return lowerVPOp(Op, DAG);
case ISD::VP_AND:
- return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, RISCVISD::AND_VL);
case ISD::VP_OR:
- return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL);
case ISD::VP_XOR:
- return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL);
- case ISD::VP_ASHR:
- return lowerVPOp(Op, DAG, RISCVISD::SRA_VL, /*HasMergeOp*/ true);
- case ISD::VP_LSHR:
- return lowerVPOp(Op, DAG, RISCVISD::SRL_VL, /*HasMergeOp*/ true);
- case ISD::VP_SHL:
- return lowerVPOp(Op, DAG, RISCVISD::SHL_VL, /*HasMergeOp*/ true);
+ return lowerLogicVPOp(Op, DAG);
case ISD::VP_FADD:
- return lowerVPOp(Op, DAG, RISCVISD::FADD_VL, /*HasMergeOp*/ true);
case ISD::VP_FSUB:
- return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL, /*HasMergeOp*/ true);
case ISD::VP_FMUL:
- return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL, /*HasMergeOp*/ true);
case ISD::VP_FDIV:
- return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL, /*HasMergeOp*/ true);
case ISD::VP_FNEG:
- return lowerVPOp(Op, DAG, RISCVISD::FNEG_VL);
case ISD::VP_FABS:
- return lowerVPOp(Op, DAG, RISCVISD::FABS_VL);
case ISD::VP_SQRT:
- return lowerVPOp(Op, DAG, RISCVISD::FSQRT_VL);
case ISD::VP_FMA:
- return lowerVPOp(Op, DAG, RISCVISD::VFMADD_VL);
case ISD::VP_FMINNUM:
- return lowerVPOp(Op, DAG, RISCVISD::FMINNUM_VL, /*HasMergeOp*/ true);
case ISD::VP_FMAXNUM:
- return lowerVPOp(Op, DAG, RISCVISD::FMAXNUM_VL, /*HasMergeOp*/ true);
case ISD::VP_FCOPYSIGN:
- return lowerVPOp(Op, DAG, RISCVISD::FCOPYSIGN_VL, /*HasMergeOp*/ true);
+ if (Op.getValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitVPOp(Op, DAG);
+ [[fallthrough]];
+ case ISD::VP_ASHR:
+ case ISD::VP_LSHR:
+ case ISD::VP_SHL:
+ return lowerVPOp(Op, DAG);
+ case ISD::VP_IS_FPCLASS:
+ return LowerIS_FPCLASS(Op, DAG);
case ISD::VP_SIGN_EXTEND:
case ISD::VP_ZERO_EXTEND:
if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
return lowerVPExtMaskOp(Op, DAG);
- return lowerVPOp(Op, DAG,
- Op.getOpcode() == ISD::VP_SIGN_EXTEND
- ? RISCVISD::VSEXT_VL
- : RISCVISD::VZEXT_VL);
+ return lowerVPOp(Op, DAG);
case ISD::VP_TRUNCATE:
return lowerVectorTruncLike(Op, DAG);
case ISD::VP_FP_EXTEND:
case ISD::VP_FP_ROUND:
return lowerVectorFPExtendOrRoundLike(Op, DAG);
- case ISD::VP_FP_TO_SINT:
- return lowerVPFPIntConvOp(Op, DAG, RISCVISD::VFCVT_RTZ_X_F_VL);
- case ISD::VP_FP_TO_UINT:
- return lowerVPFPIntConvOp(Op, DAG, RISCVISD::VFCVT_RTZ_XU_F_VL);
case ISD::VP_SINT_TO_FP:
- return lowerVPFPIntConvOp(Op, DAG, RISCVISD::SINT_TO_FP_VL);
case ISD::VP_UINT_TO_FP:
- return lowerVPFPIntConvOp(Op, DAG, RISCVISD::UINT_TO_FP_VL);
+ if (Op.getValueType().isVector() &&
+ Op.getValueType().getScalarType() == MVT::f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16())) {
+ if (Op.getValueType() == MVT::nxv32f16)
+ return SplitVPOp(Op, DAG);
+ // int -> f32
+ SDLoc DL(Op);
+ MVT NVT =
+ MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
+ auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
+ // f32 -> f16
+ return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
+ DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
+ }
+ [[fallthrough]];
+ case ISD::VP_FP_TO_SINT:
+ case ISD::VP_FP_TO_UINT:
+ if (SDValue Op1 = Op.getOperand(0);
+ Op1.getValueType().isVector() &&
+ Op1.getValueType().getScalarType() == MVT::f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16())) {
+ if (Op1.getValueType() == MVT::nxv32f16)
+ return SplitVPOp(Op, DAG);
+ // f16 -> f32
+ SDLoc DL(Op);
+ MVT NVT = MVT::getVectorVT(MVT::f32,
+ Op1.getValueType().getVectorElementCount());
+ SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
+ // f32 -> int
+ return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
+ {WidenVec, Op.getOperand(1), Op.getOperand(2)});
+ }
+ return lowerVPFPIntConvOp(Op, DAG);
case ISD::VP_SETCC:
+ if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitVPOp(Op, DAG);
if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
return lowerVPSetCCMaskOp(Op, DAG);
- return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL, /*HasMergeOp*/ true);
+ [[fallthrough]];
case ISD::VP_SMIN:
- return lowerVPOp(Op, DAG, RISCVISD::SMIN_VL, /*HasMergeOp*/ true);
case ISD::VP_SMAX:
- return lowerVPOp(Op, DAG, RISCVISD::SMAX_VL, /*HasMergeOp*/ true);
case ISD::VP_UMIN:
- return lowerVPOp(Op, DAG, RISCVISD::UMIN_VL, /*HasMergeOp*/ true);
case ISD::VP_UMAX:
- return lowerVPOp(Op, DAG, RISCVISD::UMAX_VL, /*HasMergeOp*/ true);
case ISD::VP_BITREVERSE:
- return lowerVPOp(Op, DAG, RISCVISD::BITREVERSE_VL, /*HasMergeOp*/ true);
case ISD::VP_BSWAP:
- return lowerVPOp(Op, DAG, RISCVISD::BSWAP_VL, /*HasMergeOp*/ true);
+ return lowerVPOp(Op, DAG);
case ISD::VP_CTLZ:
case ISD::VP_CTLZ_ZERO_UNDEF:
if (Subtarget.hasStdExtZvbb())
- return lowerVPOp(Op, DAG, RISCVISD::CTLZ_VL, /*HasMergeOp*/ true);
+ return lowerVPOp(Op, DAG);
return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
case ISD::VP_CTTZ:
case ISD::VP_CTTZ_ZERO_UNDEF:
if (Subtarget.hasStdExtZvbb())
- return lowerVPOp(Op, DAG, RISCVISD::CTTZ_VL, /*HasMergeOp*/ true);
+ return lowerVPOp(Op, DAG);
return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
case ISD::VP_CTPOP:
- return lowerVPOp(Op, DAG, RISCVISD::CTPOP_VL, /*HasMergeOp*/ true);
+ return lowerVPOp(Op, DAG);
case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
return lowerVPStridedLoad(Op, DAG);
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
@@ -5583,7 +6632,13 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VP_FROUND:
case ISD::VP_FROUNDEVEN:
case ISD::VP_FROUNDTOZERO:
+ if (Op.getValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitVPOp(Op, DAG);
return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
+ case ISD::EXPERIMENTAL_VP_REVERSE:
+ return lowerVPReverseExperimental(Op, DAG);
}
}
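// Most of the per-opcode lowerVPOp(Op, DAG, RISCVISD::*_VL, /*HasMergeOp*/)
// call sites above collapse into plain lowerVPOp(Op, DAG), which now derives
// the VL opcode and merge-operand layout from the node itself. A reduced
// sketch of that table-driven dispatch (the real helpers are getRISCVVLOp
// and hasMergeOp; only a few rows are shown here):
static unsigned getRISCVVLOpSketch(unsigned Opcode) {
  switch (Opcode) {
  case ISD::VP_ADD:  return RISCVISD::ADD_VL;  // was an explicit call site
  case ISD::VP_FSUB: return RISCVISD::FSUB_VL;
  case ISD::VP_SMIN: return RISCVISD::SMIN_VL;
  default:           llvm_unreachable("row not included in this sketch");
  }
}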
@@ -5630,15 +6685,15 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
// Use PC-relative addressing to access the GOT for this symbol, then load
// the address from the GOT. This generates the pattern (PseudoLGA sym),
// which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
+ SDValue Load =
+ SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MemOp = MF.getMachineMemOperand(
MachinePointerInfo::getGOT(MF),
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant,
LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
- SDValue Load =
- DAG.getMemIntrinsicNode(RISCVISD::LGA, DL, DAG.getVTList(Ty, MVT::Other),
- {DAG.getEntryNode(), Addr}, Ty, MemOp);
+ DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
return Load;
}
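// The GOT access is now emitted directly as the PseudoLGA machine node, with
// setNodeMemRefs attaching the MachineMemOperand so the MOLoad, MOInvariant
// and MODereferenceable flags survive onto the expanded load. The core
// pattern, repeated in the TLS lowerings below:
//   SDValue Load =
//       SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
//   DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});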
@@ -5660,16 +6715,15 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
// not be within 2GiB of PC, so use GOT-indirect addressing to access the
// symbol. This generates the pattern (PseudoLGA sym), which expands to
// (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
+ SDValue Load =
+ SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MemOp = MF.getMachineMemOperand(
MachinePointerInfo::getGOT(MF),
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant,
LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
- SDValue Load =
- DAG.getMemIntrinsicNode(RISCVISD::LGA, DL,
- DAG.getVTList(Ty, MVT::Other),
- {DAG.getEntryNode(), Addr}, Ty, MemOp);
+ DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
return Load;
}
@@ -5724,15 +6778,15 @@ SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
// the pattern (PseudoLA_TLS_IE sym), which expands to
// (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
+ SDValue Load =
+ SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MemOp = MF.getMachineMemOperand(
MachinePointerInfo::getGOT(MF),
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant,
LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
- SDValue Load = DAG.getMemIntrinsicNode(
- RISCVISD::LA_TLS_IE, DL, DAG.getVTList(Ty, MVT::Other),
- {DAG.getEntryNode(), Addr}, Ty, MemOp);
+ DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
// Add the thread pointer.
SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
@@ -5768,7 +6822,8 @@ SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
// This generates the pattern (PseudoLA_TLS_GD sym), which expands to
// (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
- SDValue Load = DAG.getNode(RISCVISD::LA_TLS_GD, DL, Ty, Addr);
+ SDValue Load =
+ SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
// Prepare argument list to generate call.
ArgListTy Args;
@@ -5904,56 +6959,6 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
-/// check for equality with 0. This function emits nodes that convert the
-/// seteq/setne into something that can be compared with 0.
-/// Based on RISCVDAGToDAGISel::selectSETCC but modified to produce
-/// target-independent SelectionDAG nodes rather than machine nodes.
-static SDValue selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
- SelectionDAG &DAG) {
- assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
- "Unexpected condition code!");
-
- // We're looking for a setcc.
- if (N->getOpcode() != ISD::SETCC)
- return SDValue();
-
- // Must be an equality comparison.
- ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
- if (CCVal != ExpectedCCVal)
- return SDValue();
-
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
-
- if (!LHS.getValueType().isScalarInteger())
- return SDValue();
-
- // If the RHS side is 0, we don't need any extra instructions, return the LHS.
- if (isNullConstant(RHS))
- return LHS;
-
- SDLoc DL(N);
-
- if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
- int64_t CVal = C->getSExtValue();
- // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
- // non-zero otherwise.
- if (CVal == -2048)
- return DAG.getNode(ISD::XOR, DL, N->getValueType(0), LHS,
- DAG.getConstant(CVal, DL, N->getValueType(0)));
- // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
- // LHS is equal to the RHS and non-zero otherwise.
- if (isInt<12>(CVal) || CVal == 2048)
- return DAG.getNode(ISD::ADD, DL, N->getValueType(0), LHS,
- DAG.getConstant(-CVal, DL, N->getValueType(0)));
- }
-
- // If nothing else we can XOR the LHS and RHS to produce zero if they are
- // equal and a non-zero value if they aren't.
- return DAG.getNode(ISD::XOR, DL, N->getValueType(0), LHS, RHS);
-}
-
// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
@@ -6041,35 +7046,6 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// sequence or RISCVISD::SELECT_CC node (branch-based select).
if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
VT.isScalarInteger()) {
- if (SDValue NewCondV = selectSETCC(CondV, ISD::SETNE, DAG)) {
- // (select (riscv_setne c), t, 0) -> (czero_eqz t, c)
- if (isNullConstant(FalseV))
- return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, NewCondV);
- // (select (riscv_setne c), 0, f) -> (czero_nez f, c)
- if (isNullConstant(TrueV))
- return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, NewCondV);
- // (select (riscv_setne c), t, f) -> (or (czero_eqz t, c), (czero_nez f,
- // c)
- return DAG.getNode(
- ISD::OR, DL, VT,
- DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, NewCondV),
- DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, NewCondV));
- }
- if (SDValue NewCondV = selectSETCC(CondV, ISD::SETEQ, DAG)) {
- // (select (riscv_seteq c), t, 0) -> (czero_nez t, c)
- if (isNullConstant(FalseV))
- return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, TrueV, NewCondV);
- // (select (riscv_seteq c), 0, f) -> (czero_eqz f, c)
- if (isNullConstant(TrueV))
- return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, FalseV, NewCondV);
- // (select (riscv_seteq c), t, f) -> (or (czero_eqz f, c), (czero_nez t,
- // c)
- return DAG.getNode(
- ISD::OR, DL, VT,
- DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, FalseV, NewCondV),
- DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, TrueV, NewCondV));
- }
-
// (select c, t, 0) -> (czero_eqz t, c)
if (isNullConstant(FalseV))
return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
@@ -6090,10 +7066,17 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
ISD::OR, DL, VT, FalseV,
DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
+ // Try some other optimizations before falling back to generic lowering.
+ if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
+ return V;
+
// (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
- return DAG.getNode(ISD::OR, DL, VT,
- DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
- DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
+ // Unless we have the short forward branch optimization.
+ if (!Subtarget.hasShortForwardBranchOpt())
+ return DAG.getNode(
+ ISD::OR, DL, VT,
+ DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
+ DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
}
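// Zicond semantics make the general case a two-instruction OR:
//   czero.eqz rd, rs1, rs2  ->  rd = (rs2 == 0) ? 0 : rs1
//   czero.nez rd, rs1, rs2  ->  rd = (rs2 != 0) ? 0 : rs1
// so select(c, t, f) == czero.eqz(t, c) | czero.nez(f, c): exactly one side
// of the OR is zeroed for any zero/non-zero condition value.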
if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
@@ -6297,7 +7280,7 @@ SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
// if Shamt-XLEN < 0: // Shamt < XLEN
// Lo = Lo << Shamt
- // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 ^ Shamt))
+ // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
// else:
// Lo = 0
// Hi = Lo << (Shamt-XLEN)
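// Worked example for the Shamt < XLEN branch, with XLEN = 32 and Shamt = 3:
//   Hi = (Hi << 3) | ((Lo >>u 1) >>u 28), where 28 == XLEN-1 - Shamt.
// Splitting the right shift as (Lo >>u 1) >>u (XLEN-1 - Shamt) keeps every
// shift amount within [0, XLEN-1], avoiding the undefined Lo >>u XLEN case
// when Shamt == 0.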
@@ -6336,7 +7319,7 @@ SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
// SRA expansion:
// if Shamt-XLEN < 0: // Shamt < XLEN
- // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1))
+ // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
// Hi = Hi >>s Shamt
// else:
// Lo = Hi >>s (Shamt-XLEN);
@@ -6344,7 +7327,7 @@ SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
//
// SRL expansion:
// if Shamt-XLEN < 0: // Shamt < XLEN
- // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1))
+ // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
// Hi = Hi >>u Shamt
// else:
// Lo = Hi >>u (Shamt-XLEN);
@@ -6394,12 +7377,9 @@ SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
}
- MVT XLenVT = Subtarget.getXLenVT();
- assert(SplatVal.getValueType() == XLenVT &&
- "Unexpected type for i1 splat value");
MVT InterVT = VT.changeVectorElementType(MVT::i8);
- SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
- DAG.getConstant(1, DL, XLenVT));
+ SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
+ DAG.getConstant(1, DL, SplatVal.getValueType()));
SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
SDValue Zero = DAG.getConstant(0, DL, InterVT);
return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
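// The low bit is now masked in SplatVal's own type rather than asserting
// XLenVT, presumably because the scalar may be i32 on RV64 under the
// RV64LegalI32 mode this patch threads through; the 0/1 value is then
// splatted as i8 and compared against zero to form the i1 mask vector.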
@@ -6420,37 +7400,19 @@ SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
- if (VecVT.isFixedLengthVector()) {
- MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
- SDLoc DL(Op);
- auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
+ MVT ContainerVT = VecVT;
+ if (VecVT.isFixedLengthVector())
+ ContainerVT = getContainerForFixedLengthVector(VecVT);
- SDValue Res =
- splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
- return convertFromScalableVector(VecVT, Res, DAG, Subtarget);
- }
+ auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
- if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
- int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
- int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
- // If Hi constant is all the same sign bit as Lo, lower this as a custom
- // node in order to try and match RVV vector/scalar instructions.
- if ((LoC >> 31) == HiC)
- return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
- Lo, DAG.getRegister(RISCV::X0, MVT::i32));
- }
+ SDValue Res =
+ splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
- // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
- if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
- isa<ConstantSDNode>(Hi.getOperand(1)) &&
- Hi.getConstantOperandVal(1) == 31)
- return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), Lo,
- DAG.getRegister(RISCV::X0, MVT::i32));
+ if (VecVT.isFixedLengthVector())
+ Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
- // Fall back to use a stack store and stride x0 vector load. Use X0 as VL.
- return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT,
- DAG.getUNDEF(VecVT), Lo, Hi,
- DAG.getRegister(RISCV::X0, MVT::i32));
+ return Res;
}
// Custom-lower extensions from mask vectors by using a vselect either with 1
@@ -6754,6 +7716,32 @@ RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
return Result;
}
+// Given a scalable vector type and an index into it, returns the type for the
+// smallest subvector that the index fits in. This can be used to reduce LMUL
+// for operations like vslidedown.
+//
+// E.g. with Zvl128b, index 3 in an nxv4i32 fits within the first nxv2i32.
+static std::optional<MVT>
+getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert(VecVT.isScalableVector());
+ const unsigned EltSize = VecVT.getScalarSizeInBits();
+ const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
+ const unsigned MinVLMAX = VectorBitsMin / EltSize;
+ MVT SmallerVT;
+ if (MaxIdx < MinVLMAX)
+ SmallerVT = getLMUL1VT(VecVT);
+ else if (MaxIdx < MinVLMAX * 2)
+ SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
+ else if (MaxIdx < MinVLMAX * 4)
+ SmallerVT = getLMUL1VT(VecVT)
+ .getDoubleNumVectorElementsVT()
+ .getDoubleNumVectorElementsVT();
+ if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
+ return std::nullopt;
+ return SmallerVT;
+}
+
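// Another data point for the doubling chain above, assuming Zvl128b: for
// VecVT = nxv8i32 and MaxIdx = 5, MinVLMAX = 128/32 = 4, so the search
// settles on the LMUL=2 type nxv4i32, which holds at least 8 elements on
// such a target and therefore covers index 5.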
// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
// first position of a vector, and that vector is slid up to the insert index.
// By limiting the active vector length to index+1 and merging with the
@@ -6784,6 +7772,43 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
}
+ // If we know the index we're going to insert at, we can shrink Vec so that
+ // we're performing the scalar inserts and slideup on a smaller LMUL.
+ MVT OrigContainerVT = ContainerVT;
+ SDValue OrigVec = Vec;
+ SDValue AlignedIdx;
+ if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
+ const unsigned OrigIdx = IdxC->getZExtValue();
+ // Do we know an upper bound on LMUL?
+ if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
+ DL, DAG, Subtarget)) {
+ ContainerVT = *ShrunkVT;
+ AlignedIdx = DAG.getVectorIdxConstant(0, DL);
+ }
+
+ // If we're compiling for an exact VLEN value, we can always perform
+ // the insert in m1 as we can determine the register corresponding to
+ // the index in the register group.
+ const unsigned MinVLen = Subtarget.getRealMinVLen();
+ const unsigned MaxVLen = Subtarget.getRealMaxVLen();
+ const MVT M1VT = getLMUL1VT(ContainerVT);
+ if (MinVLen == MaxVLen && ContainerVT.bitsGT(M1VT)) {
+ EVT ElemVT = VecVT.getVectorElementType();
+ unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
+ unsigned RemIdx = OrigIdx % ElemsPerVReg;
+ unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
+ unsigned ExtractIdx =
+ SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
+ AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
+ Idx = DAG.getVectorIdxConstant(RemIdx, DL);
+ ContainerVT = M1VT;
+ }
+
+ if (AlignedIdx)
+ Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
+ AlignedIdx);
+ }
+
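// Worked example for the exact-VLEN branch, assuming VLEN = 128 and an
// insert into v8i32 at index 5: the container is nxv4i32 (two registers),
// ElemsPerVReg = 128/32 = 4, so SubRegIdx = 1 and RemIdx = 1; the insert
// happens at index 1 of the extracted m1 register (ExtractIdx = 2 in
// nxv2i32 elements) and the result is re-inserted further down.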
MVT XLenVT = Subtarget.getXLenVT();
bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
@@ -6807,7 +7832,13 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
unsigned Opc =
VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
if (isNullConstant(Idx)) {
+ if (!VecVT.isFloatingPoint())
+ Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
+
+ if (AlignedIdx)
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
+ Vec, AlignedIdx);
if (!VecVT.isFixedLengthVector())
return Vec;
return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
@@ -6840,6 +7871,10 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
// Bitcast back to the right container type.
ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
+ if (AlignedIdx)
+ ValInVec =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
+ ValInVec, AlignedIdx);
if (!VecVT.isFixedLengthVector())
return ValInVec;
return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
@@ -6870,6 +7905,10 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
Policy = RISCVII::TAIL_AGNOSTIC;
SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
Idx, Mask, InsertVL, Policy);
+
+ if (AlignedIdx)
+ Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
+ Slideup, AlignedIdx);
if (!VecVT.isFixedLengthVector())
return Slideup;
return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
@@ -6899,8 +7938,9 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
SDValue Vfirst =
DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
- return DAG.getSetCC(DL, XLenVT, Vfirst, DAG.getConstant(0, DL, XLenVT),
- ISD::SETEQ);
+ SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
+ DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
+ return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
}
if (VecVT.isFixedLengthVector()) {
unsigned NumElts = VecVT.getVectorNumElements();
@@ -6909,7 +7949,7 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
unsigned WidenVecLen;
SDValue ExtractElementIdx;
SDValue ExtractBitIdx;
- unsigned MaxEEW = Subtarget.getELEN();
+ unsigned MaxEEW = Subtarget.getELen();
MVT LargestEltVT = MVT::getIntegerVT(
std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
if (NumElts <= LargestEltVT.getSizeInBits()) {
@@ -6938,8 +7978,9 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
// Extract the bit from GPR.
SDValue ShiftRight =
DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
- return DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
- DAG.getConstant(1, DL, XLenVT));
+ SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
+ DAG.getConstant(1, DL, XLenVT));
+ return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
}
}
// Otherwise, promote to an i8 vector and extract from that.
@@ -6955,6 +7996,61 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
}
+ // If we're compiling for an exact VLEN value and we have a known
+ // constant index, we can always perform the extract in m1 (or
+ // smaller) as we can determine the register corresponding to
+ // the index in the register group.
+ const unsigned MinVLen = Subtarget.getRealMinVLen();
+ const unsigned MaxVLen = Subtarget.getRealMaxVLen();
+ if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
+ IdxC && MinVLen == MaxVLen &&
+ VecVT.getSizeInBits().getKnownMinValue() > MinVLen) {
+ MVT M1VT = getLMUL1VT(ContainerVT);
+ unsigned OrigIdx = IdxC->getZExtValue();
+ EVT ElemVT = VecVT.getVectorElementType();
+ unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
+ unsigned RemIdx = OrigIdx % ElemsPerVReg;
+ unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
+ unsigned ExtractIdx =
+ SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
+ Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
+ DAG.getVectorIdxConstant(ExtractIdx, DL));
+ Idx = DAG.getVectorIdxConstant(RemIdx, DL);
+ ContainerVT = M1VT;
+ }
+
+ // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
+ // contains our index.
+ std::optional<uint64_t> MaxIdx;
+ if (VecVT.isFixedLengthVector())
+ MaxIdx = VecVT.getVectorNumElements() - 1;
+ if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
+ MaxIdx = IdxC->getZExtValue();
+ if (MaxIdx) {
+ if (auto SmallerVT =
+ getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
+ ContainerVT = *SmallerVT;
+ Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
+ DAG.getConstant(0, DL, XLenVT));
+ }
+ }
+
+ // If after narrowing, the required slide is still greater than LMUL2,
+ // fall back to generic expansion and go through the stack. This is done
+ // for a subtle reason: extracting *all* elements out of a vector is
+ // widely expected to be linear in vector size, but because vslidedown
+ // is linear in LMUL, performing N extracts using vslidedown becomes
+ // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
+ // seems to have the same problem (the store is linear in LMUL), but the
+ // generic expansion *memoizes* the store, and thus for many extracts of
+ // the same vector we end up with one store and a bunch of loads.
+ // TODO: We don't have the same code for insert_vector_elt because we
+ // have BUILD_VECTOR and handle the degenerate case there. Should we
+ // consider adding an inverse BUILD_VECTOR node?
+ MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
+ if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
+ return SDValue();
+
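// Concrete cost picture, assuming VLEN = 128 and a v64i8 source (LMUL=4):
// extracting all 64 elements via vslidedown is 64 slides that each touch
// four registers, while the stack route is one four-register store plus 64
// scalar byte loads.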
// If the index is 0, the vector is already in the right position.
if (!isNullConstant(Idx)) {
// Use a VL of 1 to avoid processing more elements than we need.
@@ -7062,16 +8158,8 @@ static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
// Optimize for constant AVL
if (isa<ConstantSDNode>(AVL)) {
- unsigned EltSize = VT.getScalarSizeInBits();
- unsigned MinSize = VT.getSizeInBits().getKnownMinValue();
-
- unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
- unsigned MaxVLMAX =
- RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
-
- unsigned VectorBitsMin = Subtarget.getRealMinVLen();
- unsigned MinVLMAX =
- RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
+ const auto [MinVLMAX, MaxVLMAX] =
+ RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
uint64_t AVLInt = cast<ConstantSDNode>(AVL)->getZExtValue();
if (AVLInt <= MinVLMAX) {
@@ -7182,7 +8270,7 @@ static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
// Determine the VF that corresponds to LMUL 1 for ElementWidth.
unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
// We don't support VF==1 with ELEN==32.
- unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELEN();
+ unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELen();
unsigned VF = N->getConstantOperandVal(2);
assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
@@ -7202,7 +8290,39 @@ static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
+ SDValue Res =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
+ return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
+}
+
+static void getVCIXOperands(SDValue &Op, SelectionDAG &DAG,
+ SmallVector<SDValue> &Ops) {
+ SDLoc DL(Op);
+
+ const RISCVSubtarget &Subtarget =
+ DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
+ for (const SDValue &V : Op->op_values()) {
+ EVT ValType = V.getValueType();
+ if (ValType.isScalableVector() && ValType.isFloatingPoint()) {
+ MVT InterimIVT =
+ MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
+ ValType.getVectorElementCount());
+ Ops.push_back(DAG.getBitcast(InterimIVT, V));
+ } else if (ValType.isFixedLengthVector()) {
+ MVT OpContainerVT = getContainerForFixedLengthVector(
+ DAG, V.getSimpleValueType(), Subtarget);
+ Ops.push_back(convertToScalableVector(OpContainerVT, V, DAG, Subtarget));
+ } else
+ Ops.push_back(V);
+ }
+}
+
+// LMUL * VLEN should be greater than or equal to EGS * SEW
+static inline bool isValidEGW(int EGS, EVT VT,
+ const RISCVSubtarget &Subtarget) {
+ return (Subtarget.getRealMinVLen() *
+ VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
+ EGS * VT.getScalarSizeInBits();
}
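// Example, assuming Zvl128b (RealMinVLen = 128): nxv4i32 gives
// 128 * 128 / 64 = 256 >= 4 * 32, so the EGS=4 crypto intrinsics accept it,
// while nxv1i32 gives 128 * 32 / 64 = 64 < 128 and is rejected.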
SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
@@ -7238,12 +8358,30 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
}
+ if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
+ SDValue NewOp =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
+ SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
+ }
+
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
}
case Intrinsic::riscv_sm4ks:
case Intrinsic::riscv_sm4ed: {
unsigned Opc =
IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
+
+ if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
+ SDValue NewOp0 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
+ SDValue NewOp1 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
+ SDValue Res =
+ DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
+ }
+
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
Op.getOperand(3));
}
@@ -7254,20 +8392,43 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
}
case Intrinsic::riscv_clmul:
+ if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
+ SDValue NewOp0 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
+ SDValue NewOp1 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
+ SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
+ }
return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
Op.getOperand(2));
case Intrinsic::riscv_clmulh:
- return DAG.getNode(RISCVISD::CLMULH, DL, XLenVT, Op.getOperand(1),
- Op.getOperand(2));
- case Intrinsic::riscv_clmulr:
- return DAG.getNode(RISCVISD::CLMULR, DL, XLenVT, Op.getOperand(1),
- Op.getOperand(2));
+ case Intrinsic::riscv_clmulr: {
+ unsigned Opc =
+ IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
+ if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
+ SDValue NewOp0 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
+ SDValue NewOp1 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
+ NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
+ DAG.getConstant(32, DL, MVT::i64));
+ NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
+ DAG.getConstant(32, DL, MVT::i64));
+ SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
+ Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
+ DAG.getConstant(32, DL, MVT::i64));
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
+ }
+
+ return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
+ }
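// The shifts above place the i32 inputs in the upper halves of the i64
// registers so the 64-bit carry-less product lines up with the 32-bit
// semantics: clmulh32(a, b) == clmulh64(a << 32, b << 32) >> 32, and
// likewise for clmulr, since the low 64 product bits of the shifted inputs
// are all zero.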
case Intrinsic::experimental_get_vector_length:
return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
- case Intrinsic::riscv_vmv_x_s:
- assert(Op.getValueType() == XLenVT && "Unexpected VT!");
- return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
- Op.getOperand(1));
+ case Intrinsic::riscv_vmv_x_s: {
+ SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
+ return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
+ }
case Intrinsic::riscv_vfmv_f_s:
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
Op.getOperand(1), DAG.getConstant(0, DL, XLenVT));
@@ -7325,6 +8486,86 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
Vec, VL);
}
+ // EGS * EEW >= 128 bits
+ case Intrinsic::riscv_vaesdf_vv:
+ case Intrinsic::riscv_vaesdf_vs:
+ case Intrinsic::riscv_vaesdm_vv:
+ case Intrinsic::riscv_vaesdm_vs:
+ case Intrinsic::riscv_vaesef_vv:
+ case Intrinsic::riscv_vaesef_vs:
+ case Intrinsic::riscv_vaesem_vv:
+ case Intrinsic::riscv_vaesem_vs:
+ case Intrinsic::riscv_vaeskf1:
+ case Intrinsic::riscv_vaeskf2:
+ case Intrinsic::riscv_vaesz_vs:
+ case Intrinsic::riscv_vsm4k:
+ case Intrinsic::riscv_vsm4r_vv:
+ case Intrinsic::riscv_vsm4r_vs: {
+ if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
+ !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
+ !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
+ report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
+ return Op;
+ }
+ // EGS * EEW >= 256 bits
+ case Intrinsic::riscv_vsm3c:
+ case Intrinsic::riscv_vsm3me: {
+ if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
+ !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
+ report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
+ return Op;
+ }
+ // zvknha(SEW=32)/zvknhb(SEW=[32|64])
+ case Intrinsic::riscv_vsha2ch:
+ case Intrinsic::riscv_vsha2cl:
+ case Intrinsic::riscv_vsha2ms: {
+ if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
+ !Subtarget.hasStdExtZvknhb())
+ report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
+ if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
+ !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
+ !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
+ report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
+ return Op;
+ }
+ case Intrinsic::riscv_sf_vc_v_x:
+ case Intrinsic::riscv_sf_vc_v_i:
+ case Intrinsic::riscv_sf_vc_v_xv:
+ case Intrinsic::riscv_sf_vc_v_iv:
+ case Intrinsic::riscv_sf_vc_v_vv:
+ case Intrinsic::riscv_sf_vc_v_fv:
+ case Intrinsic::riscv_sf_vc_v_xvv:
+ case Intrinsic::riscv_sf_vc_v_ivv:
+ case Intrinsic::riscv_sf_vc_v_vvv:
+ case Intrinsic::riscv_sf_vc_v_fvv:
+ case Intrinsic::riscv_sf_vc_v_xvw:
+ case Intrinsic::riscv_sf_vc_v_ivw:
+ case Intrinsic::riscv_sf_vc_v_vvw:
+ case Intrinsic::riscv_sf_vc_v_fvw: {
+ MVT VT = Op.getSimpleValueType();
+
+ SmallVector<SDValue> Ops;
+ getVCIXOperands(Op, DAG, Ops);
+
+ MVT RetVT = VT;
+ if (VT.isFixedLengthVector())
+ RetVT = getContainerForFixedLengthVector(VT);
+ else if (VT.isFloatingPoint())
+ RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
+ VT.getVectorElementCount());
+
+ SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Ops);
+
+ if (VT.isFixedLengthVector())
+ NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
+ else if (VT.isFloatingPoint())
+ NewNode = DAG.getBitcast(VT, NewNode);
+
+ if (Op == NewNode)
+ break;
+
+ return NewNode;
+ }
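// The sf.vc.* (SiFive VCIX) intrinsics appear to be matched only on integer
// scalable vector types, so getVCIXOperands and this result handling
// round-trip FP and fixed-length values through an equivalent integer
// container and bitcast/convert back afterwards; the Op == NewNode check
// falls through when nothing needed rewriting.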
}
return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
@@ -7425,7 +8666,8 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
MVT VT = Op->getSimpleValueType(0);
MVT ContainerVT = getContainerForFixedLengthVector(VT);
- SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
+ SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
+ Subtarget);
SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
auto *Load = cast<MemIntrinsicSDNode>(Op);
SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
@@ -7445,6 +8687,49 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Results.push_back(Result.getValue(NF));
return DAG.getMergeValues(Results, DL);
}
+ case Intrinsic::riscv_sf_vc_v_x_se:
+ case Intrinsic::riscv_sf_vc_v_i_se:
+ case Intrinsic::riscv_sf_vc_v_xv_se:
+ case Intrinsic::riscv_sf_vc_v_iv_se:
+ case Intrinsic::riscv_sf_vc_v_vv_se:
+ case Intrinsic::riscv_sf_vc_v_fv_se:
+ case Intrinsic::riscv_sf_vc_v_xvv_se:
+ case Intrinsic::riscv_sf_vc_v_ivv_se:
+ case Intrinsic::riscv_sf_vc_v_vvv_se:
+ case Intrinsic::riscv_sf_vc_v_fvv_se:
+ case Intrinsic::riscv_sf_vc_v_xvw_se:
+ case Intrinsic::riscv_sf_vc_v_ivw_se:
+ case Intrinsic::riscv_sf_vc_v_vvw_se:
+ case Intrinsic::riscv_sf_vc_v_fvw_se: {
+ MVT VT = Op.getSimpleValueType();
+ SDLoc DL(Op);
+ SmallVector<SDValue> Ops;
+ getVCIXOperands(Op, DAG, Ops);
+
+ MVT RetVT = VT;
+ if (VT.isFixedLengthVector())
+ RetVT = getContainerForFixedLengthVector(VT);
+ else if (VT.isFloatingPoint())
+ RetVT = MVT::getVectorVT(MVT::getIntegerVT(RetVT.getScalarSizeInBits()),
+ RetVT.getVectorElementCount());
+
+ SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
+ SDValue NewNode = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops);
+
+ if (VT.isFixedLengthVector()) {
+ SDValue FixedVector =
+ convertFromScalableVector(VT, NewNode, DAG, Subtarget);
+ NewNode = DAG.getMergeValues({FixedVector, NewNode.getValue(1)}, DL);
+ } else if (VT.isFloatingPoint()) {
+ SDValue BitCast = DAG.getBitcast(VT, NewNode.getValue(0));
+ NewNode = DAG.getMergeValues({BitCast, NewNode.getValue(1)}, DL);
+ }
+
+ if (Op == NewNode)
+ break;
+
+ return NewNode;
+ }
}
return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
@@ -7517,7 +8802,8 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
MVT VT = Op->getOperand(2).getSimpleValueType();
MVT ContainerVT = getContainerForFixedLengthVector(VT);
- SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
+ SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
+ Subtarget);
SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
SDValue Ptr = Op->getOperand(NF + 2);
@@ -7532,6 +8818,73 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
}
+ case Intrinsic::riscv_sf_vc_x_se_e8mf8:
+ case Intrinsic::riscv_sf_vc_x_se_e8mf4:
+ case Intrinsic::riscv_sf_vc_x_se_e8mf2:
+ case Intrinsic::riscv_sf_vc_x_se_e8m1:
+ case Intrinsic::riscv_sf_vc_x_se_e8m2:
+ case Intrinsic::riscv_sf_vc_x_se_e8m4:
+ case Intrinsic::riscv_sf_vc_x_se_e8m8:
+ case Intrinsic::riscv_sf_vc_x_se_e16mf4:
+ case Intrinsic::riscv_sf_vc_x_se_e16mf2:
+ case Intrinsic::riscv_sf_vc_x_se_e16m1:
+ case Intrinsic::riscv_sf_vc_x_se_e16m2:
+ case Intrinsic::riscv_sf_vc_x_se_e16m4:
+ case Intrinsic::riscv_sf_vc_x_se_e16m8:
+ case Intrinsic::riscv_sf_vc_x_se_e32mf2:
+ case Intrinsic::riscv_sf_vc_x_se_e32m1:
+ case Intrinsic::riscv_sf_vc_x_se_e32m2:
+ case Intrinsic::riscv_sf_vc_x_se_e32m4:
+ case Intrinsic::riscv_sf_vc_x_se_e32m8:
+ case Intrinsic::riscv_sf_vc_x_se_e64m1:
+ case Intrinsic::riscv_sf_vc_x_se_e64m2:
+ case Intrinsic::riscv_sf_vc_x_se_e64m4:
+ case Intrinsic::riscv_sf_vc_x_se_e64m8:
+ case Intrinsic::riscv_sf_vc_i_se_e8mf8:
+ case Intrinsic::riscv_sf_vc_i_se_e8mf4:
+ case Intrinsic::riscv_sf_vc_i_se_e8mf2:
+ case Intrinsic::riscv_sf_vc_i_se_e8m1:
+ case Intrinsic::riscv_sf_vc_i_se_e8m2:
+ case Intrinsic::riscv_sf_vc_i_se_e8m4:
+ case Intrinsic::riscv_sf_vc_i_se_e8m8:
+ case Intrinsic::riscv_sf_vc_i_se_e16mf4:
+ case Intrinsic::riscv_sf_vc_i_se_e16mf2:
+ case Intrinsic::riscv_sf_vc_i_se_e16m1:
+ case Intrinsic::riscv_sf_vc_i_se_e16m2:
+ case Intrinsic::riscv_sf_vc_i_se_e16m4:
+ case Intrinsic::riscv_sf_vc_i_se_e16m8:
+ case Intrinsic::riscv_sf_vc_i_se_e32mf2:
+ case Intrinsic::riscv_sf_vc_i_se_e32m1:
+ case Intrinsic::riscv_sf_vc_i_se_e32m2:
+ case Intrinsic::riscv_sf_vc_i_se_e32m4:
+ case Intrinsic::riscv_sf_vc_i_se_e32m8:
+ case Intrinsic::riscv_sf_vc_i_se_e64m1:
+ case Intrinsic::riscv_sf_vc_i_se_e64m2:
+ case Intrinsic::riscv_sf_vc_i_se_e64m4:
+ case Intrinsic::riscv_sf_vc_i_se_e64m8:
+ case Intrinsic::riscv_sf_vc_xv_se:
+ case Intrinsic::riscv_sf_vc_iv_se:
+ case Intrinsic::riscv_sf_vc_vv_se:
+ case Intrinsic::riscv_sf_vc_fv_se:
+ case Intrinsic::riscv_sf_vc_xvv_se:
+ case Intrinsic::riscv_sf_vc_ivv_se:
+ case Intrinsic::riscv_sf_vc_vvv_se:
+ case Intrinsic::riscv_sf_vc_fvv_se:
+ case Intrinsic::riscv_sf_vc_xvw_se:
+ case Intrinsic::riscv_sf_vc_ivw_se:
+ case Intrinsic::riscv_sf_vc_vvw_se:
+ case Intrinsic::riscv_sf_vc_fvw_se: {
+ SmallVector<SDValue> Ops;
+ getVCIXOperands(Op, DAG, Ops);
+
+ SDValue NewNode =
+ DAG.getNode(ISD::INTRINSIC_VOID, SDLoc(Op), Op->getVTList(), Ops);
+
+ if (Op == NewNode)
+ break;
+
+ return NewNode;
+ }
}
return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
@@ -7541,23 +8894,40 @@ static unsigned getRVVReductionOp(unsigned ISDOpcode) {
switch (ISDOpcode) {
default:
llvm_unreachable("Unhandled reduction");
+ case ISD::VP_REDUCE_ADD:
case ISD::VECREDUCE_ADD:
return RISCVISD::VECREDUCE_ADD_VL;
+ case ISD::VP_REDUCE_UMAX:
case ISD::VECREDUCE_UMAX:
return RISCVISD::VECREDUCE_UMAX_VL;
+ case ISD::VP_REDUCE_SMAX:
case ISD::VECREDUCE_SMAX:
return RISCVISD::VECREDUCE_SMAX_VL;
+ case ISD::VP_REDUCE_UMIN:
case ISD::VECREDUCE_UMIN:
return RISCVISD::VECREDUCE_UMIN_VL;
+ case ISD::VP_REDUCE_SMIN:
case ISD::VECREDUCE_SMIN:
return RISCVISD::VECREDUCE_SMIN_VL;
+ case ISD::VP_REDUCE_AND:
case ISD::VECREDUCE_AND:
return RISCVISD::VECREDUCE_AND_VL;
+ case ISD::VP_REDUCE_OR:
case ISD::VECREDUCE_OR:
return RISCVISD::VECREDUCE_OR_VL;
+ case ISD::VP_REDUCE_XOR:
case ISD::VECREDUCE_XOR:
return RISCVISD::VECREDUCE_XOR_VL;
+ case ISD::VP_REDUCE_FADD:
+ return RISCVISD::VECREDUCE_FADD_VL;
+ case ISD::VP_REDUCE_SEQ_FADD:
+ return RISCVISD::VECREDUCE_SEQ_FADD_VL;
+ case ISD::VP_REDUCE_FMAX:
+ return RISCVISD::VECREDUCE_FMAX_VL;
+ case ISD::VP_REDUCE_FMIN:
+ return RISCVISD::VECREDUCE_FMIN_VL;
}
+
}
SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
@@ -7575,8 +8945,6 @@ SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
"Unexpected reduction lowering");
MVT XLenVT = Subtarget.getXLenVT();
- assert(Op.getValueType() == XLenVT &&
- "Expected reduction output to be legalized to XLenVT");
MVT ContainerVT = VecVT;
if (VecVT.isFixedLengthVector()) {
@@ -7630,6 +8998,7 @@ SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
}
SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
+ SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
if (!IsVP)
return SetCC;
@@ -7640,7 +9009,7 @@ SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
// 0 for an inactive vector, and so we've already received the neutral value:
// AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
// can simply include the start value.
- return DAG.getNode(BaseOpc, DL, XLenVT, SetCC, Op.getOperand(0));
+ return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
}
static bool isNonZeroAVL(SDValue AVL) {
@@ -7716,9 +9085,19 @@ SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
- SDValue NeutralElem =
- DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
- return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), NeutralElem, Vec,
+ SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
+ switch (BaseOpc) {
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::UMAX:
+ case ISD::UMIN:
+ case ISD::SMAX:
+ case ISD::SMIN:
+ MVT XLenVT = Subtarget.getXLenVT();
+ StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
+ DAG.getConstant(0, DL, XLenVT));
+ }
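// For these idempotent reductions, seeding with element 0 instead of the
// neutral element is always correct (min/max/and/or of x with itself is x)
// and avoids materializing constants such as INT_MIN (smax) or all-ones
// (and) in a scalar register.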
+ return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
Mask, VL, DL, DAG, Subtarget);
}
@@ -7726,11 +9105,11 @@ SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
// the vector SDValue and the scalar SDValue required to lower this to a
// RISCVISD node.
static std::tuple<unsigned, SDValue, SDValue>
-getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
+getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
+ const RISCVSubtarget &Subtarget) {
SDLoc DL(Op);
auto Flags = Op->getFlags();
unsigned Opcode = Op.getOpcode();
- unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode);
switch (Opcode) {
default:
llvm_unreachable("Unhandled reduction");
@@ -7744,11 +9123,16 @@ getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
Op.getOperand(0));
case ISD::VECREDUCE_FMIN:
- return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0),
- DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
- case ISD::VECREDUCE_FMAX:
- return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0),
- DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
+ case ISD::VECREDUCE_FMAX: {
+ MVT XLenVT = Subtarget.getXLenVT();
+ SDValue Front =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
+ DAG.getConstant(0, DL, XLenVT));
+ unsigned RVVOpc = (Opcode == ISD::VECREDUCE_FMIN)
+ ? RISCVISD::VECREDUCE_FMIN_VL
+ : RISCVISD::VECREDUCE_FMAX_VL;
+ return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
+ }
}
}
@@ -7760,7 +9144,7 @@ SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
unsigned RVVOpcode;
SDValue VectorVal, ScalarVal;
std::tie(RVVOpcode, VectorVal, ScalarVal) =
- getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
+ getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
MVT VecVT = VectorVal.getSimpleValueType();
MVT ContainerVT = VecVT;
@@ -7774,37 +9158,6 @@ SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
VectorVal, Mask, VL, DL, DAG, Subtarget);
}
-static unsigned getRVVVPReductionOp(unsigned ISDOpcode) {
- switch (ISDOpcode) {
- default:
- llvm_unreachable("Unhandled reduction");
- case ISD::VP_REDUCE_ADD:
- return RISCVISD::VECREDUCE_ADD_VL;
- case ISD::VP_REDUCE_UMAX:
- return RISCVISD::VECREDUCE_UMAX_VL;
- case ISD::VP_REDUCE_SMAX:
- return RISCVISD::VECREDUCE_SMAX_VL;
- case ISD::VP_REDUCE_UMIN:
- return RISCVISD::VECREDUCE_UMIN_VL;
- case ISD::VP_REDUCE_SMIN:
- return RISCVISD::VECREDUCE_SMIN_VL;
- case ISD::VP_REDUCE_AND:
- return RISCVISD::VECREDUCE_AND_VL;
- case ISD::VP_REDUCE_OR:
- return RISCVISD::VECREDUCE_OR_VL;
- case ISD::VP_REDUCE_XOR:
- return RISCVISD::VECREDUCE_XOR_VL;
- case ISD::VP_REDUCE_FADD:
- return RISCVISD::VECREDUCE_FADD_VL;
- case ISD::VP_REDUCE_SEQ_FADD:
- return RISCVISD::VECREDUCE_SEQ_FADD_VL;
- case ISD::VP_REDUCE_FMAX:
- return RISCVISD::VECREDUCE_FMAX_VL;
- case ISD::VP_REDUCE_FMIN:
- return RISCVISD::VECREDUCE_FMIN_VL;
- }
-}
-
SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
@@ -7817,7 +9170,7 @@ SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
return SDValue();
MVT VecVT = VecEVT.getSimpleVT();
- unsigned RVVOpcode = getRVVVPReductionOp(Op.getOpcode());
+ unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
if (VecVT.isFixedLengthVector()) {
auto ContainerVT = getContainerForFixedLengthVector(VecVT);
@@ -7892,19 +9245,24 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
ContainerVT = getContainerForFixedLengthVector(VecVT);
Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
}
- SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
- DAG.getUNDEF(ContainerVT), SubVec,
- DAG.getConstant(0, DL, XLenVT));
+
if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
+ SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), SubVec,
+ DAG.getConstant(0, DL, XLenVT));
SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
return DAG.getBitcast(Op.getValueType(), SubVec);
}
+
+ SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), SubVec,
+ DAG.getConstant(0, DL, XLenVT));
SDValue Mask =
getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
// Set the vector length to only the number of elements we care about. Note
// that for slideup this includes the offset.
unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
- SDValue VL = getVLOp(EndIndex, DL, DAG, Subtarget);
+ SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget);
// Use tail agnostic policy if we're inserting over Vec's tail.
unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
@@ -8051,26 +9409,38 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
}
}
+ // With an index of 0 this is a cast-like subvector, which can be performed
+ // with subregister operations.
+ if (OrigIdx == 0)
+ return Op;
+
// If the subvector vector is a fixed-length type, we cannot use subregister
// manipulation to simplify the codegen; we don't know which register of a
// LMUL group contains the specific subvector as we only know the minimum
// register size. Therefore we must slide the vector group down the full
// amount.
if (SubVecVT.isFixedLengthVector()) {
- // With an index of 0 this is a cast-like subvector, which can be performed
- // with subregister operations.
- if (OrigIdx == 0)
- return Op;
MVT ContainerVT = VecVT;
if (VecVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VecVT);
Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
}
+
+ // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
+ unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
+ if (auto ShrunkVT =
+ getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
+ ContainerVT = *ShrunkVT;
+ Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
+ DAG.getVectorIdxConstant(0, DL));
+ }
+
SDValue Mask =
getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
// Set the vector length to only the number of elements we care about. This
// avoids sliding down elements we're going to discard straight away.
- SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), DL, DAG, Subtarget);
+ SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG,
+ Subtarget);
SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
SDValue Slidedown =
getVSlidedown(DAG, Subtarget, DL, ContainerVT,
@@ -8092,17 +9462,18 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
if (RemIdx == 0)
return Op;
- // Else we must shift our vector register directly to extract the subvector.
- // Do this using VSLIDEDOWN.
+ // Else SubVecVT has a fractional LMUL and may need to be slid down.
+ assert(RISCVVType::decodeVLMUL(getLMUL(SubVecVT)).second);
// If the vector type is an LMUL-group type, extract a subvector equal to the
- // nearest full vector register type. This should resolve to a EXTRACT_SUBREG
- // instruction.
+ // nearest full vector register type.
MVT InterSubVT = VecVT;
if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
+ // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
+ // we should have successfully decomposed the extract into a subregister.
+ assert(SubRegIdx != RISCV::NoSubRegister);
InterSubVT = getLMUL1VT(VecVT);
- Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
- DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
+ Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
}
// Slide this vector register down by the desired number of elements in order
@@ -8200,7 +9571,7 @@ SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
// We can deinterleave through vnsrl.wi if the element type is smaller than
// ELEN
- if (VecVT.getScalarSizeInBits() < Subtarget.getELEN()) {
+ if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
SDValue Even =
getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
SDValue Odd =
@@ -8269,7 +9640,7 @@ SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
// If the element type is smaller than ELEN, then we can interleave with
// vwaddu.vv and vwmaccu.vx
- if (VecVT.getScalarSizeInBits() < Subtarget.getELEN()) {
+ if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
DAG, Subtarget);
} else {
@@ -8476,7 +9847,20 @@ RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
MVT XLenVT = Subtarget.getXLenVT();
MVT ContainerVT = getContainerForFixedLengthVector(VT);
- SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
+ // If we know the exact VLEN and our fixed-length vector completely fills
+ // the container, use a whole register load instead.
+ const auto [MinVLMAX, MaxVLMAX] =
+ RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
+ if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
+ getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
+ SDValue NewLoad =
+ DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
+ Load->getMemOperand());
+ SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
+ return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
+ }
+
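// Example, assuming an exact VLEN of 128: a v4i32 load exactly fills its
// nxv2i32 container (MinVLMAX == MaxVLMAX == 4 == the element count), so
// the vsetvli + unit-stride vle sequence is replaced by a single plain
// whole-register load.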
+ SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget);
bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
SDValue IntID = DAG.getTargetConstant(
@@ -8520,11 +9904,22 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
MVT ContainerVT = getContainerForFixedLengthVector(VT);
- SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
-
SDValue NewValue =
convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
+
+ // If we know the exact VLEN and our fixed-length vector completely fills
+ // the container, use a whole register store instead.
+ const auto [MinVLMAX, MaxVLMAX] =
+ RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
+ if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
+ getLMUL1VT(ContainerVT).bitsLE(ContainerVT))
+ return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
+ Store->getMemOperand());
+
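// Mirror of the load-path optimization above: when the fixed-length value
// exactly fills its container under a known VLEN, a plain store replaces
// the vsetvli + vse sequence.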
+ SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
+ Subtarget);
+
bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
SDValue IntID = DAG.getTargetConstant(
IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
@@ -8902,9 +10297,10 @@ SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
// * The EVL operand is promoted from i32 to i64 on RV64.
// * Fixed-length vectors are converted to their scalable-vector container
// types.
-SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG,
- unsigned RISCVISDOpc,
- bool HasMergeOp) const {
+SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
+ unsigned RISCVISDOpc = getRISCVVLOp(Op);
+ bool HasMergeOp = hasMergeOp(RISCVISDOpc);
+
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SmallVector<SDValue, 4> Ops;
@@ -9053,13 +10449,14 @@ SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
}
// Lower Floating-Point/Integer Type-Convert VP SDNodes
-SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG,
- unsigned RISCVISDOpc) const {
+SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Src = Op.getOperand(0);
SDValue Mask = Op.getOperand(1);
SDValue VL = Op.getOperand(2);
+ unsigned RISCVISDOpc = getRISCVVLOp(Op);
MVT DstVT = Op.getSimpleValueType();
MVT SrcVT = Src.getSimpleValueType();
@@ -9185,12 +10582,132 @@ SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG,
return convertFromScalableVector(VT, Result, DAG, Subtarget);
}
-SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG,
- unsigned MaskOpc,
- unsigned VecOpc) const {
+SDValue
+RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ SDValue Op1 = Op.getOperand(0);
+ SDValue Mask = Op.getOperand(1);
+ SDValue EVL = Op.getOperand(2);
+
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ }
+
+ MVT GatherVT = ContainerVT;
+ MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
+ // Check if we are working with mask vectors
+ bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
+ if (IsMaskVector) {
+ GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
+
+ // Expand input operand
+ SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
+ DAG.getUNDEF(IndicesVT),
+ DAG.getConstant(1, DL, XLenVT), EVL);
+ SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
+ DAG.getUNDEF(IndicesVT),
+ DAG.getConstant(0, DL, XLenVT), EVL);
+ Op1 = DAG.getNode(RISCVISD::VSELECT_VL, DL, IndicesVT, Op1, SplatOne,
+ SplatZero, EVL);
+ }
+
+ unsigned EltSize = GatherVT.getScalarSizeInBits();
+ unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
+ unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
+ unsigned MaxVLMAX =
+ RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
+
+ unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
+ // If this is SEW=8 and VLMAX is unknown or more than 256, we need
+ // to use vrgatherei16.vv.
+ // TODO: It's also possible to use vrgatherei16.vv for other types to
+ // decrease register width for the index calculation.
+ // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
+ if (MaxVLMAX > 256 && EltSize == 8) {
+ // If this is LMUL=8, we have to split before using vrgatherei16.vv.
+ // Split the vector in half and reverse each half using a full register
+ // reverse.
+ // Swap the halves and concatenate them.
+ // Slide the concatenated result by (VLMax - VL).
+ if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
+ auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
+ auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
+
+ SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
+ SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
+
+ // Reassemble the low and high pieces reversed.
+ // NOTE: this Result is unmasked (because we do not need masks for
+ // shuffles). If in the future this has to change, we can use a SELECT_VL
+ // between Result and UNDEF using the mask originally passed to VP_REVERSE.
+ SDValue Result =
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
+
+ // Slide off any elements from past EVL that were reversed into the low
+ // elements.
+ unsigned MinElts = GatherVT.getVectorMinNumElements();
+ SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
+ DAG.getConstant(MinElts, DL, XLenVT));
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
+
+ Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
+ DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
+
+ if (IsMaskVector) {
+ // Truncate Result back to a mask vector
+ Result =
+ DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
+ {Result, DAG.getConstant(0, DL, GatherVT),
+ DAG.getCondCode(ISD::SETNE),
+ DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
+ }
+
+ if (!VT.isFixedLengthVector())
+ return Result;
+ return convertFromScalableVector(VT, Result, DAG, Subtarget);
+ }
+
+ // Just promote the int type to i16 which will double the LMUL.
+ IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
+ GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
+ }
+
+ SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
+ SDValue VecLen =
+ DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
+ SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
+ DAG.getUNDEF(IndicesVT), VecLen, EVL);
+ SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
+ DAG.getUNDEF(IndicesVT), Mask, EVL);
+ SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
+ DAG.getUNDEF(GatherVT), Mask, EVL);
+
+ if (IsMaskVector) {
+ // Truncate Result back to a mask vector
+ Result = DAG.getNode(
+ RISCVISD::SETCC_VL, DL, ContainerVT,
+ {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
+ DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
+ }
+
+ if (!VT.isFixedLengthVector())
+ return Result;
+ return convertFromScalableVector(VT, Result, DAG, Subtarget);
+}
+
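A scalar model of the general vrgather-based path in lowerVPReverseExperimental above, showing the index computation (EVL - 1) - vid; names are illustrative only:

#include <cassert>
#include <vector>

// Output lane I reads input lane EVL - 1 - I; lanes at or past EVL are
// left undisturbed, mirroring the unmasked tail of the VP operation.
std::vector<int> vpReverse(const std::vector<int> &Src, unsigned EVL) {
  assert(EVL <= Src.size() && "EVL must not exceed the element count");
  std::vector<int> Dst(Src);
  for (unsigned I = 0; I < EVL; ++I)
    Dst[I] = Src[EVL - 1 - I]; // gather index = (EVL - 1) - vid
  return Dst;
}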
+SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
+ SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
if (VT.getVectorElementType() != MVT::i1)
- return lowerVPOp(Op, DAG, VecOpc, true);
+ return lowerVPOp(Op, DAG);
// It is safe to drop mask parameter as masked-off elements are undef.
SDValue Op1 = Op->getOperand(0);
@@ -9206,7 +10723,7 @@ SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG,
}
SDLoc DL(Op);
- SDValue Val = DAG.getNode(MaskOpc, DL, ContainerVT, Op1, Op2, VL);
+ SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
if (!IsFixed)
return Val;
return convertFromScalableVector(VT, Val, DAG, Subtarget);
@@ -9366,10 +10883,7 @@ SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
IndexVT = IndexVT.changeVectorElementType(XLenVT);
- SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(),
- VL);
- Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index,
- TrueMask, VL);
+ Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
}
unsigned IntID =
@@ -9468,10 +10982,7 @@ SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
IndexVT = IndexVT.changeVectorElementType(XLenVT);
- SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(),
- VL);
- Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index,
- TrueMask, VL);
+ Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
}
unsigned IntID =
@@ -9539,6 +11050,8 @@ SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
(RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
(RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
+ RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
+
SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
DAG.getConstant(2, DL, XLenVT));
SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
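The zero-extend added above feeds a nibble-table lookup: each generic rounding mode selects a 4-bit FRM encoding via (Table >> (RM * 4)) & 7, which is what the SHL-by-2 followed by SRL computes. A standalone sketch, assuming the generic RoundingMode numbering shown below; the FRM encodings are the architectural ones:

#include <cassert>
#include <cstdint>

int main() {
  enum { RNE = 0, RTZ = 1, RDN = 2, RUP = 3, RMM = 4 }; // RISC-V FRM
  // Generic RoundingMode numbering, assumed for illustration.
  enum { TowardZero = 0, NearestTiesToEven = 1, TowardPositive = 2,
         TowardNegative = 3, NearestTiesToAway = 4 };
  uint64_t Table = (uint64_t(RTZ) << (4 * TowardZero)) |
                   (uint64_t(RNE) << (4 * NearestTiesToEven)) |
                   (uint64_t(RUP) << (4 * TowardPositive)) |
                   (uint64_t(RDN) << (4 * TowardNegative)) |
                   (uint64_t(RMM) << (4 * NearestTiesToAway));
  // SHL by 2 scales the mode to a nibble offset; SRL selects the nibble.
  assert(((Table >> (TowardZero << 2)) & 0x7) == RTZ);
  assert(((Table >> (TowardPositive << 2)) & 0x7) == RUP);
  return 0;
}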
@@ -9653,8 +11166,11 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Res.getValue(1));
return;
}
- // In absense of Zfh, promote f16 to f32, then convert.
- if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
+ // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
+ // convert.
+ if ((Op0.getValueType() == MVT::f16 &&
+ !Subtarget.hasStdExtZfhOrZhinx()) ||
+ Op0.getValueType() == MVT::bf16)
Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
@@ -10281,6 +11797,136 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
}
}
+/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
+/// which corresponds to it.
+static unsigned getVecReduceOpcode(unsigned Opc) {
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled binary to transfrom reduction");
+ case ISD::ADD:
+ return ISD::VECREDUCE_ADD;
+ case ISD::UMAX:
+ return ISD::VECREDUCE_UMAX;
+ case ISD::SMAX:
+ return ISD::VECREDUCE_SMAX;
+ case ISD::UMIN:
+ return ISD::VECREDUCE_UMIN;
+ case ISD::SMIN:
+ return ISD::VECREDUCE_SMIN;
+ case ISD::AND:
+ return ISD::VECREDUCE_AND;
+ case ISD::OR:
+ return ISD::VECREDUCE_OR;
+ case ISD::XOR:
+ return ISD::VECREDUCE_XOR;
+ case ISD::FADD:
+ // Note: This is the associative form of the generic reduction opcode.
+ return ISD::VECREDUCE_FADD;
+ }
+}
+
+/// Perform two related transforms whose purpose is to incrementally recognize
+/// an explode_vector followed by scalar reduction as a vector reduction node.
+/// This exists to recover from a deficiency in SLP which can't handle
+/// forests with multiple roots sharing common nodes. In some cases, one
+/// of the trees will be vectorized, and the other will remain (unprofitably)
+/// scalarized.
+static SDValue
+combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+
+ // This transform needs to run before all integer types have been legalized
+ // to i64 (so that the vector element type matches the add type), and while
+ // it's safe to introduce odd sized vector types.
+ if (DAG.NewNodesMustHaveLegalTypes)
+ return SDValue();
+
+ // Without V, this transform isn't useful. We could form the (illegal)
+ // operations and let them be scalarized again, but there's really no point.
+ if (!Subtarget.hasVInstructions())
+ return SDValue();
+
+ const SDLoc DL(N);
+ const EVT VT = N->getValueType(0);
+ const unsigned Opc = N->getOpcode();
+
+ // For FADD, we only handle the case with reassociation allowed. We
+ // could handle strict reduction order, but at the moment, there's no
+ // known reason to, and the complexity isn't worth it.
+ // TODO: Handle fminnum and fmaxnum here
+ if (!VT.isInteger() &&
+ (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
+ return SDValue();
+
+ const unsigned ReduceOpc = getVecReduceOpcode(Opc);
+ assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
+ "Inconsistent mappings");
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ if (!LHS.hasOneUse() || !RHS.hasOneUse())
+ return SDValue();
+
+ if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ std::swap(LHS, RHS);
+
+ if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(RHS.getOperand(1)))
+ return SDValue();
+
+ uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
+ SDValue SrcVec = RHS.getOperand(0);
+ EVT SrcVecVT = SrcVec.getValueType();
+ assert(SrcVecVT.getVectorElementType() == VT);
+ if (SrcVecVT.isScalableVector())
+ return SDValue();
+
+ if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
+ return SDValue();
+
+ // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
+ // reduce_op (extract_subvector [2 x VT] from V). This will form the
+ // root of our reduction tree. TODO: We could extend this to any two
+ // adjacent aligned constant indices if desired.
+ if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
+ uint64_t LHSIdx =
+ cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
+ if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
+ EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
+ SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
+ DAG.getVectorIdxConstant(0, DL));
+ return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
+ }
+ }
+
+ // Match (binop (reduce (extract_subvector V, 0),
+ // (extract_vector_elt V, sizeof(SubVec))))
+ // into a reduction of one more element from the original vector V.
+ if (LHS.getOpcode() != ReduceOpc)
+ return SDValue();
+
+ SDValue ReduceVec = LHS.getOperand(0);
+ if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
+ isNullConstant(ReduceVec.getOperand(1)) &&
+ ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
+ // For illegal types (e.g. 3xi32), most will be combined again into a
+ // wider (hopefully legal) type. If this is a terminal state, we are
+ // relying on type legalization here to produce something reasonable
+ // and this lowering quality could probably be improved. (TODO)
+ EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
+ SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
+ DAG.getVectorIdxConstant(0, DL));
+ auto Flags = ReduceVec->getFlags();
+ Flags.intersectWith(N->getFlags());
+ return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
+ }
+
+ return SDValue();
+}
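The two matches above compose one lane at a time. A standalone scalar check of the underlying identity, reduce(first K) + extract(K) == reduce(first K+1), with illustrative names and the integer-add case only:

#include <cassert>
#include <numeric>
#include <vector>

int reducePrefix(const std::vector<int> &V, unsigned K) {
  return std::accumulate(V.begin(), V.begin() + K, 0);
}

int main() {
  std::vector<int> V{3, 1, 4, 1, 5, 9};
  // binop (extract 0), (extract 1) -> reduce of a 2-element subvector.
  assert(V[0] + V[1] == reducePrefix(V, 2));
  // binop (reduce of first 2), (extract 2) -> reduce of first 3, etc.
  assert(reducePrefix(V, 2) + V[2] == reducePrefix(V, 3));
  return 0;
}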
+
// Try to fold (<bop> x, (reduction.<bop> vec, start))
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
@@ -10453,8 +12099,23 @@ static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
if (VT.isVector())
return SDValue();
- if (!Subtarget.hasShortForwardBranchOpt() ||
- (Slct.getOpcode() != ISD::SELECT &&
+ if (!Subtarget.hasShortForwardBranchOpt()) {
+ // (select cond, x, (and x, c)) has custom lowering with Zicond.
+ if ((!Subtarget.hasStdExtZicond() &&
+ !Subtarget.hasVendorXVentanaCondOps()) ||
+ N->getOpcode() != ISD::AND)
+ return SDValue();
+
+ // Maybe harmful when the condition code has multiple uses.
+ if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
+ return SDValue();
+
+ // Maybe harmful when VT is wider than XLen.
+ if (VT.getSizeInBits() > Subtarget.getXLen())
+ return SDValue();
+ }
+
+ if ((Slct.getOpcode() != ISD::SELECT &&
Slct.getOpcode() != RISCVISD::SELECT_CC) ||
!Slct.hasOneUse())
return SDValue();
@@ -10573,7 +12234,7 @@ static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
}
-// Try to turn (add (xor (setcc X, Y), 1) -1) into (neg (setcc X, Y)).
+// Try to turn (add (xor bool, 1), -1) into (neg bool).
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -10584,9 +12245,13 @@ static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
if (!isAllOnesConstant(N1))
return SDValue();
- // Look for an (xor (setcc X, Y), 1).
- if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)) ||
- N0.getOperand(0).getOpcode() != ISD::SETCC)
+ // Look for (xor X, 1).
+ if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
+ return SDValue();
+
+ // First xor input should be 0 or 1.
+ APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
+ if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
return SDValue();
// Emit a negate of the setcc.
@@ -10604,6 +12269,9 @@ static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
return V;
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
return V;
+ if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
+ return V;
+
// fold (add (select lhs, rhs, cc, 0, y), x) ->
// (select lhs, rhs, cc, x, (add x, y))
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
@@ -10732,7 +12400,7 @@ static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
// shift amounts larger than 31 would produce poison. If we wait until
// type legalization, we'll create RISCVISD::SRLW and we can't recover it
// to use a BEXT instruction.
- if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
+ if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
!isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
SDLoc DL(N0);
@@ -10759,7 +12427,7 @@ static SDValue performANDCombine(SDNode *N,
// shift amounts larger than 31 would produce poison. If we wait until
// type legalization, we'll create RISCVISD::SRLW and we can't recover it
// to use a BEXT instruction.
- if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
+ if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
N0.hasOneUse()) {
@@ -10774,6 +12442,8 @@ static SDValue performANDCombine(SDNode *N,
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
return V;
+ if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
+ return V;
if (DCI.isAfterLegalizeDAG())
if (SDValue V = combineDeMorganOfBoolean(N, DAG))
@@ -10784,17 +12454,64 @@ static SDValue performANDCombine(SDNode *N,
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
}
+// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
+// FIXME: Generalize to other binary operators with same operand.
+static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
+ SelectionDAG &DAG) {
+ assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
+
+ if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
+ N1.getOpcode() != RISCVISD::CZERO_NEZ ||
+ !N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+
+ // Should have the same condition.
+ SDValue Cond = N0.getOperand(1);
+ if (Cond != N1.getOperand(1))
+ return SDValue();
+
+ SDValue TrueV = N0.getOperand(0);
+ SDValue FalseV = N1.getOperand(0);
+
+ if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
+ TrueV.getOperand(1) != FalseV.getOperand(1) ||
+ !isOneConstant(TrueV.getOperand(1)) ||
+ !TrueV.hasOneUse() || !FalseV.hasOneUse())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
+ Cond);
+ SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
+ Cond);
+ SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
+ return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
+}
+
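A scalar model of czero.eqz/czero.nez and the identity the combine exploits; since exactly one half of the OR is live for any condition value, the common xor-with-1 can be applied once after the OR (names illustrative):

#include <cassert>
#include <cstdint>

uint64_t czero_eqz(uint64_t V, uint64_t C) { return C != 0 ? V : 0; }
uint64_t czero_nez(uint64_t V, uint64_t C) { return C == 0 ? V : 0; }

int main() {
  for (uint64_t C : {0u, 1u, 7u})
    for (uint64_t T : {0u, 1u})
      for (uint64_t F : {0u, 1u}) {
        uint64_t Before = czero_eqz(T ^ 1, C) | czero_nez(F ^ 1, C);
        uint64_t After = (czero_eqz(T, C) | czero_nez(F, C)) ^ 1;
        assert(Before == After);
      }
  return 0;
}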
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
SelectionDAG &DAG = DCI.DAG;
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
return V;
+ if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
+ return V;
if (DCI.isAfterLegalizeDAG())
if (SDValue V = combineDeMorganOfBoolean(N, DAG))
return V;
+ // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
+ // We may be able to pull a common operation out of the true and false value.
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
+ return V;
+ if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
+ return V;
+
// fold (or (select cond, 0, y), x) ->
// (select cond, x, (or x, y))
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
@@ -10805,6 +12522,21 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+ // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
+ // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll
+ // create RISCVISD::SLLW and we can't recover it to use a BSET instruction.
+ if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
+ N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
+ N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
+ !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
+ SDLoc DL(N);
+ SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
+ SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
+ }
+
// fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
// NOTE: Assumes ROL being legal means ROLW is legal.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -10817,7 +12549,7 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
}
// Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
- if (N0.hasOneUse() && N0.getOpcode() == ISD::SETCC && isOneConstant(N1)) {
+ if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
if (ConstN00 && CC == ISD::SETLT) {
@@ -10832,32 +12564,102 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
return V;
+ if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
+ return V;
+
// fold (xor (select cond, 0, y), x) ->
// (select cond, x, (xor x, y))
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
}
-// According to the property that indexed load/store instructions
-// zero-extended their indices, \p narrowIndex tries to narrow the type of index
-// operand if it is matched to pattern (shl (zext x to ty), C) and bits(x) + C <
-// bits(ty).
-static SDValue narrowIndex(SDValue N, SelectionDAG &DAG) {
- if (N.getOpcode() != ISD::SHL || !N->hasOneUse())
+static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector())
return SDValue();
+ SDLoc DL(N);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue MulOper;
+ unsigned AddSubOpc;
+
+ // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
+ // (mul x, add (y, 1)) -> (add x, (mul x, y))
+ // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
+ // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
+ auto IsAddSubWith1 = [&](SDValue V) -> bool {
+ AddSubOpc = V->getOpcode();
+ if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
+ SDValue Opnd = V->getOperand(1);
+ MulOper = V->getOperand(0);
+ if (AddSubOpc == ISD::SUB)
+ std::swap(Opnd, MulOper);
+ if (isOneOrOneSplat(Opnd))
+ return true;
+ }
+ return false;
+ };
+
+ if (IsAddSubWith1(N0)) {
+ SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
+ return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
+ }
+
+ if (IsAddSubWith1(N1)) {
+ SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
+ return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
+ }
+
+ return SDValue();
+}
+
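A quick scalar check of the algebra behind the vmadd/vnmsub rewrites above (illustration only):

#include <cassert>

int main() {
  for (int X = -3; X <= 3; ++X)
    for (int Y = -3; Y <= 3; ++Y) {
      assert((X + 1) * Y == X * Y + Y); // maps onto vmadd
      assert((1 - X) * Y == Y - X * Y); // maps onto vnmsub
    }
  return 0;
}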
+/// According to the property that indexed load/store instructions zero-extend
+/// their indices, try to narrow the type of index operand.
+static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
+ if (isIndexTypeSigned(IndexType))
+ return false;
+
+ if (!N->hasOneUse())
+ return false;
+
+ EVT VT = N.getValueType();
+ SDLoc DL(N);
+
+ // In general, what we're doing here is seeing if we can sink a truncate to
+ // a smaller element type into the expression tree building our index.
+ // TODO: We can generalize this and handle a bunch more cases if useful.
+
+ // Narrow a buildvector to the narrowest element type. This requires less
+ // work and causes less register pressure at high LMUL, and creates smaller
+ // constants which may be cheaper to materialize.
+ if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
+ KnownBits Known = DAG.computeKnownBits(N);
+ unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
+ LLVMContext &C = *DAG.getContext();
+ EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
+ if (ResultVT.bitsLT(VT.getVectorElementType())) {
+ N = DAG.getNode(ISD::TRUNCATE, DL,
+ VT.changeVectorElementType(ResultVT), N);
+ return true;
+ }
+ }
+
+ // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
+ if (N.getOpcode() != ISD::SHL)
+ return false;
+
SDValue N0 = N.getOperand(0);
if (N0.getOpcode() != ISD::ZERO_EXTEND &&
N0.getOpcode() != RISCVISD::VZEXT_VL)
- return SDValue();
+ return false;
if (!N0->hasOneUse())
- return SDValue();
+ return false;
APInt ShAmt;
SDValue N1 = N.getOperand(1);
if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
- return SDValue();
+ return false;
- SDLoc DL(N);
SDValue Src = N0.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned SrcElen = SrcVT.getScalarSizeInBits();
@@ -10867,14 +12669,15 @@ static SDValue narrowIndex(SDValue N, SelectionDAG &DAG) {
// Skip if NewElen is not narrower than the original extended type.
if (NewElen >= N0.getValueType().getScalarSizeInBits())
- return SDValue();
+ return false;
EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
- return DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
+ N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
+ return true;
}
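A standalone sketch of the width bound behind the shl-of-zext case above: the shifted zero-extended value occupies at most bits(x) + C significant bits, so the index only needs the smallest power-of-two element width covering that, with a floor of 8. The helper name and rounding policy are illustrative, not the patch's API:

#include <cstdint>

unsigned narrowedIndexWidth(unsigned SrcElen, unsigned ShAmt) {
  unsigned Needed = SrcElen + ShAmt; // max active bits after the shift
  unsigned Width = 8;
  while (Width < Needed)
    Width *= 2;
  return Width; // e.g. zext i8 shifted by 2 -> 10 bits -> i16 indices
}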
// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
@@ -11949,10 +13752,18 @@ static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
VL);
}
-static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
return V;
+ if (N->getValueType(0).isScalableVector() &&
+ N->getValueType(0).getVectorElementType() == MVT::f32 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16())) {
+ return SDValue();
+ }
+
// FIXME: Ignore strict opcodes for now.
if (N->isTargetStrictFPOpcode())
return SDValue();
@@ -12003,7 +13814,15 @@ static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG) {
N->getOperand(2), Mask, VL);
}
-static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ if (N->getValueType(0).isScalableVector() &&
+ N->getValueType(0).getVectorElementType() == MVT::f32 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16())) {
+ return SDValue();
+ }
+
// FIXME: Ignore strict opcodes for now.
assert(!N->isTargetStrictFPOpcode() && "Unexpected opcode");
@@ -12036,7 +13855,15 @@ static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG) {
Op1, Merge, Mask, VL);
}
-static SDValue performFADDSUB_VLCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performFADDSUB_VLCombine(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ if (N->getValueType(0).isScalableVector() &&
+ N->getValueType(0).getVectorElementType() == MVT::f32 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16())) {
+ return SDValue();
+ }
+
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
SDValue Merge = N->getOperand(2);
@@ -12267,12 +14094,10 @@ static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
// shift can be omitted.
// Fold setlt (sra X, N), 0 -> setlt X, 0 and
// setge (sra X, N), 0 -> setge X, 0
- if (auto *RHSConst = dyn_cast<ConstantSDNode>(RHS.getNode())) {
- if ((CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
- LHS.getOpcode() == ISD::SRA && RHSConst->isZero()) {
- LHS = LHS.getOperand(0);
- return true;
- }
+ if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
+ LHS.getOpcode() == ISD::SRA) {
+ LHS = LHS.getOperand(0);
+ return true;
}
if (!ISD::isIntEqualitySetCC(CCVal))
@@ -12358,9 +14183,13 @@ static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
SDValue TrueVal, SDValue FalseVal,
bool Swapped) {
bool Commutative = true;
- switch (TrueVal.getOpcode()) {
+ unsigned Opc = TrueVal.getOpcode();
+ switch (Opc) {
default:
return SDValue();
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
case ISD::SUB:
Commutative = false;
break;
@@ -12383,12 +14212,18 @@ static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
SDLoc DL(N);
- SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
+ EVT OtherOpVT = OtherOp->getValueType(0);
+ SDValue IdentityOperand =
+ DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
+ if (!Commutative)
+ IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
+ assert(IdentityOperand && "No identity operand!");
if (Swapped)
- std::swap(OtherOp, Zero);
- SDValue NewSel = DAG.getSelect(DL, VT, N->getOperand(0), OtherOp, Zero);
+ std::swap(OtherOp, IdentityOperand);
+ SDValue NewSel =
+ DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
}
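A scalar check of the fold above: pushing the select onto the variable operand with an identity value (the neutral element for commutative ops, 0 for the non-commutative sub and shift cases, where only the RHS is folded) preserves the result:

#include <cassert>

int main() {
  for (unsigned C = 0; C <= 1; ++C)
    for (unsigned X = 0; X <= 8; ++X)
      for (unsigned Y = 0; Y <= 3; ++Y) {
        // select c, (add x, y), x == add x, (select c, y, 0)
        assert((C ? X + Y : X) == X + (C ? Y : 0u));
        // select c, (shl x, y), x == shl x, (select c, y, 0)
        assert((C ? X << Y : X) == X << (C ? Y : 0u));
      }
  return 0;
}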
@@ -12453,11 +14288,45 @@ static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
}
+static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ SDValue Cond = N->getOperand(0);
+ SDValue True = N->getOperand(1);
+ SDValue False = N->getOperand(2);
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ EVT CondVT = Cond.getValueType();
+
+ if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
+ return SDValue();
+
+ // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
+ // BEXTI, where C is power of 2.
+ if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
+ (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
+ SDValue LHS = Cond.getOperand(0);
+ SDValue RHS = Cond.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
+ uint64_t MaskVal = LHS.getConstantOperandVal(1);
+ if (isPowerOf2_64(MaskVal) && !isInt<12>(MaskVal))
+ return DAG.getSelect(DL, VT,
+ DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
+ False, True);
+ }
+ }
+ return SDValue();
+}
+
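The rewrite relies on select ((x & C) == 0), T, F being the same as select ((x & C) != 0), F, T; inverting the compare lets a single-bit mask too large for a 12-bit immediate be tested with BEXTI feeding the conditional ops. A quick scalar check with illustrative values:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t C = uint64_t(1) << 12; // power of 2, not isInt<12>
  for (uint64_t X : {uint64_t(0), C, C + 1, ~uint64_t(0)})
    assert(((X & C) == 0 ? 10 : 20) == ((X & C) != 0 ? 20 : 10));
  return 0;
}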
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
return Folded;
+ if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
+ return V;
+
if (Subtarget.hasShortForwardBranchOpt())
return SDValue();
@@ -12468,6 +14337,132 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
}
+/// If we have a build_vector where each lane is binop X, C, where C
+/// is a constant (but not necessarily the same constant on all lanes),
+/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
+/// We assume that materializing a constant build vector will be no more
+/// expensive than performing O(n) binops.
+static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget,
+ const RISCVTargetLowering &TLI) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ assert(!VT.isScalableVector() && "unexpected build vector");
+
+ if (VT.getVectorNumElements() == 1)
+ return SDValue();
+
+ const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
+ if (!TLI.isBinOp(Opcode))
+ return SDValue();
+
+ if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
+ return SDValue();
+
+ SmallVector<SDValue> LHSOps;
+ SmallVector<SDValue> RHSOps;
+ for (SDValue Op : N->ops()) {
+ if (Op.isUndef()) {
+ // We can't form a divide or remainder from undef.
+ if (!DAG.isSafeToSpeculativelyExecute(Opcode))
+ return SDValue();
+
+ LHSOps.push_back(Op);
+ RHSOps.push_back(Op);
+ continue;
+ }
+
+ // TODO: We can handle operations which have a neutral rhs value
+ // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
+ // of profit in a more explicit manner.
+ if (Op.getOpcode() != Opcode || !Op.hasOneUse())
+ return SDValue();
+
+ LHSOps.push_back(Op.getOperand(0));
+ if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
+ !isa<ConstantFPSDNode>(Op.getOperand(1)))
+ return SDValue();
+ // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
+ // have different LHS and RHS types.
+ if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
+ return SDValue();
+ RHSOps.push_back(Op.getOperand(1));
+ }
+
+ return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
+ DAG.getBuildVector(VT, DL, RHSOps));
+}
+
+static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget,
+ const RISCVTargetLowering &TLI) {
+ SDValue InVec = N->getOperand(0);
+ SDValue InVal = N->getOperand(1);
+ SDValue EltNo = N->getOperand(2);
+ SDLoc DL(N);
+
+ EVT VT = InVec.getValueType();
+ if (VT.isScalableVector())
+ return SDValue();
+
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
+ // move the insert_vector_elts into the arms of the binop. Note that
+ // the new RHS must be a constant.
+ const unsigned InVecOpcode = InVec->getOpcode();
+ if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
+ InVal.hasOneUse()) {
+ SDValue InVecLHS = InVec->getOperand(0);
+ SDValue InVecRHS = InVec->getOperand(1);
+ SDValue InValLHS = InVal->getOperand(0);
+ SDValue InValRHS = InVal->getOperand(1);
+
+ if (!ISD::isBuildVectorOfConstantSDNodes(InVecRHS.getNode()))
+ return SDValue();
+ if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
+ return SDValue();
+ // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
+ // have different LHS and RHS types.
+ if (InVec.getOperand(0).getValueType() !=
+ InVec.getOperand(1).getValueType())
+ return SDValue();
+ SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
+ InVecLHS, InValLHS, EltNo);
+ SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
+ InVecRHS, InValRHS, EltNo);
+ return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
+ }
+
+ // Given insert_vector_elt (concat_vectors ...), InVal, Elt
+ // move the insert_vector_elt to the source operand of the concat_vector.
+ if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
+ return SDValue();
+
+ auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
+ if (!IndexC)
+ return SDValue();
+ unsigned Elt = IndexC->getZExtValue();
+
+ EVT ConcatVT = InVec.getOperand(0).getValueType();
+ if (ConcatVT.getVectorElementType() != InVal.getValueType())
+ return SDValue();
+ unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
+ SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
+ EltNo.getValueType());
+
+ unsigned ConcatOpIdx = Elt / ConcatNumElts;
+ SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
+ ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
+ ConcatOp, InVal, NewIdx);
+
+ SmallVector<SDValue> ConcatOps;
+ ConcatOps.append(InVec->op_begin(), InVec->op_end());
+ ConcatOps[ConcatOpIdx] = ConcatOp;
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
+}
+
// If we're concatenating a series of vector loads like
// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
// Then we can turn this into a strided load by widening the vector elements
@@ -12492,13 +14487,11 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
EVT BaseLdVT = BaseLd->getValueType(0);
- SDValue BasePtr = BaseLd->getBasePtr();
// Go through the loads and check that they're strided
- SDValue CurPtr = BasePtr;
- SDValue Stride;
+ SmallVector<LoadSDNode *> Lds;
+ Lds.push_back(BaseLd);
Align Align = BaseLd->getAlign();
-
for (SDValue Op : N->ops().drop_front()) {
auto *Ld = dyn_cast<LoadSDNode>(Op);
if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
@@ -12506,42 +14499,46 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
Ld->getValueType(0) != BaseLdVT)
return SDValue();
- SDValue Ptr = Ld->getBasePtr();
- // Check that each load's pointer is (add CurPtr, Stride)
- if (Ptr.getOpcode() != ISD::ADD || Ptr.getOperand(0) != CurPtr)
- return SDValue();
- SDValue Offset = Ptr.getOperand(1);
- if (!Stride)
- Stride = Offset;
- else if (Offset != Stride)
- return SDValue();
+ Lds.push_back(Ld);
// The common alignment is the most restrictive (smallest) of all the loads
Align = std::min(Align, Ld->getAlign());
-
- CurPtr = Ptr;
}
- // A special case is if the stride is exactly the width of one of the loads,
- // in which case it's contiguous and can be combined into a regular vle
- // without changing the element size
- if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
- ConstStride &&
- ConstStride->getZExtValue() == BaseLdVT.getFixedSizeInBits() / 8) {
- MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(),
- VT.getStoreSize(), Align);
- // Can't do the combine if the load isn't naturally aligned with the element
- // type
- if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(),
- DAG.getDataLayout(), VT, *MMO))
+ using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
+ auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
+ LoadSDNode *Ld2) -> std::optional<PtrDiff> {
+ // If the load ptrs can be decomposed into a common (Base + Index) with a
+ // common constant stride, then return the constant stride.
+ BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
+ BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
+ if (BIO1.equalBaseIndex(BIO2, DAG))
+ return {{BIO2.getOffset() - BIO1.getOffset(), false}};
+
+ // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
+ SDValue P1 = Ld1->getBasePtr();
+ SDValue P2 = Ld2->getBasePtr();
+ if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
+ return {{P2.getOperand(1), false}};
+ if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
+ return {{P1.getOperand(1), true}};
+
+ return std::nullopt;
+ };
+
+ // Get the distance between the first and second loads
+ auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
+ if (!BaseDiff)
+ return SDValue();
+
+ // Check all the loads are the same distance apart
+ for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
+ if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
return SDValue();
- SDValue WideLoad = DAG.getLoad(VT, DL, BaseLd->getChain(), BasePtr, MMO);
- for (SDValue Ld : N->ops())
- DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), WideLoad);
- return WideLoad;
- }
+ // TODO: At this point, we've successfully matched a generalized gather
+ // load. Maybe we should emit that, and then move the specialized
+ // matchers above and below into a DAG combine?
// Get the widened scalar type, e.g. v4i8 -> i64
unsigned WideScalarBitWidth =
@@ -12557,21 +14554,29 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
return SDValue();
- MVT ContainerVT = TLI.getContainerForFixedLengthVector(WideVecVT);
- SDValue VL =
- getDefaultVLOps(WideVecVT, ContainerVT, DL, DAG, Subtarget).second;
- SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
+ auto [StrideVariant, MustNegateStride] = *BaseDiff;
+ SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
+ ? std::get<SDValue>(StrideVariant)
+ : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
+ Lds[0]->getOffset().getValueType());
+ if (MustNegateStride)
+ Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
+
+ SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
SDValue IntID =
- DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, Subtarget.getXLenVT());
- SDValue Ops[] = {BaseLd->getChain(),
- IntID,
- DAG.getUNDEF(ContainerVT),
- BasePtr,
- Stride,
- VL};
+ DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
+ Subtarget.getXLenVT());
+
+ SDValue AllOneMask =
+ DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
+ DAG.getConstant(1, DL, MVT::i1));
+
+ SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT),
+ BaseLd->getBasePtr(), Stride, AllOneMask};
uint64_t MemSize;
- if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride))
+ if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
+ ConstStride && ConstStride->getSExtValue() >= 0)
// total size = (elsize * n) + (stride - elsize) * (n-1)
// = elsize + stride * (n-1)
MemSize = WideScalarVT.getSizeInBits() +
@@ -12589,11 +14594,7 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
for (SDValue Ld : N->ops())
DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
- // Note: Perform the bitcast before the convertFromScalableVector so we have
- // balanced pairs of convertFromScalable/convertToScalable
- SDValue Res = DAG.getBitcast(
- TLI.getContainerForFixedLengthVector(VT.getSimpleVT()), StridedLoad);
- return convertFromScalableVector(VT, Res, DAG, Subtarget);
+ return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
}
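A quick check of the memory-size identity used above for a non-negative constant stride: n elements of elsize bits spaced stride bits apart span elsize + stride * (n - 1) bits from the start of the first element to the end of the last:

#include <cassert>
#include <cstdint>

int main() {
  for (int64_t El = 8; El <= 64; El *= 2)
    for (int64_t Stride = El; Stride <= 4 * El; Stride += El)
      for (int64_t N = 1; N <= 8; ++N)
        assert(El * N + (Stride - El) * (N - 1) == El + Stride * (N - 1));
  return 0;
}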
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
@@ -12653,9 +14654,121 @@ static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(Opc, DL, VT, Ops);
}
+static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
+ ISD::MemIndexType &IndexType,
+ RISCVTargetLowering::DAGCombinerInfo &DCI) {
+ if (!DCI.isBeforeLegalize())
+ return false;
+
+ SelectionDAG &DAG = DCI.DAG;
+ const MVT XLenVT =
+ DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
+
+ const EVT IndexVT = Index.getValueType();
+
+ // RISC-V indexed loads only support the "unsigned unscaled" addressing
+ // mode, so anything else must be manually legalized.
+ if (!isIndexTypeSigned(IndexType))
+ return false;
+
+ if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
+ // Any index legalization should first promote to XLenVT, so we don't lose
+ // bits when scaling. This may create an illegal index type so we let
+ // LLVM's legalization take care of the splitting.
+ // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
+ Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
+ IndexVT.changeVectorElementType(XLenVT), Index);
+ }
+ IndexType = ISD::UNSIGNED_SCALED;
+ return true;
+}
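Why sign-extending to XLenVT and then treating the index as unsigned is sound, as a scalar sketch: address arithmetic wraps modulo 2^XLen, so the sign-extended bit pattern added as an unsigned value produces the same address as the signed add:

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t Idx = -4; Idx <= 4; ++Idx) {
    uint64_t Base = 0x1000;
    uint64_t Want = uint64_t(int64_t(Base) + int64_t(Idx)); // signed intent
    uint64_t Got = Base + uint64_t(int64_t(Idx)); // sext + unsigned add
    assert(Want == Got);
  }
  return 0;
}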
+
+/// Match the index vector of a scatter or gather node as the shuffle mask
+/// which performs the rearrangement if possible. Will only match if
+/// all lanes are touched, and thus replacing the scatter or gather with
+/// a unit strided access and shuffle is legal.
+static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
+ SmallVector<int> &ShuffleMask) {
+ if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
+ return false;
+ if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
+ return false;
+
+ const unsigned ElementSize = VT.getScalarStoreSize();
+ const unsigned NumElems = VT.getVectorNumElements();
+
+ // Create the shuffle mask and check all bits active
+ assert(ShuffleMask.empty());
+ BitVector ActiveLanes(NumElems);
+ for (unsigned i = 0; i < Index->getNumOperands(); i++) {
+ // TODO: We've found an active bit of UB, and could be
+ // more aggressive here if desired.
+ if (Index->getOperand(i)->isUndef())
+ return false;
+ uint64_t C = Index->getConstantOperandVal(i);
+ if (C % ElementSize != 0)
+ return false;
+ C = C / ElementSize;
+ if (C >= NumElems)
+ return false;
+ ShuffleMask.push_back(C);
+ ActiveLanes.set(C);
+ }
+ return ActiveLanes.all();
+}
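A standalone model of the matching logic above: byte offsets must be element-aligned, in range, and must touch every lane exactly once, in which case the gather is a unit-stride load plus this shuffle mask (names illustrative):

#include <cstdint>
#include <optional>
#include <vector>

std::optional<std::vector<int>>
indexToShuffleMask(const std::vector<uint64_t> &ByteOffsets,
                   unsigned ElementSize, unsigned NumElems) {
  std::vector<int> Mask;
  std::vector<bool> Active(NumElems, false);
  for (uint64_t C : ByteOffsets) {
    if (C % ElementSize != 0 || C / ElementSize >= NumElems)
      return std::nullopt;
    Mask.push_back(int(C / ElementSize));
    Active[C / ElementSize] = true;
  }
  for (bool A : Active)
    if (!A)
      return std::nullopt; // an untouched lane: not a pure shuffle
  return Mask;
}
// e.g. an i32 gather with offsets {4, 0, 12, 8} yields mask {1, 0, 3, 2}.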
+
+/// Match the index of a gather or scatter operation as an operation
+/// with twice the element width and half the number of elements. This is
+/// generally profitable (if legal) because these operations are linear
+/// in VL, so even if we cause some extra VTYPE/VL toggles, we still
+/// come out ahead.
+static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
+ Align BaseAlign, const RISCVSubtarget &ST) {
+ if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
+ return false;
+ if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
+ return false;
+
+ // Attempt a doubling. If we can use an element type 4x or 8x in
+ // size, this will happen via multiple iterations of the transform.
+ const unsigned NumElems = VT.getVectorNumElements();
+ if (NumElems % 2 != 0)
+ return false;
+
+ const unsigned ElementSize = VT.getScalarStoreSize();
+ const unsigned WiderElementSize = ElementSize * 2;
+ if (WiderElementSize > ST.getELen()/8)
+ return false;
+
+ if (!ST.hasFastUnalignedAccess() && BaseAlign < WiderElementSize)
+ return false;
+
+ for (unsigned i = 0; i < Index->getNumOperands(); i++) {
+ // TODO: We've found an active bit of UB, and could be
+ // more aggressive here if desired.
+ if (Index->getOperand(i)->isUndef())
+ return false;
+ // TODO: This offset check is too strict if we support fully
+ // misaligned memory operations.
+ uint64_t C = Index->getConstantOperandVal(i);
+ if (i % 2 == 0) {
+ if (C % WiderElementSize != 0)
+ return false;
+ continue;
+ }
+ uint64_t Last = Index->getConstantOperandVal(i-1);
+ if (C != Last + ElementSize)
+ return false;
+ }
+ return true;
+}
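A standalone model of the pairing test above: even lanes must be aligned to the doubled element size, and each odd lane must address the element immediately after its predecessor, so adjacent pairs fuse into one element of twice the width (names illustrative):

#include <cstdint>
#include <vector>

bool pairsIntoWiderOp(const std::vector<uint64_t> &ByteOffsets,
                      unsigned ElementSize) {
  if (ByteOffsets.size() % 2 != 0)
    return false;
  unsigned Wider = ElementSize * 2;
  for (size_t I = 0; I < ByteOffsets.size(); ++I) {
    if (I % 2 == 0) {
      if (ByteOffsets[I] % Wider != 0) // even lane: wider alignment
        return false;
      continue;
    }
    if (ByteOffsets[I] != ByteOffsets[I - 1] + ElementSize)
      return false; // odd lane: must be contiguous with its partner
  }
  return true;
}
// e.g. i32 offsets {8, 12, 0, 4} pair into two i64 lanes at {8, 0}.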
+
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
+ const MVT XLenVT = Subtarget.getXLenVT();
+ SDLoc DL(N);
// Helper to call SimplifyDemandedBits on an operand of N where only some low
// bits are demanded. N will be added to the Worklist if it was not deleted.
@@ -12687,8 +14800,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return DCI.CombineTo(N, Lo, Hi);
}
- SDLoc DL(N);
-
// It's cheaper to materialise two 32-bit integers than to load a double
// from the constant pool and transfer it to integer registers through the
// stack.
@@ -12795,14 +14906,21 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return performORCombine(N, DCI, Subtarget);
case ISD::XOR:
return performXORCombine(N, DAG, Subtarget);
+ case ISD::MUL:
+ return performMULCombine(N, DAG);
case ISD::FADD:
case ISD::UMAX:
case ISD::UMIN:
case ISD::SMAX:
case ISD::SMIN:
case ISD::FMAXNUM:
- case ISD::FMINNUM:
- return combineBinOpToReduce(N, DAG, Subtarget);
+ case ISD::FMINNUM: {
+ if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
+ return V;
+ if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
+ return V;
+ return SDValue();
+ }
case ISD::SETCC:
return performSETCCCombine(N, DAG, Subtarget);
case ISD::SIGN_EXTEND_INREG:
@@ -12829,6 +14947,56 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
}
}
return SDValue();
+ case RISCVISD::TRUNCATE_VECTOR_VL: {
+ // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
+ // This would be benefit for the cases where X and Y are both the same value
+ // type of low precision vectors. Since the truncate would be lowered into
+ // n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
+ // restriction, such pattern would be expanded into a series of "vsetvli"
+ // and "vnsrl" instructions later to reach this point.
+ auto IsTruncNode = [](SDValue V) {
+ if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
+ return false;
+ SDValue VL = V.getOperand(2);
+ auto *C = dyn_cast<ConstantSDNode>(VL);
+ // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand
+ bool IsVLMAXForVMSET = (C && C->isAllOnes()) ||
+ (isa<RegisterSDNode>(VL) &&
+ cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
+ return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
+ IsVLMAXForVMSET;
+ };
+
+ SDValue Op = N->getOperand(0);
+
+ // We first need to find the innermost TRUNCATE_VECTOR_VL node
+ // to distinguish such a pattern.
+ while (IsTruncNode(Op)) {
+ if (!Op.hasOneUse())
+ return SDValue();
+ Op = Op.getOperand(0);
+ }
+
+ if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) {
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
+ N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N10 = N1.getOperand(0);
+ if (N00.getValueType().isVector() &&
+ N00.getValueType() == N10.getValueType() &&
+ N->getValueType(0) == N10.getValueType()) {
+ unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
+ SDValue SMin = DAG.getNode(
+ ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
+ DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
+ return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
+ }
+ }
+ }
+ break;
+ }
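A scalar check of the combine above for SEW=8: truncating (sext(x) >> y) back to i8 equals x >> min(y, 7), since arithmetic shifts past bit 7 leave only sign bits that the truncate keeps anyway:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (int X = -128; X <= 127; ++X)
    for (unsigned Y = 0; Y < 16; ++Y) {
      int8_t Trunc = int8_t(int32_t(int8_t(X)) >> Y);
      int8_t Folded = int8_t(int8_t(X) >> std::min(Y, 7u));
      assert(Trunc == Folded);
    }
  return 0;
}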
case ISD::TRUNCATE:
return performTRUNCATECombine(N, DAG, Subtarget);
case ISD::SELECT:
@@ -12939,6 +15107,19 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
}
}
+ // If both true/false are an xor with 1, pull through the select.
+ // This can occur after op legalization if both operands are setccs that
+ // require an xor to invert.
+ // FIXME: Generalize to other binary ops with identical operand?
+ if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
+ TrueV.getOperand(1) == FalseV.getOperand(1) &&
+ isOneConstant(TrueV.getOperand(1)) &&
+ TrueV.hasOneUse() && FalseV.hasOneUse()) {
+ SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
+ TrueV.getOperand(0), FalseV.getOperand(0));
+ return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
+ }
+
return SDValue();
}
case RISCVISD::BR_CC: {
@@ -12985,75 +15166,187 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
}
- case ISD::MGATHER:
- case ISD::MSCATTER:
- case ISD::VP_GATHER:
- case ISD::VP_SCATTER: {
- if (!DCI.isBeforeLegalize())
- break;
- SDValue Index, ScaleOp;
- bool IsIndexSigned = false;
- if (const auto *VPGSN = dyn_cast<VPGatherScatterSDNode>(N)) {
- Index = VPGSN->getIndex();
- ScaleOp = VPGSN->getScale();
- IsIndexSigned = VPGSN->isIndexSigned();
- assert(!VPGSN->isIndexScaled() &&
- "Scaled gather/scatter should not be formed");
- } else {
- const auto *MGSN = cast<MaskedGatherScatterSDNode>(N);
- Index = MGSN->getIndex();
- ScaleOp = MGSN->getScale();
- IsIndexSigned = MGSN->isIndexSigned();
- assert(!MGSN->isIndexScaled() &&
- "Scaled gather/scatter should not be formed");
+ case ISD::MGATHER: {
+ const auto *MGN = dyn_cast<MaskedGatherSDNode>(N);
+ const EVT VT = N->getValueType(0);
+ SDValue Index = MGN->getIndex();
+ SDValue ScaleOp = MGN->getScale();
+ ISD::MemIndexType IndexType = MGN->getIndexType();
+ assert(!MGN->isIndexScaled() &&
+ "Scaled gather/scatter should not be formed");
+ SDLoc DL(N);
+ if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
+ return DAG.getMaskedGather(
+ N->getVTList(), MGN->getMemoryVT(), DL,
+ {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
+ MGN->getBasePtr(), Index, ScaleOp},
+ MGN->getMemOperand(), IndexType, MGN->getExtensionType());
+
+ if (narrowIndex(Index, IndexType, DAG))
+ return DAG.getMaskedGather(
+ N->getVTList(), MGN->getMemoryVT(), DL,
+ {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
+ MGN->getBasePtr(), Index, ScaleOp},
+ MGN->getMemOperand(), IndexType, MGN->getExtensionType());
+
+ if (Index.getOpcode() == ISD::BUILD_VECTOR &&
+ MGN->getExtensionType() == ISD::NON_EXTLOAD) {
+ if (std::optional<VIDSequence> SimpleVID = isSimpleVIDSequence(Index);
+ SimpleVID && SimpleVID->StepDenominator == 1) {
+ const int64_t StepNumerator = SimpleVID->StepNumerator;
+ const int64_t Addend = SimpleVID->Addend;
+
+ // Note: We don't need to check alignment here since (by assumption
+ // from the existence of the gather), our offsets must be sufficiently
+ // aligned.
+
+ const EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
+ assert(IndexType == ISD::UNSIGNED_SCALED);
+ SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
+ DAG.getConstant(Addend, DL, PtrVT));
+
+ SDVTList VTs = DAG.getVTList({VT, MVT::Other});
+ SDValue IntID =
+ DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
+ XLenVT);
+ SDValue Ops[] =
+ {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr,
+ DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()};
+ return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
+ Ops, VT, MGN->getMemOperand());
+ }
}
- EVT IndexVT = Index.getValueType();
- MVT XLenVT = Subtarget.getXLenVT();
- // RISC-V indexed loads only support the "unsigned unscaled" addressing
- // mode, so anything else must be manually legalized.
- bool NeedsIdxLegalization =
- (IsIndexSigned && IndexVT.getVectorElementType().bitsLT(XLenVT));
- if (!NeedsIdxLegalization)
- break;
- SDLoc DL(N);
+ SmallVector<int> ShuffleMask;
+ if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
+ matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
+ SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
+ MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
+ MGN->getMask(), DAG.getUNDEF(VT),
+ MGN->getMemoryVT(), MGN->getMemOperand(),
+ ISD::UNINDEXED, ISD::NON_EXTLOAD);
+ SDValue Shuffle =
+ DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
+ return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
+ }
- // Any index legalization should first promote to XLenVT, so we don't lose
- // bits when scaling. This may create an illegal index type so we let
- // LLVM's legalization take care of the splitting.
- // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
- if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
- IndexVT = IndexVT.changeVectorElementType(XLenVT);
- Index = DAG.getNode(IsIndexSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
- DL, IndexVT, Index);
+ if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
+ matchIndexAsWiderOp(VT, Index, MGN->getMask(),
+ MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
+ SmallVector<SDValue> NewIndices;
+ for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
+ NewIndices.push_back(Index.getOperand(i));
+ EVT IndexVT = Index.getValueType()
+ .getHalfNumVectorElementsVT(*DAG.getContext());
+ Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
+
+ unsigned ElementSize = VT.getScalarStoreSize();
+ EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
+ auto EltCnt = VT.getVectorElementCount();
+ assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
+ EltCnt.divideCoefficientBy(2));
+ SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
+ EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ EltCnt.divideCoefficientBy(2));
+ SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
+
+ SDValue Gather =
+ DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
+ {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
+ Index, ScaleOp},
+ MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
+ SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
+ return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
+ }
+ break;
+ }
+ case ISD::MSCATTER: {
+ const auto *MSN = dyn_cast<MaskedScatterSDNode>(N);
+ SDValue Index = MSN->getIndex();
+ SDValue ScaleOp = MSN->getScale();
+ ISD::MemIndexType IndexType = MSN->getIndexType();
+ assert(!MSN->isIndexScaled() &&
+ "Scaled gather/scatter should not be formed");
+
+ SDLoc DL(N);
+ if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
+ return DAG.getMaskedScatter(
+ N->getVTList(), MSN->getMemoryVT(), DL,
+ {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
+ Index, ScaleOp},
+ MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
+
+ if (narrowIndex(Index, IndexType, DAG))
+ return DAG.getMaskedScatter(
+ N->getVTList(), MSN->getMemoryVT(), DL,
+ {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
+ Index, ScaleOp},
+ MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
+
+ EVT VT = MSN->getValue()->getValueType(0);
+ SmallVector<int> ShuffleMask;
+ if (!MSN->isTruncatingStore() &&
+ matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
+ SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
+ DAG.getUNDEF(VT), ShuffleMask);
+ return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
+ DAG.getUNDEF(XLenVT), MSN->getMask(),
+ MSN->getMemoryVT(), MSN->getMemOperand(),
+ ISD::UNINDEXED, false);
}
+ break;
+ }
+ case ISD::VP_GATHER: {
+ const auto *VPGN = dyn_cast<VPGatherSDNode>(N);
+ SDValue Index = VPGN->getIndex();
+ SDValue ScaleOp = VPGN->getScale();
+ ISD::MemIndexType IndexType = VPGN->getIndexType();
+ assert(!VPGN->isIndexScaled() &&
+ "Scaled gather/scatter should not be formed");
+
+ SDLoc DL(N);
+ if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
+ return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
+ {VPGN->getChain(), VPGN->getBasePtr(), Index,
+ ScaleOp, VPGN->getMask(),
+ VPGN->getVectorLength()},
+ VPGN->getMemOperand(), IndexType);
- ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_SCALED;
- if (const auto *VPGN = dyn_cast<VPGatherSDNode>(N))
+ if (narrowIndex(Index, IndexType, DAG))
return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
{VPGN->getChain(), VPGN->getBasePtr(), Index,
ScaleOp, VPGN->getMask(),
VPGN->getVectorLength()},
- VPGN->getMemOperand(), NewIndexTy);
- if (const auto *VPSN = dyn_cast<VPScatterSDNode>(N))
+ VPGN->getMemOperand(), IndexType);
+
+ break;
+ }
+ case ISD::VP_SCATTER: {
+ const auto *VPSN = dyn_cast<VPScatterSDNode>(N);
+ SDValue Index = VPSN->getIndex();
+ SDValue ScaleOp = VPSN->getScale();
+ ISD::MemIndexType IndexType = VPSN->getIndexType();
+ assert(!VPSN->isIndexScaled() &&
+ "Scaled gather/scatter should not be formed");
+
+ SDLoc DL(N);
+ if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
{VPSN->getChain(), VPSN->getValue(),
VPSN->getBasePtr(), Index, ScaleOp,
VPSN->getMask(), VPSN->getVectorLength()},
- VPSN->getMemOperand(), NewIndexTy);
- if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N))
- return DAG.getMaskedGather(
- N->getVTList(), MGN->getMemoryVT(), DL,
- {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
- MGN->getBasePtr(), Index, ScaleOp},
- MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType());
- const auto *MSN = cast<MaskedScatterSDNode>(N);
- return DAG.getMaskedScatter(
- N->getVTList(), MSN->getMemoryVT(), DL,
- {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
- Index, ScaleOp},
- MSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
+ VPSN->getMemOperand(), IndexType);
+
+ if (narrowIndex(Index, IndexType, DAG))
+ return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
+ {VPSN->getChain(), VPSN->getValue(),
+ VPSN->getBasePtr(), Index, ScaleOp,
+ VPSN->getMask(), VPSN->getVectorLength()},
+ VPSN->getMemOperand(), IndexType);
+ break;
}
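
All four gather/scatter paths above retry after narrowIndex, which shrinks the index element width when every index provably fits in the narrower type; the index vector then occupies half the vector registers for the same operation. The real helper pattern-matches zero-extended values rather than scanning constants, but the underlying test is just a range check, roughly:

#include <cstdint>
#include <limits>
#include <vector>

// Toy range check: if every i64 index fits in i32 (under the given
// signedness), the index operand could be carried at half the width.
bool indicesFitIn32Bits(const std::vector<int64_t> &Indices, bool Signed) {
  for (int64_t I : Indices) {
    bool Fits = Signed ? I >= std::numeric_limits<int32_t>::min() &&
                             I <= std::numeric_limits<int32_t>::max()
                       : I >= 0 && I <= int64_t(UINT32_MAX);
    if (!Fits)
      return false;
  }
  return true;
}
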
case RISCVISD::SRA_VL:
case RISCVISD::SRL_VL:
@@ -13062,7 +15355,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
// We don't need the upper 32 bits of a 64-bit element for a shift amount.
SDLoc DL(N);
- SDValue VL = N->getOperand(3);
+ SDValue VL = N->getOperand(4);
EVT VT = N->getValueType(0);
ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
ShAmt.getOperand(1), VL);
@@ -13108,12 +15401,12 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case RISCVISD::STRICT_VFNMADD_VL:
case RISCVISD::STRICT_VFMSUB_VL:
case RISCVISD::STRICT_VFNMSUB_VL:
- return performVFMADD_VLCombine(N, DAG);
+ return performVFMADD_VLCombine(N, DAG, Subtarget);
case RISCVISD::FMUL_VL:
- return performVFMUL_VLCombine(N, DAG);
+ return performVFMUL_VLCombine(N, DAG, Subtarget);
case RISCVISD::FADD_VL:
case RISCVISD::FSUB_VL:
- return performFADDSUB_VLCombine(N, DAG);
+ return performFADDSUB_VLCombine(N, DAG, Subtarget);
case ISD::LOAD:
case ISD::STORE: {
if (DCI.isAfterLegalizeDAG())
@@ -13149,16 +15442,17 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
// Get the constant vector bits
APInt NewC(Val.getValueSizeInBits(), 0);
+ uint64_t EltSize = Val.getScalarValueSizeInBits();
for (unsigned i = 0; i < Val.getNumOperands(); i++) {
if (Val.getOperand(i).isUndef())
continue;
- NewC.insertBits(Val.getConstantOperandAPInt(i),
- i * Val.getScalarValueSizeInBits());
+ NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
+ i * EltSize);
}
MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
- if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(),
- Subtarget.getFeatureBits(), true) <= 2 &&
+ if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
+ true) <= 2 &&
allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
NewVT, *Store->getMemOperand())) {
SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
@@ -13201,7 +15495,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Src = Val.getOperand(0);
MVT VecVT = Src.getSimpleValueType();
// VecVT should be scalable and memory VT should match the element type.
- if (VecVT.isScalableVector() &&
+ if (!Store->isIndexed() && VecVT.isScalableVector() &&
MemVT == VecVT.getVectorElementType()) {
SDLoc DL(N);
MVT MaskVT = getMaskTypeFor(VecVT);
@@ -13226,19 +15520,51 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return Gather;
break;
}
+ case ISD::BUILD_VECTOR:
+ if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
+ return V;
+ break;
case ISD::CONCAT_VECTORS:
if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
return V;
break;
+ case ISD::INSERT_VECTOR_ELT:
+ if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
+ return V;
+ break;
+ case RISCVISD::VFMV_V_F_VL: {
+ const MVT VT = N->getSimpleValueType(0);
+ SDValue Passthru = N->getOperand(0);
+ SDValue Scalar = N->getOperand(1);
+ SDValue VL = N->getOperand(2);
+
+ // If VL is 1, we can use vfmv.s.f.
+ if (isOneConstant(VL))
+ return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
+ break;
+ }
case RISCVISD::VMV_V_X_VL: {
+ const MVT VT = N->getSimpleValueType(0);
+ SDValue Passthru = N->getOperand(0);
+ SDValue Scalar = N->getOperand(1);
+ SDValue VL = N->getOperand(2);
+
// Tail agnostic VMV.V.X only demands the vector element bitwidth from the
// scalar input.
- unsigned ScalarSize = N->getOperand(1).getValueSizeInBits();
- unsigned EltWidth = N->getValueType(0).getScalarSizeInBits();
- if (ScalarSize > EltWidth && N->getOperand(0).isUndef())
+ unsigned ScalarSize = Scalar.getValueSizeInBits();
+ unsigned EltWidth = VT.getScalarSizeInBits();
+ if (ScalarSize > EltWidth && Passthru.isUndef())
if (SimplifyDemandedLowBitsHelper(1, EltWidth))
return SDValue(N, 0);
+ // If VL is 1 and the scalar value won't benefit from an immediate, we
+ // can use vmv.s.x.
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
+ if (isOneConstant(VL) &&
+ (!Const || Const->isZero() ||
+ !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
+ return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
+
break;
}
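
The new VL==1 case above replaces a whole-register splat with a single-element move, but only when vmv.v.i could not already do the job in one instruction: a nonzero constant that fits the 5-bit signed immediate stays a splat. The guarding predicate, written out as a sketch (names are illustrative):

#include <cstdint>

// simm5 is vmv.v.i's immediate range.
bool isSImm5(int64_t V) { return V >= -16 && V <= 15; }

// Prefer vmv.s.x unless the scalar is a nonzero simm5 constant, where
// vmv.v.i is already a single immediate-form instruction. Zero still
// favors vmv.s.x, which can read x0 directly.
bool preferVMV_S_X(bool IsConst, int64_t C) {
  return !IsConst || C == 0 || !isSImm5(C);
}
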
case RISCVISD::VFMV_S_F_VL: {
@@ -13258,6 +15584,35 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return Src.getOperand(0);
// TODO: Use insert_subvector/extract_subvector to change widen/narrow?
}
+ [[fallthrough]];
+ }
+ case RISCVISD::VMV_S_X_VL: {
+ const MVT VT = N->getSimpleValueType(0);
+ SDValue Passthru = N->getOperand(0);
+ SDValue Scalar = N->getOperand(1);
+ SDValue VL = N->getOperand(2);
+
+ // Use M1 or smaller to avoid over-constraining register allocation.
+ const MVT M1VT = getLMUL1VT(VT);
+ if (M1VT.bitsLT(VT)) {
+ SDValue M1Passthru =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
+ DAG.getVectorIdxConstant(0, DL));
+ SDValue Result =
+ DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
+ Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
+ DAG.getConstant(0, DL, XLenVT));
+ return Result;
+ }
+
+ // Use vmv.v.i if possible. We limit this to LMUL1; LMUL2 or higher
+ // would needlessly constrain the register allocator without any
+ // benefit.
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
+ Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
+ VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
+ return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
+
break;
}
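
A scalar insert only writes element 0, so the VMV_S_X_VL case above performs it on the low single-register (M1) slice of the vector and reinserts the result, freeing the register allocator from tying down a whole LMUL>1 register group. A rough model of the type clamp involved (illustrative; assumes a 64-bit minimum VLEN):

#include <algorithm>

struct VecTy { unsigned EltBits; unsigned MinElts; };

// Clamp a scalable vector type to at most one register's worth of
// elements, mirroring what getLMUL1VT computes.
VecTy clampToLMUL1(VecTy VT, unsigned MinVLenBits = 64) {
  return {VT.EltBits, std::min(VT.MinElts, MinVLenBits / VT.EltBits)};
}
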
case ISD::INTRINSIC_VOID:
@@ -13269,6 +15624,43 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// By default we do not combine any intrinsic.
default:
return SDValue();
+ case Intrinsic::riscv_masked_strided_load: {
+ MVT VT = N->getSimpleValueType(0);
+ auto *Load = cast<MemIntrinsicSDNode>(N);
+ SDValue PassThru = N->getOperand(2);
+ SDValue Base = N->getOperand(3);
+ SDValue Stride = N->getOperand(4);
+ SDValue Mask = N->getOperand(5);
+
+ // If the stride is equal to the element size in bytes, we can use
+ // a masked.load.
+ const unsigned ElementSize = VT.getScalarStoreSize();
+ if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
+ StrideC && StrideC->getZExtValue() == ElementSize)
+ return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,
+ DAG.getUNDEF(XLenVT), Mask, PassThru,
+ Load->getMemoryVT(), Load->getMemOperand(),
+ ISD::UNINDEXED, ISD::NON_EXTLOAD);
+ return SDValue();
+ }
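
The strided-load combine above relies on a simple equivalence: a byte stride equal to the element's store size addresses consecutive elements, so the intrinsic is an ordinary masked unit-stride load. Reference semantics in plain C++ (a model of the operation, not LLVM code):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// With StrideBytes == sizeof(T), Base + I * StrideBytes walks a contiguous
// array and this degenerates to a masked load with pass-through.
template <typename T>
std::vector<T> maskedStridedLoad(const uint8_t *Base, size_t StrideBytes,
                                 const std::vector<bool> &Mask,
                                 std::vector<T> PassThru) {
  for (size_t I = 0; I < Mask.size(); ++I)
    if (Mask[I]) {
      T V;
      std::memcpy(&V, Base + I * StrideBytes, sizeof(T));
      PassThru[I] = V;  // masked-off lanes keep the pass-through value
    }
  return PassThru;
}
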
+ case Intrinsic::riscv_masked_strided_store: {
+ auto *Store = cast<MemIntrinsicSDNode>(N);
+ SDValue Value = N->getOperand(2);
+ SDValue Base = N->getOperand(3);
+ SDValue Stride = N->getOperand(4);
+ SDValue Mask = N->getOperand(5);
+
+ // If the stride is equal to the element size in bytes, we can use
+ // a masked.store.
+ const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
+ if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
+ StrideC && StrideC->getZExtValue() == ElementSize)
+ return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
+ DAG.getUNDEF(XLenVT), Mask,
+ Store->getMemoryVT(), Store->getMemOperand(),
+ ISD::UNINDEXED, false);
+ return SDValue();
+ }
case Intrinsic::riscv_vcpop:
case Intrinsic::riscv_vcpop_mask:
case Intrinsic::riscv_vfirst:
@@ -13287,23 +15679,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return DAG.getConstant(-1, DL, VT);
return DAG.getConstant(0, DL, VT);
}
- case Intrinsic::riscv_vloxei:
- case Intrinsic::riscv_vloxei_mask:
- case Intrinsic::riscv_vluxei:
- case Intrinsic::riscv_vluxei_mask:
- case Intrinsic::riscv_vsoxei:
- case Intrinsic::riscv_vsoxei_mask:
- case Intrinsic::riscv_vsuxei:
- case Intrinsic::riscv_vsuxei_mask:
- if (SDValue V = narrowIndex(N->getOperand(4), DAG)) {
- SmallVector<SDValue, 8> Ops(N->ops());
- Ops[4] = V;
- const auto *MemSD = cast<MemIntrinsicSDNode>(N);
- return DAG.getMemIntrinsicNode(N->getOpcode(), SDLoc(N), N->getVTList(),
- Ops, MemSD->getMemoryVT(),
- MemSD->getMemOperand());
- }
- return SDValue();
}
}
case ISD::BITCAST: {
@@ -13386,12 +15761,12 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
// Neither constant will fit into an immediate, so find materialisation
// costs.
- int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
- Subtarget.getFeatureBits(),
- /*CompressionCost*/true);
+ int C1Cost =
+ RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
+ /*CompressionCost*/ true);
int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
- ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits(),
- /*CompressionCost*/true);
+ ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
+ /*CompressionCost*/ true);
// Materialising `c1` is cheaper than materialising `c1 << c2`, so the
// combine should be prevented.
@@ -13562,6 +15937,15 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known = Known.sext(BitWidth);
break;
}
+ case RISCVISD::SLLW: {
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
+ // Restore the original width by sign extending.
+ Known = Known.sext(BitWidth);
+ break;
+ }
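
The SLLW case added above computes known bits in three steps: truncate both operands to 32 bits, model the 32-bit shift, then sign-extend back to the node's width, since SLLW sign-extends its 32-bit result. A toy model for an exactly-known shift amount (the KnownBits::shl call above also handles partially known amounts):

#include <cstdint>

struct KB { uint64_t Zero, One; };  // known-0 and known-1 bit masks

KB sllwKnownBits(KB X, unsigned ShAmt) {  // requires ShAmt < 32
  uint32_t Zero32 = (uint32_t(X.Zero) << ShAmt) | ((1u << ShAmt) - 1);
  uint32_t One32 = uint32_t(X.One) << ShAmt;
  KB R{Zero32, One32};
  // Sign-extend: whatever is known about bit 31 propagates upward.
  if (Zero32 & 0x80000000u) R.Zero |= 0xFFFFFFFF00000000ull;
  if (One32 & 0x80000000u)  R.One  |= 0xFFFFFFFF00000000ull;
  return R;
}
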
case RISCVISD::CTZW: {
KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
@@ -13600,7 +15984,7 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.One.setBit(Log2_32(MinVLenB));
break;
}
- case RISCVISD::FPCLASS: {
+ case RISCVISD::FCLASS: {
// fclass will only set one of the low 10 bits.
Known.Zero.setBitsFrom(10);
break;
@@ -13615,7 +15999,7 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
break;
case Intrinsic::riscv_vsetvli:
case Intrinsic::riscv_vsetvlimax:
- // Assume that VL output is >= 65536.
+ // Assume that VL output is <= 65536.
// TODO: Take SEW and LMUL into account.
if (BitWidth > 17)
Known.Zero.setBitsFrom(17);
@@ -13705,6 +16089,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
assert(Subtarget.hasStdExtA());
return 33;
}
+ break;
}
}
@@ -14187,47 +16572,6 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
return TailMBB;
}
-static MachineBasicBlock *emitVFCVT_RM(MachineInstr &MI, MachineBasicBlock *BB,
- unsigned Opcode) {
- DebugLoc DL = MI.getDebugLoc();
-
- const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
-
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- Register SavedFRM = MRI.createVirtualRegister(&RISCV::GPRRegClass);
-
- assert(MI.getNumOperands() == 8 || MI.getNumOperands() == 7);
- unsigned FRMIdx = MI.getNumOperands() == 8 ? 4 : 3;
-
- // Update FRM and save the old value.
- BuildMI(*BB, MI, DL, TII.get(RISCV::SwapFRMImm), SavedFRM)
- .addImm(MI.getOperand(FRMIdx).getImm());
-
- // Emit an VFCVT with the FRM == DYN
- auto MIB = BuildMI(*BB, MI, DL, TII.get(Opcode));
-
- for (unsigned I = 0; I < MI.getNumOperands(); I++)
- if (I != FRMIdx)
- MIB = MIB.add(MI.getOperand(I));
- else
- MIB = MIB.add(MachineOperand::CreateImm(7)); // frm = DYN
-
- MIB.add(MachineOperand::CreateReg(RISCV::FRM,
- /*IsDef*/ false,
- /*IsImp*/ true));
-
- if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
- MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
-
- // Restore FRM.
- BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFRM))
- .addReg(SavedFRM, RegState::Kill);
-
- // Erase the pseudoinstruction.
- MI.eraseFromParent();
- return BB;
-}
-
static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
MachineBasicBlock *BB,
unsigned CVTXOpc,
@@ -14472,43 +16816,6 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
Subtarget);
-#define PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, LMUL) \
- case RISCV::RMOpc##_##LMUL: \
- return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL); \
- case RISCV::RMOpc##_##LMUL##_MASK: \
- return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL##_MASK);
-
-#define PseudoVFCVT_RM_CASE(RMOpc, Opc) \
- PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M1) \
- PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M2) \
- PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M4) \
- PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF2) \
- PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF4)
-
-#define PseudoVFCVT_RM_CASE_M8(RMOpc, Opc) \
- PseudoVFCVT_RM_CASE(RMOpc, Opc) \
- PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M8)
-
-#define PseudoVFCVT_RM_CASE_MF8(RMOpc, Opc) \
- PseudoVFCVT_RM_CASE(RMOpc, Opc) \
- PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF8)
-
- // VFCVT
- PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_X_F_V, PseudoVFCVT_X_F_V)
- PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_XU_F_V, PseudoVFCVT_XU_F_V)
- PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_XU_V, PseudoVFCVT_F_XU_V)
- PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_X_V, PseudoVFCVT_F_X_V)
-
- // VFWCVT
- PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_XU_F_V, PseudoVFWCVT_XU_F_V);
- PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_X_F_V, PseudoVFWCVT_X_F_V);
-
- // VFNCVT
- PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_XU_F_W, PseudoVFNCVT_XU_F_W);
- PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_X_F_W, PseudoVFNCVT_X_F_W);
- PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_XU_W, PseudoVFNCVT_F_XU_W);
- PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_X_W, PseudoVFNCVT_F_X_W);
-
case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
RISCV::PseudoVFCVT_F_X_V_M1_MASK);
@@ -14535,41 +16842,26 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case RISCV::PseudoFROUND_D_INX:
case RISCV::PseudoFROUND_D_IN32X:
return emitFROUND(MI, BB, Subtarget);
+ case TargetOpcode::STATEPOINT:
+ case TargetOpcode::STACKMAP:
+ case TargetOpcode::PATCHPOINT:
+ if (!Subtarget.is64Bit())
+ report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
+ "supported on 64-bit targets");
+ return emitPatchPoint(MI, BB);
}
}
-// Returns the index to the rounding mode immediate value if any, otherwise the
-// function will return None.
-static std::optional<unsigned> getRoundModeIdx(const MachineInstr &MI) {
- uint64_t TSFlags = MI.getDesc().TSFlags;
- if (!RISCVII::hasRoundModeOp(TSFlags))
- return std::nullopt;
-
- // The operand order
- // -------------------------------------
- // | n-1 (if any) | n-2 | n-3 | n-4 |
- // | policy | sew | vl | rm |
- // -------------------------------------
- return MI.getNumExplicitOperands() - RISCVII::hasVecPolicyOp(TSFlags) - 3;
-}
-
void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
SDNode *Node) const {
- // Add FRM dependency to vector floating-point instructions with dynamic
- // rounding mode.
- if (auto RoundModeIdx = getRoundModeIdx(MI)) {
- unsigned FRMImm = MI.getOperand(*RoundModeIdx).getImm();
- if (FRMImm == RISCVFPRndMode::DYN && !MI.readsRegister(RISCV::FRM)) {
- MI.addOperand(MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false,
- /*isImp*/ true));
- }
- }
-
// Add FRM dependency to any instructions with dynamic rounding mode.
- unsigned Opc = MI.getOpcode();
- auto Idx = RISCV::getNamedOperandIdx(Opc, RISCV::OpName::frm);
- if (Idx < 0)
- return;
+ int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
+ if (Idx < 0) {
+ // Vector pseudos have FRM index indicated by TSFlags.
+ Idx = RISCVII::getFRMOpNum(MI.getDesc());
+ if (Idx < 0)
+ return;
+ }
if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
return;
// If the instruction already reads FRM, don't add another read.
@@ -14604,10 +16896,6 @@ void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
// register-size fields in the same situations they would be for fixed
// arguments.
-static const MCPhysReg ArgGPRs[] = {
- RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
- RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
-};
static const MCPhysReg ArgFPR16s[] = {
RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
@@ -14632,6 +16920,14 @@ static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
RISCV::V20M4};
static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
+ArrayRef<MCPhysReg> RISCV::getArgGPRs() {
+ static const MCPhysReg ArgGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
+ RISCV::X13, RISCV::X14, RISCV::X15,
+ RISCV::X16, RISCV::X17};
+
+ return ArrayRef(ArgGPRs);
+}
+
// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary.
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
@@ -14639,6 +16935,7 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
MVT ValVT2, MVT LocVT2,
ISD::ArgFlagsTy ArgFlags2) {
unsigned XLenInBytes = XLen / 8;
+ ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs();
if (Register Reg = State.AllocateReg(ArgGPRs)) {
// At least one half can be passed via register.
State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
@@ -14759,6 +17056,8 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
LocInfo = CCValAssign::BCvt;
}
+ ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs();
+
// If this is a variadic argument, the RISC-V calling convention requires
// that it is assigned an 'even' or 'aligned' register if it has 8-byte
// alignment (RV32) or 16-byte alignment (RV64). An aligned register should
@@ -14785,23 +17084,29 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
// Handle passing f64 on RV32D with a soft float ABI or when floating point
// registers are exhausted.
if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
- assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
- "Can't lower f64 if it is split");
+ assert(PendingLocs.empty() && "Can't lower f64 if it is split");
// Depending on available argument GPRS, f64 may be passed in a pair of
// GPRs, split between a GPR and the stack, or passed completely on the
// stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
// cases.
Register Reg = State.AllocateReg(ArgGPRs);
- LocVT = MVT::i32;
if (!Reg) {
unsigned StackOffset = State.AllocateStack(8, Align(8));
State.addLoc(
CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
return false;
}
- if (!State.AllocateReg(ArgGPRs))
- State.AllocateStack(4, Align(4));
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ LocVT = MVT::i32;
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ Register HiReg = State.AllocateReg(ArgGPRs);
+ if (HiReg) {
+ State.addLoc(
+ CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
+ } else {
+ unsigned StackOffset = State.AllocateStack(4, Align(4));
+ State.addLoc(
+ CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
+ }
return false;
}
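
This hunk replaces the old implicit convention (the high half lives in LocReg+1, or on the stack when the low half landed in X17) with two explicit custom locations, one per f64 half. A compact model of the assignment order, with GPR indices standing in for a0-a7 (illustrative only):

#include <cstdint>
#include <optional>

struct F64Loc {
  unsigned LoReg;                        // GPR holding the low half
  std::optional<unsigned> HiReg;         // high half in the next GPR...
  std::optional<unsigned> HiStackOffset; // ...or in a 4-byte stack slot
};

// NextGPR and StackOffset are the allocator's running state; NumGPRs is 8
// for a0-a7. nullopt means the whole f64 goes to an 8-byte stack slot.
std::optional<F64Loc> assignF64OnRV32(unsigned &NextGPR, unsigned NumGPRs,
                                      unsigned &StackOffset) {
  if (NextGPR == NumGPRs) {
    StackOffset += 8;
    return std::nullopt;
  }
  F64Loc Loc{NextGPR++, std::nullopt, std::nullopt};
  if (NextGPR != NumGPRs)
    Loc.HiReg = NextGPR++;
  else {
    Loc.HiStackOffset = StackOffset;
    StackOffset += 4;
  }
  return Loc;
}
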
@@ -15002,12 +17307,18 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
break;
case CCValAssign::BCvt:
if (VA.getLocVT().isInteger() &&
- (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
+ (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
- else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
- Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
- else
+ } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
+ if (RV64LegalI32) {
+ Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
+ } else {
+ Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
+ }
+ } else {
Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
+ }
break;
}
return Val;
@@ -15061,13 +17372,19 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
break;
case CCValAssign::BCvt:
- if (VA.getLocVT().isInteger() &&
- (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
- Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
- else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
- Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
- else
+ if (LocVT.isInteger() &&
+ (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
+ Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
+ } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
+ if (RV64LegalI32) {
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
+ Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
+ } else {
+ Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
+ }
+ } else {
Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
+ }
break;
}
return Val;
@@ -15110,38 +17427,32 @@ static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
}
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
- const CCValAssign &VA, const SDLoc &DL) {
+ const CCValAssign &VA,
+ const CCValAssign &HiVA,
+ const SDLoc &DL) {
assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
"Unexpected VA");
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- if (VA.isMemLoc()) {
- // f64 is passed on the stack.
- int FI =
- MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*IsImmutable=*/true);
- SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
- return DAG.getLoad(MVT::f64, DL, Chain, FIN,
- MachinePointerInfo::getFixedStack(MF, FI));
- }
-
assert(VA.isRegLoc() && "Expected register VA assignment");
Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
SDValue Hi;
- if (VA.getLocReg() == RISCV::X17) {
+ if (HiVA.isMemLoc()) {
// Second half of f64 is passed on the stack.
- int FI = MFI.CreateFixedObject(4, 0, /*IsImmutable=*/true);
+ int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
+ /*IsImmutable=*/true);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
MachinePointerInfo::getFixedStack(MF, FI));
} else {
// Second half of f64 is passed in another GPR.
Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
- RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
+ RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
}
return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
@@ -15346,6 +17657,8 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
report_fatal_error("Unsupported calling convention");
case CallingConv::C:
case CallingConv::Fast:
+ case CallingConv::SPIR_KERNEL:
+ case CallingConv::GRAAL:
break;
case CallingConv::GHC:
if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
@@ -15384,15 +17697,16 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
: RISCV::CC_RISCV);
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
CCValAssign &VA = ArgLocs[i];
SDValue ArgValue;
// Passing f64 on RV32D with a soft float ABI must be handled as a special
// case.
- if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
- ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
- else if (VA.isRegLoc())
- ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
+ if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
+ assert(VA.needsCustom());
+ ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
+ } else if (VA.isRegLoc())
+ ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
else
ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
@@ -15404,12 +17718,12 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
// stores are relative to that.
InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
MachinePointerInfo()));
- unsigned ArgIndex = Ins[i].OrigArgIndex;
- unsigned ArgPartOffset = Ins[i].PartOffset;
+ unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
+ unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
assert(VA.getValVT().isVector() || ArgPartOffset == 0);
- while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
+ while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
CCValAssign &PartVA = ArgLocs[i + 1];
- unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
+ unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
if (PartVA.getValVT().isScalableVector())
Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
@@ -15417,6 +17731,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
MachinePointerInfo()));
++i;
+ ++InsIdx;
}
continue;
}
@@ -15428,57 +17743,56 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
if (IsVarArg) {
- ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
+ ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs();
unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
const TargetRegisterClass *RC = &RISCV::GPRRegClass;
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
- // Offset of the first variable argument from stack pointer, and size of
- // the vararg save area. For now, the varargs save area is either zero or
- // large enough to hold a0-a7.
- int VaArgOffset, VarArgsSaveSize;
+ // Size of the vararg save area. For now, the varargs save area is either
+ // zero or large enough to hold a0-a7.
+ int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
+ int FI;
// If all registers are allocated, then all varargs must be passed on the
// stack and we don't need to save any argregs.
- if (ArgRegs.size() == Idx) {
- VaArgOffset = CCInfo.getStackSize();
- VarArgsSaveSize = 0;
+ if (VarArgsSaveSize == 0) {
+ int VaArgOffset = CCInfo.getStackSize();
+ FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
} else {
- VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
- VaArgOffset = -VarArgsSaveSize;
+ int VaArgOffset = -VarArgsSaveSize;
+ FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
+
+ // If saving an odd number of registers, create an extra stack slot to
+ // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
+ // offsets to even-numbered registers remain 2*XLEN-aligned.
+ if (Idx % 2) {
+ MFI.CreateFixedObject(
+ XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
+ VarArgsSaveSize += XLenInBytes;
+ }
+
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+
+ // Copy the integer registers that may have been used for passing varargs
+ // to the vararg save area.
+ for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
+ const Register Reg = RegInfo.createVirtualRegister(RC);
+ RegInfo.addLiveIn(ArgRegs[I], Reg);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
+ SDValue Store = DAG.getStore(
+ Chain, DL, ArgValue, FIN,
+ MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
+ OutChains.push_back(Store);
+ FIN =
+ DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
+ }
}
// Record the frame index of the first variable argument
// which is a value necessary to VASTART.
- int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
RVFI->setVarArgsFrameIndex(FI);
-
- // If saving an odd number of registers then create an extra stack slot to
- // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
- // offsets to even-numbered registered remain 2*XLEN-aligned.
- if (Idx % 2) {
- MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
- VarArgsSaveSize += XLenInBytes;
- }
-
- // Copy the integer registers that may have been used for passing varargs
- // to the vararg save area.
- for (unsigned I = Idx; I < ArgRegs.size();
- ++I, VaArgOffset += XLenInBytes) {
- const Register Reg = RegInfo.createVirtualRegister(RC);
- RegInfo.addLiveIn(ArgRegs[I], Reg);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
- FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
- SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
- SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
- MachinePointerInfo::getFixedStack(MF, FI));
- cast<StoreSDNode>(Store.getNode())
- ->getMemOperand()
- ->setValue((Value *)nullptr);
- OutChains.push_back(Store);
- }
RVFI->setVarArgsSaveSize(VarArgsSaveSize);
}
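
The restructured vararg block computes the save-area size up front rather than deriving it register by register. For example, on RV32 with fixed arguments consuming a0-a4 (first unallocated index 5), a5-a7 are saved (12 bytes at offset -12) plus one 4-byte padding slot because three is odd, for 16 bytes total. The arithmetic, as a sketch with the register count and XLEN as assumed parameters:

#include <utility>

// Returns {VarArgsSaveSize, VaArgOffset} for the register-save path above;
// FirstUnallocated corresponds to CCInfo.getFirstUnallocated(ArgRegs).
std::pair<int, int> varargSaveArea(unsigned FirstUnallocated,
                                   unsigned NumArgGPRs = 8,
                                   int XLenInBytes = 4) {
  int Size = XLenInBytes * int(NumArgGPRs - FirstUnallocated);
  int Offset = -Size;
  if (Size != 0 && FirstUnallocated % 2 != 0)
    Size += XLenInBytes;  // padding keeps the area 2*XLEN-aligned
  return {Size, Offset};
}
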
@@ -15632,15 +17946,16 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
SDValue StackPtr;
- for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
+ for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
+ ++i, ++OutIdx) {
CCValAssign &VA = ArgLocs[i];
- SDValue ArgValue = OutVals[i];
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ SDValue ArgValue = OutVals[OutIdx];
+ ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
// Handle passing f64 on RV32D with a soft float ABI as a special case.
- bool IsF64OnRV32DSoftABI =
- VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
- if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
+ if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
+ assert(VA.isRegLoc() && "Expected register VA assignment");
+ assert(VA.needsCustom());
SDValue SplitF64 = DAG.getNode(
RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
SDValue Lo = SplitF64.getValue(0);
@@ -15649,32 +17964,33 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
Register RegLo = VA.getLocReg();
RegsToPass.push_back(std::make_pair(RegLo, Lo));
- if (RegLo == RISCV::X17) {
+ // Get the CCValAssign for the Hi part.
+ CCValAssign &HiVA = ArgLocs[++i];
+
+ if (HiVA.isMemLoc()) {
// Second half of f64 is passed on the stack.
- // Work out the address of the stack slot.
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
+ SDValue Address =
+ DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
+ DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
// Emit the store.
MemOpChains.push_back(
- DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
+ DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
} else {
// Second half of f64 is passed in another GPR.
- assert(RegLo < RISCV::X31 && "Invalid register pair");
- Register RegHigh = RegLo + 1;
+ Register RegHigh = HiVA.getLocReg();
RegsToPass.push_back(std::make_pair(RegHigh, Hi));
}
continue;
}
- // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
- // as any other MemLoc.
-
// Promote the value if needed.
// For now, only handle fully promoted and indirect arguments.
if (VA.getLocInfo() == CCValAssign::Indirect) {
// Store the argument in a stack slot and pass its address.
Align StackAlign =
- std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
+ std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
getPrefTypeAlign(ArgValue.getValueType(), DAG));
TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
// If the original argument was split (e.g. i128), we need
@@ -15682,16 +17998,16 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Vectors may be partly split to registers and partly to the stack, in
// which case the base address is partly offset and subsequent stores are
// relative to that.
- unsigned ArgIndex = Outs[i].OrigArgIndex;
- unsigned ArgPartOffset = Outs[i].PartOffset;
+ unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
+ unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
assert(VA.getValVT().isVector() || ArgPartOffset == 0);
// Calculate the total size to store. We don't have access to what we're
// actually storing other than performing the loop and collecting the
// info.
SmallVector<std::pair<SDValue, SDValue>> Parts;
- while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
- SDValue PartValue = OutVals[i + 1];
- unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
+ while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
+ SDValue PartValue = OutVals[OutIdx + 1];
+ unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
EVT PartVT = PartValue.getValueType();
if (PartVT.isScalableVector())
@@ -15700,6 +18016,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
Parts.push_back(std::make_pair(PartValue, Offset));
++i;
+ ++OutIdx;
}
SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
@@ -15841,7 +18158,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
// Copy all of the result registers out of their specified physreg.
- for (auto &VA : RVLocs) {
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ auto &VA = RVLocs[i];
// Copy the value out
SDValue RetValue =
DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
@@ -15850,9 +18168,9 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
Glue = RetValue.getValue(2);
if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
- assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
- SDValue RetValue2 =
- DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
+ assert(VA.needsCustom());
+ SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
+ MVT::i32, Glue);
Chain = RetValue2.getValue(1);
Glue = RetValue2.getValue(2);
RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
@@ -15915,21 +18233,21 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
- for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
- SDValue Val = OutVals[i];
+ for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
+ SDValue Val = OutVals[OutIdx];
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
// Handle returning f64 on RV32D with a soft float ABI.
assert(VA.isRegLoc() && "Expected return via registers");
+ assert(VA.needsCustom());
SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
DAG.getVTList(MVT::i32, MVT::i32), Val);
SDValue Lo = SplitF64.getValue(0);
SDValue Hi = SplitF64.getValue(1);
Register RegLo = VA.getLocReg();
- assert(RegLo < RISCV::X31 && "Invalid register pair");
- Register RegHi = RegLo + 1;
+ Register RegHi = RVLocs[++i].getLocReg();
if (STI.isRegisterReservedByUser(RegLo) ||
STI.isRegisterReservedByUser(RegHi))
@@ -16067,10 +18385,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(ADD_LO)
NODE_NAME_CASE(HI)
NODE_NAME_CASE(LLA)
- NODE_NAME_CASE(LGA)
NODE_NAME_CASE(ADD_TPREL)
- NODE_NAME_CASE(LA_TLS_IE)
- NODE_NAME_CASE(LA_TLS_GD)
NODE_NAME_CASE(MULHSU)
NODE_NAME_CASE(SLLW)
NODE_NAME_CASE(SRAW)
@@ -16097,7 +18412,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FP_ROUND_BF16)
NODE_NAME_CASE(FP_EXTEND_BF16)
NODE_NAME_CASE(FROUND)
- NODE_NAME_CASE(FPCLASS)
+ NODE_NAME_CASE(FCLASS)
NODE_NAME_CASE(FMAX)
NODE_NAME_CASE(FMIN)
NODE_NAME_CASE(READ_CYCLE_WIDE)
@@ -16159,6 +18474,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(SREM_VL)
NODE_NAME_CASE(SRA_VL)
NODE_NAME_CASE(SRL_VL)
+ NODE_NAME_CASE(ROTL_VL)
+ NODE_NAME_CASE(ROTR_VL)
NODE_NAME_CASE(SUB_VL)
NODE_NAME_CASE(UDIV_VL)
NODE_NAME_CASE(UREM_VL)
@@ -16193,8 +18510,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CTLZ_VL)
NODE_NAME_CASE(CTTZ_VL)
NODE_NAME_CASE(CTPOP_VL)
- NODE_NAME_CASE(FMINNUM_VL)
- NODE_NAME_CASE(FMAXNUM_VL)
+ NODE_NAME_CASE(VFMIN_VL)
+ NODE_NAME_CASE(VFMAX_VL)
NODE_NAME_CASE(MULHS_VL)
NODE_NAME_CASE(MULHU_VL)
NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
@@ -16241,6 +18558,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VWADDU_W_VL)
NODE_NAME_CASE(VWSUB_W_VL)
NODE_NAME_CASE(VWSUBU_W_VL)
+ NODE_NAME_CASE(VWSLL_VL)
NODE_NAME_CASE(VFWMUL_VL)
NODE_NAME_CASE(VFWADD_VL)
NODE_NAME_CASE(VFWSUB_VL)
@@ -16314,6 +18632,12 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// TODO: Support fixed vectors up to XLen for P extension?
if (VT.isVector())
break;
+ if (VT == MVT::f16 && Subtarget.hasStdExtZhinxOrZhinxmin())
+ return std::make_pair(0U, &RISCV::GPRF16RegClass);
+ if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
+ return std::make_pair(0U, &RISCV::GPRF32RegClass);
+ if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
+ return std::make_pair(0U, &RISCV::GPRPF64RegClass);
return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
case 'f':
if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16)
@@ -16501,13 +18825,13 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return Res;
}
-unsigned
+InlineAsm::ConstraintCode
RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
// Currently only support length 1 constraints.
if (ConstraintCode.size() == 1) {
switch (ConstraintCode[0]) {
case 'A':
- return InlineAsm::Constraint_A;
+ return InlineAsm::ConstraintCode::A;
default:
break;
}
@@ -16517,10 +18841,10 @@ RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
}
void RISCVTargetLowering::LowerAsmOperandForConstraint(
- SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
+ SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
// Currently only support length 1 constraints.
- if (Constraint.length() == 1) {
+ if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'I':
// Validate & create a 12-bit signed immediate operand.
@@ -16581,8 +18905,11 @@ Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
- if (Subtarget.hasStdExtZtso())
+ if (Subtarget.hasStdExtZtso()) {
+ if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
+ return Builder.CreateFence(Ord);
return nullptr;
+ }
if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
return Builder.CreateFence(AtomicOrdering::Acquire);
@@ -16666,6 +18993,22 @@ getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
+ // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
+ // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
+ // mask, as this produces better code than the LR/SC loop emitted by
+ // int_riscv_masked_atomicrmw_xchg.
+ if (AI->getOperation() == AtomicRMWInst::Xchg &&
+ isa<ConstantInt>(AI->getValOperand())) {
+ ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
+ if (CVal->isZero())
+ return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
+ Builder.CreateNot(Mask, "Inv_Mask"),
+ AI->getAlign(), Ord);
+ if (CVal->isMinusOne())
+ return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
+ AI->getAlign(), Ord);
+ }
+
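
The special case added here rests on two word-level identities: exchanging all-zero bits into a masked field clears exactly that field, and exchanging all-one bits sets it, so a plain AND/OR read-modify-write replaces the LR/SC loop. On ordinary integers:

#include <cstdint>

// Mask selects the subword field being exchanged within Word.
uint32_t xchgFieldWithZero(uint32_t Word, uint32_t Mask) {
  return Word & ~Mask;  // becomes atomicrmw and with the inverted mask
}
uint32_t xchgFieldWithOnes(uint32_t Word, uint32_t Mask) {
  return Word | Mask;   // becomes atomicrmw or with the mask
}
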
unsigned XLen = Subtarget.getXLen();
Value *Ordering =
Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
@@ -16741,9 +19084,13 @@ Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
return Result;
}
-bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT,
+bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
EVT DataVT) const {
- return false;
+ // We have indexed loads for all legal index types. Indices are always
+ // zero-extended.
+ return Extend.getOpcode() == ISD::ZERO_EXTEND &&
+ isTypeLegal(Extend.getValueType()) &&
+ isTypeLegal(Extend.getOperand(0).getValueType());
}
bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
@@ -16999,8 +19346,8 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
unsigned *Fast) const {
if (!VT.isVector()) {
if (Fast)
- *Fast = Subtarget.enableUnalignedScalarMem();
- return Subtarget.enableUnalignedScalarMem();
+ *Fast = Subtarget.hasFastUnalignedAccess();
+ return Subtarget.hasFastUnalignedAccess();
}
// All vector implementations must support element alignment
@@ -17016,8 +19363,51 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
// misaligned accesses. TODO: Work through the codegen implications of
// allowing such accesses to be formed, and considered fast.
if (Fast)
- *Fast = Subtarget.enableUnalignedVectorMem();
- return Subtarget.enableUnalignedVectorMem();
+ *Fast = Subtarget.hasFastUnalignedAccess();
+ return Subtarget.hasFastUnalignedAccess();
+}
+
+
+ const AttributeList &FuncAttributes) const {
+ if (!Subtarget.hasVInstructions())
+ return MVT::Other;
+
+ if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
+ return MVT::Other;
+
+ // We use LMUL1 memory operations here for a non-obvious reason. Our caller
+ // has an expansion threshold, and we want the number of hardware memory
+ // operations to correspond roughly to that threshold. LMUL>1 operations
+ // are typically expanded linearly internally, and thus correspond to more
+ // than one actual memory operation. Note that store merging and load
+ // combining will typically form larger LMUL operations from the LMUL1
+ // operations emitted here, and that's okay because combining isn't
+ // introducing new memory operations; it's just merging existing ones.
+ const unsigned MinVLenInBytes = Subtarget.getRealMinVLen() / 8;
+ if (Op.size() < MinVLenInBytes)
+ // TODO: Figure out short memops. For the moment, do the default thing
+ // which ends up using scalar sequences.
+ return MVT::Other;
+
+ // Prefer i8 for non-zero memset as it allows us to avoid materializing
+ // a large scalar constant and instead use vmv.v.x/i to do the
+ // broadcast. For everything else, prefer ELenVT to minimize VL and thus
+ // maximize the chance we can encode the size in the vsetvli.
+ MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
+ MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
+
+ // Do we have sufficient alignment for our preferred VT? If not, revert
+ // to largest size allowed by our alignment criteria.
+ if (PreferredVT != MVT::i8 && !Subtarget.hasFastUnalignedAccess()) {
+ Align RequiredAlign(PreferredVT.getStoreSize());
+ if (Op.isFixedDstAlign())
+ RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
+ if (Op.isMemcpy())
+ RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
+ PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
+ }
+ return MVT::getVectorVT(PreferredVT,
+                         MinVLenInBytes / PreferredVT.getStoreSize());
}
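
A worked model of getOptimalMemOpType's arithmetic under assumed parameters (VLEN=128, so a 16-byte LMUL1 register, and ELEN=64): a 32-byte nonzero memset picks 16 x i8; a well-aligned 32-byte memcpy picks 2 x i64; a 4-byte-aligned copy without fast unaligned access degrades to 4 x i32. The sketch folds source and destination alignment into one parameter:

#include <algorithm>
#include <cstdint>

struct MemOpVT { unsigned EltBytes, NumElts; };  // {0, 0} => scalar path

MemOpVT pickMemOpVT(uint64_t Size, bool NonZeroMemset, unsigned AlignBytes,
                    bool FastUnaligned, unsigned MinVLenBytes = 16,
                    unsigned ELenBytes = 8) {
  if (Size < MinVLenBytes)
    return {0, 0};                // short ops keep the scalar expansion
  unsigned EltBytes = NonZeroMemset ? 1u : ELenBytes;
  if (EltBytes != 1 && !FastUnaligned)
    EltBytes = std::min(EltBytes, AlignBytes);  // degrade to the alignment
  return {EltBytes, MinVLenBytes / EltBytes};
}
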
bool RISCVTargetLowering::splitValueIntoRegisterParts(
@@ -17142,10 +19532,8 @@ static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
Function *ThreadPointerFunc =
Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
- return IRB.CreatePointerCast(
- IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
- IRB.CreateCall(ThreadPointerFunc), Offset),
- IRB.getInt8PtrTy()->getPointerTo(0));
+ return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
+ IRB.CreateCall(ThreadPointerFunc), Offset);
}
Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
@@ -17203,7 +19591,7 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
if (!isLegalElementTypeForRVV(ScalarType))
return false;
- if (!Subtarget.enableUnalignedVectorMem() &&
+ if (!Subtarget.hasFastUnalignedAccess() &&
Alignment < ScalarType.getStoreSize())
return false;
@@ -17503,6 +19891,72 @@ bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
}
+bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
+ if (VT.isScalableVector())
+ return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
+ if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
+ return true;
+ return Subtarget.hasStdExtZbb() &&
+ (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
+}
+
+unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
+ ISD::CondCode Cond) const {
+ return isCtpopFast(VT) ? 0 : 1;
+}
+
+bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
+ // At the moment, the only scalable instruction GISel knows how to lower is
+ // a ret with a scalable argument.
+
+ if (Inst.getType()->isScalableTy())
+ return true;
+
+ for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
+ if (Inst.getOperand(i)->getType()->isScalableTy() &&
+ !isa<ReturnInst>(&Inst))
+ return true;
+
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
+ if (AI->getAllocatedType()->isScalableTy())
+ return true;
+ }
+
+ return false;
+}
+
+SDValue
+RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ if (isIntDivCheap(N->getValueType(0), Attr))
+ return SDValue(N, 0); // Lower SDIV as SDIV
+
+ // Only perform this transform if short forward branch opt is supported.
+ if (!Subtarget.hasShortForwardBranchOpt())
+ return SDValue();
+ EVT VT = N->getValueType(0);
+ if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
+ return SDValue();
+
+ // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
+ if (Divisor.sgt(2048) || Divisor.slt(-2048))
+ return SDValue();
+ return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
+}
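
BuildSDIVPow2 opts into the conditional-move expansion only when the short-forward-branch feature makes the conditional add cheap, and the 2048 bound guarantees the addend 2^k - 1 fits a 12-bit addi immediate. The scalar identity being expanded:

#include <cstdint>

// Signed division by 2^K with round-toward-zero semantics: negative inputs
// have 2^K - 1 added before the arithmetic shift. The compare-and-add pair
// lowers to a short forward branch over a single addi/addiw.
int64_t sdivPow2(int64_t X, unsigned K) {
  if (X < 0)
    X += (int64_t(1) << K) - 1;
  return X >> K;
}
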
+
+bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
+ EVT VT, const APInt &AndMask) const {
+ if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
+ return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
+ return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
+}
+
+unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
+ return Subtarget.getMinimumJumpTableEntries();
+}
+
namespace llvm::RISCVVIntrinsicsTable {
#define GET_RISCVVIntrinsicsTable_IMPL
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 164ded95a1b5..41a2dc5771c8 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -22,9 +22,12 @@
#include <optional>
namespace llvm {
+class InstructionCost;
class RISCVSubtarget;
struct RISCVRegisterInfo;
+
namespace RISCVISD {
+// clang-format off
enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
RET_GLUE,
@@ -54,9 +57,6 @@ enum NodeType : unsigned {
// Selected as PseudoAddTPRel. Used to emit a TP-relative relocation.
ADD_TPREL,
- // Load address.
- LA_TLS_GD,
-
// Multiply high for signedxunsigned.
MULHSU,
// RV64I shifts, directly matching the semantics of the named RISC-V
@@ -121,7 +121,7 @@ enum NodeType : unsigned {
// inserter.
FROUND,
- FPCLASS,
+ FCLASS,
// Floating point fmax and fmin matching the RISC-V instruction semantics.
FMAX, FMIN,
@@ -143,10 +143,11 @@ enum NodeType : unsigned {
SM3P0, SM3P1,
// Vector Extension
+ FIRST_VL_VECTOR_OP,
// VMV_V_V_VL matches the semantics of vmv.v.v but includes an extra operand
// for the VL value to be used for the operation. The first operand is
// passthru operand.
- VMV_V_V_VL,
+ VMV_V_V_VL = FIRST_VL_VECTOR_OP,
// VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand
// for the VL value to be used for the operation. The first operand is
// passthru operand.
@@ -166,15 +167,13 @@ enum NodeType : unsigned {
// expanded late to two scalar stores and a stride 0 vector load.
// The first operand is passthru operand.
SPLAT_VECTOR_SPLIT_I64_VL,
- // Read VLENB CSR
- READ_VLENB,
// Truncates a RVV integer vector by one power-of-two. Carries both an extra
// mask and VL operand.
TRUNCATE_VECTOR_VL,
// Matches the semantics of vslideup/vslidedown. The first operand is the
- // pass-thru operand, the second is the source vector, the third is the
- // XLenVT index (either constant or non-constant), the fourth is the mask
- // and the fifth the VL.
+ // pass-thru operand, the second is the source vector, the third is the XLenVT
+ // index (either constant or non-constant), the fourth is the mask, the fifth
+ // is the VL and the sixth is the policy.
VSLIDEUP_VL,
VSLIDEDOWN_VL,
// Matches the semantics of vslide1up/slide1down. The first operand is
@@ -232,6 +231,8 @@ enum NodeType : unsigned {
SREM_VL,
SRA_VL,
SRL_VL,
+ ROTL_VL,
+ ROTR_VL,
SUB_VL,
UDIV_VL,
UREM_VL,
@@ -258,8 +259,8 @@ enum NodeType : unsigned {
FSUB_VL,
FMUL_VL,
FDIV_VL,
- FMINNUM_VL,
- FMAXNUM_VL,
+ VFMIN_VL,
+ VFMAX_VL,
// Vector unary ops with a mask as a second operand and VL as a third operand.
FNEG_VL,
@@ -307,6 +308,7 @@ enum NodeType : unsigned {
VWADDU_W_VL,
VWSUB_W_VL,
VWSUBU_W_VL,
+ VWSLL_VL,
VFWMUL_VL,
VFWADD_VL,
@@ -360,6 +362,10 @@ enum NodeType : unsigned {
// vfirst.m with additional mask and VL operands.
VFIRST_VL,
+ LAST_VL_VECTOR_OP = VFIRST_VL,
+
+ // Read VLENB CSR
+ READ_VLENB,
// Reads value of CSR.
// The first operand is a chain pointer. The second specifies address of the
// required CSR. Two results are produced, the read value and the new chain
@@ -405,22 +411,19 @@ enum NodeType : unsigned {
STRICT_FSETCC_VL,
STRICT_FSETCCS_VL,
STRICT_VFROUND_NOEXCEPT_VL,
+ LAST_RISCV_STRICTFP_OPCODE = STRICT_VFROUND_NOEXCEPT_VL,
// WARNING: Do not add anything in the end unless you want the node to
// have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
// opcodes will be thought as target memory ops!
- // Represents an AUIPC+L[WD] pair. Selected to PseudoLGA.
- LGA = ISD::FIRST_TARGET_MEMORY_OPCODE,
- // Load initial exec thread-local address.
- LA_TLS_IE,
-
- TH_LWD,
+ TH_LWD = ISD::FIRST_TARGET_MEMORY_OPCODE,
TH_LWUD,
TH_LDD,
TH_SWD,
TH_SDD,
};
+// clang-format on
} // namespace RISCVISD
class RISCVTargetLowering : public TargetLowering {
@@ -464,7 +467,7 @@ public:
SmallVectorImpl<Use *> &Ops) const override;
bool shouldScalarizeBinop(SDValue VecOp) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
- int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const;
+ std::pair<int, bool> getLegalZfaFPImm(const APFloat &Imm, EVT VT) const;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
@@ -487,6 +490,12 @@ public:
CallingConv::ID CC,
EVT VT) const override;
+ unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC, EVT VT,
+ EVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ MVT &RegisterVT) const override;
+
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
EVT VT) const override;
@@ -514,6 +523,13 @@ public:
shouldExpandBuildVectorWithShuffles(EVT VT,
unsigned DefinedValues) const override;
+ /// Return the cost of LMUL for linear operations.
+ InstructionCost getLMULCost(MVT VT) const;
+
+ InstructionCost getVRGatherVVCost(MVT VT) const;
+ InstructionCost getVRGatherVICost(MVT VT) const;
+ InstructionCost getVSlideCost(MVT VT) const;
+
// Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
@@ -552,13 +568,14 @@ public:
ConstraintType getConstraintType(StringRef Constraint) const override;
- unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override;
+ InlineAsm::ConstraintCode
+ getInlineAsmMemConstraint(StringRef ConstraintCode) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;
- void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
@@ -592,6 +609,10 @@ public:
}
bool convertSelectOfConstantsToMath(EVT VT) const override { return true; }
+ bool isCtpopFast(EVT VT) const override;
+
+ unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override;
+
bool preferZeroCompareBranch() const override { return true; }
bool shouldInsertFencesForAtomic(const Instruction *I) const override {
@@ -698,6 +719,9 @@ public:
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
unsigned *Fast = nullptr) const override;
+ EVT getOptimalMemOpType(const MemOp &Op,
+ const AttributeList &FuncAttributes) const override;
+
bool splitValueIntoRegisterParts(
SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC)
@@ -720,7 +744,13 @@ public:
// The following equations have been reordered to prevent loss of precision
// when calculating fractional LMUL.
return ((VectorBits / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
- };
+ }
+
+ // Return inclusive (low, high) bounds on the value of VLMAX for the
+ // given scalable container type given known bounds on VLEN.
+ static std::pair<unsigned, unsigned>
+ computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget);
+
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul);
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index);
static unsigned getRegClassIDForVecVT(MVT VT);
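
The operand grouping in the VLMAX formula above matters for fractional LMUL: computing (VectorBits / EltSize) * MinSize first keeps the intermediate integral, whereas dividing MinSize by RVVBitsPerBlock first would truncate to zero. For example, a vscale x 1 x i32 container (MinSize 32) at VLEN 128 yields (128/32)*32/64 = 2 lanes. As plain arithmetic:

// RVV sizes scalable types in 64-bit blocks; MinSize is the type's
// minimum size in bits (its size at vscale == 1).
unsigned computeVLMax(unsigned VectorBits, unsigned EltSize,
                      unsigned MinSize) {
  constexpr unsigned RVVBitsPerBlock = 64;
  return ((VectorBits / EltSize) * MinSize) / RVVBitsPerBlock;
}
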
@@ -730,7 +760,7 @@ public:
const RISCVRegisterInfo *TRI);
MVT getContainerForFixedLengthVector(MVT VT) const;
- bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
+ bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;
bool isLegalElementTypeForRVV(EVT ScalarTy) const;
@@ -777,6 +807,8 @@ public:
unsigned getMaxSupportedInterleaveFactor() const override { return 8; }
+ bool fallBackToDAGISel(const Instruction &Inst) const override;
+
bool lowerInterleavedLoad(LoadInst *LI,
ArrayRef<ShuffleVectorInst *> Shuffles,
ArrayRef<unsigned> Indices,
@@ -874,14 +906,12 @@ private:
SelectionDAG &DAG) const;
SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG, unsigned RISCVISDOpc,
- bool HasMergeOp = false) const;
- SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG, unsigned MaskOpc,
- unsigned VecOpc) const;
+ SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG,
- unsigned RISCVISDOpc) const;
+ SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG,
@@ -935,6 +965,14 @@ private:
/// For available scheduling models FDIV + two independent FMULs are much
/// faster than two FDIVs.
unsigned combineRepeatedFPDivisors() const override;
+
+ SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const override;
+
+ bool shouldFoldSelectWithSingleBitTest(EVT VT,
+ const APInt &AndMask) const override;
+
+ unsigned getMinimumJumpTableEntries() const override;
};
namespace RISCV {
@@ -954,6 +992,9 @@ bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
CCState &State);
+
+ArrayRef<MCPhysReg> getArgGPRs();
+
} // end namespace RISCV
namespace RISCVVIntrinsicsTable {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
index 4b26c27bb4f8..b807abcc5681 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
@@ -8,8 +8,9 @@
// This file implements the machine function pass to insert reads/writes of the
// CSRs used by RISC-V instructions.
//
-// Currently the pass implements naive insertion of a write to vxrm before an
-// RVV fixed-point instruction.
+// Currently the pass implements:
+// - Saving frm, then writing the static rounding mode of an RVV
+//   floating-point instruction before it, and restoring the saved frm
+//   value after it.
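+//
+// As an illustration (virtual register name and rounding mode assumed), the
+// rewrite around one such instruction looks roughly like:
+//   SwapFRMImm %saved, 1   ; save the old frm into %saved, install RTZ (1)
+//   <the FP vector op>     ; executes under the static rounding mode
+//   WriteFRM %saved        ; restore the previous frm value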
//
//===----------------------------------------------------------------------===//
@@ -30,9 +31,7 @@ class RISCVInsertReadWriteCSR : public MachineFunctionPass {
public:
static char ID;
- RISCVInsertReadWriteCSR() : MachineFunctionPass(ID) {
- initializeRISCVInsertReadWriteCSRPass(*PassRegistry::getPassRegistry());
- }
+ RISCVInsertReadWriteCSR() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -56,60 +55,36 @@ char RISCVInsertReadWriteCSR::ID = 0;
INITIALIZE_PASS(RISCVInsertReadWriteCSR, DEBUG_TYPE,
RISCV_INSERT_READ_WRITE_CSR_NAME, false, false)
-// Returns the index to the rounding mode immediate value if any, otherwise the
-// function will return None.
-static std::optional<unsigned> getRoundModeIdx(const MachineInstr &MI) {
- uint64_t TSFlags = MI.getDesc().TSFlags;
- if (!RISCVII::hasRoundModeOp(TSFlags))
- return std::nullopt;
-
- // The operand order
- // -------------------------------------
- // | n-1 (if any) | n-2 | n-3 | n-4 |
- // | policy | sew | vl | rm |
- // -------------------------------------
- return MI.getNumExplicitOperands() - RISCVII::hasVecPolicyOp(TSFlags) - 3;
-}
-
-// This function inserts a write to vxrm when encountering an RVV fixed-point
-// instruction.
+// This function inserts a swap of frm before an RVV floating-point
+// instruction with a static rounding mode, and restores the old frm value
+// after it.
bool RISCVInsertReadWriteCSR::emitWriteRoundingMode(MachineBasicBlock &MBB) {
bool Changed = false;
for (MachineInstr &MI : MBB) {
- if (auto RoundModeIdx = getRoundModeIdx(MI)) {
- if (RISCVII::usesVXRM(MI.getDesc().TSFlags)) {
- unsigned VXRMImm = MI.getOperand(*RoundModeIdx).getImm();
-
- Changed = true;
-
- BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteVXRMImm))
- .addImm(VXRMImm);
- MI.addOperand(MachineOperand::CreateReg(RISCV::VXRM, /*IsDef*/ false,
- /*IsImp*/ true));
- } else { // FRM
- unsigned FRMImm = MI.getOperand(*RoundModeIdx).getImm();
-
- // The value is a hint to this pass to not alter the frm value.
- if (FRMImm == RISCVFPRndMode::DYN)
- continue;
-
- Changed = true;
-
- // Save
- MachineRegisterInfo *MRI = &MBB.getParent()->getRegInfo();
- Register SavedFRM = MRI->createVirtualRegister(&RISCV::GPRRegClass);
- BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::SwapFRMImm),
- SavedFRM)
- .addImm(FRMImm);
- MI.addOperand(MachineOperand::CreateReg(RISCV::FRM, /*IsDef*/ false,
- /*IsImp*/ true));
- // Restore
- MachineInstrBuilder MIB =
- BuildMI(*MBB.getParent(), {}, TII->get(RISCV::WriteFRM))
- .addReg(SavedFRM);
- MBB.insertAfter(MI, MIB);
- }
- }
+ int FRMIdx = RISCVII::getFRMOpNum(MI.getDesc());
+ if (FRMIdx < 0)
+ continue;
+
+ unsigned FRMImm = MI.getOperand(FRMIdx).getImm();
+
+ // The value is a hint to this pass to not alter the frm value.
+ if (FRMImm == RISCVFPRndMode::DYN)
+ continue;
+
+ Changed = true;
+
+ // Save
+ MachineRegisterInfo *MRI = &MBB.getParent()->getRegInfo();
+ Register SavedFRM = MRI->createVirtualRegister(&RISCV::GPRRegClass);
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::SwapFRMImm),
+ SavedFRM)
+ .addImm(FRMImm);
+ MI.addOperand(MachineOperand::CreateReg(RISCV::FRM, /*IsDef*/ false,
+ /*IsImp*/ true));
+ // Restore
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB.getParent(), {}, TII->get(RISCV::WriteFRM))
+ .addReg(SavedFRM);
+ MBB.insertAfter(MI, MIB);
}
return Changed;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index f1ebe63cfa14..3400b24e0abb 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -67,16 +67,28 @@ static bool isVLPreservingConfig(const MachineInstr &MI) {
return RISCV::X0 == MI.getOperand(0).getReg();
}
-static uint16_t getRVVMCOpcode(uint16_t RVVPseudoOpcode) {
- const RISCVVPseudosTable::PseudoInfo *RVV =
- RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode);
- if (!RVV)
- return 0;
- return RVV->BaseInstr;
+static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
+ switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
+ default:
+ return false;
+ case RISCV::VFMV_S_F:
+ case RISCV::VFMV_V_F:
+ return true;
+ }
+}
+
+static bool isScalarExtractInstr(const MachineInstr &MI) {
+ switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
+ default:
+ return false;
+ case RISCV::VMV_X_S:
+ case RISCV::VFMV_F_S:
+ return true;
+ }
}
-static bool isScalarMoveInstr(const MachineInstr &MI) {
- switch (getRVVMCOpcode(MI.getOpcode())) {
+static bool isScalarInsertInstr(const MachineInstr &MI) {
+ switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
default:
return false;
case RISCV::VMV_S_X:
@@ -86,7 +98,7 @@ static bool isScalarMoveInstr(const MachineInstr &MI) {
}
static bool isScalarSplatInstr(const MachineInstr &MI) {
- switch (getRVVMCOpcode(MI.getOpcode())) {
+ switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
default:
return false;
case RISCV::VMV_V_I:
@@ -97,7 +109,7 @@ static bool isScalarSplatInstr(const MachineInstr &MI) {
}
static bool isVSlideInstr(const MachineInstr &MI) {
- switch (getRVVMCOpcode(MI.getOpcode())) {
+ switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
default:
return false;
case RISCV::VSLIDEDOWN_VX:
@@ -111,7 +123,7 @@ static bool isVSlideInstr(const MachineInstr &MI) {
/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store that ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
- switch (getRVVMCOpcode(MI.getOpcode())) {
+ switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
default:
return std::nullopt;
case RISCV::VLE8_V:
@@ -137,6 +149,13 @@ static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
}
}
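+// Return true if MI materializes a known non-zero constant, i.e. it is an
+// ADDI from x0 with a non-zero immediate (e.g. "addi a0, x0, 8").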
+static bool isNonZeroLoadImmediate(MachineInstr &MI) {
+ return MI.getOpcode() == RISCV::ADDI &&
+ MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
+ MI.getOperand(1).getReg() == RISCV::X0 &&
+ MI.getOperand(2).getImm() != 0;
+}
+
/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
@@ -160,9 +179,13 @@ static bool hasUndefinedMergeOp(const MachineInstr &MI,
// lanes are undefined.
return true;
- // If the tied operand is an IMPLICIT_DEF (or a REG_SEQUENCE whose operands
- // are solely IMPLICIT_DEFS), the pass through lanes are undefined.
+ // If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQUENCE whose
+ // operands are solely IMPLICIT_DEFS, then the pass through lanes are
+ // undefined.
const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
+ if (UseMO.getReg() == RISCV::NoRegister)
+ return true;
+
if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
if (UseMI->isImplicitDef())
return true;
@@ -188,10 +211,14 @@ struct DemandedFields {
bool VLZeroness = false;
// What properties of SEW we need to preserve.
enum : uint8_t {
- SEWEqual = 2, // The exact value of SEW needs to be preserved.
- SEWGreaterThanOrEqual = 1, // SEW can be changed as long as it's greater
+ SEWEqual = 3, // The exact value of SEW needs to be preserved.
+ SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater
// than or equal to the original value.
- SEWNone = 0 // We don't need to preserve SEW at all.
+ SEWGreaterThanOrEqualAndLessThan64 =
+ 1, // SEW can be changed as long as it's greater
+ // than or equal to the original value, but must be less
+ // than 64.
+ SEWNone = 0 // We don't need to preserve SEW at all.
} SEW = SEWNone;
bool LMUL = false;
bool SEWLMULRatio = false;
@@ -243,6 +270,9 @@ struct DemandedFields {
case SEWGreaterThanOrEqual:
OS << "SEWGreaterThanOrEqual";
break;
+ case SEWGreaterThanOrEqualAndLessThan64:
+ OS << "SEWGreaterThanOrEqualAndLessThan64";
+ break;
case SEWNone:
OS << "SEWNone";
break;
@@ -270,13 +300,23 @@ inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
/// of instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
const DemandedFields &Used) {
- if (Used.SEW == DemandedFields::SEWEqual &&
- RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
- return false;
-
- if (Used.SEW == DemandedFields::SEWGreaterThanOrEqual &&
- RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
- return false;
+ switch (Used.SEW) {
+ case DemandedFields::SEWNone:
+ break;
+ case DemandedFields::SEWEqual:
+ if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
+ return false;
+ break;
+ case DemandedFields::SEWGreaterThanOrEqual:
+ if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
+ return false;
+ break;
+ case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
+ if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
+ RISCVVType::getSEW(NewVType) >= 64)
+ return false;
+ break;
+ }
if (Used.LMUL &&
RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
@@ -302,7 +342,8 @@ static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI,
- const MachineRegisterInfo *MRI) {
+ const MachineRegisterInfo *MRI,
+ const RISCVSubtarget *ST) {
// Warning: This function has to work on both the lowered (i.e. post
// emitVSETVLIs) and pre-lowering forms. The main implication of this is
// that it can't use the value of a SEW, VL, or Policy operand as they might
@@ -354,7 +395,7 @@ DemandedFields getDemanded(const MachineInstr &MI,
}
// For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
- if (isScalarMoveInstr(MI)) {
+ if (isScalarInsertInstr(MI)) {
Res.LMUL = false;
Res.SEWLMULRatio = false;
Res.VLAny = false;
@@ -365,11 +406,23 @@ DemandedFields getDemanded(const MachineInstr &MI,
// tail lanes to either be the original value or -1. We are writing
// unknown bits to the lanes here.
if (hasUndefinedMergeOp(MI, *MRI)) {
- Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
+ if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
+ Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
+ else
+ Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
Res.TailPolicy = false;
}
}
+ // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW.
+ if (isScalarExtractInstr(MI)) {
+ assert(!RISCVII::hasVLOp(TSFlags));
+ Res.LMUL = false;
+ Res.SEWLMULRatio = false;
+ Res.TailPolicy = false;
+ Res.MaskPolicy = false;
+ }
+
return Res;
}
@@ -431,8 +484,22 @@ public:
return AVLImm;
}
+ void setAVL(VSETVLIInfo Info) {
+ assert(Info.isValid());
+ if (Info.isUnknown())
+ setUnknown();
+ else if (Info.hasAVLReg())
+ setAVLReg(Info.getAVLReg());
+ else {
+ assert(Info.hasAVLImm());
+ setAVLImm(Info.getAVLImm());
+ }
+ }
+
unsigned getSEW() const { return SEW; }
RISCVII::VLMUL getVLMUL() const { return VLMul; }
+ bool getTailAgnostic() const { return TailAgnostic; }
+ bool getMaskAgnostic() const { return MaskAgnostic; }
bool hasNonZeroAVL(const MachineRegisterInfo &MRI) const {
if (hasAVLImm())
@@ -441,10 +508,7 @@ public:
if (getAVLReg() == RISCV::X0)
return true;
if (MachineInstr *MI = MRI.getVRegDef(getAVLReg());
- MI && MI->getOpcode() == RISCV::ADDI &&
- MI->getOperand(1).isReg() && MI->getOperand(2).isImm() &&
- MI->getOperand(1).getReg() == RISCV::X0 &&
- MI->getOperand(2).getImm() != 0)
+ MI && isNonZeroLoadImmediate(*MI))
return true;
return false;
}
@@ -485,6 +549,8 @@ public:
MaskAgnostic = MA;
}
+ void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }
+
unsigned encodeVTYPE() const {
assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
"Can't encode VTYPE for uninitialized or unknown");
@@ -545,12 +611,6 @@ public:
if (SEWLMULRatioOnly)
return false;
- // If the instruction doesn't need an AVLReg and the SEW matches, consider
- // it compatible.
- if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister)
- if (SEW == Require.SEW)
- return true;
-
if (Used.VLAny && !hasSameAVL(Require))
return false;
@@ -661,10 +721,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
#endif
struct BlockData {
- // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
- // made by this block. Calculated in Phase 1.
- VSETVLIInfo Change;
-
// The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
// block. Calculated in Phase 2.
VSETVLIInfo Exit;
@@ -680,6 +736,7 @@ struct BlockData {
};
class RISCVInsertVSETVLI : public MachineFunctionPass {
+ const RISCVSubtarget *ST;
const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
@@ -689,9 +746,7 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
public:
static char ID;
- RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
- initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
- }
+ RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -712,9 +767,10 @@ private:
MachineBasicBlock::iterator InsertPt, DebugLoc DL,
const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
- void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI);
- void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI);
- bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
+ void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
+ void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
+ bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
+ VSETVLIInfo &Info) const;
void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
void emitVSETVLIs(MachineBasicBlock &MBB);
void doLocalPostpass(MachineBasicBlock &MBB);
@@ -729,6 +785,25 @@ char RISCVInsertVSETVLI::ID = 0;
INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
false, false)
+// Return a VSETVLIInfo representing the changes made by this VSETVLI or
+// VSETIVLI instruction.
+static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
+ VSETVLIInfo NewInfo;
+ if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
+ NewInfo.setAVLImm(MI.getOperand(1).getImm());
+ } else {
+ assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
+ MI.getOpcode() == RISCV::PseudoVSETVLIX0);
+ Register AVLReg = MI.getOperand(1).getReg();
+ assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
+ "Can't handle X0, X0 vsetvli yet");
+ NewInfo.setAVLReg(AVLReg);
+ }
+ NewInfo.setVTYPE(MI.getOperand(2).getImm());
+
+ return NewInfo;
+}
+
static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
const MachineRegisterInfo *MRI) {
VSETVLIInfo InstrInfo;
@@ -779,6 +854,7 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
InstrInfo.setAVLReg(VLOp.getReg());
}
} else {
+ assert(isScalarExtractInstr(MI));
InstrInfo.setAVLReg(RISCV::NoRegister);
}
#ifndef NDEBUG
@@ -788,6 +864,21 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
#endif
InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
+ // If AVL is defined by a vsetvli with the same VLMAX, we can replace the
+ // AVL operand with the AVL of the defining vsetvli. We avoid general
+ // register AVLs to avoid extending live ranges without being sure we can
+ // kill the original source reg entirely.
+ if (InstrInfo.hasAVLReg() && InstrInfo.getAVLReg().isVirtual()) {
+ MachineInstr *DefMI = MRI->getVRegDef(InstrInfo.getAVLReg());
+ if (DefMI && isVectorConfigInstr(*DefMI)) {
+ VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
+ if (DefInstrInfo.hasSameVLMAX(InstrInfo) &&
+ (DefInstrInfo.hasAVLImm() || DefInstrInfo.getAVLReg() == RISCV::X0)) {
+ InstrInfo.setAVL(DefInstrInfo);
+ }
+ }
+ }
+
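+ // For example (virtual register names assumed): if %vl is defined by
+ // "%vl = PseudoVSETIVLI 4, e32m1" and this instruction uses AVL = %vl with
+ // the same VLMAX, the AVL operand becomes the immediate 4, letting later
+ // phases drop the extra vsetvli when the states otherwise agree.
+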
return InstrInfo;
}
@@ -798,25 +889,6 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}
-// Return a VSETVLIInfo representing the changes made by this VSETVLI or
-// VSETIVLI instruction.
-static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
- VSETVLIInfo NewInfo;
- if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
- NewInfo.setAVLImm(MI.getOperand(1).getImm());
- } else {
- assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
- MI.getOpcode() == RISCV::PseudoVSETVLIX0);
- Register AVLReg = MI.getOperand(1).getReg();
- assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
- "Can't handle X0, X0 vsetvli yet");
- NewInfo.setAVLReg(AVLReg);
- }
- NewInfo.setVTYPE(MI.getOperand(2).getImm());
-
- return NewInfo;
-}
-
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt, DebugLoc DL,
const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {
@@ -875,10 +947,10 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
.addReg(RISCV::VL, RegState::Implicit);
return;
}
- // Otherwise use an AVL of 0 to avoid depending on previous vl.
+ // Otherwise use an AVL of 1 to avoid depending on previous vl.
BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
- .addImm(0)
+ .addImm(1)
.addImm(Info.encodeVTYPE());
return;
}
@@ -916,7 +988,7 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
return true;
- DemandedFields Used = getDemanded(MI, MRI);
+ DemandedFields Used = getDemanded(MI, MRI, ST);
// A slidedown/slideup with an *undefined* merge op can freely clobber
// elements not copied from the source vector (e.g. masked off, tail, or
@@ -944,7 +1016,10 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
Used.LMUL = false;
Used.SEWLMULRatio = false;
Used.VLAny = false;
- Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
+ if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
+ Used.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
+ else
+ Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
Used.TailPolicy = false;
}
@@ -969,67 +1044,82 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
return true;
}
-// Given an incoming state reaching MI, modifies that state so that it is minimally
-// compatible with MI. The resulting state is guaranteed to be semantically legal
-// for MI, but may not be the state requested by MI.
-void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) {
+// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
+// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
+// places.
+static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
+ DemandedFields &Demanded) {
+ VSETVLIInfo Info = NewInfo;
+
+ if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
+ !PrevInfo.isUnknown()) {
+ if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
+ PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
+ Info.setVLMul(*NewVLMul);
+ Demanded.LMUL = true;
+ }
+
+ return Info;
+}
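+// Worked example (values assumed): a previous state of e32/m2 has SEW/LMUL
+// ratio 32/2 = 16. If the next instruction needs SEW = 16 but demands
+// neither LMUL nor the ratio, getSameRatioLMUL yields 16/16 = 1, i.e. m1.
+// Both e32/m2 and e16/m1 give VLMAX = VLEN/16, so VL is preserved and the
+// VL toggle can be dropped.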
+
+// Given an incoming state reaching MI, minimally modifies that state so that it
+// is compatible with MI. The resulting state is guaranteed to be semantically
+// legal for MI, but may not be the state requested by MI.
+void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
+ const MachineInstr &MI) const {
uint64_t TSFlags = MI.getDesc().TSFlags;
if (!RISCVII::hasSEWOp(TSFlags))
return;
const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
+ assert(NewInfo.isValid() && !NewInfo.isUnknown());
if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
return;
const VSETVLIInfo PrevInfo = Info;
- Info = NewInfo;
-
- if (!RISCVII::hasVLOp(TSFlags))
- return;
-
- // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
- // VL > 0. We can discard the user requested AVL and just use the last
- // one if we can prove it equally zero. This removes a vsetvli entirely
- // if the types match or allows use of cheaper avl preserving variant
- // if VLMAX doesn't change. If VLMAX might change, we couldn't use
- // the 'vsetvli x0, x0, vtype" variant, so we avoid the transform to
- // prevent extending live range of an avl register operand.
+ if (!Info.isValid() || Info.isUnknown())
+ Info = NewInfo;
+
+ DemandedFields Demanded = getDemanded(MI, MRI, ST);
+ const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);
+
+ // If MI only demands that VL has the same zeroness, we only need to set the
+ // AVL if the zeroness differs. This removes a vsetvli entirely if the types
+ // match or allows use of cheaper avl preserving variant if VLMAX doesn't
+ // change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype'
+ // variant, so we avoid the transform to prevent extending live range of an
+ // avl register operand.
// TODO: We can probably relax this for immediates.
- if (isScalarMoveInstr(MI) && PrevInfo.isValid() &&
- PrevInfo.hasEquallyZeroAVL(Info, *MRI) &&
- Info.hasSameVLMAX(PrevInfo)) {
- if (PrevInfo.hasAVLImm())
- Info.setAVLImm(PrevInfo.getAVLImm());
- else
- Info.setAVLReg(PrevInfo.getAVLReg());
- return;
- }
-
- // If AVL is defined by a vsetvli with the same VLMAX, we can
- // replace the AVL operand with the AVL of the defining vsetvli.
- // We avoid general register AVLs to avoid extending live ranges
- // without being sure we can kill the original source reg entirely.
- if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
- return;
- MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
- if (!DefMI || !isVectorConfigInstr(*DefMI))
- return;
-
- VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
- if (DefInfo.hasSameVLMAX(Info) &&
- (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
- if (DefInfo.hasAVLImm())
- Info.setAVLImm(DefInfo.getAVLImm());
- else
- Info.setAVLReg(DefInfo.getAVLReg());
- return;
+ bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, *MRI) &&
+ IncomingInfo.hasSameVLMAX(PrevInfo);
+ if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
+ Info.setAVL(IncomingInfo);
+
+ Info.setVTYPE(
+ ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
+ .getVLMUL(),
+ ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
+ // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
+ // if needed.
+ (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
+ IncomingInfo.getTailAgnostic(),
+ (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
+ IncomingInfo.getMaskAgnostic());
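+ // In other words: each VTYPE field is taken from the incoming request when
+ // that field is demanded and kept from the current state otherwise, with
+ // the agnostic bits additionally OR-ed with the incoming ones because
+ // agnostic is the weaker setting that can later be relaxed.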
+
+ // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
+ // the AVL.
+ if (Info.hasSEWLMULRatioOnly()) {
+ VSETVLIInfo RatiolessInfo = IncomingInfo;
+ RatiolessInfo.setAVL(Info);
+ Info = RatiolessInfo;
}
}
// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
-void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) {
+void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
+ const MachineInstr &MI) const {
if (isVectorConfigInstr(MI)) {
Info = getInfoForVSETVLI(MI);
return;
@@ -1048,18 +1138,18 @@ void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info, const MachineInstr &MI
Info = VSETVLIInfo::getUnknown();
}
-bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
+bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
+ VSETVLIInfo &Info) const {
bool HadVectorOp = false;
- BlockData &BBInfo = BlockInfo[MBB.getNumber()];
- BBInfo.Change = BBInfo.Pred;
+ Info = BlockInfo[MBB.getNumber()].Pred;
for (const MachineInstr &MI : MBB) {
- transferBefore(BBInfo.Change, MI);
+ transferBefore(Info, MI);
if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
HadVectorOp = true;
- transferAfter(BBInfo.Change, MI);
+ transferAfter(Info, MI);
}
return HadVectorOp;
@@ -1098,8 +1188,8 @@ void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
// compatibility checks performed, a block's output state can change based on
// the input state. To cache, we'd have to add logic for finding
// never-compatible state changes.
- computeVLVTYPEChanges(MBB);
- VSETVLIInfo TmpStatus = BBInfo.Change;
+ VSETVLIInfo TmpStatus;
+ computeVLVTYPEChanges(MBB, TmpStatus);
// If the new exit value matches the old exit value, we don't need to revisit
// any blocks.
@@ -1205,9 +1295,20 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
if (RISCVII::hasVLOp(TSFlags)) {
MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
if (VLOp.isReg()) {
+ Register Reg = VLOp.getReg();
+ MachineInstr *VLOpDef = MRI->getVRegDef(Reg);
+
// Erase the AVL operand from the instruction.
VLOp.setReg(RISCV::NoRegister);
VLOp.setIsKill(false);
+
+ // If the AVL was an immediate > 31, then it would have been emitted
+ // as an ADDI. However, the ADDI might not have been used in the
+ // vsetvli, or a vsetvli might not have been emitted, so it may be
+ // dead now.
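+ // (For example, an AVL of 40 does not fit in vsetivli's 5-bit uimm, so it
+ // was materialized as "addi rd, x0, 40"; rd may now have no users left.)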
+ if (VLOpDef && TII->isAddImmediate(*VLOpDef, Reg) &&
+ MRI->use_nodbg_empty(Reg))
+ VLOpDef->eraseFromParent();
}
MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
/*isImp*/ true));
@@ -1251,36 +1352,12 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
}
}
-/// Return true if the VL value configured must be equal to the requested one.
-static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) {
- if (!Info.hasAVLImm())
- // VLMAX is always the same value.
- // TODO: Could extend to other registers by looking at the associated vreg
- // def placement.
- return RISCV::X0 == Info.getAVLReg();
-
- unsigned AVL = Info.getAVLImm();
- unsigned SEW = Info.getSEW();
- unsigned AVLInBits = AVL * SEW;
-
- unsigned LMul;
- bool Fractional;
- std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL());
-
- if (Fractional)
- return ST.getRealMinVLen() / LMul >= AVLInBits;
- return ST.getRealMinVLen() * LMul >= AVLInBits;
-}
-
/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
- const MachineFunction &MF = *MBB.getParent();
- const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
-
if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
return;
@@ -1308,9 +1385,21 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
if (UnavailablePred->succ_size() != 1)
return;
- // If VL can be less than AVL, then we can't reduce the frequency of exec.
- if (!hasFixedResult(AvailableInfo, ST))
- return;
+ // If the AVL value is a register (other than our VLMAX sentinel),
+ // we need to prove the value is available at the point where we're
+ // going to insert the vsetvli.
+ if (AvailableInfo.hasAVLReg() && RISCV::X0 != AvailableInfo.getAVLReg()) {
+ MachineInstr *AVLDefMI = MRI->getVRegDef(AvailableInfo.getAVLReg());
+ if (!AVLDefMI)
+ return;
+ // This is an inline dominance check which covers the case of
+ // UnavailablePred being the preheader of a loop.
+ if (AVLDefMI->getParent() != UnavailablePred)
+ return;
+ for (auto &TermMI : UnavailablePred->terminators())
+ if (&TermMI == AVLDefMI)
+ return;
+ }
// Model the effect of changing the input state of the block MBB to
// AvailableInfo. We're looking for two issues here; one legality,
@@ -1370,9 +1459,16 @@ static void doUnion(DemandedFields &A, DemandedFields B) {
A.MaskPolicy |= B.MaskPolicy;
}
-static bool isNonZeroAVL(const MachineOperand &MO) {
- if (MO.isReg())
- return RISCV::X0 == MO.getReg();
+static bool isNonZeroAVL(const MachineOperand &MO,
+ const MachineRegisterInfo &MRI) {
+ if (MO.isReg()) {
+ if (MO.getReg() == RISCV::X0)
+ return true;
+ if (MachineInstr *MI = MRI.getVRegDef(MO.getReg());
+ MI && isNonZeroLoadImmediate(*MI))
+ return true;
+ return false;
+ }
assert(MO.isImm());
return 0 != MO.getImm();
}
@@ -1381,7 +1477,8 @@ static bool isNonZeroAVL(const MachineOperand &MO) {
// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
const MachineInstr &MI,
- const DemandedFields &Used) {
+ const DemandedFields &Used,
+ const MachineRegisterInfo &MRI) {
// If the VL values aren't equal, return false if either a) the former is
// demanded, or b) we can't rewrite the former to be the latter for
// implementation reasons.
@@ -1389,29 +1486,21 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
if (Used.VLAny)
return false;
- // TODO: Requires more care in the mutation...
- if (isVLPreservingConfig(PrevMI))
- return false;
-
// We don't bother to handle the equally zero case here as it's largely
// uninteresting.
- if (Used.VLZeroness &&
- (!isNonZeroAVL(MI.getOperand(1)) ||
- !isNonZeroAVL(PrevMI.getOperand(1))))
- return false;
+ if (Used.VLZeroness) {
+ if (isVLPreservingConfig(PrevMI))
+ return false;
+ if (!isNonZeroAVL(MI.getOperand(1), MRI) ||
+ !isNonZeroAVL(PrevMI.getOperand(1), MRI))
+ return false;
+ }
// TODO: Track whether the register is defined between
// PrevMI and MI.
if (MI.getOperand(1).isReg() &&
RISCV::X0 != MI.getOperand(1).getReg())
return false;
-
- // TODO: We need to change the result register to allow this rewrite
- // without the result forming a vl preserving vsetvli which is not
- // a correct state merge.
- if (PrevMI.getOperand(0).getReg() == RISCV::X0 &&
- MI.getOperand(1).isReg())
- return false;
}
if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
@@ -1433,7 +1522,7 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
if (!isVectorConfigInstr(MI)) {
- doUnion(Used, getDemanded(MI, MRI));
+ doUnion(Used, getDemanded(MI, MRI, ST));
continue;
}
@@ -1447,25 +1536,32 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
ToDelete.push_back(&MI);
// Leave NextMI unchanged
continue;
- } else if (canMutatePriorConfig(MI, *NextMI, Used)) {
+ } else if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
if (!isVLPreservingConfig(*NextMI)) {
+ MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
+ MI.getOperand(0).setIsDead(false);
+ Register OldVLReg;
+ if (MI.getOperand(1).isReg())
+ OldVLReg = MI.getOperand(1).getReg();
if (NextMI->getOperand(1).isImm())
MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
else
MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
+ if (OldVLReg) {
+ MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
+ if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
+ MRI->use_nodbg_empty(OldVLReg))
+ VLOpDef->eraseFromParent();
+ }
MI.setDesc(NextMI->getDesc());
}
MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
- // Don't delete a vsetvli if its result might be used.
- Register NextVRefDef = NextMI->getOperand(0).getReg();
- if (NextVRefDef == RISCV::X0 ||
- (NextVRefDef.isVirtual() && MRI->use_nodbg_empty(NextVRefDef)))
- ToDelete.push_back(NextMI);
+ ToDelete.push_back(NextMI);
// fallthrough
}
}
NextMI = &MI;
- Used = getDemanded(MI, MRI);
+ Used = getDemanded(MI, MRI, ST);
}
for (auto *MI : ToDelete)
@@ -1488,13 +1584,13 @@ void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
// Skip if the vector extension is not enabled.
- const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
- if (!ST.hasVInstructions())
+ ST = &MF.getSubtarget<RISCVSubtarget>();
+ if (!ST->hasVInstructions())
return false;
LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");
- TII = ST.getInstrInfo();
+ TII = ST->getInstrInfo();
MRI = &MF.getRegInfo();
assert(BlockInfo.empty() && "Expect empty block infos");
@@ -1504,10 +1600,11 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
// Phase 1 - determine how VL/VTYPE are affected by each block.
for (const MachineBasicBlock &MBB : MF) {
- HaveVectorOp |= computeVLVTYPEChanges(MBB);
+ VSETVLIInfo TmpStatus;
+ HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
// Initial exit state is whatever change we found in the block.
BlockData &BBInfo = BlockInfo[MBB.getNumber()];
- BBInfo.Exit = BBInfo.Change;
+ BBInfo.Exit = TmpStatus;
LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
<< " is " << BBInfo.Exit << "\n");
@@ -1552,22 +1649,6 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF)
doLocalPostpass(MBB);
- // Once we're fully done rewriting all the instructions, do a final pass
- // through to check for VSETVLIs which write to an unused destination.
- // For the non X0, X0 variant, we can replace the destination register
- // with X0 to reduce register pressure. This is really a generic
- // optimization which can be applied to any dead def (TODO: generalize).
- for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : MBB) {
- if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
- MI.getOpcode() == RISCV::PseudoVSETIVLI) {
- Register VRegDef = MI.getOperand(0).getReg();
- if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef))
- MI.getOperand(0).setReg(RISCV::X0);
- }
- }
- }
-
// Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
// of VLEFF/VLSEGFF.
for (MachineBasicBlock &MBB : MF)
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp
new file mode 100644
index 000000000000..de2227f82192
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp
@@ -0,0 +1,458 @@
+//===-- RISCVInsertWriteVXRM.cpp - Insert Write of RISC-V VXRM CSR --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts writes to the VXRM CSR as needed by vector instructions.
+// Each instruction that uses VXRM carries an operand that contains its required
+// VXRM value. This pass tries to optimize placement to avoid redundant writes
+// to VXRM.
+//
+// This is done using two dataflow algorithms. The first is a forward dataflow
+// to calculate where a VXRM value is available. The second is a backwards
+// dataflow to determine where a VXRM value is anticipated.
+//
+// Finally, we use the results of these two dataflows to insert VXRM writes
+// where a value is anticipated, but not available.
+//
+// FIXME: This pass does not split critical edges, so there can still be some
+// redundancy.
+//
+// FIXME: If we are willing to have writes that aren't always needed, we could
+// reduce the number of VXRM writes in some cases.
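+//
+// A small sketch (block names assumed): suppose entry branches to blocks A
+// and B, which both fall through to C, and only C contains an instruction
+// requiring VXRM = 2. The backward pass marks VXRM = 2 as anticipated along
+// every path from entry, while the forward pass finds it available nowhere,
+// so a single write is emitted at the end of the entry block rather than in
+// front of the use in C.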
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/RISCVBaseInfo.h"
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <queue>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-insert-write-vxrm"
+#define RISCV_INSERT_WRITE_VXRM_NAME "RISC-V Insert Write VXRM Pass"
+
+namespace {
+
+class VXRMInfo {
+ uint8_t VXRMImm = 0;
+
+ enum : uint8_t {
+ Uninitialized,
+ Static,
+ Unknown,
+ } State = Uninitialized;
+
+public:
+ VXRMInfo() {}
+
+ static VXRMInfo getUnknown() {
+ VXRMInfo Info;
+ Info.setUnknown();
+ return Info;
+ }
+
+ bool isValid() const { return State != Uninitialized; }
+ void setUnknown() { State = Unknown; }
+ bool isUnknown() const { return State == Unknown; }
+
+ bool isStatic() const { return State == Static; }
+
+ void setVXRMImm(unsigned Imm) {
+ assert(Imm <= 3 && "Unexpected VXRM value");
+ VXRMImm = Imm;
+ State = Static;
+ }
+ unsigned getVXRMImm() const {
+ assert(isStatic() && VXRMImm <= 3 && "Unexpected state");
+ return VXRMImm;
+ }
+
+ bool operator==(const VXRMInfo &Other) const {
+ // Uninitialized is only equal to another Uninitialized.
+ if (State != Other.State)
+ return false;
+
+ if (isStatic())
+ return VXRMImm == Other.VXRMImm;
+
+ assert((isValid() || isUnknown()) && "Unexpected state");
+ return true;
+ }
+
+ bool operator!=(const VXRMInfo &Other) const { return !(*this == Other); }
+
+ // Calculate the VXRMInfo visible to a block assuming this and Other are
+ // both predecessors.
+ VXRMInfo intersect(const VXRMInfo &Other) const {
+ // If the new value isn't valid, ignore it.
+ if (!Other.isValid())
+ return *this;
+
+ // If this value isn't valid, this must be the first predecessor; use it.
+ if (!isValid())
+ return Other;
+
+ // If either is unknown, the result is unknown.
+ if (isUnknown() || Other.isUnknown())
+ return VXRMInfo::getUnknown();
+
+ // If we have an exact match, return this.
+ if (*this == Other)
+ return *this;
+
+ // Otherwise the result is unknown.
+ return VXRMInfo::getUnknown();
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Support for debugging, callable in GDB: V->dump()
+ LLVM_DUMP_METHOD void dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+ }
+
+ void print(raw_ostream &OS) const {
+ OS << '{';
+ if (!isValid())
+ OS << "Uninitialized";
+ else if (isUnknown())
+ OS << "Unknown";
+ else
+ OS << getVXRMImm();
+ OS << '}';
+ }
+#endif
+};
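+
+// For intuition, the meet above behaves as follows (illustration):
+//   intersect(Uninitialized, X)     == X        (first predecessor seen)
+//   intersect(Static(2), Static(2)) == Static(2)
+//   intersect(Static(2), Static(3)) == Unknown
+//   intersect(Unknown, any valid)   == Unknown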
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_ATTRIBUTE_USED
+inline raw_ostream &operator<<(raw_ostream &OS, const VXRMInfo &V) {
+ V.print(OS);
+ return OS;
+}
+#endif
+
+struct BlockData {
+ // Indicates if the block uses VXRM. Uninitialized means no use.
+ VXRMInfo VXRMUse;
+
+ // Indicates the VXRM output from the block. Uninitialized means transparent.
+ VXRMInfo VXRMOut;
+
+ // Keeps track of the available VXRM value at the start of the basic block.
+ VXRMInfo AvailableIn;
+
+ // Keeps track of the available VXRM value at the end of the basic block.
+ VXRMInfo AvailableOut;
+
+ // Keeps track of what VXRM is anticipated at the start of the basic block.
+ VXRMInfo AnticipatedIn;
+
+ // Keeps track of what VXRM is anticipated at the end of the basic block.
+ VXRMInfo AnticipatedOut;
+
+ // Keeps track of whether the block is already in the queue.
+ bool InQueue;
+
+ BlockData() = default;
+};
+
+class RISCVInsertWriteVXRM : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+
+ std::vector<BlockData> BlockInfo;
+ std::queue<const MachineBasicBlock *> WorkList;
+
+public:
+ static char ID;
+
+ RISCVInsertWriteVXRM() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override {
+ return RISCV_INSERT_WRITE_VXRM_NAME;
+ }
+
+private:
+ bool computeVXRMChanges(const MachineBasicBlock &MBB);
+ void computeAvailable(const MachineBasicBlock &MBB);
+ void computeAnticipated(const MachineBasicBlock &MBB);
+ void emitWriteVXRM(MachineBasicBlock &MBB);
+};
+
+} // end anonymous namespace
+
+char RISCVInsertWriteVXRM::ID = 0;
+
+INITIALIZE_PASS(RISCVInsertWriteVXRM, DEBUG_TYPE, RISCV_INSERT_WRITE_VXRM_NAME,
+ false, false)
+
+bool RISCVInsertWriteVXRM::computeVXRMChanges(const MachineBasicBlock &MBB) {
+ BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+
+ bool NeedVXRMWrite = false;
+ for (const MachineInstr &MI : MBB) {
+ int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc());
+ if (VXRMIdx >= 0) {
+ unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm();
+
+ if (!BBInfo.VXRMUse.isValid())
+ BBInfo.VXRMUse.setVXRMImm(NewVXRMImm);
+
+ BBInfo.VXRMOut.setVXRMImm(NewVXRMImm);
+ NeedVXRMWrite = true;
+ continue;
+ }
+
+ if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VXRM)) {
+ if (!BBInfo.VXRMUse.isValid())
+ BBInfo.VXRMUse.setUnknown();
+
+ BBInfo.VXRMOut.setUnknown();
+ }
+ }
+
+ return NeedVXRMWrite;
+}
+
+void RISCVInsertWriteVXRM::computeAvailable(const MachineBasicBlock &MBB) {
+ BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+
+ BBInfo.InQueue = false;
+
+ VXRMInfo Available;
+ if (MBB.pred_empty()) {
+ Available.setUnknown();
+ } else {
+ for (const MachineBasicBlock *P : MBB.predecessors())
+ Available = Available.intersect(BlockInfo[P->getNumber()].AvailableOut);
+ }
+
+ // If we don't have any valid available info, wait until we do.
+ if (!Available.isValid())
+ return;
+
+ if (Available != BBInfo.AvailableIn) {
+ BBInfo.AvailableIn = Available;
+ LLVM_DEBUG(dbgs() << "AvailableIn state of " << printMBBReference(MBB)
+ << " changed to " << BBInfo.AvailableIn << "\n");
+ }
+
+ if (BBInfo.VXRMOut.isValid())
+ Available = BBInfo.VXRMOut;
+
+ if (Available == BBInfo.AvailableOut)
+ return;
+
+ BBInfo.AvailableOut = Available;
+ LLVM_DEBUG(dbgs() << "AvailableOut state of " << printMBBReference(MBB)
+ << " changed to " << BBInfo.AvailableOut << "\n");
+
+ // Add the successors to the work list so that we can propagate.
+ for (MachineBasicBlock *S : MBB.successors()) {
+ if (!BlockInfo[S->getNumber()].InQueue) {
+ BlockInfo[S->getNumber()].InQueue = true;
+ WorkList.push(S);
+ }
+ }
+}
+
+void RISCVInsertWriteVXRM::computeAnticipated(const MachineBasicBlock &MBB) {
+ BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+
+ BBInfo.InQueue = false;
+
+ VXRMInfo Anticipated;
+ if (MBB.succ_empty()) {
+ Anticipated.setUnknown();
+ } else {
+ for (const MachineBasicBlock *S : MBB.successors())
+ Anticipated =
+ Anticipated.intersect(BlockInfo[S->getNumber()].AnticipatedIn);
+ }
+
+ // If we don't have any valid anticipated info, wait until we do.
+ if (!Anticipated.isValid())
+ return;
+
+ if (Anticipated != BBInfo.AnticipatedOut) {
+ BBInfo.AnticipatedOut = Anticipated;
+ LLVM_DEBUG(dbgs() << "AnticipatedOut state of " << printMBBReference(MBB)
+ << " changed to " << BBInfo.AnticipatedOut << "\n");
+ }
+
+ // If this block reads VXRM, copy it.
+ if (BBInfo.VXRMUse.isValid())
+ Anticipated = BBInfo.VXRMUse;
+
+ if (Anticipated == BBInfo.AnticipatedIn)
+ return;
+
+ BBInfo.AnticipatedIn = Anticipated;
+ LLVM_DEBUG(dbgs() << "AnticipatedIn state of " << printMBBReference(MBB)
+ << " changed to " << BBInfo.AnticipatedIn << "\n");
+
+ // Add the predecessors to the work list so that we can propagate.
+ for (MachineBasicBlock *P : MBB.predecessors()) {
+ if (!BlockInfo[P->getNumber()].InQueue) {
+ BlockInfo[P->getNumber()].InQueue = true;
+ WorkList.push(P);
+ }
+ }
+}
+
+void RISCVInsertWriteVXRM::emitWriteVXRM(MachineBasicBlock &MBB) {
+ const BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+
+ VXRMInfo Info = BBInfo.AvailableIn;
+
+ // Flag indicating that we need to insert a VXRM write; we want to delay it
+ // as late as possible in this block.
+ bool PendingInsert = false;
+
+ // Insert VXRM write if anticipated and not available.
+ if (BBInfo.AnticipatedIn.isStatic()) {
+ // If this is the entry block and the value is anticipated, insert.
+ if (MBB.isEntryBlock()) {
+ PendingInsert = true;
+ } else {
+ // Search for any predecessors that wouldn't satisfy our requirement and
+ // insert a VXRM write if needed.
+ // NOTE: If one predecessor is able to provide the requirement, but
+ // another isn't, it means we have a critical edge. The better placement
+ // would be to split the critical edge.
+ for (MachineBasicBlock *P : MBB.predecessors()) {
+ const BlockData &PInfo = BlockInfo[P->getNumber()];
+ // If it's available out of the predecessor, then we're ok.
+ if (PInfo.AvailableOut.isStatic() &&
+ PInfo.AvailableOut.getVXRMImm() ==
+ BBInfo.AnticipatedIn.getVXRMImm())
+ continue;
+ // If the predecessor anticipates this value for all its successors,
+ // then a write to VXRM would have already occurred before this block is
+ // executed.
+ if (PInfo.AnticipatedOut.isStatic() &&
+ PInfo.AnticipatedOut.getVXRMImm() ==
+ BBInfo.AnticipatedIn.getVXRMImm())
+ continue;
+ PendingInsert = true;
+ break;
+ }
+ }
+
+ Info = BBInfo.AnticipatedIn;
+ }
+
+ for (MachineInstr &MI : MBB) {
+ int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc());
+ if (VXRMIdx >= 0) {
+ unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm();
+
+ if (PendingInsert || !Info.isStatic() ||
+ Info.getVXRMImm() != NewVXRMImm) {
+ assert((!PendingInsert ||
+ (Info.isStatic() && Info.getVXRMImm() == NewVXRMImm)) &&
+ "Pending VXRM insertion mismatch");
+ LLVM_DEBUG(dbgs() << "Inserting before "; MI.print(dbgs()));
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteVXRMImm))
+ .addImm(NewVXRMImm);
+ PendingInsert = false;
+ }
+
+ MI.addOperand(MachineOperand::CreateReg(RISCV::VXRM, /*IsDef*/ false,
+ /*IsImp*/ true));
+ Info.setVXRMImm(NewVXRMImm);
+ continue;
+ }
+
+ if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VXRM))
+ Info.setUnknown();
+ }
+
+ // If all our successors anticipate a value, do the insert.
+ // NOTE: It's possible that not all predecessors of our successor provide the
+ // correct value. This can occur on critical edges. If we don't split the
+ // critical edge, we'll also have a VXRM write in the successor that is
+ // redundant with this one.
+ if (PendingInsert ||
+ (BBInfo.AnticipatedOut.isStatic() &&
+ (!Info.isStatic() ||
+ Info.getVXRMImm() != BBInfo.AnticipatedOut.getVXRMImm()))) {
+ assert((!PendingInsert ||
+ (Info.isStatic() && BBInfo.AnticipatedOut.isStatic() &&
+ Info.getVXRMImm() == BBInfo.AnticipatedOut.getVXRMImm())) &&
+ "Pending VXRM insertion mismatch");
+ LLVM_DEBUG(dbgs() << "Inserting at end of " << printMBBReference(MBB)
+ << " changing to " << BBInfo.AnticipatedOut << "\n");
+ BuildMI(MBB, MBB.getFirstTerminator(), DebugLoc(),
+ TII->get(RISCV::WriteVXRMImm))
+ .addImm(BBInfo.AnticipatedOut.getVXRMImm());
+ }
+}
+
+bool RISCVInsertWriteVXRM::runOnMachineFunction(MachineFunction &MF) {
+ // Skip if the vector extension is not enabled.
+ const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+ if (!ST.hasVInstructions())
+ return false;
+
+ TII = ST.getInstrInfo();
+
+ assert(BlockInfo.empty() && "Expect empty block infos");
+ BlockInfo.resize(MF.getNumBlockIDs());
+
+ // Phase 1 - collect block information.
+ bool NeedVXRMChange = false;
+ for (const MachineBasicBlock &MBB : MF)
+ NeedVXRMChange |= computeVXRMChanges(MBB);
+
+ if (!NeedVXRMChange) {
+ BlockInfo.clear();
+ return false;
+ }
+
+ // Phase 2 - Compute available VXRM using a forward walk.
+ for (const MachineBasicBlock &MBB : MF) {
+ WorkList.push(&MBB);
+ BlockInfo[MBB.getNumber()].InQueue = true;
+ }
+ while (!WorkList.empty()) {
+ const MachineBasicBlock &MBB = *WorkList.front();
+ WorkList.pop();
+ computeAvailable(MBB);
+ }
+
+ // Phase 3 - Compute anticipated VXRM using a backwards walk.
+ for (const MachineBasicBlock &MBB : llvm::reverse(MF)) {
+ WorkList.push(&MBB);
+ BlockInfo[MBB.getNumber()].InQueue = true;
+ }
+ while (!WorkList.empty()) {
+ const MachineBasicBlock &MBB = *WorkList.front();
+ WorkList.pop();
+ computeAnticipated(MBB);
+ }
+
+ // Phase 4 - Emit VXRM writes at the earliest place possible.
+ for (MachineBasicBlock &MBB : MF)
+ emitWriteVXRM(MBB);
+
+ BlockInfo.clear();
+
+ return true;
+}
+
+FunctionPass *llvm::createRISCVInsertWriteVXRMPass() {
+ return new RISCVInsertWriteVXRM();
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index 504952b6bd2f..e80ba26800a1 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -241,8 +241,8 @@ class PseudoQuietFCMP<DAGOperand Ty>
}
// Pseudo load instructions.
-class PseudoLoad<string opcodestr, RegisterClass rdty = GPR>
- : Pseudo<(outs rdty:$rd), (ins bare_symbol:$addr), [], opcodestr, "$rd, $addr"> {
+class PseudoLoad<string opcodestr>
+ : Pseudo<(outs GPR:$rd), (ins bare_symbol:$addr), [], opcodestr, "$rd, $addr"> {
let hasSideEffects = 0;
let mayLoad = 1;
let mayStore = 0;
@@ -250,7 +250,7 @@ class PseudoLoad<string opcodestr, RegisterClass rdty = GPR>
let isAsmParserOnly = 1;
}
-class PseudoFloatLoad<string opcodestr, RegisterClass rdty = GPR>
+class PseudoFloatLoad<string opcodestr, RegisterClass rdty>
: Pseudo<(outs GPR:$tmp, rdty:$rd), (ins bare_symbol:$addr), [], opcodestr, "$rd, $addr, $tmp"> {
let hasSideEffects = 0;
let mayLoad = 1;
@@ -270,20 +270,51 @@ class PseudoStore<string opcodestr, RegisterClass rsty = GPR>
}
// Instruction formats are listed in the order they appear in the RISC-V
-// instruction set manual (R, I, S, B, U, J) with sub-formats (e.g. RVInstR4,
-// RVInstRAtomic) sorted alphabetically.
+// instruction set manual (R, R4, I, S, B, U, J).
+
+// Common base class for R format instructions. Bits {31-25} should be set by
+// the subclasses.
+class RVInstRBase<bits<3> funct3, RISCVOpcode opcode, dag outs,
+ dag ins, string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
+ bits<5> rs2;
+ bits<5> rs1;
+ bits<5> rd;
+
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = rd;
+ let Inst{6-0} = opcode.Value;
+}
class RVInstR<bits<7> funct7, bits<3> funct3, RISCVOpcode opcode, dag outs,
dag ins, string opcodestr, string argstr>
+ : RVInstRBase<funct3, opcode, outs, ins, opcodestr, argstr> {
+ let Inst{31-25} = funct7;
+}
+
+class RVInstRAtomic<bits<5> funct5, bit aq, bit rl, bits<3> funct3,
+ RISCVOpcode opcode, dag outs, dag ins, string opcodestr,
+ string argstr>
+ : RVInstRBase<funct3, opcode, outs, ins, opcodestr, argstr> {
+ let Inst{31-27} = funct5;
+ let Inst{26} = aq;
+ let Inst{25} = rl;
+}
+
+class RVInstRFrm<bits<7> funct7, RISCVOpcode opcode, dag outs, dag ins,
+ string opcodestr, string argstr>
: RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
bits<5> rs2;
bits<5> rs1;
+ bits<3> frm;
bits<5> rd;
let Inst{31-25} = funct7;
let Inst{24-20} = rs2;
let Inst{19-15} = rs1;
- let Inst{14-12} = funct3;
+ let Inst{14-12} = frm;
let Inst{11-7} = rd;
let Inst{6-0} = opcode.Value;
}
@@ -323,83 +354,51 @@ class RVInstR4Frm<bits<2> funct2, RISCVOpcode opcode, dag outs, dag ins,
let Inst{6-0} = opcode.Value;
}
-class RVInstRAtomic<bits<5> funct5, bit aq, bit rl, bits<3> funct3,
- RISCVOpcode opcode, dag outs, dag ins, string opcodestr,
- string argstr>
- : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
- bits<5> rs2;
+// Common base class for I format instructions. Bits {31-20} should be set by
+// the subclasses.
+class RVInstIBase<bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins,
+ string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
bits<5> rs1;
bits<5> rd;
- let Inst{31-27} = funct5;
- let Inst{26} = aq;
- let Inst{25} = rl;
- let Inst{24-20} = rs2;
let Inst{19-15} = rs1;
let Inst{14-12} = funct3;
let Inst{11-7} = rd;
let Inst{6-0} = opcode.Value;
}
-class RVInstRFrm<bits<7> funct7, RISCVOpcode opcode, dag outs, dag ins,
- string opcodestr, string argstr>
- : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
- bits<5> rs2;
- bits<5> rs1;
- bits<3> frm;
- bits<5> rd;
-
- let Inst{31-25} = funct7;
- let Inst{24-20} = rs2;
- let Inst{19-15} = rs1;
- let Inst{14-12} = frm;
- let Inst{11-7} = rd;
- let Inst{6-0} = opcode.Value;
-}
-
class RVInstI<bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins,
string opcodestr, string argstr>
- : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+ : RVInstIBase<funct3, opcode, outs, ins, opcodestr, argstr> {
bits<12> imm12;
- bits<5> rs1;
- bits<5> rd;
let Inst{31-20} = imm12;
- let Inst{19-15} = rs1;
- let Inst{14-12} = funct3;
- let Inst{11-7} = rd;
- let Inst{6-0} = opcode.Value;
}
class RVInstIShift<bits<5> imm11_7, bits<3> funct3, RISCVOpcode opcode,
dag outs, dag ins, string opcodestr, string argstr>
- : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+ : RVInstIBase<funct3, opcode, outs, ins, opcodestr, argstr> {
bits<6> shamt;
- bits<5> rs1;
- bits<5> rd;
let Inst{31-27} = imm11_7;
let Inst{26} = 0;
let Inst{25-20} = shamt;
- let Inst{19-15} = rs1;
- let Inst{14-12} = funct3;
- let Inst{11-7} = rd;
- let Inst{6-0} = opcode.Value;
}
class RVInstIShiftW<bits<7> imm11_5, bits<3> funct3, RISCVOpcode opcode,
dag outs, dag ins, string opcodestr, string argstr>
- : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+ : RVInstIBase<funct3, opcode, outs, ins, opcodestr, argstr> {
bits<5> shamt;
- bits<5> rs1;
- bits<5> rd;
let Inst{31-25} = imm11_5;
let Inst{24-20} = shamt;
- let Inst{19-15} = rs1;
- let Inst{14-12} = funct3;
- let Inst{11-7} = rd;
- let Inst{6-0} = opcode.Value;
+}
+
+class RVInstIUnary<bits<12> imm12, bits<3> funct3, RISCVOpcode opcode,
+ dag outs, dag ins, string opcodestr, string argstr>
+ : RVInstIBase<funct3, opcode, outs, ins, opcodestr, argstr> {
+ let Inst{31-20} = imm12;
}
class RVInstS<bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins,
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrGISel.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrGISel.td
new file mode 100644
index 000000000000..ede8c9809833
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrGISel.td
@@ -0,0 +1,26 @@
+//===-- RISCVInstrGISel.td - RISC-V GISel target pseudos ----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+// RISC-V GlobalISel target pseudo instruction definitions. These are kept
+// separate from the other tablegen files for organizational purposes, but
+// share the same infrastructure.
+//
+//===----------------------------------------------------------------------===//
+
+class RISCVGenericInstruction : GenericInstruction {
+ let Namespace = "RISCV";
+}
+
+// Pseudo equivalent to a RISCVISD::FCLASS.
+def G_FCLASS : RISCVGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = false;
+}
+def : GINodeEquiv<G_FCLASS, riscv_fclass>;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index c1065f73000f..1dcff7eb563e 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
@@ -27,6 +28,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/TargetRegistry.h"
@@ -293,6 +295,112 @@ static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
return false;
}
+void RISCVInstrInfo::copyPhysRegVector(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, MCRegister DstReg,
+ MCRegister SrcReg, bool KillSrc,
+ unsigned Opc, unsigned NF) const {
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+
+ RISCVII::VLMUL LMul;
+ unsigned SubRegIdx;
+ unsigned VVOpc, VIOpc;
+ switch (Opc) {
+ default:
+ llvm_unreachable("Impossible LMUL for vector register copy.");
+ case RISCV::VMV1R_V:
+ LMul = RISCVII::LMUL_1;
+ SubRegIdx = RISCV::sub_vrm1_0;
+ VVOpc = RISCV::PseudoVMV_V_V_M1;
+ VIOpc = RISCV::PseudoVMV_V_I_M1;
+ break;
+ case RISCV::VMV2R_V:
+ LMul = RISCVII::LMUL_2;
+ SubRegIdx = RISCV::sub_vrm2_0;
+ VVOpc = RISCV::PseudoVMV_V_V_M2;
+ VIOpc = RISCV::PseudoVMV_V_I_M2;
+ break;
+ case RISCV::VMV4R_V:
+ LMul = RISCVII::LMUL_4;
+ SubRegIdx = RISCV::sub_vrm4_0;
+ VVOpc = RISCV::PseudoVMV_V_V_M4;
+ VIOpc = RISCV::PseudoVMV_V_I_M4;
+ break;
+ case RISCV::VMV8R_V:
+ assert(NF == 1);
+ LMul = RISCVII::LMUL_8;
+ SubRegIdx = RISCV::sub_vrm1_0; // There is no sub_vrm8_0.
+ VVOpc = RISCV::PseudoVMV_V_V_M8;
+ VIOpc = RISCV::PseudoVMV_V_I_M8;
+ break;
+ }
+
+ bool UseVMV_V_V = false;
+ bool UseVMV_V_I = false;
+ MachineBasicBlock::const_iterator DefMBBI;
+ if (isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
+ UseVMV_V_V = true;
+ Opc = VVOpc;
+
+ if (DefMBBI->getOpcode() == VIOpc) {
+ UseVMV_V_I = true;
+ Opc = VIOpc;
+ }
+ }
+
+ if (NF == 1) {
+ auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), DstReg);
+ if (UseVMV_V_V)
+ MIB.addReg(DstReg, RegState::Undef);
+ if (UseVMV_V_I)
+ MIB = MIB.add(DefMBBI->getOperand(2));
+ else
+ MIB = MIB.addReg(SrcReg, getKillRegState(KillSrc));
+ if (UseVMV_V_V) {
+ const MCInstrDesc &Desc = DefMBBI->getDesc();
+ MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL
+ MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
+ MIB.addImm(0); // tu, mu
+ MIB.addReg(RISCV::VL, RegState::Implicit);
+ MIB.addReg(RISCV::VTYPE, RegState::Implicit);
+ }
+ return;
+ }
+
+ int I = 0, End = NF, Incr = 1;
+ unsigned SrcEncoding = TRI->getEncodingValue(SrcReg);
+ unsigned DstEncoding = TRI->getEncodingValue(DstReg);
+ unsigned LMulVal;
+ bool Fractional;
+ std::tie(LMulVal, Fractional) = RISCVVType::decodeVLMUL(LMul);
+  assert(!Fractional && "It is impossible to have a fractional LMUL here.");
+ if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMulVal)) {
+ I = NF - 1;
+ End = -1;
+ Incr = -1;
+ }
+
+ for (; I != End; I += Incr) {
+ auto MIB =
+ BuildMI(MBB, MBBI, DL, get(Opc), TRI->getSubReg(DstReg, SubRegIdx + I));
+ if (UseVMV_V_V)
+ MIB.addReg(TRI->getSubReg(DstReg, SubRegIdx + I), RegState::Undef);
+ if (UseVMV_V_I)
+ MIB = MIB.add(DefMBBI->getOperand(2));
+ else
+ MIB = MIB.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I),
+ getKillRegState(KillSrc));
+ if (UseVMV_V_V) {
+ const MCInstrDesc &Desc = DefMBBI->getDesc();
+ MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL
+ MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
+ MIB.addImm(0); // tu, mu
+ MIB.addReg(RISCV::VL, RegState::Implicit);
+ MIB.addReg(RISCV::VTYPE, RegState::Implicit);
+ }
+ }
+}
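+
+// Illustrative note, not part of the original commit: the backward walk above
+// matters when source and destination tuples overlap. For NF = 2 at LMUL = 1,
+// copying v2_v3 into v3_v4 in ascending order would overwrite v3 before it is
+// read as a source, so the loop runs in reverse: v4 <- v3, then v3 <- v2.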
+
void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, MCRegister DstReg,
@@ -329,195 +437,159 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- // FPR->FPR copies and VR->VR copies.
- unsigned Opc;
- bool IsScalableVector = true;
- unsigned NF = 1;
- RISCVII::VLMUL LMul = RISCVII::LMUL_1;
- unsigned SubRegIdx = RISCV::sub_vrm1_0;
if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
+ unsigned Opc;
if (STI.hasStdExtZfh()) {
Opc = RISCV::FSGNJ_H;
} else {
- assert(STI.hasStdExtF() && STI.hasStdExtZfhmin() &&
+ assert(STI.hasStdExtF() &&
+ (STI.hasStdExtZfhmin() || STI.hasStdExtZfbfmin()) &&
"Unexpected extensions");
- // Zfhmin subset doesn't have FSGNJ_H, replaces FSGNJ_H with FSGNJ_S.
+      // Zfhmin/Zfbfmin don't have FSGNJ_H, so replace FSGNJ_H with FSGNJ_S.
DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16,
&RISCV::FPR32RegClass);
SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16,
&RISCV::FPR32RegClass);
Opc = RISCV::FSGNJ_S;
}
- IsScalableVector = false;
- } else if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::FSGNJ_S;
- IsScalableVector = false;
- } else if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::FSGNJ_D;
- IsScalableVector = false;
- } else if (RISCV::VRRegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV1R_V;
- LMul = RISCVII::LMUL_1;
- } else if (RISCV::VRM2RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV2R_V;
- LMul = RISCVII::LMUL_2;
- } else if (RISCV::VRM4RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV4R_V;
- LMul = RISCVII::LMUL_4;
- } else if (RISCV::VRM8RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV8R_V;
- LMul = RISCVII::LMUL_8;
- } else if (RISCV::VRN2M1RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV1R_V;
- SubRegIdx = RISCV::sub_vrm1_0;
- NF = 2;
- LMul = RISCVII::LMUL_1;
- } else if (RISCV::VRN2M2RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV2R_V;
- SubRegIdx = RISCV::sub_vrm2_0;
- NF = 2;
- LMul = RISCVII::LMUL_2;
- } else if (RISCV::VRN2M4RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV4R_V;
- SubRegIdx = RISCV::sub_vrm4_0;
- NF = 2;
- LMul = RISCVII::LMUL_4;
- } else if (RISCV::VRN3M1RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV1R_V;
- SubRegIdx = RISCV::sub_vrm1_0;
- NF = 3;
- LMul = RISCVII::LMUL_1;
- } else if (RISCV::VRN3M2RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV2R_V;
- SubRegIdx = RISCV::sub_vrm2_0;
- NF = 3;
- LMul = RISCVII::LMUL_2;
- } else if (RISCV::VRN4M1RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV1R_V;
- SubRegIdx = RISCV::sub_vrm1_0;
- NF = 4;
- LMul = RISCVII::LMUL_1;
- } else if (RISCV::VRN4M2RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV2R_V;
- SubRegIdx = RISCV::sub_vrm2_0;
- NF = 4;
- LMul = RISCVII::LMUL_2;
- } else if (RISCV::VRN5M1RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV1R_V;
- SubRegIdx = RISCV::sub_vrm1_0;
- NF = 5;
- LMul = RISCVII::LMUL_1;
- } else if (RISCV::VRN6M1RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV1R_V;
- SubRegIdx = RISCV::sub_vrm1_0;
- NF = 6;
- LMul = RISCVII::LMUL_1;
- } else if (RISCV::VRN7M1RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV1R_V;
- SubRegIdx = RISCV::sub_vrm1_0;
- NF = 7;
- LMul = RISCVII::LMUL_1;
- } else if (RISCV::VRN8M1RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV1R_V;
- SubRegIdx = RISCV::sub_vrm1_0;
- NF = 8;
- LMul = RISCVII::LMUL_1;
- } else {
- llvm_unreachable("Impossible reg-to-reg copy");
+ BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
}
- if (IsScalableVector) {
- bool UseVMV_V_V = false;
- bool UseVMV_V_I = false;
- MachineBasicBlock::const_iterator DefMBBI;
- if (isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
- UseVMV_V_V = true;
- // We only need to handle LMUL = 1/2/4/8 here because we only define
- // vector register classes for LMUL = 1/2/4/8.
- unsigned VIOpc;
- switch (LMul) {
- default:
- llvm_unreachable("Impossible LMUL for vector register copy.");
- case RISCVII::LMUL_1:
- Opc = RISCV::PseudoVMV_V_V_M1;
- VIOpc = RISCV::PseudoVMV_V_I_M1;
- break;
- case RISCVII::LMUL_2:
- Opc = RISCV::PseudoVMV_V_V_M2;
- VIOpc = RISCV::PseudoVMV_V_I_M2;
- break;
- case RISCVII::LMUL_4:
- Opc = RISCV::PseudoVMV_V_V_M4;
- VIOpc = RISCV::PseudoVMV_V_I_M4;
- break;
- case RISCVII::LMUL_8:
- Opc = RISCV::PseudoVMV_V_V_M8;
- VIOpc = RISCV::PseudoVMV_V_I_M8;
- break;
- }
+ if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {
+ BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_S), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
- if (DefMBBI->getOpcode() == VIOpc) {
- UseVMV_V_I = true;
- Opc = VIOpc;
- }
- }
+ if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) {
+ BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_D), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
- if (NF == 1) {
- auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), DstReg);
- if (UseVMV_V_V)
- MIB.addReg(DstReg, RegState::Undef);
- if (UseVMV_V_I)
- MIB = MIB.add(DefMBBI->getOperand(2));
- else
- MIB = MIB.addReg(SrcReg, getKillRegState(KillSrc));
- if (UseVMV_V_V) {
- const MCInstrDesc &Desc = DefMBBI->getDesc();
- MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL
- MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
- MIB.addImm(0); // tu, mu
- MIB.addReg(RISCV::VL, RegState::Implicit);
- MIB.addReg(RISCV::VTYPE, RegState::Implicit);
- }
- } else {
- int I = 0, End = NF, Incr = 1;
- unsigned SrcEncoding = TRI->getEncodingValue(SrcReg);
- unsigned DstEncoding = TRI->getEncodingValue(DstReg);
- unsigned LMulVal;
- bool Fractional;
- std::tie(LMulVal, Fractional) = RISCVVType::decodeVLMUL(LMul);
- assert(!Fractional && "It is impossible be fractional lmul here.");
- if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMulVal)) {
- I = NF - 1;
- End = -1;
- Incr = -1;
- }
+ if (RISCV::FPR32RegClass.contains(DstReg) &&
+ RISCV::GPRRegClass.contains(SrcReg)) {
+ BuildMI(MBB, MBBI, DL, get(RISCV::FMV_W_X), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
- for (; I != End; I += Incr) {
- auto MIB = BuildMI(MBB, MBBI, DL, get(Opc),
- TRI->getSubReg(DstReg, SubRegIdx + I));
- if (UseVMV_V_V)
- MIB.addReg(TRI->getSubReg(DstReg, SubRegIdx + I),
- RegState::Undef);
- if (UseVMV_V_I)
- MIB = MIB.add(DefMBBI->getOperand(2));
- else
- MIB = MIB.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I),
- getKillRegState(KillSrc));
- if (UseVMV_V_V) {
- const MCInstrDesc &Desc = DefMBBI->getDesc();
- MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL
- MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
- MIB.addImm(0); // tu, mu
- MIB.addReg(RISCV::VL, RegState::Implicit);
- MIB.addReg(RISCV::VTYPE, RegState::Implicit);
- }
- }
- }
- } else {
- BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
- .addReg(SrcReg, getKillRegState(KillSrc))
+ if (RISCV::GPRRegClass.contains(DstReg) &&
+ RISCV::FPR32RegClass.contains(SrcReg)) {
+ BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_W), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ if (RISCV::FPR64RegClass.contains(DstReg) &&
+ RISCV::GPRRegClass.contains(SrcReg)) {
+ assert(STI.getXLen() == 64 && "Unexpected GPR size");
+ BuildMI(MBB, MBBI, DL, get(RISCV::FMV_D_X), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ if (RISCV::GPRRegClass.contains(DstReg) &&
+ RISCV::FPR64RegClass.contains(SrcReg)) {
+ assert(STI.getXLen() == 64 && "Unexpected GPR size");
+ BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_D), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ // VR->VR copies.
+ if (RISCV::VRRegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V);
+ return;
+ }
+
+ if (RISCV::VRM2RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V);
+ return;
+ }
+
+ if (RISCV::VRM4RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV4R_V);
+ return;
+ }
+
+ if (RISCV::VRM8RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV8R_V);
+ return;
+ }
+
+ if (RISCV::VRN2M1RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
+ /*NF=*/2);
+ return;
+ }
+
+ if (RISCV::VRN2M2RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V,
+ /*NF=*/2);
+ return;
+ }
+
+ if (RISCV::VRN2M4RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV4R_V,
+ /*NF=*/2);
+ return;
+ }
+
+ if (RISCV::VRN3M1RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
+ /*NF=*/3);
+ return;
+ }
+
+ if (RISCV::VRN3M2RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V,
+ /*NF=*/3);
+ return;
+ }
+
+ if (RISCV::VRN4M1RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
+ /*NF=*/4);
+ return;
+ }
+
+ if (RISCV::VRN4M2RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V,
+ /*NF=*/4);
+ return;
+ }
+
+ if (RISCV::VRN5M1RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
+ /*NF=*/5);
+ return;
+ }
+
+ if (RISCV::VRN6M1RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
+ /*NF=*/6);
+ return;
+ }
+
+ if (RISCV::VRN7M1RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
+ /*NF=*/7);
+ return;
+ }
+
+ if (RISCV::VRN8M1RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
+ /*NF=*/8);
+ return;
}
+
+ llvm_unreachable("Impossible reg-to-reg copy");
}
void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -526,10 +598,6 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI,
Register VReg) const {
- DebugLoc DL;
- if (I != MBB.end())
- DL = I->getDebugLoc();
-
MachineFunction *MF = MBB.getParent();
MachineFrameInfo &MFI = MF->getFrameInfo();
@@ -590,7 +658,7 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MemoryLocation::UnknownSize, MFI.getObjectAlign(FI));
MFI.setStackID(FI, TargetStackID::ScalableVector);
- BuildMI(MBB, I, DL, get(Opcode))
+ BuildMI(MBB, I, DebugLoc(), get(Opcode))
.addReg(SrcReg, getKillRegState(IsKill))
.addFrameIndex(FI)
.addMemOperand(MMO);
@@ -599,7 +667,7 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
- BuildMI(MBB, I, DL, get(Opcode))
+ BuildMI(MBB, I, DebugLoc(), get(Opcode))
.addReg(SrcReg, getKillRegState(IsKill))
.addFrameIndex(FI)
.addImm(0)
@@ -613,10 +681,6 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI,
Register VReg) const {
- DebugLoc DL;
- if (I != MBB.end())
- DL = I->getDebugLoc();
-
MachineFunction *MF = MBB.getParent();
MachineFrameInfo &MFI = MF->getFrameInfo();
@@ -677,7 +741,7 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MemoryLocation::UnknownSize, MFI.getObjectAlign(FI));
MFI.setStackID(FI, TargetStackID::ScalableVector);
- BuildMI(MBB, I, DL, get(Opcode), DstReg)
+ BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
.addFrameIndex(FI)
.addMemOperand(MMO);
} else {
@@ -685,7 +749,7 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
- BuildMI(MBB, I, DL, get(Opcode), DstReg)
+ BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO);
@@ -704,8 +768,7 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
if (MF.getDataLayout().isBigEndian())
return nullptr;
- // Fold load from stack followed by sext.w into lw.
- // TODO: Fold with sext.b, sext.h, zext.b, zext.h, zext.w?
+ // Fold load from stack followed by sext.b/sext.h/sext.w/zext.b/zext.h/zext.w.
if (Ops.size() != 1 || Ops[0] != 1)
return nullptr;
@@ -753,38 +816,50 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register DstReg, uint64_t Val,
- MachineInstr::MIFlag Flag) const {
+ MachineInstr::MIFlag Flag, bool DstRenamable,
+ bool DstIsDead) const {
Register SrcReg = RISCV::X0;
if (!STI.is64Bit() && !isInt<32>(Val))
report_fatal_error("Should only materialize 32-bit constants for RV32");
- RISCVMatInt::InstSeq Seq =
- RISCVMatInt::generateInstSeq(Val, STI.getFeatureBits());
+ RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI);
assert(!Seq.empty());
+ bool SrcRenamable = false;
+ unsigned Num = 0;
+
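+  // Illustrative example, not part of the original commit: on RV64,
+  // movImm of 0x12345678 typically expands to
+  //   lui   dst, 0x12345
+  //   addiw dst, dst, 0x678
+  // with both defs of dst carrying the renamable/dead state computed below.
+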
for (const RISCVMatInt::Inst &Inst : Seq) {
+ bool LastItem = ++Num == Seq.size();
+ unsigned DstRegState = getDeadRegState(DstIsDead && LastItem) |
+ getRenamableRegState(DstRenamable);
+ unsigned SrcRegState = getKillRegState(SrcReg != RISCV::X0) |
+ getRenamableRegState(SrcRenamable);
switch (Inst.getOpndKind()) {
case RISCVMatInt::Imm:
- BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()), DstReg)
+ BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
+ .addReg(DstReg, RegState::Define | DstRegState)
.addImm(Inst.getImm())
.setMIFlag(Flag);
break;
case RISCVMatInt::RegX0:
- BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()), DstReg)
- .addReg(SrcReg, RegState::Kill)
+ BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
+ .addReg(DstReg, RegState::Define | DstRegState)
+ .addReg(SrcReg, SrcRegState)
.addReg(RISCV::X0)
.setMIFlag(Flag);
break;
case RISCVMatInt::RegReg:
- BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()), DstReg)
- .addReg(SrcReg, RegState::Kill)
- .addReg(SrcReg, RegState::Kill)
+ BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
+ .addReg(DstReg, RegState::Define | DstRegState)
+ .addReg(SrcReg, SrcRegState)
+ .addReg(SrcReg, SrcRegState)
.setMIFlag(Flag);
break;
case RISCVMatInt::RegImm:
- BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()), DstReg)
- .addReg(SrcReg, RegState::Kill)
+ BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
+ .addReg(DstReg, RegState::Define | DstRegState)
+ .addReg(SrcReg, SrcRegState)
.addImm(Inst.getImm())
.setMIFlag(Flag);
break;
@@ -792,6 +867,7 @@ void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
// Only the first instruction has X0 as its source.
SrcReg = DstReg;
+ SrcRenamable = DstRenamable;
}
}
@@ -829,25 +905,29 @@ static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
Cond.push_back(LastInst.getOperand(1));
}
-const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC) const {
+unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC) {
switch (CC) {
default:
llvm_unreachable("Unknown condition code!");
case RISCVCC::COND_EQ:
- return get(RISCV::BEQ);
+ return RISCV::BEQ;
case RISCVCC::COND_NE:
- return get(RISCV::BNE);
+ return RISCV::BNE;
case RISCVCC::COND_LT:
- return get(RISCV::BLT);
+ return RISCV::BLT;
case RISCVCC::COND_GE:
- return get(RISCV::BGE);
+ return RISCV::BGE;
case RISCVCC::COND_LTU:
- return get(RISCV::BLTU);
+ return RISCV::BLTU;
case RISCVCC::COND_GEU:
- return get(RISCV::BGEU);
+ return RISCV::BGEU;
}
}
+const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC) const {
+ return get(RISCVCC::getBrCond(CC));
+}
+
RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
switch (CC) {
default:
@@ -907,6 +987,10 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
if (I->getDesc().isIndirectBranch())
return true;
+ // We can't handle Generic branch opcodes from Global ISel.
+ if (I->isPreISelOpcode())
+ return true;
+
// We can't handle blocks with more than 2 terminators.
if (NumTerminators > 2)
return true;
@@ -1079,6 +1163,125 @@ bool RISCVInstrInfo::reverseBranchCondition(
return false;
}
+bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ MachineBasicBlock *TBB, *FBB;
+ SmallVector<MachineOperand, 3> Cond;
+ if (analyzeBranch(*MBB, TBB, FBB, Cond, /*AllowModify=*/false))
+ return false;
+ (void)FBB;
+
+ RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
+ assert(CC != RISCVCC::COND_INVALID);
+
+ if (CC == RISCVCC::COND_EQ || CC == RISCVCC::COND_NE)
+ return false;
+
+ // For two constants C0 and C1 from
+ // ```
+ // li Y, C0
+ // li Z, C1
+ // ```
+ // 1. if C1 = C0 + 1
+ // we can turn:
+ // (a) blt Y, X -> bge X, Z
+ // (b) bge Y, X -> blt X, Z
+ //
+ // 2. if C1 = C0 - 1
+ // we can turn:
+ // (a) blt X, Y -> bge Z, X
+ // (b) bge X, Y -> blt Z, X
+ //
+  // To make sure this optimization is really beneficial, we only optimize
+  // for cases where Y has a single use, i.e. it is only used by the branch.
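+  //
+  // Illustrative example, not in the original comment: with C0 = 4 in Y and
+  // an existing "li Z, 5" found in this block, case 1(a) rewrites
+  //   blt Y, X, bb    ; taken when 4 < X
+  // into
+  //   bge X, Z, bb    ; taken when X >= 5
+  // which is the same condition for integer X and leaves Y's li dead.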
+
+  // Right now we only care about LI (i.e. ADDI rd, x0, imm).
+ auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool {
+ if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
+ MI->getOperand(1).getReg() == RISCV::X0) {
+ Imm = MI->getOperand(2).getImm();
+ return true;
+ }
+ return false;
+ };
+  // Matches either a load-immediate instruction or X0.
+ auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool {
+ if (!Op.isReg())
+ return false;
+ Register Reg = Op.getReg();
+ if (Reg == RISCV::X0) {
+ Imm = 0;
+ return true;
+ }
+ if (!Reg.isVirtual())
+ return false;
+ return isLoadImm(MRI.getVRegDef(Op.getReg()), Imm);
+ };
+
+ MachineOperand &LHS = MI.getOperand(0);
+ MachineOperand &RHS = MI.getOperand(1);
+  // Try to find a register Z holding the constant C1; return an invalid
+  // register otherwise.
+ auto searchConst = [&](int64_t C1) -> Register {
+ MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend();
+ auto DefC1 = std::find_if(++II, E, [&](const MachineInstr &I) -> bool {
+ int64_t Imm;
+ return isLoadImm(&I, Imm) && Imm == C1;
+ });
+ if (DefC1 != E)
+ return DefC1->getOperand(0).getReg();
+
+ return Register();
+ };
+
+ bool Modify = false;
+ int64_t C0;
+ if (isFromLoadImm(LHS, C0) && MRI.hasOneUse(LHS.getReg())) {
+ // Might be case 1.
+    // Guard against signed overflow of C0 + 1, which would be UB. (UINT64_MAX
+    // is bigger, so we don't need to worry about unsigned overflow here.)
+ if (C0 < INT64_MAX)
+ if (Register RegZ = searchConst(C0 + 1)) {
+ reverseBranchCondition(Cond);
+ Cond[1] = MachineOperand::CreateReg(RHS.getReg(), /*isDef=*/false);
+ Cond[2] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
+        // We might extend the live range of Z, so clear its kill flag to
+        // account for this.
+ MRI.clearKillFlags(RegZ);
+ Modify = true;
+ }
+ } else if (isFromLoadImm(RHS, C0) && MRI.hasOneUse(RHS.getReg())) {
+ // Might be case 2.
+ // For unsigned cases, we don't want C1 to wrap back to UINT64_MAX
+ // when C0 is zero.
+ if ((CC == RISCVCC::COND_GE || CC == RISCVCC::COND_LT) || C0)
+ if (Register RegZ = searchConst(C0 - 1)) {
+ reverseBranchCondition(Cond);
+ Cond[1] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
+ Cond[2] = MachineOperand::CreateReg(LHS.getReg(), /*isDef=*/false);
+        // We might extend the live range of Z, so clear its kill flag to
+        // account for this.
+ MRI.clearKillFlags(RegZ);
+ Modify = true;
+ }
+ }
+
+ if (!Modify)
+ return false;
+
+ // Build the new branch and remove the old one.
+ BuildMI(*MBB, MI, MI.getDebugLoc(),
+ getBrCond(static_cast<RISCVCC::CondCode>(Cond[0].getImm())))
+ .add(Cond[1])
+ .add(Cond[2])
+ .addMBB(TBB);
+ MI.eraseFromParent();
+
+ return true;
+}
+
MachineBasicBlock *
RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
assert(MI.getDesc().isBranch() && "Unexpected opcode!");
@@ -1118,12 +1321,31 @@ unsigned getPredicatedOpcode(unsigned Opcode) {
switch (Opcode) {
case RISCV::ADD: return RISCV::PseudoCCADD; break;
case RISCV::SUB: return RISCV::PseudoCCSUB; break;
+ case RISCV::SLL: return RISCV::PseudoCCSLL; break;
+ case RISCV::SRL: return RISCV::PseudoCCSRL; break;
+ case RISCV::SRA: return RISCV::PseudoCCSRA; break;
case RISCV::AND: return RISCV::PseudoCCAND; break;
case RISCV::OR: return RISCV::PseudoCCOR; break;
case RISCV::XOR: return RISCV::PseudoCCXOR; break;
+ case RISCV::ADDI: return RISCV::PseudoCCADDI; break;
+ case RISCV::SLLI: return RISCV::PseudoCCSLLI; break;
+ case RISCV::SRLI: return RISCV::PseudoCCSRLI; break;
+ case RISCV::SRAI: return RISCV::PseudoCCSRAI; break;
+ case RISCV::ANDI: return RISCV::PseudoCCANDI; break;
+ case RISCV::ORI: return RISCV::PseudoCCORI; break;
+ case RISCV::XORI: return RISCV::PseudoCCXORI; break;
+
case RISCV::ADDW: return RISCV::PseudoCCADDW; break;
case RISCV::SUBW: return RISCV::PseudoCCSUBW; break;
+ case RISCV::SLLW: return RISCV::PseudoCCSLLW; break;
+ case RISCV::SRLW: return RISCV::PseudoCCSRLW; break;
+ case RISCV::SRAW: return RISCV::PseudoCCSRAW; break;
+
+ case RISCV::ADDIW: return RISCV::PseudoCCADDIW; break;
+ case RISCV::SLLIW: return RISCV::PseudoCCSLLIW; break;
+ case RISCV::SRLIW: return RISCV::PseudoCCSRLIW; break;
+ case RISCV::SRAIW: return RISCV::PseudoCCSRAIW; break;
}
return RISCV::INSTRUCTION_LIST_END;
@@ -1144,6 +1366,10 @@ static MachineInstr *canFoldAsPredicatedOp(Register Reg,
// Check if MI can be predicated and folded into the CCMOV.
if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
return nullptr;
+ // Don't predicate li idiom.
+ if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
+ MI->getOperand(1).getReg() == RISCV::X0)
+ return nullptr;
// Check if MI has any other defs or physreg uses.
for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
// Reject frame index operands, PEI can't handle the predicated pseudos.
@@ -1290,7 +1516,20 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
if (isCompressibleInst(MI, STI))
return 2;
}
- return get(Opcode).getSize();
+
+ switch (Opcode) {
+  case TargetOpcode::STACKMAP:
+    // The upper bound for a stackmap intrinsic is the full length of its
+    // shadow.
+    return StackMapOpers(&MI).getNumPatchBytes();
+  case TargetOpcode::PATCHPOINT:
+    // The size of the patchpoint intrinsic is the number of bytes requested.
+    return PatchPointOpers(&MI).getNumPatchBytes();
+  case TargetOpcode::STATEPOINT:
+    // The size of the statepoint intrinsic is the number of bytes requested.
+    return StatepointOpers(&MI).getNumPatchBytes();
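+  // Illustrative, not in the original commit: all other opcodes fall through
+  // to the static MCInstrDesc size, e.g. 4 bytes for ADD (compressed sizes
+  // were already handled above).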
+ default:
+ return get(Opcode).getSize();
+ }
}
unsigned RISCVInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
@@ -1372,15 +1611,6 @@ MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const {
return ForceMachineCombinerStrategy;
}
-void RISCVInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
- MachineInstr &OldMI2,
- MachineInstr &NewMI1,
- MachineInstr &NewMI2) const {
- uint32_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
- NewMI1.setFlags(IntersectedFlags);
- NewMI2.setFlags(IntersectedFlags);
-}
-
void RISCVInstrInfo::finalizeInsInstrs(
MachineInstr &Root, MachineCombinerPattern &P,
SmallVectorImpl<MachineInstr *> &InsInstrs) const {
@@ -1896,8 +2126,174 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
return true;
}
-// Return true if get the base operand, byte offset of an instruction and the
-// memory width. Width is the size of memory that is being loaded/stored.
+bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
+ const MachineInstr &AddrI,
+ ExtAddrMode &AM) const {
+ switch (MemI.getOpcode()) {
+ default:
+ return false;
+ case RISCV::LB:
+ case RISCV::LBU:
+ case RISCV::LH:
+ case RISCV::LHU:
+ case RISCV::LW:
+ case RISCV::LWU:
+ case RISCV::LD:
+ case RISCV::FLH:
+ case RISCV::FLW:
+ case RISCV::FLD:
+ case RISCV::SB:
+ case RISCV::SH:
+ case RISCV::SW:
+ case RISCV::SD:
+ case RISCV::FSH:
+ case RISCV::FSW:
+ case RISCV::FSD:
+ break;
+ }
+
+ if (MemI.getOperand(0).getReg() == Reg)
+ return false;
+
+ if (AddrI.getOpcode() != RISCV::ADDI || !AddrI.getOperand(1).isReg() ||
+ !AddrI.getOperand(2).isImm())
+ return false;
+
+ int64_t OldOffset = MemI.getOperand(2).getImm();
+ int64_t Disp = AddrI.getOperand(2).getImm();
+ int64_t NewOffset = OldOffset + Disp;
+ if (!STI.is64Bit())
+ NewOffset = SignExtend64<32>(NewOffset);
+
+ if (!isInt<12>(NewOffset))
+ return false;
+
+ AM.BaseReg = AddrI.getOperand(1).getReg();
+ AM.ScaledReg = 0;
+ AM.Scale = 0;
+ AM.Displacement = NewOffset;
+ AM.Form = ExtAddrMode::Formula::Basic;
+ return true;
+}
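+
+// Illustrative example, not part of the original commit: given
+//   addi a1, a0, 16
+//   lw   a2, 4(a1)
+// the check above accepts the pair with NewOffset = 16 + 4 = 20, and
+// emitLdStWithAddr below re-emits the access as
+//   lw   a2, 20(a0)
+// so the caller can delete the addi once a1 has no other uses.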
+
+MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
+ const ExtAddrMode &AM) const {
+
+ const DebugLoc &DL = MemI.getDebugLoc();
+ MachineBasicBlock &MBB = *MemI.getParent();
+
+ assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
+ "Addressing mode not supported for folding");
+
+ return BuildMI(MBB, MemI, DL, get(MemI.getOpcode()))
+ .addReg(MemI.getOperand(0).getReg(),
+ MemI.mayLoad() ? RegState::Define : 0)
+ .addReg(AM.BaseReg)
+ .addImm(AM.Displacement)
+ .setMemRefs(MemI.memoperands())
+ .setMIFlags(MemI.getFlags());
+}
+
+bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
+ if (!LdSt.mayLoadOrStore())
+ return false;
+
+ // Conservatively, only handle scalar loads/stores for now.
+ switch (LdSt.getOpcode()) {
+ case RISCV::LB:
+ case RISCV::LBU:
+ case RISCV::SB:
+ case RISCV::LH:
+ case RISCV::LHU:
+ case RISCV::FLH:
+ case RISCV::SH:
+ case RISCV::FSH:
+ case RISCV::LW:
+ case RISCV::LWU:
+ case RISCV::FLW:
+ case RISCV::SW:
+ case RISCV::FSW:
+ case RISCV::LD:
+ case RISCV::FLD:
+ case RISCV::SD:
+ case RISCV::FSD:
+ break;
+ default:
+ return false;
+ }
+ const MachineOperand *BaseOp;
+ OffsetIsScalable = false;
+ if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
+ return false;
+ BaseOps.push_back(BaseOp);
+ return true;
+}
+
+// TODO: This was copied from SIInstrInfo. Could it be lifted to a common
+// helper?
+static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
+ ArrayRef<const MachineOperand *> BaseOps1,
+ const MachineInstr &MI2,
+ ArrayRef<const MachineOperand *> BaseOps2) {
+ // Only examine the first "base" operand of each instruction, on the
+ // assumption that it represents the real base address of the memory access.
+ // Other operands are typically offsets or indices from this base address.
+ if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))
+ return true;
+
+ if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
+ return false;
+
+ auto MO1 = *MI1.memoperands_begin();
+ auto MO2 = *MI2.memoperands_begin();
+ if (MO1->getAddrSpace() != MO2->getAddrSpace())
+ return false;
+
+ auto Base1 = MO1->getValue();
+ auto Base2 = MO2->getValue();
+ if (!Base1 || !Base2)
+ return false;
+ Base1 = getUnderlyingObject(Base1);
+ Base2 = getUnderlyingObject(Base2);
+
+ if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
+ return false;
+
+ return Base1 == Base2;
+}
+
+bool RISCVInstrInfo::shouldClusterMemOps(
+ ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1,
+ bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
+ int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize,
+ unsigned NumBytes) const {
+  // If the mem ops (to be clustered) do not have the same base pointer, they
+  // should not be clustered.
+ if (!BaseOps1.empty() && !BaseOps2.empty()) {
+ const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
+ const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
+ if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2))
+ return false;
+ } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
+    // If only one base op is empty, the two ops do not share a base pointer.
+ return false;
+ }
+
+ // TODO: Use a more carefully chosen heuristic, e.g. only cluster if offsets
+ // indicate they likely share a cache line.
+ return ClusterSize <= 4;
+}
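+
+// Illustrative example, not part of the original commit: two loads such as
+//   lw a1, 0(a0)
+//   lw a2, 4(a0)
+// share the base pointer a0, so with a cluster of at most four ops the
+// scheduler is told it may keep them adjacent.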
+
+// Set BaseReg (the base register operand), Offset (the byte offset being
+// accessed) and the access Width of the passed instruction that reads/writes
+// memory. Returns false if the instruction does not read/write memory or the
+// BaseReg/Offset/Width can't be determined. It is not guaranteed to
+// recognise base operands and offsets in all cases.
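+// Illustrative example, not in the original comment: for "lw a1, 8(a0)" this
+// sets BaseReg = a0, Offset = 8 and Width = 4.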
+// TODO: Add an IsScalable bool ref argument (like the equivalent AArch64
+// function) and set it as appropriate.
bool RISCVInstrInfo::getMemOperandWithOffsetWidth(
const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
unsigned &Width, const TargetRegisterInfo *TRI) const {
@@ -1906,10 +2302,11 @@ bool RISCVInstrInfo::getMemOperandWithOffsetWidth(
// Here we assume the standard RISC-V ISA, which uses a base+offset
// addressing mode. You'll need to relax these conditions to support custom
- // load/stores instructions.
+ // load/store instructions.
if (LdSt.getNumExplicitOperands() != 3)
return false;
- if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
+ if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
+ !LdSt.getOperand(2).isImm())
return false;
if (!LdSt.hasOneMemOperand())
@@ -2132,6 +2529,23 @@ MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
return It;
}
+std::optional<RegImmPair> RISCVInstrInfo::isAddImmediate(const MachineInstr &MI,
+ Register Reg) const {
+ // TODO: Handle cases where Reg is a super- or sub-register of the
+ // destination register.
+ const MachineOperand &Op0 = MI.getOperand(0);
+ if (!Op0.isReg() || Reg != Op0.getReg())
+ return std::nullopt;
+
+ // Don't consider ADDIW as a candidate because the caller may not be aware
+ // of its sign extension behaviour.
+ if (MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() &&
+ MI.getOperand(2).isImm())
+ return RegImmPair{MI.getOperand(1).getReg(), MI.getOperand(2).getImm()};
+
+ return std::nullopt;
+}
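+
+// Illustrative example, not part of the original commit: for
+// "%1 = ADDI %0, -8", isAddImmediate(MI, %1) yields {%0, -8}; any other
+// opcode, including ADDIW, yields std::nullopt.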
+
// MIR printer helper function to annotate Operands with a comment.
std::string RISCVInstrInfo::createMIROperandComment(
const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
@@ -2202,9 +2616,9 @@ std::string RISCVInstrInfo::createMIROperandComment(
case CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE)
#define CASE_VFMA_SPLATS(OP) \
- CASE_VFMA_OPCODE_LMULS_MF4(OP, VF16): \
- case CASE_VFMA_OPCODE_LMULS_MF2(OP, VF32): \
- case CASE_VFMA_OPCODE_LMULS_M1(OP, VF64)
+ CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16): \
+ case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32): \
+ case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64)
// clang-format on
bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
@@ -2365,9 +2779,9 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE)
#define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
- CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VF16) \
- CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VF32) \
- CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VF64)
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64)
MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
bool NewMI,
@@ -2591,6 +3005,7 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
.add(MI.getOperand(3))
.add(MI.getOperand(4))
.add(MI.getOperand(5));
+ break;
}
}
MIB.copyImplicitOps(MI);
@@ -2836,3 +3251,123 @@ bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) {
MachineOperand FrmOp2 = MI2.getOperand(MI2FrmOpIdx);
return FrmOp1.getImm() == FrmOp2.getImm();
}
+
+std::optional<unsigned>
+RISCV::getVectorLowDemandedScalarBits(uint16_t Opcode, unsigned Log2SEW) {
+ // TODO: Handle Zvbb instructions
+ switch (Opcode) {
+ default:
+ return std::nullopt;
+
+ // 11.6. Vector Single-Width Shift Instructions
+ case RISCV::VSLL_VX:
+ case RISCV::VSRL_VX:
+ case RISCV::VSRA_VX:
+ // 12.4. Vector Single-Width Scaling Shift Instructions
+ case RISCV::VSSRL_VX:
+ case RISCV::VSSRA_VX:
+ // Only the low lg2(SEW) bits of the shift-amount value are used.
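+    // Illustrative, not in the original comment: for SEW = 64 (Log2SEW = 6),
+    // vsll.vx reads only the low 6 bits of rs1.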
+ return Log2SEW;
+
+ // 11.7 Vector Narrowing Integer Right Shift Instructions
+ case RISCV::VNSRL_WX:
+ case RISCV::VNSRA_WX:
+ // 12.5. Vector Narrowing Fixed-Point Clip Instructions
+ case RISCV::VNCLIPU_WX:
+ case RISCV::VNCLIP_WX:
+ // Only the low lg2(2*SEW) bits of the shift-amount value are used.
+ return Log2SEW + 1;
+
+ // 11.1. Vector Single-Width Integer Add and Subtract
+ case RISCV::VADD_VX:
+ case RISCV::VSUB_VX:
+ case RISCV::VRSUB_VX:
+ // 11.2. Vector Widening Integer Add/Subtract
+ case RISCV::VWADDU_VX:
+ case RISCV::VWSUBU_VX:
+ case RISCV::VWADD_VX:
+ case RISCV::VWSUB_VX:
+ case RISCV::VWADDU_WX:
+ case RISCV::VWSUBU_WX:
+ case RISCV::VWADD_WX:
+ case RISCV::VWSUB_WX:
+ // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
+ case RISCV::VADC_VXM:
+ case RISCV::VADC_VIM:
+ case RISCV::VMADC_VXM:
+ case RISCV::VMADC_VIM:
+ case RISCV::VMADC_VX:
+ case RISCV::VSBC_VXM:
+ case RISCV::VMSBC_VXM:
+ case RISCV::VMSBC_VX:
+ // 11.5 Vector Bitwise Logical Instructions
+ case RISCV::VAND_VX:
+ case RISCV::VOR_VX:
+ case RISCV::VXOR_VX:
+ // 11.8. Vector Integer Compare Instructions
+ case RISCV::VMSEQ_VX:
+ case RISCV::VMSNE_VX:
+ case RISCV::VMSLTU_VX:
+ case RISCV::VMSLT_VX:
+ case RISCV::VMSLEU_VX:
+ case RISCV::VMSLE_VX:
+ case RISCV::VMSGTU_VX:
+ case RISCV::VMSGT_VX:
+ // 11.9. Vector Integer Min/Max Instructions
+ case RISCV::VMINU_VX:
+ case RISCV::VMIN_VX:
+ case RISCV::VMAXU_VX:
+ case RISCV::VMAX_VX:
+ // 11.10. Vector Single-Width Integer Multiply Instructions
+ case RISCV::VMUL_VX:
+ case RISCV::VMULH_VX:
+ case RISCV::VMULHU_VX:
+ case RISCV::VMULHSU_VX:
+ // 11.11. Vector Integer Divide Instructions
+ case RISCV::VDIVU_VX:
+ case RISCV::VDIV_VX:
+ case RISCV::VREMU_VX:
+ case RISCV::VREM_VX:
+ // 11.12. Vector Widening Integer Multiply Instructions
+ case RISCV::VWMUL_VX:
+ case RISCV::VWMULU_VX:
+ case RISCV::VWMULSU_VX:
+ // 11.13. Vector Single-Width Integer Multiply-Add Instructions
+ case RISCV::VMACC_VX:
+ case RISCV::VNMSAC_VX:
+ case RISCV::VMADD_VX:
+ case RISCV::VNMSUB_VX:
+ // 11.14. Vector Widening Integer Multiply-Add Instructions
+ case RISCV::VWMACCU_VX:
+ case RISCV::VWMACC_VX:
+ case RISCV::VWMACCSU_VX:
+ case RISCV::VWMACCUS_VX:
+ // 11.15. Vector Integer Merge Instructions
+ case RISCV::VMERGE_VXM:
+ // 11.16. Vector Integer Move Instructions
+ case RISCV::VMV_V_X:
+ // 12.1. Vector Single-Width Saturating Add and Subtract
+ case RISCV::VSADDU_VX:
+ case RISCV::VSADD_VX:
+ case RISCV::VSSUBU_VX:
+ case RISCV::VSSUB_VX:
+ // 12.2. Vector Single-Width Averaging Add and Subtract
+ case RISCV::VAADDU_VX:
+ case RISCV::VAADD_VX:
+ case RISCV::VASUBU_VX:
+ case RISCV::VASUB_VX:
+ // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
+ case RISCV::VSMUL_VX:
+ // 16.1. Integer Scalar Move Instructions
+ case RISCV::VMV_S_X:
+ return 1U << Log2SEW;
+ }
+}
+
+unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) {
+ const RISCVVPseudosTable::PseudoInfo *RVV =
+ RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode);
+ if (!RVV)
+ return 0;
+ return RVV->BaseInstr;
+}
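
A short usage sketch for the two helpers added above (illustrative only; the
exact opcode names used here, RISCV::VADD_VX and RISCV::PseudoVADD_VV_M1, are
assumptions based on the RVV opcode and pseudo-table conventions):

  // How many low bits of the scalar operand does vadd.vx consume at
  // SEW = 32 (Log2SEW = 5)? Per the switch above, 1 << 5 = 32.
  if (std::optional<unsigned> Bits =
          RISCV::getVectorLowDemandedScalarBits(RISCV::VADD_VX, /*Log2SEW=*/5))
    assert(*Bits == 32);

  // Map an RVV pseudo back to its MC opcode; returns 0 for non-RVV opcodes.
  unsigned MCOpc = RISCV::getRVVMCOpcode(RISCV::PseudoVADD_VV_M1);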
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 99c907a98121..7e1d3f311806 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -43,6 +43,7 @@ enum CondCode {
};
CondCode getOppositeBranchCondition(CondCode);
+unsigned getBrCond(CondCode CC);
} // end of namespace RISCVCC
@@ -63,6 +64,10 @@ public:
unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex,
unsigned &MemBytes) const override;
+ void copyPhysRegVector(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+ MCRegister DstReg, MCRegister SrcReg, bool KillSrc,
+ unsigned Opc, unsigned NF = 1) const;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg,
bool KillSrc) const override;
@@ -91,7 +96,8 @@ public:
// Materializes the given integer Val into DstReg.
void movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register DstReg, uint64_t Val,
- MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const;
+ MachineInstr::MIFlag Flag = MachineInstr::NoFlags,
+ bool DstRenamable = false, bool DstIsDead = false) const;
unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
@@ -116,6 +122,8 @@ public:
bool
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+ bool optimizeCondBranch(MachineInstr &MI) const override;
+
MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
bool isBranchOffsetInRange(unsigned BranchOpc,
@@ -137,6 +145,25 @@ public:
bool verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const override;
+ bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
+ const MachineInstr &AddrI,
+ ExtAddrMode &AM) const override;
+
+ MachineInstr *emitLdStWithAddr(MachineInstr &MemI,
+ const ExtAddrMode &AM) const override;
+
+ bool getMemOperandsWithOffsetWidth(
+ const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const override;
+
+ bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+ int64_t Offset1, bool OffsetIsScalable1,
+ ArrayRef<const MachineOperand *> BaseOps2,
+ int64_t Offset2, bool OffsetIsScalable2,
+ unsigned ClusterSize,
+ unsigned NumBytes) const override;
+
bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
const MachineOperand *&BaseOp,
int64_t &Offset, unsigned &Width,
@@ -182,6 +209,9 @@ public:
MachineBasicBlock::iterator &It, MachineFunction &MF,
outliner::Candidate &C) const override;
+ std::optional<RegImmPair> isAddImmediate(const MachineInstr &MI,
+ Register Reg) const override;
+
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;
MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
@@ -206,9 +236,6 @@ public:
MachineTraceStrategy getMachineCombinerTraceStrategy() const override;
- void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2,
- MachineInstr &NewMI1,
- MachineInstr &NewMI2) const override;
bool
getMachineCombinerPatterns(MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern> &Patterns,
@@ -265,6 +292,15 @@ int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);
// one of the instructions does not have rounding mode, false will be returned.
bool hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2);
+// If \p Opcode is a .vx vector instruction, returns the number of low bits
+// of the scalar .x operand that are used for a given \p Log2SEW. Otherwise
+// returns std::nullopt.
+std::optional<unsigned> getVectorLowDemandedScalarBits(uint16_t Opcode,
+ unsigned Log2SEW);
+
+// Returns the MC opcode of an RVV pseudo instruction, or 0 if
+// \p RVVPseudoOpcode is not an RVV pseudo.
+unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode);
+
// Special immediate for AVL operand of V pseudo instructions to indicate VLMax.
static constexpr int64_t VLMaxSentinel = -1LL;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index e58e3412aea3..edc08187d8f7 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -84,18 +84,12 @@ def riscv_read_cycle_wide : SDNode<"RISCVISD::READ_CYCLE_WIDE",
def riscv_add_lo : SDNode<"RISCVISD::ADD_LO", SDTIntBinOp>;
def riscv_hi : SDNode<"RISCVISD::HI", SDTIntUnaryOp>;
def riscv_lla : SDNode<"RISCVISD::LLA", SDTIntUnaryOp>;
-def riscv_lga : SDNode<"RISCVISD::LGA", SDTLoad,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def riscv_add_tprel : SDNode<"RISCVISD::ADD_TPREL",
SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
SDTCisInt<0>]>>;
-def riscv_la_tls_ie : SDNode<"RISCVISD::LA_TLS_IE", SDTLoad,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def riscv_la_tls_gd : SDNode<"RISCVISD::LA_TLS_GD", SDTIntUnaryOp>;
-
//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
@@ -149,18 +143,40 @@ class UImmAsmOperand<int width, string suffix = "">
: ImmAsmOperand<"U", width, suffix> {
}
+class RISCVOp<ValueType vt = XLenVT> : Operand<vt> {
+ let OperandNamespace = "RISCVOp";
+}
+
+class RISCVUImmOp<int bitsNum> : RISCVOp {
+ let ParserMatchClass = UImmAsmOperand<bitsNum>;
+ let DecoderMethod = "decodeUImmOperand<" # bitsNum # ">";
+ let OperandType = "OPERAND_UIMM" # bitsNum;
+}
+
+class RISCVUImmLeafOp<int bitsNum> :
+ RISCVUImmOp<bitsNum>, ImmLeaf<XLenVT, "return isUInt<" # bitsNum # ">(Imm);">;
+
+class RISCVSImmOp<int bitsNum> : RISCVOp {
+ let ParserMatchClass = SImmAsmOperand<bitsNum>;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeSImmOperand<" # bitsNum # ">";
+ let OperandType = "OPERAND_SIMM" # bitsNum;
+}
+
+class RISCVSImmLeafOp<int bitsNum> :
+ RISCVSImmOp<bitsNum>, ImmLeaf<XLenVT, "return isInt<" # bitsNum # ">(Imm);">;
+
def FenceArg : AsmOperandClass {
let Name = "FenceArg";
let RenderMethod = "addFenceArgOperands";
let ParserMethod = "parseFenceArg";
}
-def fencearg : Operand<XLenVT> {
+def fencearg : RISCVOp {
let ParserMatchClass = FenceArg;
let PrintMethod = "printFenceArg";
let DecoderMethod = "decodeUImmOperand<4>";
let OperandType = "OPERAND_UIMM4";
- let OperandNamespace = "RISCVOp";
}
def UImmLog2XLenAsmOperand : AsmOperandClass {
@@ -169,7 +185,7 @@ def UImmLog2XLenAsmOperand : AsmOperandClass {
let DiagnosticType = "InvalidUImmLog2XLen";
}
-def uimmlog2xlen : Operand<XLenVT>, ImmLeaf<XLenVT, [{
+def uimmlog2xlen : RISCVOp, ImmLeaf<XLenVT, [{
if (Subtarget->is64Bit())
return isUInt<6>(Imm);
return isUInt<5>(Imm);
@@ -186,21 +202,17 @@ def uimmlog2xlen : Operand<XLenVT>, ImmLeaf<XLenVT, [{
return isUInt<5>(Imm);
}];
let OperandType = "OPERAND_UIMMLOG2XLEN";
- let OperandNamespace = "RISCVOp";
}
-def uimm1 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<1>(Imm);}]> {
- let ParserMatchClass = UImmAsmOperand<1>;
- let DecoderMethod = "decodeUImmOperand<1>";
- let OperandType = "OPERAND_UIMM1";
- let OperandNamespace = "RISCVOp";
+def InsnDirectiveOpcode : AsmOperandClass {
+ let Name = "InsnDirectiveOpcode";
+ let ParserMethod = "parseInsnDirectiveOpcode";
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isImm";
}
-def uimm2 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<2>(Imm);}]> {
- let ParserMatchClass = UImmAsmOperand<2>;
- let DecoderMethod = "decodeUImmOperand<2>";
- let OperandType = "OPERAND_UIMM2";
- let OperandNamespace = "RISCVOp";
+def uimm1 : RISCVUImmLeafOp<1>;
+def uimm2 : RISCVUImmLeafOp<2> {
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -208,75 +220,22 @@ def uimm2 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<2>(Imm);}]> {
return isUInt<2>(Imm);
}];
}
-
-def uimm3 : Operand<XLenVT> {
- let ParserMatchClass = UImmAsmOperand<3>;
- let DecoderMethod = "decodeUImmOperand<3>";
- let OperandType = "OPERAND_UIMM3";
- let OperandNamespace = "RISCVOp";
-}
-
-def uimm4 : Operand<XLenVT> {
- let ParserMatchClass = UImmAsmOperand<4>;
- let DecoderMethod = "decodeUImmOperand<4>";
- let OperandType = "OPERAND_UIMM4";
- let OperandNamespace = "RISCVOp";
-}
-
-def uimm5 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]> {
- let ParserMatchClass = UImmAsmOperand<5>;
- let DecoderMethod = "decodeUImmOperand<5>";
- let OperandType = "OPERAND_UIMM5";
- let OperandNamespace = "RISCVOp";
-}
-
-def InsnDirectiveOpcode : AsmOperandClass {
- let Name = "InsnDirectiveOpcode";
- let ParserMethod = "parseInsnDirectiveOpcode";
- let RenderMethod = "addImmOperands";
- let PredicateMethod = "isImm";
-}
-
-def uimm6 : Operand<XLenVT> {
- let ParserMatchClass = UImmAsmOperand<6>;
- let DecoderMethod = "decodeUImmOperand<6>";
- let OperandType = "OPERAND_UIMM6";
- let OperandNamespace = "RISCVOp";
-}
-
-def uimm7_opcode : Operand<XLenVT> {
+def uimm3 : RISCVUImmOp<3>;
+def uimm4 : RISCVUImmOp<4>;
+def uimm5 : RISCVUImmLeafOp<5>;
+def uimm6 : RISCVUImmLeafOp<6>;
+def uimm7_opcode : RISCVUImmOp<7> {
let ParserMatchClass = InsnDirectiveOpcode;
- let DecoderMethod = "decodeUImmOperand<7>";
- let OperandType = "OPERAND_UIMM7";
- let OperandNamespace = "RISCVOp";
}
-
-def uimm7 : Operand<XLenVT> {
- let ParserMatchClass = UImmAsmOperand<7>;
- let DecoderMethod = "decodeUImmOperand<7>";
- let OperandType = "OPERAND_UIMM7";
- let OperandNamespace = "RISCVOp";
-}
-
-def uimm8 : Operand<XLenVT> {
- let ParserMatchClass = UImmAsmOperand<8>;
- let DecoderMethod = "decodeUImmOperand<8>";
- let OperandType = "OPERAND_UIMM8";
- let OperandNamespace = "RISCVOp";
-}
-
-def simm12 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<12>(Imm);}]> {
- let ParserMatchClass = SImmAsmOperand<12>;
- let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeSImmOperand<12>";
+def uimm7 : RISCVUImmOp<7>;
+def uimm8 : RISCVUImmOp<8>;
+def simm12 : RISCVSImmLeafOp<12> {
let MCOperandPredicate = [{
int64_t Imm;
if (MCOp.evaluateAsConstantImm(Imm))
return isInt<12>(Imm);
return MCOp.isBareSymbolRef();
}];
- let OperandType = "OPERAND_SIMM12";
- let OperandNamespace = "RISCVOp";
}
// A 12-bit signed immediate which cannot fit in 6-bit signed immediate,
@@ -299,26 +258,38 @@ def simm13_lsb0 : Operand<OtherVT> {
let OperandType = "OPERAND_PCREL";
}
-class UImm20Operand : Operand<XLenVT> {
+class UImm20Operand : RISCVOp {
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeUImmOperand<20>";
+ let OperandType = "OPERAND_UIMM20";
+}
+
+class UImm20OperandMaybeSym : UImm20Operand {
let MCOperandPredicate = [{
int64_t Imm;
if (MCOp.evaluateAsConstantImm(Imm))
return isUInt<20>(Imm);
return MCOp.isBareSymbolRef();
}];
- let OperandType = "OPERAND_UIMM20";
- let OperandNamespace = "RISCVOp";
}
-def uimm20_lui : UImm20Operand {
+def uimm20_lui : UImm20OperandMaybeSym {
let ParserMatchClass = UImmAsmOperand<20, "LUI">;
}
-def uimm20_auipc : UImm20Operand {
+def uimm20_auipc : UImm20OperandMaybeSym {
let ParserMatchClass = UImmAsmOperand<20, "AUIPC">;
}
+def uimm20 : UImm20Operand {
+ let ParserMatchClass = UImmAsmOperand<20>;
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ return isUInt<20>(Imm);
+ }];
+}
+
def Simm21Lsb0JALAsmOperand : SImmAsmOperand<21, "Lsb0JAL"> {
let ParserMethod = "parseJALOffset";
}
@@ -392,12 +363,11 @@ def CSRSystemRegister : AsmOperandClass {
let DiagnosticType = "InvalidCSRSystemRegister";
}
-def csr_sysreg : Operand<XLenVT> {
+def csr_sysreg : RISCVOp {
let ParserMatchClass = CSRSystemRegister;
let PrintMethod = "printCSRSystemRegister";
let DecoderMethod = "decodeUImmOperand<12>";
let OperandType = "OPERAND_UIMM12";
- let OperandNamespace = "RISCVOp";
}
// A parameterized register class alternative to i32imm/i64imm from Target.td.
@@ -1105,6 +1075,10 @@ def : MnemonicAlias<"sbreak", "ebreak">;
// that don't support this alias.
def : InstAlias<"zext.b $rd, $rs", (ANDI GPR:$rd, GPR:$rs, 0xFF), 0>;
+let Predicates = [HasStdExtZicfilp] in {
+def : InstAlias<"lpad $imm20", (AUIPC X0, uimm20:$imm20)>;
+}
+
//===----------------------------------------------------------------------===//
// .insn directive instructions
//===----------------------------------------------------------------------===//
@@ -1209,11 +1183,13 @@ def : InstAlias<".insn_s $opcode, $funct3, $rs2, ${imm12}(${rs1})",
class PatGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
: Pat<(vt (OpNode (vt GPR:$rs1))), (Inst GPR:$rs1)>;
-class PatGprGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
- : Pat<(vt (OpNode (vt GPR:$rs1), (vt GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>;
+class PatGprGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt1 = XLenVT,
+ ValueType vt2 = XLenVT>
+ : Pat<(vt1 (OpNode (vt1 GPR:$rs1), (vt2 GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>;
-class PatGprImm<SDPatternOperator OpNode, RVInst Inst, ImmLeaf ImmType>
- : Pat<(XLenVT (OpNode (XLenVT GPR:$rs1), ImmType:$imm)),
+class PatGprImm<SDPatternOperator OpNode, RVInst Inst, ImmLeaf ImmType,
+ ValueType vt = XLenVT>
+ : Pat<(vt (OpNode (vt GPR:$rs1), ImmType:$imm)),
(Inst GPR:$rs1, ImmType:$imm)>;
class PatGprSimm12<SDPatternOperator OpNode, RVInstI Inst>
: PatGprImm<OpNode, Inst, simm12>;
@@ -1232,7 +1208,9 @@ def assertzexti32 : PatFrag<(ops node:$src), (assertzext node:$src), [{
}]>;
def zexti32 : ComplexPattern<i64, 1, "selectZExtBits<32>">;
def zexti16 : ComplexPattern<XLenVT, 1, "selectZExtBits<16>">;
+def zexti16i32 : ComplexPattern<i32, 1, "selectZExtBits<16>">;
def zexti8 : ComplexPattern<XLenVT, 1, "selectZExtBits<8>">;
+def zexti8i32 : ComplexPattern<i32, 1, "selectZExtBits<8>">;
def ext : PatFrags<(ops node:$A), [(sext node:$A), (zext node:$A)]>;
@@ -1264,6 +1242,10 @@ def anyext_oneuse : unop_oneuse<anyext>;
def ext_oneuse : unop_oneuse<ext>;
def fpext_oneuse : unop_oneuse<any_fpextend>;
+def 33signbits_node : PatLeaf<(i64 GPR:$src), [{
+ return CurDAG->ComputeNumSignBits(SDValue(N, 0)) > 32;
+}]>;
+
/// Simple arithmetic operations
def : PatGprGpr<add, ADD>;
@@ -1421,6 +1403,21 @@ def PseudoCCSUB : Pseudo<(outs GPR:$dst),
GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
+def PseudoCCSLL : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU, ReadSFBALU]>;
+def PseudoCCSRL : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU, ReadSFBALU]>;
+def PseudoCCSRA : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU, ReadSFBALU]>;
def PseudoCCAND : Pseudo<(outs GPR:$dst),
(ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
@@ -1437,6 +1434,42 @@ def PseudoCCXOR : Pseudo<(outs GPR:$dst),
Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
+def PseudoCCADDI : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU]>;
+def PseudoCCSLLI : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU]>;
+def PseudoCCSRLI : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU]>;
+def PseudoCCSRAI : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU]>;
+def PseudoCCANDI : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU]>;
+def PseudoCCORI : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU]>;
+def PseudoCCXORI : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU]>;
+
// RV64I instructions
def PseudoCCADDW : Pseudo<(outs GPR:$dst),
(ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
@@ -1448,6 +1481,42 @@ def PseudoCCSUBW : Pseudo<(outs GPR:$dst),
GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
+def PseudoCCSLLW : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU, ReadSFBALU]>;
+def PseudoCCSRLW : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU, ReadSFBALU]>;
+def PseudoCCSRAW : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU, ReadSFBALU]>;
+
+def PseudoCCADDIW : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU]>;
+def PseudoCCSLLIW : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU]>;
+def PseudoCCSRLIW : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU]>;
+def PseudoCCSRAIW : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU]>;
}
multiclass SelectCC_GPR_rrirr<DAGOperand valty, ValueType vt> {
@@ -1603,6 +1672,16 @@ def PseudoJump : Pseudo<(outs GPR:$rd), (ins pseudo_jump_symbol:$target), [],
"jump", "$target, $rd">,
Sched<[WriteIALU, WriteJalr, ReadJalr]>;
+// Pseudo for a rematerializable constant materialization sequence.
+// This is an experimental feature enabled by
+// -riscv-use-rematerializable-movimm in RISCVISelDAGToDAG.cpp.
+// It will be expanded after register allocation.
+// FIXME: The scheduling information does not reflect the multiple instructions.
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 1,
+ isPseudo = 1, isReMaterializable = 1, IsSignExtendingOpW = 1 in
+def PseudoMovImm : Pseudo<(outs GPR:$dst), (ins i32imm:$imm), []>,
+ Sched<[WriteIALU]>;
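+// A minimal sketch of the post-RA expansion this enables (illustrative
+// value, not emitted by this definition itself): materializing 0x12345678
+// becomes a two-instruction pair, matching Size = 8 above:
+//   lui   a0, 0x12345      # a0 = 0x12345000
+//   addiw a0, a0, 0x678    # a0 = 0x12345678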
+
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
def PseudoLLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
@@ -1623,8 +1702,6 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0,
def PseudoLGA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"lga", "$dst, $src">;
-def : Pat<(iPTR (riscv_lga tglobaladdr:$in)), (PseudoLGA tglobaladdr:$in)>;
-
let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
def PseudoLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
@@ -1641,16 +1718,11 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0,
def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"la.tls.ie", "$dst, $src">;
-def : Pat<(iPTR (riscv_la_tls_ie tglobaltlsaddr:$in)),
- (PseudoLA_TLS_IE tglobaltlsaddr:$in)>;
-
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"la.tls.gd", "$dst, $src">;
-def : Pat<(riscv_la_tls_gd tglobaltlsaddr:$in),
- (PseudoLA_TLS_GD tglobaltlsaddr:$in)>;
/// Sign/Zero Extends
@@ -1680,7 +1752,7 @@ def : LdPat<sextloadi8, LB>;
def : LdPat<extloadi8, LBU>; // Prefer unsigned due to no c.lb in Zcb.
def : LdPat<sextloadi16, LH>;
def : LdPat<extloadi16, LH>;
-def : LdPat<load, LW, i32>, Requires<[IsRV32]>;
+def : LdPat<load, LW, i32>;
def : LdPat<zextloadi8, LBU>;
def : LdPat<zextloadi16, LHU>;
@@ -1694,7 +1766,7 @@ class StPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy,
def : StPat<truncstorei8, SB, GPR, XLenVT>;
def : StPat<truncstorei16, SH, GPR, XLenVT>;
-def : StPat<store, SW, GPR, i32>, Requires<[IsRV32]>;
+def : StPat<store, SW, GPR, i32>;
/// Fences
@@ -1796,6 +1868,12 @@ def : Pat<(i64 (shl (and GPR:$rs1, 0xffffffff), uimm5:$shamt)),
(SRLI (SLLI GPR:$rs1, 32), (ImmSubFrom32 uimm5:$shamt))>;
}
+class binop_allhusers<SDPatternOperator operator>
+ : PatFrag<(ops node:$lhs, node:$rhs),
+ (XLenVT (operator node:$lhs, node:$rhs)), [{
+ return hasAllHUsers(Node);
+}]>;
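+// A minimal usage sketch (hypothetical, not part of this change): a pattern
+// such as
+//   def : Pat<(binop_allhusers<add> GPR:$rs1, GPR:$rs2),
+//             (ADD GPR:$rs1, GPR:$rs2)>;
+// would fire only when every user of the add consumes just the low 16 bits.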
+
// PatFrag to allow ADDW/SUBW/MULW/SLLW to be selected from i64 add/sub/mul/shl
// if only the lower 32 bits of their result are used.
class binop_allwusers<SDPatternOperator operator>
@@ -1904,9 +1982,9 @@ def : Pat<(debugtrap), (EBREAK)>;
let Predicates = [IsRV64], Uses = [X5],
Defs = [X1, X6, X7, X28, X29, X30, X31] in
-def HWASAN_CHECK_MEMACCESS_SHORTGRANULES
+def HWASAN_CHECK_MEMACCESS_SHORTGRANULES
: Pseudo<(outs), (ins GPRJALR:$ptr, i32imm:$accessinfo),
- [(int_hwasan_check_memaccess_shortgranules X5, GPRJALR:$ptr,
+ [(int_hwasan_check_memaccess_shortgranules (i64 X5), GPRJALR:$ptr,
(i32 timm:$accessinfo))]>;
// This gets lowered into a 20-byte instruction sequence (at most)
@@ -1928,6 +2006,86 @@ def : Pat<(binop_allwusers<add> GPR:$rs1, (AddiPair:$rs2)),
(AddiPairImmSmall AddiPair:$rs2))>;
}
+let Predicates = [HasShortForwardBranchOpt] in
+def : Pat<(XLenVT (abs GPR:$rs1)),
+ (PseudoCCSUB (XLenVT GPR:$rs1), (XLenVT X0), /* COND_LT */ 2,
+ (XLenVT GPR:$rs1), (XLenVT X0), (XLenVT GPR:$rs1))>;
+let Predicates = [HasShortForwardBranchOpt, IsRV64] in
+def : Pat<(sext_inreg (abs 33signbits_node:$rs1), i32),
+ (PseudoCCSUBW (i64 GPR:$rs1), (i64 X0), /* COND_LT */ 2,
+ (i64 GPR:$rs1), (i64 X0), (i64 GPR:$rs1))>;
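+// Sketch of the short-forward-branch expansion of the abs pattern above
+// (register names illustrative):
+//   bge  a0, zero, 1f    # if a0 >= 0, keep a0 (the falsev operand)
+//   sub  a0, zero, a0    # otherwise negate: a0 = 0 - a0
+// 1: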
+
+//===----------------------------------------------------------------------===//
+// Experimental RV64 i32 legalization patterns.
+//===----------------------------------------------------------------------===//
+
+def simm12i32 : ImmLeaf<i32, [{return isInt<12>(Imm);}]>;
+
+// Convert from i32 immediate to i64 target immediate to make SelectionDAG type
+// checking happy, so we can use ADDIW, which expects an XLen immediate.
+def as_i64imm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
+}]>;
+
+def zext_is_sext : PatFrag<(ops node:$src), (zext node:$src), [{
+ KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0), 0);
+ return Known.isNonNegative();
+}]>;
+
+let Predicates = [IsRV64] in {
+def : LdPat<sextloadi8, LB, i32>;
+def : LdPat<extloadi8, LBU, i32>; // Prefer unsigned due to no c.lb in Zcb.
+def : LdPat<sextloadi16, LH, i32>;
+def : LdPat<extloadi16, LH, i32>;
+def : LdPat<zextloadi8, LBU, i32>;
+def : LdPat<zextloadi16, LHU, i32>;
+
+def : StPat<truncstorei8, SB, GPR, i32>;
+def : StPat<truncstorei16, SH, GPR, i32>;
+
+def : Pat<(anyext GPR:$src), (COPY GPR:$src)>;
+def : Pat<(sext GPR:$src), (ADDIW GPR:$src, 0)>;
+def : Pat<(trunc GPR:$src), (COPY GPR:$src)>;
+
+def : PatGprGpr<add, ADDW, i32, i32>;
+def : PatGprGpr<sub, SUBW, i32, i32>;
+def : PatGprGpr<and, AND, i32, i32>;
+def : PatGprGpr<or, OR, i32, i32>;
+def : PatGprGpr<xor, XOR, i32, i32>;
+def : PatGprGpr<shiftopw<shl>, SLLW, i32, i64>;
+def : PatGprGpr<shiftopw<srl>, SRLW, i32, i64>;
+def : PatGprGpr<shiftopw<sra>, SRAW, i32, i64>;
+
+def : Pat<(i32 (add GPR:$rs1, simm12i32:$imm)),
+ (ADDIW GPR:$rs1, (i64 (as_i64imm $imm)))>;
+def : Pat<(i32 (and GPR:$rs1, simm12i32:$imm)),
+ (ANDI GPR:$rs1, (i64 (as_i64imm $imm)))>;
+def : Pat<(i32 (or GPR:$rs1, simm12i32:$imm)),
+ (ORI GPR:$rs1, (i64 (as_i64imm $imm)))>;
+def : Pat<(i32 (xor GPR:$rs1, simm12i32:$imm)),
+ (XORI GPR:$rs1, (i64 (as_i64imm $imm)))>;
+
+def : PatGprImm<shl, SLLIW, uimm5, i32>;
+def : PatGprImm<srl, SRLIW, uimm5, i32>;
+def : PatGprImm<sra, SRAIW, uimm5, i32>;
+
+def : Pat<(i32 (and GPR:$rs, TrailingOnesMask:$mask)),
+ (SRLI (SLLI $rs, (i64 (XLenSubTrailingOnes $mask))),
+ (i64 (XLenSubTrailingOnes $mask)))>;
+
+// Use sext if the sign bit of the input is 0.
+def : Pat<(zext_is_sext GPR:$src), (ADDIW GPR:$src, 0)>;
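+// Worked example: if $src was produced by an instruction already known to
+// clear bit 31 (e.g. a nonzero srliw), zero- and sign-extension agree, so a
+// single sext.w (ADDIW $src, 0) replaces the usual two-shift zext.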
+}
+
+let Predicates = [IsRV64, NotHasStdExtZba] in {
+def : Pat<(zext GPR:$src), (SRLI (SLLI GPR:$src, 32), 32)>;
+
+// If we're shifting a 32-bit zero-extended value left by 0-31 bits, use 2
+// shifts instead of 3. This can occur when an unsigned value is used to
+// index an array.
+def : Pat<(shl (zext GPR:$rs), uimm5:$shamt),
+ (SRLI (SLLI GPR:$rs, 32), (ImmSubFrom32 uimm5:$shamt))>;
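+// Worked example for $shamt = 2 (a 32-bit unsigned index into a word
+// array): slli rd, rs, 32 puts the index in bits 63:32, and srli rd, rd, 30
+// leaves zext(rs) << 2, one instruction fewer than zero-extending with two
+// shifts and then shifting left.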
+}
+
//===----------------------------------------------------------------------===//
// Standard extensions
//===----------------------------------------------------------------------===//
@@ -1951,7 +2109,6 @@ include "RISCVInstrInfoZk.td"
// Vector
include "RISCVInstrInfoV.td"
-include "RISCVInstrInfoZvfbf.td"
include "RISCVInstrInfoZvk.td"
// Integer
@@ -1970,3 +2127,9 @@ include "RISCVInstrInfoXVentana.td"
include "RISCVInstrInfoXTHead.td"
include "RISCVInstrInfoXSf.td"
include "RISCVInstrInfoXCV.td"
+
+//===----------------------------------------------------------------------===//
+// Global ISel
+//===----------------------------------------------------------------------===//
+
+include "RISCVInstrGISel.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 8421109b8514..c8301fcc6b93 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -44,17 +44,11 @@ multiclass AMO_rr_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr> {
def _AQ_RL : AMO_rr<funct5, 1, 1, funct3, opcodestr # ".aqrl">;
}
-class AtomicStPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy,
- ValueType vt = XLenVT>
- : Pat<(StoreOp (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12),
- (vt StTy:$rs2)),
- (Inst StTy:$rs2, GPR:$rs1, simm12:$imm12)>;
-
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtA] in {
+let Predicates = [HasStdExtA], IsSignExtendingOpW = 1 in {
defm LR_W : LR_r_aq_rl<0b010, "lr.w">, Sched<[WriteAtomicLDW, ReadAtomicLDW]>;
defm SC_W : AMO_rr_aq_rl<0b00011, 0b010, "sc.w">,
Sched<[WriteAtomicSTW, ReadAtomicSTW, ReadAtomicSTW]>;
@@ -123,21 +117,21 @@ let Predicates = [HasAtomicLdSt] in {
def : LdPat<atomic_load_16, LH>;
def : LdPat<atomic_load_32, LW>;
- def : AtomicStPat<atomic_store_8, SB, GPR>;
- def : AtomicStPat<atomic_store_16, SH, GPR>;
- def : AtomicStPat<atomic_store_32, SW, GPR>;
+ def : StPat<atomic_store_8, SB, GPR, XLenVT>;
+ def : StPat<atomic_store_16, SH, GPR, XLenVT>;
+ def : StPat<atomic_store_32, SW, GPR, XLenVT>;
}
let Predicates = [HasAtomicLdSt, IsRV64] in {
def : LdPat<atomic_load_64, LD, i64>;
- def : AtomicStPat<atomic_store_64, SD, GPR, i64>;
+ def : StPat<atomic_store_64, SD, GPR, i64>;
}
-let Predicates = [HasStdExtA] in {
-
/// AMOs
-multiclass AMOPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT> {
+multiclass AMOPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT,
+ list<Predicate> ExtraPreds = []> {
+let Predicates = !listconcat([HasStdExtA, NotHasStdExtZtso], ExtraPreds) in {
def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"),
!cast<RVInst>(BaseInst), vt>;
def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"),
@@ -149,6 +143,19 @@ multiclass AMOPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT> {
def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"),
!cast<RVInst>(BaseInst#"_AQ_RL"), vt>;
}
+let Predicates = !listconcat([HasStdExtA, HasStdExtZtso], ExtraPreds) in {
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_release"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acq_rel"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"),
+ !cast<RVInst>(BaseInst), vt>;
+}
+}
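+// Under Ztso the hardware already provides total store ordering, so the
+// second block maps every ordering to the plain AMO; no .aq/.rl annotations
+// are needed.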
defm : AMOPat<"atomic_swap_32", "AMOSWAP_W">;
defm : AMOPat<"atomic_load_add_32", "AMOADD_W">;
@@ -160,16 +167,7 @@ defm : AMOPat<"atomic_load_min_32", "AMOMIN_W">;
defm : AMOPat<"atomic_load_umax_32", "AMOMAXU_W">;
defm : AMOPat<"atomic_load_umin_32", "AMOMINU_W">;
-def : Pat<(XLenVT (atomic_load_sub_32_monotonic GPR:$addr, GPR:$incr)),
- (AMOADD_W GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(XLenVT (atomic_load_sub_32_acquire GPR:$addr, GPR:$incr)),
- (AMOADD_W_AQ GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(XLenVT (atomic_load_sub_32_release GPR:$addr, GPR:$incr)),
- (AMOADD_W_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(XLenVT (atomic_load_sub_32_acq_rel GPR:$addr, GPR:$incr)),
- (AMOADD_W_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(XLenVT (atomic_load_sub_32_seq_cst GPR:$addr, GPR:$incr)),
- (AMOADD_W_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
+let Predicates = [HasStdExtA] in {
/// Pseudo AMOs
@@ -318,30 +316,17 @@ def : Pat<(int_riscv_masked_cmpxchg_i32
} // Predicates = [HasStdExtA]
-let Predicates = [HasStdExtA, IsRV64] in {
+defm : AMOPat<"atomic_swap_64", "AMOSWAP_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_add_64", "AMOADD_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_and_64", "AMOAND_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_or_64", "AMOOR_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_xor_64", "AMOXOR_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_max_64", "AMOMAX_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_min_64", "AMOMIN_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D", i64, [IsRV64]>;
-defm : AMOPat<"atomic_swap_64", "AMOSWAP_D", i64>;
-defm : AMOPat<"atomic_load_add_64", "AMOADD_D", i64>;
-defm : AMOPat<"atomic_load_and_64", "AMOAND_D", i64>;
-defm : AMOPat<"atomic_load_or_64", "AMOOR_D", i64>;
-defm : AMOPat<"atomic_load_xor_64", "AMOXOR_D", i64>;
-defm : AMOPat<"atomic_load_max_64", "AMOMAX_D", i64>;
-defm : AMOPat<"atomic_load_min_64", "AMOMIN_D", i64>;
-defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D", i64>;
-defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D", i64>;
-
-/// 64-bit AMOs
-
-def : Pat<(i64 (atomic_load_sub_64_monotonic GPR:$addr, GPR:$incr)),
- (AMOADD_D GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(i64 (atomic_load_sub_64_acquire GPR:$addr, GPR:$incr)),
- (AMOADD_D_AQ GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(i64 (atomic_load_sub_64_release GPR:$addr, GPR:$incr)),
- (AMOADD_D_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(i64 (atomic_load_sub_64_acq_rel GPR:$addr, GPR:$incr)),
- (AMOADD_D_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(i64 (atomic_load_sub_64_seq_cst GPR:$addr, GPR:$incr)),
- (AMOADD_D_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
+let Predicates = [HasStdExtA, IsRV64] in {
/// 64-bit pseudo AMOs
@@ -387,3 +372,61 @@ def : Pat<(int_riscv_masked_cmpxchg_i64
(PseudoMaskedCmpXchg32
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
} // Predicates = [HasStdExtA, IsRV64]
+
+//===----------------------------------------------------------------------===//
+// Experimental RV64 i32 legalization patterns.
+//===----------------------------------------------------------------------===//
+
+class PatGprGprA<SDPatternOperator OpNode, RVInst Inst, ValueType vt>
+ : Pat<(vt (OpNode (XLenVT GPR:$rs1), (vt GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>;
+
+multiclass AMOPat2<string AtomicOp, string BaseInst, ValueType vt = XLenVT,
+ list<Predicate> ExtraPreds = []> {
+let Predicates = !listconcat([HasStdExtA, NotHasStdExtZtso], ExtraPreds) in {
+ def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_monotonic"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_acquire"),
+ !cast<RVInst>(BaseInst#"_AQ"), vt>;
+ def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_release"),
+ !cast<RVInst>(BaseInst#"_RL"), vt>;
+ def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_acq_rel"),
+ !cast<RVInst>(BaseInst#"_AQ_RL"), vt>;
+ def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_seq_cst"),
+ !cast<RVInst>(BaseInst#"_AQ_RL"), vt>;
+}
+let Predicates = !listconcat([HasStdExtA, HasStdExtZtso], ExtraPreds) in {
+ def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_monotonic"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_acquire"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_release"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_acq_rel"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_seq_cst"),
+ !cast<RVInst>(BaseInst), vt>;
+}
+}
+
+defm : AMOPat2<"atomic_swap_32", "AMOSWAP_W", i32>;
+defm : AMOPat2<"atomic_load_add_32", "AMOADD_W", i32>;
+defm : AMOPat2<"atomic_load_and_32", "AMOAND_W", i32>;
+defm : AMOPat2<"atomic_load_or_32", "AMOOR_W", i32>;
+defm : AMOPat2<"atomic_load_xor_32", "AMOXOR_W", i32>;
+defm : AMOPat2<"atomic_load_max_32", "AMOMAX_W", i32>;
+defm : AMOPat2<"atomic_load_min_32", "AMOMIN_W", i32>;
+defm : AMOPat2<"atomic_load_umax_32", "AMOMAXU_W", i32>;
+defm : AMOPat2<"atomic_load_umin_32", "AMOMINU_W", i32>;
+
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32, i32>;
+
+let Predicates = [HasAtomicLdSt] in {
+ def : LdPat<atomic_load_8, LB, i32>;
+ def : LdPat<atomic_load_16, LH, i32>;
+ def : LdPat<atomic_load_32, LW, i32>;
+
+ def : StPat<atomic_store_8, SB, GPR, i32>;
+ def : StPat<atomic_store_16, SH, GPR, i32>;
+ def : StPat<atomic_store_32, SW, GPR, i32>;
+}
+
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index 74439bb67c61..07137031d9fc 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -18,7 +18,7 @@ def UImmLog2XLenNonZeroAsmOperand : AsmOperandClass {
let DiagnosticType = "InvalidUImmLog2XLenNonZero";
}
-def uimmlog2xlennonzero : Operand<XLenVT>, ImmLeaf<XLenVT, [{
+def uimmlog2xlennonzero : RISCVOp, ImmLeaf<XLenVT, [{
if (Subtarget->is64Bit())
return isUInt<6>(Imm) && (Imm != 0);
return isUInt<5>(Imm) && (Imm != 0);
@@ -27,7 +27,6 @@ def uimmlog2xlennonzero : Operand<XLenVT>, ImmLeaf<XLenVT, [{
// TODO: should ensure invalid shamt is rejected when decoding.
let DecoderMethod = "decodeUImmNonZeroOperand<6>";
let OperandType = "OPERAND_UIMMLOG2XLEN_NONZERO";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -38,12 +37,7 @@ def uimmlog2xlennonzero : Operand<XLenVT>, ImmLeaf<XLenVT, [{
}];
}
-def simm6 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<6>(Imm);}]> {
- let ParserMatchClass = SImmAsmOperand<6>;
- let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeSImmOperand<6>";
- let OperandType = "OPERAND_SIMM6";
- let OperandNamespace = "RISCVOp";
+def simm6 : RISCVSImmLeafOp<6> {
let MCOperandPredicate = [{
int64_t Imm;
if (MCOp.evaluateAsConstantImm(Imm))
@@ -52,13 +46,12 @@ def simm6 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<6>(Imm);}]> {
}];
}
-def simm6nonzero : Operand<XLenVT>,
+def simm6nonzero : RISCVOp,
ImmLeaf<XLenVT, [{return (Imm != 0) && isInt<6>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<6, "NonZero">;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeSImmNonZeroOperand<6>";
let OperandType = "OPERAND_SIMM6_NONZERO";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (MCOp.evaluateAsConstantImm(Imm))
@@ -67,11 +60,10 @@ def simm6nonzero : Operand<XLenVT>,
}];
}
-def immzero : Operand<XLenVT>,
+def immzero : RISCVOp,
ImmLeaf<XLenVT, [{return (Imm == 0);}]> {
let ParserMatchClass = ImmZeroAsmOperand;
let OperandType = "OPERAND_ZERO";
- let OperandNamespace = "RISCVOp";
}
def CLUIImmAsmOperand : AsmOperandClass {
@@ -86,7 +78,7 @@ def CLUIImmAsmOperand : AsmOperandClass {
// loaded into bits 17-12 of the destination register and sign-extended from
// bit 17. Therefore, this 6-bit immediate can represent values in the ranges
// [1, 31] and [0xfffe0, 0xfffff].
-def c_lui_imm : Operand<XLenVT>,
+def c_lui_imm : RISCVOp,
ImmLeaf<XLenVT, [{return (Imm != 0) &&
(isUInt<5>(Imm) ||
(Imm >= 0xfffe0 && Imm <= 0xfffff));}]> {
@@ -94,7 +86,6 @@ def c_lui_imm : Operand<XLenVT>,
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeCLUIImmOperand";
let OperandType = "OPERAND_CLUI_IMM";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (MCOp.evaluateAsConstantImm(Imm))
@@ -105,13 +96,12 @@ def c_lui_imm : Operand<XLenVT>,
}
// A 7-bit unsigned immediate where the least significant two bits are zero.
-def uimm7_lsb00 : Operand<XLenVT>,
+def uimm7_lsb00 : RISCVOp,
ImmLeaf<XLenVT, [{return isShiftedUInt<5, 2>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<7, "Lsb00">;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeUImmOperand<7>";
let OperandType = "OPERAND_UIMM7_LSB00";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -121,13 +111,12 @@ def uimm7_lsb00 : Operand<XLenVT>,
}
// An 8-bit unsigned immediate where the least significant two bits are zero.
-def uimm8_lsb00 : Operand<XLenVT>,
+def uimm8_lsb00 : RISCVOp,
ImmLeaf<XLenVT, [{return isShiftedUInt<6, 2>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<8, "Lsb00">;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeUImmOperand<8>";
let OperandType = "OPERAND_UIMM8_LSB00";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -137,13 +126,12 @@ def uimm8_lsb00 : Operand<XLenVT>,
}
// An 8-bit unsigned immediate where the least significant three bits are zero.
-def uimm8_lsb000 : Operand<XLenVT>,
+def uimm8_lsb000 : RISCVOp,
ImmLeaf<XLenVT, [{return isShiftedUInt<5, 3>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<8, "Lsb000">;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeUImmOperand<8>";
let OperandType = "OPERAND_UIMM8_LSB000";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -170,13 +158,12 @@ def simm9_lsb0 : Operand<OtherVT>,
}
// A 9-bit unsigned immediate where the least significant three bits are zero.
-def uimm9_lsb000 : Operand<XLenVT>,
+def uimm9_lsb000 : RISCVOp,
ImmLeaf<XLenVT, [{return isShiftedUInt<6, 3>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<9, "Lsb000">;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeUImmOperand<9>";
let OperandType = "OPERAND_UIMM9_LSB000";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -187,14 +174,13 @@ def uimm9_lsb000 : Operand<XLenVT>,
// A 10-bit unsigned immediate where the least significant two bits are zero
// and the immediate can't be zero.
-def uimm10_lsb00nonzero : Operand<XLenVT>,
+def uimm10_lsb00nonzero : RISCVOp,
ImmLeaf<XLenVT,
[{return isShiftedUInt<8, 2>(Imm) && (Imm != 0);}]> {
let ParserMatchClass = UImmAsmOperand<10, "Lsb00NonZero">;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeUImmNonZeroOperand<10>";
let OperandType = "OPERAND_UIMM10_LSB00_NONZERO";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -204,14 +190,13 @@ def uimm10_lsb00nonzero : Operand<XLenVT>,
}
// A 10-bit signed immediate where the least significant four bits are zero
// and the immediate can't be zero.
-def simm10_lsb0000nonzero : Operand<XLenVT>,
+def simm10_lsb0000nonzero : RISCVOp,
ImmLeaf<XLenVT,
[{return (Imm != 0) && isShiftedInt<6, 4>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<10, "Lsb0000NonZero">;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeSImmNonZeroOperand<10>";
let OperandType = "OPERAND_SIMM10_LSB0000_NONZERO";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -243,11 +228,10 @@ def InsnCDirectiveOpcode : AsmOperandClass {
let PredicateMethod = "isImm";
}
-def uimm2_opcode : Operand<XLenVT> {
+def uimm2_opcode : RISCVOp {
let ParserMatchClass = InsnCDirectiveOpcode;
let DecoderMethod = "decodeUImmOperand<2>";
let OperandType = "OPERAND_UIMM2";
- let OperandNamespace = "RISCVOp";
}
//===----------------------------------------------------------------------===//
@@ -972,8 +956,14 @@ def : CompressPat<(JAL X0, simm12_lsb0:$offset),
(C_J simm12_lsb0:$offset)>;
def : CompressPat<(BEQ GPRC:$rs1, X0, simm9_lsb0:$imm),
(C_BEQZ GPRC:$rs1, simm9_lsb0:$imm)>;
+let isCompressOnly = true in
+def : CompressPat<(BEQ X0, GPRC:$rs1, simm9_lsb0:$imm),
+ (C_BEQZ GPRC:$rs1, simm9_lsb0:$imm)>;
def : CompressPat<(BNE GPRC:$rs1, X0, simm9_lsb0:$imm),
(C_BNEZ GPRC:$rs1, simm9_lsb0:$imm)>;
+let isCompressOnly = true in
+def : CompressPat<(BNE X0, GPRC:$rs1, simm9_lsb0:$imm),
+ (C_BNEZ GPRC:$rs1, simm9_lsb0:$imm)>;
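+// Example: with the isCompressOnly patterns, `beq zero, a0, offset` also
+// compresses to `c.beqz a0, offset`, while decompression still produces
+// only the canonical `beq a0, zero, offset` form.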
} // Predicates = [HasStdExtCOrZca]
// Quadrant 2
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 7a79e3ca6a2f..6af710049a9d 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -78,7 +78,7 @@ def FSD : FPStore_r<0b011, "fsd", FPR64, WriteFST64>;
} // Predicates = [HasStdExtD]
foreach Ext = DExts in {
- let SchedRW = [WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64] in {
+ let SchedRW = [WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64Addend] in {
defm FMADD_D : FPFMA_rrr_frm_m<OPC_MADD, 0b01, "fmadd.d", Ext>;
defm FMSUB_D : FPFMA_rrr_frm_m<OPC_MSUB, 0b01, "fmsub.d", Ext>;
defm FNMSUB_D : FPFMA_rrr_frm_m<OPC_NMSUB, 0b01, "fnmsub.d", Ext>;
@@ -115,8 +115,8 @@ foreach Ext = DExts in {
Ext.PrimaryTy, "fcvt.s.d">,
Sched<[WriteFCvtF64ToF32, ReadFCvtF64ToF32]>;
- defm FCVT_D_S : FPUnaryOp_r_m<0b0100001, 0b00000, 0b000, Ext, Ext.PrimaryTy,
- Ext.F32Ty, "fcvt.d.s">,
+ defm FCVT_D_S : FPUnaryOp_r_frmlegacy_m<0b0100001, 0b00000, Ext, Ext.PrimaryTy,
+ Ext.F32Ty, "fcvt.d.s">,
Sched<[WriteFCvtF32ToF64, ReadFCvtF32ToF64]>;
let SchedRW = [WriteFCmp64, ReadFCmp64, ReadFCmp64] in {
@@ -140,12 +140,12 @@ foreach Ext = DExts in {
"fcvt.wu.d">,
Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>;
- defm FCVT_D_W : FPUnaryOp_r_m<0b1101001, 0b00000, 0b000, Ext, Ext.PrimaryTy, GPR,
- "fcvt.d.w">,
+ defm FCVT_D_W : FPUnaryOp_r_frmlegacy_m<0b1101001, 0b00000, Ext, Ext.PrimaryTy, GPR,
+ "fcvt.d.w">,
Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>;
- defm FCVT_D_WU : FPUnaryOp_r_m<0b1101001, 0b00001, 0b000, Ext, Ext.PrimaryTy, GPR,
- "fcvt.d.wu">,
+ defm FCVT_D_WU : FPUnaryOp_r_frmlegacy_m<0b1101001, 0b00001, Ext, Ext.PrimaryTy, GPR,
+ "fcvt.d.wu">,
Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>;
} // foreach Ext = DExts
@@ -240,7 +240,7 @@ let Predicates = [HasStdExtD] in {
// f64 -> f32, f32 -> f64
def : Pat<(any_fpround FPR64:$rs1), (FCVT_S_D FPR64:$rs1, FRM_DYN)>;
-def : Pat<(any_fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1)>;
+def : Pat<(any_fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1, FRM_RNE)>;
} // Predicates = [HasStdExtD]
let Predicates = [HasStdExtZdinx, IsRV64] in {
@@ -248,7 +248,7 @@ let Predicates = [HasStdExtZdinx, IsRV64] in {
// f64 -> f32, f32 -> f64
def : Pat<(any_fpround FPR64INX:$rs1), (FCVT_S_D_INX FPR64INX:$rs1, FRM_DYN)>;
-def : Pat<(any_fpextend FPR32INX:$rs1), (FCVT_D_S_INX FPR32INX:$rs1)>;
+def : Pat<(any_fpextend FPR32INX:$rs1), (FCVT_D_S_INX FPR32INX:$rs1, FRM_RNE)>;
} // Predicates = [HasStdExtZdinx, IsRV64]
let Predicates = [HasStdExtZdinx, IsRV32] in {
@@ -256,7 +256,7 @@ let Predicates = [HasStdExtZdinx, IsRV32] in {
// f64 -> f32, f32 -> f64
def : Pat<(any_fpround FPR64IN32X:$rs1), (FCVT_S_D_IN32X FPR64IN32X:$rs1, FRM_DYN)>;
-def : Pat<(any_fpextend FPR32INX:$rs1), (FCVT_D_S_IN32X FPR32INX:$rs1)>;
+def : Pat<(any_fpextend FPR32INX:$rs1), (FCVT_D_S_IN32X FPR32INX:$rs1, FRM_RNE)>;
} // Predicates = [HasStdExtZdinx, IsRV32]
// [u]int<->double conversion patterns must be gated on IsRV32 or IsRV64, so
@@ -277,11 +277,12 @@ def : Pat<(any_fsqrt FPR64:$rs1), (FSQRT_D FPR64:$rs1, FRM_DYN)>;
def : Pat<(fneg FPR64:$rs1), (FSGNJN_D $rs1, $rs1)>;
def : Pat<(fabs FPR64:$rs1), (FSGNJX_D $rs1, $rs1)>;
-def : Pat<(riscv_fpclass FPR64:$rs1), (FCLASS_D $rs1)>;
+def : Pat<(riscv_fclass FPR64:$rs1), (FCLASS_D $rs1)>;
def : PatFprFpr<fcopysign, FSGNJ_D, FPR64, f64>;
def : Pat<(fcopysign FPR64:$rs1, (fneg FPR64:$rs2)), (FSGNJN_D $rs1, $rs2)>;
-def : Pat<(fcopysign FPR64:$rs1, FPR32:$rs2), (FSGNJ_D $rs1, (FCVT_D_S $rs2))>;
+def : Pat<(fcopysign FPR64:$rs1, FPR32:$rs2), (FSGNJ_D $rs1, (FCVT_D_S $rs2,
+ FRM_RNE))>;
def : Pat<(fcopysign FPR32:$rs1, FPR64:$rs2), (FSGNJ_S $rs1, (FCVT_S_D $rs2,
FRM_DYN))>;
@@ -312,13 +313,13 @@ def : Pat<(any_fsqrt FPR64INX:$rs1), (FSQRT_D_INX FPR64INX:$rs1, FRM_DYN)>;
def : Pat<(fneg FPR64INX:$rs1), (FSGNJN_D_INX $rs1, $rs1)>;
def : Pat<(fabs FPR64INX:$rs1), (FSGNJX_D_INX $rs1, $rs1)>;
-def : Pat<(riscv_fpclass FPR64INX:$rs1), (FCLASS_D_INX $rs1)>;
+def : Pat<(riscv_fclass FPR64INX:$rs1), (FCLASS_D_INX $rs1)>;
def : PatFprFpr<fcopysign, FSGNJ_D_INX, FPR64INX, f64>;
def : Pat<(fcopysign FPR64INX:$rs1, (fneg FPR64INX:$rs2)),
(FSGNJN_D_INX $rs1, $rs2)>;
def : Pat<(fcopysign FPR64INX:$rs1, FPR32INX:$rs2),
- (FSGNJ_D_INX $rs1, (FCVT_D_S_INX $rs2))>;
+ (FSGNJ_D_INX $rs1, (FCVT_D_S_INX $rs2, FRM_RNE))>;
def : Pat<(fcopysign FPR32INX:$rs1, FPR64INX:$rs2),
(FSGNJ_S_INX $rs1, (FCVT_S_D_INX $rs2, FRM_DYN))>;
@@ -349,13 +350,13 @@ def : Pat<(any_fsqrt FPR64IN32X:$rs1), (FSQRT_D_IN32X FPR64IN32X:$rs1, FRM_DYN)>
def : Pat<(fneg FPR64IN32X:$rs1), (FSGNJN_D_IN32X $rs1, $rs1)>;
def : Pat<(fabs FPR64IN32X:$rs1), (FSGNJX_D_IN32X $rs1, $rs1)>;
-def : Pat<(riscv_fpclass FPR64IN32X:$rs1), (FCLASS_D_IN32X $rs1)>;
+def : Pat<(riscv_fclass FPR64IN32X:$rs1), (FCLASS_D_IN32X $rs1)>;
def : PatFprFpr<fcopysign, FSGNJ_D_IN32X, FPR64IN32X, f64>;
def : Pat<(fcopysign FPR64IN32X:$rs1, (fneg FPR64IN32X:$rs2)),
(FSGNJN_D_IN32X $rs1, $rs2)>;
def : Pat<(fcopysign FPR64IN32X:$rs1, FPR32INX:$rs2),
- (FSGNJ_D_IN32X $rs1, (FCVT_D_S_INX $rs2))>;
+ (FSGNJ_D_IN32X $rs1, (FCVT_D_S_INX $rs2, FRM_RNE))>;
def : Pat<(fcopysign FPR32INX:$rs1, FPR64IN32X:$rs2),
(FSGNJ_S_INX $rs1, (FCVT_S_D_IN32X $rs2, FRM_DYN))>;
@@ -396,12 +397,12 @@ foreach Ext = DExts in {
// Match non-signaling FEQ_D
foreach Ext = DExts in {
- defm : PatSetCC_m<any_fsetcc, SETEQ, FEQ_D, Ext, f64>;
- defm : PatSetCC_m<any_fsetcc, SETOEQ, FEQ_D, Ext, f64>;
- defm : PatSetCC_m<strict_fsetcc, SETLT, PseudoQuietFLT_D, Ext, f64>;
- defm : PatSetCC_m<strict_fsetcc, SETOLT, PseudoQuietFLT_D, Ext, f64>;
- defm : PatSetCC_m<strict_fsetcc, SETLE, PseudoQuietFLE_D, Ext, f64>;
- defm : PatSetCC_m<strict_fsetcc, SETOLE, PseudoQuietFLE_D, Ext, f64>;
+ defm : PatSetCC_m<any_fsetcc, SETEQ, FEQ_D, Ext>;
+ defm : PatSetCC_m<any_fsetcc, SETOEQ, FEQ_D, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETLT, PseudoQuietFLT_D, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETOLT, PseudoQuietFLT_D, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETLE, PseudoQuietFLE_D, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETOLE, PseudoQuietFLE_D, Ext>;
}
let Predicates = [HasStdExtD] in {
@@ -537,7 +538,7 @@ def SplitF64Pseudo_INX
[(set GPR:$dst1, GPR:$dst2, (RISCVSplitF64 FPR64IN32X:$src))]>;
} // Predicates = [HasStdExtZdinx, IsRV32]
-let Predicates = [HasStdExtD, IsRV32] in {
+let Predicates = [HasStdExtD] in {
// double->[u]int. Round-to-zero must be used.
def : Pat<(i32 (any_fp_to_sint FPR64:$rs1)), (FCVT_W_D FPR64:$rs1, FRM_RTZ)>;
@@ -554,9 +555,9 @@ def : Pat<(i32 (any_lrint FPR64:$rs1)), (FCVT_W_D $rs1, FRM_DYN)>;
def : Pat<(i32 (any_lround FPR64:$rs1)), (FCVT_W_D $rs1, FRM_RMM)>;
// [u]int->double.
-def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W GPR:$rs1)>;
-def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU GPR:$rs1)>;
-} // Predicates = [HasStdExtD, IsRV32]
+def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W GPR:$rs1, FRM_RNE)>;
+def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU GPR:$rs1, FRM_RNE)>;
+} // Predicates = [HasStdExtD]
let Predicates = [HasStdExtZdinx, IsRV32] in {
@@ -575,8 +576,8 @@ def : Pat<(i32 (any_lrint FPR64IN32X:$rs1)), (FCVT_W_D_IN32X $rs1, FRM_DYN)>;
def : Pat<(i32 (any_lround FPR64IN32X:$rs1)), (FCVT_W_D_IN32X $rs1, FRM_RMM)>;
// [u]int->double.
-def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W_IN32X GPR:$rs1)>;
-def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU_IN32X GPR:$rs1)>;
+def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W_IN32X GPR:$rs1, FRM_RNE)>;
+def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU_IN32X GPR:$rs1, FRM_RNE)>;
} // Predicates = [HasStdExtZdinx, IsRV32]
let Predicates = [HasStdExtD, IsRV64] in {
@@ -592,8 +593,8 @@ def : Pat<(riscv_any_fcvt_w_rv64 FPR64:$rs1, timm:$frm), (FCVT_W_D $rs1, timm:$
def : Pat<(riscv_any_fcvt_wu_rv64 FPR64:$rs1, timm:$frm), (FCVT_WU_D $rs1, timm:$frm)>;
// [u]int32->fp
-def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W $rs1)>;
-def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU $rs1)>;
+def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W $rs1, FRM_RNE)>;
+def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU $rs1, FRM_RNE)>;
// Saturating double->[u]int64.
def : Pat<(i64 (riscv_fcvt_x FPR64:$rs1, timm:$frm)), (FCVT_L_D $rs1, timm:$frm)>;
@@ -629,8 +630,8 @@ def : Pat<(riscv_any_fcvt_w_rv64 FPR64INX:$rs1, timm:$frm), (FCVT_W_D_INX $rs1,
def : Pat<(riscv_any_fcvt_wu_rv64 FPR64INX:$rs1, timm:$frm), (FCVT_WU_D_INX $rs1, timm:$frm)>;
// [u]int32->fp
-def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W_INX $rs1)>;
-def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU_INX $rs1)>;
+def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W_INX $rs1, FRM_RNE)>;
+def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU_INX $rs1, FRM_RNE)>;
// Saturating double->[u]int64.
def : Pat<(i64 (riscv_fcvt_x FPR64INX:$rs1, timm:$frm)), (FCVT_L_D_INX $rs1, timm:$frm)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index 290c03defc5f..52eadbdec255 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -29,11 +29,11 @@ def SDT_RISCVFCVT_X
def SDT_RISCVFROUND
: SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
SDTCisVT<3, XLenVT>]>;
-def SDT_RISCVFPCLASS
+def SDT_RISCVFCLASS
: SDTypeProfile<1, 1, [SDTCisVT<0, XLenVT>, SDTCisFP<1>]>;
-def riscv_fpclass
- : SDNode<"RISCVISD::FPCLASS", SDT_RISCVFPCLASS>;
+def riscv_fclass
+ : SDNode<"RISCVISD::FCLASS", SDT_RISCVFCLASS>;
def riscv_fround
: SDNode<"RISCVISD::FROUND", SDT_RISCVFROUND>;
@@ -132,6 +132,26 @@ def frmarg : Operand<XLenVT> {
let DecoderMethod = "decodeFRMArg";
}
+// Variants of the rounding mode operand that default to 'rne'. This exists
+// for historical/legacy reasons: fcvt instructions where the rounding mode
+// doesn't affect the output originally always set it to 0b000 ('rne'), and
+// old versions of LLVM and GCC fail to decode these instructions when the
+// rounding mode is set to anything other than 'rne', so we retain that
+// default.
+def FRMArgLegacy : AsmOperandClass {
+ let Name = "FRMArgLegacy";
+ let RenderMethod = "addFRMArgOperands";
+ let ParserMethod = "parseFRMArg";
+ let IsOptional = 1;
+ let DefaultMethod = "defaultFRMArgLegacyOp";
+}
+
+def frmarglegacy : Operand<XLenVT> {
+ let ParserMatchClass = FRMArgLegacy;
+ let PrintMethod = "printFRMArgLegacy";
+ let DecoderMethod = "decodeFRMArg";
+}
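+// Assembly-level effect (a sketch of the intended behavior): `fcvt.d.s
+// fa0, fa1` written with no rounding mode now encodes frm = 0b000 (rne)
+// via defaultFRMArgLegacyOp, while an explicitly spelled mode, e.g.
+// `fcvt.d.s fa0, fa1, rne`, is still accepted by the optional-operand
+// parser.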
+
//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
@@ -227,6 +247,24 @@ multiclass FPUnaryOp_r_frm_m<bits<7> funct7, bits<5> rs2val,
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1,
+ UseNamedOperandTable = 1, hasPostISelHook = 1 in
+class FPUnaryOp_r_frmlegacy<bits<7> funct7, bits<5> rs2val, DAGOperand rdty,
+ DAGOperand rs1ty, string opcodestr>
+ : RVInstRFrm<funct7, OPC_OP_FP, (outs rdty:$rd),
+ (ins rs1ty:$rs1, frmarglegacy:$frm), opcodestr,
+ "$rd, $rs1$frm"> {
+ let rs2 = rs2val;
+}
+multiclass FPUnaryOp_r_frmlegacy_m<bits<7> funct7, bits<5> rs2val,
+ ExtInfo Ext, DAGOperand rdty, DAGOperand rs1ty,
+ string opcodestr, list<Predicate> ExtraPreds = []> {
+ let Predicates = !listconcat(Ext.Predicates, ExtraPreds),
+ DecoderNamespace = Ext.Space in
+ def Ext.Suffix : FPUnaryOp_r_frmlegacy<funct7, rs2val, rdty, rs1ty,
+ opcodestr>;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1,
IsSignExtendingOpW = 1 in
class FPCmp_rr<bits<7> funct7, bits<3> funct3, string opcodestr,
DAGOperand rty, bit Commutable = 0>
@@ -264,7 +302,7 @@ def FSW : FPStore_r<0b010, "fsw", FPR32, WriteFST32>;
} // Predicates = [HasStdExtF]
foreach Ext = FExts in {
- let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32] in {
+ let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32Addend] in {
defm FMADD_S : FPFMA_rrr_frm_m<OPC_MADD, 0b00, "fmadd.s", Ext>;
defm FMSUB_S : FPFMA_rrr_frm_m<OPC_MSUB, 0b00, "fmsub.s", Ext>;
defm FNMSUB_S : FPFMA_rrr_frm_m<OPC_NMSUB, 0b00, "fnmsub.s", Ext>;
@@ -443,10 +481,10 @@ class PatSetCC<DAGOperand Ty, SDPatternOperator OpNode, CondCode Cond,
RVInst Inst, ValueType vt>
: Pat<(XLenVT (OpNode (vt Ty:$rs1), Ty:$rs2, Cond)), (Inst $rs1, $rs2)>;
multiclass PatSetCC_m<SDPatternOperator OpNode, CondCode Cond,
- RVInst Inst, ExtInfo Ext, ValueType vt> {
+ RVInst Inst, ExtInfo Ext> {
let Predicates = Ext.Predicates in
def Ext.Suffix : PatSetCC<Ext.PrimaryTy, OpNode, Cond,
- !cast<RVInst>(Inst#Ext.Suffix), vt>;
+ !cast<RVInst>(Inst#Ext.Suffix), Ext.PrimaryVT>;
}
class PatFprFpr<SDPatternOperator OpNode, RVInstR Inst,
@@ -489,7 +527,7 @@ def : Pat<(any_fsqrt FPR32:$rs1), (FSQRT_S FPR32:$rs1, FRM_DYN)>;
def : Pat<(fneg FPR32:$rs1), (FSGNJN_S $rs1, $rs1)>;
def : Pat<(fabs FPR32:$rs1), (FSGNJX_S $rs1, $rs1)>;
-def : Pat<(riscv_fpclass FPR32:$rs1), (FCLASS_S $rs1)>;
+def : Pat<(riscv_fclass FPR32:$rs1), (FCLASS_S $rs1)>;
} // Predicates = [HasStdExtF]
let Predicates = [HasStdExtZfinx] in {
@@ -498,7 +536,7 @@ def : Pat<(any_fsqrt FPR32INX:$rs1), (FSQRT_S_INX FPR32INX:$rs1, FRM_DYN)>;
def : Pat<(fneg FPR32INX:$rs1), (FSGNJN_S_INX $rs1, $rs1)>;
def : Pat<(fabs FPR32INX:$rs1), (FSGNJX_S_INX $rs1, $rs1)>;
-def : Pat<(riscv_fpclass FPR32INX:$rs1), (FCLASS_S_INX $rs1)>;
+def : Pat<(riscv_fclass FPR32INX:$rs1), (FCLASS_S_INX $rs1)>;
} // Predicates = [HasStdExtZfinx]
foreach Ext = FExts in
@@ -568,12 +606,12 @@ foreach Ext = FExts in {
// Match non-signaling FEQ_S
foreach Ext = FExts in {
- defm : PatSetCC_m<any_fsetcc, SETEQ, FEQ_S, Ext, f32>;
- defm : PatSetCC_m<any_fsetcc, SETOEQ, FEQ_S, Ext, f32>;
- defm : PatSetCC_m<strict_fsetcc, SETLT, PseudoQuietFLT_S, Ext, f32>;
- defm : PatSetCC_m<strict_fsetcc, SETOLT, PseudoQuietFLT_S, Ext, f32>;
- defm : PatSetCC_m<strict_fsetcc, SETLE, PseudoQuietFLE_S, Ext, f32>;
- defm : PatSetCC_m<strict_fsetcc, SETOLE, PseudoQuietFLE_S, Ext, f32>;
+ defm : PatSetCC_m<any_fsetcc, SETEQ, FEQ_S, Ext>;
+ defm : PatSetCC_m<any_fsetcc, SETOEQ, FEQ_S, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETLT, PseudoQuietFLT_S, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETOLT, PseudoQuietFLT_S, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETLE, PseudoQuietFLE_S, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETOLE, PseudoQuietFLE_S, Ext>;
}
let Predicates = [HasStdExtF] in {
@@ -607,10 +645,10 @@ def : Pat<(XLenVT (strict_fsetccs FPR32INX:$rs1, FPR32INX:$rs1, SETOEQ)),
} // Predicates = [HasStdExtZfinx]
foreach Ext = FExts in {
- defm : PatSetCC_m<any_fsetccs, SETLT, FLT_S, Ext, f32>;
- defm : PatSetCC_m<any_fsetccs, SETOLT, FLT_S, Ext, f32>;
- defm : PatSetCC_m<any_fsetccs, SETLE, FLE_S, Ext, f32>;
- defm : PatSetCC_m<any_fsetccs, SETOLE, FLE_S, Ext, f32>;
+ defm : PatSetCC_m<any_fsetccs, SETLT, FLT_S, Ext>;
+ defm : PatSetCC_m<any_fsetccs, SETOLT, FLT_S, Ext>;
+ defm : PatSetCC_m<any_fsetccs, SETLE, FLE_S, Ext>;
+ defm : PatSetCC_m<any_fsetccs, SETOLE, FLE_S, Ext>;
}
let Predicates = [HasStdExtF] in {
@@ -642,19 +680,19 @@ def : Pat<(store (f32 FPR32INX:$rs2), (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm
(SW (COPY_TO_REGCLASS FPR32INX:$rs2, GPR), GPR:$rs1, simm12:$imm12)>;
} // Predicates = [HasStdExtZfinx]
-let Predicates = [HasStdExtF, IsRV32] in {
+let Predicates = [HasStdExtF] in {
// Moves (no conversion)
def : Pat<(bitconvert (i32 GPR:$rs1)), (FMV_W_X GPR:$rs1)>;
def : Pat<(i32 (bitconvert FPR32:$rs1)), (FMV_X_W FPR32:$rs1)>;
-} // Predicates = [HasStdExtF, IsRV32]
+} // Predicates = [HasStdExtF]
-let Predicates = [HasStdExtZfinx, IsRV32] in {
+let Predicates = [HasStdExtZfinx] in {
// Moves (no conversion)
def : Pat<(f32 (bitconvert (i32 GPR:$rs1))), (COPY_TO_REGCLASS GPR:$rs1, GPRF32)>;
def : Pat<(i32 (bitconvert FPR32INX:$rs1)), (COPY_TO_REGCLASS FPR32INX:$rs1, GPR)>;
-} // Predicates = [HasStdExtZfinx, IsRV32]
+} // Predicates = [HasStdExtZfinx]
-let Predicates = [HasStdExtF, IsRV32] in {
+let Predicates = [HasStdExtF] in {
// float->[u]int. Round-to-zero must be used.
def : Pat<(i32 (any_fp_to_sint FPR32:$rs1)), (FCVT_W_S $rs1, FRM_RTZ)>;
def : Pat<(i32 (any_fp_to_uint FPR32:$rs1)), (FCVT_WU_S $rs1, FRM_RTZ)>;
@@ -672,9 +710,9 @@ def : Pat<(i32 (any_lround FPR32:$rs1)), (FCVT_W_S $rs1, FRM_RMM)>;
// [u]int->float. Match GCC and default to using dynamic rounding mode.
def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W $rs1, FRM_DYN)>;
def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU $rs1, FRM_DYN)>;
-} // Predicates = [HasStdExtF, IsRV32]
+} // Predicates = [HasStdExtF]
-let Predicates = [HasStdExtZfinx, IsRV32] in {
+let Predicates = [HasStdExtZfinx] in {
// float->[u]int. Round-to-zero must be used.
def : Pat<(i32 (any_fp_to_sint FPR32INX:$rs1)), (FCVT_W_S_INX $rs1, FRM_RTZ)>;
def : Pat<(i32 (any_fp_to_uint FPR32INX:$rs1)), (FCVT_WU_S_INX $rs1, FRM_RTZ)>;
@@ -692,7 +730,7 @@ def : Pat<(i32 (any_lround FPR32INX:$rs1)), (FCVT_W_S_INX $rs1, FRM_RMM)>;
// [u]int->float. Match GCC and default to using dynamic rounding mode.
def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W_INX $rs1, FRM_DYN)>;
def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU_INX $rs1, FRM_DYN)>;
-} // Predicates = [HasStdExtZfinx, IsRV32]
+} // Predicates = [HasStdExtZfinx]
let Predicates = [HasStdExtF, IsRV64] in {
// Moves (no conversion)
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
index 6c3c9a771d94..f9890ca4b0ee 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -114,3 +114,18 @@ let Predicates = [HasStdExtMOrZmmul, IsRV64, NotHasStdExtZba] in {
def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff))),
(MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>;
} // Predicates = [HasStdExtMOrZmmul, IsRV64, NotHasStdExtZba]
+
+//===----------------------------------------------------------------------===//
+// Experimental RV64 i32 legalization patterns.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtMOrZmmul, IsRV64] in {
+def : PatGprGpr<mul, MULW, i32, i32>;
+}
+
+let Predicates = [HasStdExtM, IsRV64] in {
+def : PatGprGpr<sdiv, DIVW, i32, i32>;
+def : PatGprGpr<udiv, DIVUW, i32, i32>;
+def : PatGprGpr<srem, REMW, i32, i32>;
+def : PatGprGpr<urem, REMUW, i32, i32>;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index 6e5ee8043e92..9fc9a29c210d 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -24,12 +24,11 @@ class VTypeIAsmOperand<int VTypeINum> : AsmOperandClass {
let RenderMethod = "addVTypeIOperands";
}
-class VTypeIOp<int VTypeINum> : Operand<XLenVT> {
+class VTypeIOp<int VTypeINum> : RISCVOp {
let ParserMatchClass = VTypeIAsmOperand<VTypeINum>;
let PrintMethod = "printVTypeI";
let DecoderMethod = "decodeUImmOperand<"#VTypeINum#">";
let OperandType = "OPERAND_VTYPEI" # VTypeINum;
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (MCOp.evaluateAsConstantImm(Imm))
@@ -58,12 +57,7 @@ def VMaskOp : RegisterOperand<VMV0> {
let DecoderMethod = "decodeVMaskReg";
}
-def simm5 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<5>(Imm);}]> {
- let ParserMatchClass = SImmAsmOperand<5>;
- let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeSImmOperand<5>";
- let OperandType = "OPERAND_SIMM5";
- let OperandNamespace = "RISCVOp";
+def simm5 : RISCVSImmLeafOp<5> {
let MCOperandPredicate = [{
int64_t Imm;
if (MCOp.evaluateAsConstantImm(Imm))
@@ -78,11 +72,10 @@ def SImm5Plus1AsmOperand : AsmOperandClass {
let DiagnosticType = "InvalidSImm5Plus1";
}
-def simm5_plus1 : Operand<XLenVT>, ImmLeaf<XLenVT,
+def simm5_plus1 : RISCVOp, ImmLeaf<XLenVT,
[{return (isInt<5>(Imm) && Imm != -16) || Imm == 16;}]> {
let ParserMatchClass = SImm5Plus1AsmOperand;
let OperandType = "OPERAND_SIMM5_PLUS1";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (MCOp.evaluateAsConstantImm(Imm))
@@ -98,88 +91,209 @@ def simm5_plus1_nonzero : ImmLeaf<XLenVT,
// Scheduling definitions.
//===----------------------------------------------------------------------===//
-class VMVRSched<int n> : Sched<[
- !cast<SchedReadWrite>("WriteVMov" #n #"V"),
- !cast<SchedReadWrite>("ReadVMov" #n #"V")
-]>;
-
-class VLESched<string lmul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVLDE_" #lmul),
- ReadVLDX, ReadVMask
-]>;
-
-class VSESched<string lmul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVSTE_" #lmul),
- !cast<SchedReadWrite>("ReadVSTEV_" #lmul),
- ReadVSTX, ReadVMask
-]>;
-
-class VLSSched<int eew, string emul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVLDS" #eew #"_" #emul),
- ReadVLDX, ReadVLDSX, ReadVMask
-]>;
-
-class VSSSched<int eew, string emul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVSTS" #eew #"_" #emul),
- !cast<SchedReadWrite>("ReadVSTS" #eew #"V_" #emul),
- ReadVSTX, ReadVSTSX, ReadVMask
-]>;
-
-class VLXSched<int dataEEW, string isOrdered,
- string dataEMUL = "WorstCase",
- string idxEMUL = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVLD" #isOrdered #"X" #dataEEW #"_" #dataEMUL),
- ReadVLDX,
- !cast<SchedReadWrite>("ReadVLD" #isOrdered #"XV_" #idxEMUL), ReadVMask
-]>;
-
-class VSXSched<int dataEEW, string isOrdered,
- string dataEMUL = "WorstCase",
- string idxEMUL = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVST" #isOrdered #"X" #dataEEW #"_" #dataEMUL),
- !cast<SchedReadWrite>("ReadVST" #isOrdered #"X" #dataEEW #"_" #dataEMUL),
- ReadVSTX, !cast<SchedReadWrite>("ReadVST" #isOrdered #"XV_" #idxEMUL), ReadVMask
-]>;
-
-class VLFSched<string lmul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVLDFF_" #lmul),
- ReadVLDX, ReadVMask
-]>;
+// Common class of scheduling definitions.
+// `ReadVMergeOp` will be prepended to reads if the instruction is masked.
+// `ReadVMask` will be appended to reads if the instruction is masked.
+// Operands:
+// `writes` SchedWrites listed for each explicit def operand, in order.
+// `reads` SchedReads listed for each explicit use operand.
+// `forceMasked` Force the instruction to be treated as masked
+// (e.g. add-with-carry instructions).
+// `forceMergeOpRead` Force a read of the merge operand.
+class SchedCommon<list<SchedWrite> writes, list<SchedRead> reads,
+ string mx = "WorstCase", int sew = 0, bit forceMasked = 0,
+ bit forceMergeOpRead = 0> : Sched<[]> {
+ defvar isMasked = !ne(!find(NAME, "_MASK"), -1);
+ defvar isMaskedOrForceMasked = !or(forceMasked, isMasked);
+ defvar mergeRead = !if(!or(!eq(mx, "WorstCase"), !eq(sew, 0)),
+ !cast<SchedRead>("ReadVMergeOp_" # mx),
+ !cast<SchedRead>("ReadVMergeOp_" # mx # "_E" #sew));
+ defvar needsMergeRead = !or(isMaskedOrForceMasked, forceMergeOpRead);
+ defvar readsWithMask =
+ !if(isMaskedOrForceMasked, !listconcat(reads, [ReadVMask]), reads);
+ defvar allReads =
+ !if(needsMergeRead, !listconcat([mergeRead], readsWithMask), reads);
+ let SchedRW = !listconcat(writes, allReads);
+}
+
+// Common class of scheduling definitions for n-ary instructions.
+// The scheduling resources depend on LMUL and may also depend on SEW.
+class SchedNary<string write, list<string> reads, string mx, int sew = 0,
+ bit forceMasked = 0, bit forceMergeOpRead = 0>
+ : SchedCommon<[!cast<SchedWrite>(
+ !if(sew,
+ write # "_" # mx # "_E" # sew,
+ write # "_" # mx))],
+ !foreach(read, reads,
+ !cast<SchedRead>(!if(sew, read #"_" #mx #"_E" #sew,
+ read #"_" #mx))),
+ mx, sew, forceMasked, forceMergeOpRead>;
+
+// Classes with the "MC" postfix are only used in the MC layer.
+// For these classes, we assume worst-case costs and that `ReadVMask` is
+// always needed (with some exceptions).
+
+// For instructions with no operand.
+class SchedNullary<string write, string mx, int sew = 0, bit forceMasked = 0,
+ bit forceMergeOpRead = 0>:
+ SchedNary<write, [], mx, sew, forceMasked, forceMergeOpRead>;
+class SchedNullaryMC<string write, bit forceMasked = 1>:
+ SchedNullary<write, "WorstCase", forceMasked=forceMasked>;
+
+// For instructions with one operand.
+class SchedUnary<string write, string read0, string mx, int sew = 0,
+ bit forceMasked = 0, bit forceMergeOpRead = 0>:
+ SchedNary<write, [read0], mx, sew, forceMasked, forceMergeOpRead>;
+class SchedUnaryMC<string write, string read0, bit forceMasked = 1>:
+ SchedUnary<write, read0, "WorstCase", forceMasked=forceMasked>;
+
+// For instructions with two operands.
+class SchedBinary<string write, string read0, string read1, string mx,
+ int sew = 0, bit forceMasked = 0, bit forceMergeOpRead = 0>
+ : SchedNary<write, [read0, read1], mx, sew, forceMasked, forceMergeOpRead>;
+class SchedBinaryMC<string write, string read0, string read1,
+ bit forceMasked = 1>:
+ SchedBinary<write, read0, read1, "WorstCase", forceMasked=forceMasked>;
+
+// For instructions with three operands.
+class SchedTernary<string write, string read0, string read1, string read2,
+ string mx, int sew = 0, bit forceMasked = 0,
+ bit forceMergeOpRead = 0>
+ : SchedNary<write, [read0, read1, read2], mx, sew, forceMasked,
+ forceMergeOpRead>;
+class SchedTernaryMC<string write, string read0, string read1, string read2,
+ int sew = 0, bit forceMasked = 1>:
+ SchedNary<write, [read0, read1, read2], "WorstCase", sew, forceMasked>;
+
+// For reduction instructions.
+class SchedReduction<string write, string read, string mx, int sew,
+ bit forceMergeOpRead = 0>
+ : SchedCommon<[!cast<SchedWrite>(write #"_" #mx #"_E" #sew)],
+ !listsplat(!cast<SchedRead>(read), 3), mx, sew, forceMergeOpRead>;
+class SchedReductionMC<string write, string readV, string readV0>:
+ SchedCommon<[!cast<SchedWrite>(write # "_WorstCase")],
+ [!cast<SchedRead>(readV), !cast<SchedRead>(readV0)],
+ forceMasked=1>;
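+// Worked example of the name resolution above, assuming the LMUL-suffixed
+// resources of the standard V scheduling model (e.g. WriteVIALUV_M1):
+// inside a record whose NAME contains "_MASK",
+//   SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", "M1">
+// expands to
+//   SchedRW = [WriteVIALUV_M1, ReadVMergeOp_M1, ReadVIALUV_M1,
+//              ReadVIALUV_M1, ReadVMask]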
+
+// Whole Vector Register Move
+class VMVRSched<int n> : SchedCommon<
+ [!cast<SchedWrite>("WriteVMov" # n # "V")],
+ [!cast<SchedRead>("ReadVMov" # n # "V")]
+>;
+
+// Vector Unit-Stride Loads and Stores
+class VLESched<string lmul, bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVLDE_" # lmul)],
+ [ReadVLDX], mx=lmul, forceMasked=forceMasked
+>;
+class VLESchedMC : VLESched<"WorstCase", forceMasked=1>;
+
+class VSESched<string lmul, bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVSTE_" # lmul)],
+ [!cast<SchedRead>("ReadVSTEV_" # lmul), ReadVSTX], mx=lmul,
+ forceMasked=forceMasked
+>;
+class VSESchedMC : VSESched<"WorstCase", forceMasked=1>;
+
+// Vector Strided Loads and Stores
+class VLSSched<int eew, string emul, bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVLDS" # eew # "_" # emul)],
+ [ReadVLDX, ReadVLDSX], emul, eew, forceMasked
+>;
+class VLSSchedMC<int eew> : VLSSched<eew, "WorstCase", forceMasked=1>;
+
+class VSSSched<int eew, string emul, bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVSTS" # eew # "_" # emul)],
+ [!cast<SchedRead>("ReadVSTS" # eew # "V_" # emul), ReadVSTX, ReadVSTSX],
+ emul, eew, forceMasked
+>;
+class VSSSchedMC<int eew> : VSSSched<eew, "WorstCase", forceMasked=1>;
+
+// Vector Indexed Loads and Stores
+class VLXSched<int dataEEW, bit isOrdered, string dataEMUL, string idxEMUL,
+ bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVLD" # !if(isOrdered, "O", "U") # "X" # dataEEW # "_" # dataEMUL)],
+ [ReadVLDX, !cast<SchedRead>("ReadVLD" # !if(isOrdered, "O", "U") # "XV_" # idxEMUL)],
+ dataEMUL, dataEEW, forceMasked
+>;
+class VLXSchedMC<int dataEEW, bit isOrdered>:
+ VLXSched<dataEEW, isOrdered, "WorstCase", "WorstCase", forceMasked=1>;
+
+class VSXSched<int dataEEW, bit isOrdered, string dataEMUL, string idxEMUL,
+ bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVST" # !if(isOrdered, "O", "U") # "X" # dataEEW # "_" # dataEMUL)],
+ [!cast<SchedRead>("ReadVST" # !if(isOrdered, "O", "U") #"X" # dataEEW # "_" # dataEMUL),
+ ReadVSTX, !cast<SchedRead>("ReadVST" # !if(isOrdered, "O", "U") # "XV_" # idxEMUL)],
+ dataEMUL, dataEEW, forceMasked
+>;
+class VSXSchedMC<int dataEEW, bit isOrdered>:
+ VSXSched<dataEEW, isOrdered, "WorstCase", "WorstCase", forceMasked=1>;
+
+// Unit-stride Fault-Only-First Loads
+class VLFSched<string lmul, bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVLDFF_" # lmul)],
+ [ReadVLDX], mx=lmul, forceMasked=forceMasked
+>;
+class VLFSchedMC: VLFSched<"WorstCase", forceMasked=1>;
// Unit-Stride Segment Loads and Stores
-class VLSEGSched<int nf, int eew, string emul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVLSEG" #nf #"e" #eew #"_" #emul),
- ReadVLDX, ReadVMask
-]>;
-class VSSEGSched<int nf, int eew, string emul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVSSEG" #nf #"e" #eew #"_" #emul),
- !cast<SchedReadWrite>("ReadVSTEV_" #emul),
- ReadVSTX, ReadVMask
-]>;
-class VLSEGFFSched<int nf, int eew, string emul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVLSEGFF" #nf #"e" #eew #"_" #emul),
- ReadVLDX, ReadVMask
-]>;
+class VLSEGSched<int nf, int eew, string emul, bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVLSEG" #nf #"e" #eew #"_" #emul)],
+ [ReadVLDX], emul, eew, forceMasked
+>;
+class VLSEGSchedMC<int nf, int eew> : VLSEGSched<nf, eew, "WorstCase",
+ forceMasked=1>;
+
+class VSSEGSched<int nf, int eew, string emul, bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVSSEG" # nf # "e" # eew # "_" # emul)],
+ [!cast<SchedRead>("ReadVSTEV_" #emul), ReadVSTX], emul, eew, forceMasked
+>;
+class VSSEGSchedMC<int nf, int eew> : VSSEGSched<nf, eew, "WorstCase",
+ forceMasked=1>;
+
+class VLSEGFFSched<int nf, int eew, string emul, bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVLSEGFF" # nf # "e" # eew # "_" # emul)],
+ [ReadVLDX], emul, eew, forceMasked
+>;
+class VLSEGFFSchedMC<int nf, int eew> : VLSEGFFSched<nf, eew, "WorstCase",
+ forceMasked=1>;
+
// Strided Segment Loads and Stores
-class VLSSEGSched<int nf, int eew, string emul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVLSSEG" #nf #"e" #eew #"_" #emul),
- ReadVLDX, ReadVLDSX, ReadVMask
-]>;
-class VSSSEGSched<int nf, int eew, string emul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVSSSEG" #nf #"e" #eew #"_" #emul),
- !cast<SchedReadWrite>("ReadVSTS" #eew #"V_" #emul),
- ReadVSTX, ReadVSTSX, ReadVMask
-]>;
+class VLSSEGSched<int nf, int eew, string emul, bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVLSSEG" #nf #"e" #eew #"_" #emul)],
+ [ReadVLDX, ReadVLDSX], emul, eew, forceMasked
+>;
+class VLSSEGSchedMC<int nf, int eew> : VLSSEGSched<nf, eew, "WorstCase",
+ forceMasked=1>;
+
+class VSSSEGSched<int nf, int eew, string emul, bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVSSSEG" #nf #"e" #eew #"_" #emul)],
+ [!cast<SchedRead>("ReadVSTS" #eew #"V_" #emul),
+ ReadVSTX, ReadVSTSX], emul, eew, forceMasked
+>;
+class VSSSEGSchedMC<int nf, int eew> : VSSSEGSched<nf, eew, "WorstCase",
+ forceMasked=1>;
+
// Indexed Segment Loads and Stores
-class VLXSEGSched<int nf, int eew, string isOrdered, string emul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVL" #isOrdered #"XSEG" #nf #"e" #eew #"_" #emul),
- ReadVLDX, !cast<SchedReadWrite>("ReadVLD" #isOrdered #"XV_" #emul), ReadVMask
-]>;
-class VSXSEGSched<int nf, int eew, string isOrdered, string emul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVS" #isOrdered #"XSEG" #nf #"e" #eew #"_" #emul),
- !cast<SchedReadWrite>("ReadVST" #isOrdered #"X" #eew #"_" #emul),
- ReadVSTX, !cast<SchedReadWrite>("ReadVST" #isOrdered #"XV_" #emul), ReadVMask
-]>;
+class VLXSEGSched<int nf, int eew, bit isOrdered, string emul,
+ bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVL" #!if(isOrdered, "O", "U") #"XSEG" #nf #"e" #eew #"_" #emul)],
+ [ReadVLDX, !cast<SchedRead>("ReadVLD" #!if(isOrdered, "O", "U") #"XV_" #emul)],
+ emul, eew, forceMasked
+>;
+class VLXSEGSchedMC<int nf, int eew, bit isOrdered>:
+ VLXSEGSched<nf, eew, isOrdered, "WorstCase", forceMasked=1>;
+
+// Pass sew=0 instead of eew, since this pseudo's scheduling resources do not
+// follow the MX_E naming form.
+class VSXSEGSched<int nf, int eew, bit isOrdered, string emul,
+ bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVS" #!if(isOrdered, "O", "U") #"XSEG" #nf #"e" #eew #"_" #emul)],
+ [!cast<SchedRead>("ReadVST" #!if(isOrdered, "O", "U") #"X" #eew #"_" #emul),
+ ReadVSTX, !cast<SchedRead>("ReadVST" #!if(isOrdered, "O", "U") #"XV_" #emul)],
+ emul, sew=0, forceMasked=forceMasked
+>;
+class VSXSEGSchedMC<int nf, int eew, bit isOrdered>:
+ VSXSEGSched<nf, eew, isOrdered, "WorstCase", forceMasked=1>;
//===----------------------------------------------------------------------===//
// Instruction class templates
@@ -327,10 +441,14 @@ class VALUmVV<bits<6> funct6, RISCVVFormat opv, string opcodestr>
}
// op vd, vs1, vs2, vm (reverse the order of vs1 and vs2)
-class VALUrVV<bits<6> funct6, RISCVVFormat opv, string opcodestr>
- : RVInstVV<funct6, opv, (outs VR:$vd),
- (ins VR:$vs1, VR:$vs2, VMaskOp:$vm),
- opcodestr, "$vd, $vs1, $vs2$vm">;
+class VALUrVV<bits<6> funct6, RISCVVFormat opv, string opcodestr,
+ bit EarlyClobber = 0>
+ : RVInstVV<funct6, opv, (outs VR:$vd_wb),
+ (ins VR:$vd, VR:$vs1, VR:$vs2, VMaskOp:$vm),
+ opcodestr, "$vd, $vs1, $vs2$vm"> {
+ let Constraints = !if(EarlyClobber, "@earlyclobber $vd_wb, $vd = $vd_wb",
+ "$vd = $vd_wb");
+}
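// (Illustrative note: with EarlyClobber = 1 the constraint string above
// expands to "@earlyclobber $vd_wb, $vd = $vd_wb", i.e. the accumulator
// source is tied to the destination and, in the widening forms, the wider
// destination group may not overlap the narrower vs1/vs2 sources.)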
// op vd, vs2, vs1
class VALUVVNoVm<bits<6> funct6, RISCVVFormat opv, string opcodestr>
@@ -355,10 +473,14 @@ class VALUmVX<bits<6> funct6, RISCVVFormat opv, string opcodestr>
}
// op vd, rs1, vs2, vm (reverse the order of rs1 and vs2)
-class VALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr>
- : RVInstVX<funct6, opv, (outs VR:$vd),
- (ins GPR:$rs1, VR:$vs2, VMaskOp:$vm),
- opcodestr, "$vd, $rs1, $vs2$vm">;
+class VALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr,
+ bit EarlyClobber = 0>
+ : RVInstVX<funct6, opv, (outs VR:$vd_wb),
+ (ins VR:$vd, GPR:$rs1, VR:$vs2, VMaskOp:$vm),
+ opcodestr, "$vd, $rs1, $vs2$vm"> {
+ let Constraints = !if(EarlyClobber, "@earlyclobber $vd_wb, $vd = $vd_wb",
+ "$vd = $vd_wb");
+}
// op vd, vs1, vs2
class VALUVXNoVm<bits<6> funct6, RISCVVFormat opv, string opcodestr>
@@ -397,10 +519,14 @@ class VALUVF<bits<6> funct6, RISCVVFormat opv, string opcodestr>
opcodestr, "$vd, $vs2, $rs1$vm">;
// op vd, rs1, vs2, vm (Float) (with mask, reverse the order of rs1 and vs2)
-class VALUrVF<bits<6> funct6, RISCVVFormat opv, string opcodestr>
- : RVInstVX<funct6, opv, (outs VR:$vd),
- (ins FPR32:$rs1, VR:$vs2, VMaskOp:$vm),
- opcodestr, "$vd, $rs1, $vs2$vm">;
+class VALUrVF<bits<6> funct6, RISCVVFormat opv, string opcodestr,
+ bit EarlyClobber = 0>
+ : RVInstVX<funct6, opv, (outs VR:$vd_wb),
+ (ins VR:$vd, FPR32:$rs1, VR:$vs2, VMaskOp:$vm),
+ opcodestr, "$vd, $rs1, $vs2$vm"> {
+ let Constraints = !if(EarlyClobber, "@earlyclobber $vd_wb, $vd = $vd_wb",
+ "$vd = $vd_wb");
+}
// op vd, vs2, vm (use vs1 as instruction encoding)
class VALUVs2<bits<6> funct6, bits<5> vs1, RISCVVFormat opv, string opcodestr>
@@ -422,42 +548,37 @@ class VALUVs2NoVm<bits<6> funct6, bits<5> vs1, RISCVVFormat opv, string opcodest
// Use these multiclasses to define instructions more easily.
//===----------------------------------------------------------------------===//
-multiclass VIndexLoadStore<list<int> EEWList> {
- foreach n = EEWList in {
- defvar w = !cast<RISCVWidth>("LSWidth" # n);
-
- def VLUXEI # n # _V :
- VIndexedLoad<MOPLDIndexedUnord, w, "vluxei" # n # ".v">,
- VLXSched<n, "U">;
- def VLOXEI # n # _V :
- VIndexedLoad<MOPLDIndexedOrder, w, "vloxei" # n # ".v">,
- VLXSched<n, "O">;
-
- def VSUXEI # n # _V :
- VIndexedStore<MOPSTIndexedUnord, w, "vsuxei" # n # ".v">,
- VSXSched<n, "U">;
- def VSOXEI # n # _V :
- VIndexedStore<MOPSTIndexedOrder, w, "vsoxei" # n # ".v">,
- VSXSched<n, "O">;
- }
+multiclass VIndexLoadStore<int eew> {
+ defvar w = !cast<RISCVWidth>("LSWidth" # eew);
+
+ def VLUXEI # eew # _V :
+ VIndexedLoad<MOPLDIndexedUnord, w, "vluxei" # eew # ".v">,
+ VLXSchedMC<eew, isOrdered=0>;
+ def VLOXEI # eew # _V :
+ VIndexedLoad<MOPLDIndexedOrder, w, "vloxei" # eew # ".v">,
+ VLXSchedMC<eew, isOrdered=1>;
+
+ def VSUXEI # eew # _V :
+ VIndexedStore<MOPSTIndexedUnord, w, "vsuxei" # eew # ".v">,
+ VSXSchedMC<eew, isOrdered=0>;
+ def VSOXEI # eew # _V :
+ VIndexedStore<MOPSTIndexedOrder, w, "vsoxei" # eew # ".v">,
+ VSXSchedMC<eew, isOrdered=1>;
}
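// For example, defm "" : VIndexLoadStore<16>; instantiates VLUXEI16_V,
// VLOXEI16_V, VSUXEI16_V and VSOXEI16_V with width LSWidth16 and worst-case
// scheduling information.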
multiclass VALU_IV_V<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
- Sched<[WriteVIALUV_WorstCase, ReadVIALUV_WorstCase,
- ReadVIALUV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV">;
}
multiclass VALU_IV_X<string opcodestr, bits<6> funct6> {
def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
- Sched<[WriteVIALUX_WorstCase, ReadVIALUV_WorstCase,
- ReadVIALUX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIALUX", "ReadVIALUV", "ReadVIALUX">;
}
multiclass VALU_IV_I<string opcodestr, bits<6> funct6> {
def I : VALUVI<funct6, opcodestr # ".vi", simm5>,
- Sched<[WriteVIALUI_WorstCase, ReadVIALUV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVIALUI", "ReadVIALUV">;
}
multiclass VALU_IV_V_X_I<string opcodestr, bits<6> funct6>
@@ -475,364 +596,314 @@ multiclass VALU_IV_X_I<string opcodestr, bits<6> funct6>
multiclass VALU_MV_V_X<string opcodestr, bits<6> funct6, string vw> {
def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVIWALUV_WorstCase, ReadVIWALUV_WorstCase,
- ReadVIWALUV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIWALUV", "ReadVIWALUV", "ReadVIWALUV">;
def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVIWALUX_WorstCase, ReadVIWALUV_WorstCase,
- ReadVIWALUX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIWALUX", "ReadVIWALUV", "ReadVIWALUX">;
}
multiclass VMAC_MV_V_X<string opcodestr, bits<6> funct6> {
def V : VALUrVV<funct6, OPMVV, opcodestr # ".vv">,
- Sched<[WriteVIMulAddV_WorstCase, ReadVIMulAddV_WorstCase,
- ReadVIMulAddV_WorstCase, ReadVMask]>;
+ SchedTernaryMC<"WriteVIMulAddV", "ReadVIMulAddV", "ReadVIMulAddV",
+ "ReadVIMulAddV">;
def X : VALUrVX<funct6, OPMVX, opcodestr # ".vx">,
- Sched<[WriteVIMulAddX_WorstCase, ReadVIMulAddV_WorstCase,
- ReadVIMulAddX_WorstCase, ReadVMask]>;
+ SchedTernaryMC<"WriteVIMulAddX", "ReadVIMulAddV", "ReadVIMulAddX",
+ "ReadVIMulAddV">;
}
multiclass VWMAC_MV_X<string opcodestr, bits<6> funct6> {
+ let RVVConstraint = WidenV in
def X : VALUrVX<funct6, OPMVX, opcodestr # ".vx">,
- Sched<[WriteVIWMulAddX_WorstCase, ReadVIWMulAddV_WorstCase,
- ReadVIWMulAddX_WorstCase, ReadVMask]>;
+ SchedTernaryMC<"WriteVIWMulAddX", "ReadVIWMulAddV", "ReadVIWMulAddX",
+ "ReadVIWMulAddV">;
}
multiclass VWMAC_MV_V_X<string opcodestr, bits<6> funct6>
: VWMAC_MV_X<opcodestr, funct6> {
- def V : VALUrVV<funct6, OPMVV, opcodestr # ".vv">,
- Sched<[WriteVIWMulAddV_WorstCase, ReadVIWMulAddV_WorstCase,
- ReadVIWMulAddV_WorstCase, ReadVMask]>;
+ let RVVConstraint = WidenV in
+ def V : VALUrVV<funct6, OPMVV, opcodestr # ".vv", EarlyClobber=1>,
+ SchedTernaryMC<"WriteVIWMulAddV", "ReadVIWMulAddV", "ReadVIWMulAddV",
+ "ReadVIWMulAddV">;
}
multiclass VALU_MV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>,
- Sched<[WriteVExtV_WorstCase, ReadVExtV_WorstCase, ReadVMask]>;
+ SchedUnaryMC<"WriteVExtV", "ReadVExtV">;
}
multiclass VMRG_IV_V_X_I<string opcodestr, bits<6> funct6> {
def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">,
- Sched<[WriteVIMergeV_WorstCase, ReadVIMergeV_WorstCase,
- ReadVIMergeV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIMergeV", "ReadVIMergeV", "ReadVIMergeV">;
def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">,
- Sched<[WriteVIMergeX_WorstCase, ReadVIMergeV_WorstCase,
- ReadVIMergeX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIMergeX", "ReadVIMergeV", "ReadVIMergeX">;
def IM : VALUmVI<funct6, opcodestr # ".vim">,
- Sched<[WriteVIMergeI_WorstCase, ReadVIMergeV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVIMergeI", "ReadVIMergeV">;
}
multiclass VALUm_IV_V_X<string opcodestr, bits<6> funct6> {
def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">,
- Sched<[WriteVICALUV_WorstCase, ReadVICALUV_WorstCase,
- ReadVICALUV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV">;
def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">,
- Sched<[WriteVICALUX_WorstCase, ReadVICALUV_WorstCase,
- ReadVICALUX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX">;
}
multiclass VALUm_IV_V_X_I<string opcodestr, bits<6> funct6>
: VALUm_IV_V_X<opcodestr, funct6> {
def IM : VALUmVI<funct6, opcodestr # ".vim">,
- Sched<[WriteVICALUI_WorstCase, ReadVICALUV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVICALUI", "ReadVICALUV">;
}
multiclass VALUNoVm_IV_V_X<string opcodestr, bits<6> funct6> {
def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">,
- Sched<[WriteVICALUV_WorstCase, ReadVICALUV_WorstCase,
- ReadVICALUV_WorstCase]>;
+ SchedBinaryMC<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV",
+ forceMasked=0>;
def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">,
- Sched<[WriteVICALUX_WorstCase, ReadVICALUV_WorstCase,
- ReadVICALUX_WorstCase]>;
+ SchedBinaryMC<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX",
+ forceMasked=0>;
}
multiclass VALUNoVm_IV_V_X_I<string opcodestr, bits<6> funct6>
: VALUNoVm_IV_V_X<opcodestr, funct6> {
def I : VALUVINoVm<funct6, opcodestr # ".vi", simm5>,
- Sched<[WriteVICALUI_WorstCase, ReadVICALUV_WorstCase]>;
+ SchedUnaryMC<"WriteVICALUI", "ReadVICALUV", forceMasked=0>;
}
multiclass VALU_FV_F<string opcodestr, bits<6> funct6> {
def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
- Sched<[WriteVFALUF_WorstCase, ReadVFALUV_WorstCase,
- ReadVFALUF_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF">;
}
multiclass VALU_FV_V_F<string opcodestr, bits<6> funct6>
: VALU_FV_F<opcodestr, funct6> {
def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">,
- Sched<[WriteVFALUV_WorstCase, ReadVFALUV_WorstCase,
- ReadVFALUV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFALUV", "ReadVFALUV", "ReadVFALUV">;
}
multiclass VWALU_FV_V_F<string opcodestr, bits<6> funct6, string vw> {
def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVFWALUV_WorstCase, ReadVFWALUV_WorstCase,
- ReadVFWALUV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV">;
def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
- Sched<[WriteVFWALUF_WorstCase, ReadVFWALUV_WorstCase,
- ReadVFWALUF_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFWALUF", "ReadVFWALUV", "ReadVFWALUF">;
}
multiclass VMUL_FV_V_F<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">,
- Sched<[WriteVFMulV_WorstCase, ReadVFMulV_WorstCase,
- ReadVFMulV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFMulV", "ReadVFMulV", "ReadVFMulV">;
def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
- Sched<[WriteVFMulF_WorstCase, ReadVFMulV_WorstCase,
- ReadVFMulF_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFMulF", "ReadVFMulV", "ReadVFMulF">;
}
multiclass VDIV_FV_F<string opcodestr, bits<6> funct6> {
def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
- Sched<[WriteVFDivF_WorstCase, ReadVFDivV_WorstCase,
- ReadVFDivF_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFDivF", "ReadVFDivV", "ReadVFDivF">;
}
multiclass VDIV_FV_V_F<string opcodestr, bits<6> funct6>
: VDIV_FV_F<opcodestr, funct6> {
def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">,
- Sched<[WriteVFDivV_WorstCase, ReadVFDivV_WorstCase,
- ReadVFDivV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFDivV", "ReadVFDivV", "ReadVFDivV">;
}
multiclass VWMUL_FV_V_F<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">,
- Sched<[WriteVFWMulV_WorstCase, ReadVFWMulV_WorstCase,
- ReadVFWMulV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFWMulV", "ReadVFWMulV", "ReadVFWMulV">;
def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
- Sched<[WriteVFWMulF_WorstCase, ReadVFWMulV_WorstCase,
- ReadVFWMulF_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFWMulF", "ReadVFWMulV", "ReadVFWMulF">;
}
multiclass VMAC_FV_V_F<string opcodestr, bits<6> funct6> {
def V : VALUrVV<funct6, OPFVV, opcodestr # ".vv">,
- Sched<[WriteVFMulAddV_WorstCase, ReadVFMulAddV_WorstCase,
- ReadVFMulAddV_WorstCase, ReadVMask]>;
+ SchedTernaryMC<"WriteVFMulAddV", "ReadVFMulAddV", "ReadVFMulAddV",
+ "ReadVFMulAddV">;
def F : VALUrVF<funct6, OPFVF, opcodestr # ".vf">,
- Sched<[WriteVFMulAddF_WorstCase, ReadVFMulAddV_WorstCase,
- ReadVFMulAddF_WorstCase, ReadVMask]>;
+ SchedTernaryMC<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF",
+ "ReadVFMulAddV">;
}
multiclass VWMAC_FV_V_F<string opcodestr, bits<6> funct6> {
- def V : VALUrVV<funct6, OPFVV, opcodestr # ".vv">,
- Sched<[WriteVFWMulAddV_WorstCase, ReadVFWMulAddV_WorstCase,
- ReadVFWMulAddV_WorstCase, ReadVMask]>;
- def F : VALUrVF<funct6, OPFVF, opcodestr # ".vf">,
- Sched<[WriteVFWMulAddF_WorstCase, ReadVFWMulAddV_WorstCase,
- ReadVFWMulAddF_WorstCase, ReadVMask]>;
+ let RVVConstraint = WidenV in {
+ def V : VALUrVV<funct6, OPFVV, opcodestr # ".vv", EarlyClobber=1>,
+ SchedTernaryMC<"WriteVFWMulAddV", "ReadVFWMulAddV", "ReadVFWMulAddV",
+ "ReadVFWMulAddV">;
+ def F : VALUrVF<funct6, OPFVF, opcodestr # ".vf", EarlyClobber=1>,
+ SchedTernaryMC<"WriteVFWMulAddF", "ReadVFWMulAddV", "ReadVFWMulAddF",
+ "ReadVFWMulAddV">;
+ }
}
multiclass VSQR_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFSqrtV_WorstCase, ReadVFSqrtV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVFSqrtV", "ReadVFSqrtV">;
}
multiclass VRCP_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFRecpV_WorstCase, ReadVFRecpV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVFRecpV", "ReadVFRecpV">;
}
multiclass VMINMAX_FV_V_F<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">,
- Sched<[WriteVFMinMaxV_WorstCase, ReadVFMinMaxV_WorstCase,
- ReadVFMinMaxV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFMinMaxV", "ReadVFMinMaxV", "ReadVFMinMaxV">;
def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
- Sched<[WriteVFMinMaxF_WorstCase, ReadVFMinMaxV_WorstCase,
- ReadVFMinMaxF_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFMinMaxF", "ReadVFMinMaxV", "ReadVFMinMaxF">;
}
multiclass VCMP_FV_F<string opcodestr, bits<6> funct6> {
def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
- Sched<[WriteVFCmpF_WorstCase, ReadVFCmpV_WorstCase,
- ReadVFCmpF_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFCmpF", "ReadVFCmpV", "ReadVFCmpF">;
}
multiclass VCMP_FV_V_F<string opcodestr, bits<6> funct6>
: VCMP_FV_F<opcodestr, funct6> {
def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">,
- Sched<[WriteVFCmpV_WorstCase, ReadVFCmpV_WorstCase,
- ReadVFCmpV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFCmpV", "ReadVFCmpV", "ReadVFCmpV">;
}
multiclass VSGNJ_FV_V_F<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">,
- Sched<[WriteVFSgnjV_WorstCase, ReadVFSgnjV_WorstCase,
- ReadVFSgnjV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFSgnjV", "ReadVFSgnjV", "ReadVFSgnjV">;
def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
- Sched<[WriteVFSgnjF_WorstCase, ReadVFSgnjV_WorstCase,
- ReadVFSgnjF_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFSgnjF", "ReadVFSgnjV", "ReadVFSgnjF">;
}
multiclass VCLS_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFClassV_WorstCase, ReadVFClassV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVFClassV", "ReadVFClassV">;
}
multiclass VCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFCvtIToFV_WorstCase, ReadVFCvtIToFV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVFCvtIToFV", "ReadVFCvtIToFV">;
}
multiclass VCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFCvtFToIV_WorstCase, ReadVFCvtFToIV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVFCvtFToIV", "ReadVFCvtFToIV">;
}
multiclass VWCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFWCvtIToFV_WorstCase, ReadVFWCvtIToFV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVFWCvtIToFV", "ReadVFWCvtIToFV">;
}
multiclass VWCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFWCvtFToIV_WorstCase, ReadVFWCvtFToIV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVFWCvtFToIV", "ReadVFWCvtFToIV">;
}
multiclass VWCVTF_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFWCvtFToFV_WorstCase, ReadVFWCvtFToFV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVFWCvtFToFV", "ReadVFWCvtFToFV">;
}
multiclass VNCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFNCvtIToFV_WorstCase, ReadVFNCvtIToFV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV">;
}
multiclass VNCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFNCvtFToIV_WorstCase, ReadVFNCvtFToIV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVFNCvtFToIV", "ReadVFNCvtFToIV">;
}
multiclass VNCVTF_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFNCvtFToFV_WorstCase, ReadVFNCvtFToFV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV">;
}
multiclass VRED_MV_V<string opcodestr, bits<6> funct6> {
def _VS : VALUVV<funct6, OPMVV, opcodestr # ".vs">,
- Sched<[WriteVIRedV_From_WorstCase, ReadVIRedV, ReadVIRedV0,
- ReadVMask]>;
+ SchedReductionMC<"WriteVIRedV_From", "ReadVIRedV", "ReadVIRedV0">;
}
multiclass VREDMINMAX_MV_V<string opcodestr, bits<6> funct6> {
def _VS : VALUVV<funct6, OPMVV, opcodestr # ".vs">,
- Sched<[WriteVIRedMinMaxV_From_WorstCase, ReadVIRedV, ReadVIRedV0,
- ReadVMask]>;
+ SchedReductionMC<"WriteVIRedMinMaxV_From", "ReadVIRedV", "ReadVIRedV0">;
}
multiclass VWRED_IV_V<string opcodestr, bits<6> funct6> {
def _VS : VALUVV<funct6, OPIVV, opcodestr # ".vs">,
- Sched<[WriteVIWRedV_From_WorstCase, ReadVIWRedV, ReadVIWRedV0,
- ReadVMask]>;
+ SchedReductionMC<"WriteVIWRedV_From", "ReadVIWRedV", "ReadVIWRedV0">;
}
multiclass VRED_FV_V<string opcodestr, bits<6> funct6> {
def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
- Sched<[WriteVFRedV_From_WorstCase, ReadVFRedV, ReadVFRedV0,
- ReadVMask]>;
+ SchedReductionMC<"WriteVFRedV_From", "ReadVFRedV", "ReadVFRedV0">;
}
multiclass VREDMINMAX_FV_V<string opcodestr, bits<6> funct6> {
def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
- Sched<[WriteVFRedMinMaxV_From_WorstCase, ReadVFRedV, ReadVFRedV0,
- ReadVMask]>;
+ SchedReductionMC<"WriteVFRedMinMaxV_From", "ReadVFRedV", "ReadVFRedV0">;
}
multiclass VREDO_FV_V<string opcodestr, bits<6> funct6> {
def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
- Sched<[WriteVFRedOV_From_WorstCase, ReadVFRedOV, ReadVFRedOV0,
- ReadVMask]>;
+ SchedReductionMC<"WriteVFRedOV_From", "ReadVFRedOV", "ReadVFRedOV0">;
}
multiclass VWRED_FV_V<string opcodestr, bits<6> funct6> {
def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
- Sched<[WriteVFWRedV_From_WorstCase, ReadVFWRedV, ReadVFWRedV0,
- ReadVMask]>;
+ SchedReductionMC<"WriteVFWRedV_From", "ReadVFWRedV", "ReadVFWRedV0">;
}
multiclass VWREDO_FV_V<string opcodestr, bits<6> funct6> {
def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
- Sched<[WriteVFWRedOV_From_WorstCase, ReadVFWRedOV, ReadVFWRedOV0,
- ReadVMask]>;
+ SchedReductionMC<"WriteVFWRedOV_From", "ReadVFWRedOV", "ReadVFWRedOV0">;
}
multiclass VMALU_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> {
def M : VALUVVNoVm<funct6, OPMVV, opcodestr #"." #vm #"m">,
- Sched<[WriteVMALUV_WorstCase, ReadVMALUV_WorstCase,
- ReadVMALUV_WorstCase]>;
+ SchedBinaryMC<"WriteVMALUV", "ReadVMALUV", "ReadVMALUV",
+ forceMasked=0>;
}
multiclass VMSFS_MV_V<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>,
- Sched<[WriteVMSFSV_WorstCase, ReadVMSFSV_WorstCase, ReadVMask]>;
+ SchedUnaryMC<"WriteVMSFSV", "ReadVMSFSV">;
}
multiclass VMIOT_MV_V<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>,
- Sched<[WriteVMIotV_WorstCase, ReadVMIotV_WorstCase, ReadVMask]>;
+ SchedUnaryMC<"WriteVMIotV", "ReadVMIotV">;
}
multiclass VSHT_IV_V_X_I<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
- Sched<[WriteVShiftV_WorstCase, ReadVShiftV_WorstCase,
- ReadVShiftV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVShiftV", "ReadVShiftV", "ReadVShiftV">;
def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
- Sched<[WriteVShiftX_WorstCase, ReadVShiftV_WorstCase,
- ReadVShiftX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVShiftX", "ReadVShiftV", "ReadVShiftX">;
def I : VALUVI<funct6, opcodestr # ".vi", uimm5>,
- Sched<[WriteVShiftI_WorstCase, ReadVShiftV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVShiftI", "ReadVShiftV">;
}
multiclass VNSHT_IV_V_X_I<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPIVV, opcodestr # ".wv">,
- Sched<[WriteVNShiftV_WorstCase, ReadVNShiftV_WorstCase,
- ReadVNShiftV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVNShiftV", "ReadVNShiftV", "ReadVNShiftV">;
def X : VALUVX<funct6, OPIVX, opcodestr # ".wx">,
- Sched<[WriteVNShiftX_WorstCase, ReadVNShiftV_WorstCase,
- ReadVNShiftX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVNShiftX", "ReadVNShiftV", "ReadVNShiftX">;
def I : VALUVI<funct6, opcodestr # ".wi", uimm5>,
- Sched<[WriteVNShiftI_WorstCase, ReadVNShiftV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVNShiftI", "ReadVNShiftV">;
}
multiclass VMINMAX_IV_V_X<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
- Sched<[WriteVIMinMaxV_WorstCase, ReadVIMinMaxV_WorstCase,
- ReadVIMinMaxV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIMinMaxV", "ReadVIMinMaxV", "ReadVIMinMaxV">;
def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
- Sched<[WriteVIMinMaxX_WorstCase, ReadVIMinMaxV_WorstCase,
- ReadVIMinMaxX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIMinMaxX", "ReadVIMinMaxV", "ReadVIMinMaxX">;
}
multiclass VCMP_IV_V<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
- Sched<[WriteVICmpV_WorstCase, ReadVICmpV_WorstCase,
- ReadVICmpV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVICmpV", "ReadVICmpV", "ReadVICmpV">;
}
multiclass VCMP_IV_X<string opcodestr, bits<6> funct6> {
def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
- Sched<[WriteVICmpX_WorstCase, ReadVICmpV_WorstCase,
- ReadVICmpX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVICmpX", "ReadVICmpV", "ReadVICmpX">;
}
multiclass VCMP_IV_I<string opcodestr, bits<6> funct6> {
def I : VALUVI<funct6, opcodestr # ".vi", simm5>,
- Sched<[WriteVICmpI_WorstCase, ReadVICmpV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVICmpI", "ReadVICmpV">;
}
multiclass VCMP_IV_V_X_I<string opcodestr, bits<6> funct6>
@@ -850,140 +921,109 @@ multiclass VCMP_IV_V_X<string opcodestr, bits<6> funct6>
multiclass VMUL_MV_V_X<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPMVV, opcodestr # ".vv">,
- Sched<[WriteVIMulV_WorstCase, ReadVIMulV_WorstCase,
- ReadVIMulV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIMulV", "ReadVIMulV", "ReadVIMulV">;
def X : VALUVX<funct6, OPMVX, opcodestr # ".vx">,
- Sched<[WriteVIMulX_WorstCase, ReadVIMulV_WorstCase,
- ReadVIMulX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIMulX", "ReadVIMulV", "ReadVIMulX">;
}
multiclass VWMUL_MV_V_X<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPMVV, opcodestr # ".vv">,
- Sched<[WriteVIWMulV_WorstCase, ReadVIWMulV_WorstCase,
- ReadVIWMulV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIWMulV", "ReadVIWMulV", "ReadVIWMulV">;
def X : VALUVX<funct6, OPMVX, opcodestr # ".vx">,
- Sched<[WriteVIWMulX_WorstCase, ReadVIWMulV_WorstCase,
- ReadVIWMulX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIWMulX", "ReadVIWMulV", "ReadVIWMulX">;
}
multiclass VDIV_MV_V_X<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPMVV, opcodestr # ".vv">,
- Sched<[WriteVIDivV_WorstCase, ReadVIDivV_WorstCase,
- ReadVIDivV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIDivV", "ReadVIDivV", "ReadVIDivV">;
def X : VALUVX<funct6, OPMVX, opcodestr # ".vx">,
- Sched<[WriteVIDivX_WorstCase, ReadVIDivV_WorstCase,
- ReadVIDivX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIDivX", "ReadVIDivV", "ReadVIDivX">;
}
multiclass VSALU_IV_V_X<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
- Sched<[WriteVSALUV_WorstCase, ReadVSALUV_WorstCase,
- ReadVSALUV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVSALUV", "ReadVSALUV", "ReadVSALUV">;
def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
- Sched<[WriteVSALUX_WorstCase, ReadVSALUV_WorstCase,
- ReadVSALUX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVSALUX", "ReadVSALUV", "ReadVSALUX">;
}
multiclass VSALU_IV_V_X_I<string opcodestr, bits<6> funct6>
: VSALU_IV_V_X<opcodestr, funct6> {
def I : VALUVI<funct6, opcodestr # ".vi", simm5>,
- Sched<[WriteVSALUI_WorstCase, ReadVSALUV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVSALUI", "ReadVSALUV">;
}
multiclass VAALU_MV_V_X<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPMVV, opcodestr # ".vv">,
- Sched<[WriteVAALUV_WorstCase, ReadVAALUV_WorstCase,
- ReadVAALUV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVAALUV", "ReadVAALUV", "ReadVAALUV">;
def X : VALUVX<funct6, OPMVX, opcodestr # ".vx">,
- Sched<[WriteVAALUX_WorstCase, ReadVAALUV_WorstCase,
- ReadVAALUX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVAALUX", "ReadVAALUV", "ReadVAALUX">;
}
multiclass VSMUL_IV_V_X<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
- Sched<[WriteVSMulV_WorstCase, ReadVSMulV_WorstCase,
- ReadVSMulV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVSMulV", "ReadVSMulV", "ReadVSMulV">;
def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
- Sched<[WriteVSMulX_WorstCase, ReadVSMulV_WorstCase,
- ReadVSMulX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVSMulX", "ReadVSMulV", "ReadVSMulX">;
}
multiclass VSSHF_IV_V_X_I<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
- Sched<[WriteVSShiftV_WorstCase, ReadVSShiftV_WorstCase,
- ReadVSShiftV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVSShiftV", "ReadVSShiftV", "ReadVSShiftV">;
def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
- Sched<[WriteVSShiftX_WorstCase, ReadVSShiftV_WorstCase,
- ReadVSShiftX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVSShiftX", "ReadVSShiftV", "ReadVSShiftX">;
def I : VALUVI<funct6, opcodestr # ".vi", uimm5>,
- Sched<[WriteVSShiftI_WorstCase, ReadVSShiftV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVSShiftI", "ReadVSShiftV">;
}
multiclass VNCLP_IV_V_X_I<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPIVV, opcodestr # ".wv">,
- Sched<[WriteVNClipV_WorstCase, ReadVNClipV_WorstCase,
- ReadVNClipV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVNClipV", "ReadVNClipV", "ReadVNClipV">;
def X : VALUVX<funct6, OPIVX, opcodestr # ".wx">,
- Sched<[WriteVNClipX_WorstCase, ReadVNClipV_WorstCase,
- ReadVNClipX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVNClipX", "ReadVNClipV", "ReadVNClipX">;
def I : VALUVI<funct6, opcodestr # ".wi", uimm5>,
- Sched<[WriteVNClipI_WorstCase, ReadVNClipV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVNClipI", "ReadVNClipV">;
}
multiclass VSLD_IV_X_I<string opcodestr, bits<6> funct6> {
def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
- Sched<[WriteVISlideX_WorstCase, ReadVISlideV_WorstCase,
- ReadVISlideX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVISlideX", "ReadVISlideV", "ReadVISlideX">;
def I : VALUVI<funct6, opcodestr # ".vi", uimm5>,
- Sched<[WriteVISlideI_WorstCase, ReadVISlideV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVISlideI", "ReadVISlideV">;
}
multiclass VSLD1_MV_X<string opcodestr, bits<6> funct6> {
def X : VALUVX<funct6, OPMVX, opcodestr # ".vx">,
- Sched<[WriteVISlide1X_WorstCase, ReadVISlideV_WorstCase,
- ReadVISlideX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVISlide1X", "ReadVISlideV", "ReadVISlideX">;
}
multiclass VSLD1_FV_F<string opcodestr, bits<6> funct6> {
def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
- Sched<[WriteVFSlide1F_WorstCase, ReadVFSlideV_WorstCase,
- ReadVFSlideF_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFSlide1F", "ReadVFSlideV", "ReadVFSlideF">;
}
multiclass VGTR_IV_V_X_I<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
- Sched<[WriteVRGatherVV_WorstCase, ReadVRGatherVV_data_WorstCase,
- ReadVRGatherVV_index_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVRGatherVV", "ReadVRGatherVV_data",
+ "ReadVRGatherVV_index">;
def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
- Sched<[WriteVRGatherVX_WorstCase, ReadVRGatherVX_data_WorstCase,
- ReadVRGatherVX_index_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVRGatherVX", "ReadVRGatherVX_data",
+ "ReadVRGatherVX_index">;
def I : VALUVI<funct6, opcodestr # ".vi", uimm5>,
- Sched<[WriteVRGatherVI_WorstCase, ReadVRGatherVI_data_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVRGatherVI", "ReadVRGatherVI_data">;
}
multiclass VCPR_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> {
def M : VALUVVNoVm<funct6, OPMVV, opcodestr # "." # vm # "m">,
- Sched<[WriteVCompressV_WorstCase, ReadVCompressV_WorstCase,
- ReadVCompressV_WorstCase]>;
+ SchedBinaryMC<"WriteVCompressV", "ReadVCompressV", "ReadVCompressV">;
}
-multiclass VWholeLoadN<bits<3> nf, string opcodestr, RegisterClass VRC> {
- foreach l = [8, 16, 32] in {
- defvar w = !cast<RISCVWidth>("LSWidth" # l);
- defvar s = !cast<SchedWrite>("WriteVLD" # !add(nf, 1) # "R");
+multiclass VWholeLoadN<int l, bits<3> nf, string opcodestr, RegisterClass VRC> {
+ defvar w = !cast<RISCVWidth>("LSWidth" # l);
+ defvar s = !cast<SchedWrite>("WriteVLD" # !add(nf, 1) # "R");
- def E # l # _V : VWholeLoad<nf, w, opcodestr # "e" # l # ".v", VRC>,
- Sched<[s, ReadVLDX]>;
- }
-}
-multiclass VWholeLoadEEW64<bits<3> nf, string opcodestr, RegisterClass VRC, SchedReadWrite schedrw> {
- def E64_V : VWholeLoad<nf, LSWidth64, opcodestr # "e64.v", VRC>,
- Sched<[schedrw, ReadVLDX]>;
+ def E # l # _V : VWholeLoad<nf, w, opcodestr # "e" # l # ".v", VRC>,
+ Sched<[s, ReadVLDX]>;
}
//===----------------------------------------------------------------------===//
@@ -1003,23 +1043,34 @@ def VSETVL : RVInstSetVL<(outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2),
"vsetvl", "$rd, $rs1, $rs2">,
Sched<[WriteVSETVL, ReadVSETVL, ReadVSETVL]>;
} // hasSideEffects = 1, mayLoad = 0, mayStore = 0
-foreach eew = [8, 16, 32] in {
+} // Predicates = [HasVInstructions]
+
+foreach eew = [8, 16, 32, 64] in {
defvar w = !cast<RISCVWidth>("LSWidth" # eew);
- // Vector Unit-Stride Instructions
- def VLE#eew#_V : VUnitStrideLoad<w, "vle"#eew#".v">, VLESched;
- def VSE#eew#_V : VUnitStrideStore<w, "vse"#eew#".v">, VSESched;
+ let Predicates = !if(!eq(eew, 64), [HasVInstructionsI64],
+ [HasVInstructions]) in {
+ // Vector Unit-Stride Instructions
+ def VLE#eew#_V : VUnitStrideLoad<w, "vle"#eew#".v">, VLESchedMC;
+ def VSE#eew#_V : VUnitStrideStore<w, "vse"#eew#".v">, VSESchedMC;
- // Vector Unit-Stride Fault-only-First Loads
- def VLE#eew#FF_V : VUnitStrideLoadFF<w, "vle"#eew#"ff.v">, VLFSched;
+ // Vector Unit-Stride Fault-only-First Loads
+ def VLE#eew#FF_V : VUnitStrideLoadFF<w, "vle"#eew#"ff.v">, VLFSchedMC;
- // Vector Strided Instructions
- def VLSE#eew#_V : VStridedLoad<w, "vlse"#eew#".v">, VLSSched<eew>;
- def VSSE#eew#_V : VStridedStore<w, "vsse"#eew#".v">, VSSSched<eew>;
-}
+ // Vector Strided Instructions
+ def VLSE#eew#_V : VStridedLoad<w, "vlse"#eew#".v">, VLSSchedMC<eew>;
+ def VSSE#eew#_V : VStridedStore<w, "vsse"#eew#".v">, VSSSchedMC<eew>;
-defm "" : VIndexLoadStore<[8, 16, 32]>;
-} // Predicates = [HasVInstructions]
+ defm VL1R : VWholeLoadN<eew, 0, "vl1r", VR>;
+ defm VL2R : VWholeLoadN<eew, 1, "vl2r", VRM2>;
+ defm VL4R : VWholeLoadN<eew, 3, "vl4r", VRM4>;
+ defm VL8R : VWholeLoadN<eew, 7, "vl8r", VRM8>;
+ }
+
+ let Predicates = !if(!eq(eew, 64), [IsRV64, HasVInstructionsI64],
+ [HasVInstructions]) in
+ defm "" : VIndexLoadStore<eew>;
+}
let Predicates = [HasVInstructions] in {
def VLM_V : VUnitStrideLoadMask<"vlm.v">,
@@ -1031,11 +1082,6 @@ def : InstAlias<"vle1.v $vd, (${rs1})",
def : InstAlias<"vse1.v $vs3, (${rs1})",
(VSM_V VR:$vs3, GPR:$rs1), 0>;
-defm VL1R : VWholeLoadN<0, "vl1r", VR>;
-defm VL2R : VWholeLoadN<1, "vl2r", VRM2>;
-defm VL4R : VWholeLoadN<3, "vl4r", VRM4>;
-defm VL8R : VWholeLoadN<7, "vl8r", VRM8>;
-
def VS1R_V : VWholeStore<0, "vs1r.v", VR>,
Sched<[WriteVST1R, ReadVST1R, ReadVSTX]>;
def VS2R_V : VWholeStore<1, "vs2r.v", VRM2>,
@@ -1051,33 +1097,6 @@ def : InstAlias<"vl4r.v $vd, (${rs1})", (VL4RE8_V VRM4:$vd, GPR:$rs1)>;
def : InstAlias<"vl8r.v $vd, (${rs1})", (VL8RE8_V VRM8:$vd, GPR:$rs1)>;
} // Predicates = [HasVInstructions]
-let Predicates = [HasVInstructionsI64] in {
-// Vector Unit-Stride Instructions
-def VLE64_V : VUnitStrideLoad<LSWidth64, "vle64.v">,
- VLESched;
-
-def VLE64FF_V : VUnitStrideLoadFF<LSWidth64, "vle64ff.v">,
- VLFSched;
-
-def VSE64_V : VUnitStrideStore<LSWidth64, "vse64.v">,
- VSESched;
-// Vector Strided Instructions
-def VLSE64_V : VStridedLoad<LSWidth64, "vlse64.v">,
- VLSSched<32>;
-
-def VSSE64_V : VStridedStore<LSWidth64, "vsse64.v">,
- VSSSched<64>;
-
-defm VL1R: VWholeLoadEEW64<0, "vl1r", VR, WriteVLD1R>;
-defm VL2R: VWholeLoadEEW64<1, "vl2r", VRM2, WriteVLD2R>;
-defm VL4R: VWholeLoadEEW64<3, "vl4r", VRM4, WriteVLD4R>;
-defm VL8R: VWholeLoadEEW64<7, "vl8r", VRM8, WriteVLD8R>;
-} // Predicates = [HasVInstructionsI64]
-let Predicates = [IsRV64, HasVInstructionsI64] in {
- // Vector Indexed Instructions
- defm "" : VIndexLoadStore<[64]>;
-} // [IsRV64, HasVInstructionsI64]
-
let Predicates = [HasVInstructions] in {
// Vector Single-Width Integer Add and Subtract
defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>;
@@ -1268,12 +1287,10 @@ defm VMADD_V : VMAC_MV_V_X<"vmadd", 0b101001>;
defm VNMSUB_V : VMAC_MV_V_X<"vnmsub", 0b101011>;
// Vector Widening Integer Multiply-Add Instructions
-let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
defm VWMACCU_V : VWMAC_MV_V_X<"vwmaccu", 0b111100>;
defm VWMACC_V : VWMAC_MV_V_X<"vwmacc", 0b111101>;
defm VWMACCSU_V : VWMAC_MV_V_X<"vwmaccsu", 0b111111>;
defm VWMACCUS_V : VWMAC_MV_X<"vwmaccus", 0b111110>;
-} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
// Vector Integer Merge Instructions
defm VMERGE_V : VMRG_IV_V_X_I<"vmerge", 0b010111>;
@@ -1284,15 +1301,15 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vs2 = 0, vm = 1,
// op vd, vs1
def VMV_V_V : RVInstVV<0b010111, OPIVV, (outs VR:$vd),
(ins VR:$vs1), "vmv.v.v", "$vd, $vs1">,
- Sched<[WriteVIMovV_WorstCase, ReadVIMovV_WorstCase]>;
+ SchedUnaryMC<"WriteVIMovV", "ReadVIMovV", forceMasked=0>;
// op vd, rs1
def VMV_V_X : RVInstVX<0b010111, OPIVX, (outs VR:$vd),
(ins GPR:$rs1), "vmv.v.x", "$vd, $rs1">,
- Sched<[WriteVIMovX_WorstCase, ReadVIMovX_WorstCase]>;
+ SchedUnaryMC<"WriteVIMovX", "ReadVIMovX", forceMasked=0>;
// op vd, imm
def VMV_V_I : RVInstIVI<0b010111, (outs VR:$vd),
(ins simm5:$imm), "vmv.v.i", "$vd, $imm">,
- Sched<[WriteVIMovI_WorstCase]>;
+ SchedNullaryMC<"WriteVIMovI", forceMasked=0>;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
// Vector Fixed-Point Arithmetic Instructions
@@ -1373,8 +1390,7 @@ defm VFNMSUB_V : VMAC_FV_V_F<"vfnmsub", 0b101011>;
}
// Vector Widening Floating-Point Fused Multiply-Add Instructions
-let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV,
- Uses = [FRM], mayRaiseFPException = true in {
+let Uses = [FRM], mayRaiseFPException = true in {
defm VFWMACC_V : VWMAC_FV_V_F<"vfwmacc", 0b111100>;
defm VFWNMACC_V : VWMAC_FV_V_F<"vfwnmacc", 0b111101>;
defm VFWMSAC_V : VWMAC_FV_V_F<"vfwmsac", 0b111110>;
@@ -1435,15 +1451,14 @@ let vm = 0 in
def VFMERGE_VFM : RVInstVX<0b010111, OPFVF, (outs VR:$vd),
(ins VR:$vs2, FPR32:$rs1, VMV0:$v0),
"vfmerge.vfm", "$vd, $vs2, $rs1, v0">,
- Sched<[WriteVFMergeV_WorstCase, ReadVFMergeV_WorstCase,
- ReadVFMergeF_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFMergeV", "ReadVFMergeV", "ReadVFMergeF">;
// Vector Floating-Point Move Instruction
let RVVConstraint = NoConstraint in
let vm = 1, vs2 = 0 in
def VFMV_V_F : RVInstVX<0b010111, OPFVF, (outs VR:$vd),
(ins FPR32:$rs1), "vfmv.v.f", "$vd, $rs1">,
- Sched<[WriteVFMovV_WorstCase, ReadVFMovF_WorstCase]>;
+ SchedUnaryMC<"WriteVFMovV", "ReadVFMovF", forceMasked=0>;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
@@ -1584,15 +1599,13 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
def VCPOP_M : RVInstV<0b010000, 0b10000, OPMVV, (outs GPR:$vd),
(ins VR:$vs2, VMaskOp:$vm),
"vcpop.m", "$vd, $vs2$vm">,
- Sched<[WriteVMPopV_WorstCase, ReadVMPopV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVMPopV", "ReadVMPopV">;
// vfirst find-first-set mask bit
def VFIRST_M : RVInstV<0b010000, 0b10001, OPMVV, (outs GPR:$vd),
(ins VR:$vs2, VMaskOp:$vm),
"vfirst.m", "$vd, $vs2$vm">,
- Sched<[WriteVMFFSV_WorstCase, ReadVMFFSV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVMFFSV", "ReadVMFFSV">;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
@@ -1618,7 +1631,7 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
let vs2 = 0 in
def VID_V : RVInstV<0b010100, 0b10001, OPMVV, (outs VR:$vd),
(ins VMaskOp:$vm), "vid.v", "$vd$vm">,
- Sched<[WriteVMIdxV_WorstCase, ReadVMask]>;
+ SchedNullaryMC<"WriteVMIdxV">;
// Integer Scalar Move Instructions
let vm = 1, RVVConstraint = NoConstraint in {
@@ -1674,8 +1687,8 @@ let Predicates = [HasVInstructions] in {
let Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather in {
defm VRGATHER_V : VGTR_IV_V_X_I<"vrgather", 0b001100>;
def VRGATHEREI16_VV : VALUVV<0b001110, OPIVV, "vrgatherei16.vv">,
- Sched<[WriteVRGatherVV_WorstCase, ReadVRGatherVV_data_WorstCase,
- ReadVRGatherVV_index_WorstCase]>;
+ SchedBinaryMC<"WriteVRGatherVV", "ReadVRGatherVV_data",
+ "ReadVRGatherVV_index">;
} // Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather
// Vector Compress Instruction
@@ -1705,38 +1718,38 @@ let Predicates = [HasVInstructions] in {
def VLSEG#nf#E#eew#_V :
VUnitStrideSegmentLoad<!add(nf, -1), w, "vlseg"#nf#"e"#eew#".v">,
- VLSEGSched<nf, eew>;
+ VLSEGSchedMC<nf, eew>;
def VLSEG#nf#E#eew#FF_V :
VUnitStrideSegmentLoadFF<!add(nf, -1), w, "vlseg"#nf#"e"#eew#"ff.v">,
- VLSEGFFSched<nf, eew>;
+ VLSEGFFSchedMC<nf, eew>;
def VSSEG#nf#E#eew#_V :
VUnitStrideSegmentStore<!add(nf, -1), w, "vsseg"#nf#"e"#eew#".v">,
- VSSEGSched<nf, eew>;
+ VSSEGSchedMC<nf, eew>;
// Vector Strided Instructions
def VLSSEG#nf#E#eew#_V :
VStridedSegmentLoad<!add(nf, -1), w, "vlsseg"#nf#"e"#eew#".v">,
- VLSSEGSched<nf, eew>;
+ VLSSEGSchedMC<nf, eew>;
def VSSSEG#nf#E#eew#_V :
VStridedSegmentStore<!add(nf, -1), w, "vssseg"#nf#"e"#eew#".v">,
- VSSSEGSched<nf, eew>;
+ VSSSEGSchedMC<nf, eew>;
// Vector Indexed Instructions
def VLUXSEG#nf#EI#eew#_V :
VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, w,
"vluxseg"#nf#"ei"#eew#".v">,
- VLXSEGSched<nf, eew, "U">;
+ VLXSEGSchedMC<nf, eew, isOrdered=0>;
def VLOXSEG#nf#EI#eew#_V :
VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, w,
"vloxseg"#nf#"ei"#eew#".v">,
- VLXSEGSched<nf, eew, "O">;
+ VLXSEGSchedMC<nf, eew, isOrdered=1>;
def VSUXSEG#nf#EI#eew#_V :
VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, w,
"vsuxseg"#nf#"ei"#eew#".v">,
- VSXSEGSched<nf, eew, "U">;
+ VSXSEGSchedMC<nf, eew, isOrdered=0>;
def VSOXSEG#nf#EI#eew#_V :
VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, w,
"vsoxseg"#nf#"ei"#eew#".v">,
- VSXSEGSched<nf, eew, "O">;
+ VSXSEGSchedMC<nf, eew, isOrdered=1>;
}
}
} // Predicates = [HasVInstructions]
@@ -1746,21 +1759,21 @@ let Predicates = [HasVInstructionsI64] in {
// Vector Unit-strided Segment Instructions
def VLSEG#nf#E64_V :
VUnitStrideSegmentLoad<!add(nf, -1), LSWidth64, "vlseg"#nf#"e64.v">,
- VLSEGSched<nf, 64>;
+ VLSEGSchedMC<nf, 64>;
def VLSEG#nf#E64FF_V :
VUnitStrideSegmentLoadFF<!add(nf, -1), LSWidth64, "vlseg"#nf#"e64ff.v">,
- VLSEGFFSched<nf, 64>;
+ VLSEGFFSchedMC<nf, 64>;
def VSSEG#nf#E64_V :
VUnitStrideSegmentStore<!add(nf, -1), LSWidth64, "vsseg"#nf#"e64.v">,
- VSSEGSched<nf, 64>;
+ VSSEGSchedMC<nf, 64>;
// Vector Strided Segment Instructions
def VLSSEG#nf#E64_V :
VStridedSegmentLoad<!add(nf, -1), LSWidth64, "vlsseg"#nf#"e64.v">,
- VLSSEGSched<nf, 64>;
+ VLSSEGSchedMC<nf, 64>;
def VSSSEG#nf#E64_V :
VStridedSegmentStore<!add(nf, -1), LSWidth64, "vssseg"#nf#"e64.v">,
- VSSSEGSched<nf, 64>;
+ VSSSEGSchedMC<nf, 64>;
}
} // Predicates = [HasVInstructionsI64]
let Predicates = [HasVInstructionsI64, IsRV64] in {
@@ -1769,20 +1782,21 @@ let Predicates = [HasVInstructionsI64, IsRV64] in {
def VLUXSEG #nf #EI64_V
: VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, LSWidth64,
"vluxseg" #nf #"ei64.v">,
- VLXSEGSched<nf, 64, "U">;
+ VLXSEGSchedMC<nf, 64, isOrdered=0>;
def VLOXSEG #nf #EI64_V
: VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, LSWidth64,
"vloxseg" #nf #"ei64.v">,
- VLXSEGSched<nf, 64, "O">;
+ VLXSEGSchedMC<nf, 64, isOrdered=1>;
def VSUXSEG #nf #EI64_V
: VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, LSWidth64,
"vsuxseg" #nf #"ei64.v">,
- VSXSEGSched<nf, 64, "U">;
+ VSXSEGSchedMC<nf, 64, isOrdered=0>;
def VSOXSEG #nf #EI64_V
: VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, LSWidth64,
"vsoxseg" #nf #"ei64.v">,
- VSXSEGSched<nf, 64, "O">;
+ VSXSEGSchedMC<nf, 64, isOrdered=1>;
}
} // Predicates = [HasVInstructionsI64, IsRV64]
+include "RISCVInstrInfoZvfbf.td"
include "RISCVInstrInfoVPseudos.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index f8b7e32fe34c..5e06422cf9ad 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -31,7 +31,7 @@
/// the exact bit pattern of inactive lanes, or produce the bit pattern -1 for
/// those lanes. Note that each lane can make this choice independently.
/// Instructions which produce masks (and only those instructions) also have the
-/// option of producing a result as-if VL had been VLMAX.
+/// option of producing a result as-if VL had been VLMAX.
/// * "Undefined" - The bit pattern of the inactive lanes is unspecified, and
/// can be changed without impacting the semantics of the program. Note that
/// this concept does not exist in the specification, and requires source
@@ -52,26 +52,26 @@
///
/// Currently, the policy is represented via the following intrinsic families:
/// * _MASK - Can represent all three policy states for both tail and mask. If
-/// passthrough is IMPLICIT_DEF, then represents "undefined". Otherwise,
-/// policy operand and tablegen flags drive the interpretation. (If policy
-/// operand is not present - there are a couple, thought we're rapidly
-/// removing them - a non-undefined policy defaults to "tail agnostic", and
-/// "mask undisturbed". Since this is the only variant with a mask, all
-/// other variants are "mask undefined".
+/// passthrough is IMPLICIT_DEF (or NoReg), then represents "undefined".
+/// Otherwise, policy operand and tablegen flags drive the interpretation.
+/// (If policy operand is not present - there are a couple, though we're
+/// rapidly removing them - a non-undefined policy defaults to "tail
+///     agnostic", and "mask undisturbed".) Since this is the only variant with
+/// a mask, all other variants are "mask undefined".
/// * Unsuffixed w/ both passthrough and policy operand. Can represent all
-/// three policy states. If passthrough is IMPLICIT_DEF, then represents
-/// "undefined". Otherwise, policy operand and tablegen flags drive the
-/// interpretation.
+/// three policy states. If passthrough is IMPLICIT_DEF (or NoReg), then
+/// represents "undefined". Otherwise, policy operand and tablegen flags
+/// drive the interpretation.
/// * Unsuffixed w/o passthrough or policy operand -- Does not have a
/// passthrough operand, and thus represents the "undefined" state. Note
/// that terminology in code frequently refers to these as "TA" which is
/// confusing. We're in the process of migrating away from this
/// representation.
/// * _TU w/o policy operand -- Has a passthrough operand, and always
-/// represents the tail undisturbed state.
+/// represents the tail undisturbed state.
/// * _TU w/policy operand - Can represent all three policy states. If
-/// passthrough is IMPLICIT_DEF, then represents "undefined". Otherwise,
-/// policy operand and tablegen flags drive the interpretation.
+/// passthrough is IMPLICIT_DEF (or NoReg), then represents "undefined".
+/// Otherwise, policy operand and tablegen flags drive the interpretation.
///
//===----------------------------------------------------------------------===//
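/// As a concrete reading of the above (an illustrative sketch, assuming the
/// usual RISCVII policy encoding of TAIL_AGNOSTIC = 1 and MASK_AGNOSTIC = 2),
/// the trailing policy immediate of a _MASK pseudo selects among the states:
///
///   PseudoVADD_VV_M1_MASK $passthru, $vs2, $vs1, $v0, $avl, $sew, 3 ; TA, MA
///   PseudoVADD_VV_M1_MASK $passthru, $vs2, $vs1, $v0, $avl, $sew, 0 ; TU, MU
///   PseudoVADD_VV_M1_MASK $noreg,    $vs2, $vs1, $v0, $avl, $sew, 3 ; undefined
///
/// The operand order here is schematic; the class definitions below give the
/// authoritative layouts.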
@@ -81,9 +81,9 @@ def riscv_vmv_x_s : SDNode<"RISCVISD::VMV_X_S",
def riscv_read_vlenb : SDNode<"RISCVISD::READ_VLENB",
SDTypeProfile<1, 0, [SDTCisVT<0, XLenVT>]>>;
-// Operand that is allowed to be a register or a 5 bit immediate.
-// This allows us to pick between VSETIVLI and VSETVLI opcodes using the same
-// pseudo instructions.
+// Operand that is allowed to be a register other than X0, a 5-bit unsigned
+// immediate, or -1 (meaning VLMAX). This allows us to pick between VSETIVLI
+// and VSETVLI opcodes using the same pseudo instructions.
def AVL : RegisterOperand<GPRNoX0> {
let OperandNamespace = "RISCVOp";
let OperandType = "OPERAND_AVL";
@@ -115,16 +115,9 @@ class PseudoToVInst<string PseudoInst> {
["_E32", ""],
["_E16", ""],
["_E8", ""],
- ["_F64", "_F"],
- ["_F32", "_F"],
- ["_F16", "_F"],
- ["_VF64", "_VF"],
- ["_VF32", "_VF"],
- ["_VF16", "_VF"],
- ["_WF64", "_WF"],
- ["_WF32", "_WF"],
- ["_WF16", "_WF"],
- ["_TU", ""],
+ ["FPR64", "F"],
+ ["FPR32", "F"],
+ ["FPR16", "F"],
["_TIED", ""],
["_MASK", ""],
["_B64", ""],
@@ -141,7 +134,8 @@ class PseudoToVInst<string PseudoInst> {
["_M2", ""],
["_M4", ""],
["_M8", ""],
- ["_SE", ""]
+ ["_SE", ""],
+ ["_RM", ""]
];
string VInst = !foldl(PseudoInst, AffixSubsts, Acc, AffixSubst,
!subst(AffixSubst[0], AffixSubst[1], Acc));
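// Worked example of the !foldl above (illustrative; the elided leading
// entries of AffixSubsts include ["Pseudo", ""]):
//   "PseudoVFADD_VFPR64_M2_MASK"
//     -> "VFADD_VFPR64_M2_MASK"  after ["Pseudo", ""]
//     -> "VFADD_VF_M2_MASK"      after ["FPR64", "F"]
//     -> "VFADD_VF_M2"           after ["_MASK", ""]
//     -> "VFADD_VF"              after ["_M2", ""]
// so the pseudo resolves back to the real vfadd.vf instruction record.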
@@ -189,7 +183,7 @@ defvar MxListFWRed = [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8];
// Use for zext/sext.vf2
defvar MxListVF2 = [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8];
-// Use for zext/sext.vf4
+// Use for zext/sext.vf4 and vector crypto instructions
defvar MxListVF4 = [V_MF2, V_M1, V_M2, V_M4, V_M8];
// Use for zext/sext.vf8
@@ -204,7 +198,7 @@ class MxSet<int eew> {
class FPR_Info<int sew> {
RegisterClass fprclass = !cast<RegisterClass>("FPR" # sew);
- string FX = "F" # sew;
+ string FX = "FPR" # sew;
int SEW = sew;
list<LMULInfo> MxList = MxSet<sew>.m;
list<LMULInfo> MxListFW = !if(!eq(sew, 64), [], !listremove(MxList, [V_M8]));
@@ -214,16 +208,20 @@ def SCALAR_F16 : FPR_Info<16>;
def SCALAR_F32 : FPR_Info<32>;
def SCALAR_F64 : FPR_Info<64>;
+// BF16 uses the same register class as F16.
+def SCALAR_BF16 : FPR_Info<16>;
+
defvar FPList = [SCALAR_F16, SCALAR_F32, SCALAR_F64];
// Used for widening instructions. It excludes F64.
defvar FPListW = [SCALAR_F16, SCALAR_F32];
+// Used for widening bf16 instructions.
+defvar BFPListW = [SCALAR_BF16];
+
class NFSet<LMULInfo m> {
- list<int> L = !cond(!eq(m.value, V_M8.value): [],
- !eq(m.value, V_M4.value): [2],
- !eq(m.value, V_M2.value): [2, 3, 4],
- true: [2, 3, 4, 5, 6, 7, 8]);
+ defvar lmul = !shl(1, m.value);
+ list<int> L = NFList<lmul>.L;
}
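// Worked example (values taken from the deleted !cond above): V_M2 has LMUL
// encoding 1, so lmul = !shl(1, 1) = 2 and NFList<2>.L = [2, 3, 4]; V_M4
// (encoding 2) yields NFList<4>.L = [2]; V_M8 (encoding 3) yields
// NFList<8>.L = []. Fractional LMULs are assumed to hit NFList's catch-all
// case, matching the old default of [2, 3, 4, 5, 6, 7, 8].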
class octuple_to_str<int octuple> {
@@ -243,6 +241,8 @@ def VLOpFrag : PatFrag<(ops), (XLenVT (VLOp (XLenVT AVL:$vl)))>;
// This must be kept in sync with RISCV::VLMaxSentinel.
def VLMax : OutPatFrag<(ops), (XLenVT -1)>;
+def SelectFPImm : ComplexPattern<fAny, 1, "selectFPImm", [], [], 1>;
+
// List of EEW.
defvar EEWList = [8, 16, 32, 64];
@@ -272,9 +272,10 @@ class VTypeInfo<ValueType Vec, ValueType Mas, int Sew, VReg Reg, LMULInfo M,
OutPatFrag AVL = VLMax;
string ScalarSuffix = !cond(!eq(Scal, XLenVT) : "X",
- !eq(Scal, f16) : "F16",
- !eq(Scal, f32) : "F32",
- !eq(Scal, f64) : "F64");
+ !eq(Scal, f16) : "FPR16",
+ !eq(Scal, bf16) : "FPR16",
+ !eq(Scal, f32) : "FPR32",
+ !eq(Scal, f64) : "FPR64");
}
class GroupVTypeInfo<ValueType Vec, ValueType VecM1, ValueType Mas, int Sew,
@@ -356,6 +357,25 @@ defset list<VTypeInfo> AllVectors = {
}
}
+defset list<VTypeInfo> AllBFloatVectors = {
+ defset list<VTypeInfo> NoGroupBFloatVectors = {
+ defset list<VTypeInfo> FractionalGroupBFloatVectors = {
+ def VBF16MF4: VTypeInfo<vbfloat16mf4_t, vbool64_t, 16, VR, V_MF4, bf16, FPR16>;
+ def VBF16MF2: VTypeInfo<vbfloat16mf2_t, vbool32_t, 16, VR, V_MF2, bf16, FPR16>;
+ }
+ def VBF16M1: VTypeInfo<vbfloat16m1_t, vbool16_t, 16, VR, V_M1, bf16, FPR16>;
+ }
+
+ defset list<GroupVTypeInfo> GroupBFloatVectors = {
+ def VBF16M2: GroupVTypeInfo<vbfloat16m2_t, vbfloat16m1_t, vbool8_t, 16,
+ VRM2, V_M2, bf16, FPR16>;
+ def VBF16M4: GroupVTypeInfo<vbfloat16m4_t, vbfloat16m1_t, vbool4_t, 16,
+ VRM4, V_M4, bf16, FPR16>;
+ def VBF16M8: GroupVTypeInfo<vbfloat16m8_t, vbfloat16m1_t, vbool2_t, 16,
+ VRM8, V_M8, bf16, FPR16>;
+ }
+}
+
// This functor is used to obtain the int vector type that has the same SEW and
// multiplier as the input parameter type
class GetIntVTypeInfo<VTypeInfo vti> {
@@ -491,6 +511,14 @@ defset list<VTypeInfoToWide> AllWidenableIntToFloatVectors = {
def : VTypeInfoToWide<VI32M4, VF64M8>;
}
+defset list<VTypeInfoToWide> AllWidenableBFloatToFloatVectors = {
+ def : VTypeInfoToWide<VBF16MF4, VF32MF2>;
+ def : VTypeInfoToWide<VBF16MF2, VF32M1>;
+ def : VTypeInfoToWide<VBF16M1, VF32M2>;
+ def : VTypeInfoToWide<VBF16M2, VF32M4>;
+ def : VTypeInfoToWide<VBF16M4, VF32M8>;
+}
+
// This class holds the record of the RISCVVPseudoTable below.
// This represents the information we need in codegen for each pseudo.
// The definition should be consistent with `struct PseudoInfo` in
@@ -500,11 +528,21 @@ class RISCVVPseudo {
Instruction BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
// SEW = 0 is used to denote that the Pseudo is not SEW specific (or unknown).
bits<8> SEW = 0;
+ bit NeedBeInPseudoTable = 1;
+  // TargetOverlapConstraintType indicates whether these instructions' source
+  // and destination operands are allowed to overlap.
+  // 1 -> default value, keep the current constraint
+  // 2 -> narrowing case
+  // 3 -> widening case
+  // TODO: Add TargetOverlapConstraintType into PseudosTable so it can be
+  // queried there as well.
+ bits<2> TargetOverlapConstraintType = 1;
}
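// Illustrative sketch (MyWidenedPseudo is a hypothetical name): a pseudo can
// opt into the relaxed widening overlap rules either directly or via the
// TargetConstraintType parameters threaded through the classes below:
//
//   class MyWidenedPseudo<VReg RetClass> : Pseudo<...>, RISCVVPseudo {
//     let TargetOverlapConstraintType = 3; // widening case
//   }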
// The actual table.
def RISCVVPseudosTable : GenericTable {
let FilterClass = "RISCVVPseudo";
+ let FilterClassField = "NeedBeInPseudoTable";
let CppTypeName = "PseudoInfo";
let Fields = [ "Pseudo", "BaseInstr" ];
let PrimaryKey = [ "Pseudo" ];
@@ -534,16 +572,17 @@ def RISCVVIntrinsicsTable : GenericTable {
// unmasked variant. For all but compares, both the masked and
// unmasked variants have a passthru and policy operand. For compares,
// neither has a policy op, and only the masked version has a passthru.
-class RISCVMaskedPseudo<bits<4> MaskIdx> {
+class RISCVMaskedPseudo<bits<4> MaskIdx, bit MaskAffectsRes=false> {
Pseudo MaskedPseudo = !cast<Pseudo>(NAME);
Pseudo UnmaskedPseudo = !cast<Pseudo>(!subst("_MASK", "", NAME));
bits<4> MaskOpIdx = MaskIdx;
+ bit MaskAffectsResult = MaskAffectsRes;
}
def RISCVMaskedPseudosTable : GenericTable {
let FilterClass = "RISCVMaskedPseudo";
let CppTypeName = "RISCVMaskedPseudoInfo";
- let Fields = ["MaskedPseudo", "UnmaskedPseudo", "MaskOpIdx"];
+ let Fields = ["MaskedPseudo", "UnmaskedPseudo", "MaskOpIdx", "MaskAffectsResult"];
let PrimaryKey = ["MaskedPseudo"];
let PrimaryKeyName = "getMaskedPseudoInfo";
}
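// Minimal sketch of the pairing (PseudoVFOO is a hypothetical name; the right
// MaskIdx depends on the operand layout): the _MASK record carries the
// annotation, and getMaskedPseudoInfo() recovers the unmasked twin by
// stripping the "_MASK" suffix:
//
//   def PseudoVFOO_VV_M1      : VPseudoBinaryNoMask<...>;
//   def PseudoVFOO_VV_M1_MASK : VPseudoBinaryMaskPolicy<...>,
//                               RISCVMaskedPseudo<MaskIdx=3>;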
@@ -723,16 +762,18 @@ class VPseudo<Instruction instr, LMULInfo m, dag outs, dag ins, int sew = 0> :
class GetVTypePredicates<VTypeInfo vti> {
list<Predicate> Predicates = !cond(!eq(vti.Scalar, f16) : [HasVInstructionsF16],
+ !eq(vti.Scalar, bf16) : [HasVInstructionsBF16],
!eq(vti.Scalar, f32) : [HasVInstructionsAnyF],
!eq(vti.Scalar, f64) : [HasVInstructionsF64],
!eq(vti.SEW, 64) : [HasVInstructionsI64],
true : [HasVInstructions]);
}
-class VPseudoUSLoadNoMask<VReg RetClass, int EEW> :
+class VPseudoUSLoadNoMask<VReg RetClass,
+ int EEW> :
Pseudo<(outs RetClass:$rd),
(ins RetClass:$dest, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew,
- ixlenimm:$policy),[]>,
+ ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLE</*Masked*/0, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -744,11 +785,12 @@ class VPseudoUSLoadNoMask<VReg RetClass, int EEW> :
let Constraints = "$rd = $dest";
}
-class VPseudoUSLoadMask<VReg RetClass, int EEW> :
+class VPseudoUSLoadMask<VReg RetClass,
+ int EEW> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge,
- GPRMem:$rs1,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ GPRMem:$rs1,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLE</*Masked*/1, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -761,10 +803,11 @@ class VPseudoUSLoadMask<VReg RetClass, int EEW> :
let UsesMaskPolicy = 1;
}
-class VPseudoUSLoadFFNoMask<VReg RetClass, int EEW> :
+class VPseudoUSLoadFFNoMask<VReg RetClass,
+ int EEW> :
Pseudo<(outs RetClass:$rd, GPR:$vl),
(ins RetClass:$dest, GPRMem:$rs1, AVL:$avl,
- ixlenimm:$sew, ixlenimm:$policy),[]>,
+ ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLE</*Masked*/0, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -776,11 +819,12 @@ class VPseudoUSLoadFFNoMask<VReg RetClass, int EEW> :
let Constraints = "$rd = $dest";
}
-class VPseudoUSLoadFFMask<VReg RetClass, int EEW> :
+class VPseudoUSLoadFFMask<VReg RetClass,
+ int EEW> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd, GPR:$vl),
- (ins GetVRegNoV0<RetClass>.R:$merge,
- GPRMem:$rs1,
- VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy),[]>,
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ GPRMem:$rs1,
+ VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLE</*Masked*/1, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -793,10 +837,11 @@ class VPseudoUSLoadFFMask<VReg RetClass, int EEW> :
let UsesMaskPolicy = 1;
}
-class VPseudoSLoadNoMask<VReg RetClass, int EEW>:
+class VPseudoSLoadNoMask<VReg RetClass,
+ int EEW> :
Pseudo<(outs RetClass:$rd),
(ins RetClass:$dest, GPRMem:$rs1, GPR:$rs2, AVL:$vl,
- ixlenimm:$sew, ixlenimm:$policy),[]>,
+ ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLE</*Masked*/0, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -808,11 +853,12 @@ class VPseudoSLoadNoMask<VReg RetClass, int EEW>:
let Constraints = "$rd = $dest";
}
-class VPseudoSLoadMask<VReg RetClass, int EEW>:
+class VPseudoSLoadMask<VReg RetClass,
+ int EEW> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge,
- GPRMem:$rs1, GPR:$rs2,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ GPRMem:$rs1, GPR:$rs2,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLE</*Masked*/1, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -825,11 +871,16 @@ class VPseudoSLoadMask<VReg RetClass, int EEW>:
let UsesMaskPolicy = 1;
}
-class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
- bit Ordered, bit EarlyClobber>:
+class VPseudoILoadNoMask<VReg RetClass,
+ VReg IdxClass,
+ int EEW,
+ bits<3> LMUL,
+ bit Ordered,
+ bit EarlyClobber,
+ int TargetConstraintType = 1> :
Pseudo<(outs RetClass:$rd),
(ins RetClass:$dest, GPRMem:$rs1, IdxClass:$rs2, AVL:$vl,
- ixlenimm:$sew, ixlenimm:$policy),[]>,
+ ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLX</*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 1;
@@ -839,29 +890,37 @@ class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd, $rd = $dest", "$rd = $dest");
+ let TargetOverlapConstraintType = TargetConstraintType;
}
-class VPseudoILoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
- bit Ordered, bit EarlyClobber>:
+class VPseudoILoadMask<VReg RetClass,
+ VReg IdxClass,
+ int EEW,
+ bits<3> LMUL,
+ bit Ordered,
+ bit EarlyClobber,
+ int TargetConstraintType = 1> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge,
- GPRMem:$rs1, IdxClass:$rs2,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ GPRMem:$rs1, IdxClass:$rs2,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLX</*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd, $rd = $merge", "$rd = $merge");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
let UsesMaskPolicy = 1;
}
-class VPseudoUSStoreNoMask<VReg StClass, int EEW>:
+class VPseudoUSStoreNoMask<VReg StClass,
+ int EEW> :
Pseudo<(outs),
- (ins StClass:$rd, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
+ (ins StClass:$rd, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo,
RISCVVSE</*Masked*/0, /*Strided*/0, !logtwo(EEW), VLMul> {
let mayLoad = 0;
@@ -871,9 +930,11 @@ class VPseudoUSStoreNoMask<VReg StClass, int EEW>:
let HasSEWOp = 1;
}
-class VPseudoUSStoreMask<VReg StClass, int EEW>:
+class VPseudoUSStoreMask<VReg StClass,
+ int EEW> :
Pseudo<(outs),
- (ins StClass:$rd, GPRMem:$rs1, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ (ins StClass:$rd, GPRMem:$rs1,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo,
RISCVVSE</*Masked*/1, /*Strided*/0, !logtwo(EEW), VLMul> {
let mayLoad = 0;
@@ -883,9 +944,11 @@ class VPseudoUSStoreMask<VReg StClass, int EEW>:
let HasSEWOp = 1;
}
-class VPseudoSStoreNoMask<VReg StClass, int EEW>:
+class VPseudoSStoreNoMask<VReg StClass,
+ int EEW> :
Pseudo<(outs),
- (ins StClass:$rd, GPRMem:$rs1, GPR:$rs2, AVL:$vl, ixlenimm:$sew),[]>,
+ (ins StClass:$rd, GPRMem:$rs1, GPR:$rs2,
+ AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo,
RISCVVSE</*Masked*/0, /*Strided*/1, !logtwo(EEW), VLMul> {
let mayLoad = 0;
@@ -895,9 +958,11 @@ class VPseudoSStoreNoMask<VReg StClass, int EEW>:
let HasSEWOp = 1;
}
-class VPseudoSStoreMask<VReg StClass, int EEW>:
+class VPseudoSStoreMask<VReg StClass,
+ int EEW> :
Pseudo<(outs),
- (ins StClass:$rd, GPRMem:$rs1, GPR:$rs2, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ (ins StClass:$rd, GPRMem:$rs1, GPR:$rs2,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo,
RISCVVSE</*Masked*/1, /*Strided*/1, !logtwo(EEW), VLMul> {
let mayLoad = 0;
@@ -907,10 +972,11 @@ class VPseudoSStoreMask<VReg StClass, int EEW>:
let HasSEWOp = 1;
}
-class VPseudoNullaryNoMask<VReg RegClass>:
+class VPseudoNullaryNoMask<VReg RegClass> :
Pseudo<(outs RegClass:$rd),
- (ins RegClass:$merge, AVL:$vl, ixlenimm:$sew,
- ixlenimm:$policy), []>, RISCVVPseudo {
+ (ins RegClass:$merge,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -920,10 +986,11 @@ class VPseudoNullaryNoMask<VReg RegClass>:
let HasVecPolicyOp = 1;
}
-class VPseudoNullaryMask<VReg RegClass>:
+class VPseudoNullaryMask<VReg RegClass> :
Pseudo<(outs GetVRegNoV0<RegClass>.R:$rd),
- (ins GetVRegNoV0<RegClass>.R:$merge, VMaskOp:$vm, AVL:$vl,
- ixlenimm:$sew, ixlenimm:$policy), []>, RISCVVPseudo {
+ (ins GetVRegNoV0<RegClass>.R:$merge,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -936,9 +1003,9 @@ class VPseudoNullaryMask<VReg RegClass>:
// Nullary for pseudo instructions. They are expanded in
// RISCVExpandPseudoInsts pass.
-class VPseudoNullaryPseudoM<string BaseInst>
- : Pseudo<(outs VR:$rd), (ins AVL:$vl, ixlenimm:$sew), []>,
- RISCVVPseudo {
+class VPseudoNullaryPseudoM<string BaseInst> :
+ Pseudo<(outs VR:$rd), (ins AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -947,33 +1014,41 @@ class VPseudoNullaryPseudoM<string BaseInst>
// BaseInstr is not used in RISCVExpandPseudoInsts pass.
// Just fill a corresponding real v-inst to pass tablegen check.
let BaseInstr = !cast<Instruction>(BaseInst);
+ // We exclude them from RISCVVPseudoTable.
+ let NeedBeInPseudoTable = 0;
}
-class VPseudoUnaryNoMask<DAGOperand RetClass, DAGOperand OpClass,
- string Constraint = ""> :
+class VPseudoUnaryNoMask<DAGOperand RetClass,
+ DAGOperand OpClass,
+ string Constraint = "",
+ int TargetConstraintType = 1> :
Pseudo<(outs RetClass:$rd),
- (ins RetClass:$merge, OpClass:$rs2, AVL:$vl, ixlenimm:$sew,
- ixlenimm:$policy), []>,
- RISCVVPseudo {
+ (ins RetClass:$merge, OpClass:$rs2,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
}
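// !interleave joins every list element with the separator, including empty
// strings, so a pseudo instantiated with Constraint = "" gets the string
// ",$rd = $merge" (leading comma included), which LLVM's constraint
// parsing accepts. Self-contained sketch (TiedCns and the defs are
// illustrative names):
class TiedCns<string Constraint> {
  string Constraints = !interleave([Constraint, "$rd = $merge"], ",");
}
def WithEC : TiedCns<"@earlyclobber $rd">; // "@earlyclobber $rd,$rd = $merge"
def NoEC   : TiedCns<"">;                  // ",$rd = $merge"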
-class VPseudoUnaryNoMaskRoundingMode<DAGOperand RetClass, DAGOperand OpClass,
- string Constraint = ""> :
+class VPseudoUnaryNoMaskRoundingMode<DAGOperand RetClass,
+ DAGOperand OpClass,
+ string Constraint = "",
+ int TargetConstraintType = 1> :
Pseudo<(outs RetClass:$rd),
- (ins RetClass:$merge, OpClass:$rs2, ixlenimm:$rm, AVL:$vl, ixlenimm:$sew,
- ixlenimm:$policy), []>,
- RISCVVPseudo {
+ (ins RetClass:$merge, OpClass:$rs2, ixlenimm:$rm,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
@@ -981,27 +1056,33 @@ class VPseudoUnaryNoMaskRoundingMode<DAGOperand RetClass, DAGOperand OpClass,
let UsesVXRM = 0;
}
-class VPseudoUnaryMask<VReg RetClass, VReg OpClass, string Constraint = ""> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
- RISCVVPseudo {
+class VPseudoUnaryMask<VReg RetClass,
+ VReg OpClass,
+ string Constraint = "",
+ int TargetConstraintType = 1> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
let UsesMaskPolicy = 1;
}
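// GetVRegNoV0<RetClass>.R instantiates a helper class inline and reads a
// single field of the resulting anonymous record; that is how the masked
// pseudos restrict $rd away from v0 without a separate def per class.
// Sketch of the field-access idiom alone (Scale and Total are
// illustrative names):
class Scale<int n> {
  int Bytes = !mul(n, 8);
}
def Total {
  int X = Scale<4>.Bytes; // X = 32
}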
-class VPseudoUnaryMaskRoundingMode<VReg RetClass, VReg OpClass, string Constraint = ""> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
- VMaskOp:$vm, ixlenimm:$rm,
- AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
- RISCVVPseudo {
+class VPseudoUnaryMaskRoundingMode<VReg RetClass,
+ VReg OpClass,
+ string Constraint = ""> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
+ VMaskOp:$vm, ixlenimm:$rm,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1014,10 +1095,12 @@ class VPseudoUnaryMaskRoundingMode<VReg RetClass, VReg OpClass, string Constrain
let UsesVXRM = 0;
}
-class VPseudoUnaryMask_NoExcept<VReg RetClass, VReg OpClass, string Constraint = ""> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2, VMaskOp:$vm,
- AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []> {
+class VPseudoUnaryMask_NoExcept<VReg RetClass,
+ VReg OpClass,
+ string Constraint = ""> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1029,10 +1112,13 @@ class VPseudoUnaryMask_NoExcept<VReg RetClass, VReg OpClass, string Constraint =
let usesCustomInserter = 1;
}
-class VPseudoUnaryNoMask_FRM<VReg RetClass, VReg OpClass, string Constraint = ""> :
- Pseudo<(outs RetClass:$rd),
- (ins RetClass:$merge, OpClass:$rs2, ixlenimm:$frm, AVL:$vl,
- ixlenimm:$sew, ixlenimm:$policy), []> {
+class VPseudoUnaryNoMask_FRM<VReg RetClass,
+ VReg OpClass,
+ string Constraint = ""> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$merge, OpClass:$rs2, ixlenimm:$frm,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1040,13 +1126,17 @@ class VPseudoUnaryNoMask_FRM<VReg RetClass, VReg OpClass, string Constraint = ""
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
- let usesCustomInserter = 1;
+ let HasRoundModeOp = 1;
}
-class VPseudoUnaryMask_FRM<VReg RetClass, VReg OpClass, string Constraint = ""> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
- VMaskOp:$vm, ixlenimm:$frm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []> {
+class VPseudoUnaryMask_FRM<VReg RetClass,
+ VReg OpClass,
+ string Constraint = ""> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
+ VMaskOp:$vm, ixlenimm:$frm,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1055,13 +1145,13 @@ class VPseudoUnaryMask_FRM<VReg RetClass, VReg OpClass, string Constraint = "">
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
let UsesMaskPolicy = 1;
- let usesCustomInserter = 1;
+ let HasRoundModeOp = 1;
}
class VPseudoUnaryNoMaskGPROut :
- Pseudo<(outs GPR:$rd),
- (ins VR:$rs2, AVL:$vl, ixlenimm:$sew), []>,
- RISCVVPseudo {
+ Pseudo<(outs GPR:$rd),
+ (ins VR:$rs2, AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1069,10 +1159,10 @@ class VPseudoUnaryNoMaskGPROut :
let HasSEWOp = 1;
}
-class VPseudoUnaryMaskGPROut:
- Pseudo<(outs GPR:$rd),
- (ins VR:$rs1, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
- RISCVVPseudo {
+class VPseudoUnaryMaskGPROut :
+ Pseudo<(outs GPR:$rd),
+ (ins VR:$rs1, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1084,10 +1174,8 @@ class VPseudoUnaryMaskGPROut:
class VPseudoUnaryAnyMask<VReg RetClass,
VReg Op1Class> :
Pseudo<(outs RetClass:$rd),
- (ins RetClass:$merge,
- Op1Class:$rs2,
- VR:$vm, AVL:$vl, ixlenimm:$sew),
- []>,
+ (ins RetClass:$merge, Op1Class:$rs2,
+ VR:$vm, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
@@ -1100,14 +1188,16 @@ class VPseudoUnaryAnyMask<VReg RetClass,
class VPseudoBinaryNoMask<VReg RetClass,
VReg Op1Class,
DAGOperand Op2Class,
- string Constraint> :
- Pseudo<(outs RetClass:$rd),
- (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs RetClass:$rd),
+ (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = Constraint;
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
}
@@ -1115,15 +1205,17 @@ class VPseudoBinaryNoMask<VReg RetClass,
class VPseudoBinaryNoMaskTU<VReg RetClass,
VReg Op1Class,
DAGOperand Op2Class,
- string Constraint> :
- Pseudo<(outs RetClass:$rd),
- (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl,
- ixlenimm:$sew, ixlenimm:$policy), []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl,
+ ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
@@ -1133,14 +1225,16 @@ class VPseudoBinaryNoMaskRoundingMode<VReg RetClass,
VReg Op1Class,
DAGOperand Op2Class,
string Constraint,
- int UsesVXRM_ = 1> :
- Pseudo<(outs RetClass:$rd),
- (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, ixlenimm:$rm,
- AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
- RISCVVPseudo {
+ int UsesVXRM_ = 1,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, ixlenimm:$rm,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
@@ -1152,16 +1246,18 @@ class VPseudoBinaryMaskPolicyRoundingMode<VReg RetClass,
RegisterClass Op1Class,
DAGOperand Op2Class,
string Constraint,
- int UsesVXRM_> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge,
- Op1Class:$rs2, Op2Class:$rs1,
- VMaskOp:$vm, ixlenimm:$rm, AVL:$vl,
- ixlenimm:$sew, ixlenimm:$policy), []>,
- RISCVVPseudo {
+ int UsesVXRM_,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ Op1Class:$rs2, Op2Class:$rs1,
+ VMaskOp:$vm, ixlenimm:$rm, AVL:$vl,
+ ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
@@ -1175,15 +1271,17 @@ class VPseudoBinaryMaskPolicyRoundingMode<VReg RetClass,
// This allows maskedoff and rs2 to be the same register.
class VPseudoTiedBinaryNoMask<VReg RetClass,
DAGOperand Op2Class,
- string Constraint> :
- Pseudo<(outs RetClass:$rd),
- (ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew,
- ixlenimm:$policy), []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew,
+ ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $rs2"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
@@ -1193,17 +1291,19 @@ class VPseudoTiedBinaryNoMask<VReg RetClass,
class VPseudoTiedBinaryNoMaskRoundingMode<VReg RetClass,
DAGOperand Op2Class,
- string Constraint> :
- Pseudo<(outs RetClass:$rd),
- (ins RetClass:$rs2, Op2Class:$rs1,
- ixlenimm:$rm,
- AVL:$vl, ixlenimm:$sew,
- ixlenimm:$policy), []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$rs2, Op2Class:$rs1,
+ ixlenimm:$rm,
+ AVL:$vl, ixlenimm:$sew,
+ ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $rs2"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
@@ -1216,7 +1316,8 @@ class VPseudoTiedBinaryNoMaskRoundingMode<VReg RetClass,
class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL,
bit Ordered>:
Pseudo<(outs),
- (ins StClass:$rd, GPRMem:$rs1, IdxClass:$rs2, AVL:$vl, ixlenimm:$sew),[]>,
+ (ins StClass:$rd, GPRMem:$rs1, IdxClass:$rs2, AVL:$vl,
+ ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVSX</*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 0;
@@ -1229,7 +1330,8 @@ class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL,
class VPseudoIStoreMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL,
bit Ordered>:
Pseudo<(outs),
- (ins StClass:$rd, GPRMem:$rs1, IdxClass:$rs2, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ (ins StClass:$rd, GPRMem:$rs1, IdxClass:$rs2,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVSX</*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 0;
@@ -1243,11 +1345,11 @@ class VPseudoBinaryMask<VReg RetClass,
RegisterClass Op1Class,
DAGOperand Op2Class,
string Constraint> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge,
- Op1Class:$rs2, Op2Class:$rs1,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
- RISCVVPseudo {
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ Op1Class:$rs2, Op2Class:$rs1,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1259,16 +1361,18 @@ class VPseudoBinaryMask<VReg RetClass,
class VPseudoBinaryMaskPolicy<VReg RetClass,
RegisterClass Op1Class,
DAGOperand Op2Class,
- string Constraint> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge,
- Op1Class:$rs2, Op2Class:$rs1,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ Op1Class:$rs2, Op2Class:$rs1,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
@@ -1279,11 +1383,11 @@ class VPseudoTernaryMaskPolicy<VReg RetClass,
RegisterClass Op1Class,
DAGOperand Op2Class,
string Constraint> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge,
- Op1Class:$rs2, Op2Class:$rs1,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
- RISCVVPseudo {
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ Op1Class:$rs2, Op2Class:$rs1,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1297,13 +1401,13 @@ class VPseudoTernaryMaskPolicyRoundingMode<VReg RetClass,
RegisterClass Op1Class,
DAGOperand Op2Class,
string Constraint> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge,
- Op1Class:$rs2, Op2Class:$rs1,
- VMaskOp:$vm,
- ixlenimm:$rm,
- AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
- RISCVVPseudo {
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ Op1Class:$rs2, Op2Class:$rs1,
+ VMaskOp:$vm,
+ ixlenimm:$rm,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1319,14 +1423,16 @@ class VPseudoTernaryMaskPolicyRoundingMode<VReg RetClass,
class VPseudoBinaryMOutNoMask<VReg RetClass,
VReg Op1Class,
DAGOperand Op2Class,
- string Constraint> :
- Pseudo<(outs RetClass:$rd),
- (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs RetClass:$rd),
+ (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = Constraint;
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
}
@@ -1335,16 +1441,18 @@ class VPseudoBinaryMOutNoMask<VReg RetClass,
class VPseudoBinaryMOutMask<VReg RetClass,
RegisterClass Op1Class,
DAGOperand Op2Class,
- string Constraint> :
- Pseudo<(outs RetClass:$rd),
- (ins RetClass:$merge,
- Op1Class:$rs2, Op2Class:$rs1,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$merge,
+ Op1Class:$rs2, Op2Class:$rs1,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let UsesMaskPolicy = 1;
@@ -1355,16 +1463,18 @@ class VPseudoBinaryMOutMask<VReg RetClass,
// This allows maskedoff and rs2 to be the same register.
class VPseudoTiedBinaryMask<VReg RetClass,
DAGOperand Op2Class,
- string Constraint> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge,
- Op2Class:$rs1,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ Op2Class:$rs1,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
@@ -1374,18 +1484,20 @@ class VPseudoTiedBinaryMask<VReg RetClass,
class VPseudoTiedBinaryMaskRoundingMode<VReg RetClass,
DAGOperand Op2Class,
- string Constraint> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge,
- Op2Class:$rs1,
- VMaskOp:$vm,
- ixlenimm:$rm,
- AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ Op2Class:$rs1,
+ VMaskOp:$vm,
+ ixlenimm:$rm,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
@@ -1400,17 +1512,20 @@ class VPseudoBinaryCarryIn<VReg RetClass,
DAGOperand Op2Class,
LMULInfo MInfo,
bit CarryIn,
- string Constraint> :
- Pseudo<(outs RetClass:$rd),
- !if(CarryIn,
- (ins Op1Class:$rs2, Op2Class:$rs1, VMV0:$carry, AVL:$vl,
- ixlenimm:$sew),
- (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew)), []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs RetClass:$rd),
+ !if(CarryIn,
+ (ins Op1Class:$rs2, Op2Class:$rs1,
+ VMV0:$carry, AVL:$vl, ixlenimm:$sew),
+ (ins Op1Class:$rs2, Op2Class:$rs1,
+ AVL:$vl, ixlenimm:$sew)), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = Constraint;
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let VLMul = MInfo.value;
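// The !if above selects between two complete (ins ...) dags at
// instantiation time, letting the carry-in (vadc-style) and carry-less
// forms share one class. Self-contained sketch, with stub operand
// records standing in for the real ones (CarryOp and the defs are
// illustrative names):
def ins;
def GPR;
def VMV0;
class CarryOp<bit CarryIn> {
  dag InOperandList = !if(CarryIn,
                          (ins GPR:$rs1, VMV0:$carry),
                          (ins GPR:$rs1));
}
def WithCarry : CarryOp<1>; // (ins GPR:$rs1, VMV0:$carry)
def NoCarry   : CarryOp<0>; // (ins GPR:$rs1)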
@@ -1422,12 +1537,13 @@ class VPseudoTiedBinaryCarryIn<VReg RetClass,
LMULInfo MInfo,
bit CarryIn,
string Constraint> :
- Pseudo<(outs RetClass:$rd),
- !if(CarryIn,
- (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, VMV0:$carry, AVL:$vl,
- ixlenimm:$sew),
- (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew)), []>,
- RISCVVPseudo {
+ Pseudo<(outs RetClass:$rd),
+ !if(CarryIn,
+ (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1,
+ VMV0:$carry, AVL:$vl, ixlenimm:$sew),
+ (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1,
+ AVL:$vl, ixlenimm:$sew)), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1442,11 +1558,10 @@ class VPseudoTernaryNoMask<VReg RetClass,
RegisterClass Op1Class,
DAGOperand Op2Class,
string Constraint> :
- Pseudo<(outs RetClass:$rd),
- (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
- AVL:$vl, ixlenimm:$sew),
- []>,
- RISCVVPseudo {
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
+ AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1458,16 +1573,17 @@ class VPseudoTernaryNoMask<VReg RetClass,
class VPseudoTernaryNoMaskWithPolicy<VReg RetClass,
RegisterClass Op1Class,
DAGOperand Op2Class,
- string Constraint> :
- Pseudo<(outs RetClass:$rd),
- (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
- AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),
- []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $rs3"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVecPolicyOp = 1;
let HasVLOp = 1;
let HasSEWOp = 1;
@@ -1476,16 +1592,17 @@ class VPseudoTernaryNoMaskWithPolicy<VReg RetClass,
class VPseudoTernaryNoMaskWithPolicyRoundingMode<VReg RetClass,
RegisterClass Op1Class,
DAGOperand Op2Class,
- string Constraint> :
- Pseudo<(outs RetClass:$rd),
- (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
- ixlenimm:$rm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),
- []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
+ ixlenimm:$rm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $rs3"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVecPolicyOp = 1;
let HasVLOp = 1;
let HasSEWOp = 1;
@@ -1493,10 +1610,12 @@ class VPseudoTernaryNoMaskWithPolicyRoundingMode<VReg RetClass,
let UsesVXRM = 0;
}
-class VPseudoUSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF>:
+class VPseudoUSSegLoadNoMask<VReg RetClass,
+ int EEW,
+ bits<4> NF> :
Pseudo<(outs RetClass:$rd),
(ins RetClass:$dest, GPRMem:$rs1, AVL:$vl,
- ixlenimm:$sew, ixlenimm:$policy),[]>,
+ ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLSEG<NF, /*Masked*/0, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -1508,10 +1627,12 @@ class VPseudoUSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF>:
let Constraints = "$rd = $dest";
}
-class VPseudoUSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>:
+class VPseudoUSSegLoadMask<VReg RetClass,
+ int EEW,
+ bits<4> NF> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, GPRMem:$rs1,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLSEG<NF, /*Masked*/1, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -1524,10 +1645,12 @@ class VPseudoUSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>:
let UsesMaskPolicy = 1;
}
-class VPseudoUSSegLoadFFNoMask<VReg RetClass, int EEW, bits<4> NF>:
+class VPseudoUSSegLoadFFNoMask<VReg RetClass,
+ int EEW,
+ bits<4> NF> :
Pseudo<(outs RetClass:$rd, GPR:$vl),
(ins RetClass:$dest, GPRMem:$rs1, AVL:$avl,
- ixlenimm:$sew, ixlenimm:$policy),[]>,
+ ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLSEG<NF, /*Masked*/0, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -1539,10 +1662,12 @@ class VPseudoUSSegLoadFFNoMask<VReg RetClass, int EEW, bits<4> NF>:
let Constraints = "$rd = $dest";
}
-class VPseudoUSSegLoadFFMask<VReg RetClass, int EEW, bits<4> NF>:
+class VPseudoUSSegLoadFFMask<VReg RetClass,
+ int EEW,
+ bits<4> NF> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd, GPR:$vl),
(ins GetVRegNoV0<RetClass>.R:$merge, GPRMem:$rs1,
- VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy),[]>,
+ VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLSEG<NF, /*Masked*/1, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -1555,10 +1680,12 @@ class VPseudoUSSegLoadFFMask<VReg RetClass, int EEW, bits<4> NF>:
let UsesMaskPolicy = 1;
}
-class VPseudoSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF>:
+class VPseudoSSegLoadNoMask<VReg RetClass,
+ int EEW,
+ bits<4> NF> :
Pseudo<(outs RetClass:$rd),
(ins RetClass:$merge, GPRMem:$rs1, GPR:$offset, AVL:$vl,
- ixlenimm:$sew, ixlenimm:$policy),[]>,
+ ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLSEG<NF, /*Masked*/0, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -1570,11 +1697,13 @@ class VPseudoSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF>:
let Constraints = "$rd = $merge";
}
-class VPseudoSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>:
+class VPseudoSSegLoadMask<VReg RetClass,
+ int EEW,
+ bits<4> NF> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, GPRMem:$rs1,
GPR:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew,
- ixlenimm:$policy),[]>,
+ ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLSEG<NF, /*Masked*/1, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -1587,11 +1716,15 @@ class VPseudoSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>:
let UsesMaskPolicy = 1;
}
-class VPseudoISegLoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
- bits<4> NF, bit Ordered>:
+class VPseudoISegLoadNoMask<VReg RetClass,
+ VReg IdxClass,
+ int EEW,
+ bits<3> LMUL,
+ bits<4> NF,
+ bit Ordered> :
Pseudo<(outs RetClass:$rd),
(ins RetClass:$merge, GPRMem:$rs1, IdxClass:$offset, AVL:$vl,
- ixlenimm:$sew, ixlenimm:$policy),[]>,
+ ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLXSEG<NF, /*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 1;
@@ -1605,12 +1738,16 @@ class VPseudoISegLoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
let HasVecPolicyOp = 1;
}
-class VPseudoISegLoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
- bits<4> NF, bit Ordered>:
+class VPseudoISegLoadMask<VReg RetClass,
+ VReg IdxClass,
+ int EEW,
+ bits<3> LMUL,
+ bits<4> NF,
+ bit Ordered> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, GPRMem:$rs1,
IdxClass:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew,
- ixlenimm:$policy),[]>,
+ ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLXSEG<NF, /*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 1;
@@ -1625,9 +1762,11 @@ class VPseudoISegLoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
let UsesMaskPolicy = 1;
}
-class VPseudoUSSegStoreNoMask<VReg ValClass, int EEW, bits<4> NF>:
+class VPseudoUSSegStoreNoMask<VReg ValClass,
+ int EEW,
+ bits<4> NF> :
Pseudo<(outs),
- (ins ValClass:$rd, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
+ (ins ValClass:$rd, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo,
RISCVVSSEG<NF, /*Masked*/0, /*Strided*/0, !logtwo(EEW), VLMul> {
let mayLoad = 0;
@@ -1637,10 +1776,12 @@ class VPseudoUSSegStoreNoMask<VReg ValClass, int EEW, bits<4> NF>:
let HasSEWOp = 1;
}
-class VPseudoUSSegStoreMask<VReg ValClass, int EEW, bits<4> NF>:
+class VPseudoUSSegStoreMask<VReg ValClass,
+ int EEW,
+ bits<4> NF> :
Pseudo<(outs),
(ins ValClass:$rd, GPRMem:$rs1,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo,
RISCVVSSEG<NF, /*Masked*/1, /*Strided*/0, !logtwo(EEW), VLMul> {
let mayLoad = 0;
@@ -1650,9 +1791,12 @@ class VPseudoUSSegStoreMask<VReg ValClass, int EEW, bits<4> NF>:
let HasSEWOp = 1;
}
-class VPseudoSSegStoreNoMask<VReg ValClass, int EEW, bits<4> NF>:
+class VPseudoSSegStoreNoMask<VReg ValClass,
+ int EEW,
+ bits<4> NF> :
Pseudo<(outs),
- (ins ValClass:$rd, GPRMem:$rs1, GPR: $offset, AVL:$vl, ixlenimm:$sew),[]>,
+ (ins ValClass:$rd, GPRMem:$rs1, GPR:$offset,
+ AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo,
RISCVVSSEG<NF, /*Masked*/0, /*Strided*/1, !logtwo(EEW), VLMul> {
let mayLoad = 0;
@@ -1662,10 +1806,12 @@ class VPseudoSSegStoreNoMask<VReg ValClass, int EEW, bits<4> NF>:
let HasSEWOp = 1;
}
-class VPseudoSSegStoreMask<VReg ValClass, int EEW, bits<4> NF>:
+class VPseudoSSegStoreMask<VReg ValClass,
+ int EEW,
+ bits<4> NF> :
Pseudo<(outs),
(ins ValClass:$rd, GPRMem:$rs1, GPR: $offset,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo,
RISCVVSSEG<NF, /*Masked*/1, /*Strided*/1, !logtwo(EEW), VLMul> {
let mayLoad = 0;
@@ -1675,11 +1821,15 @@ class VPseudoSSegStoreMask<VReg ValClass, int EEW, bits<4> NF>:
let HasSEWOp = 1;
}
-class VPseudoISegStoreNoMask<VReg ValClass, VReg IdxClass, int EEW, bits<3> LMUL,
- bits<4> NF, bit Ordered>:
+class VPseudoISegStoreNoMask<VReg ValClass,
+ VReg IdxClass,
+ int EEW,
+ bits<3> LMUL,
+ bits<4> NF,
+ bit Ordered> :
Pseudo<(outs),
(ins ValClass:$rd, GPRMem:$rs1, IdxClass: $index,
- AVL:$vl, ixlenimm:$sew),[]>,
+ AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo,
RISCVVSXSEG<NF, /*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 0;
@@ -1689,11 +1839,15 @@ class VPseudoISegStoreNoMask<VReg ValClass, VReg IdxClass, int EEW, bits<3> LMUL
let HasSEWOp = 1;
}
-class VPseudoISegStoreMask<VReg ValClass, VReg IdxClass, int EEW, bits<3> LMUL,
- bits<4> NF, bit Ordered>:
+class VPseudoISegStoreMask<VReg ValClass,
+ VReg IdxClass,
+ int EEW,
+ bits<3> LMUL,
+ bits<4> NF,
+ bit Ordered> :
Pseudo<(outs),
(ins ValClass:$rd, GPRMem:$rs1, IdxClass: $index,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo,
RISCVVSXSEG<NF, /*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 0;
@@ -1782,15 +1936,16 @@ multiclass VPseudoILoad<bit Ordered> {
defvar Vreg = dataEMUL.vrclass;
defvar IdxVreg = idxEMUL.vrclass;
defvar HasConstraint = !ne(dataEEW, idxEEW);
- defvar Order = !if(Ordered, "O", "U");
+ defvar TypeConstraints =
+ !if(!eq(dataEEW, idxEEW), 1, !if(!gt(dataEEW, idxEEW), !if(!ge(idxEMULOctuple, 8), 3, 1), 2));
let VLMul = dataEMUL.value in {
def "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo :
- VPseudoILoadNoMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered, HasConstraint>,
- VLXSched<dataEEW, Order, DataLInfo, IdxLInfo>;
+ VPseudoILoadNoMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered, HasConstraint, TypeConstraints>,
+ VLXSched<dataEEW, Ordered, DataLInfo, IdxLInfo>;
def "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" :
- VPseudoILoadMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered, HasConstraint>,
+ VPseudoILoadMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered, HasConstraint, TypeConstraints>,
RISCVMaskedPseudo<MaskIdx=3>,
- VLXSched<dataEEW, Order, DataLInfo, IdxLInfo>;
+ VLXSched<dataEEW, Ordered, DataLInfo, IdxLInfo>;
}
}
}
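// The new TypeConstraints defvar folds the indexed-load overlap rules into
// one value: 1 when data and index EEW match, 2 when the index elements
// are wider than the data, and 3 when they are narrower while the index
// EMUL is at least 1 (octuple is 8 times EMUL). Self-contained sketch of
// the nested !if (IdxCT and the defs are illustrative names):
class IdxCT<int dataEEW, int idxEEW, int idxEMULOctuple> {
  int Val = !if(!eq(dataEEW, idxEEW), 1,
            !if(!gt(dataEEW, idxEEW),
                !if(!ge(idxEMULOctuple, 8), 3, 1), 2));
}
def SameEEW   : IdxCT<32, 32, 4>; // Val = 1
def WiderIdx  : IdxCT<16, 32, 8>; // Val = 2
def NarrowIdx : IdxCT<32, 16, 8>; // Val = 3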
@@ -1853,14 +2008,13 @@ multiclass VPseudoIStore<bit Ordered> {
defvar idxEMUL = !cast<LMULInfo>("V_" # IdxLInfo);
defvar Vreg = dataEMUL.vrclass;
defvar IdxVreg = idxEMUL.vrclass;
- defvar Order = !if(Ordered, "O", "U");
let VLMul = dataEMUL.value in {
def "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo :
VPseudoIStoreNoMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered>,
- VSXSched<dataEEW, Order, DataLInfo, IdxLInfo>;
+ VSXSched<dataEEW, Ordered, DataLInfo, IdxLInfo>;
def "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" :
VPseudoIStoreMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered>,
- VSXSched<dataEEW, Order, DataLInfo, IdxLInfo>;
+ VSXSched<dataEEW, Ordered, DataLInfo, IdxLInfo>;
}
}
}
@@ -1871,13 +2025,11 @@ multiclass VPseudoIStore<bit Ordered> {
multiclass VPseudoVPOP_M {
foreach mti = AllMasks in {
defvar mx = mti.LMul.MX;
- defvar WriteVMPopV_MX = !cast<SchedWrite>("WriteVMPopV_" # mx);
- defvar ReadVMPopV_MX = !cast<SchedRead>("ReadVMPopV_" # mx);
let VLMul = mti.LMul.value in {
def "_M_" # mti.BX : VPseudoUnaryNoMaskGPROut,
- Sched<[WriteVMPopV_MX, ReadVMPopV_MX, ReadVMPopV_MX]>;
+ SchedBinary<"WriteVMPopV", "ReadVMPopV", "ReadVMPopV", mx>;
def "_M_" # mti.BX # "_MASK" : VPseudoUnaryMaskGPROut,
- Sched<[WriteVMPopV_MX, ReadVMPopV_MX, ReadVMPopV_MX]>;
+ SchedBinary<"WriteVMPopV", "ReadVMPopV", "ReadVMPopV", mx>;
}
}
}
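// SchedBinary/SchedUnary/SchedNullary (helpers defined in
// RISCVScheduleV.td) replace the removed per-MX defvar boilerplate;
// underneath they rely on the same record-lookup-by-concatenated-name
// idiom the old code spelled out with !cast. Minimal sketch of that
// idiom (SW, WriteX_M1 and User are illustrative names):
class SW<string n> {
  string Name = n;
}
def WriteX_M1 : SW<"WriteX_M1">;
defvar mx = "M1";
def User {
  SW W = !cast<SW>("WriteX_" # mx); // resolves to the WriteX_M1 record
}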
@@ -1885,13 +2037,11 @@ multiclass VPseudoVPOP_M {
multiclass VPseudoV1ST_M {
foreach mti = AllMasks in {
defvar mx = mti.LMul.MX;
- defvar WriteVMFFSV_MX = !cast<SchedWrite>("WriteVMFFSV_" # mx);
- defvar ReadVMFFSV_MX = !cast<SchedRead>("ReadVMFFSV_" # mx);
let VLMul = mti.LMul.value in {
- def "_M_" # mti.BX : VPseudoUnaryNoMaskGPROut,
- Sched<[WriteVMFFSV_MX, ReadVMFFSV_MX, ReadVMFFSV_MX]>;
+ def "_M_" #mti.BX : VPseudoUnaryNoMaskGPROut,
+ SchedBinary<"WriteVMFFSV", "ReadVMFFSV", "ReadVMFFSV", mx>;
def "_M_" # mti.BX # "_MASK" : VPseudoUnaryMaskGPROut,
- Sched<[WriteVMFFSV_MX, ReadVMFFSV_MX, ReadVMFFSV_MX]>;
+ SchedBinary<"WriteVMFFSV", "ReadVMFFSV", "ReadVMFFSV", mx>;
}
}
}
@@ -1900,13 +2050,13 @@ multiclass VPseudoVSFS_M {
defvar constraint = "@earlyclobber $rd";
foreach mti = AllMasks in {
defvar mx = mti.LMul.MX;
- defvar WriteVMSFSV_MX = !cast<SchedWrite>("WriteVMSFSV_" # mx);
- defvar ReadVMSFSV_MX = !cast<SchedRead>("ReadVMSFSV_" # mx);
let VLMul = mti.LMul.value in {
def "_M_" # mti.BX : VPseudoUnaryNoMask<VR, VR, constraint>,
- Sched<[WriteVMSFSV_MX, ReadVMSFSV_MX, ReadVMask]>;
+ SchedUnary<"WriteVMSFSV", "ReadVMSFSV", mx,
+ forceMergeOpRead=true>;
def "_M_" # mti.BX # "_MASK" : VPseudoUnaryMask<VR, VR, constraint>,
- Sched<[WriteVMSFSV_MX, ReadVMSFSV_MX, ReadVMask]>;
+ SchedUnary<"WriteVMSFSV", "ReadVMSFSV", mx,
+ forceMergeOpRead=true>;
}
}
}
@@ -1914,28 +2064,22 @@ multiclass VPseudoVSFS_M {
multiclass VPseudoVID_V {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVMIdxV_MX = !cast<SchedWrite>("WriteVMIdxV_" # mx);
- defvar ReadVMIdxV_MX = !cast<SchedRead>("ReadVMIdxV_" # mx);
-
let VLMul = m.value in {
- def "_V_" # m.MX : VPseudoNullaryNoMask<m.vrclass>,
- Sched<[WriteVMIdxV_MX, ReadVMask]>;
- def "_V_" # m.MX # "_MASK" : VPseudoNullaryMask<m.vrclass>,
+ def "_V_" # mx : VPseudoNullaryNoMask<m.vrclass>,
+ SchedNullary<"WriteVMIdxV", mx, forceMergeOpRead=true>;
+ def "_V_" # mx # "_MASK" : VPseudoNullaryMask<m.vrclass>,
RISCVMaskedPseudo<MaskIdx=1>,
- Sched<[WriteVMIdxV_MX, ReadVMask]>;
+ SchedNullary<"WriteVMIdxV", mx,
+ forceMergeOpRead=true>;
}
}
}
multiclass VPseudoNullaryPseudoM <string BaseInst> {
foreach mti = AllMasks in {
- defvar mx = mti.LMul.MX;
- defvar WriteVMALUV_MX = !cast<SchedWrite>("WriteVMALUV_" # mx);
- defvar ReadVMALUV_MX = !cast<SchedRead>("ReadVMALUV_" # mx);
-
let VLMul = mti.LMul.value in {
def "_M_" # mti.BX : VPseudoNullaryPseudoM<BaseInst # "_MM">,
- Sched<[WriteVMALUV_MX, ReadVMALUV_MX, ReadVMALUV_MX]>;
+ SchedBinary<"WriteVMALUV", "ReadVMALUV", "ReadVMALUV", mti.LMul.MX>;
}
}
}
@@ -1944,14 +2088,14 @@ multiclass VPseudoVIOT_M {
defvar constraint = "@earlyclobber $rd";
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVMIotV_MX = !cast<SchedWrite>("WriteVMIotV_" # mx);
- defvar ReadVMIotV_MX = !cast<SchedRead>("ReadVMIotV_" # mx);
let VLMul = m.value in {
- def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, VR, constraint>,
- Sched<[WriteVMIotV_MX, ReadVMIotV_MX, ReadVMask]>;
- def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, VR, constraint>,
- RISCVMaskedPseudo<MaskIdx=2>,
- Sched<[WriteVMIotV_MX, ReadVMIotV_MX, ReadVMask]>;
+ def "_" # mx : VPseudoUnaryNoMask<m.vrclass, VR, constraint>,
+ SchedUnary<"WriteVMIotV", "ReadVMIotV", mx,
+ forceMergeOpRead=true>;
+ def "_" # mx # "_MASK" : VPseudoUnaryMask<m.vrclass, VR, constraint>,
+ RISCVMaskedPseudo<MaskIdx=2, MaskAffectsRes=true>,
+ SchedUnary<"WriteVMIotV", "ReadVMIotV", mx,
+ forceMergeOpRead=true>;
}
}
}
@@ -1963,12 +2107,11 @@ multiclass VPseudoVCPR_V {
let VLMul = m.value in
foreach e = sews in {
defvar suffix = "_" # m.MX # "_E" # e;
- defvar WriteVCompressV_MX_E = !cast<SchedWrite>("WriteVCompressV" # suffix);
- defvar ReadVCompressV_MX_E = !cast<SchedRead>("ReadVCompressV" # suffix);
-
let SEW = e in
- def _VM # suffix : VPseudoUnaryAnyMask<m.vrclass, m.vrclass>,
- Sched<[WriteVCompressV_MX_E, ReadVCompressV_MX_E, ReadVCompressV_MX_E]>;
+ def _VM # suffix
+ : VPseudoUnaryAnyMask<m.vrclass, m.vrclass>,
+ SchedBinary<"WriteVCompressV", "ReadVCompressV", "ReadVCompressV",
+ mx, e>;
}
}
}
@@ -1978,33 +2121,50 @@ multiclass VPseudoBinary<VReg RetClass,
DAGOperand Op2Class,
LMULInfo MInfo,
string Constraint = "",
- int sew = 0> {
+ int sew = 0,
+ int TargetConstraintType = 1> {
let VLMul = MInfo.value, SEW=sew in {
defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
def suffix : VPseudoBinaryNoMaskTU<RetClass, Op1Class, Op2Class,
- Constraint>;
+ Constraint, TargetConstraintType>;
def suffix # "_MASK" : VPseudoBinaryMaskPolicy<RetClass, Op1Class, Op2Class,
- Constraint>,
+ Constraint, TargetConstraintType>,
RISCVMaskedPseudo<MaskIdx=3>;
}
}
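// Each pseudo's record name is the defm prefix plus the suffix computed
// here: sew = 0 keeps the short "_MX" form, a nonzero sew appends
// "_E<sew>". Sketch of the name construction (Named, Suffixed and the
// defm names are illustrative):
class Named<string s> {
  string FullSuffix = s;
}
multiclass Suffixed<string MX, int sew = 0> {
  defvar suffix = !if(sew, "_" # MX # "_E" # sew, "_" # MX);
  def suffix : Named<suffix>; // the defm name is implicitly prepended
}
defm PseudoVADD : Suffixed<"M1">;     // defines PseudoVADD_M1
defm PseudoVDIV : Suffixed<"M1", 32>; // defines PseudoVDIV_M1_E32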
+multiclass VPseudoBinaryNoMask<VReg RetClass,
+ VReg Op1Class,
+ DAGOperand Op2Class,
+ LMULInfo MInfo,
+ string Constraint = "",
+ int sew = 0> {
+ let VLMul = MInfo.value, SEW=sew in {
+ defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
+ def suffix : VPseudoBinaryNoMaskTU<RetClass, Op1Class, Op2Class,
+ Constraint>;
+ }
+}
+
multiclass VPseudoBinaryRoundingMode<VReg RetClass,
VReg Op1Class,
DAGOperand Op2Class,
LMULInfo MInfo,
string Constraint = "",
int sew = 0,
- int UsesVXRM = 1> {
+ int UsesVXRM = 1,
+ int TargetConstraintType = 1> {
let VLMul = MInfo.value, SEW=sew in {
defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
def suffix : VPseudoBinaryNoMaskRoundingMode<RetClass, Op1Class, Op2Class,
- Constraint, UsesVXRM>;
+ Constraint, UsesVXRM,
+ TargetConstraintType>;
def suffix # "_MASK" : VPseudoBinaryMaskPolicyRoundingMode<RetClass,
Op1Class,
Op2Class,
Constraint,
- UsesVXRM>,
+ UsesVXRM,
+ TargetConstraintType>,
RISCVMaskedPseudo<MaskIdx=3>;
}
}
@@ -2014,13 +2174,14 @@ multiclass VPseudoBinaryM<VReg RetClass,
VReg Op1Class,
DAGOperand Op2Class,
LMULInfo MInfo,
- string Constraint = ""> {
+ string Constraint = "",
+ int TargetConstraintType = 1> {
let VLMul = MInfo.value in {
def "_" # MInfo.MX : VPseudoBinaryMOutNoMask<RetClass, Op1Class, Op2Class,
- Constraint>;
+ Constraint, TargetConstraintType>;
let ForceTailAgnostic = true in
def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMOutMask<RetClass, Op1Class,
- Op2Class, Constraint>,
+ Op2Class, Constraint, TargetConstraintType>,
RISCVMaskedPseudo<MaskIdx=3>;
}
}
@@ -2045,24 +2206,26 @@ multiclass VPseudoBinaryEmul<VReg RetClass,
multiclass VPseudoTiedBinary<VReg RetClass,
DAGOperand Op2Class,
LMULInfo MInfo,
- string Constraint = ""> {
+ string Constraint = "",
+ int TargetConstraintType = 1> {
let VLMul = MInfo.value in {
def "_" # MInfo.MX # "_TIED": VPseudoTiedBinaryNoMask<RetClass, Op2Class,
- Constraint>;
+ Constraint, TargetConstraintType>;
def "_" # MInfo.MX # "_MASK_TIED" : VPseudoTiedBinaryMask<RetClass, Op2Class,
- Constraint>;
+ Constraint, TargetConstraintType>;
}
}
multiclass VPseudoTiedBinaryRoundingMode<VReg RetClass,
DAGOperand Op2Class,
LMULInfo MInfo,
- string Constraint = ""> {
+ string Constraint = "",
+ int TargetConstraintType = 1> {
let VLMul = MInfo.value in {
def "_" # MInfo.MX # "_TIED":
- VPseudoTiedBinaryNoMaskRoundingMode<RetClass, Op2Class, Constraint>;
+ VPseudoTiedBinaryNoMaskRoundingMode<RetClass, Op2Class, Constraint, TargetConstraintType>;
def "_" # MInfo.MX # "_MASK_TIED" :
- VPseudoTiedBinaryMaskRoundingMode<RetClass, Op2Class, Constraint>;
+ VPseudoTiedBinaryMaskRoundingMode<RetClass, Op2Class, Constraint, TargetConstraintType>;
}
}
@@ -2098,11 +2261,11 @@ multiclass VPseudoVGTR_VV_EEW<int eew, string Constraint = ""> {
defvar emul = !cast<LMULInfo>("V_" # emulMX);
defvar sews = SchedSEWSet<mx>.val;
foreach e = sews in {
- defvar WriteVRGatherVV_MX_E = !cast<SchedWrite>("WriteVRGatherVV_" # mx # "_E" # e);
- defvar ReadVRGatherVV_data_MX_E = !cast<SchedRead>("ReadVRGatherVV_data_" # mx # "_E" # e);
- defvar ReadVRGatherVV_index_MX_E = !cast<SchedRead>("ReadVRGatherVV_index_" # mx # "_E" # e);
- defm _VV : VPseudoBinaryEmul<m.vrclass, m.vrclass, emul.vrclass, m, emul, Constraint, e>,
- Sched<[WriteVRGatherVV_MX_E, ReadVRGatherVV_data_MX_E, ReadVRGatherVV_index_MX_E]>;
+ defm _VV
+ : VPseudoBinaryEmul<m.vrclass, m.vrclass, emul.vrclass, m, emul,
+ Constraint, e>,
+ SchedBinary<"WriteVRGatherVV", "ReadVRGatherVV_data",
+ "ReadVRGatherVV_index", mx, e, forceMergeOpRead=true>;
}
}
}
@@ -2119,13 +2282,9 @@ multiclass VPseudoBinaryV_VX_RM<LMULInfo m, string Constraint = ""> {
multiclass VPseudoVSLD1_VX<string Constraint = ""> {
foreach m = MxList in {
- defvar mx = m.MX;
- defvar WriteVISlide1X_MX = !cast<SchedWrite>("WriteVISlide1X_" # mx);
- defvar ReadVISlideV_MX = !cast<SchedRead>("ReadVISlideV_" # mx);
- defvar ReadVISlideX_MX = !cast<SchedRead>("ReadVISlideX_" # mx);
-
defm "_VX" : VPseudoBinary<m.vrclass, m.vrclass, GPR, m, Constraint>,
- Sched<[WriteVISlide1X_MX, ReadVISlideV_MX, ReadVISlideX_MX, ReadVMask]>;
+ SchedBinary<"WriteVISlide1X", "ReadVISlideV", "ReadVISlideX",
+ m.MX, forceMergeOpRead=true>;
}
}
@@ -2143,14 +2302,10 @@ multiclass VPseudoBinaryV_VF_RM<LMULInfo m, FPR_Info f, string Constraint = "",
multiclass VPseudoVSLD1_VF<string Constraint = ""> {
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFSlide1F_MX = !cast<SchedWrite>("WriteVFSlide1F_" # mx);
- defvar ReadVFSlideV_MX = !cast<SchedRead>("ReadVFSlideV_" # mx);
- defvar ReadVFSlideF_MX = !cast<SchedRead>("ReadVFSlideF_" # mx);
-
- defm "_V" # f.FX :
- VPseudoBinary<m.vrclass, m.vrclass, f.fprclass, m, Constraint>,
- Sched<[WriteVFSlide1F_MX, ReadVFSlideV_MX, ReadVFSlideF_MX, ReadVMask]>;
+ defm "_V" #f.FX
+ : VPseudoBinary<m.vrclass, m.vrclass, f.fprclass, m, Constraint>,
+ SchedBinary<"WriteVFSlide1F", "ReadVFSlideV", "ReadVFSlideF", m.MX,
+ forceMergeOpRead=true>;
}
}
}
@@ -2163,15 +2318,12 @@ multiclass VPseudoBinaryV_VI_RM<Operand ImmType = simm5, LMULInfo m, string Cons
defm _VI : VPseudoBinaryRoundingMode<m.vrclass, m.vrclass, ImmType, m, Constraint>;
}
-multiclass VPseudoVALU_MM {
+multiclass VPseudoVALU_MM<bit Commutable = 0> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVMALUV_MX = !cast<SchedWrite>("WriteVMALUV_" # mx);
- defvar ReadVMALUV_MX = !cast<SchedRead>("ReadVMALUV_" # mx);
-
- let VLMul = m.value in {
+ let VLMul = m.value, isCommutable = Commutable in {
def "_MM_" # mx : VPseudoBinaryNoMask<VR, VR, VR, "">,
- Sched<[WriteVMALUV_MX, ReadVMALUV_MX, ReadVMALUV_MX]>;
+ SchedBinary<"WriteVMALUV", "ReadVMALUV", "ReadVMALUV", mx>;
}
}
}
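// The new Commutable bit flows into isCommutable through the enclosing
// `let`, so commutativity is declared once per defm instead of per
// instruction. Sketch of a let driven by a template argument (Inst,
// MaskOp and the defm names are illustrative):
class Inst {
  bit isCommutable = 0;
}
multiclass MaskOp<bit Commutable = 0> {
  let isCommutable = Commutable in
  def _MM : Inst;
}
defm PseudoVMAND  : MaskOp<1>; // PseudoVMAND_MM.isCommutable = 1
defm PseudoVMANDN : MaskOp;    // stays 0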
@@ -2185,17 +2337,23 @@ multiclass VPseudoVALU_MM {
// destination register group is legal. Otherwise, it is illegal.
multiclass VPseudoBinaryW_VV<LMULInfo m> {
defm _VV : VPseudoBinary<m.wvrclass, m.vrclass, m.vrclass, m,
- "@earlyclobber $rd">;
+ "@earlyclobber $rd", TargetConstraintType=3>;
}
multiclass VPseudoBinaryW_VV_RM<LMULInfo m> {
defm _VV : VPseudoBinaryRoundingMode<m.wvrclass, m.vrclass, m.vrclass, m,
- "@earlyclobber $rd", UsesVXRM=0>;
+ "@earlyclobber $rd", UsesVXRM=0,
+ TargetConstraintType=3>;
}
multiclass VPseudoBinaryW_VX<LMULInfo m> {
defm "_VX" : VPseudoBinary<m.wvrclass, m.vrclass, GPR, m,
- "@earlyclobber $rd">;
+ "@earlyclobber $rd", TargetConstraintType=3>;
+}
+
+multiclass VPseudoBinaryW_VI<Operand ImmType, LMULInfo m> {
+ defm "_VI" : VPseudoBinary<m.wvrclass, m.vrclass, ImmType, m,
+ "@earlyclobber $rd", TargetConstraintType=3>;
}
multiclass VPseudoBinaryW_VF<LMULInfo m, FPR_Info f> {
@@ -2208,36 +2366,40 @@ multiclass VPseudoBinaryW_VF_RM<LMULInfo m, FPR_Info f> {
defm "_V" # f.FX : VPseudoBinaryRoundingMode<m.wvrclass, m.vrclass,
f.fprclass, m,
"@earlyclobber $rd",
- UsesVXRM=0>;
+ UsesVXRM=0,
+ TargetConstraintType=3>;
}
multiclass VPseudoBinaryW_WV<LMULInfo m> {
defm _WV : VPseudoBinary<m.wvrclass, m.wvrclass, m.vrclass, m,
- "@earlyclobber $rd">;
+ "@earlyclobber $rd", TargetConstraintType=3>;
defm _WV : VPseudoTiedBinary<m.wvrclass, m.vrclass, m,
- "@earlyclobber $rd">;
+ "@earlyclobber $rd", TargetConstraintType=3>;
}
multiclass VPseudoBinaryW_WV_RM<LMULInfo m> {
defm _WV : VPseudoBinaryRoundingMode<m.wvrclass, m.wvrclass, m.vrclass, m,
- "@earlyclobber $rd", UsesVXRM=0>;
+ "@earlyclobber $rd", UsesVXRM=0, TargetConstraintType=3>;
defm _WV : VPseudoTiedBinaryRoundingMode<m.wvrclass, m.vrclass, m,
- "@earlyclobber $rd">;
+ "@earlyclobber $rd", TargetConstraintType=3>;
}
multiclass VPseudoBinaryW_WX<LMULInfo m> {
- defm "_WX" : VPseudoBinary<m.wvrclass, m.wvrclass, GPR, m>;
+ defm "_WX" : VPseudoBinary<m.wvrclass, m.wvrclass, GPR, m, /*Constraint*/ "", TargetConstraintType=3>;
}
-multiclass VPseudoBinaryW_WF<LMULInfo m, FPR_Info f> {
+multiclass VPseudoBinaryW_WF<LMULInfo m, FPR_Info f, int TargetConstraintType = 1> {
defm "_W" # f.FX : VPseudoBinary<m.wvrclass, m.wvrclass,
- f.fprclass, m>;
+ f.fprclass, m, /*Constraint*/ "", TargetConstraintType=TargetConstraintType>;
}
multiclass VPseudoBinaryW_WF_RM<LMULInfo m, FPR_Info f> {
defm "_W" # f.FX : VPseudoBinaryRoundingMode<m.wvrclass, m.wvrclass,
f.fprclass, m,
- UsesVXRM=0>;
+ Constraint="",
+ sew=0,
+ UsesVXRM=0,
+ TargetConstraintType=3>;
}
// Narrowing instructions like vnsrl/vnsra/vnclip(u) don't need @earlyclobber
@@ -2245,9 +2407,9 @@ multiclass VPseudoBinaryW_WF_RM<LMULInfo m, FPR_Info f> {
// exception from the spec.
// "The destination EEW is smaller than the source EEW and the overlap is in the
// lowest-numbered part of the source register group."
-multiclass VPseudoBinaryV_WV<LMULInfo m> {
+multiclass VPseudoBinaryV_WV<LMULInfo m, int TargetConstraintType = 1> {
defm _WV : VPseudoBinary<m.vrclass, m.wvrclass, m.vrclass, m,
- !if(!ge(m.octuple, 8), "@earlyclobber $rd", "")>;
+ !if(!ge(m.octuple, 8), "@earlyclobber $rd", ""), TargetConstraintType=TargetConstraintType>;
}
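// m.octuple is 8 times the effective LMUL, so !ge(m.octuple, 8) means
// "EMUL >= 1": for fractional LMUL any overlap falls in the
// lowest-numbered part of the source group, the exception quoted above,
// so no @earlyclobber is needed. Sketch of the conditional constraint
// (NarrowCns and the defs are illustrative names):
class NarrowCns<int octuple> {
  string Constraints = !if(!ge(octuple, 8), "@earlyclobber $rd", "");
}
def Mf2 : NarrowCns<4>;  // LMUL=1/2: no constraint
def M2  : NarrowCns<16>; // LMUL=2: "@earlyclobber $rd"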
multiclass VPseudoBinaryV_WV_RM<LMULInfo m> {
@@ -2256,9 +2418,9 @@ multiclass VPseudoBinaryV_WV_RM<LMULInfo m> {
"@earlyclobber $rd", "")>;
}
-multiclass VPseudoBinaryV_WX<LMULInfo m> {
+multiclass VPseudoBinaryV_WX<LMULInfo m, int TargetConstraintType = 1> {
defm _WX : VPseudoBinary<m.vrclass, m.wvrclass, GPR, m,
- !if(!ge(m.octuple, 8), "@earlyclobber $rd", "")>;
+ !if(!ge(m.octuple, 8), "@earlyclobber $rd", ""), TargetConstraintType=TargetConstraintType>;
}
multiclass VPseudoBinaryV_WX_RM<LMULInfo m> {
@@ -2267,9 +2429,9 @@ multiclass VPseudoBinaryV_WX_RM<LMULInfo m> {
"@earlyclobber $rd", "")>;
}
-multiclass VPseudoBinaryV_WI<LMULInfo m> {
+multiclass VPseudoBinaryV_WI<LMULInfo m, int TargetConstraintType = 1> {
defm _WI : VPseudoBinary<m.vrclass, m.wvrclass, uimm5, m,
- !if(!ge(m.octuple, 8), "@earlyclobber $rd", "")>;
+ !if(!ge(m.octuple, 8), "@earlyclobber $rd", ""), TargetConstraintType=TargetConstraintType>;
}
multiclass VPseudoBinaryV_WI_RM<LMULInfo m> {
@@ -2282,12 +2444,15 @@ multiclass VPseudoBinaryV_WI_RM<LMULInfo m> {
// vector register is v0.
// For vadc and vsbc, CarryIn == 1 and CarryOut == 0
multiclass VPseudoBinaryV_VM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1,
- string Constraint = ""> {
+ string Constraint = "",
+ bit Commutable = 0,
+ int TargetConstraintType = 1> {
+ let isCommutable = Commutable in
def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX :
VPseudoBinaryCarryIn<!if(CarryOut, VR,
!if(!and(CarryIn, !not(CarryOut)),
GetVRegNoV0<m.vrclass>.R, m.vrclass)),
- m.vrclass, m.vrclass, m, CarryIn, Constraint>;
+ m.vrclass, m.vrclass, m, CarryIn, Constraint, TargetConstraintType>;
}
multiclass VPseudoTiedBinaryV_VM<LMULInfo m> {
@@ -2297,12 +2462,12 @@ multiclass VPseudoTiedBinaryV_VM<LMULInfo m> {
}
multiclass VPseudoBinaryV_XM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1,
- string Constraint = ""> {
+ string Constraint = "", int TargetConstraintType = 1> {
def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX :
VPseudoBinaryCarryIn<!if(CarryOut, VR,
!if(!and(CarryIn, !not(CarryOut)),
GetVRegNoV0<m.vrclass>.R, m.vrclass)),
- m.vrclass, GPR, m, CarryIn, Constraint>;
+ m.vrclass, GPR, m, CarryIn, Constraint, TargetConstraintType>;
}
multiclass VPseudoTiedBinaryV_XM<LMULInfo m> {
@@ -2315,25 +2480,23 @@ multiclass VPseudoVMRG_FM {
foreach f = FPList in {
foreach m = f.MxList in {
defvar mx = m.MX;
- defvar WriteVFMergeV_MX = !cast<SchedWrite>("WriteVFMergeV_" # mx);
- defvar ReadVFMergeV_MX = !cast<SchedRead>("ReadVFMergeV_" # mx);
- defvar ReadVFMergeF_MX = !cast<SchedRead>("ReadVFMergeF_" # mx);
-
- def "_V" # f.FX # "M_" # mx:
- VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
- m.vrclass, f.fprclass, m, CarryIn=1, Constraint="">,
- Sched<[WriteVFMergeV_MX, ReadVFMergeV_MX, ReadVFMergeF_MX, ReadVMask]>;
+ def "_V" # f.FX # "M_" # mx
+ : VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R, m.vrclass,
+ f.fprclass, m, CarryIn=1,
+ Constraint = "">,
+ SchedBinary<"WriteVFMergeV", "ReadVFMergeV", "ReadVFMergeF", mx,
+ forceMasked=1, forceMergeOpRead=true>;
}
}
}
multiclass VPseudoBinaryV_IM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1,
- string Constraint = ""> {
+ string Constraint = "", int TargetConstraintType = 1> {
def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX :
VPseudoBinaryCarryIn<!if(CarryOut, VR,
!if(!and(CarryIn, !not(CarryOut)),
GetVRegNoV0<m.vrclass>.R, m.vrclass)),
- m.vrclass, simm5, m, CarryIn, Constraint>;
+ m.vrclass, simm5, m, CarryIn, Constraint, TargetConstraintType>;
}
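// The !if chain in these carry multiclasses chooses the result class: VR
// when the pseudo produces a carry/mask out (CarryOut), the no-v0 variant
// when it reads a carry in v0 without producing one, and the plain vector
// class otherwise (per the comment above, vadc/vsbc have CarryIn == 1 and
// CarryOut == 0). Sketch of the !and/!not selection (Pick and the defs
// are illustrative names):
class Pick<bit CarryOut, bit CarryIn> {
  string RC = !if(CarryOut, "VR",
              !if(!and(CarryIn, !not(CarryOut)), "VRNoV0", "PlainVReg"));
}
def Vadc  : Pick<0, 1>; // RC = "VRNoV0"
def Vmadc : Pick<1, 1>; // RC = "VR"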
multiclass VPseudoTiedBinaryV_IM<LMULInfo m> {
@@ -2346,19 +2509,16 @@ multiclass VPseudoUnaryVMV_V_X_I {
foreach m = MxList in {
let VLMul = m.value in {
defvar mx = m.MX;
- defvar WriteVIMovV_MX = !cast<SchedWrite>("WriteVIMovV_" # mx);
- defvar WriteVIMovX_MX = !cast<SchedWrite>("WriteVIMovX_" # mx);
- defvar WriteVIMovI_MX = !cast<SchedWrite>("WriteVIMovI_" # mx);
- defvar ReadVIMovV_MX = !cast<SchedRead>("ReadVIMovV_" # mx);
- defvar ReadVIMovX_MX = !cast<SchedRead>("ReadVIMovX_" # mx);
-
let VLMul = m.value in {
def "_V_" # mx : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
- Sched<[WriteVIMovV_MX, ReadVIMovV_MX]>;
+ SchedUnary<"WriteVIMovV", "ReadVIMovV", mx,
+ forceMergeOpRead=true>;
def "_X_" # mx : VPseudoUnaryNoMask<m.vrclass, GPR>,
- Sched<[WriteVIMovX_MX, ReadVIMovX_MX]>;
+ SchedUnary<"WriteVIMovX", "ReadVIMovX", mx,
+ forceMergeOpRead=true>;
def "_I_" # mx : VPseudoUnaryNoMask<m.vrclass, simm5>,
- Sched<[WriteVIMovI_MX]>;
+ SchedNullary<"WriteVIMovI", mx,
+ forceMergeOpRead=true>;
}
}
}
@@ -2368,13 +2528,10 @@ multiclass VPseudoVMV_F {
foreach f = FPList in {
foreach m = f.MxList in {
defvar mx = m.MX;
- defvar WriteVFMovV_MX = !cast<SchedWrite>("WriteVFMovV_" # mx);
- defvar ReadVFMovF_MX = !cast<SchedRead>("ReadVFMovF_" # mx);
-
let VLMul = m.value in {
def "_" # f.FX # "_" # mx :
VPseudoUnaryNoMask<m.vrclass, f.fprclass>,
- Sched<[WriteVFMovV_MX, ReadVFMovF_MX]>;
+ SchedUnary<"WriteVFMovV", "ReadVFMovF", mx, forceMergeOpRead=true>;
}
}
}
@@ -2383,15 +2540,14 @@ multiclass VPseudoVMV_F {
multiclass VPseudoVCLS_V {
foreach m = MxListF in {
defvar mx = m.MX;
- defvar WriteVFClassV_MX = !cast<SchedWrite>("WriteVFClassV_" # mx);
- defvar ReadVFClassV_MX = !cast<SchedRead>("ReadVFClassV_" # mx);
-
let VLMul = m.value in {
def "_V_" # mx : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
- Sched<[WriteVFClassV_MX, ReadVFClassV_MX, ReadVMask]>;
+ SchedUnary<"WriteVFClassV", "ReadVFClassV", mx,
+ forceMergeOpRead=true>;
def "_V_" # mx # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>,
RISCVMaskedPseudo<MaskIdx=2>,
- Sched<[WriteVFClassV_MX, ReadVFClassV_MX, ReadVMask]>;
+ SchedUnary<"WriteVFClassV", "ReadVFClassV", mx,
+ forceMergeOpRead=true>;
}
}
}
@@ -2404,17 +2560,15 @@ multiclass VPseudoVSQR_V_RM {
let VLMul = m.value in
foreach e = sews in {
defvar suffix = "_" # mx # "_E" # e;
- defvar WriteVFSqrtV_MX_E = !cast<SchedWrite>("WriteVFSqrtV" # suffix);
- defvar ReadVFSqrtV_MX_E = !cast<SchedRead>("ReadVFSqrtV" # suffix);
-
let SEW = e in {
def "_V" # suffix : VPseudoUnaryNoMaskRoundingMode<m.vrclass, m.vrclass>,
- Sched<[WriteVFSqrtV_MX_E, ReadVFSqrtV_MX_E,
- ReadVMask]>;
- def "_V" # suffix # "_MASK" : VPseudoUnaryMaskRoundingMode<m.vrclass, m.vrclass>,
- RISCVMaskedPseudo<MaskIdx=2>,
- Sched<[WriteVFSqrtV_MX_E, ReadVFSqrtV_MX_E,
- ReadVMask]>;
+ SchedUnary<"WriteVFSqrtV", "ReadVFSqrtV", mx, e,
+ forceMergeOpRead=true>;
+ def "_V" #suffix # "_MASK"
+ : VPseudoUnaryMaskRoundingMode<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx = 2>,
+ SchedUnary<"WriteVFSqrtV", "ReadVFSqrtV", mx, e,
+ forceMergeOpRead=true>;
}
}
}
@@ -2423,15 +2577,14 @@ multiclass VPseudoVSQR_V_RM {
multiclass VPseudoVRCP_V {
foreach m = MxListF in {
defvar mx = m.MX;
- defvar WriteVFRecpV_MX = !cast<SchedWrite>("WriteVFRecpV_" # mx);
- defvar ReadVFRecpV_MX = !cast<SchedRead>("ReadVFRecpV_" # mx);
-
let VLMul = m.value in {
- def "_V_" # mx : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
- Sched<[WriteVFRecpV_MX, ReadVFRecpV_MX, ReadVMask]>;
- def "_V_" # mx # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>,
- RISCVMaskedPseudo<MaskIdx=2>,
- Sched<[WriteVFRecpV_MX, ReadVFRecpV_MX, ReadVMask]>;
+ def "_V_" # mx
+ : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, forceMergeOpRead=true>;
+ def "_V_" # mx # "_MASK"
+ : VPseudoUnaryMask<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx = 2>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, forceMergeOpRead=true>;
}
}
}
@@ -2439,69 +2592,59 @@ multiclass VPseudoVRCP_V {
multiclass VPseudoVRCP_V_RM {
foreach m = MxListF in {
defvar mx = m.MX;
- defvar WriteVFRecpV_MX = !cast<SchedWrite>("WriteVFRecpV_" # mx);
- defvar ReadVFRecpV_MX = !cast<SchedRead>("ReadVFRecpV_" # mx);
-
let VLMul = m.value in {
- def "_V_" # mx : VPseudoUnaryNoMaskRoundingMode<m.vrclass, m.vrclass>,
- Sched<[WriteVFRecpV_MX, ReadVFRecpV_MX, ReadVMask]>;
- def "_V_" # mx # "_MASK" : VPseudoUnaryMaskRoundingMode<m.vrclass, m.vrclass>,
- RISCVMaskedPseudo<MaskIdx=2>,
- Sched<[WriteVFRecpV_MX, ReadVFRecpV_MX, ReadVMask]>;
+ def "_V_" # mx
+ : VPseudoUnaryNoMaskRoundingMode<m.vrclass, m.vrclass>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, forceMergeOpRead=true>;
+ def "_V_" # mx # "_MASK"
+ : VPseudoUnaryMaskRoundingMode<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx = 2>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, forceMergeOpRead=true>;
}
}
}
-multiclass PseudoVEXT_VF2 {
+multiclass PseudoVEXT_VF2<int TargetConstraintType = 1> {
defvar constraints = "@earlyclobber $rd";
foreach m = MxListVF2 in {
defvar mx = m.MX;
- defvar WriteVExtV_MX = !cast<SchedWrite>("WriteVExtV_" # mx);
- defvar ReadVExtV_MX = !cast<SchedRead>("ReadVExtV_" # mx);
-
let VLMul = m.value in {
- def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints>,
- Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>;
+ def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints, TargetConstraintType>,
+ SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>;
def "_" # mx # "_MASK" :
- VPseudoUnaryMask<m.vrclass, m.f2vrclass, constraints>,
+ VPseudoUnaryMask<m.vrclass, m.f2vrclass, constraints, TargetConstraintType>,
RISCVMaskedPseudo<MaskIdx=2>,
- Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>;
+ SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>;
}
}
}
-multiclass PseudoVEXT_VF4 {
+multiclass PseudoVEXT_VF4<int TargetConstraintType = 1> {
defvar constraints = "@earlyclobber $rd";
foreach m = MxListVF4 in {
defvar mx = m.MX;
- defvar WriteVExtV_MX = !cast<SchedWrite>("WriteVExtV_" # mx);
- defvar ReadVExtV_MX = !cast<SchedRead>("ReadVExtV_" # mx);
-
let VLMul = m.value in {
- def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints>,
- Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>;
+ def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints, TargetConstraintType>,
+ SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>;
def "_" # mx # "_MASK" :
- VPseudoUnaryMask<m.vrclass, m.f4vrclass, constraints>,
+ VPseudoUnaryMask<m.vrclass, m.f4vrclass, constraints, TargetConstraintType>,
RISCVMaskedPseudo<MaskIdx=2>,
- Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>;
+ SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>;
}
}
}
-multiclass PseudoVEXT_VF8 {
+multiclass PseudoVEXT_VF8<int TargetConstraintType = 1> {
defvar constraints = "@earlyclobber $rd";
foreach m = MxListVF8 in {
defvar mx = m.MX;
- defvar WriteVExtV_MX = !cast<SchedWrite>("WriteVExtV_" # mx);
- defvar ReadVExtV_MX = !cast<SchedRead>("ReadVExtV_" # mx);
-
let VLMul = m.value in {
- def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints>,
- Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>;
+ def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints, TargetConstraintType>,
+ SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>;
def "_" # mx # "_MASK" :
- VPseudoUnaryMask<m.vrclass, m.f8vrclass, constraints>,
+ VPseudoUnaryMask<m.vrclass, m.f8vrclass, constraints, TargetConstraintType>,
RISCVMaskedPseudo<MaskIdx=2>,
- Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>;
+ SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>;
}
}
}
@@ -2517,51 +2660,43 @@ multiclass PseudoVEXT_VF8 {
// lowest-numbered part of the source register group".
// With LMUL<=1 the source and dest occupy a single register so any overlap
// is in the lowest-numbered part.
-multiclass VPseudoBinaryM_VV<LMULInfo m> {
+multiclass VPseudoBinaryM_VV<LMULInfo m, int TargetConstraintType = 1> {
defm _VV : VPseudoBinaryM<VR, m.vrclass, m.vrclass, m,
- !if(!ge(m.octuple, 16), "@earlyclobber $rd", "")>;
+ !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>;
}
-multiclass VPseudoBinaryM_VX<LMULInfo m> {
+multiclass VPseudoBinaryM_VX<LMULInfo m, int TargetConstraintType = 1> {
defm "_VX" :
VPseudoBinaryM<VR, m.vrclass, GPR, m,
- !if(!ge(m.octuple, 16), "@earlyclobber $rd", "")>;
+ !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>;
}
-multiclass VPseudoBinaryM_VF<LMULInfo m, FPR_Info f> {
+multiclass VPseudoBinaryM_VF<LMULInfo m, FPR_Info f, int TargetConstraintType = 1> {
defm "_V" # f.FX :
VPseudoBinaryM<VR, m.vrclass, f.fprclass, m,
- !if(!ge(m.octuple, 16), "@earlyclobber $rd", "")>;
+ !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>;
}
-multiclass VPseudoBinaryM_VI<LMULInfo m> {
+multiclass VPseudoBinaryM_VI<LMULInfo m, int TargetConstraintType = 1> {
defm _VI : VPseudoBinaryM<VR, m.vrclass, simm5, m,
- !if(!ge(m.octuple, 16), "@earlyclobber $rd", "")>;
+ !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>;
}
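The !ge(m.octuple, 16) guard above implements the comment's LMUL<=1 exemption:
octuple is LMUL scaled by eight (MF8=1 ... M1=8, M2=16, M4=32, M8=64), so the
earlyclobber constraint is attached only for LMUL >= 2, where the mask result
and a multi-register source group can partially overlap. A self-contained
illustration (class and record names are invented for the example):

  class CmpConstraint<int octuple> {
    string c = !if(!ge(octuple, 16), "@earlyclobber $rd", "");
  }
  def C_M1 : CmpConstraint<8>;   // c = "": source and dest fit in one register
  def C_M2 : CmpConstraint<16>;  // c = "@earlyclobber $rd": overlap is possible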
multiclass VPseudoVGTR_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVRGatherVX_MX = !cast<SchedWrite>("WriteVRGatherVX_" # mx);
- defvar WriteVRGatherVI_MX = !cast<SchedWrite>("WriteVRGatherVI_" # mx);
- defvar ReadVRGatherVX_data_MX = !cast<SchedRead>("ReadVRGatherVX_data_" # mx);
- defvar ReadVRGatherVX_index_MX = !cast<SchedRead>("ReadVRGatherVX_index_" # mx);
- defvar ReadVRGatherVI_data_MX = !cast<SchedRead>("ReadVRGatherVI_data_" # mx);
-
defm "" : VPseudoBinaryV_VX<m, Constraint>,
- Sched<[WriteVRGatherVX_MX, ReadVRGatherVX_data_MX,
- ReadVRGatherVX_index_MX, ReadVMask]>;
+ SchedBinary<"WriteVRGatherVX", "ReadVRGatherVX_data",
+ "ReadVRGatherVX_index", mx, forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VI<ImmType, m, Constraint>,
- Sched<[WriteVRGatherVI_MX, ReadVRGatherVI_data_MX, ReadVMask]>;
+ SchedUnary<"WriteVRGatherVI", "ReadVRGatherVI_data", mx,
+ forceMergeOpRead=true>;
defvar sews = SchedSEWSet<mx>.val;
foreach e = sews in {
- defvar WriteVRGatherVV_MX_E = !cast<SchedWrite>("WriteVRGatherVV_" # mx # "_E" # e);
- defvar ReadVRGatherVV_data_MX_E = !cast<SchedRead>("ReadVRGatherVV_data_" # mx # "_E" # e);
- defvar ReadVRGatherVV_index_MX_E = !cast<SchedRead>("ReadVRGatherVV_index_" # mx # "_E" # e);
defm "" : VPseudoBinaryV_VV<m, Constraint, e>,
- Sched<[WriteVRGatherVV_MX_E, ReadVRGatherVV_data_MX_E,
- ReadVRGatherVV_index_MX_E, ReadVMask]>;
+ SchedBinary<"WriteVRGatherVV", "ReadVRGatherVV_data",
+ "ReadVRGatherVV_index", mx, e, forceMergeOpRead=true>;
}
}
}
@@ -2569,18 +2704,14 @@ multiclass VPseudoVGTR_VV_VX_VI<Operand ImmType = simm5, string Constraint = "">
multiclass VPseudoVSALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVSALUV_MX = !cast<SchedWrite>("WriteVSALUV_" # mx);
- defvar WriteVSALUX_MX = !cast<SchedWrite>("WriteVSALUX_" # mx);
- defvar WriteVSALUI_MX = !cast<SchedWrite>("WriteVSALUI_" # mx);
- defvar ReadVSALUV_MX = !cast<SchedRead>("ReadVSALUV_" # mx);
- defvar ReadVSALUX_MX = !cast<SchedRead>("ReadVSALUX_" # mx);
-
defm "" : VPseudoBinaryV_VV<m, Constraint>,
- Sched<[WriteVSALUV_MX, ReadVSALUV_MX, ReadVSALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVSALUV", "ReadVSALUV", "ReadVSALUX", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VX<m, Constraint>,
- Sched<[WriteVSALUX_MX, ReadVSALUV_MX, ReadVSALUX_MX, ReadVMask]>;
+ SchedBinary<"WriteVSALUX", "ReadVSALUV", "ReadVSALUX", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VI<ImmType, m, Constraint>,
- Sched<[WriteVSALUI_MX, ReadVSALUV_MX, ReadVMask]>;
+ SchedUnary<"WriteVSALUI", "ReadVSALUV", mx, forceMergeOpRead=true>;
}
}
@@ -2588,129 +2719,98 @@ multiclass VPseudoVSALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""
multiclass VPseudoVSHT_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVShiftV_MX = !cast<SchedWrite>("WriteVShiftV_" # mx);
- defvar WriteVShiftX_MX = !cast<SchedWrite>("WriteVShiftX_" # mx);
- defvar WriteVShiftI_MX = !cast<SchedWrite>("WriteVShiftI_" # mx);
- defvar ReadVShiftV_MX = !cast<SchedRead>("ReadVShiftV_" # mx);
- defvar ReadVShiftX_MX = !cast<SchedRead>("ReadVShiftX_" # mx);
-
defm "" : VPseudoBinaryV_VV<m, Constraint>,
- Sched<[WriteVShiftV_MX, ReadVShiftV_MX, ReadVShiftV_MX, ReadVMask]>;
+ SchedBinary<"WriteVShiftV", "ReadVShiftV", "ReadVShiftV", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VX<m, Constraint>,
- Sched<[WriteVShiftX_MX, ReadVShiftV_MX, ReadVShiftX_MX, ReadVMask]>;
+ SchedBinary<"WriteVShiftX", "ReadVShiftV", "ReadVShiftX", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VI<ImmType, m, Constraint>,
- Sched<[WriteVShiftI_MX, ReadVShiftV_MX, ReadVMask]>;
+ SchedUnary<"WriteVShiftI", "ReadVShiftV", mx, forceMergeOpRead=true>;
}
}
multiclass VPseudoVSSHT_VV_VX_VI_RM<Operand ImmType = simm5, string Constraint = ""> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVSShiftV_MX = !cast<SchedWrite>("WriteVSShiftV_" # mx);
- defvar WriteVSShiftX_MX = !cast<SchedWrite>("WriteVSShiftX_" # mx);
- defvar WriteVSShiftI_MX = !cast<SchedWrite>("WriteVSShiftI_" # mx);
- defvar ReadVSShiftV_MX = !cast<SchedRead>("ReadVSShiftV_" # mx);
- defvar ReadVSShiftX_MX = !cast<SchedRead>("ReadVSShiftX_" # mx);
-
defm "" : VPseudoBinaryV_VV_RM<m, Constraint>,
- Sched<[WriteVSShiftV_MX, ReadVSShiftV_MX, ReadVSShiftV_MX, ReadVMask]>;
+ SchedBinary<"WriteVSShiftV", "ReadVSShiftV", "ReadVSShiftV", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VX_RM<m, Constraint>,
- Sched<[WriteVSShiftX_MX, ReadVSShiftV_MX, ReadVSShiftX_MX, ReadVMask]>;
+ SchedBinary<"WriteVSShiftX", "ReadVSShiftV", "ReadVSShiftX", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VI_RM<ImmType, m, Constraint>,
- Sched<[WriteVSShiftI_MX, ReadVSShiftV_MX, ReadVMask]>;
+ SchedUnary<"WriteVSShiftI", "ReadVSShiftV", mx, forceMergeOpRead=true>;
}
}
multiclass VPseudoVALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
- defvar WriteVIALUX_MX = !cast<SchedWrite>("WriteVIALUX_" # mx);
- defvar WriteVIALUI_MX = !cast<SchedWrite>("WriteVIALUI_" # mx);
- defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx);
- defvar ReadVIALUX_MX = !cast<SchedRead>("ReadVIALUX_" # mx);
-
defm "" : VPseudoBinaryV_VV<m, Constraint>,
- Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VX<m, Constraint>,
- Sched<[WriteVIALUX_MX, ReadVIALUV_MX, ReadVIALUX_MX, ReadVMask]>;
+ SchedBinary<"WriteVIALUX", "ReadVIALUV", "ReadVIALUX", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VI<ImmType, m, Constraint>,
- Sched<[WriteVIALUI_MX, ReadVIALUV_MX, ReadVMask]>;
+ SchedUnary<"WriteVIALUI", "ReadVIALUV", mx, forceMergeOpRead=true>;
}
}
multiclass VPseudoVSALU_VV_VX {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVSALUV_MX = !cast<SchedWrite>("WriteVSALUV_" # mx);
- defvar WriteVSALUX_MX = !cast<SchedWrite>("WriteVSALUX_" # mx);
- defvar ReadVSALUV_MX = !cast<SchedRead>("ReadVSALUV_" # mx);
- defvar ReadVSALUX_MX = !cast<SchedRead>("ReadVSALUX_" # mx);
-
defm "" : VPseudoBinaryV_VV<m>,
- Sched<[WriteVSALUV_MX, ReadVSALUV_MX, ReadVSALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVSALUV", "ReadVSALUV", "ReadVSALUV", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VX<m>,
- Sched<[WriteVSALUX_MX, ReadVSALUV_MX, ReadVSALUX_MX, ReadVMask]>;
+ SchedBinary<"WriteVSALUX", "ReadVSALUV", "ReadVSALUX", mx,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVSMUL_VV_VX_RM {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVSMulV_MX = !cast<SchedWrite>("WriteVSMulV_" # mx);
- defvar WriteVSMulX_MX = !cast<SchedWrite>("WriteVSMulX_" # mx);
- defvar ReadVSMulV_MX = !cast<SchedRead>("ReadVSMulV_" # mx);
- defvar ReadVSMulX_MX = !cast<SchedRead>("ReadVSMulX_" # mx);
-
defm "" : VPseudoBinaryV_VV_RM<m>,
- Sched<[WriteVSMulV_MX, ReadVSMulV_MX, ReadVSMulV_MX, ReadVMask]>;
+ SchedBinary<"WriteVSMulV", "ReadVSMulV", "ReadVSMulV", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VX_RM<m>,
- Sched<[WriteVSMulX_MX, ReadVSMulV_MX, ReadVSMulX_MX, ReadVMask]>;
+ SchedBinary<"WriteVSMulX", "ReadVSMulV", "ReadVSMulX", mx,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVAALU_VV_VX_RM {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVAALUV_MX = !cast<SchedWrite>("WriteVAALUV_" # mx);
- defvar WriteVAALUX_MX = !cast<SchedWrite>("WriteVAALUX_" # mx);
- defvar ReadVAALUV_MX = !cast<SchedRead>("ReadVAALUV_" # mx);
- defvar ReadVAALUX_MX = !cast<SchedRead>("ReadVAALUX_" # mx);
-
defm "" : VPseudoBinaryV_VV_RM<m>,
- Sched<[WriteVAALUV_MX, ReadVAALUV_MX, ReadVAALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVAALUV", "ReadVAALUV", "ReadVAALUV", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VX_RM<m>,
- Sched<[WriteVAALUX_MX, ReadVAALUV_MX, ReadVAALUX_MX, ReadVMask]>;
+ SchedBinary<"WriteVAALUX", "ReadVAALUV", "ReadVAALUX", mx,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVMINMAX_VV_VX {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVIMinMaxV_MX = !cast<SchedWrite>("WriteVIMinMaxV_" # mx);
- defvar WriteVIMinMaxX_MX = !cast<SchedWrite>("WriteVIMinMaxX_" # mx);
- defvar ReadVIMinMaxV_MX = !cast<SchedRead>("ReadVIMinMaxV_" # mx);
- defvar ReadVIMinMaxX_MX = !cast<SchedRead>("ReadVIMinMaxX_" # mx);
-
defm "" : VPseudoBinaryV_VV<m>,
- Sched<[WriteVIMinMaxV_MX, ReadVIMinMaxV_MX, ReadVIMinMaxV_MX, ReadVMask]>;
+ SchedBinary<"WriteVIMinMaxV", "ReadVIMinMaxV", "ReadVIMinMaxV", mx>;
defm "" : VPseudoBinaryV_VX<m>,
- Sched<[WriteVIMinMaxX_MX, ReadVIMinMaxV_MX, ReadVIMinMaxX_MX, ReadVMask]>;
+ SchedBinary<"WriteVIMinMaxX", "ReadVIMinMaxV", "ReadVIMinMaxX", mx>;
}
}
multiclass VPseudoVMUL_VV_VX {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVIMulV_MX = !cast<SchedWrite>("WriteVIMulV_" # mx);
- defvar WriteVIMulX_MX = !cast<SchedWrite>("WriteVIMulX_" # mx);
- defvar ReadVIMulV_MX = !cast<SchedRead>("ReadVIMulV_" # mx);
- defvar ReadVIMulX_MX = !cast<SchedRead>("ReadVIMulX_" # mx);
-
defm "" : VPseudoBinaryV_VV<m>,
- Sched<[WriteVIMulV_MX, ReadVIMulV_MX, ReadVIMulV_MX, ReadVMask]>;
+ SchedBinary<"WriteVIMulV", "ReadVIMulV", "ReadVIMulV", mx>;
defm "" : VPseudoBinaryV_VX<m>,
- Sched<[WriteVIMulX_MX, ReadVIMulV_MX, ReadVIMulX_MX, ReadVMask]>;
+ SchedBinary<"WriteVIMulX", "ReadVIMulV", "ReadVIMulX", mx>;
}
}
@@ -2719,38 +2819,26 @@ multiclass VPseudoVDIV_VV_VX {
defvar mx = m.MX;
defvar sews = SchedSEWSet<mx>.val;
foreach e = sews in {
- defvar WriteVIDivV_MX_E = !cast<SchedWrite>("WriteVIDivV_" # mx # "_E" # e);
- defvar WriteVIDivX_MX_E = !cast<SchedWrite>("WriteVIDivX_" # mx # "_E" # e);
- defvar ReadVIDivV_MX_E = !cast<SchedRead>("ReadVIDivV_" # mx # "_E" # e);
- defvar ReadVIDivX_MX_E = !cast<SchedRead>("ReadVIDivX_" # mx # "_E" # e);
-
defm "" : VPseudoBinaryV_VV<m, "", e>,
- Sched<[WriteVIDivV_MX_E, ReadVIDivV_MX_E, ReadVIDivV_MX_E, ReadVMask]>;
+ SchedBinary<"WriteVIDivV", "ReadVIDivV", "ReadVIDivV", mx, e>;
defm "" : VPseudoBinaryV_VX<m, "", e>,
- Sched<[WriteVIDivX_MX_E, ReadVIDivV_MX_E, ReadVIDivX_MX_E, ReadVMask]>;
+ SchedBinary<"WriteVIDivX", "ReadVIDivV", "ReadVIDivX", mx, e>;
}
}
}
multiclass VPseudoVFMUL_VV_VF_RM {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFMulV_MX = !cast<SchedWrite>("WriteVFMulV_" # mx);
- defvar ReadVFMulV_MX = !cast<SchedRead>("ReadVFMulV_" # mx);
-
defm "" : VPseudoBinaryFV_VV_RM<m>,
- Sched<[WriteVFMulV_MX, ReadVFMulV_MX, ReadVFMulV_MX, ReadVMask]>;
+ SchedBinary<"WriteVFMulV", "ReadVFMulV", "ReadVFMulV", m.MX,
+ forceMergeOpRead=true>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFMulF_MX = !cast<SchedWrite>("WriteVFMulF_" # mx);
- defvar ReadVFMulV_MX = !cast<SchedRead>("ReadVFMulV_" # mx);
- defvar ReadVFMulF_MX = !cast<SchedRead>("ReadVFMulF_" # mx);
-
defm "" : VPseudoBinaryV_VF_RM<m, f>,
- Sched<[WriteVFMulF_MX, ReadVFMulV_MX, ReadVFMulF_MX, ReadVMask]>;
+ SchedBinary<"WriteVFMulF", "ReadVFMulV", "ReadVFMulF", m.MX,
+ forceMergeOpRead=true>;
}
}
}
@@ -2760,23 +2848,17 @@ multiclass VPseudoVFDIV_VV_VF_RM {
defvar mx = m.MX;
defvar sews = SchedSEWSet<mx, isF=1>.val;
foreach e = sews in {
- defvar WriteVFDivV_MX_E = !cast<SchedWrite>("WriteVFDivV_" # mx # "_E" # e);
- defvar ReadVFDivV_MX_E = !cast<SchedRead>("ReadVFDivV_" # mx # "_E" # e);
-
defm "" : VPseudoBinaryFV_VV_RM<m, "", e>,
- Sched<[WriteVFDivV_MX_E, ReadVFDivV_MX_E, ReadVFDivV_MX_E, ReadVMask]>;
+ SchedBinary<"WriteVFDivV", "ReadVFDivV", "ReadVFDivV", mx, e,
+ forceMergeOpRead=true>;
}
}
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFDivF_MX_E = !cast<SchedWrite>("WriteVFDivF_" # mx # "_E" # f.SEW);
- defvar ReadVFDivV_MX_E = !cast<SchedRead>("ReadVFDivV_" # mx # "_E" # f.SEW);
- defvar ReadVFDivF_MX_E = !cast<SchedRead>("ReadVFDivF_" # mx # "_E" # f.SEW);
-
defm "" : VPseudoBinaryV_VF_RM<m, f, "", f.SEW>,
- Sched<[WriteVFDivF_MX_E, ReadVFDivV_MX_E, ReadVFDivF_MX_E, ReadVMask]>;
+ SchedBinary<"WriteVFDivF", "ReadVFDivV", "ReadVFDivF", m.MX, f.SEW,
+ forceMergeOpRead=true>;
}
}
}
@@ -2784,118 +2866,84 @@ multiclass VPseudoVFDIV_VV_VF_RM {
multiclass VPseudoVFRDIV_VF_RM {
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFDivF_MX_E = !cast<SchedWrite>("WriteVFDivF_" # mx # "_E" # f.SEW);
- defvar ReadVFDivV_MX_E = !cast<SchedRead>("ReadVFDivV_" # mx # "_E" # f.SEW);
- defvar ReadVFDivF_MX_E = !cast<SchedRead>("ReadVFDivF_" # mx # "_E" # f.SEW);
-
defm "" : VPseudoBinaryV_VF_RM<m, f, "", f.SEW>,
- Sched<[WriteVFDivF_MX_E, ReadVFDivV_MX_E, ReadVFDivF_MX_E, ReadVMask]>;
+ SchedBinary<"WriteVFDivF", "ReadVFDivV", "ReadVFDivF", m.MX, f.SEW,
+ forceMergeOpRead=true>;
}
}
}
multiclass VPseudoVALU_VV_VX {
foreach m = MxList in {
- defvar mx = m.MX;
- defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
- defvar WriteVIALUX_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
- defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx);
- defvar ReadVIALUX_MX = !cast<SchedRead>("ReadVIALUX_" # mx);
-
defm "" : VPseudoBinaryV_VV<m>,
- Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", m.MX,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VX<m>,
- Sched<[WriteVIALUX_MX, ReadVIALUV_MX, ReadVIALUX_MX, ReadVMask]>;
+ SchedBinary<"WriteVIALUX", "ReadVIALUV", "ReadVIALUX", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVSGNJ_VV_VF {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFSgnjV_MX = !cast<SchedWrite>("WriteVFSgnjV_" # mx);
- defvar ReadVFSgnjV_MX = !cast<SchedRead>("ReadVFSgnjV_" # mx);
-
defm "" : VPseudoBinaryFV_VV<m>,
- Sched<[WriteVFSgnjV_MX, ReadVFSgnjV_MX, ReadVFSgnjV_MX, ReadVMask]>;
+ SchedBinary<"WriteVFSgnjV", "ReadVFSgnjV", "ReadVFSgnjV", m.MX,
+ forceMergeOpRead=true>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFSgnjF_MX = !cast<SchedWrite>("WriteVFSgnjF_" # mx);
- defvar ReadVFSgnjV_MX = !cast<SchedRead>("ReadVFSgnjV_" # mx);
- defvar ReadVFSgnjF_MX = !cast<SchedRead>("ReadVFSgnjF_" # mx);
-
defm "" : VPseudoBinaryV_VF<m, f>,
- Sched<[WriteVFSgnjF_MX, ReadVFSgnjV_MX, ReadVFSgnjF_MX, ReadVMask]>;
+ SchedBinary<"WriteVFSgnjF", "ReadVFSgnjV", "ReadVFSgnjF", m.MX,
+ forceMergeOpRead=true>;
}
}
}
multiclass VPseudoVMAX_VV_VF {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFMinMaxV_MX = !cast<SchedWrite>("WriteVFMinMaxV_" # mx);
- defvar ReadVFMinMaxV_MX = !cast<SchedRead>("ReadVFMinMaxV_" # mx);
-
defm "" : VPseudoBinaryFV_VV<m>,
- Sched<[WriteVFMinMaxV_MX, ReadVFMinMaxV_MX, ReadVFMinMaxV_MX, ReadVMask]>;
+ SchedBinary<"WriteVFMinMaxV", "ReadVFMinMaxV", "ReadVFMinMaxV", m.MX,
+ forceMergeOpRead=true>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFMinMaxF_MX = !cast<SchedWrite>("WriteVFMinMaxF_" # mx);
- defvar ReadVFMinMaxV_MX = !cast<SchedRead>("ReadVFMinMaxV_" # mx);
- defvar ReadVFMinMaxF_MX = !cast<SchedRead>("ReadVFMinMaxF_" # mx);
-
defm "" : VPseudoBinaryV_VF<m, f>,
- Sched<[WriteVFMinMaxF_MX, ReadVFMinMaxV_MX, ReadVFMinMaxF_MX, ReadVMask]>;
+ SchedBinary<"WriteVFMinMaxF", "ReadVFMinMaxV", "ReadVFMinMaxF", m.MX,
+ forceMergeOpRead=true>;
}
}
}
multiclass VPseudoVALU_VV_VF {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFALUV_MX = !cast<SchedWrite>("WriteVFALUV_" # mx);
- defvar ReadVFALUV_MX = !cast<SchedRead>("ReadVFALUV_" # mx);
-
defm "" : VPseudoBinaryFV_VV<m>,
- Sched<[WriteVFALUV_MX, ReadVFALUV_MX, ReadVFALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVFALUV", "ReadVFALUV", "ReadVFALUV", m.MX,
+ forceMergeOpRead=true>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFALUF_MX = !cast<SchedWrite>("WriteVFALUF_" # mx);
- defvar ReadVFALUV_MX = !cast<SchedRead>("ReadVFALUV_" # mx);
- defvar ReadVFALUF_MX = !cast<SchedRead>("ReadVFALUF_" # mx);
defm "" : VPseudoBinaryV_VF<m, f>,
- Sched<[WriteVFALUF_MX, ReadVFALUV_MX, ReadVFALUF_MX, ReadVMask]>;
+ SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX,
+ forceMergeOpRead=true>;
}
}
}
multiclass VPseudoVALU_VV_VF_RM {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFALUV_MX = !cast<SchedWrite>("WriteVFALUV_" # mx);
- defvar ReadVFALUV_MX = !cast<SchedRead>("ReadVFALUV_" # mx);
-
defm "" : VPseudoBinaryFV_VV_RM<m>,
- Sched<[WriteVFALUV_MX, ReadVFALUV_MX, ReadVFALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVFALUV", "ReadVFALUV", "ReadVFALUV", m.MX,
+ forceMergeOpRead=true>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFALUF_MX = !cast<SchedWrite>("WriteVFALUF_" # mx);
- defvar ReadVFALUV_MX = !cast<SchedRead>("ReadVFALUV_" # mx);
- defvar ReadVFALUF_MX = !cast<SchedRead>("ReadVFALUF_" # mx);
defm "" : VPseudoBinaryV_VF_RM<m, f>,
- Sched<[WriteVFALUF_MX, ReadVFALUV_MX, ReadVFALUF_MX, ReadVMask]>;
+ SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX,
+ forceMergeOpRead=true>;
}
}
}
@@ -2903,13 +2951,9 @@ multiclass VPseudoVALU_VV_VF_RM {
multiclass VPseudoVALU_VF {
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFALUF_MX = !cast<SchedWrite>("WriteVFALUF_" # mx);
- defvar ReadVFALUV_MX = !cast<SchedRead>("ReadVFALUV_" # mx);
- defvar ReadVFALUF_MX = !cast<SchedRead>("ReadVFALUF_" # mx);
-
defm "" : VPseudoBinaryV_VF<m, f>,
- Sched<[WriteVFALUF_MX, ReadVFALUV_MX, ReadVFALUF_MX, ReadVMask]>;
+ SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX,
+ forceMergeOpRead=true>;
}
}
}
@@ -2917,13 +2961,9 @@ multiclass VPseudoVALU_VF {
multiclass VPseudoVALU_VF_RM {
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFALUF_MX = !cast<SchedWrite>("WriteVFALUF_" # mx);
- defvar ReadVFALUV_MX = !cast<SchedRead>("ReadVFALUV_" # mx);
- defvar ReadVFALUF_MX = !cast<SchedRead>("ReadVFALUF_" # mx);
-
defm "" : VPseudoBinaryV_VF_RM<m, f>,
- Sched<[WriteVFALUF_MX, ReadVFALUV_MX, ReadVFALUF_MX, ReadVMask]>;
+ SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX,
+ forceMergeOpRead=true>;
}
}
}
@@ -2931,67 +2971,56 @@ multiclass VPseudoVALU_VF_RM {
multiclass VPseudoVALU_VX_VI<Operand ImmType = simm5> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVIALUX_MX = !cast<SchedWrite>("WriteVIALUX_" # mx);
- defvar WriteVIALUI_MX = !cast<SchedWrite>("WriteVIALUI_" # mx);
- defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx);
- defvar ReadVIALUX_MX = !cast<SchedRead>("ReadVIALUX_" # mx);
-
defm "" : VPseudoBinaryV_VX<m>,
- Sched<[WriteVIALUX_MX, ReadVIALUV_MX, ReadVIALUX_MX, ReadVMask]>;
+ SchedBinary<"WriteVIALUX", "ReadVIALUV", "ReadVIALUX", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VI<ImmType, m>,
- Sched<[WriteVIALUI_MX, ReadVIALUV_MX, ReadVMask]>;
+ SchedUnary<"WriteVIALUI", "ReadVIALUV", mx, forceMergeOpRead=true>;
}
}
multiclass VPseudoVWALU_VV_VX {
foreach m = MxListW in {
defvar mx = m.MX;
- defvar WriteVIWALUV_MX = !cast<SchedWrite>("WriteVIWALUV_" # mx);
- defvar WriteVIWALUX_MX = !cast<SchedWrite>("WriteVIWALUX_" # mx);
- defvar ReadVIWALUV_MX = !cast<SchedRead>("ReadVIWALUV_" # mx);
- defvar ReadVIWALUX_MX = !cast<SchedRead>("ReadVIWALUX_" # mx);
-
defm "" : VPseudoBinaryW_VV<m>,
- Sched<[WriteVIWALUV_MX, ReadVIWALUV_MX, ReadVIWALUV_MX, ReadVMask]>;
- defm "" : VPseudoBinaryW_VX<m>,
- Sched<[WriteVIWALUX_MX, ReadVIWALUV_MX, ReadVIWALUX_MX, ReadVMask]>;
+ SchedBinary<"WriteVIWALUV", "ReadVIWALUV", "ReadVIWALUV", mx,
+ forceMergeOpRead=true>;
+ defm "" : VPseudoBinaryW_VX<m>,
+ SchedBinary<"WriteVIWALUX", "ReadVIWALUV", "ReadVIWALUX", mx,
+ forceMergeOpRead=true>;
+ }
+}
+
+multiclass VPseudoVWALU_VV_VX_VI<Operand ImmType> : VPseudoVWALU_VV_VX {
+ foreach m = MxListW in {
+ defm "" : VPseudoBinaryW_VI<ImmType, m>;
}
}
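The new VPseudoVWALU_VV_VX_VI uses TableGen multiclass inheritance:
instantiating the derived multiclass also instantiates everything the parent
defines, so the _VI forms are simply appended to the inherited _VV/_VX
definitions. In miniature (names invented for illustration):

  multiclass BaseOps {
    def _VV;
    def _VX;
  }
  multiclass DerivedOps : BaseOps {
    def _VI;
  }
  defm Demo : DerivedOps;  // creates Demo_VV, Demo_VX and Demo_VI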
multiclass VPseudoVWMUL_VV_VX {
foreach m = MxListW in {
defvar mx = m.MX;
- defvar WriteVIWMulV_MX = !cast<SchedWrite>("WriteVIWMulV_" # mx);
- defvar WriteVIWMulX_MX = !cast<SchedWrite>("WriteVIWMulX_" # mx);
- defvar ReadVIWMulV_MX = !cast<SchedRead>("ReadVIWMulV_" # mx);
- defvar ReadVIWMulX_MX = !cast<SchedRead>("ReadVIWMulX_" # mx);
-
defm "" : VPseudoBinaryW_VV<m>,
- Sched<[WriteVIWMulV_MX, ReadVIWMulV_MX, ReadVIWMulV_MX, ReadVMask]>;
+ SchedBinary<"WriteVIWMulV", "ReadVIWMulV", "ReadVIWMulV", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryW_VX<m>,
- Sched<[WriteVIWMulX_MX, ReadVIWMulV_MX, ReadVIWMulX_MX, ReadVMask]>;
+ SchedBinary<"WriteVIWMulX", "ReadVIWMulV", "ReadVIWMulX", mx,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVWMUL_VV_VF_RM {
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWMulV_MX = !cast<SchedWrite>("WriteVFWMulV_" # mx);
- defvar ReadVFWMulV_MX = !cast<SchedRead>("ReadVFWMulV_" # mx);
-
defm "" : VPseudoBinaryW_VV_RM<m>,
- Sched<[WriteVFWMulV_MX, ReadVFWMulV_MX, ReadVFWMulV_MX, ReadVMask]>;
+ SchedBinary<"WriteVFWMulV", "ReadVFWMulV", "ReadVFWMulV", m.MX,
+ forceMergeOpRead=true>;
}
foreach f = FPListW in {
foreach m = f.MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWMulF_MX = !cast<SchedWrite>("WriteVFWMulF_" # mx);
- defvar ReadVFWMulV_MX = !cast<SchedRead>("ReadVFWMulV_" # mx);
- defvar ReadVFWMulF_MX = !cast<SchedRead>("ReadVFWMulF_" # mx);
-
defm "" : VPseudoBinaryW_VF_RM<m, f>,
- Sched<[WriteVFWMulF_MX, ReadVFWMulV_MX, ReadVFWMulF_MX, ReadVMask]>;
+ SchedBinary<"WriteVFWMulF", "ReadVFWMulV", "ReadVFWMulF", m.MX,
+ forceMergeOpRead=true>;
}
}
}
@@ -2999,59 +3028,42 @@ multiclass VPseudoVWMUL_VV_VF_RM {
multiclass VPseudoVWALU_WV_WX {
foreach m = MxListW in {
defvar mx = m.MX;
- defvar WriteVIWALUV_MX = !cast<SchedWrite>("WriteVIWALUV_" # mx);
- defvar WriteVIWALUX_MX = !cast<SchedWrite>("WriteVIWALUX_" # mx);
- defvar ReadVIWALUV_MX = !cast<SchedRead>("ReadVIWALUV_" # mx);
- defvar ReadVIWALUX_MX = !cast<SchedRead>("ReadVIWALUX_" # mx);
-
defm "" : VPseudoBinaryW_WV<m>,
- Sched<[WriteVIWALUV_MX, ReadVIWALUV_MX, ReadVIWALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVIWALUV", "ReadVIWALUV", "ReadVIWALUV", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryW_WX<m>,
- Sched<[WriteVIWALUX_MX, ReadVIWALUV_MX, ReadVIWALUX_MX, ReadVMask]>;
+ SchedBinary<"WriteVIWALUX", "ReadVIWALUV", "ReadVIWALUX", mx,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVFWALU_VV_VF_RM {
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWALUV_MX = !cast<SchedWrite>("WriteVFWALUV_" # mx);
- defvar ReadVFWALUV_MX = !cast<SchedRead>("ReadVFWALUV_" # mx);
-
defm "" : VPseudoBinaryW_VV_RM<m>,
- Sched<[WriteVFWALUV_MX, ReadVFWALUV_MX, ReadVFWALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV", m.MX,
+ forceMergeOpRead=true>;
}
foreach f = FPListW in {
foreach m = f.MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWALUF_MX = !cast<SchedWrite>("WriteVFWALUF_" # mx);
- defvar ReadVFWALUV_MX = !cast<SchedRead>("ReadVFWALUV_" # mx);
- defvar ReadVFWALUF_MX = !cast<SchedRead>("ReadVFWALUF_" # mx);
-
defm "" : VPseudoBinaryW_VF_RM<m, f>,
- Sched<[WriteVFWALUF_MX, ReadVFWALUV_MX, ReadVFWALUF_MX, ReadVMask]>;
+ SchedBinary<"WriteVFWALUF", "ReadVFWALUV", "ReadVFWALUF", m.MX,
+ forceMergeOpRead=true>;
}
}
}
multiclass VPseudoVFWALU_WV_WF_RM {
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWALUV_MX = !cast<SchedWrite>("WriteVFWALUV_" # mx);
- defvar ReadVFWALUV_MX = !cast<SchedRead>("ReadVFWALUV_" # mx);
-
defm "" : VPseudoBinaryW_WV_RM<m>,
- Sched<[WriteVFWALUV_MX, ReadVFWALUV_MX, ReadVFWALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV", m.MX,
+ forceMergeOpRead=true>;
}
foreach f = FPListW in {
foreach m = f.MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWALUF_MX = !cast<SchedWrite>("WriteVFWALUF_" # mx);
- defvar ReadVFWALUV_MX = !cast<SchedRead>("ReadVFWALUV_" # mx);
- defvar ReadVFWALUF_MX = !cast<SchedRead>("ReadVFWALUF_" # mx);
-
defm "" : VPseudoBinaryW_WF_RM<m, f>,
- Sched<[WriteVFWALUF_MX, ReadVFWALUV_MX, ReadVFWALUF_MX, ReadVMask]>;
+ SchedBinary<"WriteVFWALUF", "ReadVFWALUV", "ReadVFWALUF", m.MX,
+ forceMergeOpRead=true>;
}
}
}
@@ -3059,159 +3071,134 @@ multiclass VPseudoVFWALU_WV_WF_RM {
multiclass VPseudoVMRG_VM_XM_IM {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVIMergeV_MX = !cast<SchedWrite>("WriteVIMergeV_" # mx);
- defvar WriteVIMergeX_MX = !cast<SchedWrite>("WriteVIMergeX_" # mx);
- defvar WriteVIMergeI_MX = !cast<SchedWrite>("WriteVIMergeI_" # mx);
- defvar ReadVIMergeV_MX = !cast<SchedRead>("ReadVIMergeV_" # mx);
- defvar ReadVIMergeX_MX = !cast<SchedRead>("ReadVIMergeX_" # mx);
-
def "_VVM" # "_" # m.MX:
VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
m.vrclass, m.vrclass, m, 1, "">,
- Sched<[WriteVIMergeV_MX, ReadVIMergeV_MX, ReadVIMergeV_MX, ReadVMask]>;
+ SchedBinary<"WriteVIMergeV", "ReadVIMergeV", "ReadVIMergeV", mx,
+ forceMergeOpRead=true>;
def "_VXM" # "_" # m.MX:
VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
m.vrclass, GPR, m, 1, "">,
- Sched<[WriteVIMergeX_MX, ReadVIMergeV_MX, ReadVIMergeX_MX, ReadVMask]>;
+ SchedBinary<"WriteVIMergeX", "ReadVIMergeV", "ReadVIMergeX", mx,
+ forceMergeOpRead=true>;
def "_VIM" # "_" # m.MX:
VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
m.vrclass, simm5, m, 1, "">,
- Sched<[WriteVIMergeI_MX, ReadVIMergeV_MX, ReadVMask]>;
+ SchedUnary<"WriteVIMergeI", "ReadVIMergeV", mx,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVCALU_VM_XM_IM {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVICALUV_MX = !cast<SchedWrite>("WriteVICALUV_" # mx);
- defvar WriteVICALUX_MX = !cast<SchedWrite>("WriteVICALUX_" # mx);
- defvar WriteVICALUI_MX = !cast<SchedWrite>("WriteVICALUI_" # mx);
- defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx);
- defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx);
-
defm "" : VPseudoTiedBinaryV_VM<m>,
- Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoTiedBinaryV_XM<m>,
- Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>;
+ SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoTiedBinaryV_IM<m>,
- Sched<[WriteVICALUI_MX, ReadVICALUV_MX, ReadVMask]>;
+ SchedUnary<"WriteVICALUI", "ReadVICALUV", mx,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVCALU_VM_XM {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVICALUV_MX = !cast<SchedWrite>("WriteVICALUV_" # mx);
- defvar WriteVICALUX_MX = !cast<SchedWrite>("WriteVICALUX_" # mx);
- defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx);
- defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx);
-
defm "" : VPseudoTiedBinaryV_VM<m>,
- Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoTiedBinaryV_XM<m>,
- Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>;
+ SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVCALUM_VM_XM_IM<string Constraint> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVICALUV_MX = !cast<SchedWrite>("WriteVICALUV_" # mx);
- defvar WriteVICALUX_MX = !cast<SchedWrite>("WriteVICALUX_" # mx);
- defvar WriteVICALUI_MX = !cast<SchedWrite>("WriteVICALUI_" # mx);
- defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx);
- defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx);
-
- defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=1, Constraint=Constraint>,
- Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=1, Constraint=Constraint>,
- Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_IM<m, CarryOut=1, CarryIn=1, Constraint=Constraint>,
- Sched<[WriteVICALUI_MX, ReadVICALUV_MX, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=1, Constraint=Constraint,
+ Commutable=1, TargetConstraintType=2>,
+ SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx, forceMasked=1,
+ forceMergeOpRead=true>;
+ defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=1, Constraint=Constraint, TargetConstraintType=2>,
+ SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx, forceMasked=1,
+ forceMergeOpRead=true>;
+ defm "" : VPseudoBinaryV_IM<m, CarryOut=1, CarryIn=1, Constraint=Constraint, TargetConstraintType=2>,
+ SchedUnary<"WriteVICALUI", "ReadVICALUV", mx, forceMasked=1,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVCALUM_VM_XM<string Constraint> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVICALUV_MX = !cast<SchedWrite>("WriteVICALUV_" # mx);
- defvar WriteVICALUX_MX = !cast<SchedWrite>("WriteVICALUX_" # mx);
- defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx);
- defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx);
-
- defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=1, Constraint=Constraint>,
- Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=1, Constraint=Constraint>,
- Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=1, Constraint=Constraint, TargetConstraintType=2>,
+ SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx, forceMasked=1,
+ forceMergeOpRead=true>;
+ defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=1, Constraint=Constraint, TargetConstraintType=2>,
+ SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx, forceMasked=1,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVCALUM_V_X_I<string Constraint> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVICALUV_MX = !cast<SchedWrite>("WriteVICALUV_" # mx);
- defvar WriteVICALUX_MX = !cast<SchedWrite>("WriteVICALUX_" # mx);
- defvar WriteVICALUI_MX = !cast<SchedWrite>("WriteVICALUI_" # mx);
- defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx);
- defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx);
-
- defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=0, Constraint=Constraint>,
- Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX]>;
- defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=0, Constraint=Constraint>,
- Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX]>;
+ defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=0, Constraint=Constraint,
+ Commutable=1, TargetConstraintType=2>,
+ SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx,
+ forceMergeOpRead=true>;
+ defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=0, Constraint=Constraint, TargetConstraintType=2>,
+ SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_IM<m, CarryOut=1, CarryIn=0, Constraint=Constraint>,
- Sched<[WriteVICALUI_MX, ReadVICALUV_MX]>;
+ SchedUnary<"WriteVICALUI", "ReadVICALUV", mx,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVCALUM_V_X<string Constraint> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVICALUV_MX = !cast<SchedWrite>("WriteVICALUV_" # mx);
- defvar WriteVICALUX_MX = !cast<SchedWrite>("WriteVICALUX_" # mx);
- defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx);
- defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx);
-
- defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=0, Constraint=Constraint>,
- Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX]>;
- defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=0, Constraint=Constraint>,
- Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX]>;
+ defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=0, Constraint=Constraint, TargetConstraintType=2>,
+ SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx,
+ forceMergeOpRead=true>;
+ defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=0, Constraint=Constraint, TargetConstraintType=2>,
+ SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNCLP_WV_WX_WI_RM {
foreach m = MxListW in {
defvar mx = m.MX;
- defvar WriteVNClipV_MX = !cast<SchedWrite>("WriteVNClipV_" # mx);
- defvar WriteVNClipX_MX = !cast<SchedWrite>("WriteVNClipX_" # mx);
- defvar WriteVNClipI_MX = !cast<SchedWrite>("WriteVNClipI_" # mx);
- defvar ReadVNClipV_MX = !cast<SchedRead>("ReadVNClipV_" # mx);
- defvar ReadVNClipX_MX = !cast<SchedRead>("ReadVNClipX_" # mx);
-
defm "" : VPseudoBinaryV_WV_RM<m>,
- Sched<[WriteVNClipV_MX, ReadVNClipV_MX, ReadVNClipV_MX, ReadVMask]>;
+ SchedBinary<"WriteVNClipV", "ReadVNClipV", "ReadVNClipV", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_WX_RM<m>,
- Sched<[WriteVNClipX_MX, ReadVNClipV_MX, ReadVNClipX_MX, ReadVMask]>;
+ SchedBinary<"WriteVNClipX", "ReadVNClipV", "ReadVNClipX", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_WI_RM<m>,
- Sched<[WriteVNClipI_MX, ReadVNClipV_MX, ReadVMask]>;
+ SchedUnary<"WriteVNClipI", "ReadVNClipV", mx,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNSHT_WV_WX_WI {
foreach m = MxListW in {
defvar mx = m.MX;
- defvar WriteVNShiftV_MX = !cast<SchedWrite>("WriteVNShiftV_" # mx);
- defvar WriteVNShiftX_MX = !cast<SchedWrite>("WriteVNShiftX_" # mx);
- defvar WriteVNShiftI_MX = !cast<SchedWrite>("WriteVNShiftI_" # mx);
- defvar ReadVNShiftV_MX = !cast<SchedRead>("ReadVNShiftV_" # mx);
- defvar ReadVNShiftX_MX = !cast<SchedRead>("ReadVNShiftX_" # mx);
-
- defm "" : VPseudoBinaryV_WV<m>,
- Sched<[WriteVNShiftV_MX, ReadVNShiftV_MX, ReadVNShiftV_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_WX<m>,
- Sched<[WriteVNShiftX_MX, ReadVNShiftV_MX, ReadVNShiftX_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_WI<m>,
- Sched<[WriteVNShiftI_MX, ReadVNShiftV_MX, ReadVMask]>;
+ defm "" : VPseudoBinaryV_WV<m, TargetConstraintType=2>,
+ SchedBinary<"WriteVNShiftV", "ReadVNShiftV", "ReadVNShiftV", mx,
+ forceMergeOpRead=true>;
+ defm "" : VPseudoBinaryV_WX<m, TargetConstraintType=2>,
+ SchedBinary<"WriteVNShiftX", "ReadVNShiftV", "ReadVNShiftX", mx,
+ forceMergeOpRead=true>;
+ defm "" : VPseudoBinaryV_WI<m, TargetConstraintType=2>,
+ SchedUnary<"WriteVNShiftI", "ReadVNShiftV", mx,
+ forceMergeOpRead=true>;
}
}
@@ -3222,11 +3209,12 @@ multiclass VPseudoTernaryWithTailPolicy<VReg RetClass,
int sew,
string Constraint = "",
bit Commutable = 0> {
- let VLMul = MInfo.value in {
+ let VLMul = MInfo.value, SEW=sew in {
defvar mx = MInfo.MX;
let isCommutable = Commutable in
def "_" # mx # "_E" # sew : VPseudoTernaryNoMaskWithPolicy<RetClass, Op1Class, Op2Class, Constraint>;
- def "_" # mx # "_E" # sew # "_MASK" : VPseudoTernaryMaskPolicy<RetClass, Op1Class, Op2Class, Constraint>;
+ def "_" # mx # "_E" # sew # "_MASK" : VPseudoTernaryMaskPolicy<RetClass, Op1Class, Op2Class, Constraint>,
+ RISCVMaskedPseudo<MaskIdx=3, MaskAffectsRes=true>;
}
}
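RISCVMaskedPseudo<MaskIdx=3, MaskAffectsRes=true> tags the masked form of
these tail-policy ternary pseudos (used by the reductions) so the backend
peephole that rewrites masked pseudos into their unmasked twins can locate the
mask operand; MaskAffectsRes appears to mark pseudos whose result depends on
the mask in a way plain unmasking cannot reproduce, which blocks that rewrite.
A sketch of the marker's likely shape (the real definition lives elsewhere in
the RISC-V backend):

  class RISCVMaskedPseudoSketch<bits<4> MaskIdx, bit MaskAffectsRes = false> {
    bits<4> MaskOpIdx = MaskIdx;            // operand index of the mask ($vm)
    bit MaskAffectsResult = MaskAffectsRes; // result differs even for an all-ones mask
  }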
@@ -3237,15 +3225,16 @@ multiclass VPseudoTernaryWithTailPolicyRoundingMode<VReg RetClass,
int sew,
string Constraint = "",
bit Commutable = 0> {
- let VLMul = MInfo.value in {
+ let VLMul = MInfo.value, SEW=sew in {
defvar mx = MInfo.MX;
let isCommutable = Commutable in
def "_" # mx # "_E" # sew
- : VPseudoTernaryNoMaskWithPolicyRoundingMode<RetClass, Op1Class,
+ : VPseudoTernaryNoMaskWithPolicyRoundingMode<RetClass, Op1Class,
Op2Class, Constraint>;
def "_" # mx # "_E" # sew # "_MASK"
: VPseudoTernaryMaskPolicyRoundingMode<RetClass, Op1Class,
- Op2Class, Constraint>;
+ Op2Class, Constraint>,
+ RISCVMaskedPseudo<MaskIdx=3, MaskAffectsRes=true>;
}
}
@@ -3254,11 +3243,12 @@ multiclass VPseudoTernaryWithPolicy<VReg RetClass,
DAGOperand Op2Class,
LMULInfo MInfo,
string Constraint = "",
- bit Commutable = 0> {
+ bit Commutable = 0,
+ int TargetConstraintType = 1> {
let VLMul = MInfo.value in {
let isCommutable = Commutable in
- def "_" # MInfo.MX : VPseudoTernaryNoMaskWithPolicy<RetClass, Op1Class, Op2Class, Constraint>;
- def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskPolicy<RetClass, Op1Class, Op2Class, Constraint>,
+ def "_" # MInfo.MX : VPseudoTernaryNoMaskWithPolicy<RetClass, Op1Class, Op2Class, Constraint, TargetConstraintType>;
+ def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskPolicy<RetClass, Op1Class, Op2Class, Constraint, TargetConstraintType>,
RISCVMaskedPseudo<MaskIdx=3>;
}
}
@@ -3268,16 +3258,19 @@ multiclass VPseudoTernaryWithPolicyRoundingMode<VReg RetClass,
DAGOperand Op2Class,
LMULInfo MInfo,
string Constraint = "",
- bit Commutable = 0> {
+ bit Commutable = 0,
+ int TargetConstraintType = 1> {
let VLMul = MInfo.value in {
let isCommutable = Commutable in
def "_" # MInfo.MX :
VPseudoTernaryNoMaskWithPolicyRoundingMode<RetClass, Op1Class,
- Op2Class, Constraint>;
+ Op2Class, Constraint,
+ TargetConstraintType>;
def "_" # MInfo.MX # "_MASK" :
VPseudoBinaryMaskPolicyRoundingMode<RetClass, Op1Class,
Op2Class, Constraint,
- UsesVXRM_=0>,
+ UsesVXRM_=0,
+ TargetConstraintType=TargetConstraintType>,
RISCVMaskedPseudo<MaskIdx=3>;
}
}
@@ -3312,31 +3305,34 @@ multiclass VPseudoTernaryV_VF_AAXA_RM<LMULInfo m, FPR_Info f, string Constraint
multiclass VPseudoTernaryW_VV<LMULInfo m> {
defvar constraint = "@earlyclobber $rd";
defm _VV : VPseudoTernaryWithPolicy<m.wvrclass, m.vrclass, m.vrclass, m,
- constraint>;
+ constraint, /*Commutable*/ 0, TargetConstraintType=3>;
}
multiclass VPseudoTernaryW_VV_RM<LMULInfo m> {
defvar constraint = "@earlyclobber $rd";
defm _VV : VPseudoTernaryWithPolicyRoundingMode<m.wvrclass, m.vrclass, m.vrclass, m,
- constraint>;
+ constraint, /* Commutable */ 0,
+ TargetConstraintType=3>;
}
multiclass VPseudoTernaryW_VX<LMULInfo m> {
defvar constraint = "@earlyclobber $rd";
defm "_VX" : VPseudoTernaryWithPolicy<m.wvrclass, GPR, m.vrclass, m,
- constraint>;
+ constraint, /*Commutable*/ 0, TargetConstraintType=3>;
}
-multiclass VPseudoTernaryW_VF<LMULInfo m, FPR_Info f> {
+multiclass VPseudoTernaryW_VF<LMULInfo m, FPR_Info f, int TargetConstraintType = 1> {
defvar constraint = "@earlyclobber $rd";
defm "_V" # f.FX : VPseudoTernaryWithPolicy<m.wvrclass, f.fprclass,
- m.vrclass, m, constraint>;
+ m.vrclass, m, constraint, /*Commutable*/ 0, TargetConstraintType>;
}
multiclass VPseudoTernaryW_VF_RM<LMULInfo m, FPR_Info f> {
defvar constraint = "@earlyclobber $rd";
defm "_V" # f.FX : VPseudoTernaryWithPolicyRoundingMode<m.wvrclass, f.fprclass,
- m.vrclass, m, constraint>;
+ m.vrclass, m, constraint,
+ /* Commutable */ 0,
+ TargetConstraintType=3>;
}
multiclass VPseudoVSLDVWithPolicy<VReg RetClass,
@@ -3362,62 +3358,43 @@ multiclass VPseudoVSLDV_VI<Operand ImmType = simm5, LMULInfo m, string Constrain
multiclass VPseudoVMAC_VV_VX_AAXA<string Constraint = ""> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVIMulAddV_MX = !cast<SchedWrite>("WriteVIMulAddV_" # mx);
- defvar WriteVIMulAddX_MX = !cast<SchedWrite>("WriteVIMulAddX_" # mx);
- defvar ReadVIMulAddV_MX = !cast<SchedRead>("ReadVIMulAddV_" # mx);
- defvar ReadVIMulAddX_MX = !cast<SchedRead>("ReadVIMulAddX_" # mx);
-
defm "" : VPseudoTernaryV_VV_AAXA<m, Constraint>,
- Sched<[WriteVIMulAddV_MX, ReadVIMulAddV_MX, ReadVIMulAddV_MX,
- ReadVIMulAddV_MX, ReadVMask]>;
+ SchedTernary<"WriteVIMulAddV", "ReadVIMulAddV", "ReadVIMulAddV",
+ "ReadVIMulAddV", mx>;
defm "" : VPseudoTernaryV_VX_AAXA<m, Constraint>,
- Sched<[WriteVIMulAddX_MX, ReadVIMulAddV_MX, ReadVIMulAddV_MX,
- ReadVIMulAddX_MX, ReadVMask]>;
+ SchedTernary<"WriteVIMulAddX", "ReadVIMulAddV", "ReadVIMulAddX",
+ "ReadVIMulAddV", mx>;
}
}
multiclass VPseudoVMAC_VV_VF_AAXA<string Constraint = ""> {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFMulAddV_MX = !cast<SchedWrite>("WriteVFMulAddV_" # mx);
- defvar ReadVFMulAddV_MX = !cast<SchedRead>("ReadVFMulAddV_" # mx);
-
defm "" : VPseudoTernaryV_VV_AAXA<m, Constraint>,
- Sched<[WriteVFMulAddV_MX, ReadVFMulAddV_MX, ReadVFMulAddV_MX, ReadVFMulAddV_MX, ReadVMask]>;
+ SchedTernary<"WriteVFMulAddV", "ReadVFMulAddV", "ReadVFMulAddV",
+ "ReadVFMulAddV", m.MX>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFMulAddF_MX = !cast<SchedWrite>("WriteVFMulAddF_" # mx);
- defvar ReadVFMulAddV_MX = !cast<SchedRead>("ReadVFMulAddV_" # mx);
- defvar ReadVFMulAddF_MX = !cast<SchedRead>("ReadVFMulAddF_" # mx);
-
defm "" : VPseudoTernaryV_VF_AAXA<m, f, Constraint>,
- Sched<[WriteVFMulAddF_MX, ReadVFMulAddV_MX, ReadVFMulAddV_MX, ReadVFMulAddF_MX, ReadVMask]>;
+ SchedTernary<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF",
+ "ReadVFMulAddV", m.MX>;
}
}
}
multiclass VPseudoVMAC_VV_VF_AAXA_RM<string Constraint = ""> {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFMulAddV_MX = !cast<SchedWrite>("WriteVFMulAddV_" # mx);
- defvar ReadVFMulAddV_MX = !cast<SchedRead>("ReadVFMulAddV_" # mx);
-
defm "" : VPseudoTernaryV_VV_AAXA_RM<m, Constraint>,
- Sched<[WriteVFMulAddV_MX, ReadVFMulAddV_MX, ReadVFMulAddV_MX, ReadVFMulAddV_MX, ReadVMask]>;
+ SchedTernary<"WriteVFMulAddV", "ReadVFMulAddV", "ReadVFMulAddV",
+ "ReadVFMulAddV", m.MX>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFMulAddF_MX = !cast<SchedWrite>("WriteVFMulAddF_" # mx);
- defvar ReadVFMulAddV_MX = !cast<SchedRead>("ReadVFMulAddV_" # mx);
- defvar ReadVFMulAddF_MX = !cast<SchedRead>("ReadVFMulAddF_" # mx);
-
defm "" : VPseudoTernaryV_VF_AAXA_RM<m, f, Constraint>,
- Sched<[WriteVFMulAddF_MX, ReadVFMulAddV_MX, ReadVFMulAddV_MX, ReadVFMulAddF_MX, ReadVMask]>;
+ SchedTernary<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF",
+ "ReadVFMulAddV", m.MX>;
}
}
}
@@ -3425,70 +3402,64 @@ multiclass VPseudoVMAC_VV_VF_AAXA_RM<string Constraint = ""> {
multiclass VPseudoVSLD_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVISlideX_MX = !cast<SchedWrite>("WriteVISlideX_" # mx);
- defvar WriteVISlideI_MX = !cast<SchedWrite>("WriteVISlideI_" # mx);
- defvar ReadVISlideV_MX = !cast<SchedRead>("ReadVISlideV_" # mx);
- defvar ReadVISlideX_MX = !cast<SchedRead>("ReadVISlideX_" # mx);
-
defm "" : VPseudoVSLDV_VX<m, Constraint>,
- Sched<[WriteVISlideX_MX, ReadVISlideV_MX, ReadVISlideV_MX,
- ReadVISlideX_MX, ReadVMask]>;
+ SchedTernary<"WriteVISlideX", "ReadVISlideV", "ReadVISlideV",
+ "ReadVISlideX", mx>;
defm "" : VPseudoVSLDV_VI<ImmType, m, Constraint>,
- Sched<[WriteVISlideI_MX, ReadVISlideV_MX, ReadVISlideV_MX, ReadVMask]>;
+ SchedBinary<"WriteVISlideI", "ReadVISlideV", "ReadVISlideV", mx>;
}
}
multiclass VPseudoVWMAC_VV_VX {
foreach m = MxListW in {
defvar mx = m.MX;
- defvar WriteVIWMulAddV_MX = !cast<SchedWrite>("WriteVIWMulAddV_" # mx);
- defvar WriteVIWMulAddX_MX = !cast<SchedWrite>("WriteVIWMulAddX_" # mx);
- defvar ReadVIWMulAddV_MX = !cast<SchedRead>("ReadVIWMulAddV_" # mx);
- defvar ReadVIWMulAddX_MX = !cast<SchedRead>("ReadVIWMulAddX_" # mx);
-
defm "" : VPseudoTernaryW_VV<m>,
- Sched<[WriteVIWMulAddV_MX, ReadVIWMulAddV_MX, ReadVIWMulAddV_MX,
- ReadVIWMulAddV_MX, ReadVMask]>;
+ SchedTernary<"WriteVIWMulAddV", "ReadVIWMulAddV", "ReadVIWMulAddV",
+ "ReadVIWMulAddV", mx>;
defm "" : VPseudoTernaryW_VX<m>,
- Sched<[WriteVIWMulAddX_MX, ReadVIWMulAddV_MX, ReadVIWMulAddV_MX,
- ReadVIWMulAddX_MX, ReadVMask]>;
+ SchedTernary<"WriteVIWMulAddX", "ReadVIWMulAddV", "ReadVIWMulAddX",
+ "ReadVIWMulAddV", mx>;
}
}
multiclass VPseudoVWMAC_VX {
foreach m = MxListW in {
- defvar mx = m.MX;
- defvar WriteVIWMulAddX_MX = !cast<SchedWrite>("WriteVIWMulAddX_" # mx);
- defvar ReadVIWMulAddV_MX= !cast<SchedRead>("ReadVIWMulAddV_" # mx);
- defvar ReadVIWMulAddX_MX = !cast<SchedRead>("ReadVIWMulAddX_" # mx);
-
defm "" : VPseudoTernaryW_VX<m>,
- Sched<[WriteVIWMulAddX_MX, ReadVIWMulAddV_MX, ReadVIWMulAddV_MX,
- ReadVIWMulAddX_MX, ReadVMask]>;
+ SchedTernary<"WriteVIWMulAddX", "ReadVIWMulAddV", "ReadVIWMulAddX",
+ "ReadVIWMulAddV", m.MX>;
}
}
multiclass VPseudoVWMAC_VV_VF_RM {
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWMulAddV_MX = !cast<SchedWrite>("WriteVFWMulAddV_" # mx);
- defvar ReadVFWMulAddV_MX = !cast<SchedRead>("ReadVFWMulAddV_" # mx);
-
defm "" : VPseudoTernaryW_VV_RM<m>,
- Sched<[WriteVFWMulAddV_MX, ReadVFWMulAddV_MX,
- ReadVFWMulAddV_MX, ReadVFWMulAddV_MX, ReadVMask]>;
+ SchedTernary<"WriteVFWMulAddV", "ReadVFWMulAddV",
+ "ReadVFWMulAddV", "ReadVFWMulAddV", m.MX>;
}
foreach f = FPListW in {
foreach m = f.MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWMulAddF_MX = !cast<SchedWrite>("WriteVFWMulAddF_" # mx);
- defvar ReadVFWMulAddV_MX = !cast<SchedRead>("ReadVFWMulAddV_" # mx);
- defvar ReadVFWMulAddF_MX = !cast<SchedRead>("ReadVFWMulAddF_" # mx);
+ defm "" : VPseudoTernaryW_VF_RM<m, f>,
+ SchedTernary<"WriteVFWMulAddF", "ReadVFWMulAddV",
+ "ReadVFWMulAddF", "ReadVFWMulAddV", m.MX>;
+ }
+ }
+}
+multiclass VPseudoVWMAC_VV_VF_BF_RM {
+ foreach m = MxListFW in {
+ defvar mx = m.MX;
+ defm "" : VPseudoTernaryW_VV_RM<m>,
+ SchedTernary<"WriteVFWMulAddV", "ReadVFWMulAddV",
+ "ReadVFWMulAddV", "ReadVFWMulAddV", mx>;
+ }
+
+ foreach f = BFPListW in {
+ foreach m = f.MxListFW in {
+ defvar mx = m.MX;
defm "" : VPseudoTernaryW_VF_RM<m, f>,
- Sched<[WriteVFWMulAddF_MX, ReadVFWMulAddV_MX,
- ReadVFWMulAddV_MX, ReadVFWMulAddF_MX, ReadVMask]>;
+ SchedTernary<"WriteVFWMulAddF", "ReadVFWMulAddV",
+ "ReadVFWMulAddF", "ReadVFWMulAddV", mx>;
}
}
}
@@ -3496,55 +3467,35 @@ multiclass VPseudoVWMAC_VV_VF_RM {
multiclass VPseudoVCMPM_VV_VX_VI {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVICmpV_MX = !cast<SchedWrite>("WriteVICmpV_" # mx);
- defvar WriteVICmpX_MX = !cast<SchedWrite>("WriteVICmpX_" # mx);
- defvar WriteVICmpI_MX = !cast<SchedWrite>("WriteVICmpI_" # mx);
- defvar ReadVICmpV_MX = !cast<SchedRead>("ReadVICmpV_" # mx);
- defvar ReadVICmpX_MX = !cast<SchedRead>("ReadVICmpX_" # mx);
-
- defm "" : VPseudoBinaryM_VV<m>,
- Sched<[WriteVICmpV_MX, ReadVICmpV_MX, ReadVICmpV_MX, ReadVMask]>;
- defm "" : VPseudoBinaryM_VX<m>,
- Sched<[WriteVICmpX_MX, ReadVICmpV_MX, ReadVICmpX_MX, ReadVMask]>;
- defm "" : VPseudoBinaryM_VI<m>,
- Sched<[WriteVICmpI_MX, ReadVICmpV_MX, ReadVMask]>;
+ defm "" : VPseudoBinaryM_VV<m, TargetConstraintType=2>,
+ SchedBinary<"WriteVICmpV", "ReadVICmpV", "ReadVICmpV", mx>;
+ defm "" : VPseudoBinaryM_VX<m, TargetConstraintType=2>,
+ SchedBinary<"WriteVICmpX", "ReadVICmpV", "ReadVICmpX", mx>;
+ defm "" : VPseudoBinaryM_VI<m, TargetConstraintType=2>,
+ SchedUnary<"WriteVICmpI", "ReadVICmpV", mx>;
}
}
multiclass VPseudoVCMPM_VV_VX {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVICmpV_MX = !cast<SchedWrite>("WriteVICmpV_" # mx);
- defvar WriteVICmpX_MX = !cast<SchedWrite>("WriteVICmpX_" # mx);
- defvar ReadVICmpV_MX = !cast<SchedRead>("ReadVICmpV_" # mx);
- defvar ReadVICmpX_MX = !cast<SchedRead>("ReadVICmpX_" # mx);
-
- defm "" : VPseudoBinaryM_VV<m>,
- Sched<[WriteVICmpV_MX, ReadVICmpV_MX, ReadVICmpV_MX, ReadVMask]>;
- defm "" : VPseudoBinaryM_VX<m>,
- Sched<[WriteVICmpX_MX, ReadVICmpV_MX, ReadVICmpX_MX, ReadVMask]>;
+ defm "" : VPseudoBinaryM_VV<m, TargetConstraintType=2>,
+ SchedBinary<"WriteVICmpV", "ReadVICmpV", "ReadVICmpV", mx>;
+ defm "" : VPseudoBinaryM_VX<m, TargetConstraintType=2>,
+ SchedBinary<"WriteVICmpX", "ReadVICmpV", "ReadVICmpX", mx>;
}
}
multiclass VPseudoVCMPM_VV_VF {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFCmpV_MX = !cast<SchedWrite>("WriteVFCmpV_" # mx);
- defvar ReadVFCmpV_MX = !cast<SchedRead>("ReadVFCmpV_" # mx);
-
- defm "" : VPseudoBinaryM_VV<m>,
- Sched<[WriteVFCmpV_MX, ReadVFCmpV_MX, ReadVFCmpV_MX, ReadVMask]>;
+ defm "" : VPseudoBinaryM_VV<m, TargetConstraintType=2>,
+ SchedBinary<"WriteVFCmpV", "ReadVFCmpV", "ReadVFCmpV", m.MX>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFCmpF_MX = !cast<SchedWrite>("WriteVFCmpF_" # mx);
- defvar ReadVFCmpV_MX = !cast<SchedRead>("ReadVFCmpV_" # mx);
- defvar ReadVFCmpF_MX = !cast<SchedRead>("ReadVFCmpF_" # mx);
-
- defm "" : VPseudoBinaryM_VF<m, f>,
- Sched<[WriteVFCmpF_MX, ReadVFCmpV_MX, ReadVFCmpF_MX, ReadVMask]>;
+ defm "" : VPseudoBinaryM_VF<m, f, TargetConstraintType=2>,
+ SchedBinary<"WriteVFCmpF", "ReadVFCmpV", "ReadVFCmpF", m.MX>;
}
}
}
@@ -3552,13 +3503,8 @@ multiclass VPseudoVCMPM_VV_VF {
multiclass VPseudoVCMPM_VF {
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFCmpF_MX = !cast<SchedWrite>("WriteVFCmpF_" # mx);
- defvar ReadVFCmpV_MX = !cast<SchedRead>("ReadVFCmpV_" # mx);
- defvar ReadVFCmpF_MX = !cast<SchedRead>("ReadVFCmpF_" # mx);
-
- defm "" : VPseudoBinaryM_VF<m, f>,
- Sched<[WriteVFCmpF_MX, ReadVFCmpV_MX, ReadVFCmpF_MX, ReadVMask]>;
+ defm "" : VPseudoBinaryM_VF<m, f, TargetConstraintType=2>,
+ SchedBinary<"WriteVFCmpF", "ReadVFCmpV", "ReadVFCmpF", m.MX>;
}
}
}
@@ -3566,15 +3512,10 @@ multiclass VPseudoVCMPM_VF {
multiclass VPseudoVCMPM_VX_VI {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVICmpX_MX = !cast<SchedWrite>("WriteVICmpX_" # mx);
- defvar WriteVICmpI_MX = !cast<SchedWrite>("WriteVICmpI_" # mx);
- defvar ReadVICmpV_MX = !cast<SchedRead>("ReadVICmpV_" # mx);
- defvar ReadVICmpX_MX = !cast<SchedRead>("ReadVICmpX_" # mx);
-
- defm "" : VPseudoBinaryM_VX<m>,
- Sched<[WriteVICmpX_MX, ReadVICmpV_MX, ReadVICmpX_MX, ReadVMask]>;
- defm "" : VPseudoBinaryM_VI<m>,
- Sched<[WriteVICmpI_MX, ReadVICmpV_MX, ReadVMask]>;
+ defm "" : VPseudoBinaryM_VX<m, TargetConstraintType=2>,
+ SchedBinary<"WriteVICmpX", "ReadVICmpV", "ReadVICmpX", mx>;
+ defm "" : VPseudoBinaryM_VI<m, TargetConstraintType=2>,
+ SchedUnary<"WriteVICmpI", "ReadVICmpV", mx>;
}
}
@@ -3582,10 +3523,8 @@ multiclass VPseudoVRED_VS {
foreach m = MxList in {
defvar mx = m.MX;
foreach e = SchedSEWSet<mx>.val in {
- defvar WriteVIRedV_From_MX_E = !cast<SchedWrite>("WriteVIRedV_From_" # mx # "_E" # e);
defm _VS : VPseudoTernaryWithTailPolicy<V_M1.vrclass, m.vrclass, V_M1.vrclass, m, e>,
- Sched<[WriteVIRedV_From_MX_E, ReadVIRedV, ReadVIRedV, ReadVIRedV,
- ReadVMask]>;
+ SchedReduction<"WriteVIRedV_From", "ReadVIRedV", mx, e>;
}
}
}
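SchedReduction differs from the unary/binary helpers above: judging by the
removed lists, only the write is (LMUL, SEW)-specialized while the three source
reads stay unsuffixed. Assuming a one-for-one rebuild of those lists (the
sketch class name is invented):

  class SchedReductionSketch<string write, string read, string mx, int sew>
      : Sched<[!cast<SchedWrite>(write # "_" # mx # "_E" # sew),
               !cast<SchedRead>(read), !cast<SchedRead>(read),
               !cast<SchedRead>(read), ReadVMask]>;
  // SchedReductionSketch<"WriteVIRedV_From", "ReadVIRedV", "M2", 32>
  //   ==> Sched<[WriteVIRedV_From_M2_E32, ReadVIRedV, ReadVIRedV,
  //              ReadVIRedV, ReadVMask]>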
@@ -3594,10 +3533,8 @@ multiclass VPseudoVREDMINMAX_VS {
foreach m = MxList in {
defvar mx = m.MX;
foreach e = SchedSEWSet<mx>.val in {
- defvar WriteVIRedMinMaxV_From_MX_E = !cast<SchedWrite>("WriteVIRedMinMaxV_From_" # mx # "_E" # e);
defm _VS : VPseudoTernaryWithTailPolicy<V_M1.vrclass, m.vrclass, V_M1.vrclass, m, e>,
- Sched<[WriteVIRedMinMaxV_From_MX_E, ReadVIRedV, ReadVIRedV,
- ReadVIRedV, ReadVMask]>;
+ SchedReduction<"WriteVIRedMinMaxV_From", "ReadVIRedV", mx, e>;
}
}
}
@@ -3606,10 +3543,8 @@ multiclass VPseudoVWRED_VS {
foreach m = MxListWRed in {
defvar mx = m.MX;
foreach e = SchedSEWSet<mx, isWidening=1>.val in {
- defvar WriteVIWRedV_From_MX_E = !cast<SchedWrite>("WriteVIWRedV_From_" # mx # "_E" # e);
defm _VS : VPseudoTernaryWithTailPolicy<V_M1.vrclass, m.vrclass, V_M1.vrclass, m, e>,
- Sched<[WriteVIWRedV_From_MX_E, ReadVIWRedV, ReadVIWRedV,
- ReadVIWRedV, ReadVMask]>;
+ SchedReduction<"WriteVIWRedV_From", "ReadVIWRedV", mx, e>;
}
}
}
@@ -3618,12 +3553,10 @@ multiclass VPseudoVFRED_VS_RM {
foreach m = MxListF in {
defvar mx = m.MX;
foreach e = SchedSEWSet<mx, isF=1>.val in {
- defvar WriteVFRedV_From_MX_E = !cast<SchedWrite>("WriteVFRedV_From_" # mx # "_E" # e);
defm _VS
- : VPseudoTernaryWithTailPolicyRoundingMode<V_M1.vrclass, m.vrclass,
+ : VPseudoTernaryWithTailPolicyRoundingMode<V_M1.vrclass, m.vrclass,
V_M1.vrclass, m, e>,
- Sched<[WriteVFRedV_From_MX_E, ReadVFRedV, ReadVFRedV, ReadVFRedV,
- ReadVMask]>;
+ SchedReduction<"WriteVFRedV_From", "ReadVFRedV", mx, e>;
}
}
}
@@ -3632,10 +3565,8 @@ multiclass VPseudoVFREDMINMAX_VS {
foreach m = MxListF in {
defvar mx = m.MX;
foreach e = SchedSEWSet<mx, isF=1>.val in {
- defvar WriteVFRedMinMaxV_From_MX_E = !cast<SchedWrite>("WriteVFRedMinMaxV_From_" # mx # "_E" # e);
defm _VS : VPseudoTernaryWithTailPolicy<V_M1.vrclass, m.vrclass, V_M1.vrclass, m, e>,
- Sched<[WriteVFRedMinMaxV_From_MX_E, ReadVFRedV, ReadVFRedV, ReadVFRedV,
- ReadVMask]>;
+ SchedReduction<"WriteVFRedMinMaxV_From", "ReadVFRedV", mx, e>;
}
}
}
@@ -3644,11 +3575,9 @@ multiclass VPseudoVFREDO_VS_RM {
foreach m = MxListF in {
defvar mx = m.MX;
foreach e = SchedSEWSet<mx, isF=1>.val in {
- defvar WriteVFRedOV_From_MX_E = !cast<SchedWrite>("WriteVFRedOV_From_" # mx # "_E" # e);
defm _VS : VPseudoTernaryWithTailPolicyRoundingMode<V_M1.vrclass, m.vrclass,
V_M1.vrclass, m, e>,
- Sched<[WriteVFRedOV_From_MX_E, ReadVFRedOV, ReadVFRedOV,
- ReadVFRedOV, ReadVMask]>;
+ SchedReduction<"WriteVFRedOV_From", "ReadVFRedOV", mx, e>;
}
}
}
@@ -3657,12 +3586,22 @@ multiclass VPseudoVFWRED_VS_RM {
foreach m = MxListFWRed in {
defvar mx = m.MX;
foreach e = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
- defvar WriteVFWRedV_From_MX_E = !cast<SchedWrite>("WriteVFWRedV_From_" # mx # "_E" # e);
defm _VS
: VPseudoTernaryWithTailPolicyRoundingMode<V_M1.vrclass, m.vrclass,
V_M1.vrclass, m, e>,
- Sched<[WriteVFWRedV_From_MX_E, ReadVFWRedV, ReadVFWRedV,
- ReadVFWRedV, ReadVMask]>;
+ SchedReduction<"WriteVFWRedV_From", "ReadVFWRedV", mx, e>;
+ }
+ }
+}
+
+multiclass VPseudoVFWREDO_VS_RM {
+ foreach m = MxListFWRed in {
+ defvar mx = m.MX;
+ foreach e = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
+ defm _VS
+ : VPseudoTernaryWithTailPolicyRoundingMode<V_M1.vrclass, m.vrclass,
+ V_M1.vrclass, m, e>,
+ SchedReduction<"WriteVFWRedOV_From", "ReadVFWRedV", mx, e>;
}
}
}
@@ -3670,11 +3609,12 @@ multiclass VPseudoVFWRED_VS_RM {
multiclass VPseudoConversion<VReg RetClass,
VReg Op1Class,
LMULInfo MInfo,
- string Constraint = ""> {
+ string Constraint = "",
+ int TargetConstraintType = 1> {
let VLMul = MInfo.value in {
- def "_" # MInfo.MX : VPseudoUnaryNoMask<RetClass, Op1Class, Constraint>;
+ def "_" # MInfo.MX : VPseudoUnaryNoMask<RetClass, Op1Class, Constraint, TargetConstraintType>;
def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMask<RetClass, Op1Class,
- Constraint>,
+ Constraint, TargetConstraintType>,
RISCVMaskedPseudo<MaskIdx=2>;
}
}
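// TargetConstraintType, new in this signature, is threaded through to the
// VPseudoUnary* base classes; judging from the call sites below it selects a
// register-overlap constraint group (default 1 for same-width conversions,
// 3 for widening, 2 for narrowing). The encoding itself is defined outside
// this hunk.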
@@ -3682,9 +3622,10 @@ multiclass VPseudoConversion<VReg RetClass,
multiclass VPseudoConversionRoundingMode<VReg RetClass,
VReg Op1Class,
LMULInfo MInfo,
- string Constraint = ""> {
+ string Constraint = "",
+ int TargetConstraintType = 1> {
let VLMul = MInfo.value in {
- def "_" # MInfo.MX : VPseudoUnaryNoMaskRoundingMode<RetClass, Op1Class, Constraint>;
+ def "_" # MInfo.MX : VPseudoUnaryNoMaskRoundingMode<RetClass, Op1Class, Constraint, TargetConstraintType>;
def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMaskRoundingMode<RetClass, Op1Class,
Constraint>,
RISCVMaskedPseudo<MaskIdx=2>;
@@ -3716,211 +3657,157 @@ multiclass VPseudoConversionNoExcept<VReg RetClass,
multiclass VPseudoVCVTI_V {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFCvtFToIV_MX = !cast<SchedWrite>("WriteVFCvtFToIV_" # mx);
- defvar ReadVFCvtFToIV_MX = !cast<SchedRead>("ReadVFCvtFToIV_" # mx);
-
defm _V : VPseudoConversion<m.vrclass, m.vrclass, m>,
- Sched<[WriteVFCvtFToIV_MX, ReadVFCvtFToIV_MX, ReadVMask]>;
+ SchedUnary<"WriteVFCvtFToIV", "ReadVFCvtFToIV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVCVTI_V_RM {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFCvtFToIV_MX = !cast<SchedWrite>("WriteVFCvtFToIV_" # mx);
- defvar ReadVFCvtFToIV_MX = !cast<SchedRead>("ReadVFCvtFToIV_" # mx);
-
defm _V : VPseudoConversionRoundingMode<m.vrclass, m.vrclass, m>,
- Sched<[WriteVFCvtFToIV_MX, ReadVFCvtFToIV_MX, ReadVMask]>;
+ SchedUnary<"WriteVFCvtFToIV", "ReadVFCvtFToIV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVCVTI_RM_V {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFCvtFToIV_MX = !cast<SchedWrite>("WriteVFCvtFToIV_" # mx);
- defvar ReadVFCvtFToIV_MX = !cast<SchedRead>("ReadVFCvtFToIV_" # mx);
-
defm _V : VPseudoConversionRM<m.vrclass, m.vrclass, m>,
- Sched<[WriteVFCvtFToIV_MX, ReadVFCvtFToIV_MX, ReadVMask]>;
+ SchedUnary<"WriteVFCvtFToIV", "ReadVFCvtFToIV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVFROUND_NOEXCEPT_V {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFCvtFToIV_MX = !cast<SchedWrite>("WriteVFCvtFToIV_" # mx);
- defvar ReadVFCvtFToIV_MX = !cast<SchedRead>("ReadVFCvtFToIV_" # mx);
-
defm _V : VPseudoConversionNoExcept<m.vrclass, m.vrclass, m>,
- Sched<[WriteVFCvtFToIV_MX, ReadVFCvtFToIV_MX, ReadVMask]>;
+ SchedUnary<"WriteVFCvtFToIV", "ReadVFCvtFToIV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVCVTF_V_RM {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFCvtIToFV_MX = !cast<SchedWrite>("WriteVFCvtIToFV_" # mx);
- defvar ReadVFCvtIToFV_MX = !cast<SchedRead>("ReadVFCvtIToFV_" # mx);
-
defm _V : VPseudoConversionRoundingMode<m.vrclass, m.vrclass, m>,
- Sched<[WriteVFCvtIToFV_MX, ReadVFCvtIToFV_MX, ReadVMask]>;
+ SchedUnary<"WriteVFCvtIToFV", "ReadVFCvtIToFV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVCVTF_RM_V {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFCvtIToFV_MX = !cast<SchedWrite>("WriteVFCvtIToFV_" # mx);
- defvar ReadVFCvtIToFV_MX = !cast<SchedRead>("ReadVFCvtIToFV_" # mx);
-
defm _V : VPseudoConversionRM<m.vrclass, m.vrclass, m>,
- Sched<[WriteVFCvtIToFV_MX, ReadVFCvtIToFV_MX, ReadVMask]>;
+ SchedUnary<"WriteVFCvtIToFV", "ReadVFCvtIToFV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVWCVTI_V {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWCvtFToIV_MX = !cast<SchedWrite>("WriteVFWCvtFToIV_" # mx);
- defvar ReadVFWCvtFToIV_MX = !cast<SchedRead>("ReadVFWCvtFToIV_" # mx);
-
- defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>,
- Sched<[WriteVFWCvtFToIV_MX, ReadVFWCvtFToIV_MX, ReadVMask]>;
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, TargetConstraintType=3>,
+ SchedUnary<"WriteVFWCvtFToIV", "ReadVFWCvtFToIV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVWCVTI_V_RM {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWCvtFToIV_MX = !cast<SchedWrite>("WriteVFWCvtFToIV_" # mx);
- defvar ReadVFWCvtFToIV_MX = !cast<SchedRead>("ReadVFWCvtFToIV_" # mx);
-
- defm _V : VPseudoConversionRoundingMode<m.wvrclass, m.vrclass, m, constraint>,
- Sched<[WriteVFWCvtFToIV_MX, ReadVFWCvtFToIV_MX, ReadVMask]>;
+ defm _V : VPseudoConversionRoundingMode<m.wvrclass, m.vrclass, m, constraint, TargetConstraintType=3>,
+ SchedUnary<"WriteVFWCvtFToIV", "ReadVFWCvtFToIV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVWCVTI_RM_V {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWCvtFToIV_MX = !cast<SchedWrite>("WriteVFWCvtFToIV_" # mx);
- defvar ReadVFWCvtFToIV_MX = !cast<SchedRead>("ReadVFWCvtFToIV_" # mx);
-
defm _V : VPseudoConversionRM<m.wvrclass, m.vrclass, m, constraint>,
- Sched<[WriteVFWCvtFToIV_MX, ReadVFWCvtFToIV_MX, ReadVMask]>;
+ SchedUnary<"WriteVFWCvtFToIV", "ReadVFWCvtFToIV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVWCVTF_V {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListW in {
- defvar mx = m.MX;
- defvar WriteVFWCvtIToFV_MX = !cast<SchedWrite>("WriteVFWCvtIToFV_" # mx);
- defvar ReadVFWCvtIToFV_MX = !cast<SchedRead>("ReadVFWCvtIToFV_" # mx);
-
- defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>,
- Sched<[WriteVFWCvtIToFV_MX, ReadVFWCvtIToFV_MX, ReadVMask]>;
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, TargetConstraintType=3>,
+ SchedUnary<"WriteVFWCvtIToFV", "ReadVFWCvtIToFV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVWCVTD_V {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWCvtFToFV_MX = !cast<SchedWrite>("WriteVFWCvtFToFV_" # mx);
- defvar ReadVFWCvtFToFV_MX = !cast<SchedRead>("ReadVFWCvtFToFV_" # mx);
-
- defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>,
- Sched<[WriteVFWCvtFToFV_MX, ReadVFWCvtFToFV_MX, ReadVMask]>;
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, TargetConstraintType=3>,
+ SchedUnary<"WriteVFWCvtFToFV", "ReadVFWCvtFToFV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNCVTI_W {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListW in {
- defvar mx = m.MX;
- defvar WriteVFNCvtFToIV_MX = !cast<SchedWrite>("WriteVFNCvtFToIV_" # mx);
- defvar ReadVFNCvtFToIV_MX = !cast<SchedRead>("ReadVFNCvtFToIV_" # mx);
-
- defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint>,
- Sched<[WriteVFNCvtFToIV_MX, ReadVFNCvtFToIV_MX, ReadVMask]>;
+ defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtFToIV", "ReadVFNCvtFToIV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNCVTI_W_RM {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListW in {
- defvar mx = m.MX;
- defvar WriteVFNCvtFToIV_MX = !cast<SchedWrite>("WriteVFNCvtFToIV_" # mx);
- defvar ReadVFNCvtFToIV_MX = !cast<SchedRead>("ReadVFNCvtFToIV_" # mx);
-
- defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint>,
- Sched<[WriteVFNCvtFToIV_MX, ReadVFNCvtFToIV_MX, ReadVMask]>;
+ defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtFToIV", "ReadVFNCvtFToIV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNCVTI_RM_W {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListW in {
- defvar mx = m.MX;
- defvar WriteVFNCvtFToIV_MX = !cast<SchedWrite>("WriteVFNCvtFToIV_" # mx);
- defvar ReadVFNCvtFToIV_MX = !cast<SchedRead>("ReadVFNCvtFToIV_" # mx);
-
defm _W : VPseudoConversionRM<m.vrclass, m.wvrclass, m, constraint>,
- Sched<[WriteVFNCvtFToIV_MX, ReadVFNCvtFToIV_MX, ReadVMask]>;
+ SchedUnary<"WriteVFNCvtFToIV", "ReadVFNCvtFToIV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNCVTF_W_RM {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFNCvtIToFV_MX = !cast<SchedWrite>("WriteVFNCvtIToFV_" # mx);
- defvar ReadVFNCvtIToFV_MX = !cast<SchedRead>("ReadVFNCvtIToFV_" # mx);
-
- defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint>,
- Sched<[WriteVFNCvtIToFV_MX, ReadVFNCvtIToFV_MX, ReadVMask]>;
+ defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNCVTF_RM_W {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFNCvtIToFV_MX = !cast<SchedWrite>("WriteVFNCvtIToFV_" # mx);
- defvar ReadVFNCvtIToFV_MX = !cast<SchedRead>("ReadVFNCvtIToFV_" # mx);
-
defm _W : VPseudoConversionRM<m.vrclass, m.wvrclass, m, constraint>,
- Sched<[WriteVFNCvtIToFV_MX, ReadVFNCvtIToFV_MX, ReadVMask]>;
+ SchedUnary<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNCVTD_W {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFNCvtFToFV_MX = !cast<SchedWrite>("WriteVFNCvtFToFV_" # mx);
- defvar ReadVFNCvtFToFV_MX = !cast<SchedRead>("ReadVFNCvtFToFV_" # mx);
-
- defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint>,
- Sched<[WriteVFNCvtFToFV_MX, ReadVFNCvtFToFV_MX, ReadVMask]>;
+ defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNCVTD_W_RM {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFNCvtFToFV_MX = !cast<SchedWrite>("WriteVFNCvtFToFV_" # mx);
- defvar ReadVFNCvtFToFV_MX = !cast<SchedRead>("ReadVFNCvtFToFV_" # mx);
-
- defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint>,
- Sched<[WriteVFNCvtFToFV_MX, ReadVFNCvtFToFV_MX, ReadVMask]>;
+ defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX,
+ forceMergeOpRead=true>;
}
}
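// Throughout the conversion multiclasses above, SchedUnary<write, read, mx,
// forceMergeOpRead=true> stands in for the old hand-built
// Sched<[Write_MX, Read_MX, ReadVMask]> lists. forceMergeOpRead is presumed
// to add one more SchedRead for the merge (passthru) operand, which the
// deleted lists did not model; otherwise the helper performs the same
// per-LMUL !cast<SchedWrite>/!cast<SchedRead> lookups seen on the removed
// lines.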
@@ -3988,18 +3875,17 @@ multiclass VPseudoISegLoad<bit Ordered> {
defvar idxEMUL = !cast<LMULInfo>("V_" # IdxLInfo);
defvar DataVreg = dataEMUL.vrclass;
defvar IdxVreg = idxEMUL.vrclass;
- defvar Order = !if(Ordered, "O", "U");
let VLMul = dataEMUL.value in {
foreach nf = NFSet<dataEMUL>.L in {
defvar Vreg = SegRegClass<dataEMUL, nf>.RC;
def nf # "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo :
VPseudoISegLoadNoMask<Vreg, IdxVreg, idxEEW, idxEMUL.value,
nf, Ordered>,
- VLXSEGSched<nf, dataEEW, Order, DataLInfo>;
+ VLXSEGSched<nf, dataEEW, Ordered, DataLInfo>;
def nf # "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" :
VPseudoISegLoadMask<Vreg, IdxVreg, idxEEW, idxEMUL.value,
nf, Ordered>,
- VLXSEGSched<nf, dataEEW, Order, DataLInfo>;
+ VLXSEGSched<nf, dataEEW, Ordered, DataLInfo>;
}
}
}
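// VLXSEGSched (and VSXSEGSched in the store multiclass below) now take the
// Ordered bit directly; presumably the class derives the "O"/"U" infix
// itself, along the lines of the defvar it replaces:
//   defvar Order = !if(Ordered, "O", "U");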
@@ -4055,18 +3941,17 @@ multiclass VPseudoISegStore<bit Ordered> {
defvar idxEMUL = !cast<LMULInfo>("V_" # IdxLInfo);
defvar DataVreg = dataEMUL.vrclass;
defvar IdxVreg = idxEMUL.vrclass;
- defvar Order = !if(Ordered, "O", "U");
let VLMul = dataEMUL.value in {
foreach nf = NFSet<dataEMUL>.L in {
defvar Vreg = SegRegClass<dataEMUL, nf>.RC;
def nf # "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo :
VPseudoISegStoreNoMask<Vreg, IdxVreg, idxEEW, idxEMUL.value,
nf, Ordered>,
- VSXSEGSched<nf, idxEEW, Order, DataLInfo>;
+ VSXSEGSched<nf, idxEEW, Ordered, DataLInfo>;
def nf # "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" :
VPseudoISegStoreMask<Vreg, IdxVreg, idxEEW, idxEMUL.value,
nf, Ordered>,
- VSXSEGSched<nf, idxEEW, Order, DataLInfo>;
+ VSXSEGSched<nf, idxEEW, Ordered, DataLInfo>;
}
}
}
@@ -4087,16 +3972,12 @@ class VPatUnaryNoMask<string intrinsic_name,
int log2sew,
LMULInfo vlmul,
VReg result_reg_class,
- VReg op2_reg_class,
- bit isSEWAware = 0> :
+ VReg op2_reg_class> :
Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
(result_type result_reg_class:$merge),
(op2_type op2_reg_class:$rs2),
VLOpFrag)),
- (!cast<Instruction>(
- !if(isSEWAware,
- inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew),
- inst#"_"#kind#"_"#vlmul.MX))
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
(result_type result_reg_class:$merge),
(op2_type op2_reg_class:$rs2),
GPR:$vl, log2sew, TU_MU)>;
@@ -4135,17 +4016,13 @@ class VPatUnaryMask<string intrinsic_name,
int log2sew,
LMULInfo vlmul,
VReg result_reg_class,
- VReg op2_reg_class,
- bit isSEWAware = 0> :
+ VReg op2_reg_class> :
Pat<(result_type (!cast<Intrinsic>(intrinsic_name#"_mask")
(result_type result_reg_class:$merge),
(op2_type op2_reg_class:$rs2),
(mask_type V0),
VLOpFrag, (XLenVT timm:$policy))),
- (!cast<Instruction>(
- !if(isSEWAware,
- inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew)#"_MASK",
- inst#"_"#kind#"_"#vlmul.MX#"_MASK"))
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_MASK")
(result_type result_reg_class:$merge),
(op2_type op2_reg_class:$rs2),
(mask_type V0), GPR:$vl, log2sew, (XLenVT timm:$policy))>;
@@ -4187,7 +4064,7 @@ class VPatMaskUnaryNoMask<string intrinsic_name,
(!cast<Instruction>(inst#"_M_"#mti.BX)
(mti.Mask (IMPLICIT_DEF)),
(mti.Mask VR:$rs2),
- GPR:$vl, mti.Log2SEW, TU_MU)>;
+ GPR:$vl, mti.Log2SEW, TA_MA)>;
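// The trailing immediate is the tail/mask policy operand: TU_MU is
// tail-undisturbed/mask-undisturbed, TA_MA tail-agnostic/mask-agnostic.
// Because the destination here is (IMPLICIT_DEF), there are no prior tail or
// masked-off values to preserve, so TU_MU was needlessly conservative; the
// same reasoning underlies the TU_MU -> TA_MA switches throughout this diff.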
class VPatMaskUnaryMask<string intrinsic_name,
string inst,
@@ -4831,15 +4708,15 @@ multiclass VPatUnaryV_VF<string intrinsic, string instruction, string suffix,
}
multiclass VPatUnaryV_V<string intrinsic, string instruction,
- list<VTypeInfo> vtilist, bit isSEWAware = 0> {
+ list<VTypeInfo> vtilist> {
foreach vti = vtilist in {
let Predicates = GetVTypePredicates<vti>.Predicates in {
def : VPatUnaryNoMask<intrinsic, instruction, "V",
vti.Vector, vti.Vector, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass, isSEWAware>;
+ vti.LMul, vti.RegClass, vti.RegClass>;
def : VPatUnaryMask<intrinsic, instruction, "V",
vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass, isSEWAware>;
+ vti.LMul, vti.RegClass, vti.RegClass>;
}
}
}
@@ -6080,6 +5957,21 @@ multiclass VPatConversionWF_VF<string intrinsic, string instruction> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
+    // Define vfwcvt.f.f.v for f16 when Zvfhmin is enabled.
+ let Predicates = !if(!eq(fvti.Scalar, f16), [HasVInstructionsF16Minimal],
+ !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates)) in
+ defm : VPatConversionTA<intrinsic, instruction, "V",
+ fwti.Vector, fvti.Vector, fwti.Mask, fvti.Log2SEW,
+ fvti.LMul, fwti.RegClass, fvti.RegClass>;
+ }
+}
+
+multiclass VPatConversionWF_VF_BF <string intrinsic, string instruction> {
+  foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
GetVTypePredicates<fwti>.Predicates) in
defm : VPatConversionTA<intrinsic, instruction, "V",
@@ -6136,8 +6028,21 @@ multiclass VPatConversionVF_WF <string intrinsic, string instruction> {
}
}
-multiclass VPatConversionVF_WF_RM <string intrinsic, string instruction> {
- foreach fvtiToFWti = AllWidenableFloatVectors in {
+multiclass VPatConversionVF_WF_RM <string intrinsic, string instruction,
+ list<VTypeInfoToWide> wlist = AllWidenableFloatVectors> {
+ foreach fvtiToFWti = wlist in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ defm : VPatConversionTARoundingMode<intrinsic, instruction, "W",
+ fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
+ fvti.LMul, fvti.RegClass, fwti.RegClass>;
+ }
+}
+
+multiclass VPatConversionVF_WF_BF_RM <string intrinsic, string instruction> {
+ foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
@@ -6336,7 +6241,7 @@ foreach vti = AllIntegerVectors in {
GPR:$vl,
vti.Log2SEW,
(XLenVT timm:$policy))>;
-
+
// Match VSUB with a small immediate to vadd.vi by negating the immediate.
def : Pat<(vti.Vector (int_riscv_vsub (vti.Vector (undef)),
(vti.Vector vti.RegClass:$rs1),
@@ -6346,7 +6251,7 @@ foreach vti = AllIntegerVectors in {
vti.RegClass:$rs1,
(NegImm simm5_plus1:$rs2),
GPR:$vl,
- vti.Log2SEW, TU_MU)>;
+ vti.Log2SEW, TA_MA)>;
def : Pat<(vti.Vector (int_riscv_vsub_mask (vti.Vector vti.RegClass:$merge),
(vti.Vector vti.RegClass:$rs1),
(vti.Scalar simm5_plus1:$rs2),
@@ -6593,6 +6498,8 @@ defm PseudoVFWMACC : VPseudoVWMAC_VV_VF_RM;
defm PseudoVFWNMACC : VPseudoVWMAC_VV_VF_RM;
defm PseudoVFWMSAC : VPseudoVWMAC_VV_VF_RM;
defm PseudoVFWNMSAC : VPseudoVWMAC_VV_VF_RM;
+let Predicates = [HasStdExtZvfbfwma] in
+defm PseudoVFWMACCBF16 : VPseudoVWMAC_VV_VF_BF_RM;
}
//===----------------------------------------------------------------------===//
@@ -6697,6 +6604,7 @@ defm PseudoVFWCVT_F_XU : VPseudoVWCVTF_V;
defm PseudoVFWCVT_F_X : VPseudoVWCVTF_V;
defm PseudoVFWCVT_F_F : VPseudoVWCVTD_V;
+defm PseudoVFWCVTBF16_F_F : VPseudoVWCVTD_V;
} // mayRaiseFPException = true
//===----------------------------------------------------------------------===//
@@ -6722,6 +6630,7 @@ defm PseudoVFNCVT_RM_F_X : VPseudoVNCVTF_RM_W;
let hasSideEffects = 0, hasPostISelHook = 1 in
defm PseudoVFNCVT_F_F : VPseudoVNCVTD_W_RM;
+defm PseudoVFNCVTBF16_F_F : VPseudoVNCVTD_W_RM;
defm PseudoVFNCVT_ROD_F_F : VPseudoVNCVTD_W;
} // mayRaiseFPException = true
@@ -6774,7 +6683,7 @@ let IsRVVWideningReduction = 1,
hasSideEffects = 0,
mayRaiseFPException = true in {
defm PseudoVFWREDUSUM : VPseudoVFWRED_VS_RM;
-defm PseudoVFWREDOSUM : VPseudoVFWRED_VS_RM;
+defm PseudoVFWREDOSUM : VPseudoVFWREDO_VS_RM;
}
} // Predicates = [HasVInstructionsAnyF]
@@ -6787,14 +6696,14 @@ defm PseudoVFWREDOSUM : VPseudoVFWRED_VS_RM;
// 15.1 Vector Mask-Register Logical Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVMAND: VPseudoVALU_MM;
-defm PseudoVMNAND: VPseudoVALU_MM;
+defm PseudoVMAND: VPseudoVALU_MM<Commutable=1>;
+defm PseudoVMNAND: VPseudoVALU_MM<Commutable=1>;
defm PseudoVMANDN: VPseudoVALU_MM;
-defm PseudoVMXOR: VPseudoVALU_MM;
-defm PseudoVMOR: VPseudoVALU_MM;
-defm PseudoVMNOR: VPseudoVALU_MM;
+defm PseudoVMXOR: VPseudoVALU_MM<Commutable=1>;
+defm PseudoVMOR: VPseudoVALU_MM<Commutable=1>;
+defm PseudoVMNOR: VPseudoVALU_MM<Commutable=1>;
defm PseudoVMORN: VPseudoVALU_MM;
-defm PseudoVMXNOR: VPseudoVALU_MM;
+defm PseudoVMXNOR: VPseudoVALU_MM<Commutable=1>;
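// Commutable=1 presumably marks the generated mask pseudos isCommutable so
// the two mask sources may be swapped; vmandn and vmorn keep the default
// since their operands are not interchangeable.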
// Pseudo instructions
defm PseudoVMCLR : VPseudoNullaryPseudoM<"VMXOR">;
@@ -7005,7 +6914,7 @@ foreach vti = AllIntegerVectors in {
(XLenVT 1), VLOpFrag)),
(!cast<Instruction>("PseudoVADD_VV_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1,
- vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TA_MA)>;
def : Pat<(vti.Vector (int_riscv_vsll_mask (vti.Vector vti.RegClass:$merge),
(vti.Vector vti.RegClass:$rs1),
(XLenVT 1),
@@ -7139,7 +7048,7 @@ foreach vti = AllVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVMV_V_V_"#vti.LMul.MX)
$passthru, $rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
-
+
    // vmv.v.x/vmv.v.i are handled in RISCVInstrInfoVVLPatterns.td
}
}
@@ -7222,7 +7131,7 @@ defm : VPatBinaryW_WV_WX_RM<"int_riscv_vfwsub_w", "PseudoVFWSUB",
//===----------------------------------------------------------------------===//
// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
//===----------------------------------------------------------------------===//
-defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfmul", "PseudoVFMUL",
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfmul", "PseudoVFMUL",
AllFloatVectors>;
defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfdiv", "PseudoVFDIV",
AllFloatVectors, isSEWAware=1>;
@@ -7258,6 +7167,9 @@ defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmsac", "PseudoVFWMSAC",
AllWidenableFloatVectors>;
defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwnmsac", "PseudoVFWNMSAC",
AllWidenableFloatVectors>;
+let Predicates = [HasStdExtZvfbfwma] in
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmaccbf16", "PseudoVFWMACCBF16",
+ AllWidenableBFloatToFloatVectors>;
//===----------------------------------------------------------------------===//
// 13.8. Vector Floating-Point Square-Root Instruction
@@ -7362,6 +7274,8 @@ defm : VPatConversionWI_VF<"int_riscv_vfwcvt_rtz_x_f_v", "PseudoVFWCVT_RTZ_X_F">
defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_xu_v", "PseudoVFWCVT_F_XU">;
defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_x_v", "PseudoVFWCVT_F_X">;
defm : VPatConversionWF_VF<"int_riscv_vfwcvt_f_f_v", "PseudoVFWCVT_F_F">;
+defm : VPatConversionWF_VF_BF<"int_riscv_vfwcvtbf16_f_f_v",
+ "PseudoVFWCVTBF16_F_F">;
//===----------------------------------------------------------------------===//
// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
@@ -7372,7 +7286,18 @@ defm : VPatConversionVI_WF<"int_riscv_vfncvt_rtz_xu_f_w", "PseudoVFNCVT_RTZ_XU_F
defm : VPatConversionVI_WF<"int_riscv_vfncvt_rtz_x_f_w", "PseudoVFNCVT_RTZ_X_F">;
defm : VPatConversionVF_WI_RM <"int_riscv_vfncvt_f_xu_w", "PseudoVFNCVT_F_XU">;
defm : VPatConversionVF_WI_RM <"int_riscv_vfncvt_f_x_w", "PseudoVFNCVT_F_X">;
-defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F">;
+defvar WidenableFloatVectorsExceptF16 = !filter(fvtiToFWti, AllWidenableFloatVectors,
+ !ne(fvtiToFWti.Vti.Scalar, f16));
+defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F",
+ WidenableFloatVectorsExceptF16>;
+// Define vfncvt.f.f.w for f16 when Zvfhmin is enabled.
+defvar F16WidenableFloatVectors = !filter(fvtiToFWti, AllWidenableFloatVectors,
+ !eq(fvtiToFWti.Vti.Scalar, f16));
+let Predicates = [HasVInstructionsF16Minimal] in
+defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F",
+ F16WidenableFloatVectors>;
+defm : VPatConversionVF_WF_BF_RM<"int_riscv_vfncvtbf16_f_f_w",
+ "PseudoVFNCVTBF16_F_F">;
defm : VPatConversionVF_WF<"int_riscv_vfncvt_rod_f_f_w", "PseudoVFNCVT_ROD_F_F">;
//===----------------------------------------------------------------------===//
@@ -7500,6 +7425,11 @@ foreach fvti = AllFloatVectors in {
(fvti.Scalar (fpimm0)), VLOpFrag)),
(!cast<Instruction>("PseudoVMV_S_X_" # fvti.LMul.MX)
(fvti.Vector $rs1), (XLenVT X0), GPR:$vl, fvti.Log2SEW)>;
+
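// New pattern below: if the scalar is an FP immediate that SelectFPImm can
// already materialize in a GPR, vfmv.s.f is selected as the integer vmv.s.x
// pseudo reading that GPR, sidestepping an FP-register round trip.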
+ def : Pat<(fvti.Vector (int_riscv_vfmv_s_f (fvti.Vector fvti.RegClass:$rs1),
+ (fvti.Scalar (SelectFPImm (XLenVT GPR:$imm))), VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_S_X_" # fvti.LMul.MX)
+ (fvti.Vector $rs1), GPR:$imm, GPR:$vl, fvti.Log2SEW)>;
}
}
@@ -7532,10 +7462,6 @@ defm : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16_vv", "PseudoVRGATHEREI16",
// 16.5. Vector Compress Instruction
//===----------------------------------------------------------------------===//
defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllIntegerVectors>;
-defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllIntegerVectors>;
-defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllIntegerVectors>;
-defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllFloatVectors>;
-defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllFloatVectors>;
defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllFloatVectors>;
// Include the non-intrinsic ISel patterns
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 4141c7698bb4..b7c845703794 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -35,7 +35,7 @@ multiclass VPatUSLoadStoreSDNode<ValueType type,
// Load
def : Pat<(type (load GPR:$rs1)),
(load_instr (type (IMPLICIT_DEF)), GPR:$rs1, avl,
- log2sew, TU_MU)>;
+ log2sew, TA_MA)>;
// Store
def : Pat<(store type:$rs2, GPR:$rs1),
(store_instr reg_class:$rs2, GPR:$rs1, avl, log2sew)>;
@@ -399,7 +399,7 @@ multiclass VPatExtendSDNode_V<list<SDNode> ops, string inst_name, string suffix,
def : Pat<(vti.Vector (op (fti.Vector fti.RegClass:$rs2))),
(!cast<Instruction>(inst_name#"_"#suffix#"_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- fti.RegClass:$rs2, fti.AVL, vti.Log2SEW, TU_MU)>;
+ fti.RegClass:$rs2, fti.AVL, vti.Log2SEW, TA_MA)>;
}
}
@@ -416,7 +416,7 @@ multiclass VPatConvertI2FPSDNode_V_RM<SDPatternOperator vop,
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
FRM_DYN,
- fvti.AVL, fvti.Log2SEW, TU_MU)>;
+ fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
}
@@ -429,7 +429,7 @@ multiclass VPatConvertFP2ISDNode_V<SDPatternOperator vop,
def : Pat<(ivti.Vector (vop (fvti.Vector fvti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX)
(ivti.Vector (IMPLICIT_DEF)),
- fvti.RegClass:$rs1, ivti.AVL, ivti.Log2SEW, TU_MU)>;
+ fvti.RegClass:$rs1, ivti.AVL, ivti.Log2SEW, TA_MA)>;
}
}
@@ -444,7 +444,7 @@ multiclass VPatWConvertI2FPSDNode_V<SDPatternOperator vop,
(!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX)
(fwti.Vector (IMPLICIT_DEF)),
ivti.RegClass:$rs1,
- ivti.AVL, ivti.Log2SEW, TU_MU)>;
+ ivti.AVL, ivti.Log2SEW, TA_MA)>;
}
}
@@ -458,7 +458,7 @@ multiclass VPatWConvertFP2ISDNode_V<SDPatternOperator vop,
def : Pat<(iwti.Vector (vop (fvti.Vector fvti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX)
(iwti.Vector (IMPLICIT_DEF)),
- fvti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW, TU_MU)>;
+ fvti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
}
@@ -476,7 +476,7 @@ multiclass VPatNConvertI2FPSDNode_W_RM<SDPatternOperator vop,
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
FRM_DYN,
- fvti.AVL, fvti.Log2SEW, TU_MU)>;
+ fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
}
@@ -490,7 +490,7 @@ multiclass VPatNConvertFP2ISDNode_W<SDPatternOperator vop,
def : Pat<(vti.Vector (vop (fwti.Vector fwti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- fwti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>;
+ fwti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
@@ -505,12 +505,12 @@ multiclass VPatWidenBinarySDNode_VV_VX<SDNode op, PatFrags extop1, PatFrags exto
(wti.Vector (extop2 (vti.Vector vti.RegClass:$rs1)))),
(!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
- vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(op (wti.Vector (extop1 (vti.Vector vti.RegClass:$rs2))),
(wti.Vector (extop2 (vti.Vector (SplatPat (XLenVT GPR:$rs1)))))),
(!cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX)
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
- GPR:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>;
+ GPR:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
}
@@ -531,7 +531,7 @@ multiclass VPatWidenBinarySDNode_WV_WX<SDNode op, PatFrags extop,
(wti.Vector (extop (vti.Vector (SplatPat (XLenVT GPR:$rs1)))))),
(!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX)
(wti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs2, GPR:$rs1,
- vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
}
@@ -588,7 +588,7 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF<SDNode op, string instruction_name> {
(vti.Mask true_mask), (XLenVT srcvalue)))),
(!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
- vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse
(vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), (XLenVT srcvalue))),
@@ -597,14 +597,14 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF<SDNode op, string instruction_name> {
(vti.Mask true_mask), (XLenVT srcvalue)))),
(!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
- vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse
(vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), (XLenVT srcvalue))),
(wti.Vector (SplatFPOp (fpext_oneuse vti.ScalarRegClass:$rs1)))),
(!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
- vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
}
@@ -627,7 +627,7 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF_RM<SDNode op, string instruction_name>
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
FRM_DYN,
- vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse
(vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), (XLenVT srcvalue))),
@@ -640,7 +640,7 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF_RM<SDNode op, string instruction_name>
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
FRM_DYN,
- vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse
(vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), (XLenVT srcvalue))),
@@ -651,7 +651,7 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF_RM<SDNode op, string instruction_name>
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
FRM_DYN,
- vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
}
@@ -683,7 +683,7 @@ multiclass VPatWidenBinaryFPSDNode_WV_WF_RM<SDNode op, string instruction_name>
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
FRM_DYN,
- vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(op (wti.Vector wti.RegClass:$rs2),
(wti.Vector (SplatFPOp (fpext_oneuse (vti.Scalar vti.ScalarRegClass:$rs1))))),
(!cast<Instruction>(instruction_name#"_W"#vti.ScalarSuffix#"_"#vti.LMul.MX)
@@ -692,7 +692,7 @@ multiclass VPatWidenBinaryFPSDNode_WV_WF_RM<SDNode op, string instruction_name>
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
FRM_DYN,
- vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
}
@@ -883,16 +883,20 @@ multiclass VPatMultiplyAddSDNode_VV_VX<SDNode op, string instruction_name> {
// 7.4. Vector Unit-Stride Instructions
foreach vti = !listconcat(FractionalGroupIntegerVectors,
- FractionalGroupFloatVectors) in
- let Predicates = GetVTypePredicates<vti>.Predicates in
+ FractionalGroupFloatVectors,
+ FractionalGroupBFloatVectors) in
+ let Predicates = !if(!eq(vti.Scalar, f16), [HasVInstructionsF16Minimal],
+ GetVTypePredicates<vti>.Predicates) in
defm : VPatUSLoadStoreSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
vti.AVL, vti.RegClass>;
-foreach vti = [VI8M1, VI16M1, VI32M1, VI64M1, VF16M1, VF32M1, VF64M1] in
- let Predicates = GetVTypePredicates<vti>.Predicates in
+foreach vti = [VI8M1, VI16M1, VI32M1, VI64M1, VBF16M1, VF16M1, VF32M1, VF64M1] in
+ let Predicates = !if(!eq(vti.Scalar, f16), [HasVInstructionsF16Minimal],
+ GetVTypePredicates<vti>.Predicates) in
defm : VPatUSLoadStoreWholeVRSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
vti.RegClass>;
-foreach vti = !listconcat(GroupIntegerVectors, GroupFloatVectors) in
- let Predicates = GetVTypePredicates<vti>.Predicates in
+foreach vti = !listconcat(GroupIntegerVectors, GroupFloatVectors, GroupBFloatVectors) in
+ let Predicates = !if(!eq(vti.Scalar, f16), [HasVInstructionsF16Minimal],
+ GetVTypePredicates<vti>.Predicates) in
defm : VPatUSLoadStoreWholeVRSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
vti.RegClass>;
foreach mti = AllMasks in
@@ -916,12 +920,12 @@ foreach vti = AllIntegerVectors in {
(vti.Vector vti.RegClass:$rs1)),
(!cast<Instruction>("PseudoVRSUB_VX_"# vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2,
- vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(sub (vti.Vector (SplatPat_simm5 simm5:$rs2)),
(vti.Vector vti.RegClass:$rs1)),
(!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1,
- simm5:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>;
+ simm5:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
@@ -944,17 +948,17 @@ foreach vtiToWti = AllWidenableIntVectors in {
(wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))),
(!cast<Instruction>("PseudoVWADD_VV_"#vti.LMul.MX)
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1,
- vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(shl (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs1))),
(wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))),
(!cast<Instruction>("PseudoVWADDU_VV_"#vti.LMul.MX)
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1,
- vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(shl (wti.Vector (anyext_oneuse (vti.Vector vti.RegClass:$rs1))),
(wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))),
(!cast<Instruction>("PseudoVWADDU_VV_"#vti.LMul.MX)
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1,
- vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
@@ -989,7 +993,7 @@ foreach vti = AllIntegerVectors in {
(vti.Vector (riscv_vmv_v_x_vl (vti.Vector undef), 1, (XLenVT srcvalue)))),
(!cast<Instruction>("PseudoVADD_VV_"# vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1,
- vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
}
@@ -1051,6 +1055,23 @@ defm : VPatBinarySDNode_VV_VX<sdiv, "PseudoVDIV", isSEWAware=1>;
defm : VPatBinarySDNode_VV_VX<urem, "PseudoVREMU", isSEWAware=1>;
defm : VPatBinarySDNode_VV_VX<srem, "PseudoVREM", isSEWAware=1>;
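
// New ISel pattern below: truncating the srem of two sign-extended operands
// is equivalent to srem at the original element width, so the whole
// widen/srem/truncate chain folds to a single narrow vrem.vv. The
// "_E"#!shl(1, vti.Log2SEW) suffix is required because the VREM pseudos are
// SEW-aware (isSEWAware=1 above).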
+foreach vtiTowti = AllWidenableIntVectors in {
+ defvar vti = vtiTowti.Vti;
+ defvar wti = vtiTowti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : Pat<
+ (vti.Vector
+ (riscv_trunc_vector_vl
+ (srem (wti.Vector (sext_oneuse (vti.Vector vti.RegClass:$rs1))),
+ (wti.Vector (sext_oneuse (vti.Vector vti.RegClass:$rs2)))),
+ (vti.Mask true_mask), (XLenVT srcvalue))),
+ (!cast<Instruction>("PseudoVREM_VV_"#vti.LMul.MX#"_E"#!shl(1, vti.Log2SEW))
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+ }
+}
+
// 11.12. Vector Widening Integer Multiply Instructions
defm : VPatWidenBinarySDNode_VV_VX<mul, sext_oneuse, sext_oneuse,
"PseudoVWMUL">;
@@ -1145,7 +1166,7 @@ foreach mti = AllMasks in {
// Handle rvv_vnot the same as the vmnot.m pseudoinstruction.
def : Pat<(mti.Mask (rvv_vnot VR:$rs)),
(!cast<Instruction>("PseudoVMNAND_MM_"#mti.LMul.MX)
- VR:$rs, VR:$rs, mti.AVL, mti.Log2SEW)>;
+ VR:$rs, VR:$rs, mti.AVL, mti.Log2SEW)>;
}
}
@@ -1279,40 +1300,40 @@ foreach vti = AllFloatVectors in {
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
FRM_DYN,
- vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.AVL, vti.Log2SEW, TA_MA)>;
// 13.12. Vector Floating-Point Sign-Injection Instructions
def : Pat<(fabs (vti.Vector vti.RegClass:$rs)),
(!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>;
// Handle fneg with VFSGNJN using the same input for both operands.
def : Pat<(fneg (vti.Vector vti.RegClass:$rs)),
(!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector vti.RegClass:$rs2))),
(!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector (SplatFPOp vti.ScalarRegClass:$rs2)))),
(!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>;
-
+ vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector (fneg vti.RegClass:$rs2)))),
(!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector (fneg (SplatFPOp vti.ScalarRegClass:$rs2))))),
(!cast<Instruction>("PseudoVFSGNJN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
@@ -1337,7 +1358,8 @@ defm : VPatFPSetCCSDNode_VV_VF_FV<SETOLE, "PseudoVMFLE", "PseudoVMFGE">;
// 11.15. Vector Integer Merge Instructions
// 13.15. Vector Floating-Point Merge Instruction
foreach fvti = AllFloatVectors in {
- let Predicates = GetVTypePredicates<fvti>.Predicates in {
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = GetVTypePredicates<ivti>.Predicates in {
def : Pat<(fvti.Vector (vselect (fvti.Mask V0), fvti.RegClass:$rs1,
fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
@@ -1346,6 +1368,15 @@ foreach fvti = AllFloatVectors in {
fvti.AVL, fvti.Log2SEW)>;
def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
+ (SplatFPOp (fvti.Scalar fpimm0)),
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2, 0, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>;
+
+ }
+ let Predicates = GetVTypePredicates<fvti>.Predicates in
+ def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
(SplatFPOp fvti.ScalarRegClass:$rs1),
fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
@@ -1353,14 +1384,6 @@ foreach fvti = AllFloatVectors in {
fvti.RegClass:$rs2,
(fvti.Scalar fvti.ScalarRegClass:$rs1),
(fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
- (SplatFPOp (fvti.Scalar fpimm0)),
- fvti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
- (fvti.Vector (IMPLICIT_DEF)),
- fvti.RegClass:$rs2, 0, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>;
- }
}
// 13.17. Vector Single-Width Floating-Point/Integer Type-Convert Instructions
@@ -1383,8 +1406,9 @@ defm : VPatNConvertI2FPSDNode_W_RM<any_uint_to_fp, "PseudoVFNCVT_F_XU_W">;
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
- let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
- GetVTypePredicates<fwti>.Predicates) in
+ let Predicates = !if(!eq(fvti.Scalar, f16), [HasVInstructionsF16Minimal],
+ !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates)) in
def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))),
(!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX)
(fvti.Vector (IMPLICIT_DEF)),
@@ -1392,7 +1416,7 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
FRM_DYN,
- fvti.AVL, fvti.Log2SEW, TU_MU)>;
+ fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
//===----------------------------------------------------------------------===//
@@ -1400,18 +1424,18 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
//===----------------------------------------------------------------------===//
foreach fvti = AllFloatVectors in {
- let Predicates = GetVTypePredicates<fvti>.Predicates in {
- def : Pat<(fvti.Vector (SplatFPOp fvti.ScalarRegClass:$rs1)),
+ let Predicates = GetVTypePredicates<fvti>.Predicates in
+ def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl undef, fvti.ScalarRegClass:$rs1, srcvalue)),
(!cast<Instruction>("PseudoVFMV_V_"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
(fvti.Vector (IMPLICIT_DEF)),
(fvti.Scalar fvti.ScalarRegClass:$rs1),
- fvti.AVL, fvti.Log2SEW, TU_MU)>;
-
+ fvti.AVL, fvti.Log2SEW, TA_MA)>;
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = GetVTypePredicates<ivti>.Predicates in
def : Pat<(fvti.Vector (SplatFPOp (fvti.Scalar fpimm0))),
(!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
(fvti.Vector (IMPLICIT_DEF)),
- 0, fvti.AVL, fvti.Log2SEW, TU_MU)>;
- }
+ 0, fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
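// Two predicate splits above: the plain vfmv.v.f splat needs real FP vector
// support for fvti, but splatting +0.0 lowers to the integer vmv.v.i, so it
// is guarded by the equivalent integer type's predicates instead; that keeps
// it usable for f16 vectors under Zvfhmin-level support.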
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 900f9dd1be05..dc6b57fad321 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -98,6 +98,8 @@ def riscv_urem_vl : SDNode<"RISCVISD::UREM_VL", SDT_RISCVIntBinOp_VL>;
def riscv_shl_vl : SDNode<"RISCVISD::SHL_VL", SDT_RISCVIntBinOp_VL>;
def riscv_sra_vl : SDNode<"RISCVISD::SRA_VL", SDT_RISCVIntBinOp_VL>;
def riscv_srl_vl : SDNode<"RISCVISD::SRL_VL", SDT_RISCVIntBinOp_VL>;
+def riscv_rotl_vl : SDNode<"RISCVISD::ROTL_VL", SDT_RISCVIntBinOp_VL>;
+def riscv_rotr_vl : SDNode<"RISCVISD::ROTR_VL", SDT_RISCVIntBinOp_VL>;
def riscv_smin_vl : SDNode<"RISCVISD::SMIN_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_smax_vl : SDNode<"RISCVISD::SMAX_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_umin_vl : SDNode<"RISCVISD::UMIN_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
@@ -122,8 +124,8 @@ def riscv_fneg_vl : SDNode<"RISCVISD::FNEG_VL", SDT_RISCVFPUnOp_VL>;
def riscv_fabs_vl : SDNode<"RISCVISD::FABS_VL", SDT_RISCVFPUnOp_VL>;
def riscv_fsqrt_vl : SDNode<"RISCVISD::FSQRT_VL", SDT_RISCVFPUnOp_VL>;
def riscv_fcopysign_vl : SDNode<"RISCVISD::FCOPYSIGN_VL", SDT_RISCVCopySign_VL>;
-def riscv_fminnum_vl : SDNode<"RISCVISD::FMINNUM_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>;
-def riscv_fmaxnum_vl : SDNode<"RISCVISD::FMAXNUM_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>;
+def riscv_vfmin_vl : SDNode<"RISCVISD::VFMIN_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>;
+def riscv_vfmax_vl : SDNode<"RISCVISD::VFMAX_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>;
def riscv_strict_fadd_vl : SDNode<"RISCVISD::STRICT_FADD_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative, SDNPHasChain]>;
def riscv_strict_fsub_vl : SDNode<"RISCVISD::STRICT_FSUB_VL", SDT_RISCVFPBinOp_VL, [SDNPHasChain]>;
@@ -407,6 +409,7 @@ def riscv_vwadd_vl : SDNode<"RISCVISD::VWADD_VL", SDT_RISCVVWIntBinOp_VL, [S
def riscv_vwaddu_vl : SDNode<"RISCVISD::VWADDU_VL", SDT_RISCVVWIntBinOp_VL, [SDNPCommutative]>;
def riscv_vwsub_vl : SDNode<"RISCVISD::VWSUB_VL", SDT_RISCVVWIntBinOp_VL, []>;
def riscv_vwsubu_vl : SDNode<"RISCVISD::VWSUBU_VL", SDT_RISCVVWIntBinOp_VL, []>;
+def riscv_vwsll_vl : SDNode<"RISCVISD::VWSLL_VL", SDT_RISCVVWIntBinOp_VL, []>;
def SDT_RISCVVWIntTernOp_VL : SDTypeProfile<1, 5, [SDTCisVec<0>, SDTCisInt<0>,
SDTCisInt<1>,
@@ -577,14 +580,15 @@ def SplatPat_simm5_plus1
def SplatPat_simm5_plus1_nonzero
: ComplexPattern<vAny, 1, "selectVSplatSimm5Plus1NonZero", [], [], 3>;
-def ext_oneuse_SplatPat
- : ComplexPattern<vAny, 1, "selectExtOneUseVSplat", [], [], 2>;
+// Selects extends or truncates of splats where we only care about the lowest 8
+// bits of each element.
+def Low8BitsSplatPat
+ : ComplexPattern<vAny, 1, "selectLow8BitsVSplat", [], [], 2>;
-def SelectFPImm : ComplexPattern<fAny, 1, "selectFPImm", [], [], 1>;
-
-// Ignore the vl operand.
-def SplatFPOp : PatFrag<(ops node:$op),
- (riscv_vfmv_v_f_vl undef, node:$op, srcvalue)>;
+// Ignore the vl operand on vfmv_v_f and vfmv_s_f.
+def SplatFPOp : PatFrags<(ops node:$op),
+ [(riscv_vfmv_v_f_vl undef, node:$op, srcvalue),
+ (riscv_vfmv_s_f_vl undef, node:$op, srcvalue)]>;
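// Making SplatFPOp a PatFrags (rather than a single PatFrag) lets every FP
// pattern written against a splat also match the single-element vfmv.s.f
// form; whether element 0 alone suffices is left to each pattern that uses
// the fragment.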
def sew8simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<8>", []>;
def sew16simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<16>", []>;
@@ -1377,16 +1381,6 @@ multiclass VPatReductionVL<SDNode vop, string instruction_name, bit is_float> {
let Predicates = GetVTypePredicates<vti>.Predicates in {
def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge),
(vti.Vector vti.RegClass:$rs1), VR:$rs2,
- (vti.Mask true_mask), VLOpFrag,
- (XLenVT timm:$policy))),
- (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW)
- (vti_m1.Vector VR:$merge),
- (vti.Vector vti.RegClass:$rs1),
- (vti_m1.Vector VR:$rs2),
- GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
-
- def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge),
- (vti.Vector vti.RegClass:$rs1), VR:$rs2,
(vti.Mask V0), VLOpFrag,
(XLenVT timm:$policy))),
(!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
@@ -1404,19 +1398,6 @@ multiclass VPatReductionVL_RM<SDNode vop, string instruction_name, bit is_float>
let Predicates = GetVTypePredicates<vti>.Predicates in {
def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge),
(vti.Vector vti.RegClass:$rs1), VR:$rs2,
- (vti.Mask true_mask), VLOpFrag,
- (XLenVT timm:$policy))),
- (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW)
- (vti_m1.Vector VR:$merge),
- (vti.Vector vti.RegClass:$rs1),
- (vti_m1.Vector VR:$rs2),
- // Value to indicate no rounding mode change in
- // RISCVInsertReadWriteCSR
- FRM_DYN,
- GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
-
- def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge),
- (vti.Vector vti.RegClass:$rs1), VR:$rs2,
(vti.Mask V0), VLOpFrag,
(XLenVT timm:$policy))),
(!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
@@ -1447,18 +1428,18 @@ multiclass VPatBinaryVL_WV_WX_WI<SDNode op, string instruction_name> {
VLOpFrag)),
(!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- wti.RegClass:$rs2, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ wti.RegClass:$rs2, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TA_MA)>;
def : Pat<
(vti.Vector
(riscv_trunc_vector_vl
(op (wti.Vector wti.RegClass:$rs2),
- (wti.Vector (ext_oneuse_SplatPat (XLenVT GPR:$rs1)))),
+ (wti.Vector (Low8BitsSplatPat (XLenVT GPR:$rs1)))),
(vti.Mask true_mask),
VLOpFrag)),
(!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW, TA_MA)>;
def : Pat<
(vti.Vector
@@ -1468,7 +1449,7 @@ multiclass VPatBinaryVL_WV_WX_WI<SDNode op, string instruction_name> {
VLOpFrag)),
(!cast<Instruction>(instruction_name#"_WI_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- wti.RegClass:$rs2, uimm5:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ wti.RegClass:$rs2, uimm5:$rs1, GPR:$vl, vti.Log2SEW, TA_MA)>;
}
}
}
@@ -1482,14 +1463,6 @@ multiclass VPatWidenReductionVL<SDNode vop, PatFrags extop, string instruction_n
GetVTypePredicates<wti>.Predicates) in {
def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
(wti.Vector (extop (vti.Vector vti.RegClass:$rs1))),
- VR:$rs2, (vti.Mask true_mask), VLOpFrag,
- (XLenVT timm:$policy))),
- (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW)
- (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
- (wti_m1.Vector VR:$rs2), GPR:$vl, vti.Log2SEW,
- (XLenVT timm:$policy))>;
- def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
- (wti.Vector (extop (vti.Vector vti.RegClass:$rs1))),
VR:$rs2, (vti.Mask V0), VLOpFrag,
(XLenVT timm:$policy))),
(!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
@@ -1509,18 +1482,6 @@ multiclass VPatWidenReductionVL_RM<SDNode vop, PatFrags extop, string instructio
GetVTypePredicates<wti>.Predicates) in {
def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
(wti.Vector (extop (vti.Vector vti.RegClass:$rs1))),
- VR:$rs2, (vti.Mask true_mask), VLOpFrag,
- (XLenVT timm:$policy))),
- (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW)
- (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
- (wti_m1.Vector VR:$rs2),
- // Value to indicate no rounding mode change in
- // RISCVInsertReadWriteCSR
- FRM_DYN,
- GPR:$vl, vti.Log2SEW,
- (XLenVT timm:$policy))>;
- def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
- (wti.Vector (extop (vti.Vector vti.RegClass:$rs1))),
VR:$rs2, (vti.Mask V0), VLOpFrag,
(XLenVT timm:$policy))),
(!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
@@ -1544,14 +1505,6 @@ multiclass VPatWidenReductionVL_Ext_VL<SDNode vop, PatFrags extop, string instru
GetVTypePredicates<wti>.Predicates) in {
def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
(wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)),
- VR:$rs2, (vti.Mask true_mask), VLOpFrag,
- (XLenVT timm:$policy))),
- (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW)
- (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
- (wti_m1.Vector VR:$rs2), GPR:$vl, vti.Log2SEW,
- (XLenVT timm:$policy))>;
- def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
- (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)),
VR:$rs2, (vti.Mask V0), VLOpFrag,
(XLenVT timm:$policy))),
(!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
@@ -1571,18 +1524,6 @@ multiclass VPatWidenReductionVL_Ext_VL_RM<SDNode vop, PatFrags extop, string ins
GetVTypePredicates<wti>.Predicates) in {
def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
(wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)),
- VR:$rs2, (vti.Mask true_mask), VLOpFrag,
- (XLenVT timm:$policy))),
- (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW)
- (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
- (wti_m1.Vector VR:$rs2),
- // Value to indicate no rounding mode change in
- // RISCVInsertReadWriteCSR
- FRM_DYN,
- GPR:$vl, vti.Log2SEW,
- (XLenVT timm:$policy))>;
- def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
- (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)),
VR:$rs2, (vti.Mask V0), VLOpFrag,
(XLenVT timm:$policy))),
(!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
@@ -1693,7 +1634,7 @@ multiclass VPatNarrowShiftSplatExt_WX<SDNode op, PatFrags extop, string instruct
(vti.Mask true_mask), VLOpFrag)),
(!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW, TA_MA)>;
}
}
@@ -1713,7 +1654,7 @@ multiclass VPatNarrowShiftExtVL_WV<SDNode op, PatFrags extop, string instruction
(vti.Mask V0), VLOpFrag)),
(!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX#"_MASK")
(vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs2, vti.RegClass:$rs1,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>;
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>;
}
}
@@ -1832,13 +1773,13 @@ multiclass VPatNarrowShiftSplat_WX_WI<SDNode op, string instruction_name> {
srcvalue, true_mask, VLOpFrag)), true_mask, VLOpFrag)),
(!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- wti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ wti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW, TA_MA)>;
def : Pat<(vti.Vector (riscv_trunc_vector_vl
(wti.Vector (op wti.RegClass:$rs1, (SplatPat_uimm5 uimm5:$rs2),
srcvalue, true_mask, VLOpFrag)), true_mask, VLOpFrag)),
(!cast<Instruction>(instruction_name#"_WI_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- wti.RegClass:$rs1, uimm5:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ wti.RegClass:$rs1, uimm5:$rs2, GPR:$vl, vti.Log2SEW, TA_MA)>;
}
}
}
@@ -2039,6 +1980,56 @@ multiclass VPatWidenFPMulAccVL_VV_VF_RM<SDNode vop, string instruction_name> {
}
}
+multiclass VPatSlideVL_VX_VI<SDNode vop, string instruction_name> {
+ foreach vti = AllVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rd),
+ (vti.Vector vti.RegClass:$rs1),
+ uimm5:$rs2, (vti.Mask V0),
+ VLOpFrag, (XLenVT timm:$policy))),
+ (!cast<Instruction>(instruction_name#"_VI_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$rd, vti.RegClass:$rs1, uimm5:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW,
+ (XLenVT timm:$policy))>;
+
+ def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rd),
+ (vti.Vector vti.RegClass:$rs1),
+ GPR:$rs2, (vti.Mask V0),
+ VLOpFrag, (XLenVT timm:$policy))),
+ (!cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW,
+ (XLenVT timm:$policy))>;
+ }
+ }
+}
+
+multiclass VPatSlide1VL_VX<SDNode vop, string instruction_name> {
+ foreach vti = AllIntegerVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rs3),
+ (vti.Vector vti.RegClass:$rs1),
+ GPR:$rs2, (vti.Mask V0), VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>;
+ }
+ }
+}
+
+multiclass VPatSlide1VL_VF<SDNode vop, string instruction_name> {
+ foreach vti = AllFloatVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rs3),
+ (vti.Vector vti.RegClass:$rs1),
+ vti.Scalar:$rs2, (vti.Mask V0), VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$rs3, vti.RegClass:$rs1, vti.Scalar:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>;
+ }
+ }
+}
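// Note the policy handling above: VPatSlideVL_VX_VI forwards the node's own
// policy immediate, while the slide1 patterns hardcode TU_MU because their
// merge operand ($rs3) is a live register whose tail and masked-off elements
// must stay undisturbed, unlike the IMPLICIT_DEF cases switched to TA_MA
// elsewhere in this diff.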
+
//===----------------------------------------------------------------------===//
// Patterns.
//===----------------------------------------------------------------------===//
@@ -2132,7 +2123,7 @@ foreach vti = AllIntegerVectors in {
srcvalue, (vti.Mask true_mask), VLOpFrag),
(!cast<Instruction>("PseudoVADD_VV_"# vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- vti.RegClass:$rs1, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ vti.RegClass:$rs1, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TA_MA)>;
}
// 11.7. Vector Narrowing Integer Right Shift Instructions
@@ -2216,7 +2207,7 @@ defm : VPatBinaryVL_VV_VX<riscv_mulhu_vl, "PseudoVMULHU", IntegerVectorsExceptI6
// vsmul.vv and vsmul.vx are not included in EEW=64 in Zve64*.
let Predicates = [HasVInstructionsFullMultiply] in {
defm : VPatBinaryVL_VV_VX<riscv_mulhs_vl, "PseudoVMULH", I64IntegerVectors>;
- defm : VPatBinaryVL_VV_VX<riscv_mulhu_vl, "PseudoVMULHU", I64IntegerVectors>;
+ defm : VPatBinaryVL_VV_VX<riscv_mulhu_vl, "PseudoVMULHU", I64IntegerVectors>;
}
// 11.11. Vector Integer Divide Instructions
@@ -2373,8 +2364,8 @@ defm : VPatWidenFPMulAccVL_VV_VF_RM<riscv_vfwmsub_vl, "PseudoVFWMSAC">;
defm : VPatWidenFPMulAccVL_VV_VF_RM<riscv_vfwnmsub_vl, "PseudoVFWNMSAC">;
// 13.11. Vector Floating-Point MIN/MAX Instructions
-defm : VPatBinaryFPVL_VV_VF<riscv_fminnum_vl, "PseudoVFMIN">;
-defm : VPatBinaryFPVL_VV_VF<riscv_fmaxnum_vl, "PseudoVFMAX">;
+defm : VPatBinaryFPVL_VV_VF<riscv_vfmin_vl, "PseudoVFMIN">;
+defm : VPatBinaryFPVL_VV_VF<riscv_vfmax_vl, "PseudoVFMAX">;
// 13.13. Vector Floating-Point Compare Instructions
defm : VPatFPSetCCVL_VV_VF_FV<any_riscv_fsetcc_vl, SETEQ,
@@ -2441,7 +2432,7 @@ foreach vti = AllFloatVectors in {
VLOpFrag),
(!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TA_MA)>;
def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
(SplatFPOp vti.ScalarRegClass:$rs2),
@@ -2459,12 +2450,13 @@ foreach vti = AllFloatVectors in {
(!cast<Instruction>("PseudoVFROUND_NOEXCEPT_V_" # vti.LMul.MX #"_MASK")
(vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1,
(vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>;
-
+
// 14.14. Vector Floating-Point Classify Instruction
- def : Pat<(riscv_fclass_vl (vti.Vector vti.RegClass:$rs2),
- (vti.Mask true_mask), VLOpFrag),
- (!cast<Instruction>("PseudoVFCLASS_V_"# vti.LMul.MX)
- (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ def : Pat<(riscv_fclass_vl (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVFCLASS_V_"# vti.LMul.MX #"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>;
}
}
@@ -2472,7 +2464,8 @@ foreach fvti = AllFloatVectors in {
// Floating-point vselects:
// 11.15. Vector Integer Merge Instructions
// 13.15. Vector Floating-Point Merge Instruction
- let Predicates = GetVTypePredicates<fvti>.Predicates in {
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = GetVTypePredicates<ivti>.Predicates in {
def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0),
fvti.RegClass:$rs1,
fvti.RegClass:$rs2,
@@ -2483,16 +2476,6 @@ foreach fvti = AllFloatVectors in {
GPR:$vl, fvti.Log2SEW)>;
def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0),
- (SplatFPOp fvti.ScalarRegClass:$rs1),
- fvti.RegClass:$rs2,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
- (fvti.Vector (IMPLICIT_DEF)),
- fvti.RegClass:$rs2,
- (fvti.Scalar fvti.ScalarRegClass:$rs1),
- (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0),
(SplatFPOp (SelectFPImm (XLenVT GPR:$imm))),
fvti.RegClass:$rs2,
VLOpFrag)),
@@ -2519,21 +2502,33 @@ foreach fvti = AllFloatVectors in {
GPR:$vl, fvti.Log2SEW)>;
def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0),
- (SplatFPOp fvti.ScalarRegClass:$rs1),
+ (SplatFPOp (fvti.Scalar fpimm0)),
fvti.RegClass:$rs2,
VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
+ fvti.RegClass:$rs2, fvti.RegClass:$rs2, 0, (fvti.Mask V0),
+ GPR:$vl, fvti.Log2SEW)>;
+ }
+
+ let Predicates = GetVTypePredicates<fvti>.Predicates in {
+ def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0),
+ (SplatFPOp fvti.ScalarRegClass:$rs1),
+ fvti.RegClass:$rs2,
+ VLOpFrag)),
(!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
- fvti.RegClass:$rs2, fvti.RegClass:$rs2,
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2,
(fvti.Scalar fvti.ScalarRegClass:$rs1),
(fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0),
- (SplatFPOp (fvti.Scalar fpimm0)),
+ (SplatFPOp fvti.ScalarRegClass:$rs1),
fvti.RegClass:$rs2,
VLOpFrag)),
- (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
- fvti.RegClass:$rs2, fvti.RegClass:$rs2, 0, (fvti.Mask V0),
- GPR:$vl, fvti.Log2SEW)>;
+ (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
+ fvti.RegClass:$rs2, fvti.RegClass:$rs2,
+ (fvti.Scalar fvti.ScalarRegClass:$rs1),
+ (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
// 13.16. Vector Floating-Point Move Instruction
// If we're splatting fpimm0, use vmv.v.x vd, x0.
@@ -2585,8 +2580,9 @@ defm : VPatWConvertI2FPVL_V<any_riscv_sint_to_fp_vl, "PseudoVFWCVT_F_X_V">;
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
- let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
- GetVTypePredicates<fwti>.Predicates) in
+ let Predicates = !if(!eq(fvti.Scalar, f16), [HasVInstructionsF16Minimal],
+ !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates)) in
def : Pat<(fwti.Vector (any_riscv_fpextend_vl
(fvti.Vector fvti.RegClass:$rs1),
(fvti.Mask V0),
@@ -2615,8 +2611,10 @@ defm : VPatNConvertI2FP_RM_VL_W<riscv_vfcvt_rm_f_x_vl, "PseudoVFNCVT_RM_F_X_W">;
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
- let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
- GetVTypePredicates<fwti>.Predicates) in {
+ // Define vfncvt.f.f.w for f16 when Zvfhmin is enabled.
+ let Predicates = !if(!eq(fvti.Scalar, f16), [HasVInstructionsF16Minimal],
+ !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates)) in {
def : Pat<(fvti.Vector (any_riscv_fpround_vl
(fwti.Vector fwti.RegClass:$rs1),
(fwti.Mask V0), VLOpFrag)),
@@ -2628,6 +2626,8 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
FRM_DYN,
GPR:$vl, fvti.Log2SEW, TA_MA)>;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
def : Pat<(fvti.Vector (any_riscv_fncvt_rod_vl
(fwti.Vector fwti.RegClass:$rs1),
(fwti.Mask V0), VLOpFrag)),
@@ -2766,7 +2766,7 @@ foreach vti = AllIntegerVectors in {
(!cast<Instruction>("PseudoVMV_S_X_"#vti.LMul.MX)
vti.RegClass:$merge,
(vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.Log2SEW)>;
-
+
def : Pat<(vti.Vector (riscv_vrgather_vv_vl vti.RegClass:$rs2,
vti.RegClass:$rs1,
vti.RegClass:$merge,
@@ -2922,70 +2922,12 @@ foreach vti = AllIntegerVectors in {
(!cast<Instruction>("PseudoVID_V_"#vti.LMul.MX#"_MASK")
(vti.Vector (IMPLICIT_DEF)), (vti.Mask V0), GPR:$vl, vti.Log2SEW,
TAIL_AGNOSTIC)>;
- def : Pat<(vti.Vector (riscv_slide1up_vl (vti.Vector vti.RegClass:$rd),
- (vti.Vector vti.RegClass:$rs1),
- GPR:$rs2, (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVSLIDE1UP_VX_"#vti.LMul.MX)
- vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
- def : Pat<(vti.Vector (riscv_slide1down_vl (vti.Vector vti.RegClass:$rd),
- (vti.Vector vti.RegClass:$rs1),
- GPR:$rs2, (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVSLIDE1DOWN_VX_"#vti.LMul.MX)
- vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
}
}
-foreach vti = AllFloatVectors in {
- let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : Pat<(vti.Vector (riscv_fslide1up_vl (vti.Vector vti.RegClass:$rd),
- (vti.Vector vti.RegClass:$rs1),
- vti.Scalar:$rs2, (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFSLIDE1UP_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
- def : Pat<(vti.Vector (riscv_fslide1down_vl (vti.Vector vti.RegClass:$rd),
- (vti.Vector vti.RegClass:$rs1),
- vti.Scalar:$rs2, (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFSLIDE1DOWN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
- }
-}
-
-foreach vti = AllVectors in {
- let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : Pat<(vti.Vector (riscv_slideup_vl (vti.Vector vti.RegClass:$rs3),
- (vti.Vector vti.RegClass:$rs1),
- uimm5:$rs2, (vti.Mask true_mask),
- VLOpFrag, (XLenVT timm:$policy))),
- (!cast<Instruction>("PseudoVSLIDEUP_VI_"#vti.LMul.MX)
- vti.RegClass:$rs3, vti.RegClass:$rs1, uimm5:$rs2,
- GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
-
- def : Pat<(vti.Vector (riscv_slideup_vl (vti.Vector vti.RegClass:$rs3),
- (vti.Vector vti.RegClass:$rs1),
- GPR:$rs2, (vti.Mask true_mask),
- VLOpFrag, (XLenVT timm:$policy))),
- (!cast<Instruction>("PseudoVSLIDEUP_VX_"#vti.LMul.MX)
- vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2,
- GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
-
- def : Pat<(vti.Vector (riscv_slidedown_vl (vti.Vector vti.RegClass:$rs3),
- (vti.Vector vti.RegClass:$rs1),
- uimm5:$rs2, (vti.Mask true_mask),
- VLOpFrag, (XLenVT timm:$policy))),
- (!cast<Instruction>("PseudoVSLIDEDOWN_VI_"#vti.LMul.MX)
- vti.RegClass:$rs3, vti.RegClass:$rs1, uimm5:$rs2,
- GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
-
- def : Pat<(vti.Vector (riscv_slidedown_vl (vti.Vector vti.RegClass:$rs3),
- (vti.Vector vti.RegClass:$rs1),
- GPR:$rs2, (vti.Mask true_mask),
- VLOpFrag, (XLenVT timm:$policy))),
- (!cast<Instruction>("PseudoVSLIDEDOWN_VX_"#vti.LMul.MX)
- vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2,
- GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
- }
-}
+defm : VPatSlideVL_VX_VI<riscv_slideup_vl, "PseudoVSLIDEUP">;
+defm : VPatSlideVL_VX_VI<riscv_slidedown_vl, "PseudoVSLIDEDOWN">;
+defm : VPatSlide1VL_VX<riscv_slide1up_vl, "PseudoVSLIDE1UP">;
+defm : VPatSlide1VL_VF<riscv_fslide1up_vl, "PseudoVFSLIDE1UP">;
+defm : VPatSlide1VL_VX<riscv_slide1down_vl, "PseudoVSLIDE1DOWN">;
+defm : VPatSlide1VL_VF<riscv_fslide1down_vl, "PseudoVFSLIDE1DOWN">;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
index 4ba052b25e42..924e91e15c34 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
@@ -13,10 +13,12 @@
let DecoderNamespace = "XCVbitmanip" in {
class CVInstBitManipRII<bits<2> funct2, bits<3> funct3, dag outs, dag ins,
string opcodestr, string argstr>
- : RVInstI<funct3, OPC_CUSTOM_2, outs, ins, opcodestr, argstr> {
+ : RVInstIBase<funct3, OPC_CUSTOM_2, outs, ins, opcodestr, argstr> {
bits<5> is3;
bits<5> is2;
- let imm12 = {funct2, is3, is2};
+ let Inst{31-30} = funct2;
+ let Inst{29-25} = is3;
+ let Inst{24-20} = is2;
}
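// Equivalence note: in the base I-format, imm12 occupies Inst{31-20}, so the
// removed concatenation and the new explicit slices encode identically
// (that RVInstIBase leaves Inst{31-20} for the subclass to fill is an
// assumption here):
//   let imm12 = {funct2, is3, is2};   // funct2 -> Inst{31-30}
//                                     // is3    -> Inst{29-25}
//                                     // is2    -> Inst{24-20}
// e.g. funct2 = 0b01, is3 = 0b00011, is2 = 0b00101
//   => Inst{31-20} = 0b01_00011_00101 either way.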
class CVBitManipRII<bits<2> funct2, bits<3> funct3, string opcodestr,
@@ -31,7 +33,7 @@ let DecoderNamespace = "XCVbitmanip" in {
class CVBitManipR<bits<7> funct7, string opcodestr>
: RVInstR<funct7, 0b011, OPC_CUSTOM_1, (outs GPR:$rd),
- (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1"> {
+ (ins GPR:$rs1), opcodestr, "$rd, $rs1"> {
let rs2 = 0b00000;
}
}
@@ -67,125 +69,80 @@ let Predicates = [HasVendorXCVbitmanip, IsRV32],
def CV_CNT : CVBitManipR<0b0100100, "cv.cnt">;
}
-class CVInstMac<bits<7> funct7, bits<3> funct3, dag outs, dag ins,
- string opcodestr, string argstr, list<dag> pattern>
- : RVInst<outs, ins, opcodestr, argstr, pattern, InstFormatOther> {
- bits<5> rs2;
- bits<5> rs1;
- bits<5> rd;
-
- let Inst{31-25} = funct7;
- let Inst{24-20} = rs2;
- let Inst{19-15} = rs1;
- let Inst{14-12} = funct3;
- let Inst{11-7} = rd;
- let Inst{6-0} = OPC_CUSTOM_1.Value;
+class CVInstMac<bits<7> funct7, bits<3> funct3, string opcodestr>
+ : RVInstR<funct7, funct3, OPC_CUSTOM_1,
+ (outs GPR:$rd_wb), (ins GPR:$rd, GPR:$rs1, GPR:$rs2),
+ opcodestr, "$rd, $rs1, $rs2"> {
let DecoderNamespace = "XCVmac";
}
-class CVInstMac16I<bits<2> funct2, bits<3> funct3, dag outs, dag ins,
- string opcodestr, string argstr, list<dag> pattern>
- : RVInst<outs, ins, opcodestr, argstr, pattern, InstFormatOther> {
+class CVInstMacMulN<bits<2> funct2, bits<3> funct3, dag outs, dag ins,
+ string opcodestr>
+ : RVInstRBase<funct3, OPC_CUSTOM_2, outs, ins, opcodestr,
+ "$rd, $rs1, $rs2, $imm5"> {
bits<5> imm5;
- bits<5> rs2;
- bits<5> rs1;
- bits<5> rd;
let Inst{31-30} = funct2;
let Inst{29-25} = imm5;
- let Inst{24-20} = rs2;
- let Inst{19-15} = rs1;
- let Inst{14-12} = funct3;
- let Inst{11-7} = rd;
- let Inst{6-0} = OPC_CUSTOM_2.Value;
let DecoderNamespace = "XCVmac";
}
+class CVInstMacN<bits<2> funct2, bits<3> funct3, string opcodestr>
+ : CVInstMacMulN<funct2, funct3, (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5), opcodestr>;
+
+class CVInstMulN<bits<2> funct2, bits<3> funct3, string opcodestr>
+ : CVInstMacMulN<funct2, funct3, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5), opcodestr>;
+
let Predicates = [HasVendorXCVmac, IsRV32], hasSideEffects = 0, mayLoad = 0,
mayStore = 0, Constraints = "$rd = $rd_wb" in {
// 32x32 bit macs
- def CV_MAC : CVInstMac<0b1001000, 0b011, (outs GPR:$rd_wb),
- (ins GPR:$rd, GPR:$rs1, GPR:$rs2),
- "cv.mac", "$rd, $rs1, $rs2", []>,
+ def CV_MAC : CVInstMac<0b1001000, 0b011, "cv.mac">,
Sched<[]>;
- def CV_MSU : CVInstMac<0b1001001, 0b011, (outs GPR:$rd_wb),
- (ins GPR:$rd, GPR:$rs1, GPR:$rs2),
- "cv.msu", "$rd, $rs1, $rs2", []>,
+ def CV_MSU : CVInstMac<0b1001001, 0b011, "cv.msu">,
Sched<[]>;
// Signed 16x16 bit macs with imm
- def CV_MACSN : CVInstMac16I<0b00, 0b110, (outs GPR:$rd_wb),
- (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.macsn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MACSN : CVInstMacN<0b00, 0b110, "cv.macsn">,
Sched<[]>;
- def CV_MACHHSN : CVInstMac16I<0b01, 0b110, (outs GPR:$rd_wb),
- (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.machhsn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MACHHSN : CVInstMacN<0b01, 0b110, "cv.machhsn">,
Sched<[]>;
- def CV_MACSRN : CVInstMac16I<0b10, 0b110, (outs GPR:$rd_wb),
- (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.macsrn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MACSRN : CVInstMacN<0b10, 0b110, "cv.macsrn">,
Sched<[]>;
- def CV_MACHHSRN : CVInstMac16I<0b11, 0b110, (outs GPR:$rd_wb),
- (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.machhsrn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MACHHSRN : CVInstMacN<0b11, 0b110, "cv.machhsrn">,
Sched<[]>;
// Unsigned 16x16 bit macs with imm
- def CV_MACUN : CVInstMac16I<0b00, 0b111, (outs GPR:$rd_wb),
- (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.macun", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MACUN : CVInstMacN<0b00, 0b111, "cv.macun">,
Sched<[]>;
- def CV_MACHHUN : CVInstMac16I<0b01, 0b111, (outs GPR:$rd_wb),
- (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.machhun", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MACHHUN : CVInstMacN<0b01, 0b111, "cv.machhun">,
Sched<[]>;
- def CV_MACURN : CVInstMac16I<0b10, 0b111, (outs GPR:$rd_wb),
- (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.macurn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MACURN : CVInstMacN<0b10, 0b111, "cv.macurn">,
Sched<[]>;
- def CV_MACHHURN : CVInstMac16I<0b11, 0b111, (outs GPR:$rd_wb),
- (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.machhurn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MACHHURN : CVInstMacN<0b11, 0b111, "cv.machhurn">,
Sched<[]>;
} // Predicates = [HasVendorXCVmac, IsRV32], hasSideEffects = 0, mayLoad = 0...
let Predicates = [HasVendorXCVmac, IsRV32], hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
// Signed 16x16 bit muls with imm
- def CV_MULSN : CVInstMac16I<0b00, 0b100, (outs GPR:$rd),
- (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.mulsn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MULSN : CVInstMulN<0b00, 0b100, "cv.mulsn">,
Sched<[]>;
- def CV_MULHHSN : CVInstMac16I<0b01, 0b100, (outs GPR:$rd),
- (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.mulhhsn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MULHHSN : CVInstMulN<0b01, 0b100, "cv.mulhhsn">,
Sched<[]>;
- def CV_MULSRN : CVInstMac16I<0b10, 0b100, (outs GPR:$rd),
- (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.mulsrn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MULSRN : CVInstMulN<0b10, 0b100, "cv.mulsrn">,
Sched<[]>;
- def CV_MULHHSRN : CVInstMac16I<0b11, 0b100, (outs GPR:$rd),
- (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.mulhhsrn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MULHHSRN : CVInstMulN<0b11, 0b100, "cv.mulhhsrn">,
Sched<[]>;
-
// Unsigned 16x16 bit muls with imm
- def CV_MULUN : CVInstMac16I<0b00, 0b101, (outs GPR:$rd),
- (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.mulun", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MULUN : CVInstMulN<0b00, 0b101, "cv.mulun">,
Sched<[]>;
- def CV_MULHHUN : CVInstMac16I<0b01, 0b101, (outs GPR:$rd),
- (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.mulhhun", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MULHHUN : CVInstMulN<0b01, 0b101, "cv.mulhhun">,
Sched<[]>;
- def CV_MULURN : CVInstMac16I<0b10, 0b101, (outs GPR:$rd),
- (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.mulurn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MULURN : CVInstMulN<0b10, 0b101, "cv.mulurn">,
Sched<[]>;
- def CV_MULHHURN : CVInstMac16I<0b11, 0b101, (outs GPR:$rd),
- (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.mulhhurn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MULHHURN : CVInstMulN<0b11, 0b101, "cv.mulhhurn">,
Sched<[]>;
} // Predicates = [HasVendorXCVmac, IsRV32], hasSideEffects = 0, mayLoad = 0...
@@ -203,3 +160,547 @@ let Predicates = [HasVendorXCVmac, IsRV32] in {
def : InstAlias<"cv.mulhhu $rd1, $rs1, $rs2",
(CV_MULHHUN GPR:$rd1, GPR:$rs1, GPR:$rs2, 0)>;
} // Predicates = [HasVendorXCVmac, IsRV32]
+
+let DecoderNamespace = "XCValu" in {
+ class CVInstAluRRI<bits<2> funct2, bits<3> funct3, string opcodestr>
+ : RVInstRBase<funct3, OPC_CUSTOM_2, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5), opcodestr,
+ "$rd, $rs1, $rs2, $imm5"> {
+ bits<5> imm5;
+
+ let Inst{31-30} = funct2;
+ let Inst{29-25} = imm5;
+ }
+
+ class CVInstAluRR<bits<7> funct7, bits<3> funct3, string opcodestr>
+ : RVInstR<funct7, funct3, OPC_CUSTOM_1, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+
+ class CVInstAluRRNR<bits<7> funct7, bits<3> funct3, string opcodestr>
+ : RVInstR<funct7, funct3, OPC_CUSTOM_1, (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+
+ class CVInstAluRI<bits<7> funct7, bits<3> funct3, string opcodestr>
+ : RVInstIBase<funct3, OPC_CUSTOM_1, (outs GPR:$rd),
+ (ins GPR:$rs1, uimm5:$imm5), opcodestr,
+ "$rd, $rs1, $imm5"> {
+ bits<5> imm5;
+
+ let Inst{31-25} = funct7;
+ let Inst{24-20} = imm5;
+ }
+
+ class CVInstAluR<bits<7> funct7, bits<3> funct3, string opcodestr>
+ : RVInstR<funct7, funct3, OPC_CUSTOM_1, (outs GPR:$rd), (ins GPR:$rs1),
+ opcodestr, "$rd, $rs1"> {
+ let rs2 = 0b00000;
+ }
+
+} // DecoderNamespace = "XCValu"
+
+let Predicates = [HasVendorXCValu],
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+ // General ALU Operations
+ def CV_ABS : CVInstAluR<0b0101000, 0b011, "cv.abs">,
+ Sched<[]>;
+ def CV_SLET : CVInstAluRR<0b0101001, 0b011, "cv.slet">,
+ Sched<[]>;
+ def CV_SLETU : CVInstAluRR<0b0101010, 0b011, "cv.sletu">,
+ Sched<[]>;
+ def CV_MIN : CVInstAluRR<0b0101011, 0b011, "cv.min">,
+ Sched<[]>;
+ def CV_MINU : CVInstAluRR<0b0101100, 0b011, "cv.minu">,
+ Sched<[]>;
+ def CV_MAX : CVInstAluRR<0b0101101, 0b011, "cv.max">,
+ Sched<[]>;
+ def CV_MAXU : CVInstAluRR<0b0101110, 0b011, "cv.maxu">,
+ Sched<[]>;
+ def CV_EXTHS : CVInstAluR<0b0110000, 0b011, "cv.exths">,
+ Sched<[]>;
+ def CV_EXTHZ : CVInstAluR<0b0110001, 0b011, "cv.exthz">,
+ Sched<[]>;
+ def CV_EXTBS : CVInstAluR<0b0110010, 0b011, "cv.extbs">,
+ Sched<[]>;
+ def CV_EXTBZ : CVInstAluR<0b0110011, 0b011, "cv.extbz">,
+ Sched<[]>;
+
+ def CV_CLIP : CVInstAluRI<0b0111000, 0b011, "cv.clip">,
+ Sched<[]>;
+ def CV_CLIPU : CVInstAluRI<0b0111001, 0b011, "cv.clipu">,
+ Sched<[]>;
+ def CV_CLIPR : CVInstAluRR<0b0111010, 0b011, "cv.clipr">,
+ Sched<[]>;
+ def CV_CLIPUR : CVInstAluRR<0b0111011, 0b011, "cv.clipur">,
+ Sched<[]>;
+
+ def CV_ADDN : CVInstAluRRI<0b00, 0b010, "cv.addn">,
+ Sched<[]>;
+ def CV_ADDUN : CVInstAluRRI<0b01, 0b010, "cv.addun">,
+ Sched<[]>;
+ def CV_ADDRN : CVInstAluRRI<0b10, 0b010, "cv.addrn">,
+ Sched<[]>;
+ def CV_ADDURN : CVInstAluRRI<0b11, 0b010, "cv.addurn">,
+ Sched<[]>;
+ def CV_SUBN : CVInstAluRRI<0b00, 0b011, "cv.subn">,
+ Sched<[]>;
+ def CV_SUBUN : CVInstAluRRI<0b01, 0b011, "cv.subun">,
+ Sched<[]>;
+ def CV_SUBRN : CVInstAluRRI<0b10, 0b011, "cv.subrn">,
+ Sched<[]>;
+ def CV_SUBURN : CVInstAluRRI<0b11, 0b011, "cv.suburn">,
+ Sched<[]>;
+} // Predicates = [HasVendorXCValu],
+ // hasSideEffects = 0, mayLoad = 0, mayStore = 0
+
+let Predicates = [HasVendorXCValu],
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0,
+ Constraints = "$rd = $rd_wb" in {
+ def CV_ADDNR : CVInstAluRRNR<0b1000000, 0b011, "cv.addnr">,
+ Sched<[]>;
+ def CV_ADDUNR : CVInstAluRRNR<0b1000001, 0b011, "cv.addunr">,
+ Sched<[]>;
+ def CV_ADDRNR : CVInstAluRRNR<0b1000010, 0b011, "cv.addrnr">,
+ Sched<[]>;
+ def CV_ADDURNR : CVInstAluRRNR<0b1000011, 0b011, "cv.addurnr">,
+ Sched<[]>;
+ def CV_SUBNR : CVInstAluRRNR<0b1000100, 0b011, "cv.subnr">,
+ Sched<[]>;
+ def CV_SUBUNR : CVInstAluRRNR<0b1000101, 0b011, "cv.subunr">,
+ Sched<[]>;
+ def CV_SUBRNR : CVInstAluRRNR<0b1000110, 0b011, "cv.subrnr">,
+ Sched<[]>;
+ def CV_SUBURNR : CVInstAluRRNR<0b1000111, 0b011, "cv.suburnr">,
+ Sched<[]>;
+
+} // Predicates = [HasVendorXCValu],
+ // hasSideEffects = 0, mayLoad = 0, mayStore = 0,
+ // Constraints = "$rd = $rd_wb"
+
+
+class CVInstSIMDRR<bits<5> funct5, bit F, bit funct1, bits<3> funct3,
+ RISCVOpcode opcode, dag outs,
+ dag ins, string opcodestr, string argstr>
+ : RVInstRBase<funct3, opcode, outs, ins, opcodestr, argstr> {
+ let Inst{31-27} = funct5;
+ let Inst{26} = F;
+ let Inst{25} = funct1;
+ let DecoderNamespace = "XCVsimd";
+}
+
+class CVInstSIMDRI<bits<5> funct5, bit F, bits<3> funct3, RISCVOpcode opcode,
+ dag outs, dag ins, string opcodestr, string argstr>
+ : RVInstIBase<funct3, opcode, outs, ins, opcodestr, argstr> {
+ bits<6> imm6;
+
+ let Inst{31-27} = funct5;
+ let Inst{26} = F;
+ let Inst{25} = imm6{0}; // funct1 unused
+ let Inst{24-20} = imm6{5-1};
+ let DecoderNamespace = "XCVsimd";
+}
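// imm6 packing in CVInstSIMDRI: the low immediate bit rides in the slot that
// CVInstSIMDRR uses for funct1, and the remaining five bits take the rs2
// field. Worked example:
//   imm6 = 0b101101  =>  Inst{25}    = imm6{0}   = 1
//                        Inst{24-20} = imm6{5-1} = 0b10110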
+
+class CVSIMDRR<bits<5> funct5, bit F, bit funct1, bits<3> funct3,
+ string opcodestr>
+ : CVInstSIMDRR<funct5, F, funct1, funct3, OPC_CUSTOM_3, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+
+class CVSIMDRRWb<bits<5> funct5, bit F, bit funct1, bits<3> funct3,
+ string opcodestr>
+ : CVInstSIMDRR<funct5, F, funct1, funct3, OPC_CUSTOM_3, (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2"> {
+ let Constraints = "$rd = $rd_wb";
+}
+
+class CVSIMDRI<bits<5> funct5, bit F, bits<3> funct3, string opcodestr>
+ : CVInstSIMDRI<funct5, F, funct3, OPC_CUSTOM_3, (outs GPR:$rd),
+ (ins GPR:$rs1, simm6:$imm6), opcodestr, "$rd, $rs1, $imm6">;
+
+class CVSIMDRIWb<bits<5> funct5, bit F, bits<3> funct3, string opcodestr>
+ : CVInstSIMDRI<funct5, F, funct3, OPC_CUSTOM_3,
+ (outs GPR:$rd_wb), (ins GPR:$rd, GPR:$rs1, simm6:$imm6),
+ opcodestr, "$rd, $rs1, $imm6"> {
+ let Constraints = "$rd = $rd_wb";
+}
+
+class CVSIMDRU<bits<5> funct5, bit F, bits<3> funct3, string opcodestr,
+ Operand immtype = uimm6>
+ : CVInstSIMDRI<funct5, F, funct3, OPC_CUSTOM_3,
+ (outs GPR:$rd), (ins GPR:$rs1, immtype:$imm6),
+ opcodestr, "$rd, $rs1, $imm6">;
+
+class CVSIMDRUWb<bits<5> funct5, bit F, bits<3> funct3, string opcodestr>
+ : CVInstSIMDRI<funct5, F, funct3, OPC_CUSTOM_3,
+ (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, uimm6:$imm6),
+ opcodestr, "$rd, $rs1, $imm6"> {
+ let Constraints = "$rd = $rd_wb";
+}
+
+class CVSIMDR<bits<5> funct5, bit F, bit funct1, bits<3> funct3,
+ string opcodestr>
+ : CVInstSIMDRR<funct5, F, funct1, funct3, OPC_CUSTOM_3, (outs GPR:$rd),
+ (ins GPR:$rs1), opcodestr, "$rd, $rs1"> {
+ let rs2 = 0b00000;
+}
+
+multiclass CVSIMDBinarySigned<bits<5> funct5, bit F, bit funct1, string mnemonic> {
+ def CV_ # NAME # _H : CVSIMDRR<funct5, F, funct1, 0b000, "cv." # mnemonic # ".h">;
+ def CV_ # NAME # _B : CVSIMDRR<funct5, F, funct1, 0b001, "cv." # mnemonic # ".b">;
+ def CV_ # NAME # _SC_H : CVSIMDRR<funct5, F, funct1, 0b100, "cv." # mnemonic # ".sc.h">;
+ def CV_ # NAME # _SC_B : CVSIMDRR<funct5, F, funct1, 0b101, "cv." # mnemonic # ".sc.b">;
+ def CV_ # NAME # _SCI_H : CVSIMDRI<funct5, F, 0b110, "cv." # mnemonic # ".sci.h">;
+ def CV_ # NAME # _SCI_B : CVSIMDRI<funct5, F, 0b111, "cv." # mnemonic # ".sci.b">;
+}
+
+multiclass CVSIMDBinaryUnsigned<bits<5> funct5, bit F, bit funct1, string mnemonic> {
+ def CV_ # NAME # _H : CVSIMDRR<funct5, F, funct1, 0b000, "cv." # mnemonic # ".h">;
+ def CV_ # NAME # _B : CVSIMDRR<funct5, F, funct1, 0b001, "cv." # mnemonic # ".b">;
+ def CV_ # NAME # _SC_H : CVSIMDRR<funct5, F, funct1, 0b100, "cv." # mnemonic # ".sc.h">;
+ def CV_ # NAME # _SC_B : CVSIMDRR<funct5, F, funct1, 0b101, "cv." # mnemonic # ".sc.b">;
+ def CV_ # NAME # _SCI_H : CVSIMDRU<funct5, F, 0b110, "cv." # mnemonic # ".sci.h">;
+ def CV_ # NAME # _SCI_B : CVSIMDRU<funct5, F, 0b111, "cv." # mnemonic # ".sci.b">;
+}
+
+multiclass CVSIMDShift<bits<5> funct5, bit F, bit funct1, string mnemonic> {
+ def CV_ # NAME # _H : CVSIMDRR<funct5, F, funct1, 0b000, "cv." # mnemonic # ".h">;
+ def CV_ # NAME # _B : CVSIMDRR<funct5, F, funct1, 0b001, "cv." # mnemonic # ".b">;
+ def CV_ # NAME # _SC_H : CVSIMDRR<funct5, F, funct1, 0b100, "cv." # mnemonic # ".sc.h">;
+ def CV_ # NAME # _SC_B : CVSIMDRR<funct5, F, funct1, 0b101, "cv." # mnemonic # ".sc.b">;
+ def CV_ # NAME # _SCI_H : CVSIMDRU<funct5, F, 0b110, "cv." # mnemonic # ".sci.h", uimm4>;
+ def CV_ # NAME # _SCI_B : CVSIMDRU<funct5, F, 0b111, "cv." # mnemonic # ".sci.b", uimm3>;
+}
+
+multiclass CVSIMDBinarySignedWb<bits<5> funct5, bit F, bit funct1, string mnemonic> {
+ def CV_ # NAME # _H : CVSIMDRRWb<funct5, F, funct1, 0b000, "cv." # mnemonic # ".h">;
+ def CV_ # NAME # _B : CVSIMDRRWb<funct5, F, funct1, 0b001, "cv." # mnemonic # ".b">;
+ def CV_ # NAME # _SC_H : CVSIMDRRWb<funct5, F, funct1, 0b100, "cv." # mnemonic # ".sc.h">;
+ def CV_ # NAME # _SC_B : CVSIMDRRWb<funct5, F, funct1, 0b101, "cv." # mnemonic # ".sc.b">;
+ def CV_ # NAME # _SCI_H : CVSIMDRIWb<funct5, F, 0b110, "cv." # mnemonic # ".sci.h">;
+ def CV_ # NAME # _SCI_B : CVSIMDRIWb<funct5, F, 0b111, "cv." # mnemonic # ".sci.b">;
+}
+
+multiclass CVSIMDBinaryUnsignedWb<bits<5> funct5, bit F, bit funct1, string mnemonic> {
+ def CV_ # NAME # _H : CVSIMDRRWb<funct5, F, funct1, 0b000, "cv." # mnemonic # ".h">;
+ def CV_ # NAME # _B : CVSIMDRRWb<funct5, F, funct1, 0b001, "cv." # mnemonic # ".b">;
+ def CV_ # NAME # _SC_H : CVSIMDRRWb<funct5, F, funct1, 0b100, "cv." # mnemonic # ".sc.h">;
+ def CV_ # NAME # _SC_B : CVSIMDRRWb<funct5, F, funct1, 0b101, "cv." # mnemonic # ".sc.b">;
+ def CV_ # NAME # _SCI_H : CVSIMDRUWb<funct5, F, 0b110, "cv." # mnemonic # ".sci.h">;
+ def CV_ # NAME # _SCI_B : CVSIMDRUWb<funct5, F, 0b111, "cv." # mnemonic # ".sci.b">;
+}
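// Expansion sketch: with NAME = "ADD", the instantiation
// `defm ADD : CVSIMDBinarySigned<0b00000, 0, 0, "add">;` below produces
// exactly these six definitions (read off the multiclass body above):
def CV_ADD_H     : CVSIMDRR<0b00000, 0, 0, 0b000, "cv.add.h">;
def CV_ADD_B     : CVSIMDRR<0b00000, 0, 0, 0b001, "cv.add.b">;
def CV_ADD_SC_H  : CVSIMDRR<0b00000, 0, 0, 0b100, "cv.add.sc.h">;
def CV_ADD_SC_B  : CVSIMDRR<0b00000, 0, 0, 0b101, "cv.add.sc.b">;
def CV_ADD_SCI_H : CVSIMDRI<0b00000, 0, 0b110, "cv.add.sci.h">;
def CV_ADD_SCI_B : CVSIMDRI<0b00000, 0, 0b111, "cv.add.sci.b">;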
+
+
+let Predicates = [HasVendorXCVsimd, IsRV32],
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+ defm ADD : CVSIMDBinarySigned<0b00000, 0, 0, "add">;
+ defm SUB : CVSIMDBinarySigned<0b00001, 0, 0, "sub">;
+ defm AVG : CVSIMDBinarySigned<0b00010, 0, 0, "avg">;
+ defm AVGU : CVSIMDBinaryUnsigned<0b00011, 0, 0, "avgu">;
+ defm MIN : CVSIMDBinarySigned<0b00100, 0, 0, "min">;
+ defm MINU : CVSIMDBinaryUnsigned<0b00101, 0, 0, "minu">;
+ defm MAX : CVSIMDBinarySigned<0b00110, 0, 0, "max">;
+ defm MAXU : CVSIMDBinaryUnsigned<0b00111, 0, 0, "maxu">;
+ defm SRL : CVSIMDShift<0b01000, 0, 0, "srl">;
+ defm SRA : CVSIMDShift<0b01001, 0, 0, "sra">;
+ defm SLL : CVSIMDShift<0b01010, 0, 0, "sll">;
+ defm OR : CVSIMDBinarySigned<0b01011, 0, 0, "or">;
+ defm XOR : CVSIMDBinarySigned<0b01100, 0, 0, "xor">;
+ defm AND : CVSIMDBinarySigned<0b01101, 0, 0, "and">;
+
+ def CV_ABS_H : CVSIMDR<0b01110, 0, 0, 0b000, "cv.abs.h">;
+ def CV_ABS_B : CVSIMDR<0b01110, 0, 0, 0b001, "cv.abs.b">;
+
+ // 0b01111xx: UNDEF
+
+ defm DOTUP : CVSIMDBinaryUnsigned<0b10000, 0, 0, "dotup">;
+ defm DOTUSP : CVSIMDBinarySigned<0b10001, 0, 0, "dotusp">;
+ defm DOTSP : CVSIMDBinarySigned<0b10010, 0, 0, "dotsp">;
+ defm SDOTUP : CVSIMDBinaryUnsignedWb<0b10011, 0, 0, "sdotup">;
+ defm SDOTUSP : CVSIMDBinarySignedWb<0b10100, 0, 0, "sdotusp">;
+ defm SDOTSP : CVSIMDBinarySignedWb<0b10101, 0, 0, "sdotsp">;
+
+ // 0b10110xx: UNDEF
+
+ def CV_EXTRACT_H : CVSIMDRU<0b10111, 0, 0b000, "cv.extract.h">;
+ def CV_EXTRACT_B : CVSIMDRU<0b10111, 0, 0b001, "cv.extract.b">;
+ def CV_EXTRACTU_H : CVSIMDRU<0b10111, 0, 0b010, "cv.extractu.h">;
+ def CV_EXTRACTU_B : CVSIMDRU<0b10111, 0, 0b011, "cv.extractu.b">;
+ def CV_INSERT_H : CVSIMDRUWb<0b10111, 0, 0b100, "cv.insert.h">;
+ def CV_INSERT_B : CVSIMDRUWb<0b10111, 0, 0b101, "cv.insert.b">;
+
+ def CV_SHUFFLE_H : CVSIMDRR<0b11000, 0, 0, 0b000, "cv.shuffle.h">;
+ def CV_SHUFFLE_B : CVSIMDRR<0b11000, 0, 0, 0b001, "cv.shuffle.b">;
+ def CV_SHUFFLE_SCI_H : CVSIMDRU<0b11000, 0, 0b110, "cv.shuffle.sci.h">;
+ def CV_SHUFFLEI0_SCI_B : CVSIMDRU<0b11000, 0, 0b111, "cv.shufflei0.sci.b">;
+
+ def CV_SHUFFLEI1_SCI_B : CVSIMDRU<0b11001, 0, 0b111, "cv.shufflei1.sci.b">;
+
+ def CV_SHUFFLEI2_SCI_B : CVSIMDRU<0b11010, 0, 0b111, "cv.shufflei2.sci.b">;
+
+ def CV_SHUFFLEI3_SCI_B : CVSIMDRU<0b11011, 0, 0b111, "cv.shufflei3.sci.b">;
+
+ def CV_SHUFFLE2_H : CVSIMDRRWb<0b11100, 0, 0, 0b000, "cv.shuffle2.h">;
+ def CV_SHUFFLE2_B : CVSIMDRRWb<0b11100, 0, 0, 0b001, "cv.shuffle2.b">;
+
+ // 0b11101xx: UNDEF
+
+ def CV_PACK : CVSIMDRR<0b11110, 0, 0, 0b000, "cv.pack">;
+ def CV_PACK_H : CVSIMDRR<0b11110, 0, 1, 0b000, "cv.pack.h">;
+
+ def CV_PACKHI_B : CVSIMDRRWb<0b11111, 0, 1, 0b001, "cv.packhi.b">;
+ def CV_PACKLO_B : CVSIMDRRWb<0b11111, 0, 0, 0b001, "cv.packlo.b">;
+
+ defm CMPEQ : CVSIMDBinarySigned<0b00000, 1, 0, "cmpeq">;
+ defm CMPNE : CVSIMDBinarySigned<0b00001, 1, 0, "cmpne">;
+ defm CMPGT : CVSIMDBinarySigned<0b00010, 1, 0, "cmpgt">;
+ defm CMPGE : CVSIMDBinarySigned<0b00011, 1, 0, "cmpge">;
+ defm CMPLT : CVSIMDBinarySigned<0b00100, 1, 0, "cmplt">;
+ defm CMPLE : CVSIMDBinarySigned<0b00101, 1, 0, "cmple">;
+ defm CMPGTU : CVSIMDBinaryUnsigned<0b00110, 1, 0, "cmpgtu">;
+ defm CMPGEU : CVSIMDBinaryUnsigned<0b00111, 1, 0, "cmpgeu">;
+ defm CMPLTU : CVSIMDBinaryUnsigned<0b01000, 1, 0, "cmpltu">;
+ defm CMPLEU : CVSIMDBinaryUnsigned<0b01001, 1, 0, "cmpleu">;
+
+ def CV_CPLXMUL_R : CVSIMDRRWb<0b01010, 1, 0, 0b000, "cv.cplxmul.r">;
+ def CV_CPLXMUL_I : CVSIMDRRWb<0b01010, 1, 1, 0b000, "cv.cplxmul.i">;
+ def CV_CPLXMUL_R_DIV2 : CVSIMDRRWb<0b01010, 1, 0, 0b010, "cv.cplxmul.r.div2">;
+ def CV_CPLXMUL_I_DIV2 : CVSIMDRRWb<0b01010, 1, 1, 0b010, "cv.cplxmul.i.div2">;
+ def CV_CPLXMUL_R_DIV4 : CVSIMDRRWb<0b01010, 1, 0, 0b100, "cv.cplxmul.r.div4">;
+ def CV_CPLXMUL_I_DIV4 : CVSIMDRRWb<0b01010, 1, 1, 0b100, "cv.cplxmul.i.div4">;
+ def CV_CPLXMUL_R_DIV8 : CVSIMDRRWb<0b01010, 1, 0, 0b110, "cv.cplxmul.r.div8">;
+ def CV_CPLXMUL_I_DIV8 : CVSIMDRRWb<0b01010, 1, 1, 0b110, "cv.cplxmul.i.div8">;
+
+ def CV_CPLXCONJ : CVSIMDR<0b01011, 1, 0, 0b000, "cv.cplxconj">;
+
+ // 0b01011xx: UNDEF
+
+ def CV_SUBROTMJ : CVSIMDRR<0b01100, 1, 0, 0b000, "cv.subrotmj">;
+ def CV_SUBROTMJ_DIV2 : CVSIMDRR<0b01100, 1, 0, 0b010, "cv.subrotmj.div2">;
+ def CV_SUBROTMJ_DIV4 : CVSIMDRR<0b01100, 1, 0, 0b100, "cv.subrotmj.div4">;
+ def CV_SUBROTMJ_DIV8 : CVSIMDRR<0b01100, 1, 0, 0b110, "cv.subrotmj.div8">;
+
+ def CV_ADD_DIV2 : CVSIMDRR<0b01101, 1, 0, 0b010, "cv.add.div2">;
+ def CV_ADD_DIV4 : CVSIMDRR<0b01101, 1, 0, 0b100, "cv.add.div4">;
+ def CV_ADD_DIV8 : CVSIMDRR<0b01101, 1, 0, 0b110, "cv.add.div8">;
+
+ def CV_SUB_DIV2 : CVSIMDRR<0b01110, 1, 0, 0b010, "cv.sub.div2">;
+ def CV_SUB_DIV4 : CVSIMDRR<0b01110, 1, 0, 0b100, "cv.sub.div4">;
+ def CV_SUB_DIV8 : CVSIMDRR<0b01110, 1, 0, 0b110, "cv.sub.div8">;
+}
+
+class CVInstImmBranch<bits<3> funct3, dag outs, dag ins,
+ string opcodestr, string argstr>
+ : RVInstB<funct3, OPC_CUSTOM_0, outs, ins, opcodestr, argstr> {
+ bits<5> imm5;
+ let rs2 = imm5;
+ let DecoderNamespace = "XCVbi";
+}
+
+let Predicates = [HasVendorXCVbi, IsRV32], hasSideEffects = 0, mayLoad = 0,
+ mayStore = 0, isBranch = 1, isTerminator = 1 in {
+ // Immediate branching operations
+ def CV_BEQIMM : CVInstImmBranch<0b110, (outs),
+ (ins GPR:$rs1, simm5:$imm5, simm13_lsb0:$imm12),
+ "cv.beqimm", "$rs1, $imm5, $imm12">, Sched<[]>;
+ def CV_BNEIMM : CVInstImmBranch<0b111, (outs),
+ (ins GPR:$rs1, simm5:$imm5, simm13_lsb0:$imm12),
+ "cv.bneimm", "$rs1, $imm5, $imm12">, Sched<[]>;
+}
+
+def CVrrAsmOperand : AsmOperandClass {
+ let Name = "RegReg";
+ let ParserMethod = "parseRegReg";
+ let DiagnosticType = "InvalidRegReg";
+}
+
+def CVrr : Operand<OtherVT> {
+ let ParserMatchClass = CVrrAsmOperand;
+ let EncoderMethod = "getRegReg";
+ let DecoderMethod = "decodeRegReg";
+ let PrintMethod = "printRegReg";
+}
+
+class CVLoad_ri_inc<bits<3> funct3, string opcodestr>
+ : RVInstI<funct3, OPC_CUSTOM_0, (outs GPR:$rd, GPR:$rs1_wb), (ins GPRMem:$rs1, simm12:$imm12),
+ opcodestr, "$rd, (${rs1}), ${imm12}"> {
+ let Constraints = "$rs1_wb = $rs1";
+ let DecoderNamespace = "XCVmem";
+}
+
+class CVLoad_rr_inc<bits<7> funct7, bits<3> funct3, string opcodestr>
+ : RVInstR<funct7, funct3, OPC_CUSTOM_1, (outs GPR:$rd, GPR:$rs1_wb), (ins GPRMem:$rs1, GPR:$rs2),
+ opcodestr, "$rd, (${rs1}), ${rs2}"> {
+ let Constraints = "$rs1_wb = $rs1";
+ let DecoderNamespace = "XCVmem";
+}
+
+class CVLoad_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
+ : RVInstR<funct7, funct3, OPC_CUSTOM_1, (outs GPR:$rd), (ins CVrr:$cvrr),
+ opcodestr, "$rd, $cvrr"> {
+ bits<5> rd;
+ bits<10> cvrr;
+
+ let Inst{31-25} = funct7;
+ let Inst{24-20} = cvrr{4-0};
+ let Inst{19-15} = cvrr{9-5};
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = rd;
+ let DecoderNamespace = "XCVmem";
+}
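// The CVrr operand packs both address registers into one 10-bit value that
// CVLoad_rr splits back onto the standard register fields:
//   cvrr{9-5} -> Inst{19-15} (base, the rs1 slot)
//   cvrr{4-0} -> Inst{24-20} (offset, the rs2 slot)
// e.g. base x10 (0b01010), offset x11 (0b01011) => cvrr = 0b01010_01011,
// giving the register-register addressing form handled by parseRegReg
// (the "rd, rs2(rs1)" assembly syntax is assumed from the operand methods
// above).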
+
+let Predicates = [HasVendorXCVmem, IsRV32], hasSideEffects = 0,
+ mayLoad = 1, mayStore = 0, Constraints = "$rs1_wb = $rs1" in {
+ // Register-Immediate load with post-increment
+ def CV_LB_ri_inc : CVLoad_ri_inc<0b000, "cv.lb">;
+ def CV_LBU_ri_inc : CVLoad_ri_inc<0b100, "cv.lbu">;
+ def CV_LH_ri_inc : CVLoad_ri_inc<0b001, "cv.lh">;
+ def CV_LHU_ri_inc : CVLoad_ri_inc<0b101, "cv.lhu">;
+ def CV_LW_ri_inc : CVLoad_ri_inc<0b010, "cv.lw">;
+
+ // Register-Register load with post-increment
+ def CV_LB_rr_inc : CVLoad_rr_inc<0b0000000, 0b011, "cv.lb">;
+ def CV_LBU_rr_inc : CVLoad_rr_inc<0b0001000, 0b011, "cv.lbu">;
+ def CV_LH_rr_inc : CVLoad_rr_inc<0b0000001, 0b011, "cv.lh">;
+ def CV_LHU_rr_inc : CVLoad_rr_inc<0b0001001, 0b011, "cv.lhu">;
+ def CV_LW_rr_inc : CVLoad_rr_inc<0b0000010, 0b011, "cv.lw">;
+}
+
+let Predicates = [HasVendorXCVmem, IsRV32], hasSideEffects = 0,
+ mayLoad = 1, mayStore = 0 in {
+ // Register-Register load
+ def CV_LB_rr : CVLoad_rr<0b0000100, 0b011, "cv.lb">;
+ def CV_LBU_rr : CVLoad_rr<0b0001100, 0b011, "cv.lbu">;
+ def CV_LH_rr : CVLoad_rr<0b0000101, 0b011, "cv.lh">;
+ def CV_LHU_rr : CVLoad_rr<0b0001101, 0b011, "cv.lhu">;
+ def CV_LW_rr : CVLoad_rr<0b0000110, 0b011, "cv.lw">;
+}
+
+class CVStore_ri_inc<bits<3> funct3, string opcodestr>
+ : RVInstS<funct3, OPC_CUSTOM_1, (outs GPR:$rs1_wb),
+ (ins GPR:$rs2, GPR:$rs1, simm12:$imm12),
+ opcodestr, "$rs2, (${rs1}), ${imm12}"> {
+ let Constraints = "$rs1_wb = $rs1";
+ let DecoderNamespace = "XCVmem";
+}
+
+class CVStore_rr_inc<bits<3> funct3, bits<7> funct7, dag outs, dag ins,
+ string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatOther> {
+ bits<5> rs3;
+ bits<5> rs2;
+ bits<5> rs1;
+
+ let Inst{31-25} = funct7;
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = rs3;
+ let Inst{6-0} = OPC_CUSTOM_1.Value;
+ let DecoderNamespace = "XCVmem";
+}
+
+
+class CVStore_rr<bits<3> funct3, bits<7> funct7, dag outs, dag ins,
+ string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatOther> {
+ bits<5> rs2;
+ bits<10> cvrr;
+
+ let Inst{31-25} = funct7;
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = cvrr{9-5};
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = cvrr{4-0};
+ let Inst{6-0} = OPC_CUSTOM_1.Value;
+ let DecoderNamespace = "XCVmem";
+}
+
+let Predicates = [HasVendorXCVmem, IsRV32], hasSideEffects = 0,
+ mayLoad = 0, mayStore = 1, Constraints = "$rs1_wb = $rs1" in {
+ // Register-Immediate store with post-increment
+ def CV_SB_ri_inc : CVStore_ri_inc<0b000, "cv.sb">;
+ def CV_SH_ri_inc : CVStore_ri_inc<0b001, "cv.sh">;
+ def CV_SW_ri_inc : CVStore_ri_inc<0b010, "cv.sw">;
+
+ // Register-Register store with post-increment
+ def CV_SB_rr_inc : CVStore_rr_inc<0b011, 0b0010000,
+ (outs GPR:$rs1_wb), (ins GPR:$rs2, GPR:$rs1, GPR:$rs3),
+ "cv.sb", "$rs2, (${rs1}), ${rs3}">;
+ def CV_SH_rr_inc : CVStore_rr_inc<0b011, 0b0010001,
+ (outs GPR:$rs1_wb), (ins GPR:$rs2, GPR:$rs1, GPR:$rs3),
+ "cv.sh", "$rs2, (${rs1}), ${rs3}">;
+ def CV_SW_rr_inc : CVStore_rr_inc<0b011, 0b0010010,
+ (outs GPR:$rs1_wb), (ins GPR:$rs2, GPR:$rs1, GPR:$rs3),
+ "cv.sw", "$rs2, (${rs1}), ${rs3}">;
+}
+
+
+let Predicates = [HasVendorXCVmem, IsRV32], hasSideEffects = 0,
+ mayLoad = 0, mayStore = 1 in {
+ // Register-Register store
+ def CV_SB_rr : CVStore_rr<0b011, 0b0010100,
+ (outs), (ins GPR:$rs2, CVrr:$cvrr),
+ "cv.sb", "$rs2, $cvrr">;
+ def CV_SH_rr : CVStore_rr<0b011, 0b0010101,
+ (outs), (ins GPR:$rs2, CVrr:$cvrr),
+ "cv.sh", "$rs2, $cvrr">;
+ def CV_SW_rr : CVStore_rr<0b011, 0b0010110,
+ (outs), (ins GPR:$rs2, CVrr:$cvrr),
+ "cv.sw", "$rs2, $cvrr">;
+}
+
+let DecoderNamespace = "XCVelw" in
+class CVLoad_ri<bits<3> funct3, string opcodestr>
+ : RVInstI<funct3, OPC_CUSTOM_0, (outs GPR:$rd),
+ (ins GPRMem:$rs1, simm12:$imm12), opcodestr, "$rd, ${imm12}(${rs1})">;
+
+let Predicates = [HasVendorXCVelw, IsRV32], hasSideEffects = 0,
+ mayLoad = 1, mayStore = 0 in {
+ // Event load
+ def CV_ELW : CVLoad_ri<0b011, "cv.elw">;
+}
+
+def cv_tuimm2 : TImmLeaf<XLenVT, [{return isUInt<2>(Imm);}]>;
+def cv_tuimm5 : TImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]>;
+def cv_uimm10 : ImmLeaf<XLenVT, [{return isUInt<10>(Imm);}]>;
+
+def CV_LO5: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0x1f, SDLoc(N),
+ N->getValueType(0));
+}]>;
+
+def CV_HI5: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() >> 5, SDLoc(N),
+ N->getValueType(0));
+}]>;
+
+multiclass PatCoreVBitManip<Intrinsic intr> {
+ def : PatGprGpr<intr, !cast<RVInst>("CV_" # NAME # "R")>;
+ def : Pat<(intr GPR:$rs1, cv_uimm10:$imm),
+ (!cast<RVInst>("CV_" # NAME)
+ GPR:$rs1, (CV_HI5 cv_uimm10:$imm), (CV_LO5 cv_uimm10:$imm))>;
+}
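// Worked example of the immediate split, plus what one instantiation below
// expands to (NAME = "EXTRACT"):
//   imm = 0x17f = 0b01011_11111
//   CV_HI5(imm) = imm >> 5   = 0b01011 = 11
//   CV_LO5(imm) = imm & 0x1f = 0b11111 = 31
def : PatGprGpr<int_riscv_cv_bitmanip_extract, CV_EXTRACTR>;
def : Pat<(int_riscv_cv_bitmanip_extract GPR:$rs1, cv_uimm10:$imm),
          (CV_EXTRACT GPR:$rs1, (CV_HI5 cv_uimm10:$imm),
                      (CV_LO5 cv_uimm10:$imm))>;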
+
+let Predicates = [HasVendorXCVbitmanip, IsRV32] in {
+ defm EXTRACT : PatCoreVBitManip<int_riscv_cv_bitmanip_extract>;
+ defm EXTRACTU : PatCoreVBitManip<int_riscv_cv_bitmanip_extractu>;
+ defm BCLR : PatCoreVBitManip<int_riscv_cv_bitmanip_bclr>;
+ defm BSET : PatCoreVBitManip<int_riscv_cv_bitmanip_bset>;
+
+ def : Pat<(int_riscv_cv_bitmanip_insert GPR:$rs1, GPR:$rs2, GPR:$rd),
+ (CV_INSERTR GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+ def : Pat<(int_riscv_cv_bitmanip_insert GPR:$rs1, cv_uimm10:$imm, GPR:$rd),
+ (CV_INSERT GPR:$rd, GPR:$rs1, (CV_HI5 cv_uimm10:$imm),
+ (CV_LO5 cv_uimm10:$imm))>;
+
+ def : PatGpr<cttz, CV_FF1>;
+ def : PatGpr<ctlz, CV_FL1>;
+ def : PatGpr<int_riscv_cv_bitmanip_clb, CV_CLB>;
+ def : PatGpr<ctpop, CV_CNT>;
+
+ def : PatGprGpr<rotr, CV_ROR>;
+
+ def : Pat<(int_riscv_cv_bitmanip_bitrev GPR:$rs1, cv_tuimm5:$pts,
+ cv_tuimm2:$radix),
+ (CV_BITREV GPR:$rs1, cv_tuimm2:$radix, cv_tuimm5:$pts)>;
+ def : Pat<(bitreverse (XLenVT GPR:$rs)), (CV_BITREV GPR:$rs, 0, 0)>;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
index 03ed501ba6a3..fa618b437ce7 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -27,30 +27,19 @@ def VCIX_XV : VCIXType<0b0010>;
def VCIX_XVV : VCIXType<0b1010>;
def VCIX_XVW : VCIXType<0b1111>;
-// The payload and timm5 operands are all marked as ImmArg in the IR
+// The payload and tsimm5 operands are all marked as ImmArg in the IR
// intrinsic and will be target constants, so use TImmLeaf rather than ImmLeaf.
-def payload1 : Operand<XLenVT>, TImmLeaf<XLenVT, [{return isUInt<1>(Imm);}]> {
- let ParserMatchClass = UImmAsmOperand<1>;
- let DecoderMethod = "decodeUImmOperand<1>";
- let OperandType = "OPERAND_UIMM1";
- let OperandNamespace = "RISCVOp";
+class PayloadOp<int bitsNum> : RISCVOp, TImmLeaf<XLenVT, "return isUInt<" # bitsNum # ">(Imm);"> {
+ let ParserMatchClass = UImmAsmOperand<bitsNum>;
+ let DecoderMethod = "decodeUImmOperand<"# bitsNum # ">";
+ let OperandType = "OPERAND_UIMM" # bitsNum;
}
-def payload2 : Operand<XLenVT>, TImmLeaf<XLenVT, [{return isUInt<2>(Imm);}]> {
- let ParserMatchClass = UImmAsmOperand<2>;
- let DecoderMethod = "decodeUImmOperand<2>";
- let OperandType = "OPERAND_UIMM2";
- let OperandNamespace = "RISCVOp";
-}
-
-def payload5 : Operand<XLenVT>, TImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]> {
- let ParserMatchClass = UImmAsmOperand<5>;
- let DecoderMethod = "decodeUImmOperand<5>";
- let OperandType = "OPERAND_UIMM5";
- let OperandNamespace = "RISCVOp";
-}
+def payload1 : PayloadOp<1>;
+def payload2 : PayloadOp<2>;
+def payload5 : PayloadOp<5>;
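// Expansion check: `def payload5 : PayloadOp<5>;` reproduces the removed
// hand-written operand field for field (that RISCVOp supplies the
// Operand<XLenVT> base and OperandNamespace = "RISCVOp" is assumed):
//   ParserMatchClass = UImmAsmOperand<5>
//   DecoderMethod    = "decodeUImmOperand<5>"
//   OperandType      = "OPERAND_UIMM5"
// plus the TImmLeaf predicate isUInt<5>(Imm).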
-def timm5 : Operand<XLenVT>, TImmLeaf<XLenVT, [{return isInt<5>(Imm);}]> {
+def tsimm5 : Operand<XLenVT>, TImmLeaf<XLenVT, [{return isInt<5>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<5>;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeSImmOperand<5>";
@@ -111,15 +100,6 @@ class RVInstVCFCustom2<bits<4> funct6_hi4, bits<3> funct3, dag outs, dag ins,
let RVVConstraint = NoConstraint;
}
-class GetFTypeInfo<int sew> {
- ValueType Scalar = !cond(!eq(sew, 16): f16,
- !eq(sew, 32): f32,
- !eq(sew, 64): f64);
- RegisterClass ScalarRegClass = !cond(!eq(sew, 16): FPR16,
- !eq(sew, 32): FPR32,
- !eq(sew, 64): FPR64);
-}
-
class VCIXInfo<string suffix, VCIXType type, DAGOperand TyRd,
DAGOperand TyRs2, DAGOperand TyRs1, bit HaveOutputDst> {
string OpcodeStr = !if(HaveOutputDst, "sf.vc.v." # suffix,
@@ -187,6 +167,20 @@ multiclass CustomSiFiveVCIX<string suffix, VCIXType type,
InTyRs1, 1>;
}
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+class CustomSiFiveVMACC<bits<6> funct6, RISCVVFormat opv, string opcodestr>
+ : RVInstVCCustom2<funct6{5-2}, opv.Value, (outs VR:$rd), (ins VR:$rs1, VR:$rs2),
+ opcodestr, "$rd, $rs1, $rs2"> {
+ let vm = 1;
+ let funct6_lo2 = funct6{1-0};
+}
+}
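// funct6 split in CustomSiFiveVMACC, e.g. for sf.vqmaccu.2x8x2 below
// (funct6 = 0b101100):
//   funct6{5-2} = 0b1011 -> funct6_hi4 parameter of RVInstVCCustom2
//   funct6{1-0} = 0b00   -> let funct6_lo2
// with vm = 1 fixing the unmasked encoding.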
+
+class CustomSiFiveVFNRCLIP<bits<6> funct6, RISCVVFormat opv, string opcodestr>
+ : VALUVF<funct6, opv, opcodestr> {
+ let Inst{6-0} = OPC_CUSTOM_2.Value;
+}
+
let Predicates = [HasVendorXSfvcp], mayLoad = 0, mayStore = 0,
hasSideEffects = 1, hasNoSchedulingInfo = 1, DecoderNamespace = "XSfvcp" in {
defm X : CustomSiFiveVCIX<"x", VCIX_X, uimm5, uimm5, GPR>, Sched<[]>;
@@ -205,6 +199,28 @@ let Predicates = [HasVendorXSfvcp], mayLoad = 0, mayStore = 0,
defm FVW : CustomSiFiveVCIX<"fvw", VCIX_XVW, VR, VR, FPR32>, Sched<[]>;
}
+let Predicates = [HasVendorXSfvqmaccdod], DecoderNamespace = "XSfvqmaccdod" in {
+ def VQMACCU_2x8x2 : CustomSiFiveVMACC<0b101100, OPMVV, "sf.vqmaccu.2x8x2">;
+ def VQMACC_2x8x2 : CustomSiFiveVMACC<0b101101, OPMVV, "sf.vqmacc.2x8x2">;
+ def VQMACCUS_2x8x2 : CustomSiFiveVMACC<0b101110, OPMVV, "sf.vqmaccus.2x8x2">;
+ def VQMACCSU_2x8x2 : CustomSiFiveVMACC<0b101111, OPMVV, "sf.vqmaccsu.2x8x2">;
+}
+
+let Predicates = [HasVendorXSfvqmaccqoq], DecoderNamespace = "XSfvqmaccqoq" in {
+ def VQMACCU_4x8x4 : CustomSiFiveVMACC<0b111100, OPMVV, "sf.vqmaccu.4x8x4">;
+ def VQMACC_4x8x4 : CustomSiFiveVMACC<0b111101, OPMVV, "sf.vqmacc.4x8x4">;
+ def VQMACCUS_4x8x4 : CustomSiFiveVMACC<0b111110, OPMVV, "sf.vqmaccus.4x8x4">;
+ def VQMACCSU_4x8x4 : CustomSiFiveVMACC<0b111111, OPMVV, "sf.vqmaccsu.4x8x4">;
+}
+
+let Predicates = [HasVendorXSfvfwmaccqqq], DecoderNamespace = "XSfvfwmaccqqq" in {
+ def VFWMACC_4x4x4 : CustomSiFiveVMACC<0b111100, OPFVV, "sf.vfwmacc.4x4x4">;
+}
+
+let Predicates = [HasVendorXSfvfnrclipxfqf], DecoderNamespace = "XSfvfnrclipxfqf" in {
+ def VFNRCLIP_XU_F_QF : CustomSiFiveVFNRCLIP<0b100010, OPFVF, "sf.vfnrclip.xu.f.qf">;
+ def VFNRCLIP_X_F_QF : CustomSiFiveVFNRCLIP<0b100011, OPFVF, "sf.vfnrclip.x.f.qf">;
+}
class VPseudoVC_X<Operand OpClass, DAGOperand RS1Class,
bit HasSideEffect = 1> :
Pseudo<(outs),
@@ -327,15 +343,41 @@ multiclass VPseudoVC_XVW<LMULInfo m, DAGOperand RS1Class,
}
}
+multiclass VPseudoSiFiveVMACC<string mx, VReg vd_type, VReg vs2_type,
+ string Constraint = ""> {
+ def "Pseudo" # NAME # "_" # mx
+ : VPseudoTernaryNoMaskWithPolicy<vd_type, V_M1.vrclass, vs2_type, Constraint>;
+}
+
+multiclass VPseudoSiFiveVQMACC<string Constraint = ""> {
+ foreach m = MxListVF8 in
+ defm NAME : VPseudoSiFiveVMACC<m.MX, m.vrclass, m.vrclass, Constraint>;
+}
+
+multiclass VPseudoSiFiveVFWMACC<string Constraint = ""> {
+ foreach m = MxListFW in
+ defm NAME : VPseudoSiFiveVMACC<m.MX, m.wvrclass, m.vrclass, Constraint>;
+}
+
+multiclass VPseudoSiFiveVFNRCLIP<string Constraint = "@earlyclobber $rd"> {
+ foreach m = MxListVF4 in
+ let hasSideEffects = 0 in
+ defm "Pseudo" # NAME : VPseudoBinaryRoundingMode<!if(!eq(m.vrclass, VRM8),
+ VRM2, VR),
+ m.vrclass, FPR32, m,
+ Constraint, /*sew*/0,
+ UsesVXRM=0>;
+}
+
let Predicates = [HasVendorXSfvcp] in {
foreach m = MxList in {
defm X : VPseudoVC_X<m, GPR>;
- defm I : VPseudoVC_X<m, timm5>;
+ defm I : VPseudoVC_X<m, tsimm5>;
defm XV : VPseudoVC_XV<m, GPR>;
- defm IV : VPseudoVC_XV<m, timm5>;
+ defm IV : VPseudoVC_XV<m, tsimm5>;
defm VV : VPseudoVC_XV<m, m.vrclass>;
defm XVV : VPseudoVC_XVV<m, GPR>;
- defm IVV : VPseudoVC_XVV<m, timm5>;
+ defm IVV : VPseudoVC_XVV<m, tsimm5>;
defm VVV : VPseudoVC_XVV<m, m.vrclass>;
}
foreach f = FPList in {
@@ -346,7 +388,7 @@ let Predicates = [HasVendorXSfvcp] in {
}
foreach m = MxListW in {
defm XVW : VPseudoVC_XVW<m, GPR>;
- defm IVW : VPseudoVC_XVW<m, timm5>;
+ defm IVW : VPseudoVC_XVW<m, tsimm5>;
defm VVW : VPseudoVC_XVW<m, m.vrclass>;
}
foreach f = FPListW in {
@@ -355,6 +397,29 @@ let Predicates = [HasVendorXSfvcp] in {
}
}
+let Predicates = [HasVendorXSfvqmaccdod] in {
+ defm VQMACCU_2x8x2 : VPseudoSiFiveVQMACC;
+ defm VQMACC_2x8x2 : VPseudoSiFiveVQMACC;
+ defm VQMACCUS_2x8x2 : VPseudoSiFiveVQMACC;
+ defm VQMACCSU_2x8x2 : VPseudoSiFiveVQMACC;
+}
+
+let Predicates = [HasVendorXSfvqmaccqoq] in {
+ defm VQMACCU_4x8x4 : VPseudoSiFiveVQMACC;
+ defm VQMACC_4x8x4 : VPseudoSiFiveVQMACC;
+ defm VQMACCUS_4x8x4 : VPseudoSiFiveVQMACC;
+ defm VQMACCSU_4x8x4 : VPseudoSiFiveVQMACC;
+}
+
+let Predicates = [HasVendorXSfvfwmaccqqq] in {
+ defm VFWMACC_4x4x4 : VPseudoSiFiveVFWMACC;
+}
+
+let Predicates = [HasVendorXSfvfnrclipxfqf] in {
+ defm VFNRCLIP_XU_F_QF : VPseudoSiFiveVFNRCLIP;
+ defm VFNRCLIP_X_F_QF : VPseudoSiFiveVFNRCLIP;
+}
+
class VPatVC_OP4<string intrinsic_name,
string inst,
ValueType op2_type,
@@ -472,39 +537,126 @@ multiclass VPatVC_XVV<string intrinsic_suffix, string instruction_suffix,
wti.RegClass, vti.RegClass, kind, op1_kind>;
}
+class GetFTypeInfo<int Sew> {
+ ValueType Scalar = !cond(!eq(Sew, 16) : f16,
+ !eq(Sew, 32) : f32,
+ !eq(Sew, 64) : f64);
+ RegisterClass ScalarRegClass = !cond(!eq(Sew, 16) : FPR16,
+ !eq(Sew, 32) : FPR32,
+ !eq(Sew, 64) : FPR64);
+
+ string ScalarSuffix = !cond(!eq(Scalar, f16) : "FPR16",
+ !eq(Scalar, f32) : "FPR32",
+ !eq(Scalar, f64) : "FPR64");
+}
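// Worked instance of the helper above (values read off the !cond arms):
defvar finfo = GetFTypeInfo<32>;
// finfo.Scalar         = f32
// finfo.ScalarRegClass = FPR32
// finfo.ScalarSuffix   = "FPR32"
// so the SEW=32 FP patterns below pass "FPR32V" / "FPR32VV" / "FPR32VW" as
// the instruction suffix.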
+
+multiclass VPatVMACC<string intrinsic, string instruction, string kind,
+ list<VTypeInfoToWide> info_pairs, ValueType vec_m1> {
+ foreach pair = info_pairs in {
+ defvar VdInfo = pair.Wti;
+ defvar Vs2Info = pair.Vti;
+ let Predicates = [HasVInstructions] in
+ def : VPatTernaryNoMaskWithPolicy<"int_riscv_sf_" # intrinsic,
+ "Pseudo" # instruction, kind, VdInfo.Vector,
+ vec_m1, Vs2Info.Vector,
+ Vs2Info.Log2SEW, Vs2Info.LMul,
+ VdInfo.RegClass, VR, Vs2Info.RegClass>;
+ }
+}
+
+defset list<VTypeInfoToWide> VQMACCInfoPairs = {
+ def : VTypeInfoToWide<VI8M1, VI32M1>;
+ def : VTypeInfoToWide<VI8M2, VI32M2>;
+ def : VTypeInfoToWide<VI8M4, VI32M4>;
+ def : VTypeInfoToWide<VI8M8, VI32M8>;
+}
+
+multiclass VPatVQMACC<string intrinsic, string instruction, string kind>
+ : VPatVMACC<intrinsic, instruction, kind, VQMACCInfoPairs, vint8m1_t>;
+
+
+multiclass VPatVFWMACC<string intrinsic, string instruction, string kind>
+ : VPatVMACC<intrinsic, instruction, kind, AllWidenableBFloatToFloatVectors,
+ vbfloat16m1_t>;
+
+defset list<VTypeInfoToWide> VFNRCLIPInfoPairs = {
+ def : VTypeInfoToWide<VI8MF8, VF32MF2>;
+ def : VTypeInfoToWide<VI8MF4, VF32M1>;
+ def : VTypeInfoToWide<VI8MF2, VF32M2>;
+ def : VTypeInfoToWide<VI8M1, VF32M4>;
+ def : VTypeInfoToWide<VI8M2, VF32M8>;
+}
+
+multiclass VPatVFNRCLIP<string intrinsic, string instruction> {
+ foreach pair = VFNRCLIPInfoPairs in {
+ defvar Vti = pair.Vti;
+ defvar Wti = pair.Wti;
+ defm : VPatBinaryRoundingMode<"int_riscv_sf_" # intrinsic,
+ "Pseudo" # instruction # "_" # Wti.LMul.MX,
+ Vti.Vector, Wti.Vector, Wti.Scalar, Vti.Mask,
+ Vti.Log2SEW, Vti.RegClass,
+ Wti.RegClass, Wti.ScalarRegClass>;
+ }
+}
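// Sketch of one VPatVFNRCLIP iteration, for pair <VI8MF8, VF32MF2>. The MVT
// and field values are assumed from the standard VTypeInfo records
// (nxv1i8 / nxv1f32 / nxv1i1, Vti.Log2SEW = 3, Wti.LMul.MX = "MF2"):
defm : VPatBinaryRoundingMode<"int_riscv_sf_vfnrclip_xu_f_qf",
                              "PseudoVFNRCLIP_XU_F_QF_MF2",
                              nxv1i8, nxv1f32, f32, nxv1i1,
                              3, VR, VR, FPR32>;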
+
let Predicates = [HasVendorXSfvcp] in {
foreach vti = AllIntegerVectors in {
- defm : VPatVC_X<"x", "X", vti, vti.Scalar, vti.ScalarRegClass>;
- defm : VPatVC_X<"i", "I", vti, XLenVT, timm5>;
- defm : VPatVC_XV<"xv", "XV", vti, vti.Scalar, vti.ScalarRegClass>;
- defm : VPatVC_XV<"iv", "IV", vti, XLenVT, timm5>;
+ defm : VPatVC_X<"x", "X", vti, XLenVT, GPR>;
+ defm : VPatVC_X<"i", "I", vti, XLenVT, tsimm5>;
+ defm : VPatVC_XV<"xv", "XV", vti, XLenVT, GPR>;
+ defm : VPatVC_XV<"iv", "IV", vti, XLenVT, tsimm5>;
defm : VPatVC_XV<"vv", "VV", vti, vti.Vector, vti.RegClass>;
- defm : VPatVC_XVV<"xvv", "XVV", vti, vti, vti.Scalar, vti.ScalarRegClass>;
- defm : VPatVC_XVV<"ivv", "IVV", vti, vti, XLenVT, timm5>;
+ defm : VPatVC_XVV<"xvv", "XVV", vti, vti, XLenVT, GPR>;
+ defm : VPatVC_XVV<"ivv", "IVV", vti, vti, XLenVT, tsimm5>;
defm : VPatVC_XVV<"vvv", "VVV", vti, vti, vti.Vector, vti.RegClass>;
- if !ge(vti.SEW, 16) then {
- defm : VPatVC_XV<"fv", "F" # vti.SEW # "V", vti,
- GetFTypeInfo<vti.SEW>.Scalar,
- GetFTypeInfo<vti.SEW>.ScalarRegClass, payload1>;
- defm : VPatVC_XVV<"fvv", "F" # vti.SEW # "VV", vti, vti,
- GetFTypeInfo<vti.SEW>.Scalar,
- GetFTypeInfo<vti.SEW>.ScalarRegClass, payload1>;
+
+ if !ne(vti.SEW, 8) then {
+ defvar finfo = GetFTypeInfo<vti.SEW>;
+ defm : VPatVC_XV<"fv", finfo.ScalarSuffix # "V", vti, finfo.Scalar,
+ finfo.ScalarRegClass, payload1>;
+ defm : VPatVC_XVV<"fvv", finfo.ScalarSuffix # "VV", vti, vti, finfo.Scalar,
+ finfo.ScalarRegClass, payload1>;
}
}
foreach VtiToWti = AllWidenableIntVectors in {
defvar vti = VtiToWti.Vti;
defvar wti = VtiToWti.Wti;
- defm : VPatVC_XVV<"xvw", "XVW", wti, vti, vti.Scalar, vti.ScalarRegClass>;
- defm : VPatVC_XVV<"ivw", "IVW", wti, vti, XLenVT, timm5>;
+ defvar iinfo = GetIntVTypeInfo<vti>.Vti;
+ defm : VPatVC_XVV<"xvw", "XVW", wti, vti, iinfo.Scalar, iinfo.ScalarRegClass>;
+ defm : VPatVC_XVV<"ivw", "IVW", wti, vti, XLenVT, tsimm5>;
defm : VPatVC_XVV<"vvw", "VVW", wti, vti, vti.Vector, vti.RegClass>;
- if !ge(vti.SEW, 16) then {
- defm : VPatVC_XVV<"fvw", "F" # vti.SEW # "VW", wti, vti,
- GetFTypeInfo<vti.SEW>.Scalar,
- GetFTypeInfo<vti.SEW>.ScalarRegClass, payload1>;
+
+ if !ne(vti.SEW, 8) then {
+ defvar finfo = GetFTypeInfo<vti.SEW>;
+ defm : VPatVC_XVV<"fvw", finfo.ScalarSuffix # "VW", wti, vti, finfo.Scalar,
+ finfo.ScalarRegClass, payload1>;
}
}
}
+let Predicates = [HasVendorXSfvqmaccdod] in {
+ defm : VPatVQMACC<"vqmaccu_2x8x2", "VQMACCU", "2x8x2">;
+ defm : VPatVQMACC<"vqmacc_2x8x2", "VQMACC", "2x8x2">;
+ defm : VPatVQMACC<"vqmaccus_2x8x2", "VQMACCUS", "2x8x2">;
+ defm : VPatVQMACC<"vqmaccsu_2x8x2", "VQMACCSU", "2x8x2">;
+}
+
+let Predicates = [HasVendorXSfvqmaccqoq] in {
+ defm : VPatVQMACC<"vqmaccu_4x8x4", "VQMACCU", "4x8x4">;
+ defm : VPatVQMACC<"vqmacc_4x8x4", "VQMACC", "4x8x4">;
+ defm : VPatVQMACC<"vqmaccus_4x8x4", "VQMACCUS", "4x8x4">;
+ defm : VPatVQMACC<"vqmaccsu_4x8x4", "VQMACCSU", "4x8x4">;
+}
+
+let Predicates = [HasVendorXSfvfwmaccqqq] in {
+ defm : VPatVFWMACC<"vfwmacc_4x4x4", "VFWMACC", "4x4x4">;
+}
+
+let Predicates = [HasVendorXSfvfnrclipxfqf] in {
+ defm : VPatVFNRCLIP<"vfnrclip_xu_f_qf", "VFNRCLIP_XU_F_QF">;
+ defm : VPatVFNRCLIP<"vfnrclip_x_f_qf", "VFNRCLIP_X_F_QF">;
+}
+
let Predicates = [HasVendorXSfcie] in {
let hasSideEffects = 1, mayLoad = 0, mayStore = 0, DecoderNamespace = "XSfcie" in {
def SF_CFLUSH_D_L1 : RVInstI<0b000, OPC_SYSTEM, (outs), (ins GPR:$rs1), "cflush.d.l1","$rs1">,
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
index e840dfddd8d9..1d44b1ad2636 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
@@ -14,31 +14,36 @@
// T-HEAD specific DAG Nodes.
//===----------------------------------------------------------------------===//
-def SDT_LoadPair : SDTypeProfile<2, 2,
- [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 3>, SDTCisPtrTy<2>, SDTCisVT<3, XLenVT>]>;
-def SDT_StorePair : SDTypeProfile<0, 4,
- [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 3>, SDTCisPtrTy<2>, SDTCisVT<3, XLenVT>]>;
+def SDT_LoadPair : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 3>,
+ SDTCisPtrTy<2>,
+ SDTCisVT<3, XLenVT>]>;
+def SDT_StorePair : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 3>,
+ SDTCisPtrTy<2>,
+ SDTCisVT<3, XLenVT>]>;
def th_lwud : SDNode<"RISCVISD::TH_LWUD", SDT_LoadPair,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def th_lwd : SDNode<"RISCVISD::TH_LWD", SDT_LoadPair,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def th_ldd : SDNode<"RISCVISD::TH_LDD", SDT_LoadPair,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def th_swd : SDNode<"RISCVISD::TH_SWD", SDT_StorePair,
- [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def th_sdd : SDNode<"RISCVISD::TH_SDD", SDT_StorePair,
- [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
+
class THInstVdotVV<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins,
string opcodestr, string argstr>
: RVInstVV<funct6, opv, outs, ins, opcodestr, argstr> {
let Inst{26} = 0;
let Inst{6-0} = OPC_CUSTOM_0.Value;
- let DecoderNamespace = "THeadV";
+ let DecoderNamespace = "XTHeadVdot";
}
class THInstVdotVX<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins,
@@ -46,45 +51,53 @@ class THInstVdotVX<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins,
: RVInstVX<funct6, opv, outs, ins, opcodestr, argstr> {
let Inst{26} = 1;
let Inst{6-0} = OPC_CUSTOM_0.Value;
- let DecoderNamespace = "THeadV";
+ let DecoderNamespace = "XTHeadVdot";
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
// op vd, vs1, vs2, vm (reverse the order of vs1 and vs2)
-class THVdotALUrVV<bits<6> funct6, RISCVVFormat opv, string opcodestr>
- : THInstVdotVV<funct6, opv, (outs VR:$vd),
- (ins VR:$vs1, VR:$vs2, VMaskOp:$vm),
- opcodestr, "$vd, $vs1, $vs2$vm">;
+class THVdotALUrVV<bits<6> funct6, RISCVVFormat opv, string opcodestr,
+ bit EarlyClobber>
+ : THInstVdotVV<funct6, opv, (outs VR:$vd_wb),
+ (ins VR:$vd, VR:$vs1, VR:$vs2, VMaskOp:$vm),
+ opcodestr, "$vd, $vs1, $vs2$vm"> {
+ let Constraints = !if(EarlyClobber, "@earlyclobber $vd_wb, $vd = $vd_wb",
+ "$vd = $vd_wb");
+}
// op vd, rs1, vs2, vm (reverse the order of rs1 and vs2)
-class THVdotALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr>
- : THInstVdotVX<funct6, opv, (outs VR:$vd),
- (ins GPR:$rs1, VR:$vs2, VMaskOp:$vm),
- opcodestr, "$vd, $rs1, $vs2$vm">;
+class THVdotALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr,
+ bit EarlyClobber>
+ : THInstVdotVX<funct6, opv, (outs VR:$vd_wb),
+ (ins VR:$vd, GPR:$rs1, VR:$vs2, VMaskOp:$vm),
+ opcodestr, "$vd, $rs1, $vs2$vm"> {
+ let Constraints = !if(EarlyClobber, "@earlyclobber $vd_wb, $vd = $vd_wb",
+ "$vd = $vd_wb");
+}
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
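// Constraint selection in the two classes above:
//   EarlyClobber = 1 -> "@earlyclobber $vd_wb, $vd = $vd_wb"
//   EarlyClobber = 0 -> "$vd = $vd_wb"
// Tying $vd to $vd_wb models the accumulator's read-modify-write; the
// earlyclobber form additionally keeps the destination from aliasing the
// vs1/vs2 sources. A hypothetical instantiation, for illustration only:
//   def TH_VDOT_EXAMPLE : THVdotALUrVV<0b100000, OPMVV, "th.example.vv", 1>;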
-let Predicates = [HasVendorXTHeadBa], DecoderNamespace = "THeadBa",
- hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+let Predicates = [HasVendorXTHeadBa], DecoderNamespace = "XTHeadBa",
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class THShiftALU_rri<bits<3> funct3, string opcodestr>
- : RVInstR<0, funct3, OPC_CUSTOM_0, (outs GPR:$rd),
- (ins GPR:$rs1, GPR:$rs2, uimm2:$uimm2),
- opcodestr, "$rd, $rs1, $rs2, $uimm2"> {
+ : RVInstRBase<funct3, OPC_CUSTOM_0, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2, uimm2:$uimm2),
+ opcodestr, "$rd, $rs1, $rs2, $uimm2"> {
bits<2> uimm2;
let Inst{31-27} = 0;
let Inst{26-25} = uimm2;
}
-let Predicates = [HasVendorXTHeadBb], DecoderNamespace = "THeadBb",
- hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+let Predicates = [HasVendorXTHeadBb], DecoderNamespace = "XTHeadBb",
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
class THShift_ri<bits<5> funct5, bits<3> funct3, string opcodestr>
: RVInstIShift<funct5, funct3, OPC_CUSTOM_0, (outs GPR:$rd),
(ins GPR:$rs1, uimmlog2xlen:$shamt),
opcodestr, "$rd, $rs1, $shamt">;
class THBitfieldExtract_rii<bits<3> funct3, string opcodestr>
- : RVInstI<funct3, OPC_CUSTOM_0, (outs GPR:$rd),
- (ins GPR:$rs1, uimmlog2xlen:$msb, uimmlog2xlen:$lsb),
- opcodestr, "$rd, $rs1, $msb, $lsb"> {
+ : RVInstIBase<funct3, OPC_CUSTOM_0, (outs GPR:$rd),
+ (ins GPR:$rs1, uimmlog2xlen:$msb, uimmlog2xlen:$lsb),
+ opcodestr, "$rd, $rs1, $msb, $lsb"> {
bits<6> msb;
bits<6> lsb;
let Inst{31-26} = msb;
@@ -92,21 +105,18 @@ class THBitfieldExtract_rii<bits<3> funct3, string opcodestr>
}
class THRev_r<bits<5> funct5, bits<2> funct2, string opcodestr>
- : RVInstR4<funct2, 0b001, OPC_CUSTOM_0, (outs GPR:$rd), (ins GPR:$rs1),
- opcodestr, "$rd, $rs1"> {
- let rs3 = funct5;
- let rs2 = 0;
-}
+ : RVInstIUnary<{funct5, funct2, 0b00000}, 0b001, OPC_CUSTOM_0,
+ (outs GPR:$rd), (ins GPR:$rs1), opcodestr, "$rd, $rs1">;
}
-let Predicates = [HasVendorXTHeadBb, IsRV64], DecoderNamespace = "THeadBb",
- hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+let Predicates = [HasVendorXTHeadBb, IsRV64], DecoderNamespace = "XTHeadBb",
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class THShiftW_ri<bits<7> funct7, bits<3> funct3, string opcodestr>
: RVInstIShiftW<funct7, funct3, OPC_CUSTOM_0, (outs GPR:$rd),
(ins GPR:$rs1, uimm5:$shamt),
opcodestr, "$rd, $rs1, $shamt">;
-let Predicates = [HasVendorXTHeadCondMov], DecoderNamespace = "THeadCondMov",
+let Predicates = [HasVendorXTHeadCondMov], DecoderNamespace = "XTHeadCondMov",
hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCommutable = 1 in
class THCondMov_rr<bits<7> funct7, string opcodestr>
: RVInstR<funct7, 0b001, OPC_CUSTOM_0, (outs GPR:$rd_wb),
@@ -115,7 +125,7 @@ class THCondMov_rr<bits<7> funct7, string opcodestr>
let Constraints = "$rd_wb = $rd";
}
-let Predicates = [HasVendorXTHeadMac], DecoderNamespace = "THeadMac",
+let Predicates = [HasVendorXTHeadMac], DecoderNamespace = "XTHeadMac",
hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCommutable = 1 in
class THMulAccumulate_rr<bits<7> funct7, string opcodestr>
: RVInstR<funct7, 0b001, OPC_CUSTOM_0, (outs GPR:$rd_wb),
@@ -124,27 +134,28 @@ class THMulAccumulate_rr<bits<7> funct7, string opcodestr>
let Constraints = "$rd_wb = $rd";
}
-let Predicates = [HasVendorXTHeadMemPair], DecoderNamespace = "THeadMemPair",
- hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+let Predicates = [HasVendorXTHeadMemPair], DecoderNamespace = "XTHeadMemPair",
+ hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
class THLoadPair<bits<5> funct5, string opcodestr>
- : RVInstR<!shl(funct5, 2), 0b100, OPC_CUSTOM_0,
- (outs GPR:$rd, GPR:$rs2),
- (ins GPR:$rs1, uimm2:$uimm2, uimm7:$const3or4),
- opcodestr, "$rd, $rs2, (${rs1}), $uimm2, $const3or4"> {
+ : RVInstRBase<0b100, OPC_CUSTOM_0,
+ (outs GPR:$rd, GPR:$rs2),
+ (ins GPR:$rs1, uimm2:$uimm2, uimm7:$const3or4),
+ opcodestr, "$rd, $rs2, (${rs1}), $uimm2, $const3or4"> {
bits<2> uimm2;
+ let Inst{31-27} = funct5;
let Inst{26-25} = uimm2;
let DecoderMethod = "decodeXTHeadMemPair";
let Constraints = "@earlyclobber $rd,@earlyclobber $rs2";
}
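// (A reading of the encoding change, not upstream commentary:) RVInstRBase
// leaves Inst{31-25} to the subclass, so the old `!shl(funct5, 2)` funct7
// argument and the new explicit assignments encode the same bits:
//
//   funct7 = {funct5, 0b00} with uimm2 overlaid  ==>
//   Inst{31-27} = funct5, Inst{26-25} = uimm2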
-let Predicates = [HasVendorXTHeadMemPair], DecoderNamespace = "THeadMemPair",
- hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+let Predicates = [HasVendorXTHeadMemPair], DecoderNamespace = "XTHeadMemPair",
+ hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
class THStorePair<bits<5> funct5, string opcodestr>
- : RVInstR<!shl(funct5, 2), 0b101, OPC_CUSTOM_0,
- (outs),
- (ins GPR:$rd, GPR:$rs2, GPR:$rs1, uimm2:$uimm2, uimm7:$const3or4),
- opcodestr, "$rd, $rs2, (${rs1}), $uimm2, $const3or4"> {
+ : RVInstRBase<0b101, OPC_CUSTOM_0, (outs),
+ (ins GPR:$rd, GPR:$rs2, GPR:$rs1, uimm2:$uimm2, uimm7:$const3or4),
+ opcodestr, "$rd, $rs2, (${rs1}), $uimm2, $const3or4"> {
bits<2> uimm2;
+ let Inst{31-27} = funct5;
let Inst{26-25} = uimm2;
let DecoderMethod = "decodeXTHeadMemPair";
}
@@ -174,44 +185,46 @@ class THCacheInst_void<bits<5> funct5, string opcodestr>
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
class THLoadIndexed<RegisterClass Ty, bits<5> funct5, string opcodestr>
- : RVInstR<!shl(funct5, 2), !if(!eq(Ty, GPR), 0b100, 0b110), OPC_CUSTOM_0,
- (outs Ty:$rd), (ins GPR:$rs1, GPR:$rs2, uimm2:$uimm2),
- opcodestr, "$rd, $rs1, $rs2, $uimm2"> {
+ : RVInstRBase<!if(!eq(Ty, GPR), 0b100, 0b110), OPC_CUSTOM_0,
+ (outs Ty:$rd), (ins GPR:$rs1, GPR:$rs2, uimm2:$uimm2),
+ opcodestr, "$rd, $rs1, $rs2, $uimm2"> {
bits<2> uimm2;
+ let Inst{31-27} = funct5;
let Inst{26-25} = uimm2;
}
class THLoadUpdate<bits<5> funct5, string opcodestr>
- : RVInstI<0b100, OPC_CUSTOM_0, (outs GPR:$rd, GPR:$rs1_wb),
- (ins GPR:$rs1, simm5:$simm5, uimm2:$uimm2),
- opcodestr, "$rd, (${rs1}), $simm5, $uimm2"> {
+ : RVInstIBase<0b100, OPC_CUSTOM_0, (outs GPR:$rd, GPR:$rs1_wb),
+ (ins GPR:$rs1, simm5:$simm5, uimm2:$uimm2),
+ opcodestr, "$rd, (${rs1}), $simm5, $uimm2"> {
bits<5> simm5;
bits<2> uimm2;
- let imm12{11-7} = funct5;
- let imm12{6-5} = uimm2;
- let imm12{4-0} = simm5;
+ let Inst{31-27} = funct5;
+ let Inst{26-25} = uimm2;
+ let Inst{24-20} = simm5;
let Constraints = "@earlyclobber $rd, $rs1_wb = $rs1";
}
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
class THStoreIndexed<RegisterClass StTy, bits<5> funct5, string opcodestr>
- : RVInstR<!shl(funct5, 2), !if(!eq(StTy, GPR), 0b101, 0b111), OPC_CUSTOM_0,
- (outs), (ins StTy:$rd, GPR:$rs1, GPR:$rs2, uimm2:$uimm2),
- opcodestr, "$rd, $rs1, $rs2, $uimm2"> {
+ : RVInstRBase<!if(!eq(StTy, GPR), 0b101, 0b111), OPC_CUSTOM_0,
+ (outs), (ins StTy:$rd, GPR:$rs1, GPR:$rs2, uimm2:$uimm2),
+ opcodestr, "$rd, $rs1, $rs2, $uimm2"> {
bits<2> uimm2;
+ let Inst{31-27} = funct5;
let Inst{26-25} = uimm2;
}
class THStoreUpdate<bits<5> funct5, string opcodestr>
- : RVInstI<0b101, OPC_CUSTOM_0, (outs GPR:$rs1_up),
- (ins GPR:$rd, GPR:$rs1, simm5:$simm5, uimm2:$uimm2),
- opcodestr, "$rd, (${rs1}), $simm5, $uimm2"> {
+ : RVInstIBase<0b101, OPC_CUSTOM_0, (outs GPR:$rs1_up),
+ (ins GPR:$rd, GPR:$rs1, simm5:$simm5, uimm2:$uimm2),
+ opcodestr, "$rd, (${rs1}), $simm5, $uimm2"> {
bits<5> simm5;
bits<2> uimm2;
- let imm12{11-7} = funct5;
- let imm12{6-5} = uimm2;
- let imm12{4-0} = simm5;
+ let Inst{31-27} = funct5;
+ let Inst{26-25} = uimm2;
+ let Inst{24-20} = simm5;
let Constraints = "$rs1_up = $rs1";
}
}
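// (Equivalence sketch:) RVInstI wires the I-type immediate as
// Inst{31-20} = imm12, so the old sub-field assignments map one-for-one onto
// the new direct ones:
//
//   imm12{11-7} -> Inst{31-27}  (funct5)
//   imm12{6-5}  -> Inst{26-25}  (uimm2)
//   imm12{4-0}  -> Inst{24-20}  (simm5)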
@@ -220,22 +233,25 @@ class THStoreUpdate<bits<5> funct5, string opcodestr>
// Combination of instruction classes.
// Use these multiclasses to define instructions more easily.
//===----------------------------------------------------------------------===//
+
multiclass THVdotVMAQA_VX<string opcodestr, bits<6> funct6> {
- def _VX : THVdotALUrVX<funct6, OPMVX, opcodestr # ".vx">;
+ let RVVConstraint = WidenV in
+ def _VX : THVdotALUrVX<funct6, OPMVX, opcodestr # ".vx", EarlyClobber=1>;
}
-multiclass THVdotVMAQA<string opcodestr, bits<6> funct6> {
- def _VV : THVdotALUrVV<funct6, OPMVX, opcodestr # ".vv">;
- defm "" : THVdotVMAQA_VX<opcodestr, funct6>;
+multiclass THVdotVMAQA<string opcodestr, bits<6> funct6>
+ : THVdotVMAQA_VX<opcodestr, funct6> {
+ let RVVConstraint = WidenV in
+ def _VV : THVdotALUrVV<funct6, OPMVX, opcodestr # ".vv", EarlyClobber=1>;
}
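// (Usage sketch:) a multiclass may inherit another multiclass, so
//
//   defm THVdotVMAQA : THVdotVMAQA<"th.vmaqa", 0b100000>;
//
// defines both the _VV record declared above and the _VX record inherited
// from THVdotVMAQA_VX, matching the old `defm "" : THVdotVMAQA_VX<...>`.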
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasVendorXTHeadBa] in {
+
+let Predicates = [HasVendorXTHeadBa] in
def TH_ADDSL : THShiftALU_rri<0b001, "th.addsl">,
Sched<[WriteSHXADD, ReadSHXADD, ReadSHXADD]>;
-} // Predicates = [HasVendorXTHeadBa]
let Predicates = [HasVendorXTHeadBb] in {
def TH_SRRI : THShift_ri<0b00010, 0b001, "th.srri">;
@@ -252,20 +268,19 @@ def TH_SRRIW : THShiftW_ri<0b0001010, 0b001, "th.srriw">;
def TH_REVW : THRev_r<0b10010, 0b00, "th.revw">;
} // Predicates = [HasVendorXTHeadBb, IsRV64]
-let Predicates = [HasVendorXTHeadBs], DecoderNamespace = "THeadBs" in {
-let IsSignExtendingOpW = 1 in
+let Predicates = [HasVendorXTHeadBs], DecoderNamespace = "XTHeadBs",
+ IsSignExtendingOpW = 1 in
def TH_TST : RVBShift_ri<0b10001, 0b001, OPC_CUSTOM_0, "th.tst">,
Sched<[WriteSingleBitImm, ReadSingleBitImm]>;
-} // Predicates = [HasVendorXTHeadBs]
let Predicates = [HasVendorXTHeadCondMov] in {
-def TH_MVEQZ : THCondMov_rr<0b0100000, "th.mveqz">;
-def TH_MVNEZ : THCondMov_rr<0b0100001, "th.mvnez">;
+def TH_MVEQZ : THCondMov_rr<0b0100000, "th.mveqz">;
+def TH_MVNEZ : THCondMov_rr<0b0100001, "th.mvnez">;
} // Predicates = [HasVendorXTHeadCondMov]
let Predicates = [HasVendorXTHeadMac] in {
-def TH_MULA : THMulAccumulate_rr<0b0010000, "th.mula">;
-def TH_MULS : THMulAccumulate_rr<0b0010001, "th.muls">;
+def TH_MULA : THMulAccumulate_rr<0b0010000, "th.mula">;
+def TH_MULS : THMulAccumulate_rr<0b0010001, "th.muls">;
} // Predicates = [HasVendorXTHeadMac]
let Predicates = [HasVendorXTHeadMac], IsSignExtendingOpW = 1 in {
@@ -289,130 +304,130 @@ def TH_LWD : THLoadPair<0b11100, "th.lwd">,
}
let Predicates = [HasVendorXTHeadMemPair, IsRV64] in {
-def TH_LDD : THLoadPair<0b11111, "th.ldd">,
- Sched<[WriteLDD, WriteLDD, ReadMemBase]>;
-def TH_SDD : THStorePair<0b11111, "th.sdd">,
- Sched<[WriteSTD, WriteSTD, ReadStoreData, ReadMemBase]>;
+def TH_LDD : THLoadPair<0b11111, "th.ldd">,
+ Sched<[WriteLDD, WriteLDD, ReadMemBase]>;
+def TH_SDD : THStorePair<0b11111, "th.sdd">,
+ Sched<[WriteSTD, WriteSTD, ReadStoreData, ReadMemBase]>;
}
-let Predicates = [HasVendorXTHeadMemIdx], DecoderNamespace = "THeadMemIdx" in {
+let Predicates = [HasVendorXTHeadMemIdx], DecoderNamespace = "XTHeadMemIdx" in {
// T-Head Load/Store + Update instructions.
def TH_LBIA : THLoadUpdate<0b00011, "th.lbia">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LBIB : THLoadUpdate<0b00001, "th.lbib">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LBUIA : THLoadUpdate<0b10011, "th.lbuia">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LBUIB : THLoadUpdate<0b10001, "th.lbuib">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LHIA : THLoadUpdate<0b00111, "th.lhia">,
- Sched<[WriteLDH, ReadMemBase]>;
+ Sched<[WriteLDH, ReadMemBase]>;
def TH_LHIB : THLoadUpdate<0b00101, "th.lhib">,
- Sched<[WriteLDH, ReadMemBase]>;
+ Sched<[WriteLDH, ReadMemBase]>;
def TH_LHUIA : THLoadUpdate<0b10111, "th.lhuia">,
- Sched<[WriteLDH, ReadMemBase]>;
+ Sched<[WriteLDH, ReadMemBase]>;
def TH_LHUIB : THLoadUpdate<0b10101, "th.lhuib">,
- Sched<[WriteLDH, ReadMemBase]>;
+ Sched<[WriteLDH, ReadMemBase]>;
def TH_LWIA : THLoadUpdate<0b01011, "th.lwia">,
- Sched<[WriteLDW, ReadMemBase]>;
+ Sched<[WriteLDW, ReadMemBase]>;
def TH_LWIB : THLoadUpdate<0b01001, "th.lwib">,
- Sched<[WriteLDW, ReadMemBase]>;
+ Sched<[WriteLDW, ReadMemBase]>;
def TH_SBIA : THStoreUpdate<0b00011, "th.sbia">,
- Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
def TH_SBIB : THStoreUpdate<0b00001, "th.sbib">,
- Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
def TH_SHIA : THStoreUpdate<0b00111, "th.shia">,
- Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
def TH_SHIB : THStoreUpdate<0b00101, "th.shib">,
- Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
def TH_SWIA : THStoreUpdate<0b01011, "th.swia">,
- Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
def TH_SWIB : THStoreUpdate<0b01001, "th.swib">,
- Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
// T-Head Load/Store Indexed instructions.
def TH_LRB : THLoadIndexed<GPR, 0b00000, "th.lrb">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LRBU : THLoadIndexed<GPR, 0b10000, "th.lrbu">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LURB : THLoadIndexed<GPR, 0b00010, "th.lurb">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LURBU : THLoadIndexed<GPR, 0b10010, "th.lurbu">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LRH : THLoadIndexed<GPR, 0b00100, "th.lrh">,
- Sched<[WriteLDH, ReadMemBase]>;
+ Sched<[WriteLDH, ReadMemBase]>;
def TH_LRHU : THLoadIndexed<GPR, 0b10100, "th.lrhu">,
- Sched<[WriteLDH, ReadMemBase]>;
+ Sched<[WriteLDH, ReadMemBase]>;
def TH_LURH : THLoadIndexed<GPR, 0b00110, "th.lurh">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LURHU : THLoadIndexed<GPR, 0b10110, "th.lurhu">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LRW : THLoadIndexed<GPR, 0b01000, "th.lrw">,
- Sched<[WriteLDW, ReadMemBase]>;
+ Sched<[WriteLDW, ReadMemBase]>;
def TH_LURW : THLoadIndexed<GPR, 0b01010, "th.lurw">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_SRB : THStoreIndexed<GPR, 0b00000, "th.srb">,
- Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
def TH_SURB : THStoreIndexed<GPR, 0b00010, "th.surb">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_SRH : THStoreIndexed<GPR, 0b00100, "th.srh">,
- Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
def TH_SURH : THStoreIndexed<GPR, 0b00110, "th.surh">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_SRW : THStoreIndexed<GPR, 0b01000, "th.srw">,
- Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
def TH_SURW : THStoreIndexed<GPR, 0b01010, "th.surw">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
}
-let Predicates = [HasVendorXTHeadMemIdx, IsRV64], DecoderNamespace = "THeadMemIdx" in {
+let Predicates = [HasVendorXTHeadMemIdx, IsRV64], DecoderNamespace = "XTHeadMemIdx" in {
// T-Head Load/Store + Update instructions.
def TH_LWUIA : THLoadUpdate<0b11011, "th.lwuia">,
- Sched<[WriteLDH, ReadMemBase]>;
+ Sched<[WriteLDH, ReadMemBase]>;
def TH_LWUIB : THLoadUpdate<0b11001, "th.lwuib">,
- Sched<[WriteLDH, ReadMemBase]>;
+ Sched<[WriteLDH, ReadMemBase]>;
def TH_LDIA : THLoadUpdate<0b01111, "th.ldia">,
- Sched<[WriteLDW, ReadMemBase]>;
+ Sched<[WriteLDW, ReadMemBase]>;
def TH_LDIB : THLoadUpdate<0b01101, "th.ldib">,
- Sched<[WriteLDW, ReadMemBase]>;
+ Sched<[WriteLDW, ReadMemBase]>;
def TH_SDIA : THStoreUpdate<0b01111, "th.sdia">,
- Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
def TH_SDIB : THStoreUpdate<0b01101, "th.sdib">,
- Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
// T-Head Load/Store Indexed instructions.
def TH_LRWU : THLoadIndexed<GPR, 0b11000, "th.lrwu">,
- Sched<[WriteLDW, ReadMemBase]>;
+ Sched<[WriteLDW, ReadMemBase]>;
def TH_LURWU : THLoadIndexed<GPR, 0b11010, "th.lurwu">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LRD : THLoadIndexed<GPR, 0b01100, "th.lrd">,
- Sched<[WriteLDW, ReadMemBase]>;
+ Sched<[WriteLDW, ReadMemBase]>;
def TH_LURD : THLoadIndexed<GPR, 0b01110, "th.lurd">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_SRD : THStoreIndexed<GPR, 0b01100, "th.srd">,
- Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
def TH_SURD : THStoreIndexed<GPR, 0b01110, "th.surd">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
}
// T-Head Load/Store Indexed instructions for floating point registers.
let Predicates = [HasVendorXTHeadFMemIdx, HasStdExtF],
- DecoderNamespace = "THeadFMemIdx" in {
+ DecoderNamespace = "XTHeadFMemIdx" in {
def TH_FLRW : THLoadIndexed<FPR32, 0b01000, "th.flrw">,
Sched<[WriteFLD32, ReadFMemBase]>;
def TH_FSRW : THStoreIndexed<FPR32, 0b01000, "th.fsrw">,
@@ -420,7 +435,7 @@ def TH_FSRW : THStoreIndexed<FPR32, 0b01000, "th.fsrw">,
}
let Predicates = [HasVendorXTHeadFMemIdx, HasStdExtD],
- DecoderNamespace = "THeadFMemIdx" in {
+ DecoderNamespace = "XTHeadFMemIdx" in {
def TH_FLRD : THLoadIndexed<FPR64, 0b01100, "th.flrd">,
Sched<[WriteFLD64, ReadFMemBase]>;
def TH_FSRD : THStoreIndexed<FPR64, 0b01100, "th.fsrd">,
@@ -428,7 +443,7 @@ def TH_FSRD : THStoreIndexed<FPR64, 0b01100, "th.fsrd">,
}
let Predicates = [HasVendorXTHeadFMemIdx, HasStdExtF, IsRV64],
- DecoderNamespace = "THeadFMemIdx" in {
+ DecoderNamespace = "XTHeadFMemIdx" in {
def TH_FLURW : THLoadIndexed<FPR32, 0b01010, "th.flurw">,
Sched<[WriteFLD32, ReadFMemBase]>;
def TH_FSURW : THStoreIndexed<FPR32, 0b01010, "th.fsurw">,
@@ -436,16 +451,14 @@ def TH_FSURW : THStoreIndexed<FPR32, 0b01010, "th.fsurw">,
}
let Predicates = [HasVendorXTHeadFMemIdx, HasStdExtD, IsRV64],
- DecoderNamespace = "THeadFMemIdx" in {
+ DecoderNamespace = "XTHeadFMemIdx" in {
def TH_FLURD : THLoadIndexed<FPR64, 0b01110, "th.flurd">,
Sched<[WriteFLD64, ReadFMemBase]>;
def TH_FSURD : THStoreIndexed<FPR64, 0b01110, "th.fsurd">,
Sched<[WriteFST64, ReadFStoreData, ReadFMemBase]>;
}
-let Predicates = [HasVendorXTHeadVdot],
- Constraints = "@earlyclobber $vd",
- RVVConstraint = WidenV in {
+let Predicates = [HasVendorXTHeadVdot] in {
defm THVdotVMAQA : THVdotVMAQA<"th.vmaqa", 0b100000>;
defm THVdotVMAQAU : THVdotVMAQA<"th.vmaqau", 0b100010>;
defm THVdotVMAQASU : THVdotVMAQA<"th.vmaqasu", 0b100100>;
@@ -472,8 +485,10 @@ defset list<VTypeInfoToWide> AllQuadWidenableInt8NoVLMulVectors = {
// Combination of instruction classes.
// Use these multiclasses to define instructions more easily.
//===----------------------------------------------------------------------===//
+
multiclass VPseudoVMAQA_VV_VX {
foreach m = MxListTHVdot in {
+ // TODO: Add Sched
defm "" : VPseudoTernaryW_VV<m>;
defm "" : VPseudoTernaryW_VX<m>;
}
@@ -481,6 +496,7 @@ multiclass VPseudoVMAQA_VV_VX {
multiclass VPseudoVMAQA_VX {
foreach m = MxListTHVdot in {
+ // TODO: Add Sched
defm "" : VPseudoTernaryW_VX<m>;
}
}
@@ -518,16 +534,17 @@ multiclass VPatTernaryVMAQA_VV_VX<string intrinsic, string instruction,
//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
//===----------------------------------------------------------------------===//
+
let Predicates = [HasVendorXTHeadBa] in {
def : Pat<(add (XLenVT GPR:$rs1), (shl GPR:$rs2, uimm2:$uimm2)),
(TH_ADDSL GPR:$rs1, GPR:$rs2, uimm2:$uimm2)>;
// Reuse complex patterns from StdExtZba
-def : Pat<(add sh1add_op:$rs1, non_imm12:$rs2),
+def : Pat<(add_non_imm12 sh1add_op:$rs1, (XLenVT GPR:$rs2)),
(TH_ADDSL GPR:$rs2, sh1add_op:$rs1, 1)>;
-def : Pat<(add sh2add_op:$rs1, non_imm12:$rs2),
+def : Pat<(add_non_imm12 sh2add_op:$rs1, (XLenVT GPR:$rs2)),
(TH_ADDSL GPR:$rs2, sh2add_op:$rs1, 2)>;
-def : Pat<(add sh3add_op:$rs1, non_imm12:$rs2),
+def : Pat<(add_non_imm12 sh3add_op:$rs1, (XLenVT GPR:$rs2)),
(TH_ADDSL GPR:$rs2, sh3add_op:$rs1, 3)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 6)), GPR:$rs2),
@@ -687,10 +704,14 @@ defm PseudoTHVdotVMAQASU : VPseudoVMAQA_VV_VX;
defm PseudoTHVdotVMAQAUS : VPseudoVMAQA_VX;
let Predicates = [HasVendorXTHeadVdot] in {
-defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqa", "PseudoTHVdotVMAQA", AllQuadWidenableInt8NoVLMulVectors>;
-defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqau", "PseudoTHVdotVMAQAU", AllQuadWidenableInt8NoVLMulVectors>;
-defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqasu","PseudoTHVdotVMAQASU",AllQuadWidenableInt8NoVLMulVectors>;
-defm : VPatTernaryVMAQA_VX<"int_riscv_th_vmaqaus", "PseudoTHVdotVMAQAUS",AllQuadWidenableInt8NoVLMulVectors>;
+defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqa", "PseudoTHVdotVMAQA",
+ AllQuadWidenableInt8NoVLMulVectors>;
+defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqau", "PseudoTHVdotVMAQAU",
+ AllQuadWidenableInt8NoVLMulVectors>;
+defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqasu","PseudoTHVdotVMAQASU",
+ AllQuadWidenableInt8NoVLMulVectors>;
+defm : VPatTernaryVMAQA_VX<"int_riscv_th_vmaqaus", "PseudoTHVdotVMAQAUS",
+ AllQuadWidenableInt8NoVLMulVectors>;
}
def uimm2_3_XFORM : SDNodeXForm<imm, [{
@@ -725,7 +746,7 @@ let Predicates = [HasVendorXTHeadMemPair] in {
(TH_SWD GPR:$rd1, GPR:$rd2, GPR:$rs1, uimm2_3:$uimm2_3, 3)>;
}
-let Predicates = [HasVendorXTHeadCmo], DecoderNamespace = "THeadCmo" in {
+let Predicates = [HasVendorXTHeadCmo], DecoderNamespace = "XTHeadCmo" in {
def TH_DCACHE_CSW : THCacheInst_r<0b00001, "th.dcache.csw">;
def TH_DCACHE_ISW : THCacheInst_r<0b00010, "th.dcache.isw">;
def TH_DCACHE_CISW : THCacheInst_r<0b00011, "th.dcache.cisw">;
@@ -750,7 +771,7 @@ def TH_L2CACHE_IALL : THCacheInst_void<0b10110, "th.l2cache.iall">;
def TH_L2CACHE_CIALL : THCacheInst_void<0b10111, "th.l2cache.ciall">;
}
-let Predicates = [HasVendorXTHeadSync], DecoderNamespace = "THeadSync" in {
+let Predicates = [HasVendorXTHeadSync], DecoderNamespace = "XTHeadSync" in {
def TH_SFENCE_VMAS : THCacheInst_rr<0b0000010, "th.sfence.vmas">;
def TH_SYNC : THCacheInst_void<0b11000, "th.sync">;
def TH_SYNC_S : THCacheInst_void<0b11001, "th.sync.s">;
@@ -865,9 +886,7 @@ defm : StoreUpdatePat<post_truncsti8, TH_SBIA>;
defm : StoreUpdatePat<pre_truncsti8, TH_SBIB>;
defm : StoreUpdatePat<post_truncsti16, TH_SHIA>;
defm : StoreUpdatePat<pre_truncsti16, TH_SHIB>;
-}
-let Predicates = [HasVendorXTHeadMemIdx, IsRV32] in {
defm : StoreUpdatePat<post_store, TH_SWIA, i32>;
defm : StoreUpdatePat<pre_store, TH_SWIB, i32>;
}
@@ -878,3 +897,15 @@ defm : StoreUpdatePat<pre_truncsti32, TH_SWIB, i64>;
defm : StoreUpdatePat<post_store, TH_SDIA, i64>;
defm : StoreUpdatePat<pre_store, TH_SDIB, i64>;
}
+
+//===----------------------------------------------------------------------===//
+// Experimental RV64 i32 legalization patterns.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasVendorXTHeadMemIdx, IsRV64] in {
+defm : StoreUpdatePat<post_truncsti8, TH_SBIA, i32>;
+defm : StoreUpdatePat<pre_truncsti8, TH_SBIB, i32>;
+defm : StoreUpdatePat<post_truncsti16, TH_SHIA, i32>;
+defm : StoreUpdatePat<pre_truncsti16, TH_SHIB, i32>;
+}
+
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td
index f6b0feaf7628..d0a798ef475c 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td
@@ -15,7 +15,7 @@
//===----------------------------------------------------------------------===//
let Predicates = [IsRV64, HasVendorXVentanaCondOps], hasSideEffects = 0,
- mayLoad = 0, mayStore = 0, isCodeGenOnly = 0, DecoderNamespace = "Ventana" in
+ mayLoad = 0, mayStore = 0, isCodeGenOnly = 0, DecoderNamespace = "XVentana" in
class VTMaskedMove<bits<3> funct3, string opcodestr>
: RVInstR<0b0000000, funct3, OPC_CUSTOM_3, (outs GPR:$rd),
(ins GPR:$rs1, GPR:$rs2), opcodestr,
@@ -29,8 +29,17 @@ def VT_MASKCN : VTMaskedMove<0b111, "vt.maskcn">,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
let Predicates = [IsRV64, HasVendorXVentanaCondOps] in {
-def : Pat<(XLenVT (riscv_czero_eqz GPR:$rs1, GPR:$rc)),
+def : Pat<(i64 (riscv_czero_eqz GPR:$rs1, GPR:$rc)),
(VT_MASKC GPR:$rs1, GPR:$rc)>;
-def : Pat<(XLenVT (riscv_czero_nez GPR:$rs1, GPR:$rc)),
+def : Pat<(i64 (riscv_czero_nez GPR:$rs1, GPR:$rc)),
(VT_MASKCN GPR:$rs1, GPR:$rc)>;
+
+def : Pat<(i64 (riscv_czero_eqz GPR:$rs1, (riscv_setne (i64 GPR:$rc)))),
+ (VT_MASKC GPR:$rs1, GPR:$rc)>;
+def : Pat<(i64 (riscv_czero_eqz GPR:$rs1, (riscv_seteq (i64 GPR:$rc)))),
+ (VT_MASKCN GPR:$rs1, GPR:$rc)>;
+def : Pat<(i64 (riscv_czero_nez GPR:$rs1, (riscv_setne (i64 GPR:$rc)))),
+ (VT_MASKCN GPR:$rs1, GPR:$rc)>;
+def : Pat<(i64 (riscv_czero_nez GPR:$rs1, (riscv_seteq (i64 GPR:$rc)))),
+ (VT_MASKC GPR:$rs1, GPR:$rc)>;
} // Predicates = [IsRV64, HasVendorXVentanaCondOps]
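// (Why the new folds are sound, as a sketch:) riscv_setne(c) is nonzero
// exactly when c is nonzero, so czero_eqz(x, setne(c)) selects the same
// result as czero_eqz(x, c) and maps to VT_MASKC; riscv_seteq(c) inverts the
// condition, so those forms swap to the opposite masking instruction.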
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index caeedfa652e4..8055473a37c3 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -13,9 +13,6 @@
// Zbc - 1.0
// Zbs - 1.0
//
-// The experimental extensions appeared in an earlier draft of the Bitmanip
-// extensions. They are not ratified and subject to change.
-//
// This file also describes RISC-V instructions from the Zbk* extensions in
// Cryptography Extensions Volume I: Scalar & Entropy Source Instructions,
// versions:
@@ -48,7 +45,7 @@ def UImmLog2XLenHalfAsmOperand : AsmOperandClass {
let DiagnosticType = "InvalidUImmLog2XLenHalf";
}
-def shfl_uimm : Operand<XLenVT>, ImmLeaf<XLenVT, [{
+def shfl_uimm : RISCVOp, ImmLeaf<XLenVT, [{
if (Subtarget->is64Bit())
return isUInt<5>(Imm);
return isUInt<4>(Imm);
@@ -56,7 +53,6 @@ def shfl_uimm : Operand<XLenVT>, ImmLeaf<XLenVT, [{
let ParserMatchClass = UImmLog2XLenHalfAsmOperand;
let DecoderMethod = "decodeUImmOperand<5>";
let OperandType = "OPERAND_UIMM_SHFL";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -183,7 +179,7 @@ def C5LeftShift : PatLeaf<(imm), [{
def C9LeftShift : PatLeaf<(imm), [{
uint64_t C = N->getZExtValue();
- return C > 5 && (C >> llvm::countr_zero(C)) == 9;
+ return C > 9 && (C >> llvm::countr_zero(C)) == 9;
}]>;
// Constant of the form (3 << C) where C is less than 32.
@@ -234,18 +230,38 @@ def SimmShiftRightBy3XForm : SDNodeXForm<imm, [{
N->getValueType(0));
}]>;
-// Pattern to exclude simm12 immediates from matching.
-def non_imm12 : PatLeaf<(XLenVT GPR:$a), [{
- auto *C = dyn_cast<ConstantSDNode>(N);
+// Pattern to exclude simm12 immediates from matching, i.e. `non_imm12`.
+// GISel currently doesn't support PatFrags for leaf nodes, so `non_imm12`
+// cannot be implemented that way. To reuse patterns between the two ISels,
+// we instead create PatFrags on the operators that take a `non_imm12`
+// operand.
+class binop_with_non_imm12<SDPatternOperator binop>
+ : PatFrag<(ops node:$x, node:$y), (binop node:$x, node:$y), [{
+ auto *C = dyn_cast<ConstantSDNode>(Operands[1]);
return !C || !isInt<12>(C->getSExtValue());
-}]>;
+}]> {
+ let PredicateCodeUsesOperands = 1;
+ let GISelPredicateCode = [{
+ const MachineOperand &ImmOp = *Operands[1];
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ if (ImmOp.isReg() && ImmOp.getReg())
+ if (auto Val = getIConstantVRegValWithLookThrough(ImmOp.getReg(), MRI)) {
+ // We do NOT want immediates that fit in 12 bits.
+ return !isInt<12>(Val->Value.getSExtValue());
+ }
+
+ return true;
+ }];
+}
+def add_non_imm12 : binop_with_non_imm12<add>;
+def or_is_add_non_imm12 : binop_with_non_imm12<or_is_add>;
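// (Usage sketch:) the restriction now travels with the operator, e.g.
//
//   def : Pat<(XLenVT (add_non_imm12 GPR:$a, GPR:$b)), ...>;
//
// matches (add a, b) only when operand 1 is not a simm12 constant, and the
// same record drives both SelectionDAG (PredicateCode) and GlobalISel
// (GISelPredicateCode).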
-def Shifted32OnesMask : PatLeaf<(imm), [{
- uint64_t Imm = N->getZExtValue();
- if (!isShiftedMask_64(Imm))
+def Shifted32OnesMask : IntImmLeaf<XLenVT, [{
+ if (!Imm.isShiftedMask())
return false;
- unsigned TrailingZeros = llvm::countr_zero(Imm);
+ unsigned TrailingZeros = Imm.countr_zero();
return TrailingZeros > 0 && TrailingZeros < 32 &&
Imm == UINT64_C(0xFFFFFFFF) << TrailingZeros;
}], TrailingZeros>;
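// (Note, assuming the usual IntImmLeaf definition:) IntImmLeaf hands the
// predicate an APInt `Imm` rather than an SDNode, so the mask test can use
// APInt::isShiftedMask() and APInt::countr_zero() directly.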
@@ -262,16 +278,11 @@ def sh3add_uw_op : ComplexPattern<XLenVT, 1, "selectSHXADD_UWOp<3>", [], [], 6>;
// Instruction class templates
//===----------------------------------------------------------------------===//
-// Some of these templates should be moved to RISCVInstrFormats.td once the B
-// extension has been ratified.
-
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class RVBUnary<bits<7> funct7, bits<5> funct5, bits<3> funct3,
+class RVBUnary<bits<12> imm12, bits<3> funct3,
RISCVOpcode opcode, string opcodestr>
- : RVInstR<funct7, funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1),
- opcodestr, "$rd, $rs1"> {
- let rs2 = funct5;
-}
+ : RVInstIUnary<imm12, funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1),
+ opcodestr, "$rd, $rs1">;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class RVBShift_ri<bits<5> imm11_7, bits<3> funct3, RISCVOpcode opcode,
@@ -375,27 +386,27 @@ def XPERM8 : ALU_rr<0b0010100, 0b100, "xperm8">,
} // Predicates = [HasStdExtZbkx]
let Predicates = [HasStdExtZbb], IsSignExtendingOpW = 1 in {
-def CLZ : RVBUnary<0b0110000, 0b00000, 0b001, OPC_OP_IMM, "clz">,
+def CLZ : RVBUnary<0b011000000000, 0b001, OPC_OP_IMM, "clz">,
Sched<[WriteCLZ, ReadCLZ]>;
-def CTZ : RVBUnary<0b0110000, 0b00001, 0b001, OPC_OP_IMM, "ctz">,
+def CTZ : RVBUnary<0b011000000001, 0b001, OPC_OP_IMM, "ctz">,
Sched<[WriteCTZ, ReadCTZ]>;
-def CPOP : RVBUnary<0b0110000, 0b00010, 0b001, OPC_OP_IMM, "cpop">,
+def CPOP : RVBUnary<0b011000000010, 0b001, OPC_OP_IMM, "cpop">,
Sched<[WriteCPOP, ReadCPOP]>;
} // Predicates = [HasStdExtZbb]
let Predicates = [HasStdExtZbb, IsRV64], IsSignExtendingOpW = 1 in {
-def CLZW : RVBUnary<0b0110000, 0b00000, 0b001, OPC_OP_IMM_32, "clzw">,
+def CLZW : RVBUnary<0b011000000000, 0b001, OPC_OP_IMM_32, "clzw">,
Sched<[WriteCLZ32, ReadCLZ32]>;
-def CTZW : RVBUnary<0b0110000, 0b00001, 0b001, OPC_OP_IMM_32, "ctzw">,
+def CTZW : RVBUnary<0b011000000001, 0b001, OPC_OP_IMM_32, "ctzw">,
Sched<[WriteCTZ32, ReadCTZ32]>;
-def CPOPW : RVBUnary<0b0110000, 0b00010, 0b001, OPC_OP_IMM_32, "cpopw">,
+def CPOPW : RVBUnary<0b011000000010, 0b001, OPC_OP_IMM_32, "cpopw">,
Sched<[WriteCPOP32, ReadCPOP32]>;
} // Predicates = [HasStdExtZbb, IsRV64]
let Predicates = [HasStdExtZbb], IsSignExtendingOpW = 1 in {
-def SEXT_B : RVBUnary<0b0110000, 0b00100, 0b001, OPC_OP_IMM, "sext.b">,
+def SEXT_B : RVBUnary<0b011000000100, 0b001, OPC_OP_IMM, "sext.b">,
Sched<[WriteIALU, ReadIALU]>;
-def SEXT_H : RVBUnary<0b0110000, 0b00101, 0b001, OPC_OP_IMM, "sext.h">,
+def SEXT_H : RVBUnary<0b011000000101, 0b001, OPC_OP_IMM, "sext.h">,
Sched<[WriteIALU, ReadIALU]>;
} // Predicates = [HasStdExtZbb]
@@ -435,38 +446,38 @@ def PACKW : ALUW_rr<0b0000100, 0b100, "packw">,
Sched<[WritePACK32, ReadPACK32, ReadPACK32]>;
let Predicates = [HasStdExtZbb, IsRV32] in {
-def ZEXT_H_RV32 : RVBUnary<0b0000100, 0b00000, 0b100, OPC_OP, "zext.h">,
+def ZEXT_H_RV32 : RVBUnary<0b000010000000, 0b100, OPC_OP, "zext.h">,
Sched<[WriteIALU, ReadIALU]>;
} // Predicates = [HasStdExtZbb, IsRV32]
let Predicates = [HasStdExtZbb, IsRV64], IsSignExtendingOpW = 1 in {
-def ZEXT_H_RV64 : RVBUnary<0b0000100, 0b00000, 0b100, OPC_OP_32, "zext.h">,
+def ZEXT_H_RV64 : RVBUnary<0b000010000000, 0b100, OPC_OP_32, "zext.h">,
Sched<[WriteIALU, ReadIALU]>;
} // Predicates = [HasStdExtZbb, IsRV64]
let Predicates = [HasStdExtZbbOrZbkb, IsRV32] in {
-def REV8_RV32 : RVBUnary<0b0110100, 0b11000, 0b101, OPC_OP_IMM, "rev8">,
+def REV8_RV32 : RVBUnary<0b011010011000, 0b101, OPC_OP_IMM, "rev8">,
Sched<[WriteREV8, ReadREV8]>;
} // Predicates = [HasStdExtZbbOrZbkb, IsRV32]
let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in {
-def REV8_RV64 : RVBUnary<0b0110101, 0b11000, 0b101, OPC_OP_IMM, "rev8">,
+def REV8_RV64 : RVBUnary<0b011010111000, 0b101, OPC_OP_IMM, "rev8">,
Sched<[WriteREV8, ReadREV8]>;
} // Predicates = [HasStdExtZbbOrZbkb, IsRV64]
let Predicates = [HasStdExtZbb] in {
-def ORC_B : RVBUnary<0b0010100, 0b00111, 0b101, OPC_OP_IMM, "orc.b">,
+def ORC_B : RVBUnary<0b001010000111, 0b101, OPC_OP_IMM, "orc.b">,
Sched<[WriteORCB, ReadORCB]>;
} // Predicates = [HasStdExtZbb]
let Predicates = [HasStdExtZbkb] in
-def BREV8 : RVBUnary<0b0110100, 0b00111, 0b101, OPC_OP_IMM, "brev8">,
+def BREV8 : RVBUnary<0b011010000111, 0b101, OPC_OP_IMM, "brev8">,
Sched<[WriteBREV8, ReadBREV8]>;
let Predicates = [HasStdExtZbkb, IsRV32] in {
-def ZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b001, OPC_OP_IMM, "zip">,
+def ZIP_RV32 : RVBUnary<0b000010001111, 0b001, OPC_OP_IMM, "zip">,
Sched<[WriteZIP, ReadZIP]>;
-def UNZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b101, OPC_OP_IMM, "unzip">,
+def UNZIP_RV32 : RVBUnary<0b000010001111, 0b101, OPC_OP_IMM, "unzip">,
Sched<[WriteZIP, ReadZIP]>;
} // Predicates = [HasStdExtZbkb, IsRV32]
@@ -543,6 +554,8 @@ def : Pat<(XLenVT (and (shiftop<srl> GPR:$rs1, (XLenVT GPR:$rs2)), 1)),
def : Pat<(XLenVT (shiftop<shl> 1, (XLenVT GPR:$rs2))),
(BSET (XLenVT X0), GPR:$rs2)>;
+def : Pat<(XLenVT (not (shiftop<shl> -1, (XLenVT GPR:$rs2)))),
+ (ADDI (BSET (XLenVT X0), GPR:$rs2), -1)>;
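// (Identity behind the new pattern:) not(-1 << rs2) == (1 << rs2) - 1, so a
// mask of rs2 low ones is materialized as `bset rd, x0, rs2` (giving
// 1 << rs2) followed by `addi rd, rd, -1`.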
def : Pat<(XLenVT (and GPR:$rs1, BCLRMask:$mask)),
(BCLRI GPR:$rs1, BCLRMask:$mask)>;
@@ -632,6 +645,10 @@ def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 8)),
def : Pat<(and (or (shl GPR:$rs2, (XLenVT 8)),
(zexti8 (XLenVT GPR:$rs1))), 0xFFFF),
(PACKH GPR:$rs1, GPR:$rs2)>;
+
+def : Pat<(binop_allhusers<or> (shl GPR:$rs2, (XLenVT 8)),
+ (zexti8 (XLenVT GPR:$rs1))),
+ (PACKH GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZbkb]
let Predicates = [HasStdExtZbkb, IsRV32] in
@@ -656,20 +673,17 @@ let Predicates = [HasStdExtZbb, IsRV64] in
def : Pat<(i64 (and GPR:$rs, 0xFFFF)), (ZEXT_H_RV64 GPR:$rs)>;
let Predicates = [HasStdExtZba] in {
-def : Pat<(add (shl GPR:$rs1, (XLenVT 1)), non_imm12:$rs2),
- (SH1ADD GPR:$rs1, GPR:$rs2)>;
-def : Pat<(add (shl GPR:$rs1, (XLenVT 2)), non_imm12:$rs2),
- (SH2ADD GPR:$rs1, GPR:$rs2)>;
-def : Pat<(add (shl GPR:$rs1, (XLenVT 3)), non_imm12:$rs2),
- (SH3ADD GPR:$rs1, GPR:$rs2)>;
-// More complex cases use a ComplexPattern.
-def : Pat<(add sh1add_op:$rs1, non_imm12:$rs2),
- (SH1ADD sh1add_op:$rs1, GPR:$rs2)>;
-def : Pat<(add sh2add_op:$rs1, non_imm12:$rs2),
- (SH2ADD sh2add_op:$rs1, GPR:$rs2)>;
-def : Pat<(add sh3add_op:$rs1, non_imm12:$rs2),
- (SH3ADD sh3add_op:$rs1, GPR:$rs2)>;
+foreach i = {1,2,3} in {
+ defvar shxadd = !cast<Instruction>("SH"#i#"ADD");
+ def : Pat<(XLenVT (add_non_imm12 (shl GPR:$rs1, (XLenVT i)), GPR:$rs2)),
+ (shxadd GPR:$rs1, GPR:$rs2)>;
+
+ defvar pat = !cast<ComplexPattern>("sh"#i#"add_op");
+ // More complex cases use a ComplexPattern.
+ def : Pat<(XLenVT (add_non_imm12 pat:$rs1, GPR:$rs2)),
+ (shxadd pat:$rs1, GPR:$rs2)>;
+}
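// (Expansion sketch:) the foreach/!cast pair stamps out one pattern pair per
// shift amount; for i = 1 the first pattern is equivalent to
//
//   def : Pat<(XLenVT (add_non_imm12 (shl GPR:$rs1, (XLenVT 1)), GPR:$rs2)),
//             (SH1ADD GPR:$rs1, GPR:$rs2)>;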
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 6)), GPR:$rs2),
(SH1ADD (SH1ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>;
@@ -739,46 +753,46 @@ def : Pat<(i64 (shl (and GPR:$rs1, 0xFFFFFFFF), uimm5:$shamt)),
def : Pat<(i64 (and GPR:$rs1, Shifted32OnesMask:$mask)),
(SLLI_UW (SRLI GPR:$rs1, Shifted32OnesMask:$mask),
Shifted32OnesMask:$mask)>;
-
-def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFF), non_imm12:$rs2)),
+def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0xFFFFFFFF), GPR:$rs2)),
(ADD_UW GPR:$rs1, GPR:$rs2)>;
def : Pat<(i64 (and GPR:$rs, 0xFFFFFFFF)), (ADD_UW GPR:$rs, (XLenVT X0))>;
-def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 1)), non_imm12:$rs2)),
- (SH1ADD_UW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 2)), non_imm12:$rs2)),
- (SH2ADD_UW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 3)), non_imm12:$rs2)),
- (SH3ADD_UW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i64 (or_is_add_non_imm12 (and GPR:$rs1, 0xFFFFFFFF), GPR:$rs2)),
+ (ADD_UW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), non_imm12:$rs2)),
+foreach i = {1,2,3} in {
+ defvar shxadd_uw = !cast<Instruction>("SH"#i#"ADD_UW");
+ def : Pat<(i64 (add_non_imm12 (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 i)), (XLenVT GPR:$rs2))),
+ (shxadd_uw GPR:$rs1, GPR:$rs2)>;
+}
+
+def : Pat<(i64 (add_non_imm12 (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), (XLenVT GPR:$rs2))),
(SH1ADD_UW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), non_imm12:$rs2)),
+def : Pat<(i64 (add_non_imm12 (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), (XLenVT GPR:$rs2))),
(SH2ADD_UW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), non_imm12:$rs2)),
+def : Pat<(i64 (add_non_imm12 (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), (XLenVT GPR:$rs2))),
(SH3ADD_UW GPR:$rs1, GPR:$rs2)>;
// More complex cases use a ComplexPattern.
-def : Pat<(i64 (add sh1add_uw_op:$rs1, non_imm12:$rs2)),
- (SH1ADD_UW sh1add_uw_op:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add sh2add_uw_op:$rs1, non_imm12:$rs2)),
- (SH2ADD_UW sh2add_uw_op:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add sh3add_uw_op:$rs1, non_imm12:$rs2)),
- (SH3ADD_UW sh3add_uw_op:$rs1, GPR:$rs2)>;
-
-def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFE), non_imm12:$rs2)),
+foreach i = {1,2,3} in {
+ defvar pat = !cast<ComplexPattern>("sh"#i#"add_uw_op");
+ def : Pat<(i64 (add_non_imm12 pat:$rs1, (XLenVT GPR:$rs2))),
+ (!cast<Instruction>("SH"#i#"ADD_UW") pat:$rs1, GPR:$rs2)>;
+}
+
+def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0xFFFFFFFE), (XLenVT GPR:$rs2))),
(SH1ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>;
-def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFC), non_imm12:$rs2)),
+def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0xFFFFFFFC), (XLenVT GPR:$rs2))),
(SH2ADD (SRLIW GPR:$rs1, 2), GPR:$rs2)>;
-def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFF8), non_imm12:$rs2)),
+def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0xFFFFFFF8), (XLenVT GPR:$rs2))),
(SH3ADD (SRLIW GPR:$rs1, 3), GPR:$rs2)>;
// Use SRLI to clear the LSBs and SHXADD_UW to mask and shift.
-def : Pat<(i64 (add (and GPR:$rs1, 0x1FFFFFFFE), non_imm12:$rs2)),
+def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0x1FFFFFFFE), (XLenVT GPR:$rs2))),
(SH1ADD_UW (SRLI GPR:$rs1, 1), GPR:$rs2)>;
-def : Pat<(i64 (add (and GPR:$rs1, 0x3FFFFFFFC), non_imm12:$rs2)),
+def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0x3FFFFFFFC), (XLenVT GPR:$rs2))),
(SH2ADD_UW (SRLI GPR:$rs1, 2), GPR:$rs2)>;
-def : Pat<(i64 (add (and GPR:$rs1, 0x7FFFFFFF8), non_imm12:$rs2)),
+def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0x7FFFFFFF8), (XLenVT GPR:$rs2))),
(SH3ADD_UW (SRLI GPR:$rs1, 3), GPR:$rs2)>;
def : Pat<(i64 (mul (and_oneuse GPR:$r, 0xFFFFFFFF), C3LeftShiftUW:$i)),
@@ -804,3 +818,99 @@ let Predicates = [HasStdExtZbkx] in {
def : PatGprGpr<int_riscv_xperm4, XPERM4>;
def : PatGprGpr<int_riscv_xperm8, XPERM8>;
} // Predicates = [HasStdExtZbkx]
+
+//===----------------------------------------------------------------------===//
+// Experimental RV64 i32 legalization patterns.
+//===----------------------------------------------------------------------===//
+
+def BCLRMaski32 : ImmLeaf<i32, [{
+ return !isInt<12>(Imm) && isPowerOf2_32(~Imm);
+}]>;
+def SingleBitSetMaski32 : ImmLeaf<i32, [{
+ return !isInt<12>(Imm) && isPowerOf2_32(Imm);
+}]>;
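// (Rationale sketch:) both leaves reject simm12 values, where plain
// ANDI/ORI/XORI already suffice; BCLRMaski32 accepts an i32 that is all ones
// with a single bit clear (~Imm is a power of two), SingleBitSetMaski32 one
// with a single bit set.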
+
+let Predicates = [HasStdExtZbb, IsRV64] in {
+def : PatGpr<ctlz, CLZW, i32>;
+def : PatGpr<cttz, CTZW, i32>;
+def : PatGpr<ctpop, CPOPW, i32>;
+
+def : Pat<(i32 (sext_inreg GPR:$rs1, i8)), (SEXT_B GPR:$rs1)>;
+def : Pat<(i32 (sext_inreg GPR:$rs1, i16)), (SEXT_H GPR:$rs1)>;
+} // Predicates = [HasStdExtZbb, IsRV64]
+
+let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in {
+def : Pat<(i32 (and GPR:$rs1, (not GPR:$rs2))), (ANDN GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i32 (or GPR:$rs1, (not GPR:$rs2))), (ORN GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i32 (xor GPR:$rs1, (not GPR:$rs2))), (XNOR GPR:$rs1, GPR:$rs2)>;
+
+def : PatGprGpr<shiftopw<rotl>, ROLW, i32, i64>;
+def : PatGprGpr<shiftopw<rotr>, RORW, i32, i64>;
+def : PatGprImm<rotr, RORIW, uimm5, i32>;
+
+def : Pat<(i32 (rotl GPR:$rs1, uimm5:$rs2)),
+ (RORIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>;
+} // Predicates = [HasStdExtZbbOrZbkb, IsRV64]
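// (Identity behind the RORIW immediate pattern above:) on i32,
// rotl x, k == rotr x, (32 - k), so ImmSubFrom32 rewrites the amount and
// RORIW covers both rotate directions.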
+
+let Predicates = [HasStdExtZbkb, IsRV64] in {
+def : Pat<(or (and (shl GPR:$rs2, (i64 8)), 0xFFFF),
+ (zexti8i32 (i32 GPR:$rs1))),
+ (PACKH GPR:$rs1, GPR:$rs2)>;
+def : Pat<(or (shl (zexti8i32 (i32 GPR:$rs2)), (i64 8)),
+ (zexti8i32 (i32 GPR:$rs1))),
+ (PACKH GPR:$rs1, GPR:$rs2)>;
+def : Pat<(and (anyext (or (shl GPR:$rs2, (XLenVT 8)),
+ (zexti8i32 (i32 GPR:$rs1)))), 0xFFFF),
+ (PACKH GPR:$rs1, GPR:$rs2)>;
+
+def : Pat<(i32 (or (shl GPR:$rs2, (i64 16)), (zexti16i32 (i32 GPR:$rs1)))),
+ (PACKW GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbkb, IsRV64]
+
+let Predicates = [HasStdExtZba, IsRV64] in {
+def : Pat<(shl (i64 (zext i32:$rs1)), uimm5:$shamt),
+ (SLLI_UW GPR:$rs1, uimm5:$shamt)>;
+
+def : Pat<(i64 (add_non_imm12 (zext GPR:$rs1), GPR:$rs2)),
+ (ADD_UW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(zext GPR:$src), (ADD_UW GPR:$src, (XLenVT X0))>;
+
+def : Pat<(i64 (or_is_add_non_imm12 (zext GPR:$rs1), GPR:$rs2)),
+ (ADD_UW GPR:$rs1, GPR:$rs2)>;
+
+foreach i = {1,2,3} in {
+ defvar shxadd = !cast<Instruction>("SH"#i#"ADD");
+ def : Pat<(i32 (add_non_imm12 (shl GPR:$rs1, (i64 i)), GPR:$rs2)),
+ (shxadd GPR:$rs1, GPR:$rs2)>;
+}
+}
+
+let Predicates = [HasStdExtZbs, IsRV64] in {
+def : Pat<(i32 (and (not (shiftop<shl> 1, (XLenVT GPR:$rs2))), GPR:$rs1)),
+ (BCLR GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i32 (and (rotl -2, (XLenVT GPR:$rs2)), GPR:$rs1)),
+ (BCLR GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i32 (or (shiftop<shl> 1, (XLenVT GPR:$rs2)), GPR:$rs1)),
+ (BSET GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i32 (xor (shiftop<shl> 1, (XLenVT GPR:$rs2)), GPR:$rs1)),
+ (BINV GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i32 (and (shiftop<srl> GPR:$rs1, (XLenVT GPR:$rs2)), 1)),
+ (BEXT GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i64 (and (anyext (i32 (shiftop<srl> GPR:$rs1, (XLenVT GPR:$rs2)))), 1)),
+ (BEXT GPR:$rs1, GPR:$rs2)>;
+
+def : Pat<(i32 (shiftop<shl> 1, (XLenVT GPR:$rs2))),
+ (BSET (XLenVT X0), GPR:$rs2)>;
+def : Pat<(i32 (not (shiftop<shl> -1, (XLenVT GPR:$rs2)))),
+ (ADDI (BSET (XLenVT X0), GPR:$rs2), -1)>;
+
+def : Pat<(i32 (and (srl GPR:$rs1, uimm5:$shamt), (i32 1))),
+ (BEXTI GPR:$rs1, uimm5:$shamt)>;
+
+def : Pat<(i32 (and GPR:$rs1, BCLRMaski32:$mask)),
+ (BCLRI GPR:$rs1, (i64 (BCLRXForm $mask)))>;
+def : Pat<(i32 (or GPR:$rs1, SingleBitSetMaski32:$mask)),
+ (BSETI GPR:$rs1, (i64 (SingleBitSetMaskToIndex $mask)))>;
+def : Pat<(i32 (xor GPR:$rs1, SingleBitSetMaski32:$mask)),
+ (BINVI GPR:$rs1, (i64 (SingleBitSetMaskToIndex $mask)))>;
+} // Predicates = [HasStdExtZbs, IsRV64]
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
index 6687343086da..a78f36244468 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
@@ -8,8 +8,6 @@
///
/// This file describes the RISC-V instructions from the 'Zc*' compressed
/// instruction extensions, version 1.0.3.
-/// This version is still experimental as the 'Zc*' extensions haven't been
-/// ratified yet.
///
//===----------------------------------------------------------------------===//
@@ -17,13 +15,12 @@
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
-def uimm2_lsb0 : Operand<XLenVT>,
+def uimm2_lsb0 : RISCVOp,
ImmLeaf<XLenVT, [{return isShiftedUInt<1, 1>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<2, "Lsb0">;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeUImmOperand<2>";
let OperandType = "OPERAND_UIMM2_LSB0";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -32,11 +29,10 @@ def uimm2_lsb0 : Operand<XLenVT>,
}];
}
-def uimm8ge32 : Operand<XLenVT> {
+def uimm8ge32 : RISCVOp {
let ParserMatchClass = UImmAsmOperand<8, "GE32">;
let DecoderMethod = "decodeUImmOperand<8>";
let OperandType = "OPERAND_UIMM8_GE32";
- let OperandNamespace = "RISCVOp";
}
def RlistAsmOperand : AsmOperandClass {
@@ -131,7 +127,7 @@ class RVZcArith_r<bits<5> funct5, string OpcodeStr> :
class RVInstZcCPPP<bits<5> funct5, string opcodestr>
: RVInst16<(outs), (ins rlist:$rlist, spimm:$spimm),
- opcodestr, "{$rlist}, $spimm", [], InstFormatOther> {
+ opcodestr, "$rlist, $spimm", [], InstFormatOther> {
bits<4> rlist;
bits<16> spimm;
@@ -285,9 +281,9 @@ def : CompressPat<(SH GPRC:$rs2, GPRCMem:$rs1, uimm2_lsb0:$imm),
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtZcb] in {
-def : InstAlias<"c.lbu $rd, (${rs1})",(C_LBU GPRC:$rd, GPRC:$rs1, 0)>;
-def : InstAlias<"c.lhu $rd, (${rs1})",(C_LHU GPRC:$rd, GPRC:$rs1, 0)>;
-def : InstAlias<"c.lh $rd, (${rs1})", (C_LH GPRC:$rd, GPRC:$rs1, 0)>;
-def : InstAlias<"c.sb $rd, (${rs1})", (C_SB GPRC:$rd, GPRC:$rs1, 0)>;
-def : InstAlias<"c.sh $rd, (${rs1})", (C_SH GPRC:$rd, GPRC:$rs1, 0)>;
+def : InstAlias<"c.lbu $rd, (${rs1})",(C_LBU GPRC:$rd, GPRC:$rs1, 0), 0>;
+def : InstAlias<"c.lhu $rd, (${rs1})",(C_LHU GPRC:$rd, GPRC:$rs1, 0), 0>;
+def : InstAlias<"c.lh $rd, (${rs1})", (C_LH GPRC:$rd, GPRC:$rs1, 0), 0>;
+def : InstAlias<"c.sb $rd, (${rs1})", (C_SB GPRC:$rd, GPRC:$rs1, 0), 0>;
+def : InstAlias<"c.sh $rd, (${rs1})", (C_SH GPRC:$rd, GPRC:$rs1, 0), 0>;
}
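// (Note, assuming the standard InstAlias signature:) the appended 0 is the
// InstAlias emit priority; the aliases stay accepted by the assembler but
// are no longer preferred when printing, so disassembly shows the explicit
// zero-offset form.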
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
index f36882f9a968..6f88ff7f7ac1 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
@@ -7,9 +7,7 @@
//===----------------------------------------------------------------------===//
//
// This file describes the RISC-V instructions from the standard 'Zfa'
-// additional floating-point extension, version 0.1.
-// This version is still experimental as the 'Zfa' extension hasn't been
-// ratified yet.
+// additional floating-point extension, version 1.0.
//
//===----------------------------------------------------------------------===//
@@ -54,18 +52,14 @@ class FPBinaryOp_rr<bits<7> funct7, bits<3> funct3, DAGOperand rdty,
(ins rsty:$rs1, rsty:$rs2), opcodestr, "$rd, $rs1, $rs2">;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class FPUnaryOp_imm<bits<7> funct7, bits<5> rs2val, bits<3> funct3,
- dag outs, dag ins, string opcodestr, string argstr>
- : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+class FPFLI_r<bits<7> funct7, bits<5> rs2val, bits<3> funct3,
+ DAGOperand rdty, string opcodestr>
+ : RVInstR<funct7, funct3, OPC_OP_FP, (outs rdty:$rd),
+ (ins loadfpimm:$imm), opcodestr, "$rd, $imm"> {
bits<5> imm;
- bits<5> rd;
-
- let Inst{31-25} = funct7;
- let Inst{24-20} = rs2val;
- let Inst{19-15} = imm;
- let Inst{14-12} = funct3;
- let Inst{11-7} = rd;
- let Inst{6-0} = OPC_OP_FP.Value;
+
+ let rs2 = rs2val;
+ let rs1 = imm;
}
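// (Encoding note:) FPFLI_r reuses the plain R-type layout: the 5-bit fli
// immediate index sits in the rs1 field and rs2 carries the fixed 0b00001
// discriminator, the same bits the deleted FPUnaryOp_imm spelled out by hand.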
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1,
@@ -84,8 +78,7 @@ class FPUnaryOp_r_rtz<bits<7> funct7, bits<5> rs2val, DAGOperand rdty,
let Predicates = [HasStdExtZfa] in {
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def FLI_S : FPUnaryOp_imm<0b1111000, 0b00001, 0b000, (outs FPR32:$rd),
- (ins loadfpimm:$imm), "fli.s", "$rd, $imm">,
+def FLI_S : FPFLI_r<0b1111000, 0b00001, 0b000, FPR32, "fli.s">,
Sched<[WriteFLI32]>;
let SchedRW = [WriteFMinMax32, ReadFMinMax32, ReadFMinMax32] in {
@@ -106,8 +99,7 @@ def FLEQ_S : FPCmp_rr<0b1010000, 0b100, "fleq.s", FPR32>;
let Predicates = [HasStdExtZfa, HasStdExtD] in {
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def FLI_D : FPUnaryOp_imm<0b1111001, 0b00001, 0b000, (outs FPR64:$rd),
- (ins loadfpimm:$imm), "fli.d", "$rd, $imm">,
+def FLI_D : FPFLI_r<0b1111001, 0b00001, 0b000, FPR64, "fli.d">,
Sched<[WriteFLI64]>;
let SchedRW = [WriteFMinMax64, ReadFMinMax64, ReadFMinMax64] in {
@@ -120,6 +112,7 @@ def FROUND_D : FPUnaryOp_r_frm<0b0100001, 0b00100, FPR64, FPR64, "fround.d">,
def FROUNDNX_D : FPUnaryOp_r_frm<0b0100001, 0b00101, FPR64, FPR64, "froundnx.d">,
Sched<[WriteFRoundF64, ReadFRoundF64]>;
+let IsSignExtendingOpW = 1 in
def FCVTMOD_W_D
: FPUnaryOp_r_rtz<0b1100001, 0b01000, GPR, FPR64, "fcvtmod.w.d">,
Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>;
@@ -146,8 +139,7 @@ def FMV_X_W_FPR64 : FPUnaryOp_r<0b1110000, 0b00000, 0b000, GPR, FPR64,
let Predicates = [HasStdExtZfa, HasStdExtZfhOrZvfh] in
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def FLI_H : FPUnaryOp_imm<0b1111010, 0b00001, 0b000, (outs FPR16:$rd),
- (ins loadfpimm:$imm), "fli.h", "$rd, $imm">,
+def FLI_H : FPFLI_r<0b1111010, 0b00001, 0b000, FPR16, "fli.h">,
Sched<[WriteFLI16]>;
let Predicates = [HasStdExtZfa, HasStdExtZfh] in {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td
index 35f9f03f61a1..d819033eea68 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td
@@ -16,12 +16,12 @@
//===----------------------------------------------------------------------===//
// RISC-V specific DAG Nodes.
//===----------------------------------------------------------------------===//
-
+
def SDT_RISCVFP_ROUND_BF16
: SDTypeProfile<1, 1, [SDTCisVT<0, bf16>, SDTCisVT<1, f32>]>;
def SDT_RISCVFP_EXTEND_BF16
: SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, bf16>]>;
-
+
def riscv_fpround_bf16
: SDNode<"RISCVISD::FP_ROUND_BF16", SDT_RISCVFP_ROUND_BF16>;
def riscv_fpextend_bf16
@@ -41,7 +41,7 @@ def FCVT_S_BF16 : FPUnaryOp_r_frm<0b0100000, 0b00110, FPR32, FPR16, "fcvt.s.bf16
//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
//===----------------------------------------------------------------------===//
-
+
let Predicates = [HasStdExtZfbfmin] in {
/// Loads
def : LdPat<load, FLH, bf16>;
@@ -51,9 +51,9 @@ def : StPat<store, FSH, FPR16, bf16>;
/// Float conversion operations
// f32 -> bf16, bf16 -> f32
-def : Pat<(bf16 (riscv_fpround_bf16 FPR32:$rs1)),
+def : Pat<(bf16 (riscv_fpround_bf16 FPR32:$rs1)),
(FCVT_BF16_S FPR32:$rs1, FRM_DYN)>;
-def : Pat<(riscv_fpextend_bf16 (bf16 FPR16:$rs1)),
+def : Pat<(riscv_fpextend_bf16 (bf16 FPR16:$rs1)),
(FCVT_S_BF16 FPR16:$rs1, FRM_DYN)>;
// Moves (no conversion)
@@ -61,3 +61,25 @@ def : Pat<(bf16 (riscv_fmv_h_x GPR:$src)), (FMV_H_X GPR:$src)>;
def : Pat<(riscv_fmv_x_anyexth (bf16 FPR16:$src)), (FMV_X_H FPR16:$src)>;
def : Pat<(riscv_fmv_x_signexth (bf16 FPR16:$src)), (FMV_X_H FPR16:$src)>;
} // Predicates = [HasStdExtZfbfmin]
+
+let Predicates = [HasStdExtZfbfmin] in {
+// bf16->[u]int. Round-to-zero must be used for the f32->int step; the
+// rounding mode has no effect on the bf16->f32 step.
+def : Pat<(i32 (any_fp_to_sint (bf16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_BF16 $rs1, FRM_RNE), FRM_RTZ)>;
+def : Pat<(i32 (any_fp_to_uint (bf16 FPR16:$rs1))), (FCVT_WU_S (FCVT_S_BF16 $rs1, FRM_RNE), FRM_RTZ)>;
+
+// [u]int->bf16. Match GCC and default to using dynamic rounding mode.
+def : Pat<(bf16 (any_sint_to_fp (i32 GPR:$rs1))), (FCVT_BF16_S (FCVT_S_W $rs1, FRM_DYN), FRM_DYN)>;
+def : Pat<(bf16 (any_uint_to_fp (i32 GPR:$rs1))), (FCVT_BF16_S (FCVT_S_WU $rs1, FRM_DYN), FRM_DYN)>;
+}
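// (Why FRM_RNE is safe above:) the bf16->f32 extend is exact, so the rounding
// mode operand on FCVT_S_BF16 is a don't-care; only the f32->int step needs
// FRM_RTZ.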
+
+let Predicates = [HasStdExtZfbfmin, IsRV64] in {
+// bf16->[u]int64. Round-to-zero must be used for the f32->int step; the
+// rounding mode has no effect on the bf16->f32 step.
+def : Pat<(i64 (any_fp_to_sint (bf16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_BF16 $rs1, FRM_RNE), FRM_RTZ)>;
+def : Pat<(i64 (any_fp_to_uint (bf16 FPR16:$rs1))), (FCVT_LU_S (FCVT_S_BF16 $rs1, FRM_RNE), FRM_RTZ)>;
+
+// [u]int->bf16. Match GCC and default to using dynamic rounding mode.
+def : Pat<(bf16 (any_sint_to_fp (i64 GPR:$rs1))), (FCVT_BF16_S (FCVT_S_L $rs1, FRM_DYN), FRM_DYN)>;
+def : Pat<(bf16 (any_uint_to_fp (i64 GPR:$rs1))), (FCVT_BF16_S (FCVT_S_LU $rs1, FRM_DYN), FRM_DYN)>;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index 810775a78241..055f13032788 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -85,7 +85,7 @@ def FSH : FPStore_r<0b001, "fsh", FPR16, WriteFST16>;
} // Predicates = [HasHalfFPLoadStoreMove]
foreach Ext = ZfhExts in {
- let SchedRW = [WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16] in {
+ let SchedRW = [WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16Addend] in {
defm FMADD_H : FPFMA_rrr_frm_m<OPC_MADD, 0b10, "fmadd.h", Ext>;
defm FMSUB_H : FPFMA_rrr_frm_m<OPC_MSUB, 0b10, "fmsub.h", Ext>;
defm FNMSUB_H : FPFMA_rrr_frm_m<OPC_NMSUB, 0b10, "fnmsub.h", Ext>;
@@ -142,8 +142,8 @@ foreach Ext = ZfhminExts in {
Ext.F32Ty, "fcvt.h.s">,
Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]>;
- defm FCVT_S_H : FPUnaryOp_r_m<0b0100000, 0b00010, 0b000, Ext, Ext.F32Ty,
- Ext.PrimaryTy, "fcvt.s.h">,
+  defm FCVT_S_H : FPUnaryOp_r_frmlegacy_m<0b0100000, 0b00010, Ext, Ext.F32Ty,
+ Ext.PrimaryTy, "fcvt.s.h">,
Sched<[WriteFCvtF16ToF32, ReadFCvtF16ToF32]>;
} // foreach Ext = ZfhminExts
@@ -191,8 +191,8 @@ foreach Ext = ZfhminDExts in {
Ext.F64Ty, "fcvt.h.d">,
Sched<[WriteFCvtF64ToF16, ReadFCvtF64ToF16]>;
- defm FCVT_D_H : FPUnaryOp_r_m<0b0100001, 0b00010, 0b000, Ext, Ext.F64Ty,
- Ext.F16Ty, "fcvt.d.h">,
+ defm FCVT_D_H : FPUnaryOp_r_frmlegacy_m<0b0100001, 0b00010, Ext, Ext.F64Ty,
+ Ext.F16Ty, "fcvt.d.h">,
Sched<[WriteFCvtF16ToF64, ReadFCvtF16ToF64]>;
} // foreach Ext = ZfhminDExts
@@ -248,7 +248,6 @@ def PseudoQuietFLT_H_INX : PseudoQuietFCMP<FPR16INX>;
// Pseudo-instructions and codegen patterns
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZfh] in {
/// Float conversion operations
@@ -257,17 +256,20 @@ let Predicates = [HasStdExtZfh] in {
/// Float arithmetic operations
-def : PatFprFprDynFrm<any_fadd, FADD_H, FPR16, f16>;
-def : PatFprFprDynFrm<any_fsub, FSUB_H, FPR16, f16>;
-def : PatFprFprDynFrm<any_fmul, FMUL_H, FPR16, f16>;
-def : PatFprFprDynFrm<any_fdiv, FDIV_H, FPR16, f16>;
+foreach Ext = ZfhExts in {
+ defm : PatFprFprDynFrm_m<any_fadd, FADD_H, Ext>;
+ defm : PatFprFprDynFrm_m<any_fsub, FSUB_H, Ext>;
+ defm : PatFprFprDynFrm_m<any_fmul, FMUL_H, Ext>;
+ defm : PatFprFprDynFrm_m<any_fdiv, FDIV_H, Ext>;
+}
+let Predicates = [HasStdExtZfh] in {
def : Pat<(f16 (any_fsqrt FPR16:$rs1)), (FSQRT_H FPR16:$rs1, FRM_DYN)>;
def : Pat<(f16 (fneg FPR16:$rs1)), (FSGNJN_H $rs1, $rs1)>;
def : Pat<(f16 (fabs FPR16:$rs1)), (FSGNJX_H $rs1, $rs1)>;
-def : Pat<(riscv_fpclass (f16 FPR16:$rs1)), (FCLASS_H $rs1)>;
+def : Pat<(riscv_fclass (f16 FPR16:$rs1)), (FCLASS_H $rs1)>;
def : PatFprFpr<fcopysign, FSGNJ_H, FPR16, f16>;
def : Pat<(f16 (fcopysign FPR16:$rs1, (f16 (fneg FPR16:$rs2)))), (FSGNJN_H $rs1, $rs2)>;
@@ -304,17 +306,12 @@ let Predicates = [HasStdExtZhinx] in {
/// Float arithmetic operations
-def : PatFprFprDynFrm<any_fadd, FADD_H_INX, FPR16INX, f16>;
-def : PatFprFprDynFrm<any_fsub, FSUB_H_INX, FPR16INX, f16>;
-def : PatFprFprDynFrm<any_fmul, FMUL_H_INX, FPR16INX, f16>;
-def : PatFprFprDynFrm<any_fdiv, FDIV_H_INX, FPR16INX, f16>;
-
def : Pat<(any_fsqrt FPR16INX:$rs1), (FSQRT_H_INX FPR16INX:$rs1, FRM_DYN)>;
def : Pat<(fneg FPR16INX:$rs1), (FSGNJN_H_INX $rs1, $rs1)>;
def : Pat<(fabs FPR16INX:$rs1), (FSGNJX_H_INX $rs1, $rs1)>;
-def : Pat<(riscv_fpclass FPR16INX:$rs1), (FCLASS_H_INX $rs1)>;
+def : Pat<(riscv_fclass FPR16INX:$rs1), (FCLASS_H_INX $rs1)>;
def : PatFprFpr<fcopysign, FSGNJ_H_INX, FPR16INX, f16>;
def : Pat<(fcopysign FPR16INX:$rs1, (fneg FPR16INX:$rs2)), (FSGNJN_H_INX $rs1, $rs2)>;
@@ -358,12 +355,12 @@ foreach Ext = ZfhExts in {
// Match non-signaling FEQ_D
foreach Ext = ZfhExts in {
- defm : PatSetCC_m<any_fsetcc, SETEQ, FEQ_H, Ext, f16>;
- defm : PatSetCC_m<any_fsetcc, SETOEQ, FEQ_H, Ext, f16>;
- defm : PatSetCC_m<strict_fsetcc, SETLT, PseudoQuietFLT_H, Ext, f16>;
- defm : PatSetCC_m<strict_fsetcc, SETOLT, PseudoQuietFLT_H, Ext, f16>;
- defm : PatSetCC_m<strict_fsetcc, SETLE, PseudoQuietFLE_H, Ext, f16>;
- defm : PatSetCC_m<strict_fsetcc, SETOLE, PseudoQuietFLE_H, Ext, f16>;
+ defm : PatSetCC_m<any_fsetcc, SETEQ, FEQ_H, Ext>;
+ defm : PatSetCC_m<any_fsetcc, SETOEQ, FEQ_H, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETLT, PseudoQuietFLT_H, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETOLT, PseudoQuietFLT_H, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETLE, PseudoQuietFLE_H, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETOLE, PseudoQuietFLE_H, Ext>;
}
let Predicates = [HasStdExtZfh] in {
@@ -397,10 +394,10 @@ def : Pat<(XLenVT (strict_fsetccs FPR16INX:$rs1, FPR16INX:$rs1, SETOEQ)),
} // Predicates = [HasStdExtZhinx]
foreach Ext = ZfhExts in {
- defm : PatSetCC_m<any_fsetccs, SETLT, FLT_H, Ext, f16>;
- defm : PatSetCC_m<any_fsetccs, SETOLT, FLT_H, Ext, f16>;
- defm : PatSetCC_m<any_fsetccs, SETLE, FLE_H, Ext, f16>;
- defm : PatSetCC_m<any_fsetccs, SETOLE, FLE_H, Ext, f16>;
+ defm : PatSetCC_m<any_fsetccs, SETLT, FLT_H, Ext>;
+ defm : PatSetCC_m<any_fsetccs, SETOLT, FLT_H, Ext>;
+ defm : PatSetCC_m<any_fsetccs, SETLE, FLE_H, Ext>;
+ defm : PatSetCC_m<any_fsetccs, SETOLE, FLE_H, Ext>;
}
let Predicates = [HasStdExtZfh] in {
@@ -425,11 +422,13 @@ def : StPat<store, FSH, FPR16, f16>;
let Predicates = [HasStdExtZhinxOrZhinxmin] in {
/// Loads
-def : Pat<(f16 (load GPR:$rs1)), (COPY_TO_REGCLASS (LH GPR:$rs1, 0), GPRF16)>;
+def : Pat<(f16 (load (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12))),
+ (COPY_TO_REGCLASS (LH GPR:$rs1, simm12:$imm12), GPRF16)>;
/// Stores
-def : Pat<(store (f16 FPR16INX:$rs2), GPR:$rs1),
- (SH (COPY_TO_REGCLASS FPR16INX:$rs2, GPR), GPR:$rs1, 0)>;
+def : Pat<(store (f16 FPR16INX:$rs2),
+ (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12)),
+ (SH (COPY_TO_REGCLASS FPR16INX:$rs2, GPR), GPR:$rs1, simm12:$imm12)>;
} // Predicates = [HasStdExtZhinxOrZhinxmin]
let Predicates = [HasStdExtZfhOrZfhmin] in {
@@ -437,14 +436,14 @@ let Predicates = [HasStdExtZfhOrZfhmin] in {
// f32 -> f16, f16 -> f32
def : Pat<(f16 (any_fpround FPR32:$rs1)), (FCVT_H_S FPR32:$rs1, FRM_DYN)>;
-def : Pat<(any_fpextend (f16 FPR16:$rs1)), (FCVT_S_H FPR16:$rs1)>;
+def : Pat<(any_fpextend (f16 FPR16:$rs1)), (FCVT_S_H FPR16:$rs1, FRM_RNE)>;
// Moves (no conversion)
def : Pat<(f16 (riscv_fmv_h_x GPR:$src)), (FMV_H_X GPR:$src)>;
def : Pat<(riscv_fmv_x_anyexth (f16 FPR16:$src)), (FMV_X_H FPR16:$src)>;
def : Pat<(riscv_fmv_x_signexth (f16 FPR16:$src)), (FMV_X_H FPR16:$src)>;
-def : Pat<(fcopysign FPR32:$rs1, (f16 FPR16:$rs2)), (FSGNJ_S $rs1, (FCVT_S_H $rs2))>;
+def : Pat<(fcopysign FPR32:$rs1, (f16 FPR16:$rs2)), (FSGNJ_S $rs1, (FCVT_S_H $rs2, FRM_RNE))>;
} // Predicates = [HasStdExtZfhOrZfhmin]
let Predicates = [HasStdExtZhinxOrZhinxmin] in {
@@ -452,17 +451,17 @@ let Predicates = [HasStdExtZhinxOrZhinxmin] in {
// f32 -> f16, f16 -> f32
def : Pat<(any_fpround FPR32INX:$rs1), (FCVT_H_S_INX FPR32INX:$rs1, FRM_DYN)>;
-def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_S_H_INX FPR16INX:$rs1)>;
+def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_S_H_INX FPR16INX:$rs1, FRM_RNE)>;
// Moves (no conversion)
def : Pat<(f16 (riscv_fmv_h_x GPR:$src)), (COPY_TO_REGCLASS GPR:$src, GPR)>;
def : Pat<(riscv_fmv_x_anyexth FPR16INX:$src), (COPY_TO_REGCLASS FPR16INX:$src, GPR)>;
def : Pat<(riscv_fmv_x_signexth FPR16INX:$src), (COPY_TO_REGCLASS FPR16INX:$src, GPR)>;
-def : Pat<(fcopysign FPR32INX:$rs1, FPR16INX:$rs2), (FSGNJ_S_INX $rs1, (FCVT_S_H_INX $rs2))>;
+def : Pat<(fcopysign FPR32INX:$rs1, FPR16INX:$rs2), (FSGNJ_S_INX $rs1, (FCVT_S_H_INX $rs2, FRM_RNE))>;
} // Predicates = [HasStdExtZhinxOrZhinxmin]
-let Predicates = [HasStdExtZfh, IsRV32] in {
+let Predicates = [HasStdExtZfh] in {
// half->[u]int. Round-to-zero must be used.
def : Pat<(i32 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_W_H $rs1, 0b001)>;
def : Pat<(i32 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_WU_H $rs1, 0b001)>;
@@ -480,9 +479,9 @@ def : Pat<(i32 (any_lround (f16 FPR16:$rs1))), (FCVT_W_H $rs1, FRM_RMM)>;
// [u]int->half. Match GCC and default to using dynamic rounding mode.
def : Pat<(f16 (any_sint_to_fp (i32 GPR:$rs1))), (FCVT_H_W $rs1, FRM_DYN)>;
def : Pat<(f16 (any_uint_to_fp (i32 GPR:$rs1))), (FCVT_H_WU $rs1, FRM_DYN)>;
-} // Predicates = [HasStdExtZfh, IsRV32]
+} // Predicates = [HasStdExtZfh]
-let Predicates = [HasStdExtZhinx, IsRV32] in {
+let Predicates = [HasStdExtZhinx] in {
// half->[u]int. Round-to-zero must be used.
def : Pat<(i32 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_W_H_INX $rs1, 0b001)>;
def : Pat<(i32 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_WU_H_INX $rs1, 0b001)>;
@@ -500,7 +499,7 @@ def : Pat<(i32 (any_lround FPR16INX:$rs1)), (FCVT_W_H_INX $rs1, FRM_RMM)>;
// [u]int->half. Match GCC and default to using dynamic rounding mode.
def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_W_INX $rs1, FRM_DYN)>;
def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_H_WU_INX $rs1, FRM_DYN)>;
-} // Predicates = [HasStdExtZhinx, IsRV32]
+} // Predicates = [HasStdExtZhinx]
let Predicates = [HasStdExtZfh, IsRV64] in {
// Use target specific isd nodes to help us remember the result is sign
@@ -566,82 +565,82 @@ let Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD] in {
/// Float conversion operations
// f64 -> f16, f16 -> f64
def : Pat<(f16 (any_fpround FPR64:$rs1)), (FCVT_H_D FPR64:$rs1, FRM_DYN)>;
-def : Pat<(any_fpextend (f16 FPR16:$rs1)), (FCVT_D_H FPR16:$rs1)>;
+def : Pat<(any_fpextend (f16 FPR16:$rs1)), (FCVT_D_H FPR16:$rs1, FRM_RNE)>;
/// Float arithmetic operations
def : Pat<(f16 (fcopysign FPR16:$rs1, FPR64:$rs2)),
(FSGNJ_H $rs1, (FCVT_H_D $rs2, FRM_DYN))>;
-def : Pat<(fcopysign FPR64:$rs1, (f16 FPR16:$rs2)), (FSGNJ_D $rs1, (FCVT_D_H $rs2))>;
+def : Pat<(fcopysign FPR64:$rs1, (f16 FPR16:$rs2)), (FSGNJ_D $rs1, (FCVT_D_H $rs2, FRM_RNE))>;
} // Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD]
let Predicates = [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx, IsRV32] in {
/// Float conversion operations
// f64 -> f16, f16 -> f64
def : Pat<(any_fpround FPR64IN32X:$rs1), (FCVT_H_D_IN32X FPR64IN32X:$rs1, FRM_DYN)>;
-def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_D_H_IN32X FPR16INX:$rs1)>;
+def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_D_H_IN32X FPR16INX:$rs1, FRM_RNE)>;
/// Float arithmetic operations
def : Pat<(fcopysign FPR16INX:$rs1, FPR64IN32X:$rs2),
(FSGNJ_H_INX $rs1, (FCVT_H_D_IN32X $rs2, 0b111))>;
-def : Pat<(fcopysign FPR64IN32X:$rs1, FPR16INX:$rs2), (FSGNJ_D_IN32X $rs1, (FCVT_D_H_IN32X $rs2))>;
+def : Pat<(fcopysign FPR64IN32X:$rs1, FPR16INX:$rs2), (FSGNJ_D_IN32X $rs1, (FCVT_D_H_IN32X $rs2, FRM_RNE))>;
} // Predicates = [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx, IsRV32]
let Predicates = [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx, IsRV64] in {
/// Float conversion operations
// f64 -> f16, f16 -> f64
def : Pat<(any_fpround FPR64INX:$rs1), (FCVT_H_D_INX FPR64INX:$rs1, FRM_DYN)>;
-def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_D_H_INX FPR16INX:$rs1)>;
+def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_D_H_INX FPR16INX:$rs1, FRM_RNE)>;
/// Float arithmetic operations
def : Pat<(fcopysign FPR16INX:$rs1, FPR64INX:$rs2),
(FSGNJ_H_INX $rs1, (FCVT_H_D_INX $rs2, 0b111))>;
-def : Pat<(fcopysign FPR64INX:$rs1, FPR16INX:$rs2), (FSGNJ_D_INX $rs1, (FCVT_D_H_INX $rs2))>;
+def : Pat<(fcopysign FPR64INX:$rs1, FPR16INX:$rs2), (FSGNJ_D_INX $rs1, (FCVT_D_H_INX $rs2, FRM_RNE))>;
} // Predicates = [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx, IsRV64]
-let Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV32] in {
+let Predicates = [HasStdExtZfhmin, NoStdExtZfh] in {
// half->[u]int. Round-to-zero must be used.
-def : Pat<(i32 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1), FRM_RTZ)>;
-def : Pat<(i32 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_WU_S (FCVT_S_H $rs1), FRM_RTZ)>;
+def : Pat<(i32 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1, FRM_RNE), FRM_RTZ)>;
+def : Pat<(i32 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_WU_S (FCVT_S_H $rs1, FRM_RNE), FRM_RTZ)>;
// half->int32 with current rounding mode.
-def : Pat<(i32 (any_lrint (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1), FRM_DYN)>;
+def : Pat<(i32 (any_lrint (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1, FRM_RNE), FRM_DYN)>;
// half->int32 rounded to nearest with ties rounded away from zero.
-def : Pat<(i32 (any_lround (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1), FRM_RMM)>;
+def : Pat<(i32 (any_lround (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1, FRM_RNE), FRM_RMM)>;
// [u]int->half. Match GCC and default to using dynamic rounding mode.
def : Pat<(f16 (any_sint_to_fp (i32 GPR:$rs1))), (FCVT_H_S (FCVT_S_W $rs1, FRM_DYN), FRM_DYN)>;
def : Pat<(f16 (any_uint_to_fp (i32 GPR:$rs1))), (FCVT_H_S (FCVT_S_WU $rs1, FRM_DYN), FRM_DYN)>;
-} // Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV32]
+} // Predicates = [HasStdExtZfhmin, NoStdExtZfh]
-let Predicates = [HasStdExtZhinxmin, NoStdExtZhinx, IsRV32] in {
+let Predicates = [HasStdExtZhinxmin, NoStdExtZhinx] in {
// half->[u]int. Round-to-zero must be used.
-def : Pat<(i32 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1), FRM_RTZ)>;
-def : Pat<(i32 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_WU_S_INX (FCVT_S_H_INX $rs1), FRM_RTZ)>;
+def : Pat<(i32 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RTZ)>;
+def : Pat<(i32 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_WU_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RTZ)>;
// half->int32 with current rounding mode.
-def : Pat<(i32 (any_lrint FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1), FRM_DYN)>;
+def : Pat<(i32 (any_lrint FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_DYN)>;
// half->int32 rounded to nearest with ties rounded away from zero.
-def : Pat<(i32 (any_lround FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1), FRM_RMM)>;
+def : Pat<(i32 (any_lround FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RMM)>;
// [u]int->half. Match GCC and default to using dynamic rounding mode.
def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_S_INX (FCVT_S_W_INX $rs1, FRM_DYN), FRM_DYN)>;
def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_H_S_INX (FCVT_S_WU_INX $rs1, FRM_DYN), FRM_DYN)>;
-} // Predicates = [HasStdExtZhinxmin, NoStdExtZhinx, IsRV32]
+} // Predicates = [HasStdExtZhinxmin, NoStdExtZhinx]
let Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV64] in {
// half->[u]int64. Round-to-zero must be used.
-def : Pat<(i64 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1), FRM_RTZ)>;
-def : Pat<(i64 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_LU_S (FCVT_S_H $rs1), FRM_RTZ)>;
+def : Pat<(i64 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1, FRM_RNE), FRM_RTZ)>;
+def : Pat<(i64 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_LU_S (FCVT_S_H $rs1, FRM_RNE), FRM_RTZ)>;
// half->int64 with current rounding mode.
-def : Pat<(i64 (any_lrint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1), FRM_DYN)>;
-def : Pat<(i64 (any_llrint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1), FRM_DYN)>;
+def : Pat<(i64 (any_lrint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1, FRM_RNE), FRM_DYN)>;
+def : Pat<(i64 (any_llrint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1, FRM_RNE), FRM_DYN)>;
// half->int64 rounded to nearest with ties rounded away from zero.
-def : Pat<(i64 (any_lround (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1), FRM_RMM)>;
-def : Pat<(i64 (any_llround (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1), FRM_RMM)>;
+def : Pat<(i64 (any_lround (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1, FRM_RNE), FRM_RMM)>;
+def : Pat<(i64 (any_llround (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1, FRM_RNE), FRM_RMM)>;
// [u]int->fp. Match GCC and default to using dynamic rounding mode.
def : Pat<(f16 (any_sint_to_fp (i64 GPR:$rs1))), (FCVT_H_S (FCVT_S_L $rs1, FRM_DYN), FRM_DYN)>;
@@ -650,16 +649,16 @@ def : Pat<(f16 (any_uint_to_fp (i64 GPR:$rs1))), (FCVT_H_S (FCVT_S_LU $rs1, FRM_
let Predicates = [HasStdExtZhinxmin, NoStdExtZhinx, IsRV64] in {
// half->[u]int64. Round-to-zero must be used.
-def : Pat<(i64 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1), FRM_RTZ)>;
-def : Pat<(i64 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_LU_S_INX (FCVT_S_H_INX $rs1), FRM_RTZ)>;
+def : Pat<(i64 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RTZ)>;
+def : Pat<(i64 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_LU_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RTZ)>;
// half->int64 with current rounding mode.
-def : Pat<(i64 (any_lrint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1), FRM_DYN)>;
-def : Pat<(i64 (any_llrint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1), FRM_DYN)>;
+def : Pat<(i64 (any_lrint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_DYN)>;
+def : Pat<(i64 (any_llrint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_DYN)>;
// half->int64 rounded to nearest with ties rounded away from zero.
-def : Pat<(i64 (any_lround FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1), FRM_RMM)>;
-def : Pat<(i64 (any_llround FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1), FRM_RMM)>;
+def : Pat<(i64 (any_lround FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RMM)>;
+def : Pat<(i64 (any_llround FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RMM)>;
// [u]int->fp. Match GCC and default to using dynamic rounding mode.
def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_H_S_INX (FCVT_S_L_INX $rs1, FRM_DYN), FRM_DYN)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td
index 509d1cfcd874..56b68e324de2 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td
@@ -16,7 +16,7 @@
//===----------------------------------------------------------------------===//
// A 12-bit signed immediate where the least significant five bits are zero.
-def simm12_lsb00000 : Operand<XLenVT>,
+def simm12_lsb00000 : RISCVOp,
ImmLeaf<XLenVT, [{return isShiftedInt<7, 5>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<12, "Lsb00000">;
let EncoderMethod = "getImmOpValue";
@@ -28,7 +28,6 @@ def simm12_lsb00000 : Operand<XLenVT>,
return MCOp.isBareSymbolRef();
}];
let OperandType = "OPERAND_SIMM12_LSB00000";
- let OperandNamespace = "RISCVOp";
}
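
// A runnable sketch of the constraint isShiftedInt<7, 5> encodes above: a
// signed 12-bit value whose low five bits are zero, i.e. a multiple of 32 in
// [-2048, 2016]. The helper name is illustrative.
#include <cassert>
#include <cstdint>

static bool isSimm12Lsb00000(int64_t imm) {
  return imm % 32 == 0 && imm >= -2048 && imm <= 2016;
}

int main() {
  assert(isSimm12Lsb00000(0) && isSimm12Lsb00000(-2048) && isSimm12Lsb00000(2016));
  assert(!isSimm12Lsb00000(2017)); // low five bits not all zero
  assert(!isSimm12Lsb00000(2048)); // outside the signed 12-bit range
}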
//===----------------------------------------------------------------------===//
@@ -74,12 +73,16 @@ def PREFETCH_W : Prefetch_ri<0b00011, "prefetch.w">, Sched<[]>;
// Patterns
//===----------------------------------------------------------------------===//
+def AddrRegImmLsb00000 : ComplexPattern<iPTR, 2, "SelectAddrRegImmLsb00000">;
+
let Predicates = [HasStdExtZicbop] in {
- // FIXME: Match address with offset
- def : Pat<(prefetch GPR:$rs1, imm, imm, (XLenVT 0)),
- (PREFETCH_I GPR:$rs1, 0)>;
- def : Pat<(prefetch GPR:$rs1, (XLenVT 0), imm, (XLenVT 1)),
- (PREFETCH_R GPR:$rs1, 0)>;
- def : Pat<(prefetch GPR:$rs1, (XLenVT 1), imm, (XLenVT 1)),
- (PREFETCH_W GPR:$rs1, 0)>;
+ def : Pat<(prefetch (AddrRegImmLsb00000 (XLenVT GPR:$rs1), simm12_lsb00000:$imm12),
+ timm, timm, (i32 0)),
+ (PREFETCH_I GPR:$rs1, simm12_lsb00000:$imm12)>;
+ def : Pat<(prefetch (AddrRegImmLsb00000 (XLenVT GPR:$rs1), simm12_lsb00000:$imm12),
+ (i32 0), timm, (i32 1)),
+ (PREFETCH_R GPR:$rs1, simm12_lsb00000:$imm12)>;
+ def : Pat<(prefetch (AddrRegImmLsb00000 (XLenVT GPR:$rs1), simm12_lsb00000:$imm12),
+ (i32 1), timm, (i32 1)),
+ (PREFETCH_W GPR:$rs1, simm12_lsb00000:$imm12)>;
}
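
// A sketch of how source code reaches the three patterns above, assuming the
// usual __builtin_prefetch lowering to ISD::PREFETCH with (rw, locality,
// is-data) operands; prefetch.i has no portable builtin.
void warm(const char *p) {
  __builtin_prefetch(p, /*rw=*/0, /*locality=*/3);      // data read  -> PREFETCH_R p, 0
  __builtin_prefetch(p + 64, /*rw=*/1, /*locality=*/3); // data write -> PREFETCH_W p, 64
}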
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td
index ab0b93d62af5..0790a941823b 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td
@@ -40,4 +40,13 @@ def : Pat<(XLenVT (riscv_czero_eqz GPR:$rs1, GPR:$rc)),
(CZERO_EQZ GPR:$rs1, GPR:$rc)>;
def : Pat<(XLenVT (riscv_czero_nez GPR:$rs1, GPR:$rc)),
(CZERO_NEZ GPR:$rs1, GPR:$rc)>;
+
+def : Pat<(XLenVT (riscv_czero_eqz GPR:$rs1, (riscv_setne (XLenVT GPR:$rc)))),
+ (CZERO_EQZ GPR:$rs1, GPR:$rc)>;
+def : Pat<(XLenVT (riscv_czero_eqz GPR:$rs1, (riscv_seteq (XLenVT GPR:$rc)))),
+ (CZERO_NEZ GPR:$rs1, GPR:$rc)>;
+def : Pat<(XLenVT (riscv_czero_nez GPR:$rs1, (riscv_setne (XLenVT GPR:$rc)))),
+ (CZERO_NEZ GPR:$rs1, GPR:$rc)>;
+def : Pat<(XLenVT (riscv_czero_nez GPR:$rs1, (riscv_seteq (XLenVT GPR:$rc)))),
+ (CZERO_EQZ GPR:$rs1, GPR:$rc)>;
} // Predicates = [HasStdExtZicond]
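
// A runnable sketch of the new folds, assuming the documented Zicond
// semantics: czero.eqz zeroes rs1 when rc == 0 and czero.nez when rc != 0,
// so a setne/seteq feeding the condition only flips which opcode is chosen.
#include <cassert>
#include <cstdint>

static uint64_t czero_eqz(uint64_t rs1, uint64_t rc) { return rc ? rs1 : 0; }
static uint64_t czero_nez(uint64_t rs1, uint64_t rc) { return rc ? 0 : rs1; }

int main() {
  for (uint64_t rc : {0ULL, 1ULL, 42ULL}) {
    uint64_t x = 0x1234;
    assert(czero_eqz(x, rc != 0) == czero_eqz(x, rc)); // setne folds away
    assert(czero_eqz(x, rc == 0) == czero_nez(x, rc)); // seteq swaps eqz/nez
    assert(czero_nez(x, rc != 0) == czero_nez(x, rc));
    assert(czero_nez(x, rc == 0) == czero_eqz(x, rc));
  }
}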
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
index b8c0606034c5..3ec63b1b6adb 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
@@ -36,30 +36,26 @@ def RnumArg : AsmOperandClass {
let DiagnosticType = "InvalidRnumArg";
}
-def rnum : Operand<i32>, TImmLeaf<i32, [{return (Imm >= 0 && Imm <= 10);}]> {
+def rnum : RISCVOp<i32>, TImmLeaf<i32, [{return (Imm >= 0 && Imm <= 10);}]> {
let ParserMatchClass = RnumArg;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeUImmOperand<4>";
let OperandType = "OPERAND_RVKRNUM";
- let OperandNamespace = "RISCVOp";
}
-def byteselect : Operand<i32>, TImmLeaf<i32, [{return isUInt<2>(Imm);}]> {
+def byteselect : RISCVOp<i32>, TImmLeaf<i32, [{return isUInt<2>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<2>;
let DecoderMethod = "decodeUImmOperand<2>";
let OperandType = "OPERAND_UIMM2";
- let OperandNamespace = "RISCVOp";
}
//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class RVKUnary<bits<12> imm12_in, bits<3> funct3, string opcodestr>
- : RVInstI<funct3, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1),
- opcodestr, "$rd, $rs1">{
- let imm12 = imm12_in;
-}
+class RVKUnary<bits<12> imm12, bits<3> funct3, string opcodestr>
+ : RVInstIUnary<imm12, funct3, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1),
+ opcodestr, "$rd, $rs1">;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class RVKByteSelect<bits<5> funct5, string opcodestr>
@@ -72,12 +68,12 @@ class RVKByteSelect<bits<5> funct5, string opcodestr>
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class RVKUnary_rnum<bits<7> funct7, bits<3> funct3, string opcodestr>
- : RVInstI<funct3, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1, rnum:$rnum),
- opcodestr, "$rd, $rs1, $rnum">{
- bits<4> rnum;
- let Inst{31-25} = funct7;
- let Inst{24} = 1;
- let Inst{23-20} = rnum;
+ : RVInstIBase<funct3, OPC_OP_IMM, (outs GPR:$rd),
+ (ins GPR:$rs1, rnum:$rnum), opcodestr, "$rd, $rs1, $rnum"> {
+ bits<4> rnum;
+ let Inst{31-25} = funct7;
+ let Inst{24} = 0b1;
+ let Inst{23-20} = rnum;
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
index 046074d848f5..1b1f3b9b16e4 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
@@ -25,7 +25,8 @@ let Uses = [FRM] in
defm VFNCVTBF16_F_F_W : VNCVTF_FV_VS2<"vfncvtbf16.f.f.w", 0b010010, 0b11101>;
}
-let Predicates = [HasStdExtZvfbfwma], Constraints = "@earlyclobber $vd",
+let Predicates = [HasStdExtZvfbfwma],
+ Constraints = "@earlyclobber $vd_wb, $vd = $vd_wb",
RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true in {
defm VFWMACCBF16_V : VWMAC_FV_V_F<"vfwmaccbf16", 0b111011>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
index 13c98ce92d14..1ffa78a28d09 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
@@ -15,46 +15,16 @@
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
-def RnumArg_0_7 : AsmOperandClass {
- let Name = "RnumArg_0_7";
- let RenderMethod = "addImmOperands";
- let DiagnosticType = "InvalidRnumArg_0_7";
-}
-
-def RnumArg_1_10 : AsmOperandClass {
- let Name = "RnumArg_1_10";
- let RenderMethod = "addImmOperands";
- let DiagnosticType = "InvalidRnumArg_1_10";
-}
-
-def RnumArg_2_14 : AsmOperandClass {
- let Name = "RnumArg_2_14";
- let RenderMethod = "addImmOperands";
- let DiagnosticType = "InvalidRnumArg_2_14";
-}
-
-def rnum_0_7 : Operand<XLenVT>, ImmLeaf<XLenVT,
- [{return (0 <= Imm && Imm <= 7);}]> {
- let ParserMatchClass = RnumArg_0_7;
+def tuimm5 : Operand<XLenVT>, TImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]> {
+ let ParserMatchClass = UImmAsmOperand<5>;
+ let EncoderMethod = "getUImmOpValue";
let DecoderMethod = "decodeUImmOperand<5>";
- let OperandType = "OPERAND_RVKRNUM_0_7";
- let OperandNamespace = "RISCVOp";
-}
-
-def rnum_1_10 : Operand<XLenVT>, ImmLeaf<XLenVT,
- [{return (1 <= Imm && Imm <= 10);}]> {
- let ParserMatchClass = RnumArg_1_10;
- let DecoderMethod = "decodeUImmOperand<5>";
- let OperandType = "OPERAND_RVKRNUM_1_10";
- let OperandNamespace = "RISCVOp";
-}
-
-def rnum_2_14 : Operand<XLenVT>, ImmLeaf<XLenVT,
- [{return (2 <= Imm && Imm <= 14);}]> {
- let ParserMatchClass = RnumArg_2_14;
- let DecoderMethod = "decodeUImmOperand<5>";
- let OperandType = "OPERAND_RVKRNUM_2_14";
- let OperandNamespace = "RISCVOp";
+ let MCOperandPredicate = [{
+ int64_t UImm;
+ if (MCOp.evaluateAsConstantImm(UImm))
+ return isUInt<5>(UImm);
+ return MCOp.isBareSymbolRef();
+ }];
}
//===----------------------------------------------------------------------===//
@@ -140,15 +110,10 @@ class VAESKF_MV_I<bits<6> funct6, string opcodestr, Operand optype>
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtZvbb] in {
- defm VANDN_V : VALU_IV_V_X<"vandn", 0b000001>;
- def VBREV8_V : VALUVs2<0b010010, 0b01000, OPMVV, "vbrev8.v">;
def VBREV_V : VALUVs2<0b010010, 0b01010, OPMVV, "vbrev.v">;
def VCLZ_V : VALUVs2<0b010010, 0b01100, OPMVV, "vclz.v">;
def VCPOP_V : VALUVs2<0b010010, 0b01110, OPMVV, "vcpop.v">;
def VCTZ_V : VALUVs2<0b010010, 0b01101, OPMVV, "vctz.v">;
- def VREV8_V : VALUVs2<0b010010, 0b01001, OPMVV, "vrev8.v">;
- defm VROL_V : VALU_IV_V_X<"vrol", 0b010101>;
- defm VROR_V : VROR_IV_V_X_I<"vror", 0b010100>;
let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in
defm VWSLL_V : VSHT_IV_V_X_I<"vwsll", 0b110101>;
} // Predicates = [HasStdExtZvbb]
@@ -158,16 +123,24 @@ let Predicates = [HasStdExtZvbc] in {
defm VCLMULH_V : VCLMUL_MV_V_X<"vclmulh", 0b001101>;
} // Predicates = [HasStdExtZvbc]
+let Predicates = [HasStdExtZvkb] in {
+ defm VANDN_V : VALU_IV_V_X<"vandn", 0b000001>;
+ def VBREV8_V : VALUVs2<0b010010, 0b01000, OPMVV, "vbrev8.v">;
+ def VREV8_V : VALUVs2<0b010010, 0b01001, OPMVV, "vrev8.v">;
+ defm VROL_V : VALU_IV_V_X<"vrol", 0b010101>;
+ defm VROR_V : VROR_IV_V_X_I<"vror", 0b010100>;
+} // Predicates = [HasStdExtZvkb]
+
let Predicates = [HasStdExtZvkg], RVVConstraint = NoConstraint in {
def VGHSH_VV : PALUVVNoVm<0b101100, OPMVV, "vghsh.vv">;
def VGMUL_VV : PALUVs2NoVm<0b101000, 0b10001, OPMVV, "vgmul.vv">;
} // Predicates = [HasStdExtZvkg]
-let Predicates = [HasStdExtZvknha], RVVConstraint = NoConstraint in {
+let Predicates = [HasStdExtZvknhaOrZvknhb], RVVConstraint = NoConstraint in {
def VSHA2CH_VV : PALUVVNoVm<0b101110, OPMVV, "vsha2ch.vv">;
def VSHA2CL_VV : PALUVVNoVm<0b101111, OPMVV, "vsha2cl.vv">;
def VSHA2MS_VV : PALUVVNoVm<0b101101, OPMVV, "vsha2ms.vv">;
-} // Predicates = [HasStdExtZvknha]
+} // Predicates = [HasStdExtZvknhaOrZvknhb]
let Predicates = [HasStdExtZvkned], RVVConstraint = NoConstraint in {
defm VAESDF : VAES_MV_V_S<0b101000, 0b101001, 0b00001, OPMVV, "vaesdf">;
@@ -193,34 +166,254 @@ let Predicates = [HasStdExtZvksh], RVVConstraint = NoConstraint in {
// Pseudo instructions
//===----------------------------------------------------------------------===//
-defm PseudoVANDN : VPseudoVALU_VV_VX;
+defvar I32IntegerVectors = !filter(vti, AllIntegerVectors, !eq(vti.SEW, 32));
+defvar I32I64IntegerVectors = !filter(vti, AllIntegerVectors,
+ !or(!eq(vti.SEW, 32), !eq(vti.SEW, 64)));
+
+class ZvkI32IntegerVectors<string vd_lmul> {
+ list<VTypeInfo> vs2_types = !cond(!eq(vd_lmul, "M8") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 32)),
+ !eq(vd_lmul, "M4") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 32)),
+ !eq(vd_lmul, "M2") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 16)),
+ !eq(vd_lmul, "M1") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 8)),
+ !eq(vd_lmul, "MF2") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 4)),
+ !eq(vd_lmul, "MF4") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 2)),
+ !eq(vd_lmul, "MF8") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 1)));
+}
+
+class ZvkMxSet<string vd_lmul> {
+ list<LMULInfo> vs2_lmuls = !cond(!eq(vd_lmul, "M8") : [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4],
+ !eq(vd_lmul, "M4") : [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4],
+ !eq(vd_lmul, "M2") : [V_MF8, V_MF4, V_MF2, V_M1, V_M2],
+ !eq(vd_lmul, "M1") : [V_MF8, V_MF4, V_MF2, V_M1],
+ !eq(vd_lmul, "MF2") : [V_MF8, V_MF4, V_MF2],
+ !eq(vd_lmul, "MF4") : [V_MF8, V_MF4],
+ !eq(vd_lmul, "MF8") : [V_MF8]);
+}
+
+class VPseudoUnaryNoMask_Zvk<DAGOperand RetClass, VReg OpClass, string Constraint = ""> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$merge, OpClass:$rs2, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasVecPolicyOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoBinaryNoMask_Zvk<VReg RetClass,
+ VReg Op1Class,
+ DAGOperand Op2Class,
+ string Constraint> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasVecPolicyOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+multiclass VPseudoBinaryNoMask_Zvk<VReg RetClass,
+ VReg Op1Class,
+ DAGOperand Op2Class,
+ LMULInfo MInfo,
+ string Constraint = ""> {
+ let VLMul = MInfo.value in
+ def "_" # MInfo.MX : VPseudoBinaryNoMask_Zvk<RetClass, Op1Class, Op2Class,
+ Constraint>;
+}
+
+multiclass VPseudoUnaryV_V_NoMask_Zvk<LMULInfo m, string Constraint = ""> {
+ let VLMul = m.value in {
+ def "_VV_" # m.MX : VPseudoUnaryNoMask_Zvk<m.vrclass, m.vrclass, Constraint>;
+ }
+}
+
+multiclass VPseudoUnaryV_S_NoMask_Zvk<LMULInfo m, string Constraint = ""> {
+ let VLMul = m.value in
+ foreach vs2_lmul = ZvkMxSet<m.MX>.vs2_lmuls in
+ def "_VS_" # m.MX # "_" # vs2_lmul.MX : VPseudoUnaryNoMask_Zvk<m.vrclass, vs2_lmul.vrclass, Constraint>;
+}
+
+multiclass VPseudoVALU_V_NoMask_Zvk<string Constraint = ""> {
+ foreach m = MxListVF4 in {
+ defvar mx = m.MX;
+ defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
+ defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx);
+
+ defm "" : VPseudoUnaryV_V_NoMask_Zvk<m, Constraint>,
+ Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>;
+ }
+}
+
+multiclass VPseudoVALU_S_NoMask_Zvk<string Constraint = ""> {
+ foreach m = MxListVF4 in {
+ defvar mx = m.MX;
+ defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
+ defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx);
+
+ defm "" : VPseudoUnaryV_S_NoMask_Zvk<m, Constraint>,
+ Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>;
+ }
+}
+
+multiclass VPseudoVALU_V_S_NoMask_Zvk<string Constraint = ""> {
+ defm "" : VPseudoVALU_V_NoMask_Zvk<Constraint>;
+ defm "" : VPseudoVALU_S_NoMask_Zvk<Constraint>;
+}
+
+multiclass VPseudoVALU_VV_NoMask_Zvk<string Constraint = ""> {
+ foreach m = MxListVF4 in {
+ defvar mx = m.MX;
+ defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
+ defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx);
+
+ defm _VV : VPseudoBinaryNoMask_Zvk<m.vrclass, m.vrclass, m.vrclass, m,
+ Constraint>,
+ Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>;
+ }
+}
-multiclass VPseudoUnaryV_V {
+multiclass VPseudoVALU_VI_NoMask_Zvk<Operand ImmType = simm5, string Constraint = ""> {
+ foreach m = MxListVF4 in {
+ defvar mx = m.MX;
+ defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
+ defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx);
+
+ defm _VI : VPseudoBinaryNoMask_Zvk<m.vrclass, m.vrclass, ImmType, m,
+ Constraint>,
+ Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>;
+ }
+}
+
+multiclass VPseudoVALU_VI_NoMaskTU_Zvk<Operand ImmType = uimm5, string Constraint = ""> {
+ foreach m = MxListVF4 in {
+ defvar mx = m.MX;
+ defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
+ defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx);
+
+ defm _VI : VPseudoBinaryNoMask<m.vrclass, m.vrclass, ImmType, m,
+ Constraint>,
+ Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>;
+ }
+}
+
+multiclass VPseudoVALU_VV_NoMaskTU_Zvk<string Constraint = ""> {
+ foreach m = MxListVF4 in {
+ defvar mx = m.MX;
+ defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
+ defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx);
+
+ defm _VV : VPseudoBinaryNoMask<m.vrclass, m.vrclass, m.vrclass, m,
+ Constraint>,
+ Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>;
+ }
+}
+
+multiclass VPseudoVCLMUL_VV_VX {
foreach m = MxList in {
- let VLMul = m.value in {
- def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>;
- def "_V_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>,
- RISCVMaskedPseudo<MaskIdx=2>;
- }
+ defvar mx = m.MX;
+ defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
+    defvar WriteVIALUX_MX = !cast<SchedWrite>("WriteVIALUX_" # mx);
+ defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx);
+ defvar ReadVIALUX_MX = !cast<SchedRead>("ReadVIALUX_" # mx);
+
+ defm "" : VPseudoBinaryV_VV<m>,
+ Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VX<m>,
+ Sched<[WriteVIALUX_MX, ReadVIALUV_MX, ReadVIALUX_MX, ReadVMask]>;
+ }
+}
+
+multiclass VPseudoUnaryV_V<LMULInfo m> {
+ let VLMul = m.value in {
+ defvar suffix = "_V_" # m.MX;
+ def suffix : VPseudoUnaryNoMask<m.vrclass, m.vrclass>;
+ def suffix # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx=2>;
+ }
+}
+
+multiclass VPseudoVALU_V {
+ foreach m = MxList in {
+ defvar mx = m.MX;
+ defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
+ defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx);
+
+ defm "" : VPseudoUnaryV_V<m>,
+ Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>;
}
}
-defm PseudoVBREV : VPseudoUnaryV_V;
-defm PseudoVREV8 : VPseudoUnaryV_V;
-defm PseudoVCLZ : VPseudoUnaryV_V;
-defm PseudoVCTZ : VPseudoUnaryV_V;
-defm PseudoVCPOP : VPseudoUnaryV_V;
+let Predicates = [HasStdExtZvbb] in {
+ defm PseudoVBREV : VPseudoVALU_V;
+ defm PseudoVCLZ : VPseudoVALU_V;
+ defm PseudoVCTZ : VPseudoVALU_V;
+ defm PseudoVCPOP : VPseudoVALU_V;
+ defm PseudoVWSLL : VPseudoVWALU_VV_VX_VI<uimm5>;
+} // Predicates = [HasStdExtZvbb]
+
+let Predicates = [HasStdExtZvbc] in {
+ defm PseudoVCLMUL : VPseudoVCLMUL_VV_VX;
+ defm PseudoVCLMULH : VPseudoVCLMUL_VV_VX;
+} // Predicates = [HasStdExtZvbc]
+
+let Predicates = [HasStdExtZvkb] in {
+ defm PseudoVANDN : VPseudoVALU_VV_VX;
+ defm PseudoVBREV8 : VPseudoVALU_V;
+ defm PseudoVREV8 : VPseudoVALU_V;
+ defm PseudoVROL : VPseudoVALU_VV_VX;
+ defm PseudoVROR : VPseudoVALU_VV_VX_VI<uimm6>;
+} // Predicates = [HasStdExtZvkb]
+
+let Predicates = [HasStdExtZvkg] in {
+ defm PseudoVGHSH : VPseudoVALU_VV_NoMask_Zvk;
+ defm PseudoVGMUL : VPseudoVALU_V_NoMask_Zvk;
+} // Predicates = [HasStdExtZvkg]
+
+let Predicates = [HasStdExtZvkned] in {
+ defm PseudoVAESDF : VPseudoVALU_V_S_NoMask_Zvk;
+ defm PseudoVAESDM : VPseudoVALU_V_S_NoMask_Zvk;
+ defm PseudoVAESEF : VPseudoVALU_V_S_NoMask_Zvk;
+ defm PseudoVAESEM : VPseudoVALU_V_S_NoMask_Zvk;
+ defm PseudoVAESKF1 : VPseudoVALU_VI_NoMaskTU_Zvk;
+ defm PseudoVAESKF2 : VPseudoVALU_VI_NoMask_Zvk<uimm5>;
+ defm PseudoVAESZ : VPseudoVALU_S_NoMask_Zvk;
+} // Predicates = [HasStdExtZvkned]
+
+let Predicates = [HasStdExtZvknhaOrZvknhb] in {
+ defm PseudoVSHA2CH : VPseudoVALU_VV_NoMask_Zvk;
+ defm PseudoVSHA2CL : VPseudoVALU_VV_NoMask_Zvk;
+ defm PseudoVSHA2MS : VPseudoVALU_VV_NoMask_Zvk;
+} // Predicates = [HasStdExtZvknhaOrZvknhb]
+
+let Predicates = [HasStdExtZvksed] in {
+ defm PseudoVSM4K : VPseudoVALU_VI_NoMaskTU_Zvk;
+ defm PseudoVSM4R : VPseudoVALU_V_S_NoMask_Zvk;
+} // Predicates = [HasStdExtZvksed]
-defm PseudoVROL : VPseudoVALU_VV_VX;
-defm PseudoVROR : VPseudoVALU_VV_VX_VI<uimm6>;
+let Predicates = [HasStdExtZvksh] in {
+ defm PseudoVSM3C : VPseudoVALU_VI_NoMask_Zvk<uimm5>;
+ defm PseudoVSM3ME : VPseudoVALU_VV_NoMaskTU_Zvk;
+} // Predicates = [HasStdExtZvksh]
//===----------------------------------------------------------------------===//
// SDNode patterns
//===----------------------------------------------------------------------===//
-multiclass VPatUnarySDNode_V<SDPatternOperator op, string instruction_name> {
+multiclass VPatUnarySDNode_V<SDPatternOperator op, string instruction_name,
+ Predicate predicate = HasStdExtZvbb> {
foreach vti = AllIntegerVectors in {
- let Predicates = !listconcat([HasStdExtZvbb],
+ let Predicates = !listconcat([predicate],
GetVTypePredicates<vti>.Predicates) in {
def : Pat<(vti.Vector (op (vti.Vector vti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_V_"#vti.LMul.MX)
@@ -239,7 +432,7 @@ def riscv_vnot : PatFrag<(ops node:$rs1), (xor node:$rs1,
(riscv_splat_vector -1))>;
foreach vti = AllIntegerVectors in {
- let Predicates = !listconcat([HasStdExtZvbb],
+ let Predicates = !listconcat([HasStdExtZvkb],
GetVTypePredicates<vti>.Predicates) in {
def : Pat<(vti.Vector (and (riscv_vnot vti.RegClass:$rs1),
vti.RegClass:$rs2)),
@@ -260,14 +453,27 @@ foreach vti = AllIntegerVectors in {
}
defm : VPatUnarySDNode_V<bitreverse, "PseudoVBREV">;
-defm : VPatUnarySDNode_V<bswap, "PseudoVREV8">;
+defm : VPatUnarySDNode_V<bswap, "PseudoVREV8", HasStdExtZvkb>;
defm : VPatUnarySDNode_V<ctlz, "PseudoVCLZ">;
defm : VPatUnarySDNode_V<cttz, "PseudoVCTZ">;
defm : VPatUnarySDNode_V<ctpop, "PseudoVCPOP">;
defm : VPatBinarySDNode_VV_VX<rotl, "PseudoVROL">;
-def NegImm64 : SDNodeXForm<imm, [{
+// Invert the immediate and mask it to SEW for readability.
+def InvRot8Imm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(0x7 & (64 - N->getZExtValue()), SDLoc(N),
+ N->getValueType(0));
+}]>;
+def InvRot16Imm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(0xf & (64 - N->getZExtValue()), SDLoc(N),
+ N->getValueType(0));
+}]>;
+def InvRot32Imm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(0x1f & (64 - N->getZExtValue()), SDLoc(N),
+ N->getValueType(0));
+}]>;
+def InvRot64Imm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(0x3f & (64 - N->getZExtValue()), SDLoc(N),
N->getValueType(0));
}]>;
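
// A runnable sketch of the InvRot*Imm transforms: a rotate-left by imm is
// rewritten as vror.vi by (64 - imm) masked to the element width. Verified
// here for SEW=8 with illustrative helper names.
#include <cassert>
#include <cstdint>

static unsigned invRotImm(unsigned imm, unsigned sew) {
  return (64 - imm) & (sew - 1); // sew is 8/16/32/64, so sew - 1 is the mask
}
static uint8_t rotl8(uint8_t v, unsigned n) {
  return (uint8_t)((v << (n & 7)) | (v >> ((8 - n) & 7)));
}
static uint8_t rotr8(uint8_t v, unsigned n) {
  return (uint8_t)((v >> (n & 7)) | (v << ((8 - n) & 7)));
}

int main() {
  for (unsigned v = 0; v < 256; ++v)
    for (unsigned imm = 1; imm < 8; ++imm)
      assert(rotl8((uint8_t)v, imm) == rotr8((uint8_t)v, invRotImm(imm, 8)));
}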
@@ -275,26 +481,56 @@ def NegImm64 : SDNodeXForm<imm, [{
// Although there is no vrol.vi, an immediate rotate left can be achieved by
// negating the immediate in vror.vi
foreach vti = AllIntegerVectors in {
- let Predicates = !listconcat([HasStdExtZvbb],
+ let Predicates = !listconcat([HasStdExtZvkb],
GetVTypePredicates<vti>.Predicates) in {
def : Pat<(vti.Vector (rotl vti.RegClass:$rs2,
(vti.Vector (SplatPat_uimm6 uimm6:$rs1)))),
(!cast<Instruction>("PseudoVROR_VI_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
vti.RegClass:$rs2,
- (NegImm64 uimm6:$rs1),
+ (!cast<SDNodeXForm>("InvRot" # vti.SEW # "Imm") uimm6:$rs1),
vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
defm : VPatBinarySDNode_VV_VX_VI<rotr, "PseudoVROR", uimm6>;
+foreach vtiToWti = AllWidenableIntVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ let Predicates = !listconcat([HasStdExtZvbb],
+ GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : Pat<(shl (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))),
+ (wti.Vector (ext_oneuse (vti.Vector vti.RegClass:$rs1)))),
+ (!cast<Instruction>("PseudoVWSLL_VV_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs2, vti.RegClass:$rs1,
+ vti.AVL, vti.Log2SEW, TA_MA)>;
+
+ def : Pat<(shl (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))),
+ (wti.Vector (Low8BitsSplatPat (XLenVT GPR:$rs1)))),
+ (!cast<Instruction>("PseudoVWSLL_VX_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs2, GPR:$rs1,
+ vti.AVL, vti.Log2SEW, TA_MA)>;
+
+ def : Pat<(shl (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))),
+ (wti.Vector (SplatPat_uimm5 uimm5:$rs1))),
+ (!cast<Instruction>("PseudoVWSLL_VI_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs2, uimm5:$rs1,
+ vti.AVL, vti.Log2SEW, TA_MA)>;
+ }
+}
+
//===----------------------------------------------------------------------===//
// VL patterns
//===----------------------------------------------------------------------===//
-multiclass VPatUnaryVL_V<SDPatternOperator op, string instruction_name> {
+multiclass VPatUnaryVL_V<SDPatternOperator op, string instruction_name,
+ Predicate predicate = HasStdExtZvbb> {
foreach vti = AllIntegerVectors in {
- let Predicates = !listconcat([HasStdExtZvbb],
+ let Predicates = !listconcat([predicate],
GetVTypePredicates<vti>.Predicates) in {
def : Pat<(vti.Vector (op (vti.Vector vti.RegClass:$rs1),
(vti.Vector vti.RegClass:$merge),
@@ -312,7 +548,7 @@ multiclass VPatUnaryVL_V<SDPatternOperator op, string instruction_name> {
}
foreach vti = AllIntegerVectors in {
- let Predicates = !listconcat([HasStdExtZvbb],
+ let Predicates = !listconcat([HasStdExtZvkb],
GetVTypePredicates<vti>.Predicates) in {
def : Pat<(vti.Vector (riscv_and_vl (riscv_xor_vl
(vti.Vector vti.RegClass:$rs1),
@@ -351,7 +587,339 @@ foreach vti = AllIntegerVectors in {
}
defm : VPatUnaryVL_V<riscv_bitreverse_vl, "PseudoVBREV">;
-defm : VPatUnaryVL_V<riscv_bswap_vl, "PseudoVREV8">;
+defm : VPatUnaryVL_V<riscv_bswap_vl, "PseudoVREV8", HasStdExtZvkb>;
defm : VPatUnaryVL_V<riscv_ctlz_vl, "PseudoVCLZ">;
defm : VPatUnaryVL_V<riscv_cttz_vl, "PseudoVCTZ">;
defm : VPatUnaryVL_V<riscv_ctpop_vl, "PseudoVCPOP">;
+
+defm : VPatBinaryVL_VV_VX<riscv_rotl_vl, "PseudoVROL">;
+// Although there is no vrol.vi, an immediate rotate left can be achieved by
+// negating the immediate in vror.vi
+foreach vti = AllIntegerVectors in {
+ let Predicates = !listconcat([HasStdExtZvkb],
+ GetVTypePredicates<vti>.Predicates) in {
+ def : Pat<(riscv_rotl_vl vti.RegClass:$rs2,
+ (vti.Vector (SplatPat_uimm6 uimm6:$rs1)),
+ (vti.Vector vti.RegClass:$merge),
+ (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVROR_VI_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge,
+ vti.RegClass:$rs2,
+ (!cast<SDNodeXForm>("InvRot" # vti.SEW # "Imm") uimm6:$rs1),
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
+}
+defm : VPatBinaryVL_VV_VX_VI<riscv_rotr_vl, "PseudoVROR", uimm6>;
+
+foreach vtiToWti = AllWidenableIntVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ let Predicates = !listconcat([HasStdExtZvbb],
+ GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : Pat<(riscv_shl_vl
+ (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))),
+ (wti.Vector (ext_oneuse (vti.Vector vti.RegClass:$rs1))),
+ (wti.Vector wti.RegClass:$merge),
+ (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVWSLL_VV_"#vti.LMul.MX#"_MASK")
+ wti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+
+ def : Pat<(riscv_shl_vl
+ (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))),
+ (wti.Vector (Low8BitsSplatPat (XLenVT GPR:$rs1))),
+ (wti.Vector wti.RegClass:$merge),
+ (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVWSLL_VX_"#vti.LMul.MX#"_MASK")
+ wti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+
+ def : Pat<(riscv_shl_vl
+ (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))),
+ (wti.Vector (SplatPat_uimm5 uimm5:$rs1)),
+ (wti.Vector wti.RegClass:$merge),
+ (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVWSLL_VI_"#vti.LMul.MX#"_MASK")
+ wti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+
+ def : Pat<(riscv_vwsll_vl
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Vector vti.RegClass:$rs1),
+ (wti.Vector wti.RegClass:$merge),
+ (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVWSLL_VV_"#vti.LMul.MX#"_MASK")
+ wti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+
+ def : Pat<(riscv_vwsll_vl
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Vector (Low8BitsSplatPat (XLenVT GPR:$rs1))),
+ (wti.Vector wti.RegClass:$merge),
+ (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVWSLL_VX_"#vti.LMul.MX#"_MASK")
+ wti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+
+ def : Pat<(riscv_vwsll_vl
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Vector (SplatPat_uimm5 uimm5:$rs1)),
+ (wti.Vector wti.RegClass:$merge),
+ (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVWSLL_VI_"#vti.LMul.MX#"_MASK")
+ wti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Codegen patterns
+//===----------------------------------------------------------------------===//
+
+class VPatUnaryNoMask_Zvk<string intrinsic_name,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op2_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg op2_reg_class> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
+ (result_type result_reg_class:$merge),
+ (op2_type op2_reg_class:$rs2),
+ VLOpFrag, (XLenVT timm:$policy))),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
+ (result_type result_reg_class:$merge),
+ (op2_type op2_reg_class:$rs2),
+ GPR:$vl, sew, (XLenVT timm:$policy))>;
+
+class VPatUnaryNoMask_VS_Zvk<string intrinsic_name,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op2_type,
+ int sew,
+ LMULInfo vlmul,
+ LMULInfo vs2_lmul,
+ VReg result_reg_class,
+ VReg op2_reg_class> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
+ (result_type result_reg_class:$merge),
+ (op2_type op2_reg_class:$rs2),
+ VLOpFrag, (XLenVT timm:$policy))),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_"#vs2_lmul.MX)
+ (result_type result_reg_class:$merge),
+ (op2_type op2_reg_class:$rs2),
+ GPR:$vl, sew, (XLenVT timm:$policy))>;
+
+multiclass VPatUnaryV_V_NoMask_Zvk<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in
+ def : VPatUnaryNoMask_Zvk<intrinsic # "_vv", instruction, "VV",
+ vti.Vector, vti.Vector, vti.Log2SEW,
+ vti.LMul, vti.RegClass, vti.RegClass>;
+}
+
+multiclass VPatUnaryV_S_NoMaskVectorCrypto<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in
+ foreach vti_vs2 = ZvkI32IntegerVectors<vti.LMul.MX>.vs2_types in
+ def : VPatUnaryNoMask_VS_Zvk<intrinsic # "_vs", instruction, "VS",
+ vti.Vector, vti_vs2.Vector, vti.Log2SEW,
+ vti.LMul, vti_vs2.LMul, vti.RegClass, vti_vs2.RegClass>;
+}
+
+multiclass VPatUnaryV_V_S_NoMask_Zvk<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ defm : VPatUnaryV_V_NoMask_Zvk<intrinsic, instruction, vtilist>;
+ defm : VPatUnaryV_S_NoMaskVectorCrypto<intrinsic, instruction, vtilist>;
+}
+
+multiclass VPatBinaryV_VV_NoMask<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in
+ def : VPatTernaryNoMaskWithPolicy<intrinsic, instruction, "VV",
+ vti.Vector, vti.Vector, vti.Vector,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
+ vti.RegClass, vti.RegClass>;
+}
+
+multiclass VPatBinaryV_VI_NoMask<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist, Operand imm_type = tuimm5> {
+ foreach vti = vtilist in
+ def : VPatTernaryNoMaskWithPolicy<intrinsic, instruction, "VI",
+ vti.Vector, vti.Vector, XLenVT,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
+ vti.RegClass, imm_type>;
+}
+
+multiclass VPatBinaryV_VI_NoMaskTU<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist, Operand imm_type = tuimm5> {
+ foreach vti = vtilist in
+ def : VPatBinaryNoMaskTU<intrinsic, instruction # "_VI_" # vti.LMul.MX,
+ vti.Vector, vti.Vector, XLenVT, vti.Log2SEW,
+ vti.RegClass, vti.RegClass, imm_type>;
+}
+
+multiclass VPatBinaryV_VV_NoMaskTU<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in
+ def : VPatBinaryNoMaskTU<intrinsic, instruction # "_VV_" # vti.LMul.MX,
+ vti.Vector, vti.Vector, vti.Vector, vti.Log2SEW,
+ vti.RegClass, vti.RegClass, vti.RegClass>;
+}
+
+multiclass VPatBinaryV_VX_VROTATE<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist, bit isSEWAware = 0> {
+ foreach vti = vtilist in {
+ defvar kind = "V"#vti.ScalarSuffix;
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ defm : VPatBinary<intrinsic,
+ !if(isSEWAware,
+ instruction#"_"#kind#"_"#vti.LMul.MX#"_E"#vti.SEW,
+ instruction#"_"#kind#"_"#vti.LMul.MX),
+ vti.Vector, vti.Vector, XLenVT, vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, vti.ScalarRegClass>;
+ }
+}
+
+multiclass VPatBinaryV_VI_VROL<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist, bit isSEWAware = 0> {
+ foreach vti = vtilist in {
+ defvar Intr = !cast<Intrinsic>(intrinsic);
+ defvar Pseudo = !cast<Instruction>(
+ !if(isSEWAware, instruction#"_VI_"#vti.LMul.MX#"_E"#vti.SEW,
+ instruction#"_VI_"#vti.LMul.MX));
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ def : Pat<(vti.Vector (Intr (vti.Vector vti.RegClass:$merge),
+ (vti.Vector vti.RegClass:$rs2),
+ (XLenVT uimm6:$rs1),
+ VLOpFrag)),
+ (Pseudo (vti.Vector vti.RegClass:$merge),
+ (vti.Vector vti.RegClass:$rs2),
+ (InvRot64Imm uimm6:$rs1),
+ GPR:$vl, vti.Log2SEW, TU_MU)>;
+
+ defvar IntrMask = !cast<Intrinsic>(intrinsic#"_mask");
+ defvar PseudoMask = !cast<Instruction>(
+ !if(isSEWAware, instruction#"_VI_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK",
+ instruction#"_VI_"#vti.LMul.MX#"_MASK"));
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ def : Pat<(vti.Vector (IntrMask (vti.Vector vti.RegClass:$merge),
+ (vti.Vector vti.RegClass:$rs2),
+ (XLenVT uimm6:$rs1),
+ (vti.Mask V0),
+ VLOpFrag, (XLenVT timm:$policy))),
+ (PseudoMask (vti.Vector vti.RegClass:$merge),
+ (vti.Vector vti.RegClass:$rs2),
+ (InvRot64Imm uimm6:$rs1),
+ (vti.Mask V0),
+ GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
+ }
+}
+
+multiclass VPatBinaryV_VV_VX_VROL<string intrinsic, string instruction,
+ string instruction2, list<VTypeInfo> vtilist>
+ : VPatBinaryV_VV<intrinsic, instruction, vtilist>,
+ VPatBinaryV_VX_VROTATE<intrinsic, instruction, vtilist>,
+ VPatBinaryV_VI_VROL<intrinsic, instruction2, vtilist>;
+
+multiclass VPatBinaryV_VV_VX_VI_VROR<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist, Operand ImmType = uimm6>
+ : VPatBinaryV_VV<intrinsic, instruction, vtilist>,
+ VPatBinaryV_VX_VROTATE<intrinsic, instruction, vtilist>,
+ VPatBinaryV_VI<intrinsic, instruction, vtilist, ImmType>;
+
+multiclass VPatBinaryW_VI_VWSLL<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach VtiToWti = vtilist in {
+ defvar Vti = VtiToWti.Vti;
+ defvar Wti = VtiToWti.Wti;
+ defm : VPatBinary<intrinsic, instruction # "_VI_" # Vti.LMul.MX,
+ Wti.Vector, Vti.Vector, XLenVT, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass,
+ Vti.RegClass, uimm5>;
+ }
+}
+
+multiclass VPatBinaryW_VX_VWSLL<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach VtiToWti = vtilist in {
+ defvar Vti = VtiToWti.Vti;
+ defvar Wti = VtiToWti.Wti;
+ defvar kind = "V"#Vti.ScalarSuffix;
+ let Predicates = !listconcat(GetVTypePredicates<Vti>.Predicates,
+ GetVTypePredicates<Wti>.Predicates) in
+ defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
+ Wti.Vector, Vti.Vector, XLenVT, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass,
+ Vti.RegClass, Vti.ScalarRegClass>;
+ }
+}
+
+multiclass VPatBinaryW_VV_VX_VI_VWSLL<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist>
+ : VPatBinaryW_VV<intrinsic, instruction, vtilist>,
+ VPatBinaryW_VX_VWSLL<intrinsic, instruction, vtilist>,
+ VPatBinaryW_VI_VWSLL<intrinsic, instruction, vtilist>;
+
+let Predicates = [HasStdExtZvbb] in {
+ defm : VPatUnaryV_V<"int_riscv_vbrev", "PseudoVBREV", AllIntegerVectors>;
+ defm : VPatUnaryV_V<"int_riscv_vclz", "PseudoVCLZ", AllIntegerVectors>;
+ defm : VPatUnaryV_V<"int_riscv_vctz", "PseudoVCTZ", AllIntegerVectors>;
+ defm : VPatUnaryV_V<"int_riscv_vcpopv", "PseudoVCPOP", AllIntegerVectors>;
+ defm : VPatBinaryW_VV_VX_VI_VWSLL<"int_riscv_vwsll", "PseudoVWSLL", AllWidenableIntVectors>;
+} // Predicates = [HasStdExtZvbb]
+
+let Predicates = [HasStdExtZvbc] in {
+ defm : VPatBinaryV_VV_VX<"int_riscv_vclmul", "PseudoVCLMUL", I64IntegerVectors>;
+ defm : VPatBinaryV_VV_VX<"int_riscv_vclmulh", "PseudoVCLMULH", I64IntegerVectors>;
+} // Predicates = [HasStdExtZvbc]
+
+let Predicates = [HasStdExtZvkb] in {
+ defm : VPatBinaryV_VV_VX<"int_riscv_vandn", "PseudoVANDN", AllIntegerVectors>;
+ defm : VPatUnaryV_V<"int_riscv_vbrev8", "PseudoVBREV8", AllIntegerVectors>;
+ defm : VPatUnaryV_V<"int_riscv_vrev8", "PseudoVREV8", AllIntegerVectors>;
+ defm : VPatBinaryV_VV_VX_VROL<"int_riscv_vrol", "PseudoVROL", "PseudoVROR", AllIntegerVectors>;
+ defm : VPatBinaryV_VV_VX_VI_VROR<"int_riscv_vror", "PseudoVROR", AllIntegerVectors>;
+} // Predicates = [HasStdExtZvkb]
+
+let Predicates = [HasStdExtZvkg] in {
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vghsh", "PseudoVGHSH", I32IntegerVectors>;
+ defm : VPatUnaryV_V_NoMask_Zvk<"int_riscv_vgmul", "PseudoVGMUL", I32IntegerVectors>;
+} // Predicates = [HasStdExtZvkg]
+
+let Predicates = [HasStdExtZvkned] in {
+ defm : VPatUnaryV_V_S_NoMask_Zvk<"int_riscv_vaesdf", "PseudoVAESDF", I32IntegerVectors>;
+ defm : VPatUnaryV_V_S_NoMask_Zvk<"int_riscv_vaesdm", "PseudoVAESDM", I32IntegerVectors>;
+ defm : VPatUnaryV_V_S_NoMask_Zvk<"int_riscv_vaesef", "PseudoVAESEF", I32IntegerVectors>;
+ defm : VPatUnaryV_V_S_NoMask_Zvk<"int_riscv_vaesem", "PseudoVAESEM", I32IntegerVectors>;
+ defm : VPatBinaryV_VI_NoMaskTU<"int_riscv_vaeskf1", "PseudoVAESKF1", I32IntegerVectors>;
+ defm : VPatBinaryV_VI_NoMask<"int_riscv_vaeskf2", "PseudoVAESKF2", I32IntegerVectors>;
+ defm : VPatUnaryV_S_NoMaskVectorCrypto<"int_riscv_vaesz", "PseudoVAESZ", I32IntegerVectors>;
+} // Predicates = [HasStdExtZvkned]
+
+let Predicates = [HasStdExtZvknha] in {
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ch", "PseudoVSHA2CH", I32IntegerVectors>;
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CH", I32IntegerVectors>;
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32IntegerVectors>;
+} // Predicates = [HasStdExtZvknha]
+
+let Predicates = [HasStdExtZvknhb] in {
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ch", "PseudoVSHA2CH", I32I64IntegerVectors>;
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CH", I32I64IntegerVectors>;
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32I64IntegerVectors>;
+} // Predicates = [HasStdExtZvknhb]
+
+let Predicates = [HasStdExtZvksed] in {
+ defm : VPatBinaryV_VI_NoMaskTU<"int_riscv_vsm4k", "PseudoVSM4K", I32IntegerVectors>;
+ defm : VPatUnaryV_V_S_NoMask_Zvk<"int_riscv_vsm4r", "PseudoVSM4R", I32IntegerVectors>;
+} // Predicates = [HasStdExtZvksed]
+
+let Predicates = [HasStdExtZvksh] in {
+ defm : VPatBinaryV_VI_NoMask<"int_riscv_vsm3c", "PseudoVSM3C", I32IntegerVectors>;
+ defm : VPatBinaryV_VV_NoMaskTU<"int_riscv_vsm3me", "PseudoVSM3ME", I32IntegerVectors>;
+} // Predicates = [HasStdExtZvksh]
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
index 5dfd47a687e9..fcc20c17c6b4 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -104,13 +104,18 @@ public:
BranchRelaxationScratchFrameIndex = Index;
}
+ unsigned getReservedSpillsSize() const {
+ return LibCallStackSize + RVPushStackSize;
+ }
+
unsigned getLibCallStackSize() const { return LibCallStackSize; }
void setLibCallStackSize(unsigned Size) { LibCallStackSize = Size; }
bool useSaveRestoreLibCalls(const MachineFunction &MF) const {
// We cannot use fixed locations for the callee saved spill slots if the
// function uses a varargs save area, or is an interrupt handler.
- return MF.getSubtarget<RISCVSubtarget>().enableSaveRestore() &&
+ return !isPushable(MF) &&
+ MF.getSubtarget<RISCVSubtarget>().enableSaveRestore() &&
VarArgsSaveSize == 0 && !MF.getFrameInfo().hasTailCall() &&
!MF.getFunction().hasFnAttribute("interrupt");
}
@@ -127,10 +132,13 @@ public:
unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; }
void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; }
- uint64_t isPushable(const MachineFunction &MF) const {
- return (!useSaveRestoreLibCalls(MF) &&
- MF.getSubtarget<RISCVSubtarget>().hasStdExtZcmp() &&
- !MF.getTarget().Options.DisableFramePointerElim(MF));
+ bool isPushable(const MachineFunction &MF) const {
+ // We cannot use fixed locations for the callee saved spill slots if the
+ // function uses a varargs save area.
+    // TODO: Use a separate placement for vararg registers to enable Zcmp.
+ return MF.getSubtarget<RISCVSubtarget>().hasStdExtZcmp() &&
+ !MF.getTarget().Options.DisableFramePointerElim(MF) &&
+ VarArgsSaveSize == 0;
}
int getRVPushRlist() const { return RVPushRlist; }
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
index da104657680a..02ea5270823d 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
@@ -18,6 +18,101 @@
using namespace llvm;
+static bool checkRegisters(Register FirstDest, const MachineInstr &SecondMI) {
+ if (!SecondMI.getOperand(1).isReg())
+ return false;
+
+ if (SecondMI.getOperand(1).getReg() != FirstDest)
+ return false;
+
+ // If the input is virtual make sure this is the only user.
+ if (FirstDest.isVirtual()) {
+ auto &MRI = SecondMI.getMF()->getRegInfo();
+ return MRI.hasOneNonDBGUse(FirstDest);
+ }
+
+ return SecondMI.getOperand(0).getReg() == FirstDest;
+}
+
+// Fuse load with add:
+// add rd, rs1, rs2
+// ld rd, 0(rd)
+static bool isLDADD(const MachineInstr *FirstMI, const MachineInstr &SecondMI) {
+ if (SecondMI.getOpcode() != RISCV::LD)
+ return false;
+
+ if (!SecondMI.getOperand(2).isImm())
+ return false;
+
+ if (SecondMI.getOperand(2).getImm() != 0)
+ return false;
+
+  // Given SecondMI, when FirstMI is unspecified, we must return whether
+  // SecondMI may be part of a fused pair at all.
+ if (!FirstMI)
+ return true;
+
+ if (FirstMI->getOpcode() != RISCV::ADD)
+ return true;
+
+ return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
+}
+
+// Fuse these patterns:
+//
+// slli rd, rs1, 32
+// srli rd, rd, x
+// where 0 <= x <= 32
+//
+// and
+//
+// slli rd, rs1, 48
+// srli rd, rd, 48
+static bool isShiftedZExt(const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ if (SecondMI.getOpcode() != RISCV::SRLI)
+ return false;
+
+ if (!SecondMI.getOperand(2).isImm())
+ return false;
+
+ unsigned SRLIImm = SecondMI.getOperand(2).getImm();
+ bool IsShiftBy48 = SRLIImm == 48;
+ if (SRLIImm > 32 && !IsShiftBy48)
+ return false;
+
+  // Given SecondMI, when FirstMI is unspecified, we must return whether
+  // SecondMI may be part of a fused pair at all.
+ if (!FirstMI)
+ return true;
+
+ if (FirstMI->getOpcode() != RISCV::SLLI)
+ return false;
+
+ unsigned SLLIImm = FirstMI->getOperand(2).getImm();
+ if (IsShiftBy48 ? (SLLIImm != 48) : (SLLIImm != 32))
+ return false;
+
+ return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
+}
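
// A runnable sketch of the fused computation, assuming RV64 semantics:
// slli by 32 followed by srli by x (x <= 32) yields the zero-extended low
// word shifted left by 32 - x.
#include <cassert>
#include <cstdint>

static uint64_t shiftedZExtW(uint64_t rs1, unsigned x) {
  uint64_t t = rs1 << 32; // slli rd, rs1, 32
  return t >> x;          // srli rd, rd, x
}

int main() {
  uint64_t v = 0xdeadbeefcafef00dULL;
  assert(shiftedZExtW(v, 32) == (v & 0xffffffffULL));        // plain zext.w
  assert(shiftedZExtW(v, 28) == ((v & 0xffffffffULL) << 4)); // zext.w << 4
}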
+
+// Fuse AUIPC followed by ADDI
+// auipc rd, imm20
+// addi rd, rd, imm12
+static bool isAUIPCADDI(const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ if (SecondMI.getOpcode() != RISCV::ADDI)
+ return false;
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ if (!FirstMI)
+ return true;
+
+ if (FirstMI->getOpcode() != RISCV::AUIPC)
+ return false;
+
+ return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
+}
+
// Fuse LUI followed by ADDI or ADDIW.
// rd = imm[31:0] which decomposes to
// lui rd, imm[31:12]
@@ -27,7 +122,6 @@ static bool isLUIADDI(const MachineInstr *FirstMI,
if (SecondMI.getOpcode() != RISCV::ADDI &&
SecondMI.getOpcode() != RISCV::ADDIW)
return false;
-
// Assume the 1st instr to be a wildcard if it is unspecified.
if (!FirstMI)
return true;
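
// A runnable sketch of the decomposition described above: the low 12 bits
// become the sign-extended ADDI(W) immediate and LUI supplies the remainder,
// compensating when the low part acts as a negative value.
#include <cassert>
#include <cstdint>

static int32_t materialize(int32_t imm) {
  int32_t lo = imm & 0xfff; // imm[11:0]
  if (lo >= 0x800)          // ADDI sign-extends, so >= 0x800 acts negative
    lo -= 0x1000;
  uint32_t hi = (uint32_t)imm - (uint32_t)lo; // imm[31:12] << 12, for LUI
  return (int32_t)(hi + (uint32_t)lo);        // lui rd, hi >> 12; addi rd, rd, lo
}

int main() {
  for (int32_t v : {0, 1, -1, 0x7ff, 0x800, 0x12345678, -0x12345678})
    assert(materialize(v) == v);
}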
@@ -35,25 +129,7 @@ static bool isLUIADDI(const MachineInstr *FirstMI,
if (FirstMI->getOpcode() != RISCV::LUI)
return false;
- // The first operand of ADDI might be a frame index.
- if (!SecondMI.getOperand(1).isReg())
- return false;
-
- Register FirstDest = FirstMI->getOperand(0).getReg();
-
- // Destination of LUI should be the ADDI(W) source register.
- if (SecondMI.getOperand(1).getReg() != FirstDest)
- return false;
-
- // If the input is virtual make sure this is the only user.
- if (FirstDest.isVirtual()) {
- auto &MRI = SecondMI.getMF()->getRegInfo();
- return MRI.hasOneNonDBGUse(FirstDest);
- }
-
- // If the FirstMI destination is non-virtual, it should match the SecondMI
- // destination.
- return SecondMI.getOperand(0).getReg() == FirstDest;
+ return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
}
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
@@ -65,6 +141,15 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
if (ST.hasLUIADDIFusion() && isLUIADDI(FirstMI, SecondMI))
return true;
+ if (ST.hasAUIPCADDIFusion() && isAUIPCADDI(FirstMI, SecondMI))
+ return true;
+
+ if (ST.hasShiftedZExtFusion() && isShiftedZExt(FirstMI, SecondMI))
+ return true;
+
+ if (ST.hasLDADDFusion() && isLDADD(FirstMI, SecondMI))
+ return true;
+
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
index 841439bb732e..ff21fe1d4064 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
@@ -84,9 +84,7 @@ struct RISCVMakeCompressibleOpt : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &Fn) override;
- RISCVMakeCompressibleOpt() : MachineFunctionPass(ID) {
- initializeRISCVMakeCompressibleOptPass(*PassRegistry::getPassRegistry());
- }
+ RISCVMakeCompressibleOpt() : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return RISCV_COMPRESS_INSTRS_NAME; }
};
@@ -271,7 +269,7 @@ static Register analyzeCompressibleUses(MachineInstr &FirstMI,
RegScavenger RS;
RS.enterBasicBlockEnd(MBB);
- RS.backward(MIs.back()->getIterator());
+ RS.backward(std::next(MIs.back()->getIterator()));
return RS.scavengeRegisterBackwards(*RCToScavenge, FirstMI.getIterator(),
/*RestoreAfter=*/false, /*SPAdj=*/0,
/*AllowSpill=*/false);
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index 855322b981fb..ae46d5554d35 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -19,7 +19,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include <optional>
-#include <set>
using namespace llvm;
#define DEBUG_TYPE "riscv-merge-base-offset"
@@ -94,7 +93,8 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
if (HiOp1.getTargetFlags() != ExpectedFlags)
return false;
- if (!(HiOp1.isGlobal() || HiOp1.isCPI()) || HiOp1.getOffset() != 0)
+ if (!(HiOp1.isGlobal() || HiOp1.isCPI() || HiOp1.isBlockAddress()) ||
+ HiOp1.getOffset() != 0)
return false;
Register HiDestReg = Hi.getOperand(0).getReg();
@@ -108,7 +108,8 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
const MachineOperand &LoOp2 = Lo->getOperand(2);
if (Hi.getOpcode() == RISCV::LUI) {
if (LoOp2.getTargetFlags() != RISCVII::MO_LO ||
- !(LoOp2.isGlobal() || LoOp2.isCPI()) || LoOp2.getOffset() != 0)
+ !(LoOp2.isGlobal() || LoOp2.isCPI() || LoOp2.isBlockAddress()) ||
+ LoOp2.getOffset() != 0)
return false;
} else {
assert(Hi.getOpcode() == RISCV::AUIPC);
@@ -120,8 +121,10 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
if (HiOp1.isGlobal()) {
LLVM_DEBUG(dbgs() << " Found lowered global address: "
<< *HiOp1.getGlobal() << "\n");
- } else {
- assert(HiOp1.isCPI());
+ } else if (HiOp1.isBlockAddress()) {
+ LLVM_DEBUG(dbgs() << " Found lowered block address: "
+ << *HiOp1.getBlockAddress() << "\n");
+ } else if (HiOp1.isCPI()) {
LLVM_DEBUG(dbgs() << " Found lowered constant pool: " << HiOp1.getIndex()
<< "\n");
}
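
The operand test above, pulled out as a standalone predicate for clarity (a sketch mirroring the diff, not the full detectFoldable logic; the name isFoldableHiOperand is illustrative):

// Accepts the high-part operand of a LUI/AUIPC only when it is a global,
// a constant pool entry, or (newly) a block address, carries the expected
// target flags, and has no offset folded in yet.
static bool isFoldableHiOperand(const MachineOperand &HiOp1,
                                unsigned ExpectedFlags) {
  if (HiOp1.getTargetFlags() != ExpectedFlags)
    return false;
  if (!(HiOp1.isGlobal() || HiOp1.isCPI() || HiOp1.isBlockAddress()))
    return false;
  return HiOp1.getOffset() == 0;
}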
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp
index 6c1b0cf5ca7f..3c5462057b28 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVMoveMerger.cpp - RISCV move merge pass -----------------------===//
+//===-- RISCVMoveMerger.cpp - RISC-V move merge pass ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -22,9 +22,7 @@ namespace {
struct RISCVMoveMerge : public MachineFunctionPass {
static char ID;
- RISCVMoveMerge() : MachineFunctionPass(ID) {
- initializeRISCVMoveMergePass(*PassRegistry::getPassRegistry());
- }
+ RISCVMoveMerge() : MachineFunctionPass(ID) {}
const RISCVInstrInfo *TII;
const TargetRegisterInfo *TRI;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
index 7014755b6706..2c2b34bb5b77 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
@@ -12,16 +12,21 @@
// extended bits aren't consumed or because the input was already sign extended
// by an earlier instruction.
//
-// Then it removes the -w suffix from each addiw and slliw instructions
-// whenever all users are dependent only on the lower word of the result of the
-// instruction. We do this only for addiw, slliw, and mulw because the -w forms
-// are less compressible.
+// Then it removes the -w suffix from opw instructions whenever all users are
+// dependent only on the lower word of the result of the instruction.
+// The cases handled are:
+// * addw because c.add has a larger register encoding than c.addw.
+// * addiw because it helps reduce test differences between RV32 and RV64
+// w/o being a pessimization.
+// * mulw because c.mulw doesn't exist but c.mul does (w/ zcb)
+// * slliw because c.slliw doesn't exist and c.slli does
//
//===---------------------------------------------------------------------===//
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -48,9 +53,7 @@ class RISCVOptWInstrs : public MachineFunctionPass {
public:
static char ID;
- RISCVOptWInstrs() : MachineFunctionPass(ID) {
- initializeRISCVOptWInstrsPass(*PassRegistry::getPassRegistry());
- }
+ RISCVOptWInstrs() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
bool removeSExtWInstrs(MachineFunction &MF, const RISCVInstrInfo &TII,
@@ -76,6 +79,29 @@ FunctionPass *llvm::createRISCVOptWInstrsPass() {
return new RISCVOptWInstrs();
}
+static bool vectorPseudoHasAllNBitUsers(const MachineOperand &UserOp,
+ unsigned Bits) {
+ const MachineInstr &MI = *UserOp.getParent();
+ unsigned MCOpcode = RISCV::getRVVMCOpcode(MI.getOpcode());
+
+ if (!MCOpcode)
+ return false;
+
+ const MCInstrDesc &MCID = MI.getDesc();
+ const uint64_t TSFlags = MCID.TSFlags;
+ if (!RISCVII::hasSEWOp(TSFlags))
+ return false;
+ assert(RISCVII::hasVLOp(TSFlags));
+ const unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MCID)).getImm();
+
+ if (UserOp.getOperandNo() == RISCVII::getVLOpNum(MCID))
+ return false;
+
+ auto NumDemandedBits =
+ RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
+ return NumDemandedBits && Bits >= *NumDemandedBits;
+}
+
// Checks if all users only demand the lower \p OrigBits of the original
// instruction's result.
// TODO: handle multiple interdependent transformations
@@ -100,12 +126,14 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI,
if (MI->getNumExplicitDefs() != 1)
return false;
- for (auto &UserOp : MRI.use_operands(MI->getOperand(0).getReg())) {
+ for (auto &UserOp : MRI.use_nodbg_operands(MI->getOperand(0).getReg())) {
const MachineInstr *UserMI = UserOp.getParent();
unsigned OpIdx = UserOp.getOperandNo();
switch (UserMI->getOpcode()) {
default:
+ if (vectorPseudoHasAllNBitUsers(UserOp, Bits))
+ break;
return false;
case RISCV::ADDIW:
@@ -283,6 +311,8 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI,
Worklist.push_back(std::make_pair(UserMI, Bits));
break;
+ case RISCV::CZERO_EQZ:
+ case RISCV::CZERO_NEZ:
case RISCV::VT_MASKC:
case RISCV::VT_MASKCN:
if (OpIdx != 1)
@@ -327,9 +357,27 @@ static bool isSignExtendingOpW(const MachineInstr &MI,
// An ORI with an >11 bit immediate (negative 12-bit) will set bits 63:11.
case RISCV::ORI:
return !isUInt<11>(MI.getOperand(2).getImm());
+ // A bseti with X0 is sign extended if the immediate is less than 31.
+ case RISCV::BSETI:
+ return MI.getOperand(2).getImm() < 31 &&
+ MI.getOperand(1).getReg() == RISCV::X0;
// Copying from X0 produces zero.
case RISCV::COPY:
return MI.getOperand(1).getReg() == RISCV::X0;
+ case RISCV::PseudoAtomicLoadNand32:
+ return true;
+ case RISCV::PseudoVMV_X_S_MF8:
+ case RISCV::PseudoVMV_X_S_MF4:
+ case RISCV::PseudoVMV_X_S_MF2:
+ case RISCV::PseudoVMV_X_S_M1:
+ case RISCV::PseudoVMV_X_S_M2:
+ case RISCV::PseudoVMV_X_S_M4:
+ case RISCV::PseudoVMV_X_S_M8: {
+ // vmv.x.s has at least 33 sign bits if log2(sew) <= 5.
+ int64_t Log2SEW = MI.getOperand(2).getImm();
+ assert(Log2SEW >= 3 && Log2SEW <= 6 && "Unexpected Log2SEW");
+ return Log2SEW <= 5;
+ }
}
return false;
@@ -348,6 +396,11 @@ static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST,
MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
if (!SrcMI)
return false;
+ // Code assumes the register is operand 0.
+ // TODO: Maybe the worklist should store register?
+ if (!SrcMI->getOperand(0).isReg() ||
+ SrcMI->getOperand(0).getReg() != SrcReg)
+ return false;
// Add SrcMI to the worklist.
Worklist.push_back(SrcMI);
return true;
@@ -446,9 +499,16 @@ static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST,
break;
case RISCV::PseudoCCADDW:
+ case RISCV::PseudoCCADDIW:
case RISCV::PseudoCCSUBW:
- // Returns operand 4 or an ADDW/SUBW of operands 5 and 6. We only need to
- // check if operand 4 is sign extended.
+ case RISCV::PseudoCCSLLW:
+ case RISCV::PseudoCCSRLW:
+ case RISCV::PseudoCCSRAW:
+ case RISCV::PseudoCCSLLIW:
+ case RISCV::PseudoCCSRLIW:
+ case RISCV::PseudoCCSRAIW:
+ // Returns operand 4 or an ADDW/SUBW/etc. of operands 5 and 6. We only
+ // need to check if operand 4 is sign extended.
if (!AddRegDefToWorkList(MI->getOperand(4).getReg()))
return false;
break;
@@ -504,6 +564,8 @@ static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST,
break;
}
+ case RISCV::CZERO_EQZ:
+ case RISCV::CZERO_NEZ:
case RISCV::VT_MASKC:
case RISCV::VT_MASKCN:
// Instructions return zero or operand 1. Result is sign extended if
@@ -567,25 +629,23 @@ bool RISCVOptWInstrs::removeSExtWInstrs(MachineFunction &MF,
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
- for (auto I = MBB.begin(), IE = MBB.end(); I != IE;) {
- MachineInstr *MI = &*I++;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
// We're looking for the sext.w pattern ADDIW rd, rs1, 0.
- if (!RISCV::isSEXT_W(*MI))
+ if (!RISCV::isSEXT_W(MI))
continue;
- Register SrcReg = MI->getOperand(1).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
SmallPtrSet<MachineInstr *, 4> FixableDefs;
// If all users only use the lower bits, this sext.w is redundant.
// Or if all definitions reaching MI sign-extend their output,
// then sext.w is redundant.
- if (!hasAllWUsers(*MI, ST, MRI) &&
+ if (!hasAllWUsers(MI, ST, MRI) &&
!isSignExtendedW(SrcReg, ST, MRI, FixableDefs))
continue;
- Register DstReg = MI->getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
if (!MRI.constrainRegClass(SrcReg, MRI.getRegClass(DstReg)))
continue;
@@ -603,7 +663,7 @@ bool RISCVOptWInstrs::removeSExtWInstrs(MachineFunction &MF,
LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
MRI.replaceRegWith(DstReg, SrcReg);
MRI.clearKillFlags(SrcReg);
- MI->eraseFromParent();
+ MI.eraseFromParent();
++NumRemovedSExtW;
MadeChange = true;
}
@@ -621,14 +681,13 @@ bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF,
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
- for (auto I = MBB.begin(), IE = MBB.end(); I != IE; ++I) {
- MachineInstr &MI = *I;
-
+ for (MachineInstr &MI : MBB) {
unsigned Opc;
switch (MI.getOpcode()) {
default:
continue;
case RISCV::ADDW: Opc = RISCV::ADD; break;
+ case RISCV::ADDIW: Opc = RISCV::ADDI; break;
case RISCV::MULW: Opc = RISCV::MUL; break;
case RISCV::SLLIW: Opc = RISCV::SLLI; break;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
new file mode 100644
index 000000000000..57b473645ae7
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
@@ -0,0 +1,116 @@
+//===-- RISCVPostRAExpandPseudoInsts.cpp - Expand pseudo instrs ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands the pseudo instruction pseudolisimm32
+// into target instructions. This pass should be run during the post-regalloc
+// passes, before post RA scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/RISCVMatInt.h"
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+#define RISCV_POST_RA_EXPAND_PSEUDO_NAME \
+ "RISC-V post-regalloc pseudo instruction expansion pass"
+
+namespace {
+
+class RISCVPostRAExpandPseudo : public MachineFunctionPass {
+public:
+ const RISCVInstrInfo *TII;
+ static char ID;
+
+ RISCVPostRAExpandPseudo() : MachineFunctionPass(ID) {
+ initializeRISCVPostRAExpandPseudoPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return RISCV_POST_RA_EXPAND_PSEUDO_NAME;
+ }
+
+private:
+ bool expandMBB(MachineBasicBlock &MBB);
+ bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandMovImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
+};
+
+char RISCVPostRAExpandPseudo::ID = 0;
+
+bool RISCVPostRAExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ bool Modified = false;
+ for (auto &MBB : MF)
+ Modified |= expandMBB(MBB);
+ return Modified;
+}
+
+bool RISCVPostRAExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+ Modified |= expandMI(MBB, MBBI, NMBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool RISCVPostRAExpandPseudo::expandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ switch (MBBI->getOpcode()) {
+ case RISCV::PseudoMovImm:
+ return expandMovImm(MBB, MBBI);
+ default:
+ return false;
+ }
+}
+
+bool RISCVPostRAExpandPseudo::expandMovImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ DebugLoc DL = MBBI->getDebugLoc();
+
+ int64_t Val = MBBI->getOperand(1).getImm();
+
+ RISCVMatInt::InstSeq Seq =
+ RISCVMatInt::generateInstSeq(Val, MBB.getParent()->getSubtarget());
+ assert(!Seq.empty());
+
+ Register DstReg = MBBI->getOperand(0).getReg();
+ bool DstIsDead = MBBI->getOperand(0).isDead();
+ bool Renamable = MBBI->getOperand(0).isRenamable();
+
+ TII->movImm(MBB, MBBI, DL, DstReg, Val, MachineInstr::NoFlags, Renamable,
+ DstIsDead);
+
+ MBBI->eraseFromParent();
+ return true;
+}
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(RISCVPostRAExpandPseudo, "riscv-expand-pseudolisimm32",
+ RISCV_POST_RA_EXPAND_PSEUDO_NAME, false, false)
+namespace llvm {
+
+FunctionPass *createRISCVPostRAExpandPseudoPass() {
+ return new RISCVPostRAExpandPseudo();
+}
+
+} // end of namespace llvm
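
For a concrete sense of the expansion, the classic LUI+ADDI split that movImm emits for a simple 32-bit immediate; a standalone check of the arithmetic (illustrative only, the actual sequence choice is made by RISCVMatInt::generateInstSeq):

#include <cassert>
#include <cstdint>

// Split Val into Hi20/Lo12 as LUI/ADDI expect: Lo12 is the sign-extended
// low 12 bits, and Hi20 absorbs the borrow when Lo12 is negative.
int main() {
  const int64_t Val = 0x12345678;
  const int64_t Lo12 = ((Val & 0xFFF) ^ 0x800) - 0x800; // sign-extend 12 bits
  const int64_t Hi20 = (Val - Lo12) >> 12;              // LUI a0, Hi20
  assert((Hi20 << 12) + Lo12 == Val);                   // ADDI a0, a0, Lo12
}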
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td
index 01291001cd7c..58989fd716fa 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -10,6 +10,35 @@
// RISC-V processors supported.
//===----------------------------------------------------------------------===//
+class RISCVTuneInfo {
+ bits<8> PrefFunctionAlignment = 1;
+ bits<8> PrefLoopAlignment = 1;
+
+ // Information needed by LoopDataPrefetch.
+ bits<16> CacheLineSize = 0;
+ bits<16> PrefetchDistance = 0;
+ bits<16> MinPrefetchStride = 1;
+ bits<32> MaxPrefetchIterationsAhead = -1;
+
+ bits<32> MinimumJumpTableEntries = 5;
+}
+
+def RISCVTuneInfoTable : GenericTable {
+ let FilterClass = "RISCVTuneInfo";
+ let CppTypeName = "RISCVTuneInfo";
+ let Fields = ["Name", "PrefFunctionAlignment", "PrefLoopAlignment",
+ "CacheLineSize", "PrefetchDistance",
+ "MinPrefetchStride", "MaxPrefetchIterationsAhead",
+ "MinimumJumpTableEntries"];
+}
+
+def getRISCVTuneInfo : SearchIndex {
+ let Table = RISCVTuneInfoTable;
+ let Key = ["Name"];
+}
+
+class GenericTuneInfo: RISCVTuneInfo;
+
class RISCVProcessorModel<string n,
SchedMachineModel m,
list<SubtargetFeature> f,
@@ -27,13 +56,15 @@ class RISCVTuneProcessorModel<string n,
def GENERIC_RV32 : RISCVProcessorModel<"generic-rv32",
NoSchedModel,
- [Feature32Bit]>;
+ [Feature32Bit]>,
+ GenericTuneInfo;
def GENERIC_RV64 : RISCVProcessorModel<"generic-rv64",
NoSchedModel,
- [Feature64Bit]>;
+ [Feature64Bit]>,
+ GenericTuneInfo;
// Support generic for compatibility with other targets. The triple will be used
// to change to the appropriate rv32/rv64 version.
-def : ProcessorModel<"generic", NoSchedModel, []>;
+def : ProcessorModel<"generic", NoSchedModel, []>, GenericTuneInfo;
def ROCKET_RV32 : RISCVProcessorModel<"rocket-rv32",
RocketModel,
@@ -201,3 +232,47 @@ def SYNTACORE_SCR1_MAX : RISCVProcessorModel<"syntacore-scr1-max",
FeatureStdExtM,
FeatureStdExtC],
[TuneNoDefaultUnroll]>;
+
+def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
+ NoSchedModel,
+ [Feature64Bit,
+ FeatureStdExtZifencei,
+ FeatureStdExtZicsr,
+ FeatureStdExtZicntr,
+ FeatureStdExtZihpm,
+ FeatureStdExtZihintpause,
+ FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtF,
+ FeatureStdExtD,
+ FeatureStdExtC,
+ FeatureStdExtZba,
+ FeatureStdExtZbb,
+ FeatureStdExtZbc,
+ FeatureStdExtZbs,
+ FeatureStdExtZicbom,
+ FeatureStdExtZicbop,
+ FeatureStdExtZicboz,
+ FeatureVendorXVentanaCondOps],
+ [TuneVeyronFusions]>;
+
+def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu",
+ NoSchedModel,
+ [Feature64Bit,
+ FeatureStdExtZicsr,
+ FeatureStdExtZifencei,
+ FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtF,
+ FeatureStdExtD,
+ FeatureStdExtC,
+ FeatureStdExtZba,
+ FeatureStdExtZbb,
+ FeatureStdExtZbc,
+ FeatureStdExtZbs,
+ FeatureStdExtZkn,
+ FeatureStdExtZksed,
+ FeatureStdExtZksh,
+ FeatureStdExtSvinval,
+ FeatureStdExtZicbom,
+ FeatureStdExtZicboz]>;
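
GenericTable plus SearchIndex cause llvm-tblgen to emit a sorted constant array and a binary-search accessor for the RISCVTuneInfo records above; roughly (a sketch of the generated interface, assuming field types match the declared bit widths):

// Approximate shape of the record and lookup emitted into the generated
// searchable-tables include; names follow the .td definitions above.
struct RISCVTuneInfo {
  const char *Name;
  uint8_t PrefFunctionAlignment;
  uint8_t PrefLoopAlignment;
  uint16_t CacheLineSize;
  uint16_t PrefetchDistance;
  uint16_t MinPrefetchStride;
  uint32_t MaxPrefetchIterationsAhead;
  uint32_t MinimumJumpTableEntries;
};

// SearchIndex keyed on "Name"; returns nullptr when the CPU has no entry.
const RISCVTuneInfo *getRISCVTuneInfo(StringRef Name);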
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp
index f885adca669f..009dcf57f46d 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp
@@ -1,4 +1,4 @@
-//===------- RISCVPushPopOptimizer.cpp - RISCV Push/Pop opt. pass ---------===//
+//===------- RISCVPushPopOptimizer.cpp - RISC-V Push/Pop opt. pass --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,9 +6,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains a pass that modifies PUSH/POP instructions from Zca
-// standard to use their non prolog/epilog related functionalities
-// and generates POPRET instruction.
+// This file contains a pass that replaces Zcmp POP instructions with
+// POPRET[Z] where possible.
//
//===----------------------------------------------------------------------===//
@@ -23,9 +22,7 @@ namespace {
struct RISCVPushPopOpt : public MachineFunctionPass {
static char ID;
- RISCVPushPopOpt() : MachineFunctionPass(ID) {
- initializeRISCVPushPopOptPass(*PassRegistry::getPassRegistry());
- }
+ RISCVPushPopOpt() : MachineFunctionPass(ID) {}
const RISCVInstrInfo *TII;
const TargetRegisterInfo *TRI;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp
index fed3fa2987e5..735fc1350c00 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp
@@ -9,7 +9,8 @@
// This file implements a function pass that initializes undef vector value to
// temporary pseudo instruction and remove it in expandpseudo pass to prevent
// register allocation resulting in a constraint violated result for vector
-// instruction.
+// instruction. It also rewrites the NoReg tied operand back to an
+// IMPLICIT_DEF.
//
// RISC-V vector instruction has register overlapping constraint for certain
// instructions, and will cause illegal instruction trap if violated, we use
@@ -30,10 +31,18 @@
//
// See also: https://github.com/llvm/llvm-project/issues/50157
//
+// Additionally, this pass rewrites tied operands of vector instructions
+// from NoReg to IMPLICIT_DEF. (Note that this is a non-overlapping set of
+// operands to the above.) We use NoReg to sidestep a MachineCSE
+// optimization quality problem but need to convert back before
+// TwoAddressInstruction. See pr64282 for context.
+//
//===----------------------------------------------------------------------===//
#include "RISCV.h"
#include "RISCVSubtarget.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/DetectDeadLanes.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;
@@ -49,12 +58,14 @@ class RISCVInitUndef : public MachineFunctionPass {
const RISCVSubtarget *ST;
const TargetRegisterInfo *TRI;
+ // Newly added vregs, assumed to be fully rewritten
+ SmallSet<Register, 8> NewRegs;
+ SmallVector<MachineInstr *, 8> DeadInsts;
+
public:
static char ID;
- RISCVInitUndef() : MachineFunctionPass(ID) {
- initializeRISCVInitUndefPass(*PassRegistry::getPassRegistry());
- }
+ RISCVInitUndef() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -67,13 +78,13 @@ public:
private:
bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
const DeadLaneDetector &DLD);
- bool handleImplicitDef(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &Inst);
bool isVectorRegClass(const Register R);
const TargetRegisterClass *
getVRLargestSuperClass(const TargetRegisterClass *RC) const;
bool handleSubReg(MachineFunction &MF, MachineInstr &MI,
const DeadLaneDetector &DLD);
+ bool fixupIllOperand(MachineInstr *MI, MachineOperand &MO);
+ bool handleReg(MachineInstr *MI);
};
} // end anonymous namespace
@@ -118,65 +129,38 @@ static unsigned getUndefInitOpcode(unsigned RegClassID) {
}
}
-bool RISCVInitUndef::handleImplicitDef(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &Inst) {
- const TargetRegisterInfo &TRI =
- *MBB.getParent()->getSubtarget().getRegisterInfo();
-
- assert(Inst->getOpcode() == TargetOpcode::IMPLICIT_DEF);
-
- Register Reg = Inst->getOperand(0).getReg();
- if (!Reg.isVirtual())
- return false;
-
- bool NeedPseudoInit = false;
- SmallVector<MachineOperand *, 1> UseMOs;
- for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
- MachineInstr *UserMI = MO.getParent();
-
- bool HasEarlyClobber = false;
- bool TiedToDef = false;
- for (MachineOperand &UserMO : UserMI->operands()) {
- if (!UserMO.isReg())
- continue;
- if (UserMO.isEarlyClobber())
- HasEarlyClobber = true;
- if (UserMO.isUse() && UserMO.isTied() &&
- TRI.regsOverlap(UserMO.getReg(), Reg))
- TiedToDef = true;
- }
- if (HasEarlyClobber && !TiedToDef) {
- NeedPseudoInit = true;
- UseMOs.push_back(&MO);
- }
- }
-
- if (!NeedPseudoInit)
- return false;
-
- LLVM_DEBUG(
- dbgs() << "Emitting PseudoRVVInitUndef for implicit vector register "
- << Reg << '\n');
-
- unsigned RegClassID = getVRLargestSuperClass(MRI->getRegClass(Reg))->getID();
- unsigned Opcode = getUndefInitOpcode(RegClassID);
-
- BuildMI(MBB, Inst, Inst->getDebugLoc(), TII->get(Opcode), Reg);
-
- Inst = MBB.erase(Inst);
-
- for (auto MO : UseMOs)
- MO->setIsUndef(false);
-
- return true;
-}
-
static bool isEarlyClobberMI(MachineInstr &MI) {
return llvm::any_of(MI.defs(), [](const MachineOperand &DefMO) {
return DefMO.isReg() && DefMO.isEarlyClobber();
});
}
+static bool findImplictDefMIFromReg(Register Reg, MachineRegisterInfo *MRI) {
+ for (auto &DefMI : MRI->def_instructions(Reg)) {
+ if (DefMI.getOpcode() == TargetOpcode::IMPLICIT_DEF)
+ return true;
+ }
+ return false;
+}
+
+bool RISCVInitUndef::handleReg(MachineInstr *MI) {
+ bool Changed = false;
+ for (auto &UseMO : MI->uses()) {
+ if (!UseMO.isReg())
+ continue;
+ if (UseMO.isTied())
+ continue;
+ if (!UseMO.getReg().isVirtual())
+ continue;
+ if (!isVectorRegClass(UseMO.getReg()))
+ continue;
+
+ if (UseMO.isUndef() || findImplictDefMIFromReg(UseMO.getReg(), MRI))
+ Changed |= fixupIllOperand(MI, UseMO);
+ }
+ return Changed;
+}
+
bool RISCVInitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI,
const DeadLaneDetector &DLD) {
bool Changed = false;
@@ -186,8 +170,12 @@ bool RISCVInitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI,
continue;
if (!UseMO.getReg().isVirtual())
continue;
+ if (UseMO.isTied())
+ continue;
Register Reg = UseMO.getReg();
+ if (NewRegs.count(Reg))
+ continue;
DeadLaneDetector::VRegInfo Info =
DLD.getVRegInfo(Register::virtReg2Index(Reg));
@@ -235,18 +223,53 @@ bool RISCVInitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI,
return Changed;
}
+bool RISCVInitUndef::fixupIllOperand(MachineInstr *MI, MachineOperand &MO) {
+
+ LLVM_DEBUG(
+ dbgs() << "Emitting PseudoRVVInitUndef for implicit vector register "
+ << MO.getReg() << '\n');
+
+ const TargetRegisterClass *TargetRegClass =
+ getVRLargestSuperClass(MRI->getRegClass(MO.getReg()));
+ unsigned Opcode = getUndefInitOpcode(TargetRegClass->getID());
+ Register NewReg = MRI->createVirtualRegister(TargetRegClass);
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(Opcode), NewReg);
+ MO.setReg(NewReg);
+ if (MO.isUndef())
+ MO.setIsUndef(false);
+ return true;
+}
+
bool RISCVInitUndef::processBasicBlock(MachineFunction &MF,
MachineBasicBlock &MBB,
const DeadLaneDetector &DLD) {
bool Changed = false;
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
MachineInstr &MI = *I;
- if (ST->enableSubRegLiveness() && isEarlyClobberMI(MI))
- Changed |= handleSubReg(MF, MI, DLD);
- if (MI.isImplicitDef()) {
- auto DstReg = MI.getOperand(0).getReg();
- if (isVectorRegClass(DstReg))
- Changed |= handleImplicitDef(MBB, I);
+
+ // If we used NoReg to represent the passthru, switch this back to being
+ // an IMPLICIT_DEF before TwoAddressInstructions.
+ unsigned UseOpIdx;
+ if (MI.getNumDefs() != 0 && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
+ MachineOperand &UseMO = MI.getOperand(UseOpIdx);
+ if (UseMO.getReg() == RISCV::NoRegister) {
+ const TargetRegisterClass *RC =
+ TII->getRegClass(MI.getDesc(), UseOpIdx, TRI, MF);
+ Register NewDest = MRI->createVirtualRegister(RC);
+ // We don't have a way to update dead lanes, so keep track of the
+ // new register so that we avoid querying it later.
+ NewRegs.insert(NewDest);
+ BuildMI(MBB, I, I->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), NewDest);
+ UseMO.setReg(NewDest);
+ Changed = true;
+ }
+ }
+
+ if (isEarlyClobberMI(MI)) {
+ if (ST->enableSubRegLiveness())
+ Changed |= handleSubReg(MF, MI, DLD);
+ Changed |= handleReg(&MI);
}
}
return Changed;
@@ -268,6 +291,10 @@ bool RISCVInitUndef::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &BB : MF)
Changed |= processBasicBlock(MF, BB, DLD);
+ for (auto *DeadMI : DeadInsts)
+ DeadMI->eraseFromParent();
+ DeadInsts.clear();
+
return Changed;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index c3ba4c1e7fdb..a3c19115bd31 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -14,6 +14,7 @@
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -84,10 +85,11 @@ RISCVRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
const RISCVFrameLowering *TFI = getFrameLowering(MF);
BitVector Reserved(getNumRegs());
+ auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
// Mark any registers requested to be reserved as such
for (size_t Reg = 0; Reg < getNumRegs(); Reg++) {
- if (MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(Reg))
+ if (Subtarget.isRegisterReservedByUser(Reg))
markSuperRegs(Reserved, Reg);
}
@@ -118,6 +120,13 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
markSuperRegs(Reserved, RISCV::FRM);
markSuperRegs(Reserved, RISCV::FFLAGS);
+ if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
+ if (Subtarget.isRVE())
+ report_fatal_error("Graal reserved registers do not exist in RVE");
+ markSuperRegs(Reserved, RISCV::X23);
+ markSuperRegs(Reserved, RISCV::X27);
+ }
+
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
@@ -132,7 +141,7 @@ const uint32_t *RISCVRegisterInfo::getNoPreservedMask() const {
}
// Frame indexes representing locations of CSRs which are given a fixed location
-// by save/restore libcalls.
+// by save/restore libcalls or Zcmp Push/Pop.
static const std::pair<unsigned, int> FixedCSRFIMap[] = {
{/*ra*/ RISCV::X1, -1},
{/*s0*/ RISCV::X8, -2},
@@ -290,12 +299,20 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const {
"Unexpected subreg numbering");
Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
- BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
- uint32_t ShiftAmount = Log2_32(LMUL);
- if (ShiftAmount != 0)
- BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
- .addReg(VL)
- .addImm(ShiftAmount);
+ // Optimize for constant VLEN.
+ const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
+ if (STI.getRealMinVLen() == STI.getRealMaxVLen()) {
+ const int64_t VLENB = STI.getRealMinVLen() / 8;
+ int64_t Offset = VLENB * LMUL;
+ STI.getInstrInfo()->movImm(MBB, II, DL, VL, Offset);
+ } else {
+ BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
+ uint32_t ShiftAmount = Log2_32(LMUL);
+ if (ShiftAmount != 0)
+ BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
+ .addReg(VL)
+ .addImm(ShiftAmount);
+ }
Register SrcReg = II->getOperand(0).getReg();
Register Base = II->getOperand(1).getReg();
@@ -359,12 +376,20 @@ void RISCVRegisterInfo::lowerVRELOAD(MachineBasicBlock::iterator II) const {
"Unexpected subreg numbering");
Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
- BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
- uint32_t ShiftAmount = Log2_32(LMUL);
- if (ShiftAmount != 0)
- BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
- .addReg(VL)
- .addImm(ShiftAmount);
+ // Optimize for constant VLEN.
+ const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
+ if (STI.getRealMinVLen() == STI.getRealMaxVLen()) {
+ const int64_t VLENB = STI.getRealMinVLen() / 8;
+ int64_t Offset = VLENB * LMUL;
+ STI.getInstrInfo()->movImm(MBB, II, DL, VL, Offset);
+ } else {
+ BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
+ uint32_t ShiftAmount = Log2_32(LMUL);
+ if (ShiftAmount != 0)
+ BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
+ .addReg(VL)
+ .addImm(ShiftAmount);
+ }
Register DestReg = II->getOperand(0).getReg();
Register Base = II->getOperand(1).getReg();
@@ -435,9 +460,16 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// offset can by construction, at worst, a LUI and a ADD.
int64_t Val = Offset.getFixed();
int64_t Lo12 = SignExtend64<12>(Val);
- MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo12);
- Offset = StackOffset::get((uint64_t)Val - (uint64_t)Lo12,
- Offset.getScalable());
+ if ((MI.getOpcode() == RISCV::PREFETCH_I ||
+ MI.getOpcode() == RISCV::PREFETCH_R ||
+ MI.getOpcode() == RISCV::PREFETCH_W) &&
+ (Lo12 & 0b11111) != 0)
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
+ else {
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo12);
+ Offset = StackOffset::get((uint64_t)Val - (uint64_t)Lo12,
+ Offset.getScalable());
+ }
}
}
@@ -655,6 +687,14 @@ RISCVRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
const MachineFunction &) const {
if (RC == &RISCV::VMV0RegClass)
return &RISCV::VRRegClass;
+ if (RC == &RISCV::VRNoV0RegClass)
+ return &RISCV::VRRegClass;
+ if (RC == &RISCV::VRM2NoV0RegClass)
+ return &RISCV::VRM2RegClass;
+ if (RC == &RISCV::VRM4NoV0RegClass)
+ return &RISCV::VRM4RegClass;
+ if (RC == &RISCV::VRM8NoV0RegClass)
+ return &RISCV::VRM8RegClass;
return RC;
}
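
The constant-VLEN path added to lowerVSPILL/lowerVRELOAD folds a CSR read plus shift into one immediate. The two computations agree because the generic path forms vlenb << log2(LMUL); a quick standalone check:

#include <cassert>
#include <cstdint>

// With a fixed VLEN (RealMinVLen == RealMaxVLen), vlenb is a compile-time
// constant and VLENB * LMUL equals the shifted value the CSR path computes.
int main() {
  const int64_t VLENB = 512 / 8; // vlenb for an assumed 512-bit VLEN
  for (unsigned LMUL : {1u, 2u, 4u, 8u}) {
    unsigned Shift = 31 - __builtin_clz(LMUL); // Log2_32(LMUL)
    assert((VLENB << Shift) == VLENB * LMUL);
  }
}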
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 0b17f54431ef..c59c9b294d79 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -165,15 +165,6 @@ def SP : GPRRegisterClass<(add X2)>;
def SR07 : GPRRegisterClass<(add (sequence "X%u", 8, 9),
(sequence "X%u", 18, 23))>;
-// Registers saveable by PUSH/POP instruction in Zcmp extension
-def PGPR : RegisterClass<"RISCV", [XLenVT], 32, (add
- (sequence "X%u", 8, 9),
- (sequence "X%u", 18, 27),
- X1
- )> {
- let RegInfos = XLenRI;
-}
-
// Floating point registers
let RegAltNameIndices = [ABIRegAltName] in {
def F0_H : RISCVReg16<0, "f0", ["ft0"]>, DwarfRegNum<[32]>;
@@ -313,6 +304,13 @@ defvar vfloat16m2_t = nxv8f16;
defvar vfloat16m4_t = nxv16f16;
defvar vfloat16m8_t = nxv32f16;
+defvar vbfloat16mf4_t = nxv1bf16;
+defvar vbfloat16mf2_t = nxv2bf16;
+defvar vbfloat16m1_t = nxv4bf16;
+defvar vbfloat16m2_t = nxv8bf16;
+defvar vbfloat16m4_t = nxv16bf16;
+defvar vbfloat16m8_t = nxv32bf16;
+
defvar vfloat32mf2_t = nxv1f32;
defvar vfloat32m1_t = nxv2f32;
defvar vfloat32m2_t = nxv4f32;
@@ -339,20 +337,21 @@ defvar LMULList = [1, 2, 4, 8];
// Utility classes for segment load/store.
//===----------------------------------------------------------------------===//
// The set of legal NF for LMUL = lmul.
-// LMUL == 1, NF = 2, 3, 4, 5, 6, 7, 8
+// LMUL <= 1, NF = 2, 3, 4, 5, 6, 7, 8
// LMUL == 2, NF = 2, 3, 4
// LMUL == 4, NF = 2
+// LMUL == 8, no legal NF
class NFList<int lmul> {
- list<int> L = !cond(!eq(lmul, 1): [2, 3, 4, 5, 6, 7, 8],
- !eq(lmul, 2): [2, 3, 4],
+ list<int> L = !cond(!eq(lmul, 8): [],
!eq(lmul, 4): [2],
- !eq(lmul, 8): []);
+ !eq(lmul, 2): [2, 3, 4],
+ true: [2, 3, 4, 5, 6, 7, 8]);
}
// Generate [start, end) SubRegIndex list.
class SubRegSet<int nf, int lmul> {
list<SubRegIndex> L = !foldl([]<SubRegIndex>,
- [0, 1, 2, 3, 4, 5, 6, 7],
+ !range(0, 8),
AccList, i,
!listconcat(AccList,
!if(!lt(i, nf),
@@ -380,15 +379,9 @@ class IndexSet<int tuple_index, int nf, int lmul, bit isV0 = false> {
!foldl([]<int>,
!if(isV0, [0],
!cond(
- !eq(lmul, 1):
- [8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23,
- 24, 25, 26, 27, 28, 29, 30, 31,
- 1, 2, 3, 4, 5, 6, 7],
- !eq(lmul, 2):
- [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3],
- !eq(lmul, 4):
- [2, 3, 4, 5, 6, 7, 1])),
+ !eq(lmul, 1): !listconcat(!range(8, 32), !range(1, 8)),
+ !eq(lmul, 2): !listconcat(!range(4, 16), !range(1, 4)),
+ !eq(lmul, 4): !listconcat(!range(2, 8), !range(1, 2)))),
L, i,
!listconcat(L,
!if(!le(!mul(!add(i, tuple_index), lmul),
@@ -418,12 +411,11 @@ class VRegList<list<dag> LIn, int start, int nf, int lmul, bit isV0> {
}
// Vector registers
-foreach Index = 0-31 in {
+foreach Index = !range(0, 32, 1) in {
def V#Index : RISCVReg<Index, "v"#Index>, DwarfRegNum<[!add(Index, 96)]>;
}
-foreach Index = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22,
- 24, 26, 28, 30] in {
+foreach Index = !range(0, 32, 2) in {
def V#Index#M2 : RISCVRegWithSubRegs<Index, "v"#Index,
[!cast<Register>("V"#Index),
!cast<Register>("V"#!add(Index, 1))]>,
@@ -432,7 +424,7 @@ foreach Index = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22,
}
}
-foreach Index = [0, 4, 8, 12, 16, 20, 24, 28] in {
+foreach Index = !range(0, 32, 4) in {
def V#Index#M4 : RISCVRegWithSubRegs<Index, "v"#Index,
[!cast<Register>("V"#Index#"M2"),
!cast<Register>("V"#!add(Index, 2)#"M2")]>,
@@ -441,7 +433,7 @@ foreach Index = [0, 4, 8, 12, 16, 20, 24, 28] in {
}
}
-foreach Index = [0, 8, 16, 24] in {
+foreach Index = !range(0, 32, 8) in {
def V#Index#M8 : RISCVRegWithSubRegs<Index, "v"#Index,
[!cast<Register>("V"#Index#"M4"),
!cast<Register>("V"#!add(Index, 4)#"M4")]>,
@@ -461,6 +453,7 @@ def VLENB : RISCVReg<0, "vlenb">,
def VCSR : RegisterClass<"RISCV", [XLenVT], 32,
(add VTYPE, VL, VLENB)> {
let RegInfos = XLenRI;
+ let isAllocatable = 0;
}
@@ -488,19 +481,23 @@ defvar VMaskVTs = [vbool1_t, vbool2_t, vbool4_t, vbool8_t, vbool16_t,
vbool32_t, vbool64_t];
defvar VM1VTs = [vint8m1_t, vint16m1_t, vint32m1_t, vint64m1_t,
- vfloat16m1_t, vfloat32m1_t, vfloat64m1_t,
- vint8mf2_t, vint8mf4_t, vint8mf8_t,
- vint16mf2_t, vint16mf4_t, vint32mf2_t,
- vfloat16mf4_t, vfloat16mf2_t, vfloat32mf2_t];
+ vbfloat16m1_t, vfloat16m1_t, vfloat32m1_t,
+ vfloat64m1_t, vint8mf2_t, vint8mf4_t, vint8mf8_t,
+ vint16mf2_t, vint16mf4_t, vint32mf2_t,
+ vfloat16mf4_t, vfloat16mf2_t, vbfloat16mf4_t,
+ vbfloat16mf2_t, vfloat32mf2_t];
defvar VM2VTs = [vint8m2_t, vint16m2_t, vint32m2_t, vint64m2_t,
- vfloat16m2_t, vfloat32m2_t, vfloat64m2_t];
+ vfloat16m2_t, vbfloat16m2_t,
+ vfloat32m2_t, vfloat64m2_t];
defvar VM4VTs = [vint8m4_t, vint16m4_t, vint32m4_t, vint64m4_t,
- vfloat16m4_t, vfloat32m4_t, vfloat64m4_t];
+ vfloat16m4_t, vbfloat16m4_t,
+ vfloat32m4_t, vfloat64m4_t];
defvar VM8VTs = [vint8m8_t, vint16m8_t, vint32m8_t, vint64m8_t,
- vfloat16m8_t, vfloat32m8_t, vfloat64m8_t];
+ vfloat16m8_t, vbfloat16m8_t,
+ vfloat32m8_t, vfloat64m8_t];
def VR : VReg<!listconcat(VM1VTs, VMaskVTs),
(add (sequence "V%u", 8, 31),
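
The legal-NF table in the NFList comment earlier in this file's hunks boils down to one rule: a segment tuple must fit in the 32-register vector file, i.e. NF * LMUL <= 8. A standalone check that the rewritten !cond matches that rule for the integer LMULs (fractional LMULs fall into the same `true` branch):

#include <cassert>
#include <vector>

// Mirrors NFList<lmul>: the legal segment counts for a given LMUL.
static std::vector<int> nfList(int LMUL) {
  std::vector<int> L;
  for (int NF = 2; NF <= 8; ++NF)
    if (NF * LMUL <= 8)
      L.push_back(NF);
  return L;
}

int main() {
  assert(nfList(1) == (std::vector<int>{2, 3, 4, 5, 6, 7, 8}));
  assert(nfList(2) == (std::vector<int>{2, 3, 4}));
  assert(nfList(4) == (std::vector<int>{2}));
  assert(nfList(8).empty());
}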
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index b14cdd40f154..bb9dfe5d0124 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -70,11 +70,11 @@ def : WriteRes<WriteIMul32, [RocketUnitIMul]>;
// Worst case latency is used.
def : WriteRes<WriteIDiv32, [RocketUnitIDiv]> {
let Latency = 34;
- let ResourceCycles = [34];
+ let ReleaseAtCycles = [34];
}
def : WriteRes<WriteIDiv, [RocketUnitIDiv]> {
let Latency = 33;
- let ResourceCycles = [33];
+ let ReleaseAtCycles = [33];
}
// Memory
@@ -157,16 +157,16 @@ def : WriteRes<WriteFMA64, [RocketUnitFPALU]>;
// FP division
// FP division unit on Rocket is not pipelined, so set resource cycles to latency.
-let Latency = 20, ResourceCycles = [20] in {
+let Latency = 20, ReleaseAtCycles = [20] in {
def : WriteRes<WriteFDiv32, [RocketUnitFPDivSqrt]>;
def : WriteRes<WriteFDiv64, [RocketUnitFPDivSqrt]>;
}
// FP square root unit on Rocket is not pipelined, so set resource cycles to latency.
def : WriteRes<WriteFSqrt32, [RocketUnitFPDivSqrt]> { let Latency = 20;
- let ResourceCycles = [20]; }
+ let ReleaseAtCycles = [20]; }
def : WriteRes<WriteFSqrt64, [RocketUnitFPDivSqrt]> { let Latency = 25;
- let ResourceCycles = [25]; }
+ let ReleaseAtCycles = [25]; }
// Others
def : WriteRes<WriteCSR, []>;
@@ -206,7 +206,9 @@ def : ReadAdvance<ReadFAdd64, 0>;
def : ReadAdvance<ReadFMul32, 0>;
def : ReadAdvance<ReadFMul64, 0>;
def : ReadAdvance<ReadFMA32, 0>;
+def : ReadAdvance<ReadFMA32Addend, 0>;
def : ReadAdvance<ReadFMA64, 0>;
+def : ReadAdvance<ReadFMA64Addend, 0>;
def : ReadAdvance<ReadFDiv32, 0>;
def : ReadAdvance<ReadFDiv64, 0>;
def : ReadAdvance<ReadFSqrt32, 0>;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index e22c05b30b7f..45783d482f3b 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -154,12 +154,12 @@ class SiFive7GetReductionCycles<string mx, int sew> {
!eq(mx, "MF8") : 1
);
int c = !add(
- !div(TwoTimesLMUL, DLEN),
+ TwoTimesLMUL,
!mul(5, !add(4, !logtwo(!div(DLEN, sew))))
);
}
-/// Cycles for ordered reductions take approximatley 5*VL cycles
+/// Cycles for ordered reductions take approximately 6*VL cycles
class SiFive7GetOrderedReductionCycles<string mx, int sew> {
defvar VLEN = 512;
// (VLEN * LMUL) / SEW
@@ -172,7 +172,7 @@ class SiFive7GetOrderedReductionCycles<string mx, int sew> {
!eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
!eq(mx, "MF8") : !div(!div(VLEN, 8), sew),
);
- int c = !mul(5, VLUpperBound);
+ int c = !mul(6, VLUpperBound);
}
class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
@@ -182,6 +182,8 @@ class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
WriteSHXADD, WriteSHXADD32,
WriteRotateImm, WriteRotateImm32,
WriteRotateReg, WriteRotateReg32,
+ WriteSingleBit, WriteSingleBitImm,
+ WriteBEXT, WriteBEXTI,
WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32,
WriteCPOP, WriteCPOP32,
WriteREV8, WriteORCB, WriteSFB,
@@ -206,20 +208,26 @@ def SiFive7Model : SchedMachineModel {
// Pipe A can handle memory, integer alu and vector operations.
// Pipe B can handle integer alu, control flow, integer multiply and divide,
// and floating point computation.
-// Pipe V can handle the V extension.
+// The V pipeline is modeled by the VCQ, VA, VL, and VS resources.
let SchedModel = SiFive7Model in {
let BufferSize = 0 in {
def SiFive7PipeA : ProcResource<1>;
def SiFive7PipeB : ProcResource<1>;
-def SiFive7PipeV : ProcResource<1>;
-}
-
-let BufferSize = 1 in {
-def SiFive7IDiv : ProcResource<1> { let Super = SiFive7PipeB; } // Int Division
-def SiFive7FDiv : ProcResource<1> { let Super = SiFive7PipeB; } // FP Division/Sqrt
-def SiFive7VA : ProcResource<1> { let Super = SiFive7PipeV; } // Arithmetic sequencer
-def SiFive7VL : ProcResource<1> { let Super = SiFive7PipeV; } // Load sequencer
-def SiFive7VS : ProcResource<1> { let Super = SiFive7PipeV; } // Store sequencer
+def SiFive7IDiv : ProcResource<1>; // Int Division
+def SiFive7FDiv : ProcResource<1>; // FP Division/Sqrt
+def SiFive7VA : ProcResource<1>; // Arithmetic sequencer
+def SiFive7VL : ProcResource<1>; // Load sequencer
+def SiFive7VS : ProcResource<1>; // Store sequencer
+// The VCQ accepts instructions from the A Pipe and holds them until the
+// vector unit is ready to dequeue them. The unit dequeues up to one instruction
+// per cycle, in order, as soon as the sequencer for that type of instruction is
+// available. This resource is meant to be used for 1 cycle by all vector
+// instructions, to model that only one vector instruction may be dequeued at a
+// time. The actual dequeueing into the sequencer is modeled by the VA, VL, and
+// VS sequencer resources below. Each of them will only accept a single
+// instruction at a time and remain busy for the number of cycles associated
+// with that instruction.
+def SiFive7VCQ : ProcResource<1>; // Vector Command Queue
}
def SiFive7PipeAB : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>;
@@ -256,11 +264,11 @@ def : WriteRes<WriteIMul32, [SiFive7PipeB]>;
// Integer division
def : WriteRes<WriteIDiv, [SiFive7PipeB, SiFive7IDiv]> {
let Latency = 66;
- let ResourceCycles = [1, 65];
+ let ReleaseAtCycles = [1, 65];
}
def : WriteRes<WriteIDiv32, [SiFive7PipeB, SiFive7IDiv]> {
let Latency = 34;
- let ResourceCycles = [1, 33];
+ let ReleaseAtCycles = [1, 33];
}
// Bitmanip
@@ -292,6 +300,16 @@ def : WriteRes<WriteSHXADD, [SiFive7PipeB]>;
def : WriteRes<WriteSHXADD32, [SiFive7PipeB]>;
}
+// Single-bit instructions
+// The BEXT[I] instructions are available on all ALUs and the other
+// instructions are only available on the SiFive7 B pipe.
+let Latency = 3 in {
+def : WriteRes<WriteSingleBit, [SiFive7PipeB]>;
+def : WriteRes<WriteSingleBitImm, [SiFive7PipeB]>;
+def : WriteRes<WriteBEXT, [SiFive7PipeAB]>;
+def : WriteRes<WriteBEXTI, [SiFive7PipeAB]>;
+}
+
// Memory
def : WriteRes<WriteSTB, [SiFive7PipeA]>;
def : WriteRes<WriteSTH, [SiFive7PipeA]>;
@@ -336,7 +354,7 @@ def : WriteRes<WriteFSGNJ16, [SiFive7PipeB]>;
def : WriteRes<WriteFMinMax16, [SiFive7PipeB]>;
}
-let Latency = 14, ResourceCycles = [1, 13] in {
+let Latency = 14, ReleaseAtCycles = [1, 13] in {
def : WriteRes<WriteFDiv16, [SiFive7PipeB, SiFive7FDiv]>;
def : WriteRes<WriteFSqrt16, [SiFive7PipeB, SiFive7FDiv]>;
}
@@ -353,9 +371,9 @@ def : WriteRes<WriteFMinMax32, [SiFive7PipeB]>;
}
def : WriteRes<WriteFDiv32, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 27;
- let ResourceCycles = [1, 26]; }
+ let ReleaseAtCycles = [1, 26]; }
def : WriteRes<WriteFSqrt32, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 27;
- let ResourceCycles = [1, 26]; }
+ let ReleaseAtCycles = [1, 26]; }
// Double precision
let Latency = 7 in {
@@ -369,9 +387,9 @@ def : WriteRes<WriteFMinMax64, [SiFive7PipeB]>;
}
def : WriteRes<WriteFDiv64, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 56;
- let ResourceCycles = [1, 55]; }
+ let ReleaseAtCycles = [1, 55]; }
def : WriteRes<WriteFSqrt64, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 56;
- let ResourceCycles = [1, 55]; }
+ let ReleaseAtCycles = [1, 55]; }
// Conversions
let Latency = 3 in {
@@ -421,21 +439,21 @@ def : WriteRes<WriteVSETVL, [SiFive7PipeA]>;
foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 4, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVLDE", [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDFF", [SiFive7VL], mx, IsWorstCase>;
+ let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVLDE", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDFF", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
}
- let Latency = 1, ResourceCycles = [Cycles] in
- defm "" : LMULWriteResMX<"WriteVSTE", [SiFive7VS], mx, IsWorstCase>;
+ let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
+ defm "" : LMULWriteResMX<"WriteVSTE", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
}
foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetMaskLoadStoreCycles<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 4, ResourceCycles = [Cycles] in
- defm "" : LMULWriteResMX<"WriteVLDM", [SiFive7VL], mx, IsWorstCase>;
- let Latency = 1, ResourceCycles = [Cycles] in
- defm "" : LMULWriteResMX<"WriteVSTM", [SiFive7VS], mx, IsWorstCase>;
+ let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
+ defm "" : LMULWriteResMX<"WriteVLDM", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
+ let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
+ defm "" : LMULWriteResMX<"WriteVSTM", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
}
// Strided loads and stores operate at one element per cycle and should be
@@ -445,81 +463,101 @@ foreach mx = SchedMxList in {
// specific suffixes, but since SEW is already encoded in the name of the
// resource, we do not need to use LMULSEWXXX constructors. However, we do
// use the SEW from the name to determine the number of Cycles.
+
+// This predicate is true when the rs2 operand of vlse or vsse is x0, false
+// otherwise.
+def VLDSX0Pred : MCSchedPredicate<CheckRegOperand<3, X0>>;
+
foreach mx = SchedMxList in {
+ defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVLDS8", [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDUX8", [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDOX8", [SiFive7VL], mx, IsWorstCase>;
+ defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
+ 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
+ [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
+ let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVLDUX8", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX8", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
}
- let Latency = 1, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVSTS8", [SiFive7VS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTUX8", [SiFive7VS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTOX8", [SiFive7VS], mx, IsWorstCase>;
+ let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVSTS8", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTUX8", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX8", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
}
}
-foreach mx = SchedMxList in {
+// TODO: The MxLists need to be filtered by EEW. We only need to support
+// LMUL >= SEW_min/ELEN. Here, the smallest EEW prevents us from having MF8
+// since LMUL >= 16/64.
+foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
+ defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVLDS16", [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDUX16", [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDOX16", [SiFive7VL], mx, IsWorstCase>;
+ defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
+ 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
+ [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
+ let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVLDUX16", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX16", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
}
- let Latency = 1, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVSTS16", [SiFive7VS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTUX16", [SiFive7VS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTOX16", [SiFive7VS], mx, IsWorstCase>;
+ let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVSTS16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTUX16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
}
}
-foreach mx = SchedMxList in {
+foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
+ defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVLDS32", [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDUX32", [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDOX32", [SiFive7VL], mx, IsWorstCase>;
+ defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
+ 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
+ [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
+ let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVLDUX32", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX32", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
}
- let Latency = 1, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVSTS32", [SiFive7VS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTUX32", [SiFive7VS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTOX32", [SiFive7VS], mx, IsWorstCase>;
+ let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVSTS32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTUX32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
}
}
-foreach mx = SchedMxList in {
+foreach mx = ["M1", "M2", "M4", "M8"] in {
+ defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVLDS64", [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDUX64", [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDOX64", [SiFive7VL], mx, IsWorstCase>;
+ defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
+ 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
+ [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
+ let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVLDUX64", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX64", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
}
- let Latency = 1, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVSTS64", [SiFive7VS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTUX64", [SiFive7VS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTOX64", [SiFive7VS], mx, IsWorstCase>;
+ let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVSTS64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTUX64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
}
}
// VLD*R is LMUL aware
-let Latency = 4, ResourceCycles = [2] in
- def : WriteRes<WriteVLD1R, [SiFive7VL]>;
-let Latency = 4, ResourceCycles = [4] in
- def : WriteRes<WriteVLD2R, [SiFive7VL]>;
-let Latency = 4, ResourceCycles = [8] in
- def : WriteRes<WriteVLD4R, [SiFive7VL]>;
-let Latency = 4, ResourceCycles = [16] in
- def : WriteRes<WriteVLD8R, [SiFive7VL]>;
+let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in
+ def : WriteRes<WriteVLD1R, [SiFive7VCQ, SiFive7VL]>;
+let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in
+ def : WriteRes<WriteVLD2R, [SiFive7VCQ, SiFive7VL]>;
+let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in
+ def : WriteRes<WriteVLD4R, [SiFive7VCQ, SiFive7VL]>;
+let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in
+ def : WriteRes<WriteVLD8R, [SiFive7VCQ, SiFive7VL]>;
// VST*R is LMUL aware
-let Latency = 1, ResourceCycles = [2] in
- def : WriteRes<WriteVST1R, [SiFive7VS]>;
-let Latency = 1, ResourceCycles = [4] in
- def : WriteRes<WriteVST2R, [SiFive7VS]>;
-let Latency = 1, ResourceCycles = [8] in
- def : WriteRes<WriteVST4R, [SiFive7VS]>;
-let Latency = 1, ResourceCycles = [16] in
- def : WriteRes<WriteVST8R, [SiFive7VS]>;
+let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in
+ def : WriteRes<WriteVST1R, [SiFive7VCQ, SiFive7VS]>;
+let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in
+ def : WriteRes<WriteVST2R, [SiFive7VCQ, SiFive7VS]>;
+let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in
+ def : WriteRes<WriteVST4R, [SiFive7VCQ, SiFive7VS]>;
+let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in
+ def : WriteRes<WriteVST8R, [SiFive7VCQ, SiFive7VS]>;
// Segmented Loads and Stores
// Unit-stride segmented loads and stores are effectively converted into strided
@@ -532,22 +570,22 @@ foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesSegmentedSeg2<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
// Does not chain so set latency high
- let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVLSEG2e" # eew, [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLSEGFF2e" # eew, [SiFive7VL], mx, IsWorstCase>;
+ let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVLSEG2e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLSEGFF2e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
}
- let Latency = 1, ResourceCycles = [Cycles] in
- defm "" : LMULWriteResMX<"WriteVSSEG2e" # eew, [SiFive7VS], mx, IsWorstCase>;
+ let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
+ defm "" : LMULWriteResMX<"WriteVSSEG2e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
foreach nf=3-8 in {
defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
// Does not chain so set latency high
- let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>;
+ let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
}
- let Latency = 1, ResourceCycles = [Cycles] in
- defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew, [SiFive7VS], mx, IsWorstCase>;
+ let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
+ defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
}
}
}
@@ -557,15 +595,15 @@ foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
// Does not chain so set latency high
- let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>;
+ let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
}
- let Latency = 1, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew, [SiFive7VS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew, [SiFive7VS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew, [SiFive7VS], mx, IsWorstCase>;
+ let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
}
}
}
@@ -575,41 +613,41 @@ foreach mx = SchedMxList in {
foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 4, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVIALUV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIALUX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIALUI", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUI", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVShiftV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVShiftX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVShiftI", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMulV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMulX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMulAddV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMulAddX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMergeV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMergeX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMergeI", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMovV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMovX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMovI", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVIALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVShiftV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVShiftX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVShiftI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulAddX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMovV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMovX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMovI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
// Mask results can't chain.
- let Latency = !add(Cycles, 3), ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVICmpV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICmpX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICmpI", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVICmpV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICmpX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICmpI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 4, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVExtV", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVExtV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
foreach mx = SchedMxList in {
@@ -617,9 +655,9 @@ foreach mx = SchedMxList in {
defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
!div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
- let Latency = Cycles, ResourceCycles = [Cycles] in {
- defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SiFive7VA], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SiFive7VA], mx, sew, IsWorstCase>;
+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
}
}
}
@@ -628,24 +666,24 @@ foreach mx = SchedMxList in {
foreach mx = SchedMxListW in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
- let Latency = 8, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVIWALUV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIWALUX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIWALUI", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIWMulV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIWMulX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVIWALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWMulX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
// Narrowing
foreach mx = SchedMxListW in {
defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
- let Latency = 8, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVNShiftV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVNShiftX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVNShiftI", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVNShiftV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVNShiftX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVNShiftI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
@@ -653,27 +691,27 @@ foreach mx = SchedMxListW in {
foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 8, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVSALUV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSALUX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSALUI", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVAALUV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVAALUX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSMulV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSMulX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSShiftV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSShiftX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSShiftI", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVSALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVAALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVAALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSMulX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSShiftV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSShiftX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSShiftI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
// Narrowing
foreach mx = SchedMxListW in {
defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
- let Latency = 8, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVNClipV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVNClipX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVNClipI", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVNClipV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVNClipX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVNClipI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
@@ -681,30 +719,30 @@ foreach mx = SchedMxListW in {
foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 8, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVFALUV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFALUF", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMulV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMulF", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMulAddV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMulAddF", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFRecpV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFCvtIToFV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVFALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFALUF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMulF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMulAddF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFRecpV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
- let Latency = 4, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVFSgnjV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFSgnjF", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMinMaxV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMinMaxF", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFClassV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMergeV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMovV", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVFSgnjV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFSgnjF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMinMaxV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMinMaxF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFClassV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMergeV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMovV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
// Mask results can't chain.
- let Latency = !add(Cycles, 3), ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVFCmpV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFCmpF", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVFCmpV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFCmpF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
foreach mx = SchedMxListF in {
@@ -712,10 +750,10 @@ foreach mx = SchedMxListF in {
defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
!div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
- let Latency = Cycles, ResourceCycles = [Cycles] in {
- defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SiFive7VA], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [SiFive7VA], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [SiFive7VA], mx, sew, IsWorstCase>;
+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
}
}
}
@@ -724,38 +762,38 @@ foreach mx = SchedMxListF in {
foreach mx = SchedMxListW in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
- let Latency = 8, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVFWCvtIToFV", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVFWCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
foreach mx = SchedMxListFW in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
- let Latency = 8, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVFWALUV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWMulV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWMulAddV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWCvtFToFV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWMulAddF", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWMulF", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWALUF", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVFWALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFWMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFWMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFWCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFWMulAddF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFWMulF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFWALUF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
// Narrowing
foreach mx = SchedMxListW in {
defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
- let Latency = 8, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
foreach mx = SchedMxListFW in {
defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
- let Latency = 8, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVFNCvtIToFV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFNCvtFToFV", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVFNCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFNCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
@@ -764,11 +802,12 @@ foreach mx = SchedMxList in {
foreach sew = SchedSEWSet<mx>.val in {
defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
- let Latency = Cycles, ResourceCycles = [Cycles] in
- defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SiFive7VA],
- mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SiFive7VA],
- mx, sew, IsWorstCase>;
+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SiFive7VCQ, SiFive7VA],
+ mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SiFive7VCQ, SiFive7VA],
+ mx, sew, IsWorstCase>;
+ }
}
}
@@ -776,8 +815,8 @@ foreach mx = SchedMxListWRed in {
foreach sew = SchedSEWSet<mx, 0, 1>.val in {
defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
- let Latency = Cycles, ResourceCycles = [Cycles] in
- defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VA],
+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VCQ, SiFive7VA],
mx, sew, IsWorstCase>;
}
}
@@ -786,15 +825,15 @@ foreach mx = SchedMxListF in {
foreach sew = SchedSEWSet<mx, 1>.val in {
defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
- let Latency = RedCycles, ResourceCycles = [RedCycles] in {
- defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFive7VA],
+ let Latency = RedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, RedCycles)] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFive7VCQ, SiFive7VA],
mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SiFive7VA],
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SiFive7VCQ, SiFive7VA],
mx, sew, IsWorstCase>;
}
defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
- let Latency = OrdRedCycles, ResourceCycles = [OrdRedCycles] in
- defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VA],
+ let Latency = OrdRedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VCQ, SiFive7VA],
mx, sew, IsWorstCase>;
}
}
@@ -803,12 +842,12 @@ foreach mx = SchedMxListFWRed in {
foreach sew = SchedSEWSet<mx, 1, 1>.val in {
defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
- let Latency = RedCycles, ResourceCycles = [RedCycles] in
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VA],
+ let Latency = RedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, RedCycles)] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VCQ, SiFive7VA],
mx, sew, IsWorstCase>;
defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
- let Latency = OrdRedCycles, ResourceCycles = [OrdRedCycles] in
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VA],
+ let Latency = OrdRedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VCQ, SiFive7VA],
mx, sew, IsWorstCase>;
}
}
@@ -817,35 +856,35 @@ foreach mx = SchedMxListFWRed in {
foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesVMask<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 4, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVMALUV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVMPopV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVMFFSV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVMSFSV", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVMALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVMPopV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVMFFSV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVMSFSV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 4, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVMIotV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVMIdxV", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVMIotV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVMIdxV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
// 16. Vector Permutation Instructions
-let Latency = 4, ResourceCycles = [1] in {
- def : WriteRes<WriteVIMovVX, [SiFive7VA]>;
- def : WriteRes<WriteVIMovXV, [SiFive7VA]>;
- def : WriteRes<WriteVFMovVF, [SiFive7VA]>;
- def : WriteRes<WriteVFMovFV, [SiFive7VA]>;
+let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 1)] in {
+ def : WriteRes<WriteVIMovVX, [SiFive7VCQ, SiFive7VA]>;
+ def : WriteRes<WriteVIMovXV, [SiFive7VCQ, SiFive7VA]>;
+ def : WriteRes<WriteVFMovVF, [SiFive7VCQ, SiFive7VA]>;
+ def : WriteRes<WriteVFMovFV, [SiFive7VCQ, SiFive7VA]>;
}
foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 8, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVRGatherVX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVRGatherVI", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVRGatherVX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVRGatherVI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
@@ -853,9 +892,9 @@ foreach mx = SchedMxList in {
foreach sew = SchedSEWSet<mx>.val in {
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, sew>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
- let Latency = !add(Cycles, 3), ResourceCycles = [Cycles] in {
- defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFive7VA], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SiFive7VA], mx, sew, IsWorstCase>;
+ let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
}
}
}
@@ -863,23 +902,23 @@ foreach mx = SchedMxList in {
foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 4, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVISlideX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVISlideI", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVISlide1X", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFSlide1F", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVISlideX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVISlideI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVISlide1X", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFSlide1F", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
// VMov*V is LMUL Aware
-let Latency = 4, ResourceCycles = [2] in
- def : WriteRes<WriteVMov1V, [SiFive7VA]>;
-let Latency = 4, ResourceCycles = [4] in
- def : WriteRes<WriteVMov2V, [SiFive7VA]>;
-let Latency = 4, ResourceCycles = [8] in
- def : WriteRes<WriteVMov4V, [SiFive7VA]>;
-let Latency = 4, ResourceCycles = [16] in
- def : WriteRes<WriteVMov8V, [SiFive7VA]>;
+let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in
+ def : WriteRes<WriteVMov1V, [SiFive7VCQ, SiFive7VA]>;
+let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in
+ def : WriteRes<WriteVMov2V, [SiFive7VCQ, SiFive7VA]>;
+let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in
+ def : WriteRes<WriteVMov4V, [SiFive7VCQ, SiFive7VA]>;
+let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in
+ def : WriteRes<WriteVMov8V, [SiFive7VCQ, SiFive7VA]>;
// Others
def : WriteRes<WriteCSR, [SiFive7PipeB]>;
@@ -922,10 +961,13 @@ def : ReadAdvance<ReadFAdd32, 0>;
def : ReadAdvance<ReadFAdd64, 0>;
def : ReadAdvance<ReadFMul16, 0>;
def : ReadAdvance<ReadFMA16, 0>;
+def : ReadAdvance<ReadFMA16Addend, 0>;
def : ReadAdvance<ReadFMul32, 0>;
def : ReadAdvance<ReadFMul64, 0>;
def : ReadAdvance<ReadFMA32, 0>;
+def : ReadAdvance<ReadFMA32Addend, 0>;
def : ReadAdvance<ReadFMA64, 0>;
+def : ReadAdvance<ReadFMA64Addend, 0>;
def : ReadAdvance<ReadFDiv16, 0>;
def : ReadAdvance<ReadFDiv32, 0>;
def : ReadAdvance<ReadFDiv64, 0>;
@@ -987,6 +1029,9 @@ def : SiFive7AnyToGPRBypass<ReadORCB>;
def : SiFive7AnyToGPRBypass<ReadREV8>;
def : SiFive7AnyToGPRBypass<ReadSHXADD>;
def : SiFive7AnyToGPRBypass<ReadSHXADD32>;
+// Single-bit instructions
+def : SiFive7AnyToGPRBypass<ReadSingleBit>;
+def : SiFive7AnyToGPRBypass<ReadSingleBitImm>;
// 6. Configuration-Setting Instructions
def : ReadAdvance<ReadVSETVLI, 2>;
@@ -1154,11 +1199,16 @@ def : ReadAdvance<ReadVMov8V, 0>;
// Others
def : ReadAdvance<ReadVMask, 0>;
+def : ReadAdvance<ReadVMergeOp_WorstCase, 0>;
+foreach mx = SchedMxList in {
+ def : ReadAdvance<!cast<SchedRead>("ReadVMergeOp_" # mx), 0>;
+ foreach sew = SchedSEWSet<mx>.val in
+ def : ReadAdvance<!cast<SchedRead>("ReadVMergeOp_" # mx # "_E" # sew), 0>;
+}
//===----------------------------------------------------------------------===//
// Unsupported extensions
defm : UnsupportedSchedZbc;
-defm : UnsupportedSchedZbs;
defm : UnsupportedSchedZbkb;
defm : UnsupportedSchedZbkx;
defm : UnsupportedSchedZfa;
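
The SiFive7 hunks above all follow one pattern: each vector write now occupies two resources, the vector command queue (SiFive7VCQ) and a functional unit, with positional AcquireAtCycles/ReleaseAtCycles lists (entry i applies to resource i). A minimal sketch of the resulting intervals, assuming an invented write name and a cost of 4 cycles:

    // Illustrative only: WriteVFooV and the cycle count are not from the diff.
    let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in
      def : WriteRes<WriteVFooV, [SiFive7VCQ, SiFive7VA]>;
    // Resource i is held for [AcquireAtCycles[i], ReleaseAtCycles[i]):
    // SiFive7VCQ for [0, 1) and SiFive7VA for [1, 5), so the queue slot is
    // freed as soon as the operation issues to the arithmetic unit.
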
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
index 41eefa0c67d9..06ad2075b073 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
@@ -55,14 +55,14 @@ def : WriteRes<WriteIMul, [SCR1_MUL]>;
def : WriteRes<WriteIMul32, [SCR1_MUL]>;
// Integer division: latency 33, inverse throughput 33
-let Latency = 33, ResourceCycles = [33] in {
+let Latency = 33, ReleaseAtCycles = [33] in {
def : WriteRes<WriteIDiv32, [SCR1_DIV]>;
def : WriteRes<WriteIDiv, [SCR1_DIV]>;
}
// Load/store instructions on SCR1 have latency 2 and inverse throughput 2
// (SCR1_CFG_RV32IMC_MAX includes TCM)
-let Latency = 2, ResourceCycles=[2] in {
+let Latency = 2, ReleaseAtCycles=[2] in {
// Memory
def : WriteRes<WriteSTB, [SCR1_LSU]>;
def : WriteRes<WriteSTH, [SCR1_LSU]>;
@@ -164,7 +164,9 @@ def : ReadAdvance<ReadFAdd64, 0>;
def : ReadAdvance<ReadFMul32, 0>;
def : ReadAdvance<ReadFMul64, 0>;
def : ReadAdvance<ReadFMA32, 0>;
+def : ReadAdvance<ReadFMA32Addend, 0>;
def : ReadAdvance<ReadFMA64, 0>;
+def : ReadAdvance<ReadFMA64Addend, 0>;
def : ReadAdvance<ReadFDiv32, 0>;
def : ReadAdvance<ReadFDiv64, 0>;
def : ReadAdvance<ReadFSqrt32, 0>;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td
index af318ea5bf68..f6c1b096ad90 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td
@@ -150,8 +150,11 @@ def ReadFMul16 : SchedRead; // 16-bit floating point multiply
def ReadFMul32 : SchedRead; // 32-bit floating point multiply
def ReadFMul64 : SchedRead; // 64-bit floating point multiply
def ReadFMA16 : SchedRead; // 16-bit floating point fused multiply-add
+def ReadFMA16Addend : SchedRead; // 16-bit floating point fused multiply-add (addend)
def ReadFMA32 : SchedRead; // 32-bit floating point fused multiply-add
+def ReadFMA32Addend : SchedRead; // 32-bit floating point fused multiply-add (addend)
def ReadFMA64 : SchedRead; // 64-bit floating point fused multiply-add
+def ReadFMA64Addend : SchedRead; // 64-bit floating point fused multiply-add (addend)
def ReadFDiv16 : SchedRead; // 16-bit floating point divide
def ReadFDiv32 : SchedRead; // 32-bit floating point divide
def ReadFDiv64 : SchedRead; // 64-bit floating point divide
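
RISCVSchedule.td now distinguishes the addend operand of a fused multiply-add from its multiplicands, which lets a scheduler model accumulator forwarding. A sketch, assuming a hypothetical core that forwards a producer's result to the addend two cycles early (the 2 is invented, not taken from any model in this diff):

    // Hypothetical: addend reads see their producer 2 cycles earlier than
    // multiplicand reads (which keep ReadAdvance 0).
    def : ReadAdvance<ReadFMA32Addend, 2>;
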
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVScheduleV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVScheduleV.td
index 676383c5a636..29f2ceec25fa 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVScheduleV.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -62,6 +62,52 @@ multiclass LMULSEWWriteResMXSEW<string name, list<ProcResourceKind> resources,
def : WriteRes<!cast<SchedWrite>(name # "_WorstCase"), resources>;
}
+// Define a SchedAlias for the SchedWrite associated with (name, mx) whose
+// behavior is aliased to a Variant. The Variant has Latency predLat,
+// AcquireAtCycles predAcquireCycles, and ReleaseAtCycles predReleaseCycles
+// if the SchedPredicate Pred is true, otherwise it has Latency noPredLat,
+// AcquireAtCycles noPredAcquireCycles, and ReleaseAtCycles
+// noPredReleaseCycles. The WorstCase SchedWrite is created similarly if
+// IsWorstCase is true.
+multiclass LMULWriteResMXVariant<string name, SchedPredicateBase Pred,
+ list<ProcResourceKind> resources,
+ int predLat, list<int> predAcquireCycles,
+ list<int> predReleaseCycles, int noPredLat,
+ list<int> noPredAcquireCycles,
+ list<int> noPredReleaseCycles,
+ string mx, bit IsWorstCase> {
+ defvar nameMX = name # "_" # mx;
+
+ // Define the different behaviors
+  def nameMX # "_Pred" : SchedWriteRes<resources> {
+ let Latency = predLat;
+ let AcquireAtCycles = predAcquireCycles;
+ let ReleaseAtCycles = predReleaseCycles;
+ }
+ def nameMX # "_NoPred" : SchedWriteRes<resources> {
+ let Latency = noPredLat;
+ let AcquireAtCycles = noPredAcquireCycles;
+ let ReleaseAtCycles = noPredReleaseCycles;
+ }
+
+ // Tie behavior to predicate
+ def NAME # nameMX # "_Variant" : SchedWriteVariant<[
+ SchedVar<Pred, [!cast<SchedWriteRes>(NAME # nameMX # "_Pred")]>,
+ SchedVar<NoSchedPred, [!cast<SchedWriteRes>(NAME # nameMX # "_NoPred")]>
+ ]>;
+ def : SchedAlias<
+ !cast<SchedReadWrite>(nameMX),
+ !cast<SchedReadWrite>(NAME # nameMX # "_Variant")>;
+
+ if IsWorstCase then {
+ def NAME # name # "_WorstCase_Variant" : SchedWriteVariant<[
+ SchedVar<Pred, [!cast<SchedWriteRes>(NAME # nameMX # "_Pred")]>,
+ SchedVar<NoSchedPred, [!cast<SchedWriteRes>(NAME # nameMX # "_NoPred")]>
+ ]>;
+ def : SchedAlias<
+ !cast<SchedReadWrite>(name # "_WorstCase"),
+ !cast<SchedReadWrite>(NAME # name # "_WorstCase_Variant")>;
+ }
+}
+
// Define multiclasses to define SchedWrite, SchedRead, WriteRes, and
// ReadAdvance for each (name, LMUL) pair and for each LMUL in each of the
// SchedMxList variants above. Each multiclass is responsible for defining
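
A sketch of how the LMULWriteResMXVariant multiclass added above might be instantiated; the predicate name, cycle counts, and LMUL are placeholders, not definitions from this diff:

    // Illustrative invocation: latency 4, occupying SiFive7VL for cycles
    // [1, 5) when SomeStridePred holds, otherwise latency 8 and [1, 9).
    defm "" : LMULWriteResMXVariant<"WriteVLDS8", SomeStridePred,
                                    [SiFive7VCQ, SiFive7VL],
                                    4, [0, 1], [1, 5],
                                    8, [0, 1], [1, 9],
                                    "M1", /*IsWorstCase=*/true>;
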
@@ -687,6 +733,12 @@ def ReadVMov8V : SchedRead;
// Others
def ReadVMask : SchedRead;
+def ReadVMergeOp_WorstCase : SchedRead;
+foreach mx = SchedMxList in {
+ def ReadVMergeOp_ # mx : SchedRead;
+ foreach sew = SchedSEWSet<mx>.val in
+ def ReadVMergeOp_ # mx # "_E" # sew : SchedRead;
+}
//===----------------------------------------------------------------------===//
/// Define default scheduler resources for V.
@@ -1050,6 +1102,12 @@ def : ReadAdvance<ReadVMov8V, 0>;
// Others
def : ReadAdvance<ReadVMask, 0>;
+def : ReadAdvance<ReadVMergeOp_WorstCase, 0>;
+foreach mx = SchedMxList in {
+ def : ReadAdvance<!cast<SchedRead>("ReadVMergeOp_" # mx), 0>;
+ foreach sew = SchedSEWSet<mx>.val in
+ def : ReadAdvance<!cast<SchedRead>("ReadVMergeOp_" # mx # "_E" # sew), 0>;
+}
} // Unsupported
} // UnsupportedSchedV
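
The two ReadVMergeOp foreach loops above expand into one SchedRead per LMUL and one per (LMUL, SEW) pair via the TableGen paste operator. For mx = "M1" the expansion looks like the following (the E8-E64 SEW set is the usual one for M1; list not exhaustive):

    def ReadVMergeOp_M1     : SchedRead;
    def ReadVMergeOp_M1_E8  : SchedRead;
    def ReadVMergeOp_M1_E16 : SchedRead;
    def ReadVMergeOp_M1_E32 : SchedRead;
    def ReadVMergeOp_M1_E64 : SchedRead;
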
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index eec2e7359eda..7b64d3cee9c8 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -29,6 +29,12 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "RISCVGenSubtargetInfo.inc"
+namespace llvm::RISCVTuneInfoTable {
+
+#define GET_RISCVTuneInfoTable_IMPL
+#include "RISCVGenSearchableTables.inc"
+} // namespace llvm::RISCVTuneInfoTable
+
static cl::opt<bool> EnableSubRegLiveness("riscv-enable-subreg-liveness",
cl::init(true), cl::Hidden);
@@ -48,6 +54,13 @@ static cl::opt<unsigned> RISCVMaxBuildIntsCost(
cl::desc("The maximum cost used for building integers."), cl::init(0),
cl::Hidden);
+static cl::opt<bool> UseAA("riscv-use-aa", cl::init(true),
+ cl::desc("Enable the use of AA during codegen."));
+
+static cl::opt<unsigned> RISCVMinimumJumpTableEntries(
+ "riscv-min-jump-table-entries", cl::Hidden,
+ cl::desc("Set minimum number of entries to use a jump table on RISCV"));
+
void RISCVSubtarget::anchor() {}
RISCVSubtarget &
@@ -62,12 +75,13 @@ RISCVSubtarget::initializeSubtargetDependencies(const Triple &TT, StringRef CPU,
if (TuneCPU.empty())
TuneCPU = CPU;
- ParseSubtargetFeatures(CPU, TuneCPU, FS);
- if (Is64Bit) {
- XLenVT = MVT::i64;
- XLen = 64;
- }
+ TuneInfo = RISCVTuneInfoTable::getRISCVTuneInfo(TuneCPU);
+  // If there is no TuneInfo for this CPU, we fall back to generic.
+ if (!TuneInfo)
+ TuneInfo = RISCVTuneInfoTable::getRISCVTuneInfo("generic");
+ assert(TuneInfo && "TuneInfo shouldn't be nullptr!");
+ ParseSubtargetFeatures(CPU, TuneCPU, FS);
TargetABI = RISCVABI::computeTargetABI(TT, getFeatureBits(), ABIName);
RISCVFeatures::validate(TT, getFeatureBits());
return *this;
@@ -175,3 +189,13 @@ void RISCVSubtarget::getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
Mutations.push_back(createRISCVMacroFusionDAGMutation());
}
+
+/// Enable use of alias analysis during code generation (during MI
+/// scheduling, DAGCombine, etc.).
+bool RISCVSubtarget::useAA() const { return UseAA; }
+
+unsigned RISCVSubtarget::getMinimumJumpTableEntries() const {
+ return RISCVMinimumJumpTableEntries.getNumOccurrences() > 0
+ ? RISCVMinimumJumpTableEntries
+ : TuneInfo->MinimumJumpTableEntries;
+}
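
getRISCVTuneInfo is generated by the SearchableTables backend from records keyed by CPU name. The shape below is an assumption that mirrors the RISCVTuneInfo struct declared in RISCVSubtarget.h; the field widths, defaults, and table definition are illustrative, not taken from this diff:

    // Assumed record/table shape only; the real definitions live in the
    // RISC-V target's .td files.
    class RISCVTuneInfo {
      string Name = NAME;
      bits<8>  PrefFunctionAlignment = 1;
      bits<8>  PrefLoopAlignment = 1;
      bits<16> CacheLineSize = 0;
      bits<16> PrefetchDistance = 0;
      bits<16> MinPrefetchStride = 1;
      bits<32> MaxPrefetchIterationsAhead = 0;
      bits<32> MinimumJumpTableEntries = 5;
    }
    def RISCVTuneInfoTable : GenericTable {
      let FilterClass = "RISCVTuneInfo";
      let CppTypeName = "RISCVTuneInfo";
      let Fields = ["Name", "PrefFunctionAlignment", "PrefLoopAlignment",
                    "CacheLineSize", "PrefetchDistance", "MinPrefetchStride",
                    "MaxPrefetchIterationsAhead", "MinimumJumpTableEntries"];
      let PrimaryKey = ["Name"];
      let PrimaryKeyName = "getRISCVTuneInfo";
    }
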
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h
index a831beb7edd9..23d56cfa6e4e 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
+#include <bitset>
#define GET_SUBTARGETINFO_HEADER
#include "RISCVGenSubtargetInfo.inc"
@@ -32,13 +33,35 @@
namespace llvm {
class StringRef;
+namespace RISCVTuneInfoTable {
+
+struct RISCVTuneInfo {
+ const char *Name;
+ uint8_t PrefFunctionAlignment;
+ uint8_t PrefLoopAlignment;
+
+ // Information needed by LoopDataPrefetch.
+ uint16_t CacheLineSize;
+ uint16_t PrefetchDistance;
+ uint16_t MinPrefetchStride;
+ unsigned MaxPrefetchIterationsAhead;
+
+ unsigned MinimumJumpTableEntries;
+};
+
+#define GET_RISCVTuneInfoTable_DECL
+#include "RISCVGenSearchableTables.inc"
+} // namespace RISCVTuneInfoTable
+
class RISCVSubtarget : public RISCVGenSubtargetInfo {
public:
+ // clang-format off
enum RISCVProcFamilyEnum : uint8_t {
Others,
SiFive7,
+ VentanaVeyron,
};
-
+ // clang-format on
private:
virtual void anchor();
@@ -48,16 +71,13 @@ private:
bool ATTRIBUTE = DEFAULT;
#include "RISCVGenSubtargetInfo.inc"
- unsigned XLen = 32;
unsigned ZvlLen = 0;
- MVT XLenVT = MVT::i32;
unsigned RVVVectorBitsMin;
unsigned RVVVectorBitsMax;
uint8_t MaxInterleaveFactor = 2;
RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
std::bitset<RISCV::NUM_TARGET_REGS> UserReservedRegister;
- Align PrefFunctionAlignment;
- Align PrefLoopAlignment;
+ const RISCVTuneInfoTable::RISCVTuneInfo *TuneInfo;
RISCVFrameLowering FrameLowering;
RISCVInstrInfo InstrInfo;
@@ -98,8 +118,16 @@ public:
}
bool enableMachineScheduler() const override { return true; }
- Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
- Align getPrefLoopAlignment() const { return PrefLoopAlignment; }
+ bool enablePostRAScheduler() const override {
+ return getSchedModel().PostRAScheduler || UsePostRAScheduler;
+ }
+
+ Align getPrefFunctionAlignment() const {
+ return Align(TuneInfo->PrefFunctionAlignment);
+ }
+ Align getPrefLoopAlignment() const {
+ return Align(TuneInfo->PrefLoopAlignment);
+ }
/// Returns RISC-V processor family.
/// Avoid this function! CPU specifics should be kept local to this class
@@ -124,12 +152,15 @@ public:
return hasStdExtZfhOrZfhmin() || hasStdExtZhinxOrZhinxmin();
}
bool hasHalfFPLoadStoreMove() const {
- return HasStdExtZfh || HasStdExtZfhmin || HasStdExtZfbfmin ||
- HasStdExtZvfbfwma;
+ return hasStdExtZfhOrZfhmin() || HasStdExtZfbfmin;
}
bool is64Bit() const { return IsRV64; }
- MVT getXLenVT() const { return XLenVT; }
- unsigned getXLen() const { return XLen; }
+ MVT getXLenVT() const {
+ return is64Bit() ? MVT::i64 : MVT::i32;
+ }
+ unsigned getXLen() const {
+ return is64Bit() ? 64 : 32;
+ }
unsigned getFLen() const {
if (HasStdExtD)
return 64;
@@ -139,7 +170,7 @@ public:
return 0;
}
- unsigned getELEN() const {
+ unsigned getELen() const {
assert(hasVInstructions() && "Expected V extension");
return hasVInstructionsI64() ? 64 : 32;
}
@@ -162,16 +193,21 @@ public:
return UserReservedRegister[i];
}
- bool hasMacroFusion() const { return hasLUIADDIFusion(); }
+ bool hasMacroFusion() const {
+ return hasLUIADDIFusion() || hasAUIPCADDIFusion() ||
+ hasShiftedZExtFusion() || hasLDADDFusion();
+ }
// Vector codegen related methods.
bool hasVInstructions() const { return HasStdExtZve32x; }
bool hasVInstructionsI64() const { return HasStdExtZve64x; }
+ bool hasVInstructionsF16Minimal() const {
+ return HasStdExtZvfhmin || HasStdExtZvfh;
+ }
bool hasVInstructionsF16() const { return HasStdExtZvfh; }
- // FIXME: Consider Zfinx in the future
- bool hasVInstructionsF32() const { return HasStdExtZve32f && HasStdExtF; }
- // FIXME: Consider Zdinx in the future
- bool hasVInstructionsF64() const { return HasStdExtZve64d && HasStdExtD; }
+ bool hasVInstructionsBF16() const { return HasStdExtZvfbfmin; }
+ bool hasVInstructionsF32() const { return HasStdExtZve32f; }
+ bool hasVInstructionsF64() const { return HasStdExtZve64d; }
// F16 and F64 both require F32.
bool hasVInstructionsAnyF() const { return hasVInstructionsF32(); }
bool hasVInstructionsFullMultiply() const { return HasStdExtV; }
@@ -222,6 +258,26 @@ public:
void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
&Mutations) const override;
+
+ bool useAA() const override;
+
+  unsigned getCacheLineSize() const override {
+    return TuneInfo->CacheLineSize;
+  }
+  unsigned getPrefetchDistance() const override {
+    return TuneInfo->PrefetchDistance;
+  }
+  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
+                                unsigned NumStridedMemAccesses,
+                                unsigned NumPrefetches,
+                                bool HasCall) const override {
+    return TuneInfo->MinPrefetchStride;
+  }
+  unsigned getMaxPrefetchIterationsAhead() const override {
+    return TuneInfo->MaxPrefetchIterationsAhead;
+  }
+
+ unsigned getMinimumJumpTableEntries() const;
};
} // End llvm namespace
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 59dac5c7b57d..3abdb6003659 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/InitializePasses.h"
@@ -34,6 +35,7 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Scalar.h"
#include <optional>
using namespace llvm;
@@ -71,21 +73,55 @@ static cl::opt<bool> EnableRISCVCopyPropagation(
cl::desc("Enable the copy propagation with RISC-V copy instr"),
cl::init(true), cl::Hidden);
+static cl::opt<bool> EnableRISCVDeadRegisterElimination(
+ "riscv-enable-dead-defs", cl::Hidden,
+ cl::desc("Enable the pass that removes dead"
+ " definitons and replaces stores to"
+ " them with stores to x0"),
+ cl::init(true));
+
+static cl::opt<bool>
+ EnableSinkFold("riscv-enable-sink-fold",
+ cl::desc("Enable sinking and folding of instruction copies"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+ EnableLoopDataPrefetch("riscv-enable-loop-data-prefetch", cl::Hidden,
+ cl::desc("Enable the loop data prefetch pass"),
+ cl::init(true));
+
+static cl::opt<bool>
+ EnableSplitRegAlloc("riscv-split-regalloc", cl::Hidden,
+ cl::desc("Enable Split RegisterAlloc for RVV"),
+ cl::init(true));
+
+static cl::opt<bool> EnableMISchedLoadClustering(
+ "riscv-misched-load-clustering", cl::Hidden,
+ cl::desc("Enable load clustering in the machine scheduler"),
+ cl::init(false));
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
auto *PR = PassRegistry::getPassRegistry();
initializeGlobalISel(*PR);
+ initializeRISCVO0PreLegalizerCombinerPass(*PR);
+ initializeRISCVPreLegalizerCombinerPass(*PR);
+ initializeRISCVPostLegalizerCombinerPass(*PR);
initializeKCFIPass(*PR);
+ initializeRISCVDeadRegisterDefinitionsPass(*PR);
initializeRISCVMakeCompressibleOptPass(*PR);
initializeRISCVGatherScatterLoweringPass(*PR);
initializeRISCVCodeGenPreparePass(*PR);
+ initializeRISCVPostRAExpandPseudoPass(*PR);
initializeRISCVMergeBaseOffsetOptPass(*PR);
initializeRISCVOptWInstrsPass(*PR);
initializeRISCVPreRAExpandPseudoPass(*PR);
initializeRISCVExpandPseudoPass(*PR);
+ initializeRISCVFoldMasksPass(*PR);
initializeRISCVInsertVSETVLIPass(*PR);
initializeRISCVInsertReadWriteCSRPass(*PR);
+ initializeRISCVInsertWriteVXRMPass(*PR);
initializeRISCVDAGToDAGISelPass(*PR);
initializeRISCVInitUndefPass(*PR);
initializeRISCVMoveMergePass(*PR);
@@ -109,7 +145,7 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(TT, RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
@@ -177,13 +213,8 @@ RISCVTargetMachine::getSubtargetImpl(const Function &F) const {
llvm::bit_floor((RVVBitsMax < 64 || RVVBitsMax > 65536) ? 0 : RVVBitsMax);
SmallString<512> Key;
- Key += "RVVMin";
- Key += std::to_string(RVVBitsMin);
- Key += "RVVMax";
- Key += std::to_string(RVVBitsMax);
- Key += CPU;
- Key += TuneCPU;
- Key += FS;
+ raw_svector_ostream(Key) << "RVVMin" << RVVBitsMin << "RVVMax" << RVVBitsMax
+ << CPU << TuneCPU << FS;
auto &I = SubtargetMap[Key];
if (!I) {
// This needs to be done before we create a new subtarget since any
@@ -228,10 +259,84 @@ bool RISCVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
}
namespace {
+
+class RVVRegisterRegAlloc : public RegisterRegAllocBase<RVVRegisterRegAlloc> {
+public:
+ RVVRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
+ : RegisterRegAllocBase(N, D, C) {}
+};
+
+static bool onlyAllocateRVVReg(const TargetRegisterInfo &TRI,
+ const TargetRegisterClass &RC) {
+ return RISCV::VRRegClass.hasSubClassEq(&RC) ||
+ RISCV::VRM2RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRM4RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRM8RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRN2M1RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRN2M2RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRN2M4RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRN3M1RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRN3M2RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRN4M1RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRN4M2RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRN5M1RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRN6M1RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRN7M1RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRN8M1RegClass.hasSubClassEq(&RC);
+}
+
+static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
+
+static llvm::once_flag InitializeDefaultRVVRegisterAllocatorFlag;
+
+/// -riscv-rvv-regalloc=<fast|basic|greedy> command line option.
+/// This option overrides the register allocator used for RVV registers only.
+/// For example: -riscv-rvv-regalloc=basic
+static cl::opt<RVVRegisterRegAlloc::FunctionPassCtor, false,
+ RegisterPassParser<RVVRegisterRegAlloc>>
+ RVVRegAlloc("riscv-rvv-regalloc", cl::Hidden,
+ cl::init(&useDefaultRegisterAllocator),
+ cl::desc("Register allocator to use for RVV register."));
+
+static void initializeDefaultRVVRegisterAllocatorOnce() {
+ RegisterRegAlloc::FunctionPassCtor Ctor = RVVRegisterRegAlloc::getDefault();
+
+ if (!Ctor) {
+ Ctor = RVVRegAlloc;
+ RVVRegisterRegAlloc::setDefault(RVVRegAlloc);
+ }
+}
+
+static FunctionPass *createBasicRVVRegisterAllocator() {
+ return createBasicRegisterAllocator(onlyAllocateRVVReg);
+}
+
+static FunctionPass *createGreedyRVVRegisterAllocator() {
+ return createGreedyRegisterAllocator(onlyAllocateRVVReg);
+}
+
+static FunctionPass *createFastRVVRegisterAllocator() {
+ return createFastRegisterAllocator(onlyAllocateRVVReg, false);
+}
+
+static RVVRegisterRegAlloc basicRegAllocRVVReg("basic",
+ "basic register allocator",
+ createBasicRVVRegisterAllocator);
+static RVVRegisterRegAlloc
+ greedyRegAllocRVVReg("greedy", "greedy register allocator",
+ createGreedyRVVRegisterAllocator);
+
+static RVVRegisterRegAlloc fastRegAllocRVVReg("fast", "fast register allocator",
+ createFastRVVRegisterAllocator);
+
class RISCVPassConfig : public TargetPassConfig {
public:
RISCVPassConfig(RISCVTargetMachine &TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {
+ if (TM.getOptLevel() != CodeGenOptLevel::None)
+ substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
+ setEnableSinkAndFold(EnableSinkFold);
+ }
RISCVTargetMachine &getRISCVTargetMachine() const {
return getTM<RISCVTargetMachine>();
@@ -240,12 +345,16 @@ public:
ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override {
const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
+ ScheduleDAGMILive *DAG = nullptr;
+ if (EnableMISchedLoadClustering) {
+ DAG = createGenericSchedLive(C);
+ DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ }
if (ST.hasMacroFusion()) {
- ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+ DAG = DAG ? DAG : createGenericSchedLive(C);
DAG->addMutation(createRISCVMacroFusionDAGMutation());
- return DAG;
}
- return nullptr;
+ return DAG;
}
ScheduleDAGInstrs *
@@ -263,16 +372,22 @@ public:
bool addPreISel() override;
bool addInstSelector() override;
bool addIRTranslator() override;
+ void addPreLegalizeMachineIR() override;
bool addLegalizeMachineIR() override;
+ void addPreRegBankSelect() override;
bool addRegBankSelect() override;
bool addGlobalInstructionSelect() override;
void addPreEmitPass() override;
void addPreEmitPass2() override;
void addPreSched2() override;
void addMachineSSAOptimization() override;
+ FunctionPass *createRVVRegAllocPass(bool Optimized);
+ bool addRegAssignAndRewriteFast() override;
+ bool addRegAssignAndRewriteOptimized() override;
void addPreRegAlloc() override;
void addPostRegAlloc() override;
void addOptimizedRegAlloc() override;
+ void addFastRegAlloc() override;
};
} // namespace
@@ -280,10 +395,42 @@ TargetPassConfig *RISCVTargetMachine::createPassConfig(PassManagerBase &PM) {
return new RISCVPassConfig(*this, PM);
}
+FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {
+ // Initialize the global default.
+ llvm::call_once(InitializeDefaultRVVRegisterAllocatorFlag,
+ initializeDefaultRVVRegisterAllocatorOnce);
+
+ RegisterRegAlloc::FunctionPassCtor Ctor = RVVRegisterRegAlloc::getDefault();
+ if (Ctor != useDefaultRegisterAllocator)
+ return Ctor();
+
+ if (Optimized)
+ return createGreedyRVVRegisterAllocator();
+
+ return createFastRVVRegisterAllocator();
+}
+
+bool RISCVPassConfig::addRegAssignAndRewriteFast() {
+ if (EnableSplitRegAlloc)
+ addPass(createRVVRegAllocPass(false));
+ return TargetPassConfig::addRegAssignAndRewriteFast();
+}
+
+bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
+ if (EnableSplitRegAlloc) {
+ addPass(createRVVRegAllocPass(true));
+ addPass(createVirtRegRewriter(false));
+ }
+ return TargetPassConfig::addRegAssignAndRewriteOptimized();
+}
+
void RISCVPassConfig::addIRPasses() {
addPass(createAtomicExpandPass());
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOptLevel::None) {
+ if (EnableLoopDataPrefetch)
+ addPass(createLoopDataPrefetchPass());
+
addPass(createRISCVGatherScatterLoweringPass());
addPass(createInterleavedAccessPass());
addPass(createRISCVCodeGenPreparePass());
@@ -293,7 +440,7 @@ void RISCVPassConfig::addIRPasses() {
}
bool RISCVPassConfig::addPreISel() {
- if (TM->getOptLevel() != CodeGenOpt::None) {
+ if (TM->getOptLevel() != CodeGenOptLevel::None) {
// Add a barrier before instruction selection so that we will not get
// deleted block address after enabling default outlining. See D99707 for
// more details.
@@ -320,11 +467,24 @@ bool RISCVPassConfig::addIRTranslator() {
return false;
}
+void RISCVPassConfig::addPreLegalizeMachineIR() {
+ if (getOptLevel() == CodeGenOptLevel::None) {
+ addPass(createRISCVO0PreLegalizerCombiner());
+ } else {
+ addPass(createRISCVPreLegalizerCombiner());
+ }
+}
+
bool RISCVPassConfig::addLegalizeMachineIR() {
addPass(new Legalizer());
return false;
}
+void RISCVPassConfig::addPreRegBankSelect() {
+ if (getOptLevel() != CodeGenOptLevel::None)
+ addPass(createRISCVPostLegalizerCombiner());
+}
+
bool RISCVPassConfig::addRegBankSelect() {
addPass(new RegBankSelect());
return false;
@@ -336,6 +496,8 @@ bool RISCVPassConfig::addGlobalInstructionSelect() {
}
void RISCVPassConfig::addPreSched2() {
+ addPass(createRISCVPostRAExpandPseudoPass());
+
// Emit KCFI checks for indirect calls.
addPass(createKCFIPass());
}
@@ -349,12 +511,13 @@ void RISCVPassConfig::addPreEmitPass() {
// propagation after the machine outliner (which runs after addPreEmitPass)
// currently leads to incorrect code-gen, where copies to registers within
// outlined functions are removed erroneously.
- if (TM->getOptLevel() >= CodeGenOpt::Default && EnableRISCVCopyPropagation)
+ if (TM->getOptLevel() >= CodeGenOptLevel::Default &&
+ EnableRISCVCopyPropagation)
addPass(createMachineCopyPropagationPass(true));
}
void RISCVPassConfig::addPreEmitPass2() {
- if (TM->getOptLevel() != CodeGenOpt::None) {
+ if (TM->getOptLevel() != CodeGenOptLevel::None) {
addPass(createRISCVMoveMergePass());
// Schedule PushPop Optimization before expansion of Pseudo instruction,
// ensuring return instruction is detected correctly.
@@ -374,32 +537,45 @@ void RISCVPassConfig::addPreEmitPass2() {
}
void RISCVPassConfig::addMachineSSAOptimization() {
+ addPass(createRISCVFoldMasksPass());
+
TargetPassConfig::addMachineSSAOptimization();
+
if (EnableMachineCombiner)
addPass(&MachineCombinerID);
- if (TM->getTargetTriple().getArch() == Triple::riscv64) {
+ if (TM->getTargetTriple().isRISCV64()) {
addPass(createRISCVOptWInstrsPass());
}
}
void RISCVPassConfig::addPreRegAlloc() {
addPass(createRISCVPreRAExpandPseudoPass());
- if (TM->getOptLevel() != CodeGenOpt::None)
+ if (TM->getOptLevel() != CodeGenOptLevel::None)
addPass(createRISCVMergeBaseOffsetOptPass());
addPass(createRISCVInsertVSETVLIPass());
+ if (TM->getOptLevel() != CodeGenOptLevel::None &&
+ EnableRISCVDeadRegisterElimination)
+ addPass(createRISCVDeadRegisterDefinitionsPass());
addPass(createRISCVInsertReadWriteCSRPass());
+ addPass(createRISCVInsertWriteVXRMPass());
}
void RISCVPassConfig::addOptimizedRegAlloc() {
- if (getOptimizeRegAlloc())
- insertPass(&DetectDeadLanesID, &RISCVInitUndefID);
+ insertPass(&DetectDeadLanesID, &RISCVInitUndefID);
TargetPassConfig::addOptimizedRegAlloc();
}
+void RISCVPassConfig::addFastRegAlloc() {
+ addPass(createRISCVInitUndefPass());
+ TargetPassConfig::addFastRegAlloc();
+}
+
void RISCVPassConfig::addPostRegAlloc() {
- if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
+ if (TM->getOptLevel() != CodeGenOptLevel::None &&
+ EnableRedundantCopyElimination)
addPass(createRISCVRedundantCopyEliminationPass());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h
index 775422075314..68dfb3c81f2f 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h
@@ -29,7 +29,7 @@ public:
RISCVTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
const RISCVSubtarget *getSubtargetImpl(const Function &F) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 62883e962b4c..4614446b2150 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -34,28 +34,6 @@ static cl::opt<unsigned> SLPMaxVF(
"exclusively by SLP vectorizer."),
cl::Hidden);
-InstructionCost RISCVTTIImpl::getLMULCost(MVT VT) {
- // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
- // implementation-defined.
- if (!VT.isVector())
- return InstructionCost::getInvalid();
- unsigned DLenFactor = ST->getDLenFactor();
- unsigned Cost;
- if (VT.isScalableVector()) {
- unsigned LMul;
- bool Fractional;
- std::tie(LMul, Fractional) =
- RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
- if (Fractional)
- Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
- else
- Cost = (LMul * DLenFactor);
- } else {
- Cost = divideCeil(VT.getSizeInBits(), ST->getRealMinVLen() / DLenFactor);
- }
- return Cost;
-}
-
InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy() &&
@@ -67,8 +45,7 @@ InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
// Otherwise, we check how many instructions it will take to materialise.
const DataLayout &DL = getDataLayout();
- return RISCVMatInt::getIntMatCost(Imm, DL.getTypeSizeInBits(Ty),
- getST()->getFeatureBits());
+ return RISCVMatInt::getIntMatCost(Imm, DL.getTypeSizeInBits(Ty), *getST());
}
// Look for patterns of shift followed by AND that can be turned into a pair of
@@ -149,6 +126,9 @@ InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
// Power of 2 is a shift. Negated power of 2 is a shift and a negate.
if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
return TTI::TCC_Free;
+ // One more or less than a power of 2 can use SLLI+ADD/SUB.
+ if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
+ return TTI::TCC_Free;
// FIXME: There is no MULI instruction.
Takes12BitImm = true;
break;
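
The new TCC_Free case above rests on a simple strength reduction: a multiply by 2^k +/- 1 needs no materialized constant, only a shift plus an add or subtract (SLLI + ADD/SUB). A small sketch under that assumption, in plain C++:

#include <cassert>
#include <cstdint>

// x * 9 = x * (8 + 1): one shift plus one add.
uint64_t mulBy9(uint64_t X) { return (X << 3) + X; }
// x * 7 = x * (8 - 1): one shift plus one subtract.
uint64_t mulBy7(uint64_t X) { return (X << 3) - X; }

int main() {
  assert(mulBy9(5) == 45 && mulBy7(5) == 35);
  return 0;
}
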
@@ -192,7 +172,9 @@ RISCVTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
TargetTransformInfo::PopcntSupportKind
RISCVTTIImpl::getPopcntSupport(unsigned TyWidth) {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
- return ST->hasStdExtZbb() ? TTI::PSK_FastHardware : TTI::PSK_Software;
+ return ST->hasStdExtZbb() || ST->hasVendorXCVbitmanip()
+ ? TTI::PSK_FastHardware
+ : TTI::PSK_Software;
}
bool RISCVTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
@@ -263,19 +245,12 @@ static VectorType *getVRGatherIndexType(MVT DataVT, const RISCVSubtarget &ST,
return cast<VectorType>(EVT(IndexVT).getTypeForEVT(C));
}
-/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
-/// is generally quadratic in the number of vreg implied by LMUL. Note that
-/// operand (index and possibly mask) are handled separately.
-InstructionCost RISCVTTIImpl::getVRGatherVVCost(MVT VT) {
- return getLMULCost(VT) * getLMULCost(VT);
-}
-
InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *Tp, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
- Kind = improveShuffleKindFromMask(Kind, Mask);
+ Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp);
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
@@ -292,52 +267,86 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// If the size of the element is < ELEN then shuffles of interleaves and
// deinterleaves of 2 vectors can be lowered into the following
// sequences
- if (EltTp.getScalarSizeInBits() < ST->getELEN()) {
+ if (EltTp.getScalarSizeInBits() < ST->getELen()) {
// Example sequence:
// vsetivli zero, 4, e8, mf4, ta, ma (ignored)
// vwaddu.vv v10, v8, v9
// li a0, -1 (ignored)
// vwmaccu.vx v10, a0, v9
if (ShuffleVectorInst::isInterleaveMask(Mask, 2, Mask.size()))
- return 2 * LT.first * getLMULCost(LT.second);
+ return 2 * LT.first * TLI->getLMULCost(LT.second);
if (Mask[0] == 0 || Mask[0] == 1) {
auto DeinterleaveMask = createStrideMask(Mask[0], 2, Mask.size());
// Example sequence:
// vnsrl.wi v10, v8, 0
if (equal(DeinterleaveMask, Mask))
- return LT.first * getLMULCost(LT.second);
+ return LT.first * TLI->getLMULCost(LT.second);
}
}
-
- // vrgather + cost of generating the mask constant.
- // We model this for an unknown mask with a single vrgather.
- if (LT.first == 1 &&
- (LT.second.getScalarSizeInBits() != 8 ||
- LT.second.getVectorNumElements() <= 256)) {
- VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
- InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
- return IndexCost + getVRGatherVVCost(LT.second);
- }
}
- break;
+ // vrgather + cost of generating the mask constant.
+ // We model this for an unknown mask with a single vrgather.
+ if (LT.second.isFixedLengthVector() && LT.first == 1 &&
+ (LT.second.getScalarSizeInBits() != 8 ||
+ LT.second.getVectorNumElements() <= 256)) {
+ VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
+ InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
+ return IndexCost + TLI->getVRGatherVVCost(LT.second);
+ }
+ [[fallthrough]];
}
case TTI::SK_Transpose:
case TTI::SK_PermuteTwoSrc: {
- if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) {
- // 2 x (vrgather + cost of generating the mask constant) + cost of mask
- // register for the second vrgather. We model this for an unknown
- // (shuffle) mask.
- if (LT.first == 1 &&
- (LT.second.getScalarSizeInBits() != 8 ||
- LT.second.getVectorNumElements() <= 256)) {
- auto &C = Tp->getContext();
- auto EC = Tp->getElementCount();
- VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, C);
- VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC);
- InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
- InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
- return 2 * IndexCost + 2 * getVRGatherVVCost(LT.second) + MaskCost;
+ // 2 x (vrgather + cost of generating the mask constant) + cost of mask
+ // register for the second vrgather. We model this for an unknown
+ // (shuffle) mask.
+ if (LT.second.isFixedLengthVector() && LT.first == 1 &&
+ (LT.second.getScalarSizeInBits() != 8 ||
+ LT.second.getVectorNumElements() <= 256)) {
+ auto &C = Tp->getContext();
+ auto EC = Tp->getElementCount();
+ VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, C);
+ VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC);
+ InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
+ InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
+ return 2 * IndexCost + 2 * TLI->getVRGatherVVCost(LT.second) + MaskCost;
+ }
+ [[fallthrough]];
+ }
+ case TTI::SK_Select: {
+ // We are going to permute multiple sources and the result will be in
+ // multiple destinations. We provide an accurate cost only for splits where
+ // the element type remains the same.
+ if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
+ LT.second.isFixedLengthVector() &&
+ LT.second.getVectorElementType().getSizeInBits() ==
+ Tp->getElementType()->getPrimitiveSizeInBits() &&
+ LT.second.getVectorNumElements() <
+ cast<FixedVectorType>(Tp)->getNumElements() &&
+ divideCeil(Mask.size(),
+ cast<FixedVectorType>(Tp)->getNumElements()) ==
+ static_cast<unsigned>(*LT.first.getValue())) {
+ unsigned NumRegs = *LT.first.getValue();
+ unsigned VF = cast<FixedVectorType>(Tp)->getNumElements();
+ unsigned SubVF = PowerOf2Ceil(VF / NumRegs);
+ auto *SubVecTy = FixedVectorType::get(Tp->getElementType(), SubVF);
+
+ InstructionCost Cost = 0;
+ for (unsigned I = 0; I < NumRegs; ++I) {
+ bool IsSingleVector = true;
+ SmallVector<int> SubMask(SubVF, PoisonMaskElem);
+ transform(Mask.slice(I * SubVF,
+ I == NumRegs - 1 ? Mask.size() % SubVF : SubVF),
+ SubMask.begin(), [&](int I) {
+ bool SingleSubVector = I / VF == 0;
+ IsSingleVector &= SingleSubVector;
+ return (SingleSubVector ? 0 : 1) * SubVF + I % VF;
+ });
+ Cost += getShuffleCost(IsSingleVector ? TTI::SK_PermuteSingleSrc
+ : TTI::SK_PermuteTwoSrc,
+ SubVecTy, SubMask, CostKind, 0, nullptr);
}
+ return Cost;
}
break;
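
For reference, a sketch of the two mask shapes the shuffle-cost code above recognizes, with plain std::vector standing in for ShuffleVectorInst::isInterleaveMask and createStrideMask:

#include <cassert>
#include <vector>

// Interleave of two VF-element sources A and B: {0, VF, 1, VF+1, ...}.
std::vector<int> interleaveMask(int VF) {
  std::vector<int> M;
  for (int I = 0; I < VF; ++I) {
    M.push_back(I);
    M.push_back(I + VF);
  }
  return M;
}

// Deinterleave: every Stride-th element starting at Start.
std::vector<int> strideMask(int Start, int Stride, int VF) {
  std::vector<int> M;
  for (int I = 0; I < VF; ++I)
    M.push_back(Start + I * Stride);
  return M;
}

int main() {
  assert((interleaveMask(4) == std::vector<int>{0, 4, 1, 5, 2, 6, 3, 7}));
  assert((strideMask(1, 2, 4) == std::vector<int>{1, 3, 5, 7}));
  return 0;
}
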
@@ -356,19 +365,19 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// Example sequence:
// vsetivli zero, 4, e8, mf2, tu, ma (ignored)
// vslidedown.vi v8, v9, 2
- return LT.first * getLMULCost(LT.second);
+ return LT.first * TLI->getVSlideCost(LT.second);
case TTI::SK_InsertSubvector:
// Example sequence:
// vsetivli zero, 4, e8, mf2, tu, ma (ignored)
// vslideup.vi v8, v9, 2
- return LT.first * getLMULCost(LT.second);
+ return LT.first * TLI->getVSlideCost(LT.second);
case TTI::SK_Select: {
// Example sequence:
// li a0, 90
// vsetivli zero, 8, e8, mf2, ta, ma (ignored)
// vmv.s.x v0, a0
// vmerge.vvm v8, v9, v8, v0
- return LT.first * 3 * getLMULCost(LT.second);
+ return LT.first * 3 * TLI->getLMULCost(LT.second);
}
case TTI::SK_Broadcast: {
bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) ==
@@ -380,7 +389,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// vsetivli zero, 2, e8, mf8, ta, ma (ignored)
// vmv.v.x v8, a0
// vmsne.vi v0, v8, 0
- return LT.first * getLMULCost(LT.second) * 3;
+ return LT.first * TLI->getLMULCost(LT.second) * 3;
}
// Example sequence:
// vsetivli zero, 2, e8, mf8, ta, mu (ignored)
@@ -391,26 +400,24 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// vmv.v.x v8, a0
// vmsne.vi v0, v8, 0
- return LT.first * getLMULCost(LT.second) * 6;
+ return LT.first * TLI->getLMULCost(LT.second) * 6;
}
if (HasScalar) {
// Example sequence:
// vmv.v.x v8, a0
- return LT.first * getLMULCost(LT.second);
+ return LT.first * TLI->getLMULCost(LT.second);
}
// Example sequence:
// vrgather.vi v9, v8, 0
- // TODO: vrgather could be slower than vmv.v.x. It is
- // implementation-dependent.
- return LT.first * getLMULCost(LT.second);
+ return LT.first * TLI->getVRGatherVICost(LT.second);
}
case TTI::SK_Splice:
// vslidedown+vslideup.
// TODO: Multiplying by LT.first implies this legalizes into multiple copies
// of similar code, but I think we expand through memory.
- return 2 * LT.first * getLMULCost(LT.second);
+ return 2 * LT.first * TLI->getVSlideCost(LT.second);
case TTI::SK_Reverse: {
// TODO: Cases to improve here:
// * Illegal vector types
@@ -430,7 +437,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
if (LT.second.isFixedLengthVector())
// vrsub.vi has a 5 bit immediate field, otherwise an li suffices
LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
- InstructionCost GatherCost = 2 + getVRGatherVVCost(LT.second);
+ InstructionCost GatherCost = 2 + TLI->getVRGatherVVCost(LT.second);
// Mask operations additionally require an extend and a truncate.
InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0;
return LT.first * (LenCost + GatherCost + ExtendCost);
@@ -495,7 +502,7 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
InstructionCost Cost = MemCost;
for (unsigned Index : Indices) {
FixedVectorType *SubVecTy =
- FixedVectorType::get(FVTy->getElementType(), VF);
+ FixedVectorType::get(FVTy->getElementType(), VF * Factor);
auto Mask = createStrideMask(Index, Factor, VF);
InstructionCost ShuffleCost =
getShuffleCost(TTI::ShuffleKind::SK_PermuteSingleSrc, SubVecTy, Mask,
@@ -662,6 +669,31 @@ static const CostTblEntry VectorIntrinsicCostTable[]{
{Intrinsic::rint, MVT::nxv2f64, 7},
{Intrinsic::rint, MVT::nxv4f64, 7},
{Intrinsic::rint, MVT::nxv8f64, 7},
+ {Intrinsic::lrint, MVT::v2i32, 1},
+ {Intrinsic::lrint, MVT::v4i32, 1},
+ {Intrinsic::lrint, MVT::v8i32, 1},
+ {Intrinsic::lrint, MVT::v16i32, 1},
+ {Intrinsic::lrint, MVT::nxv1i32, 1},
+ {Intrinsic::lrint, MVT::nxv2i32, 1},
+ {Intrinsic::lrint, MVT::nxv4i32, 1},
+ {Intrinsic::lrint, MVT::nxv8i32, 1},
+ {Intrinsic::lrint, MVT::nxv16i32, 1},
+ {Intrinsic::lrint, MVT::v2i64, 1},
+ {Intrinsic::lrint, MVT::v4i64, 1},
+ {Intrinsic::lrint, MVT::v8i64, 1},
+ {Intrinsic::lrint, MVT::v16i64, 1},
+ {Intrinsic::lrint, MVT::nxv1i64, 1},
+ {Intrinsic::lrint, MVT::nxv2i64, 1},
+ {Intrinsic::lrint, MVT::nxv4i64, 1},
+ {Intrinsic::lrint, MVT::nxv8i64, 1},
+ {Intrinsic::llrint, MVT::v2i64, 1},
+ {Intrinsic::llrint, MVT::v4i64, 1},
+ {Intrinsic::llrint, MVT::v8i64, 1},
+ {Intrinsic::llrint, MVT::v16i64, 1},
+ {Intrinsic::llrint, MVT::nxv1i64, 1},
+ {Intrinsic::llrint, MVT::nxv2i64, 1},
+ {Intrinsic::llrint, MVT::nxv4i64, 1},
+ {Intrinsic::llrint, MVT::nxv8i64, 1},
{Intrinsic::nearbyint, MVT::v2f32, 9},
{Intrinsic::nearbyint, MVT::v4f32, 9},
{Intrinsic::nearbyint, MVT::v8f32, 9},
@@ -1045,6 +1077,8 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
case Intrinsic::floor:
case Intrinsic::trunc:
case Intrinsic::rint:
+ case Intrinsic::lrint:
+ case Intrinsic::llrint:
case Intrinsic::round:
case Intrinsic::roundeven: {
// These all use the same code.
@@ -1074,6 +1108,12 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return LT.first;
break;
}
+ case Intrinsic::ctpop: {
+ auto LT = getTypeLegalizationCost(RetTy);
+ if (ST->hasVInstructions() && ST->hasStdExtZvbb() && LT.second.isVector())
+ return LT.first;
+ break;
+ }
case Intrinsic::abs: {
auto LT = getTypeLegalizationCost(RetTy);
if (ST->hasVInstructions() && LT.second.isVector()) {
@@ -1142,8 +1182,8 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
// Skip if element size of Dst or Src is bigger than ELEN.
- if (Src->getScalarSizeInBits() > ST->getELEN() ||
- Dst->getScalarSizeInBits() > ST->getELEN())
+ if (Src->getScalarSizeInBits() > ST->getELen() ||
+ Dst->getScalarSizeInBits() > ST->getELen())
return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -1226,7 +1266,7 @@ RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
// Skip if scalar size of Ty is bigger than ELEN.
- if (Ty->getScalarSizeInBits() > ST->getELEN())
+ if (Ty->getScalarSizeInBits() > ST->getELen())
return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
@@ -1253,7 +1293,7 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
// Skip if scalar size of Ty is bigger than ELEN.
- if (Ty->getScalarSizeInBits() > ST->getELEN())
+ if (Ty->getScalarSizeInBits() > ST->getELen())
return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -1288,7 +1328,7 @@ InstructionCost RISCVTTIImpl::getExtendedReductionCost(
FMF, CostKind);
// Skip if scalar size of ResTy is bigger than ELEN.
- if (ResTy->getScalarSizeInBits() > ST->getELEN())
+ if (ResTy->getScalarSizeInBits() > ST->getELen())
return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
FMF, CostKind);
@@ -1349,7 +1389,7 @@ InstructionCost RISCVTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// handles the LT.first term for us.
if (std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
LT.second.isVector())
- BaseCost *= getLMULCost(LT.second);
+ BaseCost *= TLI->getLMULCost(LT.second);
return Cost + BaseCost;
}
@@ -1368,7 +1408,7 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
I);
// Skip if scalar size of ValTy is bigger than ELEN.
- if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() > ST->getELEN())
+ if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() > ST->getELen())
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
I);
@@ -1437,6 +1477,15 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
}
+InstructionCost RISCVTTIImpl::getCFInstrCost(unsigned Opcode,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return Opcode == Instruction::PHI ? 0 : 1;
+ // Branches are assumed to be predicted.
+ return 0;
+}
+
InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
@@ -1451,8 +1500,26 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Val);
// This type is legalized to a scalar type.
- if (!LT.second.isVector())
- return 0;
+ if (!LT.second.isVector()) {
+ auto *FixedVecTy = cast<FixedVectorType>(Val);
+ // If Index is a known constant, cost is zero.
+ if (Index != -1U)
+ return 0;
+ // Extract/InsertElement with non-constant index is very costly when
+ // scalarized; estimate cost of loads/stores sequence via the stack:
+ // ExtractElement cost: store vector to stack, load scalar;
+ // InsertElement cost: store vector to stack, store scalar, load vector.
+ Type *ElemTy = FixedVecTy->getElementType();
+ auto NumElems = FixedVecTy->getNumElements();
+ auto Align = DL.getPrefTypeAlign(ElemTy);
+ InstructionCost LoadCost =
+ getMemoryOpCost(Instruction::Load, ElemTy, Align, 0, CostKind);
+ InstructionCost StoreCost =
+ getMemoryOpCost(Instruction::Store, ElemTy, Align, 0, CostKind);
+ return Opcode == Instruction::ExtractElement
+ ? StoreCost * NumElems + LoadCost
+ : (StoreCost + LoadCost) * NumElems + StoreCost;
+ }
// For unsupported scalable vector.
if (LT.second.isScalableVector() && !LT.first.isValid())
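
A sketch of the scalarized extract/insert cost model added above, with the two getMemoryOpCost queries folded to unit costs for illustration:

#include <cassert>

// ExtractElement: spill the whole vector to the stack, load one scalar.
unsigned extractViaStackCost(unsigned NumElems, unsigned Load = 1,
                             unsigned Store = 1) {
  return Store * NumElems + Load;
}

// InsertElement: spill the vector, store the new scalar, reload the vector.
unsigned insertViaStackCost(unsigned NumElems, unsigned Load = 1,
                            unsigned Store = 1) {
  return (Store + Load) * NumElems + Store;
}

int main() {
  assert(extractViaStackCost(4) == 5); // 4 stores + 1 load
  assert(insertViaStackCost(4) == 9);  // (1 + 1) * 4 + 1
  return 0;
}
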
@@ -1461,6 +1528,31 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
if (!isTypeLegal(Val))
return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
+ // Mask vector extract/insert is expanded via e8.
+ if (Val->getScalarSizeInBits() == 1) {
+ VectorType *WideTy =
+ VectorType::get(IntegerType::get(Val->getContext(), 8),
+ cast<VectorType>(Val)->getElementCount());
+ if (Opcode == Instruction::ExtractElement) {
+ InstructionCost ExtendCost
+ = getCastInstrCost(Instruction::ZExt, WideTy, Val,
+ TTI::CastContextHint::None, CostKind);
+ InstructionCost ExtractCost
+ = getVectorInstrCost(Opcode, WideTy, CostKind, Index, nullptr, nullptr);
+ return ExtendCost + ExtractCost;
+ }
+ InstructionCost ExtendCost
+ = getCastInstrCost(Instruction::ZExt, WideTy, Val,
+ TTI::CastContextHint::None, CostKind);
+ InstructionCost InsertCost
+ = getVectorInstrCost(Opcode, WideTy, CostKind, Index, nullptr, nullptr);
+ InstructionCost TruncCost
+ = getCastInstrCost(Instruction::Trunc, Val, WideTy,
+ TTI::CastContextHint::None, CostKind);
+ return ExtendCost + InsertCost + TruncCost;
+ }
+
// In RVV, we could use vslidedown + vmv.x.s to extract element from vector
// and vslideup + vmv.s.x to insert element to vector.
unsigned BaseCost = 1;
@@ -1482,30 +1574,6 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
SlideCost = 1; // With a constant index, we do not need to use addi.
}
- // Mask vector extract/insert element is different from normal case.
- if (Val->getScalarSizeInBits() == 1) {
- // For extractelement, we need the following instructions:
- // vmv.v.i v8, 0
- // vmerge.vim v8, v8, 1, v0
- // vsetivli zero, 1, e8, m2, ta, mu (not count)
- // vslidedown.vx v8, v8, a0
- // vmv.x.s a0, v8
-
- // For insertelement, we need the following instructions:
- // vsetvli a2, zero, e8, m1, ta, mu (not count)
- // vmv.s.x v8, a0
- // vmv.v.i v9, 0
- // vmerge.vim v9, v9, 1, v0
- // addi a0, a1, 1
- // vsetvli zero, a0, e8, m1, tu, mu (not count)
- // vslideup.vx v9, v8, a1
- // vsetvli a0, zero, e8, m1, ta, mu (not count)
- // vand.vi v8, v9, 1
- // vmsne.vi v0, v8, 0
-
- // TODO: should we count these special vsetvlis?
- BaseCost = Opcode == Instruction::InsertElement ? 5 : 3;
- }
// Extracting an i64 element on a target with XLEN=32 needs more instructions.
if (Val->getScalarType()->isIntegerTy() &&
ST->getXLen() < Val->getScalarSizeInBits()) {
@@ -1547,7 +1615,7 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
Args, CxtI);
// Skip if scalar size of Ty is bigger than ELEN.
- if (isa<VectorType>(Ty) && Ty->getScalarSizeInBits() > ST->getELEN())
+ if (isa<VectorType>(Ty) && Ty->getScalarSizeInBits() > ST->getELen())
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
Args, CxtI);
@@ -1596,7 +1664,7 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
case ISD::FSUB:
case ISD::FMUL:
case ISD::FNEG: {
- return ConstantMatCost + getLMULCost(LT.second) * LT.first * 1;
+ return ConstantMatCost + TLI->getLMULCost(LT.second) * LT.first * 1;
}
default:
return ConstantMatCost +
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 7ffcb4828d0c..efc8350064a6 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -48,9 +48,6 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
/// actual target hardware.
unsigned getEstimatedVLFor(VectorType *Ty);
- /// Return the cost of LMUL. The larger the LMUL, the higher the cost.
- InstructionCost getLMULCost(MVT VT);
-
/// Return the cost of accessing a constant pool entry of the specified
/// type.
InstructionCost getConstantPoolLoadCost(Type *Ty,
@@ -123,8 +120,6 @@ public:
return ST->useRVVForFixedLengthVectors() ? 16 : 0;
}
- InstructionCost getVRGatherVVCost(MVT VT);
-
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
@@ -174,6 +169,9 @@ public:
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
+ InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
+
using BaseT::getVectorInstrCost;
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
TTI::TargetCostKind CostKind,
@@ -201,7 +199,7 @@ public:
return false;
EVT ElemType = DataTypeVT.getScalarType();
- if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize())
+ if (!ST->hasFastUnalignedAccess() && Alignment < ElemType.getStoreSize())
return false;
return TLI->isLegalElementTypeForRVV(ElemType);
@@ -226,7 +224,7 @@ public:
return false;
EVT ElemType = DataTypeVT.getScalarType();
- if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize())
+ if (!ST->hasFastUnalignedAccess() && Alignment < ElemType.getStoreSize())
return false;
return TLI->isLegalElementTypeForRVV(ElemType);
@@ -288,9 +286,9 @@ public:
case RecurKind::UMax:
case RecurKind::FMin:
case RecurKind::FMax:
- case RecurKind::SelectICmp:
- case RecurKind::SelectFCmp:
case RecurKind::FMulAdd:
+ case RecurKind::IAnyOf:
+ case RecurKind::FAnyOf:
return true;
default:
return false;
@@ -359,6 +357,10 @@ public:
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
const TargetTransformInfo::LSRCost &C2);
+
+ bool shouldFoldTerminatingConditionAfterLSR() const {
+ return true;
+ }
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp
index 4156a0026411..1b80e4b9277b 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp
@@ -18,7 +18,7 @@ namespace {
class SPIRVAsmBackend : public MCAsmBackend {
public:
- SPIRVAsmBackend(support::endianness Endian) : MCAsmBackend(Endian) {}
+ SPIRVAsmBackend(llvm::endianness Endian) : MCAsmBackend(Endian) {}
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
@@ -59,5 +59,5 @@ MCAsmBackend *llvm::createSPIRVAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &) {
- return new SPIRVAsmBackend(support::little);
+ return new SPIRVAsmBackend(llvm::endianness::little);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp
index 0b7b0160dee7..b69031adb167 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp
@@ -129,6 +129,24 @@ getSymbolicOperandCapabilities(SPIRV::OperandCategory::OperandCategory Category,
return Capabilities;
}
+CapabilityList
+getCapabilitiesEnabledByExtension(SPIRV::Extension::Extension Extension) {
+ const SPIRV::ExtensionEntry *Entry =
+ SPIRV::lookupSymbolicOperandsEnabledByExtension(
+ Extension, SPIRV::OperandCategory::CapabilityOperand);
+
+ CapabilityList Capabilities;
+ while (Entry &&
+ Entry->Category == SPIRV::OperandCategory::CapabilityOperand &&
+ Entry->ReqExtension == Extension) {
+ Capabilities.push_back(
+ static_cast<SPIRV::Capability::Capability>(Entry->Value));
+ ++Entry;
+ }
+
+ return Capabilities;
+}
+
ExtensionList
getSymbolicOperandExtensions(SPIRV::OperandCategory::OperandCategory Category,
uint32_t Value) {
@@ -159,7 +177,7 @@ std::string getLinkStringForBuiltIn(SPIRV::BuiltIn::BuiltIn BuiltInValue) {
bool getSpirvBuiltInIdByName(llvm::StringRef Name,
SPIRV::BuiltIn::BuiltIn &BI) {
const std::string Prefix = "__spirv_BuiltIn";
- if (!Name.startswith(Prefix))
+ if (!Name.starts_with(Prefix))
return false;
const SPIRV::SymbolicOperand *Lookup =
@@ -199,8 +217,7 @@ getExtInstSetFromString(std::string SetName) {
std::string getExtInstName(SPIRV::InstructionSet::InstructionSet Set,
uint32_t InstructionNumber) {
const SPIRV::ExtendedBuiltin *Lookup =
- SPIRV::lookupExtendedBuiltinBySetAndNumber(
- SPIRV::InstructionSet::OpenCL_std, InstructionNumber);
+ SPIRV::lookupExtendedBuiltinBySetAndNumber(Set, InstructionNumber);
if (!Lookup)
return "UNKNOWN_EXT_INST";
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h
index d6075f72e55c..616d2ea71b39 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h
@@ -223,6 +223,8 @@ getSymbolicOperandMaxVersion(SPIRV::OperandCategory::OperandCategory Category,
CapabilityList
getSymbolicOperandCapabilities(SPIRV::OperandCategory::OperandCategory Category,
uint32_t Value);
+CapabilityList
+getCapabilitiesEnabledByExtension(SPIRV::Extension::Extension Extension);
ExtensionList
getSymbolicOperandExtensions(SPIRV::OperandCategory::OperandCategory Category,
uint32_t Value);
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp
index a1e90cd104a9..163b2ec0fefe 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp
@@ -13,6 +13,7 @@
#include "SPIRVInstPrinter.h"
#include "SPIRV.h"
#include "SPIRVBaseInfo.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
@@ -49,14 +50,49 @@ void SPIRVInstPrinter::printRemainingVariableOps(const MCInst *MI,
void SPIRVInstPrinter::printOpConstantVarOps(const MCInst *MI,
unsigned StartIndex,
raw_ostream &O) {
+ const unsigned NumVarOps = MI->getNumOperands() - StartIndex;
+
+ assert((NumVarOps == 1 || NumVarOps == 2) &&
+ "Unsupported number of bits for literal variable");
+
O << ' ';
- if (MI->getNumOperands() - StartIndex == 2) { // Handle 64 bit literals.
- uint64_t Imm = MI->getOperand(StartIndex).getImm();
+
+ uint64_t Imm = MI->getOperand(StartIndex).getImm();
+
+ // Handle 64 bit literals.
+ if (NumVarOps == 2) {
Imm |= (MI->getOperand(StartIndex + 1).getImm() << 32);
- O << Imm;
- } else {
- printRemainingVariableOps(MI, StartIndex, O, true, false);
}
+
+ // Format and print float values.
+ if (MI->getOpcode() == SPIRV::OpConstantF) {
+ APFloat FP = NumVarOps == 1 ? APFloat(APInt(32, Imm).bitsToFloat())
+ : APFloat(APInt(64, Imm).bitsToDouble());
+
+ // Print infinity and NaN as hex floats.
+ // TODO: Make sure subnormal numbers are handled correctly as they may also
+ // require hex float notation.
+ if (FP.isInfinity()) {
+ if (FP.isNegative())
+ O << '-';
+ O << "0x1p+128";
+ return;
+ }
+ if (FP.isNaN()) {
+ O << "0x1.8p+128";
+ return;
+ }
+
+ // Format the value as decimal floating point or scientific notation (whichever
+ // is shorter), with enough digits of precision to produce the exact value.
+ O << format("%.*g", std::numeric_limits<double>::max_digits10,
+ FP.convertToDouble());
+
+ return;
+ }
+
+ // Print integer values directly.
+ O << Imm;
}
void SPIRVInstPrinter::recordOpExtInstImport(const MCInst *MI) {
@@ -169,7 +205,9 @@ void SPIRVInstPrinter::printInst(const MCInst *MI, uint64_t Address,
}
case SPIRV::OpConstantI:
case SPIRV::OpConstantF:
- printOpConstantVarOps(MI, NumFixedOps, OS);
+ // The last fixed operand, along with any variadic operands that follow,
+ // is part of the variable value.
+ printOpConstantVarOps(MI, NumFixedOps - 1, OS);
break;
default:
printRemainingVariableOps(MI, NumFixedOps, OS);
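
A sketch of the literal handling in printOpConstantVarOps above: the low 32 bits arrive in the first operand, an optional second operand supplies the high 32 bits, and float constants are printed from the bit pattern with max_digits10 so the text round-trips exactly. std::memcpy stands in for APInt::bitsToDouble():

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <limits>

double bitsToDouble(uint64_t Lo, uint64_t Hi) {
  uint64_t Imm = Lo | (Hi << 32);
  double D;
  std::memcpy(&D, &Imm, sizeof(D));
  return D;
}

int main() {
  // 0x400921FB54442D18 is the IEEE-754 bit pattern of pi.
  double Pi = bitsToDouble(0x54442D18u, 0x400921FBu);
  std::printf("%.*g\n", std::numeric_limits<double>::max_digits10, Pi);
  return 0;
}
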
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp
index 5555adc19010..8aea26d9963c 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp
@@ -43,7 +43,7 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
};
@@ -74,12 +74,14 @@ static bool hasType(const MCInst &MI, const MCInstrInfo &MII) {
return false;
}
-static void emitOperand(const MCOperand &Op, EndianWriter &OSE) {
+static void emitOperand(const MCOperand &Op, SmallVectorImpl<char> &CB) {
if (Op.isReg()) {
// Emit the id index starting at 1 (0 is an invalid index).
- OSE.write<uint32_t>(Register::virtReg2Index(Op.getReg()) + 1);
+ support::endian::write<uint32_t>(
+ CB, Register::virtReg2Index(Op.getReg()) + 1, llvm::endianness::little);
} else if (Op.isImm()) {
- OSE.write<uint32_t>(Op.getImm());
+ support::endian::write(CB, static_cast<uint32_t>(Op.getImm()),
+ llvm::endianness::little);
} else {
llvm_unreachable("Unexpected operand type in VReg");
}
@@ -87,36 +89,37 @@ static void emitOperand(const MCOperand &Op, EndianWriter &OSE) {
// Emit the type in operand 1 before the ID in operand 0 it defines, and all
// remaining operands in the order they come naturally.
-static void emitTypedInstrOperands(const MCInst &MI, EndianWriter &OSE) {
+static void emitTypedInstrOperands(const MCInst &MI,
+ SmallVectorImpl<char> &CB) {
unsigned NumOps = MI.getNumOperands();
- emitOperand(MI.getOperand(1), OSE);
- emitOperand(MI.getOperand(0), OSE);
+ emitOperand(MI.getOperand(1), CB);
+ emitOperand(MI.getOperand(0), CB);
for (unsigned i = 2; i < NumOps; ++i)
- emitOperand(MI.getOperand(i), OSE);
+ emitOperand(MI.getOperand(i), CB);
}
// Emit operands in the order they come naturally.
-static void emitUntypedInstrOperands(const MCInst &MI, EndianWriter &OSE) {
+static void emitUntypedInstrOperands(const MCInst &MI,
+ SmallVectorImpl<char> &CB) {
for (const auto &Op : MI)
- emitOperand(Op, OSE);
+ emitOperand(Op, CB);
}
-void SPIRVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+void SPIRVMCCodeEmitter::encodeInstruction(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- EndianWriter OSE(OS, support::little);
-
// Encode the first 32-bit SPIR-V word with the total word count and the opcode.
const uint64_t OpCode = getBinaryCodeForInstr(MI, Fixups, STI);
const uint32_t NumWords = MI.getNumOperands() + 1;
const uint32_t FirstWord = (NumWords << 16) | OpCode;
- OSE.write<uint32_t>(FirstWord);
+ support::endian::write(CB, FirstWord, llvm::endianness::little);
// Emit the instruction arguments (emitting the output type first if present).
if (hasType(MI, MCII))
- emitTypedInstrOperands(MI, OSE);
+ emitTypedInstrOperands(MI, CB);
else
- emitUntypedInstrOperands(MI, OSE);
+ emitUntypedInstrOperands(MI, CB);
}
#include "SPIRVGenMCCodeEmitter.inc"
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp
index 62ce15550ae7..78dfbf4ec932 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp
@@ -88,7 +88,8 @@ static MCInstrAnalysis *createSPIRVInstrAnalysis(const MCInstrInfo *Info) {
}
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSPIRVTargetMC() {
- for (Target *T : {&getTheSPIRV32Target(), &getTheSPIRV64Target()}) {
+ for (Target *T : {&getTheSPIRV32Target(), &getTheSPIRV64Target(),
+ &getTheSPIRVLogicalTarget()}) {
RegisterMCAsmInfo<SPIRVMCAsmInfo> X(*T);
TargetRegistry::RegisterMCInstrInfo(*T, createSPIRVMCInstrInfo);
TargetRegistry::RegisterMCRegInfo(*T, createSPIRVMCRegisterInfo);
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRV.h b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRV.h
index 20834c547646..3151d69ab745 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRV.h
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRV.h
@@ -19,7 +19,7 @@ class SPIRVSubtarget;
class InstructionSelector;
class RegisterBankInfo;
-ModulePass *createSPIRVPrepareFunctionsPass();
+ModulePass *createSPIRVPrepareFunctionsPass(const SPIRVTargetMachine &TM);
FunctionPass *createSPIRVRegularizerPass();
FunctionPass *createSPIRVPreLegalizerPass();
FunctionPass *createSPIRVEmitIntrinsicsPass(SPIRVTargetMachine *TM);
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
index d07c0bcdf9af..27da0f21f157 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
@@ -66,6 +66,9 @@ public:
void outputExtFuncDecls();
void outputExecutionModeFromMDNode(Register Reg, MDNode *Node,
SPIRV::ExecutionMode::ExecutionMode EM);
+ void outputExecutionModeFromNumthreadsAttribute(
+ const Register &Reg, const Attribute &Attr,
+ SPIRV::ExecutionMode::ExecutionMode EM);
void outputExecutionMode(const Module &M);
void outputAnnotations(const Module &M);
void outputModuleSections();
@@ -412,6 +415,29 @@ void SPIRVAsmPrinter::outputExecutionModeFromMDNode(
outputMCInst(Inst);
}
+void SPIRVAsmPrinter::outputExecutionModeFromNumthreadsAttribute(
+ const Register &Reg, const Attribute &Attr,
+ SPIRV::ExecutionMode::ExecutionMode EM) {
+ assert(Attr.isValid() && "Function called with an invalid attribute.");
+
+ MCInst Inst;
+ Inst.setOpcode(SPIRV::OpExecutionMode);
+ Inst.addOperand(MCOperand::createReg(Reg));
+ Inst.addOperand(MCOperand::createImm(static_cast<unsigned>(EM)));
+
+ SmallVector<StringRef> NumThreads;
+ Attr.getValueAsString().split(NumThreads, ',');
+ assert(NumThreads.size() == 3 && "invalid numthreads");
+ for (uint32_t i = 0; i < 3; ++i) {
+ uint32_t V;
+ [[maybe_unused]] bool Result = NumThreads[i].getAsInteger(10, V);
+ assert(!Result && "Failed to parse numthreads");
+ Inst.addOperand(MCOperand::createImm(V));
+ }
+
+ outputMCInst(Inst);
+}
+
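
A sketch of the attribute grammar assumed above: "hlsl.numthreads" carries a comma-separated triple such as "8,8,1", which becomes the three LocalSize operands. std::stoul stands in for StringRef::getAsInteger:

#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

std::vector<unsigned> parseNumThreads(const std::string &S) {
  std::vector<unsigned> V;
  std::size_t Pos = 0;
  while (Pos <= S.size()) {
    std::size_t Comma = S.find(',', Pos);
    if (Comma == std::string::npos)
      Comma = S.size();
    V.push_back(static_cast<unsigned>(std::stoul(S.substr(Pos, Comma - Pos))));
    Pos = Comma + 1;
  }
  return V;
}

int main() {
  auto V = parseNumThreads("8,8,1");
  assert(V.size() == 3 && V[0] == 8 && V[1] == 8 && V[2] == 1);
  return 0;
}
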
void SPIRVAsmPrinter::outputExecutionMode(const Module &M) {
NamedMDNode *Node = M.getNamedMetadata("spirv.ExecutionMode");
if (Node) {
@@ -431,6 +457,9 @@ void SPIRVAsmPrinter::outputExecutionMode(const Module &M) {
if (MDNode *Node = F.getMetadata("reqd_work_group_size"))
outputExecutionModeFromMDNode(FReg, Node,
SPIRV::ExecutionMode::LocalSize);
+ if (Attribute Attr = F.getFnAttribute("hlsl.numthreads"); Attr.isValid())
+ outputExecutionModeFromNumthreadsAttribute(
+ FReg, Attr, SPIRV::ExecutionMode::LocalSize);
if (MDNode *Node = F.getMetadata("work_group_size_hint"))
outputExecutionModeFromMDNode(FReg, Node,
SPIRV::ExecutionMode::LocalSizeHint);
@@ -447,7 +476,7 @@ void SPIRVAsmPrinter::outputExecutionMode(const Module &M) {
Inst.addOperand(MCOperand::createImm(TypeCode));
outputMCInst(Inst);
}
- if (!M.getNamedMetadata("spirv.ExecutionMode") &&
+ if (ST->isOpenCLEnv() && !M.getNamedMetadata("spirv.ExecutionMode") &&
!M.getNamedMetadata("opencl.enable.FP_CONTRACT")) {
MCInst Inst;
Inst.setOpcode(SPIRV::OpExecutionMode);
@@ -542,4 +571,5 @@ bool SPIRVAsmPrinter::doInitialization(Module &M) {
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSPIRVAsmPrinter() {
RegisterAsmPrinter<SPIRVAsmPrinter> X(getTheSPIRV32Target());
RegisterAsmPrinter<SPIRVAsmPrinter> Y(getTheSPIRV64Target());
+ RegisterAsmPrinter<SPIRVAsmPrinter> Z(getTheSPIRVLogicalTarget());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index c53f1643adc0..5ac45079bd00 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -163,7 +163,7 @@ lookupBuiltin(StringRef DemangledCall,
// the information after angle brackets and return type removed.
if (BuiltinName.find('<') && BuiltinName.back() == '>') {
BuiltinName = BuiltinName.substr(0, BuiltinName.find('<'));
- BuiltinName = BuiltinName.substr(BuiltinName.find_last_of(" ") + 1);
+ BuiltinName = BuiltinName.substr(BuiltinName.find_last_of(' ') + 1);
}
// Check if the extracted name begins with "__spirv_ImageSampleExplicitLod"
@@ -872,8 +872,8 @@ static bool generateGroupInst(const SPIRV::IncomingCall *Call,
std::tie(GroupResultRegister, GroupResultType) =
buildBoolRegister(MIRBuilder, Call->ReturnType, GR);
- auto Scope = Builtin->Name.startswith("sub_group") ? SPIRV::Scope::Subgroup
- : SPIRV::Scope::Workgroup;
+ auto Scope = Builtin->Name.starts_with("sub_group") ? SPIRV::Scope::Subgroup
+ : SPIRV::Scope::Workgroup;
Register ScopeRegister = buildConstantIntReg(Scope, MIRBuilder, GR);
// Build work/sub group instruction.
@@ -976,7 +976,7 @@ static bool genWorkgroupQuery(const SPIRV::IncomingCall *Call,
// Use Intrinsic::spv_extractelt so dynamic vs static extraction is
// handled later: extr = spv_extractelt LoadedVector, IndexRegister.
MachineInstrBuilder ExtractInst = MIRBuilder.buildIntrinsic(
- Intrinsic::spv_extractelt, ArrayRef<Register>{Extracted}, true);
+ Intrinsic::spv_extractelt, ArrayRef<Register>{Extracted}, true, false);
ExtractInst.addUse(LoadedVector).addUse(IndexRegister);
// If the index is dynamic, we need to check that it is < 3 and then use a select.
@@ -1644,8 +1644,8 @@ static bool buildEnqueueKernel(const SPIRV::IncomingCall *Call,
Register Reg = MRI->createVirtualRegister(&SPIRV::IDRegClass);
MRI->setType(Reg, LLType);
GR->assignSPIRVTypeToVReg(PointerSizeTy, Reg, MIRBuilder.getMF());
- auto GEPInst = MIRBuilder.buildIntrinsic(Intrinsic::spv_gep,
- ArrayRef<Register>{Reg}, true);
+ auto GEPInst = MIRBuilder.buildIntrinsic(
+ Intrinsic::spv_gep, ArrayRef<Register>{Reg}, true, false);
GEPInst
.addImm(GepMI->getOperand(2).getImm()) // In bound.
.addUse(ArrayMI->getOperand(0).getReg()) // Alloca.
@@ -1999,71 +1999,17 @@ struct OpenCLType {
//===----------------------------------------------------------------------===//
static Type *parseTypeString(const StringRef Name, LLVMContext &Context) {
- if (Name.startswith("void"))
+ if (Name.starts_with("void"))
return Type::getVoidTy(Context);
- else if (Name.startswith("int") || Name.startswith("uint"))
+ else if (Name.starts_with("int") || Name.starts_with("uint"))
return Type::getInt32Ty(Context);
- else if (Name.startswith("float"))
+ else if (Name.starts_with("float"))
return Type::getFloatTy(Context);
- else if (Name.startswith("half"))
+ else if (Name.starts_with("half"))
return Type::getHalfTy(Context);
llvm_unreachable("Unable to recognize type!");
}
-static const TargetExtType *parseToTargetExtType(const Type *OpaqueType,
- MachineIRBuilder &MIRBuilder) {
- assert(isSpecialOpaqueType(OpaqueType) &&
- "Not a SPIR-V/OpenCL special opaque type!");
- assert(!OpaqueType->isTargetExtTy() &&
- "This already is SPIR-V/OpenCL TargetExtType!");
-
- StringRef NameWithParameters = OpaqueType->getStructName();
-
- // Pointers-to-opaque-structs representing OpenCL types are first translated
- // to equivalent SPIR-V types. OpenCL builtin type names should have the
- // following format: e.g. %opencl.event_t
- if (NameWithParameters.startswith("opencl.")) {
- const SPIRV::OpenCLType *OCLTypeRecord =
- SPIRV::lookupOpenCLType(NameWithParameters);
- if (!OCLTypeRecord)
- report_fatal_error("Missing TableGen record for OpenCL type: " +
- NameWithParameters);
- NameWithParameters = OCLTypeRecord->SpirvTypeLiteral;
- // Continue with the SPIR-V builtin type...
- }
-
- // Names of the opaque structs representing a SPIR-V builtins without
- // parameters should have the following format: e.g. %spirv.Event
- assert(NameWithParameters.startswith("spirv.") &&
- "Unknown builtin opaque type!");
-
- // Parameterized SPIR-V builtins names follow this format:
- // e.g. %spirv.Image._void_1_0_0_0_0_0_0, %spirv.Pipe._0
- if (NameWithParameters.find('_') == std::string::npos)
- return TargetExtType::get(OpaqueType->getContext(), NameWithParameters);
-
- SmallVector<StringRef> Parameters;
- unsigned BaseNameLength = NameWithParameters.find('_') - 1;
- SplitString(NameWithParameters.substr(BaseNameLength + 1), Parameters, "_");
-
- SmallVector<Type *, 1> TypeParameters;
- bool HasTypeParameter = !isDigit(Parameters[0][0]);
- if (HasTypeParameter)
- TypeParameters.push_back(parseTypeString(
- Parameters[0], MIRBuilder.getMF().getFunction().getContext()));
- SmallVector<unsigned> IntParameters;
- for (unsigned i = HasTypeParameter ? 1 : 0; i < Parameters.size(); i++) {
- unsigned IntParameter = 0;
- bool ValidLiteral = !Parameters[i].getAsInteger(10, IntParameter);
- assert(ValidLiteral &&
- "Invalid format of SPIR-V builtin parameter literal!");
- IntParameters.push_back(IntParameter);
- }
- return TargetExtType::get(OpaqueType->getContext(),
- NameWithParameters.substr(0, BaseNameLength),
- TypeParameters, IntParameters);
-}
-
//===----------------------------------------------------------------------===//
// Implementation functions for builtin types.
//===----------------------------------------------------------------------===//
@@ -2127,6 +2073,56 @@ static SPIRVType *getSampledImageType(const TargetExtType *OpaqueType,
}
namespace SPIRV {
+const TargetExtType *
+parseBuiltinTypeNameToTargetExtType(std::string TypeName,
+ MachineIRBuilder &MIRBuilder) {
+ StringRef NameWithParameters = TypeName;
+
+ // Pointers-to-opaque-structs representing OpenCL types are first translated
+ // to equivalent SPIR-V types. OpenCL builtin type names should have the
+ // following format: e.g. %opencl.event_t
+ if (NameWithParameters.starts_with("opencl.")) {
+ const SPIRV::OpenCLType *OCLTypeRecord =
+ SPIRV::lookupOpenCLType(NameWithParameters);
+ if (!OCLTypeRecord)
+ report_fatal_error("Missing TableGen record for OpenCL type: " +
+ NameWithParameters);
+ NameWithParameters = OCLTypeRecord->SpirvTypeLiteral;
+ // Continue with the SPIR-V builtin type...
+ }
+
+ // Names of the opaque structs representing SPIR-V builtins without
+ // parameters should have the following format: e.g. %spirv.Event
+ assert(NameWithParameters.starts_with("spirv.") &&
+ "Unknown builtin opaque type!");
+
+ // Parameterized SPIR-V builtin names follow this format:
+ // e.g. %spirv.Image._void_1_0_0_0_0_0_0, %spirv.Pipe._0
+ if (NameWithParameters.find('_') == std::string::npos)
+ return TargetExtType::get(MIRBuilder.getContext(), NameWithParameters);
+
+ SmallVector<StringRef> Parameters;
+ unsigned BaseNameLength = NameWithParameters.find('_') - 1;
+ SplitString(NameWithParameters.substr(BaseNameLength + 1), Parameters, "_");
+
+ SmallVector<Type *, 1> TypeParameters;
+ bool HasTypeParameter = !isDigit(Parameters[0][0]);
+ if (HasTypeParameter)
+ TypeParameters.push_back(parseTypeString(
+ Parameters[0], MIRBuilder.getMF().getFunction().getContext()));
+ SmallVector<unsigned> IntParameters;
+ for (unsigned i = HasTypeParameter ? 1 : 0; i < Parameters.size(); i++) {
+ unsigned IntParameter = 0;
+ bool ValidLiteral = !Parameters[i].getAsInteger(10, IntParameter);
+ assert(ValidLiteral &&
+ "Invalid format of SPIR-V builtin parameter literal!");
+ IntParameters.push_back(IntParameter);
+ }
+ return TargetExtType::get(MIRBuilder.getContext(),
+ NameWithParameters.substr(0, BaseNameLength),
+ TypeParameters, IntParameters);
+}
+
SPIRVType *lowerBuiltinType(const Type *OpaqueType,
SPIRV::AccessQualifier::AccessQualifier AccessQual,
MachineIRBuilder &MIRBuilder,
@@ -2141,7 +2137,8 @@ SPIRVType *lowerBuiltinType(const Type *OpaqueType,
// will be removed in the future release of LLVM.
const TargetExtType *BuiltinType = dyn_cast<TargetExtType>(OpaqueType);
if (!BuiltinType)
- BuiltinType = parseToTargetExtType(OpaqueType, MIRBuilder);
+ BuiltinType = parseBuiltinTypeNameToTargetExtType(
+ OpaqueType->getStructName().str(), MIRBuilder);
unsigned NumStartingVRegs = MIRBuilder.getMRI()->getNumVirtRegs();
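
A standalone sketch of the builtin-name grammar that parseBuiltinTypeNameToTargetExtType handles above: a base name such as spirv.Image, optionally followed by "._" and '_'-separated parameters, where the first may be a type keyword and the rest are integer literals. std::string stands in for StringRef/SplitString:

#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

struct ParsedBuiltin {
  std::string BaseName;
  std::vector<std::string> Params;
};

ParsedBuiltin parseBuiltinName(const std::string &Name) {
  ParsedBuiltin P;
  std::size_t Underscore = Name.find('_');
  if (Underscore == std::string::npos) {
    P.BaseName = Name; // e.g. "spirv.Event": no parameters at all
    return P;
  }
  P.BaseName = Name.substr(0, Underscore - 1); // drop the trailing '.'
  std::size_t Pos = Underscore + 1;
  while (Pos <= Name.size()) {
    std::size_t Next = Name.find('_', Pos);
    if (Next == std::string::npos)
      Next = Name.size();
    P.Params.push_back(Name.substr(Pos, Next - Pos));
    Pos = Next + 1;
  }
  return P;
}

int main() {
  auto P = parseBuiltinName("spirv.Image._void_1_0");
  assert(P.BaseName == "spirv.Image" && P.Params.size() == 3);
  assert(P.Params[0] == "void" && P.Params[2] == "0");
  return 0;
}
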
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVBuiltins.h b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVBuiltins.h
index 7ee5c49dc5b3..6f9572954648 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVBuiltins.h
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVBuiltins.h
@@ -37,6 +37,18 @@ std::optional<bool> lowerBuiltin(const StringRef DemangledCall,
const Register OrigRet, const Type *OrigRetTy,
const SmallVectorImpl<Register> &Args,
SPIRVGlobalRegistry *GR);
+
+/// Translates a string representing a SPIR-V or OpenCL builtin type to a
+/// TargetExtType that can be further lowered with lowerBuiltinType().
+///
+/// \return A TargetExtType representing the builtin SPIR-V type.
+///
+/// \p TypeName is the full string representation of the SPIR-V or OpenCL
+/// builtin type.
+const TargetExtType *
+parseBuiltinTypeNameToTargetExtType(std::string TypeName,
+ MachineIRBuilder &MIRBuilder);
+
/// Handles the translation of the provided special opaque/builtin type \p Type
/// to SPIR-V type. Generates the corresponding machine instructions for the
/// target type or gets the already existing OpType<...> register from the
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
index 47b25a1f8351..629db8e2eb4d 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
@@ -194,23 +194,70 @@ getKernelArgTypeQual(const Function &KernelFunction, unsigned ArgIdx) {
return {};
}
-static Type *getArgType(const Function &F, unsigned ArgIdx) {
+static SPIRVType *getArgSPIRVType(const Function &F, unsigned ArgIdx,
+ SPIRVGlobalRegistry *GR,
+ MachineIRBuilder &MIRBuilder) {
+ // Read argument's access qualifier from metadata or default.
+ SPIRV::AccessQualifier::AccessQualifier ArgAccessQual =
+ getArgAccessQual(F, ArgIdx);
+
Type *OriginalArgType = getOriginalFunctionType(F)->getParamType(ArgIdx);
+
+ // In case of non-kernel SPIR-V function or already TargetExtType, use the
+ // original IR type.
if (F.getCallingConv() != CallingConv::SPIR_KERNEL ||
isSpecialOpaqueType(OriginalArgType))
- return OriginalArgType;
+ return GR->getOrCreateSPIRVType(OriginalArgType, MIRBuilder, ArgAccessQual);
MDString *MDKernelArgType =
getKernelArgAttribute(F, ArgIdx, "kernel_arg_type");
- if (!MDKernelArgType || !MDKernelArgType->getString().endswith("_t"))
- return OriginalArgType;
-
- std::string KernelArgTypeStr = "opencl." + MDKernelArgType->getString().str();
- Type *ExistingOpaqueType =
- StructType::getTypeByName(F.getContext(), KernelArgTypeStr);
- return ExistingOpaqueType
- ? ExistingOpaqueType
- : StructType::create(F.getContext(), KernelArgTypeStr);
+ if (!MDKernelArgType || (MDKernelArgType->getString().ends_with("*") &&
+ MDKernelArgType->getString().ends_with("_t")))
+ return GR->getOrCreateSPIRVType(OriginalArgType, MIRBuilder, ArgAccessQual);
+
+ if (MDKernelArgType->getString().ends_with("*"))
+ return GR->getOrCreateSPIRVTypeByName(
+ MDKernelArgType->getString(), MIRBuilder,
+ addressSpaceToStorageClass(OriginalArgType->getPointerAddressSpace()));
+
+ if (MDKernelArgType->getString().ends_with("_t"))
+ return GR->getOrCreateSPIRVTypeByName(
+ "opencl." + MDKernelArgType->getString().str(), MIRBuilder,
+ SPIRV::StorageClass::Function, ArgAccessQual);
+
+ llvm_unreachable("Unable to recognize argument type name.");
+}
+
+static bool isEntryPoint(const Function &F) {
+ // OpenCL handling: any function with the SPIR_KERNEL
+ // calling convention will be a potential entry point.
+ if (F.getCallingConv() == CallingConv::SPIR_KERNEL)
+ return true;
+
+ // HLSL handling: special attributes are emitted from the
+ // front-end.
+ if (F.getFnAttribute("hlsl.shader").isValid())
+ return true;
+
+ return false;
+}
+
+static SPIRV::ExecutionModel::ExecutionModel
+getExecutionModel(const SPIRVSubtarget &STI, const Function &F) {
+ if (STI.isOpenCLEnv())
+ return SPIRV::ExecutionModel::Kernel;
+
+ auto attribute = F.getFnAttribute("hlsl.shader");
+ if (!attribute.isValid()) {
+ report_fatal_error(
+ "This entry point lacks mandatory hlsl.shader attribute.");
+ }
+
+ const auto value = attribute.getValueAsString();
+ if (value == "compute")
+ return SPIRV::ExecutionModel::GLCompute;
+
+ report_fatal_error("This HLSL entry point is not supported by this backend.");
}
bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
@@ -230,10 +277,8 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
// TODO: handle the case of multiple registers.
if (VRegs[i].size() > 1)
return false;
- SPIRV::AccessQualifier::AccessQualifier ArgAccessQual =
- getArgAccessQual(F, i);
- auto *SpirvTy = GR->assignTypeToVReg(getArgType(F, i), VRegs[i][0],
- MIRBuilder, ArgAccessQual);
+ auto *SpirvTy = getArgSPIRVType(F, i, GR, MIRBuilder);
+ GR->assignSPIRVTypeToVReg(SpirvTy, VRegs[i][0], MIRBuilder.getMF());
ArgTypeVRegs.push_back(SpirvTy);
if (Arg.hasName())
@@ -336,9 +381,11 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
buildOpName(FuncVReg, F.getName(), MIRBuilder);
// Handle entry points and function linkage.
- if (F.getCallingConv() == CallingConv::SPIR_KERNEL) {
+ if (isEntryPoint(F)) {
+ const auto &STI = MIRBuilder.getMF().getSubtarget<SPIRVSubtarget>();
+ auto executionModel = getExecutionModel(STI, F);
auto MIB = MIRBuilder.buildInstr(SPIRV::OpEntryPoint)
- .addImm(static_cast<uint32_t>(SPIRV::ExecutionModel::Kernel))
+ .addImm(static_cast<uint32_t>(executionModel))
.addUse(FuncVReg);
addStringImm(F.getName(), MIB);
} else if (F.getLinkage() == GlobalValue::LinkageTypes::ExternalLinkage ||
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h
index 00553d9710b6..96cc621791e9 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h
@@ -59,6 +59,7 @@ struct SpecialTypeDescriptor {
STK_Sampler,
STK_Pipe,
STK_DeviceEvent,
+ STK_Pointer,
STK_Last = -1
};
SpecialTypeKind Kind;
@@ -160,6 +161,23 @@ struct DeviceEventTypeDescriptor : public SpecialTypeDescriptor {
return TD->Kind == SpecialTypeKind::STK_DeviceEvent;
}
};
+
+struct PointerTypeDescriptor : public SpecialTypeDescriptor {
+ const Type *ElementType;
+ unsigned AddressSpace;
+
+ PointerTypeDescriptor() = delete;
+ PointerTypeDescriptor(const Type *ElementType, unsigned AddressSpace)
+ : SpecialTypeDescriptor(SpecialTypeKind::STK_Pointer),
+ ElementType(ElementType), AddressSpace(AddressSpace) {
+ Hash = (DenseMapInfo<Type *>().getHashValue(ElementType) & 0xffff) ^
+ ((AddressSpace << 8) | Kind);
+ }
+
+ static bool classof(const SpecialTypeDescriptor *TD) {
+ return TD->Kind == SpecialTypeKind::STK_Pointer;
+ }
+};
} // namespace SPIRV
template <> struct DenseMapInfo<SPIRV::SpecialTypeDescriptor> {
@@ -262,8 +280,14 @@ public:
void buildDepsGraph(std::vector<SPIRV::DTSortableEntry *> &Graph,
MachineModuleInfo *MMI);
- void add(const Type *T, const MachineFunction *MF, Register R) {
- TT.add(T, MF, R);
+ void add(const Type *Ty, const MachineFunction *MF, Register R) {
+ TT.add(Ty, MF, R);
+ }
+
+ void add(const Type *PointerElementType, unsigned AddressSpace,
+ const MachineFunction *MF, Register R) {
+ ST.add(SPIRV::PointerTypeDescriptor(PointerElementType, AddressSpace), MF,
+ R);
}
void add(const Constant *C, const MachineFunction *MF, Register R) {
@@ -287,8 +311,14 @@ public:
ST.add(TD, MF, R);
}
- Register find(const Type *T, const MachineFunction *MF) {
- return TT.find(const_cast<Type *>(T), MF);
+ Register find(const Type *Ty, const MachineFunction *MF) {
+ return TT.find(const_cast<Type *>(Ty), MF);
+ }
+
+ Register find(const Type *PointerElementType, unsigned AddressSpace,
+ const MachineFunction *MF) {
+ return ST.find(
+ SPIRV::PointerTypeDescriptor(PointerElementType, AddressSpace), MF);
}
Register find(const Constant *C, const MachineFunction *MF) {
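
A sketch of the new deduplication key: a pointer special type is identified by its (element type, address space) pair, folded into one hash together with the descriptor kind, much as the PointerTypeDescriptor constructor above does. std::hash stands in for DenseMapInfo, and the kind value is illustrative:

#include <cstdint>
#include <functional>

enum SpecialTypeKind : std::uint8_t { STK_Pointer = 5 };

std::uint64_t pointerKeyHash(const void *ElementType, unsigned AddressSpace) {
  std::uint64_t H = std::hash<const void *>{}(ElementType);
  // Keep 16 bits of the type hash, then mix in address space and kind.
  return (H & 0xffff) ^ ((std::uint64_t(AddressSpace) << 8) | STK_Pointer);
}

int main() {
  int Dummy = 0;
  // Different address spaces must yield different keys for the same type.
  return pointerKeyHash(&Dummy, 1) != pointerKeyHash(&Dummy, 2) ? 0 : 1;
}
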
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 4e8afbe2e77e..ec62a819b00e 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -58,14 +58,21 @@ class SPIRVEmitIntrinsics
void preprocessCompositeConstants();
void preprocessUndefs();
CallInst *buildIntrWithMD(Intrinsic::ID IntrID, ArrayRef<Type *> Types,
- Value *Arg, Value *Arg2) {
+ Value *Arg, Value *Arg2,
+ ArrayRef<Constant *> Imms) {
ConstantAsMetadata *CM = ValueAsMetadata::getConstant(Arg);
MDTuple *TyMD = MDNode::get(F->getContext(), CM);
MetadataAsValue *VMD = MetadataAsValue::get(F->getContext(), TyMD);
- return IRB->CreateIntrinsic(IntrID, {Types}, {Arg2, VMD});
+ SmallVector<Value *, 4> Args;
+ Args.push_back(Arg2);
+ Args.push_back(VMD);
+ for (auto *Imm : Imms)
+ Args.push_back(Imm);
+ return IRB->CreateIntrinsic(IntrID, {Types}, Args);
}
void replaceMemInstrUses(Instruction *Old, Instruction *New);
void processInstrAfterVisit(Instruction *I);
+ void insertAssignPtrTypeIntrs(Instruction *I);
void insertAssignTypeIntrs(Instruction *I);
void processGlobalValue(GlobalVariable &GV);
@@ -121,6 +128,13 @@ static void setInsertPointSkippingPhis(IRBuilder<> &B, Instruction *I) {
B.SetInsertPoint(I);
}
+static bool requireAssignPtrType(Instruction *I) {
+ if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I))
+ return true;
+
+ return false;
+}
+
static bool requireAssignType(Instruction *I) {
IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(I);
if (Intr) {
@@ -387,9 +401,31 @@ void SPIRVEmitIntrinsics::processGlobalValue(GlobalVariable &GV) {
IRB->CreateIntrinsic(Intrinsic::spv_unref_global, GV.getType(), &GV);
}
+void SPIRVEmitIntrinsics::insertAssignPtrTypeIntrs(Instruction *I) {
+ if (I->getType()->isVoidTy() || !requireAssignPtrType(I))
+ return;
+
+ setInsertPointSkippingPhis(*IRB, I->getNextNode());
+
+ Constant *EltTyConst;
+ unsigned AddressSpace = 0;
+ if (auto *AI = dyn_cast<AllocaInst>(I)) {
+ EltTyConst = Constant::getNullValue(AI->getAllocatedType());
+ AddressSpace = AI->getAddressSpace();
+ } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ EltTyConst = Constant::getNullValue(GEP->getResultElementType());
+ AddressSpace = GEP->getPointerAddressSpace();
+ } else {
+ llvm_unreachable("Unexpected instruction!");
+ }
+
+ buildIntrWithMD(Intrinsic::spv_assign_ptr_type, {I->getType()}, EltTyConst, I,
+ {IRB->getInt32(AddressSpace)});
+}
+
void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I) {
Type *Ty = I->getType();
- if (!Ty->isVoidTy() && requireAssignType(I)) {
+ if (!Ty->isVoidTy() && requireAssignType(I) && !requireAssignPtrType(I)) {
setInsertPointSkippingPhis(*IRB, I->getNextNode());
Type *TypeToAssign = Ty;
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
@@ -401,7 +437,7 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I) {
}
}
Constant *Const = Constant::getNullValue(TypeToAssign);
- buildIntrWithMD(Intrinsic::spv_assign_type, {Ty}, Const, I);
+ buildIntrWithMD(Intrinsic::spv_assign_type, {Ty}, Const, I, {});
}
for (const auto &Op : I->operands()) {
if (isa<ConstantPointerNull>(Op) || isa<UndefValue>(Op) ||
@@ -410,9 +446,10 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I) {
setInsertPointSkippingPhis(*IRB, I);
if (isa<UndefValue>(Op) && Op->getType()->isAggregateType())
buildIntrWithMD(Intrinsic::spv_assign_type, {IRB->getInt32Ty()}, Op,
- UndefValue::get(IRB->getInt32Ty()));
+ UndefValue::get(IRB->getInt32Ty()), {});
else
- buildIntrWithMD(Intrinsic::spv_assign_type, {Op->getType()}, Op, Op);
+ buildIntrWithMD(Intrinsic::spv_assign_type, {Op->getType()}, Op, Op,
+ {});
}
}
}
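In IR terms, for an alloca such as %p = alloca i32 in address space 0, insertAssignPtrTypeIntrs emits (schematically; the exact intrinsic-name mangling may differ) a call like @llvm.spv.assign.ptr.type(ptr %p, metadata i32 0, i32 0): the pointee type travels as a null-value constant wrapped in metadata, and the address space rides as the trailing i32 immediate enabled by the new Imms parameter of buildIntrWithMD.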
@@ -425,8 +462,8 @@ void SPIRVEmitIntrinsics::processInstrAfterVisit(Instruction *I) {
Type *Ty = IRB->getInt32Ty();
auto t = AggrConsts.find(I);
assert(t != AggrConsts.end());
- auto *NewOp =
- buildIntrWithMD(Intrinsic::spv_track_constant, {Ty, Ty}, t->second, I);
+ auto *NewOp = buildIntrWithMD(Intrinsic::spv_track_constant, {Ty, Ty},
+ t->second, I, {});
I->replaceAllUsesWith(NewOp);
NewOp->setArgOperand(0, I);
}
@@ -441,7 +478,7 @@ void SPIRVEmitIntrinsics::processInstrAfterVisit(Instruction *I) {
continue;
IRB->SetInsertPoint(I);
auto *NewOp = buildIntrWithMD(Intrinsic::spv_track_constant,
- {Op->getType(), Op->getType()}, Op, Op);
+ {Op->getType(), Op->getType()}, Op, Op, {});
I->setOperand(OpNo, NewOp);
}
}
@@ -468,13 +505,11 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
if (!SI)
continue;
Type *ElTy = SI->getValueOperand()->getType();
- PointerType *PTy = cast<PointerType>(SI->getOperand(1)->getType());
- if (ElTy->isAggregateType() || ElTy->isVectorTy() ||
- !PTy->isOpaqueOrPointeeTypeMatches(ElTy))
+ if (ElTy->isAggregateType() || ElTy->isVectorTy())
AggrStores.insert(&I);
}
- IRB->SetInsertPoint(&Func.getEntryBlock().front());
+ IRB->SetInsertPoint(&Func.getEntryBlock(), Func.getEntryBlock().begin());
for (auto &GV : Func.getParent()->globals())
processGlobalValue(GV);
@@ -484,8 +519,10 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
for (auto &I : instructions(Func))
Worklist.push_back(&I);
- for (auto &I : Worklist)
+ for (auto &I : Worklist) {
+ insertAssignPtrTypeIntrs(I);
insertAssignTypeIntrs(I);
+ }
for (auto *I : Worklist) {
TrackConstants = true;
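The two annotation passes in the worklist loop above are complementary rather than overlapping: insertAssignPtrTypeIntrs handles only allocas and GEPs (requireAssignPtrType), and insertAssignTypeIntrs now explicitly skips those same instructions, so each value receives exactly one of spv_assign_ptr_type or spv_assign_type.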
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index c77a7f860eda..b8a6784ff3c6 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -81,7 +81,15 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeInt(uint32_t Width,
MachineIRBuilder &MIRBuilder,
bool IsSigned) {
assert(Width <= 64 && "Unsupported integer width!");
- if (Width <= 8)
+ const SPIRVSubtarget &ST =
+ cast<SPIRVSubtarget>(MIRBuilder.getMF().getSubtarget());
+ if (ST.canUseExtension(
+ SPIRV::Extension::SPV_INTEL_arbitrary_precision_integers)) {
+ MIRBuilder.buildInstr(SPIRV::OpExtension)
+ .addImm(SPIRV::Extension::SPV_INTEL_arbitrary_precision_integers);
+ MIRBuilder.buildInstr(SPIRV::OpCapability)
+ .addImm(SPIRV::Capability::ArbitraryPrecisionIntegersINTEL);
+ } else if (Width <= 8)
Width = 8;
else if (Width <= 16)
Width = 16;
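A standalone sketch of the width policy this hunk implements (helper name hypothetical): with SPV_INTEL_arbitrary_precision_integers available, the requested width is kept exactly, at the cost of emitting the OpExtension/OpCapability pair; otherwise it is rounded up to the nearest of 8, 16, 32, or 64 bits:

    #include <cassert>
    #include <cstdint>

    // Mirrors the branch structure above; not the actual backend code.
    static uint32_t normalizeIntWidth(uint32_t Width, bool HasAPIntExt) {
      assert(Width <= 64 && "Unsupported integer width!");
      if (HasAPIntExt)
        return Width; // e.g. i42 stays a 42-bit OpTypeInt
      if (Width <= 8)
        return 8;
      if (Width <= 16)
        return 16;
      if (Width <= 32)
        return 32;
      return 64;
    }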
@@ -236,24 +244,27 @@ Register SPIRVGlobalRegistry::buildConstantFP(APFloat Val,
MachineIRBuilder &MIRBuilder,
SPIRVType *SpvType) {
auto &MF = MIRBuilder.getMF();
- const Type *LLVMFPTy;
- if (SpvType) {
- LLVMFPTy = getTypeForSPIRVType(SpvType);
- assert(LLVMFPTy->isFloatingPointTy());
- } else {
- LLVMFPTy = IntegerType::getFloatTy(MF.getFunction().getContext());
+ auto &Ctx = MF.getFunction().getContext();
+ if (!SpvType) {
+ const Type *LLVMFPTy = Type::getFloatTy(Ctx);
+ SpvType = getOrCreateSPIRVType(LLVMFPTy, MIRBuilder);
}
// Find a constant in DT or build a new one.
- const auto ConstFP = ConstantFP::get(LLVMFPTy->getContext(), Val);
+ const auto ConstFP = ConstantFP::get(Ctx, Val);
Register Res = DT.find(ConstFP, &MF);
if (!Res.isValid()) {
- unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32;
- Res = MF.getRegInfo().createGenericVirtualRegister(LLT::scalar(BitWidth));
+ Res = MF.getRegInfo().createGenericVirtualRegister(LLT::scalar(32));
MF.getRegInfo().setRegClass(Res, &SPIRV::IDRegClass);
- assignTypeToVReg(LLVMFPTy, Res, MIRBuilder);
+ assignSPIRVTypeToVReg(SpvType, Res, MF);
DT.add(ConstFP, &MF, Res);
- MIRBuilder.buildFConstant(Res, *ConstFP);
+
+ MachineInstrBuilder MIB;
+ MIB = MIRBuilder.buildInstr(SPIRV::OpConstantF)
+ .addDef(Res)
+ .addUse(getSPIRVTypeID(SpvType));
+ addNumImm(ConstFP->getValueAPF().bitcastToAPInt(), MIB);
}
+
return Res;
}
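buildConstantFP now bypasses buildFConstant and writes the constant's raw bit pattern as the OpConstantF literal via addNumImm. A quick standalone check of what that literal word is for a 32-bit float (using the 0.30103f scale constant that selectLog10 introduces later in this diff):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // The float's bits, reinterpreted as an integer, become the literal.
    int main() {
      float F = 0.30103f;
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof(Bits));
      std::printf("0x%08X\n", Bits); // prints 0x3E9A209B
      return 0;
    }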
@@ -583,12 +594,6 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeStruct(const StructType *Ty,
SPIRVType *SPIRVGlobalRegistry::getOrCreateSpecialType(
const Type *Ty, MachineIRBuilder &MIRBuilder,
SPIRV::AccessQualifier::AccessQualifier AccQual) {
- // Some OpenCL and SPIRV builtins like image2d_t are passed in as
- // pointers, but should be treated as custom types like OpTypeImage.
- if (auto PType = dyn_cast<PointerType>(Ty)) {
- assert(!PType->isOpaque());
- Ty = PType->getNonOpaquePointerElementType();
- }
assert(isSpecialOpaqueType(Ty) && "Not a special opaque builtin type");
return SPIRV::lowerBuiltinType(Ty, AccQual, MIRBuilder, this);
}
@@ -630,6 +635,7 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateOpTypeFunctionWithArgs(
if (Reg.isValid())
return getSPIRVTypeForVReg(Reg);
SPIRVType *SpirvType = getOpTypeFunction(RetType, ArgTypes, MIRBuilder);
+ DT.add(Ty, CurMF, getSPIRVTypeID(SpirvType));
return finishCreatingSPIRVType(Ty, SpirvType);
}
@@ -701,12 +707,7 @@ SPIRVType *SPIRVGlobalRegistry::createSPIRVType(
// At the moment, all opaque pointers correspond to i8 element type.
// TODO: change the implementation once opaque pointers are supported
// in the SPIR-V specification.
- if (PType->isOpaque())
- SpvElementType = getOrCreateSPIRVIntegerType(8, MIRBuilder);
- else
- SpvElementType =
- findSPIRVType(PType->getNonOpaquePointerElementType(), MIRBuilder,
- SPIRV::AccessQualifier::ReadWrite, EmitIR);
+ SpvElementType = getOrCreateSPIRVIntegerType(8, MIRBuilder);
auto SC = addressSpaceToStorageClass(PType->getAddressSpace());
// Null pointer means we have a loop in type definitions, make and
// return corresponding OpTypeForwardPointer.
@@ -740,8 +741,14 @@ SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType(
// Do not add OpTypeForwardPointer to DT, a corresponding normal pointer type
// will be added later. For special types it is already added to DT.
if (SpirvType->getOpcode() != SPIRV::OpTypeForwardPointer && !Reg.isValid() &&
- !isSpecialOpaqueType(Ty))
- DT.add(Ty, &MIRBuilder.getMF(), getSPIRVTypeID(SpirvType));
+ !isSpecialOpaqueType(Ty)) {
+ if (!Ty->isPointerTy())
+ DT.add(Ty, &MIRBuilder.getMF(), getSPIRVTypeID(SpirvType));
+ else
+ DT.add(Type::getInt8Ty(MIRBuilder.getMF().getFunction().getContext()),
+ Ty->getPointerAddressSpace(), &MIRBuilder.getMF(),
+ getSPIRVTypeID(SpirvType));
+ }
return SpirvType;
}
@@ -759,7 +766,14 @@ SPIRVType *SPIRVGlobalRegistry::getSPIRVTypeForVReg(Register VReg) const {
SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType(
const Type *Ty, MachineIRBuilder &MIRBuilder,
SPIRV::AccessQualifier::AccessQualifier AccessQual, bool EmitIR) {
- Register Reg = DT.find(Ty, &MIRBuilder.getMF());
+ Register Reg;
+ if (!Ty->isPointerTy())
+ Reg = DT.find(Ty, &MIRBuilder.getMF());
+ else
+ Reg =
+ DT.find(Type::getInt8Ty(MIRBuilder.getMF().getFunction().getContext()),
+ Ty->getPointerAddressSpace(), &MIRBuilder.getMF());
+
if (Reg.isValid() && !isSpecialOpaqueType(Ty))
return getSPIRVTypeForVReg(Reg);
TypesInProcessing.clear();
@@ -913,8 +927,9 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateOpTypeByOpcode(
if (ResVReg.isValid())
return MIRBuilder.getMF().getRegInfo().getUniqueVRegDef(ResVReg);
ResVReg = createTypeVReg(MIRBuilder);
+ SPIRVType *SpirvTy = MIRBuilder.buildInstr(Opcode).addDef(ResVReg);
DT.add(Ty, &MIRBuilder.getMF(), ResVReg);
- return MIRBuilder.buildInstr(Opcode).addDef(ResVReg);
+ return SpirvTy;
}
const MachineInstr *
@@ -927,40 +942,82 @@ SPIRVGlobalRegistry::checkSpecialInstr(const SPIRV::SpecialTypeDescriptor &TD,
}
// TODO: maybe use tablegen to implement this.
-SPIRVType *
-SPIRVGlobalRegistry::getOrCreateSPIRVTypeByName(StringRef TypeStr,
- MachineIRBuilder &MIRBuilder) {
+SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVTypeByName(
+ StringRef TypeStr, MachineIRBuilder &MIRBuilder,
+ SPIRV::StorageClass::StorageClass SC,
+ SPIRV::AccessQualifier::AccessQualifier AQ) {
unsigned VecElts = 0;
auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
+ // Parse strings representing either a SPIR-V or OpenCL builtin type.
+ if (hasBuiltinTypePrefix(TypeStr))
+ return getOrCreateSPIRVType(
+ SPIRV::parseBuiltinTypeNameToTargetExtType(TypeStr.str(), MIRBuilder),
+ MIRBuilder, AQ);
+
// Parse type name in either "typeN" or "type vector[N]" format, where
// N is the number of elements of the vector.
- Type *Type;
- if (TypeStr.startswith("void")) {
- Type = Type::getVoidTy(Ctx);
+ Type *Ty;
+
+ if (TypeStr.starts_with("atomic_"))
+ TypeStr = TypeStr.substr(strlen("atomic_"));
+
+ if (TypeStr.starts_with("void")) {
+ Ty = Type::getVoidTy(Ctx);
TypeStr = TypeStr.substr(strlen("void"));
- } else if (TypeStr.startswith("int") || TypeStr.startswith("uint")) {
- Type = Type::getInt32Ty(Ctx);
- TypeStr = TypeStr.startswith("int") ? TypeStr.substr(strlen("int"))
- : TypeStr.substr(strlen("uint"));
- } else if (TypeStr.startswith("float")) {
- Type = Type::getFloatTy(Ctx);
- TypeStr = TypeStr.substr(strlen("float"));
- } else if (TypeStr.startswith("half")) {
- Type = Type::getHalfTy(Ctx);
+ } else if (TypeStr.starts_with("bool")) {
+ Ty = Type::getIntNTy(Ctx, 1);
+ TypeStr = TypeStr.substr(strlen("bool"));
+ } else if (TypeStr.starts_with("char") || TypeStr.starts_with("uchar")) {
+ Ty = Type::getInt8Ty(Ctx);
+ TypeStr = TypeStr.starts_with("char") ? TypeStr.substr(strlen("char"))
+ : TypeStr.substr(strlen("uchar"));
+ } else if (TypeStr.starts_with("short") || TypeStr.starts_with("ushort")) {
+ Ty = Type::getInt16Ty(Ctx);
+ TypeStr = TypeStr.starts_with("short") ? TypeStr.substr(strlen("short"))
+ : TypeStr.substr(strlen("ushort"));
+ } else if (TypeStr.starts_with("int") || TypeStr.starts_with("uint")) {
+ Ty = Type::getInt32Ty(Ctx);
+ TypeStr = TypeStr.starts_with("int") ? TypeStr.substr(strlen("int"))
+ : TypeStr.substr(strlen("uint"));
+ } else if (TypeStr.starts_with("long") || TypeStr.starts_with("ulong")) {
+ Ty = Type::getInt64Ty(Ctx);
+ TypeStr = TypeStr.starts_with("long") ? TypeStr.substr(strlen("long"))
+ : TypeStr.substr(strlen("ulong"));
+ } else if (TypeStr.starts_with("half")) {
+ Ty = Type::getHalfTy(Ctx);
TypeStr = TypeStr.substr(strlen("half"));
- } else if (TypeStr.startswith("opencl.sampler_t")) {
- Type = StructType::create(Ctx, "opencl.sampler_t");
+ } else if (TypeStr.starts_with("float")) {
+ Ty = Type::getFloatTy(Ctx);
+ TypeStr = TypeStr.substr(strlen("float"));
+ } else if (TypeStr.starts_with("double")) {
+ Ty = Type::getDoubleTy(Ctx);
+ TypeStr = TypeStr.substr(strlen("double"));
} else
llvm_unreachable("Unable to recognize SPIRV type name.");
- if (TypeStr.startswith(" vector[")) {
+
+ auto SpirvTy = getOrCreateSPIRVType(Ty, MIRBuilder, AQ);
+
+ // Handle "type*" or "type* vector[N]".
+ if (TypeStr.starts_with("*")) {
+ SpirvTy = getOrCreateSPIRVPointerType(SpirvTy, MIRBuilder, SC);
+ TypeStr = TypeStr.substr(strlen("*"));
+ }
+
+ // Handle "typeN*" or "type vector[N]*".
+ bool IsPtrToVec = TypeStr.consume_back("*");
+
+ if (TypeStr.starts_with(" vector[")) {
TypeStr = TypeStr.substr(strlen(" vector["));
TypeStr = TypeStr.substr(0, TypeStr.find(']'));
}
TypeStr.getAsInteger(10, VecElts);
- auto SpirvTy = getOrCreateSPIRVType(Type, MIRBuilder);
if (VecElts > 0)
SpirvTy = getOrCreateSPIRVVectorType(SpirvTy, VecElts, MIRBuilder);
+
+ if (IsPtrToVec)
+ SpirvTy = getOrCreateSPIRVPointerType(SpirvTy, MIRBuilder, SC);
+
return SpirvTy;
}
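Worked examples of strings the extended parser now accepts: "bool" maps to i1, "uchar" to i8, and "atomic_int" is handled as "int" (i32) once the atomic_ prefix is stripped; "float4" and "float vector[4]" both yield a 4-element float vector; "float*" yields a pointer in the given storage class; and "float vector[4]*" yields a pointer to the vector type. Builtin-prefixed names are routed to parseBuiltinTypeNameToTargetExtType instead of this hand-written parser.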
@@ -977,7 +1034,6 @@ SPIRVType *SPIRVGlobalRegistry::finishCreatingSPIRVType(const Type *LLVMTy,
assert(CurMF == SpirvType->getMF());
VRegToTypeMap[CurMF][getSPIRVTypeID(SpirvType)] = SpirvType;
SPIRVToLLVMType[SpirvType] = LLVMTy;
- DT.add(LLVMTy, CurMF, getSPIRVTypeID(SpirvType));
return SpirvType;
}
@@ -992,6 +1048,7 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVIntegerType(
.addDef(createTypeVReg(CurMF->getRegInfo()))
.addImm(BitWidth)
.addImm(0);
+ DT.add(LLVMTy, CurMF, getSPIRVTypeID(MIB));
return finishCreatingSPIRVType(LLVMTy, MIB);
}
@@ -1012,6 +1069,7 @@ SPIRVGlobalRegistry::getOrCreateSPIRVBoolType(MachineInstr &I,
MachineBasicBlock &BB = *I.getParent();
auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpTypeBool))
.addDef(createTypeVReg(CurMF->getRegInfo()));
+ DT.add(LLVMTy, CurMF, getSPIRVTypeID(MIB));
return finishCreatingSPIRVType(LLVMTy, MIB);
}
@@ -1036,6 +1094,7 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVVectorType(
.addDef(createTypeVReg(CurMF->getRegInfo()))
.addUse(getSPIRVTypeID(BaseType))
.addImm(NumElements);
+ DT.add(LLVMTy, CurMF, getSPIRVTypeID(MIB));
return finishCreatingSPIRVType(LLVMTy, MIB);
}
@@ -1054,25 +1113,38 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVArrayType(
.addDef(createTypeVReg(CurMF->getRegInfo()))
.addUse(getSPIRVTypeID(BaseType))
.addUse(Len);
+ DT.add(LLVMTy, CurMF, getSPIRVTypeID(MIB));
return finishCreatingSPIRVType(LLVMTy, MIB);
}
SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVPointerType(
SPIRVType *BaseType, MachineIRBuilder &MIRBuilder,
- SPIRV::StorageClass::StorageClass SClass) {
- return getOrCreateSPIRVType(
- PointerType::get(const_cast<Type *>(getTypeForSPIRVType(BaseType)),
- storageClassToAddressSpace(SClass)),
- MIRBuilder);
+ SPIRV::StorageClass::StorageClass SC) {
+ const Type *PointerElementType = getTypeForSPIRVType(BaseType);
+ unsigned AddressSpace = storageClassToAddressSpace(SC);
+ Type *LLVMTy =
+ PointerType::get(const_cast<Type *>(PointerElementType), AddressSpace);
+ Register Reg = DT.find(PointerElementType, AddressSpace, CurMF);
+ if (Reg.isValid())
+ return getSPIRVTypeForVReg(Reg);
+ auto MIB = BuildMI(MIRBuilder.getMBB(), MIRBuilder.getInsertPt(),
+ MIRBuilder.getDebugLoc(),
+ MIRBuilder.getTII().get(SPIRV::OpTypePointer))
+ .addDef(createTypeVReg(CurMF->getRegInfo()))
+ .addImm(static_cast<uint32_t>(SC))
+ .addUse(getSPIRVTypeID(BaseType));
+ DT.add(PointerElementType, AddressSpace, CurMF, getSPIRVTypeID(MIB));
+ return finishCreatingSPIRVType(LLVMTy, MIB);
}
SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVPointerType(
SPIRVType *BaseType, MachineInstr &I, const SPIRVInstrInfo &TII,
SPIRV::StorageClass::StorageClass SC) {
+ const Type *PointerElementType = getTypeForSPIRVType(BaseType);
+ unsigned AddressSpace = storageClassToAddressSpace(SC);
Type *LLVMTy =
- PointerType::get(const_cast<Type *>(getTypeForSPIRVType(BaseType)),
- storageClassToAddressSpace(SC));
- Register Reg = DT.find(LLVMTy, CurMF);
+ PointerType::get(const_cast<Type *>(PointerElementType), AddressSpace);
+ Register Reg = DT.find(PointerElementType, AddressSpace, CurMF);
if (Reg.isValid())
return getSPIRVTypeForVReg(Reg);
MachineBasicBlock &BB = *I.getParent();
@@ -1080,6 +1152,7 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVPointerType(
.addDef(createTypeVReg(CurMF->getRegInfo()))
.addImm(static_cast<uint32_t>(SC))
.addUse(getSPIRVTypeID(BaseType));
+ DT.add(PointerElementType, AddressSpace, CurMF, getSPIRVTypeID(MIB));
return finishCreatingSPIRVType(LLVMTy, MIB);
}
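A pattern worth noting across these hunks: the DT.add call removed from finishCreatingSPIRVType reappears in each individual getOrCreateSPIRV*Type creator. That lets the two pointer-type creators register under the (pointee type, address space) key introduced above, while every other type keeps registering under its LLVM type as before.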
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
index 88769f84b3e5..60967bfb68a8 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
@@ -138,8 +138,11 @@ public:
// Either generate a new OpTypeXXX instruction or return an existing one
// corresponding to the given string containing the name of the builtin type.
- SPIRVType *getOrCreateSPIRVTypeByName(StringRef TypeStr,
- MachineIRBuilder &MIRBuilder);
+ SPIRVType *getOrCreateSPIRVTypeByName(
+ StringRef TypeStr, MachineIRBuilder &MIRBuilder,
+ SPIRV::StorageClass::StorageClass SC = SPIRV::StorageClass::Function,
+ SPIRV::AccessQualifier::AccessQualifier AQ =
+ SPIRV::AccessQualifier::ReadWrite);
// Return the SPIR-V type instruction corresponding to the given VReg, or
// nullptr if no such type instruction exists.
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
index 44b5536becf7..da033ba32624 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
@@ -91,6 +91,10 @@ def OpNop: SimpleOp<"OpNop", 0>;
def OpUndef: Op<1, (outs ID:$res), (ins TYPE:$type), "$res = OpUndef $type">;
def OpSizeOf: Op<321, (outs ID:$res), (ins TYPE:$ty, ID:$ptr), "$res = OpSizeOf $ty $ptr">;
+// - SPV_KHR_expect_assume : Expect assume instructions
+def OpAssumeTrueKHR: Op<5630, (outs), (ins ID:$cond), "OpAssumeTrueKHR $cond">;
+def OpExpectKHR: Op<5631, (outs ID:$res), (ins TYPE:$ty, ID:$val, ID:$expected), "$res = OpExpectKHR $ty $val $expected">;
+
// 3.42.2 Debug Instructions
def OpSourceContinued: Op<2, (outs), (ins StringImm:$str, variable_ops),
@@ -217,9 +221,9 @@ def ConstPseudoNull: IntImmLeaf<i64, [{ return Imm.isZero(); }]>;
multiclass IntFPImm<bits<16> opCode, string name> {
def I: Op<opCode, (outs ID:$dst), (ins TYPE:$type, ID:$src, variable_ops),
- "$dst = "#name#" $type $src", [(set ID:$dst, (assigntype PseudoConstI:$src, TYPE:$type))]>;
+ "$dst = "#name#" $type", [(set ID:$dst, (assigntype PseudoConstI:$src, TYPE:$type))]>;
def F: Op<opCode, (outs ID:$dst), (ins TYPE:$type, fID:$src, variable_ops),
- "$dst = "#name#" $type $src", [(set ID:$dst, (assigntype PseudoConstF:$src, TYPE:$type))]>;
+ "$dst = "#name#" $type", [(set ID:$dst, (assigntype PseudoConstF:$src, TYPE:$type))]>;
}
def OpConstantTrue: Op<41, (outs ID:$dst), (ins TYPE:$src_ty), "$dst = OpConstantTrue $src_ty",
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 2fc7342458de..5ad47de4fc54 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -12,6 +12,8 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/SPIRVBaseInfo.h"
+#include "MCTargetDesc/SPIRVMCTargetDesc.h"
#include "SPIRV.h"
#include "SPIRVGlobalRegistry.h"
#include "SPIRVInstrInfo.h"
@@ -21,6 +23,7 @@
#include "SPIRVUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -172,6 +175,9 @@ private:
bool selectExtInst(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I, const ExtInstList &ExtInsts) const;
+ bool selectLog10(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+
Register buildI32Constant(uint32_t Val, MachineInstr &I,
const SPIRVType *ResType = nullptr) const;
@@ -275,6 +281,7 @@ bool SPIRVInstructionSelector::spvSelect(Register ResVReg,
return selectOpUndef(ResVReg, ResType, I);
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
return selectIntrinsic(ResVReg, ResType, I);
case TargetOpcode::G_BITREVERSE:
return selectBitreverse(ResVReg, ResType, I);
@@ -359,7 +366,7 @@ bool SPIRVInstructionSelector::spvSelect(Register ResVReg,
case TargetOpcode::G_FLOG2:
return selectExtInst(ResVReg, ResType, I, CL::log2, GL::Log2);
case TargetOpcode::G_FLOG10:
- return selectExtInst(ResVReg, ResType, I, CL::log10);
+ return selectLog10(ResVReg, ResType, I);
case TargetOpcode::G_FABS:
return selectExtInst(ResVReg, ResType, I, CL::fabs, GL::FAbs);
@@ -590,15 +597,16 @@ static void addMemoryOperands(uint64_t Flags, MachineInstrBuilder &MIB) {
bool SPIRVInstructionSelector::selectLoad(Register ResVReg,
const SPIRVType *ResType,
MachineInstr &I) const {
- unsigned OpOffset =
- I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS ? 1 : 0;
+ unsigned OpOffset = isa<GIntrinsic>(I) ? 1 : 0;
Register Ptr = I.getOperand(1 + OpOffset).getReg();
auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpLoad))
.addDef(ResVReg)
.addUse(GR.getSPIRVTypeID(ResType))
.addUse(Ptr);
if (!I.getNumMemOperands()) {
- assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+ assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS ||
+ I.getOpcode() ==
+ TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS);
addMemoryOperands(I.getOperand(2 + OpOffset).getImm(), MIB);
} else {
addMemoryOperands(*I.memoperands_begin(), MIB);
@@ -607,8 +615,7 @@ bool SPIRVInstructionSelector::selectLoad(Register ResVReg,
}
bool SPIRVInstructionSelector::selectStore(MachineInstr &I) const {
- unsigned OpOffset =
- I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS ? 1 : 0;
+ unsigned OpOffset = isa<GIntrinsic>(I) ? 1 : 0;
Register StoreVal = I.getOperand(0 + OpOffset).getReg();
Register Ptr = I.getOperand(1 + OpOffset).getReg();
MachineBasicBlock &BB = *I.getParent();
@@ -616,7 +623,9 @@ bool SPIRVInstructionSelector::selectStore(MachineInstr &I) const {
.addUse(Ptr)
.addUse(StoreVal);
if (!I.getNumMemOperands()) {
- assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+ assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS ||
+ I.getOpcode() ==
+ TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS);
addMemoryOperands(I.getOperand(2 + OpOffset).getImm(), MIB);
} else {
addMemoryOperands(*I.memoperands_begin(), MIB);
@@ -718,7 +727,7 @@ bool SPIRVInstructionSelector::selectAtomicCmpXchg(Register ResVReg,
Register MemSemEqReg;
Register MemSemNeqReg;
Register Ptr = I.getOperand(2).getReg();
- if (I.getOpcode() != TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS) {
+ if (!isa<GIntrinsic>(I)) {
assert(I.hasOneMemOperand());
const MachineMemOperand *MemOp = *I.memoperands_begin();
unsigned Scope = static_cast<uint32_t>(getScope(MemOp->getSyncScopeID()));
@@ -1294,18 +1303,28 @@ bool SPIRVInstructionSelector::selectExtractElt(Register ResVReg,
bool SPIRVInstructionSelector::selectGEP(Register ResVReg,
const SPIRVType *ResType,
MachineInstr &I) const {
- // In general we should also support OpAccessChain instrs here (i.e. not
- // PtrAccessChain) but SPIRV-LLVM Translator doesn't emit them at all and so
- // do we to stay compliant with its test and more importantly consumers.
- unsigned Opcode = I.getOperand(2).getImm() ? SPIRV::OpInBoundsPtrAccessChain
- : SPIRV::OpPtrAccessChain;
+ const bool IsGEPInBounds = I.getOperand(2).getImm();
+
+ // OpAccessChain could be used for OpenCL, but the SPIRV-LLVM Translator only
+ // relies on PtrAccessChain, so we'll try not to deviate. For Vulkan however,
+ // we have to use Op[InBounds]AccessChain.
+ const unsigned Opcode = STI.isVulkanEnv()
+ ? (IsGEPInBounds ? SPIRV::OpInBoundsAccessChain
+ : SPIRV::OpAccessChain)
+ : (IsGEPInBounds ? SPIRV::OpInBoundsPtrAccessChain
+ : SPIRV::OpPtrAccessChain);
+
auto Res = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode))
.addDef(ResVReg)
.addUse(GR.getSPIRVTypeID(ResType))
// Object to get a pointer to.
.addUse(I.getOperand(3).getReg());
// Adding indices.
- for (unsigned i = 4; i < I.getNumExplicitOperands(); ++i)
+ const unsigned StartingIndex =
+ (Opcode == SPIRV::OpAccessChain || Opcode == SPIRV::OpInBoundsAccessChain)
+ ? 5
+ : 4;
+ for (unsigned i = StartingIndex; i < I.getNumExplicitOperands(); ++i)
Res.addUse(I.getOperand(i).getReg());
return Res.constrainAllUses(TII, TRI, RBI);
}
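A compilable sketch of the opcode matrix selectGEP now implements (enum and function names are local to the sketch):

    // Vulkan must use Op[InBounds]AccessChain; these lack the leading
    // "Element" operand of the Ptr variants, which is why operand copying
    // above starts at index 5 instead of 4 (the GEP's first index is dropped).
    enum class ChainOp {
      AccessChain,
      InBoundsAccessChain,
      PtrAccessChain,
      InBoundsPtrAccessChain
    };

    static ChainOp selectChainOp(bool IsVulkanEnv, bool IsGEPInBounds) {
      if (IsVulkanEnv)
        return IsGEPInBounds ? ChainOp::InBoundsAccessChain
                             : ChainOp::AccessChain;
      return IsGEPInBounds ? ChainOp::InBoundsPtrAccessChain
                           : ChainOp::PtrAccessChain;
    }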
@@ -1314,7 +1333,7 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
const SPIRVType *ResType,
MachineInstr &I) const {
MachineBasicBlock &BB = *I.getParent();
- switch (I.getIntrinsicID()) {
+ switch (cast<GIntrinsic>(I).getIntrinsicID()) {
case Intrinsic::spv_load:
return selectLoad(ResVReg, ResType, I);
case Intrinsic::spv_store:
@@ -1391,6 +1410,19 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
break;
case Intrinsic::spv_alloca:
return selectFrameIndex(ResVReg, ResType, I);
+ case Intrinsic::spv_assume:
+ if (STI.canUseExtension(SPIRV::Extension::SPV_KHR_expect_assume))
+ BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpAssumeTrueKHR))
+ .addUse(I.getOperand(1).getReg());
+ break;
+ case Intrinsic::spv_expect:
+ if (STI.canUseExtension(SPIRV::Extension::SPV_KHR_expect_assume))
+ BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpExpectKHR))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(I.getOperand(2).getReg())
+ .addUse(I.getOperand(3).getReg());
+ break;
default:
llvm_unreachable("Intrinsic selection not implemented");
}
@@ -1472,9 +1504,21 @@ bool SPIRVInstructionSelector::selectGlobalValue(
// FIXME: don't use MachineIRBuilder here, replace it with BuildMI.
MachineIRBuilder MIRBuilder(I);
const GlobalValue *GV = I.getOperand(1).getGlobal();
- SPIRVType *ResType = GR.getOrCreateSPIRVType(
- GV->getType(), MIRBuilder, SPIRV::AccessQualifier::ReadWrite, false);
-
+ Type *GVType = GV->getValueType();
+ SPIRVType *PointerBaseType;
+ if (GVType->isArrayTy()) {
+ SPIRVType *ArrayElementType =
+ GR.getOrCreateSPIRVType(GVType->getArrayElementType(), MIRBuilder,
+ SPIRV::AccessQualifier::ReadWrite, false);
+ PointerBaseType = GR.getOrCreateSPIRVArrayType(
+ ArrayElementType, GVType->getArrayNumElements(), I, TII);
+ } else {
+ PointerBaseType = GR.getOrCreateSPIRVType(
+ GVType, MIRBuilder, SPIRV::AccessQualifier::ReadWrite, false);
+ }
+ SPIRVType *ResType = GR.getOrCreateSPIRVPointerType(
+ PointerBaseType, I, TII,
+ addressSpaceToStorageClass(GV->getAddressSpace()));
std::string GlobalIdent = GV->getGlobalIdentifier();
// We have functions as operands in tests with blocks of instruction e.g. in
// transcoding/global_block.ll. These operands are not used and should be
@@ -1485,8 +1529,6 @@ bool SPIRVInstructionSelector::selectGlobalValue(
MachineBasicBlock &BB = *I.getParent();
Register NewReg = GR.find(ConstVal, GR.CurMF);
if (!NewReg.isValid()) {
- SPIRVType *SpvBaseTy = GR.getOrCreateSPIRVIntegerType(8, I, TII);
- ResType = GR.getOrCreateSPIRVPointerType(SpvBaseTy, I, TII);
Register NewReg = ResVReg;
GR.add(ConstVal, GR.CurMF, NewReg);
return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull))
@@ -1526,6 +1568,57 @@ bool SPIRVInstructionSelector::selectGlobalValue(
return Reg.isValid();
}
+bool SPIRVInstructionSelector::selectLog10(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ if (STI.canUseExtInstSet(SPIRV::InstructionSet::OpenCL_std)) {
+ return selectExtInst(ResVReg, ResType, I, CL::log10);
+ }
+
+ // There is no log10 instruction in the GLSL Extended Instruction set, so it
+ // is implemented as:
+ // log10(x) = log2(x) * (1 / log2(10))
+ // = log2(x) * 0.30103
+
+ MachineIRBuilder MIRBuilder(I);
+ MachineBasicBlock &BB = *I.getParent();
+
+ // Build log2(x).
+ Register VarReg = MRI->createVirtualRegister(&SPIRV::IDRegClass);
+ bool Result =
+ BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst))
+ .addDef(VarReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
+ .addImm(GL::Log2)
+ .add(I.getOperand(1))
+ .constrainAllUses(TII, TRI, RBI);
+
+ // Build 0.30103.
+ assert(ResType->getOpcode() == SPIRV::OpTypeVector ||
+ ResType->getOpcode() == SPIRV::OpTypeFloat);
+ // TODO: Add matrix implementation once supported by the HLSL frontend.
+ const SPIRVType *SpirvScalarType =
+ ResType->getOpcode() == SPIRV::OpTypeVector
+ ? GR.getSPIRVTypeForVReg(ResType->getOperand(1).getReg())
+ : ResType;
+ Register ScaleReg =
+ GR.buildConstantFP(APFloat(0.30103f), MIRBuilder, SpirvScalarType);
+
+ // Multiply log2(x) by 0.30103 to get log10(x) result.
+ auto Opcode = ResType->getOpcode() == SPIRV::OpTypeVector
+ ? SPIRV::OpVectorTimesScalar
+ : SPIRV::OpFMulS;
+ Result &= BuildMI(BB, I, I.getDebugLoc(), TII.get(Opcode))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(VarReg)
+ .addUse(ScaleReg)
+ .constrainAllUses(TII, TRI, RBI);
+
+ return Result;
+}
+
namespace llvm {
InstructionSelector *
createSPIRVInstructionSelector(const SPIRVTargetMachine &TM,
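Since the GLSL extended instruction set has no native log10, selectLog10 rewrites it through log2. A quick standalone check of the identity and of the 0.30103 scale factor used above:

    #include <cmath>
    #include <cstdio>

    // log10(x) = log2(x) * (1 / log2(10)), and 1 / log2(10) = log10(2).
    int main() {
      double Scale = 1.0 / std::log2(10.0); // 0.3010299957...
      double X = 1000.0;
      std::printf("%f %f %f\n", Scale, std::log2(X) * Scale, std::log10(X));
      // prints 0.301030 3.000000 3.000000
      return 0;
    }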
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
index b0028f8c80a4..faaf7f0e2548 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
@@ -229,11 +229,16 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) {
// Control-flow. In some cases (e.g. constants) s1 may be promoted to s32.
getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s32});
+ // TODO: Review the target OpenCL and GLSL Extended Instruction Set specs to
+ // tighten these requirements. Many of these math functions are only legal on
+ // specific bitwidths, so they are not selectable for
+ // allFloatScalarsAndVectors.
getActionDefinitionsBuilder({G_FPOW,
G_FEXP,
G_FEXP2,
G_FLOG,
G_FLOG2,
+ G_FLOG10,
G_FABS,
G_FMINNUM,
G_FMAXNUM,
@@ -259,8 +264,6 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) {
allFloatScalarsAndVectors, allIntScalarsAndVectors);
if (ST.canUseExtInstSet(SPIRV::InstructionSet::OpenCL_std)) {
- getActionDefinitionsBuilder(G_FLOG10).legalFor(allFloatScalarsAndVectors);
-
getActionDefinitionsBuilder(
{G_CTTZ, G_CTTZ_ZERO_UNDEF, G_CTLZ, G_CTLZ_ZERO_UNDEF})
.legalForCartesianProduct(allIntScalarsAndVectors,
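With this change G_FLOG10 is declared legal for all float scalars and vectors regardless of environment; the OpenCL-only legality rule goes away because the environment split is now handled at selection time by selectLog10 (direct CL::log10 for OpenCL, the log2-based expansion for GLSL).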
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index 22746788607b..779036016560 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -15,6 +15,8 @@
//===----------------------------------------------------------------------===//
#include "SPIRVModuleAnalysis.h"
+#include "MCTargetDesc/SPIRVBaseInfo.h"
+#include "MCTargetDesc/SPIRVMCTargetDesc.h"
#include "SPIRV.h"
#include "SPIRVSubtarget.h"
#include "SPIRVTargetMachine.h"
@@ -106,11 +108,18 @@ void SPIRVModuleAnalysis::setBaseInfo(const Module &M) {
MAI.Mem =
static_cast<SPIRV::MemoryModel::MemoryModel>(getMetadataUInt(MemMD, 1));
} else {
- MAI.Mem = SPIRV::MemoryModel::OpenCL;
- unsigned PtrSize = ST->getPointerSize();
- MAI.Addr = PtrSize == 32 ? SPIRV::AddressingModel::Physical32
- : PtrSize == 64 ? SPIRV::AddressingModel::Physical64
- : SPIRV::AddressingModel::Logical;
+ // TODO: Add support for VulkanMemoryModel.
+ MAI.Mem = ST->isOpenCLEnv() ? SPIRV::MemoryModel::OpenCL
+ : SPIRV::MemoryModel::GLSL450;
+ if (MAI.Mem == SPIRV::MemoryModel::OpenCL) {
+ unsigned PtrSize = ST->getPointerSize();
+ MAI.Addr = PtrSize == 32 ? SPIRV::AddressingModel::Physical32
+ : PtrSize == 64 ? SPIRV::AddressingModel::Physical64
+ : SPIRV::AddressingModel::Logical;
+ } else {
+ // TODO: Add support for PhysicalStorageBufferAddress.
+ MAI.Addr = SPIRV::AddressingModel::Logical;
+ }
}
// Get the OpenCL version number from metadata.
// TODO: support other source languages.
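Concretely, given the subtarget changes later in this diff: modules with a spirv32 or spirv64 triple default to the OpenCL memory model with Physical32/Physical64 addressing, while a logical spirv triple defaults to GLSL450 with Logical addressing.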
@@ -148,9 +157,12 @@ void SPIRVModuleAnalysis::setBaseInfo(const Module &M) {
MAI.Reqs.getAndAddRequirements(SPIRV::OperandCategory::AddressingModelOperand,
MAI.Addr, *ST);
- // TODO: check if it's required by default.
- MAI.ExtInstSetMap[static_cast<unsigned>(SPIRV::InstructionSet::OpenCL_std)] =
- Register::index2VirtReg(MAI.getNextID());
+ if (ST->isOpenCLEnv()) {
+ // TODO: check if it's required by default.
+ MAI.ExtInstSetMap[static_cast<unsigned>(
+ SPIRV::InstructionSet::OpenCL_std)] =
+ Register::index2VirtReg(MAI.getNextID());
+ }
}
// Collect MI which defines the register in the given machine function.
@@ -494,7 +506,7 @@ void SPIRV::RequirementHandler::checkSatisfiable(
for (auto Ext : AllExtensions) {
if (ST.canUseExtension(Ext))
continue;
- LLVM_DEBUG(dbgs() << "Extension not suported: "
+ LLVM_DEBUG(dbgs() << "Extension not supported: "
<< getSymbolicOperandMnemonic(
OperandCategory::ExtensionOperand, Ext)
<< "\n");
@@ -513,12 +525,31 @@ void SPIRV::RequirementHandler::addAvailableCaps(const CapabilityList &ToAdd) {
SPIRV::OperandCategory::CapabilityOperand, Cap));
}
+void SPIRV::RequirementHandler::removeCapabilityIf(
+ const Capability::Capability ToRemove,
+ const Capability::Capability IfPresent) {
+ if (AllCaps.contains(IfPresent))
+ AllCaps.erase(ToRemove);
+}
+
namespace llvm {
namespace SPIRV {
void RequirementHandler::initAvailableCapabilities(const SPIRVSubtarget &ST) {
- // TODO: Implemented for other targets other then OpenCL.
- if (!ST.isOpenCLEnv())
+ if (ST.isOpenCLEnv()) {
+ initAvailableCapabilitiesForOpenCL(ST);
return;
+ }
+
+ if (ST.isVulkanEnv()) {
+ initAvailableCapabilitiesForVulkan(ST);
+ return;
+ }
+
+ report_fatal_error("Unimplemented environment for SPIR-V generation.");
+}
+
+void RequirementHandler::initAvailableCapabilitiesForOpenCL(
+ const SPIRVSubtarget &ST) {
// Add the min requirements for different OpenCL and SPIR-V versions.
addAvailableCaps({Capability::Addresses, Capability::Float16Buffer,
Capability::Int16, Capability::Int8, Capability::Kernel,
@@ -552,8 +583,24 @@ void RequirementHandler::initAvailableCapabilities(const SPIRVSubtarget &ST) {
// TODO: verify if this needs some checks.
addAvailableCaps({Capability::Float16, Capability::Float64});
+ // Add capabilities enabled by extensions.
+ for (auto Extension : ST.getAllAvailableExtensions()) {
+ CapabilityList EnabledCapabilities =
+ getCapabilitiesEnabledByExtension(Extension);
+ addAvailableCaps(EnabledCapabilities);
+ }
+
// TODO: add OpenCL extensions.
}
+
+void RequirementHandler::initAvailableCapabilitiesForVulkan(
+ const SPIRVSubtarget &ST) {
+ addAvailableCaps({Capability::Shader, Capability::Linkage});
+
+ // Provided by Vulkan version 1.0.
+ addAvailableCaps({Capability::Int16, Capability::Int64, Capability::Float64});
+}
+
} // namespace SPIRV
} // namespace llvm
@@ -696,6 +743,16 @@ void addInstrRequirements(const MachineInstr &MI,
break;
}
case SPIRV::OpBitReverse:
+ case SPIRV::OpBitFieldInsert:
+ case SPIRV::OpBitFieldSExtract:
+ case SPIRV::OpBitFieldUExtract:
+ if (!ST.canUseExtension(SPIRV::Extension::SPV_KHR_bit_instructions)) {
+ Reqs.addCapability(SPIRV::Capability::Shader);
+ break;
+ }
+ Reqs.addExtension(SPIRV::Extension::SPV_KHR_bit_instructions);
+ Reqs.addCapability(SPIRV::Capability::BitInstructions);
+ break;
case SPIRV::OpTypeRuntimeArray:
Reqs.addCapability(SPIRV::Capability::Shader);
break;
@@ -846,9 +903,22 @@ void addInstrRequirements(const MachineInstr &MI,
case SPIRV::OpGroupNonUniformBallotFindMSB:
Reqs.addCapability(SPIRV::Capability::GroupNonUniformBallot);
break;
+ case SPIRV::OpAssumeTrueKHR:
+ case SPIRV::OpExpectKHR:
+ if (ST.canUseExtension(SPIRV::Extension::SPV_KHR_expect_assume)) {
+ Reqs.addExtension(SPIRV::Extension::SPV_KHR_expect_assume);
+ Reqs.addCapability(SPIRV::Capability::ExpectAssumeKHR);
+ }
+ break;
default:
break;
}
+
+ // If we require capability Shader, then we can remove the requirement for
+ // the BitInstructions capability, since Shader is a superset capability
+ // of BitInstructions.
+ Reqs.removeCapabilityIf(SPIRV::Capability::BitInstructions,
+ SPIRV::Capability::Shader);
}
static void collectReqs(const Module &M, SPIRV::ModuleAnalysisInfo &MAI,
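Taken together with the OpBitField* handling above: a module that needs bit-field instructions requests either BitInstructions (when SPV_KHR_bit_instructions is enabled) or the broader Shader capability, and removeCapabilityIf then drops the redundant BitInstructions requirement whenever Shader ends up required anyway.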
@@ -886,6 +956,11 @@ static void collectReqs(const Module &M, SPIRV::ModuleAnalysisInfo &MAI,
MAI.Reqs.getAndAddRequirements(
SPIRV::OperandCategory::ExecutionModeOperand,
SPIRV::ExecutionMode::LocalSize, ST);
+ if (F.getFnAttribute("hlsl.numthreads").isValid()) {
+ MAI.Reqs.getAndAddRequirements(
+ SPIRV::OperandCategory::ExecutionModeOperand,
+ SPIRV::ExecutionMode::LocalSize, ST);
+ }
if (F.getMetadata("work_group_size_hint"))
MAI.Reqs.getAndAddRequirements(
SPIRV::OperandCategory::ExecutionModeOperand,
@@ -898,6 +973,13 @@ static void collectReqs(const Module &M, SPIRV::ModuleAnalysisInfo &MAI,
MAI.Reqs.getAndAddRequirements(
SPIRV::OperandCategory::ExecutionModeOperand,
SPIRV::ExecutionMode::VecTypeHint, ST);
+
+ if (F.hasOptNone() &&
+ ST.canUseExtension(SPIRV::Extension::SPV_INTEL_optnone)) {
+ // Output OpCapability OptNoneINTEL.
+ MAI.Reqs.addExtension(SPIRV::Extension::SPV_INTEL_optnone);
+ MAI.Reqs.addCapability(SPIRV::Capability::OptNoneINTEL);
+ }
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
index abb6797c5218..5124181b49e2 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
@@ -60,15 +60,24 @@ struct Requirements {
struct RequirementHandler {
private:
CapabilityList MinimalCaps;
+
+ // AllCaps and AvailableCaps are related but different. AllCaps is a subset of
+ // AvailableCaps. AvailableCaps is the complete set of capabilities that are
+ // available to the current target. AllCaps is the set of capabilities that
+ // are required by the current module.
SmallSet<Capability::Capability, 8> AllCaps;
+ DenseSet<unsigned> AvailableCaps;
+
SmallSet<Extension::Extension, 4> AllExtensions;
unsigned MinVersion; // 0 if no min version is defined.
unsigned MaxVersion; // 0 if no max version is defined.
- DenseSet<unsigned> AvailableCaps;
// Remove a list of capabilities from dedupedCaps and add them to AllCaps,
// recursing through their implicitly declared capabilities too.
void pruneCapabilities(const CapabilityList &ToPrune);
+ void initAvailableCapabilitiesForOpenCL(const SPIRVSubtarget &ST);
+ void initAvailableCapabilitiesForVulkan(const SPIRVSubtarget &ST);
+
public:
RequirementHandler() : MinVersion(0), MaxVersion(0) {}
void clear() {
@@ -110,6 +119,10 @@ public:
bool isCapabilityAvailable(Capability::Capability Cap) const {
return AvailableCaps.contains(Cap);
}
+
+ // Remove capability ToRemove, but only if IfPresent is present.
+ void removeCapabilityIf(const Capability::Capability ToRemove,
+ const Capability::Capability IfPresent);
};
using InstrList = SmallVector<MachineInstr *>;
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
index c0c53170f462..f4076be2a7b7 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
@@ -242,7 +242,20 @@ static void generateAssignInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR,
!ReachedBegin;) {
MachineInstr &MI = *MII;
- if (isSpvIntrinsic(MI, Intrinsic::spv_assign_type)) {
+ if (isSpvIntrinsic(MI, Intrinsic::spv_assign_ptr_type)) {
+ Register Reg = MI.getOperand(1).getReg();
+ MIB.setInsertPt(*MI.getParent(), MI.getIterator());
+ SPIRVType *BaseTy = GR->getOrCreateSPIRVType(
+ getMDOperandAsType(MI.getOperand(2).getMetadata(), 0), MIB);
+ SPIRVType *AssignedPtrType = GR->getOrCreateSPIRVPointerType(
+ BaseTy, MI, *MF.getSubtarget<SPIRVSubtarget>().getInstrInfo(),
+ addressSpaceToStorageClass(MI.getOperand(3).getImm()));
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ assert(Def && "Expecting an instruction that defines the register");
+ insertAssignInstr(Reg, nullptr, AssignedPtrType, GR, MIB,
+ MF.getRegInfo());
+ ToErase.push_back(&MI);
+ } else if (isSpvIntrinsic(MI, Intrinsic::spv_assign_type)) {
Register Reg = MI.getOperand(1).getReg();
Type *Ty = getMDOperandAsType(MI.getOperand(2).getMetadata(), 0);
MachineInstr *Def = MRI.getVRegDef(Reg);
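This is the consumer side of the spv_assign_ptr_type intrinsic emitted by SPIRVEmitIntrinsics earlier in this diff: the pointee type is rebuilt from the metadata operand, wrapped in an OpTypePointer whose storage class is derived from the address-space immediate, and attached to the defining instruction before the intrinsic is erased.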
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
index 554e66988f09..c376497469ce 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
@@ -19,11 +19,14 @@
//===----------------------------------------------------------------------===//
#include "SPIRV.h"
+#include "SPIRVSubtarget.h"
#include "SPIRVTargetMachine.h"
#include "SPIRVUtils.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsSPIRV.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
@@ -36,12 +39,13 @@ void initializeSPIRVPrepareFunctionsPass(PassRegistry &);
namespace {
class SPIRVPrepareFunctions : public ModulePass {
+ const SPIRVTargetMachine &TM;
bool substituteIntrinsicCalls(Function *F);
Function *removeAggregateTypesFromSignature(Function *F);
public:
static char ID;
- SPIRVPrepareFunctions() : ModulePass(ID) {
+ SPIRVPrepareFunctions(const SPIRVTargetMachine &TM) : ModulePass(ID), TM(TM) {
initializeSPIRVPrepareFunctionsPass(*PassRegistry::getPassRegistry());
}
@@ -233,6 +237,32 @@ static void buildUMulWithOverflowFunc(Function *UMulFunc) {
IRB.CreateRet(Res);
}
+static void lowerExpectAssume(IntrinsicInst *II) {
+ // If we cannot use the SPV_KHR_expect_assume extension, then we need to
+ // ignore the intrinsic and move on. It should be removed later on by LLVM.
+ // Otherwise we should lower the intrinsic to the corresponding SPIR-V
+ // instruction.
+ // For @llvm.assume we have OpAssumeTrueKHR.
+ // For @llvm.expect we have OpExpectKHR.
+ //
+ // We need to lower this into a builtin and then the builtin into a SPIR-V
+ // instruction.
+ if (II->getIntrinsicID() == Intrinsic::assume) {
+ Function *F = Intrinsic::getDeclaration(
+ II->getModule(), Intrinsic::SPVIntrinsics::spv_assume);
+ II->setCalledFunction(F);
+ } else if (II->getIntrinsicID() == Intrinsic::expect) {
+ Function *F = Intrinsic::getDeclaration(
+ II->getModule(), Intrinsic::SPVIntrinsics::spv_expect,
+ {II->getOperand(0)->getType()});
+ II->setCalledFunction(F);
+ } else {
+ llvm_unreachable("Unknown intrinsic");
+ }
+
+ return;
+}
+
static void lowerUMulWithOverflow(IntrinsicInst *UMulIntrinsic) {
// Get a separate function - otherwise, we'd have to rework the CFG of the
// current one. Then simply replace the intrinsic uses with a call to the new
@@ -270,6 +300,12 @@ bool SPIRVPrepareFunctions::substituteIntrinsicCalls(Function *F) {
} else if (II->getIntrinsicID() == Intrinsic::umul_with_overflow) {
lowerUMulWithOverflow(II);
Changed = true;
+ } else if (II->getIntrinsicID() == Intrinsic::assume ||
+ II->getIntrinsicID() == Intrinsic::expect) {
+ const SPIRVSubtarget &STI = TM.getSubtarget<SPIRVSubtarget>(*F);
+ if (STI.canUseExtension(SPIRV::Extension::SPV_KHR_expect_assume))
+ lowerExpectAssume(II);
+ Changed = true;
}
}
}
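End to end, then (assuming the usual llvm.spv.* intrinsic naming): when SPV_KHR_expect_assume is available, @llvm.assume and @llvm.expect calls are retargeted in place to the SPIR-V intrinsics here, and the instruction selector lowers those to OpAssumeTrueKHR and OpExpectKHR as shown earlier in this diff; when the extension is unavailable, the original intrinsics are left in place for LLVM to strip later.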
@@ -362,6 +398,7 @@ bool SPIRVPrepareFunctions::runOnModule(Module &M) {
return Changed;
}
-ModulePass *llvm::createSPIRVPrepareFunctionsPass() {
- return new SPIRVPrepareFunctions();
+ModulePass *
+llvm::createSPIRVPrepareFunctionsPass(const SPIRVTargetMachine &TM) {
+ return new SPIRVPrepareFunctions(TM);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVRegularizer.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVRegularizer.cpp
index 3a51e29dcf16..322e051a87db 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVRegularizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVRegularizer.cpp
@@ -177,8 +177,8 @@ void SPIRVRegularizer::visitCallInst(CallInst &CI) {
StringRef DemangledName(NameStr);
// TODO: add support for other builtins.
- if (DemangledName.startswith("fmin") || DemangledName.startswith("fmax") ||
- DemangledName.startswith("min") || DemangledName.startswith("max"))
+ if (DemangledName.starts_with("fmin") || DemangledName.starts_with("fmax") ||
+ DemangledName.starts_with("min") || DemangledName.starts_with("max"))
visitCallScalToVec(&CI, MangledName, DemangledName);
free(NameStr);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp
index 0f047b09c521..cf6dfb127cde 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp
@@ -27,25 +27,42 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "SPIRVGenSubtargetInfo.inc"
+cl::list<SPIRV::Extension::Extension> Extensions(
+ "spirv-extensions", cl::desc("SPIR-V extensions"), cl::ZeroOrMore,
+ cl::Hidden,
+ cl::values(
+ clEnumValN(SPIRV::Extension::SPV_INTEL_arbitrary_precision_integers,
+ "SPV_INTEL_arbitrary_precision_integers",
+ "Allows generating arbitrary width integer types"),
+ clEnumValN(SPIRV::Extension::SPV_INTEL_optnone, "SPV_INTEL_optnone",
+ "Adds OptNoneINTEL value for Function Control mask that "
+ "indicates a request to not optimize the function"),
+ clEnumValN(SPIRV::Extension::SPV_KHR_no_integer_wrap_decoration,
+ "SPV_KHR_no_integer_wrap_decoration",
+ "Adds decorations to indicate that a given instruction does "
+ "not cause integer wrapping"),
+ clEnumValN(SPIRV::Extension::SPV_KHR_expect_assume,
+ "SPV_KHR_expect_assume",
+ "Provides additional information to a compiler, similar to "
+ "the llvm.assume and llvm.expect intrinsics."),
+ clEnumValN(SPIRV::Extension::SPV_KHR_bit_instructions,
+ "SPV_KHR_bit_instructions",
+ "This enables bit instructions to be used by SPIR-V modules "
+ "without requiring the Shader capability")));
+
// Compare version numbers, but allow 0 to mean unspecified.
static bool isAtLeastVer(uint32_t Target, uint32_t VerToCompareTo) {
return Target == 0 || Target >= VerToCompareTo;
}
-static unsigned computePointerSize(const Triple &TT) {
- const auto Arch = TT.getArch();
- // TODO: unify this with pointers legalization.
- assert(TT.isSPIRV());
- return Arch == Triple::spirv32 ? 32 : 64;
-}
-
SPIRVSubtarget::SPIRVSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const SPIRVTargetMachine &TM)
: SPIRVGenSubtargetInfo(TT, CPU, /*TuneCPU=*/CPU, FS),
- PointerSize(computePointerSize(TT)), SPIRVVersion(0), OpenCLVersion(0),
- InstrInfo(), FrameLowering(initSubtargetDependencies(CPU, FS)),
- TLInfo(TM, *this) {
+ PointerSize(TM.getPointerSizeInBits(/* AS= */ 0)), SPIRVVersion(0),
+ OpenCLVersion(0), InstrInfo(),
+ FrameLowering(initSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
+ TargetTriple(TT) {
// The order of initialization is important.
initAvailableExtensions();
initAvailableExtInstSets();
@@ -82,6 +99,8 @@ bool SPIRVSubtarget::isAtLeastSPIRVVer(uint32_t VerToCompareTo) const {
}
bool SPIRVSubtarget::isAtLeastOpenCLVer(uint32_t VerToCompareTo) const {
+ if (!isOpenCLEnv())
+ return false;
return isAtLeastVer(OpenCLVersion, VerToCompareTo);
}
@@ -90,14 +109,13 @@ bool SPIRVSubtarget::canDirectlyComparePointers() const {
return isAtLeastVer(SPIRVVersion, 14);
}
-// TODO: use command line args for this rather than defaults.
void SPIRVSubtarget::initAvailableExtensions() {
AvailableExtensions.clear();
if (!isOpenCLEnv())
return;
- // A default extension for testing.
- AvailableExtensions.insert(
- SPIRV::Extension::SPV_KHR_no_integer_wrap_decoration);
+
+ for (auto Extension : Extensions)
+ AvailableExtensions.insert(Extension);
}
// TODO: use command line args for this rather than just defaults.
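The hard-coded test extension is replaced by the hidden -spirv-extensions option defined above. As an illustrative invocation, since this is a cl::list the flag is repeated once per extension, e.g. llc -mtriple=spirv64-unknown-unknown -spirv-extensions=SPV_KHR_expect_assume -spirv-extensions=SPV_INTEL_optnone in.ll.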
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVSubtarget.h b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVSubtarget.h
index dd19a1d0a9bb..62524ebfc9bf 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVSubtarget.h
@@ -17,6 +17,7 @@
#include "SPIRVFrameLowering.h"
#include "SPIRVISelLowering.h"
#include "SPIRVInstrInfo.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
@@ -24,6 +25,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
#define GET_SUBTARGETINFO_HEADER
#include "SPIRVGenSubtargetInfo.inc"
@@ -45,6 +47,7 @@ private:
SPIRVInstrInfo InstrInfo;
SPIRVFrameLowering FrameLowering;
SPIRVTargetLowering TLInfo;
+ Triple TargetTriple;
// GlobalISel related APIs.
std::unique_ptr<CallLowering> CallLoweringInfo;
@@ -70,15 +73,23 @@ public:
unsigned getPointerSize() const { return PointerSize; }
bool canDirectlyComparePointers() const;
// TODO: this environment is not implemented in Triple, we need to decide
- // how to standartize its support. For now, let's assume that we always
- // operate with OpenCL.
- bool isOpenCLEnv() const { return true; }
+ // how to standardize its support. For now, let's assume SPIR-V with physical
+ // addressing is OpenCL, and Logical addressing is Vulkan.
+ bool isOpenCLEnv() const {
+ return TargetTriple.getArch() == Triple::spirv32 ||
+ TargetTriple.getArch() == Triple::spirv64;
+ }
+ bool isVulkanEnv() const { return TargetTriple.getArch() == Triple::spirv; }
uint32_t getSPIRVVersion() const { return SPIRVVersion; };
bool isAtLeastSPIRVVer(uint32_t VerToCompareTo) const;
bool isAtLeastOpenCLVer(uint32_t VerToCompareTo) const;
// TODO: implement command line args or other ways to determine this.
bool hasOpenCLFullProfile() const { return true; }
bool hasOpenCLImageSupport() const { return true; }
+ const SmallSet<SPIRV::Extension::Extension, 4> &
+ getAllAvailableExtensions() const {
+ return AvailableExtensions;
+ }
bool canUseExtension(SPIRV::Extension::Extension E) const;
bool canUseExtInstSet(SPIRV::InstructionSet::InstructionSet E) const;
@@ -106,6 +117,10 @@ public:
const SPIRVRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
+
+ static bool classof(const TargetSubtargetInfo *ST) {
+ return ST->getTargetTriple().isSPIRV();
+ }
};
} // namespace llvm
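The classof hook added here is what lets a generic TargetSubtargetInfo reference participate in LLVM's isa/cast machinery; it is exercised earlier in this diff, where getOpTypeInt does cast<SPIRVSubtarget>(MIRBuilder.getMF().getSubtarget()).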
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
index 1b36c10df15b..ac92ee4a0756 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -77,6 +77,12 @@ def ExtensionEntries : GenericTable {
let PrimaryKeyName = "lookupExtensionByCategoryAndValue";
}
+// Function to lookup symbolic operands enabled by a given extension.
+def lookupSymbolicOperandsEnabledByExtension : SearchIndex {
+ let Table = ExtensionEntries;
+ let Key = ["ReqExtension", "Category"];
+}
+
//===----------------------------------------------------------------------===//
// Lookup table for matching symbolic operands (category + 32-bit value) to
// SPIR-V capabilities. If an operand requires more than one capability, there
@@ -195,7 +201,7 @@ defm SPV_AMD_shader_ballot : ExtensionOperand<5>;
defm SPV_AMD_gpu_shader_half_float : ExtensionOperand<6>;
defm SPV_KHR_shader_draw_parameters : ExtensionOperand<7>;
defm SPV_KHR_subgroup_vote : ExtensionOperand<8>;
-defm SPV_KHR_16bit_storeage : ExtensionOperand<9>;
+defm SPV_KHR_16bit_storage : ExtensionOperand<9>;
defm SPV_KHR_device_group : ExtensionOperand<10>;
defm SPV_KHR_multiview : ExtensionOperand<11>;
defm SPV_NVX_multiview_per_view_attributes : ExtensionOperand<12>;
@@ -243,6 +249,52 @@ defm SPV_KHR_shader_clock : ExtensionOperand<54>;
defm SPV_INTEL_unstructured_loop_controls : ExtensionOperand<55>;
defm SPV_EXT_demote_to_helper_invocation : ExtensionOperand<56>;
defm SPV_INTEL_fpga_reg : ExtensionOperand<57>;
+defm SPV_INTEL_blocking_pipes : ExtensionOperand<58>;
+defm SPV_GOOGLE_user_type : ExtensionOperand<59>;
+defm SPV_KHR_physical_storage_buffer : ExtensionOperand<60>;
+defm SPV_INTEL_kernel_attributes : ExtensionOperand<61>;
+defm SPV_KHR_non_semantic_info : ExtensionOperand<62>;
+defm SPV_INTEL_io_pipes : ExtensionOperand<63>;
+defm SPV_KHR_ray_tracing : ExtensionOperand<64>;
+defm SPV_KHR_ray_query : ExtensionOperand<65>;
+defm SPV_INTEL_fpga_memory_accesses : ExtensionOperand<66>;
+defm SPV_INTEL_arbitrary_precision_integers : ExtensionOperand<67>;
+defm SPV_EXT_shader_atomic_float_add : ExtensionOperand<68>;
+defm SPV_KHR_terminate_invocation : ExtensionOperand<69>;
+defm SPV_KHR_fragment_shading_rate : ExtensionOperand<70>;
+defm SPV_EXT_shader_image_int64 : ExtensionOperand<71>;
+defm SPV_INTEL_fp_fast_math_mode : ExtensionOperand<72>;
+defm SPV_INTEL_fpga_cluster_attributes : ExtensionOperand<73>;
+defm SPV_INTEL_loop_fuse : ExtensionOperand<74>;
+defm SPV_EXT_shader_atomic_float_min_max : ExtensionOperand<75>;
+defm SPV_KHR_workgroup_memory_explicit_layout : ExtensionOperand<76>;
+defm SPV_KHR_linkonce_odr : ExtensionOperand<77>;
+defm SPV_KHR_expect_assume : ExtensionOperand<78>;
+defm SPV_INTEL_fpga_dsp_control : ExtensionOperand<79>;
+defm SPV_NV_bindless_texture : ExtensionOperand<80>;
+defm SPV_INTEL_fpga_invocation_pipelining_attributes : ExtensionOperand<81>;
+defm SPV_KHR_subgroup_uniform_control_flow : ExtensionOperand<82>;
+defm SPV_HUAWEI_subpass_shading : ExtensionOperand<83>;
+defm SPV_KHR_integer_dot_product : ExtensionOperand<84>;
+defm SPV_EXT_shader_atomic_float16_add : ExtensionOperand<85>;
+defm SPV_INTEL_runtime_aligned : ExtensionOperand<86>;
+defm SPV_KHR_bit_instructions : ExtensionOperand<87>;
+defm SPV_NV_ray_tracing_motion_blur : ExtensionOperand<88>;
+defm SPV_KHR_uniform_group_instructions : ExtensionOperand<89>;
+defm SPV_KHR_subgroup_rotate : ExtensionOperand<90>;
+defm SPV_INTEL_split_barrier : ExtensionOperand<91>;
+defm SPV_KHR_ray_cull_mask : ExtensionOperand<92>;
+defm SPV_KHR_fragment_shader_barycentric : ExtensionOperand<93>;
+defm SPV_EXT_relaxed_printf_string_address_space : ExtensionOperand<94>;
+defm SPV_EXT_ycbcr_attachments : ExtensionOperand<95>;
+defm SPV_EXT_mesh_shader : ExtensionOperand<96>;
+defm SPV_ARM_core_builtins : ExtensionOperand<97>;
+defm SPV_EXT_opacity_micromap : ExtensionOperand<98>;
+defm SPV_NV_shader_invocation_reorder : ExtensionOperand<99>;
+defm SPV_INTEL_usm_storage_classes : ExtensionOperand<100>;
+defm SPV_INTEL_fpga_latency_control : ExtensionOperand<101>;
+defm SPV_INTEL_fpga_argument_interfaces : ExtensionOperand<102>;
+defm SPV_INTEL_optnone : ExtensionOperand<103>;
//===----------------------------------------------------------------------===//
// Multiclass used to define Capabilities enum values and at the same time
@@ -335,10 +387,10 @@ defm GroupNonUniformQuad : CapabilityOperand<68, 0x10300, 0, [], [GroupNonUnifor
defm SubgroupBallotKHR : CapabilityOperand<4423, 0, 0, [SPV_KHR_shader_ballot], []>;
defm DrawParameters : CapabilityOperand<4427, 0x10300, 0, [SPV_KHR_shader_draw_parameters], [Shader]>;
defm SubgroupVoteKHR : CapabilityOperand<4431, 0, 0, [SPV_KHR_subgroup_vote], []>;
-defm StorageBuffer16BitAccess : CapabilityOperand<4433, 0x10300, 0, [SPV_KHR_16bit_storeage], []>;
-defm StorageUniform16 : CapabilityOperand<4434, 0x10300, 0, [SPV_KHR_16bit_storeage], [StorageBuffer16BitAccess]>;
-defm StoragePushConstant16 : CapabilityOperand<4435, 0x10300, 0, [SPV_KHR_16bit_storeage], []>;
-defm StorageInputOutput16 : CapabilityOperand<4436, 0x10300, 0, [SPV_KHR_16bit_storeage], []>;
+defm StorageBuffer16BitAccess : CapabilityOperand<4433, 0x10300, 0, [SPV_KHR_16bit_storage], []>;
+defm StorageUniform16 : CapabilityOperand<4434, 0x10300, 0, [SPV_KHR_16bit_storage], [StorageBuffer16BitAccess]>;
+defm StoragePushConstant16 : CapabilityOperand<4435, 0x10300, 0, [SPV_KHR_16bit_storage], []>;
+defm StorageInputOutput16 : CapabilityOperand<4436, 0x10300, 0, [SPV_KHR_16bit_storage], []>;
defm DeviceGroup : CapabilityOperand<4437, 0x10300, 0, [SPV_KHR_device_group], []>;
defm MultiView : CapabilityOperand<4439, 0x10300, 0, [SPV_KHR_multiview], [Shader]>;
defm VariablePointersStorageBuffer : CapabilityOperand<4441, 0x10300, 0, [SPV_KHR_variable_pointers], [Shader]>;
@@ -396,6 +448,10 @@ defm ComputeDerivativeGroupLinearNV : CapabilityOperand<5350, 0, 0, [], []>;
defm FragmentDensityEXT : CapabilityOperand<5291, 0, 0, [], [Shader]>;
defm PhysicalStorageBufferAddressesEXT : CapabilityOperand<5347, 0, 0, [], [Shader]>;
defm CooperativeMatrixNV : CapabilityOperand<5357, 0, 0, [], [Shader]>;
+defm ArbitraryPrecisionIntegersINTEL : CapabilityOperand<5844, 0, 0, [SPV_INTEL_arbitrary_precision_integers], [Int8, Int16]>;
+defm OptNoneINTEL : CapabilityOperand<6094, 0, 0, [SPV_INTEL_optnone], []>;
+defm BitInstructions : CapabilityOperand<6025, 0, 0, [SPV_KHR_bit_instructions], []>;
+defm ExpectAssumeKHR : CapabilityOperand<5629, 0, 0, [SPV_KHR_expect_assume], []>;
//===----------------------------------------------------------------------===//
// Multiclass used to define SourceLanguage enum values and at the same time
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
index 6721c60834bd..1503f263e42c 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
@@ -37,6 +37,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSPIRVTarget() {
// Register the target.
RegisterTargetMachine<SPIRVTargetMachine> X(getTheSPIRV32Target());
RegisterTargetMachine<SPIRVTargetMachine> Y(getTheSPIRV64Target());
+ RegisterTargetMachine<SPIRVTargetMachine> Z(getTheSPIRVLogicalTarget());
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializeGlobalISel(PR);
@@ -45,6 +46,11 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSPIRVTarget() {
static std::string computeDataLayout(const Triple &TT) {
const auto Arch = TT.getArch();
+ // TODO: this probably needs to be revisited:
+ // Logical SPIR-V has no pointer size, so any fixed pointer size would be
+ // wrong. Defaulting to 32 or 64 here is motivated only by
+ // PhysicalStorageBuffer64, another memory model used for graphics; the
+ // value itself should carry no meaning.
if (Arch == Triple::spirv32)
return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
"v96:128-v192:256-v256:256-v512:512-v1024:1024";
@@ -66,7 +72,7 @@ SPIRVTargetMachine::SPIRVTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
@@ -84,7 +90,7 @@ namespace {
class SPIRVPassConfig : public TargetPassConfig {
public:
SPIRVPassConfig(SPIRVTargetMachine &TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM), TM(TM) {}
SPIRVTargetMachine &getSPIRVTargetMachine() const {
return getTM<SPIRVTargetMachine>();
@@ -103,6 +109,9 @@ public:
void addOptimizedRegAlloc() override {}
void addPostRegAlloc() override;
+
+private:
+ const SPIRVTargetMachine &TM;
};
} // namespace
@@ -144,7 +153,7 @@ TargetPassConfig *SPIRVTargetMachine::createPassConfig(PassManagerBase &PM) {
void SPIRVPassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
addPass(createSPIRVRegularizerPass());
- addPass(createSPIRVPrepareFunctionsPass());
+ addPass(createSPIRVPrepareFunctionsPass(TM));
}
void SPIRVPassConfig::addISelPrepare() {
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h
index cb16d7a01f70..a1a9f2684615 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h
@@ -26,7 +26,7 @@ public:
SPIRVTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
const SPIRVSubtarget *getSubtargetImpl() const { return &Subtarget; }
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
index f4f3cdce1ac3..1c0e8d84e2fd 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
@@ -15,6 +15,7 @@
#include "SPIRV.h"
#include "SPIRVInstrInfo.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -209,15 +210,16 @@ SPIRV::MemorySemantics::MemorySemantics getMemSemantics(AtomicOrdering Ord) {
MachineInstr *getDefInstrMaybeConstant(Register &ConstReg,
const MachineRegisterInfo *MRI) {
MachineInstr *ConstInstr = MRI->getVRegDef(ConstReg);
- if (ConstInstr->getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS &&
- ConstInstr->getIntrinsicID() == Intrinsic::spv_track_constant) {
- ConstReg = ConstInstr->getOperand(2).getReg();
- ConstInstr = MRI->getVRegDef(ConstReg);
+ if (auto *GI = dyn_cast<GIntrinsic>(ConstInstr)) {
+ if (GI->is(Intrinsic::spv_track_constant)) {
+ ConstReg = ConstInstr->getOperand(2).getReg();
+ return MRI->getVRegDef(ConstReg);
+ }
} else if (ConstInstr->getOpcode() == SPIRV::ASSIGN_TYPE) {
ConstReg = ConstInstr->getOperand(1).getReg();
- ConstInstr = MRI->getVRegDef(ConstReg);
+ return MRI->getVRegDef(ConstReg);
}
- return ConstInstr;
+ return MRI->getVRegDef(ConstReg);
}
uint64_t getIConstVal(Register ConstReg, const MachineRegisterInfo *MRI) {
@@ -227,8 +229,9 @@ uint64_t getIConstVal(Register ConstReg, const MachineRegisterInfo *MRI) {
}
bool isSpvIntrinsic(MachineInstr &MI, Intrinsic::ID IntrinsicID) {
- return MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS &&
- MI.getIntrinsicID() == IntrinsicID;
+ if (auto *GI = dyn_cast<GIntrinsic>(&MI))
+ return GI->is(IntrinsicID);
+ return false;
}
Type *getMDOperandAsType(const MDNode *N, unsigned I) {
@@ -276,7 +279,7 @@ static bool isKernelQueryBI(const StringRef MangledName) {
}
static bool isNonMangledOCLBuiltin(StringRef Name) {
- if (!Name.startswith("__"))
+ if (!Name.starts_with("__"))
return false;
return isEnqueueKernelBI(Name) || isKernelQueryBI(Name) ||
@@ -286,8 +289,8 @@ static bool isNonMangledOCLBuiltin(StringRef Name) {
std::string getOclOrSpirvBuiltinDemangledName(StringRef Name) {
bool IsNonMangledOCL = isNonMangledOCLBuiltin(Name);
- bool IsNonMangledSPIRV = Name.startswith("__spirv_");
- bool IsMangled = Name.startswith("_Z");
+ bool IsNonMangledSPIRV = Name.starts_with("__spirv_");
+ bool IsMangled = Name.starts_with("_Z");
if (!IsNonMangledOCL && !IsNonMangledSPIRV && !IsMangled)
return std::string();
@@ -308,7 +311,7 @@ std::string getOclOrSpirvBuiltinDemangledName(StringRef Name) {
// Similar to ::std:: in C++.
size_t Start, Len = 0;
size_t DemangledNameLenStart = 2;
- if (Name.startswith("_ZN")) {
+ if (Name.starts_with("_ZN")) {
// Skip CV and ref qualifiers.
size_t NameSpaceStart = Name.find_first_not_of("rVKRO", 3);
// All built-ins are in the ::cl:: namespace.
@@ -323,13 +326,12 @@ std::string getOclOrSpirvBuiltinDemangledName(StringRef Name) {
}
const Type *getTypedPtrEltType(const Type *Ty) {
- auto PType = dyn_cast<PointerType>(Ty);
- if (!PType || PType->isOpaque())
- return Ty;
- return PType->getNonOpaquePointerElementType();
+ // TODO: This function requires updating following the opaque pointer
+ // migration.
+ return Ty;
}
-static bool hasBuiltinTypePrefix(StringRef Name) {
+bool hasBuiltinTypePrefix(StringRef Name) {
if (Name.starts_with("opencl.") || Name.starts_with("spirv."))
return true;
return false;
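The prefix test that this hunk promotes from a static helper to a public utility is small enough to restate standalone; a sketch of its behavior (the example type names are hypothetical):

    #include "llvm/ADT/StringRef.h"

    // Standalone equivalent of the helper exported above.
    static bool hasBuiltinTypePrefixSketch(llvm::StringRef Name) {
      return Name.starts_with("opencl.") || Name.starts_with("spirv.");
    }
    // e.g. hasBuiltinTypePrefixSketch("opencl.image2d_ro_t") -> true
    //      hasBuiltinTypePrefixSketch("struct.foo")          -> false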
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVUtils.h b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVUtils.h
index 7c193611a857..30fae6c7de47 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVUtils.h
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -92,6 +92,9 @@ std::string getOclOrSpirvBuiltinDemangledName(StringRef Name);
// element type, otherwise return Type.
const Type *getTypedPtrEltType(const Type *Type);
+// Check if a string contains a builtin prefix.
+bool hasBuiltinTypePrefix(StringRef Name);
+
// Check if given LLVM type is a special opaque builtin type.
bool isSpecialOpaqueType(const Type *Ty);
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.cpp
index fb7cab4fe779..febefc024920 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.cpp
@@ -19,10 +19,16 @@ Target &llvm::getTheSPIRV64Target() {
static Target TheSPIRV64Target;
return TheSPIRV64Target;
}
+Target &llvm::getTheSPIRVLogicalTarget() {
+ static Target TheSPIRVLogicalTarget;
+ return TheSPIRVLogicalTarget;
+}
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSPIRVTargetInfo() {
RegisterTarget<Triple::spirv32> X(getTheSPIRV32Target(), "spirv32",
"SPIR-V 32-bit", "SPIRV");
RegisterTarget<Triple::spirv64> Y(getTheSPIRV64Target(), "spirv64",
"SPIR-V 64-bit", "SPIRV");
+ RegisterTarget<Triple::spirv> Z(getTheSPIRVLogicalTarget(), "spirv",
+ "SPIR-V Logical", "SPIRV");
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.h b/contrib/llvm-project/llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.h
index 4353258e1d1a..9131b2598a7d 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.h
@@ -15,6 +15,7 @@ class Target;
Target &getTheSPIRV32Target();
Target &getTheSPIRV64Target();
+Target &getTheSPIRVLogicalTarget();
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 9bfee26db806..54a7d9214456 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -12,9 +12,11 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmMacro.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
@@ -28,6 +30,7 @@
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
@@ -70,10 +73,9 @@ class SparcAsmParser : public MCTargetAsmParser {
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
- bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
- OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
ParseStatus parseDirective(AsmToken DirectiveID) override;
@@ -82,25 +84,25 @@ class SparcAsmParser : public MCTargetAsmParser {
unsigned Kind) override;
// Custom parse functions for Sparc specific operands.
- OperandMatchResultTy parseMEMOperand(OperandVector &Operands);
+ ParseStatus parseMEMOperand(OperandVector &Operands);
- OperandMatchResultTy parseMembarTag(OperandVector &Operands);
+ ParseStatus parseMembarTag(OperandVector &Operands);
+
+ ParseStatus parseASITag(OperandVector &Operands);
template <TailRelocKind Kind>
- OperandMatchResultTy parseTailRelocSym(OperandVector &Operands);
+ ParseStatus parseTailRelocSym(OperandVector &Operands);
- template <unsigned N>
- OperandMatchResultTy parseShiftAmtImm(OperandVector &Operands);
+ template <unsigned N> ParseStatus parseShiftAmtImm(OperandVector &Operands);
- OperandMatchResultTy parseCallTarget(OperandVector &Operands);
+ ParseStatus parseCallTarget(OperandVector &Operands);
- OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Name);
+ ParseStatus parseOperand(OperandVector &Operands, StringRef Name);
- OperandMatchResultTy
- parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Operand,
- bool isCall = false);
+ ParseStatus parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Operand,
+ bool isCall = false);
- OperandMatchResultTy parseBranchModifiers(OperandVector &Operands);
+ ParseStatus parseBranchModifiers(OperandVector &Operands);
// Helper function for dealing with %lo / %hi in PIC mode.
const SparcMCExpr *adjustPICRelocation(SparcMCExpr::VariantKind VK,
@@ -119,6 +121,9 @@ class SparcAsmParser : public MCTargetAsmParser {
bool expandSET(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
+ bool expandSETX(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+
SMLoc getLoc() const { return getParser().getTok().getLoc(); }
public:
@@ -233,7 +238,8 @@ private:
k_Register,
k_Immediate,
k_MemoryReg,
- k_MemoryImm
+ k_MemoryImm,
+ k_ASITag
} Kind;
SMLoc StartLoc, EndLoc;
@@ -263,6 +269,7 @@ private:
struct RegOp Reg;
struct ImmOp Imm;
struct MemOp Mem;
+ unsigned ASI;
};
public:
@@ -275,6 +282,7 @@ public:
bool isMEMrr() const { return Kind == k_MemoryReg; }
bool isMEMri() const { return Kind == k_MemoryImm; }
bool isMembarTag() const { return Kind == k_Immediate; }
+ bool isASITag() const { return Kind == k_ASITag; }
bool isTailRelocSym() const { return Kind == k_Immediate; }
bool isCallTarget() const {
@@ -354,6 +362,11 @@ public:
return Mem.Off;
}
+ unsigned getASITag() const {
+ assert((Kind == k_ASITag) && "Invalid access!");
+ return ASI;
+ }
+
/// getStartLoc - Get the location of the first token of this operand.
SMLoc getStartLoc() const override {
return StartLoc;
@@ -374,6 +387,9 @@ public:
OS << "Mem: " << getMemBase()
<< "+" << *getMemOff()
<< "\n"; break;
+ case k_ASITag:
+ OS << "ASI tag: " << getASITag() << "\n";
+ break;
}
}
@@ -425,6 +441,11 @@ public:
addExpr(Inst, Expr);
}
+ void addASITagOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(getASITag()));
+ }
+
void addMembarTagOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCExpr *Expr = getImm();
@@ -469,6 +490,15 @@ public:
return Op;
}
+ static std::unique_ptr<SparcOperand> CreateASITag(unsigned Val, SMLoc S,
+ SMLoc E) {
+ auto Op = std::make_unique<SparcOperand>(k_ASITag);
+ Op->ASI = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
static bool MorphToIntPairReg(SparcOperand &Op) {
unsigned Reg = Op.getReg();
assert(Op.Reg.Kind == rk_IntReg);
@@ -643,6 +673,78 @@ bool SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,
return false;
}
+bool SparcAsmParser::expandSETX(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ MCOperand MCRegOp = Inst.getOperand(0);
+ MCOperand MCValOp = Inst.getOperand(1);
+ MCOperand MCTmpOp = Inst.getOperand(2);
+ assert(MCRegOp.isReg() && MCTmpOp.isReg());
+ assert(MCValOp.isImm() || MCValOp.isExpr());
+
+ // The value operand can be either an expression or an immediate.
+ bool IsImm = MCValOp.isImm();
+ int64_t ImmValue = IsImm ? MCValOp.getImm() : 0;
+
+ const MCExpr *ValExpr = IsImm ? MCConstantExpr::create(ImmValue, getContext())
+ : MCValOp.getExpr();
+
+ // Very small immediates can be expressed directly as a single `or`.
+ if (IsImm && isInt<13>(ImmValue)) {
+ // or %g0, val, rd
+ Instructions.push_back(MCInstBuilder(SP::ORri)
+ .addReg(MCRegOp.getReg())
+ .addReg(Sparc::G0)
+ .addExpr(ValExpr));
+ return false;
+ }
+
+ // Otherwise, first we set the lower half of the register.
+
+ // sethi %hi(val), rd
+ Instructions.push_back(
+ MCInstBuilder(SP::SETHIi)
+ .addReg(MCRegOp.getReg())
+ .addExpr(adjustPICRelocation(SparcMCExpr::VK_Sparc_HI, ValExpr)));
+ // or rd, %lo(val), rd
+ Instructions.push_back(
+ MCInstBuilder(SP::ORri)
+ .addReg(MCRegOp.getReg())
+ .addReg(MCRegOp.getReg())
+ .addExpr(adjustPICRelocation(SparcMCExpr::VK_Sparc_LO, ValExpr)));
+
+ // Immediates that fit in 32 bits (unsigned) are fully covered by the
+ // `sethi`+`or` pair emitted above, so we can just return here.
+ if (IsImm && isUInt<32>(ImmValue))
+ return false;
+
+ // For bigger immediates, we need to generate the upper half, then shift and
+ // merge it with the lower half that has just been generated above.
+
+ // sethi %hh(val), tmp
+ Instructions.push_back(
+ MCInstBuilder(SP::SETHIi)
+ .addReg(MCTmpOp.getReg())
+ .addExpr(adjustPICRelocation(SparcMCExpr::VK_Sparc_HH, ValExpr)));
+ // or tmp, %hm(val), tmp
+ Instructions.push_back(
+ MCInstBuilder(SP::ORri)
+ .addReg(MCTmpOp.getReg())
+ .addReg(MCTmpOp.getReg())
+ .addExpr(adjustPICRelocation(SparcMCExpr::VK_Sparc_HM, ValExpr)));
+ // sllx tmp, 32, tmp
+ Instructions.push_back(MCInstBuilder(SP::SLLXri)
+ .addReg(MCTmpOp.getReg())
+ .addReg(MCTmpOp.getReg())
+ .addImm(32));
+ // or tmp, rd, rd
+ Instructions.push_back(MCInstBuilder(SP::ORrr)
+ .addReg(MCRegOp.getReg())
+ .addReg(MCTmpOp.getReg())
+ .addReg(MCRegOp.getReg()));
+
+ return false;
+}
+
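The expansion implemented by expandSETX thus falls into three tiers: values that fit in a signed 13-bit immediate take a single `or`; values that fit in 32 unsigned bits take the `sethi`+`or` pair; everything else additionally builds the upper half in the temporary register and merges it with `sllx`+`or`. A minimal sketch of the tier selection using the same llvm/Support/MathExtras.h predicates (the function name and return encoding are illustrative):

    #include "llvm/Support/MathExtras.h"

    // 0 = single `or`, 1 = `sethi`+`or`, 2 = full six-instruction sequence.
    static int setxTier(int64_t Imm) {
      if (llvm::isInt<13>(Imm))
        return 0; // fits the simm13 field of `or %g0, imm, rd`
      if (llvm::isUInt<32>(Imm))
        return 1; // lower half only: sethi %hi + or %lo
      return 2;   // upper half via tmp: sethi %hh + or %hm + sllx 32 + or
    }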
bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
@@ -663,6 +765,10 @@ bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (expandSET(Inst, IDLoc, Instructions))
return true;
break;
+ case SP::SETX:
+ if (expandSETX(Inst, IDLoc, Instructions))
+ return true;
+ break;
}
for (const MCInst &I : Instructions) {
@@ -694,31 +800,30 @@ bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
llvm_unreachable("Implement any new match types added!");
}
-bool SparcAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
+bool SparcAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) {
- if (tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success)
+ if (!tryParseRegister(Reg, StartLoc, EndLoc).isSuccess())
return Error(StartLoc, "invalid register name");
return false;
}
-OperandMatchResultTy SparcAsmParser::tryParseRegister(MCRegister &RegNo,
- SMLoc &StartLoc,
- SMLoc &EndLoc) {
+ParseStatus SparcAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
const AsmToken &Tok = Parser.getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
- RegNo = 0;
+ Reg = Sparc::NoRegister;
if (getLexer().getKind() != AsmToken::Percent)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Parser.Lex();
unsigned regKind = SparcOperand::rk_None;
- if (matchRegisterName(Tok, RegNo, regKind)) {
+ if (matchRegisterName(Tok, Reg, regKind)) {
Parser.Lex();
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
getLexer().UnLex(Tok);
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
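Besides the mechanical renames, the OperandMatchResultTy to ParseStatus migration visible throughout this file changes the error idiom: the old code had to call Error() and then separately return MatchOperand_ParseFail, while the new code can `return Error(Loc, ...)` directly. That works because Error() emits the diagnostic and returns true, and ParseStatus is implicitly constructible from bool. A condensed sketch of the type (abridged from llvm/MC/MCParser/MCTargetAsmParser.h, not verbatim):

    class ParseStatus {
      enum class StatusTy { Success, Failure, NoMatch } Status;

    public:
      static constexpr StatusTy Success = StatusTy::Success;
      static constexpr StatusTy Failure = StatusTy::Failure;
      static constexpr StatusTy NoMatch = StatusTy::NoMatch;

      constexpr ParseStatus(StatusTy Status) : Status(Status) {}
      // Error() returns true, so `return Error(Loc, "msg");` converts to
      // Failure through this constructor.
      constexpr ParseStatus(bool Error)
          : Status(Error ? Failure : Success) {}

      constexpr bool isSuccess() const { return Status == Success; }
      constexpr bool isFailure() const { return Status == Failure; }
      constexpr bool isNoMatch() const { return Status == NoMatch; }
    };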
static void applyMnemonicAliases(StringRef &Mnemonic,
@@ -738,12 +843,12 @@ bool SparcAsmParser::ParseInstruction(ParseInstructionInfo &Info,
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
if (getLexer().is(AsmToken::Comma)) {
- if (parseBranchModifiers(Operands) != MatchOperand_Success) {
+ if (!parseBranchModifiers(Operands).isSuccess()) {
SMLoc Loc = getLexer().getLoc();
return Error(Loc, "unexpected token");
}
}
- if (parseOperand(Operands, Name) != MatchOperand_Success) {
+ if (!parseOperand(Operands, Name).isSuccess()) {
SMLoc Loc = getLexer().getLoc();
return Error(Loc, "unexpected token");
}
@@ -755,7 +860,7 @@ bool SparcAsmParser::ParseInstruction(ParseInstructionInfo &Info,
}
Parser.Lex(); // Eat the comma or plus.
// Parse and remember the operand.
- if (parseOperand(Operands, Name) != MatchOperand_Success) {
+ if (!parseOperand(Operands, Name).isSuccess()) {
SMLoc Loc = getLexer().getLoc();
return Error(Loc, "unexpected token");
}
@@ -788,24 +893,21 @@ ParseStatus SparcAsmParser::parseDirective(AsmToken DirectiveID) {
return ParseStatus::NoMatch;
}
-OperandMatchResultTy
-SparcAsmParser::parseMEMOperand(OperandVector &Operands) {
+ParseStatus SparcAsmParser::parseMEMOperand(OperandVector &Operands) {
SMLoc S, E;
std::unique_ptr<SparcOperand> LHS;
- if (parseSparcAsmOperand(LHS) != MatchOperand_Success)
- return MatchOperand_NoMatch;
+ if (!parseSparcAsmOperand(LHS).isSuccess())
+ return ParseStatus::NoMatch;
// Single immediate operand
if (LHS->isImm()) {
Operands.push_back(SparcOperand::MorphToMEMri(Sparc::G0, std::move(LHS)));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
- if (!LHS->isIntReg()) {
- Error(LHS->getStartLoc(), "invalid register kind for this operand");
- return MatchOperand_ParseFail;
- }
+ if (!LHS->isIntReg())
+ return Error(LHS->getStartLoc(), "invalid register kind for this operand");
AsmToken Tok = getLexer().getTok();
// The plus token may be followed by a register or an immediate value, the
@@ -814,57 +916,51 @@ SparcAsmParser::parseMEMOperand(OperandVector &Operands) {
(void)Parser.parseOptionalToken(AsmToken::Plus);
std::unique_ptr<SparcOperand> RHS;
- if (parseSparcAsmOperand(RHS) != MatchOperand_Success)
- return MatchOperand_NoMatch;
+ if (!parseSparcAsmOperand(RHS).isSuccess())
+ return ParseStatus::NoMatch;
- if (RHS->isReg() && !RHS->isIntReg()) {
- Error(RHS->getStartLoc(), "invalid register kind for this operand");
- return MatchOperand_ParseFail;
- }
+ if (RHS->isReg() && !RHS->isIntReg())
+ return Error(RHS->getStartLoc(),
+ "invalid register kind for this operand");
Operands.push_back(
RHS->isImm()
? SparcOperand::MorphToMEMri(LHS->getReg(), std::move(RHS))
: SparcOperand::MorphToMEMrr(LHS->getReg(), std::move(RHS)));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
Operands.push_back(SparcOperand::CreateMEMr(LHS->getReg(), S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
template <unsigned N>
-OperandMatchResultTy SparcAsmParser::parseShiftAmtImm(OperandVector &Operands) {
+ParseStatus SparcAsmParser::parseShiftAmtImm(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
// This is a register, not an immediate
if (getLexer().getKind() == AsmToken::Percent)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
const MCExpr *Expr;
if (getParser().parseExpression(Expr))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
- if (!CE) {
- Error(S, "constant expression expected");
- return MatchOperand_ParseFail;
- }
+ if (!CE)
+ return Error(S, "constant expression expected");
- if (!isUInt<N>(CE->getValue())) {
- Error(S, "immediate shift value out of range");
- return MatchOperand_ParseFail;
- }
+ if (!isUInt<N>(CE->getValue()))
+ return Error(S, "immediate shift value out of range");
Operands.push_back(SparcOperand::CreateImm(Expr, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
template <SparcAsmParser::TailRelocKind Kind>
-OperandMatchResultTy
-SparcAsmParser::parseTailRelocSym(OperandVector &Operands) {
+ParseStatus SparcAsmParser::parseTailRelocSym(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
@@ -910,61 +1006,50 @@ SparcAsmParser::parseTailRelocSym(OperandVector &Operands) {
llvm_unreachable("Unhandled SparcAsmParser::TailRelocKind enum");
};
- if (getLexer().getKind() != AsmToken::Percent) {
- Error(getLoc(), "expected '%' for operand modifier");
- return MatchOperand_ParseFail;
- }
+ if (getLexer().getKind() != AsmToken::Percent)
+ return Error(getLoc(), "expected '%' for operand modifier");
const AsmToken Tok = Parser.getTok();
getParser().Lex(); // Eat '%'
- if (getLexer().getKind() != AsmToken::Identifier) {
- Error(getLoc(), "expected valid identifier for operand modifier");
- return MatchOperand_ParseFail;
- }
+ if (getLexer().getKind() != AsmToken::Identifier)
+ return Error(getLoc(), "expected valid identifier for operand modifier");
StringRef Name = getParser().getTok().getIdentifier();
SparcMCExpr::VariantKind VK = SparcMCExpr::parseVariantKind(Name);
- if (VK == SparcMCExpr::VK_Sparc_None) {
- Error(getLoc(), "invalid operand modifier");
- return MatchOperand_ParseFail;
- }
+ if (VK == SparcMCExpr::VK_Sparc_None)
+ return Error(getLoc(), "invalid operand modifier");
if (!MatchesKind(VK)) {
// Did not match the specified set of relocation types, put '%' back.
getLexer().UnLex(Tok);
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
Parser.Lex(); // Eat the identifier.
- if (getLexer().getKind() != AsmToken::LParen) {
- Error(getLoc(), "expected '('");
- return MatchOperand_ParseFail;
- }
+ if (getLexer().getKind() != AsmToken::LParen)
+ return Error(getLoc(), "expected '('");
getParser().Lex(); // Eat '('
const MCExpr *SubExpr;
- if (getParser().parseParenExpression(SubExpr, E)) {
- return MatchOperand_ParseFail;
- }
+ if (getParser().parseParenExpression(SubExpr, E))
+ return ParseStatus::Failure;
const MCExpr *Val = adjustPICRelocation(VK, SubExpr);
Operands.push_back(SparcOperand::CreateImm(Val, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy SparcAsmParser::parseMembarTag(OperandVector &Operands) {
+ParseStatus SparcAsmParser::parseMembarTag(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
const MCExpr *EVal;
int64_t ImmVal = 0;
std::unique_ptr<SparcOperand> Mask;
- if (parseSparcAsmOperand(Mask) == MatchOperand_Success) {
+ if (parseSparcAsmOperand(Mask).isSuccess()) {
if (!Mask->isImm() || !Mask->getImm()->evaluateAsAbsolute(ImmVal) ||
- ImmVal < 0 || ImmVal > 127) {
- Error(S, "invalid membar mask number");
- return MatchOperand_ParseFail;
- }
+ ImmVal < 0 || ImmVal > 127)
+ return Error(S, "invalid membar mask number");
}
while (getLexer().getKind() == AsmToken::Hash) {
@@ -982,10 +1067,8 @@ OperandMatchResultTy SparcAsmParser::parseMembarTag(OperandVector &Operands) {
Parser.Lex(); // Eat the identifier token.
- if (!MaskVal) {
- Error(TagStart, "unknown membar tag");
- return MatchOperand_ParseFail;
- }
+ if (!MaskVal)
+ return Error(TagStart, "unknown membar tag");
ImmVal |= MaskVal;
@@ -996,16 +1079,51 @@ OperandMatchResultTy SparcAsmParser::parseMembarTag(OperandVector &Operands) {
EVal = MCConstantExpr::create(ImmVal, getContext());
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(SparcOperand::CreateImm(EVal, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
+}
+
+ParseStatus SparcAsmParser::parseASITag(OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ SMLoc E = Parser.getTok().getEndLoc();
+ int64_t ASIVal = 0;
+
+ if (is64Bit() && (getLexer().getKind() == AsmToken::Hash)) {
+ // For now we only support named tags for 64-bit/V9 systems.
+ // TODO: add support for 32-bit/V8 systems.
+ SMLoc TagStart = getLexer().peekTok(false).getLoc();
+ Parser.Lex(); // Eat the '#'.
+ auto ASIName = Parser.getTok().getString();
+ auto ASITag = SparcASITag::lookupASITagByName(ASIName);
+ if (!ASITag)
+ ASITag = SparcASITag::lookupASITagByAltName(ASIName);
+ Parser.Lex(); // Eat the identifier token.
+
+ if (!ASITag)
+ return Error(TagStart, "unknown ASI tag");
+
+ ASIVal = ASITag->Encoding;
+ } else if (!getParser().parseAbsoluteExpression(ASIVal)) {
+ if (!isUInt<8>(ASIVal))
+ return Error(S, "invalid ASI number, must be between 0 and 255");
+ } else {
+ return Error(
+ S, is64Bit()
+ ? "malformed ASI tag, must be %asi, a constant integer "
+ "expression, or a named tag"
+ : "malformed ASI tag, must be a constant integer expression");
+ }
+
+ Operands.push_back(SparcOperand::CreateASITag(ASIVal, S, E));
+ return ParseStatus::Success;
}
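On 64-bit targets parseASITag accepts a named tag introduced by `#`, resolved through the generated lookup tables by trying the canonical name first and the alias second, and otherwise an absolute expression in the 0-255 range. A hypothetical standalone use of the same lookups (declared later in this diff in SparcMCTargetDesc.h; the ~0u "not found" convention is illustrative):

    #include "llvm/ADT/StringRef.h"

    // Mirrors the name-then-alt-name fallback chain in parseASITag.
    static unsigned encodeNamedASI(llvm::StringRef Name) {
      const auto *Tag = llvm::SparcASITag::lookupASITagByName(Name);
      if (!Tag)
        Tag = llvm::SparcASITag::lookupASITagByAltName(Name);
      return Tag ? Tag->Encoding : ~0u;
    }
    // encodeNamedASI("ASI_P") == encodeNamedASI("ASI_PRIMARY") == 0x80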
-OperandMatchResultTy SparcAsmParser::parseCallTarget(OperandVector &Operands) {
+ParseStatus SparcAsmParser::parseCallTarget(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
switch (getLexer().getKind()) {
default:
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
case AsmToken::LParen:
case AsmToken::Integer:
case AsmToken::Identifier:
@@ -1015,7 +1133,7 @@ OperandMatchResultTy SparcAsmParser::parseCallTarget(OperandVector &Operands) {
const MCExpr *DestValue;
if (getParser().parseExpression(DestValue))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
bool IsPic = getContext().getObjectFileInfo()->isPositionIndependent();
SparcMCExpr::VariantKind Kind =
@@ -1023,19 +1141,19 @@ OperandMatchResultTy SparcAsmParser::parseCallTarget(OperandVector &Operands) {
const MCExpr *DestExpr = SparcMCExpr::create(Kind, DestValue, getContext());
Operands.push_back(SparcOperand::CreateImm(DestExpr, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-SparcAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
+ParseStatus SparcAsmParser::parseOperand(OperandVector &Operands,
+ StringRef Mnemonic) {
- OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+ ParseStatus Res = MatchOperandParserImpl(Operands, Mnemonic);
// If there wasn't a custom match, try the generic matcher below. Otherwise,
// there was a match, but an error occurred, in which case, just return that
// the operand parsing failed.
- if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
- return ResTy;
+ if (Res.isSuccess() || Res.isFailure())
+ return Res;
if (getLexer().is(AsmToken::LBrac)) {
// Memory operand
@@ -1043,59 +1161,94 @@ SparcAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
Parser.getTok().getLoc()));
Parser.Lex(); // Eat the [
- if (Mnemonic == "cas" || Mnemonic == "casx" || Mnemonic == "casa") {
+ if (Mnemonic == "cas" || Mnemonic == "casl" || Mnemonic == "casa" ||
+ Mnemonic == "casx" || Mnemonic == "casxl" || Mnemonic == "casxa") {
SMLoc S = Parser.getTok().getLoc();
if (getLexer().getKind() != AsmToken::Percent)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Parser.Lex(); // eat %
MCRegister RegNo;
unsigned RegKind;
if (!matchRegisterName(Parser.getTok(), RegNo, RegKind))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Parser.Lex(); // Eat the identifier token.
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer()-1);
Operands.push_back(SparcOperand::CreateReg(RegNo, RegKind, S, E));
- ResTy = MatchOperand_Success;
+ Res = ParseStatus::Success;
} else {
- ResTy = parseMEMOperand(Operands);
+ Res = parseMEMOperand(Operands);
}
- if (ResTy != MatchOperand_Success)
- return ResTy;
+ if (!Res.isSuccess())
+ return Res;
if (!getLexer().is(AsmToken::RBrac))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
Operands.push_back(SparcOperand::CreateToken("]",
Parser.getTok().getLoc()));
Parser.Lex(); // Eat the ]
// Parse an optional address-space identifier after the address.
- if (getLexer().is(AsmToken::Integer)) {
- std::unique_ptr<SparcOperand> Op;
- ResTy = parseSparcAsmOperand(Op, false);
- if (ResTy != MatchOperand_Success || !Op)
- return MatchOperand_ParseFail;
- Operands.push_back(std::move(Op));
+ // This will be either an immediate constant expression, or, on 64-bit
+ // processors, the %asi register.
+ if (is64Bit() && getLexer().is(AsmToken::Percent)) {
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat the %.
+ const AsmToken Tok = Parser.getTok();
+ if (Tok.is(AsmToken::Identifier) && Tok.getString() == "asi") {
+ // Here we patch the MEM operand from [base + %g0] into [base + 0],
+ // because memory operations with the ASI tag stored in the %asi
+ // register need to use an immediate offset, while plain register
+ // addressing is initially parsed as Reg+G0.
+ // This allows forms such as `ldxa [%o0] %asi, %o0` to parse correctly.
+ SparcOperand &OldMemOp = (SparcOperand &)*Operands[Operands.size() - 2];
+ if (OldMemOp.isMEMrr()) {
+ if (OldMemOp.getMemOffsetReg() != Sparc::G0) {
+ return Error(S, "invalid operand for instruction");
+ }
+ Operands[Operands.size() - 2] = SparcOperand::MorphToMEMri(
+ OldMemOp.getMemBase(),
+ SparcOperand::CreateImm(MCConstantExpr::create(0, getContext()),
+ OldMemOp.getStartLoc(),
+ OldMemOp.getEndLoc()));
+ }
+ Parser.Lex(); // Eat the identifier.
+ // In this context, we convert the register operand into
+ // a plain "%asi" token since the register access is already
+ // implicit in the instruction definition and encoding.
+ // See LoadASI/StoreASI in SparcInstrInfo.td.
+ Operands.push_back(SparcOperand::CreateToken("%asi", S));
+ return ParseStatus::Success;
+ }
+
+ return Error(S, "malformed ASI tag, must be %asi, a constant integer "
+ "expression, or a named tag");
}
- return MatchOperand_Success;
+
+ // If we're not at the end of statement and the next token is not a comma,
+ // then it is an immediate ASI value.
+ if (getLexer().isNot(AsmToken::EndOfStatement) &&
+ getLexer().isNot(AsmToken::Comma))
+ return parseASITag(Operands);
+ return ParseStatus::Success;
}
std::unique_ptr<SparcOperand> Op;
- ResTy = parseSparcAsmOperand(Op, (Mnemonic == "call"));
- if (ResTy != MatchOperand_Success || !Op)
- return MatchOperand_ParseFail;
+ Res = parseSparcAsmOperand(Op, (Mnemonic == "call"));
+ if (!Res.isSuccess() || !Op)
+ return ParseStatus::Failure;
// Push the parsed operand into the list of operands
Operands.push_back(std::move(Op));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
+ParseStatus
SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
bool isCall) {
SMLoc S = Parser.getTok().getLoc();
@@ -1108,47 +1261,16 @@ SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
case AsmToken::Percent: {
Parser.Lex(); // Eat the '%'.
- MCRegister RegNo;
+ MCRegister Reg;
unsigned RegKind;
- if (matchRegisterName(Parser.getTok(), RegNo, RegKind)) {
- StringRef name = Parser.getTok().getString();
+ if (matchRegisterName(Parser.getTok(), Reg, RegKind)) {
+ StringRef Name = Parser.getTok().getString();
Parser.Lex(); // Eat the identifier token.
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- switch (RegNo) {
- default:
- Op = SparcOperand::CreateReg(RegNo, RegKind, S, E);
- break;
- case Sparc::PSR:
- Op = SparcOperand::CreateToken("%psr", S);
- break;
- case Sparc::FSR:
- Op = SparcOperand::CreateToken("%fsr", S);
- break;
- case Sparc::FQ:
- Op = SparcOperand::CreateToken("%fq", S);
- break;
- case Sparc::CPSR:
- Op = SparcOperand::CreateToken("%csr", S);
- break;
- case Sparc::CPQ:
- Op = SparcOperand::CreateToken("%cq", S);
- break;
- case Sparc::WIM:
- Op = SparcOperand::CreateToken("%wim", S);
- break;
- case Sparc::TBR:
- Op = SparcOperand::CreateToken("%tbr", S);
- break;
- case Sparc::PC:
- Op = SparcOperand::CreateToken("%pc", S);
- break;
- case Sparc::ICC:
- if (name == "xcc")
- Op = SparcOperand::CreateToken("%xcc", S);
- else
- Op = SparcOperand::CreateToken("%icc", S);
- break;
- }
+ if (Reg == Sparc::ICC && Name == "xcc")
+ Op = SparcOperand::CreateToken("%xcc", S);
+ else
+ Op = SparcOperand::CreateReg(Reg, RegKind, S, E);
break;
}
if (matchSparcAsmModifiers(EVal, E)) {
@@ -1182,18 +1304,17 @@ SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
Op = SparcOperand::CreateImm(EVal, S, E);
break;
}
- return (Op) ? MatchOperand_Success : MatchOperand_ParseFail;
+ return Op ? ParseStatus::Success : ParseStatus::Failure;
}
-OperandMatchResultTy
-SparcAsmParser::parseBranchModifiers(OperandVector &Operands) {
+ParseStatus SparcAsmParser::parseBranchModifiers(OperandVector &Operands) {
// parse (,a|,pn|,pt)+
while (getLexer().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the comma
if (!getLexer().is(AsmToken::Identifier))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
StringRef modName = Parser.getTok().getString();
if (modName == "a" || modName == "pn" || modName == "pt") {
Operands.push_back(SparcOperand::CreateToken(modName,
@@ -1201,7 +1322,7 @@ SparcAsmParser::parseBranchModifiers(OperandVector &Operands) {
Parser.Lex(); // eat the identifier.
}
}
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
bool SparcAsmParser::matchRegisterName(const AsmToken &Tok, MCRegister &RegNo,
@@ -1238,9 +1359,8 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok, MCRegister &RegNo,
return true;
}
- // %fprs is an alias of %asr6.
if (name.equals("fprs")) {
- RegNo = ASRRegs[6];
+ RegNo = Sparc::ASR6;
RegKind = SparcOperand::rk_Special;
return true;
}
@@ -1444,7 +1564,70 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok, MCRegister &RegNo,
return true;
}
if (name.equals("pc")) {
- RegNo = Sparc::PC;
+ RegNo = Sparc::ASR5;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("asi")) {
+ RegNo = Sparc::ASR3;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("ccr")) {
+ RegNo = Sparc::ASR2;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("gl")) {
+ RegNo = Sparc::GL;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("ver")) {
+ RegNo = Sparc::VER;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+
+ // JPS1 extension - aliases for ASRs
+ // Section A.51 - Read State Register
+ if (name.equals("pcr")) {
+ RegNo = Sparc::ASR16;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("pic")) {
+ RegNo = Sparc::ASR17;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("dcr")) {
+ RegNo = Sparc::ASR18;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("gsr")) {
+ RegNo = Sparc::ASR19;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("softint")) {
+ RegNo = Sparc::ASR22;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("tick_cmpr")) {
+ RegNo = Sparc::ASR23;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("stick") || name.equals("sys_tick")) {
+ RegNo = Sparc::ASR24;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("stick_cmpr") || name.equals("sys_tick_cmpr")) {
+ RegNo = Sparc::ASR25;
RegKind = SparcOperand::rk_Special;
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index b7581c1979d8..828d63872358 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -101,20 +101,16 @@ static const unsigned FCCRegDecoderTable[] = {
SP::FCC0, SP::FCC1, SP::FCC2, SP::FCC3 };
static const unsigned ASRRegDecoderTable[] = {
- SP::Y, SP::ASR1, SP::ASR2, SP::ASR3,
- SP::ASR4, SP::ASR5, SP::ASR6, SP::ASR7,
- SP::ASR8, SP::ASR9, SP::ASR10, SP::ASR11,
- SP::ASR12, SP::ASR13, SP::ASR14, SP::ASR15,
- SP::ASR16, SP::ASR17, SP::ASR18, SP::ASR19,
- SP::ASR20, SP::ASR21, SP::ASR22, SP::ASR23,
- SP::ASR24, SP::ASR25, SP::ASR26, SP::ASR27,
- SP::ASR28, SP::ASR29, SP::ASR30, SP::ASR31};
+ SP::Y, SP::ASR1, SP::ASR2, SP::ASR3, SP::ASR4, SP::ASR5, SP::ASR6,
+ SP::ASR7, SP::ASR8, SP::ASR9, SP::ASR10, SP::ASR11, SP::ASR12, SP::ASR13,
+ SP::ASR14, SP::ASR15, SP::ASR16, SP::ASR17, SP::ASR18, SP::ASR19, SP::ASR20,
+ SP::ASR21, SP::ASR22, SP::ASR23, SP::ASR24, SP::ASR25, SP::ASR26, SP::ASR27,
+ SP::ASR28, SP::ASR29, SP::ASR30, SP::ASR31};
static const unsigned PRRegDecoderTable[] = {
- SP::TPC, SP::TNPC, SP::TSTATE, SP::TT, SP::TICK, SP::TBA, SP::PSTATE,
- SP::TL, SP::PIL, SP::CWP, SP::CANSAVE, SP::CANRESTORE, SP::CLEANWIN,
- SP::OTHERWIN, SP::WSTATE, SP::PC
-};
+ SP::TPC, SP::TNPC, SP::TSTATE, SP::TT, SP::TICK,
+ SP::TBA, SP::PSTATE, SP::TL, SP::PIL, SP::CWP,
+ SP::CANSAVE, SP::CANRESTORE, SP::CLEANWIN, SP::OTHERWIN, SP::WSTATE};
static const uint16_t IntPairDecoderTable[] = {
SP::G0_G1, SP::G2_G3, SP::G4_G5, SP::G6_G7,
@@ -310,8 +306,10 @@ DecodeStatus SparcDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
{
Result = decodeInstruction(DecoderTableSparcV832, Instr, Insn, Address, this, STI);
}
- if (Result != MCDisassembler::Fail)
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
return Result;
+ }
Result =
decodeInstruction(DecoderTableSparc32, Instr, Insn, Address, this, STI);
@@ -344,7 +342,7 @@ static DecodeStatus DecodeCall(MCInst &MI, unsigned insn, uint64_t Address,
static DecodeStatus DecodeSIMM13(MCInst &MI, unsigned insn, uint64_t Address,
const MCDisassembler *Decoder) {
- unsigned tgt = SignExtend32<13>(fieldFromInstruction(insn, 0, 13));
- MI.addOperand(MCOperand::createImm(tgt));
+ assert(isUInt<13>(insn));
+ MI.addOperand(MCOperand::createImm(SignExtend64<13>(insn)));
return MCDisassembler::Success;
}
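The rewritten DecodeSIMM13 asserts that the decoder hands it only the 13 field bits and then sign-extends from bit 12. A worked example of that arithmetic (the values shown are exact):

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    static void simm13Examples() {
      // Bit 12 clear: plain value.
      assert(llvm::SignExtend64<13>(0x0FFF) == 4095);
      // Bit 12 set: -(1 << 12).
      assert(llvm::SignExtend64<13>(0x1000) == -4096);
      // All 13 bits set: -1.
      assert(llvm::SignExtend64<13>(0x1FFF) == -1);
    }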
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.cpp
index bd26710fcbab..45a46c131d21 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.cpp
@@ -38,6 +38,9 @@ InsertNOPLoad::InsertNOPLoad() : LEONMachineFunctionPass(ID) {}
bool InsertNOPLoad::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<SparcSubtarget>();
+ if (!Subtarget->insertNOPLoad())
+ return false;
+
const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
DebugLoc DL = DebugLoc();
@@ -74,6 +77,8 @@ DetectRoundChange::DetectRoundChange() : LEONMachineFunctionPass(ID) {}
bool DetectRoundChange::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<SparcSubtarget>();
+ if (!Subtarget->detectRoundChange())
+ return false;
bool Modified = false;
for (MachineBasicBlock &MBB : MF) {
@@ -122,6 +127,9 @@ FixAllFDIVSQRT::FixAllFDIVSQRT() : LEONMachineFunctionPass(ID) {}
bool FixAllFDIVSQRT::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<SparcSubtarget>();
+ if (!Subtarget->fixAllFDIVSQRT())
+ return false;
+
const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
DebugLoc DL = DebugLoc();
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index 2c0696e8048b..240f5396855c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -136,8 +136,9 @@ namespace {
public:
SparcAsmBackend(const Target &T)
- : MCAsmBackend(StringRef(T.getName()) == "sparcel" ? support::little
- : support::big),
+ : MCAsmBackend(StringRef(T.getName()) == "sparcel"
+ ? llvm::endianness::little
+ : llvm::endianness::big),
TheTarget(T), Is64Bit(StringRef(TheTarget.getName()) == "sparcv9") {}
unsigned getNumFixupKinds() const override {
@@ -264,14 +265,15 @@ namespace {
assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
"Invalid kind!");
- if (Endian == support::little)
+ if (Endian == llvm::endianness::little)
return InfosLE[Kind - FirstTargetFixupKind];
return InfosBE[Kind - FirstTargetFixupKind];
}
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target) override {
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) override {
if (Fixup.getKind() >= FirstLiteralRelocationKind)
return true;
switch ((Sparc::Fixups)Fixup.getKind()) {
@@ -355,7 +357,8 @@ namespace {
// from the fixup value. The Value has been "split up" into the
// appropriate bitfields above.
for (unsigned i = 0; i != NumBytes; ++i) {
- unsigned Idx = Endian == support::little ? i : (NumBytes - 1) - i;
+ unsigned Idx =
+ Endian == llvm::endianness::little ? i : (NumBytes - 1) - i;
Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
}
}
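The loop above writes Value into the instruction bytes one octet at a time; the only endianness-dependent part is which byte of the word each octet lands in. A sketch of the index mapping for a 4-byte fixup (standalone form assumed):

    // Byte i of Value goes to Data[Offset + Idx]; NumBytes == 4 for SPARC
    // instruction words.
    static unsigned fixupByteIndex(bool LittleEndian, unsigned NumBytes,
                                   unsigned i) {
      return LittleEndian ? i : (NumBytes - 1) - i;
    }
    // Big endian, NumBytes = 4: i=0 -> 3, i=1 -> 2, i=2 -> 1, i=3 -> 0,
    // i.e. the least significant byte of Value is stored last.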
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
index c48beab01229..f17d3e997452 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
@@ -32,9 +32,8 @@ namespace {
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
- bool needsRelocateWithSymbol(const MCSymbol &Sym,
+ bool needsRelocateWithSymbol(const MCValue &Val, const MCSymbol &Sym,
unsigned Type) const override;
-
};
}
@@ -124,8 +123,9 @@ unsigned SparcELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_SPARC_NONE;
}
-bool SparcELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
- unsigned Type) const {
+bool SparcELFObjectWriter::needsRelocateWithSymbol(const MCValue &,
+ const MCSymbol &,
+ unsigned Type) const {
switch (Type) {
default:
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
index 51a6732d05c6..ef7764850471 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
@@ -39,7 +39,12 @@ bool SparcInstPrinter::isV9(const MCSubtargetInfo &STI) const {
}
void SparcInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
- OS << '%' << StringRef(getRegisterName(Reg)).lower();
+ OS << '%' << getRegisterName(Reg);
+}
+
+void SparcInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg,
+ unsigned AltIdx) const {
+ OS << '%' << getRegisterName(Reg, AltIdx);
}
void SparcInstPrinter::printInst(const MCInst *MI, uint64_t Address,
@@ -111,7 +116,11 @@ void SparcInstPrinter::printOperand(const MCInst *MI, int opNum,
const MCOperand &MO = MI->getOperand (opNum);
if (MO.isReg()) {
- printRegName(O, MO.getReg());
+ unsigned Reg = MO.getReg();
+ if (isV9(STI))
+ printRegName(O, Reg, SP::RegNamesStateReg);
+ else
+ printRegName(O, Reg);
return ;
}
@@ -139,15 +148,7 @@ void SparcInstPrinter::printOperand(const MCInst *MI, int opNum,
void SparcInstPrinter::printMemOperand(const MCInst *MI, int opNum,
const MCSubtargetInfo &STI,
- raw_ostream &O, const char *Modifier) {
- // If this is an ADD operand, emit it like normal operands.
- if (Modifier && !strcmp(Modifier, "arith")) {
- printOperand(MI, opNum, STI, O);
- O << ", ";
- printOperand(MI, opNum + 1, STI, O);
- return;
- }
-
+ raw_ostream &O) {
const MCOperand &Op1 = MI->getOperand(opNum);
const MCOperand &Op2 = MI->getOperand(opNum + 1);
@@ -242,3 +243,13 @@ void SparcInstPrinter::printMembarTag(const MCInst *MI, int opNum,
}
}
}
+
+void SparcInstPrinter::printASITag(const MCInst *MI, int opNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ unsigned Imm = MI->getOperand(opNum).getImm();
+ auto ASITag = SparcASITag::lookupASITagByEncoding(Imm);
+ if (isV9(STI) && ASITag)
+ O << '#' << ASITag->Name;
+ else
+ O << Imm;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h
index a9f4a652e0c0..cb691a3420da 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCINSTPRINTER_H
#define LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCINSTPRINTER_H
+#include "SparcMCTargetDesc.h"
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
@@ -24,6 +25,7 @@ public:
: MCInstPrinter(MAI, MII, MRI) {}
void printRegName(raw_ostream &OS, MCRegister Reg) const override;
+ void printRegName(raw_ostream &OS, MCRegister Reg, unsigned AltIdx) const;
void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
const MCSubtargetInfo &STI, raw_ostream &O) override;
bool printSparcAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
@@ -39,18 +41,21 @@ public:
void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
unsigned OpIdx, unsigned PrintMethodIdx,
const MCSubtargetInfo &STI, raw_ostream &O);
- static const char *getRegisterName(MCRegister Reg);
+ static const char *getRegisterName(MCRegister Reg,
+ unsigned AltIdx = SP::NoRegAltName);
void printOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
raw_ostream &OS);
void printMemOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
- raw_ostream &OS, const char *Modifier = nullptr);
+ raw_ostream &OS);
void printCCOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
raw_ostream &OS);
bool printGetPCX(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &OS);
void printMembarTag(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printASITag(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+ raw_ostream &O);
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
index 93c6365a8ddd..42357e3b1aa9 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
@@ -93,8 +93,9 @@ void SparcMCCodeEmitter::encodeInstruction(const MCInst &MI,
const MCSubtargetInfo &STI) const {
unsigned Bits = getBinaryCodeForInstr(MI, Fixups, STI);
support::endian::write(CB, Bits,
- Ctx.getAsmInfo()->isLittleEndian() ? support::little
- : support::big);
+ Ctx.getAsmInfo()->isLittleEndian()
+ ? llvm::endianness::little
+ : llvm::endianness::big);
// Some instructions have phantom operands that only contribute a fixup entry.
unsigned SymOpNo = 0;
@@ -104,7 +105,6 @@ void SparcMCCodeEmitter::encodeInstruction(const MCInst &MI,
case SP::GDOP_LDrr:
case SP::GDOP_LDXrr:
case SP::TLS_ADDrr:
- case SP::TLS_ADDXrr:
case SP::TLS_LDrr:
case SP::TLS_LDXrr: SymOpNo = 3; break;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index d6688c31334c..fb634ccb280d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -21,6 +21,13 @@
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+namespace llvm {
+namespace SparcASITag {
+#define GET_ASITagsList_IMPL
+#include "SparcGenSearchableTables.inc"
+} // end namespace SparcASITag
+} // end namespace llvm
+
using namespace llvm;
#define GET_INSTRINFO_MC_DESC
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
index 8e6a9ebdb2dd..fd76627aa067 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCMCTARGETDESC_H
#define LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCMCTARGETDESC_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
#include <memory>
@@ -35,6 +36,18 @@ MCAsmBackend *createSparcAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCTargetOptions &Options);
std::unique_ptr<MCObjectTargetWriter> createSparcELFObjectWriter(bool Is64Bit,
uint8_t OSABI);
+
+// Defines symbolic names for Sparc v9 ASI tags.
+namespace SparcASITag {
+struct ASITag {
+ const char *Name;
+ const char *AltName;
+ unsigned Encoding;
+};
+
+#define GET_ASITagsList_DECL
+#include "SparcGenSearchableTables.inc"
+} // end namespace SparcASITag
} // End llvm namespace
// Defines symbolic names for Sparc registers. This defines a mapping from
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/Sparc.td b/contrib/llvm-project/llvm/lib/Target/Sparc/Sparc.td
index 4cc713abe046..1a71cfed3128 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/Sparc.td
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/Sparc.td
@@ -69,6 +69,7 @@ include "LeonFeatures.td"
// Register File, Calling Conv, Instruction Descriptions
//===----------------------------------------------------------------------===//
+include "SparcASITags.td"
include "SparcRegisterInfo.td"
include "SparcCallingConv.td"
include "SparcSchedule.td"
@@ -80,6 +81,10 @@ def SparcAsmParser : AsmParser {
bit ShouldEmitMatchRegisterName = 0;
}
+def SparcAsmParserVariant : AsmParserVariant {
+ let RegisterPrefix = "%";
+}
+
//===----------------------------------------------------------------------===//
// SPARC processors supported.
//===----------------------------------------------------------------------===//
@@ -178,6 +183,7 @@ def Sparc : Target {
// Pull in Instruction Info:
let InstructionSet = SparcInstrInfo;
let AssemblyParsers = [SparcAsmParser];
+ let AssemblyParserVariants = [SparcAsmParserVariant];
let AssemblyWriters = [SparcAsmWriter];
let AllowRegisterRenaming = 1;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcASITags.td b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcASITags.td
new file mode 100644
index 000000000000..115e41bfe033
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcASITags.td
@@ -0,0 +1,54 @@
+//===- SparcASITags.td -------------------------------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the symbolic operands permitted for the various kinds
+// of SPARCv9 ASI tag.
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/TableGen/SearchableTable.td"
+
+class ASITag<string name, string alt_name, bits<8> op> {
+ string Name = name;
+ // A maximum of one alias is supported right now.
+ string AltName = alt_name;
+ bits<8> Encoding = op;
+}
+
+def ASITagsList : GenericTable {
+ let FilterClass = "ASITag";
+ let Fields = ["Name", "AltName", "Encoding"];
+
+ let PrimaryKey = [ "Encoding" ];
+ let PrimaryKeyName = "lookupASITagByEncoding";
+}
+
+def lookupASITagByName : SearchIndex {
+ let Table = ASITagsList;
+ let Key = [ "Name" ];
+}
+
+def lookupASITagByAltName : SearchIndex {
+ let Table = ASITagsList;
+ let Key = [ "AltName" ];
+}
+
+def : ASITag<"ASI_N", "ASI_NUCLEUS", 0x4>;
+def : ASITag<"ASI_N_L", "ASI_NUCLEUS_LITTLE", 0xC>;
+def : ASITag<"ASI_AIUP", "ASI_AS_IF_USER_PRIMARY", 0x10>;
+def : ASITag<"ASI_AIUS", "ASI_AS_IF_USER_SECONDARY", 0x11>;
+def : ASITag<"ASI_AIUP_L", "ASI_AS_IF_USER_PRIMARY_LITTLE", 0x18>;
+def : ASITag<"ASI_AIUS_L", "ASI_AS_IF_USER_SECONDARY_LITTLE", 0x19>;
+def : ASITag<"ASI_P", "ASI_PRIMARY", 0x80>;
+def : ASITag<"ASI_S", "ASI_SECONDARY", 0x81>;
+def : ASITag<"ASI_PNF", "ASI_PRIMARY_NOFAULT", 0x82>;
+def : ASITag<"ASI_SNF", "ASI_SECONDARY_NOFAULT", 0x83>;
+def : ASITag<"ASI_P_L", "ASI_PRIMARY_LITTLE", 0x88>;
+def : ASITag<"ASI_S_L", "ASI_SECONDARY_LITTLE", 0x89>;
+def : ASITag<"ASI_PNF_L", "ASI_PRIMARY_NOFAULT_LITTLE", 0x8A>;
+def : ASITag<"ASI_SNF_L", "ASI_SECONDARY_NOFAULT_LITTLE", 0x8B>;
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index 034e8759c2f0..cca624e09267 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -301,7 +301,7 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
if (MI->getOpcode() == SP::CALL)
assert(TF == SparcMCExpr::VK_Sparc_None &&
"Cannot handle target flags on call address");
- else if (MI->getOpcode() == SP::SETHIi || MI->getOpcode() == SP::SETHIXi)
+ else if (MI->getOpcode() == SP::SETHIi)
assert((TF == SparcMCExpr::VK_Sparc_HI
|| TF == SparcMCExpr::VK_Sparc_H44
|| TF == SparcMCExpr::VK_Sparc_HH
@@ -329,7 +329,7 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
else if (MI->getOpcode() == SP::TLS_LDXrr)
assert(TF == SparcMCExpr::VK_Sparc_TLS_IE_LDX &&
"Cannot handle target flags on ldx for TLS");
- else if (MI->getOpcode() == SP::XORri || MI->getOpcode() == SP::XORXri)
+ else if (MI->getOpcode() == SP::XORri)
assert((TF == SparcMCExpr::VK_Sparc_TLS_LDO_LOX10
|| TF == SparcMCExpr::VK_Sparc_TLS_LE_LOX10) &&
"Cannot handle target flags on xor for TLS");
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 8339f5c42908..3c9841d81b4f 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -56,7 +56,7 @@ public:
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- unsigned ConstraintID,
+ InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) override;
// Include the pieces autogenerated from the target description.
@@ -162,7 +162,7 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
// and have that work. Then, delete this function.
bool SparcDAGToDAGISel::tryInlineAsm(SDNode *N){
std::vector<SDValue> AsmNodeOperands;
- unsigned Flag, Kind;
+ InlineAsm::Flag Flag;
bool Changed = false;
unsigned NumOps = N->getNumOperands();
@@ -186,24 +186,22 @@ bool SparcDAGToDAGISel::tryInlineAsm(SDNode *N){
if (i < InlineAsm::Op_FirstOperand)
continue;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
- Flag = C->getZExtValue();
- Kind = InlineAsm::getKind(Flag);
- }
+ if (const auto *C = dyn_cast<ConstantSDNode>(N->getOperand(i)))
+ Flag = InlineAsm::Flag(C->getZExtValue());
else
continue;
// Immediate operands to inline asm in the SelectionDAG are modeled with
- // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
+ // two operands. The first is a constant of value InlineAsm::Kind::Imm, and
// the second is a constant with the value of the immediate. If we get here
- // and we have a Kind_Imm, skip the next operand, and continue.
- if (Kind == InlineAsm::Kind_Imm) {
+ // and we have a Kind::Imm, skip the next operand, and continue.
+ if (Flag.isImmKind()) {
SDValue op = N->getOperand(++i);
AsmNodeOperands.push_back(op);
continue;
}
- unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
+ const unsigned NumRegs = Flag.getNumOperandRegisters();
if (NumRegs)
OpChanged.push_back(false);
@@ -211,15 +209,15 @@ bool SparcDAGToDAGISel::tryInlineAsm(SDNode *N){
bool IsTiedToChangedOp = false;
// If it's a use that is tied with a previous def, it has no
// reg class constraint.
- if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
+ if (Changed && Flag.isUseOperandTiedToDef(DefIdx))
IsTiedToChangedOp = OpChanged[DefIdx];
- if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
- && Kind != InlineAsm::Kind_RegDefEarlyClobber)
+ if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
+ !Flag.isRegDefEarlyClobberKind())
continue;
unsigned RC;
- bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
+ const bool HasRC = Flag.hasRegClassConstraint(RC);
if ((!IsTiedToChangedOp && (!HasRC || RC != SP::IntRegsRegClassID))
|| NumRegs != 2)
continue;
@@ -232,8 +230,7 @@ bool SparcDAGToDAGISel::tryInlineAsm(SDNode *N){
SDValue PairedReg;
MachineRegisterInfo &MRI = MF->getRegInfo();
- if (Kind == InlineAsm::Kind_RegDef ||
- Kind == InlineAsm::Kind_RegDefEarlyClobber) {
+ if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
// Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
// the original GPRs.
@@ -258,9 +255,8 @@ bool SparcDAGToDAGISel::tryInlineAsm(SDNode *N){
std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
Ops.push_back(T1.getValue(1));
CurDAG->UpdateNodeOperands(GU, Ops);
- }
- else {
- // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
+ } else {
+ // For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a
// GPRPair and then pass the GPRPair to the inline asm.
SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
@@ -296,11 +292,11 @@ bool SparcDAGToDAGISel::tryInlineAsm(SDNode *N){
if(PairedReg.getNode()) {
OpChanged[OpChanged.size() -1 ] = true;
- Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
+ Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum*/);
if (IsTiedToChangedOp)
- Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
+ Flag.setMatchingOp(DefIdx);
else
- Flag = InlineAsm::getFlagWordForRegClass(Flag, SP::IntPairRegClassID);
+ Flag.setRegClass(SP::IntPairRegClassID);
// Replace the current flag.
AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
Flag, dl, MVT::i32);
@@ -379,18 +375,17 @@ void SparcDAGToDAGISel::Select(SDNode *N) {
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
-bool
-SparcDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
- unsigned ConstraintID,
- std::vector<SDValue> &OutOps) {
+bool SparcDAGToDAGISel::SelectInlineAsmMemoryOperand(
+ const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
+ std::vector<SDValue> &OutOps) {
SDValue Op0, Op1;
switch (ConstraintID) {
default: return true;
- case InlineAsm::Constraint_o:
- case InlineAsm::Constraint_m: // memory
- if (!SelectADDRrr(Op, Op0, Op1))
- SelectADDRri(Op, Op0, Op1);
- break;
+ case InlineAsm::ConstraintCode::o:
+ case InlineAsm::ConstraintCode::m: // memory
+ if (!SelectADDRrr(Op, Op0, Op1))
+ SelectADDRri(Op, Op0, Op1);
+ break;
}
OutOps.push_back(Op0);
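
The SparcISelDAGToDAG changes above are part of the LLVM 18 migration from raw inline-asm flag words, decoded through static helpers such as InlineAsm::getKind and InlineAsm::getNumOperandRegisters, to the typed InlineAsm::Flag wrapper. A condensed sketch of the new idiom, restricted to methods that actually appear in this hunk; the function itself and its purpose are illustrative:

#include "llvm/IR/InlineAsm.h"
#include <cstdint>

using namespace llvm;

// Illustrative: classify one inline-asm flag word the post-migration
// way. Pre-LLVM-18 code kept a raw 'unsigned Flag' plus a separately
// decoded 'Kind'; InlineAsm::Flag folds both into one value with
// query methods.
static unsigned regPairCandidateCount(uint32_t RawFlagWord,
                                      unsigned GPRClassID) {
  InlineAsm::Flag Flag(RawFlagWord);

  // Immediate operands carry their value in the next DAG operand and
  // never name registers.
  if (Flag.isImmKind())
    return 0;

  // Only register defs/uses are interesting for pairing.
  if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
      !Flag.isRegDefEarlyClobberKind())
    return 0;

  // A register-class constraint, if present, must match.
  unsigned RC;
  if (Flag.hasRegClassConstraint(RC) && RC != GPRClassID)
    return 0;

  return Flag.getNumOperandRegisters();
}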
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 0aa3c875a14f..4f0801479211 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1748,6 +1748,7 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
if (!Subtarget->is64Bit()) {
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::MULO_I64, nullptr);
+ setLibcallName(RTLIB::MUL_I128, nullptr);
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
@@ -2603,9 +2604,8 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
if (LHS.getValueType().isInteger()) {
// On V9 processors running in 64-bit mode, if CC compares two `i64`s
// and the RHS is zero we might be able to use a specialized branch.
- const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
- if (is64Bit && isV9 && LHS.getValueType() == MVT::i64 && RHSC &&
- RHSC->isZero() && !ISD::isUnsignedIntSetCC(CC))
+ if (is64Bit && isV9 && LHS.getValueType() == MVT::i64 &&
+ isNullConstant(RHS) && !ISD::isUnsignedIntSetCC(CC))
return DAG.getNode(SPISD::BR_REG, dl, MVT::Other, Chain, Dest,
DAG.getConstant(intCondCCodeToRcond(CC), dl, MVT::i32),
LHS);
@@ -3427,15 +3427,13 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info,
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
-void SparcTargetLowering::
-LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
- std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const {
+void SparcTargetLowering::LowerAsmOperandForConstraint(
+ SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
SDValue Result;
// Only support length 1 constraints for now.
- if (Constraint.length() > 1)
+ if (Constraint.size() > 1)
return;
char ConstraintLetter = Constraint[0];
@@ -3643,3 +3641,11 @@ void SparcTargetLowering::insertSSPDeclarations(Module &M) const {
if (!Subtarget->isTargetLinux())
return TargetLowering::insertSSPDeclarations(M);
}
+
+void SparcTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
+ SDNode *Node) const {
+ assert(MI.getOpcode() == SP::SUBCCrr || MI.getOpcode() == SP::SUBCCri);
+ // If the result is dead, replace it with %g0.
+ if (!Node->hasAnyUseOfValue(0))
+ MI.getOperand(0).setReg(SP::G0);
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.h
index 5504dcd464fb..15d09bc93097 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -88,8 +88,7 @@ namespace llvm {
ConstraintWeight
getSingleConstraintMatchWeight(AsmOperandInfo &info,
const char *constraint) const override;
- void LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
+ void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
@@ -223,7 +222,10 @@ namespace llvm {
MachineBasicBlock *expandSelectCC(MachineInstr &MI, MachineBasicBlock *BB,
unsigned BROpcode) const;
+
+ void AdjustInstrPostInstrSelection(MachineInstr &MI,
+ SDNode *Node) const override;
};
} // end namespace llvm
-#endif // SPARC_ISELLOWERING_H
+#endif // LLVM_LIB_TARGET_SPARC_SPARCISELLOWERING_H
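
The AdjustInstrPostInstrSelection override added here pairs with the hasPostISelHook = true bit set on SUBCC in SparcInstrInfo.td further down: after selection, SelectionDAG invokes the hook on each emitted MachineInstr whose definition requests it, and the Sparc hook redirects a dead integer result to %g0 so only the condition codes stay live. A stripped-down rendering of the same pattern as a free function; the name and the ZeroReg parameter are illustrative:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

// Free-function rendering of the hook body above; in a real target it
// lives in an AdjustInstrPostInstrSelection override and ZeroReg is
// the hardwired zero register (SP::G0 for Sparc).
static void retargetDeadDefToZero(llvm::MachineInstr &MI,
                                  llvm::SDNode *Node,
                                  llvm::Register ZeroReg) {
  // Result 0 of the selected node is the integer value; if nothing
  // reads it, only the implicitly defined condition codes are live,
  // so the def can point at the zero register instead of a vreg.
  if (!Node->hasAnyUseOfValue(0))
    MI.getOperand(0).setReg(ZeroReg);
}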
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstr64Bit.td b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstr64Bit.td
index 0a6479487418..93862414fb35 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstr64Bit.td
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstr64Bit.td
@@ -65,6 +65,7 @@ def : Pat<(i64 0), (COPY (i64 G0))>,
Requires<[Is64Bit]>;
// The ALU instructions want their simm13 operands as i32 immediates.
+// FIXME: This is no longer true, they are now pointer-sized.
def as_i32imm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;
@@ -144,43 +145,29 @@ def : Pat<(i64 imm:$val),
let Predicates = [Is64Bit] in {
-// Register-register instructions.
-let isCodeGenOnly = 1 in {
-defm ANDX : F3_12<"and", 0b000001, and, I64Regs, i64, i64imm>;
-defm ORX : F3_12<"or", 0b000010, or, I64Regs, i64, i64imm>;
-defm XORX : F3_12<"xor", 0b000011, xor, I64Regs, i64, i64imm>;
-
-def ANDXNrr : F3_1<2, 0b000101,
- (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2),
- "andn $rs1, $rs2, $rd",
- [(set i64:$rd, (and i64:$rs1, (not i64:$rs2)))]>;
-def ORXNrr : F3_1<2, 0b000110,
- (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2),
- "orn $rs1, $rs2, $rd",
- [(set i64:$rd, (or i64:$rs1, (not i64:$rs2)))]>;
-def XNORXrr : F3_1<2, 0b000111,
- (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2),
- "xnor $rs1, $rs2, $rd",
- [(set i64:$rd, (not (xor i64:$rs1, i64:$rs2)))]>;
-
-defm ADDX : F3_12<"add", 0b000000, add, I64Regs, i64, i64imm>;
-defm SUBX : F3_12<"sub", 0b000100, sub, I64Regs, i64, i64imm>;
-
-def TLS_ADDXrr : F3_1<2, 0b000000, (outs I64Regs:$rd),
- (ins I64Regs:$rs1, I64Regs:$rs2, TailRelocSymTLSAdd:$sym),
- "add $rs1, $rs2, $rd, $sym",
- [(set i64:$rd,
- (tlsadd i64:$rs1, i64:$rs2, tglobaltlsaddr:$sym))]>;
-
-// "LEA" form of add
-def LEAX_ADDri : F3_2<2, 0b000000,
- (outs I64Regs:$rd), (ins (MEMri $rs1, $simm13):$addr),
- "add ${addr:arith}, $rd",
- [(set iPTR:$rd, ADDRri:$addr)]>;
-}
+def : Pat<(and i64:$lhs, i64:$rhs), (ANDrr $lhs, $rhs)>;
+def : Pat<(or i64:$lhs, i64:$rhs), (ORrr $lhs, $rhs)>;
+def : Pat<(xor i64:$lhs, i64:$rhs), (XORrr $lhs, $rhs)>;
+
+def : Pat<(and i64:$lhs, (i64 simm13:$rhs)), (ANDri $lhs, imm:$rhs)>;
+def : Pat<(or i64:$lhs, (i64 simm13:$rhs)), (ORri $lhs, imm:$rhs)>;
+def : Pat<(xor i64:$lhs, (i64 simm13:$rhs)), (XORri $lhs, imm:$rhs)>;
+
+def : Pat<(and i64:$lhs, (not i64:$rhs)), (ANDNrr $lhs, $rhs)>;
+def : Pat<(or i64:$lhs, (not i64:$rhs)), (ORNrr $lhs, $rhs)>;
+def : Pat<(not (xor i64:$lhs, i64:$rhs)), (XNORrr $lhs, $rhs)>;
+
+def : Pat<(add i64:$lhs, i64:$rhs), (ADDrr $lhs, $rhs)>;
+def : Pat<(sub i64:$lhs, i64:$rhs), (SUBrr $lhs, $rhs)>;
+
+def : Pat<(add i64:$lhs, (i64 simm13:$rhs)), (ADDri $lhs, imm:$rhs)>;
+def : Pat<(sub i64:$lhs, (i64 simm13:$rhs)), (SUBri $lhs, imm:$rhs)>;
+
+def : Pat<(tlsadd i64:$rs1, i64:$rs2, tglobaltlsaddr:$sym),
+ (TLS_ADDrr $rs1, $rs2, $sym)>;
-def : Pat<(SPcmpicc i64:$a, i64:$b), (CMPrr $a, $b)>;
-def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (CMPri $a, (as_i32imm $b))>;
+def : Pat<(SPcmpicc i64:$lhs, i64:$rhs), (SUBCCrr $lhs, $rhs)>;
+def : Pat<(SPcmpicc i64:$lhs, (i64 simm13:$rhs)), (SUBCCri $lhs, imm:$rhs)>;
def : Pat<(i64 (ctpop i64:$src)), (POPCrr $src)>;
} // Predicates = [Is64Bit]
@@ -239,7 +226,7 @@ def UDIVXri : F3_2<2, 0b001101,
let Predicates = [Is64Bit] in {
// 64-bit loads.
-defm LDX : Load<"ldx", 0b001011, load, I64Regs, i64>;
+defm LDX : LoadA<"ldx", 0b001011, 0b011011, load, I64Regs, i64>;
let mayLoad = 1, isAsmParserOnly = 1 in {
def TLS_LDXrr : F3_1<3, 0b001011,
@@ -282,10 +269,10 @@ def : Pat<(i64 (extloadi32 ADDRrr:$addr)), (LDrr ADDRrr:$addr)>;
def : Pat<(i64 (extloadi32 ADDRri:$addr)), (LDri ADDRri:$addr)>;
// Sign-extending load of i32 into i64 is a new SPARC v9 instruction.
-defm LDSW : Load<"ldsw", 0b001000, sextloadi32, I64Regs, i64>;
+defm LDSW : LoadA<"ldsw", 0b001000, 0b011000, sextloadi32, I64Regs, i64>;
// 64-bit stores.
-defm STX : Store<"stx", 0b001110, store, I64Regs, i64>;
+defm STX : StoreA<"stx", 0b001110, 0b011110, store, I64Regs, i64>;
// Truncating stores from i64 are identical to the i32 stores.
def : Pat<(truncstorei8 i64:$src, ADDRrr:$addr), (STBrr ADDRrr:$addr, $src)>;
@@ -465,24 +452,20 @@ def : Pat<(SPselectreg (i64 simm10:$t), i64:$f, imm:$rcond, i64:$rs1),
} // Predicates = [Is64Bit]
-
-// 64 bit SETHI
-let Predicates = [Is64Bit], isCodeGenOnly = 1 in {
-def SETHIXi : F2_1<0b100,
- (outs IntRegs:$rd), (ins i64imm:$imm22),
- "sethi $imm22, $rd",
- [(set i64:$rd, SETHIimm:$imm22)]>;
-}
-
// ATOMICS.
-let Predicates = [Is64Bit], Constraints = "$swap = $rd", asi = 0b10000000 in {
- def CASXrr: F3_1_asi<3, 0b111110,
+let Predicates = [Is64Bit, HasV9], Constraints = "$swap = $rd" in {
+ def CASXArr: F3_1_asi<3, 0b111110,
(outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2,
- I64Regs:$swap),
- "casx [$rs1], $rs2, $rd",
- [(set i64:$rd,
- (atomic_cmp_swap_64 i64:$rs1, i64:$rs2, i64:$swap))]>;
+ I64Regs:$swap, ASITag:$asi),
+ "casxa [$rs1] $asi, $rs2, $rd",
+ []>;
+ let Uses = [ASR3] in
+ def CASXAri: F3_1_cas_asi<3, 0b111110,
+ (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2,
+ I64Regs:$swap),
+ "casxa [$rs1] %asi, $rs2, $rd",
+ []>;
} // Predicates = [Is64Bit], Constraints = ...
let Predicates = [Is64Bit] in {
@@ -492,8 +475,11 @@ def : Pat<(i64 (atomic_load_64 ADDRrr:$src)), (LDXrr ADDRrr:$src)>;
def : Pat<(i64 (atomic_load_64 ADDRri:$src)), (LDXri ADDRri:$src)>;
// atomic_store_64 val, addr -> store val, addr
-def : Pat<(atomic_store_64 ADDRrr:$dst, i64:$val), (STXrr ADDRrr:$dst, $val)>;
-def : Pat<(atomic_store_64 ADDRri:$dst, i64:$val), (STXri ADDRri:$dst, $val)>;
+def : Pat<(atomic_store_64 i64:$val, ADDRrr:$dst), (STXrr ADDRrr:$dst, $val)>;
+def : Pat<(atomic_store_64 i64:$val, ADDRri:$dst), (STXri ADDRri:$dst, $val)>;
+
+def : Pat<(atomic_cmp_swap_64 i64:$rs1, i64:$rs2, i64:$swap),
+ (CASXArr $rs1, $rs2, $swap, 0x80)>;
} // Predicates = [Is64Bit]
@@ -504,25 +490,25 @@ let Predicates = [Is64Bit], hasSideEffects = 1, Uses = [ICC], cc = 0b10 in
let Predicates = [Is64Bit] in {
def : Pat<(SPhi tglobaladdr:$in), (SETHIi tglobaladdr:$in)>;
-def : Pat<(SPlo tglobaladdr:$in), (ORXri (i64 G0), tglobaladdr:$in)>;
+def : Pat<(SPlo tglobaladdr:$in), (ORri (i64 G0), tglobaladdr:$in)>;
def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>;
-def : Pat<(SPlo tconstpool:$in), (ORXri (i64 G0), tconstpool:$in)>;
+def : Pat<(SPlo tconstpool:$in), (ORri (i64 G0), tconstpool:$in)>;
// GlobalTLS addresses
def : Pat<(SPhi tglobaltlsaddr:$in), (SETHIi tglobaltlsaddr:$in)>;
-def : Pat<(SPlo tglobaltlsaddr:$in), (ORXri (i64 G0), tglobaltlsaddr:$in)>;
+def : Pat<(SPlo tglobaltlsaddr:$in), (ORri (i64 G0), tglobaltlsaddr:$in)>;
def : Pat<(add (SPhi tglobaltlsaddr:$in1), (SPlo tglobaltlsaddr:$in2)),
- (ADDXri (SETHIXi tglobaltlsaddr:$in1), (tglobaltlsaddr:$in2))>;
+ (ADDri (SETHIi tglobaltlsaddr:$in1), (tglobaltlsaddr:$in2))>;
def : Pat<(xor (SPhi tglobaltlsaddr:$in1), (SPlo tglobaltlsaddr:$in2)),
- (XORXri (SETHIXi tglobaltlsaddr:$in1), (tglobaltlsaddr:$in2))>;
+ (XORri (SETHIi tglobaltlsaddr:$in1), (tglobaltlsaddr:$in2))>;
// Blockaddress
def : Pat<(SPhi tblockaddress:$in), (SETHIi tblockaddress:$in)>;
-def : Pat<(SPlo tblockaddress:$in), (ORXri (i64 G0), tblockaddress:$in)>;
+def : Pat<(SPlo tblockaddress:$in), (ORri (i64 G0), tblockaddress:$in)>;
// Add reg, lo. This is used when taking the addr of a global/constpool entry.
-def : Pat<(add iPTR:$r, (SPlo tglobaladdr:$in)), (ADDXri $r, tglobaladdr:$in)>;
-def : Pat<(add iPTR:$r, (SPlo tconstpool:$in)), (ADDXri $r, tconstpool:$in)>;
+def : Pat<(add iPTR:$r, (SPlo tglobaladdr:$in)), (ADDri $r, tglobaladdr:$in)>;
+def : Pat<(add iPTR:$r, (SPlo tconstpool:$in)), (ADDri $r, tconstpool:$in)>;
def : Pat<(add iPTR:$r, (SPlo tblockaddress:$in)),
- (ADDXri $r, tblockaddress:$in)>;
+ (ADDri $r, tblockaddress:$in)>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrAliases.td b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrAliases.td
index 01c3696cc7bc..db4c05cf1806 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrAliases.td
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrAliases.td
@@ -350,6 +350,8 @@ defm : int_cond_alias<"vs", 0b0111>;
let EmitPriority = 0 in
{
defm : int_cond_alias<"", 0b1000>; // same as a; gnu asm, not in manual
+ defm : int_cond_alias<"gt", 0b1010>; // same as g; gnu asm, not in manual
+ defm : int_cond_alias<"lt", 0b0011>; // same as l; gnu asm, not in manual
defm : int_cond_alias<"nz", 0b1001>; // same as ne
defm : int_cond_alias<"eq", 0b0001>; // same as e
defm : int_cond_alias<"z", 0b0001>; // same as e
@@ -411,10 +413,13 @@ defm : reg_cond_alias<"gez", 0b111>;
// non-alias form, except for the most obvious and clarifying aliases: cmp, jmp,
// call, tst, ret, retl.
-// Note: cmp is handled in SparcInstrInfo.
-// jmp/call/ret/retl have special case handling for output in
+// Note: jmp/call/ret/retl have special case handling for output in
// SparcInstPrinter.cpp
+// cmp rs1, reg_or_imm -> subcc rs1, reg_or_imm, %g0
+def : InstAlias<"cmp $rs1, $rs2", (SUBCCrr G0, IntRegs:$rs1, IntRegs:$rs2)>;
+def : InstAlias<"cmp $rs1, $imm", (SUBCCri G0, IntRegs:$rs1, simm13Op:$imm)>;
+
// jmp addr -> jmpl addr, %g0
def : InstAlias<"jmp $addr", (JMPLrr G0, MEMrr:$addr), 0>;
def : InstAlias<"jmp $addr", (JMPLri G0, MEMri:$addr), 0>;
@@ -443,6 +448,13 @@ def : InstAlias<"save", (SAVErr G0, G0, G0)>;
// def : InstAlias<"set $val, $rd", (ORri IntRegs:$rd, (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>;
def SET : AsmPseudoInst<(outs IntRegs:$rd), (ins i32imm:$val), "set $val, $rd">;
+// setx value, tmp, rd
+// (turns into a sequence of sethi+or+shift, depending on the value)
+def SETX : AsmPseudoInst<(outs I64Regs:$rd),
+ (ins i64imm:$val, I64Regs:$tmp),
+ "setx $val, $tmp, $rd">,
+ Requires<[Is64Bit, HasV9]>;
+
// not rd -> xnor rd, %g0, rd
def : InstAlias<"not $rd", (XNORrr IntRegs:$rd, IntRegs:$rd, G0), 0>;
@@ -455,45 +467,63 @@ def : InstAlias<"neg $rd", (SUBrr IntRegs:$rd, G0, IntRegs:$rd), 0>;
// neg reg, rd -> sub %g0, reg, rd
def : InstAlias<"neg $rs2, $rd", (SUBrr IntRegs:$rd, G0, IntRegs:$rs2), 0>;
+let Predicates = [HasV9] in {
+ // cas [rs1], rs2, rd -> casa [rs1] #ASI_P, rs2, rd
+ def : InstAlias<"cas [$rs1], $rs2, $rd",
+ (CASArr IntRegs:$rd, IntRegs:$rs1, IntRegs:$rs2, 0x80)>;
+
+ // casl [rs1], rs2, rd -> casa [rs1] #ASI_P_L, rs2, rd
+ def : InstAlias<"casl [$rs1], $rs2, $rd",
+ (CASArr IntRegs:$rd, IntRegs:$rs1, IntRegs:$rs2, 0x88)>;
+
+ // casx [rs1], rs2, rd -> casxa [rs1] #ASI_P, rs2, rd
+ def : InstAlias<"casx [$rs1], $rs2, $rd",
+ (CASXArr I64Regs:$rd, I64Regs:$rs1, I64Regs:$rs2, 0x80)>;
+
+ // casxl [rs1], rs2, rd -> casxa [rs1] #ASI_P_L, rs2, rd
+ def : InstAlias<"casxl [$rs1], $rs2, $rd",
+ (CASXArr I64Regs:$rd, I64Regs:$rs1, I64Regs:$rs2, 0x88)>;
+}
+
// inc rd -> add rd, 1, rd
def : InstAlias<"inc $rd", (ADDri IntRegs:$rd, IntRegs:$rd, 1), 0>;
// inc simm13, rd -> add rd, simm13, rd
-def : InstAlias<"inc $simm13, $rd", (ADDri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>;
+def : InstAlias<"inc $simm13, $rd", (ADDri IntRegs:$rd, IntRegs:$rd, simm13Op:$simm13), 0>;
// inccc rd -> addcc rd, 1, rd
def : InstAlias<"inccc $rd", (ADDCCri IntRegs:$rd, IntRegs:$rd, 1), 0>;
// inccc simm13, rd -> addcc rd, simm13, rd
-def : InstAlias<"inccc $simm13, $rd", (ADDCCri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>;
+def : InstAlias<"inccc $simm13, $rd", (ADDCCri IntRegs:$rd, IntRegs:$rd, simm13Op:$simm13), 0>;
// dec rd -> sub rd, 1, rd
def : InstAlias<"dec $rd", (SUBri IntRegs:$rd, IntRegs:$rd, 1), 0>;
// dec simm13, rd -> sub rd, simm13, rd
-def : InstAlias<"dec $simm13, $rd", (SUBri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>;
+def : InstAlias<"dec $simm13, $rd", (SUBri IntRegs:$rd, IntRegs:$rd, simm13Op:$simm13), 0>;
// deccc rd -> subcc rd, 1, rd
def : InstAlias<"deccc $rd", (SUBCCri IntRegs:$rd, IntRegs:$rd, 1), 0>;
// deccc simm13, rd -> subcc rd, simm13, rd
-def : InstAlias<"deccc $simm13, $rd", (SUBCCri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>;
+def : InstAlias<"deccc $simm13, $rd", (SUBCCri IntRegs:$rd, IntRegs:$rd, simm13Op:$simm13), 0>;
// btst reg_or_imm, reg -> andcc reg,reg_or_imm,%g0
def : InstAlias<"btst $rs2, $rs1", (ANDCCrr G0, IntRegs:$rs1, IntRegs:$rs2), 0>;
-def : InstAlias<"btst $simm13, $rs1", (ANDCCri G0, IntRegs:$rs1, i32imm:$simm13), 0>;
+def : InstAlias<"btst $simm13, $rs1", (ANDCCri G0, IntRegs:$rs1, simm13Op:$simm13), 0>;
// bset reg_or_imm, rd -> or rd,reg_or_imm,rd
def : InstAlias<"bset $rs2, $rd", (ORrr IntRegs:$rd, IntRegs:$rd, IntRegs:$rs2), 0>;
-def : InstAlias<"bset $simm13, $rd", (ORri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>;
+def : InstAlias<"bset $simm13, $rd", (ORri IntRegs:$rd, IntRegs:$rd, simm13Op:$simm13), 0>;
// bclr reg_or_imm, rd -> andn rd,reg_or_imm,rd
def : InstAlias<"bclr $rs2, $rd", (ANDNrr IntRegs:$rd, IntRegs:$rd, IntRegs:$rs2), 0>;
-def : InstAlias<"bclr $simm13, $rd", (ANDNri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>;
+def : InstAlias<"bclr $simm13, $rd", (ANDNri IntRegs:$rd, IntRegs:$rd, simm13Op:$simm13), 0>;
// btog reg_or_imm, rd -> xor rd,reg_or_imm,rd
def : InstAlias<"btog $rs2, $rd", (XORrr IntRegs:$rd, IntRegs:$rd, IntRegs:$rs2), 0>;
-def : InstAlias<"btog $simm13, $rd", (XORri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>;
+def : InstAlias<"btog $simm13, $rd", (XORri IntRegs:$rd, IntRegs:$rd, simm13Op:$simm13), 0>;
// clr rd -> or %g0, %g0, rd
@@ -510,48 +540,52 @@ def : InstAlias<"clr [$addr]", (STri MEMri:$addr, G0), 0>;
// mov reg_or_imm, rd -> or %g0, reg_or_imm, rd
def : InstAlias<"mov $rs2, $rd", (ORrr IntRegs:$rd, G0, IntRegs:$rs2)>;
-def : InstAlias<"mov $simm13, $rd", (ORri IntRegs:$rd, G0, i32imm:$simm13)>;
+def : InstAlias<"mov $simm13, $rd", (ORri IntRegs:$rd, G0, simm13Op:$simm13)>;
// mov specialreg, rd -> rd specialreg, rd
def : InstAlias<"mov $asr, $rd", (RDASR IntRegs:$rd, ASRRegs:$asr), 0>;
def : InstAlias<"mov %psr, $rd", (RDPSR IntRegs:$rd), 0>;
def : InstAlias<"mov %wim, $rd", (RDWIM IntRegs:$rd), 0>;
def : InstAlias<"mov %tbr, $rd", (RDTBR IntRegs:$rd), 0>;
-def : InstAlias<"mov %pc, $rd", (RDPC IntRegs:$rd), 0>;
// mov reg_or_imm, specialreg -> wr %g0, reg_or_imm, specialreg
def : InstAlias<"mov $rs2, $asr", (WRASRrr ASRRegs:$asr, G0, IntRegs:$rs2), 0>;
-def : InstAlias<"mov $simm13, $asr", (WRASRri ASRRegs:$asr, G0, i32imm:$simm13), 0>;
+def : InstAlias<"mov $simm13, $asr", (WRASRri ASRRegs:$asr, G0, simm13Op:$simm13), 0>;
def : InstAlias<"mov $rs2, %psr", (WRPSRrr G0, IntRegs:$rs2), 0>;
-def : InstAlias<"mov $simm13, %psr", (WRPSRri G0, i32imm:$simm13), 0>;
+def : InstAlias<"mov $simm13, %psr", (WRPSRri G0, simm13Op:$simm13), 0>;
def : InstAlias<"mov $rs2, %wim", (WRWIMrr G0, IntRegs:$rs2), 0>;
-def : InstAlias<"mov $simm13, %wim", (WRWIMri G0, i32imm:$simm13), 0>;
+def : InstAlias<"mov $simm13, %wim", (WRWIMri G0, simm13Op:$simm13), 0>;
def : InstAlias<"mov $rs2, %tbr", (WRTBRrr G0, IntRegs:$rs2), 0>;
-def : InstAlias<"mov $simm13, %tbr", (WRTBRri G0, i32imm:$simm13), 0>;
+def : InstAlias<"mov $simm13, %tbr", (WRTBRri G0, simm13Op:$simm13), 0>;
// End of Section A.3
+// or imm, reg, rd -> or reg, imm, rd
+// Nonstandard GNU extension.
+let EmitPriority = 0 in
+ def : InstAlias<"or $simm13, $rs1, $rd", (ORri IntRegs:$rd, IntRegs:$rs1, simm13Op:$simm13)>;
+
// wr reg_or_imm, specialreg -> wr %g0, reg_or_imm, specialreg
// (aka: omit the first arg when it's g0. This is not in the manual, but is
// supported by gnu and solaris as)
def : InstAlias<"wr $rs2, $asr", (WRASRrr ASRRegs:$asr, G0, IntRegs:$rs2), 0>;
-def : InstAlias<"wr $simm13, $asr", (WRASRri ASRRegs:$asr, G0, i32imm:$simm13), 0>;
+def : InstAlias<"wr $simm13, $asr", (WRASRri ASRRegs:$asr, G0, simm13Op:$simm13), 0>;
def : InstAlias<"wr $rs2, %psr", (WRPSRrr G0, IntRegs:$rs2), 0>;
-def : InstAlias<"wr $simm13, %psr", (WRPSRri G0, i32imm:$simm13), 0>;
+def : InstAlias<"wr $simm13, %psr", (WRPSRri G0, simm13Op:$simm13), 0>;
def : InstAlias<"wr $rs2, %wim", (WRWIMrr G0, IntRegs:$rs2), 0>;
-def : InstAlias<"wr $simm13, %wim", (WRWIMri G0, i32imm:$simm13), 0>;
+def : InstAlias<"wr $simm13, %wim", (WRWIMri G0, simm13Op:$simm13), 0>;
def : InstAlias<"wr $rs2, %tbr", (WRTBRrr G0, IntRegs:$rs2), 0>;
-def : InstAlias<"wr $simm13, %tbr", (WRTBRri G0, i32imm:$simm13), 0>;
+def : InstAlias<"wr $simm13, %tbr", (WRTBRri G0, simm13Op:$simm13), 0>;
def : InstAlias<"pwr $rs2, %psr", (PWRPSRrr G0, IntRegs:$rs2), 0>;
-def : InstAlias<"pwr $simm13, %psr", (PWRPSRri G0, i32imm:$simm13), 0>;
+def : InstAlias<"pwr $simm13, %psr", (PWRPSRri G0, simm13Op:$simm13), 0>;
// wrpr %reg, %rd -> wrpr %reg, %g0, %rd
// wrpr imm, %rd -> wrpr %g0, imm, %rd
// Nonstandard GNU extensions.
let Predicates = [HasV9] in {
def : InstAlias<"wrpr $rs1, $rd", (WRPRrr PRRegs:$rd, IntRegs:$rs1, G0), 0>;
- def : InstAlias<"wrpr $simm13, $rd", (WRPRri PRRegs:$rd, G0, i32imm:$simm13), 0>;
+ def : InstAlias<"wrpr $simm13, $rd", (WRPRri PRRegs:$rd, G0, simm13Op:$simm13), 0>;
}
// flush -> flush %g0
@@ -573,7 +607,14 @@ def : MnemonicAlias<"stsh", "sth">;
def : MnemonicAlias<"stuha", "stha">;
def : MnemonicAlias<"stsha", "stha">;
+
def : MnemonicAlias<"stw", "st">, Requires<[HasV9]>;
+def : MnemonicAlias<"stuw", "st">, Requires<[HasV9]>;
+def : MnemonicAlias<"stsw", "st">, Requires<[HasV9]>;
+
+def : MnemonicAlias<"stwa", "sta">, Requires<[HasV9]>;
+def : MnemonicAlias<"stuwa", "sta">, Requires<[HasV9]>;
+def : MnemonicAlias<"stswa", "sta">, Requires<[HasV9]>;
def : MnemonicAlias<"lduw", "ld">, Requires<[HasV9]>;
def : MnemonicAlias<"lduwa", "lda">, Requires<[HasV9]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrFormats.td
index c67b591ab98a..3939f4ed9427 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrFormats.td
@@ -135,6 +135,14 @@ class F3_1_asi<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
let Inst{4-0} = rs2;
}
+// CAS instructions do not use an immediate even when i=1.
+class F3_1_cas_asi<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
+ string asmstr, list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : F3_1_asi<opVal, op3val, outs, ins, asmstr, pattern, itin> {
+ let asi = 0;
+ let Inst{13} = 1; // i field = 1
+}
+
class F3_1<bits<2> opVal, bits<6> op3val, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin = IIC_iu_instr>
: F3_1_asi<opVal, op3val, outs, ins, asmstr, pattern, itin> {
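
F3_1_cas_asi pins the i bit (Inst{13}) to 1 while zeroing the asi field, matching the V9 register-ASI CASA form, which sets i=1 yet still takes rs2 as a register rather than an immediate. A small self-contained check of the resulting 32-bit word, using the standard SPARC format-3 field positions (op 31-30, rd 29-25, op3 24-19, rs1 18-14, i 13, asi 12-5, rs2 4-0; only i and rs2 are spelled out in the classes above, the rest is architectural knowledge); the helper is illustrative:

#include <cstdint>
#include <cstdio>

// Pack a SPARC format-3 memory instruction the way F3_1_asi lays it
// out. F3_1_cas_asi forces i=1 and asi=0.
static uint32_t packF3(unsigned op, unsigned rd, unsigned op3,
                       unsigned rs1, unsigned i, unsigned asi,
                       unsigned rs2) {
  return (op << 30) | (rd << 25) | (op3 << 19) | (rs1 << 14) |
         (i << 13) | (asi << 5) | rs2;
}

int main() {
  // casa [%o0] %asi, %o1, %o2 (op=3, op3=0b111100): register-ASI form
  // with i=1 and the asi field zeroed -- the ASI is read from %asi at
  // run time. %o0=8, %o1=9, %o2=10 in the 5-bit register numbering.
  uint32_t W = packF3(3, 10, 0b111100, 8, /*i=*/1, /*asi=*/0, 9);
  std::printf("0x%08x\n", W); // 0xd5e22009
  return 0;
}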
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 9af8b17edcc5..5e792427cca2 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -51,10 +51,14 @@ def HasVIS3 : Predicate<"Subtarget->isVIS3()">,
// point instructions.
def HasHardQuad : Predicate<"Subtarget->hasHardQuad()">;
-// HasLeonCASA - This is true when the target processor supports the CASA
-// instruction
+// HasLeonCASA - This is true when the target processor supports the Leon CASA
+// instruction.
def HasLeonCASA : Predicate<"Subtarget->hasLeonCasa()">;
+// HasCASA - This is true when the target processor supports the CASA instruction.
+def HasCASA : Predicate<"Subtarget->hasLeonCasa() || Subtarget->isV9()">,
+ AssemblerPredicate<(any_of LeonCASA, FeatureV9)>;
+
// HasPWRPSR - This is true when the target processor supports partial
// writes to the PSR register that only affects the ET field.
def HasPWRPSR : Predicate<"Subtarget->hasPWRPSR()">,
@@ -113,7 +117,7 @@ def SETHIimm_not : PatLeaf<(i32 imm), [{
// Addressing modes.
def ADDRrr : ComplexPattern<iPTR, 2, "SelectADDRrr", [], []>;
-def ADDRri : ComplexPattern<iPTR, 2, "SelectADDRri", [frameindex], []>;
+def ADDRri : ComplexPattern<iPTR, 2, "SelectADDRri", [], []>;
// Constrained operands for the shift operations.
class ShiftAmtImmAsmOperand<int Bits> : AsmOperandClass {
@@ -183,6 +187,16 @@ def MembarTag : Operand<i32> {
let ParserMatchClass = SparcMembarTagAsmOperand;
}
+def SparcASITagAsmOperand : AsmOperandClass {
+ let Name = "ASITag";
+ let ParserMethod = "parseASITag";
+}
+
+def ASITag : Operand<i32> {
+ let PrintMethod = "printASITag";
+ let ParserMatchClass = SparcASITagAsmOperand;
+}
+
// Branch targets have OtherVT type.
def brtarget : Operand<OtherVT> {
let EncoderMethod = "getBranchTargetOpValue";
@@ -207,7 +221,8 @@ def calltarget : Operand<i32> {
let ParserMatchClass = SparcCallTargetAsmOperand;
}
-def simm13Op : Operand<i32> {
+def simm13Op : Operand<iPTR> {
+ let OperandType = "OPERAND_IMMEDIATE";
let DecoderMethod = "DecodeSIMM13";
let EncoderMethod = "getSImm13OpValue";
}
@@ -421,19 +436,26 @@ multiclass Load<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode,
// TODO: Instructions of the LoadASI class are currently asm only; hooking up
// CodeGen's address spaces to use these is a future task.
-class LoadASI<string OpcStr, bits<6> Op3Val, RegisterClass RC> :
- F3_1_asi<3, Op3Val, (outs RC:$rd), (ins (MEMrr $rs1, $rs2):$addr, i8imm:$asi),
- !strconcat(OpcStr, "a [$addr] $asi, $rd"),
- []>;
+multiclass LoadASI<string OpcStr, bits<6> Op3Val, RegisterClass RC> {
+ def rr : F3_1_asi<3, Op3Val, (outs RC:$rd), (ins (MEMrr $rs1, $rs2):$addr, ASITag:$asi),
+ !strconcat(OpcStr, "a [$addr] $asi, $rd"),
+ []>;
+
+ let Predicates = [HasV9], Uses = [ASR3] in
+ def ri : F3_2<3, Op3Val, (outs RC:$rd), (ins (MEMri $rs1, $simm13):$addr),
+ !strconcat(OpcStr, "a [$addr] %asi, $rd"),
+ []>;
+}
// LoadA multiclass - As above, but also define alternate address space variant
multiclass LoadA<string OpcStr, bits<6> Op3Val, bits<6> LoadAOp3Val,
SDPatternOperator OpNode, RegisterClass RC, ValueType Ty,
InstrItinClass itin = NoItinerary> :
Load<OpcStr, Op3Val, OpNode, RC, Ty, itin> {
- def Arr : LoadASI<OpcStr, LoadAOp3Val, RC>;
+ defm A : LoadASI<OpcStr, LoadAOp3Val, RC>;
}
+
// The LDSTUB instruction is supported for asm only.
// It is unlikely that general-purpose code could make use of it.
// CAS is preferred for sparc v9.
@@ -442,8 +464,12 @@ def LDSTUBrr : F3_1<3, 0b001101, (outs IntRegs:$rd), (ins (MEMrr $rs1, $rs2):$ad
def LDSTUBri : F3_2<3, 0b001101, (outs IntRegs:$rd), (ins (MEMri $rs1, $simm13):$addr),
"ldstub [$addr], $rd", []>;
def LDSTUBArr : F3_1_asi<3, 0b011101, (outs IntRegs:$rd),
- (ins (MEMrr $rs1, $rs2):$addr, i8imm:$asi),
+ (ins (MEMrr $rs1, $rs2):$addr, ASITag:$asi),
"ldstuba [$addr] $asi, $rd", []>;
+let Predicates = [HasV9], Uses = [ASR3] in
+def LDSTUBAri : F3_2<3, 0b011101, (outs IntRegs:$rd),
+ (ins (MEMri $rs1, $simm13):$addr),
+ "ldstuba [$addr] %asi, $rd", []>;
// Store multiclass - Define both Reg+Reg/Reg+Imm patterns in one shot.
multiclass Store<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode,
@@ -462,17 +488,24 @@ multiclass Store<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode,
// TODO: Instructions of the StoreASI class are currently asm only; hooking up
// CodeGen's address spaces to use these is a future task.
-class StoreASI<string OpcStr, bits<6> Op3Val, RegisterClass RC,
- InstrItinClass itin = IIC_st> :
- F3_1_asi<3, Op3Val, (outs), (ins (MEMrr $rs1, $rs2):$addr, RC:$rd, i8imm:$asi),
+multiclass StoreASI<string OpcStr, bits<6> Op3Val, RegisterClass RC,
+ InstrItinClass itin = IIC_st> {
+ def rr : F3_1_asi<3, Op3Val, (outs), (ins (MEMrr $rs1, $rs2):$addr, RC:$rd, ASITag:$asi),
!strconcat(OpcStr, "a $rd, [$addr] $asi"),
[],
itin>;
+ let Predicates = [HasV9], Uses = [ASR3] in
+ def ri : F3_2<3, Op3Val, (outs), (ins (MEMri $rs1, $simm13):$addr, RC:$rd),
+ !strconcat(OpcStr, "a $rd, [$addr] %asi"),
+ [],
+ itin>;
+}
+
multiclass StoreA<string OpcStr, bits<6> Op3Val, bits<6> StoreAOp3Val,
SDPatternOperator OpNode, RegisterClass RC, ValueType Ty> :
Store<OpcStr, Op3Val, OpNode, RC, Ty> {
- def Arr : StoreASI<OpcStr, StoreAOp3Val, RC>;
+ defm A : StoreASI<OpcStr, StoreAOp3Val, RC>;
}
//===----------------------------------------------------------------------===//
@@ -587,18 +620,21 @@ defm LD : LoadA<"ld", 0b000000, 0b010000, load, IntRegs, i32>;
defm LDD : LoadA<"ldd", 0b000011, 0b010011, load, IntPair, v2i32, IIC_ldd>;
// Section B.2 - Load Floating-point Instructions, p. 92
-defm LDF : Load<"ld", 0b100000, load, FPRegs, f32, IIC_iu_or_fpu_instr>;
-def LDFArr : LoadASI<"ld", 0b110000, FPRegs>,
- Requires<[HasV9]>;
+defm LDF : Load<"ld", 0b100000, load, FPRegs, f32, IIC_iu_or_fpu_instr>;
+defm LDDF : Load<"ldd", 0b100011, load, DFPRegs, f64, IIC_ldd>;
-defm LDDF : Load<"ldd", 0b100011, load, DFPRegs, f64, IIC_ldd>;
-def LDDFArr : LoadASI<"ldd", 0b110011, DFPRegs>,
- Requires<[HasV9]>;
-defm LDQF : LoadA<"ldq", 0b100010, 0b110010, load, QFPRegs, f128>,
- Requires<[HasV9, HasHardQuad]>;
+let DecoderNamespace = "SparcV9", Predicates = [HasV9] in {
+ defm LDFA : LoadASI<"ld", 0b110000, FPRegs>;
+ defm LDDFA : LoadASI<"ldd", 0b110011, DFPRegs>;
+ defm LDQF : LoadA<"ldq", 0b100010, 0b110010, load, QFPRegs, f128>,
+ Requires<[HasHardQuad]>;
+}
-defm LDC : Load<"ld", 0b110000, load, CoprocRegs, i32>;
-defm LDDC : Load<"ldd", 0b110011, load, CoprocPair, v2i32, IIC_ldd>;
+// Coprocessor instructions were removed in v9.
+let DecoderNamespace = "SparcV8", Predicates = [HasNoV9] in {
+ defm LDC : Load<"ld", 0b110000, load, CoprocRegs, i32>;
+ defm LDDC : Load<"ldd", 0b110011, load, CoprocPair, v2i32, IIC_ldd>;
+}
let Defs = [CPSR] in {
let rd = 0 in {
@@ -641,16 +677,20 @@ defm STD : StoreA<"std", 0b000111, 0b010111, store, IntPair, v2i32>;
// Section B.5 - Store Floating-point Instructions, p. 97
defm STF : Store<"st", 0b100100, store, FPRegs, f32>;
-def STFArr : StoreASI<"st", 0b110100, FPRegs>,
- Requires<[HasV9]>;
defm STDF : Store<"std", 0b100111, store, DFPRegs, f64, IIC_std>;
-def STDFArr : StoreASI<"std", 0b110111, DFPRegs>,
- Requires<[HasV9]>;
-defm STQF : StoreA<"stq", 0b100110, 0b110110, store, QFPRegs, f128>,
- Requires<[HasV9, HasHardQuad]>;
-defm STC : Store<"st", 0b110100, store, CoprocRegs, i32>;
-defm STDC : Store<"std", 0b110111, store, CoprocPair, v2i32, IIC_std>;
+let DecoderNamespace = "SparcV9", Predicates = [HasV9] in {
+ defm STFA : StoreASI<"st", 0b110100, FPRegs>;
+ defm STDFA : StoreASI<"std", 0b110111, DFPRegs>;
+ defm STQF : StoreA<"stq", 0b100110, 0b110110, store, QFPRegs, f128>,
+ Requires<[HasHardQuad]>;
+}
+
+// Coprocessor instructions were removed in v9.
+let DecoderNamespace = "SparcV8", Predicates = [HasNoV9] in {
+ defm STC : Store<"st", 0b110100, store, CoprocRegs, i32>;
+ defm STDC : Store<"std", 0b110111, store, CoprocPair, v2i32, IIC_std>;
+}
let rd = 0 in {
let Defs = [CPSR] in {
@@ -700,9 +740,14 @@ let Constraints = "$val = $rd" in {
"swap [$addr], $rd",
[(set i32:$rd, (atomic_swap_32 ADDRri:$addr, i32:$val))]>;
def SWAPArr : F3_1_asi<3, 0b011111,
- (outs IntRegs:$rd), (ins (MEMrr $rs1, $rs2):$addr, i8imm:$asi, IntRegs:$val),
+ (outs IntRegs:$rd), (ins (MEMrr $rs1, $rs2):$addr, ASITag:$asi, IntRegs:$val),
"swapa [$addr] $asi, $rd",
[/*FIXME: pattern?*/]>;
+let Predicates = [HasV9], Uses = [ASR3] in
+ def SWAPAri : F3_2<3, 0b011111,
+ (outs IntRegs:$rd), (ins (MEMri $rs1, $simm13):$addr, IntRegs:$val),
+ "swapa [$addr] %asi, $rd",
+ [/*FIXME: pattern?*/]>;
}
@@ -771,13 +816,6 @@ defm SRA : F3_S<"sra", 0b100111, 0, sra, i32, shift_imm5, IntRegs>;
// Section B.13 - Add Instructions, p. 108
defm ADD : F3_12<"add", 0b000000, add, IntRegs, i32, simm13Op>;
-// "LEA" forms of add (patterns to make tblgen happy)
-let Predicates = [Is32Bit], isCodeGenOnly = 1 in
- def LEA_ADDri : F3_2<2, 0b000000,
- (outs IntRegs:$rd), (ins (MEMri $rs1, $simm13):$addr),
- "add ${addr:arith}, $rd",
- [(set iPTR:$rd, ADDRri:$addr)]>;
-
let Defs = [ICC] in
defm ADDCC : F3_12<"addcc", 0b010000, addc, IntRegs, i32, simm13Op>;
@@ -792,23 +830,14 @@ defm SUB : F3_12 <"sub" , 0b000100, sub, IntRegs, i32, simm13Op>;
let Uses = [ICC], Defs = [ICC] in
defm SUBE : F3_12 <"subxcc" , 0b011100, sube, IntRegs, i32, simm13Op>;
-let Defs = [ICC] in
+let Defs = [ICC], hasPostISelHook = true in
defm SUBCC : F3_12 <"subcc", 0b010100, subc, IntRegs, i32, simm13Op>;
let Uses = [ICC] in
defm SUBC : F3_12np <"subx", 0b001100>;
-// cmp (from Section A.3) is a specialized alias for subcc
-let Defs = [ICC], rd = 0 in {
- def CMPrr : F3_1<2, 0b010100,
- (outs), (ins IntRegs:$rs1, IntRegs:$rs2),
- "cmp $rs1, $rs2",
- [(SPcmpicc i32:$rs1, i32:$rs2)]>;
- def CMPri : F3_2<2, 0b010100,
- (outs), (ins IntRegs:$rs1, simm13Op:$simm13),
- "cmp $rs1, $simm13",
- [(SPcmpicc i32:$rs1, (i32 simm13:$simm13))]>;
-}
+def : Pat<(SPcmpicc i32:$lhs, i32:$rhs), (SUBCCrr $lhs, $rhs)>;
+def : Pat<(SPcmpicc i32:$lhs, (i32 simm13:$rhs)), (SUBCCri $lhs, imm:$rhs)>;
// Section B.18 - Multiply Instructions, p. 113
let Defs = [Y] in {
@@ -1123,14 +1152,6 @@ let Predicates = [HasNoV9] in {
"rd %tbr, $rd", []>;
}
-// PC don't exist on the SparcV8, only the V9.
-let Predicates = [HasV9] in {
- let rs2 = 0, rs1 = 5 in
- def RDPC : F3_1<2, 0b101000,
- (outs IntRegs:$rd), (ins),
- "rd %pc, $rd", []>;
-}
-
// Section B.29 - Write State Register Instructions
def WRASRrr : F3_1<2, 0b110000,
(outs ASRRegs:$rd), (ins IntRegs:$rs1, IntRegs:$rs2),
@@ -1667,36 +1688,20 @@ let Predicates = [HasV9], rd = 15, rs1 = 0b00000 in
(ins simm13Op:$simm13),
"sir $simm13", []>;
-// The CAS instruction, unlike other instructions, only comes in a
-// form which requires an ASI be provided. The ASI value hardcoded
-// here is ASI_PRIMARY, the default unprivileged ASI for SparcV9.
-let Predicates = [HasV9], Constraints = "$swap = $rd", asi = 0b10000000 in
- def CASrr: F3_1_asi<3, 0b111100,
+// CASA is supported on all V9, some LEON3, and all LEON4 processors.
+let Predicates = [HasCASA], Constraints = "$swap = $rd" in
+ def CASArr: F3_1_asi<3, 0b111100,
(outs IntRegs:$rd), (ins IntRegs:$rs1, IntRegs:$rs2,
- IntRegs:$swap),
- "cas [$rs1], $rs2, $rd",
- [(set i32:$rd,
- (atomic_cmp_swap_32 iPTR:$rs1, i32:$rs2, i32:$swap))]>;
-
+ IntRegs:$swap, ASITag:$asi),
+ "casa [$rs1] $asi, $rs2, $rd", []>;
-// CASA is supported as an instruction on some LEON3 and all LEON4 processors.
-// This version can be automatically lowered from C code, selecting ASI 10
-let Predicates = [HasLeonCASA], Constraints = "$swap = $rd", asi = 0b00001010 in
- def CASAasi10: F3_1_asi<3, 0b111100,
+// On the other hand, the CASA form that takes its ASI from the %asi
+// register is only supported on V9 processors.
+let Predicates = [HasV9], Uses = [ASR3], Constraints = "$swap = $rd" in
+ def CASAri: F3_1_cas_asi<3, 0b111100,
(outs IntRegs:$rd), (ins IntRegs:$rs1, IntRegs:$rs2,
IntRegs:$swap),
- "casa [$rs1] 10, $rs2, $rd",
- [(set i32:$rd,
- (atomic_cmp_swap_32 iPTR:$rs1, i32:$rs2, i32:$swap))]>;
-
-// CASA supported on some LEON3 and all LEON4 processors. Same pattern as
-// CASrr, above, but with a different ASI. This version is supported for
-// inline assembly lowering only.
-let Predicates = [HasLeonCASA], Constraints = "$swap = $rd" in
- def CASArr: F3_1_asi<3, 0b111100,
- (outs IntRegs:$rd), (ins IntRegs:$rs1, IntRegs:$rs2,
- IntRegs:$swap, i8imm:$asi),
- "casa [$rs1] $asi, $rs2, $rd", []>;
+ "casa [$rs1] %asi, $rs2, $rd", []>;
// TODO: Add DAG sequence to lower these instructions. Currently, only provided
// as inline assembler-supported instructions.
@@ -1777,6 +1782,15 @@ let rs2 = 0 in
def RDPR : F3_1<2, 0b101010,
(outs IntRegs:$rd), (ins PRRegs:$rs1),
"rdpr $rs1, $rd", []>;
+
+// Special case %fq as the register is also used in V8
+// (albeit with different instructions and encoding).
+// This allows us to reuse the register definition and
+// the "%fq" designation while giving it a different encoding.
+let Uses = [FQ], rs1 = 15, rs2 = 0 in
+ def RDFQ : F3_1<2, 0b101010,
+ (outs IntRegs:$rd), (ins),
+ "rdpr %fq, $rd", []>;
}
// Section A.62 - Write Privileged Register Instructions
@@ -1802,6 +1816,12 @@ def : Pat<(i32 simm13:$val),
def : Pat<(i32 imm:$val),
(ORri (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>;
+// Frame index.
+def to_tframeindex : SDNodeXForm<frameindex, [{
+ return CurDAG->getTargetFrameIndex(N->getIndex(), N->getValueType(0));
+}]>;
+def : Pat<(i32 (frameindex:$ptr)), (ADDri (i32 (to_tframeindex $ptr)), (i32 0))>;
+def : Pat<(i64 (frameindex:$ptr)), (ADDri (i64 (to_tframeindex $ptr)), (i64 0))>;
// Global addresses, constant pool entries
let Predicates = [Is32Bit] in {
@@ -1868,12 +1888,21 @@ def : Pat<(i32 (atomic_load_32 ADDRrr:$src)), (LDrr ADDRrr:$src)>;
def : Pat<(i32 (atomic_load_32 ADDRri:$src)), (LDri ADDRri:$src)>;
// atomic_store val, addr -> store val, addr
-def : Pat<(atomic_store_8 ADDRrr:$dst, i32:$val), (STBrr ADDRrr:$dst, $val)>;
-def : Pat<(atomic_store_8 ADDRri:$dst, i32:$val), (STBri ADDRri:$dst, $val)>;
-def : Pat<(atomic_store_16 ADDRrr:$dst, i32:$val), (STHrr ADDRrr:$dst, $val)>;
-def : Pat<(atomic_store_16 ADDRri:$dst, i32:$val), (STHri ADDRri:$dst, $val)>;
-def : Pat<(atomic_store_32 ADDRrr:$dst, i32:$val), (STrr ADDRrr:$dst, $val)>;
-def : Pat<(atomic_store_32 ADDRri:$dst, i32:$val), (STri ADDRri:$dst, $val)>;
+def : Pat<(atomic_store_8 i32:$val, ADDRrr:$dst), (STBrr ADDRrr:$dst, $val)>;
+def : Pat<(atomic_store_8 i32:$val, ADDRri:$dst), (STBri ADDRri:$dst, $val)>;
+def : Pat<(atomic_store_16 i32:$val, ADDRrr:$dst), (STHrr ADDRrr:$dst, $val)>;
+def : Pat<(atomic_store_16 i32:$val, ADDRri:$dst), (STHri ADDRri:$dst, $val)>;
+def : Pat<(atomic_store_32 i32:$val, ADDRrr:$dst), (STrr ADDRrr:$dst, $val)>;
+def : Pat<(atomic_store_32 i32:$val, ADDRri:$dst), (STri ADDRri:$dst, $val)>;
+
+let Predicates = [HasV9] in
+def : Pat<(atomic_cmp_swap_32 iPTR:$rs1, i32:$rs2, i32:$swap),
+ (CASArr $rs1, $rs2, $swap, 0x80)>;
+
+// Same pattern as CASArr above, but with a different ASI.
+let Predicates = [HasLeonCASA] in
+def : Pat<(atomic_cmp_swap_32 iPTR:$rs1, i32:$rs2, i32:$swap),
+ (CASArr $rs1, $rs2, $swap, 0x0A)>;
// A register pair with zero upper half.
// The upper part is done with ORrr instead of `COPY G0`
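
On the assembler side, the new ASITag operand above sets ParserMethod = "parseASITag", and the natural implementation resolves a #ASI_... token through the two SearchIndex lookups declared in SparcASITags.td. A hedged sketch of that resolution step: the lookup names and record fields come from the table file, while the namespace, the extern declarations, and resolveASITag itself are assumptions about the generated and hand-written plumbing:

#include "llvm/ADT/StringRef.h"
#include <optional>

// The record layout matches the Fields list in SparcASITags.td; the
// two lookups are the ones its SearchIndex defs generate. The
// SparcASITag namespace is an assumed home for the generated code.
namespace SparcASITag {
struct ASITag {
  const char *Name;
  const char *AltName;
  unsigned Encoding;
};
const ASITag *lookupASITagByName(llvm::StringRef Name);
const ASITag *lookupASITagByAltName(llvm::StringRef AltName);
} // namespace SparcASITag

// Hypothetical helper a parseASITag implementation could call once it
// has consumed '#' and lexed the identifier: both spellings resolve
// to the same encoding, e.g. ASI_P and ASI_PRIMARY both yield 0x80.
static std::optional<unsigned> resolveASITag(llvm::StringRef Sym) {
  if (const auto *Tag = SparcASITag::lookupASITagByName(Sym))
    return Tag->Encoding;
  if (const auto *Tag = SparcASITag::lookupASITagByAltName(Sym))
    return Tag->Encoding;
  return std::nullopt;
}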
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcRegisterInfo.td
index 947bbaed8c70..d5ba7464695c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcRegisterInfo.td
@@ -15,7 +15,8 @@ class SparcReg<bits<16> Enc, string n> : Register<n> {
let Namespace = "SP";
}
-class SparcCtrlReg<bits<16> Enc, string n>: Register<n> {
+class SparcCtrlReg<bits<16> Enc, string n,
+ list<string> altNames = []>: Register<n, altNames> {
let HWEncoding = Enc;
let Namespace = "SP";
}
@@ -27,6 +28,11 @@ def sub_even64 : SubRegIndex<64>;
def sub_odd64 : SubRegIndex<64, 64>;
}
+let Namespace = "SP",
+ FallbackRegAltNameIndex = NoRegAltName in {
+ def RegNamesStateReg : RegAltNameIndex;
+}
+
// Registers are identified with 5-bit ID numbers.
// Ri - 32-bit integer registers
class Ri<bits<16> Enc, string n> : SparcReg<Enc, n>;
@@ -55,270 +61,275 @@ class Rq<bits<16> Enc, string n, list<Register> subregs> : SparcReg<Enc, n> {
}
// Control Registers
-def ICC : SparcCtrlReg<0, "ICC">; // This represents icc and xcc in 64-bit code.
+def ICC : SparcCtrlReg<0, "icc">; // This represents icc and xcc in 64-bit code.
foreach I = 0-3 in
- def FCC#I : SparcCtrlReg<I, "FCC"#I>;
-
-def FSR : SparcCtrlReg<0, "FSR">; // Floating-point state register.
-
-def FQ : SparcCtrlReg<0, "FQ">; // Floating-point deferred-trap queue.
+ def FCC#I : SparcCtrlReg<I, "fcc"#I>;
-def CPSR : SparcCtrlReg<0, "CPSR">; // Co-processor state register.
-
-def CPQ : SparcCtrlReg<0, "CPQ">; // Co-processor queue.
+def FSR : SparcCtrlReg<0, "fsr">; // Floating-point state register.
+def FQ : SparcCtrlReg<0, "fq">; // Floating-point deferred-trap queue.
+def CPSR : SparcCtrlReg<0, "csr">; // Co-processor state register.
+def CPQ : SparcCtrlReg<0, "cq">; // Co-processor queue.
// Y register
-def Y : SparcCtrlReg<0, "Y">, DwarfRegNum<[64]>;
+def Y : SparcCtrlReg<0, "y">, DwarfRegNum<[64]>;
// Ancillary state registers (implementation defined)
-def ASR1 : SparcCtrlReg<1, "ASR1">;
-def ASR2 : SparcCtrlReg<2, "ASR2">;
-def ASR3 : SparcCtrlReg<3, "ASR3">;
-def ASR4 : SparcCtrlReg<4, "ASR4">;
-def ASR5 : SparcCtrlReg<5, "ASR5">;
-def ASR6 : SparcCtrlReg<6, "ASR6">;
-def ASR7 : SparcCtrlReg<7, "ASR7">;
-def ASR8 : SparcCtrlReg<8, "ASR8">;
-def ASR9 : SparcCtrlReg<9, "ASR9">;
-def ASR10 : SparcCtrlReg<10, "ASR10">;
-def ASR11 : SparcCtrlReg<11, "ASR11">;
-def ASR12 : SparcCtrlReg<12, "ASR12">;
-def ASR13 : SparcCtrlReg<13, "ASR13">;
-def ASR14 : SparcCtrlReg<14, "ASR14">;
-def ASR15 : SparcCtrlReg<15, "ASR15">;
-def ASR16 : SparcCtrlReg<16, "ASR16">;
-def ASR17 : SparcCtrlReg<17, "ASR17">;
-def ASR18 : SparcCtrlReg<18, "ASR18">;
-def ASR19 : SparcCtrlReg<19, "ASR19">;
-def ASR20 : SparcCtrlReg<20, "ASR20">;
-def ASR21 : SparcCtrlReg<21, "ASR21">;
-def ASR22 : SparcCtrlReg<22, "ASR22">;
-def ASR23 : SparcCtrlReg<23, "ASR23">;
-def ASR24 : SparcCtrlReg<24, "ASR24">;
-def ASR25 : SparcCtrlReg<25, "ASR25">;
-def ASR26 : SparcCtrlReg<26, "ASR26">;
-def ASR27 : SparcCtrlReg<27, "ASR27">;
-def ASR28 : SparcCtrlReg<28, "ASR28">;
-def ASR29 : SparcCtrlReg<29, "ASR29">;
-def ASR30 : SparcCtrlReg<30, "ASR30">;
-def ASR31 : SparcCtrlReg<31, "ASR31">;
+def ASR1 : SparcCtrlReg<1, "asr1">;
+let RegAltNameIndices = [RegNamesStateReg] in {
+// FIXME: Currently this results in the assembler accepting
+// the alternate names (%ccr, %asi, etc.) when targeting V8.
+// Make sure that the alternate names are available for V9 only:
+// %asr2-asr6 : valid on both V8 and V9.
+// %ccr, %asi, etc.: valid on V9, returns "no such register" error on V8.
+def ASR2 : SparcCtrlReg<2, "asr2", ["ccr"]>;
+def ASR3 : SparcCtrlReg<3, "asr3", ["asi"]>;
+def ASR4 : SparcCtrlReg<4, "asr4", ["tick"]>;
+def ASR5 : SparcCtrlReg<5, "asr5", ["pc"]>;
+def ASR6 : SparcCtrlReg<6, "asr6", ["fprs"]>;
+}
+def ASR7 : SparcCtrlReg< 7, "asr7">;
+def ASR8 : SparcCtrlReg< 8, "asr8">;
+def ASR9 : SparcCtrlReg< 9, "asr9">;
+def ASR10 : SparcCtrlReg<10, "asr10">;
+def ASR11 : SparcCtrlReg<11, "asr11">;
+def ASR12 : SparcCtrlReg<12, "asr12">;
+def ASR13 : SparcCtrlReg<13, "asr13">;
+def ASR14 : SparcCtrlReg<14, "asr14">;
+def ASR15 : SparcCtrlReg<15, "asr15">;
+def ASR16 : SparcCtrlReg<16, "asr16">;
+def ASR17 : SparcCtrlReg<17, "asr17">;
+def ASR18 : SparcCtrlReg<18, "asr18">;
+def ASR19 : SparcCtrlReg<19, "asr19">;
+def ASR20 : SparcCtrlReg<20, "asr20">;
+def ASR21 : SparcCtrlReg<21, "asr21">;
+def ASR22 : SparcCtrlReg<22, "asr22">;
+def ASR23 : SparcCtrlReg<23, "asr23">;
+def ASR24 : SparcCtrlReg<24, "asr24">;
+def ASR25 : SparcCtrlReg<25, "asr25">;
+def ASR26 : SparcCtrlReg<26, "asr26">;
+def ASR27 : SparcCtrlReg<27, "asr27">;
+def ASR28 : SparcCtrlReg<28, "asr28">;
+def ASR29 : SparcCtrlReg<29, "asr29">;
+def ASR30 : SparcCtrlReg<30, "asr30">;
+def ASR31 : SparcCtrlReg<31, "asr31">;
// Note that PSR, WIM, and TBR don't exist on the SparcV9, only the V8.
-def PSR : SparcCtrlReg<0, "PSR">;
-def WIM : SparcCtrlReg<0, "WIM">;
-def TBR : SparcCtrlReg<0, "TBR">;
-// PC on the other hand is only available for SparcV9.
-def PC : SparcCtrlReg<5, "PC">;
-
-def TPC : SparcCtrlReg<0, "TPC">;
-def TNPC : SparcCtrlReg<1, "TNPC">;
-def TSTATE : SparcCtrlReg<2, "TSTATE">;
-def TT : SparcCtrlReg<3, "TT">;
-def TICK : SparcCtrlReg<4, "TICK">;
-def TBA : SparcCtrlReg<5, "TBA">;
-def PSTATE : SparcCtrlReg<6, "PSTATE">;
-def TL : SparcCtrlReg<7, "TL">;
-def PIL : SparcCtrlReg<8, "PIL">;
-def CWP : SparcCtrlReg<9, "CWP">;
-def CANSAVE : SparcCtrlReg<10, "CANSAVE">;
-def CANRESTORE : SparcCtrlReg<11, "CANRESTORE">;
-def CLEANWIN : SparcCtrlReg<12, "CLEANWIN">;
-def OTHERWIN : SparcCtrlReg<13, "OTHERWIN">;
-def WSTATE : SparcCtrlReg<14, "WSTATE">;
+def PSR : SparcCtrlReg<0, "psr">;
+def WIM : SparcCtrlReg<0, "wim">;
+def TBR : SparcCtrlReg<0, "tbr">;
+
+// Privileged V9 state registers
+def TPC : SparcCtrlReg< 0, "tpc">;
+def TNPC : SparcCtrlReg< 1, "tnpc">;
+def TSTATE : SparcCtrlReg< 2, "tstate">;
+def TT : SparcCtrlReg< 3, "tt">;
+def TICK : SparcCtrlReg< 4, "tick">;
+def TBA : SparcCtrlReg< 5, "tba">;
+def PSTATE : SparcCtrlReg< 6, "pstate">;
+def TL : SparcCtrlReg< 7, "tl">;
+def PIL : SparcCtrlReg< 8, "pil">;
+def CWP : SparcCtrlReg< 9, "cwp">;
+def CANSAVE : SparcCtrlReg<10, "cansave">;
+def CANRESTORE : SparcCtrlReg<11, "canrestore">;
+def CLEANWIN : SparcCtrlReg<12, "cleanwin">;
+def OTHERWIN : SparcCtrlReg<13, "otherwin">;
+def WSTATE : SparcCtrlReg<14, "wstate">;
+def GL : SparcCtrlReg<16, "gl">;
+def VER : SparcCtrlReg<31, "ver">;
// Integer registers
-def G0 : Ri< 0, "G0">, DwarfRegNum<[0]> {
+def G0 : Ri< 0, "g0">, DwarfRegNum<[0]> {
let isConstant = true;
}
-def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>;
-def G2 : Ri< 2, "G2">, DwarfRegNum<[2]>;
-def G3 : Ri< 3, "G3">, DwarfRegNum<[3]>;
-def G4 : Ri< 4, "G4">, DwarfRegNum<[4]>;
-def G5 : Ri< 5, "G5">, DwarfRegNum<[5]>;
-def G6 : Ri< 6, "G6">, DwarfRegNum<[6]>;
-def G7 : Ri< 7, "G7">, DwarfRegNum<[7]>;
-def O0 : Ri< 8, "O0">, DwarfRegNum<[8]>;
-def O1 : Ri< 9, "O1">, DwarfRegNum<[9]>;
-def O2 : Ri<10, "O2">, DwarfRegNum<[10]>;
-def O3 : Ri<11, "O3">, DwarfRegNum<[11]>;
-def O4 : Ri<12, "O4">, DwarfRegNum<[12]>;
-def O5 : Ri<13, "O5">, DwarfRegNum<[13]>;
-def O6 : Ri<14, "SP">, DwarfRegNum<[14]>;
-def O7 : Ri<15, "O7">, DwarfRegNum<[15]>;
-def L0 : Ri<16, "L0">, DwarfRegNum<[16]>;
-def L1 : Ri<17, "L1">, DwarfRegNum<[17]>;
-def L2 : Ri<18, "L2">, DwarfRegNum<[18]>;
-def L3 : Ri<19, "L3">, DwarfRegNum<[19]>;
-def L4 : Ri<20, "L4">, DwarfRegNum<[20]>;
-def L5 : Ri<21, "L5">, DwarfRegNum<[21]>;
-def L6 : Ri<22, "L6">, DwarfRegNum<[22]>;
-def L7 : Ri<23, "L7">, DwarfRegNum<[23]>;
-def I0 : Ri<24, "I0">, DwarfRegNum<[24]>;
-def I1 : Ri<25, "I1">, DwarfRegNum<[25]>;
-def I2 : Ri<26, "I2">, DwarfRegNum<[26]>;
-def I3 : Ri<27, "I3">, DwarfRegNum<[27]>;
-def I4 : Ri<28, "I4">, DwarfRegNum<[28]>;
-def I5 : Ri<29, "I5">, DwarfRegNum<[29]>;
-def I6 : Ri<30, "FP">, DwarfRegNum<[30]>;
-def I7 : Ri<31, "I7">, DwarfRegNum<[31]>;
+def G1 : Ri< 1, "g1">, DwarfRegNum<[1]>;
+def G2 : Ri< 2, "g2">, DwarfRegNum<[2]>;
+def G3 : Ri< 3, "g3">, DwarfRegNum<[3]>;
+def G4 : Ri< 4, "g4">, DwarfRegNum<[4]>;
+def G5 : Ri< 5, "g5">, DwarfRegNum<[5]>;
+def G6 : Ri< 6, "g6">, DwarfRegNum<[6]>;
+def G7 : Ri< 7, "g7">, DwarfRegNum<[7]>;
+def O0 : Ri< 8, "o0">, DwarfRegNum<[8]>;
+def O1 : Ri< 9, "o1">, DwarfRegNum<[9]>;
+def O2 : Ri<10, "o2">, DwarfRegNum<[10]>;
+def O3 : Ri<11, "o3">, DwarfRegNum<[11]>;
+def O4 : Ri<12, "o4">, DwarfRegNum<[12]>;
+def O5 : Ri<13, "o5">, DwarfRegNum<[13]>;
+def O6 : Ri<14, "sp">, DwarfRegNum<[14]>;
+def O7 : Ri<15, "o7">, DwarfRegNum<[15]>;
+def L0 : Ri<16, "l0">, DwarfRegNum<[16]>;
+def L1 : Ri<17, "l1">, DwarfRegNum<[17]>;
+def L2 : Ri<18, "l2">, DwarfRegNum<[18]>;
+def L3 : Ri<19, "l3">, DwarfRegNum<[19]>;
+def L4 : Ri<20, "l4">, DwarfRegNum<[20]>;
+def L5 : Ri<21, "l5">, DwarfRegNum<[21]>;
+def L6 : Ri<22, "l6">, DwarfRegNum<[22]>;
+def L7 : Ri<23, "l7">, DwarfRegNum<[23]>;
+def I0 : Ri<24, "i0">, DwarfRegNum<[24]>;
+def I1 : Ri<25, "i1">, DwarfRegNum<[25]>;
+def I2 : Ri<26, "i2">, DwarfRegNum<[26]>;
+def I3 : Ri<27, "i3">, DwarfRegNum<[27]>;
+def I4 : Ri<28, "i4">, DwarfRegNum<[28]>;
+def I5 : Ri<29, "i5">, DwarfRegNum<[29]>;
+def I6 : Ri<30, "fp">, DwarfRegNum<[30]>;
+def I7 : Ri<31, "i7">, DwarfRegNum<[31]>;
// Floating-point registers
-def F0 : Rf< 0, "F0">, DwarfRegNum<[32]>;
-def F1 : Rf< 1, "F1">, DwarfRegNum<[33]>;
-def F2 : Rf< 2, "F2">, DwarfRegNum<[34]>;
-def F3 : Rf< 3, "F3">, DwarfRegNum<[35]>;
-def F4 : Rf< 4, "F4">, DwarfRegNum<[36]>;
-def F5 : Rf< 5, "F5">, DwarfRegNum<[37]>;
-def F6 : Rf< 6, "F6">, DwarfRegNum<[38]>;
-def F7 : Rf< 7, "F7">, DwarfRegNum<[39]>;
-def F8 : Rf< 8, "F8">, DwarfRegNum<[40]>;
-def F9 : Rf< 9, "F9">, DwarfRegNum<[41]>;
-def F10 : Rf<10, "F10">, DwarfRegNum<[42]>;
-def F11 : Rf<11, "F11">, DwarfRegNum<[43]>;
-def F12 : Rf<12, "F12">, DwarfRegNum<[44]>;
-def F13 : Rf<13, "F13">, DwarfRegNum<[45]>;
-def F14 : Rf<14, "F14">, DwarfRegNum<[46]>;
-def F15 : Rf<15, "F15">, DwarfRegNum<[47]>;
-def F16 : Rf<16, "F16">, DwarfRegNum<[48]>;
-def F17 : Rf<17, "F17">, DwarfRegNum<[49]>;
-def F18 : Rf<18, "F18">, DwarfRegNum<[50]>;
-def F19 : Rf<19, "F19">, DwarfRegNum<[51]>;
-def F20 : Rf<20, "F20">, DwarfRegNum<[52]>;
-def F21 : Rf<21, "F21">, DwarfRegNum<[53]>;
-def F22 : Rf<22, "F22">, DwarfRegNum<[54]>;
-def F23 : Rf<23, "F23">, DwarfRegNum<[55]>;
-def F24 : Rf<24, "F24">, DwarfRegNum<[56]>;
-def F25 : Rf<25, "F25">, DwarfRegNum<[57]>;
-def F26 : Rf<26, "F26">, DwarfRegNum<[58]>;
-def F27 : Rf<27, "F27">, DwarfRegNum<[59]>;
-def F28 : Rf<28, "F28">, DwarfRegNum<[60]>;
-def F29 : Rf<29, "F29">, DwarfRegNum<[61]>;
-def F30 : Rf<30, "F30">, DwarfRegNum<[62]>;
-def F31 : Rf<31, "F31">, DwarfRegNum<[63]>;
+def F0 : Rf< 0, "f0">, DwarfRegNum<[32]>;
+def F1 : Rf< 1, "f1">, DwarfRegNum<[33]>;
+def F2 : Rf< 2, "f2">, DwarfRegNum<[34]>;
+def F3 : Rf< 3, "f3">, DwarfRegNum<[35]>;
+def F4 : Rf< 4, "f4">, DwarfRegNum<[36]>;
+def F5 : Rf< 5, "f5">, DwarfRegNum<[37]>;
+def F6 : Rf< 6, "f6">, DwarfRegNum<[38]>;
+def F7 : Rf< 7, "f7">, DwarfRegNum<[39]>;
+def F8 : Rf< 8, "f8">, DwarfRegNum<[40]>;
+def F9 : Rf< 9, "f9">, DwarfRegNum<[41]>;
+def F10 : Rf<10, "f10">, DwarfRegNum<[42]>;
+def F11 : Rf<11, "f11">, DwarfRegNum<[43]>;
+def F12 : Rf<12, "f12">, DwarfRegNum<[44]>;
+def F13 : Rf<13, "f13">, DwarfRegNum<[45]>;
+def F14 : Rf<14, "f14">, DwarfRegNum<[46]>;
+def F15 : Rf<15, "f15">, DwarfRegNum<[47]>;
+def F16 : Rf<16, "f16">, DwarfRegNum<[48]>;
+def F17 : Rf<17, "f17">, DwarfRegNum<[49]>;
+def F18 : Rf<18, "f18">, DwarfRegNum<[50]>;
+def F19 : Rf<19, "f19">, DwarfRegNum<[51]>;
+def F20 : Rf<20, "f20">, DwarfRegNum<[52]>;
+def F21 : Rf<21, "f21">, DwarfRegNum<[53]>;
+def F22 : Rf<22, "f22">, DwarfRegNum<[54]>;
+def F23 : Rf<23, "f23">, DwarfRegNum<[55]>;
+def F24 : Rf<24, "f24">, DwarfRegNum<[56]>;
+def F25 : Rf<25, "f25">, DwarfRegNum<[57]>;
+def F26 : Rf<26, "f26">, DwarfRegNum<[58]>;
+def F27 : Rf<27, "f27">, DwarfRegNum<[59]>;
+def F28 : Rf<28, "f28">, DwarfRegNum<[60]>;
+def F29 : Rf<29, "f29">, DwarfRegNum<[61]>;
+def F30 : Rf<30, "f30">, DwarfRegNum<[62]>;
+def F31 : Rf<31, "f31">, DwarfRegNum<[63]>;
// Aliases of the F* registers used to hold 64-bit fp values (doubles)
-def D0 : Rd< 0, "F0", [F0, F1]>, DwarfRegNum<[72]>;
-def D1 : Rd< 2, "F2", [F2, F3]>, DwarfRegNum<[73]>;
-def D2 : Rd< 4, "F4", [F4, F5]>, DwarfRegNum<[74]>;
-def D3 : Rd< 6, "F6", [F6, F7]>, DwarfRegNum<[75]>;
-def D4 : Rd< 8, "F8", [F8, F9]>, DwarfRegNum<[76]>;
-def D5 : Rd<10, "F10", [F10, F11]>, DwarfRegNum<[77]>;
-def D6 : Rd<12, "F12", [F12, F13]>, DwarfRegNum<[78]>;
-def D7 : Rd<14, "F14", [F14, F15]>, DwarfRegNum<[79]>;
-def D8 : Rd<16, "F16", [F16, F17]>, DwarfRegNum<[80]>;
-def D9 : Rd<18, "F18", [F18, F19]>, DwarfRegNum<[81]>;
-def D10 : Rd<20, "F20", [F20, F21]>, DwarfRegNum<[82]>;
-def D11 : Rd<22, "F22", [F22, F23]>, DwarfRegNum<[83]>;
-def D12 : Rd<24, "F24", [F24, F25]>, DwarfRegNum<[84]>;
-def D13 : Rd<26, "F26", [F26, F27]>, DwarfRegNum<[85]>;
-def D14 : Rd<28, "F28", [F28, F29]>, DwarfRegNum<[86]>;
-def D15 : Rd<30, "F30", [F30, F31]>, DwarfRegNum<[87]>;
+def D0 : Rd< 0, "f0", [F0, F1]>, DwarfRegNum<[72]>;
+def D1 : Rd< 2, "f2", [F2, F3]>, DwarfRegNum<[73]>;
+def D2 : Rd< 4, "f4", [F4, F5]>, DwarfRegNum<[74]>;
+def D3 : Rd< 6, "f6", [F6, F7]>, DwarfRegNum<[75]>;
+def D4 : Rd< 8, "f8", [F8, F9]>, DwarfRegNum<[76]>;
+def D5 : Rd<10, "f10", [F10, F11]>, DwarfRegNum<[77]>;
+def D6 : Rd<12, "f12", [F12, F13]>, DwarfRegNum<[78]>;
+def D7 : Rd<14, "f14", [F14, F15]>, DwarfRegNum<[79]>;
+def D8 : Rd<16, "f16", [F16, F17]>, DwarfRegNum<[80]>;
+def D9 : Rd<18, "f18", [F18, F19]>, DwarfRegNum<[81]>;
+def D10 : Rd<20, "f20", [F20, F21]>, DwarfRegNum<[82]>;
+def D11 : Rd<22, "f22", [F22, F23]>, DwarfRegNum<[83]>;
+def D12 : Rd<24, "f24", [F24, F25]>, DwarfRegNum<[84]>;
+def D13 : Rd<26, "f26", [F26, F27]>, DwarfRegNum<[85]>;
+def D14 : Rd<28, "f28", [F28, F29]>, DwarfRegNum<[86]>;
+def D15 : Rd<30, "f30", [F30, F31]>, DwarfRegNum<[87]>;
// Co-processor registers
-def C0 : Ri< 0, "C0">;
-def C1 : Ri< 1, "C1">;
-def C2 : Ri< 2, "C2">;
-def C3 : Ri< 3, "C3">;
-def C4 : Ri< 4, "C4">;
-def C5 : Ri< 5, "C5">;
-def C6 : Ri< 6, "C6">;
-def C7 : Ri< 7, "C7">;
-def C8 : Ri< 8, "C8">;
-def C9 : Ri< 9, "C9">;
-def C10 : Ri< 10, "C10">;
-def C11 : Ri< 11, "C11">;
-def C12 : Ri< 12, "C12">;
-def C13 : Ri< 13, "C13">;
-def C14 : Ri< 14, "C14">;
-def C15 : Ri< 15, "C15">;
-def C16 : Ri< 16, "C16">;
-def C17 : Ri< 17, "C17">;
-def C18 : Ri< 18, "C18">;
-def C19 : Ri< 19, "C19">;
-def C20 : Ri< 20, "C20">;
-def C21 : Ri< 21, "C21">;
-def C22 : Ri< 22, "C22">;
-def C23 : Ri< 23, "C23">;
-def C24 : Ri< 24, "C24">;
-def C25 : Ri< 25, "C25">;
-def C26 : Ri< 26, "C26">;
-def C27 : Ri< 27, "C27">;
-def C28 : Ri< 28, "C28">;
-def C29 : Ri< 29, "C29">;
-def C30 : Ri< 30, "C30">;
-def C31 : Ri< 31, "C31">;
+def C0 : Ri< 0, "c0">;
+def C1 : Ri< 1, "c1">;
+def C2 : Ri< 2, "c2">;
+def C3 : Ri< 3, "c3">;
+def C4 : Ri< 4, "c4">;
+def C5 : Ri< 5, "c5">;
+def C6 : Ri< 6, "c6">;
+def C7 : Ri< 7, "c7">;
+def C8 : Ri< 8, "c8">;
+def C9 : Ri< 9, "c9">;
+def C10 : Ri<10, "c10">;
+def C11 : Ri<11, "c11">;
+def C12 : Ri<12, "c12">;
+def C13 : Ri<13, "c13">;
+def C14 : Ri<14, "c14">;
+def C15 : Ri<15, "c15">;
+def C16 : Ri<16, "c16">;
+def C17 : Ri<17, "c17">;
+def C18 : Ri<18, "c18">;
+def C19 : Ri<19, "c19">;
+def C20 : Ri<20, "c20">;
+def C21 : Ri<21, "c21">;
+def C22 : Ri<22, "c22">;
+def C23 : Ri<23, "c23">;
+def C24 : Ri<24, "c24">;
+def C25 : Ri<25, "c25">;
+def C26 : Ri<26, "c26">;
+def C27 : Ri<27, "c27">;
+def C28 : Ri<28, "c28">;
+def C29 : Ri<29, "c29">;
+def C30 : Ri<30, "c30">;
+def C31 : Ri<31, "c31">;
// Unaliased double precision floating point registers.
// FIXME: Define DwarfRegNum for these registers.
-def D16 : SparcReg< 1, "F32">;
-def D17 : SparcReg< 3, "F34">;
-def D18 : SparcReg< 5, "F36">;
-def D19 : SparcReg< 7, "F38">;
-def D20 : SparcReg< 9, "F40">;
-def D21 : SparcReg<11, "F42">;
-def D22 : SparcReg<13, "F44">;
-def D23 : SparcReg<15, "F46">;
-def D24 : SparcReg<17, "F48">;
-def D25 : SparcReg<19, "F50">;
-def D26 : SparcReg<21, "F52">;
-def D27 : SparcReg<23, "F54">;
-def D28 : SparcReg<25, "F56">;
-def D29 : SparcReg<27, "F58">;
-def D30 : SparcReg<29, "F60">;
-def D31 : SparcReg<31, "F62">;
+def D16 : SparcReg< 1, "f32">;
+def D17 : SparcReg< 3, "f34">;
+def D18 : SparcReg< 5, "f36">;
+def D19 : SparcReg< 7, "f38">;
+def D20 : SparcReg< 9, "f40">;
+def D21 : SparcReg<11, "f42">;
+def D22 : SparcReg<13, "f44">;
+def D23 : SparcReg<15, "f46">;
+def D24 : SparcReg<17, "f48">;
+def D25 : SparcReg<19, "f50">;
+def D26 : SparcReg<21, "f52">;
+def D27 : SparcReg<23, "f54">;
+def D28 : SparcReg<25, "f56">;
+def D29 : SparcReg<27, "f58">;
+def D30 : SparcReg<29, "f60">;
+def D31 : SparcReg<31, "f62">;
// Aliases of the F* registers used to hold 128-bit fp values (long doubles).
-def Q0 : Rq< 0, "F0", [D0, D1]>;
-def Q1 : Rq< 4, "F4", [D2, D3]>;
-def Q2 : Rq< 8, "F8", [D4, D5]>;
-def Q3 : Rq<12, "F12", [D6, D7]>;
-def Q4 : Rq<16, "F16", [D8, D9]>;
-def Q5 : Rq<20, "F20", [D10, D11]>;
-def Q6 : Rq<24, "F24", [D12, D13]>;
-def Q7 : Rq<28, "F28", [D14, D15]>;
-def Q8 : Rq< 1, "F32", [D16, D17]>;
-def Q9 : Rq< 5, "F36", [D18, D19]>;
-def Q10 : Rq< 9, "F40", [D20, D21]>;
-def Q11 : Rq<13, "F44", [D22, D23]>;
-def Q12 : Rq<17, "F48", [D24, D25]>;
-def Q13 : Rq<21, "F52", [D26, D27]>;
-def Q14 : Rq<25, "F56", [D28, D29]>;
-def Q15 : Rq<29, "F60", [D30, D31]>;
+def Q0 : Rq< 0, "f0", [D0, D1]>;
+def Q1 : Rq< 4, "f4", [D2, D3]>;
+def Q2 : Rq< 8, "f8", [D4, D5]>;
+def Q3 : Rq<12, "f12", [D6, D7]>;
+def Q4 : Rq<16, "f16", [D8, D9]>;
+def Q5 : Rq<20, "f20", [D10, D11]>;
+def Q6 : Rq<24, "f24", [D12, D13]>;
+def Q7 : Rq<28, "f28", [D14, D15]>;
+def Q8 : Rq< 1, "f32", [D16, D17]>;
+def Q9 : Rq< 5, "f36", [D18, D19]>;
+def Q10 : Rq< 9, "f40", [D20, D21]>;
+def Q11 : Rq<13, "f44", [D22, D23]>;
+def Q12 : Rq<17, "f48", [D24, D25]>;
+def Q13 : Rq<21, "f52", [D26, D27]>;
+def Q14 : Rq<25, "f56", [D28, D29]>;
+def Q15 : Rq<29, "f60", [D30, D31]>;
// Aliases of the integer registers used for LDD/STD double-word operations
-def G0_G1 : Rdi<0, "G0", [G0, G1]>;
-def G2_G3 : Rdi<2, "G2", [G2, G3]>;
-def G4_G5 : Rdi<4, "G4", [G4, G5]>;
-def G6_G7 : Rdi<6, "G6", [G6, G7]>;
-def O0_O1 : Rdi<8, "O0", [O0, O1]>;
-def O2_O3 : Rdi<10, "O2", [O2, O3]>;
-def O4_O5 : Rdi<12, "O4", [O4, O5]>;
-def O6_O7 : Rdi<14, "O6", [O6, O7]>;
-def L0_L1 : Rdi<16, "L0", [L0, L1]>;
-def L2_L3 : Rdi<18, "L2", [L2, L3]>;
-def L4_L5 : Rdi<20, "L4", [L4, L5]>;
-def L6_L7 : Rdi<22, "L6", [L6, L7]>;
-def I0_I1 : Rdi<24, "I0", [I0, I1]>;
-def I2_I3 : Rdi<26, "I2", [I2, I3]>;
-def I4_I5 : Rdi<28, "I4", [I4, I5]>;
-def I6_I7 : Rdi<30, "I6", [I6, I7]>;
+def G0_G1 : Rdi< 0, "g0", [G0, G1]>;
+def G2_G3 : Rdi< 2, "g2", [G2, G3]>;
+def G4_G5 : Rdi< 4, "g4", [G4, G5]>;
+def G6_G7 : Rdi< 6, "g6", [G6, G7]>;
+def O0_O1 : Rdi< 8, "o0", [O0, O1]>;
+def O2_O3 : Rdi<10, "o2", [O2, O3]>;
+def O4_O5 : Rdi<12, "o4", [O4, O5]>;
+def O6_O7 : Rdi<14, "o6", [O6, O7]>;
+def L0_L1 : Rdi<16, "l0", [L0, L1]>;
+def L2_L3 : Rdi<18, "l2", [L2, L3]>;
+def L4_L5 : Rdi<20, "l4", [L4, L5]>;
+def L6_L7 : Rdi<22, "l6", [L6, L7]>;
+def I0_I1 : Rdi<24, "i0", [I0, I1]>;
+def I2_I3 : Rdi<26, "i2", [I2, I3]>;
+def I4_I5 : Rdi<28, "i4", [I4, I5]>;
+def I6_I7 : Rdi<30, "i6", [I6, I7]>;
// Aliases of the co-processor registers used for LDD/STD double-word operations
-def C0_C1 : Rdi<0, "C0", [C0, C1]>;
-def C2_C3 : Rdi<2, "C2", [C2, C3]>;
-def C4_C5 : Rdi<4, "C4", [C4, C5]>;
-def C6_C7 : Rdi<6, "C6", [C6, C7]>;
-def C8_C9 : Rdi<8, "C8", [C8, C9]>;
-def C10_C11 : Rdi<10, "C10", [C10, C11]>;
-def C12_C13 : Rdi<12, "C12", [C12, C13]>;
-def C14_C15 : Rdi<14, "C14", [C14, C15]>;
-def C16_C17 : Rdi<16, "C16", [C16, C17]>;
-def C18_C19 : Rdi<18, "C18", [C18, C19]>;
-def C20_C21 : Rdi<20, "C20", [C20, C21]>;
-def C22_C23 : Rdi<22, "C22", [C22, C23]>;
-def C24_C25 : Rdi<24, "C24", [C24, C25]>;
-def C26_C27 : Rdi<26, "C26", [C26, C27]>;
-def C28_C29 : Rdi<28, "C28", [C28, C29]>;
-def C30_C31 : Rdi<30, "C30", [C30, C31]>;
+def C0_C1 : Rdi< 0, "c0", [C0, C1]>;
+def C2_C3 : Rdi< 2, "c2", [C2, C3]>;
+def C4_C5 : Rdi< 4, "c4", [C4, C5]>;
+def C6_C7 : Rdi< 6, "c6", [C6, C7]>;
+def C8_C9 : Rdi< 8, "c8", [C8, C9]>;
+def C10_C11 : Rdi<10, "c10", [C10, C11]>;
+def C12_C13 : Rdi<12, "c12", [C12, C13]>;
+def C14_C15 : Rdi<14, "c14", [C14, C15]>;
+def C16_C17 : Rdi<16, "c16", [C16, C17]>;
+def C18_C19 : Rdi<18, "c18", [C18, C19]>;
+def C20_C21 : Rdi<20, "c20", [C20, C21]>;
+def C22_C23 : Rdi<22, "c22", [C22, C23]>;
+def C24_C25 : Rdi<24, "c24", [C24, C25]>;
+def C26_C27 : Rdi<26, "c26", [C26, C27]>;
+def C28_C29 : Rdi<28, "c28", [C28, C29]>;
+def C30_C31 : Rdi<30, "c30", [C30, C31]>;
// Register classes.
//
@@ -361,8 +372,11 @@ def FCCRegs : RegisterClass<"SP", [i1], 1, (sequence "FCC%u", 0, 3)>;
let isAllocatable = 0 in {
// Ancillary state registers
+ // FIXME: TICK is special-cased here as it can be accessed
+ // from the ASR (as ASR4) or the privileged register set.
+ // For now this is required for the parser to work.
def ASRRegs : RegisterClass<"SP", [i32], 32,
- (add Y, (sequence "ASR%u", 1, 31))>;
+ (add Y, TICK, (sequence "ASR%u", 1, 31))>;
// This register class should not be used to hold i64 values.
def CoprocRegs : RegisterClass<"SP", [i32], 32,
@@ -379,5 +393,4 @@ let isAllocatable = 0 in {
// Privileged Registers
def PRRegs : RegisterClass<"SP", [i64], 64,
(add TPC, TNPC, TSTATE, TT, TICK, TBA, PSTATE, TL, PIL, CWP,
- CANSAVE, CANRESTORE, CLEANWIN, OTHERWIN, WSTATE)>;
-
+ CANSAVE, CANRESTORE, CLEANWIN, OTHERWIN, WSTATE, GL, VER)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
index 577dc1351de9..dbc26636e39f 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -100,16 +100,14 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT,
+ CodeGenOptLevel OL, bool JIT,
bool is64bit)
: LLVMTargetMachine(T, computeDataLayout(TT, is64bit), TT, CPU, FS, Options,
getEffectiveRelocModel(RM),
getEffectiveSparcCodeModel(
CM, getEffectiveRelocModel(RM), is64bit, JIT),
OL),
- TLOF(std::make_unique<SparcELFTargetObjectFile>()),
- Subtarget(TT, std::string(CPU), std::string(FS), *this, is64bit),
- is64Bit(is64bit) {
+ TLOF(std::make_unique<SparcELFTargetObjectFile>()), is64Bit(is64bit) {
initAsmInfo();
}
@@ -189,18 +187,9 @@ void SparcPassConfig::addPreEmitPass(){
addPass(&BranchRelaxationPassID);
addPass(createSparcDelaySlotFillerPass());
-
- if (this->getSparcTargetMachine().getSubtargetImpl()->insertNOPLoad())
- {
- addPass(new InsertNOPLoad());
- }
- if (this->getSparcTargetMachine().getSubtargetImpl()->detectRoundChange()) {
- addPass(new DetectRoundChange());
- }
- if (this->getSparcTargetMachine().getSubtargetImpl()->fixAllFDIVSQRT())
- {
- addPass(new FixAllFDIVSQRT());
- }
+ addPass(new InsertNOPLoad());
+ addPass(new DetectRoundChange());
+ addPass(new FixAllFDIVSQRT());
}
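With the TargetMachine no longer holding a single SparcSubtarget (see the header change below), the pass config cannot ask it which LEON errata fixes are enabled, so the three workaround passes are now added unconditionally and have to gate themselves on the per-function subtarget. A minimal sketch of that pattern, assuming the passes keep the old predicate names; the body shown here is illustrative, not this file's code:

    bool InsertNOPLoad::runOnMachineFunction(MachineFunction &MF) {
      // Query the subtarget of the function being compiled, not a
      // machine-wide one, and bail out when the fix is not requested.
      const SparcSubtarget &ST = MF.getSubtarget<SparcSubtarget>();
      if (!ST.insertNOPLoad())
        return false;
      bool Changed = false;
      // ... insert a NOP after each affected load instruction ...
      return Changed;
    }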
void SparcV8TargetMachine::anchor() { }
@@ -210,7 +199,7 @@ SparcV8TargetMachine::SparcV8TargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, false) {}
void SparcV9TargetMachine::anchor() { }
@@ -220,7 +209,7 @@ SparcV9TargetMachine::SparcV9TargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, true) {}
void SparcelTargetMachine::anchor() {}
@@ -230,5 +219,5 @@ SparcelTargetMachine::SparcelTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, false) {}
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.h
index 6e3c8e7df43f..497d5f6623cd 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.h
@@ -22,7 +22,6 @@ namespace llvm {
class SparcTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- SparcSubtarget Subtarget;
bool is64Bit;
mutable StringMap<std::unique_ptr<SparcSubtarget>> SubtargetMap;
@@ -30,12 +29,11 @@ public:
SparcTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT, bool is64bit);
~SparcTargetMachine() override;
- const SparcSubtarget *getSubtargetImpl() const { return &Subtarget; }
- const SparcSubtarget *getSubtargetImpl(const Function &) const override;
+ const SparcSubtarget *getSubtargetImpl(const Function &F) const override;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
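With the eager Subtarget member gone, every query funnels through getSubtargetImpl(const Function &), which lazily builds one SparcSubtarget per distinct CPU/feature-string pair and caches it in SubtargetMap. A sketch of the usual LLVM shape of this function; the attribute handling is an assumption, and the constructor arguments mirror the call deleted from SparcTargetMachine.cpp above:

    const SparcSubtarget *
    SparcTargetMachine::getSubtargetImpl(const Function &F) const {
      Attribute CPUAttr = F.getFnAttribute("target-cpu");
      Attribute FSAttr = F.getFnAttribute("target-features");
      std::string CPU =
          CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
      std::string FS =
          FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
      // One cached subtarget per unique CPU+features key.
      auto &I = SubtargetMap[CPU + FS];
      if (!I)
        I = std::make_unique<SparcSubtarget>(TargetTriple, CPU, FS, *this,
                                             is64Bit);
      return I.get();
    }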
@@ -57,7 +55,7 @@ public:
SparcV8TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
};
@@ -70,7 +68,7 @@ public:
SparcV9TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
};
@@ -81,7 +79,7 @@ public:
SparcelTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
};
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index dc4f2a438c9f..a58e8e0dfedf 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -154,8 +154,8 @@ private:
}
public:
- SystemZOperand(OperandKind kind, SMLoc startLoc, SMLoc endLoc)
- : Kind(kind), StartLoc(startLoc), EndLoc(endLoc) {}
+ SystemZOperand(OperandKind Kind, SMLoc StartLoc, SMLoc EndLoc)
+ : Kind(Kind), StartLoc(StartLoc), EndLoc(EndLoc) {}
// Create particular kinds of operand.
static std::unique_ptr<SystemZOperand> createInvalid(SMLoc StartLoc,
@@ -420,10 +420,9 @@ private:
bool parseIntegerRegister(Register &Reg, RegisterGroup Group);
- OperandMatchResultTy parseRegister(OperandVector &Operands,
- RegisterKind Kind);
+ ParseStatus parseRegister(OperandVector &Operands, RegisterKind Kind);
- OperandMatchResultTy parseAnyRegister(OperandVector &Operands);
+ ParseStatus parseAnyRegister(OperandVector &Operands);
bool parseAddress(bool &HaveReg1, Register &Reg1, bool &HaveReg2,
Register &Reg2, const MCExpr *&Disp, const MCExpr *&Length,
@@ -434,12 +433,11 @@ private:
bool ParseDirectiveMachine(SMLoc L);
bool ParseGNUAttribute(SMLoc L);
- OperandMatchResultTy parseAddress(OperandVector &Operands,
- MemoryKind MemKind,
- RegisterKind RegKind);
+ ParseStatus parseAddress(OperandVector &Operands, MemoryKind MemKind,
+ RegisterKind RegKind);
- OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal,
- int64_t MaxVal, bool AllowTLS);
+ ParseStatus parsePCRel(OperandVector &Operands, int64_t MinVal,
+ int64_t MaxVal, bool AllowTLS);
bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
@@ -495,12 +493,11 @@ public:
// Override MCTargetAsmParser.
ParseStatus parseDirective(AsmToken DirectiveID) override;
- bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
bool RestoreOnFailure);
- OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -510,96 +507,96 @@ public:
bool isLabel(AsmToken &Token) override;
// Used by the TableGen code to parse particular operand types.
- OperandMatchResultTy parseGR32(OperandVector &Operands) {
+ ParseStatus parseGR32(OperandVector &Operands) {
return parseRegister(Operands, GR32Reg);
}
- OperandMatchResultTy parseGRH32(OperandVector &Operands) {
+ ParseStatus parseGRH32(OperandVector &Operands) {
return parseRegister(Operands, GRH32Reg);
}
- OperandMatchResultTy parseGRX32(OperandVector &Operands) {
+ ParseStatus parseGRX32(OperandVector &Operands) {
llvm_unreachable("GRX32 should only be used for pseudo instructions");
}
- OperandMatchResultTy parseGR64(OperandVector &Operands) {
+ ParseStatus parseGR64(OperandVector &Operands) {
return parseRegister(Operands, GR64Reg);
}
- OperandMatchResultTy parseGR128(OperandVector &Operands) {
+ ParseStatus parseGR128(OperandVector &Operands) {
return parseRegister(Operands, GR128Reg);
}
- OperandMatchResultTy parseADDR32(OperandVector &Operands) {
+ ParseStatus parseADDR32(OperandVector &Operands) {
// For the AsmParser, we will accept %r0 for ADDR32 as well.
return parseRegister(Operands, GR32Reg);
}
- OperandMatchResultTy parseADDR64(OperandVector &Operands) {
+ ParseStatus parseADDR64(OperandVector &Operands) {
// For the AsmParser, we will accept %r0 for ADDR64 as well.
return parseRegister(Operands, GR64Reg);
}
- OperandMatchResultTy parseADDR128(OperandVector &Operands) {
+ ParseStatus parseADDR128(OperandVector &Operands) {
llvm_unreachable("Shouldn't be used as an operand");
}
- OperandMatchResultTy parseFP32(OperandVector &Operands) {
+ ParseStatus parseFP32(OperandVector &Operands) {
return parseRegister(Operands, FP32Reg);
}
- OperandMatchResultTy parseFP64(OperandVector &Operands) {
+ ParseStatus parseFP64(OperandVector &Operands) {
return parseRegister(Operands, FP64Reg);
}
- OperandMatchResultTy parseFP128(OperandVector &Operands) {
+ ParseStatus parseFP128(OperandVector &Operands) {
return parseRegister(Operands, FP128Reg);
}
- OperandMatchResultTy parseVR32(OperandVector &Operands) {
+ ParseStatus parseVR32(OperandVector &Operands) {
return parseRegister(Operands, VR32Reg);
}
- OperandMatchResultTy parseVR64(OperandVector &Operands) {
+ ParseStatus parseVR64(OperandVector &Operands) {
return parseRegister(Operands, VR64Reg);
}
- OperandMatchResultTy parseVF128(OperandVector &Operands) {
+ ParseStatus parseVF128(OperandVector &Operands) {
llvm_unreachable("Shouldn't be used as an operand");
}
- OperandMatchResultTy parseVR128(OperandVector &Operands) {
+ ParseStatus parseVR128(OperandVector &Operands) {
return parseRegister(Operands, VR128Reg);
}
- OperandMatchResultTy parseAR32(OperandVector &Operands) {
+ ParseStatus parseAR32(OperandVector &Operands) {
return parseRegister(Operands, AR32Reg);
}
- OperandMatchResultTy parseCR64(OperandVector &Operands) {
+ ParseStatus parseCR64(OperandVector &Operands) {
return parseRegister(Operands, CR64Reg);
}
- OperandMatchResultTy parseAnyReg(OperandVector &Operands) {
+ ParseStatus parseAnyReg(OperandVector &Operands) {
return parseAnyRegister(Operands);
}
- OperandMatchResultTy parseBDAddr32(OperandVector &Operands) {
+ ParseStatus parseBDAddr32(OperandVector &Operands) {
return parseAddress(Operands, BDMem, GR32Reg);
}
- OperandMatchResultTy parseBDAddr64(OperandVector &Operands) {
+ ParseStatus parseBDAddr64(OperandVector &Operands) {
return parseAddress(Operands, BDMem, GR64Reg);
}
- OperandMatchResultTy parseBDXAddr64(OperandVector &Operands) {
+ ParseStatus parseBDXAddr64(OperandVector &Operands) {
return parseAddress(Operands, BDXMem, GR64Reg);
}
- OperandMatchResultTy parseBDLAddr64(OperandVector &Operands) {
+ ParseStatus parseBDLAddr64(OperandVector &Operands) {
return parseAddress(Operands, BDLMem, GR64Reg);
}
- OperandMatchResultTy parseBDRAddr64(OperandVector &Operands) {
+ ParseStatus parseBDRAddr64(OperandVector &Operands) {
return parseAddress(Operands, BDRMem, GR64Reg);
}
- OperandMatchResultTy parseBDVAddr64(OperandVector &Operands) {
+ ParseStatus parseBDVAddr64(OperandVector &Operands) {
return parseAddress(Operands, BDVMem, GR64Reg);
}
- OperandMatchResultTy parsePCRel12(OperandVector &Operands) {
+ ParseStatus parsePCRel12(OperandVector &Operands) {
return parsePCRel(Operands, -(1LL << 12), (1LL << 12) - 1, false);
}
- OperandMatchResultTy parsePCRel16(OperandVector &Operands) {
+ ParseStatus parsePCRel16(OperandVector &Operands) {
return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1, false);
}
- OperandMatchResultTy parsePCRel24(OperandVector &Operands) {
+ ParseStatus parsePCRel24(OperandVector &Operands) {
return parsePCRel(Operands, -(1LL << 24), (1LL << 24) - 1, false);
}
- OperandMatchResultTy parsePCRel32(OperandVector &Operands) {
+ ParseStatus parsePCRel32(OperandVector &Operands) {
return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, false);
}
- OperandMatchResultTy parsePCRelTLS16(OperandVector &Operands) {
+ ParseStatus parsePCRelTLS16(OperandVector &Operands) {
return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1, true);
}
- OperandMatchResultTy parsePCRelTLS32(OperandVector &Operands) {
+ ParseStatus parsePCRelTLS32(OperandVector &Operands) {
return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, true);
}
};
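Every helper above moves from OperandMatchResultTy to ParseStatus; the mapping is mechanical (MatchOperand_Success -> ParseStatus::Success, MatchOperand_ParseFail -> ParseStatus::Failure, MatchOperand_NoMatch -> ParseStatus::NoMatch). ParseStatus is roughly the following shape, trimmed from llvm/MC/MCParser/MCTargetAsmParser.h:

    struct ParseStatus {
      enum class StatusTy { Success, Failure, NoMatch } Status;

      constexpr ParseStatus(StatusTy Status) : Status(Status) {}
      // The implicit bool constructor is what lets the rewritten bodies
      // below say "return Error(Loc, ...)": MCAsmParser::Error() always
      // returns true, which converts to Failure.
      constexpr ParseStatus(bool Error)
          : Status(Error ? StatusTy::Failure : StatusTy::Success) {}

      static constexpr StatusTy Success = StatusTy::Success;
      static constexpr StatusTy Failure = StatusTy::Failure;
      static constexpr StatusTy NoMatch = StatusTy::NoMatch;

      constexpr bool isSuccess() const { return Status == StatusTy::Success; }
      constexpr bool isFailure() const { return Status == StatusTy::Failure; }
      constexpr bool isNoMatch() const { return Status == StatusTy::NoMatch; }
    };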
@@ -814,8 +811,8 @@ bool SystemZAsmParser::parseRegister(Register &Reg, bool RestoreOnFailure) {
}
// Parse a register of kind Kind and add it to Operands.
-OperandMatchResultTy
-SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterKind Kind) {
+ParseStatus SystemZAsmParser::parseRegister(OperandVector &Operands,
+ RegisterKind Kind) {
Register Reg;
RegisterGroup Group;
switch (Kind) {
@@ -846,7 +843,7 @@ SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterKind Kind) {
// Handle register names of the form %<prefix><number>
if (isParsingATT() && Parser.getTok().is(AsmToken::Percent)) {
if (parseRegister(Reg))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
// Check the parsed register group "Reg.Group" against the expected "Group";
// we have to error out if the user specified the wrong prefix.
@@ -855,25 +852,21 @@ SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterKind Kind) {
case RegFP:
case RegAR:
case RegCR:
- if (Group != Reg.Group) {
- Error(Reg.StartLoc, "invalid operand for instruction");
- return MatchOperand_ParseFail;
- }
+ if (Group != Reg.Group)
+ return Error(Reg.StartLoc, "invalid operand for instruction");
break;
case RegV:
- if (Reg.Group != RegV && Reg.Group != RegFP) {
- Error(Reg.StartLoc, "invalid operand for instruction");
- return MatchOperand_ParseFail;
- }
+ if (Reg.Group != RegV && Reg.Group != RegFP)
+ return Error(Reg.StartLoc, "invalid operand for instruction");
break;
}
} else if (Parser.getTok().is(AsmToken::Integer)) {
if (parseIntegerRegister(Reg, Group))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
}
// Otherwise we didn't match a register operand.
else
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
// Determine the LLVM register number according to Kind.
const unsigned *Regs;
@@ -891,33 +884,28 @@ SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterKind Kind) {
case AR32Reg: Regs = SystemZMC::AR32Regs; break;
case CR64Reg: Regs = SystemZMC::CR64Regs; break;
}
- if (Regs[Reg.Num] == 0) {
- Error(Reg.StartLoc, "invalid register pair");
- return MatchOperand_ParseFail;
- }
+ if (Regs[Reg.Num] == 0)
+ return Error(Reg.StartLoc, "invalid register pair");
Operands.push_back(
SystemZOperand::createReg(Kind, Regs[Reg.Num], Reg.StartLoc, Reg.EndLoc));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
// Parse any type of register (including integers) and add it to Operands.
-OperandMatchResultTy
-SystemZAsmParser::parseAnyRegister(OperandVector &Operands) {
+ParseStatus SystemZAsmParser::parseAnyRegister(OperandVector &Operands) {
SMLoc StartLoc = Parser.getTok().getLoc();
// Handle integer values.
if (Parser.getTok().is(AsmToken::Integer)) {
const MCExpr *Register;
if (Parser.parseExpression(Register))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
if (auto *CE = dyn_cast<MCConstantExpr>(Register)) {
int64_t Value = CE->getValue();
- if (Value < 0 || Value > 15) {
- Error(StartLoc, "invalid register");
- return MatchOperand_ParseFail;
- }
+ if (Value < 0 || Value > 15)
+ return Error(StartLoc, "invalid register");
}
SMLoc EndLoc =
@@ -927,16 +915,14 @@ SystemZAsmParser::parseAnyRegister(OperandVector &Operands) {
}
else {
if (isParsingHLASM())
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Register Reg;
if (parseRegister(Reg))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
- if (Reg.Num > 15) {
- Error(StartLoc, "invalid register");
- return MatchOperand_ParseFail;
- }
+ if (Reg.Num > 15)
+ return Error(StartLoc, "invalid register");
// Map to the correct register kind.
RegisterKind Kind;
@@ -962,13 +948,13 @@ SystemZAsmParser::parseAnyRegister(OperandVector &Operands) {
RegNo = SystemZMC::CR64Regs[Reg.Num];
}
else {
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
}
Operands.push_back(SystemZOperand::createReg(Kind, RegNo,
Reg.StartLoc, Reg.EndLoc));
}
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
bool SystemZAsmParser::parseIntegerRegister(Register &Reg,
@@ -1098,7 +1084,8 @@ SystemZAsmParser::parseAddressRegister(Register &Reg) {
if (Reg.Group == RegV) {
Error(Reg.StartLoc, "invalid use of vector addressing");
return true;
- } else if (Reg.Group != RegGR) {
+ }
+ if (Reg.Group != RegGR) {
Error(Reg.StartLoc, "invalid address register");
return true;
}
@@ -1107,9 +1094,9 @@ SystemZAsmParser::parseAddressRegister(Register &Reg) {
// Parse a memory operand and add it to Operands. The other arguments
// are as above.
-OperandMatchResultTy
-SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
- RegisterKind RegKind) {
+ParseStatus SystemZAsmParser::parseAddress(OperandVector &Operands,
+ MemoryKind MemKind,
+ RegisterKind RegKind) {
SMLoc StartLoc = Parser.getTok().getLoc();
unsigned Base = 0, Index = 0, LengthReg = 0;
Register Reg1, Reg2;
@@ -1121,7 +1108,7 @@ SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
bool HasVectorIndex = (MemKind == BDVMem) ? true : false;
if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Disp, Length, HasLength,
HasVectorIndex))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
const unsigned *Regs;
switch (RegKind) {
@@ -1135,78 +1122,68 @@ SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
// If we have Reg1, it must be an address register.
if (HaveReg1) {
if (parseAddressRegister(Reg1))
- return MatchOperand_ParseFail;
- Base = Regs[Reg1.Num];
+ return ParseStatus::Failure;
+ Base = Reg1.Num == 0 ? 0 : Regs[Reg1.Num];
}
// There must be no Reg2.
- if (HaveReg2) {
- Error(StartLoc, "invalid use of indexed addressing");
- return MatchOperand_ParseFail;
- }
+ if (HaveReg2)
+ return Error(StartLoc, "invalid use of indexed addressing");
break;
case BDXMem:
// If we have Reg1, it must be an address register.
if (HaveReg1) {
if (parseAddressRegister(Reg1))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
// If there are two registers, the first one is the index and the
// second is the base.
if (HaveReg2)
- Index = Regs[Reg1.Num];
+ Index = Reg1.Num == 0 ? 0 : Regs[Reg1.Num];
else
- Base = Regs[Reg1.Num];
+ Base = Reg1.Num == 0 ? 0 : Regs[Reg1.Num];
}
// If we have Reg2, it must be an address register.
if (HaveReg2) {
if (parseAddressRegister(Reg2))
- return MatchOperand_ParseFail;
- Base = Regs[Reg2.Num];
+ return ParseStatus::Failure;
+ Base = Reg2.Num == 0 ? 0 : Regs[Reg2.Num];
}
break;
case BDLMem:
// If we have Reg2, it must be an address register.
if (HaveReg2) {
if (parseAddressRegister(Reg2))
- return MatchOperand_ParseFail;
- Base = Regs[Reg2.Num];
+ return ParseStatus::Failure;
+ Base = Reg2.Num == 0 ? 0 : Regs[Reg2.Num];
}
// We cannot support base+index addressing.
- if (HaveReg1 && HaveReg2) {
- Error(StartLoc, "invalid use of indexed addressing");
- return MatchOperand_ParseFail;
- }
+ if (HaveReg1 && HaveReg2)
+ return Error(StartLoc, "invalid use of indexed addressing");
// We must have a length.
- if (!Length) {
- Error(StartLoc, "missing length in address");
- return MatchOperand_ParseFail;
- }
+ if (!Length)
+ return Error(StartLoc, "missing length in address");
break;
case BDRMem:
// We must have Reg1, and it must be a GPR.
- if (!HaveReg1 || Reg1.Group != RegGR) {
- Error(StartLoc, "invalid operand for instruction");
- return MatchOperand_ParseFail;
- }
+ if (!HaveReg1 || Reg1.Group != RegGR)
+ return Error(StartLoc, "invalid operand for instruction");
LengthReg = SystemZMC::GR64Regs[Reg1.Num];
// If we have Reg2, it must be an address register.
if (HaveReg2) {
if (parseAddressRegister(Reg2))
- return MatchOperand_ParseFail;
- Base = Regs[Reg2.Num];
+ return ParseStatus::Failure;
+ Base = Reg2.Num == 0 ? 0 : Regs[Reg2.Num];
}
break;
case BDVMem:
// We must have Reg1, and it must be a vector register.
- if (!HaveReg1 || Reg1.Group != RegV) {
- Error(StartLoc, "vector index required in address");
- return MatchOperand_ParseFail;
- }
+ if (!HaveReg1 || Reg1.Group != RegV)
+ return Error(StartLoc, "vector index required in address");
Index = SystemZMC::VR128Regs[Reg1.Num];
// If we have Reg2, it must be an address register.
if (HaveReg2) {
if (parseAddressRegister(Reg2))
- return MatchOperand_ParseFail;
- Base = Regs[Reg2.Num];
+ return ParseStatus::Failure;
+ Base = Reg2.Num == 0 ? 0 : Regs[Reg2.Num];
}
break;
}
@@ -1216,7 +1193,7 @@ SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
Operands.push_back(SystemZOperand::createMem(MemKind, RegKind, Base, Disp,
Index, Length, LengthReg,
StartLoc, EndLoc));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
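The repeated Reg.Num == 0 ? 0 : Regs[Reg.Num] pattern above encodes a SystemZ addressing rule: general register 0 in a base or index slot means "no register", so it must map to the LLVM "no register" value 0 rather than to R0's register number. A sketch of the effect on a two-register address (operand values made up):

    // "lg %r1, 8(%r0,%r15)" parsed as BDXMem: the first register is the
    // index, the second the base.
    //   Reg1 = %r0  -> Index = 0 (no index register, not R0)
    //   Reg2 = %r15 -> Base  = SystemZMC::GR64Regs[15]
    Index = Reg1.Num == 0 ? 0 : Regs[Reg1.Num];
    Base = Reg2.Num == 0 ? 0 : Regs[Reg2.Num];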
ParseStatus SystemZAsmParser::parseDirective(AsmToken DirectiveID) {
@@ -1226,7 +1203,7 @@ ParseStatus SystemZAsmParser::parseDirective(AsmToken DirectiveID) {
return ParseDirectiveInsn(DirectiveID.getLoc());
if (IDVal == ".machine")
return ParseDirectiveMachine(DirectiveID.getLoc());
- if (IDVal.startswith(".gnu_attribute"))
+ if (IDVal.starts_with(".gnu_attribute"))
return ParseGNUAttribute(DirectiveID.getLoc());
return ParseStatus::NoMatch;
@@ -1260,8 +1237,8 @@ bool SystemZAsmParser::ParseDirectiveInsn(SMLoc L) {
assert(Entry->Format == Format);
// Parse the following operands using the table's information.
- for (int i = 0; i < Entry->NumOperands; i++) {
- MatchClassKind Kind = Entry->OperandKinds[i];
+ for (int I = 0; I < Entry->NumOperands; I++) {
+ MatchClassKind Kind = Entry->OperandKinds[I];
SMLoc StartLoc = Parser.getTok().getLoc();
@@ -1271,7 +1248,7 @@ bool SystemZAsmParser::ParseDirectiveInsn(SMLoc L) {
Lex();
// Parse operands.
- OperandMatchResultTy ResTy;
+ ParseStatus ResTy;
if (Kind == MCK_AnyReg)
ResTy = parseAnyReg(Operands);
else if (Kind == MCK_VR128)
@@ -1299,19 +1276,19 @@ bool SystemZAsmParser::ParseDirectiveInsn(SMLoc L) {
SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc));
- ResTy = MatchOperand_Success;
+ ResTy = ParseStatus::Success;
}
- if (ResTy != MatchOperand_Success)
+ if (!ResTy.isSuccess())
return true;
}
// Build the instruction with the parsed operands.
MCInst Inst = MCInstBuilder(Entry->Opcode);
- for (size_t i = 0; i < Operands.size(); i++) {
- MCParsedAsmOperand &Operand = *Operands[i];
- MatchClassKind Kind = Entry->OperandKinds[i];
+ for (size_t I = 0; I < Operands.size(); I++) {
+ MCParsedAsmOperand &Operand = *Operands[I];
+ MatchClassKind Kind = Entry->OperandKinds[I];
// Verify operand.
unsigned Res = validateOperandClass(Operand, Kind);
@@ -1397,23 +1374,21 @@ bool SystemZAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
return false;
}
-bool SystemZAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
+bool SystemZAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) {
- return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
+ return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}
-OperandMatchResultTy SystemZAsmParser::tryParseRegister(MCRegister &RegNo,
- SMLoc &StartLoc,
- SMLoc &EndLoc) {
- bool Result =
- ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
+ParseStatus SystemZAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
bool PendingErrors = getParser().hasPendingError();
getParser().clearPendingErrors();
if (PendingErrors)
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
if (Result)
- return MatchOperand_NoMatch;
- return MatchOperand_Success;
+ return ParseStatus::NoMatch;
+ return ParseStatus::Success;
}
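tryParseRegister is the speculative entry point used by the generic assembly parser, so it must not leave state behind on a miss: ParseRegister(..., /*RestoreOnFailure=*/true) rewinds the lexer, and any diagnostic it queued is cleared here and folded into the returned status. A sketch of a call site under that contract (names are illustrative):

    MCRegister Reg;
    SMLoc Start, End;
    ParseStatus PS = TargetParser.tryParseRegister(Reg, Start, End);
    if (PS.isNoMatch()) {
      // Not a register: the token stream is untouched, so the caller can
      // fall back to parsing the operand as an expression instead.
    }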
bool SystemZAsmParser::ParseInstruction(ParseInstructionInfo &Info,
@@ -1490,15 +1465,15 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands,
FeatureBitset All;
All.set();
setAvailableFeatures(All);
- OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+ ParseStatus Res = MatchOperandParserImpl(Operands, Mnemonic);
setAvailableFeatures(AvailableFeatures);
- if (ResTy == MatchOperand_Success)
+ if (Res.isSuccess())
return false;
// If there wasn't a custom match, try the generic matcher below. Otherwise,
// there was a match, but an error occurred, in which case, just return that
// the operand parsing failed.
- if (ResTy == MatchOperand_ParseFail)
+ if (Res.isFailure())
return true;
// Check for a register. All real register operands should have used
@@ -1599,17 +1574,17 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
llvm_unreachable("Unexpected match type");
}
-OperandMatchResultTy
-SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
- int64_t MaxVal, bool AllowTLS) {
+ParseStatus SystemZAsmParser::parsePCRel(OperandVector &Operands,
+ int64_t MinVal, int64_t MaxVal,
+ bool AllowTLS) {
MCContext &Ctx = getContext();
MCStreamer &Out = getStreamer();
const MCExpr *Expr;
SMLoc StartLoc = Parser.getTok().getLoc();
if (getParser().parseExpression(Expr))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
- auto isOutOfRangeConstant = [&](const MCExpr *E, bool Negate) -> bool {
+ auto IsOutOfRangeConstant = [&](const MCExpr *E, bool Negate) -> bool {
if (auto *CE = dyn_cast<MCConstantExpr>(E)) {
int64_t Value = CE->getValue();
if (Negate)
@@ -1623,14 +1598,10 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
// For consistency with the GNU assembler, treat immediates as offsets
// from ".".
if (auto *CE = dyn_cast<MCConstantExpr>(Expr)) {
- if (isParsingHLASM()) {
- Error(StartLoc, "Expected PC-relative expression");
- return MatchOperand_ParseFail;
- }
- if (isOutOfRangeConstant(CE, false)) {
- Error(StartLoc, "offset out of range");
- return MatchOperand_ParseFail;
- }
+ if (isParsingHLASM())
+ return Error(StartLoc, "Expected PC-relative expression");
+ if (IsOutOfRangeConstant(CE, false))
+ return Error(StartLoc, "offset out of range");
int64_t Value = CE->getValue();
MCSymbol *Sym = Ctx.createTempSymbol();
Out.emitLabel(Sym);
@@ -1642,22 +1613,18 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
// For consistency with the GNU assembler, conservatively assume that a
// constant offset must by itself be within the given size range.
if (const auto *BE = dyn_cast<MCBinaryExpr>(Expr))
- if (isOutOfRangeConstant(BE->getLHS(), false) ||
- isOutOfRangeConstant(BE->getRHS(),
- BE->getOpcode() == MCBinaryExpr::Sub)) {
- Error(StartLoc, "offset out of range");
- return MatchOperand_ParseFail;
- }
+ if (IsOutOfRangeConstant(BE->getLHS(), false) ||
+ IsOutOfRangeConstant(BE->getRHS(),
+ BE->getOpcode() == MCBinaryExpr::Sub))
+ return Error(StartLoc, "offset out of range");
// Optionally match :tls_gdcall: or :tls_ldcall: followed by a TLS symbol.
const MCExpr *Sym = nullptr;
if (AllowTLS && getLexer().is(AsmToken::Colon)) {
Parser.Lex();
- if (Parser.getTok().isNot(AsmToken::Identifier)) {
- Error(Parser.getTok().getLoc(), "unexpected token");
- return MatchOperand_ParseFail;
- }
+ if (Parser.getTok().isNot(AsmToken::Identifier))
+ return Error(Parser.getTok().getLoc(), "unexpected token");
MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
StringRef Name = Parser.getTok().getString();
@@ -1665,22 +1632,16 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
Kind = MCSymbolRefExpr::VK_TLSGD;
else if (Name == "tls_ldcall")
Kind = MCSymbolRefExpr::VK_TLSLDM;
- else {
- Error(Parser.getTok().getLoc(), "unknown TLS tag");
- return MatchOperand_ParseFail;
- }
+ else
+ return Error(Parser.getTok().getLoc(), "unknown TLS tag");
Parser.Lex();
- if (Parser.getTok().isNot(AsmToken::Colon)) {
- Error(Parser.getTok().getLoc(), "unexpected token");
- return MatchOperand_ParseFail;
- }
+ if (Parser.getTok().isNot(AsmToken::Colon))
+ return Error(Parser.getTok().getLoc(), "unexpected token");
Parser.Lex();
- if (Parser.getTok().isNot(AsmToken::Identifier)) {
- Error(Parser.getTok().getLoc(), "unexpected token");
- return MatchOperand_ParseFail;
- }
+ if (Parser.getTok().isNot(AsmToken::Identifier))
+ return Error(Parser.getTok().getLoc(), "unexpected token");
StringRef Identifier = Parser.getTok().getString();
Sym = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(Identifier),
@@ -1697,7 +1658,7 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
else
Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
bool SystemZAsmParser::isLabel(AsmToken &Token) {
@@ -1742,6 +1703,7 @@ bool SystemZAsmParser::isLabel(AsmToken &Token) {
}
// Force static initialization.
+// NOLINTNEXTLINE(readability-identifier-naming)
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZAsmParser() {
RegisterMCAsmParser<SystemZAsmParser> X(getTheSystemZTarget());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index d26ad63dc515..07a3e788fa40 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -32,7 +32,7 @@ public:
: MCDisassembler(STI, Ctx) {}
~SystemZDisassembler() override = default;
- DecodeStatus getInstruction(MCInst &instr, uint64_t &Size,
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &CStream) const override;
};
@@ -45,6 +45,7 @@ static MCDisassembler *createSystemZDisassembler(const Target &T,
return new SystemZDisassembler(STI, Ctx);
}
+// NOLINTNEXTLINE(readability-identifier-naming)
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZDisassembler() {
// Register the disassembler.
TargetRegistry::RegisterMCDisassembler(getTheSystemZTarget(),
@@ -70,11 +71,11 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZDisassembler() {
/// is done and if a symbol is found an MCExpr is created with that, else
/// an MCExpr with the immediate Value is created. This function returns true
/// if it adds an operand to the MCInst and false otherwise.
-static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
+static bool tryAddingSymbolicOperand(int64_t Value, bool IsBranch,
uint64_t Address, uint64_t Offset,
uint64_t Width, MCInst &MI,
const MCDisassembler *Decoder) {
- return Decoder->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset,
+ return Decoder->tryAddingSymbolicOperand(MI, Value, Address, IsBranch, Offset,
Width, /*InstSize=*/0);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZELFObjectWriter.cpp
index 9c6a1b6e8af0..de1eedb8daff 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZELFObjectWriter.cpp
@@ -1,4 +1,4 @@
-//===-- SystemZMCObjectWriter.cpp - SystemZ ELF writer --------------------===//
+//===-- SystemZELFObjectWriter.cpp - SystemZ ELF writer -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -18,15 +18,16 @@
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdint>
+#include <memory>
using namespace llvm;
namespace {
-class SystemZObjectWriter : public MCELFObjectTargetWriter {
+class SystemZELFObjectWriter : public MCELFObjectTargetWriter {
public:
- SystemZObjectWriter(uint8_t OSABI);
- ~SystemZObjectWriter() override = default;
+ SystemZELFObjectWriter(uint8_t OSABI);
+ ~SystemZELFObjectWriter() override = default;
protected:
// Override MCELFObjectTargetWriter.
@@ -36,9 +37,9 @@ protected:
} // end anonymous namespace
-SystemZObjectWriter::SystemZObjectWriter(uint8_t OSABI)
- : MCELFObjectTargetWriter(/*Is64Bit_=*/true, OSABI, ELF::EM_S390,
- /*HasRelocationAddend_=*/ true) {}
+SystemZELFObjectWriter::SystemZELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/*Is64Bit_=*/true, OSABI, ELF::EM_S390,
+ /*HasRelocationAddend_=*/true) {}
// Return the relocation type for an absolute value of MCFixupKind Kind.
static unsigned getAbsoluteReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
@@ -146,10 +147,10 @@ static unsigned getPLTReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
return 0;
}
-unsigned SystemZObjectWriter::getRelocType(MCContext &Ctx,
- const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const {
+unsigned SystemZELFObjectWriter::getRelocType(MCContext &Ctx,
+ const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
SMLoc Loc = Fixup.getLoc();
unsigned Kind = Fixup.getKind();
if (Kind >= FirstLiteralRelocationKind)
@@ -199,6 +200,6 @@ unsigned SystemZObjectWriter::getRelocType(MCContext &Ctx,
}
std::unique_ptr<MCObjectTargetWriter>
-llvm::createSystemZObjectWriter(uint8_t OSABI) {
- return std::make_unique<SystemZObjectWriter>(OSABI);
+llvm::createSystemZELFObjectWriter(uint8_t OSABI) {
+ return std::make_unique<SystemZELFObjectWriter>(OSABI);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGOFFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGOFFObjectWriter.cpp
new file mode 100644
index 000000000000..205066814fbd
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGOFFObjectWriter.cpp
@@ -0,0 +1,27 @@
+//===- SystemZGOFFObjectWriter.cpp - SystemZ GOFF writer ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/MC/MCGOFFObjectWriter.h"
+#include <memory>
+
+using namespace llvm;
+
+namespace {
+class SystemZGOFFObjectWriter : public MCGOFFObjectTargetWriter {
+public:
+ SystemZGOFFObjectWriter();
+};
+} // end anonymous namespace
+
+SystemZGOFFObjectWriter::SystemZGOFFObjectWriter()
+ : MCGOFFObjectTargetWriter() {}
+
+std::unique_ptr<MCObjectTargetWriter> llvm::createSystemZGOFFObjectWriter() {
+ return std::make_unique<SystemZGOFFObjectWriter>();
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
index a32dc9a2e7d5..fa534fadc323 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
@@ -32,11 +32,12 @@ void SystemZInstPrinter::printAddress(const MCAsmInfo *MAI, MCRegister Base,
O << '(';
if (Index) {
printFormattedRegName(MAI, Index, O);
- if (Base)
- O << ',';
+ O << ',';
}
if (Base)
printFormattedRegName(MAI, Base, O);
+ else
+ O << '0';
O << ')';
}
}
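The printer now emits the base slot whenever an index is present: the comma after the index is unconditional, and a missing base is rendered as the explicit "no register" 0. A sketch of the effect (displacement and registers made up):

    // printAddress(MAI, /*Base=*/0, Disp16, /*Index=*/SystemZ::V1, O)
    //   before this change: "16(%v1)"
    //   after this change:  "16(%v1,0)"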
@@ -50,7 +51,7 @@ void SystemZInstPrinter::printOperand(const MCOperand &MO, const MCAsmInfo *MAI,
printFormattedRegName(MAI, MO.getReg(), O);
}
else if (MO.isImm())
- O << markup("<imm:") << MO.getImm() << markup(">");
+ markup(O, Markup::Immediate) << MO.getImm();
else if (MO.isExpr())
MO.getExpr()->print(O, MAI);
else
@@ -64,9 +65,9 @@ void SystemZInstPrinter::printFormattedRegName(const MCAsmInfo *MAI,
if (MAI->getAssemblerDialect() == AD_HLASM) {
// Skip register prefix so that only register number is left
assert(isalpha(RegName[0]) && isdigit(RegName[1]));
- O << markup("<reg:") << (RegName + 1) << markup(">");
+ markup(O, Markup::Register) << (RegName + 1);
} else
- O << markup("<reg:") << '%' << RegName << markup(">");
+ markup(O, Markup::Register) << '%' << RegName;
}
void SystemZInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const {
@@ -90,7 +91,7 @@ void SystemZInstPrinter::printUImmOperand(const MCInst *MI, int OpNum,
}
uint64_t Value = static_cast<uint64_t>(MO.getImm());
assert(isUInt<N>(Value) && "Invalid uimm argument");
- O << markup("<imm:") << Value << markup(">");
+ markup(O, Markup::Immediate) << Value;
}
template <unsigned N>
@@ -103,7 +104,7 @@ void SystemZInstPrinter::printSImmOperand(const MCInst *MI, int OpNum,
}
int64_t Value = MI->getOperand(OpNum).getImm();
assert(isInt<N>(Value) && "Invalid simm argument");
- O << markup("<imm:") << Value << markup(">");
+ markup(O, Markup::Immediate) << Value;
}
void SystemZInstPrinter::printU1ImmOperand(const MCInst *MI, int OpNum,
@@ -170,9 +171,9 @@ void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
if (MO.isImm()) {
- O << markup("<imm:") << "0x";
+ WithMarkup M = markup(O, Markup::Immediate);
+ O << "0x";
O.write_hex(MO.getImm());
- O << markup(">");
} else
MO.getExpr()->print(O, &MAI);
}
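markup() here is the newer MCInstPrinter interface: rather than manually pairing markup("<imm:") with markup(">"), it returns an RAII WithMarkup object that writes the opening tag immediately (when markup output is enabled) and the closing tag in its destructor. That is why the hex print above binds the named local M:

    {
      WithMarkup M = markup(O, Markup::Immediate); // emits "<imm:" if enabled
      O << "0x";
      O.write_hex(MO.getImm());
    } // M is destroyed here and emits the matching ">"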
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 880766a1a23f..e32023ccedf5 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -106,10 +106,8 @@ static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value,
namespace {
class SystemZMCAsmBackend : public MCAsmBackend {
- uint8_t OSABI;
public:
- SystemZMCAsmBackend(uint8_t osABI)
- : MCAsmBackend(support::big), OSABI(osABI) {}
+ SystemZMCAsmBackend() : MCAsmBackend(llvm::endianness::big) {}
// Override MCAsmBackend
unsigned getNumFixupKinds() const override {
@@ -118,7 +116,8 @@ public:
std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target) override;
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) override;
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
uint64_t Value, bool IsResolved,
@@ -130,10 +129,6 @@ public:
}
bool writeNopData(raw_ostream &OS, uint64_t Count,
const MCSubtargetInfo *STI) const override;
- std::unique_ptr<MCObjectTargetWriter>
- createObjectTargetWriter() const override {
- return createSystemZObjectWriter(OSABI);
- }
};
} // end anonymous namespace
@@ -170,8 +165,9 @@ SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
}
bool SystemZMCAsmBackend::shouldForceRelocation(const MCAssembler &,
- const MCFixup &Fixup,
- const MCValue &) {
+ const MCFixup &Fixup,
+ const MCValue &,
+ const MCSubtargetInfo *STI) {
return Fixup.getKind() >= FirstLiteralRelocationKind;
}
@@ -208,11 +204,39 @@ bool SystemZMCAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
return true;
}
+namespace {
+class ELFSystemZAsmBackend : public SystemZMCAsmBackend {
+ uint8_t OSABI;
+
+public:
+ ELFSystemZAsmBackend(uint8_t OsABI) : SystemZMCAsmBackend(), OSABI(OsABI) {}
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override {
+ return createSystemZELFObjectWriter(OSABI);
+ }
+};
+
+class GOFFSystemZAsmBackend : public SystemZMCAsmBackend {
+public:
+ GOFFSystemZAsmBackend() : SystemZMCAsmBackend() {}
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override {
+ return createSystemZGOFFObjectWriter();
+ }
+};
+} // namespace
+
MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
+ if (STI.getTargetTriple().isOSzOS()) {
+ return new GOFFSystemZAsmBackend();
+ }
+
uint8_t OSABI =
MCELFObjectTargetWriter::getOSABI(STI.getTargetTriple().getOS());
- return new SystemZMCAsmBackend(OSABI);
+ return new ELFSystemZAsmBackend(OSABI);
}
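The fixup and relaxation logic stays in the shared SystemZMCAsmBackend; only createObjectTargetWriter differs between the two subclasses, and the factory picks one purely from the OS in the triple. A small illustration of the selection, assuming standard triple spellings:

    Triple ZOS("s390x-ibm-zos");
    assert(ZOS.isOSzOS());    // -> GOFFSystemZAsmBackend (GOFF writer)
    Triple Linux("s390x-unknown-linux-gnu");
    assert(!Linux.isOSzOS()); // -> ELFSystemZAsmBackend with the ELF OSABI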
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index e453ec60d70c..a6285a2ccf9d 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -12,7 +12,6 @@
#include "MCTargetDesc/SystemZMCFixups.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
-#include "SystemZInstrInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
@@ -23,7 +22,6 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
@@ -38,9 +36,8 @@ class SystemZMCCodeEmitter : public MCCodeEmitter {
MCContext &Ctx;
public:
- SystemZMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
- : MCII(mcii), Ctx(ctx) {
- }
+ SystemZMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
+ : MCII(MCII), Ctx(Ctx) {}
~SystemZMCCodeEmitter() override = default;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCExpr.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCExpr.h
index f548b34baa42..62e61091a14c 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCExpr.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCExpr.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_SystemZ_MCTARGETDESC_SystemZMCEXPR_H
-#define LLVM_LIB_TARGET_SystemZ_MCTARGETDESC_SystemZMCEXPR_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCEXPR_H
+#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCEXPR_H
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index f2bfc9ac48e5..39c1836a1370 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -85,7 +85,9 @@ MCAsmBackend *createSystemZMCAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
-std::unique_ptr<MCObjectTargetWriter> createSystemZObjectWriter(uint8_t OSABI);
+std::unique_ptr<MCObjectTargetWriter>
+createSystemZELFObjectWriter(uint8_t OSABI);
+std::unique_ptr<MCObjectTargetWriter> createSystemZGOFFObjectWriter();
} // end namespace llvm
// Defines symbolic names for SystemZ registers.
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZ.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZ.h
index cdd2850ad8e1..d7aa9e4e18cb 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZ.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZ.h
@@ -189,7 +189,7 @@ static inline bool isImmHF(uint64_t Val) {
} // end namespace SystemZ
FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
- CodeGenOpt::Level OptLevel);
+ CodeGenOptLevel OptLevel);
FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index afebdd3f6149..3186002c57d9 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -27,6 +27,10 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Chrono.h"
+#include "llvm/Support/ConvertEBCDIC.h"
+#include "llvm/Support/FormatProviders.h"
+#include "llvm/Support/FormatVariadic.h"
using namespace llvm;
@@ -349,9 +353,9 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
}
EmitToStreamer(*OutStreamer, MCInstBuilder(Op)
.addReg(TargetReg)
- .addReg(IndexReg)
+ .addReg(ADAReg)
.addImm(Disp)
- .addReg(ADAReg));
+ .addReg(IndexReg));
return;
}
@@ -952,6 +956,7 @@ void SystemZAsmPrinter::emitEndOfAsmFile(Module &M) {
auto TT = OutContext.getTargetTriple();
if (TT.isOSzOS()) {
emitADASection();
+ emitIDRLSection(M);
}
emitAttributes(M);
}
@@ -1025,6 +1030,72 @@ void SystemZAsmPrinter::emitADASection() {
OutStreamer->popSection();
}
+static std::string getProductID(Module &M) {
+ std::string ProductID;
+ if (auto *MD = M.getModuleFlag("zos_product_id"))
+ ProductID = cast<MDString>(MD)->getString().str();
+ if (ProductID.empty())
+ ProductID = "LLVM";
+ return ProductID;
+}
+
+static uint32_t getProductVersion(Module &M) {
+ if (auto *VersionVal = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("zos_product_major_version")))
+ return VersionVal->getZExtValue();
+ return LLVM_VERSION_MAJOR;
+}
+
+static uint32_t getProductRelease(Module &M) {
+ if (auto *ReleaseVal = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("zos_product_minor_version")))
+ return ReleaseVal->getZExtValue();
+ return LLVM_VERSION_MINOR;
+}
+
+static uint32_t getProductPatch(Module &M) {
+ if (auto *PatchVal = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("zos_product_patchlevel")))
+ return PatchVal->getZExtValue();
+ return LLVM_VERSION_PATCH;
+}
+
+static time_t getTranslationTime(Module &M) {
+ std::time_t Time = 0;
+ if (auto *Val = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("zos_translation_time"))) {
+ long SecondsSinceEpoch = Val->getSExtValue();
+ Time = static_cast<time_t>(SecondsSinceEpoch);
+ }
+ return Time;
+}
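All of these helpers read ordinary module flags and fall back to the LLVM version (or a zero timestamp) when a flag is absent, so a front end can stamp the emitted object by seeding the module. A minimal sketch; only the flag names come from the code above, and the chosen behaviors and values are assumptions:

    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    void stampZOSModule(Module &M) {
      M.addModuleFlag(Module::Warning, "zos_product_major_version", 17);
      M.addModuleFlag(Module::Warning, "zos_product_minor_version", 0);
      M.addModuleFlag(Module::Warning, "zos_product_patchlevel", 6);
      // Arbitrary epoch-seconds value for illustration.
      M.addModuleFlag(Module::Warning, "zos_translation_time", 1702931412);
      M.addModuleFlag(Module::Error, "zos_product_id",
                      MDString::get(M.getContext(), "MYPROD"));
    }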
+
+void SystemZAsmPrinter::emitIDRLSection(Module &M) {
+ OutStreamer->pushSection();
+ OutStreamer->switchSection(getObjFileLowering().getIDRLSection());
+ constexpr unsigned IDRLDataLength = 30;
+ std::time_t Time = getTranslationTime(M);
+
+ uint32_t ProductVersion = getProductVersion(M);
+ uint32_t ProductRelease = getProductRelease(M);
+
+ std::string ProductID = getProductID(M);
+
+ SmallString<IDRLDataLength + 1> TempStr;
+ raw_svector_ostream O(TempStr);
+ O << formatv("{0,-10}{1,0-2:d}{2,0-2:d}{3:%Y%m%d%H%M%S}{4,0-2}",
+ ProductID.substr(0, 10).c_str(), ProductVersion, ProductRelease,
+ llvm::sys::toUtcTime(Time), "0");
+ SmallString<IDRLDataLength> Data;
+ ConverterEBCDIC::convertToEBCDIC(TempStr, Data);
+
+ OutStreamer->emitInt8(0); // Reserved.
+ OutStreamer->emitInt8(3); // Format.
+ OutStreamer->emitInt16(IDRLDataLength); // Length.
+ OutStreamer->emitBytes(Data.str());
+ OutStreamer->popSection();
+}
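The formatv string packs five fixed-width fields into exactly IDRLDataLength (30) bytes: a 10-byte left-justified product ID, two 2-digit zero-filled version and release fields, a 14-digit UTC timestamp, and a 2-byte trailing field. A worked example with the defaults above (product ID "LLVM", an assumed version 17.0, and the zero translation time used when the module flag is missing):

    // formatv("{0,-10}{1,0-2:d}{2,0-2:d}{3:%Y%m%d%H%M%S}{4,0-2}", ...)
    //   -> "LLVM      "      product ID, padded to 10
    //      "17" "00"         version and release, 2 digits each
    //      "19700101000000"  %Y%m%d%H%M%S for time_t 0 (UTC)
    //      "00"              trailing field, zero-filled to 2
    // 30 bytes total, converted to EBCDIC just before emission.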
+
void SystemZAsmPrinter::emitFunctionBodyEnd() {
if (TM.getTargetTriple().isOSzOS()) {
// Emit symbol for the end of function if the z/OS target streamer
@@ -1043,7 +1114,8 @@ void SystemZAsmPrinter::emitFunctionBodyEnd() {
}
static void emitPPA1Flags(std::unique_ptr<MCStreamer> &OutStreamer, bool VarArg,
- bool StackProtector, bool FPRMask, bool VRMask) {
+ bool StackProtector, bool FPRMask, bool VRMask,
+ bool HasName) {
enum class PPA1Flag1 : uint8_t {
DSA64Bit = (0x80 >> 0),
VarArg = (0x80 >> 7),
@@ -1069,7 +1141,7 @@ static void emitPPA1Flags(std::unique_ptr<MCStreamer> &OutStreamer, bool VarArg,
auto Flags1 = PPA1Flag1(0);
auto Flags2 = PPA1Flag2::ExternalProcedure;
auto Flags3 = PPA1Flag3(0);
- auto Flags4 = PPA1Flag4::EPMOffsetPresent | PPA1Flag4::ProcedureNamePresent;
+ auto Flags4 = PPA1Flag4::EPMOffsetPresent;
Flags1 |= PPA1Flag1::DSA64Bit;
@@ -1086,6 +1158,9 @@ static void emitPPA1Flags(std::unique_ptr<MCStreamer> &OutStreamer, bool VarArg,
if (VRMask)
Flags4 |= PPA1Flag4::VRMask; // Add emit VR mask flag.
+ if (HasName)
+ Flags4 |= PPA1Flag4::ProcedureNamePresent; // Add optional name block.
+
OutStreamer->AddComment("PPA1 Flags 1");
if ((Flags1 & PPA1Flag1::DSA64Bit) == PPA1Flag1::DSA64Bit)
OutStreamer->AddComment(" Bit 0: 1 = 64-bit DSA");
@@ -1113,11 +1188,40 @@ static void emitPPA1Flags(std::unique_ptr<MCStreamer> &OutStreamer, bool VarArg,
OutStreamer->AddComment("PPA1 Flags 4");
if ((Flags4 & PPA1Flag4::VRMask) == PPA1Flag4::VRMask)
OutStreamer->AddComment(" Bit 2: 1 = Vector Reg Mask is in optional area");
+ if ((Flags4 & PPA1Flag4::ProcedureNamePresent) ==
+ PPA1Flag4::ProcedureNamePresent)
+ OutStreamer->AddComment(" Bit 7: 1 = Name Length and Name");
OutStreamer->emitInt8(static_cast<uint8_t>(
Flags4)); // Flags 4 (optional sections, always emit these).
}
+static void emitPPA1Name(std::unique_ptr<MCStreamer> &OutStreamer,
+ StringRef OutName) {
+ size_t NameSize = OutName.size();
+ uint16_t OutSize;
+ if (NameSize < UINT16_MAX) {
+ OutSize = static_cast<uint16_t>(NameSize);
+ } else {
+ OutName = OutName.substr(0, UINT16_MAX);
+ OutSize = UINT16_MAX;
+ }
+  // Emit padding to ensure that the next optional field is word-aligned.
+ uint8_t ExtraZeros = 4 - ((2 + OutSize) % 4);
+
+ SmallString<512> OutnameConv;
+ ConverterEBCDIC::convertToEBCDIC(OutName, OutnameConv);
+ OutName = OutnameConv.str();
+
+ OutStreamer->AddComment("Length of Name");
+ OutStreamer->emitInt16(OutSize);
+ OutStreamer->AddComment("Name of Function");
+ OutStreamer->emitBytes(OutName);
+ OutStreamer->emitZeros(ExtraZeros);
+}
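The padding keeps the next optional PPA1 field word-aligned: the name block occupies 2 (length field) + OutSize bytes. Worked arithmetic (note that when 2 + OutSize is already a multiple of 4, ExtraZeros evaluates to 4, so a full word of zeros is emitted rather than none):

    uint16_t OutSize = 5;                         // a 5-byte function name
    uint8_t ExtraZeros = 4 - ((2 + OutSize) % 4); // 4 - (7 % 4) == 1
    assert((2 + OutSize + ExtraZeros) % 4 == 0);  // 2 + 5 + 1 == 8, aligned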
+
void SystemZAsmPrinter::emitPPA1(MCSymbol *FnEndSym) {
+ assert(PPA2Sym != nullptr && "PPA2 Symbol not defined");
+
const TargetRegisterInfo *TRI = MF->getRegInfo().getTargetRegisterInfo();
const SystemZSubtarget &Subtarget = MF->getSubtarget<SystemZSubtarget>();
const auto TargetHasVector = Subtarget.hasVector();
@@ -1207,10 +1311,15 @@ void SystemZAsmPrinter::emitPPA1(MCSymbol *FnEndSym) {
OutStreamer->emitInt8(0xCE); // CEL signature.
OutStreamer->AddComment("Saved GPR Mask");
OutStreamer->emitInt16(SavedGPRMask);
+ OutStreamer->AddComment("Offset to PPA2");
+ OutStreamer->emitAbsoluteSymbolDiff(PPA2Sym, CurrentFnPPA1Sym, 4);
+
+ bool HasName =
+ MF->getFunction().hasName() && MF->getFunction().getName().size() > 0;
emitPPA1Flags(OutStreamer, MF->getFunction().isVarArg(),
MFFrame.hasStackProtectorIndex(), SavedFPRMask != 0,
- TargetHasVector && SavedVRMask != 0);
+ TargetHasVector && SavedVRMask != 0, HasName);
OutStreamer->AddComment("Length/4 of Parms");
OutStreamer->emitInt16(
@@ -1252,11 +1361,133 @@ void SystemZAsmPrinter::emitPPA1(MCSymbol *FnEndSym) {
OutStreamer->emitInt32(FrameAndVROffset);
}
+ // Emit name length and name optional section (0x01 of flags 4)
+ if (HasName)
+ emitPPA1Name(OutStreamer, MF->getFunction().getName());
+
// Emit offset to entry point optional section (0x80 of flags 4).
OutStreamer->emitAbsoluteSymbolDiff(CurrentFnEPMarkerSym, CurrentFnPPA1Sym,
4);
}
+void SystemZAsmPrinter::emitStartOfAsmFile(Module &M) {
+ if (TM.getTargetTriple().isOSzOS())
+ emitPPA2(M);
+ AsmPrinter::emitStartOfAsmFile(M);
+}
+
+void SystemZAsmPrinter::emitPPA2(Module &M) {
+ OutStreamer->pushSection();
+ OutStreamer->switchSection(getObjFileLowering().getPPA2Section());
+ MCContext &OutContext = OutStreamer->getContext();
+ // Make CELQSTRT symbol.
+ const char *StartSymbolName = "CELQSTRT";
+ MCSymbol *CELQSTRT = OutContext.getOrCreateSymbol(StartSymbolName);
+
+ // Create symbol and assign to class field for use in PPA1.
+ PPA2Sym = OutContext.createTempSymbol("PPA2", false);
+ MCSymbol *DateVersionSym = OutContext.createTempSymbol("DVS", false);
+
+ std::time_t Time = getTranslationTime(M);
+ SmallString<15> CompilationTime; // 14 + null
+ raw_svector_ostream O(CompilationTime);
+ O << formatv("{0:%Y%m%d%H%M%S}", llvm::sys::toUtcTime(Time));
+
+ uint32_t ProductVersion = getProductVersion(M),
+ ProductRelease = getProductRelease(M),
+ ProductPatch = getProductPatch(M);
+
+ SmallString<7> Version; // 6 + null
+ raw_svector_ostream ostr(Version);
+ ostr << formatv("{0,0-2:d}{1,0-2:d}{2,0-2:d}", ProductVersion, ProductRelease,
+ ProductPatch);
+
+ // Drop the trailing null terminator during conversion.
+ SmallString<sizeof(CompilationTime) - 1> CompilationTimeStr;
+ SmallString<sizeof(Version) - 1> VersionStr;
+
+ ConverterEBCDIC::convertToEBCDIC(CompilationTime, CompilationTimeStr);
+ ConverterEBCDIC::convertToEBCDIC(Version, VersionStr);
+
+ enum class PPA2MemberId : uint8_t {
+ // See z/OS Language Environment Vendor Interfaces v2r5, p.23, for
+ // complete list. Only the C runtime is supported by this backend.
+ LE_C_Runtime = 3,
+ };
+ enum class PPA2MemberSubId : uint8_t {
+ // List of languages using the LE C runtime implementation.
+ C = 0x00,
+ CXX = 0x01,
+ Swift = 0x03,
+ Go = 0x60,
+ LLVMBasedLang = 0xe7,
+ };
+ // PPA2 Flags
+ enum class PPA2Flags : uint8_t {
+ CompileForBinaryFloatingPoint = 0x80,
+ CompiledWithXPLink = 0x01,
+ CompiledUnitASCII = 0x04,
+ HasServiceInfo = 0x20,
+ };
+
+ PPA2MemberSubId MemberSubId = PPA2MemberSubId::LLVMBasedLang;
+ if (auto *MD = M.getModuleFlag("zos_cu_language")) {
+ StringRef Language = cast<MDString>(MD)->getString();
+ MemberSubId = StringSwitch<PPA2MemberSubId>(Language)
+ .Case("C", PPA2MemberSubId::C)
+ .Case("C++", PPA2MemberSubId::CXX)
+ .Case("Swift", PPA2MemberSubId::Swift)
+ .Case("Go", PPA2MemberSubId::Go)
+ .Default(PPA2MemberSubId::LLVMBasedLang);
+ }
+
+ // Emit PPA2 section.
+ OutStreamer->emitLabel(PPA2Sym);
+ OutStreamer->emitInt8(static_cast<uint8_t>(PPA2MemberId::LE_C_Runtime));
+ OutStreamer->emitInt8(static_cast<uint8_t>(MemberSubId));
+ OutStreamer->emitInt8(0x22); // Member defined, c370_plist+c370_env
+ OutStreamer->emitInt8(0x04); // Control level 4 (XPLink)
+ OutStreamer->emitAbsoluteSymbolDiff(CELQSTRT, PPA2Sym, 4);
+ OutStreamer->emitInt32(0x00000000);
+ OutStreamer->emitAbsoluteSymbolDiff(DateVersionSym, PPA2Sym, 4);
+ OutStreamer->emitInt32(
+ 0x00000000); // Offset to main entry point, always 0 (so says TR).
+ uint8_t Flgs = static_cast<uint8_t>(PPA2Flags::CompileForBinaryFloatingPoint);
+ Flgs |= static_cast<uint8_t>(PPA2Flags::CompiledWithXPLink);
+
+ if (auto *MD = M.getModuleFlag("zos_le_char_mode")) {
+ const StringRef &CharMode = cast<MDString>(MD)->getString();
+ if (CharMode == "ascii") {
+ Flgs |= static_cast<uint8_t>(
+ PPA2Flags::CompiledUnitASCII); // Setting bit for ASCII char. mode.
+ } else if (CharMode != "ebcdic") {
+ report_fatal_error(
+ "Only ascii or ebcdic are valid values for zos_le_char_mode "
+ "metadata");
+ }
+ }
+
+ OutStreamer->emitInt8(Flgs);
+ OutStreamer->emitInt8(0x00); // Reserved.
+ // No MD5 signature before timestamp.
+ // No FLOAT(AFP(VOLATILE)).
+ // Remaining 5 flag bits reserved.
+ OutStreamer->emitInt16(0x0000); // 16 Reserved flag bits.
+
+ // Emit date and version section.
+ OutStreamer->emitLabel(DateVersionSym);
+ OutStreamer->emitBytes(CompilationTimeStr.str());
+ OutStreamer->emitBytes(VersionStr.str());
+
+ OutStreamer->emitInt16(0x0000); // Service level string length.
+
+ // Emit 8 byte alignment.
+ // Emit pointer to PPA2 label.
+ OutStreamer->AddComment("A(PPA2-CELQSTRT)");
+ OutStreamer->emitAbsoluteSymbolDiff(PPA2Sym, CELQSTRT, 8);
+ OutStreamer->popSection();
+}
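
emitPPA2 keys its member sub-id and character-mode bit off the "zos_cu_language" and "zos_le_char_mode" module flags. As a hedged illustration of how a frontend could set them (the flag names match the strings read above; the behavior value and the language string are example choices):

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    void addZOSModuleFlags(Module &M) {
      LLVMContext &Ctx = M.getContext();
      M.addModuleFlag(Module::Error, "zos_cu_language",
                      MDString::get(Ctx, "C++"));   // -> PPA2MemberSubId::CXX
      M.addModuleFlag(Module::Error, "zos_le_char_mode",
                      MDString::get(Ctx, "ascii")); // -> CompiledUnitASCII
    }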
+
void SystemZAsmPrinter::emitFunctionEntryLabel() {
const SystemZSubtarget &Subtarget = MF->getSubtarget<SystemZSubtarget>();
@@ -1276,14 +1507,16 @@ void SystemZAsmPrinter::emitFunctionEntryLabel() {
// EntryPoint Marker
const MachineFrameInfo &MFFrame = MF->getFrameInfo();
bool IsUsingAlloca = MFFrame.hasVarSizedObjects();
+ uint32_t DSASize = MFFrame.getStackSize();
+ bool IsLeaf = DSASize == 0 && MFFrame.getCalleeSavedInfo().empty();
- // Set Flags
+ // Set Flags.
uint8_t Flags = 0;
+ if (IsLeaf)
+ Flags |= 0x08;
if (IsUsingAlloca)
Flags |= 0x04;
- uint32_t DSASize = MFFrame.getStackSize();
-
// Combine into top 27 bits of DSASize and bottom 5 bits of Flags.
uint32_t DSAAndFlags = DSASize & 0xFFFFFFE0; // (x/32) << 5
DSAAndFlags |= Flags;
@@ -1301,6 +1534,10 @@ void SystemZAsmPrinter::emitFunctionEntryLabel() {
if (OutStreamer->isVerboseAsm()) {
OutStreamer->AddComment("DSA Size 0x" + Twine::utohexstr(DSASize));
OutStreamer->AddComment("Entry Flags");
+ if (Flags & 0x08)
+ OutStreamer->AddComment(" Bit 1: 1 = Leaf function");
+ else
+ OutStreamer->AddComment(" Bit 1: 0 = Non-leaf function");
if (Flags & 0x04)
OutStreamer->AddComment(" Bit 2: 1 = Uses alloca");
else
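
One note on the DSAAndFlags packing above: because the DSA size is a multiple of 32, its low 5 bits are free to carry the entry flags. A standalone sketch (flag values as in the code above, frame size made up):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t DSASize = 0x140;      // stack size, a multiple of 32
      uint8_t Flags = 0x08 | 0x04;   // leaf + uses-alloca, per the bits above
      uint32_t Packed = (DSASize & 0xFFFFFFE0u) | Flags;
      assert((Packed & ~0x1Fu) == DSASize); // top 27 bits: DSA size
      assert((Packed & 0x1Fu) == Flags);    // bottom 5 bits: flags
    }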
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
index c9dbbfd0b4c4..303cce1a1b65 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -27,6 +27,7 @@ class LLVM_LIBRARY_VISIBILITY SystemZAsmPrinter : public AsmPrinter {
private:
MCSymbol *CurrentFnPPA1Sym; // PPA1 Symbol.
MCSymbol *CurrentFnEPMarkerSym; // Entry Point Marker.
+ MCSymbol *PPA2Sym;
SystemZTargetStreamer *getTargetStreamer() {
MCTargetStreamer *TS = OutStreamer->getTargetStreamer();
@@ -90,12 +91,15 @@ private:
AssociatedDataAreaTable ADATable;
void emitPPA1(MCSymbol *FnEndSym);
+ void emitPPA2(Module &M);
void emitADASection();
+ void emitIDRLSection(Module &M);
public:
SystemZAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)), CurrentFnPPA1Sym(nullptr),
- CurrentFnEPMarkerSym(nullptr), ADATable(TM.getPointerSize(0)) {}
+ CurrentFnEPMarkerSym(nullptr), PPA2Sym(nullptr),
+ ADATable(TM.getPointerSize(0)) {}
// Override AsmPrinter.
StringRef getPassName() const override { return "SystemZ Assembly Printer"; }
@@ -113,6 +117,7 @@ public:
}
void emitFunctionEntryLabel() override;
void emitFunctionBodyEnd() override;
+ void emitStartOfAsmFile(Module &M) override;
private:
void emitCallInformation(CallType CT);
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.td
index 29b4a26736b2..136d3d254721 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -102,9 +102,10 @@ def CC_SystemZ_ELF : CallingConv<[
// A SwiftError is passed in callee-saved R9.
CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[R9D]>>>,
- // Force long double values to the stack and pass i64 pointers to them.
- CCIfType<[f128], CCPassIndirect<i64>>,
- // Same for i128 values. These are already split into two i64 here,
+ // Force i128 (if the type is legal) and long double values to the stack
+ // and pass i64 pointers to them.
+ CCIfType<[i128, f128], CCPassIndirect<i64>>,
+ // If i128 is not legal, such values are already split into two i64 here,
// so we have to use a custom handler.
CCIfType<[i64], CCCustom<"CC_SystemZ_I128Indirect">>,
@@ -240,9 +241,10 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
// A SwiftError is passed in R0.
CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[R0D]>>>,
- // First i128 values. These are already split into two i64 here,
- // so we have to use a custom handler and assign into registers, if possible
- // We need to deal with this first
+ // Force i128 values to the stack and pass i64 pointers to them.
+ CCIfType<[i128], CCPassIndirect<i64>>,
+ // If i128 is not legal, such values are already split into two i64 here,
+ // so we have to use a custom handler.
CCIfType<[i64], CCCustom<"CC_SystemZ_I128Indirect">>,
// The first 3 integer arguments are passed in registers R1D-R3D.
// The rest will be passed in the user area. The address offset of the user
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFeatures.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFeatures.td
index 78b8394d6486..fdd94206421a 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFeatures.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFeatures.td
@@ -32,6 +32,11 @@ def FeatureSoftFloat : SystemZFeature<
"Use software emulation for floating point"
>;
+def FeatureBackChain : SystemZFeature<
+ "backchain", "BackChain", (all_of FeatureBackChain),
+ "Store the address of the caller's frame into the callee's stack frame"
+>;
+
//===----------------------------------------------------------------------===//
//
// New features added in the Ninth Edition of the z/Architecture
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 11a59df899a1..7522998fd06d 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -443,7 +443,7 @@ void SystemZELFFrameLowering::processFunctionBeforeFrameFinalized(
MachineFrameInfo &MFFrame = MF.getFrameInfo();
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
MachineRegisterInfo *MRI = &MF.getRegInfo();
- bool BackChain = MF.getFunction().hasFnAttribute("backchain");
+ bool BackChain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
if (!usePackedStack(MF) || BackChain)
// Create the incoming register save area.
@@ -628,7 +628,7 @@ void SystemZELFFrameLowering::emitPrologue(MachineFunction &MF,
.addImm(StackSize);
}
else {
- bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
+ bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
// If we need backchain, save current stack pointer. R1 is free at
// this point.
if (StoreBackchain)
@@ -786,7 +786,7 @@ void SystemZELFFrameLowering::inlineStackProbe(
.addMemOperand(MMO);
};
- bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
+ bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
if (StoreBackchain)
BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR))
.addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D);
@@ -861,8 +861,9 @@ StackOffset SystemZELFFrameLowering::getFrameIndexReference(
unsigned SystemZELFFrameLowering::getRegSpillOffset(MachineFunction &MF,
Register Reg) const {
bool IsVarArg = MF.getFunction().isVarArg();
- bool BackChain = MF.getFunction().hasFnAttribute("backchain");
- bool SoftFloat = MF.getSubtarget<SystemZSubtarget>().hasSoftFloat();
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ bool BackChain = Subtarget.hasBackChain();
+ bool SoftFloat = Subtarget.hasSoftFloat();
unsigned Offset = RegSpillOffsets[Reg];
if (usePackedStack(MF) && !(IsVarArg && !SoftFloat)) {
if (SystemZ::GR64BitRegClass.contains(Reg))
@@ -890,8 +891,9 @@ int SystemZELFFrameLowering::getOrCreateFramePointerSaveIndex(
bool SystemZELFFrameLowering::usePackedStack(MachineFunction &MF) const {
bool HasPackedStackAttr = MF.getFunction().hasFnAttribute("packed-stack");
- bool BackChain = MF.getFunction().hasFnAttribute("backchain");
- bool SoftFloat = MF.getSubtarget<SystemZSubtarget>().hasSoftFloat();
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ bool BackChain = Subtarget.hasBackChain();
+ bool SoftFloat = Subtarget.hasSoftFloat();
if (HasPackedStackAttr && BackChain && !SoftFloat)
report_fatal_error("packed-stack + backchain + hard-float is unsupported.");
bool CallConv = MF.getFunction().getCallingConv() != CallingConv::GHC;
@@ -946,7 +948,7 @@ static bool isXPLeafCandidate(const MachineFunction &MF) {
return false;
// If the backchain pointer should be stored, then it is not a XPLeaf routine.
- if (MF.getFunction().hasFnAttribute("backchain"))
+ if (MF.getSubtarget<SystemZSubtarget>().hasBackChain())
return false;
// If function acquires its own stack frame, then it is not a XPLeaf routine.
@@ -989,7 +991,7 @@ bool SystemZXPLINKFrameLowering::assignCalleeSavedSpillSlots(
// If the function needs a frame pointer, or if the backchain pointer should
// be stored, then save the stack pointer register R4.
- if (hasFP(MF) || MF.getFunction().hasFnAttribute("backchain"))
+ if (hasFP(MF) || Subtarget.hasBackChain())
CSI.push_back(CalleeSavedInfo(Regs.getStackPointerRegister()));
// Scan the call-saved GPRs and find the bounds of the register spill area.
@@ -1275,6 +1277,30 @@ void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF,
for (MachineBasicBlock &B : llvm::drop_begin(MF))
B.addLiveIn(Regs.getFramePointerRegister());
}
+
+ // Save GPRs used for varargs, if any.
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ bool IsVarArg = MF.getFunction().isVarArg();
+
+ if (IsVarArg) {
+ // FixedRegs is the number of used registers, accounting for shadow
+ // registers.
+ unsigned FixedRegs = ZFI->getVarArgsFirstGPR() + ZFI->getVarArgsFirstFPR();
+ auto &GPRs = SystemZ::XPLINK64ArgGPRs;
+ for (unsigned I = FixedRegs; I < SystemZ::XPLINK64NumArgGPRs; I++) {
+ uint64_t StartOffset = MFFrame.getOffsetAdjustment() +
+ MFFrame.getStackSize() + Regs.getCallFrameSize() +
+ getOffsetOfLocalArea() + I * 8;
+ unsigned Reg = GPRs[I];
+ BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STG))
+ .addReg(Reg)
+ .addReg(Regs.getStackPointerRegister())
+ .addImm(StartOffset)
+ .addReg(0);
+ if (!MBB.isLiveIn(Reg))
+ MBB.addLiveIn(Reg);
+ }
+ }
}
void SystemZXPLINKFrameLowering::emitEpilogue(MachineFunction &MF,
@@ -1423,6 +1449,18 @@ void SystemZXPLINKFrameLowering::processFunctionBeforeFrameFinalized(
// Setup stack frame offset
MFFrame.setOffsetAdjustment(Regs.getStackPointerBias());
+
+ // Nothing to do for leaf functions.
+ uint64_t StackSize = MFFrame.estimateStackSize(MF);
+ if (StackSize == 0 && MFFrame.getCalleeSavedInfo().empty())
+ return;
+
+ // Although the XPLINK specifications for AMODE64 state that the minimum
+ // size of the param area is 32 bytes and no rounding is otherwise
+ // specified, we round this area up in 64-byte increments to be
+ // compatible with existing compilers.
+ MFFrame.setMaxCallFrameSize(
+ std::max(64U, (unsigned)alignTo(MFFrame.getMaxCallFrameSize(), 64)));
}
// Determines the size of the frame, and creates the deferred spill objects.
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
index c0a173df7ba2..34888f44aa22 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -187,8 +187,8 @@ void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
FU = "LSU";
OS << "/" << FU;
- if (PI->Cycles > 1)
- OS << "(" << PI->Cycles << "cyc)";
+ if (PI->ReleaseAtCycle > 1)
+ OS << "(" << PI->ReleaseAtCycle << "cyc)";
}
if (SC->NumMicroOps > 1)
@@ -301,7 +301,7 @@ EmitInstruction(SUnit *SU) {
continue;
int &CurrCounter =
ProcResourceCounters[PI->ProcResourceIdx];
- CurrCounter += PI->Cycles;
+ CurrCounter += PI->ReleaseAtCycle;
// Check if this is now the new critical resource.
if ((CurrCounter > ProcResCostLim) &&
(CriticalResourceIdx == UINT_MAX ||
@@ -401,7 +401,7 @@ resourcesCost(SUnit *SU) {
PI = SchedModel->getWriteProcResBegin(SC),
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
if (PI->ProcResourceIdx == CriticalResourceIdx)
- Cost = PI->Cycles;
+ Cost = PI->ReleaseAtCycle;
}
return Cost;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 1e9e2917a3aa..e5e1e91916f3 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -307,6 +307,8 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
void loadVectorConstant(const SystemZVectorConstantInfo &VCI,
SDNode *Node);
+ SDNode *loadPoolVectorConstant(APInt Val, EVT VT, SDLoc DL);
+
// Try to use gather instruction Opcode to implement vector insertion N.
bool tryGather(SDNode *N, unsigned Opcode);
@@ -350,7 +352,7 @@ public:
SystemZDAGToDAGISel() = delete;
- SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOptLevel OptLevel)
: SelectionDAGISel(ID, TM, OptLevel) {}
bool runOnMachineFunction(MachineFunction &MF) override {
@@ -368,7 +370,8 @@ public:
// Override SelectionDAGISel.
void Select(SDNode *Node) override;
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) override;
bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
void PreprocessISelDAG() override;
@@ -383,7 +386,7 @@ char SystemZDAGToDAGISel::ID = 0;
INITIALIZE_PASS(SystemZDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
FunctionPass *llvm::createSystemZISelDag(SystemZTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
return new SystemZDAGToDAGISel(TM, OptLevel);
}
@@ -783,6 +786,8 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
case ISD::TRUNCATE: {
if (RxSBG.Opcode == SystemZ::RNSBG)
return false;
+ if (N.getOperand(0).getValueSizeInBits() > 64)
+ return false;
uint64_t BitSize = N.getValueSizeInBits();
uint64_t Mask = allOnes(BitSize);
if (!refineRxSBGMask(RxSBG, Mask))
@@ -1182,6 +1187,35 @@ void SystemZDAGToDAGISel::loadVectorConstant(
SelectCode(Op.getNode());
}
+SDNode *SystemZDAGToDAGISel::loadPoolVectorConstant(APInt Val, EVT VT, SDLoc DL) {
+ SDNode *ResNode;
+ assert (VT.getSizeInBits() == 128);
+
+ SDValue CP = CurDAG->getTargetConstantPool(
+ ConstantInt::get(Type::getInt128Ty(*CurDAG->getContext()), Val),
+ TLI->getPointerTy(CurDAG->getDataLayout()));
+
+ EVT PtrVT = CP.getValueType();
+ SDValue Ops[] = {
+ SDValue(CurDAG->getMachineNode(SystemZ::LARL, DL, PtrVT, CP), 0),
+ CurDAG->getTargetConstant(0, DL, PtrVT),
+ CurDAG->getRegister(0, PtrVT),
+ CurDAG->getEntryNode()
+ };
+ ResNode = CurDAG->getMachineNode(SystemZ::VL, DL, VT, MVT::Other, Ops);
+
+ // Annotate ResNode with memory operand information so that MachineInstr
+ // queries work properly. This e.g. gives the register allocation the
+ // required information for rematerialization.
+ MachineFunction& MF = CurDAG->getMachineFunction();
+ MachineMemOperand *MemOp =
+ MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
+ MachineMemOperand::MOLoad, 16, Align(8));
+
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
+ return ResNode;
+}
+
bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
SDValue ElemV = N->getOperand(2);
auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);
@@ -1557,6 +1591,9 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
break;
}
}
+ // Don't split an XOR with -1 as LCGR/AGHI is more compact.
+ if (Opcode == ISD::XOR && Op1->isAllOnes())
+ break;
if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) {
splitLargeImmediate(Opcode, Node, Node->getOperand(0),
Val - uint32_t(Val), uint32_t(Val));
@@ -1578,6 +1615,27 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
return;
break;
+ case ISD::BSWAP:
+ if (Node->getValueType(0) == MVT::i128) {
+ SDLoc DL(Node);
+ SDValue Src = Node->getOperand(0);
+ Src = CurDAG->getNode(ISD::BITCAST, DL, MVT::v16i8, Src);
+
+ uint64_t Bytes[2] = { 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL };
+ SDNode *Mask = loadPoolVectorConstant(APInt(128, Bytes), MVT::v16i8, DL);
+ SDValue Ops[] = { Src, Src, SDValue(Mask, 0) };
+ SDValue Res = SDValue(CurDAG->getMachineNode(SystemZ::VPERM, DL,
+ MVT::v16i8, Ops), 0);
+
+ Res = CurDAG->getNode(ISD::BITCAST, DL, MVT::i128, Res);
+ SDNode *ResNode = Res.getNode();
+ ReplaceNode(Node, ResNode);
+ SelectCode(Src.getNode());
+ SelectCode(ResNode);
+ return;
+ }
+ break;
+
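
The VPERM mask built for BSWAP above ({0x0706050403020100, 0x0f0e0d0c0b0a0908} as an APInt) lays out in big-endian pool memory as bytes 0x0f, 0x0e, ..., 0x00, i.e. a full byte reversal. A sketch of the permute semantics this relies on:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint8_t Val[16], Mask[16], Res[16];
      for (int I = 0; I < 16; ++I) Val[I] = uint8_t(I);    // stand-in i128
      for (int I = 0; I < 16; ++I) Mask[I] = uint8_t(15 - I);
      for (int I = 0; I < 16; ++I) Res[I] = Val[Mask[I]];  // VPERM picks bytes
      for (int I = 0; I < 16; ++I) std::printf("%02x", Res[I]); // reversed
      std::printf("\n");
    }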
case ISD::Constant:
// If this is a 64-bit constant that is out of the range of LLILF,
// LLIHF and LGFI, split it into two 32-bit pieces.
@@ -1589,6 +1647,18 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
return;
}
}
+ if (Node->getValueType(0) == MVT::i128) {
+ const APInt &Val = cast<ConstantSDNode>(Node)->getAPIntValue();
+ SystemZVectorConstantInfo VCI(Val);
+ if (VCI.isVectorConstantLegal(*Subtarget)) {
+ loadVectorConstant(VCI, Node);
+ return;
+ }
+ // If we can't materialize the constant we need to use a literal pool.
+ SDNode *ResNode = loadPoolVectorConstant(Val, MVT::i128, SDLoc(Node));
+ ReplaceNode(Node, ResNode);
+ return;
+ }
break;
case SystemZISD::SELECT_CCMASK: {
@@ -1599,6 +1669,7 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
if ((Op1.getOpcode() == ISD::LOAD && Op0.getOpcode() != ISD::LOAD) ||
(Subtarget->hasLoadStoreOnCond2() &&
Node->getValueType(0).isInteger() &&
+ Node->getValueType(0).getSizeInBits() <= 64 &&
Op1.getOpcode() == ISD::Constant &&
isInt<16>(cast<ConstantSDNode>(Op1)->getSExtValue()) &&
!(Op0.getOpcode() == ISD::Constant &&
@@ -1677,10 +1748,9 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
SelectCode(Node);
}
-bool SystemZDAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op,
- unsigned ConstraintID,
- std::vector<SDValue> &OutOps) {
+bool SystemZDAGToDAGISel::SelectInlineAsmMemoryOperand(
+ const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
+ std::vector<SDValue> &OutOps) {
SystemZAddressingMode::AddrForm Form;
SystemZAddressingMode::DispRange DispRange;
SDValue Base, Disp, Index;
@@ -1688,30 +1758,30 @@ SelectInlineAsmMemoryOperand(const SDValue &Op,
switch(ConstraintID) {
default:
llvm_unreachable("Unexpected asm memory constraint");
- case InlineAsm::Constraint_i:
- case InlineAsm::Constraint_Q:
- case InlineAsm::Constraint_ZQ:
+ case InlineAsm::ConstraintCode::i:
+ case InlineAsm::ConstraintCode::Q:
+ case InlineAsm::ConstraintCode::ZQ:
// Accept an address with a short displacement, but no index.
Form = SystemZAddressingMode::FormBD;
DispRange = SystemZAddressingMode::Disp12Only;
break;
- case InlineAsm::Constraint_R:
- case InlineAsm::Constraint_ZR:
+ case InlineAsm::ConstraintCode::R:
+ case InlineAsm::ConstraintCode::ZR:
// Accept an address with a short displacement and an index.
Form = SystemZAddressingMode::FormBDXNormal;
DispRange = SystemZAddressingMode::Disp12Only;
break;
- case InlineAsm::Constraint_S:
- case InlineAsm::Constraint_ZS:
+ case InlineAsm::ConstraintCode::S:
+ case InlineAsm::ConstraintCode::ZS:
// Accept an address with a long displacement, but no index.
Form = SystemZAddressingMode::FormBD;
DispRange = SystemZAddressingMode::Disp20Only;
break;
- case InlineAsm::Constraint_T:
- case InlineAsm::Constraint_m:
- case InlineAsm::Constraint_o:
- case InlineAsm::Constraint_p:
- case InlineAsm::Constraint_ZT:
+ case InlineAsm::ConstraintCode::T:
+ case InlineAsm::ConstraintCode::m:
+ case InlineAsm::ConstraintCode::o:
+ case InlineAsm::ConstraintCode::p:
+ case InlineAsm::ConstraintCode::ZT:
// Accept an address with a long displacement and an index.
// m works the same as T, as this is the most general case.
// We don't really have any special handling of "offsettable"
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 785a08a763eb..a1803cf9a042 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -112,6 +112,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
}
+
+ if (Subtarget.hasVector())
+ addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
}
// Compute derived properties from the register classes
@@ -129,6 +132,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+ setMaxAtomicSizeInBitsSupported(128);
+
// Instructions are strings of 2-byte aligned 2-byte values.
setMinFunctionAlignment(Align(2));
// For performance reasons we prefer 16-byte alignment.
@@ -161,12 +166,12 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// Expand BRCOND into a BR_CC (see above).
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
- // Handle integer types.
+ // Handle integer types except i128.
for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
I <= MVT::LAST_INTEGER_VALUETYPE;
++I) {
MVT VT = MVT::SimpleValueType(I);
- if (isTypeLegal(VT)) {
+ if (isTypeLegal(VT) && VT != MVT::i128) {
setOperationAction(ISD::ABS, VT, Legal);
// Expand individual DIV and REMs into DIVREMs.
@@ -234,6 +239,45 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
}
}
+ // Handle i128 if legal.
+ if (isTypeLegal(MVT::i128)) {
+ // No special instructions for these.
+ setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i128, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i128, Expand);
+ setOperationAction(ISD::ROTR, MVT::i128, Expand);
+ setOperationAction(ISD::ROTL, MVT::i128, Expand);
+ setOperationAction(ISD::MUL, MVT::i128, Expand);
+ setOperationAction(ISD::MULHS, MVT::i128, Expand);
+ setOperationAction(ISD::MULHU, MVT::i128, Expand);
+ setOperationAction(ISD::SDIV, MVT::i128, Expand);
+ setOperationAction(ISD::UDIV, MVT::i128, Expand);
+ setOperationAction(ISD::SREM, MVT::i128, Expand);
+ setOperationAction(ISD::UREM, MVT::i128, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i128, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i128, Expand);
+
+ // Support addition/subtraction with carry.
+ setOperationAction(ISD::UADDO, MVT::i128, Custom);
+ setOperationAction(ISD::USUBO, MVT::i128, Custom);
+ setOperationAction(ISD::UADDO_CARRY, MVT::i128, Custom);
+ setOperationAction(ISD::USUBO_CARRY, MVT::i128, Custom);
+
+ // Use VPOPCT and add up partial results.
+ setOperationAction(ISD::CTPOP, MVT::i128, Custom);
+
+ // We have to use libcalls for these.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i128, LibCall);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i128, LibCall);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i128, LibCall);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i128, LibCall);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, LibCall);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, LibCall);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, LibCall);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, LibCall);
+ }
+
// Type legalization will convert 8- and 16-bit atomic operations into
// forms that operate on i32s (but still keeping the original memory VT).
// Lower them into full i32 operations.
@@ -249,7 +293,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
- // Even though i128 is not a legal type, we still need to custom lower
+ // Whether or not i128 is a legal type, we need to custom lower
// the atomic operations in order to exploit SystemZ instructions.
setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
@@ -297,7 +341,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::SRA_I128, nullptr);
// Handle bitcast from fp128 to i128.
- setOperationAction(ISD::BITCAST, MVT::i128, Custom);
+ if (!isTypeLegal(MVT::i128))
+ setOperationAction(ISD::BITCAST, MVT::i128, Custom);
// We have native instructions for i8, i16 and i32 extensions, but not i1.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
@@ -385,16 +430,12 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
- // Detect shifts by a scalar amount and convert them into
+ // Detect shifts/rotates by a scalar amount and convert them into
// V*_BY_SCALAR.
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
-
- // At present ROTL isn't matched by DAGCombiner. ROTR should be
- // converted into ROTL.
- setOperationAction(ISD::ROTL, VT, Expand);
- setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction(ISD::ROTL, VT, Custom);
// Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
// and inverting the result as necessary.
@@ -691,6 +732,19 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// Default to having -disable-strictnode-mutation on
IsStrictFPEnabled = true;
+
+ if (Subtarget.isTargetzOS()) {
+ struct RTLibCallMapping {
+ RTLIB::Libcall Code;
+ const char *Name;
+ };
+ static RTLibCallMapping RTLibCallCommon[] = {
+#define HANDLE_LIBCALL(code, name) {RTLIB::code, name},
+#include "ZOSLibcallNames.def"
+ };
+ for (auto &E : RTLibCallCommon)
+ setLibcallName(E.Code, E.Name);
+ }
}
bool SystemZTargetLowering::useSoftFloat() const {
@@ -859,6 +913,25 @@ bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const
return false;
}
+TargetLowering::AtomicExpansionKind
+SystemZTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
+ // Don't expand subword operations as they require special treatment.
+ if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
+ return AtomicExpansionKind::None;
+
+ // Don't expand if there is a target instruction available.
+ if (Subtarget.hasInterlockedAccess1() &&
+ (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
+ (RMW->getOperation() == AtomicRMWInst::BinOp::Add ||
+ RMW->getOperation() == AtomicRMWInst::BinOp::Sub ||
+ RMW->getOperation() == AtomicRMWInst::BinOp::And ||
+ RMW->getOperation() == AtomicRMWInst::BinOp::Or ||
+ RMW->getOperation() == AtomicRMWInst::BinOp::Xor))
+ return AtomicExpansionKind::None;
+
+ return AtomicExpansionKind::CmpXChg;
+}
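
Everything shouldExpandAtomicRMWInIR does not whitelist above (e.g. nand and min/max, or add/sub/and/or/xor without the interlocked-access-1 feature) is expanded by the AtomicExpand pass into a compare-and-swap loop. A sketch of the shape that loop takes, written against std::atomic purely for illustration:

    #include <atomic>

    long fetch_nand(std::atomic<long> &A, long V) {
      long Old = A.load(std::memory_order_relaxed);
      while (!A.compare_exchange_weak(Old, ~(Old & V),
                                      std::memory_order_seq_cst,
                                      std::memory_order_relaxed)) {
        // Old is reloaded by compare_exchange_weak on failure.
      }
      return Old;
    }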
+
bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
// We can use CGFI or CLGFI.
return isInt<32>(Imm) || isUInt<32>(Imm);
@@ -1289,12 +1362,11 @@ SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
report_fatal_error("Invalid register name global variable");
}
-void SystemZTargetLowering::
-LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
- std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const {
+void SystemZTargetLowering::LowerAsmOperandForConstraint(
+ SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
// Only support length 1 constraints for now.
- if (Constraint.length() == 1) {
+ if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'I': // Unsigned 8-bit constant
if (auto *C = dyn_cast<ConstantSDNode>(Op))
@@ -1357,24 +1429,6 @@ bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return CI->isTailCall();
}
-// We do not yet support 128-bit single-element vector types. If the user
-// attempts to use such types as function argument or return type, prefer
-// to error out instead of emitting code violating the ABI.
-static void VerifyVectorType(MVT VT, EVT ArgVT) {
- if (ArgVT.isVector() && !VT.isVector())
- report_fatal_error("Unsupported vector argument or return type");
-}
-
-static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
- for (unsigned i = 0; i < Ins.size(); ++i)
- VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
-}
-
-static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
- for (unsigned i = 0; i < Outs.size(); ++i)
- VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
-}
-
// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
@@ -1445,7 +1499,15 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
SDLoc DL(In);
SDValue Lo, Hi;
- std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
+ if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
+ DAG.getNode(ISD::SRL, DL, MVT::i128, In,
+ DAG.getConstant(64, DL, MVT::i32)));
+ } else {
+ std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
+ }
+
SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
MVT::Untyped, Hi, Lo);
return SDValue(Pair, 0);
@@ -1457,7 +1519,16 @@ static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
DL, MVT::i64, In);
SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
DL, MVT::i64, In);
- return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
+
+ if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
+ Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
+ Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
+ DAG.getConstant(64, DL, MVT::i32));
+ return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
+ } else {
+ return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
+ }
}
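
When i128 is legal, the pair is recombined with zero-extend, shift and or rather than BUILD_PAIR. The same arithmetic on the compiler's native 128-bit type (a sketch assuming a host compiler with __int128):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Lo = 0x1122334455667788ULL, Hi = 0x99aabbccddeeff00ULL;
      unsigned __int128 V =
          ((unsigned __int128)Hi << 64) | (unsigned __int128)Lo;
      assert((uint64_t)V == Lo && (uint64_t)(V >> 64) == Hi);
    }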
bool SystemZTargetLowering::splitValueIntoRegisterParts(
@@ -1497,10 +1568,6 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
- // Detect unsupported vector argument types.
- if (Subtarget.hasVector())
- VerifyVectorTypes(Ins);
-
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
@@ -1601,7 +1668,23 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
}
- // FIXME: Add support for lowering varargs for XPLINK64 in a later patch.
+ if (IsVarArg && Subtarget.isTargetXPLINK64()) {
+ // Save the number of non-varargs registers for later use by va_start, etc.
+ FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
+ FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
+
+ auto *Regs = static_cast<SystemZXPLINK64Registers *>(
+ Subtarget.getSpecialRegisters());
+
+ // Likewise the address (in the form of a frame index) of where the
+ // first stack vararg would be. The 1-byte size here is arbitrary.
+ // FIXME: Pre-include call frame size in the offset, should not
+ // need to manually add it here.
+ int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
+ int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
+ FuncInfo->setVarArgsFrameIndex(FI);
+ }
+
if (IsVarArg && Subtarget.isTargetELF()) {
// Save the number of non-varargs registers for later use by va_start, etc.
FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
@@ -1785,12 +1868,6 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (Subtarget.isTargetXPLINK64())
IsTailCall = false;
- // Detect unsupported vector argument and return types.
- if (Subtarget.hasVector()) {
- VerifyVectorTypes(Outs);
- VerifyVectorTypes(Ins);
- }
-
// Analyze the operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
@@ -1804,13 +1881,6 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = ArgCCInfo.getStackSize();
- if (Subtarget.isTargetXPLINK64())
- // Although the XPLINK specifications for AMODE64 state that minimum size
- // of the param area is minimum 32 bytes and no rounding is otherwise
- // specified, we round this area in 64 bytes increments to be compatible
- // with existing compilers.
- NumBytes = std::max(64U, (unsigned)alignTo(NumBytes, 64));
-
// Mark the start of the call.
if (!IsTailCall)
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
@@ -2041,12 +2111,8 @@ CanLowerReturn(CallingConv::ID CallConv,
MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
- // Detect unsupported vector return types.
- if (Subtarget.hasVector())
- VerifyVectorTypes(Outs);
-
// Special case that we cannot easily detect in RetCC_SystemZ since
- // i128 is not a legal type.
+ // i128 may not be a legal type.
for (auto &Out : Outs)
if (Out.ArgVT == MVT::i128)
return false;
@@ -2064,10 +2130,6 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
const SDLoc &DL, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- // Detect unsupported vector return types.
- if (Subtarget.hasVector())
- VerifyVectorTypes(Outs);
-
// Assign locations to each returned value.
SmallVector<CCValAssign, 16> RetLocs;
CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
@@ -2365,7 +2427,7 @@ static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
return;
auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
- if (!ConstOp1)
+ if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
return;
int64_t Value = ConstOp1->getSExtValue();
@@ -2399,6 +2461,8 @@ static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
// The load must be an extending one and the constant must be within the
// range of the unextended value.
auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
+ if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
+ return;
uint64_t Value = ConstOp1->getZExtValue();
uint64_t Mask = (1 << NumBits) - 1;
if (Load->getExtensionType() == ISD::SEXTLOAD) {
@@ -2477,7 +2541,9 @@ static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
// Return true if it is better to swap the operands of C.
static bool shouldSwapCmpOperands(const Comparison &C) {
- // Leave f128 comparisons alone, since they have no memory forms.
+ // Leave i128 and f128 comparisons alone, since they have no memory forms.
+ if (C.Op0.getValueType() == MVT::i128)
+ return false;
if (C.Op0.getValueType() == MVT::f128)
return false;
@@ -2614,6 +2680,7 @@ static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
if (C.Op0.getOpcode() == ISD::TRUNCATE &&
C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
C.Op1.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
@@ -2742,6 +2809,27 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
// Update the arguments with the TM version if so.
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
Comparison &C) {
+ // Use VECTOR TEST UNDER MASK for i128 operations.
+ if (C.Op0.getValueType() == MVT::i128) {
+ // We can use VTM for EQ/NE comparisons of x & y against 0.
+ if (C.Op0.getOpcode() == ISD::AND &&
+ (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
+ C.CCMask == SystemZ::CCMASK_CMP_NE)) {
+ auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
+ if (Mask && Mask->getAPIntValue() == 0) {
+ C.Opcode = SystemZISD::VTM;
+ C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
+ C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
+ C.CCValid = SystemZ::CCMASK_VCMP;
+ if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
+ C.CCMask = SystemZ::CCMASK_VCMP_ALL;
+ else
+ C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
+ }
+ }
+ return;
+ }
+
// Check that we have a comparison with a constant.
auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
if (!ConstOp1)
@@ -2828,6 +2916,51 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
C.CCMask = NewCCMask;
}
+// Implement i128 comparison in vector registers.
+static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
+ Comparison &C) {
+ if (C.Opcode != SystemZISD::ICMP)
+ return;
+ if (C.Op0.getValueType() != MVT::i128)
+ return;
+
+ // (In-)Equality comparisons can be implemented via VCEQGS.
+ if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
+ C.CCMask == SystemZ::CCMASK_CMP_NE) {
+ C.Opcode = SystemZISD::VICMPES;
+ C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
+ C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
+ C.CCValid = SystemZ::CCMASK_VCMP;
+ if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
+ C.CCMask = SystemZ::CCMASK_VCMP_ALL;
+ else
+ C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
+ return;
+ }
+
+ // Normalize other comparisons to GT.
+ bool Swap = false, Invert = false;
+ switch (C.CCMask) {
+ case SystemZ::CCMASK_CMP_GT: break;
+ case SystemZ::CCMASK_CMP_LT: Swap = true; break;
+ case SystemZ::CCMASK_CMP_LE: Invert = true; break;
+ case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
+ default: llvm_unreachable("Invalid integer condition!");
+ }
+ if (Swap)
+ std::swap(C.Op0, C.Op1);
+
+ if (C.ICmpType == SystemZICMP::UnsignedOnly)
+ C.Opcode = SystemZISD::UCMP128HI;
+ else
+ C.Opcode = SystemZISD::SCMP128HI;
+ C.CCValid = SystemZ::CCMASK_ANY;
+ C.CCMask = SystemZ::CCMASK_1;
+
+ if (Invert)
+ C.CCMask ^= C.CCValid;
+}
+
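
The normalization in adjustICmp128 reduces every ordered predicate to a single greater-than primitive, optionally swapping the operands and/or inverting the result. A scalar sketch of that reduction table:

    #include <cassert>

    enum Pred { LT, LE, GT, GE };

    static bool cmpViaGT(long A, long B, Pred P) {
      bool Swap = (P == LT || P == GE);
      bool Invert = (P == LE || P == GE);
      if (Swap) { long T = A; A = B; B = T; }
      bool R = A > B; // the single (S|U)CMP128HI primitive
      return Invert ? !R : R;
    }

    int main() {
      assert(cmpViaGT(1, 2, LT) && !cmpViaGT(2, 1, LT));
      assert(cmpViaGT(2, 2, LE) && cmpViaGT(2, 2, GE) && !cmpViaGT(2, 2, GT));
    }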
// See whether the comparison argument contains a redundant AND
// and remove it if so. This sometimes happens due to the generic
// BRCOND expansion.
@@ -2836,7 +2969,7 @@ static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
if (C.Op0.getOpcode() != ISD::AND)
return;
auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
- if (!Mask)
+ if (!Mask || Mask->getValueSizeInBits(0) > 64)
return;
KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
@@ -2888,16 +3021,17 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
bool IsSignaling = false) {
if (CmpOp1.getOpcode() == ISD::Constant) {
assert(!Chain);
- uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
unsigned Opcode, CCValid;
if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
- return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
+ return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
+ cast<ConstantSDNode>(CmpOp1)->getZExtValue(), Cond);
if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
- return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
+ return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
+ cast<ConstantSDNode>(CmpOp1)->getZExtValue(), Cond);
}
Comparison C(CmpOp0, CmpOp1, Chain);
C.CCMask = CCMaskForCondCode(Cond);
@@ -2942,6 +3076,7 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
}
adjustForTestUnderMask(DAG, DL, C);
+ adjustICmp128(DAG, DL, C);
return C;
}
@@ -2969,6 +3104,11 @@ static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
}
+ if (C.Opcode == SystemZISD::VICMPES) {
+ SDVTList VTs = DAG.getVTList(C.Op0.getValueType(), MVT::i32);
+ SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
+ return SDValue(Val.getNode(), 1);
+ }
if (C.Chain) {
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
@@ -3314,6 +3454,7 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
C.CCMask != SystemZ::CCMASK_CMP_EQ &&
C.CCMask != SystemZ::CCMASK_CMP_NE &&
C.Op1.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
if (isAbsolute(C.Op0, TrueOp, FalseOp))
return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
@@ -3599,9 +3740,17 @@ SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
- // FIXME The frontend should detect this case.
if (Depth > 0) {
- report_fatal_error("Unsupported stack frame traversal count");
+ // FIXME The frontend should detect this case.
+ if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
+ report_fatal_error("Unsupported stack frame traversal count");
+
+ SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
+ while (Depth--) {
+ BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
+ MachinePointerInfo());
+ BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
+ }
}
return BackChain;
@@ -3620,9 +3769,19 @@ SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
- // FIXME The frontend should detect this case.
if (Depth > 0) {
- report_fatal_error("Unsupported stack frame traversal count");
+ // FIXME The frontend should detect this case.
+ if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
+ report_fatal_error("Unsupported stack frame traversal count");
+
+ SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
+ auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
+ int Offset = (TFL->usePackedStack(MF) ? -2 : 14) *
+ getTargetMachine().getPointerSize(0);
+ SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
+ DAG.getConstant(Offset, DL, PtrVT));
+ return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
+ MachinePointerInfo());
}
// Return R14D, which has the return address. Mark it an implicit live-in.
@@ -3838,7 +3997,7 @@ SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
MachineFunction &MF = DAG.getMachineFunction();
bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
- bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
+ bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
@@ -4079,6 +4238,29 @@ SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDLoc DL(N);
+
+ if (N->getValueType(0) == MVT::i128) {
+ unsigned BaseOp = 0;
+ unsigned FlagOp = 0;
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Unknown instruction!");
+ case ISD::UADDO:
+ BaseOp = ISD::ADD;
+ FlagOp = SystemZISD::VACC;
+ break;
+ case ISD::USUBO:
+ BaseOp = ISD::SUB;
+ FlagOp = SystemZISD::VSCBI;
+ break;
+ }
+ SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
+ SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
+ Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
+ DAG.getValueType(MVT::i1));
+ Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
+ }
+
unsigned BaseOp = 0;
unsigned CCValid = 0;
unsigned CCMask = 0;
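
The VACC/VSCBI flag values used in the i128 path above (and by the carry variants in the next hunk) are the pure carry/borrow-out of the 128-bit add/sub, a single 0-or-1 quantity, hence the AssertZext to i1. A sketch of that semantics, again assuming __int128:

    #include <cassert>

    int main() {
      unsigned __int128 A = ~(unsigned __int128)0; // all-ones
      unsigned __int128 Sum = A + 1;               // wraps to zero
      unsigned Carry = (Sum < A) ? 1u : 0u;        // what VACC computes
      assert(Sum == 0 && Carry == 1);
    }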
@@ -4144,6 +4326,30 @@ SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
SDValue RHS = N->getOperand(1);
SDValue Carry = Op.getOperand(2);
SDLoc DL(N);
+
+ if (VT == MVT::i128) {
+ unsigned BaseOp = 0;
+ unsigned FlagOp = 0;
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Unknown instruction!");
+ case ISD::UADDO_CARRY:
+ BaseOp = SystemZISD::VAC;
+ FlagOp = SystemZISD::VACCC;
+ break;
+ case ISD::USUBO_CARRY:
+ BaseOp = SystemZISD::VSBI;
+ FlagOp = SystemZISD::VSBCBI;
+ break;
+ }
+ Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
+ SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
+ SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
+ Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
+ DAG.getValueType(MVT::i1));
+ Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
+ }
+
unsigned BaseOp = 0;
unsigned CCValid = 0;
unsigned CCMask = 0;
@@ -4189,6 +4395,15 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
SDLoc DL(Op);
Op = Op.getOperand(0);
+ if (VT.getScalarSizeInBits() == 128) {
+ Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
+ Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
+ SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
+ DAG.getConstant(0, DL, MVT::i64));
+ Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
+ return Op;
+ }
+
// Handle vector types via VPOPCT.
if (VT.isVector()) {
Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
@@ -4282,6 +4497,12 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
SelectionDAG &DAG) const {
auto *Node = cast<AtomicSDNode>(Op.getNode());
+ if (Node->getMemoryVT() == MVT::i128) {
+ // Use same code to handle both legal and non-legal i128 types.
+ SmallVector<SDValue, 2> Results;
+ LowerOperationWrapper(Node, Results, DAG);
+ return DAG.getMergeValues(Results, SDLoc(Op));
+ }
return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
Node->getChain(), Node->getBasePtr(),
Node->getMemoryVT(), Node->getMemOperand());
@@ -4291,6 +4512,12 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
SelectionDAG &DAG) const {
auto *Node = cast<AtomicSDNode>(Op.getNode());
+ if (Node->getMemoryVT() == MVT::i128) {
+ // Use same code to handle both legal and non-legal i128 types.
+ SmallVector<SDValue, 1> Results;
+ LowerOperationWrapper(Node, Results, DAG);
+ return DAG.getMergeValues(Results, SDLoc(Op));
+ }
SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
Node->getBasePtr(), Node->getMemoryVT(),
Node->getMemOperand());
@@ -4302,6 +4529,31 @@ SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
return Chain;
}
+// Prepare for a Compare And Swap for a subword operation. The operation is
+// performed in memory on the containing, naturally aligned 4-byte word.
+static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
+ SDValue &AlignedAddr, SDValue &BitShift,
+ SDValue &NegBitShift) {
+ EVT PtrVT = Addr.getValueType();
+ EVT WideVT = MVT::i32;
+
+ // Get the address of the containing word.
+ AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
+ DAG.getConstant(-4, DL, PtrVT));
+
+ // Get the number of bits that the word must be rotated left in order
+ // to bring the field to the top bits of a GR32.
+ BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
+ DAG.getConstant(3, DL, PtrVT));
+ BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
+
+ // Get the complementing shift amount, for rotating a field in the top
+ // bits back to its proper position.
+ NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
+ DAG.getConstant(0, DL, WideVT), BitShift);
+
+}
+
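
The shift arithmetic in getCSAddressAndShifts masks the address down to the containing word and rotates left by 8 * (byte offset), which brings the subword field to the top of a GR32 on big-endian SystemZ. A standalone check of that math for a halfword at byte offset 2:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Addr = 0x1006;                       // address of an i16
      uint64_t AlignedAddr = Addr & ~uint64_t(3);   // containing word
      uint32_t BitShift = uint32_t(Addr << 3) % 32; // rotate-left amount
      assert(AlignedAddr == 0x1004 && BitShift == 16);
    }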
// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
@@ -4309,7 +4561,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
unsigned Opcode) const {
auto *Node = cast<AtomicSDNode>(Op.getNode());
- // 32-bit operations need no code outside the main loop.
+ // 32-bit operations need no special handling.
EVT NarrowVT = Node->getMemoryVT();
EVT WideVT = MVT::i32;
if (NarrowVT == WideVT)
@@ -4321,7 +4573,6 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
SDValue Src2 = Node->getVal();
MachineMemOperand *MMO = Node->getMemOperand();
SDLoc DL(Node);
- EVT PtrVT = Addr.getValueType();
// Convert atomic subtracts of constants into additions.
if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
@@ -4330,20 +4581,8 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
}
- // Get the address of the containing word.
- SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
- DAG.getConstant(-4, DL, PtrVT));
-
- // Get the number of bits that the word must be rotated left in order
- // to bring the field to the top bits of a GR32.
- SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
- DAG.getConstant(3, DL, PtrVT));
- BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
-
- // Get the complementing shift amount, for rotating a field in the top
- // bits back to its proper position.
- SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
- DAG.getConstant(0, DL, WideVT), BitShift);
+ SDValue AlignedAddr, BitShift, NegBitShift;
+ getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
// Extend the source operand to 32 bits and prepare it for the inner loop.
// ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
@@ -4375,38 +4614,24 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
return DAG.getMergeValues(RetOps, DL);
}
-// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations
-// into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
-// operations into additions.
+// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
+// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
SelectionDAG &DAG) const {
auto *Node = cast<AtomicSDNode>(Op.getNode());
EVT MemVT = Node->getMemoryVT();
if (MemVT == MVT::i32 || MemVT == MVT::i64) {
- // A full-width operation.
+ // A full-width operation: negate and use LAA(G).
assert(Op.getValueType() == MemVT && "Mismatched VTs");
+ assert(Subtarget.hasInterlockedAccess1() &&
+ "Should have been expanded by AtomicExpand pass.");
SDValue Src2 = Node->getVal();
- SDValue NegSrc2;
SDLoc DL(Src2);
-
- if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
- // Use an addition if the operand is constant and either LAA(G) is
- // available or the negative value is in the range of A(G)FHI.
- int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
- if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
- NegSrc2 = DAG.getConstant(Value, DL, MemVT);
- } else if (Subtarget.hasInterlockedAccess1())
- // Use LAA(G) if available.
- NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
- Src2);
-
- if (NegSrc2.getNode())
- return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
- Node->getChain(), Node->getBasePtr(), NegSrc2,
- Node->getMemOperand());
-
- // Use the node as-is.
- return Op;
+ SDValue NegSrc2 =
+ DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
+ return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
+ Node->getChain(), Node->getBasePtr(), NegSrc2,
+ Node->getMemOperand());
}
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
@@ -4423,6 +4648,13 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
MachineMemOperand *MMO = Node->getMemOperand();
SDLoc DL(Node);
+ if (Node->getMemoryVT() == MVT::i128) {
+ // Use the same code to handle both legal and non-legal i128 types.
+ SmallVector<SDValue, 3> Results;
+ LowerOperationWrapper(Node, Results, DAG);
+ return DAG.getMergeValues(Results, DL);
+ }
+
// We have native support for 32-bit and 64-bit compare and swap, but we
// still need to expand extracting the "success" result from the CC.
EVT NarrowVT = Node->getMemoryVT();
@@ -4444,22 +4676,9 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
// Convert 8-bit and 16-bit compare and swap to a loop, implemented
// via a fullword ATOMIC_CMP_SWAPW operation.
int64_t BitSize = NarrowVT.getSizeInBits();
- EVT PtrVT = Addr.getValueType();
- // Get the address of the containing word.
- SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
- DAG.getConstant(-4, DL, PtrVT));
-
- // Get the number of bits that the word must be rotated left in order
- // to bring the field to the top bits of a GR32.
- SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
- DAG.getConstant(3, DL, PtrVT));
- BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
-
- // Get the complementing shift amount, for rotating a field in the top
- // bits back to its proper position.
- SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
- DAG.getConstant(0, DL, WideVT), BitShift);
+ SDValue AlignedAddr, BitShift, NegBitShift;
+ getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
// Construct the ATOMIC_CMP_SWAPW node.
SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
@@ -4515,7 +4734,7 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
auto *Regs = Subtarget.getSpecialRegisters();
- bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
+ bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
report_fatal_error("Variable-sized stack allocations are not supported "
@@ -4641,6 +4860,40 @@ SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::s390_vsumqg:
return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+
+ case Intrinsic::s390_vaq:
+ return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::s390_vaccb:
+ case Intrinsic::s390_vacch:
+ case Intrinsic::s390_vaccf:
+ case Intrinsic::s390_vaccg:
+ case Intrinsic::s390_vaccq:
+ return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::s390_vacq:
+ return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ case Intrinsic::s390_vacccq:
+ return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+ case Intrinsic::s390_vsq:
+ return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::s390_vscbib:
+ case Intrinsic::s390_vscbih:
+ case Intrinsic::s390_vscbif:
+ case Intrinsic::s390_vscbig:
+ case Intrinsic::s390_vscbiq:
+ return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::s390_vsbiq:
+ return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ case Intrinsic::s390_vsbcbiq:
+ return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
}
return SDValue();
@@ -5931,6 +6184,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
case ISD::SRA:
return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
+ case ISD::ROTL:
+ return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
case ISD::IS_FPCLASS:
return lowerIS_FPCLASS(Op, DAG);
case ISD::GET_ROUNDING:
@@ -5961,9 +6216,8 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
case ISD::ATOMIC_STORE: {
SDLoc DL(N);
SDVTList Tys = DAG.getVTList(MVT::Other);
- SDValue Ops[] = { N->getOperand(0),
- lowerI128ToGR128(DAG, N->getOperand(2)),
- N->getOperand(1) };
+ SDValue Ops[] = {N->getOperand(0), lowerI128ToGR128(DAG, N->getOperand(1)),
+ N->getOperand(2)};
MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
DL, Tys, Ops, MVT::i128, MMO);
@@ -6096,7 +6350,14 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(VSHL_BY_SCALAR);
OPCODE(VSRL_BY_SCALAR);
OPCODE(VSRA_BY_SCALAR);
+ OPCODE(VROTL_BY_SCALAR);
OPCODE(VSUM);
+ OPCODE(VACC);
+ OPCODE(VSCBI);
+ OPCODE(VAC);
+ OPCODE(VSBI);
+ OPCODE(VACCC);
+ OPCODE(VSBCBI);
OPCODE(VICMPE);
OPCODE(VICMPH);
OPCODE(VICMPHL);
@@ -6121,6 +6382,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(VROUND);
OPCODE(STRICT_VROUND);
OPCODE(VTM);
+ OPCODE(SCMP128HI);
+ OPCODE(UCMP128HI);
OPCODE(VFAE_CC);
OPCODE(VFAEZ_CC);
OPCODE(VFEE_CC);
@@ -6431,6 +6694,71 @@ SDValue SystemZTargetLowering::combineLOAD(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
EVT LdVT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // Replace an i128 load that is used solely to move its value into GPRs
+ // by separate loads of both halves.
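+ // For example (a sketch), given
+ //   %v = load i128, ptr %p
+ //   %lo = trunc i128 %v to i64
+ //   %hi = trunc i128 (lshr i128 %v, 64) to i64
+ // the value halves become i64 loads from %p+8 and %p respectively
+ // (big-endian layout), with the chains rejoined by a TokenFactor.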
+ if (LdVT == MVT::i128) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (!LD->isSimple() || !ISD::isNormalLoad(LD))
+ return SDValue();
+
+ // Scan through all users.
+ SmallVector<std::pair<SDNode *, int>, 2> Users;
+ int UsedElements = 0;
+ for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
+ UI != UIEnd; ++UI) {
+ // Skip the uses of the chain.
+ if (UI.getUse().getResNo() != 0)
+ continue;
+
+ // Verify every user is a TRUNCATE to i64 of the low or high half ...
+ SDNode *User = *UI;
+ int Index = 1;
+ if (User->getOpcode() == ISD::SRL &&
+ User->getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(User->getOperand(1))->getZExtValue() == 64 &&
+ User->hasOneUse()) {
+ User = *User->use_begin();
+ Index = 0;
+ }
+ if (User->getOpcode() != ISD::TRUNCATE ||
+ User->getValueType(0) != MVT::i64)
+ return SDValue();
+
+ // ... and no half is extracted twice.
+ if (UsedElements & (1 << Index))
+ return SDValue();
+
+ UsedElements |= 1 << Index;
+ Users.push_back(std::make_pair(User, Index));
+ }
+
+ // Rewrite each extraction as an independent load.
+ SmallVector<SDValue, 2> ArgChains;
+ for (auto UserAndIndex : Users) {
+ SDNode *User = UserAndIndex.first;
+ unsigned Offset = User->getValueType(0).getStoreSize() * UserAndIndex.second;
+ SDValue Ptr =
+ DAG.getMemBasePlusOffset(LD->getBasePtr(), TypeSize::getFixed(Offset), DL);
+ SDValue EltLoad =
+ DAG.getLoad(User->getValueType(0), DL, LD->getChain(), Ptr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
+
+ DCI.CombineTo(User, EltLoad, true);
+ ArgChains.push_back(EltLoad.getValue(1));
+ }
+
+ // Collect all chains via TokenFactor.
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ ArgChains);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
+ DCI.AddToWorklist(Chain.getNode());
+ return SDValue(N, 0);
+ }
+
if (LdVT.isVector() || LdVT.isInteger())
return SDValue();
// Transform a scalar load that is REPLICATEd as well as having other
@@ -6454,7 +6782,6 @@ SDValue SystemZTargetLowering::combineLOAD(
if (!Replicate || OtherUses.empty())
return SDValue();
- SDLoc DL(N);
SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
Replicate, DAG.getConstant(0, DL, MVT::i32));
// Update uses of the loaded Value while preserving old chains.
@@ -6471,7 +6798,7 @@ bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
return true;
if (Subtarget.hasVectorEnhancements2())
- if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64)
+ if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 ||
+ VT == MVT::i128)
return true;
return false;
}
@@ -6509,6 +6836,33 @@ static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
return true;
}
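+// Return true if Val is an i128 assembled from two i64 GPR values, i.e.
+// a DAG of the shape (a sketch):
+//   (or (zero_extend i64 LoPart), (shl (any_extend i64 HiPart), 64))
+// and if so, store the two halves in LoPart and HiPart.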
+static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart) {
+ if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
+ return false;
+
+ SDValue Op0 = Val.getOperand(0);
+ SDValue Op1 = Val.getOperand(1);
+
+ if (Op0.getOpcode() == ISD::SHL)
+ std::swap(Op0, Op1);
+ if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
+ Op1.getOperand(1).getOpcode() != ISD::Constant ||
+ cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue() != 64)
+ return false;
+ Op1 = Op1.getOperand(0);
+
+ if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
+ Op0.getOperand(0).getValueType() != MVT::i64)
+ return false;
+ if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
+ Op1.getOperand(0).getValueType() != MVT::i64)
+ return false;
+
+ LoPart = Op0.getOperand(0);
+ HiPart = Op1.getOperand(0);
+ return true;
+}
+
SDValue SystemZTargetLowering::combineSTORE(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -6567,6 +6921,27 @@ SDValue SystemZTargetLowering::combineSTORE(
}
}
+ // Transform a store of an i128 moved from GPRs into two separate stores.
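+ // The high part is stored at the base address and the low part at
+ // base+8 (big-endian), e.g. (a sketch):
+ //   store i64 %hi, ptr %p
+ //   store i64 %lo, ptr %p + 8
+ // with the two store chains merged through a TokenFactor.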
+ if (MemVT == MVT::i128 && SN->isSimple() && ISD::isNormalStore(SN)) {
+ SDValue LoPart, HiPart;
+ if (isMovedFromParts(Op1, LoPart, HiPart)) {
+ SDLoc DL(SN);
+ SDValue Chain0 =
+ DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(),
+ SN->getPointerInfo(), SN->getOriginalAlign(),
+ SN->getMemOperand()->getFlags(), SN->getAAInfo());
+ SDValue Chain1 =
+ DAG.getStore(SN->getChain(), DL, LoPart,
+ DAG.getObjectPtrOffset(DL, SN->getBasePtr(),
+ TypeSize::getFixed(8)),
+ SN->getPointerInfo().getWithOffset(8),
+ SN->getOriginalAlign(),
+ SN->getMemOperand()->getFlags(), SN->getAAInfo());
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
+ }
+ }
+
// Replicate a reg or immediate with VREP instead of scalar multiply or
// immediate load. It seems best to do this during the first DAGCombine as
// it is straightforward to handle the zero-extend node in the initial
@@ -7657,6 +8032,7 @@ static bool isSelectPseudo(MachineInstr &MI) {
switch (MI.getOpcode()) {
case SystemZ::Select32:
case SystemZ::Select64:
+ case SystemZ::Select128:
case SystemZ::SelectF32:
case SystemZ::SelectF64:
case SystemZ::SelectF128:
@@ -7901,20 +8277,80 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
return JoinMBB;
}
-// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
-// or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that
-// performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
-// BitSize is the width of the field in bits, or 0 if this is a partword
-// ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
-// is one of the operands. Invert says whether the field should be
-// inverted after performing BinOpcode (e.g. for NAND).
+// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
+MachineBasicBlock *
+SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
+ MachineBasicBlock *MBB,
+ bool Unsigned) const {
+ MachineFunction &MF = *MBB->getParent();
+ const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Synthetic instruction to compare 128-bit values.
+ // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
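+ // The combined effect, in C-like terms (a sketch for the unsigned case):
+ //   if (hi(Op0) != hi(Op1))
+ //     CC = hi(Op0) > hi(Op1) ? 1 : 2;   // VECLG, operands swapped
+ //   else
+ //     CC = lo(Op0) > lo(Op1) ? 1 : 3;   // VCHLGS on the low halves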
+ Register Op0 = MI.getOperand(0).getReg();
+ Register Op1 = MI.getOperand(1).getReg();
+
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
+ MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
+
+ // StartMBB:
+ //
+ // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
+ // Swap the inputs to get:
+ // CC 1 if high(Op0) > high(Op1)
+ // CC 2 if high(Op0) < high(Op1)
+ // CC 0 if high(Op0) == high(Op1)
+ //
+ // If CC != 0, we're done, so jump over the next instruction.
+ //
+ // VEC[L]G Op1, Op0
+ // JNE JoinMBB
+ // # fallthrough to HiEqMBB
+ MBB = StartMBB;
+ int HiOpcode = Unsigned ? SystemZ::VECLG : SystemZ::VECG;
+ BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
+ .addReg(Op1).addReg(Op0);
+ BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(JoinMBB);
+ MBB->addSuccessor(JoinMBB);
+ MBB->addSuccessor(HiEqMBB);
+
+ // HiEqMBB:
+ //
+ // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
+ // Since we already know the high parts are equal, the CC
+ // result will only depend on the low parts:
+ // CC 1 if low(Op0) > low(Op1)
+ // CC 3 if low(Op0) <= low(Op1)
+ //
+ // VCHLGS Tmp, Op0, Op1
+ // # fallthrough to JoinMBB
+ MBB = HiEqMBB;
+ Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
+ BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
+ .addReg(Op0).addReg(Op1);
+ MBB->addSuccessor(JoinMBB);
+
+ // Mark CC as live-in to JoinMBB.
+ JoinMBB->addLiveIn(SystemZ::CC);
+
+ MI.eraseFromParent();
+ return JoinMBB;
+}
+
+// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
+// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
+// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
+// whether the field should be inverted after performing BinOpcode (e.g. for
+// NAND).
MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
- unsigned BitSize, bool Invert) const {
+ bool Invert) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
- bool IsSubWord = (BitSize < 32);
// Extract the operands. Base can be a register or a frame index.
// Src2 can be a register or immediate.
@@ -7922,31 +8358,22 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
MachineOperand Base = earlyUseOperand(MI.getOperand(1));
int64_t Disp = MI.getOperand(2).getImm();
MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
- Register BitShift = IsSubWord ? MI.getOperand(4).getReg() : Register();
- Register NegBitShift = IsSubWord ? MI.getOperand(5).getReg() : Register();
+ Register BitShift = MI.getOperand(4).getReg();
+ Register NegBitShift = MI.getOperand(5).getReg();
+ unsigned BitSize = MI.getOperand(6).getImm();
DebugLoc DL = MI.getDebugLoc();
- if (IsSubWord)
- BitSize = MI.getOperand(6).getImm();
-
- // Subword operations use 32-bit registers.
- const TargetRegisterClass *RC = (BitSize <= 32 ?
- &SystemZ::GR32BitRegClass :
- &SystemZ::GR64BitRegClass);
- unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
- unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
// Get the right opcodes for the displacement.
- LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
- CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
+ unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
+ unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
assert(LOpcode && CSOpcode && "Displacement out of range");
// Create virtual registers for temporary results.
- Register OrigVal = MRI.createVirtualRegister(RC);
- Register OldVal = MRI.createVirtualRegister(RC);
- Register NewVal = (BinOpcode || IsSubWord ?
- MRI.createVirtualRegister(RC) : Src2.getReg());
- Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
- Register RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
+ Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
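+ // The expansion is a compare-and-swap loop on the containing word
+ // (a sketch):
+ //   OrigVal = L [Base+Disp]
+ // Loop:
+ //   OldVal = phi(OrigVal, Dest)
+ //   RotatedOldVal = RLL OldVal, BitShift
+ //   RotatedNewVal = <apply BinOpcode / swap / invert>
+ //   NewVal = RLL RotatedNewVal, NegBitShift
+ //   Dest = CS OldVal, NewVal, [Base+Disp]  // loop again if CS failed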
// Insert a basic block for the main loop.
MachineBasicBlock *StartMBB = MBB;
@@ -7973,39 +8400,28 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
.addReg(OrigVal).addMBB(StartMBB)
.addReg(Dest).addMBB(LoopMBB);
- if (IsSubWord)
- BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
- .addReg(OldVal).addReg(BitShift).addImm(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
+ .addReg(OldVal).addReg(BitShift).addImm(0);
if (Invert) {
// Perform the operation normally and then invert every bit of the field.
- Register Tmp = MRI.createVirtualRegister(RC);
+ Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
- if (BitSize <= 32)
- // XILF with the upper BitSize bits set.
- BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
- .addReg(Tmp).addImm(-1U << (32 - BitSize));
- else {
- // Use LCGR and add -1 to the result, which is more compact than
- // an XILF, XILH pair.
- Register Tmp2 = MRI.createVirtualRegister(RC);
- BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
- BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
- .addReg(Tmp2).addImm(-1);
- }
+ // XILF with the upper BitSize bits set.
+ BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
+ .addReg(Tmp).addImm(-1U << (32 - BitSize));
} else if (BinOpcode)
// A simple binary operation.
BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
.addReg(RotatedOldVal)
.add(Src2);
- else if (IsSubWord)
+ else
// Use RISBG to rotate Src2 into position and use it to replace the
// field in RotatedOldVal.
BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
.addReg(RotatedOldVal).addReg(Src2.getReg())
.addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
- if (IsSubWord)
- BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
- .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
+ .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
.addReg(OldVal)
.addReg(NewVal)
@@ -8020,50 +8436,40 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
return DoneMBB;
}
-// Implement EmitInstrWithCustomInserter for pseudo
-// ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
+// Implement EmitInstrWithCustomInserter for subword pseudo
+// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
// instruction that should be used to compare the current field with the
// minimum or maximum value. KeepOldMask is the BRC condition-code mask
-// for when the current field should be kept. BitSize is the width of
-// the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
+// for when the current field should be kept.
MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
- unsigned KeepOldMask, unsigned BitSize) const {
+ unsigned KeepOldMask) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
- bool IsSubWord = (BitSize < 32);
// Extract the operands. Base can be a register or a frame index.
Register Dest = MI.getOperand(0).getReg();
MachineOperand Base = earlyUseOperand(MI.getOperand(1));
int64_t Disp = MI.getOperand(2).getImm();
Register Src2 = MI.getOperand(3).getReg();
- Register BitShift = (IsSubWord ? MI.getOperand(4).getReg() : Register());
- Register NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : Register());
+ Register BitShift = MI.getOperand(4).getReg();
+ Register NegBitShift = MI.getOperand(5).getReg();
+ unsigned BitSize = MI.getOperand(6).getImm();
DebugLoc DL = MI.getDebugLoc();
- if (IsSubWord)
- BitSize = MI.getOperand(6).getImm();
-
- // Subword operations use 32-bit registers.
- const TargetRegisterClass *RC = (BitSize <= 32 ?
- &SystemZ::GR32BitRegClass :
- &SystemZ::GR64BitRegClass);
- unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
- unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
// Get the right opcodes for the displacement.
- LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
- CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
+ unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
+ unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
assert(LOpcode && CSOpcode && "Displacement out of range");
// Create virtual registers for temporary results.
- Register OrigVal = MRI.createVirtualRegister(RC);
- Register OldVal = MRI.createVirtualRegister(RC);
- Register NewVal = MRI.createVirtualRegister(RC);
- Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
- Register RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
- Register RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
+ Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
// Insert 3 basic blocks for the loop.
MachineBasicBlock *StartMBB = MBB;
@@ -8089,9 +8495,8 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
.addReg(OrigVal).addMBB(StartMBB)
.addReg(Dest).addMBB(UpdateMBB);
- if (IsSubWord)
- BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
- .addReg(OldVal).addReg(BitShift).addImm(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
+ .addReg(OldVal).addReg(BitShift).addImm(0);
BuildMI(MBB, DL, TII->get(CompareOpcode))
.addReg(RotatedOldVal).addReg(Src2);
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
@@ -8103,10 +8508,9 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
// %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
// # fall through to UpdateMBB
MBB = UseAltMBB;
- if (IsSubWord)
- BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
- .addReg(RotatedOldVal).addReg(Src2)
- .addImm(32).addImm(31 + BitSize).addImm(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
+ .addReg(RotatedOldVal).addReg(Src2)
+ .addImm(32).addImm(31 + BitSize).addImm(0);
MBB->addSuccessor(UpdateMBB);
// UpdateMBB:
@@ -8120,9 +8524,8 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
.addReg(RotatedOldVal).addMBB(LoopMBB)
.addReg(RotatedAltVal).addMBB(UseAltMBB);
- if (IsSubWord)
- BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
- .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
+ .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
.addReg(OldVal)
.addReg(NewVal)
@@ -8137,7 +8540,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
return DoneMBB;
}
-// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
+// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
// instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
@@ -8901,6 +9304,7 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
switch (MI.getOpcode()) {
case SystemZ::Select32:
case SystemZ::Select64:
+ case SystemZ::Select128:
case SystemZ::SelectF32:
case SystemZ::SelectF64:
case SystemZ::SelectF128:
@@ -8946,6 +9350,11 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
case SystemZ::CondStoreF64Inv:
return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
+ case SystemZ::SCmp128Hi:
+ return emitICmp128Hi(MI, MBB, false);
+ case SystemZ::UCmp128Hi:
+ return emitICmp128Hi(MI, MBB, true);
+
case SystemZ::PAIR128:
return emitPair128(MI, MBB);
case SystemZ::AEXT128:
@@ -8954,171 +9363,44 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
return emitExt128(MI, MBB, true);
case SystemZ::ATOMIC_SWAPW:
- return emitAtomicLoadBinary(MI, MBB, 0, 0);
- case SystemZ::ATOMIC_SWAP_32:
- return emitAtomicLoadBinary(MI, MBB, 0, 32);
- case SystemZ::ATOMIC_SWAP_64:
- return emitAtomicLoadBinary(MI, MBB, 0, 64);
+ return emitAtomicLoadBinary(MI, MBB, 0);
case SystemZ::ATOMIC_LOADW_AR:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
case SystemZ::ATOMIC_LOADW_AFI:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0);
- case SystemZ::ATOMIC_LOAD_AR:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32);
- case SystemZ::ATOMIC_LOAD_AHI:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32);
- case SystemZ::ATOMIC_LOAD_AFI:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32);
- case SystemZ::ATOMIC_LOAD_AGR:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64);
- case SystemZ::ATOMIC_LOAD_AGHI:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64);
- case SystemZ::ATOMIC_LOAD_AGFI:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
case SystemZ::ATOMIC_LOADW_SR:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0);
- case SystemZ::ATOMIC_LOAD_SR:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32);
- case SystemZ::ATOMIC_LOAD_SGR:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
case SystemZ::ATOMIC_LOADW_NR:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
case SystemZ::ATOMIC_LOADW_NILH:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0);
- case SystemZ::ATOMIC_LOAD_NR:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32);
- case SystemZ::ATOMIC_LOAD_NILL:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32);
- case SystemZ::ATOMIC_LOAD_NILH:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32);
- case SystemZ::ATOMIC_LOAD_NILF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32);
- case SystemZ::ATOMIC_LOAD_NGR:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
- case SystemZ::ATOMIC_LOAD_NILL64:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64);
- case SystemZ::ATOMIC_LOAD_NILH64:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64);
- case SystemZ::ATOMIC_LOAD_NIHL64:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64);
- case SystemZ::ATOMIC_LOAD_NIHH64:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64);
- case SystemZ::ATOMIC_LOAD_NILF64:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64);
- case SystemZ::ATOMIC_LOAD_NIHF64:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
case SystemZ::ATOMIC_LOADW_OR:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
case SystemZ::ATOMIC_LOADW_OILH:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0);
- case SystemZ::ATOMIC_LOAD_OR:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32);
- case SystemZ::ATOMIC_LOAD_OILL:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32);
- case SystemZ::ATOMIC_LOAD_OILH:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32);
- case SystemZ::ATOMIC_LOAD_OILF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32);
- case SystemZ::ATOMIC_LOAD_OGR:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
- case SystemZ::ATOMIC_LOAD_OILL64:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64);
- case SystemZ::ATOMIC_LOAD_OILH64:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64);
- case SystemZ::ATOMIC_LOAD_OIHL64:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64);
- case SystemZ::ATOMIC_LOAD_OIHH64:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64);
- case SystemZ::ATOMIC_LOAD_OILF64:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64);
- case SystemZ::ATOMIC_LOAD_OIHF64:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
case SystemZ::ATOMIC_LOADW_XR:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
case SystemZ::ATOMIC_LOADW_XILF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0);
- case SystemZ::ATOMIC_LOAD_XR:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32);
- case SystemZ::ATOMIC_LOAD_XILF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32);
- case SystemZ::ATOMIC_LOAD_XGR:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64);
- case SystemZ::ATOMIC_LOAD_XILF64:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64);
- case SystemZ::ATOMIC_LOAD_XIHF64:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
case SystemZ::ATOMIC_LOADW_NRi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
case SystemZ::ATOMIC_LOADW_NILHi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true);
- case SystemZ::ATOMIC_LOAD_NRi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true);
- case SystemZ::ATOMIC_LOAD_NILLi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true);
- case SystemZ::ATOMIC_LOAD_NILHi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true);
- case SystemZ::ATOMIC_LOAD_NILFi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true);
- case SystemZ::ATOMIC_LOAD_NGRi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
- case SystemZ::ATOMIC_LOAD_NILL64i:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true);
- case SystemZ::ATOMIC_LOAD_NILH64i:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true);
- case SystemZ::ATOMIC_LOAD_NIHL64i:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true);
- case SystemZ::ATOMIC_LOAD_NIHH64i:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true);
- case SystemZ::ATOMIC_LOAD_NILF64i:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true);
- case SystemZ::ATOMIC_LOAD_NIHF64i:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
case SystemZ::ATOMIC_LOADW_MIN:
- return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
- SystemZ::CCMASK_CMP_LE, 0);
- case SystemZ::ATOMIC_LOAD_MIN_32:
- return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
- SystemZ::CCMASK_CMP_LE, 32);
- case SystemZ::ATOMIC_LOAD_MIN_64:
- return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
- SystemZ::CCMASK_CMP_LE, 64);
-
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
case SystemZ::ATOMIC_LOADW_MAX:
- return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
- SystemZ::CCMASK_CMP_GE, 0);
- case SystemZ::ATOMIC_LOAD_MAX_32:
- return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
- SystemZ::CCMASK_CMP_GE, 32);
- case SystemZ::ATOMIC_LOAD_MAX_64:
- return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
- SystemZ::CCMASK_CMP_GE, 64);
-
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
case SystemZ::ATOMIC_LOADW_UMIN:
- return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
- SystemZ::CCMASK_CMP_LE, 0);
- case SystemZ::ATOMIC_LOAD_UMIN_32:
- return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
- SystemZ::CCMASK_CMP_LE, 32);
- case SystemZ::ATOMIC_LOAD_UMIN_64:
- return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
- SystemZ::CCMASK_CMP_LE, 64);
-
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
case SystemZ::ATOMIC_LOADW_UMAX:
- return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
- SystemZ::CCMASK_CMP_GE, 0);
- case SystemZ::ATOMIC_LOAD_UMAX_32:
- return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
- SystemZ::CCMASK_CMP_GE, 32);
- case SystemZ::ATOMIC_LOAD_UMAX_64:
- return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
- SystemZ::CCMASK_CMP_GE, 64);
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
case SystemZ::ATOMIC_CMP_SWAPW:
return emitAtomicCmpSwapW(MI, MBB);
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 47fa1831c3ee..6b3ce3f8c1d2 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -215,17 +215,25 @@ enum NodeType : unsigned {
UNPACK_LOW,
UNPACKL_LOW,
- // Shift each element of vector operand 0 by the number of bits specified
- // by scalar operand 1.
+ // Shift/rotate each element of vector operand 0 by the number of bits
+ // specified by scalar operand 1.
VSHL_BY_SCALAR,
VSRL_BY_SCALAR,
VSRA_BY_SCALAR,
+ VROTL_BY_SCALAR,
// For each element of the output type, sum across all sub-elements of
// operand 0 belonging to the corresponding element, and add in the
// rightmost sub-element of the corresponding element of operand 1.
VSUM,
+ // Compute carry/borrow indication for add/subtract.
+ VACC, VSCBI,
+ // Add/subtract with carry/borrow.
+ VAC, VSBI,
+ // Compute carry/borrow indication for add/subtract with carry/borrow.
+ VACCC, VSBCBI,
+
// Compare integer vector operands 0 and 1 to produce the usual 0/-1
// vector result. VICMPE is for equality, VICMPH for "signed greater than"
// and VICMPHL for "unsigned greater than".
@@ -264,6 +272,10 @@ enum NodeType : unsigned {
// AND the two vector operands together and set CC based on the result.
VTM,
+ // i128 high integer comparisons.
+ SCMP128HI,
+ UCMP128HI,
+
// String operations that set CC as a side-effect.
VFAE_CC,
VFAEZ_CC,
@@ -431,7 +443,17 @@ public:
return 1;
return TargetLowering::getNumRegisters(Context, VT);
}
+ MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
+ EVT VT) const override {
+ // 128-bit single-element vector types are passed like other vectors,
+ // not like their element type.
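+ // E.g. a v1i128 argument is therefore passed in a vector register
+ // as v16i8 instead of being split up like a plain i128 value.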
+ if (VT.isVector() && VT.getSizeInBits() == 128 &&
+ VT.getVectorNumElements() == 1)
+ return MVT::v16i8;
+ return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+ }
bool isCheapToSpeculateCtlz(Type *) const override { return true; }
+ bool isCheapToSpeculateCttz(Type *) const override { return true; }
bool preferZeroCompareBranch() const override { return true; }
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override {
ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
@@ -452,6 +474,8 @@ public:
return VT != MVT::f64;
}
bool hasInlineStackProbe(const MachineFunction &MF) const override;
+ AtomicExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override;
bool isLegalICmpImmediate(int64_t Imm) const override;
bool isLegalAddImmediate(int64_t Imm) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
@@ -487,39 +511,39 @@ public:
TargetLowering::ConstraintWeight
getSingleConstraintMatchWeight(AsmOperandInfo &info,
const char *constraint) const override;
- void LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
+ void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
+ InlineAsm::ConstraintCode
+ getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
if (ConstraintCode.size() == 1) {
switch(ConstraintCode[0]) {
default:
break;
case 'o':
- return InlineAsm::Constraint_o;
+ return InlineAsm::ConstraintCode::o;
case 'Q':
- return InlineAsm::Constraint_Q;
+ return InlineAsm::ConstraintCode::Q;
case 'R':
- return InlineAsm::Constraint_R;
+ return InlineAsm::ConstraintCode::R;
case 'S':
- return InlineAsm::Constraint_S;
+ return InlineAsm::ConstraintCode::S;
case 'T':
- return InlineAsm::Constraint_T;
+ return InlineAsm::ConstraintCode::T;
}
} else if (ConstraintCode.size() == 2 && ConstraintCode[0] == 'Z') {
switch (ConstraintCode[1]) {
default:
break;
case 'Q':
- return InlineAsm::Constraint_ZQ;
+ return InlineAsm::ConstraintCode::ZQ;
case 'R':
- return InlineAsm::Constraint_ZR;
+ return InlineAsm::ConstraintCode::ZR;
case 'S':
- return InlineAsm::Constraint_ZS;
+ return InlineAsm::ConstraintCode::ZS;
case 'T':
- return InlineAsm::Constraint_ZT;
+ return InlineAsm::ConstraintCode::ZT;
}
}
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
@@ -739,19 +763,20 @@ private:
MachineBasicBlock *emitCondStore(MachineInstr &MI, MachineBasicBlock *BB,
unsigned StoreOpcode, unsigned STOCOpcode,
bool Invert) const;
+ MachineBasicBlock *emitICmp128Hi(MachineInstr &MI, MachineBasicBlock *BB,
+ bool Unsigned) const;
MachineBasicBlock *emitPair128(MachineInstr &MI,
MachineBasicBlock *MBB) const;
MachineBasicBlock *emitExt128(MachineInstr &MI, MachineBasicBlock *MBB,
bool ClearEven) const;
MachineBasicBlock *emitAtomicLoadBinary(MachineInstr &MI,
MachineBasicBlock *BB,
- unsigned BinOpcode, unsigned BitSize,
+ unsigned BinOpcode,
bool Invert = false) const;
MachineBasicBlock *emitAtomicLoadMinMax(MachineInstr &MI,
MachineBasicBlock *MBB,
unsigned CompareOpcode,
- unsigned KeepOldMask,
- unsigned BitSize) const;
+ unsigned KeepOldMask) const;
MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *emitMemMemWrapper(MachineInstr &MI, MachineBasicBlock *BB,
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index a25719f80ad0..2e5ff4a1df67 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -5327,30 +5327,6 @@ multiclass CondStores<RegisterOperand cls, SDPatternOperator store,
}
}
-// OPERATOR is ATOMIC_SWAP or an ATOMIC_LOAD_* operation. PAT and OPERAND
-// describe the second (non-memory) operand.
-class AtomicLoadBinary<SDPatternOperator operator, RegisterOperand cls,
- dag pat, DAGOperand operand>
- : Pseudo<(outs cls:$dst), (ins bdaddr20only:$ptr, operand:$src2),
- [(set cls:$dst, (operator bdaddr20only:$ptr, pat))]> {
- let Defs = [CC];
- let Has20BitOffset = 1;
- let mayLoad = 1;
- let mayStore = 1;
- let usesCustomInserter = 1;
- let hasNoSchedulingInfo = 1;
-}
-
-// Specializations of AtomicLoadWBinary.
-class AtomicLoadBinaryReg32<SDPatternOperator operator>
- : AtomicLoadBinary<operator, GR32, (i32 GR32:$src2), GR32>;
-class AtomicLoadBinaryImm32<SDPatternOperator operator, ImmOpWithPattern imm>
- : AtomicLoadBinary<operator, GR32, (i32 imm:$src2), imm>;
-class AtomicLoadBinaryReg64<SDPatternOperator operator>
- : AtomicLoadBinary<operator, GR64, (i64 GR64:$src2), GR64>;
-class AtomicLoadBinaryImm64<SDPatternOperator operator, ImmOpWithPattern imm>
- : AtomicLoadBinary<operator, GR64, (i64 imm:$src2), imm>;
-
// OPERATOR is ATOMIC_SWAPW or an ATOMIC_LOADW_* operation. PAT and OPERAND
// describe the second (non-memory) operand.
class AtomicLoadWBinary<SDPatternOperator operator, dag pat,
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 87eb3838aec4..937e36057a6e 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1746,112 +1746,29 @@ let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in {
}
def ATOMIC_SWAPW : AtomicLoadWBinaryReg<z_atomic_swapw>;
-def ATOMIC_SWAP_32 : AtomicLoadBinaryReg32<atomic_swap_32>;
-def ATOMIC_SWAP_64 : AtomicLoadBinaryReg64<atomic_swap_64>;
def ATOMIC_LOADW_AR : AtomicLoadWBinaryReg<z_atomic_loadw_add>;
def ATOMIC_LOADW_AFI : AtomicLoadWBinaryImm<z_atomic_loadw_add, simm32>;
-let Predicates = [FeatureNoInterlockedAccess1] in {
- def ATOMIC_LOAD_AR : AtomicLoadBinaryReg32<atomic_load_add_32>;
- def ATOMIC_LOAD_AHI : AtomicLoadBinaryImm32<atomic_load_add_32, imm32sx16>;
- def ATOMIC_LOAD_AFI : AtomicLoadBinaryImm32<atomic_load_add_32, simm32>;
- def ATOMIC_LOAD_AGR : AtomicLoadBinaryReg64<atomic_load_add_64>;
- def ATOMIC_LOAD_AGHI : AtomicLoadBinaryImm64<atomic_load_add_64, imm64sx16>;
- def ATOMIC_LOAD_AGFI : AtomicLoadBinaryImm64<atomic_load_add_64, imm64sx32>;
-}
def ATOMIC_LOADW_SR : AtomicLoadWBinaryReg<z_atomic_loadw_sub>;
-def ATOMIC_LOAD_SR : AtomicLoadBinaryReg32<atomic_load_sub_32>;
-def ATOMIC_LOAD_SGR : AtomicLoadBinaryReg64<atomic_load_sub_64>;
def ATOMIC_LOADW_NR : AtomicLoadWBinaryReg<z_atomic_loadw_and>;
def ATOMIC_LOADW_NILH : AtomicLoadWBinaryImm<z_atomic_loadw_and, imm32lh16c>;
-let Predicates = [FeatureNoInterlockedAccess1] in {
- def ATOMIC_LOAD_NR : AtomicLoadBinaryReg32<atomic_load_and_32>;
- def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm32<atomic_load_and_32,
- imm32ll16c>;
- def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm32<atomic_load_and_32,
- imm32lh16c>;
- def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm32<atomic_load_and_32, uimm32>;
- def ATOMIC_LOAD_NGR : AtomicLoadBinaryReg64<atomic_load_and_64>;
- def ATOMIC_LOAD_NILL64 : AtomicLoadBinaryImm64<atomic_load_and_64,
- imm64ll16c>;
- def ATOMIC_LOAD_NILH64 : AtomicLoadBinaryImm64<atomic_load_and_64,
- imm64lh16c>;
- def ATOMIC_LOAD_NIHL64 : AtomicLoadBinaryImm64<atomic_load_and_64,
- imm64hl16c>;
- def ATOMIC_LOAD_NIHH64 : AtomicLoadBinaryImm64<atomic_load_and_64,
- imm64hh16c>;
- def ATOMIC_LOAD_NILF64 : AtomicLoadBinaryImm64<atomic_load_and_64,
- imm64lf32c>;
- def ATOMIC_LOAD_NIHF64 : AtomicLoadBinaryImm64<atomic_load_and_64,
- imm64hf32c>;
-}
def ATOMIC_LOADW_OR : AtomicLoadWBinaryReg<z_atomic_loadw_or>;
def ATOMIC_LOADW_OILH : AtomicLoadWBinaryImm<z_atomic_loadw_or, imm32lh16>;
-let Predicates = [FeatureNoInterlockedAccess1] in {
- def ATOMIC_LOAD_OR : AtomicLoadBinaryReg32<atomic_load_or_32>;
- def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm32<atomic_load_or_32, imm32ll16>;
- def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm32<atomic_load_or_32, imm32lh16>;
- def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm32<atomic_load_or_32, uimm32>;
- def ATOMIC_LOAD_OGR : AtomicLoadBinaryReg64<atomic_load_or_64>;
- def ATOMIC_LOAD_OILL64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64ll16>;
- def ATOMIC_LOAD_OILH64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lh16>;
- def ATOMIC_LOAD_OIHL64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hl16>;
- def ATOMIC_LOAD_OIHH64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hh16>;
- def ATOMIC_LOAD_OILF64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lf32>;
- def ATOMIC_LOAD_OIHF64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hf32>;
-}
def ATOMIC_LOADW_XR : AtomicLoadWBinaryReg<z_atomic_loadw_xor>;
def ATOMIC_LOADW_XILF : AtomicLoadWBinaryImm<z_atomic_loadw_xor, uimm32>;
-let Predicates = [FeatureNoInterlockedAccess1] in {
- def ATOMIC_LOAD_XR : AtomicLoadBinaryReg32<atomic_load_xor_32>;
- def ATOMIC_LOAD_XILF : AtomicLoadBinaryImm32<atomic_load_xor_32, uimm32>;
- def ATOMIC_LOAD_XGR : AtomicLoadBinaryReg64<atomic_load_xor_64>;
- def ATOMIC_LOAD_XILF64 : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64lf32>;
- def ATOMIC_LOAD_XIHF64 : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64hf32>;
-}
def ATOMIC_LOADW_NRi : AtomicLoadWBinaryReg<z_atomic_loadw_nand>;
def ATOMIC_LOADW_NILHi : AtomicLoadWBinaryImm<z_atomic_loadw_nand,
imm32lh16c>;
-def ATOMIC_LOAD_NRi : AtomicLoadBinaryReg32<atomic_load_nand_32>;
-def ATOMIC_LOAD_NILLi : AtomicLoadBinaryImm32<atomic_load_nand_32,
- imm32ll16c>;
-def ATOMIC_LOAD_NILHi : AtomicLoadBinaryImm32<atomic_load_nand_32,
- imm32lh16c>;
-def ATOMIC_LOAD_NILFi : AtomicLoadBinaryImm32<atomic_load_nand_32, uimm32>;
-def ATOMIC_LOAD_NGRi : AtomicLoadBinaryReg64<atomic_load_nand_64>;
-def ATOMIC_LOAD_NILL64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
- imm64ll16c>;
-def ATOMIC_LOAD_NILH64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
- imm64lh16c>;
-def ATOMIC_LOAD_NIHL64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
- imm64hl16c>;
-def ATOMIC_LOAD_NIHH64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
- imm64hh16c>;
-def ATOMIC_LOAD_NILF64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
- imm64lf32c>;
-def ATOMIC_LOAD_NIHF64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
- imm64hf32c>;
def ATOMIC_LOADW_MIN : AtomicLoadWBinaryReg<z_atomic_loadw_min>;
-def ATOMIC_LOAD_MIN_32 : AtomicLoadBinaryReg32<atomic_load_min_32>;
-def ATOMIC_LOAD_MIN_64 : AtomicLoadBinaryReg64<atomic_load_min_64>;
-
def ATOMIC_LOADW_MAX : AtomicLoadWBinaryReg<z_atomic_loadw_max>;
-def ATOMIC_LOAD_MAX_32 : AtomicLoadBinaryReg32<atomic_load_max_32>;
-def ATOMIC_LOAD_MAX_64 : AtomicLoadBinaryReg64<atomic_load_max_64>;
-
def ATOMIC_LOADW_UMIN : AtomicLoadWBinaryReg<z_atomic_loadw_umin>;
-def ATOMIC_LOAD_UMIN_32 : AtomicLoadBinaryReg32<atomic_load_umin_32>;
-def ATOMIC_LOAD_UMIN_64 : AtomicLoadBinaryReg64<atomic_load_umin_64>;
-
def ATOMIC_LOADW_UMAX : AtomicLoadWBinaryReg<z_atomic_loadw_umax>;
-def ATOMIC_LOAD_UMAX_32 : AtomicLoadBinaryReg32<atomic_load_umax_32>;
-def ATOMIC_LOAD_UMAX_64 : AtomicLoadBinaryReg64<atomic_load_umax_64>;
def ATOMIC_CMP_SWAPW
: Pseudo<(outs GR32:$dst), (ins bdaddr20only:$addr, GR32:$cmp, GR32:$swap,
@@ -2346,6 +2263,10 @@ let isCodeGenOnly = 1, hasSideEffects = 1 in {
def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y),
(XGR GR64:$y, (NGR GR64:$y, GR64:$x))>;
+// Use LCGR/AGHI for i64 xor with -1.
+def : Pat<(xor GR64:$x, (i64 -1)),
+ (AGHI (LCGR GR64:$x), (i64 -1))>;
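+// (By two's complement, ~x == -x - 1, so LOAD COMPLEMENT followed by
+// ADD IMMEDIATE of -1 computes the bitwise not.)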
+
// Shift/rotate instructions only use the last 6 bits of the second operand
// register, so we can safely use NILL (16 fewer bits than NILF) to only AND the
// last 16 bits.
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index 82863d7838a9..799b27d74414 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -266,7 +266,7 @@ let Predicates = [FeatureVectorEnhancements2] in {
def VLBRH : UnaryVRX<"vlbrh", 0xE606, z_loadbswap, v128h, 16, 1>;
def VLBRF : UnaryVRX<"vlbrf", 0xE606, z_loadbswap, v128f, 16, 2>;
def VLBRG : UnaryVRX<"vlbrg", 0xE606, z_loadbswap, v128g, 16, 3>;
- def VLBRQ : UnaryVRX<"vlbrq", 0xE606, null_frag, v128q, 16, 4>;
+ def VLBRQ : UnaryVRX<"vlbrq", 0xE606, z_loadbswap, v128q, 16, 4>;
// Load elements reversed.
def VLER : UnaryVRXGeneric<"vler", 0xE607>;
@@ -307,7 +307,7 @@ let Predicates = [FeatureVectorEnhancements2] in {
def VSTBRH : StoreVRX<"vstbrh", 0xE60E, z_storebswap, v128h, 16, 1>;
def VSTBRF : StoreVRX<"vstbrf", 0xE60E, z_storebswap, v128f, 16, 2>;
def VSTBRG : StoreVRX<"vstbrg", 0xE60E, z_storebswap, v128g, 16, 3>;
- def VSTBRQ : StoreVRX<"vstbrq", 0xE60E, null_frag, v128q, 16, 4>;
+ def VSTBRQ : StoreVRX<"vstbrq", 0xE60E, z_storebswap, v128q, 16, 4>;
// Store elements reversed.
def VSTER : StoreVRXGeneric<"vster", 0xE60F>;
@@ -478,26 +478,26 @@ let Predicates = [FeatureVector] in {
def VAH : BinaryVRRc<"vah", 0xE7F3, add, v128h, v128h, 1>;
def VAF : BinaryVRRc<"vaf", 0xE7F3, add, v128f, v128f, 2>;
def VAG : BinaryVRRc<"vag", 0xE7F3, add, v128g, v128g, 3>;
- def VAQ : BinaryVRRc<"vaq", 0xE7F3, int_s390_vaq, v128q, v128q, 4>;
+ def VAQ : BinaryVRRc<"vaq", 0xE7F3, add, v128q, v128q, 4>;
}
let isCommutable = 1 in {
// Add compute carry.
def VACC : BinaryVRRcGeneric<"vacc", 0xE7F1>;
- def VACCB : BinaryVRRc<"vaccb", 0xE7F1, int_s390_vaccb, v128b, v128b, 0>;
- def VACCH : BinaryVRRc<"vacch", 0xE7F1, int_s390_vacch, v128h, v128h, 1>;
- def VACCF : BinaryVRRc<"vaccf", 0xE7F1, int_s390_vaccf, v128f, v128f, 2>;
- def VACCG : BinaryVRRc<"vaccg", 0xE7F1, int_s390_vaccg, v128g, v128g, 3>;
- def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, int_s390_vaccq, v128q, v128q, 4>;
+ def VACCB : BinaryVRRc<"vaccb", 0xE7F1, z_vacc, v128b, v128b, 0>;
+ def VACCH : BinaryVRRc<"vacch", 0xE7F1, z_vacc, v128h, v128h, 1>;
+ def VACCF : BinaryVRRc<"vaccf", 0xE7F1, z_vacc, v128f, v128f, 2>;
+ def VACCG : BinaryVRRc<"vaccg", 0xE7F1, z_vacc, v128g, v128g, 3>;
+ def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, z_vacc, v128q, v128q, 4>;
// Add with carry.
def VAC : TernaryVRRdGeneric<"vac", 0xE7BB>;
- def VACQ : TernaryVRRd<"vacq", 0xE7BB, int_s390_vacq, v128q, v128q, 4>;
+ def VACQ : TernaryVRRd<"vacq", 0xE7BB, z_vac, v128q, v128q, 4>;
// Add with carry compute carry.
def VACCC : TernaryVRRdGeneric<"vaccc", 0xE7B9>;
- def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, int_s390_vacccq, v128q, v128q, 4>;
- }
+ def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, z_vaccc, v128q, v128q, 4>;
+ }
// And.
let isCommutable = 1 in
@@ -732,21 +732,17 @@ let Predicates = [FeatureVector] in {
// Element rotate left logical (with vector shift amount).
def VERLLV : BinaryVRRcGeneric<"verllv", 0xE773>;
- def VERLLVB : BinaryVRRc<"verllvb", 0xE773, int_s390_verllvb,
- v128b, v128b, 0>;
- def VERLLVH : BinaryVRRc<"verllvh", 0xE773, int_s390_verllvh,
- v128h, v128h, 1>;
- def VERLLVF : BinaryVRRc<"verllvf", 0xE773, int_s390_verllvf,
- v128f, v128f, 2>;
- def VERLLVG : BinaryVRRc<"verllvg", 0xE773, int_s390_verllvg,
- v128g, v128g, 3>;
+ def VERLLVB : BinaryVRRc<"verllvb", 0xE773, rotl, v128b, v128b, 0>;
+ def VERLLVH : BinaryVRRc<"verllvh", 0xE773, rotl, v128h, v128h, 1>;
+ def VERLLVF : BinaryVRRc<"verllvf", 0xE773, rotl, v128f, v128f, 2>;
+ def VERLLVG : BinaryVRRc<"verllvg", 0xE773, rotl, v128g, v128g, 3>;
// Element rotate left logical (with scalar shift amount).
def VERLL : BinaryVRSaGeneric<"verll", 0xE733>;
- def VERLLB : BinaryVRSa<"verllb", 0xE733, int_s390_verllb, v128b, v128b, 0>;
- def VERLLH : BinaryVRSa<"verllh", 0xE733, int_s390_verllh, v128h, v128h, 1>;
- def VERLLF : BinaryVRSa<"verllf", 0xE733, int_s390_verllf, v128f, v128f, 2>;
- def VERLLG : BinaryVRSa<"verllg", 0xE733, int_s390_verllg, v128g, v128g, 3>;
+ def VERLLB : BinaryVRSa<"verllb", 0xE733, z_vrotl_by_scalar, v128b, v128b, 0>;
+ def VERLLH : BinaryVRSa<"verllh", 0xE733, z_vrotl_by_scalar, v128h, v128h, 1>;
+ def VERLLF : BinaryVRSa<"verllf", 0xE733, z_vrotl_by_scalar, v128f, v128f, 2>;
+ def VERLLG : BinaryVRSa<"verllg", 0xE733, z_vrotl_by_scalar, v128g, v128g, 3>;
// Element rotate and insert under mask.
def VERIM : QuaternaryVRIdGeneric<"verim", 0xE772>;
@@ -834,24 +830,23 @@ let Predicates = [FeatureVector] in {
def VSH : BinaryVRRc<"vsh", 0xE7F7, sub, v128h, v128h, 1>;
def VSF : BinaryVRRc<"vsf", 0xE7F7, sub, v128f, v128f, 2>;
def VSG : BinaryVRRc<"vsg", 0xE7F7, sub, v128g, v128g, 3>;
- def VSQ : BinaryVRRc<"vsq", 0xE7F7, int_s390_vsq, v128q, v128q, 4>;
+ def VSQ : BinaryVRRc<"vsq", 0xE7F7, sub, v128q, v128q, 4>;
// Subtract compute borrow indication.
def VSCBI : BinaryVRRcGeneric<"vscbi", 0xE7F5>;
- def VSCBIB : BinaryVRRc<"vscbib", 0xE7F5, int_s390_vscbib, v128b, v128b, 0>;
- def VSCBIH : BinaryVRRc<"vscbih", 0xE7F5, int_s390_vscbih, v128h, v128h, 1>;
- def VSCBIF : BinaryVRRc<"vscbif", 0xE7F5, int_s390_vscbif, v128f, v128f, 2>;
- def VSCBIG : BinaryVRRc<"vscbig", 0xE7F5, int_s390_vscbig, v128g, v128g, 3>;
- def VSCBIQ : BinaryVRRc<"vscbiq", 0xE7F5, int_s390_vscbiq, v128q, v128q, 4>;
+ def VSCBIB : BinaryVRRc<"vscbib", 0xE7F5, z_vscbi, v128b, v128b, 0>;
+ def VSCBIH : BinaryVRRc<"vscbih", 0xE7F5, z_vscbi, v128h, v128h, 1>;
+ def VSCBIF : BinaryVRRc<"vscbif", 0xE7F5, z_vscbi, v128f, v128f, 2>;
+ def VSCBIG : BinaryVRRc<"vscbig", 0xE7F5, z_vscbi, v128g, v128g, 3>;
+ def VSCBIQ : BinaryVRRc<"vscbiq", 0xE7F5, z_vscbi, v128q, v128q, 4>;
// Subtract with borrow indication.
def VSBI : TernaryVRRdGeneric<"vsbi", 0xE7BF>;
- def VSBIQ : TernaryVRRd<"vsbiq", 0xE7BF, int_s390_vsbiq, v128q, v128q, 4>;
+ def VSBIQ : TernaryVRRd<"vsbiq", 0xE7BF, z_vsbi, v128q, v128q, 4>;
// Subtract with borrow compute borrow indication.
def VSBCBI : TernaryVRRdGeneric<"vsbcbi", 0xE7BD>;
- def VSBCBIQ : TernaryVRRd<"vsbcbiq", 0xE7BD, int_s390_vsbcbiq,
- v128q, v128q, 4>;
+ def VSBCBIQ : TernaryVRRd<"vsbcbiq", 0xE7BD, z_vsbcbi, v128q, v128q, 4>;
// Sum across doubleword.
def VSUMG : BinaryVRRcGeneric<"vsumg", 0xE765>;
@@ -870,34 +865,35 @@ let Predicates = [FeatureVector] in {
}
// Instantiate the bitwise ops for type TYPE.
-multiclass BitwiseVectorOps<ValueType type> {
+multiclass BitwiseVectorOps<ValueType type, SDPatternOperator not_op> {
let Predicates = [FeatureVector] in {
def : Pat<(type (and VR128:$x, VR128:$y)), (VN VR128:$x, VR128:$y)>;
- def : Pat<(type (and VR128:$x, (z_vnot VR128:$y))),
+ def : Pat<(type (and VR128:$x, (not_op VR128:$y))),
(VNC VR128:$x, VR128:$y)>;
def : Pat<(type (or VR128:$x, VR128:$y)), (VO VR128:$x, VR128:$y)>;
def : Pat<(type (xor VR128:$x, VR128:$y)), (VX VR128:$x, VR128:$y)>;
def : Pat<(type (or (and VR128:$x, VR128:$z),
- (and VR128:$y, (z_vnot VR128:$z)))),
+ (and VR128:$y, (not_op VR128:$z)))),
(VSEL VR128:$x, VR128:$y, VR128:$z)>;
- def : Pat<(type (z_vnot (or VR128:$x, VR128:$y))),
+ def : Pat<(type (not_op (or VR128:$x, VR128:$y))),
(VNO VR128:$x, VR128:$y)>;
- def : Pat<(type (z_vnot VR128:$x)), (VNO VR128:$x, VR128:$x)>;
+ def : Pat<(type (not_op VR128:$x)), (VNO VR128:$x, VR128:$x)>;
}
let Predicates = [FeatureVectorEnhancements1] in {
- def : Pat<(type (z_vnot (xor VR128:$x, VR128:$y))),
+ def : Pat<(type (not_op (xor VR128:$x, VR128:$y))),
(VNX VR128:$x, VR128:$y)>;
- def : Pat<(type (z_vnot (and VR128:$x, VR128:$y))),
+ def : Pat<(type (not_op (and VR128:$x, VR128:$y))),
(VNN VR128:$x, VR128:$y)>;
- def : Pat<(type (or VR128:$x, (z_vnot VR128:$y))),
+ def : Pat<(type (or VR128:$x, (not_op VR128:$y))),
(VOC VR128:$x, VR128:$y)>;
}
}
-defm : BitwiseVectorOps<v16i8>;
-defm : BitwiseVectorOps<v8i16>;
-defm : BitwiseVectorOps<v4i32>;
-defm : BitwiseVectorOps<v2i64>;
+defm : BitwiseVectorOps<v16i8, z_vnot>;
+defm : BitwiseVectorOps<v8i16, z_vnot>;
+defm : BitwiseVectorOps<v4i32, z_vnot>;
+defm : BitwiseVectorOps<v2i64, z_vnot>;
+defm : BitwiseVectorOps<i128, not>;
// Instantiate additional patterns for absolute-related expressions on
// type TYPE. LC is the negate instruction for TYPE and LP is the absolute
@@ -966,6 +962,26 @@ defm : IntegerMinMaxVectorOps<v8i16, z_vicmphl, VMNLH, VMXLH>;
defm : IntegerMinMaxVectorOps<v4i32, z_vicmphl, VMNLF, VMXLF>;
defm : IntegerMinMaxVectorOps<v2i64, z_vicmphl, VMNLG, VMXLG>;
+// Instantiate full-vector shifts.
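+// The shift amount is split into a byte part handled by the byte-shift
+// instruction and a bit part (0-7) handled by the bit-shift instruction;
+// e.g. a shift by 20 uses a 2-byte shift plus a 4-bit shift. Amounts are
+// replicated into a vector via VREPIB (immediates) or VREPB (registers).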
+multiclass FullVectorShiftOps<SDPatternOperator shift,
+ Instruction sbit, Instruction sbyte> {
+ let Predicates = [FeatureVector] in {
+ def : Pat<(shift (i128 VR128:$x), imm32nobytes:$amt),
+ (sbit VR128:$x, (VREPIB (UIMM8 imm:$amt)))>;
+ def : Pat<(shift (i128 VR128:$x), imm32nobits:$amt),
+ (sbyte VR128:$x, (VREPIB (UIMM8 imm:$amt)))>;
+ def : Pat<(shift (i128 VR128:$x), imm32:$amt),
+ (sbit (sbyte VR128:$x, (VREPIB (UIMM8 imm:$amt))),
+ (VREPIB (UIMM8 imm:$amt)))>;
+ def : Pat<(shift (i128 VR128:$x), GR32:$amt),
+ (sbit (sbyte VR128:$x, (VREPB (VLVGP32 GR32:$amt, GR32:$amt), 15)),
+ (VREPB (VLVGP32 GR32:$amt, GR32:$amt), 15))>;
+ }
+}
+defm : FullVectorShiftOps<vshiftop<shl>, VSL, VSLB>;
+defm : FullVectorShiftOps<vshiftop<srl>, VSRL, VSRLB>;
+defm : FullVectorShiftOps<vshiftop<sra>, VSRA, VSRAB>;
+
//===----------------------------------------------------------------------===//
// Integer comparison
//===----------------------------------------------------------------------===//
@@ -1521,12 +1537,165 @@ let Predicates = [FeatureVector] in {
}
//===----------------------------------------------------------------------===//
+// Support for 128-bit integer values in vector registers
+//===----------------------------------------------------------------------===//
+
+// Loads and stores.
+let Predicates = [FeatureVector] in {
+ def : Pat<(i128 (load bdxaddr12only:$addr)),
+ (VL bdxaddr12only:$addr)>;
+ def : Pat<(store (i128 VR128:$src), bdxaddr12only:$addr),
+ (VST VR128:$src, bdxaddr12only:$addr)>;
+}
+
+// Full i128 move from GPR pair.
+let Predicates = [FeatureVector] in
+ def : Pat<(i128 (or (zext GR64:$x), (shl (anyext GR64:$y), (i32 64)))),
+ (VLVGP GR64:$y, GR64:$x)>;
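+// (VLVGP places its first operand in the high doubleword and its second
+// in the low doubleword, matching the or/shl shape above.)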
+
+// Any-extensions from GPR to i128.
+let Predicates = [FeatureVector] in {
+ def : Pat<(i128 (anyext GR32:$x)), (VLVGP32 GR32:$x, GR32:$x)>;
+ def : Pat<(i128 (anyext GR64:$x)), (VLVGP GR64:$x, GR64:$x)>;
+}
+
+// Any-extending loads into i128.
+let Predicates = [FeatureVector] in {
+ def : Pat<(i128 (extloadi8 bdxaddr12only:$addr)),
+ (VLREPB bdxaddr12only:$addr)>;
+ def : Pat<(i128 (extloadi16 bdxaddr12only:$addr)),
+ (VLREPH bdxaddr12only:$addr)>;
+ def : Pat<(i128 (extloadi32 bdxaddr12only:$addr)),
+ (VLREPF bdxaddr12only:$addr)>;
+ def : Pat<(i128 (extloadi64 bdxaddr12only:$addr)),
+ (VLREPG bdxaddr12only:$addr)>;
+}
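+// VLREP replicates the loaded scalar into every element; for an any-extend
+// the extra copies in the upper lanes are harmless, and the rightmost
+// element carries the loaded value.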
+
+// Truncations from i128 to GPR.
+let Predicates = [FeatureVector] in {
+ def : Pat<(i32 (trunc (i128 VR128:$vec))),
+ (EXTRACT_SUBREG (VLGVF VR128:$vec, zero_reg, 3), subreg_l32)>;
+ def : Pat<(i32 (trunc (srl (i128 VR128:$vec), (i32 32)))),
+ (EXTRACT_SUBREG (VLGVF VR128:$vec, zero_reg, 2), subreg_l32)>;
+ def : Pat<(i32 (trunc (srl (i128 VR128:$vec), (i32 64)))),
+ (EXTRACT_SUBREG (VLGVF VR128:$vec, zero_reg, 1), subreg_l32)>;
+ def : Pat<(i32 (trunc (srl (i128 VR128:$vec), (i32 96)))),
+ (EXTRACT_SUBREG (VLGVF VR128:$vec, zero_reg, 0), subreg_l32)>;
+ def : Pat<(i64 (trunc (i128 VR128:$vec))),
+ (VLGVG VR128:$vec, zero_reg, 1)>;
+ def : Pat<(i64 (trunc (srl (i128 VR128:$vec), (i32 64)))),
+ (VLGVG VR128:$vec, zero_reg, 0)>;
+}
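+// Element numbering is left to right, so word element 3 and doubleword
+// element 1 hold the least-significant bits of the i128; each srl by a
+// multiple of the element size selects the next element to the left.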
+
+// Truncating stores from i128.
+let Predicates = [FeatureVector] in {
+ def : Pat<(truncstorei8 (i128 VR128:$x), bdxaddr12only:$addr),
+ (VSTEB VR128:$x, bdxaddr12only:$addr, 15)>;
+ def : Pat<(truncstorei16 (i128 VR128:$x), bdxaddr12only:$addr),
+ (VSTEH VR128:$x, bdxaddr12only:$addr, 7)>;
+ def : Pat<(truncstorei32 (i128 VR128:$x), bdxaddr12only:$addr),
+ (VSTEF VR128:$x, bdxaddr12only:$addr, 3)>;
+ def : Pat<(truncstorei32 (srl (i128 VR128:$x), (i32 32)), bdxaddr12only:$addr),
+ (VSTEF VR128:$x, bdxaddr12only:$addr, 2)>;
+ def : Pat<(truncstorei32 (srl (i128 VR128:$x), (i32 64)), bdxaddr12only:$addr),
+ (VSTEF VR128:$x, bdxaddr12only:$addr, 1)>;
+ def : Pat<(truncstorei32 (srl (i128 VR128:$x), (i32 96)), bdxaddr12only:$addr),
+ (VSTEF VR128:$x, bdxaddr12only:$addr, 0)>;
+ def : Pat<(truncstorei64 (i128 VR128:$x), bdxaddr12only:$addr),
+ (VSTEG VR128:$x, bdxaddr12only:$addr, 1)>;
+ def : Pat<(truncstorei64 (srl (i128 VR128:$x), (i32 64)), bdxaddr12only:$addr),
+ (VSTEG VR128:$x, bdxaddr12only:$addr, 0)>;
+}
+
+// Zero-extensions from GPR to i128.
+let Predicates = [FeatureVector] in {
+ def : Pat<(i128 (zext8 (anyext GR32:$x))),
+ (VLVGB (VGBM 0), GR32:$x, zero_reg, 15)>;
+ def : Pat<(i128 (zext16 (anyext GR32:$x))),
+ (VLVGH (VGBM 0), GR32:$x, zero_reg, 7)>;
+ def : Pat<(i128 (zext GR32:$x)),
+ (VLVGF (VGBM 0), GR32:$x, zero_reg, 3)>;
+ def : Pat<(i128 (zext GR64:$x)),
+ (VLVGG (VGBM 0), GR64:$x, zero_reg, 1)>;
+}
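+// (VGBM 0) materializes an all-zero vector, so inserting the GPR into the
+// rightmost element yields the zero-extended i128 directly.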
+
+// Zero-extending loads into i128.
+let Predicates = [FeatureVector] in {
+ def : Pat<(i128 (zextloadi8 bdxaddr12only:$addr)),
+ (VLEB (VGBM 0), bdxaddr12only:$addr, 15)>;
+ def : Pat<(i128 (zextloadi16 bdxaddr12only:$addr)),
+ (VLEH (VGBM 0), bdxaddr12only:$addr, 7)>;
+ def : Pat<(i128 (zextloadi32 bdxaddr12only:$addr)),
+ (VLEF (VGBM 0), bdxaddr12only:$addr, 3)>;
+ def : Pat<(i128 (zextloadi64 bdxaddr12only:$addr)),
+ (VLEG (VGBM 0), bdxaddr12only:$addr, 1)>;
+}
+
+// In-register i128 sign-extensions.
+let Predicates = [FeatureVector] in {
+ def : Pat<(i128 (sext_inreg VR128:$x, i8)),
+ (VSRAB (VREPB VR128:$x, 15), (VREPIB 120))>;
+ def : Pat<(i128 (sext_inreg VR128:$x, i16)),
+ (VSRAB (VREPH VR128:$x, 7), (VREPIB 112))>;
+ def : Pat<(i128 (sext_inreg VR128:$x, i32)),
+ (VSRAB (VREPF VR128:$x, 3), (VREPIB 96))>;
+ def : Pat<(i128 (sext_inreg VR128:$x, i64)),
+ (VSRAB (VREPG VR128:$x, 1), (VREPIB 64))>;
+}
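+// Worked example for i8: VREPB ..., 15 copies the rightmost byte into all 16
+// lanes, and the arithmetic shift right by 120 bits (VREPIB 120) then leaves
+// that byte in the low lane with its sign bit filling the upper 120 bits.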
+
+// Sign-extensions from GPR to i128.
+let Predicates = [FeatureVector] in {
+ def : Pat<(i128 (sext_inreg (anyext GR32:$x), i8)),
+ (VLVGP (SRAG (LGBR (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GR32:$x, subreg_l32)), zero_reg, 63),
+ (LGBR (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GR32:$x, subreg_l32)))>;
+ def : Pat<(i128 (sext_inreg (anyext GR32:$x), i16)),
+ (VLVGP (SRAG (LGHR (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GR32:$x, subreg_l32)), zero_reg, 63),
+ (LGHR (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GR32:$x, subreg_l32)))>;
+ def : Pat<(i128 (sext GR32:$x)),
+ (VLVGP (SRAG (LGFR GR32:$x), zero_reg, 63), (LGFR GR32:$x))>;
+ def : Pat<(i128 (sext GR64:$x)),
+ (VLVGP (SRAG GR64:$x, zero_reg, 63), GR64:$x)>;
+}
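+// SRAG by 63 broadcasts the sign bit of the 64-bit value, so VLVGP packs
+// (sign, value) as the high and low halves of the sign-extended i128.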
+
+// Sign-extending loads into i128.
+let Predicates = [FeatureVector] in {
+ def : Pat<(i128 (sextloadi8 bdxaddr12only:$addr)),
+ (VSRAB (VLREPB bdxaddr12only:$addr), (VREPIB 120))>;
+ def : Pat<(i128 (sextloadi16 bdxaddr12only:$addr)),
+ (VSRAB (VLREPH bdxaddr12only:$addr), (VREPIB 112))>;
+ def : Pat<(i128 (sextloadi32 bdxaddr12only:$addr)),
+ (VSRAB (VLREPF bdxaddr12only:$addr), (VREPIB 96))>;
+ def : Pat<(i128 (sextloadi64 bdxaddr12only:$addr)),
+ (VSRAB (VLREPG bdxaddr12only:$addr), (VREPIB 64))>;
+}
+
+// i128 comparison pseudo-instructions.
+let Predicates = [FeatureVector], Defs = [CC],
+ usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
+ def SCmp128Hi : Pseudo<(outs), (ins VR128:$src1, VR128:$src2),
+ [(set CC, (z_scmp128hi (i128 VR128:$src1),
+ (i128 VR128:$src2)))]>;
+ def UCmp128Hi : Pseudo<(outs), (ins VR128:$src1, VR128:$src2),
+ [(set CC, (z_ucmp128hi (i128 VR128:$src1),
+ (i128 VR128:$src2)))]>;
+}
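+// These pseudos compare only the high doublewords; the custom inserter
+// presumably expands them into the full multi-instruction sequence that
+// sets CC for a 128-bit comparison.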
+
+// i128 select pseudo-instructions.
+let Predicates = [FeatureVector] in
+ def Select128 : SelectWrapper<i128, VR128>;
+
+//===----------------------------------------------------------------------===//
// Conversions
//===----------------------------------------------------------------------===//
def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
+def : Pat<(v16i8 (bitconvert (i128 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (f128 VR128:$src))), (v16i8 VR128:$src)>;
@@ -1534,6 +1703,7 @@ def : Pat<(v16i8 (bitconvert (f128 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
+def : Pat<(v8i16 (bitconvert (i128 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (f128 VR128:$src))), (v8i16 VR128:$src)>;
@@ -1541,6 +1711,7 @@ def : Pat<(v8i16 (bitconvert (f128 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v4i32 (bitconvert (i128 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (f128 VR128:$src))), (v4i32 VR128:$src)>;
@@ -1548,6 +1719,7 @@ def : Pat<(v4i32 (bitconvert (f128 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
+def : Pat<(v2i64 (bitconvert (i128 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (f128 VR128:$src))), (v2i64 VR128:$src)>;
@@ -1555,6 +1727,7 @@ def : Pat<(v2i64 (bitconvert (f128 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
+def : Pat<(v4f32 (bitconvert (i128 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (f128 VR128:$src))), (v4f32 VR128:$src)>;
@@ -1562,6 +1735,7 @@ def : Pat<(v4f32 (bitconvert (f128 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (i128 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (f128 VR128:$src))), (v2f64 VR128:$src)>;
@@ -1570,9 +1744,18 @@ def : Pat<(f128 (bitconvert (v16i8 VR128:$src))), (f128 VR128:$src)>;
def : Pat<(f128 (bitconvert (v8i16 VR128:$src))), (f128 VR128:$src)>;
def : Pat<(f128 (bitconvert (v4i32 VR128:$src))), (f128 VR128:$src)>;
def : Pat<(f128 (bitconvert (v2i64 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(f128 (bitconvert (i128 VR128:$src))), (f128 VR128:$src)>;
def : Pat<(f128 (bitconvert (v4f32 VR128:$src))), (f128 VR128:$src)>;
def : Pat<(f128 (bitconvert (v2f64 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(i128 (bitconvert (v16i8 VR128:$src))), (i128 VR128:$src)>;
+def : Pat<(i128 (bitconvert (v8i16 VR128:$src))), (i128 VR128:$src)>;
+def : Pat<(i128 (bitconvert (v4i32 VR128:$src))), (i128 VR128:$src)>;
+def : Pat<(i128 (bitconvert (v2i64 VR128:$src))), (i128 VR128:$src)>;
+def : Pat<(i128 (bitconvert (v4f32 VR128:$src))), (i128 VR128:$src)>;
+def : Pat<(i128 (bitconvert (v2f64 VR128:$src))), (i128 VR128:$src)>;
+def : Pat<(i128 (bitconvert (f128 VR128:$src))), (i128 VR128:$src)>;
+
//===----------------------------------------------------------------------===//
// Replicating scalars
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index be7012a37a3d..5411b94129a6 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -17,10 +17,10 @@ namespace SystemZ {
// A struct to hold the low and high GPR registers to be saved/restored as
// well as the offset into the register save area of the low register.
struct GPRRegs {
- unsigned LowGPR;
- unsigned HighGPR;
- unsigned GPROffset;
- GPRRegs() : LowGPR(0), HighGPR(0), GPROffset(0) {}
+ unsigned LowGPR = 0;
+ unsigned HighGPR = 0;
+ unsigned GPROffset = 0;
+ GPRRegs() = default;
};
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperands.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperands.td
index c92e0abe38ac..0221e2c53f2f 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperands.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperands.td
@@ -314,70 +314,72 @@ def U48Imm : ImmediateAsmOperand<"U48Imm">;
// Immediates for the lower and upper 16 bits of an i32, with the other
// bits of the i32 being zero.
defm imm32ll16 : Immediate<i32, [{
- return SystemZ::isImmLL(N->getZExtValue());
+ return N->getAPIntValue().isIntN(32) && SystemZ::isImmLL(N->getZExtValue());
}], LL16, "U16Imm">;
defm imm32lh16 : Immediate<i32, [{
- return SystemZ::isImmLH(N->getZExtValue());
+ return N->getAPIntValue().isIntN(32) && SystemZ::isImmLH(N->getZExtValue());
}], LH16, "U16Imm">;
// Immediates for the lower and upper 16 bits of an i32, with the other
// bits of the i32 being one.
defm imm32ll16c : Immediate<i32, [{
- return SystemZ::isImmLL(uint32_t(~N->getZExtValue()));
+ return N->getAPIntValue().isIntN(32) &&
+ SystemZ::isImmLL(uint32_t(~N->getZExtValue()));
}], LL16, "U16Imm">;
defm imm32lh16c : Immediate<i32, [{
- return SystemZ::isImmLH(uint32_t(~N->getZExtValue()));
+ return N->getAPIntValue().isIntN(32) &&
+ SystemZ::isImmLH(uint32_t(~N->getZExtValue()));
}], LH16, "U16Imm">;
// Short immediates
defm imm32zx1 : Immediate<i32, [{
- return isUInt<1>(N->getZExtValue());
+ return N->getAPIntValue().isIntN(1);
}], NOOP_SDNodeXForm, "U1Imm">;
defm imm32zx2 : Immediate<i32, [{
- return isUInt<2>(N->getZExtValue());
+ return N->getAPIntValue().isIntN(2);
}], NOOP_SDNodeXForm, "U2Imm">;
defm imm32zx3 : Immediate<i32, [{
- return isUInt<3>(N->getZExtValue());
+ return N->getAPIntValue().isIntN(3);
}], NOOP_SDNodeXForm, "U3Imm">;
defm imm32zx4 : Immediate<i32, [{
- return isUInt<4>(N->getZExtValue());
+ return N->getAPIntValue().isIntN(4);
}], NOOP_SDNodeXForm, "U4Imm">;
// Note: this enforces an even value during code generation only.
// When used from the assembler, any 4-bit value is allowed.
defm imm32zx4even : Immediate<i32, [{
- return isUInt<4>(N->getZExtValue());
+ return N->getAPIntValue().isIntN(4);
}], UIMM8EVEN, "U4Imm">;
defm imm32sx8 : Immediate<i32, [{
- return isInt<8>(N->getSExtValue());
+ return N->getAPIntValue().isSignedIntN(8);
}], SIMM8, "S8Imm">;
defm imm32zx8 : Immediate<i32, [{
- return isUInt<8>(N->getZExtValue());
+ return N->getAPIntValue().isIntN(8);
}], UIMM8, "U8Imm">;
defm imm32zx8trunc : Immediate<i32, [{}], UIMM8, "U8Imm">;
defm imm32zx12 : Immediate<i32, [{
- return isUInt<12>(N->getZExtValue());
+ return N->getAPIntValue().isIntN(12);
}], UIMM12, "U12Imm">;
defm imm32sx16 : Immediate<i32, [{
- return isInt<16>(N->getSExtValue());
+ return N->getAPIntValue().isSignedIntN(16);
}], SIMM16, "S16Imm">;
defm imm32sx16n : Immediate<i32, [{
- return isInt<16>(-N->getSExtValue());
+ return (-N->getAPIntValue()).isSignedIntN(16);
}], NEGSIMM16, "S16Imm">;
defm imm32zx16 : Immediate<i32, [{
- return isUInt<16>(N->getZExtValue());
+ return N->getAPIntValue().isIntN(16);
}], UIMM16, "U16Imm">;
defm imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">;
@@ -390,7 +392,8 @@ defm simm32 : Immediate<i32, [{}], SIMM32, "S32Imm">;
defm uimm32 : Immediate<i32, [{}], UIMM32, "U32Imm">;
defm simm32n : Immediate<i32, [{
- return isInt<32>(-N->getSExtValue());
+ auto SImm = N->getAPIntValue().trySExtValue();
+ return SImm.has_value() && isInt<32>(-*SImm);
}], NEGSIMM32, "S32Imm">;
def imm32 : ImmLeaf<i32, [{}]>;
@@ -402,107 +405,115 @@ def imm32 : ImmLeaf<i32, [{}]>;
// Immediates for 16-bit chunks of an i64, with the other bits of the
// i64 being zero.
defm imm64ll16 : Immediate<i64, [{
- return SystemZ::isImmLL(N->getZExtValue());
+ return N->getAPIntValue().isIntN(64) && SystemZ::isImmLL(N->getZExtValue());
}], LL16, "U16Imm">;
defm imm64lh16 : Immediate<i64, [{
- return SystemZ::isImmLH(N->getZExtValue());
+ return N->getAPIntValue().isIntN(64) && SystemZ::isImmLH(N->getZExtValue());
}], LH16, "U16Imm">;
defm imm64hl16 : Immediate<i64, [{
- return SystemZ::isImmHL(N->getZExtValue());
+ return N->getAPIntValue().isIntN(64) && SystemZ::isImmHL(N->getZExtValue());
}], HL16, "U16Imm">;
defm imm64hh16 : Immediate<i64, [{
- return SystemZ::isImmHH(N->getZExtValue());
+ return N->getAPIntValue().isIntN(64) && SystemZ::isImmHH(N->getZExtValue());
}], HH16, "U16Imm">;
// Immediates for 16-bit chunks of an i64, with the other bits of the
// i64 being one.
defm imm64ll16c : Immediate<i64, [{
- return SystemZ::isImmLL(uint64_t(~N->getZExtValue()));
+ return N->getAPIntValue().isIntN(64) &&
+ SystemZ::isImmLL(uint64_t(~N->getZExtValue()));
}], LL16, "U16Imm">;
defm imm64lh16c : Immediate<i64, [{
- return SystemZ::isImmLH(uint64_t(~N->getZExtValue()));
+ return N->getAPIntValue().isIntN(64) &&
+ SystemZ::isImmLH(uint64_t(~N->getZExtValue()));
}], LH16, "U16Imm">;
defm imm64hl16c : Immediate<i64, [{
- return SystemZ::isImmHL(uint64_t(~N->getZExtValue()));
+ return N->getAPIntValue().isIntN(64) &&
+ SystemZ::isImmHL(uint64_t(~N->getZExtValue()));
}], HL16, "U16Imm">;
defm imm64hh16c : Immediate<i64, [{
- return SystemZ::isImmHH(uint64_t(~N->getZExtValue()));
+ return N->getAPIntValue().isIntN(64) &&
+ SystemZ::isImmHH(uint64_t(~N->getZExtValue()));
}], HH16, "U16Imm">;
// Immediates for the lower and upper 32 bits of an i64, with the other
// bits of the i64 being zero.
defm imm64lf32 : Immediate<i64, [{
- return SystemZ::isImmLF(N->getZExtValue());
+ return N->getAPIntValue().isIntN(64) && SystemZ::isImmLF(N->getZExtValue());
}], LF32, "U32Imm">;
defm imm64hf32 : Immediate<i64, [{
- return SystemZ::isImmHF(N->getZExtValue());
+ return N->getAPIntValue().isIntN(64) && SystemZ::isImmHF(N->getZExtValue());
}], HF32, "U32Imm">;
// Immediates for the lower and upper 32 bits of an i64, with the other
// bits of the i64 being one.
defm imm64lf32c : Immediate<i64, [{
- return SystemZ::isImmLF(uint64_t(~N->getZExtValue()));
+ return N->getAPIntValue().isIntN(64) &&
+ SystemZ::isImmLF(uint64_t(~N->getZExtValue()));
}], LF32, "U32Imm">;
defm imm64hf32c : Immediate<i64, [{
- return SystemZ::isImmHF(uint64_t(~N->getZExtValue()));
+ return N->getAPIntValue().isIntN(64) &&
+ SystemZ::isImmHF(uint64_t(~N->getZExtValue()));
}], HF32, "U32Imm">;
// Negated immediates that fit LF32 or LH16.
defm imm64lh16n : Immediate<i64, [{
- return SystemZ::isImmLH(uint64_t(-N->getZExtValue()));
+ return N->getAPIntValue().isIntN(64) &&
+ SystemZ::isImmLH(uint64_t(-N->getZExtValue()));
}], NEGLH16, "U16Imm">;
defm imm64lf32n : Immediate<i64, [{
- return SystemZ::isImmLF(uint64_t(-N->getZExtValue()));
+ return N->getAPIntValue().isIntN(64) &&
+ SystemZ::isImmLF(uint64_t(-N->getZExtValue()));
}], NEGLF32, "U32Imm">;
// Short immediates.
defm imm64sx8 : Immediate<i64, [{
- return isInt<8>(N->getSExtValue());
+ return N->getAPIntValue().isSignedIntN(8);
}], SIMM8, "S8Imm">;
defm imm64zx8 : Immediate<i64, [{
- return isUInt<8>(N->getSExtValue());
+ return N->getAPIntValue().isIntN(8);
}], UIMM8, "U8Imm">;
defm imm64sx16 : Immediate<i64, [{
- return isInt<16>(N->getSExtValue());
+ return N->getAPIntValue().isSignedIntN(16);
}], SIMM16, "S16Imm">;
defm imm64sx16n : Immediate<i64, [{
- return isInt<16>(-N->getSExtValue());
+ return (-N->getAPIntValue()).isSignedIntN(16);
}], NEGSIMM16, "S16Imm">;
defm imm64zx16 : Immediate<i64, [{
- return isUInt<16>(N->getZExtValue());
+ return N->getAPIntValue().isIntN(16);
}], UIMM16, "U16Imm">;
defm imm64sx32 : Immediate<i64, [{
- return isInt<32>(N->getSExtValue());
+ return N->getAPIntValue().isSignedIntN(32);
}], SIMM32, "S32Imm">;
defm imm64sx32n : Immediate<i64, [{
- return isInt<32>(-N->getSExtValue());
+ return (-N->getAPIntValue()).isSignedIntN(32);
}], NEGSIMM32, "S32Imm">;
defm imm64zx32 : Immediate<i64, [{
- return isUInt<32>(N->getZExtValue());
+ return N->getAPIntValue().isIntN(32);
}], UIMM32, "U32Imm">;
defm imm64zx32n : Immediate<i64, [{
- return isUInt<32>(-N->getSExtValue());
+ return (-N->getAPIntValue()).isIntN(32);
}], NEGUIMM32, "U32Imm">;
defm imm64zx48 : Immediate<i64, [{
- return isUInt<64>(N->getZExtValue());
+ return N->getAPIntValue().isIntN(64);
}], UIMM48, "U48Imm">;
class Imm64 : ImmLeaf<i64, [{}]>, Operand<i64> {
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td
index 6713cac2a780..af6cf340f8a3 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -59,6 +59,15 @@ def SDT_ZBinaryWithCarry : SDTypeProfile<2, 3,
SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
SDTCisVT<1, i32>]>;
+def SDT_ZBinaryConv : SDTypeProfile<1, 2,
+ [SDTCisInt<0>,
+ SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def SDT_ZTernary : SDTypeProfile<1, 3,
+ [SDTCisInt<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>;
def SDT_ZAtomicLoadBinaryW : SDTypeProfile<1, 5,
[SDTCisVT<0, i32>,
SDTCisPtrTy<1>,
@@ -283,6 +292,12 @@ def z_uaddo : SDNode<"SystemZISD::UADDO", SDT_ZBinaryWithFlags>;
def z_usubo : SDNode<"SystemZISD::USUBO", SDT_ZBinaryWithFlags>;
def z_addcarry_1 : SDNode<"SystemZISD::ADDCARRY", SDT_ZBinaryWithCarry>;
def z_subcarry_1 : SDNode<"SystemZISD::SUBCARRY", SDT_ZBinaryWithCarry>;
+def z_vacc : SDNode<"SystemZISD::VACC", SDTIntBinOp>;
+def z_vac : SDNode<"SystemZISD::VAC", SDT_ZTernary>;
+def z_vaccc : SDNode<"SystemZISD::VACCC", SDT_ZTernary>;
+def z_vscbi : SDNode<"SystemZISD::VSCBI", SDTIntBinOp>;
+def z_vsbi : SDNode<"SystemZISD::VSBI", SDT_ZTernary>;
+def z_vsbcbi : SDNode<"SystemZISD::VSBCBI", SDT_ZTernary>;
def z_loadbswap : SDNode<"SystemZISD::LRV", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
@@ -324,7 +339,9 @@ def z_vsrl_by_scalar : SDNode<"SystemZISD::VSRL_BY_SCALAR",
SDT_ZVecBinaryInt>;
def z_vsra_by_scalar : SDNode<"SystemZISD::VSRA_BY_SCALAR",
SDT_ZVecBinaryInt>;
-def z_vsum : SDNode<"SystemZISD::VSUM", SDT_ZVecBinaryConv>;
+def z_vrotl_by_scalar : SDNode<"SystemZISD::VROTL_BY_SCALAR",
+ SDT_ZVecBinaryInt>;
+def z_vsum : SDNode<"SystemZISD::VSUM", SDT_ZBinaryConv>;
def z_vicmpe : SDNode<"SystemZISD::VICMPE", SDT_ZVecBinary>;
def z_vicmph : SDNode<"SystemZISD::VICMPH", SDT_ZVecBinary>;
def z_vicmphl : SDNode<"SystemZISD::VICMPHL", SDT_ZVecBinary>;
@@ -356,6 +373,8 @@ def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>;
def z_strict_vround : SDNode<"SystemZISD::STRICT_VROUND",
SDT_ZVecUnaryConv, [SDNPHasChain]>;
def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp>;
+def z_scmp128hi : SDNode<"SystemZISD::SCMP128HI", SDT_ZCmp>;
+def z_ucmp128hi : SDNode<"SystemZISD::UCMP128HI", SDT_ZCmp>;
def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryIntCC>;
def z_vfaez_cc : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryIntCC>;
def z_vfee_cc : SDNode<"SystemZISD::VFEE_CC", SDT_ZVecBinaryCC>;
@@ -755,10 +774,27 @@ class shiftop<SDPatternOperator operator>
[(operator node:$val, node:$count),
(operator node:$val, (and node:$count, imm32bottom6set))]>;
+// Create a shift operator that optionally ignores an AND of the
+// shift count with an immediate if the bottom 7 bits are all set.
+def imm32bottom7set : PatLeaf<(i32 imm), [{
+ return (N->getZExtValue() & 0x7f) == 0x7f;
+}]>;
+class vshiftop<SDPatternOperator operator>
+ : PatFrags<(ops node:$val, node:$count),
+ [(operator node:$val, node:$count),
+ (operator node:$val, (and node:$count, imm32bottom7set))]>;
+
def imm32mod64 : PatLeaf<(i32 imm), [{
return (N->getZExtValue() % 64 == 0);
}]>;
+def imm32nobits : PatLeaf<(i32 imm), [{
+ return (N->getZExtValue() & 0x07) == 0;
+}]>;
+def imm32nobytes : PatLeaf<(i32 imm), [{
+ return (N->getZExtValue() & 0x78) == 0;
+}]>;
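+// These feed the full-vector i128 shift patterns: imm32nobits matches
+// amounts that are a whole number of bytes (pure byte shift) and
+// imm32nobytes matches amounts below 8 (pure bit shift); any other amount
+// needs both a byte and a bit shift.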
+
// Load a scalar and replicate it in all elements of a vector.
class z_replicate_load<ValueType scalartype, SDPatternOperator load>
: PatFrag<(ops node:$addr),
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 4d6b94da3a27..d5313acd8785 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -377,12 +377,12 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
}
bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI,
- const TargetRegisterClass *SrcRC,
- unsigned SubReg,
- const TargetRegisterClass *DstRC,
- unsigned DstSubReg,
- const TargetRegisterClass *NewRC,
- LiveIntervals &LIS) const {
+ const TargetRegisterClass *SrcRC,
+ unsigned SubReg,
+ const TargetRegisterClass *DstRC,
+ unsigned DstSubReg,
+ const TargetRegisterClass *NewRC,
+ LiveIntervals &LIS) const {
assert (MI->isCopy() && "Only expecting COPY instructions");
// Coalesce anything which is not a COPY involving a subreg to/from GR128.
@@ -390,44 +390,26 @@ bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI,
(getRegSizeInBits(*SrcRC) <= 64 || getRegSizeInBits(*DstRC) <= 64)))
return true;
- // Allow coalescing of a GR128 subreg COPY only if the live ranges are small
- // and local to one MBB with not too much interferring registers. Otherwise
+ // Allow coalescing of a GR128 subreg COPY only if the subreg live range is
+ // local to one MBB without too many interfering physreg clobbers. Otherwise
// regalloc may run out of registers.
+ unsigned SubregOpIdx = getRegSizeInBits(*SrcRC) == 128 ? 0 : 1;
+ LiveInterval &LI = LIS.getInterval(MI->getOperand(SubregOpIdx).getReg());
- unsigned WideOpNo = (getRegSizeInBits(*SrcRC) == 128 ? 1 : 0);
- Register GR128Reg = MI->getOperand(WideOpNo).getReg();
- Register GRNarReg = MI->getOperand((WideOpNo == 1) ? 0 : 1).getReg();
- LiveInterval &IntGR128 = LIS.getInterval(GR128Reg);
- LiveInterval &IntGRNar = LIS.getInterval(GRNarReg);
-
- // Check that the two virtual registers are local to MBB.
+ // Check that the subreg live range is local to one MBB.
MachineBasicBlock *MBB = MI->getParent();
- MachineInstr *FirstMI_GR128 =
- LIS.getInstructionFromIndex(IntGR128.beginIndex());
- MachineInstr *FirstMI_GRNar =
- LIS.getInstructionFromIndex(IntGRNar.beginIndex());
- MachineInstr *LastMI_GR128 = LIS.getInstructionFromIndex(IntGR128.endIndex());
- MachineInstr *LastMI_GRNar = LIS.getInstructionFromIndex(IntGRNar.endIndex());
- if ((!FirstMI_GR128 || FirstMI_GR128->getParent() != MBB) ||
- (!FirstMI_GRNar || FirstMI_GRNar->getParent() != MBB) ||
- (!LastMI_GR128 || LastMI_GR128->getParent() != MBB) ||
- (!LastMI_GRNar || LastMI_GRNar->getParent() != MBB))
+ MachineInstr *FirstMI = LIS.getInstructionFromIndex(LI.beginIndex());
+ MachineInstr *LastMI = LIS.getInstructionFromIndex(LI.endIndex());
+ if (!FirstMI || FirstMI->getParent() != MBB ||
+ !LastMI || LastMI->getParent() != MBB)
return false;
- MachineBasicBlock::iterator MII = nullptr, MEE = nullptr;
- if (WideOpNo == 1) {
- MII = FirstMI_GR128;
- MEE = LastMI_GRNar;
- } else {
- MII = FirstMI_GRNar;
- MEE = LastMI_GR128;
- }
-
// Check if coalescing seems safe by finding the set of clobbered physreg
// pairs in the region.
BitVector PhysClobbered(getNumRegs());
- MEE++;
- for (; MII != MEE; ++MII) {
+ for (MachineBasicBlock::iterator MII = FirstMI,
+ MEE = std::next(LastMI->getIterator());
+ MII != MEE; ++MII)
for (const MachineOperand &MO : MII->operands())
if (MO.isReg() && MO.getReg().isPhysical()) {
for (MCPhysReg SI : superregs_inclusive(MO.getReg()))
@@ -436,7 +418,6 @@ bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI,
break;
}
}
- }
// Demand an arbitrary margin of free regs.
unsigned const DemandedFreeGR128 = 3;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
index 5d66501172b2..8f9bb56f2eb3 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -124,7 +124,7 @@ defm GRX32 : SystemZRegClass<"GRX32", [i32], 32,
R12L,R12H,R13L,R13H,R14L,R14H,R15L,R15H)
]>;
-// The architecture doesn't really have any i128 support, so model the
+// On machines without SIMD support, i128 is not a legal type, so model the
// register pairs as untyped instead.
// XPLINK64: Allocate all registers in natural order
defm GR128 : SystemZRegClass<"GR128", [untyped], 128,
@@ -285,7 +285,8 @@ defm VF128 : SystemZRegClass<"VF128",
// All vector registers.
defm VR128 : SystemZRegClass<"VR128",
- [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128],
+ [v16i8, v8i16, v4i32, v2i64, i128,
+ v4f32, v2f64, f128],
128, (add (sequence "V%u", 0, 7),
(sequence "V%u", 16, 31),
(sequence "V%u", 8, 15))>;
@@ -305,7 +306,7 @@ def v128b : TypedReg<v16i8, VR128>;
def v128h : TypedReg<v8i16, VR128>;
def v128f : TypedReg<v4i32, VR128>;
def v128g : TypedReg<v2i64, VR128>;
-def v128q : TypedReg<v16i8, VR128>;
+def v128q : TypedReg<i128, VR128>;
def v128sb : TypedReg<v4f32, VR128>;
def v128db : TypedReg<v2f64, VR128>;
def v128xb : TypedReg<f128, VR128>;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
index fd01a8a941c9..fe84ca45374b 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -92,7 +92,7 @@ let NumMicroOps = 0 in {
def : WriteRes<VecMul, [Z13_VecUnit]>;
def : WriteRes<VecStr, [Z13_VecUnit]>;
def : WriteRes<VecXsPm, [Z13_VecUnit]>;
- foreach Num = 2-5 in { let ResourceCycles = [Num] in {
+ foreach Num = 2-5 in { let ReleaseAtCycles = [Num] in {
def : WriteRes<!cast<SchedWrite>("FXa"#Num), [Z13_FXaUnit]>;
def : WriteRes<!cast<SchedWrite>("FXb"#Num), [Z13_FXbUnit]>;
def : WriteRes<!cast<SchedWrite>("LSU"#Num), [Z13_LSUnit]>;
@@ -104,7 +104,7 @@ let NumMicroOps = 0 in {
def : WriteRes<!cast<SchedWrite>("VecXsPm"#Num), [Z13_VecUnit]>;
}}
- def : WriteRes<VecFPd, [Z13_VecFPdUnit]> { let ResourceCycles = [30]; }
+ def : WriteRes<VecFPd, [Z13_VecFPdUnit]> { let ReleaseAtCycles = [30]; }
def : WriteRes<VBU, [Z13_VBUnit]>; // Virtual Branching Unit
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
index 3f406736a71f..d5eab33dbb4a 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
@@ -92,7 +92,7 @@ let NumMicroOps = 0 in {
def : WriteRes<VecMul, [Z14_VecUnit]>;
def : WriteRes<VecStr, [Z14_VecUnit]>;
def : WriteRes<VecXsPm, [Z14_VecUnit]>;
- foreach Num = 2-5 in { let ResourceCycles = [Num] in {
+ foreach Num = 2-5 in { let ReleaseAtCycles = [Num] in {
def : WriteRes<!cast<SchedWrite>("FXa"#Num), [Z14_FXaUnit]>;
def : WriteRes<!cast<SchedWrite>("FXb"#Num), [Z14_FXbUnit]>;
def : WriteRes<!cast<SchedWrite>("LSU"#Num), [Z14_LSUnit]>;
@@ -104,7 +104,7 @@ let NumMicroOps = 0 in {
def : WriteRes<!cast<SchedWrite>("VecXsPm"#Num), [Z14_VecUnit]>;
}}
- def : WriteRes<VecFPd, [Z14_VecFPdUnit]> { let ResourceCycles = [30]; }
+ def : WriteRes<VecFPd, [Z14_VecFPdUnit]> { let ReleaseAtCycles = [30]; }
def : WriteRes<VBU, [Z14_VBUnit]>; // Virtual Branching Unit
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
index 6ae911c3f3eb..09b505fc149e 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
@@ -92,7 +92,7 @@ let NumMicroOps = 0 in {
def : WriteRes<VecMul, [Z15_VecUnit]>;
def : WriteRes<VecStr, [Z15_VecUnit]>;
def : WriteRes<VecXsPm, [Z15_VecUnit]>;
- foreach Num = 2-5 in { let ResourceCycles = [Num] in {
+ foreach Num = 2-5 in { let ReleaseAtCycles = [Num] in {
def : WriteRes<!cast<SchedWrite>("FXa"#Num), [Z15_FXaUnit]>;
def : WriteRes<!cast<SchedWrite>("FXb"#Num), [Z15_FXbUnit]>;
def : WriteRes<!cast<SchedWrite>("LSU"#Num), [Z15_LSUnit]>;
@@ -104,7 +104,7 @@ let NumMicroOps = 0 in {
def : WriteRes<!cast<SchedWrite>("VecXsPm"#Num), [Z15_VecUnit]>;
}}
- def : WriteRes<VecFPd, [Z15_VecFPdUnit]> { let ResourceCycles = [30]; }
+ def : WriteRes<VecFPd, [Z15_VecFPdUnit]> { let ReleaseAtCycles = [30]; }
def : WriteRes<VBU, [Z15_VBUnit]>; // Virtual Branching Unit
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td
index ca688671a7e2..695604af1b88 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td
@@ -92,7 +92,7 @@ let NumMicroOps = 0 in {
def : WriteRes<VecMul, [Z16_VecUnit]>;
def : WriteRes<VecStr, [Z16_VecUnit]>;
def : WriteRes<VecXsPm, [Z16_VecUnit]>;
- foreach Num = 2-5 in { let ResourceCycles = [Num] in {
+ foreach Num = 2-5 in { let ReleaseAtCycles = [Num] in {
def : WriteRes<!cast<SchedWrite>("FXa"#Num), [Z16_FXaUnit]>;
def : WriteRes<!cast<SchedWrite>("FXb"#Num), [Z16_FXbUnit]>;
def : WriteRes<!cast<SchedWrite>("LSU"#Num), [Z16_LSUnit]>;
@@ -104,8 +104,8 @@ let NumMicroOps = 0 in {
def : WriteRes<!cast<SchedWrite>("VecXsPm"#Num), [Z16_VecUnit]>;
}}
- def : WriteRes<VecFPd, [Z16_VecFPdUnit]> { let ResourceCycles = [30]; }
- def : WriteRes<VecFPd20, [Z16_VecFPdUnit]> { let ResourceCycles = [20]; }
+ def : WriteRes<VecFPd, [Z16_VecFPdUnit]> { let ReleaseAtCycles = [30]; }
+ def : WriteRes<VecFPd20, [Z16_VecFPdUnit]> { let ReleaseAtCycles = [20]; }
def : WriteRes<VBU, [Z16_VBUnit]>; // Virtual Branching Unit
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
index 173cf960d2bd..98ea7d094906 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -82,7 +82,7 @@ let NumMicroOps = 0 in {
def : WriteRes<LSU, [Z196_LSUnit]>;
def : WriteRes<FPU, [Z196_FPUnit]>;
def : WriteRes<DFU, [Z196_DFUnit]>;
- foreach Num = 2-6 in { let ResourceCycles = [Num] in {
+ foreach Num = 2-6 in { let ReleaseAtCycles = [Num] in {
def : WriteRes<!cast<SchedWrite>("FXU"#Num), [Z196_FXUnit]>;
def : WriteRes<!cast<SchedWrite>("LSU"#Num), [Z196_LSUnit]>;
def : WriteRes<!cast<SchedWrite>("FPU"#Num), [Z196_FPUnit]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
index d2060471d65e..bd0bc480a008 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -83,7 +83,7 @@ let NumMicroOps = 0 in {
def : WriteRes<LSU, [ZEC12_LSUnit]>;
def : WriteRes<FPU, [ZEC12_FPUnit]>;
def : WriteRes<DFU, [ZEC12_DFUnit]>;
- foreach Num = 2-6 in { let ResourceCycles = [Num] in {
+ foreach Num = 2-6 in { let ReleaseAtCycles = [Num] in {
def : WriteRes<!cast<SchedWrite>("FXU"#Num), [ZEC12_FXUnit]>;
def : WriteRes<!cast<SchedWrite>("LSU"#Num), [ZEC12_LSUnit]>;
def : WriteRes<!cast<SchedWrite>("FPU"#Num), [ZEC12_FPUnit]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
index e008ce859a9a..491bff7f3c30 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -59,10 +59,8 @@ SystemZSubtarget::initializeSpecialRegisters() {
return new SystemZXPLINK64Registers;
else if (isTargetELF())
return new SystemZELFRegisters;
- else {
- llvm_unreachable("Invalid Calling Convention. Cannot initialize Special "
- "Call Registers!");
- }
+ llvm_unreachable("Invalid Calling Convention. Cannot initialize Special "
+ "Call Registers!");
}
SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 787c51645de1..73e01e3ec184 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -13,8 +13,6 @@
#include "SystemZMachineScheduler.h"
#include "SystemZTargetTransformInfo.h"
#include "TargetInfo/SystemZTargetInfo.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -25,11 +23,13 @@
#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Transforms/Scalar.h"
+#include <memory>
#include <optional>
#include <string>
using namespace llvm;
+// NOLINTNEXTLINE(readability-identifier-naming)
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTarget() {
// Register the target.
RegisterTargetMachine<SystemZTargetMachine> X(getTheSystemZTarget());
@@ -143,7 +143,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: LLVMTargetMachine(
T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(RM),
@@ -169,12 +169,14 @@ SystemZTargetMachine::getSubtargetImpl(const Function &F) const {
FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
// FIXME: This is related to the code below to reset the target options,
- // we need to know whether or not the soft float flag is set on the
- // function, so we can enable it as a subtarget feature.
- bool softFloat = F.getFnAttribute("use-soft-float").getValueAsBool();
-
- if (softFloat)
+ // we need to know whether the soft float and backchain flags are set on the
+ // function, so we can enable them as subtarget features.
+ bool SoftFloat = F.getFnAttribute("use-soft-float").getValueAsBool();
+ if (SoftFloat)
FS += FS.empty() ? "+soft-float" : ",+soft-float";
+ bool BackChain = F.hasFnAttribute("backchain");
+ if (BackChain)
+ FS += FS.empty() ? "+backchain" : ",+backchain";
auto &I = SubtargetMap[CPU + TuneCPU + FS];
if (!I) {
@@ -221,18 +223,20 @@ public:
} // end anonymous namespace
void SystemZPassConfig::addIRPasses() {
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOptLevel::None) {
addPass(createSystemZTDCPass());
addPass(createLoopDataPrefetchPass());
}
+ addPass(createAtomicExpandPass());
+
TargetPassConfig::addIRPasses();
}
bool SystemZPassConfig::addInstSelector() {
addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel()));
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
addPass(createSystemZLDCleanupPass(getSystemZTargetMachine()));
return false;
@@ -254,12 +258,12 @@ void SystemZPassConfig::addPostRewrite() {
void SystemZPassConfig::addPostRegAlloc() {
// PostRewrite needs to be run at -O0 also (in which case addPostRewrite()
// is not called).
- if (getOptLevel() == CodeGenOpt::None)
+ if (getOptLevel() == CodeGenOptLevel::None)
addPass(createSystemZPostRewritePass(getSystemZTargetMachine()));
}
void SystemZPassConfig::addPreSched2() {
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
addPass(&IfConverterID);
}
@@ -267,7 +271,7 @@ void SystemZPassConfig::addPreEmitPass() {
// Do instruction shortening before compare elimination because some
// vector instructions will be shortened into opcodes that compare
// elimination recognizes.
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
addPass(createSystemZShortenInstPass(getSystemZTargetMachine()));
// We eliminate comparisons here rather than earlier because some
@@ -293,14 +297,14 @@ void SystemZPassConfig::addPreEmitPass() {
// Doing it so late makes it more likely that a register will be reused
// between the comparison and the branch, but it isn't clear whether
// preventing that would be a win or not.
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
addPass(createSystemZElimComparePass(getSystemZTargetMachine()));
addPass(createSystemZLongBranchPass(getSystemZTargetMachine()));
// Do final scheduling after all other optimizations, to get an
// optimal input for the decoder (branch relaxation must happen
// after block placement).
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
addPass(&PostMachineSchedulerID);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
index 20d68fff170a..75e5d68e74ee 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -33,7 +33,7 @@ public:
SystemZTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
~SystemZTargetMachine() override;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetStreamer.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetStreamer.h
index 884082cfa196..4c7a6ca38643 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetStreamer.h
@@ -12,6 +12,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
+#include <map>
+#include <utility>
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index abac7a9bfe0a..1f97e0f761c0 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -598,7 +598,7 @@ InstructionCost SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
- Kind = improveShuffleKindFromMask(Kind, Mask);
+ Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp);
if (ST->hasVector()) {
unsigned NumVectors = getNumVectorRegs(Tp);
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
index acfafd91bc17..91e4c91b00b9 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
@@ -16,6 +16,7 @@ Target &llvm::getTheSystemZTarget() {
return TheSystemZTarget;
}
+// NOLINTNEXTLINE(readability-identifier-naming)
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTargetInfo() {
RegisterTarget<Triple::systemz, /*HasJIT=*/true> X(
getTheSystemZTarget(), "systemz", "SystemZ", "SystemZ");
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/ZOSLibcallNames.def b/contrib/llvm-project/llvm/lib/Target/SystemZ/ZOSLibcallNames.def
new file mode 100644
index 000000000000..12a01522a7e6
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/ZOSLibcallNames.def
@@ -0,0 +1,100 @@
+//===-- ZOSLibcallNames.def -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the runtime library functions on z/OS which can be
+// generated during instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+#if !defined(HANDLE_LIBCALL)
+#error "HANDLE_LIBCALL must be defined"
+#endif
+
+HANDLE_LIBCALL(TRUNC_F64, "@@TRNC@B")
+HANDLE_LIBCALL(TRUNC_F32, "@@FTRC@B")
+HANDLE_LIBCALL(TRUNC_F128, "@@LTRC@B")
+HANDLE_LIBCALL(SQRT_F64, "@@WSQT@B")
+HANDLE_LIBCALL(SQRT_F32, "@@FSQT@B")
+HANDLE_LIBCALL(SQRT_F128, "@@LSQT@B")
+HANDLE_LIBCALL(SIN_F64, "@@SSIN@B")
+HANDLE_LIBCALL(SIN_F32, "@@FSIN@B")
+HANDLE_LIBCALL(SIN_F128, "@@LSIN@B")
+HANDLE_LIBCALL(ROUND_F64, "@@ROUN@B")
+HANDLE_LIBCALL(ROUND_F32, "@@ROUNFB")
+HANDLE_LIBCALL(ROUND_F128, "@@ROUNLB")
+HANDLE_LIBCALL(RINT_F64, "@@SRNT@B")
+HANDLE_LIBCALL(RINT_F32, "@@RINTFB")
+HANDLE_LIBCALL(RINT_F128, "@@RINTLB")
+HANDLE_LIBCALL(REM_F64, "@@WFMD@B")
+HANDLE_LIBCALL(REM_F32, "@@FFMD@B")
+HANDLE_LIBCALL(REM_F128, "@@LFMD@B")
+HANDLE_LIBCALL(POW_F64, "@@WPOW@B")
+HANDLE_LIBCALL(POW_F32, "@@FPOW@B")
+HANDLE_LIBCALL(POW_F128, "@@LPOW@B")
+HANDLE_LIBCALL(NEARBYINT_F64, "@@NBYI@B")
+HANDLE_LIBCALL(NEARBYINT_F32, "@@NBYIFB")
+HANDLE_LIBCALL(NEARBYINT_F128, "@@NBYILB")
+HANDLE_LIBCALL(LROUND_F64, "@@ROND@B")
+HANDLE_LIBCALL(LROUND_F32, "@@FRND@B")
+HANDLE_LIBCALL(LROUND_F128, "@@LRND@B")
+HANDLE_LIBCALL(LRINT_F64, "@@LRNT@B")
+HANDLE_LIBCALL(LRINT_F32, "@@LRNTFB")
+HANDLE_LIBCALL(LRINT_F128, "@@LRNTLB")
+HANDLE_LIBCALL(LOG_F64, "@@WLOG@B")
+HANDLE_LIBCALL(LOG_F32, "@@FLOG@B")
+HANDLE_LIBCALL(LOG_F128, "@@LLOG@B")
+HANDLE_LIBCALL(LOG2_F64, "@@LOG2@B")
+HANDLE_LIBCALL(LOG2_F32, "@@FLG2@B")
+HANDLE_LIBCALL(LOG2_F128, "@@LLG2@B")
+HANDLE_LIBCALL(LOG10_F64, "@@WLG1@B")
+HANDLE_LIBCALL(LOG10_F32, "@@FLG1@B")
+HANDLE_LIBCALL(LOG10_F128, "@@LLG1@B")
+HANDLE_LIBCALL(LLROUND_F64, "@@LLRD@B")
+HANDLE_LIBCALL(LLROUND_F32, "@@LLRDFB")
+HANDLE_LIBCALL(LLROUND_F128, "@@LLRDLB")
+HANDLE_LIBCALL(LLRINT_F64, "@@LLRT@B")
+HANDLE_LIBCALL(LLRINT_F32, "@@LLRTFB")
+HANDLE_LIBCALL(LLRINT_F128, "@@LLRTLB")
+HANDLE_LIBCALL(LDEXP_F64, "@@SLXP@B")
+HANDLE_LIBCALL(LDEXP_F32, "@@FLXP@B")
+HANDLE_LIBCALL(LDEXP_F128, "@@LLXP@B")
+HANDLE_LIBCALL(FREXP_F64, "@@SFXP@B")
+HANDLE_LIBCALL(FREXP_F32, "@@FFXP@B")
+HANDLE_LIBCALL(FREXP_F128, "@@LFXP@B")
+HANDLE_LIBCALL(FMIN_F64, "@@FMIN@B")
+HANDLE_LIBCALL(FMIN_F32, "@@FMINFB")
+HANDLE_LIBCALL(FMIN_F128, "@@FMINLB")
+HANDLE_LIBCALL(FMA_F64, "@@FMA@B")
+HANDLE_LIBCALL(FMA_F32, "@@FMAFB")
+HANDLE_LIBCALL(FMA_F128, "@@FMALB")
+HANDLE_LIBCALL(FMAX_F64, "@@FMAX@B")
+HANDLE_LIBCALL(FMAX_F32, "@@FMAXFB")
+HANDLE_LIBCALL(FMAX_F128, "@@FMAXLB")
+HANDLE_LIBCALL(FLOOR_F64, "@@SFLR@B")
+HANDLE_LIBCALL(FLOOR_F32, "@@FFLR@B")
+HANDLE_LIBCALL(FLOOR_F128, "@@LFLR@B")
+HANDLE_LIBCALL(EXP_F64, "@@WEXP@B")
+HANDLE_LIBCALL(EXP_F32, "@@FEXP@B")
+HANDLE_LIBCALL(EXP_F128, "@@LEXP@B")
+HANDLE_LIBCALL(EXP2_F64, "@@EXP2@B")
+HANDLE_LIBCALL(EXP2_F32, "@@FXP2@B")
+HANDLE_LIBCALL(EXP2_F128, "@@LXP2@B")
+HANDLE_LIBCALL(COS_F64, "@@SCOS@B")
+HANDLE_LIBCALL(COS_F32, "@@FCOS@B")
+HANDLE_LIBCALL(COS_F128, "@@LCOS@B")
+HANDLE_LIBCALL(COPYSIGN_F64, "@@DCPY@B")
+HANDLE_LIBCALL(COPYSIGN_F32, "@@FCPY@B")
+HANDLE_LIBCALL(COPYSIGN_F128, "@@LCPY@B")
+HANDLE_LIBCALL(CEIL_F64, "@@SCEL@B")
+HANDLE_LIBCALL(CEIL_F32, "@@FCEL@B")
+HANDLE_LIBCALL(CEIL_F128, "@@LCEL@B")
+HANDLE_LIBCALL(CBRT_F64, "@@SCRT@B")
+HANDLE_LIBCALL(CBRT_F32, "@@FCBT@B")
+HANDLE_LIBCALL(CBRT_F128, "@@LCBT@B")
+
+#undef HANDLE_LIBCALL
diff --git a/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp
index bc465168f1db..3f96bd37755e 100644
--- a/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp
@@ -35,18 +35,64 @@ TargetMachine::TargetMachine(const Target &T, StringRef DataLayoutString,
: TheTarget(T), DL(DataLayoutString), TargetTriple(TT),
TargetCPU(std::string(CPU)), TargetFS(std::string(FS)), AsmInfo(nullptr),
MRI(nullptr), MII(nullptr), STI(nullptr), RequireStructuredCFG(false),
- O0WantsFastISel(false), DefaultOptions(Options), Options(Options) {}
+ O0WantsFastISel(false), Options(Options) {}
TargetMachine::~TargetMachine() = default;
-bool TargetMachine::isLargeData() const {
+bool TargetMachine::isLargeGlobalValue(const GlobalValue *GVal) const {
if (getTargetTriple().getArch() != Triple::x86_64)
return false;
- // Large data under the large code model still needs to be thought about, so
- // restrict this to medium.
- if (getCodeModel() != CodeModel::Medium)
+
+ auto *GO = GVal->getAliaseeObject();
+
+ // Be conservative if we can't find an underlying GlobalObject.
+ if (!GO)
+ return true;
+
+ auto *GV = dyn_cast<GlobalVariable>(GO);
+
+ // Functions/GlobalIFuncs are only large under the large code model.
+ if (!GV)
+ return getCodeModel() == CodeModel::Large;
+
+ if (GV->isThreadLocal())
return false;
- return true;
+
+ // We should properly mark well-known section name prefixes as small/large,
+ // because otherwise the output section may have the wrong section flags and
+ // the linker will lay it out in an unexpected way.
+ // TODO: bring back lbss/ldata/lrodata checks after fixing accesses to large
+ // globals in the small code model.
+ StringRef Name = GV->getSection();
+ if (!Name.empty()) {
+ auto IsPrefix = [&](StringRef Prefix) {
+ StringRef S = Name;
+ return S.consume_front(Prefix) && (S.empty() || S[0] == '.');
+ };
+ if (IsPrefix(".bss") || IsPrefix(".data") || IsPrefix(".rodata"))
+ return false;
+ }
+
+ // For x86-64, we treat an explicit small code model on a GlobalVariable to
+ // mean that the global should be placed in a small section, and ditto for
+ // large.
+ // Well-known section names above take precedence for correctness.
+ if (auto CM = GV->getCodeModel()) {
+ if (*CM == CodeModel::Small)
+ return false;
+ if (*CM == CodeModel::Large)
+ return true;
+ }
+
+ if (getCodeModel() == CodeModel::Medium ||
+ getCodeModel() == CodeModel::Large) {
+ if (!GV->getValueType()->isSized())
+ return true;
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ uint64_t Size = DL.getTypeSizeInBits(GV->getValueType()) / 8;
+ return Size == 0 || Size > LargeDataThreshold;
+ }
+
+ return false;
}
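// In effect, under the medium or large code model a global bigger than
// LargeDataThreshold is treated as large and should land in a large section
// (.lbss/.ldata/.lrodata), while explicitly sectioned, thread-local, and
// small globals keep the ordinary small sections.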
bool TargetMachine::isPositionIndependent() const {
@@ -78,6 +124,20 @@ void TargetMachine::resetTargetOptions(const Function &F) const {
/// and dynamic-no-pic.
Reloc::Model TargetMachine::getRelocationModel() const { return RM; }
+uint64_t TargetMachine::getMaxCodeSize() const {
+ switch (getCodeModel()) {
+ case CodeModel::Tiny:
+ return llvm::maxUIntN(10);
+ case CodeModel::Small:
+ case CodeModel::Kernel:
+ case CodeModel::Medium:
+ return llvm::maxUIntN(31);
+ case CodeModel::Large:
+ return llvm::maxUIntN(64);
+ }
+ llvm_unreachable("Unhandled CodeModel enum");
+}
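+// That is, 1 KiB - 1 for tiny, 2 GiB - 1 for small/kernel/medium, and
+// effectively unlimited for large.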
+
/// Get the IR-specified TLS model for Var.
static TLSModel::Model getSelectedTLSModel(const GlobalValue *GV) {
switch (GV->getThreadLocalMode()) {
@@ -184,9 +244,9 @@ TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const {
}
/// Returns the optimization level: None, Less, Default, or Aggressive.
-CodeGenOpt::Level TargetMachine::getOptLevel() const { return OptLevel; }
+CodeGenOptLevel TargetMachine::getOptLevel() const { return OptLevel; }
-void TargetMachine::setOptLevel(CodeGenOpt::Level Level) { OptLevel = Level; }
+void TargetMachine::setOptLevel(CodeGenOptLevel Level) { OptLevel = Level; }
TargetTransformInfo
TargetMachine::getTargetTransformInfo(const Function &F) const {
diff --git a/contrib/llvm-project/llvm/lib/Target/TargetMachineC.cpp b/contrib/llvm-project/llvm/lib/Target/TargetMachineC.cpp
index 7cd29b40da12..80024f9a6d5d 100644
--- a/contrib/llvm-project/llvm/lib/Target/TargetMachineC.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/TargetMachineC.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/CodeGenCWrappers.h"
@@ -28,6 +29,24 @@
using namespace llvm;
+namespace llvm {
+
+/// Options for LLVMCreateTargetMachine().
+struct LLVMTargetMachineOptions {
+ std::string CPU;
+ std::string Features;
+ std::string ABI;
+ CodeGenOptLevel OL = CodeGenOptLevel::Default;
+ std::optional<Reloc::Model> RM;
+ std::optional<CodeModel::Model> CM;
+ bool JIT = false; // Default-initialized; only set via SetCodeModel().
+};
+
+} // namespace llvm
+
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LLVMTargetMachineOptions,
+ LLVMTargetMachineOptionsRef)
+
static TargetMachine *unwrap(LLVMTargetMachineRef P) {
return reinterpret_cast<TargetMachine *>(P);
}
@@ -96,56 +115,114 @@ LLVMBool LLVMTargetHasAsmBackend(LLVMTargetRef T) {
return unwrap(T)->hasMCAsmBackend();
}
-LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T,
- const char *Triple, const char *CPU, const char *Features,
- LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc,
- LLVMCodeModel CodeModel) {
- std::optional<Reloc::Model> RM;
- switch (Reloc){
- case LLVMRelocStatic:
- RM = Reloc::Static;
- break;
- case LLVMRelocPIC:
- RM = Reloc::PIC_;
- break;
- case LLVMRelocDynamicNoPic:
- RM = Reloc::DynamicNoPIC;
- break;
- case LLVMRelocROPI:
- RM = Reloc::ROPI;
- break;
- case LLVMRelocRWPI:
- RM = Reloc::RWPI;
- break;
- case LLVMRelocROPI_RWPI:
- RM = Reloc::ROPI_RWPI;
- break;
- default:
- break;
- }
+LLVMTargetMachineOptionsRef LLVMCreateTargetMachineOptions(void) {
+ return wrap(new LLVMTargetMachineOptions());
+}
- bool JIT;
- std::optional<CodeModel::Model> CM = unwrap(CodeModel, JIT);
+void LLVMDisposeTargetMachineOptions(LLVMTargetMachineOptionsRef Options) {
+ delete unwrap(Options);
+}
+
+void LLVMTargetMachineOptionsSetCPU(LLVMTargetMachineOptionsRef Options,
+ const char *CPU) {
+ unwrap(Options)->CPU = CPU;
+}
+
+void LLVMTargetMachineOptionsSetFeatures(LLVMTargetMachineOptionsRef Options,
+ const char *Features) {
+ unwrap(Options)->Features = Features;
+}
+
+void LLVMTargetMachineOptionsSetABI(LLVMTargetMachineOptionsRef Options,
+ const char *ABI) {
+ unwrap(Options)->ABI = ABI;
+}
+
+void LLVMTargetMachineOptionsSetCodeGenOptLevel(
+ LLVMTargetMachineOptionsRef Options, LLVMCodeGenOptLevel Level) {
+ CodeGenOptLevel OL;
- CodeGenOpt::Level OL;
switch (Level) {
- case LLVMCodeGenLevelNone:
- OL = CodeGenOpt::None;
- break;
- case LLVMCodeGenLevelLess:
- OL = CodeGenOpt::Less;
- break;
- case LLVMCodeGenLevelAggressive:
- OL = CodeGenOpt::Aggressive;
- break;
- default:
- OL = CodeGenOpt::Default;
- break;
+ case LLVMCodeGenLevelNone:
+ OL = CodeGenOptLevel::None;
+ break;
+ case LLVMCodeGenLevelLess:
+ OL = CodeGenOptLevel::Less;
+ break;
+ case LLVMCodeGenLevelAggressive:
+ OL = CodeGenOptLevel::Aggressive;
+ break;
+ case LLVMCodeGenLevelDefault:
+ OL = CodeGenOptLevel::Default;
+ break;
+ }
+
+ unwrap(Options)->OL = OL;
+}
+
+void LLVMTargetMachineOptionsSetRelocMode(LLVMTargetMachineOptionsRef Options,
+ LLVMRelocMode Reloc) {
+ std::optional<Reloc::Model> RM;
+
+ switch (Reloc) {
+ case LLVMRelocStatic:
+ RM = Reloc::Static;
+ break;
+ case LLVMRelocPIC:
+ RM = Reloc::PIC_;
+ break;
+ case LLVMRelocDynamicNoPic:
+ RM = Reloc::DynamicNoPIC;
+ break;
+ case LLVMRelocROPI:
+ RM = Reloc::ROPI;
+ break;
+ case LLVMRelocRWPI:
+ RM = Reloc::RWPI;
+ break;
+ case LLVMRelocROPI_RWPI:
+ RM = Reloc::ROPI_RWPI;
+ break;
+ case LLVMRelocDefault:
+ break;
}
- TargetOptions opt;
- return wrap(unwrap(T)->createTargetMachine(Triple, CPU, Features, opt, RM, CM,
- OL, JIT));
+ unwrap(Options)->RM = RM;
+}
+
+void LLVMTargetMachineOptionsSetCodeModel(LLVMTargetMachineOptionsRef Options,
+ LLVMCodeModel CodeModel) {
+ auto CM = unwrap(CodeModel, unwrap(Options)->JIT);
+ unwrap(Options)->CM = CM;
+}
+
+LLVMTargetMachineRef
+LLVMCreateTargetMachineWithOptions(LLVMTargetRef T, const char *Triple,
+ LLVMTargetMachineOptionsRef Options) {
+ auto *Opt = unwrap(Options);
+ TargetOptions TO;
+ TO.MCOptions.ABIName = Opt->ABI;
+ return wrap(unwrap(T)->createTargetMachine(Triple, Opt->CPU, Opt->Features,
+ TO, Opt->RM, Opt->CM, Opt->OL,
+ Opt->JIT));
+}
+
+LLVMTargetMachineRef
+LLVMCreateTargetMachine(LLVMTargetRef T, const char *Triple, const char *CPU,
+ const char *Features, LLVMCodeGenOptLevel Level,
+ LLVMRelocMode Reloc, LLVMCodeModel CodeModel) {
+ auto *Options = LLVMCreateTargetMachineOptions();
+
+ LLVMTargetMachineOptionsSetCPU(Options, CPU);
+ LLVMTargetMachineOptionsSetFeatures(Options, Features);
+ LLVMTargetMachineOptionsSetCodeGenOptLevel(Options, Level);
+ LLVMTargetMachineOptionsSetRelocMode(Options, Reloc);
+ LLVMTargetMachineOptionsSetCodeModel(Options, CodeModel);
+
+ auto *Machine = LLVMCreateTargetMachineWithOptions(T, Triple, Options);
+
+ LLVMDisposeTargetMachineOptions(Options);
+ return Machine;
}
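// A minimal usage sketch of the new options-based API (TheTarget and
// TripleStr stand in for values obtained via the usual target lookup):
//
//   LLVMTargetMachineOptionsRef Opts = LLVMCreateTargetMachineOptions();
//   LLVMTargetMachineOptionsSetCPU(Opts, "generic");
//   LLVMTargetMachineOptionsSetCodeGenOptLevel(Opts, LLVMCodeGenLevelDefault);
//   LLVMTargetMachineOptionsSetRelocMode(Opts, LLVMRelocPIC);
//   LLVMTargetMachineRef TM =
//       LLVMCreateTargetMachineWithOptions(TheTarget, TripleStr, Opts);
//   LLVMDisposeTargetMachineOptions(Opts); // The machine keeps its own copy.
//   /* ...use TM... */
//   LLVMDisposeTargetMachine(TM);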
void LLVMDisposeTargetMachine(LLVMTargetMachineRef T) { delete unwrap(T); }
@@ -175,6 +252,37 @@ void LLVMSetTargetMachineAsmVerbosity(LLVMTargetMachineRef T,
unwrap(T)->Options.MCOptions.AsmVerbose = VerboseAsm;
}
+void LLVMSetTargetMachineFastISel(LLVMTargetMachineRef T, LLVMBool Enable) {
+ unwrap(T)->setFastISel(Enable);
+}
+
+void LLVMSetTargetMachineGlobalISel(LLVMTargetMachineRef T, LLVMBool Enable) {
+ unwrap(T)->setGlobalISel(Enable);
+}
+
+void LLVMSetTargetMachineGlobalISelAbort(LLVMTargetMachineRef T,
+ LLVMGlobalISelAbortMode Mode) {
+ GlobalISelAbortMode AM = GlobalISelAbortMode::Enable;
+ switch (Mode) {
+ case LLVMGlobalISelAbortDisable:
+ AM = GlobalISelAbortMode::Disable;
+ break;
+ case LLVMGlobalISelAbortEnable:
+ AM = GlobalISelAbortMode::Enable;
+ break;
+ case LLVMGlobalISelAbortDisableWithDiag:
+ AM = GlobalISelAbortMode::DisableWithDiag;
+ break;
+ }
+
+ unwrap(T)->setGlobalISelAbort(AM);
+}
+
+void LLVMSetTargetMachineMachineOutliner(LLVMTargetMachineRef T,
+ LLVMBool Enable) {
+ unwrap(T)->setMachineOutliner(Enable);
+}
+
LLVMTargetDataRef LLVMCreateTargetDataLayout(LLVMTargetMachineRef T) {
return wrap(new DataLayout(unwrap(T)->createDataLayout()));
}
@@ -195,10 +303,10 @@ static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
CodeGenFileType ft;
switch (codegen) {
case LLVMAssemblyFile:
- ft = CGFT_AssemblyFile;
+ ft = CodeGenFileType::AssemblyFile;
break;
default:
- ft = CGFT_ObjectFile;
+ ft = CodeGenFileType::ObjectFile;
break;
}
if (TM->addPassesToEmitFile(pass, OS, nullptr, ft)) {
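Only the C++-side enum changes in this hunk (the unscoped CGFT_* constants become scoped CodeGenFileType members); the C-level file-type values and entry points are untouched, so existing callers should keep working unchanged, e.g. (fragment; TM and Mod assumed, signature as of this snapshot):

    char *Err = NULL;
    if (LLVMTargetMachineEmitToFile(TM, Mod, "out.o", LLVMObjectFile, &Err)) {
      fprintf(stderr, "emit failed: %s\n", Err);
      LLVMDisposeMessage(Err);
    }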
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
index a9cedf1dd97c..21d5f7653a68 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
@@ -55,11 +55,10 @@ class VEAsmParser : public MCTargetAsmParser {
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
- bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
int parseRegisterName(unsigned (*matchFn)(StringRef));
- OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
ParseStatus parseDirective(AsmToken DirectiveID) override;
@@ -68,13 +67,13 @@ class VEAsmParser : public MCTargetAsmParser {
unsigned Kind) override;
// Custom parse functions for VE specific operands.
- OperandMatchResultTy parseMEMOperand(OperandVector &Operands);
- OperandMatchResultTy parseMEMAsOperand(OperandVector &Operands);
- OperandMatchResultTy parseCCOpOperand(OperandVector &Operands);
- OperandMatchResultTy parseRDOpOperand(OperandVector &Operands);
- OperandMatchResultTy parseMImmOperand(OperandVector &Operands);
- OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Name);
- OperandMatchResultTy parseVEAsmOperand(std::unique_ptr<VEOperand> &Operand);
+ ParseStatus parseMEMOperand(OperandVector &Operands);
+ ParseStatus parseMEMAsOperand(OperandVector &Operands);
+ ParseStatus parseCCOpOperand(OperandVector &Operands);
+ ParseStatus parseRDOpOperand(OperandVector &Operands);
+ ParseStatus parseMImmOperand(OperandVector &Operands);
+ ParseStatus parseOperand(OperandVector &Operands, StringRef Name);
+ ParseStatus parseVEAsmOperand(std::unique_ptr<VEOperand> &Operand);
// Helper function to parse expression with a symbol.
const MCExpr *extractModifierFromExpr(const MCExpr *E,
@@ -797,9 +796,9 @@ bool VEAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
llvm_unreachable("Implement any new match types added!");
}
-bool VEAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
+bool VEAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) {
- if (tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success)
+ if (!tryParseRegister(Reg, StartLoc, EndLoc).isSuccess())
return Error(StartLoc, "invalid register name");
return false;
}
@@ -828,28 +827,27 @@ static unsigned MatchRegisterName(StringRef Name);
/// \note Generated by TableGen.
static unsigned MatchRegisterAltName(StringRef Name);
-OperandMatchResultTy VEAsmParser::tryParseRegister(MCRegister &RegNo,
- SMLoc &StartLoc,
- SMLoc &EndLoc) {
+ParseStatus VEAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
const AsmToken Tok = Parser.getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
- RegNo = 0;
+ Reg = VE::NoRegister;
if (getLexer().getKind() != AsmToken::Percent)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Parser.Lex();
- RegNo = parseRegisterName(&MatchRegisterName);
- if (RegNo == VE::NoRegister)
- RegNo = parseRegisterName(&MatchRegisterAltName);
+ Reg = parseRegisterName(&MatchRegisterName);
+ if (Reg == VE::NoRegister)
+ Reg = parseRegisterName(&MatchRegisterAltName);
- if (RegNo != VE::NoRegister) {
+ if (Reg != VE::NoRegister) {
Parser.Lex();
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
getLexer().UnLex(Tok);
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
static StringRef parseCC(StringRef Name, unsigned Prefix, unsigned Suffix,
@@ -925,31 +923,35 @@ StringRef VEAsmParser::splitMnemonic(StringRef Name, SMLoc NameLoc,
(Name[Next + 1] == 'd' || Name[Next + 1] == 's'))
ICC = false;
Mnemonic = parseCC(Name, Start, Next, ICC, true, NameLoc, Operands);
- } else if (Name.startswith("cmov.l.") || Name.startswith("cmov.w.") ||
- Name.startswith("cmov.d.") || Name.startswith("cmov.s.")) {
+ } else if (Name.starts_with("cmov.l.") || Name.starts_with("cmov.w.") ||
+ Name.starts_with("cmov.d.") || Name.starts_with("cmov.s.")) {
bool ICC = Name[5] == 'l' || Name[5] == 'w';
Mnemonic = parseCC(Name, 7, Name.size(), ICC, false, NameLoc, Operands);
- } else if (Name.startswith("cvt.w.d.sx") || Name.startswith("cvt.w.d.zx") ||
- Name.startswith("cvt.w.s.sx") || Name.startswith("cvt.w.s.zx")) {
+ } else if (Name.starts_with("cvt.w.d.sx") || Name.starts_with("cvt.w.d.zx") ||
+ Name.starts_with("cvt.w.s.sx") || Name.starts_with("cvt.w.s.zx")) {
Mnemonic = parseRD(Name, 10, NameLoc, Operands);
- } else if (Name.startswith("cvt.l.d")) {
+ } else if (Name.starts_with("cvt.l.d")) {
Mnemonic = parseRD(Name, 7, NameLoc, Operands);
- } else if (Name.startswith("vcvt.w.d.sx") || Name.startswith("vcvt.w.d.zx") ||
- Name.startswith("vcvt.w.s.sx") || Name.startswith("vcvt.w.s.zx")) {
+ } else if (Name.starts_with("vcvt.w.d.sx") ||
+ Name.starts_with("vcvt.w.d.zx") ||
+ Name.starts_with("vcvt.w.s.sx") ||
+ Name.starts_with("vcvt.w.s.zx")) {
Mnemonic = parseRD(Name, 11, NameLoc, Operands);
- } else if (Name.startswith("vcvt.l.d")) {
+ } else if (Name.starts_with("vcvt.l.d")) {
Mnemonic = parseRD(Name, 8, NameLoc, Operands);
- } else if (Name.startswith("pvcvt.w.s.lo") ||
- Name.startswith("pvcvt.w.s.up")) {
+ } else if (Name.starts_with("pvcvt.w.s.lo") ||
+ Name.starts_with("pvcvt.w.s.up")) {
Mnemonic = parseRD(Name, 12, NameLoc, Operands);
- } else if (Name.startswith("pvcvt.w.s")) {
+ } else if (Name.starts_with("pvcvt.w.s")) {
Mnemonic = parseRD(Name, 9, NameLoc, Operands);
- } else if (Name.startswith("vfmk.l.") || Name.startswith("vfmk.w.") ||
- Name.startswith("vfmk.d.") || Name.startswith("vfmk.s.")) {
+ } else if (Name.starts_with("vfmk.l.") || Name.starts_with("vfmk.w.") ||
+ Name.starts_with("vfmk.d.") || Name.starts_with("vfmk.s.")) {
bool ICC = Name[5] == 'l' || Name[5] == 'w' ? true : false;
Mnemonic = parseCC(Name, 7, Name.size(), ICC, true, NameLoc, Operands);
- } else if (Name.startswith("pvfmk.w.lo.") || Name.startswith("pvfmk.w.up.") ||
- Name.startswith("pvfmk.s.lo.") || Name.startswith("pvfmk.s.up.")) {
+ } else if (Name.starts_with("pvfmk.w.lo.") ||
+ Name.starts_with("pvfmk.w.up.") ||
+ Name.starts_with("pvfmk.s.lo.") ||
+ Name.starts_with("pvfmk.s.up.")) {
bool ICC = Name[6] == 'l' || Name[6] == 'w' ? true : false;
Mnemonic = parseCC(Name, 11, Name.size(), ICC, true, NameLoc, Operands);
} else {
@@ -976,7 +978,7 @@ bool VEAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
- if (parseOperand(Operands, Mnemonic) != MatchOperand_Success) {
+ if (!parseOperand(Operands, Mnemonic).isSuccess()) {
SMLoc Loc = getLexer().getLoc();
return Error(Loc, "unexpected token");
}
@@ -984,7 +986,7 @@ bool VEAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
while (getLexer().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the comma.
// Parse and remember the operand.
- if (parseOperand(Operands, Mnemonic) != MatchOperand_Success) {
+ if (!parseOperand(Operands, Mnemonic).isSuccess()) {
SMLoc Loc = getLexer().getLoc();
return Error(Loc, "unexpected token");
}
@@ -1195,7 +1197,7 @@ bool VEAsmParser::parseExpression(const MCExpr *&EVal) {
return false;
}
-OperandMatchResultTy VEAsmParser::parseMEMOperand(OperandVector &Operands) {
+ParseStatus VEAsmParser::parseMEMOperand(OperandVector &Operands) {
LLVM_DEBUG(dbgs() << "parseMEMOperand\n");
const AsmToken &Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
@@ -1212,7 +1214,7 @@ OperandMatchResultTy VEAsmParser::parseMEMOperand(OperandVector &Operands) {
std::unique_ptr<VEOperand> Offset;
switch (getLexer().getKind()) {
default:
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
case AsmToken::Minus:
case AsmToken::Integer:
@@ -1222,7 +1224,7 @@ OperandMatchResultTy VEAsmParser::parseMEMOperand(OperandVector &Operands) {
if (!parseExpression(EVal))
Offset = VEOperand::CreateImm(EVal, S, E);
else
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
break;
}
@@ -1235,12 +1237,12 @@ OperandMatchResultTy VEAsmParser::parseMEMOperand(OperandVector &Operands) {
switch (getLexer().getKind()) {
default:
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
case AsmToken::EndOfStatement:
Operands.push_back(VEOperand::MorphToMEMzii(
MCConstantExpr::create(0, getContext()), std::move(Offset)));
- return MatchOperand_Success;
+ return ParseStatus::Success;
case AsmToken::LParen:
Parser.Lex(); // Eat the (
@@ -1253,14 +1255,14 @@ OperandMatchResultTy VEAsmParser::parseMEMOperand(OperandVector &Operands) {
switch (getLexer().getKind()) {
default:
if (parseRegister(IndexReg, S, E))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
break;
case AsmToken::Minus:
case AsmToken::Integer:
case AsmToken::Dot:
if (getParser().parseExpression(IndexValue, E))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
break;
case AsmToken::Comma:
@@ -1271,14 +1273,14 @@ OperandMatchResultTy VEAsmParser::parseMEMOperand(OperandVector &Operands) {
switch (getLexer().getKind()) {
default:
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
case AsmToken::RParen:
Parser.Lex(); // Eat the )
Operands.push_back(
IndexValue ? VEOperand::MorphToMEMzii(IndexValue, std::move(Offset))
: VEOperand::MorphToMEMzri(IndexReg, std::move(Offset)));
- return MatchOperand_Success;
+ return ParseStatus::Success;
case AsmToken::Comma:
Parser.Lex(); // Eat the ,
@@ -1287,10 +1289,10 @@ OperandMatchResultTy VEAsmParser::parseMEMOperand(OperandVector &Operands) {
MCRegister BaseReg;
if (parseRegister(BaseReg, S, E))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
if (!Parser.getTok().is(AsmToken::RParen))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
Parser.Lex(); // Eat the )
Operands.push_back(
@@ -1298,10 +1300,10 @@ OperandMatchResultTy VEAsmParser::parseMEMOperand(OperandVector &Operands) {
? VEOperand::MorphToMEMrii(BaseReg, IndexValue, std::move(Offset))
: VEOperand::MorphToMEMrri(BaseReg, IndexReg, std::move(Offset)));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy VEAsmParser::parseMEMAsOperand(OperandVector &Operands) {
+ParseStatus VEAsmParser::parseMEMAsOperand(OperandVector &Operands) {
LLVM_DEBUG(dbgs() << "parseMEMAsOperand\n");
const AsmToken &Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
@@ -1319,7 +1321,7 @@ OperandMatchResultTy VEAsmParser::parseMEMAsOperand(OperandVector &Operands) {
std::unique_ptr<VEOperand> Offset;
switch (getLexer().getKind()) {
default:
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
case AsmToken::Minus:
case AsmToken::Integer:
@@ -1329,13 +1331,13 @@ OperandMatchResultTy VEAsmParser::parseMEMAsOperand(OperandVector &Operands) {
if (!parseExpression(EVal))
Offset = VEOperand::CreateImm(EVal, S, E);
else
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
break;
}
case AsmToken::Percent:
if (parseRegister(BaseReg, S, E))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Offset =
VEOperand::CreateImm(MCConstantExpr::create(0, getContext()), S, E);
break;
@@ -1349,18 +1351,18 @@ OperandMatchResultTy VEAsmParser::parseMEMAsOperand(OperandVector &Operands) {
switch (getLexer().getKind()) {
default:
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
case AsmToken::EndOfStatement:
case AsmToken::Comma:
Operands.push_back(BaseReg != VE::NoRegister
? VEOperand::MorphToMEMri(BaseReg, std::move(Offset))
: VEOperand::MorphToMEMzi(std::move(Offset)));
- return MatchOperand_Success;
+ return ParseStatus::Success;
case AsmToken::LParen:
if (BaseReg != VE::NoRegister)
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
Parser.Lex(); // Eat the (
break;
}
@@ -1368,13 +1370,13 @@ OperandMatchResultTy VEAsmParser::parseMEMAsOperand(OperandVector &Operands) {
switch (getLexer().getKind()) {
default:
if (parseRegister(BaseReg, S, E))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
break;
case AsmToken::Comma:
Parser.Lex(); // Eat the ,
if (parseRegister(BaseReg, S, E))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
break;
case AsmToken::RParen:
@@ -1382,23 +1384,23 @@ OperandMatchResultTy VEAsmParser::parseMEMAsOperand(OperandVector &Operands) {
}
if (!Parser.getTok().is(AsmToken::RParen))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
Parser.Lex(); // Eat the )
Operands.push_back(BaseReg != VE::NoRegister
? VEOperand::MorphToMEMri(BaseReg, std::move(Offset))
: VEOperand::MorphToMEMzi(std::move(Offset)));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy VEAsmParser::parseMImmOperand(OperandVector &Operands) {
+ParseStatus VEAsmParser::parseMImmOperand(OperandVector &Operands) {
LLVM_DEBUG(dbgs() << "parseMImmOperand\n");
// Parsing "(" + number + ")0/1"
const AsmToken Tok1 = Parser.getTok();
if (!Tok1.is(AsmToken::LParen))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Parser.Lex(); // Eat the '('.
@@ -1407,14 +1409,14 @@ OperandMatchResultTy VEAsmParser::parseMImmOperand(OperandVector &Operands) {
const MCExpr *EVal;
if (!Tok2.is(AsmToken::Integer) || getParser().parseExpression(EVal, E)) {
getLexer().UnLex(Tok1);
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
const AsmToken Tok3 = Parser.getTok();
if (!Tok3.is(AsmToken::RParen)) {
getLexer().UnLex(Tok2);
getLexer().UnLex(Tok1);
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
Parser.Lex(); // Eat the ')'.
@@ -1424,25 +1426,25 @@ OperandMatchResultTy VEAsmParser::parseMImmOperand(OperandVector &Operands) {
getLexer().UnLex(Tok3);
getLexer().UnLex(Tok2);
getLexer().UnLex(Tok1);
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
Parser.Lex(); // Eat the value.
SMLoc EndLoc = SMLoc::getFromPointer(Suffix.end());
Operands.push_back(
VEOperand::CreateMImm(EVal, Suffix == "0", Tok1.getLoc(), EndLoc));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy VEAsmParser::parseOperand(OperandVector &Operands,
- StringRef Mnemonic) {
+ParseStatus VEAsmParser::parseOperand(OperandVector &Operands,
+ StringRef Mnemonic) {
LLVM_DEBUG(dbgs() << "parseOperand\n");
- OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+ ParseStatus Res = MatchOperandParserImpl(Operands, Mnemonic);
// If there wasn't a custom match, try the generic matcher below. Otherwise,
// there was a match, but an error occurred, in which case, just return that
// the operand parsing failed.
- if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
- return ResTy;
+ if (Res.isSuccess() || Res.isFailure())
+ return Res;
switch (getLexer().getKind()) {
case AsmToken::LParen: {
@@ -1450,28 +1452,28 @@ OperandMatchResultTy VEAsmParser::parseOperand(OperandVector &Operands,
const AsmToken Tok1 = Parser.getTok();
Parser.Lex(); // Eat the '('.
- MCRegister RegNo1;
+ MCRegister Reg1;
SMLoc S1, E1;
- if (tryParseRegister(RegNo1, S1, E1) != MatchOperand_Success) {
+ if (!tryParseRegister(Reg1, S1, E1).isSuccess()) {
getLexer().UnLex(Tok1);
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
if (!Parser.getTok().is(AsmToken::Comma))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
Parser.Lex(); // Eat the ','.
- MCRegister RegNo2;
+ MCRegister Reg2;
SMLoc S2, E2;
- if (tryParseRegister(RegNo2, S2, E2) != MatchOperand_Success)
- return MatchOperand_ParseFail;
+ if (!tryParseRegister(Reg2, S2, E2).isSuccess())
+ return ParseStatus::Failure;
if (!Parser.getTok().is(AsmToken::RParen))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
Operands.push_back(VEOperand::CreateToken(Tok1.getString(), Tok1.getLoc()));
- Operands.push_back(VEOperand::CreateReg(RegNo1, S1, E1));
- Operands.push_back(VEOperand::CreateReg(RegNo2, S2, E2));
+ Operands.push_back(VEOperand::CreateReg(Reg1, S1, E1));
+ Operands.push_back(VEOperand::CreateReg(Reg2, S2, E2));
Operands.push_back(VEOperand::CreateToken(Parser.getTok().getString(),
Parser.getTok().getLoc()));
Parser.Lex(); // Eat the ')'.
@@ -1479,9 +1481,9 @@ OperandMatchResultTy VEAsmParser::parseOperand(OperandVector &Operands,
}
default: {
std::unique_ptr<VEOperand> Op;
- ResTy = parseVEAsmOperand(Op);
- if (ResTy != MatchOperand_Success || !Op)
- return MatchOperand_ParseFail;
+ Res = parseVEAsmOperand(Op);
+ if (!Res.isSuccess() || !Op)
+ return ParseStatus::Failure;
// Push the parsed operand into the list of operands
Operands.push_back(std::move(Op));
@@ -1495,12 +1497,12 @@ OperandMatchResultTy VEAsmParser::parseOperand(OperandVector &Operands,
Parser.Lex(); // Eat the '('.
std::unique_ptr<VEOperand> Op2;
- ResTy = parseVEAsmOperand(Op2);
- if (ResTy != MatchOperand_Success || !Op2)
- return MatchOperand_ParseFail;
+ Res = parseVEAsmOperand(Op2);
+ if (!Res.isSuccess() || !Op2)
+ return ParseStatus::Failure;
if (!Parser.getTok().is(AsmToken::RParen))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
Operands.push_back(std::move(Op1));
Operands.push_back(std::move(Op2));
@@ -1511,11 +1513,10 @@ OperandMatchResultTy VEAsmParser::parseOperand(OperandVector &Operands,
}
}
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-VEAsmParser::parseVEAsmOperand(std::unique_ptr<VEOperand> &Op) {
+ParseStatus VEAsmParser::parseVEAsmOperand(std::unique_ptr<VEOperand> &Op) {
LLVM_DEBUG(dbgs() << "parseVEAsmOperand\n");
SMLoc S = Parser.getTok().getLoc();
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
@@ -1527,9 +1528,9 @@ VEAsmParser::parseVEAsmOperand(std::unique_ptr<VEOperand> &Op) {
break;
case AsmToken::Percent: {
- MCRegister RegNo;
- if (tryParseRegister(RegNo, S, E) == MatchOperand_Success)
- Op = VEOperand::CreateReg(RegNo, S, E);
+ MCRegister Reg;
+ if (tryParseRegister(Reg, S, E).isSuccess())
+ Op = VEOperand::CreateReg(Reg, S, E);
break;
}
case AsmToken::Minus:
@@ -1540,7 +1541,7 @@ VEAsmParser::parseVEAsmOperand(std::unique_ptr<VEOperand> &Op) {
Op = VEOperand::CreateImm(EVal, S, E);
break;
}
- return (Op) ? MatchOperand_Success : MatchOperand_ParseFail;
+ return Op ? ParseStatus::Success : ParseStatus::Failure;
}
// Force static initialization.
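The bulk of this file is a mechanical migration from OperandMatchResultTy to ParseStatus, which folds the old three-way result into one type: NoMatch ("not my operand, let another parser try"), Failure ("my operand shape, but malformed; stop and report"), and Success. A hypothetical custom parser in the new style (parseFooOperand and its token shape are illustrative only, not part of the patch):

    ParseStatus VEAsmParser::parseFooOperand(OperandVector &Operands) {
      SMLoc S = Parser.getTok().getLoc();
      if (getLexer().getKind() != AsmToken::Integer)
        return ParseStatus::NoMatch;   // hand the token back to generic parsing
      const MCExpr *EVal;
      SMLoc E;
      if (getParser().parseExpression(EVal, E))
        return ParseStatus::Failure;   // matched the shape, but it is malformed
      Operands.push_back(VEOperand::CreateImm(EVal, S, E));
      return ParseStatus::Success;
    }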
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp
index 38d163b37080..05f7c00bdc14 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp
@@ -97,7 +97,8 @@ protected:
const Target &TheTarget;
public:
- VEAsmBackend(const Target &T) : MCAsmBackend(support::little), TheTarget(T) {}
+ VEAsmBackend(const Target &T)
+ : MCAsmBackend(llvm::endianness::little), TheTarget(T) {}
unsigned getNumFixupKinds() const override { return VE::NumTargetFixupKinds; }
@@ -131,7 +132,8 @@ public:
}
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target) override {
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) override {
switch ((VE::Fixups)Fixup.getKind()) {
default:
return false;
@@ -174,7 +176,7 @@ public:
for (uint64_t i = 0; i < Count; i += 8)
support::endian::write<uint64_t>(OS, 0x7900000000000000ULL,
- support::little);
+ llvm::endianness::little);
return true;
}
@@ -207,7 +209,8 @@ public:
// from the fixup value. The Value has been "split up" into the
// appropriate bitfields above.
for (unsigned i = 0; i != NumBytes; ++i) {
- unsigned Idx = Endian == support::little ? i : (NumBytes - 1) - i;
+ unsigned Idx =
+ Endian == llvm::endianness::little ? i : (NumBytes - 1) - i;
Data[Offset + Idx] |= static_cast<uint8_t>((Value >> (i * 8)) & 0xff);
}
}
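This file's remaining churn is the tree-wide retirement of support::endianness in favor of llvm::endianness (mirroring std::endian); the endian helpers keep their shapes and only the tag changes:

    // Before: support::endian::write<uint64_t>(OS, Bits, support::little);
    // After:
    support::endian::write<uint64_t>(OS, Bits, llvm::endianness::little);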
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp
index 1c89d6444d11..b2cdf29e4f38 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp
@@ -31,7 +31,7 @@ protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
- bool needsRelocateWithSymbol(const MCSymbol &Sym,
+ bool needsRelocateWithSymbol(const MCValue &Val, const MCSymbol &Sym,
unsigned Type) const override;
};
} // namespace
@@ -134,7 +134,8 @@ unsigned VEELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
return ELF::R_VE_NONE;
}
-bool VEELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
+bool VEELFObjectWriter::needsRelocateWithSymbol(const MCValue &,
+ const MCSymbol &,
unsigned Type) const {
switch (Type) {
default:
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp
index bb643d23e618..31a07fab042d 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp
@@ -82,7 +82,7 @@ void VEMCCodeEmitter::encodeInstruction(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
- support::endian::write<uint64_t>(CB, Bits, support::little);
+ support::endian::write<uint64_t>(CB, Bits, llvm::endianness::little);
++MCNumEmitted; // Keep track of the # of mi's emitted.
}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VE.h b/contrib/llvm-project/llvm/lib/Target/VE/VE.h
index e82cb1901633..6f0243010ba0 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VE.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VE.h
@@ -16,8 +16,6 @@
#include "MCTargetDesc/VEMCTargetDesc.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
@@ -204,80 +202,6 @@ inline static unsigned VECondCodeToVal(VECC::CondCode CC) {
}
}
-/// Convert a DAG integer condition code to a VE ICC condition.
-inline static VECC::CondCode intCondCode2Icc(ISD::CondCode CC) {
- switch (CC) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ISD::SETEQ:
- return VECC::CC_IEQ;
- case ISD::SETNE:
- return VECC::CC_INE;
- case ISD::SETLT:
- return VECC::CC_IL;
- case ISD::SETGT:
- return VECC::CC_IG;
- case ISD::SETLE:
- return VECC::CC_ILE;
- case ISD::SETGE:
- return VECC::CC_IGE;
- case ISD::SETULT:
- return VECC::CC_IL;
- case ISD::SETULE:
- return VECC::CC_ILE;
- case ISD::SETUGT:
- return VECC::CC_IG;
- case ISD::SETUGE:
- return VECC::CC_IGE;
- }
-}
-
-/// Convert a DAG floating point condition code to a VE FCC condition.
-inline static VECC::CondCode fpCondCode2Fcc(ISD::CondCode CC) {
- switch (CC) {
- default:
- llvm_unreachable("Unknown fp condition code!");
- case ISD::SETFALSE:
- return VECC::CC_AF;
- case ISD::SETEQ:
- case ISD::SETOEQ:
- return VECC::CC_EQ;
- case ISD::SETNE:
- case ISD::SETONE:
- return VECC::CC_NE;
- case ISD::SETLT:
- case ISD::SETOLT:
- return VECC::CC_L;
- case ISD::SETGT:
- case ISD::SETOGT:
- return VECC::CC_G;
- case ISD::SETLE:
- case ISD::SETOLE:
- return VECC::CC_LE;
- case ISD::SETGE:
- case ISD::SETOGE:
- return VECC::CC_GE;
- case ISD::SETO:
- return VECC::CC_NUM;
- case ISD::SETUO:
- return VECC::CC_NAN;
- case ISD::SETUEQ:
- return VECC::CC_EQNAN;
- case ISD::SETUNE:
- return VECC::CC_NENAN;
- case ISD::SETULT:
- return VECC::CC_LNAN;
- case ISD::SETUGT:
- return VECC::CC_GNAN;
- case ISD::SETULE:
- return VECC::CC_LENAN;
- case ISD::SETUGE:
- return VECC::CC_GENAN;
- case ISD::SETTRUE:
- return VECC::CC_AT;
- }
-}
-
inline static VECC::CondCode VEValToCondCode(unsigned Val, bool IsInteger) {
if (IsInteger) {
switch (Val) {
@@ -402,22 +326,6 @@ inline static VERD::RoundingMode VEValToRD(unsigned Val) {
llvm_unreachable("Invalid branch predicates");
}
-/// getImmVal - get immediate representation of integer value
-inline static uint64_t getImmVal(const ConstantSDNode *N) {
- return N->getSExtValue();
-}
-
-/// getFpImmVal - get immediate representation of floating point value
-inline static uint64_t getFpImmVal(const ConstantFPSDNode *N) {
- const APInt &Imm = N->getValueAPF().bitcastToAPInt();
- uint64_t Val = Imm.getZExtValue();
- if (Imm.getBitWidth() == 32) {
- // Immediate value of float place places at higher bits on VE.
- Val <<= 32;
- }
- return Val;
-}
-
// MImm - Special immediate value of sequential bit stream of 0 or 1.
// See VEInstrInfo.td for details.
inline static bool isMImmVal(uint64_t Val) {
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VE.td b/contrib/llvm-project/llvm/lib/Target/VE/VE.td
index 1cb4a642632c..bb076bd9f6d4 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VE.td
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VE.td
@@ -46,7 +46,7 @@ def VEAsmParser : AsmParser {
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;
-def : Proc<"generic", []>;
+def : Proc<"generic", [FeatureEnableVPU]>;
//===----------------------------------------------------------------------===//
// Declare the target which we are implementing
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
index 859c33df4028..87646bc1a12f 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
@@ -58,7 +58,7 @@ public:
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- unsigned ConstraintID,
+ InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) override;
// Include the pieces autogenerated from the target description.
@@ -301,16 +301,15 @@ void VEDAGToDAGISel::Select(SDNode *N) {
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
-bool
-VEDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
- unsigned ConstraintID,
- std::vector<SDValue> &OutOps) {
+bool VEDAGToDAGISel::SelectInlineAsmMemoryOperand(
+ const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
+ std::vector<SDValue> &OutOps) {
SDValue Op0, Op1;
switch (ConstraintID) {
default:
llvm_unreachable("Unexpected asm memory constraint");
- case InlineAsm::Constraint_o:
- case InlineAsm::Constraint_m: // memory
+ case InlineAsm::ConstraintCode::o:
+ case InlineAsm::ConstraintCode::m: // memory
// Try to match ADDRri since reg+imm style is safe for all VE instructions
// with a memory operand.
if (selectADDRri(Op, Op0, Op1)) {
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp
index 1ebfa5330d42..0267aefd1e91 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -1130,7 +1130,7 @@ SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
case AtomicOrdering::AcquireRelease:
case AtomicOrdering::SequentiallyConsistent:
// Generate "fencem 3" as acq_rel and seq_cst fence.
- // FIXME: "fencem 3" doesn't wait for for PCIe deveices accesses,
+ // FIXME: "fencem 3" doesn't wait for PCIe device accesses,
// so seq_cst may require more instructions for them.
return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
DAG.getTargetConstant(3, DL, MVT::i32),
@@ -1428,11 +1428,10 @@ static SDValue lowerLoadI1(SDValue Op, SelectionDAG &DAG) {
SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
-
EVT MemVT = LdNode->getMemoryVT();
- // Dispatch to vector isel.
- if (MemVT.isVector() && !isMaskType(MemVT))
+ // If VPU is enabled, always expand non-mask vector loads to VVP
+ if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(MemVT))
return lowerToVVP(Op, DAG);
SDValue BasePtr = LdNode->getBasePtr();
@@ -1542,10 +1541,10 @@ static SDValue lowerStoreI1(SDValue Op, SelectionDAG &DAG) {
SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
-
- // always expand non-mask vector loads to VVP
EVT MemVT = StNode->getMemoryVT();
- if (MemVT.isVector() && !isMaskType(MemVT))
+
+ // If VPU is enabled, always expand non-mask vector stores to VVP
+ if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(MemVT))
return lowerToVVP(Op, DAG);
SDValue BasePtr = StNode->getBasePtr();
@@ -1871,7 +1870,7 @@ VETargetLowering::getCustomOperationAction(SDNode &Op) const {
}
SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
- LLVM_DEBUG(dbgs() << "::LowerOperation"; Op->print(dbgs()););
+ LLVM_DEBUG(dbgs() << "::LowerOperation "; Op.dump(&DAG));
unsigned Opcode = Op.getOpcode();
/// Scalar isel.
@@ -1922,7 +1921,6 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
/// Vector isel.
- LLVM_DEBUG(dbgs() << "::LowerOperation_VVP"; Op->print(dbgs()););
if (ISD::isVPOpcode(Opcode))
return lowerToVVP(Op, DAG);
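Paired with the VE.td hunk above that adds FeatureEnableVPU to the generic processor, lowerLOAD and lowerSTORE now route non-mask vectors to VVP only when the VPU is actually available. The shared guard, as used in both functions:

    // Scalar lowering falls through when the VPU is disabled.
    if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(MemVT))
      return lowerToVVP(Op, DAG);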
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h
index fa4ced5e2f9e..8b9412d78662 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h
@@ -65,6 +65,96 @@ enum NodeType : unsigned {
};
}
+/// Convert a DAG integer condition code to a VE ICC condition.
+inline static VECC::CondCode intCondCode2Icc(ISD::CondCode CC) {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unknown integer condition code!");
+ case ISD::SETEQ:
+ return VECC::CC_IEQ;
+ case ISD::SETNE:
+ return VECC::CC_INE;
+ case ISD::SETLT:
+ return VECC::CC_IL;
+ case ISD::SETGT:
+ return VECC::CC_IG;
+ case ISD::SETLE:
+ return VECC::CC_ILE;
+ case ISD::SETGE:
+ return VECC::CC_IGE;
+ case ISD::SETULT:
+ return VECC::CC_IL;
+ case ISD::SETULE:
+ return VECC::CC_ILE;
+ case ISD::SETUGT:
+ return VECC::CC_IG;
+ case ISD::SETUGE:
+ return VECC::CC_IGE;
+ }
+}
+
+/// Convert a DAG floating point condition code to a VE FCC condition.
+inline static VECC::CondCode fpCondCode2Fcc(ISD::CondCode CC) {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unknown fp condition code!");
+ case ISD::SETFALSE:
+ return VECC::CC_AF;
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ return VECC::CC_EQ;
+ case ISD::SETNE:
+ case ISD::SETONE:
+ return VECC::CC_NE;
+ case ISD::SETLT:
+ case ISD::SETOLT:
+ return VECC::CC_L;
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ return VECC::CC_G;
+ case ISD::SETLE:
+ case ISD::SETOLE:
+ return VECC::CC_LE;
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ return VECC::CC_GE;
+ case ISD::SETO:
+ return VECC::CC_NUM;
+ case ISD::SETUO:
+ return VECC::CC_NAN;
+ case ISD::SETUEQ:
+ return VECC::CC_EQNAN;
+ case ISD::SETUNE:
+ return VECC::CC_NENAN;
+ case ISD::SETULT:
+ return VECC::CC_LNAN;
+ case ISD::SETUGT:
+ return VECC::CC_GNAN;
+ case ISD::SETULE:
+ return VECC::CC_LENAN;
+ case ISD::SETUGE:
+ return VECC::CC_GENAN;
+ case ISD::SETTRUE:
+ return VECC::CC_AT;
+ }
+}
+
+/// getImmVal - get immediate representation of integer value
+inline static uint64_t getImmVal(const ConstantSDNode *N) {
+ return N->getSExtValue();
+}
+
+/// getFpImmVal - get immediate representation of floating point value
+inline static uint64_t getFpImmVal(const ConstantFPSDNode *N) {
+ const APInt &Imm = N->getValueAPF().bitcastToAPInt();
+ uint64_t Val = Imm.getZExtValue();
+ if (Imm.getBitWidth() == 32) {
+ // The immediate value of a float is placed in the higher bits on VE.
+ Val <<= 32;
+ }
+ return Val;
+}
+
class VECustomDAG;
class VETargetLowering : public TargetLowering {
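intCondCode2Icc, fpCondCode2Fcc, getImmVal, and getFpImmVal move here verbatim from VE.h so that VE.h no longer needs the SelectionDAG headers. As the tables show, the unsigned integer predicates map to the same VE codes as their signed counterparts:

    VECC::CondCode A = intCondCode2Icc(ISD::SETULT); // VECC::CC_IL, like SETLT
    VECC::CondCode B = fpCondCode2Fcc(ISD::SETUO);   // VECC::CC_NAN (unordered)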
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td
index 166598cab41d..1e548d7c101a 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td
@@ -1864,10 +1864,10 @@ defm : ZXATMLD32m<atomic_load_32, LDLZXrri, LDLZXrii, LDLZXzri, LDLZXzii>;
multiclass ATMSTm<SDPatternOperator from, ValueType ty,
RM torri, RM torii,
RM tozri, RM tozii> {
- def : Pat<(from ADDRrri:$addr, ty:$src), (torri MEMrri:$addr, $src)>;
- def : Pat<(from ADDRrii:$addr, ty:$src), (torii MEMrii:$addr, $src)>;
- def : Pat<(from ADDRzri:$addr, ty:$src), (tozri MEMzri:$addr, $src)>;
- def : Pat<(from ADDRzii:$addr, ty:$src), (tozii MEMzii:$addr, $src)>;
+ def : Pat<(from ty:$src, ADDRrri:$addr), (torri MEMrri:$addr, $src)>;
+ def : Pat<(from ty:$src, ADDRrii:$addr), (torii MEMrii:$addr, $src)>;
+ def : Pat<(from ty:$src, ADDRzri:$addr), (tozri MEMzri:$addr, $src)>;
+ def : Pat<(from ty:$src, ADDRzii:$addr), (tozii MEMzii:$addr, $src)>;
}
defm : ATMSTm<atomic_store_8, i32, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>;
defm : ATMSTm<atomic_store_16, i32, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>;
@@ -1880,14 +1880,14 @@ multiclass TRATMSTm<SDPatternOperator from,
RM torii,
RM tozri,
RM tozii> {
- def : Pat<(from ADDRrri:$addr, (i32 (trunc i64:$src))),
- (torri MEMrri:$addr, (l2i $src))>;
- def : Pat<(from ADDRrii:$addr, (i32 (trunc i64:$src))),
- (torii MEMrii:$addr, (l2i $src))>;
- def : Pat<(from ADDRzri:$addr, (i32 (trunc i64:$src))),
- (tozri MEMzri:$addr, (l2i $src))>;
- def : Pat<(from ADDRzii:$addr, (i32 (trunc i64:$src))),
- (tozii MEMzii:$addr, (l2i $src))>;
+ def : Pat<(from (i32 (trunc i64:$src)), ADDRrri:$addr),
+ (torri MEMrri:$addr, (EXTRACT_SUBREG $src, sub_i32))>;
+ def : Pat<(from (i32 (trunc i64:$src)), ADDRrii:$addr),
+ (torii MEMrii:$addr, (EXTRACT_SUBREG $src, sub_i32))>;
+ def : Pat<(from (i32 (trunc i64:$src)), ADDRzri:$addr),
+ (tozri MEMzri:$addr, (EXTRACT_SUBREG $src, sub_i32))>;
+ def : Pat<(from (i32 (trunc i64:$src)), ADDRzii:$addr),
+ (tozii MEMzii:$addr, (EXTRACT_SUBREG $src, sub_i32))>;
}
defm : TRATMSTm<atomic_store_8, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>;
defm : TRATMSTm<atomic_store_16, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>;
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp
index 93fb3d8ef8d5..6d102bfd3926 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp
@@ -88,7 +88,7 @@ VETargetMachine::VETargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.h b/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.h
index 057ff16b7592..fd838296b9dd 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.h
@@ -31,7 +31,7 @@ public:
VETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
~VETargetMachine() override;
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VVPISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VVPISelLowering.cpp
index e4c35dafff56..f1e2d7f71701 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VVPISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VVPISelLowering.cpp
@@ -169,6 +169,12 @@ SDValue VETargetLowering::lowerVVP_LOAD_STORE(SDValue Op,
// VVP_STORE
assert(VVPOpc == VEISD::VVP_STORE);
+ if (getTypeAction(*CDAG.getDAG()->getContext(), Data.getValueType()) !=
+ TargetLowering::TypeLegal)
+ // Don't lower the store if an operand hasn't been lowered yet; return
+ // SDValue() instead, so that LLVM retries lowering the store once all
+ // of its operands have been lowered.
+ return SDValue();
return CDAG.getNode(VEISD::VVP_STORE, Op.getNode()->getVTList(),
{Chain, Data, BasePtr, StrideV, Mask, AVL});
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index 1e2d3888fe1c..1b92997f03f1 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -32,7 +32,6 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/MC/TargetRegistry.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/SourceMgr.h"
using namespace llvm;
@@ -272,13 +271,11 @@ public:
#include "WebAssemblyGenAsmMatcher.inc"
// TODO: This is required to be implemented, but appears unused.
- bool parseRegister(MCRegister & /*RegNo*/, SMLoc & /*StartLoc*/,
- SMLoc & /*EndLoc*/) override {
+ bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override {
llvm_unreachable("parseRegister is not implemented.");
}
- OperandMatchResultTy tryParseRegister(MCRegister & /*RegNo*/,
- SMLoc & /*StartLoc*/,
- SMLoc & /*EndLoc*/) override {
+ ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) override {
llvm_unreachable("tryParseRegister is not implemented.");
}
@@ -1106,7 +1103,7 @@ public:
// object writer expects each function to have its own section. This way
// The user can't forget this "convention".
auto SymName = Symbol->getName();
- if (SymName.startswith(".L"))
+ if (SymName.starts_with(".L"))
return; // Local Symbol.
// TODO: If the user explicitly creates a new function section, we ignore
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
index bc0cb2d10cdb..69466667e45a 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
@@ -32,7 +32,6 @@
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/SourceMgr.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index 2c3604cc72d2..ed7757be6615 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -25,6 +25,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
@@ -108,8 +109,8 @@ template <typename T>
bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
if (Size + sizeof(T) > Bytes.size())
return false;
- T Val = support::endian::read<T, support::endianness::little, 1>(
- Bytes.data() + Size);
+ T Val =
+ support::endian::read<T, llvm::endianness::little>(Bytes.data() + Size);
Size += sizeof(T);
if (std::is_floating_point<T>::value) {
MI.addOperand(
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index 85bb52c03e80..ffab67f8ab2b 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -35,7 +35,7 @@ class WebAssemblyAsmBackend final : public MCAsmBackend {
public:
explicit WebAssemblyAsmBackend(bool Is64Bit, bool IsEmscripten)
- : MCAsmBackend(support::little), Is64Bit(Is64Bit),
+ : MCAsmBackend(llvm::endianness::little), Is64Bit(Is64Bit),
IsEmscripten(IsEmscripten) {}
unsigned getNumFixupKinds() const override {
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
index a9673ab344d3..bf6d6dce1f8a 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
@@ -15,15 +15,16 @@
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "MCTargetDesc/WebAssemblyMCTypeUtilities.h"
#include "WebAssembly.h"
-#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
@@ -39,7 +40,7 @@ WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI,
void WebAssemblyInstPrinter::printRegName(raw_ostream &OS,
MCRegister Reg) const {
- assert(Reg.id() != WebAssemblyFunctionInfo::UnusedReg);
+ assert(Reg.id() != WebAssembly::UnusedReg);
// Note that there's an implicit local.get/local.set here!
OS << "$" << Reg.id();
}
@@ -297,9 +298,9 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (int(WAReg) >= 0)
printRegName(O, WAReg);
else if (OpNo >= Desc.getNumDefs() && !IsVariadicDef)
- O << "$pop" << WebAssemblyFunctionInfo::getWARegStackId(WAReg);
- else if (WAReg != WebAssemblyFunctionInfo::UnusedReg)
- O << "$push" << WebAssemblyFunctionInfo::getWARegStackId(WAReg);
+ O << "$pop" << WebAssembly::getWARegStackId(WAReg);
+ else if (WAReg != WebAssembly::UnusedReg)
+ O << "$push" << WebAssembly::getWARegStackId(WAReg);
else
O << "$drop";
// Add a '=' suffix if this is a def.
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index 634ed10d4df5..aaca213c4afe 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -112,16 +112,20 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
break;
case WebAssembly::OPERAND_SIGNATURE:
case WebAssembly::OPERAND_VEC_I8IMM:
- support::endian::write<uint8_t>(OS, MO.getImm(), support::little);
+ support::endian::write<uint8_t>(OS, MO.getImm(),
+ llvm::endianness::little);
break;
case WebAssembly::OPERAND_VEC_I16IMM:
- support::endian::write<uint16_t>(OS, MO.getImm(), support::little);
+ support::endian::write<uint16_t>(OS, MO.getImm(),
+ llvm::endianness::little);
break;
case WebAssembly::OPERAND_VEC_I32IMM:
- support::endian::write<uint32_t>(OS, MO.getImm(), support::little);
+ support::endian::write<uint32_t>(OS, MO.getImm(),
+ llvm::endianness::little);
break;
case WebAssembly::OPERAND_VEC_I64IMM:
- support::endian::write<uint64_t>(OS, MO.getImm(), support::little);
+ support::endian::write<uint64_t>(OS, MO.getImm(),
+ llvm::endianness::little);
break;
case WebAssembly::OPERAND_GLOBAL:
Ctx.reportError(
@@ -137,10 +141,10 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
} else if (MO.isSFPImm()) {
uint32_t F = MO.getSFPImm();
- support::endian::write<uint32_t>(OS, F, support::little);
+ support::endian::write<uint32_t>(OS, F, llvm::endianness::little);
} else if (MO.isDFPImm()) {
uint64_t D = MO.getDFPImm();
- support::endian::write<uint64_t>(OS, D, support::little);
+ support::endian::write<uint64_t>(OS, D, llvm::endianness::little);
} else if (MO.isExpr()) {
const MCOperandInfo &Info = Desc.operands()[I];
llvm::MCFixupKind FixupKind;
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index fc33cebaa48a..15aeaaeb8c4a 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -14,10 +14,10 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H
-#include "../WebAssemblySubtarget.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DataTypes.h"
#include <memory>
@@ -425,8 +425,8 @@ inline bool isCallIndirect(unsigned Opc) {
}
}
-inline bool isBrTable(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
+inline bool isBrTable(unsigned Opc) {
+ switch (Opc) {
case WebAssembly::BR_TABLE_I32:
case WebAssembly::BR_TABLE_I32_S:
case WebAssembly::BR_TABLE_I64:
@@ -535,7 +535,18 @@ inline bool isLocalTee(unsigned Opc) {
}
}
+static const unsigned UnusedReg = -1u;
+
+// For a given stackified WAReg, return the id number to print with push/pop.
+unsigned inline getWARegStackId(unsigned Reg) {
+ assert(Reg & INT32_MIN);
+ return Reg & INT32_MAX;
+}
+
} // end namespace WebAssembly
} // end namespace llvm
+#define GET_SUBTARGETINFO_ENUM
+#include "WebAssemblyGenSubtargetInfo.inc"
+
#endif
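UnusedReg and the stack-id helper move from WebAssemblyFunctionInfo to the MC-level WebAssembly namespace so the instruction printer can drop its CodeGen dependencies. The WAReg encoding is unchanged: the sign bit marks a stackified register and the low 31 bits carry the push/pop id. Worked example (values hypothetical):

    unsigned WAReg = 0x80000000u | 5;  // stackified register, stack id 5
    assert(WAReg & INT32_MIN);         // stackified bit is set
    unsigned Id = WebAssembly::getWARegStackId(WAReg); // 5 -> "$push5"/"$pop5"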
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/Utils/WasmAddressSpaces.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/Utils/WasmAddressSpaces.h
new file mode 100644
index 000000000000..2239badca69c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/Utils/WasmAddressSpaces.h
@@ -0,0 +1,48 @@
+//===--- llvm/CodeGen/WasmAddressSpaces.h -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Address Spaces for WebAssembly Type Handling
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WASMADDRESSSPACES_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WASMADDRESSSPACES_H
+
+namespace llvm {
+
+namespace WebAssembly {
+
+enum WasmAddressSpace : unsigned {
+ // Default address space, for pointers to linear memory (stack, heap, data).
+ WASM_ADDRESS_SPACE_DEFAULT = 0,
+ // A non-integral address space for pointers to named objects outside of
+ // linear memory: WebAssembly globals or WebAssembly locals. Loads and stores
+ // to these pointers are lowered to global.get / global.set or local.get /
+ // local.set, as appropriate.
+ WASM_ADDRESS_SPACE_VAR = 1,
+ // A non-integral address space for externref values
+ WASM_ADDRESS_SPACE_EXTERNREF = 10,
+ // A non-integral address space for funcref values
+ WASM_ADDRESS_SPACE_FUNCREF = 20,
+};
+
+inline bool isDefaultAddressSpace(unsigned AS) {
+ return AS == WASM_ADDRESS_SPACE_DEFAULT;
+}
+inline bool isWasmVarAddressSpace(unsigned AS) {
+ return AS == WASM_ADDRESS_SPACE_VAR;
+}
+inline bool isValidAddressSpace(unsigned AS) {
+ return isDefaultAddressSpace(AS) || isWasmVarAddressSpace(AS);
+}
+
+} // namespace WebAssembly
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WASMADDRESSSPACES_H
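The new header is deliberately free-standing (it includes nothing) so both the CodeGen and MC layers can share the address-space numbering. A hypothetical consumer, not part of the patch:

    #include "Utils/WasmAddressSpaces.h"

    // Does this address space hold reference values rather than linear memory?
    inline bool isRefAddressSpace(unsigned AS) {
      return AS == llvm::WebAssembly::WASM_ADDRESS_SPACE_EXTERNREF ||
             AS == llvm::WebAssembly::WASM_ADDRESS_SPACE_FUNCREF;
    }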
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp
index bf5db09e05de..86fb99cc98a9 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp
@@ -13,7 +13,6 @@
#include "WebAssemblyTypeUtilities.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
// Get register classes enum.
#define GET_REGINFO_ENUM
@@ -63,11 +62,6 @@ wasm::ValType WebAssembly::toValType(MVT Type) {
}
}
-wasm::ValType WebAssembly::regClassToValType(const TargetRegisterClass *RC) {
- assert(RC != nullptr);
- return regClassToValType(RC->getID());
-}
-
void WebAssembly::wasmSymbolSetType(MCSymbolWasm *Sym, const Type *GlobalVT,
const ArrayRef<MVT> &VTs) {
assert(!Sym->getType());
@@ -77,8 +71,7 @@ void WebAssembly::wasmSymbolSetType(MCSymbolWasm *Sym, const Type *GlobalVT,
// that is a reference type.
wasm::ValType ValTy;
bool IsTable = false;
- if (GlobalVT->isArrayTy() && WebAssembly::isWebAssemblyReferenceType(
- GlobalVT->getArrayElementType())) {
+ if (WebAssembly::isWebAssemblyTableType(GlobalVT)) {
IsTable = true;
const Type *ElTy = GlobalVT->getArrayElementType();
if (WebAssembly::isWebAssemblyExternrefType(ElTy))
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h
index 9f58d7582fab..a8860477a247 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h
@@ -16,28 +16,28 @@
#define LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WEBASSEMBLYTYPEUTILITIES_H
#include "MCTargetDesc/WebAssemblyMCTypeUtilities.h"
+#include "WasmAddressSpaces.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/CodeGen/MachineValueType.h"
-#include "llvm/CodeGen/WasmAddressSpaces.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/MC/MCSymbolWasm.h"
namespace llvm {
-class TargetRegisterClass;
-
namespace WebAssembly {
/// Return true if this is a WebAssembly Externref Type.
inline bool isWebAssemblyExternrefType(const Type *Ty) {
- return Ty->getPointerAddressSpace() ==
- WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF;
+ return Ty->isPointerTy() &&
+ Ty->getPointerAddressSpace() ==
+ WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF;
}
/// Return true if this is a WebAssembly Funcref Type.
inline bool isWebAssemblyFuncrefType(const Type *Ty) {
- return Ty->getPointerAddressSpace() ==
- WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF;
+ return Ty->isPointerTy() &&
+ Ty->getPointerAddressSpace() ==
+ WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF;
}
/// Return true if this is a WebAssembly Reference Type.
@@ -45,6 +45,12 @@ inline bool isWebAssemblyReferenceType(const Type *Ty) {
return isWebAssemblyExternrefType(Ty) || isWebAssemblyFuncrefType(Ty);
}
+/// Return true if the table represents a WebAssembly table type.
+inline bool isWebAssemblyTableType(const Type *Ty) {
+ return Ty->isArrayTy() &&
+ isWebAssemblyReferenceType(Ty->getArrayElementType());
+}
+
// Convert StringRef to ValType / HealType / BlockType
MVT parseMVT(StringRef Type);
@@ -52,9 +58,6 @@ MVT parseMVT(StringRef Type);
// Convert a MVT into its corresponding wasm ValType.
wasm::ValType toValType(MVT Type);
-// Convert a register class to a wasm ValType.
-wasm::ValType regClassToValType(const TargetRegisterClass *RC);
-
/// Sets a Wasm Symbol Type.
void wasmSymbolSetType(MCSymbolWasm *Sym, const Type *GlobalVT,
const ArrayRef<MVT> &VTs);
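The externref/funcref predicates now check isPointerTy() before querying the address space, so they are safe to call on arbitrary Types instead of asserting on non-pointers, and isWebAssemblyTableType gives a name to the "array of references" shape that wasmSymbolSetType previously spelled out inline. Sketch (Ty is any llvm::Type *):

    bool IsRef   = WebAssembly::isWebAssemblyReferenceType(Ty); // any Ty is fine
    bool IsTable = WebAssembly::isWebAssemblyTableType(Ty);     // [N x externref]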
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.h
index 53be8f5b67b4..91765ad117bd 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.h
@@ -33,7 +33,7 @@ FunctionPass *createWebAssemblyLowerRefTypesIntPtrConv();
// ISel and immediate followup passes.
FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
- CodeGenOpt::Level OptLevel);
+ CodeGenOptLevel OptLevel);
FunctionPass *createWebAssemblyArgumentMove();
FunctionPass *createWebAssemblySetP2AlignOperands();
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.td
index 7531d36a74a6..d538197450b6 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.td
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.td
@@ -71,6 +71,10 @@ def FeatureExtendedConst :
SubtargetFeature<"extended-const", "HasExtendedConst", "true",
"Enable extended const expressions">;
+def FeatureMultiMemory :
+ SubtargetFeature<"multimemory", "HasMultiMemory", "true",
+ "Enable multiple memories">;
+
//===----------------------------------------------------------------------===//
// Architectures.
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp
index 4089b04a515c..02f5cc6da77c 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp
@@ -26,10 +26,10 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index d492bec97d46..908efbb8d321 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -18,13 +18,13 @@
#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
#include "TargetInfo/WebAssemblyTargetInfo.h"
#include "Utils/WebAssemblyTypeUtilities.h"
-#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyMCInstLower.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblyRegisterInfo.h"
#include "WebAssemblyRuntimeLibcallSignatures.h"
#include "WebAssemblyTargetMachine.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
@@ -76,7 +76,7 @@ std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) {
"Unlowered physical register encountered during assembly printing");
assert(!MFI->isVRegStackified(RegNo));
unsigned WAReg = MFI->getWAReg(RegNo);
- assert(WAReg != WebAssemblyFunctionInfo::UnusedReg);
+ assert(WAReg != WebAssembly::UnusedReg);
return '$' + utostr(WAReg);
}
@@ -104,7 +104,7 @@ WebAssemblyTargetStreamer *WebAssemblyAsmPrinter::getTargetStreamer() {
static bool isEmscriptenInvokeName(StringRef Name) {
if (Name.front() == '"' && Name.back() == '"')
Name = Name.substr(1, Name.size() - 2);
- return Name.startswith("__invoke_");
+ return Name.starts_with("__invoke_");
}
// Returns a character that represents the given wasm value type in invoke
@@ -235,7 +235,7 @@ MCSymbol *WebAssemblyAsmPrinter::getOrCreateWasmSymbol(StringRef Name) {
return WasmSym;
}
- if (Name.startswith("GCC_except_table")) {
+ if (Name.starts_with("GCC_except_table")) {
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_DATA);
return WasmSym;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
index 497ab5440678..06758e465197 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
@@ -17,11 +17,11 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyExceptionInfo.h"
#include "WebAssemblySortRegion.h"
#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineDominators.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index cc8052352b38..d8cbddf74545 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -22,12 +22,12 @@
//===----------------------------------------------------------------------===//
#include "Utils/WebAssemblyTypeUtilities.h"
-#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyExceptionInfo.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySortRegion.h"
#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -667,7 +667,7 @@ void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) {
// When there is an unconditional branch right before a catch instruction and
// it branches to the end of end_try marker, we don't need the branch, because
- // it there is no exception, the control flow transfers to that point anyway.
+ // if there is no exception, the control flow transfers to that point anyway.
// bb0:
// try
// ...
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp
index f3f54a5fb501..4a75bab6b95d 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp
@@ -15,10 +15,10 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
index ab3512cfd640..8deac76b2bc3 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
@@ -13,7 +13,7 @@
#include "WebAssemblyExceptionInfo.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "Utils/WebAssemblyUtilities.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/MachineDominanceFrontier.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h
index 3c0add4b53d7..832ef1e49d78 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYEXCEPTIONINFO_H
#include "WebAssembly.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index 84fd34d73b63..0159c44a79b7 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -16,11 +16,11 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyDebugValueManager.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 1bb2d6bf9e79..37abbb072cdd 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -17,11 +17,11 @@
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "Utils/WebAssemblyTypeUtilities.h"
-#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp
index fa5b4a508fa5..495f19a7ccde 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp
@@ -16,6 +16,7 @@
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
+#include "WebAssemblySubtarget.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -166,7 +167,7 @@ bool WebAssemblyFixBrTableDefaults::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock *MBB = *MBBSet.begin();
MBBSet.erase(MBB);
for (auto &MI : *MBB) {
- if (WebAssembly::isBrTable(MI)) {
+ if (WebAssembly::isBrTable(MI.getOpcode())) {
fixBrTableIndex(MI, MBB, MF);
auto *Fixed = fixBrTableDefault(MI, MBB, MF);
if (Fixed != nullptr) {
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
index b3fe110a092b..81a450dbc0d9 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
@@ -247,8 +247,7 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
if (F.getName() == "main") {
Main = &F;
LLVMContext &C = M.getContext();
- Type *MainArgTys[] = {Type::getInt32Ty(C),
- PointerType::get(Type::getInt8PtrTy(C), 0)};
+ Type *MainArgTys[] = {Type::getInt32Ty(C), PointerType::get(C, 0)};
FunctionType *MainTy = FunctionType::get(Type::getInt32Ty(C), MainArgTys,
/*isVarArg=*/false);
if (shouldFixMainFunction(F.getFunctionType(), MainTy)) {
@@ -256,9 +255,7 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
<< *F.getFunctionType() << "\n");
Value *Args[] = {UndefValue::get(MainArgTys[0]),
UndefValue::get(MainArgTys[1])};
- Value *Casted =
- ConstantExpr::getBitCast(Main, PointerType::get(MainTy, 0));
- CallMain = CallInst::Create(MainTy, Casted, Args, "call_main");
+ CallMain = CallInst::Create(MainTy, Main, Args, "call_main");
Uses.push_back(std::make_pair(CallMain, &F));
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
index e60f1397b993..8f3ad167ae41 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -130,7 +130,15 @@ bool WebAssemblyFrameLowering::hasReservedCallFrame(
bool WebAssemblyFrameLowering::needsSPForLocalFrame(
const MachineFunction &MF) const {
auto &MFI = MF.getFrameInfo();
- return MFI.getStackSize() || MFI.adjustsStack() || hasFP(MF);
+ auto &MRI = MF.getRegInfo();
+ // llvm.stacksave can explicitly read the SP register, and it can appear
+ // without a dynamic alloca.
+ bool HasExplicitSPUse =
+ any_of(MRI.use_operands(getSPReg(MF)),
+ [](MachineOperand &MO) { return !MO.isImplicit(); });
+
+ return MFI.getStackSize() || MFI.adjustsStack() || hasFP(MF) ||
+ HasExplicitSPUse;
}
// In function with EH pads, we need to make a copy of the value of
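The needsSPForLocalFrame change above exists because llvm.stacksave reads the stack pointer even in functions with no dynamic alloca, so frame lowering must also treat explicit (non-implicit) uses of the SP register as requiring a frame. A hedged sketch of the same any_of idiom, with MRI and Reg as assumed inputs:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"

    // True if any operand uses Reg explicitly, rather than as an implicit
    // operand added by the target.
    static bool hasExplicitUse(llvm::MachineRegisterInfo &MRI, llvm::Register Reg) {
      return llvm::any_of(MRI.use_operands(Reg), [](llvm::MachineOperand &MO) {
        return !MO.isImplicit();
      });
    }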
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index 9aacddb0187e..8833aee02a6a 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -47,7 +47,7 @@ public:
WebAssemblyDAGToDAGISel() = delete;
WebAssemblyDAGToDAGISel(WebAssemblyTargetMachine &TM,
- CodeGenOpt::Level OptLevel)
+ CodeGenOptLevel OptLevel)
: SelectionDAGISel(ID, TM, OptLevel), Subtarget(nullptr) {}
bool runOnMachineFunction(MachineFunction &MF) override {
@@ -64,7 +64,8 @@ public:
void Select(SDNode *Node) override;
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) override;
bool SelectAddrOperands32(SDValue Op, SDValue &Offset, SDValue &Addr);
@@ -293,9 +294,10 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
}
bool WebAssemblyDAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+ const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
+ std::vector<SDValue> &OutOps) {
switch (ConstraintID) {
- case InlineAsm::Constraint_m:
+ case InlineAsm::ConstraintCode::m:
// We just support simple memory operands that just have a single address
// operand and need no special handling.
OutOps.push_back(Op);
@@ -406,6 +408,6 @@ bool WebAssemblyDAGToDAGISel::SelectAddrOperands64(SDValue Op, SDValue &Offset,
/// This pass converts a legalized DAG into a WebAssembly-specific DAG, ready
/// for instruction scheduling.
FunctionPass *llvm::createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
return new WebAssemblyDAGToDAGISel(TM, OptLevel);
}
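CodeGenOpt::Level became the scoped enum CodeGenOptLevel in this release cycle, which is why every OptLevel signature and comparison in these files changes spelling; behavior is unchanged. A minimal sketch, assuming only the public header:

    #include "llvm/Support/CodeGen.h"

    // Enumerators are now scoped: None, Less, Default, Aggressive.
    static bool shouldOptimize(llvm::CodeGenOptLevel OL) {
      return OL != llvm::CodeGenOptLevel::None;
    }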
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index f00d02ad4190..4bcf89690505 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -14,10 +14,10 @@
#include "WebAssemblyISelLowering.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "Utils/WebAssemblyTypeUtilities.h"
-#include "Utils/WebAssemblyUtilities.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -32,6 +32,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
@@ -833,6 +834,30 @@ bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
return isa<Function>(GV) ? false : TargetLowering::isOffsetFoldingLegal(GA);
}
+bool WebAssemblyTargetLowering::shouldSinkOperands(
+ Instruction *I, SmallVectorImpl<Use *> &Ops) const {
+ using namespace llvm::PatternMatch;
+
+ if (!I->getType()->isVectorTy() || !I->isShift())
+ return false;
+
+ Value *V = I->getOperand(1);
+ // We don't need to sink a constant splat.
+ if (dyn_cast<Constant>(V))
+ return false;
+
+ if (match(V, m_Shuffle(m_InsertElt(m_Value(), m_Value(), m_ZeroInt()),
+ m_Value(), m_ZeroMask()))) {
+ // Sink insert
+ Ops.push_back(&cast<Instruction>(V)->getOperandUse(0));
+ // Sink shuffle
+ Ops.push_back(&I->getOperandUse(1));
+ return true;
+ }
+
+ return false;
+}
+
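The new shouldSinkOperands hook tells CodeGenPrepare to duplicate a splatted shift amount into the block of the vector shift that uses it, so instruction selection can fold the splat into WebAssembly's scalar-amount shift instructions. The shape it matches is an insertelement into lane 0 fed through a zero-mask shufflevector; a standalone sketch of that match (hypothetical helper, same PatternMatch calls as above):

    #include "llvm/IR/PatternMatch.h"

    // True if V is a splat built as insertelement + zero-mask shufflevector.
    static bool isSplatFromScalar(llvm::Value *V) {
      using namespace llvm::PatternMatch;
      return match(V, m_Shuffle(m_InsertElt(m_Value(), m_Value(), m_ZeroInt()),
                                m_Value(), m_ZeroMask()));
    }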
EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
LLVMContext &C,
EVT VT) const {
@@ -1699,8 +1724,11 @@ SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
fail(DL, DAG, "Invalid address space for WebAssembly target");
unsigned OperandFlags = 0;
- if (isPositionIndependent()) {
- const GlobalValue *GV = GA->getGlobal();
+ const GlobalValue *GV = GA->getGlobal();
+ // Since WebAssembly tables cannot yet be shared across modules, we don't
+ // need special treatment for tables in PIC mode.
+ if (isPositionIndependent() &&
+ !WebAssembly::isWebAssemblyTableType(GV->getValueType())) {
if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) {
MachineFunction &MF = DAG.getMachineFunction();
MVT PtrVT = getPointerTy(MF.getDataLayout());
@@ -2444,8 +2472,8 @@ performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
if (!N->getOperand(1).isUndef())
return SDValue();
SDValue CastOp = Bitcast.getOperand(0);
- MVT SrcType = CastOp.getSimpleValueType();
- MVT DstType = Bitcast.getSimpleValueType();
+ EVT SrcType = CastOp.getValueType();
+ EVT DstType = Bitcast.getValueType();
if (!SrcType.is128BitVector() ||
SrcType.getVectorNumElements() != DstType.getVectorNumElements())
return SDValue();
@@ -2548,6 +2576,8 @@ performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
+ // Endianness doesn't matter in this context because we are looking for
+ // an all-zero value.
return Splat &&
Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
HasAnyUndefs) &&
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index ecf5d5b1ea5d..1d1338ab40d0 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -76,6 +76,8 @@ private:
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+ bool shouldSinkOperands(Instruction *I,
+ SmallVectorImpl<Use *> &Ops) const override;
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
index 2dbcdd50fb8d..4623ce9b5c38 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -192,16 +192,17 @@ multiclass AtomicStore<WebAssemblyRegClass rc, string name, int atomic_op> {
defm ATOMIC_STORE_I32 : AtomicStore<I32, "i32.atomic.store", 0x17>;
defm ATOMIC_STORE_I64 : AtomicStore<I64, "i64.atomic.store", 0x18>;
-// We need an 'atomic' version of store patterns because store and atomic_store
-// nodes have different operand orders:
-// store: (store $val, $ptr)
-// atomic_store: (store $ptr, $val)
+// We used to need an 'atomic' version of store patterns because store and atomic_store
+// nodes have different operand orders.
+//
+// TODO: This is no longer true and atomic_store and store patterns
+// can be unified.
multiclass AStorePat<ValueType ty, PatFrag kind, string inst> {
- def : Pat<(kind (AddrOps32 offset32_op:$offset, I32:$addr), ty:$val),
+ def : Pat<(kind ty:$val, (AddrOps32 offset32_op:$offset, I32:$addr)),
(!cast<NI>(inst#_A32) 0, $offset, $addr, $val)>,
Requires<[HasAddr32, HasAtomics]>;
- def : Pat<(kind (AddrOps64 offset64_op:$offset, I64:$addr), ty:$val),
+ def : Pat<(kind ty:$val, (AddrOps64 offset64_op:$offset, I64:$addr)),
(!cast<NI>(inst#_A64) 0, $offset, $addr, $val)>,
Requires<[HasAddr64, HasAtomics]>;
}
@@ -221,8 +222,8 @@ defm ATOMIC_STORE32_I64 : AtomicStore<I64, "i64.atomic.store32", 0x1d>;
// instructions, we just need to match bare atomic stores. On the other hand,
// truncating stores from i64 values are first truncated to i32.
class trunc_astore_64<PatFrag kind> :
- PatFrag<(ops node:$addr, node:$val),
- (kind node:$addr, (i32 (trunc (i64 node:$val))))>;
+ PatFrag<(ops node:$val, node:$addr),
+ (kind (i32 (trunc (i64 node:$val))), node:$addr)>;
def trunc_astore_8_64 : trunc_astore_64<atomic_store_8>;
def trunc_astore_16_64 : trunc_astore_64<atomic_store_16>;
def trunc_astore_32_64 : trunc_astore_64<atomic_store_32>;
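For context: upstream SelectionDAG changed the atomic_store node to take its operands in the same (value, address) order as a plain store, so these pattern updates only swap operand order; as the TODO above notes, the separate 'atomic' pattern class can eventually be folded into the regular store patterns.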
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index b2dd656ccdda..32a4accd040e 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -14,10 +14,10 @@
#include "WebAssemblyInstrInfo.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -46,11 +46,11 @@ bool WebAssemblyInstrInfo::isReallyTriviallyReMaterializable(
case WebAssembly::CONST_I64:
case WebAssembly::CONST_F32:
case WebAssembly::CONST_F64:
- // isReallyTriviallyReMaterializableGeneric misses these because of the
- // ARGUMENTS implicit def, so we manualy override it here.
+ // TargetInstrInfo::isReallyTriviallyReMaterializable misses these
+ // because of the ARGUMENTS implicit def, so we manually override it here.
return true;
default:
- return false;
+ return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index 134a0efc6822..59ea9247bd86 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -70,6 +70,10 @@ def HasExtendedConst :
Predicate<"Subtarget->hasExtendedConst()">,
AssemblerPredicate<(all_of FeatureExtendedConst), "extended-const">;
+def HasMultiMemory :
+ Predicate<"Subtarget->hasMultiMemory()">,
+ AssemblerPredicate<(all_of FeatureMultiMemory), "multimemory">;
+
//===----------------------------------------------------------------------===//
// WebAssembly-specific DAG Node Types.
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
index d16bb6b6648a..94037b9ab189 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index 4b8fdcf3a5b3..77e6640d5a82 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -282,6 +282,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
+#include <set>
using namespace llvm;
@@ -471,10 +472,10 @@ static Type *getAddrIntType(Module *M) {
}
// Returns an integer pointer type for the target architecture's address space.
-// i32* for wasm32 and i64* for wasm64.
+// i32* for wasm32 and i64* for wasm64. With opaque pointers this is just a ptr
+// in address space zero.
static Type *getAddrPtrType(Module *M) {
- return Type::getIntNPtrTy(M->getContext(),
- M->getDataLayout().getPointerSizeInBits());
+ return PointerType::getUnqual(M->getContext());
}
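The new getAddrPtrType body reflects the opaque-pointer model: typed pointers such as i32* no longer exist, and a pointer in address space 0 is simply 'ptr'. A minimal sketch, assuming only an llvm::Module (names are illustrative):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Module.h"

    // With opaque pointers, only the address space distinguishes pointer types.
    static llvm::PointerType *addrSpaceZeroPtrTy(llvm::Module &M) {
      return llvm::PointerType::getUnqual(M.getContext());
    }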
// Returns an integer whose type is the integer type for the target's address
@@ -495,7 +496,7 @@ WebAssemblyLowerEmscriptenEHSjLj::getFindMatchingCatch(Module &M,
unsigned NumClauses) {
if (FindMatchingCatches.count(NumClauses))
return FindMatchingCatches[NumClauses];
- PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
+ PointerType *Int8PtrTy = PointerType::getUnqual(M.getContext());
SmallVector<Type *, 16> Args(NumClauses, Int8PtrTy);
FunctionType *FTy = FunctionType::get(Int8PtrTy, Args, false);
Function *F = getEmscriptenFunction(
@@ -622,7 +623,7 @@ static bool canLongjmp(const Value *Callee) {
return false;
// __cxa_find_matching_catch_N functions cannot longjmp
- if (Callee->getName().startswith("__cxa_find_matching_catch_"))
+ if (Callee->getName().starts_with("__cxa_find_matching_catch_"))
return false;
// Exception-catching related functions
@@ -831,8 +832,7 @@ void WebAssemblyLowerEmscriptenEHSjLj::replaceLongjmpWith(Function *LongjmpF,
Env =
IRB.CreatePtrToInt(CI->getArgOperand(0), getAddrIntType(M), "env");
else // WasmLongjmpF
- Env =
- IRB.CreateBitCast(CI->getArgOperand(0), IRB.getInt8PtrTy(), "env");
+ Env = IRB.CreateBitCast(CI->getArgOperand(0), IRB.getPtrTy(), "env");
IRB.CreateCall(NewF, {Env, CI->getArgOperand(1)});
ToErase.push_back(CI);
}
@@ -945,13 +945,13 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
if (EnableEmEH) {
// Register __resumeException function
FunctionType *ResumeFTy =
- FunctionType::get(IRB.getVoidTy(), IRB.getInt8PtrTy(), false);
+ FunctionType::get(IRB.getVoidTy(), IRB.getPtrTy(), false);
ResumeF = getEmscriptenFunction(ResumeFTy, "__resumeException", &M);
ResumeF->addFnAttr(Attribute::NoReturn);
// Register llvm_eh_typeid_for function
FunctionType *EHTypeIDTy =
- FunctionType::get(IRB.getInt32Ty(), IRB.getInt8PtrTy(), false);
+ FunctionType::get(IRB.getInt32Ty(), IRB.getPtrTy(), false);
EHTypeIDF = getEmscriptenFunction(EHTypeIDTy, "llvm_eh_typeid_for", &M);
}
@@ -995,36 +995,36 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
EmLongjmpF = getEmscriptenFunction(FTy, "emscripten_longjmp", &M);
EmLongjmpF->addFnAttr(Attribute::NoReturn);
} else { // EnableWasmSjLj
+ Type *Int8PtrTy = IRB.getPtrTy();
// Register __wasm_longjmp function, which calls __builtin_wasm_longjmp.
FunctionType *FTy = FunctionType::get(
- IRB.getVoidTy(), {IRB.getInt8PtrTy(), IRB.getInt32Ty()}, false);
+ IRB.getVoidTy(), {Int8PtrTy, IRB.getInt32Ty()}, false);
WasmLongjmpF = getEmscriptenFunction(FTy, "__wasm_longjmp", &M);
WasmLongjmpF->addFnAttr(Attribute::NoReturn);
}
if (SetjmpF) {
+ Type *Int8PtrTy = IRB.getPtrTy();
+ Type *Int32PtrTy = IRB.getPtrTy();
+ Type *Int32Ty = IRB.getInt32Ty();
// Register saveSetjmp function
FunctionType *SetjmpFTy = SetjmpF->getFunctionType();
- FunctionType *FTy =
- FunctionType::get(Type::getInt32PtrTy(C),
- {SetjmpFTy->getParamType(0), IRB.getInt32Ty(),
- Type::getInt32PtrTy(C), IRB.getInt32Ty()},
- false);
+ FunctionType *FTy = FunctionType::get(
+ Int32PtrTy,
+ {SetjmpFTy->getParamType(0), Int32Ty, Int32PtrTy, Int32Ty}, false);
SaveSetjmpF = getEmscriptenFunction(FTy, "saveSetjmp", &M);
// Register testSetjmp function
- FTy = FunctionType::get(
- IRB.getInt32Ty(),
- {getAddrIntType(&M), Type::getInt32PtrTy(C), IRB.getInt32Ty()},
- false);
+ FTy = FunctionType::get(Int32Ty,
+ {getAddrIntType(&M), Int32PtrTy, Int32Ty}, false);
TestSetjmpF = getEmscriptenFunction(FTy, "testSetjmp", &M);
// wasm.catch() will be lowered down to wasm 'catch' instruction in
// instruction selection.
CatchF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_catch);
// Type for struct __WasmLongjmpArgs
- LongjmpArgsTy = StructType::get(IRB.getInt8PtrTy(), // env
- IRB.getInt32Ty() // val
+ LongjmpArgsTy = StructType::get(Int8PtrTy, // env
+ Int32Ty // val
);
}
}
@@ -1290,9 +1290,9 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
// setjmpTable = (int *) malloc(40);
Type *IntPtrTy = getAddrIntType(&M);
Constant *size = ConstantInt::get(IntPtrTy, 40);
- Instruction *SetjmpTable =
- CallInst::CreateMalloc(SetjmpTableSize, IntPtrTy, IRB.getInt32Ty(), size,
- nullptr, nullptr, "setjmpTable");
+ IRB.SetInsertPoint(SetjmpTableSize);
+ auto *SetjmpTable = IRB.CreateMalloc(IntPtrTy, IRB.getInt32Ty(), size,
+ nullptr, nullptr, "setjmpTable");
SetjmpTable->setDebugLoc(FirstDL);
// CallInst::CreateMalloc may return a bitcast instruction if the result types
// mismatch. We need to set the debug loc for the original call too.
@@ -1301,7 +1301,6 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
MallocCallI->setDebugLoc(FirstDL);
}
// setjmpTable[0] = 0;
- IRB.SetInsertPoint(SetjmpTableSize);
IRB.CreateStore(IRB.getInt32(0), SetjmpTable);
SetjmpTableInsts.push_back(SetjmpTable);
SetjmpTableSizeInsts.push_back(SetjmpTableSize);
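This hunk tracks the removal of the static CallInst::CreateMalloc helper in favor of IRBuilder::CreateMalloc; because the builder inserts at its current insertion point, the pass now calls SetInsertPoint first. A hedged sketch of the new idiom, assuming IRB (an IRBuilder<>), IntPtrTy, Size, and InsertPt are set up as in the pass:

    // Allocate 40 bytes viewed as i32 elements, as the pass does above.
    IRB.SetInsertPoint(InsertPt); // InsertPt: some existing Instruction *
    llvm::Value *Table = IRB.CreateMalloc(IntPtrTy, IRB.getInt32Ty(), Size,
                                          /*ArraySize=*/nullptr,
                                          /*MallocF=*/nullptr, "setjmpTable");
    IRB.CreateFree(Table); // paired free, also at the current insertion point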
@@ -1336,7 +1335,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
// Add a phi to the tail, which will be the output of setjmp, which
// indicates if this is the first call or a longjmp back. The phi directly
// uses the right value based on where we arrive from
- IRB.SetInsertPoint(Tail->getFirstNonPHI());
+ IRB.SetInsertPoint(Tail, Tail->getFirstNonPHIIt());
PHINode *SetjmpRet = IRB.CreatePHI(IRB.getInt32Ty(), 2, "setjmp.ret");
// setjmp initial call returns 0
@@ -1402,14 +1401,9 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
if (auto *CB = dyn_cast<CallBase>(I))
if (auto Bundle = CB->getOperandBundle(LLVMContext::OB_funclet))
Bundles.push_back(OperandBundleDef(*Bundle));
- auto *Free = CallInst::CreateFree(SetjmpTable, Bundles, I);
+ IRB.SetInsertPoint(I);
+ auto *Free = IRB.CreateFree(SetjmpTable, Bundles);
Free->setDebugLoc(DL);
- // CallInst::CreateFree may create a bitcast instruction if its argument
- // types mismatch. We need to set the debug loc for the bitcast too.
- if (auto *FreeCallI = dyn_cast<CallInst>(Free)) {
- if (auto *BitCastI = dyn_cast<BitCastInst>(FreeCallI->getArgOperand(0)))
- BitCastI->setDebugLoc(DL);
- }
}
// Every call to saveSetjmp can change setjmpTable and setjmpTableSize
@@ -1426,7 +1420,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
// saveSetjmp and testSetjmp calls have the correct arguments.
SSAUpdater SetjmpTableSSA;
SSAUpdater SetjmpTableSizeSSA;
- SetjmpTableSSA.Initialize(Type::getInt32PtrTy(C), "setjmpTable");
+ SetjmpTableSSA.Initialize(PointerType::get(C, 0), "setjmpTable");
SetjmpTableSizeSSA.Initialize(Type::getInt32Ty(C), "setjmpTableSize");
for (Instruction *I : SetjmpTableInsts)
SetjmpTableSSA.AddAvailableValue(I->getParent(), I);
@@ -1523,7 +1517,7 @@ void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForEmscriptenSjLj(
Value *Threw = nullptr;
BasicBlock *Tail;
- if (Callee->getName().startswith("__invoke_")) {
+ if (Callee->getName().starts_with("__invoke_")) {
// If invoke wrapper has already been generated for this call in
// previous EH phase, search for the load instruction
// %__THREW__.val = __THREW__;
@@ -1680,7 +1674,7 @@ void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj(
FunctionType::get(IRB.getInt32Ty(), /* isVarArg */ true);
Value *PersF = M.getOrInsertFunction(PersName, PersType).getCallee();
F.setPersonalityFn(
- cast<Constant>(IRB.CreateBitCast(PersF, IRB.getInt8PtrTy())));
+ cast<Constant>(IRB.CreateBitCast(PersF, IRB.getPtrTy())));
}
// Use the entry BB's debugloc as a fallback
@@ -1733,7 +1727,7 @@ void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj(
Value *ValField =
IRB.CreateConstGEP2_32(LongjmpArgsTy, LongjmpArgs, 0, 1, "val_gep");
// void *env = __wasm_longjmp_args.env;
- Instruction *Env = IRB.CreateLoad(IRB.getInt8PtrTy(), EnvField, "env");
+ Instruction *Env = IRB.CreateLoad(IRB.getPtrTy(), EnvField, "env");
// int val = __wasm_longjmp_args.val;
Instruction *Val = IRB.CreateLoad(IRB.getInt32Ty(), ValField, "val");
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp
index 94b6e41e87d0..e0a219211228 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp
@@ -19,6 +19,7 @@
#include "WebAssemblySubtarget.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Pass.h"
+#include <set>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index 5ceeebdeab5e..f6e24f7aaece 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -15,10 +15,10 @@
#include "WebAssemblyMCInstLower.h"
#include "TargetInfo/WebAssemblyTargetInfo.h"
#include "Utils/WebAssemblyTypeUtilities.h"
-#include "Utils/WebAssemblyUtilities.h"
#include "WebAssemblyAsmPrinter.h"
#include "WebAssemblyISelLowering.h"
#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Constants.h"
@@ -202,12 +202,12 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI,
const MachineRegisterInfo &MRI =
MI->getParent()->getParent()->getRegInfo();
for (const MachineOperand &MO : MI->defs())
- Returns.push_back(
- WebAssembly::regClassToValType(MRI.getRegClass(MO.getReg())));
+ Returns.push_back(WebAssembly::regClassToValType(
+ MRI.getRegClass(MO.getReg())->getID()));
for (const MachineOperand &MO : MI->explicit_uses())
if (MO.isReg())
- Params.push_back(
- WebAssembly::regClassToValType(MRI.getRegClass(MO.getReg())));
+ Params.push_back(WebAssembly::regClassToValType(
+ MRI.getRegClass(MO.getReg())->getID()));
// call_indirect instructions have a callee operand at the end which
// doesn't count as a param.
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp
index 21f6fd37d402..13acbd2e24cc 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp
@@ -14,10 +14,10 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
index 7207fbeb305a..1e959111a4db 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
@@ -35,7 +35,7 @@ MachineFunctionInfo *WebAssemblyFunctionInfo::clone(
void WebAssemblyFunctionInfo::initWARegs(MachineRegisterInfo &MRI) {
assert(WARegs.empty());
- unsigned Reg = UnusedReg;
+ unsigned Reg = WebAssembly::UnusedReg;
WARegs.resize(MRI.getNumVirtRegs(), Reg);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
index 7622164449a5..fe18347ad8c1 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -21,6 +21,7 @@
#include "llvm/MC/MCSymbolWasm.h"
namespace llvm {
+class WebAssemblyTargetLowering;
struct WasmEHFuncInfo;
@@ -118,8 +119,6 @@ public:
}
void setBasePointerVreg(unsigned Reg) { BasePtrVreg = Reg; }
- static const unsigned UnusedReg = -1u;
-
void stackifyVReg(MachineRegisterInfo &MRI, unsigned VReg) {
assert(MRI.getUniqueVRegDef(VReg));
auto I = Register::virtReg2Index(VReg);
@@ -141,7 +140,7 @@ public:
void initWARegs(MachineRegisterInfo &MRI);
void setWAReg(unsigned VReg, unsigned WAReg) {
- assert(WAReg != UnusedReg);
+ assert(WAReg != WebAssembly::UnusedReg);
auto I = Register::virtReg2Index(VReg);
assert(I < WARegs.size());
WARegs[I] = WAReg;
@@ -152,12 +151,6 @@ public:
return WARegs[I];
}
- // For a given stackified WAReg, return the id number to print with push/pop.
- static unsigned getWARegStackId(unsigned Reg) {
- assert(Reg & INT32_MIN);
- return Reg & INT32_MAX;
- }
-
bool isCFGStackified() const { return CFGStackified; }
void setCFGStackified(bool Value = true) { CFGStackified = Value; }
};
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
index 5fcee7af9bde..6e2d566d9b48 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
@@ -12,10 +12,10 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
index 4a6d37d7052e..c9ef17f92814 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
@@ -226,7 +226,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
// If there are calls to setjmp or sigsetjmp, don't perform coloring. Virtual
// registers could be modified before the longjmp is executed, resulting in
- // the wrong value being used afterwards. (See <rdar://problem/8007500>.)
+ // the wrong value being used afterwards.
// TODO: Does WebAssembly need to care about setjmp for register coloring?
if (MF.exposesReturnsTwice())
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
index 76c78cd23130..1203b343bf24 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
@@ -13,10 +13,10 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -100,7 +100,7 @@ bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) {
MFI.setWAReg(VReg, INT32_MIN | NumStackRegs++);
continue;
}
- if (MFI.getWAReg(VReg) == WebAssemblyFunctionInfo::UnusedReg) {
+ if (MFI.getWAReg(VReg) == WebAssembly::UnusedReg) {
LLVM_DEBUG(dbgs() << "VReg " << VReg << " -> WAReg " << CurReg << "\n");
MFI.setWAReg(VReg, CurReg++);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 2e0df3c47841..3046f9476f91 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -20,12 +20,11 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
-#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyDebugValueManager.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
index 2995b8816d1f..3e2e029695ab 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
@@ -191,6 +191,9 @@ struct RuntimeLibcallSignatureTable {
Table[RTLIB::EXP2_F32] = f32_func_f32;
Table[RTLIB::EXP2_F64] = f64_func_f64;
Table[RTLIB::EXP2_F128] = i64_i64_func_i64_i64;
+ Table[RTLIB::EXP10_F32] = f32_func_f32;
+ Table[RTLIB::EXP10_F64] = f64_func_f64;
+ Table[RTLIB::EXP10_F128] = i64_i64_func_i64_i64;
Table[RTLIB::SIN_F32] = f32_func_f32;
Table[RTLIB::SIN_F64] = f64_func_f64;
Table[RTLIB::SIN_F128] = i64_i64_func_i64_i64;
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
index 780694980523..85d02b087c78 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSUBTARGET_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSUBTARGET_H
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssemblyFrameLowering.h"
#include "WebAssemblyISelLowering.h"
#include "WebAssemblyInstrInfo.h"
@@ -22,7 +23,6 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include <string>
-#define GET_SUBTARGETINFO_ENUM
#define GET_SUBTARGETINFO_HEADER
#include "WebAssemblyGenSubtargetInfo.inc"
@@ -49,6 +49,7 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo {
bool HasTailCall = false;
bool HasReferenceTypes = false;
bool HasExtendedConst = false;
+ bool HasMultiMemory = false;
/// What processor and OS we're targeting.
Triple TargetTriple;
@@ -101,6 +102,7 @@ public:
bool hasMutableGlobals() const { return HasMutableGlobals; }
bool hasTailCall() const { return HasTailCall; }
bool hasReferenceTypes() const { return HasReferenceTypes; }
+ bool hasMultiMemory() const { return HasMultiMemory; }
/// Parses features string setting specified subtarget options. Definition of
/// function is auto generated by tblgen.
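Taken together, the multimemory plumbing follows the usual recipe for a new WebAssembly subtarget feature: a FeatureMultiMemory SubtargetFeature record named "multimemory", the HasMultiMemory flag and hasMultiMemory() accessor here, and a HasMultiMemory AssemblerPredicate in the .td files. The feature should then be selectable through the standard feature-string mechanism (e.g. an -mattr=+multimemory style flag), though no instructions are gated on it in the hunks shown here.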
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 6ef219f216a3..2db1b6493cc4 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -14,12 +14,12 @@
#include "WebAssemblyTargetMachine.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "TargetInfo/WebAssemblyTargetInfo.h"
-#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyISelLowering.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblyTargetObjectFile.h"
#include "WebAssemblyTargetTransformInfo.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
@@ -48,6 +48,12 @@ static cl::opt<bool> WasmDisableExplicitLocals(
" instruction output for test purposes only."),
cl::init(false));
+static cl::opt<bool> WasmDisableFixIrreducibleControlFlowPass(
+ "wasm-disable-fix-irreducible-control-flow-pass", cl::Hidden,
+ cl::desc("webassembly: disables the fix "
+ "irreducible control flow optimization pass"),
+ cl::init(false));
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyTarget() {
// Register the target.
RegisterTargetMachine<WebAssemblyTargetMachine> X(
@@ -107,7 +113,7 @@ static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM,
WebAssemblyTargetMachine::WebAssemblyTargetMachine(
const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
const TargetOptions &Options, std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL, bool JIT)
: LLVMTargetMachine(
T,
TT.isArch64Bit()
@@ -127,6 +133,7 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine(
// LLVM 'unreachable' to ISD::TRAP and then lower that to WebAssembly's
// 'unreachable' instruction, which is meant for that case.
this->Options.TrapUnreachable = true;
+ this->Options.NoTrapAfterNoreturn = false;
// WebAssembly treats each function as an independent unit. Force
// -ffunction-sections, effectively, so that we can emit them independently.
@@ -368,7 +375,7 @@ static void basicCheckForEHAndSjLj(TargetMachine *TM) {
// to TargetOptions and MCAsmInfo. But when clang compiles bitcode directly,
// clang's LangOptions is not used and thus the exception model info is not
// correctly transferred to TargetOptions and MCAsmInfo, so we make sure we
- // have the correct exception model in in WebAssemblyMCAsmInfo constructor.
+ // have the correct exception model in WebAssemblyMCAsmInfo constructor.
// But in this case TargetOptions is still not updated, so we make sure they
// are the same.
TM->Options.ExceptionModel = TM->getMCAsmInfo()->getExceptionHandlingType();
@@ -426,7 +433,7 @@ void WebAssemblyPassConfig::addIRPasses() {
addPass(createWebAssemblyFixFunctionBitcasts());
// Optimize "returned" function attributes.
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
addPass(createWebAssemblyOptimizeReturned());
basicCheckForEHAndSjLj(TM);
@@ -503,7 +510,7 @@ void WebAssemblyPassConfig::addOptimizedRegAlloc() {
// usually not used for production builds.
// TODO Investigate why RegisterCoalesce degrades debug info quality and fix
// it properly
- if (getOptLevel() == CodeGenOpt::Less)
+ if (getOptLevel() == CodeGenOptLevel::Less)
disablePass(&RegisterCoalescerID);
TargetPassConfig::addOptimizedRegAlloc();
}
@@ -537,7 +544,8 @@ void WebAssemblyPassConfig::addPreEmitPass() {
addPass(createWebAssemblyNullifyDebugValueLists());
// Eliminate multiple-entry loops.
- addPass(createWebAssemblyFixIrreducibleControlFlow());
+ if (!WasmDisableFixIrreducibleControlFlowPass)
+ addPass(createWebAssemblyFixIrreducibleControlFlow());
// Do various transformations for exception handling.
// Every CFG-changing optimizations should come before this.
@@ -550,7 +558,7 @@ void WebAssemblyPassConfig::addPreEmitPass() {
addPass(createWebAssemblyReplacePhysRegs());
// Preparations and optimizations related to register stackification.
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOptLevel::None) {
// Depend on LiveIntervals and perform some optimizations on it.
addPass(createWebAssemblyOptimizeLiveIntervals());
@@ -585,7 +593,7 @@ void WebAssemblyPassConfig::addPreEmitPass() {
addPass(createWebAssemblyLowerBrUnless());
// Perform the very last peephole optimizations on the code.
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
addPass(createWebAssemblyPeephole());
// Create a mapping from LLVM CodeGen virtual registers to wasm registers.
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
index 04bf2de81fe6..2e8cd43840e3 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
@@ -30,7 +30,7 @@ public:
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT);
+ CodeGenOptLevel OL, bool JIT);
~WebAssemblyTargetMachine() override;
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
index 8d7fa4dc3dee..189111cef7d0 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
@@ -13,8 +13,10 @@
#include "WebAssemblyUtilities.h"
#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCContext.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h
index 7f28fb1858a6..7f28fb1858a6 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 11cfe3cba751..e78d16056460 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -106,6 +106,9 @@ class X86AsmParser : public MCTargetAsmParser {
DispEncoding ForcedDispEncoding = DispEncoding_Default;
+ // Does this instruction use apx extended register?
+ bool UseApxExtendedReg = false;
+
private:
SMLoc consumeToken() {
MCAsmParser &Parser = getParser();
@@ -419,7 +422,6 @@ private:
IES_RPAREN,
IES_REGISTER,
IES_INTEGER,
- IES_IDENTIFIER,
IES_ERROR
};
@@ -1141,8 +1143,8 @@ private:
bool ParseIntelMemoryOperandSize(unsigned &Size);
bool CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp,
unsigned BaseReg, unsigned IndexReg,
- unsigned Scale, SMLoc Start, SMLoc End,
- unsigned Size, StringRef Identifier,
+ unsigned Scale, bool NonAbsMem, SMLoc Start,
+ SMLoc End, unsigned Size, StringRef Identifier,
const InlineAsmIdentifierInfo &Info,
OperandVector &Operands);
@@ -1272,10 +1274,9 @@ public:
setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
}
- bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
- OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
@@ -1411,9 +1412,12 @@ bool X86AsmParser::MatchRegisterByName(MCRegister &RegNo, StringRef RegName,
}
}
+ if (X86II::isApxExtendedReg(RegNo))
+ UseApxExtendedReg = true;
+
// If this is "db[0-15]", match it as an alias
// for dr[0-15].
- if (RegNo == 0 && RegName.startswith("db")) {
+ if (RegNo == 0 && RegName.starts_with("db")) {
if (RegName.size() == 3) {
switch (RegName[2]) {
case '0':
@@ -1576,23 +1580,21 @@ bool X86AsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
return false;
}
-bool X86AsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
+bool X86AsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) {
- return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
+ return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}
-OperandMatchResultTy X86AsmParser::tryParseRegister(MCRegister &RegNo,
- SMLoc &StartLoc,
- SMLoc &EndLoc) {
- bool Result =
- ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
+ParseStatus X86AsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
bool PendingErrors = getParser().hasPendingError();
getParser().clearPendingErrors();
if (PendingErrors)
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
if (Result)
- return MatchOperand_NoMatch;
- return MatchOperand_Success;
+ return ParseStatus::NoMatch;
+ return ParseStatus::Success;
}
std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
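tryParseRegister now returns ParseStatus rather than OperandMatchResultTy; the three states keep their meaning (Success, NoMatch, Failure), only the type is the newer MC parser interface. A minimal sketch of a caller, with surrounding names assumed:

    // S.isNoMatch(): not a register, try other operand forms.
    // S.isFailure(): an error was already diagnosed; propagate it.
    llvm::ParseStatus S = tryParseRegister(Reg, StartLoc, EndLoc);
    if (S.isFailure())
      return true;
    if (S.isNoMatch())
      return parseSomethingElse(); // hypothetical fallback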
@@ -1742,10 +1744,13 @@ bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) {
return parseATTOperand(Operands);
}
-bool X86AsmParser::CreateMemForMSInlineAsm(
- unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
- unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
- const InlineAsmIdentifierInfo &Info, OperandVector &Operands) {
+bool X86AsmParser::CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp,
+ unsigned BaseReg, unsigned IndexReg,
+ unsigned Scale, bool NonAbsMem,
+ SMLoc Start, SMLoc End,
+ unsigned Size, StringRef Identifier,
+ const InlineAsmIdentifierInfo &Info,
+ OperandVector &Operands) {
// If we found a decl other than a VarDecl, then assume it is a FuncDecl or
// some other label reference.
if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) {
@@ -1770,11 +1775,15 @@ bool X86AsmParser::CreateMemForMSInlineAsm(
}
// It is common for MS InlineAsm to use a global variable and one or two
// registers in a memory expression, which is then not addressable via rip/eip.
- if (IsGlobalLV && (BaseReg || IndexReg)) {
- Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
- End, Size, Identifier, Decl, 0,
- BaseReg && IndexReg));
- return false;
+ if (IsGlobalLV) {
+ if (BaseReg || IndexReg) {
+ Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
+ End, Size, Identifier, Decl, 0,
+ BaseReg && IndexReg));
+ return false;
+ }
+ if (NonAbsMem)
+ BaseReg = 1; // Make isAbsMem() false
}
Operands.push_back(X86Operand::CreateMem(
getPointerWidth(), SegReg, Disp, BaseReg, IndexReg, Scale, Start, End,
@@ -2063,7 +2072,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
Lex(); // eat type
bool EndDot = parseOptionalToken(AsmToken::Dot);
while (EndDot || (getTok().is(AsmToken::Identifier) &&
- getTok().getString().startswith("."))) {
+ getTok().getString().starts_with("."))) {
getParser().parseIdentifier(Identifier);
if (!EndDot)
Identifier.consume_front(".");
@@ -2262,7 +2271,7 @@ bool X86AsmParser::ParseRoundingModeOp(SMLoc Start, OperandVector &Operands) {
const SMLoc consumedToken = consumeToken();
if (Tok.isNot(AsmToken::Identifier))
return Error(Tok.getLoc(), "Expected an identifier after {");
- if (Tok.getIdentifier().startswith("r")){
+ if (Tok.getIdentifier().starts_with("r")) {
int rndMode = StringSwitch<int>(Tok.getIdentifier())
.Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
.Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
@@ -2304,7 +2313,7 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
// Drop the optional '.'.
StringRef DotDispStr = Tok.getString();
- if (DotDispStr.startswith("."))
+ if (DotDispStr.starts_with("."))
DotDispStr = DotDispStr.drop_front(1);
StringRef TrailingDot;
@@ -2316,7 +2325,7 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
Info.Offset = DotDisp.getZExtValue();
} else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
Tok.is(AsmToken::Identifier)) {
- if (DotDispStr.endswith(".")) {
+ if (DotDispStr.ends_with(".")) {
TrailingDot = DotDispStr.substr(DotDispStr.size() - 1);
DotDispStr = DotDispStr.drop_back(1);
}
@@ -2618,9 +2627,12 @@ bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
ErrMsg))
return Error(Start, ErrMsg);
+ bool IsUnconditionalBranch =
+ Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
if (isParsingMSInlineAsm())
- return CreateMemForMSInlineAsm(RegNo, Disp, BaseReg, IndexReg, Scale, Start,
- End, Size, SM.getSymName(),
+ return CreateMemForMSInlineAsm(RegNo, Disp, BaseReg, IndexReg, Scale,
+ IsUnconditionalBranch && is64BitMode(),
+ Start, End, Size, SM.getSymName(),
SM.getIdentifierInfo(), Operands);
// When parsing x64 MS-style assembly, all non-absolute references to a named
@@ -2628,8 +2640,6 @@ bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
unsigned DefaultBaseReg = X86::NoRegister;
bool MaybeDirectBranchDest = true;
- bool IsUnconditionalBranch =
- Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
if (Parser.isParsingMasm()) {
if (is64BitMode() && SM.getElementSize() > 0) {
DefaultBaseReg = X86::RIP;
@@ -2805,7 +2815,7 @@ bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) {
SmallVector<char, 5> BroadcastVector;
StringRef BroadcastString = (Prefix + getLexer().getTok().getIdentifier())
.toStringRef(BroadcastVector);
- if (!BroadcastString.startswith("1to"))
+ if (!BroadcastString.starts_with("1to"))
return TokError("Expected 1to<NUM> at this point");
const char *BroadcastPrimitive =
StringSwitch<const char *>(BroadcastString)
@@ -3087,6 +3097,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Reset the forced VEX encoding.
ForcedVEXEncoding = VEXEncoding_Default;
ForcedDispEncoding = DispEncoding_Default;
+ UseApxExtendedReg = false;
// Parse pseudo prefixes.
while (true) {
@@ -3163,7 +3174,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (isParsingIntelSyntax() &&
(PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
PatchedName == "jcxz" || PatchedName == "jecxz" ||
- (PatchedName.startswith("j") &&
+ (PatchedName.starts_with("j") &&
ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
StringRef NextTok = Parser.getTok().getString();
if (Parser.isParsingMasm() ? NextTok.equals_insensitive("short")
@@ -3181,17 +3192,17 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
// FIXME: Hack to recognize setneb as setne.
- if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
+ if (PatchedName.starts_with("set") && PatchedName.ends_with("b") &&
PatchedName != "setb" && PatchedName != "setnb")
PatchedName = PatchedName.substr(0, Name.size()-1);
unsigned ComparisonPredicate = ~0U;
// FIXME: Hack to recognize cmp<comparison code>{sh,ss,sd,ph,ps,pd}.
- if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
- (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
- PatchedName.endswith("sh") || PatchedName.endswith("ph") ||
- PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
+ if ((PatchedName.starts_with("cmp") || PatchedName.starts_with("vcmp")) &&
+ (PatchedName.ends_with("ss") || PatchedName.ends_with("sd") ||
+ PatchedName.ends_with("sh") || PatchedName.ends_with("ph") ||
+ PatchedName.ends_with("ps") || PatchedName.ends_with("pd"))) {
bool IsVCMP = PatchedName[0] == 'v';
unsigned CCIdx = IsVCMP ? 4 : 3;
unsigned CC = StringSwitch<unsigned>(
@@ -3246,17 +3257,17 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
.Default(~0U);
if (CC != ~0U && (IsVCMP || CC < 8) &&
(IsVCMP || PatchedName.back() != 'h')) {
- if (PatchedName.endswith("ss"))
+ if (PatchedName.ends_with("ss"))
PatchedName = IsVCMP ? "vcmpss" : "cmpss";
- else if (PatchedName.endswith("sd"))
+ else if (PatchedName.ends_with("sd"))
PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
- else if (PatchedName.endswith("ps"))
+ else if (PatchedName.ends_with("ps"))
PatchedName = IsVCMP ? "vcmpps" : "cmpps";
- else if (PatchedName.endswith("pd"))
+ else if (PatchedName.ends_with("pd"))
PatchedName = IsVCMP ? "vcmppd" : "cmppd";
- else if (PatchedName.endswith("sh"))
+ else if (PatchedName.ends_with("sh"))
PatchedName = "vcmpsh";
- else if (PatchedName.endswith("ph"))
+ else if (PatchedName.ends_with("ph"))
PatchedName = "vcmpph";
else
llvm_unreachable("Unexpected suffix!");
@@ -3266,7 +3277,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
// FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
- if (PatchedName.startswith("vpcmp") &&
+ if (PatchedName.starts_with("vpcmp") &&
(PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
@@ -3295,7 +3306,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
// FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
- if (PatchedName.startswith("vpcom") &&
+ if (PatchedName.starts_with("vpcom") &&
(PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
@@ -3323,7 +3334,6 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
}
-
// Determine whether this is an instruction prefix.
// FIXME:
// Enhance prefix integrity robustness. For example, the following forms
@@ -3369,9 +3379,9 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
Parser.Lex(); // eat the prefix
// Hack: we could have something like "rep # some comment" or
// "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
- while (Name.startswith(";") || Name.startswith("\n") ||
- Name.startswith("#") || Name.startswith("\t") ||
- Name.startswith("/")) {
+ while (Name.starts_with(";") || Name.starts_with("\n") ||
+ Name.starts_with("#") || Name.starts_with("\t") ||
+ Name.starts_with("/")) {
// FIXME: The mnemonic won't match correctly if it's not in lower case.
Name = Parser.getTok().getString();
Parser.Lex(); // go to next prefix or instr
@@ -3530,7 +3540,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
bool HadVerifyError = false;
// Append default arguments to "ins[bwld]"
- if (Name.startswith("ins") &&
+ if (Name.starts_with("ins") &&
(Operands.size() == 1 || Operands.size() == 3) &&
(Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
Name == "ins")) {
@@ -3542,7 +3552,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
// Append default arguments to "outs[bwld]"
- if (Name.startswith("outs") &&
+ if (Name.starts_with("outs") &&
(Operands.size() == 1 || Operands.size() == 3) &&
(Name == "outsb" || Name == "outsw" || Name == "outsl" ||
Name == "outsd" || Name == "outs")) {
@@ -3554,7 +3564,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
// values of $SIREG according to the mode. It would be nice if this
// could be achieved with InstAlias in the tables.
- if (Name.startswith("lods") &&
+ if (Name.starts_with("lods") &&
(Operands.size() == 1 || Operands.size() == 2) &&
(Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
@@ -3565,7 +3575,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
// values of $DIREG according to the mode. It would be nice if this
// could be achieved with InstAlias in the tables.
- if (Name.startswith("stos") &&
+ if (Name.starts_with("stos") &&
(Operands.size() == 1 || Operands.size() == 2) &&
(Name == "stos" || Name == "stosb" || Name == "stosw" ||
Name == "stosl" || Name == "stosd" || Name == "stosq")) {
@@ -3576,7 +3586,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
// values of $DIREG according to the mode. It would be nice if this
// could be achieved with InstAlias in the tables.
- if (Name.startswith("scas") &&
+ if (Name.starts_with("scas") &&
(Operands.size() == 1 || Operands.size() == 2) &&
(Name == "scas" || Name == "scasb" || Name == "scasw" ||
Name == "scasl" || Name == "scasd" || Name == "scasq")) {
@@ -3585,7 +3595,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
// Add default SI and DI operands to "cmps[bwlq]".
- if (Name.startswith("cmps") &&
+ if (Name.starts_with("cmps") &&
(Operands.size() == 1 || Operands.size() == 3) &&
(Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
@@ -3595,10 +3605,10 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
// Add default SI and DI operands to "movs[bwlq]".
- if (((Name.startswith("movs") &&
+ if (((Name.starts_with("movs") &&
(Name == "movs" || Name == "movsb" || Name == "movsw" ||
Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
- (Name.startswith("smov") &&
+ (Name.starts_with("smov") &&
(Name == "smov" || Name == "smovb" || Name == "smovw" ||
Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
(Operands.size() == 1 || Operands.size() == 3)) {
@@ -3957,6 +3967,9 @@ unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
unsigned Opc = Inst.getOpcode();
const MCInstrDesc &MCID = MII.get(Opc);
+ if (UseApxExtendedReg && !X86II::canUseApxExtendedReg(MCID))
+ return Match_Unsupported;
+
if (ForcedVEXEncoding == VEXEncoding_EVEX &&
(MCID.TSFlags & X86II::EncodingMask) != X86II::EVEX)
return Match_Unsupported;
@@ -3967,8 +3980,8 @@ unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
(MCID.TSFlags & X86II::EncodingMask) != X86II::VEX)
return Match_Unsupported;
- // These instructions are only available with {vex}, {vex2} or {vex3} prefix
- if (MCID.TSFlags & X86II::ExplicitVEXPrefix &&
+ if ((MCID.TSFlags & X86II::ExplicitOpPrefixMask) ==
+ X86II::ExplicitVEXPrefix &&
(ForcedVEXEncoding != VEXEncoding_VEX &&
ForcedVEXEncoding != VEXEncoding_VEX2 &&
ForcedVEXEncoding != VEXEncoding_VEX3))
@@ -4458,11 +4471,11 @@ bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
MCAsmParser &Parser = getParser();
StringRef IDVal = DirectiveID.getIdentifier();
- if (IDVal.startswith(".arch"))
+ if (IDVal.starts_with(".arch"))
return parseDirectiveArch();
- if (IDVal.startswith(".code"))
+ if (IDVal.starts_with(".code"))
return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
- else if (IDVal.startswith(".att_syntax")) {
+ else if (IDVal.starts_with(".att_syntax")) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (Parser.getTok().getString() == "prefix")
Parser.Lex();
@@ -4473,7 +4486,7 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
}
getParser().setAssemblerDialect(0);
return false;
- } else if (IDVal.startswith(".intel_syntax")) {
+ } else if (IDVal.starts_with(".intel_syntax")) {
getParser().setAssemblerDialect(1);
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (Parser.getTok().getString() == "noprefix")
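A hedged mini-model of the APX gating introduced in this file may help; the names and the Match_* values below are illustrative, not LLVM's. The idea: the parser latches UseApxExtendedReg whenever an operand names an APX extended GPR (r16-r31), and checkTargetMatchPredicate later rejects any opcode whose descriptor cannot encode such registers.

    #include <cassert>

    enum MatchResult { Match_Success, Match_Unsupported };

    struct ParserModel {
      bool UseApxExtendedReg = false;        // reset per instruction, as above
      void noteRegister(unsigned RegIdx) {   // called while parsing operands
        if (RegIdx >= 16 && RegIdx <= 31)    // r16-r31 need REX2/extended EVEX
          UseApxExtendedReg = true;
      }
      MatchResult match(bool OpcodeCanUseApxRegs) const {
        // Mirrors: if (UseApxExtendedReg && !X86II::canUseApxExtendedReg(MCID))
        if (UseApxExtendedReg && !OpcodeCanUseApxRegs)
          return Match_Unsupported;
        return Match_Success;
      }
    };

    int main() {
      ParserModel P;
      P.noteRegister(25);                    // e.g. r25d appeared in an operand
      assert(P.match(/*OpcodeCanUseApxRegs=*/false) == Match_Unsupported);
      assert(P.match(/*OpcodeCanUseApxRegs=*/true) == Match_Success);
    }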
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86Operand.h b/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86Operand.h
index 4661e73c3ef8..641158cb351f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86Operand.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86Operand.h
@@ -357,28 +357,28 @@ struct X86Operand final : public MCParsedAsmOperand {
}
bool isMem64_RC128X() const {
- return isMem64() && isMemIndexReg(X86::XMM0, X86::XMM31);
+ return isMem64() && X86II::isXMMReg(Mem.IndexReg);
}
bool isMem128_RC128X() const {
- return isMem128() && isMemIndexReg(X86::XMM0, X86::XMM31);
+ return isMem128() && X86II::isXMMReg(Mem.IndexReg);
}
bool isMem128_RC256X() const {
- return isMem128() && isMemIndexReg(X86::YMM0, X86::YMM31);
+ return isMem128() && X86II::isYMMReg(Mem.IndexReg);
}
bool isMem256_RC128X() const {
- return isMem256() && isMemIndexReg(X86::XMM0, X86::XMM31);
+ return isMem256() && X86II::isXMMReg(Mem.IndexReg);
}
bool isMem256_RC256X() const {
- return isMem256() && isMemIndexReg(X86::YMM0, X86::YMM31);
+ return isMem256() && X86II::isYMMReg(Mem.IndexReg);
}
bool isMem256_RC512() const {
- return isMem256() && isMemIndexReg(X86::ZMM0, X86::ZMM31);
+ return isMem256() && X86II::isZMMReg(Mem.IndexReg);
}
bool isMem512_RC256X() const {
- return isMem512() && isMemIndexReg(X86::YMM0, X86::YMM31);
+ return isMem512() && X86II::isYMMReg(Mem.IndexReg);
}
bool isMem512_RC512() const {
- return isMem512() && isMemIndexReg(X86::ZMM0, X86::ZMM31);
+ return isMem512() && X86II::isZMMReg(Mem.IndexReg);
}
bool isMem512_GR16() const {
if (!isMem512())
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 49651da63ecf..59e2008f5632 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -150,12 +150,18 @@ static InstrUID decode(OpcodeType type, InstructionContext insnContext,
dec =
&THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
+ case MAP4:
+ dec = &MAP4_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
case MAP5:
dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
case MAP6:
dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
+ case MAP7:
+ dec = &MAP7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
}
switch (dec->modrm_type) {
@@ -194,7 +200,7 @@ template <typename T> static bool consume(InternalInstruction *insn, T &ptr) {
uint64_t offset = insn->readerCursor - insn->startLocation;
if (offset + sizeof(T) > r.size())
return true;
- ptr = support::endian::read<T>(&r[offset], support::little);
+ ptr = support::endian::read<T>(&r[offset], llvm::endianness::little);
insn->readerCursor += sizeof(T);
return false;
}
@@ -203,6 +209,10 @@ static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;
}
+static bool isREX2(struct InternalInstruction *insn, uint8_t prefix) {
+ return insn->mode == MODE_64BIT && prefix == 0xd5;
+}
+
// Consumes all of an instruction's prefix bytes, and marks the
// instruction as having them. Also sets the instruction's default operand,
// address, and other relevant data sizes to report operands correctly.
@@ -334,8 +344,7 @@ static int readPrefixes(struct InternalInstruction *insn) {
return -1;
}
- if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
- ((~byte1 & 0x8) == 0x8) && ((byte2 & 0x4) == 0x4)) {
+ if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)) {
insn->vectorExtensionType = TYPE_EVEX;
} else {
--insn->readerCursor; // unconsume byte1
@@ -354,13 +363,19 @@ static int readPrefixes(struct InternalInstruction *insn) {
return -1;
}
- // We simulate the REX prefix for simplicity's sake
if (insn->mode == MODE_64BIT) {
+ // We simulate the REX prefix for simplicity's sake
insn->rexPrefix = 0x40 |
(wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) |
(rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) |
(xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) |
(bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
+
+ // We simulate the REX2 prefix for simplicity's sake
+ insn->rex2ExtensionPrefix[1] =
+ (r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 6) |
+ (x2FromEVEX3of4(insn->vectorExtensionPrefix[2]) << 5) |
+ (b2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4);
}
LLVM_DEBUG(
@@ -471,6 +486,23 @@ static int readPrefixes(struct InternalInstruction *insn) {
insn->vectorExtensionPrefix[1],
insn->vectorExtensionPrefix[2]));
}
+ } else if (isREX2(insn, byte)) {
+ uint8_t byte1;
+ if (peek(insn, byte1)) {
+ LLVM_DEBUG(dbgs() << "Couldn't read second byte of REX2");
+ return -1;
+ }
+ insn->rex2ExtensionPrefix[0] = byte;
+ consume(insn, insn->rex2ExtensionPrefix[1]);
+
+ // We simulate the REX prefix for simplicity's sake
+ insn->rexPrefix = 0x40 | (wFromREX2(insn->rex2ExtensionPrefix[1]) << 3) |
+ (rFromREX2(insn->rex2ExtensionPrefix[1]) << 2) |
+ (xFromREX2(insn->rex2ExtensionPrefix[1]) << 1) |
+ (bFromREX2(insn->rex2ExtensionPrefix[1]) << 0);
+ LLVM_DEBUG(dbgs() << format("Found REX2 prefix 0x%hhx 0x%hhx",
+ insn->rex2ExtensionPrefix[0],
+ insn->rex2ExtensionPrefix[1]));
} else if (isREX(insn, byte)) {
if (peek(insn, nextByte))
return -1;
@@ -529,7 +561,8 @@ static int readSIB(struct InternalInstruction *insn) {
if (consume(insn, insn->sib))
return -1;
- index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
+ index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3) |
+ (x2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
if (index == 0x4) {
insn->sibIndex = SIB_INDEX_NONE;
@@ -539,7 +572,8 @@ static int readSIB(struct InternalInstruction *insn) {
insn->sibScale = 1 << scaleFromSIB(insn->sib);
- base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
+ base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3) |
+ (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
switch (base) {
case 0x5:
@@ -601,7 +635,7 @@ static int readDisplacement(struct InternalInstruction *insn) {
// Consumes all addressing information (ModR/M byte, SIB byte, and displacement).
static int readModRM(struct InternalInstruction *insn) {
- uint8_t mod, rm, reg, evexrm;
+ uint8_t mod, rm, reg;
LLVM_DEBUG(dbgs() << "readModRM()");
if (insn->consumedModRM)
@@ -633,14 +667,13 @@ static int readModRM(struct InternalInstruction *insn) {
break;
}
- reg |= rFromREX(insn->rexPrefix) << 3;
- rm |= bFromREX(insn->rexPrefix) << 3;
+ reg |= (rFromREX(insn->rexPrefix) << 3) |
+ (r2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
+ rm |= (bFromREX(insn->rexPrefix) << 3) |
+ (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
- evexrm = 0;
- if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {
+ if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT)
reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
- evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
- }
insn->reg = (Reg)(insn->regBase + reg);
@@ -728,7 +761,7 @@ static int readModRM(struct InternalInstruction *insn) {
break;
case 0x3:
insn->eaDisplacement = EA_DISP_NONE;
- insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm);
+ insn->eaBase = (EABase)(insn->eaRegBase + rm);
break;
}
break;
@@ -738,7 +771,7 @@ static int readModRM(struct InternalInstruction *insn) {
return 0;
}
-#define GENERIC_FIXUP_FUNC(name, base, prefix, mask) \
+#define GENERIC_FIXUP_FUNC(name, base, prefix) \
static uint16_t name(struct InternalInstruction *insn, OperandType type, \
uint8_t index, uint8_t *valid) { \
*valid = 1; \
@@ -750,28 +783,15 @@ static int readModRM(struct InternalInstruction *insn) {
case TYPE_Rv: \
return base + index; \
case TYPE_R8: \
- index &= mask; \
- if (index > 0xf) \
- *valid = 0; \
- if (insn->rexPrefix && index >= 4 && index <= 7) { \
+ if (insn->rexPrefix && index >= 4 && index <= 7) \
return prefix##_SPL + (index - 4); \
- } else { \
+ else \
return prefix##_AL + index; \
- } \
case TYPE_R16: \
- index &= mask; \
- if (index > 0xf) \
- *valid = 0; \
return prefix##_AX + index; \
case TYPE_R32: \
- index &= mask; \
- if (index > 0xf) \
- *valid = 0; \
return prefix##_EAX + index; \
case TYPE_R64: \
- index &= mask; \
- if (index > 0xf) \
- *valid = 0; \
return prefix##_RAX + index; \
case TYPE_ZMM: \
return prefix##_ZMM0 + index; \
@@ -821,8 +841,8 @@ static int readModRM(struct InternalInstruction *insn) {
// @param valid - The address of a uint8_t. The target is set to 1 if the
// field is valid for the register class; 0 if not.
// @return - The proper value.
-GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG, 0x1f)
-GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf)
+GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
+GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
// Consult an operand specifier to determine which of the fixup*Value functions
// to use in correcting readModRM()'s interpretation.
@@ -852,8 +872,31 @@ static int fixupReg(struct InternalInstruction *insn,
if (!valid)
return -1;
break;
- case ENCODING_SIB:
CASE_ENCODING_RM:
+ if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
+ modFromModRM(insn->modRM) == 3) {
+ // EVEX_X can extend the register id to 32 for a non-GPR register that is
+ // encoded in RM.
+ // mode : MODE_64_BIT
+ // Only 8 vector registers are available in 32-bit mode
+ // mod : 3
+ // RM encodes a register
+ switch (op->type) {
+ case TYPE_Rv:
+ case TYPE_R8:
+ case TYPE_R16:
+ case TYPE_R32:
+ case TYPE_R64:
+ break;
+ default:
+ insn->eaBase =
+ (EABase)(insn->eaBase +
+ (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4));
+ break;
+ }
+ }
+ [[fallthrough]];
+ case ENCODING_SIB:
if (insn->eaBase >= insn->eaRegBase) {
insn->eaBase = (EABase)fixupRMValue(
insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid);
@@ -889,6 +932,9 @@ static bool readOpcode(struct InternalInstruction *insn) {
case VEX_LOB_0F3A:
insn->opcodeType = THREEBYTE_3A;
return consume(insn, insn->opcode);
+ case VEX_LOB_MAP4:
+ insn->opcodeType = MAP4;
+ return consume(insn, insn->opcode);
case VEX_LOB_MAP5:
insn->opcodeType = MAP5;
return consume(insn, insn->opcode);
@@ -918,6 +964,9 @@ static bool readOpcode(struct InternalInstruction *insn) {
case VEX_LOB_MAP6:
insn->opcodeType = MAP6;
return consume(insn, insn->opcode);
+ case VEX_LOB_MAP7:
+ insn->opcodeType = MAP7;
+ return consume(insn, insn->opcode);
}
} else if (insn->vectorExtensionType == TYPE_VEX_2B) {
insn->opcodeType = TWOBYTE;
@@ -939,6 +988,10 @@ static bool readOpcode(struct InternalInstruction *insn) {
insn->opcodeType = XOPA_MAP;
return consume(insn, insn->opcode);
}
+ } else if (mFromREX2(insn->rex2ExtensionPrefix[1])) {
+ // m bit indicates opcode map 1
+ insn->opcodeType = TWOBYTE;
+ return consume(insn, insn->opcode);
}
if (consume(insn, current))
@@ -1053,12 +1106,18 @@ static int getInstructionIDWithAttrMask(uint16_t *instructionID,
case THREEDNOW_MAP:
decision = &THREEDNOW_MAP_SYM;
break;
+ case MAP4:
+ decision = &MAP4_SYM;
+ break;
case MAP5:
decision = &MAP5_SYM;
break;
case MAP6:
decision = &MAP6_SYM;
break;
+ case MAP7:
+ decision = &MAP7_SYM;
+ break;
}
if (decision->opcodeDecisions[insnCtx]
@@ -1207,6 +1266,11 @@ static int getInstructionID(struct InternalInstruction *insn,
attrMask &= ~ATTR_ADSIZE;
}
+ // Absolute jump and pushp/popp need special handling
+ if (insn->rex2ExtensionPrefix[0] == 0xd5 && insn->opcodeType == ONEBYTE &&
+ (insn->opcode == 0xA1 || (insn->opcode & 0xf0) == 0x50))
+ attrMask |= ATTR_REX2;
+
if (insn->mode == MODE_16BIT) {
// JCXZ/JECXZ need special handling for 16-bit mode because the meaning
// of the AdSize prefix is inverted w.r.t. 32-bit mode.
@@ -1266,7 +1330,8 @@ static int getInstructionID(struct InternalInstruction *insn,
// any position.
if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
(insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
- (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8)) {
+ (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8) ||
+ (insn->opcodeType == MAP4 && insn->opcode == 0xF8)) {
// Make sure we observed the prefixes in any position.
if (insn->hasAdSize)
attrMask |= ATTR_ADSIZE;
@@ -1379,10 +1444,16 @@ static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
if (size == 0)
size = insn->registerSize;
+ auto setOpcodeRegister = [&](unsigned base) {
+ insn->opcodeRegister =
+ (Reg)(base + ((bFromREX(insn->rexPrefix) << 3) |
+ (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4) |
+ (insn->opcode & 7)));
+ };
+
switch (size) {
case 1:
- insn->opcodeRegister = (Reg)(
- MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
+ setOpcodeRegister(MODRM_REG_AL);
if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
insn->opcodeRegister < MODRM_REG_AL + 0x8) {
insn->opcodeRegister =
@@ -1391,18 +1462,13 @@ static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
break;
case 2:
- insn->opcodeRegister = (Reg)(
- MODRM_REG_AX + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
+ setOpcodeRegister(MODRM_REG_AX);
break;
case 4:
- insn->opcodeRegister =
- (Reg)(MODRM_REG_EAX +
- ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
+ setOpcodeRegister(MODRM_REG_EAX);
break;
case 8:
- insn->opcodeRegister =
- (Reg)(MODRM_REG_RAX +
- ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
+ setOpcodeRegister(MODRM_REG_RAX);
break;
}
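A worked example of the 5-bit register-id composition the readSIB/readModRM hunks above introduce: REX2 (prefix byte 0xd5 plus one payload byte) contributes a second extension bit on top of the classic REX bit, so ModRM.reg can address r0-r31. This is a hedged sketch, not the decoder itself; the shifts mirror the rFromREX/r2FromREX2 accessors and the sample values are made up for illustration.

    #include <cassert>
    #include <cstdint>

    // reg = ModRM.reg | REX.R << 3 | REX2.R' << 4  (see readModRM above)
    static unsigned regId(uint8_t modRM, uint8_t simulatedRex,
                          uint8_t rex2Payload) {
      unsigned reg = (modRM >> 3) & 0x7;        // regFromModRM
      reg |= ((simulatedRex >> 2) & 0x1) << 3;  // rFromREX:   bit 3
      reg |= ((rex2Payload >> 6) & 0x1) << 4;   // r2FromREX2: bit 4
      return reg;                               // 0..31
    }

    int main() {
      // ModRM.reg = 1, REX.R = 1, REX2.R' = 1  ->  1 + 8 + 16 = r25
      assert(regId(/*modRM=*/0x08, /*rex=*/0x44, /*rex2=*/0x40) == 25);
    }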
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 95d3c8ede366..decc45091941 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -20,187 +20,332 @@
namespace llvm {
namespace X86Disassembler {
-
-// Accessor functions for various fields of an Intel instruction
-#define modFromModRM(modRM) (((modRM) & 0xc0) >> 6)
-#define regFromModRM(modRM) (((modRM) & 0x38) >> 3)
-#define rmFromModRM(modRM) ((modRM) & 0x7)
-#define scaleFromSIB(sib) (((sib) & 0xc0) >> 6)
-#define indexFromSIB(sib) (((sib) & 0x38) >> 3)
-#define baseFromSIB(sib) ((sib) & 0x7)
-#define wFromREX(rex) (((rex) & 0x8) >> 3)
-#define rFromREX(rex) (((rex) & 0x4) >> 2)
-#define xFromREX(rex) (((rex) & 0x2) >> 1)
-#define bFromREX(rex) ((rex) & 0x1)
-
-#define rFromEVEX2of4(evex) (((~(evex)) & 0x80) >> 7)
-#define xFromEVEX2of4(evex) (((~(evex)) & 0x40) >> 6)
-#define bFromEVEX2of4(evex) (((~(evex)) & 0x20) >> 5)
-#define r2FromEVEX2of4(evex) (((~(evex)) & 0x10) >> 4)
-#define mmmFromEVEX2of4(evex) ((evex) & 0x7)
-#define wFromEVEX3of4(evex) (((evex) & 0x80) >> 7)
-#define vvvvFromEVEX3of4(evex) (((~(evex)) & 0x78) >> 3)
-#define ppFromEVEX3of4(evex) ((evex) & 0x3)
-#define zFromEVEX4of4(evex) (((evex) & 0x80) >> 7)
-#define l2FromEVEX4of4(evex) (((evex) & 0x40) >> 6)
-#define lFromEVEX4of4(evex) (((evex) & 0x20) >> 5)
-#define bFromEVEX4of4(evex) (((evex) & 0x10) >> 4)
-#define v2FromEVEX4of4(evex) (((~evex) & 0x8) >> 3)
-#define aaaFromEVEX4of4(evex) ((evex) & 0x7)
-
-#define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7)
-#define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6)
-#define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5)
-#define mmmmmFromVEX2of3(vex) ((vex) & 0x1f)
-#define wFromVEX3of3(vex) (((vex) & 0x80) >> 7)
-#define vvvvFromVEX3of3(vex) (((~(vex)) & 0x78) >> 3)
-#define lFromVEX3of3(vex) (((vex) & 0x4) >> 2)
-#define ppFromVEX3of3(vex) ((vex) & 0x3)
-
-#define rFromVEX2of2(vex) (((~(vex)) & 0x80) >> 7)
-#define vvvvFromVEX2of2(vex) (((~(vex)) & 0x78) >> 3)
-#define lFromVEX2of2(vex) (((vex) & 0x4) >> 2)
-#define ppFromVEX2of2(vex) ((vex) & 0x3)
-
-#define rFromXOP2of3(xop) (((~(xop)) & 0x80) >> 7)
-#define xFromXOP2of3(xop) (((~(xop)) & 0x40) >> 6)
-#define bFromXOP2of3(xop) (((~(xop)) & 0x20) >> 5)
-#define mmmmmFromXOP2of3(xop) ((xop) & 0x1f)
-#define wFromXOP3of3(xop) (((xop) & 0x80) >> 7)
-#define vvvvFromXOP3of3(vex) (((~(vex)) & 0x78) >> 3)
-#define lFromXOP3of3(xop) (((xop) & 0x4) >> 2)
-#define ppFromXOP3of3(xop) ((xop) & 0x3)
+// Helper macros
+#define bitFromOffset0(val) ((val) & 0x1)
+#define bitFromOffset1(val) (((val) >> 1) & 0x1)
+#define bitFromOffset2(val) (((val) >> 2) & 0x1)
+#define bitFromOffset3(val) (((val) >> 3) & 0x1)
+#define bitFromOffset4(val) (((val) >> 4) & 0x1)
+#define bitFromOffset5(val) (((val) >> 5) & 0x1)
+#define bitFromOffset6(val) (((val) >> 6) & 0x1)
+#define bitFromOffset7(val) (((val) >> 7) & 0x1)
+#define twoBitsFromOffset0(val) ((val) & 0x3)
+#define twoBitsFromOffset6(val) (((val) >> 6) & 0x3)
+#define threeBitsFromOffset0(val) ((val) & 0x7)
+#define threeBitsFromOffset3(val) (((val) >> 3) & 0x7)
+#define fiveBitsFromOffset0(val) ((val) & 0x1f)
+#define invertedBitFromOffset2(val) (((~(val)) >> 2) & 0x1)
+#define invertedBitFromOffset3(val) (((~(val)) >> 3) & 0x1)
+#define invertedBitFromOffset4(val) (((~(val)) >> 4) & 0x1)
+#define invertedBitFromOffset5(val) (((~(val)) >> 5) & 0x1)
+#define invertedBitFromOffset6(val) (((~(val)) >> 6) & 0x1)
+#define invertedBitFromOffset7(val) (((~(val)) >> 7) & 0x1)
+#define invertedFourBitsFromOffset3(val) (((~(val)) >> 3) & 0xf)
+// MOD/RM
+#define modFromModRM(modRM) twoBitsFromOffset6(modRM)
+#define regFromModRM(modRM) threeBitsFromOffset3(modRM)
+#define rmFromModRM(modRM) threeBitsFromOffset0(modRM)
+// SIB
+#define scaleFromSIB(sib) twoBitsFromOffset6(sib)
+#define indexFromSIB(sib) threeBitsFromOffset3(sib)
+#define baseFromSIB(sib) threeBitsFromOffset0(sib)
+// REX
+#define wFromREX(rex) bitFromOffset3(rex)
+#define rFromREX(rex) bitFromOffset2(rex)
+#define xFromREX(rex) bitFromOffset1(rex)
+#define bFromREX(rex) bitFromOffset0(rex)
+// REX2
+#define mFromREX2(rex2) bitFromOffset7(rex2)
+#define r2FromREX2(rex2) bitFromOffset6(rex2)
+#define x2FromREX2(rex2) bitFromOffset5(rex2)
+#define b2FromREX2(rex2) bitFromOffset4(rex2)
+#define wFromREX2(rex2) bitFromOffset3(rex2)
+#define rFromREX2(rex2) bitFromOffset2(rex2)
+#define xFromREX2(rex2) bitFromOffset1(rex2)
+#define bFromREX2(rex2) bitFromOffset0(rex2)
+// XOP
+#define rFromXOP2of3(xop) invertedBitFromOffset7(xop)
+#define xFromXOP2of3(xop) invertedBitFromOffset6(xop)
+#define bFromXOP2of3(xop) invertedBitFromOffset5(xop)
+#define mmmmmFromXOP2of3(xop) fiveBitsFromOffset0(xop)
+#define wFromXOP3of3(xop) bitFromOffset7(xop)
+#define vvvvFromXOP3of3(xop) invertedFourBitsFromOffset3(xop)
+#define lFromXOP3of3(xop) bitFromOffset2(xop)
+#define ppFromXOP3of3(xop) twoBitsFromOffset0(xop)
+// VEX2
+#define rFromVEX2of2(vex) invertedBitFromOffset7(vex)
+#define vvvvFromVEX2of2(vex) invertedFourBitsFromOffset3(vex)
+#define lFromVEX2of2(vex) bitFromOffset2(vex)
+#define ppFromVEX2of2(vex) twoBitsFromOffset0(vex)
+// VEX3
+#define rFromVEX2of3(vex) invertedBitFromOffset7(vex)
+#define xFromVEX2of3(vex) invertedBitFromOffset6(vex)
+#define bFromVEX2of3(vex) invertedBitFromOffset5(vex)
+#define mmmmmFromVEX2of3(vex) fiveBitsFromOffset0(vex)
+#define wFromVEX3of3(vex) bitFromOffset7(vex)
+#define vvvvFromVEX3of3(vex) invertedFourBitsFromOffset3(vex)
+#define lFromVEX3of3(vex) bitFromOffset2(vex)
+#define ppFromVEX3of3(vex) twoBitsFromOffset0(vex)
+// EVEX
+#define rFromEVEX2of4(evex) invertedBitFromOffset7(evex)
+#define xFromEVEX2of4(evex) invertedBitFromOffset6(evex)
+#define bFromEVEX2of4(evex) invertedBitFromOffset5(evex)
+#define r2FromEVEX2of4(evex) invertedBitFromOffset4(evex)
+#define b2FromEVEX2of4(evex) bitFromOffset3(evex)
+#define mmmFromEVEX2of4(evex) threeBitsFromOffset0(evex)
+#define wFromEVEX3of4(evex) bitFromOffset7(evex)
+#define vvvvFromEVEX3of4(evex) invertedFourBitsFromOffset3(evex)
+#define x2FromEVEX3of4(evex) invertedBitFromOffset2(evex)
+#define ppFromEVEX3of4(evex) twoBitsFromOffset0(evex)
+#define zFromEVEX4of4(evex) bitFromOffset7(evex)
+#define l2FromEVEX4of4(evex) bitFromOffset6(evex)
+#define lFromEVEX4of4(evex) bitFromOffset5(evex)
+#define bFromEVEX4of4(evex) bitFromOffset4(evex)
+#define v2FromEVEX4of4(evex) invertedBitFromOffset3(evex)
+#define aaaFromEVEX4of4(evex) threeBitsFromOffset0(evex)
// These enums represent Intel registers for use by the decoder.
-#define REGS_8BIT \
- ENTRY(AL) \
- ENTRY(CL) \
- ENTRY(DL) \
- ENTRY(BL) \
- ENTRY(AH) \
- ENTRY(CH) \
- ENTRY(DH) \
- ENTRY(BH) \
- ENTRY(R8B) \
- ENTRY(R9B) \
- ENTRY(R10B) \
- ENTRY(R11B) \
- ENTRY(R12B) \
- ENTRY(R13B) \
- ENTRY(R14B) \
- ENTRY(R15B) \
- ENTRY(SPL) \
- ENTRY(BPL) \
- ENTRY(SIL) \
+#define REGS_8BIT \
+ ENTRY(AL) \
+ ENTRY(CL) \
+ ENTRY(DL) \
+ ENTRY(BL) \
+ ENTRY(AH) \
+ ENTRY(CH) \
+ ENTRY(DH) \
+ ENTRY(BH) \
+ ENTRY(R8B) \
+ ENTRY(R9B) \
+ ENTRY(R10B) \
+ ENTRY(R11B) \
+ ENTRY(R12B) \
+ ENTRY(R13B) \
+ ENTRY(R14B) \
+ ENTRY(R15B) \
+ ENTRY(R16B) \
+ ENTRY(R17B) \
+ ENTRY(R18B) \
+ ENTRY(R19B) \
+ ENTRY(R20B) \
+ ENTRY(R21B) \
+ ENTRY(R22B) \
+ ENTRY(R23B) \
+ ENTRY(R24B) \
+ ENTRY(R25B) \
+ ENTRY(R26B) \
+ ENTRY(R27B) \
+ ENTRY(R28B) \
+ ENTRY(R29B) \
+ ENTRY(R30B) \
+ ENTRY(R31B) \
+ ENTRY(SPL) \
+ ENTRY(BPL) \
+ ENTRY(SIL) \
ENTRY(DIL)
-#define EA_BASES_16BIT \
- ENTRY(BX_SI) \
- ENTRY(BX_DI) \
- ENTRY(BP_SI) \
- ENTRY(BP_DI) \
- ENTRY(SI) \
- ENTRY(DI) \
- ENTRY(BP) \
- ENTRY(BX) \
- ENTRY(R8W) \
- ENTRY(R9W) \
- ENTRY(R10W) \
- ENTRY(R11W) \
- ENTRY(R12W) \
- ENTRY(R13W) \
- ENTRY(R14W) \
- ENTRY(R15W)
-
-#define REGS_16BIT \
- ENTRY(AX) \
- ENTRY(CX) \
- ENTRY(DX) \
- ENTRY(BX) \
- ENTRY(SP) \
- ENTRY(BP) \
- ENTRY(SI) \
- ENTRY(DI) \
- ENTRY(R8W) \
- ENTRY(R9W) \
- ENTRY(R10W) \
- ENTRY(R11W) \
- ENTRY(R12W) \
- ENTRY(R13W) \
- ENTRY(R14W) \
- ENTRY(R15W)
-
-#define EA_BASES_32BIT \
- ENTRY(EAX) \
- ENTRY(ECX) \
- ENTRY(EDX) \
- ENTRY(EBX) \
- ENTRY(sib) \
- ENTRY(EBP) \
- ENTRY(ESI) \
- ENTRY(EDI) \
- ENTRY(R8D) \
- ENTRY(R9D) \
- ENTRY(R10D) \
- ENTRY(R11D) \
- ENTRY(R12D) \
- ENTRY(R13D) \
- ENTRY(R14D) \
- ENTRY(R15D)
-
-#define REGS_32BIT \
- ENTRY(EAX) \
- ENTRY(ECX) \
- ENTRY(EDX) \
- ENTRY(EBX) \
- ENTRY(ESP) \
- ENTRY(EBP) \
- ENTRY(ESI) \
- ENTRY(EDI) \
- ENTRY(R8D) \
- ENTRY(R9D) \
- ENTRY(R10D) \
- ENTRY(R11D) \
- ENTRY(R12D) \
- ENTRY(R13D) \
- ENTRY(R14D) \
- ENTRY(R15D)
-
-#define EA_BASES_64BIT \
- ENTRY(RAX) \
- ENTRY(RCX) \
- ENTRY(RDX) \
- ENTRY(RBX) \
- ENTRY(sib64) \
- ENTRY(RBP) \
- ENTRY(RSI) \
- ENTRY(RDI) \
- ENTRY(R8) \
- ENTRY(R9) \
- ENTRY(R10) \
- ENTRY(R11) \
- ENTRY(R12) \
- ENTRY(R13) \
- ENTRY(R14) \
- ENTRY(R15)
-
-#define REGS_64BIT \
- ENTRY(RAX) \
- ENTRY(RCX) \
- ENTRY(RDX) \
- ENTRY(RBX) \
- ENTRY(RSP) \
- ENTRY(RBP) \
- ENTRY(RSI) \
- ENTRY(RDI) \
- ENTRY(R8) \
- ENTRY(R9) \
- ENTRY(R10) \
- ENTRY(R11) \
- ENTRY(R12) \
- ENTRY(R13) \
- ENTRY(R14) \
- ENTRY(R15)
+#define EA_BASES_16BIT \
+ ENTRY(BX_SI) \
+ ENTRY(BX_DI) \
+ ENTRY(BP_SI) \
+ ENTRY(BP_DI) \
+ ENTRY(SI) \
+ ENTRY(DI) \
+ ENTRY(BP) \
+ ENTRY(BX) \
+ ENTRY(R8W) \
+ ENTRY(R9W) \
+ ENTRY(R10W) \
+ ENTRY(R11W) \
+ ENTRY(R12W) \
+ ENTRY(R13W) \
+ ENTRY(R14W) \
+ ENTRY(R15W) \
+ ENTRY(R16W) \
+ ENTRY(R17W) \
+ ENTRY(R18W) \
+ ENTRY(R19W) \
+ ENTRY(R20W) \
+ ENTRY(R21W) \
+ ENTRY(R22W) \
+ ENTRY(R23W) \
+ ENTRY(R24W) \
+ ENTRY(R25W) \
+ ENTRY(R26W) \
+ ENTRY(R27W) \
+ ENTRY(R28W) \
+ ENTRY(R29W) \
+ ENTRY(R30W) \
+ ENTRY(R31W)
+
+#define REGS_16BIT \
+ ENTRY(AX) \
+ ENTRY(CX) \
+ ENTRY(DX) \
+ ENTRY(BX) \
+ ENTRY(SP) \
+ ENTRY(BP) \
+ ENTRY(SI) \
+ ENTRY(DI) \
+ ENTRY(R8W) \
+ ENTRY(R9W) \
+ ENTRY(R10W) \
+ ENTRY(R11W) \
+ ENTRY(R12W) \
+ ENTRY(R13W) \
+ ENTRY(R14W) \
+ ENTRY(R15W) \
+ ENTRY(R16W) \
+ ENTRY(R17W) \
+ ENTRY(R18W) \
+ ENTRY(R19W) \
+ ENTRY(R20W) \
+ ENTRY(R21W) \
+ ENTRY(R22W) \
+ ENTRY(R23W) \
+ ENTRY(R24W) \
+ ENTRY(R25W) \
+ ENTRY(R26W) \
+ ENTRY(R27W) \
+ ENTRY(R28W) \
+ ENTRY(R29W) \
+ ENTRY(R30W) \
+ ENTRY(R31W)
+
+#define EA_BASES_32BIT \
+ ENTRY(EAX) \
+ ENTRY(ECX) \
+ ENTRY(EDX) \
+ ENTRY(EBX) \
+ ENTRY(sib) \
+ ENTRY(EBP) \
+ ENTRY(ESI) \
+ ENTRY(EDI) \
+ ENTRY(R8D) \
+ ENTRY(R9D) \
+ ENTRY(R10D) \
+ ENTRY(R11D) \
+ ENTRY(R12D) \
+ ENTRY(R13D) \
+ ENTRY(R14D) \
+ ENTRY(R15D) \
+ ENTRY(R16D) \
+ ENTRY(R17D) \
+ ENTRY(R18D) \
+ ENTRY(R19D) \
+ ENTRY(R20D) \
+ ENTRY(R21D) \
+ ENTRY(R22D) \
+ ENTRY(R23D) \
+ ENTRY(R24D) \
+ ENTRY(R25D) \
+ ENTRY(R26D) \
+ ENTRY(R27D) \
+ ENTRY(R28D) \
+ ENTRY(R29D) \
+ ENTRY(R30D) \
+ ENTRY(R31D)
+
+#define REGS_32BIT \
+ ENTRY(EAX) \
+ ENTRY(ECX) \
+ ENTRY(EDX) \
+ ENTRY(EBX) \
+ ENTRY(ESP) \
+ ENTRY(EBP) \
+ ENTRY(ESI) \
+ ENTRY(EDI) \
+ ENTRY(R8D) \
+ ENTRY(R9D) \
+ ENTRY(R10D) \
+ ENTRY(R11D) \
+ ENTRY(R12D) \
+ ENTRY(R13D) \
+ ENTRY(R14D) \
+ ENTRY(R15D) \
+ ENTRY(R16D) \
+ ENTRY(R17D) \
+ ENTRY(R18D) \
+ ENTRY(R19D) \
+ ENTRY(R20D) \
+ ENTRY(R21D) \
+ ENTRY(R22D) \
+ ENTRY(R23D) \
+ ENTRY(R24D) \
+ ENTRY(R25D) \
+ ENTRY(R26D) \
+ ENTRY(R27D) \
+ ENTRY(R28D) \
+ ENTRY(R29D) \
+ ENTRY(R30D) \
+ ENTRY(R31D)
+
+#define EA_BASES_64BIT \
+ ENTRY(RAX) \
+ ENTRY(RCX) \
+ ENTRY(RDX) \
+ ENTRY(RBX) \
+ ENTRY(sib64) \
+ ENTRY(RBP) \
+ ENTRY(RSI) \
+ ENTRY(RDI) \
+ ENTRY(R8) \
+ ENTRY(R9) \
+ ENTRY(R10) \
+ ENTRY(R11) \
+ ENTRY(R12) \
+ ENTRY(R13) \
+ ENTRY(R14) \
+ ENTRY(R15) \
+ ENTRY(R16) \
+ ENTRY(R17) \
+ ENTRY(R18) \
+ ENTRY(R19) \
+ ENTRY(R20) \
+ ENTRY(R21) \
+ ENTRY(R22) \
+ ENTRY(R23) \
+ ENTRY(R24) \
+ ENTRY(R25) \
+ ENTRY(R26) \
+ ENTRY(R27) \
+ ENTRY(R28) \
+ ENTRY(R29) \
+ ENTRY(R30) \
+ ENTRY(R31)
+
+#define REGS_64BIT \
+ ENTRY(RAX) \
+ ENTRY(RCX) \
+ ENTRY(RDX) \
+ ENTRY(RBX) \
+ ENTRY(RSP) \
+ ENTRY(RBP) \
+ ENTRY(RSI) \
+ ENTRY(RDI) \
+ ENTRY(R8) \
+ ENTRY(R9) \
+ ENTRY(R10) \
+ ENTRY(R11) \
+ ENTRY(R12) \
+ ENTRY(R13) \
+ ENTRY(R14) \
+ ENTRY(R15) \
+ ENTRY(R16) \
+ ENTRY(R17) \
+ ENTRY(R18) \
+ ENTRY(R19) \
+ ENTRY(R20) \
+ ENTRY(R21) \
+ ENTRY(R22) \
+ ENTRY(R23) \
+ ENTRY(R24) \
+ ENTRY(R25) \
+ ENTRY(R26) \
+ ENTRY(R27) \
+ ENTRY(R28) \
+ ENTRY(R29) \
+ ENTRY(R30) \
+ ENTRY(R31)
#define REGS_MMX \
ENTRY(MM0) \
@@ -483,8 +628,10 @@ enum VEXLeadingOpcodeByte {
VEX_LOB_0F = 0x1,
VEX_LOB_0F38 = 0x2,
VEX_LOB_0F3A = 0x3,
+ VEX_LOB_MAP4 = 0x4,
VEX_LOB_MAP5 = 0x5,
- VEX_LOB_MAP6 = 0x6
+ VEX_LOB_MAP6 = 0x6,
+ VEX_LOB_MAP7 = 0x7
};
enum XOPMapSelect {
@@ -539,6 +686,8 @@ struct InternalInstruction {
uint8_t vectorExtensionPrefix[4];
// The type of the vector extension prefix
VectorExtensionType vectorExtensionType;
+ // The value of the REX2 prefix, if present
+ uint8_t rex2ExtensionPrefix[2];
// The value of the REX prefix, if present
uint8_t rexPrefix;
// The segment override type
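Since the REX2 payload packs eight single-bit fields, a short worked example of the accessors defined above may help. The sample byte is arbitrary: 0xD6 is 0b11010110, so reading from bit 7 down gives M=1, R2=1, X2=0, B2=1, W=0, R=1, X=1, B=0. The expressions below spell out what each macro computes.

    #include <cassert>

    int main() {
      unsigned p = 0xD6;            // sample REX2 payload, 0b11010110
      assert(((p >> 7) & 1) == 1);  // mFromREX2:  select opcode map 1
      assert(((p >> 6) & 1) == 1);  // r2FromREX2: ModRM.reg bit 4
      assert(((p >> 5) & 1) == 0);  // x2FromREX2: SIB.index bit 4
      assert(((p >> 4) & 1) == 1);  // b2FromREX2: base/RM bit 4
      assert(((p >> 3) & 1) == 0);  // wFromREX2:  operand-size (W)
      assert(((p >> 2) & 1) == 1);  // rFromREX2:  ModRM.reg bit 3
      assert(((p >> 1) & 1) == 1);  // xFromREX2:  SIB.index bit 3
      assert(((p >> 0) & 1) == 0);  // bFromREX2:  base/RM bit 3
    }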
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/GISel/X86CallLowering.cpp
index a47a09414cf7..e69e77e76e6e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/GISel/X86CallLowering.cpp
@@ -106,14 +106,15 @@ struct X86OutgoingValueHandler : public CallLowering::OutgoingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) override {
+ const CCValAssign &VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
}
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
- MachinePointerInfo &MPO, CCValAssign &VA) override {
+ const MachinePointerInfo &MPO,
+ const CCValAssign &VA) override {
MachineFunction &MF = MIRBuilder.getMF();
Register ExtReg = extendRegister(ValVReg, VA);
@@ -201,7 +202,8 @@ struct X86IncomingValueHandler : public CallLowering::IncomingValueHandler {
}
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
- MachinePointerInfo &MPO, CCValAssign &VA) override {
+ const MachinePointerInfo &MPO,
+ const CCValAssign &VA) override {
MachineFunction &MF = MIRBuilder.getMF();
auto *MMO = MF.getMachineMemOperand(
MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemTy,
@@ -210,7 +212,7 @@ struct X86IncomingValueHandler : public CallLowering::IncomingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) override {
+ const CCValAssign &VA) override {
markPhysRegUsed(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.h b/contrib/llvm-project/llvm/lib/Target/X86/GISel/X86CallLowering.h
index 9067abf060bd..9067abf060bd 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/GISel/X86CallLowering.h
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
index 6157dafb5c51..d7a10f45cb5f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -20,6 +20,7 @@
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
@@ -116,6 +117,8 @@ private:
bool selectImplicitDefOrPHI(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectMulDivRem(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
+ bool selectSelect(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
bool selectIntrinsicWSideEffects(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
@@ -429,6 +432,8 @@ bool X86InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_SREM:
case TargetOpcode::G_UREM:
return selectMulDivRem(I, MRI, MF);
+ case TargetOpcode::G_SELECT:
+ return selectSelect(I, MRI, MF);
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
return selectIntrinsicWSideEffects(I, MRI, MF);
}
@@ -1789,6 +1794,49 @@ bool X86InstructionSelector::selectMulDivRem(MachineInstr &I,
return true;
}
+bool X86InstructionSelector::selectSelect(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+ GSelect &Sel = cast<GSelect>(I);
+ unsigned DstReg = Sel.getReg(0);
+ BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(X86::TEST32rr))
+ .addReg(Sel.getCondReg())
+ .addReg(Sel.getCondReg());
+
+ unsigned OpCmp;
+ LLT Ty = MRI.getType(DstReg);
+ switch (Ty.getSizeInBits()) {
+ default:
+ return false;
+ case 8:
+ OpCmp = X86::CMOV_GR8;
+ break;
+ case 16:
+ OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16;
+ break;
+ case 32:
+ OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32;
+ break;
+ case 64:
+ assert(STI.is64Bit() && STI.canUseCMOV());
+ OpCmp = X86::CMOV64rr;
+ break;
+ }
+ BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg)
+ .addReg(Sel.getTrueReg())
+ .addReg(Sel.getFalseReg())
+ .addImm(X86::COND_E);
+
+ const TargetRegisterClass *DstRC = getRegClass(Ty, DstReg, MRI);
+ if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
+ LLVM_DEBUG(dbgs() << "Failed to constrain CMOV\n");
+ return false;
+ }
+
+ Sel.eraseFromParent();
+ return true;
+}
+
bool X86InstructionSelector::selectIntrinsicWSideEffects(
MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const {
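The operand order in selectSelect above is easy to misread, so here is a hedged behavioral model in plain C++ (not the selector itself): the TEST sets ZF when the condition is zero, and a CMOV with COND_E then replaces the true value with the false one, which is exactly select semantics.

    #include <cassert>

    // dst = CMOVcc(trueV, falseV) after TEST cond,cond, predicate COND_E:
    // the move happens when ZF is set, i.e. when cond == 0, so the second
    // source (the "false" value) is taken exactly then.
    static int selectViaCmov(int cond, int trueV, int falseV) {
      bool zf = (cond == 0);        // TEST32rr %cond, %cond
      int dst = trueV;              // CMOV reads src1 ...
      if (zf)                       // ... and overwrites with src2 on COND_E
        dst = falseV;
      return dst;
    }

    int main() {
      assert(selectViaCmov(1, 10, 20) == 10);  // cond true  -> true value
      assert(selectViaCmov(0, 10, 20) == 20);  // cond false -> false value
    }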
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index a4a247f85f3d..27381dff338e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -29,6 +29,7 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
: Subtarget(STI) {
bool Is64Bit = Subtarget.is64Bit();
+ bool HasCMOV = Subtarget.canUseCMOV();
bool HasSSE1 = Subtarget.hasSSE1();
bool HasSSE2 = Subtarget.hasSSE2();
bool HasSSE41 = Subtarget.hasSSE41();
@@ -255,7 +256,9 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
getActionDefinitionsBuilder(G_ICMP)
.legalForCartesianProduct({s8}, Is64Bit ? IntTypes64 : IntTypes32)
- .clampScalar(0, s8, s8);
+ .clampScalar(0, s8, s8)
+ .clampScalar(1, s8, sMaxScalar)
+ .scalarSameSizeAs(2, 1);
// bswap
getActionDefinitionsBuilder(G_BSWAP)
@@ -519,15 +522,18 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
// todo: vectors and address spaces
getActionDefinitionsBuilder(G_SELECT)
- .legalFor({{s8, s32}, {s16, s32}, {s32, s32}, {s64, s32},
- {p0, s32}})
- .widenScalarToNextPow2(0, /*Min=*/8)
- .clampScalar(0, s8, sMaxScalar)
- .clampScalar(1, s32, s32);
+ .legalFor({{s8, s32}, {s16, s32}, {s32, s32}, {s64, s32}, {p0, s32}})
+ .widenScalarToNextPow2(0, /*Min=*/8)
+ .clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar)
+ .clampScalar(1, s32, s32);
// memory intrinsics
getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
+ getActionDefinitionsBuilder({G_DYN_STACKALLOC,
+ G_STACKSAVE,
+ G_STACKRESTORE}).lower();
+
// fp intrinsics
getActionDefinitionsBuilder(G_INTRINSIC_ROUNDEVEN)
.scalarize(0)
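A hedged model of the G_SELECT result-type clamp above (it ignores the pow2 widening and the upper clamp): x86 has no 8-bit CMOV encoding, so when CMOV is available the minimum legal select width is raised from s8 to s16, and s8 selects get widened and truncated back; without CMOV the s8 form stays legal and is later expanded from the CMOV_GR8 pseudo.

    #include <cassert>

    static unsigned clampedSelectWidth(unsigned bits, bool hasCMOV) {
      // Mirrors: clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar)
      unsigned minBits = hasCMOV ? 16 : 8;
      return bits < minBits ? minBits : bits;
    }

    int main() {
      assert(clampedSelectWidth(8, /*hasCMOV=*/true) == 16);
      assert(clampedSelectWidth(8, /*hasCMOV=*/false) == 8);
      assert(clampedSelectWidth(32, /*hasCMOV=*/true) == 32);
    }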
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
index 1f69feceae27..1f69feceae27 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
index 3160969e81e4..72828f961f93 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
@@ -272,7 +272,7 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
void X86RegisterBankInfo::applyMappingImpl(
- const OperandsMapper &OpdMapper) const {
+ MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
return applyDefaultMapping(OpdMapper);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterBankInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h
index fca36a317b58..9a4e23d8b34d 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterBankInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h
@@ -71,7 +71,8 @@ public:
getInstrAlternativeMappings(const MachineInstr &MI) const override;
/// See RegisterBankInfo::applyMapping.
- void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
+ void applyMappingImpl(MachineIRBuilder &Builder,
+ const OperandsMapper &OpdMapper) const override;
const InstructionMapping &
getInstrMapping(const MachineInstr &MI) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp
index 78379290aae9..84a3ee3ef27e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp
@@ -13,7 +13,7 @@
#include "X86CustomBehaviour.h"
#include "TargetInfo/X86TargetInfo.h"
-#include "X86InstrInfo.h"
+#include "MCTargetDesc/X86BaseInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/WithColor.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h b/contrib/llvm-project/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h
index 3cb5edeee5b5..4a83ba848dd8 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h
@@ -17,7 +17,6 @@
#ifndef LLVM_LIB_TARGET_X86_MCA_X86CUSTOMBEHAVIOUR_H
#define LLVM_LIB_TARGET_X86_MCA_X86CUSTOMBEHAVIOUR_H
-#include "llvm/ADT/SmallVector.h"
#include "llvm/MCA/CustomBehaviour.h"
#include "llvm/TargetParser/TargetParser.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
index 4b36135da352..e96f9279826b 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
@@ -36,7 +36,7 @@ using namespace llvm;
#include "X86GenAsmWriter.inc"
void X86ATTInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
- OS << markup("<reg:") << '%' << getRegisterName(Reg) << markup(">");
+ markup(OS, Markup::Register) << '%' << getRegisterName(Reg);
}
void X86ATTInstPrinter::printInst(const MCInst *MI, uint64_t Address,
@@ -386,7 +386,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
} else if (Op.isImm()) {
// Print immediates as signed values.
int64_t Imm = Op.getImm();
- O << markup("<imm:") << '$' << formatImm(Imm) << markup(">");
+ markup(O, Markup::Immediate) << '$' << formatImm(Imm);
// TODO: This should be in a helper function in the base class, so it can
// be used by other printers.
@@ -405,9 +405,9 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
- O << markup("<imm:") << '$';
+ WithMarkup M = markup(O, Markup::Immediate);
+ O << '$';
Op.getExpr()->print(O, &MAI);
- O << markup(">");
}
}
@@ -427,7 +427,7 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
const MCOperand &IndexReg = MI->getOperand(Op + X86::AddrIndexReg);
const MCOperand &DispSpec = MI->getOperand(Op + X86::AddrDisp);
- O << markup("<mem:");
+ WithMarkup M = markup(O, Markup::Memory);
// If this has a segment register, print it.
printOptionalSegReg(MI, Op + X86::AddrSegmentReg, O);
@@ -451,19 +451,17 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
printOperand(MI, Op + X86::AddrIndexReg, O);
unsigned ScaleVal = MI->getOperand(Op + X86::AddrScaleAmt).getImm();
if (ScaleVal != 1) {
- O << ',' << markup("<imm:") << ScaleVal // never printed in hex.
- << markup(">");
+ O << ',';
+ markup(O, Markup::Immediate) << ScaleVal; // never printed in hex.
}
}
O << ')';
}
-
- O << markup(">");
}
void X86ATTInstPrinter::printSrcIdx(const MCInst *MI, unsigned Op,
raw_ostream &O) {
- O << markup("<mem:");
+ WithMarkup M = markup(O, Markup::Memory);
// If this has a segment register, print it.
printOptionalSegReg(MI, Op + 1, O);
@@ -471,26 +469,22 @@ void X86ATTInstPrinter::printSrcIdx(const MCInst *MI, unsigned Op,
O << "(";
printOperand(MI, Op, O);
O << ")";
-
- O << markup(">");
}
void X86ATTInstPrinter::printDstIdx(const MCInst *MI, unsigned Op,
raw_ostream &O) {
- O << markup("<mem:");
+ WithMarkup M = markup(O, Markup::Memory);
O << "%es:(";
printOperand(MI, Op, O);
O << ")";
-
- O << markup(">");
}
void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
raw_ostream &O) {
const MCOperand &DispSpec = MI->getOperand(Op);
- O << markup("<mem:");
+ WithMarkup M = markup(O, Markup::Memory);
// If this has a segment register, print it.
printOptionalSegReg(MI, Op + 1, O);
@@ -501,8 +495,6 @@ void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
assert(DispSpec.isExpr() && "non-immediate displacement?");
DispSpec.getExpr()->print(O, &MAI);
}
-
- O << markup(">");
}
void X86ATTInstPrinter::printU8Imm(const MCInst *MI, unsigned Op,
@@ -510,8 +502,8 @@ void X86ATTInstPrinter::printU8Imm(const MCInst *MI, unsigned Op,
if (MI->getOperand(Op).isExpr())
return printOperand(MI, Op, O);
- O << markup("<imm:") << '$' << formatImm(MI->getOperand(Op).getImm() & 0xff)
- << markup(">");
+ markup(O, Markup::Immediate)
+ << '$' << formatImm(MI->getOperand(Op).getImm() & 0xff);
}
void X86ATTInstPrinter::printSTiRegOperand(const MCInst *MI, unsigned OpNo,
@@ -520,7 +512,7 @@ void X86ATTInstPrinter::printSTiRegOperand(const MCInst *MI, unsigned OpNo,
unsigned Reg = Op.getReg();
// Override the default printing to print st(0) instead st.
if (Reg == X86::ST0)
- OS << markup("<reg:") << "%st(0)" << markup(">");
+ markup(OS, Markup::Register) << "%st(0)";
else
printRegName(OS, Reg);
}
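The printer changes above swap explicit open/close markup strings for an RAII object. As a hedged sketch of the assumed shape (not the real MCInstPrinter API): markup() returns an object that emits the opening tag immediately and the closing tag in its destructor, so early returns can no longer leave a tag unclosed. Requires C++17 guaranteed copy elision for the by-value return.

    #include <iostream>
    #include <ostream>

    class WithMarkup {
      std::ostream &OS;
    public:
      WithMarkup(std::ostream &OS, const char *Open) : OS(OS) { OS << Open; }
      ~WithMarkup() { OS << '>'; }  // closing tag emitted on scope exit
      template <typename T> WithMarkup &operator<<(const T &V) {
        OS << V;
        return *this;
      }
    };

    static WithMarkup markup(std::ostream &OS, const char *Kind) {
      return WithMarkup(OS, Kind);
    }

    int main() {
      markup(std::cout, "<reg:") << '%' << "eax";  // prints "<reg:%eax>"
      std::cout << '\n';
    }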
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 289642ac37bb..99dc9797f6df 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -137,7 +137,7 @@ class X86AsmBackend : public MCAsmBackend {
public:
X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
- : MCAsmBackend(support::little), STI(STI),
+ : MCAsmBackend(llvm::endianness::little), STI(STI),
MCII(T.createMCInstrInfo()) {
if (X86AlignBranchWithin32BBoundaries) {
// At the moment, this defaults to aligning fused branches, unconditional
@@ -173,7 +173,8 @@ public:
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target) override;
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) override;
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
@@ -418,7 +419,7 @@ isRightAfterData(MCFragment *CurrentFragment,
// - If it's not the fragment where the previous instruction is,
// returns true.
// - If it's the fragment holding the previous instruction but its
- // size changed since the the previous instruction was emitted into
+ // size changed since the previous instruction was emitted into
// it, returns true.
// - Otherwise returns false.
// - If the fragment is not a DataFragment, returns false.
@@ -569,7 +570,7 @@ void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst)
if (!needAlign(Inst) || !PendingBA)
return;
- // Tie the aligned instructions into a a pending BoundaryAlign.
+ // Tie the aligned instructions into a pending BoundaryAlign.
PendingBA->setLastFragment(CF);
PendingBA = nullptr;
@@ -645,8 +646,8 @@ const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
}
bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
- const MCFixup &Fixup,
- const MCValue &) {
+ const MCFixup &Fixup, const MCValue &,
+ const MCSubtargetInfo *STI) {
return Fixup.getKind() >= FirstLiteralRelocationKind;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index e2293fe30561..b0fcaef5f4b0 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -22,1217 +22,1285 @@
#include "llvm/Support/ErrorHandling.h"
namespace llvm {
-
namespace X86 {
- // Enums for memory operand decoding. Each memory operand is represented with
- // a 5 operand sequence in the form:
- // [BaseReg, ScaleAmt, IndexReg, Disp, Segment]
- // These enums help decode this.
- enum {
- AddrBaseReg = 0,
- AddrScaleAmt = 1,
- AddrIndexReg = 2,
- AddrDisp = 3,
-
- /// AddrSegmentReg - The operand # of the segment in the memory operand.
- AddrSegmentReg = 4,
-
- /// AddrNumOperands - Total number of operands in a memory reference.
- AddrNumOperands = 5
- };
-
- /// AVX512 static rounding constants. These need to match the values in
- /// avx512fintrin.h.
- enum STATIC_ROUNDING {
- TO_NEAREST_INT = 0,
- TO_NEG_INF = 1,
- TO_POS_INF = 2,
- TO_ZERO = 3,
- CUR_DIRECTION = 4,
- NO_EXC = 8
- };
-
- /// The constants to describe instr prefixes if there are
- enum IPREFIXES {
- IP_NO_PREFIX = 0,
- IP_HAS_OP_SIZE = 1U << 0,
- IP_HAS_AD_SIZE = 1U << 1,
- IP_HAS_REPEAT_NE = 1U << 2,
- IP_HAS_REPEAT = 1U << 3,
- IP_HAS_LOCK = 1U << 4,
- IP_HAS_NOTRACK = 1U << 5,
- IP_USE_VEX = 1U << 6,
- IP_USE_VEX2 = 1U << 7,
- IP_USE_VEX3 = 1U << 8,
- IP_USE_EVEX = 1U << 9,
- IP_USE_DISP8 = 1U << 10,
- IP_USE_DISP32 = 1U << 11,
- };
-
- enum OperandType : unsigned {
- /// AVX512 embedded rounding control. This should only have values 0-3.
- OPERAND_ROUNDING_CONTROL = MCOI::OPERAND_FIRST_TARGET,
- OPERAND_COND_CODE,
- };
-
- // X86 specific condition code. These correspond to X86_*_COND in
- // X86InstrInfo.td. They must be kept in synch.
- enum CondCode {
- COND_O = 0,
- COND_NO = 1,
- COND_B = 2,
- COND_AE = 3,
- COND_E = 4,
- COND_NE = 5,
- COND_BE = 6,
- COND_A = 7,
- COND_S = 8,
- COND_NS = 9,
- COND_P = 10,
- COND_NP = 11,
- COND_L = 12,
- COND_GE = 13,
- COND_LE = 14,
- COND_G = 15,
- LAST_VALID_COND = COND_G,
-
- // Artificial condition codes. These are used by analyzeBranch
- // to indicate a block terminated with two conditional branches that together
- // form a compound condition. They occur in code using FCMP_OEQ or FCMP_UNE,
- // which can't be represented on x86 with a single condition. These
- // are never used in MachineInstrs and are inverses of one another.
- COND_NE_OR_P,
- COND_E_AND_NP,
-
- COND_INVALID
- };
-
- // The classification for the first instruction in macro fusion.
- enum class FirstMacroFusionInstKind {
- // TEST
- Test,
- // CMP
- Cmp,
- // AND
- And,
- // FIXME: Zen 3 support branch fusion for OR/XOR.
- // ADD, SUB
- AddSub,
- // INC, DEC
- IncDec,
- // Not valid as a first macro fusion instruction
- Invalid
- };
-
- enum class SecondMacroFusionInstKind {
- // JA, JB and variants.
- AB,
- // JE, JL, JG and variants.
- ELG,
- // JS, JP, JO and variants
- SPO,
- // Not a fusible jump.
- Invalid,
- };
-
- /// \returns the type of the first instruction in macro-fusion.
- inline FirstMacroFusionInstKind
- classifyFirstOpcodeInMacroFusion(unsigned Opcode) {
- switch (Opcode) {
- default:
- return FirstMacroFusionInstKind::Invalid;
- // TEST
- case X86::TEST16i16:
- case X86::TEST16mr:
- case X86::TEST16ri:
- case X86::TEST16rr:
- case X86::TEST32i32:
- case X86::TEST32mr:
- case X86::TEST32ri:
- case X86::TEST32rr:
- case X86::TEST64i32:
- case X86::TEST64mr:
- case X86::TEST64ri32:
- case X86::TEST64rr:
- case X86::TEST8i8:
- case X86::TEST8mr:
- case X86::TEST8ri:
- case X86::TEST8rr:
- return FirstMacroFusionInstKind::Test;
- case X86::AND16i16:
- case X86::AND16ri:
- case X86::AND16ri8:
- case X86::AND16rm:
- case X86::AND16rr:
- case X86::AND16rr_REV:
- case X86::AND32i32:
- case X86::AND32ri:
- case X86::AND32ri8:
- case X86::AND32rm:
- case X86::AND32rr:
- case X86::AND32rr_REV:
- case X86::AND64i32:
- case X86::AND64ri32:
- case X86::AND64ri8:
- case X86::AND64rm:
- case X86::AND64rr:
- case X86::AND64rr_REV:
- case X86::AND8i8:
- case X86::AND8ri:
- case X86::AND8ri8:
- case X86::AND8rm:
- case X86::AND8rr:
- case X86::AND8rr_REV:
- return FirstMacroFusionInstKind::And;
- // FIXME: Zen 3 support branch fusion for OR/XOR.
- // CMP
- case X86::CMP16i16:
- case X86::CMP16mr:
- case X86::CMP16ri:
- case X86::CMP16ri8:
- case X86::CMP16rm:
- case X86::CMP16rr:
- case X86::CMP16rr_REV:
- case X86::CMP32i32:
- case X86::CMP32mr:
- case X86::CMP32ri:
- case X86::CMP32ri8:
- case X86::CMP32rm:
- case X86::CMP32rr:
- case X86::CMP32rr_REV:
- case X86::CMP64i32:
- case X86::CMP64mr:
- case X86::CMP64ri32:
- case X86::CMP64ri8:
- case X86::CMP64rm:
- case X86::CMP64rr:
- case X86::CMP64rr_REV:
- case X86::CMP8i8:
- case X86::CMP8mr:
- case X86::CMP8ri:
- case X86::CMP8ri8:
- case X86::CMP8rm:
- case X86::CMP8rr:
- case X86::CMP8rr_REV:
- return FirstMacroFusionInstKind::Cmp;
- // ADD
- case X86::ADD16i16:
- case X86::ADD16ri:
- case X86::ADD16ri8:
- case X86::ADD16rm:
- case X86::ADD16rr:
- case X86::ADD16rr_REV:
- case X86::ADD32i32:
- case X86::ADD32ri:
- case X86::ADD32ri8:
- case X86::ADD32rm:
- case X86::ADD32rr:
- case X86::ADD32rr_REV:
- case X86::ADD64i32:
- case X86::ADD64ri32:
- case X86::ADD64ri8:
- case X86::ADD64rm:
- case X86::ADD64rr:
- case X86::ADD64rr_REV:
- case X86::ADD8i8:
- case X86::ADD8ri:
- case X86::ADD8ri8:
- case X86::ADD8rm:
- case X86::ADD8rr:
- case X86::ADD8rr_REV:
- // SUB
- case X86::SUB16i16:
- case X86::SUB16ri:
- case X86::SUB16ri8:
- case X86::SUB16rm:
- case X86::SUB16rr:
- case X86::SUB16rr_REV:
- case X86::SUB32i32:
- case X86::SUB32ri:
- case X86::SUB32ri8:
- case X86::SUB32rm:
- case X86::SUB32rr:
- case X86::SUB32rr_REV:
- case X86::SUB64i32:
- case X86::SUB64ri32:
- case X86::SUB64ri8:
- case X86::SUB64rm:
- case X86::SUB64rr:
- case X86::SUB64rr_REV:
- case X86::SUB8i8:
- case X86::SUB8ri:
- case X86::SUB8ri8:
- case X86::SUB8rm:
- case X86::SUB8rr:
- case X86::SUB8rr_REV:
- return FirstMacroFusionInstKind::AddSub;
- // INC
- case X86::INC16r:
- case X86::INC16r_alt:
- case X86::INC32r:
- case X86::INC32r_alt:
- case X86::INC64r:
- case X86::INC8r:
- // DEC
- case X86::DEC16r:
- case X86::DEC16r_alt:
- case X86::DEC32r:
- case X86::DEC32r_alt:
- case X86::DEC64r:
- case X86::DEC8r:
- return FirstMacroFusionInstKind::IncDec;
- }
+// Enums for memory operand decoding. Each memory operand is represented by a
+// five-operand sequence of the form [Base, Scale, Index, Disp, Segment].
+enum {
+ AddrBaseReg = 0,
+ AddrScaleAmt = 1,
+ AddrIndexReg = 2,
+ AddrDisp = 3,
+ // The operand # of the segment in the memory operand.
+ AddrSegmentReg = 4,
+ // Total number of operands in a memory reference.
+ AddrNumOperands = 5
+};
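A sketch (not from this change) of how these offsets are typically used, assuming a MachineInstr MI whose memory reference begins at a caller-supplied index MemOpNo:

    // Each sub-operand of the memory reference sits at a fixed offset from
    // the first one; MI and MemOpNo are assumed inputs.
    const MachineOperand &Base    = MI.getOperand(MemOpNo + X86::AddrBaseReg);
    const MachineOperand &Scale   = MI.getOperand(MemOpNo + X86::AddrScaleAmt);
    const MachineOperand &Index   = MI.getOperand(MemOpNo + X86::AddrIndexReg);
    const MachineOperand &Disp    = MI.getOperand(MemOpNo + X86::AddrDisp);
    const MachineOperand &Segment = MI.getOperand(MemOpNo + X86::AddrSegmentReg);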
+
+/// AVX512 static rounding constants. These need to match the values in
+/// avx512fintrin.h.
+enum STATIC_ROUNDING {
+ TO_NEAREST_INT = 0,
+ TO_NEG_INF = 1,
+ TO_POS_INF = 2,
+ TO_ZERO = 3,
+ CUR_DIRECTION = 4,
+ NO_EXC = 8
+};
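Since these mirror the _MM_FROUND_* macros, a rounding direction can be OR-ed with NO_EXC to request suppress-all-exceptions (SAE) behavior; a small illustrative sketch:

    // Round toward -inf with exceptions suppressed. The resulting value 9
    // matches _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC in the intrinsic
    // headers (a sketch, assuming the avx512fintrin.h values stay in sync).
    unsigned RC = X86::STATIC_ROUNDING::TO_NEG_INF | X86::STATIC_ROUNDING::NO_EXC;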
+
+/// The constants to describe instruction prefixes, if any are present.
+enum IPREFIXES {
+ IP_NO_PREFIX = 0,
+ IP_HAS_OP_SIZE = 1U << 0,
+ IP_HAS_AD_SIZE = 1U << 1,
+ IP_HAS_REPEAT_NE = 1U << 2,
+ IP_HAS_REPEAT = 1U << 3,
+ IP_HAS_LOCK = 1U << 4,
+ IP_HAS_NOTRACK = 1U << 5,
+ IP_USE_VEX = 1U << 6,
+ IP_USE_VEX2 = 1U << 7,
+ IP_USE_VEX3 = 1U << 8,
+ IP_USE_EVEX = 1U << 9,
+ IP_USE_DISP8 = 1U << 10,
+ IP_USE_DISP32 = 1U << 11,
+};
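A brief sketch of how these bits are typically consumed, assuming an earlier stage stashed them on an MCInst's flags word (Inst is an assumed input):

    // The IP_* bits are carried in the MCInst flags word.
    if (Inst.getFlags() & X86::IP_HAS_LOCK) {
      // The instruction was written with a LOCK prefix.
    }
    if (Inst.getFlags() & (X86::IP_USE_VEX3 | X86::IP_USE_EVEX)) {
      // An explicit {vex3} or {evex} encoding was requested.
    }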
+
+enum OperandType : unsigned {
+ // AVX512 embedded rounding control. This should only have values 0-3.
+ OPERAND_ROUNDING_CONTROL = MCOI::OPERAND_FIRST_TARGET,
+ OPERAND_COND_CODE,
+};
+
+// X86 specific condition code. These correspond to X86_*_COND in
+// X86InstrInfo.td. They must be kept in sync.
+enum CondCode {
+ COND_O = 0,
+ COND_NO = 1,
+ COND_B = 2,
+ COND_AE = 3,
+ COND_E = 4,
+ COND_NE = 5,
+ COND_BE = 6,
+ COND_A = 7,
+ COND_S = 8,
+ COND_NS = 9,
+ COND_P = 10,
+ COND_NP = 11,
+ COND_L = 12,
+ COND_GE = 13,
+ COND_LE = 14,
+ COND_G = 15,
+ LAST_VALID_COND = COND_G,
+ // Artificial condition codes. These are used by analyzeBranch
+ // to indicate a block terminated with two conditional branches that together
+ // form a compound condition. They occur in code using FCMP_OEQ or FCMP_UNE,
+ // which can't be represented on x86 with a single condition. These
+ // are never used in MachineInstrs and are inverses of one another.
+ COND_NE_OR_P,
+ COND_E_AND_NP,
+ COND_INVALID
+};
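Note the layout: each valid condition sits next to its negation (COND_O/COND_NO, COND_B/COND_AE, ...), so inverting a hardware condition is a single bit flip. A hypothetical standalone helper, sketched only to illustrate the encoding (LLVM has its own utilities for this):

    // Valid for the 16 hardware codes; artificial codes are not handled.
    inline X86::CondCode invertCond(X86::CondCode CC) { // hypothetical name
      return CC <= X86::LAST_VALID_COND ? static_cast<X86::CondCode>(CC ^ 1)
                                        : X86::COND_INVALID;
    }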
+
+// The classification for the first instruction in macro fusion.
+// FIXME: Zen 3 supports branch fusion for OR/XOR.
+enum class FirstMacroFusionInstKind {
+ Test, // TEST
+ Cmp, // CMP
+ And, // AND
+ AddSub, // ADD, SUB
+ IncDec, // INC, DEC
+ Invalid // Not valid as a first macro fusion instruction
+};
+
+enum class SecondMacroFusionInstKind {
+ AB, // JA, JB and variants
+ ELG, // JE, JL, JG and variants
+ SPO, // JS, JP, JO and variants
+ Invalid, // Not a fusible jump.
+};
+
+/// \returns the type of the first instruction in macro-fusion.
+// FIXME: Zen 3 supports branch fusion for OR/XOR.
+inline FirstMacroFusionInstKind
+classifyFirstOpcodeInMacroFusion(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return FirstMacroFusionInstKind::Invalid;
+ // TEST
+ case X86::TEST16i16:
+ case X86::TEST16mr:
+ case X86::TEST16ri:
+ case X86::TEST16rr:
+ case X86::TEST32i32:
+ case X86::TEST32mr:
+ case X86::TEST32ri:
+ case X86::TEST32rr:
+ case X86::TEST64i32:
+ case X86::TEST64mr:
+ case X86::TEST64ri32:
+ case X86::TEST64rr:
+ case X86::TEST8i8:
+ case X86::TEST8mr:
+ case X86::TEST8ri:
+ case X86::TEST8rr:
+ return FirstMacroFusionInstKind::Test;
+ case X86::AND16i16:
+ case X86::AND16ri:
+ case X86::AND16ri8:
+ case X86::AND16rm:
+ case X86::AND16rr:
+ case X86::AND16rr_REV:
+ case X86::AND32i32:
+ case X86::AND32ri:
+ case X86::AND32ri8:
+ case X86::AND32rm:
+ case X86::AND32rr:
+ case X86::AND32rr_REV:
+ case X86::AND64i32:
+ case X86::AND64ri32:
+ case X86::AND64ri8:
+ case X86::AND64rm:
+ case X86::AND64rr:
+ case X86::AND64rr_REV:
+ case X86::AND8i8:
+ case X86::AND8ri:
+ case X86::AND8ri8:
+ case X86::AND8rm:
+ case X86::AND8rr:
+ case X86::AND8rr_REV:
+ return FirstMacroFusionInstKind::And;
+ // CMP
+ case X86::CMP16i16:
+ case X86::CMP16mr:
+ case X86::CMP16ri:
+ case X86::CMP16ri8:
+ case X86::CMP16rm:
+ case X86::CMP16rr:
+ case X86::CMP16rr_REV:
+ case X86::CMP32i32:
+ case X86::CMP32mr:
+ case X86::CMP32ri:
+ case X86::CMP32ri8:
+ case X86::CMP32rm:
+ case X86::CMP32rr:
+ case X86::CMP32rr_REV:
+ case X86::CMP64i32:
+ case X86::CMP64mr:
+ case X86::CMP64ri32:
+ case X86::CMP64ri8:
+ case X86::CMP64rm:
+ case X86::CMP64rr:
+ case X86::CMP64rr_REV:
+ case X86::CMP8i8:
+ case X86::CMP8mr:
+ case X86::CMP8ri:
+ case X86::CMP8ri8:
+ case X86::CMP8rm:
+ case X86::CMP8rr:
+ case X86::CMP8rr_REV:
+ return FirstMacroFusionInstKind::Cmp;
+ // ADD
+ case X86::ADD16i16:
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ case X86::ADD16rm:
+ case X86::ADD16rr:
+ case X86::ADD16rr_REV:
+ case X86::ADD32i32:
+ case X86::ADD32ri:
+ case X86::ADD32ri8:
+ case X86::ADD32rm:
+ case X86::ADD32rr:
+ case X86::ADD32rr_REV:
+ case X86::ADD64i32:
+ case X86::ADD64ri32:
+ case X86::ADD64ri8:
+ case X86::ADD64rm:
+ case X86::ADD64rr:
+ case X86::ADD64rr_REV:
+ case X86::ADD8i8:
+ case X86::ADD8ri:
+ case X86::ADD8ri8:
+ case X86::ADD8rm:
+ case X86::ADD8rr:
+ case X86::ADD8rr_REV:
+ // SUB
+ case X86::SUB16i16:
+ case X86::SUB16ri:
+ case X86::SUB16ri8:
+ case X86::SUB16rm:
+ case X86::SUB16rr:
+ case X86::SUB16rr_REV:
+ case X86::SUB32i32:
+ case X86::SUB32ri:
+ case X86::SUB32ri8:
+ case X86::SUB32rm:
+ case X86::SUB32rr:
+ case X86::SUB32rr_REV:
+ case X86::SUB64i32:
+ case X86::SUB64ri32:
+ case X86::SUB64ri8:
+ case X86::SUB64rm:
+ case X86::SUB64rr:
+ case X86::SUB64rr_REV:
+ case X86::SUB8i8:
+ case X86::SUB8ri:
+ case X86::SUB8ri8:
+ case X86::SUB8rm:
+ case X86::SUB8rr:
+ case X86::SUB8rr_REV:
+ return FirstMacroFusionInstKind::AddSub;
+ // INC
+ case X86::INC16r:
+ case X86::INC16r_alt:
+ case X86::INC32r:
+ case X86::INC32r_alt:
+ case X86::INC64r:
+ case X86::INC8r:
+ // DEC
+ case X86::DEC16r:
+ case X86::DEC16r_alt:
+ case X86::DEC32r:
+ case X86::DEC32r_alt:
+ case X86::DEC64r:
+ case X86::DEC8r:
+ return FirstMacroFusionInstKind::IncDec;
}
+}
- /// \returns the type of the second instruction in macro-fusion.
- inline SecondMacroFusionInstKind
- classifySecondCondCodeInMacroFusion(X86::CondCode CC) {
- if (CC == X86::COND_INVALID)
- return SecondMacroFusionInstKind::Invalid;
-
- switch (CC) {
- default:
- return SecondMacroFusionInstKind::Invalid;
- // JE,JZ
- case X86::COND_E:
- // JNE,JNZ
- case X86::COND_NE:
- // JL,JNGE
- case X86::COND_L:
- // JLE,JNG
- case X86::COND_LE:
- // JG,JNLE
- case X86::COND_G:
- // JGE,JNL
- case X86::COND_GE:
- return SecondMacroFusionInstKind::ELG;
- // JB,JC
- case X86::COND_B:
- // JNA,JBE
- case X86::COND_BE:
- // JA,JNBE
- case X86::COND_A:
- // JAE,JNC,JNB
- case X86::COND_AE:
- return SecondMacroFusionInstKind::AB;
- // JS
- case X86::COND_S:
- // JNS
- case X86::COND_NS:
- // JP,JPE
- case X86::COND_P:
- // JNP,JPO
- case X86::COND_NP:
- // JO
- case X86::COND_O:
- // JNO
- case X86::COND_NO:
- return SecondMacroFusionInstKind::SPO;
- }
+/// \returns the type of the second instruction in macro-fusion.
+inline SecondMacroFusionInstKind
+classifySecondCondCodeInMacroFusion(X86::CondCode CC) {
+ if (CC == X86::COND_INVALID)
+ return SecondMacroFusionInstKind::Invalid;
+ switch (CC) {
+ default:
+ return SecondMacroFusionInstKind::Invalid;
+ case X86::COND_E: // JE,JZ
+ case X86::COND_NE: // JNE,JNZ
+ case X86::COND_L: // JL,JNGE
+ case X86::COND_LE: // JLE,JNG
+ case X86::COND_G: // JG,JNLE
+ case X86::COND_GE: // JGE,JNL
+ return SecondMacroFusionInstKind::ELG;
+ case X86::COND_B: // JB,JC
+ case X86::COND_BE: // JNA,JBE
+ case X86::COND_A: // JA,JNBE
+ case X86::COND_AE: // JAE,JNC,JNB
+ return SecondMacroFusionInstKind::AB;
+ case X86::COND_S: // JS
+ case X86::COND_NS: // JNS
+ case X86::COND_P: // JP,JPE
+ case X86::COND_NP: // JNP,JPO
+ case X86::COND_O: // JO
+ case X86::COND_NO: // JNO
+ return SecondMacroFusionInstKind::SPO;
}
+}
- /// \param FirstKind kind of the first instruction in macro fusion.
- /// \param SecondKind kind of the second instruction in macro fusion.
- ///
- /// \returns true if the two instruction can be macro fused.
- inline bool isMacroFused(FirstMacroFusionInstKind FirstKind,
- SecondMacroFusionInstKind SecondKind) {
- switch (FirstKind) {
- case X86::FirstMacroFusionInstKind::Test:
- case X86::FirstMacroFusionInstKind::And:
- return true;
- case X86::FirstMacroFusionInstKind::Cmp:
- case X86::FirstMacroFusionInstKind::AddSub:
- return SecondKind == X86::SecondMacroFusionInstKind::AB ||
- SecondKind == X86::SecondMacroFusionInstKind::ELG;
- case X86::FirstMacroFusionInstKind::IncDec:
- return SecondKind == X86::SecondMacroFusionInstKind::ELG;
- case X86::FirstMacroFusionInstKind::Invalid:
- return false;
- }
- llvm_unreachable("unknown fusion type");
+/// \param FirstKind kind of the first instruction in macro fusion.
+/// \param SecondKind kind of the second instruction in macro fusion.
+///
+/// \returns true if the two instructions can be macro-fused.
+inline bool isMacroFused(FirstMacroFusionInstKind FirstKind,
+ SecondMacroFusionInstKind SecondKind) {
+ switch (FirstKind) {
+ case X86::FirstMacroFusionInstKind::Test:
+ case X86::FirstMacroFusionInstKind::And:
+ return true;
+ case X86::FirstMacroFusionInstKind::Cmp:
+ case X86::FirstMacroFusionInstKind::AddSub:
+ return SecondKind == X86::SecondMacroFusionInstKind::AB ||
+ SecondKind == X86::SecondMacroFusionInstKind::ELG;
+ case X86::FirstMacroFusionInstKind::IncDec:
+ return SecondKind == X86::SecondMacroFusionInstKind::ELG;
+ case X86::FirstMacroFusionInstKind::Invalid:
+ return false;
}
+ llvm_unreachable("unknown fusion type");
+}
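Taken together, the two classifiers and isMacroFused form the complete fusion query; a sketch assuming a candidate arithmetic instruction FirstMI and the condition code CC of the branch that follows it:

    // FirstMI and CC are assumed inputs from the caller.
    auto FirstKind  = X86::classifyFirstOpcodeInMacroFusion(FirstMI.getOpcode());
    auto SecondKind = X86::classifySecondCondCodeInMacroFusion(CC);
    bool Fusible    = X86::isMacroFused(FirstKind, SecondKind);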
- /// Defines the possible values of the branch boundary alignment mask.
- enum AlignBranchBoundaryKind : uint8_t {
- AlignBranchNone = 0,
- AlignBranchFused = 1U << 0,
- AlignBranchJcc = 1U << 1,
- AlignBranchJmp = 1U << 2,
- AlignBranchCall = 1U << 3,
- AlignBranchRet = 1U << 4,
- AlignBranchIndirect = 1U << 5
- };
-
- /// Defines the encoding values for segment override prefix.
- enum EncodingOfSegmentOverridePrefix : uint8_t {
- CS_Encoding = 0x2E,
- DS_Encoding = 0x3E,
- ES_Encoding = 0x26,
- FS_Encoding = 0x64,
- GS_Encoding = 0x65,
- SS_Encoding = 0x36
- };
-
- /// Given a segment register, return the encoding of the segment override
- /// prefix for it.
- inline EncodingOfSegmentOverridePrefix
- getSegmentOverridePrefixForReg(unsigned Reg) {
- switch (Reg) {
- default:
- llvm_unreachable("Unknown segment register!");
- case X86::CS:
- return CS_Encoding;
- case X86::DS:
- return DS_Encoding;
- case X86::ES:
- return ES_Encoding;
- case X86::FS:
- return FS_Encoding;
- case X86::GS:
- return GS_Encoding;
- case X86::SS:
- return SS_Encoding;
- }
+/// Defines the possible values of the branch boundary alignment mask.
+enum AlignBranchBoundaryKind : uint8_t {
+ AlignBranchNone = 0,
+ AlignBranchFused = 1U << 0,
+ AlignBranchJcc = 1U << 1,
+ AlignBranchJmp = 1U << 2,
+ AlignBranchCall = 1U << 3,
+ AlignBranchRet = 1U << 4,
+ AlignBranchIndirect = 1U << 5
+};
+
+/// Defines the encoding values for segment override prefix.
+enum EncodingOfSegmentOverridePrefix : uint8_t {
+ CS_Encoding = 0x2E,
+ DS_Encoding = 0x3E,
+ ES_Encoding = 0x26,
+ FS_Encoding = 0x64,
+ GS_Encoding = 0x65,
+ SS_Encoding = 0x36
+};
+
+/// Given a segment register, return the encoding of the segment override
+/// prefix for it.
+inline EncodingOfSegmentOverridePrefix
+getSegmentOverridePrefixForReg(unsigned Reg) {
+ switch (Reg) {
+ default:
+ llvm_unreachable("Unknown segment register!");
+ case X86::CS:
+ return CS_Encoding;
+ case X86::DS:
+ return DS_Encoding;
+ case X86::ES:
+ return ES_Encoding;
+ case X86::FS:
+ return FS_Encoding;
+ case X86::GS:
+ return GS_Encoding;
+ case X86::SS:
+ return SS_Encoding;
}
+}
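A sketch of a typical call site, assuming Segment is the segment sub-operand of a memory reference and OS is a raw byte sink:

    // Emit the override byte only when an explicit segment is present.
    if (unsigned SegReg = Segment.getReg())
      OS << char(X86::getSegmentOverridePrefixForReg(SegReg));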
-} // end namespace X86;
+} // namespace X86
/// X86II - This namespace holds all of the target specific flags that
/// instruction info tracks.
///
namespace X86II {
- /// Target Operand Flag enum.
- enum TOF {
- //===------------------------------------------------------------------===//
- // X86 Specific MachineOperand flags.
-
- MO_NO_FLAG,
-
- /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a
- /// relocation of:
- /// SYMBOL_LABEL + [. - PICBASELABEL]
- MO_GOT_ABSOLUTE_ADDRESS,
-
- /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the
- /// immediate should get the value of the symbol minus the PIC base label:
- /// SYMBOL_LABEL - PICBASELABEL
- MO_PIC_BASE_OFFSET,
-
- /// MO_GOT - On a symbol operand this indicates that the immediate is the
- /// offset to the GOT entry for the symbol name from the base of the GOT.
- ///
- /// See the X86-64 ELF ABI supplement for more details.
- /// SYMBOL_LABEL @GOT
- MO_GOT,
-
- /// MO_GOTOFF - On a symbol operand this indicates that the immediate is
- /// the offset to the location of the symbol name from the base of the GOT.
- ///
- /// See the X86-64 ELF ABI supplement for more details.
- /// SYMBOL_LABEL @GOTOFF
- MO_GOTOFF,
-
- /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is
- /// offset to the GOT entry for the symbol name from the current code
- /// location.
- ///
- /// See the X86-64 ELF ABI supplement for more details.
- /// SYMBOL_LABEL @GOTPCREL
- MO_GOTPCREL,
-
- /// MO_GOTPCREL_NORELAX - Same as MO_GOTPCREL except that R_X86_64_GOTPCREL
- /// relocations are guaranteed to be emitted by the integrated assembler
- /// instead of the relaxable R_X86_64[_REX]_GOTPCRELX relocations.
- MO_GOTPCREL_NORELAX,
-
- /// MO_PLT - On a symbol operand this indicates that the immediate is
- /// offset to the PLT entry of symbol name from the current code location.
- ///
- /// See the X86-64 ELF ABI supplement for more details.
- /// SYMBOL_LABEL @PLT
- MO_PLT,
-
- /// MO_TLSGD - On a symbol operand this indicates that the immediate is
- /// the offset of the GOT entry with the TLS index structure that contains
- /// the module number and variable offset for the symbol. Used in the
- /// general dynamic TLS access model.
- ///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
- /// SYMBOL_LABEL @TLSGD
- MO_TLSGD,
-
- /// MO_TLSLD - On a symbol operand this indicates that the immediate is
- /// the offset of the GOT entry with the TLS index for the module that
- /// contains the symbol. When this index is passed to a call to
- /// __tls_get_addr, the function will return the base address of the TLS
- /// block for the symbol. Used in the x86-64 local dynamic TLS access model.
- ///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
- /// SYMBOL_LABEL @TLSLD
- MO_TLSLD,
-
- /// MO_TLSLDM - On a symbol operand this indicates that the immediate is
- /// the offset of the GOT entry with the TLS index for the module that
- /// contains the symbol. When this index is passed to a call to
- /// ___tls_get_addr, the function will return the base address of the TLS
- /// block for the symbol. Used in the IA32 local dynamic TLS access model.
- ///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
- /// SYMBOL_LABEL @TLSLDM
- MO_TLSLDM,
-
- /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
- /// the offset of the GOT entry with the thread-pointer offset for the
- /// symbol. Used in the x86-64 initial exec TLS access model.
- ///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
- /// SYMBOL_LABEL @GOTTPOFF
- MO_GOTTPOFF,
-
- /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is
- /// the absolute address of the GOT entry with the negative thread-pointer
- /// offset for the symbol. Used in the non-PIC IA32 initial exec TLS access
- /// model.
- ///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
- /// SYMBOL_LABEL @INDNTPOFF
- MO_INDNTPOFF,
-
- /// MO_TPOFF - On a symbol operand this indicates that the immediate is
- /// the thread-pointer offset for the symbol. Used in the x86-64 local
- /// exec TLS access model.
- ///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
- /// SYMBOL_LABEL @TPOFF
- MO_TPOFF,
-
- /// MO_DTPOFF - On a symbol operand this indicates that the immediate is
- /// the offset of the GOT entry with the TLS offset of the symbol. Used
- /// in the local dynamic TLS access model.
- ///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
- /// SYMBOL_LABEL @DTPOFF
- MO_DTPOFF,
-
- /// MO_NTPOFF - On a symbol operand this indicates that the immediate is
- /// the negative thread-pointer offset for the symbol. Used in the IA32
- /// local exec TLS access model.
- ///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
- /// SYMBOL_LABEL @NTPOFF
- MO_NTPOFF,
-
- /// MO_GOTNTPOFF - On a symbol operand this indicates that the immediate is
- /// the offset of the GOT entry with the negative thread-pointer offset for
- /// the symbol. Used in the PIC IA32 initial exec TLS access model.
- ///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
- /// SYMBOL_LABEL @GOTNTPOFF
- MO_GOTNTPOFF,
-
- /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the
- /// reference is actually to the "__imp_FOO" symbol. This is used for
- /// dllimport linkage on windows.
- MO_DLLIMPORT,
-
- /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the
- /// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a
- /// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
- MO_DARWIN_NONLAZY,
-
- /// MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates
- /// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is
- /// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
- MO_DARWIN_NONLAZY_PIC_BASE,
-
- /// MO_TLVP - On a symbol operand this indicates that the immediate is
- /// some TLS offset.
- ///
- /// This is the TLS offset for the Darwin TLS mechanism.
- MO_TLVP,
-
- /// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate
- /// is some TLS offset from the picbase.
- ///
- /// This is the 32-bit TLS offset for Darwin TLS in PIC mode.
- MO_TLVP_PIC_BASE,
-
- /// MO_SECREL - On a symbol operand this indicates that the immediate is
- /// the offset from beginning of section.
- ///
- /// This is the TLS offset for the COFF/Windows TLS mechanism.
- MO_SECREL,
-
- /// MO_ABS8 - On a symbol operand this indicates that the symbol is known
- /// to be an absolute symbol in range [0,128), so we can use the @ABS8
- /// symbol modifier.
- MO_ABS8,
-
- /// MO_COFFSTUB - On a symbol operand "FOO", this indicates that the
- /// reference is actually to the ".refptr.FOO" symbol. This is used for
- /// stub symbols on windows.
- MO_COFFSTUB,
- };
-
- enum : uint64_t {
- //===------------------------------------------------------------------===//
- // Instruction encodings. These are the standard/most common forms for X86
- // instructions.
- //
-
- // PseudoFrm - This represents an instruction that is a pseudo instruction
- // or one that has not been implemented yet. It is illegal to code generate
- // it, but tolerated for intermediate implementation stages.
- Pseudo = 0,
-
- /// Raw - This form is for instructions that don't have any operands, so
- /// they are just a fixed opcode value, like 'leave'.
- RawFrm = 1,
-
- /// AddRegFrm - This form is used for instructions like 'push r32' that have
- /// their one register operand added to their opcode.
- AddRegFrm = 2,
-
- /// RawFrmMemOffs - This form is for instructions that store an absolute
- /// memory offset as an immediate with a possible segment override.
- RawFrmMemOffs = 3,
-
- /// RawFrmSrc - This form is for instructions that use the source index
- /// register SI/ESI/RSI with a possible segment override.
- RawFrmSrc = 4,
-
- /// RawFrmDst - This form is for instructions that use the destination index
- /// register DI/EDI/RDI.
- RawFrmDst = 5,
-
- /// RawFrmDstSrc - This form is for instructions that use the source index
- /// register SI/ESI/RSI with a possible segment override, and also the
- /// destination index register DI/EDI/RDI.
- RawFrmDstSrc = 6,
-
- /// RawFrmImm8 - This is used for the ENTER instruction, which has two
- /// immediates, the first of which is a 16-bit immediate (specified by
- /// the imm encoding) and the second is a 8-bit fixed value.
- RawFrmImm8 = 7,
-
- /// RawFrmImm16 - This is used for CALL FAR instructions, which have two
- /// immediates, the first of which is a 16 or 32-bit immediate (specified by
- /// the imm encoding) and the second is a 16-bit fixed value. In the AMD
- /// manual, this operand is described as pntr16:32 and pntr16:16
- RawFrmImm16 = 8,
-
- /// AddCCFrm - This form is used for Jcc that encode the condition code
- /// in the lower 4 bits of the opcode.
- AddCCFrm = 9,
-
- /// PrefixByte - This form is used for instructions that represent a prefix
- /// byte like data16 or rep.
- PrefixByte = 10,
-
- /// MRMDestMem4VOp3CC - This form is used for instructions that use the Mod/RM
- /// byte to specify a destination which in this case is memory and operand 3
- /// with VEX.VVVV, and also encodes a condition code.
- MRMDestMem4VOp3CC = 20,
-
- /// MRM[0-7][rm] - These forms are used to represent instructions that use
- /// a Mod/RM byte, and use the middle field to hold extended opcode
- /// information. In the intel manual these are represented as /0, /1, ...
- ///
-
- // Instructions operate on a register Reg/Opcode operand not the r/m field.
- MRMr0 = 21,
-
- /// MRMSrcMem - But force to use the SIB field.
- MRMSrcMemFSIB = 22,
-
- /// MRMDestMem - But force to use the SIB field.
- MRMDestMemFSIB = 23,
-
- /// MRMDestMem - This form is used for instructions that use the Mod/RM byte
- /// to specify a destination, which in this case is memory.
- ///
- MRMDestMem = 24,
-
- /// MRMSrcMem - This form is used for instructions that use the Mod/RM byte
- /// to specify a source, which in this case is memory.
- ///
- MRMSrcMem = 25,
-
- /// MRMSrcMem4VOp3 - This form is used for instructions that encode
- /// operand 3 with VEX.VVVV and load from memory.
- ///
- MRMSrcMem4VOp3 = 26,
-
- /// MRMSrcMemOp4 - This form is used for instructions that use the Mod/RM
- /// byte to specify the fourth source, which in this case is memory.
- ///
- MRMSrcMemOp4 = 27,
-
- /// MRMSrcMemCC - This form is used for instructions that use the Mod/RM
- /// byte to specify the operands and also encodes a condition code.
- ///
- MRMSrcMemCC = 28,
-
- /// MRMXm - This form is used for instructions that use the Mod/RM byte
- /// to specify a memory source, but doesn't use the middle field. And has
- /// a condition code.
- ///
- MRMXmCC = 30,
-
- /// MRMXm - This form is used for instructions that use the Mod/RM byte
- /// to specify a memory source, but doesn't use the middle field.
- ///
- MRMXm = 31,
-
- // Next, instructions that operate on a memory r/m operand...
- MRM0m = 32, MRM1m = 33, MRM2m = 34, MRM3m = 35, // Format /0 /1 /2 /3
- MRM4m = 36, MRM5m = 37, MRM6m = 38, MRM7m = 39, // Format /4 /5 /6 /7
-
- /// MRMDestReg - This form is used for instructions that use the Mod/RM byte
- /// to specify a destination, which in this case is a register.
- ///
- MRMDestReg = 40,
-
- /// MRMSrcReg - This form is used for instructions that use the Mod/RM byte
- /// to specify a source, which in this case is a register.
- ///
- MRMSrcReg = 41,
-
- /// MRMSrcReg4VOp3 - This form is used for instructions that encode
- /// operand 3 with VEX.VVVV and do not load from memory.
- ///
- MRMSrcReg4VOp3 = 42,
-
- /// MRMSrcRegOp4 - This form is used for instructions that use the Mod/RM
- /// byte to specify the fourth source, which in this case is a register.
- ///
- MRMSrcRegOp4 = 43,
-
- /// MRMSrcRegCC - This form is used for instructions that use the Mod/RM
- /// byte to specify the operands and also encodes a condition code
- ///
- MRMSrcRegCC = 44,
-
- /// MRMXCCr - This form is used for instructions that use the Mod/RM byte
- /// to specify a register source, but doesn't use the middle field. And has
- /// a condition code.
- ///
- MRMXrCC = 46,
-
- /// MRMXr - This form is used for instructions that use the Mod/RM byte
- /// to specify a register source, but doesn't use the middle field.
- ///
- MRMXr = 47,
-
- // Instructions that operate on a register r/m operand...
- MRM0r = 48, MRM1r = 49, MRM2r = 50, MRM3r = 51, // Format /0 /1 /2 /3
- MRM4r = 52, MRM5r = 53, MRM6r = 54, MRM7r = 55, // Format /4 /5 /6 /7
-
- // Instructions that operate that have mod=11 and an opcode but ignore r/m.
- MRM0X = 56, MRM1X = 57, MRM2X = 58, MRM3X = 59, // Format /0 /1 /2 /3
- MRM4X = 60, MRM5X = 61, MRM6X = 62, MRM7X = 63, // Format /4 /5 /6 /7
-
- /// MRM_XX - A mod/rm byte of exactly 0xXX.
- MRM_C0 = 64, MRM_C1 = 65, MRM_C2 = 66, MRM_C3 = 67,
- MRM_C4 = 68, MRM_C5 = 69, MRM_C6 = 70, MRM_C7 = 71,
- MRM_C8 = 72, MRM_C9 = 73, MRM_CA = 74, MRM_CB = 75,
- MRM_CC = 76, MRM_CD = 77, MRM_CE = 78, MRM_CF = 79,
- MRM_D0 = 80, MRM_D1 = 81, MRM_D2 = 82, MRM_D3 = 83,
- MRM_D4 = 84, MRM_D5 = 85, MRM_D6 = 86, MRM_D7 = 87,
- MRM_D8 = 88, MRM_D9 = 89, MRM_DA = 90, MRM_DB = 91,
- MRM_DC = 92, MRM_DD = 93, MRM_DE = 94, MRM_DF = 95,
- MRM_E0 = 96, MRM_E1 = 97, MRM_E2 = 98, MRM_E3 = 99,
- MRM_E4 = 100, MRM_E5 = 101, MRM_E6 = 102, MRM_E7 = 103,
- MRM_E8 = 104, MRM_E9 = 105, MRM_EA = 106, MRM_EB = 107,
- MRM_EC = 108, MRM_ED = 109, MRM_EE = 110, MRM_EF = 111,
- MRM_F0 = 112, MRM_F1 = 113, MRM_F2 = 114, MRM_F3 = 115,
- MRM_F4 = 116, MRM_F5 = 117, MRM_F6 = 118, MRM_F7 = 119,
- MRM_F8 = 120, MRM_F9 = 121, MRM_FA = 122, MRM_FB = 123,
- MRM_FC = 124, MRM_FD = 125, MRM_FE = 126, MRM_FF = 127,
-
- FormMask = 127,
-
- //===------------------------------------------------------------------===//
- // Actual flags...
-
- // OpSize - OpSizeFixed implies instruction never needs a 0x66 prefix.
- // OpSize16 means this is a 16-bit instruction and needs 0x66 prefix in
- // 32-bit mode. OpSize32 means this is a 32-bit instruction needs a 0x66
- // prefix in 16-bit mode.
- OpSizeShift = 7,
- OpSizeMask = 0x3 << OpSizeShift,
-
- OpSizeFixed = 0 << OpSizeShift,
- OpSize16 = 1 << OpSizeShift,
- OpSize32 = 2 << OpSizeShift,
-
- // AsSize - AdSizeX implies this instruction determines its need of 0x67
- // prefix from a normal ModRM memory operand. The other types indicate that
- // an operand is encoded with a specific width and a prefix is needed if
- // it differs from the current mode.
- AdSizeShift = OpSizeShift + 2,
- AdSizeMask = 0x3 << AdSizeShift,
-
- AdSizeX = 0 << AdSizeShift,
- AdSize16 = 1 << AdSizeShift,
- AdSize32 = 2 << AdSizeShift,
- AdSize64 = 3 << AdSizeShift,
-
- //===------------------------------------------------------------------===//
- // OpPrefix - There are several prefix bytes that are used as opcode
- // extensions. These are 0x66, 0xF3, and 0xF2. If this field is 0 there is
- // no prefix.
- //
- OpPrefixShift = AdSizeShift + 2,
- OpPrefixMask = 0x3 << OpPrefixShift,
-
- // PD - Prefix code for packed double precision vector floating point
- // operations performed in the SSE registers.
- PD = 1 << OpPrefixShift,
-
- // XS, XD - These prefix codes are for single and double precision scalar
- // floating point operations performed in the SSE registers.
- XS = 2 << OpPrefixShift, XD = 3 << OpPrefixShift,
-
- //===------------------------------------------------------------------===//
- // OpMap - This field determines which opcode map this instruction
- // belongs to. i.e. one-byte, two-byte, 0x0f 0x38, 0x0f 0x3a, etc.
- //
- OpMapShift = OpPrefixShift + 2,
- OpMapMask = 0xF << OpMapShift,
-
- // OB - OneByte - Set if this instruction has a one byte opcode.
- OB = 0 << OpMapShift,
-
- // TB - TwoByte - Set if this instruction has a two byte opcode, which
- // starts with a 0x0F byte before the real opcode.
- TB = 1 << OpMapShift,
-
- // T8, TA - Prefix after the 0x0F prefix.
- T8 = 2 << OpMapShift, TA = 3 << OpMapShift,
-
- // XOP8 - Prefix to include use of imm byte.
- XOP8 = 4 << OpMapShift,
-
- // XOP9 - Prefix to exclude use of imm byte.
- XOP9 = 5 << OpMapShift,
-
- // XOPA - Prefix to encode 0xA in VEX.MMMM of XOP instructions.
- XOPA = 6 << OpMapShift,
-
- /// ThreeDNow - This indicates that the instruction uses the
- /// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents
- /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction
- /// storing a classifier in the imm8 field. To simplify our implementation,
- /// we handle this by storeing the classifier in the opcode field and using
- /// this flag to indicate that the encoder should do the wacky 3DNow! thing.
- ThreeDNow = 7 << OpMapShift,
-
- // MAP5, MAP6 - Prefix after the 0x0F prefix.
- T_MAP5 = 8 << OpMapShift,
- T_MAP6 = 9 << OpMapShift,
-
- //===------------------------------------------------------------------===//
- // REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
- // They are used to specify GPRs and SSE registers, 64-bit operand size,
- // etc. We only cares about REX.W and REX.R bits and only the former is
- // statically determined.
- //
- REXShift = OpMapShift + 4,
- REX_W = 1 << REXShift,
-
- //===------------------------------------------------------------------===//
- // This three-bit field describes the size of an immediate operand. Zero is
- // unused so that we can tell if we forgot to set a value.
- ImmShift = REXShift + 1,
- ImmMask = 15 << ImmShift,
- Imm8 = 1 << ImmShift,
- Imm8PCRel = 2 << ImmShift,
- Imm8Reg = 3 << ImmShift,
- Imm16 = 4 << ImmShift,
- Imm16PCRel = 5 << ImmShift,
- Imm32 = 6 << ImmShift,
- Imm32PCRel = 7 << ImmShift,
- Imm32S = 8 << ImmShift,
- Imm64 = 9 << ImmShift,
-
- //===------------------------------------------------------------------===//
- // FP Instruction Classification... Zero is non-fp instruction.
-
- // FPTypeMask - Mask for all of the FP types...
- FPTypeShift = ImmShift + 4,
- FPTypeMask = 7 << FPTypeShift,
-
- // NotFP - The default, set for instructions that do not use FP registers.
- NotFP = 0 << FPTypeShift,
-
- // ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0
- ZeroArgFP = 1 << FPTypeShift,
-
- // OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst
- OneArgFP = 2 << FPTypeShift,
-
- // OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a
- // result back to ST(0). For example, fcos, fsqrt, etc.
- //
- OneArgFPRW = 3 << FPTypeShift,
-
- // TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an
- // explicit argument, storing the result to either ST(0) or the implicit
- // argument. For example: fadd, fsub, fmul, etc...
- TwoArgFP = 4 << FPTypeShift,
-
- // CompareFP - 2 arg FP instructions which implicitly read ST(0) and an
- // explicit argument, but have no destination. Example: fucom, fucomi, ...
- CompareFP = 5 << FPTypeShift,
-
- // CondMovFP - "2 operand" floating point conditional move instructions.
- CondMovFP = 6 << FPTypeShift,
-
- // SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
- SpecialFP = 7 << FPTypeShift,
-
- // Lock prefix
- LOCKShift = FPTypeShift + 3,
- LOCK = 1 << LOCKShift,
-
- // REP prefix
- REPShift = LOCKShift + 1,
- REP = 1 << REPShift,
-
- // Execution domain for SSE instructions.
- // 0 means normal, non-SSE instruction.
- SSEDomainShift = REPShift + 1,
-
- // Encoding
- EncodingShift = SSEDomainShift + 2,
- EncodingMask = 0x3 << EncodingShift,
-
- // VEX - encoding using 0xC4/0xC5
- VEX = 1 << EncodingShift,
-
- /// XOP - Opcode prefix used by XOP instructions.
- XOP = 2 << EncodingShift,
-
- // VEX_EVEX - Specifies that this instruction use EVEX form which provides
- // syntax support up to 32 512-bit register operands and up to 7 16-bit
- // mask operands as well as source operand data swizzling/memory operand
- // conversion, eviction hint, and rounding mode.
- EVEX = 3 << EncodingShift,
-
- // Opcode
- OpcodeShift = EncodingShift + 2,
-
- /// VEX_4V - Used to specify an additional AVX/SSE register. Several 2
- /// address instructions in SSE are represented as 3 address ones in AVX
- /// and the additional register is encoded in VEX_VVVV prefix.
- VEX_4VShift = OpcodeShift + 8,
- VEX_4V = 1ULL << VEX_4VShift,
-
- /// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
- /// instruction uses 256-bit wide registers. This is usually auto detected
- /// if a VR256 register is used, but some AVX instructions also have this
- /// field marked when using a f256 memory references.
- VEX_LShift = VEX_4VShift + 1,
- VEX_L = 1ULL << VEX_LShift,
-
- // EVEX_K - Set if this instruction requires masking
- EVEX_KShift = VEX_LShift + 1,
- EVEX_K = 1ULL << EVEX_KShift,
-
- // EVEX_Z - Set if this instruction has EVEX.Z field set.
- EVEX_ZShift = EVEX_KShift + 1,
- EVEX_Z = 1ULL << EVEX_ZShift,
-
- // EVEX_L2 - Set if this instruction has EVEX.L' field set.
- EVEX_L2Shift = EVEX_ZShift + 1,
- EVEX_L2 = 1ULL << EVEX_L2Shift,
-
- // EVEX_B - Set if this instruction has EVEX.B field set.
- EVEX_BShift = EVEX_L2Shift + 1,
- EVEX_B = 1ULL << EVEX_BShift,
-
- // The scaling factor for the AVX512's 8-bit compressed displacement.
- CD8_Scale_Shift = EVEX_BShift + 1,
- CD8_Scale_Mask = 7ULL << CD8_Scale_Shift,
-
- /// Explicitly specified rounding control
- EVEX_RCShift = CD8_Scale_Shift + 3,
- EVEX_RC = 1ULL << EVEX_RCShift,
+/// Target Operand Flag enum.
+enum TOF {
+ //===------------------------------------------------------------------===//
+ // X86 Specific MachineOperand flags.
+ //
+  /// MO_NO_FLAG - No flag for the operand.
+ MO_NO_FLAG,
+ /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a
+ /// relocation of:
+ /// SYMBOL_LABEL + [. - PICBASELABEL]
+ MO_GOT_ABSOLUTE_ADDRESS,
+ /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the
+ /// immediate should get the value of the symbol minus the PIC base label:
+ /// SYMBOL_LABEL - PICBASELABEL
+ MO_PIC_BASE_OFFSET,
+ /// MO_GOT - On a symbol operand this indicates that the immediate is the
+ /// offset to the GOT entry for the symbol name from the base of the GOT.
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @GOT
+ MO_GOT,
+ /// MO_GOTOFF - On a symbol operand this indicates that the immediate is
+ /// the offset to the location of the symbol name from the base of the GOT.
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @GOTOFF
+ MO_GOTOFF,
+ /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is
+ /// offset to the GOT entry for the symbol name from the current code
+ /// location.
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @GOTPCREL
+ MO_GOTPCREL,
+ /// MO_GOTPCREL_NORELAX - Same as MO_GOTPCREL except that R_X86_64_GOTPCREL
+ /// relocations are guaranteed to be emitted by the integrated assembler
+ /// instead of the relaxable R_X86_64[_REX]_GOTPCRELX relocations.
+ MO_GOTPCREL_NORELAX,
+ /// MO_PLT - On a symbol operand this indicates that the immediate is
+ /// offset to the PLT entry of symbol name from the current code location.
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @PLT
+ MO_PLT,
+ /// MO_TLSGD - On a symbol operand this indicates that the immediate is
+ /// the offset of the GOT entry with the TLS index structure that contains
+ /// the module number and variable offset for the symbol. Used in the
+ /// general dynamic TLS access model.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @TLSGD
+ MO_TLSGD,
+ /// MO_TLSLD - On a symbol operand this indicates that the immediate is
+ /// the offset of the GOT entry with the TLS index for the module that
+ /// contains the symbol. When this index is passed to a call to
+ /// __tls_get_addr, the function will return the base address of the TLS
+ /// block for the symbol. Used in the x86-64 local dynamic TLS access model.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @TLSLD
+ MO_TLSLD,
+ /// MO_TLSLDM - On a symbol operand this indicates that the immediate is
+ /// the offset of the GOT entry with the TLS index for the module that
+ /// contains the symbol. When this index is passed to a call to
+ /// ___tls_get_addr, the function will return the base address of the TLS
+ /// block for the symbol. Used in the IA32 local dynamic TLS access model.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @TLSLDM
+ MO_TLSLDM,
+ /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
+ /// the offset of the GOT entry with the thread-pointer offset for the
+ /// symbol. Used in the x86-64 initial exec TLS access model.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @GOTTPOFF
+ MO_GOTTPOFF,
+ /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is
+ /// the absolute address of the GOT entry with the negative thread-pointer
+ /// offset for the symbol. Used in the non-PIC IA32 initial exec TLS access
+ /// model.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @INDNTPOFF
+ MO_INDNTPOFF,
+ /// MO_TPOFF - On a symbol operand this indicates that the immediate is
+ /// the thread-pointer offset for the symbol. Used in the x86-64 local
+ /// exec TLS access model.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @TPOFF
+ MO_TPOFF,
+ /// MO_DTPOFF - On a symbol operand this indicates that the immediate is
+ /// the offset of the GOT entry with the TLS offset of the symbol. Used
+ /// in the local dynamic TLS access model.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @DTPOFF
+ MO_DTPOFF,
+ /// MO_NTPOFF - On a symbol operand this indicates that the immediate is
+ /// the negative thread-pointer offset for the symbol. Used in the IA32
+ /// local exec TLS access model.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @NTPOFF
+ MO_NTPOFF,
+ /// MO_GOTNTPOFF - On a symbol operand this indicates that the immediate is
+ /// the offset of the GOT entry with the negative thread-pointer offset for
+ /// the symbol. Used in the PIC IA32 initial exec TLS access model.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @GOTNTPOFF
+ MO_GOTNTPOFF,
+ /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "__imp_FOO" symbol. This is used for
+ /// dllimport linkage on windows.
+ MO_DLLIMPORT,
+ /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a
+ /// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
+ MO_DARWIN_NONLAZY,
+ /// MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates
+ /// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is
+ /// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
+ MO_DARWIN_NONLAZY_PIC_BASE,
+ /// MO_TLVP - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ /// This is the TLS offset for the Darwin TLS mechanism.
+ MO_TLVP,
+ /// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate
+ /// is some TLS offset from the picbase.
+ /// This is the 32-bit TLS offset for Darwin TLS in PIC mode.
+ MO_TLVP_PIC_BASE,
+ /// MO_SECREL - On a symbol operand this indicates that the immediate is
+ /// the offset from beginning of section.
+ /// This is the TLS offset for the COFF/Windows TLS mechanism.
+ MO_SECREL,
+ /// MO_ABS8 - On a symbol operand this indicates that the symbol is known
+ /// to be an absolute symbol in range [0,128), so we can use the @ABS8
+ /// symbol modifier.
+ MO_ABS8,
+ /// MO_COFFSTUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the ".refptr.FOO" symbol. This is used for
+ /// stub symbols on windows.
+ MO_COFFSTUB,
+};
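These values travel as MachineOperand target flags; a small sketch, assuming MO is a symbol operand:

    // MO is an assumed MachineOperand referring to a global symbol.
    if (MO.isGlobal() && MO.getTargetFlags() == X86II::MO_GOTPCREL) {
      // The symbol is reached PC-relatively through its GOT entry.
    }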
+
+enum : uint64_t {
+ //===------------------------------------------------------------------===//
+ // Instruction encodings. These are the standard/most common forms for X86
+ // instructions.
+ //
+ /// PseudoFrm - This represents an instruction that is a pseudo instruction
+ /// or one that has not been implemented yet. It is illegal to code generate
+ /// it, but tolerated for intermediate implementation stages.
+ Pseudo = 0,
+ /// Raw - This form is for instructions that don't have any operands, so
+ /// they are just a fixed opcode value, like 'leave'.
+ RawFrm = 1,
+ /// AddRegFrm - This form is used for instructions like 'push r32' that have
+ /// their one register operand added to their opcode.
+ AddRegFrm = 2,
+ /// RawFrmMemOffs - This form is for instructions that store an absolute
+ /// memory offset as an immediate with a possible segment override.
+ RawFrmMemOffs = 3,
+ /// RawFrmSrc - This form is for instructions that use the source index
+ /// register SI/ESI/RSI with a possible segment override.
+ RawFrmSrc = 4,
+ /// RawFrmDst - This form is for instructions that use the destination index
+ /// register DI/EDI/RDI.
+ RawFrmDst = 5,
+ /// RawFrmDstSrc - This form is for instructions that use the source index
+ /// register SI/ESI/RSI with a possible segment override, and also the
+ /// destination index register DI/EDI/RDI.
+ RawFrmDstSrc = 6,
+ /// RawFrmImm8 - This is used for the ENTER instruction, which has two
+ /// immediates, the first of which is a 16-bit immediate (specified by
+  /// the imm encoding) and the second is an 8-bit fixed value.
+ RawFrmImm8 = 7,
+  /// RawFrmImm16 - This is used for CALL FAR instructions, which have two
+  /// immediates, the first of which is a 16- or 32-bit immediate (specified by
+  /// the imm encoding) and the second is a 16-bit fixed value. In the AMD
+  /// manual, this operand is described as pntr16:32 and pntr16:16.
+ RawFrmImm16 = 8,
+ /// AddCCFrm - This form is used for Jcc that encode the condition code
+ /// in the lower 4 bits of the opcode.
+ AddCCFrm = 9,
+ /// PrefixByte - This form is used for instructions that represent a prefix
+ /// byte like data16 or rep.
+ PrefixByte = 10,
+ /// MRMDestMem4VOp3CC - This form is used for instructions that use the Mod/RM
+ /// byte to specify a destination which in this case is memory and operand 3
+ /// with VEX.VVVV, and also encodes a condition code.
+ MRMDestMem4VOp3CC = 20,
+  /// MRMr0 - Instructions that operate on a register Reg/Opcode operand, not
+  /// the r/m field.
+ MRMr0 = 21,
+  /// MRMSrcMemFSIB - Like MRMSrcMem, but forces the use of the SIB field.
+  MRMSrcMemFSIB = 22,
+  /// MRMDestMemFSIB - Like MRMDestMem, but forces the use of the SIB field.
+  MRMDestMemFSIB = 23,
+ /// MRMDestMem - This form is used for instructions that use the Mod/RM byte
+ /// to specify a destination, which in this case is memory.
+ MRMDestMem = 24,
+ /// MRMSrcMem - This form is used for instructions that use the Mod/RM byte
+ /// to specify a source, which in this case is memory.
+ MRMSrcMem = 25,
+ /// MRMSrcMem4VOp3 - This form is used for instructions that encode
+ /// operand 3 with VEX.VVVV and load from memory.
+ MRMSrcMem4VOp3 = 26,
+ /// MRMSrcMemOp4 - This form is used for instructions that use the Mod/RM
+ /// byte to specify the fourth source, which in this case is memory.
+ MRMSrcMemOp4 = 27,
+ /// MRMSrcMemCC - This form is used for instructions that use the Mod/RM
+ /// byte to specify the operands and also encodes a condition code.
+ MRMSrcMemCC = 28,
+  /// MRMXmCC - This form is used for instructions that use the Mod/RM byte
+  /// to specify a memory source, don't use the middle field, and also encode
+  /// a condition code.
+ MRMXmCC = 30,
+ /// MRMXm - This form is used for instructions that use the Mod/RM byte
+ /// to specify a memory source, but doesn't use the middle field.
+ MRMXm = 31,
+  /// MRM0m-MRM7m - Instructions that operate on a memory r/m operand and use
+  /// the reg field to hold an extended opcode, represented as /0, /1, ...
+ MRM0m = 32, // Format /0
+ MRM1m = 33, // Format /1
+ MRM2m = 34, // Format /2
+ MRM3m = 35, // Format /3
+ MRM4m = 36, // Format /4
+ MRM5m = 37, // Format /5
+ MRM6m = 38, // Format /6
+ MRM7m = 39, // Format /7
+ /// MRMDestReg - This form is used for instructions that use the Mod/RM byte
+ /// to specify a destination, which in this case is a register.
+ MRMDestReg = 40,
+ /// MRMSrcReg - This form is used for instructions that use the Mod/RM byte
+ /// to specify a source, which in this case is a register.
+ MRMSrcReg = 41,
+ /// MRMSrcReg4VOp3 - This form is used for instructions that encode
+ /// operand 3 with VEX.VVVV and do not load from memory.
+ MRMSrcReg4VOp3 = 42,
+ /// MRMSrcRegOp4 - This form is used for instructions that use the Mod/RM
+ /// byte to specify the fourth source, which in this case is a register.
+ MRMSrcRegOp4 = 43,
+ /// MRMSrcRegCC - This form is used for instructions that use the Mod/RM
+  /// byte to specify the operands and also encode a condition code.
+ MRMSrcRegCC = 44,
+  /// MRMXrCC - This form is used for instructions that use the Mod/RM byte
+  /// to specify a register source, don't use the middle field, and also
+  /// encode a condition code.
+ MRMXrCC = 46,
+ /// MRMXr - This form is used for instructions that use the Mod/RM byte
+ /// to specify a register source, but doesn't use the middle field.
+ MRMXr = 47,
+  /// MRM0r-MRM7r - Instructions that operate on a register r/m operand and
+  /// use the reg field to hold an extended opcode, represented as /0, /1, ...
+ MRM0r = 48, // Format /0
+ MRM1r = 49, // Format /1
+ MRM2r = 50, // Format /2
+ MRM3r = 51, // Format /3
+ MRM4r = 52, // Format /4
+ MRM5r = 53, // Format /5
+ MRM6r = 54, // Format /6
+ MRM7r = 55, // Format /7
+  /// MRM0X-MRM7X - Instructions that have mod=11 and an opcode but ignore
+  /// r/m.
+ MRM0X = 56, // Format /0
+ MRM1X = 57, // Format /1
+ MRM2X = 58, // Format /2
+ MRM3X = 59, // Format /3
+ MRM4X = 60, // Format /4
+ MRM5X = 61, // Format /5
+ MRM6X = 62, // Format /6
+ MRM7X = 63, // Format /7
+  /// MRM_XX (XX: C0-FF) - A mod/rm byte of exactly 0xXX.
+ MRM_C0 = 64,
+ MRM_C1 = 65,
+ MRM_C2 = 66,
+ MRM_C3 = 67,
+ MRM_C4 = 68,
+ MRM_C5 = 69,
+ MRM_C6 = 70,
+ MRM_C7 = 71,
+ MRM_C8 = 72,
+ MRM_C9 = 73,
+ MRM_CA = 74,
+ MRM_CB = 75,
+ MRM_CC = 76,
+ MRM_CD = 77,
+ MRM_CE = 78,
+ MRM_CF = 79,
+ MRM_D0 = 80,
+ MRM_D1 = 81,
+ MRM_D2 = 82,
+ MRM_D3 = 83,
+ MRM_D4 = 84,
+ MRM_D5 = 85,
+ MRM_D6 = 86,
+ MRM_D7 = 87,
+ MRM_D8 = 88,
+ MRM_D9 = 89,
+ MRM_DA = 90,
+ MRM_DB = 91,
+ MRM_DC = 92,
+ MRM_DD = 93,
+ MRM_DE = 94,
+ MRM_DF = 95,
+ MRM_E0 = 96,
+ MRM_E1 = 97,
+ MRM_E2 = 98,
+ MRM_E3 = 99,
+ MRM_E4 = 100,
+ MRM_E5 = 101,
+ MRM_E6 = 102,
+ MRM_E7 = 103,
+ MRM_E8 = 104,
+ MRM_E9 = 105,
+ MRM_EA = 106,
+ MRM_EB = 107,
+ MRM_EC = 108,
+ MRM_ED = 109,
+ MRM_EE = 110,
+ MRM_EF = 111,
+ MRM_F0 = 112,
+ MRM_F1 = 113,
+ MRM_F2 = 114,
+ MRM_F3 = 115,
+ MRM_F4 = 116,
+ MRM_F5 = 117,
+ MRM_F6 = 118,
+ MRM_F7 = 119,
+ MRM_F8 = 120,
+ MRM_F9 = 121,
+ MRM_FA = 122,
+ MRM_FB = 123,
+ MRM_FC = 124,
+ MRM_FD = 125,
+ MRM_FE = 126,
+ MRM_FF = 127,
+ FormMask = 127,
+ //===------------------------------------------------------------------===//
+ // Actual flags...
+  /// OpSize - OpSizeFixed implies the instruction never needs a 0x66 prefix.
+  /// OpSize16 means this is a 16-bit instruction and needs a 0x66 prefix in
+  /// 32-bit mode. OpSize32 means this is a 32-bit instruction and needs a 0x66
+  /// prefix in 16-bit mode.
+ OpSizeShift = 7,
+ OpSizeMask = 0x3 << OpSizeShift,
+ OpSizeFixed = 0 << OpSizeShift,
+ OpSize16 = 1 << OpSizeShift,
+ OpSize32 = 2 << OpSizeShift,
+  /// AdSize - AdSizeX implies this instruction determines its need of 0x67
+ /// prefix from a normal ModRM memory operand. The other types indicate that
+ /// an operand is encoded with a specific width and a prefix is needed if
+ /// it differs from the current mode.
+ AdSizeShift = OpSizeShift + 2,
+ AdSizeMask = 0x3 << AdSizeShift,
+ AdSizeX = 0 << AdSizeShift,
+ AdSize16 = 1 << AdSizeShift,
+ AdSize32 = 2 << AdSizeShift,
+ AdSize64 = 3 << AdSizeShift,
+ //===------------------------------------------------------------------===//
+ /// OpPrefix - There are several prefix bytes that are used as opcode
+ /// extensions. These are 0x66, 0xF3, and 0xF2. If this field is 0 there is
+ /// no prefix.
+ OpPrefixShift = AdSizeShift + 2,
+ OpPrefixMask = 0x3 << OpPrefixShift,
+ /// PD - Prefix code for packed double precision vector floating point
+ /// operations performed in the SSE registers.
+ PD = 1 << OpPrefixShift,
+ /// XS, XD - These prefix codes are for single and double precision scalar
+ /// floating point operations performed in the SSE registers.
+ XS = 2 << OpPrefixShift,
+ XD = 3 << OpPrefixShift,
+ //===------------------------------------------------------------------===//
+  /// OpMap - This field determines which opcode map this instruction
+  /// belongs to: one-byte, two-byte, 0x0f 0x38, 0x0f 0x3a, etc.
+ OpMapShift = OpPrefixShift + 2,
+ OpMapMask = 0xF << OpMapShift,
+ /// OB - OneByte - Set if this instruction has a one byte opcode.
+ OB = 0 << OpMapShift,
+ /// TB - TwoByte - Set if this instruction has a two byte opcode, which
+ /// starts with a 0x0F byte before the real opcode.
+ TB = 1 << OpMapShift,
+ /// T8, TA - Prefix after the 0x0F prefix.
+ T8 = 2 << OpMapShift,
+ TA = 3 << OpMapShift,
+ /// XOP8 - Prefix to include use of imm byte.
+ XOP8 = 4 << OpMapShift,
+ /// XOP9 - Prefix to exclude use of imm byte.
+ XOP9 = 5 << OpMapShift,
+ /// XOPA - Prefix to encode 0xA in VEX.MMMM of XOP instructions.
+ XOPA = 6 << OpMapShift,
+ /// ThreeDNow - This indicates that the instruction uses the
+ /// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents
+ /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction
+ /// storing a classifier in the imm8 field. To simplify our implementation,
+  /// we handle this by storing the classifier in the opcode field and using
+ /// this flag to indicate that the encoder should do the wacky 3DNow! thing.
+ ThreeDNow = 7 << OpMapShift,
+ /// MAP4, MAP5, MAP6, MAP7 - Prefix after the 0x0F prefix.
+ T_MAP4 = 8 << OpMapShift,
+ T_MAP5 = 9 << OpMapShift,
+ T_MAP6 = 10 << OpMapShift,
+ T_MAP7 = 11 << OpMapShift,
+ //===------------------------------------------------------------------===//
+ /// REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
+ /// They are used to specify GPRs and SSE registers, 64-bit operand size,
+  /// etc. We only care about the REX.W and REX.R bits, and only the former is
+ /// statically determined.
+ REXShift = OpMapShift + 4,
+ REX_W = 1 << REXShift,
+ //===------------------------------------------------------------------===//
+ // This 4-bit field describes the size of an immediate operand. Zero is
+ // unused so that we can tell if we forgot to set a value.
+ ImmShift = REXShift + 1,
+ Imm8 = 1 << ImmShift,
+ Imm8PCRel = 2 << ImmShift,
+ Imm8Reg = 3 << ImmShift,
+ Imm16 = 4 << ImmShift,
+ Imm16PCRel = 5 << ImmShift,
+ Imm32 = 6 << ImmShift,
+ Imm32PCRel = 7 << ImmShift,
+ Imm32S = 8 << ImmShift,
+ Imm64 = 9 << ImmShift,
+ ImmMask = 15 << ImmShift,
+ //===------------------------------------------------------------------===//
+ /// FP Instruction Classification... Zero is non-fp instruction.
+ /// FPTypeMask - Mask for all of the FP types...
+ FPTypeShift = ImmShift + 4,
+ FPTypeMask = 7 << FPTypeShift,
+ /// NotFP - The default, set for instructions that do not use FP registers.
+ NotFP = 0 << FPTypeShift,
+  /// ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), e.g. fld0
+ ZeroArgFP = 1 << FPTypeShift,
+ /// OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst
+ OneArgFP = 2 << FPTypeShift,
+  /// OneArgFPRW - 1 arg FP instruction which implicitly reads ST(0) and writes
+  /// a result back to ST(0). For example, fcos, fsqrt, etc.
+ OneArgFPRW = 3 << FPTypeShift,
+ /// TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an
+ /// explicit argument, storing the result to either ST(0) or the implicit
+ /// argument. For example: fadd, fsub, fmul, etc...
+ TwoArgFP = 4 << FPTypeShift,
+ /// CompareFP - 2 arg FP instructions which implicitly read ST(0) and an
+ /// explicit argument, but have no destination. Example: fucom, fucomi, ...
+ CompareFP = 5 << FPTypeShift,
+ /// CondMovFP - "2 operand" floating point conditional move instructions.
+ CondMovFP = 6 << FPTypeShift,
+ /// SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
+ SpecialFP = 7 << FPTypeShift,
+ /// Lock prefix
+ LOCKShift = FPTypeShift + 3,
+ LOCK = 1 << LOCKShift,
+ /// REP prefix
+ REPShift = LOCKShift + 1,
+ REP = 1 << REPShift,
+ /// Execution domain for SSE instructions.
+ /// 0 means normal, non-SSE instruction.
+ SSEDomainShift = REPShift + 1,
+ /// Encoding
+ EncodingShift = SSEDomainShift + 2,
+ EncodingMask = 0x3 << EncodingShift,
+ /// VEX - encoding using 0xC4/0xC5
+ VEX = 1 << EncodingShift,
+ /// XOP - Opcode prefix used by XOP instructions.
+ XOP = 2 << EncodingShift,
+  /// EVEX - Specifies that this instruction uses the EVEX form, which provides
+  /// syntax support for up to 32 512-bit register operands and up to 7 16-bit
+  /// mask operands, as well as source operand data swizzling/memory operand
+  /// conversion, eviction hint, and rounding mode.
+ EVEX = 3 << EncodingShift,
+ /// Opcode
+ OpcodeShift = EncodingShift + 2,
+ /// VEX_4V - Used to specify an additional AVX/SSE register. Several 2
+ /// address instructions in SSE are represented as 3 address ones in AVX
+ /// and the additional register is encoded in VEX_VVVV prefix.
+ VEX_4VShift = OpcodeShift + 8,
+ VEX_4V = 1ULL << VEX_4VShift,
+ /// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
+ /// instruction uses 256-bit wide registers. This is usually auto detected
+ /// if a VR256 register is used, but some AVX instructions also have this
+  /// field marked when using f256 memory references.
+ VEX_LShift = VEX_4VShift + 1,
+ VEX_L = 1ULL << VEX_LShift,
+ /// EVEX_K - Set if this instruction requires masking
+ EVEX_KShift = VEX_LShift + 1,
+ EVEX_K = 1ULL << EVEX_KShift,
+ /// EVEX_Z - Set if this instruction has EVEX.Z field set.
+ EVEX_ZShift = EVEX_KShift + 1,
+ EVEX_Z = 1ULL << EVEX_ZShift,
+ /// EVEX_L2 - Set if this instruction has EVEX.L' field set.
+ EVEX_L2Shift = EVEX_ZShift + 1,
+ EVEX_L2 = 1ULL << EVEX_L2Shift,
+ /// EVEX_B - Set if this instruction has EVEX.B field set.
+ EVEX_BShift = EVEX_L2Shift + 1,
+ EVEX_B = 1ULL << EVEX_BShift,
+  /// The scaling factor for AVX512's 8-bit compressed displacement.
+ CD8_Scale_Shift = EVEX_BShift + 1,
+ CD8_Scale_Mask = 7ULL << CD8_Scale_Shift,
+ /// Explicitly specified rounding control
+ EVEX_RCShift = CD8_Scale_Shift + 3,
+ EVEX_RC = 1ULL << EVEX_RCShift,
+ /// NOTRACK prefix
+ NoTrackShift = EVEX_RCShift + 1,
+ NOTRACK = 1ULL << NoTrackShift,
+ /// Force REX2/VEX/EVEX encoding
+ ExplicitOpPrefixShift = NoTrackShift + 1,
+ /// For instructions that require REX2 prefix even if EGPR is not used.
+ ExplicitREX2Prefix = 1ULL << ExplicitOpPrefixShift,
+ /// For instructions that use VEX encoding only when {vex}, {vex2} or {vex3}
+ /// is present.
+ ExplicitVEXPrefix = 2ULL << ExplicitOpPrefixShift,
+ /// For instructions that are promoted to EVEX space for EGPR.
+ ExplicitEVEXPrefix = 3ULL << ExplicitOpPrefixShift,
+ ExplicitOpPrefixMask = 3ULL << ExplicitOpPrefixShift
+};
+
+/// \returns true if the instruction described by the given TSFlags is a
+/// prefix.
+inline bool isPrefix(uint64_t TSFlags) {
+ return (TSFlags & X86II::FormMask) == PrefixByte;
+}
- // NOTRACK prefix
- NoTrackShift = EVEX_RCShift + 1,
- NOTRACK = 1ULL << NoTrackShift,
+/// \returns true if the instruction described by the given TSFlags is a
+/// pseudo.
+inline bool isPseudo(uint64_t TSFlags) {
+ return (TSFlags & X86II::FormMask) == Pseudo;
+}
- // Force VEX encoding
- ExplicitVEXShift = NoTrackShift + 1,
- ExplicitVEXPrefix = 1ULL << ExplicitVEXShift
- };
+/// \returns the "base" X86 opcode for the specified machine
+/// instruction.
+inline uint8_t getBaseOpcodeFor(uint64_t TSFlags) {
+ return TSFlags >> X86II::OpcodeShift;
+}
- /// \returns true if the instruction with given opcode is a prefix.
- inline bool isPrefix(uint64_t TSFlags) {
- return (TSFlags & X86II::FormMask) == PrefixByte;
+inline bool hasImm(uint64_t TSFlags) { return (TSFlags & X86II::ImmMask) != 0; }
+
+/// Decode the "size of immediate" field from the TSFlags field of the
+/// specified instruction.
+inline unsigned getSizeOfImm(uint64_t TSFlags) {
+ switch (TSFlags & X86II::ImmMask) {
+ default:
+ llvm_unreachable("Unknown immediate size");
+ case X86II::Imm8:
+ case X86II::Imm8PCRel:
+ case X86II::Imm8Reg:
+ return 1;
+ case X86II::Imm16:
+ case X86II::Imm16PCRel:
+ return 2;
+ case X86II::Imm32:
+ case X86II::Imm32S:
+ case X86II::Imm32PCRel:
+ return 4;
+ case X86II::Imm64:
+ return 8;
}
+}
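A sketch of the usual query pattern, assuming MCII is an MCInstrInfo and MI an MCInst:

    // TSFlags lives on the MCInstrDesc for the opcode.
    const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
    uint64_t TSFlags = Desc.TSFlags;
    if (X86II::hasImm(TSFlags)) {
      unsigned ImmBytes = X86II::getSizeOfImm(TSFlags); // 1, 2, 4, or 8
      bool IsPCRel = X86II::isImmPCRel(TSFlags);
      (void)ImmBytes; (void)IsPCRel;
    }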
- /// \returns true if the instruction with given opcode is a pseudo.
- inline bool isPseudo(uint64_t TSFlags) {
- return (TSFlags & X86II::FormMask) == Pseudo;
+/// \returns true if the immediate of the specified instruction's TSFlags
+/// indicates that it is pc relative.
+inline bool isImmPCRel(uint64_t TSFlags) {
+ switch (TSFlags & X86II::ImmMask) {
+ default:
+ llvm_unreachable("Unknown immediate size");
+ case X86II::Imm8PCRel:
+ case X86II::Imm16PCRel:
+ case X86II::Imm32PCRel:
+ return true;
+ case X86II::Imm8:
+ case X86II::Imm8Reg:
+ case X86II::Imm16:
+ case X86II::Imm32:
+ case X86II::Imm32S:
+ case X86II::Imm64:
+ return false;
}
+}
- /// \returns the "base" X86 opcode for the specified machine
- /// instruction.
- inline uint8_t getBaseOpcodeFor(uint64_t TSFlags) {
- return TSFlags >> X86II::OpcodeShift;
+/// \returns true if the immediate of the specified instruction's
+/// TSFlags indicates that it is signed.
+inline bool isImmSigned(uint64_t TSFlags) {
+ switch (TSFlags & X86II::ImmMask) {
+ default:
+ llvm_unreachable("Unknown immediate signedness");
+ case X86II::Imm32S:
+ return true;
+ case X86II::Imm8:
+ case X86II::Imm8PCRel:
+ case X86II::Imm8Reg:
+ case X86II::Imm16:
+ case X86II::Imm16PCRel:
+ case X86II::Imm32:
+ case X86II::Imm32PCRel:
+ case X86II::Imm64:
+ return false;
}
+}
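Taken together, hasImm, getSizeOfImm, isImmPCRel and isImmSigned are what an emitter uses to size and classify a trailing immediate. A rough sketch, where MCII and MI stand in for a real MCInstrInfo and MCInst (both assumed, not from the patch):

    const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
    uint64_t TSFlags = Desc.TSFlags;
    if (X86II::hasImm(TSFlags)) {
      unsigned Bytes = X86II::getSizeOfImm(TSFlags); // 1, 2, 4 or 8
      bool PCRel  = X86II::isImmPCRel(TSFlags);      // relocated vs. absolute
      bool Signed = X86II::isImmSigned(TSFlags);     // only Imm32S today
      (void)Bytes; (void)PCRel; (void)Signed;
    }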
- inline bool hasImm(uint64_t TSFlags) {
- return (TSFlags & X86II::ImmMask) != 0;
+/// Compute whether all of the def operands are repeated in the uses and
+/// therefore should be skipped.
+/// This determines the start of the unique operand list. We need to determine
+/// if all of the defs have a corresponding tied operand in the uses.
+/// Unfortunately, the tied operand information is encoded in the uses not
+/// the defs so we have to use some heuristics to find which operands to
+/// query.
+inline unsigned getOperandBias(const MCInstrDesc &Desc) {
+ unsigned NumDefs = Desc.getNumDefs();
+ unsigned NumOps = Desc.getNumOperands();
+ switch (NumDefs) {
+ default:
+ llvm_unreachable("Unexpected number of defs");
+ case 0:
+ return 0;
+ case 1:
+ // Common two addr case.
+ if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0)
+ return 1;
+ // Check for AVX-512 scatter which has a TIED_TO in the second to last
+ // operand.
+ if (NumOps == 8 && Desc.getOperandConstraint(6, MCOI::TIED_TO) == 0)
+ return 1;
+ return 0;
+ case 2:
+ // XCHG/XADD have two destinations and two sources.
+ if (NumOps >= 4 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0 &&
+ Desc.getOperandConstraint(3, MCOI::TIED_TO) == 1)
+ return 2;
+ // Check for gather. AVX-512 has the second tied operand early. AVX2
+ // has it as the last op.
+ if (NumOps == 9 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0 &&
+ (Desc.getOperandConstraint(3, MCOI::TIED_TO) == 1 ||
+ Desc.getOperandConstraint(8, MCOI::TIED_TO) == 1))
+ return 2;
+ return 0;
}
+}
- /// Decode the "size of immediate" field from the TSFlags field of the
- /// specified instruction.
- inline unsigned getSizeOfImm(uint64_t TSFlags) {
- switch (TSFlags & X86II::ImmMask) {
- default: llvm_unreachable("Unknown immediate size");
- case X86II::Imm8:
- case X86II::Imm8PCRel:
- case X86II::Imm8Reg: return 1;
- case X86II::Imm16:
- case X86II::Imm16PCRel: return 2;
- case X86II::Imm32:
- case X86II::Imm32S:
- case X86II::Imm32PCRel: return 4;
- case X86II::Imm64: return 8;
- }
+/// \returns the operand # for the first field of the memory operand, or -1 if
+/// the instruction has no memory operands.
+/// NOTE: This ignores tied operands. If there is a tied register which is
+/// duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only counted
+/// as one operand.
+inline int getMemoryOperandNo(uint64_t TSFlags) {
+ bool HasVEX_4V = TSFlags & X86II::VEX_4V;
+ bool HasEVEX_K = TSFlags & X86II::EVEX_K;
+
+ switch (TSFlags & X86II::FormMask) {
+ default:
+ llvm_unreachable("Unknown FormMask value in getMemoryOperandNo!");
+ case X86II::Pseudo:
+ case X86II::RawFrm:
+ case X86II::AddRegFrm:
+ case X86II::RawFrmImm8:
+ case X86II::RawFrmImm16:
+ case X86II::RawFrmMemOffs:
+ case X86II::RawFrmSrc:
+ case X86II::RawFrmDst:
+ case X86II::RawFrmDstSrc:
+ case X86II::AddCCFrm:
+ case X86II::PrefixByte:
+ return -1;
+ case X86II::MRMDestMem:
+ case X86II::MRMDestMemFSIB:
+ return 0;
+ case X86II::MRMSrcMem:
+ case X86II::MRMSrcMemFSIB:
+ // Start from 1, skip any registers encoded in VEX_VVVV or I8IMM, or a
+ // mask register.
+ return 1 + HasVEX_4V + HasEVEX_K;
+ case X86II::MRMSrcMem4VOp3:
+ // Skip registers encoded in reg.
+ return 1 + HasEVEX_K;
+ case X86II::MRMSrcMemOp4:
+ // Skip registers encoded in reg, VEX_VVVV, and I8IMM.
+ return 3;
+ case X86II::MRMSrcMemCC:
+ case X86II::MRMDestMem4VOp3CC:
+ // Start from 1, skip any registers encoded in VEX_VVVV or I8IMM, or a
+ // mask register.
+ return 1;
+ case X86II::MRMDestReg:
+ case X86II::MRMSrcReg:
+ case X86II::MRMSrcReg4VOp3:
+ case X86II::MRMSrcRegOp4:
+ case X86II::MRMSrcRegCC:
+ case X86II::MRMXrCC:
+ case X86II::MRMr0:
+ case X86II::MRMXr:
+ case X86II::MRM0r:
+ case X86II::MRM1r:
+ case X86II::MRM2r:
+ case X86II::MRM3r:
+ case X86II::MRM4r:
+ case X86II::MRM5r:
+ case X86II::MRM6r:
+ case X86II::MRM7r:
+ return -1;
+ case X86II::MRM0X:
+ case X86II::MRM1X:
+ case X86II::MRM2X:
+ case X86II::MRM3X:
+ case X86II::MRM4X:
+ case X86II::MRM5X:
+ case X86II::MRM6X:
+ case X86II::MRM7X:
+ return -1;
+ case X86II::MRMXmCC:
+ case X86II::MRMXm:
+ case X86II::MRM0m:
+ case X86II::MRM1m:
+ case X86II::MRM2m:
+ case X86II::MRM3m:
+ case X86II::MRM4m:
+ case X86II::MRM5m:
+ case X86II::MRM6m:
+ case X86II::MRM7m:
+ // Start from 0, skip registers encoded in VEX_VVVV or a mask register.
+ return 0 + HasVEX_4V + HasEVEX_K;
+ case X86II::MRM_C0:
+ case X86II::MRM_C1:
+ case X86II::MRM_C2:
+ case X86II::MRM_C3:
+ case X86II::MRM_C4:
+ case X86II::MRM_C5:
+ case X86II::MRM_C6:
+ case X86II::MRM_C7:
+ case X86II::MRM_C8:
+ case X86II::MRM_C9:
+ case X86II::MRM_CA:
+ case X86II::MRM_CB:
+ case X86II::MRM_CC:
+ case X86II::MRM_CD:
+ case X86II::MRM_CE:
+ case X86II::MRM_CF:
+ case X86II::MRM_D0:
+ case X86II::MRM_D1:
+ case X86II::MRM_D2:
+ case X86II::MRM_D3:
+ case X86II::MRM_D4:
+ case X86II::MRM_D5:
+ case X86II::MRM_D6:
+ case X86II::MRM_D7:
+ case X86II::MRM_D8:
+ case X86II::MRM_D9:
+ case X86II::MRM_DA:
+ case X86II::MRM_DB:
+ case X86II::MRM_DC:
+ case X86II::MRM_DD:
+ case X86II::MRM_DE:
+ case X86II::MRM_DF:
+ case X86II::MRM_E0:
+ case X86II::MRM_E1:
+ case X86II::MRM_E2:
+ case X86II::MRM_E3:
+ case X86II::MRM_E4:
+ case X86II::MRM_E5:
+ case X86II::MRM_E6:
+ case X86II::MRM_E7:
+ case X86II::MRM_E8:
+ case X86II::MRM_E9:
+ case X86II::MRM_EA:
+ case X86II::MRM_EB:
+ case X86II::MRM_EC:
+ case X86II::MRM_ED:
+ case X86II::MRM_EE:
+ case X86II::MRM_EF:
+ case X86II::MRM_F0:
+ case X86II::MRM_F1:
+ case X86II::MRM_F2:
+ case X86II::MRM_F3:
+ case X86II::MRM_F4:
+ case X86II::MRM_F5:
+ case X86II::MRM_F6:
+ case X86II::MRM_F7:
+ case X86II::MRM_F8:
+ case X86II::MRM_F9:
+ case X86II::MRM_FA:
+ case X86II::MRM_FB:
+ case X86II::MRM_FC:
+ case X86II::MRM_FD:
+ case X86II::MRM_FE:
+ case X86II::MRM_FF:
+ return -1;
}
+}
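Because getMemoryOperandNo deliberately ignores the skipped defs, callers add getOperandBias on top of it; this pairing is how the code emitter locates the five-operand memory group (sketch, with Desc standing in for the instruction's MCInstrDesc):

    int MemOp = X86II::getMemoryOperandNo(Desc.TSFlags);
    if (MemOp != -1)
      MemOp += X86II::getOperandBias(Desc); // first of the address operands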
- /// \returns true if the immediate of the specified instruction's TSFlags
- /// indicates that it is pc relative.
- inline bool isImmPCRel(uint64_t TSFlags) {
- switch (TSFlags & X86II::ImmMask) {
- default: llvm_unreachable("Unknown immediate size");
- case X86II::Imm8PCRel:
- case X86II::Imm16PCRel:
- case X86II::Imm32PCRel:
- return true;
- case X86II::Imm8:
- case X86II::Imm8Reg:
- case X86II::Imm16:
- case X86II::Imm32:
- case X86II::Imm32S:
- case X86II::Imm64:
- return false;
- }
- }
+/// \returns true if the register is an XMM register.
+inline bool isXMMReg(unsigned RegNo) {
+ assert(X86::XMM15 - X86::XMM0 == 15 &&
+ "XMM0-15 registers are not continuous");
+ assert(X86::XMM31 - X86::XMM16 == 15 &&
+ "XMM16-31 registers are not continuous");
+ return (RegNo >= X86::XMM0 && RegNo <= X86::XMM15) ||
+ (RegNo >= X86::XMM16 && RegNo <= X86::XMM31);
+}
- /// \returns true if the immediate of the specified instruction's
- /// TSFlags indicates that it is signed.
- inline bool isImmSigned(uint64_t TSFlags) {
- switch (TSFlags & X86II::ImmMask) {
- default: llvm_unreachable("Unknown immediate signedness");
- case X86II::Imm32S:
- return true;
- case X86II::Imm8:
- case X86II::Imm8PCRel:
- case X86II::Imm8Reg:
- case X86II::Imm16:
- case X86II::Imm16PCRel:
- case X86II::Imm32:
- case X86II::Imm32PCRel:
- case X86II::Imm64:
- return false;
- }
- }
+/// \returns true if the register is a YMM register.
+inline bool isYMMReg(unsigned RegNo) {
+ assert(X86::YMM15 - X86::YMM0 == 15 &&
+ "YMM0-15 registers are not continuous");
+ assert(X86::YMM31 - X86::YMM16 == 15 &&
+ "YMM16-31 registers are not continuous");
+ return (RegNo >= X86::YMM0 && RegNo <= X86::YMM15) ||
+ (RegNo >= X86::YMM16 && RegNo <= X86::YMM31);
+}
- /// Compute whether all of the def operands are repeated in the uses and
- /// therefore should be skipped.
- /// This determines the start of the unique operand list. We need to determine
- /// if all of the defs have a corresponding tied operand in the uses.
- /// Unfortunately, the tied operand information is encoded in the uses not
- /// the defs so we have to use some heuristics to find which operands to
- /// query.
- inline unsigned getOperandBias(const MCInstrDesc& Desc) {
- unsigned NumDefs = Desc.getNumDefs();
- unsigned NumOps = Desc.getNumOperands();
- switch (NumDefs) {
- default: llvm_unreachable("Unexpected number of defs");
- case 0:
- return 0;
- case 1:
- // Common two addr case.
- if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0)
- return 1;
- // Check for AVX-512 scatter which has a TIED_TO in the second to last
- // operand.
- if (NumOps == 8 &&
- Desc.getOperandConstraint(6, MCOI::TIED_TO) == 0)
- return 1;
- return 0;
- case 2:
- // XCHG/XADD have two destinations and two sources.
- if (NumOps >= 4 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0 &&
- Desc.getOperandConstraint(3, MCOI::TIED_TO) == 1)
- return 2;
- // Check for gather. AVX-512 has the second tied operand early. AVX2
- // has it as the last op.
- if (NumOps == 9 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0 &&
- (Desc.getOperandConstraint(3, MCOI::TIED_TO) == 1 ||
- Desc.getOperandConstraint(8, MCOI::TIED_TO) == 1))
- return 2;
- return 0;
- }
- }
+/// \returns true if the register is a ZMM register.
+inline bool isZMMReg(unsigned RegNo) {
+ assert(X86::ZMM31 - X86::ZMM0 == 31 && "ZMM registers are not continuous");
+ return RegNo >= X86::ZMM0 && RegNo <= X86::ZMM31;
+}
- /// The function returns the MCInst operand # for the first field of the
- /// memory operand. If the instruction doesn't have a
- /// memory operand, this returns -1.
- ///
- /// Note that this ignores tied operands. If there is a tied register which
- /// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only
- /// counted as one operand.
- ///
- inline int getMemoryOperandNo(uint64_t TSFlags) {
- bool HasVEX_4V = TSFlags & X86II::VEX_4V;
- bool HasEVEX_K = TSFlags & X86II::EVEX_K;
+/// \returns true if \p RegNo is an APX extended register.
+inline bool isApxExtendedReg(unsigned RegNo) {
+ assert(X86::R31WH - X86::R16 == 95 && "EGPRs are not continuous");
+ return RegNo >= X86::R16 && RegNo <= X86::R31WH;
+}
- switch (TSFlags & X86II::FormMask) {
- default: llvm_unreachable("Unknown FormMask value in getMemoryOperandNo!");
- case X86II::Pseudo:
- case X86II::RawFrm:
- case X86II::AddRegFrm:
- case X86II::RawFrmImm8:
- case X86II::RawFrmImm16:
- case X86II::RawFrmMemOffs:
- case X86II::RawFrmSrc:
- case X86II::RawFrmDst:
- case X86II::RawFrmDstSrc:
- case X86II::AddCCFrm:
- case X86II::PrefixByte:
- return -1;
- case X86II::MRMDestMem:
- case X86II::MRMDestMemFSIB:
- return 0;
- case X86II::MRMSrcMem:
- case X86II::MRMSrcMemFSIB:
- // Start from 1, skip any registers encoded in VEX_VVVV or I8IMM, or a
- // mask register.
- return 1 + HasVEX_4V + HasEVEX_K;
- case X86II::MRMSrcMem4VOp3:
- // Skip registers encoded in reg.
- return 1 + HasEVEX_K;
- case X86II::MRMSrcMemOp4:
- // Skip registers encoded in reg, VEX_VVVV, and I8IMM.
- return 3;
- case X86II::MRMSrcMemCC:
- case X86II::MRMDestMem4VOp3CC:
- // Start from 1, skip any registers encoded in VEX_VVVV or I8IMM, or a
- // mask register.
- return 1;
- case X86II::MRMDestReg:
- case X86II::MRMSrcReg:
- case X86II::MRMSrcReg4VOp3:
- case X86II::MRMSrcRegOp4:
- case X86II::MRMSrcRegCC:
- case X86II::MRMXrCC:
- case X86II::MRMr0:
- case X86II::MRMXr:
- case X86II::MRM0r: case X86II::MRM1r:
- case X86II::MRM2r: case X86II::MRM3r:
- case X86II::MRM4r: case X86II::MRM5r:
- case X86II::MRM6r: case X86II::MRM7r:
- return -1;
- case X86II::MRM0X: case X86II::MRM1X:
- case X86II::MRM2X: case X86II::MRM3X:
- case X86II::MRM4X: case X86II::MRM5X:
- case X86II::MRM6X: case X86II::MRM7X:
- return -1;
- case X86II::MRMXmCC:
- case X86II::MRMXm:
- case X86II::MRM0m: case X86II::MRM1m:
- case X86II::MRM2m: case X86II::MRM3m:
- case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m:
- // Start from 0, skip registers encoded in VEX_VVVV or a mask register.
- return 0 + HasVEX_4V + HasEVEX_K;
- case X86II::MRM_C0: case X86II::MRM_C1: case X86II::MRM_C2:
- case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C5:
- case X86II::MRM_C6: case X86II::MRM_C7: case X86II::MRM_C8:
- case X86II::MRM_C9: case X86II::MRM_CA: case X86II::MRM_CB:
- case X86II::MRM_CC: case X86II::MRM_CD: case X86II::MRM_CE:
- case X86II::MRM_CF: case X86II::MRM_D0: case X86II::MRM_D1:
- case X86II::MRM_D2: case X86II::MRM_D3: case X86II::MRM_D4:
- case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D7:
- case X86II::MRM_D8: case X86II::MRM_D9: case X86II::MRM_DA:
- case X86II::MRM_DB: case X86II::MRM_DC: case X86II::MRM_DD:
- case X86II::MRM_DE: case X86II::MRM_DF: case X86II::MRM_E0:
- case X86II::MRM_E1: case X86II::MRM_E2: case X86II::MRM_E3:
- case X86II::MRM_E4: case X86II::MRM_E5: case X86II::MRM_E6:
- case X86II::MRM_E7: case X86II::MRM_E8: case X86II::MRM_E9:
- case X86II::MRM_EA: case X86II::MRM_EB: case X86II::MRM_EC:
- case X86II::MRM_ED: case X86II::MRM_EE: case X86II::MRM_EF:
- case X86II::MRM_F0: case X86II::MRM_F1: case X86II::MRM_F2:
- case X86II::MRM_F3: case X86II::MRM_F4: case X86II::MRM_F5:
- case X86II::MRM_F6: case X86II::MRM_F7: case X86II::MRM_F8:
- case X86II::MRM_F9: case X86II::MRM_FA: case X86II::MRM_FB:
- case X86II::MRM_FC: case X86II::MRM_FD: case X86II::MRM_FE:
- case X86II::MRM_FF:
- return -1;
- }
+/// \returns true if the MachineOperand is an x86-64 extended (r8 or
+/// higher) register, e.g. r8, xmm8, xmm13, etc.
+inline bool isX86_64ExtendedReg(unsigned RegNo) {
+ if ((RegNo >= X86::XMM8 && RegNo <= X86::XMM15) ||
+ (RegNo >= X86::XMM16 && RegNo <= X86::XMM31) ||
+ (RegNo >= X86::YMM8 && RegNo <= X86::YMM15) ||
+ (RegNo >= X86::YMM16 && RegNo <= X86::YMM31) ||
+ (RegNo >= X86::ZMM8 && RegNo <= X86::ZMM31))
+ return true;
+
+ if (isApxExtendedReg(RegNo))
+ return true;
+
+ switch (RegNo) {
+ default:
+ break;
+ case X86::R8:
+ case X86::R9:
+ case X86::R10:
+ case X86::R11:
+ case X86::R12:
+ case X86::R13:
+ case X86::R14:
+ case X86::R15:
+ case X86::R8D:
+ case X86::R9D:
+ case X86::R10D:
+ case X86::R11D:
+ case X86::R12D:
+ case X86::R13D:
+ case X86::R14D:
+ case X86::R15D:
+ case X86::R8W:
+ case X86::R9W:
+ case X86::R10W:
+ case X86::R11W:
+ case X86::R12W:
+ case X86::R13W:
+ case X86::R14W:
+ case X86::R15W:
+ case X86::R8B:
+ case X86::R9B:
+ case X86::R10B:
+ case X86::R11B:
+ case X86::R12B:
+ case X86::R13B:
+ case X86::R14B:
+ case X86::R15B:
+ case X86::CR8:
+ case X86::CR9:
+ case X86::CR10:
+ case X86::CR11:
+ case X86::CR12:
+ case X86::CR13:
+ case X86::CR14:
+ case X86::CR15:
+ case X86::DR8:
+ case X86::DR9:
+ case X86::DR10:
+ case X86::DR11:
+ case X86::DR12:
+ case X86::DR13:
+ case X86::DR14:
+ case X86::DR15:
+ return true;
}
+ return false;
+}
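A short sketch of how the two predicates above partition the register file for prefix selection (RegNo is assumed to hold an MCRegister id):

    // r8..r15 and xmm8+/ymm8+/zmm8+ need an extension bit in REX/VEX/EVEX;
    // r16..r31 (the APX EGPRs) additionally require REX2 or EVEX.
    bool NeedsExtBit     = X86II::isX86_64ExtendedReg(RegNo);
    bool NeedsRex2OrEvex = X86II::isApxExtendedReg(RegNo);
    (void)NeedsExtBit; (void)NeedsRex2OrEvex;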
- /// \returns true if the MachineOperand is a x86-64 extended (r8 or
- /// higher) register, e.g. r8, xmm8, xmm13, etc.
- inline bool isX86_64ExtendedReg(unsigned RegNo) {
- if ((RegNo >= X86::XMM8 && RegNo <= X86::XMM31) ||
- (RegNo >= X86::YMM8 && RegNo <= X86::YMM31) ||
- (RegNo >= X86::ZMM8 && RegNo <= X86::ZMM31))
- return true;
-
- switch (RegNo) {
- default: break;
- case X86::R8: case X86::R9: case X86::R10: case X86::R11:
- case X86::R12: case X86::R13: case X86::R14: case X86::R15:
- case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D:
- case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D:
- case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W:
- case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W:
- case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B:
- case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B:
- case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11:
- case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15:
- case X86::DR8: case X86::DR9: case X86::DR10: case X86::DR11:
- case X86::DR12: case X86::DR13: case X86::DR14: case X86::DR15:
- return true;
- }
+inline bool canUseApxExtendedReg(const MCInstrDesc &Desc) {
+ uint64_t TSFlags = Desc.TSFlags;
+ uint64_t Encoding = TSFlags & EncodingMask;
+ // EVEX can always use egpr.
+ if (Encoding == X86II::EVEX)
+ return true;
+
+ // To be conservative, EGPR is not used for pseudo instructions because we
+ // are not sure what instruction they will become.
+ // FIXME: Could we improve this in X86ExpandPseudo?
+ if (isPseudo(TSFlags))
return false;
- }
- /// \returns true if the MemoryOperand is a 32 extended (zmm16 or higher)
- /// registers, e.g. zmm21, etc.
- static inline bool is32ExtendedReg(unsigned RegNo) {
- return ((RegNo >= X86::XMM16 && RegNo <= X86::XMM31) ||
- (RegNo >= X86::YMM16 && RegNo <= X86::YMM31) ||
- (RegNo >= X86::ZMM16 && RegNo <= X86::ZMM31));
- }
-
-
- inline bool isX86_64NonExtLowByteReg(unsigned reg) {
- return (reg == X86::SPL || reg == X86::BPL ||
- reg == X86::SIL || reg == X86::DIL);
+ // Maps OB/TB in the legacy encoding space can always use EGPR, except for
+ // XSAVE*/XRSTOR*.
+ unsigned Opcode = Desc.Opcode;
+ switch (Opcode) {
+ default:
+ break;
+ case X86::XSAVE:
+ case X86::XSAVE64:
+ case X86::XSAVEOPT:
+ case X86::XSAVEOPT64:
+ case X86::XSAVEC:
+ case X86::XSAVEC64:
+ case X86::XSAVES:
+ case X86::XSAVES64:
+ case X86::XRSTOR:
+ case X86::XRSTOR64:
+ case X86::XRSTORS:
+ case X86::XRSTORS64:
+ return false;
}
+ uint64_t OpMap = TSFlags & X86II::OpMapMask;
+ return !Encoding && (OpMap == X86II::OB || OpMap == X86II::TB);
+}
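A sketch of the intended call site for canUseApxExtendedReg, assuming a hypothetical rewriter that is deciding whether an instruction may be given an EGPR (MCII and Opcode are assumptions):

    const MCInstrDesc &Desc = MCII.get(Opcode);
    if (X86II::canUseApxExtendedReg(Desc)) {
      // Safe to substitute X86::R16..X86::R31 here: the instruction is
      // EVEX, or a non-pseudo in legacy map 0/0F other than XSAVE*/XRSTOR*.
    }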
- /// \returns true if this is a masked instruction.
- inline bool isKMasked(uint64_t TSFlags) {
- return (TSFlags & X86II::EVEX_K) != 0;
- }
+/// \returns true if the register is one of the 32 extended registers
+/// (zmm16 or higher), e.g. zmm21.
+static inline bool is32ExtendedReg(unsigned RegNo) {
+ return ((RegNo >= X86::XMM16 && RegNo <= X86::XMM31) ||
+ (RegNo >= X86::YMM16 && RegNo <= X86::YMM31) ||
+ (RegNo >= X86::ZMM16 && RegNo <= X86::ZMM31));
+}
- /// \returns true if this is a merge masked instruction.
- inline bool isKMergeMasked(uint64_t TSFlags) {
- return isKMasked(TSFlags) && (TSFlags & X86II::EVEX_Z) == 0;
- }
+inline bool isX86_64NonExtLowByteReg(unsigned reg) {
+ return (reg == X86::SPL || reg == X86::BPL || reg == X86::SIL ||
+ reg == X86::DIL);
}
-} // end namespace llvm;
+/// \returns true if this is a masked instruction.
+inline bool isKMasked(uint64_t TSFlags) {
+ return (TSFlags & X86II::EVEX_K) != 0;
+}
+/// \returns true if this is a merge masked instruction.
+inline bool isKMergeMasked(uint64_t TSFlags) {
+ return isKMasked(TSFlags) && (TSFlags & X86II::EVEX_Z) == 0;
+}
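The two masking predicates are meant to be read together; a minimal sketch:

    if (X86II::isKMasked(TSFlags)) {
      bool Merge = X86II::isKMergeMasked(TSFlags); // EVEX.z == 0
      // Merge-masking keeps the destination's old lanes; otherwise the
      // masked-off lanes are zeroed (EVEX.z == 1).
      (void)Merge;
    }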
+} // namespace X86II
+} // namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index d083bf245af2..373e29bf6a83 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -140,8 +140,9 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
}
llvm_unreachable("unexpected relocation type!");
case MCSymbolRefExpr::VK_GOTOFF:
- assert(Type == RT64_64);
assert(!IsPCRel);
+ if (Type != RT64_64)
+ Ctx.reportError(Loc, "unsupported relocation type");
return ELF::R_X86_64_GOTOFF64;
case MCSymbolRefExpr::VK_TPOFF:
assert(!IsPCRel);
@@ -229,7 +230,7 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
enum X86_32RelType { RT32_NONE, RT32_32, RT32_16, RT32_8 };
-static unsigned getRelocType32(MCContext &Ctx,
+static unsigned getRelocType32(MCContext &Ctx, SMLoc Loc,
MCSymbolRefExpr::VariantKind Modifier,
X86_32RelType Type, bool IsPCRel,
MCFixupKind Kind) {
@@ -252,7 +253,8 @@ static unsigned getRelocType32(MCContext &Ctx,
}
llvm_unreachable("unexpected relocation type!");
case MCSymbolRefExpr::VK_GOT:
- assert(Type == RT32_32);
+ if (Type != RT32_32)
+ break;
if (IsPCRel)
return ELF::R_386_GOTPC;
// Older versions of ld.bfd/ld.gold/lld do not support R_386_GOT32X and we
@@ -264,49 +266,61 @@ static unsigned getRelocType32(MCContext &Ctx,
? ELF::R_386_GOT32X
: ELF::R_386_GOT32;
case MCSymbolRefExpr::VK_GOTOFF:
- assert(Type == RT32_32);
assert(!IsPCRel);
+ if (Type != RT32_32)
+ break;
return ELF::R_386_GOTOFF;
case MCSymbolRefExpr::VK_TLSCALL:
return ELF::R_386_TLS_DESC_CALL;
case MCSymbolRefExpr::VK_TLSDESC:
return ELF::R_386_TLS_GOTDESC;
case MCSymbolRefExpr::VK_TPOFF:
- assert(Type == RT32_32);
+ if (Type != RT32_32)
+ break;
assert(!IsPCRel);
return ELF::R_386_TLS_LE_32;
case MCSymbolRefExpr::VK_DTPOFF:
- assert(Type == RT32_32);
+ if (Type != RT32_32)
+ break;
assert(!IsPCRel);
return ELF::R_386_TLS_LDO_32;
case MCSymbolRefExpr::VK_TLSGD:
- assert(Type == RT32_32);
+ if (Type != RT32_32)
+ break;
assert(!IsPCRel);
return ELF::R_386_TLS_GD;
case MCSymbolRefExpr::VK_GOTTPOFF:
- assert(Type == RT32_32);
+ if (Type != RT32_32)
+ break;
assert(!IsPCRel);
return ELF::R_386_TLS_IE_32;
case MCSymbolRefExpr::VK_PLT:
- assert(Type == RT32_32);
+ if (Type != RT32_32)
+ break;
return ELF::R_386_PLT32;
case MCSymbolRefExpr::VK_INDNTPOFF:
- assert(Type == RT32_32);
+ if (Type != RT32_32)
+ break;
assert(!IsPCRel);
return ELF::R_386_TLS_IE;
case MCSymbolRefExpr::VK_NTPOFF:
- assert(Type == RT32_32);
+ if (Type != RT32_32)
+ break;
assert(!IsPCRel);
return ELF::R_386_TLS_LE;
case MCSymbolRefExpr::VK_GOTNTPOFF:
- assert(Type == RT32_32);
+ if (Type != RT32_32)
+ break;
assert(!IsPCRel);
return ELF::R_386_TLS_GOTIE;
case MCSymbolRefExpr::VK_TLSLDM:
- assert(Type == RT32_32);
+ if (Type != RT32_32)
+ break;
assert(!IsPCRel);
return ELF::R_386_TLS_LDM;
}
+ Ctx.reportError(Loc, "unsupported relocation type");
+ return ELF::R_386_NONE;
}
unsigned X86ELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
@@ -329,7 +343,7 @@ unsigned X86ELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
break;
case RT64_64:
Ctx.reportError(Fixup.getLoc(), "unsupported relocation type");
- break;
+ return ELF::R_386_NONE;
case RT64_32:
case RT64_32S:
RelType = RT32_32;
@@ -341,7 +355,7 @@ unsigned X86ELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
RelType = RT32_8;
break;
}
- return getRelocType32(Ctx, Modifier, RelType, IsPCRel, Kind);
+ return getRelocType32(Ctx, Fixup.getLoc(), Modifier, RelType, IsPCRel, Kind);
}
std::unique_ptr<MCObjectTargetWriter>
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
index 031ba9f87acb..20b37d5a9990 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
@@ -234,11 +234,11 @@ using namespace llvm;
CASE_AVX_INS_COMMON(Inst##SS4, , mr_Int)
static unsigned getVectorRegSize(unsigned RegNo) {
- if (X86::ZMM0 <= RegNo && RegNo <= X86::ZMM31)
+ if (X86II::isZMMReg(RegNo))
return 512;
- if (X86::YMM0 <= RegNo && RegNo <= X86::YMM31)
+ if (X86II::isYMMReg(RegNo))
return 256;
- if (X86::XMM0 <= RegNo && RegNo <= X86::XMM31)
+ if (X86II::isXMMReg(RegNo))
return 128;
if (X86::MM0 <= RegNo && RegNo <= X86::MM7)
return 64;
@@ -1285,8 +1285,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
break;
- case X86::VBROADCASTF128:
- case X86::VBROADCASTI128:
+ case X86::VBROADCASTF128rm:
+ case X86::VBROADCASTI128rm:
CASE_AVX512_INS_COMMON(BROADCASTF64X2, Z128, rm)
CASE_AVX512_INS_COMMON(BROADCASTI64X2, Z128, rm)
DecodeSubVectorBroadcast(4, 2, ShuffleMask);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
index f2cb3dfc8ed0..cab2f0a2e1c1 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -322,15 +322,13 @@ void X86InstPrinterCommon::printPCRelImm(const MCInst *MI, uint64_t Address,
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isImm()) {
- O << markup("<imm:");
if (PrintBranchImmAsAddress) {
uint64_t Target = Address + Op.getImm();
if (MAI.getCodePointerSize() == 4)
Target &= 0xffffffff;
- O << formatHex(Target);
+ markup(O, Markup::Target) << formatHex(Target);
} else
- O << formatImm(Op.getImm());
- O << markup(">");
+ markup(O, Markup::Immediate) << formatImm(Op.getImm());
} else {
assert(Op.isExpr() && "unknown pcrel immediate operand");
// If a symbolic branch target was added as a constant expression then print
@@ -338,7 +336,7 @@ void X86InstPrinterCommon::printPCRelImm(const MCInst *MI, uint64_t Address,
const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
int64_t Address;
if (BranchTarget && BranchTarget->evaluateAsAbsolute(Address)) {
- O << markup("<imm:") << formatHex((uint64_t)Address) << markup(">");
+ markup(O, Markup::Immediate) << formatHex((uint64_t)Address);
} else {
// Otherwise, just print the expression.
Op.getExpr()->print(O, &MAI);
@@ -372,13 +370,15 @@ void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O,
O << "\trep\t";
// These all require a pseudo prefix
- if ((Flags & X86::IP_USE_VEX) || (TSFlags & X86II::ExplicitVEXPrefix))
+ if ((Flags & X86::IP_USE_VEX) ||
+ (TSFlags & X86II::ExplicitOpPrefixMask) == X86II::ExplicitVEXPrefix)
O << "\t{vex}";
else if (Flags & X86::IP_USE_VEX2)
O << "\t{vex2}";
else if (Flags & X86::IP_USE_VEX3)
O << "\t{vex3}";
- else if (Flags & X86::IP_USE_EVEX)
+ else if ((Flags & X86::IP_USE_EVEX) ||
+ (TSFlags & X86II::ExplicitOpPrefixMask) == X86II::ExplicitEVEXPrefix)
O << "\t{evex}";
if (Flags & X86::IP_USE_DISP8)
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
index ecdc9090ac64..0705700c7817 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
@@ -34,7 +34,7 @@ using namespace llvm;
#include "X86GenAsmWriter1.inc"
void X86IntelInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
- OS << markup("<reg:") << getRegisterName(Reg) << markup(">");
+ markup(OS, Markup::Register) << getRegisterName(Reg);
}
void X86IntelInstPrinter::printInst(const MCInst *MI, uint64_t Address,
@@ -361,7 +361,7 @@ void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (Op.isReg()) {
printRegName(O, Op.getReg());
} else if (Op.isImm()) {
- O << markup("<imm:") << formatImm((int64_t)Op.getImm()) << markup(">");
+ markup(O, Markup::Immediate) << formatImm((int64_t)Op.getImm());
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
O << "offset ";
@@ -388,7 +388,8 @@ void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
// If this has a segment register, print it.
printOptionalSegReg(MI, Op + X86::AddrSegmentReg, O);
- O << markup("<mem:") << '[';
+ WithMarkup M = markup(O, Markup::Memory);
+ O << '[';
bool NeedPlus = false;
if (BaseReg.getReg()) {
@@ -419,28 +420,33 @@ void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
DispVal = -DispVal;
}
}
- O << markup("<imm:") << formatImm(DispVal) << markup(">");
+ markup(O, Markup::Immediate) << formatImm(DispVal);
}
}
- O << ']' << markup(">");
+ O << ']';
}
void X86IntelInstPrinter::printSrcIdx(const MCInst *MI, unsigned Op,
raw_ostream &O) {
// If this has a segment register, print it.
printOptionalSegReg(MI, Op + 1, O);
- O << markup("<mem:") << '[';
+
+ WithMarkup M = markup(O, Markup::Memory);
+ O << '[';
printOperand(MI, Op, O);
- O << ']' << markup(">");
+ O << ']';
}
void X86IntelInstPrinter::printDstIdx(const MCInst *MI, unsigned Op,
raw_ostream &O) {
// DI accesses are always ES-based.
- O << "es:" << markup("<mem:") << '[';
+ O << "es:";
+
+ WithMarkup M = markup(O, Markup::Memory);
+ O << '[';
printOperand(MI, Op, O);
- O << ']' << markup(">");
+ O << ']';
}
void X86IntelInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
@@ -450,16 +456,17 @@ void X86IntelInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
// If this has a segment register, print it.
printOptionalSegReg(MI, Op + 1, O);
- O << markup("<mem:") << '[';
+ WithMarkup M = markup(O, Markup::Memory);
+ O << '[';
if (DispSpec.isImm()) {
- O << markup("<imm:") << formatImm(DispSpec.getImm()) << markup(">");
+ markup(O, Markup::Immediate) << formatImm(DispSpec.getImm());
} else {
assert(DispSpec.isExpr() && "non-immediate displacement?");
DispSpec.getExpr()->print(O, &MAI);
}
- O << ']' << markup(">");
+ O << ']';
}
void X86IntelInstPrinter::printU8Imm(const MCInst *MI, unsigned Op,
@@ -467,8 +474,7 @@ void X86IntelInstPrinter::printU8Imm(const MCInst *MI, unsigned Op,
if (MI->getOperand(Op).isExpr())
return MI->getOperand(Op).getExpr()->print(O, &MAI);
- O << markup("<imm:") << formatImm(MI->getOperand(Op).getImm() & 0xff)
- << markup(">");
+ markup(O, Markup::Immediate) << formatImm(MI->getOperand(Op).getImm() & 0xff);
}
void X86IntelInstPrinter::printSTiRegOperand(const MCInst *MI, unsigned OpNo,
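The recurring change in these printer hunks is the move from paired markup("<mem:") ... markup(">") calls to WithMarkup, an RAII helper on MCInstPrinter that closes the annotation when the object goes out of scope. The idiom, sketched:

    {
      WithMarkup M = markup(O, Markup::Memory); // opens "<mem:" when enabled
      O << '[';
      printOperand(MI, Op, O);
      O << ']';
    } // M's destructor emits the closing ">"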
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 6af3ebb2feae..b6ebbcf56aef 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -36,7 +36,7 @@ using namespace llvm;
namespace {
-enum PrefixKind { None, REX, XOP, VEX2, VEX3, EVEX };
+enum PrefixKind { None, REX, REX2, XOP, VEX2, VEX3, EVEX };
static void emitByte(uint8_t C, SmallVectorImpl<char> &CB) { CB.push_back(C); }
@@ -46,6 +46,11 @@ class X86OpcodePrefixHelper {
// | 40H | | WRXB |
// +-----+ +------+
+ // REX2 (2 bytes)
+ // +-----+ +-------------------+
+ // | D5H | | M | R'X'B' | WRXB |
+ // +-----+ +-------------------+
+
// XOP (3-byte)
// +-----+ +--------------+ +-------------------+
// | 8Fh | | RXB | m-mmmm | | W | vvvv | L | pp |
@@ -89,6 +94,7 @@ class X86OpcodePrefixHelper {
// 0b00100: Reserved for future use
// 0b00101: VEX MAP5
// 0b00110: VEX MAP6
+ // 0b00111: VEX MAP7
// 0b00111-0b11111: Reserved for future use
// 0b01000: XOP map select - 08h instructions with imm byte
// 0b01001: XOP map select - 09h instructions with no imm byte
@@ -105,9 +111,9 @@ class X86OpcodePrefixHelper {
// 0b11: F2
// EVEX (4 bytes)
- // +-----+ +--------------+ +-------------------+ +------------------------+
- // | 62h | | RXBR' | 0mmm | | W | vvvv | 1 | pp | | z | L'L | b | v' | aaa |
- // +-----+ +--------------+ +-------------------+ +------------------------+
+ // +-----+ +---------------+ +--------------------+ +------------------------+
+ // | 62h | | RXBR' | B'mmm | | W | vvvv | X' | pp | | z | L'L | b | v' | aaa |
+ // +-----+ +---------------+ +--------------------+ +------------------------+
// EVEX_L2/VEX_L (Vector Length):
// L2 L
@@ -115,16 +121,39 @@ class X86OpcodePrefixHelper {
// 0 1: 256-bit vector
// 1 0: 512-bit vector
+ // 32-Register Support in 64-bit Mode Using EVEX with Embedded REX/REX2 Bits:
+ //
+ // +----------+---------+--------+-----------+---------+--------------+
+ // | | 4 | 3 | [2:0] | Type | Common Usage |
+ // +----------+---------+--------+-----------+---------+--------------+
+ // | REG | EVEX_R' | EVEX_R | modrm.reg | GPR, VR | Dest or Src |
+ // | VVVV | EVEX_v' | EVEX.vvvv | GPR, VR | Dest or Src |
+ // | RM (VR) | EVEX_X | EVEX_B | modrm.r/m | VR | Dest or Src |
+ // | RM (GPR) | EVEX_B' | EVEX_B | modrm.r/m | GPR | Dest or Src |
+ // | BASE | EVEX_B' | EVEX_B | modrm.r/m | GPR | MA |
+ // | INDEX | EVEX_X' | EVEX_X | sib.index | GPR | MA |
+ // | VIDX | EVEX_v' | EVEX_X | sib.index | VR | VSIB MA |
+ // +----------+---------+--------+-----------+---------+--------------+
+ //
+ // * GPR - General-purpose register
+ // * VR - Vector register
+ // * VIDX - Vector index
+ // * VSIB - Vector SIB
+ // * MA - Memory addressing
+
private:
unsigned W : 1;
unsigned R : 1;
unsigned X : 1;
unsigned B : 1;
+ unsigned M : 1;
+ unsigned R2 : 1;
+ unsigned X2 : 1;
+ unsigned B2 : 1;
unsigned VEX_4V : 4;
unsigned VEX_L : 1;
unsigned VEX_PP : 2;
unsigned VEX_5M : 5;
- unsigned EVEX_R2 : 1;
unsigned EVEX_z : 1;
unsigned EVEX_L2 : 1;
unsigned EVEX_b : 1;
@@ -138,7 +167,20 @@ private:
}
void setR(unsigned Encoding) { R = Encoding >> 3 & 1; }
- void setR2(unsigned Encoding) { EVEX_R2 = Encoding >> 4 & 1; }
+ void setR2(unsigned Encoding) {
+ R2 = Encoding >> 4 & 1;
+ assert((!R2 || (Kind <= REX2 || Kind == EVEX)) && "invalid setting");
+ }
+ void setX(unsigned Encoding) { X = Encoding >> 3 & 1; }
+ void setX2(unsigned Encoding) {
+ assert((Kind <= REX2 || Kind == EVEX) && "invalid setting");
+ X2 = Encoding >> 4 & 1;
+ }
+ void setB(unsigned Encoding) { B = Encoding >> 3 & 1; }
+ void setB2(unsigned Encoding) {
+ assert((Kind <= REX2 || Kind == EVEX) && "invalid setting");
+ B2 = Encoding >> 4 & 1;
+ }
void set4V(unsigned Encoding) { VEX_4V = Encoding & 0xf; }
void setV2(unsigned Encoding) { EVEX_V2 = Encoding >> 4 & 1; }
@@ -148,7 +190,12 @@ public:
setR(getRegEncoding(MI, OpNum));
}
void setX(const MCInst &MI, unsigned OpNum, unsigned Shift = 3) {
- X = getRegEncoding(MI, OpNum) >> Shift & 1;
+ unsigned Reg = MI.getOperand(OpNum).getReg();
+ // X is used to extend a vector register only when Shift is not 3.
+ if (Shift != 3 && X86II::isApxExtendedReg(Reg))
+ return;
+ unsigned Encoding = MRI.getEncodingValue(Reg);
+ X = Encoding >> Shift & 1;
}
void setB(const MCInst &MI, unsigned OpNum) {
B = getRegEncoding(MI, OpNum) >> 3 & 1;
@@ -167,11 +214,34 @@ public:
setR(Encoding);
setR2(Encoding);
}
+ void setM(bool V) { M = V; }
+ void setXX2(const MCInst &MI, unsigned OpNum) {
+ unsigned Reg = MI.getOperand(OpNum).getReg();
+ unsigned Encoding = MRI.getEncodingValue(Reg);
+ setX(Encoding);
+ // The index can be a vector register, while X2 is used to extend GPRs only.
+ if (Kind <= REX2 || X86II::isApxExtendedReg(Reg))
+ setX2(Encoding);
+ }
+ void setBB2(const MCInst &MI, unsigned OpNum) {
+ unsigned Reg = MI.getOperand(OpNum).getReg();
+ unsigned Encoding = MRI.getEncodingValue(Reg);
+ setB(Encoding);
+ // The base can be a vector register, while B2 is used to extend GPRs only.
+ if (Kind <= REX2 || X86II::isApxExtendedReg(Reg))
+ setB2(Encoding);
+ }
void setZ(bool V) { EVEX_z = V; }
void setL2(bool V) { EVEX_L2 = V; }
void setEVEX_b(bool V) { EVEX_b = V; }
- void setV2(const MCInst &MI, unsigned OpNum) {
- setV2(getRegEncoding(MI, OpNum));
+ void setV2(const MCInst &MI, unsigned OpNum, bool HasVEX_4V) {
+ // Only needed with VSIB, which doesn't use VVVV.
+ if (HasVEX_4V)
+ return;
+ unsigned Reg = MI.getOperand(OpNum).getReg();
+ if (X86II::isApxExtendedReg(Reg))
+ return;
+ setV2(MRI.getEncodingValue(Reg));
}
void set4VV2(const MCInst &MI, unsigned OpNum) {
unsigned Encoding = getRegEncoding(MI, OpNum);
@@ -183,18 +253,24 @@ public:
}
X86OpcodePrefixHelper(const MCRegisterInfo &MRI)
- : W(0), R(0), X(0), B(0), VEX_4V(0), VEX_L(0), VEX_PP(0), VEX_5M(0),
- EVEX_R2(0), EVEX_z(0), EVEX_L2(0), EVEX_b(0), EVEX_V2(0), EVEX_aaa(0),
- MRI(MRI) {}
+ : W(0), R(0), X(0), B(0), M(0), R2(0), X2(0), B2(0), VEX_4V(0), VEX_L(0),
+ VEX_PP(0), VEX_5M(0), EVEX_z(0), EVEX_L2(0), EVEX_b(0), EVEX_V2(0),
+ EVEX_aaa(0), MRI(MRI) {}
void setLowerBound(PrefixKind K) { Kind = K; }
PrefixKind determineOptimalKind() {
switch (Kind) {
case None:
- Kind = (W | R | X | B) ? REX : None;
+ // The M bit is deliberately left out here because
+ // 1. there is no guarantee that REX2 is supported by the arch without
+ //    explicit EGPR, and
+ // 2. REX2 is longer than 0FH.
+ Kind = (R2 | X2 | B2) ? REX2 : (W | R | X | B) ? REX : None;
break;
case REX:
+ Kind = (R2 | X2 | B2) ? REX2 : REX;
+ break;
+ case REX2:
case XOP:
case VEX3:
case EVEX:
@@ -216,6 +292,12 @@ public:
case REX:
emitByte(0x40 | W << 3 | R << 2 | X << 1 | B, CB);
return;
+ case REX2:
+ emitByte(0xD5, CB);
+ emitByte(M << 7 | R2 << 6 | X2 << 5 | B2 << 4 | W << 3 | R << 2 | X << 1 |
+ B,
+ CB);
+ return;
case VEX2:
emitByte(0xC5, CB);
emitByte(((~R) & 1) << 7 | LastPayload, CB);
@@ -229,8 +311,9 @@ public:
case EVEX:
assert(VEX_5M && !(VEX_5M & 0x8) && "invalid mmm fields for EVEX!");
emitByte(0x62, CB);
- emitByte(FirstPayload | ((~EVEX_R2) & 0x1) << 4 | VEX_5M, CB);
- emitByte(W << 7 | ((~VEX_4V) & 0xf) << 3 | 1 << 2 | VEX_PP, CB);
+ emitByte(FirstPayload | ((~R2) & 0x1) << 4 | B2 << 3 | VEX_5M, CB);
+ emitByte(W << 7 | ((~VEX_4V) & 0xf) << 3 | ((~X2) & 0x1) << 2 | VEX_PP,
+ CB);
emitByte(EVEX_z << 7 | EVEX_L2 << 6 | VEX_L << 5 | EVEX_b << 4 |
((~EVEX_V2) & 0x1) << 3 | EVEX_aaa,
CB);
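As a worked example of the REX2 case a few lines up (hypothetical operands): a register with encoding 26 (r26, binary 11010) in modrm.reg sets R = 1 (bit 3) and R2 = 1 (bit 4), so with M, W, X, X2, B and B2 all zero the payload is 0b01000100 and the emitted prefix is:

    emitByte(0xD5, CB); // REX2 escape
    emitByte(0x44, CB); // M<<7 | R2<<6 | X2<<5 | B2<<4 | W<<3 | R<<2 | X<<1 | B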
@@ -285,6 +368,7 @@ private:
SmallVectorImpl<char> &CB) const;
PrefixKind emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
+ const MCSubtargetInfo &STI,
SmallVectorImpl<char> &CB) const;
void emitSegmentOverridePrefix(unsigned SegOperand, const MCInst &MI,
@@ -546,7 +630,8 @@ void X86MCCodeEmitter::emitMemModRMByte(
// movq loads is a subset of reloc_riprel_4byte_relax_rex. It is a
// special case because COFF and Mach-O don't support ELF's more
// flexible R_X86_64_REX_GOTPCRELX relaxation.
- assert(Kind == REX);
+ // TODO: Support a new relocation for REX2.
+ assert(Kind == REX || Kind == REX2);
return X86::reloc_riprel_4byte_movq_load;
case X86::ADC32rm:
case X86::ADD32rm:
@@ -570,8 +655,11 @@ void X86MCCodeEmitter::emitMemModRMByte(
case X86::SBB64rm:
case X86::SUB64rm:
case X86::XOR64rm:
- return Kind == REX ? X86::reloc_riprel_4byte_relax_rex
- : X86::reloc_riprel_4byte_relax;
+ // We do not support a relocation for the REX2 prefix yet, so temporarily
+ // use the REX relocation.
+ // TODO: Support a new relocation for REX2.
+ return (Kind == REX || Kind == REX2) ? X86::reloc_riprel_4byte_relax_rex
+ : X86::reloc_riprel_4byte_relax;
}
}();
@@ -664,11 +752,11 @@ void X86MCCodeEmitter::emitMemModRMByte(
bool AllowDisp8 = !UseDisp32;
// Determine whether a SIB byte is needed.
- if (// The SIB byte must be used if there is an index register or the
- // encoding requires a SIB byte.
+ if ( // The SIB byte must be used if there is an index register or the
+ // encoding requires a SIB byte.
!ForceSIB && IndexReg.getReg() == 0 &&
- // The SIB byte must be used if the base is ESP/RSP/R12, all of which
- // encode to an R/M value of 4, which indicates that a SIB byte is
+ // The SIB byte must be used if the base is ESP/RSP/R12/R20/R28, all of
+ // which encode to an R/M value of 4, which indicates that a SIB byte is
// present.
BaseRegNo != N86::ESP &&
// If there is no base register and we're in 64-bit mode, we need a SIB
@@ -681,10 +769,11 @@ void X86MCCodeEmitter::emitMemModRMByte(
return;
}
- // If the base is not EBP/ESP/R12/R13 and there is no displacement, use
- // simple indirect register encoding, this handles addresses like [EAX].
- // The encoding for [EBP] or[R13] with no displacement means [disp32] so we
- // handle it by emitting a displacement of 0 later.
+ // If the base is not EBP/ESP/R12/R13/R20/R21/R28/R29 and there is no
+ // displacement, use simple indirect register encoding, this handles
+ // addresses like [EAX]. The encoding for [EBP], [R13], [R20], [R21], [R28]
+ // or [R29] with no displacement means [disp32] so we handle it by emitting
+ // a displacement of 0 later.
if (BaseRegNo != N86::EBP) {
if (Disp.isImm() && Disp.getImm() == 0 && AllowNoDisp) {
emitByte(modRMByte(0, RegOpcodeField, BaseRegNo), CB);
@@ -706,8 +795,8 @@ void X86MCCodeEmitter::emitMemModRMByte(
// Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
// Including a compressed disp8 for EVEX instructions that support it.
- // This also handles the 0 displacement for [EBP] or [R13]. We can't use
- // disp8 if the {disp32} pseudo prefix is present.
+ // This also handles the 0 displacement for [EBP], [R13], [R21] or [R29]. We
+ // can't use disp8 if the {disp32} pseudo prefix is present.
if (Disp.isImm() && AllowDisp8) {
int ImmOffset = 0;
if (isDispOrCDisp8(TSFlags, Disp.getImm(), ImmOffset)) {
@@ -719,8 +808,8 @@ void X86MCCodeEmitter::emitMemModRMByte(
}
// Otherwise, emit the most general non-SIB encoding: [REG+disp32].
- // Displacement may be 0 for [EBP] or [R13] case if {disp32} pseudo prefix
- // prevented using disp8 above.
+ // Displacement may be 0 for [EBP], [R13], [R21], [R29] case if {disp32}
+ // pseudo prefix prevented using disp8 above.
emitByte(modRMByte(2, RegOpcodeField, BaseRegNo), CB);
unsigned Opcode = MI.getOpcode();
unsigned FixupKind = Opcode == X86::MOV32rm ? X86::reloc_signed_4byte_relax
@@ -744,18 +833,18 @@ void X86MCCodeEmitter::emitMemModRMByte(
emitByte(modRMByte(0, RegOpcodeField, 4), CB);
ForceDisp32 = true;
} else if (Disp.isImm() && Disp.getImm() == 0 && AllowNoDisp &&
- // Base reg can't be EBP/RBP/R13 as that would end up with '5' as
- // the base field, but that is the magic [*] nomenclature that
- // indicates no base when mod=0. For these cases we'll emit a 0
- // displacement instead.
+ // Base reg can't be EBP/RBP/R13/R21/R29 as that would end up with
+ // '5' as the base field, but that is the magic [*] nomenclature
+ // that indicates no base when mod=0. For these cases we'll emit a
+ // 0 displacement instead.
BaseRegNo != N86::EBP) {
// Emit no displacement ModR/M byte
emitByte(modRMByte(0, RegOpcodeField, 4), CB);
} else if (Disp.isImm() && AllowDisp8 &&
isDispOrCDisp8(TSFlags, Disp.getImm(), ImmOffset)) {
// Displacement fits in a byte or matches an EVEX compressed disp8, use
- // disp8 encoding. This also handles EBP/R13 base with 0 displacement unless
- // {disp32} pseudo prefix was used.
+ // disp8 encoding. This also handles EBP/R13/R21/R29 base with 0
+ // displacement unless {disp32} pseudo prefix was used.
emitByte(modRMByte(1, RegOpcodeField, 4), CB);
ForceDisp8 = true;
} else {
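A concrete instance of the special case these comments keep circling: with mod = 0, a base field of 5 is the magic [*] encoding (no base, [disp32]), so a plain [rbp], [r13], [r21] or [r29] - all of which have 5 in their low three encoding bits - must be emitted as mod = 1 with an explicit zero displacement:

    // [rbp] with no displacement:
    //   modRMByte(0, Reg, 5) would mean [disp32] (no base), so instead emit
    //   modRMByte(1, Reg, 5) followed by a disp8 of 0x00.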
@@ -841,7 +930,7 @@ PrefixKind X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, const MCInst &MI,
// REX prefix is optional, but if used must be immediately before the opcode
// Encoding type for this instruction.
return (TSFlags & X86II::EncodingMask)
- ? emitVEXOpcodePrefix(MemoryOperand, MI, CB)
+ ? emitVEXOpcodePrefix(MemoryOperand, MI, STI, CB)
: emitOpcodePrefix(MemoryOperand, MI, STI, CB);
}
@@ -860,12 +949,27 @@ PrefixKind X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, const MCInst &MI,
/// \returns the used prefix.
PrefixKind
X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
+ const MCSubtargetInfo &STI,
SmallVectorImpl<char> &CB) const {
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
uint64_t TSFlags = Desc.TSFlags;
assert(!(TSFlags & X86II::LOCK) && "Can't have LOCK VEX.");
+#ifndef NDEBUG
+ unsigned NumOps = MI.getNumOperands();
+ for (unsigned I = NumOps ? X86II::getOperandBias(Desc) : 0; I != NumOps;
+ ++I) {
+ const MCOperand &MO = MI.getOperand(I);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
+ report_fatal_error(
+ "Cannot encode high byte register in VEX/EVEX-prefixed instruction");
+ }
+#endif
+
X86OpcodePrefixHelper Prefix(*Ctx.getRegisterInfo());
switch (TSFlags & X86II::EncodingMask) {
default:
@@ -909,16 +1013,25 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
case X86II::XOPA:
Prefix.set5M(0xA);
break;
+ case X86II::T_MAP4:
+ Prefix.set5M(0x4);
+ break;
case X86II::T_MAP5:
Prefix.set5M(0x5);
break;
case X86II::T_MAP6:
Prefix.set5M(0x6);
break;
+ case X86II::T_MAP7:
+ Prefix.set5M(0x7);
+ break;
}
Prefix.setL(TSFlags & X86II::VEX_L);
Prefix.setL2(TSFlags & X86II::EVEX_L2);
+ if ((TSFlags & X86II::EVEX_L2) && STI.hasFeature(X86::FeatureAVX512) &&
+ !STI.hasFeature(X86::FeatureEVEX512))
+ report_fatal_error("ZMM registers are not supported without EVEX512");
switch (TSFlags & X86II::OpPrefixMask) {
case X86II::PD:
Prefix.setPP(0x1); // 66
@@ -942,11 +1055,11 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
default:
llvm_unreachable("Unexpected form in emitVEXOpcodePrefix!");
case X86II::MRMDestMem4VOp3CC: {
- // MemAddr, src1(ModR/M), src2(VEX_4V)
- Prefix.setB(MI, MemOperand + X86::AddrBaseReg);
- Prefix.setX(MI, MemOperand + X86::AddrIndexReg);
+ // src1(ModR/M), MemAddr, src2(VEX_4V)
+ Prefix.setRR2(MI, CurOp++);
+ Prefix.setBB2(MI, MemOperand + X86::AddrBaseReg);
+ Prefix.setXX2(MI, MemOperand + X86::AddrIndexReg);
CurOp += X86::AddrNumOperands;
- Prefix.setR(MI, ++CurOp);
Prefix.set4V(MI, CurOp++);
break;
}
@@ -960,10 +1073,9 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
// MemAddr, src1(VEX_4V), src2(ModR/M)
// MemAddr, src1(ModR/M), imm8
//
- Prefix.setB(MI, MemOperand + X86::AddrBaseReg);
- Prefix.setX(MI, MemOperand + X86::AddrIndexReg);
- if (!HasVEX_4V) // Only needed with VSIB which don't use VVVV.
- Prefix.setV2(MI, MemOperand + X86::AddrIndexReg);
+ Prefix.setBB2(MI, MemOperand + X86::AddrBaseReg);
+ Prefix.setXX2(MI, MemOperand + X86::AddrIndexReg);
+ Prefix.setV2(MI, MemOperand + X86::AddrIndexReg, HasVEX_4V);
CurOp += X86::AddrNumOperands;
@@ -994,28 +1106,27 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
if (HasVEX_4V)
Prefix.set4VV2(MI, CurOp++);
- Prefix.setB(MI, MemOperand + X86::AddrBaseReg);
- Prefix.setX(MI, MemOperand + X86::AddrIndexReg);
- if (!HasVEX_4V) // Only needed with VSIB which don't use VVVV.
- Prefix.setV2(MI, MemOperand + X86::AddrIndexReg);
+ Prefix.setBB2(MI, MemOperand + X86::AddrBaseReg);
+ Prefix.setXX2(MI, MemOperand + X86::AddrIndexReg);
+ Prefix.setV2(MI, MemOperand + X86::AddrIndexReg, HasVEX_4V);
break;
}
case X86II::MRMSrcMem4VOp3: {
// Instruction format for 4VOp3:
// src1(ModR/M), MemAddr, src3(VEX_4V)
- Prefix.setR(MI, CurOp++);
- Prefix.setB(MI, MemOperand + X86::AddrBaseReg);
- Prefix.setX(MI, MemOperand + X86::AddrIndexReg);
- Prefix.set4V(MI, CurOp + X86::AddrNumOperands);
+ Prefix.setRR2(MI, CurOp++);
+ Prefix.setBB2(MI, MemOperand + X86::AddrBaseReg);
+ Prefix.setXX2(MI, MemOperand + X86::AddrIndexReg);
+ Prefix.set4VV2(MI, CurOp + X86::AddrNumOperands);
break;
}
case X86II::MRMSrcMemOp4: {
// dst(ModR/M.reg), src1(VEX_4V), src2(Imm[7:4]), src3(ModR/M),
Prefix.setR(MI, CurOp++);
Prefix.set4V(MI, CurOp++);
- Prefix.setB(MI, MemOperand + X86::AddrBaseReg);
- Prefix.setX(MI, MemOperand + X86::AddrIndexReg);
+ Prefix.setBB2(MI, MemOperand + X86::AddrBaseReg);
+ Prefix.setXX2(MI, MemOperand + X86::AddrIndexReg);
break;
}
case X86II::MRM0m:
@@ -1035,10 +1146,9 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
if (HasEVEX_K)
Prefix.setAAA(MI, CurOp++);
- Prefix.setB(MI, MemOperand + X86::AddrBaseReg);
- Prefix.setX(MI, MemOperand + X86::AddrIndexReg);
- if (!HasVEX_4V) // Only needed with VSIB which don't use VVVV.
- Prefix.setV2(MI, MemOperand + X86::AddrIndexReg);
+ Prefix.setBB2(MI, MemOperand + X86::AddrBaseReg);
+ Prefix.setXX2(MI, MemOperand + X86::AddrIndexReg);
+ Prefix.setV2(MI, MemOperand + X86::AddrIndexReg, HasVEX_4V);
break;
}
@@ -1058,7 +1168,7 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
if (HasVEX_4V)
Prefix.set4VV2(MI, CurOp++);
- Prefix.setB(MI, CurOp);
+ Prefix.setBB2(MI, CurOp);
Prefix.setX(MI, CurOp, 4);
++CurOp;
@@ -1077,9 +1187,9 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
case X86II::MRMSrcReg4VOp3: {
// Instruction format for 4VOp3:
// src1(ModR/M), src2(ModR/M), src3(VEX_4V)
- Prefix.setR(MI, CurOp++);
- Prefix.setB(MI, CurOp++);
- Prefix.set4V(MI, CurOp++);
+ Prefix.setRR2(MI, CurOp++);
+ Prefix.setBB2(MI, CurOp++);
+ Prefix.set4VV2(MI, CurOp++);
break;
}
case X86II::MRMSrcRegOp4: {
@@ -1099,7 +1209,7 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
// dst(ModR/M), src(ModR/M)
// dst(ModR/M), src(ModR/M), imm8
// dst(ModR/M), src1(VEX_4V), src2(ModR/M)
- Prefix.setB(MI, CurOp);
+ Prefix.setBB2(MI, CurOp);
Prefix.setX(MI, CurOp, 4);
++CurOp;
@@ -1137,7 +1247,7 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
if (HasEVEX_K)
Prefix.setAAA(MI, CurOp++);
- Prefix.setB(MI, CurOp);
+ Prefix.setBB2(MI, CurOp);
Prefix.setX(MI, CurOp, 4);
++CurOp;
break;
@@ -1198,6 +1308,8 @@ PrefixKind X86MCCodeEmitter::emitREXPrefix(int MemOperand, const MCInst &MI,
}
}
}
+ if ((TSFlags & X86II::ExplicitOpPrefixMask) == X86II::ExplicitREX2Prefix)
+ Prefix.setLowerBound(REX2);
switch (TSFlags & X86II::FormMask) {
default:
assert(!HasRegOp && "Unexpected form in emitREXPrefix!");
@@ -1209,29 +1321,29 @@ PrefixKind X86MCCodeEmitter::emitREXPrefix(int MemOperand, const MCInst &MI,
case X86II::RawFrmDstSrc:
break;
case X86II::AddRegFrm:
- Prefix.setB(MI, CurOp++);
+ Prefix.setBB2(MI, CurOp++);
break;
case X86II::MRMSrcReg:
case X86II::MRMSrcRegCC:
- Prefix.setR(MI, CurOp++);
- Prefix.setB(MI, CurOp++);
+ Prefix.setRR2(MI, CurOp++);
+ Prefix.setBB2(MI, CurOp++);
break;
case X86II::MRMSrcMem:
case X86II::MRMSrcMemCC:
- Prefix.setR(MI, CurOp++);
- Prefix.setB(MI, MemOperand + X86::AddrBaseReg);
- Prefix.setX(MI, MemOperand + X86::AddrIndexReg);
+ Prefix.setRR2(MI, CurOp++);
+ Prefix.setBB2(MI, MemOperand + X86::AddrBaseReg);
+ Prefix.setXX2(MI, MemOperand + X86::AddrIndexReg);
CurOp += X86::AddrNumOperands;
break;
case X86II::MRMDestReg:
- Prefix.setB(MI, CurOp++);
- Prefix.setR(MI, CurOp++);
+ Prefix.setBB2(MI, CurOp++);
+ Prefix.setRR2(MI, CurOp++);
break;
case X86II::MRMDestMem:
- Prefix.setB(MI, MemOperand + X86::AddrBaseReg);
- Prefix.setX(MI, MemOperand + X86::AddrIndexReg);
+ Prefix.setBB2(MI, MemOperand + X86::AddrBaseReg);
+ Prefix.setXX2(MI, MemOperand + X86::AddrIndexReg);
CurOp += X86::AddrNumOperands;
- Prefix.setR(MI, CurOp++);
+ Prefix.setRR2(MI, CurOp++);
break;
case X86II::MRMXmCC:
case X86II::MRMXm:
@@ -1243,8 +1355,8 @@ PrefixKind X86MCCodeEmitter::emitREXPrefix(int MemOperand, const MCInst &MI,
case X86II::MRM5m:
case X86II::MRM6m:
case X86II::MRM7m:
- Prefix.setB(MI, MemOperand + X86::AddrBaseReg);
- Prefix.setX(MI, MemOperand + X86::AddrIndexReg);
+ Prefix.setBB2(MI, MemOperand + X86::AddrBaseReg);
+ Prefix.setXX2(MI, MemOperand + X86::AddrIndexReg);
break;
case X86II::MRMXrCC:
case X86II::MRMXr:
@@ -1256,9 +1368,10 @@ PrefixKind X86MCCodeEmitter::emitREXPrefix(int MemOperand, const MCInst &MI,
case X86II::MRM5r:
case X86II::MRM6r:
case X86II::MRM7r:
- Prefix.setB(MI, CurOp++);
+ Prefix.setBB2(MI, CurOp++);
break;
}
+ Prefix.setM((TSFlags & X86II::OpMapMask) == X86II::TB);
PrefixKind Kind = Prefix.determineOptimalKind();
if (Kind && UsesHighByteReg)
report_fatal_error(
@@ -1320,6 +1433,10 @@ PrefixKind X86MCCodeEmitter::emitOpcodePrefix(int MemOperand, const MCInst &MI,
// 0x0F escape code must be emitted just before the opcode.
switch (TSFlags & X86II::OpMapMask) {
case X86II::TB: // Two-byte opcode map
+ // Encoded by M bit in REX2
+ if (Kind == REX2)
+ break;
+ [[fallthrough]];
case X86II::T8: // 0F 38
case X86II::TA: // 0F 3A
case X86II::ThreeDNow: // 0F 0F, second 0F emitted by caller.
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 9519608ac022..ed4d0a45bd8f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -397,6 +397,18 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(const Triple &TT,
if (CPU.empty())
CPU = "generic";
+ size_t posNoEVEX512 = FS.rfind("-evex512");
+ // Make sure we are not misled by "-avx512fp16".
+ size_t posNoAVX512F =
+ FS.ends_with("-avx512f") ? FS.size() - 8 : FS.rfind("-avx512f,");
+ size_t posEVEX512 = FS.rfind("+evex512");
+ size_t posAVX512F = FS.rfind("+avx512"); // Any AVX512XXX will enable AVX512F.
+
+ if (posAVX512F != StringRef::npos &&
+ (posNoAVX512F == StringRef::npos || posNoAVX512F < posAVX512F))
+ if (posEVEX512 == StringRef::npos && posNoEVEX512 == StringRef::npos)
+ ArchFS += ",+evex512";
+
return createX86MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, ArchFS);
}
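The rfind-based scan above implicitly enables evex512 whenever some AVX512 feature ends up enabled and the user never mentions evex512. A few hypothetical feature strings and their outcomes:

    //  "+avx512f"           -> ArchFS gains ",+evex512"
    //  "+avx512fp16"        -> ArchFS gains ",+evex512" (matches "+avx512")
    //  "+avx512f,-avx512f"  -> unchanged (AVX512F disabled again)
    //  "+avx512f,-evex512"  -> unchanged (explicit opt-out respected)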
@@ -747,165 +759,193 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86TargetMC() {
MCRegister llvm::getX86SubSuperRegister(MCRegister Reg, unsigned Size,
bool High) {
+#define DEFAULT_NOREG \
+ default: \
+ return X86::NoRegister;
+#define SUB_SUPER(R1, R2, R3, R4, R) \
+ case X86::R1: \
+ case X86::R2: \
+ case X86::R3: \
+ case X86::R4: \
+ return X86::R;
+#define A_SUB_SUPER(R) \
+ case X86::AH: \
+ SUB_SUPER(AL, AX, EAX, RAX, R)
+#define D_SUB_SUPER(R) \
+ case X86::DH: \
+ SUB_SUPER(DL, DX, EDX, RDX, R)
+#define C_SUB_SUPER(R) \
+ case X86::CH: \
+ SUB_SUPER(CL, CX, ECX, RCX, R)
+#define B_SUB_SUPER(R) \
+ case X86::BH: \
+ SUB_SUPER(BL, BX, EBX, RBX, R)
+#define SI_SUB_SUPER(R) SUB_SUPER(SIL, SI, ESI, RSI, R)
+#define DI_SUB_SUPER(R) SUB_SUPER(DIL, DI, EDI, RDI, R)
+#define BP_SUB_SUPER(R) SUB_SUPER(BPL, BP, EBP, RBP, R)
+#define SP_SUB_SUPER(R) SUB_SUPER(SPL, SP, ESP, RSP, R)
+#define NO_SUB_SUPER(NO, REG) \
+ SUB_SUPER(R##NO##B, R##NO##W, R##NO##D, R##NO, REG)
+#define NO_SUB_SUPER_B(NO) NO_SUB_SUPER(NO, R##NO##B)
+#define NO_SUB_SUPER_W(NO) NO_SUB_SUPER(NO, R##NO##W)
+#define NO_SUB_SUPER_D(NO) NO_SUB_SUPER(NO, R##NO##D)
+#define NO_SUB_SUPER_Q(NO) NO_SUB_SUPER(NO, R##NO)
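To see through the macro layer before the switch below: a hypothetical expansion of NO_SUB_SUPER_B(16), via NO_SUB_SUPER(16, R16B) and SUB_SUPER(R16B, R16W, R16D, R16, R16B), produces:

    case X86::R16B:
    case X86::R16W:
    case X86::R16D:
    case X86::R16:
      return X86::R16B;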
switch (Size) {
- default: llvm_unreachable("illegal register size");
+ default:
+ llvm_unreachable("illegal register size");
case 8:
if (High) {
switch (Reg.id()) {
- default: return X86::NoRegister;
- case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
- return X86::AH;
- case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
- return X86::DH;
- case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
- return X86::CH;
- case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
- return X86::BH;
+ DEFAULT_NOREG
+ A_SUB_SUPER(AH)
+ D_SUB_SUPER(DH)
+ C_SUB_SUPER(CH)
+ B_SUB_SUPER(BH)
}
} else {
switch (Reg.id()) {
- default: return X86::NoRegister;
- case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
- return X86::AL;
- case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
- return X86::DL;
- case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
- return X86::CL;
- case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
- return X86::BL;
- case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
- return X86::SIL;
- case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
- return X86::DIL;
- case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
- return X86::BPL;
- case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
- return X86::SPL;
- case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
- return X86::R8B;
- case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
- return X86::R9B;
- case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
- return X86::R10B;
- case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
- return X86::R11B;
- case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
- return X86::R12B;
- case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
- return X86::R13B;
- case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
- return X86::R14B;
- case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
- return X86::R15B;
+ DEFAULT_NOREG
+ A_SUB_SUPER(AL)
+ D_SUB_SUPER(DL)
+ C_SUB_SUPER(CL)
+ B_SUB_SUPER(BL)
+ SI_SUB_SUPER(SIL)
+ DI_SUB_SUPER(DIL)
+ BP_SUB_SUPER(BPL)
+ SP_SUB_SUPER(SPL)
+ NO_SUB_SUPER_B(8)
+ NO_SUB_SUPER_B(9)
+ NO_SUB_SUPER_B(10)
+ NO_SUB_SUPER_B(11)
+ NO_SUB_SUPER_B(12)
+ NO_SUB_SUPER_B(13)
+ NO_SUB_SUPER_B(14)
+ NO_SUB_SUPER_B(15)
+ NO_SUB_SUPER_B(16)
+ NO_SUB_SUPER_B(17)
+ NO_SUB_SUPER_B(18)
+ NO_SUB_SUPER_B(19)
+ NO_SUB_SUPER_B(20)
+ NO_SUB_SUPER_B(21)
+ NO_SUB_SUPER_B(22)
+ NO_SUB_SUPER_B(23)
+ NO_SUB_SUPER_B(24)
+ NO_SUB_SUPER_B(25)
+ NO_SUB_SUPER_B(26)
+ NO_SUB_SUPER_B(27)
+ NO_SUB_SUPER_B(28)
+ NO_SUB_SUPER_B(29)
+ NO_SUB_SUPER_B(30)
+ NO_SUB_SUPER_B(31)
}
}
case 16:
switch (Reg.id()) {
- default: return X86::NoRegister;
- case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
- return X86::AX;
- case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
- return X86::DX;
- case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
- return X86::CX;
- case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
- return X86::BX;
- case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
- return X86::SI;
- case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
- return X86::DI;
- case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
- return X86::BP;
- case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
- return X86::SP;
- case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
- return X86::R8W;
- case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
- return X86::R9W;
- case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
- return X86::R10W;
- case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
- return X86::R11W;
- case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
- return X86::R12W;
- case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
- return X86::R13W;
- case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
- return X86::R14W;
- case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
- return X86::R15W;
+ DEFAULT_NOREG
+ A_SUB_SUPER(AX)
+ D_SUB_SUPER(DX)
+ C_SUB_SUPER(CX)
+ B_SUB_SUPER(BX)
+ SI_SUB_SUPER(SI)
+ DI_SUB_SUPER(DI)
+ BP_SUB_SUPER(BP)
+ SP_SUB_SUPER(SP)
+ NO_SUB_SUPER_W(8)
+ NO_SUB_SUPER_W(9)
+ NO_SUB_SUPER_W(10)
+ NO_SUB_SUPER_W(11)
+ NO_SUB_SUPER_W(12)
+ NO_SUB_SUPER_W(13)
+ NO_SUB_SUPER_W(14)
+ NO_SUB_SUPER_W(15)
+ NO_SUB_SUPER_W(16)
+ NO_SUB_SUPER_W(17)
+ NO_SUB_SUPER_W(18)
+ NO_SUB_SUPER_W(19)
+ NO_SUB_SUPER_W(20)
+ NO_SUB_SUPER_W(21)
+ NO_SUB_SUPER_W(22)
+ NO_SUB_SUPER_W(23)
+ NO_SUB_SUPER_W(24)
+ NO_SUB_SUPER_W(25)
+ NO_SUB_SUPER_W(26)
+ NO_SUB_SUPER_W(27)
+ NO_SUB_SUPER_W(28)
+ NO_SUB_SUPER_W(29)
+ NO_SUB_SUPER_W(30)
+ NO_SUB_SUPER_W(31)
}
case 32:
switch (Reg.id()) {
- default: return X86::NoRegister;
- case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
- return X86::EAX;
- case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
- return X86::EDX;
- case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
- return X86::ECX;
- case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
- return X86::EBX;
- case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
- return X86::ESI;
- case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
- return X86::EDI;
- case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
- return X86::EBP;
- case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
- return X86::ESP;
- case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
- return X86::R8D;
- case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
- return X86::R9D;
- case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
- return X86::R10D;
- case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
- return X86::R11D;
- case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
- return X86::R12D;
- case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
- return X86::R13D;
- case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
- return X86::R14D;
- case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
- return X86::R15D;
+ DEFAULT_NOREG
+ A_SUB_SUPER(EAX)
+ D_SUB_SUPER(EDX)
+ C_SUB_SUPER(ECX)
+ B_SUB_SUPER(EBX)
+ SI_SUB_SUPER(ESI)
+ DI_SUB_SUPER(EDI)
+ BP_SUB_SUPER(EBP)
+ SP_SUB_SUPER(ESP)
+ NO_SUB_SUPER_D(8)
+ NO_SUB_SUPER_D(9)
+ NO_SUB_SUPER_D(10)
+ NO_SUB_SUPER_D(11)
+ NO_SUB_SUPER_D(12)
+ NO_SUB_SUPER_D(13)
+ NO_SUB_SUPER_D(14)
+ NO_SUB_SUPER_D(15)
+ NO_SUB_SUPER_D(16)
+ NO_SUB_SUPER_D(17)
+ NO_SUB_SUPER_D(18)
+ NO_SUB_SUPER_D(19)
+ NO_SUB_SUPER_D(20)
+ NO_SUB_SUPER_D(21)
+ NO_SUB_SUPER_D(22)
+ NO_SUB_SUPER_D(23)
+ NO_SUB_SUPER_D(24)
+ NO_SUB_SUPER_D(25)
+ NO_SUB_SUPER_D(26)
+ NO_SUB_SUPER_D(27)
+ NO_SUB_SUPER_D(28)
+ NO_SUB_SUPER_D(29)
+ NO_SUB_SUPER_D(30)
+ NO_SUB_SUPER_D(31)
}
case 64:
switch (Reg.id()) {
- default: return X86::NoRegister;
- case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
- return X86::RAX;
- case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
- return X86::RDX;
- case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
- return X86::RCX;
- case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
- return X86::RBX;
- case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
- return X86::RSI;
- case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
- return X86::RDI;
- case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
- return X86::RBP;
- case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
- return X86::RSP;
- case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
- return X86::R8;
- case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
- return X86::R9;
- case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
- return X86::R10;
- case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
- return X86::R11;
- case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
- return X86::R12;
- case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
- return X86::R13;
- case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
- return X86::R14;
- case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
- return X86::R15;
+ DEFAULT_NOREG
+ A_SUB_SUPER(RAX)
+ D_SUB_SUPER(RDX)
+ C_SUB_SUPER(RCX)
+ B_SUB_SUPER(RBX)
+ SI_SUB_SUPER(RSI)
+ DI_SUB_SUPER(RDI)
+ BP_SUB_SUPER(RBP)
+ SP_SUB_SUPER(RSP)
+ NO_SUB_SUPER_Q(8)
+ NO_SUB_SUPER_Q(9)
+ NO_SUB_SUPER_Q(10)
+ NO_SUB_SUPER_Q(11)
+ NO_SUB_SUPER_Q(12)
+ NO_SUB_SUPER_Q(13)
+ NO_SUB_SUPER_Q(14)
+ NO_SUB_SUPER_Q(15)
+ NO_SUB_SUPER_Q(16)
+ NO_SUB_SUPER_Q(17)
+ NO_SUB_SUPER_Q(18)
+ NO_SUB_SUPER_Q(19)
+ NO_SUB_SUPER_Q(20)
+ NO_SUB_SUPER_Q(21)
+ NO_SUB_SUPER_Q(22)
+ NO_SUB_SUPER_Q(23)
+ NO_SUB_SUPER_Q(24)
+ NO_SUB_SUPER_Q(25)
+ NO_SUB_SUPER_Q(26)
+ NO_SUB_SUPER_Q(27)
+ NO_SUB_SUPER_Q(28)
+ NO_SUB_SUPER_Q(29)
+ NO_SUB_SUPER_Q(30)
+ NO_SUB_SUPER_Q(31)
}
}
}
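For orientation, a hedged sketch (not part of the patch) of how one of the new macros expands after token pasting: NO_SUB_SUPER_B(8) becomes SUB_SUPER(R8B, R8W, R8D, R8, R8B), i.e.

  // Illustrative expansion only; the real cases come from the macros above.
  case X86::R8B:
  case X86::R8W:
  case X86::R8D:
  case X86::R8:
    return X86::R8B;

The macro rewrite is what lets the switch extend cleanly to the APX extended GPRs R16 through R31 via NO_SUB_SUPER_B/W/D/Q(16..31), which the old hand-written case lists stopped short of.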
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MnemonicTables.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MnemonicTables.cpp
index 39b7f0f4160e..30360274afe9 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MnemonicTables.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MnemonicTables.cpp
@@ -10,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-#include "X86InstrInfo.h"
+#include "X86BaseInfo.h"
#define GET_X86_MNEMONIC_TABLES_CPP
#include "X86GenMnemonicTables.inc"
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86.h b/contrib/llvm-project/llvm/lib/Target/X86/X86.h
index 76ecc77bc39c..485afbc1dfbc 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86.h
@@ -27,8 +27,7 @@ class X86TargetMachine;
/// This pass converts a legalized DAG into a X86-specific DAG, ready for
/// instruction scheduling.
-FunctionPass *createX86ISelDag(X86TargetMachine &TM,
- CodeGenOpt::Level OptLevel);
+FunctionPass *createX86ISelDag(X86TargetMachine &TM, CodeGenOptLevel OptLevel);
/// This pass initializes a global base register for PIC on x86-32.
FunctionPass *createX86GlobalBaseRegPass();
@@ -195,7 +194,6 @@ void initializeX86LowerAMXTypeLegacyPassPass(PassRegistry &);
void initializeX86LowerTileCopyPass(PassRegistry &);
void initializeX86OptimizeLEAPassPass(PassRegistry &);
void initializeX86PartialReductionPass(PassRegistry &);
-void initializeX86PreAMXConfigPassPass(PassRegistry &);
void initializeX86PreTileConfigPass(PassRegistry &);
void initializeX86ReturnThunksPass(PassRegistry &);
void initializeX86SpeculativeExecutionSideEffectSuppressionPass(PassRegistry &);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86.td b/contrib/llvm-project/llvm/lib/Target/X86/X86.td
index 05cc50712c52..5fd6828f4312 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86.td
@@ -119,6 +119,8 @@ def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
"Support 16-bit floating point conversion instructions",
[FeatureAVX]>;
+def FeatureEVEX512 : SubtargetFeature<"evex512", "HasEVEX512", "true",
+ "Support ZMM and 64-bit mask instructions">;
def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512",
"Enable AVX-512 instructions",
[FeatureAVX2, FeatureFMA, FeatureF16C]>;
@@ -218,7 +220,7 @@ def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
[FeatureSSE2]>;
def FeatureVAES : SubtargetFeature<"vaes", "HasVAES", "true",
"Promote selected AES instructions to AVX512/AVX registers",
- [FeatureAVX, FeatureAES]>;
+ [FeatureAVX2, FeatureAES]>;
def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true",
"Enable TBM instructions">;
def FeatureLWP : SubtargetFeature<"lwp", "HasLWP", "true",
@@ -244,7 +246,7 @@ def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
[FeatureSSE2]>;
def FeatureSHA512 : SubtargetFeature<"sha512", "HasSHA512", "true",
"Support SHA512 instructions",
- [FeatureAVX]>;
+ [FeatureAVX2]>;
// Processor supports CET SHSTK - Control-Flow Enforcement Technology
// using Shadow Stack
def FeatureSHSTK : SubtargetFeature<"shstk", "HasSHSTK", "true",
@@ -254,7 +256,7 @@ def FeatureSM3 : SubtargetFeature<"sm3", "HasSM3", "true",
[FeatureAVX]>;
def FeatureSM4 : SubtargetFeature<"sm4", "HasSM4", "true",
"Support SM4 instructions",
- [FeatureAVX]>;
+ [FeatureAVX2]>;
def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
"Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
@@ -323,12 +325,34 @@ def FeatureTSXLDTRK : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
"Support TSXLDTRK instructions">;
def FeatureUINTR : SubtargetFeature<"uintr", "HasUINTR", "true",
"Has UINTR Instructions">;
+def FeatureUSERMSR : SubtargetFeature<"usermsr", "HasUSERMSR", "true",
+ "Support USERMSR instructions">;
def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
"platform configuration instruction">;
def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
"Support movdiri instruction (direct store integer)">;
def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
"Support movdir64b instruction (direct store 64 bytes)">;
+def FeatureAVX10_1 : SubtargetFeature<"avx10.1-256", "HasAVX10_1", "true",
+ "Support AVX10.1 up to 256-bit instruction",
+ [FeatureCDI, FeatureVBMI, FeatureIFMA, FeatureVNNI,
+ FeatureBF16, FeatureVPOPCNTDQ, FeatureVBMI2, FeatureBITALG,
+ FeatureVAES, FeatureVPCLMULQDQ, FeatureFP16]>;
+def FeatureAVX10_1_512 : SubtargetFeature<"avx10.1-512", "HasAVX10_1_512", "true",
+ "Support AVX10.1 up to 512-bit instruction",
+ [FeatureAVX10_1, FeatureEVEX512]>;
+def FeatureEGPR : SubtargetFeature<"egpr", "HasEGPR", "true",
+ "Support extended general purpose register">;
+def FeaturePush2Pop2 : SubtargetFeature<"push2pop2", "HasPush2Pop2", "true",
+ "Support PUSH2/POP2 instructions">;
+def FeaturePPX : SubtargetFeature<"ppx", "HasPPX", "true",
+ "Support Push-Pop Acceleration">;
+def FeatureNDD : SubtargetFeature<"ndd", "HasNDD", "true",
+ "Support non-destructive destination">;
+def FeatureCCMP : SubtargetFeature<"ccmp", "HasCCMP", "true",
+ "Support conditional cmp & test instructions">;
+def FeatureCF : SubtargetFeature<"cf", "HasCF", "true",
+ "Support conditional faulting">;
// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"). See "REP String Enhancement" in the Intel Software
@@ -817,6 +841,7 @@ def ProcessorFeatures {
];
list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
+ FeatureEVEX512,
FeatureBWI,
FeatureCDI,
FeatureDQI,
@@ -940,6 +965,7 @@ def ProcessorFeatures {
FeatureXSAVES,
FeatureCLFLUSHOPT,
FeatureAVX512,
+ FeatureEVEX512,
FeatureCDI,
FeatureDQI,
FeatureBWI,
@@ -982,6 +1008,7 @@ def ProcessorFeatures {
// Cannonlake
list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512,
+ FeatureEVEX512,
FeatureCDI,
FeatureDQI,
FeatureBWI,
@@ -1207,6 +1234,18 @@ def ProcessorFeatures {
list<SubtargetFeature> ADLFeatures =
!listconcat(TRMFeatures, ADLAdditionalFeatures);
+ // Gracemont
+ list<SubtargetFeature> GRTTuning = [TuningMacroFusion,
+ TuningSlow3OpsLEA,
+ TuningSlowDivide32,
+ TuningSlowDivide64,
+ TuningFastScalarFSQRT,
+ TuningFastVectorFSQRT,
+ TuningFast15ByteNOP,
+ TuningFastVariablePerLaneShuffle,
+ TuningPOPCNTFalseDeps,
+ TuningInsertVZEROUPPER];
+
// Sierraforest
list<SubtargetFeature> SRFAdditionalFeatures = [FeatureCMPCCXADD,
FeatureAVXIFMA,
@@ -1222,6 +1261,26 @@ def ProcessorFeatures {
list<SubtargetFeature> GRRFeatures =
!listconcat(SRFFeatures, GRRAdditionalFeatures);
+ // Arrowlake S
+ list<SubtargetFeature> ARLSAdditionalFeatures = [FeatureAVXVNNIINT16,
+ FeatureSHA512,
+ FeatureSM3,
+ FeatureSM4];
+ list<SubtargetFeature> ARLSFeatures =
+ !listconcat(SRFFeatures, ARLSAdditionalFeatures);
+
+ // Pantherlake
+ list<SubtargetFeature> PTLAdditionalFeatures = [FeaturePREFETCHI];
+ list<SubtargetFeature> PTLFeatures =
+ !listconcat(ARLSFeatures, PTLAdditionalFeatures);
+
+ // Clearwaterforest
+ list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI,
+ FeatureUSERMSR];
+ list<SubtargetFeature> CWFFeatures =
+ !listconcat(ARLSFeatures, CWFAdditionalFeatures);
+
// Knights Landing
list<SubtargetFeature> KNLFeatures = [FeatureX87,
FeatureCX8,
@@ -1242,6 +1301,7 @@ def ProcessorFeatures {
FeatureF16C,
FeatureFSGSBase,
FeatureAVX512,
+ FeatureEVEX512,
FeatureERI,
FeatureCDI,
FeaturePFI,
@@ -1451,6 +1511,7 @@ def ProcessorFeatures {
!listconcat(ZN2Features, ZN3AdditionalFeatures);
list<SubtargetFeature> ZN4Tuning = ZN3Tuning;
list<SubtargetFeature> ZN4AdditionalFeatures = [FeatureAVX512,
+ FeatureEVEX512,
FeatureCDI,
FeatureDQI,
FeatureBWI,
@@ -1720,10 +1781,23 @@ def : ProcModel<"sapphirerapids", SapphireRapidsModel,
ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
def : ProcModel<"alderlake", AlderlakePModel,
ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
+// FIXME: Use Gracemont Schedule Model when it is ready.
+def : ProcModel<"gracemont", AlderlakePModel,
+ ProcessorFeatures.ADLFeatures, ProcessorFeatures.GRTTuning>;
def : ProcModel<"raptorlake", AlderlakePModel,
ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
def : ProcModel<"meteorlake", AlderlakePModel,
ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
+def : ProcModel<"arrowlake", AlderlakePModel,
+ ProcessorFeatures.SRFFeatures, ProcessorFeatures.ADLTuning>;
+foreach P = ["arrowlake-s", "arrowlake_s", "lunarlake"] in {
+def : ProcModel<P, AlderlakePModel,
+ ProcessorFeatures.ARLSFeatures, ProcessorFeatures.ADLTuning>;
+}
+def : ProcModel<"pantherlake", AlderlakePModel,
+ ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>;
+def : ProcModel<"clearwaterforest", AlderlakePModel,
+ ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>;
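A hedged usage note, not part of the patch: these ProcModels are conventionally reachable through the clang driver as -march=gracemont, -march=arrowlake-s, -march=pantherlake, -march=clearwaterforest, and so on (the driver spellings are assumed to mirror the ProcModel names, as with earlier CPUs). Note that gracemont currently borrows the Alder Lake schedule model, per the FIXME above.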
def : ProcModel<"graniterapids", SapphireRapidsModel,
ProcessorFeatures.GNRFeatures, ProcessorFeatures.SPRTuning>;
def : ProcModel<"emeraldrapids", SapphireRapidsModel,
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp
index bb94444525fb..15cfd247f125 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -14,6 +14,7 @@
#include "X86AsmPrinter.h"
#include "MCTargetDesc/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
#include "MCTargetDesc/X86TargetStreamer.h"
#include "TargetInfo/X86TargetInfo.h"
#include "X86InstrInfo.h"
@@ -34,6 +35,7 @@
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
@@ -530,6 +532,86 @@ void X86AsmPrinter::PrintIntelMemReference(const MachineInstr *MI,
O << ']';
}
+const MCSubtargetInfo *X86AsmPrinter::getIFuncMCSubtargetInfo() const {
+ assert(Subtarget);
+ return Subtarget;
+}
+
+void X86AsmPrinter::emitMachOIFuncStubBody(Module &M, const GlobalIFunc &GI,
+ MCSymbol *LazyPointer) {
+ // _ifunc:
+ // jmpq *lazy_pointer(%rip)
+
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::JMP32m)
+ .addReg(X86::RIP)
+ .addImm(1)
+ .addReg(0)
+ .addOperand(MCOperand::createExpr(
+ MCSymbolRefExpr::create(LazyPointer, OutContext)))
+ .addReg(0),
+ *Subtarget);
+}
+
+void X86AsmPrinter::emitMachOIFuncStubHelperBody(Module &M,
+ const GlobalIFunc &GI,
+ MCSymbol *LazyPointer) {
+ // _ifunc.stub_helper:
+ // push %rax
+ // push %rdi
+ // push %rsi
+ // push %rdx
+ // push %rcx
+ // push %r8
+ // push %r9
+ // callq foo
+ // movq %rax,lazy_pointer(%rip)
+ // pop %r9
+ // pop %r8
+ // pop %rcx
+ // pop %rdx
+ // pop %rsi
+ // pop %rdi
+ // pop %rax
+ // jmpq *lazy_pointer(%rip)
+
+ for (int Reg :
+ {X86::RAX, X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9})
+ OutStreamer->emitInstruction(MCInstBuilder(X86::PUSH64r).addReg(Reg),
+ *Subtarget);
+
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::CALL64pcrel32)
+ .addOperand(MCOperand::createExpr(lowerConstant(GI.getResolver()))),
+ *Subtarget);
+
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::MOV64mr)
+ .addReg(X86::RIP)
+ .addImm(1)
+ .addReg(0)
+ .addOperand(MCOperand::createExpr(
+ MCSymbolRefExpr::create(LazyPointer, OutContext)))
+ .addReg(0)
+ .addReg(X86::RAX),
+ *Subtarget);
+
+ for (int Reg :
+ {X86::R9, X86::R8, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RAX})
+ OutStreamer->emitInstruction(MCInstBuilder(X86::POP64r).addReg(Reg),
+ *Subtarget);
+
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::JMP32m)
+ .addReg(X86::RIP)
+ .addImm(1)
+ .addReg(0)
+ .addOperand(MCOperand::createExpr(
+ MCSymbolRefExpr::create(LazyPointer, OutContext)))
+ .addReg(0),
+ *Subtarget);
+}
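The helper saves RAX and the six integer argument registers because the resolver runs lazily, on the first real call, so the caller's arguments must survive it. A hedged source-level sketch (not part of the patch) of code that would exercise these stubs; all names are illustrative:

  extern "C" int add_one_generic(int x) { return x + 1; }
  extern "C" int add_one_fancy(int x) { return x + 1; }

  // The resolver runs once; its result overwrites the lazy pointer,
  // just as the stub helper above stores RAX back through lazy_pointer.
  extern "C" auto add_one_resolver() -> int (*)(int) {
    return __builtin_cpu_supports("avx2") ? add_one_fancy : add_one_generic;
  }

  extern "C" int add_one(int) __attribute__((ifunc("add_one_resolver")));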
+
static bool printAsmMRegister(const X86AsmPrinter &P, const MachineOperand &MO,
char Mode, raw_ostream &O) {
Register Reg = MO.getReg();
@@ -766,8 +848,8 @@ void X86AsmPrinter::emitStartOfAsmFile(Module &M) {
if (FeatureFlagsAnd) {
// Emit a .note.gnu.property section with the flags.
- if (!TT.isArch32Bit() && !TT.isArch64Bit())
- llvm_unreachable("CFProtection used on invalid architecture!");
+ assert((TT.isArch32Bit() || TT.isArch64Bit()) &&
+ "CFProtection used on invalid architecture!");
MCSection *Cur = OutStreamer->getCurrentSectionOnly();
MCSection *Nt = MMI->getContext().getELFSection(
".note.gnu.property", ELF::SHT_NOTE, ELF::SHF_ALLOC);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.h
index c81651cf7f2f..693021eca329 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.h
@@ -120,6 +120,11 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
const char *Modifier);
void PrintIntelMemReference(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O, const char *Modifier);
+ const MCSubtargetInfo *getIFuncMCSubtargetInfo() const override;
+ void emitMachOIFuncStubBody(Module &M, const GlobalIFunc &GI,
+ MCSymbol *LazyPointer) override;
+ void emitMachOIFuncStubHelperBody(Module &M, const GlobalIFunc &GI,
+ MCSymbol *LazyPointer) override;
public:
X86AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.cpp
index 3ff107316b9c..0ea51bec29b8 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Module.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.td
index 06cebdc21594..16014d6a2f60 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.td
@@ -23,6 +23,11 @@ class CCIfNotSubtarget<string F, CCAction A>
"(State.getMachineFunction().getSubtarget()).", F),
A>;
+/// CCIfRegCallv4 - Match if RegCall ABIv4 is respected.
+class CCIfRegCallv4<CCAction A>
+ : CCIf<"State.getMachineFunction().getFunction().getParent()->getModuleFlag(\"RegCallv4\")!=nullptr",
+ A>;
+
/// CCIfIsVarArgOnWin - Match if isVarArg on Windows 32bits.
class CCIfIsVarArgOnWin<CCAction A>
: CCIf<"State.isVarArg() && "
@@ -55,6 +60,20 @@ def RC_X86_32_RegCall : RC_X86_RegCall {
let ZMM = [ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7];
}
+// RegCall register classes for 32 bits when the regcall ABI v.4 is in
+// effect.
+// Change in __regcall ABI v.4: don't use EAX, as a spare register is
+// needed to code the virtual call thunk.
+def RC_X86_32_RegCallv4_Win : RC_X86_RegCall {
+ let GPR_8 = [CL, DL, DIL, SIL];
+ let GPR_16 = [CX, DX, DI, SI];
+ let GPR_32 = [ECX, EDX, EDI, ESI];
+ let GPR_64 = [RAX]; ///< Not actually used, but AssignToReg can't handle []
+ ///< \todo Fix AssignToReg to enable empty lists
+ let XMM = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7];
+ let YMM = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7];
+ let ZMM = [ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7];
+}
+
class RC_X86_64_RegCall : RC_X86_RegCall {
let XMM = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15];
@@ -71,6 +90,18 @@ def RC_X86_64_RegCall_Win : RC_X86_64_RegCall {
let GPR_64 = [RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11, R12, R14, R15];
}
+// On Windows 64 we don't want to use R13 - it is reserved for
+// largely aligned stacks.
+// Change in __regcall ABI v.4: additionally don't use R10, as a
+// spare register is needed to code the virtual call thunk.
+//
+def RC_X86_64_RegCallv4_Win : RC_X86_64_RegCall {
+ let GPR_8 = [AL, CL, DL, DIL, SIL, R8B, R9B, R11B, R12B, R14B, R15B];
+ let GPR_16 = [AX, CX, DX, DI, SI, R8W, R9W, R11W, R12W, R14W, R15W];
+ let GPR_32 = [EAX, ECX, EDX, EDI, ESI, R8D, R9D, R11D, R12D, R14D, R15D];
+ let GPR_64 = [RAX, RCX, RDX, RDI, RSI, R8, R9, R11, R12, R14, R15];
+}
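A hedged example, not part of the patch: a __regcall function compiled for Windows x64 under ABI v4 (selected via the "RegCallv4" module flag; clang's -regcall4 driver option is assumed here). Argument assignment skips R10, keeping it free for the virtual-call thunk described above.

  // a, b, c arrive in EAX, ECX, EDX per the GPR_32 list above.
  __attribute__((regcall)) int sum3(int a, int b, int c) {
    return a + b + c;
  }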
+
def RC_X86_64_RegCall_SysV : RC_X86_64_RegCall {
let GPR_8 = [AL, CL, DL, DIL, SIL, R8B, R9B, R12B, R13B, R14B, R15B];
let GPR_16 = [AX, CX, DX, DI, SI, R8W, R9W, R12W, R13W, R14W, R15W];
@@ -388,15 +419,6 @@ def RetCC_X86_64_HiPE : CallingConv<[
CCIfType<[i64], CCAssignToReg<[R15, RBP, RAX, RDX]>>
]>;
-// X86-64 WebKit_JS return-value convention.
-def RetCC_X86_64_WebKit_JS : CallingConv<[
- // Promote all types to i64
- CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
-
- // Return: RAX
- CCIfType<[i64], CCAssignToReg<[RAX]>>
-]>;
-
def RetCC_X86_64_Swift : CallingConv<[
CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[R12]>>>,
@@ -433,8 +455,12 @@ def RetCC_X86_64_AnyReg : CallingConv<[
defm X86_32_RegCall :
X86_RegCall_base<RC_X86_32_RegCall>;
+defm X86_32_RegCallv4_Win :
+ X86_RegCall_base<RC_X86_32_RegCallv4_Win>;
defm X86_Win64_RegCall :
X86_RegCall_base<RC_X86_64_RegCall_Win>;
+defm X86_Win64_RegCallv4 :
+ X86_RegCall_base<RC_X86_64_RegCallv4_Win>;
defm X86_SysV64_RegCall :
X86_RegCall_base<RC_X86_64_RegCall_SysV>;
@@ -447,6 +473,8 @@ def RetCC_X86_32 : CallingConv<[
// If HiPE, use RetCC_X86_32_HiPE.
CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_32_HiPE>>,
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<RetCC_X86_32_VectorCall>>,
+ CCIfCC<"CallingConv::X86_RegCall",
+ CCIfSubtarget<"isTargetWin32()", CCIfRegCallv4<CCDelegateTo<RetCC_X86_32_RegCallv4_Win>>>>,
CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<RetCC_X86_32_RegCall>>,
// Otherwise, use RetCC_X86_32_C.
@@ -458,8 +486,7 @@ def RetCC_X86_64 : CallingConv<[
// HiPE uses RetCC_X86_64_HiPE
CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_64_HiPE>>,
- // Handle JavaScript calls.
- CCIfCC<"CallingConv::WebKit_JS", CCDelegateTo<RetCC_X86_64_WebKit_JS>>,
+ // Handle AnyReg calls.
CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_X86_64_AnyReg>>,
// Handle Swift calls.
@@ -474,6 +501,9 @@ def RetCC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<RetCC_X86_64_Vectorcall>>,
CCIfCC<"CallingConv::X86_RegCall",
+ CCIfSubtarget<"isTargetWin64()", CCIfRegCallv4<CCDelegateTo<RetCC_X86_Win64_RegCallv4>>>>,
+
+ CCIfCC<"CallingConv::X86_RegCall",
CCIfSubtarget<"isTargetWin64()",
CCDelegateTo<RetCC_X86_Win64_RegCall>>>,
CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<RetCC_X86_SysV64_RegCall>>,
@@ -532,6 +562,14 @@ def CC_X86_64_C : CallingConv<[
// The first 6 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,
+
+ // i128 can be either passed in two i64 registers, or on the stack, but
+ // not split across register and stack. As such, do not allow using R9
+ // for a split i64.
+ CCIfType<[i64],
+ CCIfSplit<CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>,
+ CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [R9]>>>,
+
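A hedged illustration, not part of the patch, of the rule above: five preceding integer arguments consume RDI through R8, so the trailing __int128 cannot be split across R9 and the stack and is instead passed entirely on the stack.

  __int128 f(long a, long b, long c, long d, long e, __int128 g) {
    return g + a; // g occupies two stack slots, never R9 plus a slot
  }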
CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
// The first 8 MMX vector arguments are passed in XMM registers on Darwin.
@@ -705,22 +743,6 @@ def CC_X86_64_HiPE : CallingConv<[
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
]>;
-def CC_X86_64_WebKit_JS : CallingConv<[
- // Promote i8/i16 arguments to i32.
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
-
- // Only the first integer argument is passed in register.
- CCIfType<[i32], CCAssignToReg<[EAX]>>,
- CCIfType<[i64], CCAssignToReg<[RAX]>>,
-
- // The remaining integer arguments are passed on the stack. 32bit integer and
- // floating-point arguments are aligned to 4 byte and stored in 4 byte slots.
- // 64bit integer and floating-point arguments are aligned to 8 byte and stored
- // in 8 byte stack slots.
- CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
- CCIfType<[i64, f64], CCAssignToStack<8, 8>>
-]>;
-
// No explicit register is specified for the AnyReg calling convention. The
// register allocator may assign the arguments to any free register.
//
@@ -1052,6 +1074,8 @@ def CC_X86_32 : CallingConv<[
CCIfCC<"CallingConv::Tail", CCDelegateTo<CC_X86_32_FastCC>>,
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_32_HiPE>>,
+ CCIfCC<"CallingConv::X86_RegCall",
+ CCIfSubtarget<"isTargetWin32()", CCIfRegCallv4<CCDelegateTo<CC_X86_32_RegCallv4_Win>>>>,
CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_32_RegCall>>,
// Otherwise, drop to normal X86-32 CC
@@ -1062,12 +1086,13 @@ def CC_X86_32 : CallingConv<[
def CC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_64_GHC>>,
CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_64_HiPE>>,
- CCIfCC<"CallingConv::WebKit_JS", CCDelegateTo<CC_X86_64_WebKit_JS>>,
CCIfCC<"CallingConv::AnyReg", CCDelegateTo<CC_X86_64_AnyReg>>,
CCIfCC<"CallingConv::Win64", CCDelegateTo<CC_X86_Win64_C>>,
CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo<CC_X86_64_C>>,
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win64_VectorCall>>,
CCIfCC<"CallingConv::X86_RegCall",
+ CCIfSubtarget<"isTargetWin64()", CCIfRegCallv4<CCDelegateTo<CC_X86_Win64_RegCallv4>>>>,
+ CCIfCC<"CallingConv::X86_RegCall",
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_RegCall>>>,
CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_SysV64_RegCall>>,
CCIfCC<"CallingConv::X86_INTR", CCCustom<"CC_X86_Intr">>,
@@ -1126,6 +1151,9 @@ def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(sub CSR_64_TLS_Darwin, RBP)
def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI,
R8, R9, R10)>;
+def CSR_Win64_RT_MostRegs : CalleeSavedRegs<(add CSR_64_RT_MostRegs,
+ (sequence "XMM%u", 6, 15))>;
+
// All registers - except r11 and return registers.
def CSR_64_RT_AllRegs : CalleeSavedRegs<(add CSR_64_RT_MostRegs,
(sequence "XMM%u", 0, 15))>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp
index 4e31bbc4345c..bdd86e48fa54 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -17,7 +17,6 @@
#include "X86Subtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -620,29 +619,40 @@ void X86DomainReassignment::initConverters() {
std::make_unique<InstrReplacerDstCOPY>(From, To);
};
- createReplacerDstCOPY(X86::MOVZX32rm16, X86::KMOVWkm);
- createReplacerDstCOPY(X86::MOVZX64rm16, X86::KMOVWkm);
+ bool HasEGPR = STI->hasEGPR();
+ createReplacerDstCOPY(X86::MOVZX32rm16,
+ HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
+ createReplacerDstCOPY(X86::MOVZX64rm16,
+ HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
- createReplacerDstCOPY(X86::MOVZX32rr16, X86::KMOVWkk);
- createReplacerDstCOPY(X86::MOVZX64rr16, X86::KMOVWkk);
+ createReplacerDstCOPY(X86::MOVZX32rr16,
+ HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
+ createReplacerDstCOPY(X86::MOVZX64rr16,
+ HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
if (STI->hasDQI()) {
- createReplacerDstCOPY(X86::MOVZX16rm8, X86::KMOVBkm);
- createReplacerDstCOPY(X86::MOVZX32rm8, X86::KMOVBkm);
- createReplacerDstCOPY(X86::MOVZX64rm8, X86::KMOVBkm);
-
- createReplacerDstCOPY(X86::MOVZX16rr8, X86::KMOVBkk);
- createReplacerDstCOPY(X86::MOVZX32rr8, X86::KMOVBkk);
- createReplacerDstCOPY(X86::MOVZX64rr8, X86::KMOVBkk);
+ createReplacerDstCOPY(X86::MOVZX16rm8,
+ HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
+ createReplacerDstCOPY(X86::MOVZX32rm8,
+ HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
+ createReplacerDstCOPY(X86::MOVZX64rm8,
+ HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
+
+ createReplacerDstCOPY(X86::MOVZX16rr8,
+ HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
+ createReplacerDstCOPY(X86::MOVZX32rr8,
+ HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
+ createReplacerDstCOPY(X86::MOVZX64rr8,
+ HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
}
auto createReplacer = [&](unsigned From, unsigned To) {
Converters[{MaskDomain, From}] = std::make_unique<InstrReplacer>(From, To);
};
- createReplacer(X86::MOV16rm, X86::KMOVWkm);
- createReplacer(X86::MOV16mr, X86::KMOVWmk);
- createReplacer(X86::MOV16rr, X86::KMOVWkk);
+ createReplacer(X86::MOV16rm, HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
+ createReplacer(X86::MOV16mr, HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
+ createReplacer(X86::MOV16rr, HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
createReplacer(X86::SHR16ri, X86::KSHIFTRWri);
createReplacer(X86::SHL16ri, X86::KSHIFTLWri);
createReplacer(X86::NOT16r, X86::KNOTWrr);
@@ -651,14 +661,14 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::XOR16rr, X86::KXORWrr);
if (STI->hasBWI()) {
- createReplacer(X86::MOV32rm, X86::KMOVDkm);
- createReplacer(X86::MOV64rm, X86::KMOVQkm);
+ createReplacer(X86::MOV32rm, HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm);
+ createReplacer(X86::MOV64rm, HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm);
- createReplacer(X86::MOV32mr, X86::KMOVDmk);
- createReplacer(X86::MOV64mr, X86::KMOVQmk);
+ createReplacer(X86::MOV32mr, HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
+ createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
- createReplacer(X86::MOV32rr, X86::KMOVDkk);
- createReplacer(X86::MOV64rr, X86::KMOVQkk);
+ createReplacer(X86::MOV32rr, HasEGPR ? X86::KMOVDkk_EVEX : X86::KMOVDkk);
+ createReplacer(X86::MOV64rr, HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk);
createReplacer(X86::SHR32ri, X86::KSHIFTRDri);
createReplacer(X86::SHR64ri, X86::KSHIFTRQri);
@@ -696,9 +706,9 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::AND8rr, X86::KANDBrr);
- createReplacer(X86::MOV8rm, X86::KMOVBkm);
- createReplacer(X86::MOV8mr, X86::KMOVBmk);
- createReplacer(X86::MOV8rr, X86::KMOVBkk);
+ createReplacer(X86::MOV8rm, HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
+ createReplacer(X86::MOV8mr, HasEGPR ? X86::KMOVBmk_EVEX : X86::KMOVBmk);
+ createReplacer(X86::MOV8rr, HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
createReplacer(X86::NOT8r, X86::KNOTBrr);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp
index 88366558562c..c425c37b4186 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp
@@ -12,9 +12,10 @@
/// are encoded using the EVEX prefix and if possible replaces them by their
/// corresponding VEX encoding which is usually shorter by 2 bytes.
/// EVEX instructions may be encoded via the VEX prefix when the AVX-512
-/// instruction has a corresponding AVX/AVX2 opcode, when vector length
-/// accessed by instruction is less than 512 bits and when it does not use
-// the xmm or the mask registers or xmm/ymm registers with indexes higher than 15.
+/// instruction has a corresponding AVX/AVX2 opcode, when the vector length
+/// accessed by the instruction is less than 512 bits, and when it does not
+/// use the mask registers or xmm/ymm registers with indexes higher
+/// than 15.
/// The pass applies code reduction on the generated code for AVX-512 instrs.
//
//===----------------------------------------------------------------------===//
@@ -39,16 +40,16 @@ using namespace llvm;
// Including the generated EVEX2VEX tables.
struct X86EvexToVexCompressTableEntry {
- uint16_t EvexOpcode;
- uint16_t VexOpcode;
+ uint16_t EvexOpc;
+ uint16_t VexOpc;
bool operator<(const X86EvexToVexCompressTableEntry &RHS) const {
- return EvexOpcode < RHS.EvexOpcode;
+ return EvexOpc < RHS.EvexOpc;
}
friend bool operator<(const X86EvexToVexCompressTableEntry &TE,
unsigned Opc) {
- return TE.EvexOpcode < Opc;
+ return TE.EvexOpc < Opc;
}
};
#include "X86GenEVEX2VEXTables.inc"
@@ -61,16 +62,9 @@ struct X86EvexToVexCompressTableEntry {
namespace {
class EvexToVexInstPass : public MachineFunctionPass {
-
- /// For EVEX instructions that can be encoded using VEX encoding, replace
- /// them by the VEX encoding in order to reduce size.
- bool CompressEvexToVexImpl(MachineInstr &MI) const;
-
public:
static char ID;
-
- EvexToVexInstPass() : MachineFunctionPass(ID) { }
-
+ EvexToVexInstPass() : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return EVEX2VEX_DESC; }
/// Loop over all of the basic blocks, replacing EVEX instructions
@@ -82,49 +76,23 @@ public:
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
}
-
-private:
- /// Machine instruction info used throughout the class.
- const X86InstrInfo *TII = nullptr;
-
- const X86Subtarget *ST = nullptr;
};
} // end anonymous namespace
char EvexToVexInstPass::ID = 0;
-bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
- TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
-
- ST = &MF.getSubtarget<X86Subtarget>();
- if (!ST->hasAVX512())
- return false;
-
- bool Changed = false;
-
- /// Go over all basic blocks in function and replace
- /// EVEX encoded instrs by VEX encoding when possible.
- for (MachineBasicBlock &MBB : MF) {
-
- // Traverse the basic block.
- for (MachineInstr &MI : MBB)
- Changed |= CompressEvexToVexImpl(MI);
- }
-
- return Changed;
-}
-
static bool usesExtendedRegister(const MachineInstr &MI) {
auto isHiRegIdx = [](unsigned Reg) {
// Check for XMM register with indexes between 16 - 31.
if (Reg >= X86::XMM16 && Reg <= X86::XMM31)
return true;
-
// Check for YMM register with indexes between 16 - 31.
if (Reg >= X86::YMM16 && Reg <= X86::YMM31)
return true;
-
+ // Check for GPR with indexes between 16 - 31.
+ if (X86II::isApxExtendedReg(Reg))
+ return true;
return false;
};
@@ -135,10 +103,8 @@ static bool usesExtendedRegister(const MachineInstr &MI) {
continue;
Register Reg = MO.getReg();
-
- assert(!(Reg >= X86::ZMM0 && Reg <= X86::ZMM31) &&
+ assert(!X86II::isZMMReg(Reg) &&
"ZMM instructions should not be in the EVEX->VEX tables");
-
if (isHiRegIdx(Reg))
return true;
}
@@ -146,21 +112,58 @@ static bool usesExtendedRegister(const MachineInstr &MI) {
return false;
}
+static bool checkVEXInstPredicate(unsigned EvexOpc, const X86Subtarget &ST) {
+ switch (EvexOpc) {
+ default:
+ return true;
+ case X86::VCVTNEPS2BF16Z128rm:
+ case X86::VCVTNEPS2BF16Z128rr:
+ case X86::VCVTNEPS2BF16Z256rm:
+ case X86::VCVTNEPS2BF16Z256rr:
+ return ST.hasAVXNECONVERT();
+ case X86::VPDPBUSDSZ128m:
+ case X86::VPDPBUSDSZ128r:
+ case X86::VPDPBUSDSZ256m:
+ case X86::VPDPBUSDSZ256r:
+ case X86::VPDPBUSDZ128m:
+ case X86::VPDPBUSDZ128r:
+ case X86::VPDPBUSDZ256m:
+ case X86::VPDPBUSDZ256r:
+ case X86::VPDPWSSDSZ128m:
+ case X86::VPDPWSSDSZ128r:
+ case X86::VPDPWSSDSZ256m:
+ case X86::VPDPWSSDSZ256r:
+ case X86::VPDPWSSDZ128m:
+ case X86::VPDPWSSDZ128r:
+ case X86::VPDPWSSDZ256m:
+ case X86::VPDPWSSDZ256r:
+ return ST.hasAVXVNNI();
+ case X86::VPMADD52HUQZ128m:
+ case X86::VPMADD52HUQZ128r:
+ case X86::VPMADD52HUQZ256m:
+ case X86::VPMADD52HUQZ256r:
+ case X86::VPMADD52LUQZ128m:
+ case X86::VPMADD52LUQZ128r:
+ case X86::VPMADD52LUQZ256m:
+ case X86::VPMADD52LUQZ256r:
+ return ST.hasAVXIFMA();
+ }
+}
+
// Do any custom cleanup needed to finalize the conversion.
-static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc,
- const X86Subtarget *ST) {
- (void)NewOpc;
+static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) {
+ (void)VexOpc;
unsigned Opc = MI.getOpcode();
switch (Opc) {
case X86::VALIGNDZ128rri:
case X86::VALIGNDZ128rmi:
case X86::VALIGNQZ128rri:
case X86::VALIGNQZ128rmi: {
- assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) &&
+ assert((VexOpc == X86::VPALIGNRrri || VexOpc == X86::VPALIGNRrmi) &&
"Unexpected new opcode!");
- unsigned Scale = (Opc == X86::VALIGNQZ128rri ||
- Opc == X86::VALIGNQZ128rmi) ? 8 : 4;
- MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1);
+ unsigned Scale =
+ (Opc == X86::VALIGNQZ128rri || Opc == X86::VALIGNQZ128rmi) ? 8 : 4;
+ MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
Imm.setImm(Imm.getImm() * Scale);
break;
}
@@ -172,10 +175,10 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc,
case X86::VSHUFI32X4Z256rri:
case X86::VSHUFI64X2Z256rmi:
case X86::VSHUFI64X2Z256rri: {
- assert((NewOpc == X86::VPERM2F128rr || NewOpc == X86::VPERM2I128rr ||
- NewOpc == X86::VPERM2F128rm || NewOpc == X86::VPERM2I128rm) &&
+ assert((VexOpc == X86::VPERM2F128rr || VexOpc == X86::VPERM2I128rr ||
+ VexOpc == X86::VPERM2F128rm || VexOpc == X86::VPERM2I128rm) &&
"Unexpected new opcode!");
- MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1);
+ MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
int64_t ImmVal = Imm.getImm();
// Set bit 5, move bit 1 to bit 4, copy bit 0.
Imm.setImm(0x20 | ((ImmVal & 2) << 3) | (ImmVal & 1));
@@ -208,10 +211,9 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc,
return true;
}
-
// For EVEX instructions that can be encoded using VEX encoding
// replace them by the VEX encoding in order to reduce size.
-bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
+static bool CompressEvexToVexImpl(MachineInstr &MI, const X86Subtarget &ST) {
// VEX format.
// # of bytes: 0,2,3 1 1 0,1 0,1,2,4 0,1
// [Prefixes] [VEX] OPCODE ModR/M [SIB] [DISP] [IMM]
@@ -219,7 +221,6 @@ bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
// EVEX format.
// # of bytes: 4 1 1 1 4 / 1 1
// [Prefixes] EVEX Opcode ModR/M [SIB] [Disp32] / [Disp8*N] [Immediate]
-
const MCInstrDesc &Desc = MI.getDesc();
// Check for EVEX instructions only.
@@ -237,6 +238,29 @@ bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
if (Desc.TSFlags & X86II::EVEX_L2)
return false;
+ // Use the VEX.L bit to select the 128 or 256-bit table.
+ ArrayRef<X86EvexToVexCompressTableEntry> Table =
+ (Desc.TSFlags & X86II::VEX_L) ? ArrayRef(X86EvexToVex256CompressTable)
+ : ArrayRef(X86EvexToVex128CompressTable);
+
+ unsigned EvexOpc = MI.getOpcode();
+ const auto *I = llvm::lower_bound(Table, EvexOpc);
+ if (I == Table.end() || I->EvexOpc != EvexOpc)
+ return false;
+
+ if (usesExtendedRegister(MI))
+ return false;
+ if (!checkVEXInstPredicate(EvexOpc, ST))
+ return false;
+ if (!performCustomAdjustments(MI, I->VexOpc))
+ return false;
+
+ MI.setDesc(ST.getInstrInfo()->get(I->VexOpc));
+ MI.setAsmPrinterFlag(X86::AC_EVEX_2_VEX);
+ return true;
+}
+
+bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
#ifndef NDEBUG
// Make sure the tables are sorted.
static std::atomic<bool> TableChecked(false);
@@ -248,30 +272,21 @@ bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
TableChecked.store(true, std::memory_order_relaxed);
}
#endif
-
- // Use the VEX.L bit to select the 128 or 256-bit table.
- ArrayRef<X86EvexToVexCompressTableEntry> Table =
- (Desc.TSFlags & X86II::VEX_L) ? ArrayRef(X86EvexToVex256CompressTable)
- : ArrayRef(X86EvexToVex128CompressTable);
-
- const auto *I = llvm::lower_bound(Table, MI.getOpcode());
- if (I == Table.end() || I->EvexOpcode != MI.getOpcode())
+ const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
+ if (!ST.hasAVX512())
return false;
- unsigned NewOpc = I->VexOpcode;
-
- if (usesExtendedRegister(MI))
- return false;
-
- if (!CheckVEXInstPredicate(MI, ST))
- return false;
+ bool Changed = false;
- if (!performCustomAdjustments(MI, NewOpc, ST))
- return false;
+ /// Go over all basic blocks in function and replace
+ /// EVEX encoded instrs by VEX encoding when possible.
+ for (MachineBasicBlock &MBB : MF) {
+ // Traverse the basic block.
+ for (MachineInstr &MI : MBB)
+ Changed |= CompressEvexToVexImpl(MI, ST);
+ }
- MI.setDesc(TII->get(NewOpc));
- MI.setAsmPrinterFlag(X86::AC_EVEX_2_VEX);
- return true;
+ return Changed;
}
INITIALIZE_PASS(EvexToVexInstPass, EVEX2VEX_NAME, EVEX2VEX_DESC, false, false)
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 085fa9280b0e..ecc7208e7607 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -264,6 +264,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
const DebugLoc &DL = MBBI->getDebugLoc();
+ bool HasEGPR = STI->hasEGPR();
switch (Opcode) {
default:
return false;
@@ -466,10 +467,14 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);
- auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
- .addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
- auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
- .addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));
+ auto MIBLo =
+ BuildMI(MBB, MBBI, DL,
+ TII->get(HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm))
+ .addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
+ auto MIBHi =
+ BuildMI(MBB, MBBI, DL,
+ TII->get(HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm))
+ .addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));
for (int i = 0; i < X86::AddrNumOperands; ++i) {
MIBLo.add(MBBI->getOperand(1 + i));
@@ -500,8 +505,10 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);
- auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWmk));
- auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWmk));
+ auto MIBLo = BuildMI(MBB, MBBI, DL,
+ TII->get(HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk));
+ auto MIBHi = BuildMI(MBB, MBBI, DL,
+ TII->get(HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk));
for (int i = 0; i < X86::AddrNumOperands; ++i) {
MIBLo.add(MBBI->getOperand(i));
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp
index ff90b402b9b9..7f134fe1c72b 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp
@@ -711,7 +711,12 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
// Handle constant address.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
// Can't handle alternate code models yet.
- if (TM.getCodeModel() != CodeModel::Small)
+ if (TM.getCodeModel() != CodeModel::Small &&
+ TM.getCodeModel() != CodeModel::Medium)
+ return false;
+
+ // Can't handle large objects yet.
+ if (TM.isLargeGlobalValue(GV))
return false;
// Can't handle TLS yet.
@@ -1044,7 +1049,8 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
// Handle constant address.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
// Can't handle alternate code models yet.
- if (TM.getCodeModel() != CodeModel::Small)
+ if (TM.getCodeModel() != CodeModel::Small &&
+ TM.getCodeModel() != CodeModel::Medium)
return false;
// RIP-relative addresses can't have additional register operands.
@@ -2391,7 +2397,7 @@ bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
return false;
// TODO: We could sign extend narrower types.
- MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
if (SrcVT != MVT::i32 && SrcVT != MVT::i64)
return false;
@@ -3231,13 +3237,12 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (Subtarget->useIndirectThunkCalls())
return false;
- // Handle only C, fastcc, and webkit_js calling conventions for now.
+ // Handle only C and fastcc calling conventions for now.
switch (CC) {
default: return false;
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::Tail:
- case CallingConv::WebKit_JS:
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::X86_FastCall:
@@ -3284,9 +3289,9 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (auto *CI = dyn_cast<ConstantInt>(Val)) {
if (CI->getBitWidth() < 32) {
if (Flags.isSExt())
- Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
+ Val = ConstantInt::get(CI->getContext(), CI->getValue().sext(32));
else
- Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
+ Val = ConstantInt::get(CI->getContext(), CI->getValue().zext(32));
}
}
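A hedged sketch of the replacement pattern above: the ConstantExpr::getSExt/getZExt forms are being retired for integer constants, and the widening now goes through APInt instead.

  #include "llvm/IR/Constants.h"
  using namespace llvm;

  // Widen a ConstantInt narrower than 32 bits to i32, mirroring the
  // FastISel change above. Assumes CI->getBitWidth() < 32.
  Constant *widenTo32(ConstantInt *CI, bool IsSigned) {
    const APInt &V = CI->getValue();
    return ConstantInt::get(CI->getContext(),
                            IsSigned ? V.sext(32) : V.zext(32));
  }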
@@ -3519,6 +3524,10 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
assert(GV && "Not a direct call");
// See if we need any target-specific flags on the GV operand.
unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
+ if (OpFlags == X86II::MO_PLT && !Is64Bit &&
+ TM.getRelocationModel() == Reloc::Static && isa<Function>(GV) &&
+ cast<Function>(GV)->isIntrinsic())
+ OpFlags = X86II::MO_NO_FLAG;
// This will be a direct call, or an indirect call through memory for
// NonLazyBind calls or dllimport calls.
@@ -3765,7 +3774,8 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
// Can't handle alternate code models yet.
CodeModel::Model CM = TM.getCodeModel();
- if (CM != CodeModel::Small && CM != CodeModel::Large)
+ if (CM != CodeModel::Small && CM != CodeModel::Medium &&
+ CM != CodeModel::Large)
return 0;
// Get opcode and regclass of the output for the given load instruction.
@@ -3803,7 +3813,7 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
else if (OpFlag == X86II::MO_GOTOFF)
PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
- else if (Subtarget->is64Bit() && TM.getCodeModel() == CodeModel::Small)
+ else if (Subtarget->is64Bit() && TM.getCodeModel() != CodeModel::Large)
PICBase = X86::RIP;
// Create the load from the constant pool.
@@ -3833,8 +3843,11 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
}
unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
- // Can't handle alternate code models yet.
- if (TM.getCodeModel() != CodeModel::Small)
+ // Can't handle large GlobalValues yet.
+ if (TM.getCodeModel() != CodeModel::Small &&
+ TM.getCodeModel() != CodeModel::Medium)
+ return 0;
+ if (TM.isLargeGlobalValue(GV))
return 0;
// Materialize addresses with LEA/MOV instructions.
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FastPreTileConfig.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FastPreTileConfig.cpp
index 69b2adcd5f9e..ea942445a181 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FastPreTileConfig.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FastPreTileConfig.cpp
@@ -20,7 +20,6 @@
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp
index 5980e4572e7a..bf8588ad6dee 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp
@@ -50,7 +50,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -79,11 +79,10 @@ class FixupBWInstPass : public MachineFunctionPass {
/// byte or word instructions with better alternatives.
void processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
- /// This sets the \p SuperDestReg to the 32 bit super reg of the original
- /// destination register of the MachineInstr passed in. It returns true if
- /// that super register is dead just prior to \p OrigMI, and false if not.
- bool getSuperRegDestIfDead(MachineInstr *OrigMI,
- Register &SuperDestReg) const;
+ /// This returns the 32 bit super reg of the original destination register of
+ /// the MachineInstr passed in, if that super register is dead just prior to
+ /// \p OrigMI. Otherwise it returns Register().
+ Register getSuperRegDestIfDead(MachineInstr *OrigMI) const;
/// Change the MachineInstr \p MI into the equivalent extending load to 32 bit
/// register if it is safe to do so. Return the replacement instruction if
@@ -146,7 +145,7 @@ private:
MachineLoopInfo *MLI = nullptr;
/// Register Liveness information after the current instruction.
- LivePhysRegs LiveRegs;
+ LiveRegUnits LiveUnits;
ProfileSummaryInfo *PSI = nullptr;
MachineBlockFrequencyInfo *MBFI = nullptr;
@@ -170,7 +169,7 @@ bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) {
MBFI = (PSI && PSI->hasProfileSummary()) ?
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
nullptr;
- LiveRegs.init(TII->getRegisterInfo());
+ LiveUnits.init(TII->getRegisterInfo());
LLVM_DEBUG(dbgs() << "Start X86FixupBWInsts\n";);
@@ -188,11 +187,10 @@ bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) {
/// destination register.
///
/// If so, return that super register.
-bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI,
- Register &SuperDestReg) const {
+Register FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI) const {
const X86RegisterInfo *TRI = &TII->getRegisterInfo();
Register OrigDestReg = OrigMI->getOperand(0).getReg();
- SuperDestReg = getX86SubSuperRegister(OrigDestReg, 32);
+ Register SuperDestReg = getX86SubSuperRegister(OrigDestReg, 32);
assert(SuperDestReg.isValid() && "Invalid Operand");
const auto SubRegIdx = TRI->getSubRegIndex(SuperDestReg, OrigDestReg);
@@ -202,24 +200,23 @@ bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI,
// If it isn't, then the register isn't really dead even if the
// super-register is considered dead.
if (SubRegIdx == X86::sub_8bit_hi)
- return false;
-
- // If neither the destination-super register nor any applicable subregisters
- // are live after this instruction, then the super register is safe to use.
- if (!LiveRegs.contains(SuperDestReg)) {
- // If the original destination register was not the low 8-bit subregister
- // then the super register check is sufficient.
- if (SubRegIdx != X86::sub_8bit)
- return true;
- // If the original destination register was the low 8-bit subregister and
- // we also need to check the 16-bit subregister and the high 8-bit
- // subregister.
- MCRegister HighReg = getX86SubSuperRegister(SuperDestReg, 8, /*High=*/true);
- if (!LiveRegs.contains(getX86SubSuperRegister(OrigDestReg, 16)) &&
- (!HighReg.isValid() || !LiveRegs.contains(HighReg)))
- return true;
- // Otherwise, we have a little more checking to do.
+ return Register();
+
+ // Test all regunits of the super register that are not part of the
+ // sub register. If none of them are live then the super register is safe to
+ // use.
+ bool SuperIsLive = false;
+ auto Range = TRI->regunits(OrigDestReg);
+ MCRegUnitIterator I = Range.begin(), E = Range.end();
+ for (MCRegUnit S : TRI->regunits(SuperDestReg)) {
+ I = std::lower_bound(I, E, S);
+ if ((I == E || *I > S) && LiveUnits.getBitVector().test(S)) {
+ SuperIsLive = true;
+ break;
+ }
}
+ if (!SuperIsLive)
+ return SuperDestReg;
// If we get here, the super-register destination (or some part of it) is
// marked as live after the original instruction.
@@ -257,7 +254,7 @@ bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI,
// same properties.
if (Opc != X86::MOV8rm && Opc != X86::MOV16rm && Opc != X86::MOV8rr &&
Opc != X86::MOV16rr)
- return false;
+ return Register();
bool IsDefined = false;
for (auto &MO: OrigMI->implicit_operands()) {
@@ -275,26 +272,25 @@ bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI,
// %eax, or %rax will prevent us from using the %eax register.
if (MO.isUse() && !TRI->isSubRegisterEq(OrigDestReg, MO.getReg()) &&
TRI->regsOverlap(SuperDestReg, MO.getReg()))
- return false;
+ return Register();
}
// Reg is not Imp-def'ed -> it's live both before/after the instruction.
if (!IsDefined)
- return false;
+ return Register();
// Otherwise, the Reg is not live before the MI and the MOV can't
// make it really live, so it's in fact dead even after the MI.
- return true;
+ return SuperDestReg;
}
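The regunit walk above is a sorted set-difference with a liveness test. A minimal standalone sketch of the same idea, assuming both unit lists are sorted ascending (which the code above relies on):

  #include <algorithm>
  #include <vector>

  // True if any unit in Super that is absent from Sub satisfies IsLive.
  template <class Pred>
  bool anyExtraUnitLive(const std::vector<unsigned> &Sub,
                        const std::vector<unsigned> &Super, Pred IsLive) {
    auto I = Sub.begin(), E = Sub.end();
    for (unsigned S : Super) {
      I = std::lower_bound(I, E, S);       // I only moves forward: linear total
      if ((I == E || *I > S) && IsLive(S)) // S is not in Sub, and still live
        return true;
    }
    return false;
  }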
MachineInstr *FixupBWInstPass::tryReplaceLoad(unsigned New32BitOpcode,
MachineInstr *MI) const {
- Register NewDestReg;
-
// We are going to try to rewrite this load to a larger zero-extending
// load. This is safe if all portions of the 32 bit super-register
// of the original destination register, except for the original destination
// register are dead. getSuperRegDestIfDead checks that.
- if (!getSuperRegDestIfDead(MI, NewDestReg))
+ Register NewDestReg = getSuperRegDestIfDead(MI);
+ if (!NewDestReg)
return nullptr;
// Safe to change the instruction.
@@ -323,8 +319,8 @@ MachineInstr *FixupBWInstPass::tryReplaceCopy(MachineInstr *MI) const {
auto &OldDest = MI->getOperand(0);
auto &OldSrc = MI->getOperand(1);
- Register NewDestReg;
- if (!getSuperRegDestIfDead(MI, NewDestReg))
+ Register NewDestReg = getSuperRegDestIfDead(MI);
+ if (!NewDestReg)
return nullptr;
Register NewSrcReg = getX86SubSuperRegister(OldSrc.getReg(), 32);
@@ -357,8 +353,8 @@ MachineInstr *FixupBWInstPass::tryReplaceCopy(MachineInstr *MI) const {
MachineInstr *FixupBWInstPass::tryReplaceExtend(unsigned New32BitOpcode,
MachineInstr *MI) const {
- Register NewDestReg;
- if (!getSuperRegDestIfDead(MI, NewDestReg))
+ Register NewDestReg = getSuperRegDestIfDead(MI);
+ if (!NewDestReg)
return nullptr;
// Don't interfere with formation of CBW instructions which should be a
@@ -452,9 +448,9 @@ void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
// Start computing liveness for this block. We iterate from the end to be able
// to update this for each instruction.
- LiveRegs.clear();
+ LiveUnits.clear();
// We run after PEI, so we need to AddPristinesAndCSRs.
- LiveRegs.addLiveOuts(MBB);
+ LiveUnits.addLiveOuts(MBB);
OptForSize = MF.getFunction().hasOptSize() ||
llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
@@ -464,7 +460,7 @@ void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
MIReplacements.push_back(std::make_pair(&MI, NewMI));
// We're done with this instruction, update liveness for the next one.
- LiveRegs.stepBackward(MI);
+ LiveUnits.stepBackward(MI);
}
while (!MIReplacements.empty()) {
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp
index c702c015d7b3..beeebf42dfe8 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -330,8 +330,8 @@ static inline bool isInefficientLEAReg(unsigned Reg) {
Reg == X86::R13D || Reg == X86::R13;
}
-/// Returns true if this LEA uses base an index registers, and the base register
-/// is known to be inefficient for the subtarget.
+/// Returns true if this LEA uses base and index registers, and the base
+/// register is known to be inefficient for the subtarget.
// TODO: use a variant scheduling class to model the latency profile
// of LEA instructions, and implement this logic as a scheduling predicate.
static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
@@ -341,7 +341,8 @@ static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
}
static inline bool hasLEAOffset(const MachineOperand &Offset) {
- return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
+ return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal() ||
+ Offset.isBlockAddress();
}
static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupVectorConstants.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
index 94e221fd877c..483becebbe10 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
@@ -9,7 +9,7 @@
// This file examines all full size vector constant pool loads and attempts to
// replace them with smaller constant pool entries, including:
// * Converting AVX512 memory-fold instructions to their broadcast-fold form
-// * TODO: Broadcasting of full width loads.
+// * Broadcasting of full width loads.
// * TODO: Sign/Zero extension of full width loads.
//
//===----------------------------------------------------------------------===//
@@ -190,12 +190,13 @@ static Constant *rebuildSplatableConstant(const Constant *C,
Type *SclTy = OriginalType->getScalarType();
unsigned NumSclBits = SclTy->getPrimitiveSizeInBits();
NumSclBits = std::min<unsigned>(NumSclBits, SplatBitWidth);
+ LLVMContext &Ctx = OriginalType->getContext();
if (NumSclBits == 8) {
SmallVector<uint8_t> RawBits;
for (unsigned I = 0; I != SplatBitWidth; I += 8)
RawBits.push_back(Splat->extractBits(8, I).getZExtValue());
- return ConstantDataVector::get(OriginalType->getContext(), RawBits);
+ return ConstantDataVector::get(Ctx, RawBits);
}
if (NumSclBits == 16) {
@@ -204,7 +205,7 @@ static Constant *rebuildSplatableConstant(const Constant *C,
RawBits.push_back(Splat->extractBits(16, I).getZExtValue());
if (SclTy->is16bitFPTy())
return ConstantDataVector::getFP(SclTy, RawBits);
- return ConstantDataVector::get(OriginalType->getContext(), RawBits);
+ return ConstantDataVector::get(Ctx, RawBits);
}
if (NumSclBits == 32) {
@@ -213,7 +214,7 @@ static Constant *rebuildSplatableConstant(const Constant *C,
RawBits.push_back(Splat->extractBits(32, I).getZExtValue());
if (SclTy->isFloatTy())
return ConstantDataVector::getFP(SclTy, RawBits);
- return ConstantDataVector::get(OriginalType->getContext(), RawBits);
+ return ConstantDataVector::get(Ctx, RawBits);
}
// Fallback to i64 / double.
@@ -222,7 +223,7 @@ static Constant *rebuildSplatableConstant(const Constant *C,
RawBits.push_back(Splat->extractBits(64, I).getZExtValue());
if (SclTy->isDoubleTy())
return ConstantDataVector::getFP(SclTy, RawBits);
- return ConstantDataVector::get(OriginalType->getContext(), RawBits);
+ return ConstantDataVector::get(Ctx, RawBits);
}
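All four branches above perform the same slicing, just at different lane widths: the splat pattern is cut into NumSclBits-wide pieces and rebuilt as a narrower constant vector. A hedged sketch of the 8-bit case, with a plain uint64_t standing in for the APInt splat:

    #include <cstdint>
    #include <vector>

    // Slice a SplatBits-wide pattern (at most 64 bits here) into 8-bit
    // lanes, mirroring Splat->extractBits(8, I) in the code above.
    static std::vector<uint8_t> sliceSplatTo8(uint64_t Splat,
                                              unsigned SplatBits) {
      std::vector<uint8_t> RawBits;
      for (unsigned I = 0; I != SplatBits; I += 8)
        RawBits.push_back(static_cast<uint8_t>(Splat >> I)); // bits [I, I+8)
      return RawBits;
    }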
bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
@@ -230,8 +231,10 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
MachineConstantPool *CP = MI.getParent()->getParent()->getConstantPool();
+ bool HasAVX2 = ST->hasAVX2();
bool HasDQI = ST->hasDQI();
bool HasBWI = ST->hasBWI();
+ bool HasVLX = ST->hasVLX();
auto ConvertToBroadcast = [&](unsigned OpBcst256, unsigned OpBcst128,
unsigned OpBcst64, unsigned OpBcst32,
@@ -283,7 +286,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
case X86::VMOVAPSYrm:
case X86::VMOVUPDYrm:
case X86::VMOVUPSYrm:
- return ConvertToBroadcast(0, X86::VBROADCASTF128, X86::VBROADCASTSDYrm,
+ return ConvertToBroadcast(0, X86::VBROADCASTF128rm, X86::VBROADCASTSDYrm,
X86::VBROADCASTSSYrm, 0, 0, 1);
case X86::VMOVAPDZ128rm:
case X86::VMOVAPSZ128rm:
@@ -295,33 +298,32 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
case X86::VMOVAPSZ256rm:
case X86::VMOVUPDZ256rm:
case X86::VMOVUPSZ256rm:
- return ConvertToBroadcast(
- 0, HasDQI ? X86::VBROADCASTF64X2Z128rm : X86::VBROADCASTF32X4Z256rm,
- X86::VBROADCASTSDZ256rm, X86::VBROADCASTSSZ256rm, 0, 0, 1);
+ return ConvertToBroadcast(0, X86::VBROADCASTF32X4Z256rm,
+ X86::VBROADCASTSDZ256rm, X86::VBROADCASTSSZ256rm,
+ 0, 0, 1);
case X86::VMOVAPDZrm:
case X86::VMOVAPSZrm:
case X86::VMOVUPDZrm:
case X86::VMOVUPSZrm:
- return ConvertToBroadcast(
- HasDQI ? X86::VBROADCASTF32X8rm : X86::VBROADCASTF64X4rm,
- HasDQI ? X86::VBROADCASTF64X2rm : X86::VBROADCASTF32X4rm,
- X86::VBROADCASTSDZrm, X86::VBROADCASTSSZrm, 0, 0, 1);
+ return ConvertToBroadcast(X86::VBROADCASTF64X4rm, X86::VBROADCASTF32X4rm,
+ X86::VBROADCASTSDZrm, X86::VBROADCASTSSZrm, 0, 0,
+ 1);
/* Integer Loads */
case X86::VMOVDQArm:
case X86::VMOVDQUrm:
- if (ST->hasAVX2())
- return ConvertToBroadcast(0, 0, X86::VPBROADCASTQrm, X86::VPBROADCASTDrm,
- X86::VPBROADCASTWrm, X86::VPBROADCASTBrm, 1);
- return ConvertToBroadcast(0, 0, X86::VMOVDDUPrm, X86::VBROADCASTSSrm, 0, 0,
- 1);
+ return ConvertToBroadcast(
+ 0, 0, HasAVX2 ? X86::VPBROADCASTQrm : X86::VMOVDDUPrm,
+ HasAVX2 ? X86::VPBROADCASTDrm : X86::VBROADCASTSSrm,
+ HasAVX2 ? X86::VPBROADCASTWrm : 0, HasAVX2 ? X86::VPBROADCASTBrm : 0,
+ 1);
case X86::VMOVDQAYrm:
case X86::VMOVDQUYrm:
- if (ST->hasAVX2())
- return ConvertToBroadcast(0, X86::VBROADCASTI128, X86::VPBROADCASTQYrm,
- X86::VPBROADCASTDYrm, X86::VPBROADCASTWYrm,
- X86::VPBROADCASTBYrm, 1);
- return ConvertToBroadcast(0, X86::VBROADCASTF128, X86::VBROADCASTSDYrm,
- X86::VBROADCASTSSYrm, 0, 0, 1);
+ return ConvertToBroadcast(
+ 0, HasAVX2 ? X86::VBROADCASTI128rm : X86::VBROADCASTF128rm,
+ HasAVX2 ? X86::VPBROADCASTQYrm : X86::VBROADCASTSDYrm,
+ HasAVX2 ? X86::VPBROADCASTDYrm : X86::VBROADCASTSSYrm,
+ HasAVX2 ? X86::VPBROADCASTWYrm : 0, HasAVX2 ? X86::VPBROADCASTBYrm : 0,
+ 1);
case X86::VMOVDQA32Z128rm:
case X86::VMOVDQA64Z128rm:
case X86::VMOVDQU32Z128rm:
@@ -334,37 +336,36 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
case X86::VMOVDQA64Z256rm:
case X86::VMOVDQU32Z256rm:
case X86::VMOVDQU64Z256rm:
- return ConvertToBroadcast(
- 0, HasDQI ? X86::VBROADCASTI64X2Z128rm : X86::VBROADCASTI32X4Z256rm,
- X86::VPBROADCASTQZ256rm, X86::VPBROADCASTDZ256rm,
- HasBWI ? X86::VPBROADCASTWZ256rm : 0,
- HasBWI ? X86::VPBROADCASTBZ256rm : 0, 1);
+ return ConvertToBroadcast(0, X86::VBROADCASTI32X4Z256rm,
+ X86::VPBROADCASTQZ256rm, X86::VPBROADCASTDZ256rm,
+ HasBWI ? X86::VPBROADCASTWZ256rm : 0,
+ HasBWI ? X86::VPBROADCASTBZ256rm : 0, 1);
case X86::VMOVDQA32Zrm:
case X86::VMOVDQA64Zrm:
case X86::VMOVDQU32Zrm:
case X86::VMOVDQU64Zrm:
- return ConvertToBroadcast(
- HasDQI ? X86::VBROADCASTI32X8rm : X86::VBROADCASTI64X4rm,
- HasDQI ? X86::VBROADCASTI64X2rm : X86::VBROADCASTI32X4rm,
- X86::VPBROADCASTQZrm, X86::VPBROADCASTDZrm,
- HasBWI ? X86::VPBROADCASTWZrm : 0, HasBWI ? X86::VPBROADCASTBZrm : 0,
- 1);
+ return ConvertToBroadcast(X86::VBROADCASTI64X4rm, X86::VBROADCASTI32X4rm,
+ X86::VPBROADCASTQZrm, X86::VPBROADCASTDZrm,
+ HasBWI ? X86::VPBROADCASTWZrm : 0,
+ HasBWI ? X86::VPBROADCASTBZrm : 0, 1);
}
- // Attempt to find a AVX512 mapping from a full width memory-fold instruction
- // to a broadcast-fold instruction variant.
- if ((MI.getDesc().TSFlags & X86II::EncodingMask) == X86II::EVEX) {
+ auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc32, unsigned OpSrc64) {
unsigned OpBcst32 = 0, OpBcst64 = 0;
unsigned OpNoBcst32 = 0, OpNoBcst64 = 0;
- if (const X86MemoryFoldTableEntry *Mem2Bcst =
- llvm::lookupBroadcastFoldTable(Opc, 32)) {
- OpBcst32 = Mem2Bcst->DstOp;
- OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
+ if (OpSrc32) {
+ if (const X86FoldTableEntry *Mem2Bcst =
+ llvm::lookupBroadcastFoldTable(OpSrc32, 32)) {
+ OpBcst32 = Mem2Bcst->DstOp;
+ OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
+ }
}
- if (const X86MemoryFoldTableEntry *Mem2Bcst =
- llvm::lookupBroadcastFoldTable(Opc, 64)) {
- OpBcst64 = Mem2Bcst->DstOp;
- OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
+ if (OpSrc64) {
+ if (const X86FoldTableEntry *Mem2Bcst =
+ llvm::lookupBroadcastFoldTable(OpSrc64, 64)) {
+ OpBcst64 = Mem2Bcst->DstOp;
+ OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
+ }
}
assert(((OpBcst32 == 0) || (OpBcst64 == 0) || (OpNoBcst32 == OpNoBcst64)) &&
"OperandNo mismatch");
@@ -373,6 +374,70 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
return ConvertToBroadcast(0, 0, OpBcst64, OpBcst32, 0, 0, OpNo);
}
+ return false;
+ };
+
+ // Attempt to find an AVX512 mapping from a full width memory-fold instruction
+ // to a broadcast-fold instruction variant.
+ if ((MI.getDesc().TSFlags & X86II::EncodingMask) == X86II::EVEX)
+ return ConvertToBroadcastAVX512(Opc, Opc);
+
+ // Reverse the X86InstrInfo::setExecutionDomainCustom EVEX->VEX logic
+ // conversion to see if we can convert to a broadcasted (integer) logic op.
+ if (HasVLX && !HasDQI) {
+ unsigned OpSrc32 = 0, OpSrc64 = 0;
+ switch (Opc) {
+ case X86::VANDPDrm:
+ case X86::VANDPSrm:
+ case X86::VPANDrm:
+ OpSrc32 = X86::VPANDDZ128rm;
+ OpSrc64 = X86::VPANDQZ128rm;
+ break;
+ case X86::VANDPDYrm:
+ case X86::VANDPSYrm:
+ case X86::VPANDYrm:
+ OpSrc32 = X86::VPANDDZ256rm;
+ OpSrc64 = X86::VPANDQZ256rm;
+ break;
+ case X86::VANDNPDrm:
+ case X86::VANDNPSrm:
+ case X86::VPANDNrm:
+ OpSrc32 = X86::VPANDNDZ128rm;
+ OpSrc64 = X86::VPANDNQZ128rm;
+ break;
+ case X86::VANDNPDYrm:
+ case X86::VANDNPSYrm:
+ case X86::VPANDNYrm:
+ OpSrc32 = X86::VPANDNDZ256rm;
+ OpSrc64 = X86::VPANDNQZ256rm;
+ break;
+ case X86::VORPDrm:
+ case X86::VORPSrm:
+ case X86::VPORrm:
+ OpSrc32 = X86::VPORDZ128rm;
+ OpSrc64 = X86::VPORQZ128rm;
+ break;
+ case X86::VORPDYrm:
+ case X86::VORPSYrm:
+ case X86::VPORYrm:
+ OpSrc32 = X86::VPORDZ256rm;
+ OpSrc64 = X86::VPORQZ256rm;
+ break;
+ case X86::VXORPDrm:
+ case X86::VXORPSrm:
+ case X86::VPXORrm:
+ OpSrc32 = X86::VPXORDZ128rm;
+ OpSrc64 = X86::VPXORQZ128rm;
+ break;
+ case X86::VXORPDYrm:
+ case X86::VXORPSYrm:
+ case X86::VPXORYrm:
+ OpSrc32 = X86::VPXORDZ256rm;
+ OpSrc64 = X86::VPXORQZ256rm;
+ break;
+ }
+ if (OpSrc32 || OpSrc64)
+ return ConvertToBroadcastAVX512(OpSrc32, OpSrc64);
}
return false;
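The new path reverses the EVEX-to-VEX domain fix-up: a plain VEX logic op is first mapped to an equivalent EVEX opcode, and only that opcode is looked up in the broadcast-fold table. A hypothetical sketch of the two-step lookup using ordinary maps (the table contents are illustrative, not the real fold tables):

    #include <map>

    struct FoldEntry {
      unsigned DstOp;     // broadcast-fold opcode
      unsigned OperandNo; // index of the memory operand
    };

    // Map a VEX logic op to a broadcast opcode via its EVEX equivalent,
    // or return 0 if either step has no mapping.
    static unsigned toBroadcast(unsigned VexOpc,
                                const std::map<unsigned, unsigned> &VexToEvex,
                                const std::map<unsigned, FoldEntry> &Mem2Bcst) {
      auto E = VexToEvex.find(VexOpc); // e.g. VPANDrm -> VPANDDZ128rm
      if (E == VexToEvex.end())
        return 0;
      auto B = Mem2Bcst.find(E->second); // full-width EVEX -> broadcast form
      return B == Mem2Bcst.end() ? 0 : B->second.DstOp;
    }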
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
index f24dbcfe972d..b13bf361ab79 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -24,15 +24,11 @@
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -156,8 +152,6 @@ namespace {
/// dispatch with specific functionality.
enum class FlagArithMnemonic {
ADC,
- ADCX,
- ADOX,
RCL,
RCR,
SBB,
@@ -221,18 +215,6 @@ static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) {
#undef LLVM_EXPAND_INSTR_SIZES
- case X86::ADCX32rr:
- case X86::ADCX64rr:
- case X86::ADCX32rm:
- case X86::ADCX64rm:
- return FlagArithMnemonic::ADCX;
-
- case X86::ADOX32rr:
- case X86::ADOX64rr:
- case X86::ADOX32rm:
- case X86::ADOX64rm:
- return FlagArithMnemonic::ADOX;
-
case X86::SETB_C32r:
case X86::SETB_C64r:
return FlagArithMnemonic::SETB;
@@ -802,7 +784,6 @@ void X86FlagsCopyLoweringPass::rewriteArithmetic(
switch (getMnemonicFromOpcode(MI.getOpcode())) {
case FlagArithMnemonic::ADC:
- case FlagArithMnemonic::ADCX:
case FlagArithMnemonic::RCL:
case FlagArithMnemonic::RCR:
case FlagArithMnemonic::SBB:
@@ -812,13 +793,6 @@ void X86FlagsCopyLoweringPass::rewriteArithmetic(
// having a higher bit available.
Addend = 255;
break;
-
- case FlagArithMnemonic::ADOX:
- Cond = X86::COND_O; // OF == 1
- // Set up an addend that when one is added will turn from positive to
- // negative and thus overflow in the signed domain.
- Addend = 127;
- break;
}
// Now get a register that contains the value of the flag input to the
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp
index 7513b198e604..aab2535aa86d 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -26,7 +26,6 @@
#include "X86InstrInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -1598,8 +1597,9 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI.getNumOperands();
i != e && MI.getOperand(i).isImm(); i += 1 + NumOps) {
unsigned Flags = MI.getOperand(i).getImm();
+ const InlineAsm::Flag F(Flags);
- NumOps = InlineAsm::getNumOperandRegisters(Flags);
+ NumOps = F.getNumOperandRegisters();
if (NumOps != 1)
continue;
const MachineOperand &MO = MI.getOperand(i + 1);
@@ -1611,20 +1611,20 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
// If the flag has a register class constraint, this must be an operand
// with constraint "f". Record its index and continue.
- if (InlineAsm::hasRegClassConstraint(Flags, RCID)) {
+ if (F.hasRegClassConstraint(RCID)) {
FRegIdx.insert(i + 1);
continue;
}
- switch (InlineAsm::getKind(Flags)) {
- case InlineAsm::Kind_RegUse:
+ switch (F.getKind()) {
+ case InlineAsm::Kind::RegUse:
STUses |= (1u << STReg);
break;
- case InlineAsm::Kind_RegDef:
- case InlineAsm::Kind_RegDefEarlyClobber:
+ case InlineAsm::Kind::RegDef:
+ case InlineAsm::Kind::RegDefEarlyClobber:
STDefs |= (1u << STReg);
break;
- case InlineAsm::Kind_Clobber:
+ case InlineAsm::Kind::Clobber:
STClobbers |= (1u << STReg);
break;
default:
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp
index a5a4f91299f3..c0d358ead278 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -17,7 +17,6 @@
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -42,6 +41,7 @@
STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
STATISTIC(NumFrameExtraProbe,
"Number of extra stack probes generated in prologue");
+STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");
using namespace llvm;
@@ -69,8 +69,8 @@ bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
/// call frame pseudos can be simplified. Having a FP, as in the default
/// implementation, is not sufficient here since we can't always use it.
/// Use a more nuanced condition.
-bool
-X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
+bool X86FrameLowering::canSimplifyCallFramePseudos(
+ const MachineFunction &MF) const {
return hasReservedCallFrame(MF) ||
MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
(hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
@@ -84,8 +84,8 @@ X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
// that were not simplified earlier.
// So, this is required for x86 functions that have push sequences even
// when there are no stack objects.
-bool
-X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
+bool X86FrameLowering::needsFrameIndexResolution(
+ const MachineFunction &MF) const {
return MF.getFrameInfo().hasStackObjects() ||
MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}
@@ -140,6 +140,38 @@ static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
return X86::MOV32ri;
}
+// Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the
+// value written by the PUSH from the stack. The processor tracks these marked
+// instructions internally and fast-forwards register data between matching PUSH
+// and POP instructions, without going through memory or through the training
+// loop of the Fast Store Forwarding Predictor (FSFP). Instead, a more efficient
+// memory-renaming optimization can be used.
+//
+// The PPX hint is purely a performance hint. Instructions with this hint have
+// the same functional semantics as those without. PPX hints set by the
+// compiler that violate the balancing rule may turn off the PPX optimization,
+// but they will not affect program semantics.
+//
+// Hence, PPX is used for balanced spills/reloads (exceptions and
+// setjmp/longjmp are not considered).
+//
+// PUSH2 and POP2 are instructions for (respectively) pushing/popping 2
+// GPRs at a time to/from the stack.
+static unsigned getPUSHOpcode(const X86Subtarget &ST) {
+ return ST.is64Bit() ? (ST.hasPPX() ? X86::PUSHP64r : X86::PUSH64r)
+ : X86::PUSH32r;
+}
+static unsigned getPOPOpcode(const X86Subtarget &ST) {
+ return ST.is64Bit() ? (ST.hasPPX() ? X86::POPP64r : X86::POP64r)
+ : X86::POP32r;
+}
+static unsigned getPUSH2Opcode(const X86Subtarget &ST) {
+ return ST.hasPPX() ? X86::PUSH2P : X86::PUSH2;
+}
+static unsigned getPOP2Opcode(const X86Subtarget &ST) {
+ return ST.hasPPX() ? X86::POP2P : X86::POP2;
+}
+
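As a rough illustration of the helpers above (a sketch over a plain enum, not the generated X86 opcode space), the PPX-hinted variants are selected only when the subtarget reports the feature, and 32-bit targets never see them:

    // Balanced, PPX-hinted spill/reload shape (hand-written illustration,
    // not compiler output):
    //   push2p %r15, %r14 ... pop2p %r14, %r15;  pushp %rbx ... popp %rbx
    enum class PushOp { PUSH32r, PUSH64r, PUSHP64r };

    static PushOp pickPush(bool Is64Bit, bool HasPPX) {
      return Is64Bit ? (HasPPX ? PushOp::PUSHP64r : PushOp::PUSH64r)
                     : PushOp::PUSH32r;
    }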
static bool isEAXLiveIn(MachineBasicBlock &MBB) {
for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
unsigned Reg = RegMask.PhysReg;
@@ -195,8 +227,8 @@ flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
/// stack pointer by a constant value.
void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- const DebugLoc &DL,
- int64_t NumBytes, bool InEpilogue) const {
+ const DebugLoc &DL, int64_t NumBytes,
+ bool InEpilogue) const {
bool isSub = NumBytes < 0;
uint64_t Offset = isSub ? -NumBytes : NumBytes;
MachineInstr::MIFlag Flag =
@@ -280,13 +312,11 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
if (ThisVal == SlotSize) {
// Use push / pop for slot sized adjustments as a size optimization. We
// need to find a dead register when using pop.
- unsigned Reg = isSub
- ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
- : TRI->findDeadCallerSavedReg(MBB, MBBI);
+ unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
+ : TRI->findDeadCallerSavedReg(MBB, MBBI);
if (Reg) {
- unsigned Opc = isSub
- ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
- : (Is64Bit ? X86::POP64r : X86::POP32r);
+ unsigned Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
+ : (Is64Bit ? X86::POP64r : X86::POP32r);
BuildMI(MBB, MBBI, DL, TII.get(Opc))
.addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
.setMIFlag(Flag);
@@ -562,49 +592,13 @@ void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
RegsToZero.reset(Reg);
}
+ // Zero out the GPRs first.
for (MCRegister Reg : GPRsToZero.set_bits())
- BuildMI(MBB, MBBI, DL, TII.get(X86::XOR32rr), Reg)
- .addReg(Reg, RegState::Undef)
- .addReg(Reg, RegState::Undef);
-
- // Zero out registers.
- for (MCRegister Reg : RegsToZero.set_bits()) {
- if (ST.hasMMX() && X86::VR64RegClass.contains(Reg))
- // FIXME: Ignore MMX registers?
- continue;
+ TII.buildClearRegister(Reg, MBB, MBBI, DL);
- unsigned XorOp;
- if (X86::VR128RegClass.contains(Reg)) {
- // XMM#
- if (!ST.hasSSE1())
- continue;
- XorOp = X86::PXORrr;
- } else if (X86::VR256RegClass.contains(Reg)) {
- // YMM#
- if (!ST.hasAVX())
- continue;
- XorOp = X86::VPXORrr;
- } else if (X86::VR512RegClass.contains(Reg)) {
- // ZMM#
- if (!ST.hasAVX512())
- continue;
- XorOp = X86::VPXORYrr;
- } else if (X86::VK1RegClass.contains(Reg) ||
- X86::VK2RegClass.contains(Reg) ||
- X86::VK4RegClass.contains(Reg) ||
- X86::VK8RegClass.contains(Reg) ||
- X86::VK16RegClass.contains(Reg)) {
- if (!ST.hasVLX())
- continue;
- XorOp = ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr;
- } else {
- continue;
- }
-
- BuildMI(MBB, MBBI, DL, TII.get(XorOp), Reg)
- .addReg(Reg, RegState::Undef)
- .addReg(Reg, RegState::Undef);
- }
+ // Zero out the remaining registers.
+ for (MCRegister Reg : RegsToZero.set_bits())
+ TII.buildClearRegister(Reg, MBB, MBBI, DL);
}
void X86FrameLowering::emitStackProbe(
@@ -959,24 +953,16 @@ void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
// registers. For the prolog expansion we use RAX, RCX and RDX.
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterClass *RegClass = &X86::GR64RegClass;
- const Register SizeReg = InProlog ? X86::RAX
- : MRI.createVirtualRegister(RegClass),
- ZeroReg = InProlog ? X86::RCX
- : MRI.createVirtualRegister(RegClass),
- CopyReg = InProlog ? X86::RDX
- : MRI.createVirtualRegister(RegClass),
- TestReg = InProlog ? X86::RDX
- : MRI.createVirtualRegister(RegClass),
- FinalReg = InProlog ? X86::RDX
- : MRI.createVirtualRegister(RegClass),
- RoundedReg = InProlog ? X86::RDX
- : MRI.createVirtualRegister(RegClass),
- LimitReg = InProlog ? X86::RCX
- : MRI.createVirtualRegister(RegClass),
- JoinReg = InProlog ? X86::RCX
- : MRI.createVirtualRegister(RegClass),
- ProbeReg = InProlog ? X86::RCX
- : MRI.createVirtualRegister(RegClass);
+ const Register
+ SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass),
+ ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
+ CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
+ TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
+ FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
+ RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
+ LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
+ JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
+ ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass);
// SP-relative offsets where we can save RCX and RDX.
int64_t RCXShadowSlot = 0;
@@ -1048,7 +1034,9 @@ void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
.addReg(X86::GS);
BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
// Jump if the desired stack pointer is at or above the stack limit.
- BuildMI(&MBB, DL, TII.get(X86::JCC_1)).addMBB(ContinueMBB).addImm(X86::COND_AE);
+ BuildMI(&MBB, DL, TII.get(X86::JCC_1))
+ .addMBB(ContinueMBB)
+ .addImm(X86::COND_AE);
// Add code to roundMBB to round the final stack pointer to a page boundary.
RoundMBB->addLiveIn(FinalReg);
@@ -1085,7 +1073,9 @@ void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
.addReg(RoundedReg)
.addReg(ProbeReg);
- BuildMI(LoopMBB, DL, TII.get(X86::JCC_1)).addMBB(LoopMBB).addImm(X86::COND_NE);
+ BuildMI(LoopMBB, DL, TII.get(X86::JCC_1))
+ .addMBB(LoopMBB)
+ .addImm(X86::COND_NE);
MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();
@@ -1169,7 +1159,7 @@ void X86FrameLowering::emitStackProbeCall(
CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
} else {
CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
- .addExternalSymbol(MF.createExternalSymbolName(Symbol));
+ .addExternalSymbol(MF.createExternalSymbolName(Symbol));
}
unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
@@ -1231,7 +1221,8 @@ static unsigned calculateSetFPREG(uint64_t SPAdjust) {
// info, we need to know the ABI stack alignment as well in case we
// have a call out. Otherwise just make sure we have some alignment - we'll
// go with the minimum SlotSize.
-uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
+uint64_t
+X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
Align StackAlign = getStackAlign();
@@ -1322,8 +1313,7 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
// Loop entry block
{
- const unsigned SUBOpc =
- getSUBriOpcode(Uses64BitFramePtr);
+ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
.addReg(StackPtr)
.addImm(StackProbeSize)
@@ -1353,8 +1343,7 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
.addImm(0)
.setMIFlag(MachineInstr::FrameSetup);
- const unsigned SUBOpc =
- getSUBriOpcode(Uses64BitFramePtr);
+ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
.addReg(StackPtr)
.addImm(StackProbeSize)
@@ -1405,7 +1394,7 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
}
}
-bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
+bool X86FrameLowering::has128ByteRedZone(const MachineFunction &MF) const {
// x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
// clobbered by any interrupt handler.
assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
@@ -1521,7 +1510,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
MachineModuleInfo &MMI = MF.getMMI();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
- uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
+ uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
bool IsFunclet = MBB.isEHFuncletEntry();
EHPersonality Personality = EHPersonality::Unknown;
if (Fn.hasPersonalityFn())
@@ -1539,8 +1528,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
bool NeedsDwarfCFI = needsDwarfCFI(MF);
Register FramePtr = TRI->getFrameRegister(MF);
const Register MachineFramePtr =
- STI.isTarget64BitILP32()
- ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
+ STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
+ : FramePtr;
Register BasePtr = TRI->getBaseRegister();
bool HasWinCFI = false;
@@ -1575,7 +1564,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
}
BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
int64_t Offset = -(int64_t)SlotSize;
- BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm: X86::PUSH32rmm))
+ BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
.addReg(ArgBaseReg)
.addImm(1)
.addReg(X86::NoRegister)
@@ -1587,7 +1576,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Space reserved for stack-based arguments when making a (ABI-guaranteed)
// tail call.
unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
- if (TailCallArgReserveSize && IsWin64Prologue)
+ if (TailCallArgReserveSize && IsWin64Prologue)
report_fatal_error("Can't handle guaranteed tail call under win64 yet");
const bool EmitStackProbeCall =
@@ -1659,7 +1648,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
!MF.shouldSplitStack()) { // Regular stack
uint64_t MinSize =
X86FI->getCalleeSavedFrameSize() - X86FI->getTCReturnAddrDelta();
- if (HasFP) MinSize += SlotSize;
+ if (HasFP)
+ MinSize += SlotSize;
X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
MFI.setStackSize(StackSize);
@@ -1714,17 +1704,18 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
- NumBytes = FrameSize -
- (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
+ NumBytes =
+ FrameSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
// Callee-saved registers are pushed on stack before the stack is realigned.
if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
NumBytes = alignTo(NumBytes, MaxAlign);
// Save EBP/RBP into the appropriate stack slot.
- BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
- .addReg(MachineFramePtr, RegState::Kill)
- .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL,
+ TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
+ .addReg(MachineFramePtr, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
// Mark the place where EBP/RBP was saved.
@@ -1839,8 +1830,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
}
} else {
assert(!IsFunclet && "funclets without FPs not yet implemented");
- NumBytes = StackSize -
- (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
+ NumBytes =
+ StackSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
}
// Update the offset adjustment, which is mainly used by codeview to translate
@@ -1861,19 +1852,30 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Skip the callee-saved push instructions.
bool PushedRegs = false;
int StackOffset = 2 * stackGrowth;
+ MachineBasicBlock::const_iterator LastCSPush = MBBI;
+ auto IsCSPush = [&](const MachineBasicBlock::iterator &MBBI) {
+ if (MBBI == MBB.end() || !MBBI->getFlag(MachineInstr::FrameSetup))
+ return false;
+ unsigned Opc = MBBI->getOpcode();
+ return Opc == X86::PUSH32r || Opc == X86::PUSH64r || Opc == X86::PUSHP64r ||
+ Opc == X86::PUSH2 || Opc == X86::PUSH2P;
+ };
- while (MBBI != MBB.end() &&
- MBBI->getFlag(MachineInstr::FrameSetup) &&
- (MBBI->getOpcode() == X86::PUSH32r ||
- MBBI->getOpcode() == X86::PUSH64r)) {
+ while (IsCSPush(MBBI)) {
PushedRegs = true;
Register Reg = MBBI->getOperand(0).getReg();
+ LastCSPush = MBBI;
++MBBI;
+ unsigned Opc = LastCSPush->getOpcode();
if (!HasFP && NeedsDwarfCFI) {
// Mark callee-saved push instruction.
// Define the current CFA rule to use the provided offset.
assert(StackSize);
+ // Compared to push, push2 introduces more stack offset (one more
+ // register).
+ if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
+ StackOffset += stackGrowth;
BuildCFI(MBB, MBBI, DL,
MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset),
MachineInstr::FrameSetup);
@@ -1885,6 +1887,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
.addImm(Reg)
.setMIFlag(MachineInstr::FrameSetup);
+ if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
+ .addImm(LastCSPush->getOperand(1).getReg())
+ .setMIFlag(MachineInstr::FrameSetup);
}
}
@@ -1933,13 +1939,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
if (Is64Bit) {
// Save RAX
BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
- .addReg(X86::RAX, RegState::Kill)
- .setMIFlag(MachineInstr::FrameSetup);
+ .addReg(X86::RAX, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
} else {
// Save EAX
BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
- .addReg(X86::EAX, RegState::Kill)
- .setMIFlag(MachineInstr::FrameSetup);
+ .addReg(X86::EAX, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
}
}
@@ -2122,16 +2128,16 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Update the base pointer with the current stack pointer.
unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
- .addReg(SPOrEstablisher)
- .setMIFlag(MachineInstr::FrameSetup);
+ .addReg(SPOrEstablisher)
+ .setMIFlag(MachineInstr::FrameSetup);
if (X86FI->getRestoreBasePointer()) {
// Stash value of base pointer. Saving RSP instead of EBP shortens
// dependence chain. Used by SjLj EH.
unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
- addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
- FramePtr, true, X86FI->getRestoreBasePointerOffset())
- .addReg(SPOrEstablisher)
- .setMIFlag(MachineInstr::FrameSetup);
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true,
+ X86FI->getRestoreBasePointerOffset())
+ .addReg(SPOrEstablisher)
+ .setMIFlag(MachineInstr::FrameSetup);
}
if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
@@ -2244,9 +2250,9 @@ X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
// This is the size of the pushed CSRs.
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
// This is the size of callee saved XMMs.
- const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
- unsigned XMMSize = WinEHXMMSlotInfo.size() *
- TRI->getSpillSize(X86::VR128RegClass);
+ const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
+ unsigned XMMSize =
+ WinEHXMMSlotInfo.size() * TRI->getSpillSize(X86::VR128RegClass);
// This is the amount of stack a funclet needs to allocate.
unsigned UsedSize;
EHPersonality Personality =
@@ -2270,10 +2276,9 @@ X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
}
static bool isTailCallOpcode(unsigned Opc) {
- return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
- Opc == X86::TCRETURNmi ||
- Opc == X86::TCRETURNri64 || Opc == X86::TCRETURNdi64 ||
- Opc == X86::TCRETURNmi64;
+ return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
+ Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
+ Opc == X86::TCRETURNdi64 || Opc == X86::TCRETURNmi64;
}
void X86FrameLowering::emitEpilogue(MachineFunction &MF,
@@ -2359,18 +2364,18 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (X86FI->hasSwiftAsyncContext()) {
// Discard the context.
int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
- emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/true);
+ emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
}
// Pop EBP.
- BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
+ BuildMI(MBB, MBBI, DL,
+ TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())),
MachineFramePtr)
.setMIFlag(MachineInstr::FrameDestroy);
// We need to reset FP to its untagged state on return. Bit 60 is currently
// used to show the presence of an extended frame.
if (X86FI->hasSwiftAsyncContext()) {
- BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8),
- MachineFramePtr)
+ BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8), MachineFramePtr)
.addUse(MachineFramePtr)
.addImm(60)
.setMIFlag(MachineInstr::FrameDestroy);
@@ -2403,10 +2408,10 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned Opc = PI->getOpcode();
if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
- if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
- (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
- (Opc != X86::BTR64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)) &&
- (Opc != X86::ADD64ri32 || !PI->getFlag(MachineInstr::FrameDestroy)))
+ if (!PI->getFlag(MachineInstr::FrameDestroy) ||
+ (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::BTR64ri8 &&
+ Opc != X86::ADD64ri32 && Opc != X86::POPP64r && Opc != X86::POP2 &&
+ Opc != X86::POP2P && Opc != X86::LEA64r))
break;
FirstCSPop = PI;
}
@@ -2458,13 +2463,12 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// effects of the prologue can safely be undone.
if (LEAAmount != 0) {
unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
- addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
- FramePtr, false, LEAAmount);
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), FramePtr,
+ false, LEAAmount);
--MBBI;
} else {
unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
- BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
- .addReg(FramePtr);
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr).addReg(FramePtr);
--MBBI;
}
} else if (NumBytes) {
@@ -2498,8 +2502,13 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator PI = MBBI;
unsigned Opc = PI->getOpcode();
++MBBI;
- if (Opc == X86::POP32r || Opc == X86::POP64r) {
+ if (Opc == X86::POP32r || Opc == X86::POP64r || Opc == X86::POPP64r ||
+ Opc == X86::POP2 || Opc == X86::POP2P) {
Offset += SlotSize;
+ // Compared to pop, pop2 introduces more stack offset (one more
+ // register).
+ if (Opc == X86::POP2 || Opc == X86::POP2P)
+ Offset += SlotSize;
BuildCFI(MBB, MBBI, DL,
MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset),
MachineInstr::FrameDestroy);
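Each restored slot moves the CFA by SlotSize, and a pop2 restores two slots at once, so it advances the offset twice. A small model of the bookkeeping above (SlotSize of 8 assumed, as on x86-64):

    #include <cstdint>

    // CFA offset after one frame-destroy pop; pop2/pop2p free two slots.
    static int64_t cfaAfterPop(int64_t Offset, bool IsPop2,
                               int64_t SlotSize = 8) {
      Offset += SlotSize;
      if (IsPop2)
        Offset += SlotSize; // the second register of the pair
      return Offset;
    }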
@@ -2570,7 +2579,8 @@ StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
- // If required, include space for extra hidden slot for stashing base pointer.
+ // If required, include space for extra hidden slot for stashing base
+ // pointer.
if (X86FI->getRestoreBasePointer())
FrameSize += SlotSize;
uint64_t NumBytes = FrameSize - CSSize;
@@ -2615,7 +2625,7 @@ int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
Register &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
+ const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
const auto it = WinEHXMMSlotInfo.find(FI);
if (it == WinEHXMMSlotInfo.end())
@@ -2743,7 +2753,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
// }
// [EBP]
MFI.CreateFixedObject(-TailCallReturnAddrDelta,
- TailCallReturnAddrDelta - SlotSize, true);
+ TailCallReturnAddrDelta - SlotSize, true);
}
// Spill the BasePtr if it's used.
@@ -2774,13 +2784,37 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
// about avoiding it later.
Register FPReg = TRI->getFrameRegister(MF);
for (unsigned i = 0; i < CSI.size(); ++i) {
- if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) {
+ if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
CSI.erase(CSI.begin() + i);
break;
}
}
}
+ // Strategy:
+ // 1. Use push2 when
+ //    a) the number of CSRs is > 1, if no padding is needed
+ //    b) the number of CSRs is > 2, if padding is needed
+ // 2. When the number of CSR pushes is odd:
+ //    a. Start using push2 from the 1st push if the stack is 16B aligned.
+ //    b. Start using push2 from the 2nd push if the stack is not 16B aligned.
+ // 3. When the number of CSR pushes is even, start using push2 from the 1st
+ //    push and make the stack 16B aligned before the push (see the worked
+ //    sketch below).
+ unsigned NumRegsForPush2 = 0;
+ if (STI.hasPush2Pop2()) {
+ unsigned NumCSGPR = llvm::count_if(CSI, [](const CalleeSavedInfo &I) {
+ return X86::GR64RegClass.contains(I.getReg());
+ });
+ bool NeedPadding = (SpillSlotOffset % 16 != 0) && (NumCSGPR % 2 == 0);
+ bool UsePush2Pop2 = NeedPadding ? NumCSGPR > 2 : NumCSGPR > 1;
+ X86FI->setPadForPush2Pop2(NeedPadding && UsePush2Pop2);
+ NumRegsForPush2 = UsePush2Pop2 ? alignDown(NumCSGPR, 2) : 0;
+ if (X86FI->padForPush2Pop2()) {
+ SpillSlotOffset -= SlotSize;
+ MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
+ }
+ }
+
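A worked model of that decision as a standalone sketch (names mirror the code above, but nothing here is the LLVM API): with 6 candidate GPR CSRs and a 16B-aligned slot offset there is no padding and three push2 pairs; with 5 CSRs the first register falls back to a single push and the remaining four pair up.

    #include <cstdint>

    // How many CSRs are spilled via push2, and whether a padding slot is
    // inserted first to make the stack 16B aligned.
    static unsigned numRegsForPush2(unsigned NumCSGPR, int64_t SpillSlotOffset,
                                    bool HasPush2Pop2, bool &NeedPadding) {
      NeedPadding = false;
      if (!HasPush2Pop2)
        return 0;
      // Padding only helps when the CSR count is even but the offset is
      // misaligned; an odd count self-corrects via one leading single push.
      bool Misaligned = (SpillSlotOffset % 16 != 0) && (NumCSGPR % 2 == 0);
      bool UsePush2Pop2 = Misaligned ? NumCSGPR > 2 : NumCSGPR > 1;
      NeedPadding = Misaligned && UsePush2Pop2;
      return UsePush2Pop2 ? (NumCSGPR & ~1u) : 0; // alignDown(NumCSGPR, 2)
    }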
// Assign slots for GPRs. It increases frame size.
for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
Register Reg = I.getReg();
@@ -2788,6 +2822,13 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
continue;
+ // A CSR is a candidate for push2/pop2 when its slot offset is 16B aligned
+ // or when an odd number of candidates has been collected so far.
+ if (X86FI->getNumCandidatesForPush2Pop2() < NumRegsForPush2 &&
+ (SpillSlotOffset % 16 == 0 ||
+ X86FI->getNumCandidatesForPush2Pop2() % 2))
+ X86FI->addCandidateForPush2Pop2(Reg);
+
SpillSlotOffset -= SlotSize;
CalleeSavedFrameSize += SlotSize;
@@ -2805,6 +2846,10 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
// TODO: saving the slot index is better?
X86FI->setRestoreBasePointer(CalleeSavedFrameSize);
}
+ assert(X86FI->getNumCandidatesForPush2Pop2() % 2 == 0 &&
+ "Expect even candidates for push2/pop2");
+ if (X86FI->getNumCandidatesForPush2Pop2())
+ ++NumFunctionUsingPush2Pop2;
X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
@@ -2854,40 +2899,50 @@ bool X86FrameLowering::spillCalleeSavedRegisters(
// Push GPRs. It increases frame size.
const MachineFunction &MF = *MBB.getParent();
- unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
- for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
- Register Reg = I.getReg();
-
- if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
- continue;
+ const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ if (X86FI->padForPush2Pop2())
+ emitSPUpdate(MBB, MI, DL, -(int64_t)SlotSize, /*InEpilogue=*/false);
+ // Update LiveIn of the basic block and decide whether we can add a kill flag
+ // to the use.
+ auto UpdateLiveInCheckCanKill = [&](Register Reg) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
- bool isLiveIn = MRI.isLiveIn(Reg);
- if (!isLiveIn)
- MBB.addLiveIn(Reg);
-
- // Decide whether we can add a kill flag to the use.
- bool CanKill = !isLiveIn;
- // Check if any subregister is live-in
- if (CanKill) {
- for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg) {
- if (MRI.isLiveIn(*AReg)) {
- CanKill = false;
- break;
- }
- }
- }
-
// Do not set a kill flag on values that are also marked as live-in. This
// happens with the @llvm.returnaddress intrinsic and with arguments
// passed in callee saved registers.
// Omitting the kill flags is conservatively correct even if the live-in
// is not used after all.
- BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, getKillRegState(CanKill))
- .setMIFlag(MachineInstr::FrameSetup);
+ if (MRI.isLiveIn(Reg))
+ return false;
+ MBB.addLiveIn(Reg);
+ // Check if any subregister is live-in
+ for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg)
+ if (MRI.isLiveIn(*AReg))
+ return false;
+ return true;
+ };
+ auto UpdateLiveInGetKillRegState = [&](Register Reg) {
+ return getKillRegState(UpdateLiveInCheckCanKill(Reg));
+ };
+
+ for (auto RI = CSI.rbegin(), RE = CSI.rend(); RI != RE; ++RI) {
+ Register Reg = RI->getReg();
+ if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
+ continue;
+
+ if (X86FI->isCandidateForPush2Pop2(Reg)) {
+ Register Reg2 = (++RI)->getReg();
+ BuildMI(MBB, MI, DL, TII.get(getPUSH2Opcode(STI)))
+ .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
+ .addReg(Reg2, UpdateLiveInGetKillRegState(Reg2))
+ .setMIFlag(MachineInstr::FrameSetup);
+ } else {
+ BuildMI(MBB, MI, DL, TII.get(getPUSHOpcode(STI)))
+ .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
}
- const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
if (X86FI->getRestoreBasePointer()) {
unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
Register BaseReg = this->TRI->getBaseRegister();
@@ -2979,8 +3034,7 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(
// Reload XMMs from stack frame.
for (const CalleeSavedInfo &I : CSI) {
Register Reg = I.getReg();
- if (X86::GR64RegClass.contains(Reg) ||
- X86::GR32RegClass.contains(Reg))
+ if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
continue;
// If this is k-register make sure we lookup via the largest legal type.
@@ -3004,16 +3058,22 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(
}
// POP GPRs.
- unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
- for (const CalleeSavedInfo &I : CSI) {
- Register Reg = I.getReg();
- if (!X86::GR64RegClass.contains(Reg) &&
- !X86::GR32RegClass.contains(Reg))
+ for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) {
+ Register Reg = I->getReg();
+ if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
continue;
- BuildMI(MBB, MI, DL, TII.get(Opc), Reg)
- .setMIFlag(MachineInstr::FrameDestroy);
+ if (X86FI->isCandidateForPush2Pop2(Reg))
+ BuildMI(MBB, MI, DL, TII.get(getPOP2Opcode(STI)), Reg)
+ .addReg((++I)->getReg(), RegState::Define)
+ .setMIFlag(MachineInstr::FrameDestroy);
+ else
+ BuildMI(MBB, MI, DL, TII.get(getPOPOpcode(STI)), Reg)
+ .setMIFlag(MachineInstr::FrameDestroy);
}
+ if (X86FI->padForPush2Pop2())
+ emitSPUpdate(MBB, MI, DL, SlotSize, /*InEpilogue=*/true);
+
return true;
}
@@ -3023,7 +3083,7 @@ void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
// Spill the BasePtr if it's used.
- if (TRI->hasBasePointer(MF)){
+ if (TRI->hasBasePointer(MF)) {
Register BasePtr = TRI->getBaseRegister();
if (STI.isTarget64BitILP32())
BasePtr = getX86SubSuperRegister(BasePtr, 64);
@@ -3031,11 +3091,10 @@ void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
-static bool
-HasNestArgument(const MachineFunction *MF) {
+static bool HasNestArgument(const MachineFunction *MF) {
const Function &F = MF->getFunction();
- for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
- I != E; I++) {
+ for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
+ I++) {
if (I->hasNestAttr() && !I->use_empty())
return true;
}
@@ -3046,8 +3105,8 @@ HasNestArgument(const MachineFunction *MF) {
/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
/// and the properties of the function either one or two registers will be
/// needed. Set primary to true for the first register, false for the second.
-static unsigned
-GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) {
+static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64,
+ const MachineFunction &MF, bool Primary) {
CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
// Erlang stuff.
@@ -3148,7 +3207,7 @@ void X86FrameLowering::adjustForSegmentedStacks(
TlsOffset = IsLP64 ? 0x70 : 0x40;
} else if (STI.isTargetDarwin()) {
TlsReg = X86::GS;
- TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
+ TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90.
} else if (STI.isTargetWin64()) {
TlsReg = X86::GS;
TlsOffset = 0x28; // pvArbitrary, reserved for application use
@@ -3165,18 +3224,28 @@ void X86FrameLowering::adjustForSegmentedStacks(
if (CompareStackPointer)
ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
else
- BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP)
- .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
-
- BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg)
- .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r),
+ ScratchReg)
+ .addReg(X86::RSP)
+ .addImm(1)
+ .addReg(0)
+ .addImm(-StackSize)
+ .addReg(0);
+
+ BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm))
+ .addReg(ScratchReg)
+ .addReg(0)
+ .addImm(1)
+ .addReg(0)
+ .addImm(TlsOffset)
+ .addReg(TlsReg);
} else {
if (STI.isTargetLinux()) {
TlsReg = X86::GS;
TlsOffset = 0x30;
} else if (STI.isTargetDarwin()) {
TlsReg = X86::GS;
- TlsOffset = 0x48 + 90*4;
+ TlsOffset = 0x48 + 90 * 4;
} else if (STI.isTargetWin32()) {
TlsReg = X86::FS;
TlsOffset = 0x14; // pvArbitrary, reserved for application use
@@ -3192,13 +3261,22 @@ void X86FrameLowering::adjustForSegmentedStacks(
if (CompareStackPointer)
ScratchReg = X86::ESP;
else
- BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
- .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
+ BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg)
+ .addReg(X86::ESP)
+ .addImm(1)
+ .addReg(0)
+ .addImm(-StackSize)
+ .addReg(0);
if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
STI.isTargetDragonFly()) {
- BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
- .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
+ .addReg(ScratchReg)
+ .addReg(0)
+ .addImm(0)
+ .addReg(0)
+ .addImm(TlsOffset)
+ .addReg(TlsReg);
} else if (STI.isTargetDarwin()) {
// TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
@@ -3223,15 +3301,17 @@ void X86FrameLowering::adjustForSegmentedStacks(
if (SaveScratch2)
BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
- .addReg(ScratchReg2, RegState::Kill);
+ .addReg(ScratchReg2, RegState::Kill);
BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
- .addImm(TlsOffset);
+ .addImm(TlsOffset);
BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
- .addReg(ScratchReg)
- .addReg(ScratchReg2).addImm(1).addReg(0)
- .addImm(0)
- .addReg(TlsReg);
+ .addReg(ScratchReg)
+ .addReg(ScratchReg2)
+ .addImm(1)
+ .addReg(0)
+ .addImm(0)
+ .addReg(TlsReg);
if (SaveScratch2)
BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
@@ -3240,7 +3320,9 @@ void X86FrameLowering::adjustForSegmentedStacks(
// This jump is taken if SP >= (Stacklet Limit + Stack Space required).
// It jumps to normal execution of the function body.
- BuildMI(checkMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_A);
+ BuildMI(checkMBB, DL, TII.get(X86::JCC_1))
+ .addMBB(&PrologueMBB)
+ .addImm(X86::COND_A);
// On 32 bit we first push the arguments size and then the frame size. On 64
// bit, we pass the stack frame size in r10 and the argument size in r11.
@@ -3264,9 +3346,8 @@ void X86FrameLowering::adjustForSegmentedStacks(
.addImm(X86FI->getArgumentStackSize());
} else {
BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
- .addImm(X86FI->getArgumentStackSize());
- BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
- .addImm(StackSize);
+ .addImm(X86FI->getArgumentStackSize());
+ BuildMI(allocMBB, DL, TII.get(X86::PUSH32i)).addImm(StackSize);
}
// __morestack is in libgcc
@@ -3298,10 +3379,10 @@ void X86FrameLowering::adjustForSegmentedStacks(
} else {
if (Is64Bit)
BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
- .addExternalSymbol("__morestack");
+ .addExternalSymbol("__morestack");
else
BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
- .addExternalSymbol("__morestack");
+ .addExternalSymbol("__morestack");
}
if (IsNested)
@@ -3323,22 +3404,24 @@ void X86FrameLowering::adjustForSegmentedStacks(
/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
/// to fields it needs, through a named metadata node "hipe.literals" containing
/// name-value pairs.
-static unsigned getHiPELiteral(
- NamedMDNode *HiPELiteralsMD, const StringRef LiteralName) {
+static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD,
+ const StringRef LiteralName) {
for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
MDNode *Node = HiPELiteralsMD->getOperand(i);
- if (Node->getNumOperands() != 2) continue;
+ if (Node->getNumOperands() != 2)
+ continue;
MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
- if (!NodeName || !NodeVal) continue;
+ if (!NodeName || !NodeVal)
+ continue;
ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
if (ValConst && NodeName->getString() == LiteralName) {
return ValConst->getZExtValue();
}
}
- report_fatal_error("HiPE literal " + LiteralName
- + " required but not provided");
+ report_fatal_error("HiPE literal " + LiteralName +
+ " required but not provided");
}
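The named metadata consumed here is a flat list of (name, value) pairs; in textual IR it would look roughly like !hipe.literals = !{!0, ...} with !0 = !{!"AMD64_LEAF_WORDS", i32 ...} (shape illustrative, values defined by the Erlang runtime). A plain-C++ sketch of the same lookup-or-die contract:

    #include <cstdio>
    #include <cstdlib>
    #include <cstring>

    struct HiPELiteral {
      const char *Name;
      unsigned Value;
    };

    // Find a literal by name or fail hard, as getHiPELiteral does above.
    static unsigned lookupLiteral(const HiPELiteral *MD, unsigned N,
                                  const char *Name) {
      for (unsigned I = 0; I != N; ++I)
        if (std::strcmp(MD[I].Name, Name) == 0)
          return MD[I].Value;
      std::fprintf(stderr, "HiPE literal %s required but not provided\n", Name);
      std::abort();
    }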
// Return true if there are no non-ehpad successors to MBB and there are no
@@ -3378,19 +3461,19 @@ void X86FrameLowering::adjustForHiPEPrologue(
assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
// HiPE-specific values
- NamedMDNode *HiPELiteralsMD = MF.getMMI().getModule()
- ->getNamedMetadata("hipe.literals");
+ NamedMDNode *HiPELiteralsMD =
+ MF.getMMI().getModule()->getNamedMetadata("hipe.literals");
if (!HiPELiteralsMD)
report_fatal_error(
"Can't generate HiPE prologue without runtime parameters");
- const unsigned HipeLeafWords
- = getHiPELiteral(HiPELiteralsMD,
- Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
+ const unsigned HipeLeafWords = getHiPELiteral(
+ HiPELiteralsMD, Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
const unsigned Guaranteed = HipeLeafWords * SlotSize;
- unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs ?
- MF.getFunction().arg_size() - CCRegisteredArgs : 0;
- unsigned MaxStack = MFI.getStackSize() + CallerStkArity*SlotSize + SlotSize;
+ unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs
+ ? MF.getFunction().arg_size() - CCRegisteredArgs
+ : 0;
+ unsigned MaxStack = MFI.getStackSize() + CallerStkArity * SlotSize + SlotSize;
assert(STI.isTargetLinux() &&
"HiPE prologue is only supported on Linux operating systems.");
@@ -3430,11 +3513,13 @@ void X86FrameLowering::adjustForHiPEPrologue(
F->getName().find_first_of("._") == StringRef::npos)
continue;
- unsigned CalleeStkArity =
- F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
+ unsigned CalleeStkArity = F->arg_size() > CCRegisteredArgs
+ ? F->arg_size() - CCRegisteredArgs
+ : 0;
if (HipeLeafWords - 1 > CalleeStkArity)
- MoreStackForCalls = std::max(MoreStackForCalls,
- (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
+ MoreStackForCalls =
+ std::max(MoreStackForCalls,
+ (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
}
}
MaxStack += MoreStackForCalls;
@@ -3459,13 +3544,13 @@ void X86FrameLowering::adjustForHiPEPrologue(
SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
if (Is64Bit) {
SPReg = X86::RSP;
- PReg = X86::RBP;
+ PReg = X86::RBP;
LEAop = X86::LEA64r;
CMPop = X86::CMP64rm;
CALLop = X86::CALL64pcrel32;
} else {
SPReg = X86::ESP;
- PReg = X86::EBP;
+ PReg = X86::EBP;
LEAop = X86::LEA32r;
CMPop = X86::CMP32rm;
CALLop = X86::CALLpcrel32;
@@ -3476,21 +3561,24 @@ void X86FrameLowering::adjustForHiPEPrologue(
"HiPE prologue scratch register is live-in");
// Create new MBB for StackCheck:
- addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
- SPReg, false, -MaxStack);
+ addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
+ false, -MaxStack);
// SPLimitOffset is in a fixed heap location (pointed by BP).
- addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
- .addReg(ScratchReg), PReg, false, SPLimitOffset);
- BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_AE);
+ addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
+ PReg, false, SPLimitOffset);
+ BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1))
+ .addMBB(&PrologueMBB)
+ .addImm(X86::COND_AE);
// Create new MBB for IncStack:
- BuildMI(incStackMBB, DL, TII.get(CALLop)).
- addExternalSymbol("inc_stack_0");
- addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
- SPReg, false, -MaxStack);
- addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
- .addReg(ScratchReg), PReg, false, SPLimitOffset);
- BuildMI(incStackMBB, DL, TII.get(X86::JCC_1)).addMBB(incStackMBB).addImm(X86::COND_LE);
+ BuildMI(incStackMBB, DL, TII.get(CALLop)).addExternalSymbol("inc_stack_0");
+ addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
+ false, -MaxStack);
+ addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
+ PReg, false, SPLimitOffset);
+ BuildMI(incStackMBB, DL, TII.get(X86::JCC_1))
+ .addMBB(incStackMBB)
+ .addImm(X86::COND_LE);
stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
@@ -3570,15 +3658,15 @@ bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
Regs[FoundRegs++] = Regs[0];
for (int i = 0; i < NumPops; ++i)
- BuildMI(MBB, MBBI, DL,
- TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]);
+ BuildMI(MBB, MBBI, DL, TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r),
+ Regs[i]);
return true;
}
-MachineBasicBlock::iterator X86FrameLowering::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
+MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
bool reserveCallFrame = hasReservedCallFrame(MF);
unsigned Opcode = I->getOpcode();
bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
@@ -3666,9 +3754,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// TODO: When not using precise CFA, we also need to adjust for the
// InternalAmt here.
if (CfaAdjustment) {
- BuildCFI(MBB, InsertPos, DL,
- MCCFIInstruction::createAdjustCfaOffset(nullptr,
- CfaAdjustment));
+ BuildCFI(
+ MBB, InsertPos, DL,
+ MCCFIInstruction::createAdjustCfaOffset(nullptr, CfaAdjustment));
}
}
@@ -3837,11 +3925,11 @@ X86FrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
namespace {
// Struct used by orderFrameObjects to help sort the stack objects.
struct X86FrameSortingObject {
- bool IsValid = false; // true if we care about this Object.
- unsigned ObjectIndex = 0; // Index of Object into MFI list.
- unsigned ObjectSize = 0; // Size of Object in bytes.
+ bool IsValid = false; // true if we care about this Object.
+ unsigned ObjectIndex = 0; // Index of Object into MFI list.
+ unsigned ObjectSize = 0; // Size of Object in bytes.
Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
- unsigned ObjectNumUses = 0; // Object static number of uses.
+ unsigned ObjectNumUses = 0; // Object static number of uses.
};
// The comparison function we use for std::sort to order our local
@@ -3881,9 +3969,9 @@ struct X86FrameSortingComparator {
// the division and, with it, the need for any floating point
// arithmetic.
DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
- static_cast<uint64_t>(B.ObjectSize);
+ static_cast<uint64_t>(B.ObjectSize);
DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
- static_cast<uint64_t>(A.ObjectSize);
+ static_cast<uint64_t>(A.ObjectSize);
// If the two densities are equal, prioritize highest alignment
// objects. This allows for similar alignment objects
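The cross-multiplication trick in this comparator, in isolation: comparing UsesA/SizeA against UsesB/SizeB via UsesA*SizeB versus UsesB*SizeA gives the same ordering without division or floating point, and the 32-bit operands cannot overflow a uint64_t product. A minimal sketch:

#include <cassert>
#include <cstdint>

bool denserThan(uint32_t UsesA, uint32_t SizeA,
                uint32_t UsesB, uint32_t SizeB) {
  uint64_t DensityAScaled = uint64_t(UsesA) * SizeB;
  uint64_t DensityBScaled = uint64_t(UsesB) * SizeA;
  return DensityAScaled > DensityBScaled;
}

int main() {
  // 10 uses / 16 bytes (0.625) beats 3 uses / 8 bytes (0.375).
  assert(denserThan(10, 16, 3, 8));
  assert(!denserThan(3, 8, 10, 16));
}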
@@ -3976,8 +4064,8 @@ void X86FrameLowering::orderFrameObjects(
std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
}
-
-unsigned X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
+unsigned
+X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
// RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
unsigned Offset = 16;
// RBP is immediately pushed.
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 4380f8c7ae92..7ec59c74f5f5 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -173,7 +173,7 @@ namespace {
X86DAGToDAGISel() = delete;
- explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
+ explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOptLevel OptLevel)
: SelectionDAGISel(ID, tm, OptLevel), Subtarget(nullptr),
OptForMinSize(false), IndirectTlsSegRefs(false) {}
@@ -212,6 +212,8 @@ namespace {
bool matchAddress(SDValue N, X86ISelAddressMode &AM);
bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth);
+ SDValue matchIndexRecursively(SDValue N, X86ISelAddressMode &AM,
+ unsigned Depth);
bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth);
bool matchVectorAddressRecursively(SDValue N, X86ISelAddressMode &AM,
@@ -257,7 +259,7 @@ namespace {
/// Implement addressing mode selection for inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- unsigned ConstraintID,
+ InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) override;
void emitSpecialCodeForMain();
@@ -622,7 +624,8 @@ bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
bool
X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
- if (OptLevel == CodeGenOpt::None) return false;
+ if (OptLevel == CodeGenOptLevel::None)
+ return false;
if (!N.hasOneUse())
return false;
@@ -878,6 +881,10 @@ static bool isEndbrImm64(uint64_t Imm) {
return false;
}
+static bool needBWI(MVT VT) {
+ return (VT == MVT::v32i16 || VT == MVT::v32f16 || VT == MVT::v64i8);
+}
+
void X86DAGToDAGISel::PreprocessISelDAG() {
bool MadeChange = false;
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
@@ -983,15 +990,15 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
case X86ISD::VBROADCAST: {
MVT VT = N->getSimpleValueType(0);
// Emulate v32i16/v64i8 broadcast without BWI.
- if (!Subtarget->hasBWI() && (VT == MVT::v32i16 || VT == MVT::v64i8)) {
- MVT NarrowVT = VT == MVT::v32i16 ? MVT::v16i16 : MVT::v32i8;
+ if (!Subtarget->hasBWI() && needBWI(VT)) {
+ MVT NarrowVT = VT.getHalfNumVectorElementsVT();
SDLoc dl(N);
SDValue NarrowBCast =
CurDAG->getNode(X86ISD::VBROADCAST, dl, NarrowVT, N->getOperand(0));
SDValue Res =
CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT),
NarrowBCast, CurDAG->getIntPtrConstant(0, dl));
- unsigned Index = VT == MVT::v32i16 ? 16 : 32;
+ unsigned Index = NarrowVT.getVectorMinNumElements();
Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast,
CurDAG->getIntPtrConstant(Index, dl));
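The refactor replaces the hard-coded VT == MVT::v32i16 ? 16 : 32 ternaries with values computed from the narrowed type, which also lets v32f16 (covered by the new needBWI helper) take the same path. The index arithmetic by itself, with a toy stand-in for MVT:

#include <cassert>

struct VecVT { unsigned NumElts; }; // only the element count matters here

VecVT halfNumVectorElementsVT(VecVT VT) { return {VT.NumElts / 2}; }

int main() {
  // v32i16 -> v16i16: the second narrow broadcast is inserted at element 16.
  assert(halfNumVectorElementsVT({32}).NumElts == 16);
  // v64i8 -> v32i8: inserted at element 32.
  assert(halfNumVectorElementsVT({64}).NumElts == 32);
}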
@@ -1007,8 +1014,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
case X86ISD::VBROADCAST_LOAD: {
MVT VT = N->getSimpleValueType(0);
// Emulate v32i16/v64i8 broadcast without BWI.
- if (!Subtarget->hasBWI() && (VT == MVT::v32i16 || VT == MVT::v64i8)) {
- MVT NarrowVT = VT == MVT::v32i16 ? MVT::v16i16 : MVT::v32i8;
+ if (!Subtarget->hasBWI() && needBWI(VT)) {
+ MVT NarrowVT = VT.getHalfNumVectorElementsVT();
auto *MemNode = cast<MemSDNode>(N);
SDLoc dl(N);
SDVTList VTs = CurDAG->getVTList(NarrowVT, MVT::Other);
@@ -1019,7 +1026,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
SDValue Res =
CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT),
NarrowBCast, CurDAG->getIntPtrConstant(0, dl));
- unsigned Index = VT == MVT::v32i16 ? 16 : 32;
+ unsigned Index = NarrowVT.getVectorMinNumElements();
Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast,
CurDAG->getIntPtrConstant(Index, dl));
@@ -1033,6 +1040,50 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
break;
}
+ case ISD::LOAD: {
+ // If this is an XMM/YMM load of the same lower bits as another YMM/ZMM
+ // load, then just extract the lower subvector and avoid the second load.
+ auto *Ld = cast<LoadSDNode>(N);
+ MVT VT = N->getSimpleValueType(0);
+ if (!ISD::isNormalLoad(Ld) || !Ld->isSimple() ||
+ !(VT.is128BitVector() || VT.is256BitVector()))
+ break;
+
+ MVT MaxVT = VT;
+ SDNode *MaxLd = nullptr;
+ SDValue Ptr = Ld->getBasePtr();
+ SDValue Chain = Ld->getChain();
+ for (SDNode *User : Ptr->uses()) {
+ auto *UserLd = dyn_cast<LoadSDNode>(User);
+ MVT UserVT = User->getSimpleValueType(0);
+ if (User != N && UserLd && ISD::isNormalLoad(User) &&
+ UserLd->getBasePtr() == Ptr && UserLd->getChain() == Chain &&
+ !User->hasAnyUseOfValue(1) &&
+ (UserVT.is256BitVector() || UserVT.is512BitVector()) &&
+ UserVT.getSizeInBits() > VT.getSizeInBits() &&
+ (!MaxLd || UserVT.getSizeInBits() > MaxVT.getSizeInBits())) {
+ MaxLd = User;
+ MaxVT = UserVT;
+ }
+ }
+ if (MaxLd) {
+ SDLoc dl(N);
+ unsigned NumSubElts = VT.getSizeInBits() / MaxVT.getScalarSizeInBits();
+ MVT SubVT = MVT::getVectorVT(MaxVT.getScalarType(), NumSubElts);
+ SDValue Extract = CurDAG->getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT,
+ SDValue(MaxLd, 0),
+ CurDAG->getIntPtrConstant(0, dl));
+ SDValue Res = CurDAG->getBitcast(VT, Extract);
+
+ --I;
+ SDValue To[] = {Res, SDValue(MaxLd, 1)};
+ CurDAG->ReplaceAllUsesWith(N, To);
+ ++I;
+ MadeChange = true;
+ continue;
+ }
+ break;
+ }
case ISD::VSELECT: {
// Replace VSELECT with non-mask conditions with BLENDV/VPTERNLOG.
EVT EleVT = N->getOperand(0).getValueType().getVectorElementType();
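The new ISD::LOAD case is a small peephole: when a 128- or 256-bit load reads the same address (same base pointer and chain) as a wider 256- or 512-bit load, the narrow load is replaced by extracting the low subvector of the widest such load. The scan for the widest overlapping load, modeled on plain structs rather than the SelectionDAG API:

#include <cstdio>
#include <vector>

struct Load { const void *Ptr; unsigned Bits; };

// Returns the widest other load from the same pointer, if any; the narrow
// load then becomes extract_subvector(widest, 0) plus a bitcast.
const Load *findWiderLoad(const Load &Narrow, const std::vector<Load> &All) {
  const Load *Max = nullptr;
  for (const Load &L : All)
    if (&L != &Narrow && L.Ptr == Narrow.Ptr && L.Bits > Narrow.Bits &&
        (!Max || L.Bits > Max->Bits))
      Max = &L;
  return Max;
}

int main() {
  const void *P = "shared address";
  std::vector<Load> Loads = {{P, 128}, {P, 512}, {P, 256}};
  if (const Load *W = findWiderLoad(Loads[0], Loads))
    std::printf("reuse the %u-bit load\n", W->Bits); // prints 512
}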
@@ -1240,7 +1291,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
}
}
- if (OptLevel != CodeGenOpt::None &&
+ if (OptLevel != CodeGenOptLevel::None &&
// Only do this when the target can fold the load into the call or
// jmp.
!Subtarget->useIndirectThunkCalls() &&
@@ -1479,7 +1530,7 @@ bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
void X86DAGToDAGISel::PostprocessISelDAG() {
// Skip peepholes at -O0.
- if (TM.getOptLevel() == CodeGenOpt::None)
+ if (TM.getOptLevel() == CodeGenOptLevel::None)
return;
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
@@ -1696,10 +1747,28 @@ bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
!isDispSafeForFrameIndex(Val))
return true;
+ // In ILP32 (x32) mode, pointers are 32 bits and need to be zero-extended to
+ // 64 bits. Instructions with 32-bit register addresses perform this zero
+ // extension for us and we can safely ignore the high bits of Offset.
+ // Instructions with only a 32-bit immediate address do not, though: they
+ // sign extend instead. This means only the low 2GB of address space is
+ // directly addressable; we need indirect addressing for the high 2GB of
+ // address space.
+ // TODO: Some of the earlier checks may be relaxed for ILP32 mode as the
+ // implicit zero extension of instructions would cover up any problem.
+ // However, we have asserts elsewhere that get triggered if we do, so keep
+ // the checks for now.
+ // TODO: We would actually be able to accept these, as well as the same
+ // addresses in LP64 mode, by adding the EIZ pseudo-register as an operand
+ // to get an address size override to be emitted. However, this
+ // pseudo-register is not part of any register class and therefore causes
+ // MIR verification to fail.
+ if (Subtarget->isTarget64BitILP32() && !isUInt<31>(Val) &&
+ !AM.hasBaseOrIndexReg())
+ return true;
}
AM.Disp = Val;
return false;
-
}
bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
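The new x32 guard can be restated as a tiny predicate: with neither a base nor an index register, the 32-bit displacement is sign-extended to 64 bits, so only low-2GB values are safe; with a register operand, the hardware's implicit zero extension covers the rest. A sketch under those assumed semantics:

#include <cassert>
#include <cstdint>

bool isUInt31(uint64_t V) { return V < (1ull << 31); }

bool dispSafeForX32(uint64_t Val, bool HasBaseOrIndexReg) {
  // Register forms zero-extend the 32-bit address; immediate-only forms
  // sign-extend, so they must stay within the low 2GB.
  return HasBaseOrIndexReg || isUInt31(Val);
}

int main() {
  assert(dispSafeForX32(0x7fffffffu, false));  // low 2GB: fine as bare disp
  assert(!dispSafeForX32(0x80000000u, false)); // would sign-extend negative
  assert(dispSafeForX32(0x80000000u, true));   // register operand is safe
}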
@@ -1939,8 +2008,8 @@ static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
SDValue NewMask = DAG.getConstant(0xff, DL, XVT);
SDValue Srl = DAG.getNode(ISD::SRL, DL, XVT, X, Eight);
SDValue And = DAG.getNode(ISD::AND, DL, XVT, Srl, NewMask);
- SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
SDValue Ext = DAG.getZExtOrTrunc(And, DL, VT);
+ SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Ext, ShlCount);
// Insert the new nodes into the topological ordering. We must do this in
@@ -1949,12 +2018,11 @@ static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
// essentially a pre-flattened and pre-sorted sequence of nodes. There is no
// hierarchy left to express.
insertDAGNode(DAG, N, Eight);
- insertDAGNode(DAG, N, Srl);
insertDAGNode(DAG, N, NewMask);
+ insertDAGNode(DAG, N, Srl);
insertDAGNode(DAG, N, And);
+ insertDAGNode(DAG, N, Ext);
insertDAGNode(DAG, N, ShlCount);
- if (Ext != And)
- insertDAGNode(DAG, N, Ext);
insertDAGNode(DAG, N, Shl);
DAG.ReplaceAllUsesWith(N, Shl);
DAG.RemoveDeadNode(N.getNode());
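Aside from moving the extension's creation and insertion into a valid topological position, this hunk leaves the computed value untouched. The pattern being rebuilt, as plain integer arithmetic: extract the second byte of x and scale it so it can serve as an index.

#include <cassert>
#include <cstdint>

uint64_t extractAndScale(uint64_t X, unsigned ScaleLog) {
  return ((X >> 8) & 0xff) << ScaleLog; // srl, and-mask, then shl
}

int main() {
  assert(extractAndScale(0xBEEF, 3) == 0xBEu * 8); // byte 0xBE scaled by 8
}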
@@ -2066,22 +2134,22 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
!isa<ConstantSDNode>(Shift.getOperand(1)))
return true;
+ // We need to ensure that mask is a continuous run of bits.
+ unsigned MaskIdx, MaskLen;
+ if (!isShiftedMask_64(Mask, MaskIdx, MaskLen))
+ return true;
+ unsigned MaskLZ = 64 - (MaskIdx + MaskLen);
+
unsigned ShiftAmt = Shift.getConstantOperandVal(1);
- unsigned MaskLZ = llvm::countl_zero(Mask);
- unsigned MaskTZ = llvm::countr_zero(Mask);
// The amount of shift we're trying to fit into the addressing mode is taken
- // from the trailing zeros of the mask.
- unsigned AMShiftAmt = MaskTZ;
+ // from the shifted mask index (number of trailing zeros of the mask).
+ unsigned AMShiftAmt = MaskIdx;
// There is nothing we can do here unless the mask is removing some bits.
// Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
if (AMShiftAmt == 0 || AMShiftAmt > 3) return true;
- // We also need to ensure that mask is a continuous run of bits.
- if (llvm::countr_one(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64)
- return true;
-
// Scale the leading zero count down based on the actual size of the value.
// Also scale it down based on the size of the shift.
unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
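The rewrite above swaps a hand-rolled contiguity test (counting leading ones after shifting out the trailing zeros) for isShiftedMask_64, which also reports the run's start and length. A self-contained equivalent using C++20 <bit>:

#include <bit>
#include <cassert>
#include <cstdint>

// A "shifted mask" is a single contiguous run of ones; MaskIdx is the run's
// start (the trailing-zero count) and MaskLen its length (the popcount).
bool isShiftedMask64(uint64_t Mask, unsigned &MaskIdx, unsigned &MaskLen) {
  if (Mask == 0)
    return false;
  MaskIdx = std::countr_zero(Mask);
  MaskLen = std::popcount(Mask);
  uint64_t Run = (MaskLen == 64) ? ~0ull : (1ull << MaskLen) - 1;
  return (Mask >> MaskIdx) == Run; // contiguous iff the run is solid
}

int main() {
  unsigned Idx, Len;
  assert(isShiftedMask64(0x0ff0, Idx, Len) && Idx == 4 && Len == 8);
  assert(!isShiftedMask64(0x0f0f, Idx, Len)); // two runs: rejected
}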
@@ -2107,8 +2175,8 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
}
APInt MaskedHighBits =
APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
- KnownBits Known = DAG.computeKnownBits(X);
- if (MaskedHighBits != Known.Zero) return true;
+ if (!DAG.MaskedValueIsZero(X, MaskedHighBits))
+ return true;
// We've identified a pattern that can be transformed into a single shift
// and an addressing mode. Make it so.
@@ -2120,11 +2188,14 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
insertDAGNode(DAG, N, NewX);
X = NewX;
}
+
+ MVT XVT = X.getSimpleValueType();
SDLoc DL(N);
SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
- SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
+ SDValue NewSRL = DAG.getNode(ISD::SRL, DL, XVT, X, NewSRLAmt);
+ SDValue NewExt = DAG.getZExtOrTrunc(NewSRL, DL, VT);
SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
- SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
+ SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewExt, NewSHLAmt);
// Insert the new nodes into the topological ordering. We must do this in
// a valid topological ordering as nothing is going to go back and re-sort
@@ -2133,13 +2204,14 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
// hierarchy left to express.
insertDAGNode(DAG, N, NewSRLAmt);
insertDAGNode(DAG, N, NewSRL);
+ insertDAGNode(DAG, N, NewExt);
insertDAGNode(DAG, N, NewSHLAmt);
insertDAGNode(DAG, N, NewSHL);
DAG.ReplaceAllUsesWith(N, NewSHL);
DAG.RemoveDeadNode(N.getNode());
AM.Scale = 1 << AMShiftAmt;
- AM.IndexReg = NewSRL;
+ AM.IndexReg = NewExt;
return false;
}
@@ -2162,26 +2234,30 @@ static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
return true;
// We need to ensure that mask is a continuous run of bits.
- if (!isShiftedMask_64(Mask)) return true;
+ unsigned MaskIdx, MaskLen;
+ if (!isShiftedMask_64(Mask, MaskIdx, MaskLen))
+ return true;
unsigned ShiftAmt = Shift.getConstantOperandVal(1);
// The amount of shift we're trying to fit into the addressing mode is taken
- // from the trailing zeros of the mask.
- unsigned AMShiftAmt = llvm::countr_zero(Mask);
+ // from the shifted mask index (number of trailing zeros of the mask).
+ unsigned AMShiftAmt = MaskIdx;
// There is nothing we can do here unless the mask is removing some bits.
// Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
if (AMShiftAmt == 0 || AMShiftAmt > 3) return true;
+ MVT XVT = X.getSimpleValueType();
MVT VT = N.getSimpleValueType();
SDLoc DL(N);
SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
- SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
- SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, VT);
- SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, NewSRL, NewMask);
+ SDValue NewSRL = DAG.getNode(ISD::SRL, DL, XVT, X, NewSRLAmt);
+ SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, XVT);
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, XVT, NewSRL, NewMask);
+ SDValue NewExt = DAG.getZExtOrTrunc(NewAnd, DL, VT);
SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
- SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewAnd, NewSHLAmt);
+ SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewExt, NewSHLAmt);
// Insert the new nodes into the topological ordering. We must do this in
// a valid topological ordering as nothing is going to go back and re-sort
@@ -2192,16 +2268,140 @@ static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
insertDAGNode(DAG, N, NewSRL);
insertDAGNode(DAG, N, NewMask);
insertDAGNode(DAG, N, NewAnd);
+ insertDAGNode(DAG, N, NewExt);
insertDAGNode(DAG, N, NewSHLAmt);
insertDAGNode(DAG, N, NewSHL);
DAG.ReplaceAllUsesWith(N, NewSHL);
DAG.RemoveDeadNode(N.getNode());
AM.Scale = 1 << AMShiftAmt;
- AM.IndexReg = NewAnd;
+ AM.IndexReg = NewExt;
return false;
}
+// Attempt to peek further into a scaled index register, collecting additional
+// extensions / offsets / etc. Returns \p N if we can't peek any further.
+SDValue X86DAGToDAGISel::matchIndexRecursively(SDValue N,
+ X86ISelAddressMode &AM,
+ unsigned Depth) {
+ assert(AM.IndexReg.getNode() == nullptr && "IndexReg already matched");
+ assert((AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8) &&
+ "Illegal index scale");
+
+ // Limit recursion.
+ if (Depth >= SelectionDAG::MaxRecursionDepth)
+ return N;
+
+ EVT VT = N.getValueType();
+ unsigned Opc = N.getOpcode();
+
+ // index: add(x,c) -> index: x, disp + c
+ if (CurDAG->isBaseWithConstantOffset(N)) {
+ auto *AddVal = cast<ConstantSDNode>(N.getOperand(1));
+ uint64_t Offset = (uint64_t)AddVal->getSExtValue() * AM.Scale;
+ if (!foldOffsetIntoAddress(Offset, AM))
+ return matchIndexRecursively(N.getOperand(0), AM, Depth + 1);
+ }
+
+ // index: add(x,x) -> index: x, scale * 2
+ if (Opc == ISD::ADD && N.getOperand(0) == N.getOperand(1)) {
+ if (AM.Scale <= 4) {
+ AM.Scale *= 2;
+ return matchIndexRecursively(N.getOperand(0), AM, Depth + 1);
+ }
+ }
+
+ // index: shl(x,i) -> index: x, scale * (1 << i)
+ if (Opc == X86ISD::VSHLI) {
+ uint64_t ShiftAmt = N.getConstantOperandVal(1);
+ uint64_t ScaleAmt = 1ULL << ShiftAmt;
+ if ((AM.Scale * ScaleAmt) <= 8) {
+ AM.Scale *= ScaleAmt;
+ return matchIndexRecursively(N.getOperand(0), AM, Depth + 1);
+ }
+ }
+
+ // index: sext(add_nsw(x,c)) -> index: sext(x), disp + sext(c)
+ // TODO: call matchIndexRecursively(AddSrc) if we won't corrupt sext?
+ if (Opc == ISD::SIGN_EXTEND && !VT.isVector() && N.hasOneUse()) {
+ SDValue Src = N.getOperand(0);
+ if (Src.getOpcode() == ISD::ADD && Src->getFlags().hasNoSignedWrap() &&
+ Src.hasOneUse()) {
+ if (CurDAG->isBaseWithConstantOffset(Src)) {
+ SDValue AddSrc = Src.getOperand(0);
+ auto *AddVal = cast<ConstantSDNode>(Src.getOperand(1));
+ uint64_t Offset = (uint64_t)AddVal->getSExtValue();
+ if (!foldOffsetIntoAddress(Offset * AM.Scale, AM)) {
+ SDLoc DL(N);
+ SDValue ExtSrc = CurDAG->getNode(Opc, DL, VT, AddSrc);
+ SDValue ExtVal = CurDAG->getConstant(Offset, DL, VT);
+ SDValue ExtAdd = CurDAG->getNode(ISD::ADD, DL, VT, ExtSrc, ExtVal);
+ insertDAGNode(*CurDAG, N, ExtSrc);
+ insertDAGNode(*CurDAG, N, ExtVal);
+ insertDAGNode(*CurDAG, N, ExtAdd);
+ CurDAG->ReplaceAllUsesWith(N, ExtAdd);
+ CurDAG->RemoveDeadNode(N.getNode());
+ return ExtSrc;
+ }
+ }
+ }
+ }
+
+ // index: zext(add_nuw(x,c)) -> index: zext(x), disp + zext(c)
+ // index: zext(addlike(x,c)) -> index: zext(x), disp + zext(c)
+ // TODO: call matchIndexRecursively(AddSrc) if we won't corrupt zext?
+ if (Opc == ISD::ZERO_EXTEND && !VT.isVector() && N.hasOneUse()) {
+ SDValue Src = N.getOperand(0);
+ unsigned SrcOpc = Src.getOpcode();
+ if (((SrcOpc == ISD::ADD && Src->getFlags().hasNoUnsignedWrap()) ||
+ CurDAG->isADDLike(Src)) &&
+ Src.hasOneUse()) {
+ if (CurDAG->isBaseWithConstantOffset(Src)) {
+ SDValue AddSrc = Src.getOperand(0);
+ auto *AddVal = cast<ConstantSDNode>(Src.getOperand(1));
+ uint64_t Offset = (uint64_t)AddVal->getZExtValue();
+ if (!foldOffsetIntoAddress(Offset * AM.Scale, AM)) {
+ SDLoc DL(N);
+ SDValue Res;
+ // If we're also scaling, see if we can use that as well.
+ if (AddSrc.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(AddSrc.getOperand(1))) {
+ SDValue ShVal = AddSrc.getOperand(0);
+ uint64_t ShAmt = AddSrc.getConstantOperandVal(1);
+ APInt HiBits =
+ APInt::getHighBitsSet(AddSrc.getScalarValueSizeInBits(), ShAmt);
+ uint64_t ScaleAmt = 1ULL << ShAmt;
+ if ((AM.Scale * ScaleAmt) <= 8 &&
+ (AddSrc->getFlags().hasNoUnsignedWrap() ||
+ CurDAG->MaskedValueIsZero(ShVal, HiBits))) {
+ AM.Scale *= ScaleAmt;
+ SDValue ExtShVal = CurDAG->getNode(Opc, DL, VT, ShVal);
+ SDValue ExtShift = CurDAG->getNode(ISD::SHL, DL, VT, ExtShVal,
+ AddSrc.getOperand(1));
+ insertDAGNode(*CurDAG, N, ExtShVal);
+ insertDAGNode(*CurDAG, N, ExtShift);
+ AddSrc = ExtShift;
+ Res = ExtShVal;
+ }
+ }
+ SDValue ExtSrc = CurDAG->getNode(Opc, DL, VT, AddSrc);
+ SDValue ExtVal = CurDAG->getConstant(Offset, DL, VT);
+ SDValue ExtAdd = CurDAG->getNode(SrcOpc, DL, VT, ExtSrc, ExtVal);
+ insertDAGNode(*CurDAG, N, ExtSrc);
+ insertDAGNode(*CurDAG, N, ExtVal);
+ insertDAGNode(*CurDAG, N, ExtAdd);
+ CurDAG->ReplaceAllUsesWith(N, ExtAdd);
+ CurDAG->RemoveDeadNode(N.getNode());
+ return Res ? Res : ExtSrc;
+ }
+ }
+ }
+ }
+
+ // TODO: Handle extensions, shifted masks etc.
+ return N;
+}
+
bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth) {
SDLoc dl(N);
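The first three rewrites in the new matchIndexRecursively, modeled on plain integers (a sketch of the rules only, not the SelectionDAG API): a constant addend folds into the displacement multiplied by the current scale, x+x doubles the scale, and a left shift multiplies it, all capped at the hardware's maximum scale of 8.

#include <cassert>
#include <cstdint>

struct AddrMode {
  uint64_t Scale = 1;
  int64_t Disp = 0;
};

// index: add(x,c) -> index: x, Disp += c * Scale
void foldAddConstant(AddrMode &AM, int64_t C) {
  AM.Disp += C * (int64_t)AM.Scale;
}

// index: add(x,x) -> index: x, Scale *= 2 (only while 2 * Scale <= 8)
bool foldAddSelf(AddrMode &AM) {
  if (AM.Scale > 4)
    return false;
  AM.Scale *= 2;
  return true;
}

// index: shl(x,i) -> index: x, Scale <<= i (only while the result <= 8)
bool foldShl(AddrMode &AM, unsigned I) {
  uint64_t NewScale = AM.Scale << I;
  if (NewScale > 8)
    return false;
  AM.Scale = NewScale;
  return true;
}

int main() {
  // Peel the index expression shl(add(x, 5), 1) from the outside in:
  // the address contribution is (x + 5) * 2 = 2*x + 10.
  AddrMode AM;
  assert(foldShl(AM, 1)); // Scale = 2
  foldAddConstant(AM, 5); // Disp = 5 * 2 = 10
  assert(AM.Scale == 2 && AM.Disp == 10);
}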
@@ -2210,7 +2410,7 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
AM.dump(CurDAG);
});
// Limit recursion.
- if (Depth > 5)
+ if (Depth >= SelectionDAG::MaxRecursionDepth)
return matchAddressBase(N, AM);
// If this is already a %rip relative address, we can only merge immediates
@@ -2279,21 +2479,9 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// the base doesn't end up getting used, a post-processing step
// in MatchAddress turns (,x,2) into (x,x), which is cheaper.
if (Val == 1 || Val == 2 || Val == 3) {
- AM.Scale = 1 << Val;
SDValue ShVal = N.getOperand(0);
-
- // Okay, we know that we have a scale by now. However, if the scaled
- // value is an add of something and a constant, we can fold the
- // constant into the disp field here.
- if (CurDAG->isBaseWithConstantOffset(ShVal)) {
- AM.IndexReg = ShVal.getOperand(0);
- auto *AddVal = cast<ConstantSDNode>(ShVal.getOperand(1));
- uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
- if (!foldOffsetIntoAddress(Disp, AM))
- return false;
- }
-
- AM.IndexReg = ShVal;
+ AM.Scale = 1 << Val;
+ AM.IndexReg = matchIndexRecursively(ShVal, AM, Depth + 1);
return false;
}
}
@@ -2431,28 +2619,14 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
return false;
}
- case ISD::ADD:
- if (!matchAdd(N, AM, Depth))
- return false;
- break;
-
case ISD::OR:
- // We want to look through a transform in InstCombine and DAGCombiner that
- // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
- // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
- // An 'lea' can then be used to match the shift (multiply) and add:
- // and $1, %esi
- // lea (%rsi, %rdi, 8), %rax
- if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
- !matchAdd(N, AM, Depth))
- return false;
- break;
-
case ISD::XOR:
- // We want to look through a transform in InstCombine that
- // turns 'add' with min_signed_val into 'xor', so we can treat this 'xor'
- // exactly like an 'add'.
- if (isMinSignedConstant(N.getOperand(1)) && !matchAdd(N, AM, Depth))
+ // See if we can treat the OR/XOR node as an ADD node.
+ if (!CurDAG->isADDLike(N))
+ break;
+ [[fallthrough]];
+ case ISD::ADD:
+ if (!matchAdd(N, AM, Depth))
return false;
break;
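The consolidation above relies on isADDLike accepting exactly the OR and XOR shapes the two deleted comments described. Why those are safe to treat as ADD, in two lines of arithmetic: with no common set bits, a | b equals a + b; and XOR with the minimum signed value equals addition with it, because any carry out of the sign bit wraps away.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0x1, B = 0x8; // disjoint bits: OR is carry-free addition
  assert((A | B) == A + B);

  uint32_t X = 0x87654321u, MinSigned = 0x80000000u;
  assert((X ^ MinSigned) == X + MinSigned); // the carry wraps mod 2^32
}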
@@ -2503,8 +2677,18 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
break;
- // Peek through mask: zext(and(shl(x,c1),c2))
SDValue Src = N.getOperand(0);
+
+ // See if we can match a zext(addlike(x,c)).
+ // TODO: Move more ZERO_EXTEND patterns into matchIndexRecursively.
+ if (Src.getOpcode() == ISD::ADD || Src.getOpcode() == ISD::OR)
+ if (SDValue Index = matchIndexRecursively(N, AM, Depth + 1))
+ if (Index != N) {
+ AM.IndexReg = Index;
+ return false;
+ }
+
+ // Peek through mask: zext(and(shl(x,c1),c2))
APInt Mask = APInt::getAllOnes(Src.getScalarValueSizeInBits());
if (Src.getOpcode() == ISD::AND && Src.hasOneUse())
if (auto *MaskC = dyn_cast<ConstantSDNode>(Src.getOperand(1))) {
@@ -2527,7 +2711,8 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// That makes it safe to widen to the destination type.
APInt HighZeros =
APInt::getHighBitsSet(ShlSrc.getValueSizeInBits(), ShAmtV);
- if (!CurDAG->MaskedValueIsZero(ShlSrc, HighZeros & Mask))
+ if (!Src->getFlags().hasNoUnsignedWrap() &&
+ !CurDAG->MaskedValueIsZero(ShlSrc, HighZeros & Mask))
break;
// zext (shl nuw i8 %x, C1) to i32
@@ -2559,11 +2744,22 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
return false;
}
- // Try to fold the mask and shift into an extract and scale.
- if (Src.getOpcode() == ISD::SRL && !Mask.isAllOnes() &&
- !foldMaskAndShiftToExtract(*CurDAG, N, Mask.getZExtValue(), Src,
+ if (Src.getOpcode() == ISD::SRL && !Mask.isAllOnes()) {
+ // Try to fold the mask and shift into an extract and scale.
+ if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask.getZExtValue(), Src,
+ Src.getOperand(0), AM))
+ return false;
+
+ // Try to fold the mask and shift directly into the scale.
+ if (!foldMaskAndShiftToScale(*CurDAG, N, Mask.getZExtValue(), Src,
Src.getOperand(0), AM))
- return false;
+ return false;
+
+ // Try to fold the mask and shift into BEXTR and scale.
+ if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask.getZExtValue(), Src,
+ Src.getOperand(0), AM, *Subtarget))
+ return false;
+ }
break;
}
@@ -2603,7 +2799,7 @@ bool X86DAGToDAGISel::matchVectorAddressRecursively(SDValue N,
AM.dump(CurDAG);
});
// Limit recursion.
- if (Depth > 5)
+ if (Depth >= SelectionDAG::MaxRecursionDepth)
return matchAddressBase(N, AM);
// TODO: Support other operations.
@@ -2659,9 +2855,15 @@ bool X86DAGToDAGISel::selectVectorAddr(MemSDNode *Parent, SDValue BasePtr,
SDValue &Index, SDValue &Disp,
SDValue &Segment) {
X86ISelAddressMode AM;
- AM.IndexReg = IndexOp;
AM.Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue();
+ // Attempt to match index patterns, as long as we're not relying on implicit
+ // sign-extension, which is performed BEFORE scale.
+ if (IndexOp.getScalarValueSizeInBits() == BasePtr.getScalarValueSizeInBits())
+ AM.IndexReg = matchIndexRecursively(IndexOp, AM, 0);
+ else
+ AM.IndexReg = IndexOp;
+
unsigned AddrSpace = Parent->getPointerInfo().getAddrSpace();
if (AddrSpace == X86AS::GS)
AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
@@ -2725,6 +2927,13 @@ bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
}
bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
+ // Cannot use 32 bit constants to reference objects in kernel code model.
+ // Cannot use 32 bit constants to reference objects in large PIC mode since
+ // GOTOFF is 64 bits.
+ if (TM.getCodeModel() == CodeModel::Kernel ||
+ (TM.getCodeModel() == CodeModel::Large && TM.isPositionIndependent()))
+ return false;
+
// In static codegen with small code model, we can get the address of a label
// into a register with 'movl'
if (N->getOpcode() != X86ISD::Wrapper)
@@ -2738,15 +2947,18 @@ bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
return false;
Imm = N;
- if (N->getOpcode() != ISD::TargetGlobalAddress)
- return TM.getCodeModel() == CodeModel::Small;
+ // Small/medium code model can reference non-TargetGlobalAddress objects with
+ // 32 bit constants.
+ if (N->getOpcode() != ISD::TargetGlobalAddress) {
+ return TM.getCodeModel() == CodeModel::Small ||
+ TM.getCodeModel() == CodeModel::Medium;
+ }
- std::optional<ConstantRange> CR =
- cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange();
- if (!CR)
- return TM.getCodeModel() == CodeModel::Small;
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(N)->getGlobal();
+ if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange())
+ return CR->getUnsignedMax().ult(1ull << 32);
- return CR->getUnsignedMax().ult(1ull << 32);
+ return !TM.isLargeGlobalValue(GV);
}
bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
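The expanded selectMOV64Imm32 logic condenses to a predicate like the one below (illustrative types; the real code also checks the wrapper node and the relocation model before reaching this point):

#include <cassert>
#include <cstdint>
#include <optional>

enum class CodeModel { Small, Medium, Kernel, Large };

bool canUseMovImm32(CodeModel CM, bool PIC, bool IsTargetGlobalAddress,
                    bool IsLargeGlobalValue,
                    std::optional<uint64_t> AbsSymbolMax) {
  // Kernel addresses live in the negative 2GB; large PIC needs 64-bit GOTOFF.
  if (CM == CodeModel::Kernel || (CM == CodeModel::Large && PIC))
    return false;
  // Non-global targets are fine in the small and (now also) medium models.
  if (!IsTargetGlobalAddress)
    return CM == CodeModel::Small || CM == CodeModel::Medium;
  // Absolute symbols qualify whenever their range fits in 32 bits.
  if (AbsSymbolMax)
    return *AbsSymbolMax < (1ull << 32);
  // Otherwise only globals not placed in the large-data area qualify.
  return !IsLargeGlobalValue;
}

int main() {
  assert(!canUseMovImm32(CodeModel::Kernel, false, true, false, {}));
  assert(canUseMovImm32(CodeModel::Medium, false, true, false, {}));
}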
@@ -3734,7 +3946,7 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
}
if (Subtarget->hasBMI2()) {
- // Great, just emit the the BZHI..
+ // Great, just emit the BZHI..
if (NVT != MVT::i32) {
// But have to place the bit count into the wide-enough register first.
NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, NVT, NBits);
@@ -6221,18 +6433,18 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
SelectCode(Node);
}
-bool X86DAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
- std::vector<SDValue> &OutOps) {
+bool X86DAGToDAGISel::SelectInlineAsmMemoryOperand(
+ const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
+ std::vector<SDValue> &OutOps) {
SDValue Op0, Op1, Op2, Op3, Op4;
switch (ConstraintID) {
default:
llvm_unreachable("Unexpected asm memory constraint");
- case InlineAsm::Constraint_o: // offsetable ??
- case InlineAsm::Constraint_v: // not offsetable ??
- case InlineAsm::Constraint_m: // memory
- case InlineAsm::Constraint_X:
- case InlineAsm::Constraint_p: // address
+ case InlineAsm::ConstraintCode::o: // offsetable ??
+ case InlineAsm::ConstraintCode::v: // not offsetable ??
+ case InlineAsm::ConstraintCode::m: // memory
+ case InlineAsm::ConstraintCode::X:
+ case InlineAsm::ConstraintCode::p: // address
if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
return true;
break;
@@ -6249,6 +6461,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
/// This pass converts a legalized DAG into an X86-specific DAG,
/// ready for instruction scheduling.
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
return new X86DAGToDAGISel(TM, OptLevel);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0f1cb5f1e236..b80c766c7ffa 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43,7 +43,6 @@
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
@@ -70,8 +69,6 @@ using namespace llvm;
#define DEBUG_TYPE "x86-isel"
-STATISTIC(NumTailCalls, "Number of tail calls");
-
static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
"x86-experimental-pref-innermost-loop-alignment", cl::init(4),
cl::desc(
@@ -86,45 +83,6 @@ static cl::opt<bool> MulConstantOptimization(
"SHIFT, LEA, etc."),
cl::Hidden);
-static cl::opt<bool> ExperimentalUnorderedISEL(
- "x86-experimental-unordered-atomic-isel", cl::init(false),
- cl::desc("Use LoadSDNode and StoreSDNode instead of "
- "AtomicSDNode for unordered atomic loads and "
- "stores respectively."),
- cl::Hidden);
-
-/// Call this when the user attempts to do something unsupported, like
-/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
-/// report_fatal_error, so calling code should attempt to recover without
-/// crashing.
-static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
- const char *Msg) {
- MachineFunction &MF = DAG.getMachineFunction();
- DAG.getContext()->diagnose(
- DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
-}
-
-/// Returns true if a CC can dynamically exclude a register from the list of
-/// callee-saved-registers (TargetRegistryInfo::getCalleeSavedRegs()) based on
-/// the return registers.
-static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
- switch (CC) {
- default:
- return false;
- case CallingConv::X86_RegCall:
- case CallingConv::PreserveMost:
- case CallingConv::PreserveAll:
- return true;
- }
-}
-
-/// Returns true if a CC can dynamically exclude a register from the list of
-/// callee-saved-registers (TargetRegistryInfo::getCalleeSavedRegs()) based on
-/// the parameters.
-static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
- return CC == CallingConv::X86_RegCall;
-}
-
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
const X86Subtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
@@ -151,7 +109,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
// Bypass expensive divides and use cheaper ones.
- if (TM.getOptLevel() >= CodeGenOpt::Default) {
+ if (TM.getOptLevel() >= CodeGenOptLevel::Default) {
if (Subtarget.hasSlowDivide32())
addBypassSlowDiv(32, 8);
if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
@@ -184,11 +142,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLibcallName(RTLIB::POWI_F64, nullptr);
}
- // If we don't have cmpxchg8b(meaing this is a 386/486), limit atomic size to
- // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
- // FIXME: Should we be limiting the atomic size on other configs? Default is
- // 1024.
- if (!Subtarget.canUseCMPXCHG8B())
+ if (Subtarget.canUseCMPXCHG16B())
+ setMaxAtomicSizeInBitsSupported(128);
+ else if (Subtarget.canUseCMPXCHG8B())
+ setMaxAtomicSizeInBitsSupported(64);
+ else
setMaxAtomicSizeInBitsSupported(32);
setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64);
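The new ladder makes the supported inline-atomic width track the widest lock cmpxchg the CPU provides; anything wider is expanded to library calls by AtomicExpandPass. In isolation:

#include <cassert>

unsigned maxAtomicBits(bool HasCmpXchg16B, bool HasCmpXchg8B) {
  if (HasCmpXchg16B)
    return 128; // lock cmpxchg16b
  if (HasCmpXchg8B)
    return 64;  // lock cmpxchg8b
  return 32;    // 386/486: only 32-bit lock operations
}

int main() {
  assert(maxAtomicBits(true, true) == 128);
  assert(maxAtomicBits(false, true) == 64);
  assert(maxAtomicBits(false, false) == 32);
}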
@@ -611,6 +569,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FLOG10, VT, Action);
setOperationAction(ISD::FEXP, VT, Action);
setOperationAction(ISD::FEXP2, VT, Action);
+ setOperationAction(ISD::FEXP10, VT, Action);
setOperationAction(ISD::FCEIL, VT, Action);
setOperationAction(ISD::FFLOOR, VT, Action);
setOperationAction(ISD::FNEARBYINT, VT, Action);
@@ -830,6 +789,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
setOperationAction(ISD::FRINT, MVT::f80, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
+ setOperationAction(ISD::FROUNDEVEN, MVT::f80, Expand);
setOperationAction(ISD::FMA, MVT::f80, Expand);
setOperationAction(ISD::LROUND, MVT::f80, Expand);
setOperationAction(ISD::LLROUND, MVT::f80, Expand);
@@ -923,6 +883,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FLOG10, MVT::f80, Expand);
setOperationAction(ISD::FEXP, MVT::f80, Expand);
setOperationAction(ISD::FEXP2, MVT::f80, Expand);
+ setOperationAction(ISD::FEXP10, MVT::f80, Expand);
setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
@@ -941,6 +902,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FLOG10, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FEXP10, VT, Expand);
}
// First set operation action for all vector types to either promote
@@ -961,6 +923,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FTRUNC, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FNEARBYINT, VT, Expand);
+ setOperationAction(ISD::FROUNDEVEN, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
@@ -1165,6 +1128,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
+ setOperationAction(ISD::FNEG, MVT::v8f16, Custom);
+ setOperationAction(ISD::FABS, MVT::v8f16, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Custom);
// Custom lower v2i64 and v2f64 selects.
setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
@@ -1624,6 +1590,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STORE, VT, Custom);
}
setF16Action(MVT::v16f16, Expand);
+ setOperationAction(ISD::FNEG, MVT::v16f16, Custom);
+ setOperationAction(ISD::FABS, MVT::v16f16, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v16f16, Custom);
setOperationAction(ISD::FADD, MVT::v16f16, Expand);
setOperationAction(ISD::FSUB, MVT::v16f16, Expand);
setOperationAction(ISD::FMUL, MVT::v16f16, Expand);
@@ -1996,20 +1965,24 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (Subtarget.hasVBMI2()) {
- for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
- MVT::v16i16, MVT::v8i32, MVT::v4i64,
- MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
+ for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
setOperationAction(ISD::FSHL, VT, Custom);
setOperationAction(ISD::FSHR, VT, Custom);
}
setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
- setOperationAction(ISD::ROTR, MVT::v8i16, Custom);
- setOperationAction(ISD::ROTR, MVT::v16i16, Custom);
setOperationAction(ISD::ROTR, MVT::v32i16, Custom);
}
}// useAVX512Regs
+ if (!Subtarget.useSoftFloat() && Subtarget.hasVBMI2()) {
+ for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, MVT::v8i32,
+ MVT::v4i64}) {
+ setOperationAction(ISD::FSHL, VT, Custom);
+ setOperationAction(ISD::FSHR, VT, Custom);
+ }
+ }
+
// This block controls legalization for operations that don't have
// pre-AVX512 equivalents. Without VLX we use 512-bit operations for
// narrower widths.
@@ -2076,6 +2049,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
setOperationAction(ISD::CTPOP, VT, Legal);
}
+ setOperationAction(ISD::FNEG, MVT::v32f16, Custom);
+ setOperationAction(ISD::FABS, MVT::v32f16, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v32f16, Custom);
}
// This block controls legalization of v32i1/v64i1 which are available with
@@ -2143,6 +2119,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FRINT, VT, Legal);
setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::FROUNDEVEN, VT, Legal);
+ setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
setOperationAction(ISD::FROUND, VT, Custom);
@@ -2356,7 +2334,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
- if (Subtarget.hasAMXTILE()) {
+ if (!Subtarget.useSoftFloat() && Subtarget.hasAMXTILE()) {
addRegisterClass(MVT::x86amx, &X86::TILERegClass);
}
@@ -2563,2867 +2541,6 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const {
return TargetLoweringBase::getPreferredVectorAction(VT);
}
-static std::pair<MVT, unsigned>
-handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
- const X86Subtarget &Subtarget) {
- // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
- // convention is one that uses k registers.
- if (NumElts == 2)
- return {MVT::v2i64, 1};
- if (NumElts == 4)
- return {MVT::v4i32, 1};
- if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
- CC != CallingConv::Intel_OCL_BI)
- return {MVT::v8i16, 1};
- if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
- CC != CallingConv::Intel_OCL_BI)
- return {MVT::v16i8, 1};
- // v32i1 passes in ymm unless we have BWI and the calling convention is
- // regcall.
- if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
- return {MVT::v32i8, 1};
- // Split v64i1 vectors if we don't have v64i8 available.
- if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
- if (Subtarget.useAVX512Regs())
- return {MVT::v64i8, 1};
- return {MVT::v32i8, 2};
- }
-
- // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
- if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
- NumElts > 64)
- return {MVT::i8, NumElts};
-
- return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
-}
-
-MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
- CallingConv::ID CC,
- EVT VT) const {
- if (VT.isVector()) {
- if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
- unsigned NumElts = VT.getVectorNumElements();
-
- MVT RegisterVT;
- unsigned NumRegisters;
- std::tie(RegisterVT, NumRegisters) =
- handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
- if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
- return RegisterVT;
- }
-
- if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
- return MVT::v8f16;
- }
-
- // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
- if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
- !Subtarget.hasX87())
- return MVT::i32;
-
- if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
- return getRegisterTypeForCallingConv(Context, CC,
- VT.changeVectorElementType(MVT::f16));
-
- return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
-}
-
-unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
- CallingConv::ID CC,
- EVT VT) const {
- if (VT.isVector()) {
- if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
- unsigned NumElts = VT.getVectorNumElements();
-
- MVT RegisterVT;
- unsigned NumRegisters;
- std::tie(RegisterVT, NumRegisters) =
- handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
- if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
- return NumRegisters;
- }
-
- if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
- return 1;
- }
-
- // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
- // x87 is disabled.
- if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
- if (VT == MVT::f64)
- return 2;
- if (VT == MVT::f80)
- return 3;
- }
-
- if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
- return getNumRegistersForCallingConv(Context, CC,
- VT.changeVectorElementType(MVT::f16));
-
- return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
-}
-
-unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
- LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
- unsigned &NumIntermediates, MVT &RegisterVT) const {
- // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
- if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
- Subtarget.hasAVX512() &&
- (!isPowerOf2_32(VT.getVectorNumElements()) ||
- (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
- VT.getVectorNumElements() > 64)) {
- RegisterVT = MVT::i8;
- IntermediateVT = MVT::i1;
- NumIntermediates = VT.getVectorNumElements();
- return NumIntermediates;
- }
-
- // Split v64i1 vectors if we don't have v64i8 available.
- if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
- CC != CallingConv::X86_RegCall) {
- RegisterVT = MVT::v32i8;
- IntermediateVT = MVT::v32i1;
- NumIntermediates = 2;
- return 2;
- }
-
- // Split vNbf16 vectors according to vNf16.
- if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
- VT = VT.changeVectorElementType(MVT::f16);
-
- return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
- NumIntermediates, RegisterVT);
-}
-
-EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
- LLVMContext& Context,
- EVT VT) const {
- if (!VT.isVector())
- return MVT::i8;
-
- if (Subtarget.hasAVX512()) {
- // Figure out what this type will be legalized to.
- EVT LegalVT = VT;
- while (getTypeAction(Context, LegalVT) != TypeLegal)
- LegalVT = getTypeToTransformTo(Context, LegalVT);
-
- // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
- if (LegalVT.getSimpleVT().is512BitVector())
- return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
-
- if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
- // If we legalized to less than a 512-bit vector, then we will use a vXi1
- // compare for vXi32/vXi64 for sure. If we have BWI we will also support
- // vXi16/vXi8.
- MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
- if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
- return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
- }
- }
-
- return VT.changeVectorElementTypeToInteger();
-}
-
-/// Helper for getByValTypeAlignment to determine
-/// the desired ByVal argument alignment.
-static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
- if (MaxAlign == 16)
- return;
- if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
- if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
- MaxAlign = Align(16);
- } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- Align EltAlign;
- getMaxByValAlign(ATy->getElementType(), EltAlign);
- if (EltAlign > MaxAlign)
- MaxAlign = EltAlign;
- } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
- for (auto *EltTy : STy->elements()) {
- Align EltAlign;
- getMaxByValAlign(EltTy, EltAlign);
- if (EltAlign > MaxAlign)
- MaxAlign = EltAlign;
- if (MaxAlign == 16)
- break;
- }
- }
-}
-
-/// Return the desired alignment for ByVal aggregate
-/// function arguments in the caller parameter area. For X86, aggregates
-/// that contain SSE vectors are placed at 16-byte boundaries while the rest
-/// are at 4-byte boundaries.
-uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty,
- const DataLayout &DL) const {
- if (Subtarget.is64Bit()) {
- // Max of 8 and alignment of type.
- Align TyAlign = DL.getABITypeAlign(Ty);
- if (TyAlign > 8)
- return TyAlign.value();
- return 8;
- }
-
- Align Alignment(4);
- if (Subtarget.hasSSE1())
- getMaxByValAlign(Ty, Alignment);
- return Alignment.value();
-}
-
-/// It returns EVT::Other if the type should be determined using generic
-/// target-independent logic.
-/// For vector ops we check that the overall size isn't larger than our
-/// preferred vector width.
-EVT X86TargetLowering::getOptimalMemOpType(
- const MemOp &Op, const AttributeList &FuncAttributes) const {
- if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
- if (Op.size() >= 16 &&
- (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
- // FIXME: Check if unaligned 64-byte accesses are slow.
- if (Op.size() >= 64 && Subtarget.hasAVX512() &&
- (Subtarget.getPreferVectorWidth() >= 512)) {
- return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
- }
- // FIXME: Check if unaligned 32-byte accesses are slow.
- if (Op.size() >= 32 && Subtarget.hasAVX() &&
- Subtarget.useLight256BitInstructions()) {
- // Although this isn't a well-supported type for AVX1, we'll let
- // legalization and shuffle lowering produce the optimal codegen. If we
- // choose an optimal type with a vector element larger than a byte,
- // getMemsetStores() may create an intermediate splat (using an integer
- // multiply) before we splat as a vector.
- return MVT::v32i8;
- }
- if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
- return MVT::v16i8;
- // TODO: Can SSE1 handle a byte vector?
- // If we have SSE1 registers we should be able to use them.
- if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
- (Subtarget.getPreferVectorWidth() >= 128))
- return MVT::v4f32;
- } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
- Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
- // Do not use f64 to lower memcpy if source is string constant. It's
- // better to use i32 to avoid the loads.
- // Also, do not use f64 to lower memset unless this is a memset of zeros.
- // The gymnastics of splatting a byte value into an XMM register and then
- // only using 8-byte stores (because this is a CPU with slow unaligned
- // 16-byte accesses) makes that a loser.
- return MVT::f64;
- }
- }
- // This is a compromise. If we reach here, unaligned accesses may be slow on
- // this target. However, creating smaller, aligned accesses could be even
- // slower and would certainly be a lot more code.
- if (Subtarget.is64Bit() && Op.size() >= 8)
- return MVT::i64;
- return MVT::i32;
-}
-
-bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
- if (VT == MVT::f32)
- return Subtarget.hasSSE1();
- if (VT == MVT::f64)
- return Subtarget.hasSSE2();
- return true;
-}
-
-static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
- return (8 * Alignment.value()) % SizeInBits == 0;
-}
-
-bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
- if (isBitAligned(Alignment, VT.getSizeInBits()))
- return true;
- switch (VT.getSizeInBits()) {
- default:
- // 8-byte and under are always assumed to be fast.
- return true;
- case 128:
- return !Subtarget.isUnalignedMem16Slow();
- case 256:
- return !Subtarget.isUnalignedMem32Slow();
- // TODO: What about AVX-512 (512-bit) accesses?
- }
-}
-
-bool X86TargetLowering::allowsMisalignedMemoryAccesses(
- EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
- unsigned *Fast) const {
- if (Fast)
- *Fast = isMemoryAccessFast(VT, Alignment);
- // NonTemporal vector memory ops must be aligned.
- if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
- // NT loads can only be vector aligned, so if its less aligned than the
- // minimum vector size (which we can split the vector down to), we might as
- // well use a regular unaligned vector load.
- // We don't have any NT loads pre-SSE41.
- if (!!(Flags & MachineMemOperand::MOLoad))
- return (Alignment < 16 || !Subtarget.hasSSE41());
- return false;
- }
- // Misaligned accesses of any size are always allowed.
- return true;
-}
-
-bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
- const DataLayout &DL, EVT VT,
- unsigned AddrSpace, Align Alignment,
- MachineMemOperand::Flags Flags,
- unsigned *Fast) const {
- if (Fast)
- *Fast = isMemoryAccessFast(VT, Alignment);
- if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
- if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
- /*Fast=*/nullptr))
- return true;
- // NonTemporal vector memory ops are special, and must be aligned.
- if (!isBitAligned(Alignment, VT.getSizeInBits()))
- return false;
- switch (VT.getSizeInBits()) {
- case 128:
- if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
- return true;
- if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
- return true;
- return false;
- case 256:
- if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
- return true;
- if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
- return true;
- return false;
- case 512:
- if (Subtarget.hasAVX512())
- return true;
- return false;
- default:
- return false; // Don't have NonTemporal vector memory ops of this size.
- }
- }
- return true;
-}
-
-/// Return the entry encoding for a jump table in the
-/// current function. The returned value is a member of the
-/// MachineJumpTableInfo::JTEntryKind enum.
-unsigned X86TargetLowering::getJumpTableEncoding() const {
- // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
- // symbol.
- if (isPositionIndependent() && Subtarget.isPICStyleGOT())
- return MachineJumpTableInfo::EK_Custom32;
-
- // Otherwise, use the normal jump table encoding heuristics.
- return TargetLowering::getJumpTableEncoding();
-}
-
-bool X86TargetLowering::splitValueIntoRegisterParts(
- SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
- unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
- bool IsABIRegCopy = CC.has_value();
- EVT ValueVT = Val.getValueType();
- if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
- unsigned ValueBits = ValueVT.getSizeInBits();
- unsigned PartBits = PartVT.getSizeInBits();
- Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
- Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
- Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
- Parts[0] = Val;
- return true;
- }
- return false;
-}
-
-SDValue X86TargetLowering::joinRegisterPartsIntoValue(
- SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
- MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
- bool IsABIRegCopy = CC.has_value();
- if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
- unsigned ValueBits = ValueVT.getSizeInBits();
- unsigned PartBits = PartVT.getSizeInBits();
- SDValue Val = Parts[0];
-
- Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
- Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
- Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
- return Val;
- }
- return SDValue();
-}
-
-bool X86TargetLowering::useSoftFloat() const {
- return Subtarget.useSoftFloat();
-}
-
-void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
- ArgListTy &Args) const {
-
- // Only relabel X86-32 for C / Stdcall CCs.
- if (Subtarget.is64Bit())
- return;
- if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
- return;
- unsigned ParamRegs = 0;
- if (auto *M = MF->getFunction().getParent())
- ParamRegs = M->getNumberRegisterParameters();
-
- // Mark the first N int arguments as having reg
- for (auto &Arg : Args) {
- Type *T = Arg.Ty;
- if (T->isIntOrPtrTy())
- if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
- unsigned numRegs = 1;
- if (MF->getDataLayout().getTypeAllocSize(T) > 4)
- numRegs = 2;
- if (ParamRegs < numRegs)
- return;
- ParamRegs -= numRegs;
- Arg.IsInReg = true;
- }
- }
-}
-
-const MCExpr *
-X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
- const MachineBasicBlock *MBB,
- unsigned uid,MCContext &Ctx) const{
- assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
- // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
- // entries.
- return MCSymbolRefExpr::create(MBB->getSymbol(),
- MCSymbolRefExpr::VK_GOTOFF, Ctx);
-}
-
-/// Returns relocation base for the given PIC jumptable.
-SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
- SelectionDAG &DAG) const {
- if (!Subtarget.is64Bit())
- // This doesn't have SDLoc associated with it, but is not really the
- // same as a Register.
- return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
- getPointerTy(DAG.getDataLayout()));
- return Table;
-}
-
-/// This returns the relocation base for the given PIC jumptable,
-/// the same as getPICJumpTableRelocBase, but as an MCExpr.
-const MCExpr *X86TargetLowering::
-getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
- MCContext &Ctx) const {
- // X86-64 uses RIP relative addressing based on the jump table label.
- if (Subtarget.isPICStyleRIPRel())
- return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
-
- // Otherwise, the reference is relative to the PIC base.
- return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
-}
-
-std::pair<const TargetRegisterClass *, uint8_t>
-X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
- MVT VT) const {
- const TargetRegisterClass *RRC = nullptr;
- uint8_t Cost = 1;
- switch (VT.SimpleTy) {
- default:
- return TargetLowering::findRepresentativeClass(TRI, VT);
- case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
- RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
- break;
- case MVT::x86mmx:
- RRC = &X86::VR64RegClass;
- break;
- case MVT::f32: case MVT::f64:
- case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
- case MVT::v4f32: case MVT::v2f64:
- case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
- case MVT::v8f32: case MVT::v4f64:
- case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
- case MVT::v16f32: case MVT::v8f64:
- RRC = &X86::VR128XRegClass;
- break;
- }
- return std::make_pair(RRC, Cost);
-}
-
-unsigned X86TargetLowering::getAddressSpace() const {
- if (Subtarget.is64Bit())
- return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
- return 256;
-}
-
-static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
- return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
- (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
-}
-
-static Constant* SegmentOffset(IRBuilderBase &IRB,
- int Offset, unsigned AddressSpace) {
- return ConstantExpr::getIntToPtr(
- ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
- Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
-}
-
-Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
- // glibc, bionic, and Fuchsia have a special slot for the stack guard in
- // tcbhead_t; use it instead of the usual global variable (see
- // sysdeps/{i386,x86_64}/nptl/tls.h)
- if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
- unsigned AddressSpace = getAddressSpace();
-
- // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
- if (Subtarget.isTargetFuchsia())
- return SegmentOffset(IRB, 0x10, AddressSpace);
-
- Module *M = IRB.GetInsertBlock()->getParent()->getParent();
- // Specially, some users may customize the base reg and offset.
- int Offset = M->getStackProtectorGuardOffset();
- // If we don't set -stack-protector-guard-offset value:
- // %fs:0x28, unless we're using a Kernel code model, in which case
- // it's %gs:0x28. gs:0x14 on i386.
- if (Offset == INT_MAX)
- Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
-
- StringRef GuardReg = M->getStackProtectorGuardReg();
- if (GuardReg == "fs")
- AddressSpace = X86AS::FS;
- else if (GuardReg == "gs")
- AddressSpace = X86AS::GS;
-
- // Use the guard symbol if the user specified one.
- StringRef GuardSymb = M->getStackProtectorGuardSymbol();
- if (!GuardSymb.empty()) {
- GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
- if (!GV) {
- Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
- : Type::getInt32Ty(M->getContext());
- GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
- nullptr, GuardSymb, nullptr,
- GlobalValue::NotThreadLocal, AddressSpace);
- if (!Subtarget.isTargetDarwin())
- GV->setDSOLocal(M->getDirectAccessExternalData());
- }
- return GV;
- }
-
- return SegmentOffset(IRB, Offset, AddressSpace);
- }
- return TargetLowering::getIRStackGuard(IRB);
-}
-
-void X86TargetLowering::insertSSPDeclarations(Module &M) const {
- // The MSVC CRT provides functionality for stack protection.
- if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
- Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
- // The MSVC CRT has a global variable holding the security cookie.
- M.getOrInsertGlobal("__security_cookie",
- Type::getInt8PtrTy(M.getContext()));
-
- // The MSVC CRT has a function to validate the security cookie.
- FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
- "__security_check_cookie", Type::getVoidTy(M.getContext()),
- Type::getInt8PtrTy(M.getContext()));
- if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
- F->setCallingConv(CallingConv::X86_FastCall);
- F->addParamAttr(0, Attribute::AttrKind::InReg);
- }
- return;
- }
-
- StringRef GuardMode = M.getStackProtectorGuard();
-
- // glibc, bionic, and Fuchsia have a special slot for the stack guard.
- if ((GuardMode == "tls" || GuardMode.empty()) &&
- hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
- return;
- TargetLowering::insertSSPDeclarations(M);
-}
-
-Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
- // The MSVC CRT has a global variable holding the security cookie.
- if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
- Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
- return M.getGlobalVariable("__security_cookie");
- }
- return TargetLowering::getSDagStackGuard(M);
-}
-
-Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
- // The MSVC CRT has a function to validate the security cookie.
- if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
- Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
- return M.getFunction("__security_check_cookie");
- }
- return TargetLowering::getSSPStackGuardCheck(M);
-}
-
-Value *
-X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
- if (Subtarget.getTargetTriple().isOSContiki())
- return getDefaultSafeStackPointerLocation(IRB, false);
-
- // Android provides a fixed TLS slot for the SafeStack pointer. See the
- // definition of TLS_SLOT_SAFESTACK in
- // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
- if (Subtarget.isTargetAndroid()) {
- // %fs:0x48, unless we're using a Kernel code model, in which case it's
- // %gs:0x48. %gs:0x24 on i386.
- int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
- return SegmentOffset(IRB, Offset, getAddressSpace());
- }
-
- // Fuchsia is similar.
- if (Subtarget.isTargetFuchsia()) {
- // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
- return SegmentOffset(IRB, 0x18, getAddressSpace());
- }
-
- return TargetLowering::getSafeStackPointerLocation(IRB);
-}
-
-//===----------------------------------------------------------------------===//
-// Return Value Calling Convention Implementation
-//===----------------------------------------------------------------------===//
-
-bool X86TargetLowering::CanLowerReturn(
- CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
- return CCInfo.CheckReturn(Outs, RetCC_X86);
-}
-
-const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
- static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
- return ScratchRegs;
-}
-
-ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
- // FIXME: We should def X86::FPCW for x87 as well. But it affects a lot of lit
- // tests at the moment, which is not what we expected.
- static const MCPhysReg RCRegs[] = {X86::MXCSR};
- return RCRegs;
-}
-
-/// Lowers mask values (v*i1) to the local register values.
-/// \returns the DAG node after lowering to the register type.
-static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
- const SDLoc &DL, SelectionDAG &DAG) {
- EVT ValVT = ValArg.getValueType();
-
- if (ValVT == MVT::v1i1)
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
- DAG.getIntPtrConstant(0, DL));
-
- if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
- (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
- // Two stage lowering might be required
- // bitcast: v8i1 -> i8 / v16i1 -> i16
- // anyextend: i8 -> i32 / i16 -> i32
- EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
- SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
- if (ValLoc == MVT::i32)
- ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
- return ValToCopy;
- }
-
- if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
- (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
- // One stage lowering is required
- // bitcast: v32i1 -> i32 / v64i1 -> i64
- return DAG.getBitcast(ValLoc, ValArg);
- }
-
- return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
-}
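-
-// For example, a v16i1 mask assigned to an i32 location takes the two-stage
-// path above:
-//   %t = bitcast v16i1 %mask to i16
-//   %r = any_extend i16 %t to i32
-// while v32i1 -> i32 needs only the single bitcast.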
-
-/// Breaks v64i1 value into two registers and adds the new node to the DAG
-static void Passv64i1ArgInRegs(
- const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
- SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
- CCValAssign &NextVA, const X86Subtarget &Subtarget) {
- assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
- assert(Subtarget.is32Bit() && "Expecting 32 bit target");
- assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
- assert(VA.isRegLoc() && NextVA.isRegLoc() &&
- "The value should reside in two registers");
-
- // Before splitting the value we cast it to i64
- Arg = DAG.getBitcast(MVT::i64, Arg);
-
- // Split the value into two i32 halves.
- SDValue Lo, Hi;
- std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);
-
- // Attach the two i32 halves to the corresponding registers.
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
- RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
-}
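-
-// A sketch of the result for one v64i1 argument, assuming the two assigned
-// locations are EAX and EDX:
-//   %v  = bitcast v64i1 %arg to i64
-//   %lo = low 32 bits of %v   -> passed in EAX
-//   %hi = high 32 bits of %v  -> passed in EDX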
-
-SDValue
-X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SDLoc &dl, SelectionDAG &DAG) const {
- MachineFunction &MF = DAG.getMachineFunction();
- X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
-
- // In some cases we need to disable registers from the default CSR list.
- // For example, when they are used as return registers (preserve_* and X86's
- // regcall) or for argument passing (X86's regcall).
- bool ShouldDisableCalleeSavedRegister =
- shouldDisableRetRegFromCSR(CallConv) ||
- MF.getFunction().hasFnAttribute("no_caller_saved_registers");
-
- if (CallConv == CallingConv::X86_INTR && !Outs.empty())
- report_fatal_error("X86 interrupts may not return any value");
-
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
- CCInfo.AnalyzeReturn(Outs, RetCC_X86);
-
- SmallVector<std::pair<Register, SDValue>, 4> RetVals;
- for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
- ++I, ++OutsIndex) {
- CCValAssign &VA = RVLocs[I];
- assert(VA.isRegLoc() && "Can only return in registers!");
-
- // Add the register to the CalleeSaveDisableRegs list.
- if (ShouldDisableCalleeSavedRegister)
- MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
-
- SDValue ValToCopy = OutVals[OutsIndex];
- EVT ValVT = ValToCopy.getValueType();
-
- // Promote values to the appropriate types.
- if (VA.getLocInfo() == CCValAssign::SExt)
- ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
- else if (VA.getLocInfo() == CCValAssign::AExt) {
- if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
- ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
- else
- ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
- }
- else if (VA.getLocInfo() == CCValAssign::BCvt)
- ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
-
- assert(VA.getLocInfo() != CCValAssign::FPExt &&
- "Unexpected FP-extend for return value.");
-
- // Report an error if we have attempted to return a value via an XMM
- // register and SSE was disabled.
- if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
- errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
- VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
- } else if (!Subtarget.hasSSE2() &&
- X86::FR64XRegClass.contains(VA.getLocReg()) &&
- ValVT == MVT::f64) {
- // When returning a double via an XMM register, report an error if SSE2 is
- // not enabled.
- errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
- VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
- }
-
- // Returns in ST0/ST1 are handled specially: these are pushed as operands to
- // the RET instruction and handled by the FP Stackifier.
- if (VA.getLocReg() == X86::FP0 ||
- VA.getLocReg() == X86::FP1) {
- // If this is a copy from an xmm register to ST(0), use an FPExtend to
- // change the value to the FP stack register class.
- if (isScalarFPTypeInSSEReg(VA.getValVT()))
- ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
- RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
- // Don't emit a copytoreg.
- continue;
- }
-
- // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
- // which is returned in RAX / RDX.
- if (Subtarget.is64Bit()) {
- if (ValVT == MVT::x86mmx) {
- if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
- ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
- ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
- ValToCopy);
- // If we don't have SSE2 available, convert to v4f32 so the generated
- // register is legal.
- if (!Subtarget.hasSSE2())
- ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
- }
- }
- }
-
- if (VA.needsCustom()) {
- assert(VA.getValVT() == MVT::v64i1 &&
- "Currently the only custom case is when we split v64i1 to 2 regs");
-
- Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
- Subtarget);
-
- // Add the second register to the CalleeSaveDisableRegs list.
- if (ShouldDisableCalleeSavedRegister)
- MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
- } else {
- RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
- }
- }
-
- SDValue Glue;
- SmallVector<SDValue, 6> RetOps;
- RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
- // Operand #1 = Bytes To Pop
- RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
- MVT::i32));
-
- // Copy the result values into the output registers.
- for (auto &RetVal : RetVals) {
- if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
- RetOps.push_back(RetVal.second);
- continue; // Don't emit a copytoreg.
- }
-
- Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
- Glue = Chain.getValue(1);
- RetOps.push_back(
- DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
- }
-
- // The Swift calling convention does not require that we copy the sret
- // argument into %rax/%eax for the return, and SRetReturnReg is not set for
- // Swift.
-
- // All x86 ABIs require that for returning structs by value we copy
- // the sret argument into %rax/%eax (depending on ABI) for the return.
- // We saved the argument into a virtual register in the entry block,
- // so now we copy the value out and into %rax/%eax.
- //
- // Checking Function.hasStructRetAttr() here is insufficient because the IR
- // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
- // false, then an sret argument may be implicitly inserted in the SelDAG. In
- // either case FuncInfo->setSRetReturnReg() will have been called.
- if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
- // When we have both sret and another return value, we should use the
- // original Chain stored in RetOps[0], instead of the current Chain updated
- // in the above loop. If we only have sret, RetOps[0] equals Chain.
-
- // For the case of sret and another return value, we have
- // Chain_0 at the function entry
- // Chain_1 = getCopyToReg(Chain_0) in the above loop
- // If we use Chain_1 in getCopyFromReg, we will have
- // Val = getCopyFromReg(Chain_1)
- // Chain_2 = getCopyToReg(Chain_1, Val) from below
-
- // getCopyToReg(Chain_0) will be glued together with
- // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
- // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
- // Data dependency from Unit B to Unit A due to usage of Val in
- // getCopyToReg(Chain_1, Val)
- // Chain dependency from Unit A to Unit B
-
- // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
- SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
- getPointerTy(MF.getDataLayout()));
-
- Register RetValReg
- = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
- X86::RAX : X86::EAX;
- Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
- Glue = Chain.getValue(1);
-
- // RAX/EAX now acts like a return value.
- RetOps.push_back(
- DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
-
- // Add the returned register to the CalleeSaveDisableRegs list. Don't do
- // this however for preserve_most/preserve_all to minimize the number of
- // callee-saved registers for these CCs.
- if (ShouldDisableCalleeSavedRegister &&
- CallConv != CallingConv::PreserveAll &&
- CallConv != CallingConv::PreserveMost)
- MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
- }
-
- const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
- const MCPhysReg *I =
- TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
- if (I) {
- for (; *I; ++I) {
- if (X86::GR64RegClass.contains(*I))
- RetOps.push_back(DAG.getRegister(*I, MVT::i64));
- else
- llvm_unreachable("Unexpected register class in CSRsViaCopy!");
- }
- }
-
- RetOps[0] = Chain; // Update chain.
-
- // Add the glue if we have it.
- if (Glue.getNode())
- RetOps.push_back(Glue);
-
- X86ISD::NodeType opcode = X86ISD::RET_GLUE;
- if (CallConv == CallingConv::X86_INTR)
- opcode = X86ISD::IRET;
- return DAG.getNode(opcode, dl, MVT::Other, RetOps);
-}
-
-bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
- if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
- return false;
-
- SDValue TCChain = Chain;
- SDNode *Copy = *N->use_begin();
- if (Copy->getOpcode() == ISD::CopyToReg) {
- // If the copy has a glue operand, we conservatively assume it isn't safe to
- // perform a tail call.
- if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
- return false;
- TCChain = Copy->getOperand(0);
- } else if (Copy->getOpcode() != ISD::FP_EXTEND)
- return false;
-
- bool HasRet = false;
- for (const SDNode *U : Copy->uses()) {
- if (U->getOpcode() != X86ISD::RET_GLUE)
- return false;
- // If we are returning more than one value, we can definitely
- // not make a tail call; see PR19530.
- if (U->getNumOperands() > 4)
- return false;
- if (U->getNumOperands() == 4 &&
- U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
- return false;
- HasRet = true;
- }
-
- if (!HasRet)
- return false;
-
- Chain = TCChain;
- return true;
-}
-
-EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
- ISD::NodeType ExtendKind) const {
- MVT ReturnMVT = MVT::i32;
-
- bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
- if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
- // The ABI does not require i1, i8 or i16 to be extended.
- //
- // On Darwin, there is code in the wild relying on Clang's old behaviour of
- // always extending i8/i16 return values, so keep doing that for now.
- // (PR26665).
- ReturnMVT = MVT::i8;
- }
-
- EVT MinVT = getRegisterType(Context, ReturnMVT);
- return VT.bitsLT(MinVT) ? MinVT : VT;
-}
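-
-// For example, a plain i8 return stays i8 on Linux, where the ABI does not
-// require extension, while on Darwin it is widened to i32 to preserve the
-// old always-extend behaviour that code in the wild depends on.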
-
-/// Reads two 32 bit registers and creates a 64 bit mask value.
-/// \param VA The current 32 bit value that needs to be assigned.
-/// \param NextVA The next 32 bit value that needs to be assigned.
-/// \param Root The parent DAG node.
-/// \param [in,out] InGlue Represents the SDValue in the parent DAG node for
-/// glue purposes. If the DAG is already using a
-/// physical register instead of a virtual one, we
-/// should glue our new SDValue to the InGlue SDValue.
-/// \return a new 64-bit SDValue.
-static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
- SDValue &Root, SelectionDAG &DAG,
- const SDLoc &DL, const X86Subtarget &Subtarget,
- SDValue *InGlue = nullptr) {
- assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
- assert(Subtarget.is32Bit() && "Expecting 32 bit target");
- assert(VA.getValVT() == MVT::v64i1 &&
- "Expecting first location of 64 bit width type");
- assert(NextVA.getValVT() == VA.getValVT() &&
- "The locations should have the same type");
- assert(VA.isRegLoc() && NextVA.isRegLoc() &&
- "The values should reside in two registers");
-
- SDValue Lo, Hi;
- SDValue ArgValueLo, ArgValueHi;
-
- MachineFunction &MF = DAG.getMachineFunction();
- const TargetRegisterClass *RC = &X86::GR32RegClass;
-
- // Read a 32 bit value from the registers.
- if (nullptr == InGlue) {
- // When no physical register is present,
- // create an intermediate virtual register.
- Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
- ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
- Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
- ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
- } else {
- // When a physical register is available read the value from it and glue
- // the reads together.
- ArgValueLo =
- DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
- *InGlue = ArgValueLo.getValue(2);
- ArgValueHi =
- DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
- *InGlue = ArgValueHi.getValue(2);
- }
-
- // Convert the i32 type into v32i1 type.
- Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
-
- // Convert the i32 type into v32i1 type.
- Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
-
- // Concatenate the two values together.
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
-}
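-
-// This is the inverse of Passv64i1ArgInRegs: e.g. the two i32 halves read
-// from the assigned registers are recombined as
-//   concat_vectors (bitcast i32 %lo to v32i1), (bitcast i32 %hi to v32i1)
-// yielding the original v64i1 value.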
-
-/// The function will lower a register of various sizes (8/16/32/64)
-/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1).
-/// \returns a DAG node containing the operand after lowering to the mask
-/// type.
-static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
- const EVT &ValLoc, const SDLoc &DL,
- SelectionDAG &DAG) {
- SDValue ValReturned = ValArg;
-
- if (ValVT == MVT::v1i1)
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
-
- if (ValVT == MVT::v64i1) {
- // On a 32-bit target this case is handled by getv64i1Argument.
- assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
- // On a 64-bit target there is no need to truncate the value, only to
- // bitcast it.
- } else {
- MVT MaskLenVT;
- switch (ValVT.getSimpleVT().SimpleTy) {
- case MVT::v8i1:
- MaskLenVT = MVT::i8;
- break;
- case MVT::v16i1:
- MaskLenVT = MVT::i16;
- break;
- case MVT::v32i1:
- MaskLenVT = MVT::i32;
- break;
- default:
- llvm_unreachable("Expecting a vector of i1 types");
- }
-
- ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
- }
- return DAG.getBitcast(ValVT, ValReturned);
-}
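-
-// For example, a v8i1 mask that arrives in an i32 register is recovered as
-//   %t = truncate i32 %reg to i8
-//   %m = bitcast i8 %t to v8i1
-// whereas v64i1 in an i64 location is a single bitcast.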
-
-/// Lower the result values of a call into the appropriate copies out of
-/// physical registers.
-///
-SDValue X86TargetLowering::LowerCallResult(
- SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
- SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
- uint32_t *RegMask) const {
-
- const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
- // Assign locations to each value returned by this call.
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
- *DAG.getContext());
- CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
-
- // Copy all of the result registers out of their specified physreg.
- for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
- ++I, ++InsIndex) {
- CCValAssign &VA = RVLocs[I];
- EVT CopyVT = VA.getLocVT();
-
- // In some calling conventions we need to remove the used registers
- // from the register mask.
- if (RegMask) {
- for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
- RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
- }
-
- // Report an error if there was an attempt to return FP values via XMM
- // registers.
- if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
- errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
- if (VA.getLocReg() == X86::XMM1)
- VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
- else
- VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
- } else if (!Subtarget.hasSSE2() &&
- X86::FR64XRegClass.contains(VA.getLocReg()) &&
- CopyVT == MVT::f64) {
- errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
- if (VA.getLocReg() == X86::XMM1)
- VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
- else
- VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
- }
-
- // If we prefer to use the value in xmm registers, copy it out as f80 and
- // use a truncate to move it from fp stack reg to xmm reg.
- bool RoundAfterCopy = false;
- if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
- isScalarFPTypeInSSEReg(VA.getValVT())) {
- if (!Subtarget.hasX87())
- report_fatal_error("X87 register return with X87 disabled");
- CopyVT = MVT::f80;
- RoundAfterCopy = (CopyVT != VA.getLocVT());
- }
-
- SDValue Val;
- if (VA.needsCustom()) {
- assert(VA.getValVT() == MVT::v64i1 &&
- "Currently the only custom case is when we split v64i1 to 2 regs");
- Val =
- getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
- } else {
- Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
- .getValue(1);
- Val = Chain.getValue(0);
- InGlue = Chain.getValue(2);
- }
-
- if (RoundAfterCopy)
- Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
- // This truncation won't change the value.
- DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
-
- if (VA.isExtInLoc()) {
- if (VA.getValVT().isVector() &&
- VA.getValVT().getScalarType() == MVT::i1 &&
- ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
- (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
- // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
- Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
- } else
- Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
- }
-
- if (VA.getLocInfo() == CCValAssign::BCvt)
- Val = DAG.getBitcast(VA.getValVT(), Val);
-
- InVals.push_back(Val);
- }
-
- return Chain;
-}
-
-//===----------------------------------------------------------------------===//
-// C & StdCall & Fast Calling Convention implementation
-//===----------------------------------------------------------------------===//
-// The StdCall calling convention is the standard for many Windows API
-// routines. It differs from the C calling convention only slightly: the
-// callee cleans up the stack rather than the caller, and symbols are
-// decorated with an argument-size suffix (e.g. _f@8). It doesn't support
-// vector arguments.
-// For info on the fast calling convention, see the Fast Calling Convention
-// (tail call) implementation in LowerX86_32FastCCCallTo.
-
-/// Determines whether Args, either a set of outgoing arguments to a call or
-/// a set of incoming arguments of a call, contains an sret pointer that the
-/// callee pops.
-template <typename T>
-static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
- const X86Subtarget &Subtarget) {
- // Not C++20 (yet), so no concepts available.
- static_assert(std::is_same_v<T, ISD::OutputArg> ||
- std::is_same_v<T, ISD::InputArg>,
- "requires ISD::OutputArg or ISD::InputArg");
-
- // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out
- // for most compilations.
- if (!Subtarget.is32Bit())
- return false;
-
- if (Args.empty())
- return false;
-
- // Most calls do not have an sret argument; check the first arg next.
- const ISD::ArgFlagsTy &Flags = Args[0].Flags;
- if (!Flags.isSRet() || Flags.isInReg())
- return false;
-
- // The MSVC ABI does not pop the sret.
- if (Subtarget.getTargetTriple().isOSMSVCRT())
- return false;
-
- // MCU targets don't pop the sret either.
- if (Subtarget.isTargetMCU())
- return false;
-
- // Callee pops argument
- return true;
-}
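-
-// Concretely: on 32-bit Linux, a function returning a struct through an sret
-// pointer pops that hidden argument itself and returns with "ret $4", so its
-// callers must not pop those 4 bytes again; on MSVC and MCU targets the
-// caller cleans up the pointer instead.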
-
-/// Make a copy of an aggregate at the address specified by "Src" to the
-/// address "Dst", with size and alignment information specified by the byval
-/// parameter attribute. The copy will be passed as a byval function
-/// parameter.
-static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
- SDValue Chain, ISD::ArgFlagsTy Flags,
- SelectionDAG &DAG, const SDLoc &dl) {
- SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
-
- return DAG.getMemcpy(
- Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
- /*isVolatile*/ false, /*AlwaysInline=*/true,
- /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
-}
-
-/// Return true if the calling convention is one that we can guarantee TCO for.
-static bool canGuaranteeTCO(CallingConv::ID CC) {
- return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
- CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
- CC == CallingConv::Tail || CC == CallingConv::SwiftTail);
-}
-
-/// Return true if we might ever do TCO for calls with this calling convention.
-static bool mayTailCallThisCC(CallingConv::ID CC) {
- switch (CC) {
- // C calling conventions:
- case CallingConv::C:
- case CallingConv::Win64:
- case CallingConv::X86_64_SysV:
- // Callee pop conventions:
- case CallingConv::X86_ThisCall:
- case CallingConv::X86_StdCall:
- case CallingConv::X86_VectorCall:
- case CallingConv::X86_FastCall:
- // Swift:
- case CallingConv::Swift:
- return true;
- default:
- return canGuaranteeTCO(CC);
- }
-}
-
-/// Return true if the function is being made into a tailcall target by
-/// changing its ABI.
-static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
- return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
- CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
-}
-
-bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
- if (!CI->isTailCall())
- return false;
-
- CallingConv::ID CalleeCC = CI->getCallingConv();
- if (!mayTailCallThisCC(CalleeCC))
- return false;
-
- return true;
-}
-
-SDValue
-X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- const SDLoc &dl, SelectionDAG &DAG,
- const CCValAssign &VA,
- MachineFrameInfo &MFI, unsigned i) const {
- // Create the nodes corresponding to a load from this parameter slot.
- ISD::ArgFlagsTy Flags = Ins[i].Flags;
- bool AlwaysUseMutable = shouldGuaranteeTCO(
- CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
- bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
- EVT ValVT;
- MVT PtrVT = getPointerTy(DAG.getDataLayout());
-
- // If the value is passed by pointer, we have the address passed instead of
- // the value itself. No need to extend if the mask value and the location
- // share the same absolute size.
- bool ExtendedInMem =
- VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
- VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
-
- if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
- ValVT = VA.getLocVT();
- else
- ValVT = VA.getValVT();
-
- // FIXME: For now, all byval parameter objects are marked mutable. This can
- // be changed with more analysis. In the case of tail call optimization,
- // mark all arguments mutable, since they could be overwritten when lowering
- // the arguments of a tail call.
- if (Flags.isByVal()) {
- unsigned Bytes = Flags.getByValSize();
- if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
-
- // FIXME: For now, all byval parameter objects are marked as aliasing. This
- // can be improved with deeper analysis.
- int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
- /*isAliased=*/true);
- return DAG.getFrameIndex(FI, PtrVT);
- }
-
- EVT ArgVT = Ins[i].ArgVT;
-
- // If this is a vector that has been split into multiple parts, don't elide
- // the copy. The layout on the stack may not match the packed in-memory
- // layout.
- bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();
-
- // This is an argument in memory. We might be able to perform copy elision:
- // if the argument is passed directly in memory without any extension, then
- // we can elide the copy. Large vector types, for example, may be passed
- // indirectly by pointer.
- if (Flags.isCopyElisionCandidate() &&
- VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
- !ScalarizedVector) {
- SDValue PartAddr;
- if (Ins[i].PartOffset == 0) {
- // If this is a one-part value or the first part of a multi-part value,
- // create a stack object for the entire argument value type and return a
- // load from our portion of it. This assumes that if the first part of an
- // argument is in memory, the rest will also be in memory.
- int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
- /*IsImmutable=*/false);
- PartAddr = DAG.getFrameIndex(FI, PtrVT);
- return DAG.getLoad(
- ValVT, dl, Chain, PartAddr,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
- }
-
- // This is not the first piece of an argument in memory. See if there is
- // already a fixed stack object including this offset. If so, assume it
- // was created by the PartOffset == 0 branch above and create a load from
- // the appropriate offset into it.
- int64_t PartBegin = VA.getLocMemOffset();
- int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
- int FI = MFI.getObjectIndexBegin();
- for (; MFI.isFixedObjectIndex(FI); ++FI) {
- int64_t ObjBegin = MFI.getObjectOffset(FI);
- int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
- if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
- break;
- }
- if (MFI.isFixedObjectIndex(FI)) {
- SDValue Addr =
- DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
- DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
- return DAG.getLoad(ValVT, dl, Chain, Addr,
- MachinePointerInfo::getFixedStack(
- DAG.getMachineFunction(), FI, Ins[i].PartOffset));
- }
- }
-
- int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
- VA.getLocMemOffset(), isImmutable);
-
- // Set SExt or ZExt flag.
- if (VA.getLocInfo() == CCValAssign::ZExt) {
- MFI.setObjectZExt(FI, true);
- } else if (VA.getLocInfo() == CCValAssign::SExt) {
- MFI.setObjectSExt(FI, true);
- }
-
- MaybeAlign Alignment;
- if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
- ValVT != MVT::f80)
- Alignment = MaybeAlign(4);
- SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- SDValue Val = DAG.getLoad(
- ValVT, dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
- Alignment);
- return ExtendedInMem
- ? (VA.getValVT().isVector()
- ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
- : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
- : Val;
-}
-
-// FIXME: Get this from tablegen.
-static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
- const X86Subtarget &Subtarget) {
- assert(Subtarget.is64Bit());
-
- if (Subtarget.isCallingConvWin64(CallConv)) {
- static const MCPhysReg GPR64ArgRegsWin64[] = {
- X86::RCX, X86::RDX, X86::R8, X86::R9
- };
- return ArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
- }
-
- static const MCPhysReg GPR64ArgRegs64Bit[] = {
- X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
- };
- return ArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
-}
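-
-// For example, for a variadic function the SysV side has six integer
-// registers available (RDI, RSI, RDX, RCX, R8, R9), while Win64 exposes only
-// the four shadowed ones (RCX, RDX, R8, R9); any further arguments go on the
-// stack.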
-
-// FIXME: Get this from tablegen.
-static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
- CallingConv::ID CallConv,
- const X86Subtarget &Subtarget) {
- assert(Subtarget.is64Bit());
- if (Subtarget.isCallingConvWin64(CallConv)) {
- // The XMM registers which might contain var arg parameters are shadowed
- // by their paired GPRs, so we only need to save the GPRs to their home
- // slots.
- // TODO: __vectorcall will change this.
- return std::nullopt;
- }
-
- bool isSoftFloat = Subtarget.useSoftFloat();
- if (isSoftFloat || !Subtarget.hasSSE1())
- // Kernel mode asks for SSE to be disabled, so there are no XMM argument
- // registers.
- return std::nullopt;
-
- static const MCPhysReg XMMArgRegs64Bit[] = {
- X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
- X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
- };
- return ArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
-}
-
-#ifndef NDEBUG
-static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
- return llvm::is_sorted(
- ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
- return A.getValNo() < B.getValNo();
- });
-}
-#endif
-
-namespace {
-/// This is a helper class for lowering variable argument parameters.
-class VarArgsLoweringHelper {
-public:
- VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
- SelectionDAG &DAG, const X86Subtarget &Subtarget,
- CallingConv::ID CallConv, CCState &CCInfo)
- : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
- TheMachineFunction(DAG.getMachineFunction()),
- TheFunction(TheMachineFunction.getFunction()),
- FrameInfo(TheMachineFunction.getFrameInfo()),
- FrameLowering(*Subtarget.getFrameLowering()),
- TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
- CCInfo(CCInfo) {}
-
- // Lower variable argument parameters.
- void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
-
-private:
- void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
-
- void forwardMustTailParameters(SDValue &Chain);
-
- bool is64Bit() const { return Subtarget.is64Bit(); }
- bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
-
- X86MachineFunctionInfo *FuncInfo;
- const SDLoc &DL;
- SelectionDAG &DAG;
- const X86Subtarget &Subtarget;
- MachineFunction &TheMachineFunction;
- const Function &TheFunction;
- MachineFrameInfo &FrameInfo;
- const TargetFrameLowering &FrameLowering;
- const TargetLowering &TargLowering;
- CallingConv::ID CallConv;
- CCState &CCInfo;
-};
-} // namespace
-
-void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
- SDValue &Chain, unsigned StackSize) {
- // If the function takes a variable number of arguments, make a frame index
- // for the start of the first vararg value... for expansion of
- // llvm.va_start. We can skip this if there are no va_start calls.
- if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
- CallConv != CallingConv::X86_ThisCall)) {
- FuncInfo->setVarArgsFrameIndex(
- FrameInfo.CreateFixedObject(1, StackSize, true));
- }
-
- // 64-bit calling conventions support varargs and register parameters, so we
- // have to do extra work to spill them in the prologue.
- if (is64Bit()) {
- // Find the first unallocated argument registers.
- ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
- ArrayRef<MCPhysReg> ArgXMMs =
- get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
- unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
- unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
-
- assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
- "SSE register cannot be used when SSE is disabled!");
-
- if (isWin64()) {
- // Get to the caller-allocated home save location. Add 8 to account
- // for the return address.
- int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
- FuncInfo->setRegSaveFrameIndex(
- FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
- // Fix up the vararg frame index to point into the shadow area (4 x i64).
- if (NumIntRegs < 4)
- FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
- } else {
- // For X86-64, if there are vararg parameters that are passed via
- // registers, then we must store them to their spots on the stack so
- // they may be loaded by dereferencing the result of va_next.
- FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
- FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
- FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
- ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
- }
-
- SmallVector<SDValue, 6>
- LiveGPRs; // List of SDValues for the GPR registers holding live input
- // values.
- SmallVector<SDValue, 8> LiveXMMRegs; // List of SDValues for the XMM
- // registers holding live input values.
- SDValue ALVal; // If applicable, holds the SDValue for the %al register.
-
- // Gather all the live in physical registers.
- for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
- Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
- LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
- }
- const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
- if (!AvailableXmms.empty()) {
- Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
- ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
- for (MCPhysReg Reg : AvailableXmms) {
- // FastRegisterAllocator spills virtual registers at basic block
- // boundaries. That leads to uses of XMM registers outside of the check
- // for %al. Pass physical registers to VASTART_SAVE_XMM_REGS to avoid
- // unnecessary spilling.
- TheMachineFunction.getRegInfo().addLiveIn(Reg);
- LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
- }
- }
-
- // Store the integer parameter registers.
- SmallVector<SDValue, 8> MemOps;
- SDValue RSFIN =
- DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
- TargLowering.getPointerTy(DAG.getDataLayout()));
- unsigned Offset = FuncInfo->getVarArgsGPOffset();
- for (SDValue Val : LiveGPRs) {
- SDValue FIN = DAG.getNode(ISD::ADD, DL,
- TargLowering.getPointerTy(DAG.getDataLayout()),
- RSFIN, DAG.getIntPtrConstant(Offset, DL));
- SDValue Store =
- DAG.getStore(Val.getValue(1), DL, Val, FIN,
- MachinePointerInfo::getFixedStack(
- DAG.getMachineFunction(),
- FuncInfo->getRegSaveFrameIndex(), Offset));
- MemOps.push_back(Store);
- Offset += 8;
- }
-
- // Now store the XMM (fp + vector) parameter registers.
- if (!LiveXMMRegs.empty()) {
- SmallVector<SDValue, 12> SaveXMMOps;
- SaveXMMOps.push_back(Chain);
- SaveXMMOps.push_back(ALVal);
- SaveXMMOps.push_back(RSFIN);
- SaveXMMOps.push_back(
- DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
- llvm::append_range(SaveXMMOps, LiveXMMRegs);
- MachineMemOperand *StoreMMO =
- DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo::getFixedStack(
- DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
- Offset),
- MachineMemOperand::MOStore, 128, Align(16));
- MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
- DL, DAG.getVTList(MVT::Other),
- SaveXMMOps, MVT::i8, StoreMMO));
- }
-
- if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
- }
-}
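-
-// For reference, the SysV register save area built above is 176 bytes:
-// 6 GPRs * 8 bytes plus 8 XMM registers * 16 bytes. va_arg indexes into it
-// using the gp_offset/fp_offset values seeded from getVarArgsGPOffset() and
-// getVarArgsFPOffset(); e.g. with two named integer arguments, gp_offset
-// starts at 16.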
-
-void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
- // Find the largest legal vector type.
- MVT VecVT = MVT::Other;
- // FIXME: Only some x86_32 calling conventions support AVX512.
- if (Subtarget.useAVX512Regs() &&
- (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
- CallConv == CallingConv::Intel_OCL_BI)))
- VecVT = MVT::v16f32;
- else if (Subtarget.hasAVX())
- VecVT = MVT::v8f32;
- else if (Subtarget.hasSSE2())
- VecVT = MVT::v4f32;
-
- // We forward some GPRs and some vector types.
- SmallVector<MVT, 2> RegParmTypes;
- MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
- RegParmTypes.push_back(IntVT);
- if (VecVT != MVT::Other)
- RegParmTypes.push_back(VecVT);
-
- // Compute the set of forwarded registers. The rest are scratch.
- SmallVectorImpl<ForwardedRegister> &Forwards =
- FuncInfo->getForwardedMustTailRegParms();
- CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
-
- // Forward AL for SysV x86_64 targets, since it is used for varargs.
- if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
- Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
- Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
- }
-
- // Copy all forwards from physical to virtual registers.
- for (ForwardedRegister &FR : Forwards) {
- // FIXME: Can we use a less constrained schedule?
- SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
- FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
- TargLowering.getRegClassFor(FR.VT));
- Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
- }
-}
-
-void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
- unsigned StackSize) {
- // Set FrameIndex to the 0xAAAAAAA value to mark the unset state.
- // If necessary, it will be set to the correct value later.
- FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
- FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
-
- if (FrameInfo.hasVAStart())
- createVarArgAreaAndStoreRegisters(Chain, StackSize);
-
- if (FrameInfo.hasMustTailInVarArgFunc())
- forwardMustTailParameters(Chain);
-}
-
-SDValue X86TargetLowering::LowerFormalArguments(
- SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
- SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
- MachineFunction &MF = DAG.getMachineFunction();
- X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
-
- const Function &F = MF.getFunction();
- if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
- F.getName() == "main")
- FuncInfo->setForceFramePointer(true);
-
- MachineFrameInfo &MFI = MF.getFrameInfo();
- bool Is64Bit = Subtarget.is64Bit();
- bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
-
- assert(
- !(IsVarArg && canGuaranteeTCO(CallConv)) &&
- "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
-
- // Assign locations to all of the incoming arguments.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
-
- // Allocate shadow area for Win64.
- if (IsWin64)
- CCInfo.AllocateStack(32, Align(8));
-
- CCInfo.AnalyzeArguments(Ins, CC_X86);
-
- // In the vectorcall calling convention, a second pass is required for the
- // HVA types.
- if (CallingConv::X86_VectorCall == CallConv) {
- CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
- }
-
- // The next loop assumes that the locations are in the same order as the
- // input arguments.
- assert(isSortedByValueNo(ArgLocs) &&
- "Argument Location list must be sorted before lowering");
-
- SDValue ArgValue;
- for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
- ++I, ++InsIndex) {
- assert(InsIndex < Ins.size() && "Invalid Ins index");
- CCValAssign &VA = ArgLocs[I];
-
- if (VA.isRegLoc()) {
- EVT RegVT = VA.getLocVT();
- if (VA.needsCustom()) {
- assert(
- VA.getValVT() == MVT::v64i1 &&
- "Currently the only custom case is when we split v64i1 to 2 regs");
-
- // In the regcall calling convention, v64i1 values compiled for a 32-bit
- // arch are split up into two registers.
- ArgValue =
- getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
- } else {
- const TargetRegisterClass *RC;
- if (RegVT == MVT::i8)
- RC = &X86::GR8RegClass;
- else if (RegVT == MVT::i16)
- RC = &X86::GR16RegClass;
- else if (RegVT == MVT::i32)
- RC = &X86::GR32RegClass;
- else if (Is64Bit && RegVT == MVT::i64)
- RC = &X86::GR64RegClass;
- else if (RegVT == MVT::f16)
- RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
- else if (RegVT == MVT::f32)
- RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
- else if (RegVT == MVT::f64)
- RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
- else if (RegVT == MVT::f80)
- RC = &X86::RFP80RegClass;
- else if (RegVT == MVT::f128)
- RC = &X86::VR128RegClass;
- else if (RegVT.is512BitVector())
- RC = &X86::VR512RegClass;
- else if (RegVT.is256BitVector())
- RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
- else if (RegVT.is128BitVector())
- RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
- else if (RegVT == MVT::x86mmx)
- RC = &X86::VR64RegClass;
- else if (RegVT == MVT::v1i1)
- RC = &X86::VK1RegClass;
- else if (RegVT == MVT::v8i1)
- RC = &X86::VK8RegClass;
- else if (RegVT == MVT::v16i1)
- RC = &X86::VK16RegClass;
- else if (RegVT == MVT::v32i1)
- RC = &X86::VK32RegClass;
- else if (RegVT == MVT::v64i1)
- RC = &X86::VK64RegClass;
- else
- llvm_unreachable("Unknown argument type!");
-
- Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
- ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
- }
-
- // If this is an 8 or 16-bit value, it is really passed promoted to 32
- // bits. Insert an assert[sz]ext to capture this, then truncate to the
- // right size.
- if (VA.getLocInfo() == CCValAssign::SExt)
- ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
- else if (VA.getLocInfo() == CCValAssign::BCvt)
- ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
-
- if (VA.isExtInLoc()) {
- // Handle MMX values passed in XMM regs.
- if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
- ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
- else if (VA.getValVT().isVector() &&
- VA.getValVT().getScalarType() == MVT::i1 &&
- ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
- (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
- // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
- ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
- } else
- ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
- }
- } else {
- assert(VA.isMemLoc());
- ArgValue =
- LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
- }
-
- // If the value is passed via a pointer, do a load.
- if (VA.getLocInfo() == CCValAssign::Indirect &&
- !(Ins[I].Flags.isByVal() && VA.isRegLoc())) {
- ArgValue =
- DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
- }
-
- InVals.push_back(ArgValue);
- }
-
- for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
- if (Ins[I].Flags.isSwiftAsync()) {
- auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
- if (Subtarget.is64Bit())
- X86FI->setHasSwiftAsyncContext(true);
- else {
- int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
- X86FI->setSwiftAsyncContextFrameIdx(FI);
- SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I],
- DAG.getFrameIndex(FI, MVT::i32),
- MachinePointerInfo::getFixedStack(MF, FI));
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
- }
- }
-
- // The Swift calling convention does not require that we copy the sret
- // argument into %rax/%eax for the return. We don't set SRetReturnReg for
- // Swift.
- if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
- continue;
-
- // All x86 ABIs require that for returning structs by value we copy the
- // sret argument into %rax/%eax (depending on ABI) for the return. Save
- // the argument into a virtual register so that we can access it from the
- // return points.
- if (Ins[I].Flags.isSRet()) {
- assert(!FuncInfo->getSRetReturnReg() &&
- "SRet return has already been set");
- MVT PtrTy = getPointerTy(DAG.getDataLayout());
- Register Reg =
- MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
- FuncInfo->setSRetReturnReg(Reg);
- SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
- break;
- }
- }
-
- unsigned StackSize = CCInfo.getStackSize();
- // Align stack specially for tail calls.
- if (shouldGuaranteeTCO(CallConv,
- MF.getTarget().Options.GuaranteedTailCallOpt))
- StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
-
- if (IsVarArg)
- VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
- .lowerVarArgsParameters(Chain, StackSize);
-
- // Some CCs need callee pop.
- if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
- MF.getTarget().Options.GuaranteedTailCallOpt)) {
- FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
- } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
- // X86 interrupts must pop the error code (and the alignment padding) if
- // present.
- FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
- } else {
- FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
- // If this is an sret function, the return should pop the hidden pointer.
- if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
- FuncInfo->setBytesToPopOnReturn(4);
- }
-
- if (!Is64Bit) {
- // RegSaveFrameIndex is X86-64 only.
- FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
- }
-
- FuncInfo->setArgumentStackSize(StackSize);
-
- if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
- EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
- if (Personality == EHPersonality::CoreCLR) {
- assert(Is64Bit);
- // TODO: Add a mechanism to frame lowering that will allow us to indicate
- // that we'd prefer this slot be allocated towards the bottom of the frame
- // (i.e. near the stack pointer after allocating the frame). Every
- // funclet needs a copy of this slot in its (mostly empty) frame, and the
- // offset from the bottom of this and each funclet's frame must be the
- // same, so the size of funclets' (mostly empty) frames is dictated by
- // how far this slot is from the bottom (since they allocate just enough
- // space to accommodate holding this slot at the correct offset).
- int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
- EHInfo->PSPSymFrameIdx = PSPSymFI;
- }
- }
-
- if (shouldDisableArgRegFromCSR(CallConv) ||
- F.hasFnAttribute("no_caller_saved_registers")) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
- for (std::pair<Register, Register> Pair : MRI.liveins())
- MRI.disableCalleeSavedRegister(Pair.first);
- }
-
- return Chain;
-}
-
-SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
- SDValue Arg, const SDLoc &dl,
- SelectionDAG &DAG,
- const CCValAssign &VA,
- ISD::ArgFlagsTy Flags,
- bool isByVal) const {
- unsigned LocMemOffset = VA.getLocMemOffset();
- SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
- PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
- StackPtr, PtrOff);
- if (isByVal)
- return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
-
- MaybeAlign Alignment;
- if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
- Arg.getSimpleValueType() != MVT::f80)
- Alignment = MaybeAlign(4);
- return DAG.getStore(
- Chain, dl, Arg, PtrOff,
- MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
- Alignment);
-}
-
-/// Emit a load of the return address if tail call
-/// optimization is performed and it is required.
-SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
- SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
- bool Is64Bit, int FPDiff, const SDLoc &dl) const {
- // Adjust the Return address stack slot.
- EVT VT = getPointerTy(DAG.getDataLayout());
- OutRetAddr = getReturnAddressFrameIndex(DAG);
-
- // Load the "old" Return address.
- OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
- return SDValue(OutRetAddr.getNode(), 1);
-}
-
-/// Emit a store of the return address if tail call
-/// optimization is performed and it is required (FPDiff!=0).
-static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
- SDValue Chain, SDValue RetAddrFrIdx,
- EVT PtrVT, unsigned SlotSize,
- int FPDiff, const SDLoc &dl) {
- // Store the return address to the appropriate stack slot.
- if (!FPDiff) return Chain;
- // Calculate the new stack slot for the return address.
- int NewReturnAddrFI =
- MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
- false);
- SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
- Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
- MachinePointerInfo::getFixedStack(
- DAG.getMachineFunction(), NewReturnAddrFI));
- return Chain;
-}
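-
-// A worked example, under GuaranteedTailCallOpt: if the caller was entered
-// with 16 bytes of argument stack but its tail callee needs 32, then
-// FPDiff = 16 - 32 = -16 and the return address must be re-stored 16 bytes
-// lower, so that it again sits immediately below the callee's arguments.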
-
-/// Returns a vector_shuffle mask for a movs{s|d} or movd
-/// operation of the specified width.
-static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
- SDValue V2) {
- unsigned NumElems = VT.getVectorNumElements();
- SmallVector<int, 8> Mask;
- Mask.push_back(NumElems);
- for (unsigned i = 1; i != NumElems; ++i)
- Mask.push_back(i);
- return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
-}
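-
-// For VT = v4f32 this builds the mask <4, 1, 2, 3>: element 0 comes from V2
-// and the remaining elements from V1, which is exactly the MOVSS "merge
-// scalar into the low lane" behaviour.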
-
-SDValue
-X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const {
- SelectionDAG &DAG = CLI.DAG;
- SDLoc &dl = CLI.DL;
- SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
- SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
- SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
- SDValue Chain = CLI.Chain;
- SDValue Callee = CLI.Callee;
- CallingConv::ID CallConv = CLI.CallConv;
- bool &isTailCall = CLI.IsTailCall;
- bool isVarArg = CLI.IsVarArg;
- const auto *CB = CLI.CB;
-
- MachineFunction &MF = DAG.getMachineFunction();
- bool Is64Bit = Subtarget.is64Bit();
- bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
- bool IsSibcall = false;
- bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
- CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
- bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
- X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
- bool HasNCSR = (CB && isa<CallInst>(CB) &&
- CB->hasFnAttr("no_caller_saved_registers"));
- bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
- bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
- bool IsCFICall = IsIndirectCall && CLI.CFIType;
- const Module *M = MF.getMMI().getModule();
- Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
-
- MachineFunction::CallSiteInfo CSInfo;
- if (CallConv == CallingConv::X86_INTR)
- report_fatal_error("X86 interrupts may not be called directly");
-
- bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
- if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
- // If we are using a GOT, disable tail calls to external symbols with
- // default visibility. Tail calling such a symbol requires using a GOT
- // relocation, which forces early binding of the symbol. This breaks code
- // that requires lazy function symbol resolution. Using musttail or
- // GuaranteedTailCallOpt will override this.
- GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
- if (!G || (!G->getGlobal()->hasLocalLinkage() &&
- G->getGlobal()->hasDefaultVisibility()))
- isTailCall = false;
- }
-
- if (isTailCall && !IsMustTail) {
- // Check if it's really possible to do a tail call.
- isTailCall = IsEligibleForTailCallOptimization(
- Callee, CallConv, IsCalleePopSRet, isVarArg, CLI.RetTy, Outs, OutVals,
- Ins, DAG);
-
- // Sibcalls are automatically detected tailcalls which do not require
- // ABI changes.
- if (!IsGuaranteeTCO && isTailCall)
- IsSibcall = true;
-
- if (isTailCall)
- ++NumTailCalls;
- }
-
- if (IsMustTail && !isTailCall)
- report_fatal_error("failed to perform tail call elimination on a call "
- "site marked musttail");
-
- assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
- "Var args not supported with calling convention fastcc, ghc or hipe");
-
- // Analyze operands of the call, assigning locations to each operand.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
-
- // Allocate shadow area for Win64.
- if (IsWin64)
- CCInfo.AllocateStack(32, Align(8));
-
- CCInfo.AnalyzeArguments(Outs, CC_X86);
-
- // In the vectorcall calling convention, a second pass is required for the
- // HVA types.
- if (CallingConv::X86_VectorCall == CallConv) {
- CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
- }
-
- // Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
- if (IsSibcall)
- // This is a sibcall. The memory operands are already available in the
- // caller's incoming argument stack area.
- NumBytes = 0;
- else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
- NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
-
- int FPDiff = 0;
- if (isTailCall &&
- shouldGuaranteeTCO(CallConv,
- MF.getTarget().Options.GuaranteedTailCallOpt)) {
- // Lower arguments at fp - stackoffset + fpdiff.
- unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
-
- FPDiff = NumBytesCallerPushed - NumBytes;
-
- // Set the delta by which the return-address stack slot moves.
- // But only update it if the new delta is larger (more negative) than the
- // previous delta.
- if (FPDiff < X86Info->getTCReturnAddrDelta())
- X86Info->setTCReturnAddrDelta(FPDiff);
- }
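- // For example, if the caller pops 16 bytes of its own incoming arguments on
- // return but the callee needs 32 bytes, FPDiff is -16 and the return-address
- // slot must move down by 16 bytes.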
-
- unsigned NumBytesToPush = NumBytes;
- unsigned NumBytesToPop = NumBytes;
-
- // If we have an inalloca argument, all stack space has already been allocated
- // for us and is right at the top of the stack. We don't support multiple
- // arguments passed in memory when using inalloca.
- if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
- NumBytesToPush = 0;
- if (!ArgLocs.back().isMemLoc())
- report_fatal_error("cannot use inalloca attribute on a register "
- "parameter");
- if (ArgLocs.back().getLocMemOffset() != 0)
- report_fatal_error("any parameter with the inalloca attribute must be "
- "the only memory argument");
- } else if (CLI.IsPreallocated) {
- assert(ArgLocs.back().isMemLoc() &&
- "cannot use preallocated attribute on a register "
- "parameter");
- SmallVector<size_t, 4> PreallocatedOffsets;
- for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
- if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
- PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
- }
- }
- auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
- size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
- MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
- MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
- NumBytesToPush = 0;
- }
-
- if (!IsSibcall && !IsMustTail)
- Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
- NumBytes - NumBytesToPush, dl);
-
- SDValue RetAddrFrIdx;
- // Load return address for tail calls.
- if (isTailCall && FPDiff)
- Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
- Is64Bit, FPDiff, dl);
-
- SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
- SmallVector<SDValue, 8> MemOpChains;
- SDValue StackPtr;
-
- // The next loop assumes that the locations are in the same order as the
- // input arguments.
- assert(isSortedByValueNo(ArgLocs) &&
- "Argument Location list must be sorted before lowering");
-
- // Walk the register/memloc assignments, inserting copies/loads. In the case
- // of tail call optimization, arguments are handled later.
- const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
- for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
- ++I, ++OutIndex) {
- assert(OutIndex < Outs.size() && "Invalid Out index");
- // Skip inalloca/preallocated arguments, they have already been written.
- ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
- if (Flags.isInAlloca() || Flags.isPreallocated())
- continue;
-
- CCValAssign &VA = ArgLocs[I];
- EVT RegVT = VA.getLocVT();
- SDValue Arg = OutVals[OutIndex];
- bool isByVal = Flags.isByVal();
-
- // Promote the value if needed.
- switch (VA.getLocInfo()) {
- default: llvm_unreachable("Unknown loc info!");
- case CCValAssign::Full: break;
- case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
- break;
- case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
- break;
- case CCValAssign::AExt:
- if (Arg.getValueType().isVector() &&
- Arg.getValueType().getVectorElementType() == MVT::i1)
- Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
- else if (RegVT.is128BitVector()) {
- // Special case: passing MMX values in XMM registers.
- Arg = DAG.getBitcast(MVT::i64, Arg);
- Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
- Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
- } else
- Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
- break;
- case CCValAssign::BCvt:
- Arg = DAG.getBitcast(RegVT, Arg);
- break;
- case CCValAssign::Indirect: {
- if (isByVal) {
- // Memcpy the argument to a temporary stack slot to prevent
- // the caller from seeing any modifications the callee may make
- // as guaranteed by the `byval` attribute.
- int FrameIdx = MF.getFrameInfo().CreateStackObject(
- Flags.getByValSize(),
- std::max(Align(16), Flags.getNonZeroByValAlign()), false);
- SDValue StackSlot =
- DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
- Chain =
- CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
- // From now on treat this as a regular pointer
- Arg = StackSlot;
- isByVal = false;
- } else {
- // Store the argument.
- SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
- int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
- Chain = DAG.getStore(
- Chain, dl, Arg, SpillSlot,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
- Arg = SpillSlot;
- }
- break;
- }
- }
-
- if (VA.needsCustom()) {
- assert(VA.getValVT() == MVT::v64i1 &&
- "Currently the only custom case is when we split v64i1 to 2 regs");
- // Split v64i1 value into two registers
- Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
- } else if (VA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- const TargetOptions &Options = DAG.getTarget().Options;
- if (Options.EmitCallSiteInfo)
- CSInfo.emplace_back(VA.getLocReg(), I);
- if (isVarArg && IsWin64) {
- // The Win64 ABI requires an argument passed in an XMM reg to be copied to
- // the corresponding shadow GPR if the callee is a varargs function.
- Register ShadowReg;
- switch (VA.getLocReg()) {
- case X86::XMM0: ShadowReg = X86::RCX; break;
- case X86::XMM1: ShadowReg = X86::RDX; break;
- case X86::XMM2: ShadowReg = X86::R8; break;
- case X86::XMM3: ShadowReg = X86::R9; break;
- }
- if (ShadowReg)
- RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
- }
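- // For example, for printf("%g", X) on Win64, X is passed in XMM1 and is
- // also copied into RDX, its shadow register.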
- } else if (!IsSibcall && (!isTailCall || isByVal)) {
- assert(VA.isMemLoc());
- if (!StackPtr.getNode())
- StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
- getPointerTy(DAG.getDataLayout()));
- MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
- dl, DAG, VA, Flags, isByVal));
- }
- }
-
- if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
-
- if (Subtarget.isPICStyleGOT()) {
- // ELF / PIC requires the GOT address in the EBX register before function
- // calls made via the PLT (except for regcall).
- if (!isTailCall) {
- // An indirect call with the RegCall calling convention may use up all the
- // general registers, so it is not suitable to reserve EBX for the GOT
- // address; just let the register allocator handle it.
- if (CallConv != CallingConv::X86_RegCall)
- RegsToPass.push_back(std::make_pair(
- Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
- getPointerTy(DAG.getDataLayout()))));
- } else {
- // If we are tail calling and generating PIC/GOT style code load the
- // address of the callee into ECX. The value in ecx is used as target of
- // the tail jump. This is done to circumvent the ebx/callee-saved problem
- // for tail calls on PIC/GOT architectures. Normally we would just put the
- // address of GOT into ebx and then call target@PLT. But for tail calls
- // ebx would be restored (since ebx is callee saved) before jumping to the
- // target@PLT.
-
- // Note: The actual moving to ECX is done further down.
- GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
- if (G && !G->getGlobal()->hasLocalLinkage() &&
- G->getGlobal()->hasDefaultVisibility())
- Callee = LowerGlobalAddress(Callee, DAG);
- else if (isa<ExternalSymbolSDNode>(Callee))
- Callee = LowerExternalSymbol(Callee, DAG);
- }
- }
-
- if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
- (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
- // From AMD64 ABI document:
- // For calls that may call functions that use varargs or stdargs
- // (prototype-less calls or calls to functions containing ellipsis (...) in
- // the declaration) %al is used as a hidden argument to specify the number
- // of SSE registers used. The contents of %al do not need to match exactly
- // the number of registers, but must be an upper bound on the number of SSE
- // registers used and must be in the range 0 - 8 inclusive.
-
- // Count the number of XMM registers allocated.
- static const MCPhysReg XMMArgRegs[] = {
- X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
- X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
- };
- unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
- assert((Subtarget.hasSSE1() || !NumXMMRegs)
- && "SSE registers cannot be used when SSE is disabled");
- RegsToPass.push_back(std::make_pair(Register(X86::AL),
- DAG.getConstant(NumXMMRegs, dl,
- MVT::i8)));
- }
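- // For example, a call like printf("%f %f", A, B) allocates XMM0 and XMM1,
- // so %al is set to 2 here; any upper bound up to 8 would conform to the ABI.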
-
- if (isVarArg && IsMustTail) {
- const auto &Forwards = X86Info->getForwardedMustTailRegParms();
- for (const auto &F : Forwards) {
- SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
- RegsToPass.push_back(std::make_pair(F.PReg, Val));
- }
- }
-
- // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
- // don't need this because the eligibility check rejects calls that require
- // shuffling arguments passed in memory.
- if (!IsSibcall && isTailCall) {
- // Force all the incoming stack arguments to be loaded from the stack
- // before any new outgoing arguments are stored to the stack, because the
- // outgoing stack slots may alias the incoming argument stack slots, and
- // the alias isn't otherwise explicit. This is slightly more conservative
- // than necessary, because it means that each store effectively depends
- // on every argument instead of just those arguments it would clobber.
- SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
-
- SmallVector<SDValue, 8> MemOpChains2;
- SDValue FIN;
- int FI = 0;
- for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
- ++I, ++OutsIndex) {
- CCValAssign &VA = ArgLocs[I];
-
- if (VA.isRegLoc()) {
- if (VA.needsCustom()) {
- assert((CallConv == CallingConv::X86_RegCall) &&
- "Expecting custom case only in regcall calling convention");
- // This means that we are in special case where one argument was
- // passed through two register locations - Skip the next location
- ++I;
- }
-
- continue;
- }
-
- assert(VA.isMemLoc());
- SDValue Arg = OutVals[OutsIndex];
- ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
- // Skip inalloca/preallocated arguments. They don't require any work.
- if (Flags.isInAlloca() || Flags.isPreallocated())
- continue;
- // Create frame index.
- int32_t Offset = VA.getLocMemOffset()+FPDiff;
- uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
- FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
- FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
-
- if (Flags.isByVal()) {
- // Copy relative to framepointer.
- SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
- if (!StackPtr.getNode())
- StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
- getPointerTy(DAG.getDataLayout()));
- Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
- StackPtr, Source);
-
- MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
- ArgChain,
- Flags, DAG, dl));
- } else {
- // Store relative to framepointer.
- MemOpChains2.push_back(DAG.getStore(
- ArgChain, dl, Arg, FIN,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
- }
- }
-
- if (!MemOpChains2.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
-
- // Store the return address to the appropriate stack slot.
- Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
- getPointerTy(DAG.getDataLayout()),
- RegInfo->getSlotSize(), FPDiff, dl);
- }
-
- // Build a sequence of copy-to-reg nodes chained together with token chain
- // and glue operands which copy the outgoing args into registers.
- SDValue InGlue;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InGlue);
- InGlue = Chain.getValue(1);
- }
-
- if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
- assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
- // In the 64-bit large code model, we have to make all calls
- // through a register, since the call instruction's 32-bit
- // pc-relative offset may not be large enough to hold the whole
- // address.
- } else if (Callee->getOpcode() == ISD::GlobalAddress ||
- Callee->getOpcode() == ISD::ExternalSymbol) {
- // Lower direct calls to global addresses and external symbols. Setting
- // ForCall to true here has the effect of removing WrapperRIP when possible
- // to allow direct calls to be selected without first materializing the
- // address into a register.
- Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
- } else if (Subtarget.isTarget64BitILP32() &&
- Callee.getValueType() == MVT::i32) {
- // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
- Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
- }
-
- // Returns a chain & a glue for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- SmallVector<SDValue, 8> Ops;
-
- if (!IsSibcall && isTailCall && !IsMustTail) {
- Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
- InGlue = Chain.getValue(1);
- }
-
- Ops.push_back(Chain);
- Ops.push_back(Callee);
-
- if (isTailCall)
- Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
-
- // Add argument registers to the end of the list so that they are known live
- // into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
-
- // Add a register mask operand representing the call-preserved registers.
- const uint32_t *Mask = [&]() {
- auto AdaptedCC = CallConv;
- // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
- // use X86_INTR calling convention because it has the same CSR mask
- // (same preserved registers).
- if (HasNCSR)
- AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
- // If NoCalleeSavedRegisters is requested, then use GHC since it happens
- // to use the CSR_NoRegs_RegMask.
- if (CB && CB->hasFnAttr("no_callee_saved_registers"))
- AdaptedCC = (CallingConv::ID)CallingConv::GHC;
- return RegInfo->getCallPreservedMask(MF, AdaptedCC);
- }();
- assert(Mask && "Missing call preserved mask for calling convention");
-
- // If this is an invoke in a 32-bit function using a funclet-based
- // personality, assume the function clobbers all registers. If an exception
- // is thrown, the runtime will not restore CSRs.
- // FIXME: Model this more precisely so that we can register allocate across
- // the normal edge and spill and fill across the exceptional edge.
- if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
- const Function &CallerFn = MF.getFunction();
- EHPersonality Pers =
- CallerFn.hasPersonalityFn()
- ? classifyEHPersonality(CallerFn.getPersonalityFn())
- : EHPersonality::Unknown;
- if (isFuncletEHPersonality(Pers))
- Mask = RegInfo->getNoPreservedMask();
- }
-
- // Define a new register mask from the existing mask.
- uint32_t *RegMask = nullptr;
-
- // In some calling conventions we need to remove the used physical registers
- // from the reg mask. Create a new RegMask for such calling conventions.
- // RegMask for calling conventions that disable only return registers (e.g.
- // preserve_most) will be modified later in LowerCallResult.
- bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR;
- if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) {
- const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
-
- // Allocate a new Reg Mask and copy Mask.
- RegMask = MF.allocateRegMask();
- unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
- memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
-
- // Make sure all sub registers of the argument registers are reset
- // in the RegMask.
- if (ShouldDisableArgRegs) {
- for (auto const &RegPair : RegsToPass)
- for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first))
- RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
- }
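- // For example, if RDI carries an argument, the bits for RDI, EDI, DI and
- // DIL are all cleared so no alias of the register is treated as preserved.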
-
- // Create the RegMask Operand according to our updated mask.
- Ops.push_back(DAG.getRegisterMask(RegMask));
- } else {
- // Create the RegMask Operand according to the static mask.
- Ops.push_back(DAG.getRegisterMask(Mask));
- }
-
- if (InGlue.getNode())
- Ops.push_back(InGlue);
-
- if (isTailCall) {
- // We used to do:
- //// If this is the first return lowered for this function, add the regs
- //// to the liveout set for the function.
- // This isn't right, although it's probably harmless on x86; liveouts
- // should be computed from returns not tail calls. Consider a void
- // function making a tail call to a function returning int.
- MF.getFrameInfo().setHasTailCall();
- SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
-
- if (IsCFICall)
- Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
-
- DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
- DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
- return Ret;
- }
-
- if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
- Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
- } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
- // Calls with a "clang.arc.attachedcall" bundle are special. They should be
- // expanded to the call, directly followed by a special marker sequence and
- // a call to an ObjC library function. Use the CALL_RVMARKER to do that.
- assert(!isTailCall &&
- "tail calls cannot be marked with clang.arc.attachedcall");
- assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
-
- // Add a target global address for the retainRV/claimRV runtime function
- // just before the call target.
- Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
- auto PtrVT = getPointerTy(DAG.getDataLayout());
- auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
- Ops.insert(Ops.begin() + 1, GA);
- Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
- } else {
- Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
- }
-
- if (IsCFICall)
- Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
-
- InGlue = Chain.getValue(1);
- DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
- DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
-
- // Save heapallocsite metadata.
- if (CLI.CB)
- if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
- DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
-
- // Create the CALLSEQ_END node.
- unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
- if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
- DAG.getTarget().Options.GuaranteedTailCallOpt))
- NumBytesForCalleeToPop = NumBytes; // Callee pops everything
- else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
- // If this call passes a struct-return pointer, the callee
- // pops that struct pointer.
- NumBytesForCalleeToPop = 4;
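- // For example, a 32-bit stdcall callee pops all NumBytes itself, while a
- // cdecl callee that pops an sret pointer pops only these 4 bytes.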
-
- // Returns a glue for retval copy to use.
- if (!IsSibcall) {
- Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
- InGlue, dl);
- InGlue = Chain.getValue(1);
- }
-
- // Handle result values, copying them out of physregs into vregs that we
- // return.
- return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
- InVals, RegMask);
-}
-
-//===----------------------------------------------------------------------===//
-// Fast Calling Convention (tail call) implementation
-//===----------------------------------------------------------------------===//
-
-// Like stdcall, the callee cleans up the arguments, except that ECX is
-// reserved for storing the address of the tail-called function. Only 2
-// registers are free for argument passing (inreg). Tail call optimization is
-// performed provided:
-// * tailcallopt is enabled
-// * caller/callee are fastcc
-// On x86-64 with GOT-style position-independent code, only local
-// (within-module) calls are supported at the moment.
-// To keep the stack aligned according to the platform ABI, the function
-// GetAlignedArgumentStackSize ensures that the argument delta is always a
-// multiple of the stack alignment. (Dynamic linkers need this; Darwin's dyld,
-// for example.)
-// If the tail-called function has more arguments than the caller, the caller
-// needs to make sure that there is room to move the RETADDR to. This is
-// achieved by reserving an area the size of the argument delta right after the
-// original RETADDR, but before the saved framepointer or the spilled registers
-// e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
-// stack layout:
-// arg1
-// arg2
-// RETADDR
-// [ new RETADDR
-// move area ]
-// (possible EBP)
-// ESI
-// EDI
-// local1 ..
-
-/// Align the stack size, e.g. to 16n + 12 for a 16-byte alignment
-/// requirement.
-unsigned
-X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
- SelectionDAG &DAG) const {
- const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
- const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
- assert(StackSize % SlotSize == 0 &&
- "StackSize must be a multiple of SlotSize");
- return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
-}
-
-/// Return true if the given stack call argument is already available in the
-/// same (relative) position of the caller's incoming argument stack.
-static
-bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
- MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
- const X86InstrInfo *TII, const CCValAssign &VA) {
- unsigned Bytes = Arg.getValueSizeInBits() / 8;
-
- for (;;) {
- // Look through nodes that don't alter the bits of the incoming value.
- unsigned Op = Arg.getOpcode();
- if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
- Arg = Arg.getOperand(0);
- continue;
- }
- if (Op == ISD::TRUNCATE) {
- const SDValue &TruncInput = Arg.getOperand(0);
- if (TruncInput.getOpcode() == ISD::AssertZext &&
- cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
- Arg.getValueType()) {
- Arg = TruncInput.getOperand(0);
- continue;
- }
- }
- break;
- }
-
- int FI = INT_MAX;
- if (Arg.getOpcode() == ISD::CopyFromReg) {
- Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
- if (!VR.isVirtual())
- return false;
- MachineInstr *Def = MRI->getVRegDef(VR);
- if (!Def)
- return false;
- if (!Flags.isByVal()) {
- if (!TII->isLoadFromStackSlot(*Def, FI))
- return false;
- } else {
- unsigned Opcode = Def->getOpcode();
- if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
- Opcode == X86::LEA64_32r) &&
- Def->getOperand(1).isFI()) {
- FI = Def->getOperand(1).getIndex();
- Bytes = Flags.getByValSize();
- } else
- return false;
- }
- } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
- if (Flags.isByVal())
- // ByVal argument is passed in as a pointer but it's now being
- // dereferenced. e.g.
- // define @foo(%struct.X* %A) {
- // tail call @bar(%struct.X* byval %A)
- // }
- return false;
- SDValue Ptr = Ld->getBasePtr();
- FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
- if (!FINode)
- return false;
- FI = FINode->getIndex();
- } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
- FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
- FI = FINode->getIndex();
- Bytes = Flags.getByValSize();
- } else
- return false;
-
- assert(FI != INT_MAX);
- if (!MFI.isFixedObjectIndex(FI))
- return false;
-
- if (Offset != MFI.getObjectOffset(FI))
- return false;
-
- // If this is not byval, check that the argument stack object is immutable.
- // inalloca and argument copy elision can create mutable argument stack
- // objects. Byval objects can be mutated, but a byval call intends to pass the
- // mutated memory.
- if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
- return false;
-
- if (VA.getLocVT().getFixedSizeInBits() >
- Arg.getValueSizeInBits().getFixedValue()) {
- // If the argument location is wider than the argument type, check that any
- // extension flags match.
- if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
- Flags.isSExt() != MFI.isObjectSExt(FI)) {
- return false;
- }
- }
-
- return Bytes == MFI.getObjectSize(FI);
-}
-
-/// Check whether the call is eligible for tail call optimization. Targets
-/// that want to do tail call optimization should implement this function.
-bool X86TargetLowering::IsEligibleForTailCallOptimization(
- SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleePopSRet,
- bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
- if (!mayTailCallThisCC(CalleeCC))
- return false;
-
- // If -tailcallopt is specified, make fastcc functions tail-callable.
- MachineFunction &MF = DAG.getMachineFunction();
- const Function &CallerF = MF.getFunction();
-
- // If the function return type is x86_fp80 and the callee return type is not,
- // then the FP_EXTEND of the call result is not a nop. It's not safe to
- // perform a tailcall optimization here.
- if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
- return false;
-
- CallingConv::ID CallerCC = CallerF.getCallingConv();
- bool CCMatch = CallerCC == CalleeCC;
- bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
- bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
- bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
- CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
-
- // Win64 functions have extra shadow space for argument homing. Don't do the
- // sibcall if the caller and callee have mismatched expectations for this
- // space.
- if (IsCalleeWin64 != IsCallerWin64)
- return false;
-
- if (IsGuaranteeTCO) {
- if (canGuaranteeTCO(CalleeCC) && CCMatch)
- return true;
- return false;
- }
-
- // Look for obvious safe cases to perform tail call optimization that do not
- // require ABI changes. This is what gcc calls sibcall.
-
- // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
- // emit a special epilogue.
- const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
- if (RegInfo->hasStackRealignment(MF))
- return false;
-
- // Also avoid sibcall optimization if the caller returns via sret and the
- // callee is incompatible. See the comment in LowerReturn about why
- // hasStructRetAttr is insufficient.
- if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
- // For a compatible tail call the callee must return our sret pointer. So it
- // needs to be (a) an sret function itself and (b) we pass our sret as its
- // sret. Condition #b is harder to determine.
- return false;
- } else if (IsCalleePopSRet)
- // The callee pops an sret, so we cannot tail-call, as our caller doesn't
- // expect that.
- return false;
-
- // Do not sibcall optimize vararg calls unless all arguments are passed via
- // registers.
- LLVMContext &C = *DAG.getContext();
- if (isVarArg && !Outs.empty()) {
- // Optimizing for varargs on Win64 is unlikely to be safe without
- // additional testing.
- if (IsCalleeWin64 || IsCallerWin64)
- return false;
-
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
- CCInfo.AnalyzeCallOperands(Outs, CC_X86);
- for (const auto &VA : ArgLocs)
- if (!VA.isRegLoc())
- return false;
- }
-
- // If the call result is in ST0 / ST1, it needs to be popped off the x87
- // stack. Therefore, if it's not used by the call it is not safe to optimize
- // this into a sibcall.
- bool Unused = false;
- for (const auto &In : Ins) {
- if (!In.Used) {
- Unused = true;
- break;
- }
- }
- if (Unused) {
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
- CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
- for (const auto &VA : RVLocs) {
- if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
- return false;
- }
- }
-
- // Check that the call results are passed in the same way.
- if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
- RetCC_X86, RetCC_X86))
- return false;
- // The callee has to preserve all registers the caller needs to preserve.
- const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
- const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
- if (!CCMatch) {
- const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
- if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
- return false;
- }
-
- unsigned StackArgsSize = 0;
-
- // If the callee takes no arguments then go on to check the results of the
- // call.
- if (!Outs.empty()) {
- // Check if stack adjustment is needed. For now, do not do this if any
- // argument is passed on the stack.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
-
- // Allocate shadow area for Win64
- if (IsCalleeWin64)
- CCInfo.AllocateStack(32, Align(8));
-
- CCInfo.AnalyzeCallOperands(Outs, CC_X86);
- StackArgsSize = CCInfo.getStackSize();
-
- if (CCInfo.getStackSize()) {
- // Check if the arguments are already laid out in the right way as
- // the caller's fixed stack objects.
- MachineFrameInfo &MFI = MF.getFrameInfo();
- const MachineRegisterInfo *MRI = &MF.getRegInfo();
- const X86InstrInfo *TII = Subtarget.getInstrInfo();
- for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
- const CCValAssign &VA = ArgLocs[I];
- SDValue Arg = OutVals[I];
- ISD::ArgFlagsTy Flags = Outs[I].Flags;
- if (VA.getLocInfo() == CCValAssign::Indirect)
- return false;
- if (!VA.isRegLoc()) {
- if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
- TII, VA))
- return false;
- }
- }
- }
-
- bool PositionIndependent = isPositionIndependent();
- // If the tailcall address may be in a register, then make sure it's
- // possible to register allocate for it. In 32-bit, the call address can
- // only target EAX, EDX, or ECX since the tail call must be scheduled after
- // callee-saved registers are restored. These happen to be the same
- // registers used to pass 'inreg' arguments so watch out for those.
- if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
- !isa<ExternalSymbolSDNode>(Callee)) ||
- PositionIndependent)) {
- unsigned NumInRegs = 0;
- // In PIC we need an extra register to formulate the address computation
- // for the callee.
- unsigned MaxInRegs = PositionIndependent ? 2 : 3;
-
- for (const auto &VA : ArgLocs) {
- if (!VA.isRegLoc())
- continue;
- Register Reg = VA.getLocReg();
- switch (Reg) {
- default: break;
- case X86::EAX: case X86::EDX: case X86::ECX:
- if (++NumInRegs == MaxInRegs)
- return false;
- break;
- }
- }
- }
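- // For example, a 32-bit PIC indirect call whose ArgLocs already use ECX
- // and EDX for inreg arguments leaves no register for the callee address,
- // so the tail call is rejected.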
-
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
- return false;
- }
-
- bool CalleeWillPop =
- X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
- MF.getTarget().Options.GuaranteedTailCallOpt);
-
- if (unsigned BytesToPop =
- MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
- // If we have bytes to pop, the callee must pop them.
- bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
- if (!CalleePopMatches)
- return false;
- } else if (CalleeWillPop && StackArgsSize > 0) {
- // If we don't have bytes to pop, make sure the callee doesn't pop any.
- return false;
- }
-
- return true;
-}
-
FastISel *
X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
@@ -5587,26 +2704,6 @@ bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
return false;
}
-/// Determines whether the callee is required to pop its own arguments.
-/// Callee pop is necessary to support tail calls.
-bool X86::isCalleePop(CallingConv::ID CallingConv,
- bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
- // If GuaranteeTCO is true, we force some calls to be callee pop so that we
- // can guarantee TCO.
- if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
- return true;
-
- switch (CallingConv) {
- default:
- return false;
- case CallingConv::X86_StdCall:
- case CallingConv::X86_FastCall:
- case CallingConv::X86_ThisCall:
- case CallingConv::X86_VectorCall:
- return !is64Bit;
- }
-}
-
/// Return true if the condition is a signed comparison operation.
static bool isX86CCSigned(unsigned X86CC) {
switch (X86CC) {
@@ -6170,6 +3267,73 @@ bool X86TargetLowering::
return NewShiftOpcode == ISD::SHL;
}
+unsigned X86TargetLowering::preferedOpcodeForCmpEqPiecesOfOperand(
+ EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
+ const APInt &ShiftOrRotateAmt, const std::optional<APInt> &AndMask) const {
+ if (!VT.isInteger())
+ return ShiftOpc;
+
+ bool PreferRotate = false;
+ if (VT.isVector()) {
+ // For vectors, if we have rotate instruction support, then it's definitely
+ // best. Otherwise it's not clear what's best, so just don't make changes.
+ PreferRotate = Subtarget.hasAVX512() && (VT.getScalarType() == MVT::i32 ||
+ VT.getScalarType() == MVT::i64);
+ } else {
+ // For scalars, if we have BMI2, prefer rotate for rorx. Otherwise prefer
+ // rotate unless we have a zext mask+shr.
+ PreferRotate = Subtarget.hasBMI2();
+ if (!PreferRotate) {
+ unsigned MaskBits =
+ VT.getScalarSizeInBits() - ShiftOrRotateAmt.getZExtValue();
+ PreferRotate = (MaskBits != 8) && (MaskBits != 16) && (MaskBits != 32);
+ }
+ }
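+ // For example, without BMI2 an i32 (X >> 24) == C keeps the SRL since the
+ // 8-bit mask is a free movzx, while (X >> 21) == C prefers a rotate (when
+ // the rotate transform is allowed).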
+
+ if (ShiftOpc == ISD::SHL || ShiftOpc == ISD::SRL) {
+ assert(AndMask.has_value() && "Null andmask when querying about shift+and");
+
+ if (PreferRotate && MayTransformRotate)
+ return ISD::ROTL;
+
+ // For vectors we don't really get much benefit from swapping around constants.
+ // Maybe we could check if the DAG has the flipped node already in the
+ // future.
+ if (VT.isVector())
+ return ShiftOpc;
+
+ // See if it's beneficial to swap the shift type.
+ if (ShiftOpc == ISD::SHL) {
+ // If the current setup has an imm64 mask, then the inverse will have
+ // at least an imm32 mask (or be zext i32 -> i64).
+ if (VT == MVT::i64)
+ return AndMask->getSignificantBits() > 32 ? (unsigned)ISD::SRL
+ : ShiftOpc;
+
+ // We only benefit if the mask requires at least 7 bits. We don't
+ // want to replace shl by 1, 2 or 3, as those can be implemented
+ // with lea/add.
+ return ShiftOrRotateAmt.uge(7) ? (unsigned)ISD::SRL : ShiftOpc;
+ }
+
+ if (VT == MVT::i64)
+ // Keep an exactly 32-bit imm64; this is zext i32 -> i64, which is
+ // extremely efficient.
+ return AndMask->getSignificantBits() > 33 ? (unsigned)ISD::SHL : ShiftOpc;
+
+ // Keep small shifts as shl so we can generate add/lea.
+ return ShiftOrRotateAmt.ult(7) ? (unsigned)ISD::SHL : ShiftOpc;
+ }
+
+ // We prefer the rotate for vectors, or if we won't get a zext mask with SRL
+ // (PreferRotate will be set in the latter case).
+ if (PreferRotate || VT.isVector())
+ return ShiftOpc;
+
+ // Non-vector type and we have a zext mask with SRL.
+ return ISD::SRL;
+}
+
bool X86TargetLowering::preferScalarizeSplat(SDNode *N) const {
return N->getOpcode() != ISD::FP_EXTEND;
}
@@ -6633,6 +3797,12 @@ static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
return DAG.getBuildVector(ResultVT, dl,
Vec->ops().slice(IdxVal, ElemsPerChunk));
+ // Check if we're extracting the upper undef of a widening pattern.
+ if (Vec.getOpcode() == ISD::INSERT_SUBVECTOR && Vec.getOperand(0).isUndef() &&
+ Vec.getOperand(1).getValueType().getVectorNumElements() <= IdxVal &&
+ isNullConstant(Vec.getOperand(2)))
+ return DAG.getUNDEF(ResultVT);
+
SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
}
@@ -6698,7 +3868,7 @@ static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
const X86Subtarget &Subtarget, SelectionDAG &DAG,
const SDLoc &dl) {
- assert(Vec.getValueSizeInBits().getFixedValue() < VT.getFixedSizeInBits() &&
+ assert(Vec.getValueSizeInBits().getFixedValue() <= VT.getFixedSizeInBits() &&
Vec.getValueType().getScalarType() == VT.getScalarType() &&
"Unsupported vector widening type");
SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl)
@@ -6712,7 +3882,7 @@ static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
static SDValue widenSubVector(SDValue Vec, bool ZeroNewElements,
const X86Subtarget &Subtarget, SelectionDAG &DAG,
const SDLoc &dl, unsigned WideSizeInBits) {
- assert(Vec.getValueSizeInBits() < WideSizeInBits &&
+ assert(Vec.getValueSizeInBits() <= WideSizeInBits &&
(WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 &&
"Unsupported vector widening type");
unsigned WideNumElts = WideSizeInBits / Vec.getScalarValueSizeInBits();
@@ -6721,6 +3891,25 @@ static SDValue widenSubVector(SDValue Vec, bool ZeroNewElements,
return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl);
}
+/// Widen a mask vector type to a minimum of v8i1/v16i1 to allow use of KSHIFT
+/// and bitcast with integer types.
+static MVT widenMaskVectorType(MVT VT, const X86Subtarget &Subtarget) {
+ assert(VT.getVectorElementType() == MVT::i1 && "Expected bool vector");
+ unsigned NumElts = VT.getVectorNumElements();
+ if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8)
+ return Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
+ return VT;
+}
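+// For example, v2i1 widens to v8i1 when AVX512DQ is available (KSHIFTB/KMOVB
+// exist) and to v16i1 otherwise; v16i1 and wider types are returned unchanged.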
+
+/// Widen a mask vector to a minimum of v8i1/v16i1 to allow use of KSHIFT and
+/// bitcast with integer types.
+static SDValue widenMaskVector(SDValue Vec, bool ZeroNewElements,
+ const X86Subtarget &Subtarget, SelectionDAG &DAG,
+ const SDLoc &dl) {
+ MVT VT = widenMaskVectorType(Vec.getSimpleValueType(), Subtarget);
+ return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl);
+}
+
// Helper function to collect subvector ops that are concatenated together,
// either by ISD::CONCAT_VECTORS or a ISD::INSERT_SUBVECTOR series.
// The subvectors in Ops are guaranteed to be the same type.
@@ -6777,22 +3966,24 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops,
}
// Helper to check if \p V can be split into subvectors and the upper subvectors
-// are all undef. In which case return the lower subvectors.
-static bool isUpperSubvectorUndef(SDValue V, SmallVectorImpl<SDValue> &LowerOps,
- SelectionDAG &DAG) {
+// are all undef. In which case return the lower subvector.
+static SDValue isUpperSubvectorUndef(SDValue V, const SDLoc &DL,
+ SelectionDAG &DAG) {
SmallVector<SDValue> SubOps;
if (!collectConcatOps(V.getNode(), SubOps, DAG))
- return false;
+ return SDValue();
unsigned NumSubOps = SubOps.size();
+ unsigned HalfNumSubOps = NumSubOps / 2;
assert((NumSubOps % 2) == 0 && "Unexpected number of subvectors");
- ArrayRef<SDValue> UpperOps(SubOps.begin() + (NumSubOps / 2), SubOps.end());
+ ArrayRef<SDValue> UpperOps(SubOps.begin() + HalfNumSubOps, SubOps.end());
if (any_of(UpperOps, [](SDValue Op) { return !Op.isUndef(); }))
- return false;
+ return SDValue();
- LowerOps.assign(SubOps.begin(), SubOps.begin() + (NumSubOps / 2));
- return true;
+ EVT HalfVT = V.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
+ ArrayRef<SDValue> LowerOps(SubOps.begin(), SubOps.begin() + HalfNumSubOps);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT, LowerOps);
}
// Helper to check if we can access all the constituent subvectors without any
@@ -7008,9 +4199,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
// Extend to natively supported kshift.
- MVT WideOpVT = OpVT;
- if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
- WideOpVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
+ MVT WideOpVT = widenMaskVectorType(OpVT, Subtarget);
// Inserting into the lsbs of a zero vector is legal. ISel will insert shifts
// if necessary.
@@ -7109,7 +4298,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
unsigned ShiftLeft = NumElems - SubVecNumElems;
unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
- // Do an optimization for the the most frequently used types.
+ // Do an optimization for the most frequently used types.
if (WideOpVT != MVT::v64i1 || Subtarget.is64Bit()) {
APInt Mask0 = APInt::getBitsSet(NumElems, IdxVal, IdxVal + SubVecNumElems);
Mask0.flipAllBits();
@@ -7212,35 +4401,6 @@ static SDValue getBitSelect(const SDLoc &DL, MVT VT, SDValue LHS, SDValue RHS,
return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
}
-// Match (xor X, -1) -> X.
-// Match extract_subvector(xor X, -1) -> extract_subvector(X).
-// Match concat_vectors(xor X, -1, xor Y, -1) -> concat_vectors(X, Y).
-static SDValue IsNOT(SDValue V, SelectionDAG &DAG) {
- V = peekThroughBitcasts(V);
- if (V.getOpcode() == ISD::XOR &&
- (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
- isAllOnesConstant(V.getOperand(1))))
- return V.getOperand(0);
- if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
- if (SDValue Not = IsNOT(V.getOperand(0), DAG)) {
- Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), V.getValueType(),
- Not, V.getOperand(1));
- }
- }
- SmallVector<SDValue, 2> CatOps;
- if (collectConcatOps(V.getNode(), CatOps, DAG)) {
- for (SDValue &CatOp : CatOps) {
- SDValue NotCat = IsNOT(CatOp, DAG);
- if (!NotCat) return SDValue();
- CatOp = DAG.getBitcast(CatOp.getValueType(), NotCat);
- }
- return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(V), V.getValueType(), CatOps);
- }
- return SDValue();
-}
-
void llvm::createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask,
bool Lo, bool Unary) {
assert(VT.getScalarType().isSimple() && (VT.getSizeInBits() % 128) == 0 &&
@@ -7394,15 +4554,17 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
}
-static const Constant *getTargetConstantFromBasePtr(SDValue Ptr) {
+static const ConstantPoolSDNode *getTargetConstantPoolFromBasePtr(SDValue Ptr) {
if (Ptr.getOpcode() == X86ISD::Wrapper ||
Ptr.getOpcode() == X86ISD::WrapperRIP)
Ptr = Ptr.getOperand(0);
+ return dyn_cast<ConstantPoolSDNode>(Ptr);
+}
- auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
+static const Constant *getTargetConstantFromBasePtr(SDValue Ptr) {
+ const ConstantPoolSDNode *CNode = getTargetConstantPoolFromBasePtr(Ptr);
if (!CNode || CNode->isMachineConstantPoolEntry() || CNode->getOffset() != 0)
return nullptr;
-
return CNode->getConstVal();
}
@@ -7575,6 +4737,8 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits();
unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
+ if ((SizeInBits % SrcEltSizeInBits) != 0)
+ return false;
APInt UndefSrcElts(NumSrcElts, 0);
SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
@@ -7798,6 +4962,59 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
return true;
}
+// Match not(xor X, -1) -> X.
+// Match not(pcmpgt(C, X)) -> pcmpgt(X, C - 1).
+// Match not(extract_subvector(xor X, -1)) -> extract_subvector(X).
+// Match not(concat_vectors(xor X, -1, xor Y, -1)) -> concat_vectors(X, Y).
+static SDValue IsNOT(SDValue V, SelectionDAG &DAG) {
+ V = peekThroughBitcasts(V);
+ if (V.getOpcode() == ISD::XOR &&
+ (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
+ isAllOnesConstant(V.getOperand(1))))
+ return V.getOperand(0);
+ if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
+ if (SDValue Not = IsNOT(V.getOperand(0), DAG)) {
+ Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), V.getValueType(),
+ Not, V.getOperand(1));
+ }
+ }
+ if (V.getOpcode() == X86ISD::PCMPGT &&
+ !ISD::isBuildVectorAllZeros(V.getOperand(0).getNode()) &&
+ !ISD::isBuildVectorAllOnes(V.getOperand(0).getNode()) &&
+ V.getOperand(0).hasOneUse()) {
+ APInt UndefElts;
+ SmallVector<APInt> EltBits;
+ if (getTargetConstantBitsFromNode(V.getOperand(0),
+ V.getScalarValueSizeInBits(), UndefElts,
+ EltBits)) {
+ // Don't fold min_signed_value -> (min_signed_value - 1)
+ bool MinSigned = false;
+ for (APInt &Elt : EltBits) {
+ MinSigned |= Elt.isMinSignedValue();
+ Elt -= 1;
+ }
+ if (!MinSigned) {
+ SDLoc DL(V);
+ MVT VT = V.getSimpleValueType();
+ return DAG.getNode(X86ISD::PCMPGT, DL, VT, V.getOperand(1),
+ getConstVector(EltBits, UndefElts, VT, DAG, DL));
+ }
+ }
+ }
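+ // For example, for signed elements not(pcmpgt(5, X)) == !(5 > X)
+ // == (X >= 5) == (X > 4) == pcmpgt(X, 4).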
+ SmallVector<SDValue, 2> CatOps;
+ if (collectConcatOps(V.getNode(), CatOps, DAG)) {
+ for (SDValue &CatOp : CatOps) {
+ SDValue NotCat = IsNOT(CatOp, DAG);
+ if (!NotCat) return SDValue();
+ CatOp = DAG.getBitcast(CatOp.getValueType(), NotCat);
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(V), V.getValueType(), CatOps);
+ }
+ return SDValue();
+}
+
/// Create a shuffle mask that matches the PACKSS/PACKUS truncation.
/// A multi-stage pack shuffle mask is created by specifying NumStages > 1.
/// Note: This ignores saturation, so inputs must be checked first.
@@ -8555,17 +5772,28 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
unsigned NumSubElts = SubVT.getVectorNumElements();
if (!N->isOnlyUserOf(Sub.getNode()))
return false;
+ SDValue SubBC = peekThroughBitcasts(Sub);
uint64_t InsertIdx = N.getConstantOperandVal(2);
// Handle INSERT_SUBVECTOR(SRC0, EXTRACT_SUBVECTOR(SRC1)).
- if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- Sub.getOperand(0).getValueType() == VT) {
- uint64_t ExtractIdx = Sub.getConstantOperandVal(1);
- for (int i = 0; i != (int)NumElts; ++i)
- Mask.push_back(i);
+ if (SubBC.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ SubBC.getOperand(0).getValueSizeInBits() == NumSizeInBits) {
+ uint64_t ExtractIdx = SubBC.getConstantOperandVal(1);
+ SDValue SubBCSrc = SubBC.getOperand(0);
+ unsigned NumSubSrcBCElts = SubBCSrc.getValueType().getVectorNumElements();
+ unsigned MaxElts = std::max(NumElts, NumSubSrcBCElts);
+ assert((MaxElts % NumElts) == 0 && (MaxElts % NumSubSrcBCElts) == 0 &&
+ "Subvector valuetype mismatch");
+ InsertIdx *= (MaxElts / NumElts);
+ ExtractIdx *= (MaxElts / NumSubSrcBCElts);
+ NumSubElts *= (MaxElts / NumElts);
+ bool SrcIsUndef = Src.isUndef();
+ for (int i = 0; i != (int)MaxElts; ++i)
+ Mask.push_back(SrcIsUndef ? SM_SentinelUndef : i);
for (int i = 0; i != (int)NumSubElts; ++i)
- Mask[InsertIdx + i] = NumElts + ExtractIdx + i;
- Ops.push_back(Src);
- Ops.push_back(Sub.getOperand(0));
+ Mask[InsertIdx + i] = (SrcIsUndef ? 0 : MaxElts) + ExtractIdx + i;
+ if (!SrcIsUndef)
+ Ops.push_back(Src);
+ Ops.push_back(SubBCSrc);
return true;
}
// Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(SRC1)).
@@ -8999,8 +6227,8 @@ static SDValue getBROADCAST_LOAD(unsigned Opcode, const SDLoc &DL, EVT VT,
if (!Mem || !Mem->readMem() || !Mem->isSimple() || Mem->isNonTemporal())
return SDValue();
- SDValue Ptr =
- DAG.getMemBasePlusOffset(Mem->getBasePtr(), TypeSize::Fixed(Offset), DL);
+ SDValue Ptr = DAG.getMemBasePlusOffset(Mem->getBasePtr(),
+ TypeSize::getFixed(Offset), DL);
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
SDValue Ops[] = {Mem->getChain(), Ptr};
SDValue BcstLd = DAG.getMemIntrinsicNode(
@@ -9173,13 +6401,29 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, const APInt &NonZeroMask,
SDValue V;
// Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
- for (unsigned i = 0; i < 16; i += 2) {
+ // If both of the lowest 16-bit chunks are non-zero, then convert to MOVD.
+ if (!NonZeroMask.extractBits(2, 0).isZero() &&
+ !NonZeroMask.extractBits(2, 2).isZero()) {
+ for (unsigned I = 0; I != 4; ++I) {
+ if (!NonZeroMask[I])
+ continue;
+ SDValue Elt = DAG.getZExtOrTrunc(Op.getOperand(I), dl, MVT::i32);
+ if (I != 0)
+ Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
+ DAG.getConstant(I * 8, dl, MVT::i8));
+ V = V ? DAG.getNode(ISD::OR, dl, MVT::i32, V, Elt) : Elt;
+ }
+ assert(V && "Failed to fold v16i8 vector to zero");
+ V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
+ V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
+ V = DAG.getBitcast(MVT::v8i16, V);
+ }
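+ // For example, non-zero bytes {0x12, 0x34, 0x56, 0x78} in lanes 0-3 merge
+ // into the i32 0x78563412 and reach the vector with a single MOVD.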
+ for (unsigned i = V ? 4 : 0; i < 16; i += 2) {
bool ThisIsNonZero = NonZeroMask[i];
bool NextIsNonZero = NonZeroMask[i + 1];
if (!ThisIsNonZero && !NextIsNonZero)
continue;
- // FIXME: Investigate combining the first 4 bytes as a i32 instead.
SDValue Elt;
if (ThisIsNonZero) {
if (NumZero || NextIsNonZero)
@@ -9796,6 +7040,31 @@ static SDValue combineToConsecutiveLoads(EVT VT, SDValue Op, const SDLoc &DL,
IsAfterLegalize);
}
+static Constant *getConstantVector(MVT VT, ArrayRef<APInt> Bits,
+ const APInt &Undefs, LLVMContext &C) {
+ unsigned ScalarSize = VT.getScalarSizeInBits();
+ Type *Ty = EVT(VT.getScalarType()).getTypeForEVT(C);
+
+ auto getConstantScalar = [&](const APInt &Val) -> Constant * {
+ if (VT.isFloatingPoint()) {
+ if (ScalarSize == 16)
+ return ConstantFP::get(C, APFloat(APFloat::IEEEhalf(), Val));
+ if (ScalarSize == 32)
+ return ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val));
+ assert(ScalarSize == 64 && "Unsupported floating point scalar size");
+ return ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val));
+ }
+ return Constant::getIntegerValue(Ty, Val);
+ };
+
+ SmallVector<Constant *, 32> ConstantVec;
+ for (unsigned I = 0, E = Bits.size(); I != E; ++I)
+ ConstantVec.push_back(Undefs[I] ? UndefValue::get(Ty)
+ : getConstantScalar(Bits[I]));
+
+ return ConstantVector::get(ArrayRef<Constant *>(ConstantVec));
+}
+
static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
unsigned SplatBitSize, LLVMContext &C) {
unsigned ScalarSize = VT.getScalarSizeInBits();
@@ -11483,6 +8752,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// constants. Insertion into a zero vector is handled as a special-case
// somewhere below here.
if (NumConstants == NumElems - 1 && NumNonZero != 1 &&
+ FrozenUndefMask.isZero() &&
(isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT) ||
isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))) {
// Create an all-constant vector. The variable element in the old
@@ -11889,16 +9159,12 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
// insert_subvector will give us two kshifts.
if (isPowerOf2_64(NonZeros) && Zeros != 0 && NonZeros > Zeros &&
Log2_64(NonZeros) != NumOperands - 1) {
- MVT ShiftVT = ResVT;
- if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
- ShiftVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
unsigned Idx = Log2_64(NonZeros);
SDValue SubVec = Op.getOperand(Idx);
unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements();
- SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ShiftVT,
- DAG.getUNDEF(ShiftVT), SubVec,
- DAG.getIntPtrConstant(0, dl));
- Op = DAG.getNode(X86ISD::KSHIFTL, dl, ShiftVT, SubVec,
+ MVT ShiftVT = widenMaskVectorType(ResVT, Subtarget);
+ Op = widenSubVector(ShiftVT, SubVec, false, Subtarget, DAG, dl);
+ Op = DAG.getNode(X86ISD::KSHIFTL, dl, ShiftVT, Op,
DAG.getTargetConstant(Idx * SubVecNumElts, dl, MVT::i8));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, Op,
DAG.getIntPtrConstant(0, dl));
@@ -13865,31 +11131,6 @@ static SDValue lowerShuffleAsDecomposedShuffleMerge(
return DAG.getVectorShuffle(VT, DL, V1, V2, FinalMask);
}
-/// Try to lower a vector shuffle as a bit rotation.
-///
-/// Look for a repeated rotation pattern in each sub group.
-/// Returns a ISD::ROTL element rotation amount or -1 if failed.
-static int matchShuffleAsBitRotate(ArrayRef<int> Mask, int NumSubElts) {
- int NumElts = Mask.size();
- assert((NumElts % NumSubElts) == 0 && "Illegal shuffle mask");
-
- int RotateAmt = -1;
- for (int i = 0; i != NumElts; i += NumSubElts) {
- for (int j = 0; j != NumSubElts; ++j) {
- int M = Mask[i + j];
- if (M < 0)
- continue;
- if (!isInRange(M, i, i + NumSubElts))
- return -1;
- int Offset = (NumSubElts - (M - (i + j))) % NumSubElts;
- if (0 <= RotateAmt && Offset != RotateAmt)
- return -1;
- RotateAmt = Offset;
- }
- }
- return RotateAmt;
-}
-
static int matchShuffleAsBitRotate(MVT &RotateVT, int EltSizeInBits,
const X86Subtarget &Subtarget,
ArrayRef<int> Mask) {
@@ -13899,18 +11140,14 @@ static int matchShuffleAsBitRotate(MVT &RotateVT, int EltSizeInBits,
// AVX512 only has vXi32/vXi64 rotates, so limit the rotation sub group size.
int MinSubElts = Subtarget.hasAVX512() ? std::max(32 / EltSizeInBits, 2) : 2;
int MaxSubElts = 64 / EltSizeInBits;
- for (int NumSubElts = MinSubElts; NumSubElts <= MaxSubElts; NumSubElts *= 2) {
- int RotateAmt = matchShuffleAsBitRotate(Mask, NumSubElts);
- if (RotateAmt < 0)
- continue;
-
- int NumElts = Mask.size();
- MVT RotateSVT = MVT::getIntegerVT(EltSizeInBits * NumSubElts);
- RotateVT = MVT::getVectorVT(RotateSVT, NumElts / NumSubElts);
- return RotateAmt * EltSizeInBits;
- }
-
- return -1;
+ unsigned RotateAmt, NumSubElts;
+ if (!ShuffleVectorInst::isBitRotateMask(Mask, EltSizeInBits, MinSubElts,
+ MaxSubElts, NumSubElts, RotateAmt))
+ return -1;
+ unsigned NumElts = Mask.size();
+ MVT RotateSVT = MVT::getIntegerVT(EltSizeInBits * NumSubElts);
+ RotateVT = MVT::getVectorVT(RotateSVT, NumElts / NumSubElts);
+ return RotateAmt;
}
/// Lower shuffle using X86ISD::VROTLI rotations.
@@ -14124,6 +11361,7 @@ static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1,
/// rotate* of the vector lanes.
static SDValue lowerShuffleAsVALIGN(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
+ const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) &&
@@ -14135,11 +11373,40 @@ static SDValue lowerShuffleAsVALIGN(const SDLoc &DL, MVT VT, SDValue V1,
SDValue Lo = V1, Hi = V2;
int Rotation = matchShuffleAsElementRotate(Lo, Hi, Mask);
- if (Rotation <= 0)
+ if (0 < Rotation)
+ return DAG.getNode(X86ISD::VALIGN, DL, VT, Lo, Hi,
+ DAG.getTargetConstant(Rotation, DL, MVT::i8));
+
+ // See if we can use VALIGN as a cross-lane version of VSHLDQ/VSRLDQ.
+ // TODO: Pull this out as a matchShuffleAsElementShift helper?
+ // TODO: We can probably make this more aggressive and use shift-pairs like
+ // lowerShuffleAsByteShiftMask.
+ unsigned NumElts = Mask.size();
+ unsigned ZeroLo = Zeroable.countr_one();
+ unsigned ZeroHi = Zeroable.countl_one();
+ assert((ZeroLo + ZeroHi) < NumElts && "Zeroable shuffle detected");
+ if (!ZeroLo && !ZeroHi)
return SDValue();
- return DAG.getNode(X86ISD::VALIGN, DL, VT, Lo, Hi,
- DAG.getTargetConstant(Rotation, DL, MVT::i8));
+ if (ZeroLo) {
+ SDValue Src = Mask[ZeroLo] < (int)NumElts ? V1 : V2;
+ int Low = Mask[ZeroLo] < (int)NumElts ? 0 : NumElts;
+ if (isSequentialOrUndefInRange(Mask, ZeroLo, NumElts - ZeroLo, Low))
+ return DAG.getNode(X86ISD::VALIGN, DL, VT, Src,
+ getZeroVector(VT, Subtarget, DAG, DL),
+ DAG.getTargetConstant(NumElts - ZeroLo, DL, MVT::i8));
+ }
+
+ if (ZeroHi) {
+ SDValue Src = Mask[0] < (int)NumElts ? V1 : V2;
+ int Low = Mask[0] < (int)NumElts ? 0 : NumElts;
+ if (isSequentialOrUndefInRange(Mask, 0, NumElts - ZeroHi, Low + ZeroHi))
+ return DAG.getNode(X86ISD::VALIGN, DL, VT,
+ getZeroVector(VT, Subtarget, DAG, DL), Src,
+ DAG.getTargetConstant(ZeroHi, DL, MVT::i8));
+ }
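+ // For example, a v4i32 mask <5,6,7,zero> has ZeroHi == 1 and is sequential
+ // from element 5, so VALIGN(zero, V2, 1) shifts a zero lane in at the top.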
+
+ return SDValue();
}
/// Try to lower a vector shuffle as a byte shift sequence.
@@ -15185,7 +12452,7 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
unsigned Offset = BroadcastIdx * SVT.getStoreSize();
assert((int)(Offset * 8) == BitOffset && "Unexpected bit-offset");
SDValue NewAddr =
- DAG.getMemBasePlusOffset(BaseAddr, TypeSize::Fixed(Offset), DL);
+ DAG.getMemBasePlusOffset(BaseAddr, TypeSize::getFixed(Offset), DL);
// Directly form VBROADCAST_LOAD if we're using VBROADCAST opcode rather
// than MOVDDUP.
@@ -15535,7 +12802,7 @@ static SDValue lowerV2I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (Subtarget.hasSSSE3()) {
if (Subtarget.hasVLX())
if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v2i64, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Rotate;
if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v2i64, V1, V2, Mask,
@@ -15850,7 +13117,7 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (Subtarget.hasSSSE3()) {
if (Subtarget.hasVLX())
if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v4i32, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Rotate;
if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v4i32, V1, V2, Mask,
@@ -18066,6 +15333,12 @@ static SDValue lowerShuffleAsRepeatedMaskAndLanePermute(
for (int i = 0; i != NumElts; i += NumBroadcastElts)
for (int j = 0; j != NumBroadcastElts; ++j)
BroadcastMask[i + j] = j;
+
+ // Avoid returning the same shuffle operation. For example,
+ // v8i32 = vector_shuffle<0,1,0,1,0,1,0,1> t5, undef:v8i32
+ if (BroadcastMask == Mask)
+ return SDValue();
+
return DAG.getVectorShuffle(VT, DL, RepeatShuf, DAG.getUNDEF(VT),
BroadcastMask);
}
@@ -18584,7 +15857,7 @@ static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// If we have VLX support, we can use VALIGN or VEXPAND.
if (Subtarget.hasVLX()) {
if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v4i64, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Rotate;
if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v4i64, Zeroable, Mask, V1, V2,
@@ -18846,7 +16119,7 @@ static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// If we have VLX support, we can use VALIGN or EXPAND.
if (Subtarget.hasVLX()) {
if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v8i32, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Rotate;
if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i32, Zeroable, Mask, V1, V2,
@@ -19304,8 +16577,8 @@ static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
// Try to lower to vshuf64x2/vshuf32x4.
SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
- unsigned PermMask = 0;
- // Insure elements came from the same Op.
+ int PermMask[4] = {-1, -1, -1, -1};
+ // Ensure elements came from the same Op.
for (int i = 0; i < 4; ++i) {
assert(Widened128Mask[i] >= -1 && "Illegal shuffle sentinel value");
if (Widened128Mask[i] < 0)
@@ -19318,13 +16591,11 @@ static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
else if (Ops[OpIndex] != Op)
return SDValue();
- // Convert the 128-bit shuffle mask selection values into 128-bit selection
- // bits defined by a vshuf64x2 instruction's immediate control byte.
- PermMask |= (Widened128Mask[i] % 4) << (i * 2);
+ PermMask[i] = Widened128Mask[i] % 4;
}
return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
- DAG.getTargetConstant(PermMask, DL, MVT::i8));
+ getV4X86ShuffleImm8ForMask(PermMask, DL, DAG));
}
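
The immediate used here packs two selection bits per 128-bit lane, which is
exactly what the removed `PermMask |= (Widened128Mask[i] % 4) << (i * 2)`
computed by hand; the sketch below shows the encoding on a concrete mask (a
standalone illustration inferred from this code, not a definitive description
of getV4X86ShuffleImm8ForMask):

    #include <cstdio>

    int main() {
      int Mask[4] = {1, 0, 3, 2}; // per-lane selections, each in [0, 3]
      unsigned Imm = 0;
      for (int i = 0; i < 4; ++i)
        Imm |= unsigned(Mask[i] & 3) << (i * 2);
      printf("imm8 = 0x%02X\n", Imm); // 0x01 | 0x00 | 0x30 | 0x80 = 0xB1
    }
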
/// Handle lowering of 8-lane 64-bit floating point shuffles.
@@ -19497,7 +16768,7 @@ static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to use VALIGN.
if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v8i64, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Rotate;
// Try to use PALIGNR.
@@ -19581,7 +16852,7 @@ static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to use VALIGN.
if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v16i32, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Rotate;
// Try to use byte rotation instructions.
@@ -19886,13 +17157,8 @@ static SDValue lower1BitShuffleAsKSHIFTR(const SDLoc &DL, ArrayRef<int> Mask,
assert(ShiftAmt >= 0 && "All undef?");
// Great we found a shift right.
- MVT WideVT = VT;
- if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8)
- WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
- SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT,
- DAG.getUNDEF(WideVT), V1,
- DAG.getIntPtrConstant(0, DL));
- Res = DAG.getNode(X86ISD::KSHIFTR, DL, WideVT, Res,
+ SDValue Res = widenMaskVector(V1, false, Subtarget, DAG, DL);
+ Res = DAG.getNode(X86ISD::KSHIFTR, DL, Res.getValueType(), Res,
DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
DAG.getIntPtrConstant(0, DL));
@@ -19989,12 +17255,8 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
unsigned Opcode;
int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable);
if (ShiftAmt >= 0) {
- MVT WideVT = VT;
- if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8)
- WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
- SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT,
- DAG.getUNDEF(WideVT), V,
- DAG.getIntPtrConstant(0, DL));
+ SDValue Res = widenMaskVector(V, false, Subtarget, DAG, DL);
+ MVT WideVT = Res.getSimpleValueType();
// Widened right shifts need two shifts to ensure we shift in zeroes.
if (Opcode == X86ISD::KSHIFTR && WideVT != VT) {
int WideElts = WideVT.getVectorNumElements();
@@ -20531,6 +17793,11 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
unsigned NumElts = VecVT.getVectorNumElements();
// Extending v8i1/v16i1 to 512-bit get better performance on KNL
// than extending to 128/256bit.
+ if (NumElts == 1) {
+ Vec = widenMaskVector(Vec, false, Subtarget, DAG, dl);
+ MVT IntVT = MVT::getIntegerVT(Vec.getValueType().getVectorNumElements());
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, DAG.getBitcast(IntVT, Vec));
+ }
MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8;
MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts);
SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec);
@@ -20543,23 +17810,50 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
return Op;
// Extend to natively supported kshift.
- unsigned NumElems = VecVT.getVectorNumElements();
- MVT WideVecVT = VecVT;
- if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) {
- WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
- Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT,
- DAG.getUNDEF(WideVecVT), Vec,
- DAG.getIntPtrConstant(0, dl));
- }
+ Vec = widenMaskVector(Vec, false, Subtarget, DAG, dl);
// Use kshiftr instruction to move to the lower element.
- Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, Vec.getSimpleValueType(), Vec,
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
DAG.getIntPtrConstant(0, dl));
}
+// Helper to find all the extracted elements from a vector.
+static APInt getExtractedDemandedElts(SDNode *N) {
+ MVT VT = N->getSimpleValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+ APInt DemandedElts = APInt::getZero(NumElts);
+ for (SDNode *User : N->uses()) {
+ switch (User->getOpcode()) {
+ case X86ISD::PEXTRB:
+ case X86ISD::PEXTRW:
+ case ISD::EXTRACT_VECTOR_ELT:
+ if (!isa<ConstantSDNode>(User->getOperand(1))) {
+ DemandedElts.setAllBits();
+ return DemandedElts;
+ }
+ DemandedElts.setBit(User->getConstantOperandVal(1));
+ break;
+ case ISD::BITCAST: {
+ if (!User->getValueType(0).isSimple() ||
+ !User->getValueType(0).isVector()) {
+ DemandedElts.setAllBits();
+ return DemandedElts;
+ }
+ APInt DemandedSrcElts = getExtractedDemandedElts(User);
+ DemandedElts |= APIntOps::ScaleBitMask(DemandedSrcElts, NumElts);
+ break;
+ }
+ default:
+ DemandedElts.setAllBits();
+ return DemandedElts;
+ }
+ }
+ return DemandedElts;
+}
+
SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
@@ -20574,7 +17868,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
if (!IdxC) {
    // It's more profitable to go through memory (1 cycle throughput)
- // than using VMOVD + VPERMV/PSHUFB sequence ( 2/3 cycles throughput)
+ // than using VMOVD + VPERMV/PSHUFB sequence (2/3 cycles throughput)
// IACA tool was used to get performance estimation
// (https://software.intel.com/en-us/articles/intel-architecture-code-analyzer)
//
@@ -20651,13 +17945,16 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
if (SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG))
return Res;
- // TODO: We only extract a single element from v16i8, we can probably afford
- // to be more aggressive here before using the default approach of spilling to
- // stack.
- if (VT.getSizeInBits() == 8 && Op->isOnlyUserOf(Vec.getNode())) {
+ // Only extract a single element from a v16i8 source - determine the common
+ // DWORD/WORD that all extractions share, and extract the sub-byte.
+ // TODO: Add QWORD MOVQ extraction?
+ if (VT == MVT::i8) {
+ APInt DemandedElts = getExtractedDemandedElts(Vec.getNode());
+ assert(DemandedElts.getBitWidth() == 16 && "Vector width mismatch");
+
// Extract either the lowest i32 or any i16, and extract the sub-byte.
int DWordIdx = IdxVal / 4;
- if (DWordIdx == 0) {
+ if (DWordIdx == 0 && DemandedElts == (DemandedElts & 15)) {
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getBitcast(MVT::v4i32, Vec),
DAG.getIntPtrConstant(DWordIdx, dl));
@@ -20669,14 +17966,16 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
}
int WordIdx = IdxVal / 2;
- SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
- DAG.getBitcast(MVT::v8i16, Vec),
- DAG.getIntPtrConstant(WordIdx, dl));
- int ShiftVal = (IdxVal % 2) * 8;
- if (ShiftVal != 0)
- Res = DAG.getNode(ISD::SRL, dl, MVT::i16, Res,
- DAG.getConstant(ShiftVal, dl, MVT::i8));
- return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ if (DemandedElts == (DemandedElts & (3 << (WordIdx * 2)))) {
+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
+ DAG.getBitcast(MVT::v8i16, Vec),
+ DAG.getIntPtrConstant(WordIdx, dl));
+ int ShiftVal = (IdxVal % 2) * 8;
+ if (ShiftVal != 0)
+ Res = DAG.getNode(ISD::SRL, dl, MVT::i16, Res,
+ DAG.getConstant(ShiftVal, dl, MVT::i8));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ }
}
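
The sub-byte extraction above has a simple scalar equivalent: read the 16-bit
word containing the byte, then shift the wanted byte down (x86 is
little-endian, which this sketch assumes; `extract_byte` is a hypothetical
helper, not an LLVM function):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Model of the WORD path: extract byte Idx of a 16-byte vector via the
    // containing i16 plus a shift, mirroring EXTRACT_VECTOR_ELT + SRL + TRUNC.
    static uint8_t extract_byte(const uint8_t Vec[16], unsigned Idx) {
      uint16_t Word;
      std::memcpy(&Word, Vec + (Idx / 2) * 2, sizeof(Word));
      return uint8_t(Word >> ((Idx % 2) * 8));
    }

    int main() {
      uint8_t V[16];
      for (int i = 0; i < 16; ++i)
        V[i] = uint8_t(i * 3);
      printf("%u\n", extract_byte(V, 5)); // 15
    }
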
if (VT == MVT::f16 || VT.getSizeInBits() == 32) {
@@ -21006,20 +18305,11 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
if (IdxVal == 0) // the operation is legal
return Op;
- MVT VecVT = Vec.getSimpleValueType();
- unsigned NumElems = VecVT.getVectorNumElements();
-
// Extend to natively supported kshift.
- MVT WideVecVT = VecVT;
- if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) {
- WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
- Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT,
- DAG.getUNDEF(WideVecVT), Vec,
- DAG.getIntPtrConstant(0, dl));
- }
+ Vec = widenMaskVector(Vec, false, Subtarget, DAG, dl);
// Shift to the LSB.
- Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, Vec.getSimpleValueType(), Vec,
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, Op.getValueType(), Vec,
@@ -21033,15 +18323,10 @@ unsigned X86TargetLowering::getGlobalWrapperKind(
if (GV && GV->isAbsoluteSymbolRef())
return X86ISD::Wrapper;
- CodeModel::Model M = getTargetMachine().getCodeModel();
+ // The following OpFlags under RIP-rel PIC use RIP.
if (Subtarget.isPICStyleRIPRel() &&
- (M == CodeModel::Small || M == CodeModel::Kernel))
- return X86ISD::WrapperRIP;
-
- // In the medium model, functions can always be referenced RIP-relatively,
- // since they must be within 2GiB. This is also possible in non-PIC mode, and
- // shorter than the 64-bit absolute immediate that would otherwise be emitted.
- if (M == CodeModel::Medium && isa_and_nonnull<Function>(GV))
+ (OpFlags == X86II::MO_NO_FLAG || OpFlags == X86II::MO_COFFSTUB ||
+ OpFlags == X86II::MO_DLLIMPORT))
return X86ISD::WrapperRIP;
// GOTPCREL references must always use RIP.
@@ -21069,7 +18354,8 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
SDValue Result = DAG.getTargetConstantPool(
CP->getConstVal(), PtrVT, CP->getAlign(), CP->getOffset(), OpFlag);
SDLoc DL(CP);
- Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result);
+ Result =
+ DAG.getNode(getGlobalWrapperKind(nullptr, OpFlag), DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (OpFlag) {
Result =
@@ -21090,7 +18376,8 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
SDLoc DL(JT);
- Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result);
+ Result =
+ DAG.getNode(getGlobalWrapperKind(nullptr, OpFlag), DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (OpFlag)
@@ -21116,7 +18403,8 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset, OpFlags);
- Result = DAG.getNode(getGlobalWrapperKind(), dl, PtrVT, Result);
+ Result =
+ DAG.getNode(getGlobalWrapperKind(nullptr, OpFlags), dl, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (isGlobalRelativeToPICBase(OpFlags)) {
@@ -21315,8 +18603,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
SDLoc dl(GA);
// Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).
- Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(),
- is64Bit ? 257 : 256));
+ Value *Ptr = Constant::getNullValue(
+ PointerType::get(*DAG.getContext(), is64Bit ? 257 : 256));
SDValue ThreadPointer =
DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0, dl),
@@ -21458,11 +18746,9 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
// Get the Thread Pointer, which is %fs:__tls_array (32-bit) or
// %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly
// use its literal value of 0x2C.
- Value *Ptr = Constant::getNullValue(Subtarget.is64Bit()
- ? Type::getInt8PtrTy(*DAG.getContext(),
- 256)
- : Type::getInt32PtrTy(*DAG.getContext(),
- 257));
+ Value *Ptr = Constant::getNullValue(
+ Subtarget.is64Bit() ? PointerType::get(*DAG.getContext(), 256)
+ : PointerType::get(*DAG.getContext(), 257));
SDValue TlsArray = Subtarget.is64Bit()
? DAG.getIntPtrConstant(0x58, dl)
@@ -22412,7 +19698,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI);
if (SrcVT == MVT::i32) {
SDValue OffsetSlot =
- DAG.getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), dl);
+ DAG.getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), dl);
SDValue Store1 = DAG.getStore(Chain, dl, Src, StackSlot, MPI, SlotAlign);
SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32),
OffsetSlot, MPI.getWithOffset(4), SlotAlign);
@@ -22822,22 +20108,18 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
if (SrcVT == DstVT)
return In;
- // We only support vector truncation to 64bits or greater from a
- // 128bits or greater source.
- unsigned DstSizeInBits = DstVT.getSizeInBits();
- unsigned SrcSizeInBits = SrcVT.getSizeInBits();
- if ((DstSizeInBits % 64) != 0 || (SrcSizeInBits % 128) != 0)
- return SDValue();
-
unsigned NumElems = SrcVT.getVectorNumElements();
- if (!isPowerOf2_32(NumElems))
+  if (NumElems < 2 || !isPowerOf2_32(NumElems))
return SDValue();
- LLVMContext &Ctx = *DAG.getContext();
+ unsigned DstSizeInBits = DstVT.getSizeInBits();
+ unsigned SrcSizeInBits = SrcVT.getSizeInBits();
assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
assert(SrcSizeInBits > DstSizeInBits && "Illegal truncation");
+ LLVMContext &Ctx = *DAG.getContext();
EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);
+ EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
// Pack to the largest type possible:
// vXi64/vXi32 -> PACK*SDW and vXi16 -> PACK*SWB.
@@ -22848,20 +20130,32 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
OutVT = MVT::i16;
}
- // 128bit -> 64bit truncate - PACK 128-bit src in the lower subvector.
- if (SrcVT.is128BitVector()) {
+ // Sub-128-bit truncation - widen to 128-bit src and pack in the lower half.
+ // On pre-AVX512, pack the src in both halves to help value tracking.
+ if (SrcSizeInBits <= 128) {
InVT = EVT::getVectorVT(Ctx, InVT, 128 / InVT.getSizeInBits());
OutVT = EVT::getVectorVT(Ctx, OutVT, 128 / OutVT.getSizeInBits());
- In = DAG.getBitcast(InVT, In);
- SDValue Res = DAG.getNode(Opcode, DL, OutVT, In, DAG.getUNDEF(InVT));
- Res = extractSubVector(Res, 0, DAG, DL, 64);
- return DAG.getBitcast(DstVT, Res);
+ In = widenSubVector(In, false, Subtarget, DAG, DL, 128);
+ SDValue LHS = DAG.getBitcast(InVT, In);
+ SDValue RHS = Subtarget.hasAVX512() ? DAG.getUNDEF(InVT) : LHS;
+ SDValue Res = DAG.getNode(Opcode, DL, OutVT, LHS, RHS);
+ Res = extractSubVector(Res, 0, DAG, DL, SrcSizeInBits / 2);
+ Res = DAG.getBitcast(PackedVT, Res);
+ return truncateVectorWithPACK(Opcode, DstVT, Res, DL, DAG, Subtarget);
}
// Split lower/upper subvectors.
SDValue Lo, Hi;
std::tie(Lo, Hi) = splitVector(In, DAG, DL);
+ // If Hi is undef, then don't bother packing it and widen the result instead.
+ if (Hi.isUndef()) {
+ EVT DstHalfVT = DstVT.getHalfNumVectorElementsVT(Ctx);
+ if (SDValue Res =
+ truncateVectorWithPACK(Opcode, DstHalfVT, Lo, DL, DAG, Subtarget))
+ return widenSubVector(Res, false, Subtarget, DAG, DL, DstSizeInBits);
+ }
+
unsigned SubSizeInBits = SrcSizeInBits / 2;
InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());
@@ -22893,7 +20187,6 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
return DAG.getBitcast(DstVT, Res);
// If 512bit -> 128bit truncate another stage.
- EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
Res = DAG.getBitcast(PackedVT, Res);
return truncateVectorWithPACK(Opcode, DstVT, Res, DL, DAG, Subtarget);
}
@@ -22901,7 +20194,6 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
// Recursively pack lower/upper subvectors, concat result and pack again.
assert(SrcSizeInBits >= 256 && "Expected 256-bit vector or greater");
- EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
if (PackedVT.is128BitVector()) {
// Avoid CONCAT_VECTORS on sub-128bit nodes as these can fail after
// type legalization.
@@ -22917,17 +20209,14 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
return truncateVectorWithPACK(Opcode, DstVT, Res, DL, DAG, Subtarget);
}
-/// Truncate using ISD::AND mask and X86ISD::PACKUS.
+/// Truncate using inreg zero extension (AND mask) and X86ISD::PACKUS.
/// e.g. trunc <8 x i32> X to <8 x i16> -->
/// MaskX = X & 0xffff (clear high bits to prevent saturation)
/// packus (extract_subv MaskX, 0), (extract_subv MaskX, 1)
static SDValue truncateVectorWithPACKUS(EVT DstVT, SDValue In, const SDLoc &DL,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- EVT SrcVT = In.getValueType();
- APInt Mask = APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
- DstVT.getScalarSizeInBits());
- In = DAG.getNode(ISD::AND, DL, SrcVT, In, DAG.getConstant(Mask, DL, SrcVT));
+ In = DAG.getZeroExtendInReg(In, DL, DstVT);
return truncateVectorWithPACK(X86ISD::PACKUS, DstVT, In, DL, DAG, Subtarget);
}
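
The zero-extend-in-reg (previously an explicit AND mask) matters because
PACKUS saturates each wide lane to the unsigned range of the narrow type;
clearing the high bits first makes the saturation a no-op, leaving a pure
truncation. A scalar model of one PACKUSWB lane:

    #include <cstdint>
    #include <cstdio>

    // packuswb clamps a signed i16 to [0, 255] before narrowing.
    static uint8_t packus_lane(int16_t X) {
      if (X < 0)
        return 0;
      if (X > 255)
        return 255;
      return uint8_t(X);
    }

    int main() {
      int16_t In = 0x1234;
      int16_t Masked = In & 0x00FF; // the zext-in-reg step
      printf("%02X vs %02X\n", packus_lane(In), packus_lane(Masked));
      // FF vs 34: without the mask the lane saturates instead of truncating.
    }
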
@@ -22941,6 +20230,96 @@ static SDValue truncateVectorWithPACKSS(EVT DstVT, SDValue In, const SDLoc &DL,
return truncateVectorWithPACK(X86ISD::PACKSS, DstVT, In, DL, DAG, Subtarget);
}
+/// Helper to determine if \p In truncated to \p DstVT has the necessary
+/// signbits / leading zero bits to be truncated with PACKSS / PACKUS,
+/// possibly by converting a SRL node to SRA for sign extension.
+static SDValue matchTruncateWithPACK(unsigned &PackOpcode, EVT DstVT,
+ SDValue In, const SDLoc &DL,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ // Requires SSE2.
+ if (!Subtarget.hasSSE2())
+ return SDValue();
+
+ EVT SrcVT = In.getValueType();
+ EVT DstSVT = DstVT.getVectorElementType();
+ EVT SrcSVT = SrcVT.getVectorElementType();
+
+ // Check we have a truncation suited for PACKSS/PACKUS.
+ if (!((SrcSVT == MVT::i16 || SrcSVT == MVT::i32 || SrcSVT == MVT::i64) &&
+ (DstSVT == MVT::i8 || DstSVT == MVT::i16 || DstSVT == MVT::i32)))
+ return SDValue();
+
+ assert(SrcSVT.getSizeInBits() > DstSVT.getSizeInBits() && "Bad truncation");
+ unsigned NumStages = Log2_32(SrcSVT.getSizeInBits() / DstSVT.getSizeInBits());
+
+ // Truncation from 128-bit to vXi32 can be better handled with PSHUFD.
+ // Truncation to sub-64-bit vXi16 can be better handled with PSHUFD/PSHUFLW.
+ // Truncation from v2i64 to v2i8 can be better handled with PSHUFB.
+ if ((DstSVT == MVT::i32 && SrcVT.getSizeInBits() <= 128) ||
+ (DstSVT == MVT::i16 && SrcVT.getSizeInBits() <= (64 * NumStages)) ||
+ (DstVT == MVT::v2i8 && SrcVT == MVT::v2i64 && Subtarget.hasSSSE3()))
+ return SDValue();
+
+ // Prefer to lower v4i64 -> v4i32 as a shuffle unless we can cheaply
+ // split this for packing.
+ if (SrcVT == MVT::v4i64 && DstVT == MVT::v4i32 &&
+ !isFreeToSplitVector(In.getNode(), DAG) &&
+ (!Subtarget.hasAVX() || DAG.ComputeNumSignBits(In) != 64))
+ return SDValue();
+
+  // Don't truncate on AVX512 targets when it would take multiple PACK stages.
+ if (Subtarget.hasAVX512() && NumStages > 1)
+ return SDValue();
+
+ unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
+ unsigned NumPackedSignBits = std::min<unsigned>(DstSVT.getSizeInBits(), 16);
+ unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8;
+
+ // Truncate with PACKUS if we are truncating a vector with leading zero
+ // bits that extend all the way to the packed/truncated value.
+ // e.g. Masks, zext_in_reg, etc.
+ // Pre-SSE41 we can only use PACKUSWB.
+ KnownBits Known = DAG.computeKnownBits(In);
+ if ((NumSrcEltBits - NumPackedZeroBits) <= Known.countMinLeadingZeros()) {
+ PackOpcode = X86ISD::PACKUS;
+ return In;
+ }
+
+ // Truncate with PACKSS if we are truncating a vector with sign-bits
+ // that extend all the way to the packed/truncated value.
+ // e.g. Comparison result, sext_in_reg, etc.
+ unsigned NumSignBits = DAG.ComputeNumSignBits(In);
+
+ // Don't use PACKSS for vXi64 -> vXi32 truncations unless we're dealing with
+ // a sign splat (or AVX512 VPSRAQ support). ComputeNumSignBits struggles to
+ // see through BITCASTs later on and combines/simplifications can't then use
+ // it.
+ if (DstSVT == MVT::i32 && NumSignBits != SrcSVT.getSizeInBits() &&
+ !Subtarget.hasAVX512())
+ return SDValue();
+
+ unsigned MinSignBits = NumSrcEltBits - NumPackedSignBits;
+ if (MinSignBits < NumSignBits) {
+ PackOpcode = X86ISD::PACKSS;
+ return In;
+ }
+
+ // If we have a srl that only generates signbits that we will discard in
+ // the truncation then we can use PACKSS by converting the srl to a sra.
+ // SimplifyDemandedBits often relaxes sra to srl so we need to reverse it.
+ if (In.getOpcode() == ISD::SRL && In->hasOneUse())
+ if (const APInt *ShAmt = DAG.getValidShiftAmountConstant(
+ In, APInt::getAllOnes(SrcVT.getVectorNumElements()))) {
+ if (*ShAmt == MinSignBits) {
+ PackOpcode = X86ISD::PACKSS;
+ return DAG.getNode(ISD::SRA, DL, SrcVT, In->ops());
+ }
+ }
+
+ return SDValue();
+}
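
The PACKUS test above reduces to: every source element needs at least
SrcBits - NumPackedZeroBits known leading zeros (and PACKSS analogously needs
the sign bit to extend past the packed value). A standalone C++20 illustration
on concrete i32 -> i8 values rather than KnownBits:

    #include <bit>
    #include <cstdint>
    #include <cstdio>

    int main() {
      const unsigned SrcBits = 32, PackedZeroBits = 8; // SSE4.1 i8 case
      uint32_t A = 0x000000FF; // 24 leading zeros: PACKUS-safe
      uint32_t B = 0x00000100; // 23 leading zeros: would saturate
      printf("%d %d\n",
             std::countl_zero(A) >= int(SrcBits - PackedZeroBits),
             std::countl_zero(B) >= int(SrcBits - PackedZeroBits)); // 1 0
    }
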
+
/// This function lowers a vector truncation of 'extended sign-bits' or
/// 'extended zero-bits' values.
/// vXi16/vXi32/vXi64 to vXi8/vXi16/vXi32 into X86ISD::PACKSS/PACKUS operations.
@@ -22955,26 +20334,12 @@ static SDValue LowerTruncateVecPackWithSignBits(MVT DstVT, SDValue In,
(DstSVT == MVT::i8 || DstSVT == MVT::i16 || DstSVT == MVT::i32)))
return SDValue();
- // Don't lower with PACK nodes on AVX512 targets if we'd need more than one.
- if (Subtarget.hasAVX512() &&
- SrcSVT.getSizeInBits() > (DstSVT.getSizeInBits() * 2))
- return SDValue();
-
- // Prefer to lower v4i64 -> v4i32 as a shuffle unless we can cheaply
- // split this for packing.
- if (SrcVT == MVT::v4i64 && DstVT == MVT::v4i32 &&
- !isFreeToSplitVector(In.getNode(), DAG) &&
- (!Subtarget.hasInt256() || DAG.ComputeNumSignBits(In) != 64))
- return SDValue();
-
// If the upper half of the source is undef, then attempt to split and
// only truncate the lower half.
if (DstVT.getSizeInBits() >= 128) {
SmallVector<SDValue> LowerOps;
- if (isUpperSubvectorUndef(In, LowerOps, DAG)) {
+ if (SDValue Lo = isUpperSubvectorUndef(In, DL, DAG)) {
MVT DstHalfVT = DstVT.getHalfNumVectorElementsVT();
- MVT SrcHalfVT = SrcVT.getHalfNumVectorElementsVT();
- SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcHalfVT, LowerOps);
if (SDValue Res = LowerTruncateVecPackWithSignBits(DstHalfVT, Lo, DL,
Subtarget, DAG))
return widenSubVector(Res, false, Subtarget, DAG, DL,
@@ -22982,25 +20347,10 @@ static SDValue LowerTruncateVecPackWithSignBits(MVT DstVT, SDValue In,
}
}
- unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
- unsigned NumPackedSignBits = std::min<unsigned>(DstSVT.getSizeInBits(), 16);
- unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8;
-
- // Truncate with PACKUS if we are truncating a vector with leading zero
- // bits that extend all the way to the packed/truncated value. Pre-SSE41
- // we can only use PACKUSWB.
- KnownBits Known = DAG.computeKnownBits(In);
- if ((NumSrcEltBits - NumPackedZeroBits) <= Known.countMinLeadingZeros())
- if (SDValue V = truncateVectorWithPACK(X86ISD::PACKUS, DstVT, In, DL, DAG,
- Subtarget))
- return V;
-
- // Truncate with PACKSS if we are truncating a vector with sign-bits
- // that extend all the way to the packed/truncated value.
- if ((NumSrcEltBits - NumPackedSignBits) < DAG.ComputeNumSignBits(In))
- if (SDValue V = truncateVectorWithPACK(X86ISD::PACKSS, DstVT, In, DL, DAG,
- Subtarget))
- return V;
+ unsigned PackOpcode;
+ if (SDValue Src =
+ matchTruncateWithPACK(PackOpcode, DstVT, In, DL, DAG, Subtarget))
+ return truncateVectorWithPACK(PackOpcode, DstVT, Src, DL, DAG, Subtarget);
return SDValue();
}
@@ -23031,10 +20381,8 @@ static SDValue LowerTruncateVecPack(MVT DstVT, SDValue In, const SDLoc &DL,
// only truncate the lower half.
if (DstVT.getSizeInBits() >= 128) {
SmallVector<SDValue> LowerOps;
- if (isUpperSubvectorUndef(In, LowerOps, DAG)) {
+ if (SDValue Lo = isUpperSubvectorUndef(In, DL, DAG)) {
MVT DstHalfVT = DstVT.getHalfNumVectorElementsVT();
- MVT SrcHalfVT = SrcVT.getHalfNumVectorElementsVT();
- SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcHalfVT, LowerOps);
if (SDValue Res = LowerTruncateVecPack(DstHalfVT, Lo, DL, Subtarget, DAG))
return widenSubVector(Res, false, Subtarget, DAG, DL,
DstVT.getSizeInBits());
@@ -23151,7 +20499,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
// If we're called by the type legalizer, handle a few cases.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!TLI.isTypeLegal(InVT)) {
+ if (!TLI.isTypeLegal(VT) || !TLI.isTypeLegal(InVT)) {
if ((InVT == MVT::v8i64 || InVT == MVT::v16i32 || InVT == MVT::v16i64) &&
VT.is128BitVector() && Subtarget.hasAVX512()) {
assert((InVT == MVT::v16i64 || Subtarget.hasVLX()) &&
@@ -24295,7 +21643,7 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Unexpected type in LowerFABSorFNEG");
- // FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to
+ // FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOptLevel to
// decide if we should generate a 16-byte constant mask when we only need 4 or
// 8 bytes for the scalar case.
@@ -25222,6 +22570,29 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
return Sub.getValue(1);
}
+bool X86TargetLowering::isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond,
+ EVT VT) const {
+ return !VT.isVector() || Cond != ISD::CondCode::SETEQ;
+}
+
+bool X86TargetLowering::optimizeFMulOrFDivAsShiftAddBitcast(
+ SDNode *N, SDValue, SDValue IntPow2) const {
+ if (N->getOpcode() == ISD::FDIV)
+ return true;
+
+ EVT FPVT = N->getValueType(0);
+ EVT IntVT = IntPow2.getValueType();
+
+ // This indicates a non-free bitcast.
+ // TODO: This is probably overly conservative as we will need to scale the
+ // integer vector anyways for the int->fp cast.
+ if (FPVT.isVector() &&
+ FPVT.getScalarSizeInBits() != IntVT.getScalarSizeInBits())
+ return false;
+
+ return true;
+}
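
As I understand the DAGCombiner transform this hook gates, a float
multiply/divide by an integer power of two becomes shift/add/bitcast integer
arithmetic on the exponent field. A minimal C++20 sketch of the underlying
identity (only valid while the input and result stay normal; `mul_pow2` is a
hypothetical name):

    #include <bit>
    #include <cstdint>
    #include <cstdio>

    // For normal IEEE-754 floats, x * 2^K is an integer add of K to the
    // 8-bit exponent field that starts at bit 23.
    static float mul_pow2(float X, int K) {
      uint32_t Bits = std::bit_cast<uint32_t>(X);
      Bits += uint32_t(K) << 23;
      return std::bit_cast<float>(Bits);
    }

    int main() {
      printf("%g\n", mul_pow2(3.5f, 4)); // 56
    }
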
+
/// Check if replacement of SQRT with RSQRT should be disabled.
bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -25377,38 +22748,12 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
!(Subtarget.is64Bit() && VT == MVT::i64))
return SDValue();
- unsigned Lg2 = Divisor.countr_zero();
-
// If the divisor is 2 or -2, the default expansion is better.
- if (Lg2 == 1)
+ if (Divisor == 2 ||
+ Divisor == APInt(Divisor.getBitWidth(), -2, /*isSigned*/ true))
return SDValue();
- SDLoc DL(N);
- SDValue N0 = N->getOperand(0);
- SDValue Zero = DAG.getConstant(0, DL, VT);
- APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
- SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
-
- // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
- SDValue Cmp = DAG.getSetCC(DL, MVT::i8, N0, Zero, ISD::SETLT);
- SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
- SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
-
- Created.push_back(Cmp.getNode());
- Created.push_back(Add.getNode());
- Created.push_back(CMov.getNode());
-
- // Divide by pow2.
- SDValue SRA =
- DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i8));
-
- // If we're dividing by a positive value, we're done. Otherwise, we must
- // negate the result.
- if (Divisor.isNonNegative())
- return SRA;
-
- Created.push_back(SRA.getNode());
- return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
+ return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
}
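
The deleted block (now handled generically by buildSDIVPow2WithCMov) is the
classic round-toward-zero division by 2^K: add 2^K - 1 to negative dividends,
arithmetic-shift right, and negate if the divisor was negative. A scalar
sketch of the sequence:

    #include <cstdio>

    static int sdiv_pow2(int X, unsigned K, bool NegDivisor) {
      int Adj = X < 0 ? X + ((1 << K) - 1) : X; // the CMOV in the lowering
      int Q = Adj >> K;                         // SRA
      return NegDivisor ? -Q : Q;
    }

    int main() {
      printf("%d %d\n", sdiv_pow2(-7, 2, false), // -1, matches -7 / 4
             sdiv_pow2(7, 2, true));             // -1, matches 7 / -4
    }
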
/// Result of 'and' is compared against zero. Change to a BT node if possible.
@@ -27032,7 +24377,7 @@ static SDValue splitVectorStore(StoreSDNode *Store, SelectionDAG &DAG) {
unsigned HalfOffset = Value0.getValueType().getStoreSize();
SDValue Ptr0 = Store->getBasePtr();
SDValue Ptr1 =
- DAG.getMemBasePlusOffset(Ptr0, TypeSize::Fixed(HalfOffset), DL);
+ DAG.getMemBasePlusOffset(Ptr0, TypeSize::getFixed(HalfOffset), DL);
SDValue Ch0 =
DAG.getStore(Store->getChain(), DL, Value0, Ptr0, Store->getPointerInfo(),
Store->getOriginalAlign(),
@@ -27068,7 +24413,7 @@ static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT,
for (unsigned i = 0; i != NumElems; ++i) {
unsigned Offset = i * ScalarSize;
SDValue Ptr = DAG.getMemBasePlusOffset(Store->getBasePtr(),
- TypeSize::Fixed(Offset), DL);
+ TypeSize::getFixed(Offset), DL);
SDValue Scl = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreSVT, StoredVal,
DAG.getIntPtrConstant(i, DL));
SDValue Ch = DAG.getStore(Store->getChain(), DL, Scl, Ptr,
@@ -27464,7 +24809,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MemOps.push_back(Store);
// Store fp_offset
- FIN = DAG.getMemBasePlusOffset(FIN, TypeSize::Fixed(4), DL);
+ FIN = DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(4), DL);
Store = DAG.getStore(
Op.getOperand(0), DL,
DAG.getConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32), FIN,
@@ -28494,7 +25839,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(1), Control);
}
- // ADC/ADCX/SBB
+ // ADC/SBB
case ADX: {
SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
SDVTList VTs = DAG.getVTList(Op.getOperand(2).getValueType(), MVT::i32);
@@ -28767,7 +26112,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
auto &Context = MF.getMMI().getContext();
MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
Twine(MF.getFunctionNumber()));
- return DAG.getNode(getGlobalWrapperKind(), dl, VT,
+ return DAG.getNode(getGlobalWrapperKind(nullptr, /*OpFlags=*/0), dl, VT,
DAG.getMCSymbol(S, PtrVT));
}
@@ -28900,7 +26245,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDLoc dl(Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
// Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).
- Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(
+ Value *Ptr = Constant::getNullValue(PointerType::get(
*DAG.getContext(), Subtarget.is64Bit() ? X86AS::FS : X86AS::GS));
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
DAG.getIntPtrConstant(0, dl), MachinePointerInfo(Ptr));
@@ -29764,11 +27109,13 @@ Register X86TargetLowering::getRegisterByName(const char* RegName, LLT VT,
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
Register Reg = StringSwitch<unsigned>(RegName)
- .Case("esp", X86::ESP)
- .Case("rsp", X86::RSP)
- .Case("ebp", X86::EBP)
- .Case("rbp", X86::RBP)
- .Default(0);
+ .Case("esp", X86::ESP)
+ .Case("rsp", X86::RSP)
+ .Case("ebp", X86::EBP)
+ .Case("rbp", X86::RBP)
+ .Case("r14", X86::R14)
+ .Case("r15", X86::R15)
+ .Default(0);
if (Reg == X86::EBP || Reg == X86::RBP) {
if (!TFI.hasFP(MF))
@@ -30842,10 +28189,8 @@ static SDValue LowerABD(SDValue Op, const X86Subtarget &Subtarget,
// abds(lhs, rhs) -> trunc(abs(sub(sext(lhs), sext(rhs))))
// abdu(lhs, rhs) -> trunc(abs(sub(zext(lhs), zext(rhs))))
unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
- SDValue LHS = DAG.getFreeze(Op.getOperand(0));
- SDValue RHS = DAG.getFreeze(Op.getOperand(1));
- LHS = DAG.getNode(ExtOpc, dl, WideVT, LHS);
- RHS = DAG.getNode(ExtOpc, dl, WideVT, RHS);
+ SDValue LHS = DAG.getNode(ExtOpc, dl, WideVT, Op.getOperand(0));
+ SDValue RHS = DAG.getNode(ExtOpc, dl, WideVT, Op.getOperand(1));
SDValue Diff = DAG.getNode(ISD::SUB, dl, WideVT, LHS, RHS);
SDValue AbsDiff = DAG.getNode(ISD::ABS, dl, WideVT, Diff);
return DAG.getNode(ISD::TRUNCATE, dl, VT, AbsDiff);
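
The abds/abdu expansion above in scalar form: extend both inputs to a wider
type, subtract, take the absolute value, truncate back. For unsigned i8:

    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>

    static uint8_t abdu8(uint8_t A, uint8_t B) {
      int Diff = int(A) - int(B);     // zext to a wider type, then sub
      return uint8_t(std::abs(Diff)); // abs + trunc
    }

    int main() {
      printf("%u\n", abdu8(10, 250)); // 240
    }
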
@@ -32972,7 +30317,7 @@ void X86TargetLowering::emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
Instruction *I = AI->user_back();
LLVMContext &Ctx = AI->getContext();
Value *Addr = Builder.CreatePointerCast(AI->getPointerOperand(),
- Type::getInt8PtrTy(Ctx));
+ PointerType::getUnqual(Ctx));
Function *BitTest = nullptr;
Value *Result = nullptr;
auto BitTested = FindSingleBitChange(AI->getValOperand());
@@ -33141,7 +30486,7 @@ void X86TargetLowering::emitCmpArithAtomicRMWIntrinsic(
Function *CmpArith =
Intrinsic::getDeclaration(AI->getModule(), IID, AI->getType());
Value *Addr = Builder.CreatePointerCast(AI->getPointerOperand(),
- Type::getInt8PtrTy(Ctx));
+ PointerType::getUnqual(Ctx));
Value *Call = Builder.CreateCall(
CmpArith, {Addr, AI->getValOperand(), Builder.getInt32((unsigned)CC)});
Value *Result = Builder.CreateTrunc(Call, Type::getInt1Ty(Ctx));
@@ -33265,18 +30610,6 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
return Loaded;
}
-bool X86TargetLowering::lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const {
- if (!SI.isUnordered())
- return false;
- return ExperimentalUnorderedISEL;
-}
-bool X86TargetLowering::lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const {
- if (!LI.isUnordered())
- return false;
- return ExperimentalUnorderedISEL;
-}
-
-
/// Emit a locked operation on a stack location which does not change any
/// memory location, but does involve a lock prefix. Location is chosen to be
/// a) very likely accessed only by a single thread to minimize cache traffic,
@@ -33953,8 +31286,8 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
SDValue Chain;
if (Subtarget.hasSSE1()) {
- SDValue SclToVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
- Node->getOperand(2));
+ SDValue SclToVec =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Node->getVal());
MVT StVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32;
SclToVec = DAG.getBitcast(StVT, SclToVec);
SDVTList Tys = DAG.getVTList(MVT::Other);
@@ -33968,9 +31301,8 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
MachinePointerInfo MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
- Chain =
- DAG.getStore(Node->getChain(), dl, Node->getOperand(2), StackPtr,
- MPI, MaybeAlign(), MachineMemOperand::MOStore);
+ Chain = DAG.getStore(Node->getChain(), dl, Node->getVal(), StackPtr,
+ MPI, MaybeAlign(), MachineMemOperand::MOStore);
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue LdOps[] = {Chain, StackPtr};
SDValue Value = DAG.getMemIntrinsicNode(
@@ -33999,11 +31331,9 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
// Convert seq_cst store -> xchg
// Convert wide store -> swap (-> cmpxchg8b/cmpxchg16b)
// FIXME: 16-byte ATOMIC_SWAP isn't actually hooked up at the moment.
- SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
- Node->getMemoryVT(),
- Node->getOperand(0),
- Node->getOperand(1), Node->getOperand(2),
- Node->getMemOperand());
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, Node->getMemoryVT(),
+ Node->getOperand(0), Node->getOperand(2),
+ Node->getOperand(1), Node->getMemOperand());
return Swap.getValue(1);
}
@@ -34447,7 +31777,7 @@ static StringRef getInstrStrFromOpNo(const SmallVectorImpl<StringRef> &AsmStrs,
for (auto &AsmStr : AsmStrs) {
// Match the OpNo string. We should match exactly to exclude match
// sub-string, e.g. "$12" contain "$1"
- if (AsmStr.endswith(OpNoStr1))
+ if (AsmStr.ends_with(OpNoStr1))
I = AsmStr.size() - OpNoStr1.size();
// Get the index of operand in AsmStr.
@@ -34851,9 +32181,22 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
EVT InVT = In.getValueType();
EVT InEltVT = InVT.getVectorElementType();
EVT EltVT = VT.getVectorElementType();
+ unsigned MinElts = VT.getVectorNumElements();
unsigned WidenNumElts = WidenVT.getVectorNumElements();
-
unsigned InBits = InVT.getSizeInBits();
+
+  // See if there are sufficient leading sign/zero bits to perform a PACKUS/PACKSS.
+ unsigned PackOpcode;
+ if (SDValue Src =
+ matchTruncateWithPACK(PackOpcode, VT, In, dl, DAG, Subtarget)) {
+ if (SDValue Res = truncateVectorWithPACK(PackOpcode, VT, Src,
+ dl, DAG, Subtarget)) {
+ Res = widenSubVector(WidenVT, Res, false, Subtarget, DAG, dl);
+ Results.push_back(Res);
+ return;
+ }
+ }
+
if (128 % InBits == 0) {
// 128 bit and smaller inputs should avoid truncate all together and
// just use a build_vector that will become a shuffle.
@@ -34861,7 +32204,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
// Use the original element count so we don't do more scalar opts than
// necessary.
- unsigned MinElts = VT.getVectorNumElements();
for (unsigned i=0; i < MinElts; ++i) {
SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, In,
DAG.getIntPtrConstant(i, dl));
@@ -34870,6 +32212,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(DAG.getBuildVector(WidenVT, dl, Ops));
return;
}
+
// With AVX512 there are some cases that can use a target specific
// truncate node to go from 256/512 to less than 128 with zeros in the
// upper elements of the 128 bit result.
@@ -34908,8 +32251,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
// this via type legalization.
if ((InEltVT == MVT::i16 || InEltVT == MVT::i32 || InEltVT == MVT::i64) &&
(EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32) &&
- (!Subtarget.hasSSSE3() || (InVT == MVT::v8i64 && VT == MVT::v8i8) ||
- (InVT == MVT::v4i64 && VT == MVT::v4i16 && !Subtarget.hasAVX()))) {
+ (!Subtarget.hasSSSE3() ||
+ (!isTypeLegal(InVT) &&
+ !(MinElts <= 4 && InEltVT == MVT::i64 && EltVT == MVT::i8)))) {
SDValue WidenIn = widenSubVector(In, false, Subtarget, DAG, dl,
InEltVT.getSizeInBits() * WidenNumElts);
Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, WidenVT, WidenIn));
@@ -37272,6 +34616,11 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
F->insert(It, FalseMBB);
F->insert(It, SinkMBB);
+ // Set the call frame size on entry to the new basic blocks.
+ unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
+ FalseMBB->setCallFrameSize(CallFrameSize);
+ SinkMBB->setCallFrameSize(CallFrameSize);
+
// If the EFLAGS register isn't dead in the terminator, then claim that it's
// live into the sink and copy blocks.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
@@ -40078,7 +37427,8 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
(MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
(MaskVT.is256BitVector() && 32 <= EltSizeInBits && Subtarget.hasAVX()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
- (MaskVT.is512BitVector() && Subtarget.hasAVX512())) {
+ (MaskVT.is512BitVector() && Subtarget.hasAVX512() &&
+ (32 <= EltSizeInBits || Subtarget.hasBWI()))) {
if (matchShuffleWithUNPCK(MaskVT, V1, V2, Shuffle, IsUnary, Mask, DL, DAG,
Subtarget)) {
SrcVT = DstVT = MaskVT;
@@ -40473,8 +37823,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
auto MatchSHUF128 = [&](MVT ShuffleVT, const SDLoc &DL,
ArrayRef<int> ScaledMask, SDValue V1, SDValue V2,
SelectionDAG &DAG) {
- unsigned PermMask = 0;
- // Insure elements came from the same Op.
+ int PermMask[4] = {-1, -1, -1, -1};
+ // Ensure elements came from the same Op.
SDValue Ops[2] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)};
for (int i = 0; i < 4; ++i) {
assert(ScaledMask[i] >= -1 && "Illegal shuffle sentinel value");
@@ -40488,16 +37838,13 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
else if (Ops[OpIndex] != Op)
return SDValue();
- // Convert the 128-bit shuffle mask selection values into 128-bit
- // selection bits defined by a vshuf64x2 instruction's immediate control
- // byte.
- PermMask |= (ScaledMask[i] % 4) << (i * 2);
+ PermMask[i] = ScaledMask[i] % 4;
}
return DAG.getNode(X86ISD::SHUF128, DL, ShuffleVT,
CanonicalizeShuffleInput(ShuffleVT, Ops[0]),
CanonicalizeShuffleInput(ShuffleVT, Ops[1]),
- DAG.getTargetConstant(PermMask, DL, MVT::i8));
+ getV4X86ShuffleImm8ForMask(PermMask, DL, DAG));
};
// FIXME: Is there a better way to do this? is256BitLaneRepeatedShuffleMask
@@ -42233,9 +39580,21 @@ static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL,
return SDValue();
}
+// TODO - move this to TLI like isBinOp?
+static bool isUnaryOp(unsigned Opcode) {
+ switch (Opcode) {
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ case ISD::CTPOP:
+ return true;
+ }
+ return false;
+}
+
+// Canonicalize SHUFFLE(UNARYOP(X)) -> UNARYOP(SHUFFLE(X)).
// Canonicalize SHUFFLE(BINOP(X,Y)) -> BINOP(SHUFFLE(X),SHUFFLE(Y)).
-static SDValue canonicalizeShuffleWithBinOps(SDValue N, SelectionDAG &DAG,
- const SDLoc &DL) {
+static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
+ const SDLoc &DL) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT ShuffleVT = N.getValueType();
@@ -42362,6 +39721,25 @@ static SDValue canonicalizeShuffleWithBinOps(SDValue N, SelectionDAG &DAG,
DAG.getBitcast(OpVT, RHS)));
}
}
+ if (isUnaryOp(SrcOpcode) && N1.getOpcode() == SrcOpcode &&
+ N0.getValueType() == N1.getValueType() &&
+ IsSafeToMoveShuffle(N0, SrcOpcode) &&
+ IsSafeToMoveShuffle(N1, SrcOpcode)) {
+ SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0));
+ SDValue Op10 = peekThroughOneUseBitcasts(N1.getOperand(0));
+ SDValue Res;
+ Op00 = DAG.getBitcast(ShuffleVT, Op00);
+ Op10 = DAG.getBitcast(ShuffleVT, Op10);
+ if (N.getNumOperands() == 3) {
+ Res = DAG.getNode(Opc, DL, ShuffleVT, Op00, Op10, N.getOperand(2));
+ } else {
+ Res = DAG.getNode(Opc, DL, ShuffleVT, Op00, Op10);
+ }
+ EVT OpVT = N0.getValueType();
+ return DAG.getBitcast(
+ ShuffleVT,
+ DAG.getNode(SrcOpcode, DL, OpVT, DAG.getBitcast(OpVT, Res)));
+ }
}
break;
}
@@ -42509,12 +39887,15 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
extract128BitVector(Src, 0, DAG, DL));
// broadcast(scalar_to_vector(x)) -> broadcast(x).
- if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ Src.getValueType().getScalarType() == Src.getOperand(0).getValueType())
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
// broadcast(extract_vector_elt(x, 0)) -> broadcast(x).
if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isNullConstant(Src.getOperand(1)) &&
+ Src.getValueType() ==
+ Src.getOperand(0).getValueType().getScalarType() &&
DAG.getTargetLoweringInfo().isTypeLegal(
Src.getOperand(0).getValueType()))
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
@@ -42611,8 +39992,8 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
LN->isSimple()) {
unsigned Offset = ShiftAmt / 8;
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ptr = DAG.getMemBasePlusOffset(LN->getBasePtr(),
- TypeSize::Fixed(Offset), DL);
+ SDValue Ptr = DAG.getMemBasePlusOffset(
+ LN->getBasePtr(), TypeSize::getFixed(Offset), DL);
SDValue Ops[] = { LN->getChain(), Ptr };
SDValue BcastLd = DAG.getMemIntrinsicNode(
X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::i16,
@@ -43073,7 +40454,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
// See if this reduces to a PSHUFD which is no more expensive and can
// combine with more operations. Note that it has to at least flip the
// dwords as otherwise it would have been removed as a no-op.
- if (ArrayRef(Mask).equals({2, 3, 0, 1})) {
+ if (ArrayRef<int>(Mask).equals({2, 3, 0, 1})) {
int DMask[] = {0, 1, 2, 3};
int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2;
DMask[DOffset + 0] = DOffset + 1;
@@ -43108,8 +40489,8 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
int MappedMask[8];
for (int i = 0; i < 8; ++i)
MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2;
- if (ArrayRef(MappedMask).equals({0, 0, 1, 1, 2, 2, 3, 3}) ||
- ArrayRef(MappedMask).equals({4, 4, 5, 5, 6, 6, 7, 7})) {
+ if (ArrayRef<int>(MappedMask).equals({0, 0, 1, 1, 2, 2, 3, 3}) ||
+ ArrayRef<int>(MappedMask).equals({4, 4, 5, 5, 6, 6, 7, 7})) {
// We can replace all three shuffles with an unpack.
V = DAG.getBitcast(VT, D.getOperand(0));
return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL
@@ -43403,7 +40784,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
SDLoc dl(N);
EVT VT = N->getValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.isTypeLegal(VT))
+ if (TLI.isTypeLegal(VT) && !isSoftF16(VT, Subtarget))
if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG))
return AddSub;
@@ -43440,10 +40821,11 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, DCI))
return SDValue(N, 0);
+ // Canonicalize SHUFFLE(UNARYOP(X)) -> UNARYOP(SHUFFLE(X)).
// Canonicalize SHUFFLE(BINOP(X,Y)) -> BINOP(SHUFFLE(X),SHUFFLE(Y)).
// Perform this after other shuffle combines to allow inner shuffles to be
// combined away first.
- if (SDValue BinOp = canonicalizeShuffleWithBinOps(Op, DAG, dl))
+ if (SDValue BinOp = canonicalizeShuffleWithOp(Op, DAG, dl))
return BinOp;
}
@@ -44113,6 +41495,18 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
// Integer ops.
case X86ISD::PACKSS:
case X86ISD::PACKUS:
+ case X86ISD::PCMPEQ:
+ case X86ISD::PCMPGT:
+ case X86ISD::PMULUDQ:
+ case X86ISD::PMULDQ:
+ case X86ISD::VSHLV:
+ case X86ISD::VSRLV:
+ case X86ISD::VSRAV:
+ // Float ops.
+ case X86ISD::FMAX:
+ case X86ISD::FMIN:
+ case X86ISD::FMAXC:
+ case X86ISD::FMINC:
// Horizontal Ops.
case X86ISD::HADD:
case X86ISD::HSUB:
@@ -44784,6 +42178,19 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()))
return Op.getOperand(1);
break;
+ case X86ISD::BLENDV: {
+ // BLENDV: Cond (MSB) ? LHS : RHS
+ SDValue Cond = Op.getOperand(0);
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+
+ KnownBits CondKnown = DAG.computeKnownBits(Cond, DemandedElts, Depth + 1);
+ if (CondKnown.isNegative())
+ return LHS;
+ if (CondKnown.isNonNegative())
+ return RHS;
+ break;
+ }
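
A scalar model of the BLENDV fold: each result lane follows the sign (MSB) of
the corresponding condition lane, so a condition with a fully known sign
collapses the select to a single operand:

    #include <cstdint>
    #include <cstdio>

    static int32_t blendv_lane(int32_t Cond, int32_t L, int32_t R) {
      return Cond < 0 ? L : R; // MSB set -> LHS, clear -> RHS
    }

    int main() {
      printf("%d %d\n", blendv_lane(-1, 11, 22), // 11
             blendv_lane(0x7FFFFFFF, 11, 22));   // 22
    }
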
case X86ISD::ANDNP: {
// ANDNP = (~LHS & RHS);
SDValue LHS = Op.getOperand(0);
@@ -46435,6 +43842,7 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
case ISD::FCEIL:
case ISD::FTRUNC:
case ISD::FNEARBYINT:
+ case ISD::FROUNDEVEN:
case ISD::FROUND:
case ISD::FFLOOR:
case X86ISD::FRCP:
@@ -46574,10 +43982,15 @@ static SDValue combineArithReduction(SDNode *ExtElt, SelectionDAG &DAG,
DAG.computeKnownBits(Rdx).getMaxValue().ule(255) &&
(EltSizeInBits == 16 || Rdx.getOpcode() == ISD::ZERO_EXTEND ||
Subtarget.hasAVX512())) {
- EVT ByteVT = VecVT.changeVectorElementType(MVT::i8);
- Rdx = DAG.getNode(ISD::TRUNCATE, DL, ByteVT, Rdx);
- if (ByteVT.getSizeInBits() < 128)
- Rdx = WidenToV16I8(Rdx, true);
+ if (Rdx.getValueType() == MVT::v8i16) {
+ Rdx = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, Rdx,
+ DAG.getUNDEF(MVT::v8i16));
+ } else {
+ EVT ByteVT = VecVT.changeVectorElementType(MVT::i8);
+ Rdx = DAG.getNode(ISD::TRUNCATE, DL, ByteVT, Rdx);
+ if (ByteVT.getSizeInBits() < 128)
+ Rdx = WidenToV16I8(Rdx, true);
+ }
// Build the PSADBW, split as 128/256/512 bits for SSE/AVX2/AVX512BW.
auto PSADBWBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
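
PSADBW works as the horizontal byte adder here because the sum of absolute
differences against an all-zero vector, sum(|b - 0|), is simply the sum of the
eight unsigned bytes. A scalar model of one 64-bit half:

    #include <cstdint>
    #include <cstdio>

    static uint16_t psadbw_half(const uint8_t B[8]) {
      uint16_t Sum = 0;
      for (int i = 0; i < 8; ++i)
        Sum += B[i]; // |B[i] - 0|
      return Sum;
    }

    int main() {
      uint8_t B[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      printf("%u\n", psadbw_half(B)); // 36
    }
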
@@ -46914,7 +44327,7 @@ static SDValue combineToExtendBoolVectorInReg(
Vec = DAG.getNode(ISD::AND, DL, VT, Vec, BitMask);
// Compare against the bitmask and extend the result.
- EVT CCVT = VT.changeVectorElementType(MVT::i1);
+ EVT CCVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
Vec = DAG.getSetCC(DL, CCVT, Vec, BitMask, ISD::SETEQ);
Vec = DAG.getSExtOrTrunc(Vec, DL, VT);
@@ -51258,7 +48671,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
// Attempt to combine a scalar bitmask AND with an extracted shuffle.
if ((VT.getScalarSizeInBits() % 8) == 0 &&
N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- isa<ConstantSDNode>(N0.getOperand(1))) {
+ isa<ConstantSDNode>(N0.getOperand(1)) && N0->hasOneUse()) {
SDValue BitMask = N1;
SDValue SrcVec = N0.getOperand(0);
EVT SrcVecVT = SrcVec.getValueType();
@@ -51344,7 +48757,7 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,
if (useVPTERNLOG(Subtarget, VT)) {
// Emit a VPTERNLOG node directly - 0xCA is the imm code for A?B:C.
// VPTERNLOG is only available as vXi32/64-bit types.
- MVT OpSVT = EltSizeInBits == 32 ? MVT::i32 : MVT::i64;
+ MVT OpSVT = EltSizeInBits <= 32 ? MVT::i32 : MVT::i64;
MVT OpVT =
MVT::getVectorVT(OpSVT, VT.getSizeInBits() / OpSVT.getSizeInBits());
SDValue A = DAG.getBitcast(OpVT, N0.getOperand(1));
@@ -52231,14 +49644,12 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
(Subtarget.hasVLX() || InVT.getSizeInBits() > 256) &&
!(!Subtarget.useAVX512Regs() && VT.getSizeInBits() >= 256);
- if (isPowerOf2_32(VT.getVectorNumElements()) && !PreferAVX512 &&
- VT.getSizeInBits() >= 64 &&
+ if (!PreferAVX512 && VT.getVectorNumElements() > 1 &&
+ isPowerOf2_32(VT.getVectorNumElements()) &&
(SVT == MVT::i8 || SVT == MVT::i16) &&
(InSVT == MVT::i16 || InSVT == MVT::i32)) {
if (SDValue USatVal = detectSSatPattern(In, VT, true)) {
// vXi32 -> vXi8 must be performed as PACKUSWB(PACKSSDW,PACKSSDW).
- // Only do this when the result is at least 64 bits or we'll leaving
- // dangling PACKSSDW nodes.
if (SVT == MVT::i8 && InSVT == MVT::i32) {
EVT MidVT = VT.changeVectorElementType(MVT::i16);
SDValue Mid = truncateVectorWithPACK(X86ISD::PACKSS, MidVT, USatVal, DL,
@@ -52475,7 +49886,7 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
unsigned HalfOffset = 16;
SDValue Ptr1 = Ld->getBasePtr();
SDValue Ptr2 =
- DAG.getMemBasePlusOffset(Ptr1, TypeSize::Fixed(HalfOffset), dl);
+ DAG.getMemBasePlusOffset(Ptr1, TypeSize::getFixed(HalfOffset), dl);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
NumElems / 2);
SDValue Load1 =
@@ -52509,25 +49920,67 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
}
}
- // If we also broadcast this as a subvector to a wider type, then just extract
- // the lowest subvector.
+ // If we also load/broadcast this to a wider type, then just extract the
+ // lowest subvector.
if (Ext == ISD::NON_EXTLOAD && Subtarget.hasAVX() && Ld->isSimple() &&
(RegVT.is128BitVector() || RegVT.is256BitVector())) {
SDValue Ptr = Ld->getBasePtr();
SDValue Chain = Ld->getChain();
- for (SDNode *User : Ptr->uses()) {
- if (User != N && User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD &&
- cast<MemIntrinsicSDNode>(User)->getBasePtr() == Ptr &&
- cast<MemIntrinsicSDNode>(User)->getChain() == Chain &&
- cast<MemIntrinsicSDNode>(User)->getMemoryVT().getSizeInBits() ==
- MemVT.getSizeInBits() &&
+ for (SDNode *User : Chain->uses()) {
+ if (User != N &&
+ (User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD ||
+ User->getOpcode() == X86ISD::VBROADCAST_LOAD ||
+ ISD::isNormalLoad(User)) &&
+ cast<MemSDNode>(User)->getChain() == Chain &&
!User->hasAnyUseOfValue(1) &&
User->getValueSizeInBits(0).getFixedValue() >
RegVT.getFixedSizeInBits()) {
- SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, SDLoc(N),
- RegVT.getSizeInBits());
- Extract = DAG.getBitcast(RegVT, Extract);
- return DCI.CombineTo(N, Extract, SDValue(User, 1));
+ if (User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD &&
+ cast<MemSDNode>(User)->getBasePtr() == Ptr &&
+ cast<MemSDNode>(User)->getMemoryVT().getSizeInBits() ==
+ MemVT.getSizeInBits()) {
+ SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, SDLoc(N),
+ RegVT.getSizeInBits());
+ Extract = DAG.getBitcast(RegVT, Extract);
+ return DCI.CombineTo(N, Extract, SDValue(User, 1));
+ }
+ auto MatchingBits = [](const APInt &Undefs, const APInt &UserUndefs,
+ ArrayRef<APInt> Bits, ArrayRef<APInt> UserBits) {
+ for (unsigned I = 0, E = Undefs.getBitWidth(); I != E; ++I) {
+ if (Undefs[I])
+ continue;
+ if (UserUndefs[I] || Bits[I] != UserBits[I])
+ return false;
+ }
+ return true;
+ };
+ // See if we are loading a constant that matches in the lower
+ // bits of a longer constant (but from a different constant pool ptr).
+ EVT UserVT = User->getValueType(0);
+ SDValue UserPtr = cast<MemSDNode>(User)->getBasePtr();
+ const Constant *LdC = getTargetConstantFromBasePtr(Ptr);
+ const Constant *UserC = getTargetConstantFromBasePtr(UserPtr);
+ if (LdC && UserC && UserPtr != Ptr) {
+ unsigned LdSize = LdC->getType()->getPrimitiveSizeInBits();
+ unsigned UserSize = UserC->getType()->getPrimitiveSizeInBits();
+ if (LdSize < UserSize || !ISD::isNormalLoad(User)) {
+ APInt Undefs, UserUndefs;
+ SmallVector<APInt> Bits, UserBits;
+ unsigned NumBits = std::min(RegVT.getScalarSizeInBits(),
+ UserVT.getScalarSizeInBits());
+ if (getTargetConstantBitsFromNode(SDValue(N, 0), NumBits, Undefs,
+ Bits) &&
+ getTargetConstantBitsFromNode(SDValue(User, 0), NumBits,
+ UserUndefs, UserBits)) {
+ if (MatchingBits(Undefs, UserUndefs, Bits, UserBits)) {
+ SDValue Extract = extractSubVector(
+ SDValue(User, 0), 0, DAG, SDLoc(N), RegVT.getSizeInBits());
+ Extract = DAG.getBitcast(RegVT, Extract);
+ return DCI.CombineTo(N, Extract, SDValue(User, 1));
+ }
+ }
+ }
+ }
}
}
}
@@ -52540,9 +49993,9 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
if (PtrVT != Ld->getBasePtr().getSimpleValueType()) {
SDValue Cast =
DAG.getAddrSpaceCast(dl, PtrVT, Ld->getBasePtr(), AddrSpace, 0);
- return DAG.getLoad(RegVT, dl, Ld->getChain(), Cast, Ld->getPointerInfo(),
- Ld->getOriginalAlign(),
- Ld->getMemOperand()->getFlags());
+ return DAG.getExtLoad(Ext, dl, RegVT, Ld->getChain(), Cast,
+ Ld->getPointerInfo(), MemVT, Ld->getOriginalAlign(),
+ Ld->getMemOperand()->getFlags());
}
}
@@ -52602,7 +50055,7 @@ static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp,
Addr = MaskedOp->getBasePtr();
if (TrueMaskElt != 0) {
Offset = TrueMaskElt * EltVT.getStoreSize();
- Addr = DAG.getMemBasePlusOffset(Addr, TypeSize::Fixed(Offset),
+ Addr = DAG.getMemBasePlusOffset(Addr, TypeSize::getFixed(Offset),
SDLoc(MaskedOp));
}
@@ -52896,7 +50349,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
Hi = combinevXi1ConstantToInteger(Hi, DAG);
SDValue Ptr0 = St->getBasePtr();
- SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, TypeSize::Fixed(4), dl);
+ SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, TypeSize::getFixed(4), dl);
SDValue Ch0 =
DAG.getStore(St->getChain(), dl, Lo, Ptr0, St->getPointerInfo(),
@@ -53044,9 +50497,10 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
if (PtrVT != St->getBasePtr().getSimpleValueType()) {
SDValue Cast =
DAG.getAddrSpaceCast(dl, PtrVT, St->getBasePtr(), AddrSpace, 0);
- return DAG.getStore(St->getChain(), dl, StoredVal, Cast,
- St->getPointerInfo(), St->getOriginalAlign(),
- St->getMemOperand()->getFlags(), St->getAAInfo());
+ return DAG.getTruncStore(
+ St->getChain(), dl, StoredVal, Cast, St->getPointerInfo(), StVT,
+ St->getOriginalAlign(), St->getMemOperand()->getFlags(),
+ St->getAAInfo());
}
}
@@ -53063,11 +50517,14 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
bool F64IsLegal =
!Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2();
- if ((VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit()) &&
- isa<LoadSDNode>(St->getValue()) &&
+
+ if (!F64IsLegal || Subtarget.is64Bit())
+ return SDValue();
+
+ if (VT == MVT::i64 && isa<LoadSDNode>(St->getValue()) &&
cast<LoadSDNode>(St->getValue())->isSimple() &&
St->getChain().hasOneUse() && St->isSimple()) {
- LoadSDNode *Ld = cast<LoadSDNode>(St->getValue().getNode());
+ auto *Ld = cast<LoadSDNode>(St->getValue());
if (!ISD::isNormalLoad(Ld))
return SDValue();
@@ -53094,7 +50551,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
// to get past legalization. The execution dependencies fixup pass will
// choose the optimal machine instruction for the store if this really is
// an integer or v2f32 rather than an f64.
- if (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit() &&
+ if (VT == MVT::i64 &&
St->getOperand(1).getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
SDValue OldExtract = St->getOperand(1);
SDValue ExtOp0 = OldExtract.getOperand(0);
@@ -53407,37 +50864,18 @@ static SDValue combineFMulcFCMulc(SDNode *N, SelectionDAG &DAG,
SDValue RHS = N->getOperand(1);
int CombineOpcode =
N->getOpcode() == X86ISD::VFCMULC ? X86ISD::VFMULC : X86ISD::VFCMULC;
- auto isConjugationConstant = [](const Constant *c) {
- if (const auto *CI = dyn_cast<ConstantInt>(c)) {
- APInt ConjugationInt32 = APInt(32, 0x80000000, true);
- APInt ConjugationInt64 = APInt(64, 0x8000000080000000ULL, true);
- switch (CI->getBitWidth()) {
- case 16:
- return false;
- case 32:
- return CI->getValue() == ConjugationInt32;
- case 64:
- return CI->getValue() == ConjugationInt64;
- default:
- llvm_unreachable("Unexpected bit width");
- }
- }
- if (const auto *CF = dyn_cast<ConstantFP>(c))
- return CF->getType()->isFloatTy() && CF->isNegativeZeroValue();
- return false;
- };
auto combineConjugation = [&](SDValue &r) {
if (LHS->getOpcode() == ISD::BITCAST && RHS.hasOneUse()) {
SDValue XOR = LHS.getOperand(0);
if (XOR->getOpcode() == ISD::XOR && XOR.hasOneUse()) {
- SDValue XORRHS = XOR.getOperand(1);
- if (XORRHS.getOpcode() == ISD::BITCAST && XORRHS.hasOneUse())
- XORRHS = XORRHS.getOperand(0);
- if (XORRHS.getOpcode() == X86ISD::VBROADCAST_LOAD &&
- XORRHS.getOperand(1).getNumOperands()) {
- ConstantPoolSDNode *CP =
- dyn_cast<ConstantPoolSDNode>(XORRHS.getOperand(1).getOperand(0));
- if (CP && isConjugationConstant(CP->getConstVal())) {
+ KnownBits XORRHS = DAG.computeKnownBits(XOR.getOperand(1));
+ if (XORRHS.isConstant()) {
+ APInt ConjugationInt32 = APInt(32, 0x80000000, true);
+ APInt ConjugationInt64 = APInt(64, 0x8000000080000000ULL, true);
+ if ((XORRHS.getBitWidth() == 32 &&
+ XORRHS.getConstant() == ConjugationInt32) ||
+ (XORRHS.getBitWidth() == 64 &&
+ XORRHS.getConstant() == ConjugationInt64)) {
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
SDValue I2F = DAG.getBitcast(VT, LHS.getOperand(0).getOperand(0));
SDValue FCMulC = DAG.getNode(CombineOpcode, SDLoc(N), VT, RHS, I2F);
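
The known-bits rewrite above still recognizes the same conjugation masks the deleted lambda matched: 0x80000000 per 32-bit lane (and its 64-bit doubling). A short standalone sketch of why that constant conjugates a packed half-precision complex value, assuming the usual layout of real in the low f16 and imaginary in the high f16:

#include <cassert>
#include <cstdint>

// Flipping bit 31 of a {imag:f16 | real:f16} lane negates the sign of the
// imaginary half and leaves the real half untouched, i.e. conjugation.
int main() {
  uint32_t Lane = (0x3C00u << 16) | 0x4000u; // imag = 1.0h, real = 2.0h
  uint32_t Conj = Lane ^ 0x80000000u;
  assert((Conj >> 16) == 0xBC00u);  // imag is now -1.0h
  assert((Conj & 0xFFFFu) == 0x4000u); // real unchanged
}
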
@@ -53471,20 +50909,11 @@ static SDValue combineFaddCFmul(SDNode *N, SelectionDAG &DAG,
return DAG.getTarget().Options.NoSignedZerosFPMath ||
Flags.hasNoSignedZeros();
};
- auto IsVectorAllNegativeZero = [](const SDNode *N) {
- if (N->getOpcode() != X86ISD::VBROADCAST_LOAD)
- return false;
- assert(N->getSimpleValueType(0).getScalarType() == MVT::f32 &&
- "Unexpected vector type!");
- if (ConstantPoolSDNode *CP =
- dyn_cast<ConstantPoolSDNode>(N->getOperand(1)->getOperand(0))) {
- APInt AI = APInt(32, 0x80008000, true);
- if (const auto *CI = dyn_cast<ConstantInt>(CP->getConstVal()))
- return CI->getValue() == AI;
- if (const auto *CF = dyn_cast<ConstantFP>(CP->getConstVal()))
- return CF->getValue() == APFloat(APFloat::IEEEsingle(), AI);
- }
- return false;
+ auto IsVectorAllNegativeZero = [&DAG](SDValue Op) {
+ APInt AI = APInt(32, 0x80008000, true);
+ KnownBits Bits = DAG.computeKnownBits(Op);
+ return Bits.getBitWidth() == 32 && Bits.isConstant() &&
+ Bits.getConstant() == AI;
};
if (N->getOpcode() != ISD::FADD || !Subtarget.hasFP16() ||
@@ -53516,7 +50945,7 @@ static SDValue combineFaddCFmul(SDNode *N, SelectionDAG &DAG,
if ((Opcode == X86ISD::VFMADDC || Opcode == X86ISD::VFCMADDC) &&
((ISD::isBuildVectorAllZeros(Op0->getOperand(2).getNode()) &&
HasNoSignedZero(Op0->getFlags())) ||
- IsVectorAllNegativeZero(Op0->getOperand(2).getNode()))) {
+ IsVectorAllNegativeZero(Op0->getOperand(2)))) {
MulOp0 = Op0.getOperand(0);
MulOp1 = Op0.getOperand(1);
IsConj = Opcode == X86ISD::VFCMADDC;
@@ -53634,93 +51063,6 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// This function transforms vector truncation of 'extended sign-bits' or
-/// 'extended zero-bits' values.
-/// vXi16/vXi32/vXi64 to vXi8/vXi16/vXi32 into X86ISD::PACKSS/PACKUS operations.
-/// TODO: Remove this and just use LowerTruncateVecPackWithSignBits.
-static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
- SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- // Requires SSE2.
- if (!Subtarget.hasSSE2())
- return SDValue();
-
- if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple())
- return SDValue();
-
- SDValue In = N->getOperand(0);
- if (!In.getValueType().isSimple())
- return SDValue();
-
- MVT VT = N->getValueType(0).getSimpleVT();
- MVT SVT = VT.getScalarType();
-
- MVT InVT = In.getValueType().getSimpleVT();
- MVT InSVT = InVT.getScalarType();
-
- // Check we have a truncation suited for PACKSS/PACKUS.
- if (!isPowerOf2_32(VT.getVectorNumElements()))
- return SDValue();
- if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32)
- return SDValue();
- if (InSVT != MVT::i16 && InSVT != MVT::i32 && InSVT != MVT::i64)
- return SDValue();
-
- // Truncation to sub-128bit vXi32 can be better handled with shuffles.
- if (SVT == MVT::i32 && VT.getSizeInBits() < 128)
- return SDValue();
-
- // AVX512 has fast truncate, but if the input is already going to be split,
- // there's no harm in trying pack.
- if (Subtarget.hasAVX512() &&
- !(!Subtarget.useAVX512Regs() && VT.is256BitVector() &&
- InVT.is512BitVector())) {
- // PACK should still be worth it for 128-bit vectors if the sources were
- // originally concatenated from subvectors.
- if (VT.getSizeInBits() > 128 || !isFreeToSplitVector(In.getNode(), DAG))
- return SDValue();
- }
-
- unsigned NumPackedSignBits = std::min<unsigned>(SVT.getSizeInBits(), 16);
- unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8;
-
- // Use PACKUS if the input has zero-bits that extend all the way to the
- // packed/truncated value. e.g. masks, zext_in_reg, etc.
- KnownBits Known = DAG.computeKnownBits(In);
- unsigned NumLeadingZeroBits = Known.countMinLeadingZeros();
- if (NumLeadingZeroBits >= (InSVT.getSizeInBits() - NumPackedZeroBits))
- return truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget);
-
- // Use PACKSS if the input has sign-bits that extend all the way to the
- // packed/truncated value. e.g. Comparison result, sext_in_reg, etc.
- unsigned NumSignBits = DAG.ComputeNumSignBits(In);
-
- // Don't use PACKSS for vXi64 -> vXi32 truncations unless we're dealing with
- // a sign splat. ComputeNumSignBits struggles to see through BITCASTs later
- // on and combines/simplifications can't then use it.
- if (SVT == MVT::i32 && NumSignBits != InSVT.getSizeInBits())
- return SDValue();
-
- unsigned MinSignBits = InSVT.getSizeInBits() - NumPackedSignBits;
- if (NumSignBits > MinSignBits)
- return truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget);
-
- // If we have a srl that only generates signbits that we will discard in
- // the truncation then we can use PACKSS by converting the srl to a sra.
- // SimplifyDemandedBits often relaxes sra to srl so we need to reverse it.
- if (In.getOpcode() == ISD::SRL && N->isOnlyUserOf(In.getNode()))
- if (const APInt *ShAmt = DAG.getValidShiftAmountConstant(
- In, APInt::getAllOnes(VT.getVectorNumElements()))) {
- if (*ShAmt == MinSignBits) {
- SDValue NewIn = DAG.getNode(ISD::SRA, DL, InVT, In->ops());
- return truncateVectorWithPACK(X86ISD::PACKSS, VT, NewIn, DL, DAG,
- Subtarget);
- }
- }
-
- return SDValue();
-}
-
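
The removed combine relied on PACKSS/PACKUS acting as a plain truncation once enough sign or zero bits reach the packed width; per the TODO, that logic is now handled by LowerTruncateVecPackWithSignBits. A scalar sketch of the PACKSS case, under that assumption:

#include <cassert>
#include <cstdint>

// Signed saturation degenerates to truncation when the source's sign bits
// already extend past the packed width.
static int8_t packss16to8(int16_t V) {
  return (int8_t)(V > 127 ? 127 : V < -128 ? -128 : V);
}

int main() {
  int16_t SignExtended = -5; // 13+ sign bits, far more than the 8 required
  assert(packss16to8(SignExtended) == (int8_t)SignExtended); // no clamping
}
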
// Try to form a MULHU or MULHS node by looking for
// (trunc (srl (mul ext, ext), 16))
// TODO: This is X86 specific because we want to be able to handle wide types
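
In scalar form, the pattern this combine looks for is exactly a high-half multiply:

#include <cassert>
#include <cstdint>

// (trunc (srl (mul ext(a), ext(b)), 16)) == high 16 bits of a 16x16 multiply.
static uint16_t mulhu16(uint16_t A, uint16_t B) {
  return (uint16_t)(((uint32_t)A * (uint32_t)B) >> 16);
}

int main() {
  assert(mulhu16(0xFFFF, 0xFFFF) == 0xFFFE); // 0xFFFE0001 >> 16
  assert(mulhu16(0x8000, 2) == 1);           // 0x10000 >> 16
}
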
@@ -53977,10 +51319,6 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::MMX_MOVD2W, DL, MVT::i32, BCSrc);
}
- // Try to truncate extended sign/zero bits with PACKSS/PACKUS.
- if (SDValue V = combineVectorSignBitsTruncation(N, DL, DAG, Subtarget))
- return V;
-
return SDValue();
}
@@ -55018,9 +52356,13 @@ static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG,
if (Add.getOpcode() != ISD::ADD)
return SDValue();
+ SDValue AddOp0 = Add.getOperand(0);
+ SDValue AddOp1 = Add.getOperand(1);
bool Sext = Ext->getOpcode() == ISD::SIGN_EXTEND;
bool NSW = Add->getFlags().hasNoSignedWrap();
bool NUW = Add->getFlags().hasNoUnsignedWrap();
+ NSW = NSW || (Sext && DAG.willNotOverflowAdd(true, AddOp0, AddOp1));
+ NUW = NUW || (!Sext && DAG.willNotOverflowAdd(false, AddOp0, AddOp1));
// We need an 'add nsw' feeding into the 'sext' or 'add nuw' feeding
// into the 'zext'
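
The new willNotOverflowAdd calls let the combine prove nsw/nuw itself rather than relying only on recorded flags. A scalar illustration of why the promotion is sound once wrapping is ruled out:

#include <cassert>
#include <cstdint>

int main() {
  int32_t X = 100;
  int32_t C = 23;                      // 'add nsw' cannot overflow here
  int64_t A = (int64_t)(X + C);        // sext(add nsw X, C)
  int64_t B = (int64_t)X + (int64_t)C; // add(sext X, sext C)
  assert(A == B);                      // identical once wrap is excluded
}
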
@@ -55030,8 +52372,8 @@ static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG,
// Having a constant operand to the 'add' ensures that we are not increasing
// the instruction count because the constant is extended for free below.
// A constant operand can also become the displacement field of an LEA.
- auto *AddOp1 = dyn_cast<ConstantSDNode>(Add.getOperand(1));
- if (!AddOp1)
+ auto *AddOp1C = dyn_cast<ConstantSDNode>(AddOp1);
+ if (!AddOp1C)
return SDValue();
// Don't make the 'add' bigger if there's no hope of combining it with some
@@ -55050,10 +52392,9 @@ static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG,
return SDValue();
// Everything looks good, so pull the '{s|z}ext' ahead of the 'add'.
- int64_t AddConstant = Sext ? AddOp1->getSExtValue() : AddOp1->getZExtValue();
- SDValue AddOp0 = Add.getOperand(0);
+ int64_t AddC = Sext ? AddOp1C->getSExtValue() : AddOp1C->getZExtValue();
SDValue NewExt = DAG.getNode(Ext->getOpcode(), SDLoc(Ext), VT, AddOp0);
- SDValue NewConstant = DAG.getConstant(AddConstant, SDLoc(Add), VT);
+ SDValue NewConstant = DAG.getConstant(AddC, SDLoc(Add), VT);
// The wider add is guaranteed to not wrap because both operands are
// sign-extended.
@@ -55821,43 +53162,10 @@ static SDValue combineTESTP(SDNode *N, SelectionDAG &DAG,
}
static SDValue combineX86GatherScatter(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
+ TargetLowering::DAGCombinerInfo &DCI) {
auto *MemOp = cast<X86MaskedGatherScatterSDNode>(N);
- SDValue BasePtr = MemOp->getBasePtr();
- SDValue Index = MemOp->getIndex();
- SDValue Scale = MemOp->getScale();
SDValue Mask = MemOp->getMask();
- // Attempt to fold an index scale into the scale value directly.
- // For smaller indices, implicit sext is performed BEFORE scale, preventing
- // this fold under most circumstances.
- // TODO: Move this into X86DAGToDAGISel::matchVectorAddressRecursively?
- if ((Index.getOpcode() == X86ISD::VSHLI ||
- (Index.getOpcode() == ISD::ADD &&
- Index.getOperand(0) == Index.getOperand(1))) &&
- isa<ConstantSDNode>(Scale) &&
- BasePtr.getScalarValueSizeInBits() == Index.getScalarValueSizeInBits()) {
- unsigned ShiftAmt =
- Index.getOpcode() == ISD::ADD ? 1 : Index.getConstantOperandVal(1);
- uint64_t ScaleAmt = cast<ConstantSDNode>(Scale)->getZExtValue();
- uint64_t NewScaleAmt = ScaleAmt * (1ULL << ShiftAmt);
- if (isPowerOf2_64(NewScaleAmt) && NewScaleAmt <= 8) {
- SDValue NewIndex = Index.getOperand(0);
- SDValue NewScale =
- DAG.getTargetConstant(NewScaleAmt, SDLoc(N), Scale.getValueType());
- if (N->getOpcode() == X86ISD::MGATHER)
- return getAVX2GatherNode(N->getOpcode(), SDValue(N, 0), DAG,
- MemOp->getOperand(1), Mask,
- MemOp->getBasePtr(), NewIndex, NewScale,
- MemOp->getChain(), Subtarget);
- if (N->getOpcode() == X86ISD::MSCATTER)
- return getScatterNode(N->getOpcode(), SDValue(N, 0), DAG,
- MemOp->getOperand(1), Mask, MemOp->getBasePtr(),
- NewIndex, NewScale, MemOp->getChain(), Subtarget);
- }
- }
-
// With vector masks we only demand the upper bit of the mask.
if (Mask.getScalarValueSizeInBits() != 1) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -56137,24 +53445,31 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
- // UINT_TO_FP(vXi1~15) -> UINT_TO_FP(ZEXT(vXi1~15 to vXi16))
- // UINT_TO_FP(vXi17~31) -> UINT_TO_FP(ZEXT(vXi17~31 to vXi32))
- // UINT_TO_FP(vXi33~63) -> UINT_TO_FP(ZEXT(vXi33~63 to vXi64))
+  // Using i16 as an intermediate type is a bad idea, unless we have HW support
+  // for it. Therefore, for type sizes equal to or smaller than 32, just go with i32.
+ // if hasFP16 support:
+ // UINT_TO_FP(vXi1~15) -> SINT_TO_FP(ZEXT(vXi1~15 to vXi16))
+ // UINT_TO_FP(vXi17~31) -> SINT_TO_FP(ZEXT(vXi17~31 to vXi32))
+ // else
+ // UINT_TO_FP(vXi1~31) -> SINT_TO_FP(ZEXT(vXi1~31 to vXi32))
+ // UINT_TO_FP(vXi33~63) -> SINT_TO_FP(ZEXT(vXi33~63 to vXi64))
if (InVT.isVector() && VT.getVectorElementType() == MVT::f16) {
unsigned ScalarSize = InVT.getScalarSizeInBits();
- if (ScalarSize == 16 || ScalarSize == 32 || ScalarSize >= 64)
+ if ((ScalarSize == 16 && Subtarget.hasFP16()) || ScalarSize == 32 ||
+ ScalarSize >= 64)
return SDValue();
SDLoc dl(N);
- EVT DstVT = EVT::getVectorVT(*DAG.getContext(),
- ScalarSize < 16 ? MVT::i16
- : ScalarSize < 32 ? MVT::i32
- : MVT::i64,
- InVT.getVectorNumElements());
+ EVT DstVT =
+ EVT::getVectorVT(*DAG.getContext(),
+ (Subtarget.hasFP16() && ScalarSize < 16) ? MVT::i16
+ : ScalarSize < 32 ? MVT::i32
+ : MVT::i64,
+ InVT.getVectorNumElements());
SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
if (IsStrict)
- return DAG.getNode(ISD::STRICT_UINT_TO_FP, dl, {VT, MVT::Other},
+ return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
{N->getOperand(0), P});
- return DAG.getNode(ISD::UINT_TO_FP, dl, VT, P);
+ return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
}
// UINT_TO_FP(vXi1) -> SINT_TO_FP(ZEXT(vXi1 to vXi32))
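
The switch from the unsigned to the signed conversion opcodes is safe because the operand has just been zero-extended, so its sign bit is known clear. A scalar sketch:

#include <cassert>
#include <cstdint>

int main() {
  uint16_t U = 0xFFFF;
  int32_t Z = (int32_t)(uint32_t)U; // ZEXT(i16 -> i32): still 65535, non-negative
  assert((float)Z == (float)U);     // signed convert of Z == unsigned convert of U
}
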
@@ -56200,19 +53515,26 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
- // SINT_TO_FP(vXi1~15) -> SINT_TO_FP(SEXT(vXi1~15 to vXi16))
- // SINT_TO_FP(vXi17~31) -> SINT_TO_FP(SEXT(vXi17~31 to vXi32))
+  // Using i16 as an intermediate type is a bad idea, unless we have HW support
+  // for it. Therefore, for type sizes equal to or smaller than 32, just go with i32.
+ // if hasFP16 support:
+ // SINT_TO_FP(vXi1~15) -> SINT_TO_FP(SEXT(vXi1~15 to vXi16))
+ // SINT_TO_FP(vXi17~31) -> SINT_TO_FP(SEXT(vXi17~31 to vXi32))
+ // else
+ // SINT_TO_FP(vXi1~31) -> SINT_TO_FP(SEXT(vXi1~31 to vXi32))
// SINT_TO_FP(vXi33~63) -> SINT_TO_FP(SEXT(vXi33~63 to vXi64))
if (InVT.isVector() && VT.getVectorElementType() == MVT::f16) {
unsigned ScalarSize = InVT.getScalarSizeInBits();
- if (ScalarSize == 16 || ScalarSize == 32 || ScalarSize >= 64)
+ if ((ScalarSize == 16 && Subtarget.hasFP16()) || ScalarSize == 32 ||
+ ScalarSize >= 64)
return SDValue();
SDLoc dl(N);
- EVT DstVT = EVT::getVectorVT(*DAG.getContext(),
- ScalarSize < 16 ? MVT::i16
- : ScalarSize < 32 ? MVT::i32
- : MVT::i64,
- InVT.getVectorNumElements());
+ EVT DstVT =
+ EVT::getVectorVT(*DAG.getContext(),
+ (Subtarget.hasFP16() && ScalarSize < 16) ? MVT::i16
+ : ScalarSize < 32 ? MVT::i32
+ : MVT::i64,
+ InVT.getVectorNumElements());
SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
if (IsStrict)
return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
@@ -56360,7 +53682,8 @@ static bool onlyZeroFlagUsed(SDValue Flags) {
return true;
}
-static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
// Only handle test patterns.
if (!isNullConstant(N->getOperand(1)))
return SDValue();
@@ -56372,6 +53695,7 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
SDLoc dl(N);
SDValue Op = N->getOperand(0);
EVT VT = Op.getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If we have a constant logical shift that's only used in a comparison
// against zero turn it into an equivalent AND. This allows turning it into
@@ -56395,12 +53719,41 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
}
}
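
A quick scalar check of the shift-to-AND equivalence described above; when only the zero flag is consumed, both forms test exactly the same bits:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0x12345678;
  unsigned C = 20;
  assert(((X >> C) == 0) == ((X & (0xFFFFFFFFu << C)) == 0)); // srl vs. mask
  assert(((X << C) == 0) == ((X & (0xFFFFFFFFu >> C)) == 0)); // shl vs. mask
}
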
+ // If we're extracting from an AVX512 bool vector and comparing against zero,
+ // then try to just bitcast the vector to an integer to use TEST/BT directly.
+ // (and (extract_elt (kshiftr vXi1, C), 0), 1) -> (and (bc vXi1), 1<<C)
+ if (Op.getOpcode() == ISD::AND && isOneConstant(Op.getOperand(1)) &&
+ Op.hasOneUse() && onlyZeroFlagUsed(SDValue(N, 0))) {
+ SDValue Src = Op.getOperand(0);
+ if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ isNullConstant(Src.getOperand(1)) &&
+ Src.getOperand(0).getValueType().getScalarType() == MVT::i1) {
+ SDValue BoolVec = Src.getOperand(0);
+ unsigned ShAmt = 0;
+ if (BoolVec.getOpcode() == X86ISD::KSHIFTR) {
+ ShAmt = BoolVec.getConstantOperandVal(1);
+ BoolVec = BoolVec.getOperand(0);
+ }
+ BoolVec = widenMaskVector(BoolVec, false, Subtarget, DAG, dl);
+ EVT VecVT = BoolVec.getValueType();
+ unsigned BitWidth = VecVT.getVectorNumElements();
+ EVT BCVT = EVT::getIntegerVT(*DAG.getContext(), BitWidth);
+ if (TLI.isTypeLegal(VecVT) && TLI.isTypeLegal(BCVT)) {
+ APInt Mask = APInt::getOneBitSet(BitWidth, ShAmt);
+ Op = DAG.getBitcast(BCVT, BoolVec);
+ Op = DAG.getNode(ISD::AND, dl, BCVT, Op,
+ DAG.getConstant(Mask, dl, BCVT));
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, dl, BCVT));
+ }
+ }
+ }
+
// Peek through any zero-extend if we're only testing for a zero result.
if (Op.getOpcode() == ISD::ZERO_EXTEND && onlyZeroFlagUsed(SDValue(N, 0))) {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
- if (SrcVT.getScalarSizeInBits() >= 8 &&
- DAG.getTargetLoweringInfo().isTypeLegal(SrcVT))
+ if (SrcVT.getScalarSizeInBits() >= 8 && TLI.isTypeLegal(SrcVT))
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Src,
DAG.getConstant(0, dl, SrcVT));
}
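
A scalar model of the new mask-extract combine: testing one element of an AVX512 vXi1 mask is a single-bit test on the mask viewed as an integer.

#include <cassert>
#include <cstdint>

int main() {
  uint16_t MaskReg = 0x4;                     // v16i1 with only element 2 set
  unsigned ShAmt = 2;                         // element picked by KSHIFTR
  bool Elt = (MaskReg >> ShAmt) & 1;          // (and (extract_elt (kshiftr v, C), 0), 1)
  bool Test = (MaskReg & (1u << ShAmt)) != 0; // (and (bitcast v16i1), 1 << C)
  assert(Elt == Test);
}
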
@@ -57058,7 +54411,7 @@ static SDValue combineSubSetcc(SDNode *N, SelectionDAG &DAG) {
SDValue SetCC = Op1.getOperand(0);
X86::CondCode CC = (X86::CondCode)SetCC.getConstantOperandVal(0);
X86::CondCode NewCC = X86::GetOppositeBranchCondition(CC);
- uint64_t NewImm = Op0C->getZExtValue() - 1;
+ APInt NewImm = Op0C->getAPIntValue() - 1;
SDLoc DL(Op1);
SDValue NewSetCC = getSETCC(NewCC, SetCC.getOperand(1), DL, DAG);
NewSetCC = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NewSetCC);
@@ -57172,6 +54525,9 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
SDValue Op0 = Ops[0];
bool IsSplat = llvm::all_equal(Ops);
+ unsigned NumOps = Ops.size();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ LLVMContext &Ctx = *DAG.getContext();
// Repeated subvectors.
if (IsSplat &&
@@ -57180,25 +54536,6 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
if (Op0.getOpcode() == X86ISD::VBROADCAST)
return DAG.getNode(Op0.getOpcode(), DL, VT, Op0.getOperand(0));
- // If this simple subvector or scalar/subvector broadcast_load is inserted
- // into both halves, use a larger broadcast_load. Update other uses to use
- // an extracted subvector.
- if (ISD::isNormalLoad(Op0.getNode()) ||
- Op0.getOpcode() == X86ISD::VBROADCAST_LOAD ||
- Op0.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) {
- auto *Mem = cast<MemSDNode>(Op0);
- unsigned Opc = Op0.getOpcode() == X86ISD::VBROADCAST_LOAD
- ? X86ISD::VBROADCAST_LOAD
- : X86ISD::SUBV_BROADCAST_LOAD;
- if (SDValue BcastLd =
- getBROADCAST_LOAD(Opc, DL, VT, Mem->getMemoryVT(), Mem, 0, DAG)) {
- SDValue BcastSrc =
- extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits());
- DAG.ReplaceAllUsesOfValueWith(Op0, BcastSrc);
- return BcastLd;
- }
- }
-
// concat_vectors(movddup(x),movddup(x)) -> broadcast(x)
if (Op0.getOpcode() == X86ISD::MOVDDUP && VT == MVT::v4f64 &&
(Subtarget.hasAVX2() ||
@@ -57219,10 +54556,16 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
// concat_vectors(extract_subvector(broadcast(x)),
// extract_subvector(broadcast(x))) -> broadcast(x)
+ // concat_vectors(extract_subvector(subv_broadcast(x)),
+ // extract_subvector(subv_broadcast(x))) -> subv_broadcast(x)
if (Op0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
Op0.getOperand(0).getValueType() == VT) {
- if (Op0.getOperand(0).getOpcode() == X86ISD::VBROADCAST ||
- Op0.getOperand(0).getOpcode() == X86ISD::VBROADCAST_LOAD)
+ SDValue SrcVec = Op0.getOperand(0);
+ if (SrcVec.getOpcode() == X86ISD::VBROADCAST ||
+ SrcVec.getOpcode() == X86ISD::VBROADCAST_LOAD)
+ return Op0.getOperand(0);
+ if (SrcVec.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD &&
+ Op0.getValueType() == cast<MemSDNode>(SrcVec)->getMemoryVT())
return Op0.getOperand(0);
}
}
@@ -57230,7 +54573,7 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
// concat(extract_subvector(v0,c0), extract_subvector(v1,c1)) -> vperm2x128.
// Only concat of subvector high halves which vperm2x128 is best at.
// TODO: This should go in combineX86ShufflesRecursively eventually.
- if (VT.is256BitVector() && Ops.size() == 2) {
+ if (VT.is256BitVector() && NumOps == 2) {
SDValue Src0 = peekThroughBitcasts(Ops[0]);
SDValue Src1 = peekThroughBitcasts(Ops[1]);
if (Src0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
@@ -57263,18 +54606,21 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
};
auto IsConcatFree = [](MVT VT, ArrayRef<SDValue> SubOps, unsigned Op) {
+ bool AllConstants = true;
+ bool AllSubVectors = true;
for (unsigned I = 0, E = SubOps.size(); I != E; ++I) {
SDValue Sub = SubOps[I].getOperand(Op);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
- if (Sub.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
- Sub.getOperand(0).getValueType() != VT ||
- Sub.getConstantOperandAPInt(1) != (I * NumSubElts))
- return false;
- }
- return true;
+ SDValue BC = peekThroughBitcasts(Sub);
+ AllConstants &= ISD::isBuildVectorOfConstantSDNodes(BC.getNode()) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(BC.getNode());
+ AllSubVectors &= Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ Sub.getOperand(0).getValueType() == VT &&
+ Sub.getConstantOperandAPInt(1) == (I * NumSubElts);
+ }
+ return AllConstants || AllSubVectors;
};
- unsigned NumOps = Ops.size();
switch (Op0.getOpcode()) {
case X86ISD::VBROADCAST: {
if (!IsSplat && llvm::all_of(Ops, [](SDValue Op) {
@@ -57316,7 +54662,7 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
case X86ISD::UNPCKH:
case X86ISD::UNPCKL: {
// Don't concatenate build_vector patterns.
- if (!IsSplat && VT.getScalarSizeInBits() >= 32 &&
+ if (!IsSplat && EltSizeInBits >= 32 &&
((VT.is256BitVector() && Subtarget.hasInt256()) ||
(VT.is512BitVector() && Subtarget.useAVX512Regs())) &&
none_of(Ops, [](SDValue Op) {
@@ -57341,7 +54687,7 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
}
[[fallthrough]];
case X86ISD::VPERMILPI:
- if (!IsSplat && VT.getScalarSizeInBits() == 32 &&
+ if (!IsSplat && EltSizeInBits == 32 &&
(VT.is256BitVector() ||
(VT.is512BitVector() && Subtarget.useAVX512Regs())) &&
all_of(Ops, [&Op0](SDValue Op) {
@@ -57433,6 +54779,44 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
}
}
break;
+ case X86ISD::VPERM2X128: {
+ if (!IsSplat && VT.is512BitVector() && Subtarget.useAVX512Regs()) {
+ assert(NumOps == 2 && "Bad concat_vectors operands");
+ unsigned Imm0 = Ops[0].getConstantOperandVal(2);
+ unsigned Imm1 = Ops[1].getConstantOperandVal(2);
+ // TODO: Handle zero'd subvectors.
+ if ((Imm0 & 0x88) == 0 && (Imm1 & 0x88) == 0) {
+        int Mask[4] = {(int)(Imm0 & 0x03), (int)((Imm0 >> 4) & 0x3),
+                       (int)(Imm1 & 0x03), (int)((Imm1 >> 4) & 0x3)};
+ MVT ShuffleVT = VT.isFloatingPoint() ? MVT::v8f64 : MVT::v8i64;
+ SDValue LHS = concatSubVectors(Ops[0].getOperand(0),
+ Ops[0].getOperand(1), DAG, DL);
+ SDValue RHS = concatSubVectors(Ops[1].getOperand(0),
+ Ops[1].getOperand(1), DAG, DL);
+ SDValue Res = DAG.getNode(X86ISD::SHUF128, DL, ShuffleVT,
+ DAG.getBitcast(ShuffleVT, LHS),
+ DAG.getBitcast(ShuffleVT, RHS),
+ getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
+ return DAG.getBitcast(VT, Res);
+ }
+ }
+ break;
+ }
+ case X86ISD::SHUF128: {
+ if (!IsSplat && NumOps == 2 && VT.is512BitVector()) {
+ unsigned Imm0 = Ops[0].getConstantOperandVal(2);
+ unsigned Imm1 = Ops[1].getConstantOperandVal(2);
+ unsigned Imm = ((Imm0 & 1) << 0) | ((Imm0 & 2) << 1) | 0x08 |
+ ((Imm1 & 1) << 4) | ((Imm1 & 2) << 5) | 0x80;
+ SDValue LHS = concatSubVectors(Ops[0].getOperand(0),
+ Ops[0].getOperand(1), DAG, DL);
+ SDValue RHS = concatSubVectors(Ops[1].getOperand(0),
+ Ops[1].getOperand(1), DAG, DL);
+ return DAG.getNode(X86ISD::SHUF128, DL, VT, LHS, RHS,
+ DAG.getTargetConstant(Imm, DL, MVT::i8));
+ }
+ break;
+ }
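
The immediate arithmetic in the SHUF128 case merges two 256-bit selections into one 512-bit one: the 0x08 and 0x80 bits bias result lanes 1 and 3 into the upper half of the concatenated sources, where each original second operand now lives. A sketch of the computation (helper name hypothetical):

#include <cassert>

static unsigned concatShuf128Imm(unsigned Imm0, unsigned Imm1) {
  return ((Imm0 & 1) << 0) | ((Imm0 & 2) << 1) | 0x08 |
         ((Imm1 & 1) << 4) | ((Imm1 & 2) << 5) | 0x80;
}

int main() {
  // Imm0 = 1 selects lane 1 of the first pair; Imm1 = 2 selects lane 0 of
  // one source and lane 1 of the other in the second pair.
  assert(concatShuf128Imm(0x1, 0x2) == 0xC9); // 0x01 | 0x08 | 0x40 | 0x80
}
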
case ISD::TRUNCATE:
if (!IsSplat && NumOps == 2 && VT.is256BitVector()) {
EVT SrcVT = Ops[0].getOperand(0).getValueType();
@@ -57441,7 +54825,7 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
Subtarget.useAVX512Regs() &&
Subtarget.getPreferVectorWidth() >= 512 &&
(SrcVT.getScalarSizeInBits() > 16 || Subtarget.useBWIRegs())) {
- EVT NewSrcVT = SrcVT.getDoubleNumVectorElementsVT(*DAG.getContext());
+ EVT NewSrcVT = SrcVT.getDoubleNumVectorElementsVT(Ctx);
return DAG.getNode(ISD::TRUNCATE, DL, VT,
ConcatSubOperand(NewSrcVT, Ops, 0));
}
@@ -57498,12 +54882,18 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
case X86ISD::ANDNP:
if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) ||
(VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
- MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
- SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
- NumOps * SrcVT.getVectorNumElements());
return DAG.getNode(Op0.getOpcode(), DL, VT,
- ConcatSubOperand(SrcVT, Ops, 0),
- ConcatSubOperand(SrcVT, Ops, 1));
+ ConcatSubOperand(VT, Ops, 0),
+ ConcatSubOperand(VT, Ops, 1));
+ }
+ break;
+ case X86ISD::PCMPEQ:
+ case X86ISD::PCMPGT:
+ if (!IsSplat && VT.is256BitVector() && Subtarget.hasInt256() &&
+ (IsConcatFree(VT, Ops, 0) || IsConcatFree(VT, Ops, 1))) {
+ return DAG.getNode(Op0.getOpcode(), DL, VT,
+ ConcatSubOperand(VT, Ops, 0),
+ ConcatSubOperand(VT, Ops, 1));
}
break;
case ISD::CTPOP:
@@ -57535,12 +54925,9 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) ||
(VT.is512BitVector() && Subtarget.useAVX512Regs() &&
(EltSizeInBits >= 32 || Subtarget.useBWIRegs())))) {
- MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
- SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
- NumOps * SrcVT.getVectorNumElements());
return DAG.getNode(Op0.getOpcode(), DL, VT,
- ConcatSubOperand(SrcVT, Ops, 0),
- ConcatSubOperand(SrcVT, Ops, 1));
+ ConcatSubOperand(VT, Ops, 0),
+ ConcatSubOperand(VT, Ops, 1));
}
break;
// Due to VADD, VSUB, VMUL can executed on more ports than VINSERT and
@@ -57548,22 +54935,26 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
case ISD::FDIV:
if (!IsSplat && (VT.is256BitVector() ||
(VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
- MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
- SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
- NumOps * SrcVT.getVectorNumElements());
return DAG.getNode(Op0.getOpcode(), DL, VT,
- ConcatSubOperand(SrcVT, Ops, 0),
- ConcatSubOperand(SrcVT, Ops, 1));
+ ConcatSubOperand(VT, Ops, 0),
+ ConcatSubOperand(VT, Ops, 1));
}
break;
case X86ISD::HADD:
case X86ISD::HSUB:
case X86ISD::FHADD:
case X86ISD::FHSUB:
- case X86ISD::PACKSS:
- case X86ISD::PACKUS:
if (!IsSplat && VT.is256BitVector() &&
(VT.isFloatingPoint() || Subtarget.hasInt256())) {
+ return DAG.getNode(Op0.getOpcode(), DL, VT,
+ ConcatSubOperand(VT, Ops, 0),
+ ConcatSubOperand(VT, Ops, 1));
+ }
+ break;
+ case X86ISD::PACKSS:
+ case X86ISD::PACKUS:
+ if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) ||
+ (VT.is512BitVector() && Subtarget.useBWIRegs()))) {
MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
NumOps * SrcVT.getVectorNumElements());
@@ -57591,9 +54982,9 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
(EltSizeInBits >= 32 || Subtarget.hasBWI())) {
EVT SelVT = Ops[0].getOperand(0).getValueType();
if (SelVT.getVectorElementType() == MVT::i1) {
- SelVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- Ops.size() * SelVT.getVectorNumElements());
- if (DAG.getTargetLoweringInfo().isTypeLegal(SelVT))
+ SelVT = EVT::getVectorVT(Ctx, MVT::i1,
+ NumOps * SelVT.getVectorNumElements());
+ if (TLI.isTypeLegal(SelVT))
return DAG.getNode(Op0.getOpcode(), DL, VT,
ConcatSubOperand(SelVT.getSimpleVT(), Ops, 0),
ConcatSubOperand(VT, Ops, 1),
@@ -57602,12 +54993,12 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
}
[[fallthrough]];
case X86ISD::BLENDV:
- if (!IsSplat && VT.is256BitVector() && Ops.size() == 2 &&
+ if (!IsSplat && VT.is256BitVector() && NumOps == 2 &&
(EltSizeInBits >= 32 || Subtarget.hasInt256()) &&
IsConcatFree(VT, Ops, 1) && IsConcatFree(VT, Ops, 2)) {
EVT SelVT = Ops[0].getOperand(0).getValueType();
- SelVT = SelVT.getDoubleNumVectorElementsVT(*DAG.getContext());
- if (DAG.getTargetLoweringInfo().isTypeLegal(SelVT))
+ SelVT = SelVT.getDoubleNumVectorElementsVT(Ctx);
+ if (TLI.isTypeLegal(SelVT))
return DAG.getNode(Op0.getOpcode(), DL, VT,
ConcatSubOperand(SelVT.getSimpleVT(), Ops, 0),
ConcatSubOperand(VT, Ops, 1),
@@ -57622,7 +55013,7 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(Op0))) {
unsigned Fast;
const X86TargetLowering *TLI = Subtarget.getTargetLowering();
- if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ if (TLI->allowsMemoryAccess(Ctx, DAG.getDataLayout(), VT,
*FirstLd->getMemOperand(), &Fast) &&
Fast) {
if (SDValue Ld =
@@ -57635,17 +55026,59 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
if (all_of(Ops, [](SDValue Op) { return getTargetConstantFromNode(Op); })) {
SmallVector<APInt> EltBits;
APInt UndefElts = APInt::getZero(VT.getVectorNumElements());
- for (unsigned I = 0, E = Ops.size(); I != E; ++I) {
+ for (unsigned I = 0; I != NumOps; ++I) {
APInt OpUndefElts;
SmallVector<APInt> OpEltBits;
if (!getTargetConstantBitsFromNode(Ops[I], EltSizeInBits, OpUndefElts,
- OpEltBits, true, false))
- break;
+ OpEltBits, true, false))
+ break;
EltBits.append(OpEltBits);
UndefElts.insertBits(OpUndefElts, I * OpUndefElts.getBitWidth());
}
- if (EltBits.size() == VT.getVectorNumElements())
- return getConstVector(EltBits, UndefElts, VT, DAG, DL);
+ if (EltBits.size() == VT.getVectorNumElements()) {
+ Constant *C = getConstantVector(VT, EltBits, UndefElts, Ctx);
+ MVT PVT = TLI.getPointerTy(DAG.getDataLayout());
+ SDValue CV = DAG.getConstantPool(C, PVT);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachinePointerInfo MPI = MachinePointerInfo::getConstantPool(MF);
+ SDValue Ld = DAG.getLoad(VT, DL, DAG.getEntryNode(), CV, MPI);
+ SDValue Sub = extractSubVector(Ld, 0, DAG, DL, Op0.getValueSizeInBits());
+ DAG.ReplaceAllUsesOfValueWith(Op0, Sub);
+ return Ld;
+ }
+ }
+
+ // If this simple subvector or scalar/subvector broadcast_load is inserted
+ // into both halves, use a larger broadcast_load. Update other uses to use
+ // an extracted subvector.
+ if (IsSplat &&
+ (VT.is256BitVector() || (VT.is512BitVector() && Subtarget.hasAVX512()))) {
+ if (ISD::isNormalLoad(Op0.getNode()) ||
+ Op0.getOpcode() == X86ISD::VBROADCAST_LOAD ||
+ Op0.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) {
+ auto *Mem = cast<MemSDNode>(Op0);
+ unsigned Opc = Op0.getOpcode() == X86ISD::VBROADCAST_LOAD
+ ? X86ISD::VBROADCAST_LOAD
+ : X86ISD::SUBV_BROADCAST_LOAD;
+ if (SDValue BcastLd =
+ getBROADCAST_LOAD(Opc, DL, VT, Mem->getMemoryVT(), Mem, 0, DAG)) {
+ SDValue BcastSrc =
+ extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits());
+ DAG.ReplaceAllUsesOfValueWith(Op0, BcastSrc);
+ return BcastLd;
+ }
+ }
+ }
+
+ // If we're splatting a 128-bit subvector to 512-bits, use SHUF128 directly.
+ if (IsSplat && NumOps == 4 && VT.is512BitVector() &&
+ Subtarget.useAVX512Regs()) {
+ MVT ShuffleVT = VT.isFloatingPoint() ? MVT::v8f64 : MVT::v8i64;
+ SDValue Res = widenSubVector(Op0, false, Subtarget, DAG, DL, 512);
+ Res = DAG.getBitcast(ShuffleVT, Res);
+ Res = DAG.getNode(X86ISD::SHUF128, DL, ShuffleVT, Res, Res,
+ getV4X86ShuffleImm8ForMask({0, 0, 0, 0}, DL, DAG));
+ return DAG.getBitcast(VT, Res);
}
return SDValue();
@@ -57795,6 +55228,15 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
getZeroVector(OpVT, Subtarget, DAG, dl),
SubVectorOps[0], DAG.getIntPtrConstant(0, dl));
+
+ // Attempt to recursively combine to a shuffle.
+ if (all_of(SubVectorOps, [](SDValue SubOp) {
+ return isTargetShuffle(SubOp.getOpcode());
+ })) {
+ SDValue Op(N, 0);
+ if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
+ return Res;
+ }
}
// If this is a broadcast insert into an upper undef, use a larger broadcast.
@@ -58096,11 +55538,10 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
// Combine scalar_to_vector of an extract_vector_elt into an extract_subvec.
if (VT == MVT::v1i1 && Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Src.hasOneUse() && Src.getOperand(0).getValueType().isVector() &&
- Src.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
- if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
- if (C->isZero())
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src.getOperand(0),
- Src.getOperand(1));
+ Src.getOperand(0).getValueType().getVectorElementType() == MVT::i1 &&
+ isNullConstant(Src.getOperand(1)))
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src.getOperand(0),
+ Src.getOperand(1));
// Reduce v2i64 to v4i32 if we don't need the upper bits or are known zero.
// TODO: Move to DAGCombine/SimplifyDemandedBits?
@@ -58597,7 +56038,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::BLENDV: return combineSelect(N, DAG, DCI, Subtarget);
case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget);
case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget);
- case X86ISD::CMP: return combineCMP(N, DAG);
+ case X86ISD::CMP: return combineCMP(N, DAG, Subtarget);
case ISD::ADD: return combineAdd(N, DAG, DCI, Subtarget);
case ISD::SUB: return combineSub(N, DAG, DCI, Subtarget);
case X86ISD::ADD:
@@ -58735,8 +56176,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::MOVMSK: return combineMOVMSK(N, DAG, DCI, Subtarget);
case X86ISD::TESTP: return combineTESTP(N, DAG, DCI, Subtarget);
case X86ISD::MGATHER:
- case X86ISD::MSCATTER:
- return combineX86GatherScatter(N, DAG, DCI, Subtarget);
+ case X86ISD::MSCATTER: return combineX86GatherScatter(N, DAG, DCI);
case ISD::MGATHER:
case ISD::MSCATTER: return combineGatherScatter(N, DAG, DCI);
case X86ISD::PCMPEQ:
@@ -58765,6 +56205,12 @@ bool X86TargetLowering::preferABDSToABSWithNSW(EVT VT) const {
return false;
}
+// Prefer (non-AVX512) vector TRUNCATE(SIGN_EXTEND_INREG(X)) to use of PACKSS.
+bool X86TargetLowering::preferSextInRegOfTruncate(EVT TruncVT, EVT VT,
+ EVT ExtVT) const {
+ return Subtarget.hasAVX512() || !VT.isVector();
+}
+
bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
if (!isTypeLegal(VT))
return false;
@@ -58811,8 +56257,9 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
return true;
}
-SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc& dl,
+SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc &dl,
SDValue Value, SDValue Addr,
+ int JTI,
SelectionDAG &DAG) const {
const Module *M = DAG.getMachineFunction().getMMI().getModule();
Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
@@ -58821,10 +56268,11 @@ SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc& dl,
// notrack prefix to the indirect branch.
// In order to do that we create NT_BRIND SDNode.
// Upon ISEL, the pattern will convert it to jmp with NoTrack prefix.
- return DAG.getNode(X86ISD::NT_BRIND, dl, MVT::Other, Value, Addr);
+ SDValue JTInfo = DAG.getJumpTableDebugInfo(JTI, Value, dl);
+ return DAG.getNode(X86ISD::NT_BRIND, dl, MVT::Other, JTInfo, Addr);
}
- return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, DAG);
+ return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
}
TargetLowering::AndOrSETCCFoldKind
@@ -58940,7 +56388,7 @@ static bool matchAsm(StringRef S, ArrayRef<const char *> Pieces) {
S = S.substr(S.find_first_not_of(" \t")); // Skip leading whitespace.
for (StringRef Piece : Pieces) {
- if (!S.startswith(Piece)) // Check if the piece matches.
+ if (!S.starts_with(Piece)) // Check if the piece matches.
return false;
S = S.substr(Piece.size());
@@ -59148,19 +56596,19 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
- X86TargetLowering::getSingleConstraintMatchWeight(
- AsmOperandInfo &info, const char *constraint) const {
- ConstraintWeight weight = CW_Invalid;
- Value *CallOperandVal = info.CallOperandVal;
- // If we don't have a value, we can't do a match,
- // but allow it at the lowest weight.
+X86TargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &Info, const char *Constraint) const {
+ ConstraintWeight Wt = CW_Invalid;
+ Value *CallOperandVal = Info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
if (!CallOperandVal)
return CW_Default;
- Type *type = CallOperandVal->getType();
+ Type *Ty = CallOperandVal->getType();
// Look at the constraint type.
- switch (*constraint) {
+ switch (*Constraint) {
default:
- weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ Wt = TargetLowering::getSingleConstraintMatchWeight(Info, Constraint);
[[fallthrough]];
case 'R':
case 'q':
@@ -59173,121 +56621,112 @@ TargetLowering::ConstraintWeight
case 'D':
case 'A':
if (CallOperandVal->getType()->isIntegerTy())
- weight = CW_SpecificReg;
+ Wt = CW_SpecificReg;
break;
case 'f':
case 't':
case 'u':
- if (type->isFloatingPointTy())
- weight = CW_SpecificReg;
+ if (Ty->isFloatingPointTy())
+ Wt = CW_SpecificReg;
break;
case 'y':
- if (type->isX86_MMXTy() && Subtarget.hasMMX())
- weight = CW_SpecificReg;
+ if (Ty->isX86_MMXTy() && Subtarget.hasMMX())
+ Wt = CW_SpecificReg;
break;
case 'Y':
- if (StringRef(constraint).size() != 2)
+ if (StringRef(Constraint).size() != 2)
break;
- switch (constraint[1]) {
- default:
- return CW_Invalid;
- // XMM0
- case 'z':
- if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
- ((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasAVX()) ||
- ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512()))
- return CW_SpecificReg;
- return CW_Invalid;
- // Conditional OpMask regs (AVX512)
- case 'k':
- if ((type->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512())
- return CW_Register;
- return CW_Invalid;
- // Any MMX reg
- case 'm':
- if (type->isX86_MMXTy() && Subtarget.hasMMX())
- return weight;
+ switch (Constraint[1]) {
+ default:
+ return CW_Invalid;
+ // XMM0
+ case 'z':
+ if (((Ty->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
+ ((Ty->getPrimitiveSizeInBits() == 256) && Subtarget.hasAVX()) ||
+ ((Ty->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512()))
+ return CW_SpecificReg;
+ return CW_Invalid;
+ // Conditional OpMask regs (AVX512)
+ case 'k':
+ if ((Ty->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512())
+ return CW_Register;
+ return CW_Invalid;
+ // Any MMX reg
+ case 'm':
+ if (Ty->isX86_MMXTy() && Subtarget.hasMMX())
+ return Wt;
+ return CW_Invalid;
+ // Any SSE reg when ISA >= SSE2, same as 'x'
+ case 'i':
+ case 't':
+ case '2':
+ if (!Subtarget.hasSSE2())
return CW_Invalid;
- // Any SSE reg when ISA >= SSE2, same as 'x'
- case 'i':
- case 't':
- case '2':
- if (!Subtarget.hasSSE2())
- return CW_Invalid;
- break;
+ break;
}
break;
case 'v':
- if ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())
- weight = CW_Register;
+ if ((Ty->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())
+ Wt = CW_Register;
[[fallthrough]];
case 'x':
- if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
- ((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasAVX()))
- weight = CW_Register;
+ if (((Ty->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
+ ((Ty->getPrimitiveSizeInBits() == 256) && Subtarget.hasAVX()))
+ Wt = CW_Register;
break;
case 'k':
// Enable conditional vector operations using %k<#> registers.
- if ((type->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512())
- weight = CW_Register;
+ if ((Ty->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512())
+ Wt = CW_Register;
break;
case 'I':
- if (auto *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
+ if (auto *C = dyn_cast<ConstantInt>(Info.CallOperandVal))
if (C->getZExtValue() <= 31)
- weight = CW_Constant;
- }
+ Wt = CW_Constant;
break;
case 'J':
- if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
if (C->getZExtValue() <= 63)
- weight = CW_Constant;
- }
+ Wt = CW_Constant;
break;
case 'K':
- if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f))
- weight = CW_Constant;
- }
+ Wt = CW_Constant;
break;
case 'L':
- if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff))
- weight = CW_Constant;
- }
+ Wt = CW_Constant;
break;
case 'M':
- if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
if (C->getZExtValue() <= 3)
- weight = CW_Constant;
- }
+ Wt = CW_Constant;
break;
case 'N':
- if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
if (C->getZExtValue() <= 0xff)
- weight = CW_Constant;
- }
+ Wt = CW_Constant;
break;
case 'G':
case 'C':
- if (isa<ConstantFP>(CallOperandVal)) {
- weight = CW_Constant;
- }
+ if (isa<ConstantFP>(CallOperandVal))
+ Wt = CW_Constant;
break;
case 'e':
- if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
if ((C->getSExtValue() >= -0x80000000LL) &&
(C->getSExtValue() <= 0x7fffffffLL))
- weight = CW_Constant;
- }
+ Wt = CW_Constant;
break;
case 'Z':
- if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
if (C->getZExtValue() <= 0xffffffff)
- weight = CW_Constant;
- }
+ Wt = CW_Constant;
break;
}
- return weight;
+ return Wt;
}
/// Try to replace an X constraint, which matches anything, with another that
@@ -59334,13 +56773,14 @@ SDValue X86TargetLowering::LowerAsmOutputForConstraint(
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
- std::vector<SDValue>&Ops,
+ StringRef Constraint,
+ std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
SDValue Result;
// Only support length 1 constraints for now.
- if (Constraint.length() > 1) return;
+ if (Constraint.size() > 1)
+ return;
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
@@ -59549,13 +56989,13 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
if (Subtarget.is64Bit()) {
if (VT == MVT::i8 || VT == MVT::i1)
- return std::make_pair(0U, &X86::GR8RegClass);
+ return std::make_pair(0U, &X86::GR8_NOREX2RegClass);
if (VT == MVT::i16)
- return std::make_pair(0U, &X86::GR16RegClass);
+ return std::make_pair(0U, &X86::GR16_NOREX2RegClass);
if (VT == MVT::i32 || VT == MVT::f32)
- return std::make_pair(0U, &X86::GR32RegClass);
+ return std::make_pair(0U, &X86::GR32_NOREX2RegClass);
if (VT != MVT::f80 && !VT.isVector())
- return std::make_pair(0U, &X86::GR64RegClass);
+ return std::make_pair(0U, &X86::GR64_NOREX2RegClass);
break;
}
[[fallthrough]];
@@ -59574,14 +57014,14 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 'r': // GENERAL_REGS
case 'l': // INDEX_REGS
if (VT == MVT::i8 || VT == MVT::i1)
- return std::make_pair(0U, &X86::GR8RegClass);
+ return std::make_pair(0U, &X86::GR8_NOREX2RegClass);
if (VT == MVT::i16)
- return std::make_pair(0U, &X86::GR16RegClass);
+ return std::make_pair(0U, &X86::GR16_NOREX2RegClass);
if (VT == MVT::i32 || VT == MVT::f32 ||
(!VT.isVector() && !Subtarget.is64Bit()))
- return std::make_pair(0U, &X86::GR32RegClass);
+ return std::make_pair(0U, &X86::GR32_NOREX2RegClass);
if (VT != MVT::f80 && !VT.isVector())
- return std::make_pair(0U, &X86::GR64RegClass);
+ return std::make_pair(0U, &X86::GR64_NOREX2RegClass);
break;
case 'R': // LEGACY_REGS
if (VT == MVT::i8 || VT == MVT::i1)
@@ -59640,7 +57080,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case MVT::v8f16:
if (!Subtarget.hasFP16())
break;
- [[fallthrough]];
+ if (VConstraint)
+ return std::make_pair(0U, &X86::VR128XRegClass);
+ return std::make_pair(0U, &X86::VR128RegClass);
+ case MVT::v8bf16:
+ if (!Subtarget.hasBF16() || !Subtarget.hasVLX())
+ break;
+ if (VConstraint)
+ return std::make_pair(0U, &X86::VR128XRegClass);
+ return std::make_pair(0U, &X86::VR128RegClass);
case MVT::f128:
case MVT::v16i8:
case MVT::v8i16:
@@ -59655,7 +57103,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case MVT::v16f16:
if (!Subtarget.hasFP16())
break;
- [[fallthrough]];
+ if (VConstraint)
+ return std::make_pair(0U, &X86::VR256XRegClass);
+ return std::make_pair(0U, &X86::VR256RegClass);
+ case MVT::v16bf16:
+ if (!Subtarget.hasBF16() || !Subtarget.hasVLX())
+ break;
+ if (VConstraint)
+ return std::make_pair(0U, &X86::VR256XRegClass);
+ return std::make_pair(0U, &X86::VR256RegClass);
case MVT::v32i8:
case MVT::v16i16:
case MVT::v8i32:
@@ -59670,7 +57126,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case MVT::v32f16:
if (!Subtarget.hasFP16())
break;
- [[fallthrough]];
+ if (VConstraint)
+ return std::make_pair(0U, &X86::VR512RegClass);
+ return std::make_pair(0U, &X86::VR512_0_15RegClass);
+ case MVT::v32bf16:
+ if (!Subtarget.hasBF16())
+ break;
+ if (VConstraint)
+ return std::make_pair(0U, &X86::VR512RegClass);
+ return std::make_pair(0U, &X86::VR512_0_15RegClass);
case MVT::v64i8:
case MVT::v32i16:
case MVT::v8f64:
@@ -59713,7 +57177,11 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case MVT::v8f16:
if (!Subtarget.hasFP16())
break;
- [[fallthrough]];
+ return std::make_pair(X86::XMM0, &X86::VR128RegClass);
+ case MVT::v8bf16:
+ if (!Subtarget.hasBF16() || !Subtarget.hasVLX())
+ break;
+ return std::make_pair(X86::XMM0, &X86::VR128RegClass);
case MVT::f128:
case MVT::v16i8:
case MVT::v8i16:
@@ -59726,7 +57194,11 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case MVT::v16f16:
if (!Subtarget.hasFP16())
break;
- [[fallthrough]];
+ return std::make_pair(X86::YMM0, &X86::VR256RegClass);
+ case MVT::v16bf16:
+ if (!Subtarget.hasBF16() || !Subtarget.hasVLX())
+ break;
+ return std::make_pair(X86::YMM0, &X86::VR256RegClass);
case MVT::v32i8:
case MVT::v16i16:
case MVT::v8i32:
@@ -59739,7 +57211,11 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case MVT::v32f16:
if (!Subtarget.hasFP16())
break;
- [[fallthrough]];
+ return std::make_pair(X86::ZMM0, &X86::VR512_0_15RegClass);
+ case MVT::v32bf16:
+ if (!Subtarget.hasBF16())
+ break;
+ return std::make_pair(X86::ZMM0, &X86::VR512_0_15RegClass);
case MVT::v64i8:
case MVT::v32i16:
case MVT::v8f64:
@@ -59814,7 +57290,8 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(X86::DF, &X86::DFCCRRegClass);
// fpsr -> FPSW
- if (StringRef("{fpsr}").equals_insensitive(Constraint))
+ // Only allow for clobber.
+ if (StringRef("{fpsr}").equals_insensitive(Constraint) && VT == MVT::Other)
return std::make_pair(X86::FPSW, &X86::FPCCRRegClass);
return Res;
@@ -60051,7 +57528,7 @@ X86TargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
assert(Target.isSymbol() && "Unexpected target operand for a direct call");
// X86TargetLowering::EmitLoweredIndirectThunk always uses r11 for
// 64-bit indirect thunk calls.
- assert(StringRef(Target.getSymbolName()).endswith("_r11") &&
+ assert(StringRef(Target.getSymbolName()).ends_with("_r11") &&
"Unexpected register for an indirect thunk call");
TargetReg = X86::R11;
break;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h
index 047d8f021047..9bd1622cb0d3 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1054,6 +1054,12 @@ namespace llvm {
bool preferABDSToABSWithNSW(EVT VT) const override;
+ bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT,
+ EVT ExtVT) const override;
+
+ bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond,
+ EVT VT) const override;
+
/// Return true if the target has native support for
/// the specified value type and it is 'desirable' to use the type for the
/// given node type. e.g. On x86 i16 is legal, but undesirable since i16
@@ -1132,6 +1138,11 @@ namespace llvm {
unsigned OldShiftOpcode, unsigned NewShiftOpcode,
SelectionDAG &DAG) const override;
+ unsigned preferedOpcodeForCmpEqPiecesOfOperand(
+ EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
+ const APInt &ShiftOrRotateAmt,
+ const std::optional<APInt> &AndMask) const override;
+
bool preferScalarizeSplat(SDNode *N) const override;
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
@@ -1260,23 +1271,22 @@ namespace llvm {
/// Examine constraint string and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
ConstraintWeight
- getSingleConstraintMatchWeight(AsmOperandInfo &info,
- const char *constraint) const override;
+ getSingleConstraintMatchWeight(AsmOperandInfo &Info,
+ const char *Constraint) const override;
const char *LowerXConstraint(EVT ConstraintVT) const override;
/// Lower the specified operand into the Ops vector. If it is invalid, don't
/// add anything to Ops. If hasMemory is true it means one of the asm
/// constraint of the inline asm instruction being processed is 'm'.
- void LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
+ void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- unsigned
+ InlineAsm::ConstraintCode
getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
if (ConstraintCode == "v")
- return InlineAsm::Constraint_v;
+ return InlineAsm::ConstraintCode::v;
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
@@ -1556,9 +1566,8 @@ namespace llvm {
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
unsigned Factor) const override;
- SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
- SDValue Addr, SelectionDAG &DAG)
- const override;
+ SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
+ int JTI, SelectionDAG &DAG) const override;
Align getPrefLoopAlignment(MachineLoop *ML) const override;
@@ -1631,8 +1640,8 @@ namespace llvm {
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
- unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
- const unsigned char OpFlags = 0) const;
+ unsigned getGlobalWrapperKind(const GlobalValue *GV,
+ const unsigned char OpFlags) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -1744,9 +1753,6 @@ namespace llvm {
LoadInst *
lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
- bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
- bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;
-
bool needsCmpXchgNb(Type *MemType) const;
void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
@@ -1803,6 +1809,9 @@ namespace llvm {
const SDLoc &dl, SelectionDAG &DAG,
SDValue &X86CC) const;
+ bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
+ SDValue IntPow2) const override;
+
/// Check if replacement of SQRT with RSQRT should be disabled.
bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;
@@ -1820,6 +1829,9 @@ namespace llvm {
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;
+
+ SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
+ SDValue V2) const;
};
namespace X86 {
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
new file mode 100644
index 000000000000..b8b5421b9005
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -0,0 +1,2950 @@
+//===- llvm/lib/Target/X86/X86ISelLoweringCall.cpp - Call lowering -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file implements the lowering of LLVM calls to DAG nodes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86CallingConv.h"
+#include "X86FrameLowering.h"
+#include "X86ISelLowering.h"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "X86TargetObjectFile.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ObjCARCUtil.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/IRBuilder.h"
+
+#define DEBUG_TYPE "x86-isel"
+
+using namespace llvm;
+
+STATISTIC(NumTailCalls, "Number of tail calls");
+
+/// Call this when the user attempts to do something unsupported, like
+/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
+/// report_fatal_error, so calling code should attempt to recover without
+/// crashing.
+static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
+ const char *Msg) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ DAG.getContext()->diagnose(
+ DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
+}
+
+/// Returns true if a CC can dynamically exclude a register from the list of
+/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
+/// the return registers.
+static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
+ switch (CC) {
+ default:
+ return false;
+ case CallingConv::X86_RegCall:
+ case CallingConv::PreserveMost:
+ case CallingConv::PreserveAll:
+ return true;
+ }
+}
+
+/// Returns true if a CC can dynamically exclude a register from the list of
+/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
+/// the parameters.
+static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
+ return CC == CallingConv::X86_RegCall;
+}
+
+static std::pair<MVT, unsigned>
+handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
+ const X86Subtarget &Subtarget) {
+ // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
+ // convention is one that uses k registers.
+ if (NumElts == 2)
+ return {MVT::v2i64, 1};
+ if (NumElts == 4)
+ return {MVT::v4i32, 1};
+ if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
+ CC != CallingConv::Intel_OCL_BI)
+ return {MVT::v8i16, 1};
+ if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
+ CC != CallingConv::Intel_OCL_BI)
+ return {MVT::v16i8, 1};
+ // v32i1 passes in ymm unless we have BWI and the calling convention is
+ // regcall.
+ if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
+ return {MVT::v32i8, 1};
+ // Split v64i1 vectors if we don't have v64i8 available.
+ if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
+ if (Subtarget.useAVX512Regs())
+ return {MVT::v64i8, 1};
+ return {MVT::v32i8, 2};
+ }
+
+ // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
+ if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
+ NumElts > 64)
+ return {MVT::i8, NumElts};
+
+ return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
+}
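+
+// Illustrative mapping (a sketch of the cases above, assuming the C calling
+// convention on an AVX512 target):
+//   v16i1 -> one v16i8 (XMM); v32i1 -> one v32i8 (YMM)
+//   v64i1 -> one v64i8 (ZMM) with BWI and 512-bit registers enabled, or two
+//           v32i8 halves when 256-bit registers are preferred
+//   v7i1  -> seven i8 scalars (non-power-of-2 width)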
+
+MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const {
+ if (VT.isVector()) {
+ if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ MVT RegisterVT;
+ unsigned NumRegisters;
+ std::tie(RegisterVT, NumRegisters) =
+ handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
+ if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
+ return RegisterVT;
+ }
+
+ if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
+ return MVT::v8f16;
+ }
+
+ // We will use more GPRs for f64 and f80 in 32-bit mode when x87 is disabled.
+ if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
+ !Subtarget.hasX87())
+ return MVT::i32;
+
+ if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
+ return getRegisterTypeForCallingConv(Context, CC,
+ VT.changeVectorElementType(MVT::f16));
+
+ return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+}
+
+unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const {
+ if (VT.isVector()) {
+ if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ MVT RegisterVT;
+ unsigned NumRegisters;
+ std::tie(RegisterVT, NumRegisters) =
+ handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
+ if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
+ return NumRegisters;
+ }
+
+ if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
+ return 1;
+ }
+
+ // We have to split f64 into 2 registers and f80 into 3 registers in 32-bit
+ // mode if x87 is disabled.
+ if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
+ if (VT == MVT::f64)
+ return 2;
+ if (VT == MVT::f80)
+ return 3;
+ }
+
+ if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
+ return getNumRegistersForCallingConv(Context, CC,
+ VT.changeVectorElementType(MVT::f16));
+
+ return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
+}
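+
+// For example, following the rules above: on a 32-bit target without x87, an
+// f64 value is reported as 2 registers and an f80 value as 3, with i32 as
+// the register type returned by getRegisterTypeForCallingConv.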
+
+unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
+ LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
+ unsigned &NumIntermediates, MVT &RegisterVT) const {
+ // Break wide or odd vXi1 vectors into scalars to match AVX2 behavior.
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
+ Subtarget.hasAVX512() &&
+ (!isPowerOf2_32(VT.getVectorNumElements()) ||
+ (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
+ VT.getVectorNumElements() > 64)) {
+ RegisterVT = MVT::i8;
+ IntermediateVT = MVT::i1;
+ NumIntermediates = VT.getVectorNumElements();
+ return NumIntermediates;
+ }
+
+ // Split v64i1 vectors if we don't have v64i8 available.
+ if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
+ CC != CallingConv::X86_RegCall) {
+ RegisterVT = MVT::v32i8;
+ IntermediateVT = MVT::v32i1;
+ NumIntermediates = 2;
+ return 2;
+ }
+
+ // Split vNbf16 vectors according to vNf16.
+ if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
+ VT = VT.changeVectorElementType(MVT::f16);
+
+ return TargetLowering::getVectorTypeBreakdownForCallingConv(
+ Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
+}
+
+EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
+ LLVMContext& Context,
+ EVT VT) const {
+ if (!VT.isVector())
+ return MVT::i8;
+
+ if (Subtarget.hasAVX512()) {
+ // Figure out what this type will be legalized to.
+ EVT LegalVT = VT;
+ while (getTypeAction(Context, LegalVT) != TypeLegal)
+ LegalVT = getTypeToTransformTo(Context, LegalVT);
+
+ // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
+ if (LegalVT.getSimpleVT().is512BitVector())
+ return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
+
+ if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
+ // If we legalized to less than a 512-bit vector, then we will use a vXi1
+ // compare for vXi32/vXi64 for sure. If we have BWI we will also support
+ // vXi16/vXi8.
+ MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
+ if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
+ return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
+ }
+ }
+
+ return VT.changeVectorElementTypeToInteger();
+}
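+
+// E.g. a v16i32 compare yields a v16i1 mask once AVX512 legalizes the
+// operands to a 512-bit vector, while on an AVX2-only target the same
+// compare yields v16i32, per changeVectorElementTypeToInteger above.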
+
+/// Helper for getByValTypeAlignment to determine
+/// the desired ByVal argument alignment.
+static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
+ if (MaxAlign == 16)
+ return;
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
+ MaxAlign = Align(16);
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Align EltAlign;
+ getMaxByValAlign(ATy->getElementType(), EltAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (auto *EltTy : STy->elements()) {
+ Align EltAlign;
+ getMaxByValAlign(EltTy, EltAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ if (MaxAlign == 16)
+ break;
+ }
+ }
+}
+
+/// Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area. For X86, aggregates
+/// that contain SSE vectors are placed at 16-byte boundaries while the rest
+/// are at 4-byte boundaries.
+uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty,
+ const DataLayout &DL) const {
+ if (Subtarget.is64Bit()) {
+ // Max of 8 and alignment of type.
+ Align TyAlign = DL.getABITypeAlign(Ty);
+ if (TyAlign > 8)
+ return TyAlign.value();
+ return 8;
+ }
+
+ Align Alignment(4);
+ if (Subtarget.hasSSE1())
+ getMaxByValAlign(Ty, Alignment);
+ return Alignment.value();
+}
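+
+// For instance, on 32-bit x86 with SSE enabled, a byval struct containing a
+// 128-bit vector member is placed at a 16-byte boundary, a struct of plain
+// ints keeps the default 4-byte alignment, and x86-64 uses a minimum of 8.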
+
+/// It returns EVT::Other if the type should be determined using generic
+/// target-independent logic.
+/// For vector ops we check that the overall size isn't larger than our
+/// preferred vector width.
+EVT X86TargetLowering::getOptimalMemOpType(
+ const MemOp &Op, const AttributeList &FuncAttributes) const {
+ if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
+ if (Op.size() >= 16 &&
+ (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
+ // FIXME: Check if unaligned 64-byte accesses are slow.
+ if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
+ (Subtarget.getPreferVectorWidth() >= 512)) {
+ return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
+ }
+ // FIXME: Check if unaligned 32-byte accesses are slow.
+ if (Op.size() >= 32 && Subtarget.hasAVX() &&
+ Subtarget.useLight256BitInstructions()) {
+ // Although this isn't a well-supported type for AVX1, we'll let
+ // legalization and shuffle lowering produce the optimal codegen. If we
+ // choose an optimal type with a vector element larger than a byte,
+ // getMemsetStores() may create an intermediate splat (using an integer
+ // multiply) before we splat as a vector.
+ return MVT::v32i8;
+ }
+ if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
+ return MVT::v16i8;
+ // TODO: Can SSE1 handle a byte vector?
+ // If we have SSE1 registers we should be able to use them.
+ if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
+ (Subtarget.getPreferVectorWidth() >= 128))
+ return MVT::v4f32;
+ } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
+ Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
+ // Do not use f64 to lower memcpy if the source is a string constant. It's
+ // better to use i32 to avoid the loads.
+ // Also, do not use f64 to lower memset unless this is a memset of zeros.
+ // The gymnastics of splatting a byte value into an XMM register and then
+ // only using 8-byte stores (because this is a CPU with slow unaligned
+ // 16-byte accesses) makes that a loser.
+ return MVT::f64;
+ }
+ }
+ // This is a compromise. If we reach here, unaligned accesses may be slow on
+ // this target. However, creating smaller, aligned accesses could be even
+ // slower and would certainly be a lot more code.
+ if (Subtarget.is64Bit() && Op.size() >= 8)
+ return MVT::i64;
+ return MVT::i32;
+}
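+
+// A concrete sketch of the selection above: a 64-byte memset on an AVX512
+// target preferring 512-bit vectors uses v64i8 stores (v16i32 without BWI),
+// a 16-byte copy with SSE2 uses v16i8, and small copies fall back to
+// i64/i32.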
+
+bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
+ if (VT == MVT::f32)
+ return Subtarget.hasSSE1();
+ if (VT == MVT::f64)
+ return Subtarget.hasSSE2();
+ return true;
+}
+
+static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
+ return (8 * Alignment.value()) % SizeInBits == 0;
+}
+
+bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
+ if (isBitAligned(Alignment, VT.getSizeInBits()))
+ return true;
+ switch (VT.getSizeInBits()) {
+ default:
+ // 8-byte and under are always assumed to be fast.
+ return true;
+ case 128:
+ return !Subtarget.isUnalignedMem16Slow();
+ case 256:
+ return !Subtarget.isUnalignedMem32Slow();
+ // TODO: What about AVX-512 (512-bit) accesses?
+ }
+}
+
+bool X86TargetLowering::allowsMisalignedMemoryAccesses(
+ EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
+ unsigned *Fast) const {
+ if (Fast)
+ *Fast = isMemoryAccessFast(VT, Alignment);
+ // NonTemporal vector memory ops must be aligned.
+ if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
+ // NT loads can only be vector aligned, so if it's less aligned than the
+ // minimum vector size (which we can split the vector down to), we might as
+ // well use a regular unaligned vector load.
+ // We don't have any NT loads pre-SSE41.
+ if (!!(Flags & MachineMemOperand::MOLoad))
+ return (Alignment < 16 || !Subtarget.hasSSE41());
+ return false;
+ }
+ // Misaligned accesses of any size are always allowed.
+ return true;
+}
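+
+// Sketch of the NT rule above: non-temporal vector loads (MOVNTDQA, SSE4.1+)
+// only exist in aligned forms, so an under-aligned NT load is reported as an
+// allowed misaligned access and simply lowered as a regular unaligned vector
+// load; sufficiently aligned NT loads and all NT stores report false and
+// keep their alignment.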
+
+bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
+ const DataLayout &DL, EVT VT,
+ unsigned AddrSpace, Align Alignment,
+ MachineMemOperand::Flags Flags,
+ unsigned *Fast) const {
+ if (Fast)
+ *Fast = isMemoryAccessFast(VT, Alignment);
+ if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
+ if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
+ /*Fast=*/nullptr))
+ return true;
+ // NonTemporal vector memory ops are special, and must be aligned.
+ if (!isBitAligned(Alignment, VT.getSizeInBits()))
+ return false;
+ switch (VT.getSizeInBits()) {
+ case 128:
+ if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
+ return true;
+ if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
+ return true;
+ return false;
+ case 256:
+ if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
+ return true;
+ if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
+ return true;
+ return false;
+ case 512:
+ if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
+ return true;
+ return false;
+ default:
+ return false; // Don't have NonTemporal vector memory ops of this size.
+ }
+ }
+ return true;
+}
+
+/// Return the entry encoding for a jump table in the
+/// current function. The returned value is a member of the
+/// MachineJumpTableInfo::JTEntryKind enum.
+unsigned X86TargetLowering::getJumpTableEncoding() const {
+ // In GOT PIC mode, each entry in the jump table is emitted as a @GOTOFF
+ // symbol.
+ if (isPositionIndependent() && Subtarget.isPICStyleGOT())
+ return MachineJumpTableInfo::EK_Custom32;
+ if (isPositionIndependent() &&
+ getTargetMachine().getCodeModel() == CodeModel::Large)
+ return MachineJumpTableInfo::EK_LabelDifference64;
+
+ // Otherwise, use the normal jump table encoding heuristics.
+ return TargetLowering::getJumpTableEncoding();
+}
+
+bool X86TargetLowering::splitValueIntoRegisterParts(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
+ unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
+ bool IsABIRegCopy = CC.has_value();
+ EVT ValueVT = Val.getValueType();
+ if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
+ unsigned ValueBits = ValueVT.getSizeInBits();
+ unsigned PartBits = PartVT.getSizeInBits();
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
+ Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ Parts[0] = Val;
+ return true;
+ }
+ return false;
+}
+
+SDValue X86TargetLowering::joinRegisterPartsIntoValue(
+ SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
+ MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
+ bool IsABIRegCopy = CC.has_value();
+ if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
+ unsigned ValueBits = ValueVT.getSizeInBits();
+ unsigned PartBits = PartVT.getSizeInBits();
+ SDValue Val = Parts[0];
+
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+ return Val;
+ }
+ return SDValue();
+}
+
+bool X86TargetLowering::useSoftFloat() const {
+ return Subtarget.useSoftFloat();
+}
+
+void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
+ ArgListTy &Args) const {
+
+ // Only relabel X86-32 for C / Stdcall CCs.
+ if (Subtarget.is64Bit())
+ return;
+ if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
+ return;
+ unsigned ParamRegs = 0;
+ if (auto *M = MF->getFunction().getParent())
+ ParamRegs = M->getNumberRegisterParameters();
+
+ // Mark the first N integer arguments as being passed in registers.
+ for (auto &Arg : Args) {
+ Type *T = Arg.Ty;
+ if (T->isIntOrPtrTy())
+ if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
+ unsigned numRegs = 1;
+ if (MF->getDataLayout().getTypeAllocSize(T) > 4)
+ numRegs = 2;
+ if (ParamRegs < numRegs)
+ return;
+ ParamRegs -= numRegs;
+ Arg.IsInReg = true;
+ }
+ }
+}
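+
+// For example (a sketch): compiling with -mregparm=3 records a budget of 3
+// register parameters in the module, so a libcall such as __udivdi3(i64,
+// i64) gets its first argument (two registers wide) marked inreg, while the
+// second no longer fits and stops the scan.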
+
+const MCExpr *
+X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned uid, MCContext &Ctx) const {
+ assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
+ // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
+ // entries.
+ return MCSymbolRefExpr::create(MBB->getSymbol(),
+ MCSymbolRefExpr::VK_GOTOFF, Ctx);
+}
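+
+// With this encoding, each 32-bit ELF PIC jump table entry is emitted as
+//   .long .LBB0_N@GOTOFF
+// and dispatch adds the GOT base back in (a sketch; actual label names
+// depend on the function).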
+
+/// Returns relocation base for the given PIC jumptable.
+SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ if (!Subtarget.is64Bit())
+ // This doesn't have SDLoc associated with it, but is not really the
+ // same as a Register.
+ return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
+ getPointerTy(DAG.getDataLayout()));
+ return Table;
+}
+
+/// This returns the relocation base for the given PIC jumptable,
+/// the same as getPICJumpTableRelocBase, but as an MCExpr.
+const MCExpr *X86TargetLowering::
+getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
+ MCContext &Ctx) const {
+ // X86-64 uses RIP relative addressing based on the jump table label.
+ if (Subtarget.isPICStyleRIPRel() ||
+ (Subtarget.is64Bit() &&
+ getTargetMachine().getCodeModel() == CodeModel::Large))
+ return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
+
+ // Otherwise, the reference is relative to the PIC base.
+ return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
+}
+
+std::pair<const TargetRegisterClass *, uint8_t>
+X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
+ MVT VT) const {
+ const TargetRegisterClass *RRC = nullptr;
+ uint8_t Cost = 1;
+ switch (VT.SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(TRI, VT);
+ case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
+ RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
+ break;
+ case MVT::x86mmx:
+ RRC = &X86::VR64RegClass;
+ break;
+ case MVT::f32: case MVT::f64:
+ case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
+ case MVT::v4f32: case MVT::v2f64:
+ case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
+ case MVT::v8f32: case MVT::v4f64:
+ case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
+ case MVT::v16f32: case MVT::v8f64:
+ RRC = &X86::VR128XRegClass;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
+unsigned X86TargetLowering::getAddressSpace() const {
+ if (Subtarget.is64Bit())
+ return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
+ return 256;
+}
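+
+// Address spaces 256 and 257 correspond to the %gs and %fs segments,
+// respectively (X86AS::GS / X86AS::FS used elsewhere in this file).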
+
+static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
+ return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
+ (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
+}
+
+static Constant* SegmentOffset(IRBuilderBase &IRB,
+ int Offset, unsigned AddressSpace) {
+ return ConstantExpr::getIntToPtr(
+ ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
+ IRB.getPtrTy(AddressSpace));
+}
+
+Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
+ // glibc, bionic, and Fuchsia have a special slot for the stack guard in
+ // tcbhead_t; use it instead of the usual global variable (see
+ // sysdeps/{i386,x86_64}/nptl/tls.h)
+ if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
+ unsigned AddressSpace = getAddressSpace();
+
+ // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
+ if (Subtarget.isTargetFuchsia())
+ return SegmentOffset(IRB, 0x10, AddressSpace);
+
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ // Users may customize the guard register and offset via module flags.
+ int Offset = M->getStackProtectorGuardOffset();
+ // If -stack-protector-guard-offset was not given, the default slot is
+ // %fs:0x28 on x86_64 (%gs:0x28 under the kernel code model) and %gs:0x14
+ // on i386.
+ if (Offset == INT_MAX)
+ Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
+
+ StringRef GuardReg = M->getStackProtectorGuardReg();
+ if (GuardReg == "fs")
+ AddressSpace = X86AS::FS;
+ else if (GuardReg == "gs")
+ AddressSpace = X86AS::GS;
+
+ // Use the symbol guard if the user specified one.
+ StringRef GuardSymb = M->getStackProtectorGuardSymbol();
+ if (!GuardSymb.empty()) {
+ GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
+ if (!GV) {
+ Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
+ : Type::getInt32Ty(M->getContext());
+ GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
+ nullptr, GuardSymb, nullptr,
+ GlobalValue::NotThreadLocal, AddressSpace);
+ if (!Subtarget.isTargetDarwin())
+ GV->setDSOLocal(M->getDirectAccessExternalData());
+ }
+ return GV;
+ }
+
+ return SegmentOffset(IRB, Offset, AddressSpace);
+ }
+ return TargetLowering::getIRStackGuard(IRB);
+}
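+
+// The typical result on x86_64 glibc is a guard load of the form
+//   mov %fs:0x28, %rax
+// (%gs:0x14 on i386); the -mstack-protector-guard-reg=, -offset= and
+// -symbol= options feed the module-level overrides consulted above.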
+
+void X86TargetLowering::insertSSPDeclarations(Module &M) const {
+ // The MSVC CRT provides functionality for stack protection.
+ if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
+ Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
+ // The MSVC CRT has a global variable holding the security cookie.
+ M.getOrInsertGlobal("__security_cookie",
+ PointerType::getUnqual(M.getContext()));
+
+ // The MSVC CRT has a function to validate the security cookie.
+ FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
+ "__security_check_cookie", Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(M.getContext()));
+ if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
+ F->setCallingConv(CallingConv::X86_FastCall);
+ F->addParamAttr(0, Attribute::AttrKind::InReg);
+ }
+ return;
+ }
+
+ StringRef GuardMode = M.getStackProtectorGuard();
+
+ // glibc, bionic, and Fuchsia have a special slot for the stack guard.
+ if ((GuardMode == "tls" || GuardMode.empty()) &&
+ hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
+ return;
+ TargetLowering::insertSSPDeclarations(M);
+}
+
+Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
+ // The MSVC CRT has a global variable holding the security cookie.
+ if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
+ Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
+ return M.getGlobalVariable("__security_cookie");
+ }
+ return TargetLowering::getSDagStackGuard(M);
+}
+
+Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
+ // The MSVC CRT has a function to validate the security cookie.
+ if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
+ Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
+ return M.getFunction("__security_check_cookie");
+ }
+ return TargetLowering::getSSPStackGuardCheck(M);
+}
+
+Value *
+X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
+ // Android provides a fixed TLS slot for the SafeStack pointer. See the
+ // definition of TLS_SLOT_SAFESTACK in
+ // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
+ if (Subtarget.isTargetAndroid()) {
+ // %fs:0x48 on x86_64 (%gs:0x48 under the kernel code model);
+ // %gs:0x24 on i386.
+ int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
+ return SegmentOffset(IRB, Offset, getAddressSpace());
+ }
+
+ // Fuchsia is similar.
+ if (Subtarget.isTargetFuchsia()) {
+ // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
+ return SegmentOffset(IRB, 0x18, getAddressSpace());
+ }
+
+ return TargetLowering::getSafeStackPointerLocation(IRB);
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+bool X86TargetLowering::CanLowerReturn(
+ CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_X86);
+}
+
+const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
+ static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
+ return ScratchRegs;
+}
+
+ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
+ // FIXME: We should def X86::FPCW for x87 as well. But it affects a lot of lit
+ // tests at the moment, which is not what we expected.
+ static const MCPhysReg RCRegs[] = {X86::MXCSR};
+ return RCRegs;
+}
+
+/// Lowers mask values (v*i1) to the location register values.
+/// \returns the DAG node after lowering to the register type.
+static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
+ const SDLoc &DL, SelectionDAG &DAG) {
+ EVT ValVT = ValArg.getValueType();
+
+ if (ValVT == MVT::v1i1)
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
+ DAG.getIntPtrConstant(0, DL));
+
+ if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
+ (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
+ // Two stage lowering might be required
+ // bitcast: v8i1 -> i8 / v16i1 -> i16
+ // anyextend: i8 -> i32 / i16 -> i32
+ EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
+ SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
+ if (ValLoc == MVT::i32)
+ ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
+ return ValToCopy;
+ }
+
+ if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
+ (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
+ // One stage lowering is required
+ // bitcast: v32i1 -> i32 / v64i1 -> i64
+ return DAG.getBitcast(ValLoc, ValArg);
+ }
+
+ return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
+}
+
+/// Breaks a v64i1 value into two registers and adds the new nodes to the DAG.
+static void Passv64i1ArgInRegs(
+ const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
+ SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
+ CCValAssign &NextVA, const X86Subtarget &Subtarget) {
+ assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
+ assert(Subtarget.is32Bit() && "Expecting 32 bit target");
+ assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
+ assert(VA.isRegLoc() && NextVA.isRegLoc() &&
+ "The value should reside in two registers");
+
+ // Before splitting the value, bitcast it to i64.
+ Arg = DAG.getBitcast(MVT::i64, Arg);
+
+ // Split the value into two i32 halves.
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);
+
+ // Attach the two i32 halves to their corresponding registers.
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
+ RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
+}
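+
+// E.g. on a 32-bit AVX512BW target, a v64i1 mask argument is bitcast to i64,
+// split into lo/hi i32 halves, and attached to whichever two consecutive
+// GPRs the calling convention assigned to VA and NextVA.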
+
+SDValue
+X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SDLoc &dl, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+
+ // In some cases we need to disable registers from the default CSR list.
+ // For example, when they are used as return registers (preserve_* and X86's
+ // regcall) or for argument passing (X86's regcall).
+ bool ShouldDisableCalleeSavedRegister =
+ shouldDisableRetRegFromCSR(CallConv) ||
+ MF.getFunction().hasFnAttribute("no_caller_saved_registers");
+
+ if (CallConv == CallingConv::X86_INTR && !Outs.empty())
+ report_fatal_error("X86 interrupts may not return any value");
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_X86);
+
+ SmallVector<std::pair<Register, SDValue>, 4> RetVals;
+ for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
+ ++I, ++OutsIndex) {
+ CCValAssign &VA = RVLocs[I];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ // Add the register to the CalleeSaveDisableRegs list.
+ if (ShouldDisableCalleeSavedRegister)
+ MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
+
+ SDValue ValToCopy = OutVals[OutsIndex];
+ EVT ValVT = ValToCopy.getValueType();
+
+ // Promote values to the appropriate types.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
+ else if (VA.getLocInfo() == CCValAssign::AExt) {
+ if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
+ ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
+ else
+ ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
+ }
+ else if (VA.getLocInfo() == CCValAssign::BCvt)
+ ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
+
+ assert(VA.getLocInfo() != CCValAssign::FPExt &&
+ "Unexpected FP-extend for return value.");
+
+ // Report an error if we have attempted to return a value via an XMM
+ // register and SSE was disabled.
+ if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
+ errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
+ VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
+ } else if (!Subtarget.hasSSE2() &&
+ X86::FR64XRegClass.contains(VA.getLocReg()) &&
+ ValVT == MVT::f64) {
+ // When returning a double via an XMM register, report an error if SSE2 is
+ // not enabled.
+ errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
+ VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
+ }
+
+ // Returns in ST0/ST1 are handled specially: these are pushed as operands to
+ // the RET instruction and handled by the FP Stackifier.
+ if (VA.getLocReg() == X86::FP0 ||
+ VA.getLocReg() == X86::FP1) {
+ // If this is a copy from an xmm register to ST(0), use an FPExtend to
+ // change the value to the FP stack register class.
+ if (isScalarFPTypeInSSEReg(VA.getValVT()))
+ ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
+ RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
+ // Don't emit a copytoreg.
+ continue;
+ }
+
+ // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
+ // which is returned in RAX / RDX.
+ if (Subtarget.is64Bit()) {
+ if (ValVT == MVT::x86mmx) {
+ if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
+ ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
+ ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
+ ValToCopy);
+ // If we don't have SSE2 available, convert to v4f32 so the generated
+ // register is legal.
+ if (!Subtarget.hasSSE2())
+ ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
+ }
+ }
+ }
+
+ if (VA.needsCustom()) {
+ assert(VA.getValVT() == MVT::v64i1 &&
+ "Currently the only custom case is when we split v64i1 to 2 regs");
+
+ Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
+ Subtarget);
+
+ // Add the second register to the CalleeSaveDisableRegs list.
+ if (ShouldDisableCalleeSavedRegister)
+ MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
+ } else {
+ RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
+ }
+ }
+
+ SDValue Glue;
+ SmallVector<SDValue, 6> RetOps;
+ RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
+ // Operand #1 = Bytes To Pop
+ RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
+ MVT::i32));
+
+ // Copy the result values into the output registers.
+ for (auto &RetVal : RetVals) {
+ if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
+ RetOps.push_back(RetVal.second);
+ continue; // Don't emit a copytoreg.
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
+ Glue = Chain.getValue(1);
+ RetOps.push_back(
+ DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
+ }
+
+ // The Swift calling convention does not require us to copy the sret
+ // argument into %rax/%eax for the return, and SRetReturnReg is not set for
+ // Swift.
+
+ // All x86 ABIs require that for returning structs by value we copy
+ // the sret argument into %rax/%eax (depending on ABI) for the return.
+ // We saved the argument into a virtual register in the entry block,
+ // so now we copy the value out and into %rax/%eax.
+ //
+ // Checking Function.hasStructRetAttr() here is insufficient because the IR
+ // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
+ // false, then an sret argument may be implicitly inserted in the SelDAG. In
+ // either case FuncInfo->setSRetReturnReg() will have been called.
+ if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
+ // When we have both sret and another return value, we should use the
+ // original Chain stored in RetOps[0], instead of the current Chain updated
+ // in the above loop. If we only have sret, RetOps[0] equals Chain.
+
+ // For the case of sret and another return value, we have
+ // Chain_0 at the function entry
+ // Chain_1 = getCopyToReg(Chain_0) in the above loop
+ // If we use Chain_1 in getCopyFromReg, we will have
+ // Val = getCopyFromReg(Chain_1)
+ // Chain_2 = getCopyToReg(Chain_1, Val) from below
+
+ // getCopyToReg(Chain_0) will be glued together with
+ // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
+ // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
+ // Data dependency from Unit B to Unit A due to usage of Val in
+ // getCopyToReg(Chain_1, Val)
+ // Chain dependency from Unit A to Unit B
+
+ // So here, we use RetOps[0] (i.e. Chain_0) for getCopyFromReg.
+ SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
+ getPointerTy(MF.getDataLayout()));
+
+ Register RetValReg
+ = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
+ X86::RAX : X86::EAX;
+ Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
+ Glue = Chain.getValue(1);
+
+ // RAX/EAX now acts like a return value.
+ RetOps.push_back(
+ DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
+
+ // Add the returned register to the CalleeSaveDisableRegs list. Don't do
+ // this however for preserve_most/preserve_all to minimize the number of
+ // callee-saved registers for these CCs.
+ if (ShouldDisableCalleeSavedRegister &&
+ CallConv != CallingConv::PreserveAll &&
+ CallConv != CallingConv::PreserveMost)
+ MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
+ }
+
+ const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const MCPhysReg *I =
+ TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
+ if (I) {
+ for (; *I; ++I) {
+ if (X86::GR64RegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::i64));
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+ }
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the glue if we have it.
+ if (Glue.getNode())
+ RetOps.push_back(Glue);
+
+ X86ISD::NodeType opcode = X86ISD::RET_GLUE;
+ if (CallConv == CallingConv::X86_INTR)
+ opcode = X86ISD::IRET;
+ return DAG.getNode(opcode, dl, MVT::Other, RetOps);
+}
+
+bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
+ if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
+ return false;
+
+ SDValue TCChain = Chain;
+ SDNode *Copy = *N->use_begin();
+ if (Copy->getOpcode() == ISD::CopyToReg) {
+ // If the copy has a glue operand, we conservatively assume it isn't safe to
+ // perform a tail call.
+ if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
+ return false;
+ TCChain = Copy->getOperand(0);
+ } else if (Copy->getOpcode() != ISD::FP_EXTEND)
+ return false;
+
+ bool HasRet = false;
+ for (const SDNode *U : Copy->uses()) {
+ if (U->getOpcode() != X86ISD::RET_GLUE)
+ return false;
+ // If we are returning more than one value, we can definitely
+ // not make a tail call; see PR19530.
+ if (U->getNumOperands() > 4)
+ return false;
+ if (U->getNumOperands() == 4 &&
+ U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
+ return false;
+ HasRet = true;
+ }
+
+ if (!HasRet)
+ return false;
+
+ Chain = TCChain;
+ return true;
+}
+
+EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
+ ISD::NodeType ExtendKind) const {
+ MVT ReturnMVT = MVT::i32;
+
+ bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
+ if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
+ // The ABI does not require i1, i8 or i16 to be extended.
+ //
+ // On Darwin, there is code in the wild relying on Clang's old behaviour of
+ // always extending i8/i16 return values, so keep doing that for now.
+ // (PR26665).
+ ReturnMVT = MVT::i8;
+ }
+
+ EVT MinVT = getRegisterType(Context, ReturnMVT);
+ return VT.bitsLT(MinVT) ? MinVT : VT;
+}
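+
+// E.g. an i8 return value stays i8 on Linux x86-64 but is widened to i32 on
+// Darwin to preserve Clang's historical behaviour (PR26665).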
+
+/// Reads two 32 bit registers and creates a 64 bit mask value.
+/// \param VA The current 32 bit value that needs to be assigned.
+/// \param NextVA The next 32 bit value that needs to be assigned.
+/// \param Root The parent DAG node.
+/// \param [in,out] InGlue Represents the SDValue in the parent DAG node for
+/// glue purposes. If the DAG is already using a
+/// physical register instead of a virtual one, we
+/// should glue our new SDValue to the InGlue SDValue.
+/// \return a new SDValue of size 64 bits.
+static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &Root, SelectionDAG &DAG,
+ const SDLoc &DL, const X86Subtarget &Subtarget,
+ SDValue *InGlue = nullptr) {
+ assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
+ assert(Subtarget.is32Bit() && "Expecting 32 bit target");
+ assert(VA.getValVT() == MVT::v64i1 &&
+ "Expecting first location of 64 bit width type");
+ assert(NextVA.getValVT() == VA.getValVT() &&
+ "The locations should have the same type");
+ assert(VA.isRegLoc() && NextVA.isRegLoc() &&
+ "The values should reside in two registers");
+
+ SDValue Lo, Hi;
+ SDValue ArgValueLo, ArgValueHi;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetRegisterClass *RC = &X86::GR32RegClass;
+
+ // Read a 32 bit value from the registers.
+ if (!InGlue) {
+ // When no physical register is present,
+ // create an intermediate virtual register.
+ Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
+ Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
+ ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
+ } else {
+ // When a physical register is available read the value from it and glue
+ // the reads together.
+ ArgValueLo =
+ DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
+ *InGlue = ArgValueLo.getValue(2);
+ ArgValueHi =
+ DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
+ *InGlue = ArgValueHi.getValue(2);
+ }
+
+ // Convert the lower i32 into a v32i1 mask.
+ Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
+
+ // Convert the upper i32 into a v32i1 mask.
+ Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
+
+ // Concatenate the two values together.
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
+}
+
+/// The function will lower a register of various sizes (8/16/32/64)
+/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1).
+/// \returns a DAG node containing the operand after lowering to mask type.
+static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
+ const EVT &ValLoc, const SDLoc &DL,
+ SelectionDAG &DAG) {
+ SDValue ValReturned = ValArg;
+
+ if (ValVT == MVT::v1i1)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
+
+ if (ValVT == MVT::v64i1) {
+ // On 32-bit targets, this case is handled by getv64i1Argument.
+ assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
+ // On 64-bit targets, there is no need to truncate the value, only bitcast.
+ } else {
+ MVT MaskLenVT;
+ switch (ValVT.getSimpleVT().SimpleTy) {
+ case MVT::v8i1:
+ MaskLenVT = MVT::i8;
+ break;
+ case MVT::v16i1:
+ MaskLenVT = MVT::i16;
+ break;
+ case MVT::v32i1:
+ MaskLenVT = MVT::i32;
+ break;
+ default:
+ llvm_unreachable("Expecting a vector of i1 types");
+ }
+
+ ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
+ }
+ return DAG.getBitcast(ValVT, ValReturned);
+}
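+
+// For instance, a v16i1 value arriving in an i16 (or widened i32) location
+// is truncated to i16 and bitcast back to v16i1, whereas a v64i1 value
+// arriving as i64 on a 64-bit target is bitcast directly with no truncation.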
+
+/// Lower the result values of a call into the
+/// appropriate copies out of the appropriate physical registers.
+SDValue X86TargetLowering::LowerCallResult(
+ SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
+ uint32_t *RegMask) const {
+
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
+ ++I, ++InsIndex) {
+ CCValAssign &VA = RVLocs[I];
+ EVT CopyVT = VA.getLocVT();
+
+ // In some calling conventions we need to remove the used registers
+ // from the register mask.
+ if (RegMask) {
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
+ RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
+ }
+
+ // Report an error if there was an attempt to return FP values via XMM
+ // registers.
+ if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
+ errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
+ if (VA.getLocReg() == X86::XMM1)
+ VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
+ else
+ VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
+ } else if (!Subtarget.hasSSE2() &&
+ X86::FR64XRegClass.contains(VA.getLocReg()) &&
+ CopyVT == MVT::f64) {
+ errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
+ if (VA.getLocReg() == X86::XMM1)
+ VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
+ else
+ VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
+ }
+
+ // If we prefer to use the value in xmm registers, copy it out as f80 and
+ // use a truncate to move it from fp stack reg to xmm reg.
+ bool RoundAfterCopy = false;
+ if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
+ isScalarFPTypeInSSEReg(VA.getValVT())) {
+ if (!Subtarget.hasX87())
+ report_fatal_error("X87 register return with X87 disabled");
+ CopyVT = MVT::f80;
+ RoundAfterCopy = (CopyVT != VA.getLocVT());
+ }
+
+ SDValue Val;
+ if (VA.needsCustom()) {
+ assert(VA.getValVT() == MVT::v64i1 &&
+ "Currently the only custom case is when we split v64i1 to 2 regs");
+ Val =
+ getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
+ } else {
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
+ .getValue(1);
+ Val = Chain.getValue(0);
+ InGlue = Chain.getValue(2);
+ }
+
+ if (RoundAfterCopy)
+ Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
+ // This truncation won't change the value.
+ DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
+
+ if (VA.isExtInLoc()) {
+ if (VA.getValVT().isVector() &&
+ VA.getValVT().getScalarType() == MVT::i1 &&
+ ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
+ (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
+ // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
+ Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
+ } else
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ }
+
+ if (VA.getLocInfo() == CCValAssign::BCvt)
+ Val = DAG.getBitcast(VA.getValVT(), Val);
+
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// C & StdCall & Fast Calling Convention implementation
+//===----------------------------------------------------------------------===//
+// The StdCall calling convention is standard for many Windows API routines.
+// It differs from the C calling convention only slightly: the callee cleans
+// up the stack rather than the caller, and symbols are decorated in a fancy
+// way :) It doesn't support any vector arguments.
+// For info on the fast calling convention, see the Fast Calling Convention
+// (tail call) implementation in LowerX86_32FastCCCallTo.
+
+/// Determines whether Args, either a set of outgoing arguments to a call, or
+/// a set of incoming args of a call, contains an sret pointer that the callee
+/// pops.
+template <typename T>
+static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
+ const X86Subtarget &Subtarget) {
+ // Not C++20 (yet), so no concepts available.
+ static_assert(std::is_same_v<T, ISD::OutputArg> ||
+ std::is_same_v<T, ISD::InputArg>,
+ "requires ISD::OutputArg or ISD::InputArg");
+
+ // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out
+ // for most compilations.
+ if (!Subtarget.is32Bit())
+ return false;
+
+ if (Args.empty())
+ return false;
+
+ // Most calls do not have an sret argument; check the arg next.
+ const ISD::ArgFlagsTy &Flags = Args[0].Flags;
+ if (!Flags.isSRet() || Flags.isInReg())
+ return false;
+
+ // The MSVC ABI does not pop the sret.
+ if (Subtarget.getTargetTriple().isOSMSVCRT())
+ return false;
+
+ // MCUs don't pop the sret.
+ if (Subtarget.isTargetMCU())
+ return false;
+
+ // Otherwise the callee pops the argument.
+ return true;
+}
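+
+// In practice this matches the i386 System V convention, where the callee
+// pops the hidden sret pointer with `ret $4`; the MSVC and MCU ABIs leave
+// cleanup to the caller, and 64-bit ABIs pass sret in a register, so there
+// is nothing to pop.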
+
+/// Make a copy of an aggregate at the address specified by "Src" to the
+/// address "Dst" with size and alignment information specified by the
+/// corresponding parameter attribute. The copy will be passed as a byval
+/// function parameter.
+static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
+ SDValue Chain, ISD::ArgFlagsTy Flags,
+ SelectionDAG &DAG, const SDLoc &dl) {
+ SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
+
+ return DAG.getMemcpy(
+ Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
+ /*isVolatile*/ false, /*AlwaysInline=*/true,
+ /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
+}
+
+/// Return true if the calling convention is one that we can guarantee TCO for.
+static bool canGuaranteeTCO(CallingConv::ID CC) {
+ return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
+ CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
+ CC == CallingConv::Tail || CC == CallingConv::SwiftTail);
+}
+
+/// Return true if we might ever do TCO for calls with this calling convention.
+static bool mayTailCallThisCC(CallingConv::ID CC) {
+ switch (CC) {
+ // C calling conventions:
+ case CallingConv::C:
+ case CallingConv::Win64:
+ case CallingConv::X86_64_SysV:
+ // Callee pop conventions:
+ case CallingConv::X86_ThisCall:
+ case CallingConv::X86_StdCall:
+ case CallingConv::X86_VectorCall:
+ case CallingConv::X86_FastCall:
+ // Swift:
+ case CallingConv::Swift:
+ return true;
+ default:
+ return canGuaranteeTCO(CC);
+ }
+}
+
+/// Return true if the function is being made into a tailcall target by
+/// changing its ABI.
+static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
+ return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
+ CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
+}
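+
+// E.g. fastcc only guarantees a tail call when GuaranteedTailCallOpt
+// (-tailcallopt) is set, whereas tailcc and swifttailcc always do; a plain
+// ccc call may still be tail-called opportunistically per mayTailCallThisCC.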
+
+bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
+ if (!CI->isTailCall())
+ return false;
+
+ CallingConv::ID CalleeCC = CI->getCallingConv();
+ if (!mayTailCallThisCC(CalleeCC))
+ return false;
+
+ return true;
+}
+
+SDValue
+X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ const SDLoc &dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ MachineFrameInfo &MFI, unsigned i) const {
+ // Create the nodes corresponding to a load from this parameter slot.
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ bool AlwaysUseMutable = shouldGuaranteeTCO(
+ CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
+ bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
+ EVT ValVT;
+ MVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+ // If the value is passed by pointer, we have the address passed instead of
+ // the value itself. No need to extend if the mask value and location share
+ // the same absolute size.
+ bool ExtendedInMem =
+ VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
+ VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
+
+ if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
+ ValVT = VA.getLocVT();
+ else
+ ValVT = VA.getValVT();
+
+ // FIXME: For now, all byval parameter objects are marked mutable. This can
+ // be changed with more analysis.
+ // In the case of tail call optimization, mark all arguments mutable, since
+ // they could be overwritten by the lowering of arguments in a tail call.
+ if (Flags.isByVal()) {
+ unsigned Bytes = Flags.getByValSize();
+ if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
+
+ // FIXME: For now, all byval parameter objects are marked as aliasing. This
+ // can be improved with deeper analysis.
+ int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
+ /*isAliased=*/true);
+ return DAG.getFrameIndex(FI, PtrVT);
+ }
+
+ EVT ArgVT = Ins[i].ArgVT;
+
+ // If this is a vector that has been split into multiple parts, don't elide
+ // the copy. The layout on the stack may not match the packed in-memory
+ // layout.
+ bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();
+
+ // This is an argument in memory. We might be able to perform copy elision.
+ // If the argument is passed directly in memory without any extension, then we
+ // can perform copy elision. Large vector types, for example, may be passed
+ // indirectly by pointer.
+ if (Flags.isCopyElisionCandidate() &&
+ VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
+ !ScalarizedVector) {
+ SDValue PartAddr;
+ if (Ins[i].PartOffset == 0) {
+ // If this is a one-part value or the first part of a multi-part value,
+ // create a stack object for the entire argument value type and return a
+ // load from our portion of it. This assumes that if the first part of an
+ // argument is in memory, the rest will also be in memory.
+ int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
+ /*IsImmutable=*/false);
+ PartAddr = DAG.getFrameIndex(FI, PtrVT);
+ return DAG.getLoad(
+ ValVT, dl, Chain, PartAddr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
+ }
+
+ // This is not the first piece of an argument in memory. See if there is
+ // already a fixed stack object including this offset. If so, assume it
+ // was created by the PartOffset == 0 branch above and create a load from
+ // the appropriate offset into it.
+ int64_t PartBegin = VA.getLocMemOffset();
+ int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
+ int FI = MFI.getObjectIndexBegin();
+ for (; MFI.isFixedObjectIndex(FI); ++FI) {
+ int64_t ObjBegin = MFI.getObjectOffset(FI);
+ int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
+ if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
+ break;
+ }
+ if (MFI.isFixedObjectIndex(FI)) {
+ SDValue Addr =
+ DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
+ DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
+ return DAG.getLoad(ValVT, dl, Chain, Addr,
+ MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FI, Ins[i].PartOffset));
+ }
+ }
+
+ int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
+ VA.getLocMemOffset(), isImmutable);
+
+ // Set SExt or ZExt flag.
+ if (VA.getLocInfo() == CCValAssign::ZExt) {
+ MFI.setObjectZExt(FI, true);
+ } else if (VA.getLocInfo() == CCValAssign::SExt) {
+ MFI.setObjectSExt(FI, true);
+ }
+
+ MaybeAlign Alignment;
+ if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
+ ValVT != MVT::f80)
+ Alignment = MaybeAlign(4);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ SDValue Val = DAG.getLoad(
+ ValVT, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ Alignment);
+ return ExtendedInMem
+ ? (VA.getValVT().isVector()
+ ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
+ : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
+ : Val;
+}
+
+// FIXME: Get this from tablegen.
+static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
+ const X86Subtarget &Subtarget) {
+ assert(Subtarget.is64Bit());
+
+ if (Subtarget.isCallingConvWin64(CallConv)) {
+ static const MCPhysReg GPR64ArgRegsWin64[] = {
+ X86::RCX, X86::RDX, X86::R8, X86::R9
+ };
+ return ArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
+ }
+
+ static const MCPhysReg GPR64ArgRegs64Bit[] = {
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+ };
+ return ArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
+}
+
+// FIXME: Get this from tablegen.
+static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
+ CallingConv::ID CallConv,
+ const X86Subtarget &Subtarget) {
+ assert(Subtarget.is64Bit());
+ if (Subtarget.isCallingConvWin64(CallConv)) {
+ // The XMM registers which might contain var arg parameters are shadowed
+ // in their paired GPR. So we only need to save the GPR to their home
+ // slots.
+ // TODO: __vectorcall will change this.
+ return std::nullopt;
+ }
+
+ bool isSoftFloat = Subtarget.useSoftFloat();
+ if (isSoftFloat || !Subtarget.hasSSE1())
+ // Kernel mode asks for SSE to be disabled, so there are no XMM argument
+ // registers.
+ return std::nullopt;
+
+ static const MCPhysReg XMMArgRegs64Bit[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ return ArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
+}
+
+#ifndef NDEBUG
+static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
+ return llvm::is_sorted(
+ ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
+ return A.getValNo() < B.getValNo();
+ });
+}
+#endif
+
+namespace {
+/// This is a helper class for lowering variable argument parameters.
+class VarArgsLoweringHelper {
+public:
+ VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
+ SelectionDAG &DAG, const X86Subtarget &Subtarget,
+ CallingConv::ID CallConv, CCState &CCInfo)
+ : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
+ TheMachineFunction(DAG.getMachineFunction()),
+ TheFunction(TheMachineFunction.getFunction()),
+ FrameInfo(TheMachineFunction.getFrameInfo()),
+ FrameLowering(*Subtarget.getFrameLowering()),
+ TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
+ CCInfo(CCInfo) {}
+
+ // Lower variable argument parameters.
+ void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
+
+private:
+ void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
+
+ void forwardMustTailParameters(SDValue &Chain);
+
+ bool is64Bit() const { return Subtarget.is64Bit(); }
+ bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
+
+ X86MachineFunctionInfo *FuncInfo;
+ const SDLoc &DL;
+ SelectionDAG &DAG;
+ const X86Subtarget &Subtarget;
+ MachineFunction &TheMachineFunction;
+ const Function &TheFunction;
+ MachineFrameInfo &FrameInfo;
+ const TargetFrameLowering &FrameLowering;
+ const TargetLowering &TargLowering;
+ CallingConv::ID CallConv;
+ CCState &CCInfo;
+};
+} // namespace
+
+void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
+ SDValue &Chain, unsigned StackSize) {
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start. We
+ // can skip this if there are no va_start calls.
+ if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
+ CallConv != CallingConv::X86_ThisCall)) {
+ FuncInfo->setVarArgsFrameIndex(
+ FrameInfo.CreateFixedObject(1, StackSize, true));
+ }
+
+ // 64-bit calling conventions support varargs and register parameters, so we
+ // have to do extra work to spill them in the prologue.
+ if (is64Bit()) {
+ // Find the first unallocated argument registers.
+ ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
+ ArrayRef<MCPhysReg> ArgXMMs =
+ get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
+ unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
+
+ assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
+ "SSE register cannot be used when SSE is disabled!");
+
+ if (isWin64()) {
+ // Get to the caller-allocated home save location. Add 8 to account
+ // for the return address.
+ int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
+ FuncInfo->setRegSaveFrameIndex(
+ FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
+ // Fixup to set vararg frame on shadow area (4 x i64).
+ if (NumIntRegs < 4)
+ FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
+ } else {
+ // For X86-64, if there are vararg parameters that are passed via
+ // registers, then we must store them to their spots on the stack so
+ // they may be loaded by dereferencing the result of va_next.
+ FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
+ FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
+ FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
+ ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
+ }
+
+ SmallVector<SDValue, 6>
+ LiveGPRs; // SDValues for the GPR registers holding live input values
+ SmallVector<SDValue, 8> LiveXMMRegs; // SDValues for the XMM registers
+ // holding live input values
+ SDValue ALVal; // if applicable keeps SDValue for %al register
+
+ // Gather all the live in physical registers.
+ for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
+ Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
+ LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
+ }
+ const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
+ if (!AvailableXmms.empty()) {
+ Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
+ ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
+ for (MCPhysReg Reg : AvailableXmms) {
+ // FastRegisterAllocator spills virtual registers at basic
+ // block boundaries. That leads to uses of XMM registers
+ // outside of the check for %al. Pass physical registers to
+ // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
+ TheMachineFunction.getRegInfo().addLiveIn(Reg);
+ LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
+ }
+ }
+
+ // Store the integer parameter registers.
+ SmallVector<SDValue, 8> MemOps;
+ SDValue RSFIN =
+ DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
+ TargLowering.getPointerTy(DAG.getDataLayout()));
+ unsigned Offset = FuncInfo->getVarArgsGPOffset();
+ for (SDValue Val : LiveGPRs) {
+ SDValue FIN = DAG.getNode(ISD::ADD, DL,
+ TargLowering.getPointerTy(DAG.getDataLayout()),
+ RSFIN, DAG.getIntPtrConstant(Offset, DL));
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), DL, Val, FIN,
+ MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(),
+ FuncInfo->getRegSaveFrameIndex(), Offset));
+ MemOps.push_back(Store);
+ Offset += 8;
+ }
+
+ // Now store the XMM (fp + vector) parameter registers.
+ if (!LiveXMMRegs.empty()) {
+ SmallVector<SDValue, 12> SaveXMMOps;
+ SaveXMMOps.push_back(Chain);
+ SaveXMMOps.push_back(ALVal);
+ SaveXMMOps.push_back(RSFIN);
+ SaveXMMOps.push_back(
+ DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
+ llvm::append_range(SaveXMMOps, LiveXMMRegs);
+ MachineMemOperand *StoreMMO =
+ DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
+ Offset),
+ MachineMemOperand::MOStore, 128, Align(16));
+ MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
+ DL, DAG.getVTList(MVT::Other),
+ SaveXMMOps, MVT::i8, StoreMMO));
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
+ }
+}
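
The arithmetic above mirrors the SysV x86-64 va_list register save area: six 8-byte GPR slots followed by eight 16-byte XMM slots, with the GP and FP offsets pointing at the first unallocated slots. A minimal standalone sketch of the same computation, with hypothetical register counts (not part of the patch):

#include <cstdio>

int main() {
  const unsigned NumArgGPRs = 6; // RDI, RSI, RDX, RCX, R8, R9
  const unsigned NumArgXMMs = 8; // XMM0..XMM7
  unsigned NumIntRegs = 2;       // assumed: two named integer parameters
  unsigned NumXMMRegs = 1;       // assumed: one named FP parameter

  unsigned GPOffset = NumIntRegs * 8;                   // cf. VarArgsGPOffset
  unsigned FPOffset = NumArgGPRs * 8 + NumXMMRegs * 16; // cf. VarArgsFPOffset
  unsigned SaveArea = NumArgGPRs * 8 + NumArgXMMs * 16; // 176-byte save area

  std::printf("gp_offset=%u fp_offset=%u size=%u\n", GPOffset, FPOffset,
              SaveArea);
  return 0;
}
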
+
+void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
+ // Find the largest legal vector type.
+ MVT VecVT = MVT::Other;
+ // FIXME: Only some x86_32 calling conventions support AVX512.
+ if (Subtarget.useAVX512Regs() &&
+ (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
+ CallConv == CallingConv::Intel_OCL_BI)))
+ VecVT = MVT::v16f32;
+ else if (Subtarget.hasAVX())
+ VecVT = MVT::v8f32;
+ else if (Subtarget.hasSSE2())
+ VecVT = MVT::v4f32;
+
+ // We forward some GPRs and some vector types.
+ SmallVector<MVT, 2> RegParmTypes;
+ MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
+ RegParmTypes.push_back(IntVT);
+ if (VecVT != MVT::Other)
+ RegParmTypes.push_back(VecVT);
+
+ // Compute the set of forwarded registers. The rest are scratch.
+ SmallVectorImpl<ForwardedRegister> &Forwards =
+ FuncInfo->getForwardedMustTailRegParms();
+ CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
+
+ // Forward AL for SysV x86_64 targets, since it is used for varargs.
+ if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
+ Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
+ Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
+ }
+
+ // Copy all forwards from physical to virtual registers.
+ for (ForwardedRegister &FR : Forwards) {
+ // FIXME: Can we use a less constrained schedule?
+ SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
+ FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
+ TargLowering.getRegClassFor(FR.VT));
+ Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
+ }
+}
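
The type ladder at the top of forwardMustTailParameters degrades with the available ISA: 512-bit vectors with AVX-512, 256-bit with AVX, 128-bit with SSE2, none otherwise. A condensed sketch of that selection, with the feature flags as assumed inputs:

#include <cstdio>

const char *pickForwardedVecVT(bool HasAVX512, bool HasAVX, bool HasSSE2) {
  if (HasAVX512) return "v16f32"; // 512-bit ZMM
  if (HasAVX)    return "v8f32";  // 256-bit YMM
  if (HasSSE2)   return "v4f32";  // 128-bit XMM
  return "none";                  // no vector registers are forwarded
}

int main() {
  std::printf("%s\n", pickForwardedVecVT(/*HasAVX512=*/false, /*HasAVX=*/true,
                                         /*HasSSE2=*/true)); // prints v8f32
  return 0;
}
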
+
+void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
+ unsigned StackSize) {
+  // Set FrameIndex to the 0xAAAAAAA value to mark it as unset.
+  // If necessary, it will be set to the correct value later.
+ FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
+ FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
+
+ if (FrameInfo.hasVAStart())
+ createVarArgAreaAndStoreRegisters(Chain, StackSize);
+
+ if (FrameInfo.hasMustTailInVarArgFunc())
+ forwardMustTailParameters(Chain);
+}
+
+SDValue X86TargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+
+ const Function &F = MF.getFunction();
+ if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
+ F.getName() == "main")
+ FuncInfo->setForceFramePointer(true);
+
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool Is64Bit = Subtarget.is64Bit();
+ bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
+
+ assert(
+ !(IsVarArg && canGuaranteeTCO(CallConv)) &&
+ "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+
+ // Allocate shadow area for Win64.
+ if (IsWin64)
+ CCInfo.AllocateStack(32, Align(8));
+
+ CCInfo.AnalyzeArguments(Ins, CC_X86);
+
+  // In the vectorcall calling convention, a second pass is required for the
+  // HVA types.
+ if (CallingConv::X86_VectorCall == CallConv) {
+ CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
+ }
+
+  // The next loop assumes that the locations are in the same order as the
+  // input arguments.
+ assert(isSortedByValueNo(ArgLocs) &&
+ "Argument Location list must be sorted before lowering");
+
+ SDValue ArgValue;
+ for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
+ ++I, ++InsIndex) {
+ assert(InsIndex < Ins.size() && "Invalid Ins index");
+ CCValAssign &VA = ArgLocs[I];
+
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+ if (VA.needsCustom()) {
+ assert(
+ VA.getValVT() == MVT::v64i1 &&
+ "Currently the only custom case is when we split v64i1 to 2 regs");
+
+        // In the regcall calling convention, v64i1 values compiled
+        // for a 32-bit arch are split up into two registers.
+ ArgValue =
+ getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
+ } else {
+ const TargetRegisterClass *RC;
+ if (RegVT == MVT::i8)
+ RC = &X86::GR8RegClass;
+ else if (RegVT == MVT::i16)
+ RC = &X86::GR16RegClass;
+ else if (RegVT == MVT::i32)
+ RC = &X86::GR32RegClass;
+ else if (Is64Bit && RegVT == MVT::i64)
+ RC = &X86::GR64RegClass;
+ else if (RegVT == MVT::f16)
+ RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
+ else if (RegVT == MVT::f32)
+ RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
+ else if (RegVT == MVT::f64)
+ RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
+ else if (RegVT == MVT::f80)
+ RC = &X86::RFP80RegClass;
+ else if (RegVT == MVT::f128)
+ RC = &X86::VR128RegClass;
+ else if (RegVT.is512BitVector())
+ RC = &X86::VR512RegClass;
+ else if (RegVT.is256BitVector())
+ RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
+ else if (RegVT.is128BitVector())
+ RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
+ else if (RegVT == MVT::x86mmx)
+ RC = &X86::VR64RegClass;
+ else if (RegVT == MVT::v1i1)
+ RC = &X86::VK1RegClass;
+ else if (RegVT == MVT::v8i1)
+ RC = &X86::VK8RegClass;
+ else if (RegVT == MVT::v16i1)
+ RC = &X86::VK16RegClass;
+ else if (RegVT == MVT::v32i1)
+ RC = &X86::VK32RegClass;
+ else if (RegVT == MVT::v64i1)
+ RC = &X86::VK64RegClass;
+ else
+ llvm_unreachable("Unknown argument type!");
+
+ Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+ }
+
+      // If this is an 8- or 16-bit value, it is really passed promoted to 32
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::BCvt)
+ ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
+
+ if (VA.isExtInLoc()) {
+ // Handle MMX values passed in XMM regs.
+ if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
+ ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
+ else if (VA.getValVT().isVector() &&
+ VA.getValVT().getScalarType() == MVT::i1 &&
+ ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
+ (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
+ // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
+ ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
+ } else
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ }
+ } else {
+ assert(VA.isMemLoc());
+ ArgValue =
+ LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
+ }
+
+    // If the value is passed via a pointer, do a load.
+ if (VA.getLocInfo() == CCValAssign::Indirect &&
+ !(Ins[I].Flags.isByVal() && VA.isRegLoc())) {
+ ArgValue =
+ DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
+ }
+
+ InVals.push_back(ArgValue);
+ }
+
+ for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
+ if (Ins[I].Flags.isSwiftAsync()) {
+ auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ if (Subtarget.is64Bit())
+ X86FI->setHasSwiftAsyncContext(true);
+ else {
+ int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
+ X86FI->setSwiftAsyncContextFrameIdx(FI);
+ SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I],
+ DAG.getFrameIndex(FI, MVT::i32),
+ MachinePointerInfo::getFixedStack(MF, FI));
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
+ }
+ }
+
+    // The Swift calling convention does not require that we copy the sret
+    // argument into %rax/%eax for the return, so we don't set SRetReturnReg.
+ if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
+ continue;
+
+ // All x86 ABIs require that for returning structs by value we copy the
+ // sret argument into %rax/%eax (depending on ABI) for the return. Save
+ // the argument into a virtual register so that we can access it from the
+ // return points.
+ if (Ins[I].Flags.isSRet()) {
+ assert(!FuncInfo->getSRetReturnReg() &&
+ "SRet return has already been set");
+ MVT PtrTy = getPointerTy(DAG.getDataLayout());
+ Register Reg =
+ MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
+ FuncInfo->setSRetReturnReg(Reg);
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
+ break;
+ }
+ }
+
+ unsigned StackSize = CCInfo.getStackSize();
+ // Align stack specially for tail calls.
+ if (shouldGuaranteeTCO(CallConv,
+ MF.getTarget().Options.GuaranteedTailCallOpt))
+ StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
+
+ if (IsVarArg)
+ VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
+ .lowerVarArgsParameters(Chain, StackSize);
+
+ // Some CCs need callee pop.
+ if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
+ MF.getTarget().Options.GuaranteedTailCallOpt)) {
+ FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
+ } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
+ // X86 interrupts must pop the error code (and the alignment padding) if
+ // present.
+ FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
+ } else {
+ FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
+ // If this is an sret function, the return should pop the hidden pointer.
+ if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
+ FuncInfo->setBytesToPopOnReturn(4);
+ }
+
+ if (!Is64Bit) {
+ // RegSaveFrameIndex is X86-64 only.
+ FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
+ }
+
+ FuncInfo->setArgumentStackSize(StackSize);
+
+ if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
+ EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
+ if (Personality == EHPersonality::CoreCLR) {
+ assert(Is64Bit);
+ // TODO: Add a mechanism to frame lowering that will allow us to indicate
+ // that we'd prefer this slot be allocated towards the bottom of the frame
+ // (i.e. near the stack pointer after allocating the frame). Every
+ // funclet needs a copy of this slot in its (mostly empty) frame, and the
+ // offset from the bottom of this and each funclet's frame must be the
+ // same, so the size of funclets' (mostly empty) frames is dictated by
+ // how far this slot is from the bottom (since they allocate just enough
+ // space to accommodate holding this slot at the correct offset).
+ int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
+ EHInfo->PSPSymFrameIdx = PSPSymFI;
+ }
+ }
+
+ if (shouldDisableArgRegFromCSR(CallConv) ||
+ F.hasFnAttribute("no_caller_saved_registers")) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (std::pair<Register, Register> Pair : MRI.liveins())
+ MRI.disableCalleeSavedRegister(Pair.first);
+ }
+
+ return Chain;
+}
+
+SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
+ SDValue Arg, const SDLoc &dl,
+ SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags,
+ bool isByVal) const {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
+ StackPtr, PtrOff);
+ if (isByVal)
+ return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
+
+ MaybeAlign Alignment;
+ if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
+ Arg.getSimpleValueType() != MVT::f80)
+ Alignment = MaybeAlign(4);
+ return DAG.getStore(
+ Chain, dl, Arg, PtrOff,
+ MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
+ Alignment);
+}
+
+/// Emit a load of the return address if tail call
+/// optimization is performed and it is required.
+SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
+ SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
+ bool Is64Bit, int FPDiff, const SDLoc &dl) const {
+ // Adjust the Return address stack slot.
+ EVT VT = getPointerTy(DAG.getDataLayout());
+ OutRetAddr = getReturnAddressFrameIndex(DAG);
+
+ // Load the "old" Return address.
+ OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
+ return SDValue(OutRetAddr.getNode(), 1);
+}
+
+/// Emit a store of the return address if tail call
+/// optimization is performed and it is required (FPDiff!=0).
+static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
+ SDValue Chain, SDValue RetAddrFrIdx,
+ EVT PtrVT, unsigned SlotSize,
+ int FPDiff, const SDLoc &dl) {
+ // Store the return address to the appropriate stack slot.
+ if (!FPDiff) return Chain;
+ // Calculate the new stack slot for the return address.
+ int NewReturnAddrFI =
+ MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
+ false);
+ SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
+ Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
+ MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), NewReturnAddrFI));
+ return Chain;
+}
+
+/// Returns a vector_shuffle mask for a movs{s|d} or movd
+/// operation of the specified width.
+SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
+ SDValue V1, SDValue V2) const {
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> Mask;
+ Mask.push_back(NumElems);
+ for (unsigned i = 1; i != NumElems; ++i)
+ Mask.push_back(i);
+ return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
+}
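
The mask built here is [N, 1, 2, ..., N-1]: index N selects element 0 of V2 (indices >= N address the second shuffle input), and the remaining lanes come from V1, which is exactly the movss/movsd behavior. A small sketch of the mask construction alone:

#include <cstdio>
#include <vector>

// Builds a getMOVL-style mask; NumElems is the vector's element count.
std::vector<int> movlMask(unsigned NumElems) {
  std::vector<int> Mask;
  Mask.push_back(NumElems);      // lane 0 comes from V2's element 0
  for (unsigned i = 1; i != NumElems; ++i)
    Mask.push_back(i);           // lanes 1..N-1 come from V1
  return Mask;
}

int main() {
  for (int M : movlMask(4))
    std::printf("%d ", M);       // prints: 4 1 2 3
  std::printf("\n");
  return 0;
}
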
+
+SDValue
+X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ SDLoc &dl = CLI.DL;
+ SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+ SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+ SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool &isTailCall = CLI.IsTailCall;
+ bool isVarArg = CLI.IsVarArg;
+ const auto *CB = CLI.CB;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool Is64Bit = Subtarget.is64Bit();
+ bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
+ bool IsSibcall = false;
+ bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
+ CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
+ bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
+ X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
+ bool HasNCSR = (CB && isa<CallInst>(CB) &&
+ CB->hasFnAttr("no_caller_saved_registers"));
+ bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
+ bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
+ bool IsCFICall = IsIndirectCall && CLI.CFIType;
+ const Module *M = MF.getMMI().getModule();
+ Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
+
+ MachineFunction::CallSiteInfo CSInfo;
+ if (CallConv == CallingConv::X86_INTR)
+ report_fatal_error("X86 interrupts may not be called directly");
+
+ bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
+ if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
+ // If we are using a GOT, disable tail calls to external symbols with
+ // default visibility. Tail calling such a symbol requires using a GOT
+ // relocation, which forces early binding of the symbol. This breaks code
+    // that requires lazy function symbol resolution. Using musttail or
+ // GuaranteedTailCallOpt will override this.
+ GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ if (!G || (!G->getGlobal()->hasLocalLinkage() &&
+ G->getGlobal()->hasDefaultVisibility()))
+ isTailCall = false;
+ }
+
+ if (isTailCall && !IsMustTail) {
+ // Check if it's really possible to do a tail call.
+ isTailCall = IsEligibleForTailCallOptimization(
+ Callee, CallConv, IsCalleePopSRet, isVarArg, CLI.RetTy, Outs, OutVals,
+ Ins, DAG);
+
+ // Sibcalls are automatically detected tailcalls which do not require
+ // ABI changes.
+ if (!IsGuaranteeTCO && isTailCall)
+ IsSibcall = true;
+
+ if (isTailCall)
+ ++NumTailCalls;
+ }
+
+ if (IsMustTail && !isTailCall)
+ report_fatal_error("failed to perform tail call elimination on a call "
+ "site marked musttail");
+
+ assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
+ "Var args not supported with calling convention fastcc, ghc or hipe");
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
+
+ // Allocate shadow area for Win64.
+ if (IsWin64)
+ CCInfo.AllocateStack(32, Align(8));
+
+ CCInfo.AnalyzeArguments(Outs, CC_X86);
+
+  // In the vectorcall calling convention, a second pass is required for the
+  // HVA types.
+ if (CallingConv::X86_VectorCall == CallConv) {
+ CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
+ }
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
+ if (IsSibcall)
+    // This is a sibcall. The memory operands are available in the caller's
+    // own caller's stack.
+ NumBytes = 0;
+ else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
+ NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
+
+ int FPDiff = 0;
+ if (isTailCall &&
+ shouldGuaranteeTCO(CallConv,
+ MF.getTarget().Options.GuaranteedTailCallOpt)) {
+ // Lower arguments at fp - stackoffset + fpdiff.
+ unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
+
+ FPDiff = NumBytesCallerPushed - NumBytes;
+
+ // Set the delta of movement of the returnaddr stackslot.
+ // But only set if delta is greater than previous delta.
+ if (FPDiff < X86Info->getTCReturnAddrDelta())
+ X86Info->setTCReturnAddrDelta(FPDiff);
+ }
+
+ unsigned NumBytesToPush = NumBytes;
+ unsigned NumBytesToPop = NumBytes;
+
+ // If we have an inalloca argument, all stack space has already been allocated
+  // for us and is right at the top of the stack. We don't support multiple
+ // arguments passed in memory when using inalloca.
+ if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
+ NumBytesToPush = 0;
+ if (!ArgLocs.back().isMemLoc())
+ report_fatal_error("cannot use inalloca attribute on a register "
+ "parameter");
+ if (ArgLocs.back().getLocMemOffset() != 0)
+ report_fatal_error("any parameter with the inalloca attribute must be "
+ "the only memory argument");
+ } else if (CLI.IsPreallocated) {
+ assert(ArgLocs.back().isMemLoc() &&
+ "cannot use preallocated attribute on a register "
+ "parameter");
+ SmallVector<size_t, 4> PreallocatedOffsets;
+ for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
+ if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
+ PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
+ }
+ }
+ auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
+ size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
+ MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
+ MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
+ NumBytesToPush = 0;
+ }
+
+ if (!IsSibcall && !IsMustTail)
+ Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
+ NumBytes - NumBytesToPush, dl);
+
+ SDValue RetAddrFrIdx;
+ // Load return address for tail calls.
+ if (isTailCall && FPDiff)
+ Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
+ Is64Bit, FPDiff, dl);
+
+ SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+ SDValue StackPtr;
+
+  // The next loop assumes that the locations are in the same order as the
+  // input arguments.
+ assert(isSortedByValueNo(ArgLocs) &&
+ "Argument Location list must be sorted before lowering");
+
+ // Walk the register/memloc assignments, inserting copies/loads. In the case
+  // of tail call optimization, arguments are handled later.
+ const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
+ ++I, ++OutIndex) {
+ assert(OutIndex < Outs.size() && "Invalid Out index");
+ // Skip inalloca/preallocated arguments, they have already been written.
+ ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
+ if (Flags.isInAlloca() || Flags.isPreallocated())
+ continue;
+
+ CCValAssign &VA = ArgLocs[I];
+ EVT RegVT = VA.getLocVT();
+ SDValue Arg = OutVals[OutIndex];
+ bool isByVal = Flags.isByVal();
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::AExt:
+ if (Arg.getValueType().isVector() &&
+ Arg.getValueType().getVectorElementType() == MVT::i1)
+ Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
+ else if (RegVT.is128BitVector()) {
+ // Special case: passing MMX values in XMM registers.
+ Arg = DAG.getBitcast(MVT::i64, Arg);
+ Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
+ Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
+ } else
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getBitcast(RegVT, Arg);
+ break;
+ case CCValAssign::Indirect: {
+ if (isByVal) {
+ // Memcpy the argument to a temporary stack slot to prevent
+ // the caller from seeing any modifications the callee may make
+ // as guaranteed by the `byval` attribute.
+ int FrameIdx = MF.getFrameInfo().CreateStackObject(
+ Flags.getByValSize(),
+ std::max(Align(16), Flags.getNonZeroByValAlign()), false);
+ SDValue StackSlot =
+ DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
+ Chain =
+ CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
+ // From now on treat this as a regular pointer
+ Arg = StackSlot;
+ isByVal = false;
+ } else {
+ // Store the argument.
+ SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
+ int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+ Chain = DAG.getStore(
+ Chain, dl, Arg, SpillSlot,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
+ Arg = SpillSlot;
+ }
+ break;
+ }
+ }
+
+ if (VA.needsCustom()) {
+ assert(VA.getValVT() == MVT::v64i1 &&
+ "Currently the only custom case is when we split v64i1 to 2 regs");
+ // Split v64i1 value into two registers
+ Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
+ } else if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ const TargetOptions &Options = DAG.getTarget().Options;
+ if (Options.EmitCallSiteInfo)
+ CSInfo.emplace_back(VA.getLocReg(), I);
+ if (isVarArg && IsWin64) {
+        // The Win64 ABI requires an argument XMM reg to be copied to the
+        // corresponding shadow reg if the callee is a varargs function.
+ Register ShadowReg;
+ switch (VA.getLocReg()) {
+ case X86::XMM0: ShadowReg = X86::RCX; break;
+ case X86::XMM1: ShadowReg = X86::RDX; break;
+ case X86::XMM2: ShadowReg = X86::R8; break;
+ case X86::XMM3: ShadowReg = X86::R9; break;
+ }
+ if (ShadowReg)
+ RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
+ }
+ } else if (!IsSibcall && (!isTailCall || isByVal)) {
+ assert(VA.isMemLoc());
+ if (!StackPtr.getNode())
+ StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
+ getPointerTy(DAG.getDataLayout()));
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
+ dl, DAG, VA, Flags, isByVal));
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
+
+ if (Subtarget.isPICStyleGOT()) {
+    // ELF / PIC requires the GOT pointer in the EBX register before function
+    // calls via the PLT (except for regcall).
+ if (!isTailCall) {
+      // An indirect call with the RegCall calling convention may use up all
+      // the general registers, so it is not suitable to reserve the EBX
+      // register for the GOT address; just let the register allocator handle it.
+ if (CallConv != CallingConv::X86_RegCall)
+ RegsToPass.push_back(std::make_pair(
+ Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
+ getPointerTy(DAG.getDataLayout()))));
+ } else {
+      // If we are tail calling and generating PIC/GOT style code, load the
+ // address of the callee into ECX. The value in ecx is used as target of
+ // the tail jump. This is done to circumvent the ebx/callee-saved problem
+ // for tail calls on PIC/GOT architectures. Normally we would just put the
+ // address of GOT into ebx and then call target@PLT. But for tail calls
+ // ebx would be restored (since ebx is callee saved) before jumping to the
+ // target@PLT.
+
+ // Note: The actual moving to ECX is done further down.
+ GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ if (G && !G->getGlobal()->hasLocalLinkage() &&
+ G->getGlobal()->hasDefaultVisibility())
+ Callee = LowerGlobalAddress(Callee, DAG);
+ else if (isa<ExternalSymbolSDNode>(Callee))
+ Callee = LowerExternalSymbol(Callee, DAG);
+ }
+ }
+
+ if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
+ (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
+ // From AMD64 ABI document:
+ // For calls that may call functions that use varargs or stdargs
+ // (prototype-less calls or calls to functions containing ellipsis (...) in
+    // the declaration) %al is used as a hidden argument to specify the number
+    // of SSE registers used. The contents of %al do not need to match exactly
+    // the number of registers, but must be an upper bound on the number of SSE
+    // registers used, in the range 0 to 8 inclusive.
+
+ // Count the number of XMM registers allocated.
+ static const MCPhysReg XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
+ assert((Subtarget.hasSSE1() || !NumXMMRegs)
+ && "SSE registers cannot be used when SSE is disabled");
+ RegsToPass.push_back(std::make_pair(Register(X86::AL),
+ DAG.getConstant(NumXMMRegs, dl,
+ MVT::i8)));
+ }
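
A worked illustration of the rule above, with assumed values: a caller passing three doubles to a variadic function uses XMM0..XMM2, so any %al in [3, 8] is valid, and the lowering uses the exact count:

#include <cstdio>

int main() {
  const unsigned NumXMMArgRegs = 8; // XMM0..XMM7 per the SysV AMD64 ABI
  unsigned NumXMMRegsUsed = 3;      // assumed: three double arguments
  // %al must be an upper bound on the SSE registers used, in [0, 8].
  unsigned AL =
      NumXMMRegsUsed <= NumXMMArgRegs ? NumXMMRegsUsed : NumXMMArgRegs;
  std::printf("mov $%u, %%al\n", AL); // prints: mov $3, %al
  return 0;
}
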
+
+ if (isVarArg && IsMustTail) {
+ const auto &Forwards = X86Info->getForwardedMustTailRegParms();
+ for (const auto &F : Forwards) {
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
+ RegsToPass.push_back(std::make_pair(F.PReg, Val));
+ }
+ }
+
+  // For tail calls, lower the arguments to the 'real' stack slots. Sibcalls
+ // don't need this because the eligibility check rejects calls that require
+ // shuffling arguments passed in memory.
+ if (!IsSibcall && isTailCall) {
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+ SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
+
+ SmallVector<SDValue, 8> MemOpChains2;
+ SDValue FIN;
+ int FI = 0;
+ for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
+ ++I, ++OutsIndex) {
+ CCValAssign &VA = ArgLocs[I];
+
+ if (VA.isRegLoc()) {
+ if (VA.needsCustom()) {
+ assert((CallConv == CallingConv::X86_RegCall) &&
+ "Expecting custom case only in regcall calling convention");
+          // This means that we are in the special case where one argument
+          // was passed through two register locations; skip the next location.
+ ++I;
+ }
+
+ continue;
+ }
+
+ assert(VA.isMemLoc());
+ SDValue Arg = OutVals[OutsIndex];
+ ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
+ // Skip inalloca/preallocated arguments. They don't require any work.
+ if (Flags.isInAlloca() || Flags.isPreallocated())
+ continue;
+ // Create frame index.
+ int32_t Offset = VA.getLocMemOffset()+FPDiff;
+ uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
+ FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
+ FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+
+ if (Flags.isByVal()) {
+ // Copy relative to framepointer.
+ SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
+ if (!StackPtr.getNode())
+ StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
+ getPointerTy(DAG.getDataLayout()));
+ Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
+ StackPtr, Source);
+
+ MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
+ ArgChain,
+ Flags, DAG, dl));
+ } else {
+ // Store relative to framepointer.
+ MemOpChains2.push_back(DAG.getStore(
+ ArgChain, dl, Arg, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
+ }
+ }
+
+ if (!MemOpChains2.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
+
+ // Store the return address to the appropriate stack slot.
+ Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
+ getPointerTy(DAG.getDataLayout()),
+ RegInfo->getSlotSize(), FPDiff, dl);
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and glue operands which copy the outgoing args into registers.
+ SDValue InGlue;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InGlue);
+ InGlue = Chain.getValue(1);
+ }
+
+ if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
+ assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
+ // In the 64-bit large code model, we have to make all calls
+ // through a register, since the call instruction's 32-bit
+ // pc-relative offset may not be large enough to hold the whole
+ // address.
+ } else if (Callee->getOpcode() == ISD::GlobalAddress ||
+ Callee->getOpcode() == ISD::ExternalSymbol) {
+ // Lower direct calls to global addresses and external symbols. Setting
+ // ForCall to true here has the effect of removing WrapperRIP when possible
+ // to allow direct calls to be selected without first materializing the
+ // address into a register.
+ Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
+ } else if (Subtarget.isTarget64BitILP32() &&
+ Callee.getValueType() == MVT::i32) {
+    // Zero-extend the 32-bit Callee address into a 64-bit one, per the x32 ABI.
+ Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
+ }
+
+ // Returns a chain & a glue for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+
+ if (!IsSibcall && isTailCall && !IsMustTail) {
+ Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
+ InGlue = Chain.getValue(1);
+ }
+
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ if (isTailCall)
+ Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Add a register mask operand representing the call-preserved registers.
+ const uint32_t *Mask = [&]() {
+ auto AdaptedCC = CallConv;
+ // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
+ // use X86_INTR calling convention because it has the same CSR mask
+ // (same preserved registers).
+ if (HasNCSR)
+ AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
+    // If NoCalleeSavedRegisters is requested, then use GHC since it happens
+ // to use the CSR_NoRegs_RegMask.
+ if (CB && CB->hasFnAttr("no_callee_saved_registers"))
+ AdaptedCC = (CallingConv::ID)CallingConv::GHC;
+ return RegInfo->getCallPreservedMask(MF, AdaptedCC);
+ }();
+ assert(Mask && "Missing call preserved mask for calling convention");
+
+ // If this is an invoke in a 32-bit function using a funclet-based
+ // personality, assume the function clobbers all registers. If an exception
+ // is thrown, the runtime will not restore CSRs.
+ // FIXME: Model this more precisely so that we can register allocate across
+ // the normal edge and spill and fill across the exceptional edge.
+ if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
+ const Function &CallerFn = MF.getFunction();
+ EHPersonality Pers =
+ CallerFn.hasPersonalityFn()
+ ? classifyEHPersonality(CallerFn.getPersonalityFn())
+ : EHPersonality::Unknown;
+ if (isFuncletEHPersonality(Pers))
+ Mask = RegInfo->getNoPreservedMask();
+ }
+
+ // Define a new register mask from the existing mask.
+ uint32_t *RegMask = nullptr;
+
+ // In some calling conventions we need to remove the used physical registers
+ // from the reg mask. Create a new RegMask for such calling conventions.
+ // RegMask for calling conventions that disable only return registers (e.g.
+ // preserve_most) will be modified later in LowerCallResult.
+ bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR;
+ if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) {
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+
+ // Allocate a new Reg Mask and copy Mask.
+ RegMask = MF.allocateRegMask();
+ unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
+ memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
+
+ // Make sure all sub registers of the argument registers are reset
+ // in the RegMask.
+ if (ShouldDisableArgRegs) {
+ for (auto const &RegPair : RegsToPass)
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first))
+ RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
+ }
+
+ // Create the RegMask Operand according to our updated mask.
+ Ops.push_back(DAG.getRegisterMask(RegMask));
+ } else {
+ // Create the RegMask Operand according to the static mask.
+ Ops.push_back(DAG.getRegisterMask(Mask));
+ }
+
+ if (InGlue.getNode())
+ Ops.push_back(InGlue);
+
+ if (isTailCall) {
+ // We used to do:
+ //// If this is the first return lowered for this function, add the regs
+ //// to the liveout set for the function.
+ // This isn't right, although it's probably harmless on x86; liveouts
+ // should be computed from returns not tail calls. Consider a void
+ // function making a tail call to a function returning int.
+ MF.getFrameInfo().setHasTailCall();
+ SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
+
+ if (IsCFICall)
+ Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
+
+ DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
+ DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
+ return Ret;
+ }
+
+ if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
+ Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
+ } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
+ // Calls with a "clang.arc.attachedcall" bundle are special. They should be
+ // expanded to the call, directly followed by a special marker sequence and
+    // a call to an ObjC library function. Use the CALL_RVMARKER to do that.
+ assert(!isTailCall &&
+ "tail calls cannot be marked with clang.arc.attachedcall");
+ assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
+
+ // Add a target global address for the retainRV/claimRV runtime function
+ // just before the call target.
+ Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
+ Ops.insert(Ops.begin() + 1, GA);
+ Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
+ } else {
+ Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
+ }
+
+ if (IsCFICall)
+ Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
+
+ InGlue = Chain.getValue(1);
+ DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
+ DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
+
+ // Save heapallocsite metadata.
+ if (CLI.CB)
+ if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
+ DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
+
+ // Create the CALLSEQ_END node.
+ unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
+ if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
+ DAG.getTarget().Options.GuaranteedTailCallOpt))
+ NumBytesForCalleeToPop = NumBytes; // Callee pops everything
+ else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
+ // If this call passes a struct-return pointer, the callee
+ // pops that struct pointer.
+ NumBytesForCalleeToPop = 4;
+
+ // Returns a glue for retval copy to use.
+ if (!IsSibcall) {
+ Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
+ InGlue, dl);
+ InGlue = Chain.getValue(1);
+ }
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
+ InVals, RegMask);
+}
+
+//===----------------------------------------------------------------------===//
+// Fast Calling Convention (tail call) implementation
+//===----------------------------------------------------------------------===//
+
+// Like stdcall, the callee cleans up the arguments, except that ECX is
+// reserved for storing the address of the tail-called function. Only 2
+// registers are free for argument passing (inreg). Tail call optimization
+// is performed
+// provided:
+// * tailcallopt is enabled
+// * caller/callee are fastcc
+// On X86_64 architecture with GOT-style position independent code only local
+// (within module) calls are supported at the moment.
+// To keep the stack aligned according to the platform ABI, the function
+// GetAlignedArgumentStackSize ensures that the argument delta is always a
+// multiple of the stack alignment. (Dynamic linkers need this; Darwin's dyld,
+// for example.)
+// If a tail-called callee has more arguments than the caller, the
+// caller needs to make sure that there is room to move the RETADDR to. This is
+// achieved by reserving an area the size of the argument delta right after the
+// original RETADDR, but before the saved framepointer or the spilled registers
+// e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
+// stack layout:
+// arg1
+// arg2
+// RETADDR
+// [ new RETADDR
+// move area ]
+// (possible EBP)
+// ESI
+// EDI
+// local1 ..
+
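A worked example of the delta described above, with assumed sizes: a fastcc caller entered with 8 bytes of stack arguments tail-calls a callee needing 24 bytes; FPDiff = 8 - 24 = -16, so the return address is rewritten 16 bytes lower and the move area covers the difference:

#include <cstdio>

int main() {
  int BytesCallerPushed = 8; // caller's incoming argument bytes (callee-popped)
  int NumBytes = 24;         // bytes the tail-called function expects
  int FPDiff = BytesCallerPushed - NumBytes; // -16
  std::printf("FPDiff = %d -> RETADDR moves %d bytes\n", FPDiff, -FPDiff);
  return 0;
}
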
+/// Round the stack size up so that, together with the return-address slot, it
+/// satisfies the alignment requirement; e.g. 16n + 12 for a 16-byte alignment
+/// with a 4-byte slot.
+unsigned
+X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
+ SelectionDAG &DAG) const {
+ const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
+ const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
+ assert(StackSize % SlotSize == 0 &&
+ "StackSize must be a multiple of SlotSize");
+ return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
+}
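
The formula rounds StackSize up so that StackSize + SlotSize lands on a stack-alignment boundary. A minimal numeric check with assumed 32-bit parameters (4-byte slot, 16-byte alignment):

#include <cassert>
#include <cstdio>

unsigned alignedArgStackSize(unsigned StackSize, unsigned SlotSize,
                             unsigned StackAlign) {
  // Equivalent to alignTo(StackSize + SlotSize, StackAlign) - SlotSize.
  unsigned Padded =
      (StackSize + SlotSize + StackAlign - 1) / StackAlign * StackAlign;
  return Padded - SlotSize;
}

int main() {
  // 20 bytes of arguments: alignTo(24, 16) = 32, minus the 4-byte slot = 28,
  // which has the 16n + 12 form named in the comment above.
  unsigned Aligned = alignedArgStackSize(20, 4, 16);
  assert(Aligned % 16 == 12);
  std::printf("%u\n", Aligned); // prints: 28
  return 0;
}
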
+
+/// Return true if the given stack call argument is already available at the
+/// same (relative) position in the caller's incoming argument stack.
+static
+bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
+ MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
+ const X86InstrInfo *TII, const CCValAssign &VA) {
+ unsigned Bytes = Arg.getValueSizeInBits() / 8;
+
+ for (;;) {
+ // Look through nodes that don't alter the bits of the incoming value.
+ unsigned Op = Arg.getOpcode();
+ if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||
+ Op == ISD::AssertZext) {
+ Arg = Arg.getOperand(0);
+ continue;
+ }
+ if (Op == ISD::TRUNCATE) {
+ const SDValue &TruncInput = Arg.getOperand(0);
+ if (TruncInput.getOpcode() == ISD::AssertZext &&
+ cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
+ Arg.getValueType()) {
+ Arg = TruncInput.getOperand(0);
+ continue;
+ }
+ }
+ break;
+ }
+
+ int FI = INT_MAX;
+ if (Arg.getOpcode() == ISD::CopyFromReg) {
+ Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
+ if (!VR.isVirtual())
+ return false;
+ MachineInstr *Def = MRI->getVRegDef(VR);
+ if (!Def)
+ return false;
+ if (!Flags.isByVal()) {
+ if (!TII->isLoadFromStackSlot(*Def, FI))
+ return false;
+ } else {
+ unsigned Opcode = Def->getOpcode();
+ if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
+ Opcode == X86::LEA64_32r) &&
+ Def->getOperand(1).isFI()) {
+ FI = Def->getOperand(1).getIndex();
+ Bytes = Flags.getByValSize();
+ } else
+ return false;
+ }
+ } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
+ if (Flags.isByVal())
+ // ByVal argument is passed in as a pointer but it's now being
+ // dereferenced. e.g.
+ // define @foo(%struct.X* %A) {
+ // tail call @bar(%struct.X* byval %A)
+ // }
+ return false;
+ SDValue Ptr = Ld->getBasePtr();
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
+ if (!FINode)
+ return false;
+ FI = FINode->getIndex();
+ } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
+ FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
+ FI = FINode->getIndex();
+ Bytes = Flags.getByValSize();
+ } else
+ return false;
+
+ assert(FI != INT_MAX);
+ if (!MFI.isFixedObjectIndex(FI))
+ return false;
+
+ if (Offset != MFI.getObjectOffset(FI))
+ return false;
+
+ // If this is not byval, check that the argument stack object is immutable.
+ // inalloca and argument copy elision can create mutable argument stack
+ // objects. Byval objects can be mutated, but a byval call intends to pass the
+ // mutated memory.
+ if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
+ return false;
+
+ if (VA.getLocVT().getFixedSizeInBits() >
+ Arg.getValueSizeInBits().getFixedValue()) {
+ // If the argument location is wider than the argument type, check that any
+ // extension flags match.
+ if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
+ Flags.isSExt() != MFI.isObjectSExt(FI)) {
+ return false;
+ }
+ }
+
+ return Bytes == MFI.getObjectSize(FI);
+}
+
+/// Check whether the call is eligible for tail call optimization. Targets
+/// that want to do tail call optimization should implement this function.
+bool X86TargetLowering::IsEligibleForTailCallOptimization(
+ SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleePopSRet,
+ bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
+ if (!mayTailCallThisCC(CalleeCC))
+ return false;
+
+ // If -tailcallopt is specified, make fastcc functions tail-callable.
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Function &CallerF = MF.getFunction();
+
+ // If the function return type is x86_fp80 and the callee return type is not,
+ // then the FP_EXTEND of the call result is not a nop. It's not safe to
+ // perform a tailcall optimization here.
+ if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
+ return false;
+
+ CallingConv::ID CallerCC = CallerF.getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+ bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
+ bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
+ bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
+ CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
+
+ // Win64 functions have extra shadow space for argument homing. Don't do the
+ // sibcall if the caller and callee have mismatched expectations for this
+ // space.
+ if (IsCalleeWin64 != IsCallerWin64)
+ return false;
+
+ if (IsGuaranteeTCO) {
+ if (canGuaranteeTCO(CalleeCC) && CCMatch)
+ return true;
+ return false;
+ }
+
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes. This is what gcc calls sibcall.
+
+ // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
+ // emit a special epilogue.
+ const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ if (RegInfo->hasStackRealignment(MF))
+ return false;
+
+ // Also avoid sibcall optimization if we're an sret return fn and the callee
+ // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
+ // insufficient.
+ if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
+    // For a compatible tail call, the callee must return our sret pointer. So it
+ // needs to be (a) an sret function itself and (b) we pass our sret as its
+ // sret. Condition #b is harder to determine.
+ return false;
+ } else if (IsCalleePopSRet)
+ // The callee pops an sret, so we cannot tail-call, as our caller doesn't
+ // expect that.
+ return false;
+
+ // Do not sibcall optimize vararg calls unless all arguments are passed via
+ // registers.
+ LLVMContext &C = *DAG.getContext();
+ if (isVarArg && !Outs.empty()) {
+ // Optimizing for varargs on Win64 is unlikely to be safe without
+ // additional testing.
+ if (IsCalleeWin64 || IsCallerWin64)
+ return false;
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
+ for (const auto &VA : ArgLocs)
+ if (!VA.isRegLoc())
+ return false;
+ }
+
+ // If the call result is in ST0 / ST1, it needs to be popped off the x87
+  // stack. Therefore, if it's not used by the call, it is not safe to optimize
+ // this into a sibcall.
+ bool Unused = false;
+ for (const auto &In : Ins) {
+ if (!In.Used) {
+ Unused = true;
+ break;
+ }
+ }
+ if (Unused) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
+ CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
+ for (const auto &VA : RVLocs) {
+ if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
+ return false;
+ }
+ }
+
+ // Check that the call results are passed in the same way.
+ if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
+ RetCC_X86, RetCC_X86))
+ return false;
+ // The callee has to preserve all registers the caller needs to preserve.
+ const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
+ if (!CCMatch) {
+ const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
+ if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
+ return false;
+ }
+
+ unsigned StackArgsSize = 0;
+
+ // If the callee takes no arguments then go on to check the results of the
+ // call.
+ if (!Outs.empty()) {
+ // Check if stack adjustment is needed. For now, do not do this if any
+ // argument is passed on the stack.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
+
+ // Allocate shadow area for Win64
+ if (IsCalleeWin64)
+ CCInfo.AllocateStack(32, Align(8));
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
+ StackArgsSize = CCInfo.getStackSize();
+
+ if (CCInfo.getStackSize()) {
+ // Check if the arguments are already laid out in the right way as
+ // the caller's fixed stack objects.
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const X86InstrInfo *TII = Subtarget.getInstrInfo();
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ const CCValAssign &VA = ArgLocs[I];
+ SDValue Arg = OutVals[I];
+ ISD::ArgFlagsTy Flags = Outs[I].Flags;
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return false;
+ if (!VA.isRegLoc()) {
+ if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
+ TII, VA))
+ return false;
+ }
+ }
+ }
+
+ bool PositionIndependent = isPositionIndependent();
+ // If the tailcall address may be in a register, then make sure it's
+ // possible to register allocate for it. In 32-bit, the call address can
+ // only target EAX, EDX, or ECX since the tail call must be scheduled after
+ // callee-saved registers are restored. These happen to be the same
+ // registers used to pass 'inreg' arguments so watch out for those.
+ if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee)) ||
+ PositionIndependent)) {
+ unsigned NumInRegs = 0;
+ // In PIC we need an extra register to formulate the address computation
+ // for the callee.
+ unsigned MaxInRegs = PositionIndependent ? 2 : 3;
+
+ for (const auto &VA : ArgLocs) {
+ if (!VA.isRegLoc())
+ continue;
+ Register Reg = VA.getLocReg();
+ switch (Reg) {
+ default: break;
+ case X86::EAX: case X86::EDX: case X86::ECX:
+ if (++NumInRegs == MaxInRegs)
+ return false;
+ break;
+ }
+ }
+ }
+
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
+ return false;
+ }
+
+ bool CalleeWillPop =
+ X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
+ MF.getTarget().Options.GuaranteedTailCallOpt);
+
+ if (unsigned BytesToPop =
+ MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
+ // If we have bytes to pop, the callee must pop them.
+ bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
+ if (!CalleePopMatches)
+ return false;
+ } else if (CalleeWillPop && StackArgsSize > 0) {
+ // If we don't have bytes to pop, make sure the callee doesn't pop any.
+ return false;
+ }
+
+ return true;
+}
+
+/// Determines whether the callee is required to pop its own arguments.
+/// Callee pop is necessary to support tail calls.
+bool X86::isCalleePop(CallingConv::ID CallingConv,
+ bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
+ // If GuaranteeTCO is true, we force some calls to be callee pop so that we
+ // can guarantee TCO.
+ if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
+ return true;
+
+ switch (CallingConv) {
+ default:
+ return false;
+ case CallingConv::X86_StdCall:
+ case CallingConv::X86_FastCall:
+ case CallingConv::X86_ThisCall:
+ case CallingConv::X86_VectorCall:
+ return !is64Bit;
+ }
+}
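
Read as a table, the switch says: only the 32-bit callee-cleanup conventions (stdcall, fastcall, thiscall, vectorcall) pop their own arguments; in 64-bit mode, and for every other convention, the caller cleans up unless GuaranteedTCO forced callee pop above. A condensed sketch, with the conventions as plain strings standing in for the CallingConv enum:

#include <cstdio>
#include <string>

// Assumed string-keyed stand-in for the switch in X86::isCalleePop above.
bool calleePops32(const std::string &CC, bool Is64Bit) {
  if (Is64Bit)
    return false; // these conventions only pop in 32-bit mode
  return CC == "stdcall" || CC == "fastcall" || CC == "thiscall" ||
         CC == "vectorcall";
}

int main() {
  std::printf("%d %d\n", calleePops32("stdcall", false), // 1
              calleePops32("stdcall", true));            // 0
  return 0;
}
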
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
index 3baf73344b62..785bdd83cd99 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
@@ -169,7 +169,7 @@ bool X86IndirectBranchTrackingPass::runOnMachineFunction(MachineFunction &MF) {
break;
} else if (I->isEHLabel()) {
// Old Landingpad BB (is not Landingpad now) with
- // the the old "callee" EHLabel.
+ // the old "callee" EHLabel.
MCSymbol *Sym = I->getOperand(0).getMCSymbol();
if (!MF.hasCallSiteLandingPad(Sym))
continue;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp
index 29ae05bf0c94..3e11ab2d98a4 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp
@@ -110,6 +110,11 @@ bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples,
Prefetches &Prefetches) const {
assert(Prefetches.empty() &&
"Expected caller passed empty PrefetchInfo vector.");
+
+  // There is no point in matching prefetch hints if the profile is using MD5.
+ if (FunctionSamples::UseMD5)
+ return false;
+
static constexpr std::pair<StringLiteral, unsigned> HintTypes[] = {
{"_nta_", X86::PREFETCHNTA},
{"_t0_", X86::PREFETCHT0},
@@ -125,12 +130,12 @@ bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples,
// Convert serialized prefetch hints into PrefetchInfo objects, and populate
// the Prefetches vector.
for (const auto &S_V : *T) {
- StringRef Name = S_V.getKey();
+ StringRef Name = S_V.first.stringRef();
if (Name.consume_front(SerializedPrefetchPrefix)) {
int64_t D = static_cast<int64_t>(S_V.second);
unsigned IID = 0;
for (const auto &HintType : HintTypes) {
- if (Name.startswith(HintType.first)) {
+ if (Name.starts_with(HintType.first)) {
Name = Name.drop_front(HintType.first.size());
IID = HintType.second;
break;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86Instr3DNow.td b/contrib/llvm-project/llvm/lib/Target/X86/X86Instr3DNow.td
index cd1b06365971..3be03ab0f433 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86Instr3DNow.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86Instr3DNow.td
@@ -79,21 +79,11 @@ let SchedRW = [WriteEMMS],
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
[(int_x86_mmx_femms)]>, TB;
-// PREFETCHWT1 is supported we want to use it for everything but T0.
-def PrefetchWLevel : PatFrag<(ops), (i32 imm), [{
- return N->getSExtValue() == 3 || !Subtarget->hasPREFETCHWT1();
-}]>;
-
-// Use PREFETCHWT1 for NTA, T2, T1.
-def PrefetchWT1Level : ImmLeaf<i32, [{
- return Imm < 3;
-}]>;
-
let SchedRW = [WriteLoad] in {
let Predicates = [Has3DNow, NoSSEPrefetch] in
def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr),
"prefetch\t$addr",
- [(prefetch addr:$addr, imm, imm, (i32 1))]>, TB;
+ [(prefetch addr:$addr, timm, timm, (i32 1))]>, TB;
def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
[(prefetch addr:$addr, (i32 1), (i32 PrefetchWLevel), (i32 1))]>,
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
index b5dac7a0c65a..e1fe2b680b96 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12,194 +12,6 @@
//
//===----------------------------------------------------------------------===//
-// Group template arguments that can be derived from the vector type (EltNum x
-// EltVT). These are things like the register class for the writemask, etc.
-// The idea is to pass one of these as the template argument rather than the
-// individual arguments.
-// The template is also used for scalar types, in this case numelts is 1.
-class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
- string suffix = ""> {
- RegisterClass RC = rc;
- ValueType EltVT = eltvt;
- int NumElts = numelts;
-
- // Corresponding mask register class.
- RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
-
- // Corresponding mask register pair class.
- RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
- !cast<RegisterOperand>("VK" # NumElts # "Pair"));
-
- // Corresponding write-mask register class.
- RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
-
- // The mask VT.
- ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
-
- // Suffix used in the instruction mnemonic.
- string Suffix = suffix;
-
- // VTName is a string name for vector VT. For vector types it will be
- // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
- // It is a little bit complex for scalar types, where NumElts = 1.
- // In this case we build v4f32 or v2f64
- string VTName = "v" # !if (!eq (NumElts, 1),
- !if (!eq (EltVT.Size, 16), 8,
- !if (!eq (EltVT.Size, 32), 4,
- !if (!eq (EltVT.Size, 64), 2, NumElts))), NumElts) # EltVT;
-
- // The vector VT.
- ValueType VT = !cast<ValueType>(VTName);
-
- string EltTypeName = !cast<string>(EltVT);
- // Size of the element type in bits, e.g. 32 for v16i32.
- string EltSizeName = !subst("i", "", !subst("f", "", !subst("b", "", EltTypeName)));
- int EltSize = EltVT.Size;
-
- // "i" for integer types and "f" for floating-point types
- string TypeVariantName = !subst("b", "", !subst(EltSizeName, "", EltTypeName));
-
- // Size of RC in bits, e.g. 512 for VR512.
- int Size = VT.Size;
-
- // The corresponding memory operand, e.g. i512mem for VR512.
- X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
- X86MemOperand ScalarMemOp = !cast<X86MemOperand>(!subst("b", "", EltTypeName) # "mem");
- // FP scalar memory operand for intrinsics - ssmem/sdmem.
- Operand IntScalarMemOp = !if (!eq (EltTypeName, "f16"), !cast<Operand>("shmem"),
- !if (!eq (EltTypeName, "bf16"), !cast<Operand>("shmem"),
- !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
- !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?))));
-
- // Load patterns
- PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
-
- PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
-
- PatFrag ScalarLdFrag = !cast<PatFrag>("load" # !subst("b", "", EltTypeName));
- PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);
-
- PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f16"), !cast<PatFrags>("sse_load_f16"),
- !if (!eq (EltTypeName, "bf16"), !cast<PatFrags>("sse_load_f16"),
- !if (!eq (EltTypeName, "f32"), !cast<PatFrags>("sse_load_f32"),
- !if (!eq (EltTypeName, "f64"), !cast<PatFrags>("sse_load_f64"), ?))));
-
- // The string to specify embedded broadcast in assembly.
- string BroadcastStr = "{1to" # NumElts # "}";
-
- // 8-bit compressed displacement tuple/subvector format. This is only
- // defined for NumElts <= 8.
- CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
- !cast<CD8VForm>("CD8VT" # NumElts), ?);
-
- SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
- !if (!eq (Size, 256), sub_ymm, ?));
-
- Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
- !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
- !if (!eq (EltTypeName, "f16"), SSEPackedSingle, // FIXME?
- !if (!eq (EltTypeName, "bf16"), SSEPackedSingle, // FIXME?
- SSEPackedInt))));
-
- RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X,
- !if (!eq (EltTypeName, "f16"), FR16X,
- !if (!eq (EltTypeName, "bf16"), FR16X,
- FR64X)));
-
- dag ImmAllZerosV = (VT immAllZerosV);
-
- string ZSuffix = !if (!eq (Size, 128), "Z128",
- !if (!eq (Size, 256), "Z256", "Z"));
-}
-
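
The deleted `X86VectorVTInfo` block above is TableGen's derive-once idiom: compute every name and fact implied by (NumElts, EltVT) in a single class and pass the bundled record around instead of a long argument list (the instruction multiclasses below still consume it, so the definitions appear to be relocated in the reorganized .td files rather than dropped). A minimal standalone sketch of the idiom, runnable with plain `llvm-tblgen`; the string fields here stand in for the real `!cast<RegisterClass>`/`!cast<ValueType>` lookups, which need the full Target definitions:

    // vtinfo_sketch.td -- illustration only, not the real class
    class VTInfoSketch<int numelts, string eltvt> {
      int NumElts = numelts;
      string EltVT = eltvt;
      // Everything below is derived, never passed in:
      string VTName       = "v" # numelts # eltvt;        // e.g. v8i32
      string MaskRCName   = "VK" # numelts;               // e.g. VK8
      string BroadcastStr = "{1to" # numelts # "}";       // e.g. {1to8}
    }
    def v8i32_sketch : VTInfoSketch<8, "i32">;

Running `llvm-tblgen vtinfo_sketch.td` prints the fully resolved record, a quick way to sanity-check `#`-paste logic.
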
-def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
-def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
-def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
-def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
-def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">;
-def v32bf16_info: X86VectorVTInfo<32, bf16, VR512, "pbf">;
-def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
-def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
-
-// "x" in v32i8x_info means RC = VR256X
-def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
-def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
-def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
-def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
-def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">;
-def v16bf16x_info: X86VectorVTInfo<16, bf16, VR256X, "pbf">;
-def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
-def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
-
-def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
-def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
-def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
-def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
-def v8f16x_info : X86VectorVTInfo<8, f16, VR128X, "ph">;
-def v8bf16x_info : X86VectorVTInfo<8, bf16, VR128X, "pbf">;
-def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
-def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
-
-// We map scalar types to the smallest (128-bit) vector type
-// with the appropriate element type. This allows us to use the same masking logic.
-def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
-def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
-def f16x_info : X86VectorVTInfo<1, f16, VR128X, "sh">;
-def bf16x_info : X86VectorVTInfo<1, bf16, VR128X, "sbf">;
-def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
-def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
-
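
The scalar mapping the comment above describes is a nested `!if` on the element size: a 1-element "vector" borrows the name of the smallest 128-bit vector with the same element type. The same computation isolated as a standalone sketch (mock names, as before):

    // scalar_vtname_sketch.td -- illustration only
    class ScalarVTName<int eltsize, string eltvt> {
      // 16-bit elements -> 8 lanes, 32-bit -> 4, 64-bit -> 2:
      int Lanes = !if(!eq(eltsize, 16), 8,
                  !if(!eq(eltsize, 32), 4,
                  !if(!eq(eltsize, 64), 2, 1)));
      string VTName = "v" # Lanes # eltvt;
    }
    def f32_name : ScalarVTName<32, "f32">;   // VTName = "v4f32"
    def f64_name : ScalarVTName<64, "f64">;   // VTName = "v2f64"
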
-class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
- X86VectorVTInfo i128> {
- X86VectorVTInfo info512 = i512;
- X86VectorVTInfo info256 = i256;
- X86VectorVTInfo info128 = i128;
-}
-
-def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
- v16i8x_info>;
-def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
- v8i16x_info>;
-def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
- v4i32x_info>;
-def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
- v2i64x_info>;
-def avx512vl_f16_info : AVX512VLVectorVTInfo<v32f16_info, v16f16x_info,
- v8f16x_info>;
-def avx512vl_bf16_info : AVX512VLVectorVTInfo<v32bf16_info, v16bf16x_info,
- v8bf16x_info>;
-def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
- v4f32x_info>;
-def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
- v2f64x_info>;
-
-class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
- ValueType _vt> {
- RegisterClass KRC = _krc;
- RegisterClass KRCWM = _krcwm;
- ValueType KVT = _vt;
-}
-
-def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
-def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
-def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
-def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
-def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
-def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
-def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
-
-// Used for matching masked operations. Ensures the operation part only has a
-// single use.
-def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
- (vselect node:$mask, node:$src1, node:$src2), [{
- return isProfitableToFormMaskedOp(N);
-}]>;
-
-def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
- (X86selects node:$mask, node:$src1, node:$src2), [{
- return isProfitableToFormMaskedOp(N);
-}]>;
-
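
The `vselect_mask`/`X86selects_mask` fragments above, and the many `*_su` ("single use") fragments deleted later in this patch, share one shape: a PatFrag whose C++ predicate rejects multi-use nodes, so a masked or flag-producing instruction is only formed when nothing else consumes the intermediate result. A mocked sketch of that shape; `MockPatFrag` is a stand-in so the file parses under plain `llvm-tblgen` (the real `PatFrag` comes from TargetSelectionDAG.td):

    // single_use_sketch.td -- illustration only
    def ops;      // mock dag operator
    def node;     // mock dag leaf
    def some_op;  // mock SDNode
    class MockPatFrag<dag operands, dag fragment, code pred> {
      dag  Operands      = operands;
      dag  Fragment      = fragment;
      code PredicateCode = pred;   // verbatim C++, opaque to TableGen
    }
    // Match (some_op a, b) only when its result has exactly one user:
    def some_op_su : MockPatFrag<(ops node:$a, node:$b),
                                 (some_op node:$a, node:$b),
                                 [{ return N->hasOneUse(); }]>;
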
// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
@@ -1812,19 +1624,19 @@ multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
AVX512VLVectorVTInfo VTInfo,
AVX512VLVectorVTInfo ShuffleMask> {
- defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
- ShuffleMask.info512>,
- avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
- ShuffleMask.info512>, EVEX_V512;
+ defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
+ ShuffleMask.info512>,
+ avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
+ ShuffleMask.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
- defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
- ShuffleMask.info128>,
- avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
- ShuffleMask.info128>, EVEX_V128;
- defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
- ShuffleMask.info256>,
- avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
- ShuffleMask.info256>, EVEX_V256;
+ defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
+ ShuffleMask.info128>,
+ avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
+ ShuffleMask.info128>, EVEX_V128;
+ defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
+ ShuffleMask.info256>,
+ avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
+ ShuffleMask.info256>, EVEX_V256;
}
}
@@ -1834,13 +1646,13 @@ multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo Idx,
Predicate Prd> {
let Predicates = [Prd] in
- defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
- Idx.info512>, EVEX_V512;
+ defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
+ Idx.info512>, EVEX_V512;
let Predicates = [Prd, HasVLX] in {
- defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
- Idx.info128>, EVEX_V128;
- defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
- Idx.info256>, EVEX_V256;
+ defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
+ Idx.info128>, EVEX_V128;
+ defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
+ Idx.info256>, EVEX_V256;
}
}
@@ -1890,9 +1702,9 @@ multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
}
// TODO: Should we add more casts? The vXi64 case is common due to ABI.
-defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
-defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
-defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
+defm : avx512_perm_i_lowering<"VPERMI2PSZ", v16f32_info, v16i32_info, v8i64_info>;
+defm : avx512_perm_i_lowering<"VPERMI2PSZ256", v8f32x_info, v8i32x_info, v4i64x_info>;
+defm : avx512_perm_i_lowering<"VPERMI2PSZ128", v4f32x_info, v4i32x_info, v2i64x_info>;
// VPERMT2
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
@@ -1931,19 +1743,19 @@ multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
AVX512VLVectorVTInfo VTInfo,
AVX512VLVectorVTInfo ShuffleMask> {
- defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
- ShuffleMask.info512>,
- avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
- ShuffleMask.info512>, EVEX_V512;
+ defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
+ ShuffleMask.info512>,
+ avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
+ ShuffleMask.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
- defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
- ShuffleMask.info128>,
- avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
- ShuffleMask.info128>, EVEX_V128;
- defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
- ShuffleMask.info256>,
- avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
- ShuffleMask.info256>, EVEX_V256;
+ defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
+ ShuffleMask.info128>,
+ avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
+ ShuffleMask.info128>, EVEX_V128;
+ defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
+ ShuffleMask.info256>,
+ avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
+ ShuffleMask.info256>, EVEX_V256;
}
}
@@ -1952,13 +1764,13 @@ multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo,
AVX512VLVectorVTInfo Idx, Predicate Prd> {
let Predicates = [Prd] in
- defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
- Idx.info512>, EVEX_V512;
+ defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
+ Idx.info512>, EVEX_V512;
let Predicates = [Prd, HasVLX] in {
- defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
- Idx.info128>, EVEX_V128;
- defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
- Idx.info256>, EVEX_V256;
+ defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
+ Idx.info128>, EVEX_V128;
+ defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
+ Idx.info256>, EVEX_V256;
}
}
@@ -2157,15 +1969,6 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
}
}
-def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
- (X86cmpms node:$src1, node:$src2, node:$cc), [{
- return N->hasOneUse();
-}]>;
-def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
- (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
- return N->hasOneUse();
-}]>;
-
let Predicates = [HasAVX512] in {
let ExeDomain = SSEPackedSingle in
defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
@@ -2261,12 +2064,6 @@ multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
}
}
-// This fragment treats X86cmpm as commutable to help match loads in both
-// operands for PCMPEQ.
-def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
-def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
- (setcc node:$src1, node:$src2, SETGT)>;
-
// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
@@ -2304,20 +2101,6 @@ defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
T8PD, REX_W, EVEX_CD8<64, CD8VF>;
}
-def X86pcmpm_imm : SDNodeXForm<setcc, [{
- ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
- uint8_t SSECC = X86::getVPCMPImmForCond(CC);
- return getI8Imm(SSECC, SDLoc(N));
-}]>;
-
-// Swapped operand version of the above.
-def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
- ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
- uint8_t SSECC = X86::getVPCMPImmForCond(CC);
- SSECC = X86::getSwappedVPCMPImm(SSECC);
- return getI8Imm(SSECC, SDLoc(N));
-}]>;
-
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
PatFrag Frag_su,
X86FoldableSchedWrite sched,
@@ -2451,30 +2234,6 @@ multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
}
}
-def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
- (setcc node:$src1, node:$src2, node:$cc), [{
- ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
- return !ISD::isUnsignedIntSetCC(CC);
-}], X86pcmpm_imm>;
-
-def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
- (setcc node:$src1, node:$src2, node:$cc), [{
- ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
- return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
-}], X86pcmpm_imm>;
-
-def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
- (setcc node:$src1, node:$src2, node:$cc), [{
- ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
- return ISD::isUnsignedIntSetCC(CC);
-}], X86pcmpm_imm>;
-
-def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
- (setcc node:$src1, node:$src2, node:$cc), [{
- ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
- return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
-}], X86pcmpm_imm>;
-
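
The deleted `X86pcmpm*` family combines two features: a C++ predicate that splits signed from unsigned condition codes, and an `SDNodeXForm` (the trailing `X86pcmpm_imm` argument) that rewrites the matched condition code into VPCMP's immediate encoding. A mocked sketch of that structure; the C++ bodies are quoted from the deleted fragments and assume the usual X86 ISel helpers (`getI8Imm`, `X86::getVPCMPImmForCond`):

    // pcmpm_sketch.td -- illustration only
    def ops;
    def node;
    def setcc_op;   // stands in for the setcc SDNode
    class MockSDNodeXForm<code xform> { code XFormFunction = xform; }
    class MockPatFrag<dag operands, dag fragment, code pred,
                      MockSDNodeXForm xform> {
      dag  Operands      = operands;
      dag  Fragment      = fragment;
      code PredicateCode = pred;
      MockSDNodeXForm OperandTransform = xform;
    }
    // Turn the matched CondCode operand into VPCMP's immediate:
    def pcmpm_imm_ex : MockSDNodeXForm<[{
      ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
      return getI8Imm(X86::getVPCMPImmForCond(CC), SDLoc(N));
    }]>;
    // Signed-only fragment; the unsigned twin inverts the final test:
    def pcmpm_ex : MockPatFrag<(ops node:$a, node:$b, node:$cc),
                               (setcc_op node:$a, node:$b, node:$cc),
                               [{
      ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
      return !ISD::isUnsignedIntSetCC(CC);
    }], pcmpm_imm_ex>;
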
// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
@@ -2504,16 +2263,6 @@ defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
SchedWriteVecALU, avx512vl_i64_info,
HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
-def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
- (X86cmpm node:$src1, node:$src2, node:$cc), [{
- return N->hasOneUse();
-}]>;
-
-def X86cmpm_imm_commute : SDNodeXForm<timm, [{
- uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
- return getI8Imm(Imm, SDLoc(N));
-}]>;
-
multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
string Name> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
@@ -2679,16 +2428,6 @@ let Predicates = [HasFP16] in {
// ----------------------------------------------------------------
// FPClass
-def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
- (X86Vfpclasss node:$src1, node:$src2), [{
- return N->hasOneUse();
-}]>;
-
-def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
- (X86Vfpclass node:$src1, node:$src2), [{
- return N->hasOneUse();
-}]>;
-
// Handle fpclass instructions: mask = op(reg_scalar,imm)
// op(mem_scalar,imm)
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
@@ -2853,46 +2592,55 @@ defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, E
// - copy from GPR to mask register and vice versa
//
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
- string OpcodeStr, RegisterClass KRC,
- ValueType vvt, X86MemOperand x86memop> {
- let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
- def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
- Sched<[WriteMove]>;
- def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set KRC:$dst, (vvt (load addr:$src)))]>,
- Sched<[WriteLoad]>;
- def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(store KRC:$src, addr:$dst)]>,
- Sched<[WriteStore]>;
+ string OpcodeStr, RegisterClass KRC, ValueType vvt,
+ X86MemOperand x86memop, string Suffix = ""> {
+ let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove],
+ explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in
+ def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
+ Sched<[WriteMove]>;
+ def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set KRC:$dst, (vvt (load addr:$src)))]>,
+ Sched<[WriteLoad]>;
+ def mk#Suffix : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(store KRC:$src, addr:$dst)]>,
+ Sched<[WriteStore]>;
}
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
- string OpcodeStr,
- RegisterClass KRC, RegisterClass GRC> {
+ string OpcodeStr, RegisterClass KRC,
+ RegisterClass GRC, string Suffix = ""> {
let hasSideEffects = 0 in {
- def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
- Sched<[WriteMove]>;
- def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
- Sched<[WriteMove]>;
+ def kr#Suffix : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
+ Sched<[WriteMove]>;
+ def rk#Suffix : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
+ Sched<[WriteMove]>;
}
}
-let Predicates = [HasDQI] in
+let Predicates = [HasDQI, NoEGPR] in
defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
VEX, PD;
+let Predicates = [HasDQI, HasEGPR, In64BitMode] in
+ defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem, "_EVEX">,
+ avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32, "_EVEX">,
+ EVEX, PD;
-let Predicates = [HasAVX512] in
+let Predicates = [HasAVX512, NoEGPR] in
defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
VEX, PS;
+let Predicates = [HasAVX512, HasEGPR, In64BitMode] in
+ defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem, "_EVEX">,
+ avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32, "_EVEX">,
+ EVEX, PS;
-let Predicates = [HasBWI] in {
+let Predicates = [HasBWI, NoEGPR] in {
defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
VEX, PD, REX_W;
defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
@@ -2902,6 +2650,16 @@ let Predicates = [HasBWI] in {
defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
VEX, XD, REX_W;
}
+let Predicates = [HasBWI, HasEGPR, In64BitMode] in {
+ defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem, "_EVEX">,
+ EVEX, PD, REX_W;
+ defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32, "_EVEX">,
+ EVEX, XD;
+ defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem, "_EVEX">,
+ EVEX, PS, REX_W;
+ defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64, "_EVEX">,
+ EVEX, XD, REX_W;
+}
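
The KMOV hunks above follow a pattern repeated throughout this update for APX extended GPRs (EGPR): the multiclass grows an optional `Suffix` template argument and is instantiated twice under mutually exclusive predicates, once plain (VEX encoding, `NoEGPR`) and once as a `_EVEX` twin (`HasEGPR, In64BitMode`). Because the suffix is pasted into the record names, the repeated `defm KMOVW` statements do not collide. The bare mechanics as a standalone sketch (predicates and encoding classes omitted, since they need the full X86 definitions):

    // egpr_twin_sketch.td -- illustration only
    class MockInst<string asm> { string AsmString = asm; }
    multiclass mask_mov<string opcodestr, string Suffix = ""> {
      def kk#Suffix : MockInst<opcodestr>;   // KMOVWkk / KMOVWkk_EVEX
    }
    defm KMOVW : mask_mov<"kmovw">;            // legacy VEX form
    defm KMOVW : mask_mov<"kmovw", "_EVEX">;   // APX/EGPR EVEX form
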
// GR from/to mask register
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
@@ -3063,10 +2821,6 @@ multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, REX_W, PS;
}
-// These nodes use 'vnot' instead of 'not' to support vectors.
-def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
-def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
-
// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>;
defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>;
@@ -9861,19 +9615,6 @@ defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
// Integer truncate and extend operations
//-------------------------------------------------
-// PatFrags that contain a select and a truncate op. They take operands in the
-// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
-// either to the multiclasses.
-def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
- (vselect_mask node:$mask,
- (trunc node:$src), node:$src0)>;
-def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
- (vselect_mask node:$mask,
- (X86vtruncs node:$src), node:$src0)>;
-def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
- (vselect_mask node:$mask,
- (X86vtruncus node:$src), node:$src0)>;
-
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
@@ -12657,11 +12398,6 @@ defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
-def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
- (X86Vpshufbitqmb node:$src1, node:$src2), [{
- return N->hasOneUse();
-}]>;
-
multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
(ins VTI.RC:$src1, VTI.RC:$src2),
@@ -12855,8 +12591,10 @@ multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _
}
}
+let ExeDomain = SSEPackedInt in {
defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, REX_W;
+}
multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched,
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td
index 9cde6f559886..8c355e84a065 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -48,16 +48,6 @@ def PLEA64r : PseudoI<(outs GR64:$dst), (ins anymem:$src), []>;
// Fixed-Register Multiplication and Division Instructions.
//
-// SchedModel info for an instruction that loads one value and gets the second
-// (and possibly third) value from a register.
-// This is used for instructions that put the memory operands before other
-// uses.
-class SchedLoadReg<X86FoldableSchedWrite Sched> : Sched<[Sched.Folded,
- // Memory operand.
- ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
- // Register reads (implicit or explicit).
- Sched.ReadAfterFold, Sched.ReadAfterFold]>;
-
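
The deleted `SchedLoadReg` helper encodes a positional contract: entries in a `Sched<[...]>` list pair up with the instruction's operands in order, so a load-folded form lists the folded write first, then one `ReadDefault` per memory sub-operand (base, scale, index, displacement, segment on x86), then the late reads for the register operands. The ordering, mocked standalone:

    // schedloadreg_sketch.td -- illustration only
    class MockSchedRW;
    def FoldedWrite : MockSchedRW;  // stands in for Sched.Folded
    def MemRead     : MockSchedRW;  // stands in for ReadDefault
    def RegReadLate : MockSchedRW;  // stands in for Sched.ReadAfterFold
    class MockSched<list<MockSchedRW> rw> { list<MockSchedRW> SchedRW = rw; }
    def LoadRegSched : MockSched<[FoldedWrite,
                                  // five memory sub-operands:
                                  MemRead, MemRead, MemRead, MemRead, MemRead,
                                  // register reads, available after the fold:
                                  RegReadLate, RegReadLate]>;
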
// BinOpRR - Binary instructions with inputs "reg, reg".
class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
dag outlist, X86FoldableSchedWrite sched, list<dag> pattern>
@@ -506,34 +496,23 @@ class IMulOpRMI<bits<8> opcode, string mnemonic, X86TypeInfo info,
let ImmT = info.ImmEncoding;
}
-def X86add_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
- (X86add_flag node:$lhs, node:$rhs), [{
- return hasNoCarryFlagUses(SDValue(N, 1));
-}]>;
-
-def X86sub_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
- (X86sub_flag node:$lhs, node:$rhs), [{
- // Only use DEC if the result is used.
- return !SDValue(N, 0).use_empty() && hasNoCarryFlagUses(SDValue(N, 1));
-}]>;
-
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
-let CodeSize = 1, hasSideEffects = 0 in {
+let hasSideEffects = 0 in {
def INC16r_alt : INCDECR_ALT<0x40, "inc", Xi16>;
def INC32r_alt : INCDECR_ALT<0x40, "inc", Xi32>;
-} // CodeSize = 1, hasSideEffects = 0
+} // hasSideEffects = 0
-let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
+let isConvertibleToThreeAddress = 1 in { // Can xform into LEA.
def INC8r : INCDECR<MRM0r, "inc", Xi8, X86add_flag_nocf>;
def INC16r : INCDECR<MRM0r, "inc", Xi16, X86add_flag_nocf>;
def INC32r : INCDECR<MRM0r, "inc", Xi32, X86add_flag_nocf>;
def INC64r : INCDECR<MRM0r, "inc", Xi64, X86add_flag_nocf>;
-} // isConvertibleToThreeAddress = 1, CodeSize = 2
+} // isConvertibleToThreeAddress = 1
} // Constraints = "$src1 = $dst", SchedRW
-let CodeSize = 2, SchedRW = [WriteALURMW] in {
+let SchedRW = [WriteALURMW] in {
let Predicates = [UseIncDec] in {
def INC8m : INCDECM<MRM0m, "inc", Xi8, 1>;
def INC16m : INCDECM<MRM0m, "inc", Xi16, 1>;
@@ -542,24 +521,24 @@ let Predicates = [UseIncDec] in {
let Predicates = [UseIncDec, In64BitMode] in {
def INC64m : INCDECM<MRM0m, "inc", Xi64, 1>;
} // Predicates
-} // CodeSize = 2, SchedRW
+} // SchedRW
let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
-let CodeSize = 1, hasSideEffects = 0 in {
+let hasSideEffects = 0 in {
def DEC16r_alt : INCDECR_ALT<0x48, "dec", Xi16>;
def DEC32r_alt : INCDECR_ALT<0x48, "dec", Xi32>;
-} // CodeSize = 1, hasSideEffects = 0
+} // hasSideEffects = 0
-let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
+let isConvertibleToThreeAddress = 1 in { // Can xform into LEA.
def DEC8r : INCDECR<MRM1r, "dec", Xi8, X86sub_flag_nocf>;
def DEC16r : INCDECR<MRM1r, "dec", Xi16, X86sub_flag_nocf>;
def DEC32r : INCDECR<MRM1r, "dec", Xi32, X86sub_flag_nocf>;
def DEC64r : INCDECR<MRM1r, "dec", Xi64, X86sub_flag_nocf>;
-} // isConvertibleToThreeAddress = 1, CodeSize = 2
+} // isConvertibleToThreeAddress = 1
} // Constraints = "$src1 = $dst", SchedRW
-let CodeSize = 2, SchedRW = [WriteALURMW] in {
+let SchedRW = [WriteALURMW] in {
let Predicates = [UseIncDec] in {
def DEC8m : INCDECM<MRM1m, "dec", Xi8, -1>;
def DEC16m : INCDECM<MRM1m, "dec", Xi16, -1>;
@@ -568,7 +547,7 @@ let Predicates = [UseIncDec] in {
let Predicates = [UseIncDec, In64BitMode] in {
def DEC64m : INCDECM<MRM1m, "dec", Xi64, -1>;
} // Predicates
-} // CodeSize = 2, SchedRW
+} // SchedRW
} // Defs = [EFLAGS]
// Extra precision multiplication
@@ -764,7 +743,6 @@ def IDIV64m: MulOpM<0xF7, MRM7m, "idiv", Xi64, WriteIDiv64, []>,
//
// unary instructions
-let CodeSize = 2 in {
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
def NEG8r : NegOpR<0xF6, "neg", Xi8>;
@@ -798,7 +776,6 @@ def NOT16m : NotOpM<0xF7, "not", Xi16>;
def NOT32m : NotOpM<0xF7, "not", Xi32>;
def NOT64m : NotOpM<0xF7, "not", Xi64>, Requires<[In64BitMode]>;
} // SchedRW
-} // CodeSize
/// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is
/// defined with "(set GPR:$dst, EFLAGS, (...".
@@ -1223,9 +1200,6 @@ def : Pat<(store (X86adc_flag i64relocImmSExt32_su:$src, (load addr:$dst), EFLAG
// generate a result. From an encoding perspective, they are very different:
// they don't have all the usual imm8 and REV forms, and are encoded into a
// different space.
-def X86testpat : PatFrag<(ops node:$lhs, node:$rhs),
- (X86cmp (and_su node:$lhs, node:$rhs), 0)>;
-
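
Background for the deleted fragment: TEST performs the same AND as the logic instruction but writes only EFLAGS, so the pattern is a compare of an AND against zero, with `and_su` insisting the AND result itself has no other users. The shape, mocked standalone:

    // testpat_sketch.td -- illustration only
    def ops;
    def node;
    def cmp_op;     // stands in for X86cmp
    def and_su_op;  // stands in for the single-use AND fragment
    class MockPatFrag<dag operands, dag fragment> {
      dag Operands = operands;
      dag Fragment = fragment;
    }
    // TEST a, b  <=>  flags of ((a & b) compared with 0), AND result unused:
    def testpat_ex : MockPatFrag<(ops node:$lhs, node:$rhs),
                                 (cmp_op (and_su_op node:$lhs, node:$rhs), 0)>;
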
let isCompare = 1 in {
let Defs = [EFLAGS] in {
let isCommutable = 1 in {
@@ -1291,21 +1265,34 @@ def : Pat<(X86testpat (loadi64 addr:$src1), i64relocImmSExt32_su:$src2),
//
multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
PatFrag ld_frag, X86FoldableSchedWrite sched> {
+let Predicates = [HasBMI, NoEGPR] in {
def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
- !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>,
- Sched<[sched]>;
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>,
+ VEX_4V, Sched<[sched]>;
def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
- !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, EFLAGS,
- (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, EFLAGS,
+ (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>,
+ VEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+}
+let Predicates = [HasBMI, HasEGPR, In64BitMode] in {
+ def rr_EVEX : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>,
+ EVEX_4V, Sched<[sched]>;
+ def rm_EVEX : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, EFLAGS,
+ (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>,
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+}
}
// Complexity is reduced to give 'and' with an immediate a chance to match first.
-let Predicates = [HasBMI], Defs = [EFLAGS], AddedComplexity = -6 in {
- defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32, WriteALU>, T8PS, VEX_4V;
- defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64, WriteALU>, T8PS, VEX_4V, REX_W;
+let Defs = [EFLAGS], AddedComplexity = -6 in {
+ defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32, WriteALU>, T8PS;
+ defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64, WriteALU>, T8PS, REX_W;
}
let Predicates = [HasBMI], AddedComplexity = -6 in {
@@ -1325,6 +1312,7 @@ let Predicates = [HasBMI], AddedComplexity = -6 in {
multiclass bmi_mulx<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in {
+let Predicates = [HasBMI2, NoEGPR] in {
def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
[]>, T8XD, VEX_4V, Sched<[WriteIMulH, sched]>;
@@ -1348,15 +1336,27 @@ let hasSideEffects = 0 in {
def Hrm : PseudoI<(outs RC:$dst), (ins x86memop:$src),
[]>, Sched<[sched.Folded]>;
}
+let Predicates = [HasBMI2, HasEGPR, In64BitMode] in
+ def rr#_EVEX : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
+ !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
+ []>, T8XD, EVEX_4V, Sched<[WriteIMulH, sched]>;
+let Predicates = [HasBMI2, HasEGPR, In64BitMode], mayLoad = 1 in
+ def rm#_EVEX : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
+ !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
+ []>, T8XD, EVEX_4V,
+ Sched<[WriteIMulHLd, sched.Folded,
+ // Memory operand.
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ // Implicit read of EDX/RDX
+ sched.ReadAfterFold]>;
}
-
-let Predicates = [HasBMI2] in {
- let Uses = [EDX] in
- defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteMULX32>;
- let Uses = [RDX] in
- defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteMULX64>, REX_W;
}
+let Uses = [EDX] in
+ defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteMULX32>;
+let Uses = [RDX] in
+ defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteMULX64>, REX_W;
+
//===----------------------------------------------------------------------===//
// ADCX and ADOX Instructions
//
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td
index 52750937c425..457833f8cc33 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -786,16 +786,6 @@ defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, X86lock_or , "or">;
defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, X86lock_and, "and">;
defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, X86lock_xor, "xor">;
-def X86lock_add_nocf : PatFrag<(ops node:$lhs, node:$rhs),
- (X86lock_add node:$lhs, node:$rhs), [{
- return hasNoCarryFlagUses(SDValue(N, 0));
-}]>;
-
-def X86lock_sub_nocf : PatFrag<(ops node:$lhs, node:$rhs),
- (X86lock_sub node:$lhs, node:$rhs), [{
- return hasNoCarryFlagUses(SDValue(N, 0));
-}]>;
-
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
SchedRW = [WriteALURMW] in {
let Predicates = [UseIncDec] in {
@@ -1071,30 +1061,28 @@ defm LXADD : ATOMIC_RMW_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add">, TB, LOCK;
* (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions)
*/
multiclass RELEASE_BINOP_MI<string Name, SDNode op> {
- def : Pat<(atomic_store_8 addr:$dst,
- (op (atomic_load_8 addr:$dst), (i8 imm:$src))),
+ def : Pat<(atomic_store_8 (op (atomic_load_8 addr:$dst), (i8 imm:$src)),
+ addr:$dst),
(!cast<Instruction>(Name#"8mi") addr:$dst, imm:$src)>;
- def : Pat<(atomic_store_16 addr:$dst,
- (op (atomic_load_16 addr:$dst), (i16 imm:$src))),
+ def : Pat<(atomic_store_16 (op (atomic_load_16 addr:$dst), (i16 imm:$src)),
+ addr:$dst),
(!cast<Instruction>(Name#"16mi") addr:$dst, imm:$src)>;
- def : Pat<(atomic_store_32 addr:$dst,
- (op (atomic_load_32 addr:$dst), (i32 imm:$src))),
+ def : Pat<(atomic_store_32 (op (atomic_load_32 addr:$dst), (i32 imm:$src)),
+ addr:$dst),
(!cast<Instruction>(Name#"32mi") addr:$dst, imm:$src)>;
- def : Pat<(atomic_store_64 addr:$dst,
- (op (atomic_load_64 addr:$dst), (i64immSExt32:$src))),
+ def : Pat<(atomic_store_64 (op (atomic_load_64 addr:$dst), (i64immSExt32:$src)),
+ addr:$dst),
(!cast<Instruction>(Name#"64mi32") addr:$dst, (i64immSExt32:$src))>;
-
- def : Pat<(atomic_store_8 addr:$dst,
- (op (atomic_load_8 addr:$dst), (i8 GR8:$src))),
+ def : Pat<(atomic_store_8 (op (atomic_load_8 addr:$dst), (i8 GR8:$src)), addr:$dst),
(!cast<Instruction>(Name#"8mr") addr:$dst, GR8:$src)>;
- def : Pat<(atomic_store_16 addr:$dst,
- (op (atomic_load_16 addr:$dst), (i16 GR16:$src))),
+ def : Pat<(atomic_store_16 (op (atomic_load_16 addr:$dst), (i16 GR16:$src)),
+ addr:$dst),
(!cast<Instruction>(Name#"16mr") addr:$dst, GR16:$src)>;
- def : Pat<(atomic_store_32 addr:$dst,
- (op (atomic_load_32 addr:$dst), (i32 GR32:$src))),
+ def : Pat<(atomic_store_32 (op (atomic_load_32 addr:$dst), (i32 GR32:$src)),
+ addr:$dst),
(!cast<Instruction>(Name#"32mr") addr:$dst, GR32:$src)>;
- def : Pat<(atomic_store_64 addr:$dst,
- (op (atomic_load_64 addr:$dst), (i64 GR64:$src))),
+ def : Pat<(atomic_store_64 (op (atomic_load_64 addr:$dst), (i64 GR64:$src)),
+ addr:$dst),
(!cast<Instruction>(Name#"64mr") addr:$dst, GR64:$src)>;
}
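
The mechanical rewrites in this multiclass and in the store patterns below track an upstream change to the `atomic_store` node: it now takes (value, pointer) like an ordinary store, where it previously took (pointer, value). The new shape of one read-modify-write pattern, mocked standalone:

    // atomic_order_sketch.td -- illustration only
    def atomic_store_32;   // mock SDNodes and leaves
    def atomic_load_32;
    def add_op;
    def addr;
    def imm;
    class MockPat<dag from> { dag From = from; }
    // load from $dst, add $src, store back to $dst -- value operand first:
    def rmw_add_ex : MockPat<(atomic_store_32
                                (add_op (atomic_load_32 addr:$dst), imm:$src),
                                addr:$dst)>;
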
defm : RELEASE_BINOP_MI<"ADD", add>;
@@ -1131,13 +1119,13 @@ defm : ATOMIC_LOAD_FP_BINOP_MI<"ADD", fadd>;
multiclass RELEASE_UNOP<string Name, dag dag8, dag dag16, dag dag32,
dag dag64> {
- def : Pat<(atomic_store_8 addr:$dst, dag8),
+ def : Pat<(atomic_store_8 dag8, addr:$dst),
(!cast<Instruction>(Name#8m) addr:$dst)>;
- def : Pat<(atomic_store_16 addr:$dst, dag16),
+ def : Pat<(atomic_store_16 dag16, addr:$dst),
(!cast<Instruction>(Name#16m) addr:$dst)>;
- def : Pat<(atomic_store_32 addr:$dst, dag32),
+ def : Pat<(atomic_store_32 dag32, addr:$dst),
(!cast<Instruction>(Name#32m) addr:$dst)>;
- def : Pat<(atomic_store_64 addr:$dst, dag64),
+ def : Pat<(atomic_store_64 dag64, addr:$dst),
(!cast<Instruction>(Name#64m) addr:$dst)>;
}
@@ -1165,22 +1153,22 @@ defm : RELEASE_UNOP<"NOT",
(not (i32 (atomic_load_32 addr:$dst))),
(not (i64 (atomic_load_64 addr:$dst)))>;
-def : Pat<(atomic_store_8 addr:$dst, (i8 imm:$src)),
+def : Pat<(atomic_store_8 (i8 imm:$src), addr:$dst),
(MOV8mi addr:$dst, imm:$src)>;
-def : Pat<(atomic_store_16 addr:$dst, (i16 imm:$src)),
+def : Pat<(atomic_store_16 (i16 imm:$src), addr:$dst),
(MOV16mi addr:$dst, imm:$src)>;
-def : Pat<(atomic_store_32 addr:$dst, (i32 imm:$src)),
+def : Pat<(atomic_store_32 (i32 imm:$src), addr:$dst),
(MOV32mi addr:$dst, imm:$src)>;
-def : Pat<(atomic_store_64 addr:$dst, (i64immSExt32:$src)),
+def : Pat<(atomic_store_64 (i64immSExt32:$src), addr:$dst),
(MOV64mi32 addr:$dst, i64immSExt32:$src)>;
-def : Pat<(atomic_store_8 addr:$dst, GR8:$src),
+def : Pat<(atomic_store_8 GR8:$src, addr:$dst),
(MOV8mr addr:$dst, GR8:$src)>;
-def : Pat<(atomic_store_16 addr:$dst, GR16:$src),
+def : Pat<(atomic_store_16 GR16:$src, addr:$dst),
(MOV16mr addr:$dst, GR16:$src)>;
-def : Pat<(atomic_store_32 addr:$dst, GR32:$src),
+def : Pat<(atomic_store_32 GR32:$src, addr:$dst),
(MOV32mr addr:$dst, GR32:$src)>;
-def : Pat<(atomic_store_64 addr:$dst, GR64:$src),
+def : Pat<(atomic_store_64 GR64:$src, addr:$dst),
(MOV64mr addr:$dst, GR64:$src)>;
def : Pat<(i8 (atomic_load_8 addr:$src)), (MOV8rm addr:$src)>;
@@ -1189,18 +1177,18 @@ def : Pat<(i32 (atomic_load_32 addr:$src)), (MOV32rm addr:$src)>;
def : Pat<(i64 (atomic_load_64 addr:$src)), (MOV64rm addr:$src)>;
// Floating point loads/stores.
-def : Pat<(atomic_store_32 addr:$dst, (i32 (bitconvert (f32 FR32:$src)))),
+def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
(MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>;
-def : Pat<(atomic_store_32 addr:$dst, (i32 (bitconvert (f32 FR32:$src)))),
+def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
(VMOVSSmr addr:$dst, FR32:$src)>, Requires<[UseAVX]>;
-def : Pat<(atomic_store_32 addr:$dst, (i32 (bitconvert (f32 FR32:$src)))),
+def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
(VMOVSSZmr addr:$dst, FR32:$src)>, Requires<[HasAVX512]>;
-def : Pat<(atomic_store_64 addr:$dst, (i64 (bitconvert (f64 FR64:$src)))),
+def : Pat<(atomic_store_64 (i64 (bitconvert (f64 FR64:$src))), addr:$dst),
(MOVSDmr addr:$dst, FR64:$src)>, Requires<[UseSSE2]>;
-def : Pat<(atomic_store_64 addr:$dst, (i64 (bitconvert (f64 FR64:$src)))),
+def : Pat<(atomic_store_64 (i64 (bitconvert (f64 FR64:$src))), addr:$dst),
(VMOVSDmr addr:$dst, FR64:$src)>, Requires<[UseAVX]>;
-def : Pat<(atomic_store_64 addr:$dst, (i64 (bitconvert (f64 FR64:$src)))),
+def : Pat<(atomic_store_64 (i64 (bitconvert (f64 FR64:$src))), addr:$dst),
(VMOVSDmr addr:$dst, FR64:$src)>, Requires<[HasAVX512]>;
def : Pat<(f32 (bitconvert (i32 (atomic_load_32 addr:$src)))),
@@ -1306,31 +1294,6 @@ def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 tglobaladdr:$dst)),
// %r11. This happens when calling a vararg function with 6 arguments.
//
// Match an X86tcret that uses less than 7 volatile registers.
-def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
- (X86tcret node:$ptr, node:$off), [{
- // X86tcret args: (*chain, ptr, imm, regs..., glue)
- unsigned NumRegs = 0;
- for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i)
- if (isa<RegisterSDNode>(N->getOperand(i)) && ++NumRegs > 6)
- return false;
- return true;
-}]>;
-
-def X86tcret_1reg : PatFrag<(ops node:$ptr, node:$off),
- (X86tcret node:$ptr, node:$off), [{
- // X86tcret args: (*chain, ptr, imm, regs..., glue)
- unsigned NumRegs = 1;
- const SDValue& BasePtr = cast<LoadSDNode>(N->getOperand(1))->getBasePtr();
- if (isa<FrameIndexSDNode>(BasePtr))
- NumRegs = 3;
- else if (BasePtr->getNumOperands() && isa<GlobalAddressSDNode>(BasePtr->getOperand(0)))
- NumRegs = 3;
- for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i)
- if (isa<RegisterSDNode>(N->getOperand(i)) && ( NumRegs-- == 0))
- return false;
- return true;
-}]>;
-
def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
(TCRETURNri ptr_rc_tailcall:$dst, timm:$off)>,
Requires<[Not64BitMode, NotUseIndirectThunkCalls]>;
@@ -1451,32 +1414,8 @@ def : Pat<(i64 (anyext GR16:$src)),
def : Pat<(i64 (anyext GR32:$src)),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, sub_32bit)>;
-// If this is an anyext of the remainder of an 8-bit sdivrem, use a MOVSX
-// instead of a MOVZX. The sdivrem lowering will emit a MOVSX to move
-// %ah to the lower byte of a register. By using a MOVSX here we allow a
-// post-isel peephole to merge the two MOVSX instructions into one.
-def anyext_sdiv : PatFrag<(ops node:$lhs), (anyext node:$lhs),[{
- return (N->getOperand(0).getOpcode() == ISD::SDIVREM &&
- N->getOperand(0).getResNo() == 1);
-}]>;
def : Pat<(i32 (anyext_sdiv GR8:$src)), (MOVSX32rr8 GR8:$src)>;
-// Any instruction that defines a 32-bit result zeroes the high half of the
-// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
-// be copying from a truncate. AssertSext/AssertZext/AssertAlign aren't saying
-// anything about the upper 32 bits, they're probably just qualifying a
-// CopyFromReg. FREEZE may be coming from a truncate. Any other 32-bit
-// operation will zero-extend up to 64 bits.
-def def32 : PatLeaf<(i32 GR32:$src), [{
- return N->getOpcode() != ISD::TRUNCATE &&
- N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
- N->getOpcode() != ISD::CopyFromReg &&
- N->getOpcode() != ISD::AssertSext &&
- N->getOpcode() != ISD::AssertZext &&
- N->getOpcode() != ISD::AssertAlign &&
- N->getOpcode() != ISD::FREEZE;
-}]>;
-
// In the case of a 32-bit def that is known to implicitly zero-extend,
// we can use a SUBREG_TO_REG.
def : Pat<(i64 (zext def32:$src)),
@@ -1494,17 +1433,6 @@ def : Pat<(i64 (and (anyext def32:$src), 0x00000000FFFFFFFF)),
// generator to make the generated code easier to read. To do this, we select
// into "disjoint bits" pseudo ops.
-// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero.
-def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
- return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
-
- KnownBits Known0 = CurDAG->computeKnownBits(N->getOperand(0), 0);
- KnownBits Known1 = CurDAG->computeKnownBits(N->getOperand(1), 0);
- return (~Known0.Zero & ~Known1.Zero) == 0;
-}]>;
-
-
// (or x1, x2) -> (add x1, x2) if the two operands are known not to share bits.
// Try this before selecting to OR.
let SchedRW = [WriteALU] in {
@@ -1822,23 +1750,6 @@ def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
-def shiftMask8 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
- return isUnneededShiftMask(N, 3);
-}]>;
-
-def shiftMask16 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
- return isUnneededShiftMask(N, 4);
-}]>;
-
-def shiftMask32 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
- return isUnneededShiftMask(N, 5);
-}]>;
-
-def shiftMask64 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
- return isUnneededShiftMask(N, 6);
-}]>;
-
-
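
Both the deleted `shiftMask*` fragments and the `MaskedShiftAmountPats` multiclass below rest on the same ISA fact: x86 shifts read only the low bits of the count register (5 bits for 32-bit operations, 6 for 64-bit), so an explicit `and` that preserves at least those low bits is redundant and can be dropped during selection; `isUnneededShiftMask(N, bits)` is the C++ helper that verifies this. The fragment's shape, mocked standalone:

    // shiftmask_sketch.td -- illustration only
    def ops;
    def node;
    def and_op;  // stands in for the generic 'and'
    def imm;     // mock immediate leaf
    class MockPatFrag<dag operands, dag fragment, code pred> {
      dag  Operands      = operands;
      dag  Fragment      = fragment;
      code PredicateCode = pred;
    }
    // Match (and y, mask) when the mask keeps at least the low 5 bits,
    // letting (shl x, (and y, 31)) select as a plain shift by y:
    def shiftMask32_ex : MockPatFrag<(ops node:$lhs),
                                     (and_op node:$lhs, imm),
                                     [{ return isUnneededShiftMask(N, 5); }]>;
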
// Shift amount is implicitly masked.
multiclass MaskedShiftAmountPats<SDNode frag, string name> {
// (shift x (and y, 31)) ==> (shift x, y)
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrControl.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrControl.td
index fd996603476d..5171c2249dee 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrControl.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrControl.td
@@ -188,6 +188,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
}
}
+def JMPABS64i : Ii64<0xA1, RawFrm, (outs), (ins i64imm:$dst), "jmpabs\t$dst", []>,
+ ExplicitREX2Prefix, Requires<[In64BitMode]>, Sched<[WriteJumpLd]>;
+
// Loop instructions
let isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td
index 66a2d27abf86..09655d939121 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -12,127 +12,6 @@
//
//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// FPStack specific DAG Nodes.
-//===----------------------------------------------------------------------===//
-
-def SDTX86Fld : SDTypeProfile<1, 1, [SDTCisFP<0>,
- SDTCisPtrTy<1>]>;
-def SDTX86Fst : SDTypeProfile<0, 2, [SDTCisFP<0>,
- SDTCisPtrTy<1>]>;
-def SDTX86Fild : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
-def SDTX86Fist : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
-
-def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
-def SDTX86CwdLoad : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
-def SDTX86FPEnv : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
-
-def X86fp80_add : SDNode<"X86ISD::FP80_ADD", SDTFPBinOp, [SDNPCommutative]>;
-def X86strict_fp80_add : SDNode<"X86ISD::STRICT_FP80_ADD", SDTFPBinOp,
- [SDNPHasChain,SDNPCommutative]>;
-def any_X86fp80_add : PatFrags<(ops node:$lhs, node:$rhs),
- [(X86strict_fp80_add node:$lhs, node:$rhs),
- (X86fp80_add node:$lhs, node:$rhs)]>;
-
-def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def X86fst : SDNode<"X86ISD::FST", SDTX86Fst,
- [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def X86fist : SDNode<"X86ISD::FIST", SDTX86Fist,
- [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst,
- [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
- [SDNPHasChain, SDNPMayStore, SDNPSideEffect,
- SDNPMemOperand]>;
-def X86fp_cwd_set16 : SDNode<"X86ISD::FLDCW16m", SDTX86CwdLoad,
- [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
- SDNPMemOperand]>;
-def X86fpenv_get : SDNode<"X86ISD::FNSTENVm", SDTX86FPEnv,
- [SDNPHasChain, SDNPMayStore, SDNPSideEffect,
- SDNPMemOperand]>;
-def X86fpenv_set : SDNode<"X86ISD::FLDENVm", SDTX86FPEnv,
- [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
- SDNPMemOperand]>;
-
-def X86fstf32 : PatFrag<(ops node:$val, node:$ptr),
- (X86fst node:$val, node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f32;
-}]>;
-def X86fstf64 : PatFrag<(ops node:$val, node:$ptr),
- (X86fst node:$val, node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f64;
-}]>;
-def X86fstf80 : PatFrag<(ops node:$val, node:$ptr),
- (X86fst node:$val, node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f80;
-}]>;
-
-def X86fldf32 : PatFrag<(ops node:$ptr), (X86fld node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f32;
-}]>;
-def X86fldf64 : PatFrag<(ops node:$ptr), (X86fld node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f64;
-}]>;
-def X86fldf80 : PatFrag<(ops node:$ptr), (X86fld node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f80;
-}]>;
-
-def X86fild16 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
-def X86fild32 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
-def X86fild64 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
-}]>;
-
-def X86fist32 : PatFrag<(ops node:$val, node:$ptr),
- (X86fist node:$val, node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
-
-def X86fist64 : PatFrag<(ops node:$val, node:$ptr),
- (X86fist node:$val, node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
-}]>;
-
-def X86fp_to_i16mem : PatFrag<(ops node:$val, node:$ptr),
- (X86fp_to_mem node:$val, node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
-def X86fp_to_i32mem : PatFrag<(ops node:$val, node:$ptr),
- (X86fp_to_mem node:$val, node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
-def X86fp_to_i64mem : PatFrag<(ops node:$val, node:$ptr),
- (X86fp_to_mem node:$val, node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
-}]>;
-
-//===----------------------------------------------------------------------===//
-// FPStack pattern fragments
-//===----------------------------------------------------------------------===//
-
-def fpimm0 : FPImmLeaf<fAny, [{
- return Imm.isExactlyValue(+0.0);
-}]>;
-
-def fpimmneg0 : FPImmLeaf<fAny, [{
- return Imm.isExactlyValue(-0.0);
-}]>;
-
-def fpimm1 : FPImmLeaf<fAny, [{
- return Imm.isExactlyValue(+1.0);
-}]>;
-
-def fpimmneg1 : FPImmLeaf<fAny, [{
- return Imm.isExactlyValue(-1.0);
-}]>;
-
// Some 'special' instructions - expanded after instruction selection.
// Clobbers EFLAGS due to OR instruction used internally.
// FIXME: Can we model this in SelectionDAG?
@@ -645,7 +524,7 @@ def XCH_F : FPI<0xD9, MRM1r, (outs), (ins RSTi:$op), "fxch\t$op">;
}
// Floating point constant loads.
-let SchedRW = [WriteZero], Uses = [FPCW] in {
+let SchedRW = [WriteZero], Uses = [FPCW], isReMaterializable = 1 in {
def LD_Fp032 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
[(set RFP32:$dst, fpimm0)]>;
def LD_Fp132 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp
index e1feca25469b..c9d0f66c6e46 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -23,303 +23,10 @@ using namespace llvm;
// are currently emitted in X86GenInstrInfo.inc in alphabetical order, which
// makes sorting these tables a simple matter of alphabetizing the table.
#include "X86GenFoldTables.inc"
-static const X86MemoryFoldTableEntry BroadcastFoldTable2[] = {
- { X86::VADDPDZ128rr, X86::VADDPDZ128rmb, TB_BCAST_SD },
- { X86::VADDPDZ256rr, X86::VADDPDZ256rmb, TB_BCAST_SD },
- { X86::VADDPDZrr, X86::VADDPDZrmb, TB_BCAST_SD },
- { X86::VADDPSZ128rr, X86::VADDPSZ128rmb, TB_BCAST_SS },
- { X86::VADDPSZ256rr, X86::VADDPSZ256rmb, TB_BCAST_SS },
- { X86::VADDPSZrr, X86::VADDPSZrmb, TB_BCAST_SS },
- { X86::VANDNPDZ128rr, X86::VANDNPDZ128rmb, TB_BCAST_SD },
- { X86::VANDNPDZ256rr, X86::VANDNPDZ256rmb, TB_BCAST_SD },
- { X86::VANDNPDZrr, X86::VANDNPDZrmb, TB_BCAST_SD },
- { X86::VANDNPSZ128rr, X86::VANDNPSZ128rmb, TB_BCAST_SS },
- { X86::VANDNPSZ256rr, X86::VANDNPSZ256rmb, TB_BCAST_SS },
- { X86::VANDNPSZrr, X86::VANDNPSZrmb, TB_BCAST_SS },
- { X86::VANDPDZ128rr, X86::VANDPDZ128rmb, TB_BCAST_SD },
- { X86::VANDPDZ256rr, X86::VANDPDZ256rmb, TB_BCAST_SD },
- { X86::VANDPDZrr, X86::VANDPDZrmb, TB_BCAST_SD },
- { X86::VANDPSZ128rr, X86::VANDPSZ128rmb, TB_BCAST_SS },
- { X86::VANDPSZ256rr, X86::VANDPSZ256rmb, TB_BCAST_SS },
- { X86::VANDPSZrr, X86::VANDPSZrmb, TB_BCAST_SS },
- { X86::VCMPPDZ128rri, X86::VCMPPDZ128rmbi, TB_BCAST_SD },
- { X86::VCMPPDZ256rri, X86::VCMPPDZ256rmbi, TB_BCAST_SD },
- { X86::VCMPPDZrri, X86::VCMPPDZrmbi, TB_BCAST_SD },
- { X86::VCMPPSZ128rri, X86::VCMPPSZ128rmbi, TB_BCAST_SS },
- { X86::VCMPPSZ256rri, X86::VCMPPSZ256rmbi, TB_BCAST_SS },
- { X86::VCMPPSZrri, X86::VCMPPSZrmbi, TB_BCAST_SS },
- { X86::VDIVPDZ128rr, X86::VDIVPDZ128rmb, TB_BCAST_SD },
- { X86::VDIVPDZ256rr, X86::VDIVPDZ256rmb, TB_BCAST_SD },
- { X86::VDIVPDZrr, X86::VDIVPDZrmb, TB_BCAST_SD },
- { X86::VDIVPSZ128rr, X86::VDIVPSZ128rmb, TB_BCAST_SS },
- { X86::VDIVPSZ256rr, X86::VDIVPSZ256rmb, TB_BCAST_SS },
- { X86::VDIVPSZrr, X86::VDIVPSZrmb, TB_BCAST_SS },
- { X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rmb, TB_BCAST_SD },
- { X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rmb, TB_BCAST_SD },
- { X86::VMAXCPDZrr, X86::VMAXCPDZrmb, TB_BCAST_SD },
- { X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rmb, TB_BCAST_SS },
- { X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rmb, TB_BCAST_SS },
- { X86::VMAXCPSZrr, X86::VMAXCPSZrmb, TB_BCAST_SS },
- { X86::VMAXPDZ128rr, X86::VMAXPDZ128rmb, TB_BCAST_SD },
- { X86::VMAXPDZ256rr, X86::VMAXPDZ256rmb, TB_BCAST_SD },
- { X86::VMAXPDZrr, X86::VMAXPDZrmb, TB_BCAST_SD },
- { X86::VMAXPSZ128rr, X86::VMAXPSZ128rmb, TB_BCAST_SS },
- { X86::VMAXPSZ256rr, X86::VMAXPSZ256rmb, TB_BCAST_SS },
- { X86::VMAXPSZrr, X86::VMAXPSZrmb, TB_BCAST_SS },
- { X86::VMINCPDZ128rr, X86::VMINCPDZ128rmb, TB_BCAST_SD },
- { X86::VMINCPDZ256rr, X86::VMINCPDZ256rmb, TB_BCAST_SD },
- { X86::VMINCPDZrr, X86::VMINCPDZrmb, TB_BCAST_SD },
- { X86::VMINCPSZ128rr, X86::VMINCPSZ128rmb, TB_BCAST_SS },
- { X86::VMINCPSZ256rr, X86::VMINCPSZ256rmb, TB_BCAST_SS },
- { X86::VMINCPSZrr, X86::VMINCPSZrmb, TB_BCAST_SS },
- { X86::VMINPDZ128rr, X86::VMINPDZ128rmb, TB_BCAST_SD },
- { X86::VMINPDZ256rr, X86::VMINPDZ256rmb, TB_BCAST_SD },
- { X86::VMINPDZrr, X86::VMINPDZrmb, TB_BCAST_SD },
- { X86::VMINPSZ128rr, X86::VMINPSZ128rmb, TB_BCAST_SS },
- { X86::VMINPSZ256rr, X86::VMINPSZ256rmb, TB_BCAST_SS },
- { X86::VMINPSZrr, X86::VMINPSZrmb, TB_BCAST_SS },
- { X86::VMULPDZ128rr, X86::VMULPDZ128rmb, TB_BCAST_SD },
- { X86::VMULPDZ256rr, X86::VMULPDZ256rmb, TB_BCAST_SD },
- { X86::VMULPDZrr, X86::VMULPDZrmb, TB_BCAST_SD },
- { X86::VMULPSZ128rr, X86::VMULPSZ128rmb, TB_BCAST_SS },
- { X86::VMULPSZ256rr, X86::VMULPSZ256rmb, TB_BCAST_SS },
- { X86::VMULPSZrr, X86::VMULPSZrmb, TB_BCAST_SS },
- { X86::VORPDZ128rr, X86::VORPDZ128rmb, TB_BCAST_SD },
- { X86::VORPDZ256rr, X86::VORPDZ256rmb, TB_BCAST_SD },
- { X86::VORPDZrr, X86::VORPDZrmb, TB_BCAST_SD },
- { X86::VORPSZ128rr, X86::VORPSZ128rmb, TB_BCAST_SS },
- { X86::VORPSZ256rr, X86::VORPSZ256rmb, TB_BCAST_SS },
- { X86::VORPSZrr, X86::VORPSZrmb, TB_BCAST_SS },
- { X86::VPADDDZ128rr, X86::VPADDDZ128rmb, TB_BCAST_D },
- { X86::VPADDDZ256rr, X86::VPADDDZ256rmb, TB_BCAST_D },
- { X86::VPADDDZrr, X86::VPADDDZrmb, TB_BCAST_D },
- { X86::VPADDQZ128rr, X86::VPADDQZ128rmb, TB_BCAST_Q },
- { X86::VPADDQZ256rr, X86::VPADDQZ256rmb, TB_BCAST_Q },
- { X86::VPADDQZrr, X86::VPADDQZrmb, TB_BCAST_Q },
- { X86::VPANDDZ128rr, X86::VPANDDZ128rmb, TB_BCAST_D },
- { X86::VPANDDZ256rr, X86::VPANDDZ256rmb, TB_BCAST_D },
- { X86::VPANDDZrr, X86::VPANDDZrmb, TB_BCAST_D },
- { X86::VPANDNDZ128rr, X86::VPANDNDZ128rmb, TB_BCAST_D },
- { X86::VPANDNDZ256rr, X86::VPANDNDZ256rmb, TB_BCAST_D },
- { X86::VPANDNDZrr, X86::VPANDNDZrmb, TB_BCAST_D },
- { X86::VPANDNQZ128rr, X86::VPANDNQZ128rmb, TB_BCAST_Q },
- { X86::VPANDNQZ256rr, X86::VPANDNQZ256rmb, TB_BCAST_Q },
- { X86::VPANDNQZrr, X86::VPANDNQZrmb, TB_BCAST_Q },
- { X86::VPANDQZ128rr, X86::VPANDQZ128rmb, TB_BCAST_Q },
- { X86::VPANDQZ256rr, X86::VPANDQZ256rmb, TB_BCAST_Q },
- { X86::VPANDQZrr, X86::VPANDQZrmb, TB_BCAST_Q },
- { X86::VPCMPDZ128rri, X86::VPCMPDZ128rmib, TB_BCAST_D },
- { X86::VPCMPDZ256rri, X86::VPCMPDZ256rmib, TB_BCAST_D },
- { X86::VPCMPDZrri, X86::VPCMPDZrmib, TB_BCAST_D },
- { X86::VPCMPEQDZ128rr, X86::VPCMPEQDZ128rmb, TB_BCAST_D },
- { X86::VPCMPEQDZ256rr, X86::VPCMPEQDZ256rmb, TB_BCAST_D },
- { X86::VPCMPEQDZrr, X86::VPCMPEQDZrmb, TB_BCAST_D },
- { X86::VPCMPEQQZ128rr, X86::VPCMPEQQZ128rmb, TB_BCAST_Q },
- { X86::VPCMPEQQZ256rr, X86::VPCMPEQQZ256rmb, TB_BCAST_Q },
- { X86::VPCMPEQQZrr, X86::VPCMPEQQZrmb, TB_BCAST_Q },
- { X86::VPCMPGTDZ128rr, X86::VPCMPGTDZ128rmb, TB_BCAST_D },
- { X86::VPCMPGTDZ256rr, X86::VPCMPGTDZ256rmb, TB_BCAST_D },
- { X86::VPCMPGTDZrr, X86::VPCMPGTDZrmb, TB_BCAST_D },
- { X86::VPCMPGTQZ128rr, X86::VPCMPGTQZ128rmb, TB_BCAST_Q },
- { X86::VPCMPGTQZ256rr, X86::VPCMPGTQZ256rmb, TB_BCAST_Q },
- { X86::VPCMPGTQZrr, X86::VPCMPGTQZrmb, TB_BCAST_Q },
- { X86::VPCMPQZ128rri, X86::VPCMPQZ128rmib, TB_BCAST_Q },
- { X86::VPCMPQZ256rri, X86::VPCMPQZ256rmib, TB_BCAST_Q },
- { X86::VPCMPQZrri, X86::VPCMPQZrmib, TB_BCAST_Q },
- { X86::VPCMPUDZ128rri, X86::VPCMPUDZ128rmib, TB_BCAST_D },
- { X86::VPCMPUDZ256rri, X86::VPCMPUDZ256rmib, TB_BCAST_D },
- { X86::VPCMPUDZrri, X86::VPCMPUDZrmib, TB_BCAST_D },
- { X86::VPCMPUQZ128rri, X86::VPCMPUQZ128rmib, TB_BCAST_Q },
- { X86::VPCMPUQZ256rri, X86::VPCMPUQZ256rmib, TB_BCAST_Q },
- { X86::VPCMPUQZrri, X86::VPCMPUQZrmib, TB_BCAST_Q },
- { X86::VPMAXSDZ128rr, X86::VPMAXSDZ128rmb, TB_BCAST_D },
- { X86::VPMAXSDZ256rr, X86::VPMAXSDZ256rmb, TB_BCAST_D },
- { X86::VPMAXSDZrr, X86::VPMAXSDZrmb, TB_BCAST_D },
- { X86::VPMAXSQZ128rr, X86::VPMAXSQZ128rmb, TB_BCAST_Q },
- { X86::VPMAXSQZ256rr, X86::VPMAXSQZ256rmb, TB_BCAST_Q },
- { X86::VPMAXSQZrr, X86::VPMAXSQZrmb, TB_BCAST_Q },
- { X86::VPMAXUDZ128rr, X86::VPMAXUDZ128rmb, TB_BCAST_D },
- { X86::VPMAXUDZ256rr, X86::VPMAXUDZ256rmb, TB_BCAST_D },
- { X86::VPMAXUDZrr, X86::VPMAXUDZrmb, TB_BCAST_D },
- { X86::VPMAXUQZ128rr, X86::VPMAXUQZ128rmb, TB_BCAST_Q },
- { X86::VPMAXUQZ256rr, X86::VPMAXUQZ256rmb, TB_BCAST_Q },
- { X86::VPMAXUQZrr, X86::VPMAXUQZrmb, TB_BCAST_Q },
- { X86::VPMINSDZ128rr, X86::VPMINSDZ128rmb, TB_BCAST_D },
- { X86::VPMINSDZ256rr, X86::VPMINSDZ256rmb, TB_BCAST_D },
- { X86::VPMINSDZrr, X86::VPMINSDZrmb, TB_BCAST_D },
- { X86::VPMINSQZ128rr, X86::VPMINSQZ128rmb, TB_BCAST_Q },
- { X86::VPMINSQZ256rr, X86::VPMINSQZ256rmb, TB_BCAST_Q },
- { X86::VPMINSQZrr, X86::VPMINSQZrmb, TB_BCAST_Q },
- { X86::VPMINUDZ128rr, X86::VPMINUDZ128rmb, TB_BCAST_D },
- { X86::VPMINUDZ256rr, X86::VPMINUDZ256rmb, TB_BCAST_D },
- { X86::VPMINUDZrr, X86::VPMINUDZrmb, TB_BCAST_D },
- { X86::VPMINUQZ128rr, X86::VPMINUQZ128rmb, TB_BCAST_Q },
- { X86::VPMINUQZ256rr, X86::VPMINUQZ256rmb, TB_BCAST_Q },
- { X86::VPMINUQZrr, X86::VPMINUQZrmb, TB_BCAST_Q },
- { X86::VPMULLDZ128rr, X86::VPMULLDZ128rmb, TB_BCAST_D },
- { X86::VPMULLDZ256rr, X86::VPMULLDZ256rmb, TB_BCAST_D },
- { X86::VPMULLDZrr, X86::VPMULLDZrmb, TB_BCAST_D },
- { X86::VPMULLQZ128rr, X86::VPMULLQZ128rmb, TB_BCAST_Q },
- { X86::VPMULLQZ256rr, X86::VPMULLQZ256rmb, TB_BCAST_Q },
- { X86::VPMULLQZrr, X86::VPMULLQZrmb, TB_BCAST_Q },
- { X86::VPORDZ128rr, X86::VPORDZ128rmb, TB_BCAST_D },
- { X86::VPORDZ256rr, X86::VPORDZ256rmb, TB_BCAST_D },
- { X86::VPORDZrr, X86::VPORDZrmb, TB_BCAST_D },
- { X86::VPORQZ128rr, X86::VPORQZ128rmb, TB_BCAST_Q },
- { X86::VPORQZ256rr, X86::VPORQZ256rmb, TB_BCAST_Q },
- { X86::VPORQZrr, X86::VPORQZrmb, TB_BCAST_Q },
- { X86::VPTESTMDZ128rr, X86::VPTESTMDZ128rmb, TB_BCAST_D },
- { X86::VPTESTMDZ256rr, X86::VPTESTMDZ256rmb, TB_BCAST_D },
- { X86::VPTESTMDZrr, X86::VPTESTMDZrmb, TB_BCAST_D },
- { X86::VPTESTMQZ128rr, X86::VPTESTMQZ128rmb, TB_BCAST_Q },
- { X86::VPTESTMQZ256rr, X86::VPTESTMQZ256rmb, TB_BCAST_Q },
- { X86::VPTESTMQZrr, X86::VPTESTMQZrmb, TB_BCAST_Q },
- { X86::VPTESTNMDZ128rr,X86::VPTESTNMDZ128rmb,TB_BCAST_D },
- { X86::VPTESTNMDZ256rr,X86::VPTESTNMDZ256rmb,TB_BCAST_D },
- { X86::VPTESTNMDZrr, X86::VPTESTNMDZrmb, TB_BCAST_D },
- { X86::VPTESTNMQZ128rr,X86::VPTESTNMQZ128rmb,TB_BCAST_Q },
- { X86::VPTESTNMQZ256rr,X86::VPTESTNMQZ256rmb,TB_BCAST_Q },
- { X86::VPTESTNMQZrr, X86::VPTESTNMQZrmb, TB_BCAST_Q },
- { X86::VPXORDZ128rr, X86::VPXORDZ128rmb, TB_BCAST_D },
- { X86::VPXORDZ256rr, X86::VPXORDZ256rmb, TB_BCAST_D },
- { X86::VPXORDZrr, X86::VPXORDZrmb, TB_BCAST_D },
- { X86::VPXORQZ128rr, X86::VPXORQZ128rmb, TB_BCAST_Q },
- { X86::VPXORQZ256rr, X86::VPXORQZ256rmb, TB_BCAST_Q },
- { X86::VPXORQZrr, X86::VPXORQZrmb, TB_BCAST_Q },
- { X86::VSUBPDZ128rr, X86::VSUBPDZ128rmb, TB_BCAST_SD },
- { X86::VSUBPDZ256rr, X86::VSUBPDZ256rmb, TB_BCAST_SD },
- { X86::VSUBPDZrr, X86::VSUBPDZrmb, TB_BCAST_SD },
- { X86::VSUBPSZ128rr, X86::VSUBPSZ128rmb, TB_BCAST_SS },
- { X86::VSUBPSZ256rr, X86::VSUBPSZ256rmb, TB_BCAST_SS },
- { X86::VSUBPSZrr, X86::VSUBPSZrmb, TB_BCAST_SS },
- { X86::VXORPDZ128rr, X86::VXORPDZ128rmb, TB_BCAST_SD },
- { X86::VXORPDZ256rr, X86::VXORPDZ256rmb, TB_BCAST_SD },
- { X86::VXORPDZrr, X86::VXORPDZrmb, TB_BCAST_SD },
- { X86::VXORPSZ128rr, X86::VXORPSZ128rmb, TB_BCAST_SS },
- { X86::VXORPSZ256rr, X86::VXORPSZ256rmb, TB_BCAST_SS },
- { X86::VXORPSZrr, X86::VXORPSZrmb, TB_BCAST_SS },
-};
-
-static const X86MemoryFoldTableEntry BroadcastFoldTable3[] = {
- { X86::VFMADD132PDZ128r, X86::VFMADD132PDZ128mb, TB_BCAST_SD },
- { X86::VFMADD132PDZ256r, X86::VFMADD132PDZ256mb, TB_BCAST_SD },
- { X86::VFMADD132PDZr, X86::VFMADD132PDZmb, TB_BCAST_SD },
- { X86::VFMADD132PSZ128r, X86::VFMADD132PSZ128mb, TB_BCAST_SS },
- { X86::VFMADD132PSZ256r, X86::VFMADD132PSZ256mb, TB_BCAST_SS },
- { X86::VFMADD132PSZr, X86::VFMADD132PSZmb, TB_BCAST_SS },
- { X86::VFMADD213PDZ128r, X86::VFMADD213PDZ128mb, TB_BCAST_SD },
- { X86::VFMADD213PDZ256r, X86::VFMADD213PDZ256mb, TB_BCAST_SD },
- { X86::VFMADD213PDZr, X86::VFMADD213PDZmb, TB_BCAST_SD },
- { X86::VFMADD213PSZ128r, X86::VFMADD213PSZ128mb, TB_BCAST_SS },
- { X86::VFMADD213PSZ256r, X86::VFMADD213PSZ256mb, TB_BCAST_SS },
- { X86::VFMADD213PSZr, X86::VFMADD213PSZmb, TB_BCAST_SS },
- { X86::VFMADD231PDZ128r, X86::VFMADD231PDZ128mb, TB_BCAST_SD },
- { X86::VFMADD231PDZ256r, X86::VFMADD231PDZ256mb, TB_BCAST_SD },
- { X86::VFMADD231PDZr, X86::VFMADD231PDZmb, TB_BCAST_SD },
- { X86::VFMADD231PSZ128r, X86::VFMADD231PSZ128mb, TB_BCAST_SS },
- { X86::VFMADD231PSZ256r, X86::VFMADD231PSZ256mb, TB_BCAST_SS },
- { X86::VFMADD231PSZr, X86::VFMADD231PSZmb, TB_BCAST_SS },
- { X86::VFMADDSUB132PDZ128r, X86::VFMADDSUB132PDZ128mb, TB_BCAST_SD },
- { X86::VFMADDSUB132PDZ256r, X86::VFMADDSUB132PDZ256mb, TB_BCAST_SD },
- { X86::VFMADDSUB132PDZr, X86::VFMADDSUB132PDZmb, TB_BCAST_SD },
- { X86::VFMADDSUB132PSZ128r, X86::VFMADDSUB132PSZ128mb, TB_BCAST_SS },
- { X86::VFMADDSUB132PSZ256r, X86::VFMADDSUB132PSZ256mb, TB_BCAST_SS },
- { X86::VFMADDSUB132PSZr, X86::VFMADDSUB132PSZmb, TB_BCAST_SS },
- { X86::VFMADDSUB213PDZ128r, X86::VFMADDSUB213PDZ128mb, TB_BCAST_SD },
- { X86::VFMADDSUB213PDZ256r, X86::VFMADDSUB213PDZ256mb, TB_BCAST_SD },
- { X86::VFMADDSUB213PDZr, X86::VFMADDSUB213PDZmb, TB_BCAST_SD },
- { X86::VFMADDSUB213PSZ128r, X86::VFMADDSUB213PSZ128mb, TB_BCAST_SS },
- { X86::VFMADDSUB213PSZ256r, X86::VFMADDSUB213PSZ256mb, TB_BCAST_SS },
- { X86::VFMADDSUB213PSZr, X86::VFMADDSUB213PSZmb, TB_BCAST_SS },
- { X86::VFMADDSUB231PDZ128r, X86::VFMADDSUB231PDZ128mb, TB_BCAST_SD },
- { X86::VFMADDSUB231PDZ256r, X86::VFMADDSUB231PDZ256mb, TB_BCAST_SD },
- { X86::VFMADDSUB231PDZr, X86::VFMADDSUB231PDZmb, TB_BCAST_SD },
- { X86::VFMADDSUB231PSZ128r, X86::VFMADDSUB231PSZ128mb, TB_BCAST_SS },
- { X86::VFMADDSUB231PSZ256r, X86::VFMADDSUB231PSZ256mb, TB_BCAST_SS },
- { X86::VFMADDSUB231PSZr, X86::VFMADDSUB231PSZmb, TB_BCAST_SS },
- { X86::VFMSUB132PDZ128r, X86::VFMSUB132PDZ128mb, TB_BCAST_SD },
- { X86::VFMSUB132PDZ256r, X86::VFMSUB132PDZ256mb, TB_BCAST_SD },
- { X86::VFMSUB132PDZr, X86::VFMSUB132PDZmb, TB_BCAST_SD },
- { X86::VFMSUB132PSZ128r, X86::VFMSUB132PSZ128mb, TB_BCAST_SS },
- { X86::VFMSUB132PSZ256r, X86::VFMSUB132PSZ256mb, TB_BCAST_SS },
- { X86::VFMSUB132PSZr, X86::VFMSUB132PSZmb, TB_BCAST_SS },
- { X86::VFMSUB213PDZ128r, X86::VFMSUB213PDZ128mb, TB_BCAST_SD },
- { X86::VFMSUB213PDZ256r, X86::VFMSUB213PDZ256mb, TB_BCAST_SD },
- { X86::VFMSUB213PDZr, X86::VFMSUB213PDZmb, TB_BCAST_SD },
- { X86::VFMSUB213PSZ128r, X86::VFMSUB213PSZ128mb, TB_BCAST_SS },
- { X86::VFMSUB213PSZ256r, X86::VFMSUB213PSZ256mb, TB_BCAST_SS },
- { X86::VFMSUB213PSZr, X86::VFMSUB213PSZmb, TB_BCAST_SS },
- { X86::VFMSUB231PDZ128r, X86::VFMSUB231PDZ128mb, TB_BCAST_SD },
- { X86::VFMSUB231PDZ256r, X86::VFMSUB231PDZ256mb, TB_BCAST_SD },
- { X86::VFMSUB231PDZr, X86::VFMSUB231PDZmb, TB_BCAST_SD },
- { X86::VFMSUB231PSZ128r, X86::VFMSUB231PSZ128mb, TB_BCAST_SS },
- { X86::VFMSUB231PSZ256r, X86::VFMSUB231PSZ256mb, TB_BCAST_SS },
- { X86::VFMSUB231PSZr, X86::VFMSUB231PSZmb, TB_BCAST_SS },
- { X86::VFMSUBADD132PDZ128r, X86::VFMSUBADD132PDZ128mb, TB_BCAST_SD },
- { X86::VFMSUBADD132PDZ256r, X86::VFMSUBADD132PDZ256mb, TB_BCAST_SD },
- { X86::VFMSUBADD132PDZr, X86::VFMSUBADD132PDZmb, TB_BCAST_SD },
- { X86::VFMSUBADD132PSZ128r, X86::VFMSUBADD132PSZ128mb, TB_BCAST_SS },
- { X86::VFMSUBADD132PSZ256r, X86::VFMSUBADD132PSZ256mb, TB_BCAST_SS },
- { X86::VFMSUBADD132PSZr, X86::VFMSUBADD132PSZmb, TB_BCAST_SS },
- { X86::VFMSUBADD213PDZ128r, X86::VFMSUBADD213PDZ128mb, TB_BCAST_SD },
- { X86::VFMSUBADD213PDZ256r, X86::VFMSUBADD213PDZ256mb, TB_BCAST_SD },
- { X86::VFMSUBADD213PDZr, X86::VFMSUBADD213PDZmb, TB_BCAST_SD },
- { X86::VFMSUBADD213PSZ128r, X86::VFMSUBADD213PSZ128mb, TB_BCAST_SS },
- { X86::VFMSUBADD213PSZ256r, X86::VFMSUBADD213PSZ256mb, TB_BCAST_SS },
- { X86::VFMSUBADD213PSZr, X86::VFMSUBADD213PSZmb, TB_BCAST_SS },
- { X86::VFMSUBADD231PDZ128r, X86::VFMSUBADD231PDZ128mb, TB_BCAST_SD },
- { X86::VFMSUBADD231PDZ256r, X86::VFMSUBADD231PDZ256mb, TB_BCAST_SD },
- { X86::VFMSUBADD231PDZr, X86::VFMSUBADD231PDZmb, TB_BCAST_SD },
- { X86::VFMSUBADD231PSZ128r, X86::VFMSUBADD231PSZ128mb, TB_BCAST_SS },
- { X86::VFMSUBADD231PSZ256r, X86::VFMSUBADD231PSZ256mb, TB_BCAST_SS },
- { X86::VFMSUBADD231PSZr, X86::VFMSUBADD231PSZmb, TB_BCAST_SS },
- { X86::VFNMADD132PDZ128r, X86::VFNMADD132PDZ128mb, TB_BCAST_SD },
- { X86::VFNMADD132PDZ256r, X86::VFNMADD132PDZ256mb, TB_BCAST_SD },
- { X86::VFNMADD132PDZr, X86::VFNMADD132PDZmb, TB_BCAST_SD },
- { X86::VFNMADD132PSZ128r, X86::VFNMADD132PSZ128mb, TB_BCAST_SS },
- { X86::VFNMADD132PSZ256r, X86::VFNMADD132PSZ256mb, TB_BCAST_SS },
- { X86::VFNMADD132PSZr, X86::VFNMADD132PSZmb, TB_BCAST_SS },
- { X86::VFNMADD213PDZ128r, X86::VFNMADD213PDZ128mb, TB_BCAST_SD },
- { X86::VFNMADD213PDZ256r, X86::VFNMADD213PDZ256mb, TB_BCAST_SD },
- { X86::VFNMADD213PDZr, X86::VFNMADD213PDZmb, TB_BCAST_SD },
- { X86::VFNMADD213PSZ128r, X86::VFNMADD213PSZ128mb, TB_BCAST_SS },
- { X86::VFNMADD213PSZ256r, X86::VFNMADD213PSZ256mb, TB_BCAST_SS },
- { X86::VFNMADD213PSZr, X86::VFNMADD213PSZmb, TB_BCAST_SS },
- { X86::VFNMADD231PDZ128r, X86::VFNMADD231PDZ128mb, TB_BCAST_SD },
- { X86::VFNMADD231PDZ256r, X86::VFNMADD231PDZ256mb, TB_BCAST_SD },
- { X86::VFNMADD231PDZr, X86::VFNMADD231PDZmb, TB_BCAST_SD },
- { X86::VFNMADD231PSZ128r, X86::VFNMADD231PSZ128mb, TB_BCAST_SS },
- { X86::VFNMADD231PSZ256r, X86::VFNMADD231PSZ256mb, TB_BCAST_SS },
- { X86::VFNMADD231PSZr, X86::VFNMADD231PSZmb, TB_BCAST_SS },
- { X86::VFNMSUB132PDZ128r, X86::VFNMSUB132PDZ128mb, TB_BCAST_SD },
- { X86::VFNMSUB132PDZ256r, X86::VFNMSUB132PDZ256mb, TB_BCAST_SD },
- { X86::VFNMSUB132PDZr, X86::VFNMSUB132PDZmb, TB_BCAST_SD },
- { X86::VFNMSUB132PSZ128r, X86::VFNMSUB132PSZ128mb, TB_BCAST_SS },
- { X86::VFNMSUB132PSZ256r, X86::VFNMSUB132PSZ256mb, TB_BCAST_SS },
- { X86::VFNMSUB132PSZr, X86::VFNMSUB132PSZmb, TB_BCAST_SS },
- { X86::VFNMSUB213PDZ128r, X86::VFNMSUB213PDZ128mb, TB_BCAST_SD },
- { X86::VFNMSUB213PDZ256r, X86::VFNMSUB213PDZ256mb, TB_BCAST_SD },
- { X86::VFNMSUB213PDZr, X86::VFNMSUB213PDZmb, TB_BCAST_SD },
- { X86::VFNMSUB213PSZ128r, X86::VFNMSUB213PSZ128mb, TB_BCAST_SS },
- { X86::VFNMSUB213PSZ256r, X86::VFNMSUB213PSZ256mb, TB_BCAST_SS },
- { X86::VFNMSUB213PSZr, X86::VFNMSUB213PSZmb, TB_BCAST_SS },
- { X86::VFNMSUB231PDZ128r, X86::VFNMSUB231PDZ128mb, TB_BCAST_SD },
- { X86::VFNMSUB231PDZ256r, X86::VFNMSUB231PDZ256mb, TB_BCAST_SD },
- { X86::VFNMSUB231PDZr, X86::VFNMSUB231PDZmb, TB_BCAST_SD },
- { X86::VFNMSUB231PSZ128r, X86::VFNMSUB231PSZ128mb, TB_BCAST_SS },
- { X86::VFNMSUB231PSZ256r, X86::VFNMSUB231PSZ256mb, TB_BCAST_SS },
- { X86::VFNMSUB231PSZr, X86::VFNMSUB231PSZmb, TB_BCAST_SS },
- { X86::VPTERNLOGDZ128rri, X86::VPTERNLOGDZ128rmbi, TB_BCAST_D },
- { X86::VPTERNLOGDZ256rri, X86::VPTERNLOGDZ256rmbi, TB_BCAST_D },
- { X86::VPTERNLOGDZrri, X86::VPTERNLOGDZrmbi, TB_BCAST_D },
- { X86::VPTERNLOGQZ128rri, X86::VPTERNLOGQZ128rmbi, TB_BCAST_Q },
- { X86::VPTERNLOGQZ256rri, X86::VPTERNLOGQZ256rmbi, TB_BCAST_Q },
- { X86::VPTERNLOGQZrri, X86::VPTERNLOGQZrmbi, TB_BCAST_Q },
-};
// Table to map instructions that are safe to broadcast using a width
// different from the element width.
-static const X86MemoryFoldTableEntry BroadcastSizeFoldTable2[] = {
+static const X86FoldTableEntry BroadcastSizeTable2[] = {
{ X86::VANDNPDZ128rr, X86::VANDNPSZ128rmb, TB_BCAST_SS },
{ X86::VANDNPDZ256rr, X86::VANDNPSZ256rmb, TB_BCAST_SS },
{ X86::VANDNPDZrr, X86::VANDNPSZrmb, TB_BCAST_SS },
@@ -370,7 +77,7 @@ static const X86MemoryFoldTableEntry BroadcastSizeFoldTable2[] = {
{ X86::VXORPSZrr, X86::VXORPDZrmb, TB_BCAST_SD },
};
-static const X86MemoryFoldTableEntry BroadcastSizeFoldTable3[] = {
+static const X86FoldTableEntry BroadcastSizeTable3[] = {
{ X86::VPTERNLOGDZ128rri, X86::VPTERNLOGQZ128rmbi, TB_BCAST_Q },
{ X86::VPTERNLOGDZ256rri, X86::VPTERNLOGQZ256rmbi, TB_BCAST_Q },
{ X86::VPTERNLOGDZrri, X86::VPTERNLOGQZrmbi, TB_BCAST_Q },
@@ -379,91 +86,59 @@ static const X86MemoryFoldTableEntry BroadcastSizeFoldTable3[] = {
{ X86::VPTERNLOGQZrri, X86::VPTERNLOGDZrmbi, TB_BCAST_D },
};
-static const X86MemoryFoldTableEntry *
-lookupFoldTableImpl(ArrayRef<X86MemoryFoldTableEntry> Table, unsigned RegOp) {
+static const X86FoldTableEntry *
+lookupFoldTableImpl(ArrayRef<X86FoldTableEntry> Table, unsigned RegOp) {
#ifndef NDEBUG
+#define CHECK_SORTED_UNIQUE(TABLE) \
+ assert(llvm::is_sorted(TABLE) && #TABLE " is not sorted"); \
+  assert(std::adjacent_find(std::begin(TABLE), std::end(TABLE)) == \
+         std::end(TABLE) && \
+ #TABLE " is not unique");
+
// Make sure the tables are sorted.
static std::atomic<bool> FoldTablesChecked(false);
if (!FoldTablesChecked.load(std::memory_order_relaxed)) {
- assert(llvm::is_sorted(MemoryFoldTable2Addr) &&
- std::adjacent_find(std::begin(MemoryFoldTable2Addr),
- std::end(MemoryFoldTable2Addr)) ==
- std::end(MemoryFoldTable2Addr) &&
- "MemoryFoldTable2Addr is not sorted and unique!");
- assert(llvm::is_sorted(MemoryFoldTable0) &&
- std::adjacent_find(std::begin(MemoryFoldTable0),
- std::end(MemoryFoldTable0)) ==
- std::end(MemoryFoldTable0) &&
- "MemoryFoldTable0 is not sorted and unique!");
- assert(llvm::is_sorted(MemoryFoldTable1) &&
- std::adjacent_find(std::begin(MemoryFoldTable1),
- std::end(MemoryFoldTable1)) ==
- std::end(MemoryFoldTable1) &&
- "MemoryFoldTable1 is not sorted and unique!");
- assert(llvm::is_sorted(MemoryFoldTable2) &&
- std::adjacent_find(std::begin(MemoryFoldTable2),
- std::end(MemoryFoldTable2)) ==
- std::end(MemoryFoldTable2) &&
- "MemoryFoldTable2 is not sorted and unique!");
- assert(llvm::is_sorted(MemoryFoldTable3) &&
- std::adjacent_find(std::begin(MemoryFoldTable3),
- std::end(MemoryFoldTable3)) ==
- std::end(MemoryFoldTable3) &&
- "MemoryFoldTable3 is not sorted and unique!");
- assert(llvm::is_sorted(MemoryFoldTable4) &&
- std::adjacent_find(std::begin(MemoryFoldTable4),
- std::end(MemoryFoldTable4)) ==
- std::end(MemoryFoldTable4) &&
- "MemoryFoldTable4 is not sorted and unique!");
- assert(llvm::is_sorted(BroadcastFoldTable2) &&
- std::adjacent_find(std::begin(BroadcastFoldTable2),
- std::end(BroadcastFoldTable2)) ==
- std::end(BroadcastFoldTable2) &&
- "BroadcastFoldTable2 is not sorted and unique!");
- assert(llvm::is_sorted(BroadcastFoldTable3) &&
- std::adjacent_find(std::begin(BroadcastFoldTable3),
- std::end(BroadcastFoldTable3)) ==
- std::end(BroadcastFoldTable3) &&
- "BroadcastFoldTable3 is not sorted and unique!");
- assert(llvm::is_sorted(BroadcastSizeFoldTable2) &&
- std::adjacent_find(std::begin(BroadcastSizeFoldTable2),
- std::end(BroadcastSizeFoldTable2)) ==
- std::end(BroadcastSizeFoldTable2) &&
- "BroadcastSizeFoldTable2 is not sorted and unique!");
- assert(llvm::is_sorted(BroadcastSizeFoldTable3) &&
- std::adjacent_find(std::begin(BroadcastSizeFoldTable3),
- std::end(BroadcastSizeFoldTable3)) ==
- std::end(BroadcastSizeFoldTable3) &&
- "BroadcastSizeFoldTable3 is not sorted and unique!");
+ CHECK_SORTED_UNIQUE(Table2Addr)
+ CHECK_SORTED_UNIQUE(Table0)
+ CHECK_SORTED_UNIQUE(Table1)
+ CHECK_SORTED_UNIQUE(Table2)
+ CHECK_SORTED_UNIQUE(Table3)
+ CHECK_SORTED_UNIQUE(Table4)
+ CHECK_SORTED_UNIQUE(BroadcastTable1)
+ CHECK_SORTED_UNIQUE(BroadcastTable2)
+ CHECK_SORTED_UNIQUE(BroadcastTable3)
+ CHECK_SORTED_UNIQUE(BroadcastTable4)
+ CHECK_SORTED_UNIQUE(BroadcastSizeTable2)
+ CHECK_SORTED_UNIQUE(BroadcastSizeTable3)
FoldTablesChecked.store(true, std::memory_order_relaxed);
}
#endif
- const X86MemoryFoldTableEntry *Data = llvm::lower_bound(Table, RegOp);
+ const X86FoldTableEntry *Data = llvm::lower_bound(Table, RegOp);
if (Data != Table.end() && Data->KeyOp == RegOp &&
!(Data->Flags & TB_NO_FORWARD))
return Data;
return nullptr;
}
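
The lookup relies on two invariants that the NDEBUG block above enforces: every
table is sorted by KeyOp and free of duplicate keys, so llvm::lower_bound can
binary-search by opcode alone. A minimal standalone sketch of the same pattern,
using made-up opcodes and a plain std::vector instead of the real tables:

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    struct Entry {
      unsigned KeyOp;  // register-form opcode, the sort key
      unsigned DstOp;  // memory-form opcode
      uint16_t Flags;
    };

    // Heterogeneous comparison lets lower_bound search by opcode without
    // materializing an Entry, mirroring the friend operator< in the header.
    static bool operator<(const Entry &E, unsigned Opcode) {
      return E.KeyOp < Opcode;
    }

    static const Entry *lookup(const std::vector<Entry> &Table, unsigned RegOp) {
      auto It = std::lower_bound(Table.begin(), Table.end(), RegOp);
      if (It != Table.end() && It->KeyOp == RegOp)
        return &*It;  // the real code also rejects TB_NO_FORWARD entries here
      return nullptr;
    }

    int main() {
      std::vector<Entry> Table = {{10, 100, 0}, {20, 200, 0}, {30, 300, 0}};
      if (const Entry *E = lookup(Table, 20))
        std::cout << "folds to " << E->DstOp << '\n';  // prints "folds to 200"
    }
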
-const X86MemoryFoldTableEntry *
+const X86FoldTableEntry *
llvm::lookupTwoAddrFoldTable(unsigned RegOp) {
- return lookupFoldTableImpl(MemoryFoldTable2Addr, RegOp);
+ return lookupFoldTableImpl(Table2Addr, RegOp);
}
-const X86MemoryFoldTableEntry *
+const X86FoldTableEntry *
llvm::lookupFoldTable(unsigned RegOp, unsigned OpNum) {
- ArrayRef<X86MemoryFoldTableEntry> FoldTable;
+ ArrayRef<X86FoldTableEntry> FoldTable;
if (OpNum == 0)
- FoldTable = ArrayRef(MemoryFoldTable0);
+ FoldTable = ArrayRef(Table0);
else if (OpNum == 1)
- FoldTable = ArrayRef(MemoryFoldTable1);
+ FoldTable = ArrayRef(Table1);
else if (OpNum == 2)
- FoldTable = ArrayRef(MemoryFoldTable2);
+ FoldTable = ArrayRef(Table2);
else if (OpNum == 3)
- FoldTable = ArrayRef(MemoryFoldTable3);
+ FoldTable = ArrayRef(Table3);
else if (OpNum == 4)
- FoldTable = ArrayRef(MemoryFoldTable4);
+ FoldTable = ArrayRef(Table4);
else
return nullptr;
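
OpNum names the operand position that the memory reference folds into, and
anything past 4 simply fails the lookup. As a sketch only, the ladder above is
equivalent to indexing a small table of tables (T0..T4 here are empty stand-ins
for the Table0..Table4 arrays defined earlier in this file):

    #include <array>
    #include <cstdint>
    #include <vector>

    struct Entry { unsigned KeyOp, DstOp; uint16_t Flags; };

    static const std::vector<Entry> T0, T1, T2, T3, T4;  // stand-ins only

    // Hypothetical table-of-tables dispatch equivalent to the if/else chain.
    static const std::vector<Entry> *selectTable(unsigned OpNum) {
      static const std::array<const std::vector<Entry> *, 5> Tables = {
          &T0, &T1, &T2, &T3, &T4};
      return OpNum < Tables.size() ? Tables[OpNum] : nullptr;
    }
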
@@ -476,42 +151,46 @@ namespace {
// function-scope static variable to lazily initialize the unfolding table.
struct X86MemUnfoldTable {
// Stores memory unfolding tables entries sorted by opcode.
- std::vector<X86MemoryFoldTableEntry> Table;
+ std::vector<X86FoldTableEntry> Table;
X86MemUnfoldTable() {
- for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable2Addr)
+ for (const X86FoldTableEntry &Entry : Table2Addr)
// Index 0, folded load and store, no alignment requirement.
addTableEntry(Entry, TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
- for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable0)
+ for (const X86FoldTableEntry &Entry : Table0)
// Index 0, mix of loads and stores.
addTableEntry(Entry, TB_INDEX_0);
- for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable1)
+ for (const X86FoldTableEntry &Entry : Table1)
// Index 1, folded load
addTableEntry(Entry, TB_INDEX_1 | TB_FOLDED_LOAD);
- for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable2)
+ for (const X86FoldTableEntry &Entry : Table2)
// Index 2, folded load
addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD);
- for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable3)
+ for (const X86FoldTableEntry &Entry : Table3)
// Index 3, folded load
addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD);
- for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable4)
+ for (const X86FoldTableEntry &Entry : Table4)
// Index 4, folded load
addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD);
// Broadcast tables.
- for (const X86MemoryFoldTableEntry &Entry : BroadcastFoldTable2)
+ for (const X86FoldTableEntry &Entry : BroadcastTable2)
// Index 2, folded broadcast
addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD | TB_FOLDED_BCAST);
- for (const X86MemoryFoldTableEntry &Entry : BroadcastFoldTable3)
+ for (const X86FoldTableEntry &Entry : BroadcastTable3)
// Index 3, folded broadcast
addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD | TB_FOLDED_BCAST);
+ for (const X86FoldTableEntry &Entry : BroadcastTable4)
+ // Index 4, folded broadcast
+ addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD | TB_FOLDED_BCAST);
+
// Sort the memory->reg unfold table.
array_pod_sort(Table.begin(), Table.end());
@@ -520,7 +199,7 @@ struct X86MemUnfoldTable {
"Memory unfolding table is not unique!");
}
- void addTableEntry(const X86MemoryFoldTableEntry &Entry,
+ void addTableEntry(const X86FoldTableEntry &Entry,
uint16_t ExtraFlags) {
// NOTE: This swaps the KeyOp and DstOp in the table so we can sort it.
if ((Entry.Flags & TB_NO_REVERSE) == 0)
@@ -530,7 +209,7 @@ struct X86MemUnfoldTable {
};
}
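
The unfold table is the fold tables run in reverse: each entry is re-inserted
with KeyOp and DstOp swapped (unless TB_NO_REVERSE forbids it), tagged with the
operand index and load/store flags, and the vector is sorted once so later
lookups can binary-search by the memory-form opcode. A self-contained sketch of
that construction, with a placeholder bit value standing in for TB_NO_REVERSE:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Entry { unsigned KeyOp, DstOp; uint16_t Flags; };
    constexpr uint16_t TB_NO_REVERSE = 1 << 0;  // placeholder bit, for illustration

    // Build a memory->register (unfold) table from register->memory entries by
    // swapping KeyOp and DstOp, then sorting for binary search.
    std::vector<Entry> buildUnfoldTable(const std::vector<Entry> &FoldTable,
                                        uint16_t ExtraFlags) {
      std::vector<Entry> Unfold;
      for (const Entry &E : FoldTable)
        if ((E.Flags & TB_NO_REVERSE) == 0)
          Unfold.push_back({E.DstOp, E.KeyOp,
                            static_cast<uint16_t>(E.Flags | ExtraFlags)});
      std::sort(Unfold.begin(), Unfold.end(),
                [](const Entry &A, const Entry &B) { return A.KeyOp < B.KeyOp; });
      return Unfold;
    }
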
-const X86MemoryFoldTableEntry *
+const X86FoldTableEntry *
llvm::lookupUnfoldTable(unsigned MemOp) {
static X86MemUnfoldTable MemUnfoldTable;
auto &Table = MemUnfoldTable.Table;
@@ -544,26 +223,26 @@ namespace {
// This class stores the memory -> broadcast folding tables. It is instantiated
// as a function-scope static variable to lazily initialize the folding table.
-struct X86MemBroadcastFoldTable {
+struct X86BroadcastFoldTable {
// Stores memory broadcast folding tables entries sorted by opcode.
- std::vector<X86MemoryFoldTableEntry> Table;
+ std::vector<X86FoldTableEntry> Table;
- X86MemBroadcastFoldTable() {
+ X86BroadcastFoldTable() {
// Broadcast tables.
- for (const X86MemoryFoldTableEntry &Reg2Bcst : BroadcastFoldTable2) {
+ for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable2) {
unsigned RegOp = Reg2Bcst.KeyOp;
unsigned BcstOp = Reg2Bcst.DstOp;
- if (const X86MemoryFoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) {
+ if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) {
unsigned MemOp = Reg2Mem->DstOp;
uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 |
TB_FOLDED_LOAD | TB_FOLDED_BCAST;
Table.push_back({MemOp, BcstOp, Flags});
}
}
- for (const X86MemoryFoldTableEntry &Reg2Bcst : BroadcastSizeFoldTable2) {
+ for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable2) {
unsigned RegOp = Reg2Bcst.KeyOp;
unsigned BcstOp = Reg2Bcst.DstOp;
- if (const X86MemoryFoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) {
+ if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) {
unsigned MemOp = Reg2Mem->DstOp;
uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 |
TB_FOLDED_LOAD | TB_FOLDED_BCAST;
@@ -571,20 +250,20 @@ struct X86MemBroadcastFoldTable {
}
}
- for (const X86MemoryFoldTableEntry &Reg2Bcst : BroadcastFoldTable3) {
+ for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable3) {
unsigned RegOp = Reg2Bcst.KeyOp;
unsigned BcstOp = Reg2Bcst.DstOp;
- if (const X86MemoryFoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) {
+ if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) {
unsigned MemOp = Reg2Mem->DstOp;
uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 |
TB_FOLDED_LOAD | TB_FOLDED_BCAST;
Table.push_back({MemOp, BcstOp, Flags});
}
}
- for (const X86MemoryFoldTableEntry &Reg2Bcst : BroadcastSizeFoldTable3) {
+ for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable3) {
unsigned RegOp = Reg2Bcst.KeyOp;
unsigned BcstOp = Reg2Bcst.DstOp;
- if (const X86MemoryFoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) {
+ if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) {
unsigned MemOp = Reg2Mem->DstOp;
uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 |
TB_FOLDED_LOAD | TB_FOLDED_BCAST;
@@ -592,13 +271,24 @@ struct X86MemBroadcastFoldTable {
}
}
+ for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable4) {
+ unsigned RegOp = Reg2Bcst.KeyOp;
+ unsigned BcstOp = Reg2Bcst.DstOp;
+ if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 4)) {
+ unsigned MemOp = Reg2Mem->DstOp;
+ uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_4 |
+ TB_FOLDED_LOAD | TB_FOLDED_BCAST;
+ Table.push_back({MemOp, BcstOp, Flags});
+ }
+ }
+
// Sort the memory->broadcast fold table.
array_pod_sort(Table.begin(), Table.end());
}
};
} // namespace
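
The broadcast fold table is derived rather than hand-written: each reg->bcst
entry is joined with the reg->mem entry for the same register opcode, producing
a mem->bcst mapping whose flags combine both sides plus the operand index. A
sketch of that join under the same Entry shape (a quadratic scan here, where
the real code reuses its sorted lookupFoldTable helper instead):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Entry { unsigned KeyOp, DstOp; uint16_t Flags; };

    // Join two maps keyed by the register opcode: reg->mem and reg->bcst
    // compose into a derived mem->bcst table, sorted for later lookups.
    std::vector<Entry> joinMemToBcst(const std::vector<Entry> &RegToMem,
                                     const std::vector<Entry> &RegToBcst) {
      std::vector<Entry> MemToBcst;
      for (const Entry &RB : RegToBcst)
        for (const Entry &RM : RegToMem)
          if (RM.KeyOp == RB.KeyOp)
            MemToBcst.push_back({RM.DstOp, RB.DstOp,
                                 static_cast<uint16_t>(RM.Flags | RB.Flags)});
      std::sort(MemToBcst.begin(), MemToBcst.end(),
                [](const Entry &A, const Entry &B) { return A.KeyOp < B.KeyOp; });
      return MemToBcst;
    }
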
-static bool matchBroadcastSize(const X86MemoryFoldTableEntry &Entry,
+static bool matchBroadcastSize(const X86FoldTableEntry &Entry,
unsigned BroadcastBits) {
switch (Entry.Flags & TB_BCAST_MASK) {
case TB_BCAST_SD:
@@ -611,10 +301,10 @@ static bool matchBroadcastSize(const X86MemoryFoldTableEntry &Entry,
return false;
}
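
lookupBroadcastFoldTable below then walks the run of entries sharing MemOp and
returns the first whose broadcast element width matches. A compact sketch of
that equal-key scan, with illustrative stand-ins for the TB_BCAST_* flag
values (the real mask also covers 16-bit broadcasts):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Entry { unsigned KeyOp, DstOp; uint16_t Flags; };
    // Illustrative stand-ins for the TB_BCAST_* flag encoding.
    enum : uint16_t { BCAST_D = 1, BCAST_Q = 2, BCAST_SS = 3, BCAST_SD = 4,
                      BCAST_MASK = 7 };

    static bool matchBits(const Entry &E, unsigned Bits) {
      switch (E.Flags & BCAST_MASK) {
      case BCAST_SS: case BCAST_D: return Bits == 32;
      case BCAST_SD: case BCAST_Q: return Bits == 64;
      default: return false;
      }
    }

    const Entry *findBroadcast(const std::vector<Entry> &Table, unsigned MemOp,
                               unsigned Bits) {
      auto It = std::lower_bound(
          Table.begin(), Table.end(), MemOp,
          [](const Entry &E, unsigned Op) { return E.KeyOp < Op; });
      for (; It != Table.end() && It->KeyOp == MemOp; ++It)
        if (matchBits(*It, Bits))
          return &*It;
      return nullptr;
    }
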
-const X86MemoryFoldTableEntry *
+const X86FoldTableEntry *
llvm::lookupBroadcastFoldTable(unsigned MemOp, unsigned BroadcastBits) {
- static X86MemBroadcastFoldTable MemBroadcastFoldTable;
- auto &Table = MemBroadcastFoldTable.Table;
+ static X86BroadcastFoldTable BroadcastFoldTable;
+ auto &Table = BroadcastFoldTable.Table;
for (auto I = llvm::lower_bound(Table, MemOp);
I != Table.end() && I->KeyOp == MemOp; ++I) {
if (matchBroadcastSize(*I, BroadcastBits))
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.h b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.h
index 28db61d9a3f8..e3890d6aa8eb 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.h
@@ -20,37 +20,37 @@ namespace llvm {
// This struct is used for both the folding and unfolding tables. The KeyOp
// is used to determine the sorting order.
-struct X86MemoryFoldTableEntry {
+struct X86FoldTableEntry {
unsigned KeyOp;
unsigned DstOp;
uint16_t Flags;
- bool operator<(const X86MemoryFoldTableEntry &RHS) const {
+ bool operator<(const X86FoldTableEntry &RHS) const {
return KeyOp < RHS.KeyOp;
}
- bool operator==(const X86MemoryFoldTableEntry &RHS) const {
+ bool operator==(const X86FoldTableEntry &RHS) const {
return KeyOp == RHS.KeyOp;
}
- friend bool operator<(const X86MemoryFoldTableEntry &TE, unsigned Opcode) {
+ friend bool operator<(const X86FoldTableEntry &TE, unsigned Opcode) {
return TE.KeyOp < Opcode;
}
};
// Look up the memory folding table entry for folding a load and a store into
// operand 0.
-const X86MemoryFoldTableEntry *lookupTwoAddrFoldTable(unsigned RegOp);
+const X86FoldTableEntry *lookupTwoAddrFoldTable(unsigned RegOp);
// Look up the memory folding table entry for folding a load or store with
// operand OpNum.
-const X86MemoryFoldTableEntry *lookupFoldTable(unsigned RegOp, unsigned OpNum);
+const X86FoldTableEntry *lookupFoldTable(unsigned RegOp, unsigned OpNum);
// Look up the memory unfolding table entry for this instruction.
-const X86MemoryFoldTableEntry *lookupUnfoldTable(unsigned MemOp);
+const X86FoldTableEntry *lookupUnfoldTable(unsigned MemOp);
-// Look up the broadcast memory folding table entry for this instruction from
+// Look up the broadcast folding table entry for this instruction from
// the regular memory instruction.
-const X86MemoryFoldTableEntry *lookupBroadcastFoldTable(unsigned MemOp,
- unsigned BroadcastBits);
+const X86FoldTableEntry *lookupBroadcastFoldTable(unsigned MemOp,
+ unsigned BroadcastBits);
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td
index f45869e15267..df05a5788a50 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td
@@ -161,8 +161,10 @@ def XOP8 : Map<4>;
def XOP9 : Map<5>;
def XOPA : Map<6>;
def ThreeDNow : Map<7>;
-def T_MAP5 : Map<8>;
-def T_MAP6 : Map<9>;
+def T_MAP4 : Map<8>;
+def T_MAP5 : Map<9>;
+def T_MAP6 : Map<10>;
+def T_MAP7 : Map<11>;
// Class specifying the encoding
class Encoding<bits<2> val> {
@@ -190,85 +192,14 @@ def AdSize16 : AddressSize<1>; // Encodes a 16-bit address.
def AdSize32 : AddressSize<2>; // Encodes a 32-bit address.
def AdSize64 : AddressSize<3>; // Encodes a 64-bit address.
-// Prefix byte classes which are used to indicate to the ad-hoc machine code
-// emitter that various prefix bytes are required.
-class OpSize16 { OperandSize OpSize = OpSize16; }
-class OpSize32 { OperandSize OpSize = OpSize32; }
-class AdSize16 { AddressSize AdSize = AdSize16; }
-class AdSize32 { AddressSize AdSize = AdSize32; }
-class AdSize64 { AddressSize AdSize = AdSize64; }
-class REX_W { bit hasREX_W = 1; }
-class LOCK { bit hasLockPrefix = 1; }
-class REP { bit hasREPPrefix = 1; }
-class TB { Map OpMap = TB; }
-class T8 { Map OpMap = T8; }
-class TA { Map OpMap = TA; }
-class XOP8 { Map OpMap = XOP8; Prefix OpPrefix = PS; }
-class XOP9 { Map OpMap = XOP9; Prefix OpPrefix = PS; }
-class XOPA { Map OpMap = XOPA; Prefix OpPrefix = PS; }
-class ThreeDNow { Map OpMap = ThreeDNow; }
-class T_MAP5 { Map OpMap = T_MAP5; }
-class T_MAP5PS : T_MAP5 { Prefix OpPrefix = PS; } // none
-class T_MAP5PD : T_MAP5 { Prefix OpPrefix = PD; } // 0x66
-class T_MAP5XS : T_MAP5 { Prefix OpPrefix = XS; } // 0xF3
-class T_MAP5XD : T_MAP5 { Prefix OpPrefix = XD; } // 0xF2
-class T_MAP6 { Map OpMap = T_MAP6; }
-class T_MAP6PS : T_MAP6 { Prefix OpPrefix = PS; }
-class T_MAP6PD : T_MAP6 { Prefix OpPrefix = PD; }
-class T_MAP6XS : T_MAP6 { Prefix OpPrefix = XS; }
-class T_MAP6XD : T_MAP6 { Prefix OpPrefix = XD; }
-class OBXS { Prefix OpPrefix = XS; }
-class PS : TB { Prefix OpPrefix = PS; }
-class PD : TB { Prefix OpPrefix = PD; }
-class XD : TB { Prefix OpPrefix = XD; }
-class XS : TB { Prefix OpPrefix = XS; }
-class T8PS : T8 { Prefix OpPrefix = PS; }
-class T8PD : T8 { Prefix OpPrefix = PD; }
-class T8XD : T8 { Prefix OpPrefix = XD; }
-class T8XS : T8 { Prefix OpPrefix = XS; }
-class TAPS : TA { Prefix OpPrefix = PS; }
-class TAPD : TA { Prefix OpPrefix = PD; }
-class TAXD : TA { Prefix OpPrefix = XD; }
-class TAXS : TA { Prefix OpPrefix = XS; }
-class VEX { Encoding OpEnc = EncVEX; }
-class WIG { bit IgnoresW = 1; }
-// Special version of REX_W that can be changed to VEX.W==0 for EVEX2VEX.
-class VEX_W1X { bit hasREX_W = 1; bit EVEX_W1_VEX_W0 = 1; }
-class VEX_4V : VEX { bit hasVEX_4V = 1; }
-class VEX_L { bit hasVEX_L = 1; }
-class VEX_LIG { bit ignoresVEX_L = 1; }
-class EVEX { Encoding OpEnc = EncEVEX; }
-class EVEX_4V : EVEX { bit hasVEX_4V = 1; }
-class EVEX_K { bit hasEVEX_K = 1; }
-class EVEX_KZ : EVEX_K { bit hasEVEX_Z = 1; }
-class EVEX_B { bit hasEVEX_B = 1; }
-class EVEX_RC { bit hasEVEX_RC = 1; }
-class EVEX_V512 { bit hasEVEX_L2 = 1; bit hasVEX_L = 0; }
-class EVEX_V256 { bit hasEVEX_L2 = 0; bit hasVEX_L = 1; }
-class EVEX_V128 { bit hasEVEX_L2 = 0; bit hasVEX_L = 0; }
-class NOTRACK { bit hasNoTrackPrefix = 1; }
-class SIMD_EXC { list<Register> Uses = [MXCSR]; bit mayRaiseFPException = 1; }
-
-// Specify AVX512 8-bit compressed displacement encoding based on the vector
-// element size in bits (8, 16, 32, 64) and the CDisp8 form.
-class EVEX_CD8<int esize, CD8VForm form> {
- int CD8_EltSize = !srl(esize, 3);
- bits<3> CD8_Form = form.Value;
-}
-
-class XOP { Encoding OpEnc = EncXOP; }
-class XOP_4V : XOP { bit hasVEX_4V = 1; }
-
-// Provide a specific instruction to be used by the EVEX2VEX conversion.
-class EVEX2VEXOverride<string VEXInstrName> {
- string EVEX2VEXOverride = VEXInstrName;
+// Force the instruction to use REX2/VEX/EVEX encoding.
+class ExplicitOpPrefix<bits<2> val> {
+ bits<2> Value = val;
}
-
-// Prevent EVEX->VEX conversion from considering this instruction.
-class NotEVEX2VEXConvertible { bit notEVEX2VEXConvertible = 1; }
-
-// Force the instruction to use VEX encoding.
-class ExplicitVEXPrefix { bit ExplicitVEXPrefix = 1; }
+def NoExplicitOpPrefix : ExplicitOpPrefix<0>;
+def ExplicitREX2 : ExplicitOpPrefix<1>;
+def ExplicitVEX : ExplicitOpPrefix<2>;
+def ExplicitEVEX : ExplicitOpPrefix<3>;
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string AsmStr, Domain d = GenericDomain>
@@ -350,9 +281,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string EVEX2VEXOverride = ?;
bit notEVEX2VEXConvertible = 0; // Prevent EVEX->VEX conversion.
- bit ExplicitVEXPrefix = 0; // Force the instruction to use VEX encoding.
- // Force to check predicate before compress EVEX to VEX encoding.
- bit checkVEXPredicate = 0;
+ ExplicitOpPrefix explicitOpPrefix = NoExplicitOpPrefix;
+ bits<2> explicitOpPrefixBits = explicitOpPrefix.Value;
// TSFlags layout should be kept in sync with X86BaseInfo.h.
let TSFlags{6-0} = FormBits;
let TSFlags{8-7} = OpSizeBits;
@@ -377,635 +307,5 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{47-45} = !if(!eq(CD8_Scale, 0), 0, !add(!logtwo(CD8_Scale), 1));
let TSFlags{48} = hasEVEX_RC;
let TSFlags{49} = hasNoTrackPrefix;
- let TSFlags{50} = ExplicitVEXPrefix;
-}
-
-class PseudoI<dag oops, dag iops, list<dag> pattern>
- : X86Inst<0, Pseudo, NoImm, oops, iops, ""> {
- let Pattern = pattern;
-}
-
-class I<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, Domain d = GenericDomain>
- : X86Inst<o, f, NoImm, outs, ins, asm, d> {
- let Pattern = pattern;
- let CodeSize = 3;
-}
-class Ii8<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, Domain d = GenericDomain>
- : X86Inst<o, f, Imm8, outs, ins, asm, d> {
- let Pattern = pattern;
- let CodeSize = 3;
-}
-class Ii8Reg<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, Domain d = GenericDomain>
- : X86Inst<o, f, Imm8Reg, outs, ins, asm, d> {
- let Pattern = pattern;
- let CodeSize = 3;
-}
-class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm8PCRel, outs, ins, asm> {
- let Pattern = pattern;
- let CodeSize = 3;
-}
-class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm16, outs, ins, asm> {
- let Pattern = pattern;
- let CodeSize = 3;
-}
-class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm32, outs, ins, asm> {
- let Pattern = pattern;
- let CodeSize = 3;
-}
-class Ii32S<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm32S, outs, ins, asm> {
- let Pattern = pattern;
- let CodeSize = 3;
-}
-
-class Ii64<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm64, outs, ins, asm> {
- let Pattern = pattern;
- let CodeSize = 3;
-}
-
-class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm16PCRel, outs, ins, asm> {
- let Pattern = pattern;
- let CodeSize = 3;
-}
-
-class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm32PCRel, outs, ins, asm> {
- let Pattern = pattern;
- let CodeSize = 3;
-}
-
-// FPStack Instruction Templates:
-// FPI - Floating Point Instruction template.
-class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
- : I<o, F, outs, ins, asm, []> {
- let Defs = [FPSW];
- let Predicates = [HasX87];
-}
-
-// FpI_ - Floating Point Pseudo Instruction template.
-class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
- : PseudoI<outs, ins, pattern> {
- let FPForm = fp;
- let Defs = [FPSW];
- let Predicates = [HasX87];
-}
-
-// Templates for instructions that use a 16- or 32-bit segmented address as
-// their only operand: lcall (FAR CALL) and ljmp (FAR JMP)
-//
-// Iseg16 - 16-bit segment selector, 16-bit offset
-// Iseg32 - 16-bit segment selector, 32-bit offset
-
-class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm16, outs, ins, asm> {
- let Pattern = pattern;
- let CodeSize = 3;
-}
-
-class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm32, outs, ins, asm> {
- let Pattern = pattern;
- let CodeSize = 3;
-}
-
-// SI - SSE 1 & 2 scalar instructions
-class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, Domain d = GenericDomain>
- : I<o, F, outs, ins, asm, pattern, d> {
- let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
- !if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
- !if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
- !if(!eq(OpPrefix.Value, XD.Value), [UseSSE2],
- !if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
- [UseSSE1])))));
-
- // AVX instructions have a 'v' prefix in the mnemonic
- let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
- !if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
- asm));
-}
-
-// SI - SSE 1 & 2 scalar intrinsics - vex form available on AVX512
-class SI_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, Domain d = GenericDomain>
- : I<o, F, outs, ins, asm, pattern, d> {
- let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
- !if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
- !if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
- !if(!eq(OpPrefix.Value, XD.Value), [UseSSE2],
- !if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
- [UseSSE1])))));
-
- // AVX instructions have a 'v' prefix in the mnemonic
- let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
- !if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
- asm));
+ let TSFlags{51-50} = explicitOpPrefixBits;
}
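
The two explicitOpPrefixBits land in TSFlags{51-50}, replacing the single
ExplicitVEXPrefix bit previously at TSFlags{50}. Decoding the field on the C++
side is then a shift and mask; a sketch, assuming only the bit layout shown
above:

    #include <cstdint>

    // Mirrors def NoExplicitOpPrefix/ExplicitREX2/ExplicitVEX/ExplicitEVEX.
    enum class ExplicitOpPrefix : unsigned { None = 0, REX2 = 1, VEX = 2, EVEX = 3 };

    // Extract TSFlags{51-50}; bit positions follow the .td layout above.
    constexpr ExplicitOpPrefix explicitPrefix(uint64_t TSFlags) {
      return static_cast<ExplicitOpPrefix>((TSFlags >> 50) & 0x3);
    }
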
-// SIi8 - SSE 1 & 2 scalar instructions - vex form available on AVX512
-class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern> {
- let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
- !if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
- !if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
- [UseSSE2])));
-
- // AVX instructions have a 'v' prefix in the mnemonic
- let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
- !if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
- asm));
-}
-
-// PI - SSE 1 & 2 packed instructions
-class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
- Domain d>
- : I<o, F, outs, ins, asm, pattern, d> {
- let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
- !if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
- !if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
- [UseSSE1])));
-
- // AVX instructions have a 'v' prefix in the mnemonic
- let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
- !if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
- asm));
-}
-
-// MMXPI - SSE 1 & 2 packed instructions with MMX operands
-class MMXPI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
- Domain d>
- : I<o, F, outs, ins, asm, pattern, d> {
- let Predicates = !if(!eq(OpPrefix.Value, PD.Value), [HasMMX, HasSSE2],
- [HasMMX, HasSSE1]);
-}
-
-// PIi8 - SSE 1 & 2 packed instructions with immediate
-class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, Domain d>
- : Ii8<o, F, outs, ins, asm, pattern, d> {
- let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
- !if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
- !if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
- [UseSSE1])));
-
- // AVX instructions have a 'v' prefix in the mnemonic
- let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
- !if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
- asm));
-}
-
-// SSE1 Instruction Templates:
-//
-// SSI - SSE1 instructions with XS prefix.
-// PSI - SSE1 instructions with PS prefix.
-// PSIi8 - SSE1 instructions with ImmT == Imm8 and PS prefix.
-// VSSI - SSE1 instructions with XS prefix in AVX form.
-// VPSI - SSE1 instructions with PS prefix in AVX form, packed single.
-
-class SSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE1]>;
-class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE1]>;
-class PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
- Requires<[UseSSE1]>;
-class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
- Requires<[UseSSE1]>;
-class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
- Requires<[HasAVX]>;
-class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, PS,
- Requires<[HasAVX]>;
-
-// SSE2 Instruction Templates:
-//
-// SDI - SSE2 instructions with XD prefix.
-// SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix.
-// S2SI - SSE2 instructions with XS prefix.
-// SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
-// PDI - SSE2 instructions with PD prefix, packed double domain.
-// PDIi8 - SSE2 instructions with ImmT == Imm8 and PD prefix.
-// VSDI - SSE2 scalar instructions with XD prefix in AVX form.
-// VPDI - SSE2 vector instructions with PD prefix in AVX form,
-// packed double domain.
-// VS2I - SSE2 scalar instructions with PD prefix in AVX form.
-// S2I - SSE2 scalar instructions with PD prefix.
-// MMXSDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix as well as
-// MMX operands.
-// MMXSSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix as well as
-// MMX operands.
-
-class SDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, XD, Requires<[UseSSE2]>;
-class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[UseSSE2]>;
-class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
-class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
-class PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
- Requires<[UseSSE2]>;
-class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
- Requires<[UseSSE2]>;
-class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD,
- Requires<[UseAVX]>;
-class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
- Requires<[HasAVX]>;
-class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>,
- PD, Requires<[HasAVX]>;
-class VS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern>, PD,
- Requires<[UseAVX]>;
-class S2I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, PD, Requires<[UseSSE2]>;
-class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX, HasSSE2]>;
-class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX, HasSSE2]>;
-
-// SSE3 Instruction Templates:
-//
-// S3I - SSE3 instructions with PD prefixes.
-// S3SI - SSE3 instructions with XS prefix.
-// S3DI - SSE3 instructions with XD prefix.
-
-class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS,
- Requires<[UseSSE3]>;
-class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD,
- Requires<[UseSSE3]>;
-class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
- Requires<[UseSSE3]>;
-
-
-// SSSE3 Instruction Templates:
-//
-// SS38I - SSSE3 instructions with T8 prefix.
-// SS3AI - SSSE3 instructions with TA prefix.
-// MMXSS38I - SSSE3 instructions with T8 prefix and MMX operands.
-// MMXSS3AI - SSSE3 instructions with TA prefix and MMX operands.
-//
-// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version
-// uses the MMX registers. The 64-bit versions are grouped with the MMX
-// classes. They need to be enabled even if AVX is enabled.
-
-class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
- Requires<[UseSSSE3]>;
-class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
- Requires<[UseSSSE3]>;
-class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PS,
- Requires<[HasMMX, HasSSSE3]>;
-class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPS,
- Requires<[HasMMX, HasSSSE3]>;
-
-// SSE4.1 Instruction Templates:
-//
-// SS48I - SSE 4.1 instructions with T8 prefix.
-// SS41AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
-//
-class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
- Requires<[UseSSE41]>;
-class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
- Requires<[UseSSE41]>;
-
-// SSE4.2 Instruction Templates:
-//
-// SS428I - SSE 4.2 instructions with T8 prefix.
-class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
- Requires<[UseSSE42]>;
-
-// SS42AI = SSE 4.2 instructions with TA prefix
-class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
- Requires<[UseSSE42]>;
-
-// CRC32I - SSE 4.2 CRC32 instructions.
-// NOTE: 'HasCRC32' is used as CRC32 instructions are GPR only and not directly
-// controlled by the SSE42 flag.
-class CRC32I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasCRC32]>;
-
-// AVX Instruction Templates:
-// Instructions introduced in AVX (no SSE equivalent forms)
-//
-// AVX8I - AVX instructions with T8PD prefix.
-// AVXAIi8 - AVX instructions with TAPD prefix and ImmT = Imm8.
-class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
- Requires<[HasAVX]>;
-class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
- Requires<[HasAVX]>;
-
-// AVX2 Instruction Templates:
-// Instructions introduced in AVX2 (no SSE equivalent forms)
-//
-// AVX28I - AVX2 instructions with T8PD prefix.
-// AVX2AIi8 - AVX2 instructions with TAPD prefix and ImmT = Imm8.
-class AVX28I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
- Requires<[HasAVX2]>;
-class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
- Requires<[HasAVX2]>;
-
-
-// AVX-512 Instruction Templates:
-// Instructions introduced in AVX-512 (no SSE equivalent forms)
-//
-// AVX5128I - AVX-512 instructions with T8PD prefix.
-// AVX512AIi8 - AVX-512 instructions with TAPD prefix and ImmT = Imm8.
-// AVX512PDI - AVX-512 instructions with PD, double packed.
-// AVX512PSI - AVX-512 instructions with PS, single packed.
-// AVX512XS8I - AVX-512 instructions with T8 and XS prefixes.
-// AVX512XSI - AVX-512 instructions with XS prefix, generic domain.
-// AVX512BI - AVX-512 instructions with PD, int packed domain.
-// AVX512SI - AVX-512 scalar instructions with PD prefix.
-
-class AVX5128I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
- Requires<[HasAVX512]>;
-class AVX5128IBase : T8PD {
- Domain ExeDomain = SSEPackedInt;
-}
-class AVX512XS8I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8XS,
- Requires<[HasAVX512]>;
-class AVX512XSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, XS,
- Requires<[HasAVX512]>;
-class AVX512XDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, XD,
- Requires<[HasAVX512]>;
-class AVX512BI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, PD,
- Requires<[HasAVX512]>;
-class AVX512BIBase : PD {
- Domain ExeDomain = SSEPackedInt;
-}
-class AVX512BIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, PD,
- Requires<[HasAVX512]>;
-class AVX512BIi8Base : PD {
- Domain ExeDomain = SSEPackedInt;
- ImmType ImmT = Imm8;
-}
-class AVX512XSIi8Base : XS {
- Domain ExeDomain = SSEPackedInt;
- ImmType ImmT = Imm8;
-}
-class AVX512XDIi8Base : XD {
- Domain ExeDomain = SSEPackedInt;
- ImmType ImmT = Imm8;
-}
-class AVX512PSIi8Base : PS {
- Domain ExeDomain = SSEPackedSingle;
- ImmType ImmT = Imm8;
-}
-class AVX512PDIi8Base : PD {
- Domain ExeDomain = SSEPackedDouble;
- ImmType ImmT = Imm8;
-}
-class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
- Requires<[HasAVX512]>;
-class AVX512AIi8Base : TAPD {
- ImmType ImmT = Imm8;
-}
-class AVX512Ii8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>,
- Requires<[HasAVX512]>;
-class AVX512PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
- Requires<[HasAVX512]>;
-class AVX512PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
- Requires<[HasAVX512]>;
-class AVX512PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, Domain d>
- : Ii8<o, F, outs, ins, asm, pattern, d>, Requires<[HasAVX512]>;
-class AVX512PI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, Domain d>
- : I<o, F, outs, ins, asm, pattern, d>, Requires<[HasAVX512]>;
-class AVX512FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern>
- : I<o, F, outs, ins, asm, pattern>, T8PD,
- EVEX_4V, Requires<[HasAVX512]>;
-
-class AVX512<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern>
- : I<o, F, outs, ins, asm, pattern>, Requires<[HasAVX512]>;
-
-// AES Instruction Templates:
-//
-// AES8I
-// These use the same encoding as the SSE4.2 T8 and TA encodings.
-class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
- Requires<[NoAVX, HasAES]>;
-
-class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
- Requires<[NoAVX, HasAES]>;
-
-// PCLMUL Instruction Templates
-class PCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD;
-
-// FMA3 Instruction Templates
-class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern>
- : I<o, F, outs, ins, asm, pattern>, T8PD,
- VEX_4V, FMASC, Requires<[HasFMA, NoFMA4, NoVLX]>;
-class FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern>
- : I<o, F, outs, ins, asm, pattern>, T8PD,
- VEX_4V, FMASC, Requires<[HasFMA, NoFMA4, NoAVX512]>;
-class FMA3S_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern>
- : I<o, F, outs, ins, asm, pattern>, T8PD,
- VEX_4V, FMASC, Requires<[HasFMA, NoAVX512]>;
-
-// FMA4 Instruction Templates
-class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern>
- : Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
- VEX_4V, FMASC, Requires<[HasFMA4, NoVLX]>;
-class FMA4S<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern>
- : Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
- VEX_4V, FMASC, Requires<[HasFMA4, NoAVX512]>;
-class FMA4S_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern>
- : Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
- VEX_4V, FMASC, Requires<[HasFMA4]>;
-
-// XOP 2, 3 and 4 Operand Instruction Template
-class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
- XOP9, Requires<[HasXOP]>;
-
-// XOP 2 and 3 Operand Instruction Templates with imm byte
-class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
- XOP8, Requires<[HasXOP]>;
-// XOP 4 Operand Instruction Templates with imm byte
-class IXOPi8Reg<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8Reg<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
- XOP8, Requires<[HasXOP]>;
-
-// XOP 5 operand instruction (VEX encoding!)
-class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern>
- : Ii8Reg<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
- VEX_4V, Requires<[HasXOP]>;
-
-// X86-64 Instruction templates...
-//
-
-class RI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, REX_W;
-class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, REX_W;
-class RIi16 <bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii16<o, F, outs, ins, asm, pattern>, REX_W;
-class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii32<o, F, outs, ins, asm, pattern>, REX_W;
-class RIi32S <bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii32S<o, F, outs, ins, asm, pattern>, REX_W;
-class RIi64<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii64<o, F, outs, ins, asm, pattern>, REX_W;
-
-class RS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : S2I<o, F, outs, ins, asm, pattern>, REX_W;
-class VRS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : VS2I<o, F, outs, ins, asm, pattern>, REX_W;
-
-// MMX Instruction templates
-//
-
-// MMXI - MMX instructions with TB prefix.
-// MMXI32 - MMX instructions with TB prefix valid only in 32 bit mode.
-// MMXI64 - MMX instructions with TB prefix valid only in 64 bit mode.
-// MMX2I - MMX / SSE2 instructions with PD prefix.
-// MMXIi8 - MMX instructions with ImmT == Imm8 and PS prefix.
-// MMXID - MMX instructions with XD prefix.
-// MMXIS - MMX instructions with XS prefix.
-class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX]>;
-class MMXI32<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX,Not64BitMode]>;
-class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX,In64BitMode]>;
-class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, PS, REX_W,
- Requires<[HasMMX,In64BitMode]>;
-class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, PD, Requires<[HasMMX]>;
-class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX]>;
-class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
-class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragments.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragments.td
new file mode 100644
index 000000000000..adf527d72f5b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragments.td
@@ -0,0 +1,841 @@
+//===----------X86InstrFragments - X86 Pattern fragments. --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// X86-specific DAG nodes.
+def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def SDTX86FCmp : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisFP<1>,
+ SDTCisSameAs<1, 2>]>;
+
+def SDTX86Cmov : SDTypeProfile<1, 4,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
+
+// Unary and binary operator instructions that set EFLAGS as a side-effect.
+def SDTUnaryArithWithFlags : SDTypeProfile<2, 1,
+ [SDTCisSameAs<0, 2>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+// SDTBinaryArithWithFlagsInOut - RES1, EFLAGS = op LHS, RHS, EFLAGS
+def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<4, i32>]>;
+// RES1, RES2, FLAGS = op LHS, RHS
+def SDT2ResultBinaryArithWithFlags : SDTypeProfile<3, 2,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
+def SDTX86BrCond : SDTypeProfile<0, 3,
+ [SDTCisVT<0, OtherVT>,
+ SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
+
+def SDTX86SetCC : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i8>,
+ SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
+def SDTX86SetCC_C : SDTypeProfile<1, 2,
+ [SDTCisInt<0>,
+ SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
+
+def SDTX86sahf : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i8>]>;
+
+def SDTX86rdrand : SDTypeProfile<2, 0, [SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+def SDTX86rdpkru : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+def SDTX86wrpkru : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
+def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
+ SDTCisVT<2, i8>]>;
+def SDTX86cas8pair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDTX86cas16pair : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i64>]>;
+
+def SDTLockBinaryArithWithFlags : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+ SDTCisPtrTy<1>,
+ SDTCisInt<2>]>;
+
+def SDTLockUnaryArithWithFlags : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
+ SDTCisPtrTy<1>]>;
+
+def SDTX86Ret : SDTypeProfile<0, -1, [SDTCisVT<0, i32>]>;
+
+def SDT_X86CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>]>;
+def SDT_X86CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>]>;
+
+def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
+
+def SDT_X86NtBrind : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
+
+def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
+ SDTCisPtrTy<1>]>;
+
+def SDT_X86VAARG : SDTypeProfile<1, -1, [SDTCisPtrTy<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i8>,
+ SDTCisVT<4, i32>]>;
+
+def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
+
+def SDTX86Void : SDTypeProfile<0, 0, []>;
+
+def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+
+def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_X86TLSBASEADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_X86DYN_ALLOCA : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
+
+def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
+
+def SDT_X86PROBED_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
+
+def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
+
+def SDT_X86ENQCMD : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+ SDTCisPtrTy<1>, SDTCisSameAs<1, 2>]>;
+
+def SDT_X86AESENCDECKL : SDTypeProfile<2, 2, [SDTCisVT<0, v2i64>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<2, v2i64>,
+ SDTCisPtrTy<3>]>;
+
+def SDTX86Cmpccxadd : SDTypeProfile<1, 4, [SDTCisSameAs<0, 2>,
+ SDTCisPtrTy<1>, SDTCisSameAs<2, 3>,
+ SDTCisVT<4, i8>]>;
+
+def X86MFence : SDNode<"X86ISD::MFENCE", SDTNone, [SDNPHasChain]>;
+
+
+def X86bsf : SDNode<"X86ISD::BSF", SDTUnaryArithWithFlags>;
+def X86bsr : SDNode<"X86ISD::BSR", SDTUnaryArithWithFlags>;
+def X86fshl : SDNode<"X86ISD::FSHL", SDTIntShiftDOp>;
+def X86fshr : SDNode<"X86ISD::FSHR", SDTIntShiftDOp>;
+
+def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest>;
+def X86fcmp : SDNode<"X86ISD::FCMP", SDTX86FCmp>;
+def X86strict_fcmp : SDNode<"X86ISD::STRICT_FCMP", SDTX86FCmp, [SDNPHasChain]>;
+def X86strict_fcmps : SDNode<"X86ISD::STRICT_FCMPS", SDTX86FCmp, [SDNPHasChain]>;
+def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>;
+
+def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>;
+def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond,
+ [SDNPHasChain]>;
+def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>;
+def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>;
+
+def X86rdrand : SDNode<"X86ISD::RDRAND", SDTX86rdrand,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def X86rdseed : SDNode<"X86ISD::RDSEED", SDTX86rdrand,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def X86rdpkru : SDNode<"X86ISD::RDPKRU", SDTX86rdpkru,
+ [SDNPHasChain, SDNPSideEffect]>;
+def X86wrpkru : SDNode<"X86ISD::WRPKRU", SDTX86wrpkru,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8pair,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86cas16pair,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+
+def X86retglue : SDNode<"X86ISD::RET_GLUE", SDTX86Ret,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def X86iret : SDNode<"X86ISD::IRET", SDTX86Ret,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def X86vastart_save_xmm_regs :
+ SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
+ SDT_X86VASTART_SAVE_XMM_REGS,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPVariadic]>;
+def X86vaarg64 :
+ SDNode<"X86ISD::VAARG_64", SDT_X86VAARG,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
+ SDNPMemOperand]>;
+def X86vaargx32 :
+ SDNode<"X86ISD::VAARG_X32", SDT_X86VAARG,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
+ SDNPMemOperand]>;
+def X86callseq_start :
+ SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def X86callseq_end :
+ SDNode<"ISD::CALLSEQ_END", SDT_X86CallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def X86call : SDNode<"X86ISD::CALL", SDT_X86Call,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
+ SDNPVariadic]>;
+
+def X86call_rvmarker : SDNode<"X86ISD::CALL_RVMARKER", SDT_X86Call,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
+ SDNPVariadic]>;
+
+
+def X86NoTrackCall : SDNode<"X86ISD::NT_CALL", SDT_X86Call,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
+ SDNPVariadic]>;
+def X86NoTrackBrind : SDNode<"X86ISD::NT_BRIND", SDT_X86NtBrind,
+ [SDNPHasChain]>;
+
+def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore]>;
+def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad]>;
+
+def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
+def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
+
+def X86RecoverFrameAlloc : SDNode<"ISD::LOCAL_RECOVER",
+ SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+ SDTCisInt<1>]>>;
+
+def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def X86tlsbaseaddr : SDNode<"X86ISD::TLSBASEADDR", SDT_X86TLSBASEADDR,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
+ [SDNPHasChain]>;
+
+def X86eh_sjlj_setjmp : SDNode<"X86ISD::EH_SJLJ_SETJMP",
+ SDTypeProfile<1, 1, [SDTCisInt<0>,
+ SDTCisPtrTy<1>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+def X86eh_sjlj_longjmp : SDNode<"X86ISD::EH_SJLJ_LONGJMP",
+ SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+def X86eh_sjlj_setup_dispatch : SDNode<"X86ISD::EH_SJLJ_SETUP_DISPATCH",
+ SDTypeProfile<0, 0, []>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>;
+def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86adc_flag : SDNode<"X86ISD::ADC", SDTBinaryArithWithFlagsInOut>;
+def X86sbb_flag : SDNode<"X86ISD::SBB", SDTBinaryArithWithFlagsInOut>;
+
+def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+
+def X86lock_add : SDNode<"X86ISD::LADD", SDTLockBinaryArithWithFlags,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+ SDNPMemOperand]>;
+def X86lock_sub : SDNode<"X86ISD::LSUB", SDTLockBinaryArithWithFlags,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+ SDNPMemOperand]>;
+def X86lock_or : SDNode<"X86ISD::LOR", SDTLockBinaryArithWithFlags,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+ SDNPMemOperand]>;
+def X86lock_xor : SDNode<"X86ISD::LXOR", SDTLockBinaryArithWithFlags,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+ SDNPMemOperand]>;
+def X86lock_and : SDNode<"X86ISD::LAND", SDTLockBinaryArithWithFlags,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+ SDNPMemOperand]>;
+
+def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>;
+def X86bextri : SDNode<"X86ISD::BEXTRI", SDTIntBinOp>;
+
+def X86bzhi : SDNode<"X86ISD::BZHI", SDTIntBinOp>;
+
+def X86pdep : SDNode<"X86ISD::PDEP", SDTIntBinOp>;
+def X86pext : SDNode<"X86ISD::PEXT", SDTIntBinOp>;
+
+def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
+
+def X86DynAlloca : SDNode<"X86ISD::DYN_ALLOCA", SDT_X86DYN_ALLOCA,
+ [SDNPHasChain, SDNPOutGlue]>;
+
+def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
+ [SDNPHasChain]>;
+
+def X86ProbedAlloca : SDNode<"X86ISD::PROBED_ALLOCA", SDT_X86PROBED_ALLOCA,
+ [SDNPHasChain]>;
+
+def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def X86lwpins : SDNode<"X86ISD::LWPINS",
+ SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisInt<1>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPSideEffect]>;
+
+def X86umwait : SDNode<"X86ISD::UMWAIT",
+ SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisInt<1>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def X86tpause : SDNode<"X86ISD::TPAUSE",
+ SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisInt<1>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def X86enqcmd : SDNode<"X86ISD::ENQCMD", SDT_X86ENQCMD,
+ [SDNPHasChain, SDNPSideEffect]>;
+def X86enqcmds : SDNode<"X86ISD::ENQCMDS", SDT_X86ENQCMD,
+ [SDNPHasChain, SDNPSideEffect]>;
+def X86testui : SDNode<"X86ISD::TESTUI",
+ SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def X86aesenc128kl : SDNode<"X86ISD::AESENC128KL", SDT_X86AESENCDECKL,
+ [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
+ SDNPMemOperand]>;
+def X86aesdec128kl : SDNode<"X86ISD::AESDEC128KL", SDT_X86AESENCDECKL,
+ [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
+ SDNPMemOperand]>;
+def X86aesenc256kl : SDNode<"X86ISD::AESENC256KL", SDT_X86AESENCDECKL,
+ [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
+ SDNPMemOperand]>;
+def X86aesdec256kl : SDNode<"X86ISD::AESDEC256KL", SDT_X86AESENCDECKL,
+ [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
+ SDNPMemOperand]>;
+
+def X86cmpccxadd : SDNode<"X86ISD::CMPCCXADD", SDTX86Cmpccxadd,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
+ SDNPMemOperand]>;
+
+// Define X86-specific addressing mode.
+def addr : ComplexPattern<iPTR, 5, "selectAddr", [], [SDNPWantParent]>;
+def lea32addr : ComplexPattern<i32, 5, "selectLEAAddr",
+ [add, sub, mul, X86mul_imm, shl, or, xor, frameindex],
+ []>;
+// In 64-bit mode 32-bit LEAs can use RIP-relative addressing.
+def lea64_32addr : ComplexPattern<i32, 5, "selectLEA64_32Addr",
+ [add, sub, mul, X86mul_imm, shl, or, xor,
+ frameindex, X86WrapperRIP],
+ []>;
+
+def tls32addr : ComplexPattern<i32, 5, "selectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
+def tls32baseaddr : ComplexPattern<i32, 5, "selectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
+def lea64addr : ComplexPattern<i64, 5, "selectLEAAddr",
+ [add, sub, mul, X86mul_imm, shl, or, xor, frameindex,
+ X86WrapperRIP], []>;
+
+def tls64addr : ComplexPattern<i64, 5, "selectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
+def tls64baseaddr : ComplexPattern<i64, 5, "selectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
+def vectoraddr : ComplexPattern<iPTR, 5, "selectVectorAddr", [],[SDNPWantParent]>;
+
+// A relocatable immediate is an operand that can be relocated by the linker to
+// an immediate, such as a regular symbol in non-PIC code.
+def relocImm : ComplexPattern<iAny, 1, "selectRelocImm",
+ [X86Wrapper], [], 0>;
+
+// X86-specific condition codes. These correspond to CondCode in
+// X86InstrInfo.h and must be kept in sync with it.
+def X86_COND_O : PatLeaf<(i8 0)>;
+def X86_COND_NO : PatLeaf<(i8 1)>;
+def X86_COND_B : PatLeaf<(i8 2)>; // alt. COND_C
+def X86_COND_AE : PatLeaf<(i8 3)>; // alt. COND_NC
+def X86_COND_E : PatLeaf<(i8 4)>; // alt. COND_Z
+def X86_COND_NE : PatLeaf<(i8 5)>; // alt. COND_NZ
+def X86_COND_BE : PatLeaf<(i8 6)>; // alt. COND_NA
+def X86_COND_A : PatLeaf<(i8 7)>; // alt. COND_NBE
+def X86_COND_S : PatLeaf<(i8 8)>;
+def X86_COND_NS : PatLeaf<(i8 9)>;
+def X86_COND_P : PatLeaf<(i8 10)>; // alt. COND_PE
+def X86_COND_NP : PatLeaf<(i8 11)>; // alt. COND_PO
+def X86_COND_L : PatLeaf<(i8 12)>; // alt. COND_NGE
+def X86_COND_GE : PatLeaf<(i8 13)>; // alt. COND_NL
+def X86_COND_LE : PatLeaf<(i8 14)>; // alt. COND_NG
+def X86_COND_G : PatLeaf<(i8 15)>; // alt. COND_NLE
+
+def i16immSExt8 : ImmLeaf<i16, [{ return isInt<8>(Imm); }]>;
+def i32immSExt8 : ImmLeaf<i32, [{ return isInt<8>(Imm); }]>;
+def i64immSExt8 : ImmLeaf<i64, [{ return isInt<8>(Imm); }]>;
+def i64immSExt32 : ImmLeaf<i64, [{ return isInt<32>(Imm); }]>;
+def i64timmSExt32 : TImmLeaf<i64, [{ return isInt<32>(Imm); }]>;
+
+def i16relocImmSExt8 : PatLeaf<(i16 relocImm), [{
+ return isSExtAbsoluteSymbolRef(8, N);
+}]>;
+def i32relocImmSExt8 : PatLeaf<(i32 relocImm), [{
+ return isSExtAbsoluteSymbolRef(8, N);
+}]>;
+def i64relocImmSExt8 : PatLeaf<(i64 relocImm), [{
+ return isSExtAbsoluteSymbolRef(8, N);
+}]>;
+def i64relocImmSExt32 : PatLeaf<(i64 relocImm), [{
+ return isSExtAbsoluteSymbolRef(32, N);
+}]>;
+
+// If we have multiple users of an immediate, it's much smaller to reuse
+// the register rather than encode the immediate in every instruction.
+// This has the risk of increasing register pressure from stretched live
+// ranges; however, the immediates should be trivial to rematerialize by
+// the RA in the event of high register pressure.
+// TODO: This is currently enabled for stores and binary ops. There are more
+// cases for which this can be enabled, though this catches the bulk of the
+// issues.
+// TODO2: This should really also be enabled under O2, but there's currently
+// an issue with RA where we don't pull the constants into their users
+// when we rematerialize them. I'll follow up on enabling O2 after we fix that
+// issue.
+// TODO3: This is currently limited to single basic blocks (DAG creation
+// pulls block immediates to the top and merges them if necessary).
+// Eventually, it would be nice to allow ConstantHoisting to merge constants
+// globally for potentially added savings.
+//
+def imm_su : PatLeaf<(imm), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
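The predicate referenced above, shouldAvoidImmediateInstFormsForSize, is defined in the X86 ISel code; the sketch below is only a hedged, self-contained illustration of the size heuristic the comment describes, not the actual implementation. The parameters OptForMinSize, NumUses, and ImmBytes are hypothetical stand-ins.

    #include <cstdio>

    // Illustrative only: reusing a register beats re-encoding a wide
    // immediate once the immediate has enough users that the per-use
    // encoding bytes outweigh the one-time cost of materializing it.
    static bool preferRegisterOverImmediate(bool OptForMinSize, int NumUses,
                                            int ImmBytes) {
      // A MOV reg, imm pays ImmBytes plus an opcode once; each immediate
      // instruction form repays ImmBytes on every use.
      return OptForMinSize && NumUses > 1 && ImmBytes >= 4;
    }

    int main() {
      // Four uses of a 4-byte immediate when optimizing for size:
      // keep it in a register.
      std::printf("%d\n", preferRegisterOverImmediate(true, 4, 4)); // 1
    }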
+def i64immSExt32_su : PatLeaf<(i64immSExt32), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+
+def relocImm8_su : PatLeaf<(i8 relocImm), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def relocImm16_su : PatLeaf<(i16 relocImm), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def relocImm32_su : PatLeaf<(i32 relocImm), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+
+def i16relocImmSExt8_su : PatLeaf<(i16relocImmSExt8), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def i32relocImmSExt8_su : PatLeaf<(i32relocImmSExt8), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def i64relocImmSExt8_su : PatLeaf<(i64relocImmSExt8), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def i64relocImmSExt32_su : PatLeaf<(i64relocImmSExt32), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+
+def i16immSExt8_su : PatLeaf<(i16immSExt8), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def i32immSExt8_su : PatLeaf<(i32immSExt8), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def i64immSExt8_su : PatLeaf<(i64immSExt8), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+
+// i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+// unsigned field.
+def i64immZExt32 : ImmLeaf<i64, [{ return isUInt<32>(Imm); }]>;
+
+def i64immZExt32SExt8 : ImmLeaf<i64, [{
+ return isUInt<32>(Imm) && isInt<8>(static_cast<int32_t>(Imm));
+}]>;
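A quick worked check of this combined predicate, as a self-contained sketch (the helpers mirror the semantics of LLVM's isUInt/isInt templates but are re-implemented here):

    #include <cstdint>
    #include <cstdio>

    static bool isUInt32(uint64_t Imm) { return Imm <= UINT32_MAX; }
    static bool isInt8(int32_t V) { return V >= INT8_MIN && V <= INT8_MAX; }

    int main() {
      // 0x7f fits an unsigned 32-bit field and sign-extends from 8 bits.
      uint64_t A = 0x7f;
      // 0xffffff80 fits unsigned 32 bits; as int32_t it is -128, int8-sized.
      uint64_t B = 0xffffff80;
      // 0x80 is 128, outside the signed-8 range [-128, 127].
      uint64_t C = 0x80;
      std::printf("%d %d %d\n",
                  isUInt32(A) && isInt8((int32_t)A),   // 1
                  isUInt32(B) && isInt8((int32_t)B),   // 1
                  isUInt32(C) && isInt8((int32_t)C));  // 0
    }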
+
+// Helper fragments for loads.
+
+// It's safe to fold a zextload/extload from i1 as a regular i8 load. The
+// upper bits are guaranteed to be zero and we were going to emit a MOV8rm
+// which might get folded during the peephole pass anyway.
+def loadi8 : PatFrag<(ops node:$ptr), (i8 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ return ExtType == ISD::NON_EXTLOAD || ExtType == ISD::EXTLOAD ||
+ ExtType == ISD::ZEXTLOAD;
+}]>;
+
+// It's always safe to treat an anyext i16 load as an i32 load if the i16 is
+// known to be 32-bit aligned or better. Ditto for i8 to i16.
+def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ return true;
+ if (ExtType == ISD::EXTLOAD && EnablePromoteAnyextLoad)
+ return LD->getAlign() >= 2 && LD->isSimple();
+ return false;
+}]>;
+
+def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ return true;
+ if (ExtType == ISD::EXTLOAD && EnablePromoteAnyextLoad)
+ return LD->getAlign() >= 4 && LD->isSimple();
+ return false;
+}]>;
+
+def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
+def loadf16 : PatFrag<(ops node:$ptr), (f16 (load node:$ptr))>;
+def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
+def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
+def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
+def loadf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr))>;
+def alignedloadf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr)), [{
+ LoadSDNode *Ld = cast<LoadSDNode>(N);
+ return Ld->getAlign() >= Ld->getMemoryVT().getStoreSize();
+}]>;
+def memopf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr)), [{
+ LoadSDNode *Ld = cast<LoadSDNode>(N);
+ return Subtarget->hasSSEUnalignedMem() ||
+ Ld->getAlign() >= Ld->getMemoryVT().getStoreSize();
+}]>;
+
+def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
+def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
+def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>;
+def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
+def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
+def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
+
+def zextloadi8i1 : PatFrag<(ops node:$ptr), (i8 (zextloadi1 node:$ptr))>;
+def zextloadi16i1 : PatFrag<(ops node:$ptr), (i16 (zextloadi1 node:$ptr))>;
+def zextloadi32i1 : PatFrag<(ops node:$ptr), (i32 (zextloadi1 node:$ptr))>;
+def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
+def zextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>;
+def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>;
+def zextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>;
+def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
+def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
+def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
+
+def extloadi8i1 : PatFrag<(ops node:$ptr), (i8 (extloadi1 node:$ptr))>;
+def extloadi16i1 : PatFrag<(ops node:$ptr), (i16 (extloadi1 node:$ptr))>;
+def extloadi32i1 : PatFrag<(ops node:$ptr), (i32 (extloadi1 node:$ptr))>;
+def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 (extloadi8 node:$ptr))>;
+def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>;
+def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
+def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>;
+def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
+def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
+
+// We can treat an i8/i16 extending load to i64 as a 32-bit load if it's known
+// to be 4-byte aligned or better.
+def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType != ISD::EXTLOAD)
+ return false;
+ if (LD->getMemoryVT() == MVT::i32)
+ return true;
+
+ return LD->getAlign() >= 4 && LD->isSimple();
+}]>;
+
+// binary op with only one user
+class binop_oneuse<SDPatternOperator operator>
+ : PatFrag<(ops node:$A, node:$B),
+ (operator node:$A, node:$B), [{
+ return N->hasOneUse();
+}]>;
+
+def add_su : binop_oneuse<add>;
+def and_su : binop_oneuse<and>;
+def srl_su : binop_oneuse<srl>;
+
+// unary op with only one user
+class unop_oneuse<SDPatternOperator operator>
+ : PatFrag<(ops node:$A),
+ (operator node:$A), [{
+ return N->hasOneUse();
+}]>;
+
+
+def ineg_su : unop_oneuse<ineg>;
+def trunc_su : unop_oneuse<trunc>;
+
+def X86add_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86add_flag node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
+def X86sub_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86sub_flag node:$lhs, node:$rhs), [{
+ // Only use DEC if the result is used.
+ return !SDValue(N, 0).use_empty() && hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
+def X86testpat : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86cmp (and_su node:$lhs, node:$rhs), 0)>;
+
+
+def X86any_fcmp : PatFrags<(ops node:$lhs, node:$rhs),
+ [(X86strict_fcmp node:$lhs, node:$rhs),
+ (X86fcmp node:$lhs, node:$rhs)]>;
+
+// If PREFETCHWT1 is supported, we want to use it for everything but T0.
+def PrefetchWLevel : PatFrag<(ops), (i32 timm), [{
+ return N->getSExtValue() == 3 || !Subtarget->hasPREFETCHWT1();
+}]>;
+
+// Use PREFETCHWT1 for NTA, T2, T1.
+def PrefetchWT1Level : TImmLeaf<i32, [{
+ return Imm < 3;
+}]>;
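Assuming the timm here is the locality argument of the prefetch intrinsic (0 = NTA up through 3 = T0), a minimal selector mirroring the two predicates above might look like this sketch; the function and string names are illustrative, not the actual selection code:

    #include <cstdio>

    // Hypothetical mapping: locality 3 (T0) always uses PREFETCHW; lower
    // localities use PREFETCHWT1 when the subtarget has it.
    static const char *selectWritePrefetch(int Locality, bool HasPREFETCHWT1) {
      if (Locality == 3 || !HasPREFETCHWT1)
        return "PREFETCHW";   // PrefetchWLevel path
      return "PREFETCHWT1";   // PrefetchWT1Level path (Imm < 3)
    }

    int main() {
      std::printf("%s\n", selectWritePrefetch(0, true));  // PREFETCHWT1
      std::printf("%s\n", selectWritePrefetch(3, true));  // PREFETCHW
      std::printf("%s\n", selectWritePrefetch(1, false)); // PREFETCHW
    }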
+
+def X86lock_add_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86lock_add node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 0));
+}]>;
+
+def X86lock_sub_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86lock_sub node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 0));
+}]>;
+
+def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
+ (X86tcret node:$ptr, node:$off), [{
+ // X86tcret args: (*chain, ptr, imm, regs..., glue)
+ unsigned NumRegs = 0;
+ for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i)
+ if (isa<RegisterSDNode>(N->getOperand(i)) && ++NumRegs > 6)
+ return false;
+ return true;
+}]>;
+
+def X86tcret_1reg : PatFrag<(ops node:$ptr, node:$off),
+ (X86tcret node:$ptr, node:$off), [{
+ // X86tcret args: (*chain, ptr, imm, regs..., glue)
+ unsigned NumRegs = 1;
+ const SDValue& BasePtr = cast<LoadSDNode>(N->getOperand(1))->getBasePtr();
+ if (isa<FrameIndexSDNode>(BasePtr))
+ NumRegs = 3;
+ else if (BasePtr->getNumOperands() && isa<GlobalAddressSDNode>(BasePtr->getOperand(0)))
+ NumRegs = 3;
+ for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i)
+ if (isa<RegisterSDNode>(N->getOperand(i)) && ( NumRegs-- == 0))
+ return false;
+ return true;
+}]>;
+
+// If this is an anyext of the remainder of an 8-bit sdivrem, use a MOVSX
+// instead of a MOVZX. The sdivrem lowering will emit a MOVSX to move
+// %ah to the lower byte of a register. By using a MOVSX here we allow a
+// post-isel peephole to merge the two MOVSX instructions into one.
+def anyext_sdiv : PatFrag<(ops node:$lhs), (anyext node:$lhs),[{
+ return (N->getOperand(0).getOpcode() == ISD::SDIVREM &&
+ N->getOperand(0).getResNo() == 1);
+}]>;
+
+// Any instruction that defines a 32-bit result zeroes the high half of the
+// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
+// be copying from a truncate. AssertSext/AssertZext/AssertAlign aren't saying
+// anything about the upper 32 bits; they're probably just qualifying a
+// CopyFromReg. FREEZE may be coming from a truncate. Any other 32-bit
+// operation will zero-extend up to 64 bits.
+def def32 : PatLeaf<(i32 GR32:$src), [{
+ return N->getOpcode() != ISD::TRUNCATE &&
+ N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
+ N->getOpcode() != ISD::CopyFromReg &&
+ N->getOpcode() != ISD::AssertSext &&
+ N->getOpcode() != ISD::AssertZext &&
+ N->getOpcode() != ISD::AssertAlign &&
+ N->getOpcode() != ISD::FREEZE;
+}]>;
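The invariant this leaf relies on is the x86-64 rule that writing a 32-bit register implicitly zeroes bits 63:32 of the full 64-bit register. C++'s unsigned conversions model the same thing, as this small aside shows:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t rax = 0xffffffffffffffffULL; // pretend RAX is all ones
      uint32_t eax = 0x12345678;            // a 32-bit result
      rax = eax; // like writing EAX: the upper 32 bits become zero
      std::printf("%016llx\n", (unsigned long long)rax); // 0000000012345678
    }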
+
+// Treat an 'or' node as an 'add' if the or'ed operands share no set bits.
+def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
+
+ KnownBits Known0 = CurDAG->computeKnownBits(N->getOperand(0), 0);
+ KnownBits Known1 = CurDAG->computeKnownBits(N->getOperand(1), 0);
+ return (~Known0.Zero & ~Known1.Zero) == 0;
+}]>;
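The check above is the standard disjoint-bits argument: if no bit position can be set in both operands, the OR never produces a carry, so it equals ADD. A self-contained sketch over plain known-zero masks, standing in for LLVM's KnownBits:

    #include <cstdint>
    #include <cstdio>

    // KnownZero masks: a set bit means "this bit is definitely 0".
    static bool orIsAdd(uint64_t KnownZero0, uint64_t KnownZero1) {
      // Bits possibly set in each operand:
      uint64_t MaybeOne0 = ~KnownZero0, MaybeOne1 = ~KnownZero1;
      return (MaybeOne0 & MaybeOne1) == 0; // no overlap => no carries
    }

    int main() {
      // Operand 0 known to be a multiple of 16 (low 4 bits zero),
      // operand 1 known to be < 16 (all but the low 4 bits zero).
      std::printf("%d\n", orIsAdd(0xfULL, ~0xfULL)); // 1
    }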
+
+def shiftMask8 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
+ return isUnneededShiftMask(N, 3);
+}]>;
+
+def shiftMask16 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
+ return isUnneededShiftMask(N, 4);
+}]>;
+
+def shiftMask32 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
+ return isUnneededShiftMask(N, 5);
+}]>;
+
+def shiftMask64 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
+ return isUnneededShiftMask(N, 6);
+}]>;
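These fragments exist because x86 shift and rotate patterns only read the low few bits of the count (3 for i8 rotates up through 6 for 64-bit shifts), so an AND that preserves all of those low bits changes nothing. The real isUnneededShiftMask lives in the X86 ISel code and also folds in known-zero bits; this is a hedged sketch of just the constant-mask case:

    #include <cstdint>
    #include <cstdio>

    // The AND mask is unneeded if it keeps all Width low bits, since the
    // pattern ignores everything above them anyway.
    static bool isUnneededShiftMask(uint64_t Mask, unsigned Width) {
      uint64_t HWMask = (1ULL << Width) - 1; // e.g. Width=5 -> 0x1f
      return (Mask & HWMask) == HWMask;
    }

    int main() {
      std::printf("%d\n", isUnneededShiftMask(0x1f, 5)); // 1: and $31, 32-bit
      std::printf("%d\n", isUnneededShiftMask(0x3f, 5)); // 1: keeps low 5 bits
      std::printf("%d\n", isUnneededShiftMask(0x0f, 5)); // 0: clears bit 4
    }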
+
+//===----------------------------------------------------------------------===//
+// Pattern fragments to auto generate BMI instructions.
+//===----------------------------------------------------------------------===//
+
+def or_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86or_flag node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
+def xor_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86xor_flag node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
+def and_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86and_flag node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
+//===----------------------------------------------------------------------===//
+// FPStack specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTX86Fld : SDTypeProfile<1, 1, [SDTCisFP<0>,
+ SDTCisPtrTy<1>]>;
+def SDTX86Fst : SDTypeProfile<0, 2, [SDTCisFP<0>,
+ SDTCisPtrTy<1>]>;
+def SDTX86Fild : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
+def SDTX86Fist : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
+
+def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDTX86CwdLoad : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDTX86FPEnv : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def X86fp80_add : SDNode<"X86ISD::FP80_ADD", SDTFPBinOp, [SDNPCommutative]>;
+def X86strict_fp80_add : SDNode<"X86ISD::STRICT_FP80_ADD", SDTFPBinOp,
+ [SDNPHasChain,SDNPCommutative]>;
+def any_X86fp80_add : PatFrags<(ops node:$lhs, node:$rhs),
+ [(X86strict_fp80_add node:$lhs, node:$rhs),
+ (X86fp80_add node:$lhs, node:$rhs)]>;
+
+def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def X86fst : SDNode<"X86ISD::FST", SDTX86Fst,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def X86fist : SDNode<"X86ISD::FIST", SDTX86Fist,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
+ [SDNPHasChain, SDNPMayStore, SDNPSideEffect,
+ SDNPMemOperand]>;
+def X86fp_cwd_set16 : SDNode<"X86ISD::FLDCW16m", SDTX86CwdLoad,
+ [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
+ SDNPMemOperand]>;
+def X86fpenv_get : SDNode<"X86ISD::FNSTENVm", SDTX86FPEnv,
+ [SDNPHasChain, SDNPMayStore, SDNPSideEffect,
+ SDNPMemOperand]>;
+def X86fpenv_set : SDNode<"X86ISD::FLDENVm", SDTX86FPEnv,
+ [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
+ SDNPMemOperand]>;
+
+def X86fstf32 : PatFrag<(ops node:$val, node:$ptr),
+ (X86fst node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f32;
+}]>;
+def X86fstf64 : PatFrag<(ops node:$val, node:$ptr),
+ (X86fst node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f64;
+}]>;
+def X86fstf80 : PatFrag<(ops node:$val, node:$ptr),
+ (X86fst node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f80;
+}]>;
+
+def X86fldf32 : PatFrag<(ops node:$ptr), (X86fld node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f32;
+}]>;
+def X86fldf64 : PatFrag<(ops node:$ptr), (X86fld node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f64;
+}]>;
+def X86fldf80 : PatFrag<(ops node:$ptr), (X86fld node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f80;
+}]>;
+
+def X86fild16 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def X86fild32 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def X86fild64 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+def X86fist32 : PatFrag<(ops node:$val, node:$ptr),
+ (X86fist node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+def X86fist64 : PatFrag<(ops node:$val, node:$ptr),
+ (X86fist node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+def X86fp_to_i16mem : PatFrag<(ops node:$val, node:$ptr),
+ (X86fp_to_mem node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def X86fp_to_i32mem : PatFrag<(ops node:$val, node:$ptr),
+ (X86fp_to_mem node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def X86fp_to_i64mem : PatFrag<(ops node:$val, node:$ptr),
+ (X86fp_to_mem node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+//===----------------------------------------------------------------------===//
+// FPStack pattern fragments
+//===----------------------------------------------------------------------===//
+
+def fpimm0 : FPImmLeaf<fAny, [{
+ return Imm.isExactlyValue(+0.0);
+}]>;
+
+def fpimmneg0 : FPImmLeaf<fAny, [{
+ return Imm.isExactlyValue(-0.0);
+}]>;
+
+def fpimm1 : FPImmLeaf<fAny, [{
+ return Imm.isExactlyValue(+1.0);
+}]>;
+
+def fpimmneg1 : FPImmLeaf<fAny, [{
+ return Imm.isExactlyValue(-1.0);
+}]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 9c1f33e6f975..f86e15b3ed5d 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -1045,10 +1045,6 @@ def sse_load_f64 : PatFrags<(ops node:$ptr),
(v2f64 (X86vzload64 node:$ptr)),
(v2f64 (scalar_to_vector (loadf64 node:$ptr)))]>;
-def shmem : X86MemOperand<"printwordmem", X86Mem16AsmOperand>;
-def ssmem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>;
-def sdmem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>;
-
def fp16imm0 : PatLeaf<(f16 fpimm), [{
return N->isExactlyValue(+0.0);
}]>;
@@ -1263,3 +1259,116 @@ def masked_truncstore_us_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3)
(X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
+
+def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
+ (X86Vfpclasss node:$src1, node:$src2), [{
+ return N->hasOneUse();
+}]>;
+
+def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
+ (X86Vfpclass node:$src1, node:$src2), [{
+ return N->hasOneUse();
+}]>;
+
+// These nodes use 'vnot' instead of 'not' to support vectors.
+def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
+def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
+
+// Used for matching masked operations. Ensures the operation part only has a
+// single use.
+def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
+ (vselect node:$mask, node:$src1, node:$src2), [{
+ return isProfitableToFormMaskedOp(N);
+}]>;
+
+def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
+ (X86selects node:$mask, node:$src1, node:$src2), [{
+ return isProfitableToFormMaskedOp(N);
+}]>;
+
+def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (X86cmpms node:$src1, node:$src2, node:$cc), [{
+ return N->hasOneUse();
+}]>;
+def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
+ return N->hasOneUse();
+}]>;
+
+// PatFrags that contain a select and a truncate op. They take operands in the
+// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
+// either to the multiclasses.
+def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
+ (vselect_mask node:$mask,
+ (trunc node:$src), node:$src0)>;
+def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
+ (vselect_mask node:$mask,
+ (X86vtruncs node:$src), node:$src0)>;
+def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
+ (vselect_mask node:$mask,
+ (X86vtruncus node:$src), node:$src0)>;
+
+def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
+ (X86Vpshufbitqmb node:$src1, node:$src2), [{
+ return N->hasOneUse();
+}]>;
+
+// This fragment treats X86cmpm as commutable to help match loads in both
+// operands for PCMPEQ.
+def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
+def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
+ (setcc node:$src1, node:$src2, SETGT)>;
+
+def X86pcmpm_imm : SDNodeXForm<setcc, [{
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ uint8_t SSECC = X86::getVPCMPImmForCond(CC);
+ return getI8Imm(SSECC, SDLoc(N));
+}]>;
+
+// Swapped operand version of the above.
+def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ uint8_t SSECC = X86::getVPCMPImmForCond(CC);
+ SSECC = X86::getSwappedVPCMPImm(SSECC);
+ return getI8Imm(SSECC, SDLoc(N));
+}]>;
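Swapping the comparison operands means the VPCMP immediate must be replaced by its reversed predicate: LT becomes NLE, LE becomes NLT, and the symmetric codes map to themselves. A hedged table sketch of that swap, assuming the usual AVX-512 integer predicate encoding (0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=NLT, 6=NLE, 7=TRUE); the function is a hypothetical stand-in for X86::getSwappedVPCMPImm:

    #include <cstdint>
    #include <cstdio>

    // Reverse the predicate so that (a CC b) == (b swapped(CC) a).
    static uint8_t swappedVPCMPImm(uint8_t Imm) {
      static const uint8_t Swap[8] = {
          0, // EQ    -> EQ
          6, // LT    -> NLE (i.e. GT)
          5, // LE    -> NLT (i.e. GE)
          3, // FALSE -> FALSE
          4, // NE    -> NE
          2, // NLT   -> LE
          1, // NLE   -> LT
          7, // TRUE  -> TRUE
      };
      return Swap[Imm & 7];
    }

    int main() {
      std::printf("%u\n", swappedVPCMPImm(1)); // 6: a < b  <=>  b > a
    }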
+
+def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (setcc node:$src1, node:$src2, node:$cc), [{
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ return !ISD::isUnsignedIntSetCC(CC);
+}], X86pcmpm_imm>;
+
+def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (setcc node:$src1, node:$src2, node:$cc), [{
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
+}], X86pcmpm_imm>;
+
+def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (setcc node:$src1, node:$src2, node:$cc), [{
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ return ISD::isUnsignedIntSetCC(CC);
+}], X86pcmpm_imm>;
+
+def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (setcc node:$src1, node:$src2, node:$cc), [{
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
+}], X86pcmpm_imm>;
+
+def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (X86cmpm node:$src1, node:$src2, node:$cc), [{
+ return N->hasOneUse();
+}]>;
+
+def X86cmpm_imm_commute : SDNodeXForm<timm, [{
+ uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
+ return getI8Imm(Imm, SDLoc(N));
+}]>;
+
+def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86vpmaddwd node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp
index 10a0ccdcb023..bc2d5ed1e17d 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -58,26 +58,25 @@ static cl::opt<bool>
cl::desc("Disable fusing of spill code into instructions"),
cl::Hidden);
static cl::opt<bool>
-PrintFailedFusing("print-failed-fuse-candidates",
- cl::desc("Print instructions that the allocator wants to"
- " fuse, but the X86 backend currently can't"),
- cl::Hidden);
+ PrintFailedFusing("print-failed-fuse-candidates",
+ cl::desc("Print instructions that the allocator wants to"
+ " fuse, but the X86 backend currently can't"),
+ cl::Hidden);
static cl::opt<bool>
-ReMatPICStubLoad("remat-pic-stub-load",
- cl::desc("Re-materialize load from stub in PIC mode"),
- cl::init(false), cl::Hidden);
+ ReMatPICStubLoad("remat-pic-stub-load",
+ cl::desc("Re-materialize load from stub in PIC mode"),
+ cl::init(false), cl::Hidden);
static cl::opt<unsigned>
-PartialRegUpdateClearance("partial-reg-update-clearance",
- cl::desc("Clearance between two register writes "
- "for inserting XOR to avoid partial "
- "register update"),
- cl::init(64), cl::Hidden);
-static cl::opt<unsigned>
-UndefRegClearance("undef-reg-clearance",
- cl::desc("How many idle instructions we would like before "
- "certain undef register reads"),
- cl::init(128), cl::Hidden);
-
+ PartialRegUpdateClearance("partial-reg-update-clearance",
+ cl::desc("Clearance between two register writes "
+ "for inserting XOR to avoid partial "
+ "register update"),
+ cl::init(64), cl::Hidden);
+static cl::opt<unsigned> UndefRegClearance(
+ "undef-reg-clearance",
+ cl::desc("How many idle instructions we would like before "
+ "certain undef register reads"),
+ cl::init(128), cl::Hidden);
// Pin the vtable to this file.
void X86InstrInfo::anchor() {}
@@ -87,17 +86,46 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
: X86::ADJCALLSTACKDOWN32),
(STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64
: X86::ADJCALLSTACKUP32),
- X86::CATCHRET,
- (STI.is64Bit() ? X86::RET64 : X86::RET32)),
- Subtarget(STI), RI(STI.getTargetTriple()) {
-}
+ X86::CATCHRET, (STI.is64Bit() ? X86::RET64 : X86::RET32)),
+ Subtarget(STI), RI(STI.getTargetTriple()) {}
+
+const TargetRegisterClass *
+X86InstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction &MF) const {
+ auto *RC = TargetInstrInfo::getRegClass(MCID, OpNum, TRI, MF);
+  // If the target does not have EGPR, then r16-r31 will be reserved for all
+  // instructions.
+ if (!RC || !Subtarget.hasEGPR())
+ return RC;
-bool
-X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
- Register &SrcReg, Register &DstReg,
- unsigned &SubIdx) const {
+ if (X86II::canUseApxExtendedReg(MCID))
+ return RC;
+
+ switch (RC->getID()) {
+ default:
+ return RC;
+ case X86::GR8RegClassID:
+ return &X86::GR8_NOREX2RegClass;
+ case X86::GR16RegClassID:
+ return &X86::GR16_NOREX2RegClass;
+ case X86::GR32RegClassID:
+ return &X86::GR32_NOREX2RegClass;
+ case X86::GR64RegClassID:
+ return &X86::GR64_NOREX2RegClass;
+ case X86::GR32_NOSPRegClassID:
+ return &X86::GR32_NOREX2_NOSPRegClass;
+ case X86::GR64_NOSPRegClassID:
+ return &X86::GR64_NOREX2_NOSPRegClass;
+ }
+}
+
+bool X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ Register &SrcReg, Register &DstReg,
+ unsigned &SubIdx) const {
switch (MI.getOpcode()) {
- default: break;
+ default:
+ break;
case X86::MOVSX16rr8:
case X86::MOVZX16rr8:
case X86::MOVSX32rr8:
@@ -118,7 +146,8 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
SrcReg = MI.getOperand(1).getReg();
DstReg = MI.getOperand(0).getReg();
switch (MI.getOpcode()) {
- default: llvm_unreachable("Unreachable!");
+ default:
+ llvm_unreachable("Unreachable!");
case X86::MOVSX16rr8:
case X86::MOVZX16rr8:
case X86::MOVSX32rr8:
@@ -185,7 +214,7 @@ bool X86InstrInfo::isDataInvariant(MachineInstr &MI) {
isSBB(Opcode) || isSUB(Opcode) || isXOR(Opcode))
return true;
// Arithmetic with just 32-bit and 64-bit variants and no immediates.
- if (isADCX(Opcode) || isADOX(Opcode) || isANDN(Opcode))
+ if (isANDN(Opcode))
return true;
// Unary arithmetic operations.
if (isDEC(Opcode) || isINC(Opcode) || isNEG(Opcode))
@@ -284,14 +313,10 @@ bool X86InstrInfo::isDataInvariantLoad(MachineInstr &MI) {
case X86::ADC16rm:
case X86::ADC32rm:
case X86::ADC64rm:
- case X86::ADCX32rm:
- case X86::ADCX64rm:
case X86::ADD8rm:
case X86::ADD16rm:
case X86::ADD32rm:
case X86::ADD64rm:
- case X86::ADOX32rm:
- case X86::ADOX64rm:
case X86::AND8rm:
case X86::AND16rm:
case X86::AND32rm:
@@ -414,8 +439,7 @@ int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
const MachineBasicBlock *MBB = MI.getParent();
auto I = ++MachineBasicBlock::const_iterator(MI);
for (auto E = MBB->end(); I != E; ++I) {
- if (I->getOpcode() == getCallFrameDestroyOpcode() ||
- I->isCall())
+ if (I->getOpcode() == getCallFrameDestroyOpcode() || I->isCall())
break;
}
@@ -468,10 +492,12 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
return false;
case X86::MOV8rm:
case X86::KMOVBkm:
+ case X86::KMOVBkm_EVEX:
MemBytes = 1;
return true;
case X86::MOV16rm:
case X86::KMOVWkm:
+ case X86::KMOVWkm_EVEX:
case X86::VMOVSHZrm:
case X86::VMOVSHZrm_alt:
MemBytes = 2;
@@ -484,6 +510,7 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
case X86::VMOVSSZrm:
case X86::VMOVSSZrm_alt:
case X86::KMOVDkm:
+ case X86::KMOVDkm_EVEX:
MemBytes = 4;
return true;
case X86::MOV64rm:
@@ -497,6 +524,7 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
case X86::KMOVQkm:
+ case X86::KMOVQkm_EVEX:
MemBytes = 8;
return true;
case X86::MOVAPSrm:
@@ -566,10 +594,12 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
return false;
case X86::MOV8mr:
case X86::KMOVBmk:
+ case X86::KMOVBmk_EVEX:
MemBytes = 1;
return true;
case X86::MOV16mr:
case X86::KMOVWmk:
+ case X86::KMOVWmk_EVEX:
case X86::VMOVSHZmr:
MemBytes = 2;
return true;
@@ -578,6 +608,7 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
case X86::VMOVSSmr:
case X86::VMOVSSZmr:
case X86::KMOVDmk:
+ case X86::KMOVDmk_EVEX:
MemBytes = 4;
return true;
case X86::MOV64mr:
@@ -589,6 +620,7 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
case X86::MMX_MOVQ64mr:
case X86::MMX_MOVNTQmr:
case X86::KMOVQmk:
+ case X86::KMOVQmk_EVEX:
MemBytes = 8;
return true;
case X86::MOVAPSmr:
@@ -729,7 +761,8 @@ static bool regIsPICBase(Register BaseReg, const MachineRegisterInfo &MRI) {
return false;
bool isPICBase = false;
for (MachineRegisterInfo::def_instr_iterator I = MRI.def_instr_begin(BaseReg),
- E = MRI.def_instr_end(); I != E; ++I) {
+ E = MRI.def_instr_end();
+ I != E; ++I) {
MachineInstr *DefMI = &*I;
if (DefMI->getOpcode() != X86::MOVPC32r)
return false;
@@ -747,8 +780,16 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(
// flag set.
llvm_unreachable("Unknown rematerializable operation!");
break;
-
+ case X86::IMPLICIT_DEF:
+ // Defer to generic logic.
+ break;
case X86::LOAD_STACK_GUARD:
+ case X86::LD_Fp032:
+ case X86::LD_Fp064:
+ case X86::LD_Fp080:
+ case X86::LD_Fp132:
+ case X86::LD_Fp164:
+ case X86::LD_Fp180:
case X86::AVX1_SETALLONES:
case X86::AVX2_SETALLONES:
case X86::AVX512_128_SET0:
@@ -871,13 +912,14 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(
if (BaseReg == 0 || BaseReg == X86::RIP)
return true;
// Allow re-materialization of PIC load.
- if (!ReMatPICStubLoad && MI.getOperand(1 + X86::AddrDisp).isGlobal())
- return false;
- const MachineFunction &MF = *MI.getParent()->getParent();
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- return regIsPICBase(BaseReg, MRI);
+ if (!(!ReMatPICStubLoad && MI.getOperand(1 + X86::AddrDisp).isGlobal())) {
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (regIsPICBase(BaseReg, MRI))
+ return true;
+ }
}
- return false;
+ break;
}
case X86::LEA32r:
@@ -895,11 +937,13 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(
// Allow re-materialization of lea PICBase + x.
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
- return regIsPICBase(BaseReg, MRI);
+ if (regIsPICBase(BaseReg, MRI))
+ return true;
}
- return false;
+ break;
}
}
+ return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}
void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
@@ -914,9 +958,15 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
// effects.
int Value;
switch (Orig.getOpcode()) {
- case X86::MOV32r0: Value = 0; break;
- case X86::MOV32r1: Value = 1; break;
- case X86::MOV32r_1: Value = -1; break;
+ case X86::MOV32r0:
+ Value = 0;
+ break;
+ case X86::MOV32r1:
+ Value = 1;
+ break;
+ case X86::MOV32r_1:
+ Value = -1;
+ break;
default:
llvm_unreachable("Unexpected instruction!");
}
@@ -937,8 +987,8 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
/// True if MI has a condition code def, e.g. EFLAGS, that is not marked dead.
bool X86InstrInfo::hasLiveCondCodeDef(MachineInstr &MI) const {
for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.isDef() &&
- MO.getReg() == X86::EFLAGS && !MO.isDead()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS &&
+ !MO.isDead()) {
return true;
}
}
@@ -1093,8 +1143,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
if (AllowSP) {
RC = Opc != X86::LEA32r ? &X86::GR64RegClass : &X86::GR32RegClass;
} else {
- RC = Opc != X86::LEA32r ?
- &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass;
+ RC = Opc != X86::LEA32r ? &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass;
}
Register SrcReg = Src.getReg();
isKill = MI.killsRegister(SrcReg);
@@ -1157,7 +1206,8 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
// We handle 8-bit adds and various 16-bit opcodes in the switch below.
MachineBasicBlock &MBB = *MI.getParent();
MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
- assert((Is8BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
+ assert((Is8BitOp ||
+ RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
*RegInfo.getRegClass(MI.getOperand(0).getReg())) == 16) &&
"Unexpected type for LEA transform");
@@ -1203,7 +1253,8 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
switch (MIOpc) {
- default: llvm_unreachable("Unreachable!");
+ default:
+ llvm_unreachable("Unreachable!");
case X86::SHL8ri:
case X86::SHL16ri: {
unsigned ShAmt = MI.getOperand(2).getImm();
@@ -1361,11 +1412,13 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
unsigned NumRegOperands = 2;
unsigned MIOpc = MI.getOpcode();
switch (MIOpc) {
- default: llvm_unreachable("Unreachable!");
+ default:
+ llvm_unreachable("Unreachable!");
case X86::SHL64ri: {
assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
- if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
+ if (!isTruncatedShiftCountForLEA(ShAmt))
+ return nullptr;
// LEA can't handle RSP.
if (Src.getReg().isVirtual() && !MF.getRegInfo().constrainRegClass(
@@ -1384,7 +1437,8 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
case X86::SHL32ri: {
assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
- if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
+ if (!isTruncatedShiftCountForLEA(ShAmt))
+ return nullptr;
unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
@@ -1395,14 +1449,13 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
ImplicitOp, LV, LIS))
return nullptr;
- MachineInstrBuilder MIB =
- BuildMI(MF, MI.getDebugLoc(), get(Opc))
- .add(Dest)
- .addReg(0)
- .addImm(1LL << ShAmt)
- .addReg(SrcReg, getKillRegState(isKill))
- .addImm(0)
- .addReg(0);
+ MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
+ .add(Dest)
+ .addReg(0)
+ .addImm(1LL << ShAmt)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addImm(0)
+ .addReg(0);
if (ImplicitOp.getReg() != 0)
MIB.add(ImplicitOp);
NewMI = MIB;
@@ -1425,18 +1478,18 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
case X86::INC64r:
case X86::INC32r: {
assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
- unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r :
- (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ unsigned Opc = MIOpc == X86::INC64r
+ ? X86::LEA64r
+ : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
bool isKill;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, isKill,
ImplicitOp, LV, LIS))
return nullptr;
- MachineInstrBuilder MIB =
- BuildMI(MF, MI.getDebugLoc(), get(Opc))
- .add(Dest)
- .addReg(SrcReg, getKillRegState(isKill));
+ MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
+ .add(Dest)
+ .addReg(SrcReg, getKillRegState(isKill));
if (ImplicitOp.getReg() != 0)
MIB.add(ImplicitOp);
@@ -1450,8 +1503,9 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
case X86::DEC64r:
case X86::DEC32r: {
assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
- unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
- : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ unsigned Opc = MIOpc == X86::DEC64r
+ ? X86::LEA64r
+ : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
bool isKill;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
@@ -1616,8 +1670,8 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
assert(MI.getNumOperands() >= 3 && "Unknown sub instruction!");
- MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(),
- get(X86::LEA64r)).add(Dest).add(Src);
+ MachineInstrBuilder MIB =
+ BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src);
NewMI = addOffset(MIB, -Imm);
break;
}
@@ -1628,18 +1682,30 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
case X86::VMOVDQU16Z128rmk:
case X86::VMOVDQU16Z256rmk:
case X86::VMOVDQU16Zrmk:
- case X86::VMOVDQU32Z128rmk: case X86::VMOVDQA32Z128rmk:
- case X86::VMOVDQU32Z256rmk: case X86::VMOVDQA32Z256rmk:
- case X86::VMOVDQU32Zrmk: case X86::VMOVDQA32Zrmk:
- case X86::VMOVDQU64Z128rmk: case X86::VMOVDQA64Z128rmk:
- case X86::VMOVDQU64Z256rmk: case X86::VMOVDQA64Z256rmk:
- case X86::VMOVDQU64Zrmk: case X86::VMOVDQA64Zrmk:
- case X86::VMOVUPDZ128rmk: case X86::VMOVAPDZ128rmk:
- case X86::VMOVUPDZ256rmk: case X86::VMOVAPDZ256rmk:
- case X86::VMOVUPDZrmk: case X86::VMOVAPDZrmk:
- case X86::VMOVUPSZ128rmk: case X86::VMOVAPSZ128rmk:
- case X86::VMOVUPSZ256rmk: case X86::VMOVAPSZ256rmk:
- case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk:
+ case X86::VMOVDQU32Z128rmk:
+ case X86::VMOVDQA32Z128rmk:
+ case X86::VMOVDQU32Z256rmk:
+ case X86::VMOVDQA32Z256rmk:
+ case X86::VMOVDQU32Zrmk:
+ case X86::VMOVDQA32Zrmk:
+ case X86::VMOVDQU64Z128rmk:
+ case X86::VMOVDQA64Z128rmk:
+ case X86::VMOVDQU64Z256rmk:
+ case X86::VMOVDQA64Z256rmk:
+ case X86::VMOVDQU64Zrmk:
+ case X86::VMOVDQA64Zrmk:
+ case X86::VMOVUPDZ128rmk:
+ case X86::VMOVAPDZ128rmk:
+ case X86::VMOVUPDZ256rmk:
+ case X86::VMOVAPDZ256rmk:
+ case X86::VMOVUPDZrmk:
+ case X86::VMOVAPDZrmk:
+ case X86::VMOVUPSZ128rmk:
+ case X86::VMOVAPSZ128rmk:
+ case X86::VMOVUPSZ256rmk:
+ case X86::VMOVAPSZ256rmk:
+ case X86::VMOVUPSZrmk:
+ case X86::VMOVAPSZrmk:
case X86::VBROADCASTSDZ256rmk:
case X86::VBROADCASTSDZrmk:
case X86::VBROADCASTSSZ128rmk:
@@ -1653,59 +1719,142 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
case X86::VPBROADCASTQZrmk: {
unsigned Opc;
switch (MIOpc) {
- default: llvm_unreachable("Unreachable!");
- case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break;
- case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break;
- case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break;
- case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break;
- case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break;
- case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break;
- case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
- case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
- case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
- case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
- case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
- case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
- case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
- case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
- case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
- case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
- case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
- case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
- case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
- case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
- case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
- case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
- case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
- case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
- case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
- case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
- case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
- case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
- case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
- case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
- case X86::VBROADCASTSDZ256rmk: Opc = X86::VBLENDMPDZ256rmbk; break;
- case X86::VBROADCASTSDZrmk: Opc = X86::VBLENDMPDZrmbk; break;
- case X86::VBROADCASTSSZ128rmk: Opc = X86::VBLENDMPSZ128rmbk; break;
- case X86::VBROADCASTSSZ256rmk: Opc = X86::VBLENDMPSZ256rmbk; break;
- case X86::VBROADCASTSSZrmk: Opc = X86::VBLENDMPSZrmbk; break;
- case X86::VPBROADCASTDZ128rmk: Opc = X86::VPBLENDMDZ128rmbk; break;
- case X86::VPBROADCASTDZ256rmk: Opc = X86::VPBLENDMDZ256rmbk; break;
- case X86::VPBROADCASTDZrmk: Opc = X86::VPBLENDMDZrmbk; break;
- case X86::VPBROADCASTQZ128rmk: Opc = X86::VPBLENDMQZ128rmbk; break;
- case X86::VPBROADCASTQZ256rmk: Opc = X86::VPBLENDMQZ256rmbk; break;
- case X86::VPBROADCASTQZrmk: Opc = X86::VPBLENDMQZrmbk; break;
+ default:
+ llvm_unreachable("Unreachable!");
+ case X86::VMOVDQU8Z128rmk:
+ Opc = X86::VPBLENDMBZ128rmk;
+ break;
+ case X86::VMOVDQU8Z256rmk:
+ Opc = X86::VPBLENDMBZ256rmk;
+ break;
+ case X86::VMOVDQU8Zrmk:
+ Opc = X86::VPBLENDMBZrmk;
+ break;
+ case X86::VMOVDQU16Z128rmk:
+ Opc = X86::VPBLENDMWZ128rmk;
+ break;
+ case X86::VMOVDQU16Z256rmk:
+ Opc = X86::VPBLENDMWZ256rmk;
+ break;
+ case X86::VMOVDQU16Zrmk:
+ Opc = X86::VPBLENDMWZrmk;
+ break;
+ case X86::VMOVDQU32Z128rmk:
+ Opc = X86::VPBLENDMDZ128rmk;
+ break;
+ case X86::VMOVDQU32Z256rmk:
+ Opc = X86::VPBLENDMDZ256rmk;
+ break;
+ case X86::VMOVDQU32Zrmk:
+ Opc = X86::VPBLENDMDZrmk;
+ break;
+ case X86::VMOVDQU64Z128rmk:
+ Opc = X86::VPBLENDMQZ128rmk;
+ break;
+ case X86::VMOVDQU64Z256rmk:
+ Opc = X86::VPBLENDMQZ256rmk;
+ break;
+ case X86::VMOVDQU64Zrmk:
+ Opc = X86::VPBLENDMQZrmk;
+ break;
+ case X86::VMOVUPDZ128rmk:
+ Opc = X86::VBLENDMPDZ128rmk;
+ break;
+ case X86::VMOVUPDZ256rmk:
+ Opc = X86::VBLENDMPDZ256rmk;
+ break;
+ case X86::VMOVUPDZrmk:
+ Opc = X86::VBLENDMPDZrmk;
+ break;
+ case X86::VMOVUPSZ128rmk:
+ Opc = X86::VBLENDMPSZ128rmk;
+ break;
+ case X86::VMOVUPSZ256rmk:
+ Opc = X86::VBLENDMPSZ256rmk;
+ break;
+ case X86::VMOVUPSZrmk:
+ Opc = X86::VBLENDMPSZrmk;
+ break;
+ case X86::VMOVDQA32Z128rmk:
+ Opc = X86::VPBLENDMDZ128rmk;
+ break;
+ case X86::VMOVDQA32Z256rmk:
+ Opc = X86::VPBLENDMDZ256rmk;
+ break;
+ case X86::VMOVDQA32Zrmk:
+ Opc = X86::VPBLENDMDZrmk;
+ break;
+ case X86::VMOVDQA64Z128rmk:
+ Opc = X86::VPBLENDMQZ128rmk;
+ break;
+ case X86::VMOVDQA64Z256rmk:
+ Opc = X86::VPBLENDMQZ256rmk;
+ break;
+ case X86::VMOVDQA64Zrmk:
+ Opc = X86::VPBLENDMQZrmk;
+ break;
+ case X86::VMOVAPDZ128rmk:
+ Opc = X86::VBLENDMPDZ128rmk;
+ break;
+ case X86::VMOVAPDZ256rmk:
+ Opc = X86::VBLENDMPDZ256rmk;
+ break;
+ case X86::VMOVAPDZrmk:
+ Opc = X86::VBLENDMPDZrmk;
+ break;
+ case X86::VMOVAPSZ128rmk:
+ Opc = X86::VBLENDMPSZ128rmk;
+ break;
+ case X86::VMOVAPSZ256rmk:
+ Opc = X86::VBLENDMPSZ256rmk;
+ break;
+ case X86::VMOVAPSZrmk:
+ Opc = X86::VBLENDMPSZrmk;
+ break;
+ case X86::VBROADCASTSDZ256rmk:
+ Opc = X86::VBLENDMPDZ256rmbk;
+ break;
+ case X86::VBROADCASTSDZrmk:
+ Opc = X86::VBLENDMPDZrmbk;
+ break;
+ case X86::VBROADCASTSSZ128rmk:
+ Opc = X86::VBLENDMPSZ128rmbk;
+ break;
+ case X86::VBROADCASTSSZ256rmk:
+ Opc = X86::VBLENDMPSZ256rmbk;
+ break;
+ case X86::VBROADCASTSSZrmk:
+ Opc = X86::VBLENDMPSZrmbk;
+ break;
+ case X86::VPBROADCASTDZ128rmk:
+ Opc = X86::VPBLENDMDZ128rmbk;
+ break;
+ case X86::VPBROADCASTDZ256rmk:
+ Opc = X86::VPBLENDMDZ256rmbk;
+ break;
+ case X86::VPBROADCASTDZrmk:
+ Opc = X86::VPBLENDMDZrmbk;
+ break;
+ case X86::VPBROADCASTQZ128rmk:
+ Opc = X86::VPBLENDMQZ128rmbk;
+ break;
+ case X86::VPBROADCASTQZ256rmk:
+ Opc = X86::VPBLENDMQZ256rmbk;
+ break;
+ case X86::VPBROADCASTQZrmk:
+ Opc = X86::VPBLENDMQZrmbk;
+ break;
}
NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
- .add(Dest)
- .add(MI.getOperand(2))
- .add(Src)
- .add(MI.getOperand(3))
- .add(MI.getOperand(4))
- .add(MI.getOperand(5))
- .add(MI.getOperand(6))
- .add(MI.getOperand(7));
+ .add(Dest)
+ .add(MI.getOperand(2))
+ .add(Src)
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(5))
+ .add(MI.getOperand(6))
+ .add(MI.getOperand(7));
NumRegOperands = 4;
break;
}
@@ -1716,66 +1865,140 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
case X86::VMOVDQU16Z128rrk:
case X86::VMOVDQU16Z256rrk:
case X86::VMOVDQU16Zrrk:
- case X86::VMOVDQU32Z128rrk: case X86::VMOVDQA32Z128rrk:
- case X86::VMOVDQU32Z256rrk: case X86::VMOVDQA32Z256rrk:
- case X86::VMOVDQU32Zrrk: case X86::VMOVDQA32Zrrk:
- case X86::VMOVDQU64Z128rrk: case X86::VMOVDQA64Z128rrk:
- case X86::VMOVDQU64Z256rrk: case X86::VMOVDQA64Z256rrk:
- case X86::VMOVDQU64Zrrk: case X86::VMOVDQA64Zrrk:
- case X86::VMOVUPDZ128rrk: case X86::VMOVAPDZ128rrk:
- case X86::VMOVUPDZ256rrk: case X86::VMOVAPDZ256rrk:
- case X86::VMOVUPDZrrk: case X86::VMOVAPDZrrk:
- case X86::VMOVUPSZ128rrk: case X86::VMOVAPSZ128rrk:
- case X86::VMOVUPSZ256rrk: case X86::VMOVAPSZ256rrk:
- case X86::VMOVUPSZrrk: case X86::VMOVAPSZrrk: {
+ case X86::VMOVDQU32Z128rrk:
+ case X86::VMOVDQA32Z128rrk:
+ case X86::VMOVDQU32Z256rrk:
+ case X86::VMOVDQA32Z256rrk:
+ case X86::VMOVDQU32Zrrk:
+ case X86::VMOVDQA32Zrrk:
+ case X86::VMOVDQU64Z128rrk:
+ case X86::VMOVDQA64Z128rrk:
+ case X86::VMOVDQU64Z256rrk:
+ case X86::VMOVDQA64Z256rrk:
+ case X86::VMOVDQU64Zrrk:
+ case X86::VMOVDQA64Zrrk:
+ case X86::VMOVUPDZ128rrk:
+ case X86::VMOVAPDZ128rrk:
+ case X86::VMOVUPDZ256rrk:
+ case X86::VMOVAPDZ256rrk:
+ case X86::VMOVUPDZrrk:
+ case X86::VMOVAPDZrrk:
+ case X86::VMOVUPSZ128rrk:
+ case X86::VMOVAPSZ128rrk:
+ case X86::VMOVUPSZ256rrk:
+ case X86::VMOVAPSZ256rrk:
+ case X86::VMOVUPSZrrk:
+ case X86::VMOVAPSZrrk: {
unsigned Opc;
switch (MIOpc) {
- default: llvm_unreachable("Unreachable!");
- case X86::VMOVDQU8Z128rrk: Opc = X86::VPBLENDMBZ128rrk; break;
- case X86::VMOVDQU8Z256rrk: Opc = X86::VPBLENDMBZ256rrk; break;
- case X86::VMOVDQU8Zrrk: Opc = X86::VPBLENDMBZrrk; break;
- case X86::VMOVDQU16Z128rrk: Opc = X86::VPBLENDMWZ128rrk; break;
- case X86::VMOVDQU16Z256rrk: Opc = X86::VPBLENDMWZ256rrk; break;
- case X86::VMOVDQU16Zrrk: Opc = X86::VPBLENDMWZrrk; break;
- case X86::VMOVDQU32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break;
- case X86::VMOVDQU32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break;
- case X86::VMOVDQU32Zrrk: Opc = X86::VPBLENDMDZrrk; break;
- case X86::VMOVDQU64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break;
- case X86::VMOVDQU64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break;
- case X86::VMOVDQU64Zrrk: Opc = X86::VPBLENDMQZrrk; break;
- case X86::VMOVUPDZ128rrk: Opc = X86::VBLENDMPDZ128rrk; break;
- case X86::VMOVUPDZ256rrk: Opc = X86::VBLENDMPDZ256rrk; break;
- case X86::VMOVUPDZrrk: Opc = X86::VBLENDMPDZrrk; break;
- case X86::VMOVUPSZ128rrk: Opc = X86::VBLENDMPSZ128rrk; break;
- case X86::VMOVUPSZ256rrk: Opc = X86::VBLENDMPSZ256rrk; break;
- case X86::VMOVUPSZrrk: Opc = X86::VBLENDMPSZrrk; break;
- case X86::VMOVDQA32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break;
- case X86::VMOVDQA32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break;
- case X86::VMOVDQA32Zrrk: Opc = X86::VPBLENDMDZrrk; break;
- case X86::VMOVDQA64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break;
- case X86::VMOVDQA64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break;
- case X86::VMOVDQA64Zrrk: Opc = X86::VPBLENDMQZrrk; break;
- case X86::VMOVAPDZ128rrk: Opc = X86::VBLENDMPDZ128rrk; break;
- case X86::VMOVAPDZ256rrk: Opc = X86::VBLENDMPDZ256rrk; break;
- case X86::VMOVAPDZrrk: Opc = X86::VBLENDMPDZrrk; break;
- case X86::VMOVAPSZ128rrk: Opc = X86::VBLENDMPSZ128rrk; break;
- case X86::VMOVAPSZ256rrk: Opc = X86::VBLENDMPSZ256rrk; break;
- case X86::VMOVAPSZrrk: Opc = X86::VBLENDMPSZrrk; break;
+ default:
+ llvm_unreachable("Unreachable!");
+ case X86::VMOVDQU8Z128rrk:
+ Opc = X86::VPBLENDMBZ128rrk;
+ break;
+ case X86::VMOVDQU8Z256rrk:
+ Opc = X86::VPBLENDMBZ256rrk;
+ break;
+ case X86::VMOVDQU8Zrrk:
+ Opc = X86::VPBLENDMBZrrk;
+ break;
+ case X86::VMOVDQU16Z128rrk:
+ Opc = X86::VPBLENDMWZ128rrk;
+ break;
+ case X86::VMOVDQU16Z256rrk:
+ Opc = X86::VPBLENDMWZ256rrk;
+ break;
+ case X86::VMOVDQU16Zrrk:
+ Opc = X86::VPBLENDMWZrrk;
+ break;
+ case X86::VMOVDQU32Z128rrk:
+ Opc = X86::VPBLENDMDZ128rrk;
+ break;
+ case X86::VMOVDQU32Z256rrk:
+ Opc = X86::VPBLENDMDZ256rrk;
+ break;
+ case X86::VMOVDQU32Zrrk:
+ Opc = X86::VPBLENDMDZrrk;
+ break;
+ case X86::VMOVDQU64Z128rrk:
+ Opc = X86::VPBLENDMQZ128rrk;
+ break;
+ case X86::VMOVDQU64Z256rrk:
+ Opc = X86::VPBLENDMQZ256rrk;
+ break;
+ case X86::VMOVDQU64Zrrk:
+ Opc = X86::VPBLENDMQZrrk;
+ break;
+ case X86::VMOVUPDZ128rrk:
+ Opc = X86::VBLENDMPDZ128rrk;
+ break;
+ case X86::VMOVUPDZ256rrk:
+ Opc = X86::VBLENDMPDZ256rrk;
+ break;
+ case X86::VMOVUPDZrrk:
+ Opc = X86::VBLENDMPDZrrk;
+ break;
+ case X86::VMOVUPSZ128rrk:
+ Opc = X86::VBLENDMPSZ128rrk;
+ break;
+ case X86::VMOVUPSZ256rrk:
+ Opc = X86::VBLENDMPSZ256rrk;
+ break;
+ case X86::VMOVUPSZrrk:
+ Opc = X86::VBLENDMPSZrrk;
+ break;
+ case X86::VMOVDQA32Z128rrk:
+ Opc = X86::VPBLENDMDZ128rrk;
+ break;
+ case X86::VMOVDQA32Z256rrk:
+ Opc = X86::VPBLENDMDZ256rrk;
+ break;
+ case X86::VMOVDQA32Zrrk:
+ Opc = X86::VPBLENDMDZrrk;
+ break;
+ case X86::VMOVDQA64Z128rrk:
+ Opc = X86::VPBLENDMQZ128rrk;
+ break;
+ case X86::VMOVDQA64Z256rrk:
+ Opc = X86::VPBLENDMQZ256rrk;
+ break;
+ case X86::VMOVDQA64Zrrk:
+ Opc = X86::VPBLENDMQZrrk;
+ break;
+ case X86::VMOVAPDZ128rrk:
+ Opc = X86::VBLENDMPDZ128rrk;
+ break;
+ case X86::VMOVAPDZ256rrk:
+ Opc = X86::VBLENDMPDZ256rrk;
+ break;
+ case X86::VMOVAPDZrrk:
+ Opc = X86::VBLENDMPDZrrk;
+ break;
+ case X86::VMOVAPSZ128rrk:
+ Opc = X86::VBLENDMPSZ128rrk;
+ break;
+ case X86::VMOVAPSZ256rrk:
+ Opc = X86::VBLENDMPSZ256rrk;
+ break;
+ case X86::VMOVAPSZrrk:
+ Opc = X86::VBLENDMPSZrrk;
+ break;
}
NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
- .add(Dest)
- .add(MI.getOperand(2))
- .add(Src)
- .add(MI.getOperand(3));
+ .add(Dest)
+ .add(MI.getOperand(2))
+ .add(Src)
+ .add(MI.getOperand(3));
NumRegOperands = 4;
break;
}
}
- if (!NewMI) return nullptr;
+ if (!NewMI)
+ return nullptr;
- if (LV) { // Update live variables
+ if (LV) { // Update live variables
for (unsigned I = 0; I < NumRegOperands; ++I) {
MachineOperand &Op = MI.getOperand(I);
if (Op.isReg() && (Op.isDead() || Op.isKill()))
@@ -1841,8 +2064,8 @@ unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(
"Intrinsic instructions can't commute operand 1");
// Determine which case this commute is or if it can't be done.
- unsigned Case = getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1,
- SrcOpIdx2);
+ unsigned Case =
+ getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1, SrcOpIdx2);
assert(Case < 3 && "Unexpected case number!");
// Define the FMA forms mapping array that helps to map input FMA form
@@ -1852,22 +2075,21 @@ unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(
const unsigned Form213Index = 1;
const unsigned Form231Index = 2;
static const unsigned FormMapping[][3] = {
- // 0: SrcOpIdx1 == 1 && SrcOpIdx2 == 2;
- // FMA132 A, C, b; ==> FMA231 C, A, b;
- // FMA213 B, A, c; ==> FMA213 A, B, c;
- // FMA231 C, A, b; ==> FMA132 A, C, b;
- { Form231Index, Form213Index, Form132Index },
- // 1: SrcOpIdx1 == 1 && SrcOpIdx2 == 3;
- // FMA132 A, c, B; ==> FMA132 B, c, A;
- // FMA213 B, a, C; ==> FMA231 C, a, B;
- // FMA231 C, a, B; ==> FMA213 B, a, C;
- { Form132Index, Form231Index, Form213Index },
- // 2: SrcOpIdx1 == 2 && SrcOpIdx2 == 3;
- // FMA132 a, C, B; ==> FMA213 a, B, C;
- // FMA213 b, A, C; ==> FMA132 b, C, A;
- // FMA231 c, A, B; ==> FMA231 c, B, A;
- { Form213Index, Form132Index, Form231Index }
- };
+ // 0: SrcOpIdx1 == 1 && SrcOpIdx2 == 2;
+ // FMA132 A, C, b; ==> FMA231 C, A, b;
+ // FMA213 B, A, c; ==> FMA213 A, B, c;
+ // FMA231 C, A, b; ==> FMA132 A, C, b;
+ {Form231Index, Form213Index, Form132Index},
+ // 1: SrcOpIdx1 == 1 && SrcOpIdx2 == 3;
+ // FMA132 A, c, B; ==> FMA132 B, c, A;
+ // FMA213 B, a, C; ==> FMA231 C, a, B;
+ // FMA231 C, a, B; ==> FMA213 B, a, C;
+ {Form132Index, Form231Index, Form213Index},
+ // 2: SrcOpIdx1 == 2 && SrcOpIdx2 == 3;
+ // FMA132 a, C, B; ==> FMA213 a, B, C;
+ // FMA213 b, A, C; ==> FMA132 b, C, A;
+ // FMA231 c, A, B; ==> FMA231 c, B, A;
+ {Form213Index, Form132Index, Form231Index}};
unsigned FMAForms[3];
FMAForms[0] = FMA3Group.get132Opcode();
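
The FormMapping table encodes, for each commute case, which FMA form computes the same value once two of the three sources swap. A standalone sketch of the lookup, mirroring the comments above (the numeric indices are illustrative stand-ins for the Form*Index constants):

    #include <cassert>

    // FormMapping[Case][Form] = form to use after the operand swap.
    // Indices: 0 = FMA132, 1 = FMA213, 2 = FMA231.
    static const unsigned FormMapping[3][3] = {
        {2, 1, 0}, // Case 0: swap src1/src2
        {0, 2, 1}, // Case 1: swap src1/src3
        {1, 0, 2}, // Case 2: swap src2/src3
    };

    int main() {
      // "FMA132 A, C, b ==> FMA231 C, A, b": commuting the first two
      // sources of an FMA132 yields an FMA231.
      assert(FormMapping[0][0] == 2);
      // FMA213 is symmetric in its first two sources, so its form is kept.
      assert(FormMapping[0][1] == 1);
    }
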
@@ -1885,63 +2107,86 @@ unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(
static void commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1,
unsigned SrcOpIdx2) {
// Determine which case this commute is or if it can't be done.
- unsigned Case = getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1,
- SrcOpIdx2);
+ unsigned Case =
+ getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1, SrcOpIdx2);
assert(Case < 3 && "Unexpected case value!");
// For each case we need to swap two pairs of bits in the final immediate.
static const uint8_t SwapMasks[3][4] = {
- { 0x04, 0x10, 0x08, 0x20 }, // Swap bits 2/4 and 3/5.
- { 0x02, 0x10, 0x08, 0x40 }, // Swap bits 1/4 and 3/6.
- { 0x02, 0x04, 0x20, 0x40 }, // Swap bits 1/2 and 5/6.
+ {0x04, 0x10, 0x08, 0x20}, // Swap bits 2/4 and 3/5.
+ {0x02, 0x10, 0x08, 0x40}, // Swap bits 1/4 and 3/6.
+ {0x02, 0x04, 0x20, 0x40}, // Swap bits 1/2 and 5/6.
};
- uint8_t Imm = MI.getOperand(MI.getNumOperands()-1).getImm();
+ uint8_t Imm = MI.getOperand(MI.getNumOperands() - 1).getImm();
// Clear out the bits we are swapping.
uint8_t NewImm = Imm & ~(SwapMasks[Case][0] | SwapMasks[Case][1] |
SwapMasks[Case][2] | SwapMasks[Case][3]);
// If the immediate had a bit of the pair set, then set the opposite bit.
- if (Imm & SwapMasks[Case][0]) NewImm |= SwapMasks[Case][1];
- if (Imm & SwapMasks[Case][1]) NewImm |= SwapMasks[Case][0];
- if (Imm & SwapMasks[Case][2]) NewImm |= SwapMasks[Case][3];
- if (Imm & SwapMasks[Case][3]) NewImm |= SwapMasks[Case][2];
- MI.getOperand(MI.getNumOperands()-1).setImm(NewImm);
+ if (Imm & SwapMasks[Case][0])
+ NewImm |= SwapMasks[Case][1];
+ if (Imm & SwapMasks[Case][1])
+ NewImm |= SwapMasks[Case][0];
+ if (Imm & SwapMasks[Case][2])
+ NewImm |= SwapMasks[Case][3];
+ if (Imm & SwapMasks[Case][3])
+ NewImm |= SwapMasks[Case][2];
+ MI.getOperand(MI.getNumOperands() - 1).setImm(NewImm);
}
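
A standalone model of the immediate rewrite above, with a hand-checked example (the 0xCA/0xAC pair is illustrative, not taken from the patch). The VPTERNLOG immediate is an 8-entry truth table indexed by (src1, src2, src3), so swapping two sources permutes exactly the table entries where those sources differ:

    #include <cassert>
    #include <cstdint>

    static const uint8_t SwapMasks[3][4] = {
        {0x04, 0x10, 0x08, 0x20}, // swap src1/src2: bits 2<->4 and 3<->5
        {0x02, 0x10, 0x08, 0x40}, // swap src1/src3: bits 1<->4 and 3<->6
        {0x02, 0x04, 0x20, 0x40}, // swap src2/src3: bits 1<->2 and 5<->6
    };

    uint8_t swapTernlogImm(unsigned Case, uint8_t Imm) {
      uint8_t New = Imm & ~(SwapMasks[Case][0] | SwapMasks[Case][1] |
                            SwapMasks[Case][2] | SwapMasks[Case][3]);
      if (Imm & SwapMasks[Case][0]) New |= SwapMasks[Case][1];
      if (Imm & SwapMasks[Case][1]) New |= SwapMasks[Case][0];
      if (Imm & SwapMasks[Case][2]) New |= SwapMasks[Case][3];
      if (Imm & SwapMasks[Case][3]) New |= SwapMasks[Case][2];
      return New;
    }

    int main() {
      // 0xCA encodes src1 ? src2 : src3; swapping src2/src3 gives
      // src1 ? src3 : src2, whose truth table is 0xAC.
      assert(swapTernlogImm(2, 0xCA) == 0xAC);
      // The rewrite is an involution: swapping twice restores the immediate.
      assert(swapTernlogImm(0, swapTernlogImm(0, 0xE8)) == 0xE8);
    }
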
// Returns true if this is a VPERMI2 or VPERMT2 instruction that can be
// commuted.
static bool isCommutableVPERMV3Instruction(unsigned Opcode) {
-#define VPERM_CASES(Suffix) \
- case X86::VPERMI2##Suffix##128rr: case X86::VPERMT2##Suffix##128rr: \
- case X86::VPERMI2##Suffix##256rr: case X86::VPERMT2##Suffix##256rr: \
- case X86::VPERMI2##Suffix##rr: case X86::VPERMT2##Suffix##rr: \
- case X86::VPERMI2##Suffix##128rm: case X86::VPERMT2##Suffix##128rm: \
- case X86::VPERMI2##Suffix##256rm: case X86::VPERMT2##Suffix##256rm: \
- case X86::VPERMI2##Suffix##rm: case X86::VPERMT2##Suffix##rm: \
- case X86::VPERMI2##Suffix##128rrkz: case X86::VPERMT2##Suffix##128rrkz: \
- case X86::VPERMI2##Suffix##256rrkz: case X86::VPERMT2##Suffix##256rrkz: \
- case X86::VPERMI2##Suffix##rrkz: case X86::VPERMT2##Suffix##rrkz: \
- case X86::VPERMI2##Suffix##128rmkz: case X86::VPERMT2##Suffix##128rmkz: \
- case X86::VPERMI2##Suffix##256rmkz: case X86::VPERMT2##Suffix##256rmkz: \
- case X86::VPERMI2##Suffix##rmkz: case X86::VPERMT2##Suffix##rmkz:
-
-#define VPERM_CASES_BROADCAST(Suffix) \
- VPERM_CASES(Suffix) \
- case X86::VPERMI2##Suffix##128rmb: case X86::VPERMT2##Suffix##128rmb: \
- case X86::VPERMI2##Suffix##256rmb: case X86::VPERMT2##Suffix##256rmb: \
- case X86::VPERMI2##Suffix##rmb: case X86::VPERMT2##Suffix##rmb: \
- case X86::VPERMI2##Suffix##128rmbkz: case X86::VPERMT2##Suffix##128rmbkz: \
- case X86::VPERMI2##Suffix##256rmbkz: case X86::VPERMT2##Suffix##256rmbkz: \
- case X86::VPERMI2##Suffix##rmbkz: case X86::VPERMT2##Suffix##rmbkz:
+#define VPERM_CASES(Suffix) \
+ case X86::VPERMI2##Suffix##Z128rr: \
+ case X86::VPERMT2##Suffix##Z128rr: \
+ case X86::VPERMI2##Suffix##Z256rr: \
+ case X86::VPERMT2##Suffix##Z256rr: \
+ case X86::VPERMI2##Suffix##Zrr: \
+ case X86::VPERMT2##Suffix##Zrr: \
+ case X86::VPERMI2##Suffix##Z128rm: \
+ case X86::VPERMT2##Suffix##Z128rm: \
+ case X86::VPERMI2##Suffix##Z256rm: \
+ case X86::VPERMT2##Suffix##Z256rm: \
+ case X86::VPERMI2##Suffix##Zrm: \
+ case X86::VPERMT2##Suffix##Zrm: \
+ case X86::VPERMI2##Suffix##Z128rrkz: \
+ case X86::VPERMT2##Suffix##Z128rrkz: \
+ case X86::VPERMI2##Suffix##Z256rrkz: \
+ case X86::VPERMT2##Suffix##Z256rrkz: \
+ case X86::VPERMI2##Suffix##Zrrkz: \
+ case X86::VPERMT2##Suffix##Zrrkz: \
+ case X86::VPERMI2##Suffix##Z128rmkz: \
+ case X86::VPERMT2##Suffix##Z128rmkz: \
+ case X86::VPERMI2##Suffix##Z256rmkz: \
+ case X86::VPERMT2##Suffix##Z256rmkz: \
+ case X86::VPERMI2##Suffix##Zrmkz: \
+ case X86::VPERMT2##Suffix##Zrmkz:
+
+#define VPERM_CASES_BROADCAST(Suffix) \
+ VPERM_CASES(Suffix) \
+ case X86::VPERMI2##Suffix##Z128rmb: \
+ case X86::VPERMT2##Suffix##Z128rmb: \
+ case X86::VPERMI2##Suffix##Z256rmb: \
+ case X86::VPERMT2##Suffix##Z256rmb: \
+ case X86::VPERMI2##Suffix##Zrmb: \
+ case X86::VPERMT2##Suffix##Zrmb: \
+ case X86::VPERMI2##Suffix##Z128rmbkz: \
+ case X86::VPERMT2##Suffix##Z128rmbkz: \
+ case X86::VPERMI2##Suffix##Z256rmbkz: \
+ case X86::VPERMT2##Suffix##Z256rmbkz: \
+ case X86::VPERMI2##Suffix##Zrmbkz: \
+ case X86::VPERMT2##Suffix##Zrmbkz:
switch (Opcode) {
- default: return false;
- VPERM_CASES(B)
- VPERM_CASES_BROADCAST(D)
- VPERM_CASES_BROADCAST(PD)
- VPERM_CASES_BROADCAST(PS)
- VPERM_CASES_BROADCAST(Q)
- VPERM_CASES(W)
+ default:
+ return false;
+ VPERM_CASES(B)
+ VPERM_CASES_BROADCAST(D)
+ VPERM_CASES_BROADCAST(PD)
+ VPERM_CASES_BROADCAST(PS)
+ VPERM_CASES_BROADCAST(Q)
+ VPERM_CASES(W)
return true;
}
#undef VPERM_CASES_BROADCAST
@@ -1951,42 +2196,60 @@ static bool isCommutableVPERMV3Instruction(unsigned Opcode) {
// Returns commuted opcode for VPERMI2 and VPERMT2 instructions by switching
// from the I opcode to the T opcode and vice versa.
static unsigned getCommutedVPERMV3Opcode(unsigned Opcode) {
-#define VPERM_CASES(Orig, New) \
- case X86::Orig##128rr: return X86::New##128rr; \
- case X86::Orig##128rrkz: return X86::New##128rrkz; \
- case X86::Orig##128rm: return X86::New##128rm; \
- case X86::Orig##128rmkz: return X86::New##128rmkz; \
- case X86::Orig##256rr: return X86::New##256rr; \
- case X86::Orig##256rrkz: return X86::New##256rrkz; \
- case X86::Orig##256rm: return X86::New##256rm; \
- case X86::Orig##256rmkz: return X86::New##256rmkz; \
- case X86::Orig##rr: return X86::New##rr; \
- case X86::Orig##rrkz: return X86::New##rrkz; \
- case X86::Orig##rm: return X86::New##rm; \
- case X86::Orig##rmkz: return X86::New##rmkz;
-
-#define VPERM_CASES_BROADCAST(Orig, New) \
- VPERM_CASES(Orig, New) \
- case X86::Orig##128rmb: return X86::New##128rmb; \
- case X86::Orig##128rmbkz: return X86::New##128rmbkz; \
- case X86::Orig##256rmb: return X86::New##256rmb; \
- case X86::Orig##256rmbkz: return X86::New##256rmbkz; \
- case X86::Orig##rmb: return X86::New##rmb; \
- case X86::Orig##rmbkz: return X86::New##rmbkz;
+#define VPERM_CASES(Orig, New) \
+ case X86::Orig##Z128rr: \
+ return X86::New##Z128rr; \
+ case X86::Orig##Z128rrkz: \
+ return X86::New##Z128rrkz; \
+ case X86::Orig##Z128rm: \
+ return X86::New##Z128rm; \
+ case X86::Orig##Z128rmkz: \
+ return X86::New##Z128rmkz; \
+ case X86::Orig##Z256rr: \
+ return X86::New##Z256rr; \
+ case X86::Orig##Z256rrkz: \
+ return X86::New##Z256rrkz; \
+ case X86::Orig##Z256rm: \
+ return X86::New##Z256rm; \
+ case X86::Orig##Z256rmkz: \
+ return X86::New##Z256rmkz; \
+ case X86::Orig##Zrr: \
+ return X86::New##Zrr; \
+ case X86::Orig##Zrrkz: \
+ return X86::New##Zrrkz; \
+ case X86::Orig##Zrm: \
+ return X86::New##Zrm; \
+ case X86::Orig##Zrmkz: \
+ return X86::New##Zrmkz;
+
+#define VPERM_CASES_BROADCAST(Orig, New) \
+ VPERM_CASES(Orig, New) \
+ case X86::Orig##Z128rmb: \
+ return X86::New##Z128rmb; \
+ case X86::Orig##Z128rmbkz: \
+ return X86::New##Z128rmbkz; \
+ case X86::Orig##Z256rmb: \
+ return X86::New##Z256rmb; \
+ case X86::Orig##Z256rmbkz: \
+ return X86::New##Z256rmbkz; \
+ case X86::Orig##Zrmb: \
+ return X86::New##Zrmb; \
+ case X86::Orig##Zrmbkz: \
+ return X86::New##Zrmbkz;
switch (Opcode) {
- VPERM_CASES(VPERMI2B, VPERMT2B)
- VPERM_CASES_BROADCAST(VPERMI2D, VPERMT2D)
- VPERM_CASES_BROADCAST(VPERMI2PD, VPERMT2PD)
- VPERM_CASES_BROADCAST(VPERMI2PS, VPERMT2PS)
- VPERM_CASES_BROADCAST(VPERMI2Q, VPERMT2Q)
- VPERM_CASES(VPERMI2W, VPERMT2W)
- VPERM_CASES(VPERMT2B, VPERMI2B)
- VPERM_CASES_BROADCAST(VPERMT2D, VPERMI2D)
- VPERM_CASES_BROADCAST(VPERMT2PD, VPERMI2PD)
- VPERM_CASES_BROADCAST(VPERMT2PS, VPERMI2PS)
- VPERM_CASES_BROADCAST(VPERMT2Q, VPERMI2Q)
- VPERM_CASES(VPERMT2W, VPERMI2W)
+ VPERM_CASES(VPERMI2B, VPERMT2B)
+ VPERM_CASES_BROADCAST(VPERMI2D, VPERMT2D)
+ VPERM_CASES_BROADCAST(VPERMI2PD, VPERMT2PD)
+ VPERM_CASES_BROADCAST(VPERMI2PS, VPERMT2PS)
+ VPERM_CASES_BROADCAST(VPERMI2Q, VPERMT2Q)
+ VPERM_CASES(VPERMI2W, VPERMT2W)
+ VPERM_CASES(VPERMT2B, VPERMI2B)
+ VPERM_CASES_BROADCAST(VPERMT2D, VPERMI2D)
+ VPERM_CASES_BROADCAST(VPERMT2PD, VPERMI2PD)
+ VPERM_CASES_BROADCAST(VPERMT2PS, VPERMI2PS)
+ VPERM_CASES_BROADCAST(VPERMT2Q, VPERMI2Q)
+ VPERM_CASES(VPERMT2W, VPERMI2W)
}
llvm_unreachable("Unreachable!");
@@ -2009,17 +2272,37 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
- case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I)
+ case X86::SHLD64rri8: { // A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B,
+ // (64-I)
unsigned Opc;
unsigned Size;
switch (MI.getOpcode()) {
- default: llvm_unreachable("Unreachable!");
- case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
- case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
- case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
- case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
- case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break;
- case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
+ default:
+ llvm_unreachable("Unreachable!");
+ case X86::SHRD16rri8:
+ Size = 16;
+ Opc = X86::SHLD16rri8;
+ break;
+ case X86::SHLD16rri8:
+ Size = 16;
+ Opc = X86::SHRD16rri8;
+ break;
+ case X86::SHRD32rri8:
+ Size = 32;
+ Opc = X86::SHLD32rri8;
+ break;
+ case X86::SHLD32rri8:
+ Size = 32;
+ Opc = X86::SHRD32rri8;
+ break;
+ case X86::SHRD64rri8:
+ Size = 64;
+ Opc = X86::SHLD64rri8;
+ break;
+ case X86::SHLD64rri8:
+ Size = 64;
+ Opc = X86::SHRD64rri8;
+ break;
}
unsigned Amt = MI.getOperand(3).getImm();
auto &WorkingMI = cloneIfNew(MI);
@@ -2047,19 +2330,32 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
if (MI.getParent()->getParent()->getFunction().hasOptSize()) {
unsigned Mask, Opc;
switch (MI.getOpcode()) {
- default: llvm_unreachable("Unreachable!");
- case X86::BLENDPDrri: Opc = X86::MOVSDrr; Mask = 0x03; break;
- case X86::BLENDPSrri: Opc = X86::MOVSSrr; Mask = 0x0F; break;
- case X86::VBLENDPDrri: Opc = X86::VMOVSDrr; Mask = 0x03; break;
- case X86::VBLENDPSrri: Opc = X86::VMOVSSrr; Mask = 0x0F; break;
+ default:
+ llvm_unreachable("Unreachable!");
+ case X86::BLENDPDrri:
+ Opc = X86::MOVSDrr;
+ Mask = 0x03;
+ break;
+ case X86::BLENDPSrri:
+ Opc = X86::MOVSSrr;
+ Mask = 0x0F;
+ break;
+ case X86::VBLENDPDrri:
+ Opc = X86::VMOVSDrr;
+ Mask = 0x03;
+ break;
+ case X86::VBLENDPSrri:
+ Opc = X86::VMOVSSrr;
+ Mask = 0x0F;
+ break;
}
if ((MI.getOperand(3).getImm() ^ Mask) == 1) {
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(Opc));
WorkingMI.removeOperand(3);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI,
- /*NewMI=*/false,
- OpIdx1, OpIdx2);
+ /*NewMI=*/false, OpIdx1,
+ OpIdx2);
}
}
[[fallthrough]];
@@ -2069,21 +2365,44 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
case X86::VPBLENDDrri:
case X86::VPBLENDWrri:
case X86::VPBLENDDYrri:
- case X86::VPBLENDWYrri:{
+ case X86::VPBLENDWYrri: {
int8_t Mask;
switch (MI.getOpcode()) {
- default: llvm_unreachable("Unreachable!");
- case X86::BLENDPDrri: Mask = (int8_t)0x03; break;
- case X86::BLENDPSrri: Mask = (int8_t)0x0F; break;
- case X86::PBLENDWrri: Mask = (int8_t)0xFF; break;
- case X86::VBLENDPDrri: Mask = (int8_t)0x03; break;
- case X86::VBLENDPSrri: Mask = (int8_t)0x0F; break;
- case X86::VBLENDPDYrri: Mask = (int8_t)0x0F; break;
- case X86::VBLENDPSYrri: Mask = (int8_t)0xFF; break;
- case X86::VPBLENDDrri: Mask = (int8_t)0x0F; break;
- case X86::VPBLENDWrri: Mask = (int8_t)0xFF; break;
- case X86::VPBLENDDYrri: Mask = (int8_t)0xFF; break;
- case X86::VPBLENDWYrri: Mask = (int8_t)0xFF; break;
+ default:
+ llvm_unreachable("Unreachable!");
+ case X86::BLENDPDrri:
+ Mask = (int8_t)0x03;
+ break;
+ case X86::BLENDPSrri:
+ Mask = (int8_t)0x0F;
+ break;
+ case X86::PBLENDWrri:
+ Mask = (int8_t)0xFF;
+ break;
+ case X86::VBLENDPDrri:
+ Mask = (int8_t)0x03;
+ break;
+ case X86::VBLENDPSrri:
+ Mask = (int8_t)0x0F;
+ break;
+ case X86::VBLENDPDYrri:
+ Mask = (int8_t)0x0F;
+ break;
+ case X86::VBLENDPSYrri:
+ Mask = (int8_t)0xFF;
+ break;
+ case X86::VPBLENDDrri:
+ Mask = (int8_t)0x0F;
+ break;
+ case X86::VPBLENDWrri:
+ Mask = (int8_t)0xFF;
+ break;
+ case X86::VPBLENDDYrri:
+ Mask = (int8_t)0xFF;
+ break;
+ case X86::VPBLENDWYrri:
+ Mask = (int8_t)0xFF;
+ break;
}
// Only the least significant bits of Imm are used.
// Using int8_t to ensure it will be sign extended to the int64_t that
@@ -2119,16 +2438,29 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
case X86::MOVSDrr:
case X86::MOVSSrr:
case X86::VMOVSDrr:
- case X86::VMOVSSrr:{
+ case X86::VMOVSSrr: {
// On SSE41 or later we can commute a MOVSS/MOVSD to a BLENDPS/BLENDPD.
if (Subtarget.hasSSE41()) {
unsigned Mask, Opc;
switch (MI.getOpcode()) {
- default: llvm_unreachable("Unreachable!");
- case X86::MOVSDrr: Opc = X86::BLENDPDrri; Mask = 0x02; break;
- case X86::MOVSSrr: Opc = X86::BLENDPSrri; Mask = 0x0E; break;
- case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
- case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
+ default:
+ llvm_unreachable("Unreachable!");
+ case X86::MOVSDrr:
+ Opc = X86::BLENDPDrri;
+ Mask = 0x02;
+ break;
+ case X86::MOVSSrr:
+ Opc = X86::BLENDPSrri;
+ Mask = 0x0E;
+ break;
+ case X86::VMOVSDrr:
+ Opc = X86::VBLENDPDrri;
+ Mask = 0x02;
+ break;
+ case X86::VMOVSSrr:
+ Opc = X86::VBLENDPSrri;
+ Mask = 0x0E;
+ break;
}
auto &WorkingMI = cloneIfNew(MI);
@@ -2173,30 +2505,54 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
- case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri:
- case X86::VPCMPBZ256rri: case X86::VPCMPUBZ256rri:
- case X86::VPCMPBZrri: case X86::VPCMPUBZrri:
- case X86::VPCMPDZ128rri: case X86::VPCMPUDZ128rri:
- case X86::VPCMPDZ256rri: case X86::VPCMPUDZ256rri:
- case X86::VPCMPDZrri: case X86::VPCMPUDZrri:
- case X86::VPCMPQZ128rri: case X86::VPCMPUQZ128rri:
- case X86::VPCMPQZ256rri: case X86::VPCMPUQZ256rri:
- case X86::VPCMPQZrri: case X86::VPCMPUQZrri:
- case X86::VPCMPWZ128rri: case X86::VPCMPUWZ128rri:
- case X86::VPCMPWZ256rri: case X86::VPCMPUWZ256rri:
- case X86::VPCMPWZrri: case X86::VPCMPUWZrri:
- case X86::VPCMPBZ128rrik: case X86::VPCMPUBZ128rrik:
- case X86::VPCMPBZ256rrik: case X86::VPCMPUBZ256rrik:
- case X86::VPCMPBZrrik: case X86::VPCMPUBZrrik:
- case X86::VPCMPDZ128rrik: case X86::VPCMPUDZ128rrik:
- case X86::VPCMPDZ256rrik: case X86::VPCMPUDZ256rrik:
- case X86::VPCMPDZrrik: case X86::VPCMPUDZrrik:
- case X86::VPCMPQZ128rrik: case X86::VPCMPUQZ128rrik:
- case X86::VPCMPQZ256rrik: case X86::VPCMPUQZ256rrik:
- case X86::VPCMPQZrrik: case X86::VPCMPUQZrrik:
- case X86::VPCMPWZ128rrik: case X86::VPCMPUWZ128rrik:
- case X86::VPCMPWZ256rrik: case X86::VPCMPUWZ256rrik:
- case X86::VPCMPWZrrik: case X86::VPCMPUWZrrik: {
+ case X86::VPCMPBZ128rri:
+ case X86::VPCMPUBZ128rri:
+ case X86::VPCMPBZ256rri:
+ case X86::VPCMPUBZ256rri:
+ case X86::VPCMPBZrri:
+ case X86::VPCMPUBZrri:
+ case X86::VPCMPDZ128rri:
+ case X86::VPCMPUDZ128rri:
+ case X86::VPCMPDZ256rri:
+ case X86::VPCMPUDZ256rri:
+ case X86::VPCMPDZrri:
+ case X86::VPCMPUDZrri:
+ case X86::VPCMPQZ128rri:
+ case X86::VPCMPUQZ128rri:
+ case X86::VPCMPQZ256rri:
+ case X86::VPCMPUQZ256rri:
+ case X86::VPCMPQZrri:
+ case X86::VPCMPUQZrri:
+ case X86::VPCMPWZ128rri:
+ case X86::VPCMPUWZ128rri:
+ case X86::VPCMPWZ256rri:
+ case X86::VPCMPUWZ256rri:
+ case X86::VPCMPWZrri:
+ case X86::VPCMPUWZrri:
+ case X86::VPCMPBZ128rrik:
+ case X86::VPCMPUBZ128rrik:
+ case X86::VPCMPBZ256rrik:
+ case X86::VPCMPUBZ256rrik:
+ case X86::VPCMPBZrrik:
+ case X86::VPCMPUBZrrik:
+ case X86::VPCMPDZ128rrik:
+ case X86::VPCMPUDZ128rrik:
+ case X86::VPCMPDZ256rrik:
+ case X86::VPCMPUDZ256rrik:
+ case X86::VPCMPDZrrik:
+ case X86::VPCMPUDZrrik:
+ case X86::VPCMPQZ128rrik:
+ case X86::VPCMPUQZ128rrik:
+ case X86::VPCMPQZ256rrik:
+ case X86::VPCMPUQZ256rrik:
+ case X86::VPCMPQZrrik:
+ case X86::VPCMPUQZrrik:
+ case X86::VPCMPWZ128rrik:
+ case X86::VPCMPUWZ128rrik:
+ case X86::VPCMPWZ256rrik:
+ case X86::VPCMPUWZ256rrik:
+ case X86::VPCMPWZrrik:
+ case X86::VPCMPUWZrrik: {
// Flip comparison mode immediate (if necessary).
unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x7;
Imm = X86::getSwappedVPCMPImm(Imm);
@@ -2205,10 +2561,14 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
- case X86::VPCOMBri: case X86::VPCOMUBri:
- case X86::VPCOMDri: case X86::VPCOMUDri:
- case X86::VPCOMQri: case X86::VPCOMUQri:
- case X86::VPCOMWri: case X86::VPCOMUWri: {
+ case X86::VPCOMBri:
+ case X86::VPCOMUBri:
+ case X86::VPCOMDri:
+ case X86::VPCOMUDri:
+ case X86::VPCOMQri:
+ case X86::VPCOMUQri:
+ case X86::VPCOMWri:
+ case X86::VPCOMUWri: {
// Flip comparison mode immediate (if necessary).
unsigned Imm = MI.getOperand(3).getImm() & 0x7;
Imm = X86::getSwappedVPCOMImm(Imm);
@@ -2236,7 +2596,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
case X86::VCMPPDZ256rrik:
case X86::VCMPPSZ256rrik: {
unsigned Imm =
- MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 0x1f;
+ MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 0x1f;
Imm = X86::getSwappedVCMPImm(Imm);
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.getOperand(MI.getNumExplicitOperands() - 1).setImm(Imm);
@@ -2264,20 +2624,35 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
unsigned Opc = MI.getOpcode();
switch (Opc) {
- default: llvm_unreachable("Unreachable!");
- case X86::MOVHLPSrr: Opc = X86::UNPCKHPDrr; break;
- case X86::UNPCKHPDrr: Opc = X86::MOVHLPSrr; break;
- case X86::VMOVHLPSrr: Opc = X86::VUNPCKHPDrr; break;
- case X86::VUNPCKHPDrr: Opc = X86::VMOVHLPSrr; break;
- case X86::VMOVHLPSZrr: Opc = X86::VUNPCKHPDZ128rr; break;
- case X86::VUNPCKHPDZ128rr: Opc = X86::VMOVHLPSZrr; break;
+ default:
+ llvm_unreachable("Unreachable!");
+ case X86::MOVHLPSrr:
+ Opc = X86::UNPCKHPDrr;
+ break;
+ case X86::UNPCKHPDrr:
+ Opc = X86::MOVHLPSrr;
+ break;
+ case X86::VMOVHLPSrr:
+ Opc = X86::VUNPCKHPDrr;
+ break;
+ case X86::VUNPCKHPDrr:
+ Opc = X86::VMOVHLPSrr;
+ break;
+ case X86::VMOVHLPSZrr:
+ Opc = X86::VUNPCKHPDZ128rr;
+ break;
+ case X86::VUNPCKHPDZ128rr:
+ Opc = X86::VMOVHLPSZrr;
+ break;
}
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(Opc));
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
- case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr: {
+ case X86::CMOV16rr:
+ case X86::CMOV32rr:
+ case X86::CMOV64rr: {
auto &WorkingMI = cloneIfNew(MI);
unsigned OpNo = MI.getDesc().getNumOperands() - 1;
X86::CondCode CC = static_cast<X86::CondCode>(MI.getOperand(OpNo).getImm());
@@ -2285,24 +2660,36 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
- case X86::VPTERNLOGDZrri: case X86::VPTERNLOGDZrmi:
- case X86::VPTERNLOGDZ128rri: case X86::VPTERNLOGDZ128rmi:
- case X86::VPTERNLOGDZ256rri: case X86::VPTERNLOGDZ256rmi:
- case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi:
- case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi:
- case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi:
+ case X86::VPTERNLOGDZrri:
+ case X86::VPTERNLOGDZrmi:
+ case X86::VPTERNLOGDZ128rri:
+ case X86::VPTERNLOGDZ128rmi:
+ case X86::VPTERNLOGDZ256rri:
+ case X86::VPTERNLOGDZ256rmi:
+ case X86::VPTERNLOGQZrri:
+ case X86::VPTERNLOGQZrmi:
+ case X86::VPTERNLOGQZ128rri:
+ case X86::VPTERNLOGQZ128rmi:
+ case X86::VPTERNLOGQZ256rri:
+ case X86::VPTERNLOGQZ256rmi:
case X86::VPTERNLOGDZrrik:
case X86::VPTERNLOGDZ128rrik:
case X86::VPTERNLOGDZ256rrik:
case X86::VPTERNLOGQZrrik:
case X86::VPTERNLOGQZ128rrik:
case X86::VPTERNLOGQZ256rrik:
- case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz:
- case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
- case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
- case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz:
- case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
- case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz:
+ case X86::VPTERNLOGDZrrikz:
+ case X86::VPTERNLOGDZrmikz:
+ case X86::VPTERNLOGDZ128rrikz:
+ case X86::VPTERNLOGDZ128rmikz:
+ case X86::VPTERNLOGDZ256rrikz:
+ case X86::VPTERNLOGDZ256rmikz:
+ case X86::VPTERNLOGQZrrikz:
+ case X86::VPTERNLOGQZrmikz:
+ case X86::VPTERNLOGQZ128rrikz:
+ case X86::VPTERNLOGQZ128rmikz:
+ case X86::VPTERNLOGQZ256rrikz:
+ case X86::VPTERNLOGQZ256rmikz:
case X86::VPTERNLOGDZ128rmbi:
case X86::VPTERNLOGDZ256rmbi:
case X86::VPTERNLOGDZrmbi:
@@ -2329,11 +2716,11 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
OpIdx1, OpIdx2);
}
- const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(),
- MI.getDesc().TSFlags);
+ const X86InstrFMA3Group *FMA3Group =
+ getFMA3Group(MI.getOpcode(), MI.getDesc().TSFlags);
if (FMA3Group) {
unsigned Opc =
- getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2, *FMA3Group);
+ getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2, *FMA3Group);
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(Opc));
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
@@ -2345,11 +2732,10 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
}
}
-bool
-X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
- unsigned &SrcOpIdx1,
- unsigned &SrcOpIdx2,
- bool IsIntrinsic) const {
+bool X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2,
+ bool IsIntrinsic) const {
uint64_t TSFlags = MI.getDesc().TSFlags;
unsigned FirstCommutableVecOp = 1;
@@ -2441,8 +2827,8 @@ X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
  // Assign the found pair of commutable indices to SrcOpIdx1 and SrcOpIdx2
// to return those values.
- if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
- CommutableOpIdx1, CommutableOpIdx2))
+ if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
+ CommutableOpIdx2))
return false;
}
@@ -2530,24 +2916,36 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
if (Subtarget.hasSSE2())
return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
return false;
- case X86::VPTERNLOGDZrri: case X86::VPTERNLOGDZrmi:
- case X86::VPTERNLOGDZ128rri: case X86::VPTERNLOGDZ128rmi:
- case X86::VPTERNLOGDZ256rri: case X86::VPTERNLOGDZ256rmi:
- case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi:
- case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi:
- case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi:
+ case X86::VPTERNLOGDZrri:
+ case X86::VPTERNLOGDZrmi:
+ case X86::VPTERNLOGDZ128rri:
+ case X86::VPTERNLOGDZ128rmi:
+ case X86::VPTERNLOGDZ256rri:
+ case X86::VPTERNLOGDZ256rmi:
+ case X86::VPTERNLOGQZrri:
+ case X86::VPTERNLOGQZrmi:
+ case X86::VPTERNLOGQZ128rri:
+ case X86::VPTERNLOGQZ128rmi:
+ case X86::VPTERNLOGQZ256rri:
+ case X86::VPTERNLOGQZ256rmi:
case X86::VPTERNLOGDZrrik:
case X86::VPTERNLOGDZ128rrik:
case X86::VPTERNLOGDZ256rrik:
case X86::VPTERNLOGQZrrik:
case X86::VPTERNLOGQZ128rrik:
case X86::VPTERNLOGQZ256rrik:
- case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz:
- case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
- case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
- case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz:
- case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
- case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz:
+ case X86::VPTERNLOGDZrrikz:
+ case X86::VPTERNLOGDZrmikz:
+ case X86::VPTERNLOGDZ128rrikz:
+ case X86::VPTERNLOGDZ128rmikz:
+ case X86::VPTERNLOGDZ256rrikz:
+ case X86::VPTERNLOGDZ256rmikz:
+ case X86::VPTERNLOGQZrrikz:
+ case X86::VPTERNLOGQZrmikz:
+ case X86::VPTERNLOGQZ128rrikz:
+ case X86::VPTERNLOGQZ128rmikz:
+ case X86::VPTERNLOGQZ256rrikz:
+ case X86::VPTERNLOGQZ256rmikz:
case X86::VPTERNLOGDZ128rmbi:
case X86::VPTERNLOGDZ256rmbi:
case X86::VPTERNLOGDZrmbi:
@@ -2636,19 +3034,18 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
++CommutableOpIdx1;
++CommutableOpIdx2;
}
- if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
- CommutableOpIdx1, CommutableOpIdx2))
+ if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
+ CommutableOpIdx2))
return false;
- if (!MI.getOperand(SrcOpIdx1).isReg() ||
- !MI.getOperand(SrcOpIdx2).isReg())
+ if (!MI.getOperand(SrcOpIdx1).isReg() || !MI.getOperand(SrcOpIdx2).isReg())
// No idea.
return false;
return true;
}
default:
- const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(),
- MI.getDesc().TSFlags);
+ const X86InstrFMA3Group *FMA3Group =
+ getFMA3Group(MI.getOpcode(), MI.getDesc().TSFlags);
if (FMA3Group)
return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2,
FMA3Group->isIntrinsic());
@@ -2676,8 +3073,8 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
}
}
- if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
- CommutableOpIdx1, CommutableOpIdx2))
+ if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
+ CommutableOpIdx2))
return false;
if (!MI.getOperand(SrcOpIdx1).isReg() ||
@@ -2781,25 +3178,44 @@ X86::CondCode X86::getCondFromCMov(const MachineInstr &MI) {
/// e.g. turning COND_E to COND_NE.
X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
switch (CC) {
- default: llvm_unreachable("Illegal condition code!");
- case X86::COND_E: return X86::COND_NE;
- case X86::COND_NE: return X86::COND_E;
- case X86::COND_L: return X86::COND_GE;
- case X86::COND_LE: return X86::COND_G;
- case X86::COND_G: return X86::COND_LE;
- case X86::COND_GE: return X86::COND_L;
- case X86::COND_B: return X86::COND_AE;
- case X86::COND_BE: return X86::COND_A;
- case X86::COND_A: return X86::COND_BE;
- case X86::COND_AE: return X86::COND_B;
- case X86::COND_S: return X86::COND_NS;
- case X86::COND_NS: return X86::COND_S;
- case X86::COND_P: return X86::COND_NP;
- case X86::COND_NP: return X86::COND_P;
- case X86::COND_O: return X86::COND_NO;
- case X86::COND_NO: return X86::COND_O;
- case X86::COND_NE_OR_P: return X86::COND_E_AND_NP;
- case X86::COND_E_AND_NP: return X86::COND_NE_OR_P;
+ default:
+ llvm_unreachable("Illegal condition code!");
+ case X86::COND_E:
+ return X86::COND_NE;
+ case X86::COND_NE:
+ return X86::COND_E;
+ case X86::COND_L:
+ return X86::COND_GE;
+ case X86::COND_LE:
+ return X86::COND_G;
+ case X86::COND_G:
+ return X86::COND_LE;
+ case X86::COND_GE:
+ return X86::COND_L;
+ case X86::COND_B:
+ return X86::COND_AE;
+ case X86::COND_BE:
+ return X86::COND_A;
+ case X86::COND_A:
+ return X86::COND_BE;
+ case X86::COND_AE:
+ return X86::COND_B;
+ case X86::COND_S:
+ return X86::COND_NS;
+ case X86::COND_NS:
+ return X86::COND_S;
+ case X86::COND_P:
+ return X86::COND_NP;
+ case X86::COND_NP:
+ return X86::COND_P;
+ case X86::COND_O:
+ return X86::COND_NO;
+ case X86::COND_NO:
+ return X86::COND_O;
+ case X86::COND_NE_OR_P:
+ return X86::COND_E_AND_NP;
+ case X86::COND_E_AND_NP:
+ return X86::COND_NE_OR_P;
}
}
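
Branch-condition inversion is an involution, which the switch above preserves pair by pair (E/NE, L/GE, LE/G, and so on). A miniature model over the signed subset (the enumerators are illustrative placeholders, not the X86:: values):

    #include <cassert>

    enum Cond { E, NE, L, GE, LE, G };

    Cond invert(Cond C) {
      switch (C) {
      case E:  return NE;
      case NE: return E;
      case L:  return GE;
      case GE: return L;
      case LE: return G;
      case G:  return LE;
      }
      return E; // unreachable
    }

    int main() {
      for (Cond C : {E, NE, L, GE, LE, G})
        assert(invert(invert(C)) == C);
    }
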
@@ -2807,17 +3223,28 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
/// modify the instructions such that flags are set by MI(b,a).
static X86::CondCode getSwappedCondition(X86::CondCode CC) {
switch (CC) {
- default: return X86::COND_INVALID;
- case X86::COND_E: return X86::COND_E;
- case X86::COND_NE: return X86::COND_NE;
- case X86::COND_L: return X86::COND_G;
- case X86::COND_LE: return X86::COND_GE;
- case X86::COND_G: return X86::COND_L;
- case X86::COND_GE: return X86::COND_LE;
- case X86::COND_B: return X86::COND_A;
- case X86::COND_BE: return X86::COND_AE;
- case X86::COND_A: return X86::COND_B;
- case X86::COND_AE: return X86::COND_BE;
+ default:
+ return X86::COND_INVALID;
+ case X86::COND_E:
+ return X86::COND_E;
+ case X86::COND_NE:
+ return X86::COND_NE;
+ case X86::COND_L:
+ return X86::COND_G;
+ case X86::COND_LE:
+ return X86::COND_GE;
+ case X86::COND_G:
+ return X86::COND_L;
+ case X86::COND_GE:
+ return X86::COND_LE;
+ case X86::COND_B:
+ return X86::COND_A;
+ case X86::COND_BE:
+ return X86::COND_AE;
+ case X86::COND_A:
+ return X86::COND_B;
+ case X86::COND_AE:
+ return X86::COND_BE;
}
}
@@ -2826,34 +3253,82 @@ X86::getX86ConditionCode(CmpInst::Predicate Predicate) {
X86::CondCode CC = X86::COND_INVALID;
bool NeedSwap = false;
switch (Predicate) {
- default: break;
+ default:
+ break;
// Floating-point Predicates
- case CmpInst::FCMP_UEQ: CC = X86::COND_E; break;
- case CmpInst::FCMP_OLT: NeedSwap = true; [[fallthrough]];
- case CmpInst::FCMP_OGT: CC = X86::COND_A; break;
- case CmpInst::FCMP_OLE: NeedSwap = true; [[fallthrough]];
- case CmpInst::FCMP_OGE: CC = X86::COND_AE; break;
- case CmpInst::FCMP_UGT: NeedSwap = true; [[fallthrough]];
- case CmpInst::FCMP_ULT: CC = X86::COND_B; break;
- case CmpInst::FCMP_UGE: NeedSwap = true; [[fallthrough]];
- case CmpInst::FCMP_ULE: CC = X86::COND_BE; break;
- case CmpInst::FCMP_ONE: CC = X86::COND_NE; break;
- case CmpInst::FCMP_UNO: CC = X86::COND_P; break;
- case CmpInst::FCMP_ORD: CC = X86::COND_NP; break;
- case CmpInst::FCMP_OEQ: [[fallthrough]];
- case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break;
+ case CmpInst::FCMP_UEQ:
+ CC = X86::COND_E;
+ break;
+ case CmpInst::FCMP_OLT:
+ NeedSwap = true;
+ [[fallthrough]];
+ case CmpInst::FCMP_OGT:
+ CC = X86::COND_A;
+ break;
+ case CmpInst::FCMP_OLE:
+ NeedSwap = true;
+ [[fallthrough]];
+ case CmpInst::FCMP_OGE:
+ CC = X86::COND_AE;
+ break;
+ case CmpInst::FCMP_UGT:
+ NeedSwap = true;
+ [[fallthrough]];
+ case CmpInst::FCMP_ULT:
+ CC = X86::COND_B;
+ break;
+ case CmpInst::FCMP_UGE:
+ NeedSwap = true;
+ [[fallthrough]];
+ case CmpInst::FCMP_ULE:
+ CC = X86::COND_BE;
+ break;
+ case CmpInst::FCMP_ONE:
+ CC = X86::COND_NE;
+ break;
+ case CmpInst::FCMP_UNO:
+ CC = X86::COND_P;
+ break;
+ case CmpInst::FCMP_ORD:
+ CC = X86::COND_NP;
+ break;
+ case CmpInst::FCMP_OEQ:
+ [[fallthrough]];
+ case CmpInst::FCMP_UNE:
+ CC = X86::COND_INVALID;
+ break;
// Integer Predicates
- case CmpInst::ICMP_EQ: CC = X86::COND_E; break;
- case CmpInst::ICMP_NE: CC = X86::COND_NE; break;
- case CmpInst::ICMP_UGT: CC = X86::COND_A; break;
- case CmpInst::ICMP_UGE: CC = X86::COND_AE; break;
- case CmpInst::ICMP_ULT: CC = X86::COND_B; break;
- case CmpInst::ICMP_ULE: CC = X86::COND_BE; break;
- case CmpInst::ICMP_SGT: CC = X86::COND_G; break;
- case CmpInst::ICMP_SGE: CC = X86::COND_GE; break;
- case CmpInst::ICMP_SLT: CC = X86::COND_L; break;
- case CmpInst::ICMP_SLE: CC = X86::COND_LE; break;
+ case CmpInst::ICMP_EQ:
+ CC = X86::COND_E;
+ break;
+ case CmpInst::ICMP_NE:
+ CC = X86::COND_NE;
+ break;
+ case CmpInst::ICMP_UGT:
+ CC = X86::COND_A;
+ break;
+ case CmpInst::ICMP_UGE:
+ CC = X86::COND_AE;
+ break;
+ case CmpInst::ICMP_ULT:
+ CC = X86::COND_B;
+ break;
+ case CmpInst::ICMP_ULE:
+ CC = X86::COND_BE;
+ break;
+ case CmpInst::ICMP_SGT:
+ CC = X86::COND_G;
+ break;
+ case CmpInst::ICMP_SGE:
+ CC = X86::COND_GE;
+ break;
+ case CmpInst::ICMP_SLT:
+ CC = X86::COND_L;
+ break;
+ case CmpInst::ICMP_SLE:
+ CC = X86::COND_LE;
+ break;
}
return std::make_pair(CC, NeedSwap);
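
The NeedSwap flags on the ordered "less" predicates exist because COND_B and COND_BE are also taken on unordered inputs: FCMP_ULT/ULE can use them directly, but FCMP_OLT/OLE must swap the operands and use COND_A/COND_AE instead. The identities that make the swap valid, checked directly (they hold for NaN inputs too, where both sides are false):

    #include <cassert>

    int main() {
      double A = 1.0, B = 2.0;
      assert((A < B) == (B > A));   // OLT(a,b) == OGT(b,a) -> COND_A after swap
      assert((A <= B) == (B >= A)); // OLE(a,b) == OGE(b,a) -> COND_AE after swap
    }
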
@@ -2861,39 +3336,59 @@ X86::getX86ConditionCode(CmpInst::Predicate Predicate) {
/// Return a cmov opcode for the given register size in bytes, and operand type.
unsigned X86::getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand) {
- switch(RegBytes) {
- default: llvm_unreachable("Illegal register size!");
- case 2: return HasMemoryOperand ? X86::CMOV16rm : X86::CMOV16rr;
- case 4: return HasMemoryOperand ? X86::CMOV32rm : X86::CMOV32rr;
- case 8: return HasMemoryOperand ? X86::CMOV64rm : X86::CMOV64rr;
+ switch (RegBytes) {
+ default:
+ llvm_unreachable("Illegal register size!");
+ case 2:
+ return HasMemoryOperand ? X86::CMOV16rm : X86::CMOV16rr;
+ case 4:
+ return HasMemoryOperand ? X86::CMOV32rm : X86::CMOV32rr;
+ case 8:
+ return HasMemoryOperand ? X86::CMOV64rm : X86::CMOV64rr;
}
}
/// Get the VPCMP immediate for the given condition.
unsigned X86::getVPCMPImmForCond(ISD::CondCode CC) {
switch (CC) {
- default: llvm_unreachable("Unexpected SETCC condition");
- case ISD::SETNE: return 4;
- case ISD::SETEQ: return 0;
+ default:
+ llvm_unreachable("Unexpected SETCC condition");
+ case ISD::SETNE:
+ return 4;
+ case ISD::SETEQ:
+ return 0;
case ISD::SETULT:
- case ISD::SETLT: return 1;
+ case ISD::SETLT:
+ return 1;
case ISD::SETUGT:
- case ISD::SETGT: return 6;
+ case ISD::SETGT:
+ return 6;
case ISD::SETUGE:
- case ISD::SETGE: return 5;
+ case ISD::SETGE:
+ return 5;
case ISD::SETULE:
- case ISD::SETLE: return 2;
+ case ISD::SETLE:
+ return 2;
}
}
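
These encodings pair up with getSwappedVPCMPImm further below: exchanging the compare operands turns LT into NLE and LE into NLT, while EQ and NE are symmetric. A standalone exhaustive check over small integers (the scalar vpcmp model is illustrative, one lane of the vector instruction):

    #include <cassert>

    bool vpcmp(unsigned Imm, int A, int B) {
      switch (Imm) {
      case 0: return A == B; // EQ
      case 1: return A < B;  // LT
      case 2: return A <= B; // LE
      case 4: return A != B; // NE
      case 5: return A >= B; // NLT
      case 6: return A > B;  // NLE
      }
      return false;
    }

    unsigned swapImm(unsigned Imm) {
      switch (Imm) {
      case 1: return 6;
      case 2: return 5;
      case 5: return 2;
      case 6: return 1;
      default: return Imm; // EQ/NE are symmetric under operand exchange
      }
    }

    int main() {
      for (unsigned Imm : {0u, 1u, 2u, 4u, 5u, 6u})
        for (int A = -1; A <= 1; ++A)
          for (int B = -1; B <= 1; ++B)
            assert(vpcmp(Imm, A, B) == vpcmp(swapImm(Imm), B, A));
    }
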
/// Get the VPCMP immediate if the operands are swapped.
unsigned X86::getSwappedVPCMPImm(unsigned Imm) {
switch (Imm) {
- default: llvm_unreachable("Unreachable!");
- case 0x01: Imm = 0x06; break; // LT -> NLE
- case 0x02: Imm = 0x05; break; // LE -> NLT
- case 0x05: Imm = 0x02; break; // NLT -> LE
- case 0x06: Imm = 0x01; break; // NLE -> LT
+ default:
+ llvm_unreachable("Unreachable!");
+ case 0x01:
+ Imm = 0x06;
+ break; // LT -> NLE
+ case 0x02:
+ Imm = 0x05;
+ break; // LE -> NLT
+ case 0x05:
+ Imm = 0x02;
+ break; // NLT -> LE
+ case 0x06:
+ Imm = 0x01;
+ break; // NLE -> LT
case 0x00: // EQ
case 0x03: // FALSE
case 0x04: // NE
@@ -2907,11 +3402,20 @@ unsigned X86::getSwappedVPCMPImm(unsigned Imm) {
/// Get the VPCOM immediate if the operands are swapped.
unsigned X86::getSwappedVPCOMImm(unsigned Imm) {
switch (Imm) {
- default: llvm_unreachable("Unreachable!");
- case 0x00: Imm = 0x02; break; // LT -> GT
- case 0x01: Imm = 0x03; break; // LE -> GE
- case 0x02: Imm = 0x00; break; // GT -> LT
- case 0x03: Imm = 0x01; break; // GE -> LE
+ default:
+ llvm_unreachable("Unreachable!");
+ case 0x00:
+ Imm = 0x02;
+ break; // LT -> GT
+ case 0x01:
+ Imm = 0x03;
+ break; // LE -> GE
+ case 0x02:
+ Imm = 0x00;
+ break; // GT -> LT
+ case 0x03:
+ Imm = 0x01;
+ break; // GE -> LE
case 0x04: // EQ
case 0x05: // NE
case 0x06: // FALSE
@@ -2926,11 +3430,14 @@ unsigned X86::getSwappedVPCOMImm(unsigned Imm) {
unsigned X86::getSwappedVCMPImm(unsigned Imm) {
  // Only need the lower 2 bits to distinguish.
switch (Imm & 0x3) {
- default: llvm_unreachable("Unreachable!");
- case 0x00: case 0x03:
+ default:
+ llvm_unreachable("Unreachable!");
+ case 0x00:
+ case 0x03:
// EQ/NE/TRUE/FALSE/ORD/UNORD don't change immediate when commuted.
break;
- case 0x01: case 0x02:
+ case 0x01:
+ case 0x02:
// Need to toggle bits 3:0. Bit 4 stays the same.
Imm ^= 0xf;
break;
@@ -3040,9 +3547,9 @@ void X86InstrInfo::replaceBranchWithTailCall(
auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opc));
MIB->addOperand(TailCall.getOperand(0)); // Destination.
- MIB.addImm(0); // Stack offset (not used).
- MIB->addOperand(BranchCond[0]); // Condition.
- MIB.copyImplicitOps(TailCall); // Regmask and (imp-used) parameters.
+ MIB.addImm(0); // Stack offset (not used).
+ MIB->addOperand(BranchCond[0]); // Condition.
+ MIB.copyImplicitOps(TailCall); // Regmask and (imp-used) parameters.
// Add implicit uses and defs of all live regs potentially clobbered by the
// call. This way they still appear live across the call.
@@ -3135,7 +3642,7 @@ bool X86InstrInfo::AnalyzeBranchImpl(
// Handle conditional branches.
X86::CondCode BranchCode = X86::getCondFromBranch(*I);
if (BranchCode == X86::COND_INVALID)
- return true; // Can't handle indirect branch.
+ return true; // Can't handle indirect branch.
    // In practice we should never have an undef eflags operand; if we do,
// abort here as we are not prepared to preserve the flag.
@@ -3167,8 +3674,8 @@ bool X86InstrInfo::AnalyzeBranchImpl(
// we could handle more patterns here, but we shouldn't expect to see them
// if instruction selection has done a reasonable job.
if (TBB == NewTBB &&
- ((OldBranchCode == X86::COND_P && BranchCode == X86::COND_NE) ||
- (OldBranchCode == X86::COND_NE && BranchCode == X86::COND_P))) {
+ ((OldBranchCode == X86::COND_P && BranchCode == X86::COND_NE) ||
+ (OldBranchCode == X86::COND_NE && BranchCode == X86::COND_P))) {
BranchCode = X86::COND_NE_OR_P;
} else if ((OldBranchCode == X86::COND_NP && BranchCode == X86::COND_NE) ||
(OldBranchCode == X86::COND_E && BranchCode == X86::COND_P)) {
@@ -3370,8 +3877,7 @@ unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL,
- int *BytesAdded) const {
+ const DebugLoc &DL, int *BytesAdded) const {
// Shouldn't be a fall through.
assert(TBB && "insertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
@@ -3442,7 +3948,7 @@ bool X86InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
// Check register classes.
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterClass *RC =
- RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+ RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
if (!RC)
return false;
@@ -3489,34 +3995,38 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
const X86Subtarget &Subtarget) {
bool HasAVX = Subtarget.hasAVX();
bool HasAVX512 = Subtarget.hasAVX512();
+ bool HasEGPR = Subtarget.hasEGPR();
// SrcReg(MaskReg) -> DestReg(GR64)
// SrcReg(MaskReg) -> DestReg(GR32)
- // All KMASK RegClasses hold the same k registers, can be tested against anyone.
+  // All KMASK RegClasses hold the same k registers, so we can test against
+  // any of them.
if (X86::VK16RegClass.contains(SrcReg)) {
if (X86::GR64RegClass.contains(DestReg)) {
assert(Subtarget.hasBWI());
- return X86::KMOVQrk;
+ return HasEGPR ? X86::KMOVQrk_EVEX : X86::KMOVQrk;
}
if (X86::GR32RegClass.contains(DestReg))
- return Subtarget.hasBWI() ? X86::KMOVDrk : X86::KMOVWrk;
+ return Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVDrk_EVEX : X86::KMOVDrk)
+ : (HasEGPR ? X86::KMOVWrk_EVEX : X86::KMOVWrk);
}
// SrcReg(GR64) -> DestReg(MaskReg)
// SrcReg(GR32) -> DestReg(MaskReg)
- // All KMASK RegClasses hold the same k registers, can be tested against anyone.
+  // All KMASK RegClasses hold the same k registers, so we can test against
+  // any of them.
if (X86::VK16RegClass.contains(DestReg)) {
if (X86::GR64RegClass.contains(SrcReg)) {
assert(Subtarget.hasBWI());
- return X86::KMOVQkr;
+ return HasEGPR ? X86::KMOVQkr_EVEX : X86::KMOVQkr;
}
if (X86::GR32RegClass.contains(SrcReg))
- return Subtarget.hasBWI() ? X86::KMOVDkr : X86::KMOVWkr;
+ return Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVDkr_EVEX : X86::KMOVDkr)
+ : (HasEGPR ? X86::KMOVWkr_EVEX : X86::KMOVWkr);
}
-
// SrcReg(VR128) -> DestReg(GR64)
// SrcReg(VR64) -> DestReg(GR64)
// SrcReg(GR64) -> DestReg(VR128)
@@ -3525,18 +4035,18 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
if (X86::GR64RegClass.contains(DestReg)) {
if (X86::VR128XRegClass.contains(SrcReg))
// Copy from a VR128 register to a GR64 register.
- return HasAVX512 ? X86::VMOVPQIto64Zrr :
- HasAVX ? X86::VMOVPQIto64rr :
- X86::MOVPQIto64rr;
+ return HasAVX512 ? X86::VMOVPQIto64Zrr
+ : HasAVX ? X86::VMOVPQIto64rr
+ : X86::MOVPQIto64rr;
if (X86::VR64RegClass.contains(SrcReg))
// Copy from a VR64 register to a GR64 register.
return X86::MMX_MOVD64from64rr;
} else if (X86::GR64RegClass.contains(SrcReg)) {
// Copy from a GR64 register to a VR128 register.
if (X86::VR128XRegClass.contains(DestReg))
- return HasAVX512 ? X86::VMOV64toPQIZrr :
- HasAVX ? X86::VMOV64toPQIrr :
- X86::MOV64toPQIrr;
+ return HasAVX512 ? X86::VMOV64toPQIZrr
+ : HasAVX ? X86::VMOV64toPQIrr
+ : X86::MOV64toPQIrr;
// Copy from a GR64 register to a VR64 register.
if (X86::VR64RegClass.contains(DestReg))
return X86::MMX_MOVD64to64rr;
@@ -3548,16 +4058,16 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
if (X86::GR32RegClass.contains(DestReg) &&
X86::VR128XRegClass.contains(SrcReg))
// Copy from a VR128 register to a GR32 register.
- return HasAVX512 ? X86::VMOVPDI2DIZrr :
- HasAVX ? X86::VMOVPDI2DIrr :
- X86::MOVPDI2DIrr;
+ return HasAVX512 ? X86::VMOVPDI2DIZrr
+ : HasAVX ? X86::VMOVPDI2DIrr
+ : X86::MOVPDI2DIrr;
if (X86::VR128XRegClass.contains(DestReg) &&
X86::GR32RegClass.contains(SrcReg))
      // Copy from a GR32 register to a VR128 register.
- return HasAVX512 ? X86::VMOVDI2PDIZrr :
- HasAVX ? X86::VMOVDI2PDIrr :
- X86::MOVDI2PDIrr;
+ return HasAVX512 ? X86::VMOVDI2PDIZrr
+ : HasAVX ? X86::VMOVDI2PDIrr
+ : X86::MOVDI2PDIrr;
return 0;
}
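
Every new HasEGPR path follows one pattern: prefer the EVEX-encoded KMOV form when APX extended GPRs are available, because the legacy VEX encodings cannot address r16-r31. A hypothetical helper sketch showing the selection in one place (not part of the patch; the names are placeholders):

    #include <cassert>

    // Hypothetical refactor sketch only; the patch open-codes this choice
    // at each use site.
    enum FakeOpc { KMOVQrk, KMOVQrk_EVEX }; // placeholders for X86:: opcodes

    unsigned selectKmov(unsigned Legacy, unsigned Evex, bool HasEGPR) {
      return HasEGPR ? Evex : Legacy;
    }

    int main() {
      assert(selectKmov(KMOVQrk, KMOVQrk_EVEX, /*HasEGPR=*/true) == KMOVQrk_EVEX);
      assert(selectKmov(KMOVQrk, KMOVQrk_EVEX, /*HasEGPR=*/false) == KMOVQrk);
    }
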
@@ -3568,6 +4078,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// First deal with the normal symmetric copies.
bool HasAVX = Subtarget.hasAVX();
bool HasVLX = Subtarget.hasVLX();
+ bool HasEGPR = Subtarget.hasEGPR();
unsigned Opc = 0;
if (X86::GR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MOV64rr;
@@ -3578,16 +4089,14 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (X86::GR8RegClass.contains(DestReg, SrcReg)) {
// Copying to or from a physical H register on x86-64 requires a NOREX
// move. Otherwise use a normal move.
- if ((isHReg(DestReg) || isHReg(SrcReg)) &&
- Subtarget.is64Bit()) {
+ if ((isHReg(DestReg) || isHReg(SrcReg)) && Subtarget.is64Bit()) {
Opc = X86::MOV8rr_NOREX;
      // Both operands must be encodable without a REX prefix.
assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) &&
"8-bit H register can not be copied outside GR8_NOREX");
} else
Opc = X86::MOV8rr;
- }
- else if (X86::VR64RegClass.contains(DestReg, SrcReg))
+ } else if (X86::VR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MMX_MOVQ64rr;
else if (X86::VR128XRegClass.contains(DestReg, SrcReg)) {
if (HasVLX)
@@ -3599,10 +4108,10 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// 512-bit move.
Opc = X86::VMOVAPSZrr;
const TargetRegisterInfo *TRI = &getRegisterInfo();
- DestReg = TRI->getMatchingSuperReg(DestReg, X86::sub_xmm,
- &X86::VR512RegClass);
- SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm,
- &X86::VR512RegClass);
+ DestReg =
+ TRI->getMatchingSuperReg(DestReg, X86::sub_xmm, &X86::VR512RegClass);
+ SrcReg =
+ TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm, &X86::VR512RegClass);
}
} else if (X86::VR256XRegClass.contains(DestReg, SrcReg)) {
if (HasVLX)
@@ -3614,22 +4123,24 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// 512-bit move.
Opc = X86::VMOVAPSZrr;
const TargetRegisterInfo *TRI = &getRegisterInfo();
- DestReg = TRI->getMatchingSuperReg(DestReg, X86::sub_ymm,
- &X86::VR512RegClass);
- SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm,
- &X86::VR512RegClass);
+ DestReg =
+ TRI->getMatchingSuperReg(DestReg, X86::sub_ymm, &X86::VR512RegClass);
+ SrcReg =
+ TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
}
} else if (X86::VR512RegClass.contains(DestReg, SrcReg))
Opc = X86::VMOVAPSZrr;
- // All KMASK RegClasses hold the same k registers, can be tested against anyone.
+  // All KMASK RegClasses hold the same k registers, so we can test against
+  // any of them.
else if (X86::VK16RegClass.contains(DestReg, SrcReg))
- Opc = Subtarget.hasBWI() ? X86::KMOVQkk : X86::KMOVWkk;
+ Opc = Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk)
+                             : (HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
if (!Opc)
Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
if (Opc) {
BuildMI(MBB, MI, DL, get(Opc), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
return;
}
@@ -3648,8 +4159,15 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
std::optional<DestSourcePair>
X86InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
- if (MI.isMoveReg())
+ if (MI.isMoveReg()) {
+ // FIXME: Dirty hack for apparent invariant that doesn't hold when
+ // subreg_to_reg is coalesced with ordinary copies, such that the bits that
+ // were asserted as 0 are now undef.
+ if (MI.getOperand(0).isUndef() && MI.getOperand(0).getSubReg())
+ return std::nullopt;
+
return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
+ }
return std::nullopt;
}
@@ -3673,6 +4191,7 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
bool HasAVX = STI.hasAVX();
bool HasAVX512 = STI.hasAVX512();
bool HasVLX = STI.hasVLX();
+ bool HasEGPR = STI.hasEGPR();
assert(RC != nullptr && "Invalid target register class");
switch (STI.getRegisterInfo()->getSpillSize(*RC)) {
@@ -3688,25 +4207,26 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
return Load ? X86::MOV8rm : X86::MOV8mr;
case 2:
if (X86::VK16RegClass.hasSubClassEq(RC))
- return Load ? X86::KMOVWkm : X86::KMOVWmk;
+ return Load ? (HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm)
+ : (HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
return Load ? X86::MOV16rm : X86::MOV16mr;
case 4:
if (X86::GR32RegClass.hasSubClassEq(RC))
return Load ? X86::MOV32rm : X86::MOV32mr;
if (X86::FR32XRegClass.hasSubClassEq(RC))
- return Load ?
- (HasAVX512 ? X86::VMOVSSZrm_alt :
- HasAVX ? X86::VMOVSSrm_alt :
- X86::MOVSSrm_alt) :
- (HasAVX512 ? X86::VMOVSSZmr :
- HasAVX ? X86::VMOVSSmr :
- X86::MOVSSmr);
+ return Load ? (HasAVX512 ? X86::VMOVSSZrm_alt
+ : HasAVX ? X86::VMOVSSrm_alt
+ : X86::MOVSSrm_alt)
+ : (HasAVX512 ? X86::VMOVSSZmr
+ : HasAVX ? X86::VMOVSSmr
+ : X86::MOVSSmr);
if (X86::RFP32RegClass.hasSubClassEq(RC))
return Load ? X86::LD_Fp32m : X86::ST_Fp32m;
if (X86::VK32RegClass.hasSubClassEq(RC)) {
assert(STI.hasBWI() && "KMOVD requires BWI");
- return Load ? X86::KMOVDkm : X86::KMOVDmk;
+ return Load ? (HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm)
+ : (HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
}
    // All of these mask pair classes have the same spill size; the same kind
// of kmov instructions can be used with all of them.
@@ -3724,20 +4244,20 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
if (X86::GR64RegClass.hasSubClassEq(RC))
return Load ? X86::MOV64rm : X86::MOV64mr;
if (X86::FR64XRegClass.hasSubClassEq(RC))
- return Load ?
- (HasAVX512 ? X86::VMOVSDZrm_alt :
- HasAVX ? X86::VMOVSDrm_alt :
- X86::MOVSDrm_alt) :
- (HasAVX512 ? X86::VMOVSDZmr :
- HasAVX ? X86::VMOVSDmr :
- X86::MOVSDmr);
+ return Load ? (HasAVX512 ? X86::VMOVSDZrm_alt
+ : HasAVX ? X86::VMOVSDrm_alt
+ : X86::MOVSDrm_alt)
+ : (HasAVX512 ? X86::VMOVSDZmr
+ : HasAVX ? X86::VMOVSDmr
+ : X86::MOVSDmr);
if (X86::VR64RegClass.hasSubClassEq(RC))
return Load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
if (X86::RFP64RegClass.hasSubClassEq(RC))
return Load ? X86::LD_Fp64m : X86::ST_Fp64m;
if (X86::VK64RegClass.hasSubClassEq(RC)) {
assert(STI.hasBWI() && "KMOVQ requires BWI");
- return Load ? X86::KMOVQkm : X86::KMOVQmk;
+ return Load ? (HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm)
+ : (HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
}
llvm_unreachable("Unknown 8-byte regclass");
case 10:
@@ -3747,25 +4267,23 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
if (X86::VR128XRegClass.hasSubClassEq(RC)) {
// If stack is realigned we can use aligned stores.
if (IsStackAligned)
- return Load ?
- (HasVLX ? X86::VMOVAPSZ128rm :
- HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX :
- HasAVX ? X86::VMOVAPSrm :
- X86::MOVAPSrm):
- (HasVLX ? X86::VMOVAPSZ128mr :
- HasAVX512 ? X86::VMOVAPSZ128mr_NOVLX :
- HasAVX ? X86::VMOVAPSmr :
- X86::MOVAPSmr);
+ return Load ? (HasVLX ? X86::VMOVAPSZ128rm
+ : HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX
+ : HasAVX ? X86::VMOVAPSrm
+ : X86::MOVAPSrm)
+ : (HasVLX ? X86::VMOVAPSZ128mr
+ : HasAVX512 ? X86::VMOVAPSZ128mr_NOVLX
+ : HasAVX ? X86::VMOVAPSmr
+ : X86::MOVAPSmr);
else
- return Load ?
- (HasVLX ? X86::VMOVUPSZ128rm :
- HasAVX512 ? X86::VMOVUPSZ128rm_NOVLX :
- HasAVX ? X86::VMOVUPSrm :
- X86::MOVUPSrm):
- (HasVLX ? X86::VMOVUPSZ128mr :
- HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX :
- HasAVX ? X86::VMOVUPSmr :
- X86::MOVUPSmr);
+ return Load ? (HasVLX ? X86::VMOVUPSZ128rm
+ : HasAVX512 ? X86::VMOVUPSZ128rm_NOVLX
+ : HasAVX ? X86::VMOVUPSrm
+ : X86::MOVUPSrm)
+ : (HasVLX ? X86::VMOVUPSZ128mr
+ : HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX
+ : HasAVX ? X86::VMOVUPSmr
+ : X86::MOVUPSmr);
}
llvm_unreachable("Unknown 16-byte regclass");
}
@@ -3773,21 +4291,19 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
assert(X86::VR256XRegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
// If stack is realigned we can use aligned stores.
if (IsStackAligned)
- return Load ?
- (HasVLX ? X86::VMOVAPSZ256rm :
- HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX :
- X86::VMOVAPSYrm) :
- (HasVLX ? X86::VMOVAPSZ256mr :
- HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX :
- X86::VMOVAPSYmr);
+ return Load ? (HasVLX ? X86::VMOVAPSZ256rm
+ : HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX
+ : X86::VMOVAPSYrm)
+ : (HasVLX ? X86::VMOVAPSZ256mr
+ : HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX
+ : X86::VMOVAPSYmr);
else
- return Load ?
- (HasVLX ? X86::VMOVUPSZ256rm :
- HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX :
- X86::VMOVUPSYrm) :
- (HasVLX ? X86::VMOVUPSZ256mr :
- HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX :
- X86::VMOVUPSYmr);
+ return Load ? (HasVLX ? X86::VMOVUPSZ256rm
+ : HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX
+ : X86::VMOVUPSYrm)
+ : (HasVLX ? X86::VMOVUPSZ256mr
+ : HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX
+ : X86::VMOVUPSYmr);
case 64:
assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass");
assert(STI.hasAVX512() && "Using 512-bit register requires AVX512");
@@ -3836,7 +4352,7 @@ bool X86InstrInfo::verifyInstruction(const MachineInstr &MI,
return true;
ExtAddrMode AM = *AMOrNone;
-
+ assert(AM.Form == ExtAddrMode::Formula::Basic);
if (AM.ScaledReg != X86::NoRegister) {
switch (AM.Scale) {
case 1:
@@ -3861,12 +4377,42 @@ bool X86InstrInfo::verifyInstruction(const MachineInstr &MI,
bool X86InstrInfo::getConstValDefinedInReg(const MachineInstr &MI,
const Register Reg,
int64_t &ImmVal) const {
- if (MI.getOpcode() != X86::MOV32ri && MI.getOpcode() != X86::MOV64ri)
+ Register MovReg = Reg;
+ const MachineInstr *MovMI = &MI;
+
+  // Follow the use-def chain through SUBREG_TO_REG to find the real
+  // move-immediate instruction; this pattern is quite common on x86-64.
+ if (MI.isSubregToReg()) {
+    // The following pattern is used to set up a 64-bit immediate:
+ // %8:gr32 = MOV32r0 implicit-def dead $eflags
+ // %6:gr64 = SUBREG_TO_REG 0, killed %8:gr32, %subreg.sub_32bit
+ if (!MI.getOperand(1).isImm())
+ return false;
+ unsigned FillBits = MI.getOperand(1).getImm();
+ unsigned SubIdx = MI.getOperand(3).getImm();
+ MovReg = MI.getOperand(2).getReg();
+ if (SubIdx != X86::sub_32bit || FillBits != 0)
+ return false;
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ MovMI = MRI.getUniqueVRegDef(MovReg);
+ if (!MovMI)
+ return false;
+ }
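+  // For example (an illustrative MIR snippet; register numbers are
+  // arbitrary):
+  //   %8:gr32 = MOV32ri 42
+  //   %6:gr64 = SUBREG_TO_REG 0, killed %8:gr32, %subreg.sub_32bit
+  // yields ImmVal = 42 for %6, since the upper 32 bits are known to be zero.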
+
+ if (MovMI->getOpcode() == X86::MOV32r0 &&
+ MovMI->getOperand(0).getReg() == MovReg) {
+ ImmVal = 0;
+ return true;
+ }
+
+ if (MovMI->getOpcode() != X86::MOV32ri &&
+ MovMI->getOpcode() != X86::MOV64ri &&
+ MovMI->getOpcode() != X86::MOV32ri64 && MovMI->getOpcode() != X86::MOV8ri)
return false;
  // The MOV source operand can be a global address rather than an immediate.
- if (!MI.getOperand(1).isImm() || MI.getOperand(0).getReg() != Reg)
+ if (!MovMI->getOperand(1).isImm() || MovMI->getOperand(0).getReg() != MovReg)
return false;
- ImmVal = MI.getOperand(1).getImm();
+ ImmVal = MovMI->getOperand(1).getImm();
return true;
}
@@ -4049,7 +4595,8 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
Register &SrcReg2, int64_t &CmpMask,
int64_t &CmpValue) const {
switch (MI.getOpcode()) {
- default: break;
+ default:
+ break;
case X86::CMP64ri32:
case X86::CMP32ri:
case X86::CMP16ri:
@@ -4212,104 +4759,225 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag,
}
switch (MI.getOpcode()) {
- default: return false;
+ default:
+ return false;
// The shift instructions only modify ZF if their shift count is non-zero.
// N.B.: The processor truncates the shift count depending on the encoding.
- case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri:case X86::SAR64ri:
- case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri:case X86::SHR64ri:
- return getTruncatedShiftCount(MI, 2) != 0;
+ case X86::SAR8ri:
+ case X86::SAR16ri:
+ case X86::SAR32ri:
+ case X86::SAR64ri:
+ case X86::SHR8ri:
+ case X86::SHR16ri:
+ case X86::SHR32ri:
+ case X86::SHR64ri:
+ return getTruncatedShiftCount(MI, 2) != 0;
// Some left shift instructions can be turned into LEA instructions but only
// if their flags aren't used. Avoid transforming such instructions.
- case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri:case X86::SHL64ri:{
+ case X86::SHL8ri:
+ case X86::SHL16ri:
+ case X86::SHL32ri:
+ case X86::SHL64ri: {
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
- if (isTruncatedShiftCountForLEA(ShAmt)) return false;
+ if (isTruncatedShiftCountForLEA(ShAmt))
+ return false;
return ShAmt != 0;
}
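  // e.g. a left shift by 1, 2, or 3 may later be rewritten as an LEA (which
  // does not set flags), so such shift amounts are rejected above.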
- case X86::SHRD16rri8:case X86::SHRD32rri8:case X86::SHRD64rri8:
- case X86::SHLD16rri8:case X86::SHLD32rri8:case X86::SHLD64rri8:
- return getTruncatedShiftCount(MI, 3) != 0;
-
- case X86::SUB64ri32: case X86::SUB32ri: case X86::SUB16ri:
- case X86::SUB8ri: case X86::SUB64rr: case X86::SUB32rr:
- case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm:
- case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm:
- case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r:
- case X86::ADD64ri32: case X86::ADD32ri: case X86::ADD16ri:
- case X86::ADD8ri: case X86::ADD64rr: case X86::ADD32rr:
- case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm:
- case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm:
- case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r:
- case X86::ADC64ri32: case X86::ADC32ri: case X86::ADC16ri:
- case X86::ADC8ri: case X86::ADC64rr: case X86::ADC32rr:
- case X86::ADC16rr: case X86::ADC8rr: case X86::ADC64rm:
- case X86::ADC32rm: case X86::ADC16rm: case X86::ADC8rm:
- case X86::SBB64ri32: case X86::SBB32ri: case X86::SBB16ri:
- case X86::SBB8ri: case X86::SBB64rr: case X86::SBB32rr:
- case X86::SBB16rr: case X86::SBB8rr: case X86::SBB64rm:
- case X86::SBB32rm: case X86::SBB16rm: case X86::SBB8rm:
- case X86::NEG8r: case X86::NEG16r: case X86::NEG32r: case X86::NEG64r:
- case X86::LZCNT16rr: case X86::LZCNT16rm:
- case X86::LZCNT32rr: case X86::LZCNT32rm:
- case X86::LZCNT64rr: case X86::LZCNT64rm:
- case X86::POPCNT16rr:case X86::POPCNT16rm:
- case X86::POPCNT32rr:case X86::POPCNT32rm:
- case X86::POPCNT64rr:case X86::POPCNT64rm:
- case X86::TZCNT16rr: case X86::TZCNT16rm:
- case X86::TZCNT32rr: case X86::TZCNT32rm:
- case X86::TZCNT64rr: case X86::TZCNT64rm:
+ case X86::SHRD16rri8:
+ case X86::SHRD32rri8:
+ case X86::SHRD64rri8:
+ case X86::SHLD16rri8:
+ case X86::SHLD32rri8:
+ case X86::SHLD64rri8:
+ return getTruncatedShiftCount(MI, 3) != 0;
+
+ case X86::SUB64ri32:
+ case X86::SUB32ri:
+ case X86::SUB16ri:
+ case X86::SUB8ri:
+ case X86::SUB64rr:
+ case X86::SUB32rr:
+ case X86::SUB16rr:
+ case X86::SUB8rr:
+ case X86::SUB64rm:
+ case X86::SUB32rm:
+ case X86::SUB16rm:
+ case X86::SUB8rm:
+ case X86::DEC64r:
+ case X86::DEC32r:
+ case X86::DEC16r:
+ case X86::DEC8r:
+ case X86::ADD64ri32:
+ case X86::ADD32ri:
+ case X86::ADD16ri:
+ case X86::ADD8ri:
+ case X86::ADD64rr:
+ case X86::ADD32rr:
+ case X86::ADD16rr:
+ case X86::ADD8rr:
+ case X86::ADD64rm:
+ case X86::ADD32rm:
+ case X86::ADD16rm:
+ case X86::ADD8rm:
+ case X86::INC64r:
+ case X86::INC32r:
+ case X86::INC16r:
+ case X86::INC8r:
+ case X86::ADC64ri32:
+ case X86::ADC32ri:
+ case X86::ADC16ri:
+ case X86::ADC8ri:
+ case X86::ADC64rr:
+ case X86::ADC32rr:
+ case X86::ADC16rr:
+ case X86::ADC8rr:
+ case X86::ADC64rm:
+ case X86::ADC32rm:
+ case X86::ADC16rm:
+ case X86::ADC8rm:
+ case X86::SBB64ri32:
+ case X86::SBB32ri:
+ case X86::SBB16ri:
+ case X86::SBB8ri:
+ case X86::SBB64rr:
+ case X86::SBB32rr:
+ case X86::SBB16rr:
+ case X86::SBB8rr:
+ case X86::SBB64rm:
+ case X86::SBB32rm:
+ case X86::SBB16rm:
+ case X86::SBB8rm:
+ case X86::NEG8r:
+ case X86::NEG16r:
+ case X86::NEG32r:
+ case X86::NEG64r:
+ case X86::LZCNT16rr:
+ case X86::LZCNT16rm:
+ case X86::LZCNT32rr:
+ case X86::LZCNT32rm:
+ case X86::LZCNT64rr:
+ case X86::LZCNT64rm:
+ case X86::POPCNT16rr:
+ case X86::POPCNT16rm:
+ case X86::POPCNT32rr:
+ case X86::POPCNT32rm:
+ case X86::POPCNT64rr:
+ case X86::POPCNT64rm:
+ case X86::TZCNT16rr:
+ case X86::TZCNT16rm:
+ case X86::TZCNT32rr:
+ case X86::TZCNT32rm:
+ case X86::TZCNT64rr:
+ case X86::TZCNT64rm:
return true;
- case X86::AND64ri32: case X86::AND32ri: case X86::AND16ri:
- case X86::AND8ri: case X86::AND64rr: case X86::AND32rr:
- case X86::AND16rr: case X86::AND8rr: case X86::AND64rm:
- case X86::AND32rm: case X86::AND16rm: case X86::AND8rm:
- case X86::XOR64ri32: case X86::XOR32ri: case X86::XOR16ri:
- case X86::XOR8ri: case X86::XOR64rr: case X86::XOR32rr:
- case X86::XOR16rr: case X86::XOR8rr: case X86::XOR64rm:
- case X86::XOR32rm: case X86::XOR16rm: case X86::XOR8rm:
- case X86::OR64ri32: case X86::OR32ri: case X86::OR16ri:
- case X86::OR8ri: case X86::OR64rr: case X86::OR32rr:
- case X86::OR16rr: case X86::OR8rr: case X86::OR64rm:
- case X86::OR32rm: case X86::OR16rm: case X86::OR8rm:
- case X86::ANDN32rr: case X86::ANDN32rm:
- case X86::ANDN64rr: case X86::ANDN64rm:
- case X86::BLSI32rr: case X86::BLSI32rm:
- case X86::BLSI64rr: case X86::BLSI64rm:
- case X86::BLSMSK32rr: case X86::BLSMSK32rm:
- case X86::BLSMSK64rr: case X86::BLSMSK64rm:
- case X86::BLSR32rr: case X86::BLSR32rm:
- case X86::BLSR64rr: case X86::BLSR64rm:
- case X86::BLCFILL32rr: case X86::BLCFILL32rm:
- case X86::BLCFILL64rr: case X86::BLCFILL64rm:
- case X86::BLCI32rr: case X86::BLCI32rm:
- case X86::BLCI64rr: case X86::BLCI64rm:
- case X86::BLCIC32rr: case X86::BLCIC32rm:
- case X86::BLCIC64rr: case X86::BLCIC64rm:
- case X86::BLCMSK32rr: case X86::BLCMSK32rm:
- case X86::BLCMSK64rr: case X86::BLCMSK64rm:
- case X86::BLCS32rr: case X86::BLCS32rm:
- case X86::BLCS64rr: case X86::BLCS64rm:
- case X86::BLSFILL32rr: case X86::BLSFILL32rm:
- case X86::BLSFILL64rr: case X86::BLSFILL64rm:
- case X86::BLSIC32rr: case X86::BLSIC32rm:
- case X86::BLSIC64rr: case X86::BLSIC64rm:
- case X86::BZHI32rr: case X86::BZHI32rm:
- case X86::BZHI64rr: case X86::BZHI64rm:
- case X86::T1MSKC32rr: case X86::T1MSKC32rm:
- case X86::T1MSKC64rr: case X86::T1MSKC64rm:
- case X86::TZMSK32rr: case X86::TZMSK32rm:
- case X86::TZMSK64rr: case X86::TZMSK64rm:
+ case X86::AND64ri32:
+ case X86::AND32ri:
+ case X86::AND16ri:
+ case X86::AND8ri:
+ case X86::AND64rr:
+ case X86::AND32rr:
+ case X86::AND16rr:
+ case X86::AND8rr:
+ case X86::AND64rm:
+ case X86::AND32rm:
+ case X86::AND16rm:
+ case X86::AND8rm:
+ case X86::XOR64ri32:
+ case X86::XOR32ri:
+ case X86::XOR16ri:
+ case X86::XOR8ri:
+ case X86::XOR64rr:
+ case X86::XOR32rr:
+ case X86::XOR16rr:
+ case X86::XOR8rr:
+ case X86::XOR64rm:
+ case X86::XOR32rm:
+ case X86::XOR16rm:
+ case X86::XOR8rm:
+ case X86::OR64ri32:
+ case X86::OR32ri:
+ case X86::OR16ri:
+ case X86::OR8ri:
+ case X86::OR64rr:
+ case X86::OR32rr:
+ case X86::OR16rr:
+ case X86::OR8rr:
+ case X86::OR64rm:
+ case X86::OR32rm:
+ case X86::OR16rm:
+ case X86::OR8rm:
+ case X86::ANDN32rr:
+ case X86::ANDN32rm:
+ case X86::ANDN64rr:
+ case X86::ANDN64rm:
+ case X86::BLSI32rr:
+ case X86::BLSI32rm:
+ case X86::BLSI64rr:
+ case X86::BLSI64rm:
+ case X86::BLSMSK32rr:
+ case X86::BLSMSK32rm:
+ case X86::BLSMSK64rr:
+ case X86::BLSMSK64rm:
+ case X86::BLSR32rr:
+ case X86::BLSR32rm:
+ case X86::BLSR64rr:
+ case X86::BLSR64rm:
+ case X86::BLCFILL32rr:
+ case X86::BLCFILL32rm:
+ case X86::BLCFILL64rr:
+ case X86::BLCFILL64rm:
+ case X86::BLCI32rr:
+ case X86::BLCI32rm:
+ case X86::BLCI64rr:
+ case X86::BLCI64rm:
+ case X86::BLCIC32rr:
+ case X86::BLCIC32rm:
+ case X86::BLCIC64rr:
+ case X86::BLCIC64rm:
+ case X86::BLCMSK32rr:
+ case X86::BLCMSK32rm:
+ case X86::BLCMSK64rr:
+ case X86::BLCMSK64rm:
+ case X86::BLCS32rr:
+ case X86::BLCS32rm:
+ case X86::BLCS64rr:
+ case X86::BLCS64rm:
+ case X86::BLSFILL32rr:
+ case X86::BLSFILL32rm:
+ case X86::BLSFILL64rr:
+ case X86::BLSFILL64rm:
+ case X86::BLSIC32rr:
+ case X86::BLSIC32rm:
+ case X86::BLSIC64rr:
+ case X86::BLSIC64rm:
+ case X86::BZHI32rr:
+ case X86::BZHI32rm:
+ case X86::BZHI64rr:
+ case X86::BZHI64rm:
+ case X86::T1MSKC32rr:
+ case X86::T1MSKC32rm:
+ case X86::T1MSKC64rr:
+ case X86::T1MSKC64rm:
+ case X86::TZMSK32rr:
+ case X86::TZMSK32rm:
+ case X86::TZMSK64rr:
+ case X86::TZMSK64rm:
// These instructions clear the overflow flag just like TEST.
// FIXME: These are not the only instructions in this switch that clear the
// overflow flag.
ClearsOverflowFlag = true;
return true;
- case X86::BEXTR32rr: case X86::BEXTR64rr:
- case X86::BEXTR32rm: case X86::BEXTR64rm:
- case X86::BEXTRI32ri: case X86::BEXTRI32mi:
- case X86::BEXTRI64ri: case X86::BEXTRI64mi:
+ case X86::BEXTR32rr:
+ case X86::BEXTR64rr:
+ case X86::BEXTR32rm:
+ case X86::BEXTR64rm:
+ case X86::BEXTRI32ri:
+ case X86::BEXTRI32mi:
+ case X86::BEXTRI64ri:
+ case X86::BEXTRI64mi:
// BEXTR doesn't update the sign flag so we can't use it. It does clear
// the overflow flag, but that's not useful without the sign flag.
NoSignFlag = true;
@@ -4320,7 +4988,8 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag,
/// Check whether the use can be converted to remove a comparison against zero.
static X86::CondCode isUseDefConvertible(const MachineInstr &MI) {
switch (MI.getOpcode()) {
- default: return X86::COND_INVALID;
+ default:
+ return X86::COND_INVALID;
case X86::NEG8r:
case X86::NEG16r:
case X86::NEG32r:
@@ -4353,7 +5022,7 @@ static X86::CondCode isUseDefConvertible(const MachineInstr &MI) {
case X86::BLSMSK32rr:
case X86::BLSMSK64rr:
return X86::COND_B;
- // TODO: TBM instructions.
+ // TODO: TBM instructions.
}
}
@@ -4366,7 +5035,8 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
const MachineRegisterInfo *MRI) const {
// Check whether we can replace SUB with CMP.
switch (CmpInstr.getOpcode()) {
- default: break;
+ default:
+ break;
case X86::SUB64ri32:
case X86::SUB32ri:
case X86::SUB16ri:
@@ -4384,19 +5054,44 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// There is no use of the destination register, we can replace SUB with CMP.
unsigned NewOpcode = 0;
switch (CmpInstr.getOpcode()) {
- default: llvm_unreachable("Unreachable!");
- case X86::SUB64rm: NewOpcode = X86::CMP64rm; break;
- case X86::SUB32rm: NewOpcode = X86::CMP32rm; break;
- case X86::SUB16rm: NewOpcode = X86::CMP16rm; break;
- case X86::SUB8rm: NewOpcode = X86::CMP8rm; break;
- case X86::SUB64rr: NewOpcode = X86::CMP64rr; break;
- case X86::SUB32rr: NewOpcode = X86::CMP32rr; break;
- case X86::SUB16rr: NewOpcode = X86::CMP16rr; break;
- case X86::SUB8rr: NewOpcode = X86::CMP8rr; break;
- case X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break;
- case X86::SUB32ri: NewOpcode = X86::CMP32ri; break;
- case X86::SUB16ri: NewOpcode = X86::CMP16ri; break;
- case X86::SUB8ri: NewOpcode = X86::CMP8ri; break;
+ default:
+ llvm_unreachable("Unreachable!");
+ case X86::SUB64rm:
+ NewOpcode = X86::CMP64rm;
+ break;
+ case X86::SUB32rm:
+ NewOpcode = X86::CMP32rm;
+ break;
+ case X86::SUB16rm:
+ NewOpcode = X86::CMP16rm;
+ break;
+ case X86::SUB8rm:
+ NewOpcode = X86::CMP8rm;
+ break;
+ case X86::SUB64rr:
+ NewOpcode = X86::CMP64rr;
+ break;
+ case X86::SUB32rr:
+ NewOpcode = X86::CMP32rr;
+ break;
+ case X86::SUB16rr:
+ NewOpcode = X86::CMP16rr;
+ break;
+ case X86::SUB8rr:
+ NewOpcode = X86::CMP8rr;
+ break;
+ case X86::SUB64ri32:
+ NewOpcode = X86::CMP64ri32;
+ break;
+ case X86::SUB32ri:
+ NewOpcode = X86::CMP32ri;
+ break;
+ case X86::SUB16ri:
+ NewOpcode = X86::CMP16ri;
+ break;
+ case X86::SUB8ri:
+ NewOpcode = X86::CMP8ri;
+ break;
}
CmpInstr.setDesc(get(NewOpcode));
CmpInstr.removeOperand(0);
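  // e.g. a SUB32rr whose result is unused becomes a CMP32rr: the EFLAGS
  // update is identical, and removing operand 0 drops the dead definition.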
@@ -4532,7 +5227,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// If we are done with the basic block, we need to check whether EFLAGS is
// live-out.
bool FlagsMayLiveOut = true;
- SmallVector<std::pair<MachineInstr*, X86::CondCode>, 4> OpsToUpdate;
+ SmallVector<std::pair<MachineInstr *, X86::CondCode>, 4> OpsToUpdate;
MachineBasicBlock::iterator AfterCmpInstr =
std::next(MachineBasicBlock::iterator(CmpInstr));
for (MachineInstr &Instr : make_range(AfterCmpInstr, CmpMBB.end())) {
@@ -4555,24 +5250,31 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
X86::CondCode ReplacementCC = X86::COND_INVALID;
if (MI) {
switch (OldCC) {
- default: break;
- case X86::COND_A: case X86::COND_AE:
- case X86::COND_B: case X86::COND_BE:
+ default:
+ break;
+ case X86::COND_A:
+ case X86::COND_AE:
+ case X86::COND_B:
+ case X86::COND_BE:
// CF is used, we can't perform this optimization.
return false;
- case X86::COND_G: case X86::COND_GE:
- case X86::COND_L: case X86::COND_LE:
+ case X86::COND_G:
+ case X86::COND_GE:
+ case X86::COND_L:
+ case X86::COND_LE:
// If SF is used, but the instruction doesn't update the SF, then we
// can't do the optimization.
if (NoSignFlag)
return false;
[[fallthrough]];
- case X86::COND_O: case X86::COND_NO:
+ case X86::COND_O:
+ case X86::COND_NO:
// If OF is used, the instruction needs to clear it like CmpZero does.
if (!ClearsOverflowFlag)
return false;
break;
- case X86::COND_S: case X86::COND_NS:
+ case X86::COND_S:
+ case X86::COND_NS:
// If SF is used, but the instruction doesn't update the SF, then we
// can't do the optimization.
if (NoSignFlag)
@@ -4763,6 +5465,310 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI,
return nullptr;
}
+/// Convert an ALUrr opcode to the corresponding ALUri opcode, e.g.
+///     ADD32rr ==> ADD32ri
+/// ShiftRotate is set to true if the opcode is a shift or rotate.
+/// If the resulting ALUri can be further changed to a COPY when the immediate
+/// is 0, CanConvert2Copy is set to true.
+static unsigned ConvertALUrr2ALUri(unsigned Opcode, bool &CanConvert2Copy,
+ bool &ShiftRotate) {
+ CanConvert2Copy = false;
+ ShiftRotate = false;
+ unsigned NewOpcode = 0;
+ switch (Opcode) {
+ case X86::ADD64rr:
+ NewOpcode = X86::ADD64ri32;
+ CanConvert2Copy = true;
+ break;
+ case X86::ADC64rr:
+ NewOpcode = X86::ADC64ri32;
+ break;
+ case X86::SUB64rr:
+ NewOpcode = X86::SUB64ri32;
+ CanConvert2Copy = true;
+ break;
+ case X86::SBB64rr:
+ NewOpcode = X86::SBB64ri32;
+ break;
+ case X86::AND64rr:
+ NewOpcode = X86::AND64ri32;
+ break;
+ case X86::OR64rr:
+ NewOpcode = X86::OR64ri32;
+ CanConvert2Copy = true;
+ break;
+ case X86::XOR64rr:
+ NewOpcode = X86::XOR64ri32;
+ CanConvert2Copy = true;
+ break;
+ case X86::TEST64rr:
+ NewOpcode = X86::TEST64ri32;
+ break;
+ case X86::CMP64rr:
+ NewOpcode = X86::CMP64ri32;
+ break;
+ case X86::SHR64rCL:
+ NewOpcode = X86::SHR64ri;
+ ShiftRotate = true;
+ break;
+ case X86::SHL64rCL:
+ NewOpcode = X86::SHL64ri;
+ ShiftRotate = true;
+ break;
+ case X86::SAR64rCL:
+ NewOpcode = X86::SAR64ri;
+ ShiftRotate = true;
+ break;
+ case X86::ROL64rCL:
+ NewOpcode = X86::ROL64ri;
+ ShiftRotate = true;
+ break;
+ case X86::ROR64rCL:
+ NewOpcode = X86::ROR64ri;
+ ShiftRotate = true;
+ break;
+ case X86::RCL64rCL:
+ NewOpcode = X86::RCL64ri;
+ ShiftRotate = true;
+ break;
+ case X86::RCR64rCL:
+ NewOpcode = X86::RCR64ri;
+ ShiftRotate = true;
+ break;
+ case X86::ADD32rr:
+ NewOpcode = X86::ADD32ri;
+ CanConvert2Copy = true;
+ break;
+ case X86::ADC32rr:
+ NewOpcode = X86::ADC32ri;
+ break;
+ case X86::SUB32rr:
+ NewOpcode = X86::SUB32ri;
+ CanConvert2Copy = true;
+ break;
+ case X86::SBB32rr:
+ NewOpcode = X86::SBB32ri;
+ break;
+ case X86::AND32rr:
+ NewOpcode = X86::AND32ri;
+ break;
+ case X86::OR32rr:
+ NewOpcode = X86::OR32ri;
+ CanConvert2Copy = true;
+ break;
+ case X86::XOR32rr:
+ NewOpcode = X86::XOR32ri;
+ CanConvert2Copy = true;
+ break;
+ case X86::TEST32rr:
+ NewOpcode = X86::TEST32ri;
+ break;
+ case X86::CMP32rr:
+ NewOpcode = X86::CMP32ri;
+ break;
+ case X86::SHR32rCL:
+ NewOpcode = X86::SHR32ri;
+ ShiftRotate = true;
+ break;
+ case X86::SHL32rCL:
+ NewOpcode = X86::SHL32ri;
+ ShiftRotate = true;
+ break;
+ case X86::SAR32rCL:
+ NewOpcode = X86::SAR32ri;
+ ShiftRotate = true;
+ break;
+ case X86::ROL32rCL:
+ NewOpcode = X86::ROL32ri;
+ ShiftRotate = true;
+ break;
+ case X86::ROR32rCL:
+ NewOpcode = X86::ROR32ri;
+ ShiftRotate = true;
+ break;
+ case X86::RCL32rCL:
+ NewOpcode = X86::RCL32ri;
+ ShiftRotate = true;
+ break;
+ case X86::RCR32rCL:
+ NewOpcode = X86::RCR32ri;
+ ShiftRotate = true;
+ break;
+ }
+ return NewOpcode;
+}
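+// Illustrative examples of the mapping above (no additional behavior):
+//   ADD32rr  -> ADD32ri  (CanConvert2Copy: an add of 0 can become a COPY)
+//   SHL64rCL -> SHL64ri  (ShiftRotate: the CL count becomes an immediate)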
+
+/// The real implementation of FoldImmediate.
+/// Reg is assigned ImmVal in DefMI and is used in UseMI.
+/// If MakeChange is true, this function tries to replace Reg with ImmVal in
+/// UseMI; if MakeChange is false, it just checks whether folding is possible.
+/// Returns true if folding is successful or possible.
+bool X86InstrInfo::FoldImmediateImpl(MachineInstr &UseMI, MachineInstr *DefMI,
+ Register Reg, int64_t ImmVal,
+ MachineRegisterInfo *MRI,
+ bool MakeChange) const {
+ bool Modified = false;
+ bool ShiftRotate = false;
+ // When ImmVal is 0, some instructions can be changed to COPY.
+ bool CanChangeToCopy = false;
+ unsigned Opc = UseMI.getOpcode();
+
+  // 64-bit operations accept sign-extended 32-bit immediates.
+  // 32-bit operations accept all 32-bit immediates, so we don't need to check
+  // them.
+ const TargetRegisterClass *RC = nullptr;
+ if (Reg.isVirtual())
+ RC = MRI->getRegClass(Reg);
+ if ((Reg.isPhysical() && X86::GR64RegClass.contains(Reg)) ||
+ (Reg.isVirtual() && X86::GR64RegClass.hasSubClassEq(RC))) {
+ if (!isInt<32>(ImmVal))
+ return false;
+ }
+
+ if (UseMI.findRegisterUseOperand(Reg)->getSubReg())
+ return false;
+  // An immediate has a larger encoding than a register, so avoid folding the
+  // immediate if it has more than one use and we are optimizing for size.
+ if (UseMI.getMF()->getFunction().hasOptSize() && Reg.isVirtual() &&
+ !MRI->hasOneNonDBGUse(Reg))
+ return false;
+
+ unsigned NewOpc;
+ if (Opc == TargetOpcode::COPY) {
+ Register ToReg = UseMI.getOperand(0).getReg();
+ const TargetRegisterClass *RC = nullptr;
+ if (ToReg.isVirtual())
+ RC = MRI->getRegClass(ToReg);
+ bool GR32Reg = (ToReg.isVirtual() && X86::GR32RegClass.hasSubClassEq(RC)) ||
+ (ToReg.isPhysical() && X86::GR32RegClass.contains(ToReg));
+ bool GR64Reg = (ToReg.isVirtual() && X86::GR64RegClass.hasSubClassEq(RC)) ||
+ (ToReg.isPhysical() && X86::GR64RegClass.contains(ToReg));
+ bool GR8Reg = (ToReg.isVirtual() && X86::GR8RegClass.hasSubClassEq(RC)) ||
+ (ToReg.isPhysical() && X86::GR8RegClass.contains(ToReg));
+
+ if (ImmVal == 0) {
+ // We have MOV32r0 only.
+ if (!GR32Reg)
+ return false;
+ }
+
+ if (GR64Reg) {
+ if (isUInt<32>(ImmVal))
+ NewOpc = X86::MOV32ri64;
+ else
+ NewOpc = X86::MOV64ri;
+ } else if (GR32Reg) {
+ NewOpc = X86::MOV32ri;
+ if (ImmVal == 0) {
+ // MOV32r0 clobbers EFLAGS.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ if (UseMI.getParent()->computeRegisterLiveness(
+ TRI, X86::EFLAGS, UseMI) != MachineBasicBlock::LQR_Dead)
+ return false;
+
+      // MOV32r0 differs from the other cases because it doesn't encode the
+      // immediate in the instruction, so we modify it directly here.
+ if (!MakeChange)
+ return true;
+ UseMI.setDesc(get(X86::MOV32r0));
+ UseMI.removeOperand(UseMI.findRegisterUseOperandIdx(Reg));
+ UseMI.addOperand(MachineOperand::CreateReg(X86::EFLAGS, /*isDef=*/true,
+ /*isImp=*/true,
+ /*isKill=*/false,
+ /*isDead=*/true));
+ Modified = true;
+ }
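+      // e.g. a COPY of a known zero becomes MOV32r0, which is only legal
+      // when EFLAGS is dead at this point, as checked above.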
+ } else if (GR8Reg)
+ NewOpc = X86::MOV8ri;
+ else
+ return false;
+ } else
+ NewOpc = ConvertALUrr2ALUri(Opc, CanChangeToCopy, ShiftRotate);
+
+ if (!NewOpc)
+ return false;
+
+ // For SUB instructions the immediate can only be the second source operand.
+ if ((NewOpc == X86::SUB64ri32 || NewOpc == X86::SUB32ri ||
+ NewOpc == X86::SBB64ri32 || NewOpc == X86::SBB32ri) &&
+ UseMI.findRegisterUseOperandIdx(Reg) != 2)
+ return false;
+ // For CMP instructions the immediate can only be at index 1.
+ if ((NewOpc == X86::CMP64ri32 || NewOpc == X86::CMP32ri) &&
+ UseMI.findRegisterUseOperandIdx(Reg) != 1)
+ return false;
+
+ if (ShiftRotate) {
+ unsigned RegIdx = UseMI.findRegisterUseOperandIdx(Reg);
+ if (RegIdx < 2)
+ return false;
+ if (!isInt<8>(ImmVal))
+ return false;
+ assert(Reg == X86::CL);
+
+ if (!MakeChange)
+ return true;
+ UseMI.setDesc(get(NewOpc));
+ UseMI.removeOperand(RegIdx);
+ UseMI.addOperand(MachineOperand::CreateImm(ImmVal));
+    // Reg is the physical register $cl, so we can't tell through MRI whether
+    // DefMI is dead. Let the caller handle it, or the dead-mi-elimination
+    // pass can delete the dead physical-register-defining instruction.
+ return true;
+ }
+
+ if (!MakeChange)
+ return true;
+
+ if (!Modified) {
+ // Modify the instruction.
+ if (ImmVal == 0 && CanChangeToCopy &&
+ UseMI.registerDefIsDead(X86::EFLAGS)) {
+ // %100 = add %101, 0
+ // ==>
+ // %100 = COPY %101
+ UseMI.setDesc(get(TargetOpcode::COPY));
+ UseMI.removeOperand(UseMI.findRegisterUseOperandIdx(Reg));
+ UseMI.removeOperand(UseMI.findRegisterDefOperandIdx(X86::EFLAGS));
+ UseMI.untieRegOperand(0);
+ UseMI.clearFlag(MachineInstr::MIFlag::NoSWrap);
+ UseMI.clearFlag(MachineInstr::MIFlag::NoUWrap);
+ } else {
+ unsigned Op1 = 1, Op2 = CommuteAnyOperandIndex;
+ unsigned ImmOpNum = 2;
+ if (!UseMI.getOperand(0).isDef()) {
+ Op1 = 0; // TEST, CMP
+ ImmOpNum = 1;
+ }
+ if (Opc == TargetOpcode::COPY)
+ ImmOpNum = 1;
+ if (findCommutedOpIndices(UseMI, Op1, Op2) &&
+ UseMI.getOperand(Op1).getReg() == Reg)
+ commuteInstruction(UseMI);
+
+ assert(UseMI.getOperand(ImmOpNum).getReg() == Reg);
+ UseMI.setDesc(get(NewOpc));
+ UseMI.getOperand(ImmOpNum).ChangeToImmediate(ImmVal);
+ }
+ }
+
+ if (Reg.isVirtual() && MRI->use_nodbg_empty(Reg))
+ DefMI->eraseFromBundle();
+
+ return true;
+}
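+// A minimal before/after sketch of the fold performed above (illustrative
+// MIR; virtual register numbers are arbitrary):
+//   %1:gr32 = MOV32ri 5
+//   %2:gr32 = ADD32rr %0:gr32, killed %1:gr32, implicit-def $eflags
+// ==>
+//   %2:gr32 = ADD32ri %0:gr32, 5, implicit-def $eflags
+// with the now-unused MOV32ri erased once %1 has no remaining uses.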
+
+/// FoldImmediate - 'Reg' is known to be defined by a move-immediate
+/// instruction; try to fold the immediate into the use instruction.
+bool X86InstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
+ Register Reg, MachineRegisterInfo *MRI) const {
+ int64_t ImmVal;
+ if (!getConstValDefinedInReg(DefMI, Reg, ImmVal))
+ return false;
+
+ return FoldImmediateImpl(UseMI, &DefMI, Reg, ImmVal, MRI, true);
+}
+
/// Expand a single-def pseudo instruction to a two-addr
/// instruction with two undef reads of the register being defined.
/// This is used for mapping:
@@ -4780,8 +5786,7 @@ static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
// implicit operands.
MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
// But we don't trust that.
- assert(MIB.getReg(1) == Reg &&
- MIB.getReg(2) == Reg && "Misplaced operand");
+ assert(MIB.getReg(1) == Reg && MIB.getReg(2) == Reg && "Misplaced operand");
return true;
}
@@ -4836,8 +5841,9 @@ static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
X86MachineFunctionInfo *X86FI =
MBB.getParent()->getInfo<X86MachineFunctionInfo>();
if (X86FI->getUsesRedZone()) {
- MIB->setDesc(TII.get(MIB->getOpcode() ==
- X86::MOV32ImmSExti8 ? X86::MOV32ri : X86::MOV64ri));
+ MIB->setDesc(TII.get(MIB->getOpcode() == X86::MOV32ImmSExti8
+ ? X86::MOV32ri
+ : X86::MOV64ri));
return true;
}
@@ -4846,8 +5852,7 @@ static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
StackAdjustment = 8;
BuildMI(MBB, I, DL, TII.get(X86::PUSH64i32)).addImm(Imm);
MIB->setDesc(TII.get(X86::POP64r));
- MIB->getOperand(0)
- .setReg(getX86SubSuperRegister(MIB.getReg(0), 64));
+ MIB->getOperand(0).setReg(getX86SubSuperRegister(MIB.getReg(0), 64));
} else {
assert(MIB->getOpcode() == X86::MOV32ImmSExti8);
StackAdjustment = 4;
@@ -4864,9 +5869,11 @@ static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
bool NeedsDwarfCFI = !IsWin64Prologue && MF.needsFrameMoves();
bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI;
if (EmitCFI) {
- TFL->BuildCFI(MBB, I, DL,
+ TFL->BuildCFI(
+ MBB, I, DL,
MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
- TFL->BuildCFI(MBB, std::next(I), DL,
+ TFL->BuildCFI(
+ MBB, std::next(I), DL,
MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment));
}
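  // The 64-bit expansion above is thus, roughly:
  //   pushq $imm   ; PUSH64i32, immediate is sign-extended
  //   popq  %reg   ; POP64r
  // which is chosen for code size over MOV64ri; it is avoided when the red
  // zone is in use, since the push would clobber red-zone memory.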
@@ -4889,8 +5896,12 @@ static void expandLoadStackGuard(MachineInstrBuilder &MIB,
MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 8, Align(8));
MachineBasicBlock::iterator I = MIB.getInstr();
- BuildMI(MBB, I, DL, TII.get(X86::MOV64rm), Reg).addReg(X86::RIP).addImm(1)
- .addReg(0).addGlobalAddress(GV, 0, X86II::MO_GOTPCREL).addReg(0)
+ BuildMI(MBB, I, DL, TII.get(X86::MOV64rm), Reg)
+ .addReg(X86::RIP)
+ .addImm(1)
+ .addReg(0)
+ .addGlobalAddress(GV, 0, X86II::MO_GOTPCREL)
+ .addReg(0)
.addMemOperand(MMO);
MIB->setDebugLoc(DL);
MIB->setDesc(TII.get(X86::MOV64rm));
@@ -4915,8 +5926,7 @@ static bool expandXorFP(MachineInstrBuilder &MIB, const TargetInstrInfo &TII) {
static bool expandNOVLXLoad(MachineInstrBuilder &MIB,
const TargetRegisterInfo *TRI,
const MCInstrDesc &LoadDesc,
- const MCInstrDesc &BroadcastDesc,
- unsigned SubIdx) {
+ const MCInstrDesc &BroadcastDesc, unsigned SubIdx) {
Register DestReg = MIB.getReg(0);
// Check if DestReg is XMM16-31 or YMM16-31.
if (TRI->getEncodingValue(DestReg) < 16) {
@@ -4938,8 +5948,7 @@ static bool expandNOVLXLoad(MachineInstrBuilder &MIB,
static bool expandNOVLXStore(MachineInstrBuilder &MIB,
const TargetRegisterInfo *TRI,
const MCInstrDesc &StoreDesc,
- const MCInstrDesc &ExtractDesc,
- unsigned SubIdx) {
+ const MCInstrDesc &ExtractDesc, unsigned SubIdx) {
Register SrcReg = MIB.getReg(X86::AddrNumOperands);
// Check if DestReg is XMM16-31 or YMM16-31.
if (TRI->getEncodingValue(SrcReg) < 16) {
@@ -4963,8 +5972,7 @@ static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) {
// Temporarily remove the immediate so we can add another source register.
MIB->removeOperand(2);
// Add the register. Don't copy the kill flag if there is one.
- MIB.addReg(MIB.getReg(1),
- getUndefRegState(MIB->getOperand(1).isUndef()));
+ MIB.addReg(MIB.getReg(1), getUndefRegState(MIB->getOperand(1).isUndef()));
// Add back the immediate.
MIB.addImm(ShiftAmt);
return true;
@@ -4977,9 +5985,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::MOV32r0:
return Expand2AddrUndef(MIB, get(X86::XOR32rr));
case X86::MOV32r1:
- return expandMOV32r1(MIB, *this, /*MinusOne=*/ false);
+ return expandMOV32r1(MIB, *this, /*MinusOne=*/false);
case X86::MOV32r_1:
- return expandMOV32r1(MIB, *this, /*MinusOne=*/ true);
+ return expandMOV32r1(MIB, *this, /*MinusOne=*/true);
case X86::MOV32ImmSExti8:
case X86::MOV64ImmSExti8:
return ExpandMOVImmSExti8(MIB, *this, Subtarget);
@@ -5030,21 +6038,21 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) {
Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
MIB->getOperand(0).setReg(XReg);
- Expand2AddrUndef(MIB,
- get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
+ Expand2AddrUndef(MIB, get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
MIB.addReg(SrcReg, RegState::ImplicitDefine);
return true;
}
if (MI.getOpcode() == X86::AVX512_256_SET0) {
// No VLX so we must reference a zmm.
unsigned ZReg =
- TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
+ TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
MIB->getOperand(0).setReg(ZReg);
}
return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
}
case X86::V_SETALLONES:
- return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
+ return Expand2AddrUndef(MIB,
+ get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
case X86::AVX2_SETALLONES:
return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
case X86::AVX1_SETALLONES: {
@@ -5059,8 +6067,10 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MIB->setDesc(get(X86::VPTERNLOGDZrri));
// VPTERNLOGD needs 3 register inputs and an immediate.
// 0xff will return 1s for any input.
- MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef)
- .addReg(Reg, RegState::Undef).addImm(0xff);
+ MIB.addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef)
+ .addImm(0xff);
return true;
}
case X86::AVX512_512_SEXT_MASK_32:
@@ -5068,14 +6078,18 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
Register Reg = MIB.getReg(0);
Register MaskReg = MIB.getReg(1);
unsigned MaskState = getRegState(MIB->getOperand(1));
- unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64) ?
- X86::VPTERNLOGQZrrikz : X86::VPTERNLOGDZrrikz;
+ unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64)
+ ? X86::VPTERNLOGQZrrikz
+ : X86::VPTERNLOGDZrrikz;
MI.removeOperand(1);
MIB->setDesc(get(Opc));
// VPTERNLOG needs 3 register inputs and an immediate.
// 0xff will return 1s for any input.
- MIB.addReg(Reg, RegState::Undef).addReg(MaskReg, MaskState)
- .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xff);
+ MIB.addReg(Reg, RegState::Undef)
+ .addReg(MaskReg, MaskState)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef)
+ .addImm(0xff);
return true;
}
case X86::VMOVAPSZ128rm_NOVLX:
@@ -5116,10 +6130,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
unsigned Is64Bit = MI.getOpcode() == X86::RDFLAGS64;
MachineBasicBlock &MBB = *MIB->getParent();
- MachineInstr *NewMI =
- BuildMI(MBB, MI, MIB->getDebugLoc(),
- get(Is64Bit ? X86::PUSHF64 : X86::PUSHF32))
- .getInstr();
+ MachineInstr *NewMI = BuildMI(MBB, MI, MIB->getDebugLoc(),
+ get(Is64Bit ? X86::PUSHF64 : X86::PUSHF32))
+ .getInstr();
// Permit reads of the EFLAGS and DF registers without them being defined.
// This intrinsic exists to read external processor state in flags, such as
@@ -5157,30 +6170,56 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
// registers, since it is not usable as a write mask.
// FIXME: A more advanced approach would be to choose the best input mask
// register based on context.
- case X86::KSET0W: return Expand2AddrKreg(MIB, get(X86::KXORWrr), X86::K0);
- case X86::KSET0D: return Expand2AddrKreg(MIB, get(X86::KXORDrr), X86::K0);
- case X86::KSET0Q: return Expand2AddrKreg(MIB, get(X86::KXORQrr), X86::K0);
- case X86::KSET1W: return Expand2AddrKreg(MIB, get(X86::KXNORWrr), X86::K0);
- case X86::KSET1D: return Expand2AddrKreg(MIB, get(X86::KXNORDrr), X86::K0);
- case X86::KSET1Q: return Expand2AddrKreg(MIB, get(X86::KXNORQrr), X86::K0);
+ case X86::KSET0W:
+ return Expand2AddrKreg(MIB, get(X86::KXORWrr), X86::K0);
+ case X86::KSET0D:
+ return Expand2AddrKreg(MIB, get(X86::KXORDrr), X86::K0);
+ case X86::KSET0Q:
+ return Expand2AddrKreg(MIB, get(X86::KXORQrr), X86::K0);
+ case X86::KSET1W:
+ return Expand2AddrKreg(MIB, get(X86::KXNORWrr), X86::K0);
+ case X86::KSET1D:
+ return Expand2AddrKreg(MIB, get(X86::KXNORDrr), X86::K0);
+ case X86::KSET1Q:
+ return Expand2AddrKreg(MIB, get(X86::KXNORQrr), X86::K0);
case TargetOpcode::LOAD_STACK_GUARD:
expandLoadStackGuard(MIB, *this);
return true;
case X86::XOR64_FP:
case X86::XOR32_FP:
return expandXorFP(MIB, *this);
- case X86::SHLDROT32ri: return expandSHXDROT(MIB, get(X86::SHLD32rri8));
- case X86::SHLDROT64ri: return expandSHXDROT(MIB, get(X86::SHLD64rri8));
- case X86::SHRDROT32ri: return expandSHXDROT(MIB, get(X86::SHRD32rri8));
- case X86::SHRDROT64ri: return expandSHXDROT(MIB, get(X86::SHRD64rri8));
- case X86::ADD8rr_DB: MIB->setDesc(get(X86::OR8rr)); break;
- case X86::ADD16rr_DB: MIB->setDesc(get(X86::OR16rr)); break;
- case X86::ADD32rr_DB: MIB->setDesc(get(X86::OR32rr)); break;
- case X86::ADD64rr_DB: MIB->setDesc(get(X86::OR64rr)); break;
- case X86::ADD8ri_DB: MIB->setDesc(get(X86::OR8ri)); break;
- case X86::ADD16ri_DB: MIB->setDesc(get(X86::OR16ri)); break;
- case X86::ADD32ri_DB: MIB->setDesc(get(X86::OR32ri)); break;
- case X86::ADD64ri32_DB: MIB->setDesc(get(X86::OR64ri32)); break;
+ case X86::SHLDROT32ri:
+ return expandSHXDROT(MIB, get(X86::SHLD32rri8));
+ case X86::SHLDROT64ri:
+ return expandSHXDROT(MIB, get(X86::SHLD64rri8));
+ case X86::SHRDROT32ri:
+ return expandSHXDROT(MIB, get(X86::SHRD32rri8));
+ case X86::SHRDROT64ri:
+ return expandSHXDROT(MIB, get(X86::SHRD64rri8));
+ case X86::ADD8rr_DB:
+ MIB->setDesc(get(X86::OR8rr));
+ break;
+ case X86::ADD16rr_DB:
+ MIB->setDesc(get(X86::OR16rr));
+ break;
+ case X86::ADD32rr_DB:
+ MIB->setDesc(get(X86::OR32rr));
+ break;
+ case X86::ADD64rr_DB:
+ MIB->setDesc(get(X86::OR64rr));
+ break;
+ case X86::ADD8ri_DB:
+ MIB->setDesc(get(X86::OR8ri));
+ break;
+ case X86::ADD16ri_DB:
+ MIB->setDesc(get(X86::OR16ri));
+ break;
+ case X86::ADD32ri_DB:
+ MIB->setDesc(get(X86::OR32ri));
+ break;
+ case X86::ADD64ri32_DB:
+ MIB->setDesc(get(X86::OR64ri32));
+ break;
}
return false;
}
@@ -5201,8 +6240,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
///
/// FIXME: This should be turned into a TSFlags.
///
-static bool hasPartialRegUpdate(unsigned Opcode,
- const X86Subtarget &Subtarget,
+static bool hasPartialRegUpdate(unsigned Opcode, const X86Subtarget &Subtarget,
bool ForLoadFold = false) {
switch (Opcode) {
case X86::CVTSI2SSrr:
@@ -6098,13 +7136,14 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
- if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment >= Align(4)) {
+ if ((Size == 0 || Size >= 16) && RCSize >= 16 &&
+ (MI.getOpcode() != X86::INSERTPSrr || Alignment >= Align(4))) {
int PtrOffset = SrcIdx * 4;
unsigned NewImm = (DstIdx << 4) | ZMask;
unsigned NewOpCode =
- (MI.getOpcode() == X86::VINSERTPSZrr) ? X86::VINSERTPSZrm :
- (MI.getOpcode() == X86::VINSERTPSrr) ? X86::VINSERTPSrm :
- X86::INSERTPSrm;
+ (MI.getOpcode() == X86::VINSERTPSZrr) ? X86::VINSERTPSZrm
+ : (MI.getOpcode() == X86::VINSERTPSrr) ? X86::VINSERTPSrm
+ : X86::INSERTPSrm;
MachineInstr *NewMI =
FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, PtrOffset);
NewMI->getOperand(NewMI->getNumOperands() - 1).setImm(NewImm);
@@ -6124,9 +7163,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment >= Align(8)) {
unsigned NewOpCode =
- (MI.getOpcode() == X86::VMOVHLPSZrr) ? X86::VMOVLPSZ128rm :
- (MI.getOpcode() == X86::VMOVHLPSrr) ? X86::VMOVLPSrm :
- X86::MOVLPSrm;
+ (MI.getOpcode() == X86::VMOVHLPSZrr) ? X86::VMOVLPSZ128rm
+ : (MI.getOpcode() == X86::VMOVHLPSrr) ? X86::VMOVLPSrm
+ : X86::MOVLPSrm;
MachineInstr *NewMI =
FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, 8);
return NewMI;
@@ -6155,7 +7194,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF,
MachineInstr &MI) {
- if (!hasUndefRegUpdate(MI.getOpcode(), 1, /*ForLoadFold*/true) ||
+ if (!hasUndefRegUpdate(MI.getOpcode(), 1, /*ForLoadFold*/ true) ||
!MI.getOperand(1).isReg())
return false;
@@ -6190,7 +7229,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
// Avoid partial and undef register update stalls unless optimizing for size.
if (!MF.getFunction().hasOptSize() &&
- (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
+ (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/ true) ||
shouldPreventUndefRegUpdateMemFold(MF, MI)))
return nullptr;
@@ -6224,7 +7263,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
MF, MI, OpNum, MOs, InsertPt, Size, Alignment))
return CustomMI;
- const X86MemoryFoldTableEntry *I = nullptr;
+ const X86FoldTableEntry *I = nullptr;
// Folding a memory location into the two-address part of a two-address
// instruction is different than folding it other places. It requires
@@ -6252,13 +7291,13 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
isTwoAddrFold || (OpNum == 0 && I->Flags & TB_FOLDED_LOAD) || OpNum > 0;
bool FoldedStore =
isTwoAddrFold || (OpNum == 0 && I->Flags & TB_FOLDED_STORE);
- if (Alignment < Align(1ULL << ((I->Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT)))
+ if (Alignment <
+ Align(1ULL << ((I->Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT)))
return nullptr;
bool NarrowToMOV32rm = false;
if (Size) {
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum,
- &RI, MF);
+ const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
// Check if it's safe to fold the load. If the size of the object is
// narrower than the load width, then it's not.
@@ -6361,19 +7400,17 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
return nullptr;
}
-MachineInstr *
-X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
- ArrayRef<unsigned> Ops,
- MachineBasicBlock::iterator InsertPt,
- int FrameIndex, LiveIntervals *LIS,
- VirtRegMap *VRM) const {
+MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
+ VirtRegMap *VRM) const {
// Check switch flag
if (NoFusing)
return nullptr;
// Avoid partial and undef register update stalls unless optimizing for size.
if (!MF.getFunction().hasOptSize() &&
- (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
+ (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/ true) ||
shouldPreventUndefRegUpdateMemFold(MF, MI)))
return nullptr;
@@ -6397,11 +7434,24 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
unsigned NewOpc = 0;
unsigned RCSize = 0;
switch (MI.getOpcode()) {
- default: return nullptr;
- case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break;
- case X86::TEST16rr: NewOpc = X86::CMP16ri; RCSize = 2; break;
- case X86::TEST32rr: NewOpc = X86::CMP32ri; RCSize = 4; break;
- case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break;
+ default:
+ return nullptr;
+ case X86::TEST8rr:
+ NewOpc = X86::CMP8ri;
+ RCSize = 1;
+ break;
+ case X86::TEST16rr:
+ NewOpc = X86::CMP16ri;
+ RCSize = 2;
+ break;
+ case X86::TEST32rr:
+ NewOpc = X86::CMP32ri;
+ RCSize = 4;
+ break;
+ case X86::TEST64rr:
+ NewOpc = X86::CMP64ri32;
+ RCSize = 8;
+ break;
}
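    // i.e. TESTxxrr %r, %r is first rewritten as CMPxxri %r, 0, so that the
    // register operand can then be folded against the spilled stack slot.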
// Check if it's safe to fold the load. If the size of the object is
// narrower than the load width, then it's not.
@@ -6455,61 +7505,125 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VCVTSS2SDZrr_Int:
case X86::VCVTSS2SDZrr_Intk:
case X86::VCVTSS2SDZrr_Intkz:
- case X86::CVTSS2SIrr_Int: case X86::CVTSS2SI64rr_Int:
- case X86::VCVTSS2SIrr_Int: case X86::VCVTSS2SI64rr_Int:
- case X86::VCVTSS2SIZrr_Int: case X86::VCVTSS2SI64Zrr_Int:
- case X86::CVTTSS2SIrr_Int: case X86::CVTTSS2SI64rr_Int:
- case X86::VCVTTSS2SIrr_Int: case X86::VCVTTSS2SI64rr_Int:
- case X86::VCVTTSS2SIZrr_Int: case X86::VCVTTSS2SI64Zrr_Int:
- case X86::VCVTSS2USIZrr_Int: case X86::VCVTSS2USI64Zrr_Int:
- case X86::VCVTTSS2USIZrr_Int: case X86::VCVTTSS2USI64Zrr_Int:
- case X86::RCPSSr_Int: case X86::VRCPSSr_Int:
- case X86::RSQRTSSr_Int: case X86::VRSQRTSSr_Int:
- case X86::ROUNDSSr_Int: case X86::VROUNDSSr_Int:
- case X86::COMISSrr_Int: case X86::VCOMISSrr_Int: case X86::VCOMISSZrr_Int:
- case X86::UCOMISSrr_Int:case X86::VUCOMISSrr_Int:case X86::VUCOMISSZrr_Int:
- case X86::ADDSSrr_Int: case X86::VADDSSrr_Int: case X86::VADDSSZrr_Int:
- case X86::CMPSSrr_Int: case X86::VCMPSSrr_Int: case X86::VCMPSSZrr_Int:
- case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int: case X86::VDIVSSZrr_Int:
- case X86::MAXSSrr_Int: case X86::VMAXSSrr_Int: case X86::VMAXSSZrr_Int:
- case X86::MINSSrr_Int: case X86::VMINSSrr_Int: case X86::VMINSSZrr_Int:
- case X86::MULSSrr_Int: case X86::VMULSSrr_Int: case X86::VMULSSZrr_Int:
- case X86::SQRTSSr_Int: case X86::VSQRTSSr_Int: case X86::VSQRTSSZr_Int:
- case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int: case X86::VSUBSSZrr_Int:
- case X86::VADDSSZrr_Intk: case X86::VADDSSZrr_Intkz:
+ case X86::CVTSS2SIrr_Int:
+ case X86::CVTSS2SI64rr_Int:
+ case X86::VCVTSS2SIrr_Int:
+ case X86::VCVTSS2SI64rr_Int:
+ case X86::VCVTSS2SIZrr_Int:
+ case X86::VCVTSS2SI64Zrr_Int:
+ case X86::CVTTSS2SIrr_Int:
+ case X86::CVTTSS2SI64rr_Int:
+ case X86::VCVTTSS2SIrr_Int:
+ case X86::VCVTTSS2SI64rr_Int:
+ case X86::VCVTTSS2SIZrr_Int:
+ case X86::VCVTTSS2SI64Zrr_Int:
+ case X86::VCVTSS2USIZrr_Int:
+ case X86::VCVTSS2USI64Zrr_Int:
+ case X86::VCVTTSS2USIZrr_Int:
+ case X86::VCVTTSS2USI64Zrr_Int:
+ case X86::RCPSSr_Int:
+ case X86::VRCPSSr_Int:
+ case X86::RSQRTSSr_Int:
+ case X86::VRSQRTSSr_Int:
+ case X86::ROUNDSSr_Int:
+ case X86::VROUNDSSr_Int:
+ case X86::COMISSrr_Int:
+ case X86::VCOMISSrr_Int:
+ case X86::VCOMISSZrr_Int:
+ case X86::UCOMISSrr_Int:
+ case X86::VUCOMISSrr_Int:
+ case X86::VUCOMISSZrr_Int:
+ case X86::ADDSSrr_Int:
+ case X86::VADDSSrr_Int:
+ case X86::VADDSSZrr_Int:
+ case X86::CMPSSrr_Int:
+ case X86::VCMPSSrr_Int:
+ case X86::VCMPSSZrr_Int:
+ case X86::DIVSSrr_Int:
+ case X86::VDIVSSrr_Int:
+ case X86::VDIVSSZrr_Int:
+ case X86::MAXSSrr_Int:
+ case X86::VMAXSSrr_Int:
+ case X86::VMAXSSZrr_Int:
+ case X86::MINSSrr_Int:
+ case X86::VMINSSrr_Int:
+ case X86::VMINSSZrr_Int:
+ case X86::MULSSrr_Int:
+ case X86::VMULSSrr_Int:
+ case X86::VMULSSZrr_Int:
+ case X86::SQRTSSr_Int:
+ case X86::VSQRTSSr_Int:
+ case X86::VSQRTSSZr_Int:
+ case X86::SUBSSrr_Int:
+ case X86::VSUBSSrr_Int:
+ case X86::VSUBSSZrr_Int:
+ case X86::VADDSSZrr_Intk:
+ case X86::VADDSSZrr_Intkz:
case X86::VCMPSSZrr_Intk:
- case X86::VDIVSSZrr_Intk: case X86::VDIVSSZrr_Intkz:
- case X86::VMAXSSZrr_Intk: case X86::VMAXSSZrr_Intkz:
- case X86::VMINSSZrr_Intk: case X86::VMINSSZrr_Intkz:
- case X86::VMULSSZrr_Intk: case X86::VMULSSZrr_Intkz:
- case X86::VSQRTSSZr_Intk: case X86::VSQRTSSZr_Intkz:
- case X86::VSUBSSZrr_Intk: case X86::VSUBSSZrr_Intkz:
- case X86::VFMADDSS4rr_Int: case X86::VFNMADDSS4rr_Int:
- case X86::VFMSUBSS4rr_Int: case X86::VFNMSUBSS4rr_Int:
- case X86::VFMADD132SSr_Int: case X86::VFNMADD132SSr_Int:
- case X86::VFMADD213SSr_Int: case X86::VFNMADD213SSr_Int:
- case X86::VFMADD231SSr_Int: case X86::VFNMADD231SSr_Int:
- case X86::VFMSUB132SSr_Int: case X86::VFNMSUB132SSr_Int:
- case X86::VFMSUB213SSr_Int: case X86::VFNMSUB213SSr_Int:
- case X86::VFMSUB231SSr_Int: case X86::VFNMSUB231SSr_Int:
- case X86::VFMADD132SSZr_Int: case X86::VFNMADD132SSZr_Int:
- case X86::VFMADD213SSZr_Int: case X86::VFNMADD213SSZr_Int:
- case X86::VFMADD231SSZr_Int: case X86::VFNMADD231SSZr_Int:
- case X86::VFMSUB132SSZr_Int: case X86::VFNMSUB132SSZr_Int:
- case X86::VFMSUB213SSZr_Int: case X86::VFNMSUB213SSZr_Int:
- case X86::VFMSUB231SSZr_Int: case X86::VFNMSUB231SSZr_Int:
- case X86::VFMADD132SSZr_Intk: case X86::VFNMADD132SSZr_Intk:
- case X86::VFMADD213SSZr_Intk: case X86::VFNMADD213SSZr_Intk:
- case X86::VFMADD231SSZr_Intk: case X86::VFNMADD231SSZr_Intk:
- case X86::VFMSUB132SSZr_Intk: case X86::VFNMSUB132SSZr_Intk:
- case X86::VFMSUB213SSZr_Intk: case X86::VFNMSUB213SSZr_Intk:
- case X86::VFMSUB231SSZr_Intk: case X86::VFNMSUB231SSZr_Intk:
- case X86::VFMADD132SSZr_Intkz: case X86::VFNMADD132SSZr_Intkz:
- case X86::VFMADD213SSZr_Intkz: case X86::VFNMADD213SSZr_Intkz:
- case X86::VFMADD231SSZr_Intkz: case X86::VFNMADD231SSZr_Intkz:
- case X86::VFMSUB132SSZr_Intkz: case X86::VFNMSUB132SSZr_Intkz:
- case X86::VFMSUB213SSZr_Intkz: case X86::VFNMSUB213SSZr_Intkz:
- case X86::VFMSUB231SSZr_Intkz: case X86::VFNMSUB231SSZr_Intkz:
+ case X86::VDIVSSZrr_Intk:
+ case X86::VDIVSSZrr_Intkz:
+ case X86::VMAXSSZrr_Intk:
+ case X86::VMAXSSZrr_Intkz:
+ case X86::VMINSSZrr_Intk:
+ case X86::VMINSSZrr_Intkz:
+ case X86::VMULSSZrr_Intk:
+ case X86::VMULSSZrr_Intkz:
+ case X86::VSQRTSSZr_Intk:
+ case X86::VSQRTSSZr_Intkz:
+ case X86::VSUBSSZrr_Intk:
+ case X86::VSUBSSZrr_Intkz:
+ case X86::VFMADDSS4rr_Int:
+ case X86::VFNMADDSS4rr_Int:
+ case X86::VFMSUBSS4rr_Int:
+ case X86::VFNMSUBSS4rr_Int:
+ case X86::VFMADD132SSr_Int:
+ case X86::VFNMADD132SSr_Int:
+ case X86::VFMADD213SSr_Int:
+ case X86::VFNMADD213SSr_Int:
+ case X86::VFMADD231SSr_Int:
+ case X86::VFNMADD231SSr_Int:
+ case X86::VFMSUB132SSr_Int:
+ case X86::VFNMSUB132SSr_Int:
+ case X86::VFMSUB213SSr_Int:
+ case X86::VFNMSUB213SSr_Int:
+ case X86::VFMSUB231SSr_Int:
+ case X86::VFNMSUB231SSr_Int:
+ case X86::VFMADD132SSZr_Int:
+ case X86::VFNMADD132SSZr_Int:
+ case X86::VFMADD213SSZr_Int:
+ case X86::VFNMADD213SSZr_Int:
+ case X86::VFMADD231SSZr_Int:
+ case X86::VFNMADD231SSZr_Int:
+ case X86::VFMSUB132SSZr_Int:
+ case X86::VFNMSUB132SSZr_Int:
+ case X86::VFMSUB213SSZr_Int:
+ case X86::VFNMSUB213SSZr_Int:
+ case X86::VFMSUB231SSZr_Int:
+ case X86::VFNMSUB231SSZr_Int:
+ case X86::VFMADD132SSZr_Intk:
+ case X86::VFNMADD132SSZr_Intk:
+ case X86::VFMADD213SSZr_Intk:
+ case X86::VFNMADD213SSZr_Intk:
+ case X86::VFMADD231SSZr_Intk:
+ case X86::VFNMADD231SSZr_Intk:
+ case X86::VFMSUB132SSZr_Intk:
+ case X86::VFNMSUB132SSZr_Intk:
+ case X86::VFMSUB213SSZr_Intk:
+ case X86::VFNMSUB213SSZr_Intk:
+ case X86::VFMSUB231SSZr_Intk:
+ case X86::VFNMSUB231SSZr_Intk:
+ case X86::VFMADD132SSZr_Intkz:
+ case X86::VFNMADD132SSZr_Intkz:
+ case X86::VFMADD213SSZr_Intkz:
+ case X86::VFNMADD213SSZr_Intkz:
+ case X86::VFMADD231SSZr_Intkz:
+ case X86::VFNMADD231SSZr_Intkz:
+ case X86::VFMSUB132SSZr_Intkz:
+ case X86::VFNMSUB132SSZr_Intkz:
+ case X86::VFMSUB213SSZr_Intkz:
+ case X86::VFNMSUB213SSZr_Intkz:
+ case X86::VFMSUB231SSZr_Intkz:
+ case X86::VFNMSUB231SSZr_Intkz:
case X86::VFIXUPIMMSSZrri:
case X86::VFIXUPIMMSSZrrik:
case X86::VFIXUPIMMSSZrrikz:
@@ -6564,59 +7678,121 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VCVTSD2SSZrr_Int:
case X86::VCVTSD2SSZrr_Intk:
case X86::VCVTSD2SSZrr_Intkz:
- case X86::CVTSD2SIrr_Int: case X86::CVTSD2SI64rr_Int:
- case X86::VCVTSD2SIrr_Int: case X86::VCVTSD2SI64rr_Int:
- case X86::VCVTSD2SIZrr_Int: case X86::VCVTSD2SI64Zrr_Int:
- case X86::CVTTSD2SIrr_Int: case X86::CVTTSD2SI64rr_Int:
- case X86::VCVTTSD2SIrr_Int: case X86::VCVTTSD2SI64rr_Int:
- case X86::VCVTTSD2SIZrr_Int: case X86::VCVTTSD2SI64Zrr_Int:
- case X86::VCVTSD2USIZrr_Int: case X86::VCVTSD2USI64Zrr_Int:
- case X86::VCVTTSD2USIZrr_Int: case X86::VCVTTSD2USI64Zrr_Int:
- case X86::ROUNDSDr_Int: case X86::VROUNDSDr_Int:
- case X86::COMISDrr_Int: case X86::VCOMISDrr_Int: case X86::VCOMISDZrr_Int:
- case X86::UCOMISDrr_Int:case X86::VUCOMISDrr_Int:case X86::VUCOMISDZrr_Int:
- case X86::ADDSDrr_Int: case X86::VADDSDrr_Int: case X86::VADDSDZrr_Int:
- case X86::CMPSDrr_Int: case X86::VCMPSDrr_Int: case X86::VCMPSDZrr_Int:
- case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int: case X86::VDIVSDZrr_Int:
- case X86::MAXSDrr_Int: case X86::VMAXSDrr_Int: case X86::VMAXSDZrr_Int:
- case X86::MINSDrr_Int: case X86::VMINSDrr_Int: case X86::VMINSDZrr_Int:
- case X86::MULSDrr_Int: case X86::VMULSDrr_Int: case X86::VMULSDZrr_Int:
- case X86::SQRTSDr_Int: case X86::VSQRTSDr_Int: case X86::VSQRTSDZr_Int:
- case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int: case X86::VSUBSDZrr_Int:
- case X86::VADDSDZrr_Intk: case X86::VADDSDZrr_Intkz:
+ case X86::CVTSD2SIrr_Int:
+ case X86::CVTSD2SI64rr_Int:
+ case X86::VCVTSD2SIrr_Int:
+ case X86::VCVTSD2SI64rr_Int:
+ case X86::VCVTSD2SIZrr_Int:
+ case X86::VCVTSD2SI64Zrr_Int:
+ case X86::CVTTSD2SIrr_Int:
+ case X86::CVTTSD2SI64rr_Int:
+ case X86::VCVTTSD2SIrr_Int:
+ case X86::VCVTTSD2SI64rr_Int:
+ case X86::VCVTTSD2SIZrr_Int:
+ case X86::VCVTTSD2SI64Zrr_Int:
+ case X86::VCVTSD2USIZrr_Int:
+ case X86::VCVTSD2USI64Zrr_Int:
+ case X86::VCVTTSD2USIZrr_Int:
+ case X86::VCVTTSD2USI64Zrr_Int:
+ case X86::ROUNDSDr_Int:
+ case X86::VROUNDSDr_Int:
+ case X86::COMISDrr_Int:
+ case X86::VCOMISDrr_Int:
+ case X86::VCOMISDZrr_Int:
+ case X86::UCOMISDrr_Int:
+ case X86::VUCOMISDrr_Int:
+ case X86::VUCOMISDZrr_Int:
+ case X86::ADDSDrr_Int:
+ case X86::VADDSDrr_Int:
+ case X86::VADDSDZrr_Int:
+ case X86::CMPSDrr_Int:
+ case X86::VCMPSDrr_Int:
+ case X86::VCMPSDZrr_Int:
+ case X86::DIVSDrr_Int:
+ case X86::VDIVSDrr_Int:
+ case X86::VDIVSDZrr_Int:
+ case X86::MAXSDrr_Int:
+ case X86::VMAXSDrr_Int:
+ case X86::VMAXSDZrr_Int:
+ case X86::MINSDrr_Int:
+ case X86::VMINSDrr_Int:
+ case X86::VMINSDZrr_Int:
+ case X86::MULSDrr_Int:
+ case X86::VMULSDrr_Int:
+ case X86::VMULSDZrr_Int:
+ case X86::SQRTSDr_Int:
+ case X86::VSQRTSDr_Int:
+ case X86::VSQRTSDZr_Int:
+ case X86::SUBSDrr_Int:
+ case X86::VSUBSDrr_Int:
+ case X86::VSUBSDZrr_Int:
+ case X86::VADDSDZrr_Intk:
+ case X86::VADDSDZrr_Intkz:
case X86::VCMPSDZrr_Intk:
- case X86::VDIVSDZrr_Intk: case X86::VDIVSDZrr_Intkz:
- case X86::VMAXSDZrr_Intk: case X86::VMAXSDZrr_Intkz:
- case X86::VMINSDZrr_Intk: case X86::VMINSDZrr_Intkz:
- case X86::VMULSDZrr_Intk: case X86::VMULSDZrr_Intkz:
- case X86::VSQRTSDZr_Intk: case X86::VSQRTSDZr_Intkz:
- case X86::VSUBSDZrr_Intk: case X86::VSUBSDZrr_Intkz:
- case X86::VFMADDSD4rr_Int: case X86::VFNMADDSD4rr_Int:
- case X86::VFMSUBSD4rr_Int: case X86::VFNMSUBSD4rr_Int:
- case X86::VFMADD132SDr_Int: case X86::VFNMADD132SDr_Int:
- case X86::VFMADD213SDr_Int: case X86::VFNMADD213SDr_Int:
- case X86::VFMADD231SDr_Int: case X86::VFNMADD231SDr_Int:
- case X86::VFMSUB132SDr_Int: case X86::VFNMSUB132SDr_Int:
- case X86::VFMSUB213SDr_Int: case X86::VFNMSUB213SDr_Int:
- case X86::VFMSUB231SDr_Int: case X86::VFNMSUB231SDr_Int:
- case X86::VFMADD132SDZr_Int: case X86::VFNMADD132SDZr_Int:
- case X86::VFMADD213SDZr_Int: case X86::VFNMADD213SDZr_Int:
- case X86::VFMADD231SDZr_Int: case X86::VFNMADD231SDZr_Int:
- case X86::VFMSUB132SDZr_Int: case X86::VFNMSUB132SDZr_Int:
- case X86::VFMSUB213SDZr_Int: case X86::VFNMSUB213SDZr_Int:
- case X86::VFMSUB231SDZr_Int: case X86::VFNMSUB231SDZr_Int:
- case X86::VFMADD132SDZr_Intk: case X86::VFNMADD132SDZr_Intk:
- case X86::VFMADD213SDZr_Intk: case X86::VFNMADD213SDZr_Intk:
- case X86::VFMADD231SDZr_Intk: case X86::VFNMADD231SDZr_Intk:
- case X86::VFMSUB132SDZr_Intk: case X86::VFNMSUB132SDZr_Intk:
- case X86::VFMSUB213SDZr_Intk: case X86::VFNMSUB213SDZr_Intk:
- case X86::VFMSUB231SDZr_Intk: case X86::VFNMSUB231SDZr_Intk:
- case X86::VFMADD132SDZr_Intkz: case X86::VFNMADD132SDZr_Intkz:
- case X86::VFMADD213SDZr_Intkz: case X86::VFNMADD213SDZr_Intkz:
- case X86::VFMADD231SDZr_Intkz: case X86::VFNMADD231SDZr_Intkz:
- case X86::VFMSUB132SDZr_Intkz: case X86::VFNMSUB132SDZr_Intkz:
- case X86::VFMSUB213SDZr_Intkz: case X86::VFNMSUB213SDZr_Intkz:
- case X86::VFMSUB231SDZr_Intkz: case X86::VFNMSUB231SDZr_Intkz:
+ case X86::VDIVSDZrr_Intk:
+ case X86::VDIVSDZrr_Intkz:
+ case X86::VMAXSDZrr_Intk:
+ case X86::VMAXSDZrr_Intkz:
+ case X86::VMINSDZrr_Intk:
+ case X86::VMINSDZrr_Intkz:
+ case X86::VMULSDZrr_Intk:
+ case X86::VMULSDZrr_Intkz:
+ case X86::VSQRTSDZr_Intk:
+ case X86::VSQRTSDZr_Intkz:
+ case X86::VSUBSDZrr_Intk:
+ case X86::VSUBSDZrr_Intkz:
+ case X86::VFMADDSD4rr_Int:
+ case X86::VFNMADDSD4rr_Int:
+ case X86::VFMSUBSD4rr_Int:
+ case X86::VFNMSUBSD4rr_Int:
+ case X86::VFMADD132SDr_Int:
+ case X86::VFNMADD132SDr_Int:
+ case X86::VFMADD213SDr_Int:
+ case X86::VFNMADD213SDr_Int:
+ case X86::VFMADD231SDr_Int:
+ case X86::VFNMADD231SDr_Int:
+ case X86::VFMSUB132SDr_Int:
+ case X86::VFNMSUB132SDr_Int:
+ case X86::VFMSUB213SDr_Int:
+ case X86::VFNMSUB213SDr_Int:
+ case X86::VFMSUB231SDr_Int:
+ case X86::VFNMSUB231SDr_Int:
+ case X86::VFMADD132SDZr_Int:
+ case X86::VFNMADD132SDZr_Int:
+ case X86::VFMADD213SDZr_Int:
+ case X86::VFNMADD213SDZr_Int:
+ case X86::VFMADD231SDZr_Int:
+ case X86::VFNMADD231SDZr_Int:
+ case X86::VFMSUB132SDZr_Int:
+ case X86::VFNMSUB132SDZr_Int:
+ case X86::VFMSUB213SDZr_Int:
+ case X86::VFNMSUB213SDZr_Int:
+ case X86::VFMSUB231SDZr_Int:
+ case X86::VFNMSUB231SDZr_Int:
+ case X86::VFMADD132SDZr_Intk:
+ case X86::VFNMADD132SDZr_Intk:
+ case X86::VFMADD213SDZr_Intk:
+ case X86::VFNMADD213SDZr_Intk:
+ case X86::VFMADD231SDZr_Intk:
+ case X86::VFNMADD231SDZr_Intk:
+ case X86::VFMSUB132SDZr_Intk:
+ case X86::VFNMSUB132SDZr_Intk:
+ case X86::VFMSUB213SDZr_Intk:
+ case X86::VFNMSUB213SDZr_Intk:
+ case X86::VFMSUB231SDZr_Intk:
+ case X86::VFNMSUB231SDZr_Intk:
+ case X86::VFMADD132SDZr_Intkz:
+ case X86::VFNMADD132SDZr_Intkz:
+ case X86::VFMADD213SDZr_Intkz:
+ case X86::VFNMADD213SDZr_Intkz:
+ case X86::VFMADD231SDZr_Intkz:
+ case X86::VFNMADD231SDZr_Intkz:
+ case X86::VFMSUB132SDZr_Intkz:
+ case X86::VFNMSUB132SDZr_Intkz:
+ case X86::VFMSUB213SDZr_Intkz:
+ case X86::VFNMSUB213SDZr_Intkz:
+ case X86::VFMSUB231SDZr_Intkz:
+ case X86::VFNMSUB231SDZr_Intkz:
case X86::VFIXUPIMMSDZrri:
case X86::VFIXUPIMMSDZrrik:
case X86::VFIXUPIMMSDZrrikz:
@@ -6670,31 +7846,55 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VMINSHZrr_Int:
case X86::VMULSHZrr_Int:
case X86::VSUBSHZrr_Int:
- case X86::VADDSHZrr_Intk: case X86::VADDSHZrr_Intkz:
+ case X86::VADDSHZrr_Intk:
+ case X86::VADDSHZrr_Intkz:
case X86::VCMPSHZrr_Intk:
- case X86::VDIVSHZrr_Intk: case X86::VDIVSHZrr_Intkz:
- case X86::VMAXSHZrr_Intk: case X86::VMAXSHZrr_Intkz:
- case X86::VMINSHZrr_Intk: case X86::VMINSHZrr_Intkz:
- case X86::VMULSHZrr_Intk: case X86::VMULSHZrr_Intkz:
- case X86::VSUBSHZrr_Intk: case X86::VSUBSHZrr_Intkz:
- case X86::VFMADD132SHZr_Int: case X86::VFNMADD132SHZr_Int:
- case X86::VFMADD213SHZr_Int: case X86::VFNMADD213SHZr_Int:
- case X86::VFMADD231SHZr_Int: case X86::VFNMADD231SHZr_Int:
- case X86::VFMSUB132SHZr_Int: case X86::VFNMSUB132SHZr_Int:
- case X86::VFMSUB213SHZr_Int: case X86::VFNMSUB213SHZr_Int:
- case X86::VFMSUB231SHZr_Int: case X86::VFNMSUB231SHZr_Int:
- case X86::VFMADD132SHZr_Intk: case X86::VFNMADD132SHZr_Intk:
- case X86::VFMADD213SHZr_Intk: case X86::VFNMADD213SHZr_Intk:
- case X86::VFMADD231SHZr_Intk: case X86::VFNMADD231SHZr_Intk:
- case X86::VFMSUB132SHZr_Intk: case X86::VFNMSUB132SHZr_Intk:
- case X86::VFMSUB213SHZr_Intk: case X86::VFNMSUB213SHZr_Intk:
- case X86::VFMSUB231SHZr_Intk: case X86::VFNMSUB231SHZr_Intk:
- case X86::VFMADD132SHZr_Intkz: case X86::VFNMADD132SHZr_Intkz:
- case X86::VFMADD213SHZr_Intkz: case X86::VFNMADD213SHZr_Intkz:
- case X86::VFMADD231SHZr_Intkz: case X86::VFNMADD231SHZr_Intkz:
- case X86::VFMSUB132SHZr_Intkz: case X86::VFNMSUB132SHZr_Intkz:
- case X86::VFMSUB213SHZr_Intkz: case X86::VFNMSUB213SHZr_Intkz:
- case X86::VFMSUB231SHZr_Intkz: case X86::VFNMSUB231SHZr_Intkz:
+ case X86::VDIVSHZrr_Intk:
+ case X86::VDIVSHZrr_Intkz:
+ case X86::VMAXSHZrr_Intk:
+ case X86::VMAXSHZrr_Intkz:
+ case X86::VMINSHZrr_Intk:
+ case X86::VMINSHZrr_Intkz:
+ case X86::VMULSHZrr_Intk:
+ case X86::VMULSHZrr_Intkz:
+ case X86::VSUBSHZrr_Intk:
+ case X86::VSUBSHZrr_Intkz:
+ case X86::VFMADD132SHZr_Int:
+ case X86::VFNMADD132SHZr_Int:
+ case X86::VFMADD213SHZr_Int:
+ case X86::VFNMADD213SHZr_Int:
+ case X86::VFMADD231SHZr_Int:
+ case X86::VFNMADD231SHZr_Int:
+ case X86::VFMSUB132SHZr_Int:
+ case X86::VFNMSUB132SHZr_Int:
+ case X86::VFMSUB213SHZr_Int:
+ case X86::VFNMSUB213SHZr_Int:
+ case X86::VFMSUB231SHZr_Int:
+ case X86::VFNMSUB231SHZr_Int:
+ case X86::VFMADD132SHZr_Intk:
+ case X86::VFNMADD132SHZr_Intk:
+ case X86::VFMADD213SHZr_Intk:
+ case X86::VFNMADD213SHZr_Intk:
+ case X86::VFMADD231SHZr_Intk:
+ case X86::VFNMADD231SHZr_Intk:
+ case X86::VFMSUB132SHZr_Intk:
+ case X86::VFNMSUB132SHZr_Intk:
+ case X86::VFMSUB213SHZr_Intk:
+ case X86::VFNMSUB213SHZr_Intk:
+ case X86::VFMSUB231SHZr_Intk:
+ case X86::VFNMSUB231SHZr_Intk:
+ case X86::VFMADD132SHZr_Intkz:
+ case X86::VFNMADD132SHZr_Intkz:
+ case X86::VFMADD213SHZr_Intkz:
+ case X86::VFNMADD213SHZr_Intkz:
+ case X86::VFMADD231SHZr_Intkz:
+ case X86::VFNMADD231SHZr_Intkz:
+ case X86::VFMSUB132SHZr_Intkz:
+ case X86::VFNMSUB132SHZr_Intkz:
+ case X86::VFMSUB213SHZr_Intkz:
+ case X86::VFNMSUB213SHZr_Intkz:
+ case X86::VFMSUB231SHZr_Intkz:
+ case X86::VFNMSUB231SHZr_Intkz:
return false;
default:
return true;
@@ -6726,11 +7926,12 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
}
// Check switch flag
- if (NoFusing) return nullptr;
+ if (NoFusing)
+ return nullptr;
// Avoid partial and undef register update stalls unless optimizing for size.
if (!MF.getFunction().hasOptSize() &&
- (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
+ (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/ true) ||
shouldPreventUndefRegUpdateMemFold(MF, MI)))
return nullptr;
@@ -6776,11 +7977,20 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
unsigned NewOpc = 0;
switch (MI.getOpcode()) {
- default: return nullptr;
- case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
- case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
- case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
- case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
+ default:
+ return nullptr;
+ case X86::TEST8rr:
+ NewOpc = X86::CMP8ri;
+ break;
+ case X86::TEST16rr:
+ NewOpc = X86::CMP16ri;
+ break;
+ case X86::TEST32rr:
+ NewOpc = X86::CMP32ri;
+ break;
+ case X86::TEST64rr:
+ NewOpc = X86::CMP64ri32;
+ break;
}
// Change to CMPXXri r, 0 first.
MI.setDesc(get(NewOpc));
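// A minimal sketch (not LLVM code) of why the TEST -> CMP rewrite above is
// sound: TEST r,r sets flags from r & r, which equals r, so ZF/SF match
// CMP r,0 exactly. The rewrite matters because CMP has a memory-immediate
// form the folder can target, while TEST's register operands could not
// otherwise be replaced by a load.
#include <cassert>
#include <cstdint>
static bool zfAfterTest(uint32_t R) { return (R & R) == 0; } // TEST r,r
static bool zfAfterCmp0(uint32_t R) { return R == 0; }       // CMP r,0
int main() {
  for (uint32_t R : {0u, 1u, 0x80000000u, 0xffffffffu})
    assert(zfAfterTest(R) == zfAfterCmp0(R));
}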
@@ -6793,7 +8003,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
if (LoadMI.getOperand(0).getSubReg() != MI.getOperand(Ops[0]).getSubReg())
return nullptr;
- SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
+ SmallVector<MachineOperand, X86::AddrNumOperands> MOs;
switch (LoadMI.getOpcode()) {
case X86::MMX_SET0:
case X86::V_SET0:
@@ -6816,9 +8026,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
// Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry and operands to load from it.
- // Medium and large mode can't fold loads this way.
- if (MF.getTarget().getCodeModel() != CodeModel::Small &&
- MF.getTarget().getCodeModel() != CodeModel::Kernel)
+ // Large code model can't fold loads this way.
+ if (MF.getTarget().getCodeModel() == CodeModel::Large)
return nullptr;
// x86-32 PIC requires a PIC base register for constant pools.
@@ -6861,11 +8070,11 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
4);
- bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES ||
- Opc == X86::AVX512_512_SETALLONES ||
- Opc == X86::AVX1_SETALLONES);
- const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
- Constant::getNullValue(Ty);
+ bool IsAllOnes =
+ (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES ||
+ Opc == X86::AVX512_512_SETALLONES || Opc == X86::AVX1_SETALLONES);
+ const Constant *C =
+ IsAllOnes ? Constant::getAllOnesValue(Ty) : Constant::getNullValue(Ty);
unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
// Create operands to load from the constant pool entry.
@@ -6932,7 +8141,7 @@ extractStoreMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
return StoreMMOs;
}
-static unsigned getBroadcastOpcode(const X86MemoryFoldTableEntry *I,
+static unsigned getBroadcastOpcode(const X86FoldTableEntry *I,
const TargetRegisterClass *RC,
const X86Subtarget &STI) {
assert(STI.hasAVX512() && "Expected at least AVX512!");
@@ -6941,37 +8150,54 @@ static unsigned getBroadcastOpcode(const X86MemoryFoldTableEntry *I,
"Can't broadcast less than 64 bytes without AVX512VL!");
switch (I->Flags & TB_BCAST_MASK) {
- default: llvm_unreachable("Unexpected broadcast type!");
+ default:
+ llvm_unreachable("Unexpected broadcast type!");
case TB_BCAST_D:
switch (SpillSize) {
- default: llvm_unreachable("Unknown spill size");
- case 16: return X86::VPBROADCASTDZ128rm;
- case 32: return X86::VPBROADCASTDZ256rm;
- case 64: return X86::VPBROADCASTDZrm;
+ default:
+ llvm_unreachable("Unknown spill size");
+ case 16:
+ return X86::VPBROADCASTDZ128rm;
+ case 32:
+ return X86::VPBROADCASTDZ256rm;
+ case 64:
+ return X86::VPBROADCASTDZrm;
}
break;
case TB_BCAST_Q:
switch (SpillSize) {
- default: llvm_unreachable("Unknown spill size");
- case 16: return X86::VPBROADCASTQZ128rm;
- case 32: return X86::VPBROADCASTQZ256rm;
- case 64: return X86::VPBROADCASTQZrm;
+ default:
+ llvm_unreachable("Unknown spill size");
+ case 16:
+ return X86::VPBROADCASTQZ128rm;
+ case 32:
+ return X86::VPBROADCASTQZ256rm;
+ case 64:
+ return X86::VPBROADCASTQZrm;
}
break;
case TB_BCAST_SS:
switch (SpillSize) {
- default: llvm_unreachable("Unknown spill size");
- case 16: return X86::VBROADCASTSSZ128rm;
- case 32: return X86::VBROADCASTSSZ256rm;
- case 64: return X86::VBROADCASTSSZrm;
+ default:
+ llvm_unreachable("Unknown spill size");
+ case 16:
+ return X86::VBROADCASTSSZ128rm;
+ case 32:
+ return X86::VBROADCASTSSZ256rm;
+ case 64:
+ return X86::VBROADCASTSSZrm;
}
break;
case TB_BCAST_SD:
switch (SpillSize) {
- default: llvm_unreachable("Unknown spill size");
- case 16: return X86::VMOVDDUPZ128rm;
- case 32: return X86::VBROADCASTSDZ256rm;
- case 64: return X86::VBROADCASTSDZrm;
+ default:
+ llvm_unreachable("Unknown spill size");
+ case 16:
+ return X86::VMOVDDUPZ128rm;
+ case 32:
+ return X86::VBROADCASTSDZ256rm;
+ case 64:
+ return X86::VBROADCASTSDZrm;
}
break;
}
@@ -6980,7 +8206,7 @@ static unsigned getBroadcastOpcode(const X86MemoryFoldTableEntry *I,
bool X86InstrInfo::unfoldMemoryOperand(
MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad,
bool UnfoldStore, SmallVectorImpl<MachineInstr *> &NewMIs) const {
- const X86MemoryFoldTableEntry *I = lookupUnfoldTable(MI.getOpcode());
+ const X86FoldTableEntry *I = lookupUnfoldTable(MI.getOpcode());
if (I == nullptr)
return false;
unsigned Opc = I->DstOp;
@@ -7007,9 +8233,9 @@ bool X86InstrInfo::unfoldMemoryOperand(
// performance.
return false;
SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps;
- SmallVector<MachineOperand,2> BeforeOps;
- SmallVector<MachineOperand,2> AfterOps;
- SmallVector<MachineOperand,4> ImpOps;
+ SmallVector<MachineOperand, 2> BeforeOps;
+ SmallVector<MachineOperand, 2> AfterOps;
+ SmallVector<MachineOperand, 4> ImpOps;
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &Op = MI.getOperand(i);
if (i >= Index && i < Index + X86::AddrNumOperands)
@@ -7065,16 +8291,16 @@ bool X86InstrInfo::unfoldMemoryOperand(
for (MachineOperand &AfterOp : AfterOps)
MIB.add(AfterOp);
for (MachineOperand &ImpOp : ImpOps) {
- MIB.addReg(ImpOp.getReg(),
- getDefRegState(ImpOp.isDef()) |
- RegState::Implicit |
- getKillRegState(ImpOp.isKill()) |
- getDeadRegState(ImpOp.isDead()) |
- getUndefRegState(ImpOp.isUndef()));
+ MIB.addReg(ImpOp.getReg(), getDefRegState(ImpOp.isDef()) |
+ RegState::Implicit |
+ getKillRegState(ImpOp.isKill()) |
+ getDeadRegState(ImpOp.isDead()) |
+ getUndefRegState(ImpOp.isUndef()));
}
// Change CMP32ri r, 0 back to TEST32rr r, r, etc.
switch (DataMI->getOpcode()) {
- default: break;
+ default:
+ break;
case X86::CMP64ri32:
case X86::CMP32ri:
case X86::CMP16ri:
@@ -7084,11 +8310,20 @@ bool X86InstrInfo::unfoldMemoryOperand(
if (MO1.isImm() && MO1.getImm() == 0) {
unsigned NewOpc;
switch (DataMI->getOpcode()) {
- default: llvm_unreachable("Unreachable!");
- case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
- case X86::CMP32ri: NewOpc = X86::TEST32rr; break;
- case X86::CMP16ri: NewOpc = X86::TEST16rr; break;
- case X86::CMP8ri: NewOpc = X86::TEST8rr; break;
+ default:
+ llvm_unreachable("Unreachable!");
+ case X86::CMP64ri32:
+ NewOpc = X86::TEST64rr;
+ break;
+ case X86::CMP32ri:
+ NewOpc = X86::TEST32rr;
+ break;
+ case X86::CMP16ri:
+ NewOpc = X86::TEST16rr;
+ break;
+ case X86::CMP8ri:
+ NewOpc = X86::TEST8rr;
+ break;
}
DataMI->setDesc(get(NewOpc));
MO1.ChangeToRegister(MO0.getReg(), false);
@@ -7116,13 +8351,12 @@ bool X86InstrInfo::unfoldMemoryOperand(
return true;
}
-bool
-X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
- SmallVectorImpl<SDNode*> &NewNodes) const {
+bool X86InstrInfo::unfoldMemoryOperand(
+ SelectionDAG &DAG, SDNode *N, SmallVectorImpl<SDNode *> &NewNodes) const {
if (!N->isMachineOpcode())
return false;
- const X86MemoryFoldTableEntry *I = lookupUnfoldTable(N->getMachineOpcode());
+ const X86FoldTableEntry *I = lookupUnfoldTable(N->getMachineOpcode());
if (I == nullptr)
return false;
unsigned Opc = I->DstOp;
@@ -7140,16 +8374,16 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
std::vector<SDValue> AfterOps;
SDLoc dl(N);
unsigned NumOps = N->getNumOperands();
- for (unsigned i = 0; i != NumOps-1; ++i) {
+ for (unsigned i = 0; i != NumOps - 1; ++i) {
SDValue Op = N->getOperand(i);
- if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands)
+ if (i >= Index - NumDefs && i < Index - NumDefs + X86::AddrNumOperands)
AddrOps.push_back(Op);
- else if (i < Index-NumDefs)
+ else if (i < Index - NumDefs)
BeforeOps.push_back(Op);
- else if (i > Index-NumDefs)
+ else if (i > Index - NumDefs)
AfterOps.push_back(Op);
}
- SDValue Chain = N->getOperand(NumOps-1);
+ SDValue Chain = N->getOperand(NumOps - 1);
AddrOps.push_back(Chain);
// Emit the load instruction.
@@ -7197,23 +8431,33 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
llvm::append_range(BeforeOps, AfterOps);
// Change CMP32ri r, 0 back to TEST32rr r, r, etc.
switch (Opc) {
- default: break;
- case X86::CMP64ri32:
- case X86::CMP32ri:
- case X86::CMP16ri:
- case X86::CMP8ri:
- if (isNullConstant(BeforeOps[1])) {
- switch (Opc) {
- default: llvm_unreachable("Unreachable!");
- case X86::CMP64ri32: Opc = X86::TEST64rr; break;
- case X86::CMP32ri: Opc = X86::TEST32rr; break;
- case X86::CMP16ri: Opc = X86::TEST16rr; break;
- case X86::CMP8ri: Opc = X86::TEST8rr; break;
- }
- BeforeOps[1] = BeforeOps[0];
+ default:
+ break;
+ case X86::CMP64ri32:
+ case X86::CMP32ri:
+ case X86::CMP16ri:
+ case X86::CMP8ri:
+ if (isNullConstant(BeforeOps[1])) {
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unreachable!");
+ case X86::CMP64ri32:
+ Opc = X86::TEST64rr;
+ break;
+ case X86::CMP32ri:
+ Opc = X86::TEST32rr;
+ break;
+ case X86::CMP16ri:
+ Opc = X86::TEST16rr;
+ break;
+ case X86::CMP8ri:
+ Opc = X86::TEST8rr;
+ break;
}
+ BeforeOps[1] = BeforeOps[0];
+ }
}
- SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, BeforeOps);
+ SDNode *NewNode = DAG.getMachineNode(Opc, dl, VTs, BeforeOps);
NewNodes.push_back(NewNode);
// Emit the store instruction.
@@ -7242,10 +8486,11 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
return true;
}
-unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
- bool UnfoldLoad, bool UnfoldStore,
- unsigned *LoadRegIndex) const {
- const X86MemoryFoldTableEntry *I = lookupUnfoldTable(Opc);
+unsigned
+X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad,
+ bool UnfoldStore,
+ unsigned *LoadRegIndex) const {
+ const X86FoldTableEntry *I = lookupUnfoldTable(Opc);
if (I == nullptr)
return 0;
bool FoldedLoad = I->Flags & TB_FOLDED_LOAD;
@@ -7259,179 +8504,106 @@ unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
return I->DstOp;
}
-bool
-X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
- int64_t &Offset1, int64_t &Offset2) const {
+bool X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1,
+ int64_t &Offset2) const {
if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
return false;
- unsigned Opc1 = Load1->getMachineOpcode();
- unsigned Opc2 = Load2->getMachineOpcode();
- switch (Opc1) {
- default: return false;
- case X86::MOV8rm:
- case X86::MOV16rm:
- case X86::MOV32rm:
- case X86::MOV64rm:
- case X86::LD_Fp32m:
- case X86::LD_Fp64m:
- case X86::LD_Fp80m:
- case X86::MOVSSrm:
- case X86::MOVSSrm_alt:
- case X86::MOVSDrm:
- case X86::MOVSDrm_alt:
- case X86::MMX_MOVD64rm:
- case X86::MMX_MOVQ64rm:
- case X86::MOVAPSrm:
- case X86::MOVUPSrm:
- case X86::MOVAPDrm:
- case X86::MOVUPDrm:
- case X86::MOVDQArm:
- case X86::MOVDQUrm:
- // AVX load instructions
- case X86::VMOVSSrm:
- case X86::VMOVSSrm_alt:
- case X86::VMOVSDrm:
- case X86::VMOVSDrm_alt:
- case X86::VMOVAPSrm:
- case X86::VMOVUPSrm:
- case X86::VMOVAPDrm:
- case X86::VMOVUPDrm:
- case X86::VMOVDQArm:
- case X86::VMOVDQUrm:
- case X86::VMOVAPSYrm:
- case X86::VMOVUPSYrm:
- case X86::VMOVAPDYrm:
- case X86::VMOVUPDYrm:
- case X86::VMOVDQAYrm:
- case X86::VMOVDQUYrm:
- // AVX512 load instructions
- case X86::VMOVSSZrm:
- case X86::VMOVSSZrm_alt:
- case X86::VMOVSDZrm:
- case X86::VMOVSDZrm_alt:
- case X86::VMOVAPSZ128rm:
- case X86::VMOVUPSZ128rm:
- case X86::VMOVAPSZ128rm_NOVLX:
- case X86::VMOVUPSZ128rm_NOVLX:
- case X86::VMOVAPDZ128rm:
- case X86::VMOVUPDZ128rm:
- case X86::VMOVDQU8Z128rm:
- case X86::VMOVDQU16Z128rm:
- case X86::VMOVDQA32Z128rm:
- case X86::VMOVDQU32Z128rm:
- case X86::VMOVDQA64Z128rm:
- case X86::VMOVDQU64Z128rm:
- case X86::VMOVAPSZ256rm:
- case X86::VMOVUPSZ256rm:
- case X86::VMOVAPSZ256rm_NOVLX:
- case X86::VMOVUPSZ256rm_NOVLX:
- case X86::VMOVAPDZ256rm:
- case X86::VMOVUPDZ256rm:
- case X86::VMOVDQU8Z256rm:
- case X86::VMOVDQU16Z256rm:
- case X86::VMOVDQA32Z256rm:
- case X86::VMOVDQU32Z256rm:
- case X86::VMOVDQA64Z256rm:
- case X86::VMOVDQU64Z256rm:
- case X86::VMOVAPSZrm:
- case X86::VMOVUPSZrm:
- case X86::VMOVAPDZrm:
- case X86::VMOVUPDZrm:
- case X86::VMOVDQU8Zrm:
- case X86::VMOVDQU16Zrm:
- case X86::VMOVDQA32Zrm:
- case X86::VMOVDQU32Zrm:
- case X86::VMOVDQA64Zrm:
- case X86::VMOVDQU64Zrm:
- case X86::KMOVBkm:
- case X86::KMOVWkm:
- case X86::KMOVDkm:
- case X86::KMOVQkm:
- break;
- }
- switch (Opc2) {
- default: return false;
- case X86::MOV8rm:
- case X86::MOV16rm:
- case X86::MOV32rm:
- case X86::MOV64rm:
- case X86::LD_Fp32m:
- case X86::LD_Fp64m:
- case X86::LD_Fp80m:
- case X86::MOVSSrm:
- case X86::MOVSSrm_alt:
- case X86::MOVSDrm:
- case X86::MOVSDrm_alt:
- case X86::MMX_MOVD64rm:
- case X86::MMX_MOVQ64rm:
- case X86::MOVAPSrm:
- case X86::MOVUPSrm:
- case X86::MOVAPDrm:
- case X86::MOVUPDrm:
- case X86::MOVDQArm:
- case X86::MOVDQUrm:
- // AVX load instructions
- case X86::VMOVSSrm:
- case X86::VMOVSSrm_alt:
- case X86::VMOVSDrm:
- case X86::VMOVSDrm_alt:
- case X86::VMOVAPSrm:
- case X86::VMOVUPSrm:
- case X86::VMOVAPDrm:
- case X86::VMOVUPDrm:
- case X86::VMOVDQArm:
- case X86::VMOVDQUrm:
- case X86::VMOVAPSYrm:
- case X86::VMOVUPSYrm:
- case X86::VMOVAPDYrm:
- case X86::VMOVUPDYrm:
- case X86::VMOVDQAYrm:
- case X86::VMOVDQUYrm:
- // AVX512 load instructions
- case X86::VMOVSSZrm:
- case X86::VMOVSSZrm_alt:
- case X86::VMOVSDZrm:
- case X86::VMOVSDZrm_alt:
- case X86::VMOVAPSZ128rm:
- case X86::VMOVUPSZ128rm:
- case X86::VMOVAPSZ128rm_NOVLX:
- case X86::VMOVUPSZ128rm_NOVLX:
- case X86::VMOVAPDZ128rm:
- case X86::VMOVUPDZ128rm:
- case X86::VMOVDQU8Z128rm:
- case X86::VMOVDQU16Z128rm:
- case X86::VMOVDQA32Z128rm:
- case X86::VMOVDQU32Z128rm:
- case X86::VMOVDQA64Z128rm:
- case X86::VMOVDQU64Z128rm:
- case X86::VMOVAPSZ256rm:
- case X86::VMOVUPSZ256rm:
- case X86::VMOVAPSZ256rm_NOVLX:
- case X86::VMOVUPSZ256rm_NOVLX:
- case X86::VMOVAPDZ256rm:
- case X86::VMOVUPDZ256rm:
- case X86::VMOVDQU8Z256rm:
- case X86::VMOVDQU16Z256rm:
- case X86::VMOVDQA32Z256rm:
- case X86::VMOVDQU32Z256rm:
- case X86::VMOVDQA64Z256rm:
- case X86::VMOVDQU64Z256rm:
- case X86::VMOVAPSZrm:
- case X86::VMOVUPSZrm:
- case X86::VMOVAPDZrm:
- case X86::VMOVUPDZrm:
- case X86::VMOVDQU8Zrm:
- case X86::VMOVDQU16Zrm:
- case X86::VMOVDQA32Zrm:
- case X86::VMOVDQU32Zrm:
- case X86::VMOVDQA64Zrm:
- case X86::VMOVDQU64Zrm:
- case X86::KMOVBkm:
- case X86::KMOVWkm:
- case X86::KMOVDkm:
- case X86::KMOVQkm:
- break;
- }
+
+ auto IsLoadOpcode = [&](unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp32m:
+ case X86::LD_Fp64m:
+ case X86::LD_Fp80m:
+ case X86::MOVSSrm:
+ case X86::MOVSSrm_alt:
+ case X86::MOVSDrm:
+ case X86::MOVSDrm_alt:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ case X86::MOVAPSrm:
+ case X86::MOVUPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVUPDrm:
+ case X86::MOVDQArm:
+ case X86::MOVDQUrm:
+ // AVX load instructions
+ case X86::VMOVSSrm:
+ case X86::VMOVSSrm_alt:
+ case X86::VMOVSDrm:
+ case X86::VMOVSDrm_alt:
+ case X86::VMOVAPSrm:
+ case X86::VMOVUPSrm:
+ case X86::VMOVAPDrm:
+ case X86::VMOVUPDrm:
+ case X86::VMOVDQArm:
+ case X86::VMOVDQUrm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVUPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::VMOVDQUYrm:
+ // AVX512 load instructions
+ case X86::VMOVSSZrm:
+ case X86::VMOVSSZrm_alt:
+ case X86::VMOVSDZrm:
+ case X86::VMOVSDZrm_alt:
+ case X86::VMOVAPSZ128rm:
+ case X86::VMOVUPSZ128rm:
+ case X86::VMOVAPSZ128rm_NOVLX:
+ case X86::VMOVUPSZ128rm_NOVLX:
+ case X86::VMOVAPDZ128rm:
+ case X86::VMOVUPDZ128rm:
+ case X86::VMOVDQU8Z128rm:
+ case X86::VMOVDQU16Z128rm:
+ case X86::VMOVDQA32Z128rm:
+ case X86::VMOVDQU32Z128rm:
+ case X86::VMOVDQA64Z128rm:
+ case X86::VMOVDQU64Z128rm:
+ case X86::VMOVAPSZ256rm:
+ case X86::VMOVUPSZ256rm:
+ case X86::VMOVAPSZ256rm_NOVLX:
+ case X86::VMOVUPSZ256rm_NOVLX:
+ case X86::VMOVAPDZ256rm:
+ case X86::VMOVUPDZ256rm:
+ case X86::VMOVDQU8Z256rm:
+ case X86::VMOVDQU16Z256rm:
+ case X86::VMOVDQA32Z256rm:
+ case X86::VMOVDQU32Z256rm:
+ case X86::VMOVDQA64Z256rm:
+ case X86::VMOVDQU64Z256rm:
+ case X86::VMOVAPSZrm:
+ case X86::VMOVUPSZrm:
+ case X86::VMOVAPDZrm:
+ case X86::VMOVUPDZrm:
+ case X86::VMOVDQU8Zrm:
+ case X86::VMOVDQU16Zrm:
+ case X86::VMOVDQA32Zrm:
+ case X86::VMOVDQU32Zrm:
+ case X86::VMOVDQA64Zrm:
+ case X86::VMOVDQU64Zrm:
+ case X86::KMOVBkm:
+ case X86::KMOVBkm_EVEX:
+ case X86::KMOVWkm:
+ case X86::KMOVWkm_EVEX:
+ case X86::KMOVDkm:
+ case X86::KMOVDkm_EVEX:
+ case X86::KMOVQkm:
+ case X86::KMOVQkm_EVEX:
+ return true;
+ }
+ };
+
+ if (!IsLoadOpcode(Load1->getMachineOpcode()) ||
+ !IsLoadOpcode(Load2->getMachineOpcode()))
+ return false;
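  // The two hand-duplicated opcode switches that the hunk above removes are
  // now a single IsLoadOpcode lambda applied to both loads; a generic sketch
  // of that dedup pattern (hypothetical names, not LLVM code):
  //   auto IsSmallPrime = [](unsigned N) {
  //     switch (N) {
  //     default: return false;
  //     case 2: case 3: case 5: case 7: return true;
  //     }
  //   };
  //   if (!IsSmallPrime(A) || !IsSmallPrime(B))
  //     return false;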
  // Lambda to check whether both loads have the same value for an operand index.
auto HasSameOp = [&](int I) {
@@ -7468,10 +8640,11 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
unsigned Opc1 = Load1->getMachineOpcode();
unsigned Opc2 = Load2->getMachineOpcode();
if (Opc1 != Opc2)
- return false; // FIXME: overly conservative?
+ return false; // FIXME: overly conservative?
switch (Opc1) {
- default: break;
+ default:
+ break;
case X86::LD_Fp32m:
case X86::LD_Fp64m:
case X86::LD_Fp80m:
@@ -7519,16 +8692,16 @@ bool X86InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
}
-bool X86InstrInfo::
-reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+bool X86InstrInfo::reverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 1 && "Invalid X86 branch condition!");
X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
Cond[0].setImm(GetOppositeBranchCondition(CC));
return false;
}
-bool X86InstrInfo::
-isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
+bool X86InstrInfo::isSafeToMoveRegClassDefs(
+ const TargetRegisterClass *RC) const {
// FIXME: Return false for x87 stack register classes for now. We can't
// allow any loads of these registers before FpGet_ST0_80.
return !(RC == &X86::CCRRegClass || RC == &X86::DFCCRRegClass ||
@@ -7543,11 +8716,6 @@ isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
/// TODO: Eliminate this and move the code to X86MachineFunctionInfo.
///
unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
- assert((!Subtarget.is64Bit() ||
- MF->getTarget().getCodeModel() == CodeModel::Medium ||
- MF->getTarget().getCodeModel() == CodeModel::Large) &&
- "X86-64 PIC uses RIP relative addressing");
-
X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
Register GlobalBaseReg = X86FI->getGlobalBaseReg();
if (GlobalBaseReg != 0)
@@ -7562,515 +8730,13 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
return GlobalBaseReg;
}
-// These are the replaceable SSE instructions. Some of these have Int variants
-// that we don't include here. We don't want to replace instructions selected
-// by intrinsics.
-static const uint16_t ReplaceableInstrs[][3] = {
- //PackedSingle PackedDouble PackedInt
- { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
- { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
- { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
- { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
- { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
- { X86::MOVLPSmr, X86::MOVLPDmr, X86::MOVPQI2QImr },
- { X86::MOVSDmr, X86::MOVSDmr, X86::MOVPQI2QImr },
- { X86::MOVSSmr, X86::MOVSSmr, X86::MOVPDI2DImr },
- { X86::MOVSDrm, X86::MOVSDrm, X86::MOVQI2PQIrm },
- { X86::MOVSDrm_alt,X86::MOVSDrm_alt,X86::MOVQI2PQIrm },
- { X86::MOVSSrm, X86::MOVSSrm, X86::MOVDI2PDIrm },
- { X86::MOVSSrm_alt,X86::MOVSSrm_alt,X86::MOVDI2PDIrm },
- { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
- { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
- { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
- { X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm },
- { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr },
- { X86::ORPSrm, X86::ORPDrm, X86::PORrm },
- { X86::ORPSrr, X86::ORPDrr, X86::PORrr },
- { X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
- { X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
- { X86::UNPCKLPDrm, X86::UNPCKLPDrm, X86::PUNPCKLQDQrm },
- { X86::MOVLHPSrr, X86::UNPCKLPDrr, X86::PUNPCKLQDQrr },
- { X86::UNPCKHPDrm, X86::UNPCKHPDrm, X86::PUNPCKHQDQrm },
- { X86::UNPCKHPDrr, X86::UNPCKHPDrr, X86::PUNPCKHQDQrr },
- { X86::UNPCKLPSrm, X86::UNPCKLPSrm, X86::PUNPCKLDQrm },
- { X86::UNPCKLPSrr, X86::UNPCKLPSrr, X86::PUNPCKLDQrr },
- { X86::UNPCKHPSrm, X86::UNPCKHPSrm, X86::PUNPCKHDQrm },
- { X86::UNPCKHPSrr, X86::UNPCKHPSrr, X86::PUNPCKHDQrr },
- { X86::EXTRACTPSmr, X86::EXTRACTPSmr, X86::PEXTRDmr },
- { X86::EXTRACTPSrr, X86::EXTRACTPSrr, X86::PEXTRDrr },
- // AVX 128-bit support
- { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
- { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
- { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr },
- { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
- { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
- { X86::VMOVLPSmr, X86::VMOVLPDmr, X86::VMOVPQI2QImr },
- { X86::VMOVSDmr, X86::VMOVSDmr, X86::VMOVPQI2QImr },
- { X86::VMOVSSmr, X86::VMOVSSmr, X86::VMOVPDI2DImr },
- { X86::VMOVSDrm, X86::VMOVSDrm, X86::VMOVQI2PQIrm },
- { X86::VMOVSDrm_alt,X86::VMOVSDrm_alt,X86::VMOVQI2PQIrm },
- { X86::VMOVSSrm, X86::VMOVSSrm, X86::VMOVDI2PDIrm },
- { X86::VMOVSSrm_alt,X86::VMOVSSrm_alt,X86::VMOVDI2PDIrm },
- { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
- { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
- { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
- { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm },
- { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr },
- { X86::VORPSrm, X86::VORPDrm, X86::VPORrm },
- { X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
- { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
- { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
- { X86::VUNPCKLPDrm, X86::VUNPCKLPDrm, X86::VPUNPCKLQDQrm },
- { X86::VMOVLHPSrr, X86::VUNPCKLPDrr, X86::VPUNPCKLQDQrr },
- { X86::VUNPCKHPDrm, X86::VUNPCKHPDrm, X86::VPUNPCKHQDQrm },
- { X86::VUNPCKHPDrr, X86::VUNPCKHPDrr, X86::VPUNPCKHQDQrr },
- { X86::VUNPCKLPSrm, X86::VUNPCKLPSrm, X86::VPUNPCKLDQrm },
- { X86::VUNPCKLPSrr, X86::VUNPCKLPSrr, X86::VPUNPCKLDQrr },
- { X86::VUNPCKHPSrm, X86::VUNPCKHPSrm, X86::VPUNPCKHDQrm },
- { X86::VUNPCKHPSrr, X86::VUNPCKHPSrr, X86::VPUNPCKHDQrr },
- { X86::VEXTRACTPSmr, X86::VEXTRACTPSmr, X86::VPEXTRDmr },
- { X86::VEXTRACTPSrr, X86::VEXTRACTPSrr, X86::VPEXTRDrr },
- // AVX 256-bit support
- { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr },
- { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm },
- { X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr },
- { X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr },
- { X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm },
- { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr },
- { X86::VPERMPSYrm, X86::VPERMPSYrm, X86::VPERMDYrm },
- { X86::VPERMPSYrr, X86::VPERMPSYrr, X86::VPERMDYrr },
- { X86::VPERMPDYmi, X86::VPERMPDYmi, X86::VPERMQYmi },
- { X86::VPERMPDYri, X86::VPERMPDYri, X86::VPERMQYri },
- // AVX512 support
- { X86::VMOVLPSZ128mr, X86::VMOVLPDZ128mr, X86::VMOVPQI2QIZmr },
- { X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr },
- { X86::VMOVNTPSZ256mr, X86::VMOVNTPDZ256mr, X86::VMOVNTDQZ256mr },
- { X86::VMOVNTPSZmr, X86::VMOVNTPDZmr, X86::VMOVNTDQZmr },
- { X86::VMOVSDZmr, X86::VMOVSDZmr, X86::VMOVPQI2QIZmr },
- { X86::VMOVSSZmr, X86::VMOVSSZmr, X86::VMOVPDI2DIZmr },
- { X86::VMOVSDZrm, X86::VMOVSDZrm, X86::VMOVQI2PQIZrm },
- { X86::VMOVSDZrm_alt, X86::VMOVSDZrm_alt, X86::VMOVQI2PQIZrm },
- { X86::VMOVSSZrm, X86::VMOVSSZrm, X86::VMOVDI2PDIZrm },
- { X86::VMOVSSZrm_alt, X86::VMOVSSZrm_alt, X86::VMOVDI2PDIZrm },
- { X86::VBROADCASTSSZ128rr,X86::VBROADCASTSSZ128rr,X86::VPBROADCASTDZ128rr },
- { X86::VBROADCASTSSZ128rm,X86::VBROADCASTSSZ128rm,X86::VPBROADCASTDZ128rm },
- { X86::VBROADCASTSSZ256rr,X86::VBROADCASTSSZ256rr,X86::VPBROADCASTDZ256rr },
- { X86::VBROADCASTSSZ256rm,X86::VBROADCASTSSZ256rm,X86::VPBROADCASTDZ256rm },
- { X86::VBROADCASTSSZrr, X86::VBROADCASTSSZrr, X86::VPBROADCASTDZrr },
- { X86::VBROADCASTSSZrm, X86::VBROADCASTSSZrm, X86::VPBROADCASTDZrm },
- { X86::VMOVDDUPZ128rr, X86::VMOVDDUPZ128rr, X86::VPBROADCASTQZ128rr },
- { X86::VMOVDDUPZ128rm, X86::VMOVDDUPZ128rm, X86::VPBROADCASTQZ128rm },
- { X86::VBROADCASTSDZ256rr,X86::VBROADCASTSDZ256rr,X86::VPBROADCASTQZ256rr },
- { X86::VBROADCASTSDZ256rm,X86::VBROADCASTSDZ256rm,X86::VPBROADCASTQZ256rm },
- { X86::VBROADCASTSDZrr, X86::VBROADCASTSDZrr, X86::VPBROADCASTQZrr },
- { X86::VBROADCASTSDZrm, X86::VBROADCASTSDZrm, X86::VPBROADCASTQZrm },
- { X86::VINSERTF32x4Zrr, X86::VINSERTF32x4Zrr, X86::VINSERTI32x4Zrr },
- { X86::VINSERTF32x4Zrm, X86::VINSERTF32x4Zrm, X86::VINSERTI32x4Zrm },
- { X86::VINSERTF32x8Zrr, X86::VINSERTF32x8Zrr, X86::VINSERTI32x8Zrr },
- { X86::VINSERTF32x8Zrm, X86::VINSERTF32x8Zrm, X86::VINSERTI32x8Zrm },
- { X86::VINSERTF64x2Zrr, X86::VINSERTF64x2Zrr, X86::VINSERTI64x2Zrr },
- { X86::VINSERTF64x2Zrm, X86::VINSERTF64x2Zrm, X86::VINSERTI64x2Zrm },
- { X86::VINSERTF64x4Zrr, X86::VINSERTF64x4Zrr, X86::VINSERTI64x4Zrr },
- { X86::VINSERTF64x4Zrm, X86::VINSERTF64x4Zrm, X86::VINSERTI64x4Zrm },
- { X86::VINSERTF32x4Z256rr,X86::VINSERTF32x4Z256rr,X86::VINSERTI32x4Z256rr },
- { X86::VINSERTF32x4Z256rm,X86::VINSERTF32x4Z256rm,X86::VINSERTI32x4Z256rm },
- { X86::VINSERTF64x2Z256rr,X86::VINSERTF64x2Z256rr,X86::VINSERTI64x2Z256rr },
- { X86::VINSERTF64x2Z256rm,X86::VINSERTF64x2Z256rm,X86::VINSERTI64x2Z256rm },
- { X86::VEXTRACTF32x4Zrr, X86::VEXTRACTF32x4Zrr, X86::VEXTRACTI32x4Zrr },
- { X86::VEXTRACTF32x4Zmr, X86::VEXTRACTF32x4Zmr, X86::VEXTRACTI32x4Zmr },
- { X86::VEXTRACTF32x8Zrr, X86::VEXTRACTF32x8Zrr, X86::VEXTRACTI32x8Zrr },
- { X86::VEXTRACTF32x8Zmr, X86::VEXTRACTF32x8Zmr, X86::VEXTRACTI32x8Zmr },
- { X86::VEXTRACTF64x2Zrr, X86::VEXTRACTF64x2Zrr, X86::VEXTRACTI64x2Zrr },
- { X86::VEXTRACTF64x2Zmr, X86::VEXTRACTF64x2Zmr, X86::VEXTRACTI64x2Zmr },
- { X86::VEXTRACTF64x4Zrr, X86::VEXTRACTF64x4Zrr, X86::VEXTRACTI64x4Zrr },
- { X86::VEXTRACTF64x4Zmr, X86::VEXTRACTF64x4Zmr, X86::VEXTRACTI64x4Zmr },
- { X86::VEXTRACTF32x4Z256rr,X86::VEXTRACTF32x4Z256rr,X86::VEXTRACTI32x4Z256rr },
- { X86::VEXTRACTF32x4Z256mr,X86::VEXTRACTF32x4Z256mr,X86::VEXTRACTI32x4Z256mr },
- { X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTI64x2Z256rr },
- { X86::VEXTRACTF64x2Z256mr,X86::VEXTRACTF64x2Z256mr,X86::VEXTRACTI64x2Z256mr },
- { X86::VPERMILPSmi, X86::VPERMILPSmi, X86::VPSHUFDmi },
- { X86::VPERMILPSri, X86::VPERMILPSri, X86::VPSHUFDri },
- { X86::VPERMILPSZ128mi, X86::VPERMILPSZ128mi, X86::VPSHUFDZ128mi },
- { X86::VPERMILPSZ128ri, X86::VPERMILPSZ128ri, X86::VPSHUFDZ128ri },
- { X86::VPERMILPSZ256mi, X86::VPERMILPSZ256mi, X86::VPSHUFDZ256mi },
- { X86::VPERMILPSZ256ri, X86::VPERMILPSZ256ri, X86::VPSHUFDZ256ri },
- { X86::VPERMILPSZmi, X86::VPERMILPSZmi, X86::VPSHUFDZmi },
- { X86::VPERMILPSZri, X86::VPERMILPSZri, X86::VPSHUFDZri },
- { X86::VPERMPSZ256rm, X86::VPERMPSZ256rm, X86::VPERMDZ256rm },
- { X86::VPERMPSZ256rr, X86::VPERMPSZ256rr, X86::VPERMDZ256rr },
- { X86::VPERMPDZ256mi, X86::VPERMPDZ256mi, X86::VPERMQZ256mi },
- { X86::VPERMPDZ256ri, X86::VPERMPDZ256ri, X86::VPERMQZ256ri },
- { X86::VPERMPDZ256rm, X86::VPERMPDZ256rm, X86::VPERMQZ256rm },
- { X86::VPERMPDZ256rr, X86::VPERMPDZ256rr, X86::VPERMQZ256rr },
- { X86::VPERMPSZrm, X86::VPERMPSZrm, X86::VPERMDZrm },
- { X86::VPERMPSZrr, X86::VPERMPSZrr, X86::VPERMDZrr },
- { X86::VPERMPDZmi, X86::VPERMPDZmi, X86::VPERMQZmi },
- { X86::VPERMPDZri, X86::VPERMPDZri, X86::VPERMQZri },
- { X86::VPERMPDZrm, X86::VPERMPDZrm, X86::VPERMQZrm },
- { X86::VPERMPDZrr, X86::VPERMPDZrr, X86::VPERMQZrr },
- { X86::VUNPCKLPDZ256rm, X86::VUNPCKLPDZ256rm, X86::VPUNPCKLQDQZ256rm },
- { X86::VUNPCKLPDZ256rr, X86::VUNPCKLPDZ256rr, X86::VPUNPCKLQDQZ256rr },
- { X86::VUNPCKHPDZ256rm, X86::VUNPCKHPDZ256rm, X86::VPUNPCKHQDQZ256rm },
- { X86::VUNPCKHPDZ256rr, X86::VUNPCKHPDZ256rr, X86::VPUNPCKHQDQZ256rr },
- { X86::VUNPCKLPSZ256rm, X86::VUNPCKLPSZ256rm, X86::VPUNPCKLDQZ256rm },
- { X86::VUNPCKLPSZ256rr, X86::VUNPCKLPSZ256rr, X86::VPUNPCKLDQZ256rr },
- { X86::VUNPCKHPSZ256rm, X86::VUNPCKHPSZ256rm, X86::VPUNPCKHDQZ256rm },
- { X86::VUNPCKHPSZ256rr, X86::VUNPCKHPSZ256rr, X86::VPUNPCKHDQZ256rr },
- { X86::VUNPCKLPDZ128rm, X86::VUNPCKLPDZ128rm, X86::VPUNPCKLQDQZ128rm },
- { X86::VMOVLHPSZrr, X86::VUNPCKLPDZ128rr, X86::VPUNPCKLQDQZ128rr },
- { X86::VUNPCKHPDZ128rm, X86::VUNPCKHPDZ128rm, X86::VPUNPCKHQDQZ128rm },
- { X86::VUNPCKHPDZ128rr, X86::VUNPCKHPDZ128rr, X86::VPUNPCKHQDQZ128rr },
- { X86::VUNPCKLPSZ128rm, X86::VUNPCKLPSZ128rm, X86::VPUNPCKLDQZ128rm },
- { X86::VUNPCKLPSZ128rr, X86::VUNPCKLPSZ128rr, X86::VPUNPCKLDQZ128rr },
- { X86::VUNPCKHPSZ128rm, X86::VUNPCKHPSZ128rm, X86::VPUNPCKHDQZ128rm },
- { X86::VUNPCKHPSZ128rr, X86::VUNPCKHPSZ128rr, X86::VPUNPCKHDQZ128rr },
- { X86::VUNPCKLPDZrm, X86::VUNPCKLPDZrm, X86::VPUNPCKLQDQZrm },
- { X86::VUNPCKLPDZrr, X86::VUNPCKLPDZrr, X86::VPUNPCKLQDQZrr },
- { X86::VUNPCKHPDZrm, X86::VUNPCKHPDZrm, X86::VPUNPCKHQDQZrm },
- { X86::VUNPCKHPDZrr, X86::VUNPCKHPDZrr, X86::VPUNPCKHQDQZrr },
- { X86::VUNPCKLPSZrm, X86::VUNPCKLPSZrm, X86::VPUNPCKLDQZrm },
- { X86::VUNPCKLPSZrr, X86::VUNPCKLPSZrr, X86::VPUNPCKLDQZrr },
- { X86::VUNPCKHPSZrm, X86::VUNPCKHPSZrm, X86::VPUNPCKHDQZrm },
- { X86::VUNPCKHPSZrr, X86::VUNPCKHPSZrr, X86::VPUNPCKHDQZrr },
- { X86::VEXTRACTPSZmr, X86::VEXTRACTPSZmr, X86::VPEXTRDZmr },
- { X86::VEXTRACTPSZrr, X86::VEXTRACTPSZrr, X86::VPEXTRDZrr },
-};
-
-static const uint16_t ReplaceableInstrsAVX2[][3] = {
- //PackedSingle PackedDouble PackedInt
- { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNYrm },
- { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNYrr },
- { X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDYrm },
- { X86::VANDPSYrr, X86::VANDPDYrr, X86::VPANDYrr },
- { X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm },
- { X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr },
- { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm },
- { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr },
- { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
- { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr },
- { X86::VBROADCASTSSrm, X86::VBROADCASTSSrm, X86::VPBROADCASTDrm},
- { X86::VBROADCASTSSrr, X86::VBROADCASTSSrr, X86::VPBROADCASTDrr},
- { X86::VMOVDDUPrm, X86::VMOVDDUPrm, X86::VPBROADCASTQrm},
- { X86::VMOVDDUPrr, X86::VMOVDDUPrr, X86::VPBROADCASTQrr},
- { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrr, X86::VPBROADCASTDYrr},
- { X86::VBROADCASTSSYrm, X86::VBROADCASTSSYrm, X86::VPBROADCASTDYrm},
- { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrr, X86::VPBROADCASTQYrr},
- { X86::VBROADCASTSDYrm, X86::VBROADCASTSDYrm, X86::VPBROADCASTQYrm},
- { X86::VBROADCASTF128, X86::VBROADCASTF128, X86::VBROADCASTI128 },
- { X86::VBLENDPSYrri, X86::VBLENDPSYrri, X86::VPBLENDDYrri },
- { X86::VBLENDPSYrmi, X86::VBLENDPSYrmi, X86::VPBLENDDYrmi },
- { X86::VPERMILPSYmi, X86::VPERMILPSYmi, X86::VPSHUFDYmi },
- { X86::VPERMILPSYri, X86::VPERMILPSYri, X86::VPSHUFDYri },
- { X86::VUNPCKLPDYrm, X86::VUNPCKLPDYrm, X86::VPUNPCKLQDQYrm },
- { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrr, X86::VPUNPCKLQDQYrr },
- { X86::VUNPCKHPDYrm, X86::VUNPCKHPDYrm, X86::VPUNPCKHQDQYrm },
- { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrr, X86::VPUNPCKHQDQYrr },
- { X86::VUNPCKLPSYrm, X86::VUNPCKLPSYrm, X86::VPUNPCKLDQYrm },
- { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrr, X86::VPUNPCKLDQYrr },
- { X86::VUNPCKHPSYrm, X86::VUNPCKHPSYrm, X86::VPUNPCKHDQYrm },
- { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrr, X86::VPUNPCKHDQYrr },
-};
-
-static const uint16_t ReplaceableInstrsFP[][3] = {
- //PackedSingle PackedDouble
- { X86::MOVLPSrm, X86::MOVLPDrm, X86::INSTRUCTION_LIST_END },
- { X86::MOVHPSrm, X86::MOVHPDrm, X86::INSTRUCTION_LIST_END },
- { X86::MOVHPSmr, X86::MOVHPDmr, X86::INSTRUCTION_LIST_END },
- { X86::VMOVLPSrm, X86::VMOVLPDrm, X86::INSTRUCTION_LIST_END },
- { X86::VMOVHPSrm, X86::VMOVHPDrm, X86::INSTRUCTION_LIST_END },
- { X86::VMOVHPSmr, X86::VMOVHPDmr, X86::INSTRUCTION_LIST_END },
- { X86::VMOVLPSZ128rm, X86::VMOVLPDZ128rm, X86::INSTRUCTION_LIST_END },
- { X86::VMOVHPSZ128rm, X86::VMOVHPDZ128rm, X86::INSTRUCTION_LIST_END },
- { X86::VMOVHPSZ128mr, X86::VMOVHPDZ128mr, X86::INSTRUCTION_LIST_END },
-};
-
-static const uint16_t ReplaceableInstrsAVX2InsertExtract[][3] = {
- //PackedSingle PackedDouble PackedInt
- { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
- { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
- { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
- { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
-};
-
-static const uint16_t ReplaceableInstrsAVX512[][4] = {
- // Two integer columns for 64-bit and 32-bit elements.
- //PackedSingle PackedDouble PackedInt PackedInt
- { X86::VMOVAPSZ128mr, X86::VMOVAPDZ128mr, X86::VMOVDQA64Z128mr, X86::VMOVDQA32Z128mr },
- { X86::VMOVAPSZ128rm, X86::VMOVAPDZ128rm, X86::VMOVDQA64Z128rm, X86::VMOVDQA32Z128rm },
- { X86::VMOVAPSZ128rr, X86::VMOVAPDZ128rr, X86::VMOVDQA64Z128rr, X86::VMOVDQA32Z128rr },
- { X86::VMOVUPSZ128mr, X86::VMOVUPDZ128mr, X86::VMOVDQU64Z128mr, X86::VMOVDQU32Z128mr },
- { X86::VMOVUPSZ128rm, X86::VMOVUPDZ128rm, X86::VMOVDQU64Z128rm, X86::VMOVDQU32Z128rm },
- { X86::VMOVAPSZ256mr, X86::VMOVAPDZ256mr, X86::VMOVDQA64Z256mr, X86::VMOVDQA32Z256mr },
- { X86::VMOVAPSZ256rm, X86::VMOVAPDZ256rm, X86::VMOVDQA64Z256rm, X86::VMOVDQA32Z256rm },
- { X86::VMOVAPSZ256rr, X86::VMOVAPDZ256rr, X86::VMOVDQA64Z256rr, X86::VMOVDQA32Z256rr },
- { X86::VMOVUPSZ256mr, X86::VMOVUPDZ256mr, X86::VMOVDQU64Z256mr, X86::VMOVDQU32Z256mr },
- { X86::VMOVUPSZ256rm, X86::VMOVUPDZ256rm, X86::VMOVDQU64Z256rm, X86::VMOVDQU32Z256rm },
- { X86::VMOVAPSZmr, X86::VMOVAPDZmr, X86::VMOVDQA64Zmr, X86::VMOVDQA32Zmr },
- { X86::VMOVAPSZrm, X86::VMOVAPDZrm, X86::VMOVDQA64Zrm, X86::VMOVDQA32Zrm },
- { X86::VMOVAPSZrr, X86::VMOVAPDZrr, X86::VMOVDQA64Zrr, X86::VMOVDQA32Zrr },
- { X86::VMOVUPSZmr, X86::VMOVUPDZmr, X86::VMOVDQU64Zmr, X86::VMOVDQU32Zmr },
- { X86::VMOVUPSZrm, X86::VMOVUPDZrm, X86::VMOVDQU64Zrm, X86::VMOVDQU32Zrm },
-};
-
-static const uint16_t ReplaceableInstrsAVX512DQ[][4] = {
- // Two integer columns for 64-bit and 32-bit elements.
- //PackedSingle PackedDouble PackedInt PackedInt
- { X86::VANDNPSZ128rm, X86::VANDNPDZ128rm, X86::VPANDNQZ128rm, X86::VPANDNDZ128rm },
- { X86::VANDNPSZ128rr, X86::VANDNPDZ128rr, X86::VPANDNQZ128rr, X86::VPANDNDZ128rr },
- { X86::VANDPSZ128rm, X86::VANDPDZ128rm, X86::VPANDQZ128rm, X86::VPANDDZ128rm },
- { X86::VANDPSZ128rr, X86::VANDPDZ128rr, X86::VPANDQZ128rr, X86::VPANDDZ128rr },
- { X86::VORPSZ128rm, X86::VORPDZ128rm, X86::VPORQZ128rm, X86::VPORDZ128rm },
- { X86::VORPSZ128rr, X86::VORPDZ128rr, X86::VPORQZ128rr, X86::VPORDZ128rr },
- { X86::VXORPSZ128rm, X86::VXORPDZ128rm, X86::VPXORQZ128rm, X86::VPXORDZ128rm },
- { X86::VXORPSZ128rr, X86::VXORPDZ128rr, X86::VPXORQZ128rr, X86::VPXORDZ128rr },
- { X86::VANDNPSZ256rm, X86::VANDNPDZ256rm, X86::VPANDNQZ256rm, X86::VPANDNDZ256rm },
- { X86::VANDNPSZ256rr, X86::VANDNPDZ256rr, X86::VPANDNQZ256rr, X86::VPANDNDZ256rr },
- { X86::VANDPSZ256rm, X86::VANDPDZ256rm, X86::VPANDQZ256rm, X86::VPANDDZ256rm },
- { X86::VANDPSZ256rr, X86::VANDPDZ256rr, X86::VPANDQZ256rr, X86::VPANDDZ256rr },
- { X86::VORPSZ256rm, X86::VORPDZ256rm, X86::VPORQZ256rm, X86::VPORDZ256rm },
- { X86::VORPSZ256rr, X86::VORPDZ256rr, X86::VPORQZ256rr, X86::VPORDZ256rr },
- { X86::VXORPSZ256rm, X86::VXORPDZ256rm, X86::VPXORQZ256rm, X86::VPXORDZ256rm },
- { X86::VXORPSZ256rr, X86::VXORPDZ256rr, X86::VPXORQZ256rr, X86::VPXORDZ256rr },
- { X86::VANDNPSZrm, X86::VANDNPDZrm, X86::VPANDNQZrm, X86::VPANDNDZrm },
- { X86::VANDNPSZrr, X86::VANDNPDZrr, X86::VPANDNQZrr, X86::VPANDNDZrr },
- { X86::VANDPSZrm, X86::VANDPDZrm, X86::VPANDQZrm, X86::VPANDDZrm },
- { X86::VANDPSZrr, X86::VANDPDZrr, X86::VPANDQZrr, X86::VPANDDZrr },
- { X86::VORPSZrm, X86::VORPDZrm, X86::VPORQZrm, X86::VPORDZrm },
- { X86::VORPSZrr, X86::VORPDZrr, X86::VPORQZrr, X86::VPORDZrr },
- { X86::VXORPSZrm, X86::VXORPDZrm, X86::VPXORQZrm, X86::VPXORDZrm },
- { X86::VXORPSZrr, X86::VXORPDZrr, X86::VPXORQZrr, X86::VPXORDZrr },
-};
-
-static const uint16_t ReplaceableInstrsAVX512DQMasked[][4] = {
- // Two integer columns for 64-bit and 32-bit elements.
- //PackedSingle PackedDouble
- //PackedInt PackedInt
- { X86::VANDNPSZ128rmk, X86::VANDNPDZ128rmk,
- X86::VPANDNQZ128rmk, X86::VPANDNDZ128rmk },
- { X86::VANDNPSZ128rmkz, X86::VANDNPDZ128rmkz,
- X86::VPANDNQZ128rmkz, X86::VPANDNDZ128rmkz },
- { X86::VANDNPSZ128rrk, X86::VANDNPDZ128rrk,
- X86::VPANDNQZ128rrk, X86::VPANDNDZ128rrk },
- { X86::VANDNPSZ128rrkz, X86::VANDNPDZ128rrkz,
- X86::VPANDNQZ128rrkz, X86::VPANDNDZ128rrkz },
- { X86::VANDPSZ128rmk, X86::VANDPDZ128rmk,
- X86::VPANDQZ128rmk, X86::VPANDDZ128rmk },
- { X86::VANDPSZ128rmkz, X86::VANDPDZ128rmkz,
- X86::VPANDQZ128rmkz, X86::VPANDDZ128rmkz },
- { X86::VANDPSZ128rrk, X86::VANDPDZ128rrk,
- X86::VPANDQZ128rrk, X86::VPANDDZ128rrk },
- { X86::VANDPSZ128rrkz, X86::VANDPDZ128rrkz,
- X86::VPANDQZ128rrkz, X86::VPANDDZ128rrkz },
- { X86::VORPSZ128rmk, X86::VORPDZ128rmk,
- X86::VPORQZ128rmk, X86::VPORDZ128rmk },
- { X86::VORPSZ128rmkz, X86::VORPDZ128rmkz,
- X86::VPORQZ128rmkz, X86::VPORDZ128rmkz },
- { X86::VORPSZ128rrk, X86::VORPDZ128rrk,
- X86::VPORQZ128rrk, X86::VPORDZ128rrk },
- { X86::VORPSZ128rrkz, X86::VORPDZ128rrkz,
- X86::VPORQZ128rrkz, X86::VPORDZ128rrkz },
- { X86::VXORPSZ128rmk, X86::VXORPDZ128rmk,
- X86::VPXORQZ128rmk, X86::VPXORDZ128rmk },
- { X86::VXORPSZ128rmkz, X86::VXORPDZ128rmkz,
- X86::VPXORQZ128rmkz, X86::VPXORDZ128rmkz },
- { X86::VXORPSZ128rrk, X86::VXORPDZ128rrk,
- X86::VPXORQZ128rrk, X86::VPXORDZ128rrk },
- { X86::VXORPSZ128rrkz, X86::VXORPDZ128rrkz,
- X86::VPXORQZ128rrkz, X86::VPXORDZ128rrkz },
- { X86::VANDNPSZ256rmk, X86::VANDNPDZ256rmk,
- X86::VPANDNQZ256rmk, X86::VPANDNDZ256rmk },
- { X86::VANDNPSZ256rmkz, X86::VANDNPDZ256rmkz,
- X86::VPANDNQZ256rmkz, X86::VPANDNDZ256rmkz },
- { X86::VANDNPSZ256rrk, X86::VANDNPDZ256rrk,
- X86::VPANDNQZ256rrk, X86::VPANDNDZ256rrk },
- { X86::VANDNPSZ256rrkz, X86::VANDNPDZ256rrkz,
- X86::VPANDNQZ256rrkz, X86::VPANDNDZ256rrkz },
- { X86::VANDPSZ256rmk, X86::VANDPDZ256rmk,
- X86::VPANDQZ256rmk, X86::VPANDDZ256rmk },
- { X86::VANDPSZ256rmkz, X86::VANDPDZ256rmkz,
- X86::VPANDQZ256rmkz, X86::VPANDDZ256rmkz },
- { X86::VANDPSZ256rrk, X86::VANDPDZ256rrk,
- X86::VPANDQZ256rrk, X86::VPANDDZ256rrk },
- { X86::VANDPSZ256rrkz, X86::VANDPDZ256rrkz,
- X86::VPANDQZ256rrkz, X86::VPANDDZ256rrkz },
- { X86::VORPSZ256rmk, X86::VORPDZ256rmk,
- X86::VPORQZ256rmk, X86::VPORDZ256rmk },
- { X86::VORPSZ256rmkz, X86::VORPDZ256rmkz,
- X86::VPORQZ256rmkz, X86::VPORDZ256rmkz },
- { X86::VORPSZ256rrk, X86::VORPDZ256rrk,
- X86::VPORQZ256rrk, X86::VPORDZ256rrk },
- { X86::VORPSZ256rrkz, X86::VORPDZ256rrkz,
- X86::VPORQZ256rrkz, X86::VPORDZ256rrkz },
- { X86::VXORPSZ256rmk, X86::VXORPDZ256rmk,
- X86::VPXORQZ256rmk, X86::VPXORDZ256rmk },
- { X86::VXORPSZ256rmkz, X86::VXORPDZ256rmkz,
- X86::VPXORQZ256rmkz, X86::VPXORDZ256rmkz },
- { X86::VXORPSZ256rrk, X86::VXORPDZ256rrk,
- X86::VPXORQZ256rrk, X86::VPXORDZ256rrk },
- { X86::VXORPSZ256rrkz, X86::VXORPDZ256rrkz,
- X86::VPXORQZ256rrkz, X86::VPXORDZ256rrkz },
- { X86::VANDNPSZrmk, X86::VANDNPDZrmk,
- X86::VPANDNQZrmk, X86::VPANDNDZrmk },
- { X86::VANDNPSZrmkz, X86::VANDNPDZrmkz,
- X86::VPANDNQZrmkz, X86::VPANDNDZrmkz },
- { X86::VANDNPSZrrk, X86::VANDNPDZrrk,
- X86::VPANDNQZrrk, X86::VPANDNDZrrk },
- { X86::VANDNPSZrrkz, X86::VANDNPDZrrkz,
- X86::VPANDNQZrrkz, X86::VPANDNDZrrkz },
- { X86::VANDPSZrmk, X86::VANDPDZrmk,
- X86::VPANDQZrmk, X86::VPANDDZrmk },
- { X86::VANDPSZrmkz, X86::VANDPDZrmkz,
- X86::VPANDQZrmkz, X86::VPANDDZrmkz },
- { X86::VANDPSZrrk, X86::VANDPDZrrk,
- X86::VPANDQZrrk, X86::VPANDDZrrk },
- { X86::VANDPSZrrkz, X86::VANDPDZrrkz,
- X86::VPANDQZrrkz, X86::VPANDDZrrkz },
- { X86::VORPSZrmk, X86::VORPDZrmk,
- X86::VPORQZrmk, X86::VPORDZrmk },
- { X86::VORPSZrmkz, X86::VORPDZrmkz,
- X86::VPORQZrmkz, X86::VPORDZrmkz },
- { X86::VORPSZrrk, X86::VORPDZrrk,
- X86::VPORQZrrk, X86::VPORDZrrk },
- { X86::VORPSZrrkz, X86::VORPDZrrkz,
- X86::VPORQZrrkz, X86::VPORDZrrkz },
- { X86::VXORPSZrmk, X86::VXORPDZrmk,
- X86::VPXORQZrmk, X86::VPXORDZrmk },
- { X86::VXORPSZrmkz, X86::VXORPDZrmkz,
- X86::VPXORQZrmkz, X86::VPXORDZrmkz },
- { X86::VXORPSZrrk, X86::VXORPDZrrk,
- X86::VPXORQZrrk, X86::VPXORDZrrk },
- { X86::VXORPSZrrkz, X86::VXORPDZrrkz,
- X86::VPXORQZrrkz, X86::VPXORDZrrkz },
- // Broadcast loads can be handled the same as masked operations to avoid
- // changing element size.
- { X86::VANDNPSZ128rmb, X86::VANDNPDZ128rmb,
- X86::VPANDNQZ128rmb, X86::VPANDNDZ128rmb },
- { X86::VANDPSZ128rmb, X86::VANDPDZ128rmb,
- X86::VPANDQZ128rmb, X86::VPANDDZ128rmb },
- { X86::VORPSZ128rmb, X86::VORPDZ128rmb,
- X86::VPORQZ128rmb, X86::VPORDZ128rmb },
- { X86::VXORPSZ128rmb, X86::VXORPDZ128rmb,
- X86::VPXORQZ128rmb, X86::VPXORDZ128rmb },
- { X86::VANDNPSZ256rmb, X86::VANDNPDZ256rmb,
- X86::VPANDNQZ256rmb, X86::VPANDNDZ256rmb },
- { X86::VANDPSZ256rmb, X86::VANDPDZ256rmb,
- X86::VPANDQZ256rmb, X86::VPANDDZ256rmb },
- { X86::VORPSZ256rmb, X86::VORPDZ256rmb,
- X86::VPORQZ256rmb, X86::VPORDZ256rmb },
- { X86::VXORPSZ256rmb, X86::VXORPDZ256rmb,
- X86::VPXORQZ256rmb, X86::VPXORDZ256rmb },
- { X86::VANDNPSZrmb, X86::VANDNPDZrmb,
- X86::VPANDNQZrmb, X86::VPANDNDZrmb },
- { X86::VANDPSZrmb, X86::VANDPDZrmb,
- X86::VPANDQZrmb, X86::VPANDDZrmb },
- { X86::VANDPSZrmb, X86::VANDPDZrmb,
- X86::VPANDQZrmb, X86::VPANDDZrmb },
- { X86::VORPSZrmb, X86::VORPDZrmb,
- X86::VPORQZrmb, X86::VPORDZrmb },
- { X86::VXORPSZrmb, X86::VXORPDZrmb,
- X86::VPXORQZrmb, X86::VPXORDZrmb },
- { X86::VANDNPSZ128rmbk, X86::VANDNPDZ128rmbk,
- X86::VPANDNQZ128rmbk, X86::VPANDNDZ128rmbk },
- { X86::VANDPSZ128rmbk, X86::VANDPDZ128rmbk,
- X86::VPANDQZ128rmbk, X86::VPANDDZ128rmbk },
- { X86::VORPSZ128rmbk, X86::VORPDZ128rmbk,
- X86::VPORQZ128rmbk, X86::VPORDZ128rmbk },
- { X86::VXORPSZ128rmbk, X86::VXORPDZ128rmbk,
- X86::VPXORQZ128rmbk, X86::VPXORDZ128rmbk },
- { X86::VANDNPSZ256rmbk, X86::VANDNPDZ256rmbk,
- X86::VPANDNQZ256rmbk, X86::VPANDNDZ256rmbk },
- { X86::VANDPSZ256rmbk, X86::VANDPDZ256rmbk,
- X86::VPANDQZ256rmbk, X86::VPANDDZ256rmbk },
- { X86::VORPSZ256rmbk, X86::VORPDZ256rmbk,
- X86::VPORQZ256rmbk, X86::VPORDZ256rmbk },
- { X86::VXORPSZ256rmbk, X86::VXORPDZ256rmbk,
- X86::VPXORQZ256rmbk, X86::VPXORDZ256rmbk },
- { X86::VANDNPSZrmbk, X86::VANDNPDZrmbk,
- X86::VPANDNQZrmbk, X86::VPANDNDZrmbk },
- { X86::VANDPSZrmbk, X86::VANDPDZrmbk,
- X86::VPANDQZrmbk, X86::VPANDDZrmbk },
- { X86::VANDPSZrmbk, X86::VANDPDZrmbk,
- X86::VPANDQZrmbk, X86::VPANDDZrmbk },
- { X86::VORPSZrmbk, X86::VORPDZrmbk,
- X86::VPORQZrmbk, X86::VPORDZrmbk },
- { X86::VXORPSZrmbk, X86::VXORPDZrmbk,
- X86::VPXORQZrmbk, X86::VPXORDZrmbk },
- { X86::VANDNPSZ128rmbkz,X86::VANDNPDZ128rmbkz,
- X86::VPANDNQZ128rmbkz,X86::VPANDNDZ128rmbkz},
- { X86::VANDPSZ128rmbkz, X86::VANDPDZ128rmbkz,
- X86::VPANDQZ128rmbkz, X86::VPANDDZ128rmbkz },
- { X86::VORPSZ128rmbkz, X86::VORPDZ128rmbkz,
- X86::VPORQZ128rmbkz, X86::VPORDZ128rmbkz },
- { X86::VXORPSZ128rmbkz, X86::VXORPDZ128rmbkz,
- X86::VPXORQZ128rmbkz, X86::VPXORDZ128rmbkz },
- { X86::VANDNPSZ256rmbkz,X86::VANDNPDZ256rmbkz,
- X86::VPANDNQZ256rmbkz,X86::VPANDNDZ256rmbkz},
- { X86::VANDPSZ256rmbkz, X86::VANDPDZ256rmbkz,
- X86::VPANDQZ256rmbkz, X86::VPANDDZ256rmbkz },
- { X86::VORPSZ256rmbkz, X86::VORPDZ256rmbkz,
- X86::VPORQZ256rmbkz, X86::VPORDZ256rmbkz },
- { X86::VXORPSZ256rmbkz, X86::VXORPDZ256rmbkz,
- X86::VPXORQZ256rmbkz, X86::VPXORDZ256rmbkz },
- { X86::VANDNPSZrmbkz, X86::VANDNPDZrmbkz,
- X86::VPANDNQZrmbkz, X86::VPANDNDZrmbkz },
- { X86::VANDPSZrmbkz, X86::VANDPDZrmbkz,
- X86::VPANDQZrmbkz, X86::VPANDDZrmbkz },
- { X86::VANDPSZrmbkz, X86::VANDPDZrmbkz,
- X86::VPANDQZrmbkz, X86::VPANDDZrmbkz },
- { X86::VORPSZrmbkz, X86::VORPDZrmbkz,
- X86::VPORQZrmbkz, X86::VPORDZrmbkz },
- { X86::VXORPSZrmbkz, X86::VXORPDZrmbkz,
- X86::VPXORQZrmbkz, X86::VPXORDZrmbkz },
-};
-
-// NOTE: These should only be used by the custom domain methods.
-static const uint16_t ReplaceableBlendInstrs[][3] = {
- //PackedSingle PackedDouble PackedInt
- { X86::BLENDPSrmi, X86::BLENDPDrmi, X86::PBLENDWrmi },
- { X86::BLENDPSrri, X86::BLENDPDrri, X86::PBLENDWrri },
- { X86::VBLENDPSrmi, X86::VBLENDPDrmi, X86::VPBLENDWrmi },
- { X86::VBLENDPSrri, X86::VBLENDPDrri, X86::VPBLENDWrri },
- { X86::VBLENDPSYrmi, X86::VBLENDPDYrmi, X86::VPBLENDWYrmi },
- { X86::VBLENDPSYrri, X86::VBLENDPDYrri, X86::VPBLENDWYrri },
-};
-static const uint16_t ReplaceableBlendAVX2Instrs[][3] = {
- //PackedSingle PackedDouble PackedInt
- { X86::VBLENDPSrmi, X86::VBLENDPDrmi, X86::VPBLENDDrmi },
- { X86::VBLENDPSrri, X86::VBLENDPDrri, X86::VPBLENDDrri },
- { X86::VBLENDPSYrmi, X86::VBLENDPDYrmi, X86::VPBLENDDYrmi },
- { X86::VBLENDPSYrri, X86::VBLENDPDYrri, X86::VPBLENDDYrri },
-};
-
-// Special table for changing EVEX logic instructions to VEX.
-// TODO: Should we run EVEX->VEX earlier?
-static const uint16_t ReplaceableCustomAVX512LogicInstrs[][4] = {
- // Two integer columns for 64-bit and 32-bit elements.
- //PackedSingle PackedDouble PackedInt PackedInt
- { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNQZ128rm, X86::VPANDNDZ128rm },
- { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNQZ128rr, X86::VPANDNDZ128rr },
- { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDQZ128rm, X86::VPANDDZ128rm },
- { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDQZ128rr, X86::VPANDDZ128rr },
- { X86::VORPSrm, X86::VORPDrm, X86::VPORQZ128rm, X86::VPORDZ128rm },
- { X86::VORPSrr, X86::VORPDrr, X86::VPORQZ128rr, X86::VPORDZ128rr },
- { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORQZ128rm, X86::VPXORDZ128rm },
- { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORQZ128rr, X86::VPXORDZ128rr },
- { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNQZ256rm, X86::VPANDNDZ256rm },
- { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNQZ256rr, X86::VPANDNDZ256rr },
- { X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDQZ256rm, X86::VPANDDZ256rm },
- { X86::VANDPSYrr, X86::VANDPDYrr, X86::VPANDQZ256rr, X86::VPANDDZ256rr },
- { X86::VORPSYrm, X86::VORPDYrm, X86::VPORQZ256rm, X86::VPORDZ256rm },
- { X86::VORPSYrr, X86::VORPDYrr, X86::VPORQZ256rr, X86::VPORDZ256rr },
- { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORQZ256rm, X86::VPXORDZ256rm },
- { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORQZ256rr, X86::VPXORDZ256rr },
-};
-
// FIXME: Some shuffle and unpack instructions have equivalents in different
// domains, but they require a bit more work than just switching opcodes.
static const uint16_t *lookup(unsigned opcode, unsigned domain,
ArrayRef<uint16_t[3]> Table) {
- for (const uint16_t (&Row)[3] : Table)
- if (Row[domain-1] == opcode)
+ for (const uint16_t(&Row)[3] : Table)
+ if (Row[domain - 1] == opcode)
return Row;
return nullptr;
}
@@ -8078,8 +8744,8 @@ static const uint16_t *lookup(unsigned opcode, unsigned domain,
static const uint16_t *lookupAVX512(unsigned opcode, unsigned domain,
ArrayRef<uint16_t[4]> Table) {
  // If this is the integer domain, make sure to check both integer columns.
- for (const uint16_t (&Row)[4] : Table)
- if (Row[domain-1] == opcode || (domain == 3 && Row[3] == opcode))
+ for (const uint16_t(&Row)[4] : Table)
+ if (Row[domain - 1] == opcode || (domain == 3 && Row[3] == opcode))
return Row;
return nullptr;
}
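// A self-contained mock (hypothetical opcodes, not LLVM code) of how these
// row-lookup helpers are consumed by the domain-switching code below: find
// the row whose current-domain column holds the opcode, then read the
// replacement from the target domain's column. Row layout assumed here:
// [0] = PackedSingle, [1] = PackedDouble, [2] = PackedInt.
#include <cassert>
#include <cstdint>
static const uint16_t MockTable[][3] = {{10, 20, 30}};
static const uint16_t *lookupRow(unsigned Opcode, unsigned Domain) {
  for (const uint16_t(&Row)[3] : MockTable)
    if (Row[Domain - 1] == Opcode)
      return Row;
  return nullptr;
}
int main() {
  const uint16_t *Row = lookupRow(20, 2); // opcode 20 found in PackedDouble
  assert(Row && Row[0] == 10);            // its PackedSingle equivalent
}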
@@ -8163,22 +8829,38 @@ uint16_t X86InstrInfo::getExecutionDomainCustom(const MachineInstr &MI) const {
case X86::VPBLENDWYrmi:
case X86::VPBLENDWYrri:
return GetBlendDomains(8, false);
- case X86::VPANDDZ128rr: case X86::VPANDDZ128rm:
- case X86::VPANDDZ256rr: case X86::VPANDDZ256rm:
- case X86::VPANDQZ128rr: case X86::VPANDQZ128rm:
- case X86::VPANDQZ256rr: case X86::VPANDQZ256rm:
- case X86::VPANDNDZ128rr: case X86::VPANDNDZ128rm:
- case X86::VPANDNDZ256rr: case X86::VPANDNDZ256rm:
- case X86::VPANDNQZ128rr: case X86::VPANDNQZ128rm:
- case X86::VPANDNQZ256rr: case X86::VPANDNQZ256rm:
- case X86::VPORDZ128rr: case X86::VPORDZ128rm:
- case X86::VPORDZ256rr: case X86::VPORDZ256rm:
- case X86::VPORQZ128rr: case X86::VPORQZ128rm:
- case X86::VPORQZ256rr: case X86::VPORQZ256rm:
- case X86::VPXORDZ128rr: case X86::VPXORDZ128rm:
- case X86::VPXORDZ256rr: case X86::VPXORDZ256rm:
- case X86::VPXORQZ128rr: case X86::VPXORQZ128rm:
- case X86::VPXORQZ256rr: case X86::VPXORQZ256rm:
+ case X86::VPANDDZ128rr:
+ case X86::VPANDDZ128rm:
+ case X86::VPANDDZ256rr:
+ case X86::VPANDDZ256rm:
+ case X86::VPANDQZ128rr:
+ case X86::VPANDQZ128rm:
+ case X86::VPANDQZ256rr:
+ case X86::VPANDQZ256rm:
+ case X86::VPANDNDZ128rr:
+ case X86::VPANDNDZ128rm:
+ case X86::VPANDNDZ256rr:
+ case X86::VPANDNDZ256rm:
+ case X86::VPANDNQZ128rr:
+ case X86::VPANDNQZ128rm:
+ case X86::VPANDNQZ256rr:
+ case X86::VPANDNQZ256rm:
+ case X86::VPORDZ128rr:
+ case X86::VPORDZ128rm:
+ case X86::VPORDZ256rr:
+ case X86::VPORDZ256rm:
+ case X86::VPORQZ128rr:
+ case X86::VPORQZ128rm:
+ case X86::VPORQZ256rr:
+ case X86::VPORQZ256rm:
+ case X86::VPXORDZ128rr:
+ case X86::VPXORDZ128rm:
+ case X86::VPXORDZ256rr:
+ case X86::VPXORDZ256rm:
+ case X86::VPXORQZ128rr:
+ case X86::VPXORQZ128rm:
+ case X86::VPXORQZ256rr:
+ case X86::VPXORQZ256rm:
    // If we don't have DQI, see if we can still switch from an EVEX integer
// instruction to a VEX floating point instruction.
if (Subtarget.hasDQI())
@@ -8204,8 +8886,7 @@ uint16_t X86InstrInfo::getExecutionDomainCustom(const MachineInstr &MI) const {
// both inputs.
if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
MI.getOperand(0).getSubReg() == 0 &&
- MI.getOperand(1).getSubReg() == 0 &&
- MI.getOperand(2).getSubReg() == 0)
+ MI.getOperand(1).getSubReg() == 0 && MI.getOperand(2).getSubReg() == 0)
return 0x6;
return 0;
case X86::SHUFPDrri:
@@ -8214,6 +8895,8 @@ uint16_t X86InstrInfo::getExecutionDomainCustom(const MachineInstr &MI) const {
return 0;
}
+#include "X86ReplaceableInstrs.def"
+
bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
unsigned Domain) const {
assert(Domain > 0 && Domain < 4 && "Invalid execution domain");
@@ -8286,28 +8969,44 @@ bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
case X86::VPBLENDWYrmi:
case X86::VPBLENDWYrri:
return SetBlendDomain(16, true);
- case X86::VPANDDZ128rr: case X86::VPANDDZ128rm:
- case X86::VPANDDZ256rr: case X86::VPANDDZ256rm:
- case X86::VPANDQZ128rr: case X86::VPANDQZ128rm:
- case X86::VPANDQZ256rr: case X86::VPANDQZ256rm:
- case X86::VPANDNDZ128rr: case X86::VPANDNDZ128rm:
- case X86::VPANDNDZ256rr: case X86::VPANDNDZ256rm:
- case X86::VPANDNQZ128rr: case X86::VPANDNQZ128rm:
- case X86::VPANDNQZ256rr: case X86::VPANDNQZ256rm:
- case X86::VPORDZ128rr: case X86::VPORDZ128rm:
- case X86::VPORDZ256rr: case X86::VPORDZ256rm:
- case X86::VPORQZ128rr: case X86::VPORQZ128rm:
- case X86::VPORQZ256rr: case X86::VPORQZ256rm:
- case X86::VPXORDZ128rr: case X86::VPXORDZ128rm:
- case X86::VPXORDZ256rr: case X86::VPXORDZ256rm:
- case X86::VPXORQZ128rr: case X86::VPXORQZ128rm:
- case X86::VPXORQZ256rr: case X86::VPXORQZ256rm: {
+ case X86::VPANDDZ128rr:
+ case X86::VPANDDZ128rm:
+ case X86::VPANDDZ256rr:
+ case X86::VPANDDZ256rm:
+ case X86::VPANDQZ128rr:
+ case X86::VPANDQZ128rm:
+ case X86::VPANDQZ256rr:
+ case X86::VPANDQZ256rm:
+ case X86::VPANDNDZ128rr:
+ case X86::VPANDNDZ128rm:
+ case X86::VPANDNDZ256rr:
+ case X86::VPANDNDZ256rm:
+ case X86::VPANDNQZ128rr:
+ case X86::VPANDNQZ128rm:
+ case X86::VPANDNQZ256rr:
+ case X86::VPANDNQZ256rm:
+ case X86::VPORDZ128rr:
+ case X86::VPORDZ128rm:
+ case X86::VPORDZ256rr:
+ case X86::VPORDZ256rm:
+ case X86::VPORQZ128rr:
+ case X86::VPORQZ128rm:
+ case X86::VPORQZ256rr:
+ case X86::VPORQZ256rm:
+ case X86::VPXORDZ128rr:
+ case X86::VPXORDZ128rm:
+ case X86::VPXORDZ256rr:
+ case X86::VPXORDZ256rm:
+ case X86::VPXORQZ128rr:
+ case X86::VPXORQZ128rm:
+ case X86::VPXORQZ256rr:
+ case X86::VPXORQZ256rm: {
// Without DQI, convert EVEX instructions to VEX instructions.
if (Subtarget.hasDQI())
return false;
- const uint16_t *table = lookupAVX512(MI.getOpcode(), dom,
- ReplaceableCustomAVX512LogicInstrs);
+ const uint16_t *table =
+ lookupAVX512(MI.getOpcode(), dom, ReplaceableCustomAVX512LogicInstrs);
assert(table && "Instruction not found in table?");
// Don't change integer Q instructions to D instructions and
    // use D instructions if we started with a PS instruction.
@@ -8335,8 +9034,10 @@ bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
if (Domain == 1) {
unsigned Imm = MI.getOperand(3).getImm();
unsigned NewImm = 0x44;
- if (Imm & 1) NewImm |= 0x0a;
- if (Imm & 2) NewImm |= 0xa0;
+ if (Imm & 1)
+ NewImm |= 0x0a;
+ if (Imm & 2)
+ NewImm |= 0xa0;
MI.getOperand(3).setImm(NewImm);
MI.setDesc(get(X86::SHUFPSrri));
}
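// A worked check (not LLVM code) of the SHUFPD -> SHUFPS immediate mapping
// above. SHUFPD selects each f64 result element with one bit; SHUFPS uses
// four 2-bit fields, one per f32 element. The base 0x44 encodes fields
// (0,1,0,1), taking f64 element 0 from both sources; OR-ing 0x0a (or 0xa0)
// bumps the low (or high) f32 pair to fields (2,3), i.e. f64 element 1.
#include <cassert>
#include <cstdint>
static uint8_t shufpdToShufpsImm(uint8_t Imm) {
  uint8_t NewImm = 0x44;
  if (Imm & 1)
    NewImm |= 0x0a; // low result <- f64 element 1 of source 1
  if (Imm & 2)
    NewImm |= 0xa0; // high result <- f64 element 1 of source 2
  return NewImm;
}
int main() {
  assert(shufpdToShufpsImm(0) == 0x44);
  assert(shufpdToShufpsImm(1) == 0x4e);
  assert(shufpdToShufpsImm(3) == 0xee);
}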
@@ -8371,12 +9072,12 @@ X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const {
validDomains = 0xe;
} else if (lookupAVX512(opcode, domain, ReplaceableInstrsAVX512)) {
validDomains = 0xe;
- } else if (Subtarget.hasDQI() && lookupAVX512(opcode, domain,
- ReplaceableInstrsAVX512DQ)) {
+ } else if (Subtarget.hasDQI() &&
+ lookupAVX512(opcode, domain, ReplaceableInstrsAVX512DQ)) {
validDomains = 0xe;
} else if (Subtarget.hasDQI()) {
- if (const uint16_t *table = lookupAVX512(opcode, domain,
- ReplaceableInstrsAVX512DQMasked)) {
+ if (const uint16_t *table =
+ lookupAVX512(opcode, domain, ReplaceableInstrsAVX512DQMasked)) {
if (domain == 1 || (domain == 3 && table[3] == opcode))
validDomains = 0xa;
else
@@ -8388,7 +9089,7 @@ X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const {
}
void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const {
- assert(Domain>0 && Domain<4 && "Invalid execution domain");
+ assert(Domain > 0 && Domain < 4 && "Invalid execution domain");
uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
assert(dom && "Not an SSE instruction");
@@ -8437,6 +9138,12 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const {
MI.setDesc(get(table[Domain - 1]));
}
+void X86InstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ DebugLoc DL;
+ BuildMI(MBB, MI, DL, get(X86::NOOP));
+}
+
/// Return the noop instruction to use for a noop.
MCInst X86InstrInfo::getNop() const {
MCInst Nop;
@@ -8446,7 +9153,8 @@ MCInst X86InstrInfo::getNop() const {
bool X86InstrInfo::isHighLatencyDef(int opc) const {
switch (opc) {
- default: return false;
+ default:
+ return false;
case X86::DIVPDrm:
case X86::DIVPDrr:
case X86::DIVPSrm:
@@ -8775,8 +9483,7 @@ bool X86InstrInfo::hasReassociableOperands(const MachineInstr &Inst,
// instructions that depend on the exact status flags (zero, sign, etc.)
// that are set by using these particular operands with this operation.
const MachineOperand *FlagDef = Inst.findRegisterDefOperand(X86::EFLAGS);
- assert((Inst.getNumDefs() == 1 || FlagDef) &&
- "Implicit def isn't flags?");
+ assert((Inst.getNumDefs() == 1 || FlagDef) && "Implicit def isn't flags?");
if (FlagDef && !FlagDef->isDead())
return false;
@@ -9296,20 +10003,6 @@ void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
MachineInstr &OldMI2,
MachineInstr &NewMI1,
MachineInstr &NewMI2) const {
- // Propagate FP flags from the original instructions.
- // But clear poison-generating flags because those may not be valid now.
- // TODO: There should be a helper function for copying only fast-math-flags.
- uint32_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
- NewMI1.setFlags(IntersectedFlags);
- NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap);
- NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap);
- NewMI1.clearFlag(MachineInstr::MIFlag::IsExact);
-
- NewMI2.setFlags(IntersectedFlags);
- NewMI2.clearFlag(MachineInstr::MIFlag::NoSWrap);
- NewMI2.clearFlag(MachineInstr::MIFlag::NoUWrap);
- NewMI2.clearFlag(MachineInstr::MIFlag::IsExact);
-
// Integer instructions may define an implicit EFLAGS dest register operand.
MachineOperand *OldFlagDef1 = OldMI1.findRegisterDefOperand(X86::EFLAGS);
MachineOperand *OldFlagDef2 = OldMI2.findRegisterDefOperand(X86::EFLAGS);
@@ -9373,230 +10066,220 @@ X86InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
}
namespace {
- /// Create Global Base Reg pass. This initializes the PIC
- /// global base register for x86-32.
- struct CGBR : public MachineFunctionPass {
- static char ID;
- CGBR() : MachineFunctionPass(ID) {}
-
- bool runOnMachineFunction(MachineFunction &MF) override {
- const X86TargetMachine *TM =
+/// Create Global Base Reg pass. This initializes the PIC
+/// global base register for x86-32.
+struct CGBR : public MachineFunctionPass {
+ static char ID;
+ CGBR() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ const X86TargetMachine *TM =
static_cast<const X86TargetMachine *>(&MF.getTarget());
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
-
- // Don't do anything in the 64-bit small and kernel code models. They use
- // RIP-relative addressing for everything.
- if (STI.is64Bit() && (TM->getCodeModel() == CodeModel::Small ||
- TM->getCodeModel() == CodeModel::Kernel))
- return false;
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- // Only emit a global base reg in PIC mode.
- if (!TM->isPositionIndependent())
- return false;
+ // Only emit a global base reg in PIC mode.
+ if (!TM->isPositionIndependent())
+ return false;
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- Register GlobalBaseReg = X86FI->getGlobalBaseReg();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ Register GlobalBaseReg = X86FI->getGlobalBaseReg();
- // If we didn't need a GlobalBaseReg, don't insert code.
- if (GlobalBaseReg == 0)
- return false;
+ // If we didn't need a GlobalBaseReg, don't insert code.
+ if (GlobalBaseReg == 0)
+ return false;
- // Insert the set of GlobalBaseReg into the first MBB of the function
- MachineBasicBlock &FirstMBB = MF.front();
- MachineBasicBlock::iterator MBBI = FirstMBB.begin();
- DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- const X86InstrInfo *TII = STI.getInstrInfo();
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF.front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const X86InstrInfo *TII = STI.getInstrInfo();
- Register PC;
- if (STI.isPICStyleGOT())
- PC = RegInfo.createVirtualRegister(&X86::GR32RegClass);
- else
- PC = GlobalBaseReg;
-
- if (STI.is64Bit()) {
- if (TM->getCodeModel() == CodeModel::Medium) {
- // In the medium code model, use a RIP-relative LEA to materialize the
- // GOT.
- BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PC)
- .addReg(X86::RIP)
- .addImm(0)
- .addReg(0)
- .addExternalSymbol("_GLOBAL_OFFSET_TABLE_")
- .addReg(0);
- } else if (TM->getCodeModel() == CodeModel::Large) {
- // In the large code model, we are aiming for this code, though the
- // register allocation may vary:
- // leaq .LN$pb(%rip), %rax
- // movq $_GLOBAL_OFFSET_TABLE_ - .LN$pb, %rcx
- // addq %rcx, %rax
- // RAX now holds address of _GLOBAL_OFFSET_TABLE_.
- Register PBReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
- Register GOTReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
- BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PBReg)
- .addReg(X86::RIP)
- .addImm(0)
- .addReg(0)
- .addSym(MF.getPICBaseSymbol())
- .addReg(0);
- std::prev(MBBI)->setPreInstrSymbol(MF, MF.getPICBaseSymbol());
- BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOV64ri), GOTReg)
- .addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
- X86II::MO_PIC_BASE_OFFSET);
- BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD64rr), PC)
- .addReg(PBReg, RegState::Kill)
- .addReg(GOTReg, RegState::Kill);
- } else {
- llvm_unreachable("unexpected code model");
- }
+ Register PC;
+ if (STI.isPICStyleGOT())
+ PC = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ else
+ PC = GlobalBaseReg;
+
+ if (STI.is64Bit()) {
+ if (TM->getCodeModel() == CodeModel::Large) {
+ // In the large code model, we are aiming for this code, though the
+ // register allocation may vary:
+ // leaq .LN$pb(%rip), %rax
+ // movq $_GLOBAL_OFFSET_TABLE_ - .LN$pb, %rcx
+ // addq %rcx, %rax
+ // RAX now holds address of _GLOBAL_OFFSET_TABLE_.
+ Register PBReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
+ Register GOTReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PBReg)
+ .addReg(X86::RIP)
+ .addImm(0)
+ .addReg(0)
+ .addSym(MF.getPICBaseSymbol())
+ .addReg(0);
+ std::prev(MBBI)->setPreInstrSymbol(MF, MF.getPICBaseSymbol());
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOV64ri), GOTReg)
+ .addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
+ X86II::MO_PIC_BASE_OFFSET);
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD64rr), PC)
+ .addReg(PBReg, RegState::Kill)
+ .addReg(GOTReg, RegState::Kill);
} else {
- // Operand of MovePCtoStack is completely ignored by asm printer. It's
- // only used in JIT code emission as displacement to pc.
- BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
-
- // If we're using vanilla 'GOT' PIC style, we should use relative
- // addressing not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
- if (STI.isPICStyleGOT()) {
- // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel],
- // %some_register
- BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
- .addReg(PC)
- .addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
- X86II::MO_GOT_ABSOLUTE_ADDRESS);
- }
+ // In other code models, use a RIP-relative LEA to materialize the
+ // GOT.
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PC)
+ .addReg(X86::RIP)
+ .addImm(0)
+ .addReg(0)
+ .addExternalSymbol("_GLOBAL_OFFSET_TABLE_")
+ .addReg(0);
+ }
+ } else {
+    // Operand of MovePCtoStack is completely ignored by the asm printer.
+    // It's only used in JIT code emission as a displacement to the pc.
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
+
+    // If we're using vanilla 'GOT' PIC style, we should use addressing
+    // relative not to the pc but to the _GLOBAL_OFFSET_TABLE_ external
+    // symbol.
+ if (STI.isPICStyleGOT()) {
+ // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel],
+ // %some_register
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
+ .addReg(PC)
+ .addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
+ X86II::MO_GOT_ABSOLUTE_ADDRESS);
}
-
- return true;
}
- StringRef getPassName() const override {
- return "X86 PIC Global Base Reg Initialization";
- }
+ return true;
+ }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
+ StringRef getPassName() const override {
+ return "X86 PIC Global Base Reg Initialization";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
} // namespace
char CGBR::ID = 0;
-FunctionPass*
-llvm::createX86GlobalBaseRegPass() { return new CGBR(); }
+FunctionPass *llvm::createX86GlobalBaseRegPass() { return new CGBR(); }
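// For illustration only, under the usual i386 GOT-PIC lowering (a sketch,
// not text emitted verbatim by this pass): the MOVPC32r + ADD32ri pair
// above typically becomes
//   calll .L0$pb
// .L0$pb:
//   popl  %eax
//   addl  $_GLOBAL_OFFSET_TABLE_+(.-.L0$pb), %eax
// leaving the GOT address in the global base register.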
namespace {
- struct LDTLSCleanup : public MachineFunctionPass {
- static char ID;
- LDTLSCleanup() : MachineFunctionPass(ID) {}
+struct LDTLSCleanup : public MachineFunctionPass {
+ static char ID;
+ LDTLSCleanup() : MachineFunctionPass(ID) {}
- bool runOnMachineFunction(MachineFunction &MF) override {
- if (skipFunction(MF.getFunction()))
- return false;
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (skipFunction(MF.getFunction()))
+ return false;
- X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
- if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
- // No point folding accesses if there isn't at least two.
- return false;
+ X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
+ if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
+      // No point folding accesses if there aren't at least two.
+ return false;
+ }
+
+ MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
+ return VisitNode(DT->getRootNode(), 0);
+ }
+
+ // Visit the dominator subtree rooted at Node in pre-order.
+  // If TLSBaseAddrReg is non-zero, then use that to replace any
+ // TLS_base_addr instructions. Otherwise, create the register
+ // when the first such instruction is seen, and then use it
+ // as we encounter more instructions.
+ bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
+ MachineBasicBlock *BB = Node->getBlock();
+ bool Changed = false;
+
+ // Traverse the current block.
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
+ ++I) {
+ switch (I->getOpcode()) {
+ case X86::TLS_base_addr32:
+ case X86::TLS_base_addr64:
+ if (TLSBaseAddrReg)
+ I = ReplaceTLSBaseAddrCall(*I, TLSBaseAddrReg);
+ else
+ I = SetRegister(*I, &TLSBaseAddrReg);
+ Changed = true;
+ break;
+ default:
+ break;
}
+ }
- MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
- return VisitNode(DT->getRootNode(), 0);
+ // Visit the children of this block in the dominator tree.
+ for (auto &I : *Node) {
+ Changed |= VisitNode(I, TLSBaseAddrReg);
}
- // Visit the dominator subtree rooted at Node in pre-order.
- // If TLSBaseAddrReg is non-null, then use that to replace any
- // TLS_base_addr instructions. Otherwise, create the register
- // when the first such instruction is seen, and then use it
- // as we encounter more instructions.
- bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
- MachineBasicBlock *BB = Node->getBlock();
- bool Changed = false;
-
- // Traverse the current block.
- for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
- ++I) {
- switch (I->getOpcode()) {
- case X86::TLS_base_addr32:
- case X86::TLS_base_addr64:
- if (TLSBaseAddrReg)
- I = ReplaceTLSBaseAddrCall(*I, TLSBaseAddrReg);
- else
- I = SetRegister(*I, &TLSBaseAddrReg);
- Changed = true;
- break;
- default:
- break;
- }
- }
+ return Changed;
+ }
- // Visit the children of this block in the dominator tree.
- for (auto &I : *Node) {
- Changed |= VisitNode(I, TLSBaseAddrReg);
- }
+ // Replace the TLS_base_addr instruction I with a copy from
+ // TLSBaseAddrReg, returning the new instruction.
+ MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr &I,
+ unsigned TLSBaseAddrReg) {
+ MachineFunction *MF = I.getParent()->getParent();
+ const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>();
+ const bool is64Bit = STI.is64Bit();
+ const X86InstrInfo *TII = STI.getInstrInfo();
- return Changed;
- }
+ // Insert a Copy from TLSBaseAddrReg to RAX/EAX.
+ MachineInstr *Copy =
+ BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII->get(TargetOpcode::COPY), is64Bit ? X86::RAX : X86::EAX)
+ .addReg(TLSBaseAddrReg);
- // Replace the TLS_base_addr instruction I with a copy from
- // TLSBaseAddrReg, returning the new instruction.
- MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr &I,
- unsigned TLSBaseAddrReg) {
- MachineFunction *MF = I.getParent()->getParent();
- const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>();
- const bool is64Bit = STI.is64Bit();
- const X86InstrInfo *TII = STI.getInstrInfo();
-
- // Insert a Copy from TLSBaseAddrReg to RAX/EAX.
- MachineInstr *Copy =
- BuildMI(*I.getParent(), I, I.getDebugLoc(),
- TII->get(TargetOpcode::COPY), is64Bit ? X86::RAX : X86::EAX)
- .addReg(TLSBaseAddrReg);
-
- // Erase the TLS_base_addr instruction.
- I.eraseFromParent();
-
- return Copy;
- }
+ // Erase the TLS_base_addr instruction.
+ I.eraseFromParent();
- // Create a virtual register in *TLSBaseAddrReg, and populate it by
- // inserting a copy instruction after I. Returns the new instruction.
- MachineInstr *SetRegister(MachineInstr &I, unsigned *TLSBaseAddrReg) {
- MachineFunction *MF = I.getParent()->getParent();
- const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>();
- const bool is64Bit = STI.is64Bit();
- const X86InstrInfo *TII = STI.getInstrInfo();
-
- // Create a virtual register for the TLS base address.
- MachineRegisterInfo &RegInfo = MF->getRegInfo();
- *TLSBaseAddrReg = RegInfo.createVirtualRegister(is64Bit
- ? &X86::GR64RegClass
- : &X86::GR32RegClass);
-
- // Insert a copy from RAX/EAX to TLSBaseAddrReg.
- MachineInstr *Next = I.getNextNode();
- MachineInstr *Copy =
- BuildMI(*I.getParent(), Next, I.getDebugLoc(),
- TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
- .addReg(is64Bit ? X86::RAX : X86::EAX);
-
- return Copy;
- }
+ return Copy;
+ }
- StringRef getPassName() const override {
- return "Local Dynamic TLS Access Clean-up";
- }
+ // Create a virtual register in *TLSBaseAddrReg, and populate it by
+ // inserting a copy instruction after I. Returns the new instruction.
+ MachineInstr *SetRegister(MachineInstr &I, unsigned *TLSBaseAddrReg) {
+ MachineFunction *MF = I.getParent()->getParent();
+ const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>();
+ const bool is64Bit = STI.is64Bit();
+ const X86InstrInfo *TII = STI.getInstrInfo();
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<MachineDominatorTree>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
-}
+ // Create a virtual register for the TLS base address.
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ *TLSBaseAddrReg = RegInfo.createVirtualRegister(
+ is64Bit ? &X86::GR64RegClass : &X86::GR32RegClass);
+
+ // Insert a copy from RAX/EAX to TLSBaseAddrReg.
+ MachineInstr *Next = I.getNextNode();
+ MachineInstr *Copy = BuildMI(*I.getParent(), Next, I.getDebugLoc(),
+ TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
+ .addReg(is64Bit ? X86::RAX : X86::EAX);
+
+ return Copy;
+ }
+
+ StringRef getPassName() const override {
+ return "Local Dynamic TLS Access Clean-up";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+} // namespace
char LDTLSCleanup::ID = 0;
-FunctionPass*
-llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
+FunctionPass *llvm::createCleanupLocalDynamicTLSPass() {
+ return new LDTLSCleanup();
+}
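// A minimal sketch of the effect, assuming the usual ELF lowering of the
// TLS_base_addr pseudos to __tls_get_addr calls (illustrative, not from
// the patch): the first access keeps its call and its result is copied
// into a virtual register; every dominated access is then rewritten into
// a plain copy from that register back into RAX/EAX, saving the repeated
// calls.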
/// Constants defining how certain sequences should be outlined.
///
@@ -9626,10 +10309,7 @@ llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
/// * Call construction overhead: 1 (jump instruction)
/// * Frame construction overhead: 0 (don't need to return)
///
-enum MachineOutlinerClass {
- MachineOutlinerDefault,
- MachineOutlinerTailCall
-};
+enum MachineOutlinerClass { MachineOutlinerDefault, MachineOutlinerTailCall };
std::optional<outliner::OutlinedFunction>
X86InstrInfo::getOutliningCandidateInfo(
@@ -9689,8 +10369,8 @@ X86InstrInfo::getOutliningCandidateInfo(
MachineOutlinerDefault);
}
-bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF,
- bool OutlineFromLinkOnceODRs) const {
+bool X86InstrInfo::isFunctionSafeToOutlineFrom(
+ MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
const Function &F = MF.getFunction();
// Does the function use a red zone? If it does, then we can't risk messing
@@ -9705,14 +10385,15 @@ bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF,
// If we *don't* want to outline from things that could potentially be deduped
// then return false.
if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
- return false;
+ return false;
// This function is viable for outlining, so return true.
return true;
}
outliner::InstrType
-X86InstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT, unsigned Flags) const {
+X86InstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
+ unsigned Flags) const {
MachineInstr &MI = *MIT;
// Is this a terminator for a basic block?
@@ -9748,10 +10429,9 @@ X86InstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT, unsigned F
return outliner::InstrType::Legal;
}
-void X86InstrInfo::buildOutlinedFrame(MachineBasicBlock &MBB,
- MachineFunction &MF,
- const outliner::OutlinedFunction &OF)
- const {
+void X86InstrInfo::buildOutlinedFrame(
+ MachineBasicBlock &MBB, MachineFunction &MF,
+ const outliner::OutlinedFunction &OF) const {
// If we're a tail call, we already have a return, so don't do anything.
if (OF.FrameConstructionID == MachineOutlinerTailCall)
return;
@@ -9762,27 +10442,88 @@ void X86InstrInfo::buildOutlinedFrame(MachineBasicBlock &MBB,
MBB.insert(MBB.end(), retq);
}
-MachineBasicBlock::iterator
-X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &It,
- MachineFunction &MF,
- outliner::Candidate &C) const {
+MachineBasicBlock::iterator X86InstrInfo::insertOutlinedCall(
+ Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
+ MachineFunction &MF, outliner::Candidate &C) const {
// Is it a tail call?
if (C.CallConstructionID == MachineOutlinerTailCall) {
// Yes, just insert a JMP.
- It = MBB.insert(It,
- BuildMI(MF, DebugLoc(), get(X86::TAILJMPd64))
- .addGlobalAddress(M.getNamedValue(MF.getName())));
+ It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(X86::TAILJMPd64))
+ .addGlobalAddress(M.getNamedValue(MF.getName())));
} else {
// No, insert a call.
- It = MBB.insert(It,
- BuildMI(MF, DebugLoc(), get(X86::CALL64pcrel32))
- .addGlobalAddress(M.getNamedValue(MF.getName())));
+ It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(X86::CALL64pcrel32))
+ .addGlobalAddress(M.getNamedValue(MF.getName())));
}
return It;
}
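// Sketch of the two call shapes built above (restating the code, not
// adding to it): a MachineOutlinerTailCall candidate reaches the outlined
// body via "jmp <outlined function>" and never returns to the caller,
// while the default class uses "call <outlined function>" and relies on
// the retq appended by buildOutlinedFrame.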
+void X86InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator Iter,
+ DebugLoc &DL,
+ bool AllowSideEffects) const {
+ const MachineFunction &MF = *MBB.getParent();
+ const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
+ const TargetRegisterInfo &TRI = getRegisterInfo();
+
+ if (ST.hasMMX() && X86::VR64RegClass.contains(Reg))
+ // FIXME: Should we ignore MMX registers?
+ return;
+
+ if (TRI.isGeneralPurposeRegister(MF, Reg)) {
+ // Convert register to the 32-bit version. Both 'movl' and 'xorl' clear the
+ // upper bits of a 64-bit register automagically.
+ Reg = getX86SubSuperRegister(Reg, 32);
+
+ if (!AllowSideEffects)
+ // XOR affects flags, so use a MOV instead.
+ BuildMI(MBB, Iter, DL, get(X86::MOV32ri), Reg).addImm(0);
+ else
+ BuildMI(MBB, Iter, DL, get(X86::XOR32rr), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+ } else if (X86::VR128RegClass.contains(Reg)) {
+ // XMM#
+ if (!ST.hasSSE1())
+ return;
+
+ // PXOR is safe to use because it doesn't affect flags.
+ BuildMI(MBB, Iter, DL, get(X86::PXORrr), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+ } else if (X86::VR256RegClass.contains(Reg)) {
+ // YMM#
+ if (!ST.hasAVX())
+ return;
+
+ // VPXOR is safe to use because it doesn't affect flags.
+ BuildMI(MBB, Iter, DL, get(X86::VPXORrr), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+ } else if (X86::VR512RegClass.contains(Reg)) {
+ // ZMM#
+ if (!ST.hasAVX512())
+ return;
+
+ // VPXORY is safe to use because it doesn't affect flags.
+ BuildMI(MBB, Iter, DL, get(X86::VPXORYrr), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+ } else if (X86::VK1RegClass.contains(Reg) || X86::VK2RegClass.contains(Reg) ||
+ X86::VK4RegClass.contains(Reg) || X86::VK8RegClass.contains(Reg) ||
+ X86::VK16RegClass.contains(Reg)) {
+ if (!ST.hasVLX())
+ return;
+
+ // KXOR is safe to use because it doesn't affect flags.
+ unsigned Op = ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr;
+ BuildMI(MBB, Iter, DL, get(Op), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+ }
+}
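// Usage sketch (illustrative): for a GPR such as RAX the code above first
// narrows to the 32-bit alias, so AllowSideEffects yields "xorl %eax, %eax"
// (which clobbers EFLAGS) and !AllowSideEffects yields "movl $0, %eax";
// either form also zeroes the upper 32 bits of the 64-bit register.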
+
bool X86InstrInfo::getMachineCombinerPatterns(
MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
bool DoRegPressureReduce) const {
@@ -9919,5 +10660,14 @@ void X86InstrInfo::genAlternativeCodeSequence(
}
}
+// See also: X86DAGToDAGISel::SelectInlineAsmMemoryOperand().
+void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
+ int FI) const {
+ X86AddressMode M;
+ M.BaseType = X86AddressMode::FrameIndexBase;
+ M.Base.FrameIndex = FI;
+ M.getFullAddress(Ops);
+}
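// Illustrative note, assuming X86AddressMode::getFullAddress's usual
// five-operand expansion (base, scale, index, displacement, segment): the
// call above should append operands describing the plain address "FI + 0",
// i.e. the frame index as base with default scale, no index register, zero
// displacement and no segment override.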
+
#define GET_INSTRINFO_HELPERS
#include "X86GenInstrInfo.inc"
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h
index 82554032ebd6..eac8d79eb8a3 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h
@@ -150,6 +150,17 @@ class X86InstrInfo final : public X86GenInstrInfo {
public:
explicit X86InstrInfo(X86Subtarget &STI);
+ /// Given a machine instruction descriptor, returns the register
+  /// class constraint for OpNum, or nullptr. The returned register class
+  /// may differ from the definition in the TD file, e.g.
+  /// GR*RegClass (definition in TD file)
+  /// ->
+  /// GR*_NOREX2RegClass (returned register class)
+ const TargetRegisterClass *
+ getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction &MF) const override;
+
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
@@ -457,6 +468,9 @@ public:
int64_t Offset2,
unsigned NumLoads) const override;
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const override;
+
MCInst getNop() const override;
bool
@@ -547,6 +561,15 @@ public:
Register &FoldAsLoadDefReg,
MachineInstr *&DefMI) const override;
+ bool FoldImmediateImpl(MachineInstr &UseMI, MachineInstr *DefMI, Register Reg,
+ int64_t ImmVal, MachineRegisterInfo *MRI,
+ bool MakeChange) const;
+
+ /// Reg is known to be defined by a move immediate instruction, try to fold
+ /// the immediate into the use instruction.
+ bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
+ MachineRegisterInfo *MRI) const override;
+
std::pair<unsigned, unsigned>
decomposeMachineOperandsTargetFlags(unsigned TF) const override;
@@ -570,6 +593,10 @@ public:
MachineBasicBlock::iterator &It, MachineFunction &MF,
outliner::Candidate &C) const override;
+ void buildClearRegister(Register Reg, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator Iter, DebugLoc &DL,
+ bool AllowSideEffects = true) const override;
+
bool verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const override;
#define GET_INSTRINFO_HELPER_DECLS
@@ -632,6 +659,9 @@ protected:
return false;
}
+ void getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
+ int FI) const override;
+
private:
/// This is a helper for convertToThreeAddress for 8 and 16-bit instructions.
/// We use 32-bit LEA to form 3-address code by promoting to a 32-bit
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td
index 08e6e4e0627b..ee54796323b8 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td
@@ -6,1408 +6,36 @@
//
//===----------------------------------------------------------------------===//
//
-// This file describes the X86 properties of the instructions which are needed
+// This file describes the X86 properties of the instructions which are needed
// for code generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// X86 specific DAG Nodes.
+// X86 Pattern fragments.
//
-
-def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
- SDTCisSameAs<1, 2>]>;
-def SDTX86FCmp : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisFP<1>,
- SDTCisSameAs<1, 2>]>;
-
-def SDTX86Cmov : SDTypeProfile<1, 4,
- [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
- SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
-
-// Unary and binary operator instructions that set EFLAGS as a side-effect.
-def SDTUnaryArithWithFlags : SDTypeProfile<2, 1,
- [SDTCisSameAs<0, 2>,
- SDTCisInt<0>, SDTCisVT<1, i32>]>;
-
-def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
- [SDTCisSameAs<0, 2>,
- SDTCisSameAs<0, 3>,
- SDTCisInt<0>, SDTCisVT<1, i32>]>;
-
-// SDTBinaryArithWithFlagsInOut - RES1, EFLAGS = op LHS, RHS, EFLAGS
-def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
- [SDTCisSameAs<0, 2>,
- SDTCisSameAs<0, 3>,
- SDTCisInt<0>,
- SDTCisVT<1, i32>,
- SDTCisVT<4, i32>]>;
-// RES1, RES2, FLAGS = op LHS, RHS
-def SDT2ResultBinaryArithWithFlags : SDTypeProfile<3, 2,
- [SDTCisSameAs<0, 1>,
- SDTCisSameAs<0, 2>,
- SDTCisSameAs<0, 3>,
- SDTCisInt<0>, SDTCisVT<1, i32>]>;
-def SDTX86BrCond : SDTypeProfile<0, 3,
- [SDTCisVT<0, OtherVT>,
- SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
-
-def SDTX86SetCC : SDTypeProfile<1, 2,
- [SDTCisVT<0, i8>,
- SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
-def SDTX86SetCC_C : SDTypeProfile<1, 2,
- [SDTCisInt<0>,
- SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
-
-def SDTX86sahf : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i8>]>;
-
-def SDTX86rdrand : SDTypeProfile<2, 0, [SDTCisInt<0>, SDTCisVT<1, i32>]>;
-
-def SDTX86rdpkru : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-def SDTX86wrpkru : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
- SDTCisVT<2, i32>]>;
-
-def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
- SDTCisVT<2, i8>]>;
-def SDTX86cas8pair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
-def SDTX86cas16pair : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i64>]>;
-
-def SDTLockBinaryArithWithFlags : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
- SDTCisPtrTy<1>,
- SDTCisInt<2>]>;
-
-def SDTLockUnaryArithWithFlags : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
- SDTCisPtrTy<1>]>;
-
-def SDTX86Ret : SDTypeProfile<0, -1, [SDTCisVT<0, i32>]>;
-
-def SDT_X86CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>,
- SDTCisVT<1, i32>]>;
-def SDT_X86CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>,
- SDTCisVT<1, i32>]>;
-
-def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
-
-def SDT_X86NtBrind : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
-
-def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
- SDTCisPtrTy<1>]>;
-
-def SDT_X86VAARG : SDTypeProfile<1, -1, [SDTCisPtrTy<0>,
- SDTCisPtrTy<1>,
- SDTCisVT<2, i32>,
- SDTCisVT<3, i8>,
- SDTCisVT<4, i32>]>;
-
-def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
-
-def SDTX86Void : SDTypeProfile<0, 0, []>;
-
-def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
-
-def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-
-def SDT_X86TLSBASEADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-
-def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-
-def SDT_X86DYN_ALLOCA : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
-
-def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
-
-def SDT_X86PROBED_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
-
-def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-
-def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
-
-def SDT_X86ENQCMD : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
- SDTCisPtrTy<1>, SDTCisSameAs<1, 2>]>;
-
-def SDT_X86AESENCDECKL : SDTypeProfile<2, 2, [SDTCisVT<0, v2i64>,
- SDTCisVT<1, i32>,
- SDTCisVT<2, v2i64>,
- SDTCisPtrTy<3>]>;
-
-def SDTX86Cmpccxadd : SDTypeProfile<1, 4, [SDTCisSameAs<0, 2>,
- SDTCisPtrTy<1>, SDTCisSameAs<2, 3>,
- SDTCisVT<4, i8>]>;
-
-def X86MFence : SDNode<"X86ISD::MFENCE", SDTNone, [SDNPHasChain]>;
-
-
-def X86bsf : SDNode<"X86ISD::BSF", SDTUnaryArithWithFlags>;
-def X86bsr : SDNode<"X86ISD::BSR", SDTUnaryArithWithFlags>;
-def X86fshl : SDNode<"X86ISD::FSHL", SDTIntShiftDOp>;
-def X86fshr : SDNode<"X86ISD::FSHR", SDTIntShiftDOp>;
-
-def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest>;
-def X86fcmp : SDNode<"X86ISD::FCMP", SDTX86FCmp>;
-def X86strict_fcmp : SDNode<"X86ISD::STRICT_FCMP", SDTX86FCmp, [SDNPHasChain]>;
-def X86strict_fcmps : SDNode<"X86ISD::STRICT_FCMPS", SDTX86FCmp, [SDNPHasChain]>;
-def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>;
-
-def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>;
-def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond,
- [SDNPHasChain]>;
-def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>;
-def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>;
-
-def X86rdrand : SDNode<"X86ISD::RDRAND", SDTX86rdrand,
- [SDNPHasChain, SDNPSideEffect]>;
-
-def X86rdseed : SDNode<"X86ISD::RDSEED", SDTX86rdrand,
- [SDNPHasChain, SDNPSideEffect]>;
-
-def X86rdpkru : SDNode<"X86ISD::RDPKRU", SDTX86rdpkru,
- [SDNPHasChain, SDNPSideEffect]>;
-def X86wrpkru : SDNode<"X86ISD::WRPKRU", SDTX86wrpkru,
- [SDNPHasChain, SDNPSideEffect]>;
-
-def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
-def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8pair,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
-def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86cas16pair,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
-
-def X86retglue : SDNode<"X86ISD::RET_GLUE", SDTX86Ret,
- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-def X86iret : SDNode<"X86ISD::IRET", SDTX86Ret,
- [SDNPHasChain, SDNPOptInGlue]>;
-
-def X86vastart_save_xmm_regs :
- SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
- SDT_X86VASTART_SAVE_XMM_REGS,
- [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPVariadic]>;
-def X86vaarg64 :
- SDNode<"X86ISD::VAARG_64", SDT_X86VAARG,
- [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
- SDNPMemOperand]>;
-def X86vaargx32 :
- SDNode<"X86ISD::VAARG_X32", SDT_X86VAARG,
- [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
- SDNPMemOperand]>;
-def X86callseq_start :
- SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
- [SDNPHasChain, SDNPOutGlue]>;
-def X86callseq_end :
- SDNode<"ISD::CALLSEQ_END", SDT_X86CallSeqEnd,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-
-def X86call : SDNode<"X86ISD::CALL", SDT_X86Call,
- [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
- SDNPVariadic]>;
-
-def X86call_rvmarker : SDNode<"X86ISD::CALL_RVMARKER", SDT_X86Call,
- [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
- SDNPVariadic]>;
-
-
-def X86NoTrackCall : SDNode<"X86ISD::NT_CALL", SDT_X86Call,
- [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
- SDNPVariadic]>;
-def X86NoTrackBrind : SDNode<"X86ISD::NT_BRIND", SDT_X86NtBrind,
- [SDNPHasChain]>;
-
-def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore]>;
-def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
- SDNPMayLoad]>;
-
-def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
-def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
-
-def X86RecoverFrameAlloc : SDNode<"ISD::LOCAL_RECOVER",
- SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
- SDTCisInt<1>]>>;
-
-def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-
-def X86tlsbaseaddr : SDNode<"X86ISD::TLSBASEADDR", SDT_X86TLSBASEADDR,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-
-def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
- [SDNPHasChain]>;
-
-def X86eh_sjlj_setjmp : SDNode<"X86ISD::EH_SJLJ_SETJMP",
- SDTypeProfile<1, 1, [SDTCisInt<0>,
- SDTCisPtrTy<1>]>,
- [SDNPHasChain, SDNPSideEffect]>;
-def X86eh_sjlj_longjmp : SDNode<"X86ISD::EH_SJLJ_LONGJMP",
- SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
- [SDNPHasChain, SDNPSideEffect]>;
-def X86eh_sjlj_setup_dispatch : SDNode<"X86ISD::EH_SJLJ_SETUP_DISPATCH",
- SDTypeProfile<0, 0, []>,
- [SDNPHasChain, SDNPSideEffect]>;
-
-def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-
-def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags,
- [SDNPCommutative]>;
-def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>;
-def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags,
- [SDNPCommutative]>;
-def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags,
- [SDNPCommutative]>;
-def X86adc_flag : SDNode<"X86ISD::ADC", SDTBinaryArithWithFlagsInOut>;
-def X86sbb_flag : SDNode<"X86ISD::SBB", SDTBinaryArithWithFlagsInOut>;
-
-def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags,
- [SDNPCommutative]>;
-def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags,
- [SDNPCommutative]>;
-def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
- [SDNPCommutative]>;
-
-def X86lock_add : SDNode<"X86ISD::LADD", SDTLockBinaryArithWithFlags,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
- SDNPMemOperand]>;
-def X86lock_sub : SDNode<"X86ISD::LSUB", SDTLockBinaryArithWithFlags,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
- SDNPMemOperand]>;
-def X86lock_or : SDNode<"X86ISD::LOR", SDTLockBinaryArithWithFlags,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
- SDNPMemOperand]>;
-def X86lock_xor : SDNode<"X86ISD::LXOR", SDTLockBinaryArithWithFlags,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
- SDNPMemOperand]>;
-def X86lock_and : SDNode<"X86ISD::LAND", SDTLockBinaryArithWithFlags,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
- SDNPMemOperand]>;
-
-def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>;
-def X86bextri : SDNode<"X86ISD::BEXTRI", SDTIntBinOp>;
-
-def X86bzhi : SDNode<"X86ISD::BZHI", SDTIntBinOp>;
-
-def X86pdep : SDNode<"X86ISD::PDEP", SDTIntBinOp>;
-def X86pext : SDNode<"X86ISD::PEXT", SDTIntBinOp>;
-
-def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
-
-def X86DynAlloca : SDNode<"X86ISD::DYN_ALLOCA", SDT_X86DYN_ALLOCA,
- [SDNPHasChain, SDNPOutGlue]>;
-
-def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
- [SDNPHasChain]>;
-
-def X86ProbedAlloca : SDNode<"X86ISD::PROBED_ALLOCA", SDT_X86PROBED_ALLOCA,
- [SDNPHasChain]>;
-
-def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-
-def X86lwpins : SDNode<"X86ISD::LWPINS",
- SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisInt<1>,
- SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPSideEffect]>;
-
-def X86umwait : SDNode<"X86ISD::UMWAIT",
- SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisInt<1>,
- SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
- [SDNPHasChain, SDNPSideEffect]>;
-
-def X86tpause : SDNode<"X86ISD::TPAUSE",
- SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisInt<1>,
- SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
- [SDNPHasChain, SDNPSideEffect]>;
-
-def X86enqcmd : SDNode<"X86ISD::ENQCMD", SDT_X86ENQCMD,
- [SDNPHasChain, SDNPSideEffect]>;
-def X86enqcmds : SDNode<"X86ISD::ENQCMDS", SDT_X86ENQCMD,
- [SDNPHasChain, SDNPSideEffect]>;
-def X86testui : SDNode<"X86ISD::TESTUI",
- SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>,
- [SDNPHasChain, SDNPSideEffect]>;
-
-def X86aesenc128kl : SDNode<"X86ISD::AESENC128KL", SDT_X86AESENCDECKL,
- [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
- SDNPMemOperand]>;
-def X86aesdec128kl : SDNode<"X86ISD::AESDEC128KL", SDT_X86AESENCDECKL,
- [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
- SDNPMemOperand]>;
-def X86aesenc256kl : SDNode<"X86ISD::AESENC256KL", SDT_X86AESENCDECKL,
- [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
- SDNPMemOperand]>;
-def X86aesdec256kl : SDNode<"X86ISD::AESDEC256KL", SDT_X86AESENCDECKL,
- [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
- SDNPMemOperand]>;
-
-def X86cmpccxadd : SDNode<"X86ISD::CMPCCXADD", SDTX86Cmpccxadd,
- [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
- SDNPMemOperand]>;
+include "X86InstrFragments.td"
+include "X86InstrFragmentsSIMD.td"
//===----------------------------------------------------------------------===//
// X86 Operand Definitions.
//
-
-// A version of ptr_rc which excludes SP, ESP, and RSP. This is used for
-// the index operand of an address, to conform to x86 encoding restrictions.
-def ptr_rc_nosp : PointerLikeRegClass<1>;
-
-// *mem - Operand definitions for the funky X86 addressing mode operands.
-//
-def X86MemAsmOperand : AsmOperandClass {
- let Name = "Mem";
-}
-let RenderMethod = "addMemOperands", SuperClasses = [X86MemAsmOperand] in {
- def X86Mem8AsmOperand : AsmOperandClass { let Name = "Mem8"; }
- def X86Mem16AsmOperand : AsmOperandClass { let Name = "Mem16"; }
- def X86Mem32AsmOperand : AsmOperandClass { let Name = "Mem32"; }
- def X86Mem64AsmOperand : AsmOperandClass { let Name = "Mem64"; }
- def X86Mem80AsmOperand : AsmOperandClass { let Name = "Mem80"; }
- def X86Mem128AsmOperand : AsmOperandClass { let Name = "Mem128"; }
- def X86Mem256AsmOperand : AsmOperandClass { let Name = "Mem256"; }
- def X86Mem512AsmOperand : AsmOperandClass { let Name = "Mem512"; }
- // Gather mem operands
- def X86Mem64_RC128Operand : AsmOperandClass { let Name = "Mem64_RC128"; }
- def X86Mem128_RC128Operand : AsmOperandClass { let Name = "Mem128_RC128"; }
- def X86Mem256_RC128Operand : AsmOperandClass { let Name = "Mem256_RC128"; }
- def X86Mem128_RC256Operand : AsmOperandClass { let Name = "Mem128_RC256"; }
- def X86Mem256_RC256Operand : AsmOperandClass { let Name = "Mem256_RC256"; }
-
- def X86Mem64_RC128XOperand : AsmOperandClass { let Name = "Mem64_RC128X"; }
- def X86Mem128_RC128XOperand : AsmOperandClass { let Name = "Mem128_RC128X"; }
- def X86Mem256_RC128XOperand : AsmOperandClass { let Name = "Mem256_RC128X"; }
- def X86Mem128_RC256XOperand : AsmOperandClass { let Name = "Mem128_RC256X"; }
- def X86Mem256_RC256XOperand : AsmOperandClass { let Name = "Mem256_RC256X"; }
- def X86Mem512_RC256XOperand : AsmOperandClass { let Name = "Mem512_RC256X"; }
- def X86Mem256_RC512Operand : AsmOperandClass { let Name = "Mem256_RC512"; }
- def X86Mem512_RC512Operand : AsmOperandClass { let Name = "Mem512_RC512"; }
- def X86Mem512_GR16Operand : AsmOperandClass { let Name = "Mem512_GR16"; }
- def X86Mem512_GR32Operand : AsmOperandClass { let Name = "Mem512_GR32"; }
- def X86Mem512_GR64Operand : AsmOperandClass { let Name = "Mem512_GR64"; }
-
- def X86SibMemOperand : AsmOperandClass { let Name = "SibMem"; }
-}
-
-def X86AbsMemAsmOperand : AsmOperandClass {
- let Name = "AbsMem";
- let SuperClasses = [X86MemAsmOperand];
-}
-
-class X86MemOperand<string printMethod,
- AsmOperandClass parserMatchClass = X86MemAsmOperand,
- int size = 0> : Operand<iPTR> {
- let PrintMethod = printMethod;
- let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, SEGMENT_REG);
- let ParserMatchClass = parserMatchClass;
- let OperandType = "OPERAND_MEMORY";
- int Size = size;
-}
-
-// Gather mem operands
-class X86VMemOperand<RegisterClass RC, string printMethod,
- AsmOperandClass parserMatchClass, int size = 0>
- : X86MemOperand<printMethod, parserMatchClass, size> {
- let MIOperandInfo = (ops ptr_rc, i8imm, RC, i32imm, SEGMENT_REG);
-}
-
-def anymem : X86MemOperand<"printMemReference">;
-def X86any_fcmp : PatFrags<(ops node:$lhs, node:$rhs),
- [(X86strict_fcmp node:$lhs, node:$rhs),
- (X86fcmp node:$lhs, node:$rhs)]>;
-
-// FIXME: Right now we allow any size during parsing, but we might want to
-// restrict to only unsized memory.
-def opaquemem : X86MemOperand<"printMemReference">;
-
-def sibmem: X86MemOperand<"printMemReference", X86SibMemOperand>;
-
-def i8mem : X86MemOperand<"printbytemem", X86Mem8AsmOperand, 8>;
-def i16mem : X86MemOperand<"printwordmem", X86Mem16AsmOperand, 16>;
-def i32mem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand, 32>;
-def i64mem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand, 64>;
-def i128mem : X86MemOperand<"printxmmwordmem", X86Mem128AsmOperand, 128>;
-def i256mem : X86MemOperand<"printymmwordmem", X86Mem256AsmOperand, 256>;
-def i512mem : X86MemOperand<"printzmmwordmem", X86Mem512AsmOperand, 512>;
-def f16mem : X86MemOperand<"printwordmem", X86Mem16AsmOperand, 16>;
-def f32mem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand, 32>;
-def f64mem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand, 64>;
-def f80mem : X86MemOperand<"printtbytemem", X86Mem80AsmOperand, 80>;
-def f128mem : X86MemOperand<"printxmmwordmem", X86Mem128AsmOperand, 128>;
-def f256mem : X86MemOperand<"printymmwordmem", X86Mem256AsmOperand, 256>;
-def f512mem : X86MemOperand<"printzmmwordmem", X86Mem512AsmOperand, 512>;
-
-// 32/64 mode specific mem operands
-def i512mem_GR16 : X86MemOperand<"printzmmwordmem", X86Mem512_GR16Operand, 512>;
-def i512mem_GR32 : X86MemOperand<"printzmmwordmem", X86Mem512_GR32Operand, 512>;
-def i512mem_GR64 : X86MemOperand<"printzmmwordmem", X86Mem512_GR64Operand, 512>;
-
-// Gather mem operands
-def vx64mem : X86VMemOperand<VR128, "printqwordmem", X86Mem64_RC128Operand, 64>;
-def vx128mem : X86VMemOperand<VR128, "printxmmwordmem", X86Mem128_RC128Operand, 128>;
-def vx256mem : X86VMemOperand<VR128, "printymmwordmem", X86Mem256_RC128Operand, 256>;
-def vy128mem : X86VMemOperand<VR256, "printxmmwordmem", X86Mem128_RC256Operand, 128>;
-def vy256mem : X86VMemOperand<VR256, "printymmwordmem", X86Mem256_RC256Operand, 256>;
-
-def vx64xmem : X86VMemOperand<VR128X, "printqwordmem", X86Mem64_RC128XOperand, 64>;
-def vx128xmem : X86VMemOperand<VR128X, "printxmmwordmem", X86Mem128_RC128XOperand, 128>;
-def vx256xmem : X86VMemOperand<VR128X, "printymmwordmem", X86Mem256_RC128XOperand, 256>;
-def vy128xmem : X86VMemOperand<VR256X, "printxmmwordmem", X86Mem128_RC256XOperand, 128>;
-def vy256xmem : X86VMemOperand<VR256X, "printymmwordmem", X86Mem256_RC256XOperand, 256>;
-def vy512xmem : X86VMemOperand<VR256X, "printzmmwordmem", X86Mem512_RC256XOperand, 512>;
-def vz256mem : X86VMemOperand<VR512, "printymmwordmem", X86Mem256_RC512Operand, 256>;
-def vz512mem : X86VMemOperand<VR512, "printzmmwordmem", X86Mem512_RC512Operand, 512>;
-
-// A version of i8mem for use on x86-64 and x32 that uses a NOREX GPR instead
-// of a plain GPR, so that it doesn't potentially require a REX prefix.
-def ptr_rc_norex : PointerLikeRegClass<2>;
-def ptr_rc_norex_nosp : PointerLikeRegClass<3>;
-
-def i8mem_NOREX : X86MemOperand<"printbytemem", X86Mem8AsmOperand, 8> {
- let MIOperandInfo = (ops ptr_rc_norex, i8imm, ptr_rc_norex_nosp, i32imm,
- SEGMENT_REG);
-}
-
-// GPRs available for tailcall.
-// It represents GR32_TC, GR64_TC or GR64_TCW64.
-def ptr_rc_tailcall : PointerLikeRegClass<4>;
-
-// Special i32mem for addresses of load folding tail calls. These are not
-// allowed to use callee-saved registers since they must be scheduled
-// after callee-saved register are popped.
-def i32mem_TC : X86MemOperand<"printdwordmem", X86Mem32AsmOperand, 32> {
- let MIOperandInfo = (ops ptr_rc_tailcall, i8imm, ptr_rc_tailcall,
- i32imm, SEGMENT_REG);
-}
-
-// Special i64mem for addresses of load folding tail calls. These are not
-// allowed to use callee-saved registers since they must be scheduled
-// after callee-saved register are popped.
-def i64mem_TC : X86MemOperand<"printqwordmem", X86Mem64AsmOperand, 64> {
- let MIOperandInfo = (ops ptr_rc_tailcall, i8imm,
- ptr_rc_tailcall, i32imm, SEGMENT_REG);
-}
-
-// Special parser to detect 16-bit mode to select 16-bit displacement.
-def X86AbsMem16AsmOperand : AsmOperandClass {
- let Name = "AbsMem16";
- let RenderMethod = "addAbsMemOperands";
- let SuperClasses = [X86AbsMemAsmOperand];
-}
-
-// Branch targets print as pc-relative values.
-class BranchTargetOperand<ValueType ty> : Operand<ty> {
- let OperandType = "OPERAND_PCREL";
- let PrintMethod = "printPCRelImm";
- let ParserMatchClass = X86AbsMemAsmOperand;
-}
-
-def i32imm_brtarget : BranchTargetOperand<i32>;
-def i16imm_brtarget : BranchTargetOperand<i16>;
-
-// 64-bits but only 32 bits are significant, and those bits are treated as being
-// pc relative.
-def i64i32imm_brtarget : BranchTargetOperand<i64>;
-
-def brtarget : BranchTargetOperand<OtherVT>;
-def brtarget8 : BranchTargetOperand<OtherVT>;
-def brtarget16 : BranchTargetOperand<OtherVT> {
- let ParserMatchClass = X86AbsMem16AsmOperand;
-}
-def brtarget32 : BranchTargetOperand<OtherVT>;
-
-let RenderMethod = "addSrcIdxOperands" in {
- def X86SrcIdx8Operand : AsmOperandClass {
- let Name = "SrcIdx8";
- let SuperClasses = [X86Mem8AsmOperand];
- }
- def X86SrcIdx16Operand : AsmOperandClass {
- let Name = "SrcIdx16";
- let SuperClasses = [X86Mem16AsmOperand];
- }
- def X86SrcIdx32Operand : AsmOperandClass {
- let Name = "SrcIdx32";
- let SuperClasses = [X86Mem32AsmOperand];
- }
- def X86SrcIdx64Operand : AsmOperandClass {
- let Name = "SrcIdx64";
- let SuperClasses = [X86Mem64AsmOperand];
- }
-} // RenderMethod = "addSrcIdxOperands"
-
-let RenderMethod = "addDstIdxOperands" in {
- def X86DstIdx8Operand : AsmOperandClass {
- let Name = "DstIdx8";
- let SuperClasses = [X86Mem8AsmOperand];
- }
- def X86DstIdx16Operand : AsmOperandClass {
- let Name = "DstIdx16";
- let SuperClasses = [X86Mem16AsmOperand];
- }
- def X86DstIdx32Operand : AsmOperandClass {
- let Name = "DstIdx32";
- let SuperClasses = [X86Mem32AsmOperand];
- }
- def X86DstIdx64Operand : AsmOperandClass {
- let Name = "DstIdx64";
- let SuperClasses = [X86Mem64AsmOperand];
- }
-} // RenderMethod = "addDstIdxOperands"
-
-let RenderMethod = "addMemOffsOperands" in {
- def X86MemOffs16_8AsmOperand : AsmOperandClass {
- let Name = "MemOffs16_8";
- let SuperClasses = [X86Mem8AsmOperand];
- }
- def X86MemOffs16_16AsmOperand : AsmOperandClass {
- let Name = "MemOffs16_16";
- let SuperClasses = [X86Mem16AsmOperand];
- }
- def X86MemOffs16_32AsmOperand : AsmOperandClass {
- let Name = "MemOffs16_32";
- let SuperClasses = [X86Mem32AsmOperand];
- }
- def X86MemOffs32_8AsmOperand : AsmOperandClass {
- let Name = "MemOffs32_8";
- let SuperClasses = [X86Mem8AsmOperand];
- }
- def X86MemOffs32_16AsmOperand : AsmOperandClass {
- let Name = "MemOffs32_16";
- let SuperClasses = [X86Mem16AsmOperand];
- }
- def X86MemOffs32_32AsmOperand : AsmOperandClass {
- let Name = "MemOffs32_32";
- let SuperClasses = [X86Mem32AsmOperand];
- }
- def X86MemOffs32_64AsmOperand : AsmOperandClass {
- let Name = "MemOffs32_64";
- let SuperClasses = [X86Mem64AsmOperand];
- }
- def X86MemOffs64_8AsmOperand : AsmOperandClass {
- let Name = "MemOffs64_8";
- let SuperClasses = [X86Mem8AsmOperand];
- }
- def X86MemOffs64_16AsmOperand : AsmOperandClass {
- let Name = "MemOffs64_16";
- let SuperClasses = [X86Mem16AsmOperand];
- }
- def X86MemOffs64_32AsmOperand : AsmOperandClass {
- let Name = "MemOffs64_32";
- let SuperClasses = [X86Mem32AsmOperand];
- }
- def X86MemOffs64_64AsmOperand : AsmOperandClass {
- let Name = "MemOffs64_64";
- let SuperClasses = [X86Mem64AsmOperand];
- }
-} // RenderMethod = "addMemOffsOperands"
-
-class X86SrcIdxOperand<string printMethod, AsmOperandClass parserMatchClass>
- : X86MemOperand<printMethod, parserMatchClass> {
- let MIOperandInfo = (ops ptr_rc, SEGMENT_REG);
-}
-
-class X86DstIdxOperand<string printMethod, AsmOperandClass parserMatchClass>
- : X86MemOperand<printMethod, parserMatchClass> {
- let MIOperandInfo = (ops ptr_rc);
-}
-
-def srcidx8 : X86SrcIdxOperand<"printSrcIdx8", X86SrcIdx8Operand>;
-def srcidx16 : X86SrcIdxOperand<"printSrcIdx16", X86SrcIdx16Operand>;
-def srcidx32 : X86SrcIdxOperand<"printSrcIdx32", X86SrcIdx32Operand>;
-def srcidx64 : X86SrcIdxOperand<"printSrcIdx64", X86SrcIdx64Operand>;
-def dstidx8 : X86DstIdxOperand<"printDstIdx8", X86DstIdx8Operand>;
-def dstidx16 : X86DstIdxOperand<"printDstIdx16", X86DstIdx16Operand>;
-def dstidx32 : X86DstIdxOperand<"printDstIdx32", X86DstIdx32Operand>;
-def dstidx64 : X86DstIdxOperand<"printDstIdx64", X86DstIdx64Operand>;
-
-class X86MemOffsOperand<Operand immOperand, string printMethod,
- AsmOperandClass parserMatchClass>
- : X86MemOperand<printMethod, parserMatchClass> {
- let MIOperandInfo = (ops immOperand, SEGMENT_REG);
-}
-
-def offset16_8 : X86MemOffsOperand<i16imm, "printMemOffs8",
- X86MemOffs16_8AsmOperand>;
-def offset16_16 : X86MemOffsOperand<i16imm, "printMemOffs16",
- X86MemOffs16_16AsmOperand>;
-def offset16_32 : X86MemOffsOperand<i16imm, "printMemOffs32",
- X86MemOffs16_32AsmOperand>;
-def offset32_8 : X86MemOffsOperand<i32imm, "printMemOffs8",
- X86MemOffs32_8AsmOperand>;
-def offset32_16 : X86MemOffsOperand<i32imm, "printMemOffs16",
- X86MemOffs32_16AsmOperand>;
-def offset32_32 : X86MemOffsOperand<i32imm, "printMemOffs32",
- X86MemOffs32_32AsmOperand>;
-def offset32_64 : X86MemOffsOperand<i32imm, "printMemOffs64",
- X86MemOffs32_64AsmOperand>;
-def offset64_8 : X86MemOffsOperand<i64imm, "printMemOffs8",
- X86MemOffs64_8AsmOperand>;
-def offset64_16 : X86MemOffsOperand<i64imm, "printMemOffs16",
- X86MemOffs64_16AsmOperand>;
-def offset64_32 : X86MemOffsOperand<i64imm, "printMemOffs32",
- X86MemOffs64_32AsmOperand>;
-def offset64_64 : X86MemOffsOperand<i64imm, "printMemOffs64",
- X86MemOffs64_64AsmOperand>;
-
-def ccode : Operand<i8> {
- let PrintMethod = "printCondCode";
- let OperandNamespace = "X86";
- let OperandType = "OPERAND_COND_CODE";
-}
-
-class ImmSExtAsmOperandClass : AsmOperandClass {
- let SuperClasses = [ImmAsmOperand];
- let RenderMethod = "addImmOperands";
-}
-
-def X86GR32orGR64AsmOperand : AsmOperandClass {
- let Name = "GR32orGR64";
-}
-def GR32orGR64 : RegisterOperand<GR32> {
- let ParserMatchClass = X86GR32orGR64AsmOperand;
-}
-
-def X86GR16orGR32orGR64AsmOperand : AsmOperandClass {
- let Name = "GR16orGR32orGR64";
-}
-def GR16orGR32orGR64 : RegisterOperand<GR16> {
- let ParserMatchClass = X86GR16orGR32orGR64AsmOperand;
-}
-
-def AVX512RCOperand : AsmOperandClass {
- let Name = "AVX512RC";
-}
-def AVX512RC : Operand<i32> {
- let PrintMethod = "printRoundingControl";
- let OperandNamespace = "X86";
- let OperandType = "OPERAND_ROUNDING_CONTROL";
- let ParserMatchClass = AVX512RCOperand;
-}
-
-// Sign-extended immediate classes. We don't need to define the full lattice
-// here because there is no instruction with an ambiguity between ImmSExti64i32
-// and ImmSExti32i8.
-//
-// The strange ranges come from the fact that the assembler always works with
-// 64-bit immediates, but for a 16-bit target value we want to accept both "-1"
-// (which will be a -1ULL), and "0xFF" (-1 in 16-bits).
-
-// [0, 0x7FFFFFFF] |
-// [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF]
-def ImmSExti64i32AsmOperand : ImmSExtAsmOperandClass {
- let Name = "ImmSExti64i32";
-}
-
-// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] |
-// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
-def ImmSExti16i8AsmOperand : ImmSExtAsmOperandClass {
- let Name = "ImmSExti16i8";
- let SuperClasses = [ImmSExti64i32AsmOperand];
-}
-
-// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] |
-// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
-def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass {
- let Name = "ImmSExti32i8";
-}
-
-// [0, 0x0000007F] |
-// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
-def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
- let Name = "ImmSExti64i8";
- let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand,
- ImmSExti64i32AsmOperand];
-}
-
-// 4-bit immediate used by some XOP instructions
-// [0, 0xF]
-def ImmUnsignedi4AsmOperand : AsmOperandClass {
- let Name = "ImmUnsignedi4";
- let RenderMethod = "addImmOperands";
- let DiagnosticType = "InvalidImmUnsignedi4";
-}
-
-// Unsigned immediate used by SSE/AVX instructions
-// [0, 0xFF]
-// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
-def ImmUnsignedi8AsmOperand : AsmOperandClass {
- let Name = "ImmUnsignedi8";
- let RenderMethod = "addImmOperands";
-}
-
-// A couple of more descriptive operand definitions.
-// 16-bits but only 8 bits are significant.
-def i16i8imm : Operand<i16> {
- let ParserMatchClass = ImmSExti16i8AsmOperand;
- let OperandType = "OPERAND_IMMEDIATE";
-}
-// 32-bits but only 8 bits are significant.
-def i32i8imm : Operand<i32> {
- let ParserMatchClass = ImmSExti32i8AsmOperand;
- let OperandType = "OPERAND_IMMEDIATE";
-}
-
-// 64-bits but only 32 bits are significant.
-def i64i32imm : Operand<i64> {
- let ParserMatchClass = ImmSExti64i32AsmOperand;
- let OperandType = "OPERAND_IMMEDIATE";
-}
-
-// 64-bits but only 8 bits are significant.
-def i64i8imm : Operand<i64> {
- let ParserMatchClass = ImmSExti64i8AsmOperand;
- let OperandType = "OPERAND_IMMEDIATE";
-}
-
-// Unsigned 4-bit immediate used by some XOP instructions.
-def u4imm : Operand<i8> {
- let PrintMethod = "printU8Imm";
- let ParserMatchClass = ImmUnsignedi4AsmOperand;
- let OperandType = "OPERAND_IMMEDIATE";
-}
-
-// Unsigned 8-bit immediate used by SSE/AVX instructions.
-def u8imm : Operand<i8> {
- let PrintMethod = "printU8Imm";
- let ParserMatchClass = ImmUnsignedi8AsmOperand;
- let OperandType = "OPERAND_IMMEDIATE";
-}
-
-// 16-bit immediate but only 8-bits are significant and they are unsigned.
-// Used by BT instructions.
-def i16u8imm : Operand<i16> {
- let PrintMethod = "printU8Imm";
- let ParserMatchClass = ImmUnsignedi8AsmOperand;
- let OperandType = "OPERAND_IMMEDIATE";
-}
-
-// 32-bit immediate but only 8-bits are significant and they are unsigned.
-// Used by some SSE/AVX instructions that use intrinsics.
-def i32u8imm : Operand<i32> {
- let PrintMethod = "printU8Imm";
- let ParserMatchClass = ImmUnsignedi8AsmOperand;
- let OperandType = "OPERAND_IMMEDIATE";
-}
-
-// 64-bit immediate but only 8-bits are significant and they are unsigned.
-// Used by BT instructions.
-def i64u8imm : Operand<i64> {
- let PrintMethod = "printU8Imm";
- let ParserMatchClass = ImmUnsignedi8AsmOperand;
- let OperandType = "OPERAND_IMMEDIATE";
-}
-
-def lea64_32mem : Operand<i32> {
- let PrintMethod = "printMemReference";
- let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, SEGMENT_REG);
- let ParserMatchClass = X86MemAsmOperand;
-}
-
-// Memory operands that use 64-bit pointers in both ILP32 and LP64.
-def lea64mem : Operand<i64> {
- let PrintMethod = "printMemReference";
- let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, SEGMENT_REG);
- let ParserMatchClass = X86MemAsmOperand;
-}
-
-let RenderMethod = "addMaskPairOperands" in {
- def VK1PairAsmOperand : AsmOperandClass { let Name = "VK1Pair"; }
- def VK2PairAsmOperand : AsmOperandClass { let Name = "VK2Pair"; }
- def VK4PairAsmOperand : AsmOperandClass { let Name = "VK4Pair"; }
- def VK8PairAsmOperand : AsmOperandClass { let Name = "VK8Pair"; }
- def VK16PairAsmOperand : AsmOperandClass { let Name = "VK16Pair"; }
-}
-
-def VK1Pair : RegisterOperand<VK1PAIR, "printVKPair"> {
- let ParserMatchClass = VK1PairAsmOperand;
-}
-
-def VK2Pair : RegisterOperand<VK2PAIR, "printVKPair"> {
- let ParserMatchClass = VK2PairAsmOperand;
-}
-
-def VK4Pair : RegisterOperand<VK4PAIR, "printVKPair"> {
- let ParserMatchClass = VK4PairAsmOperand;
-}
-
-def VK8Pair : RegisterOperand<VK8PAIR, "printVKPair"> {
- let ParserMatchClass = VK8PairAsmOperand;
-}
-
-def VK16Pair : RegisterOperand<VK16PAIR, "printVKPair"> {
- let ParserMatchClass = VK16PairAsmOperand;
-}
+include "X86InstrOperands.td"
//===----------------------------------------------------------------------===//
-// X86 Complex Pattern Definitions.
+// X86 Predicate Definitions.
//
-
-// Define X86-specific addressing mode.
-def addr : ComplexPattern<iPTR, 5, "selectAddr", [], [SDNPWantParent]>;
-def lea32addr : ComplexPattern<i32, 5, "selectLEAAddr",
- [add, sub, mul, X86mul_imm, shl, or, xor, frameindex],
- []>;
-// In 64-bit mode, 32-bit LEAs can use RIP-relative addressing.
-def lea64_32addr : ComplexPattern<i32, 5, "selectLEA64_32Addr",
- [add, sub, mul, X86mul_imm, shl, or, xor,
- frameindex, X86WrapperRIP],
- []>;
-
-def tls32addr : ComplexPattern<i32, 5, "selectTLSADDRAddr",
- [tglobaltlsaddr], []>;
-
-def tls32baseaddr : ComplexPattern<i32, 5, "selectTLSADDRAddr",
- [tglobaltlsaddr], []>;
-
-def lea64addr : ComplexPattern<i64, 5, "selectLEAAddr",
- [add, sub, mul, X86mul_imm, shl, or, xor, frameindex,
- X86WrapperRIP], []>;
-
-def tls64addr : ComplexPattern<i64, 5, "selectTLSADDRAddr",
- [tglobaltlsaddr], []>;
-
-def tls64baseaddr : ComplexPattern<i64, 5, "selectTLSADDRAddr",
- [tglobaltlsaddr], []>;
-
-def vectoraddr : ComplexPattern<iPTR, 5, "selectVectorAddr", [],[SDNPWantParent]>;
-
-// A relocatable immediate is an operand that the linker can resolve to an
-// immediate, such as a regular symbol in non-PIC code.
-def relocImm : ComplexPattern<iAny, 1, "selectRelocImm",
- [X86Wrapper], [], 0>;
-
-//===----------------------------------------------------------------------===//
-// X86 Instruction Predicate Definitions.
-def TruePredicate : Predicate<"true">;
-
-def HasCMOV : Predicate<"Subtarget->canUseCMOV()">;
-def NoCMOV : Predicate<"!Subtarget->canUseCMOV()">;
-
-def HasNOPL : Predicate<"Subtarget->hasNOPL()">;
-def HasMMX : Predicate<"Subtarget->hasMMX()">;
-def Has3DNow : Predicate<"Subtarget->hasThreeDNow()">;
-def Has3DNowA : Predicate<"Subtarget->hasThreeDNowA()">;
-def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
-def UseSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">;
-def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
-def UseSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">;
-def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
-def UseSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">;
-def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
-def UseSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">;
-def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
-def NoSSE41 : Predicate<"!Subtarget->hasSSE41()">;
-def UseSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">;
-def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
-def UseSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">;
-def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
-def NoAVX : Predicate<"!Subtarget->hasAVX()">;
-def HasAVX : Predicate<"Subtarget->hasAVX()">;
-def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
-def HasAVX1Only : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">;
-def HasAVX512 : Predicate<"Subtarget->hasAVX512()">;
-def UseAVX : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">;
-def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">;
-def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">;
-def HasCDI : Predicate<"Subtarget->hasCDI()">;
-def HasVPOPCNTDQ : Predicate<"Subtarget->hasVPOPCNTDQ()">;
-def HasPFI : Predicate<"Subtarget->hasPFI()">;
-def HasERI : Predicate<"Subtarget->hasERI()">;
-def HasDQI : Predicate<"Subtarget->hasDQI()">;
-def NoDQI : Predicate<"!Subtarget->hasDQI()">;
-def HasBWI : Predicate<"Subtarget->hasBWI()">;
-def NoBWI : Predicate<"!Subtarget->hasBWI()">;
-def HasVLX : Predicate<"Subtarget->hasVLX()">;
-def NoVLX : Predicate<"!Subtarget->hasVLX()">;
-def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">;
-def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
-def HasPKU : Predicate<"Subtarget->hasPKU()">;
-def HasVNNI : Predicate<"Subtarget->hasVNNI()">;
-def HasVP2INTERSECT : Predicate<"Subtarget->hasVP2INTERSECT()">;
-def HasBF16 : Predicate<"Subtarget->hasBF16()">;
-def HasFP16 : Predicate<"Subtarget->hasFP16()">;
-def HasAVXVNNIINT16 : Predicate<"Subtarget->hasAVXVNNIINT16()">;
-def HasAVXVNNIINT8 : Predicate<"Subtarget->hasAVXVNNIINT8()">;
-def HasAVXVNNI : Predicate<"Subtarget->hasAVXVNNI()">;
-def NoVLX_Or_NoVNNI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasVNNI()">;
-
-def HasBITALG : Predicate<"Subtarget->hasBITALG()">;
-def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
-def HasAES : Predicate<"Subtarget->hasAES()">;
-def HasVAES : Predicate<"Subtarget->hasVAES()">;
-def NoVLX_Or_NoVAES : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasVAES()">;
-def HasFXSR : Predicate<"Subtarget->hasFXSR()">;
-def HasX87 : Predicate<"Subtarget->hasX87()">;
-def HasXSAVE : Predicate<"Subtarget->hasXSAVE()">;
-def HasXSAVEOPT : Predicate<"Subtarget->hasXSAVEOPT()">;
-def HasXSAVEC : Predicate<"Subtarget->hasXSAVEC()">;
-def HasXSAVES : Predicate<"Subtarget->hasXSAVES()">;
-def HasPCLMUL : Predicate<"Subtarget->hasPCLMUL()">;
-def NoVLX_Or_NoVPCLMULQDQ :
- Predicate<"!Subtarget->hasVLX() || !Subtarget->hasVPCLMULQDQ()">;
-def HasVPCLMULQDQ : Predicate<"Subtarget->hasVPCLMULQDQ()">;
-def HasGFNI : Predicate<"Subtarget->hasGFNI()">;
-def HasFMA : Predicate<"Subtarget->hasFMA()">;
-def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
-def NoFMA4 : Predicate<"!Subtarget->hasFMA4()">;
-def HasXOP : Predicate<"Subtarget->hasXOP()">;
-def HasTBM : Predicate<"Subtarget->hasTBM()">;
-def NoTBM : Predicate<"!Subtarget->hasTBM()">;
-def HasLWP : Predicate<"Subtarget->hasLWP()">;
-def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">;
-def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">;
-def HasF16C : Predicate<"Subtarget->hasF16C()">;
-def HasFSGSBase : Predicate<"Subtarget->hasFSGSBase()">;
-def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">;
-def HasBMI : Predicate<"Subtarget->hasBMI()">;
-def HasBMI2 : Predicate<"Subtarget->hasBMI2()">;
-def NoBMI2 : Predicate<"!Subtarget->hasBMI2()">;
-def HasVBMI : Predicate<"Subtarget->hasVBMI()">;
-def HasVBMI2 : Predicate<"Subtarget->hasVBMI2()">;
-def HasIFMA : Predicate<"Subtarget->hasIFMA()">;
-def HasAVXIFMA : Predicate<"Subtarget->hasAVXIFMA()">;
-def NoVLX_Or_NoIFMA : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasIFMA()">;
-def HasRTM : Predicate<"Subtarget->hasRTM()">;
-def HasADX : Predicate<"Subtarget->hasADX()">;
-def HasSHA : Predicate<"Subtarget->hasSHA()">;
-def HasSHA512 : Predicate<"Subtarget->hasSHA512()">;
-def HasSGX : Predicate<"Subtarget->hasSGX()">;
-def HasSM3 : Predicate<"Subtarget->hasSM3()">;
-def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
-def HasSSEPrefetch : Predicate<"Subtarget->hasSSEPrefetch()">;
-def NoSSEPrefetch : Predicate<"!Subtarget->hasSSEPrefetch()">;
-def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
-def HasPREFETCHI : Predicate<"Subtarget->hasPREFETCHI()">;
-def HasPrefetchW : Predicate<"Subtarget->hasPrefetchW()">;
-def HasPREFETCHWT1 : Predicate<"Subtarget->hasPREFETCHWT1()">;
-def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">;
-def HasLAHFSAHF64 : Predicate<"Subtarget->hasLAHFSAHF64()">;
-def HasMWAITX : Predicate<"Subtarget->hasMWAITX()">;
-def HasCLZERO : Predicate<"Subtarget->hasCLZERO()">;
-def HasCLDEMOTE : Predicate<"Subtarget->hasCLDEMOTE()">;
-def HasMOVDIRI : Predicate<"Subtarget->hasMOVDIRI()">;
-def HasMOVDIR64B : Predicate<"Subtarget->hasMOVDIR64B()">;
-def HasPTWRITE : Predicate<"Subtarget->hasPTWRITE()">;
-def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
-def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
-def HasSHSTK : Predicate<"Subtarget->hasSHSTK()">;
-def HasSM4 : Predicate<"Subtarget->hasSM4()">;
-def HasCLFLUSH : Predicate<"Subtarget->hasCLFLUSH()">;
-def HasCLFLUSHOPT : Predicate<"Subtarget->hasCLFLUSHOPT()">;
-def HasCLWB : Predicate<"Subtarget->hasCLWB()">;
-def HasWBNOINVD : Predicate<"Subtarget->hasWBNOINVD()">;
-def HasRDPID : Predicate<"Subtarget->hasRDPID()">;
-def HasRDPRU : Predicate<"Subtarget->hasRDPRU()">;
-def HasWAITPKG : Predicate<"Subtarget->hasWAITPKG()">;
-def HasINVPCID : Predicate<"Subtarget->hasINVPCID()">;
-def HasCX8 : Predicate<"Subtarget->hasCX8()">;
-def HasCX16 : Predicate<"Subtarget->hasCX16()">;
-def HasPCONFIG : Predicate<"Subtarget->hasPCONFIG()">;
-def HasENQCMD : Predicate<"Subtarget->hasENQCMD()">;
-def HasAMXFP16 : Predicate<"Subtarget->hasAMXFP16()">;
-def HasCMPCCXADD : Predicate<"Subtarget->hasCMPCCXADD()">;
-def HasAVXNECONVERT : Predicate<"Subtarget->hasAVXNECONVERT()">;
-def HasKL : Predicate<"Subtarget->hasKL()">;
-def HasRAOINT : Predicate<"Subtarget->hasRAOINT()">;
-def HasWIDEKL : Predicate<"Subtarget->hasWIDEKL()">;
-def HasHRESET : Predicate<"Subtarget->hasHRESET()">;
-def HasSERIALIZE : Predicate<"Subtarget->hasSERIALIZE()">;
-def HasTSXLDTRK : Predicate<"Subtarget->hasTSXLDTRK()">;
-def HasAMXTILE : Predicate<"Subtarget->hasAMXTILE()">;
-def HasAMXBF16 : Predicate<"Subtarget->hasAMXBF16()">;
-def HasAMXINT8 : Predicate<"Subtarget->hasAMXINT8()">;
-def HasAMXCOMPLEX : Predicate<"Subtarget->hasAMXCOMPLEX()">;
-def HasUINTR : Predicate<"Subtarget->hasUINTR()">;
-def HasCRC32 : Predicate<"Subtarget->hasCRC32()">;
-
-def HasX86_64 : Predicate<"Subtarget->hasX86_64()">;
-def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
- AssemblerPredicate<(all_of (not Is64Bit)), "Not 64-bit mode">;
-def In64BitMode : Predicate<"Subtarget->is64Bit()">,
- AssemblerPredicate<(all_of Is64Bit), "64-bit mode">;
-def IsLP64 : Predicate<"Subtarget->isTarget64BitLP64()">;
-def NotLP64 : Predicate<"!Subtarget->isTarget64BitLP64()">;
-def In16BitMode : Predicate<"Subtarget->is16Bit()">,
- AssemblerPredicate<(all_of Is16Bit), "16-bit mode">;
-def Not16BitMode : Predicate<"!Subtarget->is16Bit()">,
- AssemblerPredicate<(all_of (not Is16Bit)), "Not 16-bit mode">;
-def In32BitMode : Predicate<"Subtarget->is32Bit()">,
- AssemblerPredicate<(all_of Is32Bit), "32-bit mode">;
-def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
-def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
-def NotWin64WithoutFP : Predicate<"!Subtarget->isTargetWin64() ||"
- "Subtarget->getFrameLowering()->hasFP(*MF)"> {
- let RecomputePerFunction = 1;
-}
-def IsPS : Predicate<"Subtarget->isTargetPS()">;
-def NotPS : Predicate<"!Subtarget->isTargetPS()">;
-def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
-def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">;
-def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
-def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">;
-def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
- "TM.getCodeModel() == CodeModel::Kernel">;
-def IsNotPIC : Predicate<"!TM.isPositionIndependent()">;
-
-// We could compute these on a per-module basis, but doing so requires
-// accessing the Function object through the <Target>Subtarget, and objections
-// were raised to that (see post-commit review comments for r301750).
-let RecomputePerFunction = 1 in {
- def OptForSize : Predicate<"shouldOptForSize(MF)">;
- def OptForMinSize : Predicate<"MF->getFunction().hasMinSize()">;
- def OptForSpeed : Predicate<"!shouldOptForSize(MF)">;
- def UseIncDec : Predicate<"!Subtarget->slowIncDec() || "
- "shouldOptForSize(MF)">;
- def NoSSE41_Or_OptForSize : Predicate<"shouldOptForSize(MF) || "
- "!Subtarget->hasSSE41()">;
-}
-
-def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">;
-def FavorMemIndirectCall : Predicate<"!Subtarget->slowTwoMemOps()">;
-def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
-def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
-def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
-def HasERMSB : Predicate<"Subtarget->hasERMSB()">;
-def HasFSRM : Predicate<"Subtarget->hasFSRM()">;
-def HasMFence : Predicate<"Subtarget->hasMFence()">;
-def UseIndirectThunkCalls : Predicate<"Subtarget->useIndirectThunkCalls()">;
-def NotUseIndirectThunkCalls : Predicate<"!Subtarget->useIndirectThunkCalls()">;
+include "X86InstrPredicates.td"
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
//
-
include "X86InstrFormats.td"
//===----------------------------------------------------------------------===//
-// Pattern fragments.
+// X86 Instruction utilities.
//
-
-// X86-specific condition codes. These correspond to CondCode in
-// X86InstrInfo.h and must be kept in sync.
-def X86_COND_O : PatLeaf<(i8 0)>;
-def X86_COND_NO : PatLeaf<(i8 1)>;
-def X86_COND_B : PatLeaf<(i8 2)>; // alt. COND_C
-def X86_COND_AE : PatLeaf<(i8 3)>; // alt. COND_NC
-def X86_COND_E : PatLeaf<(i8 4)>; // alt. COND_Z
-def X86_COND_NE : PatLeaf<(i8 5)>; // alt. COND_NZ
-def X86_COND_BE : PatLeaf<(i8 6)>; // alt. COND_NA
-def X86_COND_A : PatLeaf<(i8 7)>; // alt. COND_NBE
-def X86_COND_S : PatLeaf<(i8 8)>;
-def X86_COND_NS : PatLeaf<(i8 9)>;
-def X86_COND_P : PatLeaf<(i8 10)>; // alt. COND_PE
-def X86_COND_NP : PatLeaf<(i8 11)>; // alt. COND_PO
-def X86_COND_L : PatLeaf<(i8 12)>; // alt. COND_NGE
-def X86_COND_GE : PatLeaf<(i8 13)>; // alt. COND_NL
-def X86_COND_LE : PatLeaf<(i8 14)>; // alt. COND_NG
-def X86_COND_G : PatLeaf<(i8 15)>; // alt. COND_NLE
-
-def i16immSExt8 : ImmLeaf<i16, [{ return isInt<8>(Imm); }]>;
-def i32immSExt8 : ImmLeaf<i32, [{ return isInt<8>(Imm); }]>;
-def i64immSExt8 : ImmLeaf<i64, [{ return isInt<8>(Imm); }]>;
-def i64immSExt32 : ImmLeaf<i64, [{ return isInt<32>(Imm); }]>;
-def i64timmSExt32 : TImmLeaf<i64, [{ return isInt<32>(Imm); }]>;
-
-def i16relocImmSExt8 : PatLeaf<(i16 relocImm), [{
- return isSExtAbsoluteSymbolRef(8, N);
-}]>;
-def i32relocImmSExt8 : PatLeaf<(i32 relocImm), [{
- return isSExtAbsoluteSymbolRef(8, N);
-}]>;
-def i64relocImmSExt8 : PatLeaf<(i64 relocImm), [{
- return isSExtAbsoluteSymbolRef(8, N);
-}]>;
-def i64relocImmSExt32 : PatLeaf<(i64 relocImm), [{
- return isSExtAbsoluteSymbolRef(32, N);
-}]>;
-
-// If we have multiple users of an immediate, it's much smaller to reuse
-// the register than to encode the immediate in every instruction.
-// This risks increasing register pressure from stretched live ranges;
-// however, the immediates should be trivial for the RA to rematerialize
-// in the event of high register pressure.
-// TODO : This is currently enabled for stores and binary ops. There are more
-// cases for which this can be enabled, though this catches the bulk of the
-// issues.
-// TODO2 : This should really also be enabled under O2, but there's currently
-// an issue with RA where we don't pull the constants into their users
-// when we rematerialize them. I'll follow up on enabling O2 after we fix that
-// issue.
-// TODO3 : This is currently limited to single basic blocks (DAG creation
-// pulls block immediates to the top and merges them if necessary).
-// Eventually, it would be nice to allow ConstantHoisting to merge constants
-// globally for potentially added savings.
-//
-def imm_su : PatLeaf<(imm), [{
- return !shouldAvoidImmediateInstFormsForSize(N);
-}]>;
-def i64immSExt32_su : PatLeaf<(i64immSExt32), [{
- return !shouldAvoidImmediateInstFormsForSize(N);
-}]>;
-
-def relocImm8_su : PatLeaf<(i8 relocImm), [{
- return !shouldAvoidImmediateInstFormsForSize(N);
-}]>;
-def relocImm16_su : PatLeaf<(i16 relocImm), [{
- return !shouldAvoidImmediateInstFormsForSize(N);
-}]>;
-def relocImm32_su : PatLeaf<(i32 relocImm), [{
- return !shouldAvoidImmediateInstFormsForSize(N);
-}]>;
-
-def i16relocImmSExt8_su : PatLeaf<(i16relocImmSExt8), [{
- return !shouldAvoidImmediateInstFormsForSize(N);
-}]>;
-def i32relocImmSExt8_su : PatLeaf<(i32relocImmSExt8), [{
- return !shouldAvoidImmediateInstFormsForSize(N);
-}]>;
-def i64relocImmSExt8_su : PatLeaf<(i64relocImmSExt8), [{
- return !shouldAvoidImmediateInstFormsForSize(N);
-}]>;
-def i64relocImmSExt32_su : PatLeaf<(i64relocImmSExt32), [{
- return !shouldAvoidImmediateInstFormsForSize(N);
-}]>;
-
-def i16immSExt8_su : PatLeaf<(i16immSExt8), [{
- return !shouldAvoidImmediateInstFormsForSize(N);
-}]>;
-def i32immSExt8_su : PatLeaf<(i32immSExt8), [{
- return !shouldAvoidImmediateInstFormsForSize(N);
-}]>;
-def i64immSExt8_su : PatLeaf<(i64immSExt8), [{
- return !shouldAvoidImmediateInstFormsForSize(N);
-}]>;
-
-// i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
-// unsigned field.
-def i64immZExt32 : ImmLeaf<i64, [{ return isUInt<32>(Imm); }]>;
-
-def i64immZExt32SExt8 : ImmLeaf<i64, [{
- return isUInt<32>(Imm) && isInt<8>(static_cast<int32_t>(Imm));
-}]>;
-
-// Helper fragments for loads.
-
-// It's safe to fold a zextload/extload from i1 as a regular i8 load. The
-// upper bits are guaranteed to be zero, and we were going to emit a MOV8rm
-// anyway, which might get folded during peephole.
-def loadi8 : PatFrag<(ops node:$ptr), (i8 (unindexedload node:$ptr)), [{
- LoadSDNode *LD = cast<LoadSDNode>(N);
- ISD::LoadExtType ExtType = LD->getExtensionType();
- return ExtType == ISD::NON_EXTLOAD || ExtType == ISD::EXTLOAD ||
- ExtType == ISD::ZEXTLOAD;
-}]>;
-
-// It's always safe to treat an anyext i16 load as an i32 load if the i16 is
-// known to be 32-bit aligned or better. Ditto for i8 to i16.
-def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
- LoadSDNode *LD = cast<LoadSDNode>(N);
- ISD::LoadExtType ExtType = LD->getExtensionType();
- if (ExtType == ISD::NON_EXTLOAD)
- return true;
- if (ExtType == ISD::EXTLOAD && EnablePromoteAnyextLoad)
- return LD->getAlign() >= 2 && LD->isSimple();
- return false;
-}]>;
-
-def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
- LoadSDNode *LD = cast<LoadSDNode>(N);
- ISD::LoadExtType ExtType = LD->getExtensionType();
- if (ExtType == ISD::NON_EXTLOAD)
- return true;
- if (ExtType == ISD::EXTLOAD && EnablePromoteAnyextLoad)
- return LD->getAlign() >= 4 && LD->isSimple();
- return false;
-}]>;
-
-def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
-def loadf16 : PatFrag<(ops node:$ptr), (f16 (load node:$ptr))>;
-def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
-def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
-def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
-def loadf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr))>;
-def alignedloadf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr)), [{
- LoadSDNode *Ld = cast<LoadSDNode>(N);
- return Ld->getAlign() >= Ld->getMemoryVT().getStoreSize();
-}]>;
-def memopf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr)), [{
- LoadSDNode *Ld = cast<LoadSDNode>(N);
- return Subtarget->hasSSEUnalignedMem() ||
- Ld->getAlign() >= Ld->getMemoryVT().getStoreSize();
-}]>;
-
-def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
-def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
-def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>;
-def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
-def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
-def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
-
-def zextloadi8i1 : PatFrag<(ops node:$ptr), (i8 (zextloadi1 node:$ptr))>;
-def zextloadi16i1 : PatFrag<(ops node:$ptr), (i16 (zextloadi1 node:$ptr))>;
-def zextloadi32i1 : PatFrag<(ops node:$ptr), (i32 (zextloadi1 node:$ptr))>;
-def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
-def zextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>;
-def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>;
-def zextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>;
-def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
-def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
-def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
-
-def extloadi8i1 : PatFrag<(ops node:$ptr), (i8 (extloadi1 node:$ptr))>;
-def extloadi16i1 : PatFrag<(ops node:$ptr), (i16 (extloadi1 node:$ptr))>;
-def extloadi32i1 : PatFrag<(ops node:$ptr), (i32 (extloadi1 node:$ptr))>;
-def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 (extloadi8 node:$ptr))>;
-def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>;
-def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
-def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>;
-def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
-def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
-
-// We can treat an i8/i16 extending load to i64 as a 32-bit load if it's known
-// to be 4-byte aligned or better.
-def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (unindexedload node:$ptr)), [{
- LoadSDNode *LD = cast<LoadSDNode>(N);
- ISD::LoadExtType ExtType = LD->getExtensionType();
- if (ExtType != ISD::EXTLOAD)
- return false;
- if (LD->getMemoryVT() == MVT::i32)
- return true;
-
- return LD->getAlign() >= 4 && LD->isSimple();
-}]>;
-
-// binary op with only one user
-class binop_oneuse<SDPatternOperator operator>
- : PatFrag<(ops node:$A, node:$B),
- (operator node:$A, node:$B), [{
- return N->hasOneUse();
-}]>;
-
-def add_su : binop_oneuse<add>;
-def and_su : binop_oneuse<and>;
-def srl_su : binop_oneuse<srl>;
-
-// unary op with only one user
-class unop_oneuse<SDPatternOperator operator>
- : PatFrag<(ops node:$A),
- (operator node:$A), [{
- return N->hasOneUse();
-}]>;
-
-
-def ineg_su : unop_oneuse<ineg>;
-def trunc_su : unop_oneuse<trunc>;
-
-//===----------------------------------------------------------------------===//
-// X86 Type information definitions
-//===----------------------------------------------------------------------===//
-
-/// X86TypeInfo - This describes the relevant X86 information about value
-/// types. For example, it can tell you which register class and preferred
-/// load node to use.
-class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
- PatFrag loadnode, X86MemOperand memoperand, ImmType immkind,
- Operand immoperand, SDPatternOperator immoperator,
- SDPatternOperator immnosuoperator, Operand imm8operand,
- SDPatternOperator imm8operator, SDPatternOperator imm8nosuoperator,
- bit hasOddOpcode, OperandSize opSize,
- bit hasREX_W> {
- /// VT - This is the value type itself.
- ValueType VT = vt;
-
- /// InstrSuffix - This is the suffix used on instructions with this type. For
- /// example, i8 -> "b", i16 -> "w", i32 -> "l", i64 -> "q".
- string InstrSuffix = instrsuffix;
-
- /// RegClass - This is the register class associated with this type. For
- /// example, i8 -> GR8, i16 -> GR16, i32 -> GR32, i64 -> GR64.
- RegisterClass RegClass = regclass;
-
- /// LoadNode - This is the load node associated with this type. For
- /// example, i8 -> loadi8, i16 -> loadi16, i32 -> loadi32, i64 -> loadi64.
- PatFrag LoadNode = loadnode;
-
- /// MemOperand - This is the memory operand associated with this type. For
- /// example, i8 -> i8mem, i16 -> i16mem, i32 -> i32mem, i64 -> i64mem.
- X86MemOperand MemOperand = memoperand;
-
- /// ImmEncoding - This is the encoding of an immediate of this type. For
- /// example, i8 -> Imm8, i16 -> Imm16, i32 -> Imm32. Note that i64 -> Imm32
- /// since the immediate field of i64 instructions is a 32-bit sign-extended
- /// value.
- ImmType ImmEncoding = immkind;
-
- /// ImmOperand - This is the operand kind of an immediate of this type. For
- /// example, i8 -> i8imm, i16 -> i16imm, i32 -> i32imm. Note that i64 ->
- /// i64i32imm since the immediate field of i64 instructions is a 32-bit
- /// sign-extended value.
- Operand ImmOperand = immoperand;
-
- /// ImmOperator - This is the operator that should be used to match an
- /// immediate of this kind in a pattern (e.g. imm, or i64immSExt32).
- SDPatternOperator ImmOperator = immoperator;
-
- SDPatternOperator ImmNoSuOperator = immnosuoperator;
-
- /// Imm8Operand - This is the operand kind to use for an imm8 of this type.
- /// For example, i8 -> <invalid>, i16 -> i16i8imm, i32 -> i32i8imm. This is
- /// only used for instructions that have a sign-extended imm8 field form.
- Operand Imm8Operand = imm8operand;
-
- /// Imm8Operator - This is the operator that should be used to match an 8-bit
- /// sign-extended immediate of this kind in a pattern (e.g. i16immSExt8).
- SDPatternOperator Imm8Operator = imm8operator;
-
- SDPatternOperator Imm8NoSuOperator = imm8nosuoperator;
-
- /// HasOddOpcode - This bit is true if the instruction should have an odd (as
- /// opposed to even) opcode. Operations on i8 are usually even; operations on
- /// other data types are odd.
- bit HasOddOpcode = hasOddOpcode;
-
- /// OpSize - Selects whether the instruction needs a 0x66 prefix based on
- /// 16-bit vs 32-bit mode. i8/i64 set this to OpSizeFixed. i16 sets this
- /// to OpSize16. i32 sets this to OpSize32.
- OperandSize OpSize = opSize;
-
- /// HasREX_W - This bit is set to true if the instruction should have
- /// the REX.W prefix. This is set for i64 types.
- bit HasREX_W = hasREX_W;
-}
-
-def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">;
-
-def Xi8 : X86TypeInfo<i8, "b", GR8, loadi8, i8mem, Imm8, i8imm,
- imm_su, imm, i8imm, invalid_node, invalid_node,
- 0, OpSizeFixed, 0>;
-def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem, Imm16, i16imm,
- imm_su, imm, i16i8imm, i16immSExt8_su, i16immSExt8,
- 1, OpSize16, 0>;
-def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem, Imm32, i32imm,
- imm_su, imm, i32i8imm, i32immSExt8_su, i32immSExt8,
- 1, OpSize32, 0>;
-def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem, Imm32S, i64i32imm,
- i64immSExt32_su, i64immSExt32, i64i8imm, i64immSExt8_su,
- i64immSExt8, 1, OpSizeFixed, 1>;
-
-/// ITy - This instruction base class takes the type info for the instruction.
-/// Using this, it:
-/// 1. Concatenates the instruction mnemonic with the appropriate suffix
-/// letter, a tab, and the arguments.
-/// 2. Infers whether the instruction should have a 0x66 prefix byte.
-/// 3. Infers whether the instruction should have a REX.W prefix.
-/// 4. Infers whether the low bit of the opcode should be 0 (for i8 operations)
-/// or 1 (for i16,i32,i64 operations).
-class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
- string mnemonic, string args, list<dag> pattern>
- : I<{opcode{7}, opcode{6}, opcode{5}, opcode{4},
- opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode },
- f, outs, ins,
- !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern> {
-
- // Infer instruction prefixes from type info.
- let OpSize = typeinfo.OpSize;
- let hasREX_W = typeinfo.HasREX_W;
-}
+include "X86InstrUtils.td"
//===----------------------------------------------------------------------===//
// Subsystems.
@@ -1424,8 +52,6 @@ include "X86InstrShiftRotate.td"
// X87 Floating Point Stack.
include "X86InstrFPStack.td"
-// SIMD support (SSE, MMX and AVX)
-include "X86InstrFragmentsSIMD.td"
// FMA - Fused Multiply-Add support (requires FMA)
include "X86InstrFMA.td"
@@ -1442,10 +68,8 @@ include "X86Instr3DNow.td"
include "X86InstrVMX.td"
include "X86InstrSVM.td"
include "X86InstrSNP.td"
-
include "X86InstrTSX.td"
include "X86InstrSGX.td"
-
include "X86InstrTDX.td"
// Key Locker instructions
@@ -1466,4 +90,3 @@ include "X86InstrVecCompiler.td"
// Assembler mnemonic/instruction aliases
include "X86InstrAsmAlias.td"
-
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td
index 88e7a388713f..2ea10e317e12 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td
@@ -161,6 +161,15 @@ let isCodeGenOnly = 1, ForceDisassemble = 1 in {
def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>,
OpSize32, Requires<[In64BitMode]>;
} // isCodeGenOnly = 1, ForceDisassemble = 1
+def POPP64r : I<0x58, AddRegFrm, (outs GR64:$reg), (ins), "popp\t$reg", []>,
+ REX_W, ExplicitREX2Prefix, Requires<[In64BitMode]>;
+def POP2: I<0x8F, MRM0r, (outs GR64:$reg1, GR64:$reg2), (ins),
+ "pop2\t{$reg2, $reg1|$reg1, $reg2}",
+ []>, EVEX_4V, EVEX_B, T_MAP4PS;
+def POP2P: I<0x8F, MRM0r, (outs GR64:$reg1, GR64:$reg2), (ins),
+ "pop2p\t{$reg2, $reg1|$reg1, $reg2}",
+ []>, EVEX_4V, EVEX_B, T_MAP4PS, REX_W;
+
} // mayLoad, SchedRW
let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in
def POP64rmm: I<0x8F, MRM0m, (outs), (ins i64mem:$dst), "pop{q}\t$dst", []>,
@@ -173,6 +182,14 @@ let isCodeGenOnly = 1, ForceDisassemble = 1 in {
def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>,
OpSize32, Requires<[In64BitMode]>;
} // isCodeGenOnly = 1, ForceDisassemble = 1
+def PUSHP64r : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "pushp\t$reg", []>,
+ REX_W, ExplicitREX2Prefix, Requires<[In64BitMode]>;
+def PUSH2: I<0xFF, MRM6r, (outs), (ins GR64:$reg1, GR64:$reg2),
+ "push2\t{$reg2, $reg1|$reg1, $reg2}",
+ []>, EVEX_4V, EVEX_B, T_MAP4PS;
+def PUSH2P: I<0xFF, MRM6r, (outs), (ins GR64:$reg1, GR64:$reg2),
+ "push2p\t{$reg2, $reg1|$reg1, $reg2}",
+ []>, EVEX_4V, EVEX_B, T_MAP4PS, REX_W;
} // mayStore, SchedRW
let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in {
def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>,
@@ -1197,19 +1214,19 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in {
multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM,
RegisterClass RC, X86MemOperand x86memop,
- X86FoldableSchedWrite sched> {
+ X86FoldableSchedWrite sched, string Suffix = ""> {
let hasSideEffects = 0 in {
- def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
- !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
- T8PS, VEX_4V, Sched<[sched]>;
+ def rr#Suffix : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
+ !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
+ T8PS, VEX_4V, Sched<[sched]>;
let mayLoad = 1 in
- def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
- T8PS, VEX_4V, Sched<[sched.Folded]>;
+ def rm#Suffix : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
+ T8PS, VEX_4V, Sched<[sched.Folded]>;
}
}
-let Predicates = [HasBMI], Defs = [EFLAGS] in {
+let Predicates = [HasBMI, NoEGPR], Defs = [EFLAGS] in {
defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS>;
defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS>, REX_W;
defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS>;
@@ -1218,25 +1235,14 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in {
defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS>, REX_W;
}
-//===----------------------------------------------------------------------===//
-// Pattern fragments to auto generate BMI instructions.
-//===----------------------------------------------------------------------===//
-
-def or_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
- (X86or_flag node:$lhs, node:$rhs), [{
- return hasNoCarryFlagUses(SDValue(N, 1));
-}]>;
-
-def xor_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
- (X86xor_flag node:$lhs, node:$rhs), [{
- return hasNoCarryFlagUses(SDValue(N, 1));
-}]>;
-
-def and_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
- (X86and_flag node:$lhs, node:$rhs), [{
- return hasNoCarryFlagUses(SDValue(N, 1));
-}]>;
-
+let Predicates = [HasBMI, HasEGPR], Defs = [EFLAGS] in {
+ defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX;
+ defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX;
+ defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX;
+ defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX;
+ defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX;
+ defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX;
+}
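+// A note on the name concatenation above (a reading of the TableGen
+// semantics, not stated in this patch): defm prefixes each inner
+// "def rr#Suffix"/"def rm#Suffix" with its own name, so the "_EVEX" suffix
+// should yield records such as BLSR32rr_EVEX and BLSR32rm_EVEX alongside
+// the VEX-encoded BLSR32rr and BLSR32rm defined under [HasBMI, NoEGPR].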
let Predicates = [HasBMI] in {
// FIXME(1): patterns for the load versions are not implemented
@@ -1275,56 +1281,50 @@ let Predicates = [HasBMI] in {
(BLSI64rr GR64:$src)>;
}
-multiclass bmi_bextr<bits<8> opc, string mnemonic, RegisterClass RC,
- X86MemOperand x86memop, SDNode OpNode,
- PatFrag ld_frag, X86FoldableSchedWrite Sched> {
- def rr : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
- !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpNode RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
- T8PS, VEX, Sched<[Sched]>;
- def rm : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
- !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpNode (ld_frag addr:$src1), RC:$src2)),
- (implicit EFLAGS)]>, T8PS, VEX,
- Sched<[Sched.Folded,
- // x86memop:$src1
- ReadDefault, ReadDefault, ReadDefault, ReadDefault,
- ReadDefault,
- // RC:$src2
- Sched.ReadAfterFold]>;
+multiclass bmi4VOp3_base<bits<8> opc, string mnemonic, RegisterClass RC,
+ X86MemOperand x86memop, SDPatternOperator OpNode,
+ PatFrag ld_frag, X86FoldableSchedWrite Sched,
+ string Suffix = ""> {
+ def rr#Suffix : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
+ T8PS, VEX, Sched<[Sched]>;
+let mayLoad = 1 in
+ def rm#Suffix : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpNode (ld_frag addr:$src1), RC:$src2)),
+ (implicit EFLAGS)]>, T8PS, VEX,
+ Sched<[Sched.Folded,
+ // x86memop:$src1
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault,
+ // RC:$src2
+ Sched.ReadAfterFold]>;
}
-let Predicates = [HasBMI], Defs = [EFLAGS] in {
- defm BEXTR32 : bmi_bextr<0xF7, "bextr{l}", GR32, i32mem,
- X86bextr, loadi32, WriteBEXTR>;
- defm BEXTR64 : bmi_bextr<0xF7, "bextr{q}", GR64, i64mem,
- X86bextr, loadi64, WriteBEXTR>, REX_W;
-}
-
-multiclass bmi_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
- X86MemOperand x86memop, SDNode Int,
- PatFrag ld_frag, X86FoldableSchedWrite Sched> {
- def rr : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
- !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (Int RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
- T8PS, VEX, Sched<[Sched]>;
- def rm : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
- !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (Int (ld_frag addr:$src1), RC:$src2)),
- (implicit EFLAGS)]>, T8PS, VEX,
- Sched<[Sched.Folded,
- // x86memop:$src1
- ReadDefault, ReadDefault, ReadDefault, ReadDefault,
- ReadDefault,
- // RC:$src2
- Sched.ReadAfterFold]>;
-}
-
-let Predicates = [HasBMI2], Defs = [EFLAGS] in {
- defm BZHI32 : bmi_bzhi<0xF5, "bzhi{l}", GR32, i32mem,
- X86bzhi, loadi32, WriteBZHI>;
- defm BZHI64 : bmi_bzhi<0xF5, "bzhi{q}", GR64, i64mem,
- X86bzhi, loadi64, WriteBZHI>, REX_W;
+let Predicates = [HasBMI, NoEGPR], Defs = [EFLAGS] in {
+ defm BEXTR32 : bmi4VOp3_base<0xF7, "bextr{l}", GR32, i32mem,
+ X86bextr, loadi32, WriteBEXTR>;
+ defm BEXTR64 : bmi4VOp3_base<0xF7, "bextr{q}", GR64, i64mem,
+ X86bextr, loadi64, WriteBEXTR>, REX_W;
+}
+let Predicates = [HasBMI2, NoEGPR], Defs = [EFLAGS] in {
+ defm BZHI32 : bmi4VOp3_base<0xF5, "bzhi{l}", GR32, i32mem,
+ X86bzhi, loadi32, WriteBZHI>;
+ defm BZHI64 : bmi4VOp3_base<0xF5, "bzhi{q}", GR64, i64mem,
+ X86bzhi, loadi64, WriteBZHI>, REX_W;
+}
+let Predicates = [HasBMI, HasEGPR], Defs = [EFLAGS] in {
+ defm BEXTR32 : bmi4VOp3_base<0xF7, "bextr{l}", GR32, i32mem,
+ X86bextr, loadi32, WriteBEXTR, "_EVEX">, EVEX;
+ defm BEXTR64 : bmi4VOp3_base<0xF7, "bextr{q}", GR64, i64mem,
+ X86bextr, loadi64, WriteBEXTR, "_EVEX">, EVEX, REX_W;
+}
+let Predicates = [HasBMI2, HasEGPR], Defs = [EFLAGS] in {
+ defm BZHI32 : bmi4VOp3_base<0xF5, "bzhi{l}", GR32, i32mem,
+ X86bzhi, loadi32, WriteBZHI, "_EVEX">, EVEX;
+ defm BZHI64 : bmi4VOp3_base<0xF5, "bzhi{q}", GR64, i64mem,
+ X86bzhi, loadi64, WriteBZHI, "_EVEX">, EVEX, REX_W;
}
def CountTrailingOnes : SDNodeXForm<imm, [{
@@ -1366,19 +1366,19 @@ let Predicates = [HasBMI2, NoTBM] in {
}
multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
- X86MemOperand x86memop, SDNode OpNode,
- PatFrag ld_frag> {
- def rr : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
- !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>,
- VEX_4V, Sched<[WriteALU]>;
- def rm : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
- !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpNode RC:$src1, (ld_frag addr:$src2)))]>,
- VEX_4V, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
-}
-
-let Predicates = [HasBMI2] in {
+ X86MemOperand x86memop, SDPatternOperator OpNode,
+ PatFrag ld_frag, string Suffix = ""> {
+ def rr#Suffix : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>,
+ VEX_4V, Sched<[WriteALU]>;
+ def rm#Suffix : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpNode RC:$src1, (ld_frag addr:$src2)))]>,
+ VEX_4V, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
+}
+
+let Predicates = [HasBMI2, NoEGPR] in {
defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem,
X86pdep, loadi32>, T8XD;
defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem,
@@ -1389,6 +1389,17 @@ let Predicates = [HasBMI2] in {
X86pext, loadi64>, T8XS, REX_W;
}
+let Predicates = [HasBMI2, HasEGPR] in {
+ defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem,
+ X86pdep, loadi32, "_EVEX">, T8XD, EVEX;
+ defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem,
+ X86pdep, loadi64, "_EVEX">, T8XD, REX_W, EVEX;
+ defm PEXT32 : bmi_pdep_pext<"pext{l}", GR32, i32mem,
+ X86pext, loadi32, "_EVEX">, T8XS, EVEX;
+ defm PEXT64 : bmi_pdep_pext<"pext{q}", GR64, i64mem,
+ X86pext, loadi64, "_EVEX">, T8XS, REX_W, EVEX;
+}
+
//===----------------------------------------------------------------------===//
// Lightweight Profiling Instructions
@@ -1486,11 +1497,19 @@ let SchedRW = [WriteStore] in {
def MOVDIRI32 : I<0xF9, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movdiri\t{$src, $dst|$dst, $src}",
[(int_x86_directstore32 addr:$dst, GR32:$src)]>,
- T8PS, Requires<[HasMOVDIRI]>;
+ T8PS, Requires<[HasMOVDIRI, NoEGPR]>;
def MOVDIRI64 : RI<0xF9, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"movdiri\t{$src, $dst|$dst, $src}",
[(int_x86_directstore64 addr:$dst, GR64:$src)]>,
- T8PS, Requires<[In64BitMode, HasMOVDIRI]>;
+ T8PS, Requires<[In64BitMode, HasMOVDIRI, NoEGPR]>;
+def MOVDIRI32_EVEX : I<0xF9, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "movdiri\t{$src, $dst|$dst, $src}",
+ [(int_x86_directstore32 addr:$dst, GR32:$src)]>,
+ EVEX_NoCD8, T_MAP4PS, Requires<[In64BitMode, HasMOVDIRI, HasEGPR]>;
+def MOVDIRI64_EVEX : RI<0xF9, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "movdiri\t{$src, $dst|$dst, $src}",
+ [(int_x86_directstore64 addr:$dst, GR64:$src)]>,
+ EVEX_NoCD8, T_MAP4PS, Requires<[In64BitMode, HasMOVDIRI, HasEGPR]>;
} // SchedRW
//===----------------------------------------------------------------------===//
@@ -1503,11 +1522,19 @@ def MOVDIR64B16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem_GR16:$src),
def MOVDIR64B32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem_GR32:$src),
"movdir64b\t{$src, $dst|$dst, $src}",
[(int_x86_movdir64b GR32:$dst, addr:$src)]>,
- T8PD, AdSize32, Requires<[HasMOVDIR64B]>;
+ T8PD, AdSize32, Requires<[HasMOVDIR64B, NoEGPR]>;
def MOVDIR64B64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem_GR64:$src),
"movdir64b\t{$src, $dst|$dst, $src}",
[(int_x86_movdir64b GR64:$dst, addr:$src)]>,
- T8PD, AdSize64, Requires<[HasMOVDIR64B, In64BitMode]>;
+ T8PD, AdSize64, Requires<[HasMOVDIR64B, NoEGPR, In64BitMode]>;
+def MOVDIR64B32_EVEX : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem_GR32:$src),
+ "movdir64b\t{$src, $dst|$dst, $src}",
+ [(int_x86_movdir64b GR32:$dst, addr:$src)]>,
+ EVEX_NoCD8, T_MAP4PD, AdSize32, Requires<[HasMOVDIR64B, HasEGPR, In64BitMode]>;
+def MOVDIR64B64_EVEX : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem_GR64:$src),
+ "movdir64b\t{$src, $dst|$dst, $src}",
+ [(int_x86_movdir64b GR64:$dst, addr:$src)]>,
+ EVEX_NoCD8, T_MAP4PD, AdSize64, Requires<[HasMOVDIR64B, HasEGPR, In64BitMode]>;
} // SchedRW
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrOperands.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrOperands.td
new file mode 100644
index 000000000000..761458f9cffc
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrOperands.td
@@ -0,0 +1,497 @@
+//===------- X86InstrOperands.td - X86 Operand Definitions --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// A version of ptr_rc which excludes SP, ESP, and RSP. This is used for
+// the index operand of an address, to conform to x86 encoding restrictions.
+def ptr_rc_nosp : PointerLikeRegClass<1>;
+
+// *mem - Operand definitions for the funky X86 addressing mode operands.
+//
+def X86MemAsmOperand : AsmOperandClass {
+ let Name = "Mem";
+}
+let RenderMethod = "addMemOperands", SuperClasses = [X86MemAsmOperand] in {
+ def X86Mem8AsmOperand : AsmOperandClass { let Name = "Mem8"; }
+ def X86Mem16AsmOperand : AsmOperandClass { let Name = "Mem16"; }
+ def X86Mem32AsmOperand : AsmOperandClass { let Name = "Mem32"; }
+ def X86Mem64AsmOperand : AsmOperandClass { let Name = "Mem64"; }
+ def X86Mem80AsmOperand : AsmOperandClass { let Name = "Mem80"; }
+ def X86Mem128AsmOperand : AsmOperandClass { let Name = "Mem128"; }
+ def X86Mem256AsmOperand : AsmOperandClass { let Name = "Mem256"; }
+ def X86Mem512AsmOperand : AsmOperandClass { let Name = "Mem512"; }
+ // Gather mem operands
+ def X86Mem64_RC128Operand : AsmOperandClass { let Name = "Mem64_RC128"; }
+ def X86Mem128_RC128Operand : AsmOperandClass { let Name = "Mem128_RC128"; }
+ def X86Mem256_RC128Operand : AsmOperandClass { let Name = "Mem256_RC128"; }
+ def X86Mem128_RC256Operand : AsmOperandClass { let Name = "Mem128_RC256"; }
+ def X86Mem256_RC256Operand : AsmOperandClass { let Name = "Mem256_RC256"; }
+
+ def X86Mem64_RC128XOperand : AsmOperandClass { let Name = "Mem64_RC128X"; }
+ def X86Mem128_RC128XOperand : AsmOperandClass { let Name = "Mem128_RC128X"; }
+ def X86Mem256_RC128XOperand : AsmOperandClass { let Name = "Mem256_RC128X"; }
+ def X86Mem128_RC256XOperand : AsmOperandClass { let Name = "Mem128_RC256X"; }
+ def X86Mem256_RC256XOperand : AsmOperandClass { let Name = "Mem256_RC256X"; }
+ def X86Mem512_RC256XOperand : AsmOperandClass { let Name = "Mem512_RC256X"; }
+ def X86Mem256_RC512Operand : AsmOperandClass { let Name = "Mem256_RC512"; }
+ def X86Mem512_RC512Operand : AsmOperandClass { let Name = "Mem512_RC512"; }
+ def X86Mem512_GR16Operand : AsmOperandClass { let Name = "Mem512_GR16"; }
+ def X86Mem512_GR32Operand : AsmOperandClass { let Name = "Mem512_GR32"; }
+ def X86Mem512_GR64Operand : AsmOperandClass { let Name = "Mem512_GR64"; }
+
+ def X86SibMemOperand : AsmOperandClass { let Name = "SibMem"; }
+}
+
+def X86AbsMemAsmOperand : AsmOperandClass {
+ let Name = "AbsMem";
+ let SuperClasses = [X86MemAsmOperand];
+}
+
+class X86MemOperand<string printMethod,
+ AsmOperandClass parserMatchClass = X86MemAsmOperand,
+ int size = 0> : Operand<iPTR> {
+ let PrintMethod = printMethod;
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, SEGMENT_REG);
+ let ParserMatchClass = parserMatchClass;
+ let OperandType = "OPERAND_MEMORY";
+ int Size = size;
+}
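+// One way to read the five MIOperandInfo slots above: they carry the x86
+// address form base + scale*index + disp with an optional segment override.
+// For example, "movl 8(%rax,%rcx,4), %edx" would be carried as base = RAX,
+// scale = 4, index = RCX, disp = 8, with an empty segment slot.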
+
+// Gather mem operands
+class X86VMemOperand<RegisterClass RC, string printMethod,
+ AsmOperandClass parserMatchClass, int size = 0>
+ : X86MemOperand<printMethod, parserMatchClass, size> {
+ let MIOperandInfo = (ops ptr_rc, i8imm, RC, i32imm, SEGMENT_REG);
+}
+
+def anymem : X86MemOperand<"printMemReference">;
+
+// FIXME: Right now we allow any size during parsing, but we might want to
+// restrict to only unsized memory.
+def opaquemem : X86MemOperand<"printMemReference">;
+
+def sibmem: X86MemOperand<"printMemReference", X86SibMemOperand>;
+
+def i8mem : X86MemOperand<"printbytemem", X86Mem8AsmOperand, 8>;
+def i16mem : X86MemOperand<"printwordmem", X86Mem16AsmOperand, 16>;
+def i32mem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand, 32>;
+def i64mem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand, 64>;
+def i128mem : X86MemOperand<"printxmmwordmem", X86Mem128AsmOperand, 128>;
+def i256mem : X86MemOperand<"printymmwordmem", X86Mem256AsmOperand, 256>;
+def i512mem : X86MemOperand<"printzmmwordmem", X86Mem512AsmOperand, 512>;
+def f16mem : X86MemOperand<"printwordmem", X86Mem16AsmOperand, 16>;
+def f32mem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand, 32>;
+def f64mem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand, 64>;
+def f80mem : X86MemOperand<"printtbytemem", X86Mem80AsmOperand, 80>;
+def f128mem : X86MemOperand<"printxmmwordmem", X86Mem128AsmOperand, 128>;
+def f256mem : X86MemOperand<"printymmwordmem", X86Mem256AsmOperand, 256>;
+def f512mem : X86MemOperand<"printzmmwordmem", X86Mem512AsmOperand, 512>;
+
+// 32/64 mode specific mem operands
+def i512mem_GR16 : X86MemOperand<"printzmmwordmem", X86Mem512_GR16Operand, 512>;
+def i512mem_GR32 : X86MemOperand<"printzmmwordmem", X86Mem512_GR32Operand, 512>;
+def i512mem_GR64 : X86MemOperand<"printzmmwordmem", X86Mem512_GR64Operand, 512>;
+
+// Gather mem operands
+def vx64mem : X86VMemOperand<VR128, "printqwordmem", X86Mem64_RC128Operand, 64>;
+def vx128mem : X86VMemOperand<VR128, "printxmmwordmem", X86Mem128_RC128Operand, 128>;
+def vx256mem : X86VMemOperand<VR128, "printymmwordmem", X86Mem256_RC128Operand, 256>;
+def vy128mem : X86VMemOperand<VR256, "printxmmwordmem", X86Mem128_RC256Operand, 128>;
+def vy256mem : X86VMemOperand<VR256, "printymmwordmem", X86Mem256_RC256Operand, 256>;
+
+def vx64xmem : X86VMemOperand<VR128X, "printqwordmem", X86Mem64_RC128XOperand, 64>;
+def vx128xmem : X86VMemOperand<VR128X, "printxmmwordmem", X86Mem128_RC128XOperand, 128>;
+def vx256xmem : X86VMemOperand<VR128X, "printymmwordmem", X86Mem256_RC128XOperand, 256>;
+def vy128xmem : X86VMemOperand<VR256X, "printxmmwordmem", X86Mem128_RC256XOperand, 128>;
+def vy256xmem : X86VMemOperand<VR256X, "printymmwordmem", X86Mem256_RC256XOperand, 256>;
+def vy512xmem : X86VMemOperand<VR256X, "printzmmwordmem", X86Mem512_RC256XOperand, 512>;
+def vz256mem : X86VMemOperand<VR512, "printymmwordmem", X86Mem256_RC512Operand, 256>;
+def vz512mem : X86VMemOperand<VR512, "printzmmwordmem", X86Mem512_RC512Operand, 512>;
+
+def shmem : X86MemOperand<"printwordmem", X86Mem16AsmOperand>;
+def ssmem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>;
+def sdmem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>;
+
+// A version of i8mem for use on x86-64 and x32 that uses a NOREX GPR instead
+// of a plain GPR, so that it is guaranteed not to require a REX prefix.
+def ptr_rc_norex : PointerLikeRegClass<2>;
+def ptr_rc_norex_nosp : PointerLikeRegClass<3>;
+
+def i8mem_NOREX : X86MemOperand<"printbytemem", X86Mem8AsmOperand, 8> {
+ let MIOperandInfo = (ops ptr_rc_norex, i8imm, ptr_rc_norex_nosp, i32imm,
+ SEGMENT_REG);
+}
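+// For illustration: the h-registers (AH/BH/CH/DH) cannot be encoded in an
+// instruction that carries a REX prefix, so byte memory accesses that may
+// touch them use these NOREX operands to guarantee no REX prefix is needed.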
+
+// GPRs available for tail calls.
+// It represents GR32_TC, GR64_TC, or GR64_TCW64.
+def ptr_rc_tailcall : PointerLikeRegClass<4>;
+
+// Special i32mem for addresses of load-folding tail calls. These are not
+// allowed to use callee-saved registers since they must be scheduled
+// after callee-saved registers are popped.
+def i32mem_TC : X86MemOperand<"printdwordmem", X86Mem32AsmOperand, 32> {
+ let MIOperandInfo = (ops ptr_rc_tailcall, i8imm, ptr_rc_tailcall,
+ i32imm, SEGMENT_REG);
+}
+
+// Special i64mem for addresses of load-folding tail calls. These are not
+// allowed to use callee-saved registers since they must be scheduled
+// after callee-saved registers are popped.
+def i64mem_TC : X86MemOperand<"printqwordmem", X86Mem64AsmOperand, 64> {
+ let MIOperandInfo = (ops ptr_rc_tailcall, i8imm,
+ ptr_rc_tailcall, i32imm, SEGMENT_REG);
+}
+
+// Special parser to detect 16-bit mode and select a 16-bit displacement.
+def X86AbsMem16AsmOperand : AsmOperandClass {
+ let Name = "AbsMem16";
+ let RenderMethod = "addAbsMemOperands";
+ let SuperClasses = [X86AbsMemAsmOperand];
+}
+
+// Branch targets print as pc-relative values.
+class BranchTargetOperand<ValueType ty> : Operand<ty> {
+ let OperandType = "OPERAND_PCREL";
+ let PrintMethod = "printPCRelImm";
+ let ParserMatchClass = X86AbsMemAsmOperand;
+}
+
+def i32imm_brtarget : BranchTargetOperand<i32>;
+def i16imm_brtarget : BranchTargetOperand<i16>;
+
+// 64 bits, but only 32 bits are significant, and those bits are treated as
+// pc-relative.
+def i64i32imm_brtarget : BranchTargetOperand<i64>;
+
+def brtarget : BranchTargetOperand<OtherVT>;
+def brtarget8 : BranchTargetOperand<OtherVT>;
+def brtarget16 : BranchTargetOperand<OtherVT> {
+ let ParserMatchClass = X86AbsMem16AsmOperand;
+}
+def brtarget32 : BranchTargetOperand<OtherVT>;
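+// For example, a jump to a label within roughly +/-127 bytes can be encoded
+// with the rel8 form behind brtarget8, while farther targets fall back to
+// the rel32 form behind brtarget32.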
+
+let RenderMethod = "addSrcIdxOperands" in {
+ def X86SrcIdx8Operand : AsmOperandClass {
+ let Name = "SrcIdx8";
+ let SuperClasses = [X86Mem8AsmOperand];
+ }
+ def X86SrcIdx16Operand : AsmOperandClass {
+ let Name = "SrcIdx16";
+ let SuperClasses = [X86Mem16AsmOperand];
+ }
+ def X86SrcIdx32Operand : AsmOperandClass {
+ let Name = "SrcIdx32";
+ let SuperClasses = [X86Mem32AsmOperand];
+ }
+ def X86SrcIdx64Operand : AsmOperandClass {
+ let Name = "SrcIdx64";
+ let SuperClasses = [X86Mem64AsmOperand];
+ }
+} // RenderMethod = "addSrcIdxOperands"
+
+let RenderMethod = "addDstIdxOperands" in {
+ def X86DstIdx8Operand : AsmOperandClass {
+ let Name = "DstIdx8";
+ let SuperClasses = [X86Mem8AsmOperand];
+ }
+ def X86DstIdx16Operand : AsmOperandClass {
+ let Name = "DstIdx16";
+ let SuperClasses = [X86Mem16AsmOperand];
+ }
+ def X86DstIdx32Operand : AsmOperandClass {
+ let Name = "DstIdx32";
+ let SuperClasses = [X86Mem32AsmOperand];
+ }
+ def X86DstIdx64Operand : AsmOperandClass {
+ let Name = "DstIdx64";
+ let SuperClasses = [X86Mem64AsmOperand];
+ }
+} // RenderMethod = "addDstIdxOperands"
+
+let RenderMethod = "addMemOffsOperands" in {
+ def X86MemOffs16_8AsmOperand : AsmOperandClass {
+ let Name = "MemOffs16_8";
+ let SuperClasses = [X86Mem8AsmOperand];
+ }
+ def X86MemOffs16_16AsmOperand : AsmOperandClass {
+ let Name = "MemOffs16_16";
+ let SuperClasses = [X86Mem16AsmOperand];
+ }
+ def X86MemOffs16_32AsmOperand : AsmOperandClass {
+ let Name = "MemOffs16_32";
+ let SuperClasses = [X86Mem32AsmOperand];
+ }
+ def X86MemOffs32_8AsmOperand : AsmOperandClass {
+ let Name = "MemOffs32_8";
+ let SuperClasses = [X86Mem8AsmOperand];
+ }
+ def X86MemOffs32_16AsmOperand : AsmOperandClass {
+ let Name = "MemOffs32_16";
+ let SuperClasses = [X86Mem16AsmOperand];
+ }
+ def X86MemOffs32_32AsmOperand : AsmOperandClass {
+ let Name = "MemOffs32_32";
+ let SuperClasses = [X86Mem32AsmOperand];
+ }
+ def X86MemOffs32_64AsmOperand : AsmOperandClass {
+ let Name = "MemOffs32_64";
+ let SuperClasses = [X86Mem64AsmOperand];
+ }
+ def X86MemOffs64_8AsmOperand : AsmOperandClass {
+ let Name = "MemOffs64_8";
+ let SuperClasses = [X86Mem8AsmOperand];
+ }
+ def X86MemOffs64_16AsmOperand : AsmOperandClass {
+ let Name = "MemOffs64_16";
+ let SuperClasses = [X86Mem16AsmOperand];
+ }
+ def X86MemOffs64_32AsmOperand : AsmOperandClass {
+ let Name = "MemOffs64_32";
+ let SuperClasses = [X86Mem32AsmOperand];
+ }
+ def X86MemOffs64_64AsmOperand : AsmOperandClass {
+ let Name = "MemOffs64_64";
+ let SuperClasses = [X86Mem64AsmOperand];
+ }
+} // RenderMethod = "addMemOffsOperands"
+
+class X86SrcIdxOperand<string printMethod, AsmOperandClass parserMatchClass>
+ : X86MemOperand<printMethod, parserMatchClass> {
+ let MIOperandInfo = (ops ptr_rc, SEGMENT_REG);
+}
+
+class X86DstIdxOperand<string printMethod, AsmOperandClass parserMatchClass>
+ : X86MemOperand<printMethod, parserMatchClass> {
+ let MIOperandInfo = (ops ptr_rc);
+}
+
+def srcidx8 : X86SrcIdxOperand<"printSrcIdx8", X86SrcIdx8Operand>;
+def srcidx16 : X86SrcIdxOperand<"printSrcIdx16", X86SrcIdx16Operand>;
+def srcidx32 : X86SrcIdxOperand<"printSrcIdx32", X86SrcIdx32Operand>;
+def srcidx64 : X86SrcIdxOperand<"printSrcIdx64", X86SrcIdx64Operand>;
+def dstidx8 : X86DstIdxOperand<"printDstIdx8", X86DstIdx8Operand>;
+def dstidx16 : X86DstIdxOperand<"printDstIdx16", X86DstIdx16Operand>;
+def dstidx32 : X86DstIdxOperand<"printDstIdx32", X86DstIdx32Operand>;
+def dstidx64 : X86DstIdxOperand<"printDstIdx64", X86DstIdx64Operand>;
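+// These model the implicit-address string instructions: the srcidx forms are
+// the DS:(E)SI/RSI source of MOVS/LODS/CMPS/OUTS (segment overridable, hence
+// the SEGMENT_REG slot), and the dstidx forms are the ES:(E)DI/RDI
+// destination of MOVS/STOS/SCAS/INS, whose ES segment cannot be overridden.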
+
+class X86MemOffsOperand<Operand immOperand, string printMethod,
+ AsmOperandClass parserMatchClass>
+ : X86MemOperand<printMethod, parserMatchClass> {
+ let MIOperandInfo = (ops immOperand, SEGMENT_REG);
+}
+
+def offset16_8 : X86MemOffsOperand<i16imm, "printMemOffs8",
+ X86MemOffs16_8AsmOperand>;
+def offset16_16 : X86MemOffsOperand<i16imm, "printMemOffs16",
+ X86MemOffs16_16AsmOperand>;
+def offset16_32 : X86MemOffsOperand<i16imm, "printMemOffs32",
+ X86MemOffs16_32AsmOperand>;
+def offset32_8 : X86MemOffsOperand<i32imm, "printMemOffs8",
+ X86MemOffs32_8AsmOperand>;
+def offset32_16 : X86MemOffsOperand<i32imm, "printMemOffs16",
+ X86MemOffs32_16AsmOperand>;
+def offset32_32 : X86MemOffsOperand<i32imm, "printMemOffs32",
+ X86MemOffs32_32AsmOperand>;
+def offset32_64 : X86MemOffsOperand<i32imm, "printMemOffs64",
+ X86MemOffs32_64AsmOperand>;
+def offset64_8 : X86MemOffsOperand<i64imm, "printMemOffs8",
+ X86MemOffs64_8AsmOperand>;
+def offset64_16 : X86MemOffsOperand<i64imm, "printMemOffs16",
+ X86MemOffs64_16AsmOperand>;
+def offset64_32 : X86MemOffsOperand<i64imm, "printMemOffs32",
+ X86MemOffs64_32AsmOperand>;
+def offset64_64 : X86MemOffsOperand<i64imm, "printMemOffs64",
+ X86MemOffs64_64AsmOperand>;
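+// These correspond to the moffs forms of MOV (opcodes 0xA0-0xA3): an absolute
+// displacement whose width follows the address size, addressing an
+// 8/16/32/64-bit value; e.g. "movb 0x1234, %al" in 16-bit mode would use
+// offset16_8.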
+
+def ccode : Operand<i8> {
+ let PrintMethod = "printCondCode";
+ let OperandNamespace = "X86";
+ let OperandType = "OPERAND_COND_CODE";
+}
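+// ccode carries an X86::CondCode value for condition-code-bearing
+// instructions such as Jcc, SETcc, and CMOVcc; e.g. the value 4 (COND_E)
+// prints as the "e" in "je" or "cmove".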
+
+class ImmSExtAsmOperandClass : AsmOperandClass {
+ let SuperClasses = [ImmAsmOperand];
+ let RenderMethod = "addImmOperands";
+}
+
+def X86GR32orGR64AsmOperand : AsmOperandClass {
+ let Name = "GR32orGR64";
+}
+def GR32orGR64 : RegisterOperand<GR32> {
+ let ParserMatchClass = X86GR32orGR64AsmOperand;
+}
+
+def X86GR16orGR32orGR64AsmOperand : AsmOperandClass {
+ let Name = "GR16orGR32orGR64";
+}
+def GR16orGR32orGR64 : RegisterOperand<GR16> {
+ let ParserMatchClass = X86GR16orGR32orGR64AsmOperand;
+}
+
+def AVX512RCOperand : AsmOperandClass {
+ let Name = "AVX512RC";
+}
+def AVX512RC : Operand<i32> {
+ let PrintMethod = "printRoundingControl";
+ let OperandNamespace = "X86";
+ let OperandType = "OPERAND_ROUNDING_CONTROL";
+ let ParserMatchClass = AVX512RCOperand;
+}
+
+// Sign-extended immediate classes. We don't need to define the full lattice
+// here because there is no instruction with an ambiguity between ImmSExti64i32
+// and ImmSExti32i8.
+//
+// The strange ranges come from the fact that the assembler always works with
+// 64-bit immediates, but for a 16-bit target value we want to accept both "-1"
+// (which will be a -1ULL), and "0xFFFF" (-1 in 16 bits).
+
+// [0, 0x7FFFFFFF] |
+// [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti64i32AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti64i32";
+}
+
+// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] |
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti16i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti16i8";
+ let SuperClasses = [ImmSExti64i32AsmOperand];
+}
+
+// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] |
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti32i8";
+}
+
+// [0, 0x0000007F] |
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti64i8";
+ let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand,
+ ImmSExti64i32AsmOperand];
+}
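+// A worked example of the lattice above: for "addw $0xFFFF, %ax" the
+// assembler holds the immediate as the 64-bit value 0x000000000000FFFF, which
+// falls in the second ImmSExti16i8 range; "addw $-1, %ax" is held as
+// 0xFFFFFFFFFFFFFFFF, which falls in the third. Both encode as the
+// sign-extended 8-bit immediate 0xFF.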
+
+// 4-bit immediate used by some XOP instructions
+// [0, 0xF]
+def ImmUnsignedi4AsmOperand : AsmOperandClass {
+ let Name = "ImmUnsignedi4";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "InvalidImmUnsignedi4";
+}
+
+// Unsigned immediate used by SSE/AVX instructions
+// [0, 0xFF]
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmUnsignedi8AsmOperand : AsmOperandClass {
+ let Name = "ImmUnsignedi8";
+ let RenderMethod = "addImmOperands";
+}
+
+// A few more descriptive operand definitions.
+// 16 bits, but only 8 bits are significant.
+def i16i8imm : Operand<i16> {
+ let ParserMatchClass = ImmSExti16i8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+// 32 bits, but only 8 bits are significant.
+def i32i8imm : Operand<i32> {
+ let ParserMatchClass = ImmSExti32i8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// 64 bits, but only 32 bits are significant.
+def i64i32imm : Operand<i64> {
+ let ParserMatchClass = ImmSExti64i32AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// 64 bits, but only 8 bits are significant.
+def i64i8imm : Operand<i64> {
+ let ParserMatchClass = ImmSExti64i8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// Unsigned 4-bit immediate used by some XOP instructions.
+def u4imm : Operand<i8> {
+ let PrintMethod = "printU8Imm";
+ let ParserMatchClass = ImmUnsignedi4AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// Unsigned 8-bit immediate used by SSE/AVX instructions.
+def u8imm : Operand<i8> {
+ let PrintMethod = "printU8Imm";
+ let ParserMatchClass = ImmUnsignedi8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// 16-bit immediate, but only 8 bits are significant and they are unsigned.
+// Used by BT instructions.
+def i16u8imm : Operand<i16> {
+ let PrintMethod = "printU8Imm";
+ let ParserMatchClass = ImmUnsignedi8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// 32-bit immediate, but only 8 bits are significant and they are unsigned.
+// Used by some SSE/AVX instructions that use intrinsics.
+def i32u8imm : Operand<i32> {
+ let PrintMethod = "printU8Imm";
+ let ParserMatchClass = ImmUnsignedi8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// 64-bit immediate, but only 8 bits are significant and they are unsigned.
+// Used by BT instructions.
+def i64u8imm : Operand<i64> {
+ let PrintMethod = "printU8Imm";
+ let ParserMatchClass = ImmUnsignedi8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def lea64_32mem : Operand<i32> {
+ let PrintMethod = "printMemReference";
+ let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, SEGMENT_REG);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+
+// Memory operands that use 64-bit pointers in both ILP32 and LP64.
+def lea64mem : Operand<i64> {
+ let PrintMethod = "printMemReference";
+ let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, SEGMENT_REG);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+
+let RenderMethod = "addMaskPairOperands" in {
+ def VK1PairAsmOperand : AsmOperandClass { let Name = "VK1Pair"; }
+ def VK2PairAsmOperand : AsmOperandClass { let Name = "VK2Pair"; }
+ def VK4PairAsmOperand : AsmOperandClass { let Name = "VK4Pair"; }
+ def VK8PairAsmOperand : AsmOperandClass { let Name = "VK8Pair"; }
+ def VK16PairAsmOperand : AsmOperandClass { let Name = "VK16Pair"; }
+}
+
+def VK1Pair : RegisterOperand<VK1PAIR, "printVKPair"> {
+ let ParserMatchClass = VK1PairAsmOperand;
+}
+
+def VK2Pair : RegisterOperand<VK2PAIR, "printVKPair"> {
+ let ParserMatchClass = VK2PairAsmOperand;
+}
+
+def VK4Pair : RegisterOperand<VK4PAIR, "printVKPair"> {
+ let ParserMatchClass = VK4PairAsmOperand;
+}
+
+def VK8Pair : RegisterOperand<VK8PAIR, "printVKPair"> {
+ let ParserMatchClass = VK8PairAsmOperand;
+}
+
+def VK16Pair : RegisterOperand<VK16PAIR, "printVKPair"> {
+ let ParserMatchClass = VK16PairAsmOperand;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrPredicates.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrPredicates.td
new file mode 100644
index 000000000000..8653f15d8602
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrPredicates.td
@@ -0,0 +1,207 @@
+//===---X86InstrPredicates.td - X86 Predicate Definitions --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+def TruePredicate : Predicate<"true">;
+
+def HasEGPR : Predicate<"Subtarget->hasEGPR()">;
+def NoEGPR : Predicate<"!Subtarget->hasEGPR()">;
+def HasCMOV : Predicate<"Subtarget->canUseCMOV()">;
+def NoCMOV : Predicate<"!Subtarget->canUseCMOV()">;
+def HasNOPL : Predicate<"Subtarget->hasNOPL()">;
+def HasMMX : Predicate<"Subtarget->hasMMX()">;
+def Has3DNow : Predicate<"Subtarget->hasThreeDNow()">;
+def Has3DNowA : Predicate<"Subtarget->hasThreeDNowA()">;
+def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
+def UseSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">;
+def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
+def UseSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">;
+def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
+def UseSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">;
+def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
+def UseSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">;
+def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
+def NoSSE41 : Predicate<"!Subtarget->hasSSE41()">;
+def UseSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">;
+def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
+def UseSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">;
+def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
+def NoAVX : Predicate<"!Subtarget->hasAVX()">;
+def HasAVX : Predicate<"Subtarget->hasAVX()">;
+def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
+def HasAVX1Only : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">;
+def HasEVEX512 : Predicate<"Subtarget->hasEVEX512()">;
+def HasAVX10_1 : Predicate<"Subtarget->hasAVX10_1()">;
+def HasAVX10_1_512 : Predicate<"Subtarget->hasAVX10_1_512()">;
+def HasAVX512 : Predicate<"Subtarget->hasAVX512()">;
+def UseAVX : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">;
+def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">;
+def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">;
+def HasCDI : Predicate<"Subtarget->hasCDI()">;
+def HasVPOPCNTDQ : Predicate<"Subtarget->hasVPOPCNTDQ()">;
+def HasPFI : Predicate<"Subtarget->hasPFI()">;
+def HasERI : Predicate<"Subtarget->hasERI()">;
+def HasDQI : Predicate<"Subtarget->hasDQI()">;
+def NoDQI : Predicate<"!Subtarget->hasDQI()">;
+def HasBWI : Predicate<"Subtarget->hasBWI()">;
+def NoBWI : Predicate<"!Subtarget->hasBWI()">;
+def HasVLX : Predicate<"Subtarget->hasVLX()">;
+def NoVLX : Predicate<"!Subtarget->hasVLX()">;
+def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">;
+def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
+def HasPKU : Predicate<"Subtarget->hasPKU()">;
+def HasVNNI : Predicate<"Subtarget->hasVNNI()">;
+def HasVP2INTERSECT : Predicate<"Subtarget->hasVP2INTERSECT()">;
+def HasBF16 : Predicate<"Subtarget->hasBF16()">;
+def HasFP16 : Predicate<"Subtarget->hasFP16()">;
+def HasAVXVNNIINT16 : Predicate<"Subtarget->hasAVXVNNIINT16()">;
+def HasAVXVNNIINT8 : Predicate<"Subtarget->hasAVXVNNIINT8()">;
+def HasAVXVNNI : Predicate<"Subtarget->hasAVXVNNI()">;
+def NoVLX_Or_NoVNNI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasVNNI()">;
+
+def HasBITALG : Predicate<"Subtarget->hasBITALG()">;
+def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
+def HasAES : Predicate<"Subtarget->hasAES()">;
+def HasVAES : Predicate<"Subtarget->hasVAES()">;
+def NoVLX_Or_NoVAES : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasVAES()">;
+def HasFXSR : Predicate<"Subtarget->hasFXSR()">;
+def HasX87 : Predicate<"Subtarget->hasX87()">;
+def HasXSAVE : Predicate<"Subtarget->hasXSAVE()">;
+def HasXSAVEOPT : Predicate<"Subtarget->hasXSAVEOPT()">;
+def HasXSAVEC : Predicate<"Subtarget->hasXSAVEC()">;
+def HasXSAVES : Predicate<"Subtarget->hasXSAVES()">;
+def HasPCLMUL : Predicate<"Subtarget->hasPCLMUL()">;
+def NoVLX_Or_NoVPCLMULQDQ :
+ Predicate<"!Subtarget->hasVLX() || !Subtarget->hasVPCLMULQDQ()">;
+def HasVPCLMULQDQ : Predicate<"Subtarget->hasVPCLMULQDQ()">;
+def HasGFNI : Predicate<"Subtarget->hasGFNI()">;
+def HasFMA : Predicate<"Subtarget->hasFMA()">;
+def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
+def NoFMA4 : Predicate<"!Subtarget->hasFMA4()">;
+def HasXOP : Predicate<"Subtarget->hasXOP()">;
+def HasTBM : Predicate<"Subtarget->hasTBM()">;
+def NoTBM : Predicate<"!Subtarget->hasTBM()">;
+def HasLWP : Predicate<"Subtarget->hasLWP()">;
+def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">;
+def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">;
+def HasF16C : Predicate<"Subtarget->hasF16C()">;
+def HasFSGSBase : Predicate<"Subtarget->hasFSGSBase()">;
+def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">;
+def HasBMI : Predicate<"Subtarget->hasBMI()">;
+def HasBMI2 : Predicate<"Subtarget->hasBMI2()">;
+def NoBMI2 : Predicate<"!Subtarget->hasBMI2()">;
+def HasVBMI : Predicate<"Subtarget->hasVBMI()">;
+def HasVBMI2 : Predicate<"Subtarget->hasVBMI2()">;
+def HasIFMA : Predicate<"Subtarget->hasIFMA()">;
+def HasAVXIFMA : Predicate<"Subtarget->hasAVXIFMA()">;
+def NoVLX_Or_NoIFMA : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasIFMA()">;
+def HasRTM : Predicate<"Subtarget->hasRTM()">;
+def HasADX : Predicate<"Subtarget->hasADX()">;
+def HasSHA : Predicate<"Subtarget->hasSHA()">;
+def HasSHA512 : Predicate<"Subtarget->hasSHA512()">;
+def HasSGX : Predicate<"Subtarget->hasSGX()">;
+def HasSM3 : Predicate<"Subtarget->hasSM3()">;
+def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
+def HasSSEPrefetch : Predicate<"Subtarget->hasSSEPrefetch()">;
+def NoSSEPrefetch : Predicate<"!Subtarget->hasSSEPrefetch()">;
+def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
+def HasPREFETCHI : Predicate<"Subtarget->hasPREFETCHI()">;
+def HasPrefetchW : Predicate<"Subtarget->hasPrefetchW()">;
+def HasPREFETCHWT1 : Predicate<"Subtarget->hasPREFETCHWT1()">;
+def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">;
+def HasLAHFSAHF64 : Predicate<"Subtarget->hasLAHFSAHF64()">;
+def HasMWAITX : Predicate<"Subtarget->hasMWAITX()">;
+def HasCLZERO : Predicate<"Subtarget->hasCLZERO()">;
+def HasCLDEMOTE : Predicate<"Subtarget->hasCLDEMOTE()">;
+def HasMOVDIRI : Predicate<"Subtarget->hasMOVDIRI()">;
+def HasMOVDIR64B : Predicate<"Subtarget->hasMOVDIR64B()">;
+def HasPTWRITE : Predicate<"Subtarget->hasPTWRITE()">;
+def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
+def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
+def HasSHSTK : Predicate<"Subtarget->hasSHSTK()">;
+def HasSM4 : Predicate<"Subtarget->hasSM4()">;
+def HasCLFLUSH : Predicate<"Subtarget->hasCLFLUSH()">;
+def HasCLFLUSHOPT : Predicate<"Subtarget->hasCLFLUSHOPT()">;
+def HasCLWB : Predicate<"Subtarget->hasCLWB()">;
+def HasWBNOINVD : Predicate<"Subtarget->hasWBNOINVD()">;
+def HasRDPID : Predicate<"Subtarget->hasRDPID()">;
+def HasRDPRU : Predicate<"Subtarget->hasRDPRU()">;
+def HasWAITPKG : Predicate<"Subtarget->hasWAITPKG()">;
+def HasINVPCID : Predicate<"Subtarget->hasINVPCID()">;
+def HasCX8 : Predicate<"Subtarget->hasCX8()">;
+def HasCX16 : Predicate<"Subtarget->hasCX16()">;
+def HasPCONFIG : Predicate<"Subtarget->hasPCONFIG()">;
+def HasENQCMD : Predicate<"Subtarget->hasENQCMD()">;
+def HasAMXFP16 : Predicate<"Subtarget->hasAMXFP16()">;
+def HasCMPCCXADD : Predicate<"Subtarget->hasCMPCCXADD()">;
+def HasAVXNECONVERT : Predicate<"Subtarget->hasAVXNECONVERT()">;
+def HasKL : Predicate<"Subtarget->hasKL()">;
+def HasRAOINT : Predicate<"Subtarget->hasRAOINT()">;
+def HasWIDEKL : Predicate<"Subtarget->hasWIDEKL()">;
+def HasHRESET : Predicate<"Subtarget->hasHRESET()">;
+def HasSERIALIZE : Predicate<"Subtarget->hasSERIALIZE()">;
+def HasTSXLDTRK : Predicate<"Subtarget->hasTSXLDTRK()">;
+def HasAMXTILE : Predicate<"Subtarget->hasAMXTILE()">;
+def HasAMXBF16 : Predicate<"Subtarget->hasAMXBF16()">;
+def HasAMXINT8 : Predicate<"Subtarget->hasAMXINT8()">;
+def HasAMXCOMPLEX : Predicate<"Subtarget->hasAMXCOMPLEX()">;
+def HasUINTR : Predicate<"Subtarget->hasUINTR()">;
+def HasUSERMSR : Predicate<"Subtarget->hasUSERMSR()">;
+def HasCRC32 : Predicate<"Subtarget->hasCRC32()">;
+
+def HasX86_64 : Predicate<"Subtarget->hasX86_64()">;
+def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
+ AssemblerPredicate<(all_of (not Is64Bit)), "Not 64-bit mode">;
+def In64BitMode : Predicate<"Subtarget->is64Bit()">,
+ AssemblerPredicate<(all_of Is64Bit), "64-bit mode">;
+def IsLP64 : Predicate<"Subtarget->isTarget64BitLP64()">;
+def NotLP64 : Predicate<"!Subtarget->isTarget64BitLP64()">;
+def In16BitMode : Predicate<"Subtarget->is16Bit()">,
+ AssemblerPredicate<(all_of Is16Bit), "16-bit mode">;
+def Not16BitMode : Predicate<"!Subtarget->is16Bit()">,
+ AssemblerPredicate<(all_of (not Is16Bit)), "Not 16-bit mode">;
+def In32BitMode : Predicate<"Subtarget->is32Bit()">,
+ AssemblerPredicate<(all_of Is32Bit), "32-bit mode">;
+def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
+def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
+def NotWin64WithoutFP : Predicate<"!Subtarget->isTargetWin64() ||"
+ "Subtarget->getFrameLowering()->hasFP(*MF)"> {
+ let RecomputePerFunction = 1;
+}
+def IsPS : Predicate<"Subtarget->isTargetPS()">;
+def NotPS : Predicate<"!Subtarget->isTargetPS()">;
+def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
+def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">;
+def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
+def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">;
+def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
+ "TM.getCodeModel() == CodeModel::Kernel">;
+def IsNotPIC : Predicate<"!TM.isPositionIndependent()">;
+
+// We could compute these on a per-module basis but doing so requires accessing
+// the Function object through the <Target>Subtarget and objections were raised
+// to that (see post-commit review comments for r301750).
+let RecomputePerFunction = 1 in {
+ def OptForSize : Predicate<"shouldOptForSize(MF)">;
+ def OptForMinSize : Predicate<"MF->getFunction().hasMinSize()">;
+ def OptForSpeed : Predicate<"!shouldOptForSize(MF)">;
+ def UseIncDec : Predicate<"!Subtarget->slowIncDec() || "
+ "shouldOptForSize(MF)">;
+ def NoSSE41_Or_OptForSize : Predicate<"shouldOptForSize(MF) || "
+ "!Subtarget->hasSSE41()">;
+}
+
+def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">;
+def FavorMemIndirectCall : Predicate<"!Subtarget->slowTwoMemOps()">;
+def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
+def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
+def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
+def HasERMSB : Predicate<"Subtarget->hasERMSB()">;
+def HasFSRM : Predicate<"Subtarget->hasFSRM()">;
+def HasMFence : Predicate<"Subtarget->hasMFence()">;
+def UseIndirectThunkCalls : Predicate<"Subtarget->useIndirectThunkCalls()">;
+def NotUseIndirectThunkCalls : Predicate<"!Subtarget->useIndirectThunkCalls()">;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td
index a6fcc804e1d0..cf57fe562ed5 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3212,13 +3212,13 @@ let Predicates = [UseSSE2] in {
// Prefetch intrinsic.
let Predicates = [HasSSEPrefetch], SchedRW = [WriteLoad] in {
def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src),
- "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>, TB;
+ "prefetcht0\t$src", [(prefetch addr:$src, timm, (i32 3), (i32 1))]>, TB;
def PREFETCHT1 : I<0x18, MRM2m, (outs), (ins i8mem:$src),
- "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>, TB;
+ "prefetcht1\t$src", [(prefetch addr:$src, timm, (i32 2), (i32 1))]>, TB;
def PREFETCHT2 : I<0x18, MRM3m, (outs), (ins i8mem:$src),
- "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>, TB;
+ "prefetcht2\t$src", [(prefetch addr:$src, timm, (i32 1), (i32 1))]>, TB;
def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src),
- "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>, TB;
+ "prefetchnta\t$src", [(prefetch addr:$src, timm, (i32 0), (i32 1))]>, TB;
}
// FIXME: How should flush instruction be modeled?
@@ -7093,35 +7093,35 @@ def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256,
// halves of a 256-bit vector.
//
let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX2] in
-def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
- (ins i128mem:$src),
- "vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
- Sched<[WriteShuffleLd]>, VEX, VEX_L;
+def VBROADCASTI128rm : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
+ (ins i128mem:$src),
+ "vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
+ Sched<[WriteShuffleLd]>, VEX, VEX_L;
let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX],
ExeDomain = SSEPackedSingle in
-def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst),
- (ins f128mem:$src),
- "vbroadcastf128\t{$src, $dst|$dst, $src}", []>,
- Sched<[SchedWriteFShuffle.XMM.Folded]>, VEX, VEX_L;
+def VBROADCASTF128rm : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst),
+ (ins f128mem:$src),
+ "vbroadcastf128\t{$src, $dst|$dst, $src}", []>,
+ Sched<[SchedWriteFShuffle.XMM.Folded]>, VEX, VEX_L;
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTF128 addr:$src)>;
+ (VBROADCASTF128rm addr:$src)>;
def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTF128 addr:$src)>;
+ (VBROADCASTF128rm addr:$src)>;
// NOTE: We're using FP instructions here, but execution domain fixing can
// convert to integer when profitable.
def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTF128 addr:$src)>;
+ (VBROADCASTF128rm addr:$src)>;
def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTF128 addr:$src)>;
+ (VBROADCASTF128rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTF128 addr:$src)>;
+ (VBROADCASTF128rm addr:$src)>;
def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTF128 addr:$src)>;
+ (VBROADCASTF128rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTF128 addr:$src)>;
+ (VBROADCASTF128rm addr:$src)>;
}
//===----------------------------------------------------------------------===//
@@ -7316,7 +7316,7 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
// AVX_VNNI
//===----------------------------------------------------------------------===//
let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI], Constraints = "$src1 = $dst",
- ExplicitVEXPrefix = 1, checkVEXPredicate = 1 in
+ explicitOpPrefix = ExplicitVEX in
multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
bit IsCommutable> {
let isCommutable = IsCommutable in
@@ -7359,11 +7359,6 @@ defm VPDPBUSDS : avx_vnni_rm<0x51, "vpdpbusds", X86Vpdpbusds, 0>;
defm VPDPWSSD : avx_vnni_rm<0x52, "vpdpwssd", X86Vpdpwssd, 1>;
defm VPDPWSSDS : avx_vnni_rm<0x53, "vpdpwssds", X86Vpdpwssds, 1>;
-def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
- (X86vpmaddwd node:$lhs, node:$rhs), [{
- return N->hasOneUse();
-}]>;
-
let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI] in {
def : Pat<(v8i32 (add VR256:$src1,
(X86vpmaddwd_su VR256:$src2, VR256:$src3))),
@@ -8142,8 +8137,7 @@ let isCommutable = 0 in {
}
// AVX-IFMA
-let Predicates = [HasAVXIFMA, NoVLX_Or_NoIFMA], Constraints = "$src1 = $dst",
- checkVEXPredicate = 1 in
+let Predicates = [HasAVXIFMA, NoVLX_Or_NoIFMA], Constraints = "$src1 = $dst" in
multiclass avx_ifma_rm<bits<8> opc, string OpcodeStr, SDNode OpNode> {
// NOTE: The SDNode has the multiply operands first, with the add last.
// This enables commuted load patterns to be autogenerated by tablegen.
@@ -8287,7 +8281,6 @@ let Predicates = [HasAVXNECONVERT] in {
f256mem>, T8XD;
defm VCVTNEOPH2PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneoph2ps", f128mem,
f256mem>, T8PS;
- let checkVEXPredicate = 1 in
defm VCVTNEPS2BF16 : VCVTNEPS2BF16_BASE, VEX, T8XS, ExplicitVEXPrefix;
def : Pat<(v8bf16 (X86vfpround (v8f32 VR256:$src))),
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrShiftRotate.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrShiftRotate.td
index e416e4495e22..48bf23f8cbf7 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -824,38 +824,40 @@ def ROT64L2R_imm8 : SDNodeXForm<imm, [{
// NOTE: We use WriteShift for these rotates as they avoid the stalls
// of many of the older x86 rotate instructions.
-multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> {
+multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop,
+ string Suffix = ""> {
let hasSideEffects = 0 in {
- def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
- !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, TAXD, VEX, Sched<[WriteShift]>;
+ def ri#Suffix : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+ TAXD, VEX, Sched<[WriteShift]>;
let mayLoad = 1 in
- def mi : Ii8<0xF0, MRMSrcMem, (outs RC:$dst),
- (ins x86memop:$src1, u8imm:$src2),
- !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, TAXD, VEX, Sched<[WriteShiftLd]>;
+ def mi#Suffix : Ii8<0xF0, MRMSrcMem, (outs RC:$dst),
+ (ins x86memop:$src1, u8imm:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+ TAXD, VEX, Sched<[WriteShiftLd]>;
}
}
-multiclass bmi_shift<string asm, RegisterClass RC, X86MemOperand x86memop> {
+multiclass bmi_shift<string asm, RegisterClass RC, X86MemOperand x86memop,
+ string Suffix = ""> {
let hasSideEffects = 0 in {
- def rr : I<0xF7, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
- !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- VEX, Sched<[WriteShift]>;
+ def rr#Suffix : I<0xF7, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+ VEX, Sched<[WriteShift]>;
let mayLoad = 1 in
- def rm : I<0xF7, MRMSrcMem4VOp3,
- (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
- !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- VEX, Sched<[WriteShift.Folded,
- // x86memop:$src1
- ReadDefault, ReadDefault, ReadDefault, ReadDefault,
- ReadDefault,
- // RC:$src2
- WriteShift.ReadAfterFold]>;
+ def rm#Suffix : I<0xF7, MRMSrcMem4VOp3,
+ (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+ VEX, Sched<[WriteShift.Folded,
+ // x86memop:$src1
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault,
+ // RC:$src2
+ WriteShift.ReadAfterFold]>;
}
}
-let Predicates = [HasBMI2] in {
+let Predicates = [HasBMI2, NoEGPR] in {
defm RORX32 : bmi_rotate<"rorx{l}", GR32, i32mem>;
defm RORX64 : bmi_rotate<"rorx{q}", GR64, i64mem>, REX_W;
defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem>, T8XS;
@@ -864,7 +866,20 @@ let Predicates = [HasBMI2] in {
defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8XD, REX_W;
defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8PD;
defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8PD, REX_W;
+}
+let Predicates = [HasBMI2, HasEGPR] in {
+ defm RORX32 : bmi_rotate<"rorx{l}", GR32, i32mem, "_EVEX">, EVEX;
+ defm RORX64 : bmi_rotate<"rorx{q}", GR64, i64mem, "_EVEX">, REX_W, EVEX;
+ defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem, "_EVEX">, T8XS, EVEX;
+ defm SARX64 : bmi_shift<"sarx{q}", GR64, i64mem, "_EVEX">, T8XS, REX_W, EVEX;
+ defm SHRX32 : bmi_shift<"shrx{l}", GR32, i32mem, "_EVEX">, T8XD, EVEX;
+ defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem, "_EVEX">, T8XD, REX_W, EVEX;
+ defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem, "_EVEX">, T8PD, EVEX;
+ defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem, "_EVEX">, T8PD, REX_W, EVEX;
+}
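+// Net effect of the suffixed multiclasses above: each BMI2 shift/rotate
+// opcode now has two records, e.g. RORX32ri (VEX encoding, gated on
+// [HasBMI2, NoEGPR]) and RORX32ri_EVEX (EVEX encoding, gated on
+// [HasBMI2, HasEGPR]), only one of which is available on a given subtarget.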
+
+let Predicates = [HasBMI2] in {
// Prefer RORX which is non-destructive and doesn't update EFLAGS.
let AddedComplexity = 10 in {
def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td
index 0272f7de0f9e..cbb5d4ed5bbd 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td
@@ -436,6 +436,22 @@ def WRMSRLIST : I<0x01, MRM_C6, (outs), (ins), "wrmsrlist", []>, XS;
def RDMSRLIST : I<0x01, MRM_C6, (outs), (ins), "rdmsrlist", []>, XD;
}
+let Predicates = [HasUSERMSR], mayLoad = 1 in {
+ def URDMSRrr : I<0xf8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "urdmsr\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (int_x86_urdmsr GR64:$src))]>, T8XD;
+ def URDMSRri : Ii32<0xf8, MRM0r, (outs GR64:$dst), (ins i64i32imm:$imm),
+ "urdmsr\t{$imm, $dst|$dst, $imm}",
+ [(set GR64:$dst, (int_x86_urdmsr i64immSExt32_su:$imm))]>, T_MAP7XD, VEX;
+}
+let Predicates = [HasUSERMSR], mayStore = 1 in {
+ def UWRMSRrr : I<0xf8, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "uwrmsr\t{$src1, $src2|$src2, $src1}",
+ [(int_x86_uwrmsr GR64:$src1, GR64:$src2)]>, T8XS;
+ def UWRMSRir : Ii32<0xf8, MRM0r, (outs), (ins GR64:$src, i64i32imm:$imm),
+ "uwrmsr\t{$src, $imm|$imm, $src}",
+ [(int_x86_uwrmsr GR64:$src, i64immSExt32_su:$imm)]>, T_MAP7XS, VEX;
+}
let Defs = [RAX, RDX], Uses = [ECX] in
def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB;
@@ -663,6 +679,10 @@ def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invpcid\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[In64BitMode, HasINVPCID]>;
+
+def INVPCID64_EVEX : I<0xF2, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
+ "invpcid\t{$src2, $src1|$src1, $src2}", []>,
+ EVEX_NoCD8, T_MAP4XS, Requires<[In64BitMode, HasINVPCID]>;
} // SchedRW
let Predicates = [In64BitMode, HasINVPCID] in {
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrUtils.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrUtils.td
new file mode 100644
index 000000000000..2f056f2ead62
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrUtils.td
@@ -0,0 +1,1015 @@
+//===-- X86InstrUtils.td - X86 Instruction Utilities --------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides utilities for simplifying the instruction definitions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Classes for setting the fields of X86Inst
+//===----------------------------------------------------------------------===//
+
+// Prefix byte classes which are used to indicate to the ad-hoc machine code
+// emitter that various prefix bytes are required.
+class OpSize16 { OperandSize OpSize = OpSize16; }
+class OpSize32 { OperandSize OpSize = OpSize32; }
+class AdSize16 { AddressSize AdSize = AdSize16; }
+class AdSize32 { AddressSize AdSize = AdSize32; }
+class AdSize64 { AddressSize AdSize = AdSize64; }
+class REX_W { bit hasREX_W = 1; }
+class LOCK { bit hasLockPrefix = 1; }
+class REP { bit hasREPPrefix = 1; }
+class TB { Map OpMap = TB; }
+class T8 { Map OpMap = T8; }
+class TA { Map OpMap = TA; }
+class XOP8 { Map OpMap = XOP8; Prefix OpPrefix = PS; }
+class XOP9 { Map OpMap = XOP9; Prefix OpPrefix = PS; }
+class XOPA { Map OpMap = XOPA; Prefix OpPrefix = PS; }
+class ThreeDNow { Map OpMap = ThreeDNow; }
+class T_MAP4 { Map OpMap = T_MAP4; }
+class T_MAP4PS : T_MAP4 { Prefix OpPrefix = PS; } // none
+class T_MAP4PD : T_MAP4 { Prefix OpPrefix = PD; } // 0x66
+class T_MAP4XS : T_MAP4 { Prefix OpPrefix = XS; } // 0xF3
+class T_MAP4XD : T_MAP4 { Prefix OpPrefix = XD; } // 0xF2
+class T_MAP5 { Map OpMap = T_MAP5; }
+class T_MAP5PS : T_MAP5 { Prefix OpPrefix = PS; } // none
+class T_MAP5PD : T_MAP5 { Prefix OpPrefix = PD; } // 0x66
+class T_MAP5XS : T_MAP5 { Prefix OpPrefix = XS; } // 0xF3
+class T_MAP5XD : T_MAP5 { Prefix OpPrefix = XD; } // 0xF2
+class T_MAP6 { Map OpMap = T_MAP6; }
+class T_MAP6PS : T_MAP6 { Prefix OpPrefix = PS; }
+class T_MAP6PD : T_MAP6 { Prefix OpPrefix = PD; }
+class T_MAP6XS : T_MAP6 { Prefix OpPrefix = XS; }
+class T_MAP6XD : T_MAP6 { Prefix OpPrefix = XD; }
+class T_MAP7 { Map OpMap = T_MAP7; }
+class T_MAP7XS : T_MAP7 { Prefix OpPrefix = XS; } // 0xF3
+class T_MAP7XD : T_MAP7 { Prefix OpPrefix = XD; } // 0xF2
+class OBXS { Prefix OpPrefix = XS; }
+class PS : TB { Prefix OpPrefix = PS; }
+class PD : TB { Prefix OpPrefix = PD; }
+class XD : TB { Prefix OpPrefix = XD; }
+class XS : TB { Prefix OpPrefix = XS; }
+class T8PS : T8 { Prefix OpPrefix = PS; }
+class T8PD : T8 { Prefix OpPrefix = PD; }
+class T8XD : T8 { Prefix OpPrefix = XD; }
+class T8XS : T8 { Prefix OpPrefix = XS; }
+class TAPS : TA { Prefix OpPrefix = PS; }
+class TAPD : TA { Prefix OpPrefix = PD; }
+class TAXD : TA { Prefix OpPrefix = XD; }
+class TAXS : TA { Prefix OpPrefix = XS; }
+class VEX { Encoding OpEnc = EncVEX; }
+class WIG { bit IgnoresW = 1; }
+// Special version of REX_W that can be changed to VEX.W==0 for EVEX2VEX.
+class VEX_W1X { bit hasREX_W = 1; bit EVEX_W1_VEX_W0 = 1; }
+class VEX_4V : VEX { bit hasVEX_4V = 1; }
+class VEX_L { bit hasVEX_L = 1; }
+class VEX_LIG { bit ignoresVEX_L = 1; }
+class EVEX { Encoding OpEnc = EncEVEX; }
+class EVEX_4V : EVEX { bit hasVEX_4V = 1; }
+class EVEX_K { bit hasEVEX_K = 1; }
+class EVEX_KZ : EVEX_K { bit hasEVEX_Z = 1; }
+class EVEX_B { bit hasEVEX_B = 1; }
+class EVEX_RC { bit hasEVEX_RC = 1; }
+class EVEX_V512 { bit hasEVEX_L2 = 1; bit hasVEX_L = 0; }
+class EVEX_V256 { bit hasEVEX_L2 = 0; bit hasVEX_L = 1; }
+class EVEX_V128 { bit hasEVEX_L2 = 0; bit hasVEX_L = 0; }
+class NOTRACK { bit hasNoTrackPrefix = 1; }
+class SIMD_EXC { list<Register> Uses = [MXCSR]; bit mayRaiseFPException = 1; }
+// Specify AVX512 8-bit compressed displacement encoding based on the vector
+// element size in bits (8, 16, 32, 64) and the CDisp8 form.
+class EVEX_CD8<int esize, CD8VForm form> {
+ int CD8_EltSize = !srl(esize, 3);
+ bits<3> CD8_Form = form.Value;
+}
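+// Illustrative example: tagging a 512-bit full-vector memory form with
+// EVEX_CD8<32, CD8VF> yields CD8_EltSize = 4 (the !srl by 3 converts bits to
+// bytes), so an 8-bit compressed displacement is scaled by the 64-byte tuple
+// and disp8 can reach +/-8 KiB.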
+class EVEX_NoCD8 : EVEX { bits<7> CD8_Scale = 0; }
+class XOP { Encoding OpEnc = EncXOP; }
+class XOP_4V : XOP { bit hasVEX_4V = 1; }
+class EVEX2VEXOverride<string VEXInstrName> {
+ string EVEX2VEXOverride = VEXInstrName;
+}
+class AVX512BIi8Base : PD {
+ Domain ExeDomain = SSEPackedInt;
+ ImmType ImmT = Imm8;
+}
+class AVX512XSIi8Base : XS {
+ Domain ExeDomain = SSEPackedInt;
+ ImmType ImmT = Imm8;
+}
+class AVX512XDIi8Base : XD {
+ Domain ExeDomain = SSEPackedInt;
+ ImmType ImmT = Imm8;
+}
+class AVX512PSIi8Base : PS {
+ Domain ExeDomain = SSEPackedSingle;
+ ImmType ImmT = Imm8;
+}
+class AVX512PDIi8Base : PD {
+ Domain ExeDomain = SSEPackedDouble;
+ ImmType ImmT = Imm8;
+}
+class NotEVEX2VEXConvertible { bit notEVEX2VEXConvertible = 1; }
+class ExplicitREX2Prefix { ExplicitOpPrefix explicitOpPrefix = ExplicitREX2; }
+class ExplicitVEXPrefix { ExplicitOpPrefix explicitOpPrefix = ExplicitVEX; }
+class ExplicitEVEXPrefix { ExplicitOpPrefix explicitOpPrefix = ExplicitEVEX; }
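+// These classes compose by TableGen multiple inheritance; for example (FOO is
+// a hypothetical record, not a real instruction):
+//   def FOO : I<0x38, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+//               "foo\t{$src, $dst|$dst, $src}", []>, T8PD, VEX;
+// ends up with OpMap = T8, OpPrefix = PD and OpEnc = EncVEX.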
+
+// SchedModel info for an instruction that loads one value and gets the second
+// (and possibly third) value from a register.
+// This is used for instructions that put the memory operands before other
+// uses.
+class SchedLoadReg<X86FoldableSchedWrite Sched> : Sched<[Sched.Folded,
+ // Memory operand.
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ // Register reads (implicit or explicit).
+ Sched.ReadAfterFold, Sched.ReadAfterFold]>;
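+// For instance, SchedLoadReg<WriteShift> expands to
+//   Sched<[WriteShift.Folded,
+//          ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+//          WriteShift.ReadAfterFold, WriteShift.ReadAfterFold]>
+// so the five memory sub-operands take default reads and the register
+// operands wait for the folded load.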
+
+//===----------------------------------------------------------------------===//
+// X86 Type information definitions
+//===----------------------------------------------------------------------===//
+
+/// X86TypeInfo - This is a bundle of information that describes the relevant
+/// X86 properties of a value type. For example, it can tell you the register
+/// class and the preferred load instruction to use.
+class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
+ PatFrag loadnode, X86MemOperand memoperand, ImmType immkind,
+ Operand immoperand, SDPatternOperator immoperator,
+ SDPatternOperator immnosuoperator, Operand imm8operand,
+ SDPatternOperator imm8operator, SDPatternOperator imm8nosuoperator,
+ bit hasOddOpcode, OperandSize opSize,
+ bit hasREX_W> {
+ /// VT - This is the value type itself.
+ ValueType VT = vt;
+
+ /// InstrSuffix - This is the suffix used on instructions with this type. For
+ /// example, i8 -> "b", i16 -> "w", i32 -> "l", i64 -> "q".
+ string InstrSuffix = instrsuffix;
+
+ /// RegClass - This is the register class associated with this type. For
+ /// example, i8 -> GR8, i16 -> GR16, i32 -> GR32, i64 -> GR64.
+ RegisterClass RegClass = regclass;
+
+ /// LoadNode - This is the load node associated with this type. For
+ /// example, i8 -> loadi8, i16 -> loadi16, i32 -> loadi32, i64 -> loadi64.
+ PatFrag LoadNode = loadnode;
+
+ /// MemOperand - This is the memory operand associated with this type. For
+ /// example, i8 -> i8mem, i16 -> i16mem, i32 -> i32mem, i64 -> i64mem.
+ X86MemOperand MemOperand = memoperand;
+
+ /// ImmEncoding - This is the encoding of an immediate of this type. For
+ /// example, i8 -> Imm8, i16 -> Imm16, i32 -> Imm32. Note that i64 -> Imm32
+  /// since the immediate field of i64 instructions is a 32-bit sign-extended
+  /// value.
+ ImmType ImmEncoding = immkind;
+
+ /// ImmOperand - This is the operand kind of an immediate of this type. For
+ /// example, i8 -> i8imm, i16 -> i16imm, i32 -> i32imm. Note that i64 ->
+  /// i64i32imm since the immediate field of i64 instructions is a 32-bit
+  /// sign-extended value.
+ Operand ImmOperand = immoperand;
+
+ /// ImmOperator - This is the operator that should be used to match an
+ /// immediate of this kind in a pattern (e.g. imm, or i64immSExt32).
+ SDPatternOperator ImmOperator = immoperator;
+
+ SDPatternOperator ImmNoSuOperator = immnosuoperator;
+
+ /// Imm8Operand - This is the operand kind to use for an imm8 of this type.
+ /// For example, i8 -> <invalid>, i16 -> i16i8imm, i32 -> i32i8imm. This is
+ /// only used for instructions that have a sign-extended imm8 field form.
+ Operand Imm8Operand = imm8operand;
+
+ /// Imm8Operator - This is the operator that should be used to match an 8-bit
+  /// sign-extended immediate of this kind in a pattern (e.g. i16immSExt8).
+ SDPatternOperator Imm8Operator = imm8operator;
+
+ SDPatternOperator Imm8NoSuOperator = imm8nosuoperator;
+
+ /// HasOddOpcode - This bit is true if the instruction should have an odd (as
+  /// opposed to even) opcode. Operations on i8 are usually even; operations on
+  /// other datatypes are odd.
+ bit HasOddOpcode = hasOddOpcode;
+
+ /// OpSize - Selects whether the instruction needs a 0x66 prefix based on
+ /// 16-bit vs 32-bit mode. i8/i64 set this to OpSizeFixed. i16 sets this
+  /// to OpSize16. i32 sets this to OpSize32.
+ OperandSize OpSize = opSize;
+
+ /// HasREX_W - This bit is set to true if the instruction should have
+  /// the REX.W prefix. This is set for i64 types.
+ bit HasREX_W = hasREX_W;
+}
+
+def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">;
+
+def Xi8 : X86TypeInfo<i8, "b", GR8, loadi8, i8mem, Imm8, i8imm,
+ imm_su, imm, i8imm, invalid_node, invalid_node,
+ 0, OpSizeFixed, 0>;
+def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem, Imm16, i16imm,
+ imm_su, imm, i16i8imm, i16immSExt8_su, i16immSExt8,
+ 1, OpSize16, 0>;
+def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem, Imm32, i32imm,
+ imm_su, imm, i32i8imm, i32immSExt8_su, i32immSExt8,
+ 1, OpSize32, 0>;
+def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem, Imm32S, i64i32imm,
+ i64immSExt32_su, i64immSExt32, i64i8imm, i64immSExt8_su,
+ i64immSExt8, 1, OpSizeFixed, 1>;
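+// Minimal usage sketch (FooRR and "foo" are made up; real users follow this
+// shape): a multiclass written once against X86TypeInfo can be instantiated
+// for each width:
+//   multiclass FooRR<bits<8> o, string m, X86TypeInfo t> {
+//     def rr : I<o, MRMDestReg, (outs t.RegClass:$dst),
+//                (ins t.RegClass:$src1, t.RegClass:$src2),
+//                !strconcat(m, t.InstrSuffix,
+//                           "\t{$src2, $src1|$src1, $src2}"), []>;
+//   }
+//   defm FOO16 : FooRR<0x11, "foo", Xi16>, OpSize16;
+//   defm FOO32 : FooRR<0x11, "foo", Xi32>, OpSize32;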
+
+// Group template arguments that can be derived from the vector type (EltNum x
+// EltVT). These are things like the register class for the writemask, etc.
+// The idea is to pass one of these as the template argument rather than the
+// individual arguments.
+// The template is also used for scalar types, in this case numelts is 1.
+class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
+ string suffix = ""> {
+ RegisterClass RC = rc;
+ ValueType EltVT = eltvt;
+ int NumElts = numelts;
+
+ // Corresponding mask register class.
+ RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
+
+ // Corresponding mask register pair class.
+ RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
+ !cast<RegisterOperand>("VK" # NumElts # "Pair"));
+
+ // Corresponding write-mask register class.
+ RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
+
+ // The mask VT.
+ ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
+
+ // Suffix used in the instruction mnemonic.
+ string Suffix = suffix;
+
+  // VTName is a string name for the vector VT. For vector types it will be
+  // v # NumElts # EltVT, so for a vector of 8 elements of i32 it will be
+  // v8i32. It is a little more complex for scalar types, where NumElts = 1:
+  // in that case we build e.g. v8f16, v4f32, or v2f64.
+ string VTName = "v" # !if (!eq (NumElts, 1),
+ !if (!eq (EltVT.Size, 16), 8,
+ !if (!eq (EltVT.Size, 32), 4,
+ !if (!eq (EltVT.Size, 64), 2, NumElts))), NumElts) # EltVT;
+
+ // The vector VT.
+ ValueType VT = !cast<ValueType>(VTName);
+
+ string EltTypeName = !cast<string>(EltVT);
+ // Size of the element type in bits, e.g. 32 for v16i32.
+ string EltSizeName = !subst("i", "", !subst("f", "", !subst("b", "", EltTypeName)));
+ int EltSize = EltVT.Size;
+
+ // "i" for integer types and "f" for floating-point types
+ string TypeVariantName = !subst("b", "", !subst(EltSizeName, "", EltTypeName));
+
+ // Size of RC in bits, e.g. 512 for VR512.
+ int Size = VT.Size;
+
+ // The corresponding memory operand, e.g. i512mem for VR512.
+ X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
+ X86MemOperand ScalarMemOp = !cast<X86MemOperand>(!subst("b", "", EltTypeName) # "mem");
+ // FP scalar memory operand for intrinsics - ssmem/sdmem.
+ Operand IntScalarMemOp = !if (!eq (EltTypeName, "f16"), !cast<Operand>("shmem"),
+ !if (!eq (EltTypeName, "bf16"), !cast<Operand>("shmem"),
+ !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
+ !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?))));
+
+ // Load patterns
+ PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
+
+ PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
+
+ PatFrag ScalarLdFrag = !cast<PatFrag>("load" # !subst("b", "", EltTypeName));
+ PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);
+
+ PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f16"), !cast<PatFrags>("sse_load_f16"),
+ !if (!eq (EltTypeName, "bf16"), !cast<PatFrags>("sse_load_f16"),
+ !if (!eq (EltTypeName, "f32"), !cast<PatFrags>("sse_load_f32"),
+ !if (!eq (EltTypeName, "f64"), !cast<PatFrags>("sse_load_f64"), ?))));
+
+ // The string to specify embedded broadcast in assembly.
+ string BroadcastStr = "{1to" # NumElts # "}";
+
+ // 8-bit compressed displacement tuple/subvector format. This is only
+ // defined for NumElts <= 8.
+ CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
+ !cast<CD8VForm>("CD8VT" # NumElts), ?);
+
+ SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
+ !if (!eq (Size, 256), sub_ymm, ?));
+
+ Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
+ !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
+ !if (!eq (EltTypeName, "f16"), SSEPackedSingle, // FIXME?
+ !if (!eq (EltTypeName, "bf16"), SSEPackedSingle, // FIXME?
+ SSEPackedInt))));
+
+ RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X,
+ !if (!eq (EltTypeName, "f16"), FR16X,
+ !if (!eq (EltTypeName, "bf16"), FR16X,
+ FR64X)));
+
+ dag ImmAllZerosV = (VT immAllZerosV);
+
+ string ZSuffix = !if (!eq (Size, 128), "Z128",
+ !if (!eq (Size, 256), "Z256", "Z"));
+}
+
+def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
+def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
+def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
+def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
+def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">;
+def v32bf16_info: X86VectorVTInfo<32, bf16, VR512, "pbf">;
+def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
+def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
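+
+// For reference, fields derived by X86VectorVTInfo for v16i32_info above:
+// KRC = VK16, KRCWM = VK16WM, KVT = v16i1, VTName = "v16i32", EltSize = 32,
+// Size = 512, MemOp = i512mem, ScalarMemOp = i32mem, LdFrag = loadv16i32,
+// BroadcastStr = "{1to16}", ZSuffix = "Z".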
+
+// "x" in v32i8x_info means RC = VR256X
+def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
+def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
+def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
+def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
+def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">;
+def v16bf16x_info: X86VectorVTInfo<16, bf16, VR256X, "pbf">;
+def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
+def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
+
+def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
+def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
+def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
+def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
+def v8f16x_info : X86VectorVTInfo<8, f16, VR128X, "ph">;
+def v8bf16x_info : X86VectorVTInfo<8, bf16, VR128X, "pbf">;
+def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
+def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
+
+// We map scalar types to the smallest (128-bit) vector type
+// with the appropriate element type. This allows us to use the same masking
+// logic.
+def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
+def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
+def f16x_info : X86VectorVTInfo<1, f16, VR128X, "sh">;
+def bf16x_info : X86VectorVTInfo<1, bf16, VR128X, "sbf">;
+def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
+def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
+
+class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
+ X86VectorVTInfo i128> {
+ X86VectorVTInfo info512 = i512;
+ X86VectorVTInfo info256 = i256;
+ X86VectorVTInfo info128 = i128;
+}
+
+def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
+ v16i8x_info>;
+def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
+ v8i16x_info>;
+def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
+ v4i32x_info>;
+def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
+ v2i64x_info>;
+def avx512vl_f16_info : AVX512VLVectorVTInfo<v32f16_info, v16f16x_info,
+ v8f16x_info>;
+def avx512vl_bf16_info : AVX512VLVectorVTInfo<v32bf16_info, v16bf16x_info,
+ v8bf16x_info>;
+def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
+ v4f32x_info>;
+def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
+ v2f64x_info>;
+
+class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
+ ValueType _vt> {
+ RegisterClass KRC = _krc;
+ RegisterClass KRCWM = _krcwm;
+ ValueType KVT = _vt;
+}
+
+def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
+def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
+def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
+def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
+def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
+def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
+def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
+
+// Subclasses of X86Inst
+class PseudoI<dag oops, dag iops, list<dag> pattern>
+ : X86Inst<0, Pseudo, NoImm, oops, iops, ""> {
+ let Pattern = pattern;
+}
+
+class I<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d = GenericDomain>
+ : X86Inst<o, f, NoImm, outs, ins, asm, d> {
+ let Pattern = pattern;
+}
+class Ii8<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d = GenericDomain>
+ : X86Inst<o, f, Imm8, outs, ins, asm, d> {
+ let Pattern = pattern;
+}
+class Ii8Reg<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d = GenericDomain>
+ : X86Inst<o, f, Imm8Reg, outs, ins, asm, d> {
+ let Pattern = pattern;
+}
+class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm8PCRel, outs, ins, asm> {
+ let Pattern = pattern;
+}
+class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm16, outs, ins, asm> {
+ let Pattern = pattern;
+}
+class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm32, outs, ins, asm> {
+ let Pattern = pattern;
+}
+class Ii32S<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm32S, outs, ins, asm> {
+ let Pattern = pattern;
+}
+
+class Ii64<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm64, outs, ins, asm> {
+ let Pattern = pattern;
+}
+
+class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm16PCRel, outs, ins, asm> {
+ let Pattern = pattern;
+}
+
+class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm32PCRel, outs, ins, asm> {
+ let Pattern = pattern;
+}
+
+// FPStack Instruction Templates:
+// FPI - Floating Point Instruction template.
+class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
+ : I<o, F, outs, ins, asm, []> {
+ let Defs = [FPSW];
+ let Predicates = [HasX87];
+}
+
+// FpI_ - Floating Point Pseudo Instruction template.
+class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
+ : PseudoI<outs, ins, pattern> {
+ let FPForm = fp;
+ let Defs = [FPSW];
+ let Predicates = [HasX87];
+}
+
+// Templates for instructions that use a 16- or 32-bit segmented address as
+// their only operand: lcall (FAR CALL) and ljmp (FAR JMP)
+//
+// Iseg16 - 16-bit segment selector, 16-bit offset
+// Iseg32 - 16-bit segment selector, 32-bit offset
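+//
+// A far-jump sketch using Iseg32 (FOOJMP is a placeholder; the real lcall and
+// ljmp definitions live in other .td files):
+//   def FOOJMP : Iseg32<0xEA, RawFrmImm16, (outs),
+//                       (ins i32imm:$off, i16imm:$seg),
+//                       "ljmp{l}\t$seg, $off", []>;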
+
+class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm16, outs, ins, asm> {
+ let Pattern = pattern;
+}
+
+class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm32, outs, ins, asm> {
+ let Pattern = pattern;
+}
+
+// SI - SSE 1 & 2 scalar instructions
+class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d = GenericDomain>
+ : I<o, F, outs, ins, asm, pattern, d> {
+ let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
+ !if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
+ !if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
+ !if(!eq(OpPrefix.Value, XD.Value), [UseSSE2],
+ !if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
+ [UseSSE1])))));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
+ !if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
+ asm));
+}
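+// For example (FOOSS is a placeholder mnemonic, not a real instruction), one
+// template serves both encodings; the predicate list and the "v" prefix
+// follow from the appended classes:
+//   def FOOSSrr  : SI<0x58, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+//                     "fooss\t{$src, $dst|$dst, $src}", []>, XS;      // [UseSSE1]
+//   def VFOOSSrr : SI<0x58, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+//                     "fooss\t{$src, $dst|$dst, $src}", []>, XS, VEX; // [UseAVX], "vfooss"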
+
+// SI_Int - SSE 1 & 2 scalar intrinsics - VEX form available on AVX512
+class SI_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d = GenericDomain>
+ : I<o, F, outs, ins, asm, pattern, d> {
+ let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
+ !if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
+ !if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
+ !if(!eq(OpPrefix.Value, XD.Value), [UseSSE2],
+ !if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
+ [UseSSE1])))));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
+ !if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
+ asm));
+}
+// SIi8 - SSE 1 & 2 scalar instructions - VEX form available on AVX512
+class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern> {
+ let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
+ !if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
+ !if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
+ [UseSSE2])));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
+ !if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
+ asm));
+}
+
+// PI - SSE 1 & 2 packed instructions
+class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
+ Domain d>
+ : I<o, F, outs, ins, asm, pattern, d> {
+ let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
+ !if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
+ !if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
+ [UseSSE1])));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
+ !if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
+ asm));
+}
+
+// MMXPI - SSE 1 & 2 packed instructions with MMX operands
+class MMXPI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
+ Domain d>
+ : I<o, F, outs, ins, asm, pattern, d> {
+ let Predicates = !if(!eq(OpPrefix.Value, PD.Value), [HasMMX, HasSSE2],
+ [HasMMX, HasSSE1]);
+}
+
+// PIi8 - SSE 1 & 2 packed instructions with immediate
+class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d>
+ : Ii8<o, F, outs, ins, asm, pattern, d> {
+ let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
+ !if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
+ !if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
+ [UseSSE1])));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
+ !if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
+ asm));
+}
+
+// SSE1 Instruction Templates:
+//
+// SSI - SSE1 instructions with XS prefix.
+// PSI - SSE1 instructions with PS prefix.
+// PSIi8 - SSE1 instructions with ImmT == Imm8 and PS prefix.
+// VSSI - SSE1 instructions with XS prefix in AVX form.
+// VPSI - SSE1 instructions with PS prefix in AVX form, packed single.
+
+class SSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE1]>;
+class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE1]>;
+class PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
+ Requires<[UseSSE1]>;
+class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
+ Requires<[UseSSE1]>;
+class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
+ Requires<[HasAVX]>;
+class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, PS,
+ Requires<[HasAVX]>;
+
+// SSE2 Instruction Templates:
+//
+// SDI - SSE2 instructions with XD prefix.
+// SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix.
+// S2SI - SSE2 instructions with XS prefix.
+// S2SIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
+// PDI - SSE2 instructions with PD prefix, packed double domain.
+// PDIi8 - SSE2 instructions with ImmT == Imm8 and PD prefix.
+// VSDI - SSE2 scalar instructions with XD prefix in AVX form.
+// VPDI - SSE2 vector instructions with PD prefix in AVX form,
+// packed double domain.
+// VS2I - SSE2 scalar instructions with PD prefix in AVX form.
+// S2I - SSE2 scalar instructions with PD prefix.
+// MMXSDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix as well as
+// MMX operands.
+// MMXS2SIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix as well as
+// MMX operands.
+
+class SDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, XD, Requires<[UseSSE2]>;
+class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[UseSSE2]>;
+class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
+class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
+class PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
+ Requires<[UseSSE2]>;
+class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
+ Requires<[UseSSE2]>;
+class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD,
+ Requires<[UseAVX]>;
+class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
+ Requires<[HasAVX]>;
+class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>,
+ PD, Requires<[HasAVX]>;
+class VS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern>, PD,
+ Requires<[UseAVX]>;
+class S2I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, PD, Requires<[UseSSE2]>;
+class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX, HasSSE2]>;
+class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX, HasSSE2]>;
+
+// SSE3 Instruction Templates:
+//
+// S3I - SSE3 instructions with PD prefixes.
+// S3SI - SSE3 instructions with XS prefix.
+// S3DI - SSE3 instructions with XD prefix.
+
+class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS,
+ Requires<[UseSSE3]>;
+class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD,
+ Requires<[UseSSE3]>;
+class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
+ Requires<[UseSSE3]>;
+
+
+// SSSE3 Instruction Templates:
+//
+// SS38I - SSSE3 instructions with T8 prefix.
+// SS3AI - SSSE3 instructions with TA prefix.
+// MMXSS38I - SSSE3 instructions with T8 prefix and MMX operands.
+// MMXSS3AI - SSSE3 instructions with TA prefix and MMX operands.
+//
+// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit
+// versions use the MMX registers and are grouped with the MMX classes.
+// They need to be enabled even if AVX is enabled.
+
+class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
+ Requires<[UseSSSE3]>;
+class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
+ Requires<[UseSSSE3]>;
+class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PS,
+ Requires<[HasMMX, HasSSSE3]>;
+class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPS,
+ Requires<[HasMMX, HasSSSE3]>;
+
+// SSE4.1 Instruction Templates:
+//
+// SS48I - SSE 4.1 instructions with T8 prefix.
+// SS4AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
+//
+class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
+ Requires<[UseSSE41]>;
+class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
+ Requires<[UseSSE41]>;
+
+// SSE4.2 Instruction Templates:
+//
+// SS428I - SSE 4.2 instructions with T8 prefix.
+class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
+ Requires<[UseSSE42]>;
+
+// SS42AI - SSE 4.2 instructions with TA prefix.
+class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
+ Requires<[UseSSE42]>;
+
+// CRC32I - SSE 4.2 CRC32 instructions.
+// NOTE: 'HasCRC32' is used because CRC32 instructions are GPR-only and are
+// not directly controlled by the SSE42 flag.
+class CRC32I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasCRC32]>;
+
+// AVX Instruction Templates:
+// Instructions introduced in AVX (no SSE equivalent forms)
+//
+// AVX8I - AVX instructions with T8PD prefix.
+// AVXAIi8 - AVX instructions with TAPD prefix and ImmT = Imm8.
+class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
+ Requires<[HasAVX]>;
+class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
+ Requires<[HasAVX]>;
+
+// AVX2 Instruction Templates:
+// Instructions introduced in AVX2 (no SSE equivalent forms)
+//
+// AVX28I - AVX2 instructions with T8PD prefix.
+// AVX2AIi8 - AVX2 instructions with TAPD prefix and ImmT = Imm8.
+class AVX28I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
+ Requires<[HasAVX2]>;
+class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
+ Requires<[HasAVX2]>;
+
+
+// AVX-512 Instruction Templates:
+// Instructions introduced in AVX-512 (no SSE equivalent forms)
+//
+// AVX5128I - AVX-512 instructions with T8PD prefix.
+// AVX512AIi8 - AVX-512 instructions with TAPD prefix and ImmT = Imm8.
+// AVX512PDI - AVX-512 instructions with PD, double packed.
+// AVX512PSI - AVX-512 instructions with PS, single packed.
+// AVX512XS8I - AVX-512 instructions with T8 and XS prefixes.
+// AVX512XSI - AVX-512 instructions with XS prefix, generic domain.
+// AVX512BI - AVX-512 instructions with PD, int packed domain.
+// AVX512XDI - AVX-512 instructions with XD prefix, int packed domain.
+
+class AVX5128I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
+ Requires<[HasAVX512]>;
+class AVX5128IBase : T8PD {
+ Domain ExeDomain = SSEPackedInt;
+}
+class AVX512XS8I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8XS,
+ Requires<[HasAVX512]>;
+class AVX512XSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, XS,
+ Requires<[HasAVX512]>;
+class AVX512XDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, XD,
+ Requires<[HasAVX512]>;
+class AVX512BI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, PD,
+ Requires<[HasAVX512]>;
+class AVX512BIBase : PD {
+ Domain ExeDomain = SSEPackedInt;
+}
+class AVX512BIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, PD,
+ Requires<[HasAVX512]>;
+class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
+ Requires<[HasAVX512]>;
+class AVX512AIi8Base : TAPD {
+ ImmType ImmT = Imm8;
+}
+class AVX512Ii8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>,
+ Requires<[HasAVX512]>;
+class AVX512PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
+ Requires<[HasAVX512]>;
+class AVX512PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
+ Requires<[HasAVX512]>;
+class AVX512PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d>
+ : Ii8<o, F, outs, ins, asm, pattern, d>, Requires<[HasAVX512]>;
+class AVX512PI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d>
+ : I<o, F, outs, ins, asm, pattern, d>, Requires<[HasAVX512]>;
+class AVX512FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : I<o, F, outs, ins, asm, pattern>, T8PD,
+ EVEX_4V, Requires<[HasAVX512]>;
+
+class AVX512<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : I<o, F, outs, ins, asm, pattern>, Requires<[HasAVX512]>;
+
+// AES Instruction Templates:
+//
+// AES8I - AES instructions with T8PD prefix.
+// AESAI - AES instructions with TAPD prefix and ImmT == Imm8.
+// These use the same encoding as the SSE4.2 T8 and TA encodings.
+class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
+ Requires<[NoAVX, HasAES]>;
+
+class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
+ Requires<[NoAVX, HasAES]>;
+
+// PCLMUL Instruction Templates
+class PCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD;
+
+// FMA3 Instruction Templates
+class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : I<o, F, outs, ins, asm, pattern>, T8PD,
+ VEX_4V, FMASC, Requires<[HasFMA, NoFMA4, NoVLX]>;
+class FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : I<o, F, outs, ins, asm, pattern>, T8PD,
+ VEX_4V, FMASC, Requires<[HasFMA, NoFMA4, NoAVX512]>;
+class FMA3S_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : I<o, F, outs, ins, asm, pattern>, T8PD,
+ VEX_4V, FMASC, Requires<[HasFMA, NoAVX512]>;
+
+// FMA4 Instruction Templates
+class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
+ VEX_4V, FMASC, Requires<[HasFMA4, NoVLX]>;
+class FMA4S<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
+ VEX_4V, FMASC, Requires<[HasFMA4, NoAVX512]>;
+class FMA4S_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
+ VEX_4V, FMASC, Requires<[HasFMA4]>;
+
+// XOP 2, 3 and 4 Operand Instruction Template
+class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
+ XOP9, Requires<[HasXOP]>;
+
+// XOP 2 and 3 Operand Instruction Templates with imm byte
+class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
+ XOP8, Requires<[HasXOP]>;
+// XOP 4 Operand Instruction Templates with imm byte
+class IXOPi8Reg<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8Reg<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
+ XOP8, Requires<[HasXOP]>;
+
+// XOP 5 operand instruction. Note: despite the XOP name, this template uses
+// a VEX encoding (TAPD + VEX_4V) rather than the XOP8/XOP9 maps.
+class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : Ii8Reg<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
+ VEX_4V, Requires<[HasXOP]>;
+
+// X86-64 Instruction templates...
+//
+
+class RI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, REX_W;
+class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, REX_W;
+class RIi16 <bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii16<o, F, outs, ins, asm, pattern>, REX_W;
+class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii32<o, F, outs, ins, asm, pattern>, REX_W;
+class RIi32S <bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii32S<o, F, outs, ins, asm, pattern>, REX_W;
+class RIi64<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii64<o, F, outs, ins, asm, pattern>, REX_W;
+
+class RS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : S2I<o, F, outs, ins, asm, pattern>, REX_W;
+class VRS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : VS2I<o, F, outs, ins, asm, pattern>, REX_W;
+
+// MMX Instruction templates
+//
+
+// MMXI - MMX instructions with TB prefix.
+// MMXI32 - MMX instructions with TB prefix valid only in 32-bit mode.
+// MMXI64 - MMX instructions with TB prefix valid only in 64-bit mode.
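+// MMXRI - MMX instructions with PS prefix and REX.W, 64-bit mode only.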
+// MMX2I - MMX / SSE2 instructions with PD prefix.
+// MMXIi8 - MMX instructions with ImmT == Imm8 and PS prefix.
+// MMXID - MMX instructions with XD prefix.
+// MMXIS - MMX instructions with XS prefix.
+class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX]>;
+class MMXI32<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX,Not64BitMode]>;
+class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX,In64BitMode]>;
+class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, PS, REX_W,
+ Requires<[HasMMX,In64BitMode]>;
+class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, PD, Requires<[HasMMX]>;
+class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX]>;
+class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
+class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;
+
+/// ITy - This instruction base class takes the type info for the instruction.
+/// Using this, it:
+/// 1. Concatenates together the instruction mnemonic with the appropriate
+/// suffix letter, a tab, and the arguments.
+/// 2. Infers whether the instruction should have a 0x66 prefix byte.
+/// 3. Infers whether the instruction should have a 0x48 REX.W prefix.
+/// 4. Infers whether the low bit of the opcode should be 0 (for i8 operations)
+/// or 1 (for i16,i32,i64 operations).
+class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
+ string mnemonic, string args, list<dag> pattern>
+ : I<{opcode{7}, opcode{6}, opcode{5}, opcode{4},
+ opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode },
+ f, outs, ins,
+ !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern> {
+
+ // Infer instruction prefixes from type info.
+ let OpSize = typeinfo.OpSize;
+ let hasREX_W = typeinfo.HasREX_W;
+}
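+// A minimal sketch of ITy in use (hypothetical def, assuming the upstream
+// Xi32 X86TypeInfo record):
+//   def ADD32rr_sketch : ITy<0x00, MRMDestReg, Xi32, (outs GR32:$dst),
+//                            (ins GR32:$src1, GR32:$src2), "add",
+//                            "{$src2, $src1|$src1, $src2}", []>;
+// This yields the "add{l}" mnemonic, OpSize32 with no REX.W, and an encoded
+// opcode of 0x01 because Xi32.HasOddOpcode forces the low opcode bit to 1.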
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVMX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVMX.td
index cfeddbccccac..5289819119ce 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVMX.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVMX.td
@@ -22,6 +22,9 @@ def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invept\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[In64BitMode]>;
+def INVEPT64_EVEX : I<0xF0, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
+ "invept\t{$src2, $src1|$src1, $src2}", []>,
+ EVEX_NoCD8, T_MAP4XS, Requires<[In64BitMode]>;
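+// The *_EVEX variants are EVEX-encoded forms (opcode map 4, XS prefix, no
+// compressed displacement), apparently added for APX-style EVEX promotion of
+// the legacy encodings above.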
// 66 0F 38 81
def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
@@ -30,6 +33,9 @@ def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invvpid\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[In64BitMode]>;
+def INVVPID64_EVEX : I<0xF1, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
+ "invvpid\t{$src2, $src1|$src1, $src2}", []>,
+ EVEX_NoCD8, T_MAP4XS, Requires<[In64BitMode]>;
// 0F 01 C1
def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
index c17b96cc9fdd..2e492fa9c5ee 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
@@ -41,7 +41,6 @@
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
index e562748c98fe..3b370d8c3eb1 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
@@ -27,7 +27,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
-#include <bitset>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
index c7fe0cce8634..591a76e6fd6c 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
@@ -17,7 +17,6 @@
//===----------------------------------------------------------------------===//
//
#include "X86.h"
-#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -185,9 +184,7 @@ Value *X86LowerAMXIntrinsics::createTileLoadStoreLoops(
Value *CurrentColZExt = B.CreateZExt(CurrentCol, Stride->getType());
Value *Offset =
B.CreateAdd(B.CreateMul(CurrentRowZExt, Stride), CurrentColZExt);
- unsigned AS = cast<PointerType>(Ptr->getType())->getAddressSpace();
- Value *EltBasePtr = B.CreatePointerCast(Ptr, PointerType::get(EltTy, AS));
- Value *EltPtr = B.CreateGEP(EltTy, EltBasePtr, Offset);
+ Value *EltPtr = B.CreateGEP(EltTy, Ptr, Offset);
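+    // With opaque pointers, Ptr can feed the GEP directly; the removed
+    // cast to PointerType::get(EltTy, AS) was redundant.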
Value *Idx = B.CreateAdd(B.CreateMul(CurrentRow, B.getInt16(16)), CurrentCol);
if (IsTileLoad) {
// tileload.scalarize.rows.header:
@@ -495,7 +492,7 @@ X86LowerAMXIntrinsics::lowerTileDP(Instruction *TileDP) {
KDWord, C, A, B);
// we cannot assume there will always be a bitcast after tiledpbssd, so we
// need to insert one bitcast as required
- Builder.SetInsertPoint(End->getFirstNonPHI());
+ Builder.SetInsertPoint(End, End->getFirstNonPHIIt());
Value *ResAMX =
Builder.CreateBitCast(ResVec, Type::getX86_AMXTy(Builder.getContext()));
// Delete TileDP intrinsic and do some clean-up.
@@ -539,7 +536,7 @@ bool X86LowerAMXIntrinsics::lowerTileLoadStore(Instruction *TileLoadStore) {
if (IsTileLoad) {
// we cannot assume there will always be a bitcast after tileload, so we
// need to insert one bitcast as required
- Builder.SetInsertPoint(End->getFirstNonPHI());
+ Builder.SetInsertPoint(End, End->getFirstNonPHIIt());
Value *ResAMX =
Builder.CreateBitCast(ResVec, Type::getX86_AMXTy(Builder.getContext()));
// Delete tileloadd6 intrinsic and do some clean-up
@@ -646,7 +643,7 @@ public:
return false;
TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
if (!F.hasFnAttribute(Attribute::OptimizeNone) &&
- TM->getOptLevel() != CodeGenOpt::None)
+ TM->getOptLevel() != CodeGenOptLevel::None)
return false;
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86LowerAMXType.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86LowerAMXType.cpp
index 0416f0f0d2ec..a57c0fe15788 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -244,8 +244,7 @@ void X86LowerAMXType::combineLoadBitcast(LoadInst *LD, BitCastInst *Bitcast) {
IRBuilder<> Builder(Bitcast);
// Use the maximum column as stride.
Value *Stride = Builder.getInt64(64);
- Value *I8Ptr =
- Builder.CreateBitCast(LD->getOperand(0), Builder.getInt8PtrTy());
+ Value *I8Ptr = LD->getOperand(0);
std::array<Value *, 4> Args = {Row, Col, I8Ptr, Stride};
Value *NewInst = Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal,
@@ -272,8 +271,7 @@ void X86LowerAMXType::combineBitcastStore(BitCastInst *Bitcast, StoreInst *ST) {
// Use the maximum column as stride. It must be the same as the load
// stride.
Value *Stride = Builder.getInt64(64);
- Value *I8Ptr =
- Builder.CreateBitCast(ST->getOperand(1), Builder.getInt8PtrTy());
+ Value *I8Ptr = ST->getOperand(1);
std::array<Value *, 5> Args = {Row, Col, I8Ptr, Stride, Tile};
Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, std::nullopt,
Args);
@@ -301,7 +299,7 @@ bool X86LowerAMXType::transformBitcast(BitCastInst *Bitcast) {
auto Prepare = [&](Type *MemTy) {
AllocaAddr = createAllocaInstAtEntry(Builder, Bitcast->getParent(), MemTy);
- I8Ptr = Builder.CreateBitCast(AllocaAddr, Builder.getInt8PtrTy());
+ I8Ptr = AllocaAddr;
Stride = Builder.getInt64(64);
};
@@ -459,7 +457,7 @@ static Value *getAllocaPos(BasicBlock *BB) {
BasicBlock::iterator Iter = AllocaRes->getIterator();
++Iter;
Builder.SetInsertPoint(&*Iter);
- Value *I8Ptr = Builder.CreateBitCast(AllocaRes, Builder.getInt8PtrTy());
+ Value *I8Ptr = Builder.CreateBitCast(AllocaRes, Builder.getPtrTy());
return I8Ptr;
}
@@ -496,7 +494,7 @@ static void replaceWithTileLoad(Use &U, Value *Ptr, bool IsPHI = false) {
Value *Row = II->getOperand(0);
Value *Col = II->getOperand(1);
- Instruction *UserI = dyn_cast<Instruction>(U.getUser());
+ Instruction *UserI = cast<Instruction>(U.getUser());
IRBuilder<> Builder(UserI);
Value *Stride = Builder.getInt64(64);
std::array<Value *, 4> Args = {Row, Col, Ptr, Stride};
@@ -936,8 +934,7 @@ bool X86LowerAMXCast::combineCastStore(IntrinsicInst *Cast, StoreInst *ST) {
IRBuilder<> Builder(ST);
// Stride should be equal to col (measured in bytes)
Value *Stride = Builder.CreateSExt(Col, Builder.getInt64Ty());
- Value *I8Ptr =
- Builder.CreateBitCast(ST->getOperand(1), Builder.getInt8PtrTy());
+ Value *I8Ptr = Builder.CreateBitCast(ST->getOperand(1), Builder.getPtrTy());
std::array<Value *, 5> Args = {Row, Col, I8Ptr, Stride, Tile};
Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, std::nullopt,
Args);
@@ -977,10 +974,10 @@ bool X86LowerAMXCast::combineLoadCast(IntrinsicInst *Cast, LoadInst *LD) {
Builder.CreateStore(LD, AllocaAddr);
Builder.SetInsertPoint(Cast);
- I8Ptr = Builder.CreateBitCast(AllocaAddr, Builder.getInt8PtrTy());
+ I8Ptr = Builder.CreateBitCast(AllocaAddr, Builder.getPtrTy());
EraseLoad = false;
} else {
- I8Ptr = Builder.CreateBitCast(LD->getOperand(0), Builder.getInt8PtrTy());
+ I8Ptr = Builder.CreateBitCast(LD->getOperand(0), Builder.getPtrTy());
}
std::array<Value *, 4> Args = {Row, Col, I8Ptr, Stride};
@@ -1139,7 +1136,7 @@ bool X86LowerAMXCast::transformAMXCast(IntrinsicInst *AMXCast) {
auto Prepare = [&](Type *MemTy) {
AllocaAddr = createAllocaInstAtEntry(Builder, AMXCast->getParent(), MemTy);
- I8Ptr = Builder.CreateBitCast(AllocaAddr, Builder.getInt8PtrTy());
+ I8Ptr = Builder.CreateBitCast(AllocaAddr, Builder.getPtrTy());
Stride = Builder.getInt64(64);
};
@@ -1249,8 +1246,8 @@ public:
// Prepare for fast register allocation at O0.
// Todo: it may be better to check the volatile model of AMX code, not just
- // by checking Attribute::OptimizeNone and CodeGenOpt::None.
- if (TM->getOptLevel() == CodeGenOpt::None) {
+ // by checking Attribute::OptimizeNone and CodeGenOptLevel::None.
+ if (TM->getOptLevel() == CodeGenOptLevel::None) {
// If the front end does not use O0 but the mid/back end does (e.g.
// "Clang -O2 -S -emit-llvm t.c" + "llc t.ll"), we should make
// sure the amx data is volatile; that is necessary for AMX fast
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp
index ecab0c7e6179..e1a67f61e766 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -24,7 +24,6 @@
#include "X86Subtarget.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
@@ -975,7 +974,7 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
if (MinSize == 2 && Subtarget->is32Bit() &&
Subtarget->isTargetWindowsMSVC() &&
(Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) {
- // For compatibility reasons, when targetting MSVC, is is important to
+ // For compatibility reasons, when targeting MSVC, it is important to
// generate a 'legacy' NOP in the form of an 8B FF MOV EDI, EDI. Some tools
// rely specifically on this pattern to be able to patch a function.
// This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE.
@@ -1843,6 +1842,18 @@ static void addConstantComments(const MachineInstr *MI,
case X86::VMOVUPS##Suffix##rm: \
case X86::VMOVUPD##Suffix##rm:
+#define CASE_128_MOV_RM() \
+ MOV_CASE(, ) /* SSE */ \
+ MOV_CASE(V, ) /* AVX-128 */ \
+ MOV_AVX512_CASE(Z128)
+
+#define CASE_256_MOV_RM() \
+ MOV_CASE(V, Y) /* AVX-256 */ \
+ MOV_AVX512_CASE(Z256)
+
+#define CASE_512_MOV_RM() \
+ MOV_AVX512_CASE(Z)
+
#define CASE_ALL_MOV_RM() \
MOV_CASE(, ) /* SSE */ \
MOV_CASE(V, ) /* AVX-128 */ \
@@ -1854,8 +1865,8 @@ static void addConstantComments(const MachineInstr *MI,
// For loads from a constant pool to a vector register, print the constant
// loaded.
CASE_ALL_MOV_RM()
- case X86::VBROADCASTF128:
- case X86::VBROADCASTI128:
+ case X86::VBROADCASTF128rm:
+ case X86::VBROADCASTI128rm:
case X86::VBROADCASTF32X4Z256rm:
case X86::VBROADCASTF32X4rm:
case X86::VBROADCASTF32X8rm:
@@ -1872,22 +1883,28 @@ static void addConstantComments(const MachineInstr *MI,
"Unexpected number of operands!");
if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
int NumLanes = 1;
- // Override NumLanes for the broadcast instructions.
+ int BitWidth = 128;
+ int CstEltSize = C->getType()->getScalarSizeInBits();
+
+ // Get destination BitWidth + override NumLanes for the broadcasts.
switch (MI->getOpcode()) {
- case X86::VBROADCASTF128: NumLanes = 2; break;
- case X86::VBROADCASTI128: NumLanes = 2; break;
- case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break;
- case X86::VBROADCASTF32X4rm: NumLanes = 4; break;
- case X86::VBROADCASTF32X8rm: NumLanes = 2; break;
- case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break;
- case X86::VBROADCASTF64X2rm: NumLanes = 4; break;
- case X86::VBROADCASTF64X4rm: NumLanes = 2; break;
- case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break;
- case X86::VBROADCASTI32X4rm: NumLanes = 4; break;
- case X86::VBROADCASTI32X8rm: NumLanes = 2; break;
- case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break;
- case X86::VBROADCASTI64X2rm: NumLanes = 4; break;
- case X86::VBROADCASTI64X4rm: NumLanes = 2; break;
+ CASE_128_MOV_RM() NumLanes = 1; BitWidth = 128; break;
+ CASE_256_MOV_RM() NumLanes = 1; BitWidth = 256; break;
+ CASE_512_MOV_RM() NumLanes = 1; BitWidth = 512; break;
+ case X86::VBROADCASTF128rm: NumLanes = 2; BitWidth = 128; break;
+ case X86::VBROADCASTI128rm: NumLanes = 2; BitWidth = 128; break;
+ case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; BitWidth = 128; break;
+ case X86::VBROADCASTF32X4rm: NumLanes = 4; BitWidth = 128; break;
+ case X86::VBROADCASTF32X8rm: NumLanes = 2; BitWidth = 256; break;
+ case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; BitWidth = 128; break;
+ case X86::VBROADCASTF64X2rm: NumLanes = 4; BitWidth = 128; break;
+ case X86::VBROADCASTF64X4rm: NumLanes = 2; BitWidth = 256; break;
+ case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; BitWidth = 128; break;
+ case X86::VBROADCASTI32X4rm: NumLanes = 4; BitWidth = 128; break;
+ case X86::VBROADCASTI32X8rm: NumLanes = 2; BitWidth = 256; break;
+ case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; BitWidth = 128; break;
+ case X86::VBROADCASTI64X2rm: NumLanes = 4; BitWidth = 128; break;
+ case X86::VBROADCASTI64X4rm: NumLanes = 2; BitWidth = 256; break;
}
std::string Comment;
@@ -1895,10 +1912,12 @@ static void addConstantComments(const MachineInstr *MI,
const MachineOperand &DstOp = MI->getOperand(0);
CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
+ int NumElements = CDS->getNumElements();
+ if ((BitWidth % CstEltSize) == 0)
+ NumElements = std::min<int>(NumElements, BitWidth / CstEltSize);
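+      // e.g. a vbroadcasti32x4 may reference a constant-pool entry wider
+      // than one lane; clamping to BitWidth / CstEltSize prints only the
+      // elements of a single lane, which the NumLanes loop below repeats.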
CS << "[";
for (int l = 0; l != NumLanes; ++l) {
- for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements;
- ++i) {
+ for (int i = 0; i < NumElements; ++i) {
if (i != 0 || l != 0)
CS << ",";
if (CDS->getElementType()->isIntegerTy())
@@ -1914,10 +1933,12 @@ static void addConstantComments(const MachineInstr *MI,
CS << "]";
OutStreamer.AddComment(CS.str());
} else if (auto *CV = dyn_cast<ConstantVector>(C)) {
+ int NumOperands = CV->getNumOperands();
+ if ((BitWidth % CstEltSize) == 0)
+ NumOperands = std::min<int>(NumOperands, BitWidth / CstEltSize);
CS << "<";
for (int l = 0; l != NumLanes; ++l) {
- for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands;
- ++i) {
+ for (int i = 0; i < NumOperands; ++i) {
if (i != 0 || l != 0)
CS << ",";
printConstant(CV->getOperand(i),
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index 9b2cc35c57e0..f6e853270e07 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include <set>
namespace llvm {
@@ -117,6 +118,12 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// determine if we should insert tilerelease in frame lowering.
bool HasVirtualTileReg = false;
+ /// Adjust the stack for push2/pop2
+ bool PadForPush2Pop2 = false;
+
+ /// Candidate registers for push2/pop2
+ std::set<Register> CandidatesForPush2Pop2;
+
/// True if this function has CFI directives that adjust the CFA.
/// This is used to determine if we should direct the debugger to use
/// the CFA instead of the stack pointer.
@@ -165,7 +172,9 @@ public:
const DenseMap<int, unsigned>& getWinEHXMMSlotInfo() const {
return WinEHXMMSlotInfo; }
- unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+ unsigned getCalleeSavedFrameSize() const {
+ return CalleeSavedFrameSize + 8 * padForPush2Pop2();
+ }
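+ // Note (assumption): push2/pop2 save registers in pairs and need the pair
+ // 16-byte aligned, so one extra 8-byte pad slot is counted here whenever
+ // padForPush2Pop2() is set.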
void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; }
@@ -232,6 +241,19 @@ public:
bool hasVirtualTileReg() const { return HasVirtualTileReg; }
void setHasVirtualTileReg(bool v) { HasVirtualTileReg = v; }
+ bool padForPush2Pop2() const { return PadForPush2Pop2; }
+ void setPadForPush2Pop2(bool V) { PadForPush2Pop2 = V; }
+
+ bool isCandidateForPush2Pop2(Register Reg) const {
+ return CandidatesForPush2Pop2.find(Reg) != CandidatesForPush2Pop2.end();
+ }
+ void addCandidateForPush2Pop2(Register Reg) {
+ CandidatesForPush2Pop2.insert(Reg);
+ }
+ size_t getNumCandidatesForPush2Pop2() const {
+ return CandidatesForPush2Pop2.size();
+ }
+
bool hasCFIAdjustCfa() const { return HasCFIAdjustCfa; }
void setHasCFIAdjustCfa(bool v) { HasCFIAdjustCfa = v; }
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp
index aa6e8645e092..82667b8cdbdb 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp
@@ -67,8 +67,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
namespace llvm {
-std::unique_ptr<ScheduleDAGMutation>
-createX86MacroFusionDAGMutation () {
+std::unique_ptr<ScheduleDAGMutation> createX86MacroFusionDAGMutation() {
return createBranchMacroFusionDAGMutation(shouldScheduleAdjacent);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86PreAMXConfig.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86PreAMXConfig.cpp
deleted file mode 100644
index c9c59af8d6d7..000000000000
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86PreAMXConfig.cpp
+++ /dev/null
@@ -1,415 +0,0 @@
-//===- Target/X86/X86PreAMXConfig.cpp - ------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// Insert tilecfg for each area of key AMX intrinsic.
-/// All the key AMX intrinsic's tile operand must come from tileload. And the
-/// def tile of key AMX intrinsic must be tilestored.
-/// take tdpbssd for example:
-/// --------------------------------------------------------------------------
-/// %t1 = call x86_amx @llvm.x86.tileloadd64.internal(...) key
-/// %t2 = call x86_amx @llvm.x86.tileloadd64.internal(...) |
-/// %t3 = call x86_amx @llvm.x86.tileloadd64.internal(...) amx
-/// %td = tail call x86_amx @llvm.x86.tdpbssd.internal(t1, t2, t3) |
-/// call void @llvm.x86.tilestored64.internal(... td) area
-/// --------------------------------------------------------------------------
-/// This pass will insert tilecfg before every key-amx-area, some like:
-/// --------------------------------------------------------------------------
-/// %cfgmem = alloca <16 x i32>, align 4 * allocate mem
-/// store <16 x i32> zeroinitializer, <16 x i32>* %cfgmem * zero init
-/// ...
-/// ... pre-config shape of %t1 *
-/// store volatile i8 %m, i8* %amx.tmm.0.shape.row, align 1 *
-/// store volatile i16 %k, i16* %amx.tmm.0.shape.col, align 2 * pre-config
-/// ... *
-/// ... pre-config shape of %t2 * shapes
-/// store volatile i8 %k, i8* %amx.tmm.1.shape.row, align 1 *
-/// store volatile i16 %n, i16* %amx.tmm.1.shape.col, align 2 *
-/// ...
-/// call void @llvm.x86.ldtilecfg(i8* %cfgmem) * tile config
-//
-//===----------------------------------------------------------------------===//
-//
-#include "X86.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/IntrinsicsX86.h"
-#include "llvm/IR/PatternMatch.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-
-using namespace llvm;
-using namespace PatternMatch;
-
-#define DEBUG_TYPE "pre-amx-config"
-
-static bool isAMXIntrinsic(IntrinsicInst *II) {
- for (Value *Operand : II->operands())
- if (Operand->getType()->isX86_AMXTy())
- return true;
- return II->getType()->isX86_AMXTy();
-}
-
-static bool isTileLoad(IntrinsicInst *II) {
- return II->getIntrinsicID() == Intrinsic::x86_tileloadd64_internal ||
- II->getIntrinsicID() == Intrinsic::x86_tileloaddt164_internal;
-}
-
-static bool isTileStore(IntrinsicInst *II) {
- return II->getIntrinsicID() == Intrinsic::x86_tilestored64_internal;
-}
-
-#ifndef NDEBUG
-static bool onlyTileDef(IntrinsicInst *II) {
- for (Value *Operand : II->operands())
- if (Operand->getType()->isX86_AMXTy())
- return false;
- return II->getType()->isX86_AMXTy();
-}
-
-static bool brokenVolatile(Instruction *I) {
- // Todo: it is weak to identify a normal call here.
- if ((isa<CallInst>(I) && !isa<IntrinsicInst>(I)) || I->isTerminator())
- return true;
- return false;
-}
-#endif
-
-namespace {
-class X86PreAMXConfig {
- using PosAndShapesMap = MapVector<Instruction *, SmallVector<Value *, 8>>;
-
- Function &F;
-
-public:
- X86PreAMXConfig(Function &Func) : F(Func) {}
- bool preTileConfig();
- void addTileConfig(Instruction *ModelStart, SmallVector<Value *, 8> &Shapes);
- bool findConfigShapes(PosAndShapesMap &PosAndShapes);
- bool getKeyAMXShapes(IntrinsicInst *KeyAMX, SmallVector<Value *, 8> &Shapes);
- void preWriteTileCfg(Value *I8Ptr, IRBuilderBase &Builder,
- SmallVector<Value *, 8> &Shapes);
- BasicBlock::iterator
- getShapesAndConfigPosEnd(BasicBlock::iterator Iter,
- SmallVector<Value *, 8> &Shapes);
- bool checkVolatileModel(SmallSet<Value *, 4> &Loads, IntrinsicInst *Store,
- IntrinsicInst *KeyAMX);
-};
-
-// Orderly write the shapes in tilecfg's mem. This maybe not right.
-// Because the first shape may not corresponding to the first tmm register,
-// so we need to handle at at X86FastTileConfig::materializeTileCfg()
-// after register allocation.
-// For example:
-// --------------------------------------------------------------------------
-// zeroinitialize tilecfg's mem (of ldtilecfg)
-// --------------------------------------------------------------------------
-// ... pre-config shape of %t1 *
-// %amx.tmm.0.shape.row = getelementptr i8, i8* %mem, i64 48 *
-// %amx.tmm.0.shape.col = getelementptr i16, i16* %mem, i64 16 *
-// store volatile i8 %m, i8* %amx.tmm.0.shape.row, align 1 *
-// store volatile i16 %k, i16* %amx.tmm.0.shape.col, align 2 * pre-config
-// ... *
-// ... pre-config shape of %t2 *
-// %amx.tmm.1.shape.row = getelementptr i8, i8* %mem, i64 49 *
-// %amx.tmm.1.shape.col = getelementptr i16, i16* %mem, i64 18 *
-// store volatile i8 %k, i8* %amx.tmm.1.shape.row, align 1 * shapes
-// store volatile i16 %n, i16* %amx.tmm.1.shape.col, align 2 *
-// ... *
-// ... pre-config shape of %t3 * of
-// %amx.tmm.2.shape.row = getelementptr i8, i8* %mem, i64 50 *
-// %amx.tmm.2.shape.col = getelementptr i16, i16* %mem, i64 20 *
-// store volatile i8 %m, i8* %amx.tmm.2.shape.row, align 1 *
-// store volatile i16 %n, i16* %amx.tmm.2.shape.col, align 2 *
-// ... * tiles
-// ... pre-config shape of %td *
-// %amx.tmm.3.shape.row = getelementptr i8, i8* %mem, i64 51 *
-// %amx.tmm.3.shape.col = getelementptr i16, i16* %mem, i64 22 *
-// store volatile i8 %m, i8* %amx.tmm.3.shape.row, align 1 *
-// store volatile i16 %n, i16* %amx.tmm.3.shape.col, align 2 *
-// --------------------------------------------------------------------------
-// call void @llvm.x86.ldtilecfg(i8* %mem) * tile config
-// --------------------------------------------------------------------------
-// %t1 = call x86_amx @llvm.x86.tileloadd64.internal(m, k, ...) key
-// %t2 = call x86_amx @llvm.x86.tileloadd64.internal(k, n, ...)
-// %t3 = call x86_amx @llvm.x86.tileloadd64.internal(m, n, ...) amx
-// %td = tail call x86_amx @llvm.x86.tdpbssd.internal(m, n, k, t1, t2, t3)
-// call void @llvm.x86.tilestored64.internal(... td) area
-// --------------------------------------------------------------------------
-void X86PreAMXConfig::preWriteTileCfg(Value *I8Ptr, IRBuilderBase &Builder,
- SmallVector<Value *, 8> &Shapes) {
- LLVMContext &Ctx = Builder.getContext();
- Type *I8Ty = Type::getInt8Ty(Ctx);
- Type *I16Ty = Type::getInt16Ty(Ctx);
-
- // TODO: Currently we defaultly set Palette = 1, it may be assigned to
- // other value in the future.
- Value *PaletteOffset = ConstantInt::get(Type::getInt64Ty(Ctx), 0);
- Value *PaletteValue = ConstantInt::get(Type::getInt8Ty(Ctx), 1);
- Value *PalettePos = Builder.CreateGEP(I8Ty, I8Ptr, PaletteOffset);
- Builder.CreateStore(PaletteValue, PalettePos);
-
- for (int I = 0, E = Shapes.size() / 2; I < E; I++) {
- Value *RowOffset = ConstantInt::get(Type::getInt64Ty(Ctx), 48 + I);
- Value *ColOffset = ConstantInt::get(Type::getInt64Ty(Ctx), 16 + I * 2);
- const std::string ShapeName = "amx.tmm." + itostr(I);
- Value *RowPos = Builder.CreateGEP(I8Ty, I8Ptr, RowOffset,
- ShapeName + ".shape.row");
- Value *ColPos = Builder.CreateGEP(I8Ty, I8Ptr, ColOffset);
- ColPos = Builder.CreateBitCast(ColPos, PointerType::get(I16Ty, 0),
- ShapeName + ".shape.col");
- Value *Row = Shapes[I * 2];
- Value *Col = Shapes[I * 2 + 1];
- Row = Builder.CreateTrunc(Row, I8Ty);
- Builder.CreateStore(Row, RowPos);
- Builder.CreateStore(Col, ColPos);
- }
-}
-
-void X86PreAMXConfig::addTileConfig(Instruction *ModelStart,
- SmallVector<Value *, 8> &Shapes) {
- Module *M = F.getParent();
- IRBuilder<> Builder(ModelStart);
- const DataLayout &DL = M->getDataLayout();
- unsigned AddrSpace = DL.getAllocaAddrSpace();
- LLVMContext &Ctx = Builder.getContext();
- Type *V512Ty = VectorType::get(Builder.getInt32Ty(), 16, false);
- Align Alignment = DL.getPrefTypeAlign(Type::getInt32Ty(Ctx));
-
- AllocaInst *Addr =
- new AllocaInst(V512Ty, AddrSpace, "", &F.getEntryBlock().front());
- Addr->setAlignment(Alignment);
- Value *I8Ptr = Builder.CreateBitCast(Addr, Builder.getInt8PtrTy());
-
- Builder.CreateAlignedStore(Constant::getNullValue(V512Ty), Addr, Alignment);
-
- preWriteTileCfg(I8Ptr, Builder, Shapes);
-
- Builder.CreateIntrinsic(Intrinsic::x86_ldtilecfg_internal, std::nullopt,
- {I8Ptr});
-}
-
-// Todo: We may need to handle "more than one store" case in the future.
-bool X86PreAMXConfig::checkVolatileModel(SmallSet<Value *, 4> &Loads,
- IntrinsicInst *Store,
- IntrinsicInst *KeyAMX) {
- Value *ST = Store->getOperand(4);
-
- // Only has tileload and tilestore.
- if (!KeyAMX)
- return (Loads.size() == 1) && Loads.contains(ST);
-
- // All Loads should be operands of KeyAMX.
- // All tile operands of KeyAMX should come from Loads.
- for (Value *Op : KeyAMX->operands()) {
- if (Op->getType()->isX86_AMXTy())
- if (!Loads.erase(Op))
- return false;
- }
-
- // The def of KeyAMX should be stored into mem.
- // Todo: is it key amx can be no def?
- return Loads.empty() && (ST == cast<Value>(KeyAMX));
-}
-
-bool X86PreAMXConfig::getKeyAMXShapes(IntrinsicInst *KeyAMX,
- SmallVector<Value *, 8> &Shapes) {
- for (unsigned I = 0; I < KeyAMX->getNumOperands(); I++) {
- Value *Op = KeyAMX->getOperand(I);
- if (!Op->getType()->isX86_AMXTy())
- continue;
- IntrinsicInst *TileDef = dyn_cast<IntrinsicInst>(Op);
- assert((TileDef && isTileLoad(TileDef)) &&
- "All KeyAMX's tile definiation should comes from TileLoad!");
- Shapes.push_back(TileDef->getOperand(0));
- Shapes.push_back(TileDef->getOperand(1));
- }
- if (!isTileStore(KeyAMX)) {
- Shapes.push_back(KeyAMX->getOperand(0));
- Shapes.push_back(KeyAMX->getOperand(1));
- }
- return Shapes.size() != 0;
-}
-
-// Collect the shapes and skip the area of current key amx intrinsic.
-//
-// For example:
-// ...
-// --------------------------------------------------------------------------
-// %t1 = call x86_amx @llvm.x86.tileloadd64.internal(m, k, ...) record (m,k)
-// %t2 = call x86_amx @llvm.x86.tileloadd64.internal(k, n, ...) record (m,k)
-// %t3 = call x86_amx @llvm.x86.tileloadd64.internal(m, n, ...) record (m,k)
-// %td = call x86_amx @llvm.x86.tdpbssd.internal(...t1, t2, t3)
-// call void @llvm.x86.tilestored64.internal(m, n,... td) <--PosEnd record (m,k)
-// --------------------------------------------------------------------------
-BasicBlock::iterator
-X86PreAMXConfig::getShapesAndConfigPosEnd(BasicBlock::iterator Iter,
- SmallVector<Value *, 8> &Shapes) {
- IntrinsicInst *KeyAMX = nullptr;
- BasicBlock *BB = Iter->getParent();
- BasicBlock::iterator PosEnd = BB->end();
- SmallSet<Value *, 4> Loads;
-
- // See TileStore as "Config Position End" and check volatile model.
- for (auto I = Iter, E = BB->end(); I != E; ++I) {
- assert(!brokenVolatile(&*I) && "Not reach tile store!");
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(&*I);
- if (!II || !isAMXIntrinsic(II))
- continue;
-
- if (isTileLoad(II)) {
- Loads.insert(II);
- } else if (isTileStore(II)) {
- if (!checkVolatileModel(Loads, II, KeyAMX))
- report_fatal_error("Not Volatile AMX Model!");
- PosEnd = I;
- break;
- } else {
- assert(!KeyAMX && "Too many key amx intrinsic!");
- KeyAMX = II;
- }
- }
- assert(PosEnd != BB->end() && "Not find TileStore!");
-
- // See KeyAMX as TileStore if only TileLoad and TileStore.
- if (!KeyAMX)
- KeyAMX = dyn_cast<IntrinsicInst>(&*PosEnd);
-
- // Get Shapes in order.
- assert(Shapes.empty() && "Shapes should be clean.");
- getKeyAMXShapes(KeyAMX, Shapes);
-
- return PosEnd;
-}
-
-// Record a key amx area's shapes with its position.
-// Use the first tileload as its position.
-// For example:
-// ...
-// --------------------------------------------------------------------------
-// %t1 = call x86_amx @llvm.x86.tileloadd64.internal(m, k, ...) <-- pos
-// %t2 = call x86_amx @llvm.x86.tileloadd64.internal(k, n, ...) /
-// %t3 = call x86_amx @llvm.x86.tileloadd64.internal(m, n, ...) shapes:
-// %td = call x86_amx @llvm.x86.tdpbssd.internal(...t1, t2, t3) (m,k)(k,n)
-// call void @llvm.x86.tilestored64.internal(m, n,... td) (m,n)(m,n)
-// --------------------------------------------------------------------------
-bool X86PreAMXConfig::findConfigShapes(PosAndShapesMap &PosAndShapes) {
- bool Find = false;
- for (BasicBlock &BB : F) {
- for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(&*I);
- if (!II)
- continue;
- if (!isAMXIntrinsic(II))
- continue;
- assert(onlyTileDef(II) && "Not volatile model for AMX at O0!");
-
- I = getShapesAndConfigPosEnd(I, PosAndShapes[&*I]);
- Find = true;
- }
- }
- return Find;
-}
-
-// Insert ldtilecfg and preconfig the shapes for each area of key AMX intrinsic.
-// e.g. (key amx = tdpbssd)
-// --------------------------------------------------------------------------
-// %cfgmem = alloca <16 x i32>, align 4 * allocate mem
-// store <16 x i32> zeroinitializer, <16 x i32>* %cfgmem * zero init
-// ...
-// ... pre-config shape of %t1 *
-// store volatile i8 %m, i8* %amx.tmm.0.shape.row, align 1 *
-// store volatile i16 %k, i16* %amx.tmm.0.shape.col, align 2 * pre-config
-// ... *
-// ... pre-config shape of %t2 *
-// store volatile i8 %k, i8* %amx.tmm.1.shape.row, align 1 * shapes
-// store volatile i16 %n, i16* %amx.tmm.1.shape.col, align 2 *
-// ... *
-// ... pre-config shape of %t3 * of
-// store volatile i8 %m, i8* %amx.tmm.2.shape.row, align 1 *
-// store volatile i16 %n, i16* %amx.tmm.2.shape.col, align 2 *
-// ... * tiles
-// ... pre-config shape of %td *
-// store volatile i8 %m, i8* %amx.tmm.3.shape.row, align 1 *
-// store volatile i16 %n, i16* %amx.tmm.3.shape.col, align 2 *
-//
-// call void @llvm.x86.ldtilecfg(i8* %cfgmem) * pre-config
-// --------------------------------------------------------------------------
-// %t1 = call x86_amx @llvm.x86.tileloadd64.internal(m, k, ...) key
-// %t2 = call x86_amx @llvm.x86.tileloadd64.internal(k, n, ...)
-// %t3 = call x86_amx @llvm.x86.tileloadd64.internal(m, n, ...) amx
-// %td = tail call x86_amx @llvm.x86.tdpbssd.internal(m, n, k, t1, t2, t3)
-// call void @llvm.x86.tilestored64.internal(... td) area
-// --------------------------------------------------------------------------
-bool X86PreAMXConfig::preTileConfig() {
- PosAndShapesMap PosAndShapes;
- bool NeedCfg = findConfigShapes(PosAndShapes);
- if (!NeedCfg)
- return false;
- for (auto &IPAndShapes : PosAndShapes)
- addTileConfig(IPAndShapes.first, IPAndShapes.second);
-
- return true;
-}
-} // anonymous namespace
-
-namespace {
-
-class X86PreAMXConfigPass : public FunctionPass {
-public:
- static char ID;
-
- X86PreAMXConfigPass() : FunctionPass(ID) {
- initializeX86PreAMXConfigPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
- bool C = false;
-
- // Prepare for fast register allocation at O0.
- if (TM->getOptLevel() == CodeGenOpt::None) {
-
- // We pre-config each key AMX intrinsic at O0.
- // In theory, one tile config can cover several AMX intrinsics, but
- // it is very diffcult to classify the tile shapes at O0. So here we
- // let thing be easy, pre-config every key AMX intrinsic.
- X86PreAMXConfig PCFG(F);
- C = PCFG.preTileConfig();
- }
-
- return C;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<TargetPassConfig>();
- }
-};
-
-} // anonymous namespace
-
-static const char PassName[] = "Pre AMX Tile Config";
-char X86PreAMXConfigPass::ID = 0;
-INITIALIZE_PASS_BEGIN(X86PreAMXConfigPass, DEBUG_TYPE, PassName, false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
-INITIALIZE_PASS_END(X86PreAMXConfigPass, DEBUG_TYPE, PassName, false, false)
-
-FunctionPass *llvm::createX86PreAMXConfigPass() {
- return new X86PreAMXConfigPass();
-}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86PreTileConfig.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86PreTileConfig.cpp
index a382db493fd4..75ad58e5cdcb 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86PreTileConfig.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86PreTileConfig.cpp
@@ -28,6 +28,7 @@
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.cpp
index bd29e9317ca5..e76d0d7bf50e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -158,6 +158,10 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
case X86::GR16RegClassID:
case X86::GR32RegClassID:
case X86::GR64RegClassID:
+ case X86::GR8_NOREX2RegClassID:
+ case X86::GR16_NOREX2RegClassID:
+ case X86::GR32_NOREX2RegClassID:
+ case X86::GR64_NOREX2RegClassID:
case X86::RFP32RegClassID:
case X86::RFP64RegClassID:
case X86::RFP80RegClassID:
@@ -306,7 +310,8 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_64_AllRegs_AVX_SaveList;
return CSR_64_AllRegs_SaveList;
case CallingConv::PreserveMost:
- return CSR_64_RT_MostRegs_SaveList;
+ return IsWin64 ? CSR_Win64_RT_MostRegs_SaveList
+ : CSR_64_RT_MostRegs_SaveList;
case CallingConv::PreserveAll:
if (HasAVX)
return CSR_64_RT_AllRegs_AVX_SaveList;
@@ -427,7 +432,7 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
return CSR_64_AllRegs_AVX_RegMask;
return CSR_64_AllRegs_RegMask;
case CallingConv::PreserveMost:
- return CSR_64_RT_MostRegs_RegMask;
+ return IsWin64 ? CSR_Win64_RT_MostRegs_RegMask : CSR_64_RT_MostRegs_RegMask;
case CallingConv::PreserveAll:
if (HasAVX)
return CSR_64_RT_AllRegs_AVX_RegMask;
@@ -604,18 +609,59 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
}
if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
- for (unsigned n = 16; n != 32; ++n) {
- for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid(); ++AI)
+ for (unsigned n = 0; n != 16; ++n) {
+ for (MCRegAliasIterator AI(X86::XMM16 + n, this, true); AI.isValid();
+ ++AI)
Reserved.set(*AI);
}
}
+ // Reserve the extended general purpose registers.
+ if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasEGPR())
+ Reserved.set(X86::R16, X86::R31WH + 1);
+
+ if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
+ for (MCRegAliasIterator AI(X86::R14, this, true); AI.isValid(); ++AI)
+ Reserved.set(*AI);
+ for (MCRegAliasIterator AI(X86::R15, this, true); AI.isValid(); ++AI)
+ Reserved.set(*AI);
+ }
+
assert(checkAllSuperRegsMarked(Reserved,
{X86::SIL, X86::DIL, X86::BPL, X86::SPL,
X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
return Reserved;
}
+unsigned X86RegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const {
+ // All existing Intel CPUs that support AMX support AVX512 and all existing
+ // Intel CPUs that support APX support AMX. AVX512 implies AVX.
+ //
+ // We enumerate the registers in X86GenRegisterInfo.inc in this order:
+ //
+ // Registers before AVX512,
+ // AVX512 registers (X/YMM16-31, ZMM0-31, K registers)
+ // AMX registers (TMM)
+ // APX registers (R16-R31)
+ //
+ // and try to return the minimum number of registers supported by the target.
+ assert((X86::R15WH + 1 == X86::YMM0) && (X86::YMM15 + 1 == X86::K0) &&
+ (X86::K6_K7 + 1 == X86::TMMCFG) && (X86::TMM7 + 1 == X86::R16) &&
+ (X86::R31WH + 1 == X86::NUM_TARGET_REGS) &&
+ "Register number may be incorrect");
+
+ const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
+ if (ST.hasEGPR())
+ return X86::NUM_TARGET_REGS;
+ if (ST.hasAMXTILE())
+ return X86::TMM7 + 1;
+ if (ST.hasAVX512())
+ return X86::K6_K7 + 1;
+ if (ST.hasAVX())
+ return X86::YMM15 + 1;
+ return X86::R15WH + 1;
+}
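+// For example, on a target with AVX2 but no AVX512/AMX/EGPR this returns
+// X86::YMM15 + 1, so the K, TMM and R16-R31 ranges are never enumerated.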
+
bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF,
MCRegister Reg) const {
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
@@ -1030,7 +1076,8 @@ bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
VirtReg, Order, Hints, MF, VRM, Matrix);
- if (RC.getID() != X86::TILERegClassID)
+ unsigned ID = RC.getID();
+ if (ID != X86::TILERegClassID)
return BaseImplRetVal;
ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.h
index da7b171e4cf6..7296a5f021e4 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -51,6 +51,9 @@ private:
public:
explicit X86RegisterInfo(const Triple &TT);
+ /// Return the number of registers for the function.
+ unsigned getNumSupportedRegs(const MachineFunction &MF) const override;
+
// FIXME: This should be tablegen'd like getDwarfRegNum is
int getSEHRegNum(unsigned i) const;
@@ -143,6 +146,12 @@ public:
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
+ /// Process frame indices in forwards block order because
+ /// X86InstrInfo::getSPAdjust relies on it when searching for the
+ /// ADJCALLSTACKUP pseudo following a call.
+ /// TODO: Fix this and return true like all other targets.
+ bool eliminateFrameIndicesBackwards() const override { return false; }
+
/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
/// when it reaches the "return" instruction. We can then pop a stack object
/// to this register without worry about clobbering it.
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.td
index 1e6477e658b9..166024bf3b53 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -73,6 +73,44 @@ def R12B : X86Reg<"r12b", 12>;
def R13B : X86Reg<"r13b", 13>;
def R14B : X86Reg<"r14b", 14>;
def R15B : X86Reg<"r15b", 15>;
+// RAGreedy prefers to select a cheaper register
+// For x86,
+// Cost(caller-save reg) < Cost(callee-save reg)
+// b/c callee-save registers need push/pop in the prolog/epilog.
+// If both registers are callee-saved or both are caller-saved,
+// Cost(short-encoding reg) < Cost(long-encoding reg)
+//
+// To achieve this, we do the following things:
+// 1. Set CostPerUse=1 for registers that need a prefix
+// 2. Treat a callee-save register as never cheaper than a register w/ cost 1
+// 3. List caller-save registers before callee-save registers in RegisterClass
+// or AllocationOrder
+//
+// NOTE:
+// D133902 stopped assigning register costs for R8-R15, which brought both
+// gains and regressions. We don't know whether we should assign costs to
+// R16-R31 w/o performance data.
+// TODO:
+// Update the comment/cost after tuning.
+// APX only, requires REX2 or EVEX.
+let PositionOrder = 4 in {
+def R16B : X86Reg<"r16b", 16>;
+def R17B : X86Reg<"r17b", 17>;
+def R18B : X86Reg<"r18b", 18>;
+def R19B : X86Reg<"r19b", 19>;
+def R20B : X86Reg<"r20b", 20>;
+def R21B : X86Reg<"r21b", 21>;
+def R22B : X86Reg<"r22b", 22>;
+def R23B : X86Reg<"r23b", 23>;
+def R24B : X86Reg<"r24b", 24>;
+def R25B : X86Reg<"r25b", 25>;
+def R26B : X86Reg<"r26b", 26>;
+def R27B : X86Reg<"r27b", 27>;
+def R28B : X86Reg<"r28b", 28>;
+def R29B : X86Reg<"r29b", 29>;
+def R30B : X86Reg<"r30b", 30>;
+def R31B : X86Reg<"r31b", 31>;
+}
let isArtificial = 1 in {
// High byte of the low 16 bits of the super-register:
@@ -88,6 +126,24 @@ def R12BH : X86Reg<"", -1>;
def R13BH : X86Reg<"", -1>;
def R14BH : X86Reg<"", -1>;
def R15BH : X86Reg<"", -1>;
+let PositionOrder = 4 in {
+def R16BH : X86Reg<"", -1>;
+def R17BH : X86Reg<"", -1>;
+def R18BH : X86Reg<"", -1>;
+def R19BH : X86Reg<"", -1>;
+def R20BH : X86Reg<"", -1>;
+def R21BH : X86Reg<"", -1>;
+def R22BH : X86Reg<"", -1>;
+def R23BH : X86Reg<"", -1>;
+def R24BH : X86Reg<"", -1>;
+def R25BH : X86Reg<"", -1>;
+def R26BH : X86Reg<"", -1>;
+def R27BH : X86Reg<"", -1>;
+def R28BH : X86Reg<"", -1>;
+def R29BH : X86Reg<"", -1>;
+def R30BH : X86Reg<"", -1>;
+def R31BH : X86Reg<"", -1>;
+}
// High word of the low 32 bits of the super-register:
def HAX : X86Reg<"", -1>;
def HDX : X86Reg<"", -1>;
@@ -106,6 +162,24 @@ def R12WH : X86Reg<"", -1>;
def R13WH : X86Reg<"", -1>;
def R14WH : X86Reg<"", -1>;
def R15WH : X86Reg<"", -1>;
+let PositionOrder = 4 in {
+def R16WH : X86Reg<"", -1>;
+def R17WH : X86Reg<"", -1>;
+def R18WH : X86Reg<"", -1>;
+def R19WH : X86Reg<"", -1>;
+def R20WH : X86Reg<"", -1>;
+def R21WH : X86Reg<"", -1>;
+def R22WH : X86Reg<"", -1>;
+def R23WH : X86Reg<"", -1>;
+def R24WH : X86Reg<"", -1>;
+def R25WH : X86Reg<"", -1>;
+def R26WH : X86Reg<"", -1>;
+def R27WH : X86Reg<"", -1>;
+def R28WH : X86Reg<"", -1>;
+def R29WH : X86Reg<"", -1>;
+def R30WH : X86Reg<"", -1>;
+def R31WH : X86Reg<"", -1>;
+}
}
// 16-bit registers
@@ -134,6 +208,27 @@ def R13W : X86Reg<"r13w", 13, [R13B,R13BH]>;
def R14W : X86Reg<"r14w", 14, [R14B,R14BH]>;
def R15W : X86Reg<"r15w", 15, [R15B,R15BH]>;
}
+// APX only, requires REX2 or EVEX.
+let SubRegIndices = [sub_8bit, sub_8bit_hi_phony], CoveredBySubRegs = 1 in {
+let PositionOrder = 4 in {
+def R16W : X86Reg<"r16w", 16, [R16B,R16BH]>;
+def R17W : X86Reg<"r17w", 17, [R17B,R17BH]>;
+def R18W : X86Reg<"r18w", 18, [R18B,R18BH]>;
+def R19W : X86Reg<"r19w", 19, [R19B,R19BH]>;
+def R20W : X86Reg<"r20w", 20, [R20B,R20BH]>;
+def R21W : X86Reg<"r21w", 21, [R21B,R21BH]>;
+def R22W : X86Reg<"r22w", 22, [R22B,R22BH]>;
+def R23W : X86Reg<"r23w", 23, [R23B,R23BH]>;
+def R24W : X86Reg<"r24w", 24, [R24B,R24BH]>;
+def R25W : X86Reg<"r25w", 25, [R25B,R25BH]>;
+def R26W : X86Reg<"r26w", 26, [R26B,R26BH]>;
+def R27W : X86Reg<"r27w", 27, [R27B,R27BH]>;
+def R28W : X86Reg<"r28w", 28, [R28B,R28BH]>;
+def R29W : X86Reg<"r29w", 29, [R29B,R29BH]>;
+def R30W : X86Reg<"r30w", 30, [R30B,R30BH]>;
+def R31W : X86Reg<"r31w", 31, [R31B,R31BH]>;
+}
+}
// 32-bit registers
let SubRegIndices = [sub_16bit, sub_16bit_hi], CoveredBySubRegs = 1 in {
@@ -160,6 +255,27 @@ def R14D : X86Reg<"r14d", 14, [R14W,R14WH]>;
def R15D : X86Reg<"r15d", 15, [R15W,R15WH]>;
}
+// APX only, requires REX2 or EVEX.
+let SubRegIndices = [sub_16bit, sub_16bit_hi], CoveredBySubRegs = 1 in {
+let PositionOrder = 4 in {
+def R16D : X86Reg<"r16d", 16, [R16W,R16WH]>;
+def R17D : X86Reg<"r17d", 17, [R17W,R17WH]>;
+def R18D : X86Reg<"r18d", 18, [R18W,R18WH]>;
+def R19D : X86Reg<"r19d", 19, [R19W,R19WH]>;
+def R20D : X86Reg<"r20d", 20, [R20W,R20WH]>;
+def R21D : X86Reg<"r21d", 21, [R21W,R21WH]>;
+def R22D : X86Reg<"r22d", 22, [R22W,R22WH]>;
+def R23D : X86Reg<"r23d", 23, [R23W,R23WH]>;
+def R24D : X86Reg<"r24d", 24, [R24W,R24WH]>;
+def R25D : X86Reg<"r25d", 25, [R25W,R25WH]>;
+def R26D : X86Reg<"r26d", 26, [R26W,R26WH]>;
+def R27D : X86Reg<"r27d", 27, [R27W,R27WH]>;
+def R28D : X86Reg<"r28d", 28, [R28W,R28WH]>;
+def R29D : X86Reg<"r29d", 29, [R29W,R29WH]>;
+def R30D : X86Reg<"r30d", 30, [R30W,R30WH]>;
+def R31D : X86Reg<"r31d", 31, [R31W,R31WH]>;
+}
+}
// 64-bit registers, X86-64 only
let SubRegIndices = [sub_32bit] in {
def RAX : X86Reg<"rax", 0, [EAX]>, DwarfRegNum<[0, -2, -2]>;
@@ -181,6 +297,25 @@ def R13 : X86Reg<"r13", 13, [R13D]>, DwarfRegNum<[13, -2, -2]>;
def R14 : X86Reg<"r14", 14, [R14D]>, DwarfRegNum<[14, -2, -2]>;
def R15 : X86Reg<"r15", 15, [R15D]>, DwarfRegNum<[15, -2, -2]>;
def RIP : X86Reg<"rip", 0, [EIP]>, DwarfRegNum<[16, -2, -2]>;
+// APX only, requires REX2 or EVEX.
+let PositionOrder = 4 in {
+def R16 : X86Reg<"r16", 16, [R16D]>, DwarfRegNum<[130, -2, -2]>;
+def R17 : X86Reg<"r17", 17, [R17D]>, DwarfRegNum<[131, -2, -2]>;
+def R18 : X86Reg<"r18", 18, [R18D]>, DwarfRegNum<[132, -2, -2]>;
+def R19 : X86Reg<"r19", 19, [R19D]>, DwarfRegNum<[133, -2, -2]>;
+def R20 : X86Reg<"r20", 20, [R20D]>, DwarfRegNum<[134, -2, -2]>;
+def R21 : X86Reg<"r21", 21, [R21D]>, DwarfRegNum<[135, -2, -2]>;
+def R22 : X86Reg<"r22", 22, [R22D]>, DwarfRegNum<[136, -2, -2]>;
+def R23 : X86Reg<"r23", 23, [R23D]>, DwarfRegNum<[137, -2, -2]>;
+def R24 : X86Reg<"r24", 24, [R24D]>, DwarfRegNum<[138, -2, -2]>;
+def R25 : X86Reg<"r25", 25, [R25D]>, DwarfRegNum<[139, -2, -2]>;
+def R26 : X86Reg<"r26", 26, [R26D]>, DwarfRegNum<[140, -2, -2]>;
+def R27 : X86Reg<"r27", 27, [R27D]>, DwarfRegNum<[141, -2, -2]>;
+def R28 : X86Reg<"r28", 28, [R28D]>, DwarfRegNum<[142, -2, -2]>;
+def R29 : X86Reg<"r29", 29, [R29D]>, DwarfRegNum<[143, -2, -2]>;
+def R30 : X86Reg<"r30", 30, [R30D]>, DwarfRegNum<[144, -2, -2]>;
+def R31 : X86Reg<"r31", 31, [R31D]>, DwarfRegNum<[145, -2, -2]>;
+}
}
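
The defs above give the sixteen APX GPRs hardware encodings 16 through 31, which is exactly why their comments say "requires REX2 or EVEX": legacy and REX encodings can only address encodings 0-15. A minimal standalone C++ sketch of that rule (the helper name is ours, not LLVM's actual API):

#include <cassert>

// HwEnc is the HWEncoding value from the defs above, e.g. 16 for R16.
// Encodings 0-15 fit in ModRM plus a single REX bit; 16-31 need the
// extra register bits that only a REX2 or EVEX prefix carries.
static inline bool needsRex2OrEvex(unsigned HwEnc) {
  assert(HwEnc < 32 && "x86-64 GPR encodings are at most 5 bits");
  return HwEnc >= 16;
}
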
// MMX Registers. These are actually aliased to ST0 .. ST7
@@ -223,6 +358,8 @@ def XMM13: X86Reg<"xmm13", 13>, DwarfRegNum<[30, -2, -2]>;
def XMM14: X86Reg<"xmm14", 14>, DwarfRegNum<[31, -2, -2]>;
def XMM15: X86Reg<"xmm15", 15>, DwarfRegNum<[32, -2, -2]>;
+let PositionOrder = 2 in {
+// XMM16-31 registers, used by AVX-512 instructions.
def XMM16: X86Reg<"xmm16", 16>, DwarfRegNum<[67, -2, -2]>;
def XMM17: X86Reg<"xmm17", 17>, DwarfRegNum<[68, -2, -2]>;
def XMM18: X86Reg<"xmm18", 18>, DwarfRegNum<[69, -2, -2]>;
@@ -239,27 +376,51 @@ def XMM28: X86Reg<"xmm28", 28>, DwarfRegNum<[79, -2, -2]>;
def XMM29: X86Reg<"xmm29", 29>, DwarfRegNum<[80, -2, -2]>;
def XMM30: X86Reg<"xmm30", 30>, DwarfRegNum<[81, -2, -2]>;
def XMM31: X86Reg<"xmm31", 31>, DwarfRegNum<[82, -2, -2]>;
+}
// YMM0-15 registers, used by AVX instructions and
// YMM16-31 registers, used by AVX-512 instructions.
-let SubRegIndices = [sub_xmm] in {
- foreach Index = 0-31 in {
+let SubRegIndices = [sub_xmm], PositionOrder = 1 in {
+ foreach Index = 0-15 in {
def YMM#Index : X86Reg<"ymm"#Index, Index, [!cast<X86Reg>("XMM"#Index)]>,
DwarfRegAlias<!cast<X86Reg>("XMM"#Index)>;
}
}
+let SubRegIndices = [sub_xmm], PositionOrder = 2 in {
+ foreach Index = 16-31 in {
+ def YMM#Index : X86Reg<"ymm"#Index, Index, [!cast<X86Reg>("XMM"#Index)]>,
+ DwarfRegAlias<!cast<X86Reg>("XMM"#Index)>;
+ }
+}
+
// ZMM Registers, used by AVX-512 instructions.
-let SubRegIndices = [sub_ymm] in {
+let SubRegIndices = [sub_ymm], PositionOrder = 2 in {
foreach Index = 0-31 in {
def ZMM#Index : X86Reg<"zmm"#Index, Index, [!cast<X86Reg>("YMM"#Index)]>,
DwarfRegAlias<!cast<X86Reg>("XMM"#Index)>;
}
}
+let PositionOrder = 2 in {
+// Mask Registers, used by AVX-512 instructions.
+def K0 : X86Reg<"k0", 0>, DwarfRegNum<[118, 93, 93]>;
+def K1 : X86Reg<"k1", 1>, DwarfRegNum<[119, 94, 94]>;
+def K2 : X86Reg<"k2", 2>, DwarfRegNum<[120, 95, 95]>;
+def K3 : X86Reg<"k3", 3>, DwarfRegNum<[121, 96, 96]>;
+def K4 : X86Reg<"k4", 4>, DwarfRegNum<[122, 97, 97]>;
+def K5 : X86Reg<"k5", 5>, DwarfRegNum<[123, 98, 98]>;
+def K6 : X86Reg<"k6", 6>, DwarfRegNum<[124, 99, 99]>;
+def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, 100, 100]>;
+// Mask register pairs
+def KPAIRS : RegisterTuples<[sub_mask_0, sub_mask_1],
+ [(add K0, K2, K4, K6), (add K1, K3, K5, K7)]>;
+}
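
RegisterTuples zips its operand lists index by index: element i of the first dag becomes sub_mask_0 and element i of the second becomes sub_mask_1, so KPAIRS above yields the four pairs (K0,K1), (K2,K3), (K4,K5) and (K6,K7). A small C++ illustration of that pairing (mask numbers only; the printed names match how TableGen would render the tuples):

#include <cstdio>

int main() {
  const int Lo[] = {0, 2, 4, 6}; // K0, K2, K4, K6 -> sub_mask_0
  const int Hi[] = {1, 3, 5, 7}; // K1, K3, K5, K7 -> sub_mask_1
  for (int I = 0; I != 4; ++I)
    std::printf("K%d_K%d\n", Lo[I], Hi[I]); // K0_K1, K2_K3, K4_K5, K6_K7
  return 0;
}
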
+
+// TMM registers, used by AMX instructions.
+let PositionOrder = 3 in {
// Tile config registers.
def TMMCFG: X86Reg<"tmmcfg", 0>;
-
// Tile "registers".
def TMM0: X86Reg<"tmm0", 0>;
def TMM1: X86Reg<"tmm1", 1>;
@@ -269,16 +430,7 @@ def TMM4: X86Reg<"tmm4", 4>;
def TMM5: X86Reg<"tmm5", 5>;
def TMM6: X86Reg<"tmm6", 6>;
def TMM7: X86Reg<"tmm7", 7>;
-
-// Mask Registers, used by AVX-512 instructions.
-def K0 : X86Reg<"k0", 0>, DwarfRegNum<[118, 93, 93]>;
-def K1 : X86Reg<"k1", 1>, DwarfRegNum<[119, 94, 94]>;
-def K2 : X86Reg<"k2", 2>, DwarfRegNum<[120, 95, 95]>;
-def K3 : X86Reg<"k3", 3>, DwarfRegNum<[121, 96, 96]>;
-def K4 : X86Reg<"k4", 4>, DwarfRegNum<[122, 97, 97]>;
-def K5 : X86Reg<"k5", 5>, DwarfRegNum<[123, 98, 98]>;
-def K6 : X86Reg<"k6", 6>, DwarfRegNum<[124, 99, 99]>;
-def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, 100, 100]>;
+}
// Floating point stack registers. These don't map one-to-one to the FP
// pseudo registers, but we still mark them as aliasing FP registers. That
@@ -390,9 +542,11 @@ def SSP : X86Reg<"ssp", 0>;
// instruction requiring a REX prefix, while SIL, DIL, BPL, R8D, etc.
// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
// cannot be encoded.
-def GR8 : RegisterClass<"X86", [i8], 8,
+def GR8 : RegisterClass<"X86", [i8], 8,
(add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
- R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B)> {
+ R8B, R9B, R10B, R11B, R16B, R17B, R18B, R19B, R20B,
+ R21B, R22B, R23B, R24B, R25B, R26B, R27B, R28B, R29B,
+ R30B, R31B, R14B, R15B, R12B, R13B)> {
let AltOrders = [(sub GR8, AH, BH, CH, DH)];
let AltOrderSelect = [{
return MF.getSubtarget<X86Subtarget>().is64Bit();
@@ -400,23 +554,28 @@ def GR8 : RegisterClass<"X86", [i8], 8,
}
let isAllocatable = 0 in
-def GRH8 : RegisterClass<"X86", [i8], 8,
+def GRH8 : RegisterClass<"X86", [i8], 8,
(add SIH, DIH, BPH, SPH, R8BH, R9BH, R10BH, R11BH,
- R12BH, R13BH, R14BH, R15BH)>;
-
+ R12BH, R13BH, R14BH, R15BH, R16BH, R17BH, R18BH,
+ R19BH, R20BH, R21BH, R22BH, R23BH, R24BH, R25BH,
+ R26BH, R27BH, R28BH, R29BH, R30BH, R31BH)>;
def GR16 : RegisterClass<"X86", [i16], 16,
- (add AX, CX, DX, SI, DI, BX, BP, SP,
- R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)>;
+ (add AX, CX, DX, SI, DI, BX, BP, SP, R8W, R9W, R10W,
+ R11W, R16W, R17W, R18W, R19W, R20W, R21W, R22W, R23W,
+ R24W, R25W, R26W, R27W, R28W, R29W, R30W, R31W, R14W,
+ R15W, R12W, R13W)>;
let isAllocatable = 0 in
def GRH16 : RegisterClass<"X86", [i16], 16,
- (add HAX, HCX, HDX, HSI, HDI, HBX, HBP, HSP, HIP,
- R8WH, R9WH, R10WH, R11WH, R12WH, R13WH, R14WH,
- R15WH)>;
-
+ (add HAX, HCX, HDX, HSI, HDI, HBX, HBP, HSP, HIP, R8WH,
+ R9WH, R10WH, R11WH, R12WH, R13WH, R14WH, R15WH, R16WH,
+ R17WH, R18WH, R19WH, R20WH, R21WH, R22WH, R23WH, R24WH,
+ R25WH, R26WH, R27WH, R28WH, R29WH, R30WH, R31WH)>;
def GR32 : RegisterClass<"X86", [i32], 32,
- (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
- R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)>;
+ (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, R8D, R9D,
+ R10D, R11D, R16D, R17D, R18D, R19D, R20D, R21D, R22D,
+ R23D, R24D, R25D, R26D, R27D, R28D, R29D, R30D, R31D,
+ R14D, R15D, R12D, R13D)>;
// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
// RIP isn't really a register and it can't be used anywhere except in an
@@ -424,8 +583,9 @@ def GR32 : RegisterClass<"X86", [i32], 32,
// FIXME: it *does* cause trouble - CheckBaseRegAndIndexReg() has extra
// tests because of the inclusion of RIP in this register class.
def GR64 : RegisterClass<"X86", [i64], 64,
- (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
- RBX, R14, R15, R12, R13, RBP, RSP, RIP)>;
+ (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, R16, R17,
+ R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, RBX, R14, R15, R12, R13, RBP, RSP, RIP)>;
// GR64PLTSafe - 64-bit GPRs without R10, R11, RSP and RIP. Could be used when
// emitting code for intrinsics, which use implicit input registers.
@@ -491,6 +651,27 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32,
// GR64_NOREX - GR64 registers which do not require a REX prefix.
def GR64_NOREX : RegisterClass<"X86", [i64], 64,
(add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)>;
+// GeneratePressureSet = 0 here is a temporary workaround for a large number
+// of lit test failures. Whether to enable it in the future still needs
+// discussion.
+let GeneratePressureSet = 0 in {
+// GR8_NOREX2 - GR8 registers which do not require a REX2 prefix.
+def GR8_NOREX2 : RegisterClass<"X86", [i8], 8,
+ (sub GR8, (sequence "R%uB", 16, 31))> {
+ let AltOrders = [(sub GR8_NOREX2, AH, BH, CH, DH)];
+ let AltOrderSelect = [{
+ return MF.getSubtarget<X86Subtarget>().is64Bit();
+ }];
+}
+// GR16_NOREX2 - GR16 registers which do not require a REX2 prefix.
+def GR16_NOREX2 : RegisterClass<"X86", [i16], 16,
+ (sub GR16, (sequence "R%uW", 16, 31))>;
+// GR32_NOREX2 - GR32 registers which do not require a REX2 prefix.
+def GR32_NOREX2 : RegisterClass<"X86", [i32], 32,
+ (sub GR32, (sequence "R%uD", 16, 31))>;
+// GR64_NOREX2 - GR64 registers which do not require a REX2 prefix.
+def GR64_NOREX2 : RegisterClass<"X86", [i64], 64,
+ (sub GR64, (sequence "R%u", 16, 31))>;
+}
// GR32_NOSP - GR32 registers except ESP.
def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)>;
@@ -506,9 +687,18 @@ def GR32_NOREX_NOSP : RegisterClass<"X86", [i32], 32,
// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
(and GR64_NOREX, GR64_NOSP)>;
+let GeneratePressureSet = 0 in {
+// GR32_NOREX2_NOSP - GR32_NOREX2 registers except ESP.
+def GR32_NOREX2_NOSP : RegisterClass<"X86", [i32], 32,
+ (sub GR32_NOREX2, ESP)>;
+
+// GR64_NOREX2_NOSP - GR64_NOREX2 registers except RSP, RIP.
+def GR64_NOREX2_NOSP : RegisterClass<"X86", [i64], 64,
+ (sub GR64_NOREX2, RSP, RIP)>;
+}
// Register classes used for ABIs that use 32-bit address accesses,
-// while using the whole x84_64 ISA.
+// while using the whole x86_64 ISA.
// In such cases, it is fine to use RIP as we are sure the 32 high
// bits are not set. We do not need variants for NOSP as RIP is not
@@ -627,10 +817,6 @@ def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;}
def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;}
def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;}
-// Mask register pairs
-def KPAIRS : RegisterTuples<[sub_mask_0, sub_mask_1],
- [(add K0, K2, K4, K6), (add K1, K3, K5, K7)]>;
-
def VK1PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
def VK2PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
def VK4PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ReplaceableInstrs.def b/contrib/llvm-project/llvm/lib/Target/X86/X86ReplaceableInstrs.def
new file mode 100644
index 000000000000..e1383198d3fe
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ReplaceableInstrs.def
@@ -0,0 +1,426 @@
+//===- X86ReplaceableInstrs.def ----------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// These are the replaceable SSE instructions. Some of these have Int variants
+// that we don't include here. We don't want to replace instructions selected
+// by intrinsics.
+
+#define ENTRY(A, B, C) {X86::A, X86::B, X86::C},
+static const uint16_t ReplaceableInstrs[][3] = {
+// PackedSingle, PackedDouble, PackedInt
+ENTRY(MOVAPSmr, MOVAPDmr, MOVDQAmr)
+ENTRY(MOVAPSrm, MOVAPDrm, MOVDQArm)
+ENTRY(MOVAPSrr, MOVAPDrr, MOVDQArr)
+ENTRY(MOVUPSmr, MOVUPDmr, MOVDQUmr)
+ENTRY(MOVUPSrm, MOVUPDrm, MOVDQUrm)
+ENTRY(MOVLPSmr, MOVLPDmr, MOVPQI2QImr)
+ENTRY(MOVSDmr, MOVSDmr, MOVPQI2QImr)
+ENTRY(MOVSSmr, MOVSSmr, MOVPDI2DImr)
+ENTRY(MOVSDrm, MOVSDrm, MOVQI2PQIrm)
+ENTRY(MOVSDrm_alt, MOVSDrm_alt, MOVQI2PQIrm)
+ENTRY(MOVSSrm, MOVSSrm, MOVDI2PDIrm)
+ENTRY(MOVSSrm_alt, MOVSSrm_alt, MOVDI2PDIrm)
+ENTRY(MOVNTPSmr, MOVNTPDmr, MOVNTDQmr)
+ENTRY(ANDNPSrm, ANDNPDrm, PANDNrm)
+ENTRY(ANDNPSrr, ANDNPDrr, PANDNrr)
+ENTRY(ANDPSrm, ANDPDrm, PANDrm)
+ENTRY(ANDPSrr, ANDPDrr, PANDrr)
+ENTRY(ORPSrm, ORPDrm, PORrm)
+ENTRY(ORPSrr, ORPDrr, PORrr)
+ENTRY(XORPSrm, XORPDrm, PXORrm)
+ENTRY(XORPSrr, XORPDrr, PXORrr)
+ENTRY(UNPCKLPDrm, UNPCKLPDrm, PUNPCKLQDQrm)
+ENTRY(MOVLHPSrr, UNPCKLPDrr, PUNPCKLQDQrr)
+ENTRY(UNPCKHPDrm, UNPCKHPDrm, PUNPCKHQDQrm)
+ENTRY(UNPCKHPDrr, UNPCKHPDrr, PUNPCKHQDQrr)
+ENTRY(UNPCKLPSrm, UNPCKLPSrm, PUNPCKLDQrm)
+ENTRY(UNPCKLPSrr, UNPCKLPSrr, PUNPCKLDQrr)
+ENTRY(UNPCKHPSrm, UNPCKHPSrm, PUNPCKHDQrm)
+ENTRY(UNPCKHPSrr, UNPCKHPSrr, PUNPCKHDQrr)
+ENTRY(EXTRACTPSmr, EXTRACTPSmr, PEXTRDmr)
+ENTRY(EXTRACTPSrr, EXTRACTPSrr, PEXTRDrr)
+// AVX 128-bit support
+ENTRY(VMOVAPSmr, VMOVAPDmr, VMOVDQAmr)
+ENTRY(VMOVAPSrm, VMOVAPDrm, VMOVDQArm)
+ENTRY(VMOVAPSrr, VMOVAPDrr, VMOVDQArr)
+ENTRY(VMOVUPSmr, VMOVUPDmr, VMOVDQUmr)
+ENTRY(VMOVUPSrm, VMOVUPDrm, VMOVDQUrm)
+ENTRY(VMOVLPSmr, VMOVLPDmr, VMOVPQI2QImr)
+ENTRY(VMOVSDmr, VMOVSDmr, VMOVPQI2QImr)
+ENTRY(VMOVSSmr, VMOVSSmr, VMOVPDI2DImr)
+ENTRY(VMOVSDrm, VMOVSDrm, VMOVQI2PQIrm)
+ENTRY(VMOVSDrm_alt, VMOVSDrm_alt, VMOVQI2PQIrm)
+ENTRY(VMOVSSrm, VMOVSSrm, VMOVDI2PDIrm)
+ENTRY(VMOVSSrm_alt, VMOVSSrm_alt, VMOVDI2PDIrm)
+ENTRY(VMOVNTPSmr, VMOVNTPDmr, VMOVNTDQmr)
+ENTRY(VANDNPSrm, VANDNPDrm, VPANDNrm)
+ENTRY(VANDNPSrr, VANDNPDrr, VPANDNrr)
+ENTRY(VANDPSrm, VANDPDrm, VPANDrm)
+ENTRY(VANDPSrr, VANDPDrr, VPANDrr)
+ENTRY(VORPSrm, VORPDrm, VPORrm)
+ENTRY(VORPSrr, VORPDrr, VPORrr)
+ENTRY(VXORPSrm, VXORPDrm, VPXORrm)
+ENTRY(VXORPSrr, VXORPDrr, VPXORrr)
+ENTRY(VUNPCKLPDrm, VUNPCKLPDrm, VPUNPCKLQDQrm)
+ENTRY(VMOVLHPSrr, VUNPCKLPDrr, VPUNPCKLQDQrr)
+ENTRY(VUNPCKHPDrm, VUNPCKHPDrm, VPUNPCKHQDQrm)
+ENTRY(VUNPCKHPDrr, VUNPCKHPDrr, VPUNPCKHQDQrr)
+ENTRY(VUNPCKLPSrm, VUNPCKLPSrm, VPUNPCKLDQrm)
+ENTRY(VUNPCKLPSrr, VUNPCKLPSrr, VPUNPCKLDQrr)
+ENTRY(VUNPCKHPSrm, VUNPCKHPSrm, VPUNPCKHDQrm)
+ENTRY(VUNPCKHPSrr, VUNPCKHPSrr, VPUNPCKHDQrr)
+ENTRY(VEXTRACTPSmr, VEXTRACTPSmr, VPEXTRDmr)
+ENTRY(VEXTRACTPSrr, VEXTRACTPSrr, VPEXTRDrr)
+// AVX 256-bit support
+ENTRY(VMOVAPSYmr, VMOVAPDYmr, VMOVDQAYmr)
+ENTRY(VMOVAPSYrm, VMOVAPDYrm, VMOVDQAYrm)
+ENTRY(VMOVAPSYrr, VMOVAPDYrr, VMOVDQAYrr)
+ENTRY(VMOVUPSYmr, VMOVUPDYmr, VMOVDQUYmr)
+ENTRY(VMOVUPSYrm, VMOVUPDYrm, VMOVDQUYrm)
+ENTRY(VMOVNTPSYmr, VMOVNTPDYmr, VMOVNTDQYmr)
+ENTRY(VPERMPSYrm, VPERMPSYrm, VPERMDYrm)
+ENTRY(VPERMPSYrr, VPERMPSYrr, VPERMDYrr)
+ENTRY(VPERMPDYmi, VPERMPDYmi, VPERMQYmi)
+ENTRY(VPERMPDYri, VPERMPDYri, VPERMQYri)
+// AVX512 support
+ENTRY(VMOVLPSZ128mr, VMOVLPDZ128mr, VMOVPQI2QIZmr)
+ENTRY(VMOVNTPSZ128mr, VMOVNTPDZ128mr, VMOVNTDQZ128mr)
+ENTRY(VMOVNTPSZ256mr, VMOVNTPDZ256mr, VMOVNTDQZ256mr)
+ENTRY(VMOVNTPSZmr, VMOVNTPDZmr, VMOVNTDQZmr)
+ENTRY(VMOVSDZmr, VMOVSDZmr, VMOVPQI2QIZmr)
+ENTRY(VMOVSSZmr, VMOVSSZmr, VMOVPDI2DIZmr)
+ENTRY(VMOVSDZrm, VMOVSDZrm, VMOVQI2PQIZrm)
+ENTRY(VMOVSDZrm_alt, VMOVSDZrm_alt, VMOVQI2PQIZrm)
+ENTRY(VMOVSSZrm, VMOVSSZrm, VMOVDI2PDIZrm)
+ENTRY(VMOVSSZrm_alt, VMOVSSZrm_alt, VMOVDI2PDIZrm)
+ENTRY(VBROADCASTSSZ128rr, VBROADCASTSSZ128rr, VPBROADCASTDZ128rr)
+ENTRY(VBROADCASTSSZ128rm, VBROADCASTSSZ128rm, VPBROADCASTDZ128rm)
+ENTRY(VBROADCASTSSZ256rr, VBROADCASTSSZ256rr, VPBROADCASTDZ256rr)
+ENTRY(VBROADCASTSSZ256rm, VBROADCASTSSZ256rm, VPBROADCASTDZ256rm)
+ENTRY(VBROADCASTSSZrr, VBROADCASTSSZrr, VPBROADCASTDZrr)
+ENTRY(VBROADCASTSSZrm, VBROADCASTSSZrm, VPBROADCASTDZrm)
+ENTRY(VMOVDDUPZ128rr, VMOVDDUPZ128rr, VPBROADCASTQZ128rr)
+ENTRY(VMOVDDUPZ128rm, VMOVDDUPZ128rm, VPBROADCASTQZ128rm)
+ENTRY(VBROADCASTSDZ256rr, VBROADCASTSDZ256rr, VPBROADCASTQZ256rr)
+ENTRY(VBROADCASTSDZ256rm, VBROADCASTSDZ256rm, VPBROADCASTQZ256rm)
+ENTRY(VBROADCASTSDZrr, VBROADCASTSDZrr, VPBROADCASTQZrr)
+ENTRY(VBROADCASTSDZrm, VBROADCASTSDZrm, VPBROADCASTQZrm)
+ENTRY(VINSERTF32x4Zrr, VINSERTF32x4Zrr, VINSERTI32x4Zrr)
+ENTRY(VINSERTF32x4Zrm, VINSERTF32x4Zrm, VINSERTI32x4Zrm)
+ENTRY(VINSERTF32x8Zrr, VINSERTF32x8Zrr, VINSERTI32x8Zrr)
+ENTRY(VINSERTF32x8Zrm, VINSERTF32x8Zrm, VINSERTI32x8Zrm)
+ENTRY(VINSERTF64x2Zrr, VINSERTF64x2Zrr, VINSERTI64x2Zrr)
+ENTRY(VINSERTF64x2Zrm, VINSERTF64x2Zrm, VINSERTI64x2Zrm)
+ENTRY(VINSERTF64x4Zrr, VINSERTF64x4Zrr, VINSERTI64x4Zrr)
+ENTRY(VINSERTF64x4Zrm, VINSERTF64x4Zrm, VINSERTI64x4Zrm)
+ENTRY(VINSERTF32x4Z256rr, VINSERTF32x4Z256rr, VINSERTI32x4Z256rr)
+ENTRY(VINSERTF32x4Z256rm, VINSERTF32x4Z256rm, VINSERTI32x4Z256rm)
+ENTRY(VINSERTF64x2Z256rr, VINSERTF64x2Z256rr, VINSERTI64x2Z256rr)
+ENTRY(VINSERTF64x2Z256rm, VINSERTF64x2Z256rm, VINSERTI64x2Z256rm)
+ENTRY(VEXTRACTF32x4Zrr, VEXTRACTF32x4Zrr, VEXTRACTI32x4Zrr)
+ENTRY(VEXTRACTF32x4Zmr, VEXTRACTF32x4Zmr, VEXTRACTI32x4Zmr)
+ENTRY(VEXTRACTF32x8Zrr, VEXTRACTF32x8Zrr, VEXTRACTI32x8Zrr)
+ENTRY(VEXTRACTF32x8Zmr, VEXTRACTF32x8Zmr, VEXTRACTI32x8Zmr)
+ENTRY(VEXTRACTF64x2Zrr, VEXTRACTF64x2Zrr, VEXTRACTI64x2Zrr)
+ENTRY(VEXTRACTF64x2Zmr, VEXTRACTF64x2Zmr, VEXTRACTI64x2Zmr)
+ENTRY(VEXTRACTF64x4Zrr, VEXTRACTF64x4Zrr, VEXTRACTI64x4Zrr)
+ENTRY(VEXTRACTF64x4Zmr, VEXTRACTF64x4Zmr, VEXTRACTI64x4Zmr)
+ENTRY(VEXTRACTF32x4Z256rr, VEXTRACTF32x4Z256rr, VEXTRACTI32x4Z256rr)
+ENTRY(VEXTRACTF32x4Z256mr, VEXTRACTF32x4Z256mr, VEXTRACTI32x4Z256mr)
+ENTRY(VEXTRACTF64x2Z256rr, VEXTRACTF64x2Z256rr, VEXTRACTI64x2Z256rr)
+ENTRY(VEXTRACTF64x2Z256mr, VEXTRACTF64x2Z256mr, VEXTRACTI64x2Z256mr)
+ENTRY(VPERMILPSmi, VPERMILPSmi, VPSHUFDmi)
+ENTRY(VPERMILPSri, VPERMILPSri, VPSHUFDri)
+ENTRY(VPERMILPSZ128mi, VPERMILPSZ128mi, VPSHUFDZ128mi)
+ENTRY(VPERMILPSZ128ri, VPERMILPSZ128ri, VPSHUFDZ128ri)
+ENTRY(VPERMILPSZ256mi, VPERMILPSZ256mi, VPSHUFDZ256mi)
+ENTRY(VPERMILPSZ256ri, VPERMILPSZ256ri, VPSHUFDZ256ri)
+ENTRY(VPERMILPSZmi, VPERMILPSZmi, VPSHUFDZmi)
+ENTRY(VPERMILPSZri, VPERMILPSZri, VPSHUFDZri)
+ENTRY(VPERMPSZ256rm, VPERMPSZ256rm, VPERMDZ256rm)
+ENTRY(VPERMPSZ256rr, VPERMPSZ256rr, VPERMDZ256rr)
+ENTRY(VPERMPDZ256mi, VPERMPDZ256mi, VPERMQZ256mi)
+ENTRY(VPERMPDZ256ri, VPERMPDZ256ri, VPERMQZ256ri)
+ENTRY(VPERMPDZ256rm, VPERMPDZ256rm, VPERMQZ256rm)
+ENTRY(VPERMPDZ256rr, VPERMPDZ256rr, VPERMQZ256rr)
+ENTRY(VPERMPSZrm, VPERMPSZrm, VPERMDZrm)
+ENTRY(VPERMPSZrr, VPERMPSZrr, VPERMDZrr)
+ENTRY(VPERMPDZmi, VPERMPDZmi, VPERMQZmi)
+ENTRY(VPERMPDZri, VPERMPDZri, VPERMQZri)
+ENTRY(VPERMPDZrm, VPERMPDZrm, VPERMQZrm)
+ENTRY(VPERMPDZrr, VPERMPDZrr, VPERMQZrr)
+ENTRY(VUNPCKLPDZ256rm, VUNPCKLPDZ256rm, VPUNPCKLQDQZ256rm)
+ENTRY(VUNPCKLPDZ256rr, VUNPCKLPDZ256rr, VPUNPCKLQDQZ256rr)
+ENTRY(VUNPCKHPDZ256rm, VUNPCKHPDZ256rm, VPUNPCKHQDQZ256rm)
+ENTRY(VUNPCKHPDZ256rr, VUNPCKHPDZ256rr, VPUNPCKHQDQZ256rr)
+ENTRY(VUNPCKLPSZ256rm, VUNPCKLPSZ256rm, VPUNPCKLDQZ256rm)
+ENTRY(VUNPCKLPSZ256rr, VUNPCKLPSZ256rr, VPUNPCKLDQZ256rr)
+ENTRY(VUNPCKHPSZ256rm, VUNPCKHPSZ256rm, VPUNPCKHDQZ256rm)
+ENTRY(VUNPCKHPSZ256rr, VUNPCKHPSZ256rr, VPUNPCKHDQZ256rr)
+ENTRY(VUNPCKLPDZ128rm, VUNPCKLPDZ128rm, VPUNPCKLQDQZ128rm)
+ENTRY(VMOVLHPSZrr, VUNPCKLPDZ128rr, VPUNPCKLQDQZ128rr)
+ENTRY(VUNPCKHPDZ128rm, VUNPCKHPDZ128rm, VPUNPCKHQDQZ128rm)
+ENTRY(VUNPCKHPDZ128rr, VUNPCKHPDZ128rr, VPUNPCKHQDQZ128rr)
+ENTRY(VUNPCKLPSZ128rm, VUNPCKLPSZ128rm, VPUNPCKLDQZ128rm)
+ENTRY(VUNPCKLPSZ128rr, VUNPCKLPSZ128rr, VPUNPCKLDQZ128rr)
+ENTRY(VUNPCKHPSZ128rm, VUNPCKHPSZ128rm, VPUNPCKHDQZ128rm)
+ENTRY(VUNPCKHPSZ128rr, VUNPCKHPSZ128rr, VPUNPCKHDQZ128rr)
+ENTRY(VUNPCKLPDZrm, VUNPCKLPDZrm, VPUNPCKLQDQZrm)
+ENTRY(VUNPCKLPDZrr, VUNPCKLPDZrr, VPUNPCKLQDQZrr)
+ENTRY(VUNPCKHPDZrm, VUNPCKHPDZrm, VPUNPCKHQDQZrm)
+ENTRY(VUNPCKHPDZrr, VUNPCKHPDZrr, VPUNPCKHQDQZrr)
+ENTRY(VUNPCKLPSZrm, VUNPCKLPSZrm, VPUNPCKLDQZrm)
+ENTRY(VUNPCKLPSZrr, VUNPCKLPSZrr, VPUNPCKLDQZrr)
+ENTRY(VUNPCKHPSZrm, VUNPCKHPSZrm, VPUNPCKHDQZrm)
+ENTRY(VUNPCKHPSZrr, VUNPCKHPSZrr, VPUNPCKHDQZrr)
+ENTRY(VEXTRACTPSZmr, VEXTRACTPSZmr, VPEXTRDZmr)
+ENTRY(VEXTRACTPSZrr, VEXTRACTPSZrr, VPEXTRDZrr)
+};
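
The .def file follows the usual X-macro layout: ENTRY is defined once at the top, each table expands to rows of opcode triples, and ENTRY is #undef'd (and redefined for the four-column tables) further down, so including the file simply materializes plain uint16_t arrays. A hedged sketch of the lookup such a three-column table supports (lookupDomainTable is an illustrative name, not the helper X86InstrInfo actually uses):

#include <cstddef>
#include <cstdint>

// Columns are PackedSingle (0), PackedDouble (1), PackedInt (2), matching
// the comment above each table. WantedDomain must be one of those indices.
static uint16_t lookupDomainTable(const uint16_t (*Table)[3], size_t Rows,
                                  uint16_t Opcode, unsigned WantedDomain) {
  for (size_t I = 0; I != Rows; ++I)
    for (unsigned Col = 0; Col != 3; ++Col)
      if (Table[I][Col] == Opcode)
        return Table[I][WantedDomain]; // same operation, requested domain
  return 0; // opcode is not domain-replaceable
}

Passing ReplaceableInstrs and its row count, an ANDPSrr opcode queried with WantedDomain 2 would come back as PANDrr, which is the whole point of the table.
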
+
+static const uint16_t ReplaceableInstrsAVX2[][3] = {
+// PackedSingle, PackedDouble, PackedInt
+ENTRY(VANDNPSYrm, VANDNPDYrm, VPANDNYrm)
+ENTRY(VANDNPSYrr, VANDNPDYrr, VPANDNYrr)
+ENTRY(VANDPSYrm, VANDPDYrm, VPANDYrm)
+ENTRY(VANDPSYrr, VANDPDYrr, VPANDYrr)
+ENTRY(VORPSYrm, VORPDYrm, VPORYrm)
+ENTRY(VORPSYrr, VORPDYrr, VPORYrr)
+ENTRY(VXORPSYrm, VXORPDYrm, VPXORYrm)
+ENTRY(VXORPSYrr, VXORPDYrr, VPXORYrr)
+ENTRY(VPERM2F128rm, VPERM2F128rm, VPERM2I128rm)
+ENTRY(VPERM2F128rr, VPERM2F128rr, VPERM2I128rr)
+ENTRY(VBROADCASTSSrm, VBROADCASTSSrm, VPBROADCASTDrm)
+ENTRY(VBROADCASTSSrr, VBROADCASTSSrr, VPBROADCASTDrr)
+ENTRY(VMOVDDUPrm, VMOVDDUPrm, VPBROADCASTQrm)
+ENTRY(VMOVDDUPrr, VMOVDDUPrr, VPBROADCASTQrr)
+ENTRY(VBROADCASTSSYrr, VBROADCASTSSYrr, VPBROADCASTDYrr)
+ENTRY(VBROADCASTSSYrm, VBROADCASTSSYrm, VPBROADCASTDYrm)
+ENTRY(VBROADCASTSDYrr, VBROADCASTSDYrr, VPBROADCASTQYrr)
+ENTRY(VBROADCASTSDYrm, VBROADCASTSDYrm, VPBROADCASTQYrm)
+ENTRY(VBROADCASTF128rm, VBROADCASTF128rm, VBROADCASTI128rm)
+ENTRY(VBLENDPSYrri, VBLENDPSYrri, VPBLENDDYrri)
+ENTRY(VBLENDPSYrmi, VBLENDPSYrmi, VPBLENDDYrmi)
+ENTRY(VPERMILPSYmi, VPERMILPSYmi, VPSHUFDYmi)
+ENTRY(VPERMILPSYri, VPERMILPSYri, VPSHUFDYri)
+ENTRY(VUNPCKLPDYrm, VUNPCKLPDYrm, VPUNPCKLQDQYrm)
+ENTRY(VUNPCKLPDYrr, VUNPCKLPDYrr, VPUNPCKLQDQYrr)
+ENTRY(VUNPCKHPDYrm, VUNPCKHPDYrm, VPUNPCKHQDQYrm)
+ENTRY(VUNPCKHPDYrr, VUNPCKHPDYrr, VPUNPCKHQDQYrr)
+ENTRY(VUNPCKLPSYrm, VUNPCKLPSYrm, VPUNPCKLDQYrm)
+ENTRY(VUNPCKLPSYrr, VUNPCKLPSYrr, VPUNPCKLDQYrr)
+ENTRY(VUNPCKHPSYrm, VUNPCKHPSYrm, VPUNPCKHDQYrm)
+ENTRY(VUNPCKHPSYrr, VUNPCKHPSYrr, VPUNPCKHDQYrr)
+};
+
+static const uint16_t ReplaceableInstrsFP[][3] = {
+// PackedSingle, PackedDouble
+ENTRY(MOVLPSrm, MOVLPDrm, INSTRUCTION_LIST_END)
+ENTRY(MOVHPSrm, MOVHPDrm, INSTRUCTION_LIST_END)
+ENTRY(MOVHPSmr, MOVHPDmr, INSTRUCTION_LIST_END)
+ENTRY(VMOVLPSrm, VMOVLPDrm, INSTRUCTION_LIST_END)
+ENTRY(VMOVHPSrm, VMOVHPDrm, INSTRUCTION_LIST_END)
+ENTRY(VMOVHPSmr, VMOVHPDmr, INSTRUCTION_LIST_END)
+ENTRY(VMOVLPSZ128rm, VMOVLPDZ128rm, INSTRUCTION_LIST_END)
+ENTRY(VMOVHPSZ128rm, VMOVHPDZ128rm, INSTRUCTION_LIST_END)
+ENTRY(VMOVHPSZ128mr, VMOVHPDZ128mr, INSTRUCTION_LIST_END)
+};
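
ReplaceableInstrsFP only has meaningful PackedSingle and PackedDouble columns; the PackedInt slot holds the sentinel INSTRUCTION_LIST_END because MOVLPS/MOVHPS-style loads and stores have no integer-domain form, so any lookup must refuse that column. A standalone sketch of the guard, with a stand-in constant where real code would use the generated X86::INSTRUCTION_LIST_END enumerator:

#include <cstdint>

// Stand-in for X86::INSTRUCTION_LIST_END; the generated opcode enum
// supplies the real value.
static constexpr uint16_t kNoReplacement = 0xFFFF;

static bool hasReplacement(uint16_t Candidate) {
  return Candidate != kNoReplacement; // sentinel means "no such domain"
}
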
+
+static const uint16_t ReplaceableInstrsAVX2InsertExtract[][3] = {
+// PackedSingle, PackedDouble, PackedInt
+ENTRY(VEXTRACTF128mr, VEXTRACTF128mr, VEXTRACTI128mr)
+ENTRY(VEXTRACTF128rr, VEXTRACTF128rr, VEXTRACTI128rr)
+ENTRY(VINSERTF128rm, VINSERTF128rm, VINSERTI128rm)
+ENTRY(VINSERTF128rr, VINSERTF128rr, VINSERTI128rr)
+};
+
+// NOTE: These should only be used by the custom domain methods.
+static const uint16_t ReplaceableBlendInstrs[][3] = {
+// PackedSingle, PackedDouble, PackedInt
+ENTRY(BLENDPSrmi, BLENDPDrmi, PBLENDWrmi)
+ENTRY(BLENDPSrri, BLENDPDrri, PBLENDWrri)
+ENTRY(VBLENDPSrmi, VBLENDPDrmi, VPBLENDWrmi)
+ENTRY(VBLENDPSrri, VBLENDPDrri, VPBLENDWrri)
+ENTRY(VBLENDPSYrmi, VBLENDPDYrmi, VPBLENDWYrmi)
+ENTRY(VBLENDPSYrri, VBLENDPDYrri, VPBLENDWYrri)
+};
+
+static const uint16_t ReplaceableBlendAVX2Instrs[][3] = {
+// PackedSingle, PackedDouble, PackedInt
+ENTRY(VBLENDPSrmi, VBLENDPDrmi, VPBLENDDrmi)
+ENTRY(VBLENDPSrri, VBLENDPDrri, VPBLENDDrri)
+ENTRY(VBLENDPSYrmi, VBLENDPDYrmi, VPBLENDDYrmi)
+ENTRY(VBLENDPSYrri, VBLENDPDYrri, VPBLENDDYrri)
+};
+
+#undef ENTRY
+#define ENTRY(A, B, C, D) {X86::A, X86::B, X86::C, X86::D},
+static const uint16_t ReplaceableInstrsAVX512[][4] = {
+// Two integer columns for 64-bit and 32-bit elements.
+// PackedSingle, PackedDouble, PackedInt, PackedInt
+ENTRY(VMOVAPSZ128mr, VMOVAPDZ128mr, VMOVDQA64Z128mr, VMOVDQA32Z128mr)
+ENTRY(VMOVAPSZ128rm, VMOVAPDZ128rm, VMOVDQA64Z128rm, VMOVDQA32Z128rm)
+ENTRY(VMOVAPSZ128rr, VMOVAPDZ128rr, VMOVDQA64Z128rr, VMOVDQA32Z128rr)
+ENTRY(VMOVUPSZ128mr, VMOVUPDZ128mr, VMOVDQU64Z128mr, VMOVDQU32Z128mr)
+ENTRY(VMOVUPSZ128rm, VMOVUPDZ128rm, VMOVDQU64Z128rm, VMOVDQU32Z128rm)
+ENTRY(VMOVAPSZ256mr, VMOVAPDZ256mr, VMOVDQA64Z256mr, VMOVDQA32Z256mr)
+ENTRY(VMOVAPSZ256rm, VMOVAPDZ256rm, VMOVDQA64Z256rm, VMOVDQA32Z256rm)
+ENTRY(VMOVAPSZ256rr, VMOVAPDZ256rr, VMOVDQA64Z256rr, VMOVDQA32Z256rr)
+ENTRY(VMOVUPSZ256mr, VMOVUPDZ256mr, VMOVDQU64Z256mr, VMOVDQU32Z256mr)
+ENTRY(VMOVUPSZ256rm, VMOVUPDZ256rm, VMOVDQU64Z256rm, VMOVDQU32Z256rm)
+ENTRY(VMOVAPSZmr, VMOVAPDZmr, VMOVDQA64Zmr, VMOVDQA32Zmr)
+ENTRY(VMOVAPSZrm, VMOVAPDZrm, VMOVDQA64Zrm, VMOVDQA32Zrm)
+ENTRY(VMOVAPSZrr, VMOVAPDZrr, VMOVDQA64Zrr, VMOVDQA32Zrr)
+ENTRY(VMOVUPSZmr, VMOVUPDZmr, VMOVDQU64Zmr, VMOVDQU32Zmr)
+ENTRY(VMOVUPSZrm, VMOVUPDZrm, VMOVDQU64Zrm, VMOVDQU32Zrm)
+};
+
+static const uint16_t ReplaceableInstrsAVX512DQ[][4] = {
+// Two integer columns for 64-bit and 32-bit elements.
+// PackedSingle, PackedDouble, PackedInt, PackedInt
+ENTRY(VANDNPSZ128rm, VANDNPDZ128rm, VPANDNQZ128rm, VPANDNDZ128rm)
+ENTRY(VANDNPSZ128rr, VANDNPDZ128rr, VPANDNQZ128rr, VPANDNDZ128rr)
+ENTRY(VANDPSZ128rm, VANDPDZ128rm, VPANDQZ128rm, VPANDDZ128rm)
+ENTRY(VANDPSZ128rr, VANDPDZ128rr, VPANDQZ128rr, VPANDDZ128rr)
+ENTRY(VORPSZ128rm, VORPDZ128rm, VPORQZ128rm, VPORDZ128rm)
+ENTRY(VORPSZ128rr, VORPDZ128rr, VPORQZ128rr, VPORDZ128rr)
+ENTRY(VXORPSZ128rm, VXORPDZ128rm, VPXORQZ128rm, VPXORDZ128rm)
+ENTRY(VXORPSZ128rr, VXORPDZ128rr, VPXORQZ128rr, VPXORDZ128rr)
+ENTRY(VANDNPSZ256rm, VANDNPDZ256rm, VPANDNQZ256rm, VPANDNDZ256rm)
+ENTRY(VANDNPSZ256rr, VANDNPDZ256rr, VPANDNQZ256rr, VPANDNDZ256rr)
+ENTRY(VANDPSZ256rm, VANDPDZ256rm, VPANDQZ256rm, VPANDDZ256rm)
+ENTRY(VANDPSZ256rr, VANDPDZ256rr, VPANDQZ256rr, VPANDDZ256rr)
+ENTRY(VORPSZ256rm, VORPDZ256rm, VPORQZ256rm, VPORDZ256rm)
+ENTRY(VORPSZ256rr, VORPDZ256rr, VPORQZ256rr, VPORDZ256rr)
+ENTRY(VXORPSZ256rm, VXORPDZ256rm, VPXORQZ256rm, VPXORDZ256rm)
+ENTRY(VXORPSZ256rr, VXORPDZ256rr, VPXORQZ256rr, VPXORDZ256rr)
+ENTRY(VANDNPSZrm, VANDNPDZrm, VPANDNQZrm, VPANDNDZrm)
+ENTRY(VANDNPSZrr, VANDNPDZrr, VPANDNQZrr, VPANDNDZrr)
+ENTRY(VANDPSZrm, VANDPDZrm, VPANDQZrm, VPANDDZrm)
+ENTRY(VANDPSZrr, VANDPDZrr, VPANDQZrr, VPANDDZrr)
+ENTRY(VORPSZrm, VORPDZrm, VPORQZrm, VPORDZrm)
+ENTRY(VORPSZrr, VORPDZrr, VPORQZrr, VPORDZrr)
+ENTRY(VXORPSZrm, VXORPDZrm, VPXORQZrm, VPXORDZrm)
+ENTRY(VXORPSZrr, VXORPDZrr, VPXORQZrr, VPXORDZrr)
+};
+
+static const uint16_t ReplaceableInstrsAVX512DQMasked[][4] = {
+// Two integer columns for 64-bit and 32-bit elements.
+// PackedSingle, PackedDouble, PackedInt, PackedInt
+ENTRY(VANDNPSZ128rmk, VANDNPDZ128rmk, VPANDNQZ128rmk, VPANDNDZ128rmk)
+ENTRY(VANDNPSZ128rmkz, VANDNPDZ128rmkz, VPANDNQZ128rmkz, VPANDNDZ128rmkz)
+ENTRY(VANDNPSZ128rrk, VANDNPDZ128rrk, VPANDNQZ128rrk, VPANDNDZ128rrk)
+ENTRY(VANDNPSZ128rrkz, VANDNPDZ128rrkz, VPANDNQZ128rrkz, VPANDNDZ128rrkz)
+ENTRY(VANDPSZ128rmk, VANDPDZ128rmk, VPANDQZ128rmk, VPANDDZ128rmk)
+ENTRY(VANDPSZ128rmkz, VANDPDZ128rmkz, VPANDQZ128rmkz, VPANDDZ128rmkz)
+ENTRY(VANDPSZ128rrk, VANDPDZ128rrk, VPANDQZ128rrk, VPANDDZ128rrk)
+ENTRY(VANDPSZ128rrkz, VANDPDZ128rrkz, VPANDQZ128rrkz, VPANDDZ128rrkz)
+ENTRY(VORPSZ128rmk, VORPDZ128rmk, VPORQZ128rmk, VPORDZ128rmk)
+ENTRY(VORPSZ128rmkz, VORPDZ128rmkz, VPORQZ128rmkz, VPORDZ128rmkz)
+ENTRY(VORPSZ128rrk, VORPDZ128rrk, VPORQZ128rrk, VPORDZ128rrk)
+ENTRY(VORPSZ128rrkz, VORPDZ128rrkz, VPORQZ128rrkz, VPORDZ128rrkz)
+ENTRY(VXORPSZ128rmk, VXORPDZ128rmk, VPXORQZ128rmk, VPXORDZ128rmk)
+ENTRY(VXORPSZ128rmkz, VXORPDZ128rmkz, VPXORQZ128rmkz, VPXORDZ128rmkz)
+ENTRY(VXORPSZ128rrk, VXORPDZ128rrk, VPXORQZ128rrk, VPXORDZ128rrk)
+ENTRY(VXORPSZ128rrkz, VXORPDZ128rrkz, VPXORQZ128rrkz, VPXORDZ128rrkz)
+ENTRY(VANDNPSZ256rmk, VANDNPDZ256rmk, VPANDNQZ256rmk, VPANDNDZ256rmk)
+ENTRY(VANDNPSZ256rmkz, VANDNPDZ256rmkz, VPANDNQZ256rmkz, VPANDNDZ256rmkz)
+ENTRY(VANDNPSZ256rrk, VANDNPDZ256rrk, VPANDNQZ256rrk, VPANDNDZ256rrk)
+ENTRY(VANDNPSZ256rrkz, VANDNPDZ256rrkz, VPANDNQZ256rrkz, VPANDNDZ256rrkz)
+ENTRY(VANDPSZ256rmk, VANDPDZ256rmk, VPANDQZ256rmk, VPANDDZ256rmk)
+ENTRY(VANDPSZ256rmkz, VANDPDZ256rmkz, VPANDQZ256rmkz, VPANDDZ256rmkz)
+ENTRY(VANDPSZ256rrk, VANDPDZ256rrk, VPANDQZ256rrk, VPANDDZ256rrk)
+ENTRY(VANDPSZ256rrkz, VANDPDZ256rrkz, VPANDQZ256rrkz, VPANDDZ256rrkz)
+ENTRY(VORPSZ256rmk, VORPDZ256rmk, VPORQZ256rmk, VPORDZ256rmk)
+ENTRY(VORPSZ256rmkz, VORPDZ256rmkz, VPORQZ256rmkz, VPORDZ256rmkz)
+ENTRY(VORPSZ256rrk, VORPDZ256rrk, VPORQZ256rrk, VPORDZ256rrk)
+ENTRY(VORPSZ256rrkz, VORPDZ256rrkz, VPORQZ256rrkz, VPORDZ256rrkz)
+ENTRY(VXORPSZ256rmk, VXORPDZ256rmk, VPXORQZ256rmk, VPXORDZ256rmk)
+ENTRY(VXORPSZ256rmkz, VXORPDZ256rmkz, VPXORQZ256rmkz, VPXORDZ256rmkz)
+ENTRY(VXORPSZ256rrk, VXORPDZ256rrk, VPXORQZ256rrk, VPXORDZ256rrk)
+ENTRY(VXORPSZ256rrkz, VXORPDZ256rrkz, VPXORQZ256rrkz, VPXORDZ256rrkz)
+ENTRY(VANDNPSZrmk, VANDNPDZrmk, VPANDNQZrmk, VPANDNDZrmk)
+ENTRY(VANDNPSZrmkz, VANDNPDZrmkz, VPANDNQZrmkz, VPANDNDZrmkz)
+ENTRY(VANDNPSZrrk, VANDNPDZrrk, VPANDNQZrrk, VPANDNDZrrk)
+ENTRY(VANDNPSZrrkz, VANDNPDZrrkz, VPANDNQZrrkz, VPANDNDZrrkz)
+ENTRY(VANDPSZrmk, VANDPDZrmk, VPANDQZrmk, VPANDDZrmk)
+ENTRY(VANDPSZrmkz, VANDPDZrmkz, VPANDQZrmkz, VPANDDZrmkz)
+ENTRY(VANDPSZrrk, VANDPDZrrk, VPANDQZrrk, VPANDDZrrk)
+ENTRY(VANDPSZrrkz, VANDPDZrrkz, VPANDQZrrkz, VPANDDZrrkz)
+ENTRY(VORPSZrmk, VORPDZrmk, VPORQZrmk, VPORDZrmk)
+ENTRY(VORPSZrmkz, VORPDZrmkz, VPORQZrmkz, VPORDZrmkz)
+ENTRY(VORPSZrrk, VORPDZrrk, VPORQZrrk, VPORDZrrk)
+ENTRY(VORPSZrrkz, VORPDZrrkz, VPORQZrrkz, VPORDZrrkz)
+ENTRY(VXORPSZrmk, VXORPDZrmk, VPXORQZrmk, VPXORDZrmk)
+ENTRY(VXORPSZrmkz, VXORPDZrmkz, VPXORQZrmkz, VPXORDZrmkz)
+ENTRY(VXORPSZrrk, VXORPDZrrk, VPXORQZrrk, VPXORDZrrk)
+ENTRY(VXORPSZrrkz, VXORPDZrrkz, VPXORQZrrkz, VPXORDZrrkz)
+// Broadcast loads can be handled the same as masked operations to avoid
+// changing element size.
+ENTRY(VANDNPSZ128rmb, VANDNPDZ128rmb, VPANDNQZ128rmb, VPANDNDZ128rmb)
+ENTRY(VANDPSZ128rmb, VANDPDZ128rmb, VPANDQZ128rmb, VPANDDZ128rmb)
+ENTRY(VORPSZ128rmb, VORPDZ128rmb, VPORQZ128rmb, VPORDZ128rmb)
+ENTRY(VXORPSZ128rmb, VXORPDZ128rmb, VPXORQZ128rmb, VPXORDZ128rmb)
+ENTRY(VANDNPSZ256rmb, VANDNPDZ256rmb, VPANDNQZ256rmb, VPANDNDZ256rmb)
+ENTRY(VANDPSZ256rmb, VANDPDZ256rmb, VPANDQZ256rmb, VPANDDZ256rmb)
+ENTRY(VORPSZ256rmb, VORPDZ256rmb, VPORQZ256rmb, VPORDZ256rmb)
+ENTRY(VXORPSZ256rmb, VXORPDZ256rmb, VPXORQZ256rmb, VPXORDZ256rmb)
+ENTRY(VANDNPSZrmb, VANDNPDZrmb, VPANDNQZrmb, VPANDNDZrmb)
+ENTRY(VANDPSZrmb, VANDPDZrmb, VPANDQZrmb, VPANDDZrmb)
+ENTRY(VORPSZrmb, VORPDZrmb, VPORQZrmb, VPORDZrmb)
+ENTRY(VXORPSZrmb, VXORPDZrmb, VPXORQZrmb, VPXORDZrmb)
+ENTRY(VANDNPSZ128rmbk, VANDNPDZ128rmbk, VPANDNQZ128rmbk, VPANDNDZ128rmbk)
+ENTRY(VANDPSZ128rmbk, VANDPDZ128rmbk, VPANDQZ128rmbk, VPANDDZ128rmbk)
+ENTRY(VORPSZ128rmbk, VORPDZ128rmbk, VPORQZ128rmbk, VPORDZ128rmbk)
+ENTRY(VXORPSZ128rmbk, VXORPDZ128rmbk, VPXORQZ128rmbk, VPXORDZ128rmbk)
+ENTRY(VANDNPSZ256rmbk, VANDNPDZ256rmbk, VPANDNQZ256rmbk, VPANDNDZ256rmbk)
+ENTRY(VANDPSZ256rmbk, VANDPDZ256rmbk, VPANDQZ256rmbk, VPANDDZ256rmbk)
+ENTRY(VORPSZ256rmbk, VORPDZ256rmbk, VPORQZ256rmbk, VPORDZ256rmbk)
+ENTRY(VXORPSZ256rmbk, VXORPDZ256rmbk, VPXORQZ256rmbk, VPXORDZ256rmbk)
+ENTRY(VANDNPSZrmbk, VANDNPDZrmbk, VPANDNQZrmbk, VPANDNDZrmbk)
+ENTRY(VANDPSZrmbk, VANDPDZrmbk, VPANDQZrmbk, VPANDDZrmbk)
+ENTRY(VORPSZrmbk, VORPDZrmbk, VPORQZrmbk, VPORDZrmbk)
+ENTRY(VXORPSZrmbk, VXORPDZrmbk, VPXORQZrmbk, VPXORDZrmbk)
+ENTRY(VANDNPSZ128rmbkz, VANDNPDZ128rmbkz, VPANDNQZ128rmbkz, VPANDNDZ128rmbkz)
+ENTRY(VANDPSZ128rmbkz, VANDPDZ128rmbkz, VPANDQZ128rmbkz, VPANDDZ128rmbkz)
+ENTRY(VORPSZ128rmbkz, VORPDZ128rmbkz, VPORQZ128rmbkz, VPORDZ128rmbkz)
+ENTRY(VXORPSZ128rmbkz, VXORPDZ128rmbkz, VPXORQZ128rmbkz, VPXORDZ128rmbkz)
+ENTRY(VANDNPSZ256rmbkz, VANDNPDZ256rmbkz, VPANDNQZ256rmbkz, VPANDNDZ256rmbkz)
+ENTRY(VANDPSZ256rmbkz, VANDPDZ256rmbkz, VPANDQZ256rmbkz, VPANDDZ256rmbkz)
+ENTRY(VORPSZ256rmbkz, VORPDZ256rmbkz, VPORQZ256rmbkz, VPORDZ256rmbkz)
+ENTRY(VXORPSZ256rmbkz, VXORPDZ256rmbkz, VPXORQZ256rmbkz, VPXORDZ256rmbkz)
+ENTRY(VANDNPSZrmbkz, VANDNPDZrmbkz, VPANDNQZrmbkz, VPANDNDZrmbkz)
+ENTRY(VANDPSZrmbkz, VANDPDZrmbkz, VPANDQZrmbkz, VPANDDZrmbkz)
+ENTRY(VORPSZrmbkz, VORPDZrmbkz, VPORQZrmbkz, VPORDZrmbkz)
+ENTRY(VXORPSZrmbkz, VXORPDZrmbkz, VPXORQZrmbkz, VPXORDZrmbkz)
+};
+
+// Special table for changing EVEX logic instructions to VEX.
+// TODO: Should we run EVEX->VEX earlier?
+static const uint16_t ReplaceableCustomAVX512LogicInstrs[][4] = {
+// Two integer columns for 64-bit and 32-bit elements.
+// PackedSingle, PackedDouble, PackedInt, PackedInt
+ENTRY(VANDNPSrm, VANDNPDrm, VPANDNQZ128rm, VPANDNDZ128rm)
+ENTRY(VANDNPSrr, VANDNPDrr, VPANDNQZ128rr, VPANDNDZ128rr)
+ENTRY(VANDPSrm, VANDPDrm, VPANDQZ128rm, VPANDDZ128rm)
+ENTRY(VANDPSrr, VANDPDrr, VPANDQZ128rr, VPANDDZ128rr)
+ENTRY(VORPSrm, VORPDrm, VPORQZ128rm, VPORDZ128rm)
+ENTRY(VORPSrr, VORPDrr, VPORQZ128rr, VPORDZ128rr)
+ENTRY(VXORPSrm, VXORPDrm, VPXORQZ128rm, VPXORDZ128rm)
+ENTRY(VXORPSrr, VXORPDrr, VPXORQZ128rr, VPXORDZ128rr)
+ENTRY(VANDNPSYrm, VANDNPDYrm, VPANDNQZ256rm, VPANDNDZ256rm)
+ENTRY(VANDNPSYrr, VANDNPDYrr, VPANDNQZ256rr, VPANDNDZ256rr)
+ENTRY(VANDPSYrm, VANDPDYrm, VPANDQZ256rm, VPANDDZ256rm)
+ENTRY(VANDPSYrr, VANDPDYrr, VPANDQZ256rr, VPANDDZ256rr)
+ENTRY(VORPSYrm, VORPDYrm, VPORQZ256rm, VPORDZ256rm)
+ENTRY(VORPSYrr, VORPDYrr, VPORQZ256rr, VPORDZ256rr)
+ENTRY(VXORPSYrm, VXORPDYrm, VPXORQZ256rm, VPXORDZ256rm)
+ENTRY(VXORPSYrr, VXORPDYrr, VPXORQZ256rr, VPXORDZ256rr)
+};
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedAlderlakeP.td b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedAlderlakeP.td
index eb7dcfc6108b..8e3e55428264 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedAlderlakeP.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedAlderlakeP.td
@@ -103,7 +103,7 @@ multiclass ADLPWriteResPair<X86FoldableSchedWrite SchedRW,
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
- let ResourceCycles = Res;
+ let ReleaseAtCycles = Res;
let NumMicroOps = UOps;
}
@@ -111,7 +111,7 @@ multiclass ADLPWriteResPair<X86FoldableSchedWrite SchedRW,
// the latency (default = 5).
def : WriteRes<SchedRW.Folded, !listconcat([ADLPPort02_03_11], ExePorts)> {
let Latency = !add(Lat, LoadLat);
- let ResourceCycles = !listconcat([1], Res);
+ let ReleaseAtCycles = !listconcat([1], Res);
let NumMicroOps = !add(UOps, LoadUOps);
}
}
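
Throughout this scheduler model the diff renames ResourceCycles to ReleaseAtCycles; upstream LLVM renamed the field so that resource usage can be expressed as an acquire/release interval, and the per-resource counts themselves are unchanged. Those counts still drive the classic resource-pressure bound on reciprocal throughput, sketched below with our own types (this shows the idea, not LLVM's MCSchedModel API):

#include <algorithm>
#include <vector>

struct ResUse {
  unsigned NumUnits;      // copies of the resource (e.g. two ALU ports)
  unsigned ReleaseCycles; // cycles held, i.e. the ReleaseAtCycles entry
};

// The most contended resource limits throughput: cycles held divided by
// the number of units that can serve the request.
static double pressureBound(const std::vector<ResUse> &Uses) {
  double Bound = 0.0;
  for (const ResUse &U : Uses)
    Bound = std::max(Bound, double(U.ReleaseCycles) / double(U.NumUnits));
  return Bound; // lower bound, in cycles per instruction
}
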
@@ -302,7 +302,7 @@ defm : ADLPWriteResPair<WriteFSqrt64X, [ADLPPort00], 18, [1], 1, 6>;
defm : ADLPWriteResPair<WriteFSqrt64Y, [ADLPPort00], 18, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
def : WriteRes<WriteFSqrt80, [ADLPPortInvalid, ADLPPort00]> {
- let ResourceCycles = [7, 1];
+ let ReleaseAtCycles = [7, 1];
let Latency = 21;
}
defm : ADLPWriteResPair<WriteFSqrtX, [ADLPPort00], 12, [1], 1, 7>;
@@ -517,7 +517,7 @@ def : InstRW<[ADLPWriteResGroup0], (instregex "^AA(D|N)D64mr$",
"^A(X?)OR64mr$")>;
def ADLPWriteResGroup1 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> {
- let ResourceCycles = [2, 1, 1, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1, 1, 1];
let Latency = 12;
let NumMicroOps = 6;
}
@@ -540,7 +540,7 @@ def ADLPWriteResGroup3 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, A
def : InstRW<[ADLPWriteResGroup3], (instregex "^(ADC|SBB)8mi(8?)$")>;
def ADLPWriteResGroup4 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> {
- let ResourceCycles = [2, 1, 1, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1, 1, 1];
let Latency = 13;
let NumMicroOps = 6;
}
@@ -598,7 +598,7 @@ def : InstRW<[ADLPWriteResGroup9], (instregex "^ADD_F(32|64)m$",
"^SUB(R?)_F(32|64)m$")>;
def ADLPWriteResGroup10 : SchedWriteRes<[ADLPPort02_03, ADLPPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 13;
let NumMicroOps = 3;
}
@@ -645,7 +645,7 @@ def ADLPWriteResGroup14 : SchedWriteRes<[ADLPPort01_05_10]> {
def : InstRW<[ADLPWriteResGroup14], (instregex "^ANDN(32|64)rr$")>;
def ADLPWriteResGroup15 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11]> {
- let ResourceCycles = [5, 2, 1, 1];
+ let ReleaseAtCycles = [5, 2, 1, 1];
let Latency = 10;
let NumMicroOps = 9;
}
@@ -658,7 +658,7 @@ def : InstRW<[ADLPWriteResGroup16], (instregex "^BT((C|R|S)?)64rr$",
"^P(DEP|EXT)(32|64)rr$")>;
def ADLPWriteResGroup17 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> {
- let ResourceCycles = [4, 2, 1, 1, 1, 1];
+ let ReleaseAtCycles = [4, 2, 1, 1, 1, 1];
let Latency = 17;
let NumMicroOps = 10;
}
@@ -721,14 +721,14 @@ def ADLPWriteResGroup26 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort04_09,
def : InstRW<[ADLPWriteResGroup26], (instrs CLFLUSHOPT)>;
def ADLPWriteResGroup27 : SchedWriteRes<[ADLPPort00_06, ADLPPort01]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 3;
}
def : InstRW<[ADLPWriteResGroup27], (instrs CLI)>;
def ADLPWriteResGroup28 : SchedWriteRes<[ADLPPort00_06, ADLPPort01, ADLPPort05]> {
- let ResourceCycles = [6, 1, 3];
+ let ReleaseAtCycles = [6, 1, 3];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 10;
}
@@ -742,35 +742,35 @@ def : InstRW<[ADLPWriteResGroup29], (instregex "^MOV16o(16|32|64)a$")>;
def : InstRW<[ADLPWriteResGroup29], (instrs CLWB)>;
def ADLPWriteResGroup30 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> {
- let ResourceCycles = [5, 2];
+ let ReleaseAtCycles = [5, 2];
let Latency = 6;
let NumMicroOps = 7;
}
def : InstRW<[ADLPWriteResGroup30], (instregex "^CMPS(B|L|Q|W)$")>;
def ADLPWriteResGroup31 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01_05, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [2, 7, 6, 2, 1, 1, 2, 1];
+ let ReleaseAtCycles = [2, 7, 6, 2, 1, 1, 2, 1];
let Latency = 32;
let NumMicroOps = 22;
}
def : InstRW<[ADLPWriteResGroup31], (instrs CMPXCHG16B)>;
def ADLPWriteResGroup32 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> {
- let ResourceCycles = [4, 7, 2, 1, 1, 1];
+ let ReleaseAtCycles = [4, 7, 2, 1, 1, 1];
let Latency = 25;
let NumMicroOps = 16;
}
def : InstRW<[ADLPWriteResGroup32], (instrs CMPXCHG8B)>;
def ADLPWriteResGroup33 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> {
- let ResourceCycles = [1, 2, 1, 1, 1];
+ let ReleaseAtCycles = [1, 2, 1, 1, 1];
let Latency = 13;
let NumMicroOps = 6;
}
def : InstRW<[ADLPWriteResGroup33], (instrs CMPXCHG8rm)>;
def ADLPWriteResGroup34 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_06, ADLPPort01, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [2, 1, 10, 6, 1, 5, 1];
+ let ReleaseAtCycles = [2, 1, 10, 6, 1, 5, 1];
let Latency = 18;
let NumMicroOps = 26;
}
@@ -791,7 +791,7 @@ def : InstRW<[ADLPWriteResGroup36, ReadAfterVecLd], (instregex "^(V?)CVTSI642SSr
def : InstRW<[ADLPWriteResGroup36, ReadAfterVecLd], (instrs VCVTSI642SSrm)>;
def ADLPWriteResGroup37 : SchedWriteRes<[ADLPPort00_01, ADLPPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 8;
let NumMicroOps = 3;
}
@@ -870,7 +870,7 @@ def : InstRW<[ADLPWriteResGroup48], (instregex "^DIV_F(P?)rST0$")>;
def : InstRW<[ADLPWriteResGroup48], (instrs DIV_FST0r)>;
def ADLPWriteResGroup49 : SchedWriteRes<[ADLPPort00, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [2, 21, 2, 14, 4, 9, 5];
+ let ReleaseAtCycles = [2, 21, 2, 14, 4, 9, 5];
let Latency = 126;
let NumMicroOps = 57;
}
@@ -912,14 +912,14 @@ def : InstRW<[ADLPWriteResGroup54], (instrs FBSTPm,
VMPTRSTm)>;
def ADLPWriteResGroup55 : SchedWriteRes<[ADLPPort00_05]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 2;
let NumMicroOps = 2;
}
def : InstRW<[ADLPWriteResGroup55], (instrs FDECSTP)>;
def ADLPWriteResGroup56 : SchedWriteRes<[ADLPPort02_03, ADLPPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 11;
let NumMicroOps = 3;
}
@@ -937,21 +937,21 @@ def ADLPWriteResGroup58 : SchedWriteRes<[ADLPPort00, ADLPPort00_05, ADLPPort02_0
def : InstRW<[ADLPWriteResGroup58], (instrs FLDCW16m)>;
def ADLPWriteResGroup59 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06, ADLPPort00_05, ADLPPort00_06, ADLPPort02_03]> {
- let ResourceCycles = [2, 39, 5, 10, 8];
+ let ReleaseAtCycles = [2, 39, 5, 10, 8];
let Latency = 62;
let NumMicroOps = 64;
}
def : InstRW<[ADLPWriteResGroup59], (instrs FLDENVm)>;
def ADLPWriteResGroup60 : SchedWriteRes<[ADLPPort00_01_05_06]> {
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
let Latency = 4;
let NumMicroOps = 4;
}
def : InstRW<[ADLPWriteResGroup60], (instrs FNCLEX)>;
def ADLPWriteResGroup61 : SchedWriteRes<[ADLPPort00_01_05_06, ADLPPort00_05, ADLPPort05]> {
- let ResourceCycles = [6, 3, 6];
+ let ReleaseAtCycles = [6, 3, 6];
let Latency = 75;
let NumMicroOps = 15;
}
@@ -976,28 +976,28 @@ def ADLPWriteResGroup64 : SchedWriteRes<[ADLPPort00, ADLPPort02_03_07, ADLPPort0
def : InstRW<[ADLPWriteResGroup64], (instrs FNSTSWm)>;
def ADLPWriteResGroup65 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06, ADLPPort00_06, ADLPPort01, ADLPPort02_03_07, ADLPPort04, ADLPPort05, ADLPPort06]> {
- let ResourceCycles = [9, 30, 21, 1, 11, 11, 16, 1];
+ let ReleaseAtCycles = [9, 30, 21, 1, 11, 11, 16, 1];
let Latency = 106;
let NumMicroOps = 100;
}
def : InstRW<[ADLPWriteResGroup65], (instrs FSTENVm)>;
def ADLPWriteResGroup66 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06, ADLPPort00_05, ADLPPort00_06, ADLPPort01_05, ADLPPort02_03, ADLPPort06]> {
- let ResourceCycles = [4, 47, 1, 2, 1, 33, 2];
+ let ReleaseAtCycles = [4, 47, 1, 2, 1, 33, 2];
let Latency = 63;
let NumMicroOps = 90;
}
def : InstRW<[ADLPWriteResGroup66], (instrs FXRSTOR)>;
def ADLPWriteResGroup67 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06, ADLPPort00_05, ADLPPort00_06, ADLPPort01_05, ADLPPort02_03, ADLPPort06]> {
- let ResourceCycles = [4, 45, 1, 2, 1, 31, 4];
+ let ReleaseAtCycles = [4, 45, 1, 2, 1, 31, 4];
let Latency = 63;
let NumMicroOps = 88;
}
def : InstRW<[ADLPWriteResGroup67], (instrs FXRSTOR64)>;
def ADLPWriteResGroup68 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [2, 5, 10, 10, 2, 38, 5, 38];
+ let ReleaseAtCycles = [2, 5, 10, 10, 2, 38, 5, 38];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 110;
}
@@ -1019,41 +1019,41 @@ def : InstRW<[ADLPWriteResGroup70], (instregex "^(V?)GF2P8MULBrr$")>;
def : InstRW<[ADLPWriteResGroup70], (instrs VGF2P8MULBYrr)>;
def ADLPWriteResGroup71 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort02_03_11, ADLPPort05]> {
- let ResourceCycles = [7, 5, 26, 19, 2, 7, 21];
+ let ReleaseAtCycles = [7, 5, 26, 19, 2, 7, 21];
let Latency = 35;
let NumMicroOps = 87;
}
def : InstRW<[ADLPWriteResGroup71], (instrs IN16ri)>;
def ADLPWriteResGroup72 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort02_03_11, ADLPPort05]> {
- let ResourceCycles = [7, 1, 4, 26, 19, 3, 7, 20];
+ let ReleaseAtCycles = [7, 1, 4, 26, 19, 3, 7, 20];
let Latency = 35;
let NumMicroOps = 87;
}
def : InstRW<[ADLPWriteResGroup72], (instrs IN16rr)>;
def ADLPWriteResGroup73 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort02_03_11, ADLPPort05]> {
- let ResourceCycles = [7, 6, 28, 21, 2, 10, 20];
+ let ReleaseAtCycles = [7, 6, 28, 21, 2, 10, 20];
let Latency = 35;
let NumMicroOps = 94;
}
def : InstRW<[ADLPWriteResGroup73], (instrs IN32ri)>;
def ADLPWriteResGroup74 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort02_03_11, ADLPPort05]> {
- let ResourceCycles = [7, 9, 28, 21, 2, 11, 21];
+ let ReleaseAtCycles = [7, 9, 28, 21, 2, 11, 21];
let NumMicroOps = 99;
}
def : InstRW<[ADLPWriteResGroup74], (instrs IN32rr)>;
def ADLPWriteResGroup75 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort02_03_11, ADLPPort05]> {
- let ResourceCycles = [7, 6, 25, 19, 2, 8, 20];
+ let ReleaseAtCycles = [7, 6, 25, 19, 2, 8, 20];
let Latency = 35;
let NumMicroOps = 87;
}
def : InstRW<[ADLPWriteResGroup75], (instrs IN8ri)>;
def ADLPWriteResGroup76 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort02_03_11, ADLPPort05]> {
- let ResourceCycles = [7, 6, 25, 19, 2, 7, 20];
+ let ReleaseAtCycles = [7, 6, 25, 19, 2, 7, 20];
let Latency = 35;
let NumMicroOps = 86;
}
@@ -1073,28 +1073,28 @@ def : InstRW<[ADLPWriteResGroup78], (instrs INC32r_alt,
VBROADCASTSSrm)>;
def ADLPWriteResGroup79 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [7, 6, 24, 17, 8, 1, 19, 1];
+ let ReleaseAtCycles = [7, 6, 24, 17, 8, 1, 19, 1];
let Latency = 20;
let NumMicroOps = 83;
}
def : InstRW<[ADLPWriteResGroup79], (instrs INSB)>;
def ADLPWriteResGroup80 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [7, 1, 5, 1, 27, 17, 11, 1, 21, 1];
+ let ReleaseAtCycles = [7, 1, 5, 1, 27, 17, 11, 1, 21, 1];
let Latency = 20;
let NumMicroOps = 92;
}
def : InstRW<[ADLPWriteResGroup80], (instrs INSL)>;
def ADLPWriteResGroup81 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [7, 1, 4, 1, 25, 17, 1, 9, 1, 19, 1];
+ let ReleaseAtCycles = [7, 1, 4, 1, 25, 17, 1, 9, 1, 19, 1];
let Latency = 20;
let NumMicroOps = 86;
}
def : InstRW<[ADLPWriteResGroup81], (instrs INSW)>;
def ADLPWriteResGroup82 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [5, 4, 8, 6, 2, 5, 7, 5];
+ let ReleaseAtCycles = [5, 4, 8, 6, 2, 5, 7, 5];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 42;
}
@@ -1124,35 +1124,35 @@ def : InstRW<[ADLPWriteResGroup86], (instregex "^JMP_(1|4)$")>;
def : InstRW<[ADLPWriteResGroup86], (instrs VZEROUPPER)>;
def ADLPWriteResGroup87 : SchedWriteRes<[ADLPPort00, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort05]> {
- let ResourceCycles = [8, 2, 14, 3, 1];
+ let ReleaseAtCycles = [8, 2, 14, 3, 1];
let Latency = 198;
let NumMicroOps = 81;
}
def : InstRW<[ADLPWriteResGroup87], (instrs LAR16rm)>;
def ADLPWriteResGroup88 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort05]> {
- let ResourceCycles = [1, 3, 1, 8, 5, 1, 2, 1];
+ let ReleaseAtCycles = [1, 3, 1, 8, 5, 1, 2, 1];
let Latency = 66;
let NumMicroOps = 22;
}
def : InstRW<[ADLPWriteResGroup88], (instrs LAR16rr)>;
def ADLPWriteResGroup89 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort05]> {
- let ResourceCycles = [1, 2, 2, 9, 5, 3, 1];
+ let ReleaseAtCycles = [1, 2, 2, 9, 5, 3, 1];
let Latency = 71;
let NumMicroOps = 85;
}
def : InstRW<[ADLPWriteResGroup89], (instrs LAR32rm)>;
def ADLPWriteResGroup90 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort05]> {
- let ResourceCycles = [1, 3, 1, 8, 5, 1, 2, 1];
+ let ReleaseAtCycles = [1, 3, 1, 8, 5, 1, 2, 1];
let Latency = 65;
let NumMicroOps = 22;
}
def : InstRW<[ADLPWriteResGroup90], (instregex "^LAR(32|64)rr$")>;
def ADLPWriteResGroup91 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort05]> {
- let ResourceCycles = [1, 2, 2, 9, 5, 3, 1];
+ let ReleaseAtCycles = [1, 2, 2, 9, 5, 3, 1];
let Latency = 71;
let NumMicroOps = 87;
}
@@ -1170,7 +1170,7 @@ def ADLPWriteResGroup93 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort01]> {
def : InstRW<[ADLPWriteResGroup93], (instrs LEA16r)>;
def ADLPWriteResGroup94 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> {
- let ResourceCycles = [3, 1];
+ let ReleaseAtCycles = [3, 1];
let Latency = 6;
let NumMicroOps = 4;
}
@@ -1179,77 +1179,77 @@ def : InstRW<[ADLPWriteResGroup94], (instregex "^LODS(B|W)$",
def : InstRW<[ADLPWriteResGroup94], (instrs LEAVE)>;
def ADLPWriteResGroup95 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 6;
let NumMicroOps = 3;
}
def : InstRW<[ADLPWriteResGroup95], (instrs LEAVE64)>;
def ADLPWriteResGroup96 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> {
- let ResourceCycles = [1, 2, 4, 3, 2, 1, 1];
+ let ReleaseAtCycles = [1, 2, 4, 3, 2, 1, 1];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 14;
}
def : InstRW<[ADLPWriteResGroup96], (instrs LGDT64m)>;
def ADLPWriteResGroup97 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> {
- let ResourceCycles = [1, 1, 5, 3, 2, 1, 1];
+ let ReleaseAtCycles = [1, 1, 5, 3, 2, 1, 1];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 14;
}
def : InstRW<[ADLPWriteResGroup97], (instrs LIDT64m)>;
def ADLPWriteResGroup98 : SchedWriteRes<[ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> {
- let ResourceCycles = [5, 3, 2, 1, 1];
+ let ReleaseAtCycles = [5, 3, 2, 1, 1];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 12;
}
def : InstRW<[ADLPWriteResGroup98], (instrs LLDT16m)>;
def ADLPWriteResGroup99 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> {
- let ResourceCycles = [1, 4, 3, 1, 1, 1];
+ let ReleaseAtCycles = [1, 4, 3, 1, 1, 1];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 11;
}
def : InstRW<[ADLPWriteResGroup99], (instrs LLDT16r)>;
def ADLPWriteResGroup100 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [1, 1, 2, 8, 3, 1, 2, 7, 2];
+ let ReleaseAtCycles = [1, 1, 2, 8, 3, 1, 2, 7, 2];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 27;
}
def : InstRW<[ADLPWriteResGroup100], (instrs LMSW16m)>;
def ADLPWriteResGroup101 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [5, 7, 1, 2, 5, 2];
+ let ReleaseAtCycles = [5, 7, 1, 2, 5, 2];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 22;
}
def : InstRW<[ADLPWriteResGroup101], (instrs LMSW16r)>;
def ADLPWriteResGroup102 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 5;
let NumMicroOps = 3;
}
def : InstRW<[ADLPWriteResGroup102], (instregex "^LODS(L|Q)$")>;
def ADLPWriteResGroup103 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> {
- let ResourceCycles = [2, 4, 1];
+ let ReleaseAtCycles = [2, 4, 1];
let Latency = 3;
let NumMicroOps = 7;
}
def : InstRW<[ADLPWriteResGroup103], (instrs LOOP)>;
def ADLPWriteResGroup104 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> {
- let ResourceCycles = [4, 6, 1];
+ let ReleaseAtCycles = [4, 6, 1];
let Latency = 3;
let NumMicroOps = 11;
}
def : InstRW<[ADLPWriteResGroup104], (instrs LOOPE)>;
def ADLPWriteResGroup105 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> {
- let ResourceCycles = [4, 6, 1];
+ let ReleaseAtCycles = [4, 6, 1];
let Latency = 2;
let NumMicroOps = 11;
}
@@ -1262,14 +1262,14 @@ def ADLPWriteResGroup106 : SchedWriteRes<[ADLPPort00_01_05_06, ADLPPort02_03, AD
def : InstRW<[ADLPWriteResGroup106], (instrs LRET64)>;
def ADLPWriteResGroup107 : SchedWriteRes<[ADLPPort00, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort05]> {
- let ResourceCycles = [1, 5, 3, 3, 1];
+ let ReleaseAtCycles = [1, 5, 3, 3, 1];
let Latency = 70;
let NumMicroOps = 13;
}
def : InstRW<[ADLPWriteResGroup107], (instregex "^LSL(16|32|64)rm$")>;
def ADLPWriteResGroup108 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort05]> {
- let ResourceCycles = [1, 4, 4, 3, 2, 1];
+ let ReleaseAtCycles = [1, 4, 4, 3, 2, 1];
let Latency = 63;
let NumMicroOps = 15;
}
@@ -1312,7 +1312,7 @@ def ADLPWriteResGroup114 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05]> {
def : InstRW<[ADLPWriteResGroup114], (instregex "^MMX_CVT(T?)PS2PIrr$")>;
def ADLPWriteResGroup115 : SchedWriteRes<[ADLPPort00, ADLPPort04_09, ADLPPort07_08]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 12;
let NumMicroOps = 4;
}
@@ -1328,7 +1328,7 @@ def ADLPWriteResGroup117 : SchedWriteRes<[ADLPPort02_03_11]> {
let Latency = 8;
}
def : InstRW<[ADLPWriteResGroup117], (instregex "^MMX_MOV(D|Q)64rm$",
- "^VBROADCAST(F|I)128$",
+ "^VBROADCAST(F|I)128rm$",
"^VBROADCASTS(D|S)Yrm$",
"^VMOV(D|SH|SL)DUPYrm$",
"^VPBROADCAST(D|Q)Yrm$")>;
@@ -1347,7 +1347,7 @@ def ADLPWriteResGroup119 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05]> {
def : InstRW<[ADLPWriteResGroup119], (instregex "^MMX_MOVQ2(DQ|FR64)rr$")>;
def ADLPWriteResGroup120 : SchedWriteRes<[ADLPPort02_03_11, ADLPPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 12;
let NumMicroOps = 3;
}
@@ -1355,7 +1355,7 @@ def : InstRW<[ADLPWriteResGroup120, ReadAfterVecLd], (instregex "^MMX_PACKSS(DW|
def : InstRW<[ADLPWriteResGroup120, ReadAfterVecLd], (instrs MMX_PACKUSWBrm)>;
def ADLPWriteResGroup121 : SchedWriteRes<[ADLPPort05]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 4;
let NumMicroOps = 2;
}
@@ -1370,14 +1370,14 @@ def ADLPWriteResGroup122 : SchedWriteRes<[ADLPPort00_05, ADLPPort02_03_11]> {
def : InstRW<[ADLPWriteResGroup122, ReadAfterVecLd], (instregex "^MMX_P(ADD|SUB)(B|D|Q|W)rm$")>;
def ADLPWriteResGroup123 : SchedWriteRes<[ADLPPort00, ADLPPort02_03_11, ADLPPort05]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = 11;
let NumMicroOps = 4;
}
def : InstRW<[ADLPWriteResGroup123, ReadAfterVecLd], (instregex "^MMX_PH(ADD|SUB)SWrm$")>;
def ADLPWriteResGroup124 : SchedWriteRes<[ADLPPort00, ADLPPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 3;
let NumMicroOps = 3;
}
@@ -1434,7 +1434,7 @@ def ADLPWriteResGroup132 : SchedWriteRes<[ADLPPort02_03_11]> {
def : InstRW<[ADLPWriteResGroup132], (instrs MOV64ao32)>;
def ADLPWriteResGroup133 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [1, 2, 4, 16, 7, 2, 2, 12, 2];
+ let ReleaseAtCycles = [1, 2, 4, 16, 7, 2, 2, 12, 2];
let Latency = 217;
let NumMicroOps = 48;
}
@@ -1453,7 +1453,7 @@ def ADLPWriteResGroup135 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06,
def : InstRW<[ADLPWriteResGroup135], (instrs MOV64rc)>;
def ADLPWriteResGroup136 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort05]> {
- let ResourceCycles = [3, 4, 8, 4, 2, 3];
+ let ReleaseAtCycles = [3, 4, 8, 4, 2, 3];
let Latency = 181;
let NumMicroOps = 24;
}
@@ -1541,7 +1541,7 @@ def ADLPWriteResGroup149 : SchedWriteRes<[ADLPPort04_09, ADLPPort07_08]> {
def : InstRW<[ADLPWriteResGroup149], (instrs MOVNTImr)>;
def ADLPWriteResGroup150 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> {
- let ResourceCycles = [4, 1, 1, 1];
+ let ReleaseAtCycles = [4, 1, 1, 1];
let Latency = 8;
let NumMicroOps = 7;
}
@@ -1554,7 +1554,7 @@ def : InstRW<[ADLPWriteResGroup151], (instregex "^(V?)MOVS(D|S)rr((_REV)?)$",
def : InstRW<[ADLPWriteResGroup151], (instrs VPBLENDDrri)>;
def ADLPWriteResGroup152 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> {
- let ResourceCycles = [4, 1, 1, 1];
+ let ReleaseAtCycles = [4, 1, 1, 1];
let Latency = 7;
let NumMicroOps = 7;
}
@@ -1596,70 +1596,70 @@ def : InstRW<[ADLPWriteResGroup158], (instregex "^MUL_F(P?)rST0$")>;
def : InstRW<[ADLPWriteResGroup158], (instrs MUL_FST0r)>;
def ADLPWriteResGroup159 : SchedWriteRes<[ADLPPort00_01_05_06, ADLPPort05, ADLPPort06]> {
- let ResourceCycles = [7, 1, 2];
+ let ReleaseAtCycles = [7, 1, 2];
let Latency = 20;
let NumMicroOps = 10;
}
def : InstRW<[ADLPWriteResGroup159], (instrs MWAITrr)>;
def ADLPWriteResGroup160 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [6, 4, 1, 28, 15, 7, 1, 16, 1];
+ let ReleaseAtCycles = [6, 4, 1, 28, 15, 7, 1, 16, 1];
let Latency = 35;
let NumMicroOps = 79;
}
def : InstRW<[ADLPWriteResGroup160], (instrs OUT16ir)>;
def ADLPWriteResGroup161 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [6, 6, 27, 15, 7, 1, 16, 1];
+ let ReleaseAtCycles = [6, 6, 27, 15, 7, 1, 16, 1];
let Latency = 35;
let NumMicroOps = 79;
}
def : InstRW<[ADLPWriteResGroup161], (instrs OUT16rr)>;
def ADLPWriteResGroup162 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [6, 4, 1, 30, 15, 9, 1, 18, 1];
+ let ReleaseAtCycles = [6, 4, 1, 30, 15, 9, 1, 18, 1];
let Latency = 35;
let NumMicroOps = 85;
}
def : InstRW<[ADLPWriteResGroup162], (instrs OUT32ir)>;
def ADLPWriteResGroup163 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [6, 6, 29, 15, 9, 1, 18, 1];
+ let ReleaseAtCycles = [6, 6, 29, 15, 9, 1, 18, 1];
let Latency = 35;
let NumMicroOps = 85;
}
def : InstRW<[ADLPWriteResGroup163], (instrs OUT32rr)>;
def ADLPWriteResGroup164 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [5, 5, 1, 25, 15, 5, 1, 15, 1];
+ let ReleaseAtCycles = [5, 5, 1, 25, 15, 5, 1, 15, 1];
let Latency = 35;
let NumMicroOps = 73;
}
def : InstRW<[ADLPWriteResGroup164], (instrs OUT8ir)>;
def ADLPWriteResGroup165 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [5, 5, 26, 15, 5, 1, 15, 1];
+ let ReleaseAtCycles = [5, 5, 26, 15, 5, 1, 15, 1];
let Latency = 35;
let NumMicroOps = 73;
}
def : InstRW<[ADLPWriteResGroup165], (instrs OUT8rr)>;
def ADLPWriteResGroup166 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [7, 6, 25, 16, 7, 1, 17, 1];
+ let ReleaseAtCycles = [7, 6, 25, 16, 7, 1, 17, 1];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 80;
}
def : InstRW<[ADLPWriteResGroup166], (instrs OUTSB)>;
def ADLPWriteResGroup167 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [7, 6, 28, 16, 10, 1, 20, 1];
+ let ReleaseAtCycles = [7, 6, 28, 16, 10, 1, 20, 1];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 89;
}
def : InstRW<[ADLPWriteResGroup167], (instrs OUTSL)>;
def ADLPWriteResGroup168 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [6, 1, 5, 27, 16, 8, 1, 18, 1];
+ let ReleaseAtCycles = [6, 1, 5, 27, 16, 8, 1, 18, 1];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 83;
}
@@ -1718,14 +1718,14 @@ def ADLPWriteResGroup176 : SchedWriteRes<[ADLPPort01_05, ADLPPort04_09, ADLPPort
def : InstRW<[ADLPWriteResGroup176], (instregex "^(V?)PEXTR(D|Q)mr$")>;
def ADLPWriteResGroup177 : SchedWriteRes<[ADLPPort00_01, ADLPPort01_05, ADLPPort02_03_11]> {
- let ResourceCycles = [1, 2, 1];
+ let ReleaseAtCycles = [1, 2, 1];
let Latency = 9;
let NumMicroOps = 4;
}
def : InstRW<[ADLPWriteResGroup177, ReadAfterVecXLd], (instregex "^(V?)PH(ADD|SUB)SWrm$")>;
def ADLPWriteResGroup178 : SchedWriteRes<[ADLPPort00_01, ADLPPort01_05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 2;
let NumMicroOps = 3;
}
@@ -1747,14 +1747,14 @@ def : InstRW<[ADLPWriteResGroup180], (instregex "^POPA(16|32)$",
def : InstRW<[ADLPWriteResGroup180], (instrs POPF32)>;
def ADLPWriteResGroup181 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11]> {
- let ResourceCycles = [6, 2, 1, 1];
+ let ReleaseAtCycles = [6, 2, 1, 1];
let Latency = 5;
let NumMicroOps = 10;
}
def : InstRW<[ADLPWriteResGroup181], (instrs POPF16)>;
def ADLPWriteResGroup182 : SchedWriteRes<[ADLPPort00_06, ADLPPort01, ADLPPort02_03_11]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 5;
let NumMicroOps = 7;
}
@@ -1767,21 +1767,21 @@ def : InstRW<[ADLPWriteResGroup183], (instregex "^PREFETCHT(0|1|2)$")>;
def : InstRW<[ADLPWriteResGroup183], (instrs PREFETCHNTA)>;
def ADLPWriteResGroup184 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11, ADLPPort06]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 4;
}
def : InstRW<[ADLPWriteResGroup184], (instregex "^PTWRITE((64)?)m$")>;
def ADLPWriteResGroup185 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort06]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 3;
}
def : InstRW<[ADLPWriteResGroup185], (instrs PTWRITE64r)>;
def ADLPWriteResGroup186 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort06]> {
- let ResourceCycles = [2, 2];
+ let ReleaseAtCycles = [2, 2];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 4;
}
@@ -1814,49 +1814,49 @@ def ADLPWriteResGroup191 : SchedWriteRes<[ADLPPort01, ADLPPort04_09, ADLPPort07_
def : InstRW<[ADLPWriteResGroup191], (instregex "^PUSH(F|G)S64$")>;
def ADLPWriteResGroup192 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> {
- let ResourceCycles = [2, 3, 2];
+ let ReleaseAtCycles = [2, 3, 2];
let Latency = 8;
let NumMicroOps = 7;
}
def : InstRW<[ADLPWriteResGroup192], (instregex "^RC(L|R)(16|32|64)rCL$")>;
def ADLPWriteResGroup193 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 13;
let NumMicroOps = 3;
}
def : InstRW<[ADLPWriteResGroup193, WriteRMW], (instregex "^RC(L|R)8m(1|i)$")>;
def ADLPWriteResGroup194 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> {
- let ResourceCycles = [1, 5, 2];
+ let ReleaseAtCycles = [1, 5, 2];
let Latency = 20;
let NumMicroOps = 8;
}
def : InstRW<[ADLPWriteResGroup194, WriteRMW], (instrs RCL8mCL)>;
def ADLPWriteResGroup195 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> {
- let ResourceCycles = [2, 5, 2];
+ let ReleaseAtCycles = [2, 5, 2];
let Latency = 7;
let NumMicroOps = 9;
}
def : InstRW<[ADLPWriteResGroup195], (instrs RCL8rCL)>;
def ADLPWriteResGroup196 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> {
- let ResourceCycles = [2, 4, 3];
+ let ReleaseAtCycles = [2, 4, 3];
let Latency = 20;
let NumMicroOps = 9;
}
def : InstRW<[ADLPWriteResGroup196, WriteRMW], (instrs RCR8mCL)>;
def ADLPWriteResGroup197 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> {
- let ResourceCycles = [3, 4, 3];
+ let ReleaseAtCycles = [3, 4, 3];
let Latency = 9;
let NumMicroOps = 10;
}
def : InstRW<[ADLPWriteResGroup197], (instrs RCR8rCL)>;
def ADLPWriteResGroup198 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_05, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort01_05_10, ADLPPort05]> {
- let ResourceCycles = [1, 6, 1, 10, 20, 8, 5, 1, 2];
+ let ReleaseAtCycles = [1, 6, 1, 10, 20, 8, 5, 1, 2];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 54;
}
@@ -1874,49 +1874,49 @@ def ADLPWriteResGroup200 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06,
def : InstRW<[ADLPWriteResGroup200], (instrs RDPKRUr)>;
def ADLPWriteResGroup201 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort05]> {
- let ResourceCycles = [9, 6, 2, 1];
+ let ReleaseAtCycles = [9, 6, 2, 1];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 18;
}
def : InstRW<[ADLPWriteResGroup201], (instrs RDPMC)>;
def ADLPWriteResGroup202 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort05]> {
- let ResourceCycles = [2, 3, 2, 5, 7, 3, 1, 2];
+ let ReleaseAtCycles = [2, 3, 2, 5, 7, 3, 1, 2];
let Latency = 1386;
let NumMicroOps = 25;
}
def : InstRW<[ADLPWriteResGroup202], (instrs RDRAND16r)>;
def ADLPWriteResGroup203 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort05]> {
- let ResourceCycles = [2, 3, 2, 5, 7, 3, 1, 2];
+ let ReleaseAtCycles = [2, 3, 2, 5, 7, 3, 1, 2];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 25;
}
def : InstRW<[ADLPWriteResGroup203], (instregex "^RDRAND(32|64)r$")>;
def ADLPWriteResGroup204 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort05]> {
- let ResourceCycles = [2, 3, 3, 5, 7, 1, 4];
+ let ReleaseAtCycles = [2, 3, 3, 5, 7, 1, 4];
let Latency = 1381;
let NumMicroOps = 25;
}
def : InstRW<[ADLPWriteResGroup204], (instrs RDSEED16r)>;
def ADLPWriteResGroup205 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort05]> {
- let ResourceCycles = [2, 3, 3, 5, 7, 1, 4];
+ let ReleaseAtCycles = [2, 3, 3, 5, 7, 1, 4];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 25;
}
def : InstRW<[ADLPWriteResGroup205], (instregex "^RDSEED(32|64)r$")>;
def ADLPWriteResGroup206 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort05]> {
- let ResourceCycles = [5, 6, 3, 1];
+ let ReleaseAtCycles = [5, 6, 3, 1];
let Latency = 18;
let NumMicroOps = 15;
}
def : InstRW<[ADLPWriteResGroup206], (instrs RDTSC)>;
def ADLPWriteResGroup207 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort05]> {
- let ResourceCycles = [2, 2, 1, 2, 7, 4, 3];
+ let ReleaseAtCycles = [2, 2, 1, 2, 7, 4, 3];
let Latency = 42;
let NumMicroOps = 21;
}
@@ -1929,7 +1929,7 @@ def ADLPWriteResGroup208 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_11]> {
def : InstRW<[ADLPWriteResGroup208], (instrs RET64)>;
def ADLPWriteResGroup209 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_11]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 6;
let NumMicroOps = 3;
}
@@ -1939,20 +1939,20 @@ def ADLPWriteResGroup210 : SchedWriteRes<[]>;
def : InstRW<[ADLPWriteResGroup210], (instrs REX64_PREFIX)>;
def ADLPWriteResGroup211 : SchedWriteRes<[ADLPPort00_06]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 12;
let NumMicroOps = 2;
}
def : InstRW<[ADLPWriteResGroup211, WriteRMW], (instregex "^RO(L|R)(16|32|64)m(1|i|CL)$")>;
def ADLPWriteResGroup212 : SchedWriteRes<[ADLPPort00_06]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 2;
}
def : InstRW<[ADLPWriteResGroup212], (instregex "^RO(L|R)(8|16|32|64)r(1|i)$")>;
def ADLPWriteResGroup213 : SchedWriteRes<[ADLPPort00_06]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 13;
let NumMicroOps = 2;
}
@@ -1961,7 +1961,7 @@ def : InstRW<[ADLPWriteResGroup213, WriteRMW], (instregex "^RO(L|R)8m(1|i)$",
"^(RO|SA|SH)R8mCL$")>;
def ADLPWriteResGroup214 : SchedWriteRes<[ADLPPort00_06]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 4;
let NumMicroOps = 2;
}
@@ -1987,7 +1987,7 @@ def : InstRW<[ADLPWriteResGroup217], (instregex "^S(A|H)RX(32|64)rr$",
"^SHLX(32|64)rr$")>;
def ADLPWriteResGroup218 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort04_09, ADLPPort07_08]> {
- let ResourceCycles = [2, 2, 1, 1, 1];
+ let ReleaseAtCycles = [2, 2, 1, 1, 1];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 7;
}
@@ -2000,7 +2000,7 @@ def ADLPWriteResGroup219 : SchedWriteRes<[ADLPPort04_09, ADLPPort07_08]> {
def : InstRW<[ADLPWriteResGroup219], (instrs SFENCE)>;
def ADLPWriteResGroup220 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort01, ADLPPort04_09, ADLPPort07_08]> {
- let ResourceCycles = [1, 2, 2, 2];
+ let ReleaseAtCycles = [1, 2, 2, 2];
let Latency = 21;
let NumMicroOps = 7;
}
@@ -2019,14 +2019,14 @@ def ADLPWriteResGroup222 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort05]> {
def : InstRW<[ADLPWriteResGroup222], (instrs SHA1MSG1rr)>;
def ADLPWriteResGroup223 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_06, ADLPPort01_05, ADLPPort02_03_11]> {
- let ResourceCycles = [2, 2, 1, 2, 1];
+ let ReleaseAtCycles = [2, 2, 1, 2, 1];
let Latency = 13;
let NumMicroOps = 8;
}
def : InstRW<[ADLPWriteResGroup223, ReadAfterVecXLd], (instrs SHA1MSG2rm)>;
def ADLPWriteResGroup224 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_06, ADLPPort01_05]> {
- let ResourceCycles = [2, 2, 1, 2];
+ let ReleaseAtCycles = [2, 2, 1, 2];
let Latency = 6;
let NumMicroOps = 7;
}
@@ -2058,28 +2058,28 @@ def : InstRW<[ADLPWriteResGroup228], (instrs SHA1RNDS4rri,
SHA256RNDS2rr)>;
def ADLPWriteResGroup229 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_06, ADLPPort02_03_11, ADLPPort05]> {
- let ResourceCycles = [3, 2, 1, 1, 1];
+ let ReleaseAtCycles = [3, 2, 1, 1, 1];
let Latency = 12;
let NumMicroOps = 8;
}
def : InstRW<[ADLPWriteResGroup229, ReadAfterVecXLd], (instrs SHA256MSG1rm)>;
def ADLPWriteResGroup230 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_06, ADLPPort05]> {
- let ResourceCycles = [3, 2, 1, 1];
+ let ReleaseAtCycles = [3, 2, 1, 1];
let Latency = 5;
let NumMicroOps = 7;
}
def : InstRW<[ADLPWriteResGroup230], (instrs SHA256MSG1rr)>;
def ADLPWriteResGroup231 : SchedWriteRes<[ADLPPort02_03_11, ADLPPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 13;
let NumMicroOps = 3;
}
def : InstRW<[ADLPWriteResGroup231, ReadAfterVecXLd], (instrs SHA256MSG2rm)>;
def ADLPWriteResGroup232 : SchedWriteRes<[ADLPPort05]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 6;
let NumMicroOps = 2;
}
@@ -2121,21 +2121,21 @@ def ADLPWriteResGroup238 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06]
def : InstRW<[ADLPWriteResGroup238], (instrs STD)>;
def ADLPWriteResGroup239 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> {
- let ResourceCycles = [1, 4, 1];
+ let ReleaseAtCycles = [1, 4, 1];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 6;
}
def : InstRW<[ADLPWriteResGroup239], (instrs STI)>;
def ADLPWriteResGroup240 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort04_09, ADLPPort07_08]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 8;
let NumMicroOps = 4;
}
def : InstRW<[ADLPWriteResGroup240], (instrs STOSB)>;
def ADLPWriteResGroup241 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort04_09, ADLPPort07_08]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 7;
let NumMicroOps = 4;
}
@@ -2154,7 +2154,7 @@ def : InstRW<[ADLPWriteResGroup243], (instregex "^(TST|XAM)_F$")>;
def : InstRW<[ADLPWriteResGroup243], (instrs UCOM_FPPr)>;
def ADLPWriteResGroup244 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort02_03_11]> {
- let ResourceCycles = [3, 1];
+ let ReleaseAtCycles = [3, 1];
let Latency = 9;
let NumMicroOps = 4;
}
@@ -2162,7 +2162,7 @@ def : InstRW<[ADLPWriteResGroup244, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefaul
def : InstRW<[ADLPWriteResGroup244, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBrm)>;
def ADLPWriteResGroup245 : SchedWriteRes<[ADLPPort00_01_05]> {
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
let Latency = 3;
let NumMicroOps = 3;
}
@@ -2170,35 +2170,35 @@ def : InstRW<[ADLPWriteResGroup245], (instregex "^VBLENDVP(D|S)rr$")>;
def : InstRW<[ADLPWriteResGroup245], (instrs VPBLENDVBrr)>;
def ADLPWriteResGroup246 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_11]> {
- let ResourceCycles = [6, 7, 18];
+ let ReleaseAtCycles = [6, 7, 18];
let Latency = 81;
let NumMicroOps = 31;
}
def : InstRW<[ADLPWriteResGroup246], (instrs VERRm)>;
def ADLPWriteResGroup247 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_11]> {
- let ResourceCycles = [6, 7, 17];
+ let ReleaseAtCycles = [6, 7, 17];
let Latency = 74;
let NumMicroOps = 30;
}
def : InstRW<[ADLPWriteResGroup247], (instrs VERRr)>;
def ADLPWriteResGroup248 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_11]> {
- let ResourceCycles = [5, 8, 21];
+ let ReleaseAtCycles = [5, 8, 21];
let Latency = 81;
let NumMicroOps = 34;
}
def : InstRW<[ADLPWriteResGroup248], (instrs VERWm)>;
def ADLPWriteResGroup249 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_11]> {
- let ResourceCycles = [5, 8, 20];
+ let ReleaseAtCycles = [5, 8, 20];
let Latency = 74;
let NumMicroOps = 33;
}
def : InstRW<[ADLPWriteResGroup249], (instrs VERWr)>;
def ADLPWriteResGroup250 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_11]> {
- let ResourceCycles = [1, 1, 2, 4];
+ let ReleaseAtCycles = [1, 1, 2, 4];
let Latency = 29;
let NumMicroOps = 8;
}
@@ -2208,7 +2208,7 @@ def : InstRW<[ADLPWriteResGroup250, WriteVecMaskedGatherWriteback], (instrs VGAT
VPGATHERQDYrm)>;
def ADLPWriteResGroup251 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_11]> {
- let ResourceCycles = [1, 1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 1, 2];
let Latency = 20;
let NumMicroOps = 5;
}
@@ -2218,7 +2218,7 @@ def : InstRW<[ADLPWriteResGroup251, WriteVecMaskedGatherWriteback], (instrs VGAT
VPGATHERQDrm)>;
def ADLPWriteResGroup252 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_11]> {
- let ResourceCycles = [1, 1, 2, 8];
+ let ReleaseAtCycles = [1, 1, 2, 8];
let Latency = 30;
let NumMicroOps = 12;
}
@@ -2226,7 +2226,7 @@ def : InstRW<[ADLPWriteResGroup252, WriteVecMaskedGatherWriteback], (instrs VGAT
VPGATHERDDYrm)>;
def ADLPWriteResGroup253 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_11]> {
- let ResourceCycles = [1, 1, 2, 4];
+ let ReleaseAtCycles = [1, 1, 2, 4];
let Latency = 28;
let NumMicroOps = 8;
}
@@ -2234,7 +2234,7 @@ def : InstRW<[ADLPWriteResGroup253, WriteVecMaskedGatherWriteback], (instrs VGAT
VPGATHERDDrm)>;
def ADLPWriteResGroup254 : SchedWriteRes<[ADLPPort01_05, ADLPPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 5;
let NumMicroOps = 3;
}
@@ -2254,7 +2254,7 @@ def ADLPWriteResGroup256 : SchedWriteRes<[ADLPPort00, ADLPPort00_06, ADLPPort02_
def : InstRW<[ADLPWriteResGroup256], (instrs VLDMXCSR)>;
def ADLPWriteResGroup257 : SchedWriteRes<[ADLPPort00_01_05_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03, ADLPPort02_03_07, ADLPPort04, ADLPPort05, ADLPPort06]> {
- let ResourceCycles = [8, 1, 1, 1, 1, 1, 2, 3];
+ let ReleaseAtCycles = [8, 1, 1, 1, 1, 1, 2, 3];
let Latency = 40;
let NumMicroOps = 18;
}
@@ -2305,98 +2305,98 @@ def : InstRW<[ADLPWriteResGroup264, ReadAfterVecYLd], (instregex "^VSHUFP(D|S)Yr
def : InstRW<[ADLPWriteResGroup264, ReadAfterVecYLd], (instrs VPBLENDWYrmi)>;
def ADLPWriteResGroup266 : SchedWriteRes<[ADLPPort00_01, ADLPPort01_05, ADLPPort02_03_11]> {
- let ResourceCycles = [1, 2, 1];
+ let ReleaseAtCycles = [1, 2, 1];
let Latency = 10;
let NumMicroOps = 4;
}
def : InstRW<[ADLPWriteResGroup266, ReadAfterVecYLd], (instregex "^VPH(ADD|SUB)SWYrm$")>;
def ADLPWriteResGroup267 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10]> {
- let ResourceCycles = [1, 2, 3, 3, 1];
+ let ReleaseAtCycles = [1, 2, 3, 3, 1];
let Latency = 16;
let NumMicroOps = 10;
}
def : InstRW<[ADLPWriteResGroup267], (instrs VZEROALL)>;
def ADLPWriteResGroup268 : SchedWriteRes<[ADLPPort00_01_05_06]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 2;
let NumMicroOps = 2;
}
def : InstRW<[ADLPWriteResGroup268], (instrs WAIT)>;
def ADLPWriteResGroup269 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [8, 6, 19, 63, 21, 15, 1, 10, 1];
+ let ReleaseAtCycles = [8, 6, 19, 63, 21, 15, 1, 10, 1];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 144;
}
def : InstRW<[ADLPWriteResGroup269], (instrs WRMSR)>;
def ADLPWriteResGroup270 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort05]> {
- let ResourceCycles = [2, 1, 4, 1];
+ let ReleaseAtCycles = [2, 1, 4, 1];
let Latency = AlderlakePModel.MaxLatency;
let NumMicroOps = 8;
}
def : InstRW<[ADLPWriteResGroup270], (instrs WRPKRUr)>;
def ADLPWriteResGroup271 : SchedWriteRes<[ADLPPort00_01_05_06_10]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 12;
let NumMicroOps = 2;
}
def : InstRW<[ADLPWriteResGroup271, WriteRMW], (instregex "^XADD(16|32|64)rm$")>;
def ADLPWriteResGroup272 : SchedWriteRes<[ADLPPort00_01_05_06_10]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 13;
let NumMicroOps = 2;
}
def : InstRW<[ADLPWriteResGroup272, WriteRMW], (instrs XADD8rm)>;
def ADLPWriteResGroup273 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06]> {
- let ResourceCycles = [4, 1];
+ let ReleaseAtCycles = [4, 1];
let Latency = 39;
let NumMicroOps = 5;
}
def : InstRW<[ADLPWriteResGroup273, WriteRMW], (instregex "^XCHG(16|32)rm$")>;
def ADLPWriteResGroup274 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06]> {
- let ResourceCycles = [5, 1];
+ let ReleaseAtCycles = [5, 1];
let Latency = 39;
let NumMicroOps = 6;
}
def : InstRW<[ADLPWriteResGroup274, WriteRMW], (instrs XCHG64rm)>;
def ADLPWriteResGroup275 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06]> {
- let ResourceCycles = [4, 1];
+ let ReleaseAtCycles = [4, 1];
let Latency = 40;
let NumMicroOps = 5;
}
def : InstRW<[ADLPWriteResGroup275, WriteRMW], (instrs XCHG8rm)>;
def ADLPWriteResGroup276 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06, ADLPPort00_05, ADLPPort01, ADLPPort05, ADLPPort06]> {
- let ResourceCycles = [2, 4, 2, 1, 2, 4];
+ let ReleaseAtCycles = [2, 4, 2, 1, 2, 4];
let Latency = 17;
let NumMicroOps = 15;
}
def : InstRW<[ADLPWriteResGroup276], (instrs XCH_F)>;
def ADLPWriteResGroup277 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01]> {
- let ResourceCycles = [7, 3, 8, 5];
+ let ReleaseAtCycles = [7, 3, 8, 5];
let Latency = 4;
let NumMicroOps = 23;
}
def : InstRW<[ADLPWriteResGroup277], (instrs XGETBV)>;
def ADLPWriteResGroup278 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 7;
let NumMicroOps = 3;
}
def : InstRW<[ADLPWriteResGroup278], (instrs XLAT)>;
def ADLPWriteResGroup279 : SchedWriteRes<[ADLPPort00_01_05_06, ADLPPort01, ADLPPort02_03, ADLPPort06]> {
- let ResourceCycles = [21, 1, 1, 8];
+ let ReleaseAtCycles = [21, 1, 1, 8];
let Latency = 37;
let NumMicroOps = 31;
}
@@ -2404,63 +2404,63 @@ def : InstRW<[ADLPWriteResGroup279], (instregex "^XRSTOR((S|64)?)$")>;
def : InstRW<[ADLPWriteResGroup279], (instrs XRSTORS64)>;
def ADLPWriteResGroup280 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [14, 25, 44, 21, 21, 4, 1, 9, 1];
+ let ReleaseAtCycles = [14, 25, 44, 21, 21, 4, 1, 9, 1];
let Latency = 42;
let NumMicroOps = 140;
}
def : InstRW<[ADLPWriteResGroup280], (instrs XSAVE)>;
def ADLPWriteResGroup281 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [14, 25, 44, 21, 21, 4, 1, 9, 1];
+ let ReleaseAtCycles = [14, 25, 44, 21, 21, 4, 1, 9, 1];
let Latency = 41;
let NumMicroOps = 140;
}
def : InstRW<[ADLPWriteResGroup281], (instrs XSAVE64)>;
def ADLPWriteResGroup282 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [1, 19, 36, 52, 23, 4, 2, 12, 2];
+ let ReleaseAtCycles = [1, 19, 36, 52, 23, 4, 2, 12, 2];
let Latency = 42;
let NumMicroOps = 151;
}
def : InstRW<[ADLPWriteResGroup282], (instrs XSAVEC)>;
def ADLPWriteResGroup283 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [1, 19, 36, 53, 23, 4, 2, 12, 2];
+ let ReleaseAtCycles = [1, 19, 36, 53, 23, 4, 2, 12, 2];
let Latency = 42;
let NumMicroOps = 152;
}
def : InstRW<[ADLPWriteResGroup283], (instrs XSAVEC64)>;
def ADLPWriteResGroup284 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [25, 35, 52, 27, 4, 1, 10, 1];
+ let ReleaseAtCycles = [25, 35, 52, 27, 4, 1, 10, 1];
let Latency = 46;
let NumMicroOps = 155;
}
def : InstRW<[ADLPWriteResGroup284], (instrs XSAVEOPT)>;
def ADLPWriteResGroup285 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [25, 35, 53, 27, 4, 1, 10, 1];
+ let ReleaseAtCycles = [25, 35, 53, 27, 4, 1, 10, 1];
let Latency = 46;
let NumMicroOps = 156;
}
def : InstRW<[ADLPWriteResGroup285], (instrs XSAVEOPT64)>;
def ADLPWriteResGroup286 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [23, 32, 53, 29, 30, 4, 2, 9, 2];
+ let ReleaseAtCycles = [23, 32, 53, 29, 30, 4, 2, 9, 2];
let Latency = 42;
let NumMicroOps = 184;
}
def : InstRW<[ADLPWriteResGroup286], (instrs XSAVES)>;
def ADLPWriteResGroup287 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> {
- let ResourceCycles = [23, 33, 53, 29, 32, 4, 2, 8, 2];
+ let ReleaseAtCycles = [23, 33, 53, 29, 32, 4, 2, 8, 2];
let Latency = 42;
let NumMicroOps = 186;
}
def : InstRW<[ADLPWriteResGroup287], (instrs XSAVES64)>;
def ADLPWriteResGroup288 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort05]> {
- let ResourceCycles = [4, 23, 2, 14, 8, 1, 2];
+ let ReleaseAtCycles = [4, 23, 2, 14, 8, 1, 2];
let Latency = 5;
let NumMicroOps = 54;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedBroadwell.td b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedBroadwell.td
index a9639e77712e..61a8832000e2 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -95,7 +95,7 @@ multiclass BWWriteResPair<X86FoldableSchedWrite SchedRW,
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
- let ResourceCycles = Res;
+ let ReleaseAtCycles = Res;
let NumMicroOps = UOps;
}
@@ -103,7 +103,7 @@ multiclass BWWriteResPair<X86FoldableSchedWrite SchedRW,
// the latency (default = 5).
def : WriteRes<SchedRW.Folded, !listconcat([BWPort23], ExePorts)> {
let Latency = !add(Lat, LoadLat);
- let ResourceCycles = !listconcat([1], Res);
+ let ReleaseAtCycles = !listconcat([1], Res);
let NumMicroOps = !add(UOps, LoadUOps);
}
}
@@ -489,7 +489,7 @@ defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
def : WriteRes<WriteVecInsert, [BWPort5]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def : WriteRes<WriteVecInsertLd, [BWPort5,BWPort23]> {
let Latency = 6;
@@ -511,48 +511,48 @@ def : WriteRes<WriteVecExtractSt, [BWPort4,BWPort5,BWPort237]> {
def : WriteRes<WritePCmpIStrM, [BWPort0]> {
let Latency = 11;
let NumMicroOps = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrMLd, [BWPort0, BWPort23]> {
let Latency = 16;
let NumMicroOps = 4;
- let ResourceCycles = [3,1];
+ let ReleaseAtCycles = [3,1];
}
// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [BWPort0, BWPort5, BWPort015, BWPort0156]> {
let Latency = 19;
let NumMicroOps = 9;
- let ResourceCycles = [4,3,1,1];
+ let ReleaseAtCycles = [4,3,1,1];
}
def : WriteRes<WritePCmpEStrMLd, [BWPort0, BWPort5, BWPort23, BWPort015, BWPort0156]> {
let Latency = 24;
let NumMicroOps = 10;
- let ResourceCycles = [4,3,1,1,1];
+ let ReleaseAtCycles = [4,3,1,1,1];
}
// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [BWPort0]> {
let Latency = 11;
let NumMicroOps = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrILd, [BWPort0, BWPort23]> {
let Latency = 16;
let NumMicroOps = 4;
- let ResourceCycles = [3,1];
+ let ReleaseAtCycles = [3,1];
}
// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [BWPort0, BWPort5, BWPort0156]> {
let Latency = 18;
let NumMicroOps = 8;
- let ResourceCycles = [4,3,1];
+ let ReleaseAtCycles = [4,3,1];
}
def : WriteRes<WritePCmpEStrILd, [BWPort0, BWPort5, BWPort23, BWPort0156]> {
let Latency = 23;
let NumMicroOps = 9;
- let ResourceCycles = [4,3,1,1];
+ let ReleaseAtCycles = [4,3,1,1];
}
// MOVMSK Instructions.
@@ -565,41 +565,41 @@ def : WriteRes<WriteMMXMOVMSK, [BWPort0]> { let Latency = 1; }
def : WriteRes<WriteAESDecEnc, [BWPort5]> { // Decryption, encryption.
let Latency = 7;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def : WriteRes<WriteAESDecEncLd, [BWPort5, BWPort23]> {
let Latency = 12;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def : WriteRes<WriteAESIMC, [BWPort5]> { // InvMixColumn.
let Latency = 14;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def : WriteRes<WriteAESIMCLd, [BWPort5, BWPort23]> {
let Latency = 19;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def : WriteRes<WriteAESKeyGen, [BWPort0, BWPort5, BWPort015]> { // Key Generation.
let Latency = 29;
let NumMicroOps = 11;
- let ResourceCycles = [2,7,2];
+ let ReleaseAtCycles = [2,7,2];
}
def : WriteRes<WriteAESKeyGenLd, [BWPort0, BWPort5, BWPort23, BWPort015]> {
let Latency = 33;
let NumMicroOps = 11;
- let ResourceCycles = [2,7,1,1];
+ let ReleaseAtCycles = [2,7,1,1];
}
// Carry-less multiplication instructions.
defm : BWWriteResPair<WriteCLMul, [BWPort0], 5>;
// Load/store MXCSR.
-def : WriteRes<WriteLDMXCSR, [BWPort0,BWPort23,BWPort0156]> { let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
-def : WriteRes<WriteSTMXCSR, [BWPort4,BWPort5,BWPort237]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
+def : WriteRes<WriteLDMXCSR, [BWPort0,BWPort23,BWPort0156]> { let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1]; }
+def : WriteRes<WriteSTMXCSR, [BWPort4,BWPort5,BWPort237]> { let Latency = 2; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1]; }
// Catch-all for expensive system instructions.
def : WriteRes<WriteSystem, [BWPort0156]> { let Latency = 100; }
@@ -628,7 +628,7 @@ defm : BWWriteResPair<WritePHAddY, [BWPort5,BWPort15], 3, [2,1], 3, 6>;
def BWWriteResGroup1 : SchedWriteRes<[BWPort0]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[BWWriteResGroup1], (instregex "VPSLLVQ(Y?)rr",
"VPSRLVQ(Y?)rr")>;
@@ -636,7 +636,7 @@ def: InstRW<[BWWriteResGroup1], (instregex "VPSLLVQ(Y?)rr",
def BWWriteResGroup2 : SchedWriteRes<[BWPort1]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[BWWriteResGroup2], (instregex "COM(P?)_FST0r",
"UCOM_F(P?)r")>;
@@ -644,49 +644,49 @@ def: InstRW<[BWWriteResGroup2], (instregex "COM(P?)_FST0r",
def BWWriteResGroup3 : SchedWriteRes<[BWPort5]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[BWWriteResGroup3], (instrs MMX_MOVQ2DQrr)>;
def BWWriteResGroup4 : SchedWriteRes<[BWPort6]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[BWWriteResGroup4], (instregex "JMP(16|32|64)r")>;
def BWWriteResGroup5 : SchedWriteRes<[BWPort01]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[BWWriteResGroup5], (instrs FINCSTP, FNOP)>;
def BWWriteResGroup6 : SchedWriteRes<[BWPort06]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[BWWriteResGroup6], (instrs CDQ, CQO)>;
def BWWriteResGroup7 : SchedWriteRes<[BWPort15]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[BWWriteResGroup7], (instregex "ANDN(32|64)rr")>;
def BWWriteResGroup8 : SchedWriteRes<[BWPort015]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[BWWriteResGroup8], (instregex "VPBLENDD(Y?)rri")>;
def BWWriteResGroup9 : SchedWriteRes<[BWPort0156]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[BWWriteResGroup9], (instrs SGDT64m,
SIDT64m,
@@ -697,7 +697,7 @@ def: InstRW<[BWWriteResGroup9], (instrs SGDT64m,
def BWWriteResGroup10 : SchedWriteRes<[BWPort4,BWPort237]> {
let Latency = 1;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup10], (instrs FBSTPm)>;
def: InstRW<[BWWriteResGroup10], (instregex "ST_FP(32|64|80)m")>;
@@ -705,14 +705,14 @@ def: InstRW<[BWWriteResGroup10], (instregex "ST_FP(32|64|80)m")>;
def BWWriteResGroup12 : SchedWriteRes<[BWPort01]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[BWWriteResGroup12], (instrs FDECSTP)>;
def BWWriteResGroup14 : SchedWriteRes<[BWPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[BWWriteResGroup14], (instrs LFENCE,
MFENCE,
@@ -722,28 +722,28 @@ def: InstRW<[BWWriteResGroup14], (instrs LFENCE,
def BWWriteResGroup16 : SchedWriteRes<[BWPort6,BWPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup16], (instregex "CLFLUSH")>;
def BWWriteResGroup17 : SchedWriteRes<[BWPort01,BWPort015]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup17], (instrs MMX_MOVDQ2Qrr)>;
def BWWriteResGroup18 : SchedWriteRes<[BWPort237,BWPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup18], (instrs SFENCE)>;
def BWWriteResGroup20 : SchedWriteRes<[BWPort06,BWPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup20], (instrs CWD,
JCXZ, JECXZ, JRCXZ,
@@ -755,21 +755,21 @@ def: InstRW<[BWWriteResGroup20], (instrs CWD,
def BWWriteResGroup22 : SchedWriteRes<[BWPort4,BWPort6,BWPort237]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[BWWriteResGroup22], (instrs FNSTCW16m)>;
def BWWriteResGroup24 : SchedWriteRes<[BWPort4,BWPort237,BWPort15]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[BWWriteResGroup24], (instregex "MOVBE(16|32|64)mr")>;
def BWWriteResGroup25 : SchedWriteRes<[BWPort4,BWPort237,BWPort0156]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[BWWriteResGroup25], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
STOSB, STOSL, STOSQ, STOSW)>;
@@ -778,14 +778,14 @@ def: InstRW<[BWWriteResGroup25], (instregex "PUSH(16|32|64)rmr")>;
def BWWriteResGroup27 : SchedWriteRes<[BWPort1]> {
let Latency = 3;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[BWWriteResGroup27], (instregex "P(DEP|EXT)(32|64)rr")>;
def BWWriteResGroup28 : SchedWriteRes<[BWPort5]> {
let Latency = 3;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[BWWriteResGroup28], (instrs VPBROADCASTBrr,
VPBROADCASTWrr)>;
@@ -793,7 +793,7 @@ def: InstRW<[BWWriteResGroup28], (instrs VPBROADCASTBrr,
def BWWriteResGroup33 : SchedWriteRes<[BWPort5,BWPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[BWWriteResGroup33], (instrs MMX_PACKSSDWrr,
MMX_PACKSSWBrr,
@@ -802,14 +802,14 @@ def: InstRW<[BWWriteResGroup33], (instrs MMX_PACKSSDWrr,
def BWWriteResGroup34 : SchedWriteRes<[BWPort6,BWPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[BWWriteResGroup34], (instregex "CLD")>;
def BWWriteResGroup35 : SchedWriteRes<[BWPort06,BWPort0156]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[BWWriteResGroup35], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;
@@ -817,28 +817,28 @@ def: InstRW<[BWWriteResGroup35], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
def BWWriteResGroup36 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
let Latency = 5;
let NumMicroOps = 8;
- let ResourceCycles = [2,4,2];
+ let ReleaseAtCycles = [2,4,2];
}
def: InstRW<[BWWriteResGroup36], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;
def BWWriteResGroup36b : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
let Latency = 6;
let NumMicroOps = 8;
- let ResourceCycles = [2,4,2];
+ let ReleaseAtCycles = [2,4,2];
}
def: InstRW<[BWWriteResGroup36b], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;
def BWWriteResGroup37 : SchedWriteRes<[BWPort4,BWPort6,BWPort237,BWPort0156]> {
let Latency = 3;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[BWWriteResGroup37], (instregex "CALL(16|32|64)r")>;
def BWWriteResGroup38 : SchedWriteRes<[BWPort4,BWPort237,BWPort06,BWPort0156]> {
let Latency = 3;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[BWWriteResGroup38], (instrs CALL64pcrel32)>;
@@ -846,28 +846,28 @@ def: InstRW<[BWWriteResGroup38], (instrs CALL64pcrel32)>;
def BWWriteResGroup41 : SchedWriteRes<[BWPort0,BWPort0156]> {
let Latency = 4;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup41], (instrs FNSTSW16r)>;
def BWWriteResGroup42 : SchedWriteRes<[BWPort1,BWPort5]> {
let Latency = 4;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVT(T?)PS2PIrr")>;
def BWWriteResGroup43 : SchedWriteRes<[BWPort0,BWPort4,BWPort237]> {
let Latency = 4;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[BWWriteResGroup43], (instrs FNSTSWm)>;
def BWWriteResGroup44 : SchedWriteRes<[BWPort1,BWPort4,BWPort237]> {
let Latency = 4;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[BWWriteResGroup44], (instregex "IST(T?)_FP(16|32|64)m",
"IST_F(16|32)m")>;
@@ -875,28 +875,28 @@ def: InstRW<[BWWriteResGroup44], (instregex "IST(T?)_FP(16|32|64)m",
def BWWriteResGroup45 : SchedWriteRes<[BWPort0156]> {
let Latency = 4;
let NumMicroOps = 4;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
}
def: InstRW<[BWWriteResGroup45], (instrs FNCLEX)>;
def BWWriteResGroup46 : SchedWriteRes<[]> {
let Latency = 0;
let NumMicroOps = 4;
- let ResourceCycles = [];
+ let ReleaseAtCycles = [];
}
def: InstRW<[BWWriteResGroup46], (instrs VZEROUPPER)>;
def BWWriteResGroup47 : SchedWriteRes<[BWPort0]> {
let Latency = 5;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[BWWriteResGroup47], (instregex "MUL_(FPrST0|FST0r|FrST0)")>;
def BWWriteResGroup49 : SchedWriteRes<[BWPort23]> {
let Latency = 5;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[BWWriteResGroup49], (instrs VBROADCASTSSrm,
VMOVDDUPrm, MOVDDUPrm,
@@ -908,46 +908,46 @@ def: InstRW<[BWWriteResGroup49], (instrs VBROADCASTSSrm,
def BWWriteResGroup50 : SchedWriteRes<[BWPort1,BWPort5]> {
let Latency = 5;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[BWWriteResGroup50], (instregex "(V?)CVTSI642SSrr")>;
def BWWriteResGroup51 : SchedWriteRes<[BWPort1,BWPort6,BWPort06]> {
let Latency = 5;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[BWWriteResGroup51], (instregex "STR(16|32|64)r")>;
def BWWriteResGroup54 : SchedWriteRes<[BWPort6,BWPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
- let ResourceCycles = [1,4];
+ let ReleaseAtCycles = [1,4];
}
def: InstRW<[BWWriteResGroup54], (instrs PAUSE)>;
def BWWriteResGroup55 : SchedWriteRes<[BWPort06,BWPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
- let ResourceCycles = [1,4];
+ let ReleaseAtCycles = [1,4];
}
def: InstRW<[BWWriteResGroup55], (instrs XSETBV)>;
def BWWriteResGroup57 : SchedWriteRes<[BWPort4,BWPort237,BWPort0156]> {
let Latency = 5;
let NumMicroOps = 6;
- let ResourceCycles = [1,1,4];
+ let ReleaseAtCycles = [1,1,4];
}
def: InstRW<[BWWriteResGroup57], (instregex "PUSHF(16|64)")>;
def BWWriteResGroup58 : SchedWriteRes<[BWPort23]> {
let Latency = 6;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[BWWriteResGroup58], (instregex "LD_F(32|64|80)m")>;
-def: InstRW<[BWWriteResGroup58], (instrs VBROADCASTF128,
- VBROADCASTI128,
+def: InstRW<[BWWriteResGroup58], (instrs VBROADCASTF128rm,
+ VBROADCASTI128rm,
VBROADCASTSDYrm,
VBROADCASTSSYrm,
VMOVDDUPYrm,
@@ -959,14 +959,14 @@ def: InstRW<[BWWriteResGroup58], (instrs VBROADCASTF128,
def BWWriteResGroup59 : SchedWriteRes<[BWPort0,BWPort23]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup59], (instrs VPSLLVQrm, VPSRLVQrm)>;
def BWWriteResGroup62 : SchedWriteRes<[BWPort6,BWPort23]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup62], (instrs FARJMP64m)>;
def: InstRW<[BWWriteResGroup62], (instregex "JMP(16|32|64)m")>;
@@ -974,7 +974,7 @@ def: InstRW<[BWWriteResGroup62], (instregex "JMP(16|32|64)m")>;
def BWWriteResGroup64 : SchedWriteRes<[BWPort23,BWPort15]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup64], (instregex "ANDN(32|64)rm",
"MOVBE(16|32|64)rm")>;
@@ -982,7 +982,7 @@ def: InstRW<[BWWriteResGroup64], (instregex "ANDN(32|64)rm",
def BWWriteResGroup65 : SchedWriteRes<[BWPort23,BWPort015]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup65], (instrs VINSERTF128rm,
VINSERTI128rm,
@@ -991,7 +991,7 @@ def: InstRW<[BWWriteResGroup65], (instrs VINSERTF128rm,
def BWWriteResGroup66 : SchedWriteRes<[BWPort23,BWPort0156]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup66], (instrs POP16r, POP32r, POP64r)>;
def: InstRW<[BWWriteResGroup66], (instregex "POP(16|32|64)rmr")>;
@@ -999,14 +999,14 @@ def: InstRW<[BWWriteResGroup66], (instregex "POP(16|32|64)rmr")>;
def BWWriteResGroup68 : SchedWriteRes<[BWPort1,BWPort6,BWPort06,BWPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[BWWriteResGroup68], (instregex "SLDT(16|32|64)r")>;
def BWWriteResGroup69 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06]> {
let Latency = 6;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[BWWriteResGroup69], (instregex "SAR(8|16|32|64)m(1|i)",
"SHL(8|16|32|64)m(1|i)",
@@ -1015,7 +1015,7 @@ def: InstRW<[BWWriteResGroup69], (instregex "SAR(8|16|32|64)m(1|i)",
def BWWriteResGroup70 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[BWWriteResGroup70], (instregex "POP(16|32|64)rmm",
"PUSH(16|32|64)rmm")>;
@@ -1023,14 +1023,14 @@ def: InstRW<[BWWriteResGroup70], (instregex "POP(16|32|64)rmm",
def BWWriteResGroup71 : SchedWriteRes<[BWPort6,BWPort0156]> {
let Latency = 6;
let NumMicroOps = 6;
- let ResourceCycles = [1,5];
+ let ReleaseAtCycles = [1,5];
}
def: InstRW<[BWWriteResGroup71], (instrs STD)>;
def BWWriteResGroup73 : SchedWriteRes<[BWPort0,BWPort23]> {
let Latency = 7;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup73], (instrs VPSLLVQYrm,
VPSRLVQYrm)>;
@@ -1038,21 +1038,21 @@ def: InstRW<[BWWriteResGroup73], (instrs VPSLLVQYrm,
def BWWriteResGroup74 : SchedWriteRes<[BWPort1,BWPort23]> {
let Latency = 7;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup74], (instregex "FCOM(P?)(32|64)m")>;
def BWWriteResGroup77 : SchedWriteRes<[BWPort23,BWPort015]> {
let Latency = 7;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup77], (instrs VPBLENDDYrmi)>;
def BWWriteResGroup79 : SchedWriteRes<[BWPort5,BWPort23]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[BWWriteResGroup79], (instrs MMX_PACKSSDWrm,
MMX_PACKSSWBrm,
@@ -1061,7 +1061,7 @@ def: InstRW<[BWWriteResGroup79], (instrs MMX_PACKSSDWrm,
def BWWriteResGroup80 : SchedWriteRes<[BWPort23,BWPort0156]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[BWWriteResGroup80], (instrs LEAVE, LEAVE64,
SCASB, SCASL, SCASQ, SCASW)>;
@@ -1069,21 +1069,21 @@ def: InstRW<[BWWriteResGroup80], (instrs LEAVE, LEAVE64,
def BWWriteResGroup82 : SchedWriteRes<[BWPort0,BWPort01,BWPort23]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[BWWriteResGroup82], (instrs FLDCW16m)>;
def BWWriteResGroup84 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[BWWriteResGroup84], (instrs LRET64, RET64)>;
def BWWriteResGroup87 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06]> {
let Latency = 7;
let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,2];
+ let ReleaseAtCycles = [1,1,1,2];
}
def: InstRW<[BWWriteResGroup87], (instregex "ROL(8|16|32|64)m(1|i)",
"ROR(8|16|32|64)m(1|i)")>;
@@ -1091,7 +1091,7 @@ def: InstRW<[BWWriteResGroup87], (instregex "ROL(8|16|32|64)m(1|i)",
def BWWriteResGroup87_1 : SchedWriteRes<[BWPort06]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[BWWriteResGroup87_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
ROR8r1, ROR16r1, ROR32r1, ROR64r1)>;
@@ -1099,14 +1099,14 @@ def: InstRW<[BWWriteResGroup87_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
def BWWriteResGroup88 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> {
let Latency = 7;
let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,2];
+ let ReleaseAtCycles = [1,1,1,2];
}
def: InstRW<[BWWriteResGroup88], (instregex "XADD(8|16|32|64)rm")>;
def BWWriteResGroup89 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPort0156]> {
let Latency = 7;
let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1,1];
}
def: InstRW<[BWWriteResGroup89], (instregex "CALL(16|32|64)m")>;
def: InstRW<[BWWriteResGroup89], (instrs FARCALL64m)>;
@@ -1114,21 +1114,21 @@ def: InstRW<[BWWriteResGroup89], (instrs FARCALL64m)>;
def BWWriteResGroup90 : SchedWriteRes<[BWPort6,BWPort06,BWPort15,BWPort0156]> {
let Latency = 7;
let NumMicroOps = 7;
- let ResourceCycles = [2,2,1,2];
+ let ReleaseAtCycles = [2,2,1,2];
}
def: InstRW<[BWWriteResGroup90], (instrs LOOP)>;
def BWWriteResGroup91 : SchedWriteRes<[BWPort1,BWPort23]> {
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup91], (instregex "P(DEP|EXT)(32|64)rm")>;
def BWWriteResGroup92 : SchedWriteRes<[BWPort5,BWPort23]> {
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup92], (instrs VPMOVSXBDYrm,
VPMOVSXBQYrm,
@@ -1141,7 +1141,7 @@ def: InstRW<[BWWriteResGroup92], (instrs VPMOVSXBDYrm,
def BWWriteResGroup97 : SchedWriteRes<[BWPort23,BWPort237,BWPort06,BWPort0156]> {
let Latency = 8;
let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,2];
+ let ReleaseAtCycles = [1,1,1,2];
}
def: InstRW<[BWWriteResGroup97], (instregex "RCL(8|16|32|64)m(1|i)",
"RCR(8|16|32|64)m(1|i)")>;
@@ -1149,14 +1149,14 @@ def: InstRW<[BWWriteResGroup97], (instregex "RCL(8|16|32|64)m(1|i)",
def BWWriteResGroup99 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> {
let Latency = 8;
let NumMicroOps = 6;
- let ResourceCycles = [1,1,1,3];
+ let ReleaseAtCycles = [1,1,1,3];
}
def: InstRW<[BWWriteResGroup99], (instregex "XCHG(8|16|32|64)rm")>;
def BWWriteResGroup100 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06,BWPort0156]> {
let Latency = 8;
let NumMicroOps = 6;
- let ResourceCycles = [1,1,1,2,1];
+ let ReleaseAtCycles = [1,1,1,2,1];
}
def : SchedAlias<WriteADCRMW, BWWriteResGroup100>;
def: InstRW<[BWWriteResGroup100], (instregex "ROL(8|16|32|64)mCL",
@@ -1168,7 +1168,7 @@ def: InstRW<[BWWriteResGroup100], (instregex "ROL(8|16|32|64)mCL",
def BWWriteResGroup101 : SchedWriteRes<[BWPort1,BWPort23]> {
let Latency = 9;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup101], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m")>;
@@ -1176,7 +1176,7 @@ def: InstRW<[BWWriteResGroup101], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
def BWWriteResGroup108 : SchedWriteRes<[BWPort5,BWPort23,BWPort015]> {
let Latency = 9;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[BWWriteResGroup108], (instregex "VPBROADCASTB(Y?)rm",
"VPBROADCASTW(Y?)rm")>;
@@ -1184,14 +1184,14 @@ def: InstRW<[BWWriteResGroup108], (instregex "VPBROADCASTB(Y?)rm",
def BWWriteResGroup112 : SchedWriteRes<[BWPort23,BWPort06,BWPort0156]> {
let Latency = 9;
let NumMicroOps = 5;
- let ResourceCycles = [1,1,3];
+ let ReleaseAtCycles = [1,1,3];
}
def: InstRW<[BWWriteResGroup112], (instrs RDRAND16r, RDRAND32r, RDRAND64r)>;
def BWWriteResGroup113 : SchedWriteRes<[BWPort1,BWPort6,BWPort23,BWPort0156]> {
let Latency = 9;
let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
+ let ReleaseAtCycles = [1,2,1,1];
}
def: InstRW<[BWWriteResGroup113], (instregex "LAR(16|32|64)rm",
"LSL(16|32|64)rm")>;
@@ -1199,28 +1199,28 @@ def: InstRW<[BWWriteResGroup113], (instregex "LAR(16|32|64)rm",
def BWWriteResGroup115 : SchedWriteRes<[BWPort0,BWPort23]> {
let Latency = 10;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup115], (instregex "(V?)PCMPGTQrm")>;
def BWWriteResGroup117 : SchedWriteRes<[BWPort1,BWPort23]> {
let Latency = 10;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[BWWriteResGroup117], (instregex "FICOM(P?)(16|32)m")>;
def BWWriteResGroup122_1 : SchedWriteRes<[BWPort0,BWFPDivider]> {
let Latency = 11;
let NumMicroOps = 1;
- let ResourceCycles = [1,3]; // Really 2.5 cycle throughput
+ let ReleaseAtCycles = [1,3]; // Really 2.5 cycle throughput
}
def : SchedAlias<WriteFDiv, BWWriteResGroup122_1>; // TODO - convert to ZnWriteResFpuPair
def BWWriteResGroup123 : SchedWriteRes<[BWPort0,BWPort23]> {
let Latency = 11;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup123], (instregex "MUL_F(32|64)m")>;
def: InstRW<[BWWriteResGroup123], (instrs VPCMPGTQYrm)>;
@@ -1228,7 +1228,7 @@ def: InstRW<[BWWriteResGroup123], (instrs VPCMPGTQYrm)>;
def BWWriteResGroup131 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
let Latency = 11;
let NumMicroOps = 7;
- let ResourceCycles = [2,2,3];
+ let ReleaseAtCycles = [2,2,3];
}
def: InstRW<[BWWriteResGroup131], (instregex "RCL(16|32|64)rCL",
"RCR(16|32|64)rCL")>;
@@ -1236,14 +1236,14 @@ def: InstRW<[BWWriteResGroup131], (instregex "RCL(16|32|64)rCL",
def BWWriteResGroup132 : SchedWriteRes<[BWPort1,BWPort06,BWPort15,BWPort0156]> {
let Latency = 11;
let NumMicroOps = 9;
- let ResourceCycles = [1,4,1,3];
+ let ReleaseAtCycles = [1,4,1,3];
}
def: InstRW<[BWWriteResGroup132], (instrs RCL8rCL)>;
def BWWriteResGroup133 : SchedWriteRes<[BWPort06,BWPort0156]> {
let Latency = 11;
let NumMicroOps = 11;
- let ResourceCycles = [2,9];
+ let ReleaseAtCycles = [2,9];
}
def: InstRW<[BWWriteResGroup133], (instrs LOOPE)>;
def: InstRW<[BWWriteResGroup133], (instrs LOOPNE)>;
@@ -1251,84 +1251,84 @@ def: InstRW<[BWWriteResGroup133], (instrs LOOPNE)>;
def BWWriteResGroup135 : SchedWriteRes<[BWPort1,BWPort23]> {
let Latency = 12;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[BWWriteResGroup135], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>;
def BWWriteResGroup139_1 : SchedWriteRes<[BWPort0,BWFPDivider]> {
let Latency = 14;
let NumMicroOps = 1;
- let ResourceCycles = [1,4];
+ let ReleaseAtCycles = [1,4];
}
def : SchedAlias<WriteFDiv64, BWWriteResGroup139_1>; // TODO - convert to ZnWriteResFpuPair
def BWWriteResGroup141 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
let Latency = 14;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[BWWriteResGroup141], (instregex "MUL_FI(16|32)m")>;
def BWWriteResGroup144 : SchedWriteRes<[BWPort1,BWPort6,BWPort23,BWPort0156]> {
let Latency = 14;
let NumMicroOps = 8;
- let ResourceCycles = [2,2,1,3];
+ let ReleaseAtCycles = [2,2,1,3];
}
def: InstRW<[BWWriteResGroup144], (instregex "LAR(16|32|64)rr")>;
def BWWriteResGroup145 : SchedWriteRes<[BWPort1,BWPort06,BWPort15,BWPort0156]> {
let Latency = 14;
let NumMicroOps = 10;
- let ResourceCycles = [2,3,1,4];
+ let ReleaseAtCycles = [2,3,1,4];
}
def: InstRW<[BWWriteResGroup145], (instrs RCR8rCL)>;
def BWWriteResGroup146 : SchedWriteRes<[BWPort0,BWPort1,BWPort6,BWPort0156]> {
let Latency = 14;
let NumMicroOps = 12;
- let ResourceCycles = [2,1,4,5];
+ let ReleaseAtCycles = [2,1,4,5];
}
def: InstRW<[BWWriteResGroup146], (instrs XCH_F)>;
def BWWriteResGroup147 : SchedWriteRes<[BWPort0]> {
let Latency = 15;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[BWWriteResGroup147], (instregex "DIVR_(FPrST0|FST0r|FrST0)")>;
def BWWriteResGroup149 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort06,BWPort15,BWPort0156]> {
let Latency = 15;
let NumMicroOps = 10;
- let ResourceCycles = [1,1,1,4,1,2];
+ let ReleaseAtCycles = [1,1,1,4,1,2];
}
def: InstRW<[BWWriteResGroup149], (instregex "RCL(8|16|32|64)mCL")>;
def BWWriteResGroup150 : SchedWriteRes<[BWPort0,BWPort23,BWFPDivider]> {
let Latency = 16;
let NumMicroOps = 2;
- let ResourceCycles = [1,1,5];
+ let ReleaseAtCycles = [1,1,5];
}
def : SchedAlias<WriteFDivLd, BWWriteResGroup150>; // TODO - convert to ZnWriteResFpuPair
def BWWriteResGroup153 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06,BWPort15,BWPort0156]> {
let Latency = 16;
let NumMicroOps = 14;
- let ResourceCycles = [1,1,1,4,2,5];
+ let ReleaseAtCycles = [1,1,1,4,2,5];
}
def: InstRW<[BWWriteResGroup153], (instrs CMPXCHG8B)>;
def BWWriteResGroup154 : SchedWriteRes<[BWPort5,BWPort6]> {
let Latency = 8;
let NumMicroOps = 20;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup154], (instrs VZEROALL)>;
def BWWriteResGroup159 : SchedWriteRes<[BWPort5,BWPort6,BWPort06,BWPort0156]> {
let Latency = 18;
let NumMicroOps = 8;
- let ResourceCycles = [1,1,1,5];
+ let ReleaseAtCycles = [1,1,1,5];
}
def: InstRW<[BWWriteResGroup159], (instrs CPUID)>;
def: InstRW<[BWWriteResGroup159], (instrs RDTSC)>;
@@ -1336,84 +1336,84 @@ def: InstRW<[BWWriteResGroup159], (instrs RDTSC)>;
def BWWriteResGroup160 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort06,BWPort15,BWPort0156]> {
let Latency = 18;
let NumMicroOps = 11;
- let ResourceCycles = [2,1,1,3,1,3];
+ let ReleaseAtCycles = [2,1,1,3,1,3];
}
def: InstRW<[BWWriteResGroup160], (instregex "RCR(8|16|32|64)mCL")>;
def BWWriteResGroup161 : SchedWriteRes<[BWPort0,BWPort23,BWFPDivider]> {
let Latency = 19;
let NumMicroOps = 2;
- let ResourceCycles = [1,1,8];
+ let ReleaseAtCycles = [1,1,8];
}
def : SchedAlias<WriteFDiv64Ld, BWWriteResGroup161>; // TODO - convert to ZnWriteResFpuPair
def BWWriteResGroup165 : SchedWriteRes<[BWPort0]> {
let Latency = 20;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[BWWriteResGroup165], (instregex "DIV_(FPrST0|FST0r|FrST0)")>;
def BWWriteResGroup167 : SchedWriteRes<[BWPort4,BWPort5,BWPort6,BWPort23,BWPort237,BWPort06,BWPort0156]> {
let Latency = 20;
let NumMicroOps = 8;
- let ResourceCycles = [1,1,1,1,1,1,2];
+ let ReleaseAtCycles = [1,1,1,1,1,1,2];
}
def: InstRW<[BWWriteResGroup167], (instrs INSB, INSL, INSW)>;
def BWWriteResGroup169 : SchedWriteRes<[BWPort0,BWPort23]> {
let Latency = 21;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup169], (instregex "DIV_F(32|64)m")>;
def BWWriteResGroup171 : SchedWriteRes<[BWPort0,BWPort4,BWPort5,BWPort23,BWPort237,BWPort06,BWPort0156]> {
let Latency = 21;
let NumMicroOps = 19;
- let ResourceCycles = [2,1,4,1,1,4,6];
+ let ReleaseAtCycles = [2,1,4,1,1,4,6];
}
def: InstRW<[BWWriteResGroup171], (instrs CMPXCHG16B)>;
def BWWriteResGroup172 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> {
let Latency = 22;
let NumMicroOps = 18;
- let ResourceCycles = [1,1,16];
+ let ReleaseAtCycles = [1,1,16];
}
def: InstRW<[BWWriteResGroup172], (instrs POPF64)>;
def BWWriteResGroup176 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> {
let Latency = 23;
let NumMicroOps = 19;
- let ResourceCycles = [3,1,15];
+ let ReleaseAtCycles = [3,1,15];
}
def: InstRW<[BWWriteResGroup176], (instregex "XRSTOR(64)?")>;
def BWWriteResGroup177 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
let Latency = 24;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[BWWriteResGroup177], (instregex "DIV_FI(16|32)m")>;
def BWWriteResGroup180 : SchedWriteRes<[BWPort0,BWPort23]> {
let Latency = 26;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[BWWriteResGroup180], (instregex "DIVR_F(32|64)m")>;
def BWWriteResGroup182 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
let Latency = 29;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[BWWriteResGroup182], (instregex "DIVR_FI(16|32)m")>;
def BWWriteResGroup183_1 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
let Latency = 17;
let NumMicroOps = 7;
- let ResourceCycles = [1,3,2,1];
+ let ReleaseAtCycles = [1,3,2,1];
}
def: InstRW<[BWWriteResGroup183_1], (instrs VGATHERDPDrm, VPGATHERDQrm,
VGATHERQPDrm, VPGATHERQQrm)>;
@@ -1421,7 +1421,7 @@ def: InstRW<[BWWriteResGroup183_1], (instrs VGATHERDPDrm, VPGATHERDQrm,
def BWWriteResGroup183_2 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
let Latency = 18;
let NumMicroOps = 9;
- let ResourceCycles = [1,3,4,1];
+ let ReleaseAtCycles = [1,3,4,1];
}
def: InstRW<[BWWriteResGroup183_2], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
VGATHERQPDYrm, VPGATHERQQYrm)>;
@@ -1429,14 +1429,14 @@ def: InstRW<[BWWriteResGroup183_2], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
def BWWriteResGroup183_3 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
let Latency = 19;
let NumMicroOps = 9;
- let ResourceCycles = [1,5,2,1];
+ let ReleaseAtCycles = [1,5,2,1];
}
def: InstRW<[BWWriteResGroup183_3], (instrs VGATHERQPSrm, VPGATHERQDrm)>;
def BWWriteResGroup183_4 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
let Latency = 19;
let NumMicroOps = 10;
- let ResourceCycles = [1,4,4,1];
+ let ReleaseAtCycles = [1,4,4,1];
}
def: InstRW<[BWWriteResGroup183_4], (instrs VGATHERDPSrm, VPGATHERDDrm,
VGATHERQPSYrm, VPGATHERQDYrm)>;
@@ -1444,21 +1444,21 @@ def: InstRW<[BWWriteResGroup183_4], (instrs VGATHERDPSrm, VPGATHERDDrm,
def BWWriteResGroup183_5 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
let Latency = 21;
let NumMicroOps = 14;
- let ResourceCycles = [1,4,8,1];
+ let ReleaseAtCycles = [1,4,8,1];
}
def: InstRW<[BWWriteResGroup183_5], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
def BWWriteResGroup185 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPort0156]> {
let Latency = 29;
let NumMicroOps = 27;
- let ResourceCycles = [1,5,1,1,19];
+ let ReleaseAtCycles = [1,5,1,1,19];
}
def: InstRW<[BWWriteResGroup185], (instrs XSAVE64)>;
def BWWriteResGroup186 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPort0156]> {
let Latency = 30;
let NumMicroOps = 28;
- let ResourceCycles = [1,6,1,1,19];
+ let ReleaseAtCycles = [1,6,1,1,19];
}
def: InstRW<[BWWriteResGroup186], (instrs XSAVE)>;
def: InstRW<[BWWriteResGroup186], (instregex "XSAVEC", "XSAVES", "XSAVEOPT")>;
@@ -1466,7 +1466,7 @@ def: InstRW<[BWWriteResGroup186], (instregex "XSAVEC", "XSAVES", "XSAVEOPT")>;
def BWWriteResGroup191 : SchedWriteRes<[BWPort5,BWPort6,BWPort23,BWPort06,BWPort0156]> {
let Latency = 34;
let NumMicroOps = 23;
- let ResourceCycles = [1,5,3,4,10];
+ let ReleaseAtCycles = [1,5,3,4,10];
}
def: InstRW<[BWWriteResGroup191], (instregex "IN(8|16|32)ri",
"IN(8|16|32)rr")>;
@@ -1474,7 +1474,7 @@ def: InstRW<[BWWriteResGroup191], (instregex "IN(8|16|32)ri",
def BWWriteResGroup194 : SchedWriteRes<[BWPort5,BWPort6,BWPort23,BWPort237,BWPort06,BWPort0156]> {
let Latency = 35;
let NumMicroOps = 23;
- let ResourceCycles = [1,5,2,1,4,10];
+ let ReleaseAtCycles = [1,5,2,1,4,10];
}
def: InstRW<[BWWriteResGroup194], (instregex "OUT(8|16|32)ir",
"OUT(8|16|32)rr")>;
@@ -1482,42 +1482,42 @@ def: InstRW<[BWWriteResGroup194], (instregex "OUT(8|16|32)ir",
def BWWriteResGroup196 : SchedWriteRes<[BWPort5,BWPort0156]> {
let Latency = 42;
let NumMicroOps = 22;
- let ResourceCycles = [2,20];
+ let ReleaseAtCycles = [2,20];
}
def: InstRW<[BWWriteResGroup196], (instrs RDTSCP)>;
def BWWriteResGroup197 : SchedWriteRes<[BWPort0,BWPort01,BWPort23,BWPort05,BWPort06,BWPort015,BWPort0156]> {
let Latency = 60;
let NumMicroOps = 64;
- let ResourceCycles = [2,2,8,1,10,2,39];
+ let ReleaseAtCycles = [2,2,8,1,10,2,39];
}
def: InstRW<[BWWriteResGroup197], (instrs FLDENVm)>;
def BWWriteResGroup198 : SchedWriteRes<[BWPort0,BWPort6,BWPort23,BWPort05,BWPort06,BWPort15,BWPort0156]> {
let Latency = 63;
let NumMicroOps = 88;
- let ResourceCycles = [4,4,31,1,2,1,45];
+ let ReleaseAtCycles = [4,4,31,1,2,1,45];
}
def: InstRW<[BWWriteResGroup198], (instrs FXRSTOR64)>;
def BWWriteResGroup199 : SchedWriteRes<[BWPort0,BWPort6,BWPort23,BWPort05,BWPort06,BWPort15,BWPort0156]> {
let Latency = 63;
let NumMicroOps = 90;
- let ResourceCycles = [4,2,33,1,2,1,47];
+ let ReleaseAtCycles = [4,2,33,1,2,1,47];
}
def: InstRW<[BWWriteResGroup199], (instrs FXRSTOR)>;
def BWWriteResGroup200 : SchedWriteRes<[BWPort5,BWPort01,BWPort0156]> {
let Latency = 75;
let NumMicroOps = 15;
- let ResourceCycles = [6,3,6];
+ let ReleaseAtCycles = [6,3,6];
}
def: InstRW<[BWWriteResGroup200], (instrs FNINIT)>;
def BWWriteResGroup202 : SchedWriteRes<[BWPort0,BWPort1,BWPort4,BWPort5,BWPort6,BWPort237,BWPort06,BWPort0156]> {
let Latency = 115;
let NumMicroOps = 100;
- let ResourceCycles = [9,9,11,8,1,11,21,30];
+ let ReleaseAtCycles = [9,9,11,8,1,11,21,30];
}
def: InstRW<[BWWriteResGroup202], (instrs FSTENVm)>;
@@ -1596,7 +1596,7 @@ def : InstRW<[BWWriteVZeroIdiomALUY], (instrs VPSUBBYrr,
def BWWritePCMPGTQ : SchedWriteRes<[BWPort0]> {
let Latency = 5;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def BWWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
@@ -1610,13 +1610,13 @@ def : InstRW<[BWWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr,
// CMOVs that use both Z and C flag require an extra uop.
def BWWriteCMOVA_CMOVBErr : SchedWriteRes<[BWPort06,BWPort0156]> {
let Latency = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
let NumMicroOps = 2;
}
def BWWriteCMOVA_CMOVBErm : SchedWriteRes<[BWPort23,BWPort06,BWPort0156]> {
let Latency = 7;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
let NumMicroOps = 3;
}
@@ -1636,13 +1636,13 @@ def : InstRW<[BWCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
// SETCCs that use both Z and C flag require an extra uop.
def BWWriteSETA_SETBEr : SchedWriteRes<[BWPort06,BWPort0156]> {
let Latency = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
let NumMicroOps = 2;
}
def BWWriteSETA_SETBEm : SchedWriteRes<[BWPort4,BWPort237,BWPort06,BWPort0156]> {
let Latency = 3;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
let NumMicroOps = 4;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedHaswell.td b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedHaswell.td
index d871ef4c353e..8795ca95c559 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -100,7 +100,7 @@ multiclass HWWriteResPair<X86FoldableSchedWrite SchedRW,
// The register variant uses a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
- let ResourceCycles = Res;
+ let ReleaseAtCycles = Res;
let NumMicroOps = UOps;
}
@@ -108,7 +108,7 @@ multiclass HWWriteResPair<X86FoldableSchedWrite SchedRW,
// the latency (default = 5).
def : WriteRes<SchedRW.Folded, !listconcat([HWPort23], ExePorts)> {
let Latency = !add(Lat, LoadLat);
- let ResourceCycles = !listconcat([1], Res);
+ let ReleaseAtCycles = !listconcat([1], Res);
let NumMicroOps = !add(UOps, LoadUOps);
}
}
@@ -488,7 +488,7 @@ defm : HWWriteResPair<WriteVarVecShiftZ, [HWPort0, HWPort5], 3, [2,1], 3, 7>; //
def : WriteRes<WriteVecInsert, [HWPort5]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def : WriteRes<WriteVecInsertLd, [HWPort5,HWPort23]> {
let Latency = 6;
@@ -511,48 +511,48 @@ def : WriteRes<WriteVecExtractSt, [HWPort4,HWPort5,HWPort237]> {
def : WriteRes<WritePCmpIStrM, [HWPort0]> {
let Latency = 11;
let NumMicroOps = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrMLd, [HWPort0, HWPort23]> {
let Latency = 17;
let NumMicroOps = 4;
- let ResourceCycles = [3,1];
+ let ReleaseAtCycles = [3,1];
}
// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [HWPort0, HWPort5, HWPort015, HWPort0156]> {
let Latency = 19;
let NumMicroOps = 9;
- let ResourceCycles = [4,3,1,1];
+ let ReleaseAtCycles = [4,3,1,1];
}
def : WriteRes<WritePCmpEStrMLd, [HWPort0, HWPort5, HWPort23, HWPort015, HWPort0156]> {
let Latency = 25;
let NumMicroOps = 10;
- let ResourceCycles = [4,3,1,1,1];
+ let ReleaseAtCycles = [4,3,1,1,1];
}
// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [HWPort0]> {
let Latency = 11;
let NumMicroOps = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrILd, [HWPort0, HWPort23]> {
let Latency = 17;
let NumMicroOps = 4;
- let ResourceCycles = [3,1];
+ let ReleaseAtCycles = [3,1];
}
// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [HWPort0, HWPort5, HWPort0156]> {
let Latency = 18;
let NumMicroOps = 8;
- let ResourceCycles = [4,3,1];
+ let ReleaseAtCycles = [4,3,1];
}
def : WriteRes<WritePCmpEStrILd, [HWPort0, HWPort5, HWPort23, HWPort0156]> {
let Latency = 24;
let NumMicroOps = 9;
- let ResourceCycles = [4,3,1,1];
+ let ReleaseAtCycles = [4,3,1,1];
}
// MOVMSK Instructions.
@@ -565,51 +565,51 @@ def : WriteRes<WriteMMXMOVMSK, [HWPort0]> { let Latency = 1; }
def : WriteRes<WriteAESDecEnc, [HWPort5]> {
let Latency = 7;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def : WriteRes<WriteAESDecEncLd, [HWPort5, HWPort23]> {
let Latency = 13;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def : WriteRes<WriteAESIMC, [HWPort5]> {
let Latency = 14;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def : WriteRes<WriteAESIMCLd, [HWPort5, HWPort23]> {
let Latency = 20;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def : WriteRes<WriteAESKeyGen, [HWPort0,HWPort5,HWPort015]> {
let Latency = 29;
let NumMicroOps = 11;
- let ResourceCycles = [2,7,2];
+ let ReleaseAtCycles = [2,7,2];
}
def : WriteRes<WriteAESKeyGenLd, [HWPort0,HWPort5,HWPort23,HWPort015]> {
let Latency = 34;
let NumMicroOps = 11;
- let ResourceCycles = [2,7,1,1];
+ let ReleaseAtCycles = [2,7,1,1];
}
// Carry-less multiplication instructions.
def : WriteRes<WriteCLMul, [HWPort0, HWPort5]> {
let Latency = 11;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def : WriteRes<WriteCLMulLd, [HWPort0, HWPort5, HWPort23]> {
let Latency = 17;
let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
+ let ReleaseAtCycles = [2,1,1];
}
// Load/store MXCSR.
-def : WriteRes<WriteLDMXCSR, [HWPort0,HWPort23,HWPort0156]> { let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
-def : WriteRes<WriteSTMXCSR, [HWPort4,HWPort5,HWPort237]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
+def : WriteRes<WriteLDMXCSR, [HWPort0,HWPort23,HWPort0156]> { let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1]; }
+def : WriteRes<WriteSTMXCSR, [HWPort4,HWPort5,HWPort237]> { let Latency = 2; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1]; }
// Catch-all for expensive system instructions.
def : WriteRes<WriteSystem, [HWPort0156]> { let Latency = 100; }
@@ -655,7 +655,7 @@ def HWWriteP0156_P23 : SchedWriteRes<[HWPort0156, HWPort23]> {
def HWWrite2P0156_P23 : SchedWriteRes<[HWPort0156, HWPort23]> {
let NumMicroOps = 3;
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
}
// Starting with P1.
@@ -664,7 +664,7 @@ def HWWriteP1 : SchedWriteRes<[HWPort1]>;
def HWWrite2P1 : SchedWriteRes<[HWPort1]> {
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
// Notation:
@@ -713,7 +713,7 @@ def : SchedAlias<WriteBitTestSetRegRMW, HWWriteBTRSCmr>;
// i.
def HWWriteRETI : SchedWriteRes<[HWPort23, HWPort6, HWPort015]> {
let NumMicroOps = 4;
- let ResourceCycles = [1, 2, 1];
+ let ReleaseAtCycles = [1, 2, 1];
}
def : InstRW<[HWWriteRETI], (instregex "RETI(16|32|64)", "LRETI(16|32|64)")>;
@@ -742,7 +742,7 @@ def : InstRW<[HWWriteP0156_P23], (instregex "LODS(L|Q)")>;
def HWWriteMOVS : SchedWriteRes<[HWPort23, HWPort4, HWPort0156]> {
let Latency = 4;
let NumMicroOps = 5;
- let ResourceCycles = [2, 1, 2];
+ let ReleaseAtCycles = [2, 1, 2];
}
def : InstRW<[HWWriteMOVS], (instrs MOVSB, MOVSL, MOVSQ, MOVSW)>;
@@ -750,7 +750,7 @@ def : InstRW<[HWWriteMOVS], (instrs MOVSB, MOVSL, MOVSQ, MOVSW)>;
def HWWriteCMPS : SchedWriteRes<[HWPort23, HWPort0156]> {
let Latency = 4;
let NumMicroOps = 5;
- let ResourceCycles = [2, 3];
+ let ReleaseAtCycles = [2, 3];
}
def : InstRW<[HWWriteCMPS], (instregex "CMPS(B|L|Q|W)")>;
@@ -765,7 +765,7 @@ def : InstRW<[HWWriteRDPMC], (instrs RDPMC)>;
// RDRAND.
def HWWriteRDRAND : SchedWriteRes<[HWPort23, HWPort015]> {
let NumMicroOps = 17;
- let ResourceCycles = [1, 16];
+ let ReleaseAtCycles = [1, 16];
}
def : InstRW<[HWWriteRDRAND], (instrs RDRAND16r, RDRAND32r, RDRAND64r)>;
@@ -863,7 +863,7 @@ def : InstRW<[HWWriteFXTRACT], (instrs FXTRACT)>;
def HWWriteResGroup0 : SchedWriteRes<[HWPort23]> {
let Latency = 6;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[HWWriteResGroup0], (instrs VBROADCASTSSrm)>;
def: InstRW<[HWWriteResGroup0], (instregex "(V?)MOVSHDUPrm",
@@ -874,10 +874,10 @@ def: InstRW<[HWWriteResGroup0], (instregex "(V?)MOVSHDUPrm",
def HWWriteResGroup0_1 : SchedWriteRes<[HWPort23]> {
let Latency = 7;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
-def: InstRW<[HWWriteResGroup0_1], (instrs VBROADCASTF128,
- VBROADCASTI128,
+def: InstRW<[HWWriteResGroup0_1], (instrs VBROADCASTF128rm,
+ VBROADCASTI128rm,
VBROADCASTSDYrm,
VBROADCASTSSYrm,
VMOVDDUPYrm,
@@ -889,7 +889,7 @@ def: InstRW<[HWWriteResGroup0_1], (instregex "LD_F(32|64|80)m",
def HWWriteResGroup1 : SchedWriteRes<[HWPort4,HWPort237]> {
let Latency = 1;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup1], (instrs FBSTPm, VMPTRSTm)>;
def: InstRW<[HWWriteResGroup1], (instregex "ST_FP(32|64|80)m")>;
@@ -897,7 +897,7 @@ def: InstRW<[HWWriteResGroup1], (instregex "ST_FP(32|64|80)m")>;
def HWWriteResGroup2 : SchedWriteRes<[HWPort0]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[HWWriteResGroup2], (instregex "VPSLLVQ(Y?)rr",
"VPSRLVQ(Y?)rr")>;
@@ -905,7 +905,7 @@ def: InstRW<[HWWriteResGroup2], (instregex "VPSLLVQ(Y?)rr",
def HWWriteResGroup3 : SchedWriteRes<[HWPort1]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[HWWriteResGroup3], (instregex "COM(P?)_FST0r",
"UCOM_F(P?)r")>;
@@ -913,49 +913,49 @@ def: InstRW<[HWWriteResGroup3], (instregex "COM(P?)_FST0r",
def HWWriteResGroup4 : SchedWriteRes<[HWPort5]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[HWWriteResGroup4], (instrs MMX_MOVQ2DQrr)>;
def HWWriteResGroup5 : SchedWriteRes<[HWPort6]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[HWWriteResGroup5], (instregex "JMP(16|32|64)r")>;
def HWWriteResGroup6 : SchedWriteRes<[HWPort01]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[HWWriteResGroup6], (instrs FINCSTP, FNOP)>;
def HWWriteResGroup7 : SchedWriteRes<[HWPort06]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[HWWriteResGroup7], (instrs CDQ, CQO)>;
def HWWriteResGroup8 : SchedWriteRes<[HWPort15]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[HWWriteResGroup8], (instregex "ANDN(32|64)rr")>;
def HWWriteResGroup9 : SchedWriteRes<[HWPort015]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[HWWriteResGroup9], (instregex "VPBLENDD(Y?)rri")>;
def HWWriteResGroup10 : SchedWriteRes<[HWPort0156]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[HWWriteResGroup10], (instrs SGDT64m,
SIDT64m,
@@ -966,21 +966,21 @@ def: InstRW<[HWWriteResGroup10], (instrs SGDT64m,
def HWWriteResGroup11_1 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 7;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup11_1], (instrs VPSLLVQrm, VPSRLVQrm)>;
def HWWriteResGroup11_2 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup11_2], (instrs VPSLLVQYrm, VPSRLVQYrm)>;
def HWWriteResGroup12 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup12], (instrs MMX_CVTPI2PSrm)>;
def: InstRW<[HWWriteResGroup12], (instregex "P(DEP|EXT)(32|64)rm")>;
@@ -988,7 +988,7 @@ def: InstRW<[HWWriteResGroup12], (instregex "P(DEP|EXT)(32|64)rm")>;
def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup13], (instregex "(V?)PMOV(SX|ZX)BDrm",
"(V?)PMOV(SX|ZX)BQrm",
@@ -1000,7 +1000,7 @@ def: InstRW<[HWWriteResGroup13], (instregex "(V?)PMOV(SX|ZX)BDrm",
def HWWriteResGroup13_1 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup13_1], (instrs VPMOVSXBDYrm,
VPMOVSXBQYrm,
@@ -1009,7 +1009,7 @@ def: InstRW<[HWWriteResGroup13_1], (instrs VPMOVSXBDYrm,
def HWWriteResGroup14 : SchedWriteRes<[HWPort6,HWPort23]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup14], (instrs FARJMP64m)>;
def: InstRW<[HWWriteResGroup14], (instregex "JMP(16|32|64)m")>;
@@ -1017,7 +1017,7 @@ def: InstRW<[HWWriteResGroup14], (instregex "JMP(16|32|64)m")>;
def HWWriteResGroup16 : SchedWriteRes<[HWPort23,HWPort15]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup16], (instregex "ANDN(32|64)rm",
"MOVBE(16|32|64)rm")>;
@@ -1025,7 +1025,7 @@ def: InstRW<[HWWriteResGroup16], (instregex "ANDN(32|64)rm",
def HWWriteResGroup17 : SchedWriteRes<[HWPort23,HWPort015]> {
let Latency = 7;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup17], (instrs VINSERTF128rm,
VINSERTI128rm,
@@ -1034,14 +1034,14 @@ def: InstRW<[HWWriteResGroup17], (instrs VINSERTF128rm,
def HWWriteResGroup17_2 : SchedWriteRes<[HWPort23,HWPort015]> {
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup17_2], (instrs VPBLENDDYrmi)>;
def HWWriteResGroup18 : SchedWriteRes<[HWPort23,HWPort0156]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup18], (instrs POP16r, POP32r, POP64r)>;
def: InstRW<[HWWriteResGroup18], (instregex "POP(16|32|64)rmr")>;
@@ -1049,35 +1049,35 @@ def: InstRW<[HWWriteResGroup18], (instregex "POP(16|32|64)rmr")>;
def HWWriteResGroup19 : SchedWriteRes<[HWPort237,HWPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup19], (instrs SFENCE)>;
def HWWriteResGroup21 : SchedWriteRes<[HWPort4,HWPort6,HWPort237]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[HWWriteResGroup21], (instrs FNSTCW16m)>;
def HWWriteResGroup23 : SchedWriteRes<[HWPort4,HWPort237,HWPort15]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[HWWriteResGroup23], (instregex "MOVBE(32|64)mr")>;
def HWWriteResGroup23_16 : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[HWWriteResGroup23_16], (instrs MOVBE16mr)>;
def HWWriteResGroup24 : SchedWriteRes<[HWPort4,HWPort237,HWPort0156]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[HWWriteResGroup24], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
STOSB, STOSL, STOSQ, STOSW)>;
@@ -1086,7 +1086,7 @@ def: InstRW<[HWWriteResGroup24], (instregex "PUSH(16|32|64)rmr")>;
def HWWriteResGroup25 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06]> {
let Latency = 7;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[HWWriteResGroup25], (instregex "SAR(8|16|32|64)m(1|i)",
"SHL(8|16|32|64)m(1|i)",
@@ -1095,7 +1095,7 @@ def: InstRW<[HWWriteResGroup25], (instregex "SAR(8|16|32|64)m(1|i)",
def HWWriteResGroup26 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> {
let Latency = 7;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[HWWriteResGroup26], (instregex "POP(16|32|64)rmm",
"PUSH(16|32|64)rmm")>;
@@ -1103,14 +1103,14 @@ def: InstRW<[HWWriteResGroup26], (instregex "POP(16|32|64)rmm",
def HWWriteResGroup28 : SchedWriteRes<[HWPort01]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[HWWriteResGroup28], (instrs FDECSTP)>;
def HWWriteResGroup30 : SchedWriteRes<[HWPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[HWWriteResGroup30], (instrs LFENCE,
MFENCE,
@@ -1120,28 +1120,28 @@ def: InstRW<[HWWriteResGroup30], (instrs LFENCE,
def HWWriteResGroup32 : SchedWriteRes<[HWPort6,HWPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup32], (instregex "CLFLUSH")>;
def HWWriteResGroup33 : SchedWriteRes<[HWPort01,HWPort015]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup33], (instrs MMX_MOVDQ2Qrr)>;
def HWWriteResGroup35 : SchedWriteRes<[HWPort06,HWPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup35], (instrs CWD, JCXZ, JECXZ, JRCXZ)>;
def HWWriteResGroup36_2 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[HWWriteResGroup36_2], (instrs MMX_PACKSSDWrm,
MMX_PACKSSWBrm,
@@ -1150,7 +1150,7 @@ def: InstRW<[HWWriteResGroup36_2], (instrs MMX_PACKSSDWrm,
def HWWriteResGroup37 : SchedWriteRes<[HWPort23,HWPort0156]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[HWWriteResGroup37], (instrs LEAVE, LEAVE64,
SCASB, SCASL, SCASQ, SCASW)>;
@@ -1158,35 +1158,35 @@ def: InstRW<[HWWriteResGroup37], (instrs LEAVE, LEAVE64,
def HWWriteResGroup39 : SchedWriteRes<[HWPort0,HWPort01,HWPort23]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[HWWriteResGroup39], (instrs FLDCW16m)>;
def HWWriteResGroup41 : SchedWriteRes<[HWPort6,HWPort23,HWPort0156]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[HWWriteResGroup41], (instrs LRET64, RET32, RET64)>;
def HWWriteResGroup44 : SchedWriteRes<[HWPort4,HWPort6,HWPort237,HWPort0156]> {
let Latency = 3;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[HWWriteResGroup44], (instregex "CALL(16|32|64)r")>;
def HWWriteResGroup45 : SchedWriteRes<[HWPort4,HWPort237,HWPort06,HWPort0156]> {
let Latency = 3;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[HWWriteResGroup45], (instrs CALL64pcrel32)>;
def HWWriteResGroup46 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06]> {
let Latency = 8;
let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,2];
+ let ReleaseAtCycles = [1,1,1,2];
}
def: InstRW<[HWWriteResGroup46], (instregex "ROL(8|16|32|64)m(1|i)",
"ROR(8|16|32|64)m(1|i)")>;
@@ -1194,7 +1194,7 @@ def: InstRW<[HWWriteResGroup46], (instregex "ROL(8|16|32|64)m(1|i)",
def HWWriteResGroup46_1 : SchedWriteRes<[HWPort06]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[HWWriteResGroup46_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
ROR8r1, ROR16r1, ROR32r1, ROR64r1)>;
@@ -1202,14 +1202,14 @@ def: InstRW<[HWWriteResGroup46_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
def HWWriteResGroup47 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> {
let Latency = 8;
let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,2];
+ let ReleaseAtCycles = [1,1,1,2];
}
def: InstRW<[HWWriteResGroup47], (instregex "XADD(8|16|32|64)rm")>;
def HWWriteResGroup48 : SchedWriteRes<[HWPort4,HWPort6,HWPort23,HWPort237,HWPort0156]> {
let Latency = 8;
let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1,1];
}
def: InstRW<[HWWriteResGroup48], (instregex "CALL(16|32|64)m")>;
def: InstRW<[HWWriteResGroup48], (instrs FARCALL64m)>;
@@ -1217,21 +1217,21 @@ def: InstRW<[HWWriteResGroup48], (instrs FARCALL64m)>;
def HWWriteResGroup50 : SchedWriteRes<[HWPort1]> {
let Latency = 3;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[HWWriteResGroup50], (instregex "P(DEP|EXT)(32|64)rr")>;
def HWWriteResGroup51 : SchedWriteRes<[HWPort5]> {
let Latency = 3;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[HWWriteResGroup51], (instregex "VPBROADCAST(B|W)rr")>;
def HWWriteResGroup52_1 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 10;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup52_1], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m")>;
@@ -1239,7 +1239,7 @@ def: InstRW<[HWWriteResGroup52_1], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
def HWWriteResGroup53_1 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 9;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup53_1], (instrs VPMOVSXBWYrm,
VPMOVSXDQYrm,
@@ -1249,7 +1249,7 @@ def: InstRW<[HWWriteResGroup53_1], (instrs VPMOVSXBWYrm,
def HWWriteResGroup57 : SchedWriteRes<[HWPort5,HWPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[HWWriteResGroup57], (instrs MMX_PACKSSDWrr,
MMX_PACKSSWBrr,
@@ -1258,14 +1258,14 @@ def: InstRW<[HWWriteResGroup57], (instrs MMX_PACKSSDWrr,
def HWWriteResGroup58 : SchedWriteRes<[HWPort6,HWPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[HWWriteResGroup58], (instregex "CLD")>;
def HWWriteResGroup59 : SchedWriteRes<[HWPort06,HWPort0156]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[HWWriteResGroup59], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;
@@ -1273,28 +1273,28 @@ def: InstRW<[HWWriteResGroup59], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
def HWWriteResGroup60 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> {
let Latency = 5;
let NumMicroOps = 8;
- let ResourceCycles = [2,4,2];
+ let ReleaseAtCycles = [2,4,2];
}
def: InstRW<[HWWriteResGroup60], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;
def HWWriteResGroup60b : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> {
let Latency = 6;
let NumMicroOps = 8;
- let ResourceCycles = [2,4,2];
+ let ReleaseAtCycles = [2,4,2];
}
def: InstRW<[HWWriteResGroup60b], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;
def HWWriteResGroup61 : SchedWriteRes<[HWPort0,HWPort4,HWPort237]> {
let Latency = 4;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[HWWriteResGroup61], (instrs FNSTSWm)>;
def HWWriteResGroup62 : SchedWriteRes<[HWPort1,HWPort4,HWPort237]> {
let Latency = 4;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[HWWriteResGroup62], (instregex "IST(T?)_FP(16|32|64)m",
"IST_F(16|32)m")>;
@@ -1302,7 +1302,7 @@ def: InstRW<[HWWriteResGroup62], (instregex "IST(T?)_FP(16|32|64)m",
def HWWriteResGroup66 : SchedWriteRes<[HWPort23,HWPort237,HWPort06,HWPort0156]> {
let Latency = 9;
let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,2];
+ let ReleaseAtCycles = [1,1,1,2];
}
def: InstRW<[HWWriteResGroup66], (instregex "RCL(8|16|32|64)m(1|i)",
"RCR(8|16|32|64)m(1|i)")>;
@@ -1310,14 +1310,14 @@ def: InstRW<[HWWriteResGroup66], (instregex "RCL(8|16|32|64)m(1|i)",
def HWWriteResGroup68 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> {
let Latency = 9;
let NumMicroOps = 6;
- let ResourceCycles = [1,1,1,3];
+ let ReleaseAtCycles = [1,1,1,3];
}
def: InstRW<[HWWriteResGroup68], (instregex "XCHG(8|16|32|64)rm")>;
def HWWriteResGroup69 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06,HWPort0156]> {
let Latency = 9;
let NumMicroOps = 6;
- let ResourceCycles = [1,1,1,2,1];
+ let ReleaseAtCycles = [1,1,1,2,1];
}
def: InstRW<[HWWriteResGroup69], (instregex "ROL(8|16|32|64)mCL",
"ROR(8|16|32|64)mCL",
@@ -1329,14 +1329,14 @@ def: SchedAlias<WriteADCRMW, HWWriteResGroup69>;
def HWWriteResGroup72 : SchedWriteRes<[HWPort0,HWPort0156]> {
let Latency = 4;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup72], (instrs FNSTSW16r)>;
def HWWriteResGroup73 : SchedWriteRes<[HWPort1,HWPort5]> {
let Latency = 4;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup73], (instrs MMX_CVTPS2PIrr,
MMX_CVTTPS2PIrr)>;
@@ -1344,49 +1344,49 @@ def: InstRW<[HWWriteResGroup73], (instrs MMX_CVTPS2PIrr,
def HWWriteResGroup75 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 11;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[HWWriteResGroup75], (instregex "FICOM(P?)(16|32)m")>;
def HWWriteResGroup78_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
let Latency = 9;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[HWWriteResGroup78_1], (instrs MMX_CVTPI2PDrm)>;
def HWWriteResGroup80 : SchedWriteRes<[HWPort5,HWPort23,HWPort015]> {
let Latency = 9;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[HWWriteResGroup80], (instregex "VPBROADCAST(B|W)(Y?)rm")>;
def HWWriteResGroup81 : SchedWriteRes<[HWPort0156]> {
let Latency = 4;
let NumMicroOps = 4;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
}
def: InstRW<[HWWriteResGroup81], (instrs FNCLEX)>;
def HWWriteResGroup82 : SchedWriteRes<[]> {
let Latency = 0;
let NumMicroOps = 4;
- let ResourceCycles = [];
+ let ReleaseAtCycles = [];
}
def: InstRW<[HWWriteResGroup82], (instrs VZEROUPPER)>;
def HWWriteResGroup83 : SchedWriteRes<[HWPort1,HWPort6,HWPort0156]> {
let Latency = 4;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
+ let ReleaseAtCycles = [1,1,2];
}
def: InstRW<[HWWriteResGroup83], (instregex "LAR(16|32|64)rr")>;
def HWWriteResGroup87 : SchedWriteRes<[HWPort1,HWPort6,HWPort23,HWPort0156]> {
let Latency = 9;
let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
+ let ReleaseAtCycles = [1,2,1,1];
}
def: InstRW<[HWWriteResGroup87], (instregex "LAR(16|32|64)rm",
"LSL(16|32|64)rm")>;
@@ -1394,28 +1394,28 @@ def: InstRW<[HWWriteResGroup87], (instregex "LAR(16|32|64)rm",
def HWWriteResGroup88 : SchedWriteRes<[HWPort4,HWPort237,HWPort0156]> {
let Latency = 5;
let NumMicroOps = 6;
- let ResourceCycles = [1,1,4];
+ let ReleaseAtCycles = [1,1,4];
}
def: InstRW<[HWWriteResGroup88], (instregex "PUSHF(16|64)")>;
def HWWriteResGroup89 : SchedWriteRes<[HWPort0]> {
let Latency = 5;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[HWWriteResGroup89], (instregex "MUL_(FPrST0|FST0r|FrST0)")>;
def HWWriteResGroup91_2 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 11;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup91_2], (instregex "(V?)PCMPGTQrm")>;
def HWWriteResGroup91_3 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 12;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup91_3], (instregex "MUL_F(32|64)m")>;
def: InstRW<[HWWriteResGroup91_3], (instrs VPCMPGTQYrm)>;
@@ -1423,77 +1423,77 @@ def: InstRW<[HWWriteResGroup91_3], (instrs VPCMPGTQYrm)>;
def HWWriteResGroup93 : SchedWriteRes<[HWPort1,HWPort5]> {
let Latency = 5;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[HWWriteResGroup93], (instregex "(V?)CVTSI642SSrr")>;
def HWWriteResGroup94 : SchedWriteRes<[HWPort1,HWPort6,HWPort06]> {
let Latency = 5;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[HWWriteResGroup94], (instregex "STR(16|32|64)r")>;
def HWWriteResGroup99 : SchedWriteRes<[HWPort6,HWPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
- let ResourceCycles = [1,4];
+ let ReleaseAtCycles = [1,4];
}
def: InstRW<[HWWriteResGroup99], (instrs PAUSE)>;
def HWWriteResGroup100 : SchedWriteRes<[HWPort06,HWPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
- let ResourceCycles = [1,4];
+ let ReleaseAtCycles = [1,4];
}
def: InstRW<[HWWriteResGroup100], (instrs XSETBV)>;
def HWWriteResGroup103 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 13;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[HWWriteResGroup103], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>;
def HWWriteResGroup107 : SchedWriteRes<[HWPort1,HWPort6,HWPort06,HWPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[HWWriteResGroup107], (instregex "SLDT(16|32|64)r")>;
def HWWriteResGroup108 : SchedWriteRes<[HWPort6,HWPort0156]> {
let Latency = 6;
let NumMicroOps = 6;
- let ResourceCycles = [1,5];
+ let ReleaseAtCycles = [1,5];
}
def: InstRW<[HWWriteResGroup108], (instrs STD)>;
def HWWriteResGroup114 : SchedWriteRes<[HWPort6,HWPort06,HWPort15,HWPort0156]> {
let Latency = 7;
let NumMicroOps = 7;
- let ResourceCycles = [2,2,1,2];
+ let ReleaseAtCycles = [2,2,1,2];
}
def: InstRW<[HWWriteResGroup114], (instrs LOOP)>;
def HWWriteResGroup115 : SchedWriteRes<[HWPort0,HWPort1,HWPort23]> {
let Latency = 15;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[HWWriteResGroup115], (instregex "MUL_FI(16|32)m")>;
def HWWriteResGroup120 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort15,HWPort0156]> {
let Latency = 16;
let NumMicroOps = 10;
- let ResourceCycles = [1,1,1,4,1,2];
+ let ReleaseAtCycles = [1,1,1,4,1,2];
}
def: InstRW<[HWWriteResGroup120], (instregex "RCL(8|16|32|64)mCL")>;
def HWWriteResGroup129 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> {
let Latency = 11;
let NumMicroOps = 7;
- let ResourceCycles = [2,2,3];
+ let ReleaseAtCycles = [2,2,3];
}
def: InstRW<[HWWriteResGroup129], (instregex "RCL(16|32|64)rCL",
"RCR(16|32|64)rCL")>;
@@ -1501,140 +1501,140 @@ def: InstRW<[HWWriteResGroup129], (instregex "RCL(16|32|64)rCL",
def HWWriteResGroup130 : SchedWriteRes<[HWPort1,HWPort06,HWPort15,HWPort0156]> {
let Latency = 11;
let NumMicroOps = 9;
- let ResourceCycles = [1,4,1,3];
+ let ReleaseAtCycles = [1,4,1,3];
}
def: InstRW<[HWWriteResGroup130], (instrs RCL8rCL)>;
def HWWriteResGroup131 : SchedWriteRes<[HWPort06,HWPort0156]> {
let Latency = 11;
let NumMicroOps = 11;
- let ResourceCycles = [2,9];
+ let ReleaseAtCycles = [2,9];
}
def: InstRW<[HWWriteResGroup131], (instrs LOOPE, LOOPNE)>;
def HWWriteResGroup132 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06,HWPort15,HWPort0156]> {
let Latency = 17;
let NumMicroOps = 14;
- let ResourceCycles = [1,1,1,4,2,5];
+ let ReleaseAtCycles = [1,1,1,4,2,5];
}
def: InstRW<[HWWriteResGroup132], (instrs CMPXCHG8B)>;
def HWWriteResGroup135 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort15,HWPort0156]> {
let Latency = 19;
let NumMicroOps = 11;
- let ResourceCycles = [2,1,1,3,1,3];
+ let ReleaseAtCycles = [2,1,1,3,1,3];
}
def: InstRW<[HWWriteResGroup135], (instregex "RCR(8|16|32|64)mCL")>;
def HWWriteResGroup142 : SchedWriteRes<[HWPort1,HWPort06,HWPort15,HWPort0156]> {
let Latency = 14;
let NumMicroOps = 10;
- let ResourceCycles = [2,3,1,4];
+ let ReleaseAtCycles = [2,3,1,4];
}
def: InstRW<[HWWriteResGroup142], (instrs RCR8rCL)>;
def HWWriteResGroup143 : SchedWriteRes<[HWPort23,HWPort0156]> {
let Latency = 19;
let NumMicroOps = 15;
- let ResourceCycles = [1,14];
+ let ReleaseAtCycles = [1,14];
}
def: InstRW<[HWWriteResGroup143], (instrs POPF16)>;
def HWWriteResGroup144 : SchedWriteRes<[HWPort4,HWPort5,HWPort6,HWPort23,HWPort237,HWPort06,HWPort0156]> {
let Latency = 21;
let NumMicroOps = 8;
- let ResourceCycles = [1,1,1,1,1,1,2];
+ let ReleaseAtCycles = [1,1,1,1,1,1,2];
}
def: InstRW<[HWWriteResGroup144], (instrs INSB, INSL, INSW)>;
def HWWriteResGroup145 : SchedWriteRes<[HWPort5, HWPort6]> {
let Latency = 8;
let NumMicroOps = 20;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup145], (instrs VZEROALL)>;
def HWWriteResGroup146 : SchedWriteRes<[HWPort0,HWPort4,HWPort5,HWPort23,HWPort237,HWPort06,HWPort0156]> {
let Latency = 22;
let NumMicroOps = 19;
- let ResourceCycles = [2,1,4,1,1,4,6];
+ let ReleaseAtCycles = [2,1,4,1,1,4,6];
}
def: InstRW<[HWWriteResGroup146], (instrs CMPXCHG16B)>;
def HWWriteResGroup147 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort6,HWPort01,HWPort0156]> {
let Latency = 17;
let NumMicroOps = 15;
- let ResourceCycles = [2,1,2,4,2,4];
+ let ReleaseAtCycles = [2,1,2,4,2,4];
}
def: InstRW<[HWWriteResGroup147], (instrs XCH_F)>;
def HWWriteResGroup149 : SchedWriteRes<[HWPort5,HWPort6,HWPort06,HWPort0156]> {
let Latency = 18;
let NumMicroOps = 8;
- let ResourceCycles = [1,1,1,5];
+ let ReleaseAtCycles = [1,1,1,5];
}
def: InstRW<[HWWriteResGroup149], (instrs CPUID, RDTSC)>;
def HWWriteResGroup151 : SchedWriteRes<[HWPort6,HWPort23,HWPort0156]> {
let Latency = 23;
let NumMicroOps = 19;
- let ResourceCycles = [3,1,15];
+ let ReleaseAtCycles = [3,1,15];
}
def: InstRW<[HWWriteResGroup151], (instregex "XRSTOR(64)?")>;
def HWWriteResGroup154 : SchedWriteRes<[HWPort0]> {
let Latency = 20;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[HWWriteResGroup154], (instregex "DIV_(FPrST0|FST0r|FrST0)")>;
def HWWriteResGroup155 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 27;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup155], (instregex "DIVR_F(32|64)m")>;
def HWWriteResGroup156 : SchedWriteRes<[HWPort5,HWPort6,HWPort0156]> {
let Latency = 20;
let NumMicroOps = 10;
- let ResourceCycles = [1,2,7];
+ let ReleaseAtCycles = [1,2,7];
}
def: InstRW<[HWWriteResGroup156], (instrs MWAITrr)>;
def HWWriteResGroup161 : SchedWriteRes<[HWPort0,HWPort1,HWPort23]> {
let Latency = 30;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[HWWriteResGroup161], (instregex "DIVR_FI(16|32)m")>;
def HWWriteResGroup162 : SchedWriteRes<[HWPort0]> {
let Latency = 24;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[HWWriteResGroup162], (instregex "DIVR_(FPrST0|FST0r|FrST0)")>;
def HWWriteResGroup163 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 31;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[HWWriteResGroup163], (instregex "DIV_F(32|64)m")>;
def HWWriteResGroup164 : SchedWriteRes<[HWPort4,HWPort6,HWPort23,HWPort237,HWPort0156]> {
let Latency = 30;
let NumMicroOps = 27;
- let ResourceCycles = [1,5,1,1,19];
+ let ReleaseAtCycles = [1,5,1,1,19];
}
def: InstRW<[HWWriteResGroup164], (instrs XSAVE64)>;
def HWWriteResGroup165 : SchedWriteRes<[HWPort4,HWPort6,HWPort23,HWPort237,HWPort0156]> {
let Latency = 31;
let NumMicroOps = 28;
- let ResourceCycles = [1,6,1,1,19];
+ let ReleaseAtCycles = [1,6,1,1,19];
}
def: InstRW<[HWWriteResGroup165], (instrs XSAVE)>;
def: InstRW<[HWWriteResGroup165], (instregex "XSAVEC", "XSAVES", "XSAVEOPT")>;
@@ -1642,14 +1642,14 @@ def: InstRW<[HWWriteResGroup165], (instregex "XSAVEC", "XSAVES", "XSAVEOPT")>;
def HWWriteResGroup166 : SchedWriteRes<[HWPort0,HWPort1,HWPort23]> {
let Latency = 34;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[HWWriteResGroup166], (instregex "DIV_FI(16|32)m")>;
def HWWriteResGroup170 : SchedWriteRes<[HWPort5,HWPort6,HWPort23,HWPort06,HWPort0156]> {
let Latency = 35;
let NumMicroOps = 23;
- let ResourceCycles = [1,5,3,4,10];
+ let ReleaseAtCycles = [1,5,3,4,10];
}
def: InstRW<[HWWriteResGroup170], (instregex "IN(8|16|32)ri",
"IN(8|16|32)rr")>;
@@ -1657,7 +1657,7 @@ def: InstRW<[HWWriteResGroup170], (instregex "IN(8|16|32)ri",
def HWWriteResGroup171 : SchedWriteRes<[HWPort5,HWPort6,HWPort23,HWPort237,HWPort06,HWPort0156]> {
let Latency = 36;
let NumMicroOps = 23;
- let ResourceCycles = [1,5,2,1,4,10];
+ let ReleaseAtCycles = [1,5,2,1,4,10];
}
def: InstRW<[HWWriteResGroup171], (instregex "OUT(8|16|32)ir",
"OUT(8|16|32)rr")>;
@@ -1665,91 +1665,91 @@ def: InstRW<[HWWriteResGroup171], (instregex "OUT(8|16|32)ir",
def HWWriteResGroup175 : SchedWriteRes<[HWPort1,HWPort4,HWPort5,HWPort6,HWPort23,HWPort237,HWPort15,HWPort0156]> {
let Latency = 41;
let NumMicroOps = 18;
- let ResourceCycles = [1,1,2,3,1,1,1,8];
+ let ReleaseAtCycles = [1,1,2,3,1,1,1,8];
}
def: InstRW<[HWWriteResGroup175], (instrs VMCLEARm)>;
def HWWriteResGroup176 : SchedWriteRes<[HWPort5,HWPort0156]> {
let Latency = 42;
let NumMicroOps = 22;
- let ResourceCycles = [2,20];
+ let ReleaseAtCycles = [2,20];
}
def: InstRW<[HWWriteResGroup176], (instrs RDTSCP)>;
def HWWriteResGroup177 : SchedWriteRes<[HWPort0,HWPort01,HWPort23,HWPort05,HWPort06,HWPort015,HWPort0156]> {
let Latency = 61;
let NumMicroOps = 64;
- let ResourceCycles = [2,2,8,1,10,2,39];
+ let ReleaseAtCycles = [2,2,8,1,10,2,39];
}
def: InstRW<[HWWriteResGroup177], (instrs FLDENVm)>;
def HWWriteResGroup178 : SchedWriteRes<[HWPort0,HWPort6,HWPort23,HWPort05,HWPort06,HWPort15,HWPort0156]> {
let Latency = 64;
let NumMicroOps = 88;
- let ResourceCycles = [4,4,31,1,2,1,45];
+ let ReleaseAtCycles = [4,4,31,1,2,1,45];
}
def: InstRW<[HWWriteResGroup178], (instrs FXRSTOR64)>;
def HWWriteResGroup179 : SchedWriteRes<[HWPort0,HWPort6,HWPort23,HWPort05,HWPort06,HWPort15,HWPort0156]> {
let Latency = 64;
let NumMicroOps = 90;
- let ResourceCycles = [4,2,33,1,2,1,47];
+ let ReleaseAtCycles = [4,2,33,1,2,1,47];
}
def: InstRW<[HWWriteResGroup179], (instrs FXRSTOR)>;
def HWWriteResGroup180 : SchedWriteRes<[HWPort5,HWPort01,HWPort0156]> {
let Latency = 75;
let NumMicroOps = 15;
- let ResourceCycles = [6,3,6];
+ let ReleaseAtCycles = [6,3,6];
}
def: InstRW<[HWWriteResGroup180], (instrs FNINIT)>;
def HWWriteResGroup183 : SchedWriteRes<[HWPort0,HWPort1,HWPort4,HWPort5,HWPort6,HWPort237,HWPort06,HWPort0156]> {
let Latency = 115;
let NumMicroOps = 100;
- let ResourceCycles = [9,9,11,8,1,11,21,30];
+ let ReleaseAtCycles = [9,9,11,8,1,11,21,30];
}
def: InstRW<[HWWriteResGroup183], (instrs FSTENVm)>;
def HWWriteResGroup184 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
let Latency = 14;
let NumMicroOps = 12;
- let ResourceCycles = [2,2,2,1,3,2];
+ let ReleaseAtCycles = [2,2,2,1,3,2];
}
def: InstRW<[HWWriteResGroup184], (instrs VGATHERDPDrm, VPGATHERDQrm)>;
def HWWriteResGroup185 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
let Latency = 17;
let NumMicroOps = 20;
- let ResourceCycles = [3,3,4,1,5,4];
+ let ReleaseAtCycles = [3,3,4,1,5,4];
}
def: InstRW<[HWWriteResGroup185], (instrs VGATHERDPDYrm, VPGATHERDQYrm)>;
def HWWriteResGroup186 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
let Latency = 16;
let NumMicroOps = 20;
- let ResourceCycles = [3,3,4,1,5,4];
+ let ReleaseAtCycles = [3,3,4,1,5,4];
}
def: InstRW<[HWWriteResGroup186], (instrs VGATHERDPSrm, VPGATHERDDrm)>;
def HWWriteResGroup187 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
let Latency = 22;
let NumMicroOps = 34;
- let ResourceCycles = [5,3,8,1,9,8];
+ let ReleaseAtCycles = [5,3,8,1,9,8];
}
def: InstRW<[HWWriteResGroup187], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
def HWWriteResGroup188 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
let Latency = 15;
let NumMicroOps = 14;
- let ResourceCycles = [3,3,2,1,3,2];
+ let ReleaseAtCycles = [3,3,2,1,3,2];
}
def: InstRW<[HWWriteResGroup188], (instrs VGATHERQPDrm, VPGATHERQQrm)>;
def HWWriteResGroup189 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
let Latency = 17;
let NumMicroOps = 22;
- let ResourceCycles = [5,3,4,1,5,4];
+ let ReleaseAtCycles = [5,3,4,1,5,4];
}
def: InstRW<[HWWriteResGroup189], (instrs VGATHERQPDYrm, VPGATHERQQYrm,
VGATHERQPSYrm, VPGATHERQDYrm)>;
@@ -1757,7 +1757,7 @@ def: InstRW<[HWWriteResGroup189], (instrs VGATHERQPDYrm, VPGATHERQQYrm,
def HWWriteResGroup190 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
let Latency = 16;
let NumMicroOps = 15;
- let ResourceCycles = [3,3,2,1,4,2];
+ let ReleaseAtCycles = [3,3,2,1,4,2];
}
def: InstRW<[HWWriteResGroup190], (instrs VGATHERQPSrm, VPGATHERQDrm)>;
@@ -1836,7 +1836,7 @@ def : InstRW<[HWWriteVZeroIdiomALUY], (instrs VPSUBBYrr,
def HWWritePCMPGTQ : SchedWriteRes<[HWPort0]> {
let Latency = 5;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def HWWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
@@ -1866,7 +1866,7 @@ def HWFastADC0 : MCSchedPredicate<
def HWWriteADC0 : SchedWriteRes<[HWPort06]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def HWWriteADC : SchedWriteVariant<[
@@ -1880,13 +1880,13 @@ def : InstRW<[HWWriteADC], (instrs ADC16ri8, ADC32ri8, ADC64ri8,
// CMOVs that use both Z and C flag require an extra uop.
def HWWriteCMOVA_CMOVBErr : SchedWriteRes<[HWPort06,HWPort0156]> {
let Latency = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
let NumMicroOps = 3;
}
def HWWriteCMOVA_CMOVBErm : SchedWriteRes<[HWPort23,HWPort06,HWPort0156]> {
let Latency = 8;
- let ResourceCycles = [1,1,2];
+ let ReleaseAtCycles = [1,1,2];
let NumMicroOps = 4;
}
@@ -1906,13 +1906,13 @@ def : InstRW<[HWCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
// SETCCs that use both Z and C flag require an extra uop.
def HWWriteSETA_SETBEr : SchedWriteRes<[HWPort06,HWPort0156]> {
let Latency = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
let NumMicroOps = 2;
}
def HWWriteSETA_SETBEm : SchedWriteRes<[HWPort4,HWPort237,HWPort06,HWPort0156]> {
let Latency = 3;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
let NumMicroOps = 4;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedIceLake.td b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedIceLake.td
index 283995f8203b..2c660fad2ec7 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedIceLake.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedIceLake.td
@@ -101,7 +101,7 @@ multiclass ICXWriteResPair<X86FoldableSchedWrite SchedRW,
// The register variant uses a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
- let ResourceCycles = Res;
+ let ReleaseAtCycles = Res;
let NumMicroOps = UOps;
}
@@ -109,7 +109,7 @@ multiclass ICXWriteResPair<X86FoldableSchedWrite SchedRW,
// the latency (default = 5).
def : WriteRes<SchedRW.Folded, !listconcat([ICXPort23], ExePorts)> {
let Latency = !add(Lat, LoadLat);
- let ResourceCycles = !listconcat([1], Res);
+ let ReleaseAtCycles = !listconcat([1], Res);
let NumMicroOps = !add(UOps, LoadUOps);
}
}
@@ -316,7 +316,7 @@ defm : ICXWriteResPair<WriteFRsqrtZ,[ICXPort0,ICXPort5], 9, [2,1], 3, 7>;
defm : ICXWriteResPair<WriteFMA, [ICXPort01], 4, [1], 1, 5>; // Fused Multiply Add.
defm : ICXWriteResPair<WriteFMAX, [ICXPort01], 4, [1], 1, 6>;
defm : ICXWriteResPair<WriteFMAY, [ICXPort01], 4, [1], 1, 7>;
-defm : ICXWriteResPair<WriteFMAZ, [ICXPort05], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFMAZ, [ICXPort0], 4, [1], 1, 7>;
defm : ICXWriteResPair<WriteDPPD, [ICXPort5,ICXPort015], 9, [1,2], 3, 6>; // Floating point double dot product.
defm : ICXWriteResPair<WriteDPPS, [ICXPort5,ICXPort015], 13, [1,3], 4, 6>;
defm : ICXWriteResPair<WriteDPPSY,[ICXPort5,ICXPort015], 13, [1,3], 4, 7>;
@@ -432,7 +432,7 @@ defm : ICXWriteResPair<WriteVarVecShiftZ, [ICXPort0], 1, [1], 1, 7>;
def : WriteRes<WriteVecInsert, [ICXPort5]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def : WriteRes<WriteVecInsertLd, [ICXPort5,ICXPort23]> {
let Latency = 6;
@@ -497,48 +497,48 @@ defm : X86WriteRes<WriteCvtPS2PHZSt, [ICXPort49,ICXPort5,ICXPort78,ICXPort05], 8
def : WriteRes<WritePCmpIStrM, [ICXPort0]> {
let Latency = 10;
let NumMicroOps = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrMLd, [ICXPort0, ICXPort23]> {
let Latency = 16;
let NumMicroOps = 4;
- let ResourceCycles = [3,1];
+ let ReleaseAtCycles = [3,1];
}
// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [ICXPort0, ICXPort5, ICXPort015, ICXPort0156]> {
let Latency = 19;
let NumMicroOps = 9;
- let ResourceCycles = [4,3,1,1];
+ let ReleaseAtCycles = [4,3,1,1];
}
def : WriteRes<WritePCmpEStrMLd, [ICXPort0, ICXPort5, ICXPort23, ICXPort015, ICXPort0156]> {
let Latency = 25;
let NumMicroOps = 10;
- let ResourceCycles = [4,3,1,1,1];
+ let ReleaseAtCycles = [4,3,1,1,1];
}
// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [ICXPort0]> {
let Latency = 10;
let NumMicroOps = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrILd, [ICXPort0, ICXPort23]> {
let Latency = 16;
let NumMicroOps = 4;
- let ResourceCycles = [3,1];
+ let ReleaseAtCycles = [3,1];
}
// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [ICXPort0,ICXPort5,ICXPort0156]> {
let Latency = 18;
let NumMicroOps = 8;
- let ResourceCycles = [4,3,1];
+ let ReleaseAtCycles = [4,3,1];
}
def : WriteRes<WritePCmpEStrILd, [ICXPort0, ICXPort5, ICXPort23, ICXPort0156]> {
let Latency = 24;
let NumMicroOps = 9;
- let ResourceCycles = [4,3,1,1];
+ let ReleaseAtCycles = [4,3,1,1];
}
// MOVMSK Instructions.
@@ -551,46 +551,46 @@ def : WriteRes<WriteMMXMOVMSK, [ICXPort0]> { let Latency = 2; }
def : WriteRes<WriteAESDecEnc, [ICXPort0]> { // Decryption, encryption.
let Latency = 4;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def : WriteRes<WriteAESDecEncLd, [ICXPort0, ICXPort23]> {
let Latency = 10;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def : WriteRes<WriteAESIMC, [ICXPort0]> { // InvMixColumn.
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def : WriteRes<WriteAESIMCLd, [ICXPort0, ICXPort23]> {
let Latency = 14;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def : WriteRes<WriteAESKeyGen, [ICXPort0,ICXPort5,ICXPort015]> { // Key Generation.
let Latency = 20;
let NumMicroOps = 11;
- let ResourceCycles = [3,6,2];
+ let ReleaseAtCycles = [3,6,2];
}
def : WriteRes<WriteAESKeyGenLd, [ICXPort0,ICXPort5,ICXPort23,ICXPort015]> {
let Latency = 25;
let NumMicroOps = 11;
- let ResourceCycles = [3,6,1,1];
+ let ReleaseAtCycles = [3,6,1,1];
}
// Carry-less multiplication instructions.
def : WriteRes<WriteCLMul, [ICXPort5]> {
let Latency = 6;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def : WriteRes<WriteCLMulLd, [ICXPort5, ICXPort23]> {
let Latency = 12;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
// Catch-all for expensive system instructions.
@@ -610,8 +610,8 @@ def : WriteRes<WriteMicrocoded, [ICXPort0156]> { let Latency = 100; } // def Wri
def : WriteRes<WriteFence, [ICXPort78, ICXPort49]>;
// Load/store MXCSR.
-def : WriteRes<WriteLDMXCSR, [ICXPort0,ICXPort23,ICXPort0156]> { let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
-def : WriteRes<WriteSTMXCSR, [ICXPort49,ICXPort5,ICXPort78]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
+def : WriteRes<WriteLDMXCSR, [ICXPort0,ICXPort23,ICXPort0156]> { let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1]; }
+def : WriteRes<WriteSTMXCSR, [ICXPort49,ICXPort5,ICXPort78]> { let Latency = 2; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1]; }
// Nop, not very useful except that it provides a model for nops!
def : WriteRes<WriteNop, []>;
@@ -631,7 +631,7 @@ defm : ICXWriteResPair<WritePHAddY, [ICXPort5,ICXPort015], 3, [2,1], 3, 7>;
def ICXWriteResGroup1 : SchedWriteRes<[ICXPort0]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[ICXWriteResGroup1], (instregex "KAND(B|D|Q|W)rr",
"KANDN(B|D|Q|W)rr",
@@ -659,7 +659,7 @@ def: InstRW<[ICXWriteResGroup1], (instregex "KAND(B|D|Q|W)rr",
def ICXWriteResGroup3 : SchedWriteRes<[ICXPort5]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[ICXWriteResGroup3], (instregex "COM(P?)_FST0r",
"KMOV(B|D|Q|W)kr",
@@ -676,35 +676,35 @@ def: InstRW<[ICXWriteResGroup3], (instregex "COM(P?)_FST0r",
def ICXWriteResGroup4 : SchedWriteRes<[ICXPort6]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[ICXWriteResGroup4], (instregex "JMP(16|32|64)r")>;
def ICXWriteResGroup6 : SchedWriteRes<[ICXPort05]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[ICXWriteResGroup6], (instrs FINCSTP, FNOP)>;
def ICXWriteResGroup7 : SchedWriteRes<[ICXPort06]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[ICXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;
def ICXWriteResGroup8 : SchedWriteRes<[ICXPort15]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[ICXWriteResGroup8], (instregex "ANDN(32|64)rr")>;
def ICXWriteResGroup9 : SchedWriteRes<[ICXPort015]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[ICXWriteResGroup9], (instregex "VBLENDMPD(Z128|Z256)rr",
"VBLENDMPS(Z128|Z256)rr",
@@ -723,7 +723,7 @@ def: InstRW<[ICXWriteResGroup9], (instregex "VBLENDMPD(Z128|Z256)rr",
def ICXWriteResGroup10 : SchedWriteRes<[ICXPort0156]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[ICXWriteResGroup10], (instrs SGDT64m,
SIDT64m,
@@ -734,7 +734,7 @@ def: InstRW<[ICXWriteResGroup10], (instrs SGDT64m,
def ICXWriteResGroup11 : SchedWriteRes<[ICXPort49,ICXPort78]> {
let Latency = 1;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup11], (instrs FBSTPm, VMPTRSTm)>;
def: InstRW<[ICXWriteResGroup11], (instregex "KMOV(B|D|Q|W)mk",
@@ -743,14 +743,14 @@ def: InstRW<[ICXWriteResGroup11], (instregex "KMOV(B|D|Q|W)mk",
def ICXWriteResGroup13 : SchedWriteRes<[ICXPort5]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[ICXWriteResGroup13], (instrs MMX_MOVQ2DQrr)>;
def ICXWriteResGroup14 : SchedWriteRes<[ICXPort05]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[ICXWriteResGroup14], (instrs FDECSTP,
MMX_MOVDQ2Qrr)>;
@@ -758,7 +758,7 @@ def: InstRW<[ICXWriteResGroup14], (instrs FDECSTP,
def ICXWriteResGroup17 : SchedWriteRes<[ICXPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[ICXWriteResGroup17], (instrs LFENCE,
WAIT,
@@ -767,21 +767,21 @@ def: InstRW<[ICXWriteResGroup17], (instrs LFENCE,
def ICXWriteResGroup20 : SchedWriteRes<[ICXPort6,ICXPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup20], (instregex "CLFLUSH")>;
def ICXWriteResGroup21 : SchedWriteRes<[ICXPort49,ICXPort78]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup21], (instrs SFENCE)>;
def ICXWriteResGroup23 : SchedWriteRes<[ICXPort06,ICXPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup23], (instrs CWD,
JCXZ, JECXZ, JRCXZ,
@@ -793,21 +793,21 @@ def: InstRW<[ICXWriteResGroup23], (instrs CWD,
def ICXWriteResGroup25 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort78]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup25], (instrs FNSTCW16m)>;
def ICXWriteResGroup27 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort15]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup27], (instregex "MOVBE(16|32|64)mr")>;
def ICXWriteResGroup28 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort0156]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
STOSB, STOSL, STOSQ, STOSW)>;
@@ -816,14 +816,14 @@ def: InstRW<[ICXWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>;
def ICXWriteResGroup29 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort15]> {
let Latency = 2;
let NumMicroOps = 5;
- let ResourceCycles = [2,2,1];
+ let ReleaseAtCycles = [2,2,1];
}
def: InstRW<[ICXWriteResGroup29], (instregex "VMOVDQU8Zmr(b?)")>;
def ICXWriteResGroup30 : SchedWriteRes<[ICXPort0]> {
let Latency = 3;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[ICXWriteResGroup30], (instregex "KMOV(B|D|Q|W)rk",
"KORTEST(B|D|Q|W)rr",
@@ -832,7 +832,7 @@ def: InstRW<[ICXWriteResGroup30], (instregex "KMOV(B|D|Q|W)rk",
def ICXWriteResGroup31 : SchedWriteRes<[ICXPort1]> {
let Latency = 3;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[ICXWriteResGroup31], (instregex "PDEP(32|64)rr",
"PEXT(32|64)rr")>;
@@ -840,7 +840,7 @@ def: InstRW<[ICXWriteResGroup31], (instregex "PDEP(32|64)rr",
def ICXWriteResGroup32 : SchedWriteRes<[ICXPort5]> {
let Latency = 3;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[ICXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
"VALIGND(Z|Z128|Z256)rri",
@@ -852,7 +852,7 @@ def: InstRW<[ICXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0
def ICXWriteResGroup33 : SchedWriteRes<[ICXPort5]> {
let Latency = 4;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[ICXWriteResGroup33], (instregex "KADD(B|D|Q|W)rr",
"KSHIFTL(B|D|Q|W)ri",
@@ -875,28 +875,28 @@ def: InstRW<[ICXWriteResGroup33], (instregex "KADD(B|D|Q|W)rr",
def ICXWriteResGroup34 : SchedWriteRes<[ICXPort0,ICXPort0156]> {
let Latency = 3;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup34], (instrs FNSTSW16r)>;
def ICXWriteResGroup37 : SchedWriteRes<[ICXPort0,ICXPort5]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[ICXWriteResGroup37], (instregex "MMX_PH(ADD|SUB)SWrr")>;
def ICXWriteResGroup38 : SchedWriteRes<[ICXPort5,ICXPort01]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[ICXWriteResGroup38], (instregex "(V?)PH(ADD|SUB)SW(Y?)rr")>;
def ICXWriteResGroup41 : SchedWriteRes<[ICXPort5,ICXPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[ICXWriteResGroup41], (instrs MMX_PACKSSDWrr,
MMX_PACKSSWBrr,
@@ -905,21 +905,21 @@ def: InstRW<[ICXWriteResGroup41], (instrs MMX_PACKSSDWrr,
def ICXWriteResGroup42 : SchedWriteRes<[ICXPort6,ICXPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[ICXWriteResGroup42], (instregex "CLD")>;
def ICXWriteResGroup43 : SchedWriteRes<[ICXPort49,ICXPort78]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[ICXWriteResGroup43], (instrs MFENCE)>;
def ICXWriteResGroup44 : SchedWriteRes<[ICXPort06,ICXPort0156]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[ICXWriteResGroup44], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;
@@ -927,49 +927,49 @@ def: InstRW<[ICXWriteResGroup44], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
def ICXWriteResGroup44b : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort0156]> {
let Latency = 5;
let NumMicroOps = 7;
- let ResourceCycles = [2,3,2];
+ let ReleaseAtCycles = [2,3,2];
}
def: InstRW<[ICXWriteResGroup44b], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;
def ICXWriteResGroup44c : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort0156]> {
let Latency = 6;
let NumMicroOps = 7;
- let ResourceCycles = [2,3,2];
+ let ReleaseAtCycles = [2,3,2];
}
def: InstRW<[ICXWriteResGroup44c], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;
def ICXWriteResGroup45 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup45], (instrs FNSTSWm)>;
def ICXWriteResGroup47 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort78,ICXPort0156]> {
let Latency = 3;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[ICXWriteResGroup47], (instregex "CALL(16|32|64)r")>;
def ICXWriteResGroup48 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort06,ICXPort0156]> {
let Latency = 3;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[ICXWriteResGroup48], (instrs CALL64pcrel32)>;
def ICXWriteResGroup49 : SchedWriteRes<[ICXPort0]> {
let Latency = 4;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[ICXWriteResGroup49], (instregex "MUL_(FPrST0|FST0r|FrST0)")>;
def ICXWriteResGroup50 : SchedWriteRes<[ICXPort01]> {
let Latency = 4;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[ICXWriteResGroup50], (instregex "VCVTPD2QQ(Z128|Z256)rr",
"VCVTPD2UQQ(Z128|Z256)rr",
@@ -985,7 +985,7 @@ def: InstRW<[ICXWriteResGroup50], (instregex "VCVTPD2QQ(Z128|Z256)rr",
def ICXWriteResGroup50z : SchedWriteRes<[ICXPort05]> {
let Latency = 4;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[ICXWriteResGroup50z], (instrs VCVTPD2QQZrr,
VCVTPD2UQQZrr,
@@ -999,7 +999,7 @@ def: InstRW<[ICXWriteResGroup50z], (instrs VCVTPD2QQZrr,
def ICXWriteResGroup51 : SchedWriteRes<[ICXPort5]> {
let Latency = 4;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[ICXWriteResGroup51], (instregex "VEXPANDPD(Z|Z128|Z256)rr",
"VEXPANDPS(Z|Z128|Z256)rr",
@@ -1025,7 +1025,7 @@ def: InstRW<[ICXWriteResGroup51], (instregex "VEXPANDPD(Z|Z128|Z256)rr",
def ICXWriteResGroup54 : SchedWriteRes<[ICXPort49,ICXPort5,ICXPort78]> {
let Latency = 4;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup54], (instregex "IST(T?)_FP(16|32|64)m",
"IST_F(16|32)m",
@@ -1034,28 +1034,28 @@ def: InstRW<[ICXWriteResGroup54], (instregex "IST(T?)_FP(16|32|64)m",
def ICXWriteResGroup55 : SchedWriteRes<[ICXPort0156]> {
let Latency = 4;
let NumMicroOps = 4;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
}
def: InstRW<[ICXWriteResGroup55], (instrs FNCLEX)>;
def ICXWriteResGroup56 : SchedWriteRes<[]> {
let Latency = 0;
let NumMicroOps = 4;
- let ResourceCycles = [];
+ let ReleaseAtCycles = [];
}
def: InstRW<[ICXWriteResGroup56], (instrs VZEROUPPER)>;
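// Note: the empty port list and empty ReleaseAtCycles above model VZEROUPPER's
// four uops as occupying no execution-port resources at all.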
def ICXWriteResGroup57 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort0156]> {
let Latency = 4;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
+ let ReleaseAtCycles = [1,1,2];
}
def: InstRW<[ICXWriteResGroup57], (instregex "LAR(16|32|64)rr")>;
def ICXWriteResGroup61 : SchedWriteRes<[ICXPort5,ICXPort01]> {
let Latency = 5;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIrr",
"MMX_CVT(T?)PS2PIrr",
@@ -1086,21 +1086,21 @@ def: InstRW<[ICXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIrr",
def ICXWriteResGroup62 : SchedWriteRes<[ICXPort5,ICXPort015]> {
let Latency = 5;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[ICXWriteResGroup62], (instregex "VPCONFLICTQZ128rr")>;
def ICXWriteResGroup63 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort06]> {
let Latency = 5;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup63], (instregex "STR(16|32|64)r")>;
def ICXWriteResGroup65 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort01]> {
let Latency = 5;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup65], (instregex "VCVTPS2PHZ128mr(b?)",
"VCVTPS2PHZ256mr(b?)",
@@ -1109,7 +1109,7 @@ def: InstRW<[ICXWriteResGroup65], (instregex "VCVTPS2PHZ128mr(b?)",
def ICXWriteResGroup66 : SchedWriteRes<[ICXPort49,ICXPort5,ICXPort78]> {
let Latency = 5;
let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
+ let ReleaseAtCycles = [1,2,1];
}
def: InstRW<[ICXWriteResGroup66], (instregex "VPMOVDB(Z|Z128|Z256)mr(b?)",
"VPMOVDW(Z|Z128|Z256)mr(b?)",
@@ -1132,21 +1132,21 @@ def: InstRW<[ICXWriteResGroup66], (instregex "VPMOVDB(Z|Z128|Z256)mr(b?)",
def ICXWriteResGroup67 : SchedWriteRes<[ICXPort06,ICXPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
- let ResourceCycles = [1,4];
+ let ReleaseAtCycles = [1,4];
}
def: InstRW<[ICXWriteResGroup67], (instrs XSETBV)>;
def ICXWriteResGroup69 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort0156]> {
let Latency = 5;
let NumMicroOps = 6;
- let ResourceCycles = [1,1,4];
+ let ReleaseAtCycles = [1,1,4];
}
def: InstRW<[ICXWriteResGroup69], (instregex "PUSHF(16|64)")>;
def ICXWriteResGroup71 : SchedWriteRes<[ICXPort23]> {
let Latency = 6;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[ICXWriteResGroup71], (instrs VBROADCASTSSrm,
VPBROADCASTDrm,
@@ -1161,7 +1161,7 @@ def: InstRW<[ICXWriteResGroup71], (instrs VBROADCASTSSrm,
def ICXWriteResGroup72 : SchedWriteRes<[ICXPort5]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[ICXWriteResGroup72], (instrs MMX_CVTPI2PSrr)>;
def: InstRW<[ICXWriteResGroup72], (instregex "VCOMPRESSPD(Z|Z128|Z256)rr",
@@ -1173,7 +1173,7 @@ def: InstRW<[ICXWriteResGroup72], (instregex "VCOMPRESSPD(Z|Z128|Z256)rr",
def ICXWriteResGroup73 : SchedWriteRes<[ICXPort0,ICXPort23]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup73], (instrs MMX_PADDSBrm,
MMX_PADDSWrm,
@@ -1199,7 +1199,7 @@ def: InstRW<[ICXWriteResGroup73], (instrs MMX_PADDSBrm,
def ICXWriteResGroup76 : SchedWriteRes<[ICXPort6,ICXPort23]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup76], (instrs FARJMP64m)>;
def: InstRW<[ICXWriteResGroup76], (instregex "JMP(16|32|64)m")>;
@@ -1207,7 +1207,7 @@ def: InstRW<[ICXWriteResGroup76], (instregex "JMP(16|32|64)m")>;
def ICXWriteResGroup79 : SchedWriteRes<[ICXPort23,ICXPort15]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup79], (instregex "ANDN(32|64)rm",
"MOVBE(16|32|64)rm")>;
@@ -1215,7 +1215,7 @@ def: InstRW<[ICXWriteResGroup79], (instregex "ANDN(32|64)rm",
def ICXWriteResGroup80 : SchedWriteRes<[ICXPort23,ICXPort015]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup80], (instregex "VMOV(64to|QI2)PQIZrm(b?)")>;
def: InstRW<[ICXWriteResGroup80], (instrs VMOVDI2PDIZrm)>;
@@ -1223,7 +1223,7 @@ def: InstRW<[ICXWriteResGroup80], (instrs VMOVDI2PDIZrm)>;
def ICXWriteResGroup81 : SchedWriteRes<[ICXPort23,ICXPort0156]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup81], (instrs POP16r, POP32r, POP64r)>;
def: InstRW<[ICXWriteResGroup81], (instregex "POP(16|32|64)rmr")>;
@@ -1231,7 +1231,7 @@ def: InstRW<[ICXWriteResGroup81], (instregex "POP(16|32|64)rmr")>;
def ICXWriteResGroup82 : SchedWriteRes<[ICXPort5,ICXPort01]> {
let Latency = 6;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[ICXWriteResGroup82], (instregex "(V?)CVTSI642SSrr",
"VCVTSI642SSZrr",
@@ -1240,14 +1240,14 @@ def: InstRW<[ICXWriteResGroup82], (instregex "(V?)CVTSI642SSrr",
def ICXWriteResGroup84 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort06,ICXPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[ICXWriteResGroup84], (instregex "SLDT(16|32|64)r")>;
def ICXWriteResGroup86 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort06]> {
let Latency = 6;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[ICXWriteResGroup86], (instregex "SAR(8|16|32|64)m(1|i)",
"SHL(8|16|32|64)m(1|i)",
@@ -1256,7 +1256,7 @@ def: InstRW<[ICXWriteResGroup86], (instregex "SAR(8|16|32|64)m(1|i)",
def ICXWriteResGroup87 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[ICXWriteResGroup87], (instregex "POP(16|32|64)rmm",
"PUSH(16|32|64)rmm")>;
@@ -1264,18 +1264,18 @@ def: InstRW<[ICXWriteResGroup87], (instregex "POP(16|32|64)rmm",
def ICXWriteResGroup88 : SchedWriteRes<[ICXPort6,ICXPort0156]> {
let Latency = 6;
let NumMicroOps = 6;
- let ResourceCycles = [1,5];
+ let ReleaseAtCycles = [1,5];
}
def: InstRW<[ICXWriteResGroup88], (instrs STD)>;
def ICXWriteResGroup89 : SchedWriteRes<[ICXPort23]> {
let Latency = 7;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[ICXWriteResGroup89], (instregex "LD_F(32|64|80)m")>;
-def: InstRW<[ICXWriteResGroup89], (instrs VBROADCASTF128,
- VBROADCASTI128,
+def: InstRW<[ICXWriteResGroup89], (instrs VBROADCASTF128rm,
+ VBROADCASTI128rm,
VBROADCASTSDYrm,
VBROADCASTSSYrm,
VMOVDDUPYrm,
@@ -1287,14 +1287,14 @@ def: InstRW<[ICXWriteResGroup89], (instrs VBROADCASTF128,
def ICXWriteResGroup90 : SchedWriteRes<[ICXPort01,ICXPort5]> {
let Latency = 7;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup90], (instrs VCVTDQ2PDYrr)>;
def ICXWriteResGroup92 : SchedWriteRes<[ICXPort5,ICXPort23]> {
let Latency = 7;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup92], (instregex "VMOV(SD|SS)Zrm(b?)",
"VPBROADCAST(B|W)(Z128)?rm",
@@ -1307,7 +1307,7 @@ def: InstRW<[ICXWriteResGroup92], (instregex "VMOV(SD|SS)Zrm(b?)",
def ICXWriteResGroup93 : SchedWriteRes<[ICXPort5,ICXPort01]> {
let Latency = 7;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup93], (instregex "VCVTDQ2PDZ256rr",
"VCVTPD2DQ(Y|Z256)rr",
@@ -1326,7 +1326,7 @@ def: InstRW<[ICXWriteResGroup93], (instregex "VCVTDQ2PDZ256rr",
def ICXWriteResGroup93z : SchedWriteRes<[ICXPort5,ICXPort05]> {
let Latency = 7;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup93z], (instrs VCVTDQ2PDZrr,
VCVTPD2DQZrr,
@@ -1345,7 +1345,7 @@ def: InstRW<[ICXWriteResGroup93z], (instrs VCVTDQ2PDZrr,
def ICXWriteResGroup95 : SchedWriteRes<[ICXPort23,ICXPort015]> {
let Latency = 7;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup95], (instrs VMOVNTDQAZ128rm,
VPBLENDDrmi)>;
@@ -1381,7 +1381,7 @@ def: InstRW<[ICXWriteResGroup95, ReadAfterVecXLd],
def ICXWriteResGroup96 : SchedWriteRes<[ICXPort5,ICXPort23]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[ICXWriteResGroup96], (instrs MMX_PACKSSDWrm,
MMX_PACKSSWBrm,
@@ -1390,19 +1390,19 @@ def: InstRW<[ICXWriteResGroup96], (instrs MMX_PACKSSDWrm,
def ICXWriteResGroup97 : SchedWriteRes<[ICXPort5,ICXPort015]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
-def: InstRW<[ICXWriteResGroup97], (instregex "VPERMI2W128rr",
- "VPERMI2W256rr",
- "VPERMI2Wrr",
- "VPERMT2W128rr",
- "VPERMT2W256rr",
- "VPERMT2Wrr")>;
+def: InstRW<[ICXWriteResGroup97], (instregex "VPERMI2WZ128rr",
+ "VPERMI2WZ256rr",
+ "VPERMI2WZrr",
+ "VPERMT2WZ128rr",
+ "VPERMT2WZ256rr",
+ "VPERMT2WZrr")>;
def ICXWriteResGroup99 : SchedWriteRes<[ICXPort23,ICXPort0156]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[ICXWriteResGroup99], (instrs LEAVE, LEAVE64,
SCASB, SCASL, SCASQ, SCASW)>;
@@ -1410,7 +1410,7 @@ def: InstRW<[ICXWriteResGroup99], (instrs LEAVE, LEAVE64,
def ICXWriteResGroup100 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort01]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup100], (instregex "(V?)CVT(T?)SS2SI64(Z?)rr",
"VCVT(T?)SS2USI64Zrr")>;
@@ -1418,28 +1418,28 @@ def: InstRW<[ICXWriteResGroup100], (instregex "(V?)CVT(T?)SS2SI64(Z?)rr",
def ICXWriteResGroup101 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort05]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup101], (instrs FLDCW16m)>;
def ICXWriteResGroup103 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort0156]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup103], (instregex "KMOV(B|D|Q|W)km")>;
def ICXWriteResGroup104 : SchedWriteRes<[ICXPort6,ICXPort23,ICXPort0156]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup104], (instrs LRET64, RET64)>;
def ICXWriteResGroup106 : SchedWriteRes<[ICXPort49,ICXPort5,ICXPort78]> {
let Latency = 7;
let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
+ let ReleaseAtCycles = [1,2,1];
}
def: InstRW<[ICXWriteResGroup106], (instregex "VCOMPRESSPD(Z|Z128|Z256)mr(b?)",
"VCOMPRESSPS(Z|Z128|Z256)mr(b?)",
@@ -1449,7 +1449,7 @@ def: InstRW<[ICXWriteResGroup106], (instregex "VCOMPRESSPD(Z|Z128|Z256)mr(b?)",
def ICXWriteResGroup107 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort06]> {
let Latency = 7;
let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,2];
+ let ReleaseAtCycles = [1,1,1,2];
}
def: InstRW<[ICXWriteResGroup107], (instregex "ROL(8|16|32|64)m(1|i)",
"ROR(8|16|32|64)m(1|i)")>;
@@ -1457,7 +1457,7 @@ def: InstRW<[ICXWriteResGroup107], (instregex "ROL(8|16|32|64)m(1|i)",
def ICXWriteResGroup107_1 : SchedWriteRes<[ICXPort06]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[ICXWriteResGroup107_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
ROR8r1, ROR16r1, ROR32r1, ROR64r1)>;
@@ -1465,14 +1465,14 @@ def: InstRW<[ICXWriteResGroup107_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
def ICXWriteResGroup108 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort0156]> {
let Latency = 7;
let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,2];
+ let ReleaseAtCycles = [1,1,1,2];
}
def: InstRW<[ICXWriteResGroup108], (instregex "XADD(8|16|32|64)rm")>;
def ICXWriteResGroup109 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort23,ICXPort78,ICXPort0156]> {
let Latency = 7;
let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1,1];
}
def: InstRW<[ICXWriteResGroup109], (instregex "CALL(16|32|64)m")>;
def: InstRW<[ICXWriteResGroup109], (instrs FARCALL64m)>;
@@ -1480,7 +1480,7 @@ def: InstRW<[ICXWriteResGroup109], (instrs FARCALL64m)>;
def ICXWriteResGroup110 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78,ICXPort0156]> {
let Latency = 7;
let NumMicroOps = 7;
- let ResourceCycles = [1,2,2,2];
+ let ReleaseAtCycles = [1,2,2,2];
}
def: InstRW<[ICXWriteResGroup110], (instrs VPSCATTERDQZ128mr,
VPSCATTERQQZ128mr,
@@ -1490,14 +1490,14 @@ def: InstRW<[ICXWriteResGroup110], (instrs VPSCATTERDQZ128mr,
def ICXWriteResGroup111 : SchedWriteRes<[ICXPort6,ICXPort06,ICXPort15,ICXPort0156]> {
let Latency = 7;
let NumMicroOps = 7;
- let ResourceCycles = [1,3,1,2];
+ let ReleaseAtCycles = [1,3,1,2];
}
def: InstRW<[ICXWriteResGroup111], (instrs LOOP)>;
def ICXWriteResGroup112 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78,ICXPort0156]> {
let Latency = 7;
let NumMicroOps = 11;
- let ResourceCycles = [1,4,4,2];
+ let ReleaseAtCycles = [1,4,4,2];
}
def: InstRW<[ICXWriteResGroup112], (instrs VPSCATTERDQZ256mr,
VPSCATTERQQZ256mr,
@@ -1507,7 +1507,7 @@ def: InstRW<[ICXWriteResGroup112], (instrs VPSCATTERDQZ256mr,
def ICXWriteResGroup113 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78,ICXPort0156]> {
let Latency = 7;
let NumMicroOps = 19;
- let ResourceCycles = [1,8,8,2];
+ let ReleaseAtCycles = [1,8,8,2];
}
def: InstRW<[ICXWriteResGroup113], (instrs VPSCATTERDQZmr,
VPSCATTERQQZmr,
@@ -1517,14 +1517,14 @@ def: InstRW<[ICXWriteResGroup113], (instrs VPSCATTERDQZmr,
def ICXWriteResGroup114 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> {
let Latency = 7;
let NumMicroOps = 36;
- let ResourceCycles = [1,16,1,16,2];
+ let ReleaseAtCycles = [1,16,1,16,2];
}
def: InstRW<[ICXWriteResGroup114], (instrs VSCATTERDPSZmr)>;
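// Note: across the scatter groups above (ICXWriteResGroup110/112/113/114),
// the uop count and the store-port ReleaseAtCycles entries scale with the
// element count, from [1,2,2,2] for two elements to [1,16,1,16,2] for
// sixteen, while the modeled latency stays at 7.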
def ICXWriteResGroup118 : SchedWriteRes<[ICXPort1,ICXPort23]> {
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup118], (instregex "PDEP(32|64)rm",
"PEXT(32|64)rm")>;
@@ -1532,7 +1532,7 @@ def: InstRW<[ICXWriteResGroup118], (instregex "PDEP(32|64)rm",
def ICXWriteResGroup119 : SchedWriteRes<[ICXPort5,ICXPort23]> {
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup119], (instregex "FCOM(P?)(32|64)m",
"VPBROADCASTB(Z|Z256)rm(b?)",
@@ -1550,7 +1550,7 @@ def: InstRW<[ICXWriteResGroup119], (instrs VPBROADCASTBYrm,
def ICXWriteResGroup121 : SchedWriteRes<[ICXPort23,ICXPort015]> {
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup121], (instrs VMOVNTDQAZ256rm,
VPBLENDDYrmi)>;
@@ -1609,14 +1609,14 @@ def: InstRW<[ICXWriteResGroup121, ReadAfterVecYLd],
def ICXWriteResGroup123 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
let Latency = 8;
let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
+ let ReleaseAtCycles = [1,2,1];
}
def: InstRW<[ICXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>;
def ICXWriteResGroup127 : SchedWriteRes<[ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> {
let Latency = 8;
let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,2];
+ let ReleaseAtCycles = [1,1,1,2];
}
def: InstRW<[ICXWriteResGroup127], (instregex "RCL(8|16|32|64)m(1|i)",
"RCR(8|16|32|64)m(1|i)")>;
@@ -1624,7 +1624,7 @@ def: InstRW<[ICXWriteResGroup127], (instregex "RCL(8|16|32|64)m(1|i)",
def ICXWriteResGroup128 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort06]> {
let Latency = 8;
let NumMicroOps = 6;
- let ResourceCycles = [1,1,1,3];
+ let ReleaseAtCycles = [1,1,1,3];
}
def: InstRW<[ICXWriteResGroup128], (instregex "ROL(8|16|32|64)mCL",
"ROR(8|16|32|64)mCL",
@@ -1635,14 +1635,14 @@ def: InstRW<[ICXWriteResGroup128], (instregex "ROL(8|16|32|64)mCL",
def ICXWriteResGroup130 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> {
let Latency = 8;
let NumMicroOps = 6;
- let ResourceCycles = [1,1,1,2,1];
+ let ReleaseAtCycles = [1,1,1,2,1];
}
def: SchedAlias<WriteADCRMW, ICXWriteResGroup130>;
def ICXWriteResGroup131 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> {
let Latency = 8;
let NumMicroOps = 8;
- let ResourceCycles = [1,2,1,2,2];
+ let ReleaseAtCycles = [1,2,1,2,2];
}
def: InstRW<[ICXWriteResGroup131], (instrs VPSCATTERQDZ128mr,
VPSCATTERQDZ256mr,
@@ -1652,7 +1652,7 @@ def: InstRW<[ICXWriteResGroup131], (instrs VPSCATTERQDZ128mr,
def ICXWriteResGroup132 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> {
let Latency = 8;
let NumMicroOps = 12;
- let ResourceCycles = [1,4,1,4,2];
+ let ReleaseAtCycles = [1,4,1,4,2];
}
def: InstRW<[ICXWriteResGroup132], (instrs VPSCATTERDDZ128mr,
VSCATTERDPSZ128mr)>;
@@ -1660,7 +1660,7 @@ def: InstRW<[ICXWriteResGroup132], (instrs VPSCATTERDDZ128mr,
def ICXWriteResGroup133 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> {
let Latency = 8;
let NumMicroOps = 20;
- let ResourceCycles = [1,8,1,8,2];
+ let ReleaseAtCycles = [1,8,1,8,2];
}
def: InstRW<[ICXWriteResGroup133], (instrs VPSCATTERDDZ256mr,
VSCATTERDPSZ256mr)>;
@@ -1668,21 +1668,21 @@ def: InstRW<[ICXWriteResGroup133], (instrs VPSCATTERDDZ256mr,
def ICXWriteResGroup134 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> {
let Latency = 8;
let NumMicroOps = 36;
- let ResourceCycles = [1,16,1,16,2];
+ let ReleaseAtCycles = [1,16,1,16,2];
}
def: InstRW<[ICXWriteResGroup134], (instrs VPSCATTERDDZmr)>;
def ICXWriteResGroup135 : SchedWriteRes<[ICXPort0,ICXPort23]> {
let Latency = 9;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup135], (instrs MMX_CVTPI2PSrm)>;
def ICXWriteResGroup136 : SchedWriteRes<[ICXPort5,ICXPort23]> {
let Latency = 9;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup136], (instrs VPMOVSXBWYrm,
VPMOVSXDQYrm,
@@ -1692,14 +1692,14 @@ def: InstRW<[ICXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
"VFPCLASSSDZrm(b?)",
"VFPCLASSSSZrm(b?)",
"(V?)PCMPGTQrm",
- "VPERMI2D128rm(b?)",
- "VPERMI2PD128rm(b?)",
- "VPERMI2PS128rm(b?)",
- "VPERMI2Q128rm(b?)",
- "VPERMT2D128rm(b?)",
- "VPERMT2PD128rm(b?)",
- "VPERMT2PS128rm(b?)",
- "VPERMT2Q128rm(b?)",
+ "VPERMI2DZ128rm(b?)",
+ "VPERMI2PDZ128rm(b?)",
+ "VPERMI2PSZ128rm(b?)",
+ "VPERMI2QZ128rm(b?)",
+ "VPERMT2DZ128rm(b?)",
+ "VPERMT2PDZ128rm(b?)",
+ "VPERMT2PSZ128rm(b?)",
+ "VPERMT2QZ128rm(b?)",
"VPMAXSQZ128rm(b?)",
"VPMAXUQZ128rm(b?)",
"VPMINSQZ128rm(b?)",
@@ -1708,7 +1708,7 @@ def: InstRW<[ICXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
def ICXWriteResGroup136_2 : SchedWriteRes<[ICXPort5,ICXPort23]> {
let Latency = 10;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup136_2], (instregex "VCMP(PD|PS)Z128rm(b?)i",
"VCMP(SD|SS)Zrm",
@@ -1734,7 +1734,7 @@ def: InstRW<[ICXWriteResGroup136_2], (instregex "VCMP(PD|PS)Z128rm(b?)i",
def ICXWriteResGroup137 : SchedWriteRes<[ICXPort23,ICXPort01]> {
let Latency = 9;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIrm",
"(V?)CVTPS2PDrm")>;
@@ -1742,7 +1742,7 @@ def: InstRW<[ICXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIrm",
def ICXWriteResGroup143 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23]> {
let Latency = 9;
let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
+ let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[ICXWriteResGroup143], (instregex "(V?)PHADDSWrm",
"(V?)PHSUBSWrm")>;
@@ -1750,7 +1750,7 @@ def: InstRW<[ICXWriteResGroup143], (instregex "(V?)PHADDSWrm",
def ICXWriteResGroup146 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort23,ICXPort0156]> {
let Latency = 9;
let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
+ let ReleaseAtCycles = [1,2,1,1];
}
def: InstRW<[ICXWriteResGroup146], (instregex "LAR(16|32|64)rm",
"LSL(16|32|64)rm")>;
@@ -1758,7 +1758,7 @@ def: InstRW<[ICXWriteResGroup146], (instregex "LAR(16|32|64)rm",
def ICXWriteResGroup148 : SchedWriteRes<[ICXPort5,ICXPort23]> {
let Latency = 10;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup148], (instrs VPCMPGTQYrm)>;
def: InstRW<[ICXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
@@ -1773,7 +1773,7 @@ def: InstRW<[ICXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
def ICXWriteResGroup148_2 : SchedWriteRes<[ICXPort5,ICXPort23]> {
let Latency = 11;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup148_2], (instregex "VCMPPD(Z|Z256)rm(b?)i",
"VCMPPS(Z|Z256)rm(b?)i",
@@ -1802,7 +1802,7 @@ def: InstRW<[ICXWriteResGroup148_2], (instregex "VCMPPD(Z|Z256)rm(b?)i",
def ICXWriteResGroup149 : SchedWriteRes<[ICXPort23,ICXPort01]> {
let Latency = 10;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup149], (instregex "VCVTDQ2PDZ128rm(b?)",
"VCVTDQ2PSZ128rm(b?)",
@@ -1835,7 +1835,7 @@ def: InstRW<[ICXWriteResGroup149], (instregex "VCVTDQ2PDZ128rm(b?)",
def ICXWriteResGroup151 : SchedWriteRes<[ICXPort5,ICXPort23]> {
let Latency = 10;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[ICXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)",
"VEXPANDPSZ128rm(b?)",
@@ -1845,7 +1845,7 @@ def: InstRW<[ICXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)",
def ICXWriteResGroup154 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23]> {
let Latency = 10;
let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
+ let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[ICXWriteResGroup154], (instrs VPHADDSWYrm,
VPHSUBSWYrm)>;
@@ -1853,21 +1853,21 @@ def: InstRW<[ICXWriteResGroup154], (instrs VPHADDSWYrm,
def ICXWriteResGroup157 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> {
let Latency = 10;
let NumMicroOps = 8;
- let ResourceCycles = [1,1,1,1,1,3];
+ let ReleaseAtCycles = [1,1,1,1,1,3];
}
def: InstRW<[ICXWriteResGroup157], (instregex "XCHG(8|16|32|64)rm")>;
def ICXWriteResGroup160 : SchedWriteRes<[ICXPort0,ICXPort23]> {
let Latency = 11;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup160], (instregex "MUL_F(32|64)m")>;
def ICXWriteResGroup161 : SchedWriteRes<[ICXPort23,ICXPort01]> {
let Latency = 11;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup161], (instrs VCVTDQ2PSYrm,
VCVTPS2PDYrm)>;
@@ -1890,7 +1890,7 @@ def: InstRW<[ICXWriteResGroup161], (instregex "VCVTDQ2(PD|PS)(Z|Z256)rm(b?)",
def ICXWriteResGroup162 : SchedWriteRes<[ICXPort5,ICXPort23]> {
let Latency = 11;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[ICXWriteResGroup162], (instregex "FICOM(P?)(16|32)m",
"VEXPANDPD(Z|Z256)rm(b?)",
@@ -1901,14 +1901,14 @@ def: InstRW<[ICXWriteResGroup162], (instregex "FICOM(P?)(16|32)m",
def ICXWriteResGroup164 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
let Latency = 11;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup164], (instregex "(V?)CVTDQ2PDrm")>;
def ICXWriteResGroup166 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort01]> {
let Latency = 11;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup166], (instrs CVTPD2DQrm,
CVTTPD2DQrm,
@@ -1918,14 +1918,14 @@ def: InstRW<[ICXWriteResGroup166], (instrs CVTPD2DQrm,
def ICXWriteResGroup167 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
let Latency = 11;
let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
+ let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[ICXWriteResGroup167], (instregex "VPCONFLICTQZ128rm(b?)")>;
def ICXWriteResGroup169 : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort0156]> {
let Latency = 11;
let NumMicroOps = 7;
- let ResourceCycles = [2,3,2];
+ let ReleaseAtCycles = [2,3,2];
}
def: InstRW<[ICXWriteResGroup169], (instregex "RCL(16|32|64)rCL",
"RCR(16|32|64)rCL")>;
@@ -1933,42 +1933,42 @@ def: InstRW<[ICXWriteResGroup169], (instregex "RCL(16|32|64)rCL",
def ICXWriteResGroup170 : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort15,ICXPort0156]> {
let Latency = 11;
let NumMicroOps = 9;
- let ResourceCycles = [1,5,1,2];
+ let ReleaseAtCycles = [1,5,1,2];
}
def: InstRW<[ICXWriteResGroup170], (instrs RCL8rCL)>;
def ICXWriteResGroup171 : SchedWriteRes<[ICXPort06,ICXPort0156]> {
let Latency = 11;
let NumMicroOps = 11;
- let ResourceCycles = [2,9];
+ let ReleaseAtCycles = [2,9];
}
def: InstRW<[ICXWriteResGroup171], (instrs LOOPE, LOOPNE)>;
def ICXWriteResGroup174 : SchedWriteRes<[ICXPort01]> {
let Latency = 15;
let NumMicroOps = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def: InstRW<[ICXWriteResGroup174], (instregex "VPMULLQ(Z128|Z256)rr")>;
def ICXWriteResGroup174z : SchedWriteRes<[ICXPort0]> {
let Latency = 15;
let NumMicroOps = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def: InstRW<[ICXWriteResGroup174z], (instregex "VPMULLQZrr")>;
def ICXWriteResGroup175 : SchedWriteRes<[ICXPort5,ICXPort23]> {
let Latency = 12;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[ICXWriteResGroup175], (instregex "VPERMWZ128rm(b?)")>;
def ICXWriteResGroup176 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort01]> {
let Latency = 12;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup176], (instregex "VCVT(T?)SD2USIZrm(b?)",
"VCVT(T?)SS2USI64Zrm(b?)")>;
@@ -1976,7 +1976,7 @@ def: InstRW<[ICXWriteResGroup176], (instregex "VCVT(T?)SD2USIZrm(b?)",
def ICXWriteResGroup177 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort01]> {
let Latency = 12;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup177], (instregex "VCVT(T?)PS2QQZrm(b?)",
"VCVT(T?)PS2UQQZrm(b?)")>;
@@ -1984,7 +1984,7 @@ def: InstRW<[ICXWriteResGroup177], (instregex "VCVT(T?)PS2QQZrm(b?)",
def ICXWriteResGroup180 : SchedWriteRes<[ICXPort5,ICXPort23]> {
let Latency = 13;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[ICXWriteResGroup180], (instregex "(ADD|SUB|SUBR)_FI(16|32)m",
"VPERMWZ256rm(b?)",
@@ -1993,29 +1993,29 @@ def: InstRW<[ICXWriteResGroup180], (instregex "(ADD|SUB|SUBR)_FI(16|32)m",
def ICXWriteResGroup181 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
let Latency = 13;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup181], (instrs VCVTDQ2PDYrm)>;
def ICXWriteResGroup183 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
let Latency = 13;
let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
+ let ReleaseAtCycles = [2,1,1];
}
-def: InstRW<[ICXWriteResGroup183], (instregex "VPERMI2W128rm(b?)",
- "VPERMT2W128rm(b?)")>;
+def: InstRW<[ICXWriteResGroup183], (instregex "VPERMI2WZ128rm(b?)",
+ "VPERMT2WZ128rm(b?)")>;
def ICXWriteResGroup187 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
let Latency = 14;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup187], (instregex "MUL_FI(16|32)m")>;
def ICXWriteResGroup188 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort01]> {
let Latency = 14;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup188], (instregex "VCVTPD2DQZrm(b?)",
"VCVTPD2UDQZrm(b?)",
@@ -2027,108 +2027,108 @@ def: InstRW<[ICXWriteResGroup188], (instregex "VCVTPD2DQZrm(b?)",
def ICXWriteResGroup189 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
let Latency = 14;
let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
+ let ReleaseAtCycles = [2,1,1];
}
-def: InstRW<[ICXWriteResGroup189], (instregex "VPERMI2W256rm(b?)",
- "VPERMI2Wrm(b?)",
- "VPERMT2W256rm(b?)",
- "VPERMT2Wrm(b?)")>;
+def: InstRW<[ICXWriteResGroup189], (instregex "VPERMI2WZ256rm(b?)",
+ "VPERMI2WZrm(b?)",
+ "VPERMT2WZ256rm(b?)",
+ "VPERMT2WZrm(b?)")>;
def ICXWriteResGroup190 : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort15,ICXPort0156]> {
let Latency = 14;
let NumMicroOps = 10;
- let ResourceCycles = [2,4,1,3];
+ let ReleaseAtCycles = [2,4,1,3];
}
def: InstRW<[ICXWriteResGroup190], (instrs RCR8rCL)>;
def ICXWriteResGroup191 : SchedWriteRes<[ICXPort0]> {
let Latency = 15;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[ICXWriteResGroup191], (instregex "DIVR_(FPrST0|FST0r|FrST0)")>;
def ICXWriteResGroup194 : SchedWriteRes<[ICXPort1,ICXPort5,ICXPort01,ICXPort23,ICXPort015]> {
let Latency = 15;
let NumMicroOps = 8;
- let ResourceCycles = [1,2,2,1,2];
+ let ReleaseAtCycles = [1,2,2,1,2];
}
def: InstRW<[ICXWriteResGroup194], (instregex "VPCONFLICTDZ128rm(b?)")>;
def ICXWriteResGroup195 : SchedWriteRes<[ICXPort1,ICXPort23,ICXPort78,ICXPort06,ICXPort15,ICXPort0156]> {
let Latency = 15;
let NumMicroOps = 10;
- let ResourceCycles = [1,1,1,5,1,1];
+ let ReleaseAtCycles = [1,1,1,5,1,1];
}
def: InstRW<[ICXWriteResGroup195], (instregex "RCL(8|16|32|64)mCL")>;
def ICXWriteResGroup199 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort06,ICXPort15,ICXPort0156]> {
let Latency = 16;
let NumMicroOps = 14;
- let ResourceCycles = [1,1,1,4,2,5];
+ let ReleaseAtCycles = [1,1,1,4,2,5];
}
def: InstRW<[ICXWriteResGroup199], (instrs CMPXCHG8B)>;
def ICXWriteResGroup200 : SchedWriteRes<[ICXPort1, ICXPort05, ICXPort6]> {
let Latency = 12;
let NumMicroOps = 34;
- let ResourceCycles = [1, 4, 5];
+ let ReleaseAtCycles = [1, 4, 5];
}
def: InstRW<[ICXWriteResGroup200], (instrs VZEROALL)>;
def ICXWriteResGroup202 : SchedWriteRes<[ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156]> {
let Latency = 17;
let NumMicroOps = 15;
- let ResourceCycles = [2,1,2,4,2,4];
+ let ReleaseAtCycles = [2,1,2,4,2,4];
}
def: InstRW<[ICXWriteResGroup202], (instrs XCH_F)>;
def ICXWriteResGroup205 : SchedWriteRes<[ICXPort23,ICXPort01]> {
let Latency = 21;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ReleaseAtCycles = [1,3];
}
def: InstRW<[ICXWriteResGroup205], (instregex "VPMULLQZ128rm(b?)")>;
def ICXWriteResGroup207 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort06,ICXPort0156]> {
let Latency = 18;
let NumMicroOps = 8;
- let ResourceCycles = [1,1,1,5];
+ let ReleaseAtCycles = [1,1,1,5];
}
def: InstRW<[ICXWriteResGroup207], (instrs CPUID, RDTSC)>;
def ICXWriteResGroup208 : SchedWriteRes<[ICXPort1,ICXPort23,ICXPort78,ICXPort06,ICXPort15,ICXPort0156]> {
let Latency = 18;
let NumMicroOps = 11;
- let ResourceCycles = [2,1,1,4,1,2];
+ let ReleaseAtCycles = [2,1,1,4,1,2];
}
def: InstRW<[ICXWriteResGroup208], (instregex "RCR(8|16|32|64)mCL")>;
def ICXWriteResGroup211 : SchedWriteRes<[ICXPort23,ICXPort01]> {
let Latency = 22;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ReleaseAtCycles = [1,3];
}
def: InstRW<[ICXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)")>;
def ICXWriteResGroup211_1 : SchedWriteRes<[ICXPort23,ICXPort0]> {
let Latency = 22;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ReleaseAtCycles = [1,3];
}
def: InstRW<[ICXWriteResGroup211_1], (instregex "VPMULLQZrm(b?)")>;
def ICXWriteResGroup215 : SchedWriteRes<[ICXPort0]> {
let Latency = 20;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[ICXWriteResGroup215], (instregex "DIV_(FPrST0|FST0r|FrST0)")>;
def ICXWriteGatherEVEX2 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> {
let Latency = 17;
let NumMicroOps = 5; // 2 uops perform multiple loads
- let ResourceCycles = [1,2,1,1];
+ let ReleaseAtCycles = [1,2,1,1];
}
def: InstRW<[ICXWriteGatherEVEX2], (instrs VGATHERQPSZ128rm, VPGATHERQDZ128rm,
VGATHERDPDZ128rm, VPGATHERDQZ128rm,
@@ -2137,7 +2137,7 @@ def: InstRW<[ICXWriteGatherEVEX2], (instrs VGATHERQPSZ128rm, VPGATHERQDZ128rm,
def ICXWriteGatherEVEX4 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> {
let Latency = 19;
let NumMicroOps = 5; // 2 uops perform multiple loads
- let ResourceCycles = [1,4,1,1];
+ let ReleaseAtCycles = [1,4,1,1];
}
def: InstRW<[ICXWriteGatherEVEX4], (instrs VGATHERQPSZ256rm, VPGATHERQDZ256rm,
VGATHERQPDZ256rm, VPGATHERQQZ256rm,
@@ -2147,7 +2147,7 @@ def: InstRW<[ICXWriteGatherEVEX4], (instrs VGATHERQPSZ256rm, VPGATHERQDZ256rm,
def ICXWriteGatherEVEX8 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> {
let Latency = 21;
let NumMicroOps = 5; // 2 uops perform multiple loads
- let ResourceCycles = [1,8,1,1];
+ let ReleaseAtCycles = [1,8,1,1];
}
def: InstRW<[ICXWriteGatherEVEX8], (instrs VGATHERDPSZ256rm, VPGATHERDDZ256rm,
VGATHERDPDZrm, VPGATHERDQZrm,
@@ -2157,35 +2157,35 @@ def: InstRW<[ICXWriteGatherEVEX8], (instrs VGATHERDPSZ256rm, VPGATHERDDZ256rm,
def ICXWriteGatherEVEX16 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> {
let Latency = 25;
let NumMicroOps = 5; // 2 uops perform multiple loads
- let ResourceCycles = [1,16,1,1];
+ let ReleaseAtCycles = [1,16,1,1];
}
def: InstRW<[ICXWriteGatherEVEX16], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>;
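// Note: in the EVEX gather groups above, the ICXPort23 ReleaseAtCycles entry
// tracks the element count (2/4/8/16) and the latency steps up with it
// (17/19/21/25), while the uop count stays at 5.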
def ICXWriteResGroup219 : SchedWriteRes<[ICXPort49,ICXPort5,ICXPort6,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> {
let Latency = 20;
let NumMicroOps = 8;
- let ResourceCycles = [1,1,1,1,1,1,2];
+ let ReleaseAtCycles = [1,1,1,1,1,1,2];
}
def: InstRW<[ICXWriteResGroup219], (instrs INSB, INSL, INSW)>;
def ICXWriteResGroup220 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort0156]> {
let Latency = 20;
let NumMicroOps = 10;
- let ResourceCycles = [1,2,7];
+ let ReleaseAtCycles = [1,2,7];
}
def: InstRW<[ICXWriteResGroup220], (instrs MWAITrr)>;
def ICXWriteResGroup223 : SchedWriteRes<[ICXPort0,ICXPort23]> {
let Latency = 22;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup223], (instregex "DIV_F(32|64)m")>;
def ICXWriteResGroupVEX2 : SchedWriteRes<[ICXPort0, ICXPort23, ICXPort5, ICXPort015]> {
let Latency = 18;
let NumMicroOps = 5; // 2 uops perform multiple loads
- let ResourceCycles = [1,2,1,1];
+ let ReleaseAtCycles = [1,2,1,1];
}
def: InstRW<[ICXWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm,
VGATHERQPDrm, VPGATHERQQrm,
@@ -2194,7 +2194,7 @@ def: InstRW<[ICXWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm,
def ICXWriteResGroupVEX4 : SchedWriteRes<[ICXPort0, ICXPort23, ICXPort5, ICXPort015]> {
let Latency = 20;
let NumMicroOps = 5; // 2 uops perform multiple loads
- let ResourceCycles = [1,4,1,1];
+ let ReleaseAtCycles = [1,4,1,1];
}
def: InstRW<[ICXWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
VGATHERDPSrm, VPGATHERDDrm,
@@ -2204,14 +2204,14 @@ def: InstRW<[ICXWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
def ICXWriteResGroupVEX8 : SchedWriteRes<[ICXPort0, ICXPort23, ICXPort5, ICXPort015]> {
let Latency = 22;
let NumMicroOps = 5; // 2 uops perform multiple loads
- let ResourceCycles = [1,8,1,1];
+ let ReleaseAtCycles = [1,8,1,1];
}
def: InstRW<[ICXWriteResGroupVEX8], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
def ICXWriteResGroup225 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort015]> {
let Latency = 22;
let NumMicroOps = 14;
- let ResourceCycles = [5,5,4];
+ let ReleaseAtCycles = [5,5,4];
}
def: InstRW<[ICXWriteResGroup225], (instregex "VPCONFLICTDZ128rr",
"VPCONFLICTQZ256rr")>;
@@ -2219,42 +2219,42 @@ def: InstRW<[ICXWriteResGroup225], (instregex "VPCONFLICTDZ128rr",
def ICXWriteResGroup228 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> {
let Latency = 23;
let NumMicroOps = 19;
- let ResourceCycles = [2,1,4,1,1,4,6];
+ let ReleaseAtCycles = [2,1,4,1,1,4,6];
}
def: InstRW<[ICXWriteResGroup228], (instrs CMPXCHG16B)>;
def ICXWriteResGroup233 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
let Latency = 25;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup233], (instregex "DIV_FI(16|32)m")>;
def ICXWriteResGroup239 : SchedWriteRes<[ICXPort0,ICXPort23]> {
let Latency = 27;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[ICXWriteResGroup239], (instregex "DIVR_F(32|64)m")>;
def ICXWriteResGroup242 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23,ICXPort015]> {
let Latency = 29;
let NumMicroOps = 15;
- let ResourceCycles = [5,5,1,4];
+ let ReleaseAtCycles = [5,5,1,4];
}
def: InstRW<[ICXWriteResGroup242], (instregex "VPCONFLICTQZ256rm(b?)")>;
def ICXWriteResGroup243 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
let Latency = 30;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup243], (instregex "DIVR_FI(16|32)m")>;
def ICXWriteResGroup247 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort23,ICXPort06,ICXPort0156]> {
let Latency = 35;
let NumMicroOps = 23;
- let ResourceCycles = [1,5,3,4,10];
+ let ReleaseAtCycles = [1,5,3,4,10];
}
def: InstRW<[ICXWriteResGroup247], (instregex "IN(8|16|32)ri",
"IN(8|16|32)rr")>;
@@ -2262,7 +2262,7 @@ def: InstRW<[ICXWriteResGroup247], (instregex "IN(8|16|32)ri",
def ICXWriteResGroup248 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> {
let Latency = 35;
let NumMicroOps = 23;
- let ResourceCycles = [1,5,2,1,4,10];
+ let ReleaseAtCycles = [1,5,2,1,4,10];
}
def: InstRW<[ICXWriteResGroup248], (instregex "OUT(8|16|32)ir",
"OUT(8|16|32)rr")>;
@@ -2270,7 +2270,7 @@ def: InstRW<[ICXWriteResGroup248], (instregex "OUT(8|16|32)ir",
def ICXWriteResGroup249 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort015]> {
let Latency = 37;
let NumMicroOps = 21;
- let ResourceCycles = [9,7,5];
+ let ReleaseAtCycles = [9,7,5];
}
def: InstRW<[ICXWriteResGroup249], (instregex "VPCONFLICTDZ256rr",
"VPCONFLICTQZrr")>;
@@ -2278,35 +2278,35 @@ def: InstRW<[ICXWriteResGroup249], (instregex "VPCONFLICTDZ256rr",
def ICXWriteResGroup250 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort23,ICXPort0156]> {
let Latency = 37;
let NumMicroOps = 31;
- let ResourceCycles = [1,8,1,21];
+ let ReleaseAtCycles = [1,8,1,21];
}
def: InstRW<[ICXWriteResGroup250], (instregex "XRSTOR(64)?")>;
def ICXWriteResGroup252 : SchedWriteRes<[ICXPort1,ICXPort49,ICXPort5,ICXPort6,ICXPort23,ICXPort78,ICXPort15,ICXPort0156]> {
let Latency = 40;
let NumMicroOps = 18;
- let ResourceCycles = [1,1,2,3,1,1,1,8];
+ let ReleaseAtCycles = [1,1,2,3,1,1,1,8];
}
def: InstRW<[ICXWriteResGroup252], (instrs VMCLEARm)>;
def ICXWriteResGroup253 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort23,ICXPort78,ICXPort0156]> {
let Latency = 41;
let NumMicroOps = 39;
- let ResourceCycles = [1,10,1,1,26];
+ let ReleaseAtCycles = [1,10,1,1,26];
}
def: InstRW<[ICXWriteResGroup253], (instrs XSAVE64)>;
def ICXWriteResGroup254 : SchedWriteRes<[ICXPort5,ICXPort0156]> {
let Latency = 42;
let NumMicroOps = 22;
- let ResourceCycles = [2,20];
+ let ReleaseAtCycles = [2,20];
}
def: InstRW<[ICXWriteResGroup254], (instrs RDTSCP)>;
def ICXWriteResGroup255 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort23,ICXPort78,ICXPort0156]> {
let Latency = 42;
let NumMicroOps = 40;
- let ResourceCycles = [1,11,1,1,26];
+ let ReleaseAtCycles = [1,11,1,1,26];
}
def: InstRW<[ICXWriteResGroup255], (instrs XSAVE)>;
def: InstRW<[ICXWriteResGroup255], (instregex "XSAVEC", "XSAVES", "XSAVEOPT")>;
@@ -2314,7 +2314,7 @@ def: InstRW<[ICXWriteResGroup255], (instregex "XSAVEC", "XSAVES", "XSAVEOPT")>;
def ICXWriteResGroup256 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23,ICXPort015]> {
let Latency = 44;
let NumMicroOps = 22;
- let ResourceCycles = [9,7,1,5];
+ let ReleaseAtCycles = [9,7,1,5];
}
def: InstRW<[ICXWriteResGroup256], (instregex "VPCONFLICTDZ256rm(b?)",
"VPCONFLICTQZrm(b?)")>;
@@ -2322,56 +2322,56 @@ def: InstRW<[ICXWriteResGroup256], (instregex "VPCONFLICTDZ256rm(b?)",
def ICXWriteResGroup258 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort05,ICXPort06,ICXPort0156]> {
let Latency = 62;
let NumMicroOps = 64;
- let ResourceCycles = [2,8,5,10,39];
+ let ReleaseAtCycles = [2,8,5,10,39];
}
def: InstRW<[ICXWriteResGroup258], (instrs FLDENVm)>;
def ICXWriteResGroup259 : SchedWriteRes<[ICXPort0,ICXPort6,ICXPort23,ICXPort05,ICXPort06,ICXPort15,ICXPort0156]> {
let Latency = 63;
let NumMicroOps = 88;
- let ResourceCycles = [4,4,31,1,2,1,45];
+ let ReleaseAtCycles = [4,4,31,1,2,1,45];
}
def: InstRW<[ICXWriteResGroup259], (instrs FXRSTOR64)>;
def ICXWriteResGroup260 : SchedWriteRes<[ICXPort0,ICXPort6,ICXPort23,ICXPort05,ICXPort06,ICXPort15,ICXPort0156]> {
let Latency = 63;
let NumMicroOps = 90;
- let ResourceCycles = [4,2,33,1,2,1,47];
+ let ReleaseAtCycles = [4,2,33,1,2,1,47];
}
def: InstRW<[ICXWriteResGroup260], (instrs FXRSTOR)>;
def ICXWriteResGroup261 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort015]> {
let Latency = 67;
let NumMicroOps = 35;
- let ResourceCycles = [17,11,7];
+ let ReleaseAtCycles = [17,11,7];
}
def: InstRW<[ICXWriteResGroup261], (instregex "VPCONFLICTDZrr")>;
def ICXWriteResGroup262 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23,ICXPort015]> {
let Latency = 74;
let NumMicroOps = 36;
- let ResourceCycles = [17,11,1,7];
+ let ReleaseAtCycles = [17,11,1,7];
}
def: InstRW<[ICXWriteResGroup262], (instregex "VPCONFLICTDZrm(b?)")>;
def ICXWriteResGroup263 : SchedWriteRes<[ICXPort5,ICXPort05,ICXPort0156]> {
let Latency = 75;
let NumMicroOps = 15;
- let ResourceCycles = [6,3,6];
+ let ReleaseAtCycles = [6,3,6];
}
def: InstRW<[ICXWriteResGroup263], (instrs FNINIT)>;
def ICXWriteResGroup266 : SchedWriteRes<[ICXPort0,ICXPort1,ICXPort49,ICXPort5,ICXPort6,ICXPort78,ICXPort06,ICXPort0156]> {
let Latency = 106;
let NumMicroOps = 100;
- let ResourceCycles = [9,1,11,16,1,11,21,30];
+ let ReleaseAtCycles = [9,1,11,16,1,11,21,30];
}
def: InstRW<[ICXWriteResGroup266], (instrs FSTENVm)>;
def ICXWriteResGroup267 : SchedWriteRes<[ICXPort6,ICXPort0156]> {
let Latency = 140;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ReleaseAtCycles = [1,3];
}
def: InstRW<[ICXWriteResGroup267], (instrs PAUSE)>;
@@ -2458,7 +2458,7 @@ def : InstRW<[ICXWriteVZeroIdiomALUY], (instrs VPCMPGTBYrr,
def ICXWritePSUB : SchedWriteRes<[ICXPort015]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def ICXWriteVZeroIdiomPSUB : SchedWriteVariant<[
@@ -2481,7 +2481,7 @@ def : InstRW<[ICXWriteVZeroIdiomPSUB], (instrs PSUBBrr, VPSUBBrr, VPSUBBZ128rr,
def ICXWritePCMPGTQ : SchedWriteRes<[ICXPort5]> {
let Latency = 3;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def ICXWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
@@ -2495,13 +2495,13 @@ def : InstRW<[ICXWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr,
// CMOVs that use both Z and C flag require an extra uop.
def ICXWriteCMOVA_CMOVBErr : SchedWriteRes<[ICXPort06]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 2;
}
def ICXWriteCMOVA_CMOVBErm : SchedWriteRes<[ICXPort23,ICXPort06]> {
let Latency = 7;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
let NumMicroOps = 3;
}
@@ -2521,13 +2521,13 @@ def : InstRW<[ICXCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
// SETCCs that use both Z and C flag require an extra uop.
def ICXWriteSETA_SETBEr : SchedWriteRes<[ICXPort06]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 2;
}
def ICXWriteSETA_SETBEm : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort06]> {
let Latency = 3;
- let ResourceCycles = [1,1,2];
+ let ReleaseAtCycles = [1,1,2];
let NumMicroOps = 4;
}
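The two-uop CMOVA/CMOVBE and SETA/SETBE resources defined above are attached
through SchedWriteVariants that select them only for those condition codes
(the InstRW lines citing ICXCMOVA_CMOVBErm above rely on such a variant). A
minimal sketch of that wiring for the register CMOV case, assuming the
predicate name provided by X86SchedPredicates.td and an illustrative fallback
write:

def ICXCMOVA_CMOVBErr : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [ICXWriteCMOVA_CMOVBErr]>,
  SchedVar<NoSchedPred,                             [WriteCMOV]>
]>;
def : InstRW<[ICXCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;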
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSandyBridge.td b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 8c01119ed9b8..7b33aed6351c 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -90,7 +90,7 @@ multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
// The register variant uses a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
- let ResourceCycles = Res;
+ let ReleaseAtCycles = Res;
let NumMicroOps = UOps;
}
@@ -98,7 +98,7 @@ multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
// the latency (default = 5).
def : WriteRes<SchedRW.Folded, !listconcat([SBPort23], ExePorts)> {
let Latency = !add(Lat, LoadLat);
- let ResourceCycles = !listconcat([1], Res);
+ let ReleaseAtCycles = !listconcat([1], Res);
let NumMicroOps = !add(UOps, LoadUOps);
}
}
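For reference, a sketch of what one instantiation appearing later in this file
expands to under the multiclass above; the LoadUOps default is not visible in
these hunks, so the folded uop count is left symbolic:

defm : SBWriteResPair<WriteFRnd, [SBPort1], 3, [1], 1, 6>;
// register form:
//   def : WriteRes<WriteFRnd, [SBPort1]> {
//     let Latency = 3;  let ReleaseAtCycles = [1];  let NumMicroOps = 1;
//   }
// memory-folded form (SchedRW.Folded, i.e. WriteFRndLd):
//   def : WriteRes<WriteFRndLd, [SBPort23, SBPort1]> {
//     let Latency = 9;               // !add(3, LoadLat = 6)
//     let ReleaseAtCycles = [1,1];   // !listconcat([1], [1])
//     let NumMicroOps = !add(1, LoadUOps);
//   }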
@@ -224,10 +224,10 @@ defm : X86WriteRes<WriteFStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNTX, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;
-defm : X86WriteRes<WriteFMaskedStore32, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteFMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteFMaskedStore64, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteFMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteFMaskedStore32, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore64, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
defm : X86WriteRes<WriteFMove, [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [SBPort5], 1, [1], 1>;
@@ -294,9 +294,11 @@ defm : SBWriteResPair<WriteFSqrt64Y, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44
defm : SBWriteResPair<WriteFSqrt64Z, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFSqrt80, [SBPort0,SBFPDivider], 24, [1,24], 1, 6>;
-defm : SBWriteResPair<WriteDPPD, [SBPort0,SBPort1,SBPort5], 9, [1,1,1], 3, 6>;
-defm : SBWriteResPair<WriteDPPS, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 6>;
-defm : SBWriteResPair<WriteDPPSY, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 7>;
+defm : SBWriteResPair<WriteDPPD, [SBPort0,SBPort1,SBPort5], 9, [1,1,1], 3, 6>;
+defm : X86WriteRes<WriteDPPS, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4>;
+defm : X86WriteRes<WriteDPPSY, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4>;
+defm : X86WriteRes<WriteDPPSLd, [SBPort0,SBPort1,SBPort5,SBPort23], 18, [1,2,2,1], 6>;
+defm : X86WriteRes<WriteDPPSYLd, [SBPort0,SBPort1,SBPort5,SBPort23], 19, [1,2,2,1], 6>;
defm : SBWriteResPair<WriteFSign, [SBPort5], 1>;
defm : SBWriteResPair<WriteFRnd, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFRndY, [SBPort1], 3, [1], 1, 7>;
@@ -382,10 +384,10 @@ defm : X86WriteRes<WriteVecStoreX, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreY, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;
-defm : X86WriteRes<WriteVecMaskedStore32, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteVecMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteVecMaskedStore64, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteVecMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteVecMaskedStore32, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore64, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SBPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SBPort05], 1, [1], 1>;
@@ -486,44 +488,44 @@ defm : SBWriteResPair<WritePHAddZ, [SBPort15], 3, [3], 3, 7>; // Unsupported = 1
def : WriteRes<WritePCmpIStrM, [SBPort0]> {
let Latency = 11;
let NumMicroOps = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrMLd, [SBPort0, SBPort23]> {
let Latency = 17;
let NumMicroOps = 4;
- let ResourceCycles = [3,1];
+ let ReleaseAtCycles = [3,1];
}
// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [SBPort015]> {
let Latency = 11;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
}
def : WriteRes<WritePCmpEStrMLd, [SBPort015, SBPort23]> {
let Latency = 17;
- let ResourceCycles = [7, 1];
+ let ReleaseAtCycles = [7, 1];
}
// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [SBPort0]> {
let Latency = 11;
let NumMicroOps = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrILd, [SBPort0,SBPort23]> {
let Latency = 17;
let NumMicroOps = 4;
- let ResourceCycles = [3,1];
+ let ReleaseAtCycles = [3,1];
}
// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [SBPort015]> {
let Latency = 4;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
}
def : WriteRes<WritePCmpEStrILd, [SBPort015, SBPort23]> {
let Latency = 10;
- let ResourceCycles = [7, 1];
+ let ReleaseAtCycles = [7, 1];
}
// MOVMSK Instructions.
@@ -536,48 +538,48 @@ def : WriteRes<WriteMMXMOVMSK, [SBPort0]> { let Latency = 1; }
def : WriteRes<WriteAESDecEnc, [SBPort5,SBPort015]> {
let Latency = 7;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def : WriteRes<WriteAESDecEncLd, [SBPort5,SBPort23,SBPort015]> {
let Latency = 13;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def : WriteRes<WriteAESIMC, [SBPort5]> {
let Latency = 12;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def : WriteRes<WriteAESIMCLd, [SBPort5,SBPort23]> {
let Latency = 18;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def : WriteRes<WriteAESKeyGen, [SBPort015]> {
let Latency = 8;
- let ResourceCycles = [11];
+ let ReleaseAtCycles = [11];
}
def : WriteRes<WriteAESKeyGenLd, [SBPort015, SBPort23]> {
let Latency = 14;
- let ResourceCycles = [10, 1];
+ let ReleaseAtCycles = [10, 1];
}
// Carry-less multiplication instructions.
def : WriteRes<WriteCLMul, [SBPort015]> {
let Latency = 14;
- let ResourceCycles = [18];
+ let ReleaseAtCycles = [18];
}
def : WriteRes<WriteCLMulLd, [SBPort015, SBPort23]> {
let Latency = 20;
- let ResourceCycles = [17, 1];
+ let ReleaseAtCycles = [17, 1];
}
// Load/store MXCSR.
// FIXME: This is probably wrong. Only STMXCSR should require Port4.
-def : WriteRes<WriteLDMXCSR, [SBPort0,SBPort4,SBPort5,SBPort23]> { let Latency = 5; let NumMicroOps = 4; let ResourceCycles = [1,1,1,1]; }
-def : WriteRes<WriteSTMXCSR, [SBPort0,SBPort4,SBPort5,SBPort23]> { let Latency = 5; let NumMicroOps = 4; let ResourceCycles = [1,1,1,1]; }
+def : WriteRes<WriteLDMXCSR, [SBPort0,SBPort4,SBPort5,SBPort23]> { let Latency = 5; let NumMicroOps = 4; let ReleaseAtCycles = [1,1,1,1]; }
+def : WriteRes<WriteSTMXCSR, [SBPort0,SBPort4,SBPort5,SBPort23]> { let Latency = 5; let NumMicroOps = 4; let ReleaseAtCycles = [1,1,1,1]; }
def : WriteRes<WriteSystem, [SBPort015]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [SBPort015]> { let Latency = 100; }
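
The FIXME above is grounded in the Sandy Bridge port layout: ports 2 and 3 handle loads and store addresses while port 4 carries store data, so a pure load such as LDMXCSR should not occupy SBPort4 at all. A hypothetical corrected entry along the lines the FIXME suggests (ports and counts are illustrative, not a committed fix):

  def : WriteRes<WriteLDMXCSR, [SBPort0,SBPort5,SBPort23]> {
    let Latency = 5;
    let NumMicroOps = 3;
    let ReleaseAtCycles = [1,1,1];   // store-data port SBPort4 dropped
  }
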
@@ -601,7 +603,7 @@ defm : SBWriteResPair<WriteFMAZ, [SBPort01], 5>; // Unsupported = 1
def SBWriteResGroup1 : SchedWriteRes<[SBPort1]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SBWriteResGroup1], (instrs COMP_FST0r,
COM_FST0r,
@@ -611,7 +613,7 @@ def: InstRW<[SBWriteResGroup1], (instrs COMP_FST0r,
def SBWriteResGroup2 : SchedWriteRes<[SBPort5]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SBWriteResGroup2], (instrs FDECSTP, FINCSTP, FFREE, FFREEP, FNOP,
LD_Frr, ST_Frr, ST_FPrr)>;
@@ -620,14 +622,14 @@ def: InstRW<[SBWriteResGroup2], (instrs RET64)>;
def SBWriteResGroup4 : SchedWriteRes<[SBPort05]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SBWriteResGroup4], (instrs CDQ, CQO)>;
def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SBWriteResGroup5], (instrs MMX_PABSBrr,
MMX_PABSDrr,
@@ -641,7 +643,7 @@ def: InstRW<[SBWriteResGroup5], (instrs MMX_PABSBrr,
def SBWriteResGroup11 : SchedWriteRes<[SBPort015]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[SBWriteResGroup11], (instrs SCASB,
SCASL,
@@ -651,14 +653,14 @@ def: InstRW<[SBWriteResGroup11], (instrs SCASB,
def SBWriteResGroup12 : SchedWriteRes<[SBPort0,SBPort1]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SBWriteResGroup12], (instregex "(V?)(U?)COMI(SD|SS)rr")>;
def SBWriteResGroup15 : SchedWriteRes<[SBPort0,SBPort015]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SBWriteResGroup15], (instrs CWD,
FNSTSW16r)>;
@@ -666,7 +668,7 @@ def: InstRW<[SBWriteResGroup15], (instrs CWD,
def SBWriteResGroup18 : SchedWriteRes<[SBPort5,SBPort015]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SBWriteResGroup18], (instrs JCXZ, JECXZ, JRCXZ,
MMX_MOVDQ2Qrr)>;
@@ -674,21 +676,21 @@ def: InstRW<[SBWriteResGroup18], (instrs JCXZ, JECXZ, JRCXZ,
def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> {
let Latency = 3;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SBWriteResGroup21], (instrs PUSHFS64)>;
def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> {
let Latency = 3;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SBWriteResGroup22], (instregex "(V?)EXTRACTPSrr")>;
def SBWriteResGroup23 : SchedWriteRes<[SBPort05,SBPort015]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[SBWriteResGroup23], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;
@@ -696,63 +698,63 @@ def: InstRW<[SBWriteResGroup23], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
def SBWriteResGroup24 : SchedWriteRes<[SBPort1,SBPort5,SBPort05,SBPort015]> {
let Latency = 3;
let NumMicroOps = 8;
- let ResourceCycles = [1,1,4,2];
+ let ReleaseAtCycles = [1,1,4,2];
}
def: InstRW<[SBWriteResGroup24], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;
def SBWriteResGroup24b : SchedWriteRes<[SBPort1,SBPort5,SBPort05,SBPort015]> {
let Latency = 4;
let NumMicroOps = 8;
- let ResourceCycles = [1,1,4,2];
+ let ReleaseAtCycles = [1,1,4,2];
}
def: InstRW<[SBWriteResGroup24b], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;
def SBWriteResGroup25_1 : SchedWriteRes<[SBPort23,SBPort015]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[SBWriteResGroup25_1], (instrs LEAVE, LEAVE64)>;
def SBWriteResGroup26_2 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SBWriteResGroup26_2], (instrs COM_FIPr, COM_FIr, UCOM_FIPr, UCOM_FIr)>;
def SBWriteResGroup29 : SchedWriteRes<[SBPort1,SBPort015]> {
let Latency = 4;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SBWriteResGroup29], (instrs MOV64sr)>;
def SBWriteResGroup29_2 : SchedWriteRes<[SBPort5,SBPort015]> {
let Latency = 4;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ReleaseAtCycles = [1,3];
}
def: InstRW<[SBWriteResGroup29_2], (instrs PAUSE)>;
def SBWriteResGroup30 : SchedWriteRes<[SBPort1,SBPort5,SBPort015]> {
let Latency = 3;
let NumMicroOps = 8;
- let ResourceCycles = [1,3,4];
+ let ReleaseAtCycles = [1,3,4];
}
def: InstRW<[SBWriteResGroup30], (instrs LOOP)>;
def SBWriteResGroup31 : SchedWriteRes<[SBPort1,SBPort5,SBPort015,SBPort05]> {
let Latency = 4;
let NumMicroOps = 12;
- let ResourceCycles = [1,3,6,2];
+ let ReleaseAtCycles = [1,3,6,2];
}
def: InstRW<[SBWriteResGroup31], (instrs LOOPE, LOOPNE)>;
def SBWriteResGroup76 : SchedWriteRes<[SBPort05]> {
let Latency = 5;
let NumMicroOps = 8;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
}
def: InstRW<[SBWriteResGroup76], (instregex "RCL(8|16|32|64)rCL",
"RCR(8|16|32|64)rCL")>;
@@ -760,21 +762,21 @@ def: InstRW<[SBWriteResGroup76], (instregex "RCL(8|16|32|64)rCL",
def SBWriteResGroup33 : SchedWriteRes<[SBPort4,SBPort23]> {
let Latency = 5;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SBWriteResGroup33], (instregex "PUSH(16r|32r|64r|64i8)")>;
def SBWriteResGroup35 : SchedWriteRes<[SBPort1,SBPort5]> {
let Latency = 5;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[SBWriteResGroup35], (instrs CLI)>;
def SBWriteResGroup35_2 : SchedWriteRes<[SBPort1,SBPort4,SBPort23]> {
let Latency = 5;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SBWriteResGroup35_2], (instrs PUSHGS64)>;
def: InstRW<[SBWriteResGroup35_2], (instregex "ISTT_FP(16|32|64)m")>;
@@ -782,7 +784,7 @@ def: InstRW<[SBWriteResGroup35_2], (instregex "ISTT_FP(16|32|64)m")>;
def SBWriteResGroup36 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
let Latency = 5;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SBWriteResGroup36], (instrs CALL64pcrel32)>;
def: InstRW<[SBWriteResGroup36], (instregex "CALL(16|32|64)r",
@@ -791,21 +793,21 @@ def: InstRW<[SBWriteResGroup36], (instregex "CALL(16|32|64)r",
def SBWriteResGroup40 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
let Latency = 5;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SBWriteResGroup40], (instrs STOSB, STOSL, STOSQ, STOSW)>;
def SBWriteResGroup41 : SchedWriteRes<[SBPort5,SBPort015]> {
let Latency = 5;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ReleaseAtCycles = [1,3];
}
def: InstRW<[SBWriteResGroup41], (instrs FNINIT)>;
def SBWriteResGroup45 : SchedWriteRes<[SBPort0,SBPort4,SBPort23,SBPort15]> {
let Latency = 5;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SBWriteResGroup45], (instregex "(V?)PEXTR(D|Q)mr",
"PUSHF(16|64)")>;
@@ -813,21 +815,21 @@ def: InstRW<[SBWriteResGroup45], (instregex "(V?)PEXTR(D|Q)mr",
def SBWriteResGroup46 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
let Latency = 5;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SBWriteResGroup46], (instregex "CLFLUSH")>;
def SBWriteResGroup47 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
let Latency = 5;
let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
+ let ReleaseAtCycles = [1,2,1,1];
}
def: InstRW<[SBWriteResGroup47], (instregex "FXRSTOR")>;
def SBWriteResGroup48 : SchedWriteRes<[SBPort23]> {
let Latency = 6;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SBWriteResGroup48], (instrs VBROADCASTSSrm)>;
def: InstRW<[SBWriteResGroup48], (instregex "POP(16|32|64)r",
@@ -843,14 +845,14 @@ def: InstRW<[SBWriteResGroup48], (instregex "POP(16|32|64)r",
def SBWriteResGroup49 : SchedWriteRes<[SBPort5,SBPort23]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SBWriteResGroup49], (instrs MOV16sm)>;
def SBWriteResGroup51 : SchedWriteRes<[SBPort23,SBPort15]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SBWriteResGroup51], (instrs MMX_PABSBrm,
MMX_PABSDrm,
@@ -863,14 +865,14 @@ def: InstRW<[SBWriteResGroup51], (instrs MMX_PABSBrm,
def SBWriteResGroup52 : SchedWriteRes<[SBPort23,SBPort015]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SBWriteResGroup52], (instrs LODSL, LODSQ)>;
def SBWriteResGroup53 : SchedWriteRes<[SBPort4,SBPort23]> {
let Latency = 6;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[SBWriteResGroup53], (instregex "ST_F(32|64)m",
"ST_FP(32|64|80)m")>;
@@ -878,60 +880,58 @@ def: InstRW<[SBWriteResGroup53], (instregex "ST_F(32|64)m",
def SBWriteResGroup54 : SchedWriteRes<[SBPort23]> {
let Latency = 7;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
-def: InstRW<[SBWriteResGroup54], (instrs VBROADCASTSDYrm,
- VBROADCASTSSYrm,
- VMOVDDUPYrm,
+def: InstRW<[SBWriteResGroup54], (instrs VMOVDDUPYrm,
VMOVSHDUPYrm,
VMOVSLDUPYrm)>;
def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort05]> {
let Latency = 7;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SBWriteResGroup58], (instrs VINSERTF128rm)>;
def SBWriteResGroup59 : SchedWriteRes<[SBPort23,SBPort15]> {
let Latency = 7;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SBWriteResGroup59], (instrs MMX_PADDQrm)>;
def SBWriteResGroup62 : SchedWriteRes<[SBPort5,SBPort23]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[SBWriteResGroup62], (instrs VERRm, VERWm)>;
def SBWriteResGroup63 : SchedWriteRes<[SBPort23,SBPort015]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[SBWriteResGroup63], (instrs LODSB, LODSW)>;
def SBWriteResGroup64 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SBWriteResGroup64], (instrs FARJMP64m)>;
def SBWriteResGroup66 : SchedWriteRes<[SBPort0,SBPort4,SBPort23]> {
let Latency = 7;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
+ let ReleaseAtCycles = [1,1,2];
}
def: InstRW<[SBWriteResGroup66], (instrs FNSTSWm)>;
def SBWriteResGroup67 : SchedWriteRes<[SBPort1,SBPort5,SBPort015]> {
let Latency = 7;
let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
+ let ReleaseAtCycles = [1,2,1];
}
def: InstRW<[SBWriteResGroup67], (instregex "SLDT(16|32|64)r",
"STR(16|32|64)r")>;
@@ -939,7 +939,7 @@ def: InstRW<[SBWriteResGroup67], (instregex "SLDT(16|32|64)r",
def SBWriteResGroup68 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
let Latency = 7;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
+ let ReleaseAtCycles = [1,1,2];
}
def: InstRW<[SBWriteResGroup68], (instrs FNSTCW16m)>;
def: InstRW<[SBWriteResGroup68], (instregex "CALL(16|32|64)m")>;
@@ -947,7 +947,7 @@ def: InstRW<[SBWriteResGroup68], (instregex "CALL(16|32|64)m")>;
def SBWriteResGroup69 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
let Latency = 7;
let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
+ let ReleaseAtCycles = [1,2,1];
}
def: InstRW<[SBWriteResGroup69], (instregex "SAR(8|16|32|64)m(1|i)",
"SHL(8|16|32|64)m(1|i)",
@@ -956,21 +956,21 @@ def: InstRW<[SBWriteResGroup69], (instregex "SAR(8|16|32|64)m(1|i)",
def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let Latency = 8;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SBWriteResGroup77], (instregex "(V?)(U?)COMI(SD|SS)rm")>;
def SBWriteResGroup81 : SchedWriteRes<[SBPort4, SBPort23, SBPort015]> {
let Latency = 6;
let NumMicroOps = 3;
- let ResourceCycles = [1, 2, 1];
+ let ReleaseAtCycles = [1, 2, 1];
}
def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG(8|16)B")>;
def SBWriteResGroup83 : SchedWriteRes<[SBPort23,SBPort015]> {
let Latency = 8;
let NumMicroOps = 5;
- let ResourceCycles = [2,3];
+ let ReleaseAtCycles = [2,3];
}
def: InstRW<[SBWriteResGroup83], (instrs CMPSB,
CMPSL,
@@ -980,14 +980,14 @@ def: InstRW<[SBWriteResGroup83], (instrs CMPSB,
def SBWriteResGroup84 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
let Latency = 8;
let NumMicroOps = 5;
- let ResourceCycles = [1,2,2];
+ let ReleaseAtCycles = [1,2,2];
}
def: InstRW<[SBWriteResGroup84], (instrs FLDCW16m)>;
def SBWriteResGroup85 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
let Latency = 8;
let NumMicroOps = 5;
- let ResourceCycles = [1,2,2];
+ let ReleaseAtCycles = [1,2,2];
}
def: InstRW<[SBWriteResGroup85], (instregex "ROL(8|16|32|64)m(1|i)",
"ROR(8|16|32|64)m(1|i)")>;
@@ -995,7 +995,7 @@ def: InstRW<[SBWriteResGroup85], (instregex "ROL(8|16|32|64)m(1|i)",
def SBWriteResGroup86 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
let Latency = 8;
let NumMicroOps = 5;
- let ResourceCycles = [1,2,2];
+ let ReleaseAtCycles = [1,2,2];
}
def: InstRW<[SBWriteResGroup86], (instrs MOVSB, MOVSL, MOVSQ, MOVSW)>;
def: InstRW<[SBWriteResGroup86], (instregex "XADD(8|16|32|64)rm")>;
@@ -1003,21 +1003,21 @@ def: InstRW<[SBWriteResGroup86], (instregex "XADD(8|16|32|64)rm")>;
def SBWriteResGroup87 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
let Latency = 8;
let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,2];
+ let ReleaseAtCycles = [1,1,1,2];
}
def: InstRW<[SBWriteResGroup87], (instrs FARCALL64m)>;
def SBWriteResGroup95 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> {
let Latency = 9;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SBWriteResGroup95], (instregex "LD_F(32|64|80)m")>;
def SBWriteResGroup97 : SchedWriteRes<[SBPort1,SBPort4,SBPort23]> {
let Latency = 9;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
+ let ReleaseAtCycles = [1,1,2];
}
def: InstRW<[SBWriteResGroup97], (instregex "IST_F(16|32)m",
"IST_FP(16|32|64)m")>;
@@ -1025,7 +1025,7 @@ def: InstRW<[SBWriteResGroup97], (instregex "IST_F(16|32)m",
def SBWriteResGroup97_2 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
let Latency = 9;
let NumMicroOps = 6;
- let ResourceCycles = [1,2,3];
+ let ReleaseAtCycles = [1,2,3];
}
def: InstRW<[SBWriteResGroup97_2], (instregex "ROL(8|16|32|64)mCL",
"ROR(8|16|32|64)mCL",
@@ -1036,14 +1036,14 @@ def: InstRW<[SBWriteResGroup97_2], (instregex "ROL(8|16|32|64)mCL",
def SBWriteResGroup98 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
let Latency = 9;
let NumMicroOps = 4;
- let ResourceCycles = [1,2,3];
+ let ReleaseAtCycles = [1,2,3];
}
def: SchedAlias<WriteADCRMW, SBWriteResGroup98>;
def SBWriteResGroup99 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> {
let Latency = 9;
let NumMicroOps = 4;
- let ResourceCycles = [1,2,2,1];
+ let ReleaseAtCycles = [1,2,2,1];
}
def: InstRW<[SBWriteResGroup99, ReadAfterLd], (instrs ADC8mr, ADC16mr, ADC32mr, ADC64mr,
SBB8mr, SBB16mr, SBB32mr, SBB64mr)>;
@@ -1051,14 +1051,14 @@ def: InstRW<[SBWriteResGroup99, ReadAfterLd], (instrs ADC8mr, ADC16mr, ADC32mr,
def SBWriteResGroup100 : SchedWriteRes<[SBPort4,SBPort5,SBPort23,SBPort05,SBPort015]> {
let Latency = 9;
let NumMicroOps = 6;
- let ResourceCycles = [1,1,2,1,1];
+ let ReleaseAtCycles = [1,1,2,1,1];
}
def : SchedAlias<WriteBitTestRegLd, SBWriteResGroup100>; // TODO - this is incorrect - no RMW
def SBWriteResGroup101 : SchedWriteRes<[SBPort1,SBPort23]> {
let Latency = 10;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SBWriteResGroup101], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m")>;
@@ -1066,21 +1066,21 @@ def: InstRW<[SBWriteResGroup101], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
def SBWriteResGroup104 : SchedWriteRes<[SBPort0,SBPort23]> {
let Latency = 11;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SBWriteResGroup104], (instregex "(V?)PCMPGTQrm")>;
def SBWriteResGroup106 : SchedWriteRes<[SBPort1,SBPort23]> {
let Latency = 11;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[SBWriteResGroup106], (instregex "FICOM(P?)(16|32)m")>;
def SBWriteResGroup108 : SchedWriteRes<[SBPort05,SBPort23]> {
let Latency = 11;
let NumMicroOps = 11;
- let ResourceCycles = [7,4];
+ let ReleaseAtCycles = [7,4];
}
def: InstRW<[SBWriteResGroup108], (instregex "RCL(8|16|32|64)m",
"RCR(8|16|32|64)m")>;
@@ -1088,49 +1088,49 @@ def: InstRW<[SBWriteResGroup108], (instregex "RCL(8|16|32|64)m",
def SBWriteResGroup111 : SchedWriteRes<[SBPort0,SBPort23]> {
let Latency = 12;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SBWriteResGroup111], (instregex "MUL_F(32|64)m")>;
def SBWriteResGroup114 : SchedWriteRes<[SBPort1,SBPort23]> {
let Latency = 13;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[SBWriteResGroup114], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>;
def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let Latency = 15;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI(16|32)m")>;
def SBWriteResGroup130 : SchedWriteRes<[SBPort0,SBPort23]> {
let Latency = 31;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SBWriteResGroup130], (instregex "DIV(R?)_F(32|64)m")>;
def SBWriteResGroup131 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let Latency = 34;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SBWriteResGroup131], (instregex "DIV(R?)_FI(16|32)m")>;
def SBWriteResGroupVzeroall : SchedWriteRes<[SBPort5]> {
let Latency = 9;
let NumMicroOps = 20;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[SBWriteResGroupVzeroall], (instrs VZEROALL)>;
def SBWriteResGroupVzeroupper : SchedWriteRes<[]> {
let Latency = 1;
let NumMicroOps = 4;
- let ResourceCycles = [];
+ let ReleaseAtCycles = [];
}
def: InstRW<[SBWriteResGroupVzeroupper], (instrs VZEROUPPER)>;
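
The two groups above also illustrate that uop count and port pressure are tracked independently: SBWriteResGroupVzeroall issues 20 uops while holding SBPort5 for only two cycles, and SBWriteResGroupVzeroupper names no resources at all, so its four uops cost decode/retire bandwidth but no execution port. The no-port pattern in isolation (the class name here is made up for illustration):

  def SBWriteNoPortCost : SchedWriteRes<[]> {
    let Latency = 1;
    let NumMicroOps = 4;
    let ReleaseAtCycles = [];   // no ports listed, so no per-port cycles
  }
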
@@ -1190,7 +1190,7 @@ def : InstRW<[SBWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
def SBWritePCMPGTQ : SchedWriteRes<[SBPort0]> {
let Latency = 5;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def SBWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
@@ -1202,13 +1202,13 @@ def : InstRW<[SBWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr)>;
// CMOVs that use both the Z and C flags require an extra uop.
def SBWriteCMOVA_CMOVBErr : SchedWriteRes<[SBPort05,SBPort015]> {
let Latency = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
let NumMicroOps = 3;
}
def SBWriteCMOVA_CMOVBErm : SchedWriteRes<[SBPort23,SBPort05,SBPort015]> {
let Latency = 8;
- let ResourceCycles = [1,2,1];
+ let ReleaseAtCycles = [1,2,1];
let NumMicroOps = 4;
}
@@ -1228,13 +1228,13 @@ def : InstRW<[SBCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
// SETCCs that use both the Z and C flags require an extra uop.
def SBWriteSETA_SETBEr : SchedWriteRes<[SBPort05]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 2;
}
def SBWriteSETA_SETBEm : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
let Latency = 3;
- let ResourceCycles = [1,1,2];
+ let ReleaseAtCycles = [1,1,2];
let NumMicroOps = 4;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSapphireRapids.td b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSapphireRapids.td
index bcf1601f26bb..bf9e4b7dc6d9 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSapphireRapids.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSapphireRapids.td
@@ -102,7 +102,7 @@ multiclass SPRWriteResPair<X86FoldableSchedWrite SchedRW,
// Register variant uses a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
- let ResourceCycles = Res;
+ let ReleaseAtCycles = Res;
let NumMicroOps = UOps;
}
@@ -110,7 +110,7 @@ multiclass SPRWriteResPair<X86FoldableSchedWrite SchedRW,
// the latency (default = 5).
def : WriteRes<SchedRW.Folded, !listconcat([SPRPort02_03_11], ExePorts)> {
let Latency = !add(Lat, LoadLat);
- let ResourceCycles = !listconcat([1], Res);
+ let ReleaseAtCycles = !listconcat([1], Res);
let NumMicroOps = !add(UOps, LoadUOps);
}
}
@@ -305,7 +305,7 @@ defm : SPRWriteResPair<WriteFSqrt64Y, [SPRPort00], 18, [1], 1, 3>;
// Warning: negative load latency.
defm : SPRWriteResPair<WriteFSqrt64Z, [SPRPort00, SPRPort00_05], 32, [2, 1], 3, -1>;
def : WriteRes<WriteFSqrt80, [SPRPortInvalid, SPRPort00]> {
- let ResourceCycles = [7, 1];
+ let ReleaseAtCycles = [7, 1];
let Latency = 21;
}
defm : SPRWriteResPair<WriteFSqrtX, [SPRPort00], 12, [1], 1, 7>;
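
The "negative load latency" warning above is meant literally: SPRWriteResPair adds its LoadLat argument to Lat when building the folded variant, so the -1 here makes the memory form of WriteFSqrt64Z resolve to 31 cycles, nominally one cycle faster than the 32-cycle register form. Roughly what the multiclass expands to for that entry (LoadUOps assumed to default to 0):

  def : WriteRes<WriteFSqrt64Z.Folded,
                 !listconcat([SPRPort02_03_11], [SPRPort00, SPRPort00_05])> {
    let Latency = !add(32, -1);                      // 31
    let ReleaseAtCycles = !listconcat([1], [2, 1]);
    let NumMicroOps = !add(3, 0);
  }
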
@@ -524,7 +524,7 @@ def : InstRW<[SPRWriteResGroup0], (instregex "^AA(D|N)D64mr$",
"^A(X?)OR64mr$")>;
def SPRWriteResGroup1 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
- let ResourceCycles = [2, 1, 1, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1, 1, 1];
let Latency = 12;
let NumMicroOps = 6;
}
@@ -545,7 +545,7 @@ def SPRWriteResGroup3 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRP
def : InstRW<[SPRWriteResGroup3], (instregex "^(ADC|SBB)8mi(8?)$")>;
def SPRWriteResGroup4 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
- let ResourceCycles = [2, 1, 1, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1, 1, 1];
let Latency = 13;
let NumMicroOps = 6;
}
@@ -635,14 +635,14 @@ def : InstRW<[SPRWriteResGroup10, ReadAfterVecXLd], (instregex "^(V?)PACK(S|U)S(
"^VPMULTISHIFTQBZ128rm(b?)$")>;
def : InstRW<[SPRWriteResGroup10, ReadAfterVecXLd], (instrs VFPCLASSPHZ128rm)>;
def : InstRW<[SPRWriteResGroup10, ReadAfterVecYLd], (instregex "^VFPCLASSP(D|H|S)Z((256)?)rm$",
- "^VPERM(I|T)2(D|Q|PS)128rm((b|k|bk|kz)?)$",
- "^VPERM(I|T)2(D|Q|PS)128rmbkz$",
- "^VPERM(I|T)2PD128rm((b|k|bk|kz)?)$",
- "^VPERM(I|T)2PD128rmbkz$")>;
+ "^VPERM(I|T)2(D|Q|PS)Z128rm((b|k|bk|kz)?)$",
+ "^VPERM(I|T)2(D|Q|PS)Z128rmbkz$",
+ "^VPERM(I|T)2PDZ128rm((b|k|bk|kz)?)$",
+ "^VPERM(I|T)2PDZ128rmbkz$")>;
def : InstRW<[SPRWriteResGroup10, ReadAfterVecYLd], (instrs VPERMBZ128rm)>;
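
The regex edits above correct instruction names rather than the model: the AVX-512 VPERMI2/VPERMT2 definitions carry a Z infix ahead of the width suffix (VPERMI2DZ128rm, VPERMT2PSZ128rmbkz, and so on), so the old Z-less patterns matched nothing. One of the corrected lines in plain form:

  def : InstRW<[SPRWriteResGroup10, ReadAfterVecYLd],
               (instregex "^VPERM(I|T)2(D|Q|PS)Z128rm((b|k|bk|kz)?)$")>;

The same class of fix shows up below for the VPERM(I|T)2 register forms and for VBROADCAST(F|I)128rm.
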
def SPRWriteResGroup11 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 13;
let NumMicroOps = 3;
}
@@ -678,8 +678,8 @@ def : InstRW<[SPRWriteResGroup12], (instregex "^ADD_F(P?)rST0$",
"^VPERM(B|D|Q)Zrr$",
"^VPERM(D|Q)Z256rr((k|kz)?)$",
"^VPERM(D|Q)Zrrk(z?)$",
- "^VPERM(I|T)2(D|Q)(128|256)rr((k|kz)?)$",
- "^VPERM(I|T)2(D|Q)rr((k|kz)?)$",
+ "^VPERM(I|T)2(D|Q)Z(128|256)rr((k|kz)?)$",
+ "^VPERM(I|T)2(D|Q)Zrr((k|kz)?)$",
"^VPM(AX|IN)(S|U)QZ(128|256)rr((k|kz)?)$",
"^VPMULTISHIFTQBZ(128|256)rr$",
"^VPOPCNT(B|D|Q|W)Z(128|256)rr$",
@@ -730,7 +730,7 @@ def SPRWriteResGroup16 : SchedWriteRes<[SPRPort01_05_10]> {
def : InstRW<[SPRWriteResGroup16], (instregex "^ANDN(32|64)rr$")>;
def SPRWriteResGroup17 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11]> {
- let ResourceCycles = [5, 2, 1, 1];
+ let ReleaseAtCycles = [5, 2, 1, 1];
let Latency = 10;
let NumMicroOps = 9;
}
@@ -743,7 +743,7 @@ def : InstRW<[SPRWriteResGroup18], (instregex "^BT((C|R|S)?)64rr$",
"^P(DEP|EXT)(32|64)rr$")>;
def SPRWriteResGroup19 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
- let ResourceCycles = [4, 2, 1, 1, 1, 1];
+ let ReleaseAtCycles = [4, 2, 1, 1, 1, 1];
let Latency = 17;
let NumMicroOps = 10;
}
@@ -809,14 +809,14 @@ def SPRWriteResGroup28 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPR
def : InstRW<[SPRWriteResGroup28], (instrs CLFLUSHOPT)>;
def SPRWriteResGroup29 : SchedWriteRes<[SPRPort00_06, SPRPort01]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 3;
}
def : InstRW<[SPRWriteResGroup29], (instrs CLI)>;
def SPRWriteResGroup30 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort05]> {
- let ResourceCycles = [6, 1, 3];
+ let ReleaseAtCycles = [6, 1, 3];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 10;
}
@@ -830,35 +830,35 @@ def : InstRW<[SPRWriteResGroup31], (instregex "^MOV16o(16|32|64)a$")>;
def : InstRW<[SPRWriteResGroup31], (instrs CLWB)>;
def SPRWriteResGroup32 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> {
- let ResourceCycles = [5, 2];
+ let ReleaseAtCycles = [5, 2];
let Latency = 6;
let NumMicroOps = 7;
}
def : InstRW<[SPRWriteResGroup32], (instregex "^CMPS(B|L|Q|W)$")>;
def SPRWriteResGroup33 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01_05, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [2, 7, 6, 2, 1, 1, 2, 1];
+ let ReleaseAtCycles = [2, 7, 6, 2, 1, 1, 2, 1];
let Latency = 32;
let NumMicroOps = 22;
}
def : InstRW<[SPRWriteResGroup33], (instrs CMPXCHG16B)>;
def SPRWriteResGroup34 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
- let ResourceCycles = [4, 7, 2, 1, 1, 1];
+ let ReleaseAtCycles = [4, 7, 2, 1, 1, 1];
let Latency = 25;
let NumMicroOps = 16;
}
def : InstRW<[SPRWriteResGroup34], (instrs CMPXCHG8B)>;
def SPRWriteResGroup35 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
- let ResourceCycles = [1, 2, 1, 1, 1];
+ let ReleaseAtCycles = [1, 2, 1, 1, 1];
let Latency = 13;
let NumMicroOps = 6;
}
def : InstRW<[SPRWriteResGroup35], (instrs CMPXCHG8rm)>;
def SPRWriteResGroup36 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_06, SPRPort01, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [2, 1, 10, 6, 1, 5, 1];
+ let ReleaseAtCycles = [2, 1, 10, 6, 1, 5, 1];
let Latency = 18;
let NumMicroOps = 26;
}
@@ -901,7 +901,7 @@ def : InstRW<[SPRWriteResGroup39, ReadDefault, ReadInt2Fpu], (instregex "^(V?)CV
def : InstRW<[SPRWriteResGroup39, ReadDefault, ReadInt2Fpu], (instrs VCVTSI2SSrr)>;
def SPRWriteResGroup40 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 8;
let NumMicroOps = 3;
}
@@ -994,7 +994,7 @@ def : InstRW<[SPRWriteResGroup52], (instregex "^ENQCMD(S?)(16|32|64)$",
def : InstRW<[SPRWriteResGroup52], (instrs PUSHF32)>;
def SPRWriteResGroup53 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [2, 21, 2, 14, 4, 9, 5];
+ let ReleaseAtCycles = [2, 21, 2, 14, 4, 9, 5];
let Latency = 126;
let NumMicroOps = 57;
}
@@ -1040,14 +1040,14 @@ def : InstRW<[SPRWriteResGroup58], (instrs FBSTPm,
VMPTRSTm)>;
def SPRWriteResGroup59 : SchedWriteRes<[SPRPort00_05]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 2;
let NumMicroOps = 2;
}
def : InstRW<[SPRWriteResGroup59], (instrs FDECSTP)>;
def SPRWriteResGroup60 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 11;
let NumMicroOps = 3;
}
@@ -1071,21 +1071,21 @@ def SPRWriteResGroup62 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11
def : InstRW<[SPRWriteResGroup62], (instrs FLDCW16m)>;
def SPRWriteResGroup63 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03, SPRPort02_03_11]> {
- let ResourceCycles = [2, 5, 10, 39, 8];
+ let ReleaseAtCycles = [2, 5, 10, 39, 8];
let Latency = 62;
let NumMicroOps = 64;
}
def : InstRW<[SPRWriteResGroup63], (instrs FLDENVm)>;
def SPRWriteResGroup64 : SchedWriteRes<[SPRPort00_01_05_06]> {
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
let Latency = 4;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup64], (instrs FNCLEX)>;
def SPRWriteResGroup65 : SchedWriteRes<[SPRPort00_01_05_06, SPRPort00_05, SPRPort05]> {
- let ResourceCycles = [6, 3, 6];
+ let ReleaseAtCycles = [6, 3, 6];
let Latency = 75;
let NumMicroOps = 15;
}
@@ -1110,28 +1110,28 @@ def SPRWriteResGroup68 : SchedWriteRes<[SPRPort00, SPRPort04, SPRPort04_09]> {
def : InstRW<[SPRWriteResGroup68], (instrs FNSTSWm)>;
def SPRWriteResGroup69 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06, SPRPort00_06, SPRPort01, SPRPort04, SPRPort04_09, SPRPort05, SPRPort06]> {
- let ResourceCycles = [9, 11, 21, 1, 30, 11, 16, 1];
+ let ReleaseAtCycles = [9, 11, 21, 1, 30, 11, 16, 1];
let Latency = 106;
let NumMicroOps = 100;
}
def : InstRW<[SPRWriteResGroup69], (instrs FSTENVm)>;
def SPRWriteResGroup70 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort01_05, SPRPort02_03, SPRPort02_03_11, SPRPort06]> {
- let ResourceCycles = [4, 1, 2, 1, 47, 33, 2];
+ let ReleaseAtCycles = [4, 1, 2, 1, 47, 33, 2];
let Latency = 63;
let NumMicroOps = 90;
}
def : InstRW<[SPRWriteResGroup70], (instrs FXRSTOR)>;
def SPRWriteResGroup71 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort01_05, SPRPort02_03, SPRPort02_03_11, SPRPort06]> {
- let ResourceCycles = [4, 1, 2, 1, 45, 31, 4];
+ let ReleaseAtCycles = [4, 1, 2, 1, 45, 31, 4];
let Latency = 63;
let NumMicroOps = 88;
}
def : InstRW<[SPRWriteResGroup71], (instrs FXRSTOR64)>;
def SPRWriteResGroup72 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [2, 5, 10, 10, 2, 38, 5, 38];
+ let ReleaseAtCycles = [2, 5, 10, 10, 2, 38, 5, 38];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 110;
}
@@ -1208,41 +1208,41 @@ def : InstRW<[SPRWriteResGroup74], (instrs VCVTSH2SSZrr,
VGF2P8MULBYrr)>;
def SPRWriteResGroup75 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [7, 5, 26, 19, 2, 7, 21];
+ let ReleaseAtCycles = [7, 5, 26, 19, 2, 7, 21];
let Latency = 35;
let NumMicroOps = 87;
}
def : InstRW<[SPRWriteResGroup75], (instrs IN16ri)>;
def SPRWriteResGroup76 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [7, 1, 4, 26, 19, 3, 7, 20];
+ let ReleaseAtCycles = [7, 1, 4, 26, 19, 3, 7, 20];
let Latency = 35;
let NumMicroOps = 87;
}
def : InstRW<[SPRWriteResGroup76], (instrs IN16rr)>;
def SPRWriteResGroup77 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [7, 6, 28, 21, 2, 10, 20];
+ let ReleaseAtCycles = [7, 6, 28, 21, 2, 10, 20];
let Latency = 35;
let NumMicroOps = 94;
}
def : InstRW<[SPRWriteResGroup77], (instrs IN32ri)>;
def SPRWriteResGroup78 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [7, 9, 28, 21, 2, 11, 21];
+ let ReleaseAtCycles = [7, 9, 28, 21, 2, 11, 21];
let NumMicroOps = 99;
}
def : InstRW<[SPRWriteResGroup78], (instrs IN32rr)>;
def SPRWriteResGroup79 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [7, 6, 25, 19, 2, 8, 20];
+ let ReleaseAtCycles = [7, 6, 25, 19, 2, 8, 20];
let Latency = 35;
let NumMicroOps = 87;
}
def : InstRW<[SPRWriteResGroup79], (instrs IN8ri)>;
def SPRWriteResGroup80 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [7, 6, 25, 19, 2, 7, 20];
+ let ReleaseAtCycles = [7, 6, 25, 19, 2, 7, 20];
let Latency = 35;
let NumMicroOps = 86;
}
@@ -1265,28 +1265,28 @@ def : InstRW<[SPRWriteResGroup82], (instrs INC32r_alt,
VBROADCASTI32X2Z128rm)>;
def SPRWriteResGroup83 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [7, 6, 24, 17, 8, 1, 19, 1];
+ let ReleaseAtCycles = [7, 6, 24, 17, 8, 1, 19, 1];
let Latency = 20;
let NumMicroOps = 83;
}
def : InstRW<[SPRWriteResGroup83], (instrs INSB)>;
def SPRWriteResGroup84 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [7, 1, 5, 1, 27, 17, 11, 1, 21, 1];
+ let ReleaseAtCycles = [7, 1, 5, 1, 27, 17, 11, 1, 21, 1];
let Latency = 20;
let NumMicroOps = 92;
}
def : InstRW<[SPRWriteResGroup84], (instrs INSL)>;
def SPRWriteResGroup85 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [7, 1, 4, 1, 25, 17, 1, 9, 1, 19, 1];
+ let ReleaseAtCycles = [7, 1, 4, 1, 25, 17, 1, 9, 1, 19, 1];
let Latency = 20;
let NumMicroOps = 86;
}
def : InstRW<[SPRWriteResGroup85], (instrs INSW)>;
def SPRWriteResGroup86 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [5, 4, 8, 6, 2, 5, 7, 5];
+ let ReleaseAtCycles = [5, 4, 8, 6, 2, 5, 7, 5];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 42;
}
@@ -1393,35 +1393,35 @@ def : InstRW<[SPRWriteResGroup96], (instregex "^K((OR)?)TEST(B|D|Q|W)rr$",
def : InstRW<[SPRWriteResGroup96], (instrs VMOVSDto64Zrr)>;
def SPRWriteResGroup97 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [8, 2, 14, 3, 1];
+ let ReleaseAtCycles = [8, 2, 14, 3, 1];
let Latency = 198;
let NumMicroOps = 81;
}
def : InstRW<[SPRWriteResGroup97], (instrs LAR16rm)>;
def SPRWriteResGroup98 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 3, 1, 8, 5, 1, 2, 1];
+ let ReleaseAtCycles = [1, 3, 1, 8, 5, 1, 2, 1];
let Latency = 66;
let NumMicroOps = 22;
}
def : InstRW<[SPRWriteResGroup98], (instrs LAR16rr)>;
def SPRWriteResGroup99 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 2, 2, 9, 5, 3, 1];
+ let ReleaseAtCycles = [1, 2, 2, 9, 5, 3, 1];
let Latency = 71;
let NumMicroOps = 85;
}
def : InstRW<[SPRWriteResGroup99], (instrs LAR32rm)>;
def SPRWriteResGroup100 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 3, 1, 8, 5, 1, 2, 1];
+ let ReleaseAtCycles = [1, 3, 1, 8, 5, 1, 2, 1];
let Latency = 65;
let NumMicroOps = 22;
}
def : InstRW<[SPRWriteResGroup100], (instregex "^LAR(32|64)rr$")>;
def SPRWriteResGroup101 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 2, 2, 9, 5, 3, 1];
+ let ReleaseAtCycles = [1, 2, 2, 9, 5, 3, 1];
let Latency = 71;
let NumMicroOps = 87;
}
@@ -1434,7 +1434,7 @@ def SPRWriteResGroup102 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort01]> {
def : InstRW<[SPRWriteResGroup102], (instrs LEA16r)>;
def SPRWriteResGroup103 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> {
- let ResourceCycles = [3, 1];
+ let ReleaseAtCycles = [3, 1];
let Latency = 6;
let NumMicroOps = 4;
}
@@ -1443,77 +1443,77 @@ def : InstRW<[SPRWriteResGroup103], (instregex "^LODS(B|W)$",
def : InstRW<[SPRWriteResGroup103], (instrs LEAVE)>;
def SPRWriteResGroup104 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 6;
let NumMicroOps = 3;
}
def : InstRW<[SPRWriteResGroup104], (instrs LEAVE64)>;
def SPRWriteResGroup105 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
- let ResourceCycles = [1, 2, 4, 3, 2, 1, 1];
+ let ReleaseAtCycles = [1, 2, 4, 3, 2, 1, 1];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 14;
}
def : InstRW<[SPRWriteResGroup105], (instrs LGDT64m)>;
def SPRWriteResGroup106 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
- let ResourceCycles = [1, 1, 5, 3, 2, 1, 1];
+ let ReleaseAtCycles = [1, 1, 5, 3, 2, 1, 1];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 14;
}
def : InstRW<[SPRWriteResGroup106], (instrs LIDT64m)>;
def SPRWriteResGroup107 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
- let ResourceCycles = [5, 3, 2, 1, 1];
+ let ReleaseAtCycles = [5, 3, 2, 1, 1];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 12;
}
def : InstRW<[SPRWriteResGroup107], (instrs LLDT16m)>;
def SPRWriteResGroup108 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
- let ResourceCycles = [1, 4, 3, 1, 1, 1];
+ let ReleaseAtCycles = [1, 4, 3, 1, 1, 1];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 11;
}
def : InstRW<[SPRWriteResGroup108], (instrs LLDT16r)>;
def SPRWriteResGroup109 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [1, 1, 2, 8, 3, 1, 2, 7, 2];
+ let ReleaseAtCycles = [1, 1, 2, 8, 3, 1, 2, 7, 2];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 27;
}
def : InstRW<[SPRWriteResGroup109], (instrs LMSW16m)>;
def SPRWriteResGroup110 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [5, 7, 1, 2, 5, 2];
+ let ReleaseAtCycles = [5, 7, 1, 2, 5, 2];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 22;
}
def : InstRW<[SPRWriteResGroup110], (instrs LMSW16r)>;
def SPRWriteResGroup111 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 5;
let NumMicroOps = 3;
}
def : InstRW<[SPRWriteResGroup111], (instregex "^LODS(L|Q)$")>;
def SPRWriteResGroup112 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> {
- let ResourceCycles = [2, 4, 1];
+ let ReleaseAtCycles = [2, 4, 1];
let Latency = 3;
let NumMicroOps = 7;
}
def : InstRW<[SPRWriteResGroup112], (instrs LOOP)>;
def SPRWriteResGroup113 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> {
- let ResourceCycles = [4, 6, 1];
+ let ReleaseAtCycles = [4, 6, 1];
let Latency = 3;
let NumMicroOps = 11;
}
def : InstRW<[SPRWriteResGroup113], (instrs LOOPE)>;
def SPRWriteResGroup114 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> {
- let ResourceCycles = [4, 6, 1];
+ let ReleaseAtCycles = [4, 6, 1];
let Latency = 2;
let NumMicroOps = 11;
}
@@ -1526,14 +1526,14 @@ def SPRWriteResGroup115 : SchedWriteRes<[SPRPort02_03, SPRPort02_03_11, SPRPort0
def : InstRW<[SPRWriteResGroup115], (instrs LRET64)>;
def SPRWriteResGroup116 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 5, 3, 3, 1];
+ let ReleaseAtCycles = [1, 5, 3, 3, 1];
let Latency = 70;
let NumMicroOps = 13;
}
def : InstRW<[SPRWriteResGroup116], (instregex "^LSL(16|32|64)rm$")>;
def SPRWriteResGroup117 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 4, 4, 3, 2, 1];
+ let ReleaseAtCycles = [1, 4, 4, 3, 2, 1];
let Latency = 63;
let NumMicroOps = 15;
}
@@ -1582,7 +1582,7 @@ def SPRWriteResGroup123 : SchedWriteRes<[SPRPort00, SPRPort00_01_05]> {
def : InstRW<[SPRWriteResGroup123], (instregex "^MMX_CVT(T?)PS2PIrr$")>;
def SPRWriteResGroup124 : SchedWriteRes<[SPRPort00, SPRPort04_09, SPRPort07_08]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 12;
let NumMicroOps = 4;
}
@@ -1599,7 +1599,7 @@ def SPRWriteResGroup126 : SchedWriteRes<[SPRPort02_03_11]> {
let Latency = 8;
}
def : InstRW<[SPRWriteResGroup126], (instregex "^MMX_MOV(D|Q)64rm$",
- "^VBROADCAST(F|I)128$",
+ "^VBROADCAST(F|I)128rm$",
"^VBROADCAST(F|I)32X(2|4)Z256rm$",
"^VBROADCAST(F|I)32X(8|2Z)rm$",
"^VBROADCAST(F|I)(32|64)X4rm$",
@@ -1627,7 +1627,7 @@ def SPRWriteResGroup128 : SchedWriteRes<[SPRPort00, SPRPort00_01_05]> {
def : InstRW<[SPRWriteResGroup128], (instregex "^MMX_MOVQ2(DQ|FR64)rr$")>;
def SPRWriteResGroup129 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 12;
let NumMicroOps = 3;
}
@@ -1635,7 +1635,7 @@ def : InstRW<[SPRWriteResGroup129, ReadAfterVecLd], (instregex "^MMX_PACKSS(DW|W
def : InstRW<[SPRWriteResGroup129, ReadAfterVecLd], (instrs MMX_PACKUSWBrm)>;
def SPRWriteResGroup130 : SchedWriteRes<[SPRPort05]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 4;
let NumMicroOps = 2;
}
@@ -1671,14 +1671,14 @@ def : InstRW<[SPRWriteResGroup131, ReadAfterVecYLd], (instregex "^VINSERT(F|I)(3
"^VPTERNLOG(D|Q)Zrmi((kz)?)$")>;
def SPRWriteResGroup132 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = 11;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup132, ReadAfterVecLd], (instregex "^MMX_PH(ADD|SUB)SWrm$")>;
def SPRWriteResGroup133 : SchedWriteRes<[SPRPort00, SPRPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 3;
let NumMicroOps = 3;
}
@@ -1747,7 +1747,7 @@ def SPRWriteResGroup142 : SchedWriteRes<[SPRPort02_03_11]> {
def : InstRW<[SPRWriteResGroup142], (instrs MOV64ao32)>;
def SPRWriteResGroup143 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [1, 2, 4, 16, 7, 2, 2, 12, 2];
+ let ReleaseAtCycles = [1, 2, 4, 16, 7, 2, 2, 12, 2];
let Latency = 217;
let NumMicroOps = 48;
}
@@ -1766,7 +1766,7 @@ def SPRWriteResGroup145 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SP
def : InstRW<[SPRWriteResGroup145], (instrs MOV64rc)>;
def SPRWriteResGroup146 : SchedWriteRes<[SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort05]> {
- let ResourceCycles = [3, 4, 8, 4, 2, 3];
+ let ReleaseAtCycles = [3, 4, 8, 4, 2, 3];
let Latency = 181;
let NumMicroOps = 24;
}
@@ -1851,7 +1851,7 @@ def SPRWriteResGroup158 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
def : InstRW<[SPRWriteResGroup158], (instrs MOVNTImr)>;
def SPRWriteResGroup159 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
- let ResourceCycles = [4, 1, 1, 1];
+ let ReleaseAtCycles = [4, 1, 1, 1];
let Latency = 8;
let NumMicroOps = 7;
}
@@ -1872,7 +1872,7 @@ def : InstRW<[SPRWriteResGroup160], (instregex "^(V?)MOVS(D|S)rr((_REV)?)$",
def : InstRW<[SPRWriteResGroup160], (instrs VPBLENDDrri)>;
def SPRWriteResGroup161 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
- let ResourceCycles = [4, 1, 1, 1];
+ let ReleaseAtCycles = [4, 1, 1, 1];
let Latency = 7;
let NumMicroOps = 7;
}
@@ -1934,70 +1934,70 @@ def : InstRW<[SPRWriteResGroup167], (instregex "^MUL_F(P?)rST0$",
def : InstRW<[SPRWriteResGroup167], (instrs MUL_FST0r)>;
def SPRWriteResGroup168 : SchedWriteRes<[SPRPort00_01_05_06, SPRPort05, SPRPort06]> {
- let ResourceCycles = [7, 1, 2];
+ let ReleaseAtCycles = [7, 1, 2];
let Latency = 20;
let NumMicroOps = 10;
}
def : InstRW<[SPRWriteResGroup168], (instrs MWAITrr)>;
def SPRWriteResGroup169 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [6, 4, 1, 28, 15, 7, 1, 16, 1];
+ let ReleaseAtCycles = [6, 4, 1, 28, 15, 7, 1, 16, 1];
let Latency = 35;
let NumMicroOps = 79;
}
def : InstRW<[SPRWriteResGroup169], (instrs OUT16ir)>;
def SPRWriteResGroup170 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [6, 6, 27, 15, 7, 1, 16, 1];
+ let ReleaseAtCycles = [6, 6, 27, 15, 7, 1, 16, 1];
let Latency = 35;
let NumMicroOps = 79;
}
def : InstRW<[SPRWriteResGroup170], (instrs OUT16rr)>;
def SPRWriteResGroup171 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [6, 4, 1, 30, 15, 9, 1, 18, 1];
+ let ReleaseAtCycles = [6, 4, 1, 30, 15, 9, 1, 18, 1];
let Latency = 35;
let NumMicroOps = 85;
}
def : InstRW<[SPRWriteResGroup171], (instrs OUT32ir)>;
def SPRWriteResGroup172 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [6, 6, 29, 15, 9, 1, 18, 1];
+ let ReleaseAtCycles = [6, 6, 29, 15, 9, 1, 18, 1];
let Latency = 35;
let NumMicroOps = 85;
}
def : InstRW<[SPRWriteResGroup172], (instrs OUT32rr)>;
def SPRWriteResGroup173 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [5, 5, 1, 25, 15, 5, 1, 15, 1];
+ let ReleaseAtCycles = [5, 5, 1, 25, 15, 5, 1, 15, 1];
let Latency = 35;
let NumMicroOps = 73;
}
def : InstRW<[SPRWriteResGroup173], (instrs OUT8ir)>;
def SPRWriteResGroup174 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [5, 5, 26, 15, 5, 1, 15, 1];
+ let ReleaseAtCycles = [5, 5, 26, 15, 5, 1, 15, 1];
let Latency = 35;
let NumMicroOps = 73;
}
def : InstRW<[SPRWriteResGroup174], (instrs OUT8rr)>;
def SPRWriteResGroup175 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [7, 6, 25, 16, 7, 1, 17, 1];
+ let ReleaseAtCycles = [7, 6, 25, 16, 7, 1, 17, 1];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 80;
}
def : InstRW<[SPRWriteResGroup175], (instrs OUTSB)>;
def SPRWriteResGroup176 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [7, 6, 28, 16, 10, 1, 20, 1];
+ let ReleaseAtCycles = [7, 6, 28, 16, 10, 1, 20, 1];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 89;
}
def : InstRW<[SPRWriteResGroup176], (instrs OUTSL)>;
def SPRWriteResGroup177 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [6, 1, 5, 27, 16, 8, 1, 18, 1];
+ let ReleaseAtCycles = [6, 1, 5, 27, 16, 8, 1, 18, 1];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 83;
}
@@ -2055,14 +2055,14 @@ def : InstRW<[SPRWriteResGroup182], (instregex "^(V?)PEXTR(D|Q)mr$",
"^VPMOVQDZ128mr(k?)$")>;
def SPRWriteResGroup183 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11]> {
- let ResourceCycles = [1, 2, 1];
+ let ReleaseAtCycles = [1, 2, 1];
let Latency = 9;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup183, ReadAfterVecXLd], (instregex "^(V?)PH(ADD|SUB)SWrm$")>;
def SPRWriteResGroup184 : SchedWriteRes<[SPRPort00_01, SPRPort01_05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 2;
let NumMicroOps = 3;
}
@@ -2077,14 +2077,14 @@ def : InstRW<[SPRWriteResGroup185], (instregex "^POP(16|32|64)rmm$",
"^PUSH(16|32)rmm$")>;
def SPRWriteResGroup186 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11]> {
- let ResourceCycles = [6, 2, 1, 1];
+ let ReleaseAtCycles = [6, 2, 1, 1];
let Latency = 5;
let NumMicroOps = 10;
}
def : InstRW<[SPRWriteResGroup186], (instrs POPF16)>;
def SPRWriteResGroup187 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort02_03_11]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 5;
let NumMicroOps = 7;
}
@@ -2097,21 +2097,21 @@ def : InstRW<[SPRWriteResGroup188], (instregex "^PREFETCHT(0|1|2)$")>;
def : InstRW<[SPRWriteResGroup188], (instrs PREFETCHNTA)>;
def SPRWriteResGroup189 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11, SPRPort06]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup189], (instregex "^PTWRITE((64)?)m$")>;
def SPRWriteResGroup190 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort06]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 3;
}
def : InstRW<[SPRWriteResGroup190], (instrs PTWRITE64r)>;
def SPRWriteResGroup191 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort06]> {
- let ResourceCycles = [2, 2];
+ let ReleaseAtCycles = [2, 2];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 4;
}
@@ -2139,49 +2139,49 @@ def SPRWriteResGroup195 : SchedWriteRes<[SPRPort01, SPRPort04_09, SPRPort07_08]>
def : InstRW<[SPRWriteResGroup195], (instregex "^PUSH(F|G)S64$")>;
def SPRWriteResGroup196 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> {
- let ResourceCycles = [2, 3, 2];
+ let ReleaseAtCycles = [2, 3, 2];
let Latency = 8;
let NumMicroOps = 7;
}
def : InstRW<[SPRWriteResGroup196], (instregex "^RC(L|R)(16|32|64)rCL$")>;
def SPRWriteResGroup197 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 13;
let NumMicroOps = 3;
}
def : InstRW<[SPRWriteResGroup197, WriteRMW], (instregex "^RC(L|R)8m(1|i)$")>;
def SPRWriteResGroup198 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> {
- let ResourceCycles = [1, 5, 2];
+ let ReleaseAtCycles = [1, 5, 2];
let Latency = 20;
let NumMicroOps = 8;
}
def : InstRW<[SPRWriteResGroup198, WriteRMW], (instrs RCL8mCL)>;
def SPRWriteResGroup199 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> {
- let ResourceCycles = [2, 5, 2];
+ let ReleaseAtCycles = [2, 5, 2];
let Latency = 7;
let NumMicroOps = 9;
}
def : InstRW<[SPRWriteResGroup199], (instrs RCL8rCL)>;
def SPRWriteResGroup200 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> {
- let ResourceCycles = [2, 4, 3];
+ let ReleaseAtCycles = [2, 4, 3];
let Latency = 20;
let NumMicroOps = 9;
}
def : InstRW<[SPRWriteResGroup200, WriteRMW], (instrs RCR8mCL)>;
def SPRWriteResGroup201 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> {
- let ResourceCycles = [3, 4, 3];
+ let ReleaseAtCycles = [3, 4, 3];
let Latency = 9;
let NumMicroOps = 10;
}
def : InstRW<[SPRWriteResGroup201], (instrs RCR8rCL)>;
def SPRWriteResGroup202 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_05, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort01_05_10, SPRPort05]> {
- let ResourceCycles = [1, 6, 1, 10, 20, 8, 5, 1, 2];
+ let ReleaseAtCycles = [1, 6, 1, 10, 20, 8, 5, 1, 2];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 54;
}
@@ -2199,49 +2199,49 @@ def SPRWriteResGroup204 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SP
def : InstRW<[SPRWriteResGroup204], (instrs RDPKRUr)>;
def SPRWriteResGroup205 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort05]> {
- let ResourceCycles = [9, 6, 2, 1];
+ let ReleaseAtCycles = [9, 6, 2, 1];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 18;
}
def : InstRW<[SPRWriteResGroup205], (instrs RDPMC)>;
def SPRWriteResGroup206 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 3, 2, 5, 7, 3, 1, 2];
+ let ReleaseAtCycles = [2, 3, 2, 5, 7, 3, 1, 2];
let Latency = 1386;
let NumMicroOps = 25;
}
def : InstRW<[SPRWriteResGroup206], (instrs RDRAND16r)>;
def SPRWriteResGroup207 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 3, 2, 5, 7, 3, 1, 2];
+ let ReleaseAtCycles = [2, 3, 2, 5, 7, 3, 1, 2];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 25;
}
def : InstRW<[SPRWriteResGroup207], (instregex "^RDRAND(32|64)r$")>;
def SPRWriteResGroup208 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 3, 3, 5, 7, 1, 4];
+ let ReleaseAtCycles = [2, 3, 3, 5, 7, 1, 4];
let Latency = 1381;
let NumMicroOps = 25;
}
def : InstRW<[SPRWriteResGroup208], (instrs RDSEED16r)>;
def SPRWriteResGroup209 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 3, 3, 5, 7, 1, 4];
+ let ReleaseAtCycles = [2, 3, 3, 5, 7, 1, 4];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 25;
}
def : InstRW<[SPRWriteResGroup209], (instregex "^RDSEED(32|64)r$")>;
def SPRWriteResGroup210 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort05]> {
- let ResourceCycles = [5, 6, 3, 1];
+ let ReleaseAtCycles = [5, 6, 3, 1];
let Latency = 18;
let NumMicroOps = 15;
}
def : InstRW<[SPRWriteResGroup210], (instrs RDTSC)>;
def SPRWriteResGroup211 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort05]> {
- let ResourceCycles = [2, 2, 1, 2, 7, 4, 3];
+ let ReleaseAtCycles = [2, 2, 1, 2, 7, 4, 3];
let Latency = 42;
let NumMicroOps = 21;
}
@@ -2254,7 +2254,7 @@ def SPRWriteResGroup212 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_11]> {
def : InstRW<[SPRWriteResGroup212], (instrs RET64)>;
def SPRWriteResGroup213 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_11]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 6;
let NumMicroOps = 3;
}
@@ -2264,20 +2264,20 @@ def SPRWriteResGroup214 : SchedWriteRes<[]>;
def : InstRW<[SPRWriteResGroup214], (instrs REX64_PREFIX)>;
def SPRWriteResGroup215 : SchedWriteRes<[SPRPort00_06]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 12;
let NumMicroOps = 2;
}
def : InstRW<[SPRWriteResGroup215, WriteRMW], (instregex "^RO(L|R)(16|32|64)m(1|i|CL)$")>;
def SPRWriteResGroup216 : SchedWriteRes<[SPRPort00_06]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 2;
}
def : InstRW<[SPRWriteResGroup216], (instregex "^RO(L|R)(8|16|32|64)r(1|i)$")>;
def SPRWriteResGroup217 : SchedWriteRes<[SPRPort00_06]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 13;
let NumMicroOps = 2;
}
@@ -2286,7 +2286,7 @@ def : InstRW<[SPRWriteResGroup217, WriteRMW], (instregex "^RO(L|R)8m(1|i)$",
"^(RO|SA|SH)R8mCL$")>;
def SPRWriteResGroup218 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 15;
let NumMicroOps = 3;
}
@@ -2299,7 +2299,7 @@ def : InstRW<[SPRWriteResGroup218, ReadAfterVecXLd], (instregex "^(V?)ROUNDS(D|S
"^VRNDSCALES(D|S)Zm_Int((k|kz)?)$")>;
def SPRWriteResGroup219 : SchedWriteRes<[SPRPort00_01]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 8;
let NumMicroOps = 2;
}
@@ -2312,7 +2312,7 @@ def : InstRW<[SPRWriteResGroup219], (instregex "^(V?)ROUND(PD|SS)r$",
"^VROUNDP(D|S)Yr$")>;
def SPRWriteResGroup220 : SchedWriteRes<[SPRPort00_06]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 4;
let NumMicroOps = 2;
}
@@ -2338,7 +2338,7 @@ def : InstRW<[SPRWriteResGroup223], (instregex "^S(A|H)RX(32|64)rr$",
"^SHLX(32|64)rr$")>;
def SPRWriteResGroup224 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort04_09, SPRPort07_08]> {
- let ResourceCycles = [2, 2, 1, 1, 1];
+ let ReleaseAtCycles = [2, 2, 1, 1, 1];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 7;
}
@@ -2351,7 +2351,7 @@ def SPRWriteResGroup225 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
def : InstRW<[SPRWriteResGroup225], (instrs SFENCE)>;
def SPRWriteResGroup226 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort01, SPRPort04_09, SPRPort07_08]> {
- let ResourceCycles = [1, 2, 2, 2];
+ let ReleaseAtCycles = [1, 2, 2, 2];
let Latency = 21;
let NumMicroOps = 7;
}
@@ -2370,14 +2370,14 @@ def SPRWriteResGroup228 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> {
def : InstRW<[SPRWriteResGroup228], (instrs SHA1MSG1rr)>;
def SPRWriteResGroup229 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort01_05, SPRPort02_03_11]> {
- let ResourceCycles = [2, 2, 1, 2, 1];
+ let ReleaseAtCycles = [2, 2, 1, 2, 1];
let Latency = 13;
let NumMicroOps = 8;
}
def : InstRW<[SPRWriteResGroup229, ReadAfterVecXLd], (instrs SHA1MSG2rm)>;
def SPRWriteResGroup230 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort01_05]> {
- let ResourceCycles = [2, 2, 1, 2];
+ let ReleaseAtCycles = [2, 2, 1, 2];
let Latency = 6;
let NumMicroOps = 7;
}
@@ -2428,21 +2428,21 @@ def : InstRW<[SPRWriteResGroup234], (instrs SHA1RNDS4rri,
SHA256RNDS2rr)>;
def SPRWriteResGroup235 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [3, 2, 1, 1, 1];
+ let ReleaseAtCycles = [3, 2, 1, 1, 1];
let Latency = 12;
let NumMicroOps = 8;
}
def : InstRW<[SPRWriteResGroup235, ReadAfterVecXLd], (instrs SHA256MSG1rm)>;
def SPRWriteResGroup236 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort05]> {
- let ResourceCycles = [3, 2, 1, 1];
+ let ReleaseAtCycles = [3, 2, 1, 1];
let Latency = 5;
let NumMicroOps = 7;
}
def : InstRW<[SPRWriteResGroup236], (instrs SHA256MSG1rr)>;
def SPRWriteResGroup237 : SchedWriteRes<[SPRPort05]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 6;
let NumMicroOps = 2;
}
@@ -2489,21 +2489,21 @@ def SPRWriteResGroup243 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> {
def : InstRW<[SPRWriteResGroup243], (instrs STD)>;
def SPRWriteResGroup244 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> {
- let ResourceCycles = [1, 4, 1];
+ let ReleaseAtCycles = [1, 4, 1];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 6;
}
def : InstRW<[SPRWriteResGroup244], (instrs STI)>;
def SPRWriteResGroup245 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 8;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup245], (instrs STOSB)>;
def SPRWriteResGroup246 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 7;
let NumMicroOps = 4;
}
@@ -2669,7 +2669,7 @@ def : InstRW<[SPRWriteResGroup258, ReadAfterVecXLd], (instregex "^VPALIGNRZ128rm
def : InstRW<[SPRWriteResGroup258, ReadAfterVecXLd], (instrs VPCLMULQDQZ256rm)>;
def SPRWriteResGroup259 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> {
- let ResourceCycles = [3, 1];
+ let ReleaseAtCycles = [3, 1];
let Latency = 10;
let NumMicroOps = 4;
}
@@ -2677,7 +2677,7 @@ def : InstRW<[SPRWriteResGroup259, ReadAfterVecYLd, ReadAfterVecYLd, ReadDefault
def : InstRW<[SPRWriteResGroup259, ReadAfterVecYLd, ReadAfterVecYLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBYrm)>;
def SPRWriteResGroup260 : SchedWriteRes<[SPRPort00_01_05]> {
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
let Latency = 3;
let NumMicroOps = 3;
}
@@ -2686,7 +2686,7 @@ def : InstRW<[SPRWriteResGroup260], (instregex "^VBLENDVP(S|DY)rr$",
"^VPBLENDVB(Y?)rr$")>;
def SPRWriteResGroup261 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> {
- let ResourceCycles = [3, 1];
+ let ReleaseAtCycles = [3, 1];
let Latency = 9;
let NumMicroOps = 4;
}
@@ -2750,7 +2750,7 @@ def SPRWriteResGroup264 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
def : InstRW<[SPRWriteResGroup264, ReadAfterVecLd], (instregex "^V(U?)COMISHZrm((_Int)?)$")>;
def SPRWriteResGroup265 : SchedWriteRes<[SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [1, 2, 1];
+ let ReleaseAtCycles = [1, 2, 1];
let Latency = 12;
let NumMicroOps = 4;
}
@@ -2764,7 +2764,7 @@ def : InstRW<[SPRWriteResGroup265], (instregex "^VCOMPRESSP(D|S)Z(128|256)mr$",
"^VPMOVUS(Q|W)BZmr$")>;
def SPRWriteResGroup266 : SchedWriteRes<[SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [1, 2, 1];
+ let ReleaseAtCycles = [1, 2, 1];
let Latency = 15;
let NumMicroOps = 4;
}
@@ -2778,7 +2778,7 @@ def : InstRW<[SPRWriteResGroup266], (instregex "^VCOMPRESSP(D|S)Z(128|256)mrk$",
"^VPMOVUS(Q|W)BZmrk$")>;
def SPRWriteResGroup267 : SchedWriteRes<[SPRPort05]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 3;
let NumMicroOps = 2;
}
@@ -2855,14 +2855,14 @@ def SPRWriteResGroup276 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort05]>
def : InstRW<[SPRWriteResGroup276], (instregex "^VCVT(U?)DQ2PHZ256rrk(z?)$")>;
def SPRWriteResGroup277 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = 17;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup277], (instregex "^VCVT(U?)DQ2PHZrm(b?)$")>;
def SPRWriteResGroup278 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = 21;
let NumMicroOps = 4;
}
@@ -2870,14 +2870,14 @@ def : InstRW<[SPRWriteResGroup278], (instregex "^VCVT(U?)DQ2PHZrm(bk|kz)$",
"^VCVT(U?)DQ2PHZrm(k|bkz)$")>;
def SPRWriteResGroup279 : SchedWriteRes<[SPRPort00, SPRPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 9;
let NumMicroOps = 3;
}
def : InstRW<[SPRWriteResGroup279], (instregex "^VCVT(U?)DQ2PHZrr(b?)$")>;
def SPRWriteResGroup280 : SchedWriteRes<[SPRPort00, SPRPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 14;
let NumMicroOps = 3;
}
@@ -2885,14 +2885,14 @@ def : InstRW<[SPRWriteResGroup280], (instregex "^VCVT(U?)DQ2PHZrr(bk|kz)$",
"^VCVT(U?)DQ2PHZrr(k|bkz)$")>;
def SPRWriteResGroup281 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 1, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1, 1];
let Latency = 15;
let NumMicroOps = 5;
}
def : InstRW<[SPRWriteResGroup281, ReadAfterVecXLd], (instregex "^VCVTNE2PS2BF16Z128rm(b?)$")>;
def SPRWriteResGroup282 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 1, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1, 1];
let Latency = 17;
let NumMicroOps = 5;
}
@@ -2900,28 +2900,28 @@ def : InstRW<[SPRWriteResGroup282, ReadAfterVecXLd], (instregex "^VCVTNE2PS2BF16
"^VCVTNE2PS2BF16Z128rm(k|bkz)$")>;
def SPRWriteResGroup283 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort05]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 8;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup283], (instregex "^VCVTNE2PS2BF16Z(128|256)rr$")>;
def SPRWriteResGroup284 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort05]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 10;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup284], (instregex "^VCVTNE2PS2BF16Z(128|256)rrk(z?)$")>;
def SPRWriteResGroup285 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 1, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1, 1];
let Latency = 16;
let NumMicroOps = 5;
}
def : InstRW<[SPRWriteResGroup285, ReadAfterVecYLd], (instregex "^VCVTNE2PS2BF16Z256rm(b?)$")>;
def SPRWriteResGroup286 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 1, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1, 1];
let Latency = 18;
let NumMicroOps = 5;
}
@@ -2929,7 +2929,7 @@ def : InstRW<[SPRWriteResGroup286, ReadAfterVecYLd], (instregex "^VCVTNE2PS2BF16
"^VCVTNE2PS2BF16Z256rm(k|bkz)$")>;
def SPRWriteResGroup287 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 1, 2];
+ let ReleaseAtCycles = [2, 1, 2];
let Latency = 16;
let NumMicroOps = 5;
}
@@ -2938,7 +2938,7 @@ def : InstRW<[SPRWriteResGroup287, ReadAfterVecYLd], (instregex "^VCVTNE2PS2BF16
def : InstRW<[SPRWriteResGroup287, ReadAfterVecYLd], (instrs VDPBF16PSZmbkz)>;
def SPRWriteResGroup288 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 1, 2];
+ let ReleaseAtCycles = [2, 1, 2];
let Latency = 18;
let NumMicroOps = 5;
}
@@ -2946,7 +2946,7 @@ def : InstRW<[SPRWriteResGroup288, ReadAfterVecYLd], (instregex "^VCVTNE2PS2BF16
"^VCVTNE2PS2BF16Zrm(k|bkz)$")>;
def SPRWriteResGroup289 : SchedWriteRes<[SPRPort00, SPRPort05]> {
- let ResourceCycles = [2, 2];
+ let ReleaseAtCycles = [2, 2];
let Latency = 8;
let NumMicroOps = 4;
}
@@ -2954,7 +2954,7 @@ def : InstRW<[SPRWriteResGroup289], (instregex "^VDPBF16PSZr((k|kz)?)$")>;
def : InstRW<[SPRWriteResGroup289], (instrs VCVTNE2PS2BF16Zrr)>;
def SPRWriteResGroup290 : SchedWriteRes<[SPRPort00, SPRPort05]> {
- let ResourceCycles = [2, 2];
+ let ReleaseAtCycles = [2, 2];
let Latency = 10;
let NumMicroOps = 4;
}
@@ -2986,14 +2986,14 @@ def : InstRW<[SPRWriteResGroup294], (instregex "^VCVTNEPS2BF16Z256rm(bk|kz)$",
"^VCVTNEPS2BF16Z256rm(k|bkz)$")>;
def SPRWriteResGroup295 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = 16;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup295], (instregex "^VCVTNEPS2BF16Zrm(b?)$")>;
def SPRWriteResGroup296 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = 18;
let NumMicroOps = 4;
}
@@ -3001,14 +3001,14 @@ def : InstRW<[SPRWriteResGroup296], (instregex "^VCVTNEPS2BF16Zrm(bk|kz)$",
"^VCVTNEPS2BF16Zrm(k|bkz)$")>;
def SPRWriteResGroup297 : SchedWriteRes<[SPRPort00, SPRPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 8;
let NumMicroOps = 3;
}
def : InstRW<[SPRWriteResGroup297], (instrs VCVTNEPS2BF16Zrr)>;
def SPRWriteResGroup298 : SchedWriteRes<[SPRPort00, SPRPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 10;
let NumMicroOps = 3;
}
@@ -3040,14 +3040,14 @@ def : InstRW<[SPRWriteResGroup300], (instregex "^VCVT(T?)P(D|H)2(U?)DQZrm(b?)$",
"^VCVT(U?)QQ2PSZrmbkz$")>;
def SPRWriteResGroup301 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 1, 1, 1, 2];
+ let ReleaseAtCycles = [2, 1, 1, 1, 2];
let Latency = 19;
let NumMicroOps = 7;
}
def : InstRW<[SPRWriteResGroup301], (instregex "^VCVTPD2PHZ128rm(b?)$")>;
def SPRWriteResGroup302 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 1, 1, 1, 2];
+ let ReleaseAtCycles = [2, 1, 1, 1, 2];
let Latency = 22;
let NumMicroOps = 7;
}
@@ -3055,28 +3055,28 @@ def : InstRW<[SPRWriteResGroup302], (instregex "^VCVTPD2PHZ128rm(bk|kz)$",
"^VCVTPD2PHZ128rm(k|bkz)$")>;
def SPRWriteResGroup303 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort05]> {
- let ResourceCycles = [2, 1, 2];
+ let ReleaseAtCycles = [2, 1, 2];
let Latency = 12;
let NumMicroOps = 5;
}
def : InstRW<[SPRWriteResGroup303], (instrs VCVTPD2PHZ128rr)>;
def SPRWriteResGroup304 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort05]> {
- let ResourceCycles = [2, 1, 2];
+ let ReleaseAtCycles = [2, 1, 2];
let Latency = 15;
let NumMicroOps = 5;
}
def : InstRW<[SPRWriteResGroup304], (instregex "^VCVTPD2PHZ128rrk(z?)$")>;
def SPRWriteResGroup305 : SchedWriteRes<[SPRPort00_01, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 1, 1, 2];
+ let ReleaseAtCycles = [2, 1, 1, 2];
let Latency = 21;
let NumMicroOps = 6;
}
def : InstRW<[SPRWriteResGroup305], (instregex "^VCVTPD2PHZ256rm(b?)$")>;
def SPRWriteResGroup306 : SchedWriteRes<[SPRPort00_01, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 1, 1, 2];
+ let ReleaseAtCycles = [2, 1, 1, 2];
let Latency = 24;
let NumMicroOps = 6;
}
@@ -3084,28 +3084,28 @@ def : InstRW<[SPRWriteResGroup306], (instregex "^VCVTPD2PHZ256rm(bk|kz)$",
"^VCVTPD2PHZ256rm(k|bkz)$")>;
def SPRWriteResGroup307 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
- let ResourceCycles = [2, 2];
+ let ReleaseAtCycles = [2, 2];
let Latency = 13;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup307], (instrs VCVTPD2PHZ256rr)>;
def SPRWriteResGroup308 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
- let ResourceCycles = [2, 2];
+ let ReleaseAtCycles = [2, 2];
let Latency = 16;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup308], (instregex "^VCVTPD2PHZ256rrk(z?)$")>;
def SPRWriteResGroup309 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 1, 1, 2];
+ let ReleaseAtCycles = [2, 1, 1, 2];
let Latency = 23;
let NumMicroOps = 6;
}
def : InstRW<[SPRWriteResGroup309], (instregex "^VCVTP(D2PH|H2PD)Zrm(b?)$")>;
def SPRWriteResGroup310 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 1, 1, 2];
+ let ReleaseAtCycles = [2, 1, 1, 2];
let Latency = 26;
let NumMicroOps = 6;
}
@@ -3113,14 +3113,14 @@ def : InstRW<[SPRWriteResGroup310], (instregex "^VCVTP(D2PH|H2PD)Zrm(bk|kz)$",
"^VCVTP(D2PH|H2PD)Zrm(k|bkz)$")>;
def SPRWriteResGroup311 : SchedWriteRes<[SPRPort00, SPRPort05]> {
- let ResourceCycles = [2, 2];
+ let ReleaseAtCycles = [2, 2];
let Latency = 15;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup311], (instregex "^VCVTP(D2PH|H2PD)Zrr(b?)$")>;
def SPRWriteResGroup312 : SchedWriteRes<[SPRPort00, SPRPort05]> {
- let ResourceCycles = [2, 2];
+ let ReleaseAtCycles = [2, 2];
let Latency = 18;
let NumMicroOps = 4;
}
@@ -3227,14 +3227,14 @@ def : InstRW<[SPRWriteResGroup321], (instregex "^VCVT(T?)PH2(U?)DQZrr(bk|kz)$",
"^VCVTP(H2PS|S2PH)XZrr(k|bkz)$")>;
def SPRWriteResGroup322 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 1, 1, 1, 2];
+ let ReleaseAtCycles = [2, 1, 1, 1, 2];
let Latency = 23;
let NumMicroOps = 7;
}
def : InstRW<[SPRWriteResGroup322], (instregex "^VCVTPH2PDZ128rm(b?)$")>;
def SPRWriteResGroup323 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 1, 1, 1, 2];
+ let ReleaseAtCycles = [2, 1, 1, 1, 2];
let Latency = 26;
let NumMicroOps = 7;
}
@@ -3242,28 +3242,28 @@ def : InstRW<[SPRWriteResGroup323], (instregex "^VCVTPH2PDZ128rm(bk|kz)$",
"^VCVTPH2PDZ128rm(k|bkz)$")>;
def SPRWriteResGroup324 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort05]> {
- let ResourceCycles = [2, 1, 1, 2];
+ let ReleaseAtCycles = [2, 1, 1, 2];
let Latency = 16;
let NumMicroOps = 6;
}
def : InstRW<[SPRWriteResGroup324], (instrs VCVTPH2PDZ128rr)>;
def SPRWriteResGroup325 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort05]> {
- let ResourceCycles = [2, 1, 1, 2];
+ let ReleaseAtCycles = [2, 1, 1, 2];
let Latency = 19;
let NumMicroOps = 6;
}
def : InstRW<[SPRWriteResGroup325], (instregex "^VCVTPH2PDZ128rrk(z?)$")>;
def SPRWriteResGroup326 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 1, 2];
+ let ReleaseAtCycles = [2, 1, 2];
let Latency = 22;
let NumMicroOps = 5;
}
def : InstRW<[SPRWriteResGroup326], (instregex "^VCVTPH2PDZ256rm(b?)$")>;
def SPRWriteResGroup327 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 1, 2];
+ let ReleaseAtCycles = [2, 1, 2];
let Latency = 25;
let NumMicroOps = 5;
}
@@ -3271,14 +3271,14 @@ def : InstRW<[SPRWriteResGroup327], (instregex "^VCVTPH2PDZ256rm(bk|kz)$",
"^VCVTPH2PDZ256rm(k|bkz)$")>;
def SPRWriteResGroup328 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
- let ResourceCycles = [2, 2];
+ let ReleaseAtCycles = [2, 2];
let Latency = 15;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup328], (instrs VCVTPH2PDZ256rr)>;
def SPRWriteResGroup329 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
- let ResourceCycles = [2, 2];
+ let ReleaseAtCycles = [2, 2];
let Latency = 18;
let NumMicroOps = 4;
}
@@ -3313,7 +3313,7 @@ def : InstRW<[SPRWriteResGroup332], (instrs VCVTPH2PSZrm)>;
def : InstRW<[SPRWriteResGroup332, ReadAfterVecYLd], (instregex "^VPERMWZrmk(z?)$")>;
def SPRWriteResGroup333 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 2, 1, 1, 1];
+ let ReleaseAtCycles = [1, 2, 1, 1, 1];
let Latency = 17;
let NumMicroOps = 6;
}
@@ -3321,14 +3321,14 @@ def : InstRW<[SPRWriteResGroup333], (instregex "^VCVT(T?)PH2(U?)QQZ128rm((b|k|bk
"^VCVT(T?)PH2(U?)QQZ128rmbkz$")>;
def SPRWriteResGroup334 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort05]> {
- let ResourceCycles = [1, 2, 1];
+ let ReleaseAtCycles = [1, 2, 1];
let Latency = 10;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup334], (instregex "^VCVT(T?)PH2(U?)QQZ(128|256)rr((k|kz)?)$")>;
def SPRWriteResGroup335 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 2, 1, 1, 1];
+ let ReleaseAtCycles = [1, 2, 1, 1, 1];
let Latency = 18;
let NumMicroOps = 6;
}
@@ -3414,14 +3414,14 @@ def SPRWriteResGroup347 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0
def : InstRW<[SPRWriteResGroup347], (instregex "^VCVT(U?)QQ2PHZ256rrk(z?)$")>;
def SPRWriteResGroup348 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 1, 2];
let Latency = 18;
let NumMicroOps = 5;
}
def : InstRW<[SPRWriteResGroup348], (instregex "^VCVT(U?)QQ2PHZrm(b?)$")>;
def SPRWriteResGroup349 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 1, 2];
let Latency = 20;
let NumMicroOps = 5;
}
@@ -3429,14 +3429,14 @@ def : InstRW<[SPRWriteResGroup349], (instregex "^VCVT(U?)QQ2PHZrm(bk|kz)$",
"^VCVT(U?)QQ2PHZrm(k|bkz)$")>;
def SPRWriteResGroup350 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = 10;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup350], (instregex "^VCVT(U?)QQ2PHZrr(b?)$")>;
def SPRWriteResGroup351 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = 12;
let NumMicroOps = 4;
}
@@ -3444,21 +3444,21 @@ def : InstRW<[SPRWriteResGroup351], (instregex "^VCVT(U?)QQ2PHZrr(bk|kz)$",
"^VCVT(U?)QQ2PHZrr(k|bkz)$")>;
def SPRWriteResGroup352 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 2, 1, 1, 1];
+ let ReleaseAtCycles = [2, 2, 1, 1, 1];
let Latency = 18;
let NumMicroOps = 7;
}
def : InstRW<[SPRWriteResGroup352, ReadAfterVecLd], (instregex "^VCVTSD2SHZrm((_Int)?)$")>;
def SPRWriteResGroup353 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 2, 1, 1, 1];
+ let ReleaseAtCycles = [2, 2, 1, 1, 1];
let Latency = 21;
let NumMicroOps = 7;
}
def : InstRW<[SPRWriteResGroup353, ReadAfterVecLd], (instregex "^VCVTSD2SHZrm_Intk(z?)$")>;
def SPRWriteResGroup354 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort05]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 11;
let NumMicroOps = 4;
}
@@ -3466,28 +3466,28 @@ def : InstRW<[SPRWriteResGroup354], (instregex "^VCVTSD2SHZrr(b?)_Int$")>;
def : InstRW<[SPRWriteResGroup354], (instrs VCVTSD2SHZrr)>;
def SPRWriteResGroup355 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort05]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 14;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup355], (instregex "^VCVTSD2SHZrr(b?)_Intk(z?)$")>;
def SPRWriteResGroup356 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 18;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup356, ReadAfterVecLd], (instregex "^VCVTSH2SDZrm((_Int)?)$")>;
def SPRWriteResGroup357 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 20;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup357, ReadAfterVecLd], (instregex "^VCVTSH2SDZrm_Intk(z?)$")>;
def SPRWriteResGroup358 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 10;
let NumMicroOps = 3;
}
@@ -3495,7 +3495,7 @@ def : InstRW<[SPRWriteResGroup358], (instregex "^VCVTSH2SDZrr(b?)_Int$")>;
def : InstRW<[SPRWriteResGroup358], (instrs VCVTSH2SDZrr)>;
def SPRWriteResGroup359 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 13;
let NumMicroOps = 3;
}
@@ -3568,14 +3568,14 @@ def : InstRW<[SPRWriteResGroup367], (instregex "^VDBPSADBWZ(128|256)rrik(z?)$",
"^VPOPCNT(B|W)Zrrk(z?)$")>;
def SPRWriteResGroup368 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 36;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup368, ReadAfterVecXLd], (instregex "^VDIVPHZ128rm(b?)$")>;
def SPRWriteResGroup369 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 38;
let NumMicroOps = 4;
}
@@ -3583,14 +3583,14 @@ def : InstRW<[SPRWriteResGroup369, ReadAfterVecXLd], (instregex "^VDIVPHZ128rm(b
"^VDIVPHZ128rm(k|bkz)$")>;
def SPRWriteResGroup370 : SchedWriteRes<[SPRPort00, SPRPort00_01_05]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 31;
let NumMicroOps = 3;
}
def : InstRW<[SPRWriteResGroup370], (instregex "^VDIVPHZ(128|256)rr$")>;
def SPRWriteResGroup371 : SchedWriteRes<[SPRPort00, SPRPort00_01_05]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 33;
let NumMicroOps = 3;
}
@@ -3599,14 +3599,14 @@ def : InstRW<[SPRWriteResGroup371], (instregex "^VDIVPHZ(128|256)rrk$",
def : InstRW<[SPRWriteResGroup371], (instrs VDIVPHZ128rrkz)>;
def SPRWriteResGroup372 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 37;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup372, ReadAfterVecYLd], (instregex "^VDIVPHZ256rm(b?)$")>;
def SPRWriteResGroup373 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 39;
let NumMicroOps = 4;
}
@@ -3615,21 +3615,21 @@ def : InstRW<[SPRWriteResGroup373, ReadAfterVecYLd], (instregex "^VDIVPHZ256rm(b
def : InstRW<[SPRWriteResGroup373, ReadAfterVecXLd], (instregex "^VSQRTPHZ128m(b?)$")>;
def SPRWriteResGroup374 : SchedWriteRes<[SPRPort00, SPRPort00_01_05]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 11;
let NumMicroOps = 3;
}
def : InstRW<[SPRWriteResGroup374], (instrs VDIVPHZ256rrkz)>;
def SPRWriteResGroup375 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [4, 2, 1, 1, 1];
+ let ReleaseAtCycles = [4, 2, 1, 1, 1];
let Latency = 49;
let NumMicroOps = 9;
}
def : InstRW<[SPRWriteResGroup375, ReadAfterVecYLd], (instregex "^VDIVPHZrm(b?)$")>;
def SPRWriteResGroup376 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [4, 2, 1, 1, 1];
+ let ReleaseAtCycles = [4, 2, 1, 1, 1];
let Latency = 51;
let NumMicroOps = 9;
}
@@ -3637,14 +3637,14 @@ def : InstRW<[SPRWriteResGroup376, ReadAfterVecYLd], (instregex "^VDIVPHZrm(bk|k
"^VDIVPHZrm(k|bkz)$")>;
def SPRWriteResGroup377 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort05]> {
- let ResourceCycles = [4, 1, 1];
+ let ReleaseAtCycles = [4, 1, 1];
let Latency = 41;
let NumMicroOps = 6;
}
def : InstRW<[SPRWriteResGroup377], (instregex "^VDIVPHZrr(b?)$")>;
def SPRWriteResGroup378 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort05]> {
- let ResourceCycles = [4, 1, 1];
+ let ReleaseAtCycles = [4, 1, 1];
let Latency = 43;
let NumMicroOps = 6;
}
@@ -3652,7 +3652,7 @@ def : InstRW<[SPRWriteResGroup378], (instregex "^VDIVPHZrr(bk|kz)$",
"^VDIVPHZrr(k|bkz)$")>;
def SPRWriteResGroup379 : SchedWriteRes<[SPRPort00, SPRPort00_05]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 17;
let NumMicroOps = 3;
}
@@ -3672,7 +3672,7 @@ def : InstRW<[SPRWriteResGroup381], (instrs VDIVSHZrr_Int,
VSQRTSHZr_Int)>;
def SPRWriteResGroup382 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 1, 2];
+ let ReleaseAtCycles = [2, 1, 2];
let Latency = 15;
let NumMicroOps = 5;
}
@@ -3680,14 +3680,14 @@ def : InstRW<[SPRWriteResGroup382, ReadAfterVecXLd], (instregex "^VDPBF16PSZ128m
def : InstRW<[SPRWriteResGroup382, ReadAfterVecXLd], (instrs VDPBF16PSZ128mbkz)>;
def SPRWriteResGroup383 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
- let ResourceCycles = [2, 2];
+ let ReleaseAtCycles = [2, 2];
let Latency = 8;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup383], (instregex "^VDPBF16PSZ(128|256)r((k|kz)?)$")>;
def SPRWriteResGroup384 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [2, 1, 2];
+ let ReleaseAtCycles = [2, 1, 2];
let Latency = 16;
let NumMicroOps = 5;
}
@@ -3695,35 +3695,35 @@ def : InstRW<[SPRWriteResGroup384, ReadAfterVecYLd], (instregex "^VDPBF16PSZ256m
def : InstRW<[SPRWriteResGroup384, ReadAfterVecYLd], (instrs VDPBF16PSZ256mbkz)>;
def SPRWriteResGroup385 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_11]> {
- let ResourceCycles = [6, 7, 18];
+ let ReleaseAtCycles = [6, 7, 18];
let Latency = 81;
let NumMicroOps = 31;
}
def : InstRW<[SPRWriteResGroup385], (instrs VERRm)>;
def SPRWriteResGroup386 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_11]> {
- let ResourceCycles = [6, 7, 17];
+ let ReleaseAtCycles = [6, 7, 17];
let Latency = 74;
let NumMicroOps = 30;
}
def : InstRW<[SPRWriteResGroup386], (instrs VERRr)>;
def SPRWriteResGroup387 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_11]> {
- let ResourceCycles = [5, 8, 21];
+ let ReleaseAtCycles = [5, 8, 21];
let Latency = 81;
let NumMicroOps = 34;
}
def : InstRW<[SPRWriteResGroup387], (instrs VERWm)>;
def SPRWriteResGroup388 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_11]> {
- let ResourceCycles = [5, 8, 20];
+ let ReleaseAtCycles = [5, 8, 20];
let Latency = 74;
let NumMicroOps = 33;
}
def : InstRW<[SPRWriteResGroup388], (instrs VERWr)>;
def SPRWriteResGroup389 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 10;
let NumMicroOps = 3;
}
@@ -3732,7 +3732,7 @@ def : InstRW<[SPRWriteResGroup389, ReadAfterVecYLd], (instregex "^VEXPANDP(D|S)Z
"^VPEXPAND(D|Q)Z128rmk(z?)$")>;
def SPRWriteResGroup390 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 16;
let NumMicroOps = 3;
}
@@ -3751,7 +3751,7 @@ def : InstRW<[SPRWriteResGroup390, ReadAfterVecYLd], (instregex "^VF(C?)MULCPHZ2
def : InstRW<[SPRWriteResGroup390, ReadAfterVecLd], (instrs VSCALEFSHZrm)>;
def SPRWriteResGroup391 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 21;
let NumMicroOps = 3;
}
@@ -3765,7 +3765,7 @@ def : InstRW<[SPRWriteResGroup391, ReadAfterVecYLd], (instregex "^VF(C?)MULCPHZ2
"^VF(C?)MULCPHZ256rm(k|bkz)$")>;
def SPRWriteResGroup392 : SchedWriteRes<[SPRPort00_01]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 9;
let NumMicroOps = 2;
}
@@ -3781,7 +3781,7 @@ def : InstRW<[SPRWriteResGroup392], (instrs VRNDSCALESHZr,
VSCALEFSHZrrb_Int)>;
def SPRWriteResGroup393 : SchedWriteRes<[SPRPort00_01]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 15;
let NumMicroOps = 2;
}
@@ -3793,7 +3793,7 @@ def : InstRW<[SPRWriteResGroup393], (instregex "^VF(C?)MADDCPHZ(128|256)rk(z?)$"
"^VF(C?)MULCSHZrr(k|bkz)$")>;
def SPRWriteResGroup394 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 16;
let NumMicroOps = 3;
}
@@ -3804,7 +3804,7 @@ def : InstRW<[SPRWriteResGroup394, ReadAfterVecYLd], (instregex "^VF(C?)MULCPHZr
"^VSCALEFPHZrm(b?)$")>;
def SPRWriteResGroup395 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 21;
let NumMicroOps = 3;
}
@@ -3814,7 +3814,7 @@ def : InstRW<[SPRWriteResGroup395, ReadAfterVecYLd], (instregex "^VF(C?)MULCPHZr
"^VF(C?)MULCPHZrm(k|bkz)$")>;
def SPRWriteResGroup396 : SchedWriteRes<[SPRPort00]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 9;
let NumMicroOps = 2;
}
@@ -3824,7 +3824,7 @@ def : InstRW<[SPRWriteResGroup396], (instregex "^VF(C?)MADDCPHZr(b?)$",
"^VSCALEFPHZrr(b?)$")>;
def SPRWriteResGroup397 : SchedWriteRes<[SPRPort00]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 15;
let NumMicroOps = 2;
}
@@ -3834,7 +3834,7 @@ def : InstRW<[SPRWriteResGroup397], (instregex "^VF(C?)MADDCPHZr(bk|kz)$",
"^VF(C?)MULCPHZrr(k|bkz)$")>;
def SPRWriteResGroup398 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11]> {
- let ResourceCycles = [1, 1, 2, 4];
+ let ReleaseAtCycles = [1, 1, 2, 4];
let Latency = 29;
let NumMicroOps = 8;
}
@@ -3844,7 +3844,7 @@ def : InstRW<[SPRWriteResGroup398, WriteVecMaskedGatherWriteback], (instrs VGATH
VPGATHERQDYrm)>;
def SPRWriteResGroup399 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_11]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = 20;
let NumMicroOps = 4;
}
@@ -3854,7 +3854,7 @@ def : InstRW<[SPRWriteResGroup399, WriteVecMaskedGatherWriteback], (instrs VGATH
VPGATHERQDZ128rm)>;
def SPRWriteResGroup400 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_11]> {
- let ResourceCycles = [1, 2, 4];
+ let ReleaseAtCycles = [1, 2, 4];
let Latency = 28;
let NumMicroOps = 7;
}
@@ -3864,7 +3864,7 @@ def : InstRW<[SPRWriteResGroup400, WriteVecMaskedGatherWriteback], (instrs VGATH
VPGATHERQDZ256rm)>;
def SPRWriteResGroup401 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 8, 2];
+ let ReleaseAtCycles = [1, 8, 2];
let Latency = 28;
let NumMicroOps = 11;
}
@@ -3874,7 +3874,7 @@ def : InstRW<[SPRWriteResGroup401, WriteVecMaskedGatherWriteback], (instrs VGATH
VPGATHERQDZrm)>;
def SPRWriteResGroup402 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11]> {
- let ResourceCycles = [1, 1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 1, 2];
let Latency = 20;
let NumMicroOps = 5;
}
@@ -3884,7 +3884,7 @@ def : InstRW<[SPRWriteResGroup402, WriteVecMaskedGatherWriteback], (instrs VGATH
VPGATHERQDrm)>;
def SPRWriteResGroup403 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11]> {
- let ResourceCycles = [1, 1, 2, 8];
+ let ReleaseAtCycles = [1, 1, 2, 8];
let Latency = 30;
let NumMicroOps = 12;
}
@@ -3892,7 +3892,7 @@ def : InstRW<[SPRWriteResGroup403, WriteVecMaskedGatherWriteback], (instrs VGATH
VPGATHERDDYrm)>;
def SPRWriteResGroup404 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_11]> {
- let ResourceCycles = [1, 2, 4];
+ let ReleaseAtCycles = [1, 2, 4];
let Latency = 27;
let NumMicroOps = 7;
}
@@ -3900,7 +3900,7 @@ def : InstRW<[SPRWriteResGroup404, WriteVecMaskedGatherWriteback], (instrs VGATH
VPGATHERDDZ128rm)>;
def SPRWriteResGroup405 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_11]> {
- let ResourceCycles = [1, 2, 8];
+ let ReleaseAtCycles = [1, 2, 8];
let Latency = 29;
let NumMicroOps = 11;
}
@@ -3908,7 +3908,7 @@ def : InstRW<[SPRWriteResGroup405, WriteVecMaskedGatherWriteback], (instrs VGATH
VPGATHERDDZ256rm)>;
def SPRWriteResGroup406 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 16, 2];
+ let ReleaseAtCycles = [1, 16, 2];
let Latency = 30;
let NumMicroOps = 19;
}
@@ -3916,7 +3916,7 @@ def : InstRW<[SPRWriteResGroup406, WriteVecMaskedGatherWriteback], (instrs VGATH
VPGATHERDDZrm)>;
def SPRWriteResGroup407 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11]> {
- let ResourceCycles = [1, 1, 2, 4];
+ let ReleaseAtCycles = [1, 1, 2, 4];
let Latency = 28;
let NumMicroOps = 8;
}
@@ -3964,7 +3964,7 @@ def : InstRW<[SPRWriteResGroup413], (instregex "^VGF2P8AFFINE((INV)?)QBZrrikz$")
def : InstRW<[SPRWriteResGroup413], (instrs VGF2P8MULBZrrkz)>;
def SPRWriteResGroup414 : SchedWriteRes<[SPRPort01_05, SPRPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 5;
let NumMicroOps = 3;
}
@@ -3977,7 +3977,7 @@ def SPRWriteResGroup415 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort02_03_1
def : InstRW<[SPRWriteResGroup415], (instrs VLDMXCSR)>;
def SPRWriteResGroup416 : SchedWriteRes<[SPRPort01, SPRPort01_05, SPRPort02_03, SPRPort02_03_11, SPRPort04, SPRPort04_09, SPRPort05, SPRPort06]> {
- let ResourceCycles = [1, 1, 1, 8, 1, 1, 2, 3];
+ let ReleaseAtCycles = [1, 1, 1, 8, 1, 1, 2, 3];
let Latency = 40;
let NumMicroOps = 18;
}
@@ -4004,7 +4004,7 @@ def : InstRW<[SPRWriteResGroup418], (instregex "^VMOVDQU(8|16)Z(128|256)rrk(z?)(
"^VPMOVM2(B|W)Z(128|256)rr$")>;
def SPRWriteResGroup419 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> {
- let ResourceCycles = [1, 2, 2];
+ let ReleaseAtCycles = [1, 2, 2];
let Latency = 12;
let NumMicroOps = 5;
}
@@ -4078,7 +4078,7 @@ def SPRWriteResGroup430 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
def : InstRW<[SPRWriteResGroup430], (instrs VMOVNTPSZmr)>;
def SPRWriteResGroup431 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [3, 1, 8];
+ let ReleaseAtCycles = [3, 1, 8];
let Latency = 10;
let NumMicroOps = 12;
}
@@ -4086,7 +4086,7 @@ def : InstRW<[SPRWriteResGroup431, ReadAfterVecXLd], (instregex "^VP2INTERSECTDZ
def : InstRW<[SPRWriteResGroup431, ReadAfterVecYLd], (instregex "^VP2INTERSECTQZ256rm(b?)$")>;
def SPRWriteResGroup432 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> {
- let ResourceCycles = [4, 8];
+ let ReleaseAtCycles = [4, 8];
let Latency = 10;
let NumMicroOps = 12;
}
@@ -4094,56 +4094,56 @@ def : InstRW<[SPRWriteResGroup432], (instrs VP2INTERSECTDZ128rr,
VP2INTERSECTQZ256rr)>;
def SPRWriteResGroup433 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 8, 7, 2, 1, 11];
+ let ReleaseAtCycles = [1, 8, 7, 2, 1, 11];
let Latency = 27;
let NumMicroOps = 30;
}
def : InstRW<[SPRWriteResGroup433, ReadAfterVecYLd], (instregex "^VP2INTERSECTDZ256rm(b?)$")>;
def SPRWriteResGroup434 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort05]> {
- let ResourceCycles = [1, 8, 8, 2, 11];
+ let ReleaseAtCycles = [1, 8, 8, 2, 11];
let Latency = 27;
let NumMicroOps = 30;
}
def : InstRW<[SPRWriteResGroup434], (instrs VP2INTERSECTDZ256rr)>;
def SPRWriteResGroup435 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [13, 9, 1, 23];
+ let ReleaseAtCycles = [13, 9, 1, 23];
let Latency = 40;
let NumMicroOps = 46;
}
def : InstRW<[SPRWriteResGroup435, ReadAfterVecYLd], (instregex "^VP2INTERSECTDZrm(b?)$")>;
def SPRWriteResGroup436 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> {
- let ResourceCycles = [13, 10, 23];
+ let ReleaseAtCycles = [13, 10, 23];
let Latency = 40;
let NumMicroOps = 46;
}
def : InstRW<[SPRWriteResGroup436], (instrs VP2INTERSECTDZrr)>;
def SPRWriteResGroup437 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 4];
+ let ReleaseAtCycles = [1, 4];
let Latency = 6;
let NumMicroOps = 5;
}
def : InstRW<[SPRWriteResGroup437, ReadAfterVecXLd], (instregex "^VP2INTERSECTQZ128rm(b?)$")>;
def SPRWriteResGroup438 : SchedWriteRes<[SPRPort05]> {
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
let Latency = 6;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup438], (instrs VP2INTERSECTQZ128rr)>;
def SPRWriteResGroup439 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [8, 7, 1, 14];
+ let ReleaseAtCycles = [8, 7, 1, 14];
let Latency = 29;
let NumMicroOps = 30;
}
def : InstRW<[SPRWriteResGroup439, ReadAfterVecYLd], (instregex "^VP2INTERSECTQZrm(b?)$")>;
def SPRWriteResGroup440 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> {
- let ResourceCycles = [8, 8, 14];
+ let ReleaseAtCycles = [8, 8, 14];
let Latency = 30;
let NumMicroOps = 30;
}
@@ -4186,7 +4186,7 @@ def : InstRW<[SPRWriteResGroup443], (instregex "^VPBROADCASTM(B2Q|W2D)Z(128|256)
def : InstRW<[SPRWriteResGroup443], (instrs VPSHUFBITQMBZrr)>;
def SPRWriteResGroup444 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [1, 1, 1, 2, 1];
+ let ReleaseAtCycles = [1, 1, 1, 2, 1];
let Latency = 12;
let NumMicroOps = 6;
}
@@ -4194,7 +4194,7 @@ def : InstRW<[SPRWriteResGroup444], (instregex "^VPCOMPRESS(B|W)Z(128|256)mr$")>
def : InstRW<[SPRWriteResGroup444], (instrs VPCOMPRESSWZmr)>;
def SPRWriteResGroup445 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [1, 1, 1, 2, 1];
+ let ReleaseAtCycles = [1, 1, 1, 2, 1];
let Latency = 14;
let NumMicroOps = 6;
}
@@ -4202,21 +4202,21 @@ def : InstRW<[SPRWriteResGroup445], (instregex "^VPCOMPRESS(B|W)Z(128|256)mrk$")
def : InstRW<[SPRWriteResGroup445], (instrs VPCOMPRESSWZmrk)>;
def SPRWriteResGroup446 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [1, 1, 2, 2, 2];
+ let ReleaseAtCycles = [1, 1, 2, 2, 2];
let Latency = 12;
let NumMicroOps = 8;
}
def : InstRW<[SPRWriteResGroup446], (instrs VPCOMPRESSBZmr)>;
def SPRWriteResGroup447 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [1, 1, 2, 2, 2];
+ let ReleaseAtCycles = [1, 1, 2, 2, 2];
let Latency = 14;
let NumMicroOps = 8;
}
def : InstRW<[SPRWriteResGroup447], (instrs VPCOMPRESSBZmrk)>;
def SPRWriteResGroup448 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [5, 4, 1, 5];
+ let ReleaseAtCycles = [5, 4, 1, 5];
let Latency = 17;
let NumMicroOps = 15;
}
@@ -4224,14 +4224,14 @@ def : InstRW<[SPRWriteResGroup448], (instregex "^VPCONFLICTDZ128rm((b|k|bk|kz)?)
def : InstRW<[SPRWriteResGroup448], (instrs VPCONFLICTDZ128rmbkz)>;
def SPRWriteResGroup449 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort05]> {
- let ResourceCycles = [5, 5, 5];
+ let ReleaseAtCycles = [5, 5, 5];
let Latency = 12;
let NumMicroOps = 15;
}
def : InstRW<[SPRWriteResGroup449], (instregex "^VPCONFLICTDZ128rr((k|kz)?)$")>;
def SPRWriteResGroup450 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [7, 5, 1, 1, 9];
+ let ReleaseAtCycles = [7, 5, 1, 1, 9];
let Latency = 24;
let NumMicroOps = 23;
}
@@ -4239,14 +4239,14 @@ def : InstRW<[SPRWriteResGroup450], (instregex "^VPCONFLICTDZ256rm((b|k|bk|kz)?)
def : InstRW<[SPRWriteResGroup450], (instrs VPCONFLICTDZ256rmbkz)>;
def SPRWriteResGroup451 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort05]> {
- let ResourceCycles = [7, 6, 1, 9];
+ let ReleaseAtCycles = [7, 6, 1, 9];
let Latency = 17;
let NumMicroOps = 23;
}
def : InstRW<[SPRWriteResGroup451], (instregex "^VPCONFLICTDZ256rr((k|kz)?)$")>;
def SPRWriteResGroup452 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [11, 8, 1, 17];
+ let ReleaseAtCycles = [11, 8, 1, 17];
let Latency = 33;
let NumMicroOps = 37;
}
@@ -4254,37 +4254,37 @@ def : InstRW<[SPRWriteResGroup452], (instregex "^VPCONFLICTDZrm((b|k|bk|kz)?)$")
def : InstRW<[SPRWriteResGroup452], (instrs VPCONFLICTDZrmbkz)>;
def SPRWriteResGroup453 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> {
- let ResourceCycles = [11, 9, 17];
+ let ReleaseAtCycles = [11, 9, 17];
let Latency = 26;
let NumMicroOps = 37;
}
def : InstRW<[SPRWriteResGroup453], (instregex "^VPCONFLICTDZrr((kz)?)$")>;
def SPRWriteResGroup454 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> {
- let ResourceCycles = [11, 9, 17];
+ let ReleaseAtCycles = [11, 9, 17];
let Latency = 25;
let NumMicroOps = 37;
}
def : InstRW<[SPRWriteResGroup454], (instrs VPCONFLICTDZrrk)>;
def SPRWriteResGroup455 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = 11;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup455], (instregex "^VPCONFLICTQZ128rm((b|k|bk|kz)?)$")>;
def : InstRW<[SPRWriteResGroup455], (instrs VPCONFLICTQZ128rmbkz)>;
-def : InstRW<[SPRWriteResGroup455, ReadAfterVecYLd], (instregex "^VPERM(I|T)2B128rm$")>;
+def : InstRW<[SPRWriteResGroup455, ReadAfterVecYLd], (instregex "^VPERM(I|T)2BZ128rm$")>;
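
Interleaved with the field rename, this stretch also repairs stale opcode names in the InstRW bindings: the AVX-512 two-source permutes (VPERMI2*/VPERMT2*) were renamed upstream to carry the conventional Z/Z128/Z256 EVEX suffixes, and the regexes here are updated to match, e.g.

    (instregex "^VPERM(I|T)2B128rm$")  -->  (instregex "^VPERM(I|T)2BZ128rm$")

Without the update these overrides would no longer match the renamed instructions; the scheduling data they attach is unchanged.
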
def SPRWriteResGroup456 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 4;
let NumMicroOps = 3;
}
def : InstRW<[SPRWriteResGroup456], (instregex "^VPCONFLICTQZ128rr((k|kz)?)$")>;
def SPRWriteResGroup457 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [5, 4, 1, 5];
+ let ReleaseAtCycles = [5, 4, 1, 5];
let Latency = 20;
let NumMicroOps = 15;
}
@@ -4292,14 +4292,14 @@ def : InstRW<[SPRWriteResGroup457], (instregex "^VPCONFLICTQZ256rm((b|k|bk|kz)?)
def : InstRW<[SPRWriteResGroup457], (instrs VPCONFLICTQZ256rmbkz)>;
def SPRWriteResGroup458 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort05]> {
- let ResourceCycles = [5, 5, 5];
+ let ReleaseAtCycles = [5, 5, 5];
let Latency = 13;
let NumMicroOps = 15;
}
def : InstRW<[SPRWriteResGroup458], (instregex "^VPCONFLICTQZ256rr((k|kz)?)$")>;
def SPRWriteResGroup459 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [7, 5, 1, 9];
+ let ReleaseAtCycles = [7, 5, 1, 9];
let Latency = 23;
let NumMicroOps = 22;
}
@@ -4307,145 +4307,145 @@ def : InstRW<[SPRWriteResGroup459], (instregex "^VPCONFLICTQZrm((b|k|bk|kz)?)$")
def : InstRW<[SPRWriteResGroup459], (instrs VPCONFLICTQZrmbkz)>;
def SPRWriteResGroup460 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> {
- let ResourceCycles = [7, 6, 9];
+ let ReleaseAtCycles = [7, 6, 9];
let Latency = 17;
let NumMicroOps = 22;
}
def : InstRW<[SPRWriteResGroup460], (instregex "^VPCONFLICTQZrr((kz)?)$")>;
def SPRWriteResGroup461 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> {
- let ResourceCycles = [7, 6, 9];
+ let ReleaseAtCycles = [7, 6, 9];
let Latency = 16;
let NumMicroOps = 22;
}
def : InstRW<[SPRWriteResGroup461], (instrs VPCONFLICTQZrrk)>;
def SPRWriteResGroup462 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = 13;
let NumMicroOps = 4;
}
-def : InstRW<[SPRWriteResGroup462, ReadAfterVecYLd], (instregex "^VPERM(I|T)2B128rmk(z?)$")>;
-def : InstRW<[SPRWriteResGroup462, ReadAfterVecYLd], (instrs VPERMT2W128rm)>;
+def : InstRW<[SPRWriteResGroup462, ReadAfterVecYLd], (instregex "^VPERM(I|T)2BZ128rmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup462, ReadAfterVecYLd], (instrs VPERMT2WZ128rm)>;
def SPRWriteResGroup463 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 5;
let NumMicroOps = 3;
}
-def : InstRW<[SPRWriteResGroup463], (instregex "^VPERM(I|T)2B(128|256)rr$")>;
+def : InstRW<[SPRWriteResGroup463], (instregex "^VPERM(I|T)2BZ(128|256)rr$")>;
def SPRWriteResGroup464 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 7;
let NumMicroOps = 3;
}
-def : InstRW<[SPRWriteResGroup464], (instregex "^VPERM(I|T)2B(128|256)rrk(z?)$",
- "^VPERM(I|T)2W(128|256)rr$")>;
+def : InstRW<[SPRWriteResGroup464], (instregex "^VPERM(I|T)2BZ(128|256)rrk(z?)$",
+ "^VPERM(I|T)2WZ(128|256)rr$")>;
def SPRWriteResGroup465 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = 12;
let NumMicroOps = 4;
}
-def : InstRW<[SPRWriteResGroup465, ReadAfterVecYLd], (instregex "^VPERM(I|T)2B256rm$")>;
+def : InstRW<[SPRWriteResGroup465, ReadAfterVecYLd], (instregex "^VPERM(I|T)2BZ256rm$")>;
def SPRWriteResGroup466 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = 14;
let NumMicroOps = 4;
}
-def : InstRW<[SPRWriteResGroup466, ReadAfterVecYLd], (instregex "^VPERM(I|T)2B256rmk(z?)$")>;
-def : InstRW<[SPRWriteResGroup466, ReadAfterVecYLd], (instrs VPERMI2W128rm,
- VPERMT2W256rm)>;
+def : InstRW<[SPRWriteResGroup466, ReadAfterVecYLd], (instregex "^VPERM(I|T)2BZ256rmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup466, ReadAfterVecYLd], (instrs VPERMI2WZ128rm,
+ VPERMT2WZ256rm)>;
def SPRWriteResGroup467 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = 12;
let NumMicroOps = 4;
}
-def : InstRW<[SPRWriteResGroup467, ReadAfterVecYLd], (instregex "^VPERM(I|T)2Brm$")>;
+def : InstRW<[SPRWriteResGroup467, ReadAfterVecYLd], (instregex "^VPERM(I|T)2BZrm$")>;
def SPRWriteResGroup468 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = 14;
let NumMicroOps = 4;
}
-def : InstRW<[SPRWriteResGroup468, ReadAfterVecYLd], (instregex "^VPERM(I|T)2Brmk(z?)$")>;
-def : InstRW<[SPRWriteResGroup468, ReadAfterVecYLd], (instrs VPERMT2Wrm)>;
+def : InstRW<[SPRWriteResGroup468, ReadAfterVecYLd], (instregex "^VPERM(I|T)2BZrmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup468, ReadAfterVecYLd], (instrs VPERMT2WZrm)>;
def SPRWriteResGroup469 : SchedWriteRes<[SPRPort00_05, SPRPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 5;
let NumMicroOps = 3;
}
-def : InstRW<[SPRWriteResGroup469], (instregex "^VPERM(I|T)2Brr$")>;
+def : InstRW<[SPRWriteResGroup469], (instregex "^VPERM(I|T)2BZrr$")>;
def SPRWriteResGroup470 : SchedWriteRes<[SPRPort00_05, SPRPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 7;
let NumMicroOps = 3;
}
-def : InstRW<[SPRWriteResGroup470], (instregex "^VPERM(I|T)2Brrk(z?)$",
- "^VPERM(I|T)2Wrr$")>;
+def : InstRW<[SPRWriteResGroup470], (instregex "^VPERM(I|T)2BZrrk(z?)$",
+ "^VPERM(I|T)2WZrr$")>;
def SPRWriteResGroup471 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = 16;
let NumMicroOps = 4;
}
-def : InstRW<[SPRWriteResGroup471, ReadAfterVecYLd], (instregex "^VPERMI2W128rmk(z?)$",
- "^VPERMT2W256rmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup471, ReadAfterVecYLd], (instregex "^VPERMI2WZ128rmk(z?)$",
+ "^VPERMT2WZ256rmk(z?)$")>;
def SPRWriteResGroup472 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 9;
let NumMicroOps = 3;
}
-def : InstRW<[SPRWriteResGroup472], (instregex "^VPERM(I|T)2W(128|256)rrk(z?)$")>;
+def : InstRW<[SPRWriteResGroup472], (instregex "^VPERM(I|T)2WZ(128|256)rrk(z?)$")>;
def SPRWriteResGroup473 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = 15;
let NumMicroOps = 4;
}
-def : InstRW<[SPRWriteResGroup473, ReadAfterVecYLd], (instregex "^VPERMT2W128rmk(z?)$")>;
-def : InstRW<[SPRWriteResGroup473, ReadAfterVecYLd], (instrs VPERMI2W256rm)>;
+def : InstRW<[SPRWriteResGroup473, ReadAfterVecYLd], (instregex "^VPERMT2WZ128rmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup473, ReadAfterVecYLd], (instrs VPERMI2WZ256rm)>;
def SPRWriteResGroup474 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = 17;
let NumMicroOps = 4;
}
-def : InstRW<[SPRWriteResGroup474, ReadAfterVecYLd], (instregex "^VPERMI2W256rmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup474, ReadAfterVecYLd], (instregex "^VPERMI2WZ256rmk(z?)$")>;
def SPRWriteResGroup475 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = 15;
let NumMicroOps = 4;
}
-def : InstRW<[SPRWriteResGroup475, ReadAfterVecYLd], (instrs VPERMI2Wrm)>;
+def : InstRW<[SPRWriteResGroup475, ReadAfterVecYLd], (instrs VPERMI2WZrm)>;
def SPRWriteResGroup476 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = 17;
let NumMicroOps = 4;
}
-def : InstRW<[SPRWriteResGroup476, ReadAfterVecYLd], (instregex "^VPERMI2Wrmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup476, ReadAfterVecYLd], (instregex "^VPERMI2WZrmk(z?)$")>;
def SPRWriteResGroup477 : SchedWriteRes<[SPRPort00_05, SPRPort05]> {
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let Latency = 9;
let NumMicroOps = 3;
}
-def : InstRW<[SPRWriteResGroup477], (instregex "^VPERM(I|T)2Wrrk(z?)$")>;
+def : InstRW<[SPRWriteResGroup477], (instregex "^VPERM(I|T)2WZrrk(z?)$")>;
def SPRWriteResGroup478 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let Latency = 16;
let NumMicroOps = 4;
}
-def : InstRW<[SPRWriteResGroup478, ReadAfterVecYLd], (instregex "^VPERMT2Wrmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup478, ReadAfterVecYLd], (instregex "^VPERMT2WZrmk(z?)$")>;
def SPRWriteResGroup479 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
let Latency = 10;
@@ -4478,7 +4478,7 @@ def SPRWriteResGroup483 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]>
def : InstRW<[SPRWriteResGroup483, ReadAfterVecYLd], (instrs VPERMWZrm)>;
def SPRWriteResGroup484 : SchedWriteRes<[SPRPort05]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 8;
let NumMicroOps = 2;
}
@@ -4486,7 +4486,7 @@ def : InstRW<[SPRWriteResGroup484], (instregex "^VPEXPAND(B|W)Z(128|256)rrk(z?)$
"^VPEXPAND(B|W)Zrrk(z?)$")>;
def SPRWriteResGroup485 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11]> {
- let ResourceCycles = [1, 2, 1];
+ let ReleaseAtCycles = [1, 2, 1];
let Latency = 10;
let NumMicroOps = 4;
}
@@ -4591,7 +4591,7 @@ def SPRWriteResGroup496 : SchedWriteRes<[SPRPort04_09, SPRPort05, SPRPort07_08]>
def : InstRW<[SPRWriteResGroup496], (instregex "^VPMOVQDZ((256)?)mrk$")>;
def SPRWriteResGroup497 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> {
- let ResourceCycles = [3, 1];
+ let ReleaseAtCycles = [3, 1];
let Latency = 23;
let NumMicroOps = 4;
}
@@ -4601,14 +4601,14 @@ def : InstRW<[SPRWriteResGroup497, ReadAfterVecYLd], (instregex "^VPMULLQZ256rm(
def : InstRW<[SPRWriteResGroup497, ReadAfterVecYLd], (instrs VPMULLQZ256rmbkz)>;
def SPRWriteResGroup498 : SchedWriteRes<[SPRPort00_01]> {
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
let Latency = 15;
let NumMicroOps = 3;
}
def : InstRW<[SPRWriteResGroup498], (instregex "^VPMULLQZ(128|256)rr((k|kz)?)$")>;
def SPRWriteResGroup499 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
- let ResourceCycles = [3, 1];
+ let ReleaseAtCycles = [3, 1];
let Latency = 23;
let NumMicroOps = 4;
}
@@ -4616,14 +4616,14 @@ def : InstRW<[SPRWriteResGroup499, ReadAfterVecYLd], (instregex "^VPMULLQZrm((b|
def : InstRW<[SPRWriteResGroup499, ReadAfterVecYLd], (instrs VPMULLQZrmbkz)>;
def SPRWriteResGroup500 : SchedWriteRes<[SPRPort00]> {
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
let Latency = 15;
let NumMicroOps = 3;
}
def : InstRW<[SPRWriteResGroup500], (instregex "^VPMULLQZrr((k|kz)?)$")>;
def SPRWriteResGroup501 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> {
- let ResourceCycles = [1, 1, 1, 4, 4];
+ let ReleaseAtCycles = [1, 1, 1, 4, 4];
let Latency = 12;
let NumMicroOps = 11;
}
@@ -4635,7 +4635,7 @@ def : InstRW<[SPRWriteResGroup501], (instrs VPSCATTERDDZ128mr,
VSCATTERQPSZ256mr)>;
def SPRWriteResGroup502 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> {
- let ResourceCycles = [1, 1, 1, 8, 8];
+ let ReleaseAtCycles = [1, 1, 1, 8, 8];
let Latency = 12;
let NumMicroOps = 19;
}
@@ -4643,7 +4643,7 @@ def : InstRW<[SPRWriteResGroup502], (instrs VPSCATTERDDZ256mr,
VSCATTERDPSZ256mr)>;
def SPRWriteResGroup503 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> {
- let ResourceCycles = [2, 1, 16, 16];
+ let ReleaseAtCycles = [2, 1, 16, 16];
let Latency = 19;
let NumMicroOps = 35;
}
@@ -4651,7 +4651,7 @@ def : InstRW<[SPRWriteResGroup503], (instrs VPSCATTERDDZmr,
VSCATTERDPSZmr)>;
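
The gather/scatter groups make the per-element accounting visible: for VPSCATTERDDZmr just above, ReleaseAtCycles = [2, 1, 16, 16] against [SPRPort00, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08] sums to its 35 uops, which reads as one store-data uop (ports 4/9) and one store-address uop (ports 7/8) per dword element of the ZMM source, 16 of each, plus three setup uops on the ALU ports. The per-element interpretation is inferred from the numbers, not stated by the table.
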
def SPRWriteResGroup504 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> {
- let ResourceCycles = [1, 1, 1, 2, 2];
+ let ReleaseAtCycles = [1, 1, 1, 2, 2];
let Latency = 12;
let NumMicroOps = 7;
}
@@ -4661,7 +4661,7 @@ def : InstRW<[SPRWriteResGroup504], (instrs VPSCATTERQDZ128mr,
VSCATTERQPSZ128mr)>;
def SPRWriteResGroup505 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> {
- let ResourceCycles = [2, 1, 8, 8];
+ let ReleaseAtCycles = [2, 1, 8, 8];
let Latency = 12;
let NumMicroOps = 19;
}
@@ -4787,7 +4787,7 @@ def : InstRW<[SPRWriteResGroup522], (instregex "^VPS(L|R)LWZ128rrk(z?)$",
"^VPSRAWZ128rrk(z?)$")>;
def SPRWriteResGroup523 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 16;
let NumMicroOps = 4;
}
@@ -4795,14 +4795,14 @@ def : InstRW<[SPRWriteResGroup523, ReadAfterVecYLd], (instregex "^VR(CP|SQRT)PHZ
"^VR(CP|SQRT)PHZm(k|bkz)$")>;
def SPRWriteResGroup524 : SchedWriteRes<[SPRPort00, SPRPort00_05]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 9;
let NumMicroOps = 3;
}
def : InstRW<[SPRWriteResGroup524], (instregex "^VRCPPHZrk(z?)$")>;
def SPRWriteResGroup525 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> {
- let ResourceCycles = [3, 1];
+ let ReleaseAtCycles = [3, 1];
let Latency = 20;
let NumMicroOps = 4;
}
@@ -4811,7 +4811,7 @@ def : InstRW<[SPRWriteResGroup525, ReadAfterVecXLd], (instrs VREDUCESHZrmi)>;
def : InstRW<[SPRWriteResGroup525, ReadAfterVecYLd], (instregex "^VREDUCEPHZ256rm(b?)i$")>;
def SPRWriteResGroup526 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> {
- let ResourceCycles = [3, 1];
+ let ReleaseAtCycles = [3, 1];
let Latency = 22;
let NumMicroOps = 4;
}
@@ -4820,7 +4820,7 @@ def : InstRW<[SPRWriteResGroup526, ReadAfterVecXLd], (instregex "^VREDUCEPHZ128r
def : InstRW<[SPRWriteResGroup526, ReadAfterVecYLd], (instregex "^VREDUCEPHZ256rm(b?)ik(z?)$")>;
def SPRWriteResGroup527 : SchedWriteRes<[SPRPort00_01]> {
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
let Latency = 13;
let NumMicroOps = 3;
}
@@ -4828,7 +4828,7 @@ def : InstRW<[SPRWriteResGroup527], (instregex "^VREDUCEPHZ(128|256)rri$",
"^VREDUCESHZrri(b?)$")>;
def SPRWriteResGroup528 : SchedWriteRes<[SPRPort00_01]> {
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
let Latency = 16;
let NumMicroOps = 3;
}
@@ -4837,28 +4837,28 @@ def : InstRW<[SPRWriteResGroup528], (instregex "^VREDUCEPHZ(128|256)rrik(z?)$",
"^VREDUCESHZrri(k|bkz)$")>;
def SPRWriteResGroup529 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
- let ResourceCycles = [3, 1];
+ let ReleaseAtCycles = [3, 1];
let Latency = 20;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup529, ReadAfterVecYLd], (instregex "^VREDUCEPHZrm(b?)i$")>;
def SPRWriteResGroup530 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
- let ResourceCycles = [3, 1];
+ let ReleaseAtCycles = [3, 1];
let Latency = 22;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup530, ReadAfterVecYLd], (instregex "^VREDUCEPHZrm(b?)ik(z?)$")>;
def SPRWriteResGroup531 : SchedWriteRes<[SPRPort00]> {
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
let Latency = 13;
let NumMicroOps = 3;
}
def : InstRW<[SPRWriteResGroup531], (instregex "^VREDUCEPHZrri(b?)$")>;
def SPRWriteResGroup532 : SchedWriteRes<[SPRPort00]> {
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
let Latency = 16;
let NumMicroOps = 3;
}
@@ -4866,7 +4866,7 @@ def : InstRW<[SPRWriteResGroup532], (instregex "^VREDUCEPHZrri(bk|kz)$",
"^VREDUCEPHZrri(k|bkz)$")>;
def SPRWriteResGroup533 : SchedWriteRes<[SPRPort00]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 8;
let NumMicroOps = 2;
}
@@ -4874,7 +4874,7 @@ def : InstRW<[SPRWriteResGroup533], (instregex "^VRNDSCALEP(D|S)Zrri((b|k|bk|kz)
"^VRNDSCALEP(D|S)Zrribkz$")>;
def SPRWriteResGroup534 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 17;
let NumMicroOps = 3;
}
@@ -4888,7 +4888,7 @@ def : InstRW<[SPRWriteResGroup534, ReadAfterVecYLd], (instregex "^VRNDSCALEPHZ25
def : InstRW<[SPRWriteResGroup534, ReadAfterVecLd], (instregex "^VSCALEFSHZrmk(z?)$")>;
def SPRWriteResGroup535 : SchedWriteRes<[SPRPort00_01]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 11;
let NumMicroOps = 2;
}
@@ -4899,7 +4899,7 @@ def : InstRW<[SPRWriteResGroup535], (instregex "^VRNDSCALEPHZ(128|256)rrik(z?)$"
"^VSCALEFSHZrrk(z?)$")>;
def SPRWriteResGroup536 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 17;
let NumMicroOps = 3;
}
@@ -4908,7 +4908,7 @@ def : InstRW<[SPRWriteResGroup536, ReadAfterVecYLd], (instregex "^VRNDSCALEPHZrm
"^VSCALEFPHZrm(k|bkz)$")>;
def SPRWriteResGroup537 : SchedWriteRes<[SPRPort00]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 11;
let NumMicroOps = 2;
}
@@ -4918,7 +4918,7 @@ def : InstRW<[SPRWriteResGroup537], (instregex "^VRNDSCALEPHZrri(bk|kz)$",
"^VSCALEFPHZrr(k|bkz)$")>;
def SPRWriteResGroup538 : SchedWriteRes<[SPRPort00, SPRPort00_05]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 6;
let NumMicroOps = 3;
}
@@ -4942,28 +4942,28 @@ def : InstRW<[SPRWriteResGroup540, ReadAfterVecXLd], (instregex "^VSQRTPDZ128m(b
def : InstRW<[SPRWriteResGroup540, ReadAfterVecLd], (instregex "^VSQRTSDZm_Intk(z?)$")>;
def SPRWriteResGroup541 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 38;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup541, ReadAfterVecYLd], (instrs VSQRTPDZm)>;
def SPRWriteResGroup542 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 39;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup542, ReadAfterVecYLd], (instrs VSQRTPDZmb)>;
def SPRWriteResGroup543 : SchedWriteRes<[SPRPort00, SPRPort00_05]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 31;
let NumMicroOps = 3;
}
def : InstRW<[SPRWriteResGroup543], (instrs VSQRTPDZr)>;
def SPRWriteResGroup544 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 41;
let NumMicroOps = 4;
}
@@ -4971,7 +4971,7 @@ def : InstRW<[SPRWriteResGroup544, ReadAfterVecXLd], (instregex "^VSQRTPHZ128m(b
"^VSQRTPHZ128m(k|bkz)$")>;
def SPRWriteResGroup545 : SchedWriteRes<[SPRPort00, SPRPort00_01_05]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 35;
let NumMicroOps = 3;
}
@@ -4979,21 +4979,21 @@ def : InstRW<[SPRWriteResGroup545], (instregex "^VSQRTPHZ(128|256)rk$")>;
def : InstRW<[SPRWriteResGroup545], (instrs VSQRTPHZ256rkz)>;
def SPRWriteResGroup546 : SchedWriteRes<[SPRPort00, SPRPort00_01_05]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 12;
let NumMicroOps = 3;
}
def : InstRW<[SPRWriteResGroup546], (instrs VSQRTPHZ128rkz)>;
def SPRWriteResGroup547 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 40;
let NumMicroOps = 4;
}
def : InstRW<[SPRWriteResGroup547, ReadAfterVecYLd], (instregex "^VSQRTPHZ256m(b?)$")>;
def SPRWriteResGroup548 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> {
- let ResourceCycles = [2, 1, 1];
+ let ReleaseAtCycles = [2, 1, 1];
let Latency = 42;
let NumMicroOps = 4;
}
@@ -5001,14 +5001,14 @@ def : InstRW<[SPRWriteResGroup548, ReadAfterVecYLd], (instregex "^VSQRTPHZ256m(b
"^VSQRTPHZ256m(k|bkz)$")>;
def SPRWriteResGroup549 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [4, 2, 1, 1, 1];
+ let ReleaseAtCycles = [4, 2, 1, 1, 1];
let Latency = 53;
let NumMicroOps = 9;
}
def : InstRW<[SPRWriteResGroup549, ReadAfterVecYLd], (instregex "^VSQRTPHZm(b?)$")>;
def SPRWriteResGroup550 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
- let ResourceCycles = [4, 2, 1, 1, 1];
+ let ReleaseAtCycles = [4, 2, 1, 1, 1];
let Latency = 55;
let NumMicroOps = 9;
}
@@ -5016,14 +5016,14 @@ def : InstRW<[SPRWriteResGroup550, ReadAfterVecYLd], (instregex "^VSQRTPHZm(bk|k
"^VSQRTPHZm(k|bkz)$")>;
def SPRWriteResGroup551 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort05]> {
- let ResourceCycles = [4, 1, 1];
+ let ReleaseAtCycles = [4, 1, 1];
let Latency = 45;
let NumMicroOps = 6;
}
def : InstRW<[SPRWriteResGroup551], (instregex "^VSQRTPHZr(b?)$")>;
def SPRWriteResGroup552 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort05]> {
- let ResourceCycles = [4, 1, 1];
+ let ReleaseAtCycles = [4, 1, 1];
let Latency = 47;
let NumMicroOps = 6;
}
@@ -5031,98 +5031,98 @@ def : InstRW<[SPRWriteResGroup552], (instregex "^VSQRTPHZr(bk|kz)$",
"^VSQRTPHZr(k|bkz)$")>;
def SPRWriteResGroup553 : SchedWriteRes<[SPRPort00, SPRPort00_05]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 19;
let NumMicroOps = 3;
}
def : InstRW<[SPRWriteResGroup553], (instrs VSQRTPSZr)>;
def SPRWriteResGroup554 : SchedWriteRes<[SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10]> {
- let ResourceCycles = [1, 2, 3, 3, 1];
+ let ReleaseAtCycles = [1, 2, 3, 3, 1];
let Latency = 12;
let NumMicroOps = 10;
}
def : InstRW<[SPRWriteResGroup554], (instrs VZEROALL)>;
def SPRWriteResGroup555 : SchedWriteRes<[SPRPort00_01_05_06]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 2;
let NumMicroOps = 2;
}
def : InstRW<[SPRWriteResGroup555], (instrs WAIT)>;
def SPRWriteResGroup556 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [8, 6, 19, 63, 21, 15, 1, 10, 1];
+ let ReleaseAtCycles = [8, 6, 19, 63, 21, 15, 1, 10, 1];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 144;
}
def : InstRW<[SPRWriteResGroup556], (instrs WRMSR)>;
def SPRWriteResGroup557 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort05]> {
- let ResourceCycles = [2, 1, 4, 1];
+ let ReleaseAtCycles = [2, 1, 4, 1];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 8;
}
def : InstRW<[SPRWriteResGroup557], (instrs WRPKRUr)>;
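// SapphireRapidsModel.MaxLatency in the two groups above is taken here to be
// the model's sentinel for microcoded, serializing flows (WRMSR, WRPKRU)
// whose true latency is not a fixed pipeline value; the constant itself is
// defined elsewhere in the model, so this reading is an assumption.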
def SPRWriteResGroup558 : SchedWriteRes<[SPRPort00_01_05_06_10]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 12;
let NumMicroOps = 2;
}
def : InstRW<[SPRWriteResGroup558, WriteRMW], (instregex "^XADD(16|32|64)rm$")>;
def SPRWriteResGroup559 : SchedWriteRes<[SPRPort00_01_05_06_10]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let Latency = 13;
let NumMicroOps = 2;
}
def : InstRW<[SPRWriteResGroup559, WriteRMW], (instrs XADD8rm)>;
def SPRWriteResGroup560 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> {
- let ResourceCycles = [4, 1];
+ let ReleaseAtCycles = [4, 1];
let Latency = 39;
let NumMicroOps = 5;
}
def : InstRW<[SPRWriteResGroup560, WriteRMW], (instregex "^XCHG(16|32)rm$")>;
def SPRWriteResGroup561 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> {
- let ResourceCycles = [5, 1];
+ let ReleaseAtCycles = [5, 1];
let Latency = 39;
let NumMicroOps = 6;
}
def : InstRW<[SPRWriteResGroup561, WriteRMW], (instrs XCHG64rm)>;
def SPRWriteResGroup562 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> {
- let ResourceCycles = [4, 1];
+ let ReleaseAtCycles = [4, 1];
let Latency = 40;
let NumMicroOps = 5;
}
def : InstRW<[SPRWriteResGroup562, WriteRMW], (instrs XCHG8rm)>;
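// XCHG with a memory operand is implicitly LOCKed on x86, which is consistent
// with the ~39-40 cycle latencies modeled for the three groups above.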
def SPRWriteResGroup563 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06, SPRPort00_05, SPRPort01, SPRPort05, SPRPort06]> {
- let ResourceCycles = [2, 4, 2, 1, 2, 4];
+ let ReleaseAtCycles = [2, 4, 2, 1, 2, 4];
let Latency = 17;
let NumMicroOps = 15;
}
def : InstRW<[SPRWriteResGroup563], (instrs XCH_F)>;
def SPRWriteResGroup564 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01]> {
- let ResourceCycles = [7, 3, 8, 5];
+ let ReleaseAtCycles = [7, 3, 8, 5];
let Latency = 4;
let NumMicroOps = 23;
}
def : InstRW<[SPRWriteResGroup564], (instrs XGETBV)>;
def SPRWriteResGroup565 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = 7;
let NumMicroOps = 3;
}
def : InstRW<[SPRWriteResGroup565], (instrs XLAT)>;
def SPRWriteResGroup566 : SchedWriteRes<[SPRPort01, SPRPort02_03, SPRPort02_03_11, SPRPort06]> {
- let ResourceCycles = [1, 21, 1, 8];
+ let ReleaseAtCycles = [1, 21, 1, 8];
let Latency = 37;
let NumMicroOps = 31;
}
@@ -5130,70 +5130,70 @@ def : InstRW<[SPRWriteResGroup566], (instregex "^XRSTOR((S|64)?)$")>;
def : InstRW<[SPRWriteResGroup566], (instrs XRSTORS64)>;
def SPRWriteResGroup567 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [14, 25, 44, 21, 21, 4, 1, 9, 1];
+ let ReleaseAtCycles = [14, 25, 44, 21, 21, 4, 1, 9, 1];
let Latency = 42;
let NumMicroOps = 140;
}
def : InstRW<[SPRWriteResGroup567], (instrs XSAVE)>;
def SPRWriteResGroup568 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [14, 25, 44, 21, 21, 4, 1, 9, 1];
+ let ReleaseAtCycles = [14, 25, 44, 21, 21, 4, 1, 9, 1];
let Latency = 41;
let NumMicroOps = 140;
}
def : InstRW<[SPRWriteResGroup568], (instrs XSAVE64)>;
def SPRWriteResGroup569 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [1, 19, 36, 52, 23, 4, 2, 12, 2];
+ let ReleaseAtCycles = [1, 19, 36, 52, 23, 4, 2, 12, 2];
let Latency = 42;
let NumMicroOps = 151;
}
def : InstRW<[SPRWriteResGroup569], (instrs XSAVEC)>;
def SPRWriteResGroup570 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [1, 19, 36, 53, 23, 4, 2, 12, 2];
+ let ReleaseAtCycles = [1, 19, 36, 53, 23, 4, 2, 12, 2];
let Latency = 42;
let NumMicroOps = 152;
}
def : InstRW<[SPRWriteResGroup570], (instrs XSAVEC64)>;
def SPRWriteResGroup571 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [25, 35, 52, 27, 4, 1, 10, 1];
+ let ReleaseAtCycles = [25, 35, 52, 27, 4, 1, 10, 1];
let Latency = 42;
let NumMicroOps = 155;
}
def : InstRW<[SPRWriteResGroup571], (instrs XSAVEOPT)>;
def SPRWriteResGroup572 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [25, 35, 53, 27, 4, 1, 10, 1];
+ let ReleaseAtCycles = [25, 35, 53, 27, 4, 1, 10, 1];
let Latency = 42;
let NumMicroOps = 156;
}
def : InstRW<[SPRWriteResGroup572], (instrs XSAVEOPT64)>;
def SPRWriteResGroup573 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [23, 32, 53, 29, 30, 4, 2, 9, 2];
+ let ReleaseAtCycles = [23, 32, 53, 29, 30, 4, 2, 9, 2];
let Latency = 42;
let NumMicroOps = 184;
}
def : InstRW<[SPRWriteResGroup573], (instrs XSAVES)>;
def SPRWriteResGroup574 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
- let ResourceCycles = [23, 33, 53, 29, 32, 4, 2, 8, 2];
+ let ReleaseAtCycles = [23, 33, 53, 29, 32, 4, 2, 8, 2];
let Latency = 42;
let NumMicroOps = 186;
}
def : InstRW<[SPRWriteResGroup574], (instrs XSAVES64)>;
def SPRWriteResGroup575 : SchedWriteRes<[SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort05]> {
- let ResourceCycles = [4, 23, 2, 14, 8, 1, 2];
+ let ReleaseAtCycles = [4, 23, 2, 14, 8, 1, 2];
let Latency = 5;
let NumMicroOps = 54;
}
def : InstRW<[SPRWriteResGroup575], (instrs XSETBV)>;
def SPRWriteResGroup576 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> {
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let Latency = SapphireRapidsModel.MaxLatency;
let NumMicroOps = 3;
}
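// The bulk of this update is a mechanical rename of the SchedWriteRes field
// ResourceCycles to ReleaseAtCycles, matching the newer scheduling-model
// terminology introduced alongside AcquireAtCycles (the cycle at which a uop
// releases a resource). A minimal sketch of the pattern, with a hypothetical
// group name that is not part of this patch:
//
//   def HypotheticalWriteGroup : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
//     let ReleaseAtCycles = [3, 1]; // was: let ResourceCycles = [3, 1];
//     let Latency = 23;
//     let NumMicroOps = 4;
//   }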
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 9412a40be48c..4fa138f69fb9 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -94,7 +94,7 @@ multiclass SKLWriteResPair<X86FoldableSchedWrite SchedRW,
// Register variant uses a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
- let ResourceCycles = Res;
+ let ReleaseAtCycles = Res;
let NumMicroOps = UOps;
}
@@ -102,7 +102,7 @@ multiclass SKLWriteResPair<X86FoldableSchedWrite SchedRW,
// the latency (default = 5).
def : WriteRes<SchedRW.Folded, !listconcat([SKLPort23], ExePorts)> {
let Latency = !add(Lat, LoadLat);
- let ResourceCycles = !listconcat([1], Res);
+ let ReleaseAtCycles = !listconcat([1], Res);
let NumMicroOps = !add(UOps, LoadUOps);
}
}
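// For orientation, a sketch of how this multiclass is typically instantiated
// (illustrative write type; parameter defaults assumed from the upstream
// signature: Res = [1], UOps = 1, LoadLat = 5, LoadUOps = 1):
//
//   defm : SKLWriteResPair<WriteALU, [SKLPort0156], 1>;
//
// Under those defaults this expands to a register WriteRes with Latency = 1
// and ReleaseAtCycles = [1], plus a folded-load WriteRes on
// [SKLPort23, SKLPort0156] with Latency = 6 and NumMicroOps = 2.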
@@ -424,7 +424,7 @@ defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
def : WriteRes<WriteVecInsert, [SKLPort5]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def : WriteRes<WriteVecInsertLd, [SKLPort5,SKLPort23]> {
let Latency = 6;
@@ -494,48 +494,48 @@ defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
def : WriteRes<WritePCmpIStrM, [SKLPort0]> {
let Latency = 10;
let NumMicroOps = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrMLd, [SKLPort0, SKLPort23]> {
let Latency = 16;
let NumMicroOps = 4;
- let ResourceCycles = [3,1];
+ let ReleaseAtCycles = [3,1];
}
// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [SKLPort0, SKLPort5, SKLPort015, SKLPort0156]> {
let Latency = 19;
let NumMicroOps = 9;
- let ResourceCycles = [4,3,1,1];
+ let ReleaseAtCycles = [4,3,1,1];
}
def : WriteRes<WritePCmpEStrMLd, [SKLPort0, SKLPort5,SKLPort23, SKLPort015, SKLPort0156]> {
let Latency = 25;
let NumMicroOps = 10;
- let ResourceCycles = [4,3,1,1,1];
+ let ReleaseAtCycles = [4,3,1,1,1];
}
// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [SKLPort0]> {
let Latency = 10;
let NumMicroOps = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrILd, [SKLPort0, SKLPort23]> {
let Latency = 16;
let NumMicroOps = 4;
- let ResourceCycles = [3,1];
+ let ReleaseAtCycles = [3,1];
}
// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [SKLPort0, SKLPort5, SKLPort0156]> {
let Latency = 18;
let NumMicroOps = 8;
- let ResourceCycles = [4,3,1];
+ let ReleaseAtCycles = [4,3,1];
}
def : WriteRes<WritePCmpEStrILd, [SKLPort0, SKLPort5, SKLPort23, SKLPort0156]> {
let Latency = 24;
let NumMicroOps = 9;
- let ResourceCycles = [4,3,1,1];
+ let ReleaseAtCycles = [4,3,1,1];
}
// MOVMSK Instructions.
@@ -548,46 +548,46 @@ def : WriteRes<WriteMMXMOVMSK, [SKLPort0]> { let Latency = 2; }
def : WriteRes<WriteAESDecEnc, [SKLPort0]> { // Decryption, encryption.
let Latency = 4;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def : WriteRes<WriteAESDecEncLd, [SKLPort0, SKLPort23]> {
let Latency = 10;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def : WriteRes<WriteAESIMC, [SKLPort0]> { // InvMixColumn.
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def : WriteRes<WriteAESIMCLd, [SKLPort0, SKLPort23]> {
let Latency = 14;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def : WriteRes<WriteAESKeyGen, [SKLPort0, SKLPort5, SKLPort015]> { // Key Generation.
let Latency = 20;
let NumMicroOps = 11;
- let ResourceCycles = [3,6,2];
+ let ReleaseAtCycles = [3,6,2];
}
def : WriteRes<WriteAESKeyGenLd, [SKLPort0, SKLPort5, SKLPort23, SKLPort015]> {
let Latency = 25;
let NumMicroOps = 11;
- let ResourceCycles = [3,6,1,1];
+ let ReleaseAtCycles = [3,6,1,1];
}
// Carry-less multiplication instructions.
def : WriteRes<WriteCLMul, [SKLPort5]> {
let Latency = 6;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def : WriteRes<WriteCLMulLd, [SKLPort5, SKLPort23]> {
let Latency = 12;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
// Catch-all for expensive system instructions.
@@ -607,8 +607,8 @@ def : WriteRes<WriteMicrocoded, [SKLPort0156]> { let Latency = 100; } // def Wri
def : WriteRes<WriteFence, [SKLPort23, SKLPort4]>;
// Load/store MXCSR.
-def : WriteRes<WriteLDMXCSR, [SKLPort0,SKLPort23,SKLPort0156]> { let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
-def : WriteRes<WriteSTMXCSR, [SKLPort4,SKLPort5,SKLPort237]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
+def : WriteRes<WriteLDMXCSR, [SKLPort0,SKLPort23,SKLPort0156]> { let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1]; }
+def : WriteRes<WriteSTMXCSR, [SKLPort4,SKLPort5,SKLPort237]> { let Latency = 2; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1]; }
// Nop, not very useful except that it provides a model for nops!
def : WriteRes<WriteNop, []>;
@@ -628,7 +628,7 @@ defm : SKLWriteResPair<WritePHAddY, [SKLPort5,SKLPort015], 3, [2,1], 3, 7>;
def SKLWriteResGroup1 : SchedWriteRes<[SKLPort0]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PADDS(B|W)rr",
"MMX_PADDUS(B|W)rr",
@@ -643,7 +643,7 @@ def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PADDS(B|W)rr",
def SKLWriteResGroup3 : SchedWriteRes<[SKLPort5]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKLWriteResGroup3], (instregex "COM(P?)_FST0r",
"UCOM_F(P?)r")>;
@@ -651,35 +651,35 @@ def: InstRW<[SKLWriteResGroup3], (instregex "COM(P?)_FST0r",
def SKLWriteResGroup4 : SchedWriteRes<[SKLPort6]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKLWriteResGroup4], (instregex "JMP(16|32|64)r")>;
def SKLWriteResGroup6 : SchedWriteRes<[SKLPort05]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKLWriteResGroup6], (instrs FINCSTP, FNOP)>;
def SKLWriteResGroup7 : SchedWriteRes<[SKLPort06]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKLWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;
def SKLWriteResGroup8 : SchedWriteRes<[SKLPort15]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKLWriteResGroup8], (instregex "ANDN(32|64)rr")>;
def SKLWriteResGroup9 : SchedWriteRes<[SKLPort015]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKLWriteResGroup9], (instregex "(V?)PADD(B|D|Q|W)(Y?)rr",
"VPBLENDD(Y?)rri")>;
@@ -687,7 +687,7 @@ def: InstRW<[SKLWriteResGroup9], (instregex "(V?)PADD(B|D|Q|W)(Y?)rr",
def SKLWriteResGroup10 : SchedWriteRes<[SKLPort0156]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKLWriteResGroup10], (instrs SGDT64m,
SIDT64m,
@@ -698,7 +698,7 @@ def: InstRW<[SKLWriteResGroup10], (instrs SGDT64m,
def SKLWriteResGroup11 : SchedWriteRes<[SKLPort4,SKLPort237]> {
let Latency = 1;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup11], (instrs FBSTPm, VMPTRSTm)>;
def: InstRW<[SKLWriteResGroup11], (instregex "ST_FP(32|64|80)m")>;
@@ -706,14 +706,14 @@ def: InstRW<[SKLWriteResGroup11], (instregex "ST_FP(32|64|80)m")>;
def SKLWriteResGroup13 : SchedWriteRes<[SKLPort5]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[SKLWriteResGroup13], (instrs MMX_MOVQ2DQrr)>;
def SKLWriteResGroup14 : SchedWriteRes<[SKLPort05]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[SKLWriteResGroup14], (instrs FDECSTP,
MMX_MOVDQ2Qrr)>;
@@ -721,7 +721,7 @@ def: InstRW<[SKLWriteResGroup14], (instrs FDECSTP,
def SKLWriteResGroup17 : SchedWriteRes<[SKLPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[SKLWriteResGroup17], (instrs LFENCE,
WAIT,
@@ -730,21 +730,21 @@ def: InstRW<[SKLWriteResGroup17], (instrs LFENCE,
def SKLWriteResGroup20 : SchedWriteRes<[SKLPort6,SKLPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup20], (instregex "CLFLUSH")>;
def SKLWriteResGroup21 : SchedWriteRes<[SKLPort237,SKLPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup21], (instrs SFENCE)>;
def SKLWriteResGroup23 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup23], (instrs CWD,
JCXZ, JECXZ, JRCXZ,
@@ -756,21 +756,21 @@ def: InstRW<[SKLWriteResGroup23], (instrs CWD,
def SKLWriteResGroup25 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKLWriteResGroup25], (instrs FNSTCW16m)>;
def SKLWriteResGroup27 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort15]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKLWriteResGroup27], (instregex "MOVBE(16|32|64)mr")>;
def SKLWriteResGroup28 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort0156]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKLWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
STOSB, STOSL, STOSQ, STOSW)>;
@@ -779,7 +779,7 @@ def: InstRW<[SKLWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>;
def SKLWriteResGroup29 : SchedWriteRes<[SKLPort1]> {
let Latency = 3;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKLWriteResGroup29], (instregex "PDEP(32|64)rr",
"PEXT(32|64)rr")>;
@@ -787,7 +787,7 @@ def: InstRW<[SKLWriteResGroup29], (instregex "PDEP(32|64)rr",
def SKLWriteResGroup30 : SchedWriteRes<[SKLPort5]> {
let Latency = 3;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKLWriteResGroup30], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
"VPBROADCAST(B|W)rr")>;
@@ -795,21 +795,21 @@ def: InstRW<[SKLWriteResGroup30], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0
def SKLWriteResGroup32 : SchedWriteRes<[SKLPort0,SKLPort0156]> {
let Latency = 3;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup32], (instrs FNSTSW16r)>;
def SKLWriteResGroup35 : SchedWriteRes<[SKLPort0,SKLPort5]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKLWriteResGroup35], (instregex "MMX_PH(ADD|SUB)SWrr")>;
def SKLWriteResGroup36 : SchedWriteRes<[SKLPort5,SKLPort01]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKLWriteResGroup36], (instregex "(V?)PHADDSW(Y?)rr",
"(V?)PHSUBSW(Y?)rr")>;
@@ -817,7 +817,7 @@ def: InstRW<[SKLWriteResGroup36], (instregex "(V?)PHADDSW(Y?)rr",
def SKLWriteResGroup39 : SchedWriteRes<[SKLPort5,SKLPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKLWriteResGroup39], (instrs MMX_PACKSSDWrr,
MMX_PACKSSWBrr,
@@ -826,21 +826,21 @@ def: InstRW<[SKLWriteResGroup39], (instrs MMX_PACKSSDWrr,
def SKLWriteResGroup40 : SchedWriteRes<[SKLPort6,SKLPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKLWriteResGroup40], (instregex "CLD")>;
def SKLWriteResGroup41 : SchedWriteRes<[SKLPort237,SKLPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKLWriteResGroup41], (instrs MFENCE)>;
def SKLWriteResGroup42 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKLWriteResGroup42], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;
@@ -848,49 +848,49 @@ def: InstRW<[SKLWriteResGroup42], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
def SKLWriteResGroup42b : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
let Latency = 5;
let NumMicroOps = 8;
- let ResourceCycles = [2,4,2];
+ let ReleaseAtCycles = [2,4,2];
}
def: InstRW<[SKLWriteResGroup42b], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;
def SKLWriteResGroup42c : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
let Latency = 6;
let NumMicroOps = 8;
- let ResourceCycles = [2,4,2];
+ let ReleaseAtCycles = [2,4,2];
}
def: InstRW<[SKLWriteResGroup42c], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;
def SKLWriteResGroup43 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort237]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKLWriteResGroup43], (instrs FNSTSWm)>;
def SKLWriteResGroup45 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237,SKLPort0156]> {
let Latency = 3;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKLWriteResGroup45], (instregex "CALL(16|32|64)r")>;
def SKLWriteResGroup46 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 3;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKLWriteResGroup46], (instrs CALL64pcrel32)>;
def SKLWriteResGroup47 : SchedWriteRes<[SKLPort0]> {
let Latency = 4;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKLWriteResGroup47], (instregex "MUL_(FPrST0|FST0r|FrST0)")>;
def SKLWriteResGroup53 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237]> {
let Latency = 4;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKLWriteResGroup53], (instregex "IST(T?)_FP(16|32|64)m",
"IST_F(16|32)m")>;
@@ -898,63 +898,63 @@ def: InstRW<[SKLWriteResGroup53], (instregex "IST(T?)_FP(16|32|64)m",
def SKLWriteResGroup54 : SchedWriteRes<[SKLPort0156]> {
let Latency = 4;
let NumMicroOps = 4;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
}
def: InstRW<[SKLWriteResGroup54], (instrs FNCLEX)>;
def SKLWriteResGroup55 : SchedWriteRes<[SKLPort6,SKLPort0156]> {
let Latency = 4;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ReleaseAtCycles = [1,3];
}
def: InstRW<[SKLWriteResGroup55], (instrs PAUSE)>;
def SKLWriteResGroup56 : SchedWriteRes<[]> {
let Latency = 0;
let NumMicroOps = 4;
- let ResourceCycles = [];
+ let ReleaseAtCycles = [];
}
def: InstRW<[SKLWriteResGroup56], (instrs VZEROUPPER)>;
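// Note the empty ReleaseAtCycles above: VZEROUPPER is modeled as four uops
// that occupy no specific port group, i.e. it costs decode/retire bandwidth
// but adds no per-port pressure.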
def SKLWriteResGroup57 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort0156]> {
let Latency = 4;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
+ let ReleaseAtCycles = [1,1,2];
}
def: InstRW<[SKLWriteResGroup57], (instregex "LAR(16|32|64)rr")>;
def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort01]> {
let Latency = 5;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup60], (instregex "MMX_CVT(T?)PS2PIrr")>;
def SKLWriteResGroup61 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06]> {
let Latency = 5;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKLWriteResGroup61], (instregex "STR(16|32|64)r")>;
def SKLWriteResGroup63 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
- let ResourceCycles = [1,4];
+ let ReleaseAtCycles = [1,4];
}
def: InstRW<[SKLWriteResGroup63], (instrs XSETBV)>;
def SKLWriteResGroup65 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort0156]> {
let Latency = 5;
let NumMicroOps = 6;
- let ResourceCycles = [1,1,4];
+ let ReleaseAtCycles = [1,1,4];
}
def: InstRW<[SKLWriteResGroup65], (instregex "PUSHF(16|64)")>;
def SKLWriteResGroup67 : SchedWriteRes<[SKLPort23]> {
let Latency = 6;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKLWriteResGroup67], (instrs VBROADCASTSSrm,
VPBROADCASTDrm,
@@ -966,14 +966,14 @@ def: InstRW<[SKLWriteResGroup67], (instregex "(V?)MOVSHDUPrm",
def SKLWriteResGroup68 : SchedWriteRes<[SKLPort0]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[SKLWriteResGroup68], (instrs MMX_CVTPI2PSrr)>;
def SKLWriteResGroup69 : SchedWriteRes<[SKLPort0,SKLPort23]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup69], (instrs MMX_PADDSBrm,
MMX_PADDSWrm,
@@ -999,7 +999,7 @@ def: InstRW<[SKLWriteResGroup69], (instrs MMX_PADDSBrm,
def SKLWriteResGroup72 : SchedWriteRes<[SKLPort6,SKLPort23]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup72], (instrs FARJMP64m)>;
def: InstRW<[SKLWriteResGroup72], (instregex "JMP(16|32|64)m")>;
@@ -1007,7 +1007,7 @@ def: InstRW<[SKLWriteResGroup72], (instregex "JMP(16|32|64)m")>;
def SKLWriteResGroup75 : SchedWriteRes<[SKLPort23,SKLPort15]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup75], (instregex "ANDN(32|64)rm",
"MOVBE(16|32|64)rm")>;
@@ -1015,7 +1015,7 @@ def: InstRW<[SKLWriteResGroup75], (instregex "ANDN(32|64)rm",
def SKLWriteResGroup76 : SchedWriteRes<[SKLPort23,SKLPort0156]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup76], (instrs POP16r, POP32r, POP64r)>;
def: InstRW<[SKLWriteResGroup76], (instregex "POP(16|32|64)rmr")>;
@@ -1023,21 +1023,21 @@ def: InstRW<[SKLWriteResGroup76], (instregex "POP(16|32|64)rmr")>;
def SKLWriteResGroup78 : SchedWriteRes<[SKLPort5,SKLPort01]> {
let Latency = 6;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKLWriteResGroup78], (instregex "(V?)CVTSI642SSrr")>;
def SKLWriteResGroup80 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06,SKLPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKLWriteResGroup80], (instregex "SLDT(16|32|64)r")>;
def SKLWriteResGroup82 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> {
let Latency = 6;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKLWriteResGroup82], (instregex "SAR(8|16|32|64)m(1|i)",
"SHL(8|16|32|64)m(1|i)",
@@ -1046,7 +1046,7 @@ def: InstRW<[SKLWriteResGroup82], (instregex "SAR(8|16|32|64)m(1|i)",
def SKLWriteResGroup83 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKLWriteResGroup83], (instregex "POP(16|32|64)rmm",
"PUSH(16|32|64)rmm")>;
@@ -1054,18 +1054,18 @@ def: InstRW<[SKLWriteResGroup83], (instregex "POP(16|32|64)rmm",
def SKLWriteResGroup84 : SchedWriteRes<[SKLPort6,SKLPort0156]> {
let Latency = 6;
let NumMicroOps = 6;
- let ResourceCycles = [1,5];
+ let ReleaseAtCycles = [1,5];
}
def: InstRW<[SKLWriteResGroup84], (instrs STD)>;
def SKLWriteResGroup85 : SchedWriteRes<[SKLPort23]> {
let Latency = 7;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKLWriteResGroup85], (instregex "LD_F(32|64|80)m")>;
-def: InstRW<[SKLWriteResGroup85], (instrs VBROADCASTF128,
- VBROADCASTI128,
+def: InstRW<[SKLWriteResGroup85], (instrs VBROADCASTF128rm,
+ VBROADCASTI128rm,
VBROADCASTSDYrm,
VBROADCASTSSYrm,
VMOVDDUPYrm,
@@ -1077,7 +1077,7 @@ def: InstRW<[SKLWriteResGroup85], (instrs VBROADCASTF128,
def SKLWriteResGroup88 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup88], (instregex "(V?)PMOV(SX|ZX)BDrm",
"(V?)PMOV(SX|ZX)BQrm",
@@ -1089,7 +1089,7 @@ def: InstRW<[SKLWriteResGroup88], (instregex "(V?)PMOV(SX|ZX)BDrm",
def SKLWriteResGroup91 : SchedWriteRes<[SKLPort23,SKLPort015]> {
let Latency = 7;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup91], (instrs VINSERTF128rm,
VINSERTI128rm,
@@ -1101,7 +1101,7 @@ def: InstRW<[SKLWriteResGroup91, ReadAfterVecXLd],
def SKLWriteResGroup92 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKLWriteResGroup92], (instrs MMX_PACKSSDWrm,
MMX_PACKSSWBrm,
@@ -1110,7 +1110,7 @@ def: InstRW<[SKLWriteResGroup92], (instrs MMX_PACKSSDWrm,
def SKLWriteResGroup94 : SchedWriteRes<[SKLPort23,SKLPort0156]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKLWriteResGroup94], (instrs LEAVE, LEAVE64,
SCASB, SCASL, SCASQ, SCASW)>;
@@ -1118,28 +1118,28 @@ def: InstRW<[SKLWriteResGroup94], (instrs LEAVE, LEAVE64,
def SKLWriteResGroup95 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort01]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKLWriteResGroup95], (instregex "(V?)CVT(T?)SS2SI64rr")>;
def SKLWriteResGroup96 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort05]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKLWriteResGroup96], (instrs FLDCW16m)>;
def SKLWriteResGroup98 : SchedWriteRes<[SKLPort6,SKLPort23,SKLPort0156]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKLWriteResGroup98], (instrs LRET64, RET64)>;
def SKLWriteResGroup100 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> {
let Latency = 7;
let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,2];
+ let ReleaseAtCycles = [1,1,1,2];
}
def: InstRW<[SKLWriteResGroup100], (instregex "ROL(8|16|32|64)m(1|i)",
"ROR(8|16|32|64)m(1|i)")>;
@@ -1147,7 +1147,7 @@ def: InstRW<[SKLWriteResGroup100], (instregex "ROL(8|16|32|64)m(1|i)",
def SKLWriteResGroup100_1 : SchedWriteRes<[SKLPort06]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[SKLWriteResGroup100_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
ROR8r1, ROR16r1, ROR32r1, ROR64r1)>;
@@ -1155,14 +1155,14 @@ def: InstRW<[SKLWriteResGroup100_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
def SKLWriteResGroup101 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> {
let Latency = 7;
let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,2];
+ let ReleaseAtCycles = [1,1,1,2];
}
def: InstRW<[SKLWriteResGroup101], (instregex "XADD(8|16|32|64)rm")>;
def SKLWriteResGroup102 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort0156]> {
let Latency = 7;
let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1,1];
}
def: InstRW<[SKLWriteResGroup102], (instregex "CALL(16|32|64)m")>;
def: InstRW<[SKLWriteResGroup102], (instrs FARCALL64m)>;
@@ -1170,14 +1170,14 @@ def: InstRW<[SKLWriteResGroup102], (instrs FARCALL64m)>;
def SKLWriteResGroup103 : SchedWriteRes<[SKLPort6,SKLPort06,SKLPort15,SKLPort0156]> {
let Latency = 7;
let NumMicroOps = 7;
- let ResourceCycles = [1,3,1,2];
+ let ReleaseAtCycles = [1,3,1,2];
}
def: InstRW<[SKLWriteResGroup103], (instrs LOOP)>;
def SKLWriteResGroup107 : SchedWriteRes<[SKLPort1,SKLPort23]> {
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup107], (instregex "PDEP(32|64)rm",
"PEXT(32|64)rm")>;
@@ -1185,7 +1185,7 @@ def: InstRW<[SKLWriteResGroup107], (instregex "PDEP(32|64)rm",
def SKLWriteResGroup108 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup108], (instregex "FCOM(P?)(32|64)m")>;
def: InstRW<[SKLWriteResGroup108], (instrs VPBROADCASTBYrm,
@@ -1197,7 +1197,7 @@ def: InstRW<[SKLWriteResGroup108], (instrs VPBROADCASTBYrm,
def SKLWriteResGroup110 : SchedWriteRes<[SKLPort23,SKLPort015]> {
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup110], (instrs VPBLENDDYrmi)>;
def: InstRW<[SKLWriteResGroup110, ReadAfterVecYLd],
@@ -1207,14 +1207,14 @@ def: InstRW<[SKLWriteResGroup110, ReadAfterVecYLd],
def SKLWriteResGroup112 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 8;
let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
+ let ReleaseAtCycles = [1,2,1];
}
def: InstRW<[SKLWriteResGroup112], (instregex "MMX_PH(ADD|SUB)SWrm")>;
def SKLWriteResGroup116 : SchedWriteRes<[SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 8;
let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,2];
+ let ReleaseAtCycles = [1,1,1,2];
}
def: InstRW<[SKLWriteResGroup116], (instregex "RCL(8|16|32|64)m(1|i)",
"RCR(8|16|32|64)m(1|i)")>;
@@ -1222,7 +1222,7 @@ def: InstRW<[SKLWriteResGroup116], (instregex "RCL(8|16|32|64)m(1|i)",
def SKLWriteResGroup117 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> {
let Latency = 8;
let NumMicroOps = 6;
- let ResourceCycles = [1,1,1,3];
+ let ReleaseAtCycles = [1,1,1,3];
}
def: InstRW<[SKLWriteResGroup117], (instregex "ROL(8|16|32|64)mCL",
"ROR(8|16|32|64)mCL",
@@ -1233,21 +1233,21 @@ def: InstRW<[SKLWriteResGroup117], (instregex "ROL(8|16|32|64)mCL",
def SKLWriteResGroup119 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 8;
let NumMicroOps = 6;
- let ResourceCycles = [1,1,1,2,1];
+ let ReleaseAtCycles = [1,1,1,2,1];
}
def: SchedAlias<WriteADCRMW, SKLWriteResGroup119>;
def SKLWriteResGroup120 : SchedWriteRes<[SKLPort0,SKLPort23]> {
let Latency = 9;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup120], (instrs MMX_CVTPI2PSrm)>;
def SKLWriteResGroup121 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 9;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup121], (instrs PCMPGTQrm,
VPCMPGTQrm,
@@ -1259,14 +1259,14 @@ def: InstRW<[SKLWriteResGroup121], (instrs PCMPGTQrm,
def SKLWriteResGroup123 : SchedWriteRes<[SKLPort23,SKLPort01]> {
let Latency = 9;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup123], (instregex "MMX_CVT(T?)PS2PIrm")>;
def SKLWriteResGroup128 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
let Latency = 9;
let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
+ let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[SKLWriteResGroup128], (instregex "(V?)PHADDSWrm",
"(V?)PHSUBSWrm")>;
@@ -1274,7 +1274,7 @@ def: InstRW<[SKLWriteResGroup128], (instregex "(V?)PHADDSWrm",
def SKLWriteResGroup131 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort23,SKLPort0156]> {
let Latency = 9;
let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
+ let ReleaseAtCycles = [1,2,1,1];
}
def: InstRW<[SKLWriteResGroup131], (instregex "LAR(16|32|64)rm",
"LSL(16|32|64)rm")>;
@@ -1282,7 +1282,7 @@ def: InstRW<[SKLWriteResGroup131], (instregex "LAR(16|32|64)rm",
def SKLWriteResGroup133 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 10;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup133], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m")>;
@@ -1291,14 +1291,14 @@ def: InstRW<[SKLWriteResGroup133], (instrs VPCMPGTQYrm)>;
def SKLWriteResGroup138 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 10;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKLWriteResGroup138], (instrs MMX_CVTPI2PDrm)>;
def SKLWriteResGroup140 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
let Latency = 10;
let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
+ let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[SKLWriteResGroup140], (instrs VPHADDSWYrm,
VPHSUBSWYrm)>;
@@ -1306,28 +1306,28 @@ def: InstRW<[SKLWriteResGroup140], (instrs VPHADDSWYrm,
def SKLWriteResGroup143 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 10;
let NumMicroOps = 8;
- let ResourceCycles = [1,1,1,1,1,3];
+ let ReleaseAtCycles = [1,1,1,1,1,3];
}
def: InstRW<[SKLWriteResGroup143], (instregex "XCHG(8|16|32|64)rm")>;
def SKLWriteResGroup146 : SchedWriteRes<[SKLPort0,SKLPort23]> {
let Latency = 11;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup146], (instregex "MUL_F(32|64)m")>;
def SKLWriteResGroup149 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 11;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKLWriteResGroup149], (instregex "FICOM(P?)(16|32)m")>;
def SKLWriteResGroup154 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
let Latency = 11;
let NumMicroOps = 7;
- let ResourceCycles = [2,3,2];
+ let ReleaseAtCycles = [2,3,2];
}
def: InstRW<[SKLWriteResGroup154], (instregex "RCL(16|32|64)rCL",
"RCR(16|32|64)rCL")>;
@@ -1335,119 +1335,119 @@ def: InstRW<[SKLWriteResGroup154], (instregex "RCL(16|32|64)rCL",
def SKLWriteResGroup155 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort15,SKLPort0156]> {
let Latency = 11;
let NumMicroOps = 9;
- let ResourceCycles = [1,5,1,2];
+ let ReleaseAtCycles = [1,5,1,2];
}
def: InstRW<[SKLWriteResGroup155], (instrs RCL8rCL)>;
def SKLWriteResGroup156 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
let Latency = 11;
let NumMicroOps = 11;
- let ResourceCycles = [2,9];
+ let ReleaseAtCycles = [2,9];
}
def: InstRW<[SKLWriteResGroup156], (instrs LOOPE, LOOPNE)>;
def SKLWriteResGroup162 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 13;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKLWriteResGroup162], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>;
def SKLWriteResGroup169 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 14;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKLWriteResGroup169], (instregex "MUL_FI(16|32)m")>;
def SKLWriteResGroup170 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort15,SKLPort0156]> {
let Latency = 14;
let NumMicroOps = 10;
- let ResourceCycles = [2,4,1,3];
+ let ReleaseAtCycles = [2,4,1,3];
}
def: InstRW<[SKLWriteResGroup170], (instrs RCR8rCL)>;
def SKLWriteResGroup171 : SchedWriteRes<[SKLPort0]> {
let Latency = 15;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKLWriteResGroup171], (instregex "DIVR_(FPrST0|FST0r|FrST0)")>;
def SKLWriteResGroup174 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> {
let Latency = 15;
let NumMicroOps = 10;
- let ResourceCycles = [1,1,1,5,1,1];
+ let ReleaseAtCycles = [1,1,1,5,1,1];
}
def: InstRW<[SKLWriteResGroup174], (instregex "RCL(8|16|32|64)mCL")>;
def SKLWriteResGroup177 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> {
let Latency = 16;
let NumMicroOps = 14;
- let ResourceCycles = [1,1,1,4,2,5];
+ let ReleaseAtCycles = [1,1,1,4,2,5];
}
def: InstRW<[SKLWriteResGroup177], (instrs CMPXCHG8B)>;
def SKLWriteResGroup178 : SchedWriteRes<[SKLPort0156]> {
let Latency = 16;
let NumMicroOps = 16;
- let ResourceCycles = [16];
+ let ReleaseAtCycles = [16];
}
def: InstRW<[SKLWriteResGroup178], (instrs VZEROALL)>;
def SKLWriteResGroup180 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156]> {
let Latency = 17;
let NumMicroOps = 15;
- let ResourceCycles = [2,1,2,4,2,4];
+ let ReleaseAtCycles = [2,1,2,4,2,4];
}
def: InstRW<[SKLWriteResGroup180], (instrs XCH_F)>;
def SKLWriteResGroup184 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort06,SKLPort0156]> {
let Latency = 18;
let NumMicroOps = 8;
- let ResourceCycles = [1,1,1,5];
+ let ReleaseAtCycles = [1,1,1,5];
}
def: InstRW<[SKLWriteResGroup184], (instrs CPUID, RDTSC)>;
def SKLWriteResGroup185 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> {
let Latency = 18;
let NumMicroOps = 11;
- let ResourceCycles = [2,1,1,4,1,2];
+ let ReleaseAtCycles = [2,1,1,4,1,2];
}
def: InstRW<[SKLWriteResGroup185], (instregex "RCR(8|16|32|64)mCL")>;
def SKLWriteResGroup189 : SchedWriteRes<[SKLPort0]> {
let Latency = 20;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKLWriteResGroup189], (instregex "DIV_(FPrST0|FST0r|FrST0)")>;
def SKLWriteResGroup192 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 20;
let NumMicroOps = 8;
- let ResourceCycles = [1,1,1,1,1,1,2];
+ let ReleaseAtCycles = [1,1,1,1,1,1,2];
}
def: InstRW<[SKLWriteResGroup192], (instrs INSB, INSL, INSW)>;
def SKLWriteResGroup193 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort0156]> {
let Latency = 20;
let NumMicroOps = 10;
- let ResourceCycles = [1,2,7];
+ let ReleaseAtCycles = [1,2,7];
}
def: InstRW<[SKLWriteResGroup193], (instrs MWAITrr)>;
def SKLWriteResGroup196 : SchedWriteRes<[SKLPort0,SKLPort23]> {
let Latency = 22;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup196], (instregex "DIV_F(32|64)m")>;
def SKLWriteResGroupVEX2 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> {
let Latency = 18;
let NumMicroOps = 5; // 2 uops perform multiple loads
- let ResourceCycles = [1,2,1,1];
+ let ReleaseAtCycles = [1,2,1,1];
}
def: InstRW<[SKLWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm,
VGATHERQPDrm, VPGATHERQQrm,
@@ -1456,7 +1456,7 @@ def: InstRW<[SKLWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm,
def SKLWriteResGroupVEX4 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> {
let Latency = 20;
let NumMicroOps = 5; // 2 uops perform multiple loads
- let ResourceCycles = [1,4,1,1];
+ let ReleaseAtCycles = [1,4,1,1];
}
def: InstRW<[SKLWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
VGATHERDPSrm, VPGATHERDDrm,
@@ -1466,42 +1466,42 @@ def: InstRW<[SKLWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
def SKLWriteResGroupVEX8 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> {
let Latency = 22;
let NumMicroOps = 5; // 2 uops perform multiple loads
- let ResourceCycles = [1,8,1,1];
+ let ReleaseAtCycles = [1,8,1,1];
}
def: InstRW<[SKLWriteResGroupVEX8], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
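// Across the three VEX gather groups above, the port-23 entry in
// ReleaseAtCycles scales with the number of gathered elements (2, 4, 8)
// while NumMicroOps stays at 5; the extra loads are absorbed by the two
// load uops, as the comments note.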
def SKLWriteResGroup198 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort5,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 23;
let NumMicroOps = 19;
- let ResourceCycles = [2,1,4,1,1,4,6];
+ let ReleaseAtCycles = [2,1,4,1,1,4,6];
}
def: InstRW<[SKLWriteResGroup198], (instrs CMPXCHG16B)>;
def SKLWriteResGroup202 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 25;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKLWriteResGroup202], (instregex "DIV_FI(16|32)m")>;
def SKLWriteResGroup206 : SchedWriteRes<[SKLPort0,SKLPort23]> {
let Latency = 27;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup206], (instregex "DIVR_F(32|64)m")>;
def SKLWriteResGroup208 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 30;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKLWriteResGroup208], (instregex "DIVR_FI(16|32)m")>;
def SKLWriteResGroup209 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort23,SKLPort06,SKLPort0156]> {
let Latency = 35;
let NumMicroOps = 23;
- let ResourceCycles = [1,5,3,4,10];
+ let ReleaseAtCycles = [1,5,3,4,10];
}
def: InstRW<[SKLWriteResGroup209], (instregex "IN(8|16|32)ri",
"IN(8|16|32)rr")>;
@@ -1509,7 +1509,7 @@ def: InstRW<[SKLWriteResGroup209], (instregex "IN(8|16|32)ri",
def SKLWriteResGroup210 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 35;
let NumMicroOps = 23;
- let ResourceCycles = [1,5,2,1,4,10];
+ let ReleaseAtCycles = [1,5,2,1,4,10];
}
def: InstRW<[SKLWriteResGroup210], (instregex "OUT(8|16|32)ir",
"OUT(8|16|32)rr")>;
@@ -1517,35 +1517,35 @@ def: InstRW<[SKLWriteResGroup210], (instregex "OUT(8|16|32)ir",
def SKLWriteResGroup211 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort23,SKLPort0156]> {
let Latency = 37;
let NumMicroOps = 31;
- let ResourceCycles = [1,8,1,21];
+ let ReleaseAtCycles = [1,8,1,21];
}
def: InstRW<[SKLWriteResGroup211], (instregex "XRSTOR(64)?")>;
def SKLWriteResGroup212 : SchedWriteRes<[SKLPort1,SKLPort4,SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort15,SKLPort0156]> {
let Latency = 40;
let NumMicroOps = 18;
- let ResourceCycles = [1,1,2,3,1,1,1,8];
+ let ReleaseAtCycles = [1,1,2,3,1,1,1,8];
}
def: InstRW<[SKLWriteResGroup212], (instrs VMCLEARm)>;
def SKLWriteResGroup213 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort0156]> {
let Latency = 41;
let NumMicroOps = 39;
- let ResourceCycles = [1,10,1,1,26];
+ let ReleaseAtCycles = [1,10,1,1,26];
}
def: InstRW<[SKLWriteResGroup213], (instrs XSAVE64)>;
def SKLWriteResGroup214 : SchedWriteRes<[SKLPort5,SKLPort0156]> {
let Latency = 42;
let NumMicroOps = 22;
- let ResourceCycles = [2,20];
+ let ReleaseAtCycles = [2,20];
}
def: InstRW<[SKLWriteResGroup214], (instrs RDTSCP)>;
def SKLWriteResGroup215 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort0156]> {
let Latency = 42;
let NumMicroOps = 40;
- let ResourceCycles = [1,11,1,1,26];
+ let ReleaseAtCycles = [1,11,1,1,26];
}
def: InstRW<[SKLWriteResGroup215], (instrs XSAVE)>;
def: InstRW<[SKLWriteResGroup215], (instregex "XSAVEC", "XSAVES")>;
@@ -1553,42 +1553,42 @@ def: InstRW<[SKLWriteResGroup215], (instregex "XSAVEC", "XSAVES")>;
def SKLWriteResGroup216 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort0156]> {
let Latency = 46;
let NumMicroOps = 44;
- let ResourceCycles = [1,11,1,1,30];
+ let ReleaseAtCycles = [1,11,1,1,30];
}
def: InstRW<[SKLWriteResGroup216], (instregex "XSAVEOPT")>;
def SKLWriteResGroup217 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort05,SKLPort06,SKLPort0156]> {
let Latency = 62;
let NumMicroOps = 64;
- let ResourceCycles = [2,8,5,10,39];
+ let ReleaseAtCycles = [2,8,5,10,39];
}
def: InstRW<[SKLWriteResGroup217], (instrs FLDENVm)>;
def SKLWriteResGroup218 : SchedWriteRes<[SKLPort0,SKLPort6,SKLPort23,SKLPort05,SKLPort06,SKLPort15,SKLPort0156]> {
let Latency = 63;
let NumMicroOps = 88;
- let ResourceCycles = [4,4,31,1,2,1,45];
+ let ReleaseAtCycles = [4,4,31,1,2,1,45];
}
def: InstRW<[SKLWriteResGroup218], (instrs FXRSTOR64)>;
def SKLWriteResGroup219 : SchedWriteRes<[SKLPort0,SKLPort6,SKLPort23,SKLPort05,SKLPort06,SKLPort15,SKLPort0156]> {
let Latency = 63;
let NumMicroOps = 90;
- let ResourceCycles = [4,2,33,1,2,1,47];
+ let ReleaseAtCycles = [4,2,33,1,2,1,47];
}
def: InstRW<[SKLWriteResGroup219], (instrs FXRSTOR)>;
def SKLWriteResGroup220 : SchedWriteRes<[SKLPort5,SKLPort05,SKLPort0156]> {
let Latency = 75;
let NumMicroOps = 15;
- let ResourceCycles = [6,3,6];
+ let ReleaseAtCycles = [6,3,6];
}
def: InstRW<[SKLWriteResGroup220], (instrs FNINIT)>;
def SKLWriteResGroup223 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort4,SKLPort5,SKLPort6,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 106;
let NumMicroOps = 100;
- let ResourceCycles = [9,1,11,16,1,11,21,30];
+ let ReleaseAtCycles = [9,1,11,16,1,11,21,30];
}
def: InstRW<[SKLWriteResGroup223], (instrs FSTENVm)>;
@@ -1658,7 +1658,7 @@ def : InstRW<[SKLWriteVZeroIdiomALUY], (instrs VPCMPGTBYrr,
def SKLWritePSUB : SchedWriteRes<[SKLPort015]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def SKLWriteVZeroIdiomPSUB : SchedWriteVariant<[
@@ -1677,7 +1677,7 @@ def : InstRW<[SKLWriteVZeroIdiomPSUB], (instrs PSUBBrr, VPSUBBrr,
def SKLWritePCMPGTQ : SchedWriteRes<[SKLPort5]> {
let Latency = 3;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def SKLWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
@@ -1691,13 +1691,13 @@ def : InstRW<[SKLWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr,
// CMOVs that use both the Z and C flags require an extra uop.
def SKLWriteCMOVA_CMOVBErr : SchedWriteRes<[SKLPort06]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 2;
}
def SKLWriteCMOVA_CMOVBErm : SchedWriteRes<[SKLPort23,SKLPort06]> {
let Latency = 7;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
let NumMicroOps = 3;
}
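The two writes above give the flag-pair CMOVs their extra uop, but they reach the opcodes through a SchedWriteVariant keyed on the condition code; only the memory-form alias, SKLCMOVA_CMOVBErm, is visible in the next hunk header. A sketch of the register-form variant, assuming the predicate spelling from X86SchedPredicates.td and the generic WriteCMOV fallback:

def SKLCMOVA_CMOVBErr : SchedWriteVariant<[
  // COND_A / COND_BE read both CF and ZF, so they take the 2-uop write above.
  SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [SKLWriteCMOVA_CMOVBErr]>,
  // Every other condition keeps the default single-uop CMOV model.
  SchedVar<NoSchedPred, [WriteCMOV]>
]>;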
@@ -1717,13 +1717,13 @@ def : InstRW<[SKLCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
// SETCCs that use both the Z and C flags require an extra uop.
def SKLWriteSETA_SETBEr : SchedWriteRes<[SKLPort06]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 2;
}
def SKLWriteSETA_SETBEm : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06]> {
let Latency = 3;
- let ResourceCycles = [1,1,2];
+ let ReleaseAtCycles = [1,1,2];
let NumMicroOps = 4;
}
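Every hunk in this change is the same mechanical rename: the ResourceCycles list of a SchedWriteRes becomes ReleaseAtCycles. The renamed list is read together with a new AcquireAtCycles list, so each entry now names the cycle at which a unit of the processor resource is freed rather than a bare consumption count, and the pair bounds the interval over which the resource is held. A minimal sketch of the pairing, with a hypothetical write name and cycle values:

def : WriteRes<WriteFoo, [SKXPort0]> {
  let AcquireAtCycles = [0];  // port grabbed at issue (the default)
  let ReleaseAtCycles = [3];  // port freed again at cycle 3
}

With the default AcquireAtCycles of zero the two schemes describe the same behavior, which is why the rename can be applied without touching any of the values.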
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 2f9d075891f8..3da688cda2c6 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -94,7 +94,7 @@ multiclass SKXWriteResPair<X86FoldableSchedWrite SchedRW,
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
- let ResourceCycles = Res;
+ let ReleaseAtCycles = Res;
let NumMicroOps = UOps;
}
@@ -102,7 +102,7 @@ multiclass SKXWriteResPair<X86FoldableSchedWrite SchedRW,
// the latency (default = 5).
def : WriteRes<SchedRW.Folded, !listconcat([SKXPort23], ExePorts)> {
let Latency = !add(Lat, LoadLat);
- let ResourceCycles = !listconcat([1], Res);
+ let ReleaseAtCycles = !listconcat([1], Res);
let NumMicroOps = !add(UOps, LoadUOps);
}
}
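With this multiclass, the defm quoted in the next hunk header, SKXWriteResPair<WriteVarVecShiftZ, [SKXPort0], 1, [1], 1, 7>, expands to roughly the following pair of records; a sketch assuming the multiclass default of zero extra load uops:

def : WriteRes<WriteVarVecShiftZ, [SKXPort0]> {
  let Latency = 1;
  let ReleaseAtCycles = [1];
  let NumMicroOps = 1;
}
def : WriteRes<WriteVarVecShiftZ.Folded, [SKXPort23, SKXPort0]> {
  let Latency = 8;               // Lat (1) + the 7-cycle LoadLat
  let ReleaseAtCycles = [1, 1];  // one load-port cycle prepended to Res
  let NumMicroOps = 1;           // UOps + LoadUOps (default 0)
}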
@@ -425,7 +425,7 @@ defm : SKXWriteResPair<WriteVarVecShiftZ, [SKXPort0], 1, [1], 1, 7>;
def : WriteRes<WriteVecInsert, [SKXPort5]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def : WriteRes<WriteVecInsertLd, [SKXPort5,SKXPort23]> {
let Latency = 6;
@@ -490,48 +490,48 @@ defm : X86WriteRes<WriteCvtPS2PHZSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort05], 8
def : WriteRes<WritePCmpIStrM, [SKXPort0]> {
let Latency = 10;
let NumMicroOps = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrMLd, [SKXPort0, SKXPort23]> {
let Latency = 16;
let NumMicroOps = 4;
- let ResourceCycles = [3,1];
+ let ReleaseAtCycles = [3,1];
}
// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [SKXPort0, SKXPort5, SKXPort015, SKXPort0156]> {
let Latency = 19;
let NumMicroOps = 9;
- let ResourceCycles = [4,3,1,1];
+ let ReleaseAtCycles = [4,3,1,1];
}
def : WriteRes<WritePCmpEStrMLd, [SKXPort0, SKXPort5, SKXPort23, SKXPort015, SKXPort0156]> {
let Latency = 25;
let NumMicroOps = 10;
- let ResourceCycles = [4,3,1,1,1];
+ let ReleaseAtCycles = [4,3,1,1,1];
}
// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [SKXPort0]> {
let Latency = 10;
let NumMicroOps = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrILd, [SKXPort0, SKXPort23]> {
let Latency = 16;
let NumMicroOps = 4;
- let ResourceCycles = [3,1];
+ let ReleaseAtCycles = [3,1];
}
// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [SKXPort0,SKXPort5,SKXPort0156]> {
let Latency = 18;
let NumMicroOps = 8;
- let ResourceCycles = [4,3,1];
+ let ReleaseAtCycles = [4,3,1];
}
def : WriteRes<WritePCmpEStrILd, [SKXPort0, SKXPort5, SKXPort23, SKXPort0156]> {
let Latency = 24;
let NumMicroOps = 9;
- let ResourceCycles = [4,3,1,1];
+ let ReleaseAtCycles = [4,3,1,1];
}
// MOVMSK Instructions.
@@ -544,46 +544,46 @@ def : WriteRes<WriteMMXMOVMSK, [SKXPort0]> { let Latency = 2; }
def : WriteRes<WriteAESDecEnc, [SKXPort0]> { // Decryption, encryption.
let Latency = 4;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def : WriteRes<WriteAESDecEncLd, [SKXPort0, SKXPort23]> {
let Latency = 10;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def : WriteRes<WriteAESIMC, [SKXPort0]> { // InvMixColumn.
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def : WriteRes<WriteAESIMCLd, [SKXPort0, SKXPort23]> {
let Latency = 14;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def : WriteRes<WriteAESKeyGen, [SKXPort0,SKXPort5,SKXPort015]> { // Key Generation.
let Latency = 20;
let NumMicroOps = 11;
- let ResourceCycles = [3,6,2];
+ let ReleaseAtCycles = [3,6,2];
}
def : WriteRes<WriteAESKeyGenLd, [SKXPort0,SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 25;
let NumMicroOps = 11;
- let ResourceCycles = [3,6,1,1];
+ let ReleaseAtCycles = [3,6,1,1];
}
// Carry-less multiplication instructions.
def : WriteRes<WriteCLMul, [SKXPort5]> {
let Latency = 6;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def : WriteRes<WriteCLMulLd, [SKXPort5, SKXPort23]> {
let Latency = 12;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
// Catch-all for expensive system instructions.
@@ -603,8 +603,8 @@ def : WriteRes<WriteMicrocoded, [SKXPort0156]> { let Latency = 100; } // def Wri
def : WriteRes<WriteFence, [SKXPort23, SKXPort4]>;
// Load/store MXCSR.
-def : WriteRes<WriteLDMXCSR, [SKXPort0,SKXPort23,SKXPort0156]> { let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
-def : WriteRes<WriteSTMXCSR, [SKXPort4,SKXPort5,SKXPort237]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
+def : WriteRes<WriteLDMXCSR, [SKXPort0,SKXPort23,SKXPort0156]> { let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1]; }
+def : WriteRes<WriteSTMXCSR, [SKXPort4,SKXPort5,SKXPort237]> { let Latency = 2; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1]; }
// Nop, not very useful except it provides a model for nops!
def : WriteRes<WriteNop, []>;
@@ -624,7 +624,7 @@ defm : SKXWriteResPair<WritePHAddY, [SKXPort5,SKXPort015], 3, [2,1], 3, 7>;
def SKXWriteResGroup1 : SchedWriteRes<[SKXPort0]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup1], (instregex "KAND(B|D|Q|W)rr",
"KANDN(B|D|Q|W)rr",
@@ -652,7 +652,7 @@ def: InstRW<[SKXWriteResGroup1], (instregex "KAND(B|D|Q|W)rr",
def SKXWriteResGroup3 : SchedWriteRes<[SKXPort5]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup3], (instregex "COM(P?)_FST0r",
"KMOV(B|D|Q|W)kr",
@@ -661,35 +661,35 @@ def: InstRW<[SKXWriteResGroup3], (instregex "COM(P?)_FST0r",
def SKXWriteResGroup4 : SchedWriteRes<[SKXPort6]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup4], (instregex "JMP(16|32|64)r")>;
def SKXWriteResGroup6 : SchedWriteRes<[SKXPort05]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup6], (instrs FINCSTP, FNOP)>;
def SKXWriteResGroup7 : SchedWriteRes<[SKXPort06]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;
def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup8], (instregex "ANDN(32|64)rr")>;
def SKXWriteResGroup9 : SchedWriteRes<[SKXPort015]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPD(Z128|Z256)rr",
"VBLENDMPS(Z128|Z256)rr",
@@ -707,7 +707,7 @@ def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPD(Z128|Z256)rr",
def SKXWriteResGroup10 : SchedWriteRes<[SKXPort0156]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup10], (instrs SGDT64m,
SIDT64m,
@@ -718,7 +718,7 @@ def: InstRW<[SKXWriteResGroup10], (instrs SGDT64m,
def SKXWriteResGroup11 : SchedWriteRes<[SKXPort4,SKXPort237]> {
let Latency = 1;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup11], (instrs FBSTPm, VMPTRSTm)>;
def: InstRW<[SKXWriteResGroup11], (instregex "KMOV(B|D|Q|W)mk",
@@ -727,14 +727,14 @@ def: InstRW<[SKXWriteResGroup11], (instregex "KMOV(B|D|Q|W)mk",
def SKXWriteResGroup13 : SchedWriteRes<[SKXPort5]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup13], (instrs MMX_MOVQ2DQrr)>;
def SKXWriteResGroup14 : SchedWriteRes<[SKXPort05]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup14], (instrs FDECSTP,
MMX_MOVDQ2Qrr)>;
@@ -742,7 +742,7 @@ def: InstRW<[SKXWriteResGroup14], (instrs FDECSTP,
def SKXWriteResGroup17 : SchedWriteRes<[SKXPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup17], (instrs LFENCE,
WAIT,
@@ -751,21 +751,21 @@ def: InstRW<[SKXWriteResGroup17], (instrs LFENCE,
def SKXWriteResGroup20 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup20], (instregex "CLFLUSH")>;
def SKXWriteResGroup21 : SchedWriteRes<[SKXPort237,SKXPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup21], (instrs SFENCE)>;
def SKXWriteResGroup23 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup23], (instrs CWD,
JCXZ, JECXZ, JRCXZ,
@@ -777,21 +777,21 @@ def: InstRW<[SKXWriteResGroup23], (instrs CWD,
def SKXWriteResGroup25 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup25], (instrs FNSTCW16m)>;
def SKXWriteResGroup27 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort15]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup27], (instregex "MOVBE(16|32|64)mr")>;
def SKXWriteResGroup28 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort0156]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
STOSB, STOSL, STOSQ, STOSW)>;
@@ -800,14 +800,14 @@ def: InstRW<[SKXWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>;
def SKXWriteResGroup29 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort15]> {
let Latency = 2;
let NumMicroOps = 5;
- let ResourceCycles = [2,2,1];
+ let ReleaseAtCycles = [2,2,1];
}
def: InstRW<[SKXWriteResGroup29], (instregex "VMOVDQU8Zmr(b?)")>;
def SKXWriteResGroup30 : SchedWriteRes<[SKXPort0]> {
let Latency = 3;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup30], (instregex "KMOV(B|D|Q|W)rk",
"KORTEST(B|D|Q|W)rr",
@@ -816,7 +816,7 @@ def: InstRW<[SKXWriteResGroup30], (instregex "KMOV(B|D|Q|W)rk",
def SKXWriteResGroup31 : SchedWriteRes<[SKXPort1]> {
let Latency = 3;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup31], (instregex "PDEP(32|64)rr",
"PEXT(32|64)rr")>;
@@ -824,7 +824,7 @@ def: InstRW<[SKXWriteResGroup31], (instregex "PDEP(32|64)rr",
def SKXWriteResGroup32 : SchedWriteRes<[SKXPort5]> {
let Latency = 3;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
"VALIGND(Z|Z128|Z256)rri",
@@ -835,7 +835,7 @@ def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0
def SKXWriteResGroup33 : SchedWriteRes<[SKXPort5]> {
let Latency = 4;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup33], (instregex "KADD(B|D|Q|W)rr",
"KSHIFTL(B|D|Q|W)ri",
@@ -858,28 +858,28 @@ def: InstRW<[SKXWriteResGroup33], (instregex "KADD(B|D|Q|W)rr",
def SKXWriteResGroup34 : SchedWriteRes<[SKXPort0,SKXPort0156]> {
let Latency = 3;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup34], (instrs FNSTSW16r)>;
def SKXWriteResGroup37 : SchedWriteRes<[SKXPort0,SKXPort5]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup37], (instregex "MMX_PH(ADD|SUB)SWrr")>;
def SKXWriteResGroup38 : SchedWriteRes<[SKXPort5,SKXPort01]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup38], (instregex "(V?)PH(ADD|SUB)SW(Y?)rr")>;
def SKXWriteResGroup41 : SchedWriteRes<[SKXPort5,SKXPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup41], (instrs MMX_PACKSSDWrr,
MMX_PACKSSWBrr,
@@ -888,21 +888,21 @@ def: InstRW<[SKXWriteResGroup41], (instrs MMX_PACKSSDWrr,
def SKXWriteResGroup42 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup42], (instregex "CLD")>;
def SKXWriteResGroup43 : SchedWriteRes<[SKXPort237,SKXPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup43], (instrs MFENCE)>;
def SKXWriteResGroup44 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup44], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;
@@ -910,49 +910,49 @@ def: InstRW<[SKXWriteResGroup44], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
def SKXWriteResGroup44b : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
let Latency = 5;
let NumMicroOps = 8;
- let ResourceCycles = [2,4,2];
+ let ReleaseAtCycles = [2,4,2];
}
def: InstRW<[SKXWriteResGroup44b], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;
def SKXWriteResGroup44c : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
let Latency = 6;
let NumMicroOps = 8;
- let ResourceCycles = [2,4,2];
+ let ReleaseAtCycles = [2,4,2];
}
def: InstRW<[SKXWriteResGroup44c], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;
def SKXWriteResGroup45 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237]> {
let Latency = 3;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup45], (instrs FNSTSWm)>;
def SKXWriteResGroup47 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237,SKXPort0156]> {
let Latency = 3;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup47], (instregex "CALL(16|32|64)r")>;
def SKXWriteResGroup48 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 3;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup48], (instrs CALL64pcrel32)>;
def SKXWriteResGroup49 : SchedWriteRes<[SKXPort0]> {
let Latency = 4;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup49], (instregex "MUL_(FPrST0|FST0r|FrST0)")>;
def SKXWriteResGroup50 : SchedWriteRes<[SKXPort01]> {
let Latency = 4;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPD2QQ(Z128|Z256)rr",
"VCVTPD2UQQ(Z128|Z256)rr",
@@ -968,7 +968,7 @@ def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPD2QQ(Z128|Z256)rr",
def SKXWriteResGroup50z : SchedWriteRes<[SKXPort05]> {
let Latency = 4;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup50z], (instrs VCVTPD2QQZrr,
VCVTPD2UQQZrr,
@@ -982,7 +982,7 @@ def: InstRW<[SKXWriteResGroup50z], (instrs VCVTPD2QQZrr,
def SKXWriteResGroup51 : SchedWriteRes<[SKXPort5]> {
let Latency = 4;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup51], (instregex "VEXPANDPD(Z|Z128|Z256)rr",
"VEXPANDPS(Z|Z128|Z256)rr",
@@ -1008,7 +1008,7 @@ def: InstRW<[SKXWriteResGroup51], (instregex "VEXPANDPD(Z|Z128|Z256)rr",
def SKXWriteResGroup54 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> {
let Latency = 4;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup54], (instregex "IST(T?)_FP(16|32|64)m",
"IST_F(16|32)m",
@@ -1017,28 +1017,28 @@ def: InstRW<[SKXWriteResGroup54], (instregex "IST(T?)_FP(16|32|64)m",
def SKXWriteResGroup55 : SchedWriteRes<[SKXPort0156]> {
let Latency = 4;
let NumMicroOps = 4;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
}
def: InstRW<[SKXWriteResGroup55], (instrs FNCLEX)>;
def SKXWriteResGroup56 : SchedWriteRes<[]> {
let Latency = 0;
let NumMicroOps = 4;
- let ResourceCycles = [];
+ let ReleaseAtCycles = [];
}
def: InstRW<[SKXWriteResGroup56], (instrs VZEROUPPER)>;
def SKXWriteResGroup57 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort0156]> {
let Latency = 4;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
+ let ReleaseAtCycles = [1,1,2];
}
def: InstRW<[SKXWriteResGroup57], (instregex "LAR(16|32|64)rr")>;
def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5,SKXPort01]> {
let Latency = 5;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIrr",
"MMX_CVT(T?)PS2PIrr",
@@ -1069,21 +1069,21 @@ def: InstRW<[SKXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIrr",
def SKXWriteResGroup62 : SchedWriteRes<[SKXPort5,SKXPort015]> {
let Latency = 5;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup62], (instregex "VPCONFLICTQZ128rr")>;
def SKXWriteResGroup63 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06]> {
let Latency = 5;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup63], (instregex "STR(16|32|64)r")>;
def SKXWriteResGroup65 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort01]> {
let Latency = 5;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup65], (instregex "VCVTPS2PHZ128mr(b?)",
"VCVTPS2PHZ256mr(b?)",
@@ -1092,7 +1092,7 @@ def: InstRW<[SKXWriteResGroup65], (instregex "VCVTPS2PHZ128mr(b?)",
def SKXWriteResGroup66 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> {
let Latency = 5;
let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
+ let ReleaseAtCycles = [1,2,1];
}
def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVDB(Z|Z128|Z256)mr(b?)",
"VPMOVDW(Z|Z128|Z256)mr(b?)",
@@ -1115,21 +1115,21 @@ def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVDB(Z|Z128|Z256)mr(b?)",
def SKXWriteResGroup67 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
- let ResourceCycles = [1,4];
+ let ReleaseAtCycles = [1,4];
}
def: InstRW<[SKXWriteResGroup67], (instrs XSETBV)>;
def SKXWriteResGroup69 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort0156]> {
let Latency = 5;
let NumMicroOps = 6;
- let ResourceCycles = [1,1,4];
+ let ReleaseAtCycles = [1,1,4];
}
def: InstRW<[SKXWriteResGroup69], (instregex "PUSHF(16|64)")>;
def SKXWriteResGroup71 : SchedWriteRes<[SKXPort23]> {
let Latency = 6;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup71], (instrs VBROADCASTSSrm,
VPBROADCASTDrm,
@@ -1141,7 +1141,7 @@ def: InstRW<[SKXWriteResGroup71], (instregex "(V?)MOVSHDUPrm",
def SKXWriteResGroup72 : SchedWriteRes<[SKXPort5]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup72], (instrs MMX_CVTPI2PSrr)>;
def: InstRW<[SKXWriteResGroup72], (instregex "VCOMPRESSPD(Z|Z128|Z256)rr",
@@ -1153,7 +1153,7 @@ def: InstRW<[SKXWriteResGroup72], (instregex "VCOMPRESSPD(Z|Z128|Z256)rr",
def SKXWriteResGroup73 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup73], (instrs MMX_PADDSBrm,
MMX_PADDSWrm,
@@ -1179,7 +1179,7 @@ def: InstRW<[SKXWriteResGroup73], (instrs MMX_PADDSBrm,
def SKXWriteResGroup76 : SchedWriteRes<[SKXPort6,SKXPort23]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup76], (instrs FARJMP64m)>;
def: InstRW<[SKXWriteResGroup76], (instregex "JMP(16|32|64)m")>;
@@ -1187,7 +1187,7 @@ def: InstRW<[SKXWriteResGroup76], (instregex "JMP(16|32|64)m")>;
def SKXWriteResGroup79 : SchedWriteRes<[SKXPort23,SKXPort15]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup79], (instregex "ANDN(32|64)rm",
"MOVBE(16|32|64)rm")>;
@@ -1195,7 +1195,7 @@ def: InstRW<[SKXWriteResGroup79], (instregex "ANDN(32|64)rm",
def SKXWriteResGroup80 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup80], (instregex "VMOV(64to|QI2)PQIZrm(b?)")>;
def: InstRW<[SKXWriteResGroup80], (instrs VMOVDI2PDIZrm)>;
@@ -1203,7 +1203,7 @@ def: InstRW<[SKXWriteResGroup80], (instrs VMOVDI2PDIZrm)>;
def SKXWriteResGroup81 : SchedWriteRes<[SKXPort23,SKXPort0156]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup81], (instrs POP16r, POP32r, POP64r)>;
def: InstRW<[SKXWriteResGroup81], (instregex "POP(16|32|64)rmr")>;
@@ -1211,7 +1211,7 @@ def: InstRW<[SKXWriteResGroup81], (instregex "POP(16|32|64)rmr")>;
def SKXWriteResGroup82 : SchedWriteRes<[SKXPort5,SKXPort01]> {
let Latency = 6;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup82], (instregex "(V?)CVTSI642SSrr",
"VCVTSI642SSZrr",
@@ -1220,14 +1220,14 @@ def: InstRW<[SKXWriteResGroup82], (instregex "(V?)CVTSI642SSrr",
def SKXWriteResGroup84 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06,SKXPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup84], (instregex "SLDT(16|32|64)r")>;
def SKXWriteResGroup86 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> {
let Latency = 6;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup86], (instregex "SAR(8|16|32|64)m(1|i)",
"SHL(8|16|32|64)m(1|i)",
@@ -1236,7 +1236,7 @@ def: InstRW<[SKXWriteResGroup86], (instregex "SAR(8|16|32|64)m(1|i)",
def SKXWriteResGroup87 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1];
}
def: InstRW<[SKXWriteResGroup87], (instregex "POP(16|32|64)rmm",
"PUSH(16|32|64)rmm")>;
@@ -1244,18 +1244,18 @@ def: InstRW<[SKXWriteResGroup87], (instregex "POP(16|32|64)rmm",
def SKXWriteResGroup88 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
let Latency = 6;
let NumMicroOps = 6;
- let ResourceCycles = [1,5];
+ let ReleaseAtCycles = [1,5];
}
def: InstRW<[SKXWriteResGroup88], (instrs STD)>;
def SKXWriteResGroup89 : SchedWriteRes<[SKXPort23]> {
let Latency = 7;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup89], (instregex "LD_F(32|64|80)m")>;
-def: InstRW<[SKXWriteResGroup89], (instrs VBROADCASTF128,
- VBROADCASTI128,
+def: InstRW<[SKXWriteResGroup89], (instrs VBROADCASTF128rm,
+ VBROADCASTI128rm,
VBROADCASTSDYrm,
VBROADCASTSSYrm,
VMOVDDUPYrm,
@@ -1267,14 +1267,14 @@ def: InstRW<[SKXWriteResGroup89], (instrs VBROADCASTF128,
def SKXWriteResGroup90 : SchedWriteRes<[SKXPort01,SKXPort5]> {
let Latency = 7;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup90], (instrs VCVTDQ2PDYrr)>;
def SKXWriteResGroup92 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 7;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup92], (instregex "VMOVSDZrm(b?)",
"VMOVSSZrm(b?)")>;
@@ -1282,7 +1282,7 @@ def: InstRW<[SKXWriteResGroup92], (instregex "VMOVSDZrm(b?)",
def SKXWriteResGroup92a : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 6;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup92a], (instregex "(V?)PMOV(SX|ZX)BDrm",
"(V?)PMOV(SX|ZX)BQrm",
@@ -1294,7 +1294,7 @@ def: InstRW<[SKXWriteResGroup92a], (instregex "(V?)PMOV(SX|ZX)BDrm",
def SKXWriteResGroup93 : SchedWriteRes<[SKXPort5,SKXPort01]> {
let Latency = 7;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup93], (instregex "VCVTDQ2PDZ256rr",
"VCVTPD2DQ(Y|Z256)rr",
@@ -1313,7 +1313,7 @@ def: InstRW<[SKXWriteResGroup93], (instregex "VCVTDQ2PDZ256rr",
def SKXWriteResGroup93z : SchedWriteRes<[SKXPort5,SKXPort05]> {
let Latency = 7;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup93z], (instrs VCVTDQ2PDZrr,
VCVTPD2DQZrr,
@@ -1332,7 +1332,7 @@ def: InstRW<[SKXWriteResGroup93z], (instrs VCVTDQ2PDZrr,
def SKXWriteResGroup95 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 7;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup95], (instrs VMOVNTDQAZ128rm,
VPBLENDDrmi)>;
@@ -1368,7 +1368,7 @@ def: InstRW<[SKXWriteResGroup95, ReadAfterVecXLd],
def SKXWriteResGroup96 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup96], (instrs MMX_PACKSSDWrm,
MMX_PACKSSWBrm,
@@ -1377,19 +1377,19 @@ def: InstRW<[SKXWriteResGroup96], (instrs MMX_PACKSSDWrm,
def SKXWriteResGroup97 : SchedWriteRes<[SKXPort5,SKXPort015]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
-def: InstRW<[SKXWriteResGroup97], (instregex "VPERMI2W128rr",
- "VPERMI2W256rr",
- "VPERMI2Wrr",
- "VPERMT2W128rr",
- "VPERMT2W256rr",
- "VPERMT2Wrr")>;
+def: InstRW<[SKXWriteResGroup97], (instregex "VPERMI2WZ128rr",
+ "VPERMI2WZ256rr",
+ "VPERMI2WZrr",
+ "VPERMT2WZ128rr",
+ "VPERMT2WZ256rr",
+ "VPERMT2WZrr")>;
def SKXWriteResGroup99 : SchedWriteRes<[SKXPort23,SKXPort0156]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup99], (instrs LEAVE, LEAVE64,
SCASB, SCASL, SCASQ, SCASW)>;
@@ -1397,7 +1397,7 @@ def: InstRW<[SKXWriteResGroup99], (instrs LEAVE, LEAVE64,
def SKXWriteResGroup100 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort01]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup100], (instregex "(V?)CVT(T?)SS2SI64(Z?)rr",
"VCVT(T?)SS2USI64Zrr")>;
@@ -1405,28 +1405,28 @@ def: InstRW<[SKXWriteResGroup100], (instregex "(V?)CVT(T?)SS2SI64(Z?)rr",
def SKXWriteResGroup101 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort05]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup101], (instrs FLDCW16m)>;
def SKXWriteResGroup103 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort0156]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup103], (instregex "KMOV(B|D|Q|W)km")>;
def SKXWriteResGroup104 : SchedWriteRes<[SKXPort6,SKXPort23,SKXPort0156]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup104], (instrs LRET64, RET64)>;
def SKXWriteResGroup106 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> {
let Latency = 7;
let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
+ let ReleaseAtCycles = [1,2,1];
}
def: InstRW<[SKXWriteResGroup106], (instregex "VCOMPRESSPD(Z|Z128|Z256)mr(b?)",
"VCOMPRESSPS(Z|Z128|Z256)mr(b?)",
@@ -1436,7 +1436,7 @@ def: InstRW<[SKXWriteResGroup106], (instregex "VCOMPRESSPD(Z|Z128|Z256)mr(b?)",
def SKXWriteResGroup107 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> {
let Latency = 7;
let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,2];
+ let ReleaseAtCycles = [1,1,1,2];
}
def: InstRW<[SKXWriteResGroup107], (instregex "ROL(8|16|32|64)m(1|i)",
"ROR(8|16|32|64)m(1|i)")>;
@@ -1444,7 +1444,7 @@ def: InstRW<[SKXWriteResGroup107], (instregex "ROL(8|16|32|64)m(1|i)",
def SKXWriteResGroup107_1 : SchedWriteRes<[SKXPort06]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[SKXWriteResGroup107_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
ROR8r1, ROR16r1, ROR32r1, ROR64r1)>;
@@ -1452,14 +1452,14 @@ def: InstRW<[SKXWriteResGroup107_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
def SKXWriteResGroup108 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> {
let Latency = 7;
let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,2];
+ let ReleaseAtCycles = [1,1,1,2];
}
def: InstRW<[SKXWriteResGroup108], (instregex "XADD(8|16|32|64)rm")>;
def SKXWriteResGroup109 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort0156]> {
let Latency = 7;
let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,1,1];
+ let ReleaseAtCycles = [1,1,1,1,1];
}
def: InstRW<[SKXWriteResGroup109], (instregex "CALL(16|32|64)m")>;
def: InstRW<[SKXWriteResGroup109], (instrs FARCALL64m)>;
@@ -1467,7 +1467,7 @@ def: InstRW<[SKXWriteResGroup109], (instrs FARCALL64m)>;
def SKXWriteResGroup110 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> {
let Latency = 7;
let NumMicroOps = 7;
- let ResourceCycles = [1,2,2,2];
+ let ReleaseAtCycles = [1,2,2,2];
}
def: InstRW<[SKXWriteResGroup110], (instrs VPSCATTERDQZ128mr,
VPSCATTERQQZ128mr,
@@ -1477,14 +1477,14 @@ def: InstRW<[SKXWriteResGroup110], (instrs VPSCATTERDQZ128mr,
def SKXWriteResGroup111 : SchedWriteRes<[SKXPort6,SKXPort06,SKXPort15,SKXPort0156]> {
let Latency = 7;
let NumMicroOps = 7;
- let ResourceCycles = [1,3,1,2];
+ let ReleaseAtCycles = [1,3,1,2];
}
def: InstRW<[SKXWriteResGroup111], (instrs LOOP)>;
def SKXWriteResGroup112 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> {
let Latency = 7;
let NumMicroOps = 11;
- let ResourceCycles = [1,4,4,2];
+ let ReleaseAtCycles = [1,4,4,2];
}
def: InstRW<[SKXWriteResGroup112], (instrs VPSCATTERDQZ256mr,
VPSCATTERQQZ256mr,
@@ -1494,7 +1494,7 @@ def: InstRW<[SKXWriteResGroup112], (instrs VPSCATTERDQZ256mr,
def SKXWriteResGroup113 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> {
let Latency = 7;
let NumMicroOps = 19;
- let ResourceCycles = [1,8,8,2];
+ let ReleaseAtCycles = [1,8,8,2];
}
def: InstRW<[SKXWriteResGroup113], (instrs VPSCATTERDQZmr,
VPSCATTERQQZmr,
@@ -1504,14 +1504,14 @@ def: InstRW<[SKXWriteResGroup113], (instrs VPSCATTERDQZmr,
def SKXWriteResGroup114 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
let Latency = 7;
let NumMicroOps = 36;
- let ResourceCycles = [1,16,1,16,2];
+ let ReleaseAtCycles = [1,16,1,16,2];
}
def: InstRW<[SKXWriteResGroup114], (instrs VSCATTERDPSZmr)>;
def SKXWriteResGroup118 : SchedWriteRes<[SKXPort1,SKXPort23]> {
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup118], (instregex "PDEP(32|64)rm",
"PEXT(32|64)rm")>;
@@ -1519,7 +1519,7 @@ def: InstRW<[SKXWriteResGroup118], (instregex "PDEP(32|64)rm",
def SKXWriteResGroup119 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup119], (instregex "FCOM(P?)(32|64)m",
"VPBROADCASTB(Z|Z256)rm(b?)",
@@ -1533,7 +1533,7 @@ def: InstRW<[SKXWriteResGroup119], (instrs VPBROADCASTBYrm,
def SKXWriteResGroup121 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup121], (instrs VMOVNTDQAZ256rm,
VPBLENDDYrmi)>;
@@ -1592,14 +1592,14 @@ def: InstRW<[SKXWriteResGroup121, ReadAfterVecYLd],
def SKXWriteResGroup123 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let Latency = 8;
let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
+ let ReleaseAtCycles = [1,2,1];
}
def: InstRW<[SKXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>;
def SKXWriteResGroup127 : SchedWriteRes<[SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 8;
let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,2];
+ let ReleaseAtCycles = [1,1,1,2];
}
def: InstRW<[SKXWriteResGroup127], (instregex "RCL(8|16|32|64)m(1|i)",
"RCR(8|16|32|64)m(1|i)")>;
@@ -1607,7 +1607,7 @@ def: InstRW<[SKXWriteResGroup127], (instregex "RCL(8|16|32|64)m(1|i)",
def SKXWriteResGroup128 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> {
let Latency = 8;
let NumMicroOps = 6;
- let ResourceCycles = [1,1,1,3];
+ let ReleaseAtCycles = [1,1,1,3];
}
def: InstRW<[SKXWriteResGroup128], (instregex "ROL(8|16|32|64)mCL",
"ROR(8|16|32|64)mCL",
@@ -1618,14 +1618,14 @@ def: InstRW<[SKXWriteResGroup128], (instregex "ROL(8|16|32|64)mCL",
def SKXWriteResGroup130 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 8;
let NumMicroOps = 6;
- let ResourceCycles = [1,1,1,2,1];
+ let ReleaseAtCycles = [1,1,1,2,1];
}
def: SchedAlias<WriteADCRMW, SKXWriteResGroup130>;
def SKXWriteResGroup131 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
let Latency = 8;
let NumMicroOps = 8;
- let ResourceCycles = [1,2,1,2,2];
+ let ReleaseAtCycles = [1,2,1,2,2];
}
def: InstRW<[SKXWriteResGroup131], (instrs VPSCATTERQDZ128mr,
VPSCATTERQDZ256mr,
@@ -1635,7 +1635,7 @@ def: InstRW<[SKXWriteResGroup131], (instrs VPSCATTERQDZ128mr,
def SKXWriteResGroup132 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
let Latency = 8;
let NumMicroOps = 12;
- let ResourceCycles = [1,4,1,4,2];
+ let ReleaseAtCycles = [1,4,1,4,2];
}
def: InstRW<[SKXWriteResGroup132], (instrs VPSCATTERDDZ128mr,
VSCATTERDPSZ128mr)>;
@@ -1643,7 +1643,7 @@ def: InstRW<[SKXWriteResGroup132], (instrs VPSCATTERDDZ128mr,
def SKXWriteResGroup133 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
let Latency = 8;
let NumMicroOps = 20;
- let ResourceCycles = [1,8,1,8,2];
+ let ReleaseAtCycles = [1,8,1,8,2];
}
def: InstRW<[SKXWriteResGroup133], (instrs VPSCATTERDDZ256mr,
VSCATTERDPSZ256mr)>;
@@ -1651,21 +1651,21 @@ def: InstRW<[SKXWriteResGroup133], (instrs VPSCATTERDDZ256mr,
def SKXWriteResGroup134 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
let Latency = 8;
let NumMicroOps = 36;
- let ResourceCycles = [1,16,1,16,2];
+ let ReleaseAtCycles = [1,16,1,16,2];
}
def: InstRW<[SKXWriteResGroup134], (instrs VPSCATTERDDZmr)>;
def SKXWriteResGroup135 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let Latency = 9;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup135], (instrs MMX_CVTPI2PSrm)>;
def SKXWriteResGroup136 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 9;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup136], (instrs VPMOVSXBWYrm,
VPMOVSXDQYrm,
@@ -1675,14 +1675,14 @@ def: InstRW<[SKXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
"VFPCLASSSDZrm(b?)",
"VFPCLASSSSZrm(b?)",
"(V?)PCMPGTQrm",
- "VPERMI2D128rm(b?)",
- "VPERMI2PD128rm(b?)",
- "VPERMI2PS128rm(b?)",
- "VPERMI2Q128rm(b?)",
- "VPERMT2D128rm(b?)",
- "VPERMT2PD128rm(b?)",
- "VPERMT2PS128rm(b?)",
- "VPERMT2Q128rm(b?)",
+ "VPERMI2DZ128rm(b?)",
+ "VPERMI2PDZ128rm(b?)",
+ "VPERMI2PSZ128rm(b?)",
+ "VPERMI2QZ128rm(b?)",
+ "VPERMT2DZ128rm(b?)",
+ "VPERMT2PDZ128rm(b?)",
+ "VPERMT2PSZ128rm(b?)",
+ "VPERMT2QZ128rm(b?)",
"VPMAXSQZ128rm(b?)",
"VPMAXUQZ128rm(b?)",
"VPMINSQZ128rm(b?)",
@@ -1691,7 +1691,7 @@ def: InstRW<[SKXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
def SKXWriteResGroup136_2 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 10;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup136_2], (instregex "VCMP(PD|PS)Z128rm(b?)i",
"VCMP(SD|SS)Zrm",
@@ -1716,7 +1716,7 @@ def: InstRW<[SKXWriteResGroup136_2], (instregex "VCMP(PD|PS)Z128rm(b?)i",
def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort01]> {
let Latency = 9;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIrm",
"(V?)CVTPS2PDrm")>;
@@ -1724,7 +1724,7 @@ def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIrm",
def SKXWriteResGroup143 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
let Latency = 9;
let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
+ let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[SKXWriteResGroup143], (instregex "(V?)PHADDSWrm",
"(V?)PHSUBSWrm")>;
@@ -1732,7 +1732,7 @@ def: InstRW<[SKXWriteResGroup143], (instregex "(V?)PHADDSWrm",
def SKXWriteResGroup146 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156]> {
let Latency = 9;
let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
+ let ReleaseAtCycles = [1,2,1,1];
}
def: InstRW<[SKXWriteResGroup146], (instregex "LAR(16|32|64)rm",
"LSL(16|32|64)rm")>;
@@ -1740,7 +1740,7 @@ def: InstRW<[SKXWriteResGroup146], (instregex "LAR(16|32|64)rm",
def SKXWriteResGroup148 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 10;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup148], (instrs VPCMPGTQYrm)>;
def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
@@ -1755,7 +1755,7 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
def SKXWriteResGroup148_2 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 11;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup148_2], (instregex "VCMPPD(Z|Z256)rm(b?)i",
"VCMPPS(Z|Z256)rm(b?)i",
@@ -1783,7 +1783,7 @@ def: InstRW<[SKXWriteResGroup148_2], (instregex "VCMPPD(Z|Z256)rm(b?)i",
def SKXWriteResGroup149 : SchedWriteRes<[SKXPort23,SKXPort01]> {
let Latency = 10;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup149], (instregex "VCVTDQ2PDZ128rm(b?)",
"VCVTDQ2PSZ128rm(b?)",
@@ -1816,7 +1816,7 @@ def: InstRW<[SKXWriteResGroup149], (instregex "VCVTDQ2PDZ128rm(b?)",
def SKXWriteResGroup151 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 10;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)",
"VEXPANDPSZ128rm(b?)",
@@ -1826,7 +1826,7 @@ def: InstRW<[SKXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)",
def SKXWriteResGroup154 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
let Latency = 10;
let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
+ let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[SKXWriteResGroup154], (instrs VPHADDSWYrm,
VPHSUBSWYrm)>;
@@ -1834,21 +1834,21 @@ def: InstRW<[SKXWriteResGroup154], (instrs VPHADDSWYrm,
def SKXWriteResGroup157 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 10;
let NumMicroOps = 8;
- let ResourceCycles = [1,1,1,1,1,3];
+ let ReleaseAtCycles = [1,1,1,1,1,3];
}
def: InstRW<[SKXWriteResGroup157], (instregex "XCHG(8|16|32|64)rm")>;
def SKXWriteResGroup160 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let Latency = 11;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup160], (instregex "MUL_F(32|64)m")>;
def SKXWriteResGroup161 : SchedWriteRes<[SKXPort23,SKXPort01]> {
let Latency = 11;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup161], (instrs VCVTDQ2PSYrm,
VCVTPS2PDYrm)>;
@@ -1871,7 +1871,7 @@ def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2(PD|PS)(Z|Z256)rm(b?)",
def SKXWriteResGroup162 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 11;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup162], (instregex "FICOM(P?)(16|32)m",
"VEXPANDPD(Z|Z256)rm(b?)",
@@ -1882,14 +1882,14 @@ def: InstRW<[SKXWriteResGroup162], (instregex "FICOM(P?)(16|32)m",
def SKXWriteResGroup164 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let Latency = 11;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup164], (instregex "(V?)CVTDQ2PDrm")>;
def SKXWriteResGroup166 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort01]> {
let Latency = 11;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup166], (instrs CVTPD2DQrm,
CVTTPD2DQrm,
@@ -1899,14 +1899,14 @@ def: InstRW<[SKXWriteResGroup166], (instrs CVTPD2DQrm,
def SKXWriteResGroup167 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 11;
let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
+ let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[SKXWriteResGroup167], (instregex "VPCONFLICTQZ128rm(b?)")>;
def SKXWriteResGroup169 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
let Latency = 11;
let NumMicroOps = 7;
- let ResourceCycles = [2,3,2];
+ let ReleaseAtCycles = [2,3,2];
}
def: InstRW<[SKXWriteResGroup169], (instregex "RCL(16|32|64)rCL",
"RCR(16|32|64)rCL")>;
@@ -1914,42 +1914,42 @@ def: InstRW<[SKXWriteResGroup169], (instregex "RCL(16|32|64)rCL",
def SKXWriteResGroup170 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort0156]> {
let Latency = 11;
let NumMicroOps = 9;
- let ResourceCycles = [1,5,1,2];
+ let ReleaseAtCycles = [1,5,1,2];
}
def: InstRW<[SKXWriteResGroup170], (instrs RCL8rCL)>;
def SKXWriteResGroup171 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
let Latency = 11;
let NumMicroOps = 11;
- let ResourceCycles = [2,9];
+ let ReleaseAtCycles = [2,9];
}
def: InstRW<[SKXWriteResGroup171], (instrs LOOPE, LOOPNE)>;
def SKXWriteResGroup174 : SchedWriteRes<[SKXPort01]> {
let Latency = 15;
let NumMicroOps = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQ(Z128|Z256)rr")>;
def SKXWriteResGroup174z : SchedWriteRes<[SKXPort05]> {
let Latency = 15;
let NumMicroOps = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def: InstRW<[SKXWriteResGroup174z], (instregex "VPMULLQZrr")>;
def SKXWriteResGroup175 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 12;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup175], (instregex "VPERMWZ128rm(b?)")>;
def SKXWriteResGroup176 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort01]> {
let Latency = 12;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup176], (instregex "VCVT(T?)SD2USIZrm(b?)",
"VCVT(T?)SS2USI64Zrm(b?)")>;
@@ -1957,7 +1957,7 @@ def: InstRW<[SKXWriteResGroup176], (instregex "VCVT(T?)SD2USIZrm(b?)",
def SKXWriteResGroup177 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort01]> {
let Latency = 12;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup177], (instregex "VCVT(T?)PS2QQZrm(b?)",
"VCVT(T?)PS2UQQZrm(b?)")>;
@@ -1965,7 +1965,7 @@ def: InstRW<[SKXWriteResGroup177], (instregex "VCVT(T?)PS2QQZrm(b?)",
def SKXWriteResGroup180 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 13;
let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup180], (instregex "(ADD|SUB|SUBR)_FI(16|32)m",
"VPERMWZ256rm(b?)",
@@ -1974,29 +1974,29 @@ def: InstRW<[SKXWriteResGroup180], (instregex "(ADD|SUB|SUBR)_FI(16|32)m",
def SKXWriteResGroup181 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let Latency = 13;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup181], (instrs VCVTDQ2PDYrm)>;
def SKXWriteResGroup183 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 13;
let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
+ let ReleaseAtCycles = [2,1,1];
}
-def: InstRW<[SKXWriteResGroup183], (instregex "VPERMI2W128rm(b?)",
- "VPERMT2W128rm(b?)")>;
+def: InstRW<[SKXWriteResGroup183], (instregex "VPERMI2WZ128rm(b?)",
+ "VPERMT2WZ128rm(b?)")>;
def SKXWriteResGroup187 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let Latency = 14;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup187], (instregex "MUL_FI(16|32)m")>;
def SKXWriteResGroup188 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort01]> {
let Latency = 14;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup188], (instregex "VCVTPD2DQZrm(b?)",
"VCVTPD2UDQZrm(b?)",
@@ -2008,108 +2008,108 @@ def: InstRW<[SKXWriteResGroup188], (instregex "VCVTPD2DQZrm(b?)",
def SKXWriteResGroup189 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 14;
let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
+ let ReleaseAtCycles = [2,1,1];
}
-def: InstRW<[SKXWriteResGroup189], (instregex "VPERMI2W256rm(b?)",
- "VPERMI2Wrm(b?)",
- "VPERMT2W256rm(b?)",
- "VPERMT2Wrm(b?)")>;
+def: InstRW<[SKXWriteResGroup189], (instregex "VPERMI2WZ256rm(b?)",
+ "VPERMI2WZrm(b?)",
+ "VPERMT2WZ256rm(b?)",
+ "VPERMT2WZrm(b?)")>;
def SKXWriteResGroup190 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort0156]> {
let Latency = 14;
let NumMicroOps = 10;
- let ResourceCycles = [2,4,1,3];
+ let ReleaseAtCycles = [2,4,1,3];
}
def: InstRW<[SKXWriteResGroup190], (instrs RCR8rCL)>;
def SKXWriteResGroup191 : SchedWriteRes<[SKXPort0]> {
let Latency = 15;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup191], (instregex "DIVR_(FPrST0|FST0r|FrST0)")>;
def SKXWriteResGroup194 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
let Latency = 15;
let NumMicroOps = 8;
- let ResourceCycles = [1,2,2,1,2];
+ let ReleaseAtCycles = [1,2,2,1,2];
}
def: InstRW<[SKXWriteResGroup194], (instregex "VPCONFLICTDZ128rm(b?)")>;
def SKXWriteResGroup195 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> {
let Latency = 15;
let NumMicroOps = 10;
- let ResourceCycles = [1,1,1,5,1,1];
+ let ReleaseAtCycles = [1,1,1,5,1,1];
}
def: InstRW<[SKXWriteResGroup195], (instregex "RCL(8|16|32|64)mCL")>;
def SKXWriteResGroup199 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> {
let Latency = 16;
let NumMicroOps = 14;
- let ResourceCycles = [1,1,1,4,2,5];
+ let ReleaseAtCycles = [1,1,1,4,2,5];
}
def: InstRW<[SKXWriteResGroup199], (instrs CMPXCHG8B)>;
def SKXWriteResGroup200 : SchedWriteRes<[SKXPort1, SKXPort05, SKXPort6]> {
let Latency = 12;
let NumMicroOps = 34;
- let ResourceCycles = [1, 4, 5];
+ let ReleaseAtCycles = [1, 4, 5];
}
def: InstRW<[SKXWriteResGroup200], (instrs VZEROALL)>;
def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> {
let Latency = 17;
let NumMicroOps = 15;
- let ResourceCycles = [2,1,2,4,2,4];
+ let ReleaseAtCycles = [2,1,2,4,2,4];
}
def: InstRW<[SKXWriteResGroup202], (instrs XCH_F)>;
def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort01]> {
let Latency = 21;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ReleaseAtCycles = [1,3];
}
def: InstRW<[SKXWriteResGroup205], (instregex "VPMULLQZ128rm(b?)")>;
def SKXWriteResGroup207 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort06,SKXPort0156]> {
let Latency = 18;
let NumMicroOps = 8;
- let ResourceCycles = [1,1,1,5];
+ let ReleaseAtCycles = [1,1,1,5];
}
def: InstRW<[SKXWriteResGroup207], (instrs CPUID, RDTSC)>;
def SKXWriteResGroup208 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> {
let Latency = 18;
let NumMicroOps = 11;
- let ResourceCycles = [2,1,1,4,1,2];
+ let ReleaseAtCycles = [2,1,1,4,1,2];
}
def: InstRW<[SKXWriteResGroup208], (instregex "RCR(8|16|32|64)mCL")>;
def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort01]> {
let Latency = 22;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ReleaseAtCycles = [1,3];
}
def: InstRW<[SKXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)")>;
def SKXWriteResGroup211_1 : SchedWriteRes<[SKXPort23,SKXPort05]> {
let Latency = 22;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ReleaseAtCycles = [1,3];
}
def: InstRW<[SKXWriteResGroup211_1], (instregex "VPMULLQZrm(b?)")>;
def SKXWriteResGroup215 : SchedWriteRes<[SKXPort0]> {
let Latency = 20;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup215], (instregex "DIV_(FPrST0|FST0r|FrST0)")>;
def SKXWriteGatherEVEX2 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
let Latency = 17;
let NumMicroOps = 5; // 2 uops perform multiple loads
- let ResourceCycles = [1,2,1,1];
+ let ReleaseAtCycles = [1,2,1,1];
}
def: InstRW<[SKXWriteGatherEVEX2], (instrs VGATHERQPSZ128rm, VPGATHERQDZ128rm,
VGATHERDPDZ128rm, VPGATHERDQZ128rm,
@@ -2118,7 +2118,7 @@ def: InstRW<[SKXWriteGatherEVEX2], (instrs VGATHERQPSZ128rm, VPGATHERQDZ128rm,
def SKXWriteGatherEVEX4 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
let Latency = 19;
let NumMicroOps = 5; // 2 uops perform multiple loads
- let ResourceCycles = [1,4,1,1];
+ let ReleaseAtCycles = [1,4,1,1];
}
def: InstRW<[SKXWriteGatherEVEX4], (instrs VGATHERQPSZ256rm, VPGATHERQDZ256rm,
VGATHERQPDZ256rm, VPGATHERQQZ256rm,
@@ -2128,7 +2128,7 @@ def: InstRW<[SKXWriteGatherEVEX4], (instrs VGATHERQPSZ256rm, VPGATHERQDZ256rm,
def SKXWriteGatherEVEX8 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
let Latency = 21;
let NumMicroOps = 5; // 2 uops perform multiple loads
- let ResourceCycles = [1,8,1,1];
+ let ReleaseAtCycles = [1,8,1,1];
}
def: InstRW<[SKXWriteGatherEVEX8], (instrs VGATHERDPSZ256rm, VPGATHERDDZ256rm,
VGATHERDPDZrm, VPGATHERDQZrm,
@@ -2138,35 +2138,35 @@ def: InstRW<[SKXWriteGatherEVEX8], (instrs VGATHERDPSZ256rm, VPGATHERDDZ256rm,
def SKXWriteGatherEVEX16 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
let Latency = 25;
let NumMicroOps = 5; // 2 uops perform multiple loads
- let ResourceCycles = [1,16,1,1];
+ let ReleaseAtCycles = [1,16,1,1];
}
def: InstRW<[SKXWriteGatherEVEX16], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>;
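Across these four EVEX gather classes only the latency and the second ReleaseAtCycles entry vary: the load port SKXPort23 is charged one cycle per gathered element (2, 4, 8, 16) while the other three ports keep a fixed single cycle, so the 16-element VGATHERDPSZrm above holds the load port for 16 of its 25 cycles.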
def SKXWriteResGroup219 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 20;
let NumMicroOps = 8;
- let ResourceCycles = [1,1,1,1,1,1,2];
+ let ReleaseAtCycles = [1,1,1,1,1,1,2];
}
def: InstRW<[SKXWriteResGroup219], (instrs INSB, INSL, INSW)>;
def SKXWriteResGroup220 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort0156]> {
let Latency = 20;
let NumMicroOps = 10;
- let ResourceCycles = [1,2,7];
+ let ReleaseAtCycles = [1,2,7];
}
def: InstRW<[SKXWriteResGroup220], (instrs MWAITrr)>;
def SKXWriteResGroup223 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let Latency = 22;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup223], (instregex "DIV_F(32|64)m")>;
def SKXWriteResGroupVEX2 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
let Latency = 18;
let NumMicroOps = 5; // 2 uops perform multiple loads
- let ResourceCycles = [1,2,1,1];
+ let ReleaseAtCycles = [1,2,1,1];
}
def: InstRW<[SKXWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm,
VGATHERQPDrm, VPGATHERQQrm,
@@ -2175,7 +2175,7 @@ def: InstRW<[SKXWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm,
def SKXWriteResGroupVEX4 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
let Latency = 20;
let NumMicroOps = 5; // 2 uops perform multiple loads
- let ResourceCycles = [1,4,1,1];
+ let ReleaseAtCycles = [1,4,1,1];
}
def: InstRW<[SKXWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
VGATHERDPSrm, VPGATHERDDrm,
@@ -2185,14 +2185,14 @@ def: InstRW<[SKXWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
def SKXWriteResGroupVEX8 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
let Latency = 22;
let NumMicroOps = 5; // 2 uops perform multiple loads
- let ResourceCycles = [1,8,1,1];
+ let ReleaseAtCycles = [1,8,1,1];
}
def: InstRW<[SKXWriteResGroupVEX8], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
def SKXWriteResGroup225 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> {
let Latency = 22;
let NumMicroOps = 14;
- let ResourceCycles = [5,5,4];
+ let ReleaseAtCycles = [5,5,4];
}
def: InstRW<[SKXWriteResGroup225], (instregex "VPCONFLICTDZ128rr",
"VPCONFLICTQZ256rr")>;
@@ -2200,42 +2200,42 @@ def: InstRW<[SKXWriteResGroup225], (instregex "VPCONFLICTDZ128rr",
def SKXWriteResGroup228 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 23;
let NumMicroOps = 19;
- let ResourceCycles = [2,1,4,1,1,4,6];
+ let ReleaseAtCycles = [2,1,4,1,1,4,6];
}
def: InstRW<[SKXWriteResGroup228], (instrs CMPXCHG16B)>;
def SKXWriteResGroup233 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let Latency = 25;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup233], (instregex "DIV_FI(16|32)m")>;
def SKXWriteResGroup239 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let Latency = 27;
let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+ let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup239], (instregex "DIVR_F(32|64)m")>;
def SKXWriteResGroup242 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
let Latency = 29;
let NumMicroOps = 15;
- let ResourceCycles = [5,5,1,4];
+ let ReleaseAtCycles = [5,5,1,4];
}
def: InstRW<[SKXWriteResGroup242], (instregex "VPCONFLICTQZ256rm(b?)")>;
def SKXWriteResGroup243 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let Latency = 30;
let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+ let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup243], (instregex "DIVR_FI(16|32)m")>;
def SKXWriteResGroup247 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort06,SKXPort0156]> {
let Latency = 35;
let NumMicroOps = 23;
- let ResourceCycles = [1,5,3,4,10];
+ let ReleaseAtCycles = [1,5,3,4,10];
}
def: InstRW<[SKXWriteResGroup247], (instregex "IN(8|16|32)ri",
"IN(8|16|32)rr")>;
@@ -2243,7 +2243,7 @@ def: InstRW<[SKXWriteResGroup247], (instregex "IN(8|16|32)ri",
def SKXWriteResGroup248 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 35;
let NumMicroOps = 23;
- let ResourceCycles = [1,5,2,1,4,10];
+ let ReleaseAtCycles = [1,5,2,1,4,10];
}
def: InstRW<[SKXWriteResGroup248], (instregex "OUT(8|16|32)ir",
"OUT(8|16|32)rr")>;
@@ -2251,7 +2251,7 @@ def: InstRW<[SKXWriteResGroup248], (instregex "OUT(8|16|32)ir",
def SKXWriteResGroup249 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> {
let Latency = 37;
let NumMicroOps = 21;
- let ResourceCycles = [9,7,5];
+ let ReleaseAtCycles = [9,7,5];
}
def: InstRW<[SKXWriteResGroup249], (instregex "VPCONFLICTDZ256rr",
"VPCONFLICTQZrr")>;
@@ -2259,35 +2259,35 @@ def: InstRW<[SKXWriteResGroup249], (instregex "VPCONFLICTDZ256rr",
def SKXWriteResGroup250 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156]> {
let Latency = 37;
let NumMicroOps = 31;
- let ResourceCycles = [1,8,1,21];
+ let ReleaseAtCycles = [1,8,1,21];
}
def: InstRW<[SKXWriteResGroup250], (instregex "XRSTOR(64)?")>;
def SKXWriteResGroup252 : SchedWriteRes<[SKXPort1,SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort15,SKXPort0156]> {
let Latency = 40;
let NumMicroOps = 18;
- let ResourceCycles = [1,1,2,3,1,1,1,8];
+ let ReleaseAtCycles = [1,1,2,3,1,1,1,8];
}
def: InstRW<[SKXWriteResGroup252], (instrs VMCLEARm)>;
def SKXWriteResGroup253 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort0156]> {
let Latency = 41;
let NumMicroOps = 39;
- let ResourceCycles = [1,10,1,1,26];
+ let ReleaseAtCycles = [1,10,1,1,26];
}
def: InstRW<[SKXWriteResGroup253], (instrs XSAVE64)>;
def SKXWriteResGroup254 : SchedWriteRes<[SKXPort5,SKXPort0156]> {
let Latency = 42;
let NumMicroOps = 22;
- let ResourceCycles = [2,20];
+ let ReleaseAtCycles = [2,20];
}
def: InstRW<[SKXWriteResGroup254], (instrs RDTSCP)>;
def SKXWriteResGroup255 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort0156]> {
let Latency = 42;
let NumMicroOps = 40;
- let ResourceCycles = [1,11,1,1,26];
+ let ReleaseAtCycles = [1,11,1,1,26];
}
def: InstRW<[SKXWriteResGroup255], (instrs XSAVE)>;
def: InstRW<[SKXWriteResGroup255], (instregex "XSAVEC", "XSAVES", "XSAVEOPT")>;
@@ -2295,7 +2295,7 @@ def: InstRW<[SKXWriteResGroup255], (instregex "XSAVEC", "XSAVES", "XSAVEOPT")>;
def SKXWriteResGroup256 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
let Latency = 44;
let NumMicroOps = 22;
- let ResourceCycles = [9,7,1,5];
+ let ReleaseAtCycles = [9,7,1,5];
}
def: InstRW<[SKXWriteResGroup256], (instregex "VPCONFLICTDZ256rm(b?)",
"VPCONFLICTQZrm(b?)")>;
@@ -2303,56 +2303,56 @@ def: InstRW<[SKXWriteResGroup256], (instregex "VPCONFLICTDZ256rm(b?)",
def SKXWriteResGroup258 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort05,SKXPort06,SKXPort0156]> {
let Latency = 62;
let NumMicroOps = 64;
- let ResourceCycles = [2,8,5,10,39];
+ let ReleaseAtCycles = [2,8,5,10,39];
}
def: InstRW<[SKXWriteResGroup258], (instrs FLDENVm)>;
def SKXWriteResGroup259 : SchedWriteRes<[SKXPort0,SKXPort6,SKXPort23,SKXPort05,SKXPort06,SKXPort15,SKXPort0156]> {
let Latency = 63;
let NumMicroOps = 88;
- let ResourceCycles = [4,4,31,1,2,1,45];
+ let ReleaseAtCycles = [4,4,31,1,2,1,45];
}
def: InstRW<[SKXWriteResGroup259], (instrs FXRSTOR64)>;
def SKXWriteResGroup260 : SchedWriteRes<[SKXPort0,SKXPort6,SKXPort23,SKXPort05,SKXPort06,SKXPort15,SKXPort0156]> {
let Latency = 63;
let NumMicroOps = 90;
- let ResourceCycles = [4,2,33,1,2,1,47];
+ let ReleaseAtCycles = [4,2,33,1,2,1,47];
}
def: InstRW<[SKXWriteResGroup260], (instrs FXRSTOR)>;
def SKXWriteResGroup261 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> {
let Latency = 67;
let NumMicroOps = 35;
- let ResourceCycles = [17,11,7];
+ let ReleaseAtCycles = [17,11,7];
}
def: InstRW<[SKXWriteResGroup261], (instregex "VPCONFLICTDZrr")>;
def SKXWriteResGroup262 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
let Latency = 74;
let NumMicroOps = 36;
- let ResourceCycles = [17,11,1,7];
+ let ReleaseAtCycles = [17,11,1,7];
}
def: InstRW<[SKXWriteResGroup262], (instregex "VPCONFLICTDZrm(b?)")>;
def SKXWriteResGroup263 : SchedWriteRes<[SKXPort5,SKXPort05,SKXPort0156]> {
let Latency = 75;
let NumMicroOps = 15;
- let ResourceCycles = [6,3,6];
+ let ReleaseAtCycles = [6,3,6];
}
def: InstRW<[SKXWriteResGroup263], (instrs FNINIT)>;
def SKXWriteResGroup266 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort4,SKXPort5,SKXPort6,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 106;
let NumMicroOps = 100;
- let ResourceCycles = [9,1,11,16,1,11,21,30];
+ let ReleaseAtCycles = [9,1,11,16,1,11,21,30];
}
def: InstRW<[SKXWriteResGroup266], (instrs FSTENVm)>;
def SKXWriteResGroup267 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
let Latency = 140;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ReleaseAtCycles = [1,3];
}
def: InstRW<[SKXWriteResGroup267], (instrs PAUSE)>;
@@ -2439,7 +2439,7 @@ def : InstRW<[SKXWriteVZeroIdiomALUY], (instrs VPCMPGTBYrr,
def SKXWritePSUB : SchedWriteRes<[SKXPort015]> {
let Latency = 1;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def SKXWriteVZeroIdiomPSUB : SchedWriteVariant<[
@@ -2462,7 +2462,7 @@ def : InstRW<[SKXWriteVZeroIdiomPSUB], (instrs PSUBBrr, VPSUBBrr, VPSUBBZ128rr,
def SKXWritePCMPGTQ : SchedWriteRes<[SKXPort5]> {
let Latency = 3;
let NumMicroOps = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def SKXWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
@@ -2476,13 +2476,13 @@ def : InstRW<[SKXWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr,
// CMOVs that use both the Z and C flags require an extra uop.
def SKXWriteCMOVA_CMOVBErr : SchedWriteRes<[SKXPort06]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 2;
}
def SKXWriteCMOVA_CMOVBErm : SchedWriteRes<[SKXPort23,SKXPort06]> {
let Latency = 7;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
let NumMicroOps = 3;
}
@@ -2502,13 +2502,13 @@ def : InstRW<[SKXCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
// SETCCs that use both the Z and C flags require an extra uop.
def SKXWriteSETA_SETBEr : SchedWriteRes<[SKXPort06]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 2;
}
def SKXWriteSETA_SETBEm : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06]> {
let Latency = 3;
- let ResourceCycles = [1,1,2];
+ let ReleaseAtCycles = [1,1,2];
let NumMicroOps = 4;
}
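Every hunk above and below applies the same mechanical rename: the per-resource busy-cycle list on scheduling writes, formerly ResourceCycles, is now ReleaseAtCycles (the rename landed during the LLVM 18 cycle; the new name states the cycle at which each listed resource is released). A few hunks additionally track a rename of the 128-bit broadcast instruction records to their explicit memory forms (VBROADCASTF128 -> VBROADCASTF128rm, VBROADCASTI128 -> VBROADCASTI128rm). A minimal sketch of the field rename, using hypothetical record and port names (MyWrite, MyPort0, MyPort1 are illustrative, not from this patch; the ports would be ProcResource defs):

def MyWrite : SchedWriteRes<[MyPort0, MyPort1]> {
  let Latency = 5;                 // cycles until the result is ready
  let NumMicroOps = 2;             // decoded micro-op count
  let ReleaseAtCycles = [1, 4];    // was: let ResourceCycles = [1, 4];
}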
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86Schedule.td b/contrib/llvm-project/llvm/lib/Target/X86/X86Schedule.td
index 3321ed737a44..faa8be05d179 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86Schedule.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86Schedule.td
@@ -27,13 +27,13 @@ def ReadInt2Fpu : SchedRead;
// load + WriteRMW.
def WriteRMW : SchedWrite;
-// Helper to set SchedWrite ExePorts/Latency/ResourceCycles/NumMicroOps.
+// Helper to set SchedWrite ExePorts/Latency/ReleaseAtCycles/NumMicroOps.
multiclass X86WriteRes<SchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res, int UOps> {
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
- let ResourceCycles = Res;
+ let ReleaseAtCycles = Res;
let NumMicroOps = UOps;
}
}
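The X86WriteRes helper forwards its Res argument straight into ReleaseAtCycles, so the many defm call sites are untouched by the rename. For example, a defm like the one below (this exact form appears in the Atom model further down) keeps reading the same; only the field it populates changed name:

defm : X86WriteRes<WriteBitTest, [AtomPort1], 1, [1], 1>;
// expands to: WriteRes<WriteBitTest, [AtomPort1]> with
//   Latency = 1, ReleaseAtCycles = [1], NumMicroOps = 1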
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleAtom.td b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleAtom.td
index b7336161b2e0..c92bc97cfb38 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -62,14 +62,14 @@ multiclass AtomWriteResPair<X86FoldableSchedWrite SchedRW,
// Register variant.
def : WriteRes<SchedRW, RRPorts> {
let Latency = RRLat;
- let ResourceCycles = RRRes;
+ let ReleaseAtCycles = RRRes;
let NumMicroOps = RRUOps;
}
// Memory variant.
def : WriteRes<SchedRW.Folded, RMPorts> {
let Latency = RMLat;
- let ResourceCycles = RMRes;
+ let ReleaseAtCycles = RMRes;
let NumMicroOps = RMUOps;
}
}
@@ -122,11 +122,11 @@ defm : X86WriteRes<WriteFCMOV, [AtomPort01], 9, [9], 1>; // x87 conditional move
def : WriteRes<WriteSETCC, [AtomPort01]>;
def : WriteRes<WriteSETCCStore, [AtomPort01]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def : WriteRes<WriteLAHFSAHF, [AtomPort01]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
defm : X86WriteRes<WriteBitTest, [AtomPort1], 1, [1], 1>;
defm : X86WriteRes<WriteBitTestImmLd, [AtomPort0], 1, [1], 1>;
@@ -462,10 +462,10 @@ defm : X86WriteResPairUnsupported<WritePCmpEStrM>;
// MOVMSK Instructions.
////////////////////////////////////////////////////////////////////////////////
-def : WriteRes<WriteFMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
-def : WriteRes<WriteVecMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
+def : WriteRes<WriteFMOVMSK, [AtomPort0]> { let Latency = 3; let ReleaseAtCycles = [3]; }
+def : WriteRes<WriteVecMOVMSK, [AtomPort0]> { let Latency = 3; let ReleaseAtCycles = [3]; }
defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
-def : WriteRes<WriteMMXMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
+def : WriteRes<WriteMMXMOVMSK, [AtomPort0]> { let Latency = 3; let ReleaseAtCycles = [3]; }
////////////////////////////////////////////////////////////////////////////////
// AES instructions.
@@ -505,7 +505,7 @@ defm : X86WriteRes<WriteSTMXCSR, [AtomPort0,AtomPort1], 15, [15,15], 4>;
// Port0
def AtomWrite0_1 : SchedWriteRes<[AtomPort0]> {
let Latency = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def : InstRW<[AtomWrite0_1], (instrs XAM_F, LD_Frr,
MOVSX64rr32)>;
@@ -517,14 +517,14 @@ def : InstRW<[AtomWrite0_1], (instregex "(RCL|RCR|ROL|ROR|SAR|SHL|SHR)(8|16|32|6
// Port1
def AtomWrite1_1 : SchedWriteRes<[AtomPort1]> {
let Latency = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def : InstRW<[AtomWrite1_1], (instrs FCOMPP)>;
def : InstRW<[AtomWrite1_1], (instregex "UCOM_F(P|PP)?r")>;
def AtomWrite1_5 : SchedWriteRes<[AtomPort1]> {
let Latency = 5;
- let ResourceCycles = [5];
+ let ReleaseAtCycles = [5];
}
def : InstRW<[AtomWrite1_5], (instrs MMX_CVTPI2PSrr, MMX_CVTPI2PSrm,
MMX_CVTPS2PIrr, MMX_CVTTPS2PIrr)>;
@@ -532,7 +532,7 @@ def : InstRW<[AtomWrite1_5], (instrs MMX_CVTPI2PSrr, MMX_CVTPI2PSrm,
// Port0 and Port1
def AtomWrite0_1_1 : SchedWriteRes<[AtomPort0, AtomPort1]> {
let Latency = 1;
- let ResourceCycles = [1, 1];
+ let ReleaseAtCycles = [1, 1];
}
def : InstRW<[AtomWrite0_1_1], (instrs POP32r, POP64r,
POP16rmr, POP32rmr, POP64rmr,
@@ -546,41 +546,41 @@ def : InstRW<[AtomWrite0_1_1], (instregex "RETI(16|32|64)$",
def AtomWrite0_1_5 : SchedWriteRes<[AtomPort0, AtomPort1]> {
let Latency = 5;
- let ResourceCycles = [5, 5];
+ let ReleaseAtCycles = [5, 5];
}
def : InstRW<[AtomWrite0_1_5], (instrs MMX_CVTPS2PIrm, MMX_CVTTPS2PIrm)>;
def : InstRW<[AtomWrite0_1_5], (instregex "ILD_F(16|32|64)")>;
def AtomWrite0_1_7 : SchedWriteRes<[AtomPort0,AtomPort1]> {
let Latency = 7;
- let ResourceCycles = [6,6];
+ let ReleaseAtCycles = [6,6];
}
def : InstRW<[AtomWrite0_1_7], (instregex "CVTSI642SDrm(_Int)?")>;
def AtomWrite0_1_7_4 : SchedWriteRes<[AtomPort0,AtomPort1]> {
let Latency = 7;
- let ResourceCycles = [8,8];
+ let ReleaseAtCycles = [8,8];
let NumMicroOps = 4;
}
def : InstRW<[AtomWrite0_1_7_4], (instregex "CVTSI642SSrr(_Int)?")>;
def AtomWrite0_1_8_4 : SchedWriteRes<[AtomPort0,AtomPort1]> {
let Latency = 8;
- let ResourceCycles = [8,8];
+ let ReleaseAtCycles = [8,8];
let NumMicroOps = 4;
}
def : InstRW<[AtomWrite0_1_7_4], (instregex "CVTSI642SSrm(_Int)?")>;
def AtomWrite0_1_9 : SchedWriteRes<[AtomPort0,AtomPort1]> {
let Latency = 9;
- let ResourceCycles = [9,9];
+ let ReleaseAtCycles = [9,9];
let NumMicroOps = 4;
}
def : InstRW<[AtomWrite0_1_9], (instregex "CVT(T)?SS2SI64rr(_Int)?")>;
def AtomWrite0_1_10 : SchedWriteRes<[AtomPort0,AtomPort1]> {
let Latency = 10;
- let ResourceCycles = [11,11];
+ let ReleaseAtCycles = [11,11];
let NumMicroOps = 5;
}
def : InstRW<[AtomWrite0_1_10], (instregex "CVT(T)?SS2SI64rm(_Int)?")>;
@@ -588,7 +588,7 @@ def : InstRW<[AtomWrite0_1_10], (instregex "CVT(T)?SS2SI64rm(_Int)?")>;
// Port0 or Port1
def AtomWrite01_1 : SchedWriteRes<[AtomPort01]> {
let Latency = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
}
def : InstRW<[AtomWrite01_1], (instrs FDECSTP, FFREE, FFREEP, FINCSTP, WAIT,
LFENCE,
@@ -597,7 +597,7 @@ def : InstRW<[AtomWrite01_1], (instrs FDECSTP, FFREE, FFREEP, FINCSTP, WAIT,
def AtomWrite01_2 : SchedWriteRes<[AtomPort01]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def : InstRW<[AtomWrite01_2], (instrs LEAVE, LEAVE64, POP16r,
PUSH16rmm, PUSH32rmm, PUSH64rmm,
@@ -614,7 +614,7 @@ def : SchedAlias<WriteBitTestSetImmRMW, AtomWrite01_2>;
def AtomWrite01_3 : SchedWriteRes<[AtomPort01]> {
let Latency = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def : InstRW<[AtomWrite01_3], (instrs CLD, LDDQUrm,
CMPSB, CMPSL, CMPSQ, CMPSW,
@@ -630,7 +630,7 @@ def : InstRW<[AtomWrite01_3], (instregex "XADD(8|16|32|64)rm",
def AtomWrite01_4 : SchedWriteRes<[AtomPort01]> {
let Latency = 4;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
}
def : InstRW<[AtomWrite01_4], (instrs CBW, CWD, CWDE, CDQ, CDQE, CQO,
JCXZ, JECXZ, JRCXZ,
@@ -640,14 +640,14 @@ def : InstRW<[AtomWrite01_4], (instregex "PH(ADD|SUB)Drm",
def AtomWrite01_5 : SchedWriteRes<[AtomPort01]> {
let Latency = 5;
- let ResourceCycles = [5];
+ let ReleaseAtCycles = [5];
}
def : InstRW<[AtomWrite01_5], (instrs FLDCW16m, ST_FP80m)>;
def : InstRW<[AtomWrite01_5], (instregex "MMX_PH(ADD|SUB)S?Wrr")>;
def AtomWrite01_6 : SchedWriteRes<[AtomPort01]> {
let Latency = 6;
- let ResourceCycles = [6];
+ let ReleaseAtCycles = [6];
}
def : InstRW<[AtomWrite01_6], (instrs CMPXCHG8rm, INTO, XLAT,
SHLD16rrCL, SHRD16rrCL,
@@ -659,13 +659,13 @@ def : InstRW<[AtomWrite01_6], (instregex "IST_F(P)?(16|32|64)?m",
def AtomWrite01_7 : SchedWriteRes<[AtomPort01]> {
let Latency = 7;
- let ResourceCycles = [7];
+ let ReleaseAtCycles = [7];
}
def : InstRW<[AtomWrite01_7], (instrs AAD8i8)>;
def AtomWrite01_8 : SchedWriteRes<[AtomPort01]> {
let Latency = 8;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
}
def : InstRW<[AtomWrite01_8], (instrs LOOPE,
PUSHA16, PUSHA32,
@@ -674,7 +674,7 @@ def : InstRW<[AtomWrite01_8], (instrs LOOPE,
def AtomWrite01_9 : SchedWriteRes<[AtomPort01]> {
let Latency = 9;
- let ResourceCycles = [9];
+ let ReleaseAtCycles = [9];
}
def : InstRW<[AtomWrite01_9], (instrs POPA16, POPA32,
PUSHF16, PUSHF32, PUSHF64,
@@ -686,256 +686,256 @@ def : InstRW<[AtomWrite01_9], (instregex "(U)?COM_FI", "TST_F")>;
def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> {
let Latency = 10;
- let ResourceCycles = [10];
+ let ReleaseAtCycles = [10];
}
def : SchedAlias<WriteFLDC, AtomWrite01_10>;
def AtomWrite01_11 : SchedWriteRes<[AtomPort01]> {
let Latency = 11;
- let ResourceCycles = [11];
+ let ReleaseAtCycles = [11];
}
def : InstRW<[AtomWrite01_11], (instrs BOUNDS16rm, BOUNDS32rm)>;
def : SchedAlias<WriteBitTestSetRegRMW, AtomWrite01_11>;
def AtomWrite01_13 : SchedWriteRes<[AtomPort01]> {
let Latency = 13;
- let ResourceCycles = [13];
+ let ReleaseAtCycles = [13];
}
def : InstRW<[AtomWrite01_13], (instrs AAA, AAS)>;
def AtomWrite01_14 : SchedWriteRes<[AtomPort01]> {
let Latency = 14;
- let ResourceCycles = [14];
+ let ReleaseAtCycles = [14];
}
def : InstRW<[AtomWrite01_14], (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;
def AtomWrite01_17 : SchedWriteRes<[AtomPort01]> {
let Latency = 17;
- let ResourceCycles = [17];
+ let ReleaseAtCycles = [17];
}
def : InstRW<[AtomWrite01_17], (instrs LOOPNE, PAUSE)>;
def AtomWrite01_18 : SchedWriteRes<[AtomPort01]> {
let Latency = 18;
- let ResourceCycles = [18];
+ let ReleaseAtCycles = [18];
}
def : InstRW<[AtomWrite01_18], (instrs CMPXCHG8B, DAA, LOOP)>;
def AtomWrite01_20 : SchedWriteRes<[AtomPort01]> {
let Latency = 20;
- let ResourceCycles = [20];
+ let ReleaseAtCycles = [20];
}
def : InstRW<[AtomWrite01_20], (instrs DAS)>;
def AtomWrite01_21 : SchedWriteRes<[AtomPort01]> {
let Latency = 21;
- let ResourceCycles = [21];
+ let ReleaseAtCycles = [21];
}
def : InstRW<[AtomWrite01_21], (instrs AAM8i8, STD)>;
def AtomWrite01_22 : SchedWriteRes<[AtomPort01]> {
let Latency = 22;
- let ResourceCycles = [22];
+ let ReleaseAtCycles = [22];
}
def : InstRW<[AtomWrite01_22], (instrs CMPXCHG16B)>;
def AtomWrite01_23 : SchedWriteRes<[AtomPort01]> {
let Latency = 23;
- let ResourceCycles = [23];
+ let ReleaseAtCycles = [23];
}
def : InstRW<[AtomWrite01_23], (instrs ARPL16mr, ARPL16rr)>;
def AtomWrite01_25 : SchedWriteRes<[AtomPort01]> {
let Latency = 25;
- let ResourceCycles = [25];
+ let ReleaseAtCycles = [25];
}
def : InstRW<[AtomWrite01_25], (instrs FNCLEX, FXTRACT)>;
def AtomWrite01_26 : SchedWriteRes<[AtomPort01]> {
let Latency = 26;
- let ResourceCycles = [26];
+ let ReleaseAtCycles = [26];
}
def : InstRW<[AtomWrite01_26], (instrs POPF32, POPF64)>;
def AtomWrite01_29 : SchedWriteRes<[AtomPort01]> {
let Latency = 29;
- let ResourceCycles = [29];
+ let ReleaseAtCycles = [29];
}
def : InstRW<[AtomWrite01_29], (instregex "POP(DS|ES|FS|GS)(16|32|64)")>;
def AtomWrite01_30 : SchedWriteRes<[AtomPort01]> {
let Latency = 30;
- let ResourceCycles = [30];
+ let ReleaseAtCycles = [30];
}
def : InstRW<[AtomWrite01_30], (instrs RDTSC, RDTSCP)>;
def AtomWrite01_32 : SchedWriteRes<[AtomPort01]> {
let Latency = 32;
- let ResourceCycles = [32];
+ let ReleaseAtCycles = [32];
}
def : InstRW<[AtomWrite01_32], (instrs ENTER, POPF16)>;
def AtomWrite01_45 : SchedWriteRes<[AtomPort01]> {
let Latency = 45;
- let ResourceCycles = [45];
+ let ReleaseAtCycles = [45];
}
def : InstRW<[AtomWrite01_45], (instrs MONITOR32rrr, MONITOR64rrr)>;
def AtomWrite01_46 : SchedWriteRes<[AtomPort01]> {
let Latency = 46;
- let ResourceCycles = [46];
+ let ReleaseAtCycles = [46];
}
def : InstRW<[AtomWrite01_46], (instrs FRNDINT, MWAITrr, RDPMC)>;
def AtomWrite01_48 : SchedWriteRes<[AtomPort01]> {
let Latency = 48;
- let ResourceCycles = [48];
+ let ReleaseAtCycles = [48];
}
def : InstRW<[AtomWrite01_48], (instrs POPSS16, POPSS32)>;
def AtomWrite01_55 : SchedWriteRes<[AtomPort01]> {
let Latency = 55;
- let ResourceCycles = [55];
+ let ReleaseAtCycles = [55];
}
def : InstRW<[AtomWrite01_55], (instrs FPREM)>;
def AtomWrite01_59 : SchedWriteRes<[AtomPort01]> {
let Latency = 59;
- let ResourceCycles = [59];
+ let ReleaseAtCycles = [59];
}
def : InstRW<[AtomWrite01_59], (instrs INSB, INSL, INSW)>;
def AtomWrite01_63 : SchedWriteRes<[AtomPort01]> {
let Latency = 63;
- let ResourceCycles = [63];
+ let ReleaseAtCycles = [63];
}
def : InstRW<[AtomWrite01_63], (instrs FNINIT)>;
def AtomWrite01_68 : SchedWriteRes<[AtomPort01]> {
let Latency = 68;
- let ResourceCycles = [68];
+ let ReleaseAtCycles = [68];
}
def : InstRW<[AtomWrite01_68], (instrs OUT8rr, OUT16rr, OUT32rr)>;
def AtomWrite01_71 : SchedWriteRes<[AtomPort01]> {
let Latency = 71;
- let ResourceCycles = [71];
+ let ReleaseAtCycles = [71];
}
def : InstRW<[AtomWrite01_71], (instrs FPREM1,
INVLPG, INVLPGA32, INVLPGA64)>;
def AtomWrite01_72 : SchedWriteRes<[AtomPort01]> {
let Latency = 72;
- let ResourceCycles = [72];
+ let ReleaseAtCycles = [72];
}
def : InstRW<[AtomWrite01_72], (instrs OUT8ir, OUT16ir, OUT32ir)>;
def AtomWrite01_74 : SchedWriteRes<[AtomPort01]> {
let Latency = 74;
- let ResourceCycles = [74];
+ let ReleaseAtCycles = [74];
}
def : InstRW<[AtomWrite01_74], (instrs OUTSB, OUTSL, OUTSW)>;
def AtomWrite01_77 : SchedWriteRes<[AtomPort01]> {
let Latency = 77;
- let ResourceCycles = [77];
+ let ReleaseAtCycles = [77];
}
def : InstRW<[AtomWrite01_77], (instrs FSCALE)>;
def AtomWrite01_78 : SchedWriteRes<[AtomPort01]> {
let Latency = 78;
- let ResourceCycles = [78];
+ let ReleaseAtCycles = [78];
}
def : InstRW<[AtomWrite01_78], (instrs RDMSR)>;
def AtomWrite01_79 : SchedWriteRes<[AtomPort01]> {
let Latency = 79;
- let ResourceCycles = [79];
+ let ReleaseAtCycles = [79];
}
def : InstRW<[AtomWrite01_79], (instregex "RET(16|32|64)?$",
"LRETI?(16|32|64)")>;
def AtomWrite01_92 : SchedWriteRes<[AtomPort01]> {
let Latency = 92;
- let ResourceCycles = [92];
+ let ReleaseAtCycles = [92];
}
def : InstRW<[AtomWrite01_92], (instrs IN8ri, IN16ri, IN32ri)>;
def AtomWrite01_94 : SchedWriteRes<[AtomPort01]> {
let Latency = 94;
- let ResourceCycles = [94];
+ let ReleaseAtCycles = [94];
}
def : InstRW<[AtomWrite01_94], (instrs IN8rr, IN16rr, IN32rr)>;
def AtomWrite01_99 : SchedWriteRes<[AtomPort01]> {
let Latency = 99;
- let ResourceCycles = [99];
+ let ReleaseAtCycles = [99];
}
def : InstRW<[AtomWrite01_99], (instrs F2XM1)>;
def AtomWrite01_121 : SchedWriteRes<[AtomPort01]> {
let Latency = 121;
- let ResourceCycles = [121];
+ let ReleaseAtCycles = [121];
}
def : InstRW<[AtomWrite01_121], (instrs CPUID)>;
def AtomWrite01_127 : SchedWriteRes<[AtomPort01]> {
let Latency = 127;
- let ResourceCycles = [127];
+ let ReleaseAtCycles = [127];
}
def : InstRW<[AtomWrite01_127], (instrs INT)>;
def AtomWrite01_130 : SchedWriteRes<[AtomPort01]> {
let Latency = 130;
- let ResourceCycles = [130];
+ let ReleaseAtCycles = [130];
}
def : InstRW<[AtomWrite01_130], (instrs INT3)>;
def AtomWrite01_140 : SchedWriteRes<[AtomPort01]> {
let Latency = 140;
- let ResourceCycles = [140];
+ let ReleaseAtCycles = [140];
}
def : InstRW<[AtomWrite01_140], (instrs FXSAVE, FXSAVE64)>;
def AtomWrite01_141 : SchedWriteRes<[AtomPort01]> {
let Latency = 141;
- let ResourceCycles = [141];
+ let ReleaseAtCycles = [141];
}
def : InstRW<[AtomWrite01_141], (instrs FXRSTOR, FXRSTOR64)>;
def AtomWrite01_146 : SchedWriteRes<[AtomPort01]> {
let Latency = 146;
- let ResourceCycles = [146];
+ let ReleaseAtCycles = [146];
}
def : InstRW<[AtomWrite01_146], (instrs FYL2X)>;
def AtomWrite01_147 : SchedWriteRes<[AtomPort01]> {
let Latency = 147;
- let ResourceCycles = [147];
+ let ReleaseAtCycles = [147];
}
def : InstRW<[AtomWrite01_147], (instrs FYL2XP1)>;
def AtomWrite01_168 : SchedWriteRes<[AtomPort01]> {
let Latency = 168;
- let ResourceCycles = [168];
+ let ReleaseAtCycles = [168];
}
def : InstRW<[AtomWrite01_168], (instrs FPTAN)>;
def AtomWrite01_174 : SchedWriteRes<[AtomPort01]> {
let Latency = 174;
- let ResourceCycles = [174];
+ let ReleaseAtCycles = [174];
}
def : InstRW<[AtomWrite01_174], (instrs FSINCOS, FSIN, FCOS)>;
def AtomWrite01_183 : SchedWriteRes<[AtomPort01]> {
let Latency = 183;
- let ResourceCycles = [183];
+ let ReleaseAtCycles = [183];
}
def : InstRW<[AtomWrite01_183], (instrs FPATAN)>;
def AtomWrite01_202 : SchedWriteRes<[AtomPort01]> {
let Latency = 202;
- let ResourceCycles = [202];
+ let ReleaseAtCycles = [202];
}
def : InstRW<[AtomWrite01_202], (instrs WRMSR)>;
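A pattern running through the Atom entries above: each microcoded write holds the single AtomPort01 resource for exactly as many cycles as its latency (Latency = N together with ReleaseAtCycles = [N]), which plausibly models Bonnell's in-order pipeline blocking both issue ports until the instruction completes. The shape, with an illustrative record name and latency value (neither is from the patch):

def AtomWrite01_Example : SchedWriteRes<[AtomPort01]> {
  let Latency = 37;              // hypothetical cycle count
  let ReleaseAtCycles = [37];    // the port group stays busy the whole time
}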
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleBdVer2.td b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleBdVer2.td
index c8dafcdeebd7..c9749979576f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleBdVer2.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleBdVer2.td
@@ -192,7 +192,7 @@ multiclass PdWriteRes<SchedWrite SchedRW,
list<int> Res = [], int UOps = 1> {
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
- let ResourceCycles = Res;
+ let ReleaseAtCycles = Res;
let NumMicroOps = UOps;
}
}
@@ -265,16 +265,16 @@ def : WriteRes<WriteRMW, [PdStore]>;
// Loads, stores, and moves, not folded with other operations.
////////////////////////////////////////////////////////////////////////////////
-def : WriteRes<WriteLoad, [PdLoad]> { let Latency = 5; let ResourceCycles = [2]; }
+def : WriteRes<WriteLoad, [PdLoad]> { let Latency = 5; let ReleaseAtCycles = [2]; }
def : WriteRes<WriteStore, [PdStore]>;
def : WriteRes<WriteStoreNT, [PdStore]>;
-def : WriteRes<WriteMove, [PdEX01]> { let ResourceCycles = [2]; }
+def : WriteRes<WriteMove, [PdEX01]> { let ReleaseAtCycles = [2]; }
defm : X86WriteResUnsupported<WriteVecMaskedGatherWriteback>;
// Load/store MXCSR.
// FIXME: These are copied and pasted from WriteLoad/Store.
def : WriteRes<WriteLDMXCSR, [PdLoad]> { let Latency = 5; }
-def : WriteRes<WriteSTMXCSR, [PdStore]> { let NumMicroOps = 2; let ResourceCycles = [18]; }
+def : WriteRes<WriteSTMXCSR, [PdStore]> { let NumMicroOps = 2; let ReleaseAtCycles = [18]; }
// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
@@ -308,7 +308,7 @@ def : InstRW<[PdWriteXLAT], (instrs XLAT)>;
def PdWriteLARrr : SchedWriteRes<[PdEX01]> {
let Latency = 184;
- let ResourceCycles = [375];
+ let ReleaseAtCycles = [375];
let NumMicroOps = 45;
}
def : InstRW<[PdWriteLARrr], (instregex "LAR(16|32|64)rr",
@@ -316,7 +316,7 @@ def : InstRW<[PdWriteLARrr], (instregex "LAR(16|32|64)rr",
// Nops don't have dependencies, so there's no actual latency, but we set this
// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
-def : WriteRes<WriteNop, [PdEX01]> { let ResourceCycles = [2]; }
+def : WriteRes<WriteNop, [PdEX01]> { let ReleaseAtCycles = [2]; }
////////////////////////////////////////////////////////////////////////////////
// Arithmetic.
@@ -326,21 +326,21 @@ defm : PdWriteResExPair<WriteALU, [PdEX01], 1, [2]>;
def PdWriteALURMW : SchedWriteRes<[PdLoad, PdEX01, PdStore]> {
let Latency = 6;
- let ResourceCycles = [3, 2, 1];
+ let ReleaseAtCycles = [3, 2, 1];
let NumMicroOps = 1;
}
def : SchedAlias<WriteALURMW, PdWriteALURMW>;
def PdWriteLXADD : SchedWriteRes<[PdEX01]> {
let Latency = 6;
- let ResourceCycles = [88];
+ let ReleaseAtCycles = [88];
let NumMicroOps = 4;
}
def : InstRW<[PdWriteLXADD], (instrs LXADD8, LXADD16, LXADD32, LXADD64)>;
def PdWriteBMI1 : SchedWriteRes<[PdEX01]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 2;
}
def : InstRW<[PdWriteBMI1],
@@ -352,7 +352,7 @@ def : InstRW<[PdWriteBMI1],
def PdWriteBMI1m : SchedWriteRes<[PdLoad, PdEX01]> {
let Latency = 6;
- let ResourceCycles = [3, 3];
+ let ReleaseAtCycles = [3, 3];
let NumMicroOps = 2;
}
def : InstRW<[PdWriteBMI1m],
@@ -365,7 +365,7 @@ def : InstRW<[PdWriteBMI1m],
defm : PdWriteResExPair<WriteADC, [PdEX01], 1, [2]>;
def PdWriteADCSBB64ri32 : SchedWriteRes<[PdEX01]> {
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
}
def : InstRW<[PdWriteADCSBB64ri32], (instrs ADC64ri32, SBB64ri32)>;
@@ -377,21 +377,21 @@ defm : PdWriteRes<WriteXCHG, [PdEX1], 1, [],
def PdWriteCMPXCHG8rr : SchedWriteRes<[PdEX1]> {
let Latency = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
let NumMicroOps = 3;
}
def : InstRW<[PdWriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;
def PdWriteCMPXCHG8rm : SchedWriteRes<[PdEX1]> {
let Latency = 3;
- let ResourceCycles = [23];
+ let ReleaseAtCycles = [23];
let NumMicroOps = 5;
}
def : InstRW<[PdWriteCMPXCHG8rm], (instrs CMPXCHG8rm)>;
def PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm : SchedWriteRes<[PdEX1]> {
let Latency = 3;
- let ResourceCycles = [21];
+ let ReleaseAtCycles = [21];
let NumMicroOps = 6;
}
def : InstRW<[PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm],
@@ -399,21 +399,21 @@ def : InstRW<[PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm],
def PdWriteCMPXCHG8B : SchedWriteRes<[PdEX1]> {
let Latency = 3;
- let ResourceCycles = [26];
+ let ReleaseAtCycles = [26];
let NumMicroOps = 18;
}
def : InstRW<[PdWriteCMPXCHG8B], (instrs CMPXCHG8B)>;
def PdWriteCMPXCHG16B : SchedWriteRes<[PdEX1]> {
let Latency = 3;
- let ResourceCycles = [69];
+ let ReleaseAtCycles = [69];
let NumMicroOps = 22;
}
def : InstRW<[PdWriteCMPXCHG16B], (instrs CMPXCHG16B)>;
def PdWriteXADDm : SchedWriteRes<[PdEX1]> {
let Latency = 6;
- let ResourceCycles = [20];
+ let ReleaseAtCycles = [20];
let NumMicroOps = 4;
}
def : InstRW<[PdWriteXADDm], (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm)>;
@@ -449,21 +449,21 @@ defm : PdWriteResExPair<WriteCRC32, [PdEX01], 2, [4],
def PdWriteCRC32r32r16 : SchedWriteRes<[PdEX01]> {
let Latency = 5;
- let ResourceCycles = [10];
+ let ReleaseAtCycles = [10];
let NumMicroOps = 5;
}
def : InstRW<[PdWriteCRC32r32r16], (instrs CRC32r32r16)>;
def PdWriteCRC32r32r32 : SchedWriteRes<[PdEX01]> {
let Latency = 6;
- let ResourceCycles = [12];
+ let ReleaseAtCycles = [12];
let NumMicroOps = 7;
}
def : InstRW<[PdWriteCRC32r32r32], (instrs CRC32r32r32)>;
def PdWriteCRC32r64r64 : SchedWriteRes<[PdEX01]> {
let Latency = 10;
- let ResourceCycles = [17];
+ let ReleaseAtCycles = [17];
let NumMicroOps = 11;
}
def : InstRW<[PdWriteCRC32r64r64], (instrs CRC32r64r64)>;
@@ -472,7 +472,7 @@ defm : PdWriteResExPair<WriteCMOV, [PdEX01]>; // Conditional move.
def PdWriteCMOVm : SchedWriteRes<[PdLoad, PdEX01]> {
let Latency = 5;
- let ResourceCycles = [3, 3];
+ let ReleaseAtCycles = [3, 3];
let NumMicroOps = 2;
}
@@ -494,7 +494,7 @@ def : WriteRes<WriteSETCC, [PdEX01]>; // Setcc.
def : WriteRes<WriteSETCCStore, [PdEX01, PdStore]>;
def PdWriteSETGEmSETGmSETLEmSETLm : SchedWriteRes<[PdEX01]> {
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 2;
}
@@ -511,14 +511,14 @@ defm : PdWriteRes<WriteLAHFSAHF, [PdEX01], 2, [4], 2>;
def PdWriteLAHF : SchedWriteRes<[PdEX01]> {
let Latency = 2;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
let NumMicroOps = 4;
}
def : InstRW<[PdWriteLAHF], (instrs LAHF)>;
def PdWriteSAHF : SchedWriteRes<[PdEX01]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 2;
}
def : InstRW<[PdWriteSAHF], (instrs SAHF)>;
@@ -532,24 +532,24 @@ defm : PdWriteRes<WriteBitTestSetRegLd, [PdEX01, PdLoad], 6, [1, 1], 10>;
def PdWriteBTSIm : SchedWriteRes<[PdEX01, PdLoad]> {
let Latency = 7;
- let ResourceCycles = [42, 1];
+ let ReleaseAtCycles = [42, 1];
let NumMicroOps = 4;
}
def : SchedAlias<WriteBitTestSetImmRMW, PdWriteBTSIm>;
def PdWriteBTSRm : SchedWriteRes<[PdEX01, PdLoad]> {
let Latency = 7;
- let ResourceCycles = [44, 1];
+ let ReleaseAtCycles = [44, 1];
let NumMicroOps = 10;
}
def : SchedAlias<WriteBitTestSetRegRMW, PdWriteBTSRm>;
// This is for simple LEAs with one or two input operands.
-def : WriteRes<WriteLEA, [PdEX01]> { let ResourceCycles = [2]; }
+def : WriteRes<WriteLEA, [PdEX01]> { let ReleaseAtCycles = [2]; }
// This write is used for slow LEA instructions.
def PdWrite3OpsLEA : SchedWriteRes<[PdEX01]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
// On Piledriver, a slow LEA is either a 3Ops LEA (base, index, offset),
@@ -574,7 +574,7 @@ def PdWriteLEA : SchedWriteVariant<[
def : InstRW<[PdWriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;
def PdWriteLEA16r : SchedWriteRes<[PdEX01]> {
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
let NumMicroOps = 2;
}
def : InstRW<[PdWriteLEA16r], (instrs LEA16r)>;
@@ -593,14 +593,14 @@ defm : PdWriteResExPair<WriteBZHI, [PdEX01]>;
def PdWriteBEXTRI : SchedWriteRes<[PdEX01]> {
let Latency = 2;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
let NumMicroOps = 2;
}
def : InstRW<[PdWriteBEXTRI], (instrs BEXTRI32ri, BEXTRI64ri)>;
def PdWriteBEXTRIm : SchedWriteRes<[PdEX01]> {
let Latency = 2;
- let ResourceCycles = [5];
+ let ReleaseAtCycles = [5];
let NumMicroOps = 2;
}
def : InstRW<[PdWriteBEXTRIm], (instrs BEXTRI32mi, BEXTRI64mi)>;
@@ -616,56 +616,56 @@ defm : PdWriteResExPair<WriteRotateCL, [PdEX01]>;
def PdWriteRCL8rCL : SchedWriteRes<[PdEX01]> {
let Latency = 12;
- let ResourceCycles = [24];
+ let ReleaseAtCycles = [24];
let NumMicroOps = 26;
}
def : InstRW<[PdWriteRCL8rCL], (instrs RCL8rCL)>;
def PdWriteRCR8ri : SchedWriteRes<[PdEX01]> {
let Latency = 12;
- let ResourceCycles = [23];
+ let ReleaseAtCycles = [23];
let NumMicroOps = 23;
}
def : InstRW<[PdWriteRCR8ri], (instrs RCR8ri)>;
def PdWriteRCR8rCL : SchedWriteRes<[PdEX01]> {
let Latency = 11;
- let ResourceCycles = [22];
+ let ReleaseAtCycles = [22];
let NumMicroOps = 24;
}
def : InstRW<[PdWriteRCR8rCL], (instrs RCR8rCL)>;
def PdWriteRCL16rCL : SchedWriteRes<[PdEX01]> {
let Latency = 10;
- let ResourceCycles = [20];
+ let ReleaseAtCycles = [20];
let NumMicroOps = 22;
}
def : InstRW<[PdWriteRCL16rCL], (instrs RCL16rCL)>;
def PdWriteRCR16ri : SchedWriteRes<[PdEX01]> {
let Latency = 10;
- let ResourceCycles = [19];
+ let ReleaseAtCycles = [19];
let NumMicroOps = 19;
}
def : InstRW<[PdWriteRCR16ri], (instrs RCR16ri)>;
def PdWriteRCL3264rCL : SchedWriteRes<[PdEX01]> {
let Latency = 7;
- let ResourceCycles = [14];
+ let ReleaseAtCycles = [14];
let NumMicroOps = 17;
}
def : InstRW<[PdWriteRCL3264rCL], (instrs RCL32rCL, RCL64rCL)>;
def PdWriteRCR3264rCL : SchedWriteRes<[PdEX01]> {
let Latency = 7;
- let ResourceCycles = [13];
+ let ReleaseAtCycles = [13];
let NumMicroOps = 16;
}
def : InstRW<[PdWriteRCR3264rCL], (instrs RCR32rCL, RCR64rCL)>;
def PdWriteRCR32riRCR64ri : SchedWriteRes<[PdEX01]> {
let Latency = 7;
- let ResourceCycles = [14];
+ let ReleaseAtCycles = [14];
let NumMicroOps = 15;
}
def : InstRW<[PdWriteRCR32riRCR64ri], (instrs RCR32ri, RCR64ri)>;
@@ -673,28 +673,28 @@ def : InstRW<[PdWriteRCR32riRCR64ri], (instrs RCR32ri, RCR64ri)>;
def PdWriteRCR16rCL : SchedWriteRes<[PdEX01]> {
let Latency = 9;
- let ResourceCycles = [18];
+ let ReleaseAtCycles = [18];
let NumMicroOps = 20;
}
def : InstRW<[PdWriteRCR16rCL], (instrs RCR16rCL)>;
def PdWriteRCL16ri : SchedWriteRes<[PdEX01]> {
let Latency = 11;
- let ResourceCycles = [21];
+ let ReleaseAtCycles = [21];
let NumMicroOps = 21;
}
def : InstRW<[PdWriteRCL16ri], (instrs RCL16ri)>;
def PdWriteRCL3264ri : SchedWriteRes<[PdEX01]> {
let Latency = 8;
- let ResourceCycles = [15];
+ let ReleaseAtCycles = [15];
let NumMicroOps = 16;
}
def : InstRW<[PdWriteRCL3264ri], (instrs RCL32ri, RCL64ri)>;
def PdWriteRCL8ri : SchedWriteRes<[PdEX01]> {
let Latency = 13;
- let ResourceCycles = [25];
+ let ReleaseAtCycles = [25];
let NumMicroOps = 25;
}
def : InstRW<[PdWriteRCL8ri], (instrs RCL8ri)>;
@@ -705,7 +705,7 @@ defm : PdWriteRes<WriteSHDrrcl, [PdEX01], 3, [8], 7>;
def PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL : SchedWriteRes<[PdEX01]> {
let Latency = 3;
- let ResourceCycles = [6];
+ let ReleaseAtCycles = [6];
let NumMicroOps = 7;
}
def : InstRW<[PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL], (instrs SHLD16rrCL,
@@ -736,7 +736,7 @@ defm : PdWriteRes<WriteFStoreY, [PdStore, PdFPU23, PdFPSTO], 1, [1, 3
def PdWriteMOVHPm : SchedWriteRes<[PdStore, PdFPU23, PdFPSTO]> {
let Latency = 2;
- let ResourceCycles = [1, 3, 1];
+ let ReleaseAtCycles = [1, 3, 1];
let NumMicroOps = 2;
}
def : InstRW<[PdWriteMOVHPm], (instrs MOVHPDmr, MOVHPSmr, VMOVHPDmr, VMOVHPSmr)>;
@@ -769,7 +769,7 @@ defm : X86WriteResPairUnsupported<WriteFAddZ>;
def PdWriteX87Add: SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
let Latency = 5;
- let ResourceCycles = [3, 1, 10];
+ let ReleaseAtCycles = [3, 1, 10];
}
def : InstRW<[PdWriteX87Add], (instrs ADD_FI16m, ADD_FI32m, ADD_F32m, ADD_F64m,
SUB_FI16m, SUB_FI32m, SUB_F32m, SUB_F64m,
@@ -808,7 +808,7 @@ defm : X86WriteResPairUnsupported<WriteFMulZ>;
def PdWriteX87Mul: SchedWriteRes<[PdLoad, PdFPU1, PdFPFMA]> {
let Latency = 5;
- let ResourceCycles = [3, 1, 10];
+ let ReleaseAtCycles = [3, 1, 10];
}
def : InstRW<[PdWriteX87Mul], (instrs MUL_FI16m, MUL_FI32m, MUL_F32m, MUL_F64m)>;
@@ -829,7 +829,7 @@ defm : PdWriteResYMMPair<WriteDPPSY, [PdFPU1, PdFPFMA], 27, [2, 25], /*or
def PdWriteVDPPSrri : SchedWriteRes<[PdFPU1, PdFPFMA]> {
let Latency = 27;
- let ResourceCycles = [1, 14];
+ let ReleaseAtCycles = [1, 14];
let NumMicroOps = 17;
}
def : InstRW<[PdWriteVDPPSrri], (instrs VDPPSrri)>;
@@ -851,7 +851,7 @@ defm : X86WriteResPairUnsupported<WriteFDivZ>;
def PdWriteX87Div: SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
let Latency = 9;
- let ResourceCycles = [3, 1, 18];
+ let ReleaseAtCycles = [3, 1, 18];
}
def : InstRW<[PdWriteX87Div], (instrs DIV_FI16m, DIV_FI32m,
DIVR_FI16m, DIVR_FI32m,
@@ -882,21 +882,21 @@ defm : X86WriteResPairUnsupported<WriteFRndZ>;
def PdWriteVFRCZP : SchedWriteRes<[PdFPU1, PdFPSTO]> {
let Latency = 10;
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let NumMicroOps = 2;
}
def : InstRW<[PdWriteVFRCZP], (instrs VFRCZPDrr, VFRCZPSrr)>;
def PdWriteVFRCZS : SchedWriteRes<[PdFPU1, PdFPSTO]> {
let Latency = 10;
- let ResourceCycles = [10, 1];
+ let ReleaseAtCycles = [10, 1];
let NumMicroOps = 2;
}
def : InstRW<[PdWriteVFRCZS], (instrs VFRCZSDrr, VFRCZSSrr)>;
def PdWriteVFRCZm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
let Latency = 15;
- let ResourceCycles = [2, 1];
+ let ReleaseAtCycles = [2, 1];
let NumMicroOps = 3;
}
def : InstRW<[PdWriteVFRCZm], (instrs VFRCZPDrm, VFRCZPSrm,
@@ -904,14 +904,14 @@ def : InstRW<[PdWriteVFRCZm], (instrs VFRCZPDrm, VFRCZPSrm,
def PdWriteVFRCZY : SchedWriteRes<[PdFPU1, PdFPSTO]> {
let Latency = 10;
- let ResourceCycles = [3, 1];
+ let ReleaseAtCycles = [3, 1];
let NumMicroOps = 4;
}
def : InstRW<[PdWriteVFRCZY], (instrs VFRCZPSYrr, VFRCZPDYrr)>;
def PdWriteVFRCZYm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
let Latency = 15;
- let ResourceCycles = [4, 1];
+ let ReleaseAtCycles = [4, 1];
let NumMicroOps = 8;
}
def : InstRW<[PdWriteVFRCZYm], (instrs VFRCZPSYrm, VFRCZPDYrm)>;
@@ -930,10 +930,10 @@ defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
def PdWriteVBROADCASTF128 : SchedWriteRes<[PdFPU01, PdFPFMA]> {
let Latency = 7;
- let ResourceCycles = [1, 3];
+ let ReleaseAtCycles = [1, 3];
let NumMicroOps = 2;
}
-def : InstRW<[PdWriteVBROADCASTF128], (instrs VBROADCASTF128)>;
+def : InstRW<[PdWriteVBROADCASTF128], (instrs VBROADCASTF128rm)>;
defm : PdWriteResXMMPair<WriteFVarShuffle, [PdFPU1, PdFPXBR], 3>;
defm : PdWriteResYMMPair<WriteFVarShuffleY, [PdFPU1, PdFPXBR], 3, [2, 2], 2>;
@@ -952,27 +952,27 @@ defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
def PdWriteVEXTRACTF128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
let Latency = 2;
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
}
def : InstRW<[PdWriteVEXTRACTF128rr], (instrs VEXTRACTF128rr)>;
def PdWriteVEXTRACTF128mr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
let Latency = 7;
- let ResourceCycles = [1, 4];
+ let ReleaseAtCycles = [1, 4];
let NumMicroOps = 2;
}
def : InstRW<[PdWriteVEXTRACTF128mr], (instrs VEXTRACTF128mr)>;
def PdWriteVPERM2F128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
let Latency = 4;
- let ResourceCycles = [1, 6];
+ let ReleaseAtCycles = [1, 6];
let NumMicroOps = 8;
}
def : InstRW<[PdWriteVPERM2F128rr], (instrs VPERM2F128rr)>;
def PdWriteVPERM2F128rm : SchedWriteRes<[PdFPU01, PdFPFMA]> {
let Latency = 8; // 4 + 4
- let ResourceCycles = [1, 8];
+ let ReleaseAtCycles = [1, 8];
let NumMicroOps = 10;
}
def : InstRW<[PdWriteVPERM2F128rm], (instrs VPERM2F128rm)>;
@@ -1012,7 +1012,7 @@ defm : PdWriteResXMMPair<WriteCvtI2SD, [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
def PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
let Latency = 13;
- let ResourceCycles = [1, 3, 1];
+ let ReleaseAtCycles = [1, 3, 1];
let NumMicroOps = 2;
}
def : InstRW<[PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr], (instrs CVTSI642SDrr, CVTSI642SSrr, CVTSI2SDrr, CVTSI2SSrr)>;
@@ -1161,13 +1161,13 @@ defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
def PdWriteVPPERM : SchedWriteRes<[PdFPU1, PdFPXBR]> {
let Latency = 2;
- let ResourceCycles = [1, 1];
+ let ReleaseAtCycles = [1, 1];
}
def : InstRW<[PdWriteVPPERM], (instrs VPPERMrrr, VPPERMrrr_REV)>;
def PdWriteVPPERMLd : SchedWriteRes<[PdFPU1, PdFPXBR, PdLoad]> {
let Latency = 7;
- let ResourceCycles = [1, 1, 3];
+ let ReleaseAtCycles = [1, 1, 3];
}
def : InstRW<[PdWriteVPPERMLd], (instrs VPPERMrrm, VPPERMrmr)>;
@@ -1208,7 +1208,7 @@ defm : PdWriteRes<WriteVecExtractSt, [PdFPU1, PdFPSTO, PdStore], 13, [2, 1, 1],
def PdWriteEXTRQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
let Latency = 3;
- let ResourceCycles = [1, 3];
+ let ReleaseAtCycles = [1, 3];
}
def : InstRW<[PdWriteEXTRQ], (instrs EXTRQ, EXTRQI)>;
@@ -1277,7 +1277,7 @@ defm : PdWriteResXMMPair<WriteCLMul, [PdFPU0, PdFPMMA], 12, [1, 7], 5, 1>;
def PdWriteVPCLMULQDQrr : SchedWriteRes<[PdFPU0, PdFPMMA]> {
let Latency = 12;
- let ResourceCycles = [1, 7];
+ let ReleaseAtCycles = [1, 7];
let NumMicroOps = 6;
}
def : InstRW<[PdWriteVPCLMULQDQrr], (instrs VPCLMULQDQrr)>;
@@ -1288,13 +1288,13 @@ def : InstRW<[PdWriteVPCLMULQDQrr], (instrs VPCLMULQDQrr)>;
def PdWriteINSERTQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
let Latency = 3;
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
}
def : InstRW<[PdWriteINSERTQ], (instrs INSERTQ)>;
def PdWriteINSERTQI : SchedWriteRes<[PdFPU01, PdFPMAL]> {
let Latency = 3;
- let ResourceCycles = [1, 3];
+ let ReleaseAtCycles = [1, 3];
}
def : InstRW<[PdWriteINSERTQI], (instrs INSERTQI)>;
@@ -1304,7 +1304,7 @@ def : InstRW<[PdWriteINSERTQI], (instrs INSERTQI)>;
def PdWriteVBROADCASTYLd : SchedWriteRes<[PdLoad, PdFPU01, PdFPFMA]> {
let Latency = 6;
- let ResourceCycles = [1, 2, 4];
+ let ReleaseAtCycles = [1, 2, 4];
let NumMicroOps = 2;
}
def : InstRW<[PdWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index 222d65ca5971..9cba933e82b0 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -125,7 +125,7 @@ multiclass JWriteResIntPair<X86FoldableSchedWrite SchedRW,
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
- let ResourceCycles = Res;
+ let ReleaseAtCycles = Res;
let NumMicroOps = UOps;
}
@@ -133,7 +133,7 @@ multiclass JWriteResIntPair<X86FoldableSchedWrite SchedRW,
// latency.
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 3);
- let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
+ let ReleaseAtCycles = !if(!empty(Res), [], !listconcat([1], Res));
let NumMicroOps = !add(UOps, LoadUOps);
}
}
@@ -145,7 +145,7 @@ multiclass JWriteResFpuPair<X86FoldableSchedWrite SchedRW,
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
- let ResourceCycles = Res;
+ let ReleaseAtCycles = Res;
let NumMicroOps = UOps;
}
@@ -153,7 +153,7 @@ multiclass JWriteResFpuPair<X86FoldableSchedWrite SchedRW,
// latency.
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 5);
- let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
+ let ReleaseAtCycles = !if(!empty(Res), [], !listconcat([1], Res));
let NumMicroOps = !add(UOps, LoadUOps);
}
}
@@ -165,7 +165,7 @@ multiclass JWriteResYMMPair<X86FoldableSchedWrite SchedRW,
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
- let ResourceCycles = Res;
+ let ReleaseAtCycles = Res;
let NumMicroOps = UOps;
}
@@ -173,7 +173,7 @@ multiclass JWriteResYMMPair<X86FoldableSchedWrite SchedRW,
// latency.
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 5);
- let ResourceCycles = !listconcat([2], Res);
+ let ReleaseAtCycles = !listconcat([2], Res);
let NumMicroOps = !add(UOps, LoadUOps);
}
}
@@ -313,49 +313,49 @@ def : WriteRes<WriteNop, [JALU01]> { let Latency = 1; }
def JWriteCMPXCHG8rr : SchedWriteRes<[JALU01]> {
let Latency = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
let NumMicroOps = 3;
}
def JWriteLOCK_CMPXCHG8rm : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
let Latency = 16;
- let ResourceCycles = [3,16,16];
+ let ReleaseAtCycles = [3,16,16];
let NumMicroOps = 5;
}
def JWriteLOCK_CMPXCHGrm : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
let Latency = 17;
- let ResourceCycles = [3,17,17];
+ let ReleaseAtCycles = [3,17,17];
let NumMicroOps = 6;
}
def JWriteCMPXCHG8rm : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
let Latency = 11;
- let ResourceCycles = [3,1,1];
+ let ReleaseAtCycles = [3,1,1];
let NumMicroOps = 5;
}
def JWriteCMPXCHG8B : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
let Latency = 11;
- let ResourceCycles = [3,1,1];
+ let ReleaseAtCycles = [3,1,1];
let NumMicroOps = 18;
}
def JWriteCMPXCHG16B : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
let Latency = 32;
- let ResourceCycles = [6,1,1];
+ let ReleaseAtCycles = [6,1,1];
let NumMicroOps = 28;
}
def JWriteLOCK_CMPXCHG8B : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
let Latency = 19;
- let ResourceCycles = [3,19,19];
+ let ReleaseAtCycles = [3,19,19];
let NumMicroOps = 18;
}
def JWriteLOCK_CMPXCHG16B : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
let Latency = 38;
- let ResourceCycles = [6,38,38];
+ let ReleaseAtCycles = [6,38,38];
let NumMicroOps = 28;
}
@@ -394,7 +394,7 @@ def : InstRW<[JWriteCMPXCHGVariant,
def JWriteLOCK_ALURMW : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
let Latency = 19;
- let ResourceCycles = [1,19,19];
+ let ReleaseAtCycles = [1,19,19];
let NumMicroOps = 1;
}
@@ -409,7 +409,7 @@ def : InstRW<[JWriteLOCK_ALURMWVariant], (instrs INC8m, INC16m, INC32m, INC64m,
def JWriteXCHG8rr_XADDrr : SchedWriteRes<[JALU01]> {
let Latency = 2;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
let NumMicroOps = 3;
}
def : InstRW<[JWriteXCHG8rr_XADDrr], (instrs XCHG8rr, XADD8rr, XADD16rr,
@@ -434,7 +434,7 @@ def : InstRW<[JWriteXCHG8rr_XADDrr], (instrs XCHG8rr, XADD8rr, XADD16rr,
// latency is assumed to be 3cy.
def JWriteXADDrm_XCHG_Part : SchedWriteRes<[JALU01]> {
let Latency = 3; // load-to-use latency
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
let NumMicroOps = 3;
}
@@ -451,7 +451,7 @@ def JWriteXADDrm_XCHG_Part : SchedWriteRes<[JALU01]> {
// execution. This write is used to specifically set that operand latency.
def JWriteLOCK_XADDrm_XCHG_Part : SchedWriteRes<[JALU01]> {
let Latency = 11;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
let NumMicroOps = 3;
}
@@ -463,19 +463,19 @@ def JWriteLOCK_XADDrm_XCHG_Part : SchedWriteRes<[JALU01]> {
// set the instruction latency to 16cy.
def JWriteXCHGrm_XCHG_Part : SchedWriteRes<[JALU01]> {
let Latency = 11;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 2;
}
def JWriteXADDrm_LdSt_Part : SchedWriteRes<[JLAGU, JSAGU]> {
let Latency = 11;
- let ResourceCycles = [1, 1];
+ let ReleaseAtCycles = [1, 1];
let NumMicroOps = 1;
}
def JWriteXCHGrm_LdSt_Part : SchedWriteRes<[JLAGU, JSAGU]> {
let Latency = 16;
- let ResourceCycles = [16, 16];
+ let ReleaseAtCycles = [16, 16];
let NumMicroOps = 1;
}
@@ -798,7 +798,7 @@ defm : JWriteResFpuPair<WriteCLMul, [JFPU0, JVIMUL], 2>;
def JWriteINSERTQ: SchedWriteRes<[JFPU01, JVALU]> {
let Latency = 2;
- let ResourceCycles = [1, 4];
+ let ReleaseAtCycles = [1, 4];
}
def : InstRW<[JWriteINSERTQ], (instrs INSERTQ, INSERTQI)>;
@@ -811,12 +811,12 @@ def : InstRW<[JWriteVecExtractF128], (instrs VEXTRACTF128rr)>;
def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
- let ResourceCycles = [1, 2, 4];
+ let ReleaseAtCycles = [1, 2, 4];
let NumMicroOps = 2;
}
def : InstRW<[JWriteVBROADCASTYLd], (instrs VBROADCASTSDYrm,
VBROADCASTSSYrm,
- VBROADCASTF128)>;
+ VBROADCASTF128rm)>;
def JWriteJVZEROALL: SchedWriteRes<[]> {
let Latency = 90;
@@ -836,7 +836,7 @@ def : InstRW<[JWriteJVZEROUPPER], (instrs VZEROUPPER)>;
def JWriteMASKMOVDQU: SchedWriteRes<[JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01]> {
let Latency = 34;
- let ResourceCycles = [1, 1, 2, 2, 2, 16, 42];
+ let ReleaseAtCycles = [1, 1, 2, 2, 2, 16, 42];
let NumMicroOps = 63;
}
def : InstRW<[JWriteMASKMOVDQU], (instrs MASKMOVDQU, MASKMOVDQU64,
@@ -960,7 +960,7 @@ def : InstRW<[JWriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;
def JSlowLEA16r : SchedWriteRes<[JALU01]> {
let Latency = 3;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
}
def : InstRW<[JSlowLEA16r], (instrs LEA16r)>;
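Worth spelling out for the BtVer2 pair helpers above: the memory-folded variant (SchedRW.Folded) prepends the load AGU to the port list and, when the register variant declares explicit cycles, prepends a matching 1 to ReleaseAtCycles; the latency grows by the 3-cycle (integer) or 5-cycle (FPU) load-to-use penalty. A sketch of what a hypothetical JWriteResIntPair instantiation with Lat = 1, Res = [2], UOps = 1 and no extra load uops generates (MyRW stands in for an X86FoldableSchedWrite; it is not a record from the patch):

def : WriteRes<MyRW, [JALU01]> {
  let Latency = 1;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 1;
}
def : WriteRes<MyRW.Folded, [JLAGU, JALU01]> {
  let Latency = 4;                 // !add(1, 3)
  let ReleaseAtCycles = [1, 2];    // !listconcat([1], [2])
  let NumMicroOps = 1;             // !add(1, 0)
}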
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleSLM.td b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleSLM.td
index ff50c6dfc621..d5fa9f7f9628 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -66,7 +66,7 @@ multiclass SLMWriteResPair<X86FoldableSchedWrite SchedRW,
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
- let ResourceCycles = Res;
+ let ReleaseAtCycles = Res;
let NumMicroOps = UOps;
}
@@ -74,7 +74,7 @@ multiclass SLMWriteResPair<X86FoldableSchedWrite SchedRW,
// the latency (default = 3).
def : WriteRes<SchedRW.Folded, !listconcat([SLM_MEC_RSV], ExePorts)> {
let Latency = !add(Lat, LoadLat);
- let ResourceCycles = !listconcat([1], Res);
+ let ReleaseAtCycles = !listconcat([1], Res);
let NumMicroOps = !add(UOps, LoadUOps);
}
}
@@ -139,7 +139,7 @@ defm : X86WriteRes<WriteFCMOV, [SLM_FPC_RSV1], 3, [1], 1>; // x87 conditional mo
def : WriteRes<WriteSETCC, [SLM_IEC_RSV01]>;
def : WriteRes<WriteSETCCStore, [SLM_IEC_RSV01, SLM_MEC_RSV]> {
// FIXME Latency and NumMicroOps?
- let ResourceCycles = [2,1];
+ let ReleaseAtCycles = [2,1];
}
defm : X86WriteRes<WriteLAHFSAHF, [SLM_IEC_RSV01], 1, [1], 1>;
defm : X86WriteRes<WriteBitTest, [SLM_IEC_RSV0, SLM_IEC_RSV1], 1, [1,1], 1>;
@@ -413,7 +413,7 @@ def : WriteRes<WriteVecExtract, [SLM_FPC_RSV0]> {
def : WriteRes<WriteVecExtractSt, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 4;
let NumMicroOps = 5;
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
}
////////////////////////////////////////////////////////////////////////////////
@@ -465,7 +465,7 @@ def : WriteRes<WriteNop, []>;
def SLMWriteResGroup1rr : SchedWriteRes<[SLM_FPC_RSV01]> {
let Latency = 4;
let NumMicroOps = 2;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
}
def: InstRW<[SLMWriteResGroup1rr], (instrs MMX_PADDQrr, PADDQrr,
MMX_PSUBQrr, PSUBQrr,
@@ -474,14 +474,14 @@ def: InstRW<[SLMWriteResGroup1rr], (instrs MMX_PADDQrr, PADDQrr,
def SLMWriteResGroup2rr : SchedWriteRes<[SLM_FPC_RSV0]> {
let Latency = 5;
let NumMicroOps = 1;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def: InstRW<[SLMWriteResGroup2rr], (instrs PCMPGTQrr)>;
def SLMWriteResGroup1rm : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV01]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,8];
+ let ReleaseAtCycles = [1,8];
}
def: InstRW<[SLMWriteResGroup1rm], (instrs MMX_PADDQrm, PADDQrm,
@@ -491,7 +491,7 @@ def: InstRW<[SLMWriteResGroup1rm], (instrs MMX_PADDQrm, PADDQrm,
def SLMWriteResGroup2rm : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0]> {
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def: InstRW<[SLMWriteResGroup2rm], (instrs PCMPGTQrm)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver1.td b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 0e001638d03d..7ee9eadf8439 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -136,7 +136,7 @@ multiclass ZnWriteResPair<X86FoldableSchedWrite SchedRW,
// Register variant takes 1-cycle on Execution Port.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
- let ResourceCycles = Res;
+ let ReleaseAtCycles = Res;
let NumMicroOps = UOps;
}
@@ -144,7 +144,7 @@ multiclass ZnWriteResPair<X86FoldableSchedWrite SchedRW,
// adds LoadLat cycles to the latency (default = 4).
def : WriteRes<SchedRW.Folded, !listconcat([ZnAGU], ExePorts)> {
let Latency = !add(Lat, LoadLat);
- let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
+ let ReleaseAtCycles = !if(!empty(Res), [], !listconcat([1], Res));
let NumMicroOps = !add(UOps, LoadUOps);
}
}
@@ -157,7 +157,7 @@ multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW,
// Register variant takes 1-cycle on Execution Port.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
- let ResourceCycles = Res;
+ let ReleaseAtCycles = Res;
let NumMicroOps = UOps;
}
@@ -165,7 +165,7 @@ multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW,
// adds LoadLat cycles to the latency (default = 7).
def : WriteRes<SchedRW.Folded, !listconcat([ZnAGU], ExePorts)> {
let Latency = !add(Lat, LoadLat);
- let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
+ let ReleaseAtCycles = !if(!empty(Res), [], !listconcat([1], Res));
let NumMicroOps = !add(UOps, LoadUOps);
}
}
@@ -455,12 +455,12 @@ defm : ZnWriteResFpuPair<WriteVecInsert, [ZnFPU], 1>;
def : WriteRes<WriteVecExtract, [ZnFPU12, ZnFPU2]> {
let Latency = 2;
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
}
def : WriteRes<WriteVecExtractSt, [ZnAGU, ZnFPU12, ZnFPU2]> {
let Latency = 5;
let NumMicroOps = 2;
- let ResourceCycles = [1, 2, 3];
+ let ReleaseAtCycles = [1, 2, 3];
}
// MOVMSK Instructions.
@@ -471,7 +471,7 @@ def : WriteRes<WriteVecMOVMSK, [ZnFPU2]>;
def : WriteRes<WriteVecMOVMSKY, [ZnFPU2]> {
let NumMicroOps = 2;
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
// AES Instructions.
@@ -869,7 +869,7 @@ def ZnWriteFPU03 : SchedWriteRes<[ZnAGU, ZnFPU03]>
{
let Latency = 12;
let NumMicroOps = 2;
- let ResourceCycles = [1,3];
+ let ReleaseAtCycles = [1,3];
}
// FICOM(P).
@@ -910,12 +910,12 @@ def : InstRW<[ZnWriteFPU01Y], (instrs VPBLENDDYrri)>;
def ZnWriteFPU01Op2 : SchedWriteRes<[ZnAGU, ZnFPU01]> {
let NumMicroOps = 2;
let Latency = 8;
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
}
def ZnWriteFPU01Op2Y : SchedWriteRes<[ZnAGU, ZnFPU01]> {
let NumMicroOps = 2;
let Latency = 9;
- let ResourceCycles = [1, 3];
+ let ReleaseAtCycles = [1, 3];
}
def : InstRW<[ZnWriteFPU01Op2], (instrs VPBLENDDrmi)>;
def : InstRW<[ZnWriteFPU01Op2Y], (instrs VPBLENDDYrmi)>;
@@ -938,7 +938,7 @@ def : InstRW<[WriteMicrocoded], (instregex "VPMASKMOV(D|Q)(Y?)mr")>;
def ZnWriteVPBROADCAST128Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
}
def : InstRW<[ZnWriteVPBROADCAST128Ld],
(instregex "VPBROADCAST(B|W)rm")>;
@@ -947,7 +947,7 @@ def : InstRW<[ZnWriteVPBROADCAST128Ld],
def ZnWriteVPBROADCAST256Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> {
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
}
def : InstRW<[ZnWriteVPBROADCAST256Ld],
(instregex "VPBROADCAST(B|W)Yrm")>;
@@ -977,7 +977,7 @@ def ZnWritePCMPGTQm : SchedWriteRes<[ZnAGU, ZnFPU03]> {
def ZnWritePCMPGTQYm : SchedWriteRes<[ZnAGU, ZnFPU03]> {
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def : InstRW<[ZnWritePCMPGTQm], (instregex "(V?)PCMPGTQrm")>;
def : InstRW<[ZnWritePCMPGTQYm], (instrs VPCMPGTQYrm)>;
@@ -996,22 +996,22 @@ def ZnWriteBROADCAST : SchedWriteRes<[ZnAGU, ZnFPU13]> {
let Latency = 8;
}
// VBROADCASTF128 / VBROADCASTI128.
-def : InstRW<[ZnWriteBROADCAST], (instrs VBROADCASTF128,
- VBROADCASTI128)>;
+def : InstRW<[ZnWriteBROADCAST], (instrs VBROADCASTF128rm,
+ VBROADCASTI128rm)>;
// EXTRACTPS.
// r32,x,i.
def ZnWriteEXTRACTPSr : SchedWriteRes<[ZnFPU12, ZnFPU2]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
}
def : InstRW<[ZnWriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>;
def ZnWriteEXTRACTPSm : SchedWriteRes<[ZnAGU,ZnFPU12, ZnFPU2]> {
let Latency = 5;
let NumMicroOps = 2;
- let ResourceCycles = [5, 1, 2];
+ let ReleaseAtCycles = [5, 1, 2];
}
// m32,x,i.
def : InstRW<[ZnWriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>;
@@ -1027,12 +1027,12 @@ def : InstRW<[ZnWriteFPU013m], (instrs VEXTRACTF128mr,
def ZnWriteVINSERT128r: SchedWriteRes<[ZnFPU013]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def ZnWriteVINSERT128Ld: SchedWriteRes<[ZnAGU,ZnFPU013]> {
let Latency = 9;
let NumMicroOps = 2;
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
}
// VINSERTF128 / VINSERTI128.
// y,y,x,i.
@@ -1051,7 +1051,7 @@ def ZnWriteCVTPD2PSr: SchedWriteRes<[ZnFPU3]> {
def ZnWriteCVTPD2PSYr: SchedWriteRes<[ZnFPU3]> {
let Latency = 5;
let NumMicroOps = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
// CVTPD2PS.
@@ -1072,7 +1072,7 @@ def : SchedAlias<WriteCvtPD2PSLd, ZnWriteCVTPD2PSLd>;
def ZnWriteCVTPD2PSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
let Latency = 11;
let NumMicroOps = 2;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def : SchedAlias<WriteCvtPD2PSYLd, ZnWriteCVTPD2PSYLd>;
// z,m512
@@ -1121,7 +1121,7 @@ def : SchedAlias<WriteCvtSS2SD, ZnWriteCVTSS2SDr>;
def ZnWriteCVTSS2SDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
let Latency = 11;
let NumMicroOps = 2;
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
}
def : SchedAlias<WriteCvtSS2SDLd, ZnWriteCVTSS2SDLd>;
@@ -1243,13 +1243,13 @@ def : InstRW<[WriteMicrocoded], (instregex "SHA256MSG2(Y?)r(r|m)")>;
// x,x.
def ZnWriteSHA1MSG1r : SchedWriteRes<[ZnFPU12]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
def : InstRW<[ZnWriteSHA1MSG1r], (instregex "SHA(1|256)MSG1rr")>;
// x,m.
def ZnWriteSHA1MSG1Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {
let Latency = 9;
- let ResourceCycles = [1,2];
+ let ReleaseAtCycles = [1,2];
}
def : InstRW<[ZnWriteSHA1MSG1Ld], (instregex "SHA(1|256)MSG1rm")>;
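One subtlety shared by the Znver1 pair helpers above (and their BtVer2/SLM counterparts): when the register variant passes an empty Res list, the folded variant deliberately keeps ReleaseAtCycles empty instead of prepending [1]. An empty list already defaults to one release cycle per listed resource, whereas a prepended one-element list would no longer match the two-entry port list and would be rejected. In comment form (values illustrative):

// Res = []   ->  folded ReleaseAtCycles = []      (defaults: 1 cycle on each of [ZnAGU, port])
// Res = [2]  ->  folded ReleaseAtCycles = [1, 2]  (1 cycle on ZnAGU, 2 on the execution port)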
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver2.td b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver2.td
index 117885406a0a..c0775847798d 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver2.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver2.td
@@ -135,7 +135,7 @@ multiclass Zn2WriteResPair<X86FoldableSchedWrite SchedRW,
// Register variant takes 1-cycle on Execution Port.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
- let ResourceCycles = Res;
+ let ReleaseAtCycles = Res;
let NumMicroOps = UOps;
}
@@ -143,7 +143,7 @@ multiclass Zn2WriteResPair<X86FoldableSchedWrite SchedRW,
// adds LoadLat cycles to the latency (default = 4).
def : WriteRes<SchedRW.Folded, !listconcat([Zn2AGU], ExePorts)> {
let Latency = !add(Lat, LoadLat);
- let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
+ let ReleaseAtCycles = !if(!empty(Res), [], !listconcat([1], Res));
let NumMicroOps = !add(UOps, LoadUOps);
}
}
@@ -156,7 +156,7 @@ multiclass Zn2WriteResFpuPair<X86FoldableSchedWrite SchedRW,
// Register variant takes 1-cycle on Execution Port.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
- let ResourceCycles = Res;
+ let ReleaseAtCycles = Res;
let NumMicroOps = UOps;
}
@@ -164,7 +164,7 @@ multiclass Zn2WriteResFpuPair<X86FoldableSchedWrite SchedRW,
// adds LoadLat cycles to the latency (default = 7).
def : WriteRes<SchedRW.Folded, !listconcat([Zn2AGU], ExePorts)> {
let Latency = !add(Lat, LoadLat);
- let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
+ let ReleaseAtCycles = !if(!empty(Res), [], !listconcat([1], Res));
let NumMicroOps = !add(UOps, LoadUOps);
}
}
@@ -454,12 +454,12 @@ defm : Zn2WriteResFpuPair<WriteVecInsert, [Zn2FPU], 1>;
def : WriteRes<WriteVecExtract, [Zn2FPU12, Zn2FPU2]> {
let Latency = 2;
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
}
def : WriteRes<WriteVecExtractSt, [Zn2AGU, Zn2FPU12, Zn2FPU2]> {
let Latency = 5;
let NumMicroOps = 2;
- let ResourceCycles = [1, 2, 3];
+ let ReleaseAtCycles = [1, 2, 3];
}
// MOVMSK Instructions.
@@ -470,7 +470,7 @@ def : WriteRes<WriteVecMOVMSK, [Zn2FPU2]>;
def : WriteRes<WriteVecMOVMSKY, [Zn2FPU2]> {
let NumMicroOps = 2;
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
}
// AES Instructions.
@@ -879,7 +879,7 @@ def Zn2WriteFPU03 : SchedWriteRes<[Zn2AGU, Zn2FPU03]>
{
let Latency = 12;
let NumMicroOps = 2;
- let ResourceCycles = [1,3];
+ let ReleaseAtCycles = [1,3];
}
// FICOM(P).
@@ -920,12 +920,12 @@ def : InstRW<[Zn2WriteFPU01Y], (instrs VPBLENDDYrri)>;
def Zn2WriteFPU01Op2 : SchedWriteRes<[Zn2AGU, Zn2FPU01]> {
let NumMicroOps = 2;
let Latency = 8;
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
}
def Zn2WriteFPU01Op2Y : SchedWriteRes<[Zn2AGU, Zn2FPU01]> {
let NumMicroOps = 2;
let Latency = 9;
- let ResourceCycles = [1, 3];
+ let ReleaseAtCycles = [1, 3];
}
def : InstRW<[Zn2WriteFPU01Op2], (instrs VPBLENDDrmi)>;
def : InstRW<[Zn2WriteFPU01Op2Y], (instrs VPBLENDDYrmi)>;
@@ -948,7 +948,7 @@ def : InstRW<[WriteMicrocoded], (instregex "VPMASKMOV(D|Q)(Y?)mr")>;
def Zn2WriteVPBROADCAST128Ld : SchedWriteRes<[Zn2AGU, Zn2FPU12]> {
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
}
def : InstRW<[Zn2WriteVPBROADCAST128Ld],
(instregex "VPBROADCAST(B|W)rm")>;
@@ -957,7 +957,7 @@ def : InstRW<[Zn2WriteVPBROADCAST128Ld],
def Zn2WriteVPBROADCAST256Ld : SchedWriteRes<[Zn2AGU, Zn2FPU1]> {
let Latency = 8;
let NumMicroOps = 2;
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
}
def : InstRW<[Zn2WriteVPBROADCAST256Ld],
(instregex "VPBROADCAST(B|W)Yrm")>;
@@ -1004,22 +1004,22 @@ def Zn2WriteBROADCAST : SchedWriteRes<[Zn2AGU, Zn2FPU13]> {
let Latency = 8;
}
// VBROADCASTF128 / VBROADCASTI128.
-def : InstRW<[Zn2WriteBROADCAST], (instrs VBROADCASTF128,
- VBROADCASTI128)>;
+def : InstRW<[Zn2WriteBROADCAST], (instrs VBROADCASTF128rm,
+ VBROADCASTI128rm)>;
// EXTRACTPS.
// r32,x,i.
def Zn2WriteEXTRACTPSr : SchedWriteRes<[Zn2FPU12, Zn2FPU2]> {
let Latency = 2;
let NumMicroOps = 2;
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
}
def : InstRW<[Zn2WriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>;
def Zn2WriteEXTRACTPSm : SchedWriteRes<[Zn2AGU,Zn2FPU12, Zn2FPU2]> {
let Latency = 5;
let NumMicroOps = 2;
- let ResourceCycles = [5, 1, 2];
+ let ReleaseAtCycles = [5, 1, 2];
}
// m32,x,i.
def : InstRW<[Zn2WriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>;
@@ -1035,7 +1035,7 @@ def : InstRW<[Zn2WriteFPU013m], (instrs VEXTRACTF128mr,
def Zn2WriteVINSERT128r: SchedWriteRes<[Zn2FPU013]> {
let Latency = 2;
-// let ResourceCycles = [2];
+// let ReleaseAtCycles = [2];
}
def Zn2WriteVINSERT128Ld: SchedWriteRes<[Zn2AGU,Zn2FPU013]> {
let Latency = 9;
@@ -1124,7 +1124,7 @@ def : SchedAlias<WriteCvtSS2SD, Zn2WriteCVTSS2SDr>;
def Zn2WriteCVTSS2SDLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
let Latency = 10;
let NumMicroOps = 2;
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
}
def : SchedAlias<WriteCvtSS2SDLd, Zn2WriteCVTSS2SDLd>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver3.td b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver3.td
index fd617765b167..d90c8bd284eb 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver3.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver3.td
@@ -402,7 +402,7 @@ multiclass __zn3WriteRes<SchedWrite SchedRW, list<ProcResourceKind> ExePorts,
int Lat = 1, list<int> Res = [], int UOps = 1> {
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
- let ResourceCycles = Res;
+ let ReleaseAtCycles = Res;
let NumMicroOps = UOps;
}
}
@@ -501,7 +501,7 @@ defm : Zn3WriteResInt<WriteVecMaskedGatherWriteback, [], !add(Znver3Model.LoadLa
def Zn3WriteMOVSlow : SchedWriteRes<[Zn3AGU012, Zn3Load]> {
let Latency = !add(Znver3Model.LoadLatency, 1);
- let ResourceCycles = [3, 1];
+ let ReleaseAtCycles = [3, 1];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteMOVSlow], (instrs MOV8rm, MOV8rm_NOREX, MOV16rm, MOVSX16rm16, MOVSX16rm32, MOVZX16rm16, MOVSX16rm8, MOVZX16rm8)>;
@@ -515,14 +515,14 @@ def : InstRW<[WriteMove], (instrs COPY)>;
def Zn3WriteMOVBE16rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123]> {
let Latency = Znver3Model.LoadLatency;
- let ResourceCycles = [1, 1, 4];
+ let ReleaseAtCycles = [1, 1, 4];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteMOVBE16rm], (instrs MOVBE16rm)>;
def Zn3WriteMOVBEmr : SchedWriteRes<[Zn3ALU0123, Zn3AGU012, Zn3Store]> {
let Latency = Znver3Model.StoreLatency;
- let ResourceCycles = [4, 1, 1];
+ let ReleaseAtCycles = [4, 1, 1];
let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteMOVBEmr], (instrs MOVBE16mr, MOVBE32mr, MOVBE64mr)>;
@@ -532,7 +532,7 @@ defm : Zn3WriteResIntPair<WriteALU, [Zn3ALU0123], 1, [1], 1>; // Simple integer
def Zn3WriteALUSlow : SchedWriteRes<[Zn3ALU0123]> {
let Latency = 1;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteALUSlow], (instrs ADD8i8, ADD16i16, ADD32i32, ADD64i32,
@@ -543,21 +543,21 @@ def : InstRW<[Zn3WriteALUSlow], (instrs ADD8i8, ADD16i16, ADD32i32, ADD64i32,
def Zn3WriteMoveExtend : SchedWriteRes<[Zn3ALU0123]> {
let Latency = 1;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteMoveExtend], (instrs MOVSX16rr16, MOVSX16rr32, MOVZX16rr16, MOVSX16rr8, MOVZX16rr8)>;
def Zn3WriteMaterialize32bitImm: SchedWriteRes<[Zn3ALU0123]> {
let Latency = 1;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteMaterialize32bitImm], (instrs MOV32ri, MOV32ri_alt, MOV64ri32)>;
def Zn3WritePDEP_PEXT : SchedWriteRes<[Zn3ALU1]> {
let Latency = 3;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WritePDEP_PEXT], (instrs PDEP32rr, PDEP64rr,
@@ -567,7 +567,7 @@ defm : Zn3WriteResIntPair<WriteADC, [Zn3ALU0123], 1, [4], 1>; // Integer ALU + f
def Zn3WriteADC8mr_SBB8mr : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123, Zn3Store]> {
let Latency = 1;
- let ResourceCycles = [1, 1, 7, 1];
+ let ReleaseAtCycles = [1, 1, 7, 1];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteADC8mr_SBB8mr], (instrs ADC8mr, SBB8mr)>;
@@ -578,7 +578,7 @@ defm : Zn3WriteResInt<WriteLEA, [Zn3AGU012], 1, [1], 1>; // LEA instructions
// This write is used for slow LEA instructions.
def Zn3Write3OpsLEA : SchedWriteRes<[Zn3ALU0123]> {
let Latency = 2;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 2;
}
@@ -605,7 +605,7 @@ def : InstRW<[Zn3WriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;
def Zn3SlowLEA16r : SchedWriteRes<[Zn3ALU0123]> {
let Latency = 2; // FIXME: not from llvm-exegesis
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
let NumMicroOps = 2;
}
@@ -634,7 +634,7 @@ defm : Zn3WriteResIntPair<WriteCMPXCHG, [Zn3ALU0123], 3, [12], 5>; // Compare an
def Zn3WriteCMPXCHG8rr : SchedWriteRes<[Zn3ALU0123]> {
let Latency = 3;
- let ResourceCycles = [12];
+ let ReleaseAtCycles = [12];
let NumMicroOps = 3;
}
def : InstRW<[Zn3WriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;
@@ -643,42 +643,42 @@ defm : Zn3WriteResInt<WriteCMPXCHGRMW, [Zn3ALU0123], 3, [12], 6>; // Compare
def Zn3WriteCMPXCHG8rm_LCMPXCHG8 : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123]> {
let Latency = !add(Znver3Model.LoadLatency, Zn3WriteCMPXCHG8rr.Latency);
- let ResourceCycles = [1, 1, 12];
+ let ReleaseAtCycles = [1, 1, 12];
let NumMicroOps = !add(Zn3WriteCMPXCHG8rr.NumMicroOps, 2);
}
def : InstRW<[Zn3WriteCMPXCHG8rm_LCMPXCHG8], (instrs CMPXCHG8rm, LCMPXCHG8)>;
def Zn3WriteCMPXCHG8B : SchedWriteRes<[Zn3ALU0123]> {
let Latency = 3; // FIXME: not from llvm-exegesis
- let ResourceCycles = [24];
+ let ReleaseAtCycles = [24];
let NumMicroOps = 19;
}
def : InstRW<[Zn3WriteCMPXCHG8B], (instrs CMPXCHG8B)>;
def Zn3WriteCMPXCHG16B_LCMPXCHG16B : SchedWriteRes<[Zn3ALU0123]> {
let Latency = 4; // FIXME: not from llvm-exegesis
- let ResourceCycles = [59];
+ let ReleaseAtCycles = [59];
let NumMicroOps = 28;
}
def : InstRW<[Zn3WriteCMPXCHG16B_LCMPXCHG16B], (instrs CMPXCHG16B, LCMPXCHG16B)>;
def Zn3WriteWriteXCHGUnrenameable : SchedWriteRes<[Zn3ALU0123]> {
let Latency = 1;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteWriteXCHGUnrenameable], (instrs XCHG8rr, XCHG16rr, XCHG16ar)>;
def Zn3WriteXCHG8rm_XCHG16rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123]> {
let Latency = !add(Znver3Model.LoadLatency, 3); // FIXME: not from llvm-exegesis
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = 5;
}
def : InstRW<[Zn3WriteXCHG8rm_XCHG16rm], (instrs XCHG8rm, XCHG16rm)>;
def Zn3WriteXCHG32rm_XCHG64rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123]> {
let Latency = !add(Znver3Model.LoadLatency, 2); // FIXME: not from llvm-exegesis
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteXCHG32rm_XCHG64rm], (instrs XCHG32rm, XCHG64rm)>;
@@ -702,7 +702,7 @@ defm : Zn3WriteResIntPair<WritePOPCNT, [Zn3ALU0123], 1, [1], 1>; // Bit populati
def Zn3WritePOPCNT16rr : SchedWriteRes<[Zn3ALU0123]> {
let Latency = 1;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WritePOPCNT16rr], (instrs POPCNT16rr)>;
@@ -711,7 +711,7 @@ defm : Zn3WriteResIntPair<WriteLZCNT, [Zn3ALU0123], 1, [1], 1>; // Leading zero
def Zn3WriteLZCNT16rr : SchedWriteRes<[Zn3ALU0123]> {
let Latency = 1;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteLZCNT16rr], (instrs LZCNT16rr)>;
@@ -720,7 +720,7 @@ defm : Zn3WriteResIntPair<WriteTZCNT, [Zn3ALU12], 2, [1], 2>; // Trailing zero c
def Zn3WriteTZCNT16rr : SchedWriteRes<[Zn3ALU0123]> {
let Latency = 2;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteTZCNT16rr], (instrs TZCNT16rr)>;
@@ -746,7 +746,7 @@ defm : Zn3WriteResIntPair<WriteRotate, [Zn3ALU12], 1, [1], 1, /*LoadUOps=*/1>;
def Zn3WriteRotateR1 : SchedWriteRes<[Zn3ALU12]> {
let Latency = 1;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteRotateR1], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
@@ -754,7 +754,7 @@ def : InstRW<[Zn3WriteRotateR1], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
def Zn3WriteRotateM1 : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> {
let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateR1.Latency);
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn3WriteRotateR1.NumMicroOps, 1);
}
def : InstRW<[Zn3WriteRotateM1], (instrs RCL8m1, RCL16m1, RCL32m1, RCL64m1,
@@ -762,28 +762,28 @@ def : InstRW<[Zn3WriteRotateM1], (instrs RCL8m1, RCL16m1, RCL32m1, RCL64m1,
def Zn3WriteRotateRightRI : SchedWriteRes<[Zn3ALU12]> {
let Latency = 3;
- let ResourceCycles = [6];
+ let ReleaseAtCycles = [6];
let NumMicroOps = 7;
}
def : InstRW<[Zn3WriteRotateRightRI], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;
def Zn3WriteRotateRightMI : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> {
let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateRightRI.Latency);
- let ResourceCycles = [1, 1, 8];
+ let ReleaseAtCycles = [1, 1, 8];
let NumMicroOps = !add(Zn3WriteRotateRightRI.NumMicroOps, 3);
}
def : InstRW<[Zn3WriteRotateRightMI], (instrs RCR8mi, RCR16mi, RCR32mi, RCR64mi)>;
def Zn3WriteRotateLeftRI : SchedWriteRes<[Zn3ALU12]> {
let Latency = 4;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let NumMicroOps = 9;
}
def : InstRW<[Zn3WriteRotateLeftRI], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;
def Zn3WriteRotateLeftMI : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> {
let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateLeftRI.Latency);
- let ResourceCycles = [1, 1, 8];
+ let ReleaseAtCycles = [1, 1, 8];
let NumMicroOps = !add(Zn3WriteRotateLeftRI.NumMicroOps, 2);
}
def : InstRW<[Zn3WriteRotateLeftMI], (instrs RCL8mi, RCL16mi, RCL32mi, RCL64mi)>;
@@ -792,28 +792,28 @@ defm : Zn3WriteResIntPair<WriteRotateCL, [Zn3ALU12], 1, [1], 1, /*LoadUOps=*/1>;
def Zn3WriteRotateRightRCL : SchedWriteRes<[Zn3ALU12]> {
let Latency = 3;
- let ResourceCycles = [6];
+ let ReleaseAtCycles = [6];
let NumMicroOps = 7;
}
def : InstRW<[Zn3WriteRotateRightRCL], (instrs RCR8rCL, RCR16rCL, RCR32rCL, RCR64rCL)>;
def Zn3WriteRotateRightMCL : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> {
let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateRightRCL.Latency);
- let ResourceCycles = [1, 1, 8];
+ let ReleaseAtCycles = [1, 1, 8];
let NumMicroOps = !add(Zn3WriteRotateRightRCL.NumMicroOps, 2);
}
def : InstRW<[Zn3WriteRotateRightMCL], (instrs RCR8mCL, RCR16mCL, RCR32mCL, RCR64mCL)>;
def Zn3WriteRotateLeftRCL : SchedWriteRes<[Zn3ALU12]> {
let Latency = 4;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let NumMicroOps = 9;
}
def : InstRW<[Zn3WriteRotateLeftRCL], (instrs RCL8rCL, RCL16rCL, RCL32rCL, RCL64rCL)>;
def Zn3WriteRotateLeftMCL : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> {
let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateLeftRCL.Latency);
- let ResourceCycles = [1, 1, 8];
+ let ReleaseAtCycles = [1, 1, 8];
let NumMicroOps = !add(Zn3WriteRotateLeftRCL.NumMicroOps, 2);
}
def : InstRW<[Zn3WriteRotateLeftMCL], (instrs RCL8mCL, RCL16mCL, RCL32mCL, RCL64mCL)>;
@@ -850,7 +850,7 @@ defm : Zn3WriteResXMM<WriteFStore, [Zn3FPSt, Zn3Store], Znver3Model.StoreLatency
def Zn3WriteWriteFStoreMMX : SchedWriteRes<[Zn3FPSt, Zn3Store]> {
let Latency = 2; // FIXME: not from llvm-exegesis
- let ResourceCycles = [1, 1];
+ let ReleaseAtCycles = [1, 1];
let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteWriteFStoreMMX], (instrs MOVHPDmr, MOVHPSmr,
@@ -871,7 +871,7 @@ defm : Zn3WriteResXMMPair<WriteFAdd, [Zn3FPFAdd01], 3, [1], 1>; // Floating poi
def Zn3WriteX87Arith : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
let Latency = !add(Znver3Model.LoadLatency, 1); // FIXME: not from llvm-exegesis
- let ResourceCycles = [1, 1, 24];
+ let ReleaseAtCycles = [1, 1, 24];
let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteX87Arith], (instrs ADD_FI16m, ADD_FI32m,
@@ -881,7 +881,7 @@ def : InstRW<[Zn3WriteX87Arith], (instrs ADD_FI16m, ADD_FI32m,
def Zn3WriteX87Div : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
let Latency = !add(Znver3Model.LoadLatency, 1); // FIXME: not from llvm-exegesis
- let ResourceCycles = [1, 1, 62];
+ let ReleaseAtCycles = [1, 1, 62];
let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteX87Div], (instrs DIV_FI16m, DIV_FI32m,
@@ -989,21 +989,21 @@ defm : Zn3WriteResXMM<WriteVecStoreX, [Zn3FPSt, Zn3Store], Znver3Model.StoreLate
def Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr : SchedWriteRes<[Zn3FPFMisc0]> {
let Latency = 4;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr], (instrs VEXTRACTF128rr, VEXTRACTI128rr)>;
def Zn3WriteVEXTRACTI128mr : SchedWriteRes<[Zn3FPFMisc0, Zn3FPSt, Zn3Store]> {
let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
- let ResourceCycles = [1, 1, 1];
+ let ReleaseAtCycles = [1, 1, 1];
let NumMicroOps = !add(Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 1);
}
def : InstRW<[Zn3WriteVEXTRACTI128mr], (instrs VEXTRACTI128mr, VEXTRACTF128mr)>;
def Zn3WriteVINSERTF128rmr : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPFMisc0]> {
let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
- let ResourceCycles = [1, 1, 1];
+ let ReleaseAtCycles = [1, 1, 1];
let NumMicroOps = !add(Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 0);
}
def : InstRW<[Zn3WriteVINSERTF128rmr], (instrs VINSERTF128rm)>;
@@ -1021,14 +1021,14 @@ defm : Zn3WriteResXMM<WriteVecMoveFromGpr, [Zn3FPLd01], 1, [2], 1>;
def Zn3WriteMOVMMX : SchedWriteRes<[Zn3FPLd01, Zn3FPFMisc0123]> {
let Latency = 1;
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteMOVMMX], (instrs MMX_MOVQ2FR64rr, MMX_MOVQ2DQrr)>;
def Zn3WriteMOVMMXSlow : SchedWriteRes<[Zn3FPLd01, Zn3FPFMisc0123]> {
let Latency = 1;
- let ResourceCycles = [1, 4];
+ let ReleaseAtCycles = [1, 4];
let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteMOVMMXSlow], (instrs MMX_MOVD64rr, MMX_MOVD64to64rr)>;
@@ -1037,14 +1037,14 @@ defm : Zn3WriteResXMMPair<WriteVecALU, [Zn3FPVAdd0123], 1, [1], 1>; // Vector i
def Zn3WriteEXTRQ_INSERTQ : SchedWriteRes<[Zn3FPVShuf01, Zn3FPLd01]> {
let Latency = 3;
- let ResourceCycles = [1, 1];
+ let ReleaseAtCycles = [1, 1];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteEXTRQ_INSERTQ], (instrs EXTRQ, INSERTQ)>;
def Zn3WriteEXTRQI_INSERTQI : SchedWriteRes<[Zn3FPVShuf01, Zn3FPLd01]> {
let Latency = 3;
- let ResourceCycles = [1, 1];
+ let ReleaseAtCycles = [1, 1];
let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteEXTRQI_INSERTQI], (instrs EXTRQI, INSERTQI)>;
@@ -1053,7 +1053,7 @@ defm : Zn3WriteResXMMPair<WriteVecALUX, [Zn3FPVAdd0123], 1, [1], 1>; // Vector i
def Zn3WriteVecALUXSlow : SchedWriteRes<[Zn3FPVAdd01]> {
let Latency = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteVecALUXSlow], (instrs PABSBrr, PABSDrr, PABSWrr,
@@ -1069,7 +1069,7 @@ def : InstRW<[Zn3WriteVecALUXSlow], (instrs PABSBrr, PABSDrr, PABSWrr,
def Zn3WriteVecALUXMMX : SchedWriteRes<[Zn3FPVAdd01]> {
let Latency = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteVecALUXMMX], (instrs MMX_PABSBrr, MMX_PABSDrr, MMX_PABSWrr,
@@ -1082,7 +1082,7 @@ defm : Zn3WriteResYMMPair<WriteVecALUY, [Zn3FPVAdd0123], 1, [1], 1>; // Vector i
def Zn3WriteVecALUYSlow : SchedWriteRes<[Zn3FPVAdd01]> {
let Latency = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteVecALUYSlow], (instrs VPABSBYrr, VPABSDYrr, VPABSWYrr,
@@ -1157,7 +1157,7 @@ defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>; // Double -> Integer (ZMM).
def Zn3WriteCvtPD2IMMX : SchedWriteRes<[Zn3FPFCvt01]> {
let Latency = 1;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteCvtPD2IMMX], (instrs MMX_CVTPD2PIrm, MMX_CVTTPD2PIrm, MMX_CVTPD2PIrr, MMX_CVTTPD2PIrr)>;
@@ -1175,7 +1175,7 @@ defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>; // Integer -> Double (ZMM).
def Zn3WriteCvtI2PDMMX : SchedWriteRes<[Zn3FPFCvt01]> {
let Latency = 2;
- let ResourceCycles = [6];
+ let ReleaseAtCycles = [6];
let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteCvtI2PDMMX], (instrs MMX_CVTPI2PDrm, MMX_CVTPI2PDrr)>;
@@ -1187,7 +1187,7 @@ defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>; // Integer -> Float (ZMM).
def Zn3WriteCvtI2PSMMX : SchedWriteRes<[Zn3FPFCvt01]> {
let Latency = 3;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteCvtI2PSMMX], (instrs MMX_CVTPI2PSrr)>;
@@ -1218,70 +1218,70 @@ defm : Zn3WriteResIntPair<WriteCRC32, [Zn3ALU1], 3, [1], 1>;
def Zn3WriteSHA1MSG1rr : SchedWriteRes<[Zn3FPU0123]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteSHA1MSG1rr], (instrs SHA1MSG1rr)>;
def Zn3WriteSHA1MSG1rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA1MSG1rr.Latency);
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn3WriteSHA1MSG1rr.NumMicroOps, 0);
}
def : InstRW<[Zn3WriteSHA1MSG1rm], (instrs SHA1MSG1rm)>;
def Zn3WriteSHA1MSG2rr_SHA1NEXTErr : SchedWriteRes<[Zn3FPU0123]> {
let Latency = 1;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteSHA1MSG2rr_SHA1NEXTErr], (instrs SHA1MSG2rr, SHA1NEXTErr)>;
def Zn3Writerm_SHA1MSG2rm_SHA1NEXTErm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA1MSG2rr_SHA1NEXTErr.Latency);
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn3WriteSHA1MSG2rr_SHA1NEXTErr.NumMicroOps, 0);
}
def : InstRW<[Zn3Writerm_SHA1MSG2rm_SHA1NEXTErm], (instrs SHA1MSG2rm, SHA1NEXTErm)>;
def Zn3WriteSHA256MSG1rr : SchedWriteRes<[Zn3FPU0123]> {
let Latency = 2;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteSHA256MSG1rr], (instrs SHA256MSG1rr)>;
def Zn3Writerm_SHA256MSG1rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA256MSG1rr.Latency);
- let ResourceCycles = [1, 1, 3];
+ let ReleaseAtCycles = [1, 1, 3];
let NumMicroOps = !add(Zn3WriteSHA256MSG1rr.NumMicroOps, 0);
}
def : InstRW<[Zn3Writerm_SHA256MSG1rm], (instrs SHA256MSG1rm)>;
def Zn3WriteSHA256MSG2rr : SchedWriteRes<[Zn3FPU0123]> {
let Latency = 3;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let NumMicroOps = 4;
}
def : InstRW<[Zn3WriteSHA256MSG2rr], (instrs SHA256MSG2rr)>;
def Zn3WriteSHA256MSG2rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA256MSG2rr.Latency);
- let ResourceCycles = [1, 1, 8];
+ let ReleaseAtCycles = [1, 1, 8];
let NumMicroOps = !add(Zn3WriteSHA256MSG2rr.NumMicroOps, 1);
}
def : InstRW<[Zn3WriteSHA256MSG2rm], (instrs SHA256MSG2rm)>;
def Zn3WriteSHA1RNDS4rri : SchedWriteRes<[Zn3FPU0123]> {
let Latency = 6;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteSHA1RNDS4rri], (instrs SHA1RNDS4rri)>;
def Zn3WriteSHA256RNDS2rr : SchedWriteRes<[Zn3FPU0123]> {
let Latency = 4;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteSHA256RNDS2rr], (instrs SHA256RNDS2rr)>;
@@ -1316,14 +1316,14 @@ defm : Zn3WriteResInt<WriteSystem, [Zn3ALU0123], 100, [100], 100>;
def Zn3WriteVZEROUPPER : SchedWriteRes<[Zn3FPU0123]> {
let Latency = 0; // FIXME: not from llvm-exegesis
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteVZEROUPPER], (instrs VZEROUPPER)>;
def Zn3WriteVZEROALL : SchedWriteRes<[Zn3FPU0123]> {
let Latency = 10; // FIXME: not from llvm-exegesis
- let ResourceCycles = [24];
+ let ReleaseAtCycles = [24];
let NumMicroOps = 18;
}
def : InstRW<[Zn3WriteVZEROALL], (instrs VZEROALL)>;
@@ -1335,42 +1335,42 @@ defm : Zn3WriteResYMMPair<WriteShuffle256, [Zn3FPVShuf], 2, [1], 1>; // 256-bit
def Zn3WriteVPERM2I128rr_VPERM2F128rr : SchedWriteRes<[Zn3FPVShuf]> {
let Latency = 3;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteVPERM2I128rr_VPERM2F128rr], (instrs VPERM2I128rr, VPERM2F128rr)>;
def Zn3WriteVPERM2F128rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVPERM2I128rr_VPERM2F128rr.Latency);
- let ResourceCycles = [1, 1, 1];
+ let ReleaseAtCycles = [1, 1, 1];
let NumMicroOps = !add(Zn3WriteVPERM2I128rr_VPERM2F128rr.NumMicroOps, 0);
}
def : InstRW<[Zn3WriteVPERM2F128rm], (instrs VPERM2F128rm)>;
def Zn3WriteVPERMPSYrm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
let Latency = !add(Znver3Model.LoadLatency, 7);
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = 3;
}
def : InstRW<[Zn3WriteVPERMPSYrm], (instrs VPERMPSYrm)>;
def Zn3WriteVPERMYri : SchedWriteRes<[Zn3FPVShuf]> {
let Latency = 6;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteVPERMYri], (instrs VPERMPDYri, VPERMQYri)>;
def Zn3WriteVPERMPDYmi : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVPERMYri.Latency);
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn3WriteVPERMYri.NumMicroOps, 1);
}
def : InstRW<[Zn3WriteVPERMPDYmi], (instrs VPERMPDYmi)>;
def Zn3WriteVPERMDYm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
let Latency = !add(Znver3Model.LoadLatency, 5);
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteVPERMDYm], (instrs VPERMQYmi, VPERMDYrm)>;
@@ -1389,14 +1389,14 @@ defm : Zn3WriteResInt<WriteFence, [Zn3ALU0123], 1, [100], 1>;
def Zn3WriteLFENCE : SchedWriteRes<[Zn3LSU]> {
let Latency = 1;
- let ResourceCycles = [30];
+ let ReleaseAtCycles = [30];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteLFENCE], (instrs LFENCE)>;
def Zn3WriteSFENCE : SchedWriteRes<[Zn3LSU]> {
let Latency = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteSFENCE], (instrs SFENCE)>;
@@ -1411,7 +1411,7 @@ defm : Zn3WriteResInt<WriteNop, [Zn3ALU0123], 0, [1], 1>; // FIXME: latency not
def Zn3WriteZeroLatency : SchedWriteRes<[]> {
let Latency = 0;
- let ResourceCycles = [];
+ let ReleaseAtCycles = [];
let NumMicroOps = 1;
}
def : InstRW<[Zn3WriteZeroLatency], (instrs MOV32rr, MOV32rr_REV,
@@ -1420,7 +1420,7 @@ def : InstRW<[Zn3WriteZeroLatency], (instrs MOV32rr, MOV32rr_REV,
def Zn3WriteSwapRenameable : SchedWriteRes<[]> {
let Latency = 0;
- let ResourceCycles = [];
+ let ReleaseAtCycles = [];
let NumMicroOps = 2;
}
def : InstRW<[Zn3WriteSwapRenameable], (instrs XCHG32rr, XCHG32ar,
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver4.td b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 269d77374beb..dac4d8422582 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -37,7 +37,8 @@ def Znver4Model : SchedMachineModel {
// Ideally for znver4, we should have 6.75K. However, we don't add that,
// considering the compile-time impact, and prefer using the default values
// instead.
- // let LoopMicroOpBufferSize = 6750;
+ // Retaining a minimal value to influence unrolling, as we did for znver3.
+ let LoopMicroOpBufferSize = 512;
// AMD SOG 19h, 2.6.2 L1 Data Cache
// The L1 data cache has a 4- or 5-cycle integer load-to-use latency.
// AMD SOG 19h, 2.12 L1 Data Cache
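The LoopMicroOpBufferSize hunk above is one of the few non-rename changes in this section: the loop unroller treats that field as a micro-op budget, so the model keeps the znver3-sized 512 rather than the hardware's 6.75K op cache, trading modeling fidelity for compile time. A sketch of where the field lives, with a made-up model name and the other fields elided:

def IllustrativeModel : SchedMachineModel {
  let IssueWidth = 6;               // dispatch width, illustrative only
  let LoopMicroOpBufferSize = 512;  // unroll budget, not the true 6.75K
}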
@@ -401,7 +402,7 @@ multiclass __Zn4WriteRes<SchedWrite SchedRW, list<ProcResourceKind> ExePorts,
int Lat = 1, list<int> Res = [], int UOps = 1> {
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
- let ResourceCycles = Res;
+ let ReleaseAtCycles = Res;
let NumMicroOps = UOps;
}
}
@@ -514,7 +515,7 @@ defm : Zn4WriteResInt<WriteVecMaskedGatherWriteback, [], !add(Znver4Model.LoadLa
def Zn4WriteMOVSlow : SchedWriteRes<[Zn4AGU012, Zn4Load]> {
let Latency = !add(Znver4Model.LoadLatency, 1);
- let ResourceCycles = [3, 1];
+ let ReleaseAtCycles = [3, 1];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteMOVSlow], (instrs MOV8rm, MOV8rm_NOREX, MOV16rm, MOVSX16rm16, MOVSX16rm32, MOVZX16rm16, MOVSX16rm8, MOVZX16rm8)>;
@@ -528,14 +529,14 @@ def : InstRW<[WriteMove], (instrs COPY)>;
def Zn4WriteMOVBE16rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU0123]> {
let Latency = Znver4Model.LoadLatency;
- let ResourceCycles = [1, 1, 4];
+ let ReleaseAtCycles = [1, 1, 4];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteMOVBE16rm], (instrs MOVBE16rm)>;
def Zn4WriteMOVBEmr : SchedWriteRes<[Zn4ALU0123, Zn4AGU012, Zn4Store]> {
let Latency = Znver4Model.StoreLatency;
- let ResourceCycles = [4, 1, 1];
+ let ReleaseAtCycles = [4, 1, 1];
let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteMOVBEmr], (instrs MOVBE16mr, MOVBE32mr, MOVBE64mr)>;
@@ -545,7 +546,7 @@ defm : Zn4WriteResIntPair<WriteALU, [Zn4ALU0123], 1, [1], 1>; // Simple integer
def Zn4WriteALUSlow : SchedWriteRes<[Zn4ALU0123]> {
let Latency = 1;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteALUSlow], (instrs ADD8i8, ADD16i16, ADD32i32, ADD64i32,
@@ -556,21 +557,21 @@ def : InstRW<[Zn4WriteALUSlow], (instrs ADD8i8, ADD16i16, ADD32i32, ADD64i32,
def Zn4WriteMoveExtend : SchedWriteRes<[Zn4ALU0123]> {
let Latency = 1;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteMoveExtend], (instrs MOVSX16rr16, MOVSX16rr32, MOVZX16rr16, MOVSX16rr8, MOVZX16rr8)>;
def Zn4WriteMaterialize32bitImm: SchedWriteRes<[Zn4ALU0123]> {
let Latency = 1;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteMaterialize32bitImm], (instrs MOV32ri, MOV32ri_alt, MOV64ri32)>;
def Zn4WritePDEP_PEXT : SchedWriteRes<[Zn4ALU1]> {
let Latency = 3;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WritePDEP_PEXT], (instrs PDEP32rr, PDEP64rr,
@@ -580,7 +581,7 @@ defm : Zn4WriteResIntPair<WriteADC, [Zn4ALU0123], 1, [4], 1>; // Integer ALU + f
def Zn4WriteADC8mr_SBB8mr : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU0123, Zn4Store]> {
let Latency = 1;
- let ResourceCycles = [1, 1, 7, 1];
+ let ReleaseAtCycles = [1, 1, 7, 1];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteADC8mr_SBB8mr], (instrs ADC8mr, SBB8mr)>;
@@ -591,7 +592,7 @@ defm : Zn4WriteResInt<WriteLEA, [Zn4AGU012], 1, [1], 1>; // LEA instructions
// This write is used for slow LEA instructions.
def Zn4Write3OpsLEA : SchedWriteRes<[Zn4ALU0123]> {
let Latency = 2;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 2;
}
@@ -618,7 +619,7 @@ def : InstRW<[Zn4WriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;
def Zn4SlowLEA16r : SchedWriteRes<[Zn4ALU0123]> {
let Latency = 2; // FIXME: not from llvm-exegesis
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
let NumMicroOps = 2;
}
@@ -647,7 +648,7 @@ defm : Zn4WriteResIntPair<WriteCMPXCHG, [Zn4ALU0123], 3, [12], 5>; // Compare an
def Zn4WriteCMPXCHG8rr : SchedWriteRes<[Zn4ALU0123]> {
let Latency = 3;
- let ResourceCycles = [12];
+ let ReleaseAtCycles = [12];
let NumMicroOps = 3;
}
def : InstRW<[Zn4WriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;
@@ -656,42 +657,42 @@ defm : Zn4WriteResInt<WriteCMPXCHGRMW, [Zn4ALU0123], 3, [12], 6>; // Compare
def Zn4WriteCMPXCHG8rm_LCMPXCHG8 : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU0123]> {
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteCMPXCHG8rr.Latency);
- let ResourceCycles = [1, 1, 12];
+ let ReleaseAtCycles = [1, 1, 12];
let NumMicroOps = !add(Zn4WriteCMPXCHG8rr.NumMicroOps, 2);
}
def : InstRW<[Zn4WriteCMPXCHG8rm_LCMPXCHG8], (instrs CMPXCHG8rm, LCMPXCHG8)>;
def Zn4WriteCMPXCHG8B : SchedWriteRes<[Zn4ALU0123]> {
let Latency = 3; // FIXME: not from llvm-exegesis
- let ResourceCycles = [24];
+ let ReleaseAtCycles = [24];
let NumMicroOps = 19;
}
def : InstRW<[Zn4WriteCMPXCHG8B], (instrs CMPXCHG8B)>;
def Zn4WriteCMPXCHG16B_LCMPXCHG16B : SchedWriteRes<[Zn4ALU0123]> {
let Latency = 4; // FIXME: not from llvm-exegesis
- let ResourceCycles = [59];
+ let ReleaseAtCycles = [59];
let NumMicroOps = 28;
}
def : InstRW<[Zn4WriteCMPXCHG16B_LCMPXCHG16B], (instrs CMPXCHG16B, LCMPXCHG16B)>;
def Zn4WriteWriteXCHGUnrenameable : SchedWriteRes<[Zn4ALU0123]> {
let Latency = 1;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteWriteXCHGUnrenameable], (instrs XCHG8rr, XCHG16rr, XCHG16ar)>;
def Zn4WriteXCHG8rm_XCHG16rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU0123]> {
let Latency = !add(Znver4Model.LoadLatency, 3); // FIXME: not from llvm-exegesis
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = 5;
}
def : InstRW<[Zn4WriteXCHG8rm_XCHG16rm], (instrs XCHG8rm, XCHG16rm)>;
def Zn4WriteXCHG32rm_XCHG64rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU0123]> {
let Latency = !add(Znver4Model.LoadLatency, 2); // FIXME: not from llvm-exegesis
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteXCHG32rm_XCHG64rm], (instrs XCHG32rm, XCHG64rm)>;
@@ -715,7 +716,7 @@ defm : Zn4WriteResIntPair<WritePOPCNT, [Zn4ALU0123], 1, [1], 1>; // Bit populati
def Zn4WritePOPCNT16rr : SchedWriteRes<[Zn4ALU0123]> {
let Latency = 1;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WritePOPCNT16rr], (instrs POPCNT16rr)>;
@@ -724,7 +725,7 @@ defm : Zn4WriteResIntPair<WriteLZCNT, [Zn4ALU0123], 1, [1], 1>; // Leading zero
def Zn4WriteLZCNT16rr : SchedWriteRes<[Zn4ALU0123]> {
let Latency = 1;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteLZCNT16rr], (instrs LZCNT16rr)>;
@@ -733,7 +734,7 @@ defm : Zn4WriteResIntPair<WriteTZCNT, [Zn4ALU12], 2, [1], 2>; // Trailing zero c
def Zn4WriteTZCNT16rr : SchedWriteRes<[Zn4ALU0123]> {
let Latency = 2;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteTZCNT16rr], (instrs TZCNT16rr)>;
@@ -759,7 +760,7 @@ defm : Zn4WriteResIntPair<WriteRotate, [Zn4ALU12], 1, [1], 1, /*LoadUOps=*/1>;
def Zn4WriteRotateR1 : SchedWriteRes<[Zn4ALU12]> {
let Latency = 1;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteRotateR1], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
@@ -767,7 +768,7 @@ def : InstRW<[Zn4WriteRotateR1], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
def Zn4WriteRotateM1 : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU12]> {
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteRotateR1.Latency);
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn4WriteRotateR1.NumMicroOps, 1);
}
def : InstRW<[Zn4WriteRotateM1], (instrs RCL8m1, RCL16m1, RCL32m1, RCL64m1,
@@ -775,28 +776,28 @@ def : InstRW<[Zn4WriteRotateM1], (instrs RCL8m1, RCL16m1, RCL32m1, RCL64m1,
def Zn4WriteRotateRightRI : SchedWriteRes<[Zn4ALU12]> {
let Latency = 3;
- let ResourceCycles = [6];
+ let ReleaseAtCycles = [6];
let NumMicroOps = 7;
}
def : InstRW<[Zn4WriteRotateRightRI], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;
def Zn4WriteRotateRightMI : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU12]> {
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteRotateRightRI.Latency);
- let ResourceCycles = [1, 1, 8];
+ let ReleaseAtCycles = [1, 1, 8];
let NumMicroOps = !add(Zn4WriteRotateRightRI.NumMicroOps, 3);
}
def : InstRW<[Zn4WriteRotateRightMI], (instrs RCR8mi, RCR16mi, RCR32mi, RCR64mi)>;
def Zn4WriteRotateLeftRI : SchedWriteRes<[Zn4ALU12]> {
let Latency = 4;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let NumMicroOps = 9;
}
def : InstRW<[Zn4WriteRotateLeftRI], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;
def Zn4WriteRotateLeftMI : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU12]> {
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteRotateLeftRI.Latency);
- let ResourceCycles = [1, 1, 8];
+ let ReleaseAtCycles = [1, 1, 8];
let NumMicroOps = !add(Zn4WriteRotateLeftRI.NumMicroOps, 2);
}
def : InstRW<[Zn4WriteRotateLeftMI], (instrs RCL8mi, RCL16mi, RCL32mi, RCL64mi)>;
@@ -805,28 +806,28 @@ defm : Zn4WriteResIntPair<WriteRotateCL, [Zn4ALU12], 1, [1], 1, /*LoadUOps=*/1>;
def Zn4WriteRotateRightRCL : SchedWriteRes<[Zn4ALU12]> {
let Latency = 3;
- let ResourceCycles = [6];
+ let ReleaseAtCycles = [6];
let NumMicroOps = 7;
}
def : InstRW<[Zn4WriteRotateRightRCL], (instrs RCR8rCL, RCR16rCL, RCR32rCL, RCR64rCL)>;
def Zn4WriteRotateRightMCL : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU12]> {
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteRotateRightRCL.Latency);
- let ResourceCycles = [1, 1, 8];
+ let ReleaseAtCycles = [1, 1, 8];
let NumMicroOps = !add(Zn4WriteRotateRightRCL.NumMicroOps, 2);
}
def : InstRW<[Zn4WriteRotateRightMCL], (instrs RCR8mCL, RCR16mCL, RCR32mCL, RCR64mCL)>;
def Zn4WriteRotateLeftRCL : SchedWriteRes<[Zn4ALU12]> {
let Latency = 4;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let NumMicroOps = 9;
}
def : InstRW<[Zn4WriteRotateLeftRCL], (instrs RCL8rCL, RCL16rCL, RCL32rCL, RCL64rCL)>;
def Zn4WriteRotateLeftMCL : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU12]> {
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteRotateLeftRCL.Latency);
- let ResourceCycles = [1, 1, 8];
+ let ReleaseAtCycles = [1, 1, 8];
let NumMicroOps = !add(Zn4WriteRotateLeftRCL.NumMicroOps, 2);
}
def : InstRW<[Zn4WriteRotateLeftMCL], (instrs RCL8mCL, RCL16mCL, RCL32mCL, RCL64mCL)>;
@@ -863,7 +864,7 @@ defm : Zn4WriteResXMM<WriteFStore, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency
def Zn4WriteWriteFStoreMMX : SchedWriteRes<[Zn4FPSt, Zn4Store]> {
let Latency = 2; // FIXME: not from llvm-exegesis
- let ResourceCycles = [1, 1];
+ let ReleaseAtCycles = [1, 1];
let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteWriteFStoreMMX], (instrs MOVHPDmr, MOVHPSmr,
@@ -884,7 +885,7 @@ defm : Zn4WriteResXMMPair<WriteFAdd, [Zn4FPFAdd01], 3, [1], 1>; // Floating poi
def Zn4WriteX87Arith : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
let Latency = !add(Znver4Model.LoadLatency, 1); // FIXME: not from llvm-exegesis
- let ResourceCycles = [1, 1, 24];
+ let ReleaseAtCycles = [1, 1, 24];
let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteX87Arith], (instrs ADD_FI16m, ADD_FI32m,
@@ -894,7 +895,7 @@ def : InstRW<[Zn4WriteX87Arith], (instrs ADD_FI16m, ADD_FI32m,
def Zn4WriteX87Div : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
let Latency = !add(Znver4Model.LoadLatency, 1); // FIXME: not from llvm-exegesis
- let ResourceCycles = [1, 1, 62];
+ let ReleaseAtCycles = [1, 1, 62];
let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteX87Div], (instrs DIV_FI16m, DIV_FI32m,
@@ -1003,21 +1004,21 @@ defm : Zn4WriteResXMM<WriteVecStoreX, [Zn4FPSt, Zn4Store], Znver4Model.StoreLate
def Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr : SchedWriteRes<[Zn4FPFMisc0]> {
let Latency = 4;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr], (instrs VEXTRACTF128rr, VEXTRACTI128rr)>;
def Zn4WriteVEXTRACTI128mr : SchedWriteRes<[Zn4FPFMisc0, Zn4FPSt, Zn4Store]> {
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
- let ResourceCycles = [1, 1, 1];
+ let ReleaseAtCycles = [1, 1, 1];
let NumMicroOps = !add(Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 1);
}
def : InstRW<[Zn4WriteVEXTRACTI128mr], (instrs VEXTRACTI128mr, VEXTRACTF128mr)>;
def Zn4WriteVINSERTF128rmr : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPFMisc0]> {
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
- let ResourceCycles = [1, 1, 1];
+ let ReleaseAtCycles = [1, 1, 1];
let NumMicroOps = !add(Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 0);
}
def : InstRW<[Zn4WriteVINSERTF128rmr], (instrs VINSERTF128rm)>;
@@ -1035,14 +1036,14 @@ defm : Zn4WriteResXMM<WriteVecMoveFromGpr, [Zn4FPLd01], 1, [2], 1>;
def Zn4WriteMOVMMX : SchedWriteRes<[Zn4FPLd01, Zn4FPFMisc0123]> {
let Latency = 1;
- let ResourceCycles = [1, 2];
+ let ReleaseAtCycles = [1, 2];
let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteMOVMMX], (instrs MMX_MOVQ2FR64rr, MMX_MOVQ2DQrr)>;
def Zn4WriteMOVMMXSlow : SchedWriteRes<[Zn4FPLd01, Zn4FPFMisc0123]> {
let Latency = 1;
- let ResourceCycles = [1, 4];
+ let ReleaseAtCycles = [1, 4];
let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteMOVMMXSlow], (instrs MMX_MOVD64rr, MMX_MOVD64to64rr)>;
@@ -1051,14 +1052,14 @@ defm : Zn4WriteResXMMPair<WriteVecALU, [Zn4FPVAdd0123], 1, [1], 1>; // Vector i
def Zn4WriteEXTRQ_INSERTQ : SchedWriteRes<[Zn4FPVShuf01, Zn4FPLd01]> {
let Latency = 3;
- let ResourceCycles = [1, 1];
+ let ReleaseAtCycles = [1, 1];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteEXTRQ_INSERTQ], (instrs EXTRQ, INSERTQ)>;
def Zn4WriteEXTRQI_INSERTQI : SchedWriteRes<[Zn4FPVShuf01, Zn4FPLd01]> {
let Latency = 3;
- let ResourceCycles = [1, 1];
+ let ReleaseAtCycles = [1, 1];
let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteEXTRQI_INSERTQI], (instrs EXTRQI, INSERTQI)>;
@@ -1067,7 +1068,7 @@ defm : Zn4WriteResXMMPair<WriteVecALUX, [Zn4FPVAdd0123], 1, [1], 1>; // Vector i
def Zn4WriteVecALUXSlow : SchedWriteRes<[Zn4FPVAdd01]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVecALUXSlow], (instrs PABSBrr, PABSDrr, PABSWrr,
@@ -1083,7 +1084,7 @@ def : InstRW<[Zn4WriteVecALUXSlow], (instrs PABSBrr, PABSDrr, PABSWrr,
def Zn4WriteVecOpMask : SchedWriteRes<[Zn4FPOpMask01]> {
let Latency = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVecOpMask], (instrs KADDBrr, KADDDrr, KADDQrr, KADDWrr,
@@ -1101,14 +1102,14 @@ def : InstRW<[Zn4WriteVecOpMask], (instrs KADDBrr, KADDDrr, KADDQrr, KADDWrr,
def Zn4WriteVecOpMaskMemMov : SchedWriteRes<[Zn4FPOpMask4]> {
let Latency = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVecOpMaskMemMov], (instrs KMOVBmk, KMOVDmk, KMOVQmk, KMOVWmk)>;
def Zn4WriteVecOpMaskKRMov : SchedWriteRes<[Zn4FPOpMask4]> {
let Latency = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVecOpMaskKRMov], (instrs KMOVBkr, KMOVDkr, KMOVQkr, KMOVWkr)>;
@@ -1116,7 +1117,7 @@ def : InstRW<[Zn4WriteVecOpMaskKRMov], (instrs KMOVBkr, KMOVDkr, KMOVQkr, KMOVWk
def Zn4WriteVecALU2Slow : SchedWriteRes<[Zn4FPVAdd12]> {
// TODO: All align instructions are expected to have 4-cycle latency
let Latency = 4;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVecALU2Slow], (instrs VALIGNDZrri, VALIGNDZ128rri, VALIGNDZ256rri,
@@ -1126,7 +1127,7 @@ defm : Zn4WriteResYMMPair<WriteVecALUY, [Zn4FPVAdd0123], 1, [1], 1>; // Vector i
def Zn4WriteVecALUYSlow : SchedWriteRes<[Zn4FPVAdd01]> {
let Latency = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVecALUYSlow], (instrs VPABSBYrr, VPABSDYrr, VPABSWYrr,
@@ -1202,7 +1203,7 @@ defm : Zn4WriteResZMMPair<WriteCvtPD2IZ, [Zn4FPFCvt01], 3, [4], 2>; // Double ->
def Zn4WriteCvtPD2IMMX : SchedWriteRes<[Zn4FPFCvt01]> {
let Latency = 1;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 2;
}
defm : Zn4WriteResXMMPair<WriteCvtSS2I, [Zn4FPFCvt01], 5, [5], 2>; // Float -> Integer.
@@ -1218,7 +1219,7 @@ defm : Zn4WriteResZMMPair<WriteCvtI2PDZ, [Zn4FPFCvt01], 4, [4], 4, /*LoadUOps=*/
def Zn4WriteCvtI2PDMMX : SchedWriteRes<[Zn4FPFCvt01]> {
let Latency = 2;
- let ResourceCycles = [6];
+ let ReleaseAtCycles = [6];
let NumMicroOps = 2;
}
@@ -1229,7 +1230,7 @@ defm : Zn4WriteResZMMPair<WriteCvtI2PSZ, [Zn4FPFCvt01], 3, [2], 2>; // Integer -
def Zn4WriteCvtI2PSMMX : SchedWriteRes<[Zn4FPFCvt01]> {
let Latency = 3;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 2;
}
@@ -1260,70 +1261,70 @@ defm : Zn4WriteResIntPair<WriteCRC32, [Zn4ALU1], 3, [1], 1>;
def Zn4WriteSHA1MSG1rr : SchedWriteRes<[Zn4FPU0123]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteSHA1MSG1rr], (instrs SHA1MSG1rr)>;
def Zn4WriteSHA1MSG1rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA1MSG1rr.Latency);
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn4WriteSHA1MSG1rr.NumMicroOps, 0);
}
def : InstRW<[Zn4WriteSHA1MSG1rm], (instrs SHA1MSG1rm)>;
def Zn4WriteSHA1MSG2rr_SHA1NEXTErr : SchedWriteRes<[Zn4FPU0123]> {
let Latency = 1;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteSHA1MSG2rr_SHA1NEXTErr], (instrs SHA1MSG2rr, SHA1NEXTErr)>;
def Zn4Writerm_SHA1MSG2rm_SHA1NEXTErm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA1MSG2rr_SHA1NEXTErr.Latency);
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn4WriteSHA1MSG2rr_SHA1NEXTErr.NumMicroOps, 0);
}
def : InstRW<[Zn4Writerm_SHA1MSG2rm_SHA1NEXTErm], (instrs SHA1MSG2rm, SHA1NEXTErm)>;
def Zn4WriteSHA256MSG1rr : SchedWriteRes<[Zn4FPU0123]> {
let Latency = 2;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteSHA256MSG1rr], (instrs SHA256MSG1rr)>;
def Zn4Writerm_SHA256MSG1rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA256MSG1rr.Latency);
- let ResourceCycles = [1, 1, 3];
+ let ReleaseAtCycles = [1, 1, 3];
let NumMicroOps = !add(Zn4WriteSHA256MSG1rr.NumMicroOps, 0);
}
def : InstRW<[Zn4Writerm_SHA256MSG1rm], (instrs SHA256MSG1rm)>;
def Zn4WriteSHA256MSG2rr : SchedWriteRes<[Zn4FPU0123]> {
let Latency = 3;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let NumMicroOps = 4;
}
def : InstRW<[Zn4WriteSHA256MSG2rr], (instrs SHA256MSG2rr)>;
def Zn4WriteSHA256MSG2rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA256MSG2rr.Latency);
- let ResourceCycles = [1, 1, 8];
+ let ReleaseAtCycles = [1, 1, 8];
let NumMicroOps = !add(Zn4WriteSHA256MSG2rr.NumMicroOps, 1);
}
def : InstRW<[Zn4WriteSHA256MSG2rm], (instrs SHA256MSG2rm)>;
def Zn4WriteSHA1RNDS4rri : SchedWriteRes<[Zn4FPU0123]> {
let Latency = 6;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteSHA1RNDS4rri], (instrs SHA1RNDS4rri)>;
def Zn4WriteSHA256RNDS2rr : SchedWriteRes<[Zn4FPU0123]> {
let Latency = 4;
- let ResourceCycles = [8];
+ let ReleaseAtCycles = [8];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteSHA256RNDS2rr], (instrs SHA256RNDS2rr)>;
@@ -1358,14 +1359,14 @@ defm : Zn4WriteResInt<WriteSystem, [Zn4ALU0123], 100, [100], 100>;
def Zn4WriteVZEROUPPER : SchedWriteRes<[Zn4FPU0123]> {
let Latency = 0; // FIXME: not from llvm-exegesis
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVZEROUPPER], (instrs VZEROUPPER)>;
def Zn4WriteVZEROALL : SchedWriteRes<[Zn4FPU0123]> {
let Latency = 10; // FIXME: not from llvm-exegesis
- let ResourceCycles = [24];
+ let ReleaseAtCycles = [24];
let NumMicroOps = 18;
}
def : InstRW<[Zn4WriteVZEROALL], (instrs VZEROALL)>;
@@ -1377,56 +1378,56 @@ defm : Zn4WriteResYMMPair<WriteShuffle256, [Zn4FPVShuf], 1, [1], 1>; // 256-bit
def Zn4WriteVPERM2I128rr_VPERM2F128rr : SchedWriteRes<[Zn4FPVShuf]> {
let Latency = 3;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVPERM2I128rr_VPERM2F128rr], (instrs VPERM2I128rr, VPERM2F128rr)>;
def Zn4WriteVPERM2F128rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERM2I128rr_VPERM2F128rr.Latency);
- let ResourceCycles = [1, 1, 1];
+ let ReleaseAtCycles = [1, 1, 1];
let NumMicroOps = !add(Zn4WriteVPERM2I128rr_VPERM2F128rr.NumMicroOps, 0);
}
def : InstRW<[Zn4WriteVPERM2F128rm], (instrs VPERM2F128rm)>;
def Zn4WriteVPERMPSYrr : SchedWriteRes<[Zn4FPVShuf]> {
let Latency = 7;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteVPERMPSYrr], (instrs VPERMPSYrr)>;
def Zn4WriteVPERMPSYrm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERMPSYrr.Latency);
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn4WriteVPERMPSYrr.NumMicroOps, 1);
}
def : InstRW<[Zn4WriteVPERMPSYrm], (instrs VPERMPSYrm)>;
def Zn4WriteVPERMYri : SchedWriteRes<[Zn4FPVShuf]> {
let Latency = 6;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteVPERMYri], (instrs VPERMPDYri, VPERMQYri)>;
def Zn4WriteVPERMPDYmi : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERMYri.Latency);
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn4WriteVPERMYri.NumMicroOps, 1);
}
def : InstRW<[Zn4WriteVPERMPDYmi], (instrs VPERMPDYmi)>;
def Zn4WriteVPERMDYrr : SchedWriteRes<[Zn4FPVShuf]> {
let Latency = 5;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteVPERMDYrr], (instrs VPERMDYrr)>;
def Zn4WriteVPERMYm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERMDYrr.Latency);
- let ResourceCycles = [1, 1, 2];
+ let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn4WriteVPERMDYrr.NumMicroOps, 0);
}
def : InstRW<[Zn4WriteVPERMYm], (instrs VPERMQYmi, VPERMDYrm)>;
@@ -1445,14 +1446,14 @@ defm : Zn4WriteResInt<WriteFence, [Zn4ALU0123], 1, [100], 1>;
def Zn4WriteLFENCE : SchedWriteRes<[Zn4LSU]> {
let Latency = 1;
- let ResourceCycles = [30];
+ let ReleaseAtCycles = [30];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteLFENCE], (instrs LFENCE)>;
def Zn4WriteSFENCE : SchedWriteRes<[Zn4LSU]> {
let Latency = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteSFENCE], (instrs SFENCE)>;
@@ -1467,7 +1468,7 @@ defm : Zn4WriteResInt<WriteNop, [Zn4ALU0123], 0, [1], 1>; // FIXME: latency not
def Zn4WriteZeroLatency : SchedWriteRes<[]> {
let Latency = 0;
- let ResourceCycles = [];
+ let ReleaseAtCycles = [];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteZeroLatency], (instrs MOV32rr, MOV32rr_REV,
@@ -1476,7 +1477,7 @@ def : InstRW<[Zn4WriteZeroLatency], (instrs MOV32rr, MOV32rr_REV,
def Zn4WriteSwapRenameable : SchedWriteRes<[]> {
let Latency = 0;
- let ResourceCycles = [];
+ let ReleaseAtCycles = [];
let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteSwapRenameable], (instrs XCHG32rr, XCHG32ar,
@@ -1534,7 +1535,7 @@ def : IsOptimizableRegisterMove<[
// FIXUP and RANGE Instructions
def Zn4WriteVFIXUPIMMPDZrr_VRANGESDrr : SchedWriteRes<[Zn4FPFMisc01]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVFIXUPIMMPDZrr_VRANGESDrr], (instregex
@@ -1546,7 +1547,7 @@ def : InstRW<[Zn4WriteVFIXUPIMMPDZrr_VRANGESDrr], (instregex
// SCALE & REDUCE instructions
def Zn4WriteSCALErr: SchedWriteRes<[Zn4FPFMisc23]> {
let Latency = 6;
- let ResourceCycles = [6];
+ let ReleaseAtCycles = [6];
let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteSCALErr], (instregex
@@ -1557,7 +1558,7 @@ def : InstRW<[Zn4WriteSCALErr], (instregex
// BF16PS Instructions
def Zn4WriteBF16: SchedWriteRes<[Zn4FPFMisc23]> {
let Latency = 6;
- let ResourceCycles = [6];
+ let ReleaseAtCycles = [6];
let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteBF16], (instregex
@@ -1567,7 +1568,7 @@ def : InstRW<[Zn4WriteBF16], (instregex
// BUSD and VPMADD Instructions
def Zn4WriteBUSDr_VPMADDr: SchedWriteRes<[Zn4FPFMisc01]> {
let Latency = 4;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteBUSDr_VPMADDr], (instregex
@@ -1578,7 +1579,7 @@ def : InstRW<[Zn4WriteBUSDr_VPMADDr], (instregex
// SHIFT instructions
def Zn4WriteSHIFTrr: SchedWriteRes<[Zn4FPFMisc01]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteSHIFTrr], (instregex
@@ -1595,7 +1596,7 @@ def : InstRW<[Zn4WriteSHIFTrr], (instregex
def Zn4WriteSHIFTri: SchedWriteRes<[Zn4FPFMisc01]> {
let Latency = 1;
- let ResourceCycles = [1];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteSHIFTri], (instregex
@@ -1605,7 +1606,7 @@ def : InstRW<[Zn4WriteSHIFTri], (instregex
// ALIGN Instructions
def Zn4WriteALIGN: SchedWriteRes<[Zn4FPFMisc12]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteALIGN], (instregex
@@ -1615,7 +1616,7 @@ def : InstRW<[Zn4WriteALIGN], (instregex
// PACK Instructions
def Zn4WritePACK: SchedWriteRes<[Zn4FPFMisc12]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WritePACK], (instregex
@@ -1625,7 +1626,7 @@ def : InstRW<[Zn4WritePACK], (instregex
// MAX and MIN Instructions
def Zn4WriteFCmp64: SchedWriteRes<[Zn4FPFMisc01]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteFCmp64], (instregex
@@ -1638,7 +1639,7 @@ def : InstRW<[Zn4WriteFCmp64], (instregex
// MOV Instructions
def Zn4MOVS: SchedWriteRes<[Zn4FPFMisc12]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 1;
}
def : InstRW<[Zn4MOVS], (instregex
@@ -1651,7 +1652,7 @@ def : InstRW<[Zn4MOVS], (instregex
def Zn4MOVSZ: SchedWriteRes<[Zn4FPFMisc12]> {
let Latency = 4;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
let NumMicroOps = 1;
}
def : InstRW<[Zn4MOVSZ], (instregex
@@ -1660,7 +1661,7 @@ def : InstRW<[Zn4MOVSZ], (instregex
def Zn4MOVSrr: SchedWriteRes<[Zn4FPFMisc12]> {
let Latency = 5;
- let ResourceCycles = [5];
+ let ReleaseAtCycles = [5];
let NumMicroOps = 1;
}
def : InstRW<[Zn4MOVSrr], (instregex
@@ -1671,7 +1672,7 @@ def : InstRW<[Zn4MOVSrr], (instregex
// VPTEST Instructions
def Zn4VPTESTZ128: SchedWriteRes<[Zn4FPFMisc01]> {
let Latency = 3;
- let ResourceCycles = [3];
+ let ReleaseAtCycles = [3];
let NumMicroOps = 1;
}
def : InstRW<[Zn4VPTESTZ128], (instregex
@@ -1680,7 +1681,7 @@ def : InstRW<[Zn4VPTESTZ128], (instregex
def Zn4VPTESTZ256: SchedWriteRes<[Zn4FPFMisc01]> {
let Latency = 4;
- let ResourceCycles = [4];
+ let ReleaseAtCycles = [4];
let NumMicroOps = 1;
}
def : InstRW<[Zn4VPTESTZ256], (instregex
@@ -1689,7 +1690,7 @@ def : InstRW<[Zn4VPTESTZ256], (instregex
def Zn4VPTESTZ: SchedWriteRes<[Zn4FPFMisc01]> {
let Latency = 5;
- let ResourceCycles = [5];
+ let ReleaseAtCycles = [5];
let NumMicroOps = 1;
}
def : InstRW<[Zn4VPTESTZ], (instregex
@@ -1699,7 +1700,7 @@ def : InstRW<[Zn4VPTESTZ], (instregex
// CONFLICT Instructions
def Zn4CONFLICTZ128: SchedWriteRes<[Zn4FPFMisc01]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 1;
}
def : InstRW<[Zn4CONFLICTZ128], (instregex
@@ -1708,7 +1709,7 @@ def : InstRW<[Zn4CONFLICTZ128], (instregex
def Zn4CONFLICTrr: SchedWriteRes<[Zn4FPFMisc01,Zn4FPFMisc12,Zn4FPFMisc23]> {
let Latency = 6;
- let ResourceCycles = [2,2,2];
+ let ReleaseAtCycles = [2,2,2];
let NumMicroOps = 4;
}
def : InstRW<[Zn4CONFLICTrr], (instregex
@@ -1718,7 +1719,7 @@ def : InstRW<[Zn4CONFLICTrr], (instregex
// RSQRT Instructions
def Zn4VRSQRT14PDZ256: SchedWriteRes<[Zn4FPFMisc01]> {
let Latency = 5;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 1;
}
def : InstRW<[Zn4VRSQRT14PDZ256], (instregex
@@ -1729,7 +1730,7 @@ def : InstRW<[Zn4VRSQRT14PDZ256], (instregex
// PERM Instructions
def Zn4PERMILP: SchedWriteRes<[Zn4FPFMisc123]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 1;
}
def : InstRW<[Zn4PERMILP], (instregex
@@ -1738,17 +1739,17 @@ def : InstRW<[Zn4PERMILP], (instregex
def Zn4PERMIT2_128: SchedWriteRes<[Zn4FPFMisc12]> {
let Latency = 3;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 1;
}
def : InstRW<[Zn4PERMIT2_128], (instregex
- "VPERM(I2|T2)(PS|PD|W)128(rr|rrk|rrkz)",
- "VPERM(I2|T2)(B|D|Q)128(rr|rrk|rrkz)"
+ "VPERM(I2|T2)(PS|PD|W)Z128(rr|rrk|rrkz)",
+ "VPERM(I2|T2)(B|D|Q)Z128(rr|rrk|rrkz)"
)>;
def Zn4PERMIT2_128rr:SchedWriteRes<[Zn4FPFMisc12]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 1;
}
def : InstRW<[Zn4PERMIT2_128rr], (instregex
@@ -1758,36 +1759,36 @@ def : InstRW<[Zn4PERMIT2_128rr], (instregex
def Zn4PERMIT2_256: SchedWriteRes<[Zn4FPFMisc12]> {
let Latency = 4;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 1;
}
def : InstRW<[Zn4PERMIT2_256], (instregex
- "VPERM(I2|T2)(PS|PD|W)256(rr|rrk|rrkz)",
+ "VPERM(I2|T2)(PS|PD|W)Z256(rr|rrk|rrkz)",
"VPERMP(S|D)Z256(rr|rrk|rrkz)",
"V(P?)COMPRESS(B|W|D|Q|PD|PS|SD|SQ)Z256(rr|rrk|rrkz)",
"VPERM(B|D|Q|W)Z256(rr|rrk|rrkz)",
- "VPERM(I2|Q|T2)(B|D|Q)(Z?)256(rr|rrk|rrkz)",
+ "VPERM(I2|Q|T2)(B|D|Q)Z256(rr|rrk|rrkz)",
"VPEXPAND(B|W)Z256(rr|rrk|rrkz)"
)>;
def Zn4PERMIT2Z: SchedWriteRes<[Zn4FPFMisc12]> {
let Latency = 5;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 1;
}
def : InstRW<[Zn4PERMIT2Z], (instregex
- "VPERM(I2|T2)(PS|PD|W)(rr|rrk|rrkz)",
+ "VPERM(I2|T2)(PS|PD|W)Z(rr|rrk|rrkz)",
"VPERM(B|D|W)Z(rr|rrk|rrkz)",
- "VPERM(I2|Q|T2)(B|D|Q)(Z?)(rr|rrk|rrkz)",
+ "VPERM(I2|Q|T2)(B|D|Q)Z(rr|rrk|rrkz)",
"V(P?)COMPRESS(B|W|D|Q|PD|PS|SD|SQ)Z(rr|rrk|rrkz)",
"VPEXPAND(B|W)Z(rr|rrk|rrkz)",
- "VPERMP(S|D)Z(rr|rrk|rrkz)"
+ "VPERMP(S|D)Z(rr|rrk|rrkz)"
)>;
// ALU SLOW Misc Instructions
def Zn4VecALUZSlow: SchedWriteRes<[Zn4FPFMisc01]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ReleaseAtCycles = [2];
let NumMicroOps = 1;
}
def : InstRW<[Zn4VecALUZSlow], (instrs
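Beyond the field rename, the Znver4 hunks also repair several instregex patterns: the AVX-512 defs carry a Z before the vector-length suffix (VPERMT2PSZ128rr, not VPERMT2PS128rr), so the old patterns did not match those def names. A sketch of the corrected shape, reusing the write class defined in the hunks above:

def : InstRW<[Zn4PERMIT2_128],
             (instregex "VPERM(I2|T2)(PS|PD|W)Z128(rr|rrk|rrkz)")>;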
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
index d57871130b0c..5d93a370782d 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
@@ -95,7 +95,7 @@ bool X86SpeculativeExecutionSideEffectSuppression::runOnMachineFunction(
// user explicitly passed an SESES flag, or whether the SESES target feature
// was set.
if (!EnableSpeculativeExecutionSideEffectSuppression &&
- !(Subtarget.useLVILoadHardening() && OptLevel == CodeGenOpt::None) &&
+ !(Subtarget.useLVILoadHardening() && OptLevel == CodeGenOptLevel::None) &&
!Subtarget.useSpeculativeExecutionSideEffectSuppression())
return false;
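The CodeGenOpt::Level to CodeGenOptLevel renames that recur throughout this merge follow the change in llvm/Support/CodeGen.h, where the optimization level became a scoped enum. A minimal sketch of the new type, assuming the upstream definition:

    // Abridged from llvm/Support/CodeGen.h:
    enum class CodeGenOptLevel {
      None = 0,      // -O0
      Less = 1,      // -O1
      Default = 2,   // -O2, -Os
      Aggressive = 3 // -O3
    };

Since the enumerators are scoped, every comparison site has to spell out CodeGenOptLevel::None, which accounts for the mechanical substitutions in the pass-config hunks below.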
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index 2a896314875d..6301285fe954 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -26,7 +26,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp
index c2fe6690479e..d63f1ca1695b 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -11,12 +11,12 @@
//===----------------------------------------------------------------------===//
#include "X86Subtarget.h"
+#include "GISel/X86CallLowering.h"
+#include "GISel/X86LegalizerInfo.h"
+#include "GISel/X86RegisterBankInfo.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "X86.h"
-#include "X86CallLowering.h"
-#include "X86LegalizerInfo.h"
#include "X86MacroFusion.h"
-#include "X86RegisterBankInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
@@ -83,27 +83,20 @@ X86Subtarget::classifyLocalReference(const GlobalValue *GV) const {
if (is64Bit()) {
// 64-bit ELF PIC local references may use GOTOFF relocations.
if (isTargetELF()) {
- switch (TM.getCodeModel()) {
- // 64-bit small code model is simple: All rip-relative.
- case CodeModel::Tiny:
- llvm_unreachable("Tiny codesize model not supported on X86");
- case CodeModel::Small:
- case CodeModel::Kernel:
- return X86II::MO_NO_FLAG;
-
- // The large PIC code model uses GOTOFF.
- case CodeModel::Large:
+ CodeModel::Model CM = TM.getCodeModel();
+ assert(CM != CodeModel::Tiny &&
+ "Tiny codesize model not supported on X86");
+ // In the large code model, all text is far from any global data, so we
+ // use GOTOFF.
+ if (CM == CodeModel::Large)
return X86II::MO_GOTOFF;
-
- // Medium is a hybrid: RIP-rel for code, GOTOFF for DSO local data.
- case CodeModel::Medium:
- // Constant pool and jump table handling pass a nullptr to this
- // function so we need to use isa_and_nonnull.
- if (isa_and_nonnull<Function>(GV))
- return X86II::MO_NO_FLAG; // All code is RIP-relative
- return X86II::MO_GOTOFF; // Local symbols use GOTOFF.
- }
- llvm_unreachable("invalid code model");
+ // Large GlobalValues use GOTOFF, otherwise use RIP-rel access.
+ if (GV)
+ return TM.isLargeGlobalValue(GV) ? X86II::MO_GOTOFF : X86II::MO_NO_FLAG;
+ // GV == nullptr is for all other non-GlobalValue global data like the
+ // constant pool, jump tables, labels, etc. The small and medium code
+ // models treat these as accessible with a RIP-rel access.
+ return X86II::MO_NO_FLAG;
}
// Otherwise, this is either a RIP-relative reference or a 64-bit movabsq,
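The rewritten ELF branch trades the per-code-model switch for a per-symbol decision. Condensed (a hypothetical simplification of the hunk above, not additional API):

    // Large code model: all text is far from data, always use GOTOFF.
    // Known global:     GOTOFF only if TM.isLargeGlobalValue(GV) says so.
    // GV == nullptr:    constant pool / jump tables / labels stay RIP-relative.

The practical effect is that the medium code model no longer forces GOTOFF for every non-function symbol; only globals actually classified as large are addressed relative to the GOT base.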
@@ -268,6 +261,24 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU,
if (!FS.empty())
FullFS = (Twine(FullFS) + "," + FS).str();
+ // Attach EVEX512 feature when we have AVX512 features with a default CPU.
+ // "pentium4" is default CPU for 32-bit targets.
+ // "x86-64" is default CPU for 64-bit targets.
+ if (CPU == "generic" || CPU == "pentium4" || CPU == "x86-64") {
+ size_t posNoEVEX512 = FS.rfind("-evex512");
+ // Make sure we won't be cheated by "-avx512fp16".
+ size_t posNoAVX512F =
+ FS.ends_with("-avx512f") ? FS.size() - 8 : FS.rfind("-avx512f,");
+ size_t posEVEX512 = FS.rfind("+evex512");
+ // Any AVX512XXX will enable AVX512F.
+ size_t posAVX512F = FS.rfind("+avx512");
+
+ if (posAVX512F != StringRef::npos &&
+ (posNoAVX512F == StringRef::npos || posNoAVX512F < posAVX512F))
+ if (posEVEX512 == StringRef::npos && posNoEVEX512 == StringRef::npos)
+ FullFS += ",+evex512";
+ }
+
// Parse features string and set the CPU.
ParseSubtargetFeatures(CPU, TuneCPU, FullFS);
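The scan above appends "+evex512" only for the default CPUs, only when some AVX512 feature is enabled, and only when the user has taken no explicit evex512 stance. Worked examples (hypothetical feature strings):

    // FS = "+avx512f"           -> FullFS += ",+evex512" (512-bit ops allowed)
    // FS = "+avx512f,-avx512f"  -> last avx512f mention is negative: no change
    // FS = "+avx512f,-evex512"  -> explicit user choice: no change
    // FS = "+avx512fp16"        -> still enables AVX512F, so ",+evex512" is
    //                              appended; the "-avx512f," comma guard only
    //                              avoids a false match inside "-avx512fp16"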
@@ -323,7 +334,9 @@ X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
InstrInfo(initializeSubtargetDependencies(CPU, TuneCPU, FS)),
TLInfo(TM, *this), FrameLowering(*this, getStackAlignment()) {
// Determine the PICStyle based on the target selected.
- if (!isPositionIndependent())
+ if (!isPositionIndependent() || TM.getCodeModel() == CodeModel::Large)
+ // With the large code model, None forces all memory accesses to be indirect
+ // rather than RIP-relative.
setPICStyle(PICStyles::Style::None);
else if (is64Bit())
setPICStyle(PICStyles::Style::RIPRel);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h
index 4c11a4212c31..a458b5f9ec8f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h
@@ -17,6 +17,7 @@
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
#include "X86SelectionDAGInfo.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/TargetParser/Triple.h"
@@ -263,7 +264,8 @@ public:
// If there are no 512-bit vectors and we prefer not to use 512-bit registers,
// disable them in the legalizer.
bool useAVX512Regs() const {
- return hasAVX512() && (canExtendTo512DQ() || RequiredVectorWidth > 256);
+ return hasAVX512() && hasEVEX512() &&
+ (canExtendTo512DQ() || RequiredVectorWidth > 256);
}
bool useLight256BitInstructions() const {
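hasEVEX512() now gates the 512-bit register predicate together with hasAVX512(). A standalone sketch with the subtarget queries flattened into parameters (assumption: this models CPUs that expose AVX512 instruction forms but cap EVEX vectors at 256 bits):

    bool useAVX512Regs(bool HasAVX512, bool HasEVEX512,
                       bool CanExtendTo512DQ, unsigned RequiredVectorWidth) {
      // Without EVEX512 the legalizer must never form 512-bit vectors,
      // regardless of the required vector width recorded for the function.
      return HasAVX512 && HasEVEX512 &&
             (CanExtendTo512DQ || RequiredVectorWidth > 256);
    }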
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp
index c096e6dd9686..5668b514d6de 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -14,8 +14,6 @@
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "TargetInfo/X86TargetInfo.h"
#include "X86.h"
-#include "X86CallLowering.h"
-#include "X86LegalizerInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86MacroFusion.h"
#include "X86Subtarget.h"
@@ -73,7 +71,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializeX86LowerAMXIntrinsicsLegacyPassPass(PR);
initializeX86LowerAMXTypeLegacyPassPass(PR);
- initializeX86PreAMXConfigPassPass(PR);
initializeX86PreTileConfigPass(PR);
initializeGlobalISel(PR);
initializeWinEHStatePassPass(PR);
@@ -132,12 +129,14 @@ static std::string computeDataLayout(const Triple &TT) {
Ret += "-p270:32:32-p271:32:32-p272:64:64";
// Some ABIs align 64 bit integers and doubles to 64 bits, others to 32.
+ // 128 bit integers are not specified in the 32-bit ABIs but are used
+ // internally for lowering f128, so we match the alignment to that.
if (TT.isArch64Bit() || TT.isOSWindows() || TT.isOSNaCl())
- Ret += "-i64:64";
+ Ret += "-i64:64-i128:128";
else if (TT.isOSIAMCU())
Ret += "-i64:32-f64:32";
else
- Ret += "-f64:32:64";
+ Ret += "-i128:128-f64:32:64";
// Some ABIs align long double to 128 bits, others to 32.
if (TT.isOSNaCl() || TT.isOSIAMCU())
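With the i128 additions, the computed layout fragment differs by branch roughly as follows (an illustrative assembly of the conditionals above):

    // 64-bit, Windows, NaCl : ...-i64:64-i128:128...
    // IAMCU                 : ...-i64:32-f64:32...       (unchanged)
    // other 32-bit targets  : ...-i128:128-f64:32:64...

so i128 becomes 128-bit aligned on every X86 target except IAMCU, matching the alignment the backend already assumed when lowering f128.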
@@ -226,7 +225,7 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: LLVMTargetMachine(
T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(TT, JIT, RM),
@@ -445,7 +444,7 @@ void X86PassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
- if (TM->getOptLevel() != CodeGenOpt::None) {
+ if (TM->getOptLevel() != CodeGenOptLevel::None) {
addPass(createInterleavedAccessPass());
addPass(createX86PartialReductionPass());
}
@@ -475,7 +474,7 @@ bool X86PassConfig::addInstSelector() {
// For ELF, cleanup any local-dynamic TLS accesses.
if (TM->getTargetTriple().isOSBinFormatELF() &&
- getOptLevel() != CodeGenOpt::None)
+ getOptLevel() != CodeGenOptLevel::None)
addPass(createCleanupLocalDynamicTLSPass());
addPass(createX86GlobalBaseRegPass());
@@ -520,7 +519,7 @@ bool X86PassConfig::addPreISel() {
}
void X86PassConfig::addPreRegAlloc() {
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOptLevel::None) {
addPass(&LiveRangeShrinkID);
addPass(createX86FixupSetCC());
addPass(createX86OptimizeLEAs());
@@ -532,7 +531,7 @@ void X86PassConfig::addPreRegAlloc() {
addPass(createX86FlagsCopyLoweringPass());
addPass(createX86DynAllocaExpander());
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
addPass(createX86PreTileConfigPass());
else
addPass(createX86FastPreTileConfigPass());
@@ -550,7 +549,7 @@ void X86PassConfig::addPostRegAlloc() {
// to using the Speculative Execution Side Effect Suppression pass for
// mitigation. This is to prevent slowdowns due to
// analyses needed by the LVIHardening pass when compiling at -O0.
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
addPass(createX86LoadValueInjectionLoadHardeningPass());
}
@@ -560,7 +559,7 @@ void X86PassConfig::addPreSched2() {
}
void X86PassConfig::addPreEmitPass() {
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOptLevel::None) {
addPass(new X86ExecutionDomainFix());
addPass(createBreakFalseDeps());
}
@@ -569,7 +568,7 @@ void X86PassConfig::addPreEmitPass() {
addPass(createX86IssueVZeroUpperPass());
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOptLevel::None) {
addPass(createX86FixupBWInsts());
addPass(createX86PadShortFunctions());
addPass(createX86FixupLEAs());
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.h b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.h
index 5ea51e2fc22c..4836be4db0e8 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.h
@@ -35,7 +35,7 @@ public:
X86TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
~X86TargetMachine() override;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.cpp
index b88ad5a478f3..53c692060f08 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.cpp
@@ -56,3 +56,13 @@ const MCExpr *X86ELFTargetObjectFile::getDebugThreadLocalSymbol(
const MCSymbol *Sym) const {
return MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_DTPOFF, getContext());
}
+
+const MCExpr *X86ELFTargetObjectFile::getIndirectSymViaGOTPCRel(
+ const GlobalValue *GV, const MCSymbol *Sym, const MCValue &MV,
+ int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const {
+ int64_t FinalOffset = Offset + MV.getConstant();
+ const MCExpr *Res =
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext());
+ const MCExpr *Off = MCConstantExpr::create(FinalOffset, getContext());
+ return MCBinaryExpr::createAdd(Res, Off, getContext());
+}
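This hook is consulted when the assembly printer can fold a reference to a global into a GOT-relative data directive. Illustratively (hypothetical symbol and offset), for Sym = foo with a combined offset of 4, the returned expression prints as:

    //   foo@GOTPCREL+4    emitted as, e.g.:   .long foo@GOTPCREL+4

with MV.getConstant() folded into the addend. The matching header change below turns the feature on for X86 ELF via SupportIndirectSymViaGOTPCRel.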
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.h b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.h
index f4bf52c83771..ed9390d1fad1 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.h
@@ -42,9 +42,16 @@ namespace llvm {
public:
X86ELFTargetObjectFile() {
PLTRelativeVariantKind = MCSymbolRefExpr::VK_PLT;
+ SupportIndirectSymViaGOTPCRel = true;
}
/// Describe a TLS variable address within debug info.
const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
+
+ const MCExpr *
+ getIndirectSymViaGOTPCRel(const GlobalValue *GV, const MCSymbol *Sym,
+ const MCValue &MV, int64_t Offset,
+ MachineModuleInfo *MMI,
+ MCStreamer &Streamer) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 129a2646dbb7..8a04987e768a 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -180,7 +180,7 @@ X86TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
case TargetTransformInfo::RGK_Scalar:
return TypeSize::getFixed(ST->is64Bit() ? 64 : 32);
case TargetTransformInfo::RGK_FixedWidthVector:
- if (ST->hasAVX512() && PreferVectorWidth >= 512)
+ if (ST->hasAVX512() && ST->hasEVEX512() && PreferVectorWidth >= 512)
return TypeSize::getFixed(512);
if (ST->hasAVX() && PreferVectorWidth >= 256)
return TypeSize::getFixed(256);
@@ -1469,7 +1469,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// 64-bit packed integer vectors (v2i32) are widened to type v4i32.
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(BaseTp);
- Kind = improveShuffleKindFromMask(Kind, Mask);
+ Kind = improveShuffleKindFromMask(Kind, Mask, BaseTp, Index, SubTp);
// Treat Transpose as 2-op shuffles - there's no difference in lowering.
if (Kind == TTI::SK_Transpose)
@@ -1481,6 +1481,10 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
if (Kind == TTI::SK_Broadcast)
LT.first = 1;
+ // Treat <X x bfloat> shuffles as <X x half>.
+ if (LT.second.isVector() && LT.second.getScalarType() == MVT::bf16)
+ LT.second = LT.second.changeVectorElementType(MVT::f16);
+
// Subvector extractions are free if they start at the beginning of a
// vector and cheap if the subvectors are aligned.
if (Kind == TTI::SK_ExtractSubvector && LT.second.isVector()) {
@@ -1596,7 +1600,6 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
BaseTp->getElementType()->getPrimitiveSizeInBits() &&
LegalVT.getVectorNumElements() <
cast<FixedVectorType>(BaseTp)->getNumElements()) {
-
unsigned VecTySize = DL.getTypeStoreSize(BaseTp);
unsigned LegalVTSize = LegalVT.getStoreSize();
// Number of source vectors after legalization:
@@ -1621,6 +1624,10 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// copy of the previous destination register (the cost is
// TTI::TCC_Basic). If the source register is just reused, the cost for
// this operation is 0.
+ NumOfDests =
+ getTypeLegalizationCost(
+ FixedVectorType::get(BaseTp->getElementType(), Mask.size()))
+ .first;
unsigned E = *NumOfDests.getValue();
unsigned NormalizedVF =
LegalVT.getVectorNumElements() * std::max(NumOfSrcs, E);
@@ -1635,7 +1642,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
[this, SingleOpTy, CostKind, &PrevSrcReg, &PrevRegMask,
&Cost](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
- if (!ShuffleVectorInst::isIdentityMask(RegMask)) {
+ if (!ShuffleVectorInst::isIdentityMask(RegMask, RegMask.size())) {
// Check if the previous register can be just copied to the next
// one.
if (PrevRegMask.empty() || PrevSrcReg != SrcReg ||
@@ -3945,6 +3952,7 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::CTPOP, MVT::i64, { 10, 6, 19, 19 } },
{ ISD::ROTL, MVT::i64, { 2, 3, 1, 3 } },
{ ISD::ROTR, MVT::i64, { 2, 3, 1, 3 } },
+ { X86ISD::VROTLI, MVT::i64, { 1, 1, 1, 1 } },
{ ISD::FSHL, MVT::i64, { 4, 4, 1, 4 } },
{ ISD::SMAX, MVT::i64, { 1, 3, 2, 3 } },
{ ISD::SMIN, MVT::i64, { 1, 3, 2, 3 } },
@@ -3984,6 +3992,9 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::ROTR, MVT::i32, { 2, 3, 1, 3 } },
{ ISD::ROTR, MVT::i16, { 2, 3, 1, 3 } },
{ ISD::ROTR, MVT::i8, { 2, 3, 1, 3 } },
+ { X86ISD::VROTLI, MVT::i32, { 1, 1, 1, 1 } },
+ { X86ISD::VROTLI, MVT::i16, { 1, 1, 1, 1 } },
+ { X86ISD::VROTLI, MVT::i8, { 1, 1, 1, 1 } },
{ ISD::FSHL, MVT::i32, { 4, 4, 1, 4 } },
{ ISD::FSHL, MVT::i16, { 4, 4, 2, 5 } },
{ ISD::FSHL, MVT::i8, { 4, 4, 2, 5 } },
@@ -4039,8 +4050,13 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
ISD = ISD::FSHL;
if (!ICA.isTypeBasedOnly()) {
const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
- if (Args[0] == Args[1])
+ if (Args[0] == Args[1]) {
ISD = ISD::ROTL;
+ // Handle scalar constant rotation amounts.
+ // TODO: Handle vector + funnel-shift cases.
+ if (isa_and_nonnull<ConstantInt>(Args[2]))
+ ISD = X86ISD::VROTLI;
+ }
}
break;
case Intrinsic::fshr:
@@ -4048,8 +4064,13 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
ISD = ISD::FSHL;
if (!ICA.isTypeBasedOnly()) {
const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
- if (Args[0] == Args[1])
+ if (Args[0] == Args[1]) {
+ // Handle scalar constant rotation amount.
+ // TODO: Handle vector + funnel-shift cases.
ISD = ISD::ROTR;
+ if (isa_and_nonnull<ConstantInt>(Args[2]))
+ ISD = X86ISD::VROTLI;
+ }
}
break;
case Intrinsic::maxnum:
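In both funnel-shift cases, operands that coincide make the operation a plain rotate, and the new inner check further selects the immediate-rotate cost rows (the X86ISD::VROTLI entries added to the tables above, all {1,1,1,1}) when the amount is a scalar constant. Illustrative IR, shown as a comment:

    // %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 5)   ; == rotl %x, 5
    // Args[0] == Args[1] and Args[2] is a ConstantInt, so the cost is looked
    // up under X86ISD::VROTLI instead of ISD::ROTL / ISD::ROTR.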
@@ -5746,8 +5767,8 @@ InstructionCost X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
}
InstructionCost AddressUnpackCost = getScalarizationOverhead(
- FixedVectorType::get(ScalarTy->getPointerTo(), VF), DemandedElts,
- /*Insert=*/false, /*Extract=*/true, CostKind);
+ FixedVectorType::get(PointerType::getUnqual(ScalarTy->getContext()), VF),
+ DemandedElts, /*Insert=*/false, /*Extract=*/true, CostKind);
// The cost of the scalar loads/stores.
InstructionCost MemoryOpCost =
@@ -6110,7 +6131,8 @@ X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
// Only enable vector loads for equality comparison. Right now the vector
// version is not as fast for three way compare (see #33329).
const unsigned PreferredWidth = ST->getPreferVectorWidth();
- if (PreferredWidth >= 512 && ST->hasAVX512()) Options.LoadSizes.push_back(64);
+ if (PreferredWidth >= 512 && ST->hasAVX512() && ST->hasEVEX512())
+ Options.LoadSizes.push_back(64);
if (PreferredWidth >= 256 && ST->hasAVX()) Options.LoadSizes.push_back(32);
if (PreferredWidth >= 128 && ST->hasSSE2()) Options.LoadSizes.push_back(16);
}
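The load-size ladder now admits 64-byte comparisons only when 512-bit EVEX encodings are actually usable. Condensed policy (restating the three conditionals above):

    // width >= 512 && AVX512 && EVEX512 -> allow 64-byte memcmp loads
    // width >= 256 && AVX               -> allow 32-byte memcmp loads
    // width >= 128 && SSE2              -> allow 16-byte memcmp loads

A CPU advertising AVX512 features without EVEX512 therefore tops out at 32-byte vector loads when expanding memcmp equality tests.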
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86WinEHState.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86WinEHState.cpp
index fe9088ec1ec6..578d653c1e0a 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86WinEHState.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86WinEHState.cpp
@@ -172,7 +172,7 @@ bool WinEHStatePass::runOnFunction(Function &F) {
if (!HasPads)
return false;
- Type *Int8PtrType = Type::getInt8PtrTy(TheModule->getContext());
+ Type *Int8PtrType = PointerType::getUnqual(TheModule->getContext());
SetJmp3 = TheModule->getOrInsertFunction(
"_setjmp3", FunctionType::get(
Type::getInt32Ty(TheModule->getContext()),
@@ -214,7 +214,7 @@ Type *WinEHStatePass::getEHLinkRegistrationType() {
Type *FieldTys[] = {
PointerType::getUnqual(
EHLinkRegistrationTy->getContext()), // EHRegistrationNode *Next
- Type::getInt8PtrTy(Context) // EXCEPTION_DISPOSITION (*Handler)(...)
+ PointerType::getUnqual(Context) // EXCEPTION_DISPOSITION (*Handler)(...)
};
EHLinkRegistrationTy->setBody(FieldTys, false);
return EHLinkRegistrationTy;
@@ -231,9 +231,9 @@ Type *WinEHStatePass::getCXXEHRegistrationType() {
return CXXEHRegistrationTy;
LLVMContext &Context = TheModule->getContext();
Type *FieldTys[] = {
- Type::getInt8PtrTy(Context), // void *SavedESP
- getEHLinkRegistrationType(), // EHRegistrationNode SubRecord
- Type::getInt32Ty(Context) // int32_t TryLevel
+ PointerType::getUnqual(Context), // void *SavedESP
+ getEHLinkRegistrationType(), // EHRegistrationNode SubRecord
+ Type::getInt32Ty(Context) // int32_t TryLevel
};
CXXEHRegistrationTy =
StructType::create(FieldTys, "CXXExceptionRegistration");
@@ -253,11 +253,11 @@ Type *WinEHStatePass::getSEHRegistrationType() {
return SEHRegistrationTy;
LLVMContext &Context = TheModule->getContext();
Type *FieldTys[] = {
- Type::getInt8PtrTy(Context), // void *SavedESP
- Type::getInt8PtrTy(Context), // void *ExceptionPointers
- getEHLinkRegistrationType(), // EHRegistrationNode SubRecord
- Type::getInt32Ty(Context), // int32_t EncodedScopeTable
- Type::getInt32Ty(Context) // int32_t TryLevel
+ PointerType::getUnqual(Context), // void *SavedESP
+ PointerType::getUnqual(Context), // void *ExceptionPointers
+ getEHLinkRegistrationType(), // EHRegistrationNode SubRecord
+ Type::getInt32Ty(Context), // int32_t EncodedScopeTable
+ Type::getInt32Ty(Context) // int32_t TryLevel
};
SEHRegistrationTy = StructType::create(FieldTys, "SEHExceptionRegistration");
return SEHRegistrationTy;
@@ -275,7 +275,7 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
Type *RegNodeTy;
IRBuilder<> Builder(&F->getEntryBlock(), F->getEntryBlock().begin());
- Type *Int8PtrType = Builder.getInt8PtrTy();
+ Type *Int8PtrType = Builder.getPtrTy();
Type *Int32Ty = Builder.getInt32Ty();
Type *VoidTy = Builder.getVoidTy();
@@ -283,8 +283,7 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
RegNodeTy = getCXXEHRegistrationType();
RegNode = Builder.CreateAlloca(RegNodeTy);
// SavedESP = llvm.stacksave()
- Value *SP = Builder.CreateCall(
- Intrinsic::getDeclaration(TheModule, Intrinsic::stacksave), {});
+ Value *SP = Builder.CreateStackSave();
Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
// TryLevel = -1
StateFieldIndex = 2;
@@ -313,8 +312,7 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
EHGuardNode = Builder.CreateAlloca(Int32Ty);
// SavedESP = llvm.stacksave()
- Value *SP = Builder.CreateCall(
- Intrinsic::getDeclaration(TheModule, Intrinsic::stacksave), {});
+ Value *SP = Builder.CreateStackSave();
Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
// TryLevel = -2 / -1
StateFieldIndex = 4;
@@ -338,7 +336,7 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
Value *FrameAddr = Builder.CreateCall(
Intrinsic::getDeclaration(
TheModule, Intrinsic::frameaddress,
- Builder.getInt8PtrTy(
+ Builder.getPtrTy(
TheModule->getDataLayout().getAllocaAddrSpace())),
Builder.getInt32(0), "frameaddr");
Value *FrameAddrI32 = Builder.CreatePtrToInt(FrameAddr, Int32Ty);
@@ -371,9 +369,8 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
}
Value *WinEHStatePass::emitEHLSDA(IRBuilder<> &Builder, Function *F) {
- Value *FI8 = Builder.CreateBitCast(F, Type::getInt8PtrTy(F->getContext()));
return Builder.CreateCall(
- Intrinsic::getDeclaration(TheModule, Intrinsic::x86_seh_lsda), FI8);
+ Intrinsic::getDeclaration(TheModule, Intrinsic::x86_seh_lsda), F);
}
/// Generate a thunk that puts the LSDA of ParentFunc in EAX and then calls
@@ -386,7 +383,7 @@ Value *WinEHStatePass::emitEHLSDA(IRBuilder<> &Builder, Function *F) {
Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) {
LLVMContext &Context = ParentFunc->getContext();
Type *Int32Ty = Type::getInt32Ty(Context);
- Type *Int8PtrType = Type::getInt8PtrTy(Context);
+ Type *Int8PtrType = PointerType::getUnqual(Context);
Type *ArgTys[5] = {Int8PtrType, Int8PtrType, Int8PtrType, Int8PtrType,
Int8PtrType};
FunctionType *TrampolineTy =
@@ -421,14 +418,13 @@ void WinEHStatePass::linkExceptionRegistration(IRBuilder<> &Builder,
// Emit the .safeseh directive for this function.
Handler->addFnAttr("safeseh");
+ LLVMContext &C = Builder.getContext();
Type *LinkTy = getEHLinkRegistrationType();
// Handler = Handler
- Value *HandlerI8 = Builder.CreateBitCast(Handler, Builder.getInt8PtrTy());
- Builder.CreateStore(HandlerI8, Builder.CreateStructGEP(LinkTy, Link, 1));
+ Builder.CreateStore(Handler, Builder.CreateStructGEP(LinkTy, Link, 1));
// Next = [fs:00]
- Constant *FSZero =
- Constant::getNullValue(LinkTy->getPointerTo()->getPointerTo(257));
- Value *Next = Builder.CreateLoad(LinkTy->getPointerTo(), FSZero);
+ Constant *FSZero = Constant::getNullValue(PointerType::get(C, 257));
+ Value *Next = Builder.CreateLoad(PointerType::getUnqual(C), FSZero);
Builder.CreateStore(Next, Builder.CreateStructGEP(LinkTy, Link, 0));
// [fs:00] = Link
Builder.CreateStore(Link, FSZero);
@@ -441,12 +437,13 @@ void WinEHStatePass::unlinkExceptionRegistration(IRBuilder<> &Builder) {
Builder.Insert(GEP);
Link = GEP;
}
+
+ LLVMContext &C = Builder.getContext();
Type *LinkTy = getEHLinkRegistrationType();
// [fs:00] = Link->Next
- Value *Next = Builder.CreateLoad(LinkTy->getPointerTo(),
+ Value *Next = Builder.CreateLoad(PointerType::getUnqual(C),
Builder.CreateStructGEP(LinkTy, Link, 0));
- Constant *FSZero =
- Constant::getNullValue(LinkTy->getPointerTo()->getPointerTo(257));
+ Constant *FSZero = Constant::getNullValue(PointerType::get(C, 257));
Builder.CreateStore(Next, FSZero);
}
@@ -479,7 +476,7 @@ void WinEHStatePass::rewriteSetJmpCall(IRBuilder<> &Builder, Function &F,
SmallVector<Value *, 5> Args;
Args.push_back(
- Builder.CreateBitCast(Call.getArgOperand(0), Builder.getInt8PtrTy()));
+ Builder.CreateBitCast(Call.getArgOperand(0), Builder.getPtrTy()));
Args.push_back(Builder.getInt32(OptionalArgs.size()));
Args.append(OptionalArgs.begin(), OptionalArgs.end());
@@ -626,7 +623,7 @@ void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) {
// Mark the registration node. The backend needs to know which alloca it is so
// that it can recover the original frame pointer.
IRBuilder<> Builder(RegNode->getNextNode());
- Value *RegNodeI8 = Builder.CreateBitCast(RegNode, Builder.getInt8PtrTy());
+ Value *RegNodeI8 = Builder.CreateBitCast(RegNode, Builder.getPtrTy());
Builder.CreateCall(
Intrinsic::getDeclaration(TheModule, Intrinsic::x86_seh_ehregnode),
{RegNodeI8});
@@ -634,7 +631,7 @@ void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) {
if (EHGuardNode) {
IRBuilder<> Builder(EHGuardNode->getNextNode());
Value *EHGuardNodeI8 =
- Builder.CreateBitCast(EHGuardNode, Builder.getInt8PtrTy());
+ Builder.CreateBitCast(EHGuardNode, Builder.getPtrTy());
Builder.CreateCall(
Intrinsic::getDeclaration(TheModule, Intrinsic::x86_seh_ehguard),
{EHGuardNodeI8});
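The WinEHState hunks are mechanical opaque-pointer migration: with a single ptr type per address space there is no distinct i8*, so Type::getInt8PtrTy(Ctx) becomes PointerType::getUnqual(Ctx), and bitcasts between pointer types fold to no-ops. A minimal sketch, assuming the default address space:

    LLVMContext Ctx;
    Type *P = PointerType::getUnqual(Ctx); // the one opaque 'ptr'; was i8*
    // IRBuilder::CreateBitCast(V, P) on an already-ptr V simply returns V,
    // which is why the F -> FI8 bitcast before x86_seh_lsda could be dropped.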
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCore.h b/contrib/llvm-project/llvm/lib/Target/XCore/XCore.h
index aae1e34fd5ef..f019fa49f185 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCore.h
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCore.h
@@ -29,7 +29,7 @@ namespace llvm {
FunctionPass *createXCoreFrameToArgsOffsetEliminationPass();
FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM,
- CodeGenOpt::Level OptLevel);
+ CodeGenOptLevel OptLevel);
ModulePass *createXCoreLowerThreadLocalPass();
void initializeXCoreDAGToDAGISelPass(PassRegistry &);
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 949b88ad8d4e..1288597fc6b0 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -45,8 +45,8 @@ namespace {
XCoreDAGToDAGISel() = delete;
- XCoreDAGToDAGISel(XCoreTargetMachine &TM, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(ID, TM, OptLevel) {}
+ XCoreDAGToDAGISel(XCoreTargetMachine &TM, CodeGenOptLevel OptLevel)
+ : SelectionDAGISel(ID, TM, OptLevel) {}
void Select(SDNode *N) override;
bool tryBRIND(SDNode *N);
@@ -71,7 +71,8 @@ namespace {
// Complex Pattern Selectors.
bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset);
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) override;
// Include the pieces autogenerated from the target description.
@@ -87,8 +88,8 @@ INITIALIZE_PASS(XCoreDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
/// XCore-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
- return new XCoreDAGToDAGISel(TM, OptLevel);
+ CodeGenOptLevel OptLevel) {
+ return new XCoreDAGToDAGISel(TM, OptLevel);
}
bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
@@ -114,13 +115,13 @@ bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
return false;
}
-bool XCoreDAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
- std::vector<SDValue> &OutOps) {
+bool XCoreDAGToDAGISel::SelectInlineAsmMemoryOperand(
+ const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
+ std::vector<SDValue> &OutOps) {
SDValue Reg;
switch (ConstraintID) {
default: return true;
- case InlineAsm::Constraint_m: // Memory.
+ case InlineAsm::ConstraintCode::m: // Memory.
switch (Op.getOpcode()) {
default: return true;
case XCoreISD::CPRelativeWrapper:
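The constraint identifiers consumed by SelectInlineAsmMemoryOperand changed from plain unsigned constants to a scoped enum. A sketch of the new shape (abridged; assumes the llvm/IR/InlineAsm.h definition):

    enum class ConstraintCode : uint32_t {
      Unknown = 0,
      // ...
      m, // generic memory operand; formerly the InlineAsm::Constraint_m constant
      // ...
    };

hence the switch above matches InlineAsm::ConstraintCode::m.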
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp
index 34f2a0576e7c..7736adab19e8 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -147,12 +147,7 @@ XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
- // Atomic operations
- // We request a fence for ATOMIC_* instructions, to reduce them to Monotonic.
- // As we are always Sequential Consistent, an ATOMIC_FENCE becomes a no OP.
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
- setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
// TRAMPOLINE is custom lowered.
setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
@@ -171,6 +166,9 @@ XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM,
setMinFunctionAlignment(Align(2));
setPrefFunctionAlignment(Align(4));
+
+ // This target doesn't implement native atomics.
+ setMaxAtomicSizeInBitsSupported(0);
}
bool XCoreTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
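Deleting the custom ATOMIC_LOAD/ATOMIC_STORE lowering (further down in this file) is safe because setMaxAtomicSizeInBitsSupported(0) now sends every atomic through AtomicExpandPass as a libcall. A sketch of the effect on IR (hypothetical example, comments only):

    // Before expansion:   %v = load atomic i32, ptr %p seq_cst, align 4
    // After AtomicExpand: %v = call i32 @__atomic_load_4(ptr %p, i32 5)
    //                     (5 == __ATOMIC_SEQ_CST; the libcall comes from the
    //                      platform's libatomic / compiler-rt)

This also removes the need for the volatile-MMO workaround, whose deletion appears below.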
@@ -215,9 +213,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
- case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
- case ISD::ATOMIC_LOAD: return LowerATOMIC_LOAD(Op, DAG);
- case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op, DAG);
+ case ISD::ATOMIC_FENCE:
+ return LowerATOMIC_FENCE(Op, DAG);
default:
llvm_unreachable("unimplemented operand");
}
@@ -252,7 +249,7 @@ SDValue XCoreTargetLowering::getGlobalAddressWrapper(SDValue GA,
return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA);
const auto *GVar = dyn_cast<GlobalVariable>(GV);
- if ((GV->hasSection() && GV->getSection().startswith(".cp.")) ||
+ if ((GV->hasSection() && GV->getSection().starts_with(".cp.")) ||
(GVar && GVar->isConstant() && GV->hasLocalLinkage()))
return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA);
@@ -292,12 +289,10 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
return GA;
} else {
// Ideally we would not fold in offset with an index <= 11.
- Type *Ty = Type::getInt8PtrTy(*DAG.getContext());
- Constant *GA = ConstantExpr::getBitCast(const_cast<GlobalValue*>(GV), Ty);
- Ty = Type::getInt32Ty(*DAG.getContext());
+ Type *Ty = Type::getInt32Ty(*DAG.getContext());
Constant *Idx = ConstantInt::get(Ty, Offset);
Constant *GAI = ConstantExpr::getGetElementPtr(
- Type::getInt8Ty(*DAG.getContext()), GA, Idx);
+ Type::getInt8Ty(*DAG.getContext()), const_cast<GlobalValue *>(GV), Idx);
SDValue CP = DAG.getConstantPool(GAI, MVT::i32);
return DAG.getLoad(getPointerTy(DAG.getDataLayout()), DL,
DAG.getEntryNode(), CP, MachinePointerInfo());
@@ -930,88 +925,6 @@ LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}
-SDValue XCoreTargetLowering::
-LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const {
- AtomicSDNode *N = cast<AtomicSDNode>(Op);
- assert(N->getOpcode() == ISD::ATOMIC_LOAD && "Bad Atomic OP");
- assert((N->getSuccessOrdering() == AtomicOrdering::Unordered ||
- N->getSuccessOrdering() == AtomicOrdering::Monotonic) &&
- "setInsertFencesForAtomic(true) expects unordered / monotonic");
- if (N->getMemoryVT() == MVT::i32) {
- if (N->getAlign() < Align(4))
- report_fatal_error("atomic load must be aligned");
- return DAG.getLoad(getPointerTy(DAG.getDataLayout()), SDLoc(Op),
- N->getChain(), N->getBasePtr(), N->getPointerInfo(),
- N->getAlign(), N->getMemOperand()->getFlags(),
- N->getAAInfo(), N->getRanges());
- }
- if (N->getMemoryVT() == MVT::i16) {
- if (N->getAlign() < Align(2))
- report_fatal_error("atomic load must be aligned");
- return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), MVT::i32, N->getChain(),
- N->getBasePtr(), N->getPointerInfo(), MVT::i16,
- N->getAlign(), N->getMemOperand()->getFlags(),
- N->getAAInfo());
- }
- if (N->getMemoryVT() == MVT::i8)
- return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), MVT::i32, N->getChain(),
- N->getBasePtr(), N->getPointerInfo(), MVT::i8,
- N->getAlign(), N->getMemOperand()->getFlags(),
- N->getAAInfo());
- return SDValue();
-}
-
-SDValue XCoreTargetLowering::
-LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const {
- AtomicSDNode *N = cast<AtomicSDNode>(Op);
- assert(N->getOpcode() == ISD::ATOMIC_STORE && "Bad Atomic OP");
- assert((N->getSuccessOrdering() == AtomicOrdering::Unordered ||
- N->getSuccessOrdering() == AtomicOrdering::Monotonic) &&
- "setInsertFencesForAtomic(true) expects unordered / monotonic");
- if (N->getMemoryVT() == MVT::i32) {
- if (N->getAlign() < Align(4))
- report_fatal_error("atomic store must be aligned");
- return DAG.getStore(N->getChain(), SDLoc(Op), N->getVal(), N->getBasePtr(),
- N->getPointerInfo(), N->getAlign(),
- N->getMemOperand()->getFlags(), N->getAAInfo());
- }
- if (N->getMemoryVT() == MVT::i16) {
- if (N->getAlign() < Align(2))
- report_fatal_error("atomic store must be aligned");
- return DAG.getTruncStore(N->getChain(), SDLoc(Op), N->getVal(),
- N->getBasePtr(), N->getPointerInfo(), MVT::i16,
- N->getAlign(), N->getMemOperand()->getFlags(),
- N->getAAInfo());
- }
- if (N->getMemoryVT() == MVT::i8)
- return DAG.getTruncStore(N->getChain(), SDLoc(Op), N->getVal(),
- N->getBasePtr(), N->getPointerInfo(), MVT::i8,
- N->getAlign(), N->getMemOperand()->getFlags(),
- N->getAAInfo());
- return SDValue();
-}
-
-MachineMemOperand::Flags
-XCoreTargetLowering::getTargetMMOFlags(const Instruction &I) const {
- // Because of how we convert atomic_load and atomic_store to normal loads and
- // stores in the DAG, we need to ensure that the MMOs are marked volatile
- // since DAGCombine hasn't been updated to account for atomic, but non
- // volatile loads. (See D57601)
- if (auto *SI = dyn_cast<StoreInst>(&I))
- if (SI->isAtomic())
- return MachineMemOperand::MOVolatile;
- if (auto *LI = dyn_cast<LoadInst>(&I))
- if (LI->isAtomic())
- return MachineMemOperand::MOVolatile;
- if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
- if (AI->isAtomic())
- return MachineMemOperand::MOVolatile;
- if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
- if (AI->isAtomic())
- return MachineMemOperand::MOVolatile;
- return MachineMemOperand::MONone;
-}
-
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.h b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.h
index cfd0619cba8f..eaa36d40cba9 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.h
@@ -181,11 +181,6 @@ namespace llvm {
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
-
- MachineMemOperand::Flags getTargetMMOFlags(
- const Instruction &I) const override;
// Inline asm support
std::pair<unsigned, const TargetRegisterClass *>
@@ -219,14 +214,10 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const override;
- bool
- CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
- LLVMContext &Context) const override;
- bool shouldInsertFencesForAtomic(const Instruction *I) const override {
- return true;
- }
+ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
+ LLVMContext &Context) const override;
};
}
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.h
index 8d420ab712f1..b72875c29c34 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.h
@@ -34,8 +34,6 @@ public:
bool useFPForScavengingIndex(const MachineFunction &MF) const override;
- bool supportsBackwardScavenger() const override { return true; }
-
bool eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
index 410c854a0210..345a8365ed49 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -47,7 +47,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: LLVMTargetMachine(
T, "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32",
TT, CPU, FS, Options, getEffectiveRelocModel(RM),
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.h
index b45287da3a1c..23276935713b 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.h
@@ -31,7 +31,7 @@ public:
XCoreTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
~XCoreTargetMachine() override;
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
index 52a0a09d3ea5..ae697f43b0ee 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -98,7 +98,7 @@ MCSection *XCoreTargetObjectFile::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
StringRef SectionName = GO->getSection();
// Infer section flags from the section name if we can.
- bool IsCPRel = SectionName.startswith(".cp.");
+ bool IsCPRel = SectionName.starts_with(".cp.");
if (IsCPRel && !Kind.isReadOnly())
report_fatal_error("Using .cp. section for writeable object.");
return getContext().getELFSection(SectionName, getXCoreSectionType(Kind),
diff --git a/contrib/llvm-project/llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp
index 8ffe1253aa01..3f808298527f 100644
--- a/contrib/llvm-project/llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp
@@ -35,8 +35,7 @@ class XtensaAsmParser : public MCTargetAsmParser {
SMLoc getLoc() const { return getParser().getTok().getLoc(); }
- bool parseRegister(MCRegister &RegNo,
- SMLoc &StartLoc, SMLoc &EndLoc) override;
+ bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -50,19 +49,19 @@ class XtensaAsmParser : public MCTargetAsmParser {
#define GET_ASSEMBLER_HEADER
#include "XtensaGenAsmMatcher.inc"
- OperandMatchResultTy parseImmediate(OperandVector &Operands);
- OperandMatchResultTy parseRegister(OperandVector &Operands,
- bool AllowParens = false, bool SR = false);
- OperandMatchResultTy parseOperandWithModifier(OperandVector &Operands);
+ ParseStatus parseImmediate(OperandVector &Operands);
+ ParseStatus parseRegister(OperandVector &Operands, bool AllowParens = false,
+ bool SR = false);
+ ParseStatus parseOperandWithModifier(OperandVector &Operands);
bool parseOperand(OperandVector &Operands, StringRef Mnemonic,
bool SR = false);
bool ParseInstructionWithSR(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands);
- OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override {
- return MatchOperand_NoMatch;
+ ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) override {
+ return ParseStatus::NoMatch;
}
- OperandMatchResultTy parsePCRelTarget(OperandVector &Operands);
+ ParseStatus parsePCRelTarget(OperandVector &Operands);
public:
enum XtensaMatchResultTy {
@@ -432,8 +431,7 @@ bool XtensaAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
report_fatal_error("Unknown match type detected!");
}
-OperandMatchResultTy
-XtensaAsmParser::parsePCRelTarget(OperandVector &Operands) {
+ParseStatus XtensaAsmParser::parsePCRelTarget(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
LLVM_DEBUG(dbgs() << "parsePCRelTarget\n");
@@ -443,25 +441,23 @@ XtensaAsmParser::parsePCRelTarget(OperandVector &Operands) {
const MCExpr *Expr = nullptr;
if (Parser.parseExpression(Expr)) {
// We have no way of knowing if a symbol was consumed so we must ParseFail
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
}
// Constants are currently not supported
- if (Expr->getKind() == MCExpr::ExprKind::Constant) {
- Error(getLoc(), "unknown operand");
- return MatchOperand_ParseFail;
- }
+ if (Expr->getKind() == MCExpr::ExprKind::Constant)
+ return Error(getLoc(), "unknown operand");
Operands.push_back(XtensaOperand::createImm(Expr, S, getLexer().getLoc()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-bool XtensaAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
+bool XtensaAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) {
const AsmToken &Tok = getParser().getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
- RegNo = 0;
+ Reg = Xtensa::NoRegister;
StringRef Name = getLexer().getTok().getIdentifier();
if (!MatchRegisterName(Name) && !MatchRegisterAltName(Name)) {
@@ -472,8 +468,8 @@ bool XtensaAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
return Error(StartLoc, "invalid register name");
}
-OperandMatchResultTy XtensaAsmParser::parseRegister(OperandVector &Operands,
- bool AllowParens, bool SR) {
+ParseStatus XtensaAsmParser::parseRegister(OperandVector &Operands,
+ bool AllowParens, bool SR) {
SMLoc FirstS = getLoc();
bool HadParens = false;
AsmToken Buf[2];
@@ -484,7 +480,7 @@ OperandMatchResultTy XtensaAsmParser::parseRegister(OperandVector &Operands,
size_t ReadCount = getLexer().peekTokens(Buf);
if (ReadCount == 2 && Buf[1].getKind() == AsmToken::RParen) {
if ((Buf[0].getKind() == AsmToken::Integer) && (!SR))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
HadParens = true;
getParser().Lex(); // Eat '('
}
@@ -494,10 +490,10 @@ OperandMatchResultTy XtensaAsmParser::parseRegister(OperandVector &Operands,
switch (getLexer().getKind()) {
default:
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
case AsmToken::Integer:
if (!SR)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
RegName = StringRef(std::to_string(getLexer().getTok().getIntVal()));
RegNo = MatchRegisterName(RegName);
if (RegNo == 0)
@@ -514,7 +510,7 @@ OperandMatchResultTy XtensaAsmParser::parseRegister(OperandVector &Operands,
if (RegNo == 0) {
if (HadParens)
getLexer().UnLex(Buf[0]);
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
if (HadParens)
Operands.push_back(XtensaOperand::createToken("(", FirstS));
@@ -528,17 +524,17 @@ OperandMatchResultTy XtensaAsmParser::parseRegister(OperandVector &Operands,
Operands.push_back(XtensaOperand::createToken(")", getLoc()));
}
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy XtensaAsmParser::parseImmediate(OperandVector &Operands) {
+ParseStatus XtensaAsmParser::parseImmediate(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E;
const MCExpr *Res;
switch (getLexer().getKind()) {
default:
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
case AsmToken::LParen:
case AsmToken::Minus:
case AsmToken::Plus:
@@ -546,12 +542,12 @@ OperandMatchResultTy XtensaAsmParser::parseImmediate(OperandVector &Operands) {
case AsmToken::Integer:
case AsmToken::String:
if (getParser().parseExpression(Res))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
break;
case AsmToken::Identifier: {
StringRef Identifier;
if (getParser().parseIdentifier(Identifier))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
@@ -563,12 +559,11 @@ OperandMatchResultTy XtensaAsmParser::parseImmediate(OperandVector &Operands) {
E = SMLoc::getFromPointer(S.getPointer() - 1);
Operands.push_back(XtensaOperand::createImm(Res, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-XtensaAsmParser::parseOperandWithModifier(OperandVector &Operands) {
- return MatchOperand_ParseFail;
+ParseStatus XtensaAsmParser::parseOperandWithModifier(OperandVector &Operands) {
+ return ParseStatus::Failure;
}
/// Looks at a token type and creates the relevant operand
@@ -578,35 +573,33 @@ bool XtensaAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
bool SR) {
// Check if the current operand has a custom associated parser, if so, try to
// custom parse the operand, or fallback to the general approach.
- OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
- if (ResTy == MatchOperand_Success)
+ ParseStatus Res = MatchOperandParserImpl(Operands, Mnemonic);
+ if (Res.isSuccess())
return false;
// If there wasn't a custom match, try the generic matcher below. Otherwise,
// there was a match, but an error occurred, in which case, just return that
// the operand parsing failed.
- if (ResTy == MatchOperand_ParseFail)
+ if (Res.isFailure())
return true;
// Attempt to parse token as register
- if (parseRegister(Operands, true, SR) == MatchOperand_Success)
+ if (parseRegister(Operands, true, SR).isSuccess())
return false;
// Attempt to parse token as an immediate
- if (parseImmediate(Operands) == MatchOperand_Success) {
+ if (parseImmediate(Operands).isSuccess())
return false;
- }
// Finally we have exhausted all options and must declare defeat.
- Error(getLoc(), "unknown operand");
- return true;
+ return Error(getLoc(), "unknown operand");
}
bool XtensaAsmParser::ParseInstructionWithSR(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
- if ((Name.startswith("wsr.") || Name.startswith("rsr.") ||
- Name.startswith("xsr.")) &&
+ if ((Name.starts_with("wsr.") || Name.starts_with("rsr.") ||
+ Name.starts_with("xsr.")) &&
(Name.size() > 4)) {
// Parse case when instruction name is concatenated with SR register
// name, like "wsr.sar a1"
@@ -620,10 +613,8 @@ bool XtensaAsmParser::ParseInstructionWithSR(ParseInstructionInfo &Info,
if (RegNo == 0)
RegNo = MatchRegisterAltName(RegName);
- if (RegNo == 0) {
- Error(NameLoc, "invalid register name");
- return true;
- }
+ if (RegNo == 0)
+ return Error(NameLoc, "invalid register name");
// Parse operand
if (parseOperand(Operands, Name))
@@ -664,8 +655,8 @@ bool XtensaAsmParser::ParseInstructionWithSR(ParseInstructionInfo &Info,
bool XtensaAsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
- if (Name.startswith("wsr") || Name.startswith("rsr") ||
- Name.startswith("xsr")) {
+ if (Name.starts_with("wsr") || Name.starts_with("rsr") ||
+ Name.starts_with("xsr")) {
return ParseInstructionWithSR(Info, Name, NameLoc, Operands);
}
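The Xtensa parser hunks swap OperandMatchResultTy for ParseStatus wholesale. A minimal sketch of the new convention, assuming LLVM's MCTargetAsmParser definitions:

    ParseStatus parseSomething(OperandVector &Operands) {
      if (NotOurToken)               // hypothetical predicate
        return ParseStatus::NoMatch; // let other parsers try this operand
      if (ConsumedButMalformed)      // hypothetical predicate
        return ParseStatus::Failure; // hard error; stop matching
      return ParseStatus::Success;
    }

ParseStatus also converts implicitly from bool (true meaning failure), which is what lets the 'return Error(getLoc(), "unknown operand");' pattern above replace the old Error-then-return-ParseFail pairs.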
diff --git a/contrib/llvm-project/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaAsmBackend.cpp
index 61417a2f2455..db4484bb57c1 100644
--- a/contrib/llvm-project/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaAsmBackend.cpp
@@ -30,7 +30,8 @@ class XtensaMCAsmBackend : public MCAsmBackend {
public:
XtensaMCAsmBackend(uint8_t osABI, bool isLE)
- : MCAsmBackend(support::little), OSABI(osABI), IsLittleEndian(isLE) {}
+ : MCAsmBackend(llvm::endianness::little), OSABI(osABI),
+ IsLittleEndian(isLE) {}
unsigned getNumFixupKinds() const override {
return Xtensa::NumTargetFixupKinds;
diff --git a/contrib/llvm-project/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaELFObjectWriter.cpp
index 7788790ee66c..7472371932f1 100644
--- a/contrib/llvm-project/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaELFObjectWriter.cpp
@@ -32,7 +32,7 @@ public:
protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
- bool needsRelocateWithSymbol(const MCSymbol &Sym,
+ bool needsRelocateWithSymbol(const MCValue &Val, const MCSymbol &Sym,
unsigned Type) const override;
};
} // namespace
@@ -60,7 +60,8 @@ llvm::createXtensaObjectWriter(uint8_t OSABI, bool IsLittleEndian) {
return std::make_unique<XtensaObjectWriter>(OSABI);
}
-bool XtensaObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
+bool XtensaObjectWriter::needsRelocateWithSymbol(const MCValue &,
+ const MCSymbol &,
unsigned Type) const {
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp
index 561ff4f0d5bb..c891ecd9c0c3 100644
--- a/contrib/llvm-project/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp
@@ -47,7 +47,7 @@ XtensaTargetMachine::XtensaTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT,
+ CodeGenOptLevel OL, bool JIT,
bool IsLittle)
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, IsLittle), TT,
CPU, FS, Options, getEffectiveRelocModel(JIT, RM),
@@ -61,7 +61,7 @@ XtensaTargetMachine::XtensaTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: XtensaTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, true) {}
TargetPassConfig *XtensaTargetMachine::createPassConfig(PassManagerBase &PM) {
diff --git a/contrib/llvm-project/llvm/lib/Target/Xtensa/XtensaTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/Xtensa/XtensaTargetMachine.h
index 866ccdc1e85d..dd76f45b3bb7 100644
--- a/contrib/llvm-project/llvm/lib/Target/Xtensa/XtensaTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/Xtensa/XtensaTargetMachine.h
@@ -27,13 +27,13 @@ public:
XtensaTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT, bool isLittle);
XtensaTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/contrib/llvm-project/llvm/lib/TargetParser/AArch64TargetParser.cpp b/contrib/llvm-project/llvm/lib/TargetParser/AArch64TargetParser.cpp
index 3a1f549b2803..d3c72497c41c 100644
--- a/contrib/llvm-project/llvm/lib/TargetParser/AArch64TargetParser.cpp
+++ b/contrib/llvm-project/llvm/lib/TargetParser/AArch64TargetParser.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/TargetParser/AArch64TargetParser.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/ARMTargetParserCommon.h"
#include "llvm/TargetParser/Triple.h"
#include <cctype>
@@ -54,11 +56,12 @@ uint64_t AArch64::getCpuSupportsMask(ArrayRef<StringRef> FeatureStrs) {
return FeaturesMask;
}
-bool AArch64::getExtensionFeatures(uint64_t InputExts,
- std::vector<StringRef> &Features) {
+bool AArch64::getExtensionFeatures(
+ const AArch64::ExtensionBitset &InputExts,
+ std::vector<StringRef> &Features) {
for (const auto &E : Extensions)
/* INVALID and NONE have no feature name. */
- if ((InputExts & E.ID) && !E.Feature.empty())
+ if (InputExts.test(E.ID) && !E.Feature.empty())
Features.push_back(E.Feature);
return true;
@@ -72,7 +75,7 @@ StringRef AArch64::resolveCPUAlias(StringRef Name) {
}
StringRef AArch64::getArchExtFeature(StringRef ArchExt) {
- if (ArchExt.startswith("no")) {
+ if (ArchExt.starts_with("no")) {
StringRef ArchExtBase(ArchExt.substr(2));
for (const auto &AE : Extensions) {
if (!AE.NegFeature.empty() && ArchExtBase == AE.Name)
@@ -107,7 +110,7 @@ std::optional<AArch64::ArchInfo> AArch64::parseArch(StringRef Arch) {
StringRef Syn = llvm::ARM::getArchSynonym(Arch);
for (const auto *A : ArchInfos) {
- if (A->Name.endswith(Syn))
+ if (A->Name.ends_with(Syn))
return *A;
}
return {};
@@ -132,3 +135,18 @@ std::optional<AArch64::CpuInfo> AArch64::parseCpu(StringRef Name) {
return {};
}
+
+void AArch64::PrintSupportedExtensions(StringMap<StringRef> DescMap) {
+ outs() << "All available -march extensions for AArch64\n\n"
+ << " " << left_justify("Name", 20)
+ << (DescMap.empty() ? "\n" : "Description\n");
+ for (const auto &Ext : Extensions) {
+ // Extensions without a feature cannot be used with -march.
+ if (!Ext.Feature.empty()) {
+ std::string Description = DescMap[Ext.Name].str();
+ outs() << " "
+ << format(Description.empty() ? "%s\n" : "%-20s%s\n",
+ Ext.Name.str().c_str(), Description.c_str());
+ }
+ }
+}
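The new helper renders a two-column listing for driver queries such as clang's --print-supported-extensions. Illustrative output (rows come from the Extensions table; the names shown here are examples):

    All available -march extensions for AArch64

        Name                Description
        aes                 Enable AES support
        bf16                Enable BFloat16 Extension

When DescMap is empty, only the Name column is printed and the Description header is omitted.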
diff --git a/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParser.cpp b/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParser.cpp
index 785e9a4fe3fb..27d168020ce6 100644
--- a/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParser.cpp
+++ b/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParser.cpp
@@ -13,6 +13,8 @@
#include "llvm/TargetParser/ARMTargetParser.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/ARMTargetParserCommon.h"
#include "llvm/TargetParser/Triple.h"
#include <cctype>
@@ -30,7 +32,7 @@ ARM::ArchKind ARM::parseArch(StringRef Arch) {
Arch = getCanonicalArchName(Arch);
StringRef Syn = getArchSynonym(Arch);
for (const auto &A : ARMArchNames) {
- if (A.Name.endswith(Syn))
+ if (A.Name.ends_with(Syn))
return A.ID;
}
return ArchKind::INVALID;
@@ -346,7 +348,7 @@ StringRef ARM::getArchExtName(uint64_t ArchExtKind) {
}
static bool stripNegationPrefix(StringRef &Name) {
- if (Name.startswith("no")) {
+ if (Name.starts_with("no")) {
Name = Name.substr(2);
return true;
}
@@ -364,26 +366,51 @@ StringRef ARM::getArchExtFeature(StringRef ArchExt) {
}
static ARM::FPUKind findDoublePrecisionFPU(ARM::FPUKind InputFPUKind) {
+ if (InputFPUKind == ARM::FK_INVALID || InputFPUKind == ARM::FK_NONE)
+ return ARM::FK_INVALID;
+
const ARM::FPUName &InputFPU = ARM::FPUNames[InputFPUKind];
// If the input FPU already supports double-precision, then there
// isn't any different FPU we can return here.
- //
- // The current available FPURestriction values are None (no
- // restriction), D16 (only 16 d-regs) and SP_D16 (16 d-regs
- // and single precision only); there's no value representing
- // SP restriction without D16. So this test just means 'is it
- // SP only?'.
- if (InputFPU.Restriction != ARM::FPURestriction::SP_D16)
+ if (ARM::isDoublePrecision(InputFPU.Restriction))
+ return InputFPUKind;
+
+ // Otherwise, look for an FPU entry with all the same fields, except
+ // that it supports double precision.
+ for (const ARM::FPUName &CandidateFPU : ARM::FPUNames) {
+ if (CandidateFPU.FPUVer == InputFPU.FPUVer &&
+ CandidateFPU.NeonSupport == InputFPU.NeonSupport &&
+ ARM::has32Regs(CandidateFPU.Restriction) ==
+ ARM::has32Regs(InputFPU.Restriction) &&
+ ARM::isDoublePrecision(CandidateFPU.Restriction)) {
+ return CandidateFPU.ID;
+ }
+ }
+
+  // Nothing found.
+ return ARM::FK_INVALID;
+}
+
+static ARM::FPUKind findSinglePrecisionFPU(ARM::FPUKind InputFPUKind) {
+ if (InputFPUKind == ARM::FK_INVALID || InputFPUKind == ARM::FK_NONE)
return ARM::FK_INVALID;
+ const ARM::FPUName &InputFPU = ARM::FPUNames[InputFPUKind];
+
+  // If the input FPU is already single-precision only, then there
+ // isn't any different FPU we can return here.
+ if (!ARM::isDoublePrecision(InputFPU.Restriction))
+ return InputFPUKind;
+
// Otherwise, look for an FPU entry with all the same fields, except
- // that SP_D16 has been replaced with just D16, representing adding
- // double precision and not changing anything else.
+ // that it does not support double precision.
for (const ARM::FPUName &CandidateFPU : ARM::FPUNames) {
if (CandidateFPU.FPUVer == InputFPU.FPUVer &&
CandidateFPU.NeonSupport == InputFPU.NeonSupport &&
- CandidateFPU.Restriction == ARM::FPURestriction::D16) {
+ ARM::has32Regs(CandidateFPU.Restriction) ==
+ ARM::has32Regs(InputFPU.Restriction) &&
+ !ARM::isDoublePrecision(CandidateFPU.Restriction)) {
return CandidateFPU.ID;
}
}
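The two lookups are now symmetric: both bail out on FK_INVALID/FK_NONE, keep the FPU version, NEON support, and register count (has32Regs) fixed, and flip only the double-precision property. Illustrative mappings, assuming the usual ARM::FPUKind enumerators:

    // findDoublePrecisionFPU(ARM::FK_FPV5_SP_D16) -> ARM::FK_FPV5_D16
    // findSinglePrecisionFPU(ARM::FK_FPV5_D16)    -> ARM::FK_FPV5_SP_D16
    // findSinglePrecisionFPU(ARM::FK_INVALID)     -> ARM::FK_INVALID (entry guard)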
@@ -418,20 +445,35 @@ bool ARM::appendArchExtFeatures(StringRef CPU, ARM::ArchKind AK,
CPU = "generic";
if (ArchExt == "fp" || ArchExt == "fp.dp") {
+ const ARM::FPUKind DefaultFPU = getDefaultFPU(CPU, AK);
ARM::FPUKind FPUKind;
if (ArchExt == "fp.dp") {
+ const bool IsDP = ArgFPUKind != ARM::FK_INVALID &&
+ ArgFPUKind != ARM::FK_NONE &&
+ isDoublePrecision(getFPURestriction(ArgFPUKind));
if (Negated) {
- Features.push_back("-fp64");
- return true;
+        /* If there is no FPU selected yet, we still need to set ArgFPUKind;
+         * leaving it as FK_INVALID would cause the default FPU to be selected
+         * later, and that could be a double-precision one. */
+ if (ArgFPUKind != ARM::FK_INVALID && !IsDP)
+ return true;
+ FPUKind = findSinglePrecisionFPU(DefaultFPU);
+ if (FPUKind == ARM::FK_INVALID)
+ FPUKind = ARM::FK_NONE;
+ } else {
+ if (IsDP)
+ return true;
+ FPUKind = findDoublePrecisionFPU(DefaultFPU);
+ if (FPUKind == ARM::FK_INVALID)
+ return false;
}
- FPUKind = findDoublePrecisionFPU(getDefaultFPU(CPU, AK));
} else if (Negated) {
FPUKind = ARM::FK_NONE;
} else {
- FPUKind = getDefaultFPU(CPU, AK);
+ FPUKind = DefaultFPU;
}
ArgFPUKind = FPUKind;
- return ARM::getFPUFeatures(FPUKind, Features);
+ return true;
}
return StartingNumFeatures != Features.size();
}
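Two behavioral points in the reworked block above are easy to miss. First, on success the function now only records the choice in ArgFPUKind and returns true; the old immediate expansion through ARM::getFPUFeatures is gone, leaving feature expansion to the caller. Second, the fp.dp branches reduce to a small decision table; a hedged summary:

    // ArchExt  Negated  ArgFPUKind already DP?        Chosen FPUKind
    // fp.dp    no       yes                           unchanged (return true)
    // fp.dp    no       no                            findDoublePrecisionFPU(DefaultFPU),
    //                                                 return false if none exists
    // fp.dp    yes      no, and an FPU was selected   unchanged (return true)
    // fp.dp    yes      otherwise                     findSinglePrecisionFPU(DefaultFPU),
    //                                                 falling back to FK_NONE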
@@ -517,6 +559,7 @@ StringRef ARM::computeDefaultTargetABI(const Triple &TT, StringRef CPU) {
case Triple::GNUEABIHF:
case Triple::MuslEABI:
case Triple::MuslEABIHF:
+ case Triple::OpenHOS:
return "aapcs-linux";
case Triple::EABIHF:
case Triple::EABI:
@@ -524,7 +567,8 @@ StringRef ARM::computeDefaultTargetABI(const Triple &TT, StringRef CPU) {
default:
if (TT.isOSNetBSD())
return "apcs-gnu";
- if (TT.isOSFreeBSD() || TT.isOSOpenBSD() || TT.isOHOSFamily())
+ if (TT.isOSFreeBSD() || TT.isOSOpenBSD() || TT.isOSHaiku() ||
+ TT.isOHOSFamily())
return "aapcs-linux";
return "aapcs";
}
@@ -540,6 +584,7 @@ StringRef ARM::getARMCPUForArch(const llvm::Triple &Triple, StringRef MArch) {
case llvm::Triple::FreeBSD:
case llvm::Triple::NetBSD:
case llvm::Triple::OpenBSD:
+ case llvm::Triple::Haiku:
if (!MArch.empty() && MArch == "v6")
return "arm1176jzf-s";
if (!MArch.empty() && MArch == "v7")
@@ -572,6 +617,8 @@ StringRef ARM::getARMCPUForArch(const llvm::Triple &Triple, StringRef MArch) {
// If no specific architecture version is requested, return the minimum CPU
// required by the OS and environment.
switch (Triple.getOS()) {
+ case llvm::Triple::Haiku:
+ return "arm1176jzf-s";
case llvm::Triple::NetBSD:
switch (Triple.getEnvironment()) {
case llvm::Triple::EABI:
@@ -598,3 +645,18 @@ StringRef ARM::getARMCPUForArch(const llvm::Triple &Triple, StringRef MArch) {
llvm_unreachable("invalid arch name");
}
+
+void ARM::PrintSupportedExtensions(StringMap<StringRef> DescMap) {
+ outs() << "All available -march extensions for ARM\n\n"
+ << " " << left_justify("Name", 20)
+ << (DescMap.empty() ? "\n" : "Description\n");
+ for (const auto &Ext : ARCHExtNames) {
+ // Extensions without a feature cannot be used with -march.
+ if (!Ext.Feature.empty()) {
+ std::string Description = DescMap[Ext.Name].str();
+ outs() << " "
+ << format(Description.empty() ? "%s\n" : "%-20s%s\n",
+ Ext.Name.str().c_str(), Description.c_str());
+ }
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParserCommon.cpp b/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParserCommon.cpp
index ba517d6cf1bc..10b80cad4347 100644
--- a/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParserCommon.cpp
+++ b/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParserCommon.cpp
@@ -44,6 +44,7 @@ StringRef ARM::getArchSynonym(StringRef Arch) {
.Case("v9.2a", "v9.2-a")
.Case("v9.3a", "v9.3-a")
.Case("v9.4a", "v9.4-a")
+ .Case("v9.5a", "v9.5-a")
.Case("v8m.base", "v8-m.base")
.Case("v8m.main", "v8-m.main")
.Case("v8.1m.main", "v8.1-m.main")
@@ -56,19 +57,19 @@ StringRef ARM::getCanonicalArchName(StringRef Arch) {
StringRef Error = "";
// Begins with "arm" / "thumb", move past it.
- if (A.startswith("arm64_32"))
+ if (A.starts_with("arm64_32"))
offset = 8;
- else if (A.startswith("arm64e"))
+ else if (A.starts_with("arm64e"))
offset = 6;
- else if (A.startswith("arm64"))
+ else if (A.starts_with("arm64"))
offset = 5;
- else if (A.startswith("aarch64_32"))
+ else if (A.starts_with("aarch64_32"))
offset = 10;
- else if (A.startswith("arm"))
+ else if (A.starts_with("arm"))
offset = 3;
- else if (A.startswith("thumb"))
+ else if (A.starts_with("thumb"))
offset = 5;
- else if (A.startswith("aarch64")) {
+ else if (A.starts_with("aarch64")) {
offset = 7;
// AArch64 uses "_be", not "eb" suffix.
if (A.contains("eb"))
@@ -81,7 +82,7 @@ StringRef ARM::getCanonicalArchName(StringRef Arch) {
if (offset != StringRef::npos && A.substr(offset, 2) == "eb")
offset += 2;
// Or, if it ends with eb ("armv7eb"), chop it off.
- else if (A.endswith("eb"))
+ else if (A.ends_with("eb"))
A = A.substr(0, A.size() - 2);
// Trim the head
if (offset != StringRef::npos)
@@ -115,18 +116,18 @@ ARM::ISAKind ARM::parseArchISA(StringRef Arch) {
}
ARM::EndianKind ARM::parseArchEndian(StringRef Arch) {
- if (Arch.startswith("armeb") || Arch.startswith("thumbeb") ||
- Arch.startswith("aarch64_be"))
+ if (Arch.starts_with("armeb") || Arch.starts_with("thumbeb") ||
+ Arch.starts_with("aarch64_be"))
return EndianKind::BIG;
- if (Arch.startswith("arm") || Arch.startswith("thumb")) {
- if (Arch.endswith("eb"))
+ if (Arch.starts_with("arm") || Arch.starts_with("thumb")) {
+ if (Arch.ends_with("eb"))
return EndianKind::BIG;
else
return EndianKind::LITTLE;
}
- if (Arch.startswith("aarch64") || Arch.startswith("aarch64_32"))
+ if (Arch.starts_with("aarch64") || Arch.starts_with("aarch64_32"))
return EndianKind::LITTLE;
return EndianKind::INVALID;
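The startswith/endswith to starts_with/ends_with churn here and throughout this merge is a mechanical rename to the std::string_view-style spellings of the StringRef predicates; behavior is unchanged. A minimal sketch of the same idiom in isolation:

    #include "llvm/ADT/StringRef.h"

    // Same shape of test the endianness parser performs above.
    bool isBigEndianArmName(llvm::StringRef Arch) {
      return Arch.starts_with("armeb") ||
             (Arch.starts_with("arm") && Arch.ends_with("eb"));
    }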
diff --git a/contrib/llvm-project/llvm/lib/TargetParser/CSKYTargetParser.cpp b/contrib/llvm-project/llvm/lib/TargetParser/CSKYTargetParser.cpp
index 493f253cd716..006d2bb342ac 100644
--- a/contrib/llvm-project/llvm/lib/TargetParser/CSKYTargetParser.cpp
+++ b/contrib/llvm-project/llvm/lib/TargetParser/CSKYTargetParser.cpp
@@ -150,7 +150,7 @@ StringRef CSKY::getArchExtName(uint64_t ArchExtKind) {
}
static bool stripNegationPrefix(StringRef &Name) {
- if (Name.startswith("no")) {
+ if (Name.starts_with("no")) {
Name = Name.substr(2);
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp b/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp
index 81309280a44b..e61fcb248fae 100644
--- a/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp
+++ b/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp
@@ -170,18 +170,18 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
StringRef Hardware;
StringRef Part;
for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
- if (Lines[I].startswith("CPU implementer"))
+ if (Lines[I].starts_with("CPU implementer"))
Implementer = Lines[I].substr(15).ltrim("\t :");
- if (Lines[I].startswith("Hardware"))
+ if (Lines[I].starts_with("Hardware"))
Hardware = Lines[I].substr(8).ltrim("\t :");
- if (Lines[I].startswith("CPU part"))
+ if (Lines[I].starts_with("CPU part"))
Part = Lines[I].substr(8).ltrim("\t :");
}
if (Implementer == "0x41") { // ARM Ltd.
// MSM8992/8994 may give the cpu part for the core that the kernel is running on,
// which is nondeterministic and wrong. Always return cortex-a53 for these SoCs.
- if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996"))
+ if (Hardware.ends_with("MSM8994") || Hardware.ends_with("MSM8996"))
return "cortex-a53";
@@ -202,12 +202,14 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
.Case("0xc20", "cortex-m0")
.Case("0xc23", "cortex-m3")
.Case("0xc24", "cortex-m4")
+ .Case("0xd24", "cortex-m52")
.Case("0xd22", "cortex-m55")
.Case("0xd02", "cortex-a34")
.Case("0xd04", "cortex-a35")
.Case("0xd03", "cortex-a53")
.Case("0xd05", "cortex-a55")
.Case("0xd46", "cortex-a510")
+ .Case("0xd80", "cortex-a520")
.Case("0xd07", "cortex-a57")
.Case("0xd08", "cortex-a72")
.Case("0xd09", "cortex-a73")
@@ -217,10 +219,12 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
.Case("0xd41", "cortex-a78")
.Case("0xd47", "cortex-a710")
.Case("0xd4d", "cortex-a715")
+ .Case("0xd81", "cortex-a720")
.Case("0xd44", "cortex-x1")
.Case("0xd4c", "cortex-x1c")
.Case("0xd48", "cortex-x2")
.Case("0xd4e", "cortex-x3")
+ .Case("0xd82", "cortex-x4")
.Case("0xd0c", "neoverse-n1")
.Case("0xd49", "neoverse-n2")
.Case("0xd40", "neoverse-v1")
@@ -363,7 +367,7 @@ StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
// Look for the CPU features.
SmallVector<StringRef, 32> CPUFeatures;
for (unsigned I = 0, E = Lines.size(); I != E; ++I)
- if (Lines[I].startswith("features")) {
+ if (Lines[I].starts_with("features")) {
size_t Pos = Lines[I].find(':');
if (Pos != StringRef::npos) {
Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
@@ -382,7 +386,7 @@ StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
// Now check the processor machine type.
for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
- if (Lines[I].startswith("processor ")) {
+ if (Lines[I].starts_with("processor ")) {
size_t Pos = Lines[I].find("machine = ");
if (Pos != StringRef::npos) {
Pos += sizeof("machine = ") - 1;
@@ -405,7 +409,7 @@ StringRef sys::detail::getHostCPUNameForRISCV(StringRef ProcCpuinfoContent) {
// Look for uarch line to determine cpu name
StringRef UArch;
for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
- if (Lines[I].startswith("uarch")) {
+ if (Lines[I].starts_with("uarch")) {
UArch = Lines[I].substr(5).ltrim("\t :");
break;
}
@@ -822,8 +826,12 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
// Alderlake:
case 0x97:
case 0x9a:
+ // Gracemont
+ case 0xbe:
// Raptorlake:
case 0xb7:
+ case 0xba:
+ case 0xbf:
// Meteorlake:
case 0xaa:
case 0xac:
@@ -832,6 +840,29 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
*Subtype = X86::INTEL_COREI7_ALDERLAKE;
break;
+ // Arrowlake:
+ case 0xc5:
+ CPU = "arrowlake";
+ *Type = X86::INTEL_COREI7;
+ *Subtype = X86::INTEL_COREI7_ARROWLAKE;
+ break;
+
+ // Arrowlake S:
+ case 0xc6:
+ // Lunarlake:
+ case 0xbd:
+ CPU = "arrowlake-s";
+ *Type = X86::INTEL_COREI7;
+ *Subtype = X86::INTEL_COREI7_ARROWLAKE_S;
+ break;
+
+ // Pantherlake:
+ case 0xcc:
+ CPU = "pantherlake";
+ *Type = X86::INTEL_COREI7;
+ *Subtype = X86::INTEL_COREI7_PANTHERLAKE;
+ break;
+
// Graniterapids:
case 0xad:
CPU = "graniterapids";
@@ -893,6 +924,9 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
*Type = X86::INTEL_GOLDMONT_PLUS;
break;
case 0x86:
+ case 0x8a: // Lakefield
+ case 0x96: // Elkhart Lake
+ case 0x9c: // Jasper Lake
CPU = "tremont";
*Type = X86::INTEL_TREMONT;
break;
@@ -909,6 +943,12 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
*Type = X86::INTEL_GRANDRIDGE;
break;
+ // Clearwaterforest:
+ case 0xdd:
+ CPU = "clearwaterforest";
+ *Type = X86::INTEL_CLEARWATERFOREST;
+ break;
+
// Xeon Phi (Knights Landing + Knights Mill):
case 0x57:
CPU = "knl";
@@ -1496,7 +1536,7 @@ StringRef sys::detail::getHostCPUNameForSPARC(StringRef ProcCpuinfoContent) {
// Look for cpu line to determine cpu name
StringRef Cpu;
for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
- if (Lines[I].startswith("cpu")) {
+ if (Lines[I].starts_with("cpu")) {
Cpu = Lines[I].substr(5).ltrim("\t :");
break;
}
@@ -1773,6 +1813,8 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["amx-complex"] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave;
Features["avxvnniint16"] = HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave;
Features["prefetchi"] = HasLeaf7Subleaf1 && ((EDX >> 14) & 1);
+ Features["usermsr"] = HasLeaf7Subleaf1 && ((EDX >> 15) & 1);
+ Features["avx10.1-256"] = HasLeaf7Subleaf1 && ((EDX >> 19) & 1);
bool HasLeafD = MaxLevel >= 0xd &&
!getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
@@ -1791,6 +1833,11 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);
+ bool HasLeaf24 =
+ MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
+ Features["avx10.1-512"] =
+ Features["avx10.1-256"] && HasLeaf24 && ((EBX >> 18) & 1);
+
return true;
}
#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
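A hedged restatement of the CPUID plumbing added above, with the bit positions this hunk uses; the parameter names mirror the locals in getHostCPUFeatures:

    #include <cstdint>

    // avx10.1-256 comes from CPUID leaf 7, subleaf 1, EDX bit 19 (usermsr is
    // bit 15 of the same register); avx10.1-512 additionally needs leaf 0x24,
    // EBX bit 18, which reports 512-bit vector support.
    bool hasAVX10_1_512(uint32_t Leaf7S1EDX, bool HasLeaf24, uint32_t Leaf24EBX) {
      bool HasAVX10_1_256 = (Leaf7S1EDX >> 19) & 1;
      return HasAVX10_1_256 && HasLeaf24 && ((Leaf24EBX >> 18) & 1);
    }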
@@ -1806,7 +1853,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
// Look for the CPU features.
for (unsigned I = 0, E = Lines.size(); I != E; ++I)
- if (Lines[I].startswith("Features")) {
+ if (Lines[I].starts_with("Features")) {
Lines[I].split(CPUFeatures, ' ');
break;
}
@@ -1912,6 +1959,9 @@ static Triple withHostArch(Triple T) {
#elif defined(__x86_64__)
T.setArch(Triple::x86_64);
T.setArchName("x86_64");
+#elif defined(__i386__)
+ T.setArch(Triple::x86);
+ T.setArchName("i386");
#elif defined(__powerpc__)
T.setArch(Triple::ppc);
T.setArchName("powerpc");
diff --git a/contrib/llvm-project/llvm/lib/TargetParser/RISCVTargetParser.cpp b/contrib/llvm-project/llvm/lib/TargetParser/RISCVTargetParser.cpp
index 30a1023c0673..85cdd1289a95 100644
--- a/contrib/llvm-project/llvm/lib/TargetParser/RISCVTargetParser.cpp
+++ b/contrib/llvm-project/llvm/lib/TargetParser/RISCVTargetParser.cpp
@@ -20,7 +20,7 @@ namespace llvm {
namespace RISCV {
enum CPUKind : unsigned {
-#define PROC(ENUM, NAME, DEFAULT_MARCH) CK_##ENUM,
+#define PROC(ENUM, NAME, DEFAULT_MARCH, FAST_UNALIGN) CK_##ENUM,
#define TUNE_PROC(ENUM, NAME) CK_##ENUM,
#include "llvm/TargetParser/RISCVTargetParserDef.inc"
};
@@ -28,12 +28,13 @@ enum CPUKind : unsigned {
struct CPUInfo {
StringLiteral Name;
StringLiteral DefaultMarch;
+ bool FastUnalignedAccess;
bool is64Bit() const { return DefaultMarch.starts_with("rv64"); }
};
constexpr CPUInfo RISCVCPUInfo[] = {
-#define PROC(ENUM, NAME, DEFAULT_MARCH) \
- {NAME, DEFAULT_MARCH},
+#define PROC(ENUM, NAME, DEFAULT_MARCH, FAST_UNALIGN) \
+ {NAME, DEFAULT_MARCH, FAST_UNALIGN},
#include "llvm/TargetParser/RISCVTargetParserDef.inc"
};
@@ -44,6 +45,11 @@ static const CPUInfo *getCPUInfoByName(StringRef CPU) {
return nullptr;
}
+bool hasFastUnalignedAccess(StringRef CPU) {
+ const CPUInfo *Info = getCPUInfoByName(CPU);
+ return Info && Info->FastUnalignedAccess;
+}
+
bool parseCPU(StringRef CPU, bool IsRV64) {
const CPUInfo *Info = getCPUInfoByName(CPU);
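A hedged usage sketch for the new query; the wrapper name is illustrative, and unknown CPU names fall back to false because getCPUInfoByName returns nullptr:

    #include "llvm/TargetParser/RISCVTargetParser.h"

    bool prefersUnalignedAccess(llvm::StringRef CPU) {
      return llvm::RISCV::hasFastUnalignedAccess(CPU);
    }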
diff --git a/contrib/llvm-project/llvm/lib/TargetParser/SubtargetFeature.cpp b/contrib/llvm-project/llvm/lib/TargetParser/SubtargetFeature.cpp
index 7c8bd44f7885..2c51c403c193 100644
--- a/contrib/llvm-project/llvm/lib/TargetParser/SubtargetFeature.cpp
+++ b/contrib/llvm-project/llvm/lib/TargetParser/SubtargetFeature.cpp
@@ -71,7 +71,7 @@ LLVM_DUMP_METHOD void SubtargetFeatures::dump() const {
void SubtargetFeatures::getDefaultSubtargetFeatures(const Triple& Triple) {
// FIXME: This is an inelegant way of specifying the features of a
// subtarget. It would be better if we could encode this information
- // into the IR. See <rdar://5972456>.
+ // into the IR.
if (Triple.getVendor() == Triple::Apple) {
if (Triple.getArch() == Triple::ppc) {
// powerpc-apple-*
diff --git a/contrib/llvm-project/llvm/lib/TargetParser/TargetParser.cpp b/contrib/llvm-project/llvm/lib/TargetParser/TargetParser.cpp
index 7faa992e472e..d741d2ce7942 100644
--- a/contrib/llvm-project/llvm/lib/TargetParser/TargetParser.cpp
+++ b/contrib/llvm-project/llvm/lib/TargetParser/TargetParser.cpp
@@ -61,68 +61,72 @@ constexpr GPUInfo R600GPUs[] = {
// This table should be sorted by the value of GPUKind
// Don't bother listing the implicitly true features
constexpr GPUInfo AMDGCNGPUs[] = {
- // Name Canonical Kind Features
- // Name
- {{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
- {{"tahiti"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
- {{"gfx601"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
- {{"pitcairn"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
- {{"verde"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
- {{"gfx602"}, {"gfx602"}, GK_GFX602, FEATURE_NONE},
- {{"hainan"}, {"gfx602"}, GK_GFX602, FEATURE_NONE},
- {{"oland"}, {"gfx602"}, GK_GFX602, FEATURE_NONE},
- {{"gfx700"}, {"gfx700"}, GK_GFX700, FEATURE_NONE},
- {{"kaveri"}, {"gfx700"}, GK_GFX700, FEATURE_NONE},
- {{"gfx701"}, {"gfx701"}, GK_GFX701, FEATURE_FAST_FMA_F32},
- {{"hawaii"}, {"gfx701"}, GK_GFX701, FEATURE_FAST_FMA_F32},
- {{"gfx702"}, {"gfx702"}, GK_GFX702, FEATURE_FAST_FMA_F32},
- {{"gfx703"}, {"gfx703"}, GK_GFX703, FEATURE_NONE},
- {{"kabini"}, {"gfx703"}, GK_GFX703, FEATURE_NONE},
- {{"mullins"}, {"gfx703"}, GK_GFX703, FEATURE_NONE},
- {{"gfx704"}, {"gfx704"}, GK_GFX704, FEATURE_NONE},
- {{"bonaire"}, {"gfx704"}, GK_GFX704, FEATURE_NONE},
- {{"gfx705"}, {"gfx705"}, GK_GFX705, FEATURE_NONE},
- {{"gfx801"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
- {{"carrizo"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
- {{"gfx802"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32},
- {{"iceland"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32},
- {{"tonga"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32},
- {{"gfx803"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
- {{"fiji"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
- {{"polaris10"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
- {{"polaris11"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
- {{"gfx805"}, {"gfx805"}, GK_GFX805, FEATURE_FAST_DENORMAL_F32},
- {{"tongapro"}, {"gfx805"}, GK_GFX805, FEATURE_FAST_DENORMAL_F32},
- {{"gfx810"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
- {{"stoney"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
- {{"gfx900"}, {"gfx900"}, GK_GFX900, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
- {{"gfx902"}, {"gfx902"}, GK_GFX902, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
- {{"gfx904"}, {"gfx904"}, GK_GFX904, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
- {{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
- {{"gfx908"}, {"gfx908"}, GK_GFX908, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
- {{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
- {{"gfx90a"}, {"gfx90a"}, GK_GFX90A, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
- {{"gfx90c"}, {"gfx90c"}, GK_GFX90C, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
- {{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
- {{"gfx941"}, {"gfx941"}, GK_GFX941, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
- {{"gfx942"}, {"gfx942"}, GK_GFX942, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
- {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
- {{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
- {{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
- {{"gfx1013"}, {"gfx1013"}, GK_GFX1013, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
- {{"gfx1030"}, {"gfx1030"}, GK_GFX1030, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
- {{"gfx1031"}, {"gfx1031"}, GK_GFX1031, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
- {{"gfx1032"}, {"gfx1032"}, GK_GFX1032, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
- {{"gfx1033"}, {"gfx1033"}, GK_GFX1033, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
- {{"gfx1034"}, {"gfx1034"}, GK_GFX1034, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
- {{"gfx1035"}, {"gfx1035"}, GK_GFX1035, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
- {{"gfx1036"}, {"gfx1036"}, GK_GFX1036, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
- {{"gfx1100"}, {"gfx1100"}, GK_GFX1100, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
- {{"gfx1101"}, {"gfx1101"}, GK_GFX1101, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
- {{"gfx1102"}, {"gfx1102"}, GK_GFX1102, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
- {{"gfx1103"}, {"gfx1103"}, GK_GFX1103, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
- {{"gfx1150"}, {"gfx1150"}, GK_GFX1150, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
- {{"gfx1151"}, {"gfx1151"}, GK_GFX1151, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ // clang-format off
+ // Name Canonical Kind Features
+ // Name
+ {{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
+ {{"tahiti"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
+ {{"gfx601"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
+ {{"pitcairn"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
+ {{"verde"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
+ {{"gfx602"}, {"gfx602"}, GK_GFX602, FEATURE_NONE},
+ {{"hainan"}, {"gfx602"}, GK_GFX602, FEATURE_NONE},
+ {{"oland"}, {"gfx602"}, GK_GFX602, FEATURE_NONE},
+ {{"gfx700"}, {"gfx700"}, GK_GFX700, FEATURE_NONE},
+ {{"kaveri"}, {"gfx700"}, GK_GFX700, FEATURE_NONE},
+ {{"gfx701"}, {"gfx701"}, GK_GFX701, FEATURE_FAST_FMA_F32},
+ {{"hawaii"}, {"gfx701"}, GK_GFX701, FEATURE_FAST_FMA_F32},
+ {{"gfx702"}, {"gfx702"}, GK_GFX702, FEATURE_FAST_FMA_F32},
+ {{"gfx703"}, {"gfx703"}, GK_GFX703, FEATURE_NONE},
+ {{"kabini"}, {"gfx703"}, GK_GFX703, FEATURE_NONE},
+ {{"mullins"}, {"gfx703"}, GK_GFX703, FEATURE_NONE},
+ {{"gfx704"}, {"gfx704"}, GK_GFX704, FEATURE_NONE},
+ {{"bonaire"}, {"gfx704"}, GK_GFX704, FEATURE_NONE},
+ {{"gfx705"}, {"gfx705"}, GK_GFX705, FEATURE_NONE},
+ {{"gfx801"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"carrizo"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx802"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32},
+ {{"iceland"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32},
+ {{"tonga"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32},
+ {{"gfx803"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
+ {{"fiji"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
+ {{"polaris10"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
+ {{"polaris11"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
+ {{"gfx805"}, {"gfx805"}, GK_GFX805, FEATURE_FAST_DENORMAL_F32},
+ {{"tongapro"}, {"gfx805"}, GK_GFX805, FEATURE_FAST_DENORMAL_F32},
+ {{"gfx810"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"stoney"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx900"}, {"gfx900"}, GK_GFX900, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx902"}, {"gfx902"}, GK_GFX902, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx904"}, {"gfx904"}, GK_GFX904, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
+ {{"gfx908"}, {"gfx908"}, GK_GFX908, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
+ {{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx90a"}, {"gfx90a"}, GK_GFX90A, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
+ {{"gfx90c"}, {"gfx90c"}, GK_GFX90C, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
+ {{"gfx941"}, {"gfx941"}, GK_GFX941, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
+ {{"gfx942"}, {"gfx942"}, GK_GFX942, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
+ {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
+ {{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
+ {{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
+ {{"gfx1013"}, {"gfx1013"}, GK_GFX1013, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
+ {{"gfx1030"}, {"gfx1030"}, GK_GFX1030, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1031"}, {"gfx1031"}, GK_GFX1031, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1032"}, {"gfx1032"}, GK_GFX1032, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1033"}, {"gfx1033"}, GK_GFX1033, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1034"}, {"gfx1034"}, GK_GFX1034, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1035"}, {"gfx1035"}, GK_GFX1035, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1036"}, {"gfx1036"}, GK_GFX1036, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1100"}, {"gfx1100"}, GK_GFX1100, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1101"}, {"gfx1101"}, GK_GFX1101, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1102"}, {"gfx1102"}, GK_GFX1102, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1103"}, {"gfx1103"}, GK_GFX1103, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1150"}, {"gfx1150"}, GK_GFX1150, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1151"}, {"gfx1151"}, GK_GFX1151, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1200"}, {"gfx1200"}, GK_GFX1200, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1201"}, {"gfx1201"}, GK_GFX1201, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ // clang-format on
};
const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
@@ -133,7 +137,7 @@ const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
return A.Kind < B.Kind;
});
- if (I == Table.end())
+ if (I == Table.end() || I->Kind != Search.Kind)
return nullptr;
return I;
}
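The extra I->Kind check fixes a classic lower_bound pitfall: lower_bound returns the first entry whose key is not less than the search key, which need not be an exact match, so a miss used to hand back the next-larger table row. A self-contained illustration of the same guard:

    #include <algorithm>
    #include <vector>

    const int *findExact(const std::vector<int> &Sorted, int Key) {
      auto I = std::lower_bound(Sorted.begin(), Sorted.end(), Key);
      if (I == Sorted.end() || *I != Key) // without "*I != Key", searching for 5
        return nullptr;                   // in {1, 7} would wrongly yield 7
      return &*I;
    }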
@@ -203,6 +207,7 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
return {0, 0, 0};
}
+ // clang-format off
switch (AK) {
case GK_GFX600: return {6, 0, 0};
case GK_GFX601: return {6, 0, 1};
@@ -246,8 +251,11 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX1103: return {11, 0, 3};
case GK_GFX1150: return {11, 5, 0};
case GK_GFX1151: return {11, 5, 1};
+ case GK_GFX1200: return {12, 0, 0};
+ case GK_GFX1201: return {12, 0, 1};
default: return {0, 0, 0};
}
+ // clang-format on
}
StringRef AMDGPU::getCanonicalArchName(const Triple &T, StringRef Arch) {
@@ -264,6 +272,27 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
// XXX - What does the member GPU mean if a device name string is passed here?
if (T.isAMDGCN()) {
switch (parseArchAMDGCN(GPU)) {
+ case GK_GFX1201:
+ case GK_GFX1200:
+ Features["ci-insts"] = true;
+ Features["dot5-insts"] = true;
+ Features["dot7-insts"] = true;
+ Features["dot8-insts"] = true;
+ Features["dot9-insts"] = true;
+ Features["dot10-insts"] = true;
+ Features["dl-insts"] = true;
+ Features["16-bit-insts"] = true;
+ Features["dpp"] = true;
+ Features["gfx8-insts"] = true;
+ Features["gfx9-insts"] = true;
+ Features["gfx10-insts"] = true;
+ Features["gfx10-3-insts"] = true;
+ Features["gfx11-insts"] = true;
+ Features["gfx12-insts"] = true;
+ Features["atomic-fadd-rtn-insts"] = true;
+ Features["image-insts"] = true;
+ Features["gws"] = true;
+ break;
case GK_GFX1151:
case GK_GFX1150:
case GK_GFX1103:
@@ -286,6 +315,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["gfx11-insts"] = true;
Features["atomic-fadd-rtn-insts"] = true;
Features["image-insts"] = true;
+ Features["gws"] = true;
break;
case GK_GFX1036:
case GK_GFX1035:
@@ -311,6 +341,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["image-insts"] = true;
Features["s-memrealtime"] = true;
Features["s-memtime-inst"] = true;
+ Features["gws"] = true;
break;
case GK_GFX1012:
case GK_GFX1011:
@@ -333,6 +364,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["image-insts"] = true;
Features["s-memrealtime"] = true;
Features["s-memtime-inst"] = true;
+ Features["gws"] = true;
break;
case GK_GFX942:
case GK_GFX941:
@@ -362,6 +394,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["s-memrealtime"] = true;
Features["ci-insts"] = true;
Features["s-memtime-inst"] = true;
+ Features["gws"] = true;
break;
case GK_GFX90A:
Features["gfx90a-insts"] = true;
@@ -412,6 +445,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
case GK_GFX600:
Features["image-insts"] = true;
Features["s-memtime-inst"] = true;
+ Features["gws"] = true;
break;
case GK_NONE:
break;
@@ -453,6 +487,8 @@ static bool isWave32Capable(StringRef GPU, const Triple &T) {
// XXX - What does the member GPU mean if a device name string is passed here?
if (T.isAMDGCN()) {
switch (parseArchAMDGCN(GPU)) {
+ case GK_GFX1201:
+ case GK_GFX1200:
case GK_GFX1151:
case GK_GFX1150:
case GK_GFX1103:
diff --git a/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp b/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp
index a3d6a06af976..ac04dab04897 100644
--- a/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp
+++ b/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp
@@ -70,6 +70,7 @@ StringRef Triple::getArchTypeName(ArchType Kind) {
case sparcv9: return "sparcv9";
case spir64: return "spir64";
case spir: return "spir";
+ case spirv: return "spirv";
case spirv32: return "spirv32";
case spirv64: return "spirv64";
case systemz: return "s390x";
@@ -89,6 +90,36 @@ StringRef Triple::getArchTypeName(ArchType Kind) {
llvm_unreachable("Invalid ArchType!");
}
+StringRef Triple::getArchName(ArchType Kind, SubArchType SubArch) {
+ switch (Kind) {
+ case Triple::mips:
+ if (SubArch == MipsSubArch_r6)
+ return "mipsisa32r6";
+ break;
+ case Triple::mipsel:
+ if (SubArch == MipsSubArch_r6)
+ return "mipsisa32r6el";
+ break;
+ case Triple::mips64:
+ if (SubArch == MipsSubArch_r6)
+ return "mipsisa64r6";
+ break;
+ case Triple::mips64el:
+ if (SubArch == MipsSubArch_r6)
+ return "mipsisa64r6el";
+ break;
+ case Triple::aarch64:
+ if (SubArch == AArch64SubArch_arm64ec)
+ return "arm64ec";
+ if (SubArch == AArch64SubArch_arm64e)
+ return "arm64e";
+ break;
+ default:
+ break;
+ }
+ return getArchTypeName(Kind);
+}
+
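This getArchName(Kind, SubArch) overload replaces the const member removed lower down in this same file's diff; per the definition above it no longer needs a Triple instance, and it gains the arm64e case the old member lacked. Illustrative results:

    // llvm::Triple::getArchName(llvm::Triple::mips64,
    //                           llvm::Triple::MipsSubArch_r6) == "mipsisa64r6"
    // llvm::Triple::getArchName(llvm::Triple::aarch64,
    //                           llvm::Triple::AArch64SubArch_arm64e) == "arm64e"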
StringRef Triple::getArchTypePrefix(ArchType Kind) {
switch (Kind) {
default:
@@ -154,6 +185,7 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) {
case spir:
case spir64: return "spir";
+ case spirv:
case spirv32:
case spirv64: return "spirv";
@@ -190,7 +222,6 @@ StringRef Triple::getVendorTypeName(VendorType Kind) {
case ImaginationTechnologies: return "img";
case Mesa: return "mesa";
case MipsTechnologies: return "mti";
- case Myriad: return "myriad";
case NVIDIA: return "nvidia";
case OpenEmbedded: return "oe";
case PC: return "pc";
@@ -208,10 +239,7 @@ StringRef Triple::getOSTypeName(OSType Kind) {
case AIX: return "aix";
case AMDHSA: return "amdhsa";
case AMDPAL: return "amdpal";
- case Ananas: return "ananas";
case CUDA: return "cuda";
- case CloudABI: return "cloudabi";
- case Contiki: return "contiki";
case Darwin: return "darwin";
case DragonFly: return "dragonfly";
case DriverKit: return "driverkit";
@@ -228,7 +256,6 @@ StringRef Triple::getOSTypeName(OSType Kind) {
case Lv2: return "lv2";
case MacOSX: return "macosx";
case Mesa3D: return "mesa3d";
- case Minix: return "minix";
case NVCL: return "nvcl";
case NaCl: return "nacl";
case NetBSD: return "netbsd";
@@ -237,6 +264,7 @@ StringRef Triple::getOSTypeName(OSType Kind) {
case PS5: return "ps5";
case RTEMS: return "rtems";
case Solaris: return "solaris";
+ case Serenity: return "serenity";
case TvOS: return "tvos";
case UEFI: return "uefi";
case WASI: return "wasi";
@@ -381,6 +409,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("hsail64", hsail64)
.Case("spir", spir)
.Case("spir64", spir64)
+ .Case("spirv", spirv)
.Case("spirv32", spirv32)
.Case("spirv64", spirv64)
.Case("kalimba", kalimba)
@@ -448,7 +477,7 @@ static Triple::ArchType parseARMArch(StringRef ArchName) {
// Thumb only exists in v4+
if (ISA == ARM::ISAKind::THUMB &&
- (ArchName.startswith("v2") || ArchName.startswith("v3")))
+ (ArchName.starts_with("v2") || ArchName.starts_with("v3")))
return Triple::UnknownArch;
// Thumb only for v6m
@@ -521,6 +550,8 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("hsail64", Triple::hsail64)
.Case("spir", Triple::spir)
.Case("spir64", Triple::spir64)
+ .Cases("spirv", "spirv1.0", "spirv1.1", "spirv1.2",
+ "spirv1.3", "spirv1.4", "spirv1.5", Triple::spirv)
.Cases("spirv32", "spirv32v1.0", "spirv32v1.1", "spirv32v1.2",
"spirv32v1.3", "spirv32v1.4", "spirv32v1.5", Triple::spirv32)
.Cases("spirv64", "spirv64v1.0", "spirv64v1.1", "spirv64v1.2",
@@ -543,10 +574,10 @@ static Triple::ArchType parseArch(StringRef ArchName) {
// Some architectures require special parsing logic just to compute the
// ArchType result.
if (AT == Triple::UnknownArch) {
- if (ArchName.startswith("arm") || ArchName.startswith("thumb") ||
- ArchName.startswith("aarch64"))
+ if (ArchName.starts_with("arm") || ArchName.starts_with("thumb") ||
+ ArchName.starts_with("aarch64"))
return parseARMArch(ArchName);
- if (ArchName.startswith("bpf"))
+ if (ArchName.starts_with("bpf"))
return parseBPFArch(ArchName);
}
@@ -565,7 +596,6 @@ static Triple::VendorType parseVendor(StringRef VendorName) {
.Case("mti", Triple::MipsTechnologies)
.Case("nvidia", Triple::NVIDIA)
.Case("csr", Triple::CSR)
- .Case("myriad", Triple::Myriad)
.Case("amd", Triple::AMD)
.Case("mesa", Triple::Mesa)
.Case("suse", Triple::SUSE)
@@ -575,8 +605,6 @@ static Triple::VendorType parseVendor(StringRef VendorName) {
static Triple::OSType parseOS(StringRef OSName) {
return StringSwitch<Triple::OSType>(OSName)
- .StartsWith("ananas", Triple::Ananas)
- .StartsWith("cloudabi", Triple::CloudABI)
.StartsWith("darwin", Triple::Darwin)
.StartsWith("dragonfly", Triple::DragonFly)
.StartsWith("freebsd", Triple::FreeBSD)
@@ -594,7 +622,6 @@ static Triple::OSType parseOS(StringRef OSName) {
.StartsWith("windows", Triple::Win32)
.StartsWith("zos", Triple::ZOS)
.StartsWith("haiku", Triple::Haiku)
- .StartsWith("minix", Triple::Minix)
.StartsWith("rtems", Triple::RTEMS)
.StartsWith("nacl", Triple::NaCl)
.StartsWith("aix", Triple::AIX)
@@ -608,7 +635,6 @@ static Triple::OSType parseOS(StringRef OSName) {
.StartsWith("watchos", Triple::WatchOS)
.StartsWith("driverkit", Triple::DriverKit)
.StartsWith("mesa3d", Triple::Mesa3D)
- .StartsWith("contiki", Triple::Contiki)
.StartsWith("amdpal", Triple::AMDPAL)
.StartsWith("hermit", Triple::HermitCore)
.StartsWith("hurd", Triple::Hurd)
@@ -616,6 +642,7 @@ static Triple::OSType parseOS(StringRef OSName) {
.StartsWith("emscripten", Triple::Emscripten)
.StartsWith("shadermodel", Triple::ShaderModel)
.StartsWith("liteos", Triple::LiteOS)
+ .StartsWith("serenity", Triple::Serenity)
.Default(Triple::UnknownOS);
}
@@ -679,8 +706,8 @@ static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) {
}
static Triple::SubArchType parseSubArch(StringRef SubArchName) {
- if (SubArchName.startswith("mips") &&
- (SubArchName.endswith("r6el") || SubArchName.endswith("r6")))
+ if (SubArchName.starts_with("mips") &&
+ (SubArchName.ends_with("r6el") || SubArchName.ends_with("r6")))
return Triple::MipsSubArch_r6;
if (SubArchName == "powerpcspe")
@@ -692,7 +719,7 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
if (SubArchName == "arm64ec")
return Triple::AArch64SubArch_arm64ec;
- if (SubArchName.startswith("spirv"))
+ if (SubArchName.starts_with("spirv"))
return StringSwitch<Triple::SubArchType>(SubArchName)
.EndsWith("v1.0", Triple::SPIRVSubArch_v10)
.EndsWith("v1.1", Triple::SPIRVSubArch_v11)
@@ -792,6 +819,8 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
}
static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
+ if (T.isOSDarwin())
+ return Triple::MachO;
switch (T.getArch()) {
case Triple::UnknownArch:
case Triple::aarch64:
@@ -800,9 +829,9 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
case Triple::thumb:
case Triple::x86:
case Triple::x86_64:
- if (T.isOSDarwin())
- return Triple::MachO;
- else if (T.isOSWindows())
+ if (T.isOSWindows())
+ return Triple::COFF;
+ else if (T.isUEFI())
return Triple::COFF;
return Triple::ELF;
@@ -869,6 +898,7 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
case Triple::wasm64:
return Triple::Wasm;
+ case Triple::spirv:
case Triple::spirv32:
case Triple::spirv64:
return Triple::SPIRV;
@@ -974,8 +1004,8 @@ std::string Triple::normalize(StringRef Str) {
OSType OS = UnknownOS;
if (Components.size() > 2) {
OS = parseOS(Components[2]);
- IsCygwin = Components[2].startswith("cygwin");
- IsMinGW32 = Components[2].startswith("mingw");
+ IsCygwin = Components[2].starts_with("cygwin");
+ IsMinGW32 = Components[2].starts_with("mingw");
}
EnvironmentType Environment = UnknownEnvironment;
if (Components.size() > 3)
@@ -1019,8 +1049,8 @@ std::string Triple::normalize(StringRef Str) {
break;
case 2:
OS = parseOS(Comp);
- IsCygwin = Comp.startswith("cygwin");
- IsMinGW32 = Comp.startswith("mingw");
+ IsCygwin = Comp.starts_with("cygwin");
+ IsMinGW32 = Comp.starts_with("mingw");
Valid = OS != UnknownOS || IsCygwin || IsMinGW32;
break;
case 3:
@@ -1097,7 +1127,8 @@ std::string Triple::normalize(StringRef Str) {
// Special case logic goes here. At this point Arch, Vendor and OS have the
// correct values for the computed components.
std::string NormalizedEnvironment;
- if (Environment == Triple::Android && Components[3].startswith("androideabi")) {
+ if (Environment == Triple::Android &&
+ Components[3].starts_with("androideabi")) {
StringRef AndroidVersion = Components[3].drop_front(strlen("androideabi"));
if (AndroidVersion.empty()) {
Components[3] = "android";
@@ -1145,34 +1176,6 @@ StringRef Triple::getArchName() const {
return StringRef(Data).split('-').first; // Isolate first component
}
-StringRef Triple::getArchName(ArchType Kind, SubArchType SubArch) const {
- switch (Kind) {
- case Triple::mips:
- if (SubArch == MipsSubArch_r6)
- return "mipsisa32r6";
- break;
- case Triple::mipsel:
- if (SubArch == MipsSubArch_r6)
- return "mipsisa32r6el";
- break;
- case Triple::mips64:
- if (SubArch == MipsSubArch_r6)
- return "mipsisa64r6";
- break;
- case Triple::mips64el:
- if (SubArch == MipsSubArch_r6)
- return "mipsisa64r6el";
- break;
- case Triple::aarch64:
- if (SubArch == AArch64SubArch_arm64ec)
- return "arm64ec";
- break;
- default:
- break;
- }
- return getArchTypeName(Kind);
-}
-
StringRef Triple::getVendorName() const {
StringRef Tmp = StringRef(Data).split('-').second; // Strip first component
return Tmp.split('-').first; // Isolate second component
@@ -1204,7 +1207,7 @@ static VersionTuple parseVersionFromName(StringRef Name) {
VersionTuple Triple::getEnvironmentVersion() const {
StringRef EnvironmentName = getEnvironmentName();
StringRef EnvironmentTypeName = getEnvironmentTypeName(getEnvironment());
- if (EnvironmentName.startswith(EnvironmentTypeName))
+ if (EnvironmentName.starts_with(EnvironmentTypeName))
EnvironmentName = EnvironmentName.substr(EnvironmentTypeName.size());
return parseVersionFromName(EnvironmentName);
@@ -1214,7 +1217,7 @@ VersionTuple Triple::getOSVersion() const {
StringRef OSName = getOSName();
// Assume that the OS portion of the triple starts with the canonical name.
StringRef OSTypeName = getOSTypeName(getOS());
- if (OSName.startswith(OSTypeName))
+ if (OSName.starts_with(OSTypeName))
OSName = OSName.substr(OSTypeName.size());
else if (getOS() == MacOSX)
OSName.consume_front("macos");
@@ -1452,6 +1455,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::renderscript64:
case llvm::Triple::riscv64:
case llvm::Triple::sparcv9:
+ case llvm::Triple::spirv:
case llvm::Triple::spir64:
case llvm::Triple::spirv64:
case llvm::Triple::systemz:
@@ -1546,6 +1550,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::riscv64: T.setArch(Triple::riscv32); break;
case Triple::sparcv9: T.setArch(Triple::sparc); break;
case Triple::spir64: T.setArch(Triple::spir); break;
+ case Triple::spirv:
case Triple::spirv64:
T.setArch(Triple::spirv32, getSubArch());
break;
@@ -1624,6 +1629,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::riscv32: T.setArch(Triple::riscv64); break;
case Triple::sparc: T.setArch(Triple::sparcv9); break;
case Triple::spir: T.setArch(Triple::spir64); break;
+ case Triple::spirv:
case Triple::spirv32:
T.setArch(Triple::spirv64, getSubArch());
break;
@@ -1666,6 +1672,7 @@ Triple Triple::getBigEndianArchVariant() const {
case Triple::shave:
case Triple::spir64:
case Triple::spir:
+ case Triple::spirv:
case Triple::spirv32:
case Triple::spirv64:
case Triple::wasm32:
@@ -1775,6 +1782,7 @@ bool Triple::isLittleEndian() const {
case Triple::sparcel:
case Triple::spir64:
case Triple::spir:
+ case Triple::spirv:
case Triple::spirv32:
case Triple::spirv64:
case Triple::tcele:
diff --git a/contrib/llvm-project/llvm/lib/TargetParser/X86TargetParser.cpp b/contrib/llvm-project/llvm/lib/TargetParser/X86TargetParser.cpp
index 8bd063116cf6..085554f18b2b 100644
--- a/contrib/llvm-project/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/contrib/llvm-project/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/TargetParser/X86TargetParser.h"
+#include "llvm/ADT/Bitset.h"
#include "llvm/ADT/StringSwitch.h"
#include <numeric>
@@ -19,88 +20,7 @@ using namespace llvm::X86;
namespace {
-/// Container class for CPU features.
-/// This is a constexpr reimplementation of a subset of std::bitset. It would be
-/// nice to use std::bitset directly, but it doesn't support constant
-/// initialization.
-class FeatureBitset {
- static constexpr unsigned NUM_FEATURE_WORDS =
- (X86::CPU_FEATURE_MAX + 31) / 32;
-
- // This cannot be a std::array, operator[] is not constexpr until C++17.
- uint32_t Bits[NUM_FEATURE_WORDS] = {};
-
-public:
- constexpr FeatureBitset() = default;
- constexpr FeatureBitset(std::initializer_list<unsigned> Init) {
- for (auto I : Init)
- set(I);
- }
-
- bool any() const {
- return llvm::any_of(Bits, [](uint64_t V) { return V != 0; });
- }
-
- constexpr FeatureBitset &set(unsigned I) {
- // GCC <6.2 crashes if this is written in a single statement.
- uint32_t NewBits = Bits[I / 32] | (uint32_t(1) << (I % 32));
- Bits[I / 32] = NewBits;
- return *this;
- }
-
- constexpr bool operator[](unsigned I) const {
- uint32_t Mask = uint32_t(1) << (I % 32);
- return (Bits[I / 32] & Mask) != 0;
- }
-
- constexpr FeatureBitset &operator&=(const FeatureBitset &RHS) {
- for (unsigned I = 0, E = std::size(Bits); I != E; ++I) {
- // GCC <6.2 crashes if this is written in a single statement.
- uint32_t NewBits = Bits[I] & RHS.Bits[I];
- Bits[I] = NewBits;
- }
- return *this;
- }
-
- constexpr FeatureBitset &operator|=(const FeatureBitset &RHS) {
- for (unsigned I = 0, E = std::size(Bits); I != E; ++I) {
- // GCC <6.2 crashes if this is written in a single statement.
- uint32_t NewBits = Bits[I] | RHS.Bits[I];
- Bits[I] = NewBits;
- }
- return *this;
- }
-
- // gcc 5.3 miscompiles this if we try to write this using operator&=.
- constexpr FeatureBitset operator&(const FeatureBitset &RHS) const {
- FeatureBitset Result;
- for (unsigned I = 0, E = std::size(Bits); I != E; ++I)
- Result.Bits[I] = Bits[I] & RHS.Bits[I];
- return Result;
- }
-
- // gcc 5.3 miscompiles this if we try to write this using operator&=.
- constexpr FeatureBitset operator|(const FeatureBitset &RHS) const {
- FeatureBitset Result;
- for (unsigned I = 0, E = std::size(Bits); I != E; ++I)
- Result.Bits[I] = Bits[I] | RHS.Bits[I];
- return Result;
- }
-
- constexpr FeatureBitset operator~() const {
- FeatureBitset Result;
- for (unsigned I = 0, E = std::size(Bits); I != E; ++I)
- Result.Bits[I] = ~Bits[I];
- return Result;
- }
-
- constexpr bool operator!=(const FeatureBitset &RHS) const {
- for (unsigned I = 0, E = std::size(Bits); I != E; ++I)
- if (Bits[I] != RHS.Bits[I])
- return true;
- return false;
- }
-};
+using FeatureBitset = Bitset<X86::CPU_FEATURE_MAX>;
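The hand-rolled bitset, with its pre-C++17 and old-GCC workarounds, is replaced by llvm::Bitset from llvm/ADT/Bitset.h (included at the top of this file's diff), which supplies the constexpr construction, test/set, and bitwise operators the feature tables below rely on. A hedged sketch, with an illustrative width:

    #include "llvm/ADT/Bitset.h"

    using DemoBitset = llvm::Bitset<256>;  // width illustrative
    constexpr DemoBitset A({1, 5});        // constexpr initializer-list ctor
    constexpr DemoBitset B = A | DemoBitset({7});
    static_assert(B.test(5) && B.test(7), "bitwise ops stay constexpr");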
struct ProcInfo {
StringLiteral Name;
@@ -112,8 +32,15 @@ struct ProcInfo {
};
struct FeatureInfo {
- StringLiteral Name;
+ StringLiteral NameWithPlus;
FeatureBitset ImpliedFeatures;
+
+ StringRef getName(bool WithPlus = false) const {
+ assert(NameWithPlus[0] == '+' && "Expected string to start with '+'");
+ if (WithPlus)
+ return NameWithPlus;
+ return NameWithPlus.drop_front();
+ }
};
} // end anonymous namespace
@@ -145,7 +72,7 @@ constexpr FeatureBitset FeaturesX86_64_V2 = FeaturesX86_64 | FeatureSAHF |
constexpr FeatureBitset FeaturesX86_64_V3 =
FeaturesX86_64_V2 | FeatureAVX2 | FeatureBMI | FeatureBMI2 | FeatureF16C |
FeatureFMA | FeatureLZCNT | FeatureMOVBE | FeatureXSAVE;
-constexpr FeatureBitset FeaturesX86_64_V4 = FeaturesX86_64_V3 |
+constexpr FeatureBitset FeaturesX86_64_V4 = FeaturesX86_64_V3 | FeatureEVEX512 |
FeatureAVX512BW | FeatureAVX512CD |
FeatureAVX512DQ | FeatureAVX512VL;
@@ -169,8 +96,8 @@ constexpr FeatureBitset FeaturesBroadwell =
// Intel Knights Landing and Knights Mill
// Knights Landing has feature parity with Broadwell.
constexpr FeatureBitset FeaturesKNL =
- FeaturesBroadwell | FeatureAES | FeatureAVX512F | FeatureAVX512CD |
- FeatureAVX512ER | FeatureAVX512PF | FeaturePREFETCHWT1;
+ FeaturesBroadwell | FeatureAES | FeatureAVX512F | FeatureEVEX512 |
+ FeatureAVX512CD | FeatureAVX512ER | FeatureAVX512PF | FeaturePREFETCHWT1;
constexpr FeatureBitset FeaturesKNM = FeaturesKNL | FeatureAVX512VPOPCNTDQ;
// Intel Skylake processors.
@@ -180,9 +107,9 @@ constexpr FeatureBitset FeaturesSkylakeClient =
// SkylakeServer inherits all SkylakeClient features except SGX.
// FIXME: That doesn't match gcc.
constexpr FeatureBitset FeaturesSkylakeServer =
- (FeaturesSkylakeClient & ~FeatureSGX) | FeatureAVX512F | FeatureAVX512CD |
- FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL | FeatureCLWB |
- FeaturePKU;
+ (FeaturesSkylakeClient & ~FeatureSGX) | FeatureAVX512F | FeatureEVEX512 |
+ FeatureAVX512CD | FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL |
+ FeatureCLWB | FeaturePKU;
constexpr FeatureBitset FeaturesCascadeLake =
FeaturesSkylakeServer | FeatureAVX512VNNI;
constexpr FeatureBitset FeaturesCooperLake =
@@ -190,9 +117,9 @@ constexpr FeatureBitset FeaturesCooperLake =
// Intel 10nm processors.
constexpr FeatureBitset FeaturesCannonlake =
- FeaturesSkylakeClient | FeatureAVX512F | FeatureAVX512CD | FeatureAVX512DQ |
- FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA | FeatureAVX512VBMI |
- FeaturePKU | FeatureSHA;
+ FeaturesSkylakeClient | FeatureAVX512F | FeatureEVEX512 | FeatureAVX512CD |
+ FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA |
+ FeatureAVX512VBMI | FeaturePKU | FeatureSHA;
constexpr FeatureBitset FeaturesICLClient =
FeaturesCannonlake | FeatureAVX512BITALG | FeatureAVX512VBMI2 |
FeatureAVX512VNNI | FeatureAVX512VPOPCNTDQ | FeatureGFNI | FeatureRDPID |
@@ -237,6 +164,12 @@ constexpr FeatureBitset FeaturesSierraforest =
FeatureENQCMD | FeatureAVXNECONVERT | FeatureAVXVNNIINT8;
constexpr FeatureBitset FeaturesGrandridge =
FeaturesSierraforest | FeatureRAOINT;
+constexpr FeatureBitset FeaturesArrowlakeS = FeaturesSierraforest |
+ FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4;
+constexpr FeatureBitset FeaturesPantherlake =
+ FeaturesArrowlakeS | FeaturePREFETCHI;
+constexpr FeatureBitset FeaturesClearwaterforest =
+ FeaturesArrowlakeS | FeatureUSERMSR | FeaturePREFETCHI;
// Geode Processor.
constexpr FeatureBitset FeaturesGeode =
@@ -301,11 +234,11 @@ static constexpr FeatureBitset FeaturesZNVER3 = FeaturesZNVER2 |
FeatureINVPCID | FeaturePKU |
FeatureVAES | FeatureVPCLMULQDQ;
static constexpr FeatureBitset FeaturesZNVER4 =
- FeaturesZNVER3 | FeatureAVX512F | FeatureAVX512CD | FeatureAVX512DQ |
- FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA | FeatureAVX512VBMI |
- FeatureAVX512VBMI2 | FeatureAVX512VNNI | FeatureAVX512BITALG |
- FeatureAVX512VPOPCNTDQ | FeatureAVX512BF16 | FeatureGFNI |
- FeatureSHSTK;
+ FeaturesZNVER3 | FeatureAVX512F | FeatureEVEX512 | FeatureAVX512CD |
+ FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA |
+ FeatureAVX512VBMI | FeatureAVX512VBMI2 | FeatureAVX512VNNI |
+ FeatureAVX512BITALG | FeatureAVX512VPOPCNTDQ | FeatureAVX512BF16 |
+ FeatureGFNI | FeatureSHSTK;
// D151696 transplanted Mangling and OnlyForCPUDispatchSpecific from
// X86TargetParser.def to here. They are assigned in the following ways:
@@ -315,6 +248,7 @@ static constexpr FeatureBitset FeaturesZNVER4 =
// listed here before, which means it doesn't support -march, -mtune and so on.
// FIXME: Remove OnlyForCPUDispatchSpecific after all CPUs here support both
// cpu_dispatch/specific() feature and -march, -mtune, and so on.
+// clang-format off
constexpr ProcInfo Processors[] = {
// Empty processor. Include X87 and CMPXCHG8 for backwards compatibility.
{ {""}, CK_None, ~0U, FeatureX87 | FeatureCMPXCHG8B, '\0', false },
@@ -422,6 +356,16 @@ constexpr ProcInfo Processors[] = {
{ {"raptorlake"}, CK_Raptorlake, FEATURE_AVX2, FeaturesAlderlake, 'p', false },
// Meteorlake microarchitecture based processors.
{ {"meteorlake"}, CK_Meteorlake, FEATURE_AVX2, FeaturesAlderlake, 'p', false },
+ // Arrowlake microarchitecture based processors.
+ { {"arrowlake"}, CK_Arrowlake, FEATURE_AVX2, FeaturesSierraforest, 'p', false },
+ { {"arrowlake-s"}, CK_ArrowlakeS, FEATURE_AVX2, FeaturesArrowlakeS, '\0', false },
+ { {"arrowlake_s"}, CK_ArrowlakeS, FEATURE_AVX2, FeaturesArrowlakeS, 'p', true },
+ // Lunarlake microarchitecture based processors.
+ { {"lunarlake"}, CK_Lunarlake, FEATURE_AVX2, FeaturesArrowlakeS, 'p', false },
+ // Gracemont microarchitecture based processors.
+ { {"gracemont"}, CK_Gracemont, FEATURE_AVX2, FeaturesAlderlake, 'p', false },
+ // Pantherlake microarchitecture based processors.
+ { {"pantherlake"}, CK_Lunarlake, FEATURE_AVX2, FeaturesPantherlake, 'p', false },
// Sierraforest microarchitecture based processors.
{ {"sierraforest"}, CK_Sierraforest, FEATURE_AVX2, FeaturesSierraforest, 'p', false },
// Grandridge microarchitecture based processors.
@@ -433,6 +377,8 @@ constexpr ProcInfo Processors[] = {
{ {"graniterapids_d"}, CK_GraniterapidsD, FEATURE_AVX512BF16, FeaturesGraniteRapids | FeatureAMX_COMPLEX, 'n', true },
// Emerald Rapids microarchitecture based processors.
{ {"emeraldrapids"}, CK_Emeraldrapids, FEATURE_AVX512BF16, FeaturesSapphireRapids, 'n', false },
+ // Clearwaterforest microarchitecture based processors.
+ { {"clearwaterforest"}, CK_Lunarlake, FEATURE_AVX2, FeaturesClearwaterforest, 'p', false },
// Knights Landing processor.
{ {"knl"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL, 'Z', false },
{ {"mic_avx512"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL, 'Z', true },
@@ -474,13 +420,14 @@ constexpr ProcInfo Processors[] = {
{ {"znver3"}, CK_ZNVER3, FEATURE_AVX2, FeaturesZNVER3, '\0', false },
{ {"znver4"}, CK_ZNVER4, FEATURE_AVX512VBMI2, FeaturesZNVER4, '\0', false },
// Generic 64-bit processor.
- { {"x86-64"}, CK_x86_64, ~0U, FeaturesX86_64, '\0', false },
- { {"x86-64-v2"}, CK_x86_64_v2, ~0U, FeaturesX86_64_V2, '\0', false },
- { {"x86-64-v3"}, CK_x86_64_v3, ~0U, FeaturesX86_64_V3, '\0', false },
- { {"x86-64-v4"}, CK_x86_64_v4, ~0U, FeaturesX86_64_V4, '\0', false },
+ { {"x86-64"}, CK_x86_64, FEATURE_SSE2 , FeaturesX86_64, '\0', false },
+ { {"x86-64-v2"}, CK_x86_64_v2, FEATURE_SSE4_2 , FeaturesX86_64_V2, '\0', false },
+ { {"x86-64-v3"}, CK_x86_64_v3, FEATURE_AVX2, FeaturesX86_64_V3, '\0', false },
+ { {"x86-64-v4"}, CK_x86_64_v4, FEATURE_AVX512VL, FeaturesX86_64_V4, '\0', false },
// Geode processors.
{ {"geode"}, CK_Geode, ~0U, FeaturesGeode, '\0', false },
};
+// clang-format on
constexpr const char *NoTuneList[] = {"x86-64-v2", "x86-64-v3", "x86-64-v4"};
@@ -570,6 +517,7 @@ constexpr FeatureBitset ImpliedFeaturesSHSTK = {};
constexpr FeatureBitset ImpliedFeaturesTBM = {};
constexpr FeatureBitset ImpliedFeaturesTSXLDTRK = {};
constexpr FeatureBitset ImpliedFeaturesUINTR = {};
+constexpr FeatureBitset ImpliedFeaturesUSERMSR = {};
constexpr FeatureBitset ImpliedFeaturesWAITPKG = {};
constexpr FeatureBitset ImpliedFeaturesWBNOINVD = {};
constexpr FeatureBitset ImpliedFeaturesVZEROUPPER = {};
@@ -603,6 +551,7 @@ constexpr FeatureBitset ImpliedFeaturesSSE4_1 = FeatureSSSE3;
constexpr FeatureBitset ImpliedFeaturesSSE4_2 = FeatureSSE4_1;
constexpr FeatureBitset ImpliedFeaturesAVX = FeatureSSE4_2;
constexpr FeatureBitset ImpliedFeaturesAVX2 = FeatureAVX;
+constexpr FeatureBitset ImpliedFeaturesEVEX512 = {};
constexpr FeatureBitset ImpliedFeaturesAVX512F =
FeatureAVX2 | FeatureF16C | FeatureFMA;
@@ -613,10 +562,10 @@ constexpr FeatureBitset ImpliedFeaturesFMA = FeatureAVX;
constexpr FeatureBitset ImpliedFeaturesGFNI = FeatureSSE2;
constexpr FeatureBitset ImpliedFeaturesPCLMUL = FeatureSSE2;
constexpr FeatureBitset ImpliedFeaturesSHA = FeatureSSE2;
-constexpr FeatureBitset ImpliedFeaturesVAES = FeatureAES | FeatureAVX;
+constexpr FeatureBitset ImpliedFeaturesVAES = FeatureAES | FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesVPCLMULQDQ = FeatureAVX | FeaturePCLMUL;
constexpr FeatureBitset ImpliedFeaturesSM3 = FeatureAVX;
-constexpr FeatureBitset ImpliedFeaturesSM4 = FeatureAVX;
+constexpr FeatureBitset ImpliedFeaturesSM4 = FeatureAVX2;
// AVX512 features.
constexpr FeatureBitset ImpliedFeaturesAVX512CD = FeatureAVX512F;
@@ -660,7 +609,7 @@ constexpr FeatureBitset ImpliedFeaturesAVXVNNIINT16 = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesAVXVNNIINT8 = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesAVXIFMA = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesAVXNECONVERT = FeatureAVX2;
-constexpr FeatureBitset ImpliedFeaturesSHA512 = FeatureAVX;
+constexpr FeatureBitset ImpliedFeaturesSHA512 = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesAVX512FP16 =
FeatureAVX512BW | FeatureAVX512DQ | FeatureAVX512VL;
// Key Locker Features
@@ -670,19 +619,31 @@ constexpr FeatureBitset ImpliedFeaturesWIDEKL = FeatureKL;
// AVXVNNI Features
constexpr FeatureBitset ImpliedFeaturesAVXVNNI = FeatureAVX2;
-constexpr FeatureInfo FeatureInfos[X86::CPU_FEATURE_MAX] = {
-#define X86_FEATURE(ENUM, STR) {{STR}, ImpliedFeatures##ENUM},
-#include "llvm/TargetParser/X86TargetParser.def"
-};
+// AVX10 Features
+constexpr FeatureBitset ImpliedFeaturesAVX10_1 =
+ FeatureAVX512CD | FeatureAVX512VBMI | FeatureAVX512IFMA |
+ FeatureAVX512VNNI | FeatureAVX512BF16 | FeatureAVX512VPOPCNTDQ |
+ FeatureAVX512VBMI2 | FeatureAVX512BITALG | FeatureVAES | FeatureVPCLMULQDQ |
+ FeatureAVX512FP16;
+constexpr FeatureBitset ImpliedFeaturesAVX10_1_512 =
+ FeatureAVX10_1 | FeatureEVEX512;
+
+// APX Features
+constexpr FeatureBitset ImpliedFeaturesEGPR = {};
+constexpr FeatureBitset ImpliedFeaturesPush2Pop2 = {};
+constexpr FeatureBitset ImpliedFeaturesPPX = {};
+constexpr FeatureBitset ImpliedFeaturesNDD = {};
+constexpr FeatureBitset ImpliedFeaturesCCMP = {};
+constexpr FeatureBitset ImpliedFeaturesCF = {};
-constexpr FeatureInfo FeatureInfos_WithPLUS[X86::CPU_FEATURE_MAX] = {
+constexpr FeatureInfo FeatureInfos[X86::CPU_FEATURE_MAX] = {
#define X86_FEATURE(ENUM, STR) {{"+" STR}, ImpliedFeatures##ENUM},
#include "llvm/TargetParser/X86TargetParser.def"
};
void llvm::X86::getFeaturesForCPU(StringRef CPU,
SmallVectorImpl<StringRef> &EnabledFeatures,
- bool IfNeedPlus) {
+ bool NeedPlus) {
auto I = llvm::find_if(Processors,
[&](const ProcInfo &P) { return P.Name == CPU; });
assert(I != std::end(Processors) && "Processor not found!");
@@ -695,11 +656,8 @@ void llvm::X86::getFeaturesForCPU(StringRef CPU,
// Add the string version of all set bits.
for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i)
- if (Bits[i] && !FeatureInfos[i].Name.empty() &&
- !FeatureInfos_WithPLUS[i].Name.empty()){
- EnabledFeatures.push_back(IfNeedPlus ? FeatureInfos_WithPLUS[i].Name
- : FeatureInfos[i].Name);
- }
+ if (Bits[i] && !FeatureInfos[i].getName(NeedPlus).empty())
+ EnabledFeatures.push_back(FeatureInfos[i].getName(NeedPlus));
}
// For each feature that is (transitively) implied by this feature, set it.
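The implied-feature chains above (SSE4.2 implies SSE4.1, AVX implies SSE4.2, AVX2 implies AVX, and so on) are resolved transitively. A minimal sketch of that fixed-point expansion, using std::bitset and an assumed feature count in place of LLVM's FeatureBitset:

    #include <bitset>

    constexpr unsigned MaxFeatures = 128; // illustrative size, not CPU_FEATURE_MAX

    // Keep OR-ing in the implied set of every enabled feature until the
    // bitset stops changing; Implied[I] holds the features implied by I.
    void expandImplied(std::bitset<MaxFeatures> &Bits,
                       const std::bitset<MaxFeatures> Implied[]) {
      std::bitset<MaxFeatures> Prev;
      do {
        Prev = Bits;
        for (unsigned I = 0; I != MaxFeatures; ++I)
          if (Bits[I])
            Bits |= Implied[I]; // e.g. AVX2 pulls in AVX, which pulls in SSE4.2
      } while (Bits != Prev);
    }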
@@ -736,8 +694,9 @@ static void getImpliedDisabledFeatures(FeatureBitset &Bits, unsigned Value) {
void llvm::X86::updateImpliedFeatures(
StringRef Feature, bool Enabled,
StringMap<bool> &Features) {
- auto I = llvm::find_if(
- FeatureInfos, [&](const FeatureInfo &FI) { return FI.Name == Feature; });
+ auto I = llvm::find_if(FeatureInfos, [&](const FeatureInfo &FI) {
+ return FI.getName() == Feature;
+ });
if (I == std::end(FeatureInfos)) {
// FIXME: This shouldn't happen, but may not have all features in the table
// yet.
@@ -753,8 +712,8 @@ void llvm::X86::updateImpliedFeatures(
// Update the map entry for all implied features.
for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i)
- if (ImpliedBits[i] && !FeatureInfos[i].Name.empty())
- Features[FeatureInfos[i].Name] = Enabled;
+ if (ImpliedBits[i] && !FeatureInfos[i].getName().empty())
+ Features[FeatureInfos[i].getName()] = Enabled;
}
char llvm::X86::getCPUDispatchMangling(StringRef CPU) {
@@ -771,18 +730,22 @@ bool llvm::X86::validateCPUSpecificCPUDispatch(StringRef Name) {
return I != std::end(Processors);
}
-uint64_t llvm::X86::getCpuSupportsMask(ArrayRef<StringRef> FeatureStrs) {
+std::array<uint32_t, 4>
+llvm::X86::getCpuSupportsMask(ArrayRef<StringRef> FeatureStrs) {
// Processor features and mapping to processor feature value.
- uint64_t FeaturesMask = 0;
- for (const StringRef &FeatureStr : FeatureStrs) {
+ std::array<uint32_t, 4> FeatureMask{};
+ for (StringRef FeatureStr : FeatureStrs) {
unsigned Feature = StringSwitch<unsigned>(FeatureStr)
#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) \
.Case(STR, llvm::X86::FEATURE_##ENUM)
+#define X86_MICROARCH_LEVEL(ENUM, STR, PRIORITY) \
+ .Case(STR, llvm::X86::FEATURE_##ENUM)
#include "llvm/TargetParser/X86TargetParser.def"
;
- FeaturesMask |= (1ULL << Feature);
+ assert(Feature / 32 < FeatureMask.size());
+ FeatureMask[Feature / 32] |= 1U << (Feature % 32);
}
- return FeaturesMask;
+ return FeatureMask;
}
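The return-type change widens the feature space from a single 64-bit mask to four 32-bit words (128 bits), matching the growing feature list. A self-contained sketch of the packing scheme used above; the runtime consumer of this mask (the __builtin_cpu_supports machinery) is an assumption here and not shown in this patch:

    #include <array>
    #include <cstdint>

    using FeatureMaskT = std::array<uint32_t, 4>; // 4 x 32 = 128 feature bits

    void setFeature(FeatureMaskT &Mask, unsigned Feature) {
      Mask[Feature / 32] |= 1u << (Feature % 32); // word index, then bit index
    }

    bool hasFeature(const FeatureMaskT &Mask, unsigned Feature) {
      return Mask[Feature / 32] & (1u << (Feature % 32));
    }
    // Feature 70, for example, lands in word 2 (70 / 32) as bit 6 (70 % 32).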
unsigned llvm::X86::getFeaturePriority(ProcessorFeatures Feat) {
@@ -793,13 +756,11 @@ unsigned llvm::X86::getFeaturePriority(ProcessorFeatures Feat) {
#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) PRIORITY,
unsigned Priorities[] = {
#include "llvm/TargetParser/X86TargetParser.def"
- std::numeric_limits<unsigned>::max() // Need to consume last comma.
};
- std::array<unsigned, std::size(Priorities) - 1> HelperList;
+ std::array<unsigned, std::size(Priorities)> HelperList;
std::iota(HelperList.begin(), HelperList.end(), 0);
assert(std::is_permutation(HelperList.begin(), HelperList.end(),
- std::begin(Priorities),
- std::prev(std::end(Priorities))) &&
+ std::begin(Priorities), std::end(Priorities)) &&
"Priorities don't form consecutive range!");
#endif
diff --git a/contrib/llvm-project/llvm/lib/Testing/Support/Error.cpp b/contrib/llvm-project/llvm/lib/Testing/Support/Error.cpp
index a5f8f9b47b3f..657092146cae 100644
--- a/contrib/llvm-project/llvm/lib/Testing/Support/Error.cpp
+++ b/contrib/llvm-project/llvm/lib/Testing/Support/Error.cpp
@@ -8,8 +8,6 @@
#include "llvm/Testing/Support/Error.h"
-#include "llvm/ADT/StringRef.h"
-
using namespace llvm;
llvm::detail::ErrorHolder llvm::detail::TakeError(llvm::Error Err) {
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/InterfaceFile.cpp b/contrib/llvm-project/llvm/lib/TextAPI/InterfaceFile.cpp
index b7f967aa754e..3689ab919191 100644
--- a/contrib/llvm-project/llvm/lib/TextAPI/InterfaceFile.cpp
+++ b/contrib/llvm-project/llvm/lib/TextAPI/InterfaceFile.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/TextAPI/InterfaceFile.h"
+#include "llvm/TextAPI/TextAPIError.h"
#include <iomanip>
#include <sstream>
@@ -47,16 +48,16 @@ void InterfaceFile::addParentUmbrella(const Target &Target_, StringRef Parent) {
}
void InterfaceFile::addRPath(const Target &InputTarget, StringRef RPath) {
- auto Iter = lower_bound(RPaths, InputTarget,
- [](const std::pair<Target, std::string> &LHS,
- Target RHS) { return LHS.first < RHS; });
+ using RPathEntryT = const std::pair<Target, std::string>;
+ RPathEntryT Entry(InputTarget, RPath);
+ auto Iter =
+ lower_bound(RPaths, Entry,
+ [](RPathEntryT &LHS, RPathEntryT &RHS) { return LHS < RHS; });
- if ((Iter != RPaths.end()) && !(InputTarget < Iter->first)) {
- Iter->second = std::string(RPath);
+ if ((Iter != RPaths.end()) && (*Iter == Entry))
return;
- }
- RPaths.emplace(Iter, InputTarget, std::string(RPath));
+ RPaths.emplace(Iter, Entry);
}
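The rewritten addRPath is the standard sorted-vector insert-if-absent idiom: lower_bound finds the insertion point, and an equality check against the element found there detects duplicates. The same pattern in a generic, self-contained form (std::vector and std::pair stand in for the RPaths container):

    #include <algorithm>
    #include <string>
    #include <utility>
    #include <vector>

    using Entry = std::pair<int, std::string>; // stand-in for (Target, RPath)

    void insertSorted(std::vector<Entry> &V, Entry E) {
      auto It = std::lower_bound(V.begin(), V.end(), E);
      if (It != V.end() && *It == E)
        return;                 // already present, nothing to do
      V.insert(It, std::move(E));
    }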
void InterfaceFile::addTarget(const Target &Target) {
@@ -81,6 +82,267 @@ void InterfaceFile::addDocument(std::shared_ptr<InterfaceFile> &&Document) {
Documents.insert(Pos, Document);
}
+void InterfaceFile::inlineLibrary(std::shared_ptr<InterfaceFile> Library,
+ bool Overwrite) {
+ auto AddFwk = [&](std::shared_ptr<InterfaceFile> &&Reexport) {
+ auto It = lower_bound(
+ Documents, Reexport->getInstallName(),
+ [](std::shared_ptr<InterfaceFile> &Lhs, const StringRef Rhs) {
+ return Lhs->getInstallName() < Rhs;
+ });
+
+ if (Overwrite && It != Documents.end() &&
+ Reexport->getInstallName() == (*It)->getInstallName()) {
+ std::replace(Documents.begin(), Documents.end(), *It,
+ std::move(Reexport));
+ return;
+ }
+
+ if ((It != Documents.end()) &&
+ !(Reexport->getInstallName() < (*It)->getInstallName()))
+ return;
+
+ Documents.emplace(It, std::move(Reexport));
+ };
+ for (auto Doc : Library->documents())
+ AddFwk(std::move(Doc));
+
+ Library->Documents.clear();
+ AddFwk(std::move(Library));
+}
+
+Expected<std::unique_ptr<InterfaceFile>>
+InterfaceFile::merge(const InterfaceFile *O) const {
+ // Verify files can be merged.
+ if (getInstallName() != O->getInstallName()) {
+ return make_error<StringError>("install names do not match",
+ inconvertibleErrorCode());
+ }
+
+ if (getCurrentVersion() != O->getCurrentVersion()) {
+ return make_error<StringError>("current versions do not match",
+ inconvertibleErrorCode());
+ }
+
+ if (getCompatibilityVersion() != O->getCompatibilityVersion()) {
+ return make_error<StringError>("compatibility versions do not match",
+ inconvertibleErrorCode());
+ }
+
+ if ((getSwiftABIVersion() != 0) && (O->getSwiftABIVersion() != 0) &&
+ (getSwiftABIVersion() != O->getSwiftABIVersion())) {
+ return make_error<StringError>("swift ABI versions do not match",
+ inconvertibleErrorCode());
+ }
+
+ if (isTwoLevelNamespace() != O->isTwoLevelNamespace()) {
+ return make_error<StringError>("two level namespace flags do not match",
+ inconvertibleErrorCode());
+ }
+
+ if (isApplicationExtensionSafe() != O->isApplicationExtensionSafe()) {
+ return make_error<StringError>(
+ "application extension safe flags do not match",
+ inconvertibleErrorCode());
+ }
+
+ std::unique_ptr<InterfaceFile> IF(new InterfaceFile());
+ IF->setFileType(std::max(getFileType(), O->getFileType()));
+ IF->setPath(getPath());
+ IF->setInstallName(getInstallName());
+ IF->setCurrentVersion(getCurrentVersion());
+ IF->setCompatibilityVersion(getCompatibilityVersion());
+
+ if (getSwiftABIVersion() == 0)
+ IF->setSwiftABIVersion(O->getSwiftABIVersion());
+ else
+ IF->setSwiftABIVersion(getSwiftABIVersion());
+
+ IF->setTwoLevelNamespace(isTwoLevelNamespace());
+ IF->setApplicationExtensionSafe(isApplicationExtensionSafe());
+
+ for (const auto &It : umbrellas()) {
+ if (!It.second.empty())
+ IF->addParentUmbrella(It.first, It.second);
+ }
+ for (const auto &It : O->umbrellas()) {
+ if (!It.second.empty())
+ IF->addParentUmbrella(It.first, It.second);
+ }
+ IF->addTargets(targets());
+ IF->addTargets(O->targets());
+
+ for (const auto &Lib : allowableClients())
+ for (const auto &Target : Lib.targets())
+ IF->addAllowableClient(Lib.getInstallName(), Target);
+
+ for (const auto &Lib : O->allowableClients())
+ for (const auto &Target : Lib.targets())
+ IF->addAllowableClient(Lib.getInstallName(), Target);
+
+ for (const auto &Lib : reexportedLibraries())
+ for (const auto &Target : Lib.targets())
+ IF->addReexportedLibrary(Lib.getInstallName(), Target);
+
+ for (const auto &Lib : O->reexportedLibraries())
+ for (const auto &Target : Lib.targets())
+ IF->addReexportedLibrary(Lib.getInstallName(), Target);
+
+ for (const auto &[Target, Path] : rpaths())
+ IF->addRPath(Target, Path);
+ for (const auto &[Target, Path] : O->rpaths())
+ IF->addRPath(Target, Path);
+
+ for (const auto *Sym : symbols()) {
+ IF->addSymbol(Sym->getKind(), Sym->getName(), Sym->targets(),
+ Sym->getFlags());
+ }
+
+ for (const auto *Sym : O->symbols()) {
+ IF->addSymbol(Sym->getKind(), Sym->getName(), Sym->targets(),
+ Sym->getFlags());
+ }
+
+ return std::move(IF);
+}
+
+Expected<std::unique_ptr<InterfaceFile>>
+InterfaceFile::remove(Architecture Arch) const {
+ if (getArchitectures() == Arch)
+ return make_error<StringError>("cannot remove last architecture slice '" +
+ getArchitectureName(Arch) + "'",
+ inconvertibleErrorCode());
+
+ if (!getArchitectures().has(Arch)) {
+ bool Found = false;
+ for (auto &Doc : Documents) {
+ if (Doc->getArchitectures().has(Arch)) {
+ Found = true;
+ break;
+ }
+ }
+
+ if (!Found)
+ return make_error<TextAPIError>(TextAPIErrorCode::NoSuchArchitecture);
+ }
+
+ std::unique_ptr<InterfaceFile> IF(new InterfaceFile());
+ IF->setFileType(getFileType());
+ IF->setPath(getPath());
+ IF->addTargets(targets(ArchitectureSet::All().clear(Arch)));
+ IF->setInstallName(getInstallName());
+ IF->setCurrentVersion(getCurrentVersion());
+ IF->setCompatibilityVersion(getCompatibilityVersion());
+ IF->setSwiftABIVersion(getSwiftABIVersion());
+ IF->setTwoLevelNamespace(isTwoLevelNamespace());
+ IF->setApplicationExtensionSafe(isApplicationExtensionSafe());
+ for (const auto &It : umbrellas())
+ if (It.first.Arch != Arch)
+ IF->addParentUmbrella(It.first, It.second);
+
+ for (const auto &Lib : allowableClients()) {
+ for (const auto &Target : Lib.targets())
+ if (Target.Arch != Arch)
+ IF->addAllowableClient(Lib.getInstallName(), Target);
+ }
+
+ for (const auto &Lib : reexportedLibraries()) {
+ for (const auto &Target : Lib.targets())
+ if (Target.Arch != Arch)
+ IF->addReexportedLibrary(Lib.getInstallName(), Target);
+ }
+
+ for (const auto *Sym : symbols()) {
+ auto Archs = Sym->getArchitectures();
+ Archs.clear(Arch);
+ if (Archs.empty())
+ continue;
+
+ IF->addSymbol(Sym->getKind(), Sym->getName(), Sym->targets(Archs),
+ Sym->getFlags());
+ }
+
+ for (auto &Doc : Documents) {
+ // Skip the inlined document if the architecture to be removed is the
+ // only one left.
+ if (Doc->getArchitectures() == Arch)
+ continue;
+
+ // If the document doesn't contain the arch, then no work is to be done
+ // and it can be copied over.
+ if (!Doc->getArchitectures().has(Arch)) {
+ auto NewDoc = Doc;
+ IF->addDocument(std::move(NewDoc));
+ continue;
+ }
+
+ auto Result = Doc->remove(Arch);
+ if (!Result)
+ return Result;
+
+ IF->addDocument(std::move(Result.get()));
+ }
+
+ return std::move(IF);
+}
+
+Expected<std::unique_ptr<InterfaceFile>>
+InterfaceFile::extract(Architecture Arch) const {
+ if (!getArchitectures().has(Arch)) {
+ return make_error<StringError>("file doesn't have architecture '" +
+ getArchitectureName(Arch) + "'",
+ inconvertibleErrorCode());
+ }
+
+ std::unique_ptr<InterfaceFile> IF(new InterfaceFile());
+ IF->setFileType(getFileType());
+ IF->setPath(getPath());
+ IF->addTargets(targets(Arch));
+ IF->setInstallName(getInstallName());
+ IF->setCurrentVersion(getCurrentVersion());
+ IF->setCompatibilityVersion(getCompatibilityVersion());
+ IF->setSwiftABIVersion(getSwiftABIVersion());
+ IF->setTwoLevelNamespace(isTwoLevelNamespace());
+ IF->setApplicationExtensionSafe(isApplicationExtensionSafe());
+ for (const auto &It : umbrellas())
+ if (It.first.Arch == Arch)
+ IF->addParentUmbrella(It.first, It.second);
+
+ for (const auto &It : rpaths())
+ if (It.first.Arch == Arch)
+ IF->addRPath(It.first, It.second);
+
+ for (const auto &Lib : allowableClients())
+ for (const auto &Target : Lib.targets())
+ if (Target.Arch == Arch)
+ IF->addAllowableClient(Lib.getInstallName(), Target);
+
+ for (const auto &Lib : reexportedLibraries())
+ for (const auto &Target : Lib.targets())
+ if (Target.Arch == Arch)
+ IF->addReexportedLibrary(Lib.getInstallName(), Target);
+
+ for (const auto *Sym : symbols()) {
+ if (Sym->hasArchitecture(Arch))
+ IF->addSymbol(Sym->getKind(), Sym->getName(), Sym->targets(Arch),
+ Sym->getFlags());
+ }
+
+ for (auto &Doc : Documents) {
+ // Skip documents that don't have the requested architecture.
+ if (!Doc->getArchitectures().has(Arch))
+ continue;
+
+ auto Result = Doc->extract(Arch);
+ if (!Result)
+ return Result;
+
+ IF->addDocument(std::move(Result.get()));
+ }
+
+ return std::move(IF);
+}
+
static bool isYAMLTextStub(const FileType &Kind) {
return (Kind >= FileType::TBD_V1) && (Kind < FileType::TBD_V5);
}
@@ -99,6 +361,10 @@ bool InterfaceFile::operator==(const InterfaceFile &O) const {
return false;
if (IsAppExtensionSafe != O.IsAppExtensionSafe)
return false;
+ if (IsOSLibNotForSharedCache != O.IsOSLibNotForSharedCache)
+ return false;
+ if (HasSimSupport != O.HasSimSupport)
+ return false;
if (ParentUmbrellas != O.ParentUmbrellas)
return false;
if (AllowableClients != O.AllowableClients)
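Taken together, the new inlineLibrary/merge/remove/extract entry points give tapi-style tools whole-file operations that report failures through Expected. A hedged usage sketch (AK_arm64 and AK_x86_64 are the architecture constants from llvm/TextAPI/Architecture.h; error plumbing is abbreviated):

    // Assuming A and B are InterfaceFiles with matching install names:
    Expected<std::unique_ptr<InterfaceFile>> MergedOrErr = A.merge(&B);
    if (!MergedOrErr)
      return MergedOrErr.takeError();

    // Slice out a single architecture, or drop one from the merged file.
    auto Arm64Only = (*MergedOrErr)->extract(AK_arm64);
    auto NoX86 = (*MergedOrErr)->remove(AK_x86_64);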
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/PackedVersion.cpp b/contrib/llvm-project/llvm/lib/TextAPI/PackedVersion.cpp
index 22960c33e9ee..4742be79f457 100644
--- a/contrib/llvm-project/llvm/lib/TextAPI/PackedVersion.cpp
+++ b/contrib/llvm-project/llvm/lib/TextAPI/PackedVersion.cpp
@@ -28,7 +28,7 @@ bool PackedVersion::parse32(StringRef Str) {
SmallVector<StringRef, 3> Parts;
SplitString(Str, Parts, ".");
- if (Parts.size() > 3)
+ if (Parts.size() > 3 || Parts.empty())
return false;
unsigned long long Num;
@@ -63,7 +63,7 @@ std::pair<bool, bool> PackedVersion::parse64(StringRef Str) {
SmallVector<StringRef, 5> Parts;
SplitString(Str, Parts, ".");
- if (Parts.size() > 5)
+ if (Parts.size() > 5 || Parts.empty())
return std::make_pair(false, Truncated);
unsigned long long Num;
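For context, PackedVersion stores an X.Y.Z version in one 32-bit word using the usual Mach-O encoding: 16 bits of major, 8 of minor, 8 of subminor. The new Parts.empty() guards reject inputs such as "" or "." that split into no components at all. A minimal sketch of the encoding:

    #include <cstdint>

    // Mach-O packed version: xxxx.yy.zz -> 0xXXXXYYZZ.
    uint32_t packVersion(unsigned Major, unsigned Minor, unsigned Subminor) {
      return ((Major & 0xffffu) << 16) | ((Minor & 0xffu) << 8) |
             (Subminor & 0xffu);
    }
    // packVersion(10, 14, 0) == 0x000A0E00, i.e. "10.14".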
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/Platform.cpp b/contrib/llvm-project/llvm/lib/TextAPI/Platform.cpp
index d0575847a876..ed041af40aa5 100644
--- a/contrib/llvm-project/llvm/lib/TextAPI/Platform.cpp
+++ b/contrib/llvm-project/llvm/lib/TextAPI/Platform.cpp
@@ -62,28 +62,11 @@ PlatformSet mapToPlatformSet(ArrayRef<Triple> Targets) {
StringRef getPlatformName(PlatformType Platform) {
switch (Platform) {
- case PLATFORM_UNKNOWN:
- return "unknown";
- case PLATFORM_MACOS:
- return "macOS";
- case PLATFORM_IOS:
- return "iOS";
- case PLATFORM_TVOS:
- return "tvOS";
- case PLATFORM_WATCHOS:
- return "watchOS";
- case PLATFORM_BRIDGEOS:
- return "bridgeOS";
- case PLATFORM_MACCATALYST:
- return "macCatalyst";
- case PLATFORM_IOSSIMULATOR:
- return "iOS Simulator";
- case PLATFORM_TVOSSIMULATOR:
- return "tvOS Simulator";
- case PLATFORM_WATCHOSSIMULATOR:
- return "watchOS Simulator";
- case PLATFORM_DRIVERKIT:
- return "DriverKit";
+#define PLATFORM(platform, id, name, build_name, target, tapi_target, \
+ marketing) \
+ case PLATFORM_##platform: \
+ return #marketing;
+#include "llvm/BinaryFormat/MachO.def"
}
llvm_unreachable("Unknown llvm::MachO::PlatformType enum");
}
@@ -91,16 +74,10 @@ StringRef getPlatformName(PlatformType Platform) {
PlatformType getPlatformFromName(StringRef Name) {
return StringSwitch<PlatformType>(Name)
.Case("osx", PLATFORM_MACOS)
- .Case("macos", PLATFORM_MACOS)
- .Case("ios", PLATFORM_IOS)
- .Case("tvos", PLATFORM_TVOS)
- .Case("watchos", PLATFORM_WATCHOS)
- .Case("bridgeos", PLATFORM_BRIDGEOS)
- .Case("ios-macabi", PLATFORM_MACCATALYST)
- .Case("ios-simulator", PLATFORM_IOSSIMULATOR)
- .Case("tvos-simulator", PLATFORM_TVOSSIMULATOR)
- .Case("watchos-simulator", PLATFORM_WATCHOSSIMULATOR)
- .Case("driverkit", PLATFORM_DRIVERKIT)
+#define PLATFORM(platform, id, name, build_name, target, tapi_target, \
+ marketing) \
+ .Case(#target, PLATFORM_##platform)
+#include "llvm/BinaryFormat/MachO.def"
.Default(PLATFORM_UNKNOWN);
}
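Both functions now derive their tables from the PLATFORM() X-macro in llvm/BinaryFormat/MachO.def, so adding a platform only needs one new .def entry. A self-contained demo of the pattern (the entries and field list here are illustrative, not the real MachO.def contents):

    #include <cstdio>

    // Single source of truth; each consumer redefines PLATFORM to stamp
    // out what it needs.
    #define MY_PLATFORMS \
      PLATFORM(MACOS, 1, "macOS") \
      PLATFORM(IOS, 2, "iOS")

    enum PlatformType {
    #define PLATFORM(platform, id, marketing) PLATFORM_##platform = id,
      MY_PLATFORMS
    #undef PLATFORM
    };

    const char *getName(PlatformType P) {
      switch (P) {
    #define PLATFORM(platform, id, marketing) \
      case PLATFORM_##platform:               \
        return marketing;
        MY_PLATFORMS
    #undef PLATFORM
      }
      return "unknown";
    }

    int main() { std::printf("%s\n", getName(PLATFORM_MACOS)); } // prints macOS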
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/RecordsSlice.cpp b/contrib/llvm-project/llvm/lib/TextAPI/RecordsSlice.cpp
new file mode 100644
index 000000000000..a220b255aea3
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/TextAPI/RecordsSlice.cpp
@@ -0,0 +1,224 @@
+//===- RecordsSlice.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the Records Slice APIs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TextAPI/RecordsSlice.h"
+#include "llvm/TextAPI/Record.h"
+#include "llvm/TextAPI/Symbol.h"
+#include <utility>
+
+using namespace llvm;
+using namespace llvm::MachO;
+
+Record *RecordsSlice::addRecord(StringRef Name, SymbolFlags Flags,
+ GlobalRecord::Kind GV, RecordLinkage Linkage) {
+ // Find a specific Record type to capture.
+ auto [APIName, SymKind] = parseSymbol(Name, Flags);
+ Name = APIName;
+ switch (SymKind) {
+ case SymbolKind::GlobalSymbol:
+ return addGlobal(Name, Linkage, GV, Flags);
+ case SymbolKind::ObjectiveCClass:
+ return addObjCInterface(Name, Linkage);
+ case SymbolKind::ObjectiveCClassEHType:
+ return addObjCInterface(Name, Linkage, /*HasEHType=*/true);
+ case SymbolKind::ObjectiveCInstanceVariable: {
+ auto [Super, IVar] = Name.split('.');
+ // Attempt to find super class.
+ ObjCContainerRecord *Container = findContainer(/*IsIVar=*/false, Super);
+ // If not found, create extension since there is no mapped class symbol.
+ if (Container == nullptr)
+ Container = addObjCCategory(Super, {});
+ return addObjCIVar(Container, IVar, Linkage);
+ }
+ }
+
+ llvm_unreachable("unexpected symbol kind when adding to Record Slice");
+}
+
+ObjCContainerRecord *RecordsSlice::findContainer(bool IsIVar,
+ StringRef Name) const {
+ StringRef Super = IsIVar ? Name.split('.').first : Name;
+ ObjCContainerRecord *Container = findObjCInterface(Super);
+ // IVars can only exist in extensions if they did not come from a
+ // class.
+ if (Container == nullptr)
+ Container = findObjCCategory(Super, "");
+ return Container;
+}
+
+template <typename R, typename C = RecordMap<R>, typename K = StringRef>
+R *findRecord(K Key, const C &Container) {
+ const auto *Record = Container.find(Key);
+ if (Record == Container.end())
+ return nullptr;
+ return Record->second.get();
+}
+
+GlobalRecord *RecordsSlice::findGlobal(StringRef Name,
+ GlobalRecord::Kind GV) const {
+ auto *Record = findRecord<GlobalRecord>(Name, Globals);
+ if (!Record)
+ return nullptr;
+
+ switch (GV) {
+ case GlobalRecord::Kind::Variable: {
+ if (!Record->isVariable())
+ return nullptr;
+ break;
+ }
+ case GlobalRecord::Kind::Function: {
+ if (!Record->isFunction())
+ return nullptr;
+ break;
+ }
+ case GlobalRecord::Kind::Unknown:
+ return Record;
+ }
+
+ return Record;
+}
+
+ObjCInterfaceRecord *RecordsSlice::findObjCInterface(StringRef Name) const {
+ return findRecord<ObjCInterfaceRecord>(Name, Classes);
+}
+
+ObjCCategoryRecord *RecordsSlice::findObjCCategory(StringRef ClassToExtend,
+ StringRef Category) const {
+ return findRecord<ObjCCategoryRecord>(std::make_pair(ClassToExtend, Category),
+ Categories);
+}
+
+ObjCIVarRecord *ObjCContainerRecord::findObjCIVar(StringRef IVar) const {
+ return findRecord<ObjCIVarRecord>(IVar, IVars);
+}
+
+ObjCIVarRecord *RecordsSlice::findObjCIVar(bool IsScopedName,
+ StringRef Name) const {
+ // If scoped name, the name of the container is known.
+ if (IsScopedName) {
+ // The IVar does not exist if no container is assigned to it.
+ auto *Container = findContainer(/*IsIVar=*/true, Name);
+ if (!Container)
+ return nullptr;
+
+ StringRef IVar = Name.substr(Name.find_first_of('.') + 1);
+ return Container->findObjCIVar(IVar);
+ }
+
+ // Otherwise traverse through containers and attempt to find IVar.
+ auto getIVar = [Name](auto &Records) -> ObjCIVarRecord * {
+ for (const auto &[_, Container] : Records) {
+ if (auto *IVarR = Container->findObjCIVar(Name))
+ return IVarR;
+ }
+ return nullptr;
+ };
+
+ if (auto *IVarRecord = getIVar(Classes))
+ return IVarRecord;
+
+ return getIVar(Categories);
+}
+
+GlobalRecord *RecordsSlice::addGlobal(StringRef Name, RecordLinkage Linkage,
+ GlobalRecord::Kind GV,
+ SymbolFlags Flags) {
+ if (GV == GlobalRecord::Kind::Function)
+ Flags |= SymbolFlags::Text;
+ else if (GV == GlobalRecord::Kind::Variable)
+ Flags |= SymbolFlags::Data;
+
+ Name = copyString(Name);
+ auto Result = Globals.insert({Name, nullptr});
+ if (Result.second)
+ Result.first->second =
+ std::make_unique<GlobalRecord>(Name, Linkage, Flags, GV);
+ else
+ updateLinkage(Result.first->second.get(), Linkage);
+ return Result.first->second.get();
+}
+
+ObjCInterfaceRecord *RecordsSlice::addObjCInterface(StringRef Name,
+ RecordLinkage Linkage,
+ bool HasEHType) {
+ Name = copyString(Name);
+ auto Result = Classes.insert({Name, nullptr});
+ if (Result.second) {
+ Result.first->second =
+ std::make_unique<ObjCInterfaceRecord>(Name, Linkage, HasEHType);
+ } else {
+ // ObjC classes represent multiple symbols that could have competing
+ // linkages; in those cases, assign the largest one.
+ if (Linkage >= RecordLinkage::Rexported)
+ updateLinkage(Result.first->second.get(), Linkage);
+ }
+
+ return Result.first->second.get();
+}
+
+bool ObjCInterfaceRecord::addObjCCategory(ObjCCategoryRecord *Record) {
+ auto Result = Categories.insert({Name, Record});
+ return Result.second;
+}
+
+ObjCCategoryRecord *RecordsSlice::addObjCCategory(StringRef ClassToExtend,
+ StringRef Category) {
+ Category = copyString(Category);
+
+ // Add owning record first into record slice.
+ auto Result =
+ Categories.insert({std::make_pair(ClassToExtend, Category), nullptr});
+ if (Result.second)
+ Result.first->second =
+ std::make_unique<ObjCCategoryRecord>(ClassToExtend, Category);
+
+ // Then add a reference to it in the class.
+ if (auto *ObjCClass = findObjCInterface(ClassToExtend))
+ ObjCClass->addObjCCategory(Result.first->second.get());
+
+ return Result.first->second.get();
+}
+
+ObjCIVarRecord *ObjCContainerRecord::addObjCIVar(StringRef IVar,
+ RecordLinkage Linkage) {
+ auto Result = IVars.insert({IVar, nullptr});
+ if (Result.second)
+ Result.first->second = std::make_unique<ObjCIVarRecord>(Name, Linkage);
+ return Result.first->second.get();
+}
+
+ObjCIVarRecord *RecordsSlice::addObjCIVar(ObjCContainerRecord *Container,
+ StringRef Name,
+ RecordLinkage Linkage) {
+ Name = copyString(Name);
+ ObjCIVarRecord *Record = Container->addObjCIVar(Name, Linkage);
+ updateLinkage(Record, Linkage);
+ return Record;
+}
+
+StringRef RecordsSlice::copyString(StringRef String) {
+ if (String.empty())
+ return {};
+
+ if (StringAllocator.identifyObject(String.data()))
+ return String;
+
+ void *Ptr = StringAllocator.Allocate(String.size(), 1);
+ memcpy(Ptr, String.data(), String.size());
+ return StringRef(reinterpret_cast<const char *>(Ptr), String.size());
+}
+
+RecordsSlice::BinaryAttrs &RecordsSlice::getBinaryAttrs() {
+ if (!hasBinaryAttrs())
+ BA = std::make_unique<BinaryAttrs>();
+ return *BA;
+}
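addGlobal, addObjCInterface, and addObjCCategory above all share one map idiom: insert a {Key, nullptr} placeholder, construct the record only when the insertion actually happened, and otherwise merge into the existing entry. The same pattern in a generic, self-contained form (std::map stands in for RecordMap):

    #include <algorithm>
    #include <map>
    #include <memory>
    #include <string>

    struct Record { int Linkage = 0; };

    Record *getOrCreate(std::map<std::string, std::unique_ptr<Record>> &Map,
                        const std::string &Name, int Linkage) {
      auto Result = Map.insert({Name, nullptr});
      if (Result.second) { // newly inserted: construct the record in place
        Result.first->second = std::make_unique<Record>();
        Result.first->second->Linkage = Linkage;
      } else {             // already present: keep the larger linkage
        Result.first->second->Linkage =
            std::max(Result.first->second->Linkage, Linkage);
      }
      return Result.first->second.get();
    }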
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/Symbol.cpp b/contrib/llvm-project/llvm/lib/TextAPI/Symbol.cpp
index 20fa6362716a..fd395436051d 100644
--- a/contrib/llvm-project/llvm/lib/TextAPI/Symbol.cpp
+++ b/contrib/llvm-project/llvm/lib/TextAPI/Symbol.cpp
@@ -72,5 +72,31 @@ bool Symbol::operator==(const Symbol &O) const {
std::tie(O.Name, O.Kind, O.Targets, RHSFlags);
}
+SimpleSymbol parseSymbol(StringRef SymName, const SymbolFlags Flags) {
+ if (SymName.starts_with(ObjC1ClassNamePrefix))
+ return {SymName.drop_front(ObjC1ClassNamePrefix.size()),
+ SymbolKind::ObjectiveCClass};
+ if (SymName.starts_with(ObjC2ClassNamePrefix))
+ return {SymName.drop_front(ObjC2ClassNamePrefix.size()),
+ SymbolKind::ObjectiveCClass};
+ if (SymName.starts_with(ObjC2MetaClassNamePrefix))
+ return {SymName.drop_front(ObjC2MetaClassNamePrefix.size()),
+ SymbolKind::ObjectiveCClass};
+ if (SymName.starts_with(ObjC2EHTypePrefix)) {
+ // When classes without an ehtype are used in try/catch blocks,
+ // a weak-defined symbol is exported. In those cases, treat them as a
+ // global instead.
+ if ((Flags & SymbolFlags::WeakDefined) == SymbolFlags::WeakDefined)
+ return {SymName, SymbolKind::GlobalSymbol};
+ return {SymName.drop_front(ObjC2EHTypePrefix.size()),
+ SymbolKind::ObjectiveCClassEHType};
+ }
+
+ if (SymName.starts_with(ObjC2IVarPrefix))
+ return {SymName.drop_front(ObjC2IVarPrefix.size()),
+ SymbolKind::ObjectiveCInstanceVariable};
+ return {SymName, SymbolKind::GlobalSymbol};
+}
+
} // end namespace MachO.
} // end namespace llvm.
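parseSymbol centralizes the Objective-C prefix stripping that the readers previously did inline (the drop_front(15) calls elsewhere in this patch correspond to the 15-character "_OBJC_EHTYPE_$_"). A hedged usage sketch; the SimpleSymbol field names are assumed from the aggregate returns above:

    // "_OBJC_CLASS_$_Foo"    -> {"Foo", SymbolKind::ObjectiveCClass}
    // "_OBJC_IVAR_$_Foo.bar" -> {"Foo.bar", SymbolKind::ObjectiveCInstanceVariable}
    // "_main"                -> {"_main", SymbolKind::GlobalSymbol}
    SimpleSymbol Sym = parseSymbol("_OBJC_CLASS_$_Foo", SymbolFlags::None);
    assert(Sym.Kind == SymbolKind::ObjectiveCClass && Sym.Name == "Foo");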
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/SymbolSet.cpp b/contrib/llvm-project/llvm/lib/TextAPI/SymbolSet.cpp
index 157e13749729..5b9882dadcdf 100644
--- a/contrib/llvm-project/llvm/lib/TextAPI/SymbolSet.cpp
+++ b/contrib/llvm-project/llvm/lib/TextAPI/SymbolSet.cpp
@@ -29,8 +29,5 @@ Symbol *SymbolSet::addGlobal(SymbolKind Kind, StringRef Name, SymbolFlags Flags,
}
const Symbol *SymbolSet::findSymbol(SymbolKind Kind, StringRef Name) const {
- auto It = Symbols.find({Kind, Name});
- if (It != Symbols.end())
- return It->second;
- return nullptr;
+ return Symbols.lookup({Kind, Name});
}
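The simplification works because DenseMap::lookup returns the mapped value when the key is present and a value-initialized one (nullptr for a pointer payload) when it is not, which is exactly the find-or-null dance it replaces. The equivalent, spelled out against a standard map for illustration:

    #include <map>

    const int *lookupOrNull(const std::map<int, const int *> &M, int Key) {
      auto It = M.find(Key);
      return It == M.end() ? nullptr : It->second;
    }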
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/Target.cpp b/contrib/llvm-project/llvm/lib/TextAPI/Target.cpp
index e20842498331..a50abeeca194 100644
--- a/contrib/llvm-project/llvm/lib/TextAPI/Target.cpp
+++ b/contrib/llvm-project/llvm/lib/TextAPI/Target.cpp
@@ -21,20 +21,14 @@ Expected<Target> Target::create(StringRef TargetValue) {
auto PlatformStr = Result.second;
PlatformType Platform;
Platform = StringSwitch<PlatformType>(PlatformStr)
- .Case("macos", PLATFORM_MACOS)
- .Case("ios", PLATFORM_IOS)
- .Case("tvos", PLATFORM_TVOS)
- .Case("watchos", PLATFORM_WATCHOS)
- .Case("bridgeos", PLATFORM_BRIDGEOS)
- .Case("maccatalyst", PLATFORM_MACCATALYST)
- .Case("ios-simulator", PLATFORM_IOSSIMULATOR)
- .Case("tvos-simulator", PLATFORM_TVOSSIMULATOR)
- .Case("watchos-simulator", PLATFORM_WATCHOSSIMULATOR)
- .Case("driverkit", PLATFORM_DRIVERKIT)
+#define PLATFORM(platform, id, name, build_name, target, tapi_target, \
+ marketing) \
+ .Case(#tapi_target, PLATFORM_##platform)
+#include "llvm/BinaryFormat/MachO.def"
.Default(PLATFORM_UNKNOWN);
if (Platform == PLATFORM_UNKNOWN) {
- if (PlatformStr.startswith("<") && PlatformStr.endswith(">")) {
+ if (PlatformStr.starts_with("<") && PlatformStr.ends_with(">")) {
PlatformStr = PlatformStr.drop_front().drop_back();
unsigned long long RawValue;
if (!PlatformStr.getAsInteger(10, RawValue))
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/TextAPIError.cpp b/contrib/llvm-project/llvm/lib/TextAPI/TextAPIError.cpp
new file mode 100644
index 000000000000..23954a9e3466
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/TextAPI/TextAPIError.cpp
@@ -0,0 +1,39 @@
+//===- TextAPIError.cpp - Tapi Error ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Implements TAPI Error.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TextAPI/TextAPIError.h"
+
+using namespace llvm;
+using namespace llvm::MachO;
+
+char TextAPIError::ID = 0;
+
+void TextAPIError::log(raw_ostream &OS) const {
+ switch (EC) {
+ case TextAPIErrorCode::NoSuchArchitecture:
+ OS << "no such architecture";
+ break;
+ case TextAPIErrorCode::InvalidInputFormat:
+ OS << "invalid input format";
+ break;
+ default:
+ llvm_unreachable("unhandled TextAPIErrorCode");
+ }
+ if (!Msg.empty())
+ OS << ": " << Msg;
+ OS << "\n";
+}
+
+std::error_code TextAPIError::convertToErrorCode() const {
+ llvm_unreachable("convertToErrorCode is not supported.");
+}
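A hedged usage sketch of the new error class; make_error and logAllUnhandledErrors come from llvm/Support/Error.h, and the one-argument constructor is inferred from the NoSuchArchitecture call site earlier in this patch:

    #include "llvm/Support/Error.h"
    #include "llvm/TextAPI/TextAPIError.h"

    using namespace llvm;
    using namespace llvm::MachO;

    Error checkArch(bool Found) {
      if (!Found)
        return make_error<TextAPIError>(TextAPIErrorCode::NoSuchArchitecture);
      return Error::success();
    }

    // logAllUnhandledErrors(checkArch(false), errs(), "tapi: ");
    // would print: tapi: no such architecture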
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/TextStub.cpp b/contrib/llvm-project/llvm/lib/TextAPI/TextStub.cpp
index 78de3ebf3f3a..9fa1459e9557 100644
--- a/contrib/llvm-project/llvm/lib/TextAPI/TextStub.cpp
+++ b/contrib/llvm-project/llvm/lib/TextAPI/TextStub.cpp
@@ -360,6 +360,8 @@ template <> struct ScalarBitSetTraits<TBDFlags> {
IO.bitSetCase(Flags, "not_app_extension_safe",
TBDFlags::NotApplicationExtensionSafe);
IO.bitSetCase(Flags, "installapi", TBDFlags::InstallAPI);
+ IO.bitSetCase(Flags, "not_for_dyld_shared_cache",
+ TBDFlags::OSLibNotForSharedCache);
}
};
@@ -367,39 +369,12 @@ template <> struct ScalarTraits<Target> {
static void output(const Target &Value, void *, raw_ostream &OS) {
OS << Value.Arch << "-";
switch (Value.Platform) {
- default:
- OS << "unknown";
- break;
- case PLATFORM_MACOS:
- OS << "macos";
- break;
- case PLATFORM_IOS:
- OS << "ios";
- break;
- case PLATFORM_TVOS:
- OS << "tvos";
- break;
- case PLATFORM_WATCHOS:
- OS << "watchos";
- break;
- case PLATFORM_BRIDGEOS:
- OS << "bridgeos";
- break;
- case PLATFORM_MACCATALYST:
- OS << "maccatalyst";
- break;
- case PLATFORM_IOSSIMULATOR:
- OS << "ios-simulator";
- break;
- case PLATFORM_TVOSSIMULATOR:
- OS << "tvos-simulator";
- break;
- case PLATFORM_WATCHOSSIMULATOR:
- OS << "watchos-simulator";
- break;
- case PLATFORM_DRIVERKIT:
- OS << "driverkit";
- break;
+#define PLATFORM(platform, id, name, build_name, target, tapi_target, \
+ marketing) \
+ case PLATFORM_##platform: \
+ OS << #tapi_target; \
+ break;
+#include "llvm/BinaryFormat/MachO.def"
}
}
@@ -620,6 +595,11 @@ template <> struct MappingTraits<const InterfaceFile *> {
!(Flags & TBDFlags::NotApplicationExtensionSafe));
}
+ // For older file formats, the segment a symbol comes from is
+ // unknown; treat all symbols as Data in these cases.
+ const auto Flags = SymbolFlags::Data;
+
for (const auto &Section : Exports) {
const auto Targets =
synthesizeTargets(Section.Architectures, Platforms);
@@ -634,33 +614,34 @@ template <> struct MappingTraits<const InterfaceFile *> {
for (const auto &Symbol : Section.Symbols) {
if (Ctx->FileKind != FileType::TBD_V3 &&
- Symbol.value.startswith("_OBJC_EHTYPE_$_"))
+ Symbol.value.starts_with(ObjC2EHTypePrefix))
File->addSymbol(SymbolKind::ObjectiveCClassEHType,
- Symbol.value.drop_front(15), Targets);
+ Symbol.value.drop_front(15), Targets, Flags);
else
- File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets);
+ File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets, Flags);
}
for (auto &Symbol : Section.Classes) {
auto Name = Symbol.value;
if (Ctx->FileKind != FileType::TBD_V3)
Name = Name.drop_front();
- File->addSymbol(SymbolKind::ObjectiveCClass, Name, Targets);
+ File->addSymbol(SymbolKind::ObjectiveCClass, Name, Targets, Flags);
}
for (auto &Symbol : Section.ClassEHs)
- File->addSymbol(SymbolKind::ObjectiveCClassEHType, Symbol, Targets);
+ File->addSymbol(SymbolKind::ObjectiveCClassEHType, Symbol, Targets,
+ Flags);
for (auto &Symbol : Section.IVars) {
auto Name = Symbol.value;
if (Ctx->FileKind != FileType::TBD_V3)
Name = Name.drop_front();
- File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, Name,
- Targets);
+ File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, Name, Targets,
+ Flags);
}
for (auto &Symbol : Section.WeakDefSymbols)
File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets,
- SymbolFlags::WeakDefined);
+ SymbolFlags::WeakDefined | Flags);
for (auto &Symbol : Section.TLVSymbols)
File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets,
- SymbolFlags::ThreadLocalValue);
+ SymbolFlags::ThreadLocalValue | Flags);
}
for (const auto &Section : Undefineds) {
@@ -668,34 +649,35 @@ template <> struct MappingTraits<const InterfaceFile *> {
synthesizeTargets(Section.Architectures, Platforms);
for (auto &Symbol : Section.Symbols) {
if (Ctx->FileKind != FileType::TBD_V3 &&
- Symbol.value.startswith("_OBJC_EHTYPE_$_"))
+ Symbol.value.starts_with(ObjC2EHTypePrefix))
File->addSymbol(SymbolKind::ObjectiveCClassEHType,
Symbol.value.drop_front(15), Targets,
- SymbolFlags::Undefined);
+ SymbolFlags::Undefined | Flags);
else
File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets,
- SymbolFlags::Undefined);
+ SymbolFlags::Undefined | Flags);
}
for (auto &Symbol : Section.Classes) {
auto Name = Symbol.value;
if (Ctx->FileKind != FileType::TBD_V3)
Name = Name.drop_front();
File->addSymbol(SymbolKind::ObjectiveCClass, Name, Targets,
- SymbolFlags::Undefined);
+ SymbolFlags::Undefined | Flags);
}
for (auto &Symbol : Section.ClassEHs)
File->addSymbol(SymbolKind::ObjectiveCClassEHType, Symbol, Targets,
- SymbolFlags::Undefined);
+ SymbolFlags::Undefined | Flags);
for (auto &Symbol : Section.IVars) {
auto Name = Symbol.value;
if (Ctx->FileKind != FileType::TBD_V3)
Name = Name.drop_front();
File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, Name, Targets,
- SymbolFlags::Undefined);
+ SymbolFlags::Undefined | Flags);
}
for (auto &Symbol : Section.WeakRefSymbols)
File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets,
- SymbolFlags::Undefined | SymbolFlags::WeakReferenced);
+ SymbolFlags::Undefined | SymbolFlags::WeakReferenced |
+ Flags);
}
return File;
@@ -787,7 +769,7 @@ template <> struct MappingTraits<const InterfaceFile *> {
NormalizedTBD_V4(IO &IO, const InterfaceFile *&File) {
auto Ctx = reinterpret_cast<TextAPIContext *>(IO.getContext());
assert(Ctx);
- TBDVersion = Ctx->FileKind >> 1;
+ TBDVersion = Ctx->FileKind >> 4;
Targets.insert(Targets.begin(), File->targets().begin(),
File->targets().end());
InstallName = File->getInstallName();
@@ -802,6 +784,9 @@ template <> struct MappingTraits<const InterfaceFile *> {
if (!File->isTwoLevelNamespace())
Flags |= TBDFlags::FlatNamespace;
+ if (File->isOSLibNotForSharedCache())
+ Flags |= TBDFlags::OSLibNotForSharedCache;
+
{
std::map<std::string, TargetList> valueToTargetList;
for (const auto &it : File->umbrellas())
@@ -892,6 +877,8 @@ template <> struct MappingTraits<const InterfaceFile *> {
File->setTwoLevelNamespace(!(Flags & TBDFlags::FlatNamespace));
File->setApplicationExtensionSafe(
!(Flags & TBDFlags::NotApplicationExtensionSafe));
+ File->setOSLibNotForSharedCache(
+ (Flags & TBDFlags::OSLibNotForSharedCache));
for (const auto &CurrentSection : AllowableClients) {
for (const auto &lib : CurrentSection.Values)
@@ -906,7 +893,12 @@ template <> struct MappingTraits<const InterfaceFile *> {
}
auto handleSymbols = [File](const SectionList &CurrentSections,
- SymbolFlags Flag = SymbolFlags::None) {
+ SymbolFlags InputFlag = SymbolFlags::None) {
+ // For older file formats, the segment a symbol comes from is
+ // unknown; treat all symbols as Data in these cases.
+ const SymbolFlags Flag = InputFlag | SymbolFlags::Data;
+
for (const auto &CurrentSection : CurrentSections) {
for (auto &sym : CurrentSection.Symbols)
File->addSymbol(SymbolKind::GlobalSymbol, sym,
@@ -924,9 +916,10 @@ template <> struct MappingTraits<const InterfaceFile *> {
File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, sym,
CurrentSection.Targets, Flag);
- SymbolFlags SymFlag = (Flag == SymbolFlags::Undefined)
- ? SymbolFlags::WeakReferenced
- : SymbolFlags::WeakDefined;
+ SymbolFlags SymFlag =
+ ((Flag & SymbolFlags::Undefined) == SymbolFlags::Undefined)
+ ? SymbolFlags::WeakReferenced
+ : SymbolFlags::WeakDefined;
for (auto &sym : CurrentSection.WeakSymbols) {
File->addSymbol(SymbolKind::GlobalSymbol, sym,
CurrentSection.Targets, Flag | SymFlag);
@@ -1078,38 +1071,35 @@ static void DiagHandler(const SMDiagnostic &Diag, void *Context) {
File->ErrorMessage = ("malformed file\n" + Message).str();
}
-namespace {
-
-Expected<FileType> canReadFileType(MemoryBufferRef InputBuffer) {
+Expected<FileType> TextAPIReader::canRead(MemoryBufferRef InputBuffer) {
auto TAPIFile = InputBuffer.getBuffer().trim();
- if (TAPIFile.startswith("{") && TAPIFile.endswith("}"))
+ if (TAPIFile.starts_with("{") && TAPIFile.ends_with("}"))
return FileType::TBD_V5;
- if (!TAPIFile.endswith("..."))
+ if (!TAPIFile.ends_with("..."))
return createStringError(std::errc::not_supported, "unsupported file type");
- if (TAPIFile.startswith("--- !tapi-tbd\n"))
+ if (TAPIFile.starts_with("--- !tapi-tbd\n"))
return FileType::TBD_V4;
- if (TAPIFile.startswith("--- !tapi-tbd-v3\n"))
+ if (TAPIFile.starts_with("--- !tapi-tbd-v3\n"))
return FileType::TBD_V3;
- if (TAPIFile.startswith("--- !tapi-tbd-v2\n"))
+ if (TAPIFile.starts_with("--- !tapi-tbd-v2\n"))
return FileType::TBD_V2;
- if (TAPIFile.startswith("--- !tapi-tbd-v1\n") ||
- TAPIFile.startswith("---\narchs:"))
+ if (TAPIFile.starts_with("--- !tapi-tbd-v1\n") ||
+ TAPIFile.starts_with("---\narchs:"))
return FileType::TBD_V1;
return createStringError(std::errc::not_supported, "unsupported file type");
}
-} // namespace
Expected<std::unique_ptr<InterfaceFile>>
TextAPIReader::get(MemoryBufferRef InputBuffer) {
TextAPIContext Ctx;
Ctx.Path = std::string(InputBuffer.getBufferIdentifier());
- if (auto FTOrErr = canReadFileType(InputBuffer))
+ if (auto FTOrErr = canRead(InputBuffer))
Ctx.FileKind = *FTOrErr;
else
return FTOrErr.takeError();
@@ -1119,6 +1109,8 @@ TextAPIReader::get(MemoryBufferRef InputBuffer) {
auto FileOrErr = getInterfaceFileFromJSON(InputBuffer.getBuffer());
if (!FileOrErr)
return FileOrErr.takeError();
+
+ (*FileOrErr)->setPath(Ctx.Path);
return std::move(*FileOrErr);
}
yaml::Input YAMLIn(InputBuffer.getBuffer(), &Ctx, DiagHandler, &Ctx);
@@ -1143,14 +1135,18 @@ TextAPIReader::get(MemoryBufferRef InputBuffer) {
}
Error TextAPIWriter::writeToStream(raw_ostream &OS, const InterfaceFile &File,
- bool Compact) {
+ const FileType FileKind, bool Compact) {
TextAPIContext Ctx;
Ctx.Path = std::string(File.getPath());
- Ctx.FileKind = File.getFileType();
+
+ // Prefer parameter for format if passed, otherwise fallback to the File
+ // FileType.
+ Ctx.FileKind =
+ (FileKind == FileType::Invalid) ? File.getFileType() : FileKind;
// Write out in JSON format.
if (Ctx.FileKind >= FileType::TBD_V5) {
- return serializeInterfaceFileToJSON(OS, File, Compact);
+ return serializeInterfaceFileToJSON(OS, File, Ctx.FileKind, Compact);
}
llvm::yaml::Output YAMLOut(OS, &Ctx, /*WrapColumn=*/80);
@@ -1158,7 +1154,7 @@ Error TextAPIWriter::writeToStream(raw_ostream &OS, const InterfaceFile &File,
std::vector<const InterfaceFile *> Files;
Files.emplace_back(&File);
- for (auto Document : File.documents())
+ for (const auto &Document : File.documents())
Files.emplace_back(Document.get());
// Stream out yaml.
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/TextStubCommon.h b/contrib/llvm-project/llvm/lib/TextAPI/TextStubCommon.h
index d4dcd3af447a..360910c48d4f 100644
--- a/contrib/llvm-project/llvm/lib/TextAPI/TextStubCommon.h
+++ b/contrib/llvm-project/llvm/lib/TextAPI/TextStubCommon.h
@@ -28,7 +28,9 @@ enum TBDFlags : unsigned {
FlatNamespace = 1U << 0,
NotApplicationExtensionSafe = 1U << 1,
InstallAPI = 1U << 2,
- LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/InstallAPI),
+ SimulatorSupport = 1U << 3,
+ OSLibNotForSharedCache = 1U << 4,
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OSLibNotForSharedCache),
};
// clang-format on
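LLVM_MARK_AS_BITMASK_ENUM (llvm/ADT/BitmaskEnum.h) generates the bitwise operators for the enum, so the new flags compose without casts; because TBDFlags is a plain unsigned enum, the result of & also converts straight to bool. A short sketch of combining and testing the new bits:

    TBDFlags Flags = TBDFlags::FlatNamespace | TBDFlags::OSLibNotForSharedCache;
    bool NotForCache = (Flags & TBDFlags::OSLibNotForSharedCache) ==
                       TBDFlags::OSLibNotForSharedCache;
    bool AppExtSafe = !(Flags & TBDFlags::NotApplicationExtensionSafe);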
@@ -47,7 +49,7 @@ Expected<std::unique_ptr<InterfaceFile>>
getInterfaceFileFromJSON(StringRef JSON);
Error serializeInterfaceFileToJSON(raw_ostream &OS, const InterfaceFile &File,
- bool Compact);
+ const FileType FileKind, bool Compact);
} // namespace MachO
namespace yaml {
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/TextStubV5.cpp b/contrib/llvm-project/llvm/lib/TextAPI/TextStubV5.cpp
index 5b3d69b8d94a..2f82bc03480b 100644
--- a/contrib/llvm-project/llvm/lib/TextAPI/TextStubV5.cpp
+++ b/contrib/llvm-project/llvm/lib/TextAPI/TextStubV5.cpp
@@ -27,7 +27,7 @@ All library level keys, accept target values and are defaulted if not specified.
"target_info": [ # Required: target information
{
"target": "x86_64-macos",
- "min_deployment": "10.14" # Required: minimum OS deployment version
+ "min_deployment": "10.14" # Optional: minOS defaults to 0
},
{
"target": "arm64-macos",
@@ -283,17 +283,16 @@ Expected<TargetList> getTargetsSection(const Object *Section) {
getRequiredValue<StringRef>(TBDKey::Target, Obj, &Object::getString);
if (!TargetStr)
return make_error<JSONStubError>(getParseErrorMsg(TBDKey::Target));
- auto VersionStr = getRequiredValue<StringRef>(TBDKey::Deployment, Obj,
- &Object::getString);
- if (!VersionStr)
- return make_error<JSONStubError>(getParseErrorMsg(TBDKey::Deployment));
- VersionTuple Version;
- if (Version.tryParse(*VersionStr))
- return make_error<JSONStubError>(getParseErrorMsg(TBDKey::Deployment));
auto TargetOrErr = Target::create(*TargetStr);
if (!TargetOrErr)
return make_error<JSONStubError>(getParseErrorMsg(TBDKey::Target));
+
+ auto VersionStr = Obj->getString(Keys[TBDKey::Deployment]);
+ VersionTuple Version;
+ if (VersionStr && Version.tryParse(*VersionStr))
+ return make_error<JSONStubError>(getParseErrorMsg(TBDKey::Deployment));
TargetOrErr->MinDeployment = Version;
+
// Convert to LLVM::Triple to accurately compute minOS + platform + arch
// pairing.
IFTargets.push_back(
@@ -548,11 +547,11 @@ Expected<PackedVersion> getPackedVersion(const Object *File, TBDKey Key) {
Expected<TBDFlags> getFlags(const Object *File) {
TBDFlags Flags = TBDFlags::None;
const Array *Section = File->getArray(Keys[TBDKey::Flags]);
- if (!Section)
+ if (!Section || Section->empty())
return Flags;
for (auto &Val : *Section) {
- // TODO: Just take first for now.
+ // FIXME: Flags currently apply to all target triples.
const auto *Obj = Val.getAsObject();
if (!Obj)
return make_error<JSONStubError>(getParseErrorMsg(TBDKey::Flags));
@@ -564,6 +563,9 @@ Expected<TBDFlags> getFlags(const Object *File) {
.Case("flat_namespace", TBDFlags::FlatNamespace)
.Case("not_app_extension_safe",
TBDFlags::NotApplicationExtensionSafe)
+ .Case("sim_support", TBDFlags::SimulatorSupport)
+ .Case("not_for_dyld_shared_cache",
+ TBDFlags::OSLibNotForSharedCache)
.Default(TBDFlags::None);
Flags |= TBDFlag;
});
@@ -654,6 +656,8 @@ Expected<IFPtr> parseToInterfaceFile(const Object *File) {
F->setTwoLevelNamespace(!(Flags & TBDFlags::FlatNamespace));
F->setApplicationExtensionSafe(
!(Flags & TBDFlags::NotApplicationExtensionSafe));
+ F->setSimulatorSupport((Flags & TBDFlags::SimulatorSupport));
+ F->setOSLibNotForSharedCache((Flags & TBDFlags::OSLibNotForSharedCache));
for (auto &T : Targets)
F->addTarget(T);
for (auto &[Lib, Targets] : Clients)
@@ -753,9 +757,9 @@ std::vector<std::string> serializeTargets(const AggregateT Targets,
if (Targets.size() == ActiveTargets.size())
return TargetsStr;
- llvm::for_each(Targets, [&TargetsStr](const MachO::Target &Target) {
+ for (const MachO::Target &Target : Targets)
TargetsStr.emplace_back(getFormattedStr(Target));
- });
+
return TargetsStr;
}
@@ -763,7 +767,8 @@ Array serializeTargetInfo(const TargetList &ActiveTargets) {
Array Targets;
for (const auto Targ : ActiveTargets) {
Object TargetInfo;
- TargetInfo[Keys[TBDKey::Deployment]] = Targ.MinDeployment.getAsString();
+ if (!Targ.MinDeployment.empty())
+ TargetInfo[Keys[TBDKey::Deployment]] = Targ.MinDeployment.getAsString();
TargetInfo[Keys[TBDKey::Target]] = getFormattedStr(Targ);
Targets.emplace_back(std::move(TargetInfo));
}
@@ -920,6 +925,10 @@ Array serializeFlags(const InterfaceFile *File) {
Flags.emplace_back("flat_namespace");
if (!File->isApplicationExtensionSafe())
Flags.emplace_back("not_app_extension_safe");
+ if (File->hasSimulatorSupport())
+ Flags.emplace_back("sim_support");
+ if (File->isOSLibNotForSharedCache())
+ Flags.emplace_back("not_for_dyld_shared_cache");
return serializeScalar(TBDKey::Attributes, std::move(Flags));
}
@@ -983,9 +992,8 @@ Expected<Object> serializeIF(const InterfaceFile *File) {
return std::move(Library);
}
-Expected<Object> getJSON(const InterfaceFile *File) {
- assert(File->getFileType() == FileType::TBD_V5 &&
- "unexpected json file format version");
+Expected<Object> getJSON(const InterfaceFile *File, const FileType FileKind) {
+ assert(FileKind == FileType::TBD_V5 && "unexpected json file format version");
Object Root;
auto MainLibOrErr = serializeIF(File);
@@ -1009,8 +1017,9 @@ Expected<Object> getJSON(const InterfaceFile *File) {
Error MachO::serializeInterfaceFileToJSON(raw_ostream &OS,
const InterfaceFile &File,
+ const FileType FileKind,
bool Compact) {
- auto TextFile = getJSON(&File);
+ auto TextFile = getJSON(&File, FileKind);
if (!TextFile)
return TextFile.takeError();
if (Compact)
diff --git a/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
index 39bb8dd8ec85..781bc9a058e1 100644
--- a/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
+++ b/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
@@ -17,6 +17,7 @@
#include "llvm/Object/COFFModuleDefinition.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
+#include "llvm/Option/OptTable.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/Path.h"
#include "llvm/TargetParser/Host.h"
@@ -32,7 +33,7 @@ namespace {
enum {
OPT_INVALID = 0,
-#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) OPT_##ID,
+#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
#include "Options.inc"
#undef OPTION
};
@@ -44,10 +45,9 @@ enum {
#include "Options.inc"
#undef PREFIX
+using namespace llvm::opt;
static constexpr opt::OptTable::Info InfoTable[] = {
-#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X7, X8, X9, X10, X11, X12) \
- {X1, X2, X10, X11, OPT_##ID, llvm::opt::Option::KIND##Class, \
- X9, X8, OPT_##GROUP, OPT_##ALIAS, X7, X12},
+#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
#include "Options.inc"
#undef OPTION
};
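The point of forwarding with __VA_ARGS__ is that the TableGen-generated OPTION() rows can grow or reorder fields without every consumer hard-coding a twelve-parameter signature; LLVM_MAKE_OPT_ID and LLVM_CONSTRUCT_OPT_INFO absorb whatever the current layout is. A self-contained demo of the idea with illustrative macros (not LLVM's definitions):

    #include <cstdio>

    #define MAKE_ID(NAME, HELP, ...) OPT_##NAME,
    #define MAKE_INFO(NAME, HELP, ...) {#NAME, HELP},

    #define MY_OPTIONS \
      OPTION(help, "print help", 0) \
      OPTION(version, "print version", 0)

    enum {
      OPT_INVALID = 0,
    #define OPTION(...) MAKE_ID(__VA_ARGS__)
      MY_OPTIONS
    #undef OPTION
    };

    struct Info { const char *Name; const char *Help; };

    static const Info Table[] = {
    #define OPTION(...) MAKE_INFO(__VA_ARGS__)
      MY_OPTIONS
    #undef OPTION
    };

    int main() {
      for (const Info &I : Table)
        std::printf("--%s: %s\n", I.Name, I.Help);
    }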
diff --git a/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
index 747e4c5928ea..1c140edb07ac 100644
--- a/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
+++ b/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
@@ -23,6 +23,7 @@
#include "llvm/Object/WindowsMachineFlag.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
+#include "llvm/Option/OptTable.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Path.h"
@@ -38,7 +39,7 @@ namespace {
enum {
OPT_INVALID = 0,
-#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) OPT_##ID,
+#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
#include "Options.inc"
#undef OPTION
};
@@ -50,10 +51,9 @@ enum {
#include "Options.inc"
#undef PREFIX
+using namespace llvm::opt;
static constexpr opt::OptTable::Info InfoTable[] = {
-#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X7, X8, X9, X10, X11, X12) \
- {X1, X2, X10, X11, OPT_##ID, opt::Option::KIND##Class, \
- X9, X8, OPT_##GROUP, OPT_##ALIAS, X7, X12},
+#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
#include "Options.inc"
#undef OPTION
};
@@ -482,12 +482,10 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
std::reverse(Members.begin(), Members.end());
bool Thin = Args.hasArg(OPT_llvmlibthin);
- if (Error E =
- writeArchive(OutputPath, Members,
- /*WriteSymtab=*/true,
- Thin ? object::Archive::K_GNU : object::Archive::K_COFF,
- /*Deterministic*/ true, Thin, nullptr,
- COFF::isArm64EC(LibMachine))) {
+ if (Error E = writeArchive(
+ OutputPath, Members, SymtabWritingMode::NormalSymtab,
+ Thin ? object::Archive::K_GNU : object::Archive::K_COFF,
+ /*Deterministic=*/true, Thin, nullptr, COFF::isArm64EC(LibMachine))) {
handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
llvm::errs() << OutputPath << ": " << EI.message() << "\n";
});
diff --git a/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 503ce019dc84..d09ac1c099c1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -371,7 +371,7 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
InstructionCost SatCost = TTI.getIntrinsicInstrCost(
IntrinsicCostAttributes(Intrinsic::fptosi_sat, SatTy, {In}, {FpTy}),
TTI::TCK_RecipThroughput);
- SatCost += TTI.getCastInstrCost(Instruction::SExt, SatTy, IntTy,
+ SatCost += TTI.getCastInstrCost(Instruction::SExt, IntTy, SatTy,
TTI::CastContextHint::None,
TTI::TCK_RecipThroughput);
@@ -493,7 +493,8 @@ static bool isCTTZTable(const ConstantDataArray &Table, uint64_t Mul,
// %shr = lshr i32 %mul, 27
// %idxprom = zext i32 %shr to i64
// %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @ctz1.table, i64 0,
-// i64 %idxprom %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
+// i64 %idxprom
+// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
//
// CASE 2:
// %sub = sub i32 0, %x
@@ -501,8 +502,9 @@ static bool isCTTZTable(const ConstantDataArray &Table, uint64_t Mul,
// %mul = mul i32 %and, 72416175
// %shr = lshr i32 %mul, 26
// %idxprom = zext i32 %shr to i64
-// %arrayidx = getelementptr inbounds [64 x i16], [64 x i16]* @ctz2.table, i64
-// 0, i64 %idxprom %0 = load i16, i16* %arrayidx, align 2, !tbaa !8
+// %arrayidx = getelementptr inbounds [64 x i16], [64 x i16]* @ctz2.table,
+// i64 0, i64 %idxprom
+// %0 = load i16, i16* %arrayidx, align 2, !tbaa !8
//
// CASE 3:
// %sub = sub i32 0, %x
@@ -510,16 +512,18 @@ static bool isCTTZTable(const ConstantDataArray &Table, uint64_t Mul,
// %mul = mul i32 %and, 81224991
// %shr = lshr i32 %mul, 27
// %idxprom = zext i32 %shr to i64
-// %arrayidx = getelementptr inbounds [32 x i32], [32 x i32]* @ctz3.table, i64
-// 0, i64 %idxprom %0 = load i32, i32* %arrayidx, align 4, !tbaa !8
+// %arrayidx = getelementptr inbounds [32 x i32], [32 x i32]* @ctz3.table,
+// i64 0, i64 %idxprom
+// %0 = load i32, i32* %arrayidx, align 4, !tbaa !8
//
// CASE 4:
// %sub = sub i64 0, %x
// %and = and i64 %sub, %x
// %mul = mul i64 %and, 283881067100198605
// %shr = lshr i64 %mul, 58
-// %arrayidx = getelementptr inbounds [64 x i8], [64 x i8]* @table, i64 0, i64
-// %shr %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
+// %arrayidx = getelementptr inbounds [64 x i8], [64 x i8]* @table, i64 0,
+// i64 %shr
+// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
//
// All this can be lowered to @llvm.cttz.i32/64 intrinsic.
static bool tryToRecognizeTableBasedCttz(Instruction &I) {
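CASE 1 above is one classic instance of the de Bruijn multiply-and-lookup trick: x & -x isolates the lowest set bit, multiplying by a de Bruijn constant shifts a unique 5-bit pattern into the top bits, and a 32-entry table maps that pattern back to the bit index. A self-contained version of the source-level pattern the pass recognizes (constant and table from the well-known 32-bit variant):

    #include <cstdint>

    static const uint8_t CttzTable[32] = {
        0,  1,  28, 2,  29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4,  8,
        31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6,  11, 5,  10, 9};

    // Equivalent to __builtin_ctz(X) for X != 0; this whole shape gets
    // rewritten into a single @llvm.cttz.i32 call.
    unsigned cttz32(uint32_t X) {
      return CttzTable[((X & -X) * 0x077CB531u) >> 27];
    }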
@@ -702,7 +706,10 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
make_range(Start->getIterator(), End->getIterator())) {
if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc)))
return false;
- if (++NumScanned > MaxInstrsToScan)
+
+ // Ignore debug info so it is not counted against MaxInstrsToScan;
+ // otherwise debug info could affect codegen.
+ if (!isa<DbgInfoIntrinsic>(Inst) && ++NumScanned > MaxInstrsToScan)
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp b/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
index 6c62e84077ac..4d9050be5c55 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
@@ -366,7 +366,7 @@ static Type *getReducedType(Value *V, Type *Ty) {
Value *TruncInstCombine::getReducedOperand(Value *V, Type *SclTy) {
Type *Ty = getReducedType(V, SclTy);
if (auto *C = dyn_cast<Constant>(V)) {
- C = ConstantExpr::getIntegerCast(C, Ty, false);
+ C = ConstantExpr::getTrunc(C, Ty);
// If we got a constantexpr back, try to simplify it with DL info.
return ConstantFoldConstant(C, DL, &TLI);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/CFGuard/CFGuard.cpp b/contrib/llvm-project/llvm/lib/Transforms/CFGuard/CFGuard.cpp
index bf823ac55497..4d4306576017 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/CFGuard/CFGuard.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/CFGuard/CFGuard.cpp
@@ -34,25 +34,22 @@ namespace {
/// Adds Control Flow Guard (CFG) checks on indirect function calls/invokes.
/// These checks ensure that the target address corresponds to the start of an
-/// address-taken function. X86_64 targets use the CF_Dispatch mechanism. X86,
-/// ARM, and AArch64 targets use the CF_Check machanism.
-class CFGuard : public FunctionPass {
+/// address-taken function. X86_64 targets use the Mechanism::Dispatch
+/// mechanism. X86, ARM, and AArch64 targets use the Mechanism::Check mechanism.
+class CFGuardImpl {
public:
- static char ID;
-
- enum Mechanism { CF_Check, CF_Dispatch };
-
- // Default constructor required for the INITIALIZE_PASS macro.
- CFGuard() : FunctionPass(ID) {
- initializeCFGuardPass(*PassRegistry::getPassRegistry());
- // By default, use the guard check mechanism.
- GuardMechanism = CF_Check;
- }
-
- // Recommended constructor used to specify the type of guard mechanism.
- CFGuard(Mechanism Var) : FunctionPass(ID) {
- initializeCFGuardPass(*PassRegistry::getPassRegistry());
- GuardMechanism = Var;
+ using Mechanism = CFGuardPass::Mechanism;
+
+ CFGuardImpl(Mechanism M) : GuardMechanism(M) {
+ // Select the name of the guard check or dispatch global; the global
+ // itself is created or looked up in doInitialization().
+ switch (GuardMechanism) {
+ case Mechanism::Check:
+ GuardFnName = "__guard_check_icall_fptr";
+ break;
+ case Mechanism::Dispatch:
+ GuardFnName = "__guard_dispatch_icall_fptr";
+ break;
+ }
}
/// Inserts a Control Flow Guard (CFG) check on an indirect call using the CFG
@@ -141,21 +138,37 @@ public:
/// \param CB indirect call to instrument.
void insertCFGuardDispatch(CallBase *CB);
- bool doInitialization(Module &M) override;
- bool runOnFunction(Function &F) override;
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
private:
// Only add checks if the module has the cfguard=2 flag.
int cfguard_module_flag = 0;
- Mechanism GuardMechanism = CF_Check;
+ StringRef GuardFnName;
+ Mechanism GuardMechanism = Mechanism::Check;
FunctionType *GuardFnType = nullptr;
PointerType *GuardFnPtrType = nullptr;
Constant *GuardFnGlobal = nullptr;
};
+class CFGuard : public FunctionPass {
+ CFGuardImpl Impl;
+
+public:
+ static char ID;
+
+ // Default constructor required for the INITIALIZE_PASS macro.
+ CFGuard(CFGuardImpl::Mechanism M) : FunctionPass(ID), Impl(M) {
+ initializeCFGuardPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool doInitialization(Module &M) override { return Impl.doInitialization(M); }
+ bool runOnFunction(Function &F) override { return Impl.runOnFunction(F); }
+};
+
} // end anonymous namespace
-void CFGuard::insertCFGuardCheck(CallBase *CB) {
+void CFGuardImpl::insertCFGuardCheck(CallBase *CB) {
assert(Triple(CB->getModule()->getTargetTriple()).isOSWindows() &&
"Only applicable for Windows targets");
@@ -177,15 +190,14 @@ void CFGuard::insertCFGuardCheck(CallBase *CB) {
// Create new call instruction. The CFGuard check should always be a call,
// even if the original CallBase is an Invoke or CallBr instruction.
CallInst *GuardCheck =
- B.CreateCall(GuardFnType, GuardCheckLoad,
- {B.CreateBitCast(CalledOperand, B.getInt8PtrTy())}, Bundles);
+ B.CreateCall(GuardFnType, GuardCheckLoad, {CalledOperand}, Bundles);
// Ensure that the first argument is passed in the correct register
// (e.g. ECX on 32-bit X86 targets).
GuardCheck->setCallingConv(CallingConv::CFGuard_Check);
}
-void CFGuard::insertCFGuardDispatch(CallBase *CB) {
+void CFGuardImpl::insertCFGuardDispatch(CallBase *CB) {
assert(Triple(CB->getModule()->getTargetTriple()).isOSWindows() &&
"Only applicable for Windows targets");
@@ -196,11 +208,6 @@ void CFGuard::insertCFGuardDispatch(CallBase *CB) {
Value *CalledOperand = CB->getCalledOperand();
Type *CalledOperandType = CalledOperand->getType();
- // Cast the guard dispatch global to the type of the called operand.
- PointerType *PTy = PointerType::get(CalledOperandType, 0);
- if (GuardFnGlobal->getType() != PTy)
- GuardFnGlobal = ConstantExpr::getBitCast(GuardFnGlobal, PTy);
-
// Load the global as a pointer to a function of the same type.
LoadInst *GuardDispatchLoad = B.CreateLoad(CalledOperandType, GuardFnGlobal);
@@ -224,7 +231,7 @@ void CFGuard::insertCFGuardDispatch(CallBase *CB) {
CB->eraseFromParent();
}
-bool CFGuard::doInitialization(Module &M) {
+bool CFGuardImpl::doInitialization(Module &M) {
// Check if this module has the cfguard flag and read its value.
if (auto *MD =
@@ -236,19 +243,11 @@ bool CFGuard::doInitialization(Module &M) {
return false;
// Set up prototypes for the guard check and dispatch functions.
- GuardFnType = FunctionType::get(Type::getVoidTy(M.getContext()),
- {Type::getInt8PtrTy(M.getContext())}, false);
+ GuardFnType =
+ FunctionType::get(Type::getVoidTy(M.getContext()),
+ {PointerType::getUnqual(M.getContext())}, false);
GuardFnPtrType = PointerType::get(GuardFnType, 0);
- // Get or insert the guard check or dispatch global symbols.
- llvm::StringRef GuardFnName;
- if (GuardMechanism == CF_Check) {
- GuardFnName = "__guard_check_icall_fptr";
- } else if (GuardMechanism == CF_Dispatch) {
- GuardFnName = "__guard_dispatch_icall_fptr";
- } else {
- assert(false && "Invalid CFGuard mechanism");
- }
GuardFnGlobal = M.getOrInsertGlobal(GuardFnName, GuardFnPtrType, [&] {
auto *Var = new GlobalVariable(M, GuardFnPtrType, false,
GlobalVariable::ExternalLinkage, nullptr,
@@ -260,7 +259,7 @@ bool CFGuard::doInitialization(Module &M) {
return true;
}
-bool CFGuard::runOnFunction(Function &F) {
+bool CFGuardImpl::runOnFunction(Function &F) {
// Skip modules for which CFGuard checks have been disabled.
if (cfguard_module_flag != 2)
@@ -288,7 +287,7 @@ bool CFGuard::runOnFunction(Function &F) {
}
// For each indirect call/invoke, add the appropriate dispatch or check.
- if (GuardMechanism == CF_Dispatch) {
+ if (GuardMechanism == Mechanism::Dispatch) {
for (CallBase *CB : IndirectCalls) {
insertCFGuardDispatch(CB);
}
@@ -301,13 +300,20 @@ bool CFGuard::runOnFunction(Function &F) {
return true;
}
+PreservedAnalyses CFGuardPass::run(Function &F, FunctionAnalysisManager &FAM) {
+ CFGuardImpl Impl(GuardMechanism);
+ bool Changed = Impl.doInitialization(*F.getParent());
+ Changed |= Impl.runOnFunction(F);
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
char CFGuard::ID = 0;
INITIALIZE_PASS(CFGuard, "CFGuard", "CFGuard", false, false)
FunctionPass *llvm::createCFGuardCheckPass() {
- return new CFGuard(CFGuard::CF_Check);
+ return new CFGuard(CFGuardPass::Mechanism::Check);
}
FunctionPass *llvm::createCFGuardDispatchPass() {
- return new CFGuard(CFGuard::CF_Dispatch);
+ return new CFGuard(CFGuardPass::Mechanism::Dispatch);
}
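
For orientation, the two factory functions above remain the legacy-PM entry points; a driver selects the mechanism per target roughly as follows (a hedged sketch: the helper name and the TargetIsX86_64 flag are ours, not LLVM API):

  #include "llvm/IR/LegacyPassManager.h"
  #include "llvm/Transforms/CFGuard.h"

  // X86_64 uses the dispatch mechanism; x86, ARM and AArch64 use checks.
  static void addCFGuardPass(llvm::legacy::PassManagerBase &PM,
                             bool TargetIsX86_64) {
    if (TargetIsX86_64)
      PM.add(llvm::createCFGuardDispatchPass());
    else
      PM.add(llvm::createCFGuardCheckPass());
  }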
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
index 29978bef661c..3e3825fcd50e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
@@ -29,15 +29,13 @@ struct Lowerer : coro::LowererBase {
static void lowerSubFn(IRBuilder<> &Builder, CoroSubFnInst *SubFn) {
Builder.SetInsertPoint(SubFn);
- Value *FrameRaw = SubFn->getFrame();
+ Value *FramePtr = SubFn->getFrame();
int Index = SubFn->getIndex();
- auto *FrameTy = StructType::get(
- SubFn->getContext(), {Builder.getInt8PtrTy(), Builder.getInt8PtrTy()});
- PointerType *FramePtrTy = FrameTy->getPointerTo();
+ auto *FrameTy = StructType::get(SubFn->getContext(),
+ {Builder.getPtrTy(), Builder.getPtrTy()});
Builder.SetInsertPoint(SubFn);
- auto *FramePtr = Builder.CreateBitCast(FrameRaw, FramePtrTy);
auto *Gep = Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, Index);
auto *Load = Builder.CreateLoad(FrameTy->getElementType(Index), Gep);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp
index d0606c15f3d5..2f4083028ae0 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp
@@ -165,7 +165,7 @@ void Lowerer::elideHeapAllocations(Function *F, uint64_t FrameSize,
auto *Frame = new AllocaInst(FrameTy, DL.getAllocaAddrSpace(), "", InsertPt);
Frame->setAlignment(FrameAlign);
auto *FrameVoidPtr =
- new BitCastInst(Frame, Type::getInt8PtrTy(C), "vFrame", InsertPt);
+ new BitCastInst(Frame, PointerType::getUnqual(C), "vFrame", InsertPt);
for (auto *CB : CoroBegins) {
CB->replaceAllUsesWith(FrameVoidPtr);
@@ -227,7 +227,7 @@ bool Lowerer::hasEscapePath(const CoroBeginInst *CB,
PotentiallyEscaped |= EscapingBBs.count(BB);
if (TIs.count(BB)) {
- if (!BB->getTerminator()->isExceptionalTerminator() || PotentiallyEscaped)
+ if (isa<ReturnInst>(BB->getTerminator()) || PotentiallyEscaped)
return true;
// If the function ends with the exceptional terminator, the memory used
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 1f373270f951..f37b4dc938d3 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -63,7 +63,7 @@ public:
llvm::sort(V);
}
- size_t blockToIndex(BasicBlock *BB) const {
+ size_t blockToIndex(BasicBlock const *BB) const {
auto *I = llvm::lower_bound(V, BB);
assert(I != V.end() && *I == BB && "BasicBlockNumbering: Unknown block");
return I - V.begin();
@@ -112,10 +112,11 @@ class SuspendCrossingInfo {
}
/// Compute the BlockData for the current function in one iteration.
- /// Returns whether the BlockData changes in this iteration.
/// Initialize - Whether this is the first iteration; if so, we can
/// optimize the initial case a little bit with a manual loop switch.
- template <bool Initialize = false> bool computeBlockData();
+ /// Returns whether the BlockData changes in this iteration.
+ template <bool Initialize = false>
+ bool computeBlockData(const ReversePostOrderTraversal<Function *> &RPOT);
public:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -223,12 +224,14 @@ LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
}
#endif
-template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() {
- const size_t N = Mapping.size();
+template <bool Initialize>
+bool SuspendCrossingInfo::computeBlockData(
+ const ReversePostOrderTraversal<Function *> &RPOT) {
bool Changed = false;
- for (size_t I = 0; I < N; ++I) {
- auto &B = Block[I];
+ for (const BasicBlock *BB : RPOT) {
+ auto BBNo = Mapping.blockToIndex(BB);
+ auto &B = Block[BBNo];
// We don't need to count the predecessors during initialization.
if constexpr (!Initialize)
@@ -261,7 +264,7 @@ template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() {
}
if (B.Suspend) {
- // If block S is a suspend block, it should kill all of the blocks it
+ // If block B is a suspend block, it should kill all of the blocks it
// consumes.
B.Kills |= B.Consumes;
} else if (B.End) {
@@ -273,8 +276,8 @@ template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() {
} else {
// This is reached when block B is neither Suspend nor coro.end, and we
// need to make sure that it is not in the kill set.
- B.KillLoop |= B.Kills[I];
- B.Kills.reset(I);
+ B.KillLoop |= B.Kills[BBNo];
+ B.Kills.reset(BBNo);
}
if constexpr (!Initialize) {
@@ -283,9 +286,6 @@ template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() {
}
}
- if constexpr (Initialize)
- return true;
-
return Changed;
}
@@ -325,9 +325,11 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
markSuspendBlock(Save);
}
- computeBlockData</*Initialize=*/true>();
-
- while (computeBlockData())
+ // An RPO traversal is generally faster for a forward-edge dataflow
+ // analysis such as this one.
+ ReversePostOrderTraversal<Function *> RPOT(&F);
+ computeBlockData</*Initialize=*/true>(RPOT);
+ while (computeBlockData</*Initialize*/ false>(RPOT))
;
LLVM_DEBUG(dump());
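
A word on why RPO helps here: for a forward dataflow problem, reverse post-order visits almost every predecessor before its successors, so facts reach a fixed point in a handful of passes rather than one pass per block. The generic shape of the loop, as an illustrative sketch rather than the class above:

  #include "llvm/ADT/PostOrderIterator.h"
  #include "llvm/IR/Function.h"

  static void solveForwardDataflow(llvm::Function &F) {
    llvm::ReversePostOrderTraversal<llvm::Function *> RPOT(&F);
    bool Changed = true;
    while (Changed) { // iterate to a fixed point
      Changed = false;
      for (llvm::BasicBlock *BB : RPOT) {
        // Merge facts from BB's predecessors, apply BB's transfer
        // function, and set Changed when BB's out-facts differ.
        (void)BB;
      }
    }
  }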
@@ -961,12 +963,18 @@ static void cacheDIVar(FrameDataInfo &FrameData,
if (DIVarCache.contains(V))
continue;
- auto DDIs = FindDbgDeclareUses(V);
- auto *I = llvm::find_if(DDIs, [](DbgDeclareInst *DDI) {
- return DDI->getExpression()->getNumElements() == 0;
- });
- if (I != DDIs.end())
- DIVarCache.insert({V, (*I)->getVariable()});
+ SmallVector<DbgDeclareInst *, 1> DDIs;
+ SmallVector<DPValue *, 1> DPVs;
+ findDbgDeclares(DDIs, V, &DPVs);
+ auto CacheIt = [&DIVarCache, V](auto &Container) {
+ auto *I = llvm::find_if(Container, [](auto *DDI) {
+ return DDI->getExpression()->getNumElements() == 0;
+ });
+ if (I != Container.end())
+ DIVarCache.insert({V, (*I)->getVariable()});
+ };
+ CacheIt(DDIs);
+ CacheIt(DPVs);
}
}
@@ -1073,7 +1081,7 @@ static DIType *solveDIType(DIBuilder &Builder, Type *Ty,
RetType = CharSizeType;
else {
if (Size % 8 != 0)
- Size = TypeSize::Fixed(Size + 8 - (Size % 8));
+ Size = TypeSize::getFixed(Size + 8 - (Size % 8));
RetType = Builder.createArrayType(
Size, Layout.getPrefTypeAlign(Ty).value(), CharSizeType,
@@ -1117,15 +1125,26 @@ static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
assert(PromiseAlloca &&
"Coroutine with switch ABI should own Promise alloca");
- TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(PromiseAlloca);
- if (DIs.empty())
+ SmallVector<DbgDeclareInst *, 1> DIs;
+ SmallVector<DPValue *, 1> DPVs;
+ findDbgDeclares(DIs, PromiseAlloca, &DPVs);
+
+ DILocalVariable *PromiseDIVariable = nullptr;
+ DILocation *DILoc = nullptr;
+ if (!DIs.empty()) {
+ DbgDeclareInst *PromiseDDI = DIs.front();
+ PromiseDIVariable = PromiseDDI->getVariable();
+ DILoc = PromiseDDI->getDebugLoc().get();
+ } else if (!DPVs.empty()) {
+ DPValue *PromiseDPV = DPVs.front();
+ PromiseDIVariable = PromiseDPV->getVariable();
+ DILoc = PromiseDPV->getDebugLoc().get();
+ } else {
return;
+ }
- DbgDeclareInst *PromiseDDI = DIs.front();
- DILocalVariable *PromiseDIVariable = PromiseDDI->getVariable();
DILocalScope *PromiseDIScope = PromiseDIVariable->getScope();
DIFile *DFile = PromiseDIScope->getFile();
- DILocation *DILoc = PromiseDDI->getDebugLoc().get();
unsigned LineNum = PromiseDIVariable->getLine();
DICompositeType *FrameDITy = DBuilder.createStructType(
@@ -1239,7 +1258,7 @@ static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
auto *FrameDIVar = DBuilder.createAutoVariable(PromiseDIScope, "__coro_frame",
DFile, LineNum, FrameDITy,
true, DINode::FlagArtificial);
- assert(FrameDIVar->isValidLocationForIntrinsic(PromiseDDI->getDebugLoc()));
+ assert(FrameDIVar->isValidLocationForIntrinsic(DILoc));
// Subprogram would have ContainedNodes field which records the debug
// variables it contained. So we need to add __coro_frame to the
@@ -1257,9 +1276,17 @@ static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
7, (MDTuple::get(F.getContext(), RetainedNodesVec)));
}
- DBuilder.insertDeclare(Shape.FramePtr, FrameDIVar,
- DBuilder.createExpression(), DILoc,
- Shape.getInsertPtAfterFramePtr());
+ if (UseNewDbgInfoFormat) {
+ DPValue *NewDPV = new DPValue(ValueAsMetadata::get(Shape.FramePtr),
+ FrameDIVar, DBuilder.createExpression(),
+ DILoc, DPValue::LocationType::Declare);
+ BasicBlock::iterator It = Shape.getInsertPtAfterFramePtr();
+ It->getParent()->insertDPValueBefore(NewDPV, It);
+ } else {
+ DBuilder.insertDeclare(Shape.FramePtr, FrameDIVar,
+ DBuilder.createExpression(), DILoc,
+ &*Shape.getInsertPtAfterFramePtr());
+ }
}
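
The hunk above is one instance of a pattern this patch repeats: during the RemoveDIs transition a variable declaration is either a dbg.declare intrinsic (old format) or a DPValue record attached to an instruction (new format). The shared shape, as a hypothetical helper whose calls all mirror the hunk (header names per LLVM 18):

  #include "llvm/IR/DIBuilder.h"
  #include "llvm/IR/DebugProgramInstruction.h"
  using namespace llvm;

  // UseNewDbgInfoFormat is the global flag selecting the record form.
  static void emitDeclare(Value *Addr, DILocalVariable *Var,
                          DIExpression *Expr, DILocation *Loc,
                          BasicBlock::iterator InsertPt, DIBuilder &DIB) {
    if (UseNewDbgInfoFormat) {
      auto *DPV = new DPValue(ValueAsMetadata::get(Addr), Var, Expr, Loc,
                              DPValue::LocationType::Declare);
      InsertPt->getParent()->insertDPValueBefore(DPV, InsertPt);
    } else {
      DIB.insertDeclare(Addr, Var, Expr, Loc, &*InsertPt);
    }
  }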
// Build a struct that will keep state for an active coroutine.
@@ -1290,10 +1317,7 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
std::optional<FieldIDType> SwitchIndexFieldId;
if (Shape.ABI == coro::ABI::Switch) {
- auto *FramePtrTy = FrameTy->getPointerTo();
- auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
- /*IsVarArg=*/false);
- auto *FnPtrTy = FnTy->getPointerTo();
+ auto *FnPtrTy = PointerType::getUnqual(C);
// Add header fields for the resume and destroy functions.
// We can rely on these being perfectly packed.
@@ -1680,15 +1704,6 @@ static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
return CleanupRet;
}
-static void createFramePtr(coro::Shape &Shape) {
- auto *CB = Shape.CoroBegin;
- IRBuilder<> Builder(CB->getNextNode());
- StructType *FrameTy = Shape.FrameTy;
- PointerType *FramePtrTy = FrameTy->getPointerTo();
- Shape.FramePtr =
- cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
-}
-
// Replace all alloca and SSA values that are accessed across suspend points
// with GetElementPointer from coroutine frame + loads and stores. Create an
// AllocaSpillBB that will become the new entry block for the resume parts of
@@ -1700,7 +1715,6 @@ static void createFramePtr(coro::Shape &Shape) {
// becomes:
//
// %hdl = coro.begin(...)
-// %FramePtr = bitcast i8* hdl to %f.frame*
// br label %AllocaSpillBB
//
// AllocaSpillBB:
@@ -1764,8 +1778,8 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
// Note: If we change the strategy dealing with alignment, we need to refine
// this casting.
if (GEP->getType() != Orig->getType())
- return Builder.CreateBitCast(GEP, Orig->getType(),
- Orig->getName() + Twine(".cast"));
+ return Builder.CreateAddrSpaceCast(GEP, Orig->getType(),
+ Orig->getName() + Twine(".cast"));
}
return GEP;
};
@@ -1775,12 +1789,11 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
auto SpillAlignment = Align(FrameData.getAlign(Def));
// Create a store instruction storing the value into the
// coroutine frame.
- Instruction *InsertPt = nullptr;
+ BasicBlock::iterator InsertPt;
Type *ByValTy = nullptr;
if (auto *Arg = dyn_cast<Argument>(Def)) {
// For arguments, we will place the store instruction right after
- // the coroutine frame pointer instruction, i.e. bitcast of
- // coro.begin from i8* to %f.frame*.
+ // the coroutine frame pointer instruction, i.e. coro.begin.
InsertPt = Shape.getInsertPtAfterFramePtr();
// If we're spilling an Argument, make sure we clear 'nocapture'
@@ -1792,7 +1805,7 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
} else if (auto *CSI = dyn_cast<AnyCoroSuspendInst>(Def)) {
// Don't spill immediately after a suspend; splitting assumes
// that the suspend will be followed by a branch.
- InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI();
+ InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHIIt();
} else {
auto *I = cast<Instruction>(Def);
if (!DT.dominates(CB, I)) {
@@ -1803,24 +1816,24 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
// If we are spilling the result of the invoke instruction, split
// the normal edge and insert the spill in the new block.
auto *NewBB = SplitEdge(II->getParent(), II->getNormalDest());
- InsertPt = NewBB->getTerminator();
+ InsertPt = NewBB->getTerminator()->getIterator();
} else if (isa<PHINode>(I)) {
// Skip the PHINodes and EH pads instructions.
BasicBlock *DefBlock = I->getParent();
if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
- InsertPt = splitBeforeCatchSwitch(CSI);
+ InsertPt = splitBeforeCatchSwitch(CSI)->getIterator();
else
- InsertPt = &*DefBlock->getFirstInsertionPt();
+ InsertPt = DefBlock->getFirstInsertionPt();
} else {
assert(!I->isTerminator() && "unexpected terminator");
// For all other values, the spill is placed immediately after
// the definition.
- InsertPt = I->getNextNode();
+ InsertPt = I->getNextNode()->getIterator();
}
}
auto Index = FrameData.getFieldIndex(Def);
- Builder.SetInsertPoint(InsertPt);
+ Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
auto *G = Builder.CreateConstInBoundsGEP2_32(
FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr"));
if (ByValTy) {
@@ -1840,7 +1853,8 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
// reference provided with the frame GEP.
if (CurrentBlock != U->getParent()) {
CurrentBlock = U->getParent();
- Builder.SetInsertPoint(&*CurrentBlock->getFirstInsertionPt());
+ Builder.SetInsertPoint(CurrentBlock,
+ CurrentBlock->getFirstInsertionPt());
auto *GEP = GetFramePointer(E.first);
GEP->setName(E.first->getName() + Twine(".reload.addr"));
@@ -1851,7 +1865,9 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
SpillAlignment, E.first->getName() + Twine(".reload"));
- TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Def);
+ SmallVector<DbgDeclareInst *, 1> DIs;
+ SmallVector<DPValue *, 1> DPVs;
+ findDbgDeclares(DIs, Def, &DPVs);
// Try best to find dbg.declare. If the spill is a temp, there may not
// be a direct dbg.declare. Walk up the load chain to find one from an
// alias.
@@ -1863,23 +1879,39 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
if (LdInst->getPointerOperandType() != LdInst->getType())
break;
CurDef = LdInst->getPointerOperand();
- DIs = FindDbgDeclareUses(CurDef);
+ if (!isa<AllocaInst, LoadInst>(CurDef))
+ break;
+ DIs.clear();
+ DPVs.clear();
+ findDbgDeclares(DIs, CurDef, &DPVs);
}
}
- for (DbgDeclareInst *DDI : DIs) {
+ auto SalvageOne = [&](auto *DDI) {
bool AllowUnresolved = false;
// This dbg.declare is preserved for all coro-split function
// fragments. It will be unreachable in the main function, and
// processed by coro::salvageDebugInfo() by CoroCloner.
- DIBuilder(*CurrentBlock->getParent()->getParent(), AllowUnresolved)
- .insertDeclare(CurrentReload, DDI->getVariable(),
- DDI->getExpression(), DDI->getDebugLoc(),
- &*Builder.GetInsertPoint());
+ if (UseNewDbgInfoFormat) {
+ DPValue *NewDPV =
+ new DPValue(ValueAsMetadata::get(CurrentReload),
+ DDI->getVariable(), DDI->getExpression(),
+ DDI->getDebugLoc(), DPValue::LocationType::Declare);
+ Builder.GetInsertPoint()->getParent()->insertDPValueBefore(
+ NewDPV, Builder.GetInsertPoint());
+ } else {
+ DIBuilder(*CurrentBlock->getParent()->getParent(), AllowUnresolved)
+ .insertDeclare(CurrentReload, DDI->getVariable(),
+ DDI->getExpression(), DDI->getDebugLoc(),
+ &*Builder.GetInsertPoint());
+ }
// This dbg.declare is for the main function entry point. It
// will be deleted in all coro-split functions.
- coro::salvageDebugInfo(ArgToAllocaMap, DDI, Shape.OptimizeFrame);
- }
+ coro::salvageDebugInfo(ArgToAllocaMap, *DDI, Shape.OptimizeFrame,
+ false /*UseEntryValue*/);
+ };
+ for_each(DIs, SalvageOne);
+ for_each(DPVs, SalvageOne);
}
// If we have a single edge PHINode, remove it and replace it with a
@@ -1897,6 +1929,10 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
// Replace all uses of CurrentValue in the current instruction with
// reload.
U->replaceUsesOfWith(Def, CurrentReload);
+ // Instructions are added to Def's user list if the attached
+ // debug records use Def. Update those now.
+ for (auto &DPV : U->getDbgValueRange())
+ DPV.replaceVariableLocationOp(Def, CurrentReload, true);
}
}
@@ -1911,7 +1947,7 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
Shape.ABI == coro::ABI::Async) {
// If we found any allocas, replace all of their remaining uses with Geps.
- Builder.SetInsertPoint(&SpillBlock->front());
+ Builder.SetInsertPoint(SpillBlock, SpillBlock->begin());
for (const auto &P : FrameData.Allocas) {
AllocaInst *Alloca = P.Alloca;
auto *G = GetFramePointer(Alloca);
@@ -1930,7 +1966,8 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
// dbg.declares and dbg.values with the reload from the frame.
// Note: We cannot replace the alloca with GEP instructions indiscriminately,
// as some of the uses may not be dominated by CoroBegin.
- Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
+ Builder.SetInsertPoint(Shape.AllocaSpillBlock,
+ Shape.AllocaSpillBlock->begin());
SmallVector<Instruction *, 4> UsersToUpdate;
for (const auto &A : FrameData.Allocas) {
AllocaInst *Alloca = A.Alloca;
@@ -1946,9 +1983,12 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
G->setName(Alloca->getName() + Twine(".reload.addr"));
SmallVector<DbgVariableIntrinsic *, 4> DIs;
- findDbgUsers(DIs, Alloca);
+ SmallVector<DPValue *> DPValues;
+ findDbgUsers(DIs, Alloca, &DPValues);
for (auto *DVI : DIs)
DVI->replaceUsesOfWith(Alloca, G);
+ for (auto *DPV : DPValues)
+ DPV->replaceVariableLocationOp(Alloca, G);
for (Instruction *I : UsersToUpdate) {
// It is meaningless to retain the lifetime intrinsics refer for the
@@ -1962,7 +2002,7 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
I->replaceUsesOfWith(Alloca, G);
}
}
- Builder.SetInsertPoint(Shape.getInsertPtAfterFramePtr());
+ Builder.SetInsertPoint(&*Shape.getInsertPtAfterFramePtr());
for (const auto &A : FrameData.Allocas) {
AllocaInst *Alloca = A.Alloca;
if (A.MayWriteBeforeCoroBegin) {
@@ -1980,16 +2020,12 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
// to the pointer in the frame.
for (const auto &Alias : A.Aliases) {
auto *FramePtr = GetFramePointer(Alloca);
- auto *FramePtrRaw =
- Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C));
auto &Value = *Alias.second;
auto ITy = IntegerType::get(C, Value.getBitWidth());
- auto *AliasPtr = Builder.CreateGEP(Type::getInt8Ty(C), FramePtrRaw,
+ auto *AliasPtr = Builder.CreateGEP(Type::getInt8Ty(C), FramePtr,
ConstantInt::get(ITy, Value));
- auto *AliasPtrTyped =
- Builder.CreateBitCast(AliasPtr, Alias.first->getType());
Alias.first->replaceUsesWithIf(
- AliasPtrTyped, [&](Use &U) { return DT.dominates(CB, U); });
+ AliasPtr, [&](Use &U) { return DT.dominates(CB, U); });
}
}
@@ -2027,7 +2063,7 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
isa<BitCastInst>(Inst);
});
if (HasAccessingPromiseBeforeCB) {
- Builder.SetInsertPoint(Shape.getInsertPtAfterFramePtr());
+ Builder.SetInsertPoint(&*Shape.getInsertPtAfterFramePtr());
auto *G = GetFramePointer(PA);
auto *Value = Builder.CreateLoad(PA->getAllocatedType(), PA);
Builder.CreateStore(Value, G);
@@ -2046,8 +2082,8 @@ static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB,
int Index = PN->getBasicBlockIndex(InsertedBB);
Value *V = PN->getIncomingValue(Index);
PHINode *InputV = PHINode::Create(
- V->getType(), 1, V->getName() + Twine(".") + SuccBB->getName(),
- &InsertedBB->front());
+ V->getType(), 1, V->getName() + Twine(".") + SuccBB->getName());
+ InputV->insertBefore(InsertedBB->begin());
InputV->addIncoming(V, PredBB);
PN->setIncomingValue(Index, InputV);
PN = dyn_cast<PHINode>(PN->getNextNode());
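
The create-then-insert two-step above, replacing the insert-before-Instruction* constructor argument, is the iterator-based insertion style the debug-record work requires. In isolation:

  #include "llvm/IR/Instructions.h"

  // Inserting at a BasicBlock::iterator (rather than an Instruction*)
  // preserves the position relative to attached debug records.
  static llvm::PHINode *makeHeadPHI(llvm::BasicBlock *BB, llvm::Type *Ty) {
    llvm::PHINode *PN =
        llvm::PHINode::Create(Ty, /*NumReservedValues=*/1, "head.phi");
    PN->insertBefore(BB->begin());
    return PN;
  }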
@@ -2193,7 +2229,8 @@ static void rewritePHIs(BasicBlock &BB) {
// ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
// We replace the original landing pad with a PHINode that will collect the
// results from all of them.
- ReplPHI = PHINode::Create(LandingPad->getType(), 1, "", LandingPad);
+ ReplPHI = PHINode::Create(LandingPad->getType(), 1, "");
+ ReplPHI->insertBefore(LandingPad->getIterator());
ReplPHI->takeName(LandingPad);
LandingPad->replaceAllUsesWith(ReplPHI);
// We will erase the original landing pad at the end of this function after
@@ -2428,15 +2465,13 @@ static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
SmallVectorImpl<Instruction*> &DeadInsts) {
for (auto *AI : LocalAllocas) {
- auto M = AI->getModule();
IRBuilder<> Builder(AI);
// Save the stack depth. Try to avoid doing this if the stackrestore
// is going to immediately precede a return or something.
Value *StackSave = nullptr;
if (localAllocaNeedsStackSave(AI))
- StackSave = Builder.CreateCall(
- Intrinsic::getDeclaration(M, Intrinsic::stacksave));
+ StackSave = Builder.CreateStackSave();
// Allocate memory.
auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
@@ -2454,9 +2489,7 @@ static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
auto FI = cast<CoroAllocaFreeInst>(U);
if (StackSave) {
Builder.SetInsertPoint(FI);
- Builder.CreateCall(
- Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
- StackSave);
+ Builder.CreateStackRestore(StackSave);
}
}
DeadInsts.push_back(cast<Instruction>(U));
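
The builder helpers used above wrap the stack save/restore intrinsics directly, removing the manual Intrinsic::getDeclaration plumbing. The resulting idiom, sketched with an assumed in-scope builder:

  #include "llvm/IR/IRBuilder.h"

  // Bracket a dynamic stack allocation between llvm.stacksave and
  // llvm.stackrestore so its memory is reclaimed at scope exit.
  static void emitScopedAlloca(llvm::IRBuilder<> &B, llvm::Value *Size) {
    llvm::Value *Saved = B.CreateStackSave();
    llvm::Value *Mem = B.CreateAlloca(B.getInt8Ty(), Size);
    (void)Mem; // ... use the allocation here ...
    B.CreateStackRestore(Saved);
  }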
@@ -2498,7 +2531,7 @@ static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
coro::Shape &Shape) {
// Make a fake function pointer as a sort of intrinsic.
auto FnTy = FunctionType::get(ValueTy, {}, false);
- auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
+ auto Fn = ConstantPointerNull::get(Builder.getPtrTy());
auto Call = Builder.CreateCall(FnTy, Fn, {});
Shape.SwiftErrorOps.push_back(Call);
@@ -2512,9 +2545,9 @@ static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
coro::Shape &Shape) {
// Make a fake function pointer as a sort of intrinsic.
- auto FnTy = FunctionType::get(V->getType()->getPointerTo(),
+ auto FnTy = FunctionType::get(Builder.getPtrTy(),
{V->getType()}, false);
- auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
+ auto Fn = ConstantPointerNull::get(Builder.getPtrTy());
auto Call = Builder.CreateCall(FnTy, Fn, { V });
Shape.SwiftErrorOps.push_back(Call);
@@ -2765,17 +2798,8 @@ static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
// Sink lifetime.start markers to dominate block when they are
// only used outside the region.
if (Valid && Lifetimes.size() != 0) {
- // May be AI itself, when the type of AI is i8*
- auto *NewBitCast = [&](AllocaInst *AI) -> Value* {
- if (isa<AllocaInst>(Lifetimes[0]->getOperand(1)))
- return AI;
- auto *Int8PtrTy = Type::getInt8PtrTy(F.getContext());
- return CastInst::Create(Instruction::BitCast, AI, Int8PtrTy, "",
- DomBB->getTerminator());
- }(AI);
-
auto *NewLifetime = Lifetimes[0]->clone();
- NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), NewBitCast);
+ NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), AI);
NewLifetime->insertBefore(DomBB->getTerminator());
// All the lifetime.start markers outside the region are no longer necessary.
@@ -2800,6 +2824,11 @@ static void collectFrameAlloca(AllocaInst *AI, coro::Shape &Shape,
if (AI == Shape.SwitchLowering.PromiseAlloca)
return;
+ // The __coro_gro alloca should outlive the promise, so make sure we
+ // keep it outside the frame.
+ if (AI->hasMetadata(LLVMContext::MD_coro_outside_frame))
+ return;
+
// The code that uses the lifetime.start intrinsic does not work for
// functions containing loops without an exit. Disable it on ABIs we know
// to generate such code.
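
The metadata check above is the consumer side; a producer (for example, a frontend emitting __coro_gro) tags the alloca before the coroutine frame is built. A sketch of the tagging side, hedged because the producer code is not part of this patch:

  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/Metadata.h"

  // Mark an alloca so buildCoroutineFrame keeps it outside the frame.
  static void markOutsideFrame(llvm::AllocaInst *AI) {
    AI->setMetadata(llvm::LLVMContext::MD_coro_outside_frame,
                    llvm::MDNode::get(AI->getContext(), {}));
  }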
@@ -2816,21 +2845,16 @@ static void collectFrameAlloca(AllocaInst *AI, coro::Shape &Shape,
Visitor.getMayWriteBeforeCoroBegin());
}
-void coro::salvageDebugInfo(
- SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
- DbgVariableIntrinsic *DVI, bool OptimizeFrame) {
- Function *F = DVI->getFunction();
+static std::optional<std::pair<Value &, DIExpression &>>
+salvageDebugInfoImpl(SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
+ bool OptimizeFrame, bool UseEntryValue, Function *F,
+ Value *Storage, DIExpression *Expr,
+ bool SkipOutermostLoad) {
IRBuilder<> Builder(F->getContext());
auto InsertPt = F->getEntryBlock().getFirstInsertionPt();
while (isa<IntrinsicInst>(InsertPt))
++InsertPt;
Builder.SetInsertPoint(&F->getEntryBlock(), InsertPt);
- DIExpression *Expr = DVI->getExpression();
- // Follow the pointer arithmetic all the way to the incoming
- // function argument and convert into a DIExpression.
- bool SkipOutermostLoad = !isa<DbgValueInst>(DVI);
- Value *Storage = DVI->getVariableLocationOp(0);
- Value *OriginalStorage = Storage;
while (auto *Inst = dyn_cast_or_null<Instruction>(Storage)) {
if (auto *LdInst = dyn_cast<LoadInst>(Inst)) {
@@ -2862,7 +2886,7 @@ void coro::salvageDebugInfo(
SkipOutermostLoad = false;
}
if (!Storage)
- return;
+ return std::nullopt;
auto *StorageAsArg = dyn_cast<Argument>(Storage);
const bool IsSwiftAsyncArg =
@@ -2870,7 +2894,9 @@ void coro::salvageDebugInfo(
// Swift async arguments are described by an entry value of the ABI-defined
// register containing the coroutine context.
- if (IsSwiftAsyncArg && !Expr->isEntryValue())
+ // Entry values in variadic expressions are not supported.
+ if (IsSwiftAsyncArg && UseEntryValue && !Expr->isEntryValue() &&
+ Expr->isSingleLocationExpression())
Expr = DIExpression::prepend(Expr, DIExpression::EntryValue);
// If the coroutine frame is an Argument, store it in an alloca to improve
@@ -2896,19 +2922,78 @@ void coro::salvageDebugInfo(
Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
}
- DVI->replaceVariableLocationOp(OriginalStorage, Storage);
- DVI->setExpression(Expr);
+ return {{*Storage, *Expr}};
+}
+
+void coro::salvageDebugInfo(
+ SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
+ DbgVariableIntrinsic &DVI, bool OptimizeFrame, bool UseEntryValue) {
+
+ Function *F = DVI.getFunction();
+ // Follow the pointer arithmetic all the way to the incoming
+ // function argument and convert into a DIExpression.
+ bool SkipOutermostLoad = !isa<DbgValueInst>(DVI);
+ Value *OriginalStorage = DVI.getVariableLocationOp(0);
+
+ auto SalvagedInfo = ::salvageDebugInfoImpl(
+ ArgToAllocaMap, OptimizeFrame, UseEntryValue, F, OriginalStorage,
+ DVI.getExpression(), SkipOutermostLoad);
+ if (!SalvagedInfo)
+ return;
+
+ Value *Storage = &SalvagedInfo->first;
+ DIExpression *Expr = &SalvagedInfo->second;
+
+ DVI.replaceVariableLocationOp(OriginalStorage, Storage);
+ DVI.setExpression(Expr);
// We only hoist dbg.declare today since it doesn't make sense to hoist
// dbg.value since it does not have the same function wide guarantees that
// dbg.declare does.
if (isa<DbgDeclareInst>(DVI)) {
- Instruction *InsertPt = nullptr;
+ std::optional<BasicBlock::iterator> InsertPt;
if (auto *I = dyn_cast<Instruction>(Storage))
InsertPt = I->getInsertionPointAfterDef();
else if (isa<Argument>(Storage))
- InsertPt = &*F->getEntryBlock().begin();
+ InsertPt = F->getEntryBlock().begin();
if (InsertPt)
- DVI->moveBefore(InsertPt);
+ DVI.moveBefore(*(*InsertPt)->getParent(), *InsertPt);
+ }
+}
+
+void coro::salvageDebugInfo(
+ SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap, DPValue &DPV,
+ bool OptimizeFrame, bool UseEntryValue) {
+
+ Function *F = DPV.getFunction();
+ // Follow the pointer arithmetic all the way to the incoming
+ // function argument and convert into a DIExpression.
+ bool SkipOutermostLoad = DPV.getType() == DPValue::LocationType::Declare;
+ Value *OriginalStorage = DPV.getVariableLocationOp(0);
+
+ auto SalvagedInfo = ::salvageDebugInfoImpl(
+ ArgToAllocaMap, OptimizeFrame, UseEntryValue, F, OriginalStorage,
+ DPV.getExpression(), SkipOutermostLoad);
+ if (!SalvagedInfo)
+ return;
+
+ Value *Storage = &SalvagedInfo->first;
+ DIExpression *Expr = &SalvagedInfo->second;
+
+ DPV.replaceVariableLocationOp(OriginalStorage, Storage);
+ DPV.setExpression(Expr);
+ // We only hoist dbg.declare today since it doesn't make sense to hoist
+ // dbg.value since it does not have the same function wide guarantees that
+ // dbg.declare does.
+ if (DPV.getType() == DPValue::LocationType::Declare) {
+ std::optional<BasicBlock::iterator> InsertPt;
+ if (auto *I = dyn_cast<Instruction>(Storage))
+ InsertPt = I->getInsertionPointAfterDef();
+ else if (isa<Argument>(Storage))
+ InsertPt = F->getEntryBlock().begin();
+ if (InsertPt) {
+ DPV.removeFromParent();
+ (*InsertPt)->getParent()->insertDPValueBefore(&DPV, *InsertPt);
+ }
}
}
@@ -3099,10 +3184,15 @@ void coro::buildCoroutineFrame(
for (auto &Iter : FrameData.Spills) {
auto *V = Iter.first;
SmallVector<DbgValueInst *, 16> DVIs;
- findDbgValues(DVIs, V);
+ SmallVector<DPValue *, 16> DPVs;
+ findDbgValues(DVIs, V, &DPVs);
for (DbgValueInst *DVI : DVIs)
if (Checker.isDefinitionAcrossSuspend(*V, DVI))
FrameData.Spills[V].push_back(DVI);
+ // Add the instructions whose attached debug records refer to a value
+ // kept in the coroutine frame.
+ for (DPValue *DPV : DPVs)
+ if (Checker.isDefinitionAcrossSuspend(*V, DPV->Marker->MarkedInstr))
+ FrameData.Spills[V].push_back(DPV->Marker->MarkedInstr);
}
LLVM_DEBUG(dumpSpills("Spills", FrameData.Spills));
@@ -3110,7 +3200,7 @@ void coro::buildCoroutineFrame(
Shape.ABI == coro::ABI::Async)
sinkSpillUsesAfterCoroBegin(F, FrameData, Shape.CoroBegin);
Shape.FrameTy = buildFrameType(F, Shape, FrameData);
- createFramePtr(Shape);
+ Shape.FramePtr = Shape.CoroBegin;
// For now, this works for C++ programs only.
buildFrameDebugInfo(F, Shape, FrameData);
insertSpills(FrameData, Shape);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInstr.h b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInstr.h
index 014938c15a0a..f01aa58eb899 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInstr.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInstr.h
@@ -123,8 +123,8 @@ public:
void clearPromise() {
Value *Arg = getArgOperand(PromiseArg);
- setArgOperand(PromiseArg,
- ConstantPointerNull::get(Type::getInt8PtrTy(getContext())));
+ setArgOperand(PromiseArg, ConstantPointerNull::get(
+ PointerType::getUnqual(getContext())));
if (isa<AllocaInst>(Arg))
return;
assert((isa<BitCastInst>(Arg) || isa<GetElementPtrInst>(Arg)) &&
@@ -185,9 +185,7 @@ public:
void setCoroutineSelf() {
assert(isa<ConstantPointerNull>(getArgOperand(CoroutineArg)) &&
"Coroutine argument is already assigned");
- auto *const Int8PtrTy = Type::getInt8PtrTy(getContext());
- setArgOperand(CoroutineArg,
- ConstantExpr::getBitCast(getFunction(), Int8PtrTy));
+ setArgOperand(CoroutineArg, getFunction());
}
// Methods to support type inquiry through isa, cast, and dyn_cast:
@@ -611,8 +609,37 @@ public:
}
};
+/// This represents the llvm.coro.end.results instruction.
+class LLVM_LIBRARY_VISIBILITY CoroEndResults : public IntrinsicInst {
+public:
+ op_iterator retval_begin() { return arg_begin(); }
+ const_op_iterator retval_begin() const { return arg_begin(); }
+
+ op_iterator retval_end() { return arg_end(); }
+ const_op_iterator retval_end() const { return arg_end(); }
+
+ iterator_range<op_iterator> return_values() {
+ return make_range(retval_begin(), retval_end());
+ }
+ iterator_range<const_op_iterator> return_values() const {
+ return make_range(retval_begin(), retval_end());
+ }
+
+ unsigned numReturns() const {
+ return std::distance(retval_begin(), retval_end());
+ }
+
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_end_results;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
class LLVM_LIBRARY_VISIBILITY AnyCoroEndInst : public IntrinsicInst {
- enum { FrameArg, UnwindArg };
+ enum { FrameArg, UnwindArg, TokenArg };
public:
bool isFallthrough() const { return !isUnwind(); }
@@ -620,6 +647,15 @@ public:
return cast<Constant>(getArgOperand(UnwindArg))->isOneValue();
}
+ bool hasResults() const {
+ return !isa<ConstantTokenNone>(getArgOperand(TokenArg));
+ }
+
+ CoroEndResults *getResults() const {
+ assert(hasResults());
+ return cast<CoroEndResults>(getArgOperand(TokenArg));
+ }
+
// Methods to support type inquiry through isa, cast, and dyn_cast:
static bool classof(const IntrinsicInst *I) {
auto ID = I->getIntrinsicID();
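
Code inside the Coroutines library can walk the returned values through the range accessors defined above; a usage sketch (the function name is illustrative):

  #include "CoroInstr.h"
  #include <cassert>

  // Count and visit the values carried by llvm.coro.end.results.
  static unsigned visitCoroResults(llvm::AnyCoroEndInst *End) {
    if (!End->hasResults())
      return 0;
    llvm::CoroEndResults *CR = End->getResults();
    unsigned N = 0;
    for (llvm::Value *V : CR->return_values()) {
      (void)V; // inspect each returned value here
      ++N;
    }
    assert(N == CR->numReturns() && "range and count must agree");
    return N;
  }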
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h
index 067fb6bba47e..fb16a4090689 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -32,7 +32,10 @@ void replaceCoroFree(CoroIdInst *CoroId, bool Elide);
/// OptimizeFrame is false.
void salvageDebugInfo(
SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
- DbgVariableIntrinsic *DVI, bool OptimizeFrame);
+  DbgVariableIntrinsic &DVI, bool OptimizeFrame, bool UseEntryValue);
+void salvageDebugInfo(
+ SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap, DPValue &DPV,
+ bool OptimizeFrame, bool UseEntryValue);
// Keeps data and helper functions for lowering coroutine intrinsics.
struct LowererBase {
@@ -185,7 +188,8 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
switch (ABI) {
case coro::ABI::Switch:
return FunctionType::get(Type::getVoidTy(FrameTy->getContext()),
- FrameTy->getPointerTo(), /*IsVarArg*/false);
+ PointerType::getUnqual(FrameTy->getContext()),
+ /*IsVarArg=*/false);
case coro::ABI::Retcon:
case coro::ABI::RetconOnce:
return RetconLowering.ResumePrototype->getFunctionType();
@@ -239,10 +243,13 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
return nullptr;
}
- Instruction *getInsertPtAfterFramePtr() const {
- if (auto *I = dyn_cast<Instruction>(FramePtr))
- return I->getNextNode();
- return &cast<Argument>(FramePtr)->getParent()->getEntryBlock().front();
+ BasicBlock::iterator getInsertPtAfterFramePtr() const {
+ if (auto *I = dyn_cast<Instruction>(FramePtr)) {
+ BasicBlock::iterator It = std::next(I->getIterator());
+ It.setHeadBit(true); // Copy pre-RemoveDIs behaviour.
+ return It;
+ }
+ return cast<Argument>(FramePtr)->getParent()->getEntryBlock().begin();
}
/// Allocate memory according to the rules of the active lowering.
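
Callers of the new iterator-returning accessor follow one shape, visible in the CoroFrame.cpp hunks above: hand SetInsertPoint the block and the iterator rather than collapsing to &*It, which would drop the head bit. Sketched with Shape and Builder assumed in scope:

  // Position a builder immediately after the frame pointer definition.
  llvm::BasicBlock::iterator It = Shape.getInsertPtAfterFramePtr();
  Builder.SetInsertPoint(It->getParent(), It);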
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 39e909bf3316..7758b52abc20 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -234,6 +234,8 @@ static void replaceFallthroughCoroEnd(AnyCoroEndInst *End,
switch (Shape.ABI) {
// The cloned functions in switch-lowering always return void.
case coro::ABI::Switch:
+ assert(!cast<CoroEndInst>(End)->hasResults() &&
+ "switch coroutine should not return any values");
// coro.end doesn't immediately end the coroutine in the main function
// in this lowering, because we need to deallocate the coroutine.
if (!InResume)
@@ -251,14 +253,45 @@ static void replaceFallthroughCoroEnd(AnyCoroEndInst *End,
// In unique continuation lowering, the continuations always return void.
// But we may have implicitly allocated storage.
- case coro::ABI::RetconOnce:
+ case coro::ABI::RetconOnce: {
maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
- Builder.CreateRetVoid();
+ auto *CoroEnd = cast<CoroEndInst>(End);
+ auto *RetTy = Shape.getResumeFunctionType()->getReturnType();
+
+ if (!CoroEnd->hasResults()) {
+ assert(RetTy->isVoidTy());
+ Builder.CreateRetVoid();
+ break;
+ }
+
+ auto *CoroResults = CoroEnd->getResults();
+ unsigned NumReturns = CoroResults->numReturns();
+
+ if (auto *RetStructTy = dyn_cast<StructType>(RetTy)) {
+ assert(RetStructTy->getNumElements() == NumReturns &&
+      "number of returns should match resume function signature");
+ Value *ReturnValue = UndefValue::get(RetStructTy);
+ unsigned Idx = 0;
+ for (Value *RetValEl : CoroResults->return_values())
+ ReturnValue = Builder.CreateInsertValue(ReturnValue, RetValEl, Idx++);
+ Builder.CreateRet(ReturnValue);
+ } else if (NumReturns == 0) {
+ assert(RetTy->isVoidTy());
+ Builder.CreateRetVoid();
+ } else {
+ assert(NumReturns == 1);
+ Builder.CreateRet(*CoroResults->retval_begin());
+ }
+ CoroResults->replaceAllUsesWith(ConstantTokenNone::get(CoroResults->getContext()));
+ CoroResults->eraseFromParent();
break;
+ }
// In non-unique continuation lowering, we signal completion by returning
// a null continuation.
case coro::ABI::Retcon: {
+ assert(!cast<CoroEndInst>(End)->hasResults() &&
+ "retcon coroutine should not return any values");
maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
auto RetTy = Shape.getResumeFunctionType()->getReturnType();
auto RetStructTy = dyn_cast<StructType>(RetTy);
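
The RetconOnce branch above materializes an aggregate return with repeated insertvalue. The idiom in isolation, as a sketch:

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/IRBuilder.h"

  // Pack scalar results into a struct return value one element at a time.
  static llvm::Value *packReturnStruct(llvm::IRBuilder<> &B,
                                       llvm::StructType *RetStructTy,
                                       llvm::ArrayRef<llvm::Value *> Elts) {
    llvm::Value *Ret = llvm::UndefValue::get(RetStructTy);
    unsigned Idx = 0;
    for (llvm::Value *V : Elts)
      Ret = B.CreateInsertValue(Ret, V, Idx++);
    return Ret;
  }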
@@ -457,7 +490,8 @@ static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
Switch->addCase(IndexVal, ResumeBB);
cast<BranchInst>(SuspendBB->getTerminator())->setSuccessor(0, LandingBB);
- auto *PN = PHINode::Create(Builder.getInt8Ty(), 2, "", &LandingBB->front());
+ auto *PN = PHINode::Create(Builder.getInt8Ty(), 2, "");
+ PN->insertBefore(LandingBB->begin());
S->replaceAllUsesWith(PN);
PN->addIncoming(Builder.getInt8(-1), SuspendBB);
PN->addIncoming(S, ResumeBB);
@@ -495,13 +529,20 @@ void CoroCloner::handleFinalSuspend() {
BasicBlock *OldSwitchBB = Switch->getParent();
auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch");
Builder.SetInsertPoint(OldSwitchBB->getTerminator());
- auto *GepIndex = Builder.CreateStructGEP(Shape.FrameTy, NewFramePtr,
- coro::Shape::SwitchFieldIndex::Resume,
- "ResumeFn.addr");
- auto *Load = Builder.CreateLoad(Shape.getSwitchResumePointerType(),
- GepIndex);
- auto *Cond = Builder.CreateIsNull(Load);
- Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB);
+
+ if (NewF->isCoroOnlyDestroyWhenComplete()) {
+ // When the coroutine can only be destroyed when complete, we don't need
+ // to generate code for other cases.
+ Builder.CreateBr(ResumeBB);
+ } else {
+ auto *GepIndex = Builder.CreateStructGEP(
+ Shape.FrameTy, NewFramePtr, coro::Shape::SwitchFieldIndex::Resume,
+ "ResumeFn.addr");
+ auto *Load =
+ Builder.CreateLoad(Shape.getSwitchResumePointerType(), GepIndex);
+ auto *Cond = Builder.CreateIsNull(Load);
+ Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB);
+ }
OldSwitchBB->getTerminator()->eraseFromParent();
}
}
@@ -684,13 +725,17 @@ static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
}
/// Returns all DbgVariableIntrinsic in F.
-static SmallVector<DbgVariableIntrinsic *, 8>
+static std::pair<SmallVector<DbgVariableIntrinsic *, 8>, SmallVector<DPValue *>>
collectDbgVariableIntrinsics(Function &F) {
SmallVector<DbgVariableIntrinsic *, 8> Intrinsics;
- for (auto &I : instructions(F))
+ SmallVector<DPValue *> DPValues;
+ for (auto &I : instructions(F)) {
+ for (DPValue &DPV : I.getDbgValueRange())
+ DPValues.push_back(&DPV);
if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
Intrinsics.push_back(DVI);
- return Intrinsics;
+ }
+ return {Intrinsics, DPValues};
}
void CoroCloner::replaceSwiftErrorOps() {
@@ -698,11 +743,18 @@ void CoroCloner::replaceSwiftErrorOps() {
}
void CoroCloner::salvageDebugInfo() {
- SmallVector<DbgVariableIntrinsic *, 8> Worklist =
- collectDbgVariableIntrinsics(*NewF);
+ auto [Worklist, DPValues] = collectDbgVariableIntrinsics(*NewF);
SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
+
+ // Only 64-bit ABIs have a register we can refer to with the entry value.
+ bool UseEntryValue =
+ llvm::Triple(OrigF.getParent()->getTargetTriple()).isArch64Bit();
for (DbgVariableIntrinsic *DVI : Worklist)
- coro::salvageDebugInfo(ArgToAllocaMap, DVI, Shape.OptimizeFrame);
+ coro::salvageDebugInfo(ArgToAllocaMap, *DVI, Shape.OptimizeFrame,
+ UseEntryValue);
+ for (DPValue *DPV : DPValues)
+ coro::salvageDebugInfo(ArgToAllocaMap, *DPV, Shape.OptimizeFrame,
+ UseEntryValue);
// Remove all salvaged dbg.declare intrinsics that became
// either unreachable or stale due to the CoroSplit transformation.
@@ -711,7 +763,7 @@ void CoroCloner::salvageDebugInfo() {
return !isPotentiallyReachable(&NewF->getEntryBlock(), BB, nullptr,
&DomTree);
};
- for (DbgVariableIntrinsic *DVI : Worklist) {
+ auto RemoveOne = [&](auto *DVI) {
if (IsUnreachableBlock(DVI->getParent()))
DVI->eraseFromParent();
else if (isa_and_nonnull<AllocaInst>(DVI->getVariableLocationOp(0))) {
@@ -724,7 +776,9 @@ void CoroCloner::salvageDebugInfo() {
if (!Uses)
DVI->eraseFromParent();
}
- }
+ };
+ for_each(Worklist, RemoveOne);
+ for_each(DPValues, RemoveOne);
}
void CoroCloner::replaceEntryBlock() {
@@ -811,7 +865,6 @@ Value *CoroCloner::deriveNewFramePointer() {
auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend);
auto ContextIdx = ActiveAsyncSuspend->getStorageArgumentIndex() & 0xff;
auto *CalleeContext = NewF->getArg(ContextIdx);
- auto *FramePtrTy = Shape.FrameTy->getPointerTo();
auto *ProjectionFunc =
ActiveAsyncSuspend->getAsyncContextProjectionFunction();
auto DbgLoc =
@@ -831,22 +884,20 @@ Value *CoroCloner::deriveNewFramePointer() {
auto InlineRes = InlineFunction(*CallerContext, InlineInfo);
assert(InlineRes.isSuccess());
(void)InlineRes;
- return Builder.CreateBitCast(FramePtrAddr, FramePtrTy);
+ return FramePtrAddr;
}
// In continuation-lowering, the argument is the opaque storage.
case coro::ABI::Retcon:
case coro::ABI::RetconOnce: {
Argument *NewStorage = &*NewF->arg_begin();
- auto FramePtrTy = Shape.FrameTy->getPointerTo();
+ auto FramePtrTy = PointerType::getUnqual(Shape.FrameTy->getContext());
// If the storage is inline, just bitcast the storage to the frame type.
if (Shape.RetconLowering.IsFrameInlineInStorage)
- return Builder.CreateBitCast(NewStorage, FramePtrTy);
+ return NewStorage;
// Otherwise, load the real frame from the opaque storage.
- auto FramePtrPtr =
- Builder.CreateBitCast(NewStorage, FramePtrTy->getPointerTo());
- return Builder.CreateLoad(FramePtrTy, FramePtrPtr);
+ return Builder.CreateLoad(FramePtrTy, NewStorage);
}
}
llvm_unreachable("bad ABI");
@@ -940,9 +991,22 @@ void CoroCloner::create() {
// abstract specification, since the DWARF backend expects the
// abstract specification to contain the linkage name and asserts
// that they are identical.
- if (!SP->getDeclaration() && SP->getUnit() &&
- SP->getUnit()->getSourceLanguage() == dwarf::DW_LANG_Swift)
+ if (SP->getUnit() &&
+ SP->getUnit()->getSourceLanguage() == dwarf::DW_LANG_Swift) {
SP->replaceLinkageName(MDString::get(Context, NewF->getName()));
+ if (auto *Decl = SP->getDeclaration()) {
+ auto *NewDecl = DISubprogram::get(
+ Decl->getContext(), Decl->getScope(), Decl->getName(),
+ NewF->getName(), Decl->getFile(), Decl->getLine(), Decl->getType(),
+ Decl->getScopeLine(), Decl->getContainingType(),
+ Decl->getVirtualIndex(), Decl->getThisAdjustment(),
+ Decl->getFlags(), Decl->getSPFlags(), Decl->getUnit(),
+ Decl->getTemplateParams(), nullptr, Decl->getRetainedNodes(),
+ Decl->getThrownTypes(), Decl->getAnnotations(),
+ Decl->getTargetFuncName());
+ SP->replaceDeclaration(NewDecl);
+ }
+ }
}
NewF->setLinkage(savedLinkage);
@@ -1047,7 +1111,7 @@ void CoroCloner::create() {
// Remap vFrame pointer.
auto *NewVFrame = Builder.CreateBitCast(
- NewFramePtr, Type::getInt8PtrTy(Builder.getContext()), "vFrame");
+ NewFramePtr, PointerType::getUnqual(Builder.getContext()), "vFrame");
Value *OldVFrame = cast<Value>(VMap[Shape.CoroBegin]);
if (OldVFrame != NewVFrame)
OldVFrame->replaceAllUsesWith(NewVFrame);
@@ -1178,7 +1242,7 @@ static void setCoroInfo(Function &F, coro::Shape &Shape,
// Update coro.begin instruction to refer to this constant.
LLVMContext &C = F.getContext();
- auto *BC = ConstantExpr::getPointerCast(GV, Type::getInt8PtrTy(C));
+ auto *BC = ConstantExpr::getPointerCast(GV, PointerType::getUnqual(C));
Shape.getSwitchCoroId()->setInfo(BC);
}
@@ -1187,7 +1251,7 @@ static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
Function *DestroyFn, Function *CleanupFn) {
assert(Shape.ABI == coro::ABI::Switch);
- IRBuilder<> Builder(Shape.getInsertPtAfterFramePtr());
+ IRBuilder<> Builder(&*Shape.getInsertPtAfterFramePtr());
auto *ResumeAddr = Builder.CreateStructGEP(
Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume,
@@ -1425,10 +1489,9 @@ static void handleNoSuspendCoroutine(coro::Shape &Shape) {
IRBuilder<> Builder(AllocInst);
auto *Frame = Builder.CreateAlloca(Shape.FrameTy);
Frame->setAlignment(Shape.FrameAlign);
- auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy());
AllocInst->replaceAllUsesWith(Builder.getFalse());
AllocInst->eraseFromParent();
- CoroBegin->replaceAllUsesWith(VFrame);
+ CoroBegin->replaceAllUsesWith(Frame);
} else {
CoroBegin->replaceAllUsesWith(CoroBegin->getMem());
}
@@ -1658,7 +1721,7 @@ static void replaceAsyncResumeFunction(CoroSuspendAsyncInst *Suspend,
Value *Continuation) {
auto *ResumeIntrinsic = Suspend->getResumeFunction();
auto &Context = Suspend->getParent()->getParent()->getContext();
- auto *Int8PtrTy = Type::getInt8PtrTy(Context);
+ auto *Int8PtrTy = PointerType::getUnqual(Context);
IRBuilder<> Builder(ResumeIntrinsic);
auto *Val = Builder.CreateBitOrPointerCast(Continuation, Int8PtrTy);
@@ -1711,7 +1774,7 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
F.removeRetAttr(Attribute::NonNull);
auto &Context = F.getContext();
- auto *Int8PtrTy = Type::getInt8PtrTy(Context);
+ auto *Int8PtrTy = PointerType::getUnqual(Context);
auto *Id = cast<CoroIdAsyncInst>(Shape.CoroBegin->getId());
IRBuilder<> Builder(Id);
@@ -1829,9 +1892,7 @@ static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType());
// Stash the allocated frame pointer in the continuation storage.
- auto Dest = Builder.CreateBitCast(Id->getStorage(),
- RawFramePtr->getType()->getPointerTo());
- Builder.CreateStore(RawFramePtr, Dest);
+ Builder.CreateStore(RawFramePtr, Id->getStorage());
}
// Map all uses of llvm.coro.begin to the allocated frame pointer.
@@ -1986,9 +2047,13 @@ splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
// original function. The Cloner has already salvaged debug info in the new
// coroutine funclets.
SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
- for (auto *DDI : collectDbgVariableIntrinsics(F))
- coro::salvageDebugInfo(ArgToAllocaMap, DDI, Shape.OptimizeFrame);
-
+ auto [DbgInsts, DPValues] = collectDbgVariableIntrinsics(F);
+ for (auto *DDI : DbgInsts)
+ coro::salvageDebugInfo(ArgToAllocaMap, *DDI, Shape.OptimizeFrame,
+ false /*UseEntryValue*/);
+ for (DPValue *DPV : DPValues)
+ coro::salvageDebugInfo(ArgToAllocaMap, *DPV, Shape.OptimizeFrame,
+ false /*UseEntryValue*/);
return Shape;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index cde74c5e693b..eef5543bae24 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -37,16 +37,15 @@ using namespace llvm;
// Construct the lowerer base class and initialize its members.
coro::LowererBase::LowererBase(Module &M)
: TheModule(M), Context(M.getContext()),
- Int8Ptr(Type::getInt8PtrTy(Context)),
+ Int8Ptr(PointerType::get(Context, 0)),
ResumeFnType(FunctionType::get(Type::getVoidTy(Context), Int8Ptr,
/*isVarArg=*/false)),
NullPtr(ConstantPointerNull::get(Int8Ptr)) {}
-// Creates a sequence of instructions to obtain a resume function address using
-// llvm.coro.subfn.addr. It generates the following sequence:
+// Creates a call to llvm.coro.subfn.addr to obtain a resume function address.
+// It generates the following:
//
-// call i8* @llvm.coro.subfn.addr(i8* %Arg, i8 %index)
-// bitcast i8* %2 to void(i8*)*
+// call ptr @llvm.coro.subfn.addr(ptr %Arg, i8 %index)
Value *coro::LowererBase::makeSubFnCall(Value *Arg, int Index,
Instruction *InsertPt) {
@@ -56,11 +55,7 @@ Value *coro::LowererBase::makeSubFnCall(Value *Arg, int Index,
assert(Index >= CoroSubFnInst::IndexFirst &&
Index < CoroSubFnInst::IndexLast &&
"makeSubFnCall: Index value out of range");
- auto *Call = CallInst::Create(Fn, {Arg, IndexVal}, "", InsertPt);
-
- auto *Bitcast =
- new BitCastInst(Call, ResumeFnType->getPointerTo(), "", InsertPt);
- return Bitcast;
+ return CallInst::Create(Fn, {Arg, IndexVal}, "", InsertPt);
}
// NOTE: Must be sorted!
@@ -137,8 +132,9 @@ void coro::replaceCoroFree(CoroIdInst *CoroId, bool Elide) {
return;
Value *Replacement =
- Elide ? ConstantPointerNull::get(Type::getInt8PtrTy(CoroId->getContext()))
- : CoroFrees.front()->getFrame();
+ Elide
+ ? ConstantPointerNull::get(PointerType::get(CoroId->getContext(), 0))
+ : CoroFrees.front()->getFrame();
for (CoroFreeInst *CF : CoroFrees) {
CF->replaceAllUsesWith(Replacement);
@@ -267,7 +263,7 @@ void coro::Shape::buildFrom(Function &F) {
if (!CoroBegin) {
// Replace coro.frame which are supposed to be lowered to the result of
// coro.begin with undef.
- auto *Undef = UndefValue::get(Type::getInt8PtrTy(F.getContext()));
+ auto *Undef = UndefValue::get(PointerType::get(F.getContext(), 0));
for (CoroFrameInst *CF : CoroFrames) {
CF->replaceAllUsesWith(Undef);
CF->eraseFromParent();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp b/contrib/llvm-project/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp
new file mode 100644
index 000000000000..fb7cba9edbdb
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp
@@ -0,0 +1,312 @@
+//===----- HipStdPar.cpp - HIP C++ Standard Parallelism Support Passes ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This file implements two passes that enable HIP C++ Standard Parallelism
+// Support:
+//
+// 1. AcceleratorCodeSelection (required): Given that only algorithms are
+// accelerated, and that the accelerated implementation exists in the form of
+// a compute kernel, we assume that only the kernel, and all functions
+// reachable from it, constitute code that the user expects the accelerator
+// to execute. Thus, we identify the set of all functions reachable from
+// kernels, and then remove all unreachable ones. This last part is necessary
+// because it is possible for code that the user did not expect to execute on
+// an accelerator to contain constructs that cannot be handled by the target
+// BE, which cannot be provably demonstrated to be dead code in general, and
+// thus can lead to mis-compilation. The degenerate case of this is when a
+// Module contains no kernels (the parent TU had no algorithm invocations fit
+// for acceleration), which we handle by completely emptying said module.
+// **NOTE**: The above does not handle indirectly reachable functions i.e.
+// it is possible to obtain a case where the target of an indirect
+// call is otherwise unreachable and thus is removed; this
+// restriction is aligned with the current `-hipstdpar` limitations
+// and will be relaxed in the future.
+//
+// 2. AllocationInterposition (required only when on-demand paging is
+// unsupported): Some accelerators or operating systems might not support
+// transparent on-demand paging. Thus, they would only be able to access
+// memory that is allocated by an accelerator-aware mechanism. For such cases
+// the user can opt into enabling allocation / deallocation interposition,
+// whereby we replace calls to known allocation / deallocation functions with
+// calls to runtime implemented equivalents that forward the requests to
+// accelerator-aware interfaces. We also support freeing system allocated
+// memory that ends up in one of the runtime equivalents, since this can
+// happen if e.g. a library that was compiled without interposition returns
+// an allocation that can be validly passed to `free`.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/HipStdPar/HipStdPar.h"
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+#include <cassert>
+#include <string>
+#include <utility>
+
+using namespace llvm;
+
+template<typename T>
+static inline void eraseFromModule(T &ToErase) {
+ ToErase.replaceAllUsesWith(PoisonValue::get(ToErase.getType()));
+ ToErase.eraseFromParent();
+}
+
+static inline bool checkIfSupported(GlobalVariable &G) {
+ if (!G.isThreadLocal())
+ return true;
+
+ G.dropDroppableUses();
+
+ if (!G.isConstantUsed())
+ return true;
+
+ std::string W;
+ raw_string_ostream OS(W);
+
+ OS << "Accelerator does not support the thread_local variable "
+ << G.getName();
+
+ Instruction *I = nullptr;
+ SmallVector<User *> Tmp(G.user_begin(), G.user_end());
+ SmallPtrSet<User *, 5> Visited;
+ do {
+ auto U = std::move(Tmp.back());
+ Tmp.pop_back();
+
+ if (Visited.contains(U))
+ continue;
+
+ if (isa<Instruction>(U))
+ I = cast<Instruction>(U);
+ else
+ Tmp.insert(Tmp.end(), U->user_begin(), U->user_end());
+
+ Visited.insert(U);
+ } while (!I && !Tmp.empty());
+
+ assert(I && "thread_local global should have at least one non-constant use.");
+
+ G.getContext().diagnose(
+ DiagnosticInfoUnsupported(*I->getParent()->getParent(), W,
+ I->getDebugLoc(), DS_Error));
+
+ return false;
+}
+
+static inline void clearModule(Module &M) { // TODO: simplify.
+ while (!M.functions().empty())
+ eraseFromModule(*M.begin());
+ while (!M.globals().empty())
+ eraseFromModule(*M.globals().begin());
+ while (!M.aliases().empty())
+ eraseFromModule(*M.aliases().begin());
+ while (!M.ifuncs().empty())
+ eraseFromModule(*M.ifuncs().begin());
+}
+
+static inline void maybeHandleGlobals(Module &M) {
+ unsigned GlobAS = M.getDataLayout().getDefaultGlobalsAddressSpace();
+ for (auto &&G : M.globals()) { // TODO: should we handle these in the FE?
+ if (!checkIfSupported(G))
+ return clearModule(M);
+
+ if (G.isThreadLocal())
+ continue;
+ if (G.isConstant())
+ continue;
+ if (G.getAddressSpace() != GlobAS)
+ continue;
+ if (G.getLinkage() != GlobalVariable::ExternalLinkage)
+ continue;
+
+ G.setLinkage(GlobalVariable::ExternalWeakLinkage);
+ G.setExternallyInitialized(true);
+ }
+}
+
+template<unsigned N>
+static inline void removeUnreachableFunctions(
+ const SmallPtrSet<const Function *, N>& Reachable, Module &M) {
+ removeFromUsedLists(M, [&](Constant *C) {
+ if (auto F = dyn_cast<Function>(C))
+ return !Reachable.contains(F);
+
+ return false;
+ });
+
+ SmallVector<std::reference_wrapper<Function>> ToRemove;
+ copy_if(M, std::back_inserter(ToRemove), [&](auto &&F) {
+ return !F.isIntrinsic() && !Reachable.contains(&F);
+ });
+
+ for_each(ToRemove, eraseFromModule<Function>);
+}
+
+static inline bool isAcceleratorExecutionRoot(const Function *F) {
+ if (!F)
+ return false;
+
+ return F->getCallingConv() == CallingConv::AMDGPU_KERNEL;
+}
+
+static inline bool checkIfSupported(const Function *F, const CallBase *CB) {
+ const auto Dx = F->getName().rfind("__hipstdpar_unsupported");
+
+ if (Dx == StringRef::npos)
+ return true;
+
+ const auto N = F->getName().substr(0, Dx);
+
+ std::string W;
+ raw_string_ostream OS(W);
+
+ if (N == "__ASM")
+ OS << "Accelerator does not support the ASM block:\n"
+ << cast<ConstantDataArray>(CB->getArgOperand(0))->getAsCString();
+ else
+ OS << "Accelerator does not support the " << N << " function.";
+
+ auto Caller = CB->getParent()->getParent();
+
+ Caller->getContext().diagnose(
+ DiagnosticInfoUnsupported(*Caller, W, CB->getDebugLoc(), DS_Error));
+
+ return false;
+}
+
+PreservedAnalyses
+ HipStdParAcceleratorCodeSelectionPass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
+ auto &CGA = MAM.getResult<CallGraphAnalysis>(M);
+
+ SmallPtrSet<const Function *, 32> Reachable;
+ for (auto &&CGN : CGA) {
+ if (!isAcceleratorExecutionRoot(CGN.first))
+ continue;
+
+ Reachable.insert(CGN.first);
+
+ SmallVector<const Function *> Tmp({CGN.first});
+ do {
+ auto F = std::move(Tmp.back());
+ Tmp.pop_back();
+
+ for (auto &&N : *CGA[F]) {
+ if (!N.second)
+ continue;
+ if (!N.second->getFunction())
+ continue;
+ if (Reachable.contains(N.second->getFunction()))
+ continue;
+
+ if (!checkIfSupported(N.second->getFunction(),
+ dyn_cast<CallBase>(*N.first)))
+ return PreservedAnalyses::none();
+
+ Reachable.insert(N.second->getFunction());
+ Tmp.push_back(N.second->getFunction());
+ }
+ } while (!std::empty(Tmp));
+ }
+
+ if (std::empty(Reachable))
+ clearModule(M);
+ else
+ removeUnreachableFunctions(Reachable, M);
+
+ maybeHandleGlobals(M);
+
+ return PreservedAnalyses::none();
+}
+
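The run method above amounts to a forward reachability walk over the call graph rooted at the kernels. The same algorithm with LLVM types stripped away, as a self-contained sketch (all names illustrative):

#include <map>
#include <set>
#include <string>
#include <vector>

// Keep exactly the functions a kernel can reach, mirroring the worklist in
// HipStdParAcceleratorCodeSelectionPass::run at a whiteboard level.
using Graph = std::map<std::string, std::vector<std::string>>;

static std::set<std::string>
reachableFromKernels(const Graph &CG, const std::set<std::string> &Kernels) {
  std::set<std::string> Reachable(Kernels.begin(), Kernels.end());
  std::vector<std::string> Worklist(Kernels.begin(), Kernels.end());
  while (!Worklist.empty()) {
    std::string F = Worklist.back();
    Worklist.pop_back();
    auto It = CG.find(F);
    if (It == CG.end())
      continue;
    for (const std::string &Callee : It->second)
      if (Reachable.insert(Callee).second) // newly discovered
        Worklist.push_back(Callee);
  }
  return Reachable;
}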
+static constexpr std::pair<StringLiteral, StringLiteral> ReplaceMap[]{
+ {"aligned_alloc", "__hipstdpar_aligned_alloc"},
+ {"calloc", "__hipstdpar_calloc"},
+ {"free", "__hipstdpar_free"},
+ {"malloc", "__hipstdpar_malloc"},
+ {"memalign", "__hipstdpar_aligned_alloc"},
+ {"posix_memalign", "__hipstdpar_posix_aligned_alloc"},
+ {"realloc", "__hipstdpar_realloc"},
+ {"reallocarray", "__hipstdpar_realloc_array"},
+ {"_ZdaPv", "__hipstdpar_operator_delete"},
+ {"_ZdaPvm", "__hipstdpar_operator_delete_sized"},
+ {"_ZdaPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"},
+ {"_ZdaPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"},
+ {"_ZdlPv", "__hipstdpar_operator_delete"},
+ {"_ZdlPvm", "__hipstdpar_operator_delete_sized"},
+ {"_ZdlPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"},
+ {"_ZdlPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"},
+ {"_Znam", "__hipstdpar_operator_new"},
+ {"_ZnamRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"},
+ {"_ZnamSt11align_val_t", "__hipstdpar_operator_new_aligned"},
+ {"_ZnamSt11align_val_tRKSt9nothrow_t",
+ "__hipstdpar_operator_new_aligned_nothrow"},
+
+ {"_Znwm", "__hipstdpar_operator_new"},
+ {"_ZnwmRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"},
+ {"_ZnwmSt11align_val_t", "__hipstdpar_operator_new_aligned"},
+ {"_ZnwmSt11align_val_tRKSt9nothrow_t",
+ "__hipstdpar_operator_new_aligned_nothrow"},
+ {"__builtin_calloc", "__hipstdpar_calloc"},
+ {"__builtin_free", "__hipstdpar_free"},
+ {"__builtin_malloc", "__hipstdpar_malloc"},
+ {"__builtin_operator_delete", "__hipstdpar_operator_delete"},
+ {"__builtin_operator_new", "__hipstdpar_operator_new"},
+ {"__builtin_realloc", "__hipstdpar_realloc"},
+ {"__libc_calloc", "__hipstdpar_calloc"},
+ {"__libc_free", "__hipstdpar_free"},
+ {"__libc_malloc", "__hipstdpar_malloc"},
+ {"__libc_memalign", "__hipstdpar_aligned_alloc"},
+ {"__libc_realloc", "__hipstdpar_realloc"}
+};
+
+PreservedAnalyses
+HipStdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) {
+ SmallDenseMap<StringRef, StringRef> AllocReplacements(std::cbegin(ReplaceMap),
+ std::cend(ReplaceMap));
+
+ for (auto &&F : M) {
+ if (!F.hasName())
+ continue;
+ if (!AllocReplacements.contains(F.getName()))
+ continue;
+
+ if (auto R = M.getFunction(AllocReplacements[F.getName()])) {
+ F.replaceAllUsesWith(R);
+ } else {
+ std::string W;
+ raw_string_ostream OS(W);
+
+ OS << "cannot be interposed, missing: " << AllocReplacements[F.getName()]
+ << ". Tried to run the allocation interposition pass without the "
+ << "replacement functions available.";
+
+ F.getContext().diagnose(DiagnosticInfoUnsupported(F, W,
+ F.getSubprogram(),
+ DS_Warning));
+ }
+ }
+
+ if (auto F = M.getFunction("__hipstdpar_hidden_free")) {
+ auto LibcFree = M.getOrInsertFunction("__libc_free", F->getFunctionType(),
+ F->getAttributes());
+ F->replaceAllUsesWith(LibcFree.getCallee());
+
+ eraseFromModule(*F);
+ }
+
+ return PreservedAnalyses::none();
+}
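For the interposition pass to fire, each ReplaceMap entry assumes the runtime provides a symbol with the same signature as the function it replaces. What such a shim might look like, as a hedged sketch — the HIP calls are an assumption about the hosting runtime, not something the pass itself mandates:

#include <cstddef>
#include <hip/hip_runtime.h>

// Hypothetical runtime side of the __hipstdpar_malloc / __hipstdpar_free
// contract: forward to an accelerator-visible allocator so that device code
// can dereference host allocations.
extern "C" void *__hipstdpar_malloc(std::size_t Size) {
  void *P = nullptr;
  if (hipMallocManaged(&P, Size) != hipSuccess)
    return nullptr;
  return P;
}

extern "C" void __hipstdpar_free(void *P) { (void)hipFree(P); }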
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 824da6395f2e..fb3fa8d23daa 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -121,19 +121,24 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
// that we are *not* promoting. For the ones that we do promote, the parameter
// attributes are lost
SmallVector<AttributeSet, 8> ArgAttrVec;
+ // Mapping from old to new argument indices. -1 for promoted or removed
+ // arguments.
+ SmallVector<unsigned> NewArgIndices;
AttributeList PAL = F->getAttributes();
// First, determine the new argument list
- unsigned ArgNo = 0;
+ unsigned ArgNo = 0, NewArgNo = 0;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
++I, ++ArgNo) {
if (!ArgsToPromote.count(&*I)) {
// Unchanged argument
Params.push_back(I->getType());
ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));
+ NewArgIndices.push_back(NewArgNo++);
} else if (I->use_empty()) {
// Dead argument (which is always marked as promotable)
++NumArgumentsDead;
+ NewArgIndices.push_back((unsigned)-1);
} else {
const auto &ArgParts = ArgsToPromote.find(&*I)->second;
for (const auto &Pair : ArgParts) {
@@ -141,6 +146,8 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
ArgAttrVec.push_back(AttributeSet());
}
++NumArgumentsPromoted;
+ NewArgIndices.push_back((unsigned)-1);
+ NewArgNo += ArgParts.size();
}
}
@@ -154,6 +161,7 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
F->getName());
NF->copyAttributesFrom(F);
NF->copyMetadata(F, 0);
+ NF->setIsNewDbgInfoFormat(F->IsNewDbgInfoFormat);
// The new function will have the !dbg metadata copied from the original
// function. The original function may not be deleted, and dbg metadata need
@@ -173,6 +181,19 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
// the function.
NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(),
PAL.getRetAttrs(), ArgAttrVec));
+
+ // Remap argument indices in allocsize attribute.
+ if (auto AllocSize = NF->getAttributes().getFnAttrs().getAllocSizeArgs()) {
+ unsigned Arg1 = NewArgIndices[AllocSize->first];
+ assert(Arg1 != (unsigned)-1 && "allocsize cannot refer to a promoted argument");
+ std::optional<unsigned> Arg2;
+ if (AllocSize->second) {
+ Arg2 = NewArgIndices[*AllocSize->second];
+ assert(Arg2 != (unsigned)-1 && "allocsize cannot refer to a promoted argument");
+ }
+ NF->addFnAttr(Attribute::getWithAllocSizeArgs(F->getContext(), Arg1, Arg2));
+ }
+
AttributeFuncs::updateMinLegalVectorWidthAttr(*NF, LargestVectorWidth);
ArgAttrVec.clear();
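The NewArgIndices bookkeeping matters because allocsize names parameters by position, so promoting an earlier argument shifts the index. A self-contained sketch of the remap with hypothetical values:

#include <cassert>
#include <optional>
#include <vector>

int main() {
  // Old signature: f(ptr %s, i64 %n) with allocsize(1); argument 0 is
  // promoted into two scalar parts, so %n lands at index 2.
  std::vector<unsigned> NewArgIndices;
  NewArgIndices.push_back(unsigned(-1)); // arg 0: promoted, no direct mapping
  NewArgIndices.push_back(2);            // arg 1: shifted past the two parts
  std::optional<unsigned> OldAllocSizeArg = 1;
  unsigned NewAllocSizeArg = NewArgIndices[*OldAllocSizeArg];
  assert(NewAllocSizeArg == 2 && "allocsize(1) must become allocsize(2)");
  return 0;
}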
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
index 847d07a49dee..d8e290cbc8a4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
@@ -50,6 +51,7 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cstdint>
+#include <memory>
#ifdef EXPENSIVE_CHECKS
#include "llvm/IR/Verifier.h"
@@ -93,6 +95,13 @@ static cl::opt<unsigned>
cl::desc("Maximal number of fixpoint iterations."),
cl::init(32));
+static cl::opt<unsigned>
+ MaxSpecializationPerCB("attributor-max-specializations-per-call-base",
+ cl::Hidden,
+ cl::desc("Maximal number of callees specialized for "
+ "a call base"),
+ cl::init(UINT32_MAX));
+
static cl::opt<unsigned, true> MaxInitializationChainLengthX(
"attributor-max-initialization-chain-length", cl::Hidden,
cl::desc(
@@ -166,6 +175,10 @@ static cl::opt<bool> SimplifyAllLoads("attributor-simplify-all-loads",
cl::desc("Try to simplify all loads."),
cl::init(true));
+static cl::opt<bool> CloseWorldAssumption(
+ "attributor-assume-closed-world", cl::Hidden,
+ cl::desc("Should a closed world be assumed, or not. Default if not set."));
+
/// Logic operators for the change status enum class.
///
///{
@@ -226,10 +239,10 @@ bool AA::isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA,
return InstanceInfoAA && InstanceInfoAA->isAssumedUniqueForAnalysis();
}
-Constant *AA::getInitialValueForObj(Attributor &A, Value &Obj, Type &Ty,
- const TargetLibraryInfo *TLI,
- const DataLayout &DL,
- AA::RangeTy *RangePtr) {
+Constant *
+AA::getInitialValueForObj(Attributor &A, const AbstractAttribute &QueryingAA,
+ Value &Obj, Type &Ty, const TargetLibraryInfo *TLI,
+ const DataLayout &DL, AA::RangeTy *RangePtr) {
if (isa<AllocaInst>(Obj))
return UndefValue::get(&Ty);
if (Constant *Init = getInitialValueOfAllocation(&Obj, TLI, &Ty))
@@ -242,12 +255,13 @@ Constant *AA::getInitialValueForObj(Attributor &A, Value &Obj, Type &Ty,
Constant *Initializer = nullptr;
if (A.hasGlobalVariableSimplificationCallback(*GV)) {
auto AssumedGV = A.getAssumedInitializerFromCallBack(
- *GV, /* const AbstractAttribute *AA */ nullptr, UsedAssumedInformation);
+ *GV, &QueryingAA, UsedAssumedInformation);
Initializer = *AssumedGV;
if (!Initializer)
return nullptr;
} else {
- if (!GV->hasLocalLinkage() && !(GV->isConstant() && GV->hasInitializer()))
+ if (!GV->hasLocalLinkage() &&
+ (GV->isInterposable() || !(GV->isConstant() && GV->hasInitializer())))
return nullptr;
if (!GV->hasInitializer())
return UndefValue::get(&Ty);
@@ -316,7 +330,7 @@ Value *AA::getWithType(Value &V, Type &Ty) {
if (C->getType()->isIntegerTy() && Ty.isIntegerTy())
return ConstantExpr::getTrunc(C, &Ty, /* OnlyIfReduced */ true);
if (C->getType()->isFloatingPointTy() && Ty.isFloatingPointTy())
- return ConstantExpr::getFPTrunc(C, &Ty, /* OnlyIfReduced */ true);
+ return ConstantFoldCastInstruction(Instruction::FPTrunc, C, &Ty);
}
}
return nullptr;
@@ -350,7 +364,7 @@ AA::combineOptionalValuesInAAValueLatice(const std::optional<Value *> &A,
template <bool IsLoad, typename Ty>
static bool getPotentialCopiesOfMemoryValue(
Attributor &A, Ty &I, SmallSetVector<Value *, 4> &PotentialCopies,
- SmallSetVector<Instruction *, 4> &PotentialValueOrigins,
+ SmallSetVector<Instruction *, 4> *PotentialValueOrigins,
const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation,
bool OnlyExact) {
LLVM_DEBUG(dbgs() << "Trying to determine the potential copies of " << I
@@ -361,8 +375,8 @@ static bool getPotentialCopiesOfMemoryValue(
// sure that we can find all of them. If we abort we want to avoid spurious
// dependences and potential copies in the provided container.
SmallVector<const AAPointerInfo *> PIs;
- SmallVector<Value *> NewCopies;
- SmallVector<Instruction *> NewCopyOrigins;
+ SmallSetVector<Value *, 8> NewCopies;
+ SmallSetVector<Instruction *, 8> NewCopyOrigins;
const auto *TLI =
A.getInfoCache().getTargetLibraryInfoForFunction(*I.getFunction());
@@ -425,6 +439,30 @@ static bool getPotentialCopiesOfMemoryValue(
return AdjV;
};
+ auto SkipCB = [&](const AAPointerInfo::Access &Acc) {
+ if ((IsLoad && !Acc.isWriteOrAssumption()) || (!IsLoad && !Acc.isRead()))
+ return true;
+ if (IsLoad) {
+ if (Acc.isWrittenValueYetUndetermined())
+ return true;
+ if (PotentialValueOrigins && !isa<AssumeInst>(Acc.getRemoteInst()))
+ return false;
+ if (!Acc.isWrittenValueUnknown())
+ if (Value *V = AdjustWrittenValueType(Acc, *Acc.getWrittenValue()))
+ if (NewCopies.count(V)) {
+ NewCopyOrigins.insert(Acc.getRemoteInst());
+ return true;
+ }
+ if (auto *SI = dyn_cast<StoreInst>(Acc.getRemoteInst()))
+ if (Value *V = AdjustWrittenValueType(Acc, *SI->getValueOperand()))
+ if (NewCopies.count(V)) {
+ NewCopyOrigins.insert(Acc.getRemoteInst());
+ return true;
+ }
+ }
+ return false;
+ };
+
auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) {
if ((IsLoad && !Acc.isWriteOrAssumption()) || (!IsLoad && !Acc.isRead()))
return true;
@@ -449,8 +487,9 @@ static bool getPotentialCopiesOfMemoryValue(
Value *V = AdjustWrittenValueType(Acc, *Acc.getWrittenValue());
if (!V)
return false;
- NewCopies.push_back(V);
- NewCopyOrigins.push_back(Acc.getRemoteInst());
+ NewCopies.insert(V);
+ if (PotentialValueOrigins)
+ NewCopyOrigins.insert(Acc.getRemoteInst());
return true;
}
auto *SI = dyn_cast<StoreInst>(Acc.getRemoteInst());
@@ -463,8 +502,9 @@ static bool getPotentialCopiesOfMemoryValue(
Value *V = AdjustWrittenValueType(Acc, *SI->getValueOperand());
if (!V)
return false;
- NewCopies.push_back(V);
- NewCopyOrigins.push_back(SI);
+ NewCopies.insert(V);
+ if (PotentialValueOrigins)
+ NewCopyOrigins.insert(SI);
} else {
assert(isa<StoreInst>(I) && "Expected load or store instruction only!");
auto *LI = dyn_cast<LoadInst>(Acc.getRemoteInst());
@@ -474,7 +514,7 @@ static bool getPotentialCopiesOfMemoryValue(
<< *Acc.getRemoteInst() << "\n";);
return false;
}
- NewCopies.push_back(Acc.getRemoteInst());
+ NewCopies.insert(Acc.getRemoteInst());
}
return true;
};
@@ -486,11 +526,11 @@ static bool getPotentialCopiesOfMemoryValue(
AA::RangeTy Range;
auto *PI = A.getAAFor<AAPointerInfo>(QueryingAA, IRPosition::value(Obj),
DepClassTy::NONE);
- if (!PI ||
- !PI->forallInterferingAccesses(A, QueryingAA, I,
- /* FindInterferingWrites */ IsLoad,
- /* FindInterferingReads */ !IsLoad,
- CheckAccess, HasBeenWrittenTo, Range)) {
+ if (!PI || !PI->forallInterferingAccesses(
+ A, QueryingAA, I,
+ /* FindInterferingWrites */ IsLoad,
+ /* FindInterferingReads */ !IsLoad, CheckAccess,
+ HasBeenWrittenTo, Range, SkipCB)) {
LLVM_DEBUG(
dbgs()
<< "Failed to verify all interfering accesses for underlying object: "
@@ -500,8 +540,8 @@ static bool getPotentialCopiesOfMemoryValue(
if (IsLoad && !HasBeenWrittenTo && !Range.isUnassigned()) {
const DataLayout &DL = A.getDataLayout();
- Value *InitialValue =
- AA::getInitialValueForObj(A, Obj, *I.getType(), TLI, DL, &Range);
+ Value *InitialValue = AA::getInitialValueForObj(
+ A, QueryingAA, Obj, *I.getType(), TLI, DL, &Range);
if (!InitialValue) {
LLVM_DEBUG(dbgs() << "Could not determine required initial value of "
"underlying object, abort!\n");
@@ -514,8 +554,9 @@ static bool getPotentialCopiesOfMemoryValue(
return false;
}
- NewCopies.push_back(InitialValue);
- NewCopyOrigins.push_back(nullptr);
+ NewCopies.insert(InitialValue);
+ if (PotentialValueOrigins)
+ NewCopyOrigins.insert(nullptr);
}
PIs.push_back(PI);
@@ -540,7 +581,8 @@ static bool getPotentialCopiesOfMemoryValue(
A.recordDependence(*PI, QueryingAA, DepClassTy::OPTIONAL);
}
PotentialCopies.insert(NewCopies.begin(), NewCopies.end());
- PotentialValueOrigins.insert(NewCopyOrigins.begin(), NewCopyOrigins.end());
+ if (PotentialValueOrigins)
+ PotentialValueOrigins->insert(NewCopyOrigins.begin(), NewCopyOrigins.end());
return true;
}
@@ -551,7 +593,7 @@ bool AA::getPotentiallyLoadedValues(
const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation,
bool OnlyExact) {
return getPotentialCopiesOfMemoryValue</* IsLoad */ true>(
- A, LI, PotentialValues, PotentialValueOrigins, QueryingAA,
+ A, LI, PotentialValues, &PotentialValueOrigins, QueryingAA,
UsedAssumedInformation, OnlyExact);
}
@@ -559,10 +601,9 @@ bool AA::getPotentialCopiesOfStoredValue(
Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies,
const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation,
bool OnlyExact) {
- SmallSetVector<Instruction *, 4> PotentialValueOrigins;
return getPotentialCopiesOfMemoryValue</* IsLoad */ false>(
- A, SI, PotentialCopies, PotentialValueOrigins, QueryingAA,
- UsedAssumedInformation, OnlyExact);
+ A, SI, PotentialCopies, nullptr, QueryingAA, UsedAssumedInformation,
+ OnlyExact);
}
static bool isAssumedReadOnlyOrReadNone(Attributor &A, const IRPosition &IRP,
@@ -723,7 +764,7 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI,
// Check if we can reach returns.
bool UsedAssumedInformation = false;
- if (A.checkForAllInstructions(ReturnInstCB, FromFn, QueryingAA,
+ if (A.checkForAllInstructions(ReturnInstCB, FromFn, &QueryingAA,
{Instruction::Ret}, UsedAssumedInformation)) {
LLVM_DEBUG(dbgs() << "[AA] No return is reachable, done\n");
continue;
@@ -1021,6 +1062,23 @@ ChangeStatus AbstractAttribute::update(Attributor &A) {
return HasChanged;
}
+Attributor::Attributor(SetVector<Function *> &Functions,
+ InformationCache &InfoCache,
+ AttributorConfig Configuration)
+ : Allocator(InfoCache.Allocator), Functions(Functions),
+ InfoCache(InfoCache), Configuration(Configuration) {
+ if (!isClosedWorldModule())
+ return;
+ for (Function *Fn : Functions)
+ if (Fn->hasAddressTaken(/*PutOffender=*/nullptr,
+ /*IgnoreCallbackUses=*/false,
+ /*IgnoreAssumeLikeCalls=*/true,
+ /*IgnoreLLVMUsed=*/true,
+ /*IgnoreARCAttachedCall=*/false,
+ /*IgnoreCastedDirectCall=*/true))
+ InfoCache.IndirectlyCallableFunctions.push_back(Fn);
+}
+
bool Attributor::getAttrsFromAssumes(const IRPosition &IRP,
Attribute::AttrKind AK,
SmallVectorImpl<Attribute> &Attrs) {
@@ -1053,8 +1111,7 @@ bool Attributor::getAttrsFromAssumes(const IRPosition &IRP,
template <typename DescTy>
ChangeStatus
-Attributor::updateAttrMap(const IRPosition &IRP,
- const ArrayRef<DescTy> &AttrDescs,
+Attributor::updateAttrMap(const IRPosition &IRP, ArrayRef<DescTy> AttrDescs,
function_ref<bool(const DescTy &, AttributeSet,
AttributeMask &, AttrBuilder &)>
CB) {
@@ -1161,9 +1218,8 @@ void Attributor::getAttrs(const IRPosition &IRP,
getAttrsFromAssumes(IRP, AK, Attrs);
}
-ChangeStatus
-Attributor::removeAttrs(const IRPosition &IRP,
- const ArrayRef<Attribute::AttrKind> &AttrKinds) {
+ChangeStatus Attributor::removeAttrs(const IRPosition &IRP,
+ ArrayRef<Attribute::AttrKind> AttrKinds) {
auto RemoveAttrCB = [&](const Attribute::AttrKind &Kind, AttributeSet AttrSet,
AttributeMask &AM, AttrBuilder &) {
if (!AttrSet.hasAttribute(Kind))
@@ -1174,8 +1230,21 @@ Attributor::removeAttrs(const IRPosition &IRP,
return updateAttrMap<Attribute::AttrKind>(IRP, AttrKinds, RemoveAttrCB);
}
+ChangeStatus Attributor::removeAttrs(const IRPosition &IRP,
+ ArrayRef<StringRef> Attrs) {
+ auto RemoveAttrCB = [&](StringRef Attr, AttributeSet AttrSet,
+ AttributeMask &AM, AttrBuilder &) -> bool {
+ if (!AttrSet.hasAttribute(Attr))
+ return false;
+ AM.addAttribute(Attr);
+ return true;
+ };
+
+ return updateAttrMap<StringRef>(IRP, Attrs, RemoveAttrCB);
+}
+
ChangeStatus Attributor::manifestAttrs(const IRPosition &IRP,
- const ArrayRef<Attribute> &Attrs,
+ ArrayRef<Attribute> Attrs,
bool ForceReplace) {
LLVMContext &Ctx = IRP.getAnchorValue().getContext();
auto AddAttrCB = [&](const Attribute &Attr, AttributeSet AttrSet,
@@ -1665,6 +1734,21 @@ bool Attributor::isAssumedDead(const BasicBlock &BB,
return false;
}
+bool Attributor::checkForAllCallees(
+ function_ref<bool(ArrayRef<const Function *>)> Pred,
+ const AbstractAttribute &QueryingAA, const CallBase &CB) {
+ if (const Function *Callee = dyn_cast<Function>(CB.getCalledOperand()))
+ return Pred(Callee);
+
+ const auto *CallEdgesAA = getAAFor<AACallEdges>(
+ QueryingAA, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
+ if (!CallEdgesAA || CallEdgesAA->hasUnknownCallee())
+ return false;
+
+ const auto &Callees = CallEdgesAA->getOptimisticEdges();
+ return Pred(Callees.getArrayRef());
+}
+
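checkForAllCallees generalizes "look at the callee" to the optimistic callee set of an indirect call, failing when that set cannot be bounded. A caller-side sketch using the signature from the hunk above (the helper name is illustrative):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Transforms/IPO/Attributor.h"
using namespace llvm;

// True only if every callee the Attributor can enumerate for CB carries
// nounwind; false when unknown indirect targets remain possible.
static bool allCalleesNoUnwind(Attributor &A,
                               const AbstractAttribute &QueryingAA,
                               const CallBase &CB) {
  return A.checkForAllCallees(
      [](ArrayRef<const Function *> Callees) {
        return all_of(Callees, [](const Function *F) {
          return F->hasFnAttribute(Attribute::NoUnwind);
        });
      },
      QueryingAA, CB);
}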
bool Attributor::checkForAllUses(
function_ref<bool(const Use &, bool &)> Pred,
const AbstractAttribute &QueryingAA, const Value &V,
@@ -1938,7 +2022,7 @@ bool Attributor::checkForAllReturnedValues(function_ref<bool(Value &)> Pred,
static bool checkForAllInstructionsImpl(
Attributor *A, InformationCache::OpcodeInstMapTy &OpcodeInstMap,
function_ref<bool(Instruction &)> Pred, const AbstractAttribute *QueryingAA,
- const AAIsDead *LivenessAA, const ArrayRef<unsigned> &Opcodes,
+ const AAIsDead *LivenessAA, ArrayRef<unsigned> Opcodes,
bool &UsedAssumedInformation, bool CheckBBLivenessOnly = false,
bool CheckPotentiallyDead = false) {
for (unsigned Opcode : Opcodes) {
@@ -1967,8 +2051,8 @@ static bool checkForAllInstructionsImpl(
bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
const Function *Fn,
- const AbstractAttribute &QueryingAA,
- const ArrayRef<unsigned> &Opcodes,
+ const AbstractAttribute *QueryingAA,
+ ArrayRef<unsigned> Opcodes,
bool &UsedAssumedInformation,
bool CheckBBLivenessOnly,
bool CheckPotentiallyDead) {
@@ -1978,12 +2062,12 @@ bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
const IRPosition &QueryIRP = IRPosition::function(*Fn);
const auto *LivenessAA =
- CheckPotentiallyDead
- ? nullptr
- : (getAAFor<AAIsDead>(QueryingAA, QueryIRP, DepClassTy::NONE));
+ CheckPotentiallyDead && QueryingAA
+ ? (getAAFor<AAIsDead>(*QueryingAA, QueryIRP, DepClassTy::NONE))
+ : nullptr;
auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(*Fn);
- if (!checkForAllInstructionsImpl(this, OpcodeInstMap, Pred, &QueryingAA,
+ if (!checkForAllInstructionsImpl(this, OpcodeInstMap, Pred, QueryingAA,
LivenessAA, Opcodes, UsedAssumedInformation,
CheckBBLivenessOnly, CheckPotentiallyDead))
return false;
@@ -1993,13 +2077,13 @@ bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
const AbstractAttribute &QueryingAA,
- const ArrayRef<unsigned> &Opcodes,
+ ArrayRef<unsigned> Opcodes,
bool &UsedAssumedInformation,
bool CheckBBLivenessOnly,
bool CheckPotentiallyDead) {
const IRPosition &IRP = QueryingAA.getIRPosition();
const Function *AssociatedFunction = IRP.getAssociatedFunction();
- return checkForAllInstructions(Pred, AssociatedFunction, QueryingAA, Opcodes,
+ return checkForAllInstructions(Pred, AssociatedFunction, &QueryingAA, Opcodes,
UsedAssumedInformation, CheckBBLivenessOnly,
CheckPotentiallyDead);
}
@@ -2964,6 +3048,18 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
NewArgumentAttributes));
AttributeFuncs::updateMinLegalVectorWidthAttr(*NewFn, LargestVectorWidth);
+ // Remove argmem from the memory effects if we have no more pointer
+ // arguments, or they are readnone.
+ MemoryEffects ME = NewFn->getMemoryEffects();
+ int ArgNo = -1;
+ if (ME.doesAccessArgPointees() && all_of(NewArgumentTypes, [&](Type *T) {
+ ++ArgNo;
+ return !T->isPtrOrPtrVectorTy() ||
+ NewFn->hasParamAttribute(ArgNo, Attribute::ReadNone);
+ })) {
+ NewFn->setMemoryEffects(ME - MemoryEffects::argMemOnly());
+ }
+
// Since we have now created the new function, splice the body of the old
// function right into the new function, leaving the old rotting hulk of the
// function empty.
@@ -3203,6 +3299,12 @@ InformationCache::FunctionInfo::~FunctionInfo() {
It.getSecond()->~InstructionVectorTy();
}
+const ArrayRef<Function *>
+InformationCache::getIndirectlyCallableFunctions(Attributor &A) const {
+ assert(A.isClosedWorldModule() && "Cannot see all indirect callees!");
+ return IndirectlyCallableFunctions;
+}
+
void Attributor::recordDependence(const AbstractAttribute &FromAA,
const AbstractAttribute &ToAA,
DepClassTy DepClass) {
@@ -3236,9 +3338,10 @@ void Attributor::checkAndQueryIRAttr(const IRPosition &IRP,
AttributeSet Attrs) {
bool IsKnown;
if (!Attrs.hasAttribute(AK))
- if (!AA::hasAssumedIRAttr<AK>(*this, nullptr, IRP, DepClassTy::NONE,
- IsKnown))
- getOrCreateAAFor<AAType>(IRP);
+ if (!Configuration.Allowed || Configuration.Allowed->count(&AAType::ID))
+ if (!AA::hasAssumedIRAttr<AK>(*this, nullptr, IRP, DepClassTy::NONE,
+ IsKnown))
+ getOrCreateAAFor<AAType>(IRP);
}
void Attributor::identifyDefaultAbstractAttributes(Function &F) {
@@ -3285,6 +3388,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Every function might be "will-return".
checkAndQueryIRAttr<Attribute::WillReturn, AAWillReturn>(FPos, FnAttrs);
+ // Every function might be marked "nosync"
+ checkAndQueryIRAttr<Attribute::NoSync, AANoSync>(FPos, FnAttrs);
+
// Everything that is visible from the outside (=function, argument, return
// positions), cannot be changed if the function is not IPO amendable. We can
// however analyse the code inside.
@@ -3293,9 +3399,6 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Every function can be nounwind.
checkAndQueryIRAttr<Attribute::NoUnwind, AANoUnwind>(FPos, FnAttrs);
- // Every function might be marked "nosync"
- checkAndQueryIRAttr<Attribute::NoSync, AANoSync>(FPos, FnAttrs);
-
// Every function might be "no-return".
checkAndQueryIRAttr<Attribute::NoReturn, AANoReturn>(FPos, FnAttrs);
@@ -3315,6 +3418,14 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Every function can track active assumptions.
getOrCreateAAFor<AAAssumptionInfo>(FPos);
+ // If we're not using a dynamic mode for float, there's nothing worthwhile
+ // to infer. This misses the edge case denormal-fp-math="dynamic" and
+ // denormal-fp-math-f32=something, but that likely has no real world use.
+ DenormalMode Mode = F.getDenormalMode(APFloat::IEEEsingle());
+ if (Mode.Input == DenormalMode::Dynamic ||
+ Mode.Output == DenormalMode::Dynamic)
+ getOrCreateAAFor<AADenormalFPMath>(FPos);
+
// Return attributes are only appropriate if the return type is non void.
Type *ReturnType = F.getReturnType();
if (!ReturnType->isVoidTy()) {
@@ -3420,8 +3531,10 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
Function *Callee = dyn_cast_if_present<Function>(CB.getCalledOperand());
// TODO: Even if the callee is not known now we might be able to simplify
// the call/callee.
- if (!Callee)
+ if (!Callee) {
+ getOrCreateAAFor<AAIndirectCallInfo>(CBFnPos);
return true;
+ }
// Every call site can track active assumptions.
getOrCreateAAFor<AAAssumptionInfo>(CBFnPos);
@@ -3498,14 +3611,13 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
};
auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
- bool Success;
+ [[maybe_unused]] bool Success;
bool UsedAssumedInformation = false;
Success = checkForAllInstructionsImpl(
nullptr, OpcodeInstMap, CallSitePred, nullptr, nullptr,
{(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
(unsigned)Instruction::Call},
UsedAssumedInformation);
- (void)Success;
assert(Success && "Expected the check call to be successful!");
auto LoadStorePred = [&](Instruction &I) -> bool {
@@ -3531,10 +3643,26 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
nullptr, OpcodeInstMap, LoadStorePred, nullptr, nullptr,
{(unsigned)Instruction::Load, (unsigned)Instruction::Store},
UsedAssumedInformation);
- (void)Success;
+ assert(Success && "Expected the check call to be successful!");
+
+ // Seed AAAllocationInfo for every alloca instruction.
+ auto AAAllocationInfoPred = [&](Instruction &I) -> bool {
+ getOrCreateAAFor<AAAllocationInfo>(IRPosition::value(I));
+ return true;
+ };
+
+ Success = checkForAllInstructionsImpl(
+ nullptr, OpcodeInstMap, AAAllocationInfoPred, nullptr, nullptr,
+ {(unsigned)Instruction::Alloca}, UsedAssumedInformation);
assert(Success && "Expected the check call to be successful!");
}
+bool Attributor::isClosedWorldModule() const {
+ if (CloseWorldAssumption.getNumOccurrences())
+ return CloseWorldAssumption;
+ return isModulePass() && Configuration.IsClosedWorldModule;
+}
+
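The getNumOccurrences() test is the usual cl::opt pattern for "an explicit flag beats the embedding configuration" (e.g. opt -passes=attributor -attributor-assume-closed-world=1). The pattern in isolation, with a hypothetical flag name:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

static cl::opt<bool> AssumeClosed("assume-closed", cl::Hidden,
                                  cl::desc("Hypothetical override flag."));

// Honor the flag only if the user actually passed it; otherwise defer to
// whatever the embedding configuration decided.
static bool isClosedWorld(bool ConfigDefault) {
  if (AssumeClosed.getNumOccurrences())
    return AssumeClosed;
  return ConfigDefault;
}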
/// Helpers to ease debugging through output streams and print calls.
///
///{
@@ -3696,6 +3824,26 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache,
AttributorConfig AC(CGUpdater);
AC.IsModulePass = IsModulePass;
AC.DeleteFns = DeleteFns;
+
+ /// Tracking callback for specialization of indirect calls.
+ DenseMap<CallBase *, std::unique_ptr<SmallPtrSet<Function *, 8>>>
+ IndirectCalleeTrackingMap;
+ if (MaxSpecializationPerCB.getNumOccurrences()) {
+ AC.IndirectCalleeSpecializationCallback =
+ [&](Attributor &, const AbstractAttribute &AA, CallBase &CB,
+ Function &Callee) {
+ if (MaxSpecializationPerCB == 0)
+ return false;
+ auto &Set = IndirectCalleeTrackingMap[&CB];
+ if (!Set)
+ Set = std::make_unique<SmallPtrSet<Function *, 8>>();
+ if (Set->size() >= MaxSpecializationPerCB)
+ return Set->contains(&Callee);
+ Set->insert(&Callee);
+ return true;
+ };
+ }
+
Attributor A(Functions, InfoCache, AC);
// Create shallow wrappers for all functions that are not IPO amendable
@@ -3759,6 +3907,88 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache,
return Changed == ChangeStatus::CHANGED;
}
+static bool runAttributorLightOnFunctions(InformationCache &InfoCache,
+ SetVector<Function *> &Functions,
+ AnalysisGetter &AG,
+ CallGraphUpdater &CGUpdater,
+ FunctionAnalysisManager &FAM,
+ bool IsModulePass) {
+ if (Functions.empty())
+ return false;
+
+ LLVM_DEBUG({
+ dbgs() << "[AttributorLight] Run on module with " << Functions.size()
+ << " functions:\n";
+ for (Function *Fn : Functions)
+ dbgs() << " - " << Fn->getName() << "\n";
+ });
+
+ // Create an Attributor and initially empty information cache that is filled
+ // while we identify default attribute opportunities.
+ AttributorConfig AC(CGUpdater);
+ AC.IsModulePass = IsModulePass;
+ AC.DeleteFns = false;
+ DenseSet<const char *> Allowed(
+ {&AAWillReturn::ID, &AANoUnwind::ID, &AANoRecurse::ID, &AANoSync::ID,
+ &AANoFree::ID, &AANoReturn::ID, &AAMemoryLocation::ID,
+ &AAMemoryBehavior::ID, &AAUnderlyingObjects::ID, &AANoCapture::ID,
+ &AAInterFnReachability::ID, &AAIntraFnReachability::ID, &AACallEdges::ID,
+ &AANoFPClass::ID, &AAMustProgress::ID, &AANonNull::ID});
+ AC.Allowed = &Allowed;
+ AC.UseLiveness = false;
+
+ Attributor A(Functions, InfoCache, AC);
+
+ for (Function *F : Functions) {
+ if (F->hasExactDefinition())
+ NumFnWithExactDefinition++;
+ else
+ NumFnWithoutExactDefinition++;
+
+ // We look at internal functions only on-demand, but if any use is not a
+ // direct call, or lies outside the current set of analyzed functions, we
+ // have to do it eagerly.
+ if (F->hasLocalLinkage()) {
+ if (llvm::all_of(F->uses(), [&Functions](const Use &U) {
+ const auto *CB = dyn_cast<CallBase>(U.getUser());
+ return CB && CB->isCallee(&U) &&
+ Functions.count(const_cast<Function *>(CB->getCaller()));
+ }))
+ continue;
+ }
+
+ // Populate the Attributor with abstract attribute opportunities in the
+ // function and the information cache with IR information.
+ A.identifyDefaultAbstractAttributes(*F);
+ }
+
+ ChangeStatus Changed = A.run();
+
+ if (Changed == ChangeStatus::CHANGED) {
+ // Invalidate analyses for modified functions so that we don't have to
+ // invalidate all analyses for all functions in this SCC.
+ PreservedAnalyses FuncPA;
+ // We haven't changed the CFG for modified functions.
+ FuncPA.preserveSet<CFGAnalyses>();
+ for (Function *Changed : A.getModifiedFunctions()) {
+ FAM.invalidate(*Changed, FuncPA);
+ // Also invalidate any direct callers of changed functions since analyses
+ // may care about attributes of direct callees. For example, MemorySSA
+ // cares about whether or not a call's callee modifies memory and queries
+ // that through function attributes.
+ for (auto *U : Changed->users()) {
+ if (auto *Call = dyn_cast<CallBase>(U)) {
+ if (Call->getCalledFunction() == Changed)
+ FAM.invalidate(*Call->getFunction(), FuncPA);
+ }
+ }
+ }
+ }
+ LLVM_DEBUG(dbgs() << "[Attributor] Done with " << Functions.size()
+ << " functions, result: " << Changed << ".\n");
+ return Changed == ChangeStatus::CHANGED;
+}
+
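AttributorLight is the cheap, inference-only variant: no deletions, no signature rewrites, only the Allowed set listed above. Wiring it into a new-pass-manager pipeline, as a sketch assuming the header exposes AttributorLightPass as this patch suggests:

#include "llvm/IR/Module.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/IPO/Attributor.h"
using namespace llvm;

// Minimal new-PM boilerplate around a single AttributorLightPass run.
static void runAttributorLight(Module &M) {
  PassBuilder PB;
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
  ModulePassManager MPM;
  MPM.addPass(AttributorLightPass());
  MPM.run(M, MAM);
}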
void AADepGraph::viewGraph() { llvm::ViewGraph(this, "Dependency Graph"); }
void AADepGraph::dumpGraph() {
@@ -3839,6 +4069,62 @@ PreservedAnalyses AttributorCGSCCPass::run(LazyCallGraph::SCC &C,
return PreservedAnalyses::all();
}
+PreservedAnalyses AttributorLightPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ AnalysisGetter AG(FAM, /* CachedOnly */ true);
+
+ SetVector<Function *> Functions;
+ for (Function &F : M)
+ Functions.insert(&F);
+
+ CallGraphUpdater CGUpdater;
+ BumpPtrAllocator Allocator;
+ InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ nullptr);
+ if (runAttributorLightOnFunctions(InfoCache, Functions, AG, CGUpdater, FAM,
+ /* IsModulePass */ true)) {
+ PreservedAnalyses PA;
+ // We have not added or removed functions.
+ PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+ // We already invalidated all relevant function analyses above.
+ PA.preserveSet<AllAnalysesOn<Function>>();
+ return PA;
+ }
+ return PreservedAnalyses::all();
+}
+
+PreservedAnalyses AttributorLightCGSCCPass::run(LazyCallGraph::SCC &C,
+ CGSCCAnalysisManager &AM,
+ LazyCallGraph &CG,
+ CGSCCUpdateResult &UR) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
+ AnalysisGetter AG(FAM);
+
+ SetVector<Function *> Functions;
+ for (LazyCallGraph::Node &N : C)
+ Functions.insert(&N.getFunction());
+
+ if (Functions.empty())
+ return PreservedAnalyses::all();
+
+ Module &M = *Functions.back()->getParent();
+ CallGraphUpdater CGUpdater;
+ CGUpdater.initialize(CG, C, AM, UR);
+ BumpPtrAllocator Allocator;
+ InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ &Functions);
+ if (runAttributorLightOnFunctions(InfoCache, Functions, AG, CGUpdater, FAM,
+ /* IsModulePass */ false)) {
+ PreservedAnalyses PA;
+ // We have not added or removed functions.
+ PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+ // We already invalidated all relevant function analyses above.
+ PA.preserveSet<AllAnalysesOn<Function>>();
+ return PA;
+ }
+ return PreservedAnalyses::all();
+}
namespace llvm {
template <> struct GraphTraits<AADepGraphNode *> {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 3a9a89d61355..8e1f782f7cd8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -55,6 +55,7 @@
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
@@ -64,12 +65,16 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TypeSize.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <cassert>
#include <numeric>
#include <optional>
+#include <string>
using namespace llvm;
@@ -188,6 +193,10 @@ PIPE_OPERATOR(AAPointerInfo)
PIPE_OPERATOR(AAAssumptionInfo)
PIPE_OPERATOR(AAUnderlyingObjects)
PIPE_OPERATOR(AAAddressSpace)
+PIPE_OPERATOR(AAAllocationInfo)
+PIPE_OPERATOR(AAIndirectCallInfo)
+PIPE_OPERATOR(AAGlobalValueInfo)
+PIPE_OPERATOR(AADenormalFPMath)
#undef PIPE_OPERATOR
@@ -281,20 +290,19 @@ static const Value *getPointerOperand(const Instruction *I,
return nullptr;
}
-/// Helper function to create a pointer of type \p ResTy, based on \p Ptr, and
-/// advanced by \p Offset bytes. To aid later analysis the method tries to build
+/// Helper function to create a pointer based on \p Ptr, advanced by \p
+/// Offset bytes. To aid later analysis the method tries to build
/// getelementptr instructions that traverse the natural type of \p Ptr if
/// possible. If that fails, the remaining offset is adjusted byte-wise
/// through an i8 GEP.
///
/// TODO: This could probably live somewhere more prominently if it doesn't
/// already exist.
-static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr,
- int64_t Offset, IRBuilder<NoFolder> &IRB,
- const DataLayout &DL) {
+static Value *constructPointer(Type *PtrElemTy, Value *Ptr, int64_t Offset,
+ IRBuilder<NoFolder> &IRB, const DataLayout &DL) {
assert(Offset >= 0 && "Negative offset not supported yet!");
LLVM_DEBUG(dbgs() << "Construct pointer: " << *Ptr << " + " << Offset
- << "-bytes as " << *ResTy << "\n");
+ << "-bytes\n");
if (Offset) {
Type *Ty = PtrElemTy;
@@ -313,16 +321,11 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr,
// If an offset is left we use byte-wise adjustment.
if (IntOffset != 0) {
- Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy());
Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(IntOffset),
GEPName + ".b" + Twine(IntOffset.getZExtValue()));
}
}
- // Ensure the result has the requested type.
- Ptr = IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, ResTy,
- Ptr->getName() + ".cast");
-
LLVM_DEBUG(dbgs() << "Constructed pointer: " << *Ptr << "\n");
return Ptr;
}
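With opaque pointers the byte-wise fallback no longer needs an i8* bitcast; the leftover offset becomes a plain i8 GEP. The same adjustment in isolation, as a sketch assuming an existing builder:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Advance Ptr by Offset bytes through an i8-typed GEP; no pointer cast is
// required once pointer types are opaque.
static Value *advanceByBytes(IRBuilder<> &IRB, Value *Ptr, uint64_t Offset) {
  return IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt64(Offset),
                       Twine(Ptr->getName()) + ".b" + Twine(Offset));
}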
@@ -377,7 +380,7 @@ getMinimalBaseOfPointer(Attributor &A, const AbstractAttribute &QueryingAA,
/// Clamp the information known for all returned values of a function
/// (identified by \p QueryingAA) into \p S.
template <typename AAType, typename StateType = typename AAType::StateType,
- Attribute::AttrKind IRAttributeKind = Attribute::None,
+ Attribute::AttrKind IRAttributeKind = AAType::IRAttributeKind,
bool RecurseForSelectAndPHI = true>
static void clampReturnedValueStates(
Attributor &A, const AAType &QueryingAA, StateType &S,
@@ -400,7 +403,7 @@ static void clampReturnedValueStates(
auto CheckReturnValue = [&](Value &RV) -> bool {
const IRPosition &RVPos = IRPosition::value(RV, CBContext);
// If possible, use the hasAssumedIRAttr interface.
- if (IRAttributeKind != Attribute::None) {
+ if (Attribute::isEnumAttrKind(IRAttributeKind)) {
bool IsKnown;
return AA::hasAssumedIRAttr<IRAttributeKind>(
A, &QueryingAA, RVPos, DepClassTy::REQUIRED, IsKnown);
@@ -434,7 +437,7 @@ namespace {
template <typename AAType, typename BaseType,
typename StateType = typename BaseType::StateType,
bool PropagateCallBaseContext = false,
- Attribute::AttrKind IRAttributeKind = Attribute::None,
+ Attribute::AttrKind IRAttributeKind = AAType::IRAttributeKind,
bool RecurseForSelectAndPHI = true>
struct AAReturnedFromReturnedValues : public BaseType {
AAReturnedFromReturnedValues(const IRPosition &IRP, Attributor &A)
@@ -455,7 +458,7 @@ struct AAReturnedFromReturnedValues : public BaseType {
/// Clamp the information known at all call sites for a given argument
/// (identified by \p QueryingAA) into \p S.
template <typename AAType, typename StateType = typename AAType::StateType,
- Attribute::AttrKind IRAttributeKind = Attribute::None>
+ Attribute::AttrKind IRAttributeKind = AAType::IRAttributeKind>
static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA,
StateType &S) {
LLVM_DEBUG(dbgs() << "[Attributor] Clamp call site argument states for "
@@ -480,7 +483,7 @@ static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA,
return false;
// If possible, use the hasAssumedIRAttr interface.
- if (IRAttributeKind != Attribute::None) {
+ if (Attribute::isEnumAttrKind(IRAttributeKind)) {
bool IsKnown;
return AA::hasAssumedIRAttr<IRAttributeKind>(
A, &QueryingAA, ACSArgPos, DepClassTy::REQUIRED, IsKnown);
@@ -514,7 +517,7 @@ static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA,
/// context.
template <typename AAType, typename BaseType,
typename StateType = typename AAType::StateType,
- Attribute::AttrKind IRAttributeKind = Attribute::None>
+ Attribute::AttrKind IRAttributeKind = AAType::IRAttributeKind>
bool getArgumentStateFromCallBaseContext(Attributor &A,
BaseType &QueryingAttribute,
IRPosition &Pos, StateType &State) {
@@ -529,7 +532,7 @@ bool getArgumentStateFromCallBaseContext(Attributor &A,
const IRPosition CBArgPos = IRPosition::callsite_argument(*CBContext, ArgNo);
// If possible, use the hasAssumedIRAttr interface.
- if (IRAttributeKind != Attribute::None) {
+ if (Attribute::isEnumAttrKind(IRAttributeKind)) {
bool IsKnown;
return AA::hasAssumedIRAttr<IRAttributeKind>(
A, &QueryingAttribute, CBArgPos, DepClassTy::REQUIRED, IsKnown);
@@ -555,7 +558,7 @@ bool getArgumentStateFromCallBaseContext(Attributor &A,
template <typename AAType, typename BaseType,
typename StateType = typename AAType::StateType,
bool BridgeCallBaseContext = false,
- Attribute::AttrKind IRAttributeKind = Attribute::None>
+ Attribute::AttrKind IRAttributeKind = AAType::IRAttributeKind>
struct AAArgumentFromCallSiteArguments : public BaseType {
AAArgumentFromCallSiteArguments(const IRPosition &IRP, Attributor &A)
: BaseType(IRP, A) {}
@@ -585,45 +588,55 @@ struct AAArgumentFromCallSiteArguments : public BaseType {
template <typename AAType, typename BaseType,
typename StateType = typename BaseType::StateType,
bool IntroduceCallBaseContext = false,
- Attribute::AttrKind IRAttributeKind = Attribute::None>
-struct AACallSiteReturnedFromReturned : public BaseType {
- AACallSiteReturnedFromReturned(const IRPosition &IRP, Attributor &A)
- : BaseType(IRP, A) {}
+ Attribute::AttrKind IRAttributeKind = AAType::IRAttributeKind>
+struct AACalleeToCallSite : public BaseType {
+ AACalleeToCallSite(const IRPosition &IRP, Attributor &A) : BaseType(IRP, A) {}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
- assert(this->getIRPosition().getPositionKind() ==
- IRPosition::IRP_CALL_SITE_RETURNED &&
- "Can only wrap function returned positions for call site returned "
- "positions!");
+ auto IRPKind = this->getIRPosition().getPositionKind();
+ assert((IRPKind == IRPosition::IRP_CALL_SITE_RETURNED ||
+ IRPKind == IRPosition::IRP_CALL_SITE) &&
+ "Can only wrap function returned positions for call site "
+ "returned positions!");
auto &S = this->getState();
- const Function *AssociatedFunction =
- this->getIRPosition().getAssociatedFunction();
- if (!AssociatedFunction)
- return S.indicatePessimisticFixpoint();
-
- CallBase &CBContext = cast<CallBase>(this->getAnchorValue());
+ CallBase &CB = cast<CallBase>(this->getAnchorValue());
if (IntroduceCallBaseContext)
- LLVM_DEBUG(dbgs() << "[Attributor] Introducing call base context:"
- << CBContext << "\n");
-
- IRPosition FnPos = IRPosition::returned(
- *AssociatedFunction, IntroduceCallBaseContext ? &CBContext : nullptr);
+ LLVM_DEBUG(dbgs() << "[Attributor] Introducing call base context:" << CB
+ << "\n");
- // If possible, use the hasAssumedIRAttr interface.
- if (IRAttributeKind != Attribute::None) {
- bool IsKnown;
- if (!AA::hasAssumedIRAttr<IRAttributeKind>(A, this, FnPos,
- DepClassTy::REQUIRED, IsKnown))
- return S.indicatePessimisticFixpoint();
- return ChangeStatus::UNCHANGED;
- }
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+ auto CalleePred = [&](ArrayRef<const Function *> Callees) {
+ for (const Function *Callee : Callees) {
+ IRPosition FnPos =
+ IRPKind == llvm::IRPosition::IRP_CALL_SITE_RETURNED
+ ? IRPosition::returned(*Callee,
+ IntroduceCallBaseContext ? &CB : nullptr)
+ : IRPosition::function(
+ *Callee, IntroduceCallBaseContext ? &CB : nullptr);
+ // If possible, use the hasAssumedIRAttr interface.
+ if (Attribute::isEnumAttrKind(IRAttributeKind)) {
+ bool IsKnown;
+ if (!AA::hasAssumedIRAttr<IRAttributeKind>(
+ A, this, FnPos, DepClassTy::REQUIRED, IsKnown))
+ return false;
+ continue;
+ }
- const AAType *AA = A.getAAFor<AAType>(*this, FnPos, DepClassTy::REQUIRED);
- if (!AA)
+ const AAType *AA =
+ A.getAAFor<AAType>(*this, FnPos, DepClassTy::REQUIRED);
+ if (!AA)
+ return false;
+ Changed |= clampStateAndIndicateChange(S, AA->getState());
+ if (S.isAtFixpoint())
+ return S.isValidState();
+ }
+ return true;
+ };
+ if (!A.checkForAllCallees(CalleePred, *this, CB))
return S.indicatePessimisticFixpoint();
- return clampStateAndIndicateChange(S, AA->getState());
+ return Changed;
}
};
@@ -865,11 +878,9 @@ struct AA::PointerInfo::State : public AbstractState {
AAPointerInfo::AccessKind Kind, Type *Ty,
Instruction *RemoteI = nullptr);
- using OffsetBinsTy = DenseMap<RangeTy, SmallSet<unsigned, 4>>;
-
- using const_bin_iterator = OffsetBinsTy::const_iterator;
- const_bin_iterator begin() const { return OffsetBins.begin(); }
- const_bin_iterator end() const { return OffsetBins.end(); }
+ AAPointerInfo::const_bin_iterator begin() const { return OffsetBins.begin(); }
+ AAPointerInfo::const_bin_iterator end() const { return OffsetBins.end(); }
+ int64_t numOffsetBins() const { return OffsetBins.size(); }
const AAPointerInfo::Access &getAccess(unsigned Index) const {
return AccessList[Index];
@@ -889,7 +900,7 @@ protected:
// are all combined into a single Access object. This may result in loss of
// information in RangeTy in the Access object.
SmallVector<AAPointerInfo::Access> AccessList;
- OffsetBinsTy OffsetBins;
+ AAPointerInfo::OffsetBinsTy OffsetBins;
DenseMap<const Instruction *, SmallVector<unsigned>> RemoteIMap;
/// See AAPointerInfo::forallInterferingAccesses.
@@ -1093,6 +1104,12 @@ struct AAPointerInfoImpl
return AAPointerInfo::manifest(A);
}
+ virtual const_bin_iterator begin() const override { return State::begin(); }
+ virtual const_bin_iterator end() const override { return State::end(); }
+ virtual int64_t numOffsetBins() const override {
+ return State::numOffsetBins();
+ }
+
bool forallInterferingAccesses(
AA::RangeTy Range,
function_ref<bool(const AAPointerInfo::Access &, bool)> CB)
@@ -1104,7 +1121,8 @@ struct AAPointerInfoImpl
Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I,
bool FindInterferingWrites, bool FindInterferingReads,
function_ref<bool(const Access &, bool)> UserCB, bool &HasBeenWrittenTo,
- AA::RangeTy &Range) const override {
+ AA::RangeTy &Range,
+ function_ref<bool(const Access &)> SkipCB) const override {
HasBeenWrittenTo = false;
SmallPtrSet<const Access *, 8> DominatingWrites;
@@ -1183,6 +1201,11 @@ struct AAPointerInfoImpl
A, this, IRPosition::function(Scope), DepClassTy::OPTIONAL,
IsKnownNoRecurse);
+ // TODO: Use reaching kernels from AAKernelInfo (or move it to
+ // AAExecutionDomain) such that we allow scopes other than kernels as long
+ // as the reaching kernels are disjoint.
+ bool InstInKernel = Scope.hasFnAttribute("kernel");
+ bool ObjHasKernelLifetime = false;
const bool UseDominanceReasoning =
FindInterferingWrites && IsKnownNoRecurse;
const DominatorTree *DT =
@@ -1215,6 +1238,7 @@ struct AAPointerInfoImpl
// If the alloca containing function is not recursive the alloca
// must be dead in the callee.
const Function *AIFn = AI->getFunction();
+ ObjHasKernelLifetime = AIFn->hasFnAttribute("kernel");
bool IsKnownNoRecurse;
if (AA::hasAssumedIRAttr<Attribute::NoRecurse>(
A, this, IRPosition::function(*AIFn), DepClassTy::OPTIONAL,
@@ -1224,7 +1248,8 @@ struct AAPointerInfoImpl
} else if (auto *GV = dyn_cast<GlobalValue>(&getAssociatedValue())) {
// If the global has kernel lifetime we can stop if we reach a kernel
// as it is "dead" in the (unknown) callees.
- if (HasKernelLifetime(GV, *GV->getParent()))
+ ObjHasKernelLifetime = HasKernelLifetime(GV, *GV->getParent());
+ if (ObjHasKernelLifetime)
IsLiveInCalleeCB = [](const Function &Fn) {
return !Fn.hasFnAttribute("kernel");
};
@@ -1235,6 +1260,15 @@ struct AAPointerInfoImpl
AA::InstExclusionSetTy ExclusionSet;
auto AccessCB = [&](const Access &Acc, bool Exact) {
+ Function *AccScope = Acc.getRemoteInst()->getFunction();
+ bool AccInSameScope = AccScope == &Scope;
+
+ // If the object has kernel lifetime we can ignore accesses only reachable
+ // by other kernels. For now we only skip accesses *in* other kernels.
+ if (InstInKernel && ObjHasKernelLifetime && !AccInSameScope &&
+ AccScope->hasFnAttribute("kernel"))
+ return true;
+
if (Exact && Acc.isMustAccess() && Acc.getRemoteInst() != &I) {
if (Acc.isWrite() || (isa<LoadInst>(I) && Acc.isWriteOrAssumption()))
ExclusionSet.insert(Acc.getRemoteInst());
@@ -1245,8 +1279,7 @@ struct AAPointerInfoImpl
return true;
bool Dominates = FindInterferingWrites && DT && Exact &&
- Acc.isMustAccess() &&
- (Acc.getRemoteInst()->getFunction() == &Scope) &&
+ Acc.isMustAccess() && AccInSameScope &&
DT->dominates(Acc.getRemoteInst(), &I);
if (Dominates)
DominatingWrites.insert(&Acc);
@@ -1276,6 +1309,8 @@ struct AAPointerInfoImpl
// Helper to determine if we can skip a specific write access.
auto CanSkipAccess = [&](const Access &Acc, bool Exact) {
+ if (SkipCB && SkipCB(Acc))
+ return true;
if (!CanIgnoreThreading(Acc))
return false;
@@ -1817,9 +1852,14 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
LLVM_DEBUG(dbgs() << "[AAPointerInfo] Assumption found "
<< *Assumption.second << ": " << *LoadI
<< " == " << *Assumption.first << "\n");
-
+ bool UsedAssumedInformation = false;
+ std::optional<Value *> Content = nullptr;
+ if (Assumption.first)
+ Content =
+ A.getAssumedSimplified(*Assumption.first, *this,
+ UsedAssumedInformation, AA::Interprocedural);
return handleAccess(
- A, *Assumption.second, Assumption.first, AccessKind::AK_ASSUMPTION,
+ A, *Assumption.second, Content, AccessKind::AK_ASSUMPTION,
OffsetInfoMap[CurPtr].Offsets, Changed, *LoadI->getType());
}
@@ -2083,24 +2123,10 @@ struct AANoUnwindFunction final : public AANoUnwindImpl {
};
/// NoUnwind attribute deduction for call sites.
-struct AANoUnwindCallSite final : AANoUnwindImpl {
+struct AANoUnwindCallSite final
+ : AACalleeToCallSite<AANoUnwind, AANoUnwindImpl> {
AANoUnwindCallSite(const IRPosition &IRP, Attributor &A)
- : AANoUnwindImpl(IRP, A) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
- // sense to specialize attributes for call sites arguments instead of
- // redirecting requests to the callee argument.
- Function *F = getAssociatedFunction();
- const IRPosition &FnPos = IRPosition::function(*F);
- bool IsKnownNoUnwind;
- if (AA::hasAssumedIRAttr<Attribute::NoUnwind>(
- A, this, FnPos, DepClassTy::REQUIRED, IsKnownNoUnwind))
- return ChangeStatus::UNCHANGED;
- return indicatePessimisticFixpoint();
- }
+ : AACalleeToCallSite<AANoUnwind, AANoUnwindImpl>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nounwind); }
@@ -2200,8 +2226,15 @@ ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) {
if (I.mayReadOrWriteMemory())
return true;
+ bool IsKnown;
+ CallBase &CB = cast<CallBase>(I);
+ if (AA::hasAssumedIRAttr<Attribute::NoSync>(
+ A, this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL,
+ IsKnown))
+ return true;
+
// non-convergent and readnone imply nosync.
- return !cast<CallBase>(I).isConvergent();
+ return !CB.isConvergent();
};
bool UsedAssumedInformation = false;
@@ -2223,24 +2256,9 @@ struct AANoSyncFunction final : public AANoSyncImpl {
};
/// NoSync attribute deduction for call sites.
-struct AANoSyncCallSite final : AANoSyncImpl {
+struct AANoSyncCallSite final : AACalleeToCallSite<AANoSync, AANoSyncImpl> {
AANoSyncCallSite(const IRPosition &IRP, Attributor &A)
- : AANoSyncImpl(IRP, A) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
- // sense to specialize attributes for call sites arguments instead of
- // redirecting requests to the callee argument.
- Function *F = getAssociatedFunction();
- const IRPosition &FnPos = IRPosition::function(*F);
- bool IsKnownNoSycn;
- if (AA::hasAssumedIRAttr<Attribute::NoSync>(
- A, this, FnPos, DepClassTy::REQUIRED, IsKnownNoSycn))
- return ChangeStatus::UNCHANGED;
- return indicatePessimisticFixpoint();
- }
+ : AACalleeToCallSite<AANoSync, AANoSyncImpl>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nosync); }
@@ -2292,24 +2310,9 @@ struct AANoFreeFunction final : public AANoFreeImpl {
};
/// NoFree attribute deduction for call sites.
-struct AANoFreeCallSite final : AANoFreeImpl {
+struct AANoFreeCallSite final : AACalleeToCallSite<AANoFree, AANoFreeImpl> {
AANoFreeCallSite(const IRPosition &IRP, Attributor &A)
- : AANoFreeImpl(IRP, A) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
- // sense to specialize attributes for call sites arguments instead of
- // redirecting requests to the callee argument.
- Function *F = getAssociatedFunction();
- const IRPosition &FnPos = IRPosition::function(*F);
- bool IsKnown;
- if (AA::hasAssumedIRAttr<Attribute::NoFree>(A, this, FnPos,
- DepClassTy::REQUIRED, IsKnown))
- return ChangeStatus::UNCHANGED;
- return indicatePessimisticFixpoint();
- }
+ : AACalleeToCallSite<AANoFree, AANoFreeImpl>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nofree); }
@@ -2450,9 +2453,6 @@ bool AANonNull::isImpliedByIR(Attributor &A, const IRPosition &IRP,
if (A.hasAttr(IRP, AttrKinds, IgnoreSubsumingPositions, Attribute::NonNull))
return true;
- if (IRP.getPositionKind() == IRP_RETURNED)
- return false;
-
DominatorTree *DT = nullptr;
AssumptionCache *AC = nullptr;
InformationCache &InfoCache = A.getInfoCache();
@@ -2463,9 +2463,27 @@ bool AANonNull::isImpliedByIR(Attributor &A, const IRPosition &IRP,
}
}
- if (!isKnownNonZero(&IRP.getAssociatedValue(), A.getDataLayout(), 0, AC,
- IRP.getCtxI(), DT))
+ SmallVector<AA::ValueAndContext> Worklist;
+ if (IRP.getPositionKind() != IRP_RETURNED) {
+ Worklist.push_back({IRP.getAssociatedValue(), IRP.getCtxI()});
+ } else {
+ bool UsedAssumedInformation = false;
+ if (!A.checkForAllInstructions(
+ [&](Instruction &I) {
+ Worklist.push_back({*cast<ReturnInst>(I).getReturnValue(), &I});
+ return true;
+ },
+ IRP.getAssociatedFunction(), nullptr, {Instruction::Ret},
+ UsedAssumedInformation))
+ return false;
+ }
+
+ if (llvm::any_of(Worklist, [&](AA::ValueAndContext VAC) {
+ return !isKnownNonZero(VAC.getValue(), A.getDataLayout(), 0, AC,
+ VAC.getCtxI(), DT);
+ }))
return false;
+
A.manifestAttrs(IRP, {Attribute::get(IRP.getAnchorValue().getContext(),
Attribute::NonNull)});
return true;
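The new IRP_RETURNED path gathers every returned value together with its context into a worklist and claims nonnull only if all of them pass the non-zero check. A simplified standalone model of that all-of walk (the isNonZero oracle is invented; the real code calls isKnownNonZero):

#include <algorithm>
#include <iostream>
#include <vector>

struct ValueAndContext { int Value; int CtxId; };

// Invented stand-in for isKnownNonZero(V, DL, 0, AC, CtxI, DT).
bool isNonZero(const ValueAndContext &VAC) { return VAC.Value != 0; }

bool impliedNonNullForReturn(const std::vector<ValueAndContext> &Returns) {
  // Mirrors the any_of above: one return value that may be null defeats the
  // deduction for the whole returned position.
  if (std::any_of(Returns.begin(), Returns.end(),
                  [](const ValueAndContext &VAC) { return !isNonZero(VAC); }))
    return false;
  return true;
}

int main() {
  std::vector<ValueAndContext> Returns = {{4, 0}, {7, 1}};
  std::cout << impliedNonNullForReturn(Returns) << '\n'; // 1
  Returns.push_back({0, 2});
  std::cout << impliedNonNullForReturn(Returns) << '\n'; // 0
}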
@@ -2529,7 +2547,8 @@ static int64_t getKnownNonNullAndDerefBytesForUse(
}
std::optional<MemoryLocation> Loc = MemoryLocation::getOrNone(I);
- if (!Loc || Loc->Ptr != UseV || !Loc->Size.isPrecise() || I->isVolatile())
+ if (!Loc || Loc->Ptr != UseV || !Loc->Size.isPrecise() ||
+ Loc->Size.isScalable() || I->isVolatile())
return 0;
int64_t Offset;
@@ -2610,6 +2629,23 @@ struct AANonNullFloating : public AANonNullImpl {
Values.size() != 1 || Values.front().getValue() != AssociatedValue;
if (!Stripped) {
+ bool IsKnown;
+ if (auto *PHI = dyn_cast<PHINode>(AssociatedValue))
+ if (llvm::all_of(PHI->incoming_values(), [&](Value *Op) {
+ return AA::hasAssumedIRAttr<Attribute::NonNull>(
+ A, this, IRPosition::value(*Op), DepClassTy::OPTIONAL,
+ IsKnown);
+ }))
+ return ChangeStatus::UNCHANGED;
+ if (auto *Select = dyn_cast<SelectInst>(AssociatedValue))
+ if (AA::hasAssumedIRAttr<Attribute::NonNull>(
+ A, this, IRPosition::value(*Select->getFalseValue()),
+ DepClassTy::OPTIONAL, IsKnown) &&
+ AA::hasAssumedIRAttr<Attribute::NonNull>(
+ A, this, IRPosition::value(*Select->getTrueValue()),
+ DepClassTy::OPTIONAL, IsKnown))
+ return ChangeStatus::UNCHANGED;
+
// If we haven't stripped anything we might still be able to use a
// different AA, but only if the IRP changes. Effectively when we
// interpret this not as a call site value but as a floating/argument
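This fast path asks, before stripping anything, whether a phi's incoming values or a select's two operands are all already assumed nonnull. The same operand-wise check over a toy node type (all names invented):

#include <algorithm>
#include <iostream>
#include <vector>

struct Node {
  bool NonNull = false; // per-operand assumed attribute
};

// Phi: nonnull iff every incoming operand is nonnull.
bool phiNonNull(const std::vector<Node> &Incoming) {
  return std::all_of(Incoming.begin(), Incoming.end(),
                     [](const Node &N) { return N.NonNull; });
}

// Select: nonnull iff both the true and the false operand are nonnull.
bool selectNonNull(const Node &TrueV, const Node &FalseV) {
  return TrueV.NonNull && FalseV.NonNull;
}

int main() {
  std::vector<Node> Incoming = {{true}, {true}};
  std::cout << phiNonNull(Incoming) << ' '
            << selectNonNull({true}, {false}) << '\n'; // 1 0
}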
@@ -2634,10 +2670,11 @@ struct AANonNullFloating : public AANonNullImpl {
/// NonNull attribute for function return value.
struct AANonNullReturned final
: AAReturnedFromReturnedValues<AANonNull, AANonNull, AANonNull::StateType,
- false, AANonNull::IRAttributeKind> {
+ false, AANonNull::IRAttributeKind, false> {
AANonNullReturned(const IRPosition &IRP, Attributor &A)
: AAReturnedFromReturnedValues<AANonNull, AANonNull, AANonNull::StateType,
- false, Attribute::NonNull>(IRP, A) {}
+ false, Attribute::NonNull, false>(IRP, A) {
+ }
/// See AbstractAttribute::getAsStr().
const std::string getAsStr(Attributor *A) const override {
@@ -2650,13 +2687,9 @@ struct AANonNullReturned final
/// NonNull attribute for function argument.
struct AANonNullArgument final
- : AAArgumentFromCallSiteArguments<AANonNull, AANonNullImpl,
- AANonNull::StateType, false,
- AANonNull::IRAttributeKind> {
+ : AAArgumentFromCallSiteArguments<AANonNull, AANonNullImpl> {
AANonNullArgument(const IRPosition &IRP, Attributor &A)
- : AAArgumentFromCallSiteArguments<AANonNull, AANonNullImpl,
- AANonNull::StateType, false,
- AANonNull::IRAttributeKind>(IRP, A) {}
+ : AAArgumentFromCallSiteArguments<AANonNull, AANonNullImpl>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nonnull) }
@@ -2672,13 +2705,9 @@ struct AANonNullCallSiteArgument final : AANonNullFloating {
/// NonNull attribute for a call site return position.
struct AANonNullCallSiteReturned final
- : AACallSiteReturnedFromReturned<AANonNull, AANonNullImpl,
- AANonNull::StateType, false,
- AANonNull::IRAttributeKind> {
+ : AACalleeToCallSite<AANonNull, AANonNullImpl> {
AANonNullCallSiteReturned(const IRPosition &IRP, Attributor &A)
- : AACallSiteReturnedFromReturned<AANonNull, AANonNullImpl,
- AANonNull::StateType, false,
- AANonNull::IRAttributeKind>(IRP, A) {}
+ : AACalleeToCallSite<AANonNull, AANonNullImpl>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nonnull) }
@@ -2830,24 +2859,10 @@ struct AANoRecurseFunction final : AANoRecurseImpl {
};
/// NoRecurse attribute deduction for a call site.
-struct AANoRecurseCallSite final : AANoRecurseImpl {
+struct AANoRecurseCallSite final
+ : AACalleeToCallSite<AANoRecurse, AANoRecurseImpl> {
AANoRecurseCallSite(const IRPosition &IRP, Attributor &A)
- : AANoRecurseImpl(IRP, A) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
- // sense to specialize attributes for call sites arguments instead of
- // redirecting requests to the callee argument.
- Function *F = getAssociatedFunction();
- const IRPosition &FnPos = IRPosition::function(*F);
- bool IsKnownNoRecurse;
- if (!AA::hasAssumedIRAttr<Attribute::NoRecurse>(
- A, this, FnPos, DepClassTy::REQUIRED, IsKnownNoRecurse))
- return indicatePessimisticFixpoint();
- return ChangeStatus::UNCHANGED;
- }
+ : AACalleeToCallSite<AANoRecurse, AANoRecurseImpl>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(norecurse); }
@@ -3355,26 +3370,17 @@ struct AAWillReturnFunction final : AAWillReturnImpl {
};
/// WillReturn attribute deduction for a call site.
-struct AAWillReturnCallSite final : AAWillReturnImpl {
+struct AAWillReturnCallSite final
+ : AACalleeToCallSite<AAWillReturn, AAWillReturnImpl> {
AAWillReturnCallSite(const IRPosition &IRP, Attributor &A)
- : AAWillReturnImpl(IRP, A) {}
+ : AACalleeToCallSite<AAWillReturn, AAWillReturnImpl>(IRP, A) {}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
if (isImpliedByMustprogressAndReadonly(A, /* KnownOnly */ false))
return ChangeStatus::UNCHANGED;
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
- // sense to specialize attributes for call sites arguments instead of
- // redirecting requests to the callee argument.
- Function *F = getAssociatedFunction();
- const IRPosition &FnPos = IRPosition::function(*F);
- bool IsKnown;
- if (AA::hasAssumedIRAttr<Attribute::WillReturn>(
- A, this, FnPos, DepClassTy::REQUIRED, IsKnown))
- return ChangeStatus::UNCHANGED;
- return indicatePessimisticFixpoint();
+ return AACalleeToCallSite::updateImpl(A);
}
/// See AbstractAttribute::trackStatistics()
@@ -3402,6 +3408,18 @@ template <typename ToTy> struct ReachabilityQueryInfo {
/// and remember if it worked:
Reachable Result = Reachable::No;
+ /// Precomputed hash for this RQI.
+ unsigned Hash = 0;
+
+ unsigned computeHashValue() const {
+ assert(Hash == 0 && "Computed hash twice!");
+ using InstSetDMI = DenseMapInfo<const AA::InstExclusionSetTy *>;
+ using PairDMI = DenseMapInfo<std::pair<const Instruction *, const ToTy *>>;
+ return const_cast<ReachabilityQueryInfo<ToTy> *>(this)->Hash =
+ detail::combineHashValue(PairDMI::getHashValue({From, To}),
+ InstSetDMI::getHashValue(ExclusionSet));
+ }
+
ReachabilityQueryInfo(const Instruction *From, const ToTy *To)
: From(From), To(To) {}
@@ -3435,9 +3453,7 @@ template <typename ToTy> struct DenseMapInfo<ReachabilityQueryInfo<ToTy> *> {
return &TombstoneKey;
}
static unsigned getHashValue(const ReachabilityQueryInfo<ToTy> *RQI) {
- unsigned H = PairDMI ::getHashValue({RQI->From, RQI->To});
- H += InstSetDMI::getHashValue(RQI->ExclusionSet);
- return H;
+ return RQI->Hash ? RQI->Hash : RQI->computeHashValue();
}
static bool isEqual(const ReachabilityQueryInfo<ToTy> *LHS,
const ReachabilityQueryInfo<ToTy> *RHS) {
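Caching the hash inside the query object avoids recomputing detail::combineHashValue on every cache probe. The same memoization pattern in standalone C++ (std::hash stands in for the DenseMapInfo hashers; the zero sentinel is handled explicitly):

#include <cstddef>
#include <functional>
#include <iostream>

struct Query {
  int From, To;
  mutable std::size_t Hash = 0; // 0 doubles as "not computed yet"

  std::size_t hash() const {
    if (Hash)
      return Hash; // fast path: reuse the memoized value
    // Stand-in for detail::combineHashValue over the two key hashes.
    std::size_t H = (std::hash<int>()(From) * 31) ^ std::hash<int>()(To);
    return Hash = (H ? H : 1); // never store the "unset" sentinel itself
  }
};

int main() {
  Query Q{3, 7};
  std::cout << Q.hash() << '\n';
  std::cout << Q.hash() << '\n'; // second probe hits the cached value
}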
@@ -3480,24 +3496,24 @@ struct CachedReachabilityAA : public BaseTy {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
- InUpdate = true;
for (unsigned u = 0, e = QueryVector.size(); u < e; ++u) {
RQITy *RQI = QueryVector[u];
- if (RQI->Result == RQITy::Reachable::No && isReachableImpl(A, *RQI))
+ if (RQI->Result == RQITy::Reachable::No &&
+ isReachableImpl(A, *RQI, /*IsTemporaryRQI=*/false))
Changed = ChangeStatus::CHANGED;
}
- InUpdate = false;
return Changed;
}
- virtual bool isReachableImpl(Attributor &A, RQITy &RQI) = 0;
+ virtual bool isReachableImpl(Attributor &A, RQITy &RQI,
+ bool IsTemporaryRQI) = 0;
bool rememberResult(Attributor &A, typename RQITy::Reachable Result,
- RQITy &RQI, bool UsedExclusionSet) {
+ RQITy &RQI, bool UsedExclusionSet, bool IsTemporaryRQI) {
RQI.Result = Result;
// Remove the temporary RQI from the cache.
- if (!InUpdate)
+ if (IsTemporaryRQI)
QueryCache.erase(&RQI);
// Insert a plain RQI (w/o exclusion set) if that makes sense. Two options:
@@ -3515,7 +3531,7 @@ struct CachedReachabilityAA : public BaseTy {
}
// Check if we need to insert a new permanent RQI with the exclusion set.
- if (!InUpdate && Result != RQITy::Reachable::Yes && UsedExclusionSet) {
+ if (IsTemporaryRQI && Result != RQITy::Reachable::Yes && UsedExclusionSet) {
assert((!RQI.ExclusionSet || !RQI.ExclusionSet->empty()) &&
"Did not expect empty set!");
RQITy *RQIPtr = new (A.Allocator)
@@ -3527,7 +3543,7 @@ struct CachedReachabilityAA : public BaseTy {
QueryCache.insert(RQIPtr);
}
- if (Result == RQITy::Reachable::No && !InUpdate)
+ if (Result == RQITy::Reachable::No && IsTemporaryRQI)
A.registerForUpdate(*this);
return Result == RQITy::Reachable::Yes;
}
@@ -3568,7 +3584,6 @@ struct CachedReachabilityAA : public BaseTy {
}
private:
- bool InUpdate = false;
SmallVector<RQITy *> QueryVector;
DenseSet<RQITy *> QueryCache;
};
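Replacing the InUpdate member with an explicit IsTemporaryRQI argument moves the temporary-vs-permanent decision to each call site instead of hidden object state that an early return could leave stale. A minimal sketch of the parameter-threading style (toy cache, invented names):

#include <iostream>
#include <set>

std::set<int> Cache;

// The flag travels through the call explicitly; there is no mutable member
// that callers could forget to reset on an early return.
bool rememberResult(int Query, bool Reachable, bool IsTemporary) {
  if (IsTemporary)
    Cache.erase(Query); // stack-allocated queries must not stay cached
  else
    Cache.insert(Query); // permanent queries persist for reuse
  return Reachable;
}

int main() {
  rememberResult(1, true, /*IsTemporary=*/false);
  rememberResult(2, false, /*IsTemporary=*/true);
  std::cout << Cache.size() << '\n'; // 1
}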
@@ -3577,7 +3592,10 @@ struct AAIntraFnReachabilityFunction final
: public CachedReachabilityAA<AAIntraFnReachability, Instruction> {
using Base = CachedReachabilityAA<AAIntraFnReachability, Instruction>;
AAIntraFnReachabilityFunction(const IRPosition &IRP, Attributor &A)
- : Base(IRP, A) {}
+ : Base(IRP, A) {
+ DT = A.getInfoCache().getAnalysisResultForFunction<DominatorTreeAnalysis>(
+ *IRP.getAssociatedFunction());
+ }
bool isAssumedReachable(
Attributor &A, const Instruction &From, const Instruction &To,
@@ -3589,7 +3607,8 @@ struct AAIntraFnReachabilityFunction final
RQITy StackRQI(A, From, To, ExclusionSet, false);
typename RQITy::Reachable Result;
if (!NonConstThis->checkQueryCache(A, StackRQI, Result))
- return NonConstThis->isReachableImpl(A, StackRQI);
+ return NonConstThis->isReachableImpl(A, StackRQI,
+ /*IsTemporaryRQI=*/true);
return Result == RQITy::Reachable::Yes;
}
@@ -3598,16 +3617,24 @@ struct AAIntraFnReachabilityFunction final
// of them changed.
auto *LivenessAA =
A.getAAFor<AAIsDead>(*this, getIRPosition(), DepClassTy::OPTIONAL);
- if (LivenessAA && llvm::all_of(DeadEdges, [&](const auto &DeadEdge) {
- return LivenessAA->isEdgeDead(DeadEdge.first, DeadEdge.second);
+ if (LivenessAA &&
+ llvm::all_of(DeadEdges,
+ [&](const auto &DeadEdge) {
+ return LivenessAA->isEdgeDead(DeadEdge.first,
+ DeadEdge.second);
+ }) &&
+ llvm::all_of(DeadBlocks, [&](const BasicBlock *BB) {
+ return LivenessAA->isAssumedDead(BB);
})) {
return ChangeStatus::UNCHANGED;
}
DeadEdges.clear();
+ DeadBlocks.clear();
return Base::updateImpl(A);
}
- bool isReachableImpl(Attributor &A, RQITy &RQI) override {
+ bool isReachableImpl(Attributor &A, RQITy &RQI,
+ bool IsTemporaryRQI) override {
const Instruction *Origin = RQI.From;
bool UsedExclusionSet = false;
@@ -3633,31 +3660,41 @@ struct AAIntraFnReachabilityFunction final
// possible.
if (FromBB == ToBB &&
WillReachInBlock(*RQI.From, *RQI.To, RQI.ExclusionSet))
- return rememberResult(A, RQITy::Reachable::Yes, RQI, UsedExclusionSet);
+ return rememberResult(A, RQITy::Reachable::Yes, RQI, UsedExclusionSet,
+ IsTemporaryRQI);
// Check if reaching the ToBB block is sufficient or if even that would not
// ensure reaching the target. In the latter case we are done.
if (!WillReachInBlock(ToBB->front(), *RQI.To, RQI.ExclusionSet))
- return rememberResult(A, RQITy::Reachable::No, RQI, UsedExclusionSet);
+ return rememberResult(A, RQITy::Reachable::No, RQI, UsedExclusionSet,
+ IsTemporaryRQI);
+ const Function *Fn = FromBB->getParent();
SmallPtrSet<const BasicBlock *, 16> ExclusionBlocks;
if (RQI.ExclusionSet)
for (auto *I : *RQI.ExclusionSet)
- ExclusionBlocks.insert(I->getParent());
+ if (I->getFunction() == Fn)
+ ExclusionBlocks.insert(I->getParent());
// Check if we make it out of the FromBB block at all.
if (ExclusionBlocks.count(FromBB) &&
!WillReachInBlock(*RQI.From, *FromBB->getTerminator(),
RQI.ExclusionSet))
- return rememberResult(A, RQITy::Reachable::No, RQI, UsedExclusionSet);
+ return rememberResult(A, RQITy::Reachable::No, RQI, true, IsTemporaryRQI);
+
+ auto *LivenessAA =
+ A.getAAFor<AAIsDead>(*this, getIRPosition(), DepClassTy::OPTIONAL);
+ if (LivenessAA && LivenessAA->isAssumedDead(ToBB)) {
+ DeadBlocks.insert(ToBB);
+ return rememberResult(A, RQITy::Reachable::No, RQI, UsedExclusionSet,
+ IsTemporaryRQI);
+ }
SmallPtrSet<const BasicBlock *, 16> Visited;
SmallVector<const BasicBlock *, 16> Worklist;
Worklist.push_back(FromBB);
DenseSet<std::pair<const BasicBlock *, const BasicBlock *>> LocalDeadEdges;
- auto *LivenessAA =
- A.getAAFor<AAIsDead>(*this, getIRPosition(), DepClassTy::OPTIONAL);
while (!Worklist.empty()) {
const BasicBlock *BB = Worklist.pop_back_val();
if (!Visited.insert(BB).second)
@@ -3669,8 +3706,12 @@ struct AAIntraFnReachabilityFunction final
}
// We checked before if we just need to reach the ToBB block.
if (SuccBB == ToBB)
- return rememberResult(A, RQITy::Reachable::Yes, RQI,
- UsedExclusionSet);
+ return rememberResult(A, RQITy::Reachable::Yes, RQI, UsedExclusionSet,
+ IsTemporaryRQI);
+ if (DT && ExclusionBlocks.empty() && DT->dominates(BB, ToBB))
+ return rememberResult(A, RQITy::Reachable::Yes, RQI, UsedExclusionSet,
+ IsTemporaryRQI);
+
if (ExclusionBlocks.count(SuccBB)) {
UsedExclusionSet = true;
continue;
@@ -3680,16 +3721,24 @@ struct AAIntraFnReachabilityFunction final
}
DeadEdges.insert(LocalDeadEdges.begin(), LocalDeadEdges.end());
- return rememberResult(A, RQITy::Reachable::No, RQI, UsedExclusionSet);
+ return rememberResult(A, RQITy::Reachable::No, RQI, UsedExclusionSet,
+ IsTemporaryRQI);
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
private:
+ // Set of assumed dead blocks we used in the last query. If any of them
+ // changes we update the state.
+ DenseSet<const BasicBlock *> DeadBlocks;
+
// Set of assumed dead edges we used in the last query. If any of them
// changes we update the state.
DenseSet<std::pair<const BasicBlock *, const BasicBlock *>> DeadEdges;
+
+ /// The dominator tree of the function to short-circuit reasoning.
+ const DominatorTree *DT = nullptr;
};
} // namespace
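The reworked intra-function walk skips exclusion blocks and, when the exclusion set is empty, answers early once a visited block dominates the target: every entry-to-target path must pass through a dominator, so a path from it to the target exists. A standalone model over a toy CFG (the Dominates callback stands in for DominatorTree::dominates):

#include <functional>
#include <iostream>
#include <map>
#include <set>
#include <vector>

using CFG = std::map<int, std::vector<int>>;

bool reachable(const CFG &G, int From, int To, const std::set<int> &Excluded,
               const std::function<bool(int, int)> &Dominates) {
  std::set<int> Visited;
  std::vector<int> Worklist{From};
  while (!Worklist.empty()) {
    int BB = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(BB).second)
      continue;
    auto It = G.find(BB);
    if (It == G.end())
      continue;
    for (int Succ : It->second) {
      if (Succ == To)
        return true;
      // Short-circuit: if BB dominates the target, every entry-to-target
      // path passes through BB, so a BB-to-target path must exist.
      if (Excluded.empty() && Dominates(BB, To))
        return true;
      if (Excluded.count(Succ))
        continue; // blocked by the exclusion set
      Worklist.push_back(Succ);
    }
  }
  return false;
}

int main() {
  CFG G = {{0, {1, 2}}, {1, {3}}, {2, {3}}, {3, {}}};
  auto NoDom = [](int, int) { return false; };
  auto Dom = [](int BB, int To) { return BB == 0 && To == 3; };
  std::cout << reachable(G, 0, 3, {}, NoDom) << '\n';     // 1: plain walk
  std::cout << reachable(G, 0, 3, {1, 2}, NoDom) << '\n'; // 0: blocked
  std::cout << reachable(G, 0, 3, {}, Dom) << '\n';       // 1: via dominance
}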
@@ -3754,12 +3803,8 @@ struct AANoAliasFloating final : AANoAliasImpl {
/// NoAlias attribute for an argument.
struct AANoAliasArgument final
- : AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl,
- AANoAlias::StateType, false,
- Attribute::NoAlias> {
- using Base = AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl,
- AANoAlias::StateType, false,
- Attribute::NoAlias>;
+ : AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl> {
+ using Base = AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl>;
AANoAliasArgument(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {}
/// See AbstractAttribute::update(...).
@@ -4027,24 +4072,10 @@ struct AANoAliasReturned final : AANoAliasImpl {
};
/// NoAlias attribute deduction for a call site return value.
-struct AANoAliasCallSiteReturned final : AANoAliasImpl {
+struct AANoAliasCallSiteReturned final
+ : AACalleeToCallSite<AANoAlias, AANoAliasImpl> {
AANoAliasCallSiteReturned(const IRPosition &IRP, Attributor &A)
- : AANoAliasImpl(IRP, A) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
- // sense to specialize attributes for call sites arguments instead of
- // redirecting requests to the callee argument.
- Function *F = getAssociatedFunction();
- const IRPosition &FnPos = IRPosition::returned(*F);
- bool IsKnownNoAlias;
- if (!AA::hasAssumedIRAttr<Attribute::NoAlias>(
- A, this, FnPos, DepClassTy::REQUIRED, IsKnownNoAlias))
- return indicatePessimisticFixpoint();
- return ChangeStatus::UNCHANGED;
- }
+ : AACalleeToCallSite<AANoAlias, AANoAliasImpl>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noalias); }
@@ -4696,23 +4727,53 @@ identifyAliveSuccessors(Attributor &A, const SwitchInst &SI,
AbstractAttribute &AA,
SmallVectorImpl<const Instruction *> &AliveSuccessors) {
bool UsedAssumedInformation = false;
- std::optional<Constant *> C =
- A.getAssumedConstant(*SI.getCondition(), AA, UsedAssumedInformation);
- if (!C || isa_and_nonnull<UndefValue>(*C)) {
- // No value yet, assume all edges are dead.
- } else if (isa_and_nonnull<ConstantInt>(*C)) {
- for (const auto &CaseIt : SI.cases()) {
- if (CaseIt.getCaseValue() == *C) {
- AliveSuccessors.push_back(&CaseIt.getCaseSuccessor()->front());
- return UsedAssumedInformation;
- }
- }
- AliveSuccessors.push_back(&SI.getDefaultDest()->front());
+ SmallVector<AA::ValueAndContext> Values;
+ if (!A.getAssumedSimplifiedValues(IRPosition::value(*SI.getCondition()), &AA,
+ Values, AA::AnyScope,
+ UsedAssumedInformation)) {
+ // Something went wrong, assume all successors are live.
+ for (const BasicBlock *SuccBB : successors(SI.getParent()))
+ AliveSuccessors.push_back(&SuccBB->front());
+ return false;
+ }
+
+ if (Values.empty() ||
+ (Values.size() == 1 &&
+ isa_and_nonnull<UndefValue>(Values.front().getValue()))) {
+ // No valid value yet, assume all edges are dead.
return UsedAssumedInformation;
- } else {
+ }
+
+ Type &Ty = *SI.getCondition()->getType();
+ SmallPtrSet<ConstantInt *, 8> Constants;
+ auto CheckForConstantInt = [&](Value *V) {
+ if (auto *CI = dyn_cast_if_present<ConstantInt>(AA::getWithType(*V, Ty))) {
+ Constants.insert(CI);
+ return true;
+ }
+ return false;
+ };
+
+ if (!all_of(Values, [&](AA::ValueAndContext &VAC) {
+ return CheckForConstantInt(VAC.getValue());
+ })) {
for (const BasicBlock *SuccBB : successors(SI.getParent()))
AliveSuccessors.push_back(&SuccBB->front());
+ return UsedAssumedInformation;
}
+
+ unsigned MatchedCases = 0;
+ for (const auto &CaseIt : SI.cases()) {
+ if (Constants.count(CaseIt.getCaseValue())) {
+ ++MatchedCases;
+ AliveSuccessors.push_back(&CaseIt.getCaseSuccessor()->front());
+ }
+ }
+
+ // If all potential values have been matched, we will not visit the default
+ // case.
+ if (MatchedCases < Constants.size())
+ AliveSuccessors.push_back(&SI.getDefaultDest()->front());
return UsedAssumedInformation;
}
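With a set of possible constant conditions, only matching case successors are alive, and the default destination is added only when some possible value matched no case. A standalone model of that pruning (toy containers, invented names):

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

std::vector<std::string>
aliveSwitchSuccessors(const std::set<int> &PossibleValues,
                      const std::map<int, std::string> &Cases,
                      const std::string &DefaultBB) {
  std::vector<std::string> Alive;
  unsigned MatchedValues = 0;
  for (const auto &[CaseValue, SuccBB] : Cases) {
    if (PossibleValues.count(CaseValue)) {
      ++MatchedValues;
      Alive.push_back(SuccBB);
    }
  }
  // The default successor stays dead only if every possible value hit a case.
  if (MatchedValues < PossibleValues.size())
    Alive.push_back(DefaultBB);
  return Alive;
}

int main() {
  std::map<int, std::string> Cases = {{0, "bb0"}, {1, "bb1"}};
  for (const auto &BB : aliveSwitchSuccessors({0, 1}, Cases, "bbdef"))
    std::cout << BB << ' '; // bb0 bb1
  std::cout << '\n';
  for (const auto &BB : aliveSwitchSuccessors({0, 5}, Cases, "bbdef"))
    std::cout << BB << ' '; // bb0 bbdef
  std::cout << '\n';
}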
@@ -5103,9 +5164,8 @@ struct AADereferenceableCallSiteArgument final : AADereferenceableFloating {
/// Dereferenceable attribute deduction for a call site return value.
struct AADereferenceableCallSiteReturned final
- : AACallSiteReturnedFromReturned<AADereferenceable, AADereferenceableImpl> {
- using Base =
- AACallSiteReturnedFromReturned<AADereferenceable, AADereferenceableImpl>;
+ : AACalleeToCallSite<AADereferenceable, AADereferenceableImpl> {
+ using Base = AACalleeToCallSite<AADereferenceable, AADereferenceableImpl>;
AADereferenceableCallSiteReturned(const IRPosition &IRP, Attributor &A)
: Base(IRP, A) {}
@@ -5400,8 +5460,8 @@ struct AAAlignCallSiteArgument final : AAAlignFloating {
/// Align attribute deduction for a call site return value.
struct AAAlignCallSiteReturned final
- : AACallSiteReturnedFromReturned<AAAlign, AAAlignImpl> {
- using Base = AACallSiteReturnedFromReturned<AAAlign, AAAlignImpl>;
+ : AACalleeToCallSite<AAAlign, AAAlignImpl> {
+ using Base = AACalleeToCallSite<AAAlign, AAAlignImpl>;
AAAlignCallSiteReturned(const IRPosition &IRP, Attributor &A)
: Base(IRP, A) {}
@@ -5449,24 +5509,10 @@ struct AANoReturnFunction final : AANoReturnImpl {
};
/// NoReturn attribute deduction for a call site.
-struct AANoReturnCallSite final : AANoReturnImpl {
+struct AANoReturnCallSite final
+ : AACalleeToCallSite<AANoReturn, AANoReturnImpl> {
AANoReturnCallSite(const IRPosition &IRP, Attributor &A)
- : AANoReturnImpl(IRP, A) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
- // sense to specialize attributes for call sites arguments instead of
- // redirecting requests to the callee argument.
- Function *F = getAssociatedFunction();
- const IRPosition &FnPos = IRPosition::function(*F);
- bool IsKnownNoReturn;
- if (!AA::hasAssumedIRAttr<Attribute::NoReturn>(
- A, this, FnPos, DepClassTy::REQUIRED, IsKnownNoReturn))
- return indicatePessimisticFixpoint();
- return ChangeStatus::UNCHANGED;
- }
+ : AACalleeToCallSite<AANoReturn, AANoReturnImpl>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(noreturn); }
@@ -5805,8 +5851,8 @@ struct AANoCaptureImpl : public AANoCapture {
// For stores we already checked if we can follow them, if they make it
// here we give up.
if (isa<StoreInst>(UInst))
- return isCapturedIn(State, /* Memory */ true, /* Integer */ false,
- /* Return */ false);
+ return isCapturedIn(State, /* Memory */ true, /* Integer */ true,
+ /* Return */ true);
// Explicitly catch return instructions.
if (isa<ReturnInst>(UInst)) {
@@ -6476,7 +6522,7 @@ struct AAValueSimplifyCallSiteReturned : AAValueSimplifyImpl {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
- return indicatePessimisticFixpoint();
+ return indicatePessimisticFixpoint();
}
void trackStatistics() const override {
@@ -6937,13 +6983,17 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
<< **DI->PotentialAllocationCalls.begin() << "\n");
return false;
}
- Instruction *CtxI = isa<InvokeInst>(AI.CB) ? AI.CB : AI.CB->getNextNode();
- if (!Explorer || !Explorer->findInContextOf(UniqueFree, CtxI)) {
- LLVM_DEBUG(
- dbgs()
- << "[H2S] unique free call might not be executed with the allocation "
- << *UniqueFree << "\n");
- return false;
+
+ // __kmpc_alloc_shared and __kmpc_free_shared are by construction matched.
+ if (AI.LibraryFunctionId != LibFunc___kmpc_alloc_shared) {
+ Instruction *CtxI = isa<InvokeInst>(AI.CB) ? AI.CB : AI.CB->getNextNode();
+ if (!Explorer || !Explorer->findInContextOf(UniqueFree, CtxI)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "[H2S] unique free call might not be executed with the allocation "
+ << *UniqueFree << "\n");
+ return false;
+ }
}
return true;
};
@@ -7437,19 +7487,16 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType);
for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) {
- Type *PointeeTy = PrivStructType->getElementType(u)->getPointerTo();
- Value *Ptr =
- constructPointer(PointeeTy, PrivType, &Base,
- PrivStructLayout->getElementOffset(u), IRB, DL);
+ Value *Ptr = constructPointer(
+ PrivType, &Base, PrivStructLayout->getElementOffset(u), IRB, DL);
new StoreInst(F.getArg(ArgNo + u), Ptr, &IP);
}
} else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
Type *PointeeTy = PrivArrayType->getElementType();
- Type *PointeePtrTy = PointeeTy->getPointerTo();
uint64_t PointeeTySize = DL.getTypeStoreSize(PointeeTy);
for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) {
- Value *Ptr = constructPointer(PointeePtrTy, PrivType, &Base,
- u * PointeeTySize, IRB, DL);
+ Value *Ptr =
+ constructPointer(PrivType, &Base, u * PointeeTySize, IRB, DL);
new StoreInst(F.getArg(ArgNo + u), Ptr, &IP);
}
} else {
@@ -7469,19 +7516,13 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
IRBuilder<NoFolder> IRB(IP);
const DataLayout &DL = IP->getModule()->getDataLayout();
- Type *PrivPtrType = PrivType->getPointerTo();
- if (Base->getType() != PrivPtrType)
- Base = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
- Base, PrivPtrType, "", ACS.getInstruction());
-
// Traverse the type, build GEPs and loads.
if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType);
for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) {
Type *PointeeTy = PrivStructType->getElementType(u);
- Value *Ptr =
- constructPointer(PointeeTy->getPointerTo(), PrivType, Base,
- PrivStructLayout->getElementOffset(u), IRB, DL);
+ Value *Ptr = constructPointer(
+ PrivType, Base, PrivStructLayout->getElementOffset(u), IRB, DL);
LoadInst *L = new LoadInst(PointeeTy, Ptr, "", IP);
L->setAlignment(Alignment);
ReplacementValues.push_back(L);
@@ -7489,10 +7530,9 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
} else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
Type *PointeeTy = PrivArrayType->getElementType();
uint64_t PointeeTySize = DL.getTypeStoreSize(PointeeTy);
- Type *PointeePtrTy = PointeeTy->getPointerTo();
for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) {
- Value *Ptr = constructPointer(PointeePtrTy, PrivType, Base,
- u * PointeeTySize, IRB, DL);
+ Value *Ptr =
+ constructPointer(PrivType, Base, u * PointeeTySize, IRB, DL);
LoadInst *L = new LoadInst(PointeeTy, Ptr, "", IP);
L->setAlignment(Alignment);
ReplacementValues.push_back(L);
@@ -7796,6 +7836,9 @@ struct AAMemoryBehaviorImpl : public AAMemoryBehavior {
// Clear existing attributes.
A.removeAttrs(IRP, AttrKinds);
+ // Clear conflicting writable attribute.
+ if (isAssumedReadOnly())
+ A.removeAttrs(IRP, Attribute::Writable);
// Use the generic manifest method.
return IRAttribute::manifest(A);
@@ -7983,6 +8026,10 @@ struct AAMemoryBehaviorFunction final : public AAMemoryBehaviorImpl {
ME = MemoryEffects::writeOnly();
A.removeAttrs(getIRPosition(), AttrKinds);
+ // Clear conflicting writable attribute.
+ if (ME.onlyReadsMemory())
+ for (Argument &Arg : F.args())
+ A.removeAttrs(IRPosition::argument(Arg), Attribute::Writable);
return A.manifestAttrs(getIRPosition(),
Attribute::getWithMemoryEffects(F.getContext(), ME));
}
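A function deduced readonly cannot keep writable on its arguments, so the conflicting attribute is dropped before the memory effects are manifested. A toy model of that cleanup (std::set stands in for the attribute list; names invented):

#include <iostream>
#include <set>
#include <string>
#include <vector>

struct ToyFunction {
  std::set<std::string> FnAttrs;
  std::vector<std::set<std::string>> ArgAttrs;
};

void manifestReadOnly(ToyFunction &F) {
  // Clear the conflicting per-argument attribute before adding the
  // function-level memory effects, mirroring the removeAttrs calls above.
  for (auto &Attrs : F.ArgAttrs)
    Attrs.erase("writable");
  F.FnAttrs.insert("memory(read)");
}

int main() {
  ToyFunction F;
  F.ArgAttrs = {{"writable"}, {}};
  manifestReadOnly(F);
  std::cout << F.ArgAttrs[0].count("writable") << ' '
            << F.FnAttrs.count("memory(read)") << '\n'; // 0 1
}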
@@ -7999,24 +8046,10 @@ struct AAMemoryBehaviorFunction final : public AAMemoryBehaviorImpl {
};
/// AAMemoryBehavior attribute for call sites.
-struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl {
+struct AAMemoryBehaviorCallSite final
+ : AACalleeToCallSite<AAMemoryBehavior, AAMemoryBehaviorImpl> {
AAMemoryBehaviorCallSite(const IRPosition &IRP, Attributor &A)
- : AAMemoryBehaviorImpl(IRP, A) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness liveness information and then it makes
- // sense to specialize attributes for call sites arguments instead of
- // redirecting requests to the callee argument.
- Function *F = getAssociatedFunction();
- const IRPosition &FnPos = IRPosition::function(*F);
- auto *FnAA =
- A.getAAFor<AAMemoryBehavior>(*this, FnPos, DepClassTy::REQUIRED);
- if (!FnAA)
- return indicatePessimisticFixpoint();
- return clampStateAndIndicateChange(getState(), FnAA->getState());
- }
+ : AACalleeToCallSite<AAMemoryBehavior, AAMemoryBehaviorImpl>(IRP, A) {}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
@@ -8031,6 +8064,11 @@ struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl {
ME = MemoryEffects::writeOnly();
A.removeAttrs(getIRPosition(), AttrKinds);
+ // Clear conflicting writable attribute.
+ if (ME.onlyReadsMemory())
+ for (Use &U : CB.args())
+ A.removeAttrs(IRPosition::callsite_argument(CB, U.getOperandNo()),
+ Attribute::Writable);
return A.manifestAttrs(
getIRPosition(), Attribute::getWithMemoryEffects(CB.getContext(), ME));
}
@@ -8821,6 +8859,108 @@ struct AAMemoryLocationCallSite final : AAMemoryLocationImpl {
};
} // namespace
+/// ------------------ denormal-fp-math Attribute -------------------------
+
+namespace {
+struct AADenormalFPMathImpl : public AADenormalFPMath {
+ AADenormalFPMathImpl(const IRPosition &IRP, Attributor &A)
+ : AADenormalFPMath(IRP, A) {}
+
+ const std::string getAsStr(Attributor *A) const override {
+ std::string Str("AADenormalFPMath[");
+ raw_string_ostream OS(Str);
+
+ DenormalState Known = getKnown();
+ if (Known.Mode.isValid())
+ OS << "denormal-fp-math=" << Known.Mode;
+ else
+ OS << "invalid";
+
+ if (Known.ModeF32.isValid())
+ OS << " denormal-fp-math-f32=" << Known.ModeF32;
+ OS << ']';
+ return OS.str();
+ }
+};
+
+struct AADenormalFPMathFunction final : AADenormalFPMathImpl {
+ AADenormalFPMathFunction(const IRPosition &IRP, Attributor &A)
+ : AADenormalFPMathImpl(IRP, A) {}
+
+ void initialize(Attributor &A) override {
+ const Function *F = getAnchorScope();
+ DenormalMode Mode = F->getDenormalModeRaw();
+ DenormalMode ModeF32 = F->getDenormalModeF32Raw();
+
+ // TODO: Handling this here prevents handling the case where a callee has
+ // a fixed denormal-fp-math with dynamic denormal-fp-math-f32, but is
+ // called from a function with a fully fixed mode.
+ if (ModeF32 == DenormalMode::getInvalid())
+ ModeF32 = Mode;
+ Known = DenormalState{Mode, ModeF32};
+ if (isModeFixed())
+ indicateFixpoint();
+ }
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus Change = ChangeStatus::UNCHANGED;
+
+ auto CheckCallSite = [=, &Change, &A](AbstractCallSite CS) {
+ Function *Caller = CS.getInstruction()->getFunction();
+ LLVM_DEBUG(dbgs() << "[AADenormalFPMath] Call " << Caller->getName()
+ << "->" << getAssociatedFunction()->getName() << '\n');
+
+ const auto *CallerInfo = A.getAAFor<AADenormalFPMath>(
+ *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
+ if (!CallerInfo)
+ return false;
+
+ Change = Change | clampStateAndIndicateChange(this->getState(),
+ CallerInfo->getState());
+ return true;
+ };
+
+ bool AllCallSitesKnown = true;
+ if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
+ return indicatePessimisticFixpoint();
+
+ if (Change == ChangeStatus::CHANGED && isModeFixed())
+ indicateFixpoint();
+ return Change;
+ }
+
+ ChangeStatus manifest(Attributor &A) override {
+ LLVMContext &Ctx = getAssociatedFunction()->getContext();
+
+ SmallVector<Attribute, 2> AttrToAdd;
+ SmallVector<StringRef, 2> AttrToRemove;
+ if (Known.Mode == DenormalMode::getDefault()) {
+ AttrToRemove.push_back("denormal-fp-math");
+ } else {
+ AttrToAdd.push_back(
+ Attribute::get(Ctx, "denormal-fp-math", Known.Mode.str()));
+ }
+
+ if (Known.ModeF32 != Known.Mode) {
+ AttrToAdd.push_back(
+ Attribute::get(Ctx, "denormal-fp-math-f32", Known.ModeF32.str()));
+ } else {
+ AttrToRemove.push_back("denormal-fp-math-f32");
+ }
+
+ auto &IRP = getIRPosition();
+
+ // TODO: There should be a combined add and remove API.
+ return A.removeAttrs(IRP, AttrToRemove) |
+ A.manifestAttrs(IRP, AttrToAdd, /*ForceReplace=*/true);
+ }
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FN_ATTR(denormal_fp_math)
+ }
+};
+} // namespace
+
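The function's denormal state starts from its own attributes and is then clamped against every caller; any disagreement degrades the state until it reaches the pessimistic fixpoint. A standalone sketch of that meet (the three-valued Mode enum is an invented stand-in for DenormalMode):

#include <iostream>
#include <vector>

enum class Mode { IEEE, PreserveSign, Dynamic }; // toy DenormalMode stand-in

// Meet two modes: equal modes survive, disagreement degrades to Dynamic.
Mode meet(Mode A, Mode B) { return A == B ? A : Mode::Dynamic; }

Mode deduceMode(Mode Own, const std::vector<Mode> &Callers) {
  Mode State = Own;
  for (Mode C : Callers)
    State = meet(State, C); // clampStateAndIndicateChange analogue
  return State;
}

int main() {
  std::cout << (deduceMode(Mode::IEEE, {Mode::IEEE, Mode::IEEE}) == Mode::IEEE)
            << '\n'; // 1: all callers agree, the attribute can be kept
  std::cout << (deduceMode(Mode::IEEE, {Mode::PreserveSign}) == Mode::Dynamic)
            << '\n'; // 1: disagreement forces the dynamic/pessimistic state
}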
/// ------------------ Value Constant Range Attribute -------------------------
namespace {
@@ -8911,7 +9051,8 @@ struct AAValueConstantRangeImpl : AAValueConstantRange {
if (!LVI || !CtxI)
return getWorstState(getBitWidth());
return LVI->getConstantRange(&getAssociatedValue(),
- const_cast<Instruction *>(CtxI));
+ const_cast<Instruction *>(CtxI),
+ /*UndefAllowed*/ false);
}
/// Return true if \p CtxI is valid for querying outside analyses.
@@ -9427,17 +9568,13 @@ struct AAValueConstantRangeCallSite : AAValueConstantRangeFunction {
};
struct AAValueConstantRangeCallSiteReturned
- : AACallSiteReturnedFromReturned<AAValueConstantRange,
- AAValueConstantRangeImpl,
- AAValueConstantRangeImpl::StateType,
- /* IntroduceCallBaseContext */ true> {
+ : AACalleeToCallSite<AAValueConstantRange, AAValueConstantRangeImpl,
+ AAValueConstantRangeImpl::StateType,
+ /* IntroduceCallBaseContext */ true> {
AAValueConstantRangeCallSiteReturned(const IRPosition &IRP, Attributor &A)
- : AACallSiteReturnedFromReturned<AAValueConstantRange,
- AAValueConstantRangeImpl,
- AAValueConstantRangeImpl::StateType,
- /* IntroduceCallBaseContext */ true>(IRP,
- A) {
- }
+ : AACalleeToCallSite<AAValueConstantRange, AAValueConstantRangeImpl,
+ AAValueConstantRangeImpl::StateType,
+ /* IntroduceCallBaseContext */ true>(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
@@ -9956,12 +10093,12 @@ struct AAPotentialConstantValuesCallSite : AAPotentialConstantValuesFunction {
};
struct AAPotentialConstantValuesCallSiteReturned
- : AACallSiteReturnedFromReturned<AAPotentialConstantValues,
- AAPotentialConstantValuesImpl> {
+ : AACalleeToCallSite<AAPotentialConstantValues,
+ AAPotentialConstantValuesImpl> {
AAPotentialConstantValuesCallSiteReturned(const IRPosition &IRP,
Attributor &A)
- : AACallSiteReturnedFromReturned<AAPotentialConstantValues,
- AAPotentialConstantValuesImpl>(IRP, A) {}
+ : AACalleeToCallSite<AAPotentialConstantValues,
+ AAPotentialConstantValuesImpl>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
@@ -10101,7 +10238,8 @@ struct AANoUndefFloating : public AANoUndefImpl {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AANoUndefImpl::initialize(A);
- if (!getState().isAtFixpoint())
+ if (!getState().isAtFixpoint() && getAnchorScope() &&
+ !getAnchorScope()->isDeclaration())
if (Instruction *CtxI = getCtxI())
followUsesInMBEC(*this, A, getState(), *CtxI);
}
@@ -10148,26 +10286,18 @@ struct AANoUndefFloating : public AANoUndefImpl {
};
struct AANoUndefReturned final
- : AAReturnedFromReturnedValues<AANoUndef, AANoUndefImpl,
- AANoUndef::StateType, false,
- Attribute::NoUndef> {
+ : AAReturnedFromReturnedValues<AANoUndef, AANoUndefImpl> {
AANoUndefReturned(const IRPosition &IRP, Attributor &A)
- : AAReturnedFromReturnedValues<AANoUndef, AANoUndefImpl,
- AANoUndef::StateType, false,
- Attribute::NoUndef>(IRP, A) {}
+ : AAReturnedFromReturnedValues<AANoUndef, AANoUndefImpl>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(noundef) }
};
struct AANoUndefArgument final
- : AAArgumentFromCallSiteArguments<AANoUndef, AANoUndefImpl,
- AANoUndef::StateType, false,
- Attribute::NoUndef> {
+ : AAArgumentFromCallSiteArguments<AANoUndef, AANoUndefImpl> {
AANoUndefArgument(const IRPosition &IRP, Attributor &A)
- : AAArgumentFromCallSiteArguments<AANoUndef, AANoUndefImpl,
- AANoUndef::StateType, false,
- Attribute::NoUndef>(IRP, A) {}
+ : AAArgumentFromCallSiteArguments<AANoUndef, AANoUndefImpl>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(noundef) }
@@ -10182,13 +10312,9 @@ struct AANoUndefCallSiteArgument final : AANoUndefFloating {
};
struct AANoUndefCallSiteReturned final
- : AACallSiteReturnedFromReturned<AANoUndef, AANoUndefImpl,
- AANoUndef::StateType, false,
- Attribute::NoUndef> {
+ : AACalleeToCallSite<AANoUndef, AANoUndefImpl> {
AANoUndefCallSiteReturned(const IRPosition &IRP, Attributor &A)
- : AACallSiteReturnedFromReturned<AANoUndef, AANoUndefImpl,
- AANoUndef::StateType, false,
- Attribute::NoUndef>(IRP, A) {}
+ : AACalleeToCallSite<AANoUndef, AANoUndefImpl>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noundef) }
@@ -10212,7 +10338,6 @@ struct AANoFPClassImpl : AANoFPClass {
A.getAttrs(getIRPosition(), {Attribute::NoFPClass}, Attrs, false);
for (const auto &Attr : Attrs) {
addKnownBits(Attr.getNoFPClass());
- return;
}
const DataLayout &DL = A.getDataLayout();
@@ -10248,8 +10373,22 @@ struct AANoFPClassImpl : AANoFPClass {
/*Depth=*/0, TLI, AC, I, DT);
State.addKnownBits(~KnownFPClass.KnownFPClasses);
- bool TrackUse = false;
- return TrackUse;
+ if (auto *CI = dyn_cast<CallInst>(UseV)) {
+ // Special case FP intrinsic with struct return type.
+ switch (CI->getIntrinsicID()) {
+ case Intrinsic::frexp:
+ return true;
+ case Intrinsic::not_intrinsic:
+ // TODO: Could recognize math libcalls
+ return false;
+ default:
+ break;
+ }
+ }
+
+ if (!UseV->getType()->isFPOrFPVectorTy())
+ return false;
+ return !isa<LoadInst, AtomicRMWInst>(UseV);
}
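AANoFPClass accumulates, per use, which floating-point classes the value cannot be, as a bitmask. A standalone model of that bookkeeping (the class bits here are invented and far coarser than LLVM's FPClassTest):

#include <cstdint>
#include <iostream>

// Toy FP class lattice: one bit per class a value might belong to.
enum : uint32_t { fcNan = 1, fcInf = 2, fcZero = 4, fcNormal = 8, fcAll = 15 };

struct NoFPClassState {
  uint32_t KnownNotBits = 0; // classes the value is known NOT to be

  // Mirrors State.addKnownBits(~KnownFPClasses): fold in the classes newly
  // ruled out at one use of the value.
  void addKnownBits(uint32_t Bits) { KnownNotBits |= Bits & fcAll; }
};

int main() {
  NoFPClassState S;
  S.addKnownBits(fcNan);         // e.g. guarded by an !isnan check
  S.addKnownBits(fcNan | fcInf); // a later use also rules out infinities
  std::cout << (S.KnownNotBits == (fcNan | fcInf)) << '\n'; // 1
}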
const std::string getAsStr(Attributor *A) const override {
@@ -10339,9 +10478,9 @@ struct AANoFPClassCallSiteArgument final : AANoFPClassFloating {
};
struct AANoFPClassCallSiteReturned final
- : AACallSiteReturnedFromReturned<AANoFPClass, AANoFPClassImpl> {
+ : AACalleeToCallSite<AANoFPClass, AANoFPClassImpl> {
AANoFPClassCallSiteReturned(const IRPosition &IRP, Attributor &A)
- : AACallSiteReturnedFromReturned<AANoFPClass, AANoFPClassImpl>(IRP, A) {}
+ : AACalleeToCallSite<AANoFPClass, AANoFPClassImpl>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
@@ -10446,15 +10585,12 @@ struct AACallEdgesCallSite : public AACallEdgesImpl {
return Change;
}
- // Process callee metadata if available.
- if (auto *MD = getCtxI()->getMetadata(LLVMContext::MD_callees)) {
- for (const auto &Op : MD->operands()) {
- Function *Callee = mdconst::dyn_extract_or_null<Function>(Op);
- if (Callee)
- addCalledFunction(Callee, Change);
- }
- return Change;
- }
+ if (CB->isIndirectCall())
+ if (auto *IndirectCallAA = A.getAAFor<AAIndirectCallInfo>(
+ *this, getIRPosition(), DepClassTy::OPTIONAL))
+ if (IndirectCallAA->foreachCallee(
+ [&](Function *Fn) { return VisitValue(*Fn, CB); }))
+ return Change;
// The most simple case.
ProcessCalledOperand(CB->getCalledOperand(), CB);
@@ -10519,28 +10655,26 @@ struct AAInterFnReachabilityFunction
bool instructionCanReach(
Attributor &A, const Instruction &From, const Function &To,
- const AA::InstExclusionSetTy *ExclusionSet,
- SmallPtrSet<const Function *, 16> *Visited) const override {
+ const AA::InstExclusionSetTy *ExclusionSet) const override {
assert(From.getFunction() == getAnchorScope() && "Queried the wrong AA!");
auto *NonConstThis = const_cast<AAInterFnReachabilityFunction *>(this);
RQITy StackRQI(A, From, To, ExclusionSet, false);
typename RQITy::Reachable Result;
if (!NonConstThis->checkQueryCache(A, StackRQI, Result))
- return NonConstThis->isReachableImpl(A, StackRQI);
+ return NonConstThis->isReachableImpl(A, StackRQI,
+ /*IsTemporaryRQI=*/true);
return Result == RQITy::Reachable::Yes;
}
- bool isReachableImpl(Attributor &A, RQITy &RQI) override {
- return isReachableImpl(A, RQI, nullptr);
- }
-
bool isReachableImpl(Attributor &A, RQITy &RQI,
- SmallPtrSet<const Function *, 16> *Visited) {
-
- SmallPtrSet<const Function *, 16> LocalVisited;
- if (!Visited)
- Visited = &LocalVisited;
+ bool IsTemporaryRQI) override {
+ const Instruction *EntryI =
+ &RQI.From->getFunction()->getEntryBlock().front();
+ if (EntryI != RQI.From &&
+ !instructionCanReach(A, *EntryI, *RQI.To, nullptr))
+ return rememberResult(A, RQITy::Reachable::No, RQI, false,
+ IsTemporaryRQI);
auto CheckReachableCallBase = [&](CallBase *CB) {
auto *CBEdges = A.getAAFor<AACallEdges>(
@@ -10554,8 +10688,7 @@ struct AAInterFnReachabilityFunction
for (Function *Fn : CBEdges->getOptimisticEdges()) {
if (Fn == RQI.To)
return false;
- if (!Visited->insert(Fn).second)
- continue;
+
if (Fn->isDeclaration()) {
if (Fn->hasFnAttribute(Attribute::NoCallback))
continue;
@@ -10563,15 +10696,20 @@ struct AAInterFnReachabilityFunction
return false;
}
- const AAInterFnReachability *InterFnReachability = this;
- if (Fn != getAnchorScope())
- InterFnReachability = A.getAAFor<AAInterFnReachability>(
- *this, IRPosition::function(*Fn), DepClassTy::OPTIONAL);
+ if (Fn == getAnchorScope()) {
+ if (EntryI == RQI.From)
+ continue;
+ return false;
+ }
+
+ const AAInterFnReachability *InterFnReachability =
+ A.getAAFor<AAInterFnReachability>(*this, IRPosition::function(*Fn),
+ DepClassTy::OPTIONAL);
const Instruction &FnFirstInst = Fn->getEntryBlock().front();
if (!InterFnReachability ||
InterFnReachability->instructionCanReach(A, FnFirstInst, *RQI.To,
- RQI.ExclusionSet, Visited))
+ RQI.ExclusionSet))
return false;
}
return true;
@@ -10583,10 +10721,12 @@ struct AAInterFnReachabilityFunction
// Determine call like instructions that we can reach from the inst.
auto CheckCallBase = [&](Instruction &CBInst) {
- if (!IntraFnReachability || !IntraFnReachability->isAssumedReachable(
- A, *RQI.From, CBInst, RQI.ExclusionSet))
+ // There are usually fewer nodes in the call graph than in the CFG; check
+ // inter-function reachability first.
+ if (CheckReachableCallBase(cast<CallBase>(&CBInst)))
return true;
- return CheckReachableCallBase(cast<CallBase>(&CBInst));
+ return IntraFnReachability && !IntraFnReachability->isAssumedReachable(
+ A, *RQI.From, CBInst, RQI.ExclusionSet);
};
bool UsedExclusionSet = /* conservative */ true;
@@ -10594,16 +10734,14 @@ struct AAInterFnReachabilityFunction
if (!A.checkForAllCallLikeInstructions(CheckCallBase, *this,
UsedAssumedInformation,
/* CheckBBLivenessOnly */ true))
- return rememberResult(A, RQITy::Reachable::Yes, RQI, UsedExclusionSet);
+ return rememberResult(A, RQITy::Reachable::Yes, RQI, UsedExclusionSet,
+ IsTemporaryRQI);
- return rememberResult(A, RQITy::Reachable::No, RQI, UsedExclusionSet);
+ return rememberResult(A, RQITy::Reachable::No, RQI, UsedExclusionSet,
+ IsTemporaryRQI);
}
void trackStatistics() const override {}
-
-private:
- SmallVector<RQITy *> QueryVector;
- DenseSet<RQITy *> QueryCache;
};
} // namespace
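The rewritten inter-function query uses a cheap pre-filter: if even the entry instruction of the function cannot reach the target, the specific From instruction cannot either, and the negative answer is cached immediately. A standalone sketch of reachability with that observation noted (toy call graph, invented names):

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

using CallGraph = std::map<std::string, std::vector<std::string>>;

bool canReach(const CallGraph &G, const std::string &From,
              const std::string &To, std::set<std::string> &Visited) {
  if (From == To)
    return true;
  if (!Visited.insert(From).second)
    return false;
  auto It = G.find(From);
  if (It == G.end())
    return false;
  for (const auto &Callee : It->second)
    if (canReach(G, Callee, To, Visited))
      return true;
  return false;
}

// Pre-filter: whatever "From" instruction inside F is queried, it can reach
// at most what the entry of F reaches, so a negative entry query settles it.
bool instructionCanReach(const CallGraph &G, const std::string &F,
                         const std::string &Target) {
  std::set<std::string> Visited;
  return canReach(G, F, Target, Visited);
}

int main() {
  CallGraph G = {{"main", {"a"}}, {"a", {"b"}}, {"b", {}}};
  std::cout << instructionCanReach(G, "main", "b") << ' '
            << instructionCanReach(G, "b", "a") << '\n'; // 1 0
}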
@@ -10880,64 +11018,104 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
// Simplify the operands first.
bool UsedAssumedInformation = false;
- const auto &SimplifiedLHS = A.getAssumedSimplified(
- IRPosition::value(*LHS, getCallBaseContext()), *this,
- UsedAssumedInformation, AA::Intraprocedural);
- if (!SimplifiedLHS.has_value())
+ SmallVector<AA::ValueAndContext> LHSValues, RHSValues;
+ auto GetSimplifiedValues = [&](Value &V,
+ SmallVector<AA::ValueAndContext> &Values) {
+ if (!A.getAssumedSimplifiedValues(
+ IRPosition::value(V, getCallBaseContext()), this, Values,
+ AA::Intraprocedural, UsedAssumedInformation)) {
+ Values.clear();
+ Values.push_back(AA::ValueAndContext{V, II.I.getCtxI()});
+ }
+ return Values.empty();
+ };
+ if (GetSimplifiedValues(*LHS, LHSValues))
return true;
- if (!*SimplifiedLHS)
- return false;
- LHS = *SimplifiedLHS;
-
- const auto &SimplifiedRHS = A.getAssumedSimplified(
- IRPosition::value(*RHS, getCallBaseContext()), *this,
- UsedAssumedInformation, AA::Intraprocedural);
- if (!SimplifiedRHS.has_value())
+ if (GetSimplifiedValues(*RHS, RHSValues))
return true;
- if (!*SimplifiedRHS)
- return false;
- RHS = *SimplifiedRHS;
LLVMContext &Ctx = LHS->getContext();
- // Handle the trivial case first in which we don't even need to think about
- // null or non-null.
- if (LHS == RHS &&
- (CmpInst::isTrueWhenEqual(Pred) || CmpInst::isFalseWhenEqual(Pred))) {
- Constant *NewV = ConstantInt::get(Type::getInt1Ty(Ctx),
- CmpInst::isTrueWhenEqual(Pred));
- addValue(A, getState(), *NewV, /* CtxI */ nullptr, II.S,
- getAnchorScope());
- return true;
- }
- // From now on we only handle equalities (==, !=).
- if (!CmpInst::isEquality(Pred))
- return false;
+ InformationCache &InfoCache = A.getInfoCache();
+ Instruction *CmpI = dyn_cast<Instruction>(&Cmp);
+ Function *F = CmpI ? CmpI->getFunction() : nullptr;
+ const auto *DT =
+ F ? InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*F)
+ : nullptr;
+ const auto *TLI =
+ F ? A.getInfoCache().getTargetLibraryInfoForFunction(*F) : nullptr;
+ auto *AC =
+ F ? InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*F)
+ : nullptr;
- bool LHSIsNull = isa<ConstantPointerNull>(LHS);
- bool RHSIsNull = isa<ConstantPointerNull>(RHS);
- if (!LHSIsNull && !RHSIsNull)
- return false;
+ const DataLayout &DL = A.getDataLayout();
+ SimplifyQuery Q(DL, TLI, DT, AC, CmpI);
- // Left is the nullptr ==/!= non-nullptr case. We'll use AANonNull on the
- // non-nullptr operand and if we assume it's non-null we can conclude the
- // result of the comparison.
- assert((LHSIsNull || RHSIsNull) &&
- "Expected nullptr versus non-nullptr comparison at this point");
+ auto CheckPair = [&](Value &LHSV, Value &RHSV) {
+ if (isa<UndefValue>(LHSV) || isa<UndefValue>(RHSV)) {
+ addValue(A, getState(), *UndefValue::get(Cmp.getType()),
+ /* CtxI */ nullptr, II.S, getAnchorScope());
+ return true;
+ }
- // The index is the operand that we assume is not null.
- unsigned PtrIdx = LHSIsNull;
- bool IsKnownNonNull;
- bool IsAssumedNonNull = AA::hasAssumedIRAttr<Attribute::NonNull>(
- A, this, IRPosition::value(*(PtrIdx ? RHS : LHS)), DepClassTy::REQUIRED,
- IsKnownNonNull);
- if (!IsAssumedNonNull)
- return false;
+ // Handle the trivial case first in which we don't even need to think
+ // about null or non-null.
+ if (&LHSV == &RHSV &&
+ (CmpInst::isTrueWhenEqual(Pred) || CmpInst::isFalseWhenEqual(Pred))) {
+ Constant *NewV = ConstantInt::get(Type::getInt1Ty(Ctx),
+ CmpInst::isTrueWhenEqual(Pred));
+ addValue(A, getState(), *NewV, /* CtxI */ nullptr, II.S,
+ getAnchorScope());
+ return true;
+ }
+
+ auto *TypedLHS = AA::getWithType(LHSV, *LHS->getType());
+ auto *TypedRHS = AA::getWithType(RHSV, *RHS->getType());
+ if (TypedLHS && TypedRHS) {
+ Value *NewV = simplifyCmpInst(Pred, TypedLHS, TypedRHS, Q);
+ if (NewV && NewV != &Cmp) {
+ addValue(A, getState(), *NewV, /* CtxI */ nullptr, II.S,
+ getAnchorScope());
+ return true;
+ }
+ }
+
+ // From now on we only handle equalities (==, !=).
+ if (!CmpInst::isEquality(Pred))
+ return false;
+
+ bool LHSIsNull = isa<ConstantPointerNull>(LHSV);
+ bool RHSIsNull = isa<ConstantPointerNull>(RHSV);
+ if (!LHSIsNull && !RHSIsNull)
+ return false;
+
+ // Left is the nullptr ==/!= non-nullptr case. We'll use AANonNull on the
+ // non-nullptr operand and if we assume it's non-null we can conclude the
+ // result of the comparison.
+ assert((LHSIsNull || RHSIsNull) &&
+ "Expected nullptr versus non-nullptr comparison at this point");
- // The new value depends on the predicate, true for != and false for ==.
- Constant *NewV =
- ConstantInt::get(Type::getInt1Ty(Ctx), Pred == CmpInst::ICMP_NE);
- addValue(A, getState(), *NewV, /* CtxI */ nullptr, II.S, getAnchorScope());
+ // The index is the operand that we assume is not null.
+ unsigned PtrIdx = LHSIsNull;
+ bool IsKnownNonNull;
+ bool IsAssumedNonNull = AA::hasAssumedIRAttr<Attribute::NonNull>(
+ A, this, IRPosition::value(*(PtrIdx ? &RHSV : &LHSV)),
+ DepClassTy::REQUIRED, IsKnownNonNull);
+ if (!IsAssumedNonNull)
+ return false;
+
+ // The new value depends on the predicate, true for != and false for ==.
+ Constant *NewV =
+ ConstantInt::get(Type::getInt1Ty(Ctx), Pred == CmpInst::ICMP_NE);
+ addValue(A, getState(), *NewV, /* CtxI */ nullptr, II.S,
+ getAnchorScope());
+ return true;
+ };
+
+ for (auto &LHSValue : LHSValues)
+ for (auto &RHSValue : RHSValues)
+ if (!CheckPair(*LHSValue.getValue(), *RHSValue.getValue()))
+ return false;
return true;
}
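With several simplified candidates per compare operand, the compare folds only if every (LHS, RHS) pair folds; a single unfoldable pair abandons the simplification, and otherwise each pair contributes one potential value. A standalone model (foldPair stands in for simplifyCmpInst; everything else is invented):

#include <iostream>
#include <optional>
#include <vector>

// Stand-in for simplifyCmpInst on one concrete (LHS, RHS) pair.
std::optional<bool> foldPair(int L, int R) {
  return L == R; // toy: concrete ints always fold under ==
}

// Fold a compare whose operands each simplified to a set of candidates.
std::optional<std::vector<bool>> foldCmpOverSets(const std::vector<int> &LHS,
                                                 const std::vector<int> &RHS) {
  std::vector<bool> PotentialResults;
  for (int L : LHS)
    for (int R : RHS) {
      std::optional<bool> V = foldPair(L, R);
      if (!V)
        return std::nullopt;            // one pair failed: give up entirely
      PotentialResults.push_back(*V);   // each pair adds a potential value
    }
  return PotentialResults;
}

int main() {
  auto R = foldCmpOverSets({1, 2}, {1});
  std::cout << R.has_value() << ' ' << R->size() << '\n'; // 1 2
}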
@@ -11152,9 +11330,8 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
SmallVectorImpl<ItemInfo> &Worklist,
SmallMapVector<const Function *, LivenessInfo, 4> &LivenessAAs) {
if (auto *CI = dyn_cast<CmpInst>(&I))
- if (handleCmp(A, *CI, CI->getOperand(0), CI->getOperand(1),
- CI->getPredicate(), II, Worklist))
- return true;
+ return handleCmp(A, *CI, CI->getOperand(0), CI->getOperand(1),
+ CI->getPredicate(), II, Worklist);
switch (I.getOpcode()) {
case Instruction::Select:
@@ -11272,12 +11449,12 @@ struct AAPotentialValuesArgument final : AAPotentialValuesImpl {
ChangeStatus updateImpl(Attributor &A) override {
auto AssumedBefore = getAssumed();
- unsigned CSArgNo = getCallSiteArgNo();
+ unsigned ArgNo = getCalleeArgNo();
bool UsedAssumedInformation = false;
SmallVector<AA::ValueAndContext> Values;
auto CallSitePred = [&](AbstractCallSite ACS) {
- const auto CSArgIRP = IRPosition::callsite_argument(ACS, CSArgNo);
+ const auto CSArgIRP = IRPosition::callsite_argument(ACS, ArgNo);
if (CSArgIRP.getPositionKind() == IRP_INVALID)
return false;
@@ -11889,6 +12066,455 @@ struct AAUnderlyingObjectsFunction final : AAUnderlyingObjectsImpl {
};
} // namespace
+/// ------------------------ Global Value Info -------------------------------
+namespace {
+struct AAGlobalValueInfoFloating : public AAGlobalValueInfo {
+ AAGlobalValueInfoFloating(const IRPosition &IRP, Attributor &A)
+ : AAGlobalValueInfo(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {}
+
+ bool checkUse(Attributor &A, const Use &U, bool &Follow,
+ SmallVectorImpl<const Value *> &Worklist) {
+ Instruction *UInst = dyn_cast<Instruction>(U.getUser());
+ if (!UInst) {
+ Follow = true;
+ return true;
+ }
+
+ LLVM_DEBUG(dbgs() << "[AAGlobalValueInfo] Check use: " << *U.get() << " in "
+ << *UInst << "\n");
+
+ if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
+ int Idx = &Cmp->getOperandUse(0) == &U;
+ if (isa<Constant>(Cmp->getOperand(Idx)))
+ return true;
+ return U == &getAnchorValue();
+ }
+
+ // Explicitly catch return instructions.
+ if (isa<ReturnInst>(UInst)) {
+ auto CallSitePred = [&](AbstractCallSite ACS) {
+ Worklist.push_back(ACS.getInstruction());
+ return true;
+ };
+ bool UsedAssumedInformation = false;
+ // TODO: We should traverse the uses or add a "non-call-site" CB.
+ if (!A.checkForAllCallSites(CallSitePred, *UInst->getFunction(),
+ /*RequireAllCallSites=*/true, this,
+ UsedAssumedInformation))
+ return false;
+ return true;
+ }
+
+ // For now we only use special logic for call sites. However, the tracker
+ // itself knows about a lot of other non-capturing cases already.
+ auto *CB = dyn_cast<CallBase>(UInst);
+ if (!CB)
+ return false;
+ // Direct calls are OK uses.
+ if (CB->isCallee(&U))
+ return true;
+ // Non-argument uses are scary.
+ if (!CB->isArgOperand(&U))
+ return false;
+ // TODO: Iterate callees.
+ auto *Fn = dyn_cast<Function>(CB->getCalledOperand());
+ if (!Fn || !A.isFunctionIPOAmendable(*Fn))
+ return false;
+
+ unsigned ArgNo = CB->getArgOperandNo(&U);
+ Worklist.push_back(Fn->getArg(ArgNo));
+ return true;
+ }
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ unsigned NumUsesBefore = Uses.size();
+
+ SmallPtrSet<const Value *, 8> Visited;
+ SmallVector<const Value *> Worklist;
+ Worklist.push_back(&getAnchorValue());
+
+ auto UsePred = [&](const Use &U, bool &Follow) -> bool {
+ Uses.insert(&U);
+ switch (DetermineUseCaptureKind(U, nullptr)) {
+ case UseCaptureKind::NO_CAPTURE:
+ return checkUse(A, U, Follow, Worklist);
+ case UseCaptureKind::MAY_CAPTURE:
+ return checkUse(A, U, Follow, Worklist);
+ case UseCaptureKind::PASSTHROUGH:
+ Follow = true;
+ return true;
+ }
+ return true;
+ };
+ auto EquivalentUseCB = [&](const Use &OldU, const Use &NewU) {
+ Uses.insert(&OldU);
+ return true;
+ };
+
+ while (!Worklist.empty()) {
+ const Value *V = Worklist.pop_back_val();
+ if (!Visited.insert(V).second)
+ continue;
+ if (!A.checkForAllUses(UsePred, *this, *V,
+ /* CheckBBLivenessOnly */ true,
+ DepClassTy::OPTIONAL,
+ /* IgnoreDroppableUses */ true, EquivalentUseCB)) {
+ return indicatePessimisticFixpoint();
+ }
+ }
+
+ return Uses.size() == NumUsesBefore ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
+
+ bool isPotentialUse(const Use &U) const override {
+ return !isValidState() || Uses.contains(&U);
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr(Attributor *A) const override {
+ return "[" + std::to_string(Uses.size()) + " uses]";
+ }
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(GlobalValuesTracked);
+ }
+
+private:
+ /// Set of (transitive) uses of this GlobalValue.
+ SmallPtrSet<const Use *, 8> Uses;
+};
+} // namespace
+
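The global-value walk follows uses across call boundaries: a direct-call use is benign, an argument use re-seeds the walk at the callee's formal parameter, and anything else is treated conservatively. A standalone toy of that worklist (all functions, values, and the Use record are invented):

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

// Toy "use": a value flows either into a call argument (re-seed the walk at
// the callee's parameter) or into some opaque instruction (tracked as a use).
struct Use {
  std::string User;
  std::string ForwardedTo; // non-empty: the formal parameter it becomes
};

std::map<std::string, std::vector<Use>> UsesOf = {
    {"@global", {{"call f(%0)", "f.arg0"}}},
    {"f.arg0", {{"load", ""}}},
};

std::set<std::string> collectTransitiveUses(const std::string &Root) {
  std::set<std::string> Seen, Visited;
  std::vector<std::string> Worklist{Root};
  while (!Worklist.empty()) {
    std::string V = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(V).second)
      continue;
    for (const Use &U : UsesOf[V]) {
      Seen.insert(U.User);
      if (!U.ForwardedTo.empty())
        Worklist.push_back(U.ForwardedTo); // continue inside the callee
    }
  }
  return Seen;
}

int main() {
  for (const auto &U : collectTransitiveUses("@global"))
    std::cout << U << '\n'; // both the call and the load in the callee
}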
+/// ------------------------ Indirect Call Info -------------------------------
+namespace {
+struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo {
+ AAIndirectCallInfoCallSite(const IRPosition &IRP, Attributor &A)
+ : AAIndirectCallInfo(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ auto *MD = getCtxI()->getMetadata(LLVMContext::MD_callees);
+ if (!MD && !A.isClosedWorldModule())
+ return;
+
+ if (MD) {
+ for (const auto &Op : MD->operands())
+ if (Function *Callee = mdconst::dyn_extract_or_null<Function>(Op))
+ PotentialCallees.insert(Callee);
+ } else if (A.isClosedWorldModule()) {
+ ArrayRef<Function *> IndirectlyCallableFunctions =
+ A.getInfoCache().getIndirectlyCallableFunctions(A);
+ PotentialCallees.insert(IndirectlyCallableFunctions.begin(),
+ IndirectlyCallableFunctions.end());
+ }
+
+ if (PotentialCallees.empty())
+ indicateOptimisticFixpoint();
+ }
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ CallBase *CB = cast<CallBase>(getCtxI());
+ const Use &CalleeUse = CB->getCalledOperandUse();
+ Value *FP = CB->getCalledOperand();
+
+ SmallSetVector<Function *, 4> AssumedCalleesNow;
+ bool AllCalleesKnownNow = AllCalleesKnown;
+
+ auto CheckPotentialCalleeUse = [&](Function &PotentialCallee,
+ bool &UsedAssumedInformation) {
+ const auto *GIAA = A.getAAFor<AAGlobalValueInfo>(
+ *this, IRPosition::value(PotentialCallee), DepClassTy::OPTIONAL);
+ if (!GIAA || GIAA->isPotentialUse(CalleeUse))
+ return true;
+ UsedAssumedInformation = !GIAA->isAtFixpoint();
+ return false;
+ };
+
+ auto AddPotentialCallees = [&]() {
+ for (auto *PotentialCallee : PotentialCallees) {
+ bool UsedAssumedInformation = false;
+ if (CheckPotentialCalleeUse(*PotentialCallee, UsedAssumedInformation))
+ AssumedCalleesNow.insert(PotentialCallee);
+ }
+ };
+
+ // Use simplification to find potential callees; if !callees was present,
+ // fall back to that set if necessary.
+ bool UsedAssumedInformation = false;
+ SmallVector<AA::ValueAndContext> Values;
+ if (!A.getAssumedSimplifiedValues(IRPosition::value(*FP), this, Values,
+ AA::ValueScope::AnyScope,
+ UsedAssumedInformation)) {
+ if (PotentialCallees.empty())
+ return indicatePessimisticFixpoint();
+ AddPotentialCallees();
+ }
+
+ // Try to find a reason for \p Fn not to be a potential callee. If none was
+ // found, add it to the assumed callees set.
+ auto CheckPotentialCallee = [&](Function &Fn) {
+ if (!PotentialCallees.empty() && !PotentialCallees.count(&Fn))
+ return false;
+
+ auto &CachedResult = FilterResults[&Fn];
+ if (CachedResult.has_value())
+ return CachedResult.value();
+
+ bool UsedAssumedInformation = false;
+ if (!CheckPotentialCalleeUse(Fn, UsedAssumedInformation)) {
+ if (!UsedAssumedInformation)
+ CachedResult = false;
+ return false;
+ }
+
+ int NumFnArgs = Fn.arg_size();
+ int NumCBArgs = CB->arg_size();
+
+ // Check if any excess argument (which we fill up with poison) is known to
+ // be UB on undef.
+ for (int I = NumCBArgs; I < NumFnArgs; ++I) {
+ bool IsKnown = false;
+ if (AA::hasAssumedIRAttr<Attribute::NoUndef>(
+ A, this, IRPosition::argument(*Fn.getArg(I)),
+ DepClassTy::OPTIONAL, IsKnown)) {
+ if (IsKnown)
+ CachedResult = false;
+ return false;
+ }
+ }
+
+ CachedResult = true;
+ return true;
+ };
+
+ // Check the simplification result, prune known-UB callees, and restrict
+ // it to the !callees set, if present.
+ for (auto &VAC : Values) {
+ if (isa<UndefValue>(VAC.getValue()))
+ continue;
+ if (isa<ConstantPointerNull>(VAC.getValue()) &&
+ VAC.getValue()->getType()->getPointerAddressSpace() == 0)
+ continue;
+ // TODO: Check for known UB, e.g., poison + noundef.
+ if (auto *VACFn = dyn_cast<Function>(VAC.getValue())) {
+ if (CheckPotentialCallee(*VACFn))
+ AssumedCalleesNow.insert(VACFn);
+ continue;
+ }
+ if (!PotentialCallees.empty()) {
+ AddPotentialCallees();
+ break;
+ }
+ AllCalleesKnownNow = false;
+ }
+
+ if (AssumedCalleesNow == AssumedCallees &&
+ AllCalleesKnown == AllCalleesKnownNow)
+ return ChangeStatus::UNCHANGED;
+
+ std::swap(AssumedCallees, AssumedCalleesNow);
+ AllCalleesKnown = AllCalleesKnownNow;
+ return ChangeStatus::CHANGED;
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ // If we can't specialize at all, give up now.
+ if (!AllCalleesKnown && AssumedCallees.empty())
+ return ChangeStatus::UNCHANGED;
+
+ CallBase *CB = cast<CallBase>(getCtxI());
+ bool UsedAssumedInformation = false;
+ if (A.isAssumedDead(*CB, this, /*LivenessAA=*/nullptr,
+ UsedAssumedInformation))
+ return ChangeStatus::UNCHANGED;
+
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+ Value *FP = CB->getCalledOperand();
+ if (FP->getType()->getPointerAddressSpace())
+ FP = new AddrSpaceCastInst(FP, PointerType::get(FP->getType(), 0),
+ FP->getName() + ".as0", CB);
+
+ bool CBIsVoid = CB->getType()->isVoidTy();
+ Instruction *IP = CB;
+ FunctionType *CSFT = CB->getFunctionType();
+ SmallVector<Value *> CSArgs(CB->arg_begin(), CB->arg_end());
+
+ // If we know all callees and there are none, the call site is (effectively)
+ // dead (or UB).
+ if (AssumedCallees.empty()) {
+ assert(AllCalleesKnown &&
+ "Expected all callees to be known if there are none.");
+ A.changeToUnreachableAfterManifest(CB);
+ return ChangeStatus::CHANGED;
+ }
+
+ // Special handling for the single callee case.
+ if (AllCalleesKnown && AssumedCallees.size() == 1) {
+ auto *NewCallee = AssumedCallees.front();
+ if (isLegalToPromote(*CB, NewCallee)) {
+ promoteCall(*CB, NewCallee, nullptr);
+ return ChangeStatus::CHANGED;
+ }
+ Instruction *NewCall = CallInst::Create(FunctionCallee(CSFT, NewCallee),
+ CSArgs, CB->getName(), CB);
+ if (!CBIsVoid)
+ A.changeAfterManifest(IRPosition::callsite_returned(*CB), *NewCall);
+ A.deleteAfterManifest(*CB);
+ return ChangeStatus::CHANGED;
+ }
+
+ // For each potential value we create a conditional
+ //
+ // ```
+ // if (ptr == value) value(args);
+ // else ...
+ // ```
+ //
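+    // As a sketch with hypothetical callees @f1 and @f2, the final chain is:
+    //
+    // ```
+    // if (ptr == @f1) @f1(args);
+    // else if (ptr == @f2) @f2(args);
+    // else ptr(args); // or `unreachable` if all callees are known
+    // ```
+    //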
+ bool SpecializedForAnyCallees = false;
+ bool SpecializedForAllCallees = AllCalleesKnown;
+ ICmpInst *LastCmp = nullptr;
+ SmallVector<Function *, 8> SkippedAssumedCallees;
+ SmallVector<std::pair<CallInst *, Instruction *>> NewCalls;
+ for (Function *NewCallee : AssumedCallees) {
+ if (!A.shouldSpecializeCallSiteForCallee(*this, *CB, *NewCallee)) {
+ SkippedAssumedCallees.push_back(NewCallee);
+ SpecializedForAllCallees = false;
+ continue;
+ }
+ SpecializedForAnyCallees = true;
+
+ LastCmp = new ICmpInst(IP, llvm::CmpInst::ICMP_EQ, FP, NewCallee);
+ Instruction *ThenTI =
+ SplitBlockAndInsertIfThen(LastCmp, IP, /* Unreachable */ false);
+ BasicBlock *CBBB = CB->getParent();
+ A.registerManifestAddedBasicBlock(*ThenTI->getParent());
+ A.registerManifestAddedBasicBlock(*CBBB);
+ auto *SplitTI = cast<BranchInst>(LastCmp->getNextNode());
+ BasicBlock *ElseBB;
+ if (IP == CB) {
+ ElseBB = BasicBlock::Create(ThenTI->getContext(), "",
+ ThenTI->getFunction(), CBBB);
+ A.registerManifestAddedBasicBlock(*ElseBB);
+ IP = BranchInst::Create(CBBB, ElseBB);
+ SplitTI->replaceUsesOfWith(CBBB, ElseBB);
+ } else {
+ ElseBB = IP->getParent();
+ ThenTI->replaceUsesOfWith(ElseBB, CBBB);
+ }
+ CastInst *RetBC = nullptr;
+ CallInst *NewCall = nullptr;
+ if (isLegalToPromote(*CB, NewCallee)) {
+ auto *CBClone = cast<CallBase>(CB->clone());
+ CBClone->insertBefore(ThenTI);
+ NewCall = &cast<CallInst>(promoteCall(*CBClone, NewCallee, &RetBC));
+ } else {
+ NewCall = CallInst::Create(FunctionCallee(CSFT, NewCallee), CSArgs,
+ CB->getName(), ThenTI);
+ }
+ NewCalls.push_back({NewCall, RetBC});
+ }
+
+ auto AttachCalleeMetadata = [&](CallBase &IndirectCB) {
+ if (!AllCalleesKnown)
+ return ChangeStatus::UNCHANGED;
+ MDBuilder MDB(IndirectCB.getContext());
+ MDNode *Callees = MDB.createCallees(SkippedAssumedCallees);
+ IndirectCB.setMetadata(LLVMContext::MD_callees, Callees);
+ return ChangeStatus::CHANGED;
+ };
+
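+    // When all callees are known, the skipped candidates are recorded on the
+    // remaining indirect call as !callees metadata, e.g. (a sketch with
+    // hypothetical functions):
+    //   call void %fp(...), !callees !0
+    //   !0 = !{ptr @skipped1, ptr @skipped2}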
+ if (!SpecializedForAnyCallees)
+ return AttachCalleeMetadata(*CB);
+
+    // Check if we still need the fallback indirect call.
+ if (SpecializedForAllCallees) {
+ LastCmp->replaceAllUsesWith(ConstantInt::getTrue(LastCmp->getContext()));
+ LastCmp->eraseFromParent();
+ new UnreachableInst(IP->getContext(), IP);
+ IP->eraseFromParent();
+ } else {
+ auto *CBClone = cast<CallInst>(CB->clone());
+ CBClone->setName(CB->getName());
+ CBClone->insertBefore(IP);
+ NewCalls.push_back({CBClone, nullptr});
+ AttachCalleeMetadata(*CBClone);
+ }
+
+ // Check if we need a PHI to merge the results.
+ if (!CBIsVoid) {
+ auto *PHI = PHINode::Create(CB->getType(), NewCalls.size(),
+ CB->getName() + ".phi",
+ &*CB->getParent()->getFirstInsertionPt());
+ for (auto &It : NewCalls) {
+ CallBase *NewCall = It.first;
+ Instruction *CallRet = It.second ? It.second : It.first;
+ if (CallRet->getType() == CB->getType())
+ PHI->addIncoming(CallRet, CallRet->getParent());
+ else if (NewCall->getType()->isVoidTy())
+ PHI->addIncoming(PoisonValue::get(CB->getType()),
+ NewCall->getParent());
+ else
+ llvm_unreachable("Call return should match or be void!");
+ }
+ A.changeAfterManifest(IRPosition::callsite_returned(*CB), *PHI);
+ }
+
+ A.deleteAfterManifest(*CB);
+ Changed = ChangeStatus::CHANGED;
+
+ return Changed;
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr(Attributor *A) const override {
+ return std::string(AllCalleesKnown ? "eliminate" : "specialize") +
+ " indirect call site with " + std::to_string(AssumedCallees.size()) +
+ " functions";
+ }
+
+ void trackStatistics() const override {
+ if (AllCalleesKnown) {
+ STATS_DECLTRACK(
+ Eliminated, CallSites,
+ "Number of indirect call sites eliminated via specialization")
+ } else {
+ STATS_DECLTRACK(Specialized, CallSites,
+ "Number of indirect call sites specialized")
+ }
+ }
+
+ bool foreachCallee(function_ref<bool(Function *)> CB) const override {
+ return isValidState() && AllCalleesKnown && all_of(AssumedCallees, CB);
+ }
+
+private:
+ /// Map to remember filter results.
+ DenseMap<Function *, std::optional<bool>> FilterResults;
+
+  /// If the !callees metadata was present, this set will contain all potential
+  /// callees (superset).
+ SmallSetVector<Function *, 4> PotentialCallees;
+
+  /// This set contains all currently assumed callees, which might grow over
+  /// time.
+ SmallSetVector<Function *, 4> AssumedCallees;
+
+ /// Flag to indicate if all possible callees are in the AssumedCallees set or
+ /// if there could be others.
+ bool AllCalleesKnown = true;
+};
+} // namespace
+
/// ------------------------ Address Space ------------------------------------
namespace {
struct AAAddressSpaceImpl : public AAAddressSpace {
@@ -11961,8 +12587,13 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
// CGSCC if the AA is run on CGSCC instead of the entire module.
if (!A.isRunOn(Inst->getFunction()))
return true;
- if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst))
+ if (isa<LoadInst>(Inst))
MakeChange(Inst, const_cast<Use &>(U));
+ if (isa<StoreInst>(Inst)) {
+ // We only make changes if the use is the pointer operand.
+ if (U.getOperandNo() == 1)
+ MakeChange(Inst, const_cast<Use &>(U));
+ }
return true;
};
@@ -12064,6 +12695,224 @@ struct AAAddressSpaceCallSiteArgument final : AAAddressSpaceImpl {
};
} // namespace
+/// ----------- Allocation Info ----------
+namespace {
+struct AAAllocationInfoImpl : public AAAllocationInfo {
+ AAAllocationInfoImpl(const IRPosition &IRP, Attributor &A)
+ : AAAllocationInfo(IRP, A) {}
+
+ std::optional<TypeSize> getAllocatedSize() const override {
+ assert(isValidState() && "the AA is invalid");
+ return AssumedAllocatedSize;
+ }
+
+ std::optional<TypeSize> findInitialAllocationSize(Instruction *I,
+ const DataLayout &DL) {
+
+    // TODO: implement case for malloc-like instructions
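+    // E.g., `%buf = alloca [64 x i8]` is expected to yield a fixed TypeSize
+    // of 64 bytes, while a dynamically sized alloca yields std::nullopt
+    // (a sketch of AllocaInst::getAllocationSize behavior).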
+ switch (I->getOpcode()) {
+ case Instruction::Alloca: {
+ AllocaInst *AI = cast<AllocaInst>(I);
+ return AI->getAllocationSize(DL);
+ }
+ default:
+ return std::nullopt;
+ }
+ }
+
+ ChangeStatus updateImpl(Attributor &A) override {
+
+ const IRPosition &IRP = getIRPosition();
+ Instruction *I = IRP.getCtxI();
+
+    // TODO: update check for malloc-like calls
+ if (!isa<AllocaInst>(I))
+ return indicatePessimisticFixpoint();
+
+ bool IsKnownNoCapture;
+ if (!AA::hasAssumedIRAttr<Attribute::NoCapture>(
+ A, this, IRP, DepClassTy::OPTIONAL, IsKnownNoCapture))
+ return indicatePessimisticFixpoint();
+
+ const AAPointerInfo *PI =
+ A.getOrCreateAAFor<AAPointerInfo>(IRP, *this, DepClassTy::REQUIRED);
+
+ if (!PI)
+ return indicatePessimisticFixpoint();
+
+ if (!PI->getState().isValidState())
+ return indicatePessimisticFixpoint();
+
+ const DataLayout &DL = A.getDataLayout();
+ const auto AllocationSize = findInitialAllocationSize(I, DL);
+
+    // If the allocation size is unknown (nullopt), we give up.
+ if (!AllocationSize)
+ return indicatePessimisticFixpoint();
+
+    // For zero-sized allocations we give up, since we cannot reduce them
+    // further.
+ if (*AllocationSize == 0)
+ return indicatePessimisticFixpoint();
+
+ int64_t BinSize = PI->numOffsetBins();
+
+ // TODO: implement for multiple bins
+ if (BinSize > 1)
+ return indicatePessimisticFixpoint();
+
+ if (BinSize == 0) {
+ auto NewAllocationSize = std::optional<TypeSize>(TypeSize(0, false));
+ if (!changeAllocationSize(NewAllocationSize))
+ return ChangeStatus::UNCHANGED;
+ return ChangeStatus::CHANGED;
+ }
+
+    // TODO: refactor this to be part of the multiple-bin case
+ const auto &It = PI->begin();
+
+    // TODO: handle the case where Offset is not zero
+ if (It->first.Offset != 0)
+ return indicatePessimisticFixpoint();
+
+ uint64_t SizeOfBin = It->first.Offset + It->first.Size;
+
+ if (SizeOfBin >= *AllocationSize)
+ return indicatePessimisticFixpoint();
+
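+    // Shrink to the size of the single used bin, in bits. E.g., a bin [0, 4)
+    // over an `alloca i64` (8 bytes) yields 4 * 8 = 32 bits.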
+ auto NewAllocationSize =
+ std::optional<TypeSize>(TypeSize(SizeOfBin * 8, false));
+
+ if (!changeAllocationSize(NewAllocationSize))
+ return ChangeStatus::UNCHANGED;
+
+ return ChangeStatus::CHANGED;
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+
+ assert(isValidState() &&
+ "Manifest should only be called if the state is valid.");
+
+ Instruction *I = getIRPosition().getCtxI();
+
+ auto FixedAllocatedSizeInBits = getAllocatedSize()->getFixedValue();
+
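+    // Round the assumed size (in bits) up to whole bytes,
+    // e.g. 33 bits -> 5 bytes.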
+ unsigned long NumBytesToAllocate = (FixedAllocatedSizeInBits + 7) / 8;
+
+ switch (I->getOpcode()) {
+    // TODO: add case for malloc-like calls
+ case Instruction::Alloca: {
+
+ AllocaInst *AI = cast<AllocaInst>(I);
+
+ Type *CharType = Type::getInt8Ty(I->getContext());
+
+ auto *NumBytesToValue =
+ ConstantInt::get(I->getContext(), APInt(32, NumBytesToAllocate));
+
+ AllocaInst *NewAllocaInst =
+ new AllocaInst(CharType, AI->getAddressSpace(), NumBytesToValue,
+ AI->getAlign(), AI->getName(), AI->getNextNode());
+
+ if (A.changeAfterManifest(IRPosition::inst(*AI), *NewAllocaInst))
+ return ChangeStatus::CHANGED;
+
+ break;
+ }
+ default:
+ break;
+ }
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr(Attributor *A) const override {
+ if (!isValidState())
+ return "allocationinfo(<invalid>)";
+ return "allocationinfo(" +
+ (AssumedAllocatedSize == HasNoAllocationSize
+ ? "none"
+ : std::to_string(AssumedAllocatedSize->getFixedValue())) +
+ ")";
+ }
+
+private:
+ std::optional<TypeSize> AssumedAllocatedSize = HasNoAllocationSize;
+
+  // Maintain the computed allocation size of the object.
+  // Returns whether the size of the allocation was modified or not.
+ bool changeAllocationSize(std::optional<TypeSize> Size) {
+ if (AssumedAllocatedSize == HasNoAllocationSize ||
+ AssumedAllocatedSize != Size) {
+ AssumedAllocatedSize = Size;
+ return true;
+ }
+ return false;
+ }
+};
+
+struct AAAllocationInfoFloating : AAAllocationInfoImpl {
+ AAAllocationInfoFloating(const IRPosition &IRP, Attributor &A)
+ : AAAllocationInfoImpl(IRP, A) {}
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(allocationinfo);
+ }
+};
+
+struct AAAllocationInfoReturned : AAAllocationInfoImpl {
+ AAAllocationInfoReturned(const IRPosition &IRP, Attributor &A)
+ : AAAllocationInfoImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+    // TODO: we don't rewrite the function argument for now because it would
+    // require rewriting the function signature and all call sites.
+ (void)indicatePessimisticFixpoint();
+ }
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FNRET_ATTR(allocationinfo);
+ }
+};
+
+struct AAAllocationInfoCallSiteReturned : AAAllocationInfoImpl {
+ AAAllocationInfoCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AAAllocationInfoImpl(IRP, A) {}
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSRET_ATTR(allocationinfo);
+ }
+};
+
+struct AAAllocationInfoArgument : AAAllocationInfoImpl {
+ AAAllocationInfoArgument(const IRPosition &IRP, Attributor &A)
+ : AAAllocationInfoImpl(IRP, A) {}
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_ARG_ATTR(allocationinfo);
+ }
+};
+
+struct AAAllocationInfoCallSiteArgument : AAAllocationInfoImpl {
+ AAAllocationInfoCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AAAllocationInfoImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+
+ (void)indicatePessimisticFixpoint();
+ }
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSARG_ATTR(allocationinfo);
+ }
+};
+} // namespace
+
const char AANoUnwind::ID = 0;
const char AANoSync::ID = 0;
const char AANoFree::ID = 0;
@@ -12097,6 +12946,10 @@ const char AAPointerInfo::ID = 0;
const char AAAssumptionInfo::ID = 0;
const char AAUnderlyingObjects::ID = 0;
const char AAAddressSpace::ID = 0;
+const char AAAllocationInfo::ID = 0;
+const char AAIndirectCallInfo::ID = 0;
+const char AAGlobalValueInfo::ID = 0;
+const char AADenormalFPMath::ID = 0;
// Macro magic to create the static generator function for attributes that
// follow the naming scheme.
@@ -12143,6 +12996,18 @@ const char AAAddressSpace::ID = 0;
return *AA; \
}
+#define CREATE_ABSTRACT_ATTRIBUTE_FOR_ONE_POSITION(POS, SUFFIX, CLASS) \
+ CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
+ CLASS *AA = nullptr; \
+ switch (IRP.getPositionKind()) { \
+ SWITCH_PK_CREATE(CLASS, IRP, POS, SUFFIX) \
+ default: \
+ llvm_unreachable("Cannot create " #CLASS " for position otherthan " #POS \
+ " position!"); \
+ } \
+ return *AA; \
+ }
+
#define CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
CLASS *AA = nullptr; \
@@ -12215,17 +13080,24 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFPClass)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAddressSpace)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAllocationInfo)
CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify)
CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead)
CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFree)
CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAUnderlyingObjects)
+CREATE_ABSTRACT_ATTRIBUTE_FOR_ONE_POSITION(IRP_CALL_SITE, CallSite,
+ AAIndirectCallInfo)
+CREATE_ABSTRACT_ATTRIBUTE_FOR_ONE_POSITION(IRP_FLOAT, Floating,
+ AAGlobalValueInfo)
+
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAHeapToStack)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAUndefinedBehavior)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonConvergent)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIntraFnReachability)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAInterFnReachability)
+CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADenormalFPMath)
CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryBehavior)
@@ -12234,5 +13106,6 @@ CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryBehavior)
#undef CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION
#undef CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION
#undef CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION
+#undef CREATE_ABSTRACT_ATTRIBUTE_FOR_ONE_POSITION
#undef SWITCH_PK_CREATE
#undef SWITCH_PK_INV
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
index 93d15f59a036..5cc8258a495a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -85,7 +85,7 @@ void CrossDSOCFI::buildCFICheck(Module &M) {
LLVMContext &Ctx = M.getContext();
FunctionCallee C = M.getOrInsertFunction(
"__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx),
- Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx));
+ PointerType::getUnqual(Ctx), PointerType::getUnqual(Ctx));
Function *F = cast<Function>(C.getCallee());
// Take over the existing function. The frontend emits a weak stub so that the
// linker knows about the symbol; this pass replaces the function body.
@@ -110,9 +110,9 @@ void CrossDSOCFI::buildCFICheck(Module &M) {
BasicBlock *TrapBB = BasicBlock::Create(Ctx, "fail", F);
IRBuilder<> IRBFail(TrapBB);
- FunctionCallee CFICheckFailFn =
- M.getOrInsertFunction("__cfi_check_fail", Type::getVoidTy(Ctx),
- Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx));
+ FunctionCallee CFICheckFailFn = M.getOrInsertFunction(
+ "__cfi_check_fail", Type::getVoidTy(Ctx), PointerType::getUnqual(Ctx),
+ PointerType::getUnqual(Ctx));
IRBFail.CreateCall(CFICheckFailFn, {&CFICheckFailData, &Addr});
IRBFail.CreateBr(ExitBB);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 01834015f3fd..4f65748c19e6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -174,6 +174,7 @@ bool DeadArgumentEliminationPass::deleteDeadVarargs(Function &F) {
NF->setComdat(F.getComdat());
F.getParent()->getFunctionList().insert(F.getIterator(), NF);
NF->takeName(&F);
+ NF->IsNewDbgInfoFormat = F.IsNewDbgInfoFormat;
// Loop over all the callers of the function, transforming the call sites
// to pass in a smaller number of arguments into the new function.
@@ -248,7 +249,7 @@ bool DeadArgumentEliminationPass::deleteDeadVarargs(Function &F) {
NF->addMetadata(KindID, *Node);
// Fix up any BlockAddresses that refer to the function.
- F.replaceAllUsesWith(ConstantExpr::getBitCast(NF, F.getType()));
+ F.replaceAllUsesWith(NF);
// Delete the bitcast that we just created, so that NF does not
// appear to be address-taken.
NF->removeDeadConstantUsers();
@@ -877,6 +878,7 @@ bool DeadArgumentEliminationPass::removeDeadStuffFromFunction(Function *F) {
// it again.
F->getParent()->getFunctionList().insert(F->getIterator(), NF);
NF->takeName(F);
+ NF->IsNewDbgInfoFormat = F->IsNewDbgInfoFormat;
// Loop over all the callers of the function, transforming the call sites to
// pass in a smaller number of arguments into the new function.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp
index fa56a5b564ae..48ef0772e800 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp
@@ -7,8 +7,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
-#include "llvm/Bitcode/BitcodeWriter.h"
-#include "llvm/Bitcode/BitcodeWriterPass.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
@@ -16,10 +14,8 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
-#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
-#include <memory>
#include <string>
using namespace llvm;
@@ -34,19 +30,9 @@ PreservedAnalyses EmbedBitcodePass::run(Module &M, ModuleAnalysisManager &AM) {
report_fatal_error(
"EmbedBitcode pass currently only supports ELF object format",
/*gen_crash_diag=*/false);
-
- std::unique_ptr<Module> NewModule = CloneModule(M);
- MPM.run(*NewModule, AM);
-
std::string Data;
raw_string_ostream OS(Data);
- if (IsThinLTO)
- ThinLTOBitcodeWriterPass(OS, /*ThinLinkOS=*/nullptr).run(*NewModule, AM);
- else
- BitcodeWriterPass(OS, /*ShouldPreserveUseListOrder=*/false, EmitLTOSummary)
- .run(*NewModule, AM);
-
+ ThinLTOBitcodeWriterPass(OS, /*ThinLinkOS=*/nullptr).run(M, AM);
embedBufferInModule(M, MemoryBufferRef(Data, "ModuleData"), ".llvm.lto");
-
return PreservedAnalyses::all();
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
index 74931e1032d1..9cf4e448c9b6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -11,38 +11,57 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "forceattrs"
-static cl::list<std::string>
- ForceAttributes("force-attribute", cl::Hidden,
- cl::desc("Add an attribute to a function. This should be a "
- "pair of 'function-name:attribute-name', for "
- "example -force-attribute=foo:noinline. This "
- "option can be specified multiple times."));
+static cl::list<std::string> ForceAttributes(
+ "force-attribute", cl::Hidden,
+ cl::desc(
+ "Add an attribute to a function. This can be a "
+ "pair of 'function-name:attribute-name', to apply an attribute to a "
+ "specific function. For "
+ "example -force-attribute=foo:noinline. Specifying only an attribute "
+ "will apply the attribute to every function in the module. This "
+ "option can be specified multiple times."));
static cl::list<std::string> ForceRemoveAttributes(
"force-remove-attribute", cl::Hidden,
- cl::desc("Remove an attribute from a function. This should be a "
- "pair of 'function-name:attribute-name', for "
- "example -force-remove-attribute=foo:noinline. This "
+ cl::desc("Remove an attribute from a function. This can be a "
+ "pair of 'function-name:attribute-name' to remove an attribute "
+ "from a specific function. For "
+ "example -force-remove-attribute=foo:noinline. Specifying only an "
+ "attribute will remove the attribute from all functions in the "
+ "module. This "
"option can be specified multiple times."));
+static cl::opt<std::string> CSVFilePath(
+ "forceattrs-csv-path", cl::Hidden,
+ cl::desc(
+ "Path to CSV file containing lines of function names and attributes to "
+ "add to them in the form of `f1,attr1` or `f2,attr2=str`."));
+
/// If F has any forced attributes given on the command line, add them.
/// If F has any forced remove attributes given on the command line, remove
/// them. When both force and force-remove are given to a function, the latter
/// takes precedence.
static void forceAttributes(Function &F) {
auto ParseFunctionAndAttr = [&](StringRef S) {
- auto Kind = Attribute::None;
- auto KV = StringRef(S).split(':');
- if (KV.first != F.getName())
- return Kind;
- Kind = Attribute::getAttrKindFromName(KV.second);
+ StringRef AttributeText;
+ if (S.contains(':')) {
+ auto KV = StringRef(S).split(':');
+ if (KV.first != F.getName())
+ return Attribute::None;
+ AttributeText = KV.second;
+ } else {
+ AttributeText = S;
+ }
+ auto Kind = Attribute::getAttrKindFromName(AttributeText);
if (Kind == Attribute::None || !Attribute::canUseAsFnAttr(Kind)) {
- LLVM_DEBUG(dbgs() << "ForcedAttribute: " << KV.second
+ LLVM_DEBUG(dbgs() << "ForcedAttribute: " << AttributeText
<< " unknown or not a function attribute!\n");
}
return Kind;
@@ -69,12 +88,52 @@ static bool hasForceAttributes() {
PreservedAnalyses ForceFunctionAttrsPass::run(Module &M,
ModuleAnalysisManager &) {
- if (!hasForceAttributes())
- return PreservedAnalyses::all();
-
- for (Function &F : M.functions())
- forceAttributes(F);
-
- // Just conservatively invalidate analyses, this isn't likely to be important.
- return PreservedAnalyses::none();
+ bool Changed = false;
+ if (!CSVFilePath.empty()) {
+ auto BufferOrError = MemoryBuffer::getFileOrSTDIN(CSVFilePath);
+ if (!BufferOrError)
+ report_fatal_error("Cannot open CSV file.");
+ StringRef Buffer = BufferOrError.get()->getBuffer();
+ auto MemoryBuffer = MemoryBuffer::getMemBuffer(Buffer);
+ line_iterator It(*MemoryBuffer);
+ for (; !It.is_at_end(); ++It) {
+ auto SplitPair = It->split(',');
+ if (SplitPair.second.empty())
+ continue;
+ Function *Func = M.getFunction(SplitPair.first);
+ if (Func) {
+ if (Func->isDeclaration())
+ continue;
+ auto SecondSplitPair = SplitPair.second.split('=');
+ if (!SecondSplitPair.second.empty()) {
+ Func->addFnAttr(SecondSplitPair.first, SecondSplitPair.second);
+ Changed = true;
+ } else {
+ auto AttrKind = Attribute::getAttrKindFromName(SplitPair.second);
+ if (AttrKind != Attribute::None &&
+ Attribute::canUseAsFnAttr(AttrKind)) {
+            // TODO: There could be string attributes without a value; we
+            // should support those, too.
+ Func->addFnAttr(AttrKind);
+ Changed = true;
+ } else
+ errs() << "Cannot add " << SplitPair.second
+ << " as an attribute name.\n";
+ }
+ } else {
+ errs() << "Function in CSV file at line " << It.line_number()
+ << " does not exist.\n";
+        // TODO: report_fatal_error at end of pass for missing functions.
+ continue;
+ }
+ }
+ }
+ if (hasForceAttributes()) {
+ for (Function &F : M.functions())
+ forceAttributes(F);
+ Changed = true;
+ }
+  // Just conservatively invalidate analyses if we've made any changes; this
+  // isn't likely to be important.
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 34299f9dbb23..7c277518b21d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -110,6 +110,39 @@ using SCCNodeSet = SmallSetVector<Function *, 8>;
} // end anonymous namespace
+static void addLocAccess(MemoryEffects &ME, const MemoryLocation &Loc,
+ ModRefInfo MR, AAResults &AAR) {
+ // Ignore accesses to known-invariant or local memory.
+ MR &= AAR.getModRefInfoMask(Loc, /*IgnoreLocal=*/true);
+ if (isNoModRef(MR))
+ return;
+
+ const Value *UO = getUnderlyingObject(Loc.Ptr);
+ assert(!isa<AllocaInst>(UO) &&
+ "Should have been handled by getModRefInfoMask()");
+ if (isa<Argument>(UO)) {
+ ME |= MemoryEffects::argMemOnly(MR);
+ return;
+ }
+
+ // If it's not an identified object, it might be an argument.
+ if (!isIdentifiedObject(UO))
+ ME |= MemoryEffects::argMemOnly(MR);
+ ME |= MemoryEffects(IRMemLocation::Other, MR);
+}
+
+static void addArgLocs(MemoryEffects &ME, const CallBase *Call,
+ ModRefInfo ArgMR, AAResults &AAR) {
+ for (const Value *Arg : Call->args()) {
+ if (!Arg->getType()->isPtrOrPtrVectorTy())
+ continue;
+
+ addLocAccess(ME,
+ MemoryLocation::getBeforeOrAfter(Arg, Call->getAAMetadata()),
+ ArgMR, AAR);
+ }
+}
+
/// Returns the memory access attribute for function F using AAR for AA results,
/// where SCCNodes is the current SCC.
///
@@ -118,54 +151,48 @@ using SCCNodeSet = SmallSetVector<Function *, 8>;
/// result will be based only on AA results for the function declaration; it
/// will be assumed that some other (perhaps less optimized) version of the
/// function may be selected at link time.
-static MemoryEffects checkFunctionMemoryAccess(Function &F, bool ThisBody,
- AAResults &AAR,
- const SCCNodeSet &SCCNodes) {
+///
+/// The return value is split into two parts: Memory effects that always apply,
+/// and additional memory effects that apply if any of the functions in the SCC
+/// can access argmem.
+static std::pair<MemoryEffects, MemoryEffects>
+checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR,
+ const SCCNodeSet &SCCNodes) {
MemoryEffects OrigME = AAR.getMemoryEffects(&F);
if (OrigME.doesNotAccessMemory())
// Already perfect!
- return OrigME;
+ return {OrigME, MemoryEffects::none()};
if (!ThisBody)
- return OrigME;
+ return {OrigME, MemoryEffects::none()};
MemoryEffects ME = MemoryEffects::none();
+ // Additional locations accessed if the SCC accesses argmem.
+ MemoryEffects RecursiveArgME = MemoryEffects::none();
+
// Inalloca and preallocated arguments are always clobbered by the call.
if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
F.getAttributes().hasAttrSomewhere(Attribute::Preallocated))
ME |= MemoryEffects::argMemOnly(ModRefInfo::ModRef);
- auto AddLocAccess = [&](const MemoryLocation &Loc, ModRefInfo MR) {
- // Ignore accesses to known-invariant or local memory.
- MR &= AAR.getModRefInfoMask(Loc, /*IgnoreLocal=*/true);
- if (isNoModRef(MR))
- return;
-
- const Value *UO = getUnderlyingObject(Loc.Ptr);
- assert(!isa<AllocaInst>(UO) &&
- "Should have been handled by getModRefInfoMask()");
- if (isa<Argument>(UO)) {
- ME |= MemoryEffects::argMemOnly(MR);
- return;
- }
-
- // If it's not an identified object, it might be an argument.
- if (!isIdentifiedObject(UO))
- ME |= MemoryEffects::argMemOnly(MR);
- ME |= MemoryEffects(IRMemLocation::Other, MR);
- };
// Scan the function body for instructions that may read or write memory.
for (Instruction &I : instructions(F)) {
// Some instructions can be ignored even if they read or write memory.
// Detect these now, skipping to the next instruction if one is found.
if (auto *Call = dyn_cast<CallBase>(&I)) {
- // Ignore calls to functions in the same SCC, as long as the call sites
- // don't have operand bundles. Calls with operand bundles are allowed to
- // have memory effects not described by the memory effects of the call
- // target.
+ // We can optimistically ignore calls to functions in the same SCC, with
+ // two caveats:
+ // * Calls with operand bundles may have additional effects.
+ // * Argument memory accesses may imply additional effects depending on
+ // what the argument location is.
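+      // For example, if an SCC member is called as callee(@global), the
+      // callee's argmem access is really an access of @global, i.e. Other
+      // memory from this function's perspective; RecursiveArgME captures
+      // these locations below (a sketch of the intended reasoning).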
if (!Call->hasOperandBundles() && Call->getCalledFunction() &&
- SCCNodes.count(Call->getCalledFunction()))
+ SCCNodes.count(Call->getCalledFunction())) {
+ // Keep track of which additional locations are accessed if the SCC
+ // turns out to access argmem.
+ addArgLocs(RecursiveArgME, Call, ModRefInfo::ModRef, AAR);
continue;
+ }
+
MemoryEffects CallME = AAR.getMemoryEffects(Call);
// If the call doesn't access memory, we're done.
@@ -190,15 +217,8 @@ static MemoryEffects checkFunctionMemoryAccess(Function &F, bool ThisBody,
// Check whether all pointer arguments point to local memory, and
// ignore calls that only access local memory.
ModRefInfo ArgMR = CallME.getModRef(IRMemLocation::ArgMem);
- if (ArgMR != ModRefInfo::NoModRef) {
- for (const Use &U : Call->args()) {
- const Value *Arg = U;
- if (!Arg->getType()->isPtrOrPtrVectorTy())
- continue;
-
- AddLocAccess(MemoryLocation::getBeforeOrAfter(Arg, I.getAAMetadata()), ArgMR);
- }
- }
+ if (ArgMR != ModRefInfo::NoModRef)
+ addArgLocs(ME, Call, ArgMR, AAR);
continue;
}
@@ -222,15 +242,15 @@ static MemoryEffects checkFunctionMemoryAccess(Function &F, bool ThisBody,
if (I.isVolatile())
ME |= MemoryEffects::inaccessibleMemOnly(MR);
- AddLocAccess(*Loc, MR);
+ addLocAccess(ME, *Loc, MR, AAR);
}
- return OrigME & ME;
+ return {OrigME & ME, RecursiveArgME};
}
MemoryEffects llvm::computeFunctionBodyMemoryAccess(Function &F,
AAResults &AAR) {
- return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {});
+ return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {}).first;
}
/// Deduce readonly/readnone/writeonly attributes for the SCC.
@@ -238,24 +258,37 @@ template <typename AARGetterT>
static void addMemoryAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
SmallSet<Function *, 8> &Changed) {
MemoryEffects ME = MemoryEffects::none();
+ MemoryEffects RecursiveArgME = MemoryEffects::none();
for (Function *F : SCCNodes) {
// Call the callable parameter to look up AA results for this function.
AAResults &AAR = AARGetter(*F);
// Non-exact function definitions may not be selected at link time, and an
// alternative version that writes to memory may be selected. See the
// comment on GlobalValue::isDefinitionExact for more details.
- ME |= checkFunctionMemoryAccess(*F, F->hasExactDefinition(), AAR, SCCNodes);
+ auto [FnME, FnRecursiveArgME] =
+ checkFunctionMemoryAccess(*F, F->hasExactDefinition(), AAR, SCCNodes);
+ ME |= FnME;
+ RecursiveArgME |= FnRecursiveArgME;
// Reached bottom of the lattice, we will not be able to improve the result.
if (ME == MemoryEffects::unknown())
return;
}
+ // If the SCC accesses argmem, add recursive accesses resulting from that.
+ ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem);
+ if (ArgMR != ModRefInfo::NoModRef)
+ ME |= RecursiveArgME & MemoryEffects(ArgMR);
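+  // E.g., if the SCC only reads argmem (ArgMR == Ref), the recursive argument
+  // locations are added back with at most Ref, never Mod.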
+
for (Function *F : SCCNodes) {
MemoryEffects OldME = F->getMemoryEffects();
MemoryEffects NewME = ME & OldME;
if (NewME != OldME) {
++NumMemoryAttr;
F->setMemoryEffects(NewME);
+ // Remove conflicting writable attributes.
+ if (!isModSet(NewME.getModRef(IRMemLocation::ArgMem)))
+ for (Argument &A : F->args())
+ A.removeAttr(Attribute::Writable);
Changed.insert(F);
}
}
@@ -625,7 +658,15 @@ determinePointerAccessAttrs(Argument *A,
// must be a data operand (e.g. argument or operand bundle)
const unsigned UseIndex = CB.getDataOperandNo(U);
- if (!CB.doesNotCapture(UseIndex)) {
+      // Some intrinsics (for instance ptrmask) do not capture their results,
+      // but return results that alias their pointer argument, and thus should
+      // be handled like GEP or addrspacecast above.
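+      // E.g., %m = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -4) returns a
+      // pointer that aliases %p, so the uses of %m are added to the worklist
+      // (a sketch; the exact intrinsic mangling may differ).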
+ if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
+ &CB, /*MustPreserveNullness=*/false)) {
+ for (Use &UU : CB.uses())
+ if (Visited.insert(&UU).second)
+ Worklist.push_back(&UU);
+ } else if (!CB.doesNotCapture(UseIndex)) {
if (!CB.onlyReadsMemory())
// If the callee can save a copy into other memory, then simply
// scanning uses of the call is insufficient. We have no way
@@ -639,7 +680,8 @@ determinePointerAccessAttrs(Argument *A,
Worklist.push_back(&UU);
}
- if (CB.doesNotAccessMemory())
+ ModRefInfo ArgMR = CB.getMemoryEffects().getModRef(IRMemLocation::ArgMem);
+ if (isNoModRef(ArgMR))
continue;
if (Function *F = CB.getCalledFunction())
@@ -654,9 +696,9 @@ determinePointerAccessAttrs(Argument *A,
// invokes with operand bundles.
if (CB.doesNotAccessMemory(UseIndex)) {
/* nop */
- } else if (CB.onlyReadsMemory() || CB.onlyReadsMemory(UseIndex)) {
+ } else if (!isModSet(ArgMR) || CB.onlyReadsMemory(UseIndex)) {
IsRead = true;
- } else if (CB.hasFnAttr(Attribute::WriteOnly) ||
+ } else if (!isRefSet(ArgMR) ||
CB.dataOperandHasImpliedAttr(UseIndex, Attribute::WriteOnly)) {
IsWrite = true;
} else {
@@ -810,6 +852,9 @@ static bool addAccessAttr(Argument *A, Attribute::AttrKind R) {
A->removeAttr(Attribute::WriteOnly);
A->removeAttr(Attribute::ReadOnly);
A->removeAttr(Attribute::ReadNone);
+ // Remove conflicting writable attribute.
+ if (R == Attribute::ReadNone || R == Attribute::ReadOnly)
+ A->removeAttr(Attribute::Writable);
A->addAttr(R);
if (R == Attribute::ReadOnly)
++NumReadOnlyArg;
@@ -1720,7 +1765,8 @@ static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
template <typename AARGetterT>
static SmallSet<Function *, 8>
-deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter) {
+deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter,
+ bool ArgAttrsOnly) {
SCCNodesResult Nodes = createSCCNodeSet(Functions);
// Bail if the SCC only contains optnone functions.
@@ -1728,6 +1774,10 @@ deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter) {
return {};
SmallSet<Function *, 8> Changed;
+ if (ArgAttrsOnly) {
+ addArgumentAttrs(Nodes.SCCNodes, Changed);
+ return Changed;
+ }
addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed);
@@ -1762,10 +1812,13 @@ PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
LazyCallGraph &CG,
CGSCCUpdateResult &) {
// Skip non-recursive functions if requested.
+  // Only infer argument attributes for non-recursive functions, because they
+  // can affect optimization behavior in conjunction with noalias.
+ bool ArgAttrsOnly = false;
if (C.size() == 1 && SkipNonRecursive) {
LazyCallGraph::Node &N = *C.begin();
if (!N->lookup(N))
- return PreservedAnalyses::all();
+ ArgAttrsOnly = true;
}
FunctionAnalysisManager &FAM =
@@ -1782,7 +1835,8 @@ PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
Functions.push_back(&N.getFunction());
}
- auto ChangedFunctions = deriveAttrsInPostOrder(Functions, AARGetter);
+ auto ChangedFunctions =
+ deriveAttrsInPostOrder(Functions, AARGetter, ArgAttrsOnly);
if (ChangedFunctions.empty())
return PreservedAnalyses::all();
@@ -1818,7 +1872,7 @@ void PostOrderFunctionAttrsPass::printPipeline(
static_cast<PassInfoMixin<PostOrderFunctionAttrsPass> *>(this)->printPipeline(
OS, MapClassName2PassName);
if (SkipNonRecursive)
- OS << "<skip-non-recursive>";
+ OS << "<skip-non-recursive-function-attrs>";
}
template <typename AARGetterT>
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp
index f635b14cd2a9..49b3f2b085e1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -16,7 +16,6 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/AutoUpgrade.h"
@@ -38,6 +37,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/JSON.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/Internalize.h"
@@ -139,6 +139,29 @@ static cl::opt<bool>
ImportAllIndex("import-all-index",
cl::desc("Import all external functions in index."));
+/// Pass a workload description file. An example of a workload would be the
+/// functions executed to satisfy an RPC request. A workload is defined by a
+/// root function and the list of functions that are (frequently) needed to
+/// satisfy it. The module that defines the root will have all those functions
+/// imported. The file contains a JSON dictionary. The keys are root functions;
+/// the values are lists of functions to import in the module defining the
+/// root. It is assumed -funique-internal-linkage-names was used, thus ensuring
+/// function names are unique even for local linkage ones.
+static cl::opt<std::string> WorkloadDefinitions(
+ "thinlto-workload-def",
+ cl::desc("Pass a workload definition. This is a file containing a JSON "
+ "dictionary. The keys are root functions, the values are lists of "
+ "functions to import in the module defining the root. It is "
+ "assumed -funique-internal-linkage-names was used, to ensure "
+ "local linkage functions have unique names. For example: \n"
+ "{\n"
+ " \"rootFunction_1\": [\"function_to_import_1\", "
+ "\"function_to_import_2\"], \n"
+ " \"rootFunction_2\": [\"function_to_import_3\", "
+ "\"function_to_import_4\"] \n"
+ "}"),
+ cl::Hidden);
+
// Load lazily a module from \p FileName in \p Context.
static std::unique_ptr<Module> loadFile(const std::string &FileName,
LLVMContext &Context) {
@@ -272,7 +295,7 @@ class GlobalsImporter final {
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
IsPrevailing;
FunctionImporter::ImportMapTy &ImportList;
- StringMap<FunctionImporter::ExportSetTy> *const ExportLists;
+ DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;
bool shouldImportGlobal(const ValueInfo &VI) {
const auto &GVS = DefinedGVSummaries.find(VI.getGUID());
@@ -357,7 +380,7 @@ public:
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
IsPrevailing,
FunctionImporter::ImportMapTy &ImportList,
- StringMap<FunctionImporter::ExportSetTy> *ExportLists)
+ DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
: Index(Index), DefinedGVSummaries(DefinedGVSummaries),
IsPrevailing(IsPrevailing), ImportList(ImportList),
ExportLists(ExportLists) {}
@@ -370,6 +393,264 @@ public:
}
};
+static const char *getFailureName(FunctionImporter::ImportFailureReason Reason);
+
+/// Determine the list of imports and exports for each module.
+class ModuleImportsManager {
+protected:
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ IsPrevailing;
+ const ModuleSummaryIndex &Index;
+ DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;
+
+ ModuleImportsManager(
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ IsPrevailing,
+ const ModuleSummaryIndex &Index,
+ DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists = nullptr)
+ : IsPrevailing(IsPrevailing), Index(Index), ExportLists(ExportLists) {}
+
+public:
+ virtual ~ModuleImportsManager() = default;
+
+ /// Given the list of globals defined in a module, compute the list of imports
+ /// as well as the list of "exports", i.e. the list of symbols referenced from
+ /// another module (that may require promotion).
+ virtual void
+ computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
+ StringRef ModName,
+ FunctionImporter::ImportMapTy &ImportList);
+
+ static std::unique_ptr<ModuleImportsManager>
+ create(function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ IsPrevailing,
+ const ModuleSummaryIndex &Index,
+ DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists =
+ nullptr);
+};
+
+/// A ModuleImportsManager that operates based on a workload definition (see
+/// -thinlto-workload-def). For modules that do not define workload roots, it
+/// applies the base ModuleImportsManager import policy.
+class WorkloadImportsManager : public ModuleImportsManager {
+  // Map each module name to the value infos to import into it. We use it to
+  // determine whether a module's import list should be computed by the base
+  // ModuleImportsManager or by us.
+ StringMap<DenseSet<ValueInfo>> Workloads;
+
+ void
+ computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
+ StringRef ModName,
+ FunctionImporter::ImportMapTy &ImportList) override {
+ auto SetIter = Workloads.find(ModName);
+ if (SetIter == Workloads.end()) {
+ LLVM_DEBUG(dbgs() << "[Workload] " << ModName
+ << " does not contain the root of any context.\n");
+ return ModuleImportsManager::computeImportForModule(DefinedGVSummaries,
+ ModName, ImportList);
+ }
+ LLVM_DEBUG(dbgs() << "[Workload] " << ModName
+ << " contains the root(s) of context(s).\n");
+
+ GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
+ ExportLists);
+ auto &ValueInfos = SetIter->second;
+ SmallVector<EdgeInfo, 128> GlobWorklist;
+ for (auto &VI : llvm::make_early_inc_range(ValueInfos)) {
+ auto It = DefinedGVSummaries.find(VI.getGUID());
+ if (It != DefinedGVSummaries.end() &&
+ IsPrevailing(VI.getGUID(), It->second)) {
+ LLVM_DEBUG(
+ dbgs() << "[Workload] " << VI.name()
+ << " has the prevailing variant already in the module "
+ << ModName << ". No need to import\n");
+ continue;
+ }
+ auto Candidates =
+ qualifyCalleeCandidates(Index, VI.getSummaryList(), ModName);
+
+ const GlobalValueSummary *GVS = nullptr;
+ auto PotentialCandidates = llvm::map_range(
+ llvm::make_filter_range(
+ Candidates,
+ [&](const auto &Candidate) {
+ LLVM_DEBUG(dbgs() << "[Workflow] Candidate for " << VI.name()
+ << " from " << Candidate.second->modulePath()
+ << " ImportFailureReason: "
+ << getFailureName(Candidate.first) << "\n");
+ return Candidate.first ==
+ FunctionImporter::ImportFailureReason::None;
+ }),
+ [](const auto &Candidate) { return Candidate.second; });
+ if (PotentialCandidates.empty()) {
+ LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
+ << " because can't find eligible Callee. Guid is: "
+ << Function::getGUID(VI.name()) << "\n");
+ continue;
+ }
+      /// We prefer importing the prevailing candidate; if there is none, we
+      /// still pick the first available candidate. The reason we want to make
+      /// sure we do import the prevailing candidate is that the goal of
+      /// workload-awareness is to enable optimizations specializing the call
+      /// graph of that workload. Suppose a function is already defined in the
+      /// module, but it's not the prevailing variant. Suppose also we do not
+      /// inline it (in fact, if it were interposable, we can't inline it),
+      /// but we could specialize it to the workload in other ways. However,
+      /// the linker would drop it in favor of the prevailing copy.
+      /// Instead, by importing the prevailing variant (assuming also the use
+      /// of `-avail-extern-to-local`), we keep the specialization. We could
+      /// alternatively make the non-prevailing variant local, but the
+      /// prevailing one is also the one for which we would have previously
+      /// collected profiles, making it preferable.
+ auto PrevailingCandidates = llvm::make_filter_range(
+ PotentialCandidates, [&](const auto *Candidate) {
+ return IsPrevailing(VI.getGUID(), Candidate);
+ });
+ if (PrevailingCandidates.empty()) {
+ GVS = *PotentialCandidates.begin();
+ if (!llvm::hasSingleElement(PotentialCandidates) &&
+ GlobalValue::isLocalLinkage(GVS->linkage()))
+ LLVM_DEBUG(
+ dbgs()
+ << "[Workload] Found multiple non-prevailing candidates for "
+ << VI.name()
+ << ". This is unexpected. Are module paths passed to the "
+ "compiler unique for the modules passed to the linker?");
+ // We could in theory have multiple (interposable) copies of a symbol
+ // when there is no prevailing candidate, if say the prevailing copy was
+ // in a native object being linked in. However, we should in theory be
+ // marking all of these non-prevailing IR copies dead in that case, in
+ // which case they won't be candidates.
+ assert(GVS->isLive());
+ } else {
+ assert(llvm::hasSingleElement(PrevailingCandidates));
+ GVS = *PrevailingCandidates.begin();
+ }
+
+ auto ExportingModule = GVS->modulePath();
+      // We checked that for the prevailing case, but if we happen to have, for
+      // example, an internal function that's defined in this module, it'd have
+      // no PrevailingCandidates.
+ if (ExportingModule == ModName) {
+ LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
+ << " because its defining module is the same as the "
+ "current module\n");
+ continue;
+ }
+ LLVM_DEBUG(dbgs() << "[Workload][Including]" << VI.name() << " from "
+ << ExportingModule << " : "
+ << Function::getGUID(VI.name()) << "\n");
+ ImportList[ExportingModule].insert(VI.getGUID());
+ GVI.onImportingSummary(*GVS);
+ if (ExportLists)
+ (*ExportLists)[ExportingModule].insert(VI);
+ }
+ LLVM_DEBUG(dbgs() << "[Workload] Done\n");
+ }
+
+public:
+ WorkloadImportsManager(
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ IsPrevailing,
+ const ModuleSummaryIndex &Index,
+ DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
+ : ModuleImportsManager(IsPrevailing, Index, ExportLists) {
+    // Since the workload definition uses names, we need a quick
+    // name -> ValueInfo lookup.
+ StringMap<ValueInfo> NameToValueInfo;
+ StringSet<> AmbiguousNames;
+ for (auto &I : Index) {
+ ValueInfo VI = Index.getValueInfo(I);
+ if (!NameToValueInfo.insert(std::make_pair(VI.name(), VI)).second)
+ LLVM_DEBUG(AmbiguousNames.insert(VI.name()));
+ }
+ auto DbgReportIfAmbiguous = [&](StringRef Name) {
+ LLVM_DEBUG(if (AmbiguousNames.count(Name) > 0) {
+ dbgs() << "[Workload] Function name " << Name
+ << " present in the workload definition is ambiguous. Consider "
+ "compiling with -funique-internal-linkage-names.";
+ });
+ };
+    auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(WorkloadDefinitions);
+    if (BufferOrErr.getError())
+      report_fatal_error("Failed to open context file");
+ auto Buffer = std::move(BufferOrErr.get());
+ std::map<std::string, std::vector<std::string>> WorkloadDefs;
+ json::Path::Root NullRoot;
+ // The JSON is supposed to contain a dictionary matching the type of
+ // WorkloadDefs. For example:
+ // {
+ // "rootFunction_1": ["function_to_import_1", "function_to_import_2"],
+ // "rootFunction_2": ["function_to_import_3", "function_to_import_4"]
+ // }
+ auto Parsed = json::parse(Buffer->getBuffer());
+ if (!Parsed)
+ report_fatal_error(Parsed.takeError());
+ if (!json::fromJSON(*Parsed, WorkloadDefs, NullRoot))
+ report_fatal_error("Invalid thinlto contextual profile format.");
+ for (const auto &Workload : WorkloadDefs) {
+ const auto &Root = Workload.first;
+ DbgReportIfAmbiguous(Root);
+ LLVM_DEBUG(dbgs() << "[Workload] Root: " << Root << "\n");
+ const auto &AllCallees = Workload.second;
+ auto RootIt = NameToValueInfo.find(Root);
+ if (RootIt == NameToValueInfo.end()) {
+ LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
+ << " not found in this linkage unit.\n");
+ continue;
+ }
+ auto RootVI = RootIt->second;
+ if (RootVI.getSummaryList().size() != 1) {
+ LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
+ << " should have exactly one summary, but has "
+ << RootVI.getSummaryList().size() << ". Skipping.\n");
+ continue;
+ }
+ StringRef RootDefiningModule =
+ RootVI.getSummaryList().front()->modulePath();
+ LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << Root
+ << " is : " << RootDefiningModule << "\n");
+ auto &Set = Workloads[RootDefiningModule];
+ for (const auto &Callee : AllCallees) {
+ LLVM_DEBUG(dbgs() << "[Workload] " << Callee << "\n");
+ DbgReportIfAmbiguous(Callee);
+ auto ElemIt = NameToValueInfo.find(Callee);
+ if (ElemIt == NameToValueInfo.end()) {
+ LLVM_DEBUG(dbgs() << "[Workload] " << Callee << " not found\n");
+ continue;
+ }
+ Set.insert(ElemIt->second);
+ }
+ LLVM_DEBUG({
+ dbgs() << "[Workload] Root: " << Root << " we have " << Set.size()
+ << " distinct callees.\n";
+ for (const auto &VI : Set) {
+ dbgs() << "[Workload] Root: " << Root
+ << " Would include: " << VI.getGUID() << "\n";
+ }
+ });
+ }
+ }
+};
+
+std::unique_ptr<ModuleImportsManager> ModuleImportsManager::create(
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ IsPrevailing,
+ const ModuleSummaryIndex &Index,
+ DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists) {
+ if (WorkloadDefinitions.empty()) {
+ LLVM_DEBUG(dbgs() << "[Workload] Using the regular imports manager.\n");
+ return std::unique_ptr<ModuleImportsManager>(
+ new ModuleImportsManager(IsPrevailing, Index, ExportLists));
+ }
+ LLVM_DEBUG(dbgs() << "[Workload] Using the contextual imports manager.\n");
+ return std::make_unique<WorkloadImportsManager>(IsPrevailing, Index,
+ ExportLists);
+}
+
static const char *
getFailureName(FunctionImporter::ImportFailureReason Reason) {
switch (Reason) {
@@ -403,7 +684,7 @@ static void computeImportForFunction(
isPrevailing,
SmallVectorImpl<EdgeInfo> &Worklist, GlobalsImporter &GVImporter,
FunctionImporter::ImportMapTy &ImportList,
- StringMap<FunctionImporter::ExportSetTy> *ExportLists,
+ DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists,
FunctionImporter::ImportThresholdsTy &ImportThresholds) {
GVImporter.onImportingSummary(Summary);
static int ImportCount = 0;
@@ -482,7 +763,7 @@ static void computeImportForFunction(
continue;
}
- FunctionImporter::ImportFailureReason Reason;
+ FunctionImporter::ImportFailureReason Reason{};
CalleeSummary = selectCallee(Index, VI.getSummaryList(), NewThreshold,
Summary.modulePath(), Reason);
if (!CalleeSummary) {
@@ -567,20 +848,13 @@ static void computeImportForFunction(
}
}
-/// Given the list of globals defined in a module, compute the list of imports
-/// as well as the list of "exports", i.e. the list of symbols referenced from
-/// another module (that may require promotion).
-static void ComputeImportForModule(
- const GVSummaryMapTy &DefinedGVSummaries,
- function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
- isPrevailing,
- const ModuleSummaryIndex &Index, StringRef ModName,
- FunctionImporter::ImportMapTy &ImportList,
- StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) {
+void ModuleImportsManager::computeImportForModule(
+ const GVSummaryMapTy &DefinedGVSummaries, StringRef ModName,
+ FunctionImporter::ImportMapTy &ImportList) {
  // Worklist contains the list of functions imported in this module, for which
// we will analyse the callees and may import further down the callgraph.
SmallVector<EdgeInfo, 128> Worklist;
- GlobalsImporter GVI(Index, DefinedGVSummaries, isPrevailing, ImportList,
+ GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
ExportLists);
FunctionImporter::ImportThresholdsTy ImportThresholds;
@@ -603,7 +877,7 @@ static void ComputeImportForModule(
continue;
LLVM_DEBUG(dbgs() << "Initialize import for " << VI << "\n");
computeImportForFunction(*FuncSummary, Index, ImportInstrLimit,
- DefinedGVSummaries, isPrevailing, Worklist, GVI,
+ DefinedGVSummaries, IsPrevailing, Worklist, GVI,
ImportList, ExportLists, ImportThresholds);
}
@@ -615,7 +889,7 @@ static void ComputeImportForModule(
if (auto *FS = dyn_cast<FunctionSummary>(Summary))
computeImportForFunction(*FS, Index, Threshold, DefinedGVSummaries,
- isPrevailing, Worklist, GVI, ImportList,
+ IsPrevailing, Worklist, GVI, ImportList,
ExportLists, ImportThresholds);
}
@@ -671,10 +945,10 @@ static unsigned numGlobalVarSummaries(const ModuleSummaryIndex &Index,
#endif
#ifndef NDEBUG
-static bool
-checkVariableImport(const ModuleSummaryIndex &Index,
- StringMap<FunctionImporter::ImportMapTy> &ImportLists,
- StringMap<FunctionImporter::ExportSetTy> &ExportLists) {
+static bool checkVariableImport(
+ const ModuleSummaryIndex &Index,
+ DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
+ DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
DenseSet<GlobalValue::GUID> FlattenedImports;
@@ -702,7 +976,7 @@ checkVariableImport(const ModuleSummaryIndex &Index,
for (auto &ExportPerModule : ExportLists)
for (auto &VI : ExportPerModule.second)
if (!FlattenedImports.count(VI.getGUID()) &&
- IsReadOrWriteOnlyVarNeedingImporting(ExportPerModule.first(), VI))
+ IsReadOrWriteOnlyVarNeedingImporting(ExportPerModule.first, VI))
return false;
return true;
@@ -712,19 +986,19 @@ checkVariableImport(const ModuleSummaryIndex &Index,
/// Compute all the import and export for every module using the Index.
void llvm::ComputeCrossModuleImport(
const ModuleSummaryIndex &Index,
- const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing,
- StringMap<FunctionImporter::ImportMapTy> &ImportLists,
- StringMap<FunctionImporter::ExportSetTy> &ExportLists) {
+ DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
+ DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
+ auto MIS = ModuleImportsManager::create(isPrevailing, Index, &ExportLists);
// For each module that has function defined, compute the import/export lists.
for (const auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
- auto &ImportList = ImportLists[DefinedGVSummaries.first()];
+ auto &ImportList = ImportLists[DefinedGVSummaries.first];
LLVM_DEBUG(dbgs() << "Computing import for Module '"
- << DefinedGVSummaries.first() << "'\n");
- ComputeImportForModule(DefinedGVSummaries.second, isPrevailing, Index,
- DefinedGVSummaries.first(), ImportList,
- &ExportLists);
+ << DefinedGVSummaries.first << "'\n");
+ MIS->computeImportForModule(DefinedGVSummaries.second,
+ DefinedGVSummaries.first, ImportList);
}
// When computing imports we only added the variables and functions being
@@ -735,7 +1009,7 @@ void llvm::ComputeCrossModuleImport(
for (auto &ELI : ExportLists) {
FunctionImporter::ExportSetTy NewExports;
const auto &DefinedGVSummaries =
- ModuleToDefinedGVSummaries.lookup(ELI.first());
+ ModuleToDefinedGVSummaries.lookup(ELI.first);
for (auto &EI : ELI.second) {
// Find the copy defined in the exporting module so that we can mark the
// values it references in that specific definition as exported.
@@ -783,7 +1057,7 @@ void llvm::ComputeCrossModuleImport(
LLVM_DEBUG(dbgs() << "Import/Export lists for " << ImportLists.size()
<< " modules:\n");
for (auto &ModuleImports : ImportLists) {
- auto ModName = ModuleImports.first();
+ auto ModName = ModuleImports.first;
auto &Exports = ExportLists[ModName];
unsigned NumGVS = numGlobalVarSummaries(Index, Exports);
LLVM_DEBUG(dbgs() << "* Module " << ModName << " exports "
@@ -791,7 +1065,7 @@ void llvm::ComputeCrossModuleImport(
<< " vars. Imports from " << ModuleImports.second.size()
<< " modules.\n");
for (auto &Src : ModuleImports.second) {
- auto SrcModName = Src.first();
+ auto SrcModName = Src.first;
unsigned NumGVSPerMod = numGlobalVarSummaries(Index, Src.second);
LLVM_DEBUG(dbgs() << " - " << Src.second.size() - NumGVSPerMod
<< " functions imported from " << SrcModName << "\n");
@@ -809,7 +1083,7 @@ static void dumpImportListForModule(const ModuleSummaryIndex &Index,
LLVM_DEBUG(dbgs() << "* Module " << ModulePath << " imports from "
<< ImportList.size() << " modules.\n");
for (auto &Src : ImportList) {
- auto SrcModName = Src.first();
+ auto SrcModName = Src.first;
unsigned NumGVSPerMod = numGlobalVarSummaries(Index, Src.second);
LLVM_DEBUG(dbgs() << " - " << Src.second.size() - NumGVSPerMod
<< " functions imported from " << SrcModName << "\n");
@@ -819,8 +1093,15 @@ static void dumpImportListForModule(const ModuleSummaryIndex &Index,
}
#endif
-/// Compute all the imports for the given module in the Index.
-void llvm::ComputeCrossModuleImportForModule(
+/// Compute all the imports for the given module using the Index.
+///
+/// \p isPrevailing is a callback that will be called with a global value's GUID
+/// and summary and should return whether the module corresponding to the
+/// summary contains the linker-prevailing copy of that value.
+///
+/// \p ImportList will be populated with a map that can be passed to
+/// FunctionImporter::importFunctions() above (see description there).
+static void ComputeCrossModuleImportForModuleForTest(
StringRef ModulePath,
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing,
@@ -833,17 +1114,20 @@ void llvm::ComputeCrossModuleImportForModule(
// Compute the import list for this module.
LLVM_DEBUG(dbgs() << "Computing import for Module '" << ModulePath << "'\n");
- ComputeImportForModule(FunctionSummaryMap, isPrevailing, Index, ModulePath,
- ImportList);
+ auto MIS = ModuleImportsManager::create(isPrevailing, Index);
+ MIS->computeImportForModule(FunctionSummaryMap, ModulePath, ImportList);
#ifndef NDEBUG
dumpImportListForModule(Index, ModulePath, ImportList);
#endif
}
-// Mark all external summaries in Index for import into the given module.
-// Used for distributed builds using a distributed index.
-void llvm::ComputeCrossModuleImportForModuleFromIndex(
+/// Mark all external summaries in \p Index for import into the given module.
+/// Used for testing the case of distributed builds using a distributed index.
+///
+/// \p ImportList will be populated with a map that can be passed to
+/// FunctionImporter::importFunctions() above (see description there).
+static void ComputeCrossModuleImportForModuleFromIndexForTest(
StringRef ModulePath, const ModuleSummaryIndex &Index,
FunctionImporter::ImportMapTy &ImportList) {
for (const auto &GlobalList : Index) {
@@ -1041,7 +1325,7 @@ void llvm::computeDeadSymbolsWithConstProp(
/// \p ModulePath.
void llvm::gatherImportedSummariesForModule(
StringRef ModulePath,
- const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
const FunctionImporter::ImportMapTy &ImportList,
std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
// Include all summaries from the importing module.
@@ -1049,10 +1333,9 @@ void llvm::gatherImportedSummariesForModule(
ModuleToDefinedGVSummaries.lookup(ModulePath);
// Include summaries for imports.
for (const auto &ILI : ImportList) {
- auto &SummariesForIndex =
- ModuleToSummariesForIndex[std::string(ILI.first())];
+ auto &SummariesForIndex = ModuleToSummariesForIndex[std::string(ILI.first)];
const auto &DefinedGVSummaries =
- ModuleToDefinedGVSummaries.lookup(ILI.first());
+ ModuleToDefinedGVSummaries.lookup(ILI.first);
for (const auto &GI : ILI.second) {
const auto &DS = DefinedGVSummaries.find(GI);
assert(DS != DefinedGVSummaries.end() &&
@@ -1298,7 +1581,7 @@ static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
// ensure all uses of alias instead use the new clone (casted if necessary).
NewFn->setLinkage(GA->getLinkage());
NewFn->setVisibility(GA->getVisibility());
- GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewFn, GA->getType()));
+ GA->replaceAllUsesWith(NewFn);
NewFn->takeName(GA);
return NewFn;
}
@@ -1327,7 +1610,7 @@ Expected<bool> FunctionImporter::importFunctions(
// Do the actual import of functions now, one Module at a time
std::set<StringRef> ModuleNameOrderedList;
for (const auto &FunctionsToImportPerModule : ImportList) {
- ModuleNameOrderedList.insert(FunctionsToImportPerModule.first());
+ ModuleNameOrderedList.insert(FunctionsToImportPerModule.first);
}
for (const auto &Name : ModuleNameOrderedList) {
// Get the module for the import
@@ -1461,7 +1744,7 @@ Expected<bool> FunctionImporter::importFunctions(
return ImportedCount;
}
-static bool doImportingForModule(
+static bool doImportingForModuleForTest(
Module &M, function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing) {
if (SummaryFile.empty())
@@ -1481,11 +1764,11 @@ static bool doImportingForModule(
// when testing distributed backend handling via the opt tool, when
// we have distributed indexes containing exactly the summaries to import.
if (ImportAllIndex)
- ComputeCrossModuleImportForModuleFromIndex(M.getModuleIdentifier(), *Index,
- ImportList);
+ ComputeCrossModuleImportForModuleFromIndexForTest(M.getModuleIdentifier(),
+ *Index, ImportList);
else
- ComputeCrossModuleImportForModule(M.getModuleIdentifier(), isPrevailing,
- *Index, ImportList);
+ ComputeCrossModuleImportForModuleForTest(M.getModuleIdentifier(),
+ isPrevailing, *Index, ImportList);
// Conservatively mark all internal values as promoted. This interface is
// only used when doing importing via the function importing pass. The pass
@@ -1533,7 +1816,7 @@ PreservedAnalyses FunctionImportPass::run(Module &M,
auto isPrevailing = [](GlobalValue::GUID, const GlobalValueSummary *) {
return true;
};
- if (!doImportingForModule(M, isPrevailing))
+ if (!doImportingForModuleForTest(M, isPrevailing))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index ac5dbc7cfb2a..a4c12006ee24 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -5,45 +5,6 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This specialises functions with constant parameters. Constant parameters
-// like function pointers and constant globals are propagated to the callee by
-// specializing the function. The main benefit of this pass at the moment is
-// that indirect calls are transformed into direct calls, which provides inline
-// opportunities that the inliner would not have been able to achieve. That's
-// why function specialisation is run before the inliner in the optimisation
-// pipeline; that is by design. Otherwise, we would only benefit from constant
-// passing, which is a valid use-case too, but hasn't been explored much in
-// terms of performance uplifts, cost-model and compile-time impact.
-//
-// Current limitations:
-// - It does not yet handle integer ranges. We do support "literal constants",
-// but that's off by default under an option.
-// - The cost-model could be further looked into (it mainly focuses on inlining
-// benefits),
-//
-// Ideas:
-// - With a function specialization attribute for arguments, we could have
-// a direct way to steer function specialization, avoiding the cost-model,
-// and thus control compile-times / code-size.
-//
-// Todos:
-// - Specializing recursive functions relies on running the transformation a
-// number of times, which is controlled by option
-// `func-specialization-max-iters`. Thus, increasing this value and the
-// number of iterations, will linearly increase the number of times recursive
-// functions get specialized, see also the discussion in
-// https://reviews.llvm.org/D106426 for details. Perhaps there is a
-// compile-time friendlier way to control/limit the number of specialisations
-// for recursive functions.
-// - Don't transform the function if function specialization does not trigger;
-// the SCCPSolver may make IR changes.
-//
-// References:
-// - 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable
-// it by default?”, https://www.youtube.com/watch?v=zJiCjeXgV5Q
-//
-//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/FunctionSpecialization.h"
#include "llvm/ADT/Statistic.h"
@@ -78,11 +39,47 @@ static cl::opt<unsigned> MaxClones(
"The maximum number of clones allowed for a single function "
"specialization"));
+static cl::opt<unsigned>
+ MaxDiscoveryIterations("funcspec-max-discovery-iterations", cl::init(100),
+ cl::Hidden,
+ cl::desc("The maximum number of iterations allowed "
+ "when searching for transitive "
+ "phis"));
+
+static cl::opt<unsigned> MaxIncomingPhiValues(
+ "funcspec-max-incoming-phi-values", cl::init(8), cl::Hidden,
+ cl::desc("The maximum number of incoming values a PHI node can have to be "
+ "considered during the specialization bonus estimation"));
+
+static cl::opt<unsigned> MaxBlockPredecessors(
+ "funcspec-max-block-predecessors", cl::init(2), cl::Hidden, cl::desc(
+ "The maximum number of predecessors a basic block can have to be "
+ "considered during the estimation of dead code"));
+
static cl::opt<unsigned> MinFunctionSize(
- "funcspec-min-function-size", cl::init(100), cl::Hidden, cl::desc(
+ "funcspec-min-function-size", cl::init(300), cl::Hidden, cl::desc(
"Don't specialize functions that have fewer than this number of "
"instructions"));
+static cl::opt<unsigned> MaxCodeSizeGrowth(
+ "funcspec-max-codesize-growth", cl::init(3), cl::Hidden, cl::desc(
+ "Maximum codesize growth allowed per function"));
+
+static cl::opt<unsigned> MinCodeSizeSavings(
+ "funcspec-min-codesize-savings", cl::init(20), cl::Hidden, cl::desc(
+ "Reject specializations whose codesize savings are less than this "
+ "percentage of the original function size"));
+
+static cl::opt<unsigned> MinLatencySavings(
+ "funcspec-min-latency-savings", cl::init(40), cl::Hidden,
+ cl::desc("Reject specializations whose latency savings are less than this"
+ "much percent of the original function size"));
+
+static cl::opt<unsigned> MinInliningBonus(
+ "funcspec-min-inlining-bonus", cl::init(300), cl::Hidden, cl::desc(
+ "Reject specializations whose inlining bonus is less than this "
+ "percentage of the original function size"));
+
static cl::opt<bool> SpecializeOnAddress(
"funcspec-on-address", cl::init(false), cl::Hidden, cl::desc(
"Enable function specialization on the address of global values"));
@@ -96,26 +93,33 @@ static cl::opt<bool> SpecializeLiteralConstant(
"Enable specialization of functions that take a literal constant as an "
"argument"));
-// Estimates the instruction cost of all the basic blocks in \p WorkList.
-// The successors of such blocks are added to the list as long as they are
-// executable and they have a unique predecessor. \p WorkList represents
-// the basic blocks of a specialization which become dead once we replace
-// instructions that are known to be constants. The aim here is to estimate
-// the combination of size and latency savings in comparison to the non
-// specialized version of the function.
-static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
- ConstMap &KnownConstants, SCCPSolver &Solver,
- BlockFrequencyInfo &BFI,
- TargetTransformInfo &TTI) {
- Cost Bonus = 0;
+bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ,
+ DenseSet<BasicBlock *> &DeadBlocks) {
+ unsigned I = 0;
+ return all_of(predecessors(Succ),
+ [&I, BB, Succ, &DeadBlocks] (BasicBlock *Pred) {
+ return I++ < MaxBlockPredecessors &&
+ (Pred == BB || Pred == Succ || DeadBlocks.contains(Pred));
+ });
+}
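canEliminateSuccessor treats Succ as removable only when every one of its predecessors is BB itself, Succ (a self-loop edge), or a block already recorded as dead, and it bails out once more than MaxBlockPredecessors predecessors have been inspected. A standalone sketch of the same bounded test over a toy CFG, with all names invented for illustration:

    #include <set>
    #include <vector>

    struct ToyBlock { std::vector<ToyBlock *> Preds; };

    // Mirrors the all_of predicate above: give up past MaxPreds (default 2,
    // like -funcspec-max-block-predecessors), otherwise require each
    // predecessor to be BB, Succ itself, or already dead.
    static bool canEliminate(ToyBlock *BB, ToyBlock *Succ,
                             const std::set<ToyBlock *> &Dead,
                             unsigned MaxPreds = 2) {
      unsigned I = 0;
      for (ToyBlock *Pred : Succ->Preds) {
        if (++I > MaxPreds)
          return false; // too many predecessors: conservatively keep Succ
        if (Pred != BB && Pred != Succ && !Dead.count(Pred))
          return false; // still reachable from a live block
      }
      return true;
    }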
+// Estimates the codesize savings due to dead code after constant propagation.
+// \p WorkList represents the basic blocks of a specialization which will
+// eventually become dead once we replace instructions that are known to be
+// constants. The successors of such blocks are added to the list as long as
+// the \p Solver found they were executable prior to specialization, and only
+// if all their predecessors are dead.
+Cost InstCostVisitor::estimateBasicBlocks(
+ SmallVectorImpl<BasicBlock *> &WorkList) {
+ Cost CodeSize = 0;
// Accumulate the instruction cost of each basic block weighted by frequency.
while (!WorkList.empty()) {
BasicBlock *BB = WorkList.pop_back_val();
- uint64_t Weight = BFI.getBlockFreq(BB).getFrequency() /
- BFI.getEntryFreq();
- if (!Weight)
+ // These blocks are considered dead as far as the InstCostVisitor
+ // is concerned. They haven't been proven dead yet by the Solver,
+ // but may become dead if we propagate the specialization arguments.
+ if (!DeadBlocks.insert(BB).second)
continue;
for (Instruction &I : *BB) {
@@ -127,67 +131,105 @@ static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
if (KnownConstants.contains(&I))
continue;
- Bonus += Weight *
- TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
+ Cost C = TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
- LLVM_DEBUG(dbgs() << "FnSpecialization: Bonus " << Bonus
- << " after user " << I << "\n");
+ LLVM_DEBUG(dbgs() << "FnSpecialization: CodeSize " << C
+ << " for user " << I << "\n");
+ CodeSize += C;
}
// Keep adding dead successors to the list as long as they are
- // executable and they have a unique predecessor.
+ // executable and only reachable from dead blocks.
for (BasicBlock *SuccBB : successors(BB))
- if (Solver.isBlockExecutable(SuccBB) &&
- SuccBB->getUniquePredecessor() == BB)
+ if (isBlockExecutable(SuccBB) &&
+ canEliminateSuccessor(BB, SuccBB, DeadBlocks))
WorkList.push_back(SuccBB);
}
- return Bonus;
+ return CodeSize;
}
static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
if (auto *C = dyn_cast<Constant>(V))
return C;
- if (auto It = KnownConstants.find(V); It != KnownConstants.end())
- return It->second;
- return nullptr;
+ return KnownConstants.lookup(V);
}
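The rewritten findConstantFor leans on DenseMap::lookup, which returns a value-initialized mapped type on a miss, nullptr for pointer values, so the old find()/end() dance collapses into one call. A minimal sketch (helper name invented):

    #include "llvm/ADT/DenseMap.h"

    // lookup() yields nullptr when K is absent because the mapped type is
    // a pointer, exactly matching the explicit find()/end() pattern.
    static int *lookupOrNull(const llvm::DenseMap<int, int *> &M, int K) {
      return M.lookup(K);
    }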
-Cost InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
- // Cache the iterator before visiting.
- LastVisited = KnownConstants.insert({Use, C}).first;
+Bonus InstCostVisitor::getBonusFromPendingPHIs() {
+ Bonus B;
+ while (!PendingPHIs.empty()) {
+ Instruction *Phi = PendingPHIs.pop_back_val();
+ // The pending PHIs could have been proven dead by now.
+ if (isBlockExecutable(Phi->getParent()))
+ B += getUserBonus(Phi);
+ }
+ return B;
+}
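The Bonus type is declared in llvm/Transforms/IPO/FunctionSpecialization.h rather than in this hunk; from its use here it pairs a code-size estimate with a latency estimate and accumulates via operator+=. A plausible shape, shown purely as an illustration and not the header's actual definition:

    // Illustrative only; the real definition lives in
    // llvm/Transforms/IPO/FunctionSpecialization.h.
    struct Bonus {
      unsigned CodeSize = 0;
      unsigned Latency = 0;
      Bonus() = default;
      Bonus(unsigned CodeSize, unsigned Latency)
          : CodeSize(CodeSize), Latency(Latency) {}
      Bonus &operator+=(const Bonus &Other) {
        CodeSize += Other.CodeSize;
        Latency += Other.Latency;
        return *this;
      }
    };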
+
+/// Compute a bonus for replacing argument \p A with constant \p C.
+Bonus InstCostVisitor::getSpecializationBonus(Argument *A, Constant *C) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
+ << C->getNameOrAsOperand() << "\n");
+ Bonus B;
+ for (auto *U : A->users())
+ if (auto *UI = dyn_cast<Instruction>(U))
+ if (isBlockExecutable(UI->getParent()))
+ B += getUserBonus(UI, A, C);
- if (auto *I = dyn_cast<SwitchInst>(User))
- return estimateSwitchInst(*I);
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated bonus {CodeSize = "
+ << B.CodeSize << ", Latency = " << B.Latency
+ << "} for argument " << *A << "\n");
+ return B;
+}
- if (auto *I = dyn_cast<BranchInst>(User))
- return estimateBranchInst(*I);
+Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
+ // We have already propagated a constant for this user.
+ if (KnownConstants.contains(User))
+ return {0, 0};
- C = visit(*User);
- if (!C)
- return 0;
+ // Cache the iterator before visiting.
+ LastVisited = Use ? KnownConstants.insert({Use, C}).first
+ : KnownConstants.end();
+
+ Cost CodeSize = 0;
+ if (auto *I = dyn_cast<SwitchInst>(User)) {
+ CodeSize = estimateSwitchInst(*I);
+ } else if (auto *I = dyn_cast<BranchInst>(User)) {
+ CodeSize = estimateBranchInst(*I);
+ } else {
+ C = visit(*User);
+ if (!C)
+ return {0, 0};
+ }
+ // Binding a constant to a switch or branch makes no real sense, unlike
+ // any other instruction type, but caching an entry for them here prevents
+ // their bonus from being estimated multiple times.
KnownConstants.insert({User, C});
+ CodeSize += TTI.getInstructionCost(User, TargetTransformInfo::TCK_CodeSize);
+
uint64_t Weight = BFI.getBlockFreq(User->getParent()).getFrequency() /
- BFI.getEntryFreq();
- if (!Weight)
- return 0;
+ BFI.getEntryFreq().getFrequency();
- Cost Bonus = Weight *
- TTI.getInstructionCost(User, TargetTransformInfo::TCK_SizeAndLatency);
+ Cost Latency = Weight *
+ TTI.getInstructionCost(User, TargetTransformInfo::TCK_Latency);
- LLVM_DEBUG(dbgs() << "FnSpecialization: Bonus " << Bonus
- << " for user " << *User << "\n");
+ LLVM_DEBUG(dbgs() << "FnSpecialization: {CodeSize = " << CodeSize
+ << ", Latency = " << Latency << "} for user "
+ << *User << "\n");
+ Bonus B(CodeSize, Latency);
for (auto *U : User->users())
if (auto *UI = dyn_cast<Instruction>(U))
- if (Solver.isBlockExecutable(UI->getParent()))
- Bonus += getUserBonus(UI, User, C);
+ if (UI != User && isBlockExecutable(UI->getParent()))
+ B += getUserBonus(UI, User, C);
- return Bonus;
+ return B;
}
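The latency half of the bonus is scaled by the block's frequency relative to the entry block using integer division, so any block colder than the entry contributes nothing. A worked example with made-up numbers:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t EntryFreq = 1024, BlockFreq = 8192, LatencyCost = 4;
      uint64_t Weight = BlockFreq / EntryFreq; // 8: runs ~8x per entry
      assert(Weight * LatencyCost == 32);      // weighted latency bonus
      // A block colder than the entry (say BlockFreq = 256) gives
      // Weight 0 under integer division, so its latency bonus vanishes.
      return 0;
    }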
Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
+ assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
+
if (I.getCondition() != LastVisited->first)
return 0;
@@ -202,16 +244,17 @@ Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
SmallVector<BasicBlock *> WorkList;
for (const auto &Case : I.cases()) {
BasicBlock *BB = Case.getCaseSuccessor();
- if (BB == Succ || !Solver.isBlockExecutable(BB) ||
- BB->getUniquePredecessor() != I.getParent())
- continue;
- WorkList.push_back(BB);
+ if (BB != Succ && isBlockExecutable(BB) &&
+ canEliminateSuccessor(I.getParent(), BB, DeadBlocks))
+ WorkList.push_back(BB);
}
- return estimateBasicBlocks(WorkList, KnownConstants, Solver, BFI, TTI);
+ return estimateBasicBlocks(WorkList);
}
Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
+ assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
+
if (I.getCondition() != LastVisited->first)
return 0;
@@ -219,14 +262,115 @@ Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
// Initialize the worklist with the dead successor as long as it is
// executable and only reachable from dead blocks.
SmallVector<BasicBlock *> WorkList;
- if (Solver.isBlockExecutable(Succ) &&
- Succ->getUniquePredecessor() == I.getParent())
+ if (isBlockExecutable(Succ) &&
+ canEliminateSuccessor(I.getParent(), Succ, DeadBlocks))
WorkList.push_back(Succ);
- return estimateBasicBlocks(WorkList, KnownConstants, Solver, BFI, TTI);
+ return estimateBasicBlocks(WorkList);
+}
+
+bool InstCostVisitor::discoverTransitivelyIncomingValues(
+ Constant *Const, PHINode *Root, DenseSet<PHINode *> &TransitivePHIs) {
+
+ SmallVector<PHINode *, 64> WorkList;
+ WorkList.push_back(Root);
+ unsigned Iter = 0;
+
+ while (!WorkList.empty()) {
+ PHINode *PN = WorkList.pop_back_val();
+
+ if (++Iter > MaxDiscoveryIterations ||
+ PN->getNumIncomingValues() > MaxIncomingPhiValues)
+ return false;
+
+ if (!TransitivePHIs.insert(PN).second)
+ continue;
+
+ for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
+ Value *V = PN->getIncomingValue(I);
+
+ // Disregard self-references and dead incoming values.
+ if (auto *Inst = dyn_cast<Instruction>(V))
+ if (Inst == PN || DeadBlocks.contains(PN->getIncomingBlock(I)))
+ continue;
+
+ if (Constant *C = findConstantFor(V, KnownConstants)) {
+ // Not all incoming values are the same constant. Bail immediately.
+ if (C != Const)
+ return false;
+ continue;
+ }
+
+ if (auto *Phi = dyn_cast<PHINode>(V)) {
+ WorkList.push_back(Phi);
+ continue;
+ }
+
+ // We can't reason about anything else.
+ return false;
+ }
+ }
+ return true;
+}
+
+Constant *InstCostVisitor::visitPHINode(PHINode &I) {
+ if (I.getNumIncomingValues() > MaxIncomingPhiValues)
+ return nullptr;
+
+ bool Inserted = VisitedPHIs.insert(&I).second;
+ Constant *Const = nullptr;
+ bool HaveSeenIncomingPHI = false;
+
+ for (unsigned Idx = 0, E = I.getNumIncomingValues(); Idx != E; ++Idx) {
+ Value *V = I.getIncomingValue(Idx);
+
+ // Disregard self-references and dead incoming values.
+ if (auto *Inst = dyn_cast<Instruction>(V))
+ if (Inst == &I || DeadBlocks.contains(I.getIncomingBlock(Idx)))
+ continue;
+
+ if (Constant *C = findConstantFor(V, KnownConstants)) {
+ if (!Const)
+ Const = C;
+ // Not all incoming values are the same constant. Bail immediately.
+ if (C != Const)
+ return nullptr;
+ continue;
+ }
+
+ if (Inserted) {
+ // First time we are seeing this phi. We will retry later, after
+ // all the constant arguments have been propagated. Bail for now.
+ PendingPHIs.push_back(&I);
+ return nullptr;
+ }
+
+ if (isa<PHINode>(V)) {
+ // Perhaps it is a Transitive Phi. We will confirm later.
+ HaveSeenIncomingPHI = true;
+ continue;
+ }
+
+ // We can't reason about anything else.
+ return nullptr;
+ }
+
+ if (!Const)
+ return nullptr;
+
+ if (!HaveSeenIncomingPHI)
+ return Const;
+
+ DenseSet<PHINode *> TransitivePHIs;
+ if (!discoverTransitivelyIncomingValues(Const, &I, TransitivePHIs))
+ return nullptr;
+
+ return Const;
}
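visitPHINode parks a phi on PendingPHIs the first time it is seen with unresolved incoming values, and once another phi feeds it, it only folds if discoverTransitivelyIncomingValues confirms that every constant reachable through the phi web is the same Const. A toy sketch of that walk, with the ToyPhi type and defaults invented for illustration (100 matches -funcspec-max-discovery-iterations):

    #include <set>
    #include <vector>

    struct ToyPhi {
      std::vector<int> ConstIncoming;    // incoming constants, if any
      std::vector<ToyPhi *> PhiIncoming; // incoming phis
    };

    static bool allIncomingEqual(ToyPhi *Root, int Const,
                                 unsigned MaxIter = 100) {
      std::set<ToyPhi *> Seen;
      std::vector<ToyPhi *> WorkList{Root};
      unsigned Iter = 0;
      while (!WorkList.empty()) {
        ToyPhi *PN = WorkList.back();
        WorkList.pop_back();
        if (++Iter > MaxIter)
          return false; // discovery budget exhausted: give up
        if (!Seen.insert(PN).second)
          continue; // already visited (phi cycles are common)
        for (int C : PN->ConstIncoming)
          if (C != Const)
            return false; // conflicting constant: cannot fold
        for (ToyPhi *P : PN->PhiIncoming)
          WorkList.push_back(P); // keep chasing transitive phis
      }
      return true;
    }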
Constant *InstCostVisitor::visitFreezeInst(FreezeInst &I) {
+ assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
+
if (isGuaranteedNotToBeUndefOrPoison(LastVisited->second))
return LastVisited->second;
return nullptr;
@@ -253,6 +397,8 @@ Constant *InstCostVisitor::visitCallBase(CallBase &I) {
}
Constant *InstCostVisitor::visitLoadInst(LoadInst &I) {
+ assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
+
if (isa<ConstantPointerNull>(LastVisited->second))
return nullptr;
return ConstantFoldLoadFromConstPtr(LastVisited->second, I.getType(), DL);
@@ -275,6 +421,8 @@ Constant *InstCostVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
}
Constant *InstCostVisitor::visitSelectInst(SelectInst &I) {
+ assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
+
if (I.getCondition() != LastVisited->first)
return nullptr;
@@ -290,6 +438,8 @@ Constant *InstCostVisitor::visitCastInst(CastInst &I) {
}
Constant *InstCostVisitor::visitCmpInst(CmpInst &I) {
+ assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
+
bool Swap = I.getOperand(1) == LastVisited->first;
Value *V = Swap ? I.getOperand(0) : I.getOperand(1);
Constant *Other = findConstantFor(V, KnownConstants);
@@ -303,10 +453,14 @@ Constant *InstCostVisitor::visitCmpInst(CmpInst &I) {
}
Constant *InstCostVisitor::visitUnaryOperator(UnaryOperator &I) {
+ assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
+
return ConstantFoldUnaryOpOperand(I.getOpcode(), LastVisited->second, DL);
}
Constant *InstCostVisitor::visitBinaryOperator(BinaryOperator &I) {
+ assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
+
bool Swap = I.getOperand(1) == LastVisited->first;
Value *V = Swap ? I.getOperand(0) : I.getOperand(1);
Constant *Other = findConstantFor(V, KnownConstants);
@@ -413,10 +567,7 @@ void FunctionSpecializer::promoteConstantStackValues(Function *F) {
Value *GV = new GlobalVariable(M, ConstVal->getType(), true,
GlobalValue::InternalLinkage, ConstVal,
- "funcspec.arg");
- if (ArgOpType != ConstVal->getType())
- GV = ConstantExpr::getBitCast(cast<Constant>(GV), ArgOpType);
-
+ "specialized.arg." + Twine(++NGlobals));
Call->setArgOperand(Idx, GV);
}
}
@@ -506,13 +657,18 @@ bool FunctionSpecializer::run() {
if (!Inserted && !Metrics.isRecursive && !SpecializeLiteralConstant)
continue;
+ int64_t Sz = *Metrics.NumInsts.getValue();
+ assert(Sz > 0 && "CodeSize should be positive");
+ // It is safe to downcast from int64_t because NumInsts is always positive.
+ unsigned FuncSize = static_cast<unsigned>(Sz);
+
LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
- << F.getName() << " is " << Metrics.NumInsts << "\n");
+ << F.getName() << " is " << FuncSize << "\n");
if (Inserted && Metrics.isRecursive)
promoteConstantStackValues(&F);
- if (!findSpecializations(&F, Metrics.NumInsts, AllSpecs, SM)) {
+ if (!findSpecializations(&F, FuncSize, AllSpecs, SM)) {
LLVM_DEBUG(
dbgs() << "FnSpecialization: No possible specializations found for "
<< F.getName() << "\n");
@@ -640,14 +796,15 @@ void FunctionSpecializer::removeDeadFunctions() {
/// Clone the function \p F and remove the ssa_copy intrinsics added by
/// the SCCPSolver in the cloned version.
-static Function *cloneCandidateFunction(Function *F) {
+static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
ValueToValueMapTy Mappings;
Function *Clone = CloneFunction(F, Mappings);
+ Clone->setName(F->getName() + ".specialized." + Twine(NSpecs));
removeSSACopy(*Clone);
return Clone;
}
-bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
+bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
SmallVectorImpl<Spec> &AllSpecs,
SpecMap &SM) {
// A mapping from a specialisation signature to the index of the respective
@@ -713,16 +870,48 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
AllSpecs[Index].CallSites.push_back(&CS);
} else {
// Calculate the specialisation gain.
- Cost Score = 0 - SpecCost;
+ Bonus B;
+ unsigned Score = 0;
InstCostVisitor Visitor = getInstCostVisitorFor(F);
- for (ArgInfo &A : S.Args)
- Score += getSpecializationBonus(A.Formal, A.Actual, Visitor);
+ for (ArgInfo &A : S.Args) {
+ B += Visitor.getSpecializationBonus(A.Formal, A.Actual);
+ Score += getInliningBonus(A.Formal, A.Actual);
+ }
+ B += Visitor.getBonusFromPendingPHIs();
+
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization bonus {CodeSize = "
+ << B.CodeSize << ", Latency = " << B.Latency
+ << ", Inlining = " << Score << "}\n");
+
+ FunctionGrowth[F] += FuncSize - B.CodeSize;
+
+ auto IsProfitable = [](Bonus &B, unsigned Score, unsigned FuncSize,
+ unsigned FuncGrowth) -> bool {
+ // No check required.
+ if (ForceSpecialization)
+ return true;
+ // Minimum inlining bonus.
+ if (Score > MinInliningBonus * FuncSize / 100)
+ return true;
+ // Minimum codesize savings.
+ if (B.CodeSize < MinCodeSizeSavings * FuncSize / 100)
+ return false;
+ // Minimum latency savings.
+ if (B.Latency < MinLatencySavings * FuncSize / 100)
+ return false;
+ // Maximum codesize growth.
+ if (FuncGrowth / FuncSize > MaxCodeSizeGrowth)
+ return false;
+ return true;
+ };
// Discard unprofitable specialisations.
- if (!ForceSpecialization && Score <= 0)
+ if (!IsProfitable(B, Score, FuncSize, FunctionGrowth[F]))
continue;
// Create a new specialisation entry.
+ Score += std::max(B.CodeSize, B.Latency);
auto &Spec = AllSpecs.emplace_back(F, S, Score);
if (CS.getFunction() != F)
Spec.CallSites.push_back(&CS);
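The IsProfitable gates compare the estimated savings against percentages of the original function size. A worked example with the default thresholds (MinInliningBonus=300, MinCodeSizeSavings=20, MinLatencySavings=40, MaxCodeSizeGrowth=3) and made-up inputs:

    #include <cassert>

    int main() {
      unsigned FuncSize = 500;                // instructions in F
      unsigned CodeSize = 150, Latency = 250; // estimated savings (Bonus)
      unsigned Score = 0;                     // no inlining bonus
      unsigned FuncGrowth = 350;              // cumulative growth so far
      // Inlining shortcut: 0 > 300% of 500 (= 1500) is false, no shortcut.
      assert(!(Score > 300u * FuncSize / 100));
      // Codesize savings: 150 >= 20% of 500 (= 100), passes.
      assert(!(CodeSize < 20u * FuncSize / 100));
      // Latency savings: 250 >= 40% of 500 (= 200), passes.
      assert(!(Latency < 40u * FuncSize / 100));
      // Growth cap: 350 / 500 = 0, not > 3, passes: specialize.
      assert(!(FuncGrowth / FuncSize > 3u));
      return 0;
    }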
@@ -768,7 +957,7 @@ bool FunctionSpecializer::isCandidateFunction(Function *F) {
Function *FunctionSpecializer::createSpecialization(Function *F,
const SpecSig &S) {
- Function *Clone = cloneCandidateFunction(F);
+ Function *Clone = cloneCandidateFunction(F, Specializations.size() + 1);
// The original function does not necessarily have internal linkage, but the
// clone must.
@@ -789,30 +978,14 @@ Function *FunctionSpecializer::createSpecialization(Function *F,
return Clone;
}
-/// Compute a bonus for replacing argument \p A with constant \p C.
-Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
- InstCostVisitor &Visitor) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
- << C->getNameOrAsOperand() << "\n");
-
- Cost TotalCost = 0;
- for (auto *U : A->users())
- if (auto *UI = dyn_cast<Instruction>(U))
- if (Solver.isBlockExecutable(UI->getParent()))
- TotalCost += Visitor.getUserBonus(UI, A, C);
-
- LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated user bonus "
- << TotalCost << " for argument " << *A << "\n");
-
- // The below heuristic is only concerned with exposing inlining
- // opportunities via indirect call promotion. If the argument is not a
- // (potentially casted) function pointer, give up.
- //
- // TODO: Perhaps we should consider checking such inlining opportunities
- // while traversing the users of the specialization arguments ?
+/// Compute the inlining bonus for replacing argument \p A with constant \p C.
+/// The below heuristic is only concerned with exposing inlining
+/// opportunities via indirect call promotion. If the argument is not a
+/// (potentially casted) function pointer, give up.
+unsigned FunctionSpecializer::getInliningBonus(Argument *A, Constant *C) {
Function *CalledFunction = dyn_cast<Function>(C->stripPointerCasts());
if (!CalledFunction)
- return TotalCost;
+ return 0;
// Get TTI for the called function (used for the inline cost).
auto &CalleeTTI = (GetTTI)(*CalledFunction);
@@ -822,7 +995,7 @@ Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
// calls to be promoted to direct calls. If the indirect call promotion
// would likely enable the called function to be inlined, specializing is a
// good idea.
- int Bonus = 0;
+ int InliningBonus = 0;
for (User *U : A->users()) {
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
continue;
@@ -849,15 +1022,15 @@ Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
// We clamp the bonus for this call to be between zero and the default
// threshold.
if (IC.isAlways())
- Bonus += Params.DefaultThreshold;
+ InliningBonus += Params.DefaultThreshold;
else if (IC.isVariable() && IC.getCostDelta() > 0)
- Bonus += IC.getCostDelta();
+ InliningBonus += IC.getCostDelta();
- LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << Bonus
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << InliningBonus
<< " for user " << *U << "\n");
}
- return TotalCost + Bonus;
+ return InliningBonus > 0 ? static_cast<unsigned>(InliningBonus) : 0;
}
/// Determine if it is possible to specialise the function for constant values
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 8012e1e650a0..951372adcfa9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -17,7 +17,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
@@ -390,7 +389,7 @@ static bool collectSRATypes(DenseMap<uint64_t, GlobalPart> &Parts,
}
// Scalable types not currently supported.
- if (isa<ScalableVectorType>(Ty))
+ if (Ty->isScalableTy())
return false;
auto IsStored = [](Value *V, Constant *Initializer) {
@@ -930,25 +929,7 @@ OptimizeGlobalAddressOfAllocation(GlobalVariable *GV, CallInst *CI,
}
// Update users of the allocation to use the new global instead.
- BitCastInst *TheBC = nullptr;
- while (!CI->use_empty()) {
- Instruction *User = cast<Instruction>(CI->user_back());
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
- if (BCI->getType() == NewGV->getType()) {
- BCI->replaceAllUsesWith(NewGV);
- BCI->eraseFromParent();
- } else {
- BCI->setOperand(0, NewGV);
- }
- } else {
- if (!TheBC)
- TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
- User->replaceUsesOfWith(CI, TheBC);
- }
- }
-
- SmallSetVector<Constant *, 1> RepValues;
- RepValues.insert(NewGV);
+ CI->replaceAllUsesWith(NewGV);
// If there is a comparison against null, we will insert a global bool to
// keep track of whether the global was initialized yet or not.
@@ -980,9 +961,7 @@ OptimizeGlobalAddressOfAllocation(GlobalVariable *GV, CallInst *CI,
Use &LoadUse = *LI->use_begin();
ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser());
if (!ICI) {
- auto *CE = ConstantExpr::getBitCast(NewGV, LI->getType());
- RepValues.insert(CE);
- LoadUse.set(CE);
+ LoadUse.set(NewGV);
continue;
}
@@ -1028,8 +1007,7 @@ OptimizeGlobalAddressOfAllocation(GlobalVariable *GV, CallInst *CI,
// To further other optimizations, loop over all users of NewGV and try to
// constant prop them. This will promote GEP instructions with constant
// indices into GEP constant-exprs, which will allow global-opt to hack on it.
- for (auto *CE : RepValues)
- ConstantPropUsersOf(CE, DL, TLI);
+ ConstantPropUsersOf(NewGV, DL, TLI);
return NewGV;
}
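This and the neighbouring GlobalOpt hunks delete ConstantExpr::getBitCast calls that became no-ops under opaque pointers: every pointer value now has type ptr, so the global can stand in for the call directly. A minimal sketch, assuming both values live in the same address space:

    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Instructions.h"

    static void replaceAllocWithGlobal(llvm::CallInst *CI,
                                       llvm::GlobalVariable *NewGV) {
      // Pre-opaque-pointers this required:
      //   CI->replaceAllUsesWith(
      //       ConstantExpr::getBitCast(NewGV, CI->getType()));
      CI->replaceAllUsesWith(NewGV); // both sides are plain 'ptr' now
    }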
@@ -1474,7 +1452,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
if (!GS.HasMultipleAccessingFunctions &&
GS.AccessingFunction &&
GV->getValueType()->isSingleValueType() &&
- GV->getType()->getAddressSpace() == 0 &&
+ GV->getType()->getAddressSpace() == DL.getAllocaAddrSpace() &&
!GV->isExternallyInitialized() &&
GS.AccessingFunction->doesNotRecurse() &&
isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV,
@@ -1584,7 +1562,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
GV->getAddressSpace());
NGV->takeName(GV);
NGV->copyAttributesFrom(GV);
- GV->replaceAllUsesWith(ConstantExpr::getBitCast(NGV, GV->getType()));
+ GV->replaceAllUsesWith(NGV);
GV->eraseFromParent();
GV = NGV;
}
@@ -1635,7 +1613,7 @@ processGlobal(GlobalValue &GV,
function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<DominatorTree &(Function &)> LookupDomTree) {
- if (GV.getName().startswith("llvm."))
+ if (GV.getName().starts_with("llvm."))
return false;
GlobalStatus GS;
@@ -1885,12 +1863,9 @@ static void RemovePreallocated(Function *F) {
CB->eraseFromParent();
Builder.SetInsertPoint(PreallocatedSetup);
- auto *StackSave =
- Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stacksave));
-
+ auto *StackSave = Builder.CreateStackSave();
Builder.SetInsertPoint(NewCB->getNextNonDebugInstruction());
- Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
- StackSave);
+ Builder.CreateStackRestore(StackSave);
// Replace @llvm.call.preallocated.arg() with alloca.
// Cannot modify users() while iterating over it, so make a copy.
@@ -1917,10 +1892,8 @@ static void RemovePreallocated(Function *F) {
Builder.SetInsertPoint(InsertBefore);
auto *Alloca =
Builder.CreateAlloca(ArgType, AddressSpace, nullptr, "paarg");
- auto *BitCast = Builder.CreateBitCast(
- Alloca, Type::getInt8PtrTy(M->getContext()), UseCall->getName());
- ArgAllocas[AllocArgIndex] = BitCast;
- AllocaReplacement = BitCast;
+ ArgAllocas[AllocArgIndex] = Alloca;
+ AllocaReplacement = Alloca;
}
UseCall->replaceAllUsesWith(AllocaReplacement);
@@ -2131,19 +2104,18 @@ static void setUsedInitializer(GlobalVariable &V,
const auto *VEPT = cast<PointerType>(VAT->getArrayElementType());
// Type of pointer to the array of pointers.
- PointerType *Int8PtrTy =
- Type::getInt8PtrTy(V.getContext(), VEPT->getAddressSpace());
+ PointerType *PtrTy =
+ PointerType::get(V.getContext(), VEPT->getAddressSpace());
SmallVector<Constant *, 8> UsedArray;
for (GlobalValue *GV : Init) {
- Constant *Cast =
- ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy);
+ Constant *Cast = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, PtrTy);
UsedArray.push_back(Cast);
}
// Sort to get deterministic order.
array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames);
- ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size());
+ ArrayType *ATy = ArrayType::get(PtrTy, UsedArray.size());
Module *M = V.getParent();
V.removeFromParent();
@@ -2313,7 +2285,7 @@ OptimizeGlobalAliases(Module &M,
if (!hasUsesToReplace(J, Used, RenameTarget))
continue;
- J.replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J.getType()));
+ J.replaceAllUsesWith(Aliasee);
++NumAliasesResolved;
Changed = true;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
index 599ace9ca79f..fabb3c5fb921 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -44,6 +44,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
@@ -86,6 +87,11 @@ static cl::opt<int> MaxParametersForSplit(
"hotcoldsplit-max-params", cl::init(4), cl::Hidden,
cl::desc("Maximum number of parameters for a split function"));
+static cl::opt<int> ColdBranchProbDenom(
+ "hotcoldsplit-cold-probability-denom", cl::init(100), cl::Hidden,
+ cl::desc("Divisor of cold branch probability."
+ "BranchProbability = 1/ColdBranchProbDenom"));
+
namespace {
// Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify
// this function unless you modify the MBB version as well.
@@ -102,6 +108,32 @@ bool blockEndsInUnreachable(const BasicBlock &BB) {
return !(isa<ReturnInst>(I) || isa<IndirectBrInst>(I));
}
+void analyzeProfMetadata(BasicBlock *BB,
+ BranchProbability ColdProbThresh,
+ SmallPtrSetImpl<BasicBlock *> &AnnotatedColdBlocks) {
+ // TODO: Handle branches with > 2 successors.
+ BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!CondBr)
+ return;
+
+ uint64_t TrueWt, FalseWt;
+ if (!extractBranchWeights(*CondBr, TrueWt, FalseWt))
+ return;
+
+ auto SumWt = TrueWt + FalseWt;
+ if (SumWt == 0)
+ return;
+
+ auto TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt);
+ auto FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt);
+
+ if (TrueProb <= ColdProbThresh)
+ AnnotatedColdBlocks.insert(CondBr->getSuccessor(0));
+
+ if (FalseProb <= ColdProbThresh)
+ AnnotatedColdBlocks.insert(CondBr->getSuccessor(1));
+}
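A worked example of the branch-weight arithmetic above: a conditional branch carrying !prof weights {1, 99}, checked against a 1/100 threshold (the value hotcoldsplit-cold-probability-denom=100 would give):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t TrueWt = 1, FalseWt = 99;
      uint64_t SumWt = TrueWt + FalseWt; // 100
      // TrueProb = 1/100; with ColdProbThresh = 1/100 the comparison
      // TrueProb <= ColdProbThresh holds (cross-multiplied below), so the
      // true successor is recorded as statically cold.
      assert(TrueWt * 100 <= SumWt * 1);
      return 0;
    }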
+
bool unlikelyExecuted(BasicBlock &BB) {
// Exception handling blocks are unlikely executed.
if (BB.isEHPad() || isa<ResumeInst>(BB.getTerminator()))
@@ -183,6 +215,34 @@ bool HotColdSplitting::isFunctionCold(const Function &F) const {
return false;
}
+bool HotColdSplitting::isBasicBlockCold(BasicBlock *BB,
+ BranchProbability ColdProbThresh,
+ SmallPtrSetImpl<BasicBlock *> &ColdBlocks,
+ SmallPtrSetImpl<BasicBlock *> &AnnotatedColdBlocks,
+ BlockFrequencyInfo *BFI) const {
+ // This block is already part of some outlining region.
+ if (ColdBlocks.count(BB))
+ return true;
+
+ if (BFI) {
+ if (PSI->isColdBlock(BB, BFI))
+ return true;
+ } else {
+ // Find cold successors of BB during the reverse post-order traversal.
+ analyzeProfMetadata(BB, ColdProbThresh, AnnotatedColdBlocks);
+
+ // A statically cold BB would be known before it is visited
+ // because the prof-data of incoming edges are 'analyzed' as part of RPOT.
+ if (AnnotatedColdBlocks.count(BB))
+ return true;
+ }
+
+ if (EnableStaticAnalysis && unlikelyExecuted(*BB))
+ return true;
+
+ return false;
+}
+
// Returns false if the function should not be considered for hot-cold split
// optimization.
bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
@@ -565,6 +625,9 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
// The set of cold blocks.
SmallPtrSet<BasicBlock *, 4> ColdBlocks;
+ // Set of cold blocks obtained with RPOT.
+ SmallPtrSet<BasicBlock *, 4> AnnotatedColdBlocks;
+
// The worklist of non-intersecting regions left to outline.
SmallVector<OutliningRegion, 2> OutliningWorklist;
@@ -587,16 +650,15 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
TargetTransformInfo &TTI = GetTTI(F);
OptimizationRemarkEmitter &ORE = (*GetORE)(F);
AssumptionCache *AC = LookupAC(F);
+ auto ColdProbThresh = TTI.getPredictableBranchThreshold().getCompl();
+
+ if (ColdBranchProbDenom.getNumOccurrences())
+ ColdProbThresh = BranchProbability(1, ColdBranchProbDenom.getValue());
// Find all cold regions.
for (BasicBlock *BB : RPOT) {
- // This block is already part of some outlining region.
- if (ColdBlocks.count(BB))
- continue;
-
- bool Cold = (BFI && PSI->isColdBlock(BB, BFI)) ||
- (EnableStaticAnalysis && unlikelyExecuted(*BB));
- if (!Cold)
+ if (!isBasicBlockCold(BB, ColdProbThresh, ColdBlocks, AnnotatedColdBlocks,
+ BFI))
continue;
LLVM_DEBUG({
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp
index e258299c6a4c..a6e19df7c5f1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -155,7 +155,7 @@ struct OutlinableGroup {
/// \param TargetBB - the BasicBlock to put Instruction into.
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
for (Instruction &I : llvm::make_early_inc_range(SourceBB))
- I.moveBefore(TargetBB, TargetBB.end());
+ I.moveBeforePreserving(TargetBB, TargetBB.end());
}
/// A function to sort the keys of \p Map, which must be a mapping of constant
@@ -198,7 +198,7 @@ Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other,
BasicBlock *
OutlinableRegion::findCorrespondingBlockIn(const OutlinableRegion &Other,
BasicBlock *BB) {
- Instruction *FirstNonPHI = BB->getFirstNonPHI();
+ Instruction *FirstNonPHI = BB->getFirstNonPHIOrDbg();
assert(FirstNonPHI && "block is empty?");
Value *CorrespondingVal = findCorrespondingValueIn(Other, FirstNonPHI);
if (!CorrespondingVal)
@@ -557,7 +557,7 @@ collectRegionsConstants(OutlinableRegion &Region,
// Iterate over the operands in an instruction. If the global value number,
// assigned by the IRSimilarityCandidate, has been seen before, we check if
- // the the number has been found to be not the same value in each instance.
+ // the number has been found to be not the same value in each instance.
for (Value *V : ID.OperVals) {
std::optional<unsigned> GVNOpt = C.getGVN(V);
assert(GVNOpt && "Expected a GVN for operand?");
@@ -766,7 +766,7 @@ static void moveFunctionData(Function &Old, Function &New,
}
}
-/// Find the the constants that will need to be lifted into arguments
+/// Find the constants that will need to be lifted into arguments
/// as they are not the same in each instance of the region.
///
/// \param [in] C - The IRSimilarityCandidate containing the region we are
@@ -1346,7 +1346,7 @@ findExtractedOutputToOverallOutputMapping(Module &M, OutlinableRegion &Region,
// the output, so we add a pointer type to the argument types of the overall
// function to handle this output and create a mapping to it.
if (!TypeFound) {
- Group.ArgumentTypes.push_back(Output->getType()->getPointerTo(
+ Group.ArgumentTypes.push_back(PointerType::get(Output->getContext(),
M.getDataLayout().getAllocaAddrSpace()));
// Mark the new pointer type as the last value in the aggregate argument
// list.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp
index 3e00aebce372..a9747aebf67b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/Inliner.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
@@ -63,7 +62,6 @@
#include <cassert>
#include <functional>
#include <utility>
-#include <vector>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index 9b4b3efd7283..733f290b1bc9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -381,8 +381,7 @@ struct ScopedSaveAliaseesAndUsed {
appendToCompilerUsed(M, CompilerUsed);
for (auto P : FunctionAliases)
- P.first->setAliasee(
- ConstantExpr::getBitCast(P.second, P.first->getType()));
+ P.first->setAliasee(P.second);
for (auto P : ResolverIFuncs) {
// This does not preserve pointer casts that may have been stripped by the
@@ -411,16 +410,19 @@ class LowerTypeTestsModule {
// selectJumpTableArmEncoding may decide to use Thumb in either case.
bool CanUseArmJumpTable = false, CanUseThumbBWJumpTable = false;
+ // Cache variable used by hasBranchTargetEnforcement().
+ int HasBranchTargetEnforcement = -1;
+
// The jump table type we ended up deciding on. (Usually the same as
// Arch, except that 'arm' and 'thumb' are often interchangeable.)
Triple::ArchType JumpTableArch = Triple::UnknownArch;
IntegerType *Int1Ty = Type::getInt1Ty(M.getContext());
IntegerType *Int8Ty = Type::getInt8Ty(M.getContext());
- PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
+ PointerType *Int8PtrTy = PointerType::getUnqual(M.getContext());
ArrayType *Int8Arr0Ty = ArrayType::get(Type::getInt8Ty(M.getContext()), 0);
IntegerType *Int32Ty = Type::getInt32Ty(M.getContext());
- PointerType *Int32PtrTy = PointerType::getUnqual(Int32Ty);
+ PointerType *Int32PtrTy = PointerType::getUnqual(M.getContext());
IntegerType *Int64Ty = Type::getInt64Ty(M.getContext());
IntegerType *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext(), 0);
@@ -492,6 +494,7 @@ class LowerTypeTestsModule {
ArrayRef<GlobalTypeMember *> Globals);
Triple::ArchType
selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions);
+ bool hasBranchTargetEnforcement();
unsigned getJumpTableEntrySize();
Type *getJumpTableEntryType();
void createJumpTableEntry(raw_ostream &AsmOS, raw_ostream &ConstraintOS,
@@ -755,9 +758,9 @@ Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
// also conveniently gives us a bit offset to use during the load from
// the bitset.
Value *OffsetSHR =
- B.CreateLShr(PtrOffset, ConstantExpr::getZExt(TIL.AlignLog2, IntPtrTy));
+ B.CreateLShr(PtrOffset, B.CreateZExt(TIL.AlignLog2, IntPtrTy));
Value *OffsetSHL = B.CreateShl(
- PtrOffset, ConstantExpr::getZExt(
+ PtrOffset, B.CreateZExt(
ConstantExpr::getSub(
ConstantInt::get(Int8Ty, DL.getPointerSizeInBits(0)),
TIL.AlignLog2),
@@ -962,7 +965,6 @@ LowerTypeTestsModule::importTypeId(StringRef TypeId) {
Int8Arr0Ty);
if (auto *GV = dyn_cast<GlobalVariable>(C))
GV->setVisibility(GlobalValue::HiddenVisibility);
- C = ConstantExpr::getBitCast(C, Int8PtrTy);
return C;
};
@@ -1100,15 +1102,13 @@ void LowerTypeTestsModule::importFunction(
replaceCfiUses(F, FDecl, isJumpTableCanonical);
// Set visibility late because it's used in replaceCfiUses() to determine
- // whether uses need to to be replaced.
+ // whether uses need to be replaced.
F->setVisibility(Visibility);
}
void LowerTypeTestsModule::lowerTypeTestCalls(
ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
- CombinedGlobalAddr = ConstantExpr::getBitCast(CombinedGlobalAddr, Int8PtrTy);
-
// For each type identifier in this disjoint set...
for (Metadata *TypeId : TypeIds) {
// Build the bitset.
@@ -1196,6 +1196,20 @@ static const unsigned kARMJumpTableEntrySize = 4;
static const unsigned kARMBTIJumpTableEntrySize = 8;
static const unsigned kARMv6MJumpTableEntrySize = 16;
static const unsigned kRISCVJumpTableEntrySize = 8;
+static const unsigned kLOONGARCH64JumpTableEntrySize = 8;
+
+bool LowerTypeTestsModule::hasBranchTargetEnforcement() {
+ if (HasBranchTargetEnforcement == -1) {
+ // First time this query has been called. Find out the answer by checking
+ // the module flags.
+ if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("branch-target-enforcement")))
+ HasBranchTargetEnforcement = (BTE->getZExtValue() != 0);
+ else
+ HasBranchTargetEnforcement = 0;
+ }
+ return HasBranchTargetEnforcement;
+}
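The -1/0/1 int is a hand-rolled tri-state: "not yet computed" plus the cached boolean. An equivalent sketch with std::optional, purely as an illustration of the caching pattern:

    #include <optional>

    class Example {
      std::optional<bool> HasBTE;
      bool computeFromModuleFlags() { return false; } // stand-in query
    public:
      bool hasBranchTargetEnforcement() {
        if (!HasBTE)
          HasBTE = computeFromModuleFlags(); // computed once, then cached
        return *HasBTE;
      }
    };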
unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
switch (JumpTableArch) {
@@ -1209,19 +1223,22 @@ unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
case Triple::arm:
return kARMJumpTableEntrySize;
case Triple::thumb:
- if (CanUseThumbBWJumpTable)
+ if (CanUseThumbBWJumpTable) {
+ if (hasBranchTargetEnforcement())
+ return kARMBTIJumpTableEntrySize;
return kARMJumpTableEntrySize;
- else
+ } else {
return kARMv6MJumpTableEntrySize;
+ }
case Triple::aarch64:
- if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
- M.getModuleFlag("branch-target-enforcement")))
- if (BTE->getZExtValue())
- return kARMBTIJumpTableEntrySize;
+ if (hasBranchTargetEnforcement())
+ return kARMBTIJumpTableEntrySize;
return kARMJumpTableEntrySize;
case Triple::riscv32:
case Triple::riscv64:
return kRISCVJumpTableEntrySize;
+ case Triple::loongarch64:
+ return kLOONGARCH64JumpTableEntrySize;
default:
report_fatal_error("Unsupported architecture for jump tables");
}
@@ -1251,10 +1268,8 @@ void LowerTypeTestsModule::createJumpTableEntry(
} else if (JumpTableArch == Triple::arm) {
AsmOS << "b $" << ArgIndex << "\n";
} else if (JumpTableArch == Triple::aarch64) {
- if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
- Dest->getParent()->getModuleFlag("branch-target-enforcement")))
- if (BTE->getZExtValue())
- AsmOS << "bti c\n";
+ if (hasBranchTargetEnforcement())
+ AsmOS << "bti c\n";
AsmOS << "b $" << ArgIndex << "\n";
} else if (JumpTableArch == Triple::thumb) {
if (!CanUseThumbBWJumpTable) {
@@ -1281,11 +1296,16 @@ void LowerTypeTestsModule::createJumpTableEntry(
<< ".balign 4\n"
<< "1: .word $" << ArgIndex << " - (0b + 4)\n";
} else {
+ if (hasBranchTargetEnforcement())
+ AsmOS << "bti\n";
AsmOS << "b.w $" << ArgIndex << "\n";
}
} else if (JumpTableArch == Triple::riscv32 ||
JumpTableArch == Triple::riscv64) {
AsmOS << "tail $" << ArgIndex << "@plt\n";
+ } else if (JumpTableArch == Triple::loongarch64) {
+ AsmOS << "pcalau12i $$t0, %pc_hi20($" << ArgIndex << ")\n"
+ << "jirl $$r0, $$t0, %pc_lo12($" << ArgIndex << ")\n";
} else {
report_fatal_error("Unsupported architecture for jump tables");
}
@@ -1304,7 +1324,8 @@ void LowerTypeTestsModule::buildBitSetsFromFunctions(
ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
if (Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm ||
Arch == Triple::thumb || Arch == Triple::aarch64 ||
- Arch == Triple::riscv32 || Arch == Triple::riscv64)
+ Arch == Triple::riscv32 || Arch == Triple::riscv64 ||
+ Arch == Triple::loongarch64)
buildBitSetsFromFunctionsNative(TypeIds, Functions);
else if (Arch == Triple::wasm32 || Arch == Triple::wasm64)
buildBitSetsFromFunctionsWASM(TypeIds, Functions);
@@ -1446,9 +1467,19 @@ void LowerTypeTestsModule::createJumpTable(
SmallVector<Value *, 16> AsmArgs;
AsmArgs.reserve(Functions.size() * 2);
- for (GlobalTypeMember *GTM : Functions)
+ // Check if all entries have the NoUnwind attribute. If they do, we can
+ // safely mark the cfi.jumptable as NoUnwind; otherwise, direct calls to
+ // the jump table would not handle exceptions properly.
+ bool areAllEntriesNounwind = true;
+ for (GlobalTypeMember *GTM : Functions) {
+ if (!llvm::cast<llvm::Function>(GTM->getGlobal())
+ ->hasFnAttribute(llvm::Attribute::NoUnwind)) {
+ areAllEntriesNounwind = false;
+ }
createJumpTableEntry(AsmOS, ConstraintOS, JumpTableArch, AsmArgs,
cast<Function>(GTM->getGlobal()));
+ }
// Align the whole table by entry size.
F->setAlignment(Align(getJumpTableEntrySize()));
@@ -1461,17 +1492,23 @@ void LowerTypeTestsModule::createJumpTable(
if (JumpTableArch == Triple::arm)
F->addFnAttr("target-features", "-thumb-mode");
if (JumpTableArch == Triple::thumb) {
- F->addFnAttr("target-features", "+thumb-mode");
- if (CanUseThumbBWJumpTable) {
- // Thumb jump table assembly needs Thumb2. The following attribute is
- // added by Clang for -march=armv7.
- F->addFnAttr("target-cpu", "cortex-a8");
+ if (hasBranchTargetEnforcement()) {
+ // If we're generating a Thumb jump table with BTI, add a target-features
+ // setting to ensure BTI can be assembled.
+ F->addFnAttr("target-features", "+thumb-mode,+pacbti");
+ } else {
+ F->addFnAttr("target-features", "+thumb-mode");
+ if (CanUseThumbBWJumpTable) {
+ // Thumb jump table assembly needs Thumb2. The following attribute is
+ // added by Clang for -march=armv7.
+ F->addFnAttr("target-cpu", "cortex-a8");
+ }
}
}
// When -mbranch-protection= is used, the inline asm adds a BTI. Suppress BTI
// for the function to avoid double BTI. This is a no-op without
// -mbranch-protection=.
- if (JumpTableArch == Triple::aarch64) {
+ if (JumpTableArch == Triple::aarch64 || JumpTableArch == Triple::thumb) {
F->addFnAttr("branch-target-enforcement", "false");
F->addFnAttr("sign-return-address", "none");
}
@@ -1485,8 +1522,13 @@ void LowerTypeTestsModule::createJumpTable(
// -fcf-protection=.
if (JumpTableArch == Triple::x86 || JumpTableArch == Triple::x86_64)
F->addFnAttr(Attribute::NoCfCheck);
- // Make sure we don't emit .eh_frame for this function.
- F->addFnAttr(Attribute::NoUnwind);
+
+ // Make sure we don't emit .eh_frame for this function if it isn't needed.
+ if (areAllEntriesNounwind)
+ F->addFnAttr(Attribute::NoUnwind);
+
+ // Make sure we do not inline any calls to the cfi.jumptable.
+ F->addFnAttr(Attribute::NoInline);
BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F);
IRBuilder<> IRB(BB);
@@ -1618,12 +1660,10 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
Function *F = cast<Function>(Functions[I]->getGlobal());
bool IsJumpTableCanonical = Functions[I]->isJumpTableCanonical();
- Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast(
- ConstantExpr::getInBoundsGetElementPtr(
- JumpTableType, JumpTable,
- ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
- ConstantInt::get(IntPtrTy, I)}),
- F->getType());
+ Constant *CombinedGlobalElemPtr = ConstantExpr::getInBoundsGetElementPtr(
+ JumpTableType, JumpTable,
+ ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
+ ConstantInt::get(IntPtrTy, I)});
const bool IsExported = Functions[I]->isExported();
if (!IsJumpTableCanonical) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index f835fb26fcb8..70a3f3067d9d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -104,11 +104,13 @@ static cl::opt<std::string> MemProfImportSummary(
cl::desc("Import summary to use for testing the ThinLTO backend via opt"),
cl::Hidden);
+namespace llvm {
// Indicate we are linking with an allocator that supports hot/cold operator
// new interfaces.
cl::opt<bool> SupportsHotColdNew(
"supports-hot-cold-new", cl::init(false), cl::Hidden,
cl::desc("Linking with hot/cold operator new interfaces"));
+} // namespace llvm
namespace {
/// CRTP base for graphs built from either IR or ThinLTO summary index.
@@ -791,11 +793,10 @@ CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
eraseCalleeEdge(const ContextEdge *Edge) {
- auto EI =
- std::find_if(CalleeEdges.begin(), CalleeEdges.end(),
- [Edge](const std::shared_ptr<ContextEdge> &CalleeEdge) {
- return CalleeEdge.get() == Edge;
- });
+ auto EI = llvm::find_if(
+ CalleeEdges, [Edge](const std::shared_ptr<ContextEdge> &CalleeEdge) {
+ return CalleeEdge.get() == Edge;
+ });
assert(EI != CalleeEdges.end());
CalleeEdges.erase(EI);
}
@@ -803,11 +804,10 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
eraseCallerEdge(const ContextEdge *Edge) {
- auto EI =
- std::find_if(CallerEdges.begin(), CallerEdges.end(),
- [Edge](const std::shared_ptr<ContextEdge> &CallerEdge) {
- return CallerEdge.get() == Edge;
- });
+ auto EI = llvm::find_if(
+ CallerEdges, [Edge](const std::shared_ptr<ContextEdge> &CallerEdge) {
+ return CallerEdge.get() == Edge;
+ });
assert(EI != CallerEdges.end());
CallerEdges.erase(EI);
}
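llvm::find_if from ADT/STLExtras.h takes a range instead of an iterator pair, which is all these two hunks change. A minimal sketch of the same tidy-up (function name invented):

    #include "llvm/ADT/STLExtras.h"
    #include <vector>

    static int *findFirstNegative(std::vector<int> &V) {
      auto It = llvm::find_if(V, [](int X) { return X < 0; });
      return It == V.end() ? nullptr : &*It;
    }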
@@ -2093,8 +2093,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
for (auto &Edge : CallerEdges) {
// Skip any that have been removed by an earlier recursive call.
if (Edge->Callee == nullptr && Edge->Caller == nullptr) {
- assert(!std::count(Node->CallerEdges.begin(), Node->CallerEdges.end(),
- Edge));
+ assert(!llvm::count(Node->CallerEdges, Edge));
continue;
}
// Ignore any caller we previously visited via another edge.
@@ -2985,6 +2984,21 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
if (!mayHaveMemprofSummary(CB))
continue;
+ auto *CalledValue = CB->getCalledOperand();
+ auto *CalledFunction = CB->getCalledFunction();
+ if (CalledValue && !CalledFunction) {
+ CalledValue = CalledValue->stripPointerCasts();
+ // Stripping pointer casts can reveal a called function.
+ CalledFunction = dyn_cast<Function>(CalledValue);
+ }
+ // Check if this is an alias to a function. If so, get the
+ // called aliasee for the checks below.
+ if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
+ assert(!CalledFunction &&
+ "Expected null called function in callsite for alias");
+ CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
+ }
+
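The added block resolves the call target through constant casts first and then through aliases, so the later checks can rely on a real Function. The same resolution order as a self-contained sketch (helper name hypothetical):

    #include "llvm/IR/Function.h"
    #include "llvm/IR/GlobalAlias.h"
    #include "llvm/IR/InstrTypes.h"

    // Resolve the called Function through pointer casts and aliases.
    static llvm::Function *resolveCallee(llvm::CallBase &CB) {
      llvm::Function *F = CB.getCalledFunction();
      llvm::Value *V = CB.getCalledOperand();
      if (V && !F) {
        V = V->stripPointerCasts(); // may expose a direct callee
        F = llvm::dyn_cast<llvm::Function>(V);
      }
      if (auto *GA = llvm::dyn_cast<llvm::GlobalAlias>(V))
        F = llvm::dyn_cast<llvm::Function>(GA->getAliaseeObject());
      return F;
    }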
CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
I.getMetadata(LLVMContext::MD_callsite));
auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof);
@@ -3116,13 +3130,13 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size());
// Should have skipped indirect calls via mayHaveMemprofSummary.
- assert(CB->getCalledFunction());
- assert(!IsMemProfClone(*CB->getCalledFunction()));
+ assert(CalledFunction);
+ assert(!IsMemProfClone(*CalledFunction));
// Update the calls per the summary info.
// Save orig name since it gets updated in the first iteration
// below.
- auto CalleeOrigName = CB->getCalledFunction()->getName();
+ auto CalleeOrigName = CalledFunction->getName();
for (unsigned J = 0; J < StackNode.Clones.size(); J++) {
// Do nothing if this version calls the original version of its
// callee.
@@ -3130,7 +3144,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
continue;
auto NewF = M.getOrInsertFunction(
getMemProfFuncName(CalleeOrigName, StackNode.Clones[J]),
- CB->getCalledFunction()->getFunctionType());
+ CalledFunction->getFunctionType());
CallBase *CBClone;
// Copy 0 is the original function.
if (!J)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index feda5d6459cb..c8c011d94e4a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -107,6 +107,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/StructuralHash.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
@@ -171,15 +172,14 @@ namespace {
class FunctionNode {
mutable AssertingVH<Function> F;
- FunctionComparator::FunctionHash Hash;
+ IRHash Hash;
public:
// Note the hash is recalculated potentially multiple times, but it is cheap.
- FunctionNode(Function *F)
- : F(F), Hash(FunctionComparator::functionHash(*F)) {}
+ FunctionNode(Function *F) : F(F), Hash(StructuralHash(*F)) {}
Function *getFunc() const { return F; }
- FunctionComparator::FunctionHash getHash() const { return Hash; }
+ IRHash getHash() const { return Hash; }
/// Replace the reference to the function F by the function G, assuming their
/// implementations are equal.
@@ -375,9 +375,32 @@ bool MergeFunctions::doFunctionalCheck(std::vector<WeakTrackingVH> &Worklist) {
}
#endif
+/// Check whether \p F has an intrinsic which references distinct metadata as
+/// an operand. The most common instance of this would be CFI checks for
+/// function-local types.
+static bool hasDistinctMetadataIntrinsic(const Function &F) {
+ for (const BasicBlock &BB : F) {
+ for (const Instruction &I : BB.instructionsWithoutDebug()) {
+ if (!isa<IntrinsicInst>(&I))
+ continue;
+
+ for (Value *Op : I.operands()) {
+ auto *MDL = dyn_cast<MetadataAsValue>(Op);
+ if (!MDL)
+ continue;
+ if (MDNode *N = dyn_cast<MDNode>(MDL->getMetadata()))
+ if (N->isDistinct())
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
/// Check whether \p F is eligible for function merging.
static bool isEligibleForMerging(Function &F) {
- return !F.isDeclaration() && !F.hasAvailableExternallyLinkage();
+ return !F.isDeclaration() && !F.hasAvailableExternallyLinkage() &&
+ !hasDistinctMetadataIntrinsic(F);
}
bool MergeFunctions::runOnModule(Module &M) {
@@ -390,11 +413,10 @@ bool MergeFunctions::runOnModule(Module &M) {
// All functions in the module, ordered by hash. Functions with a unique
// hash value are easily eliminated.
- std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
- HashedFuncs;
+ std::vector<std::pair<IRHash, Function *>> HashedFuncs;
for (Function &Func : M) {
if (isEligibleForMerging(Func)) {
- HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
+ HashedFuncs.push_back({StructuralHash(Func), &Func});
}
}
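MergeFunctions now buckets candidates by StructuralHash(F) from llvm/IR/StructuralHash.h, so only functions sharing an IRHash ever reach the expensive comparator. A sketch of that bucketing step (sorting helpers as in LLVM's STLExtras):

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/IR/StructuralHash.h"

    // Hash every eligible function, then sort so equal hashes are adjacent;
    // only adjacent runs need a full pairwise comparison.
    std::vector<std::pair<IRHash, Function *>> Buckets;
    for (Function &Fn : M)
      if (isEligibleForMerging(Fn))
        Buckets.push_back({StructuralHash(Fn), &Fn});
    llvm::stable_sort(Buckets, llvm::less_first());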
@@ -441,7 +463,6 @@ bool MergeFunctions::runOnModule(Module &M) {
// Replace direct callers of Old with New.
void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
- Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
for (Use &U : llvm::make_early_inc_range(Old->uses())) {
CallBase *CB = dyn_cast<CallBase>(U.getUser());
if (CB && CB->isCallee(&U)) {
@@ -450,7 +471,7 @@ void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
// type congruences in byval(), in which case we need to keep the byval
// type of the call-site, not the callee function.
remove(CB->getFunction());
- U.set(BitcastNew);
+ U.set(New);
}
}
}
@@ -632,7 +653,7 @@ static bool canCreateThunkFor(Function *F) {
// Don't merge tiny functions using a thunk, since it can just end up
// making the function larger.
if (F->size() == 1) {
- if (F->front().size() <= 2) {
+ if (F->front().sizeWithoutDebug() < 2) {
LLVM_DEBUG(dbgs() << "canCreateThunkFor: " << F->getName()
<< " is too small to bother creating a thunk for\n");
return false;
@@ -641,6 +662,13 @@ static bool canCreateThunkFor(Function *F) {
return true;
}
+/// Copy metadata from one function to another.
+static void copyMetadataIfPresent(Function *From, Function *To, StringRef Key) {
+ if (MDNode *MD = From->getMetadata(Key)) {
+ To->setMetadata(Key, MD);
+ }
+}
+
// Replace G with a simple tail call to bitcast(F). Also (unless
// MergeFunctionsPDI holds) replace direct uses of G with bitcast(F), and
// delete G. Under MergeFunctionsPDI, we use G itself for creating
@@ -719,6 +747,9 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
} else {
NewG->copyAttributesFrom(G);
NewG->takeName(G);
+ // Ensure CFI type metadata is propagated to the new function.
+ copyMetadataIfPresent(G, NewG, "type");
+ copyMetadataIfPresent(G, NewG, "kcfi_type");
removeUsers(G);
G->replaceAllUsesWith(NewG);
G->eraseFromParent();
@@ -741,10 +772,9 @@ static bool canCreateAliasFor(Function *F) {
// Replace G with an alias to F (deleting function G)
void MergeFunctions::writeAlias(Function *F, Function *G) {
- Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
PointerType *PtrType = G->getType();
auto *GA = GlobalAlias::create(G->getValueType(), PtrType->getAddressSpace(),
- G->getLinkage(), "", BitcastF, G->getParent());
+ G->getLinkage(), "", F, G->getParent());
const MaybeAlign FAlign = F->getAlign();
const MaybeAlign GAlign = G->getAlign();
@@ -795,6 +825,9 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
F->getAddressSpace(), "", F->getParent());
NewF->copyAttributesFrom(F);
NewF->takeName(F);
+ // Ensure CFI type metadata is propagated to the new function.
+ copyMetadataIfPresent(F, NewF, "type");
+ copyMetadataIfPresent(F, NewF, "kcfi_type");
removeUsers(F);
F->replaceAllUsesWith(NewF);
@@ -825,9 +858,8 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
// to replace a key in ValueMap<GlobalValue *> with a non-global.
GlobalNumbers.erase(G);
// If G's address is not significant, replace it entirely.
- Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
removeUsers(G);
- G->replaceAllUsesWith(BitcastF);
+ G->replaceAllUsesWith(F);
} else {
// Redirect direct callers of G to F. (See note on MergeFunctionsPDI
// above).
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 588f3901e3cb..b2665161c090 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -33,6 +33,7 @@
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
+#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Assumptions.h"
#include "llvm/IR/BasicBlock.h"
@@ -42,6 +43,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -156,6 +158,8 @@ STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
"Number of OpenMP runtime function uses identified");
STATISTIC(NumOpenMPTargetRegionKernels,
"Number of OpenMP target region entry points (=kernels) identified");
+STATISTIC(NumNonOpenMPTargetRegionKernels,
+ "Number of non-OpenMP target region kernels identified");
STATISTIC(NumOpenMPTargetRegionKernelsSPMD,
"Number of OpenMP target region entry points (=kernels) executed in "
"SPMD-mode instead of generic-mode");
@@ -181,6 +185,92 @@ STATISTIC(NumBarriersEliminated, "Number of redundant barriers eliminated");
static constexpr auto TAG = "[" DEBUG_TYPE "]";
#endif
+namespace KernelInfo {
+
+// struct ConfigurationEnvironmentTy {
+// uint8_t UseGenericStateMachine;
+// uint8_t MayUseNestedParallelism;
+// llvm::omp::OMPTgtExecModeFlags ExecMode;
+// int32_t MinThreads;
+// int32_t MaxThreads;
+// int32_t MinTeams;
+// int32_t MaxTeams;
+// };
+
+// struct DynamicEnvironmentTy {
+// uint16_t DebugIndentionLevel;
+// };
+
+// struct KernelEnvironmentTy {
+// ConfigurationEnvironmentTy Configuration;
+// IdentTy *Ident;
+// DynamicEnvironmentTy *DynamicEnv;
+// };
+
+#define KERNEL_ENVIRONMENT_IDX(MEMBER, IDX) \
+ constexpr const unsigned MEMBER##Idx = IDX;
+
+KERNEL_ENVIRONMENT_IDX(Configuration, 0)
+KERNEL_ENVIRONMENT_IDX(Ident, 1)
+
+#undef KERNEL_ENVIRONMENT_IDX
+
+#define KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MEMBER, IDX) \
+ constexpr const unsigned MEMBER##Idx = IDX;
+
+KERNEL_ENVIRONMENT_CONFIGURATION_IDX(UseGenericStateMachine, 0)
+KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MayUseNestedParallelism, 1)
+KERNEL_ENVIRONMENT_CONFIGURATION_IDX(ExecMode, 2)
+KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MinThreads, 3)
+KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MaxThreads, 4)
+KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MinTeams, 5)
+KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MaxTeams, 6)
+
+#undef KERNEL_ENVIRONMENT_CONFIGURATION_IDX
+
+#define KERNEL_ENVIRONMENT_GETTER(MEMBER, RETURNTYPE) \
+ RETURNTYPE *get##MEMBER##FromKernelEnvironment(ConstantStruct *KernelEnvC) { \
+ return cast<RETURNTYPE>(KernelEnvC->getAggregateElement(MEMBER##Idx)); \
+ }
+
+KERNEL_ENVIRONMENT_GETTER(Ident, Constant)
+KERNEL_ENVIRONMENT_GETTER(Configuration, ConstantStruct)
+
+#undef KERNEL_ENVIRONMENT_GETTER
+
+#define KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MEMBER) \
+ ConstantInt *get##MEMBER##FromKernelEnvironment( \
+ ConstantStruct *KernelEnvC) { \
+ ConstantStruct *ConfigC = \
+ getConfigurationFromKernelEnvironment(KernelEnvC); \
+ return dyn_cast<ConstantInt>(ConfigC->getAggregateElement(MEMBER##Idx)); \
+ }
+
+KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(UseGenericStateMachine)
+KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MayUseNestedParallelism)
+KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(ExecMode)
+KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MinThreads)
+KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MaxThreads)
+KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MinTeams)
+KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MaxTeams)
+
+#undef KERNEL_ENVIRONMENT_CONFIGURATION_GETTER
+
+GlobalVariable *
+getKernelEnvironementGVFromKernelInitCB(CallBase *KernelInitCB) {
+ constexpr const int InitKernelEnvironmentArgNo = 0;
+ return cast<GlobalVariable>(
+ KernelInitCB->getArgOperand(InitKernelEnvironmentArgNo)
+ ->stripPointerCasts());
+}
+
+ConstantStruct *getKernelEnvironementFromKernelInitCB(CallBase *KernelInitCB) {
+ GlobalVariable *KernelEnvGV =
+ getKernelEnvironementGVFromKernelInitCB(KernelInitCB);
+ return cast<ConstantStruct>(KernelEnvGV->getInitializer());
+}
+} // namespace KernelInfo
+
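The getters above are macro-generated to keep the struct-index bookkeeping in one place; expanding KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(ExecMode) gives roughly:

    ConstantInt *getExecModeFromKernelEnvironment(ConstantStruct *KernelEnvC) {
      ConstantStruct *ConfigC =
          getConfigurationFromKernelEnvironment(KernelEnvC);
      return dyn_cast<ConstantInt>(ConfigC->getAggregateElement(ExecModeIdx));
    }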
namespace {
struct AAHeapToShared;
@@ -196,6 +286,7 @@ struct OMPInformationCache : public InformationCache {
: InformationCache(M, AG, Allocator, CGSCC), OMPBuilder(M),
OpenMPPostLink(OpenMPPostLink) {
+ OMPBuilder.Config.IsTargetDevice = isOpenMPDevice(OMPBuilder.M);
OMPBuilder.initialize();
initializeRuntimeFunctions(M);
initializeInternalControlVars();
@@ -531,7 +622,7 @@ struct OMPInformationCache : public InformationCache {
for (Function &F : M) {
for (StringRef Prefix : {"__kmpc", "_ZN4ompx", "omp_"})
if (F.hasFnAttribute(Attribute::NoInline) &&
- F.getName().startswith(Prefix) &&
+ F.getName().starts_with(Prefix) &&
!F.hasFnAttribute(Attribute::OptimizeNone))
F.removeFnAttr(Attribute::NoInline);
}
@@ -595,7 +686,7 @@ struct KernelInfoState : AbstractState {
/// The parallel regions (identified by the outlined parallel functions) that
/// can be reached from the associated function.
- BooleanStateWithPtrSetVector<Function, /* InsertInvalidates */ false>
+ BooleanStateWithPtrSetVector<CallBase, /* InsertInvalidates */ false>
ReachedKnownParallelRegions;
/// State to track what parallel region we might reach.
@@ -610,6 +701,10 @@ struct KernelInfoState : AbstractState {
/// one we abort as the kernel is malformed.
CallBase *KernelInitCB = nullptr;
+ /// The constant kernel environment as taken from and passed to
+ /// __kmpc_target_init.
+ ConstantStruct *KernelEnvC = nullptr;
+
/// The __kmpc_target_deinit call in this kernel, if any. If we find more than
/// one we abort as the kernel is malformed.
CallBase *KernelDeinitCB = nullptr;
@@ -651,6 +746,7 @@ struct KernelInfoState : AbstractState {
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
ReachedKnownParallelRegions.indicatePessimisticFixpoint();
ReachedUnknownParallelRegions.indicatePessimisticFixpoint();
+ NestedParallelism = true;
return ChangeStatus::CHANGED;
}
@@ -680,6 +776,8 @@ struct KernelInfoState : AbstractState {
return false;
if (ParallelLevels != RHS.ParallelLevels)
return false;
+ if (NestedParallelism != RHS.NestedParallelism)
+ return false;
return true;
}
@@ -714,6 +812,12 @@ struct KernelInfoState : AbstractState {
"assumptions.");
KernelDeinitCB = KIS.KernelDeinitCB;
}
+ if (KIS.KernelEnvC) {
+ if (KernelEnvC && KernelEnvC != KIS.KernelEnvC)
+ llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
+ "assumptions.");
+ KernelEnvC = KIS.KernelEnvC;
+ }
SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker;
ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions;
ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions;
@@ -875,6 +979,9 @@ struct OpenMPOpt {
}
}
+ if (OMPInfoCache.OpenMPPostLink)
+ Changed |= removeRuntimeSymbols();
+
return Changed;
}
@@ -903,7 +1010,7 @@ struct OpenMPOpt {
/// Print OpenMP GPU kernels for testing.
void printKernels() const {
for (Function *F : SCC) {
- if (!omp::isKernel(*F))
+ if (!omp::isOpenMPKernel(*F))
continue;
auto Remark = [&](OptimizationRemarkAnalysis ORA) {
@@ -1404,6 +1511,37 @@ private:
return Changed;
}
+ /// Tries to remove known optional runtime symbols from the module.
+ bool removeRuntimeSymbols() {
+ // The RPC client symbol is defined in `libc` and indicates that something
+ // required an RPC server. If its users were all optimized out then we can
+ // safely remove it.
+ // TODO: This logic should live somewhere more general in the future.
+ if (GlobalVariable *GV = M.getNamedGlobal("__llvm_libc_rpc_client")) {
+ if (!GV->getType()->isPointerTy())
+ return false;
+
+ Constant *C = GV->getInitializer();
+ if (!C)
+ return false;
+
+ // Check to see if the only user of the RPC client is the external handle.
+ GlobalVariable *Client = dyn_cast<GlobalVariable>(C->stripPointerCasts());
+ if (!Client || Client->getNumUses() > 1 ||
+ Client->user_back() != GV->getInitializer())
+ return false;
+
+ Client->replaceAllUsesWith(PoisonValue::get(Client->getType()));
+ Client->eraseFromParent();
+
+ GV->replaceAllUsesWith(PoisonValue::get(GV->getType()));
+ GV->eraseFromParent();
+
+ return true;
+ }
+ return false;
+ }
+
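The deletion follows the usual two-step erase idiom: rewrite every remaining use to poison so nothing dangles, then remove the global. As a minimal sketch (helper name hypothetical):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/GlobalVariable.h"

    // Detach a global from all users, then erase it from the module.
    static void erasePoisoned(llvm::GlobalVariable *GV) {
      GV->replaceAllUsesWith(llvm::PoisonValue::get(GV->getType()));
      GV->eraseFromParent();
    }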
/// Tries to hide the latency of runtime calls that involve host to
/// device memory transfers by splitting them into their "issue" and "wait"
/// versions. The "issue" is moved upwards as much as possible. The "wait" is
@@ -1858,7 +1996,7 @@ private:
Function *F = I->getParent()->getParent();
auto &ORE = OREGetter(F);
- if (RemarkName.startswith("OMP"))
+ if (RemarkName.starts_with("OMP"))
ORE.emit([&]() {
return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I))
<< " [" << RemarkName << "]";
@@ -1874,7 +2012,7 @@ private:
RemarkCallBack &&RemarkCB) const {
auto &ORE = OREGetter(F);
- if (RemarkName.startswith("OMP"))
+ if (RemarkName.starts_with("OMP"))
ORE.emit([&]() {
return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F))
<< " [" << RemarkName << "]";
@@ -1944,7 +2082,7 @@ Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
// TODO: We should use an AA to create an (optimistic and callback
// call-aware) call graph. For now we stick to simple patterns that
// are less powerful, basically the worst fixpoint.
- if (isKernel(F)) {
+ if (isOpenMPKernel(F)) {
CachedKernel = Kernel(&F);
return *CachedKernel;
}
@@ -2535,6 +2673,17 @@ struct AAICVTrackerCallSiteReturned : AAICVTracker {
}
};
+/// Determines if \p BB itself exits the function unconditionally or reaches a
+/// block that does so through only unique successors.
+static bool hasFunctionEndAsUniqueSuccessor(const BasicBlock *BB) {
+ if (succ_empty(BB))
+ return true;
+ const BasicBlock *const Successor = BB->getUniqueSuccessor();
+ if (!Successor)
+ return false;
+ return hasFunctionEndAsUniqueSuccessor(Successor);
+}
+
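The recursion above just follows the chain of unique successors until it either falls off the end of the function or hits a branch; an equivalent iterative sketch (function name hypothetical):

    #include "llvm/IR/CFG.h"

    static bool reachesFunctionEndUniquely(const llvm::BasicBlock *BB) {
      while (!llvm::succ_empty(BB)) {
        BB = BB->getUniqueSuccessor();
        if (!BB)
          return false; // multiple successors: not a unique path
      }
      return true; // no successors left: unconditional function exit
    }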
struct AAExecutionDomainFunction : public AAExecutionDomain {
AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A)
: AAExecutionDomain(IRP, A) {}
@@ -2587,18 +2736,22 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
if (!ED.IsReachedFromAlignedBarrierOnly ||
ED.EncounteredNonLocalSideEffect)
return;
+ if (!ED.EncounteredAssumes.empty() && !A.isModulePass())
+ return;
- // We can remove this barrier, if it is one, or all aligned barriers
- // reaching the kernel end. In the latter case we can transitively work
- // our way back until we find a barrier that guards a side-effect if we
- // are dealing with the kernel end here.
+ // We can remove this barrier, if it is one, or aligned barriers reaching
+ // the kernel end (if CB is nullptr). Aligned barriers reaching the kernel
+ // end should only be removed if the kernel end is their unique successor;
+ // otherwise, their other successors may contain side-effects that the
+ // kernel end does not account for. If those barriers have other
+ // barriers reaching them, those can be transitively removed as well as
+ // long as the kernel end is also their unique successor.
if (CB) {
DeletedBarriers.insert(CB);
A.deleteAfterManifest(*CB);
++NumBarriersEliminated;
Changed = ChangeStatus::CHANGED;
} else if (!ED.AlignedBarriers.empty()) {
- NumBarriersEliminated += ED.AlignedBarriers.size();
Changed = ChangeStatus::CHANGED;
SmallVector<CallBase *> Worklist(ED.AlignedBarriers.begin(),
ED.AlignedBarriers.end());
@@ -2609,7 +2762,10 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
continue;
if (LastCB->getFunction() != getAnchorScope())
continue;
+ if (!hasFunctionEndAsUniqueSuccessor(LastCB->getParent()))
+ continue;
if (!DeletedBarriers.count(LastCB)) {
+ ++NumBarriersEliminated;
A.deleteAfterManifest(*LastCB);
continue;
}
@@ -2633,7 +2789,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
HandleAlignedBarrier(CB);
// Handle the "kernel end barrier" for kernels too.
- if (omp::isKernel(*getAnchorScope()))
+ if (omp::isOpenMPKernel(*getAnchorScope()))
HandleAlignedBarrier(nullptr);
return Changed;
@@ -2779,9 +2935,11 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr;
if (!CB)
return false;
- const int InitModeArgNo = 1;
- auto *ModeCI = dyn_cast<ConstantInt>(CB->getOperand(InitModeArgNo));
- return ModeCI && (ModeCI->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC);
+ ConstantStruct *KernelEnvC =
+ KernelInfo::getKernelEnvironementFromKernelInitCB(CB);
+ ConstantInt *ExecModeC =
+ KernelInfo::getExecModeFromKernelEnvironment(KernelEnvC);
+ return ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC;
}
if (C->isZero()) {
@@ -2884,11 +3042,11 @@ bool AAExecutionDomainFunction::handleCallees(Attributor &A,
} else {
// We could not find all predecessors, so this is either a kernel or a
// function with external linkage (or with some other weird uses).
- if (omp::isKernel(*getAnchorScope())) {
+ if (omp::isOpenMPKernel(*getAnchorScope())) {
EntryBBED.IsExecutedByInitialThreadOnly = false;
EntryBBED.IsReachedFromAlignedBarrierOnly = true;
EntryBBED.EncounteredNonLocalSideEffect = false;
- ExitED.IsReachingAlignedBarrierOnly = true;
+ ExitED.IsReachingAlignedBarrierOnly = false;
} else {
EntryBBED.IsExecutedByInitialThreadOnly = false;
EntryBBED.IsReachedFromAlignedBarrierOnly = false;
@@ -2938,7 +3096,7 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
Function *F = getAnchorScope();
BasicBlock &EntryBB = F->getEntryBlock();
- bool IsKernel = omp::isKernel(*F);
+ bool IsKernel = omp::isOpenMPKernel(*F);
SmallVector<Instruction *> SyncInstWorklist;
for (auto &RIt : *RPOT) {
@@ -3063,7 +3221,7 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
if (EDAA && EDAA->getState().isValidState()) {
const auto &CalleeED = EDAA->getFunctionExecutionDomain();
ED.IsReachedFromAlignedBarrierOnly =
- CalleeED.IsReachedFromAlignedBarrierOnly;
+ CalleeED.IsReachedFromAlignedBarrierOnly;
AlignedBarrierLastInBlock = ED.IsReachedFromAlignedBarrierOnly;
if (IsNoSync || !CalleeED.IsReachedFromAlignedBarrierOnly)
ED.EncounteredNonLocalSideEffect |=
@@ -3442,6 +3600,10 @@ struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> {
using Base = StateWrapper<KernelInfoState, AbstractAttribute>;
AAKernelInfo(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+ /// The callee value is tracked beyond a simple stripPointerCasts, so we allow
+ /// unknown callees.
+ static bool requiresCalleeForCallBase() { return false; }
+
/// Statistics are tracked as part of manifest for now.
void trackStatistics() const override {}
@@ -3468,7 +3630,8 @@ struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> {
", #ParLevels: " +
(ParallelLevels.isValidState()
? std::to_string(ParallelLevels.size())
- : "<invalid>");
+ : "<invalid>") +
+ ", NestedPar: " + (NestedParallelism ? "yes" : "no");
}
/// Create an abstract attribute view for the position \p IRP.
@@ -3500,6 +3663,33 @@ struct AAKernelInfoFunction : AAKernelInfo {
return GuardedInstructions;
}
+ void setConfigurationOfKernelEnvironment(ConstantStruct *ConfigC) {
+ Constant *NewKernelEnvC = ConstantFoldInsertValueInstruction(
+ KernelEnvC, ConfigC, {KernelInfo::ConfigurationIdx});
+ assert(NewKernelEnvC && "Failed to create new kernel environment");
+ KernelEnvC = cast<ConstantStruct>(NewKernelEnvC);
+ }
+
+#define KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MEMBER) \
+ void set##MEMBER##OfKernelEnvironment(ConstantInt *NewVal) { \
+ ConstantStruct *ConfigC = \
+ KernelInfo::getConfigurationFromKernelEnvironment(KernelEnvC); \
+ Constant *NewConfigC = ConstantFoldInsertValueInstruction( \
+ ConfigC, NewVal, {KernelInfo::MEMBER##Idx}); \
+ assert(NewConfigC && "Failed to create new configuration environment"); \
+ setConfigurationOfKernelEnvironment(cast<ConstantStruct>(NewConfigC)); \
+ }
+
+ KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(UseGenericStateMachine)
+ KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MayUseNestedParallelism)
+ KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(ExecMode)
+ KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MinThreads)
+ KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MaxThreads)
+ KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MinTeams)
+ KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MaxTeams)
+
+#undef KERNEL_ENVIRONMENT_CONFIGURATION_SETTER
+
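Each setter rebuilds the constant struct via ConstantFoldInsertValueInstruction rather than mutating it in place; expanding KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(ExecMode) gives roughly:

    void setExecModeOfKernelEnvironment(ConstantInt *NewVal) {
      ConstantStruct *ConfigC =
          KernelInfo::getConfigurationFromKernelEnvironment(KernelEnvC);
      Constant *NewConfigC = ConstantFoldInsertValueInstruction(
          ConfigC, NewVal, {KernelInfo::ExecModeIdx});
      assert(NewConfigC && "Failed to create new configuration environment");
      setConfigurationOfKernelEnvironment(cast<ConstantStruct>(NewConfigC));
    }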
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
// This is a high-level transform that might change the constant arguments
@@ -3548,61 +3738,73 @@ struct AAKernelInfoFunction : AAKernelInfo {
ReachingKernelEntries.insert(Fn);
IsKernelEntry = true;
- // For kernels we might need to initialize/finalize the IsSPMD state and
- // we need to register a simplification callback so that the Attributor
- // knows the constant arguments to __kmpc_target_init and
- // __kmpc_target_deinit might actually change.
-
- Attributor::SimplifictionCallbackTy StateMachineSimplifyCB =
- [&](const IRPosition &IRP, const AbstractAttribute *AA,
- bool &UsedAssumedInformation) -> std::optional<Value *> {
- return nullptr;
- };
-
- Attributor::SimplifictionCallbackTy ModeSimplifyCB =
- [&](const IRPosition &IRP, const AbstractAttribute *AA,
- bool &UsedAssumedInformation) -> std::optional<Value *> {
- // IRP represents the "SPMDCompatibilityTracker" argument of an
- // __kmpc_target_init or
- // __kmpc_target_deinit call. We will answer this one with the internal
- // state.
- if (!SPMDCompatibilityTracker.isValidState())
- return nullptr;
- if (!SPMDCompatibilityTracker.isAtFixpoint()) {
- if (AA)
- A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
+ KernelEnvC =
+ KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB);
+ GlobalVariable *KernelEnvGV =
+ KernelInfo::getKernelEnvironementGVFromKernelInitCB(KernelInitCB);
+
+ Attributor::GlobalVariableSimplifictionCallbackTy
+ KernelConfigurationSimplifyCB =
+ [&](const GlobalVariable &GV, const AbstractAttribute *AA,
+ bool &UsedAssumedInformation) -> std::optional<Constant *> {
+ if (!isAtFixpoint()) {
+ if (!AA)
+ return nullptr;
UsedAssumedInformation = true;
- } else {
- UsedAssumedInformation = false;
+ A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
}
- auto *Val = ConstantInt::getSigned(
- IntegerType::getInt8Ty(IRP.getAnchorValue().getContext()),
- SPMDCompatibilityTracker.isAssumed() ? OMP_TGT_EXEC_MODE_SPMD
- : OMP_TGT_EXEC_MODE_GENERIC);
- return Val;
+ return KernelEnvC;
};
- constexpr const int InitModeArgNo = 1;
- constexpr const int DeinitModeArgNo = 1;
- constexpr const int InitUseStateMachineArgNo = 2;
- A.registerSimplificationCallback(
- IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo),
- StateMachineSimplifyCB);
- A.registerSimplificationCallback(
- IRPosition::callsite_argument(*KernelInitCB, InitModeArgNo),
- ModeSimplifyCB);
- A.registerSimplificationCallback(
- IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo),
- ModeSimplifyCB);
+ A.registerGlobalVariableSimplificationCallback(
+ *KernelEnvGV, KernelConfigurationSimplifyCB);
// Check if we know we are in SPMD-mode already.
- ConstantInt *ModeArg =
- dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));
- if (ModeArg && (ModeArg->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
+ ConstantInt *ExecModeC =
+ KernelInfo::getExecModeFromKernelEnvironment(KernelEnvC);
+ ConstantInt *AssumedExecModeC = ConstantInt::get(
+ ExecModeC->getType(),
+ ExecModeC->getSExtValue() | OMP_TGT_EXEC_MODE_GENERIC_SPMD);
+ if (ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)
SPMDCompatibilityTracker.indicateOptimisticFixpoint();
- // This is a generic region but SPMDization is disabled so stop tracking.
else if (DisableOpenMPOptSPMDization)
+ // This is a generic region but SPMDization is disabled so stop
+ // tracking.
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
+ else
+ setExecModeOfKernelEnvironment(AssumedExecModeC);
+
+ const Triple T(Fn->getParent()->getTargetTriple());
+ auto *Int32Ty = Type::getInt32Ty(Fn->getContext());
+ auto [MinThreads, MaxThreads] =
+ OpenMPIRBuilder::readThreadBoundsForKernel(T, *Fn);
+ if (MinThreads)
+ setMinThreadsOfKernelEnvironment(ConstantInt::get(Int32Ty, MinThreads));
+ if (MaxThreads)
+ setMaxThreadsOfKernelEnvironment(ConstantInt::get(Int32Ty, MaxThreads));
+ auto [MinTeams, MaxTeams] =
+ OpenMPIRBuilder::readTeamBoundsForKernel(T, *Fn);
+ if (MinTeams)
+ setMinTeamsOfKernelEnvironment(ConstantInt::get(Int32Ty, MinTeams));
+ if (MaxTeams)
+ setMaxTeamsOfKernelEnvironment(ConstantInt::get(Int32Ty, MaxTeams));
+
+ ConstantInt *MayUseNestedParallelismC =
+ KernelInfo::getMayUseNestedParallelismFromKernelEnvironment(KernelEnvC);
+ ConstantInt *AssumedMayUseNestedParallelismC = ConstantInt::get(
+ MayUseNestedParallelismC->getType(), NestedParallelism);
+ setMayUseNestedParallelismOfKernelEnvironment(
+ AssumedMayUseNestedParallelismC);
+
+ if (!DisableOpenMPOptStateMachineRewrite) {
+ ConstantInt *UseGenericStateMachineC =
+ KernelInfo::getUseGenericStateMachineFromKernelEnvironment(
+ KernelEnvC);
+ ConstantInt *AssumedUseGenericStateMachineC =
+ ConstantInt::get(UseGenericStateMachineC->getType(), false);
+ setUseGenericStateMachineOfKernelEnvironment(
+ AssumedUseGenericStateMachineC);
+ }
// Register virtual uses of functions we might need to preserve.
auto RegisterVirtualUse = [&](RuntimeFunction RFKind,
@@ -3703,22 +3905,32 @@ struct AAKernelInfoFunction : AAKernelInfo {
if (!KernelInitCB || !KernelDeinitCB)
return ChangeStatus::UNCHANGED;
- /// Insert nested Parallelism global variable
- Function *Kernel = getAnchorScope();
- Module &M = *Kernel->getParent();
- Type *Int8Ty = Type::getInt8Ty(M.getContext());
- auto *GV = new GlobalVariable(
- M, Int8Ty, /* isConstant */ true, GlobalValue::WeakAnyLinkage,
- ConstantInt::get(Int8Ty, NestedParallelism ? 1 : 0),
- Kernel->getName() + "_nested_parallelism");
- GV->setVisibility(GlobalValue::HiddenVisibility);
-
- // If we can we change the execution mode to SPMD-mode otherwise we build a
- // custom state machine.
ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+ bool HasBuiltStateMachine = true;
if (!changeToSPMDMode(A, Changed)) {
if (!KernelInitCB->getCalledFunction()->isDeclaration())
- return buildCustomStateMachine(A);
+ HasBuiltStateMachine = buildCustomStateMachine(A, Changed);
+ else
+ HasBuiltStateMachine = false;
+ }
+
+ // We need to reset KernelEnvC if specific rewriting is not done.
+ ConstantStruct *ExistingKernelEnvC =
+ KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB);
+ ConstantInt *OldUseGenericStateMachineVal =
+ KernelInfo::getUseGenericStateMachineFromKernelEnvironment(
+ ExistingKernelEnvC);
+ if (!HasBuiltStateMachine)
+ setUseGenericStateMachineOfKernelEnvironment(
+ OldUseGenericStateMachineVal);
+
+ // Finally, update the KernelEnvC.
+ GlobalVariable *KernelEnvGV =
+ KernelInfo::getKernelEnvironementGVFromKernelInitCB(KernelInitCB);
+ if (KernelEnvGV->getInitializer() != KernelEnvC) {
+ KernelEnvGV->setInitializer(KernelEnvC);
+ Changed = ChangeStatus::CHANGED;
}
return Changed;
@@ -3788,14 +4000,14 @@ struct AAKernelInfoFunction : AAKernelInfo {
// Find escaping outputs from the guarded region to outside users and
// broadcast their values to them.
for (Instruction &I : *RegionStartBB) {
- SmallPtrSet<Instruction *, 4> OutsideUsers;
- for (User *Usr : I.users()) {
- Instruction &UsrI = *cast<Instruction>(Usr);
+ SmallVector<Use *, 4> OutsideUses;
+ for (Use &U : I.uses()) {
+ Instruction &UsrI = *cast<Instruction>(U.getUser());
if (UsrI.getParent() != RegionStartBB)
- OutsideUsers.insert(&UsrI);
+ OutsideUses.push_back(&U);
}
- if (OutsideUsers.empty())
+ if (OutsideUses.empty())
continue;
HasBroadcastValues = true;
@@ -3818,8 +4030,8 @@ struct AAKernelInfoFunction : AAKernelInfo {
RegionBarrierBB->getTerminator());
// Emit a load instruction and replace uses of the output value.
- for (Instruction *UsrI : OutsideUsers)
- UsrI->replaceUsesOfWith(&I, LoadI);
+ for (Use *U : OutsideUses)
+ A.changeUseAfterManifest(*U, *LoadI);
}
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
@@ -4043,19 +4255,14 @@ struct AAKernelInfoFunction : AAKernelInfo {
auto *CB = cast<CallBase>(Kernel->user_back());
Kernel = CB->getCaller();
}
- assert(omp::isKernel(*Kernel) && "Expected kernel function!");
+ assert(omp::isOpenMPKernel(*Kernel) && "Expected kernel function!");
// Check if the kernel is already in SPMD mode, if so, return success.
- GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable(
- (Kernel->getName() + "_exec_mode").str());
- assert(ExecMode && "Kernel without exec mode?");
- assert(ExecMode->getInitializer() && "ExecMode doesn't have initializer!");
-
- // Set the global exec mode flag to indicate SPMD-Generic mode.
- assert(isa<ConstantInt>(ExecMode->getInitializer()) &&
- "ExecMode is not an integer!");
- const int8_t ExecModeVal =
- cast<ConstantInt>(ExecMode->getInitializer())->getSExtValue();
+ ConstantStruct *ExistingKernelEnvC =
+ KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB);
+ auto *ExecModeC =
+ KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC);
+ const int8_t ExecModeVal = ExecModeC->getSExtValue();
if (ExecModeVal != OMP_TGT_EXEC_MODE_GENERIC)
return true;
@@ -4073,27 +4280,8 @@ struct AAKernelInfoFunction : AAKernelInfo {
// kernel is executed in.
assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC &&
"Initially non-SPMD kernel has SPMD exec mode!");
- ExecMode->setInitializer(
- ConstantInt::get(ExecMode->getInitializer()->getType(),
- ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD));
-
- // Next rewrite the init and deinit calls to indicate we use SPMD-mode now.
- const int InitModeArgNo = 1;
- const int DeinitModeArgNo = 1;
- const int InitUseStateMachineArgNo = 2;
-
- auto &Ctx = getAnchorValue().getContext();
- A.changeUseAfterManifest(
- KernelInitCB->getArgOperandUse(InitModeArgNo),
- *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
- OMP_TGT_EXEC_MODE_SPMD));
- A.changeUseAfterManifest(
- KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo),
- *ConstantInt::getBool(Ctx, false));
- A.changeUseAfterManifest(
- KernelDeinitCB->getArgOperandUse(DeinitModeArgNo),
- *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
- OMP_TGT_EXEC_MODE_SPMD));
+ setExecModeOfKernelEnvironment(ConstantInt::get(
+ ExecModeC->getType(), ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD));
++NumOpenMPTargetRegionKernelsSPMD;
@@ -4104,46 +4292,47 @@ struct AAKernelInfoFunction : AAKernelInfo {
return true;
};
- ChangeStatus buildCustomStateMachine(Attributor &A) {
+ bool buildCustomStateMachine(Attributor &A, ChangeStatus &Changed) {
// If we have disabled state machine rewrites, don't make a custom one
if (DisableOpenMPOptStateMachineRewrite)
- return ChangeStatus::UNCHANGED;
+ return false;
// Don't rewrite the state machine if we are not in a valid state.
if (!ReachedKnownParallelRegions.isValidState())
- return ChangeStatus::UNCHANGED;
+ return false;
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
if (!OMPInfoCache.runtimeFnsAvailable(
{OMPRTL___kmpc_get_hardware_num_threads_in_block,
OMPRTL___kmpc_get_warp_size, OMPRTL___kmpc_barrier_simple_generic,
OMPRTL___kmpc_kernel_parallel, OMPRTL___kmpc_kernel_end_parallel}))
- return ChangeStatus::UNCHANGED;
+ return false;
- const int InitModeArgNo = 1;
- const int InitUseStateMachineArgNo = 2;
+ ConstantStruct *ExistingKernelEnvC =
+ KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB);
// Check if the current configuration is non-SPMD and generic state machine.
// If we already have SPMD mode or a custom state machine we do not need to
// go any further. If it is anything but a constant something is weird and
// we give up.
- ConstantInt *UseStateMachine = dyn_cast<ConstantInt>(
- KernelInitCB->getArgOperand(InitUseStateMachineArgNo));
- ConstantInt *Mode =
- dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));
+ ConstantInt *UseStateMachineC =
+ KernelInfo::getUseGenericStateMachineFromKernelEnvironment(
+ ExistingKernelEnvC);
+ ConstantInt *ModeC =
+ KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC);
// If we are stuck with generic mode, try to create a custom device (=GPU)
// state machine which is specialized for the parallel regions that are
// reachable by the kernel.
- if (!UseStateMachine || UseStateMachine->isZero() || !Mode ||
- (Mode->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
- return ChangeStatus::UNCHANGED;
+ if (UseStateMachineC->isZero() ||
+ (ModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
+ return false;
+
+ Changed = ChangeStatus::CHANGED;
// If not SPMD mode, indicate we use a custom state machine now.
- auto &Ctx = getAnchorValue().getContext();
- auto *FalseVal = ConstantInt::getBool(Ctx, false);
- A.changeUseAfterManifest(
- KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), *FalseVal);
+ setUseGenericStateMachineOfKernelEnvironment(
+ ConstantInt::get(UseStateMachineC->getType(), false));
// If we don't actually need a state machine we are done here. This can
// happen if there simply are no parallel regions. In the resulting kernel
@@ -4157,7 +4346,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
};
A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP130", Remark);
- return ChangeStatus::CHANGED;
+ return true;
}
// Keep track in the statistics of our new shiny custom state machine.
@@ -4222,6 +4411,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
// UserCodeEntryBB: // user code
// __kmpc_target_deinit(...)
//
+ auto &Ctx = getAnchorValue().getContext();
Function *Kernel = getAssociatedFunction();
assert(Kernel && "Expected an associated function!");
@@ -4292,7 +4482,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
// Create local storage for the work function pointer.
const DataLayout &DL = M.getDataLayout();
- Type *VoidPtrTy = Type::getInt8PtrTy(Ctx);
+ Type *VoidPtrTy = PointerType::getUnqual(Ctx);
Instruction *WorkFnAI =
new AllocaInst(VoidPtrTy, DL.getAllocaAddrSpace(), nullptr,
"worker.work_fn.addr", &Kernel->getEntryBlock().front());
@@ -4304,7 +4494,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
StateMachineBeginBB->end()),
DLoc));
- Value *Ident = KernelInitCB->getArgOperand(0);
+ Value *Ident = KernelInfo::getIdentFromKernelEnvironment(KernelEnvC);
Value *GTid = KernelInitCB;
FunctionCallee BarrierFn =
@@ -4337,9 +4527,6 @@ struct AAKernelInfoFunction : AAKernelInfo {
FunctionType *ParallelRegionFnTy = FunctionType::get(
Type::getVoidTy(Ctx), {Type::getInt16Ty(Ctx), Type::getInt32Ty(Ctx)},
false);
- Value *WorkFnCast = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
- WorkFn, ParallelRegionFnTy->getPointerTo(), "worker.work_fn.addr_cast",
- StateMachineBeginBB);
Instruction *IsDone =
ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFn,
@@ -4358,11 +4545,15 @@ struct AAKernelInfoFunction : AAKernelInfo {
Value *ZeroArg =
Constant::getNullValue(ParallelRegionFnTy->getParamType(0));
+ const unsigned int WrapperFunctionArgNo = 6;
+
// Now that we have most of the CFG skeleton it is time for the if-cascade
// that checks the function pointer we got from the runtime against the
// parallel regions we expect, if there are any.
for (int I = 0, E = ReachedKnownParallelRegions.size(); I < E; ++I) {
- auto *ParallelRegion = ReachedKnownParallelRegions[I];
+ auto *CB = ReachedKnownParallelRegions[I];
+ auto *ParallelRegion = dyn_cast<Function>(
+ CB->getArgOperand(WrapperFunctionArgNo)->stripPointerCasts());
BasicBlock *PRExecuteBB = BasicBlock::Create(
Ctx, "worker_state_machine.parallel_region.execute", Kernel,
StateMachineEndParallelBB);
@@ -4374,13 +4565,15 @@ struct AAKernelInfoFunction : AAKernelInfo {
BasicBlock *PRNextBB =
BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check",
Kernel, StateMachineEndParallelBB);
+ A.registerManifestAddedBasicBlock(*PRExecuteBB);
+ A.registerManifestAddedBasicBlock(*PRNextBB);
// Check if we need to compare the pointer at all or if we can just
// call the parallel region function.
Value *IsPR;
if (I + 1 < E || !ReachedUnknownParallelRegions.empty()) {
Instruction *CmpI = ICmpInst::Create(
- ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFnCast, ParallelRegion,
+ ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFn, ParallelRegion,
"worker.check_parallel_region", StateMachineIfCascadeCurrentBB);
CmpI->setDebugLoc(DLoc);
IsPR = CmpI;
@@ -4400,7 +4593,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
if (!ReachedUnknownParallelRegions.empty()) {
StateMachineIfCascadeCurrentBB->setName(
"worker_state_machine.parallel_region.fallback.execute");
- CallInst::Create(ParallelRegionFnTy, WorkFnCast, {ZeroArg, GTid}, "",
+ CallInst::Create(ParallelRegionFnTy, WorkFn, {ZeroArg, GTid}, "",
StateMachineIfCascadeCurrentBB)
->setDebugLoc(DLoc);
}
@@ -4423,7 +4616,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
BranchInst::Create(StateMachineBeginBB, StateMachineDoneBarrierBB)
->setDebugLoc(DLoc);
- return ChangeStatus::CHANGED;
+ return true;
}
/// Fixpoint iteration update function. Will be called every time a dependence
@@ -4431,6 +4624,46 @@ struct AAKernelInfoFunction : AAKernelInfo {
ChangeStatus updateImpl(Attributor &A) override {
KernelInfoState StateBefore = getState();
+ // When we leave this function, this RAII object makes sure the member
+ // KernelEnvC is updated properly depending on the state. That member is
+ // used for simplification of values and needs to be up to date at all
+ // times.
+ struct UpdateKernelEnvCRAII {
+ AAKernelInfoFunction &AA;
+
+ UpdateKernelEnvCRAII(AAKernelInfoFunction &AA) : AA(AA) {}
+
+ ~UpdateKernelEnvCRAII() {
+ if (!AA.KernelEnvC)
+ return;
+
+ ConstantStruct *ExistingKernelEnvC =
+ KernelInfo::getKernelEnvironementFromKernelInitCB(AA.KernelInitCB);
+
+ if (!AA.isValidState()) {
+ AA.KernelEnvC = ExistingKernelEnvC;
+ return;
+ }
+
+ if (!AA.ReachedKnownParallelRegions.isValidState())
+ AA.setUseGenericStateMachineOfKernelEnvironment(
+ KernelInfo::getUseGenericStateMachineFromKernelEnvironment(
+ ExistingKernelEnvC));
+
+ if (!AA.SPMDCompatibilityTracker.isValidState())
+ AA.setExecModeOfKernelEnvironment(
+ KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC));
+
+ ConstantInt *MayUseNestedParallelismC =
+ KernelInfo::getMayUseNestedParallelismFromKernelEnvironment(
+ AA.KernelEnvC);
+ ConstantInt *NewMayUseNestedParallelismC = ConstantInt::get(
+ MayUseNestedParallelismC->getType(), AA.NestedParallelism);
+ AA.setMayUseNestedParallelismOfKernelEnvironment(
+ NewMayUseNestedParallelismC);
+ }
+ } RAII(*this);
+
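The destructor-driven update is the classic scope-exit pattern; LLVM also ships llvm::make_scope_exit (llvm/ADT/ScopeExit.h) for the same shape, sketched here purely for comparison:

    #include "llvm/ADT/ScopeExit.h"

    // Runs the lambda on every path out of the enclosing scope.
    auto Guard = llvm::make_scope_exit([&] {
      // Re-synchronize KernelEnvC with the final state here.
    });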
// Callback to check a read/write instruction.
auto CheckRWInst = [&](Instruction &I) {
// We handle calls later.
@@ -4634,15 +4867,13 @@ struct AAKernelInfoCallSite : AAKernelInfo {
AAKernelInfo::initialize(A);
CallBase &CB = cast<CallBase>(getAssociatedValue());
- Function *Callee = getAssociatedFunction();
-
auto *AssumptionAA = A.getAAFor<AAAssumptionInfo>(
*this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
// Check for SPMD-mode assumptions.
if (AssumptionAA && AssumptionAA->hasAssumption("ompx_spmd_amenable")) {
- SPMDCompatibilityTracker.indicateOptimisticFixpoint();
indicateOptimisticFixpoint();
+ return;
}
// First weed out calls we do not care about, that is readonly/readnone
@@ -4657,124 +4888,156 @@ struct AAKernelInfoCallSite : AAKernelInfo {
// we will handle them explicitly in the switch below. If it is not, we
// will use an AAKernelInfo object on the callee to gather information and
// merge that into the current state. The latter happens in the updateImpl.
- auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
- const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee);
- if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) {
- // Unknown caller or declarations are not analyzable, we give up.
- if (!Callee || !A.isFunctionIPOAmendable(*Callee)) {
-
- // Unknown callees might contain parallel regions, except if they have
- // an appropriate assumption attached.
- if (!AssumptionAA ||
- !(AssumptionAA->hasAssumption("omp_no_openmp") ||
- AssumptionAA->hasAssumption("omp_no_parallelism")))
- ReachedUnknownParallelRegions.insert(&CB);
-
- // If SPMDCompatibilityTracker is not fixed, we need to give up on the
- // idea we can run something unknown in SPMD-mode.
- if (!SPMDCompatibilityTracker.isAtFixpoint()) {
- SPMDCompatibilityTracker.indicatePessimisticFixpoint();
- SPMDCompatibilityTracker.insert(&CB);
- }
+ auto CheckCallee = [&](Function *Callee, unsigned NumCallees) {
+ auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+ const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee);
+ if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) {
+ // Unknown caller or declarations are not analyzable, we give up.
+ if (!Callee || !A.isFunctionIPOAmendable(*Callee)) {
+
+ // Unknown callees might contain parallel regions, except if they have
+ // an appropriate assumption attached.
+ if (!AssumptionAA ||
+ !(AssumptionAA->hasAssumption("omp_no_openmp") ||
+ AssumptionAA->hasAssumption("omp_no_parallelism")))
+ ReachedUnknownParallelRegions.insert(&CB);
+
+ // If SPMDCompatibilityTracker is not fixed, we need to give up on the
+ // idea we can run something unknown in SPMD-mode.
+ if (!SPMDCompatibilityTracker.isAtFixpoint()) {
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
+ SPMDCompatibilityTracker.insert(&CB);
+ }
- // We have updated the state for this unknown call properly, there won't
- // be any change so we indicate a fixpoint.
- indicateOptimisticFixpoint();
+ // We have updated the state for this unknown call properly, there
+ // won't be any change so we indicate a fixpoint.
+ indicateOptimisticFixpoint();
+ }
+ // If the callee is known and can be used in IPO, we will update the
+ // state based on the callee state in updateImpl.
+ return;
+ }
+ if (NumCallees > 1) {
+ indicatePessimisticFixpoint();
+ return;
}
- // If the callee is known and can be used in IPO, we will update the state
- // based on the callee state in updateImpl.
- return;
- }
- const unsigned int WrapperFunctionArgNo = 6;
- RuntimeFunction RF = It->getSecond();
- switch (RF) {
- // All the functions we know are compatible with SPMD mode.
- case OMPRTL___kmpc_is_spmd_exec_mode:
- case OMPRTL___kmpc_distribute_static_fini:
- case OMPRTL___kmpc_for_static_fini:
- case OMPRTL___kmpc_global_thread_num:
- case OMPRTL___kmpc_get_hardware_num_threads_in_block:
- case OMPRTL___kmpc_get_hardware_num_blocks:
- case OMPRTL___kmpc_single:
- case OMPRTL___kmpc_end_single:
- case OMPRTL___kmpc_master:
- case OMPRTL___kmpc_end_master:
- case OMPRTL___kmpc_barrier:
- case OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2:
- case OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2:
- case OMPRTL___kmpc_nvptx_end_reduce_nowait:
- break;
- case OMPRTL___kmpc_distribute_static_init_4:
- case OMPRTL___kmpc_distribute_static_init_4u:
- case OMPRTL___kmpc_distribute_static_init_8:
- case OMPRTL___kmpc_distribute_static_init_8u:
- case OMPRTL___kmpc_for_static_init_4:
- case OMPRTL___kmpc_for_static_init_4u:
- case OMPRTL___kmpc_for_static_init_8:
- case OMPRTL___kmpc_for_static_init_8u: {
- // Check the schedule and allow static schedule in SPMD mode.
- unsigned ScheduleArgOpNo = 2;
- auto *ScheduleTypeCI =
- dyn_cast<ConstantInt>(CB.getArgOperand(ScheduleArgOpNo));
- unsigned ScheduleTypeVal =
- ScheduleTypeCI ? ScheduleTypeCI->getZExtValue() : 0;
- switch (OMPScheduleType(ScheduleTypeVal)) {
- case OMPScheduleType::UnorderedStatic:
- case OMPScheduleType::UnorderedStaticChunked:
- case OMPScheduleType::OrderedDistribute:
- case OMPScheduleType::OrderedDistributeChunked:
+ RuntimeFunction RF = It->getSecond();
+ switch (RF) {
+ // All the functions we know are compatible with SPMD mode.
+ case OMPRTL___kmpc_is_spmd_exec_mode:
+ case OMPRTL___kmpc_distribute_static_fini:
+ case OMPRTL___kmpc_for_static_fini:
+ case OMPRTL___kmpc_global_thread_num:
+ case OMPRTL___kmpc_get_hardware_num_threads_in_block:
+ case OMPRTL___kmpc_get_hardware_num_blocks:
+ case OMPRTL___kmpc_single:
+ case OMPRTL___kmpc_end_single:
+ case OMPRTL___kmpc_master:
+ case OMPRTL___kmpc_end_master:
+ case OMPRTL___kmpc_barrier:
+ case OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2:
+ case OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2:
+ case OMPRTL___kmpc_error:
+ case OMPRTL___kmpc_flush:
+ case OMPRTL___kmpc_get_hardware_thread_id_in_block:
+ case OMPRTL___kmpc_get_warp_size:
+ case OMPRTL_omp_get_thread_num:
+ case OMPRTL_omp_get_num_threads:
+ case OMPRTL_omp_get_max_threads:
+ case OMPRTL_omp_in_parallel:
+ case OMPRTL_omp_get_dynamic:
+ case OMPRTL_omp_get_cancellation:
+ case OMPRTL_omp_get_nested:
+ case OMPRTL_omp_get_schedule:
+ case OMPRTL_omp_get_thread_limit:
+ case OMPRTL_omp_get_supported_active_levels:
+ case OMPRTL_omp_get_max_active_levels:
+ case OMPRTL_omp_get_level:
+ case OMPRTL_omp_get_ancestor_thread_num:
+ case OMPRTL_omp_get_team_size:
+ case OMPRTL_omp_get_active_level:
+ case OMPRTL_omp_in_final:
+ case OMPRTL_omp_get_proc_bind:
+ case OMPRTL_omp_get_num_places:
+ case OMPRTL_omp_get_num_procs:
+ case OMPRTL_omp_get_place_proc_ids:
+ case OMPRTL_omp_get_place_num:
+ case OMPRTL_omp_get_partition_num_places:
+ case OMPRTL_omp_get_partition_place_nums:
+ case OMPRTL_omp_get_wtime:
break;
- default:
+ case OMPRTL___kmpc_distribute_static_init_4:
+ case OMPRTL___kmpc_distribute_static_init_4u:
+ case OMPRTL___kmpc_distribute_static_init_8:
+ case OMPRTL___kmpc_distribute_static_init_8u:
+ case OMPRTL___kmpc_for_static_init_4:
+ case OMPRTL___kmpc_for_static_init_4u:
+ case OMPRTL___kmpc_for_static_init_8:
+ case OMPRTL___kmpc_for_static_init_8u: {
+ // Check the schedule and allow static schedule in SPMD mode.
+ unsigned ScheduleArgOpNo = 2;
+ auto *ScheduleTypeCI =
+ dyn_cast<ConstantInt>(CB.getArgOperand(ScheduleArgOpNo));
+ unsigned ScheduleTypeVal =
+ ScheduleTypeCI ? ScheduleTypeCI->getZExtValue() : 0;
+ switch (OMPScheduleType(ScheduleTypeVal)) {
+ case OMPScheduleType::UnorderedStatic:
+ case OMPScheduleType::UnorderedStaticChunked:
+ case OMPScheduleType::OrderedDistribute:
+ case OMPScheduleType::OrderedDistributeChunked:
+ break;
+ default:
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
+ SPMDCompatibilityTracker.insert(&CB);
+ break;
+ };
+ } break;
+ case OMPRTL___kmpc_target_init:
+ KernelInitCB = &CB;
+ break;
+ case OMPRTL___kmpc_target_deinit:
+ KernelDeinitCB = &CB;
+ break;
+ case OMPRTL___kmpc_parallel_51:
+ if (!handleParallel51(A, CB))
+ indicatePessimisticFixpoint();
+ return;
+ case OMPRTL___kmpc_omp_task:
+ // We do not look into tasks right now, just give up.
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
SPMDCompatibilityTracker.insert(&CB);
+ ReachedUnknownParallelRegions.insert(&CB);
break;
- };
- } break;
- case OMPRTL___kmpc_target_init:
- KernelInitCB = &CB;
- break;
- case OMPRTL___kmpc_target_deinit:
- KernelDeinitCB = &CB;
- break;
- case OMPRTL___kmpc_parallel_51:
- if (auto *ParallelRegion = dyn_cast<Function>(
- CB.getArgOperand(WrapperFunctionArgNo)->stripPointerCasts())) {
- ReachedKnownParallelRegions.insert(ParallelRegion);
- /// Check nested parallelism
- auto *FnAA = A.getAAFor<AAKernelInfo>(
- *this, IRPosition::function(*ParallelRegion), DepClassTy::OPTIONAL);
- NestedParallelism |= !FnAA || !FnAA->getState().isValidState() ||
- !FnAA->ReachedKnownParallelRegions.empty() ||
- !FnAA->ReachedUnknownParallelRegions.empty();
+ case OMPRTL___kmpc_alloc_shared:
+ case OMPRTL___kmpc_free_shared:
+ // Return without setting a fixpoint, to be resolved in updateImpl.
+ return;
+ default:
+ // Unknown OpenMP runtime calls generally cannot be executed in
+ // SPMD-mode. However, they do not hide parallel regions.
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
+ SPMDCompatibilityTracker.insert(&CB);
break;
}
- // The condition above should usually get the parallel region function
- // pointer and record it. In the off chance it doesn't we assume the
- // worst.
- ReachedUnknownParallelRegions.insert(&CB);
- break;
- case OMPRTL___kmpc_omp_task:
- // We do not look into tasks right now, just give up.
- SPMDCompatibilityTracker.indicatePessimisticFixpoint();
- SPMDCompatibilityTracker.insert(&CB);
- ReachedUnknownParallelRegions.insert(&CB);
- break;
- case OMPRTL___kmpc_alloc_shared:
- case OMPRTL___kmpc_free_shared:
- // Return without setting a fixpoint, to be resolved in updateImpl.
+ // All other OpenMP runtime calls will not reach parallel regions so they
+ // can be safely ignored for now. Since it is a known OpenMP runtime call
+ // we have now modeled all effects and there is no need for any update.
+ indicateOptimisticFixpoint();
+ };
+
+ const auto *AACE =
+ A.getAAFor<AACallEdges>(*this, getIRPosition(), DepClassTy::OPTIONAL);
+ if (!AACE || !AACE->getState().isValidState() || AACE->hasUnknownCallee()) {
+ CheckCallee(getAssociatedFunction(), 1);
return;
- default:
- // Unknown OpenMP runtime calls cannot be executed in SPMD-mode,
- // generally. However, they do not hide parallel regions.
- SPMDCompatibilityTracker.indicatePessimisticFixpoint();
- SPMDCompatibilityTracker.insert(&CB);
- break;
}
- // All other OpenMP runtime calls will not reach parallel regions so they
- // can be safely ignored for now. Since it is a known OpenMP runtime call we
- // have now modeled all effects and there is no need for any update.
- indicateOptimisticFixpoint();
+ const auto &OptimisticEdges = AACE->getOptimisticEdges();
+ for (auto *Callee : OptimisticEdges) {
+ CheckCallee(Callee, OptimisticEdges.size());
+ if (isAtFixpoint())
+ break;
+ }
}
ChangeStatus updateImpl(Attributor &A) override {
@@ -4782,62 +5045,115 @@ struct AAKernelInfoCallSite : AAKernelInfo {
// call site specific liveness information and then it makes
// sense to specialize attributes for call sites arguments instead of
// redirecting requests to the callee argument.
- Function *F = getAssociatedFunction();
-
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
- const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(F);
-
- // If F is not a runtime function, propagate the AAKernelInfo of the callee.
- if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) {
- const IRPosition &FnPos = IRPosition::function(*F);
- auto *FnAA = A.getAAFor<AAKernelInfo>(*this, FnPos, DepClassTy::REQUIRED);
- if (!FnAA)
- return indicatePessimisticFixpoint();
- if (getState() == FnAA->getState())
- return ChangeStatus::UNCHANGED;
- getState() = FnAA->getState();
- return ChangeStatus::CHANGED;
- }
-
- // F is a runtime function that allocates or frees memory, check
- // AAHeapToStack and AAHeapToShared.
KernelInfoState StateBefore = getState();
- assert((It->getSecond() == OMPRTL___kmpc_alloc_shared ||
- It->getSecond() == OMPRTL___kmpc_free_shared) &&
- "Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call");
-
- CallBase &CB = cast<CallBase>(getAssociatedValue());
- auto *HeapToStackAA = A.getAAFor<AAHeapToStack>(
- *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL);
- auto *HeapToSharedAA = A.getAAFor<AAHeapToShared>(
- *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL);
+ auto CheckCallee = [&](Function *F, int NumCallees) {
+ const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(F);
+
+ // If F is not a runtime function, propagate the AAKernelInfo of the
+ // callee.
+ if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) {
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto *FnAA =
+ A.getAAFor<AAKernelInfo>(*this, FnPos, DepClassTy::REQUIRED);
+ if (!FnAA)
+ return indicatePessimisticFixpoint();
+ if (getState() == FnAA->getState())
+ return ChangeStatus::UNCHANGED;
+ getState() = FnAA->getState();
+ return ChangeStatus::CHANGED;
+ }
+ if (NumCallees > 1)
+ return indicatePessimisticFixpoint();
- RuntimeFunction RF = It->getSecond();
+ CallBase &CB = cast<CallBase>(getAssociatedValue());
+ if (It->getSecond() == OMPRTL___kmpc_parallel_51) {
+ if (!handleParallel51(A, CB))
+ return indicatePessimisticFixpoint();
+ return StateBefore == getState() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
- switch (RF) {
- // If neither HeapToStack nor HeapToShared assume the call is removed,
- // assume SPMD incompatibility.
- case OMPRTL___kmpc_alloc_shared:
- if ((!HeapToStackAA || !HeapToStackAA->isAssumedHeapToStack(CB)) &&
- (!HeapToSharedAA || !HeapToSharedAA->isAssumedHeapToShared(CB)))
- SPMDCompatibilityTracker.insert(&CB);
- break;
- case OMPRTL___kmpc_free_shared:
- if ((!HeapToStackAA ||
- !HeapToStackAA->isAssumedHeapToStackRemovedFree(CB)) &&
- (!HeapToSharedAA ||
- !HeapToSharedAA->isAssumedHeapToSharedRemovedFree(CB)))
+ // F is a runtime function that allocates or frees memory, check
+ // AAHeapToStack and AAHeapToShared.
+ assert(
+ (It->getSecond() == OMPRTL___kmpc_alloc_shared ||
+ It->getSecond() == OMPRTL___kmpc_free_shared) &&
+ "Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call");
+
+ auto *HeapToStackAA = A.getAAFor<AAHeapToStack>(
+ *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL);
+ auto *HeapToSharedAA = A.getAAFor<AAHeapToShared>(
+ *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL);
+
+ RuntimeFunction RF = It->getSecond();
+
+ switch (RF) {
+ // If neither HeapToStack nor HeapToShared assume the call is removed,
+ // assume SPMD incompatibility.
+ case OMPRTL___kmpc_alloc_shared:
+ if ((!HeapToStackAA || !HeapToStackAA->isAssumedHeapToStack(CB)) &&
+ (!HeapToSharedAA || !HeapToSharedAA->isAssumedHeapToShared(CB)))
+ SPMDCompatibilityTracker.insert(&CB);
+ break;
+ case OMPRTL___kmpc_free_shared:
+ if ((!HeapToStackAA ||
+ !HeapToStackAA->isAssumedHeapToStackRemovedFree(CB)) &&
+ (!HeapToSharedAA ||
+ !HeapToSharedAA->isAssumedHeapToSharedRemovedFree(CB)))
+ SPMDCompatibilityTracker.insert(&CB);
+ break;
+ default:
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
SPMDCompatibilityTracker.insert(&CB);
- break;
- default:
- SPMDCompatibilityTracker.indicatePessimisticFixpoint();
- SPMDCompatibilityTracker.insert(&CB);
+ }
+ return ChangeStatus::CHANGED;
+ };
+
+ const auto *AACE =
+ A.getAAFor<AACallEdges>(*this, getIRPosition(), DepClassTy::OPTIONAL);
+ if (!AACE || !AACE->getState().isValidState() || AACE->hasUnknownCallee()) {
+ if (Function *F = getAssociatedFunction())
+ CheckCallee(F, /*NumCallees=*/1);
+ } else {
+ const auto &OptimisticEdges = AACE->getOptimisticEdges();
+ for (auto *Callee : OptimisticEdges) {
+ CheckCallee(Callee, OptimisticEdges.size());
+ if (isAtFixpoint())
+ break;
+ }
}
return StateBefore == getState() ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
}
+
+  /// Deal with a __kmpc_parallel_51 call (\p CB). Returns true if the call
+  /// was handled; if a problem occurred, false is returned.
+ bool handleParallel51(Attributor &A, CallBase &CB) {
+ const unsigned int NonWrapperFunctionArgNo = 5;
+ const unsigned int WrapperFunctionArgNo = 6;
+ auto ParallelRegionOpArgNo = SPMDCompatibilityTracker.isAssumed()
+ ? NonWrapperFunctionArgNo
+ : WrapperFunctionArgNo;
+
+ auto *ParallelRegion = dyn_cast<Function>(
+ CB.getArgOperand(ParallelRegionOpArgNo)->stripPointerCasts());
+ if (!ParallelRegion)
+ return false;
+
+ ReachedKnownParallelRegions.insert(&CB);
+    // Check nested parallelism.
+ auto *FnAA = A.getAAFor<AAKernelInfo>(
+ *this, IRPosition::function(*ParallelRegion), DepClassTy::OPTIONAL);
+ NestedParallelism |= !FnAA || !FnAA->getState().isValidState() ||
+ !FnAA->ReachedKnownParallelRegions.empty() ||
+ !FnAA->ReachedKnownParallelRegions.isValidState() ||
+ !FnAA->ReachedUnknownParallelRegions.isValidState() ||
+ !FnAA->ReachedUnknownParallelRegions.empty();
+ return true;
+ }
};
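For readers following the new handleParallel51 path above: a minimal sketch (not
part of the patch) of the operand extraction it performs. Argument positions 5
and 6 come from the NonWrapperFunctionArgNo/WrapperFunctionArgNo constants; the
helper name is hypothetical.

// Hypothetical helper mirroring handleParallel51's operand selection.
static Function *getParallelRegionFn(CallBase &CB, bool IsSPMD) {
  unsigned ArgNo = IsSPMD ? 5 : 6; // outlined fn vs. wrapper fn operand
  Value *Op = CB.getArgOperand(ArgNo)->stripPointerCasts();
  return dyn_cast<Function>(Op);   // nullptr when the region is not static
}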
struct AAFoldRuntimeCall
@@ -5251,6 +5567,11 @@ void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) {
UsedAssumedInformation, AA::Interprocedural);
continue;
}
+ if (auto *CI = dyn_cast<CallBase>(&I)) {
+ if (CI->isIndirectCall())
+ A.getOrCreateAAFor<AAIndirectCallInfo>(
+ IRPosition::callsite_function(*CI));
+ }
if (auto *SI = dyn_cast<StoreInst>(&I)) {
A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI));
continue;
@@ -5569,7 +5890,9 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
return PreservedAnalyses::all();
}
-bool llvm::omp::isKernel(Function &Fn) { return Fn.hasFnAttribute("kernel"); }
+bool llvm::omp::isOpenMPKernel(Function &Fn) {
+ return Fn.hasFnAttribute("kernel");
+}
KernelSet llvm::omp::getDeviceKernels(Module &M) {
// TODO: Create a more cross-platform way of determining device kernels.
@@ -5591,10 +5914,13 @@ KernelSet llvm::omp::getDeviceKernels(Module &M) {
if (!KernelFn)
continue;
- assert(isKernel(*KernelFn) && "Inconsistent kernel function annotation");
- ++NumOpenMPTargetRegionKernels;
-
- Kernels.insert(KernelFn);
+  // We are only interested in OpenMP target regions. Others, such as kernels
+  // generated by CUDA but linked into the same module, are not interesting to
+  // this pass.
+ if (isOpenMPKernel(*KernelFn)) {
+ ++NumOpenMPTargetRegionKernels;
+ Kernels.insert(KernelFn);
+ } else
+ ++NumNonOpenMPTargetRegionKernels;
}
return Kernels;
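A hedged usage sketch of the filtered kernel set (assuming KernelSet iterates
over Function pointers, as its uses in this file suggest):

omp::KernelSet Kernels = omp::getDeviceKernels(M);
for (Function *KernelFn : Kernels)
  assert(omp::isOpenMPKernel(*KernelFn) &&
         "getDeviceKernels now only returns OpenMP target-region kernels");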
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp
index b88ba2dec24b..aa4f205ec5bd 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -161,7 +161,7 @@ struct FunctionOutliningInfo {
// The dominating block of the region to be outlined.
BasicBlock *NonReturnBlock = nullptr;
- // The set of blocks in Entries that that are predecessors to ReturnBlock
+ // The set of blocks in Entries that are predecessors to ReturnBlock
SmallVector<BasicBlock *, 4> ReturnBlockPreds;
};
@@ -767,7 +767,7 @@ bool PartialInlinerImpl::shouldPartialInline(
const DataLayout &DL = Caller->getParent()->getDataLayout();
// The savings of eliminating the call:
- int NonWeightedSavings = getCallsiteCost(CB, DL);
+ int NonWeightedSavings = getCallsiteCost(CalleeTTI, CB, DL);
BlockFrequency NormWeightedSavings(NonWeightedSavings);
// Weighted saving is smaller than weighted cost, return false
@@ -842,12 +842,12 @@ PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
}
if (CallInst *CI = dyn_cast<CallInst>(&I)) {
- InlineCost += getCallsiteCost(*CI, DL);
+ InlineCost += getCallsiteCost(*TTI, *CI, DL);
continue;
}
if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
- InlineCost += getCallsiteCost(*II, DL);
+ InlineCost += getCallsiteCost(*TTI, *II, DL);
continue;
}
@@ -1042,7 +1042,7 @@ void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
BasicBlock::iterator I = PreReturn->begin();
- Instruction *Ins = &ClonedOI->ReturnBlock->front();
+ BasicBlock::iterator Ins = ClonedOI->ReturnBlock->begin();
SmallVector<Instruction *, 4> DeadPhis;
while (I != PreReturn->end()) {
PHINode *OldPhi = dyn_cast<PHINode>(I);
@@ -1050,9 +1050,10 @@ void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
break;
PHINode *RetPhi =
- PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
+ PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "");
+ RetPhi->insertBefore(Ins);
OldPhi->replaceAllUsesWith(RetPhi);
- Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
+ Ins = ClonedOI->ReturnBlock->getFirstNonPHIIt();
RetPhi->addIncoming(&*I, PreReturn);
for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
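The PartialInlining hunks above migrate insertion points from raw Instruction
pointers to BasicBlock iterators. A compact sketch of the new pattern, with Ty,
NumPreds, and BB as placeholder names:

// Iterator-based PHI insertion (placeholders: Ty, NumPreds, BB).
PHINode *RetPhi = PHINode::Create(Ty, NumPreds, "");
RetPhi->insertBefore(BB->begin());                 // was: pass an Instruction *
BasicBlock::iterator Ins = BB->getFirstNonPHIIt(); // was: getFirstNonPHI()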
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SCCP.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SCCP.cpp
index e2e6364df906..b1f9b827dcba 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SCCP.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SCCP.cpp
@@ -22,6 +22,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ModRef.h"
@@ -43,7 +44,7 @@ STATISTIC(NumInstReplaced,
"Number of instructions replaced with (simpler) instruction");
static cl::opt<unsigned> FuncSpecMaxIters(
- "funcspec-max-iters", cl::init(1), cl::Hidden, cl::desc(
+ "funcspec-max-iters", cl::init(10), cl::Hidden, cl::desc(
"The maximum number of iterations function specialization is run"));
static void findReturnsToZap(Function &F,
@@ -235,11 +236,11 @@ static bool runIPSCCP(
// nodes in executable blocks we found values for. The function's entry
// block is not part of BlocksToErase, so we have to handle it separately.
for (BasicBlock *BB : BlocksToErase) {
- NumInstRemoved += changeToUnreachable(BB->getFirstNonPHI(),
+ NumInstRemoved += changeToUnreachable(BB->getFirstNonPHIOrDbg(),
/*PreserveLCSSA=*/false, &DTU);
}
if (!Solver.isBlockExecutable(&F.front()))
- NumInstRemoved += changeToUnreachable(F.front().getFirstNonPHI(),
+ NumInstRemoved += changeToUnreachable(F.front().getFirstNonPHIOrDbg(),
/*PreserveLCSSA=*/false, &DTU);
BasicBlock *NewUnreachableBB = nullptr;
@@ -371,6 +372,18 @@ static bool runIPSCCP(
StoreInst *SI = cast<StoreInst>(GV->user_back());
SI->eraseFromParent();
}
+
+ // Try to create a debug constant expression for the global variable
+ // initializer value.
+ SmallVector<DIGlobalVariableExpression *, 1> GVEs;
+ GV->getDebugInfo(GVEs);
+ if (GVEs.size() == 1) {
+ DIBuilder DIB(M);
+ if (DIExpression *InitExpr = getExpressionForConstant(
+ DIB, *GV->getInitializer(), *GV->getValueType()))
+ GVEs[0]->replaceOperandWith(1, InitExpr);
+ }
+
MadeChanges = true;
M.eraseGlobalVariable(GV);
++NumGlobalConst;
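For a flavor of what the new block produces: for a simple scalar initializer,
DIBuilder can emit a constant-value expression so debuggers still see the value
after the global is erased. A minimal sketch using the public DIBuilder API,
with GVE standing in for the variable's DIGlobalVariableExpression
(getExpressionForConstant itself covers more cases):

DIBuilder DIB(M);
// DW_OP_constu 42, DW_OP_stack_value for a global initialized to 42.
DIExpression *InitExpr = DIB.createConstantValueExpression(42);
GVE->replaceOperandWith(1, InitExpr);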
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
index 3ddf5fe20edb..f7a54d428f20 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/SampleContextTracker.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/InstrTypes.h"
@@ -29,7 +28,7 @@ using namespace sampleprof;
namespace llvm {
ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite,
- StringRef CalleeName) {
+ FunctionId CalleeName) {
if (CalleeName.empty())
return getHottestChildContext(CallSite);
@@ -104,7 +103,7 @@ SampleContextTracker::moveContextSamples(ContextTrieNode &ToNodeParent,
}
void ContextTrieNode::removeChildContext(const LineLocation &CallSite,
- StringRef CalleeName) {
+ FunctionId CalleeName) {
uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite);
// Note this essentially calls dtor and destroys that child context
AllChildContext.erase(Hash);
@@ -114,7 +113,7 @@ std::map<uint64_t, ContextTrieNode> &ContextTrieNode::getAllChildContext() {
return AllChildContext;
}
-StringRef ContextTrieNode::getFuncName() const { return FuncName; }
+FunctionId ContextTrieNode::getFuncName() const { return FuncName; }
FunctionSamples *ContextTrieNode::getFunctionSamples() const {
return FuncSamples;
@@ -178,7 +177,7 @@ void ContextTrieNode::dumpTree() {
}
ContextTrieNode *ContextTrieNode::getOrCreateChildContext(
- const LineLocation &CallSite, StringRef CalleeName, bool AllowCreate) {
+ const LineLocation &CallSite, FunctionId CalleeName, bool AllowCreate) {
uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite);
auto It = AllChildContext.find(Hash);
if (It != AllChildContext.end()) {
@@ -201,7 +200,7 @@ SampleContextTracker::SampleContextTracker(
: GUIDToFuncNameMap(GUIDToFuncNameMap) {
for (auto &FuncSample : Profiles) {
FunctionSamples *FSamples = &FuncSample.second;
- SampleContext Context = FuncSample.first;
+ SampleContext Context = FuncSample.second.getContext();
LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context.toString()
<< "\n");
ContextTrieNode *NewNode = getOrCreateContextPath(Context, true);
@@ -232,14 +231,12 @@ SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst,
return nullptr;
CalleeName = FunctionSamples::getCanonicalFnName(CalleeName);
- // Convert real function names to MD5 names, if the input profile is
- // MD5-based.
- std::string FGUID;
- CalleeName = getRepInFormat(CalleeName, FunctionSamples::UseMD5, FGUID);
+
+ FunctionId FName = getRepInFormat(CalleeName);
// For indirect call, CalleeName will be empty, in which case the context
// profile for callee with largest total samples will be returned.
- ContextTrieNode *CalleeContext = getCalleeContextFor(DIL, CalleeName);
+ ContextTrieNode *CalleeContext = getCalleeContextFor(DIL, FName);
if (CalleeContext) {
FunctionSamples *FSamples = CalleeContext->getFunctionSamples();
LLVM_DEBUG(if (FSamples) {
@@ -305,27 +302,23 @@ SampleContextTracker::getContextSamplesFor(const SampleContext &Context) {
SampleContextTracker::ContextSamplesTy &
SampleContextTracker::getAllContextSamplesFor(const Function &Func) {
StringRef CanonName = FunctionSamples::getCanonicalFnName(Func);
- return FuncToCtxtProfiles[CanonName];
+ return FuncToCtxtProfiles[getRepInFormat(CanonName)];
}
SampleContextTracker::ContextSamplesTy &
SampleContextTracker::getAllContextSamplesFor(StringRef Name) {
- return FuncToCtxtProfiles[Name];
+ return FuncToCtxtProfiles[getRepInFormat(Name)];
}
FunctionSamples *SampleContextTracker::getBaseSamplesFor(const Function &Func,
bool MergeContext) {
StringRef CanonName = FunctionSamples::getCanonicalFnName(Func);
- return getBaseSamplesFor(CanonName, MergeContext);
+ return getBaseSamplesFor(getRepInFormat(CanonName), MergeContext);
}
-FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name,
+FunctionSamples *SampleContextTracker::getBaseSamplesFor(FunctionId Name,
bool MergeContext) {
LLVM_DEBUG(dbgs() << "Getting base profile for function: " << Name << "\n");
- // Convert real function names to MD5 names, if the input profile is
- // MD5-based.
- std::string FGUID;
- Name = getRepInFormat(Name, FunctionSamples::UseMD5, FGUID);
// Base profile is top-level node (child of root node), so try to retrieve
// existing top-level node for given function first. If it exists, it could be
@@ -373,7 +366,7 @@ void SampleContextTracker::markContextSamplesInlined(
ContextTrieNode &SampleContextTracker::getRootContext() { return RootContext; }
void SampleContextTracker::promoteMergeContextSamplesTree(
- const Instruction &Inst, StringRef CalleeName) {
+ const Instruction &Inst, FunctionId CalleeName) {
LLVM_DEBUG(dbgs() << "Promoting and merging context tree for instr: \n"
<< Inst << "\n");
// Get the caller context for the call instruction, we don't use callee
@@ -458,9 +451,9 @@ void SampleContextTracker::dump() { RootContext.dumpTree(); }
StringRef SampleContextTracker::getFuncNameFor(ContextTrieNode *Node) const {
if (!FunctionSamples::UseMD5)
- return Node->getFuncName();
+ return Node->getFuncName().stringRef();
assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be populated first");
- return GUIDToFuncNameMap->lookup(std::stoull(Node->getFuncName().data()));
+ return GUIDToFuncNameMap->lookup(Node->getFuncName().getHashCode());
}
ContextTrieNode *
@@ -470,7 +463,7 @@ SampleContextTracker::getContextFor(const SampleContext &Context) {
ContextTrieNode *
SampleContextTracker::getCalleeContextFor(const DILocation *DIL,
- StringRef CalleeName) {
+ FunctionId CalleeName) {
assert(DIL && "Expect non-null location");
ContextTrieNode *CallContext = getContextFor(DIL);
@@ -485,7 +478,7 @@ SampleContextTracker::getCalleeContextFor(const DILocation *DIL,
ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) {
assert(DIL && "Expect non-null location");
- SmallVector<std::pair<LineLocation, StringRef>, 10> S;
+ SmallVector<std::pair<LineLocation, FunctionId>, 10> S;
// Use C++ linkage name if possible.
const DILocation *PrevDIL = DIL;
@@ -494,7 +487,8 @@ ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) {
if (Name.empty())
Name = PrevDIL->getScope()->getSubprogram()->getName();
S.push_back(
- std::make_pair(FunctionSamples::getCallSiteIdentifier(DIL), Name));
+ std::make_pair(FunctionSamples::getCallSiteIdentifier(DIL),
+ getRepInFormat(Name)));
PrevDIL = DIL;
}
@@ -503,24 +497,14 @@ ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) {
StringRef RootName = PrevDIL->getScope()->getSubprogram()->getLinkageName();
if (RootName.empty())
RootName = PrevDIL->getScope()->getSubprogram()->getName();
- S.push_back(std::make_pair(LineLocation(0, 0), RootName));
-
- // Convert real function names to MD5 names, if the input profile is
- // MD5-based.
- std::list<std::string> MD5Names;
- if (FunctionSamples::UseMD5) {
- for (auto &Location : S) {
- MD5Names.emplace_back();
- getRepInFormat(Location.second, FunctionSamples::UseMD5, MD5Names.back());
- Location.second = MD5Names.back();
- }
- }
+ S.push_back(std::make_pair(LineLocation(0, 0),
+ getRepInFormat(RootName)));
ContextTrieNode *ContextNode = &RootContext;
int I = S.size();
while (--I >= 0 && ContextNode) {
LineLocation &CallSite = S[I].first;
- StringRef CalleeName = S[I].second;
+ FunctionId CalleeName = S[I].second;
ContextNode = ContextNode->getChildContext(CallSite, CalleeName);
}
@@ -540,10 +524,10 @@ SampleContextTracker::getOrCreateContextPath(const SampleContext &Context,
// Create child node at parent line/disc location
if (AllowCreate) {
ContextNode =
- ContextNode->getOrCreateChildContext(CallSiteLoc, Callsite.FuncName);
+ ContextNode->getOrCreateChildContext(CallSiteLoc, Callsite.Func);
} else {
ContextNode =
- ContextNode->getChildContext(CallSiteLoc, Callsite.FuncName);
+ ContextNode->getChildContext(CallSiteLoc, Callsite.Func);
}
CallSiteLoc = Callsite.Location;
}
@@ -553,12 +537,14 @@ SampleContextTracker::getOrCreateContextPath(const SampleContext &Context,
return ContextNode;
}
-ContextTrieNode *SampleContextTracker::getTopLevelContextNode(StringRef FName) {
+ContextTrieNode *
+SampleContextTracker::getTopLevelContextNode(FunctionId FName) {
assert(!FName.empty() && "Top level node query must provide valid name");
return RootContext.getChildContext(LineLocation(0, 0), FName);
}
-ContextTrieNode &SampleContextTracker::addTopLevelContextNode(StringRef FName) {
+ContextTrieNode &
+SampleContextTracker::addTopLevelContextNode(FunctionId FName) {
assert(!getTopLevelContextNode(FName) && "Node to add must not exist");
return *RootContext.getOrCreateChildContext(LineLocation(0, 0), FName);
}
@@ -638,7 +624,7 @@ void SampleContextTracker::createContextLessProfileMap(
FunctionSamples *FProfile = Node->getFunctionSamples();
// Profile's context can be empty, use ContextNode's func name.
if (FProfile)
- ContextLessProfiles[Node->getFuncName()].merge(*FProfile);
+ ContextLessProfiles.Create(Node->getFuncName()).merge(*FProfile);
}
}
} // namespace llvm
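The StringRef-to-FunctionId migration throughout this file relies on one type
carrying either a real name or an MD5 hash. A hedged recap using only the
accessors visible in this diff:

FunctionId Id = getRepInFormat(CanonName); // name or MD5, per FunctionSamples::UseMD5
StringRef Str = Id.stringRef();            // meaningful in the non-MD5 case
uint64_t Hash = Id.getHashCode();          // stable key for GUID-based lookups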
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp
index a53baecd4776..6c6f0a0eca72 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -56,6 +56,7 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/ProfileData/InstrProf.h"
@@ -142,11 +143,6 @@ static cl::opt<bool> PersistProfileStaleness(
cl::desc("Compute stale profile statistical metrics and write it into the "
"native object file(.llvm_stats section)."));
-static cl::opt<bool> FlattenProfileForMatching(
- "flatten-profile-for-matching", cl::Hidden, cl::init(true),
- cl::desc(
- "Use flattened profile for stale profile detection and matching."));
-
static cl::opt<bool> ProfileSampleAccurate(
"profile-sample-accurate", cl::Hidden, cl::init(false),
cl::desc("If the sample profile is accurate, we will mark all un-sampled "
@@ -429,7 +425,7 @@ struct CandidateComparer {
return LCS->getBodySamples().size() > RCS->getBodySamples().size();
// Tie breaker using GUID so we have stable/deterministic inlining order
- return LCS->getGUID(LCS->getName()) < RCS->getGUID(RCS->getName());
+ return LCS->getGUID() < RCS->getGUID();
}
};
@@ -458,32 +454,44 @@ class SampleProfileMatcher {
uint64_t MismatchedFuncHashSamples = 0;
uint64_t TotalFuncHashSamples = 0;
+ // A dummy name for unknown indirect callee, used to differentiate from a
+ // non-call instruction that also has an empty callee name.
+ static constexpr const char *UnknownIndirectCallee =
+ "unknown.indirect.callee";
+
public:
SampleProfileMatcher(Module &M, SampleProfileReader &Reader,
const PseudoProbeManager *ProbeManager)
- : M(M), Reader(Reader), ProbeManager(ProbeManager) {
- if (FlattenProfileForMatching) {
- ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
- FunctionSamples::ProfileIsCS);
- }
- }
+      : M(M), Reader(Reader), ProbeManager(ProbeManager) {}
void runOnModule();
private:
FunctionSamples *getFlattenedSamplesFor(const Function &F) {
StringRef CanonFName = FunctionSamples::getCanonicalFnName(F);
- auto It = FlattenedProfiles.find(CanonFName);
+ auto It = FlattenedProfiles.find(FunctionId(CanonFName));
if (It != FlattenedProfiles.end())
return &It->second;
return nullptr;
}
- void runOnFunction(const Function &F, const FunctionSamples &FS);
+ void runOnFunction(const Function &F);
+ void findIRAnchors(const Function &F,
+ std::map<LineLocation, StringRef> &IRAnchors);
+ void findProfileAnchors(
+ const FunctionSamples &FS,
+ std::map<LineLocation, std::unordered_set<FunctionId>>
+ &ProfileAnchors);
+ void countMismatchedSamples(const FunctionSamples &FS);
void countProfileMismatches(
+ const Function &F, const FunctionSamples &FS,
+ const std::map<LineLocation, StringRef> &IRAnchors,
+ const std::map<LineLocation, std::unordered_set<FunctionId>>
+ &ProfileAnchors);
+ void countProfileCallsiteMismatches(
const FunctionSamples &FS,
- const std::unordered_set<LineLocation, LineLocationHash>
- &MatchedCallsiteLocs,
+ const std::map<LineLocation, StringRef> &IRAnchors,
+ const std::map<LineLocation, std::unordered_set<FunctionId>>
+ &ProfileAnchors,
uint64_t &FuncMismatchedCallsites, uint64_t &FuncProfiledCallsites);
-
LocToLocMap &getIRToProfileLocationMap(const Function &F) {
auto Ret = FuncMappings.try_emplace(
FunctionSamples::getCanonicalFnName(F.getName()), LocToLocMap());
@@ -491,12 +499,10 @@ private:
}
void distributeIRToProfileLocationMap();
void distributeIRToProfileLocationMap(FunctionSamples &FS);
- void populateProfileCallsites(
- const FunctionSamples &FS,
- StringMap<std::set<LineLocation>> &CalleeToCallsitesMap);
void runStaleProfileMatching(
- const std::map<LineLocation, StringRef> &IRLocations,
- StringMap<std::set<LineLocation>> &CalleeToCallsitesMap,
+ const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
+ const std::map<LineLocation, std::unordered_set<FunctionId>>
+ &ProfileAnchors,
LocToLocMap &IRToProfileLocationMap);
};
@@ -538,7 +544,6 @@ protected:
findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
void findExternalInlineCandidate(CallBase *CB, const FunctionSamples *Samples,
DenseSet<GlobalValue::GUID> &InlinedGUIDs,
- const StringMap<Function *> &SymbolMap,
uint64_t Threshold);
// Attempt to promote indirect call and also inline the promoted call
bool tryPromoteAndInlineCandidate(
@@ -573,7 +578,7 @@ protected:
/// the function name. If the function name contains suffix, additional
/// entry is added to map from the stripped name to the function if there
/// is one-to-one mapping.
- StringMap<Function *> SymbolMap;
+ HashKeyMap<std::unordered_map, FunctionId, Function *> SymbolMap;
std::function<AssumptionCache &(Function &)> GetAC;
std::function<TargetTransformInfo &(Function &)> GetTTI;
@@ -615,6 +620,11 @@ protected:
// All the Names used in FunctionSamples including outline function
// names, inline instance names and call target names.
StringSet<> NamesInProfile;
+  // MD5 version of NamesInProfile. Either NamesInProfile or GUIDsInProfile is
+  // populated, depending on whether the profile uses MD5. Because the name
+  // table generally contains several orders of magnitude more entries than the
+  // number of functions, we do not want to convert all names from one form to
+  // another.
+ llvm::DenseSet<uint64_t> GUIDsInProfile;
// For symbols in the profile symbol list, whether to regard their profiles
// as accurate. It is mainly decided by the existence of the profile symbol
@@ -759,8 +769,7 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
assert(L && R && "Expect non-null FunctionSamples");
if (L->getHeadSamplesEstimate() != R->getHeadSamplesEstimate())
return L->getHeadSamplesEstimate() > R->getHeadSamplesEstimate();
- return FunctionSamples::getGUID(L->getName()) <
- FunctionSamples::getGUID(R->getName());
+ return L->getGUID() < R->getGUID();
};
if (FunctionSamples::ProfileIsCS) {
@@ -970,13 +979,13 @@ bool SampleProfileLoader::tryPromoteAndInlineCandidate(
// This prevents allocating an array of zero length in callees below.
if (MaxNumPromotions == 0)
return false;
- auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName();
+ auto CalleeFunctionName = Candidate.CalleeSamples->getFunction();
auto R = SymbolMap.find(CalleeFunctionName);
- if (R == SymbolMap.end() || !R->getValue())
+ if (R == SymbolMap.end() || !R->second)
return false;
auto &CI = *Candidate.CallInstr;
- if (!doesHistoryAllowICP(CI, R->getValue()->getName()))
+ if (!doesHistoryAllowICP(CI, R->second->getName()))
return false;
const char *Reason = "Callee function not available";
@@ -986,17 +995,17 @@ bool SampleProfileLoader::tryPromoteAndInlineCandidate(
// clone the caller first, and inline the cloned caller if it is
// recursive. As llvm does not inline recursive calls, we will
// simply ignore it instead of handling it explicitly.
- if (!R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
- R->getValue()->hasFnAttribute("use-sample-profile") &&
- R->getValue() != &F && isLegalToPromote(CI, R->getValue(), &Reason)) {
+ if (!R->second->isDeclaration() && R->second->getSubprogram() &&
+ R->second->hasFnAttribute("use-sample-profile") &&
+ R->second != &F && isLegalToPromote(CI, R->second, &Reason)) {
// For promoted target, set its value with NOMORE_ICP_MAGICNUM count
// in the value profile metadata so the target won't be promoted again.
SmallVector<InstrProfValueData, 1> SortedCallTargets = {InstrProfValueData{
- Function::getGUID(R->getValue()->getName()), NOMORE_ICP_MAGICNUM}};
+ Function::getGUID(R->second->getName()), NOMORE_ICP_MAGICNUM}};
updateIDTMetaData(CI, SortedCallTargets, 0);
auto *DI = &pgo::promoteIndirectCall(
- CI, R->getValue(), Candidate.CallsiteCount, Sum, false, ORE);
+ CI, R->second, Candidate.CallsiteCount, Sum, false, ORE);
if (DI) {
Sum -= Candidate.CallsiteCount;
// Do not prorate the indirect callsite distribution since the original
@@ -1025,7 +1034,8 @@ bool SampleProfileLoader::tryPromoteAndInlineCandidate(
}
} else {
LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
- << Candidate.CalleeSamples->getFuncName() << " because "
+ << FunctionSamples::getCanonicalFnName(
+                             Candidate.CallInstr->getName()) << " because "
<< Reason << "\n");
}
return false;
@@ -1070,8 +1080,7 @@ void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
void SampleProfileLoader::findExternalInlineCandidate(
CallBase *CB, const FunctionSamples *Samples,
- DenseSet<GlobalValue::GUID> &InlinedGUIDs,
- const StringMap<Function *> &SymbolMap, uint64_t Threshold) {
+ DenseSet<GlobalValue::GUID> &InlinedGUIDs, uint64_t Threshold) {
// If ExternalInlineAdvisor(ReplayInlineAdvisor) wants to inline an external
// function make sure it's imported
@@ -1080,7 +1089,7 @@ void SampleProfileLoader::findExternalInlineCandidate(
// just add the direct GUID and move on
if (!Samples) {
InlinedGUIDs.insert(
- FunctionSamples::getGUID(CB->getCalledFunction()->getName()));
+ Function::getGUID(CB->getCalledFunction()->getName()));
return;
}
// Otherwise, drop the threshold to import everything that we can
@@ -1121,22 +1130,20 @@ void SampleProfileLoader::findExternalInlineCandidate(
CalleeSample->getContext().hasAttribute(ContextShouldBeInlined);
if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold)
continue;
-
- StringRef Name = CalleeSample->getFuncName();
- Function *Func = SymbolMap.lookup(Name);
+
+ Function *Func = SymbolMap.lookup(CalleeSample->getFunction());
// Add to the import list only when it's defined out of module.
if (!Func || Func->isDeclaration())
- InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeSample->getName()));
+ InlinedGUIDs.insert(CalleeSample->getGUID());
// Import hot CallTargets, which may not be available in IR because full
// profile annotation cannot be done until backend compilation in ThinLTO.
for (const auto &BS : CalleeSample->getBodySamples())
for (const auto &TS : BS.second.getCallTargets())
- if (TS.getValue() > Threshold) {
- StringRef CalleeName = CalleeSample->getFuncName(TS.getKey());
- const Function *Callee = SymbolMap.lookup(CalleeName);
+ if (TS.second > Threshold) {
+ const Function *Callee = SymbolMap.lookup(TS.first);
if (!Callee || Callee->isDeclaration())
- InlinedGUIDs.insert(FunctionSamples::getGUID(TS.getKey()));
+ InlinedGUIDs.insert(TS.first.getHashCode());
}
// Import hot child context profile associted with callees. Note that this
@@ -1234,7 +1241,7 @@ bool SampleProfileLoader::inlineHotFunctions(
for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
uint64_t SumOrigin = Sum;
if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
- findExternalInlineCandidate(I, FS, InlinedGUIDs, SymbolMap,
+ findExternalInlineCandidate(I, FS, InlinedGUIDs,
PSI->getOrCompHotCountThreshold());
continue;
}
@@ -1255,7 +1262,7 @@ bool SampleProfileLoader::inlineHotFunctions(
}
} else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
- InlinedGUIDs, SymbolMap,
+ InlinedGUIDs,
PSI->getOrCompHotCountThreshold());
}
}
@@ -1504,7 +1511,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
for (const auto *FS : CalleeSamples) {
// TODO: Consider disable pre-lTO ICP for MonoLTO as well
if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
- findExternalInlineCandidate(I, FS, InlinedGUIDs, SymbolMap,
+ findExternalInlineCandidate(I, FS, InlinedGUIDs,
PSI->getOrCompHotCountThreshold());
continue;
}
@@ -1557,7 +1564,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
}
} else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
- InlinedGUIDs, SymbolMap,
+ InlinedGUIDs,
PSI->getOrCompHotCountThreshold());
}
}
@@ -1619,7 +1626,12 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
// Note that we have to do the merge right after processing function.
// This allows OutlineFS's profile to be used for annotation during
// top-down processing of functions' annotation.
- FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
+ FunctionSamples *OutlineFS = Reader->getSamplesFor(*Callee);
+ // If outlined function does not exist in the profile, add it to a
+ // separate map so that it does not rehash the original profile.
+ if (!OutlineFS)
+ OutlineFS = &OutlineFunctionSamples[
+ FunctionId(FunctionSamples::getCanonicalFnName(Callee->getName()))];
OutlineFS->merge(*FS, 1);
// Set outlined profile to be synthetic to not bias the inliner.
OutlineFS->SetContextSynthetic();
@@ -1638,7 +1650,7 @@ GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M) {
SmallVector<InstrProfValueData, 2> R;
for (const auto &I : SampleRecord::SortCallTargets(M)) {
R.emplace_back(
- InstrProfValueData{FunctionSamples::getGUID(I.first), I.second});
+ InstrProfValueData{I.first.getHashCode(), I.second});
}
return R;
}
@@ -1699,9 +1711,7 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
else if (OverwriteExistingWeights)
I.setMetadata(LLVMContext::MD_prof, nullptr);
} else if (!isa<IntrinsicInst>(&I)) {
- I.setMetadata(LLVMContext::MD_prof,
- MDB.createBranchWeights(
- {static_cast<uint32_t>(BlockWeights[BB])}));
+ setBranchWeights(I, {static_cast<uint32_t>(BlockWeights[BB])});
}
}
} else if (OverwriteExistingWeights || ProfileSampleBlockAccurate) {
@@ -1709,10 +1719,11 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
// clear it for cold code.
for (auto &I : *BB) {
if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
- if (cast<CallBase>(I).isIndirectCall())
+ if (cast<CallBase>(I).isIndirectCall()) {
I.setMetadata(LLVMContext::MD_prof, nullptr);
- else
- I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(0));
+ } else {
+ setBranchWeights(I, {uint32_t(0)});
+ }
}
}
}
@@ -1792,7 +1803,7 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
if (MaxWeight > 0 &&
(!TI->extractProfTotalWeight(TempWeight) || OverwriteExistingWeights)) {
LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
- TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+ setBranchWeights(*TI, Weights);
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)
<< "most popular destination for conditional branches at "
@@ -1865,7 +1876,8 @@ SampleProfileLoader::buildProfiledCallGraph(Module &M) {
for (Function &F : M) {
if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
continue;
- ProfiledCG->addProfiledFunction(FunctionSamples::getCanonicalFnName(F));
+ ProfiledCG->addProfiledFunction(
+ getRepInFormat(FunctionSamples::getCanonicalFnName(F)));
}
return ProfiledCG;
@@ -1913,7 +1925,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) {
// on the profile to favor more inlining. This is only a problem with CS
// profile.
// 3. Transitive indirect call edges due to inlining. When a callee function
- // (say B) is inlined into into a caller function (say A) in LTO prelink,
+ // (say B) is inlined into a caller function (say A) in LTO prelink,
// every call edge originated from the callee B will be transferred to
// the caller A. If any transferred edge (say A->C) is indirect, the
// original profiled indirect edge B->C, even if considered, would not
@@ -2016,8 +2028,16 @@ bool SampleProfileLoader::doInitialization(Module &M,
ProfileAccurateForSymsInList && PSL && !ProfileSampleAccurate;
if (ProfAccForSymsInList) {
NamesInProfile.clear();
- if (auto NameTable = Reader->getNameTable())
- NamesInProfile.insert(NameTable->begin(), NameTable->end());
+ GUIDsInProfile.clear();
+ if (auto NameTable = Reader->getNameTable()) {
+ if (FunctionSamples::UseMD5) {
+ for (auto Name : *NameTable)
+ GUIDsInProfile.insert(Name.getHashCode());
+ } else {
+ for (auto Name : *NameTable)
+ NamesInProfile.insert(Name.stringRef());
+ }
+ }
CoverageTracker.setProfAccForSymsInList(true);
}
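A hedged sketch of how the two sets populated above are consulted later (see
the runOnFunction hunk further down); the IR-side name is hashed once instead
of converting the whole name table:

bool InProfile = FunctionSamples::UseMD5
                     ? GUIDsInProfile.count(Function::getGUID(CanonName))
                     : NamesInProfile.count(CanonName);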
@@ -2103,77 +2123,200 @@ bool SampleProfileLoader::doInitialization(Module &M,
return true;
}
-void SampleProfileMatcher::countProfileMismatches(
- const FunctionSamples &FS,
- const std::unordered_set<LineLocation, LineLocationHash>
- &MatchedCallsiteLocs,
- uint64_t &FuncMismatchedCallsites, uint64_t &FuncProfiledCallsites) {
+void SampleProfileMatcher::findIRAnchors(
+ const Function &F, std::map<LineLocation, StringRef> &IRAnchors) {
+ // For inlined code, recover the original callsite and callee by finding the
+  // top-level inline frame. E.g., for the frame stack "main:1 @ foo:2 @ bar:3",
+  // the top-level frame is "main:1", the callsite is "1" and the callee is "foo".
+ auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) {
+ assert((DIL && DIL->getInlinedAt()) && "No inlined callsite");
+ const DILocation *PrevDIL = nullptr;
+ do {
+ PrevDIL = DIL;
+ DIL = DIL->getInlinedAt();
+ } while (DIL->getInlinedAt());
+
+ LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(DIL);
+ StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
+ return std::make_pair(Callsite, CalleeName);
+ };
- auto isInvalidLineOffset = [](uint32_t LineOffset) {
- return LineOffset & 0x8000;
+ auto GetCanonicalCalleeName = [](const CallBase *CB) {
+ StringRef CalleeName = UnknownIndirectCallee;
+ if (Function *Callee = CB->getCalledFunction())
+ CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
+ return CalleeName;
};
- // Check if there are any callsites in the profile that does not match to any
- // IR callsites, those callsite samples will be discarded.
- for (auto &I : FS.getBodySamples()) {
- const LineLocation &Loc = I.first;
- if (isInvalidLineOffset(Loc.LineOffset))
- continue;
+ // Extract profile matching anchors in the IR.
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ DILocation *DIL = I.getDebugLoc();
+ if (!DIL)
+ continue;
+
+ if (FunctionSamples::ProfileIsProbeBased) {
+ if (auto Probe = extractProbe(I)) {
+ // Flatten inlined IR for the matching.
+ if (DIL->getInlinedAt()) {
+ IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
+ } else {
+ // Use empty StringRef for basic block probe.
+ StringRef CalleeName;
+ if (const auto *CB = dyn_cast<CallBase>(&I)) {
+ // Skip the probe inst whose callee name is "llvm.pseudoprobe".
+ if (!isa<IntrinsicInst>(&I))
+ CalleeName = GetCanonicalCalleeName(CB);
+ }
+ IRAnchors.emplace(LineLocation(Probe->Id, 0), CalleeName);
+ }
+ }
+ } else {
+        // TODO: For line-number based profiles (AutoFDO), we currently only
+        // support finding callsite anchors. In the future, we need to parse all
+        // non-call instructions to extract line locations for profile matching.
+ if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
+ continue;
- uint64_t Count = I.second.getSamples();
- if (!I.second.getCallTargets().empty()) {
- TotalCallsiteSamples += Count;
- FuncProfiledCallsites++;
- if (!MatchedCallsiteLocs.count(Loc)) {
- MismatchedCallsiteSamples += Count;
- FuncMismatchedCallsites++;
+ if (DIL->getInlinedAt()) {
+ IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
+ } else {
+ LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(DIL);
+ StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I));
+ IRAnchors.emplace(Callsite, CalleeName);
+ }
}
}
}
+}
- for (auto &I : FS.getCallsiteSamples()) {
- const LineLocation &Loc = I.first;
- if (isInvalidLineOffset(Loc.LineOffset))
- continue;
+void SampleProfileMatcher::countMismatchedSamples(const FunctionSamples &FS) {
+ const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
+ // Skip the function that is external or renamed.
+ if (!FuncDesc)
+ return;
- uint64_t Count = 0;
- for (auto &FM : I.second) {
- Count += FM.second.getHeadSamplesEstimate();
+ if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
+ MismatchedFuncHashSamples += FS.getTotalSamples();
+ return;
+ }
+ for (const auto &I : FS.getCallsiteSamples())
+ for (const auto &CS : I.second)
+ countMismatchedSamples(CS.second);
+}
+
+void SampleProfileMatcher::countProfileMismatches(
+ const Function &F, const FunctionSamples &FS,
+ const std::map<LineLocation, StringRef> &IRAnchors,
+ const std::map<LineLocation, std::unordered_set<FunctionId>>
+ &ProfileAnchors) {
+ [[maybe_unused]] bool IsFuncHashMismatch = false;
+ if (FunctionSamples::ProfileIsProbeBased) {
+ TotalFuncHashSamples += FS.getTotalSamples();
+ TotalProfiledFunc++;
+ const auto *FuncDesc = ProbeManager->getDesc(F);
+ if (FuncDesc) {
+ if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
+ NumMismatchedFuncHash++;
+ IsFuncHashMismatch = true;
+ }
+ countMismatchedSamples(FS);
}
- TotalCallsiteSamples += Count;
+ }
+
+ uint64_t FuncMismatchedCallsites = 0;
+ uint64_t FuncProfiledCallsites = 0;
+ countProfileCallsiteMismatches(FS, IRAnchors, ProfileAnchors,
+ FuncMismatchedCallsites,
+ FuncProfiledCallsites);
+ TotalProfiledCallsites += FuncProfiledCallsites;
+ NumMismatchedCallsites += FuncMismatchedCallsites;
+ LLVM_DEBUG({
+ if (FunctionSamples::ProfileIsProbeBased && !IsFuncHashMismatch &&
+ FuncMismatchedCallsites)
+ dbgs() << "Function checksum is matched but there are "
+ << FuncMismatchedCallsites << "/" << FuncProfiledCallsites
+ << " mismatched callsites.\n";
+ });
+}
+
+void SampleProfileMatcher::countProfileCallsiteMismatches(
+ const FunctionSamples &FS,
+ const std::map<LineLocation, StringRef> &IRAnchors,
+ const std::map<LineLocation, std::unordered_set<FunctionId>>
+ &ProfileAnchors,
+ uint64_t &FuncMismatchedCallsites, uint64_t &FuncProfiledCallsites) {
+
+  // Check if there are any callsites in the profile that do not match any IR
+  // callsite; such callsite samples will be discarded.
+ for (const auto &I : ProfileAnchors) {
+ const auto &Loc = I.first;
+ const auto &Callees = I.second;
+ assert(!Callees.empty() && "Callees should not be empty");
+
+ StringRef IRCalleeName;
+ const auto &IR = IRAnchors.find(Loc);
+ if (IR != IRAnchors.end())
+ IRCalleeName = IR->second;
+
+ // Compute number of samples in the original profile.
+ uint64_t CallsiteSamples = 0;
+ auto CTM = FS.findCallTargetMapAt(Loc);
+ if (CTM) {
+ for (const auto &I : CTM.get())
+ CallsiteSamples += I.second;
+ }
+ const auto *FSMap = FS.findFunctionSamplesMapAt(Loc);
+ if (FSMap) {
+ for (const auto &I : *FSMap)
+ CallsiteSamples += I.second.getTotalSamples();
+ }
+
+ bool CallsiteIsMatched = false;
+    // Since an indirect call does not have a callee name, conservatively treat
+    // a profile callsite at an indirect-call location as matched. This reduces
+    // the number of false positives, since otherwise all the indirect call
+    // samples would be reported as mismatched.
+ if (IRCalleeName == UnknownIndirectCallee)
+ CallsiteIsMatched = true;
+ else if (Callees.size() == 1 && Callees.count(getRepInFormat(IRCalleeName)))
+ CallsiteIsMatched = true;
+
FuncProfiledCallsites++;
- if (!MatchedCallsiteLocs.count(Loc)) {
- MismatchedCallsiteSamples += Count;
+ TotalCallsiteSamples += CallsiteSamples;
+ if (!CallsiteIsMatched) {
FuncMismatchedCallsites++;
+ MismatchedCallsiteSamples += CallsiteSamples;
}
}
}
-// Populate the anchors(direct callee name) from profile.
-void SampleProfileMatcher::populateProfileCallsites(
- const FunctionSamples &FS,
- StringMap<std::set<LineLocation>> &CalleeToCallsitesMap) {
+void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
+ std::map<LineLocation, std::unordered_set<FunctionId>> &ProfileAnchors) {
+ auto isInvalidLineOffset = [](uint32_t LineOffset) {
+ return LineOffset & 0x8000;
+ };
+
for (const auto &I : FS.getBodySamples()) {
- const auto &Loc = I.first;
- const auto &CTM = I.second.getCallTargets();
- // Filter out possible indirect calls, use direct callee name as anchor.
- if (CTM.size() == 1) {
- StringRef CalleeName = CTM.begin()->first();
- const auto &Candidates = CalleeToCallsitesMap.try_emplace(
- CalleeName, std::set<LineLocation>());
- Candidates.first->second.insert(Loc);
+ const LineLocation &Loc = I.first;
+ if (isInvalidLineOffset(Loc.LineOffset))
+ continue;
+ for (const auto &I : I.second.getCallTargets()) {
+ auto Ret = ProfileAnchors.try_emplace(Loc,
+ std::unordered_set<FunctionId>());
+ Ret.first->second.insert(I.first);
}
}
for (const auto &I : FS.getCallsiteSamples()) {
const LineLocation &Loc = I.first;
+ if (isInvalidLineOffset(Loc.LineOffset))
+ continue;
const auto &CalleeMap = I.second;
- // Filter out possible indirect calls, use direct callee name as anchor.
- if (CalleeMap.size() == 1) {
- StringRef CalleeName = CalleeMap.begin()->first;
- const auto &Candidates = CalleeToCallsitesMap.try_emplace(
- CalleeName, std::set<LineLocation>());
- Candidates.first->second.insert(Loc);
+ for (const auto &I : CalleeMap) {
+ auto Ret = ProfileAnchors.try_emplace(Loc,
+ std::unordered_set<FunctionId>());
+ Ret.first->second.insert(I.first);
}
}
}
@@ -2196,12 +2339,30 @@ void SampleProfileMatcher::populateProfileCallsites(
// [1, 2, 3(foo), 4, 7, 8(bar), 9]
// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
void SampleProfileMatcher::runStaleProfileMatching(
- const std::map<LineLocation, StringRef> &IRLocations,
- StringMap<std::set<LineLocation>> &CalleeToCallsitesMap,
+ const Function &F,
+ const std::map<LineLocation, StringRef> &IRAnchors,
+ const std::map<LineLocation, std::unordered_set<FunctionId>>
+ &ProfileAnchors,
LocToLocMap &IRToProfileLocationMap) {
+ LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
+ << "\n");
assert(IRToProfileLocationMap.empty() &&
"Run stale profile matching only once per function");
+ std::unordered_map<FunctionId, std::set<LineLocation>>
+ CalleeToCallsitesMap;
+ for (const auto &I : ProfileAnchors) {
+ const auto &Loc = I.first;
+ const auto &Callees = I.second;
+    // Filter out possible indirect calls; use the direct callee name as the anchor.
+ if (Callees.size() == 1) {
+ FunctionId CalleeName = *Callees.begin();
+ const auto &Candidates = CalleeToCallsitesMap.try_emplace(
+ CalleeName, std::set<LineLocation>());
+ Candidates.first->second.insert(Loc);
+ }
+ }
+
auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
// Skip the unchanged location mapping to save memory.
if (From != To)
@@ -2212,18 +2373,19 @@ void SampleProfileMatcher::runStaleProfileMatching(
int32_t LocationDelta = 0;
SmallVector<LineLocation> LastMatchedNonAnchors;
- for (const auto &IR : IRLocations) {
+ for (const auto &IR : IRAnchors) {
const auto &Loc = IR.first;
- StringRef CalleeName = IR.second;
+ auto CalleeName = IR.second;
bool IsMatchedAnchor = false;
// Match the anchor location in lexical order.
if (!CalleeName.empty()) {
- auto ProfileAnchors = CalleeToCallsitesMap.find(CalleeName);
- if (ProfileAnchors != CalleeToCallsitesMap.end() &&
- !ProfileAnchors->second.empty()) {
- auto CI = ProfileAnchors->second.begin();
+ auto CandidateAnchors = CalleeToCallsitesMap.find(
+ getRepInFormat(CalleeName));
+ if (CandidateAnchors != CalleeToCallsitesMap.end() &&
+ !CandidateAnchors->second.empty()) {
+ auto CI = CandidateAnchors->second.begin();
const auto Candidate = *CI;
- ProfileAnchors->second.erase(CI);
+ CandidateAnchors->second.erase(CI);
InsertMatching(Loc, Candidate);
LLVM_DEBUG(dbgs() << "Callsite with callee:" << CalleeName
<< " is matched from " << Loc << " to " << Candidate
@@ -2261,122 +2423,56 @@ void SampleProfileMatcher::runStaleProfileMatching(
}
}
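A self-contained toy (deliberately not LLVM code) of the anchor-shift idea
implemented above: matched direct-call anchors pin an IR offset to a profile
offset, and offsets in between inherit the shift established at the last
matched anchor.

#include <cstdio>
#include <map>

int main() {
  // IR offset -> profile offset for anchors matched by callee name.
  std::map<int, int> Matched = {{3, 4}, {6, 8}};
  int Delta = 0;
  for (int IROff = 1; IROff <= 7; ++IROff) {
    auto It = Matched.find(IROff);
    if (It != Matched.end())
      Delta = It->second - It->first; // re-pin the shift at an anchor
    std::printf("%d -> %d\n", IROff, IROff + Delta);
  }
  return 0;
}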
-void SampleProfileMatcher::runOnFunction(const Function &F,
- const FunctionSamples &FS) {
- bool IsFuncHashMismatch = false;
- if (FunctionSamples::ProfileIsProbeBased) {
- uint64_t Count = FS.getTotalSamples();
- TotalFuncHashSamples += Count;
- TotalProfiledFunc++;
- if (!ProbeManager->profileIsValid(F, FS)) {
- MismatchedFuncHashSamples += Count;
- NumMismatchedFuncHash++;
- IsFuncHashMismatch = true;
- }
- }
-
- std::unordered_set<LineLocation, LineLocationHash> MatchedCallsiteLocs;
- // The value of the map is the name of direct callsite and use empty StringRef
- // for non-direct-call site.
- std::map<LineLocation, StringRef> IRLocations;
-
- // Extract profile matching anchors and profile mismatch metrics in the IR.
- for (auto &BB : F) {
- for (auto &I : BB) {
- // TODO: Support line-number based location(AutoFDO).
- if (FunctionSamples::ProfileIsProbeBased && isa<PseudoProbeInst>(&I)) {
- if (std::optional<PseudoProbe> Probe = extractProbe(I))
- IRLocations.emplace(LineLocation(Probe->Id, 0), StringRef());
- }
+void SampleProfileMatcher::runOnFunction(const Function &F) {
+ // We need to use flattened function samples for matching.
+  // Unlike the IR, which includes all callsites from the source code, the
+  // callsites in a profile only show up when they are hit by samples, i.e. the
+  // profile callsites in one context may differ from those in another context.
+  // To get the maximum number of callsites, we merge the function profiles
+  // from all contexts, i.e. the flattened profile, to find profile anchors.
+ const auto *FSFlattened = getFlattenedSamplesFor(F);
+ if (!FSFlattened)
+ return;
- if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
- continue;
-
- const auto *CB = dyn_cast<CallBase>(&I);
- if (auto &DLoc = I.getDebugLoc()) {
- LineLocation IRCallsite = FunctionSamples::getCallSiteIdentifier(DLoc);
-
- StringRef CalleeName;
- if (Function *Callee = CB->getCalledFunction())
- CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
-
- // Force to overwrite the callee name in case any non-call location was
- // written before.
- auto R = IRLocations.emplace(IRCallsite, CalleeName);
- R.first->second = CalleeName;
- assert((!FunctionSamples::ProfileIsProbeBased || R.second ||
- R.first->second == CalleeName) &&
- "Overwrite non-call or different callee name location for "
- "pseudo probe callsite");
-
- // Go through all the callsites on the IR and flag the callsite if the
- // target name is the same as the one in the profile.
- const auto CTM = FS.findCallTargetMapAt(IRCallsite);
- const auto CallsiteFS = FS.findFunctionSamplesMapAt(IRCallsite);
-
- // Indirect call case.
- if (CalleeName.empty()) {
- // Since indirect call does not have the CalleeName, check
- // conservatively if callsite in the profile is a callsite location.
- // This is to avoid nums of false positive since otherwise all the
- // indirect call samples will be reported as mismatching.
- if ((CTM && !CTM->empty()) || (CallsiteFS && !CallsiteFS->empty()))
- MatchedCallsiteLocs.insert(IRCallsite);
- } else {
- // Check if the call target name is matched for direct call case.
- if ((CTM && CTM->count(CalleeName)) ||
- (CallsiteFS && CallsiteFS->count(CalleeName)))
- MatchedCallsiteLocs.insert(IRCallsite);
- }
- }
- }
- }
+  // Anchors for the IR. This is a map from IR location to callee name; the
+  // callee name is empty for non-call instructions, and a dummy name
+  // (UnknownIndirectCallee) is used for unknown indirect callees.
+ std::map<LineLocation, StringRef> IRAnchors;
+ findIRAnchors(F, IRAnchors);
+  // Anchors for the profile. This is a map from callsite location to a set of
+  // callee names.
+ std::map<LineLocation, std::unordered_set<FunctionId>> ProfileAnchors;
+ findProfileAnchors(*FSFlattened, ProfileAnchors);
// Detect profile mismatch for profile staleness metrics report.
- if (ReportProfileStaleness || PersistProfileStaleness) {
- uint64_t FuncMismatchedCallsites = 0;
- uint64_t FuncProfiledCallsites = 0;
- countProfileMismatches(FS, MatchedCallsiteLocs, FuncMismatchedCallsites,
- FuncProfiledCallsites);
- TotalProfiledCallsites += FuncProfiledCallsites;
- NumMismatchedCallsites += FuncMismatchedCallsites;
- LLVM_DEBUG({
- if (FunctionSamples::ProfileIsProbeBased && !IsFuncHashMismatch &&
- FuncMismatchedCallsites)
- dbgs() << "Function checksum is matched but there are "
- << FuncMismatchedCallsites << "/" << FuncProfiledCallsites
- << " mismatched callsites.\n";
- });
- }
-
- if (IsFuncHashMismatch && SalvageStaleProfile) {
- LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
- << "\n");
-
- StringMap<std::set<LineLocation>> CalleeToCallsitesMap;
- populateProfileCallsites(FS, CalleeToCallsitesMap);
-
+ // Skip reporting the metrics for imported functions.
+ if (!GlobalValue::isAvailableExternallyLinkage(F.getLinkage()) &&
+ (ReportProfileStaleness || PersistProfileStaleness)) {
+    // Use the top-level nested FS for counting profile mismatch metrics, since
+    // currently, once a callsite is mismatched, all of its child profiles are
+    // dropped.
+ if (const auto *FS = Reader.getSamplesFor(F))
+ countProfileMismatches(F, *FS, IRAnchors, ProfileAnchors);
+ }
+
+  // Run profile matching for profiles with mismatched checksums; currently
+  // this is only supported for pseudo-probe profiles.
+ if (SalvageStaleProfile && FunctionSamples::ProfileIsProbeBased &&
+ !ProbeManager->profileIsValid(F, *FSFlattened)) {
// The matching result will be saved to IRToProfileLocationMap, create a new
// map for each function.
- auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
-
- runStaleProfileMatching(IRLocations, CalleeToCallsitesMap,
- IRToProfileLocationMap);
+ runStaleProfileMatching(F, IRAnchors, ProfileAnchors,
+ getIRToProfileLocationMap(F));
}
}
void SampleProfileMatcher::runOnModule() {
+ ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
+ FunctionSamples::ProfileIsCS);
for (auto &F : M) {
if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
continue;
- FunctionSamples *FS = nullptr;
- if (FlattenProfileForMatching)
- FS = getFlattenedSamplesFor(F);
- else
- FS = Reader.getSamplesFor(F);
- if (!FS)
- continue;
- runOnFunction(F, *FS);
+ runOnFunction(F);
}
if (SalvageStaleProfile)
distributeIRToProfileLocationMap();
@@ -2424,7 +2520,7 @@ void SampleProfileMatcher::runOnModule() {
void SampleProfileMatcher::distributeIRToProfileLocationMap(
FunctionSamples &FS) {
- const auto ProfileMappings = FuncMappings.find(FS.getName());
+ const auto ProfileMappings = FuncMappings.find(FS.getFuncName());
if (ProfileMappings != FuncMappings.end()) {
FS.setIRToProfileLocationMap(&(ProfileMappings->second));
}
@@ -2466,10 +2562,10 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
Function *F = dyn_cast<Function>(N_F.getValue());
if (F == nullptr || OrigName.empty())
continue;
- SymbolMap[OrigName] = F;
+ SymbolMap[FunctionId(OrigName)] = F;
StringRef NewName = FunctionSamples::getCanonicalFnName(*F);
if (OrigName != NewName && !NewName.empty()) {
- auto r = SymbolMap.insert(std::make_pair(NewName, F));
+ auto r = SymbolMap.emplace(FunctionId(NewName), F);
// Failing to insert means there is already an entry in SymbolMap,
// thus there are multiple functions that are mapped to the same
// stripped name. In this case of name conflicting, set the value
@@ -2482,11 +2578,11 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
if (Remapper) {
if (auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
if (*MapName != OrigName && !MapName->empty())
- SymbolMap.insert(std::make_pair(*MapName, F));
+ SymbolMap.emplace(FunctionId(*MapName), F);
}
}
}
- assert(SymbolMap.count(StringRef()) == 0 &&
+ assert(SymbolMap.count(FunctionId()) == 0 &&
"No empty StringRef should be added in SymbolMap");
if (ReportProfileStaleness || PersistProfileStaleness ||
@@ -2550,7 +2646,9 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
// but not cold accumulatively...), so the outline function showing up as
// cold in sampled binary will actually not be cold after current build.
StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
- if (NamesInProfile.count(CanonName))
+ if ((FunctionSamples::UseMD5 &&
+ GUIDsInProfile.count(Function::getGUID(CanonName))) ||
+ (!FunctionSamples::UseMD5 && NamesInProfile.count(CanonName)))
initialEntryCount = -1;
}
@@ -2571,8 +2669,24 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
if (FunctionSamples::ProfileIsCS)
Samples = ContextTracker->getBaseSamplesFor(F);
- else
+ else {
Samples = Reader->getSamplesFor(F);
+ // Try search in previously inlined functions that were split or duplicated
+ // into base.
+ if (!Samples) {
+ StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
+ auto It = OutlineFunctionSamples.find(FunctionId(CanonName));
+ if (It != OutlineFunctionSamples.end()) {
+ Samples = &It->second;
+ } else if (auto Remapper = Reader->getRemapper()) {
+        if (auto RemappedName = Remapper->lookUpNameInProfile(CanonName)) {
+          It = OutlineFunctionSamples.find(FunctionId(*RemappedName));
+ if (It != OutlineFunctionSamples.end())
+ Samples = &It->second;
+ }
+ }
+ }
+ }
if (Samples && !Samples->empty())
return emitAnnotations(F);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index 0a42de7224b4..8f0b12d0cfed 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -95,13 +96,13 @@ void PseudoProbeVerifier::runAfterPass(StringRef PassID, Any IR) {
std::string Banner =
"\n*** Pseudo Probe Verification After " + PassID.str() + " ***\n";
dbgs() << Banner;
- if (const auto **M = any_cast<const Module *>(&IR))
+ if (const auto **M = llvm::any_cast<const Module *>(&IR))
runAfterPass(*M);
- else if (const auto **F = any_cast<const Function *>(&IR))
+ else if (const auto **F = llvm::any_cast<const Function *>(&IR))
runAfterPass(*F);
- else if (const auto **C = any_cast<const LazyCallGraph::SCC *>(&IR))
+ else if (const auto **C = llvm::any_cast<const LazyCallGraph::SCC *>(&IR))
runAfterPass(*C);
- else if (const auto **L = any_cast<const Loop *>(&IR))
+ else if (const auto **L = llvm::any_cast<const Loop *>(&IR))
runAfterPass(*L);
else
llvm_unreachable("Unknown IR unit");
@@ -221,12 +222,26 @@ void SampleProfileProber::computeProbeIdForBlocks() {
}
void SampleProfileProber::computeProbeIdForCallsites() {
+ LLVMContext &Ctx = F->getContext();
+ Module *M = F->getParent();
+
for (auto &BB : *F) {
for (auto &I : BB) {
if (!isa<CallBase>(I))
continue;
if (isa<IntrinsicInst>(&I))
continue;
+
+ // The current implementation uses the lower 16 bits of the discriminator
+ // so anything larger than 0xFFFF will be ignored.
+ if (LastProbeId >= 0xFFFF) {
+ std::string Msg = "Pseudo instrumentation incomplete for " +
+ std::string(F->getName()) + " because it's too large";
+ Ctx.diagnose(
+ DiagnosticInfoSampleProfile(M->getName().data(), Msg, DS_Warning));
+ return;
+ }
+
CallProbeIds[&I] = ++LastProbeId;
}
}
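Why the cap sits at 0xFFFF: per the comment above, only the low 16 bits of the
discriminator carry the probe id, so a larger id would alias a smaller one. An
illustrative packing; the production encoding lives in the pseudo-probe support
code and may differ in detail:

#include <cstdint>
std::uint32_t encodeProbeId(std::uint32_t Discriminator, std::uint32_t ProbeId) {
  return (Discriminator & ~0xFFFFu) | (ProbeId & 0xFFFFu); // low 16 bits only
}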
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/StripSymbols.cpp
index 147513452789..28d7d4ba6b01 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -30,12 +30,18 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/TypeFinder.h"
#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/StripSymbols.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+static cl::opt<bool>
+ StripGlobalConstants("strip-global-constants", cl::init(false), cl::Hidden,
+ cl::desc("Removes debug compile units which reference "
+ "to non-existing global constants"));
+
/// OnlyUsedBy - Return true if V is only used by Usr.
static bool OnlyUsedBy(Value *V, Value *Usr) {
for (User *U : V->users())
@@ -73,7 +79,7 @@ static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) {
Value *V = VI->getValue();
++VI;
if (!isa<GlobalValue>(V) || cast<GlobalValue>(V)->hasLocalLinkage()) {
- if (!PreserveDbgInfo || !V->getName().startswith("llvm.dbg"))
+ if (!PreserveDbgInfo || !V->getName().starts_with("llvm.dbg"))
// Set name to "", removing from symbol table!
V->setName("");
}
@@ -88,7 +94,7 @@ static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
for (StructType *STy : StructTypes) {
if (STy->isLiteral() || STy->getName().empty()) continue;
- if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg"))
+ if (PreserveDbgInfo && STy->getName().starts_with("llvm.dbg"))
continue;
STy->setName("");
@@ -118,13 +124,13 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
for (GlobalVariable &GV : M.globals()) {
if (GV.hasLocalLinkage() && !llvmUsedValues.contains(&GV))
- if (!PreserveDbgInfo || !GV.getName().startswith("llvm.dbg"))
+ if (!PreserveDbgInfo || !GV.getName().starts_with("llvm.dbg"))
GV.setName(""); // Internal symbols can't participate in linkage
}
for (Function &I : M) {
if (I.hasLocalLinkage() && !llvmUsedValues.contains(&I))
- if (!PreserveDbgInfo || !I.getName().startswith("llvm.dbg"))
+ if (!PreserveDbgInfo || !I.getName().starts_with("llvm.dbg"))
I.setName(""); // Internal symbols can't participate in linkage
if (auto *Symtab = I.getValueSymbolTable())
StripSymtab(*Symtab, PreserveDbgInfo);
@@ -216,7 +222,8 @@ static bool stripDeadDebugInfoImpl(Module &M) {
// Create our live global variable list.
bool GlobalVariableChange = false;
for (auto *DIG : DIC->getGlobalVariables()) {
- if (DIG->getExpression() && DIG->getExpression()->isConstant())
+ if (DIG->getExpression() && DIG->getExpression()->isConstant() &&
+ !StripGlobalConstants)
LiveGVs.insert(DIG);
    // Make sure we visit each global variable only once.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
index d46f9a6c6757..f6f895676084 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
@@ -111,7 +111,7 @@ PreservedAnalyses SyntheticCountsPropagation::run(Module &M,
// Now compute the callsite count from relative frequency and
// entry count:
BasicBlock *CSBB = CB.getParent();
- Scaled64 EntryFreq(BFI.getEntryFreq(), 0);
+ Scaled64 EntryFreq(BFI.getEntryFreq().getFrequency(), 0);
Scaled64 BBCount(BFI.getBlockFreq(CSBB).getFrequency(), 0);
BBCount /= EntryFreq;
BBCount *= Counts[Caller];
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index fc1e70b1b3d3..e5f9fa1dda88 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -186,7 +186,7 @@ void simplifyExternals(Module &M) {
if (!F.isDeclaration() || F.getFunctionType() == EmptyFT ||
// Changing the type of an intrinsic may invalidate the IR.
- F.getName().startswith("llvm."))
+ F.getName().starts_with("llvm."))
continue;
Function *NewF =
@@ -198,7 +198,7 @@ void simplifyExternals(Module &M) {
AttributeList::FunctionIndex,
F.getAttributes().getFnAttrs()));
NewF->takeName(&F);
- F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
+ F.replaceAllUsesWith(NewF);
F.eraseFromParent();
}
@@ -329,7 +329,7 @@ void splitAndWriteThinLTOBitcode(
// comdat in MergedM to keep the comdat together.
DenseSet<const Comdat *> MergedMComdats;
for (GlobalVariable &GV : M.globals())
- if (HasTypeMetadata(&GV)) {
+ if (!GV.isDeclaration() && HasTypeMetadata(&GV)) {
if (const auto *C = GV.getComdat())
MergedMComdats.insert(C);
forEachVirtualFunction(GV.getInitializer(), [&](Function *F) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index d33258642365..85afc020dbf8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -58,7 +58,6 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -369,8 +368,6 @@ template <> struct DenseMapInfo<VTableSlotSummary> {
} // end namespace llvm
-namespace {
-
// Returns true if the function must be unreachable based on ValueInfo.
//
// In particular, identifies a function as unreachable in the following
@@ -378,7 +375,7 @@ namespace {
// 1) All summaries are live.
// 2) All function summaries indicate it's unreachable
// 3) There is no non-function with the same GUID (which is rare)
-bool mustBeUnreachableFunction(ValueInfo TheFnVI) {
+static bool mustBeUnreachableFunction(ValueInfo TheFnVI) {
if ((!TheFnVI) || TheFnVI.getSummaryList().empty()) {
// Returns false if ValueInfo is absent, or the summary list is empty
// (e.g., function declarations).
@@ -403,6 +400,7 @@ bool mustBeUnreachableFunction(ValueInfo TheFnVI) {
return true;
}
+namespace {
// A virtual call site. VTable is the loaded virtual table pointer, and CS is
// the indirect virtual call.
struct VirtualCallSite {
@@ -590,7 +588,7 @@ struct DevirtModule {
: M(M), AARGetter(AARGetter), LookupDomTree(LookupDomTree),
ExportSummary(ExportSummary), ImportSummary(ImportSummary),
Int8Ty(Type::getInt8Ty(M.getContext())),
- Int8PtrTy(Type::getInt8PtrTy(M.getContext())),
+ Int8PtrTy(PointerType::getUnqual(M.getContext())),
Int32Ty(Type::getInt32Ty(M.getContext())),
Int64Ty(Type::getInt64Ty(M.getContext())),
IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)),
@@ -776,20 +774,59 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
return PreservedAnalyses::none();
}
-namespace llvm {
// Enable whole program visibility if enabled by client (e.g. linker) or
// internal option, and not force disabled.
-bool hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) {
+bool llvm::hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) {
return (WholeProgramVisibilityEnabledInLTO || WholeProgramVisibility) &&
!DisableWholeProgramVisibility;
}
+static bool
+typeIDVisibleToRegularObj(StringRef TypeID,
+ function_ref<bool(StringRef)> IsVisibleToRegularObj) {
+ // TypeID for member function pointer type is an internal construct
+ // and won't exist in IsVisibleToRegularObj. The full TypeID
+ // will be present and participate in invalidation.
+ if (TypeID.ends_with(".virtual"))
+ return false;
+
+  // A TypeID that doesn't start with the Itanium mangling prefix (_ZTS) names
+  // a non-externally-visible type, which cannot interact with external
+  // native files. See CodeGenModule::CreateMetadataIdentifierImpl.
+ if (!TypeID.consume_front("_ZTS"))
+ return false;
+
+ // TypeID is keyed off the type name symbol (_ZTS). However, the native
+ // object may not contain this symbol if it does not contain a key
+ // function for the base type and thus only contains a reference to the
+ // type info (_ZTI). To catch this case we query using the type info
+ // symbol corresponding to the TypeID.
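+  // For example, TypeID "_ZTS1A" results in a query for "_ZTI1A".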
+ std::string typeInfo = ("_ZTI" + TypeID).str();
+ return IsVisibleToRegularObj(typeInfo);
+}
+
+static bool
+skipUpdateDueToValidation(GlobalVariable &GV,
+ function_ref<bool(StringRef)> IsVisibleToRegularObj) {
+ SmallVector<MDNode *, 2> Types;
+ GV.getMetadata(LLVMContext::MD_type, Types);
+
+ for (auto Type : Types)
+ if (auto *TypeID = dyn_cast<MDString>(Type->getOperand(1).get()))
+ return typeIDVisibleToRegularObj(TypeID->getString(),
+ IsVisibleToRegularObj);
+
+ return false;
+}
+
/// If whole program visibility asserted, then upgrade all public vcall
/// visibility metadata on vtable definitions to linkage unit visibility in
/// Module IR (for regular or hybrid LTO).
-void updateVCallVisibilityInModule(
+void llvm::updateVCallVisibilityInModule(
Module &M, bool WholeProgramVisibilityEnabledInLTO,
- const DenseSet<GlobalValue::GUID> &DynamicExportSymbols) {
+ const DenseSet<GlobalValue::GUID> &DynamicExportSymbols,
+ bool ValidateAllVtablesHaveTypeInfos,
+ function_ref<bool(StringRef)> IsVisibleToRegularObj) {
if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
return;
for (GlobalVariable &GV : M.globals()) {
@@ -800,13 +837,19 @@ void updateVCallVisibilityInModule(
GV.getVCallVisibility() == GlobalObject::VCallVisibilityPublic &&
// Don't upgrade the visibility for symbols exported to the dynamic
// linker, as we have no information on their eventual use.
- !DynamicExportSymbols.count(GV.getGUID()))
+ !DynamicExportSymbols.count(GV.getGUID()) &&
+ // With validation enabled, we want to exclude symbols visible to
+ // regular objects. Local symbols will be in this group due to the
+ // current implementation but those with VCallVisibilityTranslationUnit
+ // will have already been marked in clang so are unaffected.
+ !(ValidateAllVtablesHaveTypeInfos &&
+ skipUpdateDueToValidation(GV, IsVisibleToRegularObj)))
GV.setVCallVisibilityMetadata(GlobalObject::VCallVisibilityLinkageUnit);
}
}
-void updatePublicTypeTestCalls(Module &M,
- bool WholeProgramVisibilityEnabledInLTO) {
+void llvm::updatePublicTypeTestCalls(Module &M,
+ bool WholeProgramVisibilityEnabledInLTO) {
Function *PublicTypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::public_type_test));
if (!PublicTypeTestFunc)
@@ -832,12 +875,26 @@ void updatePublicTypeTestCalls(Module &M,
}
}
+/// Based on the typeID string, get all associated vtable GUIDs that are
+/// visible to regular objects.
+void llvm::getVisibleToRegularObjVtableGUIDs(
+ ModuleSummaryIndex &Index,
+ DenseSet<GlobalValue::GUID> &VisibleToRegularObjSymbols,
+ function_ref<bool(StringRef)> IsVisibleToRegularObj) {
+ for (const auto &typeID : Index.typeIdCompatibleVtableMap()) {
+ if (typeIDVisibleToRegularObj(typeID.first, IsVisibleToRegularObj))
+ for (const TypeIdOffsetVtableInfo &P : typeID.second)
+ VisibleToRegularObjSymbols.insert(P.VTableVI.getGUID());
+ }
+}
+
/// If whole program visibility asserted, then upgrade all public vcall
/// visibility metadata on vtable definition summaries to linkage unit
/// visibility in Module summary index (for ThinLTO).
-void updateVCallVisibilityInIndex(
+void llvm::updateVCallVisibilityInIndex(
ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO,
- const DenseSet<GlobalValue::GUID> &DynamicExportSymbols) {
+ const DenseSet<GlobalValue::GUID> &DynamicExportSymbols,
+ const DenseSet<GlobalValue::GUID> &VisibleToRegularObjSymbols) {
if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
return;
for (auto &P : Index) {
@@ -850,18 +907,24 @@ void updateVCallVisibilityInIndex(
if (!GVar ||
GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic)
continue;
+ // With validation enabled, we want to exclude symbols visible to regular
+ // objects. Local symbols will be in this group due to the current
+ // implementation but those with VCallVisibilityTranslationUnit will have
+ // already been marked in clang so are unaffected.
+ if (VisibleToRegularObjSymbols.count(P.first))
+ continue;
GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit);
}
}
}
-void runWholeProgramDevirtOnIndex(
+void llvm::runWholeProgramDevirtOnIndex(
ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run();
}
-void updateIndexWPDForExports(
+void llvm::updateIndexWPDForExports(
ModuleSummaryIndex &Summary,
function_ref<bool(StringRef, ValueInfo)> isExported,
std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
@@ -887,8 +950,6 @@ void updateIndexWPDForExports(
}
}
-} // end namespace llvm
-
static Error checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary) {
// Check that summary index contains regular LTO module when performing
// export to prevent occasional use of index from pure ThinLTO compilation
@@ -942,7 +1003,7 @@ bool DevirtModule::runForTesting(
ExitOnError ExitOnErr(
"-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": ");
std::error_code EC;
- if (StringRef(ClWriteSummary).endswith(".bc")) {
+ if (StringRef(ClWriteSummary).ends_with(".bc")) {
raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_None);
ExitOnErr(errorCodeToError(EC));
writeIndexToFile(*Summary, OS);
@@ -1045,8 +1106,8 @@ bool DevirtModule::tryFindVirtualCallTargets(
}
bool DevirtIndex::tryFindVirtualCallTargets(
- std::vector<ValueInfo> &TargetsForSlot, const TypeIdCompatibleVtableInfo TIdInfo,
- uint64_t ByteOffset) {
+ std::vector<ValueInfo> &TargetsForSlot,
+ const TypeIdCompatibleVtableInfo TIdInfo, uint64_t ByteOffset) {
for (const TypeIdOffsetVtableInfo &P : TIdInfo) {
// Find a representative copy of the vtable initializer.
// We can have multiple available_externally, linkonce_odr and weak_odr
@@ -1203,7 +1264,8 @@ static bool AddCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee) {
// to better ensure we have the opportunity to inline them.
bool IsExported = false;
auto &S = Callee.getSummaryList()[0];
- CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* RelBF = */ 0);
+ CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* HasTailCall = */ false,
+ /* RelBF = */ 0);
auto AddCalls = [&](CallSiteInfo &CSInfo) {
for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) {
FS->addCall({Callee, CI});
@@ -1437,7 +1499,7 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
IRBuilder<> IRB(&CB);
std::vector<Value *> Args;
- Args.push_back(IRB.CreateBitCast(VCallSite.VTable, Int8PtrTy));
+ Args.push_back(VCallSite.VTable);
llvm::append_range(Args, CB.args());
CallBase *NewCS = nullptr;
@@ -1471,10 +1533,10 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
// llvm.type.test and therefore require an llvm.type.test resolution for the
// type identifier.
- std::for_each(CallBases.begin(), CallBases.end(), [](auto &CBs) {
- CBs.first->replaceAllUsesWith(CBs.second);
- CBs.first->eraseFromParent();
- });
+ for (auto &[Old, New] : CallBases) {
+ Old->replaceAllUsesWith(New);
+ Old->eraseFromParent();
+ }
};
Apply(SlotInfo.CSInfo);
for (auto &P : SlotInfo.ConstCSInfo)
@@ -1648,8 +1710,7 @@ void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
}
Constant *DevirtModule::getMemberAddr(const TypeMemberInfo *M) {
- Constant *C = ConstantExpr::getBitCast(M->Bits->GV, Int8PtrTy);
- return ConstantExpr::getGetElementPtr(Int8Ty, C,
+ return ConstantExpr::getGetElementPtr(Int8Ty, M->Bits->GV,
ConstantInt::get(Int64Ty, M->Offset));
}
@@ -1708,8 +1769,7 @@ void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
continue;
auto *RetType = cast<IntegerType>(Call.CB.getType());
IRBuilder<> B(&Call.CB);
- Value *Addr =
- B.CreateGEP(Int8Ty, B.CreateBitCast(Call.VTable, Int8PtrTy), Byte);
+ Value *Addr = B.CreateGEP(Int8Ty, Call.VTable, Byte);
if (RetType->getBitWidth() == 1) {
Value *Bits = B.CreateLoad(Int8Ty, Addr);
Value *BitsAndBit = B.CreateAnd(Bits, Bit);
@@ -2007,17 +2067,14 @@ void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
if (TypeCheckedLoadFunc->getIntrinsicID() ==
Intrinsic::type_checked_load_relative) {
Value *GEP = LoadB.CreateGEP(Int8Ty, Ptr, Offset);
- Value *GEPPtr = LoadB.CreateBitCast(GEP, PointerType::getUnqual(Int32Ty));
- LoadedValue = LoadB.CreateLoad(Int32Ty, GEPPtr);
+ LoadedValue = LoadB.CreateLoad(Int32Ty, GEP);
LoadedValue = LoadB.CreateSExt(LoadedValue, IntPtrTy);
GEP = LoadB.CreatePtrToInt(GEP, IntPtrTy);
LoadedValue = LoadB.CreateAdd(GEP, LoadedValue);
LoadedValue = LoadB.CreateIntToPtr(LoadedValue, Int8PtrTy);
} else {
Value *GEP = LoadB.CreateGEP(Int8Ty, Ptr, Offset);
- Value *GEPPtr =
- LoadB.CreateBitCast(GEP, PointerType::getUnqual(Int8PtrTy));
- LoadedValue = LoadB.CreateLoad(Int8PtrTy, GEPPtr);
+ LoadedValue = LoadB.CreateLoad(Int8PtrTy, GEP);
}
for (Instruction *LoadedPtr : LoadedPtrs) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 91ca44e0f11e..719a2678fc18 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -830,15 +830,15 @@ static Instruction *foldNoWrapAdd(BinaryOperator &Add,
// (sext (X +nsw NarrowC)) + C --> (sext X) + (sext(NarrowC) + C)
Constant *NarrowC;
if (match(Op0, m_OneUse(m_SExt(m_NSWAdd(m_Value(X), m_Constant(NarrowC)))))) {
- Constant *WideC = ConstantExpr::getSExt(NarrowC, Ty);
- Constant *NewC = ConstantExpr::getAdd(WideC, Op1C);
+ Value *WideC = Builder.CreateSExt(NarrowC, Ty);
+ Value *NewC = Builder.CreateAdd(WideC, Op1C);
Value *WideX = Builder.CreateSExt(X, Ty);
return BinaryOperator::CreateAdd(WideX, NewC);
}
// (zext (X +nuw NarrowC)) + C --> (zext X) + (zext(NarrowC) + C)
if (match(Op0, m_OneUse(m_ZExt(m_NUWAdd(m_Value(X), m_Constant(NarrowC)))))) {
- Constant *WideC = ConstantExpr::getZExt(NarrowC, Ty);
- Constant *NewC = ConstantExpr::getAdd(WideC, Op1C);
+ Value *WideC = Builder.CreateZExt(NarrowC, Ty);
+ Value *NewC = Builder.CreateAdd(WideC, Op1C);
Value *WideX = Builder.CreateZExt(X, Ty);
return BinaryOperator::CreateAdd(WideX, NewC);
}
@@ -903,8 +903,7 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
// (X | Op01C) + Op1C --> X + (Op01C + Op1C) iff the `or` is actually an `add`
Constant *Op01C;
- if (match(Op0, m_Or(m_Value(X), m_ImmConstant(Op01C))) &&
- haveNoCommonBitsSet(X, Op01C, DL, &AC, &Add, &DT))
+ if (match(Op0, m_DisjointOr(m_Value(X), m_ImmConstant(Op01C))))
return BinaryOperator::CreateAdd(X, ConstantExpr::getAdd(Op01C, Op1C));
// (X | C2) + C --> (X | C2) ^ C2 iff (C2 == -C)
@@ -995,6 +994,69 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
return nullptr;
}
+// Match variations of a^2 + 2*a*b + b^2.
+//
+// To reuse the code between the FP and Int versions, the instruction opcodes
+// and constant types have been turned into template parameters.
+//
+// Mul2Rhs: The constant to perform the multiplicative equivalent of X*2 with;
+// should be `m_SpecificFP(2.0)` for FP and `m_SpecificInt(1)` for Int
+// (we're matching `X<<1` instead of `X*2` for Int)
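+//
+// For example (Int, with arbitrary values a = 3, b = 5):
+// (3*3) + (((3<<1) + 5) * 5) == 9 + 55 == 64 == (3 + 5)^2, so the whole
+// expression folds to (a + b) * (a + b).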
+template <bool FP, typename Mul2Rhs>
+static bool matchesSquareSum(BinaryOperator &I, Mul2Rhs M2Rhs, Value *&A,
+ Value *&B) {
+ constexpr unsigned MulOp = FP ? Instruction::FMul : Instruction::Mul;
+ constexpr unsigned AddOp = FP ? Instruction::FAdd : Instruction::Add;
+ constexpr unsigned Mul2Op = FP ? Instruction::FMul : Instruction::Shl;
+
+ // (a * a) + (((a * 2) + b) * b)
+ if (match(&I, m_c_BinOp(
+ AddOp, m_OneUse(m_BinOp(MulOp, m_Value(A), m_Deferred(A))),
+ m_OneUse(m_BinOp(
+ MulOp,
+ m_c_BinOp(AddOp, m_BinOp(Mul2Op, m_Deferred(A), M2Rhs),
+ m_Value(B)),
+ m_Deferred(B))))))
+ return true;
+
+ // ((a * b) * 2) or ((a * 2) * b)
+ // +
+ // (a * a + b * b) or (b * b + a * a)
+ return match(
+ &I,
+ m_c_BinOp(AddOp,
+ m_CombineOr(
+ m_OneUse(m_BinOp(
+ Mul2Op, m_BinOp(MulOp, m_Value(A), m_Value(B)), M2Rhs)),
+ m_OneUse(m_BinOp(MulOp, m_BinOp(Mul2Op, m_Value(A), M2Rhs),
+ m_Value(B)))),
+ m_OneUse(m_c_BinOp(
+ AddOp, m_BinOp(MulOp, m_Deferred(A), m_Deferred(A)),
+ m_BinOp(MulOp, m_Deferred(B), m_Deferred(B))))));
+}
+
+// Fold integer variations of a^2 + 2*a*b + b^2 -> (a + b)^2
+Instruction *InstCombinerImpl::foldSquareSumInt(BinaryOperator &I) {
+ Value *A, *B;
+ if (matchesSquareSum</*FP*/ false>(I, m_SpecificInt(1), A, B)) {
+ Value *AB = Builder.CreateAdd(A, B);
+ return BinaryOperator::CreateMul(AB, AB);
+ }
+ return nullptr;
+}
+
+// Fold floating point variations of a^2 + 2*a*b + b^2 -> (a + b)^2
+// Requires `nsz` and `reassoc`.
+Instruction *InstCombinerImpl::foldSquareSumFP(BinaryOperator &I) {
+ assert(I.hasAllowReassoc() && I.hasNoSignedZeros() && "Assumption mismatch");
+ Value *A, *B;
+ if (matchesSquareSum</*FP*/ true>(I, m_SpecificFP(2.0), A, B)) {
+ Value *AB = Builder.CreateFAddFMF(A, B, &I);
+ return BinaryOperator::CreateFMulFMF(AB, AB, &I);
+ }
+ return nullptr;
+}
+
// Matches multiplication expression Op * C where C is a constant. Returns the
// constant value in C and the other operand in Op. Returns true if such a
// match is found.
@@ -1146,6 +1208,21 @@ static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) {
return nullptr;
}
+// Transform:
+// (add A, (shl (neg B), Y))
+// -> (sub A, (shl B, Y))
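+//
+// e.g. A = 10, B = 3, Y = 1: 10 + ((-3) << 1) == 10 - (3 << 1) == 4.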
+static Instruction *combineAddSubWithShlAddSub(InstCombiner::BuilderTy &Builder,
+ const BinaryOperator &I) {
+ Value *A, *B, *Cnt;
+ if (match(&I,
+ m_c_Add(m_OneUse(m_Shl(m_OneUse(m_Neg(m_Value(B))), m_Value(Cnt))),
+ m_Value(A)))) {
+ Value *NewShl = Builder.CreateShl(B, Cnt);
+ return BinaryOperator::CreateSub(A, NewShl);
+ }
+ return nullptr;
+}
+
/// Try to reduce signed division by power-of-2 to an arithmetic shift right.
static Instruction *foldAddToAshr(BinaryOperator &Add) {
// Division must be by power-of-2, but not the minimum signed value.
@@ -1156,18 +1233,28 @@ static Instruction *foldAddToAshr(BinaryOperator &Add) {
return nullptr;
// Rounding is done by adding -1 if the dividend (X) is negative and has any
- // low bits set. The canonical pattern for that is an "ugt" compare with SMIN:
- // sext (icmp ugt (X & (DivC - 1)), SMIN)
- const APInt *MaskC;
+  // low bits set. We recognize two canonical patterns:
+  // 1. For an 'ugt' cmp with the signed minimum value (SMIN), the
+  //    pattern is: sext (icmp ugt (X & (DivC - 1)), SMIN).
+  // 2. For an 'eq' cmp, the pattern is:
+  //    sext (icmp eq (X & (SMIN + 1)), SMIN + 1).
+  // Note that by the time we get here, 'ugt' has already been canonicalized
+  // into 'eq' where possible.
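+  // For example, for i8 X and DivC == 4, pattern 1 has MaskC == (SMin | 3)
+  // == 0x83, and (sdiv X, 4) plus the rounding sext folds to (ashr X, 2).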
+ const APInt *MaskC, *MaskCCmp;
ICmpInst::Predicate Pred;
if (!match(Add.getOperand(1),
m_SExt(m_ICmp(Pred, m_And(m_Specific(X), m_APInt(MaskC)),
- m_SignMask()))) ||
- Pred != ICmpInst::ICMP_UGT)
+ m_APInt(MaskCCmp)))))
+ return nullptr;
+
+ if ((Pred != ICmpInst::ICMP_UGT || !MaskCCmp->isSignMask()) &&
+ (Pred != ICmpInst::ICMP_EQ || *MaskCCmp != *MaskC))
return nullptr;
APInt SMin = APInt::getSignedMinValue(Add.getType()->getScalarSizeInBits());
- if (*MaskC != (SMin | (*DivC - 1)))
+ bool IsMaskValid = Pred == ICmpInst::ICMP_UGT
+ ? (*MaskC == (SMin | (*DivC - 1)))
+ : (*DivC == 2 && *MaskC == SMin + 1);
+ if (!IsMaskValid)
return nullptr;
// (X / DivC) + sext ((X & (SMin | (DivC - 1)) >u SMin) --> X >>s log2(DivC)
@@ -1327,8 +1414,10 @@ static Instruction *foldBoxMultiply(BinaryOperator &I) {
// ResLo = (CrossSum << HalfBits) + (YLo * XLo)
Value *XLo, *YLo;
Value *CrossSum;
+ // Require one-use on the multiply to avoid increasing the number of
+ // multiplications.
if (!match(&I, m_c_Add(m_Shl(m_Value(CrossSum), m_SpecificInt(HalfBits)),
- m_Mul(m_Value(YLo), m_Value(XLo)))))
+ m_OneUse(m_Mul(m_Value(YLo), m_Value(XLo))))))
return nullptr;
// XLo = X & HalfMask
@@ -1386,6 +1475,9 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
if (Instruction *R = foldBinOpShiftWithShift(I))
return R;
+ if (Instruction *R = combineAddSubWithShlAddSub(Builder, I))
+ return R;
+
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
Type *Ty = I.getType();
if (Ty->isIntOrIntVectorTy(1))
@@ -1406,7 +1498,11 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
return BinaryOperator::CreateNeg(Builder.CreateAdd(A, B));
// -A + B --> B - A
- return BinaryOperator::CreateSub(RHS, A);
+ auto *Sub = BinaryOperator::CreateSub(RHS, A);
+ auto *OB0 = cast<OverflowingBinaryOperator>(LHS);
+ Sub->setHasNoSignedWrap(I.hasNoSignedWrap() && OB0->hasNoSignedWrap());
+
+ return Sub;
}
// A + -B --> A - B
@@ -1485,8 +1581,9 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
return replaceInstUsesWith(I, Constant::getNullValue(I.getType()));
// A+B --> A|B iff A and B have no bits set in common.
- if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT))
- return BinaryOperator::CreateOr(LHS, RHS);
+ WithCache<const Value *> LHSCache(LHS), RHSCache(RHS);
+ if (haveNoCommonBitsSet(LHSCache, RHSCache, SQ.getWithInstruction(&I)))
+ return BinaryOperator::CreateDisjointOr(LHS, RHS);
if (Instruction *Ext = narrowMathIfNoOverflow(I))
return Ext;
@@ -1576,15 +1673,33 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
m_c_UMin(m_Deferred(A), m_Deferred(B))))))
return BinaryOperator::CreateWithCopiedFlags(Instruction::Add, A, B, &I);
+ // (~X) + (~Y) --> -2 - (X + Y)
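+  // (Using ~V == -V - 1: (~X) + (~Y) == (-X - 1) + (-Y - 1) == -2 - (X + Y).)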
+ {
+    // To ensure this saves instructions, we need to consume both LHS and RHS
+    // (i.e. both have a `not` we can fold away).
+ bool ConsumesLHS, ConsumesRHS;
+ if (isFreeToInvert(LHS, LHS->hasOneUse(), ConsumesLHS) && ConsumesLHS &&
+ isFreeToInvert(RHS, RHS->hasOneUse(), ConsumesRHS) && ConsumesRHS) {
+ Value *NotLHS = getFreelyInverted(LHS, LHS->hasOneUse(), &Builder);
+ Value *NotRHS = getFreelyInverted(RHS, RHS->hasOneUse(), &Builder);
+ assert(NotLHS != nullptr && NotRHS != nullptr &&
+ "isFreeToInvert desynced with getFreelyInverted");
+ Value *LHSPlusRHS = Builder.CreateAdd(NotLHS, NotRHS);
+ return BinaryOperator::CreateSub(ConstantInt::get(RHS->getType(), -2),
+ LHSPlusRHS);
+ }
+ }
+
// TODO(jingyue): Consider willNotOverflowSignedAdd and
// willNotOverflowUnsignedAdd to reduce the number of invocations of
// computeKnownBits.
bool Changed = false;
- if (!I.hasNoSignedWrap() && willNotOverflowSignedAdd(LHS, RHS, I)) {
+ if (!I.hasNoSignedWrap() && willNotOverflowSignedAdd(LHSCache, RHSCache, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
- if (!I.hasNoUnsignedWrap() && willNotOverflowUnsignedAdd(LHS, RHS, I)) {
+ if (!I.hasNoUnsignedWrap() &&
+ willNotOverflowUnsignedAdd(LHSCache, RHSCache, I)) {
Changed = true;
I.setHasNoUnsignedWrap(true);
}
@@ -1610,11 +1725,14 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
// ctpop(A) + ctpop(B) => ctpop(A | B) if A and B have no bits set in common.
if (match(LHS, m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(A)))) &&
match(RHS, m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(B)))) &&
- haveNoCommonBitsSet(A, B, DL, &AC, &I, &DT))
+ haveNoCommonBitsSet(A, B, SQ.getWithInstruction(&I)))
return replaceInstUsesWith(
I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()},
{Builder.CreateOr(A, B)}));
+ if (Instruction *Res = foldSquareSumInt(I))
+ return Res;
+
if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
return Res;
@@ -1755,10 +1873,11 @@ Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) {
// instcombined.
if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
if (IsValidPromotion(FPType, LHSIntVal->getType())) {
- Constant *CI =
- ConstantExpr::getFPToSI(CFP, LHSIntVal->getType());
+ Constant *CI = ConstantFoldCastOperand(Instruction::FPToSI, CFP,
+ LHSIntVal->getType(), DL);
if (LHSConv->hasOneUse() &&
- ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
+ ConstantFoldCastOperand(Instruction::SIToFP, CI, I.getType(), DL) ==
+ CFP &&
willNotOverflowSignedAdd(LHSIntVal, CI, I)) {
// Insert the new integer add.
Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, CI, "addconv");
@@ -1794,6 +1913,9 @@ Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) {
if (Instruction *F = factorizeFAddFSub(I, Builder))
return F;
+ if (Instruction *F = foldSquareSumFP(I))
+ return F;
+
// Try to fold fadd into start value of reduction intrinsic.
if (match(&I, m_c_FAdd(m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_fadd>(
m_AnyZeroFP(), m_Value(X))),
@@ -2017,14 +2139,16 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
// C-(X+C2) --> (C-C2)-X
if (match(Op1, m_Add(m_Value(X), m_ImmConstant(C2)))) {
- // C-C2 never overflow, and C-(X+C2), (X+C2) has NSW
- // => (C-C2)-X can have NSW
+ // C-C2 never overflow, and C-(X+C2), (X+C2) has NSW/NUW
+ // => (C-C2)-X can have NSW/NUW
bool WillNotSOV = willNotOverflowSignedSub(C, C2, I);
BinaryOperator *Res =
BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
Res->setHasNoSignedWrap(I.hasNoSignedWrap() && OBO1->hasNoSignedWrap() &&
WillNotSOV);
+ Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap() &&
+ OBO1->hasNoUnsignedWrap());
return Res;
}
}
@@ -2058,7 +2182,9 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
m_Select(m_Value(), m_Specific(Op1), m_Specific(&I))) ||
match(UI, m_Select(m_Value(), m_Specific(&I), m_Specific(Op1)));
})) {
- if (Value *NegOp1 = Negator::Negate(IsNegation, Op1, *this))
+ if (Value *NegOp1 = Negator::Negate(IsNegation, /* IsNSW */ IsNegation &&
+ I.hasNoSignedWrap(),
+ Op1, *this))
return BinaryOperator::CreateAdd(NegOp1, Op0);
}
if (IsNegation)
@@ -2093,19 +2219,50 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
// ((X - Y) - Op1) --> X - (Y + Op1)
if (match(Op0, m_OneUse(m_Sub(m_Value(X), m_Value(Y))))) {
- Value *Add = Builder.CreateAdd(Y, Op1);
- return BinaryOperator::CreateSub(X, Add);
+ OverflowingBinaryOperator *LHSSub = cast<OverflowingBinaryOperator>(Op0);
+ bool HasNUW = I.hasNoUnsignedWrap() && LHSSub->hasNoUnsignedWrap();
+ bool HasNSW = HasNUW && I.hasNoSignedWrap() && LHSSub->hasNoSignedWrap();
+ Value *Add = Builder.CreateAdd(Y, Op1, "", /* HasNUW */ HasNUW,
+ /* HasNSW */ HasNSW);
+ BinaryOperator *Sub = BinaryOperator::CreateSub(X, Add);
+ Sub->setHasNoUnsignedWrap(HasNUW);
+ Sub->setHasNoSignedWrap(HasNSW);
+ return Sub;
+ }
+
+ {
+ // (X + Z) - (Y + Z) --> (X - Y)
+ // This is done in other passes, but we want to be able to consume this
+ // pattern in InstCombine so we can generate it without creating infinite
+ // loops.
+ if (match(Op0, m_Add(m_Value(X), m_Value(Z))) &&
+ match(Op1, m_c_Add(m_Value(Y), m_Specific(Z))))
+ return BinaryOperator::CreateSub(X, Y);
+
+ // (X + C0) - (Y + C1) --> (X - Y) + (C0 - C1)
+ Constant *CX, *CY;
+ if (match(Op0, m_OneUse(m_Add(m_Value(X), m_ImmConstant(CX)))) &&
+ match(Op1, m_OneUse(m_Add(m_Value(Y), m_ImmConstant(CY))))) {
+ Value *OpsSub = Builder.CreateSub(X, Y);
+ Constant *ConstsSub = ConstantExpr::getSub(CX, CY);
+ return BinaryOperator::CreateAdd(OpsSub, ConstsSub);
+ }
}
// (~X) - (~Y) --> Y - X
- // This is placed after the other reassociations and explicitly excludes a
- // sub-of-sub pattern to avoid infinite looping.
- if (isFreeToInvert(Op0, Op0->hasOneUse()) &&
- isFreeToInvert(Op1, Op1->hasOneUse()) &&
- !match(Op0, m_Sub(m_ImmConstant(), m_Value()))) {
- Value *NotOp0 = Builder.CreateNot(Op0);
- Value *NotOp1 = Builder.CreateNot(Op1);
- return BinaryOperator::CreateSub(NotOp1, NotOp0);
+ {
+    // We need to ensure we can consume at least one of the `not`
+    // instructions; otherwise this can loop infinitely.
+ bool ConsumesOp0, ConsumesOp1;
+ if (isFreeToInvert(Op0, Op0->hasOneUse(), ConsumesOp0) &&
+ isFreeToInvert(Op1, Op1->hasOneUse(), ConsumesOp1) &&
+ (ConsumesOp0 || ConsumesOp1)) {
+ Value *NotOp0 = getFreelyInverted(Op0, Op0->hasOneUse(), &Builder);
+ Value *NotOp1 = getFreelyInverted(Op1, Op1->hasOneUse(), &Builder);
+ assert(NotOp0 != nullptr && NotOp1 != nullptr &&
+ "isFreeToInvert desynced with getFreelyInverted");
+ return BinaryOperator::CreateSub(NotOp1, NotOp0);
+ }
}
auto m_AddRdx = [](Value *&Vec) {
@@ -2520,18 +2677,33 @@ static Instruction *foldFNegIntoConstant(Instruction &I, const DataLayout &DL) {
return nullptr;
}
-static Instruction *hoistFNegAboveFMulFDiv(Instruction &I,
- InstCombiner::BuilderTy &Builder) {
- Value *FNeg;
- if (!match(&I, m_FNeg(m_Value(FNeg))))
- return nullptr;
-
+Instruction *InstCombinerImpl::hoistFNegAboveFMulFDiv(Value *FNegOp,
+ Instruction &FMFSource) {
Value *X, *Y;
- if (match(FNeg, m_OneUse(m_FMul(m_Value(X), m_Value(Y)))))
- return BinaryOperator::CreateFMulFMF(Builder.CreateFNegFMF(X, &I), Y, &I);
+ if (match(FNegOp, m_FMul(m_Value(X), m_Value(Y)))) {
+ return cast<Instruction>(Builder.CreateFMulFMF(
+ Builder.CreateFNegFMF(X, &FMFSource), Y, &FMFSource));
+ }
- if (match(FNeg, m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))))
- return BinaryOperator::CreateFDivFMF(Builder.CreateFNegFMF(X, &I), Y, &I);
+ if (match(FNegOp, m_FDiv(m_Value(X), m_Value(Y)))) {
+ return cast<Instruction>(Builder.CreateFDivFMF(
+ Builder.CreateFNegFMF(X, &FMFSource), Y, &FMFSource));
+ }
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(FNegOp)) {
+ // Make sure to preserve flags and metadata on the call.
+ if (II->getIntrinsicID() == Intrinsic::ldexp) {
+ FastMathFlags FMF = FMFSource.getFastMathFlags() | II->getFastMathFlags();
+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+ Builder.setFastMathFlags(FMF);
+
+ CallInst *New = Builder.CreateCall(
+ II->getCalledFunction(),
+ {Builder.CreateFNeg(II->getArgOperand(0)), II->getArgOperand(1)});
+ New->copyMetadata(*II);
+ return New;
+ }
+ }
return nullptr;
}
@@ -2553,13 +2725,13 @@ Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) {
match(Op, m_OneUse(m_FSub(m_Value(X), m_Value(Y)))))
return BinaryOperator::CreateFSubFMF(Y, X, &I);
- if (Instruction *R = hoistFNegAboveFMulFDiv(I, Builder))
- return R;
-
Value *OneUse;
if (!match(Op, m_OneUse(m_Value(OneUse))))
return nullptr;
+ if (Instruction *R = hoistFNegAboveFMulFDiv(OneUse, I))
+ return replaceInstUsesWith(I, R);
+
// Try to eliminate fneg if at least 1 arm of the select is negated.
Value *Cond;
if (match(OneUse, m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))) {
@@ -2569,8 +2741,7 @@ Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) {
auto propagateSelectFMF = [&](SelectInst *S, bool CommonOperand) {
S->copyFastMathFlags(&I);
if (auto *OldSel = dyn_cast<SelectInst>(Op)) {
- FastMathFlags FMF = I.getFastMathFlags();
- FMF |= OldSel->getFastMathFlags();
+ FastMathFlags FMF = I.getFastMathFlags() | OldSel->getFastMathFlags();
S->setFastMathFlags(FMF);
if (!OldSel->hasNoSignedZeros() && !CommonOperand &&
!isGuaranteedNotToBeUndefOrPoison(OldSel->getCondition()))
@@ -2638,9 +2809,6 @@ Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) {
if (Instruction *X = foldFNegIntoConstant(I, DL))
return X;
- if (Instruction *R = hoistFNegAboveFMulFDiv(I, Builder))
- return R;
-
Value *X, *Y;
Constant *C;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 8a1fb6b7f17e..5e362f4117d0 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1099,39 +1099,6 @@ static Value *foldUnsignedUnderflowCheck(ICmpInst *ZeroICmp,
return Builder.CreateICmpUGE(Builder.CreateNeg(B), A);
}
- Value *Base, *Offset;
- if (!match(ZeroCmpOp, m_Sub(m_Value(Base), m_Value(Offset))))
- return nullptr;
-
- if (!match(UnsignedICmp,
- m_c_ICmp(UnsignedPred, m_Specific(Base), m_Specific(Offset))) ||
- !ICmpInst::isUnsigned(UnsignedPred))
- return nullptr;
-
- // Base >=/> Offset && (Base - Offset) != 0 <--> Base > Offset
- // (no overflow and not null)
- if ((UnsignedPred == ICmpInst::ICMP_UGE ||
- UnsignedPred == ICmpInst::ICMP_UGT) &&
- EqPred == ICmpInst::ICMP_NE && IsAnd)
- return Builder.CreateICmpUGT(Base, Offset);
-
- // Base <=/< Offset || (Base - Offset) == 0 <--> Base <= Offset
- // (overflow or null)
- if ((UnsignedPred == ICmpInst::ICMP_ULE ||
- UnsignedPred == ICmpInst::ICMP_ULT) &&
- EqPred == ICmpInst::ICMP_EQ && !IsAnd)
- return Builder.CreateICmpULE(Base, Offset);
-
- // Base <= Offset && (Base - Offset) != 0 --> Base < Offset
- if (UnsignedPred == ICmpInst::ICMP_ULE && EqPred == ICmpInst::ICMP_NE &&
- IsAnd)
- return Builder.CreateICmpULT(Base, Offset);
-
- // Base > Offset || (Base - Offset) == 0 --> Base >= Offset
- if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ &&
- !IsAnd)
- return Builder.CreateICmpUGE(Base, Offset);
-
return nullptr;
}
@@ -1179,13 +1146,40 @@ Value *InstCombinerImpl::foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1,
return nullptr;
CmpInst::Predicate Pred = IsAnd ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE;
- if (Cmp0->getPredicate() != Pred || Cmp1->getPredicate() != Pred)
- return nullptr;
+ auto GetMatchPart = [&](ICmpInst *Cmp,
+ unsigned OpNo) -> std::optional<IntPart> {
+ if (Pred == Cmp->getPredicate())
+ return matchIntPart(Cmp->getOperand(OpNo));
+
+ const APInt *C;
+ // (icmp eq (lshr x, C), (lshr y, C)) gets optimized to:
+ // (icmp ult (xor x, y), 1 << C) so also look for that.
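+    // e.g. on i8 with C == 4: (lshr x, 4) == (lshr y, 4) iff
+    // (x ^ y) u< 16, i.e. iff the top four bits of x and y agree.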
+ if (Pred == CmpInst::ICMP_EQ && Cmp->getPredicate() == CmpInst::ICMP_ULT) {
+ if (!match(Cmp->getOperand(1), m_Power2(C)) ||
+ !match(Cmp->getOperand(0), m_Xor(m_Value(), m_Value())))
+ return std::nullopt;
+ }
- std::optional<IntPart> L0 = matchIntPart(Cmp0->getOperand(0));
- std::optional<IntPart> R0 = matchIntPart(Cmp0->getOperand(1));
- std::optional<IntPart> L1 = matchIntPart(Cmp1->getOperand(0));
- std::optional<IntPart> R1 = matchIntPart(Cmp1->getOperand(1));
+ // (icmp ne (lshr x, C), (lshr y, C)) gets optimized to:
+ // (icmp ugt (xor x, y), (1 << C) - 1) so also look for that.
+ else if (Pred == CmpInst::ICMP_NE &&
+ Cmp->getPredicate() == CmpInst::ICMP_UGT) {
+ if (!match(Cmp->getOperand(1), m_LowBitMask(C)) ||
+ !match(Cmp->getOperand(0), m_Xor(m_Value(), m_Value())))
+ return std::nullopt;
+ } else {
+ return std::nullopt;
+ }
+
+ unsigned From = Pred == CmpInst::ICMP_NE ? C->popcount() : C->countr_zero();
+ Instruction *I = cast<Instruction>(Cmp->getOperand(0));
+ return {{I->getOperand(OpNo), From, C->getBitWidth() - From}};
+ };
+
+ std::optional<IntPart> L0 = GetMatchPart(Cmp0, 0);
+ std::optional<IntPart> R0 = GetMatchPart(Cmp0, 1);
+ std::optional<IntPart> L1 = GetMatchPart(Cmp1, 0);
+ std::optional<IntPart> R1 = GetMatchPart(Cmp1, 1);
if (!L0 || !R0 || !L1 || !R1)
return nullptr;
@@ -1616,7 +1610,7 @@ static Instruction *reassociateFCmps(BinaryOperator &BO,
/// (~A & ~B) == (~(A | B))
/// (~A | ~B) == (~(A & B))
static Instruction *matchDeMorgansLaws(BinaryOperator &I,
- InstCombiner::BuilderTy &Builder) {
+ InstCombiner &IC) {
const Instruction::BinaryOps Opcode = I.getOpcode();
assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
"Trying to match De Morgan's Laws with something other than and/or");
@@ -1629,10 +1623,10 @@ static Instruction *matchDeMorgansLaws(BinaryOperator &I,
Value *A, *B;
if (match(Op0, m_OneUse(m_Not(m_Value(A)))) &&
match(Op1, m_OneUse(m_Not(m_Value(B)))) &&
- !InstCombiner::isFreeToInvert(A, A->hasOneUse()) &&
- !InstCombiner::isFreeToInvert(B, B->hasOneUse())) {
+ !IC.isFreeToInvert(A, A->hasOneUse()) &&
+ !IC.isFreeToInvert(B, B->hasOneUse())) {
Value *AndOr =
- Builder.CreateBinOp(FlippedOpcode, A, B, I.getName() + ".demorgan");
+ IC.Builder.CreateBinOp(FlippedOpcode, A, B, I.getName() + ".demorgan");
return BinaryOperator::CreateNot(AndOr);
}
@@ -1644,8 +1638,8 @@ static Instruction *matchDeMorgansLaws(BinaryOperator &I,
Value *C;
if (match(Op0, m_OneUse(m_c_BinOp(Opcode, m_Value(A), m_Not(m_Value(B))))) &&
match(Op1, m_Not(m_Value(C)))) {
- Value *FlippedBO = Builder.CreateBinOp(FlippedOpcode, B, C);
- return BinaryOperator::Create(Opcode, A, Builder.CreateNot(FlippedBO));
+ Value *FlippedBO = IC.Builder.CreateBinOp(FlippedOpcode, B, C);
+ return BinaryOperator::Create(Opcode, A, IC.Builder.CreateNot(FlippedBO));
}
return nullptr;
@@ -1669,7 +1663,7 @@ bool InstCombinerImpl::shouldOptimizeCast(CastInst *CI) {
/// Fold {and,or,xor} (cast X), C.
static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast,
- InstCombiner::BuilderTy &Builder) {
+ InstCombinerImpl &IC) {
Constant *C = dyn_cast<Constant>(Logic.getOperand(1));
if (!C)
return nullptr;
@@ -1684,21 +1678,17 @@ static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast,
// instruction may be cheaper (particularly in the case of vectors).
Value *X;
if (match(Cast, m_OneUse(m_ZExt(m_Value(X))))) {
- Constant *TruncC = ConstantExpr::getTrunc(C, SrcTy);
- Constant *ZextTruncC = ConstantExpr::getZExt(TruncC, DestTy);
- if (ZextTruncC == C) {
+ if (Constant *TruncC = IC.getLosslessUnsignedTrunc(C, SrcTy)) {
// LogicOpc (zext X), C --> zext (LogicOpc X, C)
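+      // e.g. and (zext i8 %x to i32), 15 --> zext (and i8 %x, 15) to i32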
- Value *NewOp = Builder.CreateBinOp(LogicOpc, X, TruncC);
+ Value *NewOp = IC.Builder.CreateBinOp(LogicOpc, X, TruncC);
return new ZExtInst(NewOp, DestTy);
}
}
if (match(Cast, m_OneUse(m_SExt(m_Value(X))))) {
- Constant *TruncC = ConstantExpr::getTrunc(C, SrcTy);
- Constant *SextTruncC = ConstantExpr::getSExt(TruncC, DestTy);
- if (SextTruncC == C) {
+ if (Constant *TruncC = IC.getLosslessSignedTrunc(C, SrcTy)) {
// LogicOpc (sext X), C --> sext (LogicOpc X, C)
- Value *NewOp = Builder.CreateBinOp(LogicOpc, X, TruncC);
+ Value *NewOp = IC.Builder.CreateBinOp(LogicOpc, X, TruncC);
return new SExtInst(NewOp, DestTy);
}
}
@@ -1756,7 +1746,7 @@ Instruction *InstCombinerImpl::foldCastedBitwiseLogic(BinaryOperator &I) {
if (!SrcTy->isIntOrIntVectorTy())
return nullptr;
- if (Instruction *Ret = foldLogicCastConstant(I, Cast0, Builder))
+ if (Instruction *Ret = foldLogicCastConstant(I, Cast0, *this))
return Ret;
CastInst *Cast1 = dyn_cast<CastInst>(Op1);
@@ -1802,29 +1792,6 @@ Instruction *InstCombinerImpl::foldCastedBitwiseLogic(BinaryOperator &I) {
return CastInst::Create(CastOpcode, NewOp, DestTy);
}
- // For now, only 'and'/'or' have optimizations after this.
- if (LogicOpc == Instruction::Xor)
- return nullptr;
-
- // If this is logic(cast(icmp), cast(icmp)), try to fold this even if the
- // cast is otherwise not optimizable. This happens for vector sexts.
- ICmpInst *ICmp0 = dyn_cast<ICmpInst>(Cast0Src);
- ICmpInst *ICmp1 = dyn_cast<ICmpInst>(Cast1Src);
- if (ICmp0 && ICmp1) {
- if (Value *Res =
- foldAndOrOfICmps(ICmp0, ICmp1, I, LogicOpc == Instruction::And))
- return CastInst::Create(CastOpcode, Res, DestTy);
- return nullptr;
- }
-
- // If this is logic(cast(fcmp), cast(fcmp)), try to fold this even if the
- // cast is otherwise not optimizable. This happens for vector sexts.
- FCmpInst *FCmp0 = dyn_cast<FCmpInst>(Cast0Src);
- FCmpInst *FCmp1 = dyn_cast<FCmpInst>(Cast1Src);
- if (FCmp0 && FCmp1)
- if (Value *R = foldLogicOfFCmps(FCmp0, FCmp1, LogicOpc == Instruction::And))
- return CastInst::Create(CastOpcode, R, DestTy);
-
return nullptr;
}
@@ -2160,10 +2127,10 @@ Instruction *InstCombinerImpl::foldBinOpOfDisplacedShifts(BinaryOperator &I) {
Constant *ShiftedC1, *ShiftedC2, *AddC;
Type *Ty = I.getType();
unsigned BitWidth = Ty->getScalarSizeInBits();
- if (!match(&I,
- m_c_BinOp(m_Shift(m_ImmConstant(ShiftedC1), m_Value(ShAmt)),
- m_Shift(m_ImmConstant(ShiftedC2),
- m_Add(m_Deferred(ShAmt), m_ImmConstant(AddC))))))
+ if (!match(&I, m_c_BinOp(m_Shift(m_ImmConstant(ShiftedC1), m_Value(ShAmt)),
+ m_Shift(m_ImmConstant(ShiftedC2),
+ m_AddLike(m_Deferred(ShAmt),
+ m_ImmConstant(AddC))))))
return nullptr;
// Make sure the add constant is a valid shift amount.
@@ -2254,6 +2221,14 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), Y);
}
+ // Canonicalize:
+ // (X +/- Y) & Y --> ~X & Y when Y is a power of 2.
+ if (match(&I, m_c_And(m_Value(Y), m_OneUse(m_CombineOr(
+ m_c_Add(m_Value(X), m_Deferred(Y)),
+ m_Sub(m_Value(X), m_Deferred(Y)))))) &&
+ isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, /*Depth*/ 0, &I))
+ return BinaryOperator::CreateAnd(Builder.CreateNot(X), Y);
+
const APInt *C;
if (match(Op1, m_APInt(C))) {
const APInt *XorC;
@@ -2300,13 +2275,6 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
const APInt *AddC;
if (match(Op0, m_Add(m_Value(X), m_APInt(AddC)))) {
- // If we add zeros to every bit below a mask, the add has no effect:
- // (X + AddC) & LowMaskC --> X & LowMaskC
- unsigned Ctlz = C->countl_zero();
- APInt LowMask(APInt::getLowBitsSet(Width, Width - Ctlz));
- if ((*AddC & LowMask).isZero())
- return BinaryOperator::CreateAnd(X, Op1);
-
// If we are masking the result of the add down to exactly one bit and
// the constant we are adding has no bits set below that bit, then the
// add is flipping a single bit. Example:
@@ -2455,6 +2423,28 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
}
}
+ // If we are clearing the sign bit of a floating-point value, convert this to
+ // fabs, then cast back to integer.
+ //
+  // This is a generous interpretation for noimplicitfloat, as this is not a
+  // true floating-point operation.
+ //
+ // Assumes any IEEE-represented type has the sign bit in the high bit.
+  // TODO: Unify with APInt matcher. This version allows undef, unlike m_APInt.
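+  // e.g. and (bitcast float %f to i32), 0x7FFFFFFF
+  //        --> bitcast (call float @llvm.fabs.f32(float %f)) to i32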
+ Value *CastOp;
+ if (match(Op0, m_BitCast(m_Value(CastOp))) &&
+ match(Op1, m_MaxSignedValue()) &&
+ !Builder.GetInsertBlock()->getParent()->hasFnAttribute(
+ Attribute::NoImplicitFloat)) {
+ Type *EltTy = CastOp->getType()->getScalarType();
+ if (EltTy->isFloatingPointTy() && EltTy->isIEEE() &&
+ EltTy->getPrimitiveSizeInBits() ==
+ I.getType()->getScalarType()->getPrimitiveSizeInBits()) {
+ Value *FAbs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, CastOp);
+ return new BitCastInst(FAbs, I.getType());
+ }
+ }
+
if (match(&I, m_And(m_OneUse(m_Shl(m_ZExt(m_Value(X)), m_Value(Y))),
m_SignMask())) &&
match(Y, m_SpecificInt_ICMP(
@@ -2479,21 +2469,21 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
if (I.getType()->isIntOrIntVectorTy(1)) {
if (auto *SI0 = dyn_cast<SelectInst>(Op0)) {
- if (auto *I =
+ if (auto *R =
foldAndOrOfSelectUsingImpliedCond(Op1, *SI0, /* IsAnd */ true))
- return I;
+ return R;
}
if (auto *SI1 = dyn_cast<SelectInst>(Op1)) {
- if (auto *I =
+ if (auto *R =
foldAndOrOfSelectUsingImpliedCond(Op0, *SI1, /* IsAnd */ true))
- return I;
+ return R;
}
}
if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
return FoldedLogic;
- if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder))
+ if (Instruction *DeMorgan = matchDeMorgansLaws(I, *this))
return DeMorgan;
{
@@ -2513,16 +2503,24 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
return BinaryOperator::CreateAnd(Op1, B);
// (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
- if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
- if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A))))
- if (Op1->hasOneUse() || isFreeToInvert(C, C->hasOneUse()))
- return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(C));
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A)))) {
+ Value *NotC = Op1->hasOneUse()
+ ? Builder.CreateNot(C)
+ : getFreelyInverted(C, C->hasOneUse(), &Builder);
+ if (NotC != nullptr)
+ return BinaryOperator::CreateAnd(Op0, NotC);
+ }
// ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C
- if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B))))
- if (match(Op1, m_Xor(m_Specific(B), m_Specific(A))))
- if (Op0->hasOneUse() || isFreeToInvert(C, C->hasOneUse()))
- return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(C));
+ if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B))) &&
+ match(Op1, m_Xor(m_Specific(B), m_Specific(A)))) {
+ Value *NotC = Op0->hasOneUse()
+ ? Builder.CreateNot(C)
+ : getFreelyInverted(C, C->hasOneUse(), &Builder);
+ if (NotC != nullptr)
+      return BinaryOperator::CreateAnd(Op1, NotC);
+ }
// (A | B) & (~A ^ B) -> A & B
// (A | B) & (B ^ ~A) -> A & B
@@ -2621,23 +2619,34 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
// with binop identity constant. But creating a select with non-constant
// arm may not be reversible due to poison semantics. Is that a good
// canonicalization?
- Value *A;
- if (match(Op0, m_OneUse(m_SExt(m_Value(A)))) &&
- A->getType()->isIntOrIntVectorTy(1))
- return SelectInst::Create(A, Op1, Constant::getNullValue(Ty));
- if (match(Op1, m_OneUse(m_SExt(m_Value(A)))) &&
+ Value *A, *B;
+ if (match(&I, m_c_And(m_OneUse(m_SExt(m_Value(A))), m_Value(B))) &&
A->getType()->isIntOrIntVectorTy(1))
- return SelectInst::Create(A, Op0, Constant::getNullValue(Ty));
+ return SelectInst::Create(A, B, Constant::getNullValue(Ty));
// Similarly, a 'not' of the bool translates to a swap of the select arms:
- // ~sext(A) & Op1 --> A ? 0 : Op1
- // Op0 & ~sext(A) --> A ? 0 : Op0
- if (match(Op0, m_Not(m_SExt(m_Value(A)))) &&
+ // ~sext(A) & B / B & ~sext(A) --> A ? 0 : B
+ if (match(&I, m_c_And(m_Not(m_SExt(m_Value(A))), m_Value(B))) &&
A->getType()->isIntOrIntVectorTy(1))
- return SelectInst::Create(A, Constant::getNullValue(Ty), Op1);
- if (match(Op1, m_Not(m_SExt(m_Value(A)))) &&
+ return SelectInst::Create(A, Constant::getNullValue(Ty), B);
+
+ // and(zext(A), B) -> A ? (B & 1) : 0
+ if (match(&I, m_c_And(m_OneUse(m_ZExt(m_Value(A))), m_Value(B))) &&
A->getType()->isIntOrIntVectorTy(1))
- return SelectInst::Create(A, Constant::getNullValue(Ty), Op0);
+ return SelectInst::Create(A, Builder.CreateAnd(B, ConstantInt::get(Ty, 1)),
+ Constant::getNullValue(Ty));
+
+ // (-1 + A) & B --> A ? 0 : B where A is 0/1.
+ if (match(&I, m_c_And(m_OneUse(m_Add(m_ZExtOrSelf(m_Value(A)), m_AllOnes())),
+ m_Value(B)))) {
+ if (A->getType()->isIntOrIntVectorTy(1))
+ return SelectInst::Create(A, Constant::getNullValue(Ty), B);
+ if (computeKnownBits(A, /* Depth */ 0, &I).countMaxActiveBits() <= 1) {
+ return SelectInst::Create(
+ Builder.CreateICmpEQ(A, Constant::getNullValue(A->getType())), B,
+ Constant::getNullValue(Ty));
+ }
+ }
// (iN X s>> (N-1)) & Y --> (X s< 0) ? Y : 0 -- with optional sext
if (match(&I, m_c_And(m_OneUse(m_SExtOrSelf(
@@ -2698,105 +2707,178 @@ Instruction *InstCombinerImpl::matchBSwapOrBitReverse(Instruction &I,
}
/// Match UB-safe variants of the funnel shift intrinsic.
-static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) {
+static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC,
+ const DominatorTree &DT) {
// TODO: Can we reduce the code duplication between this and the related
// rotate matching code under visitSelect and visitTrunc?
unsigned Width = Or.getType()->getScalarSizeInBits();
+ Instruction *Or0, *Or1;
+ if (!match(Or.getOperand(0), m_Instruction(Or0)) ||
+ !match(Or.getOperand(1), m_Instruction(Or1)))
+ return nullptr;
+
+ bool IsFshl = true; // Sub on LSHR.
+ SmallVector<Value *, 3> FShiftArgs;
+
// First, find an or'd pair of opposite shifts:
// or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
- BinaryOperator *Or0, *Or1;
- if (!match(Or.getOperand(0), m_BinOp(Or0)) ||
- !match(Or.getOperand(1), m_BinOp(Or1)))
- return nullptr;
+ if (isa<BinaryOperator>(Or0) && isa<BinaryOperator>(Or1)) {
+ Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
+ if (!match(Or0,
+ m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
+ !match(Or1,
+ m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
+ Or0->getOpcode() == Or1->getOpcode())
+ return nullptr;
- Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
- if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
- !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
- Or0->getOpcode() == Or1->getOpcode())
- return nullptr;
+ // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
+ if (Or0->getOpcode() == BinaryOperator::LShr) {
+ std::swap(Or0, Or1);
+ std::swap(ShVal0, ShVal1);
+ std::swap(ShAmt0, ShAmt1);
+ }
+ assert(Or0->getOpcode() == BinaryOperator::Shl &&
+ Or1->getOpcode() == BinaryOperator::LShr &&
+ "Illegal or(shift,shift) pair");
+
+ // Match the shift amount operands for a funnel shift pattern. This always
+ // matches a subtraction on the R operand.
+ auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
+ // Check for constant shift amounts that sum to the bitwidth.
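+      // e.g. (shl X, 8) | (lshr Y, 24) on i32 --> fshl(X, Y, 8).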
+ const APInt *LI, *RI;
+ if (match(L, m_APIntAllowUndef(LI)) && match(R, m_APIntAllowUndef(RI)))
+ if (LI->ult(Width) && RI->ult(Width) && (*LI + *RI) == Width)
+ return ConstantInt::get(L->getType(), *LI);
+
+ Constant *LC, *RC;
+ if (match(L, m_Constant(LC)) && match(R, m_Constant(RC)) &&
+ match(L,
+ m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
+ match(R,
+ m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
+ match(ConstantExpr::getAdd(LC, RC), m_SpecificIntAllowUndef(Width)))
+ return ConstantExpr::mergeUndefsWith(LC, RC);
+
+ // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
+ // We limit this to X < Width in case the backend re-expands the
+ // intrinsic, and has to reintroduce a shift modulo operation (InstCombine
+ // might remove it after this fold). This still doesn't guarantee that the
+ // final codegen will match this original pattern.
+ if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
+ KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or);
+ return KnownL.getMaxValue().ult(Width) ? L : nullptr;
+ }
- // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
- if (Or0->getOpcode() == BinaryOperator::LShr) {
- std::swap(Or0, Or1);
- std::swap(ShVal0, ShVal1);
- std::swap(ShAmt0, ShAmt1);
- }
- assert(Or0->getOpcode() == BinaryOperator::Shl &&
- Or1->getOpcode() == BinaryOperator::LShr &&
- "Illegal or(shift,shift) pair");
-
- // Match the shift amount operands for a funnel shift pattern. This always
- // matches a subtraction on the R operand.
- auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
- // Check for constant shift amounts that sum to the bitwidth.
- const APInt *LI, *RI;
- if (match(L, m_APIntAllowUndef(LI)) && match(R, m_APIntAllowUndef(RI)))
- if (LI->ult(Width) && RI->ult(Width) && (*LI + *RI) == Width)
- return ConstantInt::get(L->getType(), *LI);
-
- Constant *LC, *RC;
- if (match(L, m_Constant(LC)) && match(R, m_Constant(RC)) &&
- match(L, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
- match(R, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
- match(ConstantExpr::getAdd(LC, RC), m_SpecificIntAllowUndef(Width)))
- return ConstantExpr::mergeUndefsWith(LC, RC);
-
- // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
- // We limit this to X < Width in case the backend re-expands the intrinsic,
- // and has to reintroduce a shift modulo operation (InstCombine might remove
- // it after this fold). This still doesn't guarantee that the final codegen
- // will match this original pattern.
- if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
- KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or);
- return KnownL.getMaxValue().ult(Width) ? L : nullptr;
+ // For non-constant cases, the following patterns currently only work for
+ // rotation patterns.
+ // TODO: Add general funnel-shift compatible patterns.
+ if (ShVal0 != ShVal1)
+ return nullptr;
+
+ // For non-constant cases we don't support non-pow2 shift masks.
+ // TODO: Is it worth matching urem as well?
+ if (!isPowerOf2_32(Width))
+ return nullptr;
+
+ // The shift amount may be masked with negation:
+ // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
+ Value *X;
+ unsigned Mask = Width - 1;
+ if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
+ match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
+ return X;
+
+ // Similar to above, but the shift amount may be extended after masking,
+ // so return the extended value as the parameter for the intrinsic.
+ if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
+ match(R,
+ m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
+ m_SpecificInt(Mask))))
+ return L;
+
+ if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
+ match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
+ return L;
+
+ return nullptr;
+ };
+
+ Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
+ if (!ShAmt) {
+ ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
+ IsFshl = false; // Sub on SHL.
}
+ if (!ShAmt)
+ return nullptr;
- // For non-constant cases, the following patterns currently only work for
- // rotation patterns.
- // TODO: Add general funnel-shift compatible patterns.
- if (ShVal0 != ShVal1)
+ FShiftArgs = {ShVal0, ShVal1, ShAmt};
+ } else if (isa<ZExtInst>(Or0) || isa<ZExtInst>(Or1)) {
+ // If there are two 'or' instructions concat variables in opposite order:
+ //
+ // Slot1 and Slot2 are all zero bits.
+ // | Slot1 | Low | Slot2 | High |
+ // LowHigh = or (shl (zext Low), ZextLowShlAmt), (zext High)
+ // | Slot2 | High | Slot1 | Low |
+ // HighLow = or (shl (zext High), ZextHighShlAmt), (zext Low)
+ //
+    // the latter 'or' can be safely converted to
+ // -> HighLow = fshl LowHigh, LowHigh, ZextHighShlAmt
+ // if ZextLowShlAmt + ZextHighShlAmt == Width.
+ if (!isa<ZExtInst>(Or1))
+ std::swap(Or0, Or1);
+
+ Value *High, *ZextHigh, *Low;
+ const APInt *ZextHighShlAmt;
+ if (!match(Or0,
+ m_OneUse(m_Shl(m_Value(ZextHigh), m_APInt(ZextHighShlAmt)))))
return nullptr;
- // For non-constant cases we don't support non-pow2 shift masks.
- // TODO: Is it worth matching urem as well?
- if (!isPowerOf2_32(Width))
+ if (!match(Or1, m_ZExt(m_Value(Low))) ||
+ !match(ZextHigh, m_ZExt(m_Value(High))))
return nullptr;
- // The shift amount may be masked with negation:
- // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
- Value *X;
- unsigned Mask = Width - 1;
- if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
- match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
- return X;
+ unsigned HighSize = High->getType()->getScalarSizeInBits();
+ unsigned LowSize = Low->getType()->getScalarSizeInBits();
+ // Make sure High does not overlap with Low and most significant bits of
+ // High aren't shifted out.
+ if (ZextHighShlAmt->ult(LowSize) || ZextHighShlAmt->ugt(Width - HighSize))
+ return nullptr;
- // Similar to above, but the shift amount may be extended after masking,
- // so return the extended value as the parameter for the intrinsic.
- if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
- match(R, m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
- m_SpecificInt(Mask))))
- return L;
+ for (User *U : ZextHigh->users()) {
+ Value *X, *Y;
+ if (!match(U, m_Or(m_Value(X), m_Value(Y))))
+ continue;
- if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
- match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
- return L;
+ if (!isa<ZExtInst>(Y))
+ std::swap(X, Y);
- return nullptr;
- };
+ const APInt *ZextLowShlAmt;
+ if (!match(X, m_Shl(m_Specific(Or1), m_APInt(ZextLowShlAmt))) ||
+ !match(Y, m_Specific(ZextHigh)) || !DT.dominates(U, &Or))
+ continue;
- Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
- bool IsFshl = true; // Sub on LSHR.
- if (!ShAmt) {
- ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
- IsFshl = false; // Sub on SHL.
+      // HighLow is a good concat. If the sum of the two shift amounts equals
+      // Width, LowHigh must also be a good concat.
+ if (*ZextLowShlAmt + *ZextHighShlAmt != Width)
+ continue;
+
+ // Low must not overlap with High and most significant bits of Low must
+ // not be shifted out.
+ assert(ZextLowShlAmt->uge(HighSize) &&
+ ZextLowShlAmt->ule(Width - LowSize) && "Invalid concat");
+
+ FShiftArgs = {U, U, ConstantInt::get(Or0->getType(), *ZextHighShlAmt)};
+ break;
+ }
}
- if (!ShAmt)
+
+ if (FShiftArgs.empty())
return nullptr;
Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
Function *F = Intrinsic::getDeclaration(Or.getModule(), IID, Or.getType());
- return CallInst::Create(F, {ShVal0, ShVal1, ShAmt});
+ return CallInst::Create(F, FShiftArgs);
}
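A hypothetical example of the zext-concat case handled above, assuming both concatenation orders of two i16 halves are live and the first 'or' dominates the second:

  define void @concat_both_orders(i16 %low, i16 %high, ptr %p) {
    %zl = zext i16 %low to i32
    %zh = zext i16 %high to i32
    %sl = shl i32 %zl, 16
    %lowhigh = or i32 %sl, %zh        ; | Low | High |
    store i32 %lowhigh, ptr %p
    %sh = shl i32 %zh, 16
    %highlow = or i32 %sh, %zl        ; | High | Low |
    ; -> %highlow = call i32 @llvm.fshl.i32(i32 %lowhigh, i32 %lowhigh, i32 16)
    %q = getelementptr inbounds i32, ptr %p, i64 1
    store i32 %highlow, ptr %q
    ret void
  }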
/// Attempt to combine or(zext(x),shl(zext(y),bw/2) concat packing patterns.
@@ -3272,14 +3354,14 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
Type *Ty = I.getType();
if (Ty->isIntOrIntVectorTy(1)) {
if (auto *SI0 = dyn_cast<SelectInst>(Op0)) {
- if (auto *I =
+ if (auto *R =
foldAndOrOfSelectUsingImpliedCond(Op1, *SI0, /* IsAnd */ false))
- return I;
+ return R;
}
if (auto *SI1 = dyn_cast<SelectInst>(Op1)) {
- if (auto *I =
+ if (auto *R =
foldAndOrOfSelectUsingImpliedCond(Op0, *SI1, /* IsAnd */ false))
- return I;
+ return R;
}
}
@@ -3290,7 +3372,7 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
/*MatchBitReversals*/ true))
return BitOp;
- if (Instruction *Funnel = matchFunnelShift(I, *this))
+ if (Instruction *Funnel = matchFunnelShift(I, *this, DT))
return Funnel;
if (Instruction *Concat = matchOrConcat(I, Builder))
@@ -3311,9 +3393,8 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
// If the operands have no common bits set:
// or (mul X, Y), X --> add (mul X, Y), X --> mul X, (Y + 1)
- if (match(&I,
- m_c_Or(m_OneUse(m_Mul(m_Value(X), m_Value(Y))), m_Deferred(X))) &&
- haveNoCommonBitsSet(Op0, Op1, DL)) {
+ if (match(&I, m_c_DisjointOr(m_OneUse(m_Mul(m_Value(X), m_Value(Y))),
+ m_Deferred(X)))) {
Value *IncrementY = Builder.CreateAdd(Y, ConstantInt::get(Ty, 1));
return BinaryOperator::CreateMul(X, IncrementY);
}
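A sketch of this fold, assuming the operands were already proven to share no bits (the new 'disjoint' flag on 'or' stands in for the previous explicit haveNoCommonBitsSet check):

  define i32 @or_disjoint_mul(i32 %x, i32 %y) {
    %mul = mul i32 %x, %y
    %r   = or disjoint i32 %mul, %x
    ; -> %yp1 = add i32 %y, 1
    ;    %r   = mul i32 %x, %yp1
    ret i32 %r
  }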
@@ -3435,7 +3516,7 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A))))
return BinaryOperator::CreateOr(Op1, Builder.CreateAnd(A, C));
- if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder))
+ if (Instruction *DeMorgan = matchDeMorgansLaws(I, *this))
return DeMorgan;
// Canonicalize xor to the RHS.
@@ -3581,12 +3662,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
// with binop identity constant. But creating a select with non-constant
// arm may not be reversible due to poison semantics. Is that a good
// canonicalization?
- if (match(Op0, m_OneUse(m_SExt(m_Value(A)))) &&
- A->getType()->isIntOrIntVectorTy(1))
- return SelectInst::Create(A, ConstantInt::getAllOnesValue(Ty), Op1);
- if (match(Op1, m_OneUse(m_SExt(m_Value(A)))) &&
+ if (match(&I, m_c_Or(m_OneUse(m_SExt(m_Value(A))), m_Value(B))) &&
A->getType()->isIntOrIntVectorTy(1))
- return SelectInst::Create(A, ConstantInt::getAllOnesValue(Ty), Op0);
+ return SelectInst::Create(A, ConstantInt::getAllOnesValue(Ty), B);
// Note: If we've gotten to the point of visiting the outer OR, then the
// inner one couldn't be simplified. If it was a constant, then it won't
@@ -3628,6 +3706,26 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
}
}
+ {
+ // ((A & B) ^ A) | ((A & B) ^ B) -> A ^ B
+ // (A ^ (A & B)) | (B ^ (A & B)) -> A ^ B
+ // ((A & B) ^ B) | ((A & B) ^ A) -> A ^ B
+ // (B ^ (A & B)) | (A ^ (A & B)) -> A ^ B
+ const auto TryXorOpt = [&](Value *Lhs, Value *Rhs) -> Instruction * {
+ if (match(Lhs, m_c_Xor(m_And(m_Value(A), m_Value(B)), m_Deferred(A))) &&
+ match(Rhs,
+ m_c_Xor(m_And(m_Specific(A), m_Specific(B)), m_Deferred(B)))) {
+ return BinaryOperator::CreateXor(A, B);
+ }
+ return nullptr;
+ };
+
+ if (Instruction *Result = TryXorOpt(Op0, Op1))
+ return Result;
+ if (Instruction *Result = TryXorOpt(Op1, Op0))
+ return Result;
+ }
+
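One concrete instance of the patterns listed above (names invented):

  define i32 @or_of_masked_xors(i32 %a, i32 %b) {
    %and = and i32 %a, %b
    %x1  = xor i32 %and, %a          ; equals a & ~b
    %x2  = xor i32 %and, %b          ; equals b & ~a
    %or  = or i32 %x1, %x2
    ; -> %or = xor i32 %a, %b
    ret i32 %or
  }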
if (Instruction *V =
canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I))
return V;
@@ -3658,6 +3756,35 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
}
}
+ /// Res, Overflow = xxx_with_overflow X, C1
+ /// Try to canonicalize the pattern "Overflow | icmp pred Res, C2" into
+ /// "Overflow | icmp pred X, C2 +/- C1".
+ const WithOverflowInst *WO;
+ const Value *WOV;
+ const APInt *C1, *C2;
+ if (match(&I, m_c_Or(m_CombineAnd(m_ExtractValue<1>(m_CombineAnd(
+ m_WithOverflowInst(WO), m_Value(WOV))),
+ m_Value(Ov)),
+ m_OneUse(m_ICmp(Pred, m_ExtractValue<0>(m_Deferred(WOV)),
+ m_APInt(C2))))) &&
+ (WO->getBinaryOp() == Instruction::Add ||
+ WO->getBinaryOp() == Instruction::Sub) &&
+ (ICmpInst::isEquality(Pred) ||
+ WO->isSigned() == ICmpInst::isSigned(Pred)) &&
+ match(WO->getRHS(), m_APInt(C1))) {
+ bool Overflow;
+ APInt NewC = WO->getBinaryOp() == Instruction::Add
+ ? (ICmpInst::isSigned(Pred) ? C2->ssub_ov(*C1, Overflow)
+ : C2->usub_ov(*C1, Overflow))
+ : (ICmpInst::isSigned(Pred) ? C2->sadd_ov(*C1, Overflow)
+ : C2->uadd_ov(*C1, Overflow));
+ if (!Overflow || ICmpInst::isEquality(Pred)) {
+ Value *NewCmp = Builder.CreateICmp(
+ Pred, WO->getLHS(), ConstantInt::get(WO->getLHS()->getType(), NewC));
+ return BinaryOperator::CreateOr(Ov, NewCmp);
+ }
+ }
+
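A worked example under the stated side conditions (equality predicate, add-with-overflow with a constant RHS); the constant moves across the add, 10 - 5 == 5:

  define i1 @ov_or_cmp(i8 %x) {
    %wo  = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %x, i8 5)
    %res = extractvalue { i8, i1 } %wo, 0
    %ov  = extractvalue { i8, i1 } %wo, 1
    %cmp = icmp eq i8 %res, 10
    %or  = or i1 %ov, %cmp
    ; -> %cmp.new = icmp eq i8 %x, 5
    ;    %or      = or i1 %ov, %cmp.new
    ret i1 %or
  }
  declare { i8, i1 } @llvm.uadd.with.overflow.i8(i8, i8)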
// (~x) | y --> ~(x & (~y)) iff that gets rid of inversions
if (sinkNotIntoOtherHandOfLogicalOp(I))
return &I;
@@ -3720,6 +3847,31 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
return Res;
+ // If we are setting the sign bit of a floating-point value, convert
+ // this to fneg(fabs), then cast back to integer.
+ //
+ // If the result isn't immediately cast back to a float, this will increase
+ // the number of instructions. This is still probably a better canonical form
+ // as it enables FP value tracking.
+ //
+ // Assumes any IEEE-represented type has the sign bit in the high bit.
+ //
+  // This is a generous interpretation of noimplicitfloat; this is not a true
+  // floating-point operation.
+ Value *CastOp;
+ if (match(Op0, m_BitCast(m_Value(CastOp))) && match(Op1, m_SignMask()) &&
+ !Builder.GetInsertBlock()->getParent()->hasFnAttribute(
+ Attribute::NoImplicitFloat)) {
+ Type *EltTy = CastOp->getType()->getScalarType();
+ if (EltTy->isFloatingPointTy() && EltTy->isIEEE() &&
+ EltTy->getPrimitiveSizeInBits() ==
+ I.getType()->getScalarType()->getPrimitiveSizeInBits()) {
+ Value *FAbs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, CastOp);
+ Value *FNegFAbs = Builder.CreateFNeg(FAbs);
+ return new BitCastInst(FNegFAbs, I.getType());
+ }
+ }
+
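A minimal sketch of the sign-bit-setting case (for f32 the sign mask is 0x80000000, i.e. -2147483648):

  define i32 @set_fp_signbit(float %f) {
    %bits = bitcast float %f to i32
    %or   = or i32 %bits, -2147483648   ; set the IEEE sign bit
    ; -> %fabs = call float @llvm.fabs.f32(float %f)
    ;    %fneg = fneg float %fabs
    ;    %or   = bitcast float %fneg to i32
    ret i32 %or
  }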
return nullptr;
}
@@ -3931,26 +4083,6 @@ static Instruction *visitMaskedMerge(BinaryOperator &I,
return nullptr;
}
-// Transform
-// ~(x ^ y)
-// into:
-// (~x) ^ y
-// or into
-// x ^ (~y)
-static Instruction *sinkNotIntoXor(BinaryOperator &I, Value *X, Value *Y,
- InstCombiner::BuilderTy &Builder) {
- // We only want to do the transform if it is free to do.
- if (InstCombiner::isFreeToInvert(X, X->hasOneUse())) {
- // Ok, good.
- } else if (InstCombiner::isFreeToInvert(Y, Y->hasOneUse())) {
- std::swap(X, Y);
- } else
- return nullptr;
-
- Value *NotX = Builder.CreateNot(X, X->getName() + ".not");
- return BinaryOperator::CreateXor(NotX, Y, I.getName() + ".demorgan");
-}
-
static Instruction *foldNotXor(BinaryOperator &I,
InstCombiner::BuilderTy &Builder) {
Value *X, *Y;
@@ -3959,9 +4091,6 @@ static Instruction *foldNotXor(BinaryOperator &I,
if (!match(&I, m_Not(m_OneUse(m_Xor(m_Value(X), m_Value(Y))))))
return nullptr;
- if (Instruction *NewXor = sinkNotIntoXor(I, X, Y, Builder))
- return NewXor;
-
auto hasCommonOperand = [](Value *A, Value *B, Value *C, Value *D) {
return A == C || A == D || B == C || B == D;
};
@@ -4023,13 +4152,13 @@ static bool canFreelyInvert(InstCombiner &IC, Value *Op,
Instruction *IgnoredUser) {
auto *I = dyn_cast<Instruction>(Op);
return I && IC.isFreeToInvert(I, /*WillInvertAllUses=*/true) &&
- InstCombiner::canFreelyInvertAllUsersOf(I, IgnoredUser);
+ IC.canFreelyInvertAllUsersOf(I, IgnoredUser);
}
static Value *freelyInvert(InstCombinerImpl &IC, Value *Op,
Instruction *IgnoredUser) {
auto *I = cast<Instruction>(Op);
- IC.Builder.SetInsertPoint(&*I->getInsertionPointAfterDef());
+ IC.Builder.SetInsertPoint(*I->getInsertionPointAfterDef());
Value *NotOp = IC.Builder.CreateNot(Op, Op->getName() + ".not");
Op->replaceUsesWithIf(NotOp,
[NotOp](Use &U) { return U.getUser() != NotOp; });
@@ -4067,7 +4196,7 @@ bool InstCombinerImpl::sinkNotIntoLogicalOp(Instruction &I) {
Op0 = freelyInvert(*this, Op0, &I);
Op1 = freelyInvert(*this, Op1, &I);
- Builder.SetInsertPoint(I.getInsertionPointAfterDef());
+ Builder.SetInsertPoint(*I.getInsertionPointAfterDef());
Value *NewLogicOp;
if (IsBinaryOp)
NewLogicOp = Builder.CreateBinOp(NewOpc, Op0, Op1, I.getName() + ".not");
@@ -4115,7 +4244,7 @@ bool InstCombinerImpl::sinkNotIntoOtherHandOfLogicalOp(Instruction &I) {
*OpToInvert = freelyInvert(*this, *OpToInvert, &I);
- Builder.SetInsertPoint(&*I.getInsertionPointAfterDef());
+ Builder.SetInsertPoint(*I.getInsertionPointAfterDef());
Value *NewBinOp;
if (IsBinaryOp)
NewBinOp = Builder.CreateBinOp(NewOpc, Op0, Op1, I.getName() + ".not");
@@ -4180,6 +4309,12 @@ Instruction *InstCombinerImpl::foldNot(BinaryOperator &I) {
if (match(NotVal, m_AShr(m_Not(m_Value(X)), m_Value(Y))))
return BinaryOperator::CreateAShr(X, Y);
+ // Treat lshr with non-negative operand as ashr.
+ // ~(~X >>u Y) --> (X >>s Y) iff X is known negative
+ if (match(NotVal, m_LShr(m_Not(m_Value(X)), m_Value(Y))) &&
+ isKnownNegative(X, SQ.getWithInstruction(NotVal)))
+ return BinaryOperator::CreateAShr(X, Y);
+
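A sketch of the new fold; the negative range metadata here is one hypothetical way to make isKnownNegative succeed:

  define i8 @not_lshr_not(ptr %p, i8 %y) {
    %x    = load i8, ptr %p, !range !0   ; [-128, 0): %x is known negative
    %notx = xor i8 %x, -1
    %shr  = lshr i8 %notx, %y
    %not  = xor i8 %shr, -1
    ; -> %not = ashr i8 %x, %y
    ret i8 %not
  }
  !0 = !{i8 -128, i8 0}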
// Bit-hack form of a signbit test for iN type:
// ~(X >>s (N - 1)) --> sext i1 (X > -1) to iN
unsigned FullShift = Ty->getScalarSizeInBits() - 1;
@@ -4259,15 +4394,6 @@ Instruction *InstCombinerImpl::foldNot(BinaryOperator &I) {
// ~max(~X, Y) --> min(X, ~Y)
auto *II = dyn_cast<IntrinsicInst>(NotOp);
if (II && II->hasOneUse()) {
- if (match(NotOp, m_MaxOrMin(m_Value(X), m_Value(Y))) &&
- isFreeToInvert(X, X->hasOneUse()) &&
- isFreeToInvert(Y, Y->hasOneUse())) {
- Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
- Value *NotX = Builder.CreateNot(X);
- Value *NotY = Builder.CreateNot(Y);
- Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, NotX, NotY);
- return replaceInstUsesWith(I, InvMaxMin);
- }
if (match(NotOp, m_c_MaxOrMin(m_Not(m_Value(X)), m_Value(Y)))) {
Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
Value *NotY = Builder.CreateNot(Y);
@@ -4317,6 +4443,11 @@ Instruction *InstCombinerImpl::foldNot(BinaryOperator &I) {
if (Instruction *NewXor = foldNotXor(I, Builder))
return NewXor;
+ // TODO: Could handle multi-use better by checking if all uses of NotOp (other
+ // than I) can be inverted.
+ if (Value *R = getFreelyInverted(NotOp, NotOp->hasOneUse(), &Builder))
+ return replaceInstUsesWith(I, R);
+
return nullptr;
}
@@ -4366,7 +4497,7 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
Value *M;
if (match(&I, m_c_Xor(m_c_And(m_Not(m_Value(M)), m_Value()),
m_c_And(m_Deferred(M), m_Value()))))
- return BinaryOperator::CreateOr(Op0, Op1);
+ return BinaryOperator::CreateDisjointOr(Op0, Op1);
if (Instruction *Xor = visitMaskedMerge(I, Builder))
return Xor;
@@ -4466,6 +4597,27 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
// a 'not' op and moving it before the shift. Doing that requires
// preventing the inverse fold in canShiftBinOpWithConstantRHS().
}
+
+ // If we are XORing the sign bit of a floating-point value, convert
+ // this to fneg, then cast back to integer.
+ //
+  // This is a generous interpretation of noimplicitfloat; this is not a true
+  // floating-point operation.
+ //
+ // Assumes any IEEE-represented type has the sign bit in the high bit.
+ // TODO: Unify with APInt matcher. This version allows undef unlike m_APInt
+ Value *CastOp;
+ if (match(Op0, m_BitCast(m_Value(CastOp))) && match(Op1, m_SignMask()) &&
+ !Builder.GetInsertBlock()->getParent()->hasFnAttribute(
+ Attribute::NoImplicitFloat)) {
+ Type *EltTy = CastOp->getType()->getScalarType();
+ if (EltTy->isFloatingPointTy() && EltTy->isIEEE() &&
+ EltTy->getPrimitiveSizeInBits() ==
+ I.getType()->getScalarType()->getPrimitiveSizeInBits()) {
+ Value *FNeg = Builder.CreateFNeg(CastOp);
+ return new BitCastInst(FNeg, I.getType());
+ }
+ }
}
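The xor analogue of the earlier or-with-signmask fold, as a minimal sketch:

  define i32 @flip_fp_signbit(float %f) {
    %bits = bitcast float %f to i32
    %xor  = xor i32 %bits, -2147483648   ; flip the IEEE sign bit
    ; -> %fneg = fneg float %f
    ;    %xor  = bitcast float %fneg to i32
    ret i32 %xor
  }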
// FIXME: This should not be limited to scalar (pull into APInt match above).
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index d3ec6a7aa667..1539fa9a3269 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -89,12 +89,6 @@ static cl::opt<unsigned> GuardWideningWindow(
cl::desc("How wide an instruction window to bypass looking for "
"another guard"));
-namespace llvm {
-/// enable preservation of attributes in assume like:
-/// call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
-extern cl::opt<bool> EnableKnowledgeRetention;
-} // namespace llvm
-
/// Return the specified type promoted as it would be to pass though a va_arg
/// area.
static Type *getPromotedType(Type *Ty) {
@@ -174,14 +168,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
return nullptr;
// Use an integer load+store unless we can find something better.
- unsigned SrcAddrSp =
- cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
- unsigned DstAddrSp =
- cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
-
IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
- Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
- Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
// If the memcpy has metadata describing the members, see if we can get the
// TBAA tag describing our copy.
@@ -200,8 +187,8 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
CopyMD = cast<MDNode>(M->getOperand(2));
}
- Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
- Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
+ Value *Src = MI->getArgOperand(1);
+ Value *Dest = MI->getArgOperand(0);
LoadInst *L = Builder.CreateLoad(IntType, Src);
// Alignment from the mem intrinsic will be better, so use it.
L->setAlignment(*CopySrcAlign);
@@ -291,9 +278,6 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
Value *Dest = MI->getDest();
- unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
- Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
- Dest = Builder.CreateBitCast(Dest, NewDstPtrTy);
// Extract the fill value and store.
const uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
@@ -301,7 +285,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
for (auto *DAI : at::getAssignmentMarkers(S)) {
- if (any_of(DAI->location_ops(), [&](Value *V) { return V == FillC; }))
+ if (llvm::is_contained(DAI->location_ops(), FillC))
DAI->replaceVariableLocationOp(FillC, FillVal);
}
@@ -500,8 +484,6 @@ static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
if (Result->getType()->getPointerAddressSpace() !=
II.getType()->getPointerAddressSpace())
Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
- if (Result->getType() != II.getType())
- Result = IC.Builder.CreateBitCast(Result, II.getType());
return cast<Instruction>(Result);
}
@@ -532,6 +514,8 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
}
+ Constant *C;
+
if (IsTZ) {
// cttz(-x) -> cttz(x)
if (match(Op0, m_Neg(m_Value(X))))
@@ -567,6 +551,38 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
return IC.replaceOperand(II, 0, X);
+
+ // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
+ if (match(Op0, m_Shl(m_ImmConstant(C), m_Value(X))) &&
+ match(Op1, m_One())) {
+ Value *ConstCttz =
+ IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
+ return BinaryOperator::CreateAdd(ConstCttz, X);
+ }
+
+ // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
+ if (match(Op0, m_Exact(m_LShr(m_ImmConstant(C), m_Value(X)))) &&
+ match(Op1, m_One())) {
+ Value *ConstCttz =
+ IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
+ return BinaryOperator::CreateSub(ConstCttz, X);
+ }
+ } else {
+ // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
+ if (match(Op0, m_LShr(m_ImmConstant(C), m_Value(X))) &&
+ match(Op1, m_One())) {
+ Value *ConstCtlz =
+ IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
+ return BinaryOperator::CreateAdd(ConstCtlz, X);
+ }
+
+ // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
+ if (match(Op0, m_NUWShl(m_ImmConstant(C), m_Value(X))) &&
+ match(Op1, m_One())) {
+ Value *ConstCtlz =
+ IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
+ return BinaryOperator::CreateSub(ConstCtlz, X);
+ }
}
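Taking the cttz/shl case as representative, a hypothetical example (cttz(16) == 4, and the i1 true flag makes a zero input poison, which keeps the fold sound):

  define i8 @cttz_of_shifted_const(i8 %v) {
    %shl = shl i8 16, %v
    %r   = call i8 @llvm.cttz.i8(i8 %shl, i1 true)
    ; -> %r = add i8 4, %v
    ret i8 %r
  }
  declare i8 @llvm.cttz.i8(i8, i1)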
KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
@@ -911,11 +927,27 @@ Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
Value *FAbsSrc;
if (match(Src0, m_FAbs(m_Value(FAbsSrc)))) {
- II.setArgOperand(1, ConstantInt::get(Src1->getType(), fabs(Mask)));
+ II.setArgOperand(1, ConstantInt::get(Src1->getType(), inverse_fabs(Mask)));
return replaceOperand(II, 0, FAbsSrc);
}
- // TODO: is.fpclass(x, fcInf) -> fabs(x) == inf
+ if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) &&
+ (IsOrdered || IsUnordered) && !IsStrict) {
+ // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf
+ // is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf
+ // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf
+ // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp une fabs(x), +inf
+ Constant *Inf = ConstantFP::getInfinity(Src0->getType());
+ FCmpInst::Predicate Pred =
+ IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ;
+ if (OrderedInvertedMask == fcInf)
+ Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE;
+
+ Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Src0);
+ Value *CmpInf = Builder.CreateFCmp(Pred, Fabs, Inf);
+ CmpInf->takeName(&II);
+ return replaceInstUsesWith(II, CmpInf);
+ }
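For instance, with the full fcInf mask (0x204 == 516), the ordered case becomes (sketch):

  define i1 @is_any_inf(double %x) {
    %r = call i1 @llvm.is.fpclass.f64(double %x, i32 516)
    ; -> %fabs = call double @llvm.fabs.f64(double %x)
    ;    %r    = fcmp oeq double %fabs, 0x7FF0000000000000
    ret i1 %r
  }
  declare i1 @llvm.is.fpclass.f64(double, i32)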
if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) &&
(IsOrdered || IsUnordered) && !IsStrict) {
@@ -992,8 +1024,7 @@ Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
return replaceInstUsesWith(II, FCmp);
}
- KnownFPClass Known = computeKnownFPClass(
- Src0, DL, Mask, 0, &getTargetLibraryInfo(), &AC, &II, &DT);
+ KnownFPClass Known = computeKnownFPClass(Src0, Mask, &II);
// Clear test bits we know must be false from the source value.
// fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
@@ -1030,6 +1061,20 @@ static std::optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
}
+static std::optional<bool> getKnownSignOrZero(Value *Op, Instruction *CxtI,
+ const DataLayout &DL,
+ AssumptionCache *AC,
+ DominatorTree *DT) {
+ if (std::optional<bool> Sign = getKnownSign(Op, CxtI, DL, AC, DT))
+ return Sign;
+
+ Value *X, *Y;
+ if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
+ return isImpliedByDomCondition(ICmpInst::ICMP_SLE, X, Y, CxtI, DL);
+
+ return std::nullopt;
+}
+
/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
static bool signBitMustBeTheSame(Value *Op0, Value *Op1, Instruction *CxtI,
const DataLayout &DL, AssumptionCache *AC,
@@ -1491,6 +1536,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
if (II->isCommutative()) {
+ if (Instruction *I = foldCommutativeIntrinsicOverSelects(*II))
+ return I;
+
if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
return NewCall;
}
@@ -1530,12 +1578,15 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X))))
return replaceOperand(*II, 0, X);
- if (std::optional<bool> Sign = getKnownSign(IIOperand, II, DL, &AC, &DT)) {
- // abs(x) -> x if x >= 0
- if (!*Sign)
+ if (std::optional<bool> Known =
+ getKnownSignOrZero(IIOperand, II, DL, &AC, &DT)) {
+      // abs(x) -> x if x >= 0 (includes abs(x-y) --> x - y where x >= y)
+      // abs(x) -> x if x > 0 (includes abs(x-y) --> x - y where x > y)
+ if (!*Known)
return replaceInstUsesWith(*II, IIOperand);
// abs(x) -> -x if x < 0
+      // abs(x) -> -x if x <= 0 (includes abs(x-y) --> y - x where x <= y)
if (IntMinIsPoison)
return BinaryOperator::CreateNSWNeg(IIOperand);
return BinaryOperator::CreateNeg(IIOperand);
@@ -1580,8 +1631,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Constant *C;
if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
I0->hasOneUse()) {
- Constant *NarrowC = ConstantExpr::getTrunc(C, X->getType());
- if (ConstantExpr::getZExt(NarrowC, II->getType()) == C) {
+ if (Constant *NarrowC = getLosslessUnsignedTrunc(C, X->getType())) {
Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
}
@@ -1603,13 +1653,26 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Constant *C;
if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
I0->hasOneUse()) {
- Constant *NarrowC = ConstantExpr::getTrunc(C, X->getType());
- if (ConstantExpr::getSExt(NarrowC, II->getType()) == C) {
+ if (Constant *NarrowC = getLosslessSignedTrunc(C, X->getType())) {
Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
}
}
+ // umin(i1 X, i1 Y) -> and i1 X, Y
+ // smax(i1 X, i1 Y) -> and i1 X, Y
+ if ((IID == Intrinsic::umin || IID == Intrinsic::smax) &&
+ II->getType()->isIntOrIntVectorTy(1)) {
+ return BinaryOperator::CreateAnd(I0, I1);
+ }
+
+ // umax(i1 X, i1 Y) -> or i1 X, Y
+ // smin(i1 X, i1 Y) -> or i1 X, Y
+ if ((IID == Intrinsic::umax || IID == Intrinsic::smin) &&
+ II->getType()->isIntOrIntVectorTy(1)) {
+ return BinaryOperator::CreateOr(I0, I1);
+ }
+
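On i1, where true is both the unsigned maximum and the signed minimum, these reductions follow directly; e.g.:

  define i1 @umin_i1(i1 %x, i1 %y) {
    %r = call i1 @llvm.umin.i1(i1 %x, i1 %y)
    ; -> %r = and i1 %x, %y
    ret i1 %r
  }
  declare i1 @llvm.umin.i1(i1, i1)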
if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
// smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
// smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
@@ -1672,12 +1735,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
Value *A;
if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
- !isFreeToInvert(A, A->hasOneUse()) &&
- isFreeToInvert(Y, Y->hasOneUse())) {
- Value *NotY = Builder.CreateNot(Y);
- Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
- Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
- return BinaryOperator::CreateNot(InvMaxMin);
+ !isFreeToInvert(A, A->hasOneUse())) {
+ if (Value *NotY = getFreelyInverted(Y, Y->hasOneUse(), &Builder)) {
+ Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
+ Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
+ return BinaryOperator::CreateNot(InvMaxMin);
+ }
}
return nullptr;
};
@@ -1929,6 +1992,52 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
return &CI;
break;
}
+ case Intrinsic::ptrmask: {
+ unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType());
+ KnownBits Known(BitWidth);
+ if (SimplifyDemandedInstructionBits(*II, Known))
+ return II;
+
+ Value *InnerPtr, *InnerMask;
+ bool Changed = false;
+ // Combine:
+ // (ptrmask (ptrmask p, A), B)
+ // -> (ptrmask p, (and A, B))
+ if (match(II->getArgOperand(0),
+ m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(InnerPtr),
+ m_Value(InnerMask))))) {
+ assert(II->getArgOperand(1)->getType() == InnerMask->getType() &&
+ "Mask types must match");
+ // TODO: If InnerMask == Op1, we could copy attributes from inner
+ // callsite -> outer callsite.
+ Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask);
+ replaceOperand(CI, 0, InnerPtr);
+ replaceOperand(CI, 1, NewMask);
+ Changed = true;
+ }
+
+ // See if we can deduce non-null.
+ if (!CI.hasRetAttr(Attribute::NonNull) &&
+ (Known.isNonZero() ||
+ isKnownNonZero(II, DL, /*Depth*/ 0, &AC, II, &DT))) {
+ CI.addRetAttr(Attribute::NonNull);
+ Changed = true;
+ }
+
+ unsigned NewAlignmentLog =
+ std::min(Value::MaxAlignmentExponent,
+ std::min(BitWidth - 1, Known.countMinTrailingZeros()));
+ // Known bits will capture if we had alignment information associated with
+ // the pointer argument.
+ if (NewAlignmentLog > Log2(CI.getRetAlign().valueOrOne())) {
+ CI.addRetAttr(Attribute::getWithAlignment(
+ CI.getContext(), Align(uint64_t(1) << NewAlignmentLog)));
+ Changed = true;
+ }
+ if (Changed)
+ return &CI;
+ break;
+ }
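A sketch of the nested-ptrmask combine; the masks are simply ANDed (here -16 & -64 == -64), and the known trailing zeros can also upgrade the return alignment:

  define ptr @ptrmask_of_ptrmask(ptr %p) {
    %m1 = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -16)
    %m2 = call ptr @llvm.ptrmask.p0.i64(ptr %m1, i64 -64)
    ; -> %m2 = call align 64 ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -64)
    ret ptr %m2
  }
  declare ptr @llvm.ptrmask.p0.i64(ptr, i64)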
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow: {
if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
@@ -2493,10 +2602,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
VectorType *NewVT = cast<VectorType>(II->getType());
if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
- CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
- CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);
-
- return replaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
+ Value *V0 = Builder.CreateIntCast(CV0, NewVT, /*isSigned=*/!Zext);
+ Value *V1 = Builder.CreateIntCast(CV1, NewVT, /*isSigned=*/!Zext);
+ return replaceInstUsesWith(CI, Builder.CreateMul(V0, V1));
}
// Couldn't simplify - canonicalize constant to the RHS.
@@ -2950,24 +3058,27 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
return replaceOperand(CI, 0, InsertTuple);
}
- auto *DstTy = dyn_cast<FixedVectorType>(ReturnType);
- auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+ auto *DstTy = dyn_cast<VectorType>(ReturnType);
+ auto *VecTy = dyn_cast<VectorType>(Vec->getType());
- // Only canonicalize if the the destination vector and Vec are fixed
- // vectors.
if (DstTy && VecTy) {
- unsigned DstNumElts = DstTy->getNumElements();
- unsigned VecNumElts = VecTy->getNumElements();
+ auto DstEltCnt = DstTy->getElementCount();
+ auto VecEltCnt = VecTy->getElementCount();
unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
// Extracting the entirety of Vec is a nop.
- if (VecNumElts == DstNumElts) {
+ if (DstEltCnt == VecTy->getElementCount()) {
replaceInstUsesWith(CI, Vec);
return eraseInstFromFunction(CI);
}
+ // Only canonicalize to shufflevector if the destination vector and
+ // Vec are fixed vectors.
+ if (VecEltCnt.isScalable() || DstEltCnt.isScalable())
+ break;
+
SmallVector<int, 8> Mask;
- for (unsigned i = 0; i != DstNumElts; ++i)
+ for (unsigned i = 0; i != DstEltCnt.getKnownMinValue(); ++i)
Mask.push_back(IdxN + i);
Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask);
@@ -3943,9 +4054,9 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
NC->setDebugLoc(Caller->getDebugLoc());
- Instruction *InsertPt = NewCall->getInsertionPointAfterDef();
- assert(InsertPt && "No place to insert cast");
- InsertNewInstBefore(NC, *InsertPt);
+ auto OptInsertPt = NewCall->getInsertionPointAfterDef();
+ assert(OptInsertPt && "No place to insert cast");
+ InsertNewInstBefore(NC, *OptInsertPt);
Worklist.pushUsersToWorkList(*Caller);
} else {
NV = PoisonValue::get(Caller->getType());
@@ -3972,8 +4083,6 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
Instruction *
InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
IntrinsicInst &Tramp) {
- Value *Callee = Call.getCalledOperand();
- Type *CalleeTy = Callee->getType();
FunctionType *FTy = Call.getFunctionType();
AttributeList Attrs = Call.getAttributes();
@@ -4070,12 +4179,8 @@ InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
// Replace the trampoline call with a direct call. Let the generic
// code sort out any function type mismatches.
- FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
- FTy->isVarArg());
- Constant *NewCallee =
- NestF->getType() == PointerType::getUnqual(NewFTy) ?
- NestF : ConstantExpr::getBitCast(NestF,
- PointerType::getUnqual(NewFTy));
+ FunctionType *NewFTy =
+ FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg());
AttributeList NewPAL =
AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
Attrs.getRetAttrs(), NewArgAttrs);
@@ -4085,19 +4190,18 @@ InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
Instruction *NewCaller;
if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
- NewCaller = InvokeInst::Create(NewFTy, NewCallee,
- II->getNormalDest(), II->getUnwindDest(),
- NewArgs, OpBundles);
+ NewCaller = InvokeInst::Create(NewFTy, NestF, II->getNormalDest(),
+ II->getUnwindDest(), NewArgs, OpBundles);
cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
} else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
NewCaller =
- CallBrInst::Create(NewFTy, NewCallee, CBI->getDefaultDest(),
+ CallBrInst::Create(NewFTy, NestF, CBI->getDefaultDest(),
CBI->getIndirectDests(), NewArgs, OpBundles);
cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
} else {
- NewCaller = CallInst::Create(NewFTy, NewCallee, NewArgs, OpBundles);
+ NewCaller = CallInst::Create(NewFTy, NestF, NewArgs, OpBundles);
cast<CallInst>(NewCaller)->setTailCallKind(
cast<CallInst>(Call).getTailCallKind());
cast<CallInst>(NewCaller)->setCallingConv(
@@ -4113,7 +4217,23 @@ InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
// Replace the trampoline call with a direct call. Since there is no 'nest'
// parameter, there is no need to adjust the argument list. Let the generic
// code sort out any function type mismatches.
- Constant *NewCallee = ConstantExpr::getBitCast(NestF, CalleeTy);
- Call.setCalledFunction(FTy, NewCallee);
+ Call.setCalledFunction(FTy, NestF);
return &Call;
}
+
+// op(select(%v, %x, %y), select(%v, %y, %x)) --> op(%x, %y)
+Instruction *
+InstCombinerImpl::foldCommutativeIntrinsicOverSelects(IntrinsicInst &II) {
+ assert(II.isCommutative());
+
+ Value *A, *B, *C;
+ if (match(II.getOperand(0), m_Select(m_Value(A), m_Value(B), m_Value(C))) &&
+ match(II.getOperand(1),
+ m_Select(m_Specific(A), m_Specific(C), m_Specific(B)))) {
+ replaceOperand(II, 0, B);
+ replaceOperand(II, 1, C);
+ return &II;
+ }
+
+ return nullptr;
+}
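A sketch of the select-commutation fold for a commutative intrinsic:

  define i8 @umax_over_selects(i1 %c, i8 %x, i8 %y) {
    %s1 = select i1 %c, i8 %x, i8 %y
    %s2 = select i1 %c, i8 %y, i8 %x
    %r  = call i8 @llvm.umax.i8(i8 %s1, i8 %s2)
    ; -> %r = call i8 @llvm.umax.i8(i8 %x, i8 %y)
    ret i8 %r
  }
  declare i8 @llvm.umax.i8(i8, i8)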
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 5c84f666616d..6629ca840a67 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -29,11 +29,8 @@ using namespace PatternMatch;
/// true for, actually insert the code to evaluate the expression.
Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
bool isSigned) {
- if (Constant *C = dyn_cast<Constant>(V)) {
- C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
- // If we got a constantexpr back, try to simplify it with DL info.
- return ConstantFoldConstant(C, DL, &TLI);
- }
+ if (Constant *C = dyn_cast<Constant>(V))
+ return ConstantFoldIntegerCast(C, Ty, isSigned, DL);
// Otherwise, it must be an instruction.
Instruction *I = cast<Instruction>(V);
@@ -112,7 +109,7 @@ Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
}
Res->takeName(I);
- return InsertNewInstWith(Res, *I);
+ return InsertNewInstWith(Res, I->getIterator());
}
Instruction::CastOps
@@ -217,7 +214,8 @@ Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
/// free to be evaluated in that type. This is a helper for canEvaluate*.
static bool canAlwaysEvaluateInType(Value *V, Type *Ty) {
if (isa<Constant>(V))
- return true;
+ return match(V, m_ImmConstant());
+
Value *X;
if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
X->getType() == Ty)
@@ -229,7 +227,6 @@ static bool canAlwaysEvaluateInType(Value *V, Type *Ty) {
/// Filter out values that we can not evaluate in the destination type for free.
/// This is a helper for canEvaluate*.
static bool canNotEvaluateInType(Value *V, Type *Ty) {
- assert(!isa<Constant>(V) && "Constant should already be handled.");
if (!isa<Instruction>(V))
return true;
// We don't extend or shrink something that has multiple uses -- doing so
@@ -505,11 +502,13 @@ Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
if (!MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc))
return nullptr;
- // We have an unnecessarily wide rotate!
- // trunc (or (shl ShVal0, ShAmt), (lshr ShVal1, BitWidth - ShAmt))
- // Narrow the inputs and convert to funnel shift intrinsic:
- // llvm.fshl.i8(trunc(ShVal), trunc(ShVal), trunc(ShAmt))
- Value *NarrowShAmt = Builder.CreateTrunc(ShAmt, DestTy);
+ // Adjust the width of ShAmt for narrowed funnel shift operation:
+ // - Zero-extend if ShAmt is narrower than the destination type.
+ // - Truncate if ShAmt is wider, discarding non-significant high-order bits.
+ // This prepares ShAmt for llvm.fshl.i8(trunc(ShVal), trunc(ShVal),
+ // zext/trunc(ShAmt)).
+ Value *NarrowShAmt = Builder.CreateZExtOrTrunc(ShAmt, DestTy);
+
Value *X, *Y;
X = Y = Builder.CreateTrunc(ShVal0, DestTy);
if (ShVal0 != ShVal1)
@@ -582,13 +581,15 @@ Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
APInt(SrcWidth, MaxShiftAmt)))) {
auto *OldShift = cast<Instruction>(Trunc.getOperand(0));
bool IsExact = OldShift->isExact();
- auto *ShAmt = ConstantExpr::getIntegerCast(C, A->getType(), true);
- ShAmt = Constant::mergeUndefsWith(ShAmt, C);
- Value *Shift =
- OldShift->getOpcode() == Instruction::AShr
- ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
- : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
- return CastInst::CreateTruncOrBitCast(Shift, DestTy);
+ if (Constant *ShAmt = ConstantFoldIntegerCast(C, A->getType(),
+ /*IsSigned*/ true, DL)) {
+ ShAmt = Constant::mergeUndefsWith(ShAmt, C);
+ Value *Shift =
+ OldShift->getOpcode() == Instruction::AShr
+ ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
+ : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
+ return CastInst::CreateTruncOrBitCast(Shift, DestTy);
+ }
}
}
break;
@@ -904,19 +905,18 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
// zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
// zext (X != 0) to i32 --> X iff X has only the low bit set.
// zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
- if (Op1CV->isZero() && Cmp->isEquality() &&
- (Cmp->getOperand(0)->getType() == Zext.getType() ||
- Cmp->getPredicate() == ICmpInst::ICMP_NE)) {
- // If Op1C some other power of two, convert:
- KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext);
+ if (Op1CV->isZero() && Cmp->isEquality()) {
// Exactly 1 possible 1? But not the high-bit because that is
// canonicalized to this form.
+ KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext);
APInt KnownZeroMask(~Known.Zero);
- if (KnownZeroMask.isPowerOf2() &&
- (Zext.getType()->getScalarSizeInBits() !=
- KnownZeroMask.logBase2() + 1)) {
- uint32_t ShAmt = KnownZeroMask.logBase2();
+ uint32_t ShAmt = KnownZeroMask.logBase2();
+ bool IsExpectShAmt = KnownZeroMask.isPowerOf2() &&
+ (Zext.getType()->getScalarSizeInBits() != ShAmt + 1);
+ if (IsExpectShAmt &&
+ (Cmp->getOperand(0)->getType() == Zext.getType() ||
+ Cmp->getPredicate() == ICmpInst::ICMP_NE || ShAmt == 0)) {
Value *In = Cmp->getOperand(0);
if (ShAmt) {
// Perform a logical shr by shiftamt.
@@ -1184,14 +1184,14 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
Value *X;
if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
X->getType() == DestTy)
- return BinaryOperator::CreateAnd(X, ConstantExpr::getZExt(C, DestTy));
+ return BinaryOperator::CreateAnd(X, Builder.CreateZExt(C, DestTy));
// zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
Value *And;
if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
X->getType() == DestTy) {
- Constant *ZC = ConstantExpr::getZExt(C, DestTy);
+ Value *ZC = Builder.CreateZExt(C, DestTy);
return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
}
@@ -1202,7 +1202,7 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
// zext (and (trunc X), C) --> and X, (zext C)
if (match(Src, m_And(m_Trunc(m_Value(X)), m_Constant(C))) &&
X->getType() == DestTy) {
- Constant *ZextC = ConstantExpr::getZExt(C, DestTy);
+ Value *ZextC = Builder.CreateZExt(C, DestTy);
return BinaryOperator::CreateAnd(X, ZextC);
}
@@ -1221,6 +1221,22 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
}
}
+ if (!Zext.hasNonNeg()) {
+ // If this zero extend is only used by a shift, add nneg flag.
+ if (Zext.hasOneUse() &&
+ SrcTy->getScalarSizeInBits() >
+ Log2_64_Ceil(DestTy->getScalarSizeInBits()) &&
+ match(Zext.user_back(), m_Shift(m_Value(), m_Specific(&Zext)))) {
+ Zext.setNonNeg();
+ return &Zext;
+ }
+
+ if (isKnownNonNegative(Src, SQ.getWithInstruction(&Zext))) {
+ Zext.setNonNeg();
+ return &Zext;
+ }
+ }
+
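A small sketch of the second, value-tracking case: the mask makes the source provably non-negative, so the zext gains the nneg flag.

  define i16 @zext_known_nonneg(i8 %x) {
    %pos = and i8 %x, 127              ; high bit known clear
    %z   = zext i8 %pos to i16
    ; -> %z = zext nneg i8 %pos to i16
    ret i16 %z
  }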
return nullptr;
}
@@ -1373,8 +1389,11 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
unsigned DestBitSize = DestTy->getScalarSizeInBits();
// If the value being extended is zero or positive, use a zext instead.
- if (isKnownNonNegative(Src, DL, 0, &AC, &Sext, &DT))
- return CastInst::Create(Instruction::ZExt, Src, DestTy);
+ if (isKnownNonNegative(Src, SQ.getWithInstruction(&Sext))) {
+ auto CI = CastInst::Create(Instruction::ZExt, Src, DestTy);
+ CI->setNonNeg(true);
+ return CI;
+ }
// Try to extend the entire expression tree to the wide destination type.
if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
@@ -1445,9 +1464,11 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
// TODO: Eventually this could be subsumed by EvaluateInDifferentType.
Constant *BA = nullptr, *CA = nullptr;
if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
- m_Constant(CA))) &&
+ m_ImmConstant(CA))) &&
BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
- Constant *WideCurrShAmt = ConstantExpr::getSExt(CA, DestTy);
+ Constant *WideCurrShAmt =
+ ConstantFoldCastOperand(Instruction::SExt, CA, DestTy, DL);
+ assert(WideCurrShAmt && "Constant folding of ImmConstant cannot fail");
Constant *NumLowbitsLeft = ConstantExpr::getSub(
ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
Constant *NewShAmt = ConstantExpr::getSub(
@@ -1915,29 +1936,6 @@ Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
return nullptr;
}
-/// Implement the transforms for cast of pointer (bitcast/ptrtoint)
-Instruction *InstCombinerImpl::commonPointerCastTransforms(CastInst &CI) {
- Value *Src = CI.getOperand(0);
-
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
- // If casting the result of a getelementptr instruction with no offset, turn
- // this into a cast of the original pointer!
- if (GEP->hasAllZeroIndices() &&
- // If CI is an addrspacecast and GEP changes the poiner type, merging
- // GEP into CI would undo canonicalizing addrspacecast with different
- // pointer types, causing infinite loops.
- (!isa<AddrSpaceCastInst>(CI) ||
- GEP->getType() == GEP->getPointerOperandType())) {
- // Changing the cast operand is usually not a good idea but it is safe
- // here because the pointer operand is being replaced with another
- // pointer operand so the opcode doesn't need to change.
- return replaceOperand(CI, 0, GEP->getOperand(0));
- }
- }
-
- return commonCastTransforms(CI);
-}
-
Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
// If the destination integer type is not the intptr_t type for this target,
// do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
@@ -1955,6 +1953,15 @@ Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
}
+ // (ptrtoint (ptrmask P, M))
+ // -> (and (ptrtoint P), M)
+ // This is generally beneficial as `and` is better supported than `ptrmask`.
+ Value *Ptr, *Mask;
+ if (match(SrcOp, m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(Ptr),
+ m_Value(Mask)))) &&
+ Mask->getType() == Ty)
+ return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask);
+
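Sketch of the ptrtoint(ptrmask) rewrite, assuming the mask type matches the result type and the ptrmask has a single use:

  define i64 @ptrtoint_of_ptrmask(ptr %p, i64 %m) {
    %pm = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 %m)
    %i  = ptrtoint ptr %pm to i64
    ; -> %pi = ptrtoint ptr %p to i64
    ;    %i  = and i64 %pi, %m
    ret i64 %i
  }
  declare ptr @llvm.ptrmask.p0.i64(ptr, i64)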
if (auto *GEP = dyn_cast<GetElementPtrInst>(SrcOp)) {
// Fold ptrtoint(gep null, x) to multiply + constant if the GEP has one use.
// While this can increase the number of instructions it doesn't actually
@@ -1979,7 +1986,7 @@ Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
return InsertElementInst::Create(Vec, NewCast, Index);
}
- return commonPointerCastTransforms(CI);
+ return commonCastTransforms(CI);
}
/// This input value (which is known to have vector type) is being zero extended
@@ -2136,9 +2143,12 @@ static bool collectInsertionElements(Value *V, unsigned Shift,
Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
for (unsigned i = 0; i != NumElts; ++i) {
- unsigned ShiftI = Shift+i*ElementSize;
- Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
- ShiftI));
+ unsigned ShiftI = Shift + i * ElementSize;
+ Constant *Piece = ConstantFoldBinaryInstruction(
+ Instruction::LShr, C, ConstantInt::get(C->getType(), ShiftI));
+ if (!Piece)
+ return false;
+
Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
if (!collectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
isBigEndian))
@@ -2701,11 +2711,9 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
if (Instruction *I = foldBitCastSelect(CI, Builder))
return I;
- if (SrcTy->isPointerTy())
- return commonPointerCastTransforms(CI);
return commonCastTransforms(CI);
}
Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
- return commonPointerCastTransforms(CI);
+ return commonCastTransforms(CI);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 656f04370e17..289976718e52 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -12,20 +12,22 @@
#include "InstCombineInternal.h"
#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/Utils/Local.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
+#include <bitset>
using namespace llvm;
using namespace PatternMatch;
@@ -412,7 +414,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
/// Returns true if we can rewrite Start as a GEP with pointer Base
/// and some integer offset. The nodes that need to be re-written
/// for this transformation will be added to Explored.
-static bool canRewriteGEPAsOffset(Type *ElemTy, Value *Start, Value *Base,
+static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
const DataLayout &DL,
SetVector<Value *> &Explored) {
SmallVector<Value *, 16> WorkList(1, Start);
@@ -440,27 +442,15 @@ static bool canRewriteGEPAsOffset(Type *ElemTy, Value *Start, Value *Base,
continue;
}
- if (!isa<IntToPtrInst>(V) && !isa<PtrToIntInst>(V) &&
- !isa<GetElementPtrInst>(V) && !isa<PHINode>(V))
+ if (!isa<GetElementPtrInst>(V) && !isa<PHINode>(V))
// We've found some value that we can't explore which is different from
// the base. Therefore we can't do this transformation.
return false;
- if (isa<IntToPtrInst>(V) || isa<PtrToIntInst>(V)) {
- auto *CI = cast<CastInst>(V);
- if (!CI->isNoopCast(DL))
- return false;
-
- if (!Explored.contains(CI->getOperand(0)))
- WorkList.push_back(CI->getOperand(0));
- }
-
if (auto *GEP = dyn_cast<GEPOperator>(V)) {
- // We're limiting the GEP to having one index. This will preserve
- // the original pointer type. We could handle more cases in the
- // future.
- if (GEP->getNumIndices() != 1 || !GEP->isInBounds() ||
- GEP->getSourceElementType() != ElemTy)
+ // Only allow inbounds GEPs with at most one variable offset.
+ auto IsNonConst = [](Value *V) { return !isa<ConstantInt>(V); };
+ if (!GEP->isInBounds() || count_if(GEP->indices(), IsNonConst) > 1)
return false;
if (!Explored.contains(GEP->getOperand(0)))
@@ -514,7 +504,8 @@ static bool canRewriteGEPAsOffset(Type *ElemTy, Value *Start, Value *Base,
static void setInsertionPoint(IRBuilder<> &Builder, Value *V,
bool Before = true) {
if (auto *PHI = dyn_cast<PHINode>(V)) {
- Builder.SetInsertPoint(&*PHI->getParent()->getFirstInsertionPt());
+ BasicBlock *Parent = PHI->getParent();
+ Builder.SetInsertPoint(Parent, Parent->getFirstInsertionPt());
return;
}
if (auto *I = dyn_cast<Instruction>(V)) {
@@ -526,7 +517,7 @@ static void setInsertionPoint(IRBuilder<> &Builder, Value *V,
if (auto *A = dyn_cast<Argument>(V)) {
// Set the insertion point in the entry block.
BasicBlock &Entry = A->getParent()->getEntryBlock();
- Builder.SetInsertPoint(&*Entry.getFirstInsertionPt());
+ Builder.SetInsertPoint(&Entry, Entry.getFirstInsertionPt());
return;
}
// Otherwise, this is a constant and we don't need to set a new
@@ -536,7 +527,7 @@ static void setInsertionPoint(IRBuilder<> &Builder, Value *V,
/// Returns a re-written value of Start as an indexed GEP using Base as a
/// pointer.
-static Value *rewriteGEPAsOffset(Type *ElemTy, Value *Start, Value *Base,
+static Value *rewriteGEPAsOffset(Value *Start, Value *Base,
const DataLayout &DL,
SetVector<Value *> &Explored,
InstCombiner &IC) {
@@ -567,36 +558,18 @@ static Value *rewriteGEPAsOffset(Type *ElemTy, Value *Start, Value *Base,
// Create all the other instructions.
for (Value *Val : Explored) {
-
if (NewInsts.contains(Val))
continue;
- if (auto *CI = dyn_cast<CastInst>(Val)) {
- // Don't get rid of the intermediate variable here; the store can grow
- // the map which will invalidate the reference to the input value.
- Value *V = NewInsts[CI->getOperand(0)];
- NewInsts[CI] = V;
- continue;
- }
if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
- Value *Index = NewInsts[GEP->getOperand(1)] ? NewInsts[GEP->getOperand(1)]
- : GEP->getOperand(1);
setInsertionPoint(Builder, GEP);
- // Indices might need to be sign extended. GEPs will magically do
- // this, but we need to do it ourselves here.
- if (Index->getType()->getScalarSizeInBits() !=
- NewInsts[GEP->getOperand(0)]->getType()->getScalarSizeInBits()) {
- Index = Builder.CreateSExtOrTrunc(
- Index, NewInsts[GEP->getOperand(0)]->getType(),
- GEP->getOperand(0)->getName() + ".sext");
- }
-
- auto *Op = NewInsts[GEP->getOperand(0)];
+ Value *Op = NewInsts[GEP->getOperand(0)];
+ Value *OffsetV = emitGEPOffset(&Builder, DL, GEP);
if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
- NewInsts[GEP] = Index;
+ NewInsts[GEP] = OffsetV;
else
NewInsts[GEP] = Builder.CreateNSWAdd(
- Op, Index, GEP->getOperand(0)->getName() + ".add");
+ Op, OffsetV, GEP->getOperand(0)->getName() + ".add");
continue;
}
if (isa<PHINode>(Val))
@@ -624,23 +597,14 @@ static Value *rewriteGEPAsOffset(Type *ElemTy, Value *Start, Value *Base,
}
}
- PointerType *PtrTy =
- ElemTy->getPointerTo(Start->getType()->getPointerAddressSpace());
for (Value *Val : Explored) {
if (Val == Base)
continue;
- // Depending on the type, for external users we have to emit
- // a GEP or a GEP + ptrtoint.
setInsertionPoint(Builder, Val, false);
-
- // Cast base to the expected type.
- Value *NewVal = Builder.CreateBitOrPointerCast(
- Base, PtrTy, Start->getName() + "to.ptr");
- NewVal = Builder.CreateInBoundsGEP(ElemTy, NewVal, ArrayRef(NewInsts[Val]),
- Val->getName() + ".ptr");
- NewVal = Builder.CreateBitOrPointerCast(
- NewVal, Val->getType(), Val->getName() + ".conv");
+ // Create GEP for external users.
+ Value *NewVal = Builder.CreateInBoundsGEP(
+ Builder.getInt8Ty(), Base, NewInsts[Val], Val->getName() + ".ptr");
IC.replaceInstUsesWith(*cast<Instruction>(Val), NewVal);
// Add old instruction to worklist for DCE. We don't directly remove it
// here because the original compare is one of the users.
@@ -650,48 +614,6 @@ static Value *rewriteGEPAsOffset(Type *ElemTy, Value *Start, Value *Base,
return NewInsts[Start];
}
-/// Looks through GEPs, IntToPtrInsts and PtrToIntInsts in order to express
-/// the input Value as a constant indexed GEP. Returns a pair containing
-/// the GEPs Pointer and Index.
-static std::pair<Value *, Value *>
-getAsConstantIndexedAddress(Type *ElemTy, Value *V, const DataLayout &DL) {
- Type *IndexType = IntegerType::get(V->getContext(),
- DL.getIndexTypeSizeInBits(V->getType()));
-
- Constant *Index = ConstantInt::getNullValue(IndexType);
- while (true) {
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- // We accept only inbouds GEPs here to exclude the possibility of
- // overflow.
- if (!GEP->isInBounds())
- break;
- if (GEP->hasAllConstantIndices() && GEP->getNumIndices() == 1 &&
- GEP->getSourceElementType() == ElemTy) {
- V = GEP->getOperand(0);
- Constant *GEPIndex = static_cast<Constant *>(GEP->getOperand(1));
- Index = ConstantExpr::getAdd(
- Index, ConstantExpr::getSExtOrTrunc(GEPIndex, IndexType));
- continue;
- }
- break;
- }
- if (auto *CI = dyn_cast<IntToPtrInst>(V)) {
- if (!CI->isNoopCast(DL))
- break;
- V = CI->getOperand(0);
- continue;
- }
- if (auto *CI = dyn_cast<PtrToIntInst>(V)) {
- if (!CI->isNoopCast(DL))
- break;
- V = CI->getOperand(0);
- continue;
- }
- break;
- }
- return {V, Index};
-}
-
/// Converts (CMP GEPLHS, RHS) if this change would make RHS a constant.
/// We can look through PHIs, GEPs and casts in order to determine a common base
/// between GEPLHS and RHS.
@@ -706,14 +628,19 @@ static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
if (!GEPLHS->hasAllConstantIndices())
return nullptr;
- Type *ElemTy = GEPLHS->getSourceElementType();
- Value *PtrBase, *Index;
- std::tie(PtrBase, Index) = getAsConstantIndexedAddress(ElemTy, GEPLHS, DL);
+ APInt Offset(DL.getIndexTypeSizeInBits(GEPLHS->getType()), 0);
+ Value *PtrBase =
+ GEPLHS->stripAndAccumulateConstantOffsets(DL, Offset,
+ /*AllowNonInbounds*/ false);
+
+ // Bail if we looked through addrspacecast.
+ if (PtrBase->getType() != GEPLHS->getType())
+ return nullptr;
// The set of nodes that will take part in this transformation.
SetVector<Value *> Nodes;
- if (!canRewriteGEPAsOffset(ElemTy, RHS, PtrBase, DL, Nodes))
+ if (!canRewriteGEPAsOffset(RHS, PtrBase, DL, Nodes))
return nullptr;
// We know we can re-write this as
@@ -722,13 +649,14 @@ static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
// can't have overflow on either side. We can therefore re-write
// this as:
// OFFSET1 cmp OFFSET2
- Value *NewRHS = rewriteGEPAsOffset(ElemTy, RHS, PtrBase, DL, Nodes, IC);
+ Value *NewRHS = rewriteGEPAsOffset(RHS, PtrBase, DL, Nodes, IC);
// RewriteGEPAsOffset has replaced RHS and all of its uses with a re-written
// GEP having PtrBase as the pointer base, and has returned in NewRHS the
// offset. Since Index is the offset of LHS to the base pointer, we will now
// compare the offsets instead of comparing the pointers.
- return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Index, NewRHS);
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond),
+ IC.Builder.getInt(Offset), NewRHS);
}
/// Fold comparisons between a GEP instruction and something else. At this point
@@ -844,17 +772,6 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
return transformToIndexedCompare(GEPLHS, RHS, Cond, DL, *this);
}
- // If one of the GEPs has all zero indices, recurse.
- // FIXME: Handle vector of pointers.
- if (!GEPLHS->getType()->isVectorTy() && GEPLHS->hasAllZeroIndices())
- return foldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
- ICmpInst::getSwappedPredicate(Cond), I);
-
- // If the other GEP has all zero indices, recurse.
- // FIXME: Handle vector of pointers.
- if (!GEPRHS->getType()->isVectorTy() && GEPRHS->hasAllZeroIndices())
- return foldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
-
bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds();
if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands() &&
GEPLHS->getSourceElementType() == GEPRHS->getSourceElementType()) {
@@ -894,8 +811,8 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// Only lower this if the icmp is the only user of the GEP or if we expect
// the result to fold to a constant!
if ((GEPsInBounds || CmpInst::isEquality(Cond)) &&
- (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
- (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
+ (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
+ (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse())) {
// ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2)
Value *L = EmitGEPOffset(GEPLHS);
Value *R = EmitGEPOffset(GEPRHS);
@@ -1285,9 +1202,9 @@ Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
if (Pred == ICmpInst::ICMP_SGT) {
Value *A, *B;
if (match(Cmp.getOperand(0), m_SMin(m_Value(A), m_Value(B)))) {
- if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT))
+ if (isKnownPositive(A, SQ.getWithInstruction(&Cmp)))
return new ICmpInst(Pred, B, Cmp.getOperand(1));
- if (isKnownPositive(B, DL, 0, &AC, &Cmp, &DT))
+ if (isKnownPositive(B, SQ.getWithInstruction(&Cmp)))
return new ICmpInst(Pred, A, Cmp.getOperand(1));
}
}
@@ -1406,34 +1323,21 @@ Instruction *InstCombinerImpl::foldICmpWithConstant(ICmpInst &Cmp) {
/// Canonicalize icmp instructions based on dominating conditions.
Instruction *InstCombinerImpl::foldICmpWithDominatingICmp(ICmpInst &Cmp) {
- // This is a cheap/incomplete check for dominance - just match a single
- // predecessor with a conditional branch.
- BasicBlock *CmpBB = Cmp.getParent();
- BasicBlock *DomBB = CmpBB->getSinglePredecessor();
- if (!DomBB)
- return nullptr;
-
- Value *DomCond;
- BasicBlock *TrueBB, *FalseBB;
- if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
- return nullptr;
-
- assert((TrueBB == CmpBB || FalseBB == CmpBB) &&
- "Predecessor block does not point to successor?");
-
- // The branch should get simplified. Don't bother simplifying this condition.
- if (TrueBB == FalseBB)
- return nullptr;
-
// We already checked simple implication in InstSimplify, only handle complex
// cases here.
-
- CmpInst::Predicate Pred = Cmp.getPredicate();
Value *X = Cmp.getOperand(0), *Y = Cmp.getOperand(1);
ICmpInst::Predicate DomPred;
- const APInt *C, *DomC;
- if (match(DomCond, m_ICmp(DomPred, m_Specific(X), m_APInt(DomC))) &&
- match(Y, m_APInt(C))) {
+ const APInt *C;
+ if (!match(Y, m_APInt(C)))
+ return nullptr;
+
+ CmpInst::Predicate Pred = Cmp.getPredicate();
+ ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, *C);
+
+ auto handleDomCond = [&](Value *DomCond, bool CondIsTrue) -> Instruction * {
+ const APInt *DomC;
+ if (!match(DomCond, m_ICmp(DomPred, m_Specific(X), m_APInt(DomC))))
+ return nullptr;
// We have 2 compares of a variable with constants. Calculate the constant
// ranges of those compares to see if we can transform the 2nd compare:
// DomBB:
@@ -1441,11 +1345,10 @@ Instruction *InstCombinerImpl::foldICmpWithDominatingICmp(ICmpInst &Cmp) {
// br DomCond, CmpBB, FalseBB
// CmpBB:
// Cmp = icmp Pred X, C
- ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, *C);
+ if (!CondIsTrue)
+ DomPred = CmpInst::getInversePredicate(DomPred);
ConstantRange DominatingCR =
- (CmpBB == TrueBB) ? ConstantRange::makeExactICmpRegion(DomPred, *DomC)
- : ConstantRange::makeExactICmpRegion(
- CmpInst::getInversePredicate(DomPred), *DomC);
+ ConstantRange::makeExactICmpRegion(DomPred, *DomC);
ConstantRange Intersection = DominatingCR.intersectWith(CR);
ConstantRange Difference = DominatingCR.difference(CR);
if (Intersection.isEmptySet())
@@ -1473,6 +1376,21 @@ Instruction *InstCombinerImpl::foldICmpWithDominatingICmp(ICmpInst &Cmp) {
return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*EqC));
if (const APInt *NeC = Difference.getSingleElement())
return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*NeC));
+ return nullptr;
+ };
+
+ for (BranchInst *BI : DC.conditionsFor(X)) {
+ auto *Cond = BI->getCondition();
+ BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
+ if (DT.dominates(Edge0, Cmp.getParent())) {
+ if (auto *V = handleDomCond(Cond, true))
+ return V;
+ } else {
+ BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
+ if (DT.dominates(Edge1, Cmp.getParent()))
+ if (auto *V = handleDomCond(Cond, false))
+ return V;
+ }
}
return nullptr;
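A sketch of what the new walk over DC.conditionsFor(X) enables (hypothetical IR): the dominating compare no longer has to sit in the single predecessor; any branch whose edge dominates the compare's block feeds the range math:

  entry:
    %dom = icmp ult i8 %x, 10
    br i1 %dom, label %then, label %exit
  then:                             ; here %x is known to lie in [0, 10)
    %c = icmp ult i8 %x, 20         ; DominatingCR \ CR is empty
    ; -> %c folds to true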
@@ -1554,6 +1472,61 @@ Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp,
return nullptr;
}
+/// Fold icmp (trunc X), (trunc Y).
+/// Fold icmp (trunc X), (zext Y).
+Instruction *
+InstCombinerImpl::foldICmpTruncWithTruncOrExt(ICmpInst &Cmp,
+ const SimplifyQuery &Q) {
+ if (Cmp.isSigned())
+ return nullptr;
+
+ Value *X, *Y;
+ ICmpInst::Predicate Pred;
+ bool YIsZext = false;
+ // Try to match icmp (trunc X), (trunc Y)
+ if (match(&Cmp, m_ICmp(Pred, m_Trunc(m_Value(X)), m_Trunc(m_Value(Y))))) {
+ if (X->getType() != Y->getType() &&
+ (!Cmp.getOperand(0)->hasOneUse() || !Cmp.getOperand(1)->hasOneUse()))
+ return nullptr;
+ if (!isDesirableIntType(X->getType()->getScalarSizeInBits()) &&
+ isDesirableIntType(Y->getType()->getScalarSizeInBits())) {
+ std::swap(X, Y);
+ Pred = Cmp.getSwappedPredicate(Pred);
+ }
+ }
+ // Try to match icmp (trunc X), (zext Y)
+ else if (match(&Cmp, m_c_ICmp(Pred, m_Trunc(m_Value(X)),
+ m_OneUse(m_ZExt(m_Value(Y))))))
+ YIsZext = true;
+ else
+ return nullptr;
+
+ Type *TruncTy = Cmp.getOperand(0)->getType();
+ unsigned TruncBits = TruncTy->getScalarSizeInBits();
+
+  // If this transform will end up changing from desirable types -> undesirable
+  // types, skip it.
+ if (isDesirableIntType(TruncBits) &&
+ !isDesirableIntType(X->getType()->getScalarSizeInBits()))
+ return nullptr;
+
+ // Check if the trunc is unneeded.
+ KnownBits KnownX = llvm::computeKnownBits(X, /*Depth*/ 0, Q);
+ if (KnownX.countMaxActiveBits() > TruncBits)
+ return nullptr;
+
+ if (!YIsZext) {
+ // If Y is also a trunc, make sure it is unneeded.
+ KnownBits KnownY = llvm::computeKnownBits(Y, /*Depth*/ 0, Q);
+ if (KnownY.countMaxActiveBits() > TruncBits)
+ return nullptr;
+ }
+
+ Value *NewY = Builder.CreateZExtOrTrunc(Y, X->getType());
+ return new ICmpInst(Pred, X, NewY);
+}
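A minimal sketch of foldICmpTruncWithTruncOrExt, assuming the mask makes the trunc lossless:

  %m  = and i32 %x, 255             ; at most 8 active bits
  %t  = trunc i32 %m to i8
  %e  = zext i1 %b to i8
  %c  = icmp eq i8 %t, %e
  ; -> the trunc drops no set bits, so compare at the wide type:
  %e2 = zext i1 %b to i32
  %c2 = icmp eq i32 %m, %e2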
+
/// Fold icmp (xor X, Y), C.
Instruction *InstCombinerImpl::foldICmpXorConstant(ICmpInst &Cmp,
BinaryOperator *Xor,
@@ -1944,19 +1917,18 @@ Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp,
return nullptr;
}
-/// Fold icmp eq/ne (or (xor (X1, X2), xor(X3, X4))), 0.
-static Value *foldICmpOrXorChain(ICmpInst &Cmp, BinaryOperator *Or,
- InstCombiner::BuilderTy &Builder) {
- // Are we using xors to bitwise check for a pair or pairs of (in)equalities?
- // Convert to a shorter form that has more potential to be folded even
- // further.
- // ((X1 ^ X2) || (X3 ^ X4)) == 0 --> (X1 == X2) && (X3 == X4)
- // ((X1 ^ X2) || (X3 ^ X4)) != 0 --> (X1 != X2) || (X3 != X4)
- // ((X1 ^ X2) || (X3 ^ X4) || (X5 ^ X6)) == 0 -->
+/// Fold icmp eq/ne (or (xor/sub (X1, X2), xor/sub (X3, X4))), 0.
+static Value *foldICmpOrXorSubChain(ICmpInst &Cmp, BinaryOperator *Or,
+ InstCombiner::BuilderTy &Builder) {
+ // Are we using xors or subs to bitwise check for a pair or pairs of
+ // (in)equalities? Convert to a shorter form that has more potential to be
+ // folded even further.
+ // ((X1 ^/- X2) || (X3 ^/- X4)) == 0 --> (X1 == X2) && (X3 == X4)
+ // ((X1 ^/- X2) || (X3 ^/- X4)) != 0 --> (X1 != X2) || (X3 != X4)
+ // ((X1 ^/- X2) || (X3 ^/- X4) || (X5 ^/- X6)) == 0 -->
// (X1 == X2) && (X3 == X4) && (X5 == X6)
- // ((X1 ^ X2) || (X3 ^ X4) || (X5 ^ X6)) != 0 -->
+ // ((X1 ^/- X2) || (X3 ^/- X4) || (X5 ^/- X6)) != 0 -->
// (X1 != X2) || (X3 != X4) || (X5 != X6)
- // TODO: Implement for sub
SmallVector<std::pair<Value *, Value *>, 2> CmpValues;
SmallVector<Value *, 16> WorkList(1, Or);
@@ -1967,9 +1939,16 @@ static Value *foldICmpOrXorChain(ICmpInst &Cmp, BinaryOperator *Or,
if (match(OrOperatorArgument,
m_OneUse(m_Xor(m_Value(Lhs), m_Value(Rhs))))) {
CmpValues.emplace_back(Lhs, Rhs);
- } else {
- WorkList.push_back(OrOperatorArgument);
+ return;
}
+
+ if (match(OrOperatorArgument,
+ m_OneUse(m_Sub(m_Value(Lhs), m_Value(Rhs))))) {
+ CmpValues.emplace_back(Lhs, Rhs);
+ return;
+ }
+
+ WorkList.push_back(OrOperatorArgument);
};
Value *CurrentValue = WorkList.pop_back_val();
@@ -2082,7 +2061,7 @@ Instruction *InstCombinerImpl::foldICmpOrConstant(ICmpInst &Cmp,
return BinaryOperator::Create(BOpc, CmpP, CmpQ);
}
- if (Value *V = foldICmpOrXorChain(Cmp, Or, Builder))
+ if (Value *V = foldICmpOrXorSubChain(Cmp, Or, Builder))
return replaceInstUsesWith(Cmp, V);
return nullptr;
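For example (hand-written, not from the patch), a sub now participates in the chain exactly like an xor:

  %d0 = xor i32 %x1, %x2
  %d1 = sub i32 %x3, %x4
  %or = or i32 %d0, %d1
  %c  = icmp eq i32 %or, 0
  ; ->
  %e0 = icmp eq i32 %x1, %x2
  %e1 = icmp eq i32 %x3, %x4
  %c2 = and i1 %e0, %e1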
@@ -2443,7 +2422,7 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
// constant-value-based preconditions in the folds below, then we could assert
// those conditions rather than checking them. This is difficult because of
// undef/poison (PR34838).
- if (IsAShr) {
+ if (IsAShr && Shr->hasOneUse()) {
if (IsExact || Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_ULT) {
// When ShAmtC can be shifted losslessly:
// icmp PRED (ashr exact X, ShAmtC), C --> icmp PRED X, (C << ShAmtC)
@@ -2483,7 +2462,7 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
ConstantInt::getAllOnesValue(ShrTy));
}
}
- } else {
+ } else if (!IsAShr) {
if (Pred == CmpInst::ICMP_ULT || (Pred == CmpInst::ICMP_UGT && IsExact)) {
// icmp ult (lshr X, ShAmtC), C --> icmp ult X, (C << ShAmtC)
// icmp ugt (lshr exact X, ShAmtC), C --> icmp ugt X, (C << ShAmtC)
@@ -2888,19 +2867,97 @@ Instruction *InstCombinerImpl::foldICmpSubConstant(ICmpInst &Cmp,
return new ICmpInst(SwappedPred, Add, ConstantInt::get(Ty, ~C));
}
+static Value *createLogicFromTable(const std::bitset<4> &Table, Value *Op0,
+ Value *Op1, IRBuilderBase &Builder,
+ bool HasOneUse) {
+ auto FoldConstant = [&](bool Val) {
+ Constant *Res = Val ? Builder.getTrue() : Builder.getFalse();
+ if (Op0->getType()->isVectorTy())
+ Res = ConstantVector::getSplat(
+ cast<VectorType>(Op0->getType())->getElementCount(), Res);
+ return Res;
+ };
+
+ switch (Table.to_ulong()) {
+ case 0: // 0 0 0 0
+ return FoldConstant(false);
+ case 1: // 0 0 0 1
+ return HasOneUse ? Builder.CreateNot(Builder.CreateOr(Op0, Op1)) : nullptr;
+ case 2: // 0 0 1 0
+ return HasOneUse ? Builder.CreateAnd(Builder.CreateNot(Op0), Op1) : nullptr;
+ case 3: // 0 0 1 1
+ return Builder.CreateNot(Op0);
+ case 4: // 0 1 0 0
+ return HasOneUse ? Builder.CreateAnd(Op0, Builder.CreateNot(Op1)) : nullptr;
+ case 5: // 0 1 0 1
+ return Builder.CreateNot(Op1);
+ case 6: // 0 1 1 0
+ return Builder.CreateXor(Op0, Op1);
+ case 7: // 0 1 1 1
+ return HasOneUse ? Builder.CreateNot(Builder.CreateAnd(Op0, Op1)) : nullptr;
+ case 8: // 1 0 0 0
+ return Builder.CreateAnd(Op0, Op1);
+ case 9: // 1 0 0 1
+ return HasOneUse ? Builder.CreateNot(Builder.CreateXor(Op0, Op1)) : nullptr;
+ case 10: // 1 0 1 0
+ return Op1;
+ case 11: // 1 0 1 1
+ return HasOneUse ? Builder.CreateOr(Builder.CreateNot(Op0), Op1) : nullptr;
+ case 12: // 1 1 0 0
+ return Op0;
+ case 13: // 1 1 0 1
+ return HasOneUse ? Builder.CreateOr(Op0, Builder.CreateNot(Op1)) : nullptr;
+ case 14: // 1 1 1 0
+ return Builder.CreateOr(Op0, Op1);
+ case 15: // 1 1 1 1
+ return FoldConstant(true);
+ default:
+ llvm_unreachable("Invalid Operation");
+ }
+ return nullptr;
+}
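Reading the helper (an inferred convention, see the case comments): bit i of Table holds the desired result for the i-th (Op0, Op1) assignment, so Table == 6 (binary 0110) is true exactly when the two inputs differ and lowers to:

  %r = xor i1 %op0, %op1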
+
/// Fold icmp (add X, Y), C.
Instruction *InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp,
BinaryOperator *Add,
const APInt &C) {
Value *Y = Add->getOperand(1);
+ Value *X = Add->getOperand(0);
+
+ Value *Op0, *Op1;
+ Instruction *Ext0, *Ext1;
+ const CmpInst::Predicate Pred = Cmp.getPredicate();
+ if (match(Add,
+ m_Add(m_CombineAnd(m_Instruction(Ext0), m_ZExtOrSExt(m_Value(Op0))),
+ m_CombineAnd(m_Instruction(Ext1),
+ m_ZExtOrSExt(m_Value(Op1))))) &&
+ Op0->getType()->isIntOrIntVectorTy(1) &&
+ Op1->getType()->isIntOrIntVectorTy(1)) {
+ unsigned BW = C.getBitWidth();
+ std::bitset<4> Table;
+ auto ComputeTable = [&](bool Op0Val, bool Op1Val) {
+ int Res = 0;
+ if (Op0Val)
+ Res += isa<ZExtInst>(Ext0) ? 1 : -1;
+ if (Op1Val)
+ Res += isa<ZExtInst>(Ext1) ? 1 : -1;
+ return ICmpInst::compare(APInt(BW, Res, true), C, Pred);
+ };
+
+ Table[0] = ComputeTable(false, false);
+ Table[1] = ComputeTable(false, true);
+ Table[2] = ComputeTable(true, false);
+ Table[3] = ComputeTable(true, true);
+ if (auto *Cond =
+ createLogicFromTable(Table, Op0, Op1, Builder, Add->hasOneUse()))
+ return replaceInstUsesWith(Cmp, Cond);
+ }
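A worked example of the table-driven fold (hypothetical values): with one zext and one sext the add computes a - b in {-1, 0, 1}, and comparing it to 0 yields truth table 1001, i.e. case 9 above:

  %az = zext i1 %a to i32
  %bs = sext i1 %b to i32
  %s  = add i32 %az, %bs            ; s == a - b
  %c  = icmp eq i32 %s, 0           ; true iff a == b
  ; -> (case 9 needs the add to have one use)
  %x  = xor i1 %a, %b
  %c2 = xor i1 %x, true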
const APInt *C2;
if (Cmp.isEquality() || !match(Y, m_APInt(C2)))
return nullptr;
// Fold icmp pred (add X, C2), C.
- Value *X = Add->getOperand(0);
Type *Ty = Add->getType();
- const CmpInst::Predicate Pred = Cmp.getPredicate();
// If the add does not wrap, we can always adjust the compare by subtracting
// the constants. Equality comparisons are handled elsewhere. SGE/SLE/UGE/ULE
@@ -3172,18 +3229,6 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
}
}
- // Test to see if the operands of the icmp are casted versions of other
- // values. If the ptr->ptr cast can be stripped off both arguments, do so.
- if (DstType->isPointerTy() && (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
- // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast
- // so eliminate it as well.
- if (auto *BC2 = dyn_cast<BitCastInst>(Op1))
- Op1 = BC2->getOperand(0);
-
- Op1 = Builder.CreateBitCast(Op1, SrcType);
- return new ICmpInst(Pred, BCSrcOp, Op1);
- }
-
const APInt *C;
if (!match(Cmp.getOperand(1), m_APInt(C)) || !DstType->isIntegerTy() ||
!SrcType->isIntOrIntVectorTy())
@@ -3196,10 +3241,12 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
// icmp eq/ne (bitcast (not X) to iN), -1 --> icmp eq/ne (bitcast X to iN), 0
// Example: are all elements equal? --> are zero elements not equal?
// TODO: Try harder to reduce compare of 2 freely invertible operands?
- if (Cmp.isEquality() && C->isAllOnes() && Bitcast->hasOneUse() &&
- isFreeToInvert(BCSrcOp, BCSrcOp->hasOneUse())) {
- Value *Cast = Builder.CreateBitCast(Builder.CreateNot(BCSrcOp), DstType);
- return new ICmpInst(Pred, Cast, ConstantInt::getNullValue(DstType));
+ if (Cmp.isEquality() && C->isAllOnes() && Bitcast->hasOneUse()) {
+ if (Value *NotBCSrcOp =
+ getFreelyInverted(BCSrcOp, BCSrcOp->hasOneUse(), &Builder)) {
+ Value *Cast = Builder.CreateBitCast(NotBCSrcOp, DstType);
+ return new ICmpInst(Pred, Cast, ConstantInt::getNullValue(DstType));
+ }
}
// If this is checking if all elements of an extended vector are clear or not,
@@ -3878,21 +3925,9 @@ Instruction *InstCombinerImpl::foldICmpInstWithConstantNotInt(ICmpInst &I) {
return nullptr;
switch (LHSI->getOpcode()) {
- case Instruction::GetElementPtr:
- // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null
- if (RHSC->isNullValue() &&
- cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices())
- return new ICmpInst(
- I.getPredicate(), LHSI->getOperand(0),
- Constant::getNullValue(LHSI->getOperand(0)->getType()));
- break;
case Instruction::PHI:
- // Only fold icmp into the PHI if the phi and icmp are in the same
- // block. If in the same block, we're encouraging jump threading. If
- // not, we are just pessimizing the code by making an i1 phi.
- if (LHSI->getParent() == I.getParent())
- if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
- return NV;
+ if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
+ return NV;
break;
case Instruction::IntToPtr:
// icmp pred inttoptr(X), null -> icmp pred X, 0
@@ -4243,7 +4278,12 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
/*isNUW=*/false, SQ.getWithInstruction(&I)));
if (!NewShAmt)
return nullptr;
- NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, WidestTy);
+ if (NewShAmt->getType() != WidestTy) {
+ NewShAmt =
+ ConstantFoldCastOperand(Instruction::ZExt, NewShAmt, WidestTy, SQ.DL);
+ if (!NewShAmt)
+ return nullptr;
+ }
unsigned WidestBitWidth = WidestTy->getScalarSizeInBits();
// Is the new shift amount smaller than the bit width?
@@ -4424,6 +4464,65 @@ static Instruction *foldICmpXNegX(ICmpInst &I,
return nullptr;
}
+static Instruction *foldICmpAndXX(ICmpInst &I, const SimplifyQuery &Q,
+ InstCombinerImpl &IC) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *A;
+ // Normalize and operand as operand 0.
+ CmpInst::Predicate Pred = I.getPredicate();
+ if (match(Op1, m_c_And(m_Specific(Op0), m_Value()))) {
+ std::swap(Op0, Op1);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ if (!match(Op0, m_c_And(m_Specific(Op1), m_Value(A))))
+ return nullptr;
+
+  // icmp (X & Y) u< X --> (X & Y) != X
+ if (Pred == ICmpInst::ICMP_ULT)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+
+  // icmp (X & Y) u>= X --> (X & Y) == X
+ if (Pred == ICmpInst::ICMP_UGE)
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
+
+ return nullptr;
+}
+
+static Instruction *foldICmpOrXX(ICmpInst &I, const SimplifyQuery &Q,
+ InstCombinerImpl &IC) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *A;
+
+ // Normalize or operand as operand 0.
+ CmpInst::Predicate Pred = I.getPredicate();
+ if (match(Op1, m_c_Or(m_Specific(Op0), m_Value(A)))) {
+ std::swap(Op0, Op1);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ } else if (!match(Op0, m_c_Or(m_Specific(Op1), m_Value(A)))) {
+ return nullptr;
+ }
+
+ // icmp (X | Y) u<= X --> (X | Y) == X
+ if (Pred == ICmpInst::ICMP_ULE)
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
+
+ // icmp (X | Y) u> X --> (X | Y) != X
+ if (Pred == ICmpInst::ICMP_UGT)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+
+ if (ICmpInst::isEquality(Pred) && Op0->hasOneUse()) {
+ // icmp (X | Y) eq/ne Y --> (X & ~Y) eq/ne 0 if Y is freely invertible
+ if (Value *NotOp1 =
+ IC.getFreelyInverted(Op1, Op1->hasOneUse(), &IC.Builder))
+ return new ICmpInst(Pred, IC.Builder.CreateAnd(A, NotOp1),
+ Constant::getNullValue(Op1->getType()));
+ // icmp (X | Y) eq/ne Y --> (~X | Y) eq/ne -1 if X is freely invertible.
+ if (Value *NotA = IC.getFreelyInverted(A, A->hasOneUse(), &IC.Builder))
+ return new ICmpInst(Pred, IC.Builder.CreateOr(Op1, NotA),
+ Constant::getAllOnesValue(Op1->getType()));
+ }
+ return nullptr;
+}
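For instance (a sketch): the unsigned strict compare against an operand of the or can only be an inequality test:

  %o = or i32 %x, %y
  %c = icmp ugt i32 %o, %x
  ; u> holds exactly when the or changed %x:
  %c2 = icmp ne i32 %o, %x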
+
static Instruction *foldICmpXorXX(ICmpInst &I, const SimplifyQuery &Q,
InstCombinerImpl &IC) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *A;
@@ -4746,6 +4845,8 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
if (Instruction * R = foldICmpXorXX(I, Q, *this))
return R;
+ if (Instruction *R = foldICmpOrXX(I, Q, *this))
+ return R;
{
// Try to remove shared multiplier from comparison:
@@ -4915,6 +5016,9 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder))
return replaceInstUsesWith(I, V);
+ if (Instruction *R = foldICmpAndXX(I, Q, *this))
+ return R;
+
if (Value *V = foldICmpWithTruncSignExtendedVal(I, Builder))
return replaceInstUsesWith(I, V);
@@ -4924,88 +5028,153 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
return nullptr;
}
-/// Fold icmp Pred min|max(X, Y), X.
-static Instruction *foldICmpWithMinMax(ICmpInst &Cmp) {
- ICmpInst::Predicate Pred = Cmp.getPredicate();
- Value *Op0 = Cmp.getOperand(0);
- Value *X = Cmp.getOperand(1);
-
- // Canonicalize minimum or maximum operand to LHS of the icmp.
- if (match(X, m_c_SMin(m_Specific(Op0), m_Value())) ||
- match(X, m_c_SMax(m_Specific(Op0), m_Value())) ||
- match(X, m_c_UMin(m_Specific(Op0), m_Value())) ||
- match(X, m_c_UMax(m_Specific(Op0), m_Value()))) {
- std::swap(Op0, X);
- Pred = Cmp.getSwappedPredicate();
- }
-
- Value *Y;
- if (match(Op0, m_c_SMin(m_Specific(X), m_Value(Y)))) {
- // smin(X, Y) == X --> X s<= Y
- // smin(X, Y) s>= X --> X s<= Y
- if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SGE)
- return new ICmpInst(ICmpInst::ICMP_SLE, X, Y);
-
- // smin(X, Y) != X --> X s> Y
- // smin(X, Y) s< X --> X s> Y
- if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_SLT)
- return new ICmpInst(ICmpInst::ICMP_SGT, X, Y);
-
- // These cases should be handled in InstSimplify:
- // smin(X, Y) s<= X --> true
- // smin(X, Y) s> X --> false
+/// Fold icmp Pred min|max(X, Y), Z.
+Instruction *
+InstCombinerImpl::foldICmpWithMinMaxImpl(Instruction &I,
+ MinMaxIntrinsic *MinMax, Value *Z,
+ ICmpInst::Predicate Pred) {
+ Value *X = MinMax->getLHS();
+ Value *Y = MinMax->getRHS();
+ if (ICmpInst::isSigned(Pred) && !MinMax->isSigned())
return nullptr;
- }
-
- if (match(Op0, m_c_SMax(m_Specific(X), m_Value(Y)))) {
- // smax(X, Y) == X --> X s>= Y
- // smax(X, Y) s<= X --> X s>= Y
- if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SLE)
- return new ICmpInst(ICmpInst::ICMP_SGE, X, Y);
-
- // smax(X, Y) != X --> X s< Y
- // smax(X, Y) s> X --> X s< Y
- if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_SGT)
- return new ICmpInst(ICmpInst::ICMP_SLT, X, Y);
-
- // These cases should be handled in InstSimplify:
- // smax(X, Y) s>= X --> true
- // smax(X, Y) s< X --> false
+ if (ICmpInst::isUnsigned(Pred) && MinMax->isSigned())
return nullptr;
+ SimplifyQuery Q = SQ.getWithInstruction(&I);
+ auto IsCondKnownTrue = [](Value *Val) -> std::optional<bool> {
+ if (!Val)
+ return std::nullopt;
+ if (match(Val, m_One()))
+ return true;
+ if (match(Val, m_Zero()))
+ return false;
+ return std::nullopt;
+ };
+ auto CmpXZ = IsCondKnownTrue(simplifyICmpInst(Pred, X, Z, Q));
+ auto CmpYZ = IsCondKnownTrue(simplifyICmpInst(Pred, Y, Z, Q));
+ if (!CmpXZ.has_value() && !CmpYZ.has_value())
+ return nullptr;
+ if (!CmpXZ.has_value()) {
+ std::swap(X, Y);
+ std::swap(CmpXZ, CmpYZ);
}
- if (match(Op0, m_c_UMin(m_Specific(X), m_Value(Y)))) {
- // umin(X, Y) == X --> X u<= Y
- // umin(X, Y) u>= X --> X u<= Y
- if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_UGE)
- return new ICmpInst(ICmpInst::ICMP_ULE, X, Y);
-
- // umin(X, Y) != X --> X u> Y
- // umin(X, Y) u< X --> X u> Y
- if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_ULT)
- return new ICmpInst(ICmpInst::ICMP_UGT, X, Y);
+ auto FoldIntoCmpYZ = [&]() -> Instruction * {
+ if (CmpYZ.has_value())
+ return replaceInstUsesWith(I, ConstantInt::getBool(I.getType(), *CmpYZ));
+ return ICmpInst::Create(Instruction::ICmp, Pred, Y, Z);
+ };
- // These cases should be handled in InstSimplify:
- // umin(X, Y) u<= X --> true
- // umin(X, Y) u> X --> false
- return nullptr;
+ switch (Pred) {
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE: {
+ // If X == Z:
+ // Expr Result
+ // min(X, Y) == Z X <= Y
+ // max(X, Y) == Z X >= Y
+ // min(X, Y) != Z X > Y
+ // max(X, Y) != Z X < Y
+ if ((Pred == ICmpInst::ICMP_EQ) == *CmpXZ) {
+ ICmpInst::Predicate NewPred =
+ ICmpInst::getNonStrictPredicate(MinMax->getPredicate());
+ if (Pred == ICmpInst::ICMP_NE)
+ NewPred = ICmpInst::getInversePredicate(NewPred);
+ return ICmpInst::Create(Instruction::ICmp, NewPred, X, Y);
+ }
+ // Otherwise (X != Z):
+ ICmpInst::Predicate NewPred = MinMax->getPredicate();
+ auto MinMaxCmpXZ = IsCondKnownTrue(simplifyICmpInst(NewPred, X, Z, Q));
+ if (!MinMaxCmpXZ.has_value()) {
+ std::swap(X, Y);
+ std::swap(CmpXZ, CmpYZ);
+ // Re-check pre-condition X != Z
+ if (!CmpXZ.has_value() || (Pred == ICmpInst::ICMP_EQ) == *CmpXZ)
+ break;
+ MinMaxCmpXZ = IsCondKnownTrue(simplifyICmpInst(NewPred, X, Z, Q));
+ }
+ if (!MinMaxCmpXZ.has_value())
+ break;
+ if (*MinMaxCmpXZ) {
+ // Expr Fact Result
+ // min(X, Y) == Z X < Z false
+ // max(X, Y) == Z X > Z false
+ // min(X, Y) != Z X < Z true
+ // max(X, Y) != Z X > Z true
+ return replaceInstUsesWith(
+ I, ConstantInt::getBool(I.getType(), Pred == ICmpInst::ICMP_NE));
+ } else {
+ // Expr Fact Result
+ // min(X, Y) == Z X > Z Y == Z
+ // max(X, Y) == Z X < Z Y == Z
+ // min(X, Y) != Z X > Z Y != Z
+ // max(X, Y) != Z X < Z Y != Z
+ return FoldIntoCmpYZ();
+ }
+ break;
+ }
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLE:
+ case ICmpInst::ICMP_ULE:
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_UGE: {
+ bool IsSame = MinMax->getPredicate() == ICmpInst::getStrictPredicate(Pred);
+ if (*CmpXZ) {
+ if (IsSame) {
+ // Expr Fact Result
+ // min(X, Y) < Z X < Z true
+ // min(X, Y) <= Z X <= Z true
+ // max(X, Y) > Z X > Z true
+ // max(X, Y) >= Z X >= Z true
+ return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ } else {
+ // Expr Fact Result
+ // max(X, Y) < Z X < Z Y < Z
+ // max(X, Y) <= Z X <= Z Y <= Z
+ // min(X, Y) > Z X > Z Y > Z
+ // min(X, Y) >= Z X >= Z Y >= Z
+ return FoldIntoCmpYZ();
+ }
+ } else {
+ if (IsSame) {
+ // Expr Fact Result
+ // min(X, Y) < Z X >= Z Y < Z
+ // min(X, Y) <= Z X > Z Y <= Z
+ // max(X, Y) > Z X <= Z Y > Z
+ // max(X, Y) >= Z X < Z Y >= Z
+ return FoldIntoCmpYZ();
+ } else {
+ // Expr Fact Result
+ // max(X, Y) < Z X >= Z false
+ // max(X, Y) <= Z X > Z false
+ // min(X, Y) > Z X <= Z false
+ // min(X, Y) >= Z X < Z false
+ return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ }
+ }
+ break;
+ }
+ default:
+ break;
}
- if (match(Op0, m_c_UMax(m_Specific(X), m_Value(Y)))) {
- // umax(X, Y) == X --> X u>= Y
- // umax(X, Y) u<= X --> X u>= Y
- if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_ULE)
- return new ICmpInst(ICmpInst::ICMP_UGE, X, Y);
+ return nullptr;
+}
+
+Instruction *InstCombinerImpl::foldICmpWithMinMax(ICmpInst &Cmp) {
+ ICmpInst::Predicate Pred = Cmp.getPredicate();
+ Value *Lhs = Cmp.getOperand(0);
+ Value *Rhs = Cmp.getOperand(1);
- // umax(X, Y) != X --> X u< Y
- // umax(X, Y) u> X --> X u< Y
- if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_UGT)
- return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
+ if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(Lhs)) {
+ if (Instruction *Res = foldICmpWithMinMaxImpl(Cmp, MinMax, Rhs, Pred))
+ return Res;
+ }
- // These cases should be handled in InstSimplify:
- // umax(X, Y) u>= X --> true
- // umax(X, Y) u< X --> false
- return nullptr;
+ if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(Rhs)) {
+ if (Instruction *Res = foldICmpWithMinMaxImpl(
+ Cmp, MinMax, Lhs, ICmpInst::getSwappedPredicate(Pred)))
+ return Res;
}
return nullptr;
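One concrete instance (hand-written): when one min/max operand already decides the compare, the whole thing folds:

  %m = call i8 @llvm.umax.i8(i8 %x, i8 42)
  %c = icmp ugt i8 %m, 10
  ; 42 u> 10 is a known fact in the same direction as the max,
  ; so %c folds to true.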
@@ -5173,35 +5342,6 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
return new ICmpInst(Pred, A, Builder.CreateTrunc(B, A->getType()));
}
- // Test if 2 values have different or same signbits:
- // (X u>> BitWidth - 1) == zext (Y s> -1) --> (X ^ Y) < 0
- // (X u>> BitWidth - 1) != zext (Y s> -1) --> (X ^ Y) > -1
- // (X s>> BitWidth - 1) == sext (Y s> -1) --> (X ^ Y) < 0
- // (X s>> BitWidth - 1) != sext (Y s> -1) --> (X ^ Y) > -1
- Instruction *ExtI;
- if (match(Op1, m_CombineAnd(m_Instruction(ExtI), m_ZExtOrSExt(m_Value(A)))) &&
- (Op0->hasOneUse() || Op1->hasOneUse())) {
- unsigned OpWidth = Op0->getType()->getScalarSizeInBits();
- Instruction *ShiftI;
- Value *X, *Y;
- ICmpInst::Predicate Pred2;
- if (match(Op0, m_CombineAnd(m_Instruction(ShiftI),
- m_Shr(m_Value(X),
- m_SpecificIntAllowUndef(OpWidth - 1)))) &&
- match(A, m_ICmp(Pred2, m_Value(Y), m_AllOnes())) &&
- Pred2 == ICmpInst::ICMP_SGT && X->getType() == Y->getType()) {
- unsigned ExtOpc = ExtI->getOpcode();
- unsigned ShiftOpc = ShiftI->getOpcode();
- if ((ExtOpc == Instruction::ZExt && ShiftOpc == Instruction::LShr) ||
- (ExtOpc == Instruction::SExt && ShiftOpc == Instruction::AShr)) {
- Value *Xor = Builder.CreateXor(X, Y, "xor.signbits");
- Value *R = (Pred == ICmpInst::ICMP_EQ) ? Builder.CreateIsNeg(Xor)
- : Builder.CreateIsNotNeg(Xor);
- return replaceInstUsesWith(I, R);
- }
- }
- }
-
// (A >> C) == (B >> C) --> (A^B) u< (1 << C)
// For lshr and ashr pairs.
const APInt *AP1, *AP2;
@@ -5307,6 +5447,40 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
Pred, A,
Builder.CreateIntrinsic(Op0->getType(), Intrinsic::fshl, {A, A, B}));
+ // Canonicalize:
+ // icmp eq/ne OneUse(A ^ Cst), B --> icmp eq/ne (A ^ B), Cst
+ Constant *Cst;
+ if (match(&I, m_c_ICmp(PredUnused,
+ m_OneUse(m_Xor(m_Value(A), m_ImmConstant(Cst))),
+ m_CombineAnd(m_Value(B), m_Unless(m_ImmConstant())))))
+ return new ICmpInst(Pred, Builder.CreateXor(A, B), Cst);
+
+ {
+ // (icmp eq/ne (and (add/sub/xor X, P2), P2), P2)
+ auto m_Matcher =
+ m_CombineOr(m_CombineOr(m_c_Add(m_Value(B), m_Deferred(A)),
+ m_c_Xor(m_Value(B), m_Deferred(A))),
+ m_Sub(m_Value(B), m_Deferred(A)));
+ std::optional<bool> IsZero = std::nullopt;
+ if (match(&I, m_c_ICmp(PredUnused, m_OneUse(m_c_And(m_Value(A), m_Matcher)),
+ m_Deferred(A))))
+ IsZero = false;
+ // (icmp eq/ne (and (add/sub/xor X, P2), P2), 0)
+ else if (match(&I,
+ m_ICmp(PredUnused, m_OneUse(m_c_And(m_Value(A), m_Matcher)),
+ m_Zero())))
+ IsZero = true;
+
+ if (IsZero && isKnownToBeAPowerOfTwo(A, /* OrZero */ true, /*Depth*/ 0, &I))
+ // (icmp eq/ne (and (add/sub/xor X, P2), P2), P2)
+ // -> (icmp eq/ne (and X, P2), 0)
+ // (icmp eq/ne (and (add/sub/xor X, P2), P2), 0)
+ // -> (icmp eq/ne (and X, P2), P2)
+ return new ICmpInst(Pred, Builder.CreateAnd(B, A),
+ *IsZero ? A
+ : ConstantInt::getNullValue(A->getType()));
+ }
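A sketch with a value the analysis can prove to be a power of two:

  %p = shl i32 1, %n                ; power of two (or zero)
  %t = add i32 %x, %p
  %a = and i32 %t, %p
  %c = icmp eq i32 %a, %p
  ; adding %p flips exactly the bit %p selects, so:
  %a2 = and i32 %x, %p
  %c2 = icmp eq i32 %a2, 0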
+
return nullptr;
}
@@ -5383,8 +5557,8 @@ Instruction *InstCombinerImpl::foldICmpWithZextOrSext(ICmpInst &ICmp) {
// icmp Pred (ext X), (ext Y)
Value *Y;
if (match(ICmp.getOperand(1), m_ZExtOrSExt(m_Value(Y)))) {
- bool IsZext0 = isa<ZExtOperator>(ICmp.getOperand(0));
- bool IsZext1 = isa<ZExtOperator>(ICmp.getOperand(1));
+ bool IsZext0 = isa<ZExtInst>(ICmp.getOperand(0));
+ bool IsZext1 = isa<ZExtInst>(ICmp.getOperand(1));
if (IsZext0 != IsZext1) {
    // If X and Y are both i1
@@ -5396,11 +5570,16 @@ Instruction *InstCombinerImpl::foldICmpWithZextOrSext(ICmpInst &ICmp) {
return new ICmpInst(ICmp.getPredicate(), Builder.CreateOr(X, Y),
Constant::getNullValue(X->getType()));
- // If we have mismatched casts, treat the zext of a non-negative source as
- // a sext to simulate matching casts. Otherwise, we are done.
- // TODO: Can we handle some predicates (equality) without non-negative?
- if ((IsZext0 && isKnownNonNegative(X, DL, 0, &AC, &ICmp, &DT)) ||
- (IsZext1 && isKnownNonNegative(Y, DL, 0, &AC, &ICmp, &DT)))
+ // If we have mismatched casts and zext has the nneg flag, we can
+ // treat the "zext nneg" as "sext". Otherwise, we cannot fold and quit.
+
+ auto *NonNegInst0 = dyn_cast<PossiblyNonNegInst>(ICmp.getOperand(0));
+ auto *NonNegInst1 = dyn_cast<PossiblyNonNegInst>(ICmp.getOperand(1));
+
+ bool IsNonNeg0 = NonNegInst0 && NonNegInst0->hasNonNeg();
+ bool IsNonNeg1 = NonNegInst1 && NonNegInst1->hasNonNeg();
+
+ if ((IsZext0 && IsNonNeg0) || (IsZext1 && IsNonNeg1))
IsSignedExt = true;
else
return nullptr;
@@ -5442,25 +5621,20 @@ Instruction *InstCombinerImpl::foldICmpWithZextOrSext(ICmpInst &ICmp) {
if (!C)
return nullptr;
- // Compute the constant that would happen if we truncated to SrcTy then
- // re-extended to DestTy.
+ // If a lossless truncate is possible...
Type *SrcTy = CastOp0->getSrcTy();
- Type *DestTy = CastOp0->getDestTy();
- Constant *Res1 = ConstantExpr::getTrunc(C, SrcTy);
- Constant *Res2 = ConstantExpr::getCast(CastOp0->getOpcode(), Res1, DestTy);
-
- // If the re-extended constant didn't change...
- if (Res2 == C) {
+ Constant *Res = getLosslessTrunc(C, SrcTy, CastOp0->getOpcode());
+ if (Res) {
if (ICmp.isEquality())
- return new ICmpInst(ICmp.getPredicate(), X, Res1);
+ return new ICmpInst(ICmp.getPredicate(), X, Res);
// A signed comparison of sign extended values simplifies into a
// signed comparison.
if (IsSignedExt && IsSignedCmp)
- return new ICmpInst(ICmp.getPredicate(), X, Res1);
+ return new ICmpInst(ICmp.getPredicate(), X, Res);
// The other three cases all fold into an unsigned comparison.
- return new ICmpInst(ICmp.getUnsignedPredicate(), X, Res1);
+ return new ICmpInst(ICmp.getUnsignedPredicate(), X, Res);
}
// The re-extended constant changed, partly changed (in the case of a vector),
@@ -5518,13 +5692,8 @@ Instruction *InstCombinerImpl::foldICmpWithCastOp(ICmpInst &ICmp) {
Value *NewOp1 = nullptr;
if (auto *PtrToIntOp1 = dyn_cast<PtrToIntOperator>(ICmp.getOperand(1))) {
Value *PtrSrc = PtrToIntOp1->getOperand(0);
- if (PtrSrc->getType()->getPointerAddressSpace() ==
- Op0Src->getType()->getPointerAddressSpace()) {
+ if (PtrSrc->getType() == Op0Src->getType())
NewOp1 = PtrToIntOp1->getOperand(0);
- // If the pointer types don't match, insert a bitcast.
- if (Op0Src->getType() != NewOp1->getType())
- NewOp1 = Builder.CreateBitCast(NewOp1, Op0Src->getType());
- }
} else if (auto *RHSC = dyn_cast<Constant>(ICmp.getOperand(1))) {
NewOp1 = ConstantExpr::getIntToPtr(RHSC, SrcTy);
}
@@ -5641,22 +5810,20 @@ bool InstCombinerImpl::OptimizeOverflowCheck(Instruction::BinaryOps BinaryOp,
/// \returns Instruction which must replace the compare instruction, NULL if no
/// replacement required.
static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
- Value *OtherVal,
+ const APInt *OtherVal,
InstCombinerImpl &IC) {
// Don't bother doing this transformation for pointers, don't do it for
// vectors.
if (!isa<IntegerType>(MulVal->getType()))
return nullptr;
- assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal);
- assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal);
auto *MulInstr = dyn_cast<Instruction>(MulVal);
if (!MulInstr)
return nullptr;
assert(MulInstr->getOpcode() == Instruction::Mul);
- auto *LHS = cast<ZExtOperator>(MulInstr->getOperand(0)),
- *RHS = cast<ZExtOperator>(MulInstr->getOperand(1));
+ auto *LHS = cast<ZExtInst>(MulInstr->getOperand(0)),
+ *RHS = cast<ZExtInst>(MulInstr->getOperand(1));
assert(LHS->getOpcode() == Instruction::ZExt);
assert(RHS->getOpcode() == Instruction::ZExt);
Value *A = LHS->getOperand(0), *B = RHS->getOperand(0);
@@ -5709,70 +5876,26 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
// Recognize patterns
switch (I.getPredicate()) {
- case ICmpInst::ICMP_EQ:
- case ICmpInst::ICMP_NE:
- // Recognize pattern:
- // mulval = mul(zext A, zext B)
- // cmp eq/neq mulval, and(mulval, mask), mask selects low MulWidth bits.
- ConstantInt *CI;
- Value *ValToMask;
- if (match(OtherVal, m_And(m_Value(ValToMask), m_ConstantInt(CI)))) {
- if (ValToMask != MulVal)
- return nullptr;
- const APInt &CVal = CI->getValue() + 1;
- if (CVal.isPowerOf2()) {
- unsigned MaskWidth = CVal.logBase2();
- if (MaskWidth == MulWidth)
- break; // Recognized
- }
- }
- return nullptr;
-
- case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGT: {
// Recognize pattern:
// mulval = mul(zext A, zext B)
// cmp ugt mulval, max
- if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
- APInt MaxVal = APInt::getMaxValue(MulWidth);
- MaxVal = MaxVal.zext(CI->getBitWidth());
- if (MaxVal.eq(CI->getValue()))
- break; // Recognized
- }
- return nullptr;
-
- case ICmpInst::ICMP_UGE:
- // Recognize pattern:
- // mulval = mul(zext A, zext B)
- // cmp uge mulval, max+1
- if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
- APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth);
- if (MaxVal.eq(CI->getValue()))
- break; // Recognized
- }
- return nullptr;
-
- case ICmpInst::ICMP_ULE:
- // Recognize pattern:
- // mulval = mul(zext A, zext B)
- // cmp ule mulval, max
- if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
- APInt MaxVal = APInt::getMaxValue(MulWidth);
- MaxVal = MaxVal.zext(CI->getBitWidth());
- if (MaxVal.eq(CI->getValue()))
- break; // Recognized
- }
+ APInt MaxVal = APInt::getMaxValue(MulWidth);
+ MaxVal = MaxVal.zext(OtherVal->getBitWidth());
+ if (MaxVal.eq(*OtherVal))
+ break; // Recognized
return nullptr;
+ }
- case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULT: {
// Recognize pattern:
// mulval = mul(zext A, zext B)
    // cmp ult mulval, max + 1
- if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
- APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth);
- if (MaxVal.eq(CI->getValue()))
- break; // Recognized
- }
+ APInt MaxVal = APInt::getOneBitSet(OtherVal->getBitWidth(), MulWidth);
+ if (MaxVal.eq(*OtherVal))
+ break; // Recognized
return nullptr;
+ }
default:
return nullptr;
@@ -5798,7 +5921,7 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
if (MulVal->hasNUsesOrMore(2)) {
Value *Mul = Builder.CreateExtractValue(Call, 0, "umul.value");
for (User *U : make_early_inc_range(MulVal->users())) {
- if (U == &I || U == OtherVal)
+ if (U == &I)
continue;
if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
if (TI->getType()->getPrimitiveSizeInBits() == MulWidth)
@@ -5819,34 +5942,10 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
IC.addToWorklist(cast<Instruction>(U));
}
}
- if (isa<Instruction>(OtherVal))
- IC.addToWorklist(cast<Instruction>(OtherVal));
// The original icmp gets replaced with the overflow value, maybe inverted
// depending on predicate.
- bool Inverse = false;
- switch (I.getPredicate()) {
- case ICmpInst::ICMP_NE:
- break;
- case ICmpInst::ICMP_EQ:
- Inverse = true;
- break;
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE:
- if (I.getOperand(0) == MulVal)
- break;
- Inverse = true;
- break;
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE:
- if (I.getOperand(1) == MulVal)
- break;
- Inverse = true;
- break;
- default:
- llvm_unreachable("Unexpected predicate");
- }
- if (Inverse) {
+ if (I.getPredicate() == ICmpInst::ICMP_ULT) {
Value *Res = Builder.CreateExtractValue(Call, 1);
return BinaryOperator::CreateNot(Res);
}
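The idiom in question, sketched (not from the patch): testing whether a widened product exceeds the narrow maximum becomes a direct overflow query:

  %a64 = zext i32 %a to i64
  %b64 = zext i32 %b to i64
  %m   = mul nuw i64 %a64, %b64
  %c   = icmp ugt i64 %m, 4294967295
  ; ->
  %r  = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
  %c2 = extractvalue { i32, i1 } %r, 1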
@@ -6015,13 +6114,19 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
KnownBits Op0Known(BitWidth);
KnownBits Op1Known(BitWidth);
- if (SimplifyDemandedBits(&I, 0,
- getDemandedBitsLHSMask(I, BitWidth),
- Op0Known, 0))
- return &I;
+ {
+ // Don't use dominating conditions when folding icmp using known bits. This
+ // may convert signed into unsigned predicates in ways that other passes
+ // (especially IndVarSimplify) may not be able to reliably undo.
+ SQ.DC = nullptr;
+ auto _ = make_scope_exit([&]() { SQ.DC = &DC; });
+ if (SimplifyDemandedBits(&I, 0, getDemandedBitsLHSMask(I, BitWidth),
+ Op0Known, 0))
+ return &I;
- if (SimplifyDemandedBits(&I, 1, APInt::getAllOnes(BitWidth), Op1Known, 0))
- return &I;
+ if (SimplifyDemandedBits(&I, 1, APInt::getAllOnes(BitWidth), Op1Known, 0))
+ return &I;
+ }
// Given the known and unknown bits, compute a range that the LHS could be
// in. Compute the Min, Max and RHS values based on the known bits. For the
@@ -6269,57 +6374,70 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) {
Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULE)
return BinaryOperator::CreateOr(Builder.CreateIsNull(X), Y);
+ // icmp eq/ne X, (zext/sext (icmp eq/ne X, C))
+ ICmpInst::Predicate Pred1, Pred2;
const APInt *C;
- if (match(I.getOperand(0), m_c_Add(m_ZExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
- match(I.getOperand(1), m_APInt(C)) &&
- X->getType()->isIntOrIntVectorTy(1) &&
- Y->getType()->isIntOrIntVectorTy(1)) {
- unsigned BitWidth = C->getBitWidth();
- Pred = I.getPredicate();
- APInt Zero = APInt::getZero(BitWidth);
- APInt MinusOne = APInt::getAllOnes(BitWidth);
- APInt One(BitWidth, 1);
- if ((C->sgt(Zero) && Pred == ICmpInst::ICMP_SGT) ||
- (C->slt(Zero) && Pred == ICmpInst::ICMP_SLT))
- return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
- if ((C->sgt(One) && Pred == ICmpInst::ICMP_SLT) ||
- (C->slt(MinusOne) && Pred == ICmpInst::ICMP_SGT))
- return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
-
- if (I.getOperand(0)->hasOneUse()) {
- APInt NewC = *C;
- // canonicalize predicate to eq/ne
- if ((*C == Zero && Pred == ICmpInst::ICMP_SLT) ||
- (*C != Zero && *C != MinusOne && Pred == ICmpInst::ICMP_UGT)) {
- // x s< 0 in [-1, 1] --> x == -1
- // x u> 1(or any const !=0 !=-1) in [-1, 1] --> x == -1
- NewC = MinusOne;
- Pred = ICmpInst::ICMP_EQ;
- } else if ((*C == MinusOne && Pred == ICmpInst::ICMP_SGT) ||
- (*C != Zero && *C != One && Pred == ICmpInst::ICMP_ULT)) {
- // x s> -1 in [-1, 1] --> x != -1
- // x u< -1 in [-1, 1] --> x != -1
- Pred = ICmpInst::ICMP_NE;
- } else if (*C == Zero && Pred == ICmpInst::ICMP_SGT) {
- // x s> 0 in [-1, 1] --> x == 1
- NewC = One;
- Pred = ICmpInst::ICMP_EQ;
- } else if (*C == One && Pred == ICmpInst::ICMP_SLT) {
- // x s< 1 in [-1, 1] --> x != 1
- Pred = ICmpInst::ICMP_NE;
+ Instruction *ExtI;
+ if (match(&I, m_c_ICmp(Pred1, m_Value(X),
+ m_CombineAnd(m_Instruction(ExtI),
+ m_ZExtOrSExt(m_ICmp(Pred2, m_Deferred(X),
+ m_APInt(C)))))) &&
+ ICmpInst::isEquality(Pred1) && ICmpInst::isEquality(Pred2)) {
+ bool IsSExt = ExtI->getOpcode() == Instruction::SExt;
+ bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse();
+ auto CreateRangeCheck = [&] {
+ Value *CmpV1 =
+ Builder.CreateICmp(Pred1, X, Constant::getNullValue(X->getType()));
+ Value *CmpV2 = Builder.CreateICmp(
+ Pred1, X, ConstantInt::getSigned(X->getType(), IsSExt ? -1 : 1));
+ return BinaryOperator::Create(
+ Pred1 == ICmpInst::ICMP_EQ ? Instruction::Or : Instruction::And,
+ CmpV1, CmpV2);
+ };
+ if (C->isZero()) {
+ if (Pred2 == ICmpInst::ICMP_EQ) {
+ // icmp eq X, (zext/sext (icmp eq X, 0)) --> false
+ // icmp ne X, (zext/sext (icmp eq X, 0)) --> true
+ return replaceInstUsesWith(
+ I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE));
+ } else if (!IsSExt || HasOneUse) {
+ // icmp eq X, (zext (icmp ne X, 0)) --> X == 0 || X == 1
+ // icmp ne X, (zext (icmp ne X, 0)) --> X != 0 && X != 1
+ // icmp eq X, (sext (icmp ne X, 0)) --> X == 0 || X == -1
+        // icmp ne X, (sext (icmp ne X, 0)) --> X != 0 && X != -1
+ return CreateRangeCheck();
}
-
- if (NewC == MinusOne) {
- if (Pred == ICmpInst::ICMP_EQ)
- return BinaryOperator::CreateAnd(Builder.CreateNot(X), Y);
- if (Pred == ICmpInst::ICMP_NE)
- return BinaryOperator::CreateOr(X, Builder.CreateNot(Y));
- } else if (NewC == One) {
- if (Pred == ICmpInst::ICMP_EQ)
- return BinaryOperator::CreateAnd(X, Builder.CreateNot(Y));
- if (Pred == ICmpInst::ICMP_NE)
- return BinaryOperator::CreateOr(Builder.CreateNot(X), Y);
+ } else if (IsSExt ? C->isAllOnes() : C->isOne()) {
+ if (Pred2 == ICmpInst::ICMP_NE) {
+ // icmp eq X, (zext (icmp ne X, 1)) --> false
+ // icmp ne X, (zext (icmp ne X, 1)) --> true
+ // icmp eq X, (sext (icmp ne X, -1)) --> false
+ // icmp ne X, (sext (icmp ne X, -1)) --> true
+ return replaceInstUsesWith(
+ I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE));
+ } else if (!IsSExt || HasOneUse) {
+ // icmp eq X, (zext (icmp eq X, 1)) --> X == 0 || X == 1
+ // icmp ne X, (zext (icmp eq X, 1)) --> X != 0 && X != 1
+ // icmp eq X, (sext (icmp eq X, -1)) --> X == 0 || X == -1
+        // icmp ne X, (sext (icmp eq X, -1)) --> X != 0 && X != -1
+ return CreateRangeCheck();
}
+ } else {
+ // when C != 0 && C != 1:
+ // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0
+ // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, 1
+ // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0
+ // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1
+ // when C != 0 && C != -1:
+ // icmp eq X, (sext (icmp eq X, C)) --> icmp eq X, 0
+ // icmp eq X, (sext (icmp ne X, C)) --> icmp eq X, -1
+ // icmp ne X, (sext (icmp eq X, C)) --> icmp ne X, 0
+ // icmp ne X, (sext (icmp ne X, C)) --> icmp ne X, -1
+ return ICmpInst::Create(
+ Instruction::ICmp, Pred1, X,
+ ConstantInt::getSigned(X->getType(), Pred2 == ICmpInst::ICMP_NE
+ ? (IsSExt ? -1 : 1)
+ : 0));
}
}
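For the generic C case, a hand-written example: with C == 7 the zext'd compare can only produce 0 or 1, neither of which is 7:

  %b = icmp eq i32 %x, 7
  %z = zext i1 %b to i32
  %c = icmp eq i32 %x, %z
  ; ->
  %c2 = icmp eq i32 %x, 0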
@@ -6783,6 +6901,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpUsingKnownBits(I))
return Res;
+ if (Instruction *Res = foldICmpTruncWithTruncOrExt(I, Q))
+ return Res;
+
// Test if the ICmpInst instruction is used exclusively by a select as
// part of a minimum or maximum operation. If so, refrain from doing
// any other folding. This helps out other analyses which understand
@@ -6913,38 +7034,40 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
return Res;
{
- Value *A, *B;
- // Transform (A & ~B) == 0 --> (A & B) != 0
- // and (A & ~B) != 0 --> (A & B) == 0
+ Value *X, *Y;
+ // Transform (X & ~Y) == 0 --> (X & Y) != 0
+ // and (X & ~Y) != 0 --> (X & Y) == 0
    // if X is a power of 2.
- if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op1, m_Zero()) &&
- isKnownToBeAPowerOfTwo(A, false, 0, &I) && I.isEquality())
- return new ICmpInst(I.getInversePredicate(), Builder.CreateAnd(A, B),
+ if (match(Op0, m_And(m_Value(X), m_Not(m_Value(Y)))) &&
+ match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(X, false, 0, &I) &&
+ I.isEquality())
+ return new ICmpInst(I.getInversePredicate(), Builder.CreateAnd(X, Y),
Op1);
- // ~X < ~Y --> Y < X
- // ~X < C --> X > ~C
- if (match(Op0, m_Not(m_Value(A)))) {
- if (match(Op1, m_Not(m_Value(B))))
- return new ICmpInst(I.getPredicate(), B, A);
-
- const APInt *C;
- if (match(Op1, m_APInt(C)))
- return new ICmpInst(I.getSwappedPredicate(), A,
- ConstantInt::get(Op1->getType(), ~(*C)));
+ // Op0 pred Op1 -> ~Op1 pred ~Op0, if this allows us to drop an instruction.
+ if (Op0->getType()->isIntOrIntVectorTy()) {
+ bool ConsumesOp0, ConsumesOp1;
+ if (isFreeToInvert(Op0, Op0->hasOneUse(), ConsumesOp0) &&
+ isFreeToInvert(Op1, Op1->hasOneUse(), ConsumesOp1) &&
+ (ConsumesOp0 || ConsumesOp1)) {
+ Value *InvOp0 = getFreelyInverted(Op0, Op0->hasOneUse(), &Builder);
+ Value *InvOp1 = getFreelyInverted(Op1, Op1->hasOneUse(), &Builder);
+ assert(InvOp0 && InvOp1 &&
+ "Mismatch between isFreeToInvert and getFreelyInverted");
+ return new ICmpInst(I.getSwappedPredicate(), InvOp0, InvOp1);
+ }
}
Instruction *AddI = nullptr;
- if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B),
+ if (match(&I, m_UAddWithOverflow(m_Value(X), m_Value(Y),
m_Instruction(AddI))) &&
- isa<IntegerType>(A->getType())) {
+ isa<IntegerType>(X->getType())) {
Value *Result;
Constant *Overflow;
// m_UAddWithOverflow can match patterns that do not include an explicit
// "add" instruction, so check the opcode of the matched op.
if (AddI->getOpcode() == Instruction::Add &&
- OptimizeOverflowCheck(Instruction::Add, /*Signed*/ false, A, B, *AddI,
+ OptimizeOverflowCheck(Instruction::Add, /*Signed*/ false, X, Y, *AddI,
Result, Overflow)) {
replaceInstUsesWith(*AddI, Result);
eraseInstFromFunction(*AddI);
@@ -6952,14 +7075,37 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
}
}
- // (zext a) * (zext b) --> llvm.umul.with.overflow.
- if (match(Op0, m_NUWMul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
- if (Instruction *R = processUMulZExtIdiom(I, Op0, Op1, *this))
+ // (zext X) * (zext Y) --> llvm.umul.with.overflow.
+ if (match(Op0, m_NUWMul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
+ match(Op1, m_APInt(C))) {
+ if (Instruction *R = processUMulZExtIdiom(I, Op0, C, *this))
return R;
}
- if (match(Op1, m_NUWMul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
- if (Instruction *R = processUMulZExtIdiom(I, Op1, Op0, *this))
- return R;
+
+ // Signbit test folds
+ // Fold (X u>> BitWidth - 1 Pred ZExt(i1)) --> X s< 0 Pred i1
+ // Fold (X s>> BitWidth - 1 Pred SExt(i1)) --> X s< 0 Pred i1
+ Instruction *ExtI;
+ if ((I.isUnsigned() || I.isEquality()) &&
+ match(Op1,
+ m_CombineAnd(m_Instruction(ExtI), m_ZExtOrSExt(m_Value(Y)))) &&
+ Y->getType()->getScalarSizeInBits() == 1 &&
+ (Op0->hasOneUse() || Op1->hasOneUse())) {
+ unsigned OpWidth = Op0->getType()->getScalarSizeInBits();
+ Instruction *ShiftI;
+ if (match(Op0, m_CombineAnd(m_Instruction(ShiftI),
+ m_Shr(m_Value(X), m_SpecificIntAllowUndef(
+ OpWidth - 1))))) {
+ unsigned ExtOpc = ExtI->getOpcode();
+ unsigned ShiftOpc = ShiftI->getOpcode();
+ if ((ExtOpc == Instruction::ZExt && ShiftOpc == Instruction::LShr) ||
+ (ExtOpc == Instruction::SExt && ShiftOpc == Instruction::AShr)) {
+ Value *SLTZero =
+ Builder.CreateICmpSLT(X, Constant::getNullValue(X->getType()));
+ Value *Cmp = Builder.CreateICmp(Pred, SLTZero, Y, I.getName());
+ return replaceInstUsesWith(I, Cmp);
+ }
+ }
}
}
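A sketch of the signbit fold (invented names): the shift extracts the sign bit, so the compare reduces to a sign test:

  %s = lshr i32 %x, 31
  %z = zext i1 %y to i32
  %c = icmp eq i32 %s, %z
  ; ->
  %neg = icmp slt i32 %x, 0
  %c2  = icmp eq i1 %neg, %y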
@@ -7177,17 +7323,14 @@ Instruction *InstCombinerImpl::foldFCmpIntToFPConst(FCmpInst &I,
}
// Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or
- // [0, UMAX], but it may still be fractional. See if it is fractional by
- // casting the FP value to the integer value and back, checking for equality.
+ // [0, UMAX], but it may still be fractional. Check whether this is the case
+ // using the IsExact flag.
// Don't do this for zero, because -0.0 is not fractional.
- Constant *RHSInt = LHSUnsigned
- ? ConstantExpr::getFPToUI(RHSC, IntTy)
- : ConstantExpr::getFPToSI(RHSC, IntTy);
+ APSInt RHSInt(IntWidth, LHSUnsigned);
+ bool IsExact;
+ RHS.convertToInteger(RHSInt, APFloat::rmTowardZero, &IsExact);
if (!RHS.isZero()) {
- bool Equal = LHSUnsigned
- ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC
- : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC;
- if (!Equal) {
+ if (!IsExact) {
// If we had a comparison against a fractional value, we have to adjust
// the compare predicate and sometimes the value. RHSC is rounded towards
// zero at this point.
@@ -7253,7 +7396,7 @@ Instruction *InstCombinerImpl::foldFCmpIntToFPConst(FCmpInst &I,
// Lower this FP comparison into an appropriate integer version of the
// comparison.
- return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
+ return new ICmpInst(Pred, LHSI->getOperand(0), Builder.getInt(RHSInt));
}
/// Fold (C / X) < 0.0 --> X < 0.0 if possible. Swap predicate if necessary.
@@ -7532,12 +7675,8 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
if (match(Op0, m_Instruction(LHSI)) && match(Op1, m_Constant(RHSC))) {
switch (LHSI->getOpcode()) {
case Instruction::PHI:
- // Only fold fcmp into the PHI if the phi and fcmp are in the same
- // block. If in the same block, we're encouraging jump threading. If
- // not, we are just pessimizing the code by making an i1 phi.
- if (LHSI->getParent() == I.getParent())
- if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
- return NV;
+ if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
+ return NV;
break;
case Instruction::SIToFP:
case Instruction::UIToFP:
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 701579e1de48..1d50fa9b6bf7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -16,6 +16,7 @@
#define LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -73,6 +74,10 @@ public:
virtual ~InstCombinerImpl() = default;
+ /// Perform early cleanup and prepare the InstCombine worklist.
+ bool prepareWorklist(Function &F,
+ ReversePostOrderTraversal<BasicBlock *> &RPOT);
+
/// Run the combiner over the entire worklist until it is empty.
///
/// \returns true if the IR is changed.
@@ -93,6 +98,7 @@ public:
Instruction *visitSub(BinaryOperator &I);
Instruction *visitFSub(BinaryOperator &I);
Instruction *visitMul(BinaryOperator &I);
+ Instruction *foldFMulReassoc(BinaryOperator &I);
Instruction *visitFMul(BinaryOperator &I);
Instruction *visitURem(BinaryOperator &I);
Instruction *visitSRem(BinaryOperator &I);
@@ -126,7 +132,6 @@ public:
Instruction *FoldShiftByConstant(Value *Op0, Constant *Op1,
BinaryOperator &I);
Instruction *commonCastTransforms(CastInst &CI);
- Instruction *commonPointerCastTransforms(CastInst &CI);
Instruction *visitTrunc(TruncInst &CI);
Instruction *visitZExt(ZExtInst &Zext);
Instruction *visitSExt(SExtInst &Sext);
@@ -193,6 +198,44 @@ public:
LoadInst *combineLoadToNewType(LoadInst &LI, Type *NewTy,
const Twine &Suffix = "");
+ KnownFPClass computeKnownFPClass(Value *Val, FastMathFlags FMF,
+ FPClassTest Interested = fcAllFlags,
+ const Instruction *CtxI = nullptr,
+ unsigned Depth = 0) const {
+ return llvm::computeKnownFPClass(Val, FMF, DL, Interested, Depth, &TLI, &AC,
+ CtxI, &DT);
+ }
+
+ KnownFPClass computeKnownFPClass(Value *Val,
+ FPClassTest Interested = fcAllFlags,
+ const Instruction *CtxI = nullptr,
+ unsigned Depth = 0) const {
+ return llvm::computeKnownFPClass(Val, DL, Interested, Depth, &TLI, &AC,
+ CtxI, &DT);
+ }
+
+ /// Check if fmul \p MulVal, +0.0 will yield +0.0 (or signed zero is
+ /// ignorable).
+ bool fmulByZeroIsZero(Value *MulVal, FastMathFlags FMF,
+ const Instruction *CtxI) const;
+
+ Constant *getLosslessTrunc(Constant *C, Type *TruncTy, unsigned ExtOp) {
+ Constant *TruncC = ConstantExpr::getTrunc(C, TruncTy);
+ Constant *ExtTruncC =
+ ConstantFoldCastOperand(ExtOp, TruncC, C->getType(), DL);
+ if (ExtTruncC && ExtTruncC == C)
+ return TruncC;
+ return nullptr;
+ }
+
+ Constant *getLosslessUnsignedTrunc(Constant *C, Type *TruncTy) {
+ return getLosslessTrunc(C, TruncTy, Instruction::ZExt);
+ }
+
+ Constant *getLosslessSignedTrunc(Constant *C, Type *TruncTy) {
+ return getLosslessTrunc(C, TruncTy, Instruction::SExt);
+ }
+
private:
bool annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI);
bool isDesirableIntType(unsigned BitWidth) const;
@@ -233,6 +276,7 @@ private:
bool transformConstExprCastCall(CallBase &Call);
Instruction *transformCallThroughTrampoline(CallBase &Call,
IntrinsicInst &Tramp);
+ Instruction *foldCommutativeIntrinsicOverSelects(IntrinsicInst &II);
Value *simplifyMaskedLoad(IntrinsicInst &II);
Instruction *simplifyMaskedStore(IntrinsicInst &II);
@@ -252,13 +296,15 @@ private:
Instruction *transformSExtICmp(ICmpInst *Cmp, SExtInst &Sext);
- bool willNotOverflowSignedAdd(const Value *LHS, const Value *RHS,
+ bool willNotOverflowSignedAdd(const WithCache<const Value *> &LHS,
+ const WithCache<const Value *> &RHS,
const Instruction &CxtI) const {
return computeOverflowForSignedAdd(LHS, RHS, &CxtI) ==
OverflowResult::NeverOverflows;
}
- bool willNotOverflowUnsignedAdd(const Value *LHS, const Value *RHS,
+ bool willNotOverflowUnsignedAdd(const WithCache<const Value *> &LHS,
+ const WithCache<const Value *> &RHS,
const Instruction &CxtI) const {
return computeOverflowForUnsignedAdd(LHS, RHS, &CxtI) ==
OverflowResult::NeverOverflows;
@@ -387,15 +433,17 @@ private:
Instruction *foldAndOrOfSelectUsingImpliedCond(Value *Op, SelectInst &SI,
bool IsAnd);
+ Instruction *hoistFNegAboveFMulFDiv(Value *FNegOp, Instruction &FMFSource);
+
public:
/// Create and insert the idiom we use to indicate a block is unreachable
/// without having to rewrite the CFG from within InstCombine.
void CreateNonTerminatorUnreachable(Instruction *InsertAt) {
auto &Ctx = InsertAt->getContext();
auto *SI = new StoreInst(ConstantInt::getTrue(Ctx),
- PoisonValue::get(Type::getInt1PtrTy(Ctx)),
+ PoisonValue::get(PointerType::getUnqual(Ctx)),
/*isVolatile*/ false, Align(1));
- InsertNewInstBefore(SI, *InsertAt);
+ InsertNewInstBefore(SI, InsertAt->getIterator());
}
/// Combiner aware instruction erasure.
@@ -412,6 +460,7 @@ public:
// use counts.
SmallVector<Value *> Ops(I.operands());
Worklist.remove(&I);
+ DC.removeValue(&I);
I.eraseFromParent();
for (Value *Op : Ops)
Worklist.handleUseCountDecrement(Op);
@@ -498,6 +547,7 @@ public:
/// Tries to simplify operands to an integer instruction based on its
/// demanded bits.
bool SimplifyDemandedInstructionBits(Instruction &Inst);
+ bool SimplifyDemandedInstructionBits(Instruction &Inst, KnownBits &Known);
Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt &UndefElts, unsigned Depth = 0,
@@ -535,6 +585,9 @@ public:
Instruction *foldAddWithConstant(BinaryOperator &Add);
+ Instruction *foldSquareSumInt(BinaryOperator &I);
+ Instruction *foldSquareSumFP(BinaryOperator &I);
+
/// Try to rotate an operation below a PHI node, using PHI nodes for
/// its operands.
Instruction *foldPHIArgOpIntoPHI(PHINode &PN);
@@ -580,6 +633,9 @@ public:
Instruction *foldICmpInstWithConstantAllowUndef(ICmpInst &Cmp,
const APInt &C);
Instruction *foldICmpBinOp(ICmpInst &Cmp, const SimplifyQuery &SQ);
+ Instruction *foldICmpWithMinMaxImpl(Instruction &I, MinMaxIntrinsic *MinMax,
+ Value *Z, ICmpInst::Predicate Pred);
+ Instruction *foldICmpWithMinMax(ICmpInst &Cmp);
Instruction *foldICmpEquality(ICmpInst &Cmp);
Instruction *foldIRemByPowerOfTwoToBitTest(ICmpInst &I);
Instruction *foldSignBitTest(ICmpInst &I);
@@ -593,6 +649,8 @@ public:
ConstantInt *C);
Instruction *foldICmpTruncConstant(ICmpInst &Cmp, TruncInst *Trunc,
const APInt &C);
+ Instruction *foldICmpTruncWithTruncOrExt(ICmpInst &Cmp,
+ const SimplifyQuery &Q);
Instruction *foldICmpAndConstant(ICmpInst &Cmp, BinaryOperator *And,
const APInt &C);
Instruction *foldICmpXorConstant(ICmpInst &Cmp, BinaryOperator *Xor,
@@ -667,8 +725,12 @@ public:
bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock);
bool removeInstructionsBeforeUnreachable(Instruction &I);
- bool handleUnreachableFrom(Instruction *I);
- bool handlePotentiallyDeadSuccessors(BasicBlock *BB, BasicBlock *LiveSucc);
+ void addDeadEdge(BasicBlock *From, BasicBlock *To,
+ SmallVectorImpl<BasicBlock *> &Worklist);
+ void handleUnreachableFrom(Instruction *I,
+ SmallVectorImpl<BasicBlock *> &Worklist);
+ void handlePotentiallyDeadBlocks(SmallVectorImpl<BasicBlock *> &Worklist);
+ void handlePotentiallyDeadSuccessors(BasicBlock *BB, BasicBlock *LiveSucc);
void freelyInvertAllUsersOf(Value *V, Value *IgnoredUser = nullptr);
};
@@ -679,16 +741,11 @@ class Negator final {
using BuilderTy = IRBuilder<TargetFolder, IRBuilderCallbackInserter>;
BuilderTy Builder;
- const DataLayout &DL;
- AssumptionCache &AC;
- const DominatorTree &DT;
-
const bool IsTrulyNegation;
SmallDenseMap<Value *, Value *> NegationsCache;
- Negator(LLVMContext &C, const DataLayout &DL, AssumptionCache &AC,
- const DominatorTree &DT, bool IsTrulyNegation);
+ Negator(LLVMContext &C, const DataLayout &DL, bool IsTrulyNegation);
#if LLVM_ENABLE_STATS
unsigned NumValuesVisitedInThisNegator = 0;
@@ -700,13 +757,13 @@ class Negator final {
std::array<Value *, 2> getSortedOperandsOfBinOp(Instruction *I);
- [[nodiscard]] Value *visitImpl(Value *V, unsigned Depth);
+ [[nodiscard]] Value *visitImpl(Value *V, bool IsNSW, unsigned Depth);
- [[nodiscard]] Value *negate(Value *V, unsigned Depth);
+ [[nodiscard]] Value *negate(Value *V, bool IsNSW, unsigned Depth);
/// Recurse depth-first and attempt to sink the negation.
/// FIXME: use worklist?
- [[nodiscard]] std::optional<Result> run(Value *Root);
+ [[nodiscard]] std::optional<Result> run(Value *Root, bool IsNSW);
Negator(const Negator &) = delete;
Negator(Negator &&) = delete;
@@ -716,7 +773,7 @@ class Negator final {
public:
/// Attempt to negate \p Root. Returns nullptr if negation can't be performed,
/// otherwise returns negated value.
- [[nodiscard]] static Value *Negate(bool LHSIsZero, Value *Root,
+ [[nodiscard]] static Value *Negate(bool LHSIsZero, bool IsNSW, Value *Root,
InstCombinerImpl &IC);
};
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 6aa20ee26b9a..bb2a77daa60a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -36,6 +36,13 @@ static cl::opt<unsigned> MaxCopiedFromConstantUsers(
cl::desc("Maximum users to visit in copy from constant transform"),
cl::Hidden);
+namespace llvm {
+cl::opt<bool> EnableInferAlignmentPass(
+ "enable-infer-alignment-pass", cl::init(true), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Enable the InferAlignment pass, disabling alignment inference in "
+ "InstCombine"));
+}
+
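The flag above follows the usual LLVM cl::opt pattern: defined once inside namespace llvm so another translation unit (here, the new InferAlignment pass) can reach it through an extern declaration. A minimal sketch of that pattern, with a hypothetical flag name standing in for EnableInferAlignmentPass:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Hypothetical stand-in flag; the real one is EnableInferAlignmentPass.
cl::opt<bool> ExampleGate("example-gate", cl::init(true), cl::Hidden,
                          cl::desc("Gate an optional transform"));

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv);
  if (!ExampleGate) {
    // Fall back to the legacy in-pass behaviour here.
  }
  return 0;
}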
/// isOnlyCopiedFromConstantMemory - Recursively walk the uses of a (derived)
/// pointer to an alloca. Ignore any reads of the pointer; return false if we
/// see any stores or other unknown uses. If we see pointer arithmetic, keep
@@ -206,29 +213,10 @@ static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC,
AllocaInst *New = IC.Builder.CreateAlloca(NewTy, AI.getAddressSpace(),
nullptr, AI.getName());
New->setAlignment(AI.getAlign());
+ New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
replaceAllDbgUsesWith(AI, *New, *New, DT);
-
- // Scan to the end of the allocation instructions, to skip over a block of
- // allocas if possible...also skip interleaved debug info
- //
- BasicBlock::iterator It(New);
- while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
- ++It;
-
- // Now that I is pointing to the first non-allocation-inst in the block,
- // insert our getelementptr instruction...
- //
- Type *IdxTy = IC.getDataLayout().getIndexType(AI.getType());
- Value *NullIdx = Constant::getNullValue(IdxTy);
- Value *Idx[2] = {NullIdx, NullIdx};
- Instruction *GEP = GetElementPtrInst::CreateInBounds(
- NewTy, New, Idx, New->getName() + ".sub");
- IC.InsertNewInstBefore(GEP, *It);
-
- // Now make everything use the getelementptr instead of the original
- // allocation.
- return IC.replaceInstUsesWith(AI, GEP);
+ return IC.replaceInstUsesWith(AI, New);
}
}
@@ -380,7 +368,7 @@ void PointerReplacer::replace(Instruction *I) {
NewI->takeName(LT);
copyMetadataForLoad(*NewI, *LT);
- IC.InsertNewInstWith(NewI, *LT);
+ IC.InsertNewInstWith(NewI, LT->getIterator());
IC.replaceInstUsesWith(*LT, NewI);
WorkMap[LT] = NewI;
} else if (auto *PHI = dyn_cast<PHINode>(I)) {
@@ -398,7 +386,7 @@ void PointerReplacer::replace(Instruction *I) {
Indices.append(GEP->idx_begin(), GEP->idx_end());
auto *NewI =
GetElementPtrInst::Create(GEP->getSourceElementType(), V, Indices);
- IC.InsertNewInstWith(NewI, *GEP);
+ IC.InsertNewInstWith(NewI, GEP->getIterator());
NewI->takeName(GEP);
WorkMap[GEP] = NewI;
} else if (auto *BC = dyn_cast<BitCastInst>(I)) {
@@ -407,14 +395,14 @@ void PointerReplacer::replace(Instruction *I) {
auto *NewT = PointerType::get(BC->getType()->getContext(),
V->getType()->getPointerAddressSpace());
auto *NewI = new BitCastInst(V, NewT);
- IC.InsertNewInstWith(NewI, *BC);
+ IC.InsertNewInstWith(NewI, BC->getIterator());
NewI->takeName(BC);
WorkMap[BC] = NewI;
} else if (auto *SI = dyn_cast<SelectInst>(I)) {
auto *NewSI = SelectInst::Create(
SI->getCondition(), getReplacement(SI->getTrueValue()),
getReplacement(SI->getFalseValue()), SI->getName(), nullptr, SI);
- IC.InsertNewInstWith(NewSI, *SI);
+ IC.InsertNewInstWith(NewSI, SI->getIterator());
NewSI->takeName(SI);
WorkMap[SI] = NewSI;
} else if (auto *MemCpy = dyn_cast<MemTransferInst>(I)) {
@@ -449,7 +437,7 @@ void PointerReplacer::replace(Instruction *I) {
ASC->getType()->getPointerAddressSpace()) {
auto *NewI = new AddrSpaceCastInst(V, ASC->getType(), "");
NewI->takeName(ASC);
- IC.InsertNewInstWith(NewI, *ASC);
+ IC.InsertNewInstWith(NewI, ASC->getIterator());
NewV = NewI;
}
IC.replaceInstUsesWith(*ASC, NewV);
@@ -507,8 +495,6 @@ Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
// types.
const Align MaxAlign = std::max(EntryAI->getAlign(), AI.getAlign());
EntryAI->setAlignment(MaxAlign);
- if (AI.getType() != EntryAI->getType())
- return new BitCastInst(EntryAI, AI.getType());
return replaceInstUsesWith(AI, EntryAI);
}
}
@@ -534,13 +520,11 @@ Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
unsigned SrcAddrSpace = TheSrc->getType()->getPointerAddressSpace();
- auto *DestTy = PointerType::get(AI.getAllocatedType(), SrcAddrSpace);
if (AI.getAddressSpace() == SrcAddrSpace) {
for (Instruction *Delete : ToDelete)
eraseInstFromFunction(*Delete);
- Value *Cast = Builder.CreateBitCast(TheSrc, DestTy);
- Instruction *NewI = replaceInstUsesWith(AI, Cast);
+ Instruction *NewI = replaceInstUsesWith(AI, TheSrc);
eraseInstFromFunction(*Copy);
++NumGlobalCopies;
return NewI;
@@ -551,8 +535,7 @@ Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
for (Instruction *Delete : ToDelete)
eraseInstFromFunction(*Delete);
- Value *Cast = Builder.CreateBitCast(TheSrc, DestTy);
- PtrReplacer.replacePointer(Cast);
+ PtrReplacer.replacePointer(TheSrc);
++NumGlobalCopies;
}
}
@@ -582,16 +565,9 @@ LoadInst *InstCombinerImpl::combineLoadToNewType(LoadInst &LI, Type *NewTy,
assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
"can't fold an atomic load to requested type");
- Value *Ptr = LI.getPointerOperand();
- unsigned AS = LI.getPointerAddressSpace();
- Type *NewPtrTy = NewTy->getPointerTo(AS);
- Value *NewPtr = nullptr;
- if (!(match(Ptr, m_BitCast(m_Value(NewPtr))) &&
- NewPtr->getType() == NewPtrTy))
- NewPtr = Builder.CreateBitCast(Ptr, NewPtrTy);
-
- LoadInst *NewLoad = Builder.CreateAlignedLoad(
- NewTy, NewPtr, LI.getAlign(), LI.isVolatile(), LI.getName() + Suffix);
+ LoadInst *NewLoad =
+ Builder.CreateAlignedLoad(NewTy, LI.getPointerOperand(), LI.getAlign(),
+ LI.isVolatile(), LI.getName() + Suffix);
NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
copyMetadataForLoad(*NewLoad, LI);
return NewLoad;
@@ -606,13 +582,11 @@ static StoreInst *combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI,
"can't fold an atomic store of requested type");
Value *Ptr = SI.getPointerOperand();
- unsigned AS = SI.getPointerAddressSpace();
SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
SI.getAllMetadata(MD);
- StoreInst *NewStore = IC.Builder.CreateAlignedStore(
- V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
- SI.getAlign(), SI.isVolatile());
+ StoreInst *NewStore =
+ IC.Builder.CreateAlignedStore(V, Ptr, SI.getAlign(), SI.isVolatile());
NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
for (const auto &MDPair : MD) {
unsigned ID = MDPair.first;
@@ -655,29 +629,6 @@ static StoreInst *combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI,
return NewStore;
}
-/// Returns true if instruction represent minmax pattern like:
-/// select ((cmp load V1, load V2), V1, V2).
-static bool isMinMaxWithLoads(Value *V, Type *&LoadTy) {
- assert(V->getType()->isPointerTy() && "Expected pointer type.");
- // Ignore possible ty* to ixx* bitcast.
- V = InstCombiner::peekThroughBitcast(V);
- // Check that select is select ((cmp load V1, load V2), V1, V2) - minmax
- // pattern.
- CmpInst::Predicate Pred;
- Instruction *L1;
- Instruction *L2;
- Value *LHS;
- Value *RHS;
- if (!match(V, m_Select(m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2)),
- m_Value(LHS), m_Value(RHS))))
- return false;
- LoadTy = L1->getType();
- return (match(L1, m_Load(m_Specific(LHS))) &&
- match(L2, m_Load(m_Specific(RHS)))) ||
- (match(L1, m_Load(m_Specific(RHS))) &&
- match(L2, m_Load(m_Specific(LHS))));
-}
-
/// Combine loads to match the type of their uses' value after looking
/// through intervening bitcasts.
///
@@ -818,7 +769,7 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
return nullptr;
const DataLayout &DL = IC.getDataLayout();
- auto EltSize = DL.getTypeAllocSize(ET);
+ TypeSize EltSize = DL.getTypeAllocSize(ET);
const auto Align = LI.getAlign();
auto *Addr = LI.getPointerOperand();
@@ -826,7 +777,7 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
auto *Zero = ConstantInt::get(IdxType, 0);
Value *V = PoisonValue::get(T);
- uint64_t Offset = 0;
+ TypeSize Offset = TypeSize::get(0, ET->isScalableTy());
for (uint64_t i = 0; i < NumElements; i++) {
Value *Indices[2] = {
Zero,
@@ -834,9 +785,9 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
};
auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices),
Name + ".elt");
+ auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr,
- commonAlignment(Align, Offset),
- Name + ".unpack");
+ EltAlign, Name + ".unpack");
L->setAAMetadata(LI.getAAMetadata());
V = IC.Builder.CreateInsertValue(V, L, i);
Offset += EltSize;
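The per-element alignment computed here is the largest power of two that divides both the base alignment and the element's byte offset. A standalone arithmetic sketch of what commonAlignment returns, assuming a power-of-two base alignment:

#include <algorithm>
#include <bit>
#include <cassert>
#include <cstdint>

// Largest power of two dividing both BaseAlign and Offset.
static uint64_t commonAlign(uint64_t BaseAlign, uint64_t Offset) {
  if (Offset == 0)
    return BaseAlign;
  return std::min(BaseAlign, uint64_t(1) << std::countr_zero(Offset));
}

int main() {
  assert(commonAlign(16, 0) == 16);  // element 0 keeps the full alignment
  assert(commonAlign(16, 4) == 4);   // offset 4: only 4-byte aligned
  assert(commonAlign(16, 24) == 8);  // offset 24: 8 divides 24, 16 does not
}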
@@ -971,7 +922,7 @@ static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC,
Type *SourceElementType = GEPI->getSourceElementType();
// Size information about scalable vectors is not available, so we cannot
// deduce whether indexing at n is undefined behaviour or not. Bail out.
- if (isa<ScalableVectorType>(SourceElementType))
+ if (SourceElementType->isScalableTy())
return false;
Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops);
@@ -1020,7 +971,7 @@ static Instruction *replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr,
Instruction *NewGEPI = GEPI->clone();
NewGEPI->setOperand(Idx,
ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
- IC.InsertNewInstBefore(NewGEPI, *GEPI);
+ IC.InsertNewInstBefore(NewGEPI, GEPI->getIterator());
return NewGEPI;
}
}
@@ -1062,11 +1013,13 @@ Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
if (Instruction *Res = combineLoadToOperationType(*this, LI))
return Res;
- // Attempt to improve the alignment.
- Align KnownAlign = getOrEnforceKnownAlignment(
- Op, DL.getPrefTypeAlign(LI.getType()), DL, &LI, &AC, &DT);
- if (KnownAlign > LI.getAlign())
- LI.setAlignment(KnownAlign);
+ if (!EnableInferAlignmentPass) {
+ // Attempt to improve the alignment.
+ Align KnownAlign = getOrEnforceKnownAlignment(
+ Op, DL.getPrefTypeAlign(LI.getType()), DL, &LI, &AC, &DT);
+ if (KnownAlign > LI.getAlign())
+ LI.setAlignment(KnownAlign);
+ }
// Replace GEP indices if possible.
if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI))
@@ -1337,7 +1290,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
return false;
const DataLayout &DL = IC.getDataLayout();
- auto EltSize = DL.getTypeAllocSize(AT->getElementType());
+ TypeSize EltSize = DL.getTypeAllocSize(AT->getElementType());
const auto Align = SI.getAlign();
SmallString<16> EltName = V->getName();
@@ -1349,7 +1302,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
auto *IdxType = Type::getInt64Ty(T->getContext());
auto *Zero = ConstantInt::get(IdxType, 0);
- uint64_t Offset = 0;
+ TypeSize Offset = TypeSize::get(0, AT->getElementType()->isScalableTy());
for (uint64_t i = 0; i < NumElements; i++) {
Value *Indices[2] = {
Zero,
@@ -1358,7 +1311,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
auto *Ptr =
IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices), AddrName);
auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
- auto EltAlign = commonAlignment(Align, Offset);
+ auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
NS->setAAMetadata(SI.getAAMetadata());
Offset += EltSize;
@@ -1399,58 +1352,6 @@ static bool equivalentAddressValues(Value *A, Value *B) {
return false;
}
-/// Converts store (bitcast (load (bitcast (select ...)))) to
-/// store (load (select ...)), where select is minmax:
-/// select ((cmp load V1, load V2), V1, V2).
-static bool removeBitcastsFromLoadStoreOnMinMax(InstCombinerImpl &IC,
- StoreInst &SI) {
- // bitcast?
- if (!match(SI.getPointerOperand(), m_BitCast(m_Value())))
- return false;
- // load? integer?
- Value *LoadAddr;
- if (!match(SI.getValueOperand(), m_Load(m_BitCast(m_Value(LoadAddr)))))
- return false;
- auto *LI = cast<LoadInst>(SI.getValueOperand());
- if (!LI->getType()->isIntegerTy())
- return false;
- Type *CmpLoadTy;
- if (!isMinMaxWithLoads(LoadAddr, CmpLoadTy))
- return false;
-
- // Make sure the type would actually change.
- // This condition can be hit with chains of bitcasts.
- if (LI->getType() == CmpLoadTy)
- return false;
-
- // Make sure we're not changing the size of the load/store.
- const auto &DL = IC.getDataLayout();
- if (DL.getTypeStoreSizeInBits(LI->getType()) !=
- DL.getTypeStoreSizeInBits(CmpLoadTy))
- return false;
-
- if (!all_of(LI->users(), [LI, LoadAddr](User *U) {
- auto *SI = dyn_cast<StoreInst>(U);
- return SI && SI->getPointerOperand() != LI &&
- InstCombiner::peekThroughBitcast(SI->getPointerOperand()) !=
- LoadAddr &&
- !SI->getPointerOperand()->isSwiftError();
- }))
- return false;
-
- IC.Builder.SetInsertPoint(LI);
- LoadInst *NewLI = IC.combineLoadToNewType(*LI, CmpLoadTy);
- // Replace all the stores with stores of the newly loaded value.
- for (auto *UI : LI->users()) {
- auto *USI = cast<StoreInst>(UI);
- IC.Builder.SetInsertPoint(USI);
- combineStoreToNewValue(IC, *USI, NewLI);
- }
- IC.replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
- IC.eraseInstFromFunction(*LI);
- return true;
-}
-
Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
Value *Val = SI.getOperand(0);
Value *Ptr = SI.getOperand(1);
@@ -1459,19 +1360,18 @@ Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
if (combineStoreToValueType(*this, SI))
return eraseInstFromFunction(SI);
- // Attempt to improve the alignment.
- const Align KnownAlign = getOrEnforceKnownAlignment(
- Ptr, DL.getPrefTypeAlign(Val->getType()), DL, &SI, &AC, &DT);
- if (KnownAlign > SI.getAlign())
- SI.setAlignment(KnownAlign);
+ if (!EnableInferAlignmentPass) {
+ // Attempt to improve the alignment.
+ const Align KnownAlign = getOrEnforceKnownAlignment(
+ Ptr, DL.getPrefTypeAlign(Val->getType()), DL, &SI, &AC, &DT);
+ if (KnownAlign > SI.getAlign())
+ SI.setAlignment(KnownAlign);
+ }
// Try to canonicalize the stored type.
if (unpackStoreToAggregate(*this, SI))
return eraseInstFromFunction(SI);
- if (removeBitcastsFromLoadStoreOnMinMax(*this, SI))
- return eraseInstFromFunction(SI);
-
// Replace GEP indices if possible.
if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI))
return replaceOperand(SI, 1, NewGEPI);
@@ -1508,8 +1408,7 @@ Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
--BBI;
// Don't count debug info directives, lest they affect codegen,
// and we skip pointer-to-pointer bitcasts, which are NOPs.
- if (BBI->isDebugOrPseudoInst() ||
- (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
+ if (BBI->isDebugOrPseudoInst()) {
ScanInsts++;
continue;
}
@@ -1560,11 +1459,15 @@ Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
// This is a non-terminator unreachable marker. Don't remove it.
if (isa<UndefValue>(Ptr)) {
- // Remove all instructions after the marker and guaranteed-to-transfer
- // instructions before the marker.
- if (handleUnreachableFrom(SI.getNextNode()) ||
- removeInstructionsBeforeUnreachable(SI))
+ // Remove guaranteed-to-transfer instructions before the marker.
+ if (removeInstructionsBeforeUnreachable(SI))
return &SI;
+
+ // Remove all instructions after the marker and handle dead blocks this
+ // implies.
+ SmallVector<BasicBlock *> Worklist;
+ handleUnreachableFrom(SI.getNextNode(), Worklist);
+ handlePotentiallyDeadBlocks(Worklist);
return nullptr;
}
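handleUnreachableFrom now only collects candidates; the caller drains them in one pass via handlePotentiallyDeadBlocks. A generic sketch of that two-phase worklist shape, with stand-in types rather than the pass's real reachability logic:

#include <set>
#include <vector>

struct Block {
  std::vector<Block *> Succs;
};

void drainDeadBlocks(std::vector<Block *> Worklist) {
  std::set<Block *> Seen;
  while (!Worklist.empty()) {
    Block *B = Worklist.back();
    Worklist.pop_back();
    if (!Seen.insert(B).second)
      continue; // already handled
    for (Block *S : B->Succs)
      Worklist.push_back(S); // the real code first proves S unreachable
  }
}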
@@ -1626,8 +1529,7 @@ bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
if (OtherBr->isUnconditional()) {
--BBI;
// Skip over debugging info and pseudo probes.
- while (BBI->isDebugOrPseudoInst() ||
- (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
+ while (BBI->isDebugOrPseudoInst()) {
if (BBI==OtherBB->begin())
return false;
--BBI;
@@ -1681,7 +1583,7 @@ bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
Builder.SetInsertPoint(OtherStore);
PN->addIncoming(Builder.CreateBitOrPointerCast(MergedVal, PN->getType()),
OtherBB);
- MergedVal = InsertNewInstBefore(PN, DestBB->front());
+ MergedVal = InsertNewInstBefore(PN, DestBB->begin());
PN->setDebugLoc(MergedLoc);
}
@@ -1690,7 +1592,7 @@ bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
StoreInst *NewSI =
new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(), SI.getAlign(),
SI.getOrdering(), SI.getSyncScopeID());
- InsertNewInstBefore(NewSI, *BBI);
+ InsertNewInstBefore(NewSI, BBI);
NewSI->setDebugLoc(MergedLoc);
NewSI->mergeDIAssignID({&SI, OtherStore});
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 50458e2773e6..e5566578869d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -258,9 +258,14 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
if (Op0->hasOneUse() && match(Op1, m_NegatedPower2())) {
// Interpret X * (-1<<C) as (-X) * (1<<C) and try to sink the negation.
// The "* (1<<C)" thus becomes a potential shifting opportunity.
- if (Value *NegOp0 = Negator::Negate(/*IsNegation*/ true, Op0, *this))
- return BinaryOperator::CreateMul(
- NegOp0, ConstantExpr::getNeg(cast<Constant>(Op1)), I.getName());
+ if (Value *NegOp0 =
+ Negator::Negate(/*IsNegation*/ true, HasNSW, Op0, *this)) {
+ auto *Op1C = cast<Constant>(Op1);
+ return replaceInstUsesWith(
+ I, Builder.CreateMul(NegOp0, ConstantExpr::getNeg(Op1C), "",
+ /* HasNUW */ false,
+ HasNSW && Op1C->isNotMinSignedValue()));
+ }
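The rewrite rests on the two's-complement identity X * (-1 << C) == (-X) * (1 << C). A minimal check, using unsigned wrapping arithmetic to mirror LLVM's mul semantics:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned C = 4;
  for (int32_t X : {7, -3, 100, -256}) {
    uint32_t LHS = uint32_t(X) * (0u - (1u << C)); // X * (-1 << C)
    uint32_t RHS = (0u - uint32_t(X)) * (1u << C); // (-X) * (1 << C)
    assert(LHS == RHS);
  }
}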
// Try to convert multiply of extended operand to narrow negate and shift
// for better analysis.
@@ -295,9 +300,7 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
// Canonicalize (X|C1)*MulC -> X*MulC+C1*MulC.
Value *X;
Constant *C1;
- if ((match(Op0, m_OneUse(m_Add(m_Value(X), m_ImmConstant(C1))))) ||
- (match(Op0, m_OneUse(m_Or(m_Value(X), m_ImmConstant(C1)))) &&
- haveNoCommonBitsSet(X, C1, DL, &AC, &I, &DT))) {
+ if (match(Op0, m_OneUse(m_AddLike(m_Value(X), m_ImmConstant(C1))))) {
// C1*MulC simplifies to a tidier constant.
Value *NewC = Builder.CreateMul(C1, MulC);
auto *BOp0 = cast<BinaryOperator>(Op0);
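m_AddLike also matches a disjoint or, which is sound because an or with no common bits behaves exactly like an add, so the multiply distributes the same way:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xF0, C1 = 0x0F, MulC = 3;
  assert((X & C1) == 0);                 // the "disjoint" precondition
  assert((X | C1) == (X + C1));          // or behaves like add here
  assert((X | C1) * MulC == X * MulC + C1 * MulC);
}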
@@ -555,6 +558,180 @@ Instruction *InstCombinerImpl::foldFPSignBitOps(BinaryOperator &I) {
return nullptr;
}
+Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0);
+ Value *Op1 = I.getOperand(1);
+ Value *X, *Y;
+ Constant *C;
+
+ // Reassociate constant RHS with another constant to form constant
+ // expression.
+ if (match(Op1, m_Constant(C)) && C->isFiniteNonZeroFP()) {
+ Constant *C1;
+ if (match(Op0, m_OneUse(m_FDiv(m_Constant(C1), m_Value(X))))) {
+ // (C1 / X) * C --> (C * C1) / X
+ Constant *CC1 =
+ ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL);
+ if (CC1 && CC1->isNormalFP())
+ return BinaryOperator::CreateFDivFMF(CC1, X, &I);
+ }
+ if (match(Op0, m_FDiv(m_Value(X), m_Constant(C1)))) {
+ // (X / C1) * C --> X * (C / C1)
+ Constant *CDivC1 =
+ ConstantFoldBinaryOpOperands(Instruction::FDiv, C, C1, DL);
+ if (CDivC1 && CDivC1->isNormalFP())
+ return BinaryOperator::CreateFMulFMF(X, CDivC1, &I);
+
+ // If the constant was a denormal, try reassociating differently.
+ // (X / C1) * C --> X / (C1 / C)
+ Constant *C1DivC =
+ ConstantFoldBinaryOpOperands(Instruction::FDiv, C1, C, DL);
+ if (C1DivC && Op0->hasOneUse() && C1DivC->isNormalFP())
+ return BinaryOperator::CreateFDivFMF(X, C1DivC, &I);
+ }
+
+ // We do not need to match 'fadd C, X' and 'fsub X, C' because they are
+ // canonicalized to 'fadd X, C'. Distributing the multiply may allow
+ // further folds and (X * C) + C2 is 'fma'.
+ if (match(Op0, m_OneUse(m_FAdd(m_Value(X), m_Constant(C1))))) {
+ // (X + C1) * C --> (X * C) + (C * C1)
+ if (Constant *CC1 =
+ ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL)) {
+ Value *XC = Builder.CreateFMulFMF(X, C, &I);
+ return BinaryOperator::CreateFAddFMF(XC, CC1, &I);
+ }
+ }
+ if (match(Op0, m_OneUse(m_FSub(m_Constant(C1), m_Value(X))))) {
+ // (C1 - X) * C --> (C * C1) - (X * C)
+ if (Constant *CC1 =
+ ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL)) {
+ Value *XC = Builder.CreateFMulFMF(X, C, &I);
+ return BinaryOperator::CreateFSubFMF(CC1, XC, &I);
+ }
+ }
+ }
+
+ Value *Z;
+ if (match(&I,
+ m_c_FMul(m_OneUse(m_FDiv(m_Value(X), m_Value(Y))), m_Value(Z)))) {
+ // Sink division: (X / Y) * Z --> (X * Z) / Y
+ Value *NewFMul = Builder.CreateFMulFMF(X, Z, &I);
+ return BinaryOperator::CreateFDivFMF(NewFMul, Y, &I);
+ }
+
+ // sqrt(X) * sqrt(Y) -> sqrt(X * Y)
+ // nnan disallows the possibility of returning a number if both operands are
+ // negative (in that case, we should return NaN).
+ if (I.hasNoNaNs() && match(Op0, m_OneUse(m_Sqrt(m_Value(X)))) &&
+ match(Op1, m_OneUse(m_Sqrt(m_Value(Y))))) {
+ Value *XY = Builder.CreateFMulFMF(X, Y, &I);
+ Value *Sqrt = Builder.CreateUnaryIntrinsic(Intrinsic::sqrt, XY, &I);
+ return replaceInstUsesWith(I, Sqrt);
+ }
+
+ // The following transforms are done irrespective of the number of uses
+ // for the expression "1.0/sqrt(X)".
+ // 1) 1.0/sqrt(X) * X -> X/sqrt(X)
+ // 2) X * 1.0/sqrt(X) -> X/sqrt(X)
+ // We always expect the backend to reduce X/sqrt(X) to sqrt(X), if it
+ // has the necessary (reassoc) fast-math-flags.
+ if (I.hasNoSignedZeros() &&
+ match(Op0, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) &&
+ match(Y, m_Sqrt(m_Value(X))) && Op1 == X)
+ return BinaryOperator::CreateFDivFMF(X, Y, &I);
+ if (I.hasNoSignedZeros() &&
+ match(Op1, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) &&
+ match(Y, m_Sqrt(m_Value(X))) && Op0 == X)
+ return BinaryOperator::CreateFDivFMF(X, Y, &I);
+
+ // Like the similar transform in instsimplify, this requires 'nsz' because
+ // sqrt(-0.0) = -0.0, and -0.0 * -0.0 does not simplify to -0.0.
+ if (I.hasNoNaNs() && I.hasNoSignedZeros() && Op0 == Op1 && Op0->hasNUses(2)) {
+ // Peek through fdiv to find squaring of square root:
+ // (X / sqrt(Y)) * (X / sqrt(Y)) --> (X * X) / Y
+ if (match(Op0, m_FDiv(m_Value(X), m_Sqrt(m_Value(Y))))) {
+ Value *XX = Builder.CreateFMulFMF(X, X, &I);
+ return BinaryOperator::CreateFDivFMF(XX, Y, &I);
+ }
+ // (sqrt(Y) / X) * (sqrt(Y) / X) --> Y / (X * X)
+ if (match(Op0, m_FDiv(m_Sqrt(m_Value(Y)), m_Value(X)))) {
+ Value *XX = Builder.CreateFMulFMF(X, X, &I);
+ return BinaryOperator::CreateFDivFMF(Y, XX, &I);
+ }
+ }
+
+ // pow(X, Y) * X --> pow(X, Y+1)
+ // X * pow(X, Y) --> pow(X, Y+1)
+ if (match(&I, m_c_FMul(m_OneUse(m_Intrinsic<Intrinsic::pow>(m_Value(X),
+ m_Value(Y))),
+ m_Deferred(X)))) {
+ Value *Y1 = Builder.CreateFAddFMF(Y, ConstantFP::get(I.getType(), 1.0), &I);
+ Value *Pow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, X, Y1, &I);
+ return replaceInstUsesWith(I, Pow);
+ }
+
+ if (I.isOnlyUserOfAnyOperand()) {
+ // pow(X, Y) * pow(X, Z) -> pow(X, Y + Z)
+ if (match(Op0, m_Intrinsic<Intrinsic::pow>(m_Value(X), m_Value(Y))) &&
+ match(Op1, m_Intrinsic<Intrinsic::pow>(m_Specific(X), m_Value(Z)))) {
+ auto *YZ = Builder.CreateFAddFMF(Y, Z, &I);
+ auto *NewPow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, X, YZ, &I);
+ return replaceInstUsesWith(I, NewPow);
+ }
+ // pow(X, Y) * pow(Z, Y) -> pow(X * Z, Y)
+ if (match(Op0, m_Intrinsic<Intrinsic::pow>(m_Value(X), m_Value(Y))) &&
+ match(Op1, m_Intrinsic<Intrinsic::pow>(m_Value(Z), m_Specific(Y)))) {
+ auto *XZ = Builder.CreateFMulFMF(X, Z, &I);
+ auto *NewPow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, XZ, Y, &I);
+ return replaceInstUsesWith(I, NewPow);
+ }
+
+ // powi(x, y) * powi(x, z) -> powi(x, y + z)
+ if (match(Op0, m_Intrinsic<Intrinsic::powi>(m_Value(X), m_Value(Y))) &&
+ match(Op1, m_Intrinsic<Intrinsic::powi>(m_Specific(X), m_Value(Z))) &&
+ Y->getType() == Z->getType()) {
+ auto *YZ = Builder.CreateAdd(Y, Z);
+ auto *NewPow = Builder.CreateIntrinsic(
+ Intrinsic::powi, {X->getType(), YZ->getType()}, {X, YZ}, &I);
+ return replaceInstUsesWith(I, NewPow);
+ }
+
+ // exp(X) * exp(Y) -> exp(X + Y)
+ if (match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X))) &&
+ match(Op1, m_Intrinsic<Intrinsic::exp>(m_Value(Y)))) {
+ Value *XY = Builder.CreateFAddFMF(X, Y, &I);
+ Value *Exp = Builder.CreateUnaryIntrinsic(Intrinsic::exp, XY, &I);
+ return replaceInstUsesWith(I, Exp);
+ }
+
+ // exp2(X) * exp2(Y) -> exp2(X + Y)
+ if (match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X))) &&
+ match(Op1, m_Intrinsic<Intrinsic::exp2>(m_Value(Y)))) {
+ Value *XY = Builder.CreateFAddFMF(X, Y, &I);
+ Value *Exp2 = Builder.CreateUnaryIntrinsic(Intrinsic::exp2, XY, &I);
+ return replaceInstUsesWith(I, Exp2);
+ }
+ }
+
+ // (X*Y) * X => (X*X) * Y where Y != X
+ // The purpose is two-fold:
+ // 1) to form a power expression (of X).
+ // 2) to potentially shorten the critical path: After transformation, the
+ // latency of the instruction Y is amortized by the expression of X*X,
+ // and therefore Y is in a "less critical" position compared to what it
+ // was before the transformation.
+ if (match(Op0, m_OneUse(m_c_FMul(m_Specific(Op1), m_Value(Y)))) && Op1 != Y) {
+ Value *XX = Builder.CreateFMulFMF(Op1, Op1, &I);
+ return BinaryOperator::CreateFMulFMF(XX, Y, &I);
+ }
+ if (match(Op1, m_OneUse(m_c_FMul(m_Specific(Op0), m_Value(Y)))) && Op0 != Y) {
+ Value *XX = Builder.CreateFMulFMF(Op0, Op0, &I);
+ return BinaryOperator::CreateFMulFMF(XX, Y, &I);
+ }
+
+ return nullptr;
+}
+
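The folds collected in foldFMulReassoc are exact in real arithmetic but only approximate in floating point, which is why visitFMul gates them on hasAllowReassoc(). A scalar illustration of three of the identities:

#include <cassert>
#include <cmath>

int main() {
  double X = 2.5, Y = 7.25;
  assert(std::fabs(std::sqrt(X) * std::sqrt(Y) - std::sqrt(X * Y)) < 1e-12);
  assert(std::fabs(std::exp(X) * std::exp(Y) - std::exp(X + Y)) < 1e-9);
  assert(std::fabs(std::pow(X, Y) * X - std::pow(X, Y + 1.0)) < 1e-6);
}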
Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
if (Value *V = simplifyFMulInst(I.getOperand(0), I.getOperand(1),
I.getFastMathFlags(),
@@ -602,176 +779,9 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
if (Value *V = SimplifySelectsFeedingBinaryOp(I, Op0, Op1))
return replaceInstUsesWith(I, V);
- if (I.hasAllowReassoc()) {
- // Reassociate constant RHS with another constant to form constant
- // expression.
- if (match(Op1, m_Constant(C)) && C->isFiniteNonZeroFP()) {
- Constant *C1;
- if (match(Op0, m_OneUse(m_FDiv(m_Constant(C1), m_Value(X))))) {
- // (C1 / X) * C --> (C * C1) / X
- Constant *CC1 =
- ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL);
- if (CC1 && CC1->isNormalFP())
- return BinaryOperator::CreateFDivFMF(CC1, X, &I);
- }
- if (match(Op0, m_FDiv(m_Value(X), m_Constant(C1)))) {
- // (X / C1) * C --> X * (C / C1)
- Constant *CDivC1 =
- ConstantFoldBinaryOpOperands(Instruction::FDiv, C, C1, DL);
- if (CDivC1 && CDivC1->isNormalFP())
- return BinaryOperator::CreateFMulFMF(X, CDivC1, &I);
-
- // If the constant was a denormal, try reassociating differently.
- // (X / C1) * C --> X / (C1 / C)
- Constant *C1DivC =
- ConstantFoldBinaryOpOperands(Instruction::FDiv, C1, C, DL);
- if (C1DivC && Op0->hasOneUse() && C1DivC->isNormalFP())
- return BinaryOperator::CreateFDivFMF(X, C1DivC, &I);
- }
-
- // We do not need to match 'fadd C, X' and 'fsub X, C' because they are
- // canonicalized to 'fadd X, C'. Distributing the multiply may allow
- // further folds and (X * C) + C2 is 'fma'.
- if (match(Op0, m_OneUse(m_FAdd(m_Value(X), m_Constant(C1))))) {
- // (X + C1) * C --> (X * C) + (C * C1)
- if (Constant *CC1 = ConstantFoldBinaryOpOperands(
- Instruction::FMul, C, C1, DL)) {
- Value *XC = Builder.CreateFMulFMF(X, C, &I);
- return BinaryOperator::CreateFAddFMF(XC, CC1, &I);
- }
- }
- if (match(Op0, m_OneUse(m_FSub(m_Constant(C1), m_Value(X))))) {
- // (C1 - X) * C --> (C * C1) - (X * C)
- if (Constant *CC1 = ConstantFoldBinaryOpOperands(
- Instruction::FMul, C, C1, DL)) {
- Value *XC = Builder.CreateFMulFMF(X, C, &I);
- return BinaryOperator::CreateFSubFMF(CC1, XC, &I);
- }
- }
- }
-
- Value *Z;
- if (match(&I, m_c_FMul(m_OneUse(m_FDiv(m_Value(X), m_Value(Y))),
- m_Value(Z)))) {
- // Sink division: (X / Y) * Z --> (X * Z) / Y
- Value *NewFMul = Builder.CreateFMulFMF(X, Z, &I);
- return BinaryOperator::CreateFDivFMF(NewFMul, Y, &I);
- }
-
- // sqrt(X) * sqrt(Y) -> sqrt(X * Y)
- // nnan disallows the possibility of returning a number if both operands are
- // negative (in that case, we should return NaN).
- if (I.hasNoNaNs() && match(Op0, m_OneUse(m_Sqrt(m_Value(X)))) &&
- match(Op1, m_OneUse(m_Sqrt(m_Value(Y))))) {
- Value *XY = Builder.CreateFMulFMF(X, Y, &I);
- Value *Sqrt = Builder.CreateUnaryIntrinsic(Intrinsic::sqrt, XY, &I);
- return replaceInstUsesWith(I, Sqrt);
- }
-
- // The following transforms are done irrespective of the number of uses
- // for the expression "1.0/sqrt(X)".
- // 1) 1.0/sqrt(X) * X -> X/sqrt(X)
- // 2) X * 1.0/sqrt(X) -> X/sqrt(X)
- // We always expect the backend to reduce X/sqrt(X) to sqrt(X), if it
- // has the necessary (reassoc) fast-math-flags.
- if (I.hasNoSignedZeros() &&
- match(Op0, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) &&
- match(Y, m_Sqrt(m_Value(X))) && Op1 == X)
- return BinaryOperator::CreateFDivFMF(X, Y, &I);
- if (I.hasNoSignedZeros() &&
- match(Op1, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) &&
- match(Y, m_Sqrt(m_Value(X))) && Op0 == X)
- return BinaryOperator::CreateFDivFMF(X, Y, &I);
-
- // Like the similar transform in instsimplify, this requires 'nsz' because
- // sqrt(-0.0) = -0.0, and -0.0 * -0.0 does not simplify to -0.0.
- if (I.hasNoNaNs() && I.hasNoSignedZeros() && Op0 == Op1 &&
- Op0->hasNUses(2)) {
- // Peek through fdiv to find squaring of square root:
- // (X / sqrt(Y)) * (X / sqrt(Y)) --> (X * X) / Y
- if (match(Op0, m_FDiv(m_Value(X), m_Sqrt(m_Value(Y))))) {
- Value *XX = Builder.CreateFMulFMF(X, X, &I);
- return BinaryOperator::CreateFDivFMF(XX, Y, &I);
- }
- // (sqrt(Y) / X) * (sqrt(Y) / X) --> Y / (X * X)
- if (match(Op0, m_FDiv(m_Sqrt(m_Value(Y)), m_Value(X)))) {
- Value *XX = Builder.CreateFMulFMF(X, X, &I);
- return BinaryOperator::CreateFDivFMF(Y, XX, &I);
- }
- }
-
- // pow(X, Y) * X --> pow(X, Y+1)
- // X * pow(X, Y) --> pow(X, Y+1)
- if (match(&I, m_c_FMul(m_OneUse(m_Intrinsic<Intrinsic::pow>(m_Value(X),
- m_Value(Y))),
- m_Deferred(X)))) {
- Value *Y1 =
- Builder.CreateFAddFMF(Y, ConstantFP::get(I.getType(), 1.0), &I);
- Value *Pow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, X, Y1, &I);
- return replaceInstUsesWith(I, Pow);
- }
-
- if (I.isOnlyUserOfAnyOperand()) {
- // pow(X, Y) * pow(X, Z) -> pow(X, Y + Z)
- if (match(Op0, m_Intrinsic<Intrinsic::pow>(m_Value(X), m_Value(Y))) &&
- match(Op1, m_Intrinsic<Intrinsic::pow>(m_Specific(X), m_Value(Z)))) {
- auto *YZ = Builder.CreateFAddFMF(Y, Z, &I);
- auto *NewPow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, X, YZ, &I);
- return replaceInstUsesWith(I, NewPow);
- }
- // pow(X, Y) * pow(Z, Y) -> pow(X * Z, Y)
- if (match(Op0, m_Intrinsic<Intrinsic::pow>(m_Value(X), m_Value(Y))) &&
- match(Op1, m_Intrinsic<Intrinsic::pow>(m_Value(Z), m_Specific(Y)))) {
- auto *XZ = Builder.CreateFMulFMF(X, Z, &I);
- auto *NewPow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, XZ, Y, &I);
- return replaceInstUsesWith(I, NewPow);
- }
-
- // powi(x, y) * powi(x, z) -> powi(x, y + z)
- if (match(Op0, m_Intrinsic<Intrinsic::powi>(m_Value(X), m_Value(Y))) &&
- match(Op1, m_Intrinsic<Intrinsic::powi>(m_Specific(X), m_Value(Z))) &&
- Y->getType() == Z->getType()) {
- auto *YZ = Builder.CreateAdd(Y, Z);
- auto *NewPow = Builder.CreateIntrinsic(
- Intrinsic::powi, {X->getType(), YZ->getType()}, {X, YZ}, &I);
- return replaceInstUsesWith(I, NewPow);
- }
-
- // exp(X) * exp(Y) -> exp(X + Y)
- if (match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X))) &&
- match(Op1, m_Intrinsic<Intrinsic::exp>(m_Value(Y)))) {
- Value *XY = Builder.CreateFAddFMF(X, Y, &I);
- Value *Exp = Builder.CreateUnaryIntrinsic(Intrinsic::exp, XY, &I);
- return replaceInstUsesWith(I, Exp);
- }
-
- // exp2(X) * exp2(Y) -> exp2(X + Y)
- if (match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X))) &&
- match(Op1, m_Intrinsic<Intrinsic::exp2>(m_Value(Y)))) {
- Value *XY = Builder.CreateFAddFMF(X, Y, &I);
- Value *Exp2 = Builder.CreateUnaryIntrinsic(Intrinsic::exp2, XY, &I);
- return replaceInstUsesWith(I, Exp2);
- }
- }
-
- // (X*Y) * X => (X*X) * Y where Y != X
- // The purpose is two-fold:
- // 1) to form a power expression (of X).
- // 2) potentially shorten the critical path: After transformation, the
- // latency of the instruction Y is amortized by the expression of X*X,
- // and therefore Y is in a "less critical" position compared to what it
- // was before the transformation.
- if (match(Op0, m_OneUse(m_c_FMul(m_Specific(Op1), m_Value(Y)))) &&
- Op1 != Y) {
- Value *XX = Builder.CreateFMulFMF(Op1, Op1, &I);
- return BinaryOperator::CreateFMulFMF(XX, Y, &I);
- }
- if (match(Op1, m_OneUse(m_c_FMul(m_Specific(Op0), m_Value(Y)))) &&
- Op0 != Y) {
- Value *XX = Builder.CreateFMulFMF(Op0, Op0, &I);
- return BinaryOperator::CreateFMulFMF(XX, Y, &I);
- }
- }
+ if (I.hasAllowReassoc())
+ if (Instruction *FoldedMul = foldFMulReassoc(I))
+ return FoldedMul;
// log2(X * 0.5) * Y = log2(X) * Y - Y
if (I.isFast()) {
@@ -802,7 +812,7 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
I.hasNoSignedZeros() && match(Start, m_Zero()))
return replaceInstUsesWith(I, Start);
- // minimun(X, Y) * maximum(X, Y) => X * Y.
+ // minimum(X, Y) * maximum(X, Y) => X * Y.
if (match(&I,
m_c_FMul(m_Intrinsic<Intrinsic::maximum>(m_Value(X), m_Value(Y)),
m_c_Intrinsic<Intrinsic::minimum>(m_Deferred(X),
@@ -918,8 +928,7 @@ static bool isMultiple(const APInt &C1, const APInt &C2, APInt &Quotient,
return Remainder.isMinValue();
}
-static Instruction *foldIDivShl(BinaryOperator &I,
- InstCombiner::BuilderTy &Builder) {
+static Value *foldIDivShl(BinaryOperator &I, InstCombiner::BuilderTy &Builder) {
assert((I.getOpcode() == Instruction::SDiv ||
I.getOpcode() == Instruction::UDiv) &&
"Expected integer divide");
@@ -928,7 +937,6 @@ static Instruction *foldIDivShl(BinaryOperator &I,
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
Type *Ty = I.getType();
- Instruction *Ret = nullptr;
Value *X, *Y, *Z;
// With appropriate no-wrap constraints, remove a common factor in the
@@ -943,12 +951,12 @@ static Instruction *foldIDivShl(BinaryOperator &I,
// (X * Y) u/ (X << Z) --> Y u>> Z
if (!IsSigned && HasNUW)
- Ret = BinaryOperator::CreateLShr(Y, Z);
+ return Builder.CreateLShr(Y, Z, "", I.isExact());
// (X * Y) s/ (X << Z) --> Y s/ (1 << Z)
if (IsSigned && HasNSW && (Op0->hasOneUse() || Op1->hasOneUse())) {
Value *Shl = Builder.CreateShl(ConstantInt::get(Ty, 1), Z);
- Ret = BinaryOperator::CreateSDiv(Y, Shl);
+ return Builder.CreateSDiv(Y, Shl, "", I.isExact());
}
}
@@ -966,20 +974,38 @@ static Instruction *foldIDivShl(BinaryOperator &I,
((Shl0->hasNoUnsignedWrap() && Shl1->hasNoUnsignedWrap()) ||
(Shl0->hasNoUnsignedWrap() && Shl0->hasNoSignedWrap() &&
Shl1->hasNoSignedWrap())))
- Ret = BinaryOperator::CreateUDiv(X, Y);
+ return Builder.CreateUDiv(X, Y, "", I.isExact());
// For signed div, we need 'nsw' on both shifts + 'nuw' on the divisor.
// (X << Z) / (Y << Z) --> X / Y
if (IsSigned && Shl0->hasNoSignedWrap() && Shl1->hasNoSignedWrap() &&
Shl1->hasNoUnsignedWrap())
- Ret = BinaryOperator::CreateSDiv(X, Y);
+ return Builder.CreateSDiv(X, Y, "", I.isExact());
}
- if (!Ret)
- return nullptr;
+ // If X << Y and X << Z do not overflow, then:
+ // (X << Y) / (X << Z) -> (1 << Y) / (1 << Z) -> 1 << Y >> Z
+ if (match(Op0, m_Shl(m_Value(X), m_Value(Y))) &&
+ match(Op1, m_Shl(m_Specific(X), m_Value(Z)))) {
+ auto *Shl0 = cast<OverflowingBinaryOperator>(Op0);
+ auto *Shl1 = cast<OverflowingBinaryOperator>(Op1);
+
+ if (IsSigned ? (Shl0->hasNoSignedWrap() && Shl1->hasNoSignedWrap())
+ : (Shl0->hasNoUnsignedWrap() && Shl1->hasNoUnsignedWrap())) {
+ Constant *One = ConstantInt::get(X->getType(), 1);
+ // Only preserve the nsw flag if dividend has nsw
+ // or divisor has nsw and operator is sdiv.
+ Value *Dividend = Builder.CreateShl(
+ One, Y, "shl.dividend",
+ /*HasNUW*/ true,
+ /*HasNSW*/
+ IsSigned ? (Shl0->hasNoUnsignedWrap() || Shl1->hasNoUnsignedWrap())
+ : Shl0->hasNoSignedWrap());
+ return Builder.CreateLShr(Dividend, Z, "", I.isExact());
+ }
+ }
- Ret->setIsExact(I.isExact());
- return Ret;
+ return nullptr;
}
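The new branch handles a dividend and divisor that shift the same value; once overflow is excluded, the common X cancels and only the shift amounts matter. In scalar form for the unsigned case:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 5, Y = 7, Z = 3;        // neither shift overflows here (nuw)
  uint32_t LHS = (X << Y) / (X << Z);  // (X << Y) / (X << Z)
  uint32_t RHS = (1u << Y) >> Z;       // "shl.dividend" then lshr
  assert(LHS == RHS && LHS == 16);
}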
/// This function implements the transforms common to both integer division
@@ -1156,8 +1182,8 @@ Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
return NewDiv;
}
- if (Instruction *R = foldIDivShl(I, Builder))
- return R;
+ if (Value *R = foldIDivShl(I, Builder))
+ return replaceInstUsesWith(I, R);
// With the appropriate no-wrap constraint, remove a multiply by the divisor
// after peeking through another divide:
@@ -1179,6 +1205,38 @@ Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
}
}
+ // (X * Y) / (X * Z) --> Y / Z (and commuted variants)
+ if (match(Op0, m_Mul(m_Value(X), m_Value(Y)))) {
+ auto OB0HasNSW = cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap();
+ auto OB0HasNUW = cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap();
+
+ auto CreateDivOrNull = [&](Value *A, Value *B) -> Instruction * {
+ auto OB1HasNSW = cast<OverflowingBinaryOperator>(Op1)->hasNoSignedWrap();
+ auto OB1HasNUW =
+ cast<OverflowingBinaryOperator>(Op1)->hasNoUnsignedWrap();
+ const APInt *C1, *C2;
+ if (IsSigned && OB0HasNSW) {
+ if (OB1HasNSW && match(B, m_APInt(C1)) && !C1->isAllOnes())
+ return BinaryOperator::CreateSDiv(A, B);
+ }
+ if (!IsSigned && OB0HasNUW) {
+ if (OB1HasNUW)
+ return BinaryOperator::CreateUDiv(A, B);
+ if (match(A, m_APInt(C1)) && match(B, m_APInt(C2)) && C2->ule(*C1))
+ return BinaryOperator::CreateUDiv(A, B);
+ }
+ return nullptr;
+ };
+
+ if (match(Op1, m_c_Mul(m_Specific(X), m_Value(Z)))) {
+ if (auto *Val = CreateDivOrNull(Y, Z))
+ return Val;
+ }
+ if (match(Op1, m_c_Mul(m_Specific(Y), m_Value(Z)))) {
+ if (auto *Val = CreateDivOrNull(X, Z))
+ return Val;
+ }
+ }
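The common-factor elimination is plain arithmetic once overflow is excluded: with nuw on both multiplies, (X * Y) u/ (X * Z) reduces to Y u/ Z:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 6, Y = 35, Z = 5;        // both products stay in range (nuw)
  assert((X * Y) / (X * Z) == Y / Z);   // 210 / 30 == 35 / 5 == 7
}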
return nullptr;
}
@@ -1263,7 +1321,7 @@ static Value *takeLog2(IRBuilderBase &Builder, Value *Op, unsigned Depth,
/// If we have zero-extended operands of an unsigned div or rem, we may be able
/// to narrow the operation (sink the zext below the math).
static Instruction *narrowUDivURem(BinaryOperator &I,
- InstCombiner::BuilderTy &Builder) {
+ InstCombinerImpl &IC) {
Instruction::BinaryOps Opcode = I.getOpcode();
Value *N = I.getOperand(0);
Value *D = I.getOperand(1);
@@ -1273,7 +1331,7 @@ static Instruction *narrowUDivURem(BinaryOperator &I,
X->getType() == Y->getType() && (N->hasOneUse() || D->hasOneUse())) {
// udiv (zext X), (zext Y) --> zext (udiv X, Y)
// urem (zext X), (zext Y) --> zext (urem X, Y)
- Value *NarrowOp = Builder.CreateBinOp(Opcode, X, Y);
+ Value *NarrowOp = IC.Builder.CreateBinOp(Opcode, X, Y);
return new ZExtInst(NarrowOp, Ty);
}
@@ -1281,24 +1339,24 @@ static Instruction *narrowUDivURem(BinaryOperator &I,
if (isa<Instruction>(N) && match(N, m_OneUse(m_ZExt(m_Value(X)))) &&
match(D, m_Constant(C))) {
// If the constant is the same in the smaller type, use the narrow version.
- Constant *TruncC = ConstantExpr::getTrunc(C, X->getType());
- if (ConstantExpr::getZExt(TruncC, Ty) != C)
+ Constant *TruncC = IC.getLosslessUnsignedTrunc(C, X->getType());
+ if (!TruncC)
return nullptr;
// udiv (zext X), C --> zext (udiv X, C')
// urem (zext X), C --> zext (urem X, C')
- return new ZExtInst(Builder.CreateBinOp(Opcode, X, TruncC), Ty);
+ return new ZExtInst(IC.Builder.CreateBinOp(Opcode, X, TruncC), Ty);
}
if (isa<Instruction>(D) && match(D, m_OneUse(m_ZExt(m_Value(X)))) &&
match(N, m_Constant(C))) {
// If the constant is the same in the smaller type, use the narrow version.
- Constant *TruncC = ConstantExpr::getTrunc(C, X->getType());
- if (ConstantExpr::getZExt(TruncC, Ty) != C)
+ Constant *TruncC = IC.getLosslessUnsignedTrunc(C, X->getType());
+ if (!TruncC)
return nullptr;
// udiv C, (zext X) --> zext (udiv C', X)
// urem C, (zext X) --> zext (urem C', X)
- return new ZExtInst(Builder.CreateBinOp(Opcode, TruncC, X), Ty);
+ return new ZExtInst(IC.Builder.CreateBinOp(Opcode, TruncC, X), Ty);
}
return nullptr;
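The narrowing is valid because an unsigned divide of zero-extended values never needs the extra width; the same result falls out of dividing in the narrow type and zero-extending afterwards:

#include <cassert>
#include <cstdint>

int main() {
  uint8_t X = 200, Y = 7;
  uint32_t Wide = uint32_t(X) / uint32_t(Y);   // udiv (zext X), (zext Y)
  uint32_t Narrow = uint32_t(uint8_t(X / Y));  // zext (udiv X, Y)
  assert(Wide == Narrow && Wide == 28);
}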
@@ -1346,23 +1404,10 @@ Instruction *InstCombinerImpl::visitUDiv(BinaryOperator &I) {
return CastInst::CreateZExtOrBitCast(Cmp, Ty);
}
- if (Instruction *NarrowDiv = narrowUDivURem(I, Builder))
+ if (Instruction *NarrowDiv = narrowUDivURem(I, *this))
return NarrowDiv;
- // If the udiv operands are non-overflowing multiplies with a common operand,
- // then eliminate the common factor:
- // (A * B) / (A * X) --> B / X (and commuted variants)
- // TODO: The code would be reduced if we had m_c_NUWMul pattern matching.
- // TODO: If -reassociation handled this generally, we could remove this.
Value *A, *B;
- if (match(Op0, m_NUWMul(m_Value(A), m_Value(B)))) {
- if (match(Op1, m_NUWMul(m_Specific(A), m_Value(X))) ||
- match(Op1, m_NUWMul(m_Value(X), m_Specific(A))))
- return BinaryOperator::CreateUDiv(B, X);
- if (match(Op1, m_NUWMul(m_Specific(B), m_Value(X))) ||
- match(Op1, m_NUWMul(m_Value(X), m_Specific(B))))
- return BinaryOperator::CreateUDiv(A, X);
- }
// Look through a right-shift to find the common factor:
// ((Op1 *nuw A) >> B) / Op1 --> A >> B
@@ -1405,7 +1450,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
// sdiv Op0, (sext i1 X) --> -Op0 (because if X is 0, the op is undefined)
if (match(Op1, m_AllOnes()) ||
(match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)))
- return BinaryOperator::CreateNeg(Op0);
+ return BinaryOperator::CreateNSWNeg(Op0);
// X / INT_MIN --> X == INT_MIN
if (match(Op1, m_SignMask()))
@@ -1428,7 +1473,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
Constant *NegPow2C = ConstantExpr::getNeg(cast<Constant>(Op1));
Constant *C = ConstantExpr::getExactLogBase2(NegPow2C);
Value *Ashr = Builder.CreateAShr(Op0, C, I.getName() + ".neg", true);
- return BinaryOperator::CreateNeg(Ashr);
+ return BinaryOperator::CreateNSWNeg(Ashr);
}
}
@@ -1490,7 +1535,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
if (KnownDividend.isNonNegative()) {
// If both operands are unsigned, turn this into a udiv.
- if (isKnownNonNegative(Op1, DL, 0, &AC, &I, &DT)) {
+ if (isKnownNonNegative(Op1, SQ.getWithInstruction(&I))) {
auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
BO->setIsExact(I.isExact());
return BO;
@@ -1516,6 +1561,13 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
}
}
+ // -X / X --> X == INT_MIN ? 1 : -1
+ if (isKnownNegation(Op0, Op1)) {
+ APInt MinVal = APInt::getSignedMinValue(Ty->getScalarSizeInBits());
+ Value *Cond = Builder.CreateICmpEQ(Op0, ConstantInt::get(Ty, MinVal));
+ return SelectInst::Create(Cond, ConstantInt::get(Ty, 1),
+ ConstantInt::getAllOnesValue(Ty));
+ }
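The new sdiv fold covers the INT_MIN corner explicitly: negating INT_MIN wraps back to INT_MIN, so -X / X is 1 there and -1 everywhere else. A check that performs the negation with unsigned wrap to avoid signed-overflow UB in C++:

#include <cassert>
#include <climits>
#include <cstdint>

int main() {
  for (int32_t X : {1, -7, 12345, INT_MIN}) {
    int32_t NegX = int32_t(0u - uint32_t(X)); // wrapping negation, like sub 0, X
    int32_t Expected = (X == INT_MIN) ? 1 : -1;
    assert(NegX / X == Expected);
  }
}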
return nullptr;
}
@@ -1759,6 +1811,21 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
return replaceInstUsesWith(I, Pow);
}
+ // powi(X, Y) / X --> powi(X, Y-1)
+// This is legal when (Y - 1) can't wrap around, in which case reassoc and nnan
+ // are required.
+ // TODO: Multi-use may be also better off creating Powi(x,y-1)
+ if (I.hasAllowReassoc() && I.hasNoNaNs() &&
+ match(Op0, m_OneUse(m_Intrinsic<Intrinsic::powi>(m_Specific(Op1),
+ m_Value(Y)))) &&
+ willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
+ Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
+ Value *Y1 = Builder.CreateAdd(Y, NegOne);
+ Type *Types[] = {Op1->getType(), Y1->getType()};
+ Value *Pow = Builder.CreateIntrinsic(Intrinsic::powi, Types, {Op1, Y1}, &I);
+ return replaceInstUsesWith(I, Pow);
+ }
+
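In scalar terms the powi fold is just pow(X, Y) / X == pow(X, Y - 1), exact over the reals; the reassoc and nnan flags license the FP rearrangement, and the willNotOverflowSignedSub check rules out Y - 1 wrapping:

#include <cassert>
#include <cmath>

int main() {
  double X = 1.5;
  int Y = 6;
  double LHS = std::pow(X, Y) / X;   // powi(X, Y) / X
  double RHS = std::pow(X, Y - 1);   // powi(X, Y - 1)
  assert(std::fabs(LHS - RHS) < 1e-12);
}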
return nullptr;
}
@@ -1936,7 +2003,7 @@ Instruction *InstCombinerImpl::visitURem(BinaryOperator &I) {
if (Instruction *common = commonIRemTransforms(I))
return common;
- if (Instruction *NarrowRem = narrowUDivURem(I, Builder))
+ if (Instruction *NarrowRem = narrowUDivURem(I, *this))
return NarrowRem;
// X urem Y -> X and Y-1, where Y is a power of 2,
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
index e24abc48424d..62e49469cb01 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
@@ -20,7 +20,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
@@ -44,14 +43,11 @@
#include <cassert>
#include <cstdint>
#include <functional>
-#include <tuple>
#include <type_traits>
#include <utility>
namespace llvm {
-class AssumptionCache;
class DataLayout;
-class DominatorTree;
class LLVMContext;
} // namespace llvm
@@ -98,14 +94,13 @@ static cl::opt<unsigned>
cl::desc("What is the maximal lookup depth when trying to "
"check for viability of negation sinking."));
-Negator::Negator(LLVMContext &C, const DataLayout &DL_, AssumptionCache &AC_,
- const DominatorTree &DT_, bool IsTrulyNegation_)
- : Builder(C, TargetFolder(DL_),
+Negator::Negator(LLVMContext &C, const DataLayout &DL, bool IsTrulyNegation_)
+ : Builder(C, TargetFolder(DL),
IRBuilderCallbackInserter([&](Instruction *I) {
++NegatorNumInstructionsCreatedTotal;
NewInstructions.push_back(I);
})),
- DL(DL_), AC(AC_), DT(DT_), IsTrulyNegation(IsTrulyNegation_) {}
+ IsTrulyNegation(IsTrulyNegation_) {}
#if LLVM_ENABLE_STATS
Negator::~Negator() {
@@ -128,7 +123,7 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
// FIXME: can this be reworked into a worklist-based algorithm while preserving
// the depth-first, early bailout traversal?
-[[nodiscard]] Value *Negator::visitImpl(Value *V, unsigned Depth) {
+[[nodiscard]] Value *Negator::visitImpl(Value *V, bool IsNSW, unsigned Depth) {
// -(undef) -> undef.
if (match(V, m_Undef()))
return V;
@@ -237,7 +232,8 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
// However, only do this either if the old `sub` doesn't stick around, or
// it was subtracting from a constant. Otherwise, this isn't profitable.
return Builder.CreateSub(I->getOperand(1), I->getOperand(0),
- I->getName() + ".neg");
+ I->getName() + ".neg", /* HasNUW */ false,
+ IsNSW && I->hasNoSignedWrap());
}
// Some other cases, while still don't require recursion,
@@ -302,7 +298,7 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
switch (I->getOpcode()) {
case Instruction::Freeze: {
// `freeze` is negatible if its operand is negatible.
- Value *NegOp = negate(I->getOperand(0), Depth + 1);
+ Value *NegOp = negate(I->getOperand(0), IsNSW, Depth + 1);
if (!NegOp) // Early return.
return nullptr;
return Builder.CreateFreeze(NegOp, I->getName() + ".neg");
@@ -313,7 +309,7 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
SmallVector<Value *, 4> NegatedIncomingValues(PHI->getNumOperands());
for (auto I : zip(PHI->incoming_values(), NegatedIncomingValues)) {
if (!(std::get<1>(I) =
- negate(std::get<0>(I), Depth + 1))) // Early return.
+ negate(std::get<0>(I), IsNSW, Depth + 1))) // Early return.
return nullptr;
}
// All incoming values are indeed negatible. Create negated PHI node.
@@ -336,10 +332,10 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
return NewSelect;
}
// `select` is negatible if both hands of `select` are negatible.
- Value *NegOp1 = negate(I->getOperand(1), Depth + 1);
+ Value *NegOp1 = negate(I->getOperand(1), IsNSW, Depth + 1);
if (!NegOp1) // Early return.
return nullptr;
- Value *NegOp2 = negate(I->getOperand(2), Depth + 1);
+ Value *NegOp2 = negate(I->getOperand(2), IsNSW, Depth + 1);
if (!NegOp2)
return nullptr;
// Do preserve the metadata!
@@ -349,10 +345,10 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
case Instruction::ShuffleVector: {
// `shufflevector` is negatible if both operands are negatible.
auto *Shuf = cast<ShuffleVectorInst>(I);
- Value *NegOp0 = negate(I->getOperand(0), Depth + 1);
+ Value *NegOp0 = negate(I->getOperand(0), IsNSW, Depth + 1);
if (!NegOp0) // Early return.
return nullptr;
- Value *NegOp1 = negate(I->getOperand(1), Depth + 1);
+ Value *NegOp1 = negate(I->getOperand(1), IsNSW, Depth + 1);
if (!NegOp1)
return nullptr;
return Builder.CreateShuffleVector(NegOp0, NegOp1, Shuf->getShuffleMask(),
@@ -361,7 +357,7 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
case Instruction::ExtractElement: {
// `extractelement` is negatible if source operand is negatible.
auto *EEI = cast<ExtractElementInst>(I);
- Value *NegVector = negate(EEI->getVectorOperand(), Depth + 1);
+ Value *NegVector = negate(EEI->getVectorOperand(), IsNSW, Depth + 1);
if (!NegVector) // Early return.
return nullptr;
return Builder.CreateExtractElement(NegVector, EEI->getIndexOperand(),
@@ -371,10 +367,10 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
// `insertelement` is negatible if both the source vector and
// element-to-be-inserted are negatible.
auto *IEI = cast<InsertElementInst>(I);
- Value *NegVector = negate(IEI->getOperand(0), Depth + 1);
+ Value *NegVector = negate(IEI->getOperand(0), IsNSW, Depth + 1);
if (!NegVector) // Early return.
return nullptr;
- Value *NegNewElt = negate(IEI->getOperand(1), Depth + 1);
+ Value *NegNewElt = negate(IEI->getOperand(1), IsNSW, Depth + 1);
if (!NegNewElt) // Early return.
return nullptr;
return Builder.CreateInsertElement(NegVector, NegNewElt, IEI->getOperand(2),
@@ -382,15 +378,17 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
}
case Instruction::Trunc: {
// `trunc` is negatible if its operand is negatible.
- Value *NegOp = negate(I->getOperand(0), Depth + 1);
+ Value *NegOp = negate(I->getOperand(0), /* IsNSW */ false, Depth + 1);
if (!NegOp) // Early return.
return nullptr;
return Builder.CreateTrunc(NegOp, I->getType(), I->getName() + ".neg");
}
case Instruction::Shl: {
// `shl` is negatible if the first operand is negatible.
- if (Value *NegOp0 = negate(I->getOperand(0), Depth + 1))
- return Builder.CreateShl(NegOp0, I->getOperand(1), I->getName() + ".neg");
+ IsNSW &= I->hasNoSignedWrap();
+ if (Value *NegOp0 = negate(I->getOperand(0), IsNSW, Depth + 1))
+ return Builder.CreateShl(NegOp0, I->getOperand(1), I->getName() + ".neg",
+ /* HasNUW */ false, IsNSW);
// Otherwise, `shl %x, C` can be interpreted as `mul %x, 1<<C`.
auto *Op1C = dyn_cast<Constant>(I->getOperand(1));
if (!Op1C || !IsTrulyNegation)
@@ -398,11 +396,10 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
return Builder.CreateMul(
I->getOperand(0),
ConstantExpr::getShl(Constant::getAllOnesValue(Op1C->getType()), Op1C),
- I->getName() + ".neg");
+ I->getName() + ".neg", /* HasNUW */ false, IsNSW);
}
case Instruction::Or: {
- if (!haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1), DL, &AC, I,
- &DT))
+ if (!cast<PossiblyDisjointInst>(I)->isDisjoint())
return nullptr; // Don't know how to handle `or` in general.
std::array<Value *, 2> Ops = getSortedOperandsOfBinOp(I);
// `or`/`add` are interchangeable when operands have no common bits set.
@@ -417,7 +414,7 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
SmallVector<Value *, 2> NegatedOps, NonNegatedOps;
for (Value *Op : I->operands()) {
// Can we sink the negation into this operand?
- if (Value *NegOp = negate(Op, Depth + 1)) {
+ if (Value *NegOp = negate(Op, /* IsNSW */ false, Depth + 1)) {
NegatedOps.emplace_back(NegOp); // Successfully negated operand!
continue;
}
@@ -446,9 +443,11 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
// `xor` is negatible if one of its operands is invertible.
// FIXME: InstCombineInverter? But how to connect Inverter and Negator?
if (auto *C = dyn_cast<Constant>(Ops[1])) {
- Value *Xor = Builder.CreateXor(Ops[0], ConstantExpr::getNot(C));
- return Builder.CreateAdd(Xor, ConstantInt::get(Xor->getType(), 1),
- I->getName() + ".neg");
+ if (IsTrulyNegation) {
+ Value *Xor = Builder.CreateXor(Ops[0], ConstantExpr::getNot(C));
+ return Builder.CreateAdd(Xor, ConstantInt::get(Xor->getType(), 1),
+ I->getName() + ".neg");
+ }
}
return nullptr;
}
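The xor rule now fires only for true negations, since it spends an extra add; the identity itself is the usual two's-complement one, -(X ^ C) == (X ^ ~C) + 1:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0x1234, C = 0xFF00;
  // -V == ~V + 1 and ~(X ^ C) == X ^ ~C, composed:
  assert(0u - (X ^ C) == (X ^ ~C) + 1u);
}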
@@ -458,16 +457,17 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
Value *NegatedOp, *OtherOp;
// First try the second operand, in case it's a constant it will be best to
// just invert it instead of sinking the `neg` deeper.
- if (Value *NegOp1 = negate(Ops[1], Depth + 1)) {
+ if (Value *NegOp1 = negate(Ops[1], /* IsNSW */ false, Depth + 1)) {
NegatedOp = NegOp1;
OtherOp = Ops[0];
- } else if (Value *NegOp0 = negate(Ops[0], Depth + 1)) {
+ } else if (Value *NegOp0 = negate(Ops[0], /* IsNSW */ false, Depth + 1)) {
NegatedOp = NegOp0;
OtherOp = Ops[1];
} else
// Can't negate either of them.
return nullptr;
- return Builder.CreateMul(NegatedOp, OtherOp, I->getName() + ".neg");
+ return Builder.CreateMul(NegatedOp, OtherOp, I->getName() + ".neg",
+ /* HasNUW */ false, IsNSW && I->hasNoSignedWrap());
}
default:
return nullptr; // Don't know, likely not negatible for free.
@@ -476,7 +476,7 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
llvm_unreachable("Can't get here. We always return from switch.");
}
-[[nodiscard]] Value *Negator::negate(Value *V, unsigned Depth) {
+[[nodiscard]] Value *Negator::negate(Value *V, bool IsNSW, unsigned Depth) {
NegatorMaxDepthVisited.updateMax(Depth);
++NegatorNumValuesVisited;
@@ -506,15 +506,16 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
#endif
// No luck. Try negating it for real.
- Value *NegatedV = visitImpl(V, Depth);
+ Value *NegatedV = visitImpl(V, IsNSW, Depth);
// And cache the (real) result for the future.
NegationsCache[V] = NegatedV;
return NegatedV;
}
-[[nodiscard]] std::optional<Negator::Result> Negator::run(Value *Root) {
- Value *Negated = negate(Root, /*Depth=*/0);
+[[nodiscard]] std::optional<Negator::Result> Negator::run(Value *Root,
+ bool IsNSW) {
+ Value *Negated = negate(Root, IsNSW, /*Depth=*/0);
if (!Negated) {
// We must cleanup newly-inserted instructions, to avoid any potential
// endless combine looping.
@@ -525,7 +526,7 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
return std::make_pair(ArrayRef<Instruction *>(NewInstructions), Negated);
}
-[[nodiscard]] Value *Negator::Negate(bool LHSIsZero, Value *Root,
+[[nodiscard]] Value *Negator::Negate(bool LHSIsZero, bool IsNSW, Value *Root,
InstCombinerImpl &IC) {
++NegatorTotalNegationsAttempted;
LLVM_DEBUG(dbgs() << "Negator: attempting to sink negation into " << *Root
@@ -534,9 +535,8 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
if (!NegatorEnabled || !DebugCounter::shouldExecute(NegatorCounter))
return nullptr;
- Negator N(Root->getContext(), IC.getDataLayout(), IC.getAssumptionCache(),
- IC.getDominatorTree(), LHSIsZero);
- std::optional<Result> Res = N.run(Root);
+ Negator N(Root->getContext(), IC.getDataLayout(), LHSIsZero);
+ std::optional<Result> Res = N.run(Root, IsNSW);
if (!Res) { // Negation failed.
LLVM_DEBUG(dbgs() << "Negator: failed to sink negation into " << *Root
<< "\n");
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 2f6aa85062a5..20b34c1379d5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -248,7 +248,7 @@ bool InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) {
PHINode *NewPtrPHI = PHINode::Create(
IntToPtr->getType(), PN.getNumIncomingValues(), PN.getName() + ".ptr");
- InsertNewInstBefore(NewPtrPHI, PN);
+ InsertNewInstBefore(NewPtrPHI, PN.getIterator());
SmallDenseMap<Value *, Instruction *> Casts;
for (auto Incoming : zip(PN.blocks(), AvailablePtrVals)) {
auto *IncomingBB = std::get<0>(Incoming);
@@ -285,10 +285,10 @@ bool InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) {
if (isa<PHINode>(IncomingI))
InsertPos = BB->getFirstInsertionPt();
assert(InsertPos != BB->end() && "should have checked above");
- InsertNewInstBefore(CI, *InsertPos);
+ InsertNewInstBefore(CI, InsertPos);
} else {
auto *InsertBB = &IncomingBB->getParent()->getEntryBlock();
- InsertNewInstBefore(CI, *InsertBB->getFirstInsertionPt());
+ InsertNewInstBefore(CI, InsertBB->getFirstInsertionPt());
}
}
NewPtrPHI->addIncoming(CI, IncomingBB);
@@ -353,7 +353,7 @@ InstCombinerImpl::foldPHIArgInsertValueInstructionIntoPHI(PHINode &PN) {
NewOperand->addIncoming(
cast<InsertValueInst>(std::get<1>(Incoming))->getOperand(OpIdx),
std::get<0>(Incoming));
- InsertNewInstBefore(NewOperand, PN);
+ InsertNewInstBefore(NewOperand, PN.getIterator());
}
// And finally, create `insertvalue` over the newly-formed PHI nodes.
@@ -391,7 +391,7 @@ InstCombinerImpl::foldPHIArgExtractValueInstructionIntoPHI(PHINode &PN) {
NewAggregateOperand->addIncoming(
cast<ExtractValueInst>(std::get<1>(Incoming))->getAggregateOperand(),
std::get<0>(Incoming));
- InsertNewInstBefore(NewAggregateOperand, PN);
+ InsertNewInstBefore(NewAggregateOperand, PN.getIterator());
// And finally, create `extractvalue` over the newly-formed PHI nodes.
auto *NewEVI = ExtractValueInst::Create(NewAggregateOperand,
@@ -450,7 +450,7 @@ Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) {
NewLHS = PHINode::Create(LHSType, PN.getNumIncomingValues(),
FirstInst->getOperand(0)->getName() + ".pn");
NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0));
- InsertNewInstBefore(NewLHS, PN);
+ InsertNewInstBefore(NewLHS, PN.getIterator());
LHSVal = NewLHS;
}
@@ -458,7 +458,7 @@ Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) {
NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(),
FirstInst->getOperand(1)->getName() + ".pn");
NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0));
- InsertNewInstBefore(NewRHS, PN);
+ InsertNewInstBefore(NewRHS, PN.getIterator());
RHSVal = NewRHS;
}
@@ -581,7 +581,7 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
Value *FirstOp = FirstInst->getOperand(I);
PHINode *NewPN =
PHINode::Create(FirstOp->getType(), E, FirstOp->getName() + ".pn");
- InsertNewInstBefore(NewPN, PN);
+ InsertNewInstBefore(NewPN, PN.getIterator());
NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
OperandPhis[I] = NewPN;
@@ -769,7 +769,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
NewLI->setOperand(0, InVal);
delete NewPN;
} else {
- InsertNewInstBefore(NewPN, PN);
+ InsertNewInstBefore(NewPN, PN.getIterator());
}
// If this was a volatile load that we are merging, make sure to loop through
@@ -825,8 +825,8 @@ Instruction *InstCombinerImpl::foldPHIArgZextsIntoPHI(PHINode &Phi) {
NumZexts++;
} else if (auto *C = dyn_cast<Constant>(V)) {
// Make sure that constants can fit in the new type.
- Constant *Trunc = ConstantExpr::getTrunc(C, NarrowType);
- if (ConstantExpr::getZExt(Trunc, C->getType()) != C)
+ Constant *Trunc = getLosslessUnsignedTrunc(C, NarrowType);
+ if (!Trunc)
return nullptr;
NewIncoming.push_back(Trunc);
NumConsts++;
@@ -853,7 +853,7 @@ Instruction *InstCombinerImpl::foldPHIArgZextsIntoPHI(PHINode &Phi) {
for (unsigned I = 0; I != NumIncomingValues; ++I)
NewPhi->addIncoming(NewIncoming[I], Phi.getIncomingBlock(I));
- InsertNewInstBefore(NewPhi, Phi);
+ InsertNewInstBefore(NewPhi, Phi.getIterator());
return CastInst::CreateZExtOrBitCast(NewPhi, Phi.getType());
}
@@ -943,7 +943,7 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) {
PhiVal = InVal;
delete NewPN;
} else {
- InsertNewInstBefore(NewPN, PN);
+ InsertNewInstBefore(NewPN, PN.getIterator());
PhiVal = NewPN;
}
@@ -996,8 +996,8 @@ static bool isDeadPHICycle(PHINode *PN,
/// Return true if this phi node is always equal to NonPhiInVal.
/// This happens with mutually cyclic phi nodes like:
/// z = some value; x = phi (y, z); y = phi (x, z)
-static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
- SmallPtrSetImpl<PHINode*> &ValueEqualPHIs) {
+static bool PHIsEqualValue(PHINode *PN, Value *&NonPhiInVal,
+ SmallPtrSetImpl<PHINode *> &ValueEqualPHIs) {
// See if we already saw this PHI node.
if (!ValueEqualPHIs.insert(PN).second)
return true;
@@ -1010,8 +1010,11 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
// the value.
for (Value *Op : PN->incoming_values()) {
if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {
- if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs))
- return false;
+ if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs)) {
+ if (NonPhiInVal)
+ return false;
+ NonPhiInVal = OpPN;
+ }
} else if (Op != NonPhiInVal)
return false;
}
@@ -1368,7 +1371,7 @@ static Value *simplifyUsingControlFlow(InstCombiner &Self, PHINode &PN,
// sinking.
auto InsertPt = BB->getFirstInsertionPt();
if (InsertPt != BB->end()) {
- Self.Builder.SetInsertPoint(&*InsertPt);
+ Self.Builder.SetInsertPoint(&*BB, InsertPt);
return Self.Builder.CreateNot(Cond);
}
@@ -1437,22 +1440,45 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
// are induction variable analysis (sometimes) and ADCE, which is only run
// late.
if (PHIUser->hasOneUse() &&
- (isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) &&
+ (isa<BinaryOperator>(PHIUser) || isa<UnaryOperator>(PHIUser) ||
+ isa<GetElementPtrInst>(PHIUser)) &&
PHIUser->user_back() == &PN) {
return replaceInstUsesWith(PN, PoisonValue::get(PN.getType()));
}
- // When a PHI is used only to be compared with zero, it is safe to replace
- // an incoming value proved as known nonzero with any non-zero constant.
- // For example, in the code below, the incoming value %v can be replaced
- // with any non-zero constant based on the fact that the PHI is only used to
- // be compared with zero and %v is a known non-zero value:
- // %v = select %cond, 1, 2
- // %p = phi [%v, BB] ...
- // icmp eq, %p, 0
- auto *CmpInst = dyn_cast<ICmpInst>(PHIUser);
- // FIXME: To be simple, handle only integer type for now.
- if (CmpInst && isa<IntegerType>(PN.getType()) && CmpInst->isEquality() &&
- match(CmpInst->getOperand(1), m_Zero())) {
+ }
+
+ // When a PHI is used only to be compared with zero, it is safe to replace
+ // an incoming value proved as known nonzero with any non-zero constant.
+ // For example, in the code below, the incoming value %v can be replaced
+ // with any non-zero constant based on the fact that the PHI is only used to
+ // be compared with zero and %v is a known non-zero value:
+ // %v = select %cond, 1, 2
+ // %p = phi [%v, BB] ...
+ // icmp eq, %p, 0
+ // FIXME: To be simple, handle only integer type for now.
+ // This handles a small number of uses to keep the complexity down, and an
+ // icmp(or(phi)) can equally be replaced with any non-zero constant as the
+ // "or" will only add bits.
+ if (!PN.hasNUsesOrMore(3)) {
+ SmallVector<Instruction *> DropPoisonFlags;
+ bool AllUsesOfPhiEndsInCmp = all_of(PN.users(), [&](User *U) {
+ auto *CmpInst = dyn_cast<ICmpInst>(U);
+ if (!CmpInst) {
+ // This is always correct, as OR only adds bits and we are checking
+ // against 0.
+ if (U->hasOneUse() && match(U, m_c_Or(m_Specific(&PN), m_Value()))) {
+ DropPoisonFlags.push_back(cast<Instruction>(U));
+ CmpInst = dyn_cast<ICmpInst>(U->user_back());
+ }
+ }
+ if (!CmpInst || !isa<IntegerType>(PN.getType()) ||
+ !CmpInst->isEquality() || !match(CmpInst->getOperand(1), m_Zero())) {
+ return false;
+ }
+ return true;
+ });
+ // All uses of the PHI result in a compare with zero.
+ if (AllUsesOfPhiEndsInCmp) {
ConstantInt *NonZeroConst = nullptr;
bool MadeChange = false;
for (unsigned I = 0, E = PN.getNumIncomingValues(); I != E; ++I) {
@@ -1461,9 +1487,11 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
if (isKnownNonZero(VA, DL, 0, &AC, CtxI, &DT)) {
if (!NonZeroConst)
NonZeroConst = getAnyNonZeroConstInt(PN);
-
if (NonZeroConst != VA) {
replaceOperand(PN, I, NonZeroConst);
+ // The "disjoint" flag may no longer hold after the transform.
+ for (Instruction *I : DropPoisonFlags)
+ I->dropPoisonGeneratingFlags();
MadeChange = true;
}
}
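A hypothetical sketch of the extended case: the compare is reached through a one-use or, which can only add bits, so a known-non-zero incoming value may still be swapped for any non-zero constant (dropping disjoint on the or):

define i1 @phi_or_cmp(i1 %c, i32 %x, i32 %w) {
entry:
  br i1 %c, label %a, label %b
a:
  %v = select i1 %c, i32 1, i32 2      ; known non-zero
  br label %join
b:
  br label %join
join:
  %p = phi i32 [ %v, %a ], [ %w, %b ]
  %o = or i32 %p, %x
  %cmp = icmp eq i32 %o, 0
  ret i1 %cmp
}
; %v may be replaced with any non-zero constant, e.g. i32 1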
@@ -1478,7 +1506,9 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
// z = some value; x = phi (y, z); y = phi (x, z)
// where the phi nodes don't necessarily need to be in the same block. Do a
// quick check to see if the PHI node only contains a single non-phi value, if
- // so, scan to see if the phi cycle is actually equal to that value.
+ // so, scan to see if the phi cycle is actually equal to that value. If the
+ // phi has no non-phi values then allow the "NonPhiInVal" to be set later if
+ // one of the phis does not itself have a single input.
{
unsigned InValNo = 0, NumIncomingVals = PN.getNumIncomingValues();
// Scan for the first non-phi operand.
@@ -1486,25 +1516,25 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
isa<PHINode>(PN.getIncomingValue(InValNo)))
++InValNo;
- if (InValNo != NumIncomingVals) {
- Value *NonPhiInVal = PN.getIncomingValue(InValNo);
+ Value *NonPhiInVal =
+ InValNo != NumIncomingVals ? PN.getIncomingValue(InValNo) : nullptr;
- // Scan the rest of the operands to see if there are any conflicts, if so
- // there is no need to recursively scan other phis.
+ // Scan the rest of the operands to see if there are any conflicts, if so
+ // there is no need to recursively scan other phis.
+ if (NonPhiInVal)
for (++InValNo; InValNo != NumIncomingVals; ++InValNo) {
Value *OpVal = PN.getIncomingValue(InValNo);
if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
break;
}
- // If we scanned over all operands, then we have one unique value plus
- // phi values. Scan PHI nodes to see if they all merge in each other or
- // the value.
- if (InValNo == NumIncomingVals) {
- SmallPtrSet<PHINode*, 16> ValueEqualPHIs;
- if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs))
- return replaceInstUsesWith(PN, NonPhiInVal);
- }
+ // If we scanned over all operands, then we have one unique value plus
+ // phi values. Scan PHI nodes to see if they all merge in each other or
+ // the value.
+ if (InValNo == NumIncomingVals) {
+ SmallPtrSet<PHINode *, 16> ValueEqualPHIs;
+ if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs))
+ return replaceInstUsesWith(PN, NonPhiInVal);
}
}
@@ -1512,11 +1542,12 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
// the blocks in the same order. This will help identical PHIs be eliminated
// by other passes. Other passes shouldn't depend on this for correctness
// however.
- PHINode *FirstPN = cast<PHINode>(PN.getParent()->begin());
- if (&PN != FirstPN)
- for (unsigned I = 0, E = FirstPN->getNumIncomingValues(); I != E; ++I) {
+ auto Res = PredOrder.try_emplace(PN.getParent());
+ if (!Res.second) {
+ const auto &Preds = Res.first->second;
+ for (unsigned I = 0, E = PN.getNumIncomingValues(); I != E; ++I) {
BasicBlock *BBA = PN.getIncomingBlock(I);
- BasicBlock *BBB = FirstPN->getIncomingBlock(I);
+ BasicBlock *BBB = Preds[I];
if (BBA != BBB) {
Value *VA = PN.getIncomingValue(I);
unsigned J = PN.getBasicBlockIndex(BBB);
@@ -1531,6 +1562,10 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
// this in this case.
}
}
+ } else {
+ // Remember the block order of the first encountered phi node.
+ append_range(Res.first->second, PN.blocks());
+ }
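The effect, sketched on hypothetical IR: the first phi visited in a block records the reference predecessor order, and later phis are permuted to match it.

define i32 @phi_order(i1 %c, i32 %a, i32 %b, i32 %d, i32 %e) {
entry:
  br i1 %c, label %bb0, label %bb1
bb0:
  br label %join
bb1:
  br label %join
join:
  %p1 = phi i32 [ %a, %bb0 ], [ %b, %bb1 ]
  %p2 = phi i32 [ %e, %bb1 ], [ %d, %bb0 ]
  %s = add i32 %p1, %p2
  ret i32 %s
}
; %p2 may be rewritten as  phi i32 [ %d, %bb0 ], [ %e, %bb1 ]  to match %p1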
// Is there an identical PHI node in this basic block?
for (PHINode &IdenticalPN : PN.getParent()->phis()) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 661c50062223..2dda46986f0f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -689,34 +689,40 @@ static Value *foldSelectICmpLshrAshr(const ICmpInst *IC, Value *TrueVal,
}
/// We want to turn:
-/// (select (icmp eq (and X, C1), 0), Y, (or Y, C2))
+/// (select (icmp eq (and X, C1), 0), Y, (BinOp Y, C2))
/// into:
-/// (or (shl (and X, C1), C3), Y)
+/// IF C2 u>= C1
+/// (BinOp Y, (shl (and X, C1), C3))
+/// ELSE
+/// (BinOp Y, (lshr (and X, C1), C3))
/// iff:
+/// 0 on the RHS is the identity value (e.g. add, xor, shl, etc.)
/// C1 and C2 are both powers of 2
/// where:
-/// C3 = Log(C2) - Log(C1)
+/// IF C2 u>= C1
+/// C3 = Log(C2) - Log(C1)
+/// ELSE
+/// C3 = Log(C1) - Log(C2)
///
/// This transform handles cases where:
/// 1. The icmp predicate is inverted
/// 2. The select operands are reversed
/// 3. The magnitude of C2 and C1 are flipped
-static Value *foldSelectICmpAndOr(const ICmpInst *IC, Value *TrueVal,
+static Value *foldSelectICmpAndBinOp(const ICmpInst *IC, Value *TrueVal,
Value *FalseVal,
InstCombiner::BuilderTy &Builder) {
// Only handle integer compares. Also, if this is a vector select, we need a
// vector compare.
if (!TrueVal->getType()->isIntOrIntVectorTy() ||
- TrueVal->getType()->isVectorTy() != IC->getType()->isVectorTy())
+ TrueVal->getType()->isVectorTy() != IC->getType()->isVectorTy())
return nullptr;
Value *CmpLHS = IC->getOperand(0);
Value *CmpRHS = IC->getOperand(1);
- Value *V;
unsigned C1Log;
- bool IsEqualZero;
bool NeedAnd = false;
+ CmpInst::Predicate Pred = IC->getPredicate();
if (IC->isEquality()) {
if (!match(CmpRHS, m_Zero()))
return nullptr;
@@ -725,49 +731,49 @@ static Value *foldSelectICmpAndOr(const ICmpInst *IC, Value *TrueVal,
if (!match(CmpLHS, m_And(m_Value(), m_Power2(C1))))
return nullptr;
- V = CmpLHS;
C1Log = C1->logBase2();
- IsEqualZero = IC->getPredicate() == ICmpInst::ICMP_EQ;
- } else if (IC->getPredicate() == ICmpInst::ICMP_SLT ||
- IC->getPredicate() == ICmpInst::ICMP_SGT) {
- // We also need to recognize (icmp slt (trunc (X)), 0) and
- // (icmp sgt (trunc (X)), -1).
- IsEqualZero = IC->getPredicate() == ICmpInst::ICMP_SGT;
- if ((IsEqualZero && !match(CmpRHS, m_AllOnes())) ||
- (!IsEqualZero && !match(CmpRHS, m_Zero())))
- return nullptr;
-
- if (!match(CmpLHS, m_OneUse(m_Trunc(m_Value(V)))))
+ } else {
+ APInt C1;
+ if (!decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CmpLHS, C1) ||
+ !C1.isPowerOf2())
return nullptr;
- C1Log = CmpLHS->getType()->getScalarSizeInBits() - 1;
+ C1Log = C1.logBase2();
NeedAnd = true;
- } else {
- return nullptr;
}
+ Value *Y, *V = CmpLHS;
+ BinaryOperator *BinOp;
const APInt *C2;
- bool OrOnTrueVal = false;
- bool OrOnFalseVal = match(FalseVal, m_Or(m_Specific(TrueVal), m_Power2(C2)));
- if (!OrOnFalseVal)
- OrOnTrueVal = match(TrueVal, m_Or(m_Specific(FalseVal), m_Power2(C2)));
-
- if (!OrOnFalseVal && !OrOnTrueVal)
+ bool NeedXor;
+ if (match(FalseVal, m_BinOp(m_Specific(TrueVal), m_Power2(C2)))) {
+ Y = TrueVal;
+ BinOp = cast<BinaryOperator>(FalseVal);
+ NeedXor = Pred == ICmpInst::ICMP_NE;
+ } else if (match(TrueVal, m_BinOp(m_Specific(FalseVal), m_Power2(C2)))) {
+ Y = FalseVal;
+ BinOp = cast<BinaryOperator>(TrueVal);
+ NeedXor = Pred == ICmpInst::ICMP_EQ;
+ } else {
return nullptr;
+ }
- Value *Y = OrOnFalseVal ? TrueVal : FalseVal;
+ // Check that 0 on RHS is identity value for this binop.
+ auto *IdentityC =
+ ConstantExpr::getBinOpIdentity(BinOp->getOpcode(), BinOp->getType(),
+ /*AllowRHSConstant*/ true);
+ if (IdentityC == nullptr || !IdentityC->isNullValue())
+ return nullptr;
unsigned C2Log = C2->logBase2();
- bool NeedXor = (!IsEqualZero && OrOnFalseVal) || (IsEqualZero && OrOnTrueVal);
bool NeedShift = C1Log != C2Log;
bool NeedZExtTrunc = Y->getType()->getScalarSizeInBits() !=
V->getType()->getScalarSizeInBits();
// Make sure we don't create more instructions than we save.
- Value *Or = OrOnFalseVal ? FalseVal : TrueVal;
- if ((NeedShift + NeedXor + NeedZExtTrunc) >
- (IC->hasOneUse() + Or->hasOneUse()))
+ if ((NeedShift + NeedXor + NeedZExtTrunc + NeedAnd) >
+ (IC->hasOneUse() + BinOp->hasOneUse()))
return nullptr;
if (NeedAnd) {
@@ -788,7 +794,7 @@ static Value *foldSelectICmpAndOr(const ICmpInst *IC, Value *TrueVal,
if (NeedXor)
V = Builder.CreateXor(V, *C2);
- return Builder.CreateOr(V, Y);
+ return Builder.CreateBinOp(BinOp->getOpcode(), Y, V);
}
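A minimal sketch with hypothetical constants C1 = 2 and C2 = 8 (both powers of 2, C2 u>= C1, so C3 = log2(8) - log2(2) = 2, and 0 is the identity of add on the RHS):

define i32 @select_icmp_and_add(i32 %x, i32 %y) {
  %and = and i32 %x, 2
  %cmp = icmp eq i32 %and, 0
  %add = add i32 %y, 8
  %sel = select i1 %cmp, i32 %y, i32 %add
  ret i32 %sel
}
; may become roughly:
;   %and = and i32 %x, 2
;   %shl = shl i32 %and, 2
;   %res = add i32 %y, %shl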
/// Canonicalize a set or clear of a masked set of constant bits to
@@ -870,7 +876,7 @@ static Instruction *foldSelectZeroOrMul(SelectInst &SI, InstCombinerImpl &IC) {
auto *FalseValI = cast<Instruction>(FalseVal);
auto *FrY = IC.InsertNewInstBefore(new FreezeInst(Y, Y->getName() + ".fr"),
- *FalseValI);
+ FalseValI->getIterator());
IC.replaceOperand(*FalseValI, FalseValI->getOperand(0) == Y ? 0 : 1, FrY);
return IC.replaceInstUsesWith(SI, FalseValI);
}
@@ -1303,45 +1309,28 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
return nullptr;
// InstSimplify already performed this fold if it was possible subject to
- // current poison-generating flags. Try the transform again with
- // poison-generating flags temporarily dropped.
- bool WasNUW = false, WasNSW = false, WasExact = false, WasInBounds = false;
- if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(FalseVal)) {
- WasNUW = OBO->hasNoUnsignedWrap();
- WasNSW = OBO->hasNoSignedWrap();
- FalseInst->setHasNoUnsignedWrap(false);
- FalseInst->setHasNoSignedWrap(false);
- }
- if (auto *PEO = dyn_cast<PossiblyExactOperator>(FalseVal)) {
- WasExact = PEO->isExact();
- FalseInst->setIsExact(false);
- }
- if (auto *GEP = dyn_cast<GetElementPtrInst>(FalseVal)) {
- WasInBounds = GEP->isInBounds();
- GEP->setIsInBounds(false);
- }
+ // current poison-generating flags. Check whether dropping poison-generating
+ // flags enables the transform.
// Try each equivalence substitution possibility.
// We have an 'EQ' comparison, so the select's false value will propagate.
// Example:
// (X == 42) ? 43 : (X + 1) --> (X == 42) ? (X + 1) : (X + 1) --> X + 1
+ SmallVector<Instruction *> DropFlags;
if (simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, SQ,
- /* AllowRefinement */ false) == TrueVal ||
+ /* AllowRefinement */ false,
+ &DropFlags) == TrueVal ||
simplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, SQ,
- /* AllowRefinement */ false) == TrueVal) {
+ /* AllowRefinement */ false,
+ &DropFlags) == TrueVal) {
+ for (Instruction *I : DropFlags) {
+ I->dropPoisonGeneratingFlagsAndMetadata();
+ Worklist.add(I);
+ }
+
return replaceInstUsesWith(Sel, FalseVal);
}
- // Restore poison-generating flags if the transform did not apply.
- if (WasNUW)
- FalseInst->setHasNoUnsignedWrap();
- if (WasNSW)
- FalseInst->setHasNoSignedWrap();
- if (WasExact)
- FalseInst->setIsExact();
- if (WasInBounds)
- cast<GetElementPtrInst>(FalseInst)->setIsInBounds();
-
return nullptr;
}
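A hypothetical sketch of a case that only folds once poison-generating flags are dropped: replacing %x with -1 in the false arm yields 0 only if nuw is ignored, so the fold succeeds and nuw is stripped from the kept instruction.

define i32 @sel_equiv(i32 %x) {
  %cmp = icmp eq i32 %x, -1
  %add = add nuw i32 %x, 1
  %sel = select i1 %cmp, i32 0, i32 %add
  ret i32 %sel
}
; folds to  %add = add i32 %x, 1  (nuw dropped), ret i32 %add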
@@ -1506,8 +1495,13 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
if (!match(ReplacementLow, m_ImmConstant(LowC)) ||
!match(ReplacementHigh, m_ImmConstant(HighC)))
return nullptr;
- ReplacementLow = ConstantExpr::getSExt(LowC, X->getType());
- ReplacementHigh = ConstantExpr::getSExt(HighC, X->getType());
+ const DataLayout &DL = Sel0.getModule()->getDataLayout();
+ ReplacementLow =
+ ConstantFoldCastOperand(Instruction::SExt, LowC, X->getType(), DL);
+ ReplacementHigh =
+ ConstantFoldCastOperand(Instruction::SExt, HighC, X->getType(), DL);
+ assert(ReplacementLow && ReplacementHigh &&
+ "Constant folding of ImmConstant cannot fail");
}
// All good, finally emit the new pattern.
@@ -1797,7 +1791,7 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
if (Instruction *V = foldSelectZeroOrOnes(ICI, TrueVal, FalseVal, Builder))
return V;
- if (Value *V = foldSelectICmpAndOr(ICI, TrueVal, FalseVal, Builder))
+ if (Value *V = foldSelectICmpAndBinOp(ICI, TrueVal, FalseVal, Builder))
return replaceInstUsesWith(SI, V);
if (Value *V = foldSelectICmpLshrAshr(ICI, TrueVal, FalseVal, Builder))
@@ -2094,9 +2088,8 @@ Instruction *InstCombinerImpl::foldSelectExtConst(SelectInst &Sel) {
// If the constant is the same after truncation to the smaller type and
// extension to the original type, we can narrow the select.
Type *SelType = Sel.getType();
- Constant *TruncC = ConstantExpr::getTrunc(C, SmallType);
- Constant *ExtC = ConstantExpr::getCast(ExtOpcode, TruncC, SelType);
- if (ExtC == C && ExtInst->hasOneUse()) {
+ Constant *TruncC = getLosslessTrunc(C, SmallType, ExtOpcode);
+ if (TruncC && ExtInst->hasOneUse()) {
Value *TruncCVal = cast<Value>(TruncC);
if (ExtInst == Sel.getFalseValue())
std::swap(X, TruncCVal);
@@ -2107,23 +2100,6 @@ Instruction *InstCombinerImpl::foldSelectExtConst(SelectInst &Sel) {
return CastInst::Create(Instruction::CastOps(ExtOpcode), NewSel, SelType);
}
- // If one arm of the select is the extend of the condition, replace that arm
- // with the extension of the appropriate known bool value.
- if (Cond == X) {
- if (ExtInst == Sel.getTrueValue()) {
- // select X, (sext X), C --> select X, -1, C
- // select X, (zext X), C --> select X, 1, C
- Constant *One = ConstantInt::getTrue(SmallType);
- Constant *AllOnesOrOne = ConstantExpr::getCast(ExtOpcode, One, SelType);
- return SelectInst::Create(Cond, AllOnesOrOne, C, "", nullptr, &Sel);
- } else {
- // select X, C, (sext X) --> select X, C, 0
- // select X, C, (zext X) --> select X, C, 0
- Constant *Zero = ConstantInt::getNullValue(SelType);
- return SelectInst::Create(Cond, C, Zero, "", nullptr, &Sel);
- }
- }
-
return nullptr;
}
@@ -2561,7 +2537,7 @@ static Instruction *foldSelectToPhiImpl(SelectInst &Sel, BasicBlock *BB,
return nullptr;
}
- Builder.SetInsertPoint(&*BB->begin());
+ Builder.SetInsertPoint(BB, BB->begin());
auto *PN = Builder.CreatePHI(Sel.getType(), Inputs.size());
for (auto *Pred : predecessors(BB))
PN->addIncoming(Inputs[Pred], Pred);
@@ -2584,6 +2560,61 @@ static Instruction *foldSelectToPhi(SelectInst &Sel, const DominatorTree &DT,
return nullptr;
}
+/// Tries to reduce a pattern that arises when calculating the remainder of the
+/// Euclidean division. When the divisor is a power of two and is guaranteed not
+/// to be negative, a signed remainder can be folded with a bitwise and.
+///
+/// (x % n) < 0 ? (x % n) + n : (x % n)
+/// -> x & (n - 1)
+static Instruction *foldSelectWithSRem(SelectInst &SI, InstCombinerImpl &IC,
+ IRBuilderBase &Builder) {
+ Value *CondVal = SI.getCondition();
+ Value *TrueVal = SI.getTrueValue();
+ Value *FalseVal = SI.getFalseValue();
+
+ ICmpInst::Predicate Pred;
+ Value *Op, *RemRes, *Remainder;
+ const APInt *C;
+ bool TrueIfSigned = false;
+
+ if (!(match(CondVal, m_ICmp(Pred, m_Value(RemRes), m_APInt(C))) &&
+ IC.isSignBitCheck(Pred, *C, TrueIfSigned)))
+ return nullptr;
+
+ // If the sign bit is not set, we have an SGE/SGT comparison, and the operands
+ // of the select are inverted.
+ if (!TrueIfSigned)
+ std::swap(TrueVal, FalseVal);
+
+ auto FoldToBitwiseAnd = [&](Value *Remainder) -> Instruction * {
+ Value *Add = Builder.CreateAdd(
+ Remainder, Constant::getAllOnesValue(RemRes->getType()));
+ return BinaryOperator::CreateAnd(Op, Add);
+ };
+
+ // Match the general case:
+ // %rem = srem i32 %x, %n
+ // %cnd = icmp slt i32 %rem, 0
+ // %add = add i32 %rem, %n
+ // %sel = select i1 %cnd, i32 %add, i32 %rem
+ if (match(TrueVal, m_Add(m_Value(RemRes), m_Value(Remainder))) &&
+ match(RemRes, m_SRem(m_Value(Op), m_Specific(Remainder))) &&
+ IC.isKnownToBeAPowerOfTwo(Remainder, /*OrZero*/ true) &&
+ FalseVal == RemRes)
+ return FoldToBitwiseAnd(Remainder);
+
+ // Match the case where one arm has been replaced by the constant 1:
+ // %rem = srem i32 %n, 2
+ // %cnd = icmp slt i32 %rem, 0
+ // %sel = select i1 %cnd, i32 1, i32 %rem
+ if (match(TrueVal, m_One()) &&
+ match(RemRes, m_SRem(m_Value(Op), m_SpecificInt(2))) &&
+ FalseVal == RemRes)
+ return FoldToBitwiseAnd(ConstantInt::get(RemRes->getType(), 2));
+
+ return nullptr;
+}
+
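With a hypothetical power-of-two divisor of 8, the general case reduces as:

define i32 @euclidean_mod(i32 %x) {
  %rem = srem i32 %x, 8
  %cnd = icmp slt i32 %rem, 0
  %add = add i32 %rem, 8
  %sel = select i1 %cnd, i32 %add, i32 %rem
  ret i32 %sel
}
; folds to roughly:  %and = and i32 %x, 7   ; i.e. x & (n - 1)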
static Value *foldSelectWithFrozenICmp(SelectInst &Sel, InstCombiner::BuilderTy &Builder) {
FreezeInst *FI = dyn_cast<FreezeInst>(Sel.getCondition());
if (!FI)
@@ -2860,8 +2891,15 @@ static Instruction *foldNestedSelects(SelectInst &OuterSelVal,
std::swap(InnerSel.TrueVal, InnerSel.FalseVal);
Value *AltCond = nullptr;
- auto matchOuterCond = [OuterSel, &AltCond](auto m_InnerCond) {
- return match(OuterSel.Cond, m_c_LogicalOp(m_InnerCond, m_Value(AltCond)));
+ auto matchOuterCond = [OuterSel, IsAndVariant, &AltCond](auto m_InnerCond) {
+ // An unsimplified select condition can match both LogicalAnd and LogicalOr
+ // (select true, true, false). Since below we assume that LogicalAnd implies
+ // that InnerSel matches the FVal, and vice versa for LogicalOr, we can't
+ // match the alternative pattern here.
+ return IsAndVariant ? match(OuterSel.Cond,
+ m_c_LogicalAnd(m_InnerCond, m_Value(AltCond)))
+ : match(OuterSel.Cond,
+ m_c_LogicalOr(m_InnerCond, m_Value(AltCond)));
};
// Finally, match the condition that was driving the outermost `select`,
@@ -3024,31 +3062,37 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
if (match(CondVal, m_Select(m_Value(A), m_Value(B), m_Zero())) &&
match(TrueVal, m_Specific(B)) && match(FalseVal, m_Zero()))
return replaceOperand(SI, 0, A);
- // select a, (select ~a, true, b), false -> select a, b, false
- if (match(TrueVal, m_c_LogicalOr(m_Not(m_Specific(CondVal)), m_Value(B))) &&
- match(FalseVal, m_Zero()))
- return replaceOperand(SI, 1, B);
- // select a, true, (select ~a, b, false) -> select a, true, b
- if (match(FalseVal, m_c_LogicalAnd(m_Not(m_Specific(CondVal)), m_Value(B))) &&
- match(TrueVal, m_One()))
- return replaceOperand(SI, 2, B);
// ~(A & B) & (A | B) --> A ^ B
if (match(&SI, m_c_LogicalAnd(m_Not(m_LogicalAnd(m_Value(A), m_Value(B))),
m_c_LogicalOr(m_Deferred(A), m_Deferred(B)))))
return BinaryOperator::CreateXor(A, B);
- // select (~a | c), a, b -> and a, (or c, freeze(b))
- if (match(CondVal, m_c_Or(m_Not(m_Specific(TrueVal)), m_Value(C))) &&
- CondVal->hasOneUse()) {
- FalseVal = Builder.CreateFreeze(FalseVal);
- return BinaryOperator::CreateAnd(TrueVal, Builder.CreateOr(C, FalseVal));
+ // select (~a | c), a, b -> select a, (select c, true, b), false
+ if (match(CondVal,
+ m_OneUse(m_c_Or(m_Not(m_Specific(TrueVal)), m_Value(C))))) {
+ Value *OrV = Builder.CreateSelect(C, One, FalseVal);
+ return SelectInst::Create(TrueVal, OrV, Zero);
+ }
+ // select (c & b), a, b -> select b, (select ~c, true, a), false
+ if (match(CondVal, m_OneUse(m_c_And(m_Value(C), m_Specific(FalseVal))))) {
+ if (Value *NotC = getFreelyInverted(C, C->hasOneUse(), &Builder)) {
+ Value *OrV = Builder.CreateSelect(NotC, One, TrueVal);
+ return SelectInst::Create(FalseVal, OrV, Zero);
+ }
}
- // select (~c & b), a, b -> and b, (or freeze(a), c)
- if (match(CondVal, m_c_And(m_Not(m_Value(C)), m_Specific(FalseVal))) &&
- CondVal->hasOneUse()) {
- TrueVal = Builder.CreateFreeze(TrueVal);
- return BinaryOperator::CreateAnd(FalseVal, Builder.CreateOr(C, TrueVal));
+ // select (a | c), a, b -> select a, true, (select ~c, b, false)
+ if (match(CondVal, m_OneUse(m_c_Or(m_Specific(TrueVal), m_Value(C))))) {
+ if (Value *NotC = getFreelyInverted(C, C->hasOneUse(), &Builder)) {
+ Value *AndV = Builder.CreateSelect(NotC, FalseVal, Zero);
+ return SelectInst::Create(TrueVal, One, AndV);
+ }
+ }
+ // select (c & ~b), a, b -> select b, true, (select c, a, false)
+ if (match(CondVal,
+ m_OneUse(m_c_And(m_Value(C), m_Not(m_Specific(FalseVal)))))) {
+ Value *AndV = Builder.CreateSelect(C, TrueVal, Zero);
+ return SelectInst::Create(FalseVal, One, AndV);
}
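The first of the new rewrites, sketched on hypothetical booleans; the other three follow the same shape with the roles of the arms swapped:

define i1 @sel_not_or(i1 %a, i1 %b, i1 %c) {
  %na = xor i1 %a, true
  %cond = or i1 %na, %c
  %sel = select i1 %cond, i1 %a, i1 %b
  ret i1 %sel
}
; becomes roughly:
;   %inner = select i1 %c, i1 true, i1 %b
;   %sel = select i1 %a, i1 %inner, i1 false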
if (match(FalseVal, m_Zero()) || match(TrueVal, m_One())) {
@@ -3057,7 +3101,7 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
Value *Op1 = IsAnd ? TrueVal : FalseVal;
if (isCheckForZeroAndMulWithOverflow(CondVal, Op1, IsAnd, Y)) {
auto *FI = new FreezeInst(*Y, (*Y)->getName() + ".fr");
- InsertNewInstBefore(FI, *cast<Instruction>(Y->getUser()));
+ InsertNewInstBefore(FI, cast<Instruction>(Y->getUser())->getIterator());
replaceUse(*Y, FI);
return replaceInstUsesWith(SI, Op1);
}
@@ -3272,6 +3316,31 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder) {
Masked);
}
+bool InstCombinerImpl::fmulByZeroIsZero(Value *MulVal, FastMathFlags FMF,
+ const Instruction *CtxI) const {
+ KnownFPClass Known = computeKnownFPClass(MulVal, FMF, fcNegative, CtxI);
+
+ return Known.isKnownNeverNaN() && Known.isKnownNeverInfinity() &&
+ (FMF.noSignedZeros() || Known.signBitIsZeroOrNaN());
+}
+
+static bool matchFMulByZeroIfResultEqZero(InstCombinerImpl &IC, Value *Cmp0,
+ Value *Cmp1, Value *TrueVal,
+ Value *FalseVal, Instruction &CtxI,
+ bool SelectIsNSZ) {
+ Value *MulRHS;
+ if (match(Cmp1, m_PosZeroFP()) &&
+ match(TrueVal, m_c_FMul(m_Specific(Cmp0), m_Value(MulRHS)))) {
+ FastMathFlags FMF = cast<FPMathOperator>(TrueVal)->getFastMathFlags();
+ // nsz must be on the select; it must be ignored on the multiply. We
+ // need nnan and ninf on the multiply for the other value.
+ FMF.setNoSignedZeros(SelectIsNSZ);
+ return IC.fmulByZeroIsZero(MulRHS, FMF, &CtxI);
+ }
+
+ return false;
+}
+
Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
Value *CondVal = SI.getCondition();
Value *TrueVal = SI.getTrueValue();
@@ -3303,28 +3372,6 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
ConstantInt::getFalse(CondType), SQ,
/* AllowRefinement */ true))
return replaceOperand(SI, 2, S);
-
- // Handle patterns involving sext/zext + not explicitly,
- // as simplifyWithOpReplaced() only looks past one instruction.
- Value *NotCond;
-
- // select a, sext(!a), b -> select !a, b, 0
- // select a, zext(!a), b -> select !a, b, 0
- if (match(TrueVal, m_ZExtOrSExt(m_CombineAnd(m_Value(NotCond),
- m_Not(m_Specific(CondVal))))))
- return SelectInst::Create(NotCond, FalseVal,
- Constant::getNullValue(SelType));
-
- // select a, b, zext(!a) -> select !a, 1, b
- if (match(FalseVal, m_ZExt(m_CombineAnd(m_Value(NotCond),
- m_Not(m_Specific(CondVal))))))
- return SelectInst::Create(NotCond, ConstantInt::get(SelType, 1), TrueVal);
-
- // select a, b, sext(!a) -> select !a, -1, b
- if (match(FalseVal, m_SExt(m_CombineAnd(m_Value(NotCond),
- m_Not(m_Specific(CondVal))))))
- return SelectInst::Create(NotCond, Constant::getAllOnesValue(SelType),
- TrueVal);
}
if (Instruction *R = foldSelectOfBools(SI))
@@ -3362,7 +3409,10 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
}
}
+ auto *SIFPOp = dyn_cast<FPMathOperator>(&SI);
+
if (auto *FCmp = dyn_cast<FCmpInst>(CondVal)) {
+ FCmpInst::Predicate Pred = FCmp->getPredicate();
Value *Cmp0 = FCmp->getOperand(0), *Cmp1 = FCmp->getOperand(1);
// Are we selecting a value based on a comparison of the two values?
if ((Cmp0 == TrueVal && Cmp1 == FalseVal) ||
@@ -3372,7 +3422,7 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
//
// e.g.
// (X ugt Y) ? X : Y -> (X ole Y) ? Y : X
- if (FCmp->hasOneUse() && FCmpInst::isUnordered(FCmp->getPredicate())) {
+ if (FCmp->hasOneUse() && FCmpInst::isUnordered(Pred)) {
FCmpInst::Predicate InvPred = FCmp->getInversePredicate();
IRBuilder<>::FastMathFlagGuard FMFG(Builder);
// FIXME: The FMF should propagate from the select, not the fcmp.
@@ -3383,14 +3433,47 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
return replaceInstUsesWith(SI, NewSel);
}
}
+
+ if (SIFPOp) {
+ // Fold out scale-if-equals-zero pattern.
+ //
+ // This pattern appears in code with denormal range checks after it's
+ // assumed denormals are treated as zero. This drops a canonicalization.
+
+ // TODO: Could relax the signed zero logic. We just need to know the sign
+ // of the result matches (fmul x, y has the same sign as x).
+ //
+ // TODO: Handle always-canonicalizing variant that selects some value or 1
+ // scaling factor in the fmul visitor.
+
+ // TODO: Handle ldexp too
+
+ Value *MatchCmp0 = nullptr;
+ Value *MatchCmp1 = nullptr;
+
+ // (select (fcmp [ou]eq x, 0.0), (fmul x, K), x) => x
+ // (select (fcmp [ou]ne x, 0.0), x, (fmul x, K)) => x
+ if (Pred == CmpInst::FCMP_OEQ || Pred == CmpInst::FCMP_UEQ) {
+ MatchCmp0 = FalseVal;
+ MatchCmp1 = TrueVal;
+ } else if (Pred == CmpInst::FCMP_ONE || Pred == CmpInst::FCMP_UNE) {
+ MatchCmp0 = TrueVal;
+ MatchCmp1 = FalseVal;
+ }
+
+ if (Cmp0 == MatchCmp0 &&
+ matchFMulByZeroIfResultEqZero(*this, Cmp0, Cmp1, MatchCmp1, MatchCmp0,
+ SI, SIFPOp->hasNoSignedZeros()))
+ return replaceInstUsesWith(SI, Cmp0);
+ }
}
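A sketch with a hypothetical constant scale: 4.0 is known never NaN or infinity and has a zero sign bit, so the fold applies even without nsz on the select.

define float @scale_if_zero(float %x) {
  %cmp = fcmp oeq float %x, 0.0
  %mul = fmul float %x, 4.0
  %sel = select i1 %cmp, float %mul, float %x
  ret float %sel
}
; folds to:  ret float %x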
- if (isa<FPMathOperator>(SI)) {
+ if (SIFPOp) {
// TODO: Try to forward-propagate FMF from select arms to the select.
// Canonicalize select of FP values where NaN and -0.0 are not valid as
// minnum/maxnum intrinsics.
- if (SI.hasNoNaNs() && SI.hasNoSignedZeros()) {
+ if (SIFPOp->hasNoNaNs() && SIFPOp->hasNoSignedZeros()) {
Value *X, *Y;
if (match(&SI, m_OrdFMax(m_Value(X), m_Value(Y))))
return replaceInstUsesWith(
@@ -3430,6 +3513,9 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
if (Instruction *I = foldSelectExtConst(SI))
return I;
+ if (Instruction *I = foldSelectWithSRem(SI, *this, Builder))
+ return I;
+
// Fold (select C, (gep Ptr, Idx), Ptr) -> (gep Ptr, (select C, Idx, 0))
// Fold (select C, Ptr, (gep Ptr, Idx)) -> (gep Ptr, (select C, 0, Idx))
auto SelectGepWithBase = [&](GetElementPtrInst *Gep, Value *Base,
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 89dad455f015..b7958978c450 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -136,9 +136,14 @@ Value *InstCombinerImpl::reassociateShiftAmtsOfTwoSameDirectionShifts(
assert(IdenticalShOpcodes && "Should not get here with different shifts.");
- // All good, we can do this fold.
- NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, X->getType());
+ if (NewShAmt->getType() != X->getType()) {
+ NewShAmt = ConstantFoldCastOperand(Instruction::ZExt, NewShAmt,
+ X->getType(), SQ.DL);
+ if (!NewShAmt)
+ return nullptr;
+ }
+ // All good, we can do this fold.
BinaryOperator *NewShift = BinaryOperator::Create(ShiftOpcode, X, NewShAmt);
// The flags can only be propagated if there wasn't a trunc.
@@ -245,7 +250,11 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
SumOfShAmts = Constant::replaceUndefsWith(
SumOfShAmts, ConstantInt::get(SumOfShAmts->getType()->getScalarType(),
ExtendedTy->getScalarSizeInBits()));
- auto *ExtendedSumOfShAmts = ConstantExpr::getZExt(SumOfShAmts, ExtendedTy);
+ auto *ExtendedSumOfShAmts = ConstantFoldCastOperand(
+ Instruction::ZExt, SumOfShAmts, ExtendedTy, Q.DL);
+ if (!ExtendedSumOfShAmts)
+ return nullptr;
+
// And compute the mask as usual: ~(-1 << (SumOfShAmts))
auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy);
auto *ExtendedInvertedMask =
@@ -278,16 +287,22 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
ShAmtsDiff = Constant::replaceUndefsWith(
ShAmtsDiff, ConstantInt::get(ShAmtsDiff->getType()->getScalarType(),
-WidestTyBitWidth));
- auto *ExtendedNumHighBitsToClear = ConstantExpr::getZExt(
+ auto *ExtendedNumHighBitsToClear = ConstantFoldCastOperand(
+ Instruction::ZExt,
ConstantExpr::getSub(ConstantInt::get(ShAmtsDiff->getType(),
WidestTyBitWidth,
/*isSigned=*/false),
ShAmtsDiff),
- ExtendedTy);
+ ExtendedTy, Q.DL);
+ if (!ExtendedNumHighBitsToClear)
+ return nullptr;
+
// And compute the mask as usual: (-1 l>> (NumHighBitsToClear))
auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy);
- NewMask =
- ConstantExpr::getLShr(ExtendedAllOnes, ExtendedNumHighBitsToClear);
+ NewMask = ConstantFoldBinaryOpOperands(Instruction::LShr, ExtendedAllOnes,
+ ExtendedNumHighBitsToClear, Q.DL);
+ if (!NewMask)
+ return nullptr;
} else
return nullptr; // Don't know anything about this pattern.
@@ -545,8 +560,8 @@ static bool canEvaluateShiftedShift(unsigned OuterShAmt, bool IsOuterShl,
/// this succeeds, getShiftedValue() will be called to produce the value.
static bool canEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift,
InstCombinerImpl &IC, Instruction *CxtI) {
- // We can always evaluate constants shifted.
- if (isa<Constant>(V))
+ // We can always evaluate immediate constants.
+ if (match(V, m_ImmConstant()))
return true;
Instruction *I = dyn_cast<Instruction>(V);
@@ -709,13 +724,13 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
case Instruction::Mul: {
assert(!isLeftShift && "Unexpected shift direction!");
auto *Neg = BinaryOperator::CreateNeg(I->getOperand(0));
- IC.InsertNewInstWith(Neg, *I);
+ IC.InsertNewInstWith(Neg, I->getIterator());
unsigned TypeWidth = I->getType()->getScalarSizeInBits();
APInt Mask = APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits);
auto *And = BinaryOperator::CreateAnd(Neg,
ConstantInt::get(I->getType(), Mask));
And->takeName(I);
- return IC.InsertNewInstWith(And, *I);
+ return IC.InsertNewInstWith(And, I->getIterator());
}
}
}
@@ -745,7 +760,7 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *C1,
// (C2 >> X) >> C1 --> (C2 >> C1) >> X
Constant *C2;
Value *X;
- if (match(Op0, m_BinOp(I.getOpcode(), m_Constant(C2), m_Value(X))))
+ if (match(Op0, m_BinOp(I.getOpcode(), m_ImmConstant(C2), m_Value(X))))
return BinaryOperator::Create(
I.getOpcode(), Builder.CreateBinOp(I.getOpcode(), C2, C1), X);
@@ -928,6 +943,60 @@ Instruction *InstCombinerImpl::foldLShrOverflowBit(BinaryOperator &I) {
return new ZExtInst(Overflow, Ty);
}
+// Try to set nuw/nsw flags on shl or exact flag on lshr/ashr using knownbits.
+static bool setShiftFlags(BinaryOperator &I, const SimplifyQuery &Q) {
+ assert(I.isShift() && "Expected a shift as input");
+ // We already have all the flags.
+ if (I.getOpcode() == Instruction::Shl) {
+ if (I.hasNoUnsignedWrap() && I.hasNoSignedWrap())
+ return false;
+ } else {
+ if (I.isExact())
+ return false;
+
+ // shr (shl X, Y), Y
+ if (match(I.getOperand(0), m_Shl(m_Value(), m_Specific(I.getOperand(1))))) {
+ I.setIsExact();
+ return true;
+ }
+ }
+
+ // Compute what we know about the shift count.
+ KnownBits KnownCnt = computeKnownBits(I.getOperand(1), /* Depth */ 0, Q);
+ unsigned BitWidth = KnownCnt.getBitWidth();
+ // Since shift produces a poison value if RHS is equal to or larger than the
+ // bit width, we can safely assume that RHS is less than the bit width.
+ uint64_t MaxCnt = KnownCnt.getMaxValue().getLimitedValue(BitWidth - 1);
+
+ KnownBits KnownAmt = computeKnownBits(I.getOperand(0), /* Depth */ 0, Q);
+ bool Changed = false;
+
+ if (I.getOpcode() == Instruction::Shl) {
+ // If we have at least as many leading zeros as the maximum shift count, we have nuw.
+ if (!I.hasNoUnsignedWrap() && MaxCnt <= KnownAmt.countMinLeadingZeros()) {
+ I.setHasNoUnsignedWrap();
+ Changed = true;
+ }
+ // If we have more sign bits than the maximum shift count, we have nsw.
+ if (!I.hasNoSignedWrap()) {
+ if (MaxCnt < KnownAmt.countMinSignBits() ||
+ MaxCnt < ComputeNumSignBits(I.getOperand(0), Q.DL, /*Depth*/ 0, Q.AC,
+ Q.CxtI, Q.DT)) {
+ I.setHasNoSignedWrap();
+ Changed = true;
+ }
+ }
+ return Changed;
+ }
+
+ // If we have at least as many trailing zeros as the maximum shift count,
+ // then the shift is exact.
+ Changed = MaxCnt <= KnownAmt.countMinTrailingZeros();
+ I.setIsExact(Changed);
+
+ return Changed;
+}
+
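Two hypothetical sketches of flags the helper can infer from known bits:

define i32 @shl_flags(i32 %x) {
  %a = and i32 %x, 255   ; at least 24 leading zeros and 24 sign bits known
  %s = shl i32 %a, 8     ; max count 8 <= 24, so both nuw and nsw can be set
  ret i32 %s
}

define i32 @lshr_exact(i32 %x) {
  %a = shl i32 %x, 4
  %s = lshr i32 %a, 4    ; shr (shl X, Y), Y is marked exact up front
  ret i32 %s
}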
Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
const SimplifyQuery Q = SQ.getWithInstruction(&I);
@@ -976,7 +1045,11 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
// If C1 < C: (X >>?,exact C1) << C --> X << (C - C1)
Constant *ShiftDiff = ConstantInt::get(Ty, ShAmtC - ShrAmt);
auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
- NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ NewShl->setHasNoUnsignedWrap(
+ I.hasNoUnsignedWrap() ||
+ (ShrAmt &&
+ cast<Instruction>(Op0)->getOpcode() == Instruction::LShr &&
+ I.hasNoSignedWrap()));
NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
return NewShl;
}
@@ -997,7 +1070,11 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
// If C1 < C: (X >>? C1) << C --> (X << (C - C1)) & (-1 << C)
Constant *ShiftDiff = ConstantInt::get(Ty, ShAmtC - ShrAmt);
auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
- NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ NewShl->setHasNoUnsignedWrap(
+ I.hasNoUnsignedWrap() ||
+ (ShrAmt &&
+ cast<Instruction>(Op0)->getOpcode() == Instruction::LShr &&
+ I.hasNoSignedWrap()));
NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
Builder.Insert(NewShl);
APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC));
@@ -1108,22 +1185,11 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
Value *NewShift = Builder.CreateShl(X, Op1);
return BinaryOperator::CreateSub(NewLHS, NewShift);
}
-
- // If the shifted-out value is known-zero, then this is a NUW shift.
- if (!I.hasNoUnsignedWrap() &&
- MaskedValueIsZero(Op0, APInt::getHighBitsSet(BitWidth, ShAmtC), 0,
- &I)) {
- I.setHasNoUnsignedWrap();
- return &I;
- }
-
- // If the shifted-out value is all signbits, then this is a NSW shift.
- if (!I.hasNoSignedWrap() && ComputeNumSignBits(Op0, 0, &I) > ShAmtC) {
- I.setHasNoSignedWrap();
- return &I;
- }
}
+ if (setShiftFlags(I, Q))
+ return &I;
+
// Transform (x >> y) << y to x & (-1 << y)
// Valid for any type of right-shift.
Value *X;
@@ -1161,15 +1227,6 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
Value *NegX = Builder.CreateNeg(X, "neg");
return BinaryOperator::CreateAnd(NegX, X);
}
-
- // The only way to shift out the 1 is with an over-shift, so that would
- // be poison with or without "nuw". Undef is excluded because (undef << X)
- // is not undef (it is zero).
- Constant *ConstantOne = cast<Constant>(Op0);
- if (!I.hasNoUnsignedWrap() && !ConstantOne->containsUndefElement()) {
- I.setHasNoUnsignedWrap();
- return &I;
- }
}
return nullptr;
@@ -1235,9 +1292,10 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
unsigned ShlAmtC = C1->getZExtValue();
Constant *ShiftDiff = ConstantInt::get(Ty, ShlAmtC - ShAmtC);
if (cast<BinaryOperator>(Op0)->hasNoUnsignedWrap()) {
- // (X <<nuw C1) >>u C --> X <<nuw (C1 - C)
+ // (X <<nuw C1) >>u C --> X <<nuw/nsw (C1 - C)
auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
NewShl->setHasNoUnsignedWrap(true);
+ NewShl->setHasNoSignedWrap(ShAmtC > 0);
return NewShl;
}
if (Op0->hasOneUse()) {
@@ -1370,12 +1428,13 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
if (Op0->hasOneUse()) {
APInt NewMulC = MulC->lshr(ShAmtC);
// if c is divisible by (1 << ShAmtC):
- // lshr (mul nuw x, MulC), ShAmtC -> mul nuw x, (MulC >> ShAmtC)
+ // lshr (mul nuw x, MulC), ShAmtC -> mul nuw nsw x, (MulC >> ShAmtC)
if (MulC->eq(NewMulC.shl(ShAmtC))) {
auto *NewMul =
BinaryOperator::CreateNUWMul(X, ConstantInt::get(Ty, NewMulC));
- BinaryOperator *OrigMul = cast<BinaryOperator>(Op0);
- NewMul->setHasNoSignedWrap(OrigMul->hasNoSignedWrap());
+ assert(ShAmtC != 0 &&
+ "lshr X, 0 should be handled by simplifyLShrInst.");
+ NewMul->setHasNoSignedWrap(true);
return NewMul;
}
}
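With hypothetical constants MulC = 12 and ShAmtC = 2 (12 == 3 << 2, so the bits shifted out are zero):

define i32 @lshr_mul_nuw(i32 %x) {
  %m = mul nuw i32 %x, 12
  %s = lshr i32 %m, 2
  ret i32 %s
}
; folds to roughly:  %s = mul nuw nsw i32 %x, 3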
@@ -1414,15 +1473,12 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
Value *And = Builder.CreateAnd(BoolX, BoolY);
return new ZExtInst(And, Ty);
}
-
- // If the shifted-out value is known-zero, then this is an exact shift.
- if (!I.isExact() &&
- MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmtC), 0, &I)) {
- I.setIsExact();
- return &I;
- }
}
+ const SimplifyQuery Q = SQ.getWithInstruction(&I);
+ if (setShiftFlags(I, Q))
+ return &I;
+
// Transform (x << y) >> y to x & (-1 >> y)
if (match(Op0, m_OneUse(m_Shl(m_Value(X), m_Specific(Op1))))) {
Constant *AllOnes = ConstantInt::getAllOnesValue(Ty);
@@ -1581,15 +1637,12 @@ Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) {
if (match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y)))))
return new SExtInst(Builder.CreateICmpSLT(X, Y), Ty);
}
-
- // If the shifted-out value is known-zero, then this is an exact shift.
- if (!I.isExact() &&
- MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmt), 0, &I)) {
- I.setIsExact();
- return &I;
- }
}
+ const SimplifyQuery Q = SQ.getWithInstruction(&I);
+ if (setShiftFlags(I, Q))
+ return &I;
+
// Prefer `-(x & 1)` over `(x << (bitwidth(x)-1)) a>> (bitwidth(x)-1)`
// as the pattern to splat the lowest bit.
// FIXME: iff X is already masked, we don't need the one-use check.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 00eece9534b0..846116a929b1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -24,6 +24,12 @@ using namespace llvm::PatternMatch;
#define DEBUG_TYPE "instcombine"
+static cl::opt<bool>
+ VerifyKnownBits("instcombine-verify-known-bits",
+ cl::desc("Verify that computeKnownBits() and "
+ "SimplifyDemandedBits() are consistent"),
+ cl::Hidden, cl::init(false));
+
/// Check to see if the specified operand of the specified instruction is a
/// constant integer. If so, check to see if there are any bits set in the
/// constant that are not demanded. If so, shrink the constant and return true.
@@ -48,15 +54,20 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
return true;
}
+/// Returns the bitwidth of the given scalar or pointer type. For vector types,
+/// returns the element type's bitwidth.
+static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
+ if (unsigned BitWidth = Ty->getScalarSizeInBits())
+ return BitWidth;
+ return DL.getPointerTypeSizeInBits(Ty);
+}
/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
/// the instruction has any properties that allow us to simplify its operands.
-bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) {
- unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
- KnownBits Known(BitWidth);
- APInt DemandedMask(APInt::getAllOnes(BitWidth));
-
+bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst,
+ KnownBits &Known) {
+ APInt DemandedMask(APInt::getAllOnes(Known.getBitWidth()));
Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known,
0, &Inst);
if (!V) return false;
@@ -65,6 +76,13 @@ bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) {
return true;
}
+/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
+/// the instruction has any properties that allow us to simplify its operands.
+bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) {
+ KnownBits Known(getBitWidth(Inst.getType(), DL));
+ return SimplifyDemandedInstructionBits(Inst, Known);
+}
+
/// This form of SimplifyDemandedBits simplifies the specified instruction
/// operand if possible, updating it in place. It returns true if it made any
/// change and false otherwise.
@@ -95,8 +113,8 @@ bool InstCombinerImpl::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
/// expression.
/// Known.One and Known.Zero always follow the invariant that:
/// Known.One & Known.Zero == 0.
-/// That is, a bit can't be both 1 and 0. Note that the bits in Known.One and
-/// Known.Zero may only be accurate for those bits set in DemandedMask. Note
+/// That is, a bit can't be both 1 and 0. The bits in Known.One and Known.Zero
+/// are accurate even for bits not in DemandedMask. Note
/// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must all
/// be the same.
///
@@ -143,7 +161,6 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return SimplifyMultipleUseDemandedBits(I, DemandedMask, Known, Depth, CxtI);
KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth);
-
// If this is the root being simplified, allow it to have multiple uses,
// just set the DemandedMask to all bits so that we can try to simplify the
// operands. This allows visitTruncInst (for example) to simplify the
@@ -196,7 +213,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
- Depth, DL, &AC, CxtI, &DT);
+ Depth, SQ.getWithInstruction(CxtI));
// If the client is only demanding bits that we know, return the known
// constant.
@@ -220,13 +237,16 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If either the LHS or the RHS is One, the result is One.
if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.One, LHSKnown,
- Depth + 1))
+ Depth + 1)) {
+ // The disjoint flag may no longer hold.
+ I->dropPoisonGeneratingFlags();
return I;
+ }
assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
- Depth, DL, &AC, CxtI, &DT);
+ Depth, SQ.getWithInstruction(CxtI));
// If the client is only demanding bits that we know, return the known
// constant.
@@ -244,6 +264,16 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (ShrinkDemandedConstant(I, 1, DemandedMask))
return I;
+ // Infer disjoint flag if no common bits are set.
+ if (!cast<PossiblyDisjointInst>(I)->isDisjoint()) {
+ WithCache<const Value *> LHSCache(I->getOperand(0), LHSKnown),
+ RHSCache(I->getOperand(1), RHSKnown);
+ if (haveNoCommonBitsSet(LHSCache, RHSCache, SQ.getWithInstruction(I))) {
+ cast<PossiblyDisjointInst>(I)->setIsDisjoint(true);
+ return I;
+ }
+ }
+
break;
}
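A hypothetical sketch of the inference: the operands provably share no set bits, so the or is marked disjoint.

define i32 @or_disjoint(i32 %x, i32 %y) {
  %lo = and i32 %x, 15
  %hi = shl i32 %y, 4
  %o = or i32 %lo, %hi   ; may become  or disjoint i32 %lo, %hi
  ret i32 %o
}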
case Instruction::Xor: {
@@ -265,7 +295,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
- Depth, DL, &AC, CxtI, &DT);
+ Depth, SQ.getWithInstruction(CxtI));
// If the client is only demanding bits that we know, return the known
// constant.
@@ -284,9 +314,11 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.Zero)) {
Instruction *Or =
- BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
- I->getName());
- return InsertNewInstWith(Or, *I);
+ BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1));
+ if (DemandedMask.isAllOnes())
+ cast<PossiblyDisjointInst>(Or)->setIsDisjoint(true);
+ Or->takeName(I);
+ return InsertNewInstWith(Or, I->getIterator());
}
// If all of the demanded bits on one side are known, and all of the set
@@ -298,7 +330,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Constant *AndC = Constant::getIntegerValue(VTy,
~RHSKnown.One & DemandedMask);
Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
- return InsertNewInstWith(And, *I);
+ return InsertNewInstWith(And, I->getIterator());
}
// If the RHS is a constant, see if we can change it. Don't alter a -1
@@ -330,11 +362,11 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Constant *AndC = ConstantInt::get(VTy, NewMask & AndRHS->getValue());
Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
- InsertNewInstWith(NewAnd, *I);
+ InsertNewInstWith(NewAnd, I->getIterator());
Constant *XorC = ConstantInt::get(VTy, NewMask & XorRHS->getValue());
Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
- return InsertNewInstWith(NewXor, *I);
+ return InsertNewInstWith(NewXor, I->getIterator());
}
}
break;
@@ -411,36 +443,21 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
APInt InputDemandedMask = DemandedMask.zextOrTrunc(SrcBitWidth);
KnownBits InputKnown(SrcBitWidth);
- if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1))
+ if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1)) {
+ // For zext nneg, we may have dropped the instruction which made the
+ // input non-negative.
+ I->dropPoisonGeneratingFlags();
return I;
+ }
assert(InputKnown.getBitWidth() == SrcBitWidth && "Src width changed?");
+ if (I->getOpcode() == Instruction::ZExt && I->hasNonNeg() &&
+ !InputKnown.isNegative())
+ InputKnown.makeNonNegative();
Known = InputKnown.zextOrTrunc(BitWidth);
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- break;
- }
- case Instruction::BitCast:
- if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
- return nullptr; // vector->int or fp->int?
-
- if (auto *DstVTy = dyn_cast<VectorType>(VTy)) {
- if (auto *SrcVTy = dyn_cast<VectorType>(I->getOperand(0)->getType())) {
- if (isa<ScalableVectorType>(DstVTy) ||
- isa<ScalableVectorType>(SrcVTy) ||
- cast<FixedVectorType>(DstVTy)->getNumElements() !=
- cast<FixedVectorType>(SrcVTy)->getNumElements())
- // Don't touch a bitcast between vectors of different element counts.
- return nullptr;
- } else
- // Don't touch a scalar-to-vector bitcast.
- return nullptr;
- } else if (I->getOperand(0)->getType()->isVectorTy())
- // Don't touch a vector-to-scalar bitcast.
- return nullptr;
- if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1))
- return I;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
break;
+ }
case Instruction::SExt: {
// Compute the bits in the result that are not present in the input.
unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
@@ -461,8 +478,9 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (InputKnown.isNonNegative() ||
DemandedMask.getActiveBits() <= SrcBitWidth) {
// Convert to ZExt cast.
- CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
- return InsertNewInstWith(NewCast, *I);
+ CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy);
+ NewCast->takeName(I);
+ return InsertNewInstWith(NewCast, I->getIterator());
}
// If the sign bit of the input is known set or clear, then we know the
@@ -534,6 +552,17 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (DemandedFromOps.isSubsetOf(LHSKnown.Zero))
return I->getOperand(1);
+ // (add X, C) --> (xor X, C) IFF C is equal to the top bit of the DemandedMask
+ {
+ const APInt *C;
+ if (match(I->getOperand(1), m_APInt(C)) &&
+ C->isOneBitSet(DemandedMask.getActiveBits() - 1)) {
+ IRBuilderBase::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(I);
+ return Builder.CreateXor(I->getOperand(0), ConstantInt::get(VTy, *C));
+ }
+ }
+
// Otherwise just compute the known bits of the result.
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
Known = KnownBits::computeForAddSub(true, NSW, LHSKnown, RHSKnown);
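With only the low 8 bits demanded and a hypothetical C = 128 (exactly the top demanded bit), the carry out of bit 7 is never observed, so the add acts as an xor:

define i8 @add_as_xor(i32 %x) {
  %a = add i32 %x, 128
  %t = trunc i32 %a to i8   ; only the low 8 bits of %a are demanded
  ret i8 %t
}
; %a may be rewritten as  xor i32 %x, 128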
@@ -586,7 +615,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (match(I->getOperand(1), m_APInt(C)) && C->countr_zero() == CTZ) {
Constant *ShiftC = ConstantInt::get(VTy, CTZ);
Instruction *Shl = BinaryOperator::CreateShl(I->getOperand(0), ShiftC);
- return InsertNewInstWith(Shl, *I);
+ return InsertNewInstWith(Shl, I->getIterator());
}
}
// For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
@@ -595,7 +624,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (I->getOperand(0) == I->getOperand(1) && DemandedMask.ult(4)) {
Constant *One = ConstantInt::get(VTy, 1);
Instruction *And1 = BinaryOperator::CreateAnd(I->getOperand(0), One);
- return InsertNewInstWith(And1, *I);
+ return InsertNewInstWith(And1, I->getIterator());
}
computeKnownBits(I, Known, Depth, CxtI);
@@ -624,10 +653,12 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (DemandedMask.countr_zero() >= ShiftAmt &&
match(I->getOperand(0), m_LShr(m_ImmConstant(C), m_Value(X)))) {
Constant *LeftShiftAmtC = ConstantInt::get(VTy, ShiftAmt);
- Constant *NewC = ConstantExpr::getShl(C, LeftShiftAmtC);
- if (ConstantExpr::getLShr(NewC, LeftShiftAmtC) == C) {
+ Constant *NewC = ConstantFoldBinaryOpOperands(Instruction::Shl, C,
+ LeftShiftAmtC, DL);
+ if (ConstantFoldBinaryOpOperands(Instruction::LShr, NewC, LeftShiftAmtC,
+ DL) == C) {
Instruction *Lshr = BinaryOperator::CreateLShr(NewC, X);
- return InsertNewInstWith(Lshr, *I);
+ return InsertNewInstWith(Lshr, I->getIterator());
}
}
@@ -688,24 +719,23 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Constant *C;
if (match(I->getOperand(0), m_Shl(m_ImmConstant(C), m_Value(X)))) {
Constant *RightShiftAmtC = ConstantInt::get(VTy, ShiftAmt);
- Constant *NewC = ConstantExpr::getLShr(C, RightShiftAmtC);
- if (ConstantExpr::getShl(NewC, RightShiftAmtC) == C) {
+ Constant *NewC = ConstantFoldBinaryOpOperands(Instruction::LShr, C,
+ RightShiftAmtC, DL);
+ if (ConstantFoldBinaryOpOperands(Instruction::Shl, NewC,
+ RightShiftAmtC, DL) == C) {
Instruction *Shl = BinaryOperator::CreateShl(NewC, X);
- return InsertNewInstWith(Shl, *I);
+ return InsertNewInstWith(Shl, I->getIterator());
}
}
}
// Unsigned shift right.
APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
-
- // If the shift is exact, then it does demand the low bits (and knows that
- // they are zero).
- if (cast<LShrOperator>(I)->isExact())
- DemandedMaskIn.setLowBits(ShiftAmt);
-
- if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
+ if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) {
+ // The exact flag may no longer hold.
+ I->dropPoisonGeneratingFlags();
return I;
+ }
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShiftAmt);
Known.One.lshrInPlace(ShiftAmt);
@@ -733,7 +763,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// Perform the logical shift right.
Instruction *NewVal = BinaryOperator::CreateLShr(
I->getOperand(0), I->getOperand(1), I->getName());
- return InsertNewInstWith(NewVal, *I);
+ return InsertNewInstWith(NewVal, I->getIterator());
}
const APInt *SA;
@@ -747,13 +777,11 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (DemandedMask.countl_zero() <= ShiftAmt)
DemandedMaskIn.setSignBit();
- // If the shift is exact, then it does demand the low bits (and knows that
- // they are zero).
- if (cast<AShrOperator>(I)->isExact())
- DemandedMaskIn.setLowBits(ShiftAmt);
-
- if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
+ if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) {
+      // The exact flag may no longer hold.
+ I->dropPoisonGeneratingFlags();
return I;
+ }
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
// Compute the new bits that are at the top now plus sign bits.
@@ -770,7 +798,8 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0),
I->getOperand(1));
LShr->setIsExact(cast<BinaryOperator>(I)->isExact());
- return InsertNewInstWith(LShr, *I);
+ LShr->takeName(I);
+ return InsertNewInstWith(LShr, I->getIterator());
} else if (Known.One[BitWidth-ShiftAmt-1]) { // New bits are known one.
Known.One |= HighBits;
}
@@ -867,7 +896,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
match(II->getArgOperand(0), m_Not(m_Value(X)))) {
Function *Ctpop = Intrinsic::getDeclaration(
II->getModule(), Intrinsic::ctpop, VTy);
- return InsertNewInstWith(CallInst::Create(Ctpop, {X}), *I);
+ return InsertNewInstWith(CallInst::Create(Ctpop, {X}), I->getIterator());
}
break;
}
@@ -894,10 +923,52 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
NewVal = BinaryOperator::CreateShl(
II->getArgOperand(0), ConstantInt::get(VTy, NTZ - NLZ));
NewVal->takeName(I);
- return InsertNewInstWith(NewVal, *I);
+ return InsertNewInstWith(NewVal, I->getIterator());
}
break;
}
+ case Intrinsic::ptrmask: {
+ unsigned MaskWidth = I->getOperand(1)->getType()->getScalarSizeInBits();
+ RHSKnown = KnownBits(MaskWidth);
+ // If either the LHS or the RHS are Zero, the result is zero.
+ if (SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1) ||
+ SimplifyDemandedBits(
+ I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth),
+ RHSKnown, Depth + 1))
+ return I;
+
+ // TODO: Should be 1-extend
+ RHSKnown = RHSKnown.anyextOrTrunc(BitWidth);
+ assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
+ assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
+
+ Known = LHSKnown & RHSKnown;
+ KnownBitsComputed = true;
+
+ // If the client is only demanding bits we know to be zero, return
+ // `llvm.ptrmask(p, 0)`. We can't return `null` here due to pointer
+ // provenance, but making the mask zero will be easily optimizable in
+ // the backend.
+ if (DemandedMask.isSubsetOf(Known.Zero) &&
+ !match(I->getOperand(1), m_Zero()))
+ return replaceOperand(
+ *I, 1, Constant::getNullValue(I->getOperand(1)->getType()));
+
+      // If every demanded bit is either known one in the mask or known zero
+      // in the pointer, the mask does nothing.
+ // NOTE: We may have attributes associated with the return value of the
+ // llvm.ptrmask intrinsic that will be lost when we just return the
+ // operand. We should try to preserve them.
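+      // E.g. `llvm.ptrmask(p, -4)` with `p` known 4-byte aligned: the two
+      // low bits are LHSKnown.Zero and every other bit is RHSKnown.One, so
+      // the mask is a no-op and `p` can be returned directly.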
+ if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
+ return I->getOperand(0);
+
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(
+ I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth)))
+ return I;
+
+ break;
+ }
+
case Intrinsic::fshr:
case Intrinsic::fshl: {
const APInt *SA;
@@ -918,7 +989,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Depth + 1))
return I;
} else { // fshl is a rotate
- // Avoid converting rotate into funnel shift.
+ // Avoid converting rotate into funnel shift.
// Only simplify if one operand is constant.
LHSKnown = computeKnownBits(I->getOperand(0), Depth + 1, I);
if (DemandedMaskLHS.isSubsetOf(LHSKnown.Zero | LHSKnown.One) &&
@@ -982,10 +1053,29 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
}
+ if (V->getType()->isPointerTy()) {
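+    // The pointer's alignment implies known-zero low bits; e.g. an align(8)
+    // pointer has its low 3 bits known zero.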
+ Align Alignment = V->getPointerAlignment(DL);
+ Known.Zero.setLowBits(Log2(Alignment));
+ }
+
// If the client is only demanding bits that we know, return the known
- // constant.
- if (DemandedMask.isSubsetOf(Known.Zero|Known.One))
+ // constant. We can't directly simplify pointers as a constant because of
+ // pointer provenance.
+ // TODO: We could return `(inttoptr const)` for pointers.
+ if (!V->getType()->isPointerTy() && DemandedMask.isSubsetOf(Known.Zero | Known.One))
return Constant::getIntegerValue(VTy, Known.One);
+
+ if (VerifyKnownBits) {
+ KnownBits ReferenceKnown = computeKnownBits(V, Depth, CxtI);
+ if (Known != ReferenceKnown) {
+ errs() << "Mismatched known bits for " << *V << " in "
+ << I->getFunction()->getName() << "\n";
+ errs() << "computeKnownBits(): " << ReferenceKnown << "\n";
+ errs() << "SimplifyDemandedBits(): " << Known << "\n";
+ std::abort();
+ }
+ }
+
return nullptr;
}
@@ -1009,8 +1099,9 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
case Instruction::And: {
computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
- Known = LHSKnown & RHSKnown;
- computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI));
+ Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
+ Depth, SQ.getWithInstruction(CxtI));
+ computeKnownBitsFromContext(I, Known, Depth, SQ.getWithInstruction(CxtI));
// If the client is only demanding bits that we know, return the known
// constant.
@@ -1029,8 +1120,9 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
case Instruction::Or: {
computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
- Known = LHSKnown | RHSKnown;
- computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI));
+ Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
+ Depth, SQ.getWithInstruction(CxtI));
+ computeKnownBitsFromContext(I, Known, Depth, SQ.getWithInstruction(CxtI));
// If the client is only demanding bits that we know, return the known
// constant.
@@ -1051,8 +1143,9 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
case Instruction::Xor: {
computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
- Known = LHSKnown ^ RHSKnown;
- computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI));
+ Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
+ Depth, SQ.getWithInstruction(CxtI));
+ computeKnownBitsFromContext(I, Known, Depth, SQ.getWithInstruction(CxtI));
// If the client is only demanding bits that we know, return the known
// constant.
@@ -1085,7 +1178,7 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
Known = KnownBits::computeForAddSub(/*Add*/ true, NSW, LHSKnown, RHSKnown);
- computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI));
+ computeKnownBitsFromContext(I, Known, Depth, SQ.getWithInstruction(CxtI));
break;
}
case Instruction::Sub: {
@@ -1101,7 +1194,7 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
Known = KnownBits::computeForAddSub(/*Add*/ false, NSW, LHSKnown, RHSKnown);
- computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI));
+ computeKnownBitsFromContext(I, Known, Depth, SQ.getWithInstruction(CxtI));
break;
}
case Instruction::AShr: {
@@ -1219,7 +1312,7 @@ Value *InstCombinerImpl::simplifyShrShlDemandedBits(
New->setIsExact(true);
}
- return InsertNewInstWith(New, *Shl);
+ return InsertNewInstWith(New, Shl->getIterator());
}
return nullptr;
@@ -1549,7 +1642,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
Instruction *New = InsertElementInst::Create(
Op, Value, ConstantInt::get(Type::getInt64Ty(I->getContext()), Idx),
Shuffle->getName());
- InsertNewInstWith(New, *Shuffle);
+ InsertNewInstWith(New, Shuffle->getIterator());
return New;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 4a5ffef2b08e..c8b58c51d4e6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -132,7 +132,7 @@ Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
// Create a scalar PHI node that will replace the vector PHI node
// just before the current PHI node.
PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
- PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
+ PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), PN->getIterator()));
// Scalarize each PHI operand.
for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
Value *PHIInVal = PN->getIncomingValue(i);
@@ -148,10 +148,10 @@ Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
Value *Op = InsertNewInstWith(
ExtractElementInst::Create(B0->getOperand(opId), Elt,
B0->getOperand(opId)->getName() + ".Elt"),
- *B0);
+ B0->getIterator());
Value *newPHIUser = InsertNewInstWith(
BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
- scalarPHI, Op, B0), *B0);
+ scalarPHI, Op, B0), B0->getIterator());
scalarPHI->addIncoming(newPHIUser, inBB);
} else {
// Scalarize PHI input:
@@ -165,7 +165,7 @@ Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
InsertPos = inBB->getFirstInsertionPt();
}
- InsertNewInstWith(newEI, *InsertPos);
+ InsertNewInstWith(newEI, InsertPos);
scalarPHI->addIncoming(newEI, inBB);
}
@@ -441,7 +441,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
if (IndexC->getValue().getActiveBits() <= BitWidth)
Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
else
- Idx = UndefValue::get(Ty);
+ Idx = PoisonValue::get(Ty);
return replaceInstUsesWith(EI, Idx);
}
}
@@ -742,7 +742,7 @@ static bool replaceExtractElements(InsertElementInst *InsElt,
if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
WideVec->insertAfter(ExtVecOpInst);
else
- IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
+ IC.InsertNewInstWith(WideVec, ExtElt->getParent()->getFirstInsertionPt());
// Replace extracts from the original narrow vector with extracts from the new
// wide vector.
@@ -751,7 +751,7 @@ static bool replaceExtractElements(InsertElementInst *InsElt,
if (!OldExt || OldExt->getParent() != WideVec->getParent())
continue;
auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
- IC.InsertNewInstWith(NewExt, *OldExt);
+ IC.InsertNewInstWith(NewExt, OldExt->getIterator());
IC.replaceInstUsesWith(*OldExt, NewExt);
// Add the old extracts to the worklist for DCE. We can't remove the
// extracts directly, because they may still be used by the calling code.
@@ -1121,7 +1121,7 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
// Note that the same block can be a predecessor more than once,
// and we need to preserve that invariant for the PHI node.
BuilderTy::InsertPointGuard Guard(Builder);
- Builder.SetInsertPoint(UseBB->getFirstNonPHI());
+ Builder.SetInsertPoint(UseBB, UseBB->getFirstNonPHIIt());
auto *PHI =
Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
for (BasicBlock *Pred : Preds)
@@ -2122,8 +2122,8 @@ static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
// A select mask with undef elements might look like an identity mask.
- assert((ShuffleVectorInst::isSelectMask(NewMask) ||
- ShuffleVectorInst::isIdentityMask(NewMask)) &&
+ assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) ||
+ ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
"Unexpected shuffle mask");
return new ShuffleVectorInst(X, Y, NewMask);
}
@@ -2197,9 +2197,9 @@ static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
!match(Op1, m_Undef()) || match(Mask, m_ZeroMask()) || IndexC == 0)
return nullptr;
- // Insert into element 0 of an undef vector.
- UndefValue *UndefVec = UndefValue::get(Shuf.getType());
- Value *NewIns = Builder.CreateInsertElement(UndefVec, X, (uint64_t)0);
+ // Insert into element 0 of a poison vector.
+ PoisonValue *PoisonVec = PoisonValue::get(Shuf.getType());
+ Value *NewIns = Builder.CreateInsertElement(PoisonVec, X, (uint64_t)0);
// Splat from element 0. Any mask element that is undefined remains undefined.
// For example:
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 767b7c7defbb..a7ddadc25de4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -130,13 +130,6 @@ STATISTIC(NumReassoc , "Number of reassociations");
DEBUG_COUNTER(VisitCounter, "instcombine-visit",
"Controls which instructions are visited");
-// FIXME: these limits eventually should be as low as 2.
-#ifndef NDEBUG
-static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 100;
-#else
-static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 1000;
-#endif
-
static cl::opt<bool>
EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
cl::init(true));
@@ -145,12 +138,6 @@ static cl::opt<unsigned> MaxSinkNumUsers(
"instcombine-max-sink-users", cl::init(32),
cl::desc("Maximum number of undroppable users for instruction sinking"));
-static cl::opt<unsigned> InfiniteLoopDetectionThreshold(
- "instcombine-infinite-loop-threshold",
- cl::desc("Number of instruction combining iterations considered an "
- "infinite loop"),
- cl::init(InstCombineDefaultInfiniteLoopThreshold), cl::Hidden);
-
static cl::opt<unsigned>
MaxArraySize("instcombine-maxarray-size", cl::init(1024),
cl::desc("Maximum array size considered when doing a combine"));
@@ -358,15 +345,19 @@ static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
// Fold the constants together in the destination type:
// (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
+ const DataLayout &DL = IC.getDataLayout();
Type *DestTy = C1->getType();
- Constant *CastC2 = ConstantExpr::getCast(CastOpcode, C2, DestTy);
- Constant *FoldedC =
- ConstantFoldBinaryOpOperands(AssocOpcode, C1, CastC2, IC.getDataLayout());
+ Constant *CastC2 = ConstantFoldCastOperand(CastOpcode, C2, DestTy, DL);
+ if (!CastC2)
+ return false;
+ Constant *FoldedC = ConstantFoldBinaryOpOperands(AssocOpcode, C1, CastC2, DL);
if (!FoldedC)
return false;
IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
IC.replaceOperand(*BinOp1, 1, FoldedC);
+ BinOp1->dropPoisonGeneratingFlags();
+ Cast->dropPoisonGeneratingFlags();
return true;
}
@@ -542,12 +533,12 @@ bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
BinaryOperator::Create(Opcode, A, B);
if (isa<FPMathOperator>(NewBO)) {
- FastMathFlags Flags = I.getFastMathFlags();
- Flags &= Op0->getFastMathFlags();
- Flags &= Op1->getFastMathFlags();
- NewBO->setFastMathFlags(Flags);
+ FastMathFlags Flags = I.getFastMathFlags() &
+ Op0->getFastMathFlags() &
+ Op1->getFastMathFlags();
+ NewBO->setFastMathFlags(Flags);
}
- InsertNewInstWith(NewBO, I);
+ InsertNewInstWith(NewBO, I.getIterator());
NewBO->takeName(Op1);
replaceOperand(I, 0, NewBO);
replaceOperand(I, 1, CRes);
@@ -619,7 +610,7 @@ static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
/// allow more factorization opportunities.
static Instruction::BinaryOps
getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
- Value *&LHS, Value *&RHS) {
+ Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) {
assert(Op && "Expected a binary operator");
LHS = Op->getOperand(0);
RHS = Op->getOperand(1);
@@ -632,6 +623,13 @@ getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
}
// TODO: We can add other conversions e.g. shr => div etc.
}
+ if (Instruction::isBitwiseLogicOp(TopOpcode)) {
+ if (OtherOp && OtherOp->getOpcode() == Instruction::AShr &&
+ match(Op, m_LShr(m_NonNegative(), m_Value()))) {
+ // lshr nneg C, X --> ashr nneg C, X
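+      // A non-negative value has a clear sign bit, so lshr and ashr agree
+      // on it; reporting AShr lets this factor against the AShr OtherOp.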
+ return Instruction::AShr;
+ }
+ }
return Op->getOpcode();
}
@@ -749,7 +747,16 @@ static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
// 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
//
// -> (BinOp (logic_shift (BinOp X, Y)), Mask)
+//
+// (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt))
+// IFF
+// 1) Binop1 is bitwise logical operator `and`, `or` or `xor`
+// 2) Binop2 is `not`
+//
+// -> (arithmetic_shift Binop1((not X), Y), Amt)
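+//
+// e.g. (or (xor (ashr X, Amt), -1), (ashr Y, Amt))
+//        --> (ashr (or (not X), Y), Amt)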
+
Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) {
+ const DataLayout &DL = I.getModule()->getDataLayout();
auto IsValidBinOpc = [](unsigned Opc) {
switch (Opc) {
default:
@@ -768,11 +775,13 @@ Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) {
// constraints.
auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
unsigned ShOpc) {
+ assert(ShOpc != Instruction::AShr);
return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
ShOpc == Instruction::Shl;
};
auto GetInvShift = [](unsigned ShOpc) {
+ assert(ShOpc != Instruction::AShr);
return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
};
@@ -796,23 +805,23 @@ Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) {
// Otherwise, need mask that meets the below requirement.
// (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
- return ConstantExpr::get(
- ShOpc, ConstantExpr::get(GetInvShift(ShOpc), CMask, CShift),
- CShift) == CMask;
+ Constant *MaskInvShift =
+ ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL);
+ return ConstantFoldBinaryOpOperands(ShOpc, MaskInvShift, CShift, DL) ==
+ CMask;
};
auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
Constant *CMask, *CShift;
Value *X, *Y, *ShiftedX, *Mask, *Shift;
if (!match(I.getOperand(ShOpnum),
- m_OneUse(m_LogicalShift(m_Value(Y), m_Value(Shift)))))
+ m_OneUse(m_Shift(m_Value(Y), m_Value(Shift)))))
return nullptr;
if (!match(I.getOperand(1 - ShOpnum),
m_BinOp(m_Value(ShiftedX), m_Value(Mask))))
return nullptr;
- if (!match(ShiftedX,
- m_OneUse(m_LogicalShift(m_Value(X), m_Specific(Shift)))))
+ if (!match(ShiftedX, m_OneUse(m_Shift(m_Value(X), m_Specific(Shift)))))
return nullptr;
// Make sure we are matching instruction shifts and not ConstantExpr
@@ -836,6 +845,18 @@ Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) {
if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
return nullptr;
+ if (ShOpc == Instruction::AShr) {
+ if (Instruction::isBitwiseLogicOp(I.getOpcode()) &&
+ BinOpc == Instruction::Xor && match(Mask, m_AllOnes())) {
+ Value *NotX = Builder.CreateNot(X);
+ Value *NewBinOp = Builder.CreateBinOp(I.getOpcode(), Y, NotX);
+ return BinaryOperator::Create(
+ static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp, Shift);
+ }
+
+ return nullptr;
+ }
+
// If BinOp1 == BinOp2 and it's bitwise or shl with add, then just
// distribute to drop the shift irrelevant of constants.
if (BinOpc == I.getOpcode() &&
@@ -857,7 +878,8 @@ Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) {
if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
return nullptr;
- Constant *NewCMask = ConstantExpr::get(GetInvShift(ShOpc), CMask, CShift);
+ Constant *NewCMask =
+ ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL);
Value *NewBinOp2 = Builder.CreateBinOp(
static_cast<Instruction::BinaryOps>(BinOpc), X, NewCMask);
Value *NewBinOp1 = Builder.CreateBinOp(I.getOpcode(), Y, NewBinOp2);
@@ -906,7 +928,7 @@ InstCombinerImpl::foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I) {
auto NewFoldedConst = [&](bool IsTrueArm, Value *V) {
bool IsCastOpRHS = (CastOp == RHS);
- bool IsZExt = isa<ZExtOperator>(CastOp);
+ bool IsZExt = isa<ZExtInst>(CastOp);
Constant *C;
if (IsTrueArm) {
@@ -924,13 +946,17 @@ InstCombinerImpl::foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I) {
// If the value used in the zext/sext is the select condition, or the negated
// of the select condition, the binop can be simplified.
- if (CondVal == A)
- return SelectInst::Create(CondVal, NewFoldedConst(false, TrueVal),
+ if (CondVal == A) {
+ Value *NewTrueVal = NewFoldedConst(false, TrueVal);
+ return SelectInst::Create(CondVal, NewTrueVal,
NewFoldedConst(true, FalseVal));
+ }
- if (match(A, m_Not(m_Specific(CondVal))))
- return SelectInst::Create(CondVal, NewFoldedConst(true, TrueVal),
+ if (match(A, m_Not(m_Specific(CondVal)))) {
+ Value *NewTrueVal = NewFoldedConst(true, TrueVal);
+ return SelectInst::Create(CondVal, NewTrueVal,
NewFoldedConst(false, FalseVal));
+ }
return nullptr;
}
@@ -944,9 +970,9 @@ Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) {
Instruction::BinaryOps LHSOpcode, RHSOpcode;
if (Op0)
- LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B);
+ LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B, Op1);
if (Op1)
- RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D);
+ RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D, Op0);
// The instruction has the form "(A op' B) op (C op' D)". Try to factorize
// a common term.
@@ -1113,6 +1139,14 @@ Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
};
if (LHSIsSelect && RHSIsSelect && A == D) {
+ // op(select(%v, %x, %y), select(%v, %y, %x)) --> op(%x, %y)
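+    // For a commutative op both arms agree: the true arm gives B op E and
+    // the false arm gives C op F == E op B.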
+ if (I.isCommutative() && B == F && C == E) {
+ Value *BI = Builder.CreateBinOp(I.getOpcode(), B, E);
+ if (auto *BO = dyn_cast<BinaryOperator>(BI))
+ BO->copyIRFlags(&I);
+ return BI;
+ }
+
// (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
Cond = A;
True = simplifyBinOp(Opcode, B, E, FMF, Q);
@@ -1167,6 +1201,8 @@ void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) {
break;
case Instruction::Xor:
replaceInstUsesWith(cast<Instruction>(*U), I);
+ // Add to worklist for DCE.
+ addToWorklist(cast<Instruction>(U));
break;
default:
llvm_unreachable("Got unexpected user - out of sync with "
@@ -1268,7 +1304,7 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
Value *NewOp, InstCombiner &IC) {
Instruction *Clone = I.clone();
Clone->replaceUsesOfWith(SI, NewOp);
- IC.InsertNewInstBefore(Clone, *SI);
+ IC.InsertNewInstBefore(Clone, SI->getIterator());
return Clone;
}
@@ -1302,6 +1338,21 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
return nullptr;
}
+  // Test if an FCmpInst instruction is used exclusively by a select as
+  // part of a minimum or maximum operation. If so, refrain from doing
+  // any other folding. This helps out other analyses which understand
+  // non-obfuscated minimum and maximum idioms. In that case, at least
+  // one of the comparison operands has a user besides the compare (the
+  // select), which would largely negate the benefit of folding anyway.
+ if (auto *CI = dyn_cast<FCmpInst>(SI->getCondition())) {
+ if (CI->hasOneUse()) {
+ Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ if ((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1))
+ return nullptr;
+ }
+ }
+
// Make sure that one of the select arms constant folds successfully.
Value *NewTV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ true);
Value *NewFV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ false);
@@ -1316,6 +1367,47 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
}
+static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN,
+ Value *InValue, BasicBlock *InBB,
+ const DataLayout &DL,
+ const SimplifyQuery SQ) {
+ // NB: It is a precondition of this transform that the operands be
+ // phi translatable! This is usually trivially satisfied by limiting it
+ // to constant ops, and for selects we do a more sophisticated check.
+ SmallVector<Value *> Ops;
+ for (Value *Op : I.operands()) {
+ if (Op == PN)
+ Ops.push_back(InValue);
+ else
+ Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB));
+ }
+
+ // Don't consider the simplification successful if we get back a constant
+ // expression. That's just an instruction in hiding.
+ // Also reject the case where we simplify back to the phi node. We wouldn't
+ // be able to remove it in that case.
+ Value *NewVal = simplifyInstructionWithOperands(
+ &I, Ops, SQ.getWithInstruction(InBB->getTerminator()));
+ if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr()))
+ return NewVal;
+
+ // Check if incoming PHI value can be replaced with constant
+ // based on implied condition.
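+  // E.g. if InBB ends in `br i1 (icmp eq %a, 0), %PhiBB, %Else`, an
+  // incoming `icmp eq %a, 0` (after phi translation) folds to true.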
+ BranchInst *TerminatorBI = dyn_cast<BranchInst>(InBB->getTerminator());
+ const ICmpInst *ICmp = dyn_cast<ICmpInst>(&I);
+ if (TerminatorBI && TerminatorBI->isConditional() &&
+ TerminatorBI->getSuccessor(0) != TerminatorBI->getSuccessor(1) && ICmp) {
+ bool LHSIsTrue = TerminatorBI->getSuccessor(0) == PN->getParent();
+ std::optional<bool> ImpliedCond =
+ isImpliedCondition(TerminatorBI->getCondition(), ICmp->getPredicate(),
+ Ops[0], Ops[1], DL, LHSIsTrue);
+ if (ImpliedCond)
+ return ConstantInt::getBool(I.getType(), ImpliedCond.value());
+ }
+
+ return nullptr;
+}
+
Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
unsigned NumPHIValues = PN->getNumIncomingValues();
if (NumPHIValues == 0)
@@ -1344,29 +1436,11 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
Value *InVal = PN->getIncomingValue(i);
BasicBlock *InBB = PN->getIncomingBlock(i);
- // NB: It is a precondition of this transform that the operands be
- // phi translatable! This is usually trivially satisfied by limiting it
- // to constant ops, and for selects we do a more sophisticated check.
- SmallVector<Value *> Ops;
- for (Value *Op : I.operands()) {
- if (Op == PN)
- Ops.push_back(InVal);
- else
- Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB));
- }
-
- // Don't consider the simplification successful if we get back a constant
- // expression. That's just an instruction in hiding.
- // Also reject the case where we simplify back to the phi node. We wouldn't
- // be able to remove it in that case.
- Value *NewVal = simplifyInstructionWithOperands(
- &I, Ops, SQ.getWithInstruction(InBB->getTerminator()));
- if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr())) {
+ if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InVal, InBB, DL, SQ)) {
NewPhiValues.push_back(NewVal);
continue;
}
- if (isa<PHINode>(InVal)) return nullptr; // Itself a phi.
if (NonSimplifiedBB) return nullptr; // More than one non-simplified value.
NonSimplifiedBB = InBB;
@@ -1402,7 +1476,7 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
// Okay, we can do the transformation: create the new PHI node.
PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
- InsertNewInstBefore(NewPN, *PN);
+ InsertNewInstBefore(NewPN, PN->getIterator());
NewPN->takeName(PN);
NewPN->setDebugLoc(PN->getDebugLoc());
@@ -1417,7 +1491,7 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
else
U = U->DoPHITranslation(PN->getParent(), NonSimplifiedBB);
}
- InsertNewInstBefore(Clone, *NonSimplifiedBB->getTerminator());
+ InsertNewInstBefore(Clone, NonSimplifiedBB->getTerminator()->getIterator());
}
for (unsigned i = 0; i != NumPHIValues; ++i) {
@@ -1848,8 +1922,8 @@ Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
Constant *WideC;
if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
return nullptr;
- Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType());
- if (ConstantExpr::getCast(CastOpc, NarrowC, BO.getType()) != WideC)
+ Constant *NarrowC = getLosslessTrunc(WideC, X->getType(), CastOpc);
+ if (!NarrowC)
return nullptr;
Y = NarrowC;
}
@@ -1940,7 +2014,7 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
if (NumVarIndices != Src->getNumIndices()) {
// FIXME: getIndexedOffsetInType() does not handled scalable vectors.
- if (isa<ScalableVectorType>(BaseType))
+ if (BaseType->isScalableTy())
return nullptr;
SmallVector<Value *> ConstantIndices;
@@ -2048,12 +2122,116 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
return nullptr;
}
+Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
+ BuilderTy *Builder,
+ bool &DoesConsume, unsigned Depth) {
+ static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1));
+ // ~(~(X)) -> X.
+ Value *A, *B;
+ if (match(V, m_Not(m_Value(A)))) {
+ DoesConsume = true;
+ return A;
+ }
+
+ Constant *C;
+ // Constants can be considered to be not'ed values.
+ if (match(V, m_ImmConstant(C)))
+ return ConstantExpr::getNot(C);
+
+ if (Depth++ >= MaxAnalysisRecursionDepth)
+ return nullptr;
+
+ // The rest of the cases require that we invert all uses so don't bother
+ // doing the analysis if we know we can't use the result.
+ if (!WillInvertAllUses)
+ return nullptr;
+
+ // Compares can be inverted if all of their uses are being modified to use
+ // the ~V.
+ if (auto *I = dyn_cast<CmpInst>(V)) {
+ if (Builder != nullptr)
+ return Builder->CreateCmp(I->getInversePredicate(), I->getOperand(0),
+ I->getOperand(1));
+ return NonNull;
+ }
+
+ // If `V` is of the form `A + B` then `-1 - V` can be folded into
+ // `(-1 - B) - A` if we are willing to invert all of the uses.
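+  // (~V is -1 - V, and -1 - (A + B) == (-1 - B) - A.)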
+ if (match(V, m_Add(m_Value(A), m_Value(B)))) {
+ if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
+ DoesConsume, Depth))
+ return Builder ? Builder->CreateSub(BV, A) : NonNull;
+ if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
+ DoesConsume, Depth))
+ return Builder ? Builder->CreateSub(AV, B) : NonNull;
+ return nullptr;
+ }
+
+ // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded
+ // into `A ^ B` if we are willing to invert all of the uses.
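+  // (~(A ^ B) == A ^ ~B, so inverting either operand inverts the xor.)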
+ if (match(V, m_Xor(m_Value(A), m_Value(B)))) {
+ if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
+ DoesConsume, Depth))
+ return Builder ? Builder->CreateXor(A, BV) : NonNull;
+ if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
+ DoesConsume, Depth))
+ return Builder ? Builder->CreateXor(AV, B) : NonNull;
+ return nullptr;
+ }
+
+  // If `V` is of the form `A - B` then `-1 - V` can be folded into
+  // `(-1 - A) + B` if we are willing to invert all of the uses.
+ if (match(V, m_Sub(m_Value(A), m_Value(B)))) {
+ if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
+ DoesConsume, Depth))
+ return Builder ? Builder->CreateAdd(AV, B) : NonNull;
+ return nullptr;
+ }
+
+ // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded
+ // into `A s>> B` if we are willing to invert all of the uses.
+ if (match(V, m_AShr(m_Value(A), m_Value(B)))) {
+ if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
+ DoesConsume, Depth))
+ return Builder ? Builder->CreateAShr(AV, B) : NonNull;
+ return nullptr;
+ }
+
+ Value *Cond;
+ // LogicOps are special in that we canonicalize them at the cost of an
+ // instruction.
+ bool IsSelect = match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))) &&
+ !shouldAvoidAbsorbingNotIntoSelect(*cast<SelectInst>(V));
+ // Selects/min/max with invertible operands are freely invertible
+ if (IsSelect || match(V, m_MaxOrMin(m_Value(A), m_Value(B)))) {
+ if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder*/ nullptr,
+ DoesConsume, Depth))
+ return nullptr;
+ if (Value *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
+ DoesConsume, Depth)) {
+ if (Builder != nullptr) {
+ Value *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
+ DoesConsume, Depth);
+ assert(NotB != nullptr &&
+ "Unable to build inverted value for known freely invertable op");
+ if (auto *II = dyn_cast<IntrinsicInst>(V))
+ return Builder->CreateBinaryIntrinsic(
+ getInverseMinMaxIntrinsic(II->getIntrinsicID()), NotA, NotB);
+ return Builder->CreateSelect(Cond, NotA, NotB);
+ }
+ return NonNull;
+ }
+ }
+
+ return nullptr;
+}
+
Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Value *PtrOp = GEP.getOperand(0);
SmallVector<Value *, 8> Indices(GEP.indices());
Type *GEPType = GEP.getType();
Type *GEPEltType = GEP.getSourceElementType();
- bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
+ bool IsGEPSrcEleScalable = GEPEltType->isScalableTy();
if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
SQ.getWithInstruction(&GEP)))
return replaceInstUsesWith(GEP, V);
@@ -2221,7 +2399,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
NewGEP->setOperand(DI, NewPN);
}
- NewGEP->insertInto(GEP.getParent(), GEP.getParent()->getFirstInsertionPt());
+ NewGEP->insertBefore(*GEP.getParent(), GEP.getParent()->getFirstInsertionPt());
return replaceOperand(GEP, 0, NewGEP);
}
@@ -2264,11 +2442,43 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType);
}
}
-
// We do not handle pointer-vector geps here.
if (GEPType->isVectorTy())
return nullptr;
+ if (GEP.getNumIndices() == 1) {
+ // Try to replace ADD + GEP with GEP + GEP.
+ Value *Idx1, *Idx2;
+ if (match(GEP.getOperand(1),
+ m_OneUse(m_Add(m_Value(Idx1), m_Value(Idx2))))) {
+ // %idx = add i64 %idx1, %idx2
+ // %gep = getelementptr i32, ptr %ptr, i64 %idx
+ // as:
+ // %newptr = getelementptr i32, ptr %ptr, i64 %idx1
+ // %newgep = getelementptr i32, ptr %newptr, i64 %idx2
+ auto *NewPtr = Builder.CreateGEP(GEP.getResultElementType(),
+ GEP.getPointerOperand(), Idx1);
+ return GetElementPtrInst::Create(GEP.getResultElementType(), NewPtr,
+ Idx2);
+ }
+ ConstantInt *C;
+ if (match(GEP.getOperand(1), m_OneUse(m_SExt(m_OneUse(m_NSWAdd(
+ m_Value(Idx1), m_ConstantInt(C))))))) {
+      // %add = add nsw i32 %idx1, C
+      // %sidx = sext i32 %add to i64
+      // %gep = getelementptr i32, ptr %ptr, i64 %sidx
+      // as:
+      // %newptr = getelementptr i32, ptr %ptr, i64 (sext i32 %idx1 to i64)
+      // %newgep = getelementptr i32, ptr %newptr, i64 (sext i32 C to i64)
+ auto *NewPtr = Builder.CreateGEP(
+ GEP.getResultElementType(), GEP.getPointerOperand(),
+ Builder.CreateSExt(Idx1, GEP.getOperand(1)->getType()));
+ return GetElementPtrInst::Create(
+ GEP.getResultElementType(), NewPtr,
+ Builder.CreateSExt(C, GEP.getOperand(1)->getType()));
+ }
+ }
+
if (!GEP.isInBounds()) {
unsigned IdxWidth =
DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
@@ -2362,6 +2572,26 @@ static bool isAllocSiteRemovable(Instruction *AI,
unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
return false;
+
+ // Do not fold compares to aligned_alloc calls, as they may have to
+ // return null in case the required alignment cannot be satisfied,
+ // unless we can prove that both alignment and size are valid.
+ auto AlignmentAndSizeKnownValid = [](CallBase *CB) {
+ // Check if alignment and size of a call to aligned_alloc is valid,
+ // that is alignment is a power-of-2 and the size is a multiple of the
+ // alignment.
+ const APInt *Alignment;
+ const APInt *Size;
+ return match(CB->getArgOperand(0), m_APInt(Alignment)) &&
+ match(CB->getArgOperand(1), m_APInt(Size)) &&
+ Alignment->isPowerOf2() && Size->urem(*Alignment).isZero();
+ };
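+      // E.g. a compare against aligned_alloc(16, 64) may be folded, but
+      // aligned_alloc(16, 20) can be forced to return null (20 is not a
+      // multiple of 16), so that compare must be kept.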
+ auto *CB = dyn_cast<CallBase>(AI);
+ LibFunc TheLibFunc;
+ if (CB && TLI.getLibFunc(*CB->getCalledFunction(), TheLibFunc) &&
+ TLI.has(TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc &&
+ !AlignmentAndSizeKnownValid(CB))
+ return false;
Users.emplace_back(I);
continue;
}
@@ -2451,9 +2681,10 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
// If we are removing an alloca with a dbg.declare, insert dbg.value calls
// before each store.
SmallVector<DbgVariableIntrinsic *, 8> DVIs;
+ SmallVector<DPValue *, 8> DPVs;
std::unique_ptr<DIBuilder> DIB;
if (isa<AllocaInst>(MI)) {
- findDbgUsers(DVIs, &MI);
+ findDbgUsers(DVIs, &MI, &DPVs);
DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
}
@@ -2493,6 +2724,9 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
for (auto *DVI : DVIs)
if (DVI->isAddressOfVariable())
ConvertDebugDeclareToDebugValue(DVI, SI, *DIB);
+ for (auto *DPV : DPVs)
+ if (DPV->isAddressOfVariable())
+ ConvertDebugDeclareToDebugValue(DPV, SI, *DIB);
} else {
// Casts, GEP, or anything else: we're about to delete this instruction,
// so it can not have any valid uses.
@@ -2531,9 +2765,15 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
// If there is a dead store to `%a` in @trivially_inlinable_no_op, the
// "arg0" dbg.value may be stale after the call. However, failing to remove
// the DW_OP_deref dbg.value causes large gaps in location coverage.
+ //
+ // FIXME: the Assignment Tracking project has now likely made this
+ // redundant (and it's sometimes harmful).
for (auto *DVI : DVIs)
if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref())
DVI->eraseFromParent();
+ for (auto *DPV : DPVs)
+ if (DPV->isAddressOfVariable() || DPV->getExpression()->startsWithDeref())
+ DPV->eraseFromParent();
return eraseInstFromFunction(MI);
}
@@ -2612,7 +2852,7 @@ static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
if (&Instr == FreeInstrBBTerminator)
break;
- Instr.moveBefore(TI);
+ Instr.moveBeforePreserving(TI);
}
assert(FreeInstrBB->size() == 1 &&
"Only the branch instruction should remain");
@@ -2746,55 +2986,77 @@ Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
return nullptr;
}
+void InstCombinerImpl::addDeadEdge(BasicBlock *From, BasicBlock *To,
+ SmallVectorImpl<BasicBlock *> &Worklist) {
+ if (!DeadEdges.insert({From, To}).second)
+ return;
+
+ // Replace phi node operands in successor with poison.
+ for (PHINode &PN : To->phis())
+ for (Use &U : PN.incoming_values())
+ if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(U)) {
+ replaceUse(U, PoisonValue::get(PN.getType()));
+ addToWorklist(&PN);
+ MadeIRChange = true;
+ }
+
+ Worklist.push_back(To);
+}
+
// Under the assumption that I is unreachable, remove it and following
-// instructions.
-bool InstCombinerImpl::handleUnreachableFrom(Instruction *I) {
- bool Changed = false;
+// instructions. Changes are reported directly to MadeIRChange.
+void InstCombinerImpl::handleUnreachableFrom(
+ Instruction *I, SmallVectorImpl<BasicBlock *> &Worklist) {
BasicBlock *BB = I->getParent();
for (Instruction &Inst : make_early_inc_range(
make_range(std::next(BB->getTerminator()->getReverseIterator()),
std::next(I->getReverseIterator())))) {
if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
replaceInstUsesWith(Inst, PoisonValue::get(Inst.getType()));
- Changed = true;
+ MadeIRChange = true;
}
if (Inst.isEHPad() || Inst.getType()->isTokenTy())
continue;
+ // RemoveDIs: erase debug-info on this instruction manually.
+ Inst.dropDbgValues();
eraseInstFromFunction(Inst);
- Changed = true;
+ MadeIRChange = true;
}
- // Replace phi node operands in successor blocks with poison.
+ // RemoveDIs: to match behaviour in dbg.value mode, drop debug-info on
+ // terminator too.
+ BB->getTerminator()->dropDbgValues();
+
+ // Handle potentially dead successors.
for (BasicBlock *Succ : successors(BB))
- for (PHINode &PN : Succ->phis())
- for (Use &U : PN.incoming_values())
- if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(U)) {
- replaceUse(U, PoisonValue::get(PN.getType()));
- addToWorklist(&PN);
- Changed = true;
- }
+ addDeadEdge(BB, Succ, Worklist);
+}
- // TODO: Successor blocks may also be dead.
- return Changed;
+void InstCombinerImpl::handlePotentiallyDeadBlocks(
+ SmallVectorImpl<BasicBlock *> &Worklist) {
+ while (!Worklist.empty()) {
+ BasicBlock *BB = Worklist.pop_back_val();
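+    // A block is dead once every predecessor edge is dead or the
+    // predecessor is dominated by the block itself (a backedge into an
+    // otherwise-dead loop).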
+ if (!all_of(predecessors(BB), [&](BasicBlock *Pred) {
+ return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
+ }))
+ continue;
+
+ handleUnreachableFrom(&BB->front(), Worklist);
+ }
}
-bool InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB,
+void InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB,
BasicBlock *LiveSucc) {
- bool Changed = false;
+ SmallVector<BasicBlock *> Worklist;
for (BasicBlock *Succ : successors(BB)) {
// The live successor isn't dead.
if (Succ == LiveSucc)
continue;
- if (!all_of(predecessors(Succ), [&](BasicBlock *Pred) {
- return DT.dominates(BasicBlockEdge(BB, Succ),
- BasicBlockEdge(Pred, Succ));
- }))
- continue;
-
- Changed |= handleUnreachableFrom(&Succ->front());
+ addDeadEdge(BB, Succ, Worklist);
}
- return Changed;
+
+ handlePotentiallyDeadBlocks(Worklist);
}
Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
@@ -2840,14 +3102,17 @@ Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
return &BI;
}
- if (isa<UndefValue>(Cond) &&
- handlePotentiallyDeadSuccessors(BI.getParent(), /*LiveSucc*/ nullptr))
- return &BI;
- if (auto *CI = dyn_cast<ConstantInt>(Cond))
- if (handlePotentiallyDeadSuccessors(BI.getParent(),
- BI.getSuccessor(!CI->getZExtValue())))
- return &BI;
+ if (isa<UndefValue>(Cond)) {
+ handlePotentiallyDeadSuccessors(BI.getParent(), /*LiveSucc*/ nullptr);
+ return nullptr;
+ }
+ if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
+ handlePotentiallyDeadSuccessors(BI.getParent(),
+ BI.getSuccessor(!CI->getZExtValue()));
+ return nullptr;
+ }
+ DC.registerBranch(&BI);
return nullptr;
}
@@ -2866,14 +3131,6 @@ Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
return replaceOperand(SI, 0, Op0);
}
- if (isa<UndefValue>(Cond) &&
- handlePotentiallyDeadSuccessors(SI.getParent(), /*LiveSucc*/ nullptr))
- return &SI;
- if (auto *CI = dyn_cast<ConstantInt>(Cond))
- if (handlePotentiallyDeadSuccessors(
- SI.getParent(), SI.findCaseValue(CI)->getCaseSuccessor()))
- return &SI;
-
KnownBits Known = computeKnownBits(Cond, 0, &SI);
unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
@@ -2906,6 +3163,16 @@ Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
return replaceOperand(SI, 0, NewCond);
}
+ if (isa<UndefValue>(Cond)) {
+ handlePotentiallyDeadSuccessors(SI.getParent(), /*LiveSucc*/ nullptr);
+ return nullptr;
+ }
+ if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
+ handlePotentiallyDeadSuccessors(SI.getParent(),
+ SI.findCaseValue(CI)->getCaseSuccessor());
+ return nullptr;
+ }
+
return nullptr;
}
@@ -3532,7 +3799,7 @@ Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
Value *StartV = StartU->get();
BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
- // We can't insert freeze if the the start value is the result of the
+ // We can't insert freeze if the start value is the result of the
// terminator (e.g. an invoke).
if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
return nullptr;
@@ -3583,19 +3850,27 @@ bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
// *all* uses if the operand is an invoke/callbr and the use is in a phi on
// the normal/default destination. This is why the domination check in the
// replacement below is still necessary.
- Instruction *MoveBefore;
+ BasicBlock::iterator MoveBefore;
if (isa<Argument>(Op)) {
MoveBefore =
- &*FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
+ FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
} else {
- MoveBefore = cast<Instruction>(Op)->getInsertionPointAfterDef();
- if (!MoveBefore)
+ auto MoveBeforeOpt = cast<Instruction>(Op)->getInsertionPointAfterDef();
+ if (!MoveBeforeOpt)
return false;
+ MoveBefore = *MoveBeforeOpt;
}
+ // Don't move to the position of a debug intrinsic.
+ if (isa<DbgInfoIntrinsic>(MoveBefore))
+ MoveBefore = MoveBefore->getNextNonDebugInstruction()->getIterator();
+ // Re-point iterator to come after any debug-info records, if we're
+  // running in "RemoveDIs" mode.
+ MoveBefore.setHeadBit(false);
+
bool Changed = false;
- if (&FI != MoveBefore) {
- FI.moveBefore(MoveBefore);
+ if (&FI != &*MoveBefore) {
+ FI.moveBefore(*MoveBefore->getParent(), MoveBefore);
Changed = true;
}
@@ -3798,7 +4073,7 @@ bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
/// the new position.
BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
- I->moveBefore(&*InsertPos);
+ I->moveBefore(*DestBlock, InsertPos);
++NumSunkInst;
// Also sink all related debug uses from the source basic block. Otherwise we
@@ -3808,10 +4083,19 @@ bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
// here, but that computation has been sunk.
SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
findDbgUsers(DbgUsers, I);
- // Process the sinking DbgUsers in reverse order, as we only want to clone the
- // last appearing debug intrinsic for each given variable.
+
+ // For all debug values in the destination block, the sunk instruction
+ // will still be available, so they do not need to be dropped.
+ SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSalvage;
+ SmallVector<DPValue *, 2> DPValuesToSalvage;
+ for (auto &DbgUser : DbgUsers)
+ if (DbgUser->getParent() != DestBlock)
+ DbgUsersToSalvage.push_back(DbgUser);
+
+ // Process the sinking DbgUsersToSalvage in reverse order, as we only want
+ // to clone the last appearing debug intrinsic for each given variable.
SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSink;
- for (DbgVariableIntrinsic *DVI : DbgUsers)
+ for (DbgVariableIntrinsic *DVI : DbgUsersToSalvage)
if (DVI->getParent() == SrcBlock)
DbgUsersToSink.push_back(DVI);
llvm::sort(DbgUsersToSink,
@@ -3847,7 +4131,10 @@ bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
// Perform salvaging without the clones, then sink the clones.
if (!DIIClones.empty()) {
- salvageDebugInfoForDbgValues(*I, DbgUsers);
+ // RemoveDIs: pass in empty vector of DPValues until we get to instrumenting
+ // this pass.
+ SmallVector<DPValue *, 1> DummyDPValues;
+ salvageDebugInfoForDbgValues(*I, DbgUsersToSalvage, DummyDPValues);
// The clones are in reverse order of original appearance, reverse again to
// maintain the original order.
for (auto &DIIClone : llvm::reverse(DIIClones)) {
@@ -4093,43 +4380,52 @@ public:
}
};
-/// Populate the IC worklist from a function, by walking it in depth-first
-/// order and adding all reachable code to the worklist.
+/// Populate the IC worklist from a function, by walking it in reverse
+/// post-order and adding all reachable code to the worklist.
///
/// This has a couple of tricks to make the code faster and more powerful. In
/// particular, we constant fold and DCE instructions as we go, to avoid adding
/// them to the worklist (this significantly speeds up instcombine on code where
/// many instructions are dead or constant). Additionally, if we find a branch
/// whose condition is a known constant, we only visit the reachable successors.
-static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- InstructionWorklist &ICWorklist) {
+bool InstCombinerImpl::prepareWorklist(
+ Function &F, ReversePostOrderTraversal<BasicBlock *> &RPOT) {
bool MadeIRChange = false;
- SmallPtrSet<BasicBlock *, 32> Visited;
- SmallVector<BasicBlock*, 256> Worklist;
- Worklist.push_back(&F.front());
-
+ SmallPtrSet<BasicBlock *, 32> LiveBlocks;
SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
DenseMap<Constant *, Constant *> FoldedConstants;
AliasScopeTracker SeenAliasScopes;
- do {
- BasicBlock *BB = Worklist.pop_back_val();
+ auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
+ for (BasicBlock *Succ : successors(BB))
+ if (Succ != LiveSucc && DeadEdges.insert({BB, Succ}).second)
+ for (PHINode &PN : Succ->phis())
+ for (Use &U : PN.incoming_values())
+ if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(U)) {
+ U.set(PoisonValue::get(PN.getType()));
+ MadeIRChange = true;
+ }
+ };
- // We have now visited this block! If we've already been here, ignore it.
- if (!Visited.insert(BB).second)
+ for (BasicBlock *BB : RPOT) {
+ if (!BB->isEntryBlock() && all_of(predecessors(BB), [&](BasicBlock *Pred) {
+ return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
+ })) {
+ HandleOnlyLiveSuccessor(BB, nullptr);
continue;
+ }
+ LiveBlocks.insert(BB);
for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
// ConstantProp instruction if trivially constant.
if (!Inst.use_empty() &&
(Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
- if (Constant *C = ConstantFoldInstruction(&Inst, DL, TLI)) {
+ if (Constant *C = ConstantFoldInstruction(&Inst, DL, &TLI)) {
LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
<< '\n');
Inst.replaceAllUsesWith(C);
++NumConstProp;
- if (isInstructionTriviallyDead(&Inst, TLI))
+ if (isInstructionTriviallyDead(&Inst, &TLI))
Inst.eraseFromParent();
MadeIRChange = true;
continue;
@@ -4143,7 +4439,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
auto *C = cast<Constant>(U);
Constant *&FoldRes = FoldedConstants[C];
if (!FoldRes)
- FoldRes = ConstantFoldConstant(C, DL, TLI);
+ FoldRes = ConstantFoldConstant(C, DL, &TLI);
if (FoldRes != C) {
LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
@@ -4163,37 +4459,39 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
}
}
- // Recursively visit successors. If this is a branch or switch on a
- // constant, only visit the reachable successor.
+ // If this is a branch or switch on a constant, mark only the single
+ // live successor. Otherwise assume all successors are live.
Instruction *TI = BB->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(TI); BI && BI->isConditional()) {
- if (isa<UndefValue>(BI->getCondition()))
+ if (isa<UndefValue>(BI->getCondition())) {
// Branch on undef is UB.
+ HandleOnlyLiveSuccessor(BB, nullptr);
continue;
+ }
if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
bool CondVal = Cond->getZExtValue();
- BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
- Worklist.push_back(ReachableBB);
+ HandleOnlyLiveSuccessor(BB, BI->getSuccessor(!CondVal));
continue;
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- if (isa<UndefValue>(SI->getCondition()))
+ if (isa<UndefValue>(SI->getCondition())) {
// Switch on undef is UB.
+ HandleOnlyLiveSuccessor(BB, nullptr);
continue;
+ }
if (auto *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
- Worklist.push_back(SI->findCaseValue(Cond)->getCaseSuccessor());
+ HandleOnlyLiveSuccessor(BB,
+ SI->findCaseValue(Cond)->getCaseSuccessor());
continue;
}
}
-
- append_range(Worklist, successors(TI));
- } while (!Worklist.empty());
+ }
// Remove instructions inside unreachable blocks. This prevents the
// instcombine code from having to deal with some bad special cases, and
// reduces use counts of instructions.
for (BasicBlock &BB : F) {
- if (Visited.count(&BB))
+ if (LiveBlocks.count(&BB))
continue;
unsigned NumDeadInstInBB;
@@ -4210,11 +4508,11 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
// of the function down. This jives well with the way that it adds all uses
// of instructions to the worklist after doing a transformation, thus avoiding
// some N^2 behavior in pathological cases.
- ICWorklist.reserve(InstrsForInstructionWorklist.size());
+ Worklist.reserve(InstrsForInstructionWorklist.size());
for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
// DCE instruction if trivially dead. As we iterate in reverse program
// order here, we will clean up whole chains of dead instructions.
- if (isInstructionTriviallyDead(Inst, TLI) ||
+ if (isInstructionTriviallyDead(Inst, &TLI) ||
SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
++NumDeadInst;
LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
@@ -4224,7 +4522,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
continue;
}
- ICWorklist.push(Inst);
+ Worklist.push(Inst);
}
return MadeIRChange;
@@ -4234,7 +4532,7 @@ static bool combineInstructionsOverFunction(
Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
- ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI) {
+ ProfileSummaryInfo *PSI, LoopInfo *LI, const InstCombineOptions &Opts) {
auto &DL = F.getParent()->getDataLayout();
/// Builder - This is an IRBuilder that automatically inserts new
@@ -4247,6 +4545,8 @@ static bool combineInstructionsOverFunction(
AC.registerAssumption(Assume);
}));
+ ReversePostOrderTraversal<BasicBlock *> RPOT(&F.front());
+
// Lower dbg.declare intrinsics otherwise their value may be clobbered
// by instcombiner.
bool MadeIRChange = false;
@@ -4256,35 +4556,33 @@ static bool combineInstructionsOverFunction(
// Iterate while there is work to do.
unsigned Iteration = 0;
while (true) {
- ++NumWorklistIterations;
++Iteration;
- if (Iteration > InfiniteLoopDetectionThreshold) {
- report_fatal_error(
- "Instruction Combining seems stuck in an infinite loop after " +
- Twine(InfiniteLoopDetectionThreshold) + " iterations.");
- }
-
- if (Iteration > MaxIterations) {
- LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << MaxIterations
+ if (Iteration > Opts.MaxIterations && !Opts.VerifyFixpoint) {
+ LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations
<< " on " << F.getName()
- << " reached; stopping before reaching a fixpoint\n");
+ << " reached; stopping without verifying fixpoint\n");
break;
}
+ ++NumWorklistIterations;
LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
<< F.getName() << "\n");
- MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
-
InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
ORE, BFI, PSI, DL, LI);
IC.MaxArraySizeForCombine = MaxArraySize;
-
- if (!IC.run())
+ bool MadeChangeInThisIteration = IC.prepareWorklist(F, RPOT);
+ MadeChangeInThisIteration |= IC.run();
+ if (!MadeChangeInThisIteration)
break;
MadeIRChange = true;
+ if (Iteration > Opts.MaxIterations) {
+ report_fatal_error(
+ "Instruction Combining did not reach a fixpoint after " +
+ Twine(Opts.MaxIterations) + " iterations");
+ }
}
if (Iteration == 1)
@@ -4307,7 +4605,8 @@ void InstCombinePass::printPipeline(
OS, MapClassName2PassName);
OS << '<';
OS << "max-iterations=" << Options.MaxIterations << ";";
- OS << (Options.UseLoopInfo ? "" : "no-") << "use-loop-info";
+ OS << (Options.UseLoopInfo ? "" : "no-") << "use-loop-info;";
+ OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint";
OS << '>';
}
@@ -4333,7 +4632,7 @@ PreservedAnalyses InstCombinePass::run(Function &F,
&AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
- BFI, PSI, Options.MaxIterations, LI))
+ BFI, PSI, LI, Options))
// No changes, all analyses are preserved.
return PreservedAnalyses::all();
@@ -4382,8 +4681,7 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {
nullptr;
return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
- BFI, PSI,
- InstCombineDefaultMaxIterations, LI);
+ BFI, PSI, LI, InstCombineOptions());
}
char InstructionCombiningPass::ID = 0;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index f4bf6db569f2..6468d07b4f4f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -201,8 +201,8 @@ static cl::opt<bool> ClRecover(
static cl::opt<bool> ClInsertVersionCheck(
"asan-guard-against-version-mismatch",
- cl::desc("Guard against compiler/runtime version mismatch."),
- cl::Hidden, cl::init(true));
+ cl::desc("Guard against compiler/runtime version mismatch."), cl::Hidden,
+ cl::init(true));
// This flag may need to be replaced with -f[no-]asan-reads.
static cl::opt<bool> ClInstrumentReads("asan-instrument-reads",
@@ -323,10 +323,9 @@ static cl::opt<unsigned> ClRealignStack(
static cl::opt<int> ClInstrumentationWithCallsThreshold(
"asan-instrumentation-with-call-threshold",
- cl::desc(
- "If the function being instrumented contains more than "
- "this number of memory accesses, use callbacks instead of "
- "inline checks (-1 means never use callbacks)."),
+ cl::desc("If the function being instrumented contains more than "
+ "this number of memory accesses, use callbacks instead of "
+ "inline checks (-1 means never use callbacks)."),
cl::Hidden, cl::init(7000));
static cl::opt<std::string> ClMemoryAccessCallbackPrefix(
@@ -491,7 +490,8 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
bool IsMIPS32 = TargetTriple.isMIPS32();
bool IsMIPS64 = TargetTriple.isMIPS64();
bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb();
- bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64;
+ bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64 ||
+ TargetTriple.getArch() == Triple::aarch64_be;
bool IsLoongArch64 = TargetTriple.isLoongArch64();
bool IsRISCV64 = TargetTriple.getArch() == Triple::riscv64;
bool IsWindows = TargetTriple.isOSWindows();
@@ -644,8 +644,9 @@ namespace {
/// AddressSanitizer: instrument the code in module to find memory bugs.
struct AddressSanitizer {
AddressSanitizer(Module &M, const StackSafetyGlobalInfo *SSGI,
- bool CompileKernel = false, bool Recover = false,
- bool UseAfterScope = false,
+ int InstrumentationWithCallsThreshold,
+ uint32_t MaxInlinePoisoningSize, bool CompileKernel = false,
+ bool Recover = false, bool UseAfterScope = false,
AsanDetectStackUseAfterReturnMode UseAfterReturn =
AsanDetectStackUseAfterReturnMode::Runtime)
: CompileKernel(ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan
@@ -654,12 +655,19 @@ struct AddressSanitizer {
UseAfterScope(UseAfterScope || ClUseAfterScope),
UseAfterReturn(ClUseAfterReturn.getNumOccurrences() ? ClUseAfterReturn
: UseAfterReturn),
- SSGI(SSGI) {
+ SSGI(SSGI),
+ InstrumentationWithCallsThreshold(
+ ClInstrumentationWithCallsThreshold.getNumOccurrences() > 0
+ ? ClInstrumentationWithCallsThreshold
+ : InstrumentationWithCallsThreshold),
+ MaxInlinePoisoningSize(ClMaxInlinePoisoningSize.getNumOccurrences() > 0
+ ? ClMaxInlinePoisoningSize
+ : MaxInlinePoisoningSize) {
C = &(M.getContext());
DL = &M.getDataLayout();
LongSize = M.getDataLayout().getPointerSizeInBits();
IntptrTy = Type::getIntNTy(*C, LongSize);
- Int8PtrTy = Type::getInt8PtrTy(*C);
+ PtrTy = PointerType::getUnqual(*C);
Int32Ty = Type::getInt32Ty(*C);
TargetTriple = Triple(M.getTargetTriple());
@@ -751,8 +759,8 @@ private:
bool UseAfterScope;
AsanDetectStackUseAfterReturnMode UseAfterReturn;
Type *IntptrTy;
- Type *Int8PtrTy;
Type *Int32Ty;
+ PointerType *PtrTy;
ShadowMapping Mapping;
FunctionCallee AsanHandleNoReturnFunc;
FunctionCallee AsanPtrCmpFunction, AsanPtrSubFunction;
@@ -773,17 +781,22 @@ private:
FunctionCallee AMDGPUAddressShared;
FunctionCallee AMDGPUAddressPrivate;
+ int InstrumentationWithCallsThreshold;
+ uint32_t MaxInlinePoisoningSize;
};
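
The two new fields follow the usual LLVM precedence idiom visible in the constructor hunk above: a cl::opt flag that was explicitly passed on the command line beats the value supplied by the pass builder, while an untouched flag leaves the programmatic value in force. A self-contained sketch of the idiom (the flag and struct names here are illustrative, not from the patch):

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    static cl::opt<int> ClThreshold("example-threshold",
                                    cl::desc("Illustrative flag"), cl::Hidden,
                                    cl::init(7000));

    struct ExamplePass {
      int Threshold;
      // getNumOccurrences() > 0 means the user spelled the flag on the
      // command line, so it overrides the constructor argument.
      ExamplePass(int Threshold)
          : Threshold(ClThreshold.getNumOccurrences() > 0 ? ClThreshold
                                                          : Threshold) {}
    };
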
class ModuleAddressSanitizer {
public:
- ModuleAddressSanitizer(Module &M, bool CompileKernel = false,
- bool Recover = false, bool UseGlobalsGC = true,
- bool UseOdrIndicator = true,
+ ModuleAddressSanitizer(Module &M, bool InsertVersionCheck,
+ bool CompileKernel = false, bool Recover = false,
+ bool UseGlobalsGC = true, bool UseOdrIndicator = true,
AsanDtorKind DestructorKind = AsanDtorKind::Global,
AsanCtorKind ConstructorKind = AsanCtorKind::Global)
: CompileKernel(ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan
: CompileKernel),
+ InsertVersionCheck(ClInsertVersionCheck.getNumOccurrences() > 0
+ ? ClInsertVersionCheck
+ : InsertVersionCheck),
Recover(ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover),
UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC && !this->CompileKernel),
// Enable aliases as they should have no downside with ODR indicators.
@@ -802,10 +815,13 @@ public:
// do globals-gc.
UseCtorComdat(UseGlobalsGC && ClWithComdat && !this->CompileKernel),
DestructorKind(DestructorKind),
- ConstructorKind(ConstructorKind) {
+ ConstructorKind(ClConstructorKind.getNumOccurrences() > 0
+ ? ClConstructorKind
+ : ConstructorKind) {
C = &(M.getContext());
int LongSize = M.getDataLayout().getPointerSizeInBits();
IntptrTy = Type::getIntNTy(*C, LongSize);
+ PtrTy = PointerType::getUnqual(*C);
TargetTriple = Triple(M.getTargetTriple());
Mapping = getShadowMapping(TargetTriple, LongSize, this->CompileKernel);
@@ -854,6 +870,7 @@ private:
int GetAsanVersion(const Module &M) const;
bool CompileKernel;
+ bool InsertVersionCheck;
bool Recover;
bool UseGlobalsGC;
bool UsePrivateAlias;
@@ -862,6 +879,7 @@ private:
AsanDtorKind DestructorKind;
AsanCtorKind ConstructorKind;
Type *IntptrTy;
+ PointerType *PtrTy;
LLVMContext *C;
Triple TargetTriple;
ShadowMapping Mapping;
@@ -1148,22 +1166,22 @@ AddressSanitizerPass::AddressSanitizerPass(
AsanCtorKind ConstructorKind)
: Options(Options), UseGlobalGC(UseGlobalGC),
UseOdrIndicator(UseOdrIndicator), DestructorKind(DestructorKind),
- ConstructorKind(ClConstructorKind) {}
+ ConstructorKind(ConstructorKind) {}
PreservedAnalyses AddressSanitizerPass::run(Module &M,
ModuleAnalysisManager &MAM) {
- ModuleAddressSanitizer ModuleSanitizer(M, Options.CompileKernel,
- Options.Recover, UseGlobalGC,
- UseOdrIndicator, DestructorKind,
- ConstructorKind);
+ ModuleAddressSanitizer ModuleSanitizer(
+ M, Options.InsertVersionCheck, Options.CompileKernel, Options.Recover,
+ UseGlobalGC, UseOdrIndicator, DestructorKind, ConstructorKind);
bool Modified = false;
auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
const StackSafetyGlobalInfo *const SSGI =
ClUseStackSafety ? &MAM.getResult<StackSafetyGlobalAnalysis>(M) : nullptr;
for (Function &F : M) {
- AddressSanitizer FunctionSanitizer(M, SSGI, Options.CompileKernel,
- Options.Recover, Options.UseAfterScope,
- Options.UseAfterReturn);
+ AddressSanitizer FunctionSanitizer(
+ M, SSGI, Options.InstrumentationWithCallsThreshold,
+ Options.MaxInlinePoisoningSize, Options.CompileKernel, Options.Recover,
+ Options.UseAfterScope, Options.UseAfterReturn);
const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
Modified |= FunctionSanitizer.instrumentFunction(F, &TLI);
}
@@ -1188,17 +1206,17 @@ static size_t TypeStoreSizeToSizeIndex(uint32_t TypeSize) {
/// Check if \p G has been created by a trusted compiler pass.
static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) {
// Do not instrument @llvm.global_ctors, @llvm.used, etc.
- if (G->getName().startswith("llvm.") ||
+ if (G->getName().starts_with("llvm.") ||
// Do not instrument gcov counter arrays.
- G->getName().startswith("__llvm_gcov_ctr") ||
+ G->getName().starts_with("__llvm_gcov_ctr") ||
// Do not instrument rtti proxy symbols for function sanitizer.
- G->getName().startswith("__llvm_rtti_proxy"))
+ G->getName().starts_with("__llvm_rtti_proxy"))
return true;
// Do not instrument asan globals.
- if (G->getName().startswith(kAsanGenPrefix) ||
- G->getName().startswith(kSanCovGenPrefix) ||
- G->getName().startswith(kODRGenPrefix))
+ if (G->getName().starts_with(kAsanGenPrefix) ||
+ G->getName().starts_with(kSanCovGenPrefix) ||
+ G->getName().starts_with(kODRGenPrefix))
return true;
return false;
@@ -1232,15 +1250,13 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
InstrumentationIRBuilder IRB(MI);
if (isa<MemTransferInst>(MI)) {
- IRB.CreateCall(
- isa<MemMoveInst>(MI) ? AsanMemmove : AsanMemcpy,
- {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
+ IRB.CreateCall(isa<MemMoveInst>(MI) ? AsanMemmove : AsanMemcpy,
+ {MI->getOperand(0), MI->getOperand(1),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
} else if (isa<MemSetInst>(MI)) {
IRB.CreateCall(
AsanMemset,
- {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ {MI->getOperand(0),
IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
}
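
This hunk, like many below in DataFlowSanitizer and HWAddressSanitizer, simply drops IRB.CreatePointerCast/CreateBitCast around pointer arguments. With opaque pointers, the only pointer representation in this LLVM version, every pointer in a given address space has the single type ptr, so those casts were already no-ops. A minimal sketch of the collapse, assuming an opaque-pointer LLVMContext (the default and only mode here):

    #include <cassert>
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    void opaquePointerDemo() {
      LLVMContext Ctx;
      // One pointer type per address space; pointee types no longer exist.
      PointerType *PtrTy = PointerType::getUnqual(Ctx); // prints as 'ptr'
      assert(PtrTy == PointerType::get(Type::getInt32Ty(Ctx), /*AS=*/0) &&
             "an 'i32*' request yields the same opaque 'ptr' type");
    }
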
@@ -1570,7 +1586,7 @@ void AddressSanitizer::instrumentMaskedLoadOrStore(
InstrumentedAddress = IRB.CreateExtractElement(Addr, Index);
} else if (Stride) {
Index = IRB.CreateMul(Index, Stride);
- Addr = IRB.CreateBitCast(Addr, Type::getInt8PtrTy(*C));
+ Addr = IRB.CreateBitCast(Addr, PointerType::getUnqual(*C));
InstrumentedAddress = IRB.CreateGEP(Type::getInt8Ty(*C), Addr, {Index});
} else {
InstrumentedAddress = IRB.CreateGEP(VTy, Addr, {Zero, Index});
@@ -1695,9 +1711,8 @@ Instruction *AddressSanitizer::instrumentAMDGPUAddress(
return InsertBefore;
// Instrument generic addresses in supported address spaces.
IRBuilder<> IRB(InsertBefore);
- Value *AddrLong = IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy());
- Value *IsShared = IRB.CreateCall(AMDGPUAddressShared, {AddrLong});
- Value *IsPrivate = IRB.CreateCall(AMDGPUAddressPrivate, {AddrLong});
+ Value *IsShared = IRB.CreateCall(AMDGPUAddressShared, {Addr});
+ Value *IsPrivate = IRB.CreateCall(AMDGPUAddressPrivate, {Addr});
Value *IsSharedOrPrivate = IRB.CreateOr(IsShared, IsPrivate);
Value *Cmp = IRB.CreateNot(IsSharedOrPrivate);
Value *AddrSpaceZeroLanding =
@@ -1728,7 +1743,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
IRB.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::asan_check_memaccess),
- {IRB.CreatePointerCast(Addr, Int8PtrTy),
+ {IRB.CreatePointerCast(Addr, PtrTy),
ConstantInt::get(Int32Ty, AccessInfo.Packed)});
return;
}
@@ -1869,7 +1884,7 @@ ModuleAddressSanitizer::getExcludedAliasedGlobal(const GlobalAlias &GA) const {
// When compiling the kernel, globals that are aliased by symbols prefixed
// by "__" are special and cannot be padded with a redzone.
- if (GA.getName().startswith("__"))
+ if (GA.getName().starts_with("__"))
return dyn_cast<GlobalVariable>(C->stripPointerCastsAndAliases());
return nullptr;
@@ -1939,9 +1954,9 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const {
// Do not instrument function pointers to initialization and termination
// routines: dynamic linker will not properly handle redzones.
- if (Section.startswith(".preinit_array") ||
- Section.startswith(".init_array") ||
- Section.startswith(".fini_array")) {
+ if (Section.starts_with(".preinit_array") ||
+ Section.starts_with(".init_array") ||
+ Section.starts_with(".fini_array")) {
return false;
}
@@ -1978,7 +1993,7 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const {
// those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
// them.
if (ParsedSegment == "__OBJC" ||
- (ParsedSegment == "__DATA" && ParsedSection.startswith("__objc_"))) {
+ (ParsedSegment == "__DATA" && ParsedSection.starts_with("__objc_"))) {
LLVM_DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G << "\n");
return false;
}
@@ -2006,7 +2021,7 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const {
if (CompileKernel) {
// Globals that prefixed by "__" are special and cannot be padded with a
// redzone.
- if (G->getName().startswith("__"))
+ if (G->getName().starts_with("__"))
return false;
}
@@ -2129,6 +2144,9 @@ ModuleAddressSanitizer::CreateMetadataGlobal(Module &M, Constant *Initializer,
M, Initializer->getType(), false, Linkage, Initializer,
Twine("__asan_global_") + GlobalValue::dropLLVMManglingEscape(OriginalName));
Metadata->setSection(getGlobalMetadataSection());
+ // Place metadata in a large section for x86-64 ELF binaries to mitigate
+ // relocation pressure.
+ setGlobalVariableLargeSection(TargetTriple, *Metadata);
return Metadata;
}
@@ -2451,7 +2469,7 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
G->eraseFromParent();
NewGlobals[i] = NewGlobal;
- Constant *ODRIndicator = ConstantExpr::getNullValue(IRB.getInt8PtrTy());
+ Constant *ODRIndicator = ConstantPointerNull::get(PtrTy);
GlobalValue *InstrumentedGlobal = NewGlobal;
bool CanUsePrivateAliases =
@@ -2466,8 +2484,8 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
// ODR should not happen for local linkage.
if (NewGlobal->hasLocalLinkage()) {
- ODRIndicator = ConstantExpr::getIntToPtr(ConstantInt::get(IntptrTy, -1),
- IRB.getInt8PtrTy());
+ ODRIndicator =
+ ConstantExpr::getIntToPtr(ConstantInt::get(IntptrTy, -1), PtrTy);
} else if (UseOdrIndicator) {
// With local aliases, we need to provide another externally visible
// symbol __odr_asan_XXX to detect ODR violation.
@@ -2591,7 +2609,7 @@ bool ModuleAddressSanitizer::instrumentModule(Module &M) {
} else {
std::string AsanVersion = std::to_string(GetAsanVersion(M));
std::string VersionCheckName =
- ClInsertVersionCheck ? (kAsanVersionCheckNamePrefix + AsanVersion) : "";
+ InsertVersionCheck ? (kAsanVersionCheckNamePrefix + AsanVersion) : "";
std::tie(AsanCtorFunction, std::ignore) =
createSanitizerCtorAndInitFunctions(M, kAsanModuleCtorName,
kAsanInitName, /*InitArgTypes=*/{},
@@ -2687,15 +2705,12 @@ void AddressSanitizer::initializeCallbacks(Module &M, const TargetLibraryInfo *T
? std::string("")
: ClMemoryAccessCallbackPrefix;
AsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove",
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy);
- AsanMemcpy = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memcpy",
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy);
+ PtrTy, PtrTy, PtrTy, IntptrTy);
+ AsanMemcpy = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memcpy", PtrTy,
+ PtrTy, PtrTy, IntptrTy);
AsanMemset = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memset",
TLI->getAttrList(C, {1}, /*Signed=*/false),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt32Ty(), IntptrTy);
+ PtrTy, PtrTy, IRB.getInt32Ty(), IntptrTy);
AsanHandleNoReturnFunc =
M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy());
@@ -2708,10 +2723,10 @@ void AddressSanitizer::initializeCallbacks(Module &M, const TargetLibraryInfo *T
AsanShadowGlobal = M.getOrInsertGlobal("__asan_shadow",
ArrayType::get(IRB.getInt8Ty(), 0));
- AMDGPUAddressShared = M.getOrInsertFunction(
- kAMDGPUAddressSharedName, IRB.getInt1Ty(), IRB.getInt8PtrTy());
- AMDGPUAddressPrivate = M.getOrInsertFunction(
- kAMDGPUAddressPrivateName, IRB.getInt1Ty(), IRB.getInt8PtrTy());
+ AMDGPUAddressShared =
+ M.getOrInsertFunction(kAMDGPUAddressSharedName, IRB.getInt1Ty(), PtrTy);
+ AMDGPUAddressPrivate =
+ M.getOrInsertFunction(kAMDGPUAddressPrivateName, IRB.getInt1Ty(), PtrTy);
}
bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
@@ -2802,7 +2817,7 @@ bool AddressSanitizer::instrumentFunction(Function &F,
return false;
if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false;
if (!ClDebugFunc.empty() && ClDebugFunc == F.getName()) return false;
- if (F.getName().startswith("__asan_")) return false;
+ if (F.getName().starts_with("__asan_")) return false;
bool FunctionModified = false;
@@ -2893,9 +2908,9 @@ bool AddressSanitizer::instrumentFunction(Function &F,
}
}
- bool UseCalls = (ClInstrumentationWithCallsThreshold >= 0 &&
+ bool UseCalls = (InstrumentationWithCallsThreshold >= 0 &&
OperandsToInstrument.size() + IntrinToInstrument.size() >
- (unsigned)ClInstrumentationWithCallsThreshold);
+ (unsigned)InstrumentationWithCallsThreshold);
const DataLayout &DL = F.getParent()->getDataLayout();
ObjectSizeOpts ObjSizeOpts;
ObjSizeOpts.RoundToAlign = true;
@@ -3037,7 +3052,7 @@ void FunctionStackPoisoner::copyToShadowInline(ArrayRef<uint8_t> ShadowMask,
Value *Ptr = IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i));
Value *Poison = IRB.getIntN(StoreSizeInBytes * 8, Val);
IRB.CreateAlignedStore(
- Poison, IRB.CreateIntToPtr(Ptr, Poison->getType()->getPointerTo()),
+ Poison, IRB.CreateIntToPtr(Ptr, PointerType::getUnqual(Poison->getContext())),
Align(1));
i += StoreSizeInBytes;
@@ -3069,7 +3084,7 @@ void FunctionStackPoisoner::copyToShadow(ArrayRef<uint8_t> ShadowMask,
for (; j < End && ShadowMask[j] && Val == ShadowBytes[j]; ++j) {
}
- if (j - i >= ClMaxInlinePoisoningSize) {
+ if (j - i >= ASan.MaxInlinePoisoningSize) {
copyToShadowInline(ShadowMask, ShadowBytes, Done, i, IRB, ShadowBase);
IRB.CreateCall(AsanSetShadowFunc[Val],
{IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i)),
@@ -3490,7 +3505,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
SplitBlockAndInsertIfThenElse(Cmp, Ret, &ThenTerm, &ElseTerm);
IRBuilder<> IRBPoison(ThenTerm);
- if (StackMallocIdx <= 4) {
+ if (ASan.MaxInlinePoisoningSize != 0 && StackMallocIdx <= 4) {
int ClassSize = kMinStackMallocSize << StackMallocIdx;
ShadowAfterReturn.resize(ClassSize / L.Granularity,
kAsanStackUseAfterReturnMagic);
@@ -3503,7 +3518,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
IntptrTy, IRBPoison.CreateIntToPtr(SavedFlagPtrPtr, IntptrPtrTy));
IRBPoison.CreateStore(
Constant::getNullValue(IRBPoison.getInt8Ty()),
- IRBPoison.CreateIntToPtr(SavedFlagPtr, IRBPoison.getInt8PtrTy()));
+ IRBPoison.CreateIntToPtr(SavedFlagPtr, IRBPoison.getPtrTy()));
} else {
// For larger frames call __asan_stack_free_*.
IRBPoison.CreateCall(
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 709095184af5..ee5b81960417 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -37,6 +37,9 @@ using namespace llvm;
static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap",
cl::desc("Use one trap block per function"));
+static cl::opt<bool> DebugTrapBB("bounds-checking-unique-traps",
+ cl::desc("Always use one trap per check"));
+
STATISTIC(ChecksAdded, "Bounds checks added");
STATISTIC(ChecksSkipped, "Bounds checks skipped");
STATISTIC(ChecksUnable, "Bounds checks unable to add");
@@ -180,19 +183,27 @@ static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI,
// will create a fresh block every time it is called.
BasicBlock *TrapBB = nullptr;
auto GetTrapBB = [&TrapBB](BuilderTy &IRB) {
- if (TrapBB && SingleTrapBB)
- return TrapBB;
-
Function *Fn = IRB.GetInsertBlock()->getParent();
- // FIXME: This debug location doesn't make a lot of sense in the
- // `SingleTrapBB` case.
auto DebugLoc = IRB.getCurrentDebugLocation();
IRBuilder<>::InsertPointGuard Guard(IRB);
+
+ if (TrapBB && SingleTrapBB && !DebugTrapBB)
+ return TrapBB;
+
TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn);
IRB.SetInsertPoint(TrapBB);
- auto *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap);
- CallInst *TrapCall = IRB.CreateCall(F, {});
+ Intrinsic::ID IntrID = DebugTrapBB ? Intrinsic::ubsantrap : Intrinsic::trap;
+ auto *F = Intrinsic::getDeclaration(Fn->getParent(), IntrID);
+
+ CallInst *TrapCall;
+ if (DebugTrapBB) {
+ TrapCall =
+ IRB.CreateCall(F, ConstantInt::get(IRB.getInt8Ty(), Fn->size()));
+ } else {
+ TrapCall = IRB.CreateCall(F, {});
+ }
+
TrapCall->setDoesNotReturn();
TrapCall->setDoesNotThrow();
TrapCall->setDebugLoc(DebugLoc);
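
Under the new -bounds-checking-unique-traps flag, every check gets its own trap block calling llvm.ubsantrap with a distinct i8 immediate; the patch uses Fn->size(), the basic-block count at creation time, truncated to eight bits. Distinct immediates keep otherwise-identical traps from being folded together by the backend, so each retains its own debug location. A sketch of the emission pattern, with the helper name and parameters invented for illustration:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    using namespace llvm;

    // Emit one non-mergeable trap, distinguished by the immediate N.
    CallInst *emitUniqueTrap(IRBuilder<> &IRB, Module *M, uint8_t N) {
      Function *Decl = Intrinsic::getDeclaration(M, Intrinsic::ubsantrap);
      CallInst *Trap = IRB.CreateCall(Decl, IRB.getInt8(N));
      Trap->setDoesNotReturn();
      Trap->setDoesNotThrow();
      return Trap;
    }
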
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CGProfile.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
index d53e12ad1ff5..e2e5f21b376b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
@@ -66,7 +66,7 @@ static bool runCGProfilePass(
if (F.isDeclaration() || !F.getEntryCount())
continue;
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
- if (BFI.getEntryFreq() == 0)
+ if (BFI.getEntryFreq() == BlockFrequency(0))
continue;
TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
for (auto &BB : F) {
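
The change above is forced by BlockFrequencyInfo::getEntryFreq() now returning the strong BlockFrequency type rather than a raw uint64_t, so comparisons need an explicitly constructed BlockFrequency. A minimal sketch, assuming this release's BlockFrequency comparison operators:

    #include "llvm/Support/BlockFrequency.h"
    using namespace llvm;

    // The strong type keeps the unit visible at the call site and blocks
    // accidental mixing of frequencies with plain integers.
    bool entryIsNeverExecuted(BlockFrequency EntryFreq) {
      return EntryFreq == BlockFrequency(0);
    }
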
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 597cec8e61c9..0a3d8d6000cf 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -1593,8 +1593,8 @@ static void insertTrivialPHIs(CHRScope *Scope,
// Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
// ExitBlock. Replace I with the new phi in UI unless UI is another
// phi at ExitBlock.
- PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "",
- &ExitBlock->front());
+ PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "");
+ PN->insertBefore(ExitBlock->begin());
for (BasicBlock *Pred : predecessors(ExitBlock)) {
PN->addIncoming(&I, Pred);
}
@@ -1780,17 +1780,10 @@ void CHR::cloneScopeBlocks(CHRScope *Scope,
// Unreachable predecessors will not be cloned and will not have an edge
// to the cloned block. As such, also remove them from any phi nodes.
- // To avoid iterator invalidation, first collect the dead predecessors
- // from the first phi node, and then perform the actual removal.
- if (auto *FirstPN = dyn_cast<PHINode>(NewBB->begin())) {
- SmallVector<BasicBlock *> DeadPreds;
- for (BasicBlock *Pred : FirstPN->blocks())
- if (!DT.isReachableFromEntry(Pred))
- DeadPreds.push_back(Pred);
- for (PHINode &PN : make_early_inc_range(NewBB->phis()))
- for (BasicBlock *Pred : DeadPreds)
- PN.removeIncomingValue(Pred);
- }
+ for (PHINode &PN : make_early_inc_range(NewBB->phis()))
+ PN.removeIncomingValueIf([&](unsigned Idx) {
+ return !DT.isReachableFromEntry(PN.getIncomingBlock(Idx));
+ });
}
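
PHINode::removeIncomingValueIf removes every incoming edge matching an index predicate in one pass, which is why the hand-rolled DeadPreds collection disappears. Note that it also erases the phi itself once all edges are removed (its DeletePHIIfEmpty parameter defaults to true), which is what the make_early_inc_range above guards against. A compressed sketch under those assumptions:

    // Assumes BasicBlock *NewBB and DominatorTree &DT as in the hunk above.
    for (PHINode &PN : make_early_inc_range(NewBB->phis()))
      PN.removeIncomingValueIf(
          [&](unsigned Idx) {
            return !DT.isReachableFromEntry(PN.getIncomingBlock(Idx));
          },
          /*DeletePHIIfEmpty=*/true); // spelled out here; true is the default
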
// Place the cloned blocks right after the original blocks (right before the
@@ -1885,8 +1878,7 @@ void CHR::fixupBranchesAndSelects(CHRScope *Scope,
static_cast<uint32_t>(CHRBranchBias.scale(1000)),
static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)),
};
- MDBuilder MDB(F.getContext());
- MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+ setBranchWeights(*MergedBR, Weights);
CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
<< "\n");
}
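
setBranchWeights from llvm/IR/ProfDataUtils.h condenses the deleted two-line MDBuilder pattern: it builds the branch_weights node and attaches it as !prof metadata in one call. A short sketch, assuming a two-successor branch like MergedBR here (the weight values are illustrative):

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/ProfDataUtils.h"
    using namespace llvm;

    void annotate(BranchInst &BR) {
      // Equivalent to MDBuilder(Ctx).createBranchWeights(...) followed by
      // BR.setMetadata(LLVMContext::MD_prof, ...).
      uint32_t Weights[] = {900, 100}; // taken : not taken
      setBranchWeights(BR, Weights);
    }
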
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 8caee5bed8ed..2ba127bba6f6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -564,7 +564,7 @@ class DataFlowSanitizer {
/// getShadowTy([n x T]) = [n x getShadowTy(T)]
/// getShadowTy(other type) = i16
Type *getShadowTy(Type *OrigTy);
- /// Returns the shadow type of of V's type.
+ /// Returns the shadow type of V's type.
Type *getShadowTy(Value *V);
const uint64_t NumOfElementsInArgOrgTLS = ArgTLSSize / OriginWidthBytes;
@@ -1145,7 +1145,7 @@ bool DataFlowSanitizer::initializeModule(Module &M) {
Mod = &M;
Ctx = &M.getContext();
- Int8Ptr = Type::getInt8PtrTy(*Ctx);
+ Int8Ptr = PointerType::getUnqual(*Ctx);
OriginTy = IntegerType::get(*Ctx, OriginWidthBits);
OriginPtrTy = PointerType::getUnqual(OriginTy);
PrimitiveShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
@@ -1162,19 +1162,19 @@ bool DataFlowSanitizer::initializeModule(Module &M) {
FunctionType::get(IntegerType::get(*Ctx, 64), DFSanLoadLabelAndOriginArgs,
/*isVarArg=*/false);
DFSanUnimplementedFnTy = FunctionType::get(
- Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
+ Type::getVoidTy(*Ctx), PointerType::getUnqual(*Ctx), /*isVarArg=*/false);
Type *DFSanWrapperExternWeakNullArgs[2] = {Int8Ptr, Int8Ptr};
DFSanWrapperExternWeakNullFnTy =
FunctionType::get(Type::getVoidTy(*Ctx), DFSanWrapperExternWeakNullArgs,
/*isVarArg=*/false);
Type *DFSanSetLabelArgs[4] = {PrimitiveShadowTy, OriginTy,
- Type::getInt8PtrTy(*Ctx), IntptrTy};
+ PointerType::getUnqual(*Ctx), IntptrTy};
DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),
DFSanSetLabelArgs, /*isVarArg=*/false);
DFSanNonzeroLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx), std::nullopt,
/*isVarArg=*/false);
DFSanVarargWrapperFnTy = FunctionType::get(
- Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
+ Type::getVoidTy(*Ctx), PointerType::getUnqual(*Ctx), /*isVarArg=*/false);
DFSanConditionalCallbackFnTy =
FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
/*isVarArg=*/false);
@@ -1288,7 +1288,7 @@ void DataFlowSanitizer::buildExternWeakCheckIfNeeded(IRBuilder<> &IRB,
// for an extern weak function, add a check here to help identify the issue.
if (GlobalValue::isExternalWeakLinkage(F->getLinkage())) {
std::vector<Value *> Args;
- Args.push_back(IRB.CreatePointerCast(F, IRB.getInt8PtrTy()));
+ Args.push_back(F);
Args.push_back(IRB.CreateGlobalStringPtr(F->getName()));
IRB.CreateCall(DFSanWrapperExternWeakNullFn, Args);
}
@@ -1553,7 +1553,7 @@ bool DataFlowSanitizer::runImpl(
assert(isa<Function>(C) && "Personality routine is not a function!");
Function *F = cast<Function>(C);
if (!isInstrumented(F))
- llvm::erase_value(FnsToInstrument, F);
+ llvm::erase(FnsToInstrument, F);
}
}
@@ -1575,7 +1575,7 @@ bool DataFlowSanitizer::runImpl(
// below will take care of instrumenting it.
Function *NewF =
buildWrapperFunction(F, "", GA.getLinkage(), F->getFunctionType());
- GA.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA.getType()));
+ GA.replaceAllUsesWith(NewF);
NewF->takeName(&GA);
GA.eraseFromParent();
FnsToInstrument.push_back(NewF);
@@ -1622,9 +1622,6 @@ bool DataFlowSanitizer::runImpl(
WrapperLinkage, FT);
NewF->removeFnAttrs(ReadOnlyNoneAttrs);
- Value *WrappedFnCst =
- ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));
-
// Extern weak functions can sometimes be null at execution time.
// Code will sometimes check if an extern weak function is null.
// This could look something like:
@@ -1657,9 +1654,9 @@ bool DataFlowSanitizer::runImpl(
}
return true;
};
- F.replaceUsesWithIf(WrappedFnCst, IsNotCmpUse);
+ F.replaceUsesWithIf(NewF, IsNotCmpUse);
- UnwrappedFnMap[WrappedFnCst] = &F;
+ UnwrappedFnMap[NewF] = &F;
*FI = NewF;
if (!F.isDeclaration()) {
@@ -2273,8 +2270,7 @@ std::pair<Value *, Value *> DFSanFunction::loadShadowOriginSansLoadTracking(
IRBuilder<> IRB(Pos);
CallInst *Call =
IRB.CreateCall(DFS.DFSanLoadLabelAndOriginFn,
- {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
- ConstantInt::get(DFS.IntptrTy, Size)});
+ {Addr, ConstantInt::get(DFS.IntptrTy, Size)});
Call->addRetAttr(Attribute::ZExt);
return {IRB.CreateTrunc(IRB.CreateLShr(Call, DFS.OriginWidthBits),
DFS.PrimitiveShadowTy),
@@ -2436,9 +2432,9 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) {
if (ClEventCallbacks) {
IRBuilder<> IRB(Pos);
- Value *Addr8 = IRB.CreateBitCast(LI.getPointerOperand(), DFSF.DFS.Int8Ptr);
+ Value *Addr = LI.getPointerOperand();
CallInst *CI =
- IRB.CreateCall(DFSF.DFS.DFSanLoadCallbackFn, {PrimitiveShadow, Addr8});
+ IRB.CreateCall(DFSF.DFS.DFSanLoadCallbackFn, {PrimitiveShadow, Addr});
CI->addParamAttr(0, Attribute::ZExt);
}
@@ -2530,10 +2526,9 @@ void DFSanFunction::storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size,
}
if (shouldInstrumentWithCall()) {
- IRB.CreateCall(DFS.DFSanMaybeStoreOriginFn,
- {CollapsedShadow,
- IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
- ConstantInt::get(DFS.IntptrTy, Size), Origin});
+ IRB.CreateCall(
+ DFS.DFSanMaybeStoreOriginFn,
+ {CollapsedShadow, Addr, ConstantInt::get(DFS.IntptrTy, Size), Origin});
} else {
Value *Cmp = convertToBool(CollapsedShadow, IRB, "_dfscmp");
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
@@ -2554,9 +2549,7 @@ void DFSanFunction::storeZeroPrimitiveShadow(Value *Addr, uint64_t Size,
IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidthBits);
Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0);
Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
- Value *ExtShadowAddr =
- IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowTy));
- IRB.CreateAlignedStore(ExtZeroShadow, ExtShadowAddr, ShadowAlign);
+ IRB.CreateAlignedStore(ExtZeroShadow, ShadowAddr, ShadowAlign);
// Do not write origins for 0 shadows because we do not trace origins for
// untainted sinks.
}
@@ -2611,11 +2604,9 @@ void DFSanFunction::storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
ShadowVec, PrimitiveShadow,
ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), I));
}
- Value *ShadowVecAddr =
- IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy));
do {
Value *CurShadowVecAddr =
- IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset);
+ IRB.CreateConstGEP1_32(ShadowVecTy, ShadowAddr, Offset);
IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
LeftSize -= ShadowVecSize;
++Offset;
@@ -2699,9 +2690,9 @@ void DFSanVisitor::visitStoreInst(StoreInst &SI) {
PrimitiveShadow, Origin, &SI);
if (ClEventCallbacks) {
IRBuilder<> IRB(&SI);
- Value *Addr8 = IRB.CreateBitCast(SI.getPointerOperand(), DFSF.DFS.Int8Ptr);
+ Value *Addr = SI.getPointerOperand();
CallInst *CI =
- IRB.CreateCall(DFSF.DFS.DFSanStoreCallbackFn, {PrimitiveShadow, Addr8});
+ IRB.CreateCall(DFSF.DFS.DFSanStoreCallbackFn, {PrimitiveShadow, Addr});
CI->addParamAttr(0, Attribute::ZExt);
}
}
@@ -2918,11 +2909,9 @@ void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
Value *ValOrigin = DFSF.DFS.shouldTrackOrigins()
? DFSF.getOrigin(I.getValue())
: DFSF.DFS.ZeroOrigin;
- IRB.CreateCall(
- DFSF.DFS.DFSanSetLabelFn,
- {ValShadow, ValOrigin,
- IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(*DFSF.DFS.Ctx)),
- IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
+ IRB.CreateCall(DFSF.DFS.DFSanSetLabelFn,
+ {ValShadow, ValOrigin, I.getDest(),
+ IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
}
void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
@@ -2933,28 +2922,24 @@ void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
if (DFSF.DFS.shouldTrackOrigins()) {
IRB.CreateCall(
DFSF.DFS.DFSanMemOriginTransferFn,
- {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
+ {I.getArgOperand(0), I.getArgOperand(1),
IRB.CreateIntCast(I.getArgOperand(2), DFSF.DFS.IntptrTy, false)});
}
- Value *RawDestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I);
+ Value *DestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I);
Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I);
Value *LenShadow =
IRB.CreateMul(I.getLength(), ConstantInt::get(I.getLength()->getType(),
DFSF.DFS.ShadowWidthBytes));
- Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx);
- Value *DestShadow = IRB.CreateBitCast(RawDestShadow, Int8Ptr);
- SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
auto *MTI = cast<MemTransferInst>(
IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
{DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));
MTI->setDestAlignment(DFSF.getShadowAlign(I.getDestAlign().valueOrOne()));
MTI->setSourceAlignment(DFSF.getShadowAlign(I.getSourceAlign().valueOrOne()));
if (ClEventCallbacks) {
- IRB.CreateCall(DFSF.DFS.DFSanMemTransferCallbackFn,
- {RawDestShadow,
- IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
+ IRB.CreateCall(
+ DFSF.DFS.DFSanMemTransferCallbackFn,
+ {DestShadow, IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
}
}
@@ -3225,10 +3210,9 @@ void DFSanVisitor::visitLibAtomicLoad(CallBase &CB) {
// TODO: Support ClCombinePointerLabelsOnLoad
// TODO: Support ClEventCallbacks
- NextIRB.CreateCall(DFSF.DFS.DFSanMemShadowOriginTransferFn,
- {NextIRB.CreatePointerCast(DstPtr, NextIRB.getInt8PtrTy()),
- NextIRB.CreatePointerCast(SrcPtr, NextIRB.getInt8PtrTy()),
- NextIRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
+ NextIRB.CreateCall(
+ DFSF.DFS.DFSanMemShadowOriginTransferFn,
+ {DstPtr, SrcPtr, NextIRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
}
Value *DFSanVisitor::makeAddReleaseOrderingTable(IRBuilder<> &IRB) {
@@ -3264,10 +3248,9 @@ void DFSanVisitor::visitLibAtomicStore(CallBase &CB) {
// TODO: Support ClCombinePointerLabelsOnStore
// TODO: Support ClEventCallbacks
- IRB.CreateCall(DFSF.DFS.DFSanMemShadowOriginTransferFn,
- {IRB.CreatePointerCast(DstPtr, IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(SrcPtr, IRB.getInt8PtrTy()),
- IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
+ IRB.CreateCall(
+ DFSF.DFS.DFSanMemShadowOriginTransferFn,
+ {DstPtr, SrcPtr, IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
}
void DFSanVisitor::visitLibAtomicExchange(CallBase &CB) {
@@ -3285,16 +3268,14 @@ void DFSanVisitor::visitLibAtomicExchange(CallBase &CB) {
// the additional complexity to address this is not warranted.
// Current Target to Dest
- IRB.CreateCall(DFSF.DFS.DFSanMemShadowOriginTransferFn,
- {IRB.CreatePointerCast(DstPtr, IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(TargetPtr, IRB.getInt8PtrTy()),
- IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
+ IRB.CreateCall(
+ DFSF.DFS.DFSanMemShadowOriginTransferFn,
+ {DstPtr, TargetPtr, IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
// Current Src to Target (overriding)
- IRB.CreateCall(DFSF.DFS.DFSanMemShadowOriginTransferFn,
- {IRB.CreatePointerCast(TargetPtr, IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(SrcPtr, IRB.getInt8PtrTy()),
- IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
+ IRB.CreateCall(
+ DFSF.DFS.DFSanMemShadowOriginTransferFn,
+ {TargetPtr, SrcPtr, IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
}
void DFSanVisitor::visitLibAtomicCompareExchange(CallBase &CB) {
@@ -3317,13 +3298,10 @@ void DFSanVisitor::visitLibAtomicCompareExchange(CallBase &CB) {
// If original call returned true, copy Desired to Target.
// If original call returned false, copy Target to Expected.
- NextIRB.CreateCall(
- DFSF.DFS.DFSanMemShadowOriginConditionalExchangeFn,
- {NextIRB.CreateIntCast(&CB, NextIRB.getInt8Ty(), false),
- NextIRB.CreatePointerCast(TargetPtr, NextIRB.getInt8PtrTy()),
- NextIRB.CreatePointerCast(ExpectedPtr, NextIRB.getInt8PtrTy()),
- NextIRB.CreatePointerCast(DesiredPtr, NextIRB.getInt8PtrTy()),
- NextIRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
+ NextIRB.CreateCall(DFSF.DFS.DFSanMemShadowOriginConditionalExchangeFn,
+ {NextIRB.CreateIntCast(&CB, NextIRB.getInt8Ty(), false),
+ TargetPtr, ExpectedPtr, DesiredPtr,
+ NextIRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
}
void DFSanVisitor::visitCallBase(CallBase &CB) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 75adcabc0d34..1ff0a34bae24 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -148,7 +148,7 @@ private:
std::string mangleName(const DICompileUnit *CU, GCovFileType FileType);
GCOVOptions Options;
- support::endianness Endian;
+ llvm::endianness Endian;
raw_ostream *os;
// Checksum, produced by hash of EdgeDestinations
@@ -750,7 +750,7 @@ static BasicBlock *getInstrBB(CFGMST<Edge, BBInfo> &MST, Edge &E,
#ifndef NDEBUG
static void dumpEdges(CFGMST<Edge, BBInfo> &MST, GCOVFunction &GF) {
size_t ID = 0;
- for (auto &E : make_pointee_range(MST.AllEdges)) {
+ for (const auto &E : make_pointee_range(MST.allEdges())) {
GCOVBlock &Src = E.SrcBB ? GF.getBlock(E.SrcBB) : GF.getEntryBlock();
GCOVBlock &Dst = E.DestBB ? GF.getBlock(E.DestBB) : GF.getReturnBlock();
dbgs() << " Edge " << ID++ << ": " << Src.Number << "->" << Dst.Number
@@ -788,8 +788,8 @@ bool GCOVProfiler::emitProfileNotes(
std::vector<uint8_t> EdgeDestinations;
SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
- Endian = M->getDataLayout().isLittleEndian() ? support::endianness::little
- : support::endianness::big;
+ Endian = M->getDataLayout().isLittleEndian() ? llvm::endianness::little
+ : llvm::endianness::big;
unsigned FunctionIdent = 0;
for (auto &F : M->functions()) {
DISubprogram *SP = F.getSubprogram();
@@ -820,8 +820,8 @@ bool GCOVProfiler::emitProfileNotes(
CFGMST<Edge, BBInfo> MST(F, /*InstrumentFuncEntry_=*/false, BPI, BFI);
// getInstrBB can split basic blocks and push elements to AllEdges.
- for (size_t I : llvm::seq<size_t>(0, MST.AllEdges.size())) {
- auto &E = *MST.AllEdges[I];
+ for (size_t I : llvm::seq<size_t>(0, MST.numEdges())) {
+ auto &E = *MST.allEdges()[I];
// For now, disable spanning tree optimization when fork or exec* is
// used.
if (HasExecOrFork)
@@ -836,16 +836,16 @@ bool GCOVProfiler::emitProfileNotes(
// Some non-tree edges are IndirectBr which cannot be split. Ignore them
// as well.
- llvm::erase_if(MST.AllEdges, [](std::unique_ptr<Edge> &E) {
+ llvm::erase_if(MST.allEdges(), [](std::unique_ptr<Edge> &E) {
return E->Removed || (!E->InMST && !E->Place);
});
const size_t Measured =
std::stable_partition(
- MST.AllEdges.begin(), MST.AllEdges.end(),
+ MST.allEdges().begin(), MST.allEdges().end(),
[](std::unique_ptr<Edge> &E) { return E->Place; }) -
- MST.AllEdges.begin();
+ MST.allEdges().begin();
for (size_t I : llvm::seq<size_t>(0, Measured)) {
- Edge &E = *MST.AllEdges[I];
+ Edge &E = *MST.allEdges()[I];
GCOVBlock &Src =
E.SrcBB ? Func.getBlock(E.SrcBB) : Func.getEntryBlock();
GCOVBlock &Dst =
@@ -854,13 +854,13 @@ bool GCOVProfiler::emitProfileNotes(
E.DstNumber = Dst.Number;
}
std::stable_sort(
- MST.AllEdges.begin(), MST.AllEdges.begin() + Measured,
+ MST.allEdges().begin(), MST.allEdges().begin() + Measured,
[](const std::unique_ptr<Edge> &L, const std::unique_ptr<Edge> &R) {
return L->SrcNumber != R->SrcNumber ? L->SrcNumber < R->SrcNumber
: L->DstNumber < R->DstNumber;
});
- for (const Edge &E : make_pointee_range(MST.AllEdges)) {
+ for (const Edge &E : make_pointee_range(MST.allEdges())) {
GCOVBlock &Src =
E.SrcBB ? Func.getBlock(E.SrcBB) : Func.getEntryBlock();
GCOVBlock &Dst =
@@ -917,7 +917,7 @@ bool GCOVProfiler::emitProfileNotes(
CountersBySP.emplace_back(Counters, SP);
for (size_t I : llvm::seq<size_t>(0, Measured)) {
- const Edge &E = *MST.AllEdges[I];
+ const Edge &E = *MST.allEdges()[I];
IRBuilder<> Builder(E.Place, E.Place->getFirstInsertionPt());
Value *V = Builder.CreateConstInBoundsGEP2_64(
Counters->getValueType(), Counters, 0, I);
@@ -957,7 +957,7 @@ bool GCOVProfiler::emitProfileNotes(
continue;
}
os = &out;
- if (Endian == support::endianness::big) {
+ if (Endian == llvm::endianness::big) {
out.write("gcno", 4);
out.write(Options.Version, 4);
} else {
@@ -1031,9 +1031,9 @@ void GCOVProfiler::emitGlobalConstructor(
FunctionCallee GCOVProfiler::getStartFileFunc(const TargetLibraryInfo *TLI) {
Type *Args[] = {
- Type::getInt8PtrTy(*Ctx), // const char *orig_filename
- Type::getInt32Ty(*Ctx), // uint32_t version
- Type::getInt32Ty(*Ctx), // uint32_t checksum
+ PointerType::getUnqual(*Ctx), // const char *orig_filename
+ Type::getInt32Ty(*Ctx), // uint32_t version
+ Type::getInt32Ty(*Ctx), // uint32_t checksum
};
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
return M->getOrInsertFunction("llvm_gcda_start_file", FTy,
@@ -1053,8 +1053,8 @@ FunctionCallee GCOVProfiler::getEmitFunctionFunc(const TargetLibraryInfo *TLI) {
FunctionCallee GCOVProfiler::getEmitArcsFunc(const TargetLibraryInfo *TLI) {
Type *Args[] = {
- Type::getInt32Ty(*Ctx), // uint32_t num_counters
- Type::getInt64PtrTy(*Ctx), // uint64_t *counters
+ Type::getInt32Ty(*Ctx), // uint32_t num_counters
+ PointerType::getUnqual(*Ctx), // uint64_t *counters
};
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy,
@@ -1100,19 +1100,16 @@ Function *GCOVProfiler::insertCounterWriteout(
// Collect the relevant data into a large constant data structure that we can
// walk to write out everything.
StructType *StartFileCallArgsTy = StructType::create(
- {Builder.getInt8PtrTy(), Builder.getInt32Ty(), Builder.getInt32Ty()},
+ {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getInt32Ty()},
"start_file_args_ty");
StructType *EmitFunctionCallArgsTy = StructType::create(
{Builder.getInt32Ty(), Builder.getInt32Ty(), Builder.getInt32Ty()},
"emit_function_args_ty");
- StructType *EmitArcsCallArgsTy = StructType::create(
- {Builder.getInt32Ty(), Builder.getInt64Ty()->getPointerTo()},
- "emit_arcs_args_ty");
- StructType *FileInfoTy =
- StructType::create({StartFileCallArgsTy, Builder.getInt32Ty(),
- EmitFunctionCallArgsTy->getPointerTo(),
- EmitArcsCallArgsTy->getPointerTo()},
- "file_info");
+ auto *PtrTy = Builder.getPtrTy();
+ StructType *EmitArcsCallArgsTy =
+ StructType::create({Builder.getInt32Ty(), PtrTy}, "emit_arcs_args_ty");
+ StructType *FileInfoTy = StructType::create(
+ {StartFileCallArgsTy, Builder.getInt32Ty(), PtrTy, PtrTy}, "file_info");
Constant *Zero32 = Builder.getInt32(0);
// Build an explicit array of two zeros for use in ConstantExpr GEP building.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 28db47a19092..f7f8fed643e9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -17,9 +17,11 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
@@ -42,7 +44,6 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
@@ -52,6 +53,7 @@
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
@@ -134,7 +136,7 @@ static cl::opt<size_t> ClMaxLifetimes(
static cl::opt<bool>
ClUseAfterScope("hwasan-use-after-scope",
cl::desc("detect use after scope within function"),
- cl::Hidden, cl::init(false));
+ cl::Hidden, cl::init(true));
static cl::opt<bool> ClGenerateTagsWithCalls(
"hwasan-generate-tags-with-calls",
@@ -223,6 +225,10 @@ static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
cl::desc("inline all checks"),
cl::Hidden, cl::init(false));
+static cl::opt<bool> ClInlineFastPathChecks("hwasan-inline-fast-path-checks",
+ cl::desc("inline all checks"),
+ cl::Hidden, cl::init(false));
+
// Enabled from clang by "-fsanitize-hwaddress-experimental-aliasing".
static cl::opt<bool> ClUsePageAliases("hwasan-experimental-use-page-aliases",
cl::desc("Use page aliasing in HWASan"),
@@ -274,9 +280,18 @@ public:
initializeModule();
}
+ void sanitizeFunction(Function &F, FunctionAnalysisManager &FAM);
+
+private:
+ struct ShadowTagCheckInfo {
+ Instruction *TagMismatchTerm = nullptr;
+ Value *PtrLong = nullptr;
+ Value *AddrLong = nullptr;
+ Value *PtrTag = nullptr;
+ Value *MemTag = nullptr;
+ };
void setSSI(const StackSafetyGlobalInfo *S) { SSI = S; }
- void sanitizeFunction(Function &F, FunctionAnalysisManager &FAM);
void initializeModule();
void createHwasanCtorComdat();
@@ -291,18 +306,24 @@ public:
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
int64_t getAccessInfo(bool IsWrite, unsigned AccessSizeIndex);
+ ShadowTagCheckInfo insertShadowTagCheck(Value *Ptr, Instruction *InsertBefore,
+ DomTreeUpdater &DTU, LoopInfo *LI);
void instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
unsigned AccessSizeIndex,
- Instruction *InsertBefore);
+ Instruction *InsertBefore,
+ DomTreeUpdater &DTU, LoopInfo *LI);
void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
unsigned AccessSizeIndex,
- Instruction *InsertBefore);
+ Instruction *InsertBefore, DomTreeUpdater &DTU,
+ LoopInfo *LI);
bool ignoreMemIntrinsic(MemIntrinsic *MI);
void instrumentMemIntrinsic(MemIntrinsic *MI);
- bool instrumentMemAccess(InterestingMemoryOperand &O);
+ bool instrumentMemAccess(InterestingMemoryOperand &O, DomTreeUpdater &DTU,
+ LoopInfo *LI);
bool ignoreAccess(Instruction *Inst, Value *Ptr);
void getInterestingMemoryOperands(
- Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting);
+ Instruction *I, const TargetLibraryInfo &TLI,
+ SmallVectorImpl<InterestingMemoryOperand> &Interesting);
void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
@@ -332,7 +353,6 @@ public:
void instrumentPersonalityFunctions();
-private:
LLVMContext *C;
Module &M;
const StackSafetyGlobalInfo *SSI;
@@ -364,7 +384,7 @@ private:
Type *VoidTy = Type::getVoidTy(M.getContext());
Type *IntptrTy;
- Type *Int8PtrTy;
+ PointerType *PtrTy;
Type *Int8Ty;
Type *Int32Ty;
Type *Int64Ty = Type::getInt64Ty(M.getContext());
@@ -372,6 +392,7 @@ private:
bool CompileKernel;
bool Recover;
bool OutlinedChecks;
+ bool InlineFastPath;
bool UseShortGranules;
bool InstrumentLandingPads;
bool InstrumentWithCalls;
@@ -420,6 +441,12 @@ PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
HWASan.sanitizeFunction(F, FAM);
PreservedAnalyses PA = PreservedAnalyses::none();
+ // DominatorTreeAnalysis, PostDominatorTreeAnalysis, and LoopAnalysis
+ // are incrementally updated throughout this pass whenever
+ // SplitBlockAndInsertIfThen is called.
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<PostDominatorTreeAnalysis>();
+ PA.preserve<LoopAnalysis>();
// GlobalsAA is considered stateless and does not get invalidated unless
// explicitly invalidated; PreservedAnalyses::none() is not enough. Sanitizers
// make changes that require GlobalsAA to be invalidated.
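
These preserves are only sound because every CFG split in the rewritten code below threads a DomTreeUpdater (and LoopInfo) through SplitBlockAndInsertIfThen, keeping the trees consistent instead of invalidating them. A hedged sketch of the wiring a caller such as sanitizeFunction would use; the exact body is outside this excerpt:

    #include "llvm/Analysis/DomTreeUpdater.h"
    #include "llvm/Analysis/PostDominators.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/IR/PassManager.h"
    using namespace llvm;

    void wireUpdater(Function &F, FunctionAnalysisManager &FAM) {
      // Only update trees the manager has already computed; the Lazy
      // strategy batches CFG updates and applies them when next queried.
      DominatorTree *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
      PostDominatorTree *PDT =
          FAM.getCachedResult<PostDominatorTreeAnalysis>(F);
      DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy);
      // ...thread &DTU (and any cached LoopInfo) into every
      // SplitBlockAndInsertIfThen call, as the hunks below do.
    }
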
@@ -560,7 +587,7 @@ void HWAddressSanitizer::initializeModule() {
C = &(M.getContext());
IRBuilder<> IRB(*C);
IntptrTy = IRB.getIntPtrTy(DL);
- Int8PtrTy = IRB.getInt8PtrTy();
+ PtrTy = IRB.getPtrTy();
Int8Ty = IRB.getInt8Ty();
Int32Ty = IRB.getInt32Ty();
@@ -579,6 +606,13 @@ void HWAddressSanitizer::initializeModule() {
TargetTriple.isOSBinFormatELF() &&
(ClInlineAllChecks.getNumOccurrences() ? !ClInlineAllChecks : !Recover);
+ InlineFastPath =
+ (ClInlineFastPathChecks.getNumOccurrences()
+ ? ClInlineFastPathChecks
+ : !(TargetTriple.isAndroid() ||
+ TargetTriple.isOSFuchsia())); // These platforms may prefer less
+ // inlining to reduce binary size.
+
if (ClMatchAllTag.getNumOccurrences()) {
if (ClMatchAllTag != -1) {
MatchAllTag = ClMatchAllTag & 0xFF;
@@ -633,19 +667,19 @@ void HWAddressSanitizer::initializeCallbacks(Module &M) {
FunctionType::get(VoidTy, {IntptrTy, IntptrTy, Int8Ty}, false);
HwasanMemoryAccessCallbackFnTy =
FunctionType::get(VoidTy, {IntptrTy, Int8Ty}, false);
- HwasanMemTransferFnTy = FunctionType::get(
- Int8PtrTy, {Int8PtrTy, Int8PtrTy, IntptrTy, Int8Ty}, false);
- HwasanMemsetFnTy = FunctionType::get(
- Int8PtrTy, {Int8PtrTy, Int32Ty, IntptrTy, Int8Ty}, false);
+ HwasanMemTransferFnTy =
+ FunctionType::get(PtrTy, {PtrTy, PtrTy, IntptrTy, Int8Ty}, false);
+ HwasanMemsetFnTy =
+ FunctionType::get(PtrTy, {PtrTy, Int32Ty, IntptrTy, Int8Ty}, false);
} else {
HwasanMemoryAccessCallbackSizedFnTy =
FunctionType::get(VoidTy, {IntptrTy, IntptrTy}, false);
HwasanMemoryAccessCallbackFnTy =
FunctionType::get(VoidTy, {IntptrTy}, false);
HwasanMemTransferFnTy =
- FunctionType::get(Int8PtrTy, {Int8PtrTy, Int8PtrTy, IntptrTy}, false);
+ FunctionType::get(PtrTy, {PtrTy, PtrTy, IntptrTy}, false);
HwasanMemsetFnTy =
- FunctionType::get(Int8PtrTy, {Int8PtrTy, Int32Ty, IntptrTy}, false);
+ FunctionType::get(PtrTy, {PtrTy, Int32Ty, IntptrTy}, false);
}
for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
@@ -679,7 +713,7 @@ void HWAddressSanitizer::initializeCallbacks(Module &M) {
MemIntrinCallbackPrefix + "memset" + MatchAllStr, HwasanMemsetFnTy);
HwasanTagMemoryFunc = M.getOrInsertFunction("__hwasan_tag_memory", VoidTy,
- Int8PtrTy, Int8Ty, IntptrTy);
+ PtrTy, Int8Ty, IntptrTy);
HwasanGenerateTagFunc =
M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);
@@ -699,7 +733,7 @@ Value *HWAddressSanitizer::getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val) {
// This prevents code bloat as a result of rematerializing trivial definitions
// such as constants or global addresses at every load and store.
InlineAsm *Asm =
- InlineAsm::get(FunctionType::get(Int8PtrTy, {Val->getType()}, false),
+ InlineAsm::get(FunctionType::get(PtrTy, {Val->getType()}, false),
StringRef(""), StringRef("=r,0"),
/*hasSideEffects=*/false);
return IRB.CreateCall(Asm, {Val}, ".hwasan.shadow");
@@ -713,15 +747,15 @@ Value *HWAddressSanitizer::getShadowNonTls(IRBuilder<> &IRB) {
if (Mapping.Offset != kDynamicShadowSentinel)
return getOpaqueNoopCast(
IRB, ConstantExpr::getIntToPtr(
- ConstantInt::get(IntptrTy, Mapping.Offset), Int8PtrTy));
+ ConstantInt::get(IntptrTy, Mapping.Offset), PtrTy));
if (Mapping.InGlobal)
return getDynamicShadowIfunc(IRB);
Value *GlobalDynamicAddress =
IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
- kHwasanShadowMemoryDynamicAddress, Int8PtrTy);
- return IRB.CreateLoad(Int8PtrTy, GlobalDynamicAddress);
+ kHwasanShadowMemoryDynamicAddress, PtrTy);
+ return IRB.CreateLoad(PtrTy, GlobalDynamicAddress);
}
bool HWAddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
@@ -748,7 +782,8 @@ bool HWAddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
}
void HWAddressSanitizer::getInterestingMemoryOperands(
- Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
+ Instruction *I, const TargetLibraryInfo &TLI,
+ SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
// Skip memory accesses inserted by another instrumentation.
if (I->hasMetadata(LLVMContext::MD_nosanitize))
return;
@@ -786,6 +821,7 @@ void HWAddressSanitizer::getInterestingMemoryOperands(
Type *Ty = CI->getParamByValType(ArgNo);
Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
}
+ maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
}
}
@@ -824,7 +860,7 @@ Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
// Mem >> Scale
Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale);
if (Mapping.Offset == 0)
- return IRB.CreateIntToPtr(Shadow, Int8PtrTy);
+ return IRB.CreateIntToPtr(Shadow, PtrTy);
// (Mem >> Scale) + Offset
return IRB.CreateGEP(Int8Ty, ShadowBase, Shadow);
}
@@ -839,14 +875,48 @@ int64_t HWAddressSanitizer::getAccessInfo(bool IsWrite,
(AccessSizeIndex << HWASanAccessInfo::AccessSizeShift);
}
+HWAddressSanitizer::ShadowTagCheckInfo
+HWAddressSanitizer::insertShadowTagCheck(Value *Ptr, Instruction *InsertBefore,
+ DomTreeUpdater &DTU, LoopInfo *LI) {
+ ShadowTagCheckInfo R;
+
+ IRBuilder<> IRB(InsertBefore);
+
+ R.PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
+ R.PtrTag =
+ IRB.CreateTrunc(IRB.CreateLShr(R.PtrLong, PointerTagShift), Int8Ty);
+ R.AddrLong = untagPointer(IRB, R.PtrLong);
+ Value *Shadow = memToShadow(R.AddrLong, IRB);
+ R.MemTag = IRB.CreateLoad(Int8Ty, Shadow);
+ Value *TagMismatch = IRB.CreateICmpNE(R.PtrTag, R.MemTag);
+
+ if (MatchAllTag.has_value()) {
+ Value *TagNotIgnored = IRB.CreateICmpNE(
+ R.PtrTag, ConstantInt::get(R.PtrTag->getType(), *MatchAllTag));
+ TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
+ }
+
+ R.TagMismatchTerm = SplitBlockAndInsertIfThen(
+ TagMismatch, InsertBefore, false,
+ MDBuilder(*C).createBranchWeights(1, 100000), &DTU, LI);
+
+ return R;
+}
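
The helper returns everything a caller might still need: TagMismatchTerm is the terminator of the newly split block, which the 1:100000 branch weights mark as cold, while PtrLong/AddrLong/PtrTag/MemTag are the values computed once on the hot path. Callers position an IRBuilder at the mismatch terminator and reuse the loaded tags instead of recomputing them, as instrumentMemAccessInline below does; a compressed sketch of that consumption pattern, names as in the patch:

    ShadowTagCheckInfo TCI = insertShadowTagCheck(Ptr, InsertBefore, DTU, LI);
    IRBuilder<> IRB(TCI.TagMismatchTerm); // insert into the cold mismatch block
    Value *OutOfShortGranuleTagRange =    // reuse the memory tag loaded above
        IRB.CreateICmpUGT(TCI.MemTag, ConstantInt::get(Int8Ty, 15));
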
+
void HWAddressSanitizer::instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
unsigned AccessSizeIndex,
- Instruction *InsertBefore) {
+ Instruction *InsertBefore,
+ DomTreeUpdater &DTU,
+ LoopInfo *LI) {
assert(!UsePageAliases);
const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
+
+ if (InlineFastPath)
+ InsertBefore =
+ insertShadowTagCheck(Ptr, InsertBefore, DTU, LI).TagMismatchTerm;
+
IRBuilder<> IRB(InsertBefore);
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
- Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy);
IRB.CreateCall(Intrinsic::getDeclaration(
M, UseShortGranules
? Intrinsic::hwasan_check_memaccess_shortgranules
@@ -856,55 +926,38 @@ void HWAddressSanitizer::instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
unsigned AccessSizeIndex,
- Instruction *InsertBefore) {
+ Instruction *InsertBefore,
+ DomTreeUpdater &DTU,
+ LoopInfo *LI) {
assert(!UsePageAliases);
const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
- IRBuilder<> IRB(InsertBefore);
-
- Value *PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
- Value *PtrTag =
- IRB.CreateTrunc(IRB.CreateLShr(PtrLong, PointerTagShift), Int8Ty);
- Value *AddrLong = untagPointer(IRB, PtrLong);
- Value *Shadow = memToShadow(AddrLong, IRB);
- Value *MemTag = IRB.CreateLoad(Int8Ty, Shadow);
- Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag);
-
- if (MatchAllTag.has_value()) {
- Value *TagNotIgnored = IRB.CreateICmpNE(
- PtrTag, ConstantInt::get(PtrTag->getType(), *MatchAllTag));
- TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
- }
- Instruction *CheckTerm =
- SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, false,
- MDBuilder(*C).createBranchWeights(1, 100000));
+ ShadowTagCheckInfo TCI = insertShadowTagCheck(Ptr, InsertBefore, DTU, LI);
- IRB.SetInsertPoint(CheckTerm);
+ IRBuilder<> IRB(TCI.TagMismatchTerm);
Value *OutOfShortGranuleTagRange =
- IRB.CreateICmpUGT(MemTag, ConstantInt::get(Int8Ty, 15));
- Instruction *CheckFailTerm =
- SplitBlockAndInsertIfThen(OutOfShortGranuleTagRange, CheckTerm, !Recover,
- MDBuilder(*C).createBranchWeights(1, 100000));
+ IRB.CreateICmpUGT(TCI.MemTag, ConstantInt::get(Int8Ty, 15));
+ Instruction *CheckFailTerm = SplitBlockAndInsertIfThen(
+ OutOfShortGranuleTagRange, TCI.TagMismatchTerm, !Recover,
+ MDBuilder(*C).createBranchWeights(1, 100000), &DTU, LI);
- IRB.SetInsertPoint(CheckTerm);
- Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(PtrLong, 15), Int8Ty);
+ IRB.SetInsertPoint(TCI.TagMismatchTerm);
+ Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(TCI.PtrLong, 15), Int8Ty);
PtrLowBits = IRB.CreateAdd(
PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1));
- Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, MemTag);
- SplitBlockAndInsertIfThen(PtrLowBitsOOB, CheckTerm, false,
- MDBuilder(*C).createBranchWeights(1, 100000),
- (DomTreeUpdater *)nullptr, nullptr,
- CheckFailTerm->getParent());
-
- IRB.SetInsertPoint(CheckTerm);
- Value *InlineTagAddr = IRB.CreateOr(AddrLong, 15);
- InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, Int8PtrTy);
+ Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, TCI.MemTag);
+ SplitBlockAndInsertIfThen(PtrLowBitsOOB, TCI.TagMismatchTerm, false,
+ MDBuilder(*C).createBranchWeights(1, 100000), &DTU,
+ LI, CheckFailTerm->getParent());
+
+ IRB.SetInsertPoint(TCI.TagMismatchTerm);
+ Value *InlineTagAddr = IRB.CreateOr(TCI.AddrLong, 15);
+ InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, PtrTy);
Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr);
- Value *InlineTagMismatch = IRB.CreateICmpNE(PtrTag, InlineTag);
- SplitBlockAndInsertIfThen(InlineTagMismatch, CheckTerm, false,
- MDBuilder(*C).createBranchWeights(1, 100000),
- (DomTreeUpdater *)nullptr, nullptr,
- CheckFailTerm->getParent());
+ Value *InlineTagMismatch = IRB.CreateICmpNE(TCI.PtrTag, InlineTag);
+ SplitBlockAndInsertIfThen(InlineTagMismatch, TCI.TagMismatchTerm, false,
+ MDBuilder(*C).createBranchWeights(1, 100000), &DTU,
+ LI, CheckFailTerm->getParent());
IRB.SetInsertPoint(CheckFailTerm);
InlineAsm *Asm;
@@ -912,7 +965,7 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
case Triple::x86_64:
// The signal handler will find the data address in rdi.
Asm = InlineAsm::get(
- FunctionType::get(VoidTy, {PtrLong->getType()}, false),
+ FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
"int3\nnopl " +
itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)) +
"(%rax)",
@@ -923,7 +976,7 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
case Triple::aarch64_be:
// The signal handler will find the data address in x0.
Asm = InlineAsm::get(
- FunctionType::get(VoidTy, {PtrLong->getType()}, false),
+ FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
"brk #" + itostr(0x900 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
"{x0}",
/*hasSideEffects=*/true);
@@ -931,7 +984,7 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
case Triple::riscv64:
// The signal handler will find the data address in x10.
Asm = InlineAsm::get(
- FunctionType::get(VoidTy, {PtrLong->getType()}, false),
+ FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
"ebreak\naddiw x0, x11, " +
itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
"{x10}",
@@ -940,9 +993,10 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
default:
report_fatal_error("unsupported architecture");
}
- IRB.CreateCall(Asm, PtrLong);
+ IRB.CreateCall(Asm, TCI.PtrLong);
if (Recover)
- cast<BranchInst>(CheckFailTerm)->setSuccessor(0, CheckTerm->getParent());
+ cast<BranchInst>(CheckFailTerm)
+ ->setSuccessor(0, TCI.TagMismatchTerm->getParent());
}
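
Taken together, the inlined sequence above implements HWASan's short-granule check: a shadow tag above 15 means the whole granule is tagged and any mismatch is fatal, while a value of 1-15 is the count of valid bytes, with the true tag stored in the granule's last byte. A rough standalone C++ model of that logic (illustrative only; the pass emits equivalent IR, the optional match-all short-circuit is omitted, and an AArch64-style top-byte tag with 16-byte granules is assumed):

    #include <cstdint>

    // Model of the inlined check: returns true when the access should trap.
    static bool accessFails(uintptr_t Ptr, uint8_t MemTag, unsigned AccessSize) {
      uint8_t PtrTag = uint8_t(Ptr >> 56);      // tag lives in the top byte
      if (PtrTag == MemTag)
        return false;                           // fast path: tags match
      if (MemTag > 15)
        return true;                            // fully tagged granule: mismatch
      // Short granule: MemTag is the number of valid bytes in the granule.
      uint8_t PtrLowBits = uint8_t(Ptr & 15) + uint8_t(AccessSize - 1);
      if (PtrLowBits >= MemTag)
        return true;                            // access runs past the valid prefix
      // The true tag is stored inline in the granule's last byte.
      uintptr_t Untagged = Ptr & ((uintptr_t(1) << 56) - 1);
      uint8_t InlineTag = *reinterpret_cast<const uint8_t *>(Untagged | 15);
      return PtrTag != InlineTag;
    }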
bool HWAddressSanitizer::ignoreMemIntrinsic(MemIntrinsic *MI) {
@@ -958,40 +1012,28 @@ bool HWAddressSanitizer::ignoreMemIntrinsic(MemIntrinsic *MI) {
void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
IRBuilder<> IRB(MI);
if (isa<MemTransferInst>(MI)) {
- if (UseMatchAllCallback) {
- IRB.CreateCall(
- isa<MemMoveInst>(MI) ? HwasanMemmove : HwasanMemcpy,
- {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false),
- ConstantInt::get(Int8Ty, *MatchAllTag)});
- } else {
- IRB.CreateCall(
- isa<MemMoveInst>(MI) ? HwasanMemmove : HwasanMemcpy,
- {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
- }
+ SmallVector<Value *, 4> Args{
+ MI->getOperand(0), MI->getOperand(1),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)};
+
+ if (UseMatchAllCallback)
+ Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
+ IRB.CreateCall(isa<MemMoveInst>(MI) ? HwasanMemmove : HwasanMemcpy, Args);
} else if (isa<MemSetInst>(MI)) {
- if (UseMatchAllCallback) {
- IRB.CreateCall(
- HwasanMemset,
- {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
- IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false),
- ConstantInt::get(Int8Ty, *MatchAllTag)});
- } else {
- IRB.CreateCall(
- HwasanMemset,
- {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
- IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
- }
+ SmallVector<Value *, 4> Args{
+ MI->getOperand(0),
+ IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)};
+ if (UseMatchAllCallback)
+ Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
+ IRB.CreateCall(HwasanMemset, Args);
}
MI->eraseFromParent();
}
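
With opaque pointers, the memory-intrinsic operands above no longer need explicit i8* casts; only the size is cast, and the match-all tag byte is appended when configured. The runtime entry points being targeted look roughly like these declarations (prototypes are an assumption sketched from the call sites above; the match-all variants additionally take the trailing tag byte):

    #include <cstdint>

    // Assumed shapes of the HWASan runtime replacements for the intrinsics.
    extern "C" void *__hwasan_memcpy(void *Dst, const void *Src, uintptr_t Size);
    extern "C" void *__hwasan_memmove(void *Dst, const void *Src, uintptr_t Size);
    extern "C" void *__hwasan_memset(void *Dst, int Byte, uintptr_t Size);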
-bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O) {
+bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O,
+ DomTreeUpdater &DTU,
+ LoopInfo *LI) {
Value *Addr = O.getPtr();
LLVM_DEBUG(dbgs() << "Instrumenting: " << O.getInsn() << "\n");
@@ -1006,34 +1048,26 @@ bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O) {
*O.Alignment >= O.TypeStoreSize / 8)) {
size_t AccessSizeIndex = TypeSizeToSizeIndex(O.TypeStoreSize);
if (InstrumentWithCalls) {
- if (UseMatchAllCallback) {
- IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
- {IRB.CreatePointerCast(Addr, IntptrTy),
- ConstantInt::get(Int8Ty, *MatchAllTag)});
- } else {
- IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
- IRB.CreatePointerCast(Addr, IntptrTy));
- }
+ SmallVector<Value *, 2> Args{IRB.CreatePointerCast(Addr, IntptrTy)};
+ if (UseMatchAllCallback)
+ Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
+ IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
+ Args);
} else if (OutlinedChecks) {
- instrumentMemAccessOutline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn());
+ instrumentMemAccessOutline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn(),
+ DTU, LI);
} else {
- instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn());
+ instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn(),
+ DTU, LI);
}
} else {
- if (UseMatchAllCallback) {
- IRB.CreateCall(
- HwasanMemoryAccessCallbackSized[O.IsWrite],
- {IRB.CreatePointerCast(Addr, IntptrTy),
- IRB.CreateUDiv(IRB.CreateTypeSize(IntptrTy, O.TypeStoreSize),
- ConstantInt::get(IntptrTy, 8)),
- ConstantInt::get(Int8Ty, *MatchAllTag)});
- } else {
- IRB.CreateCall(
- HwasanMemoryAccessCallbackSized[O.IsWrite],
- {IRB.CreatePointerCast(Addr, IntptrTy),
- IRB.CreateUDiv(IRB.CreateTypeSize(IntptrTy, O.TypeStoreSize),
- ConstantInt::get(IntptrTy, 8))});
- }
+ SmallVector<Value *, 3> Args{
+ IRB.CreatePointerCast(Addr, IntptrTy),
+ IRB.CreateUDiv(IRB.CreateTypeSize(IntptrTy, O.TypeStoreSize),
+ ConstantInt::get(IntptrTy, 8))};
+ if (UseMatchAllCallback)
+ Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
+ IRB.CreateCall(HwasanMemoryAccessCallbackSized[O.IsWrite], Args);
}
untagPointerOperand(O.getInsn(), Addr);
@@ -1049,7 +1083,7 @@ void HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
Tag = IRB.CreateTrunc(Tag, Int8Ty);
if (InstrumentWithCalls) {
IRB.CreateCall(HwasanTagMemoryFunc,
- {IRB.CreatePointerCast(AI, Int8PtrTy), Tag,
+ {IRB.CreatePointerCast(AI, PtrTy), Tag,
ConstantInt::get(IntptrTy, AlignedSize)});
} else {
size_t ShadowSize = Size >> Mapping.Scale;
@@ -1067,9 +1101,9 @@ void HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
const uint8_t SizeRemainder = Size % Mapping.getObjectAlignment().value();
IRB.CreateStore(ConstantInt::get(Int8Ty, SizeRemainder),
IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
- IRB.CreateStore(Tag, IRB.CreateConstGEP1_32(
- Int8Ty, IRB.CreatePointerCast(AI, Int8PtrTy),
- AlignedSize - 1));
+ IRB.CreateStore(
+ Tag, IRB.CreateConstGEP1_32(Int8Ty, IRB.CreatePointerCast(AI, PtrTy),
+ AlignedSize - 1));
}
}
}
@@ -1183,10 +1217,8 @@ Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
// in Bionic's libc/private/bionic_tls.h.
Function *ThreadPointerFunc =
Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
- Value *SlotPtr = IRB.CreatePointerCast(
- IRB.CreateConstGEP1_32(Int8Ty, IRB.CreateCall(ThreadPointerFunc), 0x30),
- Ty->getPointerTo(0));
- return SlotPtr;
+ return IRB.CreateConstGEP1_32(Int8Ty, IRB.CreateCall(ThreadPointerFunc),
+ 0x30);
}
if (ThreadPtrGlobal)
return ThreadPtrGlobal;
@@ -1208,7 +1240,7 @@ Value *HWAddressSanitizer::getSP(IRBuilder<> &IRB) {
Module *M = F->getParent();
auto *GetStackPointerFn = Intrinsic::getDeclaration(
M, Intrinsic::frameaddress,
- IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
+ IRB.getPtrTy(M->getDataLayout().getAllocaAddrSpace()));
CachedSP = IRB.CreatePtrToInt(
IRB.CreateCall(GetStackPointerFn, {Constant::getNullValue(Int32Ty)}),
IntptrTy);
@@ -1271,8 +1303,8 @@ void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
// Store data to ring buffer.
Value *FrameRecordInfo = getFrameRecordInfo(IRB);
- Value *RecordPtr = IRB.CreateIntToPtr(ThreadLongMaybeUntagged,
- IntptrTy->getPointerTo(0));
+ Value *RecordPtr =
+ IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IRB.getPtrTy(0));
IRB.CreateStore(FrameRecordInfo, RecordPtr);
// Update the ring buffer. Top byte of ThreadLong defines the size of the
@@ -1309,7 +1341,7 @@ void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
ThreadLongMaybeUntagged,
ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
- ShadowBase = IRB.CreateIntToPtr(ShadowBase, Int8PtrTy);
+ ShadowBase = IRB.CreateIntToPtr(ShadowBase, PtrTy);
}
}
@@ -1369,7 +1401,7 @@ bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
size_t Size = memtag::getAllocaSizeInBytes(*AI);
size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
- Value *AICast = IRB.CreatePointerCast(AI, Int8PtrTy);
+ Value *AICast = IRB.CreatePointerCast(AI, PtrTy);
auto HandleLifetime = [&](IntrinsicInst *II) {
// Set the lifetime intrinsic to cover the whole alloca. This reduces the
@@ -1462,6 +1494,7 @@ void HWAddressSanitizer::sanitizeFunction(Function &F,
SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument;
SmallVector<MemIntrinsic *, 16> IntrinToInstrument;
SmallVector<Instruction *, 8> LandingPadVec;
+ const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
memtag::StackInfoBuilder SIB(SSI);
for (auto &Inst : instructions(F)) {
@@ -1472,7 +1505,7 @@ void HWAddressSanitizer::sanitizeFunction(Function &F,
if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
LandingPadVec.push_back(&Inst);
- getInterestingMemoryOperands(&Inst, OperandsToInstrument);
+ getInterestingMemoryOperands(&Inst, TLI, OperandsToInstrument);
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst))
if (!ignoreMemIntrinsic(MI))
@@ -1528,8 +1561,13 @@ void HWAddressSanitizer::sanitizeFunction(Function &F,
}
}
+ DominatorTree *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
+ PostDominatorTree *PDT = FAM.getCachedResult<PostDominatorTreeAnalysis>(F);
+ LoopInfo *LI = FAM.getCachedResult<LoopAnalysis>(F);
+ DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy);
for (auto &Operand : OperandsToInstrument)
- instrumentMemAccess(Operand);
+ instrumentMemAccess(Operand, DTU, LI);
+ DTU.flush();
if (ClInstrumentMemIntrinsics && !IntrinToInstrument.empty()) {
for (auto *Inst : IntrinToInstrument)
@@ -1624,7 +1662,7 @@ void HWAddressSanitizer::instrumentGlobals() {
if (GV.hasSanitizerMetadata() && GV.getSanitizerMetadata().NoHWAddress)
continue;
- if (GV.isDeclarationForLinker() || GV.getName().startswith("llvm.") ||
+ if (GV.isDeclarationForLinker() || GV.getName().starts_with("llvm.") ||
GV.isThreadLocal())
continue;
@@ -1682,8 +1720,8 @@ void HWAddressSanitizer::instrumentPersonalityFunctions() {
return;
FunctionCallee HwasanPersonalityWrapper = M.getOrInsertFunction(
- "__hwasan_personality_wrapper", Int32Ty, Int32Ty, Int32Ty, Int64Ty,
- Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy);
+ "__hwasan_personality_wrapper", Int32Ty, Int32Ty, Int32Ty, Int64Ty, PtrTy,
+ PtrTy, PtrTy, PtrTy, PtrTy);
FunctionCallee UnwindGetGR = M.getOrInsertFunction("_Unwind_GetGR", VoidTy);
FunctionCallee UnwindGetCFA = M.getOrInsertFunction("_Unwind_GetCFA", VoidTy);
@@ -1692,7 +1730,7 @@ void HWAddressSanitizer::instrumentPersonalityFunctions() {
if (P.first)
ThunkName += ("." + P.first->getName()).str();
FunctionType *ThunkFnTy = FunctionType::get(
- Int32Ty, {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int8PtrTy}, false);
+ Int32Ty, {Int32Ty, Int32Ty, Int64Ty, PtrTy, PtrTy}, false);
bool IsLocal = P.first && (!isa<GlobalValue>(P.first) ||
cast<GlobalValue>(P.first)->hasLocalLinkage());
auto *ThunkFn = Function::Create(ThunkFnTy,
@@ -1710,10 +1748,8 @@ void HWAddressSanitizer::instrumentPersonalityFunctions() {
HwasanPersonalityWrapper,
{ThunkFn->getArg(0), ThunkFn->getArg(1), ThunkFn->getArg(2),
ThunkFn->getArg(3), ThunkFn->getArg(4),
- P.first ? IRB.CreateBitCast(P.first, Int8PtrTy)
- : Constant::getNullValue(Int8PtrTy),
- IRB.CreateBitCast(UnwindGetGR.getCallee(), Int8PtrTy),
- IRB.CreateBitCast(UnwindGetCFA.getCallee(), Int8PtrTy)});
+ P.first ? P.first : Constant::getNullValue(PtrTy),
+ UnwindGetGR.getCallee(), UnwindGetCFA.getCallee()});
WrapperCall->setTailCall();
IRB.CreateRet(WrapperCall);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 5c9799235017..7344fea17517 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Value.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Casting.h"
@@ -256,10 +257,7 @@ CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
promoteCallWithIfThenElse(CB, DirectCallee, BranchWeights);
if (AttachProfToDirectCall) {
- MDBuilder MDB(NewInst.getContext());
- NewInst.setMetadata(
- LLVMContext::MD_prof,
- MDB.createBranchWeights({static_cast<uint32_t>(Count)}));
+ setBranchWeights(NewInst, {static_cast<uint32_t>(Count)});
}
using namespace ore;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index a7b1953ce81c..fe5a0578bd97 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass lowers instrprof_* intrinsics emitted by a frontend for profiling.
+// This pass lowers instrprof_* intrinsics emitted by an instrumentor.
// It also builds the data structures and initialization code needed for
// updating execution counts and emitting the profile at runtime.
//
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
@@ -23,6 +24,7 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
@@ -47,6 +49,9 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
@@ -59,10 +64,24 @@ using namespace llvm;
#define DEBUG_TYPE "instrprof"
namespace llvm {
-cl::opt<bool>
- DebugInfoCorrelate("debug-info-correlate",
- cl::desc("Use debug info to correlate profiles."),
- cl::init(false));
+// TODO: Remove -debug-info-correlate in next LLVM release, in favor of
+// -profile-correlate=debug-info.
+cl::opt<bool> DebugInfoCorrelate(
+ "debug-info-correlate",
+ cl::desc("Use debug info to correlate profiles. (Deprecated, use "
+ "-profile-correlate=debug-info)"),
+ cl::init(false));
+
+cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate(
+ "profile-correlate",
+ cl::desc("Use debug info or binary file to correlate profiles."),
+ cl::init(InstrProfCorrelator::NONE),
+ cl::values(clEnumValN(InstrProfCorrelator::NONE, "",
+ "No profile correlation"),
+ clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info",
+ "Use debug info to correlate"),
+ clEnumValN(InstrProfCorrelator::BINARY, "binary",
+ "Use binary to correlate")));
} // namespace llvm
namespace {
@@ -147,6 +166,155 @@ cl::opt<bool> SkipRetExitBlock(
"skip-ret-exit-block", cl::init(true),
cl::desc("Suppress counter promotion if exit blocks contain ret."));
+using LoadStorePair = std::pair<Instruction *, Instruction *>;
+
+class InstrLowerer final {
+public:
+ InstrLowerer(Module &M, const InstrProfOptions &Options,
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI,
+ bool IsCS)
+ : M(M), Options(Options), TT(Triple(M.getTargetTriple())), IsCS(IsCS),
+ GetTLI(GetTLI) {}
+
+ bool lower();
+
+private:
+ Module &M;
+ const InstrProfOptions Options;
+ const Triple TT;
+ // Whether this lowering is for the context-sensitive instrumentation.
+ const bool IsCS;
+
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
+ struct PerFunctionProfileData {
+ uint32_t NumValueSites[IPVK_Last + 1] = {};
+ GlobalVariable *RegionCounters = nullptr;
+ GlobalVariable *DataVar = nullptr;
+ GlobalVariable *RegionBitmaps = nullptr;
+ uint32_t NumBitmapBytes = 0;
+
+ PerFunctionProfileData() = default;
+ };
+ DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
+ /// If runtime relocation is enabled, this maps functions to the load
+ /// instruction that produces the profile relocation bias.
+ DenseMap<const Function *, LoadInst *> FunctionToProfileBiasMap;
+ std::vector<GlobalValue *> CompilerUsedVars;
+ std::vector<GlobalValue *> UsedVars;
+ std::vector<GlobalVariable *> ReferencedNames;
+ GlobalVariable *NamesVar = nullptr;
+ size_t NamesSize = 0;
+
+ // Vector of counter load/store pairs to be register promoted.
+ std::vector<LoadStorePair> PromotionCandidates;
+
+ int64_t TotalCountersPromoted = 0;
+
+ /// Lower instrumentation intrinsics in the function. Returns true if there
+ /// was any lowering.
+ bool lowerIntrinsics(Function *F);
+
+ /// Register-promote counter loads and stores in loops.
+ void promoteCounterLoadStores(Function *F);
+
+ /// Returns true if relocating counters at runtime is enabled.
+ bool isRuntimeCounterRelocationEnabled() const;
+
+ /// Returns true if profile counter update register promotion is enabled.
+ bool isCounterPromotionEnabled() const;
+
+ /// Count the number of instrumented value sites for the function.
+ void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins);
+
+ /// Replace instrprof.value.profile with a call to runtime library.
+ void lowerValueProfileInst(InstrProfValueProfileInst *Ins);
+
+ /// Replace instrprof.cover with a store instruction to the coverage byte.
+ void lowerCover(InstrProfCoverInst *Inc);
+
+ /// Replace instrprof.timestamp with a call to
+ /// INSTR_PROF_PROFILE_SET_TIMESTAMP.
+ void lowerTimestamp(InstrProfTimestampInst *TimestampInstruction);
+
+ /// Replace instrprof.increment with an increment of the appropriate value.
+ void lowerIncrement(InstrProfIncrementInst *Inc);
+
+ /// Force emitting of name vars for unused functions.
+ void lowerCoverageData(GlobalVariable *CoverageNamesVar);
+
+ /// Replace instrprof.mcdc.tvbitmask.update with a shift and or instruction
+ /// using the index represented by the a temp value into a bitmap.
+ void lowerMCDCTestVectorBitmapUpdate(InstrProfMCDCTVBitmapUpdate *Ins);
+
+ /// Replace instrprof.mcdc.temp.update with a shift and or instruction using
+ /// the corresponding condition ID.
+ void lowerMCDCCondBitmapUpdate(InstrProfMCDCCondBitmapUpdate *Ins);
+
+ /// Compute the address of the counter value that this profiling instruction
+ /// acts on.
+ Value *getCounterAddress(InstrProfCntrInstBase *I);
+
+ /// Get the region counters for an increment, creating them if necessary.
+ ///
+ /// If the counter array doesn't yet exist, the profile data variables
+ /// referring to them will also be created.
+ GlobalVariable *getOrCreateRegionCounters(InstrProfCntrInstBase *Inc);
+
+ /// Create the region counters.
+ GlobalVariable *createRegionCounters(InstrProfCntrInstBase *Inc,
+ StringRef Name,
+ GlobalValue::LinkageTypes Linkage);
+
+ /// Compute the address of the test vector bitmap that this profiling
+ /// instruction acts on.
+ Value *getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I);
+
+ /// Get the region bitmaps for an increment, creating them if necessary.
+ ///
+ /// If the bitmap array doesn't yet exist, the profile data variables
+ /// referring to them will also be created.
+ GlobalVariable *getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc);
+
+ /// Create the MC/DC bitmap as a byte-aligned array of bytes associated with
+ /// an MC/DC Decision region. The number of bytes required is indicated by
+ /// the intrinsic used (type InstrProfMCDCBitmapInstBase). This is called
+ /// as part of setupProfileSection() and is conceptually very similar to
+ /// what is done for profile data counters in createRegionCounters().
+ GlobalVariable *createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
+ StringRef Name,
+ GlobalValue::LinkageTypes Linkage);
+
+ /// Set Comdat property of GV, if required.
+ void maybeSetComdat(GlobalVariable *GV, Function *Fn, StringRef VarName);
+
+ /// Setup the sections into which counters and bitmaps are allocated.
+ GlobalVariable *setupProfileSection(InstrProfInstBase *Inc,
+ InstrProfSectKind IPSK);
+
+ /// Create INSTR_PROF_DATA variable for counters and bitmaps.
+ void createDataVariable(InstrProfCntrInstBase *Inc);
+
+ /// Emit the section with compressed function names.
+ void emitNameData();
+
+ /// Emit value nodes section for value profiling.
+ void emitVNodes();
+
+ /// Emit runtime registration functions for each profile data variable.
+ void emitRegistration();
+
+ /// Emit the necessary plumbing to pull in the runtime initialization.
+ /// Returns true if a change was made.
+ bool emitRuntimeHook();
+
+ /// Add uses of our data variables and runtime hook.
+ void emitUses();
+
+ /// Create a static initializer for our data, on platforms that need it,
+ /// and for any profile output file that was specified.
+ void emitInitialization();
+};
+
///
/// A helper class to promote one counter RMW operation in the loop
/// into a register update.
@@ -190,7 +358,8 @@ public:
auto *OrigBiasInst = dyn_cast<BinaryOperator>(AddrInst->getOperand(0));
assert(OrigBiasInst->getOpcode() == Instruction::BinaryOps::Add);
Value *BiasInst = Builder.Insert(OrigBiasInst->clone());
- Addr = Builder.CreateIntToPtr(BiasInst, Ty->getPointerTo());
+ Addr = Builder.CreateIntToPtr(BiasInst,
+ PointerType::getUnqual(Ty->getContext()));
}
if (AtomicCounterUpdatePromoted)
// atomic update currently can only be promoted across the current
@@ -241,7 +410,10 @@ public:
return;
for (BasicBlock *ExitBlock : LoopExitBlocks) {
- if (BlockSet.insert(ExitBlock).second) {
+ if (BlockSet.insert(ExitBlock).second &&
+ llvm::none_of(predecessors(ExitBlock), [&](const BasicBlock *Pred) {
+ return llvm::isPresplitCoroSuspendExitEdge(*Pred, *ExitBlock);
+ })) {
ExitBlocks.push_back(ExitBlock);
InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
}
@@ -398,19 +570,21 @@ enum class ValueProfilingCallType {
} // end anonymous namespace
-PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
+PreservedAnalyses InstrProfilingLoweringPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
return FAM.getResult<TargetLibraryAnalysis>(F);
};
- if (!run(M, GetTLI))
+ InstrLowerer Lowerer(M, Options, GetTLI, IsCS);
+ if (!Lowerer.lower())
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
-bool InstrProfiling::lowerIntrinsics(Function *F) {
+bool InstrLowerer::lowerIntrinsics(Function *F) {
bool MadeChange = false;
PromotionCandidates.clear();
for (BasicBlock &BB : *F) {
@@ -430,6 +604,15 @@ bool InstrProfiling::lowerIntrinsics(Function *F) {
} else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(&Instr)) {
lowerValueProfileInst(IPVP);
MadeChange = true;
+ } else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(&Instr)) {
+ IPMP->eraseFromParent();
+ MadeChange = true;
+ } else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(&Instr)) {
+ lowerMCDCTestVectorBitmapUpdate(IPBU);
+ MadeChange = true;
+ } else if (auto *IPTU = dyn_cast<InstrProfMCDCCondBitmapUpdate>(&Instr)) {
+ lowerMCDCCondBitmapUpdate(IPTU);
+ MadeChange = true;
}
}
}
@@ -441,7 +624,7 @@ bool InstrProfiling::lowerIntrinsics(Function *F) {
return true;
}
-bool InstrProfiling::isRuntimeCounterRelocationEnabled() const {
+bool InstrLowerer::isRuntimeCounterRelocationEnabled() const {
// Mach-O doesn't support weak external references.
if (TT.isOSBinFormatMachO())
return false;
@@ -453,14 +636,14 @@ bool InstrProfiling::isRuntimeCounterRelocationEnabled() const {
return TT.isOSFuchsia();
}
-bool InstrProfiling::isCounterPromotionEnabled() const {
+bool InstrLowerer::isCounterPromotionEnabled() const {
if (DoCounterPromotion.getNumOccurrences() > 0)
return DoCounterPromotion;
return Options.DoCounterPromotion;
}
-void InstrProfiling::promoteCounterLoadStores(Function *F) {
+void InstrLowerer::promoteCounterLoadStores(Function *F) {
if (!isCounterPromotionEnabled())
return;
@@ -517,17 +700,7 @@ static bool containsProfilingIntrinsics(Module &M) {
containsIntrinsic(llvm::Intrinsic::instrprof_value_profile);
}
-bool InstrProfiling::run(
- Module &M, std::function<const TargetLibraryInfo &(Function &F)> GetTLI) {
- this->M = &M;
- this->GetTLI = std::move(GetTLI);
- NamesVar = nullptr;
- NamesSize = 0;
- ProfileDataMap.clear();
- CompilerUsedVars.clear();
- UsedVars.clear();
- TT = Triple(M.getTargetTriple());
-
+bool InstrLowerer::lower() {
bool MadeChange = false;
bool NeedsRuntimeHook = needsRuntimeHookUnconditionally(TT);
if (NeedsRuntimeHook)
@@ -544,19 +717,27 @@ bool InstrProfiling::run(
// the instrumented function. This is counting the number of instrumented
// target value sites to enter it as field in the profile data variable.
for (Function &F : M) {
- InstrProfInstBase *FirstProfInst = nullptr;
- for (BasicBlock &BB : F)
- for (auto I = BB.begin(), E = BB.end(); I != E; I++)
+ InstrProfCntrInstBase *FirstProfInst = nullptr;
+ for (BasicBlock &BB : F) {
+ for (auto I = BB.begin(), E = BB.end(); I != E; I++) {
if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I))
computeNumValueSiteCounts(Ind);
- else if (FirstProfInst == nullptr &&
- (isa<InstrProfIncrementInst>(I) || isa<InstrProfCoverInst>(I)))
- FirstProfInst = dyn_cast<InstrProfInstBase>(I);
+ else {
+ if (FirstProfInst == nullptr &&
+ (isa<InstrProfIncrementInst>(I) || isa<InstrProfCoverInst>(I)))
+ FirstProfInst = dyn_cast<InstrProfCntrInstBase>(I);
+ // If the MCDCBitmapParameters intrinsic is seen, create the bitmaps.
+ if (const auto &Params = dyn_cast<InstrProfMCDCBitmapParameters>(I))
+ static_cast<void>(getOrCreateRegionBitmaps(Params));
+ }
+ }
+ }
- // Value profiling intrinsic lowering requires per-function profile data
- // variable to be created first.
- if (FirstProfInst != nullptr)
+ // Use the first profile intrinsic to create the region counters and the
+ // data variable; the data variable is also created based on the MCDCParams.
+ if (FirstProfInst != nullptr) {
static_cast<void>(getOrCreateRegionCounters(FirstProfInst));
+ }
}
for (Function &F : M)
@@ -611,7 +792,7 @@ static FunctionCallee getOrInsertValueProfilingCall(
return M.getOrInsertFunction(FuncName, ValueProfilingCallTy, AL);
}
-void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
+void InstrLowerer::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
GlobalVariable *Name = Ind->getName();
uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
uint64_t Index = Ind->getIndex()->getZExtValue();
@@ -620,12 +801,12 @@ void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
std::max(PD.NumValueSites[ValueKind], (uint32_t)(Index + 1));
}
-void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
+void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
// TODO: Value profiling heavily depends on the data section which is omitted
// in lightweight mode. We need to move the value profile pointer to the
// Counter struct to get this working.
assert(
- !DebugInfoCorrelate &&
+ !DebugInfoCorrelate && ProfileCorrelate == InstrProfCorrelator::NONE &&
"Value profiling is not yet supported with lightweight instrumentation");
GlobalVariable *Name = Ind->getName();
auto It = ProfileDataMap.find(Name);
@@ -651,17 +832,13 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
SmallVector<OperandBundleDef, 1> OpBundles;
Ind->getOperandBundlesAsDefs(OpBundles);
if (!IsMemOpSize) {
- Value *Args[3] = {Ind->getTargetValue(),
- Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
- Builder.getInt32(Index)};
- Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), Args,
+ Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)};
+ Call = Builder.CreateCall(getOrInsertValueProfilingCall(M, *TLI), Args,
OpBundles);
} else {
- Value *Args[3] = {Ind->getTargetValue(),
- Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
- Builder.getInt32(Index)};
+ Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)};
Call = Builder.CreateCall(
- getOrInsertValueProfilingCall(*M, *TLI, ValueProfilingCallType::MemOp),
+ getOrInsertValueProfilingCall(M, *TLI, ValueProfilingCallType::MemOp),
Args, OpBundles);
}
if (auto AK = TLI->getExtAttrForI32Param(false))
@@ -670,7 +847,7 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
Ind->eraseFromParent();
}
-Value *InstrProfiling::getCounterAddress(InstrProfInstBase *I) {
+Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) {
auto *Counters = getOrCreateRegionCounters(I);
IRBuilder<> Builder(I);
@@ -683,18 +860,18 @@ Value *InstrProfiling::getCounterAddress(InstrProfInstBase *I) {
if (!isRuntimeCounterRelocationEnabled())
return Addr;
- Type *Int64Ty = Type::getInt64Ty(M->getContext());
+ Type *Int64Ty = Type::getInt64Ty(M.getContext());
Function *Fn = I->getParent()->getParent();
LoadInst *&BiasLI = FunctionToProfileBiasMap[Fn];
if (!BiasLI) {
IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front());
- auto *Bias = M->getGlobalVariable(getInstrProfCounterBiasVarName());
+ auto *Bias = M.getGlobalVariable(getInstrProfCounterBiasVarName());
if (!Bias) {
// Compiler must define this variable when runtime counter relocation
// is being used. Runtime has a weak external reference that is used
// to check whether that's the case or not.
Bias = new GlobalVariable(
- *M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
+ M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName());
Bias->setVisibility(GlobalVariable::HiddenVisibility);
// A definition that's weak (linkonce_odr) without being in a COMDAT
@@ -702,7 +879,7 @@ Value *InstrProfiling::getCounterAddress(InstrProfInstBase *I) {
// data word from every TU but one. Putting it in COMDAT ensures there
// will be exactly one data slot in the link.
if (TT.supportsCOMDAT())
- Bias->setComdat(M->getOrInsertComdat(Bias->getName()));
+ Bias->setComdat(M.getOrInsertComdat(Bias->getName()));
}
BiasLI = EntryBuilder.CreateLoad(Int64Ty, Bias);
}
@@ -710,7 +887,26 @@ Value *InstrProfiling::getCounterAddress(InstrProfInstBase *I) {
return Builder.CreateIntToPtr(Add, Addr->getType());
}
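
When runtime counter relocation is in effect, the arithmetic emitted above reduces to one bias load per function plus an add on every counter address. A minimal sketch of the runtime behavior (the bias symbol name matches what the pass emits; the helper is hypothetical):

    #include <cstdint>

    // Weak symbol defined by the profile runtime when counters are remapped
    // (e.g. into a file-backed mapping); zero otherwise.
    extern "C" uint64_t __llvm_profile_counter_bias;

    // Hypothetical helper mirroring the IR: counter address = static + bias.
    static uint64_t *relocatedCounterAddr(uint64_t *StaticAddr) {
      return reinterpret_cast<uint64_t *>(
          reinterpret_cast<uintptr_t>(StaticAddr) + __llvm_profile_counter_bias);
    }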
-void InstrProfiling::lowerCover(InstrProfCoverInst *CoverInstruction) {
+Value *InstrLowerer::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) {
+ auto *Bitmaps = getOrCreateRegionBitmaps(I);
+ IRBuilder<> Builder(I);
+
+ auto *Addr = Builder.CreateConstInBoundsGEP2_32(
+ Bitmaps->getValueType(), Bitmaps, 0, I->getBitmapIndex()->getZExtValue());
+
+ if (isRuntimeCounterRelocationEnabled()) {
+ LLVMContext &Ctx = M.getContext();
+ Ctx.diagnose(DiagnosticInfoPGOProfile(
+ M.getName().data(),
+ Twine("Runtime counter relocation is presently not supported for MC/DC "
+ "bitmaps."),
+ DS_Warning));
+ }
+
+ return Addr;
+}
+
+void InstrLowerer::lowerCover(InstrProfCoverInst *CoverInstruction) {
auto *Addr = getCounterAddress(CoverInstruction);
IRBuilder<> Builder(CoverInstruction);
// We store zero to represent that this block is covered.
@@ -718,22 +914,22 @@ void InstrProfiling::lowerCover(InstrProfCoverInst *CoverInstruction) {
CoverInstruction->eraseFromParent();
}
-void InstrProfiling::lowerTimestamp(
+void InstrLowerer::lowerTimestamp(
InstrProfTimestampInst *TimestampInstruction) {
assert(TimestampInstruction->getIndex()->isZeroValue() &&
"timestamp probes are always the first probe for a function");
- auto &Ctx = M->getContext();
+ auto &Ctx = M.getContext();
auto *TimestampAddr = getCounterAddress(TimestampInstruction);
IRBuilder<> Builder(TimestampInstruction);
auto *CalleeTy =
FunctionType::get(Type::getVoidTy(Ctx), TimestampAddr->getType(), false);
- auto Callee = M->getOrInsertFunction(
+ auto Callee = M.getOrInsertFunction(
INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SET_TIMESTAMP), CalleeTy);
Builder.CreateCall(Callee, {TimestampAddr});
TimestampInstruction->eraseFromParent();
}
-void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
+void InstrLowerer::lowerIncrement(InstrProfIncrementInst *Inc) {
auto *Addr = getCounterAddress(Inc);
IRBuilder<> Builder(Inc);
@@ -752,7 +948,7 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
Inc->eraseFromParent();
}
-void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
+void InstrLowerer::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
ConstantArray *Names =
cast<ConstantArray>(CoverageNamesVar->getInitializer());
for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
@@ -769,6 +965,86 @@ void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
CoverageNamesVar->eraseFromParent();
}
+void InstrLowerer::lowerMCDCTestVectorBitmapUpdate(
+ InstrProfMCDCTVBitmapUpdate *Update) {
+ IRBuilder<> Builder(Update);
+ auto *Int8Ty = Type::getInt8Ty(M.getContext());
+ auto *Int8PtrTy = PointerType::getUnqual(M.getContext());
+ auto *Int32Ty = Type::getInt32Ty(M.getContext());
+ auto *Int64Ty = Type::getInt64Ty(M.getContext());
+ auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr();
+ auto *BitmapAddr = getBitmapAddress(Update);
+
+ // Load Temp Val.
+ // %mcdc.temp = load i32, ptr %mcdc.addr, align 4
+ auto *Temp = Builder.CreateLoad(Int32Ty, MCDCCondBitmapAddr, "mcdc.temp");
+
+ // Calculate byte offset using div8.
+ // %1 = lshr i32 %mcdc.temp, 3
+ auto *BitmapByteOffset = Builder.CreateLShr(Temp, 0x3);
+
+ // Add byte offset to section base byte address.
+ // %2 = zext i32 %1 to i64
+ // %3 = add i64 ptrtoint (ptr @__profbm_test to i64), %2
+ auto *BitmapByteAddr =
+ Builder.CreateAdd(Builder.CreatePtrToInt(BitmapAddr, Int64Ty),
+ Builder.CreateZExtOrBitCast(BitmapByteOffset, Int64Ty));
+
+ // Convert to a pointer.
+ // %4 = inttoptr i64 %3 to ptr
+ BitmapByteAddr = Builder.CreateIntToPtr(BitmapByteAddr, Int8PtrTy);
+
+ // Calculate the bit offset into the bitmap byte using the div8 remainder (AND 0x7).
+ // %5 = and i32 %mcdc.temp, 7
+ // %6 = trunc i32 %5 to i8
+ auto *BitToSet = Builder.CreateTrunc(Builder.CreateAnd(Temp, 0x7), Int8Ty);
+
+ // Shift bit offset left to form a bitmap.
+ // %7 = shl i8 1, %6
+ auto *ShiftedVal = Builder.CreateShl(Builder.getInt8(0x1), BitToSet);
+
+ // Load profile bitmap byte.
+ // %mcdc.bits = load i8, ptr %4, align 1
+ auto *Bitmap = Builder.CreateLoad(Int8Ty, BitmapByteAddr, "mcdc.bits");
+
+ // Perform logical OR of profile bitmap byte and shifted bit offset.
+ // %8 = or i8 %mcdc.bits, %7
+ auto *Result = Builder.CreateOr(Bitmap, ShiftedVal);
+
+ // Store the updated profile bitmap byte.
+ // store i8 %8, ptr %4, align 1
+ Builder.CreateStore(Result, BitmapByteAddr);
+ Update->eraseFromParent();
+}
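
Net effect of the sequence: set bit number %mcdc.temp of a byte-granular bitmap. The same arithmetic as a self-contained C++ restatement (illustrative, not the emitted code):

    #include <cstdint>

    // Restatement of the lowered update: mark test vector TestVectorIdx seen.
    static void updateTVBitmap(uint8_t *Bitmap, uint32_t TestVectorIdx) {
      uint32_t ByteOffset = TestVectorIdx >> 3;      // div 8: byte index
      uint8_t BitToSet = TestVectorIdx & 0x7;        // rem 8: bit index
      Bitmap[ByteOffset] |= uint8_t(1u << BitToSet); // load, or, store
    }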
+
+void InstrLowerer::lowerMCDCCondBitmapUpdate(
+ InstrProfMCDCCondBitmapUpdate *Update) {
+ IRBuilder<> Builder(Update);
+ auto *Int32Ty = Type::getInt32Ty(M.getContext());
+ auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr();
+
+ // Load the MCDC temporary value from the stack.
+ // %mcdc.temp = load i32, ptr %mcdc.addr, align 4
+ auto *Temp = Builder.CreateLoad(Int32Ty, MCDCCondBitmapAddr, "mcdc.temp");
+
+ // Zero-extend the evaluated condition boolean value (0 or 1) to 32 bits.
+ // %1 = zext i1 %tobool to i32
+ auto *CondV_32 = Builder.CreateZExt(Update->getCondBool(), Int32Ty);
+
+ // Shift the boolean value left (by the condition's ID) to form a bitmap.
+ // %2 = shl i32 %1, <Update->getCondID()>
+ auto *ShiftedVal = Builder.CreateShl(CondV_32, Update->getCondID());
+
+ // Perform logical OR of the bitmap against the loaded MCDC temporary value.
+ // %3 = or i32 %mcdc.temp, %2
+ auto *Result = Builder.CreateOr(Temp, ShiftedVal);
+
+ // Store the updated temporary value back to the stack.
+ // store i32 %3, ptr %mcdc.addr, align 4
+ Builder.CreateStore(Result, MCDCCondBitmapAddr);
+ Update->eraseFromParent();
+}
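
The companion lowering is a one-line accumulation; as a C++ restatement (again illustrative):

    #include <cstdint>

    // Fold condition CondID's outcome into the in-progress test vector.
    static void updateCondBitmap(uint32_t &MCDCTemp, bool Cond, uint32_t CondID) {
      MCDCTemp |= uint32_t(Cond) << CondID; // zext, shl by condition ID, or
    }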
+
/// Get the name of a profiling variable for a particular function.
static std::string getVarName(InstrProfInstBase *Inc, StringRef Prefix,
bool &Renamed) {
@@ -784,7 +1060,7 @@ static std::string getVarName(InstrProfInstBase *Inc, StringRef Prefix,
Renamed = true;
uint64_t FuncHash = Inc->getHash()->getZExtValue();
SmallVector<char, 24> HashPostfix;
- if (Name.endswith((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix)))
+ if (Name.ends_with((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix)))
return (Prefix + Name).str();
return (Prefix + Name + "." + Twine(FuncHash)).str();
}
@@ -878,7 +1154,7 @@ static inline bool shouldUsePublicSymbol(Function *Fn) {
}
static inline Constant *getFuncAddrForProfData(Function *Fn) {
- auto *Int8PtrTy = Type::getInt8PtrTy(Fn->getContext());
+ auto *Int8PtrTy = PointerType::getUnqual(Fn->getContext());
// Store a nullptr in __llvm_profd, if we shouldn't use a real address
if (!shouldRecordFunctionAddr(Fn))
return ConstantPointerNull::get(Int8PtrTy);
@@ -886,7 +1162,7 @@ static inline Constant *getFuncAddrForProfData(Function *Fn) {
// If we can't use an alias, we must use the public symbol, even though this
// may require a symbolic relocation.
if (shouldUsePublicSymbol(Fn))
- return ConstantExpr::getBitCast(Fn, Int8PtrTy);
+ return Fn;
// When possible use a private alias to avoid symbolic relocations.
auto *GA = GlobalAlias::create(GlobalValue::LinkageTypes::PrivateLinkage,
@@ -909,7 +1185,7 @@ static inline Constant *getFuncAddrForProfData(Function *Fn) {
// appendToCompilerUsed(*Fn->getParent(), {GA});
- return ConstantExpr::getBitCast(GA, Int8PtrTy);
+ return GA;
}
static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
@@ -924,37 +1200,31 @@ static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
return true;
}
-GlobalVariable *
-InstrProfiling::createRegionCounters(InstrProfInstBase *Inc, StringRef Name,
- GlobalValue::LinkageTypes Linkage) {
- uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
- auto &Ctx = M->getContext();
- GlobalVariable *GV;
- if (isa<InstrProfCoverInst>(Inc)) {
- auto *CounterTy = Type::getInt8Ty(Ctx);
- auto *CounterArrTy = ArrayType::get(CounterTy, NumCounters);
- // TODO: `Constant::getAllOnesValue()` does not yet accept an array type.
- std::vector<Constant *> InitialValues(NumCounters,
- Constant::getAllOnesValue(CounterTy));
- GV = new GlobalVariable(*M, CounterArrTy, false, Linkage,
- ConstantArray::get(CounterArrTy, InitialValues),
- Name);
- GV->setAlignment(Align(1));
- } else {
- auto *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
- GV = new GlobalVariable(*M, CounterTy, false, Linkage,
- Constant::getNullValue(CounterTy), Name);
- GV->setAlignment(Align(8));
- }
- return GV;
+void InstrLowerer::maybeSetComdat(GlobalVariable *GV, Function *Fn,
+ StringRef VarName) {
+ bool DataReferencedByCode = profDataReferencedByCode(M);
+ bool NeedComdat = needsComdatForCounter(*Fn, M);
+ bool UseComdat = (NeedComdat || TT.isOSBinFormatELF());
+
+ if (!UseComdat)
+ return;
+
+ StringRef GroupName =
+ TT.isOSBinFormatCOFF() && DataReferencedByCode ? GV->getName() : VarName;
+ Comdat *C = M.getOrInsertComdat(GroupName);
+ if (!NeedComdat)
+ C->setSelectionKind(Comdat::NoDeduplicate);
+ GV->setComdat(C);
+ // COFF doesn't allow the comdat group leader to have private linkage, so
+ // upgrade private linkage to internal linkage to produce a symbol table
+ // entry.
+ if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage())
+ GV->setLinkage(GlobalValue::InternalLinkage);
}
-GlobalVariable *
-InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) {
+GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc,
+ InstrProfSectKind IPSK) {
GlobalVariable *NamePtr = Inc->getName();
- auto &PD = ProfileDataMap[NamePtr];
- if (PD.RegionCounters)
- return PD.RegionCounters;
// Match the linkage and visibility of the name global.
Function *Fn = Inc->getParent()->getParent();
@@ -963,8 +1233,9 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) {
// Use internal rather than private linkage so the counter variable shows up
// in the symbol table when using debug info for correlation.
- if (DebugInfoCorrelate && TT.isOSBinFormatMachO() &&
- Linkage == GlobalValue::PrivateLinkage)
+ if ((DebugInfoCorrelate ||
+ ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) &&
+ TT.isOSBinFormatMachO() && Linkage == GlobalValue::PrivateLinkage)
Linkage = GlobalValue::InternalLinkage;
// Due to the limitation of binder as of 2021/09/28, the duplicate weak
@@ -993,44 +1264,104 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) {
// nodeduplicate COMDAT which is lowered to a zero-flag section group. This
// allows -z start-stop-gc to discard the entire group when the function is
// discarded.
- bool DataReferencedByCode = profDataReferencedByCode(*M);
- bool NeedComdat = needsComdatForCounter(*Fn, *M);
bool Renamed;
- std::string CntsVarName =
- getVarName(Inc, getInstrProfCountersVarPrefix(), Renamed);
- std::string DataVarName =
- getVarName(Inc, getInstrProfDataVarPrefix(), Renamed);
- auto MaybeSetComdat = [&](GlobalVariable *GV) {
- bool UseComdat = (NeedComdat || TT.isOSBinFormatELF());
- if (UseComdat) {
- StringRef GroupName = TT.isOSBinFormatCOFF() && DataReferencedByCode
- ? GV->getName()
- : CntsVarName;
- Comdat *C = M->getOrInsertComdat(GroupName);
- if (!NeedComdat)
- C->setSelectionKind(Comdat::NoDeduplicate);
- GV->setComdat(C);
- // COFF doesn't allow the comdat group leader to have private linkage, so
- // upgrade private linkage to internal linkage to produce a symbol table
- // entry.
- if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage())
- GV->setLinkage(GlobalValue::InternalLinkage);
- }
- };
+ GlobalVariable *Ptr;
+ StringRef VarPrefix;
+ std::string VarName;
+ if (IPSK == IPSK_cnts) {
+ VarPrefix = getInstrProfCountersVarPrefix();
+ VarName = getVarName(Inc, VarPrefix, Renamed);
+ InstrProfCntrInstBase *CntrIncrement = dyn_cast<InstrProfCntrInstBase>(Inc);
+ Ptr = createRegionCounters(CntrIncrement, VarName, Linkage);
+ } else if (IPSK == IPSK_bitmap) {
+ VarPrefix = getInstrProfBitmapVarPrefix();
+ VarName = getVarName(Inc, VarPrefix, Renamed);
+ InstrProfMCDCBitmapInstBase *BitmapUpdate =
+ dyn_cast<InstrProfMCDCBitmapInstBase>(Inc);
+ Ptr = createRegionBitmaps(BitmapUpdate, VarName, Linkage);
+ } else {
+ llvm_unreachable("Profile Section must be for Counters or Bitmaps");
+ }
+
+ Ptr->setVisibility(Visibility);
+ // Put the counters and bitmaps in their own sections so linkers can
+ // remove unneeded sections.
+ Ptr->setSection(getInstrProfSectionName(IPSK, TT.getObjectFormat()));
+ Ptr->setLinkage(Linkage);
+ maybeSetComdat(Ptr, Fn, VarName);
+ return Ptr;
+}
+
+GlobalVariable *
+InstrLowerer::createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
+ StringRef Name,
+ GlobalValue::LinkageTypes Linkage) {
+ uint64_t NumBytes = Inc->getNumBitmapBytes()->getZExtValue();
+ auto *BitmapTy = ArrayType::get(Type::getInt8Ty(M.getContext()), NumBytes);
+ auto GV = new GlobalVariable(M, BitmapTy, false, Linkage,
+ Constant::getNullValue(BitmapTy), Name);
+ GV->setAlignment(Align(1));
+ return GV;
+}
+GlobalVariable *
+InstrLowerer::getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc) {
+ GlobalVariable *NamePtr = Inc->getName();
+ auto &PD = ProfileDataMap[NamePtr];
+ if (PD.RegionBitmaps)
+ return PD.RegionBitmaps;
+
+ // If RegionBitmaps doesn't already exist, create it by first setting up
+ // the corresponding profile section.
+ auto *BitmapPtr = setupProfileSection(Inc, IPSK_bitmap);
+ PD.RegionBitmaps = BitmapPtr;
+ PD.NumBitmapBytes = Inc->getNumBitmapBytes()->getZExtValue();
+ return PD.RegionBitmaps;
+}
+
+GlobalVariable *
+InstrLowerer::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name,
+ GlobalValue::LinkageTypes Linkage) {
uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
- LLVMContext &Ctx = M->getContext();
-
- auto *CounterPtr = createRegionCounters(Inc, CntsVarName, Linkage);
- CounterPtr->setVisibility(Visibility);
- CounterPtr->setSection(
- getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat()));
- CounterPtr->setLinkage(Linkage);
- MaybeSetComdat(CounterPtr);
+ auto &Ctx = M.getContext();
+ GlobalVariable *GV;
+ if (isa<InstrProfCoverInst>(Inc)) {
+ auto *CounterTy = Type::getInt8Ty(Ctx);
+ auto *CounterArrTy = ArrayType::get(CounterTy, NumCounters);
+ // TODO: `Constant::getAllOnesValue()` does not yet accept an array type.
+ std::vector<Constant *> InitialValues(NumCounters,
+ Constant::getAllOnesValue(CounterTy));
+ GV = new GlobalVariable(M, CounterArrTy, false, Linkage,
+ ConstantArray::get(CounterArrTy, InitialValues),
+ Name);
+ GV->setAlignment(Align(1));
+ } else {
+ auto *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
+ GV = new GlobalVariable(M, CounterTy, false, Linkage,
+ Constant::getNullValue(CounterTy), Name);
+ GV->setAlignment(Align(8));
+ }
+ return GV;
+}
+
+GlobalVariable *
+InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) {
+ GlobalVariable *NamePtr = Inc->getName();
+ auto &PD = ProfileDataMap[NamePtr];
+ if (PD.RegionCounters)
+ return PD.RegionCounters;
+
+ // If RegionCounters doesn't already exist, create it by first setting up
+ // the corresponding profile section.
+ auto *CounterPtr = setupProfileSection(Inc, IPSK_cnts);
PD.RegionCounters = CounterPtr;
- if (DebugInfoCorrelate) {
+
+ if (DebugInfoCorrelate ||
+ ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) {
+ LLVMContext &Ctx = M.getContext();
+ Function *Fn = Inc->getParent()->getParent();
if (auto *SP = Fn->getSubprogram()) {
- DIBuilder DB(*M, true, SP->getUnit());
+ DIBuilder DB(M, true, SP->getUnit());
Metadata *FunctionNameAnnotation[] = {
MDString::get(Ctx, InstrProfCorrelator::FunctionNameAttributeName),
MDString::get(Ctx, getPGOFuncNameVarInitializer(NamePtr)),
@@ -1056,16 +1387,58 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) {
Annotations);
CounterPtr->addDebugInfo(DICounter);
DB.finalize();
- } else {
- std::string Msg = ("Missing debug info for function " + Fn->getName() +
- "; required for profile correlation.")
- .str();
- Ctx.diagnose(
- DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
}
+
+ // Mark the counter variable as used so that it isn't optimized out.
+ CompilerUsedVars.push_back(PD.RegionCounters);
}
- auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
+ // Create the data variable (if it doesn't already exist).
+ createDataVariable(Inc);
+
+ return PD.RegionCounters;
+}
+
+void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
+ // When debug information is correlated to profile data, a data variable
+ // is not needed.
+ if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
+ return;
+
+ GlobalVariable *NamePtr = Inc->getName();
+ auto &PD = ProfileDataMap[NamePtr];
+
+ // Return if data variable was already created.
+ if (PD.DataVar)
+ return;
+
+ LLVMContext &Ctx = M.getContext();
+
+ Function *Fn = Inc->getParent()->getParent();
+ GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
+ GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
+
+ // Due to a limitation of the binder as of 2021/09/28, duplicate weak symbols
+ // in the same csect won't be discarded. When there are duplicate weak
+ // symbols, we can NOT guarantee that the relocations get resolved to the
+ // intended weak symbol, so we cannot ensure the correctness of the relative
+ // CounterPtr; hence we have to use private linkage for counter and data symbols.
+ if (TT.isOSBinFormatXCOFF()) {
+ Linkage = GlobalValue::PrivateLinkage;
+ Visibility = GlobalValue::DefaultVisibility;
+ }
+
+ bool DataReferencedByCode = profDataReferencedByCode(M);
+ bool NeedComdat = needsComdatForCounter(*Fn, M);
+ bool Renamed;
+
+ // The Data Variable section is anchored to profile counters.
+ std::string CntsVarName =
+ getVarName(Inc, getInstrProfCountersVarPrefix(), Renamed);
+ std::string DataVarName =
+ getVarName(Inc, getInstrProfDataVarPrefix(), Renamed);
+
+ auto *Int8PtrTy = PointerType::getUnqual(Ctx);
// Allocate statically the array of pointers to value profile nodes for
// the current function.
Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
@@ -1076,25 +1449,24 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) {
!needsRuntimeRegistrationOfSectionRange(TT)) {
ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
auto *ValuesVar = new GlobalVariable(
- *M, ValuesTy, false, Linkage, Constant::getNullValue(ValuesTy),
+ M, ValuesTy, false, Linkage, Constant::getNullValue(ValuesTy),
getVarName(Inc, getInstrProfValuesVarPrefix(), Renamed));
ValuesVar->setVisibility(Visibility);
+ setGlobalVariableLargeSection(TT, *ValuesVar);
ValuesVar->setSection(
getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
ValuesVar->setAlignment(Align(8));
- MaybeSetComdat(ValuesVar);
- ValuesPtrExpr =
- ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
+ maybeSetComdat(ValuesVar, Fn, CntsVarName);
+ ValuesPtrExpr = ValuesVar;
}
- if (DebugInfoCorrelate) {
- // Mark the counter variable as used so that it isn't optimized out.
- CompilerUsedVars.push_back(PD.RegionCounters);
- return PD.RegionCounters;
- }
+ uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
+ auto *CounterPtr = PD.RegionCounters;
+
+ uint64_t NumBitmapBytes = PD.NumBitmapBytes;
// Create data variable.
- auto *IntPtrTy = M->getDataLayout().getIntPtrType(M->getContext());
+ auto *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext());
auto *Int16Ty = Type::getInt16Ty(Ctx);
auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
Type *DataTypes[] = {
@@ -1127,12 +1499,30 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) {
Visibility = GlobalValue::DefaultVisibility;
}
auto *Data =
- new GlobalVariable(*M, DataTy, false, Linkage, nullptr, DataVarName);
- // Reference the counter variable with a label difference (link-time
- // constant).
- auto *RelativeCounterPtr =
- ConstantExpr::getSub(ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy),
- ConstantExpr::getPtrToInt(Data, IntPtrTy));
+ new GlobalVariable(M, DataTy, false, Linkage, nullptr, DataVarName);
+ Constant *RelativeCounterPtr;
+ GlobalVariable *BitmapPtr = PD.RegionBitmaps;
+ Constant *RelativeBitmapPtr = ConstantInt::get(IntPtrTy, 0);
+ InstrProfSectKind DataSectionKind;
+ // With binary profile correlation, profile data is not loaded into memory,
+ // so it must reference the profile counter with an absolute relocation
+ // (both schemes are sketched after this function).
+ if (ProfileCorrelate == InstrProfCorrelator::BINARY) {
+ DataSectionKind = IPSK_covdata;
+ RelativeCounterPtr = ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy);
+ if (BitmapPtr != nullptr)
+ RelativeBitmapPtr = ConstantExpr::getPtrToInt(BitmapPtr, IntPtrTy);
+ } else {
+ // Reference the counter variable with a label difference (link-time
+ // constant).
+ DataSectionKind = IPSK_data;
+ RelativeCounterPtr =
+ ConstantExpr::getSub(ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy),
+ ConstantExpr::getPtrToInt(Data, IntPtrTy));
+ if (BitmapPtr != nullptr)
+ RelativeBitmapPtr =
+ ConstantExpr::getSub(ConstantExpr::getPtrToInt(BitmapPtr, IntPtrTy),
+ ConstantExpr::getPtrToInt(Data, IntPtrTy));
+ }
Constant *DataVals[] = {
#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
@@ -1141,9 +1531,10 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) {
Data->setInitializer(ConstantStruct::get(DataTy, DataVals));
Data->setVisibility(Visibility);
- Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat()));
+ Data->setSection(
+ getInstrProfSectionName(DataSectionKind, TT.getObjectFormat()));
Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
- MaybeSetComdat(Data);
+ maybeSetComdat(Data, Fn, CntsVarName);
PD.DataVar = Data;
@@ -1155,11 +1546,9 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) {
NamePtr->setLinkage(GlobalValue::PrivateLinkage);
// Collect the referenced names to be used by emitNameData.
ReferencedNames.push_back(NamePtr);
-
- return PD.RegionCounters;
}
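
The two referencing schemes selected in createDataVariable boil down to the following choice (a sketch under the same assumptions as the code above; the helper name is hypothetical):

    #include <cstdint>

    // Default mode stores the link-time constant Counter - Data, which needs
    // no runtime relocation; binary correlation stores the absolute address.
    static uint64_t counterReference(uintptr_t CounterAddr, uintptr_t DataAddr,
                                     bool BinaryCorrelate) {
      if (BinaryCorrelate)
        return uint64_t(CounterAddr);          // absolute ptrtoint
      return uint64_t(CounterAddr - DataAddr); // label difference
    }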
-void InstrProfiling::emitVNodes() {
+void InstrLowerer::emitVNodes() {
if (!ValueProfileStaticAlloc)
return;
@@ -1190,7 +1579,7 @@ void InstrProfiling::emitVNodes() {
if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS)
NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, (int)NumCounters * 2);
- auto &Ctx = M->getContext();
+ auto &Ctx = M.getContext();
Type *VNodeTypes[] = {
#define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
#include "llvm/ProfileData/InstrProfData.inc"
@@ -1199,17 +1588,18 @@ void InstrProfiling::emitVNodes() {
ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters);
auto *VNodesVar = new GlobalVariable(
- *M, VNodesTy, false, GlobalValue::PrivateLinkage,
+ M, VNodesTy, false, GlobalValue::PrivateLinkage,
Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());
+ setGlobalVariableLargeSection(TT, *VNodesVar);
VNodesVar->setSection(
getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
- VNodesVar->setAlignment(M->getDataLayout().getABITypeAlign(VNodesTy));
+ VNodesVar->setAlignment(M.getDataLayout().getABITypeAlign(VNodesTy));
// VNodesVar is used by runtime but not referenced via relocation by other
// sections. Conservatively make it linker retained.
UsedVars.push_back(VNodesVar);
}
-void InstrProfiling::emitNameData() {
+void InstrLowerer::emitNameData() {
std::string UncompressedData;
if (ReferencedNames.empty())
@@ -1221,15 +1611,18 @@ void InstrProfiling::emitNameData() {
report_fatal_error(Twine(toString(std::move(E))), false);
}
- auto &Ctx = M->getContext();
+ auto &Ctx = M.getContext();
auto *NamesVal =
ConstantDataArray::getString(Ctx, StringRef(CompressedNameStr), false);
- NamesVar = new GlobalVariable(*M, NamesVal->getType(), true,
+ NamesVar = new GlobalVariable(M, NamesVal->getType(), true,
GlobalValue::PrivateLinkage, NamesVal,
getInstrProfNamesVarName());
NamesSize = CompressedNameStr.size();
+ setGlobalVariableLargeSection(TT, *NamesVar);
NamesVar->setSection(
- getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));
+ ProfileCorrelate == InstrProfCorrelator::BINARY
+ ? getInstrProfSectionName(IPSK_covname, TT.getObjectFormat())
+ : getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));
// On COFF, it's important to reduce the alignment down to 1 to prevent the
// linker from inserting padding before the start of the names section or
// between names entries.
@@ -1242,14 +1635,14 @@ void InstrProfiling::emitNameData() {
NamePtr->eraseFromParent();
}
-void InstrProfiling::emitRegistration() {
+void InstrLowerer::emitRegistration() {
if (!needsRuntimeRegistrationOfSectionRange(TT))
return;
// Construct the function.
- auto *VoidTy = Type::getVoidTy(M->getContext());
- auto *VoidPtrTy = Type::getInt8PtrTy(M->getContext());
- auto *Int64Ty = Type::getInt64Ty(M->getContext());
+ auto *VoidTy = Type::getVoidTy(M.getContext());
+ auto *VoidPtrTy = PointerType::getUnqual(M.getContext());
+ auto *Int64Ty = Type::getInt64Ty(M.getContext());
auto *RegisterFTy = FunctionType::get(VoidTy, false);
auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage,
getInstrProfRegFuncsName(), M);
@@ -1262,13 +1655,13 @@ void InstrProfiling::emitRegistration() {
Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage,
getInstrProfRegFuncName(), M);
- IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
+ IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", RegisterF));
for (Value *Data : CompilerUsedVars)
if (!isa<Function>(Data))
- IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
+ IRB.CreateCall(RuntimeRegisterF, Data);
for (Value *Data : UsedVars)
if (Data != NamesVar && !isa<Function>(Data))
- IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
+ IRB.CreateCall(RuntimeRegisterF, Data);
if (NamesVar) {
Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
@@ -1277,27 +1670,26 @@ void InstrProfiling::emitRegistration() {
auto *NamesRegisterF =
Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,
getInstrProfNamesRegFuncName(), M);
- IRB.CreateCall(NamesRegisterF, {IRB.CreateBitCast(NamesVar, VoidPtrTy),
- IRB.getInt64(NamesSize)});
+ IRB.CreateCall(NamesRegisterF, {NamesVar, IRB.getInt64(NamesSize)});
}
IRB.CreateRetVoid();
}
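The dropped CreateBitCast calls in this hunk (and in several hunks below) follow from the opaque-pointer model: every pointer value already has the single type ptr, so a bitcast between pointer types folds away. A minimal sketch of why passing Data directly is equivalent, assuming nothing beyond IRBuilder's documented cast folding:

    #include "llvm/IR/IRBuilder.h"

    // With opaque pointers, PtrTy equals the type of any pointer Value, so
    // CreateBitCast(V, PtrTy) folds to V itself and can be omitted.
    llvm::Value *castOrSame(llvm::IRBuilder<> &IRB, llvm::Value *V) {
      llvm::PointerType *PtrTy =
          llvm::PointerType::getUnqual(IRB.getContext());
      return IRB.CreateBitCast(V, PtrTy); // returns V when V is a pointer
    }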
-bool InstrProfiling::emitRuntimeHook() {
+bool InstrLowerer::emitRuntimeHook() {
// We expect the linker to be invoked with -u<hook_var> flag for Linux
// in which case there is no need to emit the external variable.
if (TT.isOSLinux() || TT.isOSAIX())
return false;
// If the module's provided its own runtime, we don't need to do anything.
- if (M->getGlobalVariable(getInstrProfRuntimeHookVarName()))
+ if (M.getGlobalVariable(getInstrProfRuntimeHookVarName()))
return false;
// Declare an external variable that will pull in the runtime initialization.
- auto *Int32Ty = Type::getInt32Ty(M->getContext());
+ auto *Int32Ty = Type::getInt32Ty(M.getContext());
auto *Var =
- new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage,
+ new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
nullptr, getInstrProfRuntimeHookVarName());
Var->setVisibility(GlobalValue::HiddenVisibility);
@@ -1314,9 +1706,9 @@ bool InstrProfiling::emitRuntimeHook() {
User->addFnAttr(Attribute::NoRedZone);
User->setVisibility(GlobalValue::HiddenVisibility);
if (TT.supportsCOMDAT())
- User->setComdat(M->getOrInsertComdat(User->getName()));
+ User->setComdat(M.getOrInsertComdat(User->getName()));
- IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
+ IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", User));
auto *Load = IRB.CreateLoad(Int32Ty, Var);
IRB.CreateRet(Load);
@@ -1326,7 +1718,7 @@ bool InstrProfiling::emitRuntimeHook() {
return true;
}
-void InstrProfiling::emitUses() {
+void InstrLowerer::emitUses() {
// The metadata sections are parallel arrays. Optimizers (e.g.
// GlobalOpt/ConstantMerge) may not discard associated sections as a unit, so
// we conservatively retain all unconditionally in the compiler.
@@ -1337,30 +1729,30 @@ void InstrProfiling::emitUses() {
// and ensure this GC property as well. Otherwise, we have to conservatively
// make all of the sections retained by the linker.
if (TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() ||
- (TT.isOSBinFormatCOFF() && !profDataReferencedByCode(*M)))
- appendToCompilerUsed(*M, CompilerUsedVars);
+ (TT.isOSBinFormatCOFF() && !profDataReferencedByCode(M)))
+ appendToCompilerUsed(M, CompilerUsedVars);
else
- appendToUsed(*M, CompilerUsedVars);
+ appendToUsed(M, CompilerUsedVars);
// We do not add proper references from used metadata sections to NamesVar and
// VNodesVar, so we have to be conservative and place them in llvm.used
// regardless of the target.
- appendToUsed(*M, UsedVars);
+ appendToUsed(M, UsedVars);
}
-void InstrProfiling::emitInitialization() {
+void InstrLowerer::emitInitialization() {
// Create ProfileFileName variable. Don't do this for the
// context-sensitive instrumentation lowering: this lowering runs after
// LTO/ThinLTO linking. The PGOInstrumentationGenCreateVar pass should
// have already created the variable before LTO/ThinLTO linking.
if (!IsCS)
- createProfileFileNameVar(*M, Options.InstrProfileOutput);
- Function *RegisterF = M->getFunction(getInstrProfRegFuncsName());
+ createProfileFileNameVar(M, Options.InstrProfileOutput);
+ Function *RegisterF = M.getFunction(getInstrProfRegFuncsName());
if (!RegisterF)
return;
// Create the initialization function.
- auto *VoidTy = Type::getVoidTy(M->getContext());
+ auto *VoidTy = Type::getVoidTy(M.getContext());
auto *F = Function::Create(FunctionType::get(VoidTy, false),
GlobalValue::InternalLinkage,
getInstrProfInitFuncName(), M);
@@ -1370,9 +1762,9 @@ void InstrProfiling::emitInitialization() {
F->addFnAttr(Attribute::NoRedZone);
// Add the basic block and the necessary calls.
- IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F));
+ IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", F));
IRB.CreateCall(RegisterF, {});
IRB.CreateRetVoid();
- appendToGlobalCtors(*M, F, 0);
+ appendToGlobalCtors(M, F, 0);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
index 806afc8fcdf7..b842d9eef407 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -85,3 +85,15 @@ Comdat *llvm::getOrCreateFunctionComdat(Function &F, Triple &T) {
return C;
}
+void llvm::setGlobalVariableLargeSection(const Triple &TargetTriple,
+ GlobalVariable &GV) {
+ // Limit to x86-64 ELF.
+ if (TargetTriple.getArch() != Triple::x86_64 ||
+ TargetTriple.getObjectFormat() != Triple::ELF)
+ return;
+ // Limit to medium/large code models.
+ std::optional<CodeModel::Model> CM = GV.getParent()->getCodeModel();
+ if (!CM || (*CM != CodeModel::Medium && *CM != CodeModel::Large))
+ return;
+ GV.setCodeModel(CodeModel::Large);
+}
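A minimal usage sketch of the new helper, assuming its declaration lives alongside getOrCreateFunctionComdat in llvm/Transforms/Instrumentation.h and that the module targets x86_64 ELF; the code model set on the module is an assumption of the sketch:

    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/CodeGen.h"
    #include "llvm/TargetParser/Triple.h"
    #include "llvm/Transforms/Instrumentation.h"

    void tagProfilingGlobal(llvm::Module &M, llvm::GlobalVariable &GV) {
      M.setCodeModel(llvm::CodeModel::Medium); // assumption for the sketch
      llvm::Triple TT(M.getTargetTriple());
      llvm::setGlobalVariableLargeSection(TT, GV);
      // On x86_64 ELF with a medium/large code model, GV now carries
      // CodeModel::Large, steering it into a large data section; any
      // other target or code model leaves GV untouched.
    }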
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 789ed005d03d..539b7441d24b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -182,6 +182,7 @@ public:
C = &(M.getContext());
LongSize = M.getDataLayout().getPointerSizeInBits();
IntptrTy = Type::getIntNTy(*C, LongSize);
+ PtrTy = PointerType::getUnqual(*C);
}
/// If it is an interesting memory access, populate information
@@ -209,6 +210,7 @@ private:
LLVMContext *C;
int LongSize;
Type *IntptrTy;
+ PointerType *PtrTy;
ShadowMapping Mapping;
// These arrays are indexed by AccessIsWrite
@@ -267,15 +269,13 @@ Value *MemProfiler::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
void MemProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) {
IRBuilder<> IRB(MI);
if (isa<MemTransferInst>(MI)) {
- IRB.CreateCall(
- isa<MemMoveInst>(MI) ? MemProfMemmove : MemProfMemcpy,
- {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
+ IRB.CreateCall(isa<MemMoveInst>(MI) ? MemProfMemmove : MemProfMemcpy,
+ {MI->getOperand(0), MI->getOperand(1),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
} else if (isa<MemSetInst>(MI)) {
IRB.CreateCall(
MemProfMemset,
- {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ {MI->getOperand(0),
IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
}
@@ -364,13 +364,13 @@ MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
StringRef SectionName = GV->getSection();
// Check if the global is in the PGO counters section.
auto OF = Triple(I->getModule()->getTargetTriple()).getObjectFormat();
- if (SectionName.endswith(
+ if (SectionName.ends_with(
getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false)))
return std::nullopt;
}
// Do not instrument accesses to LLVM internal variables.
- if (GV->getName().startswith("__llvm"))
+ if (GV->getName().starts_with("__llvm"))
return std::nullopt;
}
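The startswith/endswith call sites here and below are a mechanical rename to the C++20 std::string_view spellings starts_with/ends_with; behavior is unchanged. A quick illustration:

    #include "llvm/ADT/StringRef.h"
    #include <cassert>

    void renameDemo() {
      llvm::StringRef S("__llvm_prf_cnts");
      assert(S.starts_with("__llvm")); // formerly S.startswith("__llvm")
      assert(S.ends_with("cnts"));     // formerly S.endswith("cnts")
    }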
@@ -519,14 +519,12 @@ void MemProfiler::initializeCallbacks(Module &M) {
FunctionType::get(IRB.getVoidTy(), Args1, false));
}
MemProfMemmove = M.getOrInsertFunction(
- ClMemoryAccessCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
+ ClMemoryAccessCallbackPrefix + "memmove", PtrTy, PtrTy, PtrTy, IntptrTy);
MemProfMemcpy = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memcpy",
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy);
- MemProfMemset = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memset",
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt32Ty(), IntptrTy);
+ PtrTy, PtrTy, PtrTy, IntptrTy);
+ MemProfMemset =
+ M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memset", PtrTy,
+ PtrTy, IRB.getInt32Ty(), IntptrTy);
}
bool MemProfiler::maybeInsertMemProfInitAtFunctionEntry(Function &F) {
@@ -562,7 +560,7 @@ bool MemProfiler::instrumentFunction(Function &F) {
return false;
if (ClDebugFunc == F.getName())
return false;
- if (F.getName().startswith("__memprof_"))
+ if (F.getName().starts_with("__memprof_"))
return false;
bool FunctionModified = false;
@@ -628,7 +626,7 @@ static void addCallsiteMetadata(Instruction &I,
static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
uint32_t Column) {
- llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::support::endianness::little>
+ llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
HashBuilder;
HashBuilder.add(Function, LineOffset, Column);
llvm::BLAKE3Result<8> Hash = HashBuilder.final();
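Only the endianness template argument changes here (llvm::support::endianness moved to llvm::endianness); the id derivation itself is unchanged. A hedged sketch of the whole helper, where the little-endian read-back of the eight digest bytes is inferred from context rather than shown in the hunk:

    #include "llvm/Support/BLAKE3.h"
    #include "llvm/Support/Endian.h"
    #include "llvm/Support/HashBuilder.h"
    #include <cstdint>

    static uint64_t computeStackIdSketch(uint64_t FunctionGUID,
                                         uint32_t LineOffset,
                                         uint32_t Column) {
      llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
          HB;
      HB.add(FunctionGUID, LineOffset, Column);
      llvm::BLAKE3Result<8> Hash = HB.final();
      // Reinterpret the truncated digest as a little-endian 64-bit id.
      return llvm::support::endian::read64le(Hash.data());
    }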
@@ -678,13 +676,19 @@ static void readMemprof(Module &M, Function &F,
IndexedInstrProfReader *MemProfReader,
const TargetLibraryInfo &TLI) {
auto &Ctx = M.getContext();
-
- auto FuncName = getPGOFuncName(F);
+ // Previously we used getIRPGOFuncName() here. For a local-linkage F,
+ // getIRPGOFuncName() returns FuncName with a 'FileName;' prefix, but
+ // llvm-profdata builds the GUID from the FuncName in DWARF, which lacks
+ // that prefix, so local-linkage functions could not find their
+ // MemProfRecord. We therefore use getName() now.
+ // 'unique-internal-linkage-names' can make MemProf work better for
+ // local-linkage functions.
+ auto FuncName = F.getName();
auto FuncGUID = Function::getGUID(FuncName);
- Expected<memprof::MemProfRecord> MemProfResult =
- MemProfReader->getMemProfRecord(FuncGUID);
- if (Error E = MemProfResult.takeError()) {
- handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
+ std::optional<memprof::MemProfRecord> MemProfRec;
+ auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
+ if (Err) {
+ handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
auto Err = IPE.get();
bool SkipWarning = false;
LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
@@ -715,6 +719,12 @@ static void readMemprof(Module &M, Function &F,
return;
}
+ // Detect if there are non-zero column numbers in the profile. If not,
+ // treat all column numbers as 0 when matching (i.e. ignore any non-zero
+ // columns in the IR). The profiled binary might have been built with
+ // column numbers disabled, for example.
+ bool ProfileHasColumns = false;
+
// Build maps of the location hash to all profile data with that leaf location
// (allocation info and the callsites).
std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
@@ -722,21 +732,22 @@ static void readMemprof(Module &M, Function &F,
// the frame array (see comments below where the map entries are added).
std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>>
LocHashToCallSites;
- const auto MemProfRec = std::move(MemProfResult.get());
- for (auto &AI : MemProfRec.AllocSites) {
+ for (auto &AI : MemProfRec->AllocSites) {
// Associate the allocation info with the leaf frame. The later matching
// code will match any inlined call sequences in the IR with a longer prefix
// of call stack frames.
uint64_t StackId = computeStackId(AI.CallStack[0]);
LocHashToAllocInfo[StackId].insert(&AI);
+ ProfileHasColumns |= AI.CallStack[0].Column;
}
- for (auto &CS : MemProfRec.CallSites) {
+ for (auto &CS : MemProfRec->CallSites) {
// Need to record all frames from leaf up to and including this function,
// as any of these may or may not have been inlined at this point.
unsigned Idx = 0;
for (auto &StackFrame : CS) {
uint64_t StackId = computeStackId(StackFrame);
LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));
+ ProfileHasColumns |= StackFrame.Column;
// Once we find this function, we can stop recording.
if (StackFrame.Function == FuncGUID)
break;
@@ -785,21 +796,21 @@ static void readMemprof(Module &M, Function &F,
if (Name.empty())
Name = DIL->getScope()->getSubprogram()->getName();
auto CalleeGUID = Function::getGUID(Name);
- auto StackId =
- computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn());
- // LeafFound will only be false on the first iteration, since we either
- // set it true or break out of the loop below.
+ auto StackId = computeStackId(CalleeGUID, GetOffset(DIL),
+ ProfileHasColumns ? DIL->getColumn() : 0);
+ // Check if we have found the profile's leaf frame. If yes, collect
+ // the rest of the call's inlined context starting here. If not, see if
+ // we find a match further up the inlined context (in case the profile
+ // was missing debug frames at the leaf).
if (!LeafFound) {
AllocInfoIter = LocHashToAllocInfo.find(StackId);
CallSitesIter = LocHashToCallSites.find(StackId);
- // Check if the leaf is in one of the maps. If not, no need to look
- // further at this call.
- if (AllocInfoIter == LocHashToAllocInfo.end() &&
- CallSitesIter == LocHashToCallSites.end())
- break;
- LeafFound = true;
+ if (AllocInfoIter != LocHashToAllocInfo.end() ||
+ CallSitesIter != LocHashToCallSites.end())
+ LeafFound = true;
}
- InlinedCallStack.push_back(StackId);
+ if (LeafFound)
+ InlinedCallStack.push_back(StackId);
}
// If leaf not in either of the maps, skip inst.
if (!LeafFound)
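The loop now keeps climbing the inlined context when the innermost frame misses, instead of bailing out, which tolerates profiles whose leaf debug frames were dropped. A self-contained toy with made-up stack ids that mirrors that control flow:

    #include <cassert>
    #include <cstdint>
    #include <map>
    #include <set>
    #include <vector>

    void matchDemo() {
      // Hypothetical ids: the profile only knows the caller frame, 42.
      std::map<uint64_t, std::set<int>> LocHashToAllocInfo{{42, {0}}};
      std::vector<uint64_t> InlinedLocs{7, 42, 99}; // leaf-first order
      bool LeafFound = false;
      std::vector<uint64_t> InlinedCallStack;
      for (uint64_t StackId : InlinedLocs) {
        if (!LeafFound && LocHashToAllocInfo.count(StackId))
          LeafFound = true; // id 7 misses; id 42 matches one frame up
        if (LeafFound)
          InlinedCallStack.push_back(StackId);
      }
      assert(LeafFound && InlinedCallStack.front() == 42);
    }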
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index de266590ad92..94af63da38c8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -152,7 +152,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
@@ -550,6 +549,7 @@ public:
private:
friend struct MemorySanitizerVisitor;
+ friend struct VarArgHelperBase;
friend struct VarArgAMD64Helper;
friend struct VarArgMIPS64Helper;
friend struct VarArgAArch64Helper;
@@ -574,8 +574,9 @@ private:
Triple TargetTriple;
LLVMContext *C;
- Type *IntptrTy;
+ Type *IntptrTy; ///< Integer type with the size of a ptr in default AS.
Type *OriginTy;
+ PointerType *PtrTy; ///< Pointer type in the default address space.
// XxxTLS variables represent the per-thread state in MSan and per-task state
// in KMSAN.
@@ -595,16 +596,13 @@ private:
/// Thread-local origin storage for function return value.
Value *RetvalOriginTLS;
- /// Thread-local shadow storage for in-register va_arg function
- /// parameters (x86_64-specific).
+ /// Thread-local shadow storage for in-register va_arg function.
Value *VAArgTLS;
- /// Thread-local shadow storage for in-register va_arg function
- /// parameters (x86_64-specific).
+ /// Thread-local shadow storage for in-register va_arg function.
Value *VAArgOriginTLS;
- /// Thread-local shadow storage for va_arg overflow area
- /// (x86_64-specific).
+ /// Thread-local shadow storage for va_arg overflow area.
Value *VAArgOverflowSizeTLS;
/// Are the instrumentation callbacks set up?
@@ -823,11 +821,10 @@ void MemorySanitizer::createKernelApi(Module &M, const TargetLibraryInfo &TLI) {
PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
// Functions for poisoning and unpoisoning memory.
- MsanPoisonAllocaFn =
- M.getOrInsertFunction("__msan_poison_alloca", IRB.getVoidTy(),
- IRB.getInt8PtrTy(), IntptrTy, IRB.getInt8PtrTy());
+ MsanPoisonAllocaFn = M.getOrInsertFunction(
+ "__msan_poison_alloca", IRB.getVoidTy(), PtrTy, IntptrTy, PtrTy);
MsanUnpoisonAllocaFn = M.getOrInsertFunction(
- "__msan_unpoison_alloca", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy);
+ "__msan_unpoison_alloca", IRB.getVoidTy(), PtrTy, IntptrTy);
}
static Constant *getOrInsertGlobal(Module &M, StringRef Name, Type *Ty) {
@@ -894,18 +891,18 @@ void MemorySanitizer::createUserspaceApi(Module &M, const TargetLibraryInfo &TLI
FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
FunctionName, TLI.getAttrList(C, {0, 2}, /*Signed=*/false),
- IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt8PtrTy(),
+ IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), PtrTy,
IRB.getInt32Ty());
}
- MsanSetAllocaOriginWithDescriptionFn = M.getOrInsertFunction(
- "__msan_set_alloca_origin_with_descr", IRB.getVoidTy(),
- IRB.getInt8PtrTy(), IntptrTy, IRB.getInt8PtrTy(), IRB.getInt8PtrTy());
- MsanSetAllocaOriginNoDescriptionFn = M.getOrInsertFunction(
- "__msan_set_alloca_origin_no_descr", IRB.getVoidTy(), IRB.getInt8PtrTy(),
- IntptrTy, IRB.getInt8PtrTy());
- MsanPoisonStackFn = M.getOrInsertFunction(
- "__msan_poison_stack", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy);
+ MsanSetAllocaOriginWithDescriptionFn =
+ M.getOrInsertFunction("__msan_set_alloca_origin_with_descr",
+ IRB.getVoidTy(), PtrTy, IntptrTy, PtrTy, PtrTy);
+ MsanSetAllocaOriginNoDescriptionFn =
+ M.getOrInsertFunction("__msan_set_alloca_origin_no_descr",
+ IRB.getVoidTy(), PtrTy, IntptrTy, PtrTy);
+ MsanPoisonStackFn = M.getOrInsertFunction("__msan_poison_stack",
+ IRB.getVoidTy(), PtrTy, IntptrTy);
}
/// Insert extern declaration of runtime-provided functions and globals.
@@ -923,16 +920,14 @@ void MemorySanitizer::initializeCallbacks(Module &M, const TargetLibraryInfo &TL
IRB.getInt32Ty());
MsanSetOriginFn = M.getOrInsertFunction(
"__msan_set_origin", TLI.getAttrList(C, {2}, /*Signed=*/false),
- IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, IRB.getInt32Ty());
+ IRB.getVoidTy(), PtrTy, IntptrTy, IRB.getInt32Ty());
MemmoveFn =
- M.getOrInsertFunction("__msan_memmove", IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
+ M.getOrInsertFunction("__msan_memmove", PtrTy, PtrTy, PtrTy, IntptrTy);
MemcpyFn =
- M.getOrInsertFunction("__msan_memcpy", IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
- MemsetFn = M.getOrInsertFunction(
- "__msan_memset", TLI.getAttrList(C, {1}, /*Signed=*/true),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy);
+ M.getOrInsertFunction("__msan_memcpy", PtrTy, PtrTy, PtrTy, IntptrTy);
+ MemsetFn = M.getOrInsertFunction("__msan_memset",
+ TLI.getAttrList(C, {1}, /*Signed=*/true),
+ PtrTy, PtrTy, IRB.getInt32Ty(), IntptrTy);
MsanInstrumentAsmStoreFn =
M.getOrInsertFunction("__msan_instrument_asm_store", IRB.getVoidTy(),
@@ -1046,6 +1041,7 @@ void MemorySanitizer::initializeModule(Module &M) {
IRBuilder<> IRB(*C);
IntptrTy = IRB.getIntPtrTy(DL);
OriginTy = IRB.getInt32Ty();
+ PtrTy = IRB.getPtrTy();
ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
@@ -1304,9 +1300,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
FunctionCallee Fn = MS.MaybeStoreOriginFn[SizeIndex];
Value *ConvertedShadow2 =
IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
- CallBase *CB = IRB.CreateCall(
- Fn, {ConvertedShadow2,
- IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), Origin});
+ CallBase *CB = IRB.CreateCall(Fn, {ConvertedShadow2, Addr, Origin});
CB->addParamAttr(0, Attribute::ZExt);
CB->addParamAttr(2, Attribute::ZExt);
} else {
@@ -1676,7 +1670,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
VectTy->getElementCount());
}
assert(IntPtrTy == MS.IntptrTy);
- return ShadowTy->getPointerTo();
+ return PointerType::get(*MS.C, 0);
}
Constant *constToIntPtr(Type *IntPtrTy, uint64_t C) const {
@@ -1806,11 +1800,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// TODO: Support callbacks with vectors of addresses.
unsigned NumElements = cast<FixedVectorType>(VectTy)->getNumElements();
Value *ShadowPtrs = ConstantInt::getNullValue(
- FixedVectorType::get(ShadowTy->getPointerTo(), NumElements));
+ FixedVectorType::get(IRB.getPtrTy(), NumElements));
Value *OriginPtrs = nullptr;
if (MS.TrackOrigins)
OriginPtrs = ConstantInt::getNullValue(
- FixedVectorType::get(MS.OriginTy->getPointerTo(), NumElements));
+ FixedVectorType::get(IRB.getPtrTy(), NumElements));
for (unsigned i = 0; i < NumElements; ++i) {
Value *OneAddr =
IRB.CreateExtractElement(Addr, ConstantInt::get(IRB.getInt32Ty(), i));
@@ -1838,33 +1832,30 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// Compute the shadow address for a given function argument.
///
/// Shadow = ParamTLS+ArgOffset.
- Value *getShadowPtrForArgument(Value *A, IRBuilder<> &IRB, int ArgOffset) {
+ Value *getShadowPtrForArgument(IRBuilder<> &IRB, int ArgOffset) {
Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
if (ArgOffset)
Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
- return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
- "_msarg");
+ return IRB.CreateIntToPtr(Base, IRB.getPtrTy(0), "_msarg");
}
/// Compute the origin address for a given function argument.
- Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB, int ArgOffset) {
+ Value *getOriginPtrForArgument(IRBuilder<> &IRB, int ArgOffset) {
if (!MS.TrackOrigins)
return nullptr;
Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
if (ArgOffset)
Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
- return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
- "_msarg_o");
+ return IRB.CreateIntToPtr(Base, IRB.getPtrTy(0), "_msarg_o");
}
/// Compute the shadow address for a retval.
- Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) {
- return IRB.CreatePointerCast(MS.RetvalTLS,
- PointerType::get(getShadowTy(A), 0), "_msret");
+ Value *getShadowPtrForRetval(IRBuilder<> &IRB) {
+ return IRB.CreatePointerCast(MS.RetvalTLS, IRB.getPtrTy(0), "_msret");
}
/// Compute the origin address for a retval.
- Value *getOriginPtrForRetval(IRBuilder<> &IRB) {
+ Value *getOriginPtrForRetval() {
// We keep a single origin for the entire retval. Might be too optimistic.
return MS.RetvalOriginTLS;
}
@@ -1988,7 +1979,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
CpShadowPtr, Constant::getNullValue(EntryIRB.getInt8Ty()),
Size, ArgAlign);
} else {
- Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
+ Value *Base = getShadowPtrForArgument(EntryIRB, ArgOffset);
const Align CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
Value *Cpy = EntryIRB.CreateMemCpy(CpShadowPtr, CopyAlign, Base,
CopyAlign, Size);
@@ -1997,7 +1988,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (MS.TrackOrigins) {
Value *OriginPtr =
- getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
+ getOriginPtrForArgument(EntryIRB, ArgOffset);
// FIXME: OriginSize should be:
// alignTo(V % kMinOriginAlignment + Size, kMinOriginAlignment)
unsigned OriginSize = alignTo(Size, kMinOriginAlignment);
@@ -2016,12 +2007,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(A, getCleanOrigin());
} else {
// Shadow over TLS
- Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
+ Value *Base = getShadowPtrForArgument(EntryIRB, ArgOffset);
ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base,
kShadowTLSAlignment);
if (MS.TrackOrigins) {
Value *OriginPtr =
- getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
+ getOriginPtrForArgument(EntryIRB, ArgOffset);
setOrigin(A, EntryIRB.CreateLoad(MS.OriginTy, OriginPtr));
}
}
@@ -2844,11 +2835,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void visitMemMoveInst(MemMoveInst &I) {
getShadow(I.getArgOperand(1)); // Ensure shadow initialized
IRBuilder<> IRB(&I);
- IRB.CreateCall(
- MS.MemmoveFn,
- {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
+ IRB.CreateCall(MS.MemmoveFn,
+ {I.getArgOperand(0), I.getArgOperand(1),
+ IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
I.eraseFromParent();
}
@@ -2869,11 +2858,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void visitMemCpyInst(MemCpyInst &I) {
getShadow(I.getArgOperand(1)); // Ensure shadow initialized
IRBuilder<> IRB(&I);
- IRB.CreateCall(
- MS.MemcpyFn,
- {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
+ IRB.CreateCall(MS.MemcpyFn,
+ {I.getArgOperand(0), I.getArgOperand(1),
+ IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
I.eraseFromParent();
}
@@ -2882,7 +2869,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRBuilder<> IRB(&I);
IRB.CreateCall(
MS.MemsetFn,
- {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+ {I.getArgOperand(0),
IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
I.eraseFromParent();
@@ -3391,8 +3378,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *ShadowPtr =
getShadowOriginPtr(Addr, IRB, Ty, Align(1), /*isStore*/ true).first;
- IRB.CreateStore(getCleanShadow(Ty),
- IRB.CreatePointerCast(ShadowPtr, Ty->getPointerTo()));
+ IRB.CreateStore(getCleanShadow(Ty), ShadowPtr);
if (ClCheckAccessAddress)
insertShadowCheck(Addr, &I);
@@ -4168,7 +4154,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (Function *Func = CB.getCalledFunction()) {
// __sanitizer_unaligned_{load,store} functions may be called by users
// and always expect shadows in the TLS, so don't check them.
- MayCheckCall &= !Func->getName().startswith("__sanitizer_unaligned_");
+ MayCheckCall &= !Func->getName().starts_with("__sanitizer_unaligned_");
}
unsigned ArgOffset = 0;
@@ -4194,7 +4180,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// in that case getShadow() will copy the actual arg shadow to
// __msan_param_tls.
Value *ArgShadow = getShadow(A);
- Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset);
+ Value *ArgShadowBase = getShadowPtrForArgument(IRB, ArgOffset);
LLVM_DEBUG(dbgs() << " Arg#" << i << ": " << *A
<< " Shadow: " << *ArgShadow << "\n");
if (ByVal) {
@@ -4221,7 +4207,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
Alignment, Size);
if (MS.TrackOrigins) {
- Value *ArgOriginBase = getOriginPtrForArgument(A, IRB, ArgOffset);
+ Value *ArgOriginBase = getOriginPtrForArgument(IRB, ArgOffset);
// FIXME: OriginSize should be:
// alignTo(A % kMinOriginAlignment + Size, kMinOriginAlignment)
unsigned OriginSize = alignTo(Size, kMinOriginAlignment);
@@ -4243,7 +4229,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Constant *Cst = dyn_cast<Constant>(ArgShadow);
if (MS.TrackOrigins && !(Cst && Cst->isNullValue())) {
IRB.CreateStore(getOrigin(A),
- getOriginPtrForArgument(A, IRB, ArgOffset));
+ getOriginPtrForArgument(IRB, ArgOffset));
}
}
(void)Store;
@@ -4275,7 +4261,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRBuilder<> IRBBefore(&CB);
// Until we have full dynamic coverage, make sure the retval shadow is 0.
- Value *Base = getShadowPtrForRetval(&CB, IRBBefore);
+ Value *Base = getShadowPtrForRetval(IRBBefore);
IRBBefore.CreateAlignedStore(getCleanShadow(&CB), Base,
kShadowTLSAlignment);
BasicBlock::iterator NextInsn;
@@ -4300,12 +4286,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
IRBuilder<> IRBAfter(&*NextInsn);
Value *RetvalShadow = IRBAfter.CreateAlignedLoad(
- getShadowTy(&CB), getShadowPtrForRetval(&CB, IRBAfter),
+ getShadowTy(&CB), getShadowPtrForRetval(IRBAfter),
kShadowTLSAlignment, "_msret");
setShadow(&CB, RetvalShadow);
if (MS.TrackOrigins)
setOrigin(&CB, IRBAfter.CreateLoad(MS.OriginTy,
- getOriginPtrForRetval(IRBAfter)));
+ getOriginPtrForRetval()));
}
bool isAMustTailRetVal(Value *RetVal) {
@@ -4326,7 +4312,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Don't emit the epilogue for musttail call returns.
if (isAMustTailRetVal(RetVal))
return;
- Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
+ Value *ShadowPtr = getShadowPtrForRetval(IRB);
bool HasNoUndef = F.hasRetAttribute(Attribute::NoUndef);
bool StoreShadow = !(MS.EagerChecks && HasNoUndef);
// FIXME: Consider using SpecialCaseList to specify a list of functions that
@@ -4346,7 +4332,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (StoreShadow) {
IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
if (MS.TrackOrigins && StoreOrigin)
- IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
+ IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval());
}
}
@@ -4380,8 +4366,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void poisonAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
if (PoisonStack && ClPoisonStackWithCall) {
- IRB.CreateCall(MS.MsanPoisonStackFn,
- {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
+ IRB.CreateCall(MS.MsanPoisonStackFn, {&I, Len});
} else {
Value *ShadowBase, *OriginBase;
std::tie(ShadowBase, OriginBase) = getShadowOriginPtr(
@@ -4396,13 +4381,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (ClPrintStackNames) {
Value *Descr = getLocalVarDescription(I);
IRB.CreateCall(MS.MsanSetAllocaOriginWithDescriptionFn,
- {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
- IRB.CreatePointerCast(Idptr, IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())});
+ {&I, Len, Idptr, Descr});
} else {
- IRB.CreateCall(MS.MsanSetAllocaOriginNoDescriptionFn,
- {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
- IRB.CreatePointerCast(Idptr, IRB.getInt8PtrTy())});
+ IRB.CreateCall(MS.MsanSetAllocaOriginNoDescriptionFn, {&I, Len, Idptr});
}
}
}
@@ -4410,12 +4391,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void poisonAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
Value *Descr = getLocalVarDescription(I);
if (PoisonStack) {
- IRB.CreateCall(MS.MsanPoisonAllocaFn,
- {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
- IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())});
+ IRB.CreateCall(MS.MsanPoisonAllocaFn, {&I, Len, Descr});
} else {
- IRB.CreateCall(MS.MsanUnpoisonAllocaFn,
- {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
+ IRB.CreateCall(MS.MsanUnpoisonAllocaFn, {&I, Len});
}
}
@@ -4577,10 +4555,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
if (!ElemTy->isSized())
return;
- Value *Ptr = IRB.CreatePointerCast(Operand, IRB.getInt8PtrTy());
Value *SizeVal =
IRB.CreateTypeSize(MS.IntptrTy, DL.getTypeStoreSize(ElemTy));
- IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Ptr, SizeVal});
+ IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Operand, SizeVal});
}
/// Get the number of output arguments returned by pointers.
@@ -4674,8 +4651,91 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
};
+struct VarArgHelperBase : public VarArgHelper {
+ Function &F;
+ MemorySanitizer &MS;
+ MemorySanitizerVisitor &MSV;
+ SmallVector<CallInst *, 16> VAStartInstrumentationList;
+ const unsigned VAListTagSize;
+
+ VarArgHelperBase(Function &F, MemorySanitizer &MS,
+ MemorySanitizerVisitor &MSV, unsigned VAListTagSize)
+ : F(F), MS(MS), MSV(MSV), VAListTagSize(VAListTagSize) {}
+
+ Value *getShadowAddrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) {
+ Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+ return IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+ }
+
+ /// Compute the shadow address for a given va_arg.
+ Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
+ unsigned ArgOffset) {
+ Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+ return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
+ "_msarg_va_s");
+ }
+
+ /// Compute the shadow address for a given va_arg.
+ Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
+ unsigned ArgOffset, unsigned ArgSize) {
+ // Make sure we don't overflow __msan_va_arg_tls.
+ if (ArgOffset + ArgSize > kParamTLSSize)
+ return nullptr;
+ return getShadowPtrForVAArgument(Ty, IRB, ArgOffset);
+ }
+
+ /// Compute the origin address for a given va_arg.
+ Value *getOriginPtrForVAArgument(IRBuilder<> &IRB, int ArgOffset) {
+ Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
+ // getOriginPtrForVAArgument() is always called after
+ // getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never
+ // overflow.
+ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+ return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
+ "_msarg_va_o");
+ }
+
+ void CleanUnusedTLS(IRBuilder<> &IRB, Value *ShadowBase,
+ unsigned BaseOffset) {
+ // The tail of __msan_va_arg_tls is not large enough to fit the full
+ // value shadow, but it will be copied to the backup anyway. Make it
+ // clean.
+ if (BaseOffset >= kParamTLSSize)
+ return;
+ Value *TailSize =
+ ConstantInt::getSigned(IRB.getInt32Ty(), kParamTLSSize - BaseOffset);
+ IRB.CreateMemSet(ShadowBase, ConstantInt::getNullValue(IRB.getInt8Ty()),
+ TailSize, Align(8));
+ }
+
+ void unpoisonVAListTagForInst(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *VAListTag = I.getArgOperand(0);
+ const Align Alignment = Align(8);
+ auto [ShadowPtr, OriginPtr] = MSV.getShadowOriginPtr(
+ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
+ // Unpoison the whole __va_list_tag.
+ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+ VAListTagSize, Alignment, false);
+ }
+
+ void visitVAStartInst(VAStartInst &I) override {
+ if (F.getCallingConv() == CallingConv::Win64)
+ return;
+ VAStartInstrumentationList.push_back(&I);
+ unpoisonVAListTagForInst(I);
+ }
+
+ void visitVACopyInst(VACopyInst &I) override {
+ if (F.getCallingConv() == CallingConv::Win64)
+ return;
+ unpoisonVAListTagForInst(I);
+ }
+};
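A worked sketch of the tail-clearing arithmetic in CleanUnusedTLS, assuming this file's kParamTLSSize of 800 bytes: an argument whose aligned shadow starts at offset 792 but needs 24 bytes overflows the buffer, and only the 8 bytes still inside it are zeroed:

    #include <cassert>

    constexpr unsigned kParamTLSSizeSketch = 800; // assumed from this file

    // Bytes CleanUnusedTLS memsets to zero for an overflowing argument.
    unsigned tlsTailBytesToClear(unsigned BaseOffset) {
      if (BaseOffset >= kParamTLSSizeSketch)
        return 0; // nothing of this argument landed inside the buffer
      return kParamTLSSizeSketch - BaseOffset;
    }

    void check() { assert(tlsTailBytesToClear(792) == 8); }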
+
/// AMD64-specific implementation of VarArgHelper.
-struct VarArgAMD64Helper : public VarArgHelper {
+struct VarArgAMD64Helper : public VarArgHelperBase {
// An unfortunate workaround for asymmetric lowering of va_arg stuff.
// See a comment in visitCallBase for more details.
static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
@@ -4684,20 +4744,15 @@ struct VarArgAMD64Helper : public VarArgHelper {
static const unsigned AMD64FpEndOffsetNoSSE = AMD64GpEndOffset;
unsigned AMD64FpEndOffset;
- Function &F;
- MemorySanitizer &MS;
- MemorySanitizerVisitor &MSV;
AllocaInst *VAArgTLSCopy = nullptr;
AllocaInst *VAArgTLSOriginCopy = nullptr;
Value *VAArgOverflowSize = nullptr;
- SmallVector<CallInst *, 16> VAStartInstrumentationList;
-
enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
MemorySanitizerVisitor &MSV)
- : F(F), MS(MS), MSV(MSV) {
+ : VarArgHelperBase(F, MS, MSV, /*VAListTagSize=*/24) {
AMD64FpEndOffset = AMD64FpEndOffsetSSE;
for (const auto &Attr : F.getAttributes().getFnAttrs()) {
if (Attr.isStringAttribute() &&
@@ -4712,6 +4767,8 @@ struct VarArgAMD64Helper : public VarArgHelper {
ArgKind classifyArgument(Value *arg) {
// A very rough approximation of X86_64 argument classification rules.
Type *T = arg->getType();
+ if (T->isX86_FP80Ty())
+ return AK_Memory;
if (T->isFPOrFPVectorTy() || T->isX86_MMXTy())
return AK_FloatingPoint;
if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
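Classifying x86_fp80 as AK_Memory matches the SysV x86-64 ABI, where long double is passed on the stack rather than in SSE registers, so its shadow must go to the overflow area. A hypothetical variadic call showing the two paths:

    #include <cstdarg>

    double sum(int n, ...) {
      va_list ap;
      va_start(ap, n);
      double d = va_arg(ap, double);            // SSE register slot
      long double ld = va_arg(ap, long double); // x86_fp80: overflow area
      va_end(ap);
      return d + static_cast<double>(ld);
    }

    double demo() { return sum(2, 1.0, 2.0L); }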
@@ -4734,6 +4791,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
unsigned FpOffset = AMD64GpEndOffset;
unsigned OverflowOffset = AMD64FpEndOffset;
const DataLayout &DL = F.getParent()->getDataLayout();
+
for (const auto &[ArgNo, A] : llvm::enumerate(CB.args())) {
bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
@@ -4746,19 +4804,24 @@ struct VarArgAMD64Helper : public VarArgHelper {
assert(A->getType()->isPointerTy());
Type *RealTy = CB.getParamByValType(ArgNo);
uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
- Value *ShadowBase = getShadowPtrForVAArgument(
- RealTy, IRB, OverflowOffset, alignTo(ArgSize, 8));
+ uint64_t AlignedSize = alignTo(ArgSize, 8);
+ unsigned BaseOffset = OverflowOffset;
+ Value *ShadowBase =
+ getShadowPtrForVAArgument(RealTy, IRB, OverflowOffset);
Value *OriginBase = nullptr;
if (MS.TrackOrigins)
- OriginBase = getOriginPtrForVAArgument(RealTy, IRB, OverflowOffset);
- OverflowOffset += alignTo(ArgSize, 8);
- if (!ShadowBase)
- continue;
+ OriginBase = getOriginPtrForVAArgument(IRB, OverflowOffset);
+ OverflowOffset += AlignedSize;
+
+ if (OverflowOffset > kParamTLSSize) {
+ CleanUnusedTLS(IRB, ShadowBase, BaseOffset);
+ continue; // We have no space to copy shadow there.
+ }
+
Value *ShadowPtr, *OriginPtr;
std::tie(ShadowPtr, OriginPtr) =
MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment,
/*isStore*/ false);
-
IRB.CreateMemCpy(ShadowBase, kShadowTLSAlignment, ShadowPtr,
kShadowTLSAlignment, ArgSize);
if (MS.TrackOrigins)
@@ -4773,37 +4836,42 @@ struct VarArgAMD64Helper : public VarArgHelper {
Value *ShadowBase, *OriginBase = nullptr;
switch (AK) {
case AK_GeneralPurpose:
- ShadowBase =
- getShadowPtrForVAArgument(A->getType(), IRB, GpOffset, 8);
+ ShadowBase = getShadowPtrForVAArgument(A->getType(), IRB, GpOffset);
if (MS.TrackOrigins)
- OriginBase = getOriginPtrForVAArgument(A->getType(), IRB, GpOffset);
+ OriginBase = getOriginPtrForVAArgument(IRB, GpOffset);
GpOffset += 8;
+ assert(GpOffset <= kParamTLSSize);
break;
case AK_FloatingPoint:
- ShadowBase =
- getShadowPtrForVAArgument(A->getType(), IRB, FpOffset, 16);
+ ShadowBase = getShadowPtrForVAArgument(A->getType(), IRB, FpOffset);
if (MS.TrackOrigins)
- OriginBase = getOriginPtrForVAArgument(A->getType(), IRB, FpOffset);
+ OriginBase = getOriginPtrForVAArgument(IRB, FpOffset);
FpOffset += 16;
+ assert(FpOffset <= kParamTLSSize);
break;
case AK_Memory:
if (IsFixed)
continue;
uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
+ uint64_t AlignedSize = alignTo(ArgSize, 8);
+ unsigned BaseOffset = OverflowOffset;
ShadowBase =
- getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset, 8);
- if (MS.TrackOrigins)
- OriginBase =
- getOriginPtrForVAArgument(A->getType(), IRB, OverflowOffset);
- OverflowOffset += alignTo(ArgSize, 8);
+ getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset);
+ if (MS.TrackOrigins) {
+ OriginBase = getOriginPtrForVAArgument(IRB, OverflowOffset);
+ }
+ OverflowOffset += AlignedSize;
+ if (OverflowOffset > kParamTLSSize) {
+ // We have no space to copy shadow there.
+ CleanUnusedTLS(IRB, ShadowBase, BaseOffset);
+ continue;
+ }
}
// Take fixed arguments into account for GpOffset and FpOffset,
// but don't actually store shadows for them.
// TODO(glider): don't call get*PtrForVAArgument() for them.
if (IsFixed)
continue;
- if (!ShadowBase)
- continue;
Value *Shadow = MSV.getShadow(A);
IRB.CreateAlignedStore(Shadow, ShadowBase, kShadowTLSAlignment);
if (MS.TrackOrigins) {
@@ -4819,59 +4887,6 @@ struct VarArgAMD64Helper : public VarArgHelper {
IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
}
- /// Compute the shadow address for a given va_arg.
- Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
- unsigned ArgOffset, unsigned ArgSize) {
- // Make sure we don't overflow __msan_va_arg_tls.
- if (ArgOffset + ArgSize > kParamTLSSize)
- return nullptr;
- Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
- Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
- return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
- "_msarg_va_s");
- }
-
- /// Compute the origin address for a given va_arg.
- Value *getOriginPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, int ArgOffset) {
- Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
- // getOriginPtrForVAArgument() is always called after
- // getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never
- // overflow.
- Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
- return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
- "_msarg_va_o");
- }
-
- void unpoisonVAListTagForInst(IntrinsicInst &I) {
- IRBuilder<> IRB(&I);
- Value *VAListTag = I.getArgOperand(0);
- Value *ShadowPtr, *OriginPtr;
- const Align Alignment = Align(8);
- std::tie(ShadowPtr, OriginPtr) =
- MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
- /*isStore*/ true);
-
- // Unpoison the whole __va_list_tag.
- // FIXME: magic ABI constants.
- IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
- /* size */ 24, Alignment, false);
- // We shouldn't need to zero out the origins, as they're only checked for
- // nonzero shadow.
- }
-
- void visitVAStartInst(VAStartInst &I) override {
- if (F.getCallingConv() == CallingConv::Win64)
- return;
- VAStartInstrumentationList.push_back(&I);
- unpoisonVAListTagForInst(I);
- }
-
- void visitVACopyInst(VACopyInst &I) override {
- if (F.getCallingConv() == CallingConv::Win64)
- return;
- unpoisonVAListTagForInst(I);
- }
-
void finalizeInstrumentation() override {
assert(!VAArgOverflowSize && !VAArgTLSCopy &&
"finalizeInstrumentation called twice");
@@ -4908,7 +4923,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
NextNodeIRBuilder IRB(OrigInst);
Value *VAListTag = OrigInst->getArgOperand(0);
- Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
+ Type *RegSaveAreaPtrTy = PointerType::getUnqual(*MS.C); // i64*
Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
ConstantInt::get(MS.IntptrTy, 16)),
@@ -4925,7 +4940,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
if (MS.TrackOrigins)
IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
Alignment, AMD64FpEndOffset);
- Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
+ Type *OverflowArgAreaPtrTy = PointerType::getUnqual(*MS.C); // i64*
Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
ConstantInt::get(MS.IntptrTy, 8)),
@@ -4951,18 +4966,14 @@ struct VarArgAMD64Helper : public VarArgHelper {
};
/// MIPS64-specific implementation of VarArgHelper.
-struct VarArgMIPS64Helper : public VarArgHelper {
- Function &F;
- MemorySanitizer &MS;
- MemorySanitizerVisitor &MSV;
+/// NOTE: This is also used for LoongArch64.
+struct VarArgMIPS64Helper : public VarArgHelperBase {
AllocaInst *VAArgTLSCopy = nullptr;
Value *VAArgSize = nullptr;
- SmallVector<CallInst *, 16> VAStartInstrumentationList;
-
VarArgMIPS64Helper(Function &F, MemorySanitizer &MS,
MemorySanitizerVisitor &MSV)
- : F(F), MS(MS), MSV(MSV) {}
+ : VarArgHelperBase(F, MS, MSV, /*VAListTagSize=*/8) {}
void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
unsigned VAArgOffset = 0;
@@ -4992,42 +5003,6 @@ struct VarArgMIPS64Helper : public VarArgHelper {
IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
}
- /// Compute the shadow address for a given va_arg.
- Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
- unsigned ArgOffset, unsigned ArgSize) {
- // Make sure we don't overflow __msan_va_arg_tls.
- if (ArgOffset + ArgSize > kParamTLSSize)
- return nullptr;
- Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
- Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
- return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
- "_msarg");
- }
-
- void visitVAStartInst(VAStartInst &I) override {
- IRBuilder<> IRB(&I);
- VAStartInstrumentationList.push_back(&I);
- Value *VAListTag = I.getArgOperand(0);
- Value *ShadowPtr, *OriginPtr;
- const Align Alignment = Align(8);
- std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
- VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
- IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
- /* size */ 8, Alignment, false);
- }
-
- void visitVACopyInst(VACopyInst &I) override {
- IRBuilder<> IRB(&I);
- VAStartInstrumentationList.push_back(&I);
- Value *VAListTag = I.getArgOperand(0);
- Value *ShadowPtr, *OriginPtr;
- const Align Alignment = Align(8);
- std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
- VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
- IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
- /* size */ 8, Alignment, false);
- }
-
void finalizeInstrumentation() override {
assert(!VAArgSize && !VAArgTLSCopy &&
"finalizeInstrumentation called twice");
@@ -5057,7 +5032,7 @@ struct VarArgMIPS64Helper : public VarArgHelper {
CallInst *OrigInst = VAStartInstrumentationList[i];
NextNodeIRBuilder IRB(OrigInst);
Value *VAListTag = OrigInst->getArgOperand(0);
- Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
+ Type *RegSaveAreaPtrTy = PointerType::getUnqual(*MS.C); // i64*
Value *RegSaveAreaPtrPtr =
IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
PointerType::get(RegSaveAreaPtrTy, 0));
@@ -5075,7 +5050,7 @@ struct VarArgMIPS64Helper : public VarArgHelper {
};
/// AArch64-specific implementation of VarArgHelper.
-struct VarArgAArch64Helper : public VarArgHelper {
+struct VarArgAArch64Helper : public VarArgHelperBase {
static const unsigned kAArch64GrArgSize = 64;
static const unsigned kAArch64VrArgSize = 128;
@@ -5087,28 +5062,36 @@ struct VarArgAArch64Helper : public VarArgHelper {
AArch64VrBegOffset + kAArch64VrArgSize;
static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;
- Function &F;
- MemorySanitizer &MS;
- MemorySanitizerVisitor &MSV;
AllocaInst *VAArgTLSCopy = nullptr;
Value *VAArgOverflowSize = nullptr;
- SmallVector<CallInst *, 16> VAStartInstrumentationList;
-
enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
MemorySanitizerVisitor &MSV)
- : F(F), MS(MS), MSV(MSV) {}
+ : VarArgHelperBase(F, MS, MSV, /*VAListTagSize=*/32) {}
+
+ // A very rough approximation of aarch64 argument classification rules.
+ std::pair<ArgKind, uint64_t> classifyArgument(Type *T) {
+ if (T->isIntOrPtrTy() && T->getPrimitiveSizeInBits() <= 64)
+ return {AK_GeneralPurpose, 1};
+ if (T->isFloatingPointTy() && T->getPrimitiveSizeInBits() <= 128)
+ return {AK_FloatingPoint, 1};
+
+ if (T->isArrayTy()) {
+ auto R = classifyArgument(T->getArrayElementType());
+ R.second *= T->getScalarType()->getArrayNumElements();
+ return R;
+ }
- ArgKind classifyArgument(Value *arg) {
- Type *T = arg->getType();
- if (T->isFPOrFPVectorTy())
- return AK_FloatingPoint;
- if ((T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64) ||
- (T->isPointerTy()))
- return AK_GeneralPurpose;
- return AK_Memory;
+ if (const FixedVectorType *FV = dyn_cast<FixedVectorType>(T)) {
+ auto R = classifyArgument(FV->getScalarType());
+ R.second *= FV->getNumElements();
+ return R;
+ }
+
+ LLVM_DEBUG(errs() << "Unknown vararg type: " << *T << "\n");
+ return {AK_Memory, 0};
}
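The classifier now returns a (kind, register count) pair so small aggregates consume the right number of shadow slots; for example, an IR [4 x i64] yields {AK_GeneralPurpose, 4} and advances GrOffset by 32 bytes. A toy check of the demotion rule that follows, with offsets that are assumptions of the sketch:

    #include <cstdint>

    bool aarch64DemotionDemo() {
      // Hypothetical state: 40 of the 64 GR shadow bytes already used.
      uint64_t GrOffset = 40, RegNum = 4, GrEndOffset = 64;
      // 40 + 4 * 8 = 72 > 64, so the argument is demoted to AK_Memory.
      return (GrOffset + RegNum * 8) > GrEndOffset; // true: demoted
    }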
// The instrumentation stores the argument shadow in a non ABI-specific
@@ -5116,7 +5099,7 @@ struct VarArgAArch64Helper : public VarArgHelper {
// like x86_64 case, lowers the va_args in the frontend and this pass only
// sees the low level code that deals with va_list internals).
// The first seven GR registers are saved in the first 56 bytes of the
- // va_arg tls arra, followers by the first 8 FP/SIMD registers, and then
+ // va_arg tls array, followed by the first 8 FP/SIMD registers, and then
// the remaining arguments.
// Using constant offset within the va_arg TLS array allows fast copy
// in the finalize instrumentation.
@@ -5128,20 +5111,22 @@ struct VarArgAArch64Helper : public VarArgHelper {
const DataLayout &DL = F.getParent()->getDataLayout();
for (const auto &[ArgNo, A] : llvm::enumerate(CB.args())) {
bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
- ArgKind AK = classifyArgument(A);
- if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset)
+ auto [AK, RegNum] = classifyArgument(A->getType());
+ if (AK == AK_GeneralPurpose &&
+ (GrOffset + RegNum * 8) > AArch64GrEndOffset)
AK = AK_Memory;
- if (AK == AK_FloatingPoint && VrOffset >= AArch64VrEndOffset)
+ if (AK == AK_FloatingPoint &&
+ (VrOffset + RegNum * 16) > AArch64VrEndOffset)
AK = AK_Memory;
Value *Base;
switch (AK) {
case AK_GeneralPurpose:
- Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset, 8);
- GrOffset += 8;
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset);
+ GrOffset += 8 * RegNum;
break;
case AK_FloatingPoint:
- Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset, 8);
- VrOffset += 16;
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset);
+ VrOffset += 16 * RegNum;
break;
case AK_Memory:
// Don't count fixed arguments in the overflow area - va_start will
@@ -5149,17 +5134,21 @@ struct VarArgAArch64Helper : public VarArgHelper {
if (IsFixed)
continue;
uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
- Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset,
- alignTo(ArgSize, 8));
- OverflowOffset += alignTo(ArgSize, 8);
+ uint64_t AlignedSize = alignTo(ArgSize, 8);
+ unsigned BaseOffset = OverflowOffset;
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, BaseOffset);
+ OverflowOffset += AlignedSize;
+ if (OverflowOffset > kParamTLSSize) {
+ // We have no space to copy shadow there.
+ CleanUnusedTLS(IRB, Base, BaseOffset);
+ continue;
+ }
break;
}
// Count Gp/Vr fixed arguments to their respective offsets, but don't
// bother to actually store a shadow.
if (IsFixed)
continue;
- if (!Base)
- continue;
IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
}
Constant *OverflowSize =
@@ -5167,48 +5156,12 @@ struct VarArgAArch64Helper : public VarArgHelper {
IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
}
- /// Compute the shadow address for a given va_arg.
- Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
- unsigned ArgOffset, unsigned ArgSize) {
- // Make sure we don't overflow __msan_va_arg_tls.
- if (ArgOffset + ArgSize > kParamTLSSize)
- return nullptr;
- Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
- Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
- return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
- "_msarg");
- }
-
- void visitVAStartInst(VAStartInst &I) override {
- IRBuilder<> IRB(&I);
- VAStartInstrumentationList.push_back(&I);
- Value *VAListTag = I.getArgOperand(0);
- Value *ShadowPtr, *OriginPtr;
- const Align Alignment = Align(8);
- std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
- VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
- IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
- /* size */ 32, Alignment, false);
- }
-
- void visitVACopyInst(VACopyInst &I) override {
- IRBuilder<> IRB(&I);
- VAStartInstrumentationList.push_back(&I);
- Value *VAListTag = I.getArgOperand(0);
- Value *ShadowPtr, *OriginPtr;
- const Align Alignment = Align(8);
- std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
- VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
- IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
- /* size */ 32, Alignment, false);
- }
-
// Retrieve a va_list field of 'void*' size.
Value *getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
Value *SaveAreaPtrPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
ConstantInt::get(MS.IntptrTy, offset)),
- Type::getInt64PtrTy(*MS.C));
+ PointerType::get(*MS.C, 0));
return IRB.CreateLoad(Type::getInt64Ty(*MS.C), SaveAreaPtrPtr);
}
@@ -5217,7 +5170,7 @@ struct VarArgAArch64Helper : public VarArgHelper {
Value *SaveAreaPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
ConstantInt::get(MS.IntptrTy, offset)),
- Type::getInt32PtrTy(*MS.C));
+ PointerType::get(*MS.C, 0));
Value *SaveArea32 = IRB.CreateLoad(IRB.getInt32Ty(), SaveAreaPtr);
return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
}
@@ -5268,7 +5221,7 @@ struct VarArgAArch64Helper : public VarArgHelper {
// we need to adjust the offset for both GR and VR fields based on
// the __{gr,vr}_offs value (since they are stores based on incoming
// named arguments).
- Type *RegSaveAreaPtrTy = IRB.getInt8PtrTy();
+ Type *RegSaveAreaPtrTy = IRB.getPtrTy();
// Read the stack pointer from the va_list.
Value *StackSaveAreaPtr =
@@ -5342,18 +5295,13 @@ struct VarArgAArch64Helper : public VarArgHelper {
};
/// PowerPC64-specific implementation of VarArgHelper.
-struct VarArgPowerPC64Helper : public VarArgHelper {
- Function &F;
- MemorySanitizer &MS;
- MemorySanitizerVisitor &MSV;
+struct VarArgPowerPC64Helper : public VarArgHelperBase {
AllocaInst *VAArgTLSCopy = nullptr;
Value *VAArgSize = nullptr;
- SmallVector<CallInst *, 16> VAStartInstrumentationList;
-
VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS,
MemorySanitizerVisitor &MSV)
- : F(F), MS(MS), MSV(MSV) {}
+ : VarArgHelperBase(F, MS, MSV, /*VAListTagSize=*/8) {}
void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
// For PowerPC, we need to deal with alignment of stack arguments -
@@ -5441,43 +5389,6 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
}
- /// Compute the shadow address for a given va_arg.
- Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
- unsigned ArgOffset, unsigned ArgSize) {
- // Make sure we don't overflow __msan_va_arg_tls.
- if (ArgOffset + ArgSize > kParamTLSSize)
- return nullptr;
- Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
- Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
- return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
- "_msarg");
- }
-
- void visitVAStartInst(VAStartInst &I) override {
- IRBuilder<> IRB(&I);
- VAStartInstrumentationList.push_back(&I);
- Value *VAListTag = I.getArgOperand(0);
- Value *ShadowPtr, *OriginPtr;
- const Align Alignment = Align(8);
- std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
- VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
- IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
- /* size */ 8, Alignment, false);
- }
-
- void visitVACopyInst(VACopyInst &I) override {
- IRBuilder<> IRB(&I);
- Value *VAListTag = I.getArgOperand(0);
- Value *ShadowPtr, *OriginPtr;
- const Align Alignment = Align(8);
- std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
- VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
- // Unpoison the whole __va_list_tag.
- // FIXME: magic ABI constants.
- IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
- /* size */ 8, Alignment, false);
- }
-
void finalizeInstrumentation() override {
assert(!VAArgSize && !VAArgTLSCopy &&
"finalizeInstrumentation called twice");
@@ -5508,7 +5419,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
CallInst *OrigInst = VAStartInstrumentationList[i];
NextNodeIRBuilder IRB(OrigInst);
Value *VAListTag = OrigInst->getArgOperand(0);
- Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
+ Type *RegSaveAreaPtrTy = PointerType::getUnqual(*MS.C); // i64*
Value *RegSaveAreaPtrPtr =
IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
PointerType::get(RegSaveAreaPtrTy, 0));
@@ -5526,7 +5437,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
};
/// SystemZ-specific implementation of VarArgHelper.
-struct VarArgSystemZHelper : public VarArgHelper {
+struct VarArgSystemZHelper : public VarArgHelperBase {
static const unsigned SystemZGpOffset = 16;
static const unsigned SystemZGpEndOffset = 56;
static const unsigned SystemZFpOffset = 128;
@@ -5538,16 +5449,11 @@ struct VarArgSystemZHelper : public VarArgHelper {
static const unsigned SystemZOverflowArgAreaPtrOffset = 16;
static const unsigned SystemZRegSaveAreaPtrOffset = 24;
- Function &F;
- MemorySanitizer &MS;
- MemorySanitizerVisitor &MSV;
bool IsSoftFloatABI;
AllocaInst *VAArgTLSCopy = nullptr;
AllocaInst *VAArgTLSOriginCopy = nullptr;
Value *VAArgOverflowSize = nullptr;
- SmallVector<CallInst *, 16> VAStartInstrumentationList;
-
enum class ArgKind {
GeneralPurpose,
FloatingPoint,
@@ -5560,7 +5466,7 @@ struct VarArgSystemZHelper : public VarArgHelper {
VarArgSystemZHelper(Function &F, MemorySanitizer &MS,
MemorySanitizerVisitor &MSV)
- : F(F), MS(MS), MSV(MSV),
+ : VarArgHelperBase(F, MS, MSV, SystemZVAListTagSize),
IsSoftFloatABI(F.getFnAttribute("use-soft-float").getValueAsBool()) {}
ArgKind classifyArgument(Type *T) {
@@ -5721,39 +5627,8 @@ struct VarArgSystemZHelper : public VarArgHelper {
IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
}
- Value *getShadowAddrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) {
- Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
- return IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
- }
-
- Value *getOriginPtrForVAArgument(IRBuilder<> &IRB, int ArgOffset) {
- Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
- Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
- return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
- "_msarg_va_o");
- }
-
- void unpoisonVAListTagForInst(IntrinsicInst &I) {
- IRBuilder<> IRB(&I);
- Value *VAListTag = I.getArgOperand(0);
- Value *ShadowPtr, *OriginPtr;
- const Align Alignment = Align(8);
- std::tie(ShadowPtr, OriginPtr) =
- MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
- /*isStore*/ true);
- IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
- SystemZVAListTagSize, Alignment, false);
- }
-
- void visitVAStartInst(VAStartInst &I) override {
- VAStartInstrumentationList.push_back(&I);
- unpoisonVAListTagForInst(I);
- }
-
- void visitVACopyInst(VACopyInst &I) override { unpoisonVAListTagForInst(I); }
-
void copyRegSaveArea(IRBuilder<> &IRB, Value *VAListTag) {
- Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
+ Type *RegSaveAreaPtrTy = PointerType::getUnqual(*MS.C); // i64*
Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(
IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
@@ -5777,8 +5652,10 @@ struct VarArgSystemZHelper : public VarArgHelper {
Alignment, RegSaveAreaSize);
}
+ // FIXME: This implementation limits OverflowOffset to kParamTLSSize, so we
+ // don't know the real overflow size and can't clear shadow past kParamTLSSize.
void copyOverflowArea(IRBuilder<> &IRB, Value *VAListTag) {
- Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
+ Type *OverflowArgAreaPtrTy = PointerType::getUnqual(*MS.C); // i64*
Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(
IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
@@ -5846,6 +5723,10 @@ struct VarArgSystemZHelper : public VarArgHelper {
}
};
+// LoongArch64 is not MIPS, but its current varargs calling convention
+// matches the MIPS one.
+using VarArgLoongArch64Helper = VarArgMIPS64Helper;
+
/// A no-op implementation of VarArgHelper.
struct VarArgNoOpHelper : public VarArgHelper {
VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
@@ -5878,6 +5759,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
return new VarArgPowerPC64Helper(Func, Msan, Visitor);
else if (TargetTriple.getArch() == Triple::systemz)
return new VarArgSystemZHelper(Func, Msan, Visitor);
+ else if (TargetTriple.isLoongArch64())
+ return new VarArgLoongArch64Helper(Func, Msan, Visitor);
else
return new VarArgNoOpHelper(Func, Msan, Visitor);
}
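The per-target getShadowPtrForVAArgument / visitVAStartInst / visitVACopyInst bodies deleted above were hoisted into the shared VarArgHelperBase; the invariant they preserve is the bounds check against the fixed-size __msan_va_arg_tls buffer. A standalone sketch of that guard (kParamTLSSize is 800 in MSan; the function name is illustrative):

#include <cstdint>
#include <optional>

constexpr unsigned kParamTLSSize = 800; // size of __msan_va_arg_tls

// Returns the shadow offset for a va_arg, or nothing when the argument
// would spill past the TLS buffer, in which case no shadow is written.
std::optional<unsigned> vaArgShadowOffset(unsigned ArgOffset,
                                          unsigned ArgSize) {
  if (ArgOffset + ArgSize > kParamTLSSize)
    return std::nullopt;
  return ArgOffset;
}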
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 3c8f25d73c62..3a57709c4e8b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -327,7 +327,7 @@ extern cl::opt<PGOViewCountsType> PGOViewCounts;
// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
extern cl::opt<std::string> ViewBlockFreqFuncName;
-extern cl::opt<bool> DebugInfoCorrelate;
+extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate;
} // namespace llvm
static cl::opt<bool>
@@ -382,7 +382,7 @@ static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) {
ProfileVersion |= VARIANT_MASK_CSIR_PROF;
if (PGOInstrumentEntry)
ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
- if (DebugInfoCorrelate)
+ if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
if (PGOFunctionEntryCoverage)
ProfileVersion |=
@@ -525,6 +525,7 @@ public:
std::vector<std::vector<VPCandidateInfo>> ValueSites;
SelectInstVisitor SIVisitor;
std::string FuncName;
+ std::string DeprecatedFuncName;
GlobalVariable *FuncNameVar;
// CFG hash value for this function.
@@ -582,21 +583,22 @@ public:
if (!IsCS) {
NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
- NumOfPGOBB += MST.BBInfos.size();
+ NumOfPGOBB += MST.bbInfoSize();
ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
} else {
NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
- NumOfCSPGOBB += MST.BBInfos.size();
+ NumOfCSPGOBB += MST.bbInfoSize();
}
- FuncName = getPGOFuncName(F);
+ FuncName = getIRPGOFuncName(F);
+ DeprecatedFuncName = getPGOFuncName(F);
computeCFGHash();
if (!ComdatMembers.empty())
renameComdatFunction();
LLVM_DEBUG(dumpInfo("after CFGMST"));
- for (auto &E : MST.AllEdges) {
+ for (const auto &E : MST.allEdges()) {
if (E->Removed)
continue;
IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
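FuncName now carries the IR-level PGO name from getIRPGOFuncName, while DeprecatedFuncName keeps the legacy getPGOFuncName spelling so records produced by older compilers still resolve. The lookup pattern, sketched against a hypothetical StringMap standing in for the profile reader:

#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"

// Try the new name first, then fall back to the deprecated one.
template <typename T>
const T *lookupWithFallback(const llvm::StringMap<T> &Records,
                            llvm::StringRef NewName,
                            llvm::StringRef DeprecatedName) {
  auto It = Records.find(NewName);
  if (It == Records.end())
    It = Records.find(DeprecatedName);
  return It == Records.end() ? nullptr : &It->second;
}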
@@ -639,7 +641,7 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
(uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
//(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
- (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
+ (uint64_t)MST.numEdges() << 32 | JC.getCRC();
} else {
// The higher 32 bits.
auto updateJCH = [&JCH](uint64_t Num) {
@@ -653,7 +655,7 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
if (BCI) {
updateJCH(BCI->getInstrumentedBlocksHash());
} else {
- updateJCH((uint64_t)MST.AllEdges.size());
+ updateJCH((uint64_t)MST.numEdges());
}
// Hash format for context sensitive profile. Reserve 4 bits for other
@@ -668,7 +670,7 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
<< " CRC = " << JC.getCRC()
<< ", Selects = " << SIVisitor.getNumOfSelectInsts()
- << ", Edges = " << MST.AllEdges.size() << ", ICSites = "
+ << ", Edges = " << MST.numEdges() << ", ICSites = "
<< ValueSites[IPVK_IndirectCallTarget].size());
if (!PGOOldCFGHashing) {
LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
@@ -756,8 +758,8 @@ void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
// Use a worklist as we will update the vector during the iteration.
std::vector<Edge *> EdgeList;
- EdgeList.reserve(MST.AllEdges.size());
- for (auto &E : MST.AllEdges)
+ EdgeList.reserve(MST.numEdges());
+ for (const auto &E : MST.allEdges())
EdgeList.push_back(E.get());
for (auto &E : EdgeList) {
@@ -874,8 +876,7 @@ static void instrumentOneFunc(
F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry,
PGOBlockCoverage);
- Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
- auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy);
+ auto Name = FuncInfo.FuncNameVar;
auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
FuncInfo.FunctionHash);
if (PGOFunctionEntryCoverage) {
@@ -964,9 +965,8 @@ static void instrumentOneFunc(
populateEHOperandBundle(Cand, BlockColors, OpBundles);
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
- {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
- Builder.getInt64(FuncInfo.FunctionHash), ToProfile,
- Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
+ {FuncInfo.FuncNameVar, Builder.getInt64(FuncInfo.FunctionHash),
+ ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
OpBundles);
}
} // IPVK_First <= Kind <= IPVK_Last
@@ -1164,12 +1164,12 @@ private:
} // end anonymous namespace
/// Set up InEdges/OutEdges for all BBs in the MST.
-static void
-setupBBInfoEdges(FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
+static void setupBBInfoEdges(
+ const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
// This is not required when there is block coverage inference.
if (FuncInfo.BCI)
return;
- for (auto &E : FuncInfo.MST.AllEdges) {
+ for (const auto &E : FuncInfo.MST.allEdges()) {
if (E->Removed)
continue;
const BasicBlock *SrcBB = E->SrcBB;
@@ -1225,7 +1225,7 @@ bool PGOUseFunc::setInstrumentedCounts(
// Set the profile count of the instrumented edges. There are edges that are
// not in the MST but are not instrumented. Need to set the edge count value
// so that we can populate the profile counts later.
- for (auto &E : FuncInfo.MST.AllEdges) {
+ for (const auto &E : FuncInfo.MST.allEdges()) {
if (E->Removed || E->InMST)
continue;
const BasicBlock *SrcBB = E->SrcBB;
@@ -1336,7 +1336,8 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
auto &Ctx = M->getContext();
uint64_t MismatchedFuncSum = 0;
Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord(
- FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum);
+ FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
+ &MismatchedFuncSum);
if (Error E = Result.takeError()) {
handleInstrProfError(std::move(E), MismatchedFuncSum);
return false;
@@ -1381,7 +1382,8 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) {
uint64_t MismatchedFuncSum = 0;
Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord(
- FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum);
+ FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
+ &MismatchedFuncSum);
if (auto Err = Result.takeError()) {
handleInstrProfError(std::move(Err), MismatchedFuncSum);
return;
@@ -1436,12 +1438,11 @@ void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) {
// If A is uncovered, set weight=1.
// This setup will allow BFI to give nonzero profile counts to only covered
// blocks.
- SmallVector<unsigned, 4> Weights;
+ SmallVector<uint32_t, 4> Weights;
for (auto *Succ : successors(&BB))
Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
if (Weights.size() >= 2)
- BB.getTerminator()->setMetadata(LLVMContext::MD_prof,
- MDB.createBranchWeights(Weights));
+ llvm::setBranchWeights(*BB.getTerminator(), Weights);
}
unsigned NumCorruptCoverage = 0;
@@ -1647,12 +1648,10 @@ void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
Module *M = F.getParent();
IRBuilder<> Builder(&SI);
Type *Int64Ty = Builder.getInt64Ty();
- Type *I8PtrTy = Builder.getInt8PtrTy();
auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
- {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
- Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
+ {FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
Builder.getInt32(*CurCtrIdx), Step});
++(*CurCtrIdx);
}
@@ -1757,17 +1756,10 @@ static void collectComdatMembers(
ComdatMembers.insert(std::make_pair(C, &GA));
}
-// Don't perform PGO instrumeatnion / profile-use.
-static bool skipPGO(const Function &F) {
+// Return true if we should not look for instrumentation data for this function.
+static bool skipPGOUse(const Function &F) {
if (F.isDeclaration())
return true;
- if (F.hasFnAttribute(llvm::Attribute::NoProfile))
- return true;
- if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
- return true;
- if (F.getInstructionCount() < PGOFunctionSizeThreshold)
- return true;
-
// If there are too many critical edges, PGO might cause
// compile-time problems. Skip PGO if the number of
// critical edges exceeds the threshold.
@@ -1785,7 +1777,21 @@ static bool skipPGO(const Function &F) {
<< " exceed the threshold. Skip PGO.\n");
return true;
}
+ return false;
+}
+// Return true if we should not instrument this function.
+static bool skipPGOGen(const Function &F) {
+ if (skipPGOUse(F))
+ return true;
+ if (F.hasFnAttribute(llvm::Attribute::Naked))
+ return true;
+ if (F.hasFnAttribute(llvm::Attribute::NoProfile))
+ return true;
+ if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
+ return true;
+ if (F.getInstructionCount() < PGOFunctionSizeThreshold)
+ return true;
return false;
}
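The old single skipPGO predicate is split: profile use only skips functions that can never have matching data (declarations, too many critical edges), while profile generation additionally honors the opt-out attributes and the size threshold. A sketch of the layering (bodies abbreviated; the critical-edge limit is elided):

#include "llvm/IR/Function.h"

static bool sketchSkipPGOUse(const llvm::Function &F) {
  return F.isDeclaration();
}

static bool sketchSkipPGOGen(const llvm::Function &F) {
  return sketchSkipPGOUse(F) ||
         F.hasFnAttribute(llvm::Attribute::Naked) ||
         F.hasFnAttribute(llvm::Attribute::NoProfile) ||
         F.hasFnAttribute(llvm::Attribute::SkipProfile);
}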
@@ -1801,7 +1807,7 @@ static bool InstrumentAllFunctions(
collectComdatMembers(M, ComdatMembers);
for (auto &F : M) {
- if (skipPGO(F))
+ if (skipPGOGen(F))
continue;
auto &TLI = LookupTLI(F);
auto *BPI = LookupBPI(F);
@@ -2028,7 +2034,7 @@ static bool annotateAllFunctions(
InstrumentFuncEntry = PGOInstrumentEntry;
bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
for (auto &F : M) {
- if (skipPGO(F))
+ if (skipPGOUse(F))
continue;
auto &TLI = LookupTLI(F);
auto *BPI = LookupBPI(F);
@@ -2201,7 +2207,6 @@ static std::string getSimpleNodeName(const BasicBlock *Node) {
void llvm::setProfMetadata(Module *M, Instruction *TI,
ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {
- MDBuilder MDB(M->getContext());
assert(MaxCount > 0 && "Bad max count");
uint64_t Scale = calculateCountScale(MaxCount);
SmallVector<unsigned, 4> Weights;
@@ -2215,7 +2220,7 @@ void llvm::setProfMetadata(Module *M, Instruction *TI,
misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
- TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+ setBranchWeights(*TI, Weights);
if (EmitBranchProbability) {
std::string BrCondStr = getBranchCondString(TI);
if (BrCondStr.empty())
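Both weight-setting sites in this file move from hand-building !prof metadata through MDBuilder to the llvm::setBranchWeights helper from llvm/IR/ProfDataUtils.h, which also pins the element type to uint32_t. A minimal sketch, assuming the two-argument overload present at this snapshot:

#include "llvm/IR/Instructions.h"
#include "llvm/IR/ProfDataUtils.h"

void annotateBranch(llvm::BranchInst &Br) {
  // Attaches !prof !{!"branch_weights", i32 90, i32 10} to the branch.
  uint32_t Weights[] = {90, 10};
  llvm::setBranchWeights(Br, Weights);
}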
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
index 2906fe190984..fd0f69eca96e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
@@ -378,7 +378,7 @@ bool MemOPSizeOpt::perform(MemOp MO) {
assert(It != DefaultBB->end());
BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), DT);
MergeBB->setName("MemOP.Merge");
- BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency());
+ BFI.setBlockFreq(MergeBB, OrigBBFreq);
DefaultBB->setName("MemOP.Default");
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
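setBlockFreq now takes the BlockFrequency wrapper rather than its raw getFrequency() integer, so scaled frequencies can no longer be silently mixed with plain counts. The call-site shape, sketched:

#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Support/BlockFrequency.h"

void copyFreq(llvm::BlockFrequencyInfo &BFI, const llvm::BasicBlock *From,
              const llvm::BasicBlock *To) {
  // Before: BFI.setBlockFreq(To, BFI.getBlockFreq(From).getFrequency());
  BFI.setBlockFreq(To, BFI.getBlockFreq(From));
}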
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
index d83a3a991c89..230bb8b0a5dc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
@@ -198,17 +198,16 @@ bool SanitizerBinaryMetadata::run() {
// metadata features.
//
- auto *Int8PtrTy = IRB.getInt8PtrTy();
- auto *Int8PtrPtrTy = PointerType::getUnqual(Int8PtrTy);
+ auto *PtrTy = IRB.getPtrTy();
auto *Int32Ty = IRB.getInt32Ty();
- const std::array<Type *, 3> InitTypes = {Int32Ty, Int8PtrPtrTy, Int8PtrPtrTy};
+ const std::array<Type *, 3> InitTypes = {Int32Ty, PtrTy, PtrTy};
auto *Version = ConstantInt::get(Int32Ty, getVersion());
for (const MetadataInfo *MI : MIS) {
const std::array<Value *, InitTypes.size()> InitArgs = {
Version,
- getSectionMarker(getSectionStart(MI->SectionSuffix), Int8PtrTy),
- getSectionMarker(getSectionEnd(MI->SectionSuffix), Int8PtrTy),
+ getSectionMarker(getSectionStart(MI->SectionSuffix), PtrTy),
+ getSectionMarker(getSectionEnd(MI->SectionSuffix), PtrTy),
};
// We declare the _add and _del functions as weak, and only call them if
// there is a valid symbol linked. This allows building binaries with
@@ -306,11 +305,11 @@ bool isUARSafeCall(CallInst *CI) {
// It's safe to both pass pointers to local variables to them
// and to tail-call them.
return F && (F->isIntrinsic() || F->doesNotReturn() ||
- F->getName().startswith("__asan_") ||
- F->getName().startswith("__hwsan_") ||
- F->getName().startswith("__ubsan_") ||
- F->getName().startswith("__msan_") ||
- F->getName().startswith("__tsan_"));
+ F->getName().starts_with("__asan_") ||
+ F->getName().starts_with("__hwsan_") ||
+ F->getName().starts_with("__ubsan_") ||
+ F->getName().starts_with("__msan_") ||
+ F->getName().starts_with("__tsan_"));
}
bool hasUseAfterReturnUnsafeUses(Value &V) {
@@ -368,11 +367,11 @@ bool SanitizerBinaryMetadata::pretendAtomicAccess(const Value *Addr) {
const auto OF = Triple(Mod.getTargetTriple()).getObjectFormat();
const auto ProfSec =
getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false);
- if (GV->getSection().endswith(ProfSec))
+ if (GV->getSection().ends_with(ProfSec))
return true;
}
- if (GV->getName().startswith("__llvm_gcov") ||
- GV->getName().startswith("__llvm_gcda"))
+ if (GV->getName().starts_with("__llvm_gcov") ||
+ GV->getName().starts_with("__llvm_gcda"))
return true;
return false;
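The startswith/endswith renames here and in the files below align StringRef with C++20 std::string_view; the old spellings were deprecated ahead of removal. For example:

#include "llvm/ADT/StringRef.h"

// starts_with()/ends_with() are the std::string_view-style spellings.
static bool isSanitizerRuntimeName(llvm::StringRef Name) {
  return Name.starts_with("__asan_") || Name.starts_with("__tsan_");
}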
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index f22918141f6e..fe672a4377a1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -261,9 +261,7 @@ private:
FunctionCallee SanCovTraceGepFunction;
FunctionCallee SanCovTraceSwitchFunction;
GlobalVariable *SanCovLowestStack;
- Type *Int128PtrTy, *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty,
- *Int32PtrTy, *Int16PtrTy, *Int16Ty, *Int8Ty, *Int8PtrTy, *Int1Ty,
- *Int1PtrTy;
+ Type *PtrTy, *IntptrTy, *Int64Ty, *Int32Ty, *Int16Ty, *Int8Ty, *Int1Ty;
Module *CurModule;
std::string CurModuleUniqueId;
Triple TargetTriple;
@@ -331,11 +329,9 @@ ModuleSanitizerCoverage::CreateSecStartEnd(Module &M, const char *Section,
// Account for the fact that on windows-msvc __start_* symbols actually
// point to a uint64_t before the start of the array.
- auto SecStartI8Ptr = IRB.CreatePointerCast(SecStart, Int8PtrTy);
- auto GEP = IRB.CreateGEP(Int8Ty, SecStartI8Ptr,
+ auto GEP = IRB.CreateGEP(Int8Ty, SecStart,
ConstantInt::get(IntptrTy, sizeof(uint64_t)));
- return std::make_pair(IRB.CreatePointerCast(GEP, PointerType::getUnqual(Ty)),
- SecEnd);
+ return std::make_pair(GEP, SecEnd);
}
Function *ModuleSanitizerCoverage::CreateInitCallsForSections(
@@ -345,7 +341,6 @@ Function *ModuleSanitizerCoverage::CreateInitCallsForSections(
auto SecStart = SecStartEnd.first;
auto SecEnd = SecStartEnd.second;
Function *CtorFunc;
- Type *PtrTy = PointerType::getUnqual(Ty);
std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(
M, CtorName, InitFunctionName, {PtrTy, PtrTy}, {SecStart, SecEnd});
assert(CtorFunc->getName() == CtorName);
@@ -391,15 +386,9 @@ bool ModuleSanitizerCoverage::instrumentModule(
FunctionPCsArray = nullptr;
FunctionCFsArray = nullptr;
IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits());
- IntptrPtrTy = PointerType::getUnqual(IntptrTy);
+ PtrTy = PointerType::getUnqual(*C);
Type *VoidTy = Type::getVoidTy(*C);
IRBuilder<> IRB(*C);
- Int128PtrTy = PointerType::getUnqual(IRB.getInt128Ty());
- Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty());
- Int16PtrTy = PointerType::getUnqual(IRB.getInt16Ty());
- Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
- Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty());
- Int1PtrTy = PointerType::getUnqual(IRB.getInt1Ty());
Int64Ty = IRB.getInt64Ty();
Int32Ty = IRB.getInt32Ty();
Int16Ty = IRB.getInt16Ty();
@@ -438,26 +427,26 @@ bool ModuleSanitizerCoverage::instrumentModule(
M.getOrInsertFunction(SanCovTraceConstCmp8, VoidTy, Int64Ty, Int64Ty);
// Loads.
- SanCovLoadFunction[0] = M.getOrInsertFunction(SanCovLoad1, VoidTy, Int8PtrTy);
+ SanCovLoadFunction[0] = M.getOrInsertFunction(SanCovLoad1, VoidTy, PtrTy);
SanCovLoadFunction[1] =
- M.getOrInsertFunction(SanCovLoad2, VoidTy, Int16PtrTy);
+ M.getOrInsertFunction(SanCovLoad2, VoidTy, PtrTy);
SanCovLoadFunction[2] =
- M.getOrInsertFunction(SanCovLoad4, VoidTy, Int32PtrTy);
+ M.getOrInsertFunction(SanCovLoad4, VoidTy, PtrTy);
SanCovLoadFunction[3] =
- M.getOrInsertFunction(SanCovLoad8, VoidTy, Int64PtrTy);
+ M.getOrInsertFunction(SanCovLoad8, VoidTy, PtrTy);
SanCovLoadFunction[4] =
- M.getOrInsertFunction(SanCovLoad16, VoidTy, Int128PtrTy);
+ M.getOrInsertFunction(SanCovLoad16, VoidTy, PtrTy);
// Stores.
SanCovStoreFunction[0] =
- M.getOrInsertFunction(SanCovStore1, VoidTy, Int8PtrTy);
+ M.getOrInsertFunction(SanCovStore1, VoidTy, PtrTy);
SanCovStoreFunction[1] =
- M.getOrInsertFunction(SanCovStore2, VoidTy, Int16PtrTy);
+ M.getOrInsertFunction(SanCovStore2, VoidTy, PtrTy);
SanCovStoreFunction[2] =
- M.getOrInsertFunction(SanCovStore4, VoidTy, Int32PtrTy);
+ M.getOrInsertFunction(SanCovStore4, VoidTy, PtrTy);
SanCovStoreFunction[3] =
- M.getOrInsertFunction(SanCovStore8, VoidTy, Int64PtrTy);
+ M.getOrInsertFunction(SanCovStore8, VoidTy, PtrTy);
SanCovStoreFunction[4] =
- M.getOrInsertFunction(SanCovStore16, VoidTy, Int128PtrTy);
+ M.getOrInsertFunction(SanCovStore16, VoidTy, PtrTy);
{
AttributeList AL;
@@ -470,7 +459,7 @@ bool ModuleSanitizerCoverage::instrumentModule(
SanCovTraceGepFunction =
M.getOrInsertFunction(SanCovTraceGep, VoidTy, IntptrTy);
SanCovTraceSwitchFunction =
- M.getOrInsertFunction(SanCovTraceSwitchName, VoidTy, Int64Ty, Int64PtrTy);
+ M.getOrInsertFunction(SanCovTraceSwitchName, VoidTy, Int64Ty, PtrTy);
Constant *SanCovLowestStackConstant =
M.getOrInsertGlobal(SanCovLowestStackName, IntptrTy);
@@ -487,7 +476,7 @@ bool ModuleSanitizerCoverage::instrumentModule(
SanCovTracePC = M.getOrInsertFunction(SanCovTracePCName, VoidTy);
SanCovTracePCGuard =
- M.getOrInsertFunction(SanCovTracePCGuardName, VoidTy, Int32PtrTy);
+ M.getOrInsertFunction(SanCovTracePCGuardName, VoidTy, PtrTy);
for (auto &F : M)
instrumentFunction(F, DTCallback, PDTCallback);
@@ -510,7 +499,7 @@ bool ModuleSanitizerCoverage::instrumentModule(
if (Ctor && Options.PCTable) {
auto SecStartEnd = CreateSecStartEnd(M, SanCovPCsSectionName, IntptrTy);
FunctionCallee InitFunction = declareSanitizerInitFunction(
- M, SanCovPCsInitName, {IntptrPtrTy, IntptrPtrTy});
+ M, SanCovPCsInitName, {PtrTy, PtrTy});
IRBuilder<> IRBCtor(Ctor->getEntryBlock().getTerminator());
IRBCtor.CreateCall(InitFunction, {SecStartEnd.first, SecStartEnd.second});
}
@@ -518,7 +507,7 @@ bool ModuleSanitizerCoverage::instrumentModule(
if (Ctor && Options.CollectControlFlow) {
auto SecStartEnd = CreateSecStartEnd(M, SanCovCFsSectionName, IntptrTy);
FunctionCallee InitFunction = declareSanitizerInitFunction(
- M, SanCovCFsInitName, {IntptrPtrTy, IntptrPtrTy});
+ M, SanCovCFsInitName, {PtrTy, PtrTy});
IRBuilder<> IRBCtor(Ctor->getEntryBlock().getTerminator());
IRBCtor.CreateCall(InitFunction, {SecStartEnd.first, SecStartEnd.second});
}
@@ -616,7 +605,7 @@ void ModuleSanitizerCoverage::instrumentFunction(
return;
if (F.getName().find(".module_ctor") != std::string::npos)
return; // Should not instrument sanitizer init functions.
- if (F.getName().startswith("__sanitizer_"))
+ if (F.getName().starts_with("__sanitizer_"))
return; // Don't instrument __sanitizer_* callbacks.
// Don't touch available_externally functions; their actual body is elsewhere.
if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
@@ -744,19 +733,19 @@ ModuleSanitizerCoverage::CreatePCArray(Function &F,
IRBuilder<> IRB(&*F.getEntryBlock().getFirstInsertionPt());
for (size_t i = 0; i < N; i++) {
if (&F.getEntryBlock() == AllBlocks[i]) {
- PCs.push_back((Constant *)IRB.CreatePointerCast(&F, IntptrPtrTy));
+ PCs.push_back((Constant *)IRB.CreatePointerCast(&F, PtrTy));
PCs.push_back((Constant *)IRB.CreateIntToPtr(
- ConstantInt::get(IntptrTy, 1), IntptrPtrTy));
+ ConstantInt::get(IntptrTy, 1), PtrTy));
} else {
PCs.push_back((Constant *)IRB.CreatePointerCast(
- BlockAddress::get(AllBlocks[i]), IntptrPtrTy));
- PCs.push_back(Constant::getNullValue(IntptrPtrTy));
+ BlockAddress::get(AllBlocks[i]), PtrTy));
+ PCs.push_back(Constant::getNullValue(PtrTy));
}
}
- auto *PCArray = CreateFunctionLocalArrayInSection(N * 2, F, IntptrPtrTy,
+ auto *PCArray = CreateFunctionLocalArrayInSection(N * 2, F, PtrTy,
SanCovPCsSectionName);
PCArray->setInitializer(
- ConstantArray::get(ArrayType::get(IntptrPtrTy, N * 2), PCs));
+ ConstantArray::get(ArrayType::get(PtrTy, N * 2), PCs));
PCArray->setConstant(true);
return PCArray;
@@ -833,10 +822,9 @@ void ModuleSanitizerCoverage::InjectTraceForSwitch(
Int64Ty->getScalarSizeInBits())
Cond = IRB.CreateIntCast(Cond, Int64Ty, false);
for (auto It : SI->cases()) {
- Constant *C = It.getCaseValue();
- if (C->getType()->getScalarSizeInBits() <
- Int64Ty->getScalarSizeInBits())
- C = ConstantExpr::getCast(CastInst::ZExt, It.getCaseValue(), Int64Ty);
+ ConstantInt *C = It.getCaseValue();
+ if (C->getType()->getScalarSizeInBits() < 64)
+ C = ConstantInt::get(C->getContext(), C->getValue().zext(64));
Initializers.push_back(C);
}
llvm::sort(drop_begin(Initializers, 2),
@@ -849,8 +837,7 @@ void ModuleSanitizerCoverage::InjectTraceForSwitch(
*CurModule, ArrayOfInt64Ty, false, GlobalVariable::InternalLinkage,
ConstantArray::get(ArrayOfInt64Ty, Initializers),
"__sancov_gen_cov_switch_values");
- IRB.CreateCall(SanCovTraceSwitchFunction,
- {Cond, IRB.CreatePointerCast(GV, Int64PtrTy)});
+ IRB.CreateCall(SanCovTraceSwitchFunction, {Cond, GV});
}
}
}
@@ -895,16 +882,13 @@ void ModuleSanitizerCoverage::InjectTraceForLoadsAndStores(
: TypeSize == 128 ? 4
: -1;
};
- Type *PointerType[5] = {Int8PtrTy, Int16PtrTy, Int32PtrTy, Int64PtrTy,
- Int128PtrTy};
for (auto *LI : Loads) {
InstrumentationIRBuilder IRB(LI);
auto Ptr = LI->getPointerOperand();
int Idx = CallbackIdx(LI->getType());
if (Idx < 0)
continue;
- IRB.CreateCall(SanCovLoadFunction[Idx],
- IRB.CreatePointerCast(Ptr, PointerType[Idx]));
+ IRB.CreateCall(SanCovLoadFunction[Idx], Ptr);
}
for (auto *SI : Stores) {
InstrumentationIRBuilder IRB(SI);
@@ -912,8 +896,7 @@ void ModuleSanitizerCoverage::InjectTraceForLoadsAndStores(
int Idx = CallbackIdx(SI->getValueOperand()->getType());
if (Idx < 0)
continue;
- IRB.CreateCall(SanCovStoreFunction[Idx],
- IRB.CreatePointerCast(Ptr, PointerType[Idx]));
+ IRB.CreateCall(SanCovStoreFunction[Idx], Ptr);
}
}
@@ -978,7 +961,7 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
auto GuardPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(IRB.CreatePointerCast(FunctionGuardArray, IntptrTy),
ConstantInt::get(IntptrTy, Idx * 4)),
- Int32PtrTy);
+ PtrTy);
IRB.CreateCall(SanCovTracePCGuard, GuardPtr)->setCannotMerge();
}
if (Options.Inline8bitCounters) {
@@ -1008,7 +991,7 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
Module *M = F.getParent();
Function *GetFrameAddr = Intrinsic::getDeclaration(
M, Intrinsic::frameaddress,
- IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
+ IRB.getPtrTy(M->getDataLayout().getAllocaAddrSpace()));
auto FrameAddrPtr =
IRB.CreateCall(GetFrameAddr, {Constant::getNullValue(Int32Ty)});
auto FrameAddrInt = IRB.CreatePtrToInt(FrameAddrPtr, IntptrTy);
@@ -1059,40 +1042,40 @@ void ModuleSanitizerCoverage::createFunctionControlFlow(Function &F) {
for (auto &BB : F) {
// blockaddress cannot be used on a function's entry block.
if (&BB == &F.getEntryBlock())
- CFs.push_back((Constant *)IRB.CreatePointerCast(&F, IntptrPtrTy));
+ CFs.push_back((Constant *)IRB.CreatePointerCast(&F, PtrTy));
else
CFs.push_back((Constant *)IRB.CreatePointerCast(BlockAddress::get(&BB),
- IntptrPtrTy));
+ PtrTy));
for (auto SuccBB : successors(&BB)) {
assert(SuccBB != &F.getEntryBlock());
CFs.push_back((Constant *)IRB.CreatePointerCast(BlockAddress::get(SuccBB),
- IntptrPtrTy));
+ PtrTy));
}
- CFs.push_back((Constant *)Constant::getNullValue(IntptrPtrTy));
+ CFs.push_back((Constant *)Constant::getNullValue(PtrTy));
for (auto &Inst : BB) {
if (CallBase *CB = dyn_cast<CallBase>(&Inst)) {
if (CB->isIndirectCall()) {
// TODO(navidem): handle indirect calls, for now mark its existence.
CFs.push_back((Constant *)IRB.CreateIntToPtr(
- ConstantInt::get(IntptrTy, -1), IntptrPtrTy));
+ ConstantInt::get(IntptrTy, -1), PtrTy));
} else {
auto CalledF = CB->getCalledFunction();
if (CalledF && !CalledF->isIntrinsic())
CFs.push_back(
- (Constant *)IRB.CreatePointerCast(CalledF, IntptrPtrTy));
+ (Constant *)IRB.CreatePointerCast(CalledF, PtrTy));
}
}
}
- CFs.push_back((Constant *)Constant::getNullValue(IntptrPtrTy));
+ CFs.push_back((Constant *)Constant::getNullValue(PtrTy));
}
FunctionCFsArray = CreateFunctionLocalArrayInSection(
- CFs.size(), F, IntptrPtrTy, SanCovCFsSectionName);
+ CFs.size(), F, PtrTy, SanCovCFsSectionName);
FunctionCFsArray->setInitializer(
- ConstantArray::get(ArrayType::get(IntptrPtrTy, CFs.size()), CFs));
+ ConstantArray::get(ArrayType::get(PtrTy, CFs.size()), CFs));
FunctionCFsArray->setConstant(true);
}
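Every SanitizerCoverage change above is the same opaque-pointer cleanup: the per-width pointer types (Int8PtrTy through Int128PtrTy) collapse into a single PtrTy, and the CreatePointerCast calls disappear because every pointer already has type `ptr`. A sketch of declaring such callbacks with the one pointer type (callback names are hypothetical):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Module.h"

void declareCoverageCallbacks(llvm::Module &M) {
  llvm::LLVMContext &C = M.getContext();
  llvm::Type *VoidTy = llvm::Type::getVoidTy(C);
  llvm::Type *PtrTy = llvm::PointerType::getUnqual(C);
  // One signature covers accesses of every width; the callee knows the size.
  for (const char *Name : {"__cov_load1_sketch", "__cov_load2_sketch"})
    M.getOrInsertFunction(Name, VoidTy, PtrTy);
}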
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index ce35eefb63fa..8ee0bca7e354 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -205,7 +205,7 @@ void ThreadSanitizer::initialize(Module &M, const TargetLibraryInfo &TLI) {
Attr = Attr.addFnAttribute(Ctx, Attribute::NoUnwind);
// Initialize the callbacks.
TsanFuncEntry = M.getOrInsertFunction("__tsan_func_entry", Attr,
- IRB.getVoidTy(), IRB.getInt8PtrTy());
+ IRB.getVoidTy(), IRB.getPtrTy());
TsanFuncExit =
M.getOrInsertFunction("__tsan_func_exit", Attr, IRB.getVoidTy());
TsanIgnoreBegin = M.getOrInsertFunction("__tsan_ignore_thread_begin", Attr,
@@ -220,49 +220,49 @@ void ThreadSanitizer::initialize(Module &M, const TargetLibraryInfo &TLI) {
std::string BitSizeStr = utostr(BitSize);
SmallString<32> ReadName("__tsan_read" + ByteSizeStr);
TsanRead[i] = M.getOrInsertFunction(ReadName, Attr, IRB.getVoidTy(),
- IRB.getInt8PtrTy());
+ IRB.getPtrTy());
SmallString<32> WriteName("__tsan_write" + ByteSizeStr);
TsanWrite[i] = M.getOrInsertFunction(WriteName, Attr, IRB.getVoidTy(),
- IRB.getInt8PtrTy());
+ IRB.getPtrTy());
SmallString<64> UnalignedReadName("__tsan_unaligned_read" + ByteSizeStr);
TsanUnalignedRead[i] = M.getOrInsertFunction(
- UnalignedReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
+ UnalignedReadName, Attr, IRB.getVoidTy(), IRB.getPtrTy());
SmallString<64> UnalignedWriteName("__tsan_unaligned_write" + ByteSizeStr);
TsanUnalignedWrite[i] = M.getOrInsertFunction(
- UnalignedWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
+ UnalignedWriteName, Attr, IRB.getVoidTy(), IRB.getPtrTy());
SmallString<64> VolatileReadName("__tsan_volatile_read" + ByteSizeStr);
TsanVolatileRead[i] = M.getOrInsertFunction(
- VolatileReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
+ VolatileReadName, Attr, IRB.getVoidTy(), IRB.getPtrTy());
SmallString<64> VolatileWriteName("__tsan_volatile_write" + ByteSizeStr);
TsanVolatileWrite[i] = M.getOrInsertFunction(
- VolatileWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
+ VolatileWriteName, Attr, IRB.getVoidTy(), IRB.getPtrTy());
SmallString<64> UnalignedVolatileReadName("__tsan_unaligned_volatile_read" +
ByteSizeStr);
TsanUnalignedVolatileRead[i] = M.getOrInsertFunction(
- UnalignedVolatileReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
+ UnalignedVolatileReadName, Attr, IRB.getVoidTy(), IRB.getPtrTy());
SmallString<64> UnalignedVolatileWriteName(
"__tsan_unaligned_volatile_write" + ByteSizeStr);
TsanUnalignedVolatileWrite[i] = M.getOrInsertFunction(
- UnalignedVolatileWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
+ UnalignedVolatileWriteName, Attr, IRB.getVoidTy(), IRB.getPtrTy());
SmallString<64> CompoundRWName("__tsan_read_write" + ByteSizeStr);
TsanCompoundRW[i] = M.getOrInsertFunction(
- CompoundRWName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
+ CompoundRWName, Attr, IRB.getVoidTy(), IRB.getPtrTy());
SmallString<64> UnalignedCompoundRWName("__tsan_unaligned_read_write" +
ByteSizeStr);
TsanUnalignedCompoundRW[i] = M.getOrInsertFunction(
- UnalignedCompoundRWName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
+ UnalignedCompoundRWName, Attr, IRB.getVoidTy(), IRB.getPtrTy());
Type *Ty = Type::getIntNTy(Ctx, BitSize);
- Type *PtrTy = Ty->getPointerTo();
+ Type *PtrTy = PointerType::get(Ctx, 0);
SmallString<32> AtomicLoadName("__tsan_atomic" + BitSizeStr + "_load");
TsanAtomicLoad[i] =
M.getOrInsertFunction(AtomicLoadName,
@@ -318,9 +318,9 @@ void ThreadSanitizer::initialize(Module &M, const TargetLibraryInfo &TLI) {
}
TsanVptrUpdate =
M.getOrInsertFunction("__tsan_vptr_update", Attr, IRB.getVoidTy(),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy());
+ IRB.getPtrTy(), IRB.getPtrTy());
TsanVptrLoad = M.getOrInsertFunction("__tsan_vptr_read", Attr,
- IRB.getVoidTy(), IRB.getInt8PtrTy());
+ IRB.getVoidTy(), IRB.getPtrTy());
TsanAtomicThreadFence = M.getOrInsertFunction(
"__tsan_atomic_thread_fence",
TLI.getAttrList(&Ctx, {0}, /*Signed=*/true, /*Ret=*/false, Attr),
@@ -332,15 +332,15 @@ void ThreadSanitizer::initialize(Module &M, const TargetLibraryInfo &TLI) {
IRB.getVoidTy(), OrdTy);
MemmoveFn =
- M.getOrInsertFunction("__tsan_memmove", Attr, IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
+ M.getOrInsertFunction("__tsan_memmove", Attr, IRB.getPtrTy(),
+ IRB.getPtrTy(), IRB.getPtrTy(), IntptrTy);
MemcpyFn =
- M.getOrInsertFunction("__tsan_memcpy", Attr, IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
+ M.getOrInsertFunction("__tsan_memcpy", Attr, IRB.getPtrTy(),
+ IRB.getPtrTy(), IRB.getPtrTy(), IntptrTy);
MemsetFn = M.getOrInsertFunction(
"__tsan_memset",
TLI.getAttrList(&Ctx, {1}, /*Signed=*/true, /*Ret=*/false, Attr),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy);
+ IRB.getPtrTy(), IRB.getPtrTy(), IRB.getInt32Ty(), IntptrTy);
}
static bool isVtableAccess(Instruction *I) {
@@ -360,15 +360,10 @@ static bool shouldInstrumentReadWriteFromAddress(const Module *M, Value *Addr) {
StringRef SectionName = GV->getSection();
// Check if the global is in the PGO counters section.
auto OF = Triple(M->getTargetTriple()).getObjectFormat();
- if (SectionName.endswith(
+ if (SectionName.ends_with(
getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false)))
return false;
}
-
- // Check if the global is private gcov data.
- if (GV->getName().startswith("__llvm_gcov") ||
- GV->getName().startswith("__llvm_gcda"))
- return false;
}
// Do not instrument accesses from different address spaces; we cannot deal
@@ -522,6 +517,9 @@ bool ThreadSanitizer::sanitizeFunction(Function &F,
// Traverse all instructions, collect loads/stores/returns, check for calls.
for (auto &BB : F) {
for (auto &Inst : BB) {
+ // Skip instructions inserted by another instrumentation.
+ if (Inst.hasMetadata(LLVMContext::MD_nosanitize))
+ continue;
if (isTsanAtomic(&Inst))
AtomicAccesses.push_back(&Inst);
else if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst))
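The new skip relies on the convention that instrumentation passes tag the instructions they insert with !nosanitize metadata; TSan now ignores those instead of instrumenting another sanitizer's bookkeeping. Both sides of the convention, sketched:

#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"

void markNoSanitize(llvm::Instruction &I) {
  I.setMetadata(llvm::LLVMContext::MD_nosanitize,
                llvm::MDNode::get(I.getContext(), {}));
}

bool insertedByInstrumentation(const llvm::Instruction &I) {
  return I.hasMetadata(llvm::LLVMContext::MD_nosanitize);
}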
@@ -613,17 +611,14 @@ bool ThreadSanitizer::instrumentLoadOrStore(const InstructionInfo &II,
StoredValue = IRB.CreateExtractElement(
StoredValue, ConstantInt::get(IRB.getInt32Ty(), 0));
if (StoredValue->getType()->isIntegerTy())
- StoredValue = IRB.CreateIntToPtr(StoredValue, IRB.getInt8PtrTy());
+ StoredValue = IRB.CreateIntToPtr(StoredValue, IRB.getPtrTy());
// Call TsanVptrUpdate.
- IRB.CreateCall(TsanVptrUpdate,
- {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy())});
+ IRB.CreateCall(TsanVptrUpdate, {Addr, StoredValue});
NumInstrumentedVtableWrites++;
return true;
}
if (!IsWrite && isVtableAccess(II.Inst)) {
- IRB.CreateCall(TsanVptrLoad,
- IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
+ IRB.CreateCall(TsanVptrLoad, Addr);
NumInstrumentedVtableReads++;
return true;
}
@@ -655,7 +650,7 @@ bool ThreadSanitizer::instrumentLoadOrStore(const InstructionInfo &II,
else
OnAccessFunc = IsWrite ? TsanUnalignedWrite[Idx] : TsanUnalignedRead[Idx];
}
- IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
+ IRB.CreateCall(OnAccessFunc, Addr);
if (IsCompoundRW || IsWrite)
NumInstrumentedWrites++;
if (IsCompoundRW || !IsWrite)
@@ -691,17 +686,19 @@ static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) {
InstrumentationIRBuilder IRB(I);
if (MemSetInst *M = dyn_cast<MemSetInst>(I)) {
+ Value *Cast1 = IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false);
+ Value *Cast2 = IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false);
IRB.CreateCall(
MemsetFn,
- {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false),
- IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)});
+ {M->getArgOperand(0),
+ Cast1,
+ Cast2});
I->eraseFromParent();
} else if (MemTransferInst *M = dyn_cast<MemTransferInst>(I)) {
IRB.CreateCall(
isa<MemCpyInst>(M) ? MemcpyFn : MemmoveFn,
- {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(M->getArgOperand(1), IRB.getInt8PtrTy()),
+ {M->getArgOperand(0),
+ M->getArgOperand(1),
IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)});
I->eraseFromParent();
}
@@ -724,11 +721,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
int Idx = getMemoryAccessFuncIndex(OrigTy, Addr, DL);
if (Idx < 0)
return false;
- const unsigned ByteSize = 1U << Idx;
- const unsigned BitSize = ByteSize * 8;
- Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
- Type *PtrTy = Ty->getPointerTo();
- Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
+ Value *Args[] = {Addr,
createOrdering(&IRB, LI->getOrdering())};
Value *C = IRB.CreateCall(TsanAtomicLoad[Idx], Args);
Value *Cast = IRB.CreateBitOrPointerCast(C, OrigTy);
@@ -742,8 +735,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
const unsigned ByteSize = 1U << Idx;
const unsigned BitSize = ByteSize * 8;
Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
- Type *PtrTy = Ty->getPointerTo();
- Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
+ Value *Args[] = {Addr,
IRB.CreateBitOrPointerCast(SI->getValueOperand(), Ty),
createOrdering(&IRB, SI->getOrdering())};
CallInst *C = CallInst::Create(TsanAtomicStore[Idx], Args);
@@ -760,8 +752,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
const unsigned ByteSize = 1U << Idx;
const unsigned BitSize = ByteSize * 8;
Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
- Type *PtrTy = Ty->getPointerTo();
- Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
+ Value *Args[] = {Addr,
IRB.CreateIntCast(RMWI->getValOperand(), Ty, false),
createOrdering(&IRB, RMWI->getOrdering())};
CallInst *C = CallInst::Create(F, Args);
@@ -775,12 +766,11 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
const unsigned ByteSize = 1U << Idx;
const unsigned BitSize = ByteSize * 8;
Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
- Type *PtrTy = Ty->getPointerTo();
Value *CmpOperand =
IRB.CreateBitOrPointerCast(CASI->getCompareOperand(), Ty);
Value *NewOperand =
IRB.CreateBitOrPointerCast(CASI->getNewValOperand(), Ty);
- Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
+ Value *Args[] = {Addr,
CmpOperand,
NewOperand,
createOrdering(&IRB, CASI->getSuccessOrdering()),
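instrumentMemIntrinsic, touched above, replaces compiler-emitted llvm.memset / llvm.memcpy intrinsics with calls into the TSan runtime so the accesses become observable; with opaque pointers only the integer operands still need casts. A condensed sketch (MemsetFn is assumed to be the previously declared __tsan_memset callee):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"

void rewriteMemSet(llvm::MemSetInst *M, llvm::FunctionCallee MemsetFn,
                   llvm::Type *IntptrTy) {
  llvm::IRBuilder<> IRB(M);
  IRB.CreateCall(MemsetFn,
                 {M->getArgOperand(0), // dest is already `ptr`, no cast
                  IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(),
                                    /*isSigned=*/false),
                  IRB.CreateIntCast(M->getArgOperand(2), IntptrTy,
                                    /*isSigned=*/false)});
  M->eraseFromParent();
}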
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
index dd6a1c3f9795..7732eeb4b9c8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
@@ -22,7 +22,6 @@
#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_DEPENDENCYANALYSIS_H
#define LLVM_LIB_TRANSFORMS_OBJCARC_DEPENDENCYANALYSIS_H
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/ObjCARCInstKind.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index adf86526ebf1..b51e4d46bffe 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -933,7 +933,8 @@ void ObjCARCOpt::OptimizeIndividualCallImpl(Function &F, Instruction *Inst,
if (IsNullOrUndef(CI->getArgOperand(0))) {
Changed = true;
new StoreInst(ConstantInt::getTrue(CI->getContext()),
- PoisonValue::get(Type::getInt1PtrTy(CI->getContext())), CI);
+ PoisonValue::get(PointerType::getUnqual(CI->getContext())),
+ CI);
Value *NewValue = PoisonValue::get(CI->getType());
LLVM_DEBUG(
dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
@@ -952,7 +953,8 @@ void ObjCARCOpt::OptimizeIndividualCallImpl(Function &F, Instruction *Inst,
IsNullOrUndef(CI->getArgOperand(1))) {
Changed = true;
new StoreInst(ConstantInt::getTrue(CI->getContext()),
- PoisonValue::get(Type::getInt1PtrTy(CI->getContext())), CI);
+ PoisonValue::get(PointerType::getUnqual(CI->getContext())),
+ CI);
Value *NewValue = PoisonValue::get(CI->getType());
LLVM_DEBUG(
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
index 9f15772f2fa1..e563ecfb1622 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
@@ -19,7 +19,7 @@ using namespace llvm::objcarc;
static StringRef getName(Value *V) {
StringRef Name = V->getName();
- if (Name.startswith("\1"))
+ if (Name.starts_with("\1"))
return Name.substr(1);
return Name;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp
index 24354211341f..9af275a9f4e2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -544,6 +544,16 @@ ADCEChanged AggressiveDeadCodeElimination::removeDeadInstructions() {
// value of the function, and may therefore be deleted safely.
// NOTE: We reuse the Worklist vector here for memory efficiency.
for (Instruction &I : llvm::reverse(instructions(F))) {
+ // With "RemoveDIs" debug-info stored in DPValue objects, debug-info
+ // attached to this instruction, and drop any for scopes that aren't alive,
+ // like the rest of this loop does. Extending support to assignment tracking
+ // is future work.
+ for (DPValue &DPV : make_early_inc_range(I.getDbgValueRange())) {
+ if (AliveScopes.count(DPV.getDebugLoc()->getScope()))
+ continue;
+ I.dropOneDbgValue(&DPV);
+ }
+
// Check if the instruction is alive.
if (isLive(&I))
continue;
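With RemoveDIs, debug values hang off instructions as DPValue records rather than standalone dbg.value intrinsics, so dead ones must be dropped explicitly. A sketch using the API names at this snapshot (both were renamed in later LLVM revisions):

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/DebugProgramInstruction.h"
#include "llvm/IR/Instruction.h"

// Drop every DPValue attached to I; make_early_inc_range is needed because
// dropping mutates the list being walked.
void dropAllDebugRecords(llvm::Instruction &I) {
  for (llvm::DPValue &DPV : llvm::make_early_inc_range(I.getDbgValueRange()))
    I.dropOneDbgValue(&DPV);
}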
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index b259c76fc3a5..f3422a705dca 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -83,11 +83,7 @@ static Align getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV,
const SCEV *OffSCEV, Value *Ptr,
ScalarEvolution *SE) {
const SCEV *PtrSCEV = SE->getSCEV(Ptr);
- // On a platform with 32-bit allocas, but 64-bit flat/global pointer sizes
- // (*cough* AMDGPU), the effective SCEV type of AASCEV and PtrSCEV
- // may disagree. Trunc/extend so they agree.
- PtrSCEV = SE->getTruncateOrZeroExtend(
- PtrSCEV, SE->getEffectiveSCEVType(AASCEV->getType()));
+
const SCEV *DiffSCEV = SE->getMinusSCEV(PtrSCEV, AASCEV);
if (isa<SCEVCouldNotCompute>(DiffSCEV))
return Align(1);
@@ -179,6 +175,9 @@ bool AlignmentFromAssumptionsPass::extractAlignmentInfo(CallInst *I,
// Added to suppress a crash because consumer doesn't expect non-constant
// alignments in the assume bundle. TODO: Consider generalizing caller.
return false;
+ if (!cast<SCEVConstant>(AlignSCEV)->getAPInt().isPowerOf2())
+ // Only power of two alignments are supported.
+ return false;
if (AlignOB.Inputs.size() == 3)
OffSCEV = SE->getSCEV(AlignOB.Inputs[2].get());
else
@@ -264,11 +263,17 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
// Now that we've updated that use of the pointer, look for other uses of
// the pointer to update.
Visited.insert(J);
- for (User *UJ : J->users()) {
- Instruction *K = cast<Instruction>(UJ);
- if (!Visited.count(K))
- WorkList.push_back(K);
- }
+ if (isa<GetElementPtrInst>(J) || isa<PHINode>(J))
+ for (auto &U : J->uses()) {
+ if (U->getType()->isPointerTy()) {
+ Instruction *K = cast<Instruction>(U.getUser());
+ StoreInst *SI = dyn_cast<StoreInst>(K);
+ if (SI && SI->getPointerOperandIndex() != U.getOperandNo())
+ continue;
+ if (!Visited.count(K))
+ WorkList.push_back(K);
+ }
+ }
}
return true;
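The new guard rejects assume-bundle alignments that are not powers of two, since llvm::Align can only represent power-of-two values and asserts otherwise. The check itself is just:

#include "llvm/ADT/APInt.h"

// APInt::isPowerOf2() is true only for a nonzero value with a single set
// bit, exactly the values Align can represent.
bool isUsableAlignment(const llvm::APInt &A) { return A.isPowerOf2(); }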
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
index aeb7c5d461f0..47f663fa0cf0 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -62,10 +62,8 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -374,10 +372,10 @@ static void splitCallSite(CallBase &CB,
return;
}
- auto *OriginalBegin = &*TailBB->begin();
+ BasicBlock::iterator OriginalBegin = TailBB->begin();
// Replace users of the original call with a PHI merging the split call sites.
if (CallPN) {
- CallPN->insertBefore(OriginalBegin);
+ CallPN->insertBefore(*TailBB, OriginalBegin);
CB.replaceAllUsesWith(CallPN);
}
@@ -389,6 +387,7 @@ static void splitCallSite(CallBase &CB,
// do not introduce unnecessary PHI nodes for def-use chains from the call
// instruction to the beginning of the block.
auto I = CB.getReverseIterator();
+ Instruction *OriginalBeginInst = &*OriginalBegin;
while (I != TailBB->rend()) {
Instruction *CurrentI = &*I++;
if (!CurrentI->use_empty()) {
@@ -401,12 +400,13 @@ static void splitCallSite(CallBase &CB,
for (auto &Mapping : ValueToValueMaps)
NewPN->addIncoming(Mapping[CurrentI],
cast<Instruction>(Mapping[CurrentI])->getParent());
- NewPN->insertBefore(&*TailBB->begin());
+ NewPN->insertBefore(*TailBB, TailBB->begin());
CurrentI->replaceAllUsesWith(NewPN);
}
+ CurrentI->dropDbgValues();
CurrentI->eraseFromParent();
// We are done once we handled the first original instruction in TailBB.
- if (CurrentI == OriginalBegin)
+ if (CurrentI == OriginalBeginInst)
break;
}
}
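The insertBefore(Instruction *) calls become the insertBefore(BasicBlock &, iterator) overload, part of the same RemoveDIs work as the ADCE hunk above: an iterator can carry the "insert ahead of the debug records at the head of the block" position that a bare Instruction pointer cannot. Sketch:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

// Insert I at the very head of BB, ahead of any debug records attached to
// the first instruction; the iterator form preserves that position.
void insertAtHead(llvm::Instruction *I, llvm::BasicBlock &BB) {
  I->insertBefore(BB, BB.begin());
}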
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 611e64bd0976..1fb9d7fff32f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -523,7 +523,8 @@ void ConstantHoistingPass::collectConstantCandidates(Function &Fn) {
if (!DT->isReachableFromEntry(&BB))
continue;
for (Instruction &Inst : BB)
- collectConstantCandidates(ConstCandMap, &Inst);
+ if (!TTI->preferToKeepConstantsAttached(Inst, Fn))
+ collectConstantCandidates(ConstCandMap, &Inst);
}
}
@@ -761,11 +762,9 @@ void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
if (Adj->Offset) {
if (Adj->Ty) {
// Constant being rebased is a ConstantExpr.
- PointerType *Int8PtrTy = Type::getInt8PtrTy(
- *Ctx, cast<PointerType>(Adj->Ty)->getAddressSpace());
- Base = new BitCastInst(Base, Int8PtrTy, "base_bitcast", Adj->MatInsertPt);
Mat = GetElementPtrInst::Create(Type::getInt8Ty(*Ctx), Base, Adj->Offset,
"mat_gep", Adj->MatInsertPt);
+ // Hide it behind a bitcast.
Mat = new BitCastInst(Mat, Adj->Ty, "mat_bitcast", Adj->MatInsertPt);
} else
// Constant being rebased is a ConstantInt.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 5365bca0ab47..18266ba07898 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -18,13 +18,16 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstraintSystem.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Verifier.h"
@@ -32,7 +35,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
-#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
@@ -83,32 +85,69 @@ static Instruction *getContextInstForUse(Use &U) {
}
namespace {
+/// Struct to express a condition of the form %Op0 Pred %Op1.
+struct ConditionTy {
+ CmpInst::Predicate Pred;
+ Value *Op0;
+ Value *Op1;
+
+ ConditionTy()
+ : Pred(CmpInst::BAD_ICMP_PREDICATE), Op0(nullptr), Op1(nullptr) {}
+ ConditionTy(CmpInst::Predicate Pred, Value *Op0, Value *Op1)
+ : Pred(Pred), Op0(Op0), Op1(Op1) {}
+};
+
/// Represents either
-/// * a condition that holds on entry to a block (=conditional fact)
+/// * a condition that holds on entry to a block (=condition fact)
/// * an assume (=assume fact)
/// * a use of a compare instruction to simplify.
/// It also tracks the Dominator DFS in and out numbers for each entry.
struct FactOrCheck {
+ enum class EntryTy {
+ ConditionFact, /// A condition that holds on entry to a block.
+ InstFact, /// A fact that holds after Inst executes (e.g. an assume or
+ /// min/max intrinsic).
+ InstCheck, /// An instruction to simplify (e.g. an overflow math
+ /// intrinsic).
+ UseCheck /// A use of a compare instruction to simplify.
+ };
+
union {
Instruction *Inst;
Use *U;
+ ConditionTy Cond;
};
+
+ /// A pre-condition that must hold for the current fact to be added to the
+ /// system.
+ ConditionTy DoesHold;
+
unsigned NumIn;
unsigned NumOut;
- bool HasInst;
- bool Not;
+ EntryTy Ty;
- FactOrCheck(DomTreeNode *DTN, Instruction *Inst, bool Not)
+ FactOrCheck(EntryTy Ty, DomTreeNode *DTN, Instruction *Inst)
: Inst(Inst), NumIn(DTN->getDFSNumIn()), NumOut(DTN->getDFSNumOut()),
- HasInst(true), Not(Not) {}
+ Ty(Ty) {}
FactOrCheck(DomTreeNode *DTN, Use *U)
- : U(U), NumIn(DTN->getDFSNumIn()), NumOut(DTN->getDFSNumOut()),
- HasInst(false), Not(false) {}
+ : U(U), DoesHold(CmpInst::BAD_ICMP_PREDICATE, nullptr, nullptr),
+ NumIn(DTN->getDFSNumIn()), NumOut(DTN->getDFSNumOut()),
+ Ty(EntryTy::UseCheck) {}
+
+ FactOrCheck(DomTreeNode *DTN, CmpInst::Predicate Pred, Value *Op0, Value *Op1,
+ ConditionTy Precond = ConditionTy())
+ : Cond(Pred, Op0, Op1), DoesHold(Precond), NumIn(DTN->getDFSNumIn()),
+ NumOut(DTN->getDFSNumOut()), Ty(EntryTy::ConditionFact) {}
+
+ static FactOrCheck getConditionFact(DomTreeNode *DTN, CmpInst::Predicate Pred,
+ Value *Op0, Value *Op1,
+ ConditionTy Precond = ConditionTy()) {
+ return FactOrCheck(DTN, Pred, Op0, Op1, Precond);
+ }
- static FactOrCheck getFact(DomTreeNode *DTN, Instruction *Inst,
- bool Not = false) {
- return FactOrCheck(DTN, Inst, Not);
+ static FactOrCheck getInstFact(DomTreeNode *DTN, Instruction *Inst) {
+ return FactOrCheck(EntryTy::InstFact, DTN, Inst);
}
static FactOrCheck getCheck(DomTreeNode *DTN, Use *U) {
@@ -116,39 +155,47 @@ struct FactOrCheck {
}
static FactOrCheck getCheck(DomTreeNode *DTN, CallInst *CI) {
- return FactOrCheck(DTN, CI, false);
+ return FactOrCheck(EntryTy::InstCheck, DTN, CI);
}
bool isCheck() const {
- return !HasInst ||
- match(Inst, m_Intrinsic<Intrinsic::ssub_with_overflow>());
+ return Ty == EntryTy::InstCheck || Ty == EntryTy::UseCheck;
}
Instruction *getContextInst() const {
- if (HasInst)
- return Inst;
- return getContextInstForUse(*U);
+ if (Ty == EntryTy::UseCheck)
+ return getContextInstForUse(*U);
+ return Inst;
}
+
Instruction *getInstructionToSimplify() const {
assert(isCheck());
- if (HasInst)
+ if (Ty == EntryTy::InstCheck)
return Inst;
// The use may have been simplified to a constant already.
return dyn_cast<Instruction>(*U);
}
- bool isConditionFact() const { return !isCheck() && isa<CmpInst>(Inst); }
+
+ bool isConditionFact() const { return Ty == EntryTy::ConditionFact; }
};
/// Keep state required to build worklist.
struct State {
DominatorTree &DT;
+ LoopInfo &LI;
+ ScalarEvolution &SE;
SmallVector<FactOrCheck, 64> WorkList;
- State(DominatorTree &DT) : DT(DT) {}
+ State(DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE)
+ : DT(DT), LI(LI), SE(SE) {}
/// Process block \p BB and add known facts to work-list.
void addInfoFor(BasicBlock &BB);
+ /// Try to add facts for loop inductions (AddRecs) in EQ/NE compares
+ /// controlling the loop header.
+ void addInfoForInductions(BasicBlock &BB);
+
/// Returns true if we can add a known condition from BB to its successor
/// block Succ.
bool canAddSuccessor(BasicBlock &BB, BasicBlock *Succ) const {
@@ -172,19 +219,9 @@ struct StackEntry {
ValuesToRelease(ValuesToRelease) {}
};
-/// Struct to express a pre-condition of the form %Op0 Pred %Op1.
-struct PreconditionTy {
- CmpInst::Predicate Pred;
- Value *Op0;
- Value *Op1;
-
- PreconditionTy(CmpInst::Predicate Pred, Value *Op0, Value *Op1)
- : Pred(Pred), Op0(Op0), Op1(Op1) {}
-};
-
struct ConstraintTy {
SmallVector<int64_t, 8> Coefficients;
- SmallVector<PreconditionTy, 2> Preconditions;
+ SmallVector<ConditionTy, 2> Preconditions;
SmallVector<SmallVector<int64_t, 8>> ExtraInfo;
@@ -327,10 +364,57 @@ struct Decomposition {
}
};
+// Variable and constant offsets for a chain of GEPs, with base pointer BasePtr.
+struct OffsetResult {
+ Value *BasePtr;
+ APInt ConstantOffset;
+ MapVector<Value *, APInt> VariableOffsets;
+ bool AllInbounds;
+
+ OffsetResult() : BasePtr(nullptr), ConstantOffset(0, uint64_t(0)) {}
+
+ OffsetResult(GEPOperator &GEP, const DataLayout &DL)
+ : BasePtr(GEP.getPointerOperand()), AllInbounds(GEP.isInBounds()) {
+ ConstantOffset = APInt(DL.getIndexTypeSizeInBits(BasePtr->getType()), 0);
+ }
+};
} // namespace
+// Try to collect variable and constant offsets for \p GEP, partly traversing
+// nested GEPs. Returns an OffsetResult with nullptr as BasePtr if collecting
+// the offsets fails.
+static OffsetResult collectOffsets(GEPOperator &GEP, const DataLayout &DL) {
+ OffsetResult Result(GEP, DL);
+ unsigned BitWidth = Result.ConstantOffset.getBitWidth();
+ if (!GEP.collectOffset(DL, BitWidth, Result.VariableOffsets,
+ Result.ConstantOffset))
+ return {};
+
+ // If we have a nested GEP, check if we can combine the constant offset of the
+ // inner GEP with the outer GEP.
+ if (auto *InnerGEP = dyn_cast<GetElementPtrInst>(Result.BasePtr)) {
+ MapVector<Value *, APInt> VariableOffsets2;
+ APInt ConstantOffset2(BitWidth, 0);
+ bool CanCollectInner = InnerGEP->collectOffset(
+ DL, BitWidth, VariableOffsets2, ConstantOffset2);
+ // TODO: Support cases with more than 1 variable offset.
+ if (!CanCollectInner || Result.VariableOffsets.size() > 1 ||
+ VariableOffsets2.size() > 1 ||
+ (Result.VariableOffsets.size() >= 1 && VariableOffsets2.size() >= 1)) {
+ // More than 1 variable index, use outer result.
+ return Result;
+ }
+ Result.BasePtr = InnerGEP->getPointerOperand();
+ Result.ConstantOffset += ConstantOffset2;
+ if (Result.VariableOffsets.size() == 0 && VariableOffsets2.size() == 1)
+ Result.VariableOffsets = VariableOffsets2;
+ Result.AllInbounds &= InnerGEP->isInBounds();
+ }
+ return Result;
+}
+
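collectOffsets folds one level of GEP nesting: when the outer base is itself a GEP whose offsets decompose, the inner constant offset is added into the outer result, keeping at most one variable index in total. A standalone sketch of the constant-only part:

#include "llvm/ADT/MapVector.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Operator.h"

// Accumulate GEP's constant offset, folding in an inner GEP's constant
// offset when the inner GEP has no variable indices. Simplified from
// collectOffsets above.
bool constantGEPOffset(llvm::GEPOperator &GEP, const llvm::DataLayout &DL,
                       llvm::APInt &Total) {
  unsigned BW =
      DL.getIndexTypeSizeInBits(GEP.getPointerOperand()->getType());
  llvm::MapVector<llvm::Value *, llvm::APInt> Vars;
  Total = llvm::APInt(BW, 0);
  if (!GEP.collectOffset(DL, BW, Vars, Total))
    return false;
  if (auto *Inner =
          llvm::dyn_cast<llvm::GEPOperator>(GEP.getPointerOperand())) {
    llvm::MapVector<llvm::Value *, llvm::APInt> InnerVars;
    llvm::APInt InnerOff(BW, 0);
    if (Inner->collectOffset(DL, BW, InnerVars, InnerOff) &&
        InnerVars.empty())
      Total += InnerOff;
  }
  return true;
}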
static Decomposition decompose(Value *V,
- SmallVectorImpl<PreconditionTy> &Preconditions,
+ SmallVectorImpl<ConditionTy> &Preconditions,
bool IsSigned, const DataLayout &DL);
static bool canUseSExt(ConstantInt *CI) {
@@ -338,51 +422,22 @@ static bool canUseSExt(ConstantInt *CI) {
return Val.sgt(MinSignedConstraintValue) && Val.slt(MaxConstraintValue);
}
-static Decomposition
-decomposeGEP(GEPOperator &GEP, SmallVectorImpl<PreconditionTy> &Preconditions,
- bool IsSigned, const DataLayout &DL) {
+static Decomposition decomposeGEP(GEPOperator &GEP,
+ SmallVectorImpl<ConditionTy> &Preconditions,
+ bool IsSigned, const DataLayout &DL) {
// Do not reason about pointers where the index size is larger than 64 bits,
// as the coefficients used to encode constraints are 64 bit integers.
if (DL.getIndexTypeSizeInBits(GEP.getPointerOperand()->getType()) > 64)
return &GEP;
- if (!GEP.isInBounds())
- return &GEP;
-
assert(!IsSigned && "The logic below only supports decomposition for "
- "unsinged predicates at the moment.");
- Type *PtrTy = GEP.getType()->getScalarType();
- unsigned BitWidth = DL.getIndexTypeSizeInBits(PtrTy);
- MapVector<Value *, APInt> VariableOffsets;
- APInt ConstantOffset(BitWidth, 0);
- if (!GEP.collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset))
+ "unsigned predicates at the moment.");
+ const auto &[BasePtr, ConstantOffset, VariableOffsets, AllInbounds] =
+ collectOffsets(GEP, DL);
+ if (!BasePtr || !AllInbounds)
return &GEP;
- // Handle the (gep (gep ....), C) case by incrementing the constant
- // coefficient of the inner GEP, if C is a constant.
- auto *InnerGEP = dyn_cast<GEPOperator>(GEP.getPointerOperand());
- if (VariableOffsets.empty() && InnerGEP && InnerGEP->getNumOperands() == 2) {
- auto Result = decompose(InnerGEP, Preconditions, IsSigned, DL);
- Result.add(ConstantOffset.getSExtValue());
-
- if (ConstantOffset.isNegative()) {
- unsigned Scale = DL.getTypeAllocSize(InnerGEP->getResultElementType());
- int64_t ConstantOffsetI = ConstantOffset.getSExtValue();
- if (ConstantOffsetI % Scale != 0)
- return &GEP;
- // Add pre-condition ensuring the GEP is increasing monotonically and
- // can be de-composed.
- // Both sides are normalized by being divided by Scale.
- Preconditions.emplace_back(
- CmpInst::ICMP_SGE, InnerGEP->getOperand(1),
- ConstantInt::get(InnerGEP->getOperand(1)->getType(),
- -1 * (ConstantOffsetI / Scale)));
- }
- return Result;
- }
-
- Decomposition Result(ConstantOffset.getSExtValue(),
- DecompEntry(1, GEP.getPointerOperand()));
+ Decomposition Result(ConstantOffset.getSExtValue(), DecompEntry(1, BasePtr));
for (auto [Index, Scale] : VariableOffsets) {
auto IdxResult = decompose(Index, Preconditions, IsSigned, DL);
IdxResult.mul(Scale.getSExtValue());
@@ -401,7 +456,7 @@ decomposeGEP(GEPOperator &GEP, SmallVectorImpl<PreconditionTy> &Preconditions,
// Variable } where each pair contributes Coefficient * Variable. The sum of
// the constant offset and all pairs equals \p V.
static Decomposition decompose(Value *V,
- SmallVectorImpl<PreconditionTy> &Preconditions,
+ SmallVectorImpl<ConditionTy> &Preconditions,
bool IsSigned, const DataLayout &DL) {
auto MergeResults = [&Preconditions, IsSigned, &DL](Value *A, Value *B,
@@ -416,6 +471,9 @@ static Decomposition decompose(Value *V,
if (Ty->isPointerTy() && !IsSigned) {
if (auto *GEP = dyn_cast<GEPOperator>(V))
return decomposeGEP(*GEP, Preconditions, IsSigned, DL);
+ if (isa<ConstantPointerNull>(V))
+ return int64_t(0);
+
return V;
}
@@ -484,10 +542,8 @@ static Decomposition decompose(Value *V,
}
// Decompose or as an add if there are no common bits between the operands.
- if (match(V, m_Or(m_Value(Op0), m_ConstantInt(CI))) &&
- haveNoCommonBitsSet(Op0, CI, DL)) {
+ if (match(V, m_DisjointOr(m_Value(Op0), m_ConstantInt(CI))))
return MergeResults(Op0, CI, IsSigned);
- }
if (match(V, m_NUWShl(m_Value(Op1), m_ConstantInt(CI))) && canUseSExt(CI)) {
if (CI->getSExtValue() < 0 || CI->getSExtValue() >= 64)
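
The disjoint-or rewrite above relies on the identity that, when two values share no set bits, their bitwise OR equals their sum (no carries can occur). An exhaustive check over 8-bit values, as a sketch:

#include <cassert>

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B)
      if ((A & B) == 0)           // operands have no common bits set
        assert((A | B) == A + B); // so OR and ADD agree, carry-free
  return 0;
}
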
@@ -554,7 +610,7 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
Pred != CmpInst::ICMP_SLE && Pred != CmpInst::ICMP_SLT)
return {};
- SmallVector<PreconditionTy, 4> Preconditions;
+ SmallVector<ConditionTy, 4> Preconditions;
bool IsSigned = CmpInst::isSigned(Pred);
auto &Value2Index = getValue2Index(IsSigned);
auto ADec = decompose(Op0->stripPointerCastsSameRepresentation(),
@@ -647,6 +703,17 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
ConstraintTy ConstraintInfo::getConstraintForSolving(CmpInst::Predicate Pred,
Value *Op0,
Value *Op1) const {
+ Constant *NullC = Constant::getNullValue(Op0->getType());
+ // Handle trivially true compares directly to avoid adding V UGE 0 constraints
+ // for all variables in the unsigned system.
+ if ((Pred == CmpInst::ICMP_ULE && Op0 == NullC) ||
+ (Pred == CmpInst::ICMP_UGE && Op1 == NullC)) {
+ auto &Value2Index = getValue2Index(false);
+ // Return constraint that's trivially true.
+ return ConstraintTy(SmallVector<int64_t, 8>(Value2Index.size(), 0), false,
+ false, false);
+ }
+
// If both operands are known to be non-negative, change signed predicates to
// unsigned ones. This increases the reasoning effectiveness in combination
// with the signed <-> unsigned transfer logic.
@@ -664,7 +731,7 @@ ConstraintTy ConstraintInfo::getConstraintForSolving(CmpInst::Predicate Pred,
bool ConstraintTy::isValid(const ConstraintInfo &Info) const {
return Coefficients.size() > 0 &&
- all_of(Preconditions, [&Info](const PreconditionTy &C) {
+ all_of(Preconditions, [&Info](const ConditionTy &C) {
return Info.doesHold(C.Pred, C.Op0, C.Op1);
});
}
@@ -723,6 +790,10 @@ bool ConstraintInfo::doesHold(CmpInst::Predicate Pred, Value *A,
void ConstraintInfo::transferToOtherSystem(
CmpInst::Predicate Pred, Value *A, Value *B, unsigned NumIn,
unsigned NumOut, SmallVectorImpl<StackEntry> &DFSInStack) {
+ auto IsKnownNonNegative = [this](Value *V) {
+ return doesHold(CmpInst::ICMP_SGE, V, ConstantInt::get(V->getType(), 0)) ||
+ isKnownNonNegative(V, DL, /*Depth=*/MaxAnalysisRecursionDepth - 1);
+ };
// Check if we can combine facts from the signed and unsigned systems to
// derive additional facts.
if (!A->getType()->isIntegerTy())
@@ -734,30 +805,41 @@ void ConstraintInfo::transferToOtherSystem(
default:
break;
case CmpInst::ICMP_ULT:
- // If B is a signed positive constant, A >=s 0 and A <s B.
- if (doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0))) {
+ case CmpInst::ICMP_ULE:
+ // If B is known non-negative, then A >=s 0 and A <s (or <=s) B.
+ if (IsKnownNonNegative(B)) {
addFact(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0), NumIn,
NumOut, DFSInStack);
- addFact(CmpInst::ICMP_SLT, A, B, NumIn, NumOut, DFSInStack);
+ addFact(CmpInst::getSignedPredicate(Pred), A, B, NumIn, NumOut,
+ DFSInStack);
+ }
+ break;
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_UGT:
+ // If A is known non-negative, then B >=s 0 and A >s (or >=s) B.
+ if (IsKnownNonNegative(A)) {
+ addFact(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0), NumIn,
+ NumOut, DFSInStack);
+ addFact(CmpInst::getSignedPredicate(Pred), A, B, NumIn, NumOut,
+ DFSInStack);
}
break;
case CmpInst::ICMP_SLT:
- if (doesHold(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0)))
+ if (IsKnownNonNegative(A))
addFact(CmpInst::ICMP_ULT, A, B, NumIn, NumOut, DFSInStack);
break;
case CmpInst::ICMP_SGT: {
if (doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), -1)))
addFact(CmpInst::ICMP_UGE, A, ConstantInt::get(B->getType(), 0), NumIn,
NumOut, DFSInStack);
- if (doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0)))
+ if (IsKnownNonNegative(B))
addFact(CmpInst::ICMP_UGT, A, B, NumIn, NumOut, DFSInStack);
break;
}
case CmpInst::ICMP_SGE:
- if (doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0))) {
+ if (IsKnownNonNegative(B))
addFact(CmpInst::ICMP_UGE, A, B, NumIn, NumOut, DFSInStack);
- }
break;
}
}
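
The ULT case of the signed/unsigned transfer above can be sanity-checked exhaustively for 8-bit values: whenever the bound is non-negative as a signed value, an unsigned less-than implies both signed facts. A brute-force sketch (two's-complement narrowing assumed):

#include <cassert>
#include <cstdint>

int main() {
  for (int A = 0; A < 256; ++A)
    for (int B = 0; B < 256; ++B) {
      uint8_t UA = static_cast<uint8_t>(A), UB = static_cast<uint8_t>(B);
      int8_t SA = static_cast<int8_t>(UA), SB = static_cast<int8_t>(UB);
      // If B >=s 0, then A <u B implies A >=s 0 and A <s B.
      if (SB >= 0 && UA < UB)
        assert(SA >= 0 && SA < SB);
    }
  return 0;
}
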
@@ -772,7 +854,138 @@ static void dumpConstraint(ArrayRef<int64_t> C,
}
#endif
+void State::addInfoForInductions(BasicBlock &BB) {
+ auto *L = LI.getLoopFor(&BB);
+ if (!L || L->getHeader() != &BB)
+ return;
+
+ Value *A;
+ Value *B;
+ CmpInst::Predicate Pred;
+
+ if (!match(BB.getTerminator(),
+ m_Br(m_ICmp(Pred, m_Value(A), m_Value(B)), m_Value(), m_Value())))
+ return;
+ PHINode *PN = dyn_cast<PHINode>(A);
+ if (!PN) {
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ std::swap(A, B);
+ PN = dyn_cast<PHINode>(A);
+ }
+
+ if (!PN || PN->getParent() != &BB || PN->getNumIncomingValues() != 2 ||
+ !SE.isSCEVable(PN->getType()))
+ return;
+
+ BasicBlock *InLoopSucc = nullptr;
+ if (Pred == CmpInst::ICMP_NE)
+ InLoopSucc = cast<BranchInst>(BB.getTerminator())->getSuccessor(0);
+ else if (Pred == CmpInst::ICMP_EQ)
+ InLoopSucc = cast<BranchInst>(BB.getTerminator())->getSuccessor(1);
+ else
+ return;
+
+ if (!L->contains(InLoopSucc) || !L->isLoopExiting(&BB) || InLoopSucc == &BB)
+ return;
+
+ auto *AR = dyn_cast_or_null<SCEVAddRecExpr>(SE.getSCEV(PN));
+ BasicBlock *LoopPred = L->getLoopPredecessor();
+ if (!AR || AR->getLoop() != L || !LoopPred)
+ return;
+
+ const SCEV *StartSCEV = AR->getStart();
+ Value *StartValue = nullptr;
+ if (auto *C = dyn_cast<SCEVConstant>(StartSCEV)) {
+ StartValue = C->getValue();
+ } else {
+ StartValue = PN->getIncomingValueForBlock(LoopPred);
+ assert(SE.getSCEV(StartValue) == StartSCEV && "inconsistent start value");
+ }
+
+ DomTreeNode *DTN = DT.getNode(InLoopSucc);
+ auto Inc = SE.getMonotonicPredicateType(AR, CmpInst::ICMP_UGT);
+ bool MonotonicallyIncreasing =
+ Inc && *Inc == ScalarEvolution::MonotonicallyIncreasing;
+ if (MonotonicallyIncreasing) {
+ // SCEV guarantees that AR does not wrap, so PN >= StartValue can be added
+ // unconditionally.
+ WorkList.push_back(
+ FactOrCheck::getConditionFact(DTN, CmpInst::ICMP_UGE, PN, StartValue));
+ }
+
+ APInt StepOffset;
+ if (auto *C = dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE)))
+ StepOffset = C->getAPInt();
+ else
+ return;
+
+ // Make sure the bound B is loop-invariant.
+ if (!L->isLoopInvariant(B))
+ return;
+
+ // Handle negative steps.
+ if (StepOffset.isNegative()) {
+ // TODO: Extend to allow steps < -1.
+ if (!(-StepOffset).isOne())
+ return;
+
+ // AR may wrap.
+ // Add StartValue >= PN conditional on B <= StartValue which guarantees that
+ // the loop exits before wrapping with a step of -1.
+ WorkList.push_back(FactOrCheck::getConditionFact(
+ DTN, CmpInst::ICMP_UGE, StartValue, PN,
+ ConditionTy(CmpInst::ICMP_ULE, B, StartValue)));
+ // Add PN > B conditional on B <= StartValue which guarantees that the loop
+ // exits when reaching B with a step of -1.
+ WorkList.push_back(FactOrCheck::getConditionFact(
+ DTN, CmpInst::ICMP_UGT, PN, B,
+ ConditionTy(CmpInst::ICMP_ULE, B, StartValue)));
+ return;
+ }
+
+ // Make sure AR either steps by 1 or that the value we compare against is a
+ // GEP based on the same start value and all offsets are a multiple of the
+ // step size, to guarantee that the induction will reach the value.
+ if (StepOffset.isZero() || StepOffset.isNegative())
+ return;
+
+ if (!StepOffset.isOne()) {
+ auto *UpperGEP = dyn_cast<GetElementPtrInst>(B);
+ if (!UpperGEP || UpperGEP->getPointerOperand() != StartValue ||
+ !UpperGEP->isInBounds())
+ return;
+
+ MapVector<Value *, APInt> UpperVariableOffsets;
+ APInt UpperConstantOffset(StepOffset.getBitWidth(), 0);
+ const DataLayout &DL = BB.getModule()->getDataLayout();
+ if (!UpperGEP->collectOffset(DL, StepOffset.getBitWidth(),
+ UpperVariableOffsets, UpperConstantOffset))
+ return;
+ // All variable offsets and the constant offset must each be a multiple of
+ // the step.
+ if (!UpperConstantOffset.urem(StepOffset).isZero() ||
+ any_of(UpperVariableOffsets, [&StepOffset](const auto &P) {
+ return !P.second.urem(StepOffset).isZero();
+ }))
+ return;
+ }
+
+ // AR may wrap. Add PN >= StartValue conditional on StartValue <= B which
+ // guarantees that the loop exits before wrapping in combination with the
+ // restrictions on B and the step above.
+ if (!MonotonicallyIncreasing) {
+ WorkList.push_back(FactOrCheck::getConditionFact(
+ DTN, CmpInst::ICMP_UGE, PN, StartValue,
+ ConditionTy(CmpInst::ICMP_ULE, StartValue, B)));
+ }
+ WorkList.push_back(FactOrCheck::getConditionFact(
+ DTN, CmpInst::ICMP_ULT, PN, B,
+ ConditionTy(CmpInst::ICMP_ULE, StartValue, B)));
+}
+
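
As an illustration of the kind of loop the new induction handling targets (example code, not part of the patch): for a step-one induction with an i != n exit, the facts queued above let checks in the body assume i >= start and i < n, guarded by start <= n.

// Inside this body, given start <= n, ConstraintElimination can now derive
// i >= start (no wrap before the exit) and i < n (the exit is reached),
// e.g. to fold bounds checks on a[i].
unsigned sum(const unsigned *a, unsigned start, unsigned n) {
  unsigned s = 0;
  for (unsigned i = start; i != n; ++i)
    s += a[i];
  return s;
}
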
void State::addInfoFor(BasicBlock &BB) {
+ addInfoForInductions(BB);
+
// True as long as the current instruction is guaranteed to execute.
bool GuaranteedToExecute = true;
// Queue conditions and assumes.
@@ -795,27 +1008,40 @@ void State::addInfoFor(BasicBlock &BB) {
}
if (isa<MinMaxIntrinsic>(&I)) {
- WorkList.push_back(FactOrCheck::getFact(DT.getNode(&BB), &I));
+ WorkList.push_back(FactOrCheck::getInstFact(DT.getNode(&BB), &I));
continue;
}
- Value *Cond;
+ Value *A, *B;
+ CmpInst::Predicate Pred;
// For now, just handle assumes with a single compare as condition.
- if (match(&I, m_Intrinsic<Intrinsic::assume>(m_Value(Cond))) &&
- isa<ICmpInst>(Cond)) {
+ if (match(&I, m_Intrinsic<Intrinsic::assume>(
+ m_ICmp(Pred, m_Value(A), m_Value(B))))) {
if (GuaranteedToExecute) {
// The assume is guaranteed to execute when BB is entered, hence the
// condition holds on entry to BB.
- WorkList.emplace_back(FactOrCheck::getFact(DT.getNode(I.getParent()),
- cast<Instruction>(Cond)));
+ WorkList.emplace_back(FactOrCheck::getConditionFact(
+ DT.getNode(I.getParent()), Pred, A, B));
} else {
WorkList.emplace_back(
- FactOrCheck::getFact(DT.getNode(I.getParent()), &I));
+ FactOrCheck::getInstFact(DT.getNode(I.getParent()), &I));
}
}
GuaranteedToExecute &= isGuaranteedToTransferExecutionToSuccessor(&I);
}
+ if (auto *Switch = dyn_cast<SwitchInst>(BB.getTerminator())) {
+ for (auto &Case : Switch->cases()) {
+ BasicBlock *Succ = Case.getCaseSuccessor();
+ Value *V = Case.getCaseValue();
+ if (!canAddSuccessor(BB, Succ))
+ continue;
+ WorkList.emplace_back(FactOrCheck::getConditionFact(
+ DT.getNode(Succ), CmpInst::ICMP_EQ, Switch->getCondition(), V));
+ }
+ return;
+ }
+
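
The switch handling queues one equality fact per case successor. Illustratively (a hypothetical example, not from the patch), in the case body below the solver may assume x == 3:

int classify(int x) {
  switch (x) {
  case 3:
    return x - 3; // "x == 3" holds here, so this folds to 0
  default:
    return x;
  }
}
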
auto *Br = dyn_cast<BranchInst>(BB.getTerminator());
if (!Br || !Br->isConditional())
return;
@@ -847,8 +1073,11 @@ void State::addInfoFor(BasicBlock &BB) {
while (!CondWorkList.empty()) {
Value *Cur = CondWorkList.pop_back_val();
if (auto *Cmp = dyn_cast<ICmpInst>(Cur)) {
- WorkList.emplace_back(
- FactOrCheck::getFact(DT.getNode(Successor), Cmp, IsOr));
+ WorkList.emplace_back(FactOrCheck::getConditionFact(
+ DT.getNode(Successor),
+ IsOr ? CmpInst::getInversePredicate(Cmp->getPredicate())
+ : Cmp->getPredicate(),
+ Cmp->getOperand(0), Cmp->getOperand(1)));
continue;
}
if (IsOr && match(Cur, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
@@ -870,13 +1099,26 @@ void State::addInfoFor(BasicBlock &BB) {
if (!CmpI)
return;
if (canAddSuccessor(BB, Br->getSuccessor(0)))
- WorkList.emplace_back(
- FactOrCheck::getFact(DT.getNode(Br->getSuccessor(0)), CmpI));
+ WorkList.emplace_back(FactOrCheck::getConditionFact(
+ DT.getNode(Br->getSuccessor(0)), CmpI->getPredicate(),
+ CmpI->getOperand(0), CmpI->getOperand(1)));
if (canAddSuccessor(BB, Br->getSuccessor(1)))
- WorkList.emplace_back(
- FactOrCheck::getFact(DT.getNode(Br->getSuccessor(1)), CmpI, true));
+ WorkList.emplace_back(FactOrCheck::getConditionFact(
+ DT.getNode(Br->getSuccessor(1)),
+ CmpInst::getInversePredicate(CmpI->getPredicate()), CmpI->getOperand(0),
+ CmpI->getOperand(1)));
}
+#ifndef NDEBUG
+static void dumpUnpackedICmp(raw_ostream &OS, ICmpInst::Predicate Pred,
+ Value *LHS, Value *RHS) {
+ OS << "icmp " << Pred << ' ';
+ LHS->printAsOperand(OS, /*PrintType=*/true);
+ OS << ", ";
+ RHS->printAsOperand(OS, /*PrintType=*/false);
+}
+#endif
+
namespace {
/// Helper to keep track of a condition and if it should be treated as negated
/// for reproducer construction.
@@ -1009,10 +1251,9 @@ static void generateReproducer(CmpInst *Cond, Module *M,
if (Entry.Pred == ICmpInst::BAD_ICMP_PREDICATE)
continue;
- LLVM_DEBUG(
- dbgs() << " Materializing assumption icmp " << Entry.Pred << ' ';
- Entry.LHS->printAsOperand(dbgs(), /*PrintType=*/true); dbgs() << ", ";
- Entry.RHS->printAsOperand(dbgs(), /*PrintType=*/false); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << " Materializing assumption ";
+ dumpUnpackedICmp(dbgs(), Entry.Pred, Entry.LHS, Entry.RHS);
+ dbgs() << "\n");
CloneInstructions({Entry.LHS, Entry.RHS}, CmpInst::isSigned(Entry.Pred));
auto *Cmp = Builder.CreateICmp(Entry.Pred, Entry.LHS, Entry.RHS);
@@ -1028,14 +1269,12 @@ static void generateReproducer(CmpInst *Cond, Module *M,
assert(!verifyFunction(*F, &dbgs()));
}
-static std::optional<bool> checkCondition(CmpInst *Cmp, ConstraintInfo &Info,
- unsigned NumIn, unsigned NumOut,
+static std::optional<bool> checkCondition(CmpInst::Predicate Pred, Value *A,
+ Value *B, Instruction *CheckInst,
+ ConstraintInfo &Info, unsigned NumIn,
+ unsigned NumOut,
Instruction *ContextInst) {
- LLVM_DEBUG(dbgs() << "Checking " << *Cmp << "\n");
-
- CmpInst::Predicate Pred = Cmp->getPredicate();
- Value *A = Cmp->getOperand(0);
- Value *B = Cmp->getOperand(1);
+ LLVM_DEBUG(dbgs() << "Checking " << *CheckInst << "\n");
auto R = Info.getConstraintForSolving(Pred, A, B);
if (R.empty() || !R.isValid(Info)) {
@@ -1060,13 +1299,10 @@ static std::optional<bool> checkCondition(CmpInst *Cmp, ConstraintInfo &Info,
return std::nullopt;
LLVM_DEBUG({
- if (*ImpliedCondition) {
- dbgs() << "Condition " << *Cmp;
- } else {
- auto InversePred = Cmp->getInversePredicate();
- dbgs() << "Condition " << CmpInst::getPredicateName(InversePred) << " "
- << *A << ", " << *B;
- }
+ dbgs() << "Condition ";
+ dumpUnpackedICmp(
+ dbgs(), *ImpliedCondition ? Pred : CmpInst::getInversePredicate(Pred),
+ A, B);
dbgs() << " implied by dominating constraints\n";
CSToUse.dump();
});
@@ -1079,7 +1315,8 @@ static std::optional<bool> checkCondition(CmpInst *Cmp, ConstraintInfo &Info,
static bool checkAndReplaceCondition(
CmpInst *Cmp, ConstraintInfo &Info, unsigned NumIn, unsigned NumOut,
Instruction *ContextInst, Module *ReproducerModule,
- ArrayRef<ReproducerEntry> ReproducerCondStack, DominatorTree &DT) {
+ ArrayRef<ReproducerEntry> ReproducerCondStack, DominatorTree &DT,
+ SmallVectorImpl<Instruction *> &ToRemove) {
auto ReplaceCmpWithConstant = [&](CmpInst *Cmp, bool IsTrue) {
generateReproducer(Cmp, ReproducerModule, ReproducerCondStack, Info, DT);
Constant *ConstantC = ConstantInt::getBool(
@@ -1100,11 +1337,14 @@ static bool checkAndReplaceCondition(
return !II || II->getIntrinsicID() != Intrinsic::assume;
});
NumCondsRemoved++;
+ if (Cmp->use_empty())
+ ToRemove.push_back(Cmp);
return true;
};
- if (auto ImpliedCondition =
- checkCondition(Cmp, Info, NumIn, NumOut, ContextInst))
+ if (auto ImpliedCondition = checkCondition(
+ Cmp->getPredicate(), Cmp->getOperand(0), Cmp->getOperand(1), Cmp,
+ Info, NumIn, NumOut, ContextInst))
return ReplaceCmpWithConstant(Cmp, *ImpliedCondition);
return false;
}
@@ -1130,6 +1370,7 @@ static bool checkAndSecondOpImpliedByFirst(
FactOrCheck &CB, ConstraintInfo &Info, Module *ReproducerModule,
SmallVectorImpl<ReproducerEntry> &ReproducerCondStack,
SmallVectorImpl<StackEntry> &DFSInStack) {
+
CmpInst::Predicate Pred;
Value *A, *B;
Instruction *And = CB.getContextInst();
@@ -1144,9 +1385,10 @@ static bool checkAndSecondOpImpliedByFirst(
bool Changed = false;
// Check if the second condition can be simplified now.
- if (auto ImpliedCondition =
- checkCondition(cast<ICmpInst>(And->getOperand(1)), Info, CB.NumIn,
- CB.NumOut, CB.getContextInst())) {
+ ICmpInst *Cmp = cast<ICmpInst>(And->getOperand(1));
+ if (auto ImpliedCondition = checkCondition(
+ Cmp->getPredicate(), Cmp->getOperand(0), Cmp->getOperand(1), Cmp,
+ Info, CB.NumIn, CB.NumOut, CB.getContextInst())) {
And->setOperand(1, ConstantInt::getBool(And->getType(), *ImpliedCondition));
Changed = true;
}
@@ -1172,9 +1414,8 @@ void ConstraintInfo::addFact(CmpInst::Predicate Pred, Value *A, Value *B,
if (!R.isValid(*this) || R.isNe())
return;
- LLVM_DEBUG(dbgs() << "Adding '" << Pred << " ";
- A->printAsOperand(dbgs(), false); dbgs() << ", ";
- B->printAsOperand(dbgs(), false); dbgs() << "'\n");
+ LLVM_DEBUG(dbgs() << "Adding '"; dumpUnpackedICmp(dbgs(), Pred, A, B);
+ dbgs() << "'\n");
bool Added = false;
auto &CSToUse = getCS(R.IsSigned);
if (R.Coefficients.empty())
@@ -1273,7 +1514,8 @@ tryToSimplifyOverflowMath(IntrinsicInst *II, ConstraintInfo &Info,
return Changed;
}
-static bool eliminateConstraints(Function &F, DominatorTree &DT,
+static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
+ ScalarEvolution &SE,
OptimizationRemarkEmitter &ORE) {
bool Changed = false;
DT.updateDFSNumbers();
@@ -1281,7 +1523,7 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT,
for (Value &Arg : F.args())
FunctionArgs.push_back(&Arg);
ConstraintInfo Info(F.getParent()->getDataLayout(), FunctionArgs);
- State S(DT);
+ State S(DT, LI, SE);
std::unique_ptr<Module> ReproducerModule(
DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr);
@@ -1303,8 +1545,9 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT,
// transfer logic.
stable_sort(S.WorkList, [](const FactOrCheck &A, const FactOrCheck &B) {
auto HasNoConstOp = [](const FactOrCheck &B) {
- return !isa<ConstantInt>(B.Inst->getOperand(0)) &&
- !isa<ConstantInt>(B.Inst->getOperand(1));
+ Value *V0 = B.isConditionFact() ? B.Cond.Op0 : B.Inst->getOperand(0);
+ Value *V1 = B.isConditionFact() ? B.Cond.Op1 : B.Inst->getOperand(1);
+ return !isa<ConstantInt>(V0) && !isa<ConstantInt>(V1);
};
// If both entries have the same In numbers, conditional facts come first.
// Otherwise use the relative order in the basic block.
@@ -1365,7 +1608,7 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT,
} else if (auto *Cmp = dyn_cast<ICmpInst>(Inst)) {
bool Simplified = checkAndReplaceCondition(
Cmp, Info, CB.NumIn, CB.NumOut, CB.getContextInst(),
- ReproducerModule.get(), ReproducerCondStack, S.DT);
+ ReproducerModule.get(), ReproducerCondStack, S.DT, ToRemove);
if (!Simplified && match(CB.getContextInst(),
m_LogicalAnd(m_Value(), m_Specific(Inst)))) {
Simplified =
@@ -1377,8 +1620,9 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT,
continue;
}
- LLVM_DEBUG(dbgs() << "fact to add to the system: " << *CB.Inst << "\n");
auto AddFact = [&](CmpInst::Predicate Pred, Value *A, Value *B) {
+ LLVM_DEBUG(dbgs() << "fact to add to the system: ";
+ dumpUnpackedICmp(dbgs(), Pred, A, B); dbgs() << "\n");
if (Info.getCS(CmpInst::isSigned(Pred)).size() > MaxRows) {
LLVM_DEBUG(
dbgs()
@@ -1404,23 +1648,30 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT,
};
ICmpInst::Predicate Pred;
- if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(CB.Inst)) {
- Pred = ICmpInst::getNonStrictPredicate(MinMax->getPredicate());
- AddFact(Pred, MinMax, MinMax->getLHS());
- AddFact(Pred, MinMax, MinMax->getRHS());
- continue;
+ if (!CB.isConditionFact()) {
+ if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(CB.Inst)) {
+ Pred = ICmpInst::getNonStrictPredicate(MinMax->getPredicate());
+ AddFact(Pred, MinMax, MinMax->getLHS());
+ AddFact(Pred, MinMax, MinMax->getRHS());
+ continue;
+ }
}
- Value *A, *B;
- Value *Cmp = CB.Inst;
- match(Cmp, m_Intrinsic<Intrinsic::assume>(m_Value(Cmp)));
- if (match(Cmp, m_ICmp(Pred, m_Value(A), m_Value(B)))) {
- // Use the inverse predicate if required.
- if (CB.Not)
- Pred = CmpInst::getInversePredicate(Pred);
-
- AddFact(Pred, A, B);
+ Value *A = nullptr, *B = nullptr;
+ if (CB.isConditionFact()) {
+ Pred = CB.Cond.Pred;
+ A = CB.Cond.Op0;
+ B = CB.Cond.Op1;
+ if (CB.DoesHold.Pred != CmpInst::BAD_ICMP_PREDICATE &&
+ !Info.doesHold(CB.DoesHold.Pred, CB.DoesHold.Op0, CB.DoesHold.Op1))
+ continue;
+ } else {
+ bool Matched = match(CB.Inst, m_Intrinsic<Intrinsic::assume>(
+ m_ICmp(Pred, m_Value(A), m_Value(B))));
+ (void)Matched;
+ assert(Matched && "Must have an assume intrinsic with a icmp operand");
}
+ AddFact(Pred, A, B);
}
if (ReproducerModule && !ReproducerModule->functions().empty()) {
@@ -1450,12 +1701,16 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT,
PreservedAnalyses ConstraintEliminationPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- if (!eliminateConstraints(F, DT, ORE))
+ if (!eliminateConstraints(F, DT, LI, SE, ORE))
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<ScalarEvolutionAnalysis>();
PA.preserveSet<CFGAnalyses>();
return PA;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 523196e5e6ea..d2dfc764d042 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -55,7 +55,6 @@ static cl::opt<bool> CanonicalizeICmpPredicatesToUnsigned(
STATISTIC(NumPhis, "Number of phis propagated");
STATISTIC(NumPhiCommon, "Number of phis deleted via common incoming value");
STATISTIC(NumSelects, "Number of selects propagated");
-STATISTIC(NumMemAccess, "Number of memory access targets propagated");
STATISTIC(NumCmps, "Number of comparisons propagated");
STATISTIC(NumReturns, "Number of return values propagated");
STATISTIC(NumDeadCases, "Number of switch cases removed");
@@ -93,6 +92,7 @@ STATISTIC(NumNonNull, "Number of function pointer arguments marked non-null");
STATISTIC(NumMinMax, "Number of llvm.[us]{min,max} intrinsics removed");
STATISTIC(NumUDivURemsNarrowedExpanded,
"Number of bound udiv's/urem's expanded");
+STATISTIC(NumZExt, "Number of non-negative deductions");
static bool processSelect(SelectInst *S, LazyValueInfo *LVI) {
if (S->getType()->isVectorTy() || isa<Constant>(S->getCondition()))
@@ -263,23 +263,6 @@ static bool processPHI(PHINode *P, LazyValueInfo *LVI, DominatorTree *DT,
return Changed;
}
-static bool processMemAccess(Instruction *I, LazyValueInfo *LVI) {
- Value *Pointer = nullptr;
- if (LoadInst *L = dyn_cast<LoadInst>(I))
- Pointer = L->getPointerOperand();
- else
- Pointer = cast<StoreInst>(I)->getPointerOperand();
-
- if (isa<Constant>(Pointer)) return false;
-
- Constant *C = LVI->getConstant(Pointer, I);
- if (!C) return false;
-
- ++NumMemAccess;
- I->replaceUsesOfWith(Pointer, C);
- return true;
-}
-
static bool processICmp(ICmpInst *Cmp, LazyValueInfo *LVI) {
if (!CanonicalizeICmpPredicatesToUnsigned)
return false;
@@ -294,8 +277,11 @@ static bool processICmp(ICmpInst *Cmp, LazyValueInfo *LVI) {
ICmpInst::Predicate UnsignedPred =
ConstantRange::getEquivalentPredWithFlippedSignedness(
- Cmp->getPredicate(), LVI->getConstantRange(Cmp->getOperand(0), Cmp),
- LVI->getConstantRange(Cmp->getOperand(1), Cmp));
+ Cmp->getPredicate(),
+ LVI->getConstantRangeAtUse(Cmp->getOperandUse(0),
+ /*UndefAllowed*/ true),
+ LVI->getConstantRangeAtUse(Cmp->getOperandUse(1),
+ /*UndefAllowed*/ true));
if (UnsignedPred == ICmpInst::Predicate::BAD_ICMP_PREDICATE)
return false;
@@ -409,8 +395,10 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
// See if we can prove that the given binary op intrinsic will not overflow.
static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI) {
- ConstantRange LRange = LVI->getConstantRangeAtUse(BO->getOperandUse(0));
- ConstantRange RRange = LVI->getConstantRangeAtUse(BO->getOperandUse(1));
+ ConstantRange LRange =
+ LVI->getConstantRangeAtUse(BO->getOperandUse(0), /*UndefAllowed*/ false);
+ ConstantRange RRange =
+ LVI->getConstantRangeAtUse(BO->getOperandUse(1), /*UndefAllowed*/ false);
ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
BO->getBinaryOp(), RRange, BO->getNoWrapKind());
return NWRegion.contains(LRange);
@@ -773,7 +761,7 @@ static bool expandUDivOrURem(BinaryOperator *Instr, const ConstantRange &XCR,
// NOTE: this transformation introduces two uses of X,
// but it may be undef so we must freeze it first.
Value *FrozenX = X;
- if (!isGuaranteedNotToBeUndefOrPoison(X))
+ if (!isGuaranteedNotToBeUndef(X))
FrozenX = B.CreateFreeze(X, X->getName() + ".frozen");
auto *AdjX = B.CreateNUWSub(FrozenX, Y, Instr->getName() + ".urem");
auto *Cmp =
@@ -837,8 +825,11 @@ static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
if (Instr->getType()->isVectorTy())
return false;
- ConstantRange XCR = LVI->getConstantRangeAtUse(Instr->getOperandUse(0));
- ConstantRange YCR = LVI->getConstantRangeAtUse(Instr->getOperandUse(1));
+ ConstantRange XCR = LVI->getConstantRangeAtUse(Instr->getOperandUse(0),
+ /*UndefAllowed*/ false);
+ // Allow undef for the RHS, as an undef divisor can be treated as zero, which makes the division UB.
+ ConstantRange YCR = LVI->getConstantRangeAtUse(Instr->getOperandUse(1),
+ /*UndefAllowed*/ true);
if (expandUDivOrURem(Instr, XCR, YCR))
return true;
@@ -909,6 +900,14 @@ static bool processSDiv(BinaryOperator *SDI, const ConstantRange &LCR,
assert(SDI->getOpcode() == Instruction::SDiv);
assert(!SDI->getType()->isVectorTy());
+ // Check whether the division folds to a constant.
+ ConstantRange DivCR = LCR.sdiv(RCR);
+ if (const APInt *Elem = DivCR.getSingleElement()) {
+ SDI->replaceAllUsesWith(ConstantInt::get(SDI->getType(), *Elem));
+ SDI->eraseFromParent();
+ return true;
+ }
+
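
The new fold replaces an sdiv outright when the quotient range collapses to a single value. For instance, every x in [10, 11] divided by every y in [4, 5] yields 2; a brute-force sketch of that singleton case:

#include <cassert>

int main() {
  for (int X = 10; X <= 11; ++X)
    for (int Y = 4; Y <= 5; ++Y)
      assert(X / Y == 2); // LCR.sdiv(RCR) would be the single element {2}
  return 0;
}
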
struct Operand {
Value *V;
Domain D;
@@ -957,8 +956,11 @@ static bool processSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) {
if (Instr->getType()->isVectorTy())
return false;
- ConstantRange LCR = LVI->getConstantRangeAtUse(Instr->getOperandUse(0));
- ConstantRange RCR = LVI->getConstantRangeAtUse(Instr->getOperandUse(1));
+ ConstantRange LCR =
+ LVI->getConstantRangeAtUse(Instr->getOperandUse(0), /*UndefAllowed*/ false);
+ // Allow undef for the RHS, as an undef divisor can be treated as zero, which makes the division UB.
+ ConstantRange RCR =
+ LVI->getConstantRangeAtUse(Instr->getOperandUse(1), /*UndefAllowed*/ true);
if (Instr->getOpcode() == Instruction::SDiv)
if (processSDiv(Instr, LCR, RCR, LVI))
return true;
@@ -1016,12 +1018,31 @@ static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) {
auto *ZExt = CastInst::CreateZExtOrBitCast(Base, SDI->getType(), "", SDI);
ZExt->takeName(SDI);
ZExt->setDebugLoc(SDI->getDebugLoc());
+ ZExt->setNonNeg();
SDI->replaceAllUsesWith(ZExt);
SDI->eraseFromParent();
return true;
}
+static bool processZExt(ZExtInst *ZExt, LazyValueInfo *LVI) {
+ if (ZExt->getType()->isVectorTy())
+ return false;
+
+ if (ZExt->hasNonNeg())
+ return false;
+
+ const Use &Base = ZExt->getOperandUse(0);
+ if (!LVI->getConstantRangeAtUse(Base, /*UndefAllowed*/ false)
+ .isAllNonNegative())
+ return false;
+
+ ++NumZExt;
+ ZExt->setNonNeg();
+
+ return true;
+}
+
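
The nneg deduction is sound because zero- and sign-extension agree on non-negative inputs; an exhaustive 8-bit check, as a sketch:

#include <cassert>
#include <cstdint>

int main() {
  for (int X = 0; X < 128; ++X) { // all non-negative i8 values
    int8_t V = static_cast<int8_t>(X);
    // zext (via uint8_t) and sext (via int8_t) produce the same i32.
    assert(static_cast<int32_t>(static_cast<uint8_t>(V)) ==
           static_cast<int32_t>(V));
  }
  return 0;
}
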
static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) {
using OBO = OverflowingBinaryOperator;
@@ -1037,8 +1058,10 @@ static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) {
Value *LHS = BinOp->getOperand(0);
Value *RHS = BinOp->getOperand(1);
- ConstantRange LRange = LVI->getConstantRange(LHS, BinOp);
- ConstantRange RRange = LVI->getConstantRange(RHS, BinOp);
+ ConstantRange LRange =
+ LVI->getConstantRange(LHS, BinOp, /*UndefAllowed*/ false);
+ ConstantRange RRange =
+ LVI->getConstantRange(RHS, BinOp, /*UndefAllowed*/ false);
bool Changed = false;
bool NewNUW = false, NewNSW = false;
@@ -1130,10 +1153,6 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
case Instruction::FCmp:
BBChanged |= processCmp(cast<CmpInst>(&II), LVI);
break;
- case Instruction::Load:
- case Instruction::Store:
- BBChanged |= processMemAccess(&II, LVI);
- break;
case Instruction::Call:
case Instruction::Invoke:
BBChanged |= processCallSite(cast<CallBase>(II), LVI);
@@ -1152,6 +1171,9 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
case Instruction::SExt:
BBChanged |= processSExt(cast<SExtInst>(&II), LVI);
break;
+ case Instruction::ZExt:
+ BBChanged |= processZExt(cast<ZExtInst>(&II), LVI);
+ break;
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DCE.cpp
index d309799d95f0..2ad46130dc94 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DCE.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -36,39 +36,6 @@ STATISTIC(DCEEliminated, "Number of insts removed");
DEBUG_COUNTER(DCECounter, "dce-transform",
"Controls which instructions are eliminated");
-//===--------------------------------------------------------------------===//
-// RedundantDbgInstElimination pass implementation
-//
-
-namespace {
-struct RedundantDbgInstElimination : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- RedundantDbgInstElimination() : FunctionPass(ID) {
- initializeRedundantDbgInstEliminationPass(*PassRegistry::getPassRegistry());
- }
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
- bool Changed = false;
- for (auto &BB : F)
- Changed |= RemoveRedundantDbgInstrs(&BB);
- return Changed;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- }
-};
-}
-
-char RedundantDbgInstElimination::ID = 0;
-INITIALIZE_PASS(RedundantDbgInstElimination, "redundant-dbg-inst-elim",
- "Redundant Dbg Instruction Elimination", false, false)
-
-Pass *llvm::createRedundantDbgInstEliminationPass() {
- return new RedundantDbgInstElimination();
-}
-
PreservedAnalyses
RedundantDbgInstEliminationPass::run(Function &F, FunctionAnalysisManager &AM) {
bool Changed = false;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index f2efe60bdf88..edfeb36f3422 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -100,10 +100,10 @@ static cl::opt<unsigned> MaxPathLength(
cl::desc("Max number of blocks searched to find a threading path"),
cl::Hidden, cl::init(20));
-static cl::opt<unsigned> MaxNumPaths(
- "dfa-max-num-paths",
- cl::desc("Max number of paths enumerated around a switch"),
- cl::Hidden, cl::init(200));
+static cl::opt<unsigned>
+ MaxNumPaths("dfa-max-num-paths",
+ cl::desc("Max number of paths enumerated around a switch"),
+ cl::Hidden, cl::init(200));
static cl::opt<unsigned>
CostThreshold("dfa-cost-threshold",
@@ -249,16 +249,20 @@ void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
FT = FalseBlock;
// Update the phi node of SI.
- SIUse->removeIncomingValue(StartBlock, /* DeletePHIIfEmpty = */ false);
SIUse->addIncoming(SI->getTrueValue(), TrueBlock);
SIUse->addIncoming(SI->getFalseValue(), FalseBlock);
// Update any other PHI nodes in EndBlock.
for (PHINode &Phi : EndBlock->phis()) {
if (&Phi != SIUse) {
- Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), TrueBlock);
- Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), FalseBlock);
+ Value *OrigValue = Phi.getIncomingValueForBlock(StartBlock);
+ Phi.addIncoming(OrigValue, TrueBlock);
+ Phi.addIncoming(OrigValue, FalseBlock);
}
+
+ // Remove the incoming value for the original StartBlock, which is now
+ // reached indirectly (through TrueBlock and FalseBlock).
+ Phi.removeIncomingValue(StartBlock, /* DeletePHIIfEmpty = */ false);
}
} else {
BasicBlock *NewBlock = nullptr;
@@ -297,6 +301,7 @@ void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
{DominatorTree::Insert, StartBlock, FT}});
// The select is now dead.
+ assert(SI->use_empty() && "Select must be dead now");
SI->eraseFromParent();
}
@@ -466,8 +471,9 @@ private:
if (!SITerm || !SITerm->isUnconditional())
return false;
- if (isa<PHINode>(SIUse) &&
- SIBB->getSingleSuccessor() != cast<Instruction>(SIUse)->getParent())
+ // Only fold the select if its PHI use comes in directly from the block where the select is defined.
+ PHINode *PHIUser = dyn_cast<PHINode>(SIUse);
+ if (PHIUser && PHIUser->getIncomingBlock(*SI->use_begin()) != SIBB)
return false;
// If select will not be sunk during unfolding, and it is in the same basic
@@ -728,6 +734,10 @@ private:
CodeMetrics Metrics;
SwitchInst *Switch = SwitchPaths->getSwitchInst();
+ // Don't thread a switch that has fewer than two successors.
+ if (Switch->getNumSuccessors() <= 1)
+ return false;
+
// Note that DuplicateBlockMap is not being used as intended here. It is
// just being used to ensure (BB, State) pairs are only counted once.
DuplicateBlockMap DuplicateMap;
@@ -805,6 +815,8 @@ private:
// using binary search, hence the LogBase2().
unsigned CondBranches =
APInt(32, Switch->getNumSuccessors()).ceilLogBase2();
+ assert(CondBranches > 0 &&
+ "The threaded switch must have multiple branches");
DuplicationCost = Metrics.NumInsts / CondBranches;
} else {
// Compared with jump tables, the DFA optimizer removes an indirect branch
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 62a9b6a41c5b..203fcdfc87d9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -38,9 +38,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -205,16 +203,17 @@ static bool isShortenableAtTheBeginning(Instruction *I) {
return isa<AnyMemSetInst>(I);
}
-static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
- const TargetLibraryInfo &TLI,
- const Function *F) {
+static std::optional<TypeSize> getPointerSize(const Value *V,
+ const DataLayout &DL,
+ const TargetLibraryInfo &TLI,
+ const Function *F) {
uint64_t Size;
ObjectSizeOpts Opts;
Opts.NullIsUnknownSize = NullPointerIsDefined(F);
if (getObjectSize(V, Size, DL, &TLI, Opts))
- return Size;
- return MemoryLocation::UnknownSize;
+ return TypeSize::getFixed(Size);
+ return std::nullopt;
}
namespace {
@@ -629,20 +628,11 @@ static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
Value *OrigDest = DeadIntrinsic->getRawDest();
if (!IsOverwriteEnd) {
- Type *Int8PtrTy =
- Type::getInt8PtrTy(DeadIntrinsic->getContext(),
- OrigDest->getType()->getPointerAddressSpace());
- Value *Dest = OrigDest;
- if (OrigDest->getType() != Int8PtrTy)
- Dest = CastInst::CreatePointerCast(OrigDest, Int8PtrTy, "", DeadI);
Value *Indices[1] = {
ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
- Type::getInt8Ty(DeadIntrinsic->getContext()), Dest, Indices, "", DeadI);
+ Type::getInt8Ty(DeadIntrinsic->getContext()), OrigDest, Indices, "", DeadI);
NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
- if (NewDestGEP->getType() != OrigDest->getType())
- NewDestGEP = CastInst::CreatePointerCast(NewDestGEP, OrigDest->getType(),
- "", DeadI);
DeadIntrinsic->setDest(NewDestGEP);
}
@@ -850,9 +840,6 @@ struct DSEState {
// Post-order numbers for each basic block. Used to figure out if memory
// accesses are executed before another access.
DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
- // Values that are only used with assumes. Used to refine pointer escape
- // analysis.
- SmallPtrSet<const Value *, 32> EphValues;
/// Keep track of instructions (partly) overlapping with killing MemoryDefs per
/// basic block.
@@ -872,10 +859,10 @@ struct DSEState {
DSEState &operator=(const DSEState &) = delete;
DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
- PostDominatorTree &PDT, AssumptionCache &AC,
- const TargetLibraryInfo &TLI, const LoopInfo &LI)
- : F(F), AA(AA), EI(DT, LI, EphValues), BatchAA(AA, &EI), MSSA(MSSA),
- DT(DT), PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
+ PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
+ const LoopInfo &LI)
+ : F(F), AA(AA), EI(DT, &LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
+ PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
// Collect blocks with throwing instructions not modeled in MemorySSA and
// alloc-like objects.
unsigned PO = 0;
@@ -905,8 +892,6 @@ struct DSEState {
AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
return isa<UnreachableInst>(E->getTerminator());
});
-
- CodeMetrics::collectEphemeralValues(&F, &AC, EphValues);
}
LocationSize strengthenLocationSize(const Instruction *I,
@@ -958,10 +943,11 @@ struct DSEState {
// Check whether the killing store overwrites the whole object, in which
// case the size/offset of the dead store does not matter.
- if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise()) {
- uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F);
- if (KillingUndObjSize != MemoryLocation::UnknownSize &&
- KillingUndObjSize == KillingLocSize.getValue())
+ if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
+ isIdentifiedObject(KillingUndObj)) {
+ std::optional<TypeSize> KillingUndObjSize =
+ getPointerSize(KillingUndObj, DL, TLI, &F);
+ if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
return OW_Complete;
}
@@ -984,9 +970,15 @@ struct DSEState {
return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
}
- const uint64_t KillingSize = KillingLocSize.getValue();
- const uint64_t DeadSize = DeadLoc.Size.getValue();
+ const TypeSize KillingSize = KillingLocSize.getValue();
+ const TypeSize DeadSize = DeadLoc.Size.getValue();
+ // Bail out of the size comparison below (which depends on AA) for scalable sizes.
+ // TODO: Remove AnyScalable once alias analysis deals with scalable vectors.
+ const bool AnyScalable =
+ DeadSize.isScalable() || KillingLocSize.isScalable();
+ if (AnyScalable)
+ return OW_Unknown;
// Query the alias information
AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
@@ -1076,7 +1068,7 @@ struct DSEState {
if (!isInvisibleToCallerOnUnwind(V)) {
I.first->second = false;
} else if (isNoAliasCall(V)) {
- I.first->second = !PointerMayBeCaptured(V, true, false, EphValues);
+ I.first->second = !PointerMayBeCaptured(V, true, false);
}
}
return I.first->second;
@@ -1095,7 +1087,7 @@ struct DSEState {
// with the killing MemoryDef. But we refrain from doing so for now to
// limit compile-time and this does not cause any changes to the number
// of stores removed on a large test set in practice.
- I.first->second = PointerMayBeCaptured(V, false, true, EphValues);
+ I.first->second = PointerMayBeCaptured(V, false, true);
return !I.first->second;
}
@@ -1861,6 +1853,10 @@ struct DSEState {
if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
Func != LibFunc_malloc)
return false;
+ // Gracefully handle malloc with unexpected memory attributes.
+ auto *MallocDef = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(Malloc));
+ if (!MallocDef)
+ return false;
auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
// Check for br(icmp ptr, null), truebb, falsebb) pattern at the end
@@ -1895,11 +1891,9 @@ struct DSEState {
return false;
MemorySSAUpdater Updater(&MSSA);
- auto *LastDef =
- cast<MemoryDef>(Updater.getMemorySSA()->getMemoryAccess(Malloc));
auto *NewAccess =
- Updater.createMemoryAccessAfter(cast<Instruction>(Calloc), LastDef,
- LastDef);
+ Updater.createMemoryAccessAfter(cast<Instruction>(Calloc), nullptr,
+ MallocDef);
auto *NewAccessMD = cast<MemoryDef>(NewAccess);
Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
Malloc->replaceAllUsesWith(Calloc);
@@ -2064,12 +2058,11 @@ struct DSEState {
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
DominatorTree &DT, PostDominatorTree &PDT,
- AssumptionCache &AC,
const TargetLibraryInfo &TLI,
const LoopInfo &LI) {
bool MadeChange = false;
- DSEState State(F, AA, MSSA, DT, PDT, AC, TLI, LI);
+ DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
// For each store:
for (unsigned I = 0; I < State.MemDefs.size(); I++) {
MemoryDef *KillingDef = State.MemDefs[I];
@@ -2177,7 +2170,7 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
auto *DeadSI = dyn_cast<StoreInst>(DeadI);
auto *KillingSI = dyn_cast<StoreInst>(KillingI);
// We are re-using tryToMergePartialOverlappingStores, which requires
- // DeadSI to dominate DeadSI.
+ // DeadSI to dominate KillingSI.
// TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
if (Constant *Merged = tryToMergePartialOverlappingStores(
@@ -2250,10 +2243,9 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
- AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
- bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, AC, TLI, LI);
+ bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
#ifdef LLVM_ENABLE_STATS
if (AreStatisticsEnabled())
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 67e8e82e408f..f736d429cb63 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -67,6 +67,7 @@ STATISTIC(NumCSE, "Number of instructions CSE'd");
STATISTIC(NumCSECVP, "Number of compare instructions CVP'd");
STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
STATISTIC(NumCSECall, "Number of call instructions CSE'd");
+STATISTIC(NumCSEGEP, "Number of GEP instructions CSE'd");
STATISTIC(NumDSE, "Number of trivial dead stores removed");
DEBUG_COUNTER(CSECounter, "early-cse",
@@ -143,11 +144,11 @@ struct SimpleValue {
!CI->getFunction()->isPresplitCoroutine();
}
return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
- isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
- isa<CmpInst>(Inst) || isa<SelectInst>(Inst) ||
- isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
- isa<ShuffleVectorInst>(Inst) || isa<ExtractValueInst>(Inst) ||
- isa<InsertValueInst>(Inst) || isa<FreezeInst>(Inst);
+ isa<BinaryOperator>(Inst) || isa<CmpInst>(Inst) ||
+ isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+ isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
+ isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst) ||
+ isa<FreezeInst>(Inst);
}
};
@@ -307,21 +308,20 @@ static unsigned getHashValueImpl(SimpleValue Val) {
IVI->getOperand(1),
hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
- assert((isa<CallInst>(Inst) || isa<GetElementPtrInst>(Inst) ||
- isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
- isa<ShuffleVectorInst>(Inst) || isa<UnaryOperator>(Inst) ||
- isa<FreezeInst>(Inst)) &&
+ assert((isa<CallInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+ isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
+ isa<UnaryOperator>(Inst) || isa<FreezeInst>(Inst)) &&
"Invalid/unknown instruction");
// Handle intrinsics with commutative operands.
- // TODO: Extend this to handle intrinsics with >2 operands where the 1st
- // 2 operands are commutative.
auto *II = dyn_cast<IntrinsicInst>(Inst);
- if (II && II->isCommutative() && II->arg_size() == 2) {
+ if (II && II->isCommutative() && II->arg_size() >= 2) {
Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
if (LHS > RHS)
std::swap(LHS, RHS);
- return hash_combine(II->getOpcode(), LHS, RHS);
+ return hash_combine(
+ II->getOpcode(), LHS, RHS,
+ hash_combine_range(II->value_op_begin() + 2, II->value_op_end()));
}
// gc.relocate is 'special' call: its second and third operands are
@@ -396,13 +396,14 @@ static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
}
- // TODO: Extend this for >2 args by matching the trailing N-2 args.
auto *LII = dyn_cast<IntrinsicInst>(LHSI);
auto *RII = dyn_cast<IntrinsicInst>(RHSI);
if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() &&
- LII->isCommutative() && LII->arg_size() == 2) {
+ LII->isCommutative() && LII->arg_size() >= 2) {
return LII->getArgOperand(0) == RII->getArgOperand(1) &&
- LII->getArgOperand(1) == RII->getArgOperand(0);
+ LII->getArgOperand(1) == RII->getArgOperand(0) &&
+ std::equal(LII->arg_begin() + 2, LII->arg_end(),
+ RII->arg_begin() + 2, RII->arg_end());
}
// See comment above in `getHashValue()`.
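
The extended hashing and equality keep commutative intrinsics with identical trailing arguments in one bucket by canonicalizing only the first two operands. A reduced sketch of the scheme (hashTwo is a hypothetical stand-in for llvm::hash_combine):

#include <cstddef>
#include <functional>
#include <utility>

std::size_t hashTwo(const void *A, const void *B) {
  std::size_t H = std::hash<const void *>()(A);
  return H * 31u ^ std::hash<const void *>()(B);
}

// f(x, y, z...) and f(y, x, z...) hash identically; trailing operands
// keep their positions, matching the arg_size() >= 2 handling above.
std::size_t hashCommutative(const void *LHS, const void *RHS) {
  if (LHS > RHS)
    std::swap(LHS, RHS);
  return hashTwo(LHS, RHS);
}
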
@@ -548,12 +549,82 @@ bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
// currently executing, so conservatively return false if they are in
// different basic blocks.
if (LHSI->isConvergent() && LHSI->getParent() != RHSI->getParent())
- return false;
+ return false;
return LHSI->isIdenticalTo(RHSI);
}
//===----------------------------------------------------------------------===//
+// GEPValue
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+struct GEPValue {
+ Instruction *Inst;
+ std::optional<int64_t> ConstantOffset;
+
+ GEPValue(Instruction *I) : Inst(I) {
+ assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+ }
+
+ GEPValue(Instruction *I, std::optional<int64_t> ConstantOffset)
+ : Inst(I), ConstantOffset(ConstantOffset) {
+ assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+ }
+
+ bool isSentinel() const {
+ return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
+ Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
+ }
+
+ static bool canHandle(Instruction *Inst) {
+ return isa<GetElementPtrInst>(Inst);
+ }
+};
+
+} // namespace
+
+namespace llvm {
+
+template <> struct DenseMapInfo<GEPValue> {
+ static inline GEPValue getEmptyKey() {
+ return DenseMapInfo<Instruction *>::getEmptyKey();
+ }
+
+ static inline GEPValue getTombstoneKey() {
+ return DenseMapInfo<Instruction *>::getTombstoneKey();
+ }
+
+ static unsigned getHashValue(const GEPValue &Val);
+ static bool isEqual(const GEPValue &LHS, const GEPValue &RHS);
+};
+
+} // end namespace llvm
+
+unsigned DenseMapInfo<GEPValue>::getHashValue(const GEPValue &Val) {
+ auto *GEP = cast<GetElementPtrInst>(Val.Inst);
+ if (Val.ConstantOffset.has_value())
+ return hash_combine(GEP->getOpcode(), GEP->getPointerOperand(),
+ Val.ConstantOffset.value());
+ return hash_combine(
+ GEP->getOpcode(),
+ hash_combine_range(GEP->value_op_begin(), GEP->value_op_end()));
+}
+
+bool DenseMapInfo<GEPValue>::isEqual(const GEPValue &LHS, const GEPValue &RHS) {
+ if (LHS.isSentinel() || RHS.isSentinel())
+ return LHS.Inst == RHS.Inst;
+ auto *LGEP = cast<GetElementPtrInst>(LHS.Inst);
+ auto *RGEP = cast<GetElementPtrInst>(RHS.Inst);
+ if (LGEP->getPointerOperand() != RGEP->getPointerOperand())
+ return false;
+ if (LHS.ConstantOffset.has_value() && RHS.ConstantOffset.has_value())
+ return LHS.ConstantOffset.value() == RHS.ConstantOffset.value();
+ return LGEP->isIdenticalToWhenDefined(RGEP);
+}
+
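
The effect of GEPValue is to key GEPs by (base pointer, constant byte offset) whenever the offset is known, so differently-typed GEPs computing the same address unify. A reduced sketch of the lookup discipline (GEPTable is a hypothetical illustration, not the EarlyCSE data structure):

#include <cstdint>
#include <map>
#include <utility>

using GEPKey = std::pair<const void *, int64_t>; // (base pointer, offset)

struct GEPTable {
  std::map<GEPKey, const void *> Available;

  // Returns a previously seen GEP computing the same address, or records
  // this one and returns it.
  const void *lookupOrInsert(const void *Base, int64_t Offset,
                             const void *GEP) {
    auto It = Available.try_emplace(GEPKey{Base, Offset}, GEP).first;
    return It->second;
  }
};
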
+//===----------------------------------------------------------------------===//
// EarlyCSE implementation
//===----------------------------------------------------------------------===//
@@ -647,6 +718,13 @@ public:
ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
CallHTType AvailableCalls;
+ using GEPMapAllocatorTy =
+ RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<GEPValue, Value *>>;
+ using GEPHTType = ScopedHashTable<GEPValue, Value *, DenseMapInfo<GEPValue>,
+ GEPMapAllocatorTy>;
+ GEPHTType AvailableGEPs;
+
/// This is the current generation of the memory value.
unsigned CurrentGeneration = 0;
@@ -667,9 +745,11 @@ private:
class NodeScope {
public:
NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
- InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
- : Scope(AvailableValues), LoadScope(AvailableLoads),
- InvariantScope(AvailableInvariants), CallScope(AvailableCalls) {}
+ InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
+ GEPHTType &AvailableGEPs)
+ : Scope(AvailableValues), LoadScope(AvailableLoads),
+ InvariantScope(AvailableInvariants), CallScope(AvailableCalls),
+ GEPScope(AvailableGEPs) {}
NodeScope(const NodeScope &) = delete;
NodeScope &operator=(const NodeScope &) = delete;
@@ -678,6 +758,7 @@ private:
LoadHTType::ScopeTy LoadScope;
InvariantHTType::ScopeTy InvariantScope;
CallHTType::ScopeTy CallScope;
+ GEPHTType::ScopeTy GEPScope;
};
// Contains all the needed information to create a stack for doing a depth
@@ -688,13 +769,13 @@ private:
public:
StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
- unsigned cg, DomTreeNode *n, DomTreeNode::const_iterator child,
+ GEPHTType &AvailableGEPs, unsigned cg, DomTreeNode *n,
+ DomTreeNode::const_iterator child,
DomTreeNode::const_iterator end)
: CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
EndIter(end),
Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
- AvailableCalls)
- {}
+ AvailableCalls, AvailableGEPs) {}
StackNode(const StackNode &) = delete;
StackNode &operator=(const StackNode &) = delete;
@@ -1214,6 +1295,20 @@ Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
return Result;
}
+static void combineIRFlags(Instruction &From, Value *To) {
+ if (auto *I = dyn_cast<Instruction>(To)) {
+ // If I being poison triggers UB, there is no need to drop those
+ // flags. Otherwise, only retain flags present on both I and From.
+ // TODO: Currently some fast-math flags are not treated as
+ // poison-generating even though they should. Until this is fixed,
+ // always retain flags present on both I and From for floating point
+ // instructions.
+ if (isa<FPMathOperator>(I) ||
+ (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
+ I->andIRFlags(&From);
+ }
+}
+
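
combineIRFlags implements flag intersection: when an instruction is replaced by an existing equivalent, only poison-generating flags present on both copies survive. A toy sketch of the intersection idea (the bit values are illustrative, not LLVM's encoding):

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t NUW = 1 << 0, NSW = 1 << 1;
  uint8_t Kept = NUW | NSW; // flags on the instruction being kept
  uint8_t Gone = NUW;       // flags on the duplicate being removed
  Kept &= Gone;             // andIRFlags: retain only the common subset
  assert(Kept == NUW);
  return 0;
}
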
bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier,
const ParseMemoryInst &Later) {
// Can we remove Earlier store because of Later store?
@@ -1424,7 +1519,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If this is a simple instruction that we can value number, process it.
if (SimpleValue::canHandle(&Inst)) {
- if (auto *CI = dyn_cast<ConstrainedFPIntrinsic>(&Inst)) {
+ if ([[maybe_unused]] auto *CI = dyn_cast<ConstrainedFPIntrinsic>(&Inst)) {
assert(CI->getExceptionBehavior() != fp::ebStrict &&
"Unexpected ebStrict from SimpleValue::canHandle()");
assert((!CI->getRoundingMode() ||
@@ -1439,16 +1534,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
continue;
}
- if (auto *I = dyn_cast<Instruction>(V)) {
- // If I being poison triggers UB, there is no need to drop those
- // flags. Otherwise, only retain flags present on both I and Inst.
- // TODO: Currently some fast-math flags are not treated as
- // poison-generating even though they should. Until this is fixed,
- // always retain flags present on both I and Inst for floating point
- // instructions.
- if (isa<FPMathOperator>(I) || (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
- I->andIRFlags(&Inst);
- }
+ combineIRFlags(Inst, V);
Inst.replaceAllUsesWith(V);
salvageKnowledge(&Inst, &AC);
removeMSSA(Inst);
@@ -1561,6 +1647,31 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}
+ // Compare GEP instructions based on offset.
+ if (GEPValue::canHandle(&Inst)) {
+ auto *GEP = cast<GetElementPtrInst>(&Inst);
+ APInt Offset = APInt(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0);
+ GEPValue GEPVal(GEP, GEP->accumulateConstantOffset(SQ.DL, Offset)
+ ? Offset.trySExtValue()
+ : std::nullopt);
+ if (Value *V = AvailableGEPs.lookup(GEPVal)) {
+ LLVM_DEBUG(dbgs() << "EarlyCSE CSE GEP: " << Inst << " to: " << *V
+ << '\n');
+ combineIRFlags(Inst, V);
+ Inst.replaceAllUsesWith(V);
+ salvageKnowledge(&Inst, &AC);
+ removeMSSA(Inst);
+ Inst.eraseFromParent();
+ Changed = true;
+ ++NumCSEGEP;
+ continue;
+ }
+
+ // Otherwise, just remember that we have this GEP.
+ AvailableGEPs.insert(GEPVal, &Inst);
+ continue;
+ }
+
// A release fence requires that all stores complete before it, but does
// not prevent the reordering of following loads 'before' the fence. As a
// result, we don't need to consider it as writing to memory and don't need
@@ -1675,7 +1786,7 @@ bool EarlyCSE::run() {
// Process the root node.
nodesToProcess.push_back(new StackNode(
AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
- CurrentGeneration, DT.getRootNode(),
+ AvailableGEPs, CurrentGeneration, DT.getRootNode(),
DT.getRootNode()->begin(), DT.getRootNode()->end()));
assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
@@ -1698,10 +1809,10 @@ bool EarlyCSE::run() {
} else if (NodeToProcess->childIter() != NodeToProcess->end()) {
// Push the next child onto the stack.
DomTreeNode *child = NodeToProcess->nextChild();
- nodesToProcess.push_back(
- new StackNode(AvailableValues, AvailableLoads, AvailableInvariants,
- AvailableCalls, NodeToProcess->childGeneration(),
- child, child->begin(), child->end()));
+ nodesToProcess.push_back(new StackNode(
+ AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
+ AvailableGEPs, NodeToProcess->childGeneration(), child,
+ child->begin(), child->end()));
} else {
// It has been processed, and there are no more children to process,
// so delete it and pop it off the stack.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp
index 1ede4e7932af..5e58af0edc15 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -760,7 +760,7 @@ PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) {
auto &AA = AM.getResult<AAManager>(F);
auto *MemDep =
isMemDepEnabled() ? &AM.getResult<MemoryDependenceAnalysis>(F) : nullptr;
- auto *LI = AM.getCachedResult<LoopAnalysis>(F);
+ auto &LI = AM.getResult<LoopAnalysis>(F);
auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(F);
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE,
@@ -772,8 +772,7 @@ PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) {
PA.preserve<TargetLibraryAnalysis>();
if (MSSA)
PA.preserve<MemorySSAAnalysis>();
- if (LI)
- PA.preserve<LoopAnalysis>();
+ PA.preserve<LoopAnalysis>();
return PA;
}
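
LoopInfo switches from an optional, cached analysis to a required one here, which is what allows the null checks on LI to disappear throughout the rest of this file. A minimal sketch of driving the updated pass in a standalone setup (runGVN is illustrative):

    #include "llvm/Passes/PassBuilder.h"
    #include "llvm/Transforms/Scalar/GVN.h"
    using namespace llvm;

    PreservedAnalyses runGVN(Function &F) {
      PassBuilder PB;
      FunctionAnalysisManager FAM;
      PB.registerFunctionAnalyses(FAM); // registers LoopAnalysis et al.
      return GVNPass().run(F, FAM);     // LoopInfo now computed on demand
    }
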
@@ -1152,13 +1151,11 @@ static Value *findDominatingValue(const MemoryLocation &Loc, Type *LoadTy,
BasicBlock *FromBB = From->getParent();
BatchAAResults BatchAA(*AA);
for (BasicBlock *BB = FromBB; BB; BB = BB->getSinglePredecessor())
- for (auto I = BB == FromBB ? From->getReverseIterator() : BB->rbegin(),
- E = BB->rend();
- I != E; ++I) {
+ for (auto *Inst = BB == FromBB ? From : BB->getTerminator();
+ Inst != nullptr; Inst = Inst->getPrevNonDebugInstruction()) {
// Stop the search if limit is reached.
if (++NumVisitedInsts > MaxNumVisitedInsts)
return nullptr;
- Instruction *Inst = &*I;
if (isModSet(BatchAA.getModRefInfo(Inst, Loc)))
return nullptr;
if (auto *LI = dyn_cast<LoadInst>(Inst))
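
The rewritten walk above also changes what gets counted: getPrevNonDebugInstruction() skips llvm.dbg.* intrinsics, so they no longer burn the MaxNumVisitedInsts budget the way the old reverse_iterator loop did. The shape of the new backward scan, as a sketch:

    // Start at From (in its own block) or at the terminator (in
    // predecessor blocks) and step backwards; nullptr marks the block
    // head, ending the inner loop.
    for (Instruction *Inst = BB->getTerminator(); Inst != nullptr;
         Inst = Inst->getPrevNonDebugInstruction()) {
      // ... visit Inst ...
    }
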
@@ -1373,7 +1370,7 @@ LoadInst *GVNPass::findLoadToHoistIntoPred(BasicBlock *Pred, BasicBlock *LoadBB,
LoadInst *Load) {
// For simplicity we only handle a Pred with 2 successors.
auto *Term = Pred->getTerminator();
- if (Term->getNumSuccessors() != 2 || Term->isExceptionalTerminator())
+ if (Term->getNumSuccessors() != 2 || Term->isSpecialTerminator())
return nullptr;
auto *SuccBB = Term->getSuccessor(0);
if (SuccBB == LoadBB)
@@ -1421,16 +1418,8 @@ void GVNPass::eliminatePartiallyRedundantLoad(
Load->getSyncScopeID(), UnavailableBlock->getTerminator());
NewLoad->setDebugLoc(Load->getDebugLoc());
if (MSSAU) {
- auto *MSSA = MSSAU->getMemorySSA();
- // Get the defining access of the original load or use the load if it is a
- // MemoryDef (e.g. because it is volatile). The inserted loads are
- // guaranteed to load from the same definition.
- auto *LoadAcc = MSSA->getMemoryAccess(Load);
- auto *DefiningAcc =
- isa<MemoryDef>(LoadAcc) ? LoadAcc : LoadAcc->getDefiningAccess();
auto *NewAccess = MSSAU->createMemoryAccessInBB(
- NewLoad, DefiningAcc, NewLoad->getParent(),
- MemorySSA::BeforeTerminator);
+ NewLoad, nullptr, NewLoad->getParent(), MemorySSA::BeforeTerminator);
if (auto *NewDef = dyn_cast<MemoryDef>(NewAccess))
MSSAU->insertDef(NewDef, /*RenameUses=*/true);
else
@@ -1449,8 +1438,7 @@ void GVNPass::eliminatePartiallyRedundantLoad(
if (auto *RangeMD = Load->getMetadata(LLVMContext::MD_range))
NewLoad->setMetadata(LLVMContext::MD_range, RangeMD);
if (auto *AccessMD = Load->getMetadata(LLVMContext::MD_access_group))
- if (LI &&
- LI->getLoopFor(Load->getParent()) == LI->getLoopFor(UnavailableBlock))
+ if (LI->getLoopFor(Load->getParent()) == LI->getLoopFor(UnavailableBlock))
NewLoad->setMetadata(LLVMContext::MD_access_group, AccessMD);
// We do not propagate the old load's debug location, because the new
@@ -1487,6 +1475,7 @@ void GVNPass::eliminatePartiallyRedundantLoad(
// Perform PHI construction.
Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, *this);
// ConstructSSAForLoadSet is responsible for combining metadata.
+ ICF->removeUsersOf(Load);
Load->replaceAllUsesWith(V);
if (isa<PHINode>(V))
V->takeName(Load);
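
The new ICF->removeUsersOf(Load) call, repeated at every load-elimination site in this patch, appears intended to drop cached implicit-control-flow information about Load's users before the RAUW rewrites their operands, so that no stale cache entries survive the replacement. The pattern in isolation:

    ICF->removeUsersOf(Load);    // invalidate cached info for Load's users
    Load->replaceAllUsesWith(V); // now safe to rewrite the uses
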
@@ -1757,9 +1746,6 @@ bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
bool GVNPass::performLoopLoadPRE(LoadInst *Load,
AvailValInBlkVect &ValuesPerBlock,
UnavailBlkVect &UnavailableBlocks) {
- if (!LI)
- return false;
-
const Loop *L = LI->getLoopFor(Load->getParent());
// TODO: Generalize to other loop blocks that dominate the latch.
if (!L || L->getHeader() != Load->getParent())
@@ -1906,6 +1892,7 @@ bool GVNPass::processNonLocalLoad(LoadInst *Load) {
// Perform PHI construction.
Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, *this);
// ConstructSSAForLoadSet is responsible for combining metadata.
+ ICF->removeUsersOf(Load);
Load->replaceAllUsesWith(V);
if (isa<PHINode>(V))
@@ -1927,7 +1914,7 @@ bool GVNPass::processNonLocalLoad(LoadInst *Load) {
// Step 4: Eliminate partial redundancy.
if (!isPREEnabled() || !isLoadPREEnabled())
return Changed;
- if (!isLoadInLoopPREEnabled() && LI && LI->getLoopFor(Load->getParent()))
+ if (!isLoadInLoopPREEnabled() && LI->getLoopFor(Load->getParent()))
return Changed;
if (performLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) ||
@@ -2003,12 +1990,12 @@ bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
if (Cond->isZero()) {
Type *Int8Ty = Type::getInt8Ty(V->getContext());
+ Type *PtrTy = PointerType::get(V->getContext(), 0);
// Insert a new store to null instruction before the load to indicate that
// this code is not reachable. FIXME: We could insert unreachable
// instruction directly because we can modify the CFG.
auto *NewS = new StoreInst(PoisonValue::get(Int8Ty),
- Constant::getNullValue(Int8Ty->getPointerTo()),
- IntrinsicI);
+ Constant::getNullValue(PtrTy), IntrinsicI);
if (MSSAU) {
const MemoryUseOrDef *FirstNonDom = nullptr;
const auto *AL =
@@ -2028,14 +2015,12 @@ bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
}
}
- // This added store is to null, so it will never be executed and we can
- // just use the LiveOnEntry def as defining access.
auto *NewDef =
FirstNonDom ? MSSAU->createMemoryAccessBefore(
- NewS, MSSAU->getMemorySSA()->getLiveOnEntryDef(),
+ NewS, nullptr,
const_cast<MemoryUseOrDef *>(FirstNonDom))
: MSSAU->createMemoryAccessInBB(
- NewS, MSSAU->getMemorySSA()->getLiveOnEntryDef(),
+ NewS, nullptr,
NewS->getParent(), MemorySSA::BeforeTerminator);
MSSAU->insertDef(cast<MemoryDef>(NewDef), /*RenameUses=*/false);
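
Two migrations meet in these hunks: the defining access passed to the MemorySSA updater becomes nullptr (letting the updater compute it itself), and pointer construction moves to the opaque-pointer API. The latter idiom, as a sketch (Ctx stands for any in-scope LLVMContext):

    // Opaque pointers: the type no longer encodes a pointee, so it is
    // built from a context plus an address space, replacing the
    // deprecated Int8Ty->getPointerTo() spelling.
    Type *PtrTy = PointerType::get(Ctx, /*AddressSpace=*/0);
    Constant *NullPtr = Constant::getNullValue(PtrTy); // i.e. "ptr null"
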
@@ -2182,6 +2167,7 @@ bool GVNPass::processLoad(LoadInst *L) {
Value *AvailableValue = AV->MaterializeAdjustedValue(L, L, *this);
// MaterializeAdjustedValue is responsible for combining metadata.
+ ICF->removeUsersOf(L);
L->replaceAllUsesWith(AvailableValue);
markInstructionForDeletion(L);
if (MSSAU)
@@ -2700,7 +2686,7 @@ bool GVNPass::processInstruction(Instruction *I) {
/// runOnFunction - This is the main transformation entry point for a function.
bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
const TargetLibraryInfo &RunTLI, AAResults &RunAA,
- MemoryDependenceResults *RunMD, LoopInfo *LI,
+ MemoryDependenceResults *RunMD, LoopInfo &LI,
OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
AC = &RunAC;
DT = &RunDT;
@@ -2710,7 +2696,7 @@ bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
MD = RunMD;
ImplicitControlFlowTracking ImplicitCFT;
ICF = &ImplicitCFT;
- this->LI = LI;
+ this->LI = &LI;
VN.setMemDep(MD);
ORE = RunORE;
InvalidBlockRPONumbers = true;
@@ -2724,7 +2710,7 @@ bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
// Merge unconditional branches, allowing PRE to catch more
// optimization opportunities.
for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
- bool removedBlock = MergeBlockIntoPredecessor(&BB, &DTU, LI, MSSAU, MD);
+ bool removedBlock = MergeBlockIntoPredecessor(&BB, &DTU, &LI, MSSAU, MD);
if (removedBlock)
++NumGVNBlocks;
@@ -3007,9 +2993,9 @@ bool GVNPass::performScalarPRE(Instruction *CurInst) {
++NumGVNPRE;
// Create a PHI to make the value available in this block.
- PHINode *Phi =
- PHINode::Create(CurInst->getType(), predMap.size(),
- CurInst->getName() + ".pre-phi", &CurrentBlock->front());
+ PHINode *Phi = PHINode::Create(CurInst->getType(), predMap.size(),
+ CurInst->getName() + ".pre-phi");
+ Phi->insertBefore(CurrentBlock->begin());
for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
if (Value *V = predMap[i].first) {
// If we use an existing value in this phi, we have to patch the original
@@ -3300,8 +3286,6 @@ public:
if (skipFunction(F))
return false;
- auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
-
auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
return Impl.runImpl(
F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
@@ -3311,7 +3295,7 @@ public:
Impl.isMemDepEnabled()
? &getAnalysis<MemoryDependenceWrapperPass>().getMemDep()
: nullptr,
- LIWP ? &LIWP->getLoopInfo() : nullptr,
+ getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
&getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(),
MSSAWP ? &MSSAWP->getMSSA() : nullptr);
}
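
The PHI-creation change in performScalarPRE above (and the identical one in GVNSink just below) follows the new insertion idiom: build the instruction detached, then place it with an iterator rather than an Instruction*, which appears tied to the ongoing debug-info record work that needs exact positions. Sketch, with Ty, NumPreds and BB illustrative:

    PHINode *Phi = PHINode::Create(Ty, NumPreds, "pre-phi");
    Phi->insertBefore(BB->begin()); // iterator-based insertion
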
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp
index 26a6978656e6..2b38831139a5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -850,8 +850,9 @@ void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
// Create a new PHI in the successor block and populate it.
auto *Op = I0->getOperand(O);
assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
- auto *PN = PHINode::Create(Op->getType(), Insts.size(),
- Op->getName() + ".sink", &BBEnd->front());
+ auto *PN =
+ PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
+ PN->insertBefore(BBEnd->begin());
for (auto *I : Insts)
PN->addIncoming(I->getOperand(O), I->getParent());
NewOperands.push_back(PN);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index 62b40a23e38c..3bbf6642a90c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -45,16 +45,14 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
@@ -123,12 +121,12 @@ static void eliminateGuard(Instruction *GuardInst, MemorySSAUpdater *MSSAU) {
/// condition should stay invariant. Otherwise there can be a miscompile, like
/// the one described at https://github.com/llvm/llvm-project/issues/60234. The
/// safest way to do it is to expand the new condition at WC's block.
-static Instruction *findInsertionPointForWideCondition(Instruction *Guard) {
- Value *Condition, *WC;
- BasicBlock *IfTrue, *IfFalse;
- if (parseWidenableBranch(Guard, Condition, WC, IfTrue, IfFalse))
+static Instruction *findInsertionPointForWideCondition(Instruction *WCOrGuard) {
+ if (isGuard(WCOrGuard))
+ return WCOrGuard;
+ if (auto WC = extractWidenableCondition(WCOrGuard))
return cast<Instruction>(WC);
- return Guard;
+ return nullptr;
}
class GuardWideningImpl {
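
The rewritten helper separates the two shapes it can be handed, and can now fail; callers gain a nullptr check further down (`if (!WideningPoint) continue;`). The two accepted shapes, shown as illustrative IR:

    // isGuard(I): an intrinsic guard.
    //   call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
    //
    // extractWidenableCondition(I): a widenable branch; the returned
    // instruction is the widenable.condition call inside its condition.
    //   %wc = call i1 @llvm.experimental.widenable.condition()
    //   %c  = and i1 %cond, %wc
    //   br i1 %c, label %guarded, label %deopt
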
@@ -157,8 +155,8 @@ class GuardWideningImpl {
/// maps BasicBlocks to the set of guards seen in that block.
bool eliminateInstrViaWidening(
Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
- const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
- GuardsPerBlock, bool InvertCondition = false);
+ const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>>
+ &GuardsPerBlock);
/// Used to keep track of which widening potential is more effective.
enum WideningScore {
@@ -181,11 +179,12 @@ class GuardWideningImpl {
static StringRef scoreTypeToString(WideningScore WS);
/// Compute the score for widening the condition in \p DominatedInstr
- /// into \p DominatingGuard. If \p InvertCond is set, then we widen the
- /// inverted condition of the dominating guard.
+ /// into \p ToWiden at the insertion point \p WideningPoint, where \p
+ /// ChecksToHoist are the parsed checks of \p DominatedInstr and \p
+ /// ChecksToWiden those of \p ToWiden.
WideningScore computeWideningScore(Instruction *DominatedInstr,
- Instruction *DominatingGuard,
- bool InvertCond);
+ Instruction *ToWiden,
+ Instruction *WideningPoint,
+ SmallVectorImpl<Value *> &ChecksToHoist,
+ SmallVectorImpl<Value *> &ChecksToWiden);
/// Helper to check if \p V can be hoisted to \p InsertPos.
bool canBeHoistedTo(const Value *V, const Instruction *InsertPos) const {
@@ -196,19 +195,36 @@ class GuardWideningImpl {
bool canBeHoistedTo(const Value *V, const Instruction *InsertPos,
SmallPtrSetImpl<const Instruction *> &Visited) const;
+ bool canBeHoistedTo(const SmallVectorImpl<Value *> &Checks,
+ const Instruction *InsertPos) const {
+ return all_of(Checks,
+ [&](const Value *V) { return canBeHoistedTo(V, InsertPos); });
+ }
/// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c
/// canBeHoistedTo returned true.
void makeAvailableAt(Value *V, Instruction *InsertPos) const;
+ void makeAvailableAt(const SmallVectorImpl<Value *> &Checks,
+ Instruction *InsertPos) const {
+ for (Value *V : Checks)
+ makeAvailableAt(V, InsertPos);
+ }
+
/// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try
- /// to generate an expression computing the logical AND of \p Cond0 and (\p
- /// Cond1 XOR \p InvertCondition).
- /// Return true if the expression computing the AND is only as
- /// expensive as computing one of the two. If \p InsertPt is true then
- /// actually generate the resulting expression, make it available at \p
- /// InsertPt and return it in \p Result (else no change to the IR is made).
- bool widenCondCommon(Value *Cond0, Value *Cond1, Instruction *InsertPt,
- Value *&Result, bool InvertCondition);
+ /// to generate an expression computing the logical AND of \p ChecksToHoist
+ /// and \p ChecksToWiden. Return an engaged optional if computing the AND is
+ /// only as expensive as computing one of the set of expressions, and
+ /// std::nullopt otherwise. If \p InsertPt is non-null, actually generate
+ /// the resulting expression, make it available at \p InsertPt and return it
+ /// as the optional's value (else no change to the IR is made).
+ std::optional<Value *> mergeChecks(SmallVectorImpl<Value *> &ChecksToHoist,
+ SmallVectorImpl<Value *> &ChecksToWiden,
+ Instruction *InsertPt);
+
+ /// Generate the logical AND of \p ChecksToHoist and \p OldCondition and
+ /// make it available at \p InsertPt.
+ Value *hoistChecks(SmallVectorImpl<Value *> &ChecksToHoist,
+ Value *OldCondition, Instruction *InsertPt);
/// Adds freeze to Orig and push it as far as possible very aggressively.
/// Also replaces all uses of frozen instruction with frozen version.
@@ -253,16 +269,19 @@ class GuardWideningImpl {
}
};
- /// Parse \p CheckCond into a conjunction (logical-and) of range checks; and
+ /// Parse \p ToParse into a conjunction (logical-and) of range checks; and
/// append them to \p Checks. Returns true on success, may clobber \c Checks
/// on failure.
- bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks) {
- SmallPtrSet<const Value *, 8> Visited;
- return parseRangeChecks(CheckCond, Checks, Visited);
+ bool parseRangeChecks(SmallVectorImpl<Value *> &ToParse,
+ SmallVectorImpl<RangeCheck> &Checks) {
+ for (auto CheckCond : ToParse) {
+ if (!parseRangeChecks(CheckCond, Checks))
+ return false;
+ }
+ return true;
}
- bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks,
- SmallPtrSetImpl<const Value *> &Visited);
+ bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks);
/// Combine the checks in \p Checks into a smaller set of checks and append
/// them into \p CombinedChecks. Return true on success (i.e. all of checks
@@ -271,23 +290,24 @@ class GuardWideningImpl {
bool combineRangeChecks(SmallVectorImpl<RangeCheck> &Checks,
SmallVectorImpl<RangeCheck> &CombinedChecks) const;
- /// Can we compute the logical AND of \p Cond0 and \p Cond1 for the price of
- /// computing only one of the two expressions?
- bool isWideningCondProfitable(Value *Cond0, Value *Cond1, bool InvertCond) {
- Value *ResultUnused;
- return widenCondCommon(Cond0, Cond1, /*InsertPt=*/nullptr, ResultUnused,
- InvertCond);
+ /// Can we compute the logical AND of \p ChecksToHoist and \p ChecksToWiden
+ /// for the price of computing only one of the set of expressions?
+ bool isWideningCondProfitable(SmallVectorImpl<Value *> &ChecksToHoist,
+ SmallVectorImpl<Value *> &ChecksToWiden) {
+ return mergeChecks(ChecksToHoist, ChecksToWiden, /*InsertPt=*/nullptr)
+ .has_value();
}
- /// If \p InvertCondition is false, Widen \p ToWiden to fail if
- /// \p NewCondition is false, otherwise make it fail if \p NewCondition is
- /// true (in addition to whatever it is already checking).
- void widenGuard(Instruction *ToWiden, Value *NewCondition,
- bool InvertCondition) {
- Value *Result;
+ /// Widen \p ChecksToWiden to fail if any of \p ChecksToHoist is false
+ void widenGuard(SmallVectorImpl<Value *> &ChecksToHoist,
+ SmallVectorImpl<Value *> &ChecksToWiden,
+ Instruction *ToWiden) {
Instruction *InsertPt = findInsertionPointForWideCondition(ToWiden);
- widenCondCommon(getCondition(ToWiden), NewCondition, InsertPt, Result,
- InvertCondition);
+ auto MergedCheck = mergeChecks(ChecksToHoist, ChecksToWiden, InsertPt);
+ Value *Result = MergedCheck ? *MergedCheck
+ : hoistChecks(ChecksToHoist,
+ getCondition(ToWiden), InsertPt);
+
if (isGuardAsWidenableBranch(ToWiden)) {
setWidenableBranchCond(cast<BranchInst>(ToWiden), Result);
return;
@@ -353,12 +373,15 @@ bool GuardWideningImpl::run() {
bool GuardWideningImpl::eliminateInstrViaWidening(
Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
- const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
- GuardsInBlock, bool InvertCondition) {
+ const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>>
+ &GuardsInBlock) {
+ SmallVector<Value *> ChecksToHoist;
+ parseWidenableGuard(Instr, ChecksToHoist);
// Ignore trivial true or false conditions. These instructions will be
// trivially eliminated by any cleanup pass. Do not erase them because other
// guards can possibly be widened into them.
- if (isa<ConstantInt>(getCondition(Instr)))
+ if (ChecksToHoist.empty() ||
+ (ChecksToHoist.size() == 1 && isa<ConstantInt>(ChecksToHoist.front())))
return false;
Instruction *BestSoFar = nullptr;
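
parseWidenableGuard() is what turns a guard (or widenable branch) into the flat list of and-ed checks used throughout the rewritten code; the triviality bail-out above now inspects that list instead of a single condition value. Sketch, with GuardInst illustrative:

    SmallVector<Value *> Checks;
    parseWidenableGuard(GuardInst, Checks); // collects the and-ed leaves
    bool Trivial = Checks.empty() ||
                   (Checks.size() == 1 && isa<ConstantInt>(Checks.front()));
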
@@ -394,10 +417,15 @@ bool GuardWideningImpl::eliminateInstrViaWidening(
assert((i == (e - 1)) == (Instr->getParent() == CurBB) && "Bad DFS?");
for (auto *Candidate : make_range(I, E)) {
- auto Score = computeWideningScore(Instr, Candidate, InvertCondition);
- LLVM_DEBUG(dbgs() << "Score between " << *getCondition(Instr)
- << " and " << *getCondition(Candidate) << " is "
- << scoreTypeToString(Score) << "\n");
+ auto *WideningPoint = findInsertionPointForWideCondition(Candidate);
+ if (!WideningPoint)
+ continue;
+ SmallVector<Value *> CandidateChecks;
+ parseWidenableGuard(Candidate, CandidateChecks);
+ auto Score = computeWideningScore(Instr, Candidate, WideningPoint,
+ ChecksToHoist, CandidateChecks);
+ LLVM_DEBUG(dbgs() << "Score between " << *Instr << " and " << *Candidate
+ << " is " << scoreTypeToString(Score) << "\n");
if (Score > BestScoreSoFar) {
BestScoreSoFar = Score;
BestSoFar = Candidate;
@@ -416,22 +444,22 @@ bool GuardWideningImpl::eliminateInstrViaWidening(
LLVM_DEBUG(dbgs() << "Widening " << *Instr << " into " << *BestSoFar
<< " with score " << scoreTypeToString(BestScoreSoFar)
<< "\n");
- widenGuard(BestSoFar, getCondition(Instr), InvertCondition);
- auto NewGuardCondition = InvertCondition
- ? ConstantInt::getFalse(Instr->getContext())
- : ConstantInt::getTrue(Instr->getContext());
+ SmallVector<Value *> ChecksToWiden;
+ parseWidenableGuard(BestSoFar, ChecksToWiden);
+ widenGuard(ChecksToHoist, ChecksToWiden, BestSoFar);
+ auto NewGuardCondition = ConstantInt::getTrue(Instr->getContext());
setCondition(Instr, NewGuardCondition);
EliminatedGuardsAndBranches.push_back(Instr);
WidenedGuards.insert(BestSoFar);
return true;
}
-GuardWideningImpl::WideningScore
-GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr,
- Instruction *DominatingGuard,
- bool InvertCond) {
+GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
+ Instruction *DominatedInstr, Instruction *ToWiden,
+ Instruction *WideningPoint, SmallVectorImpl<Value *> &ChecksToHoist,
+ SmallVectorImpl<Value *> &ChecksToWiden) {
Loop *DominatedInstrLoop = LI.getLoopFor(DominatedInstr->getParent());
- Loop *DominatingGuardLoop = LI.getLoopFor(DominatingGuard->getParent());
+ Loop *DominatingGuardLoop = LI.getLoopFor(WideningPoint->getParent());
bool HoistingOutOfLoop = false;
if (DominatingGuardLoop != DominatedInstrLoop) {
@@ -444,10 +472,12 @@ GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr,
HoistingOutOfLoop = true;
}
- auto *WideningPoint = findInsertionPointForWideCondition(DominatingGuard);
- if (!canBeHoistedTo(getCondition(DominatedInstr), WideningPoint))
+ if (!canBeHoistedTo(ChecksToHoist, WideningPoint))
return WS_IllegalOrNegative;
- if (!canBeHoistedTo(getCondition(DominatingGuard), WideningPoint))
+ // Later, in GuardWideningImpl::hoistChecks, the entire condition may be
+ // widened rather than the parsed list of checks, so we must also verify
+ // that the whole condition can be hoisted.
+ if (!canBeHoistedTo(getCondition(ToWiden), WideningPoint))
return WS_IllegalOrNegative;
// If the guard was conditional executed, it may never be reached
@@ -458,8 +488,7 @@ GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr,
// here. TODO: evaluate cost model for spurious deopt
// NOTE: As written, this also lets us hoist right over another guard which
// is essentially just another spelling for control flow.
- if (isWideningCondProfitable(getCondition(DominatedInstr),
- getCondition(DominatingGuard), InvertCond))
+ if (isWideningCondProfitable(ChecksToHoist, ChecksToWiden))
return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;
if (HoistingOutOfLoop)
@@ -495,7 +524,7 @@ GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr,
// control flow (guards, calls which throw, etc...). That choice appears
// arbitrary (we assume that implicit control flow exits are all rare).
auto MaybeHoistingToHotterBlock = [&]() {
- const auto *DominatingBlock = DominatingGuard->getParent();
+ const auto *DominatingBlock = WideningPoint->getParent();
const auto *DominatedBlock = DominatedInstr->getParent();
// Descend as low as we can, always taking the likely successor.
@@ -521,7 +550,8 @@ GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr,
if (!DT.dominates(DominatingBlock, DominatedBlock))
return true;
// TODO: diamond, triangle cases
- if (!PDT) return true;
+ if (!PDT)
+ return true;
return !PDT->dominates(DominatedBlock, DominatingBlock);
};
@@ -566,35 +596,47 @@ void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const {
}
// Return Instruction before which we can insert freeze for the value V as close
-// to def as possible. If there is no place to add freeze, return nullptr.
-static Instruction *getFreezeInsertPt(Value *V, const DominatorTree &DT) {
+// to def as possible. If there is no place to add freeze, return empty.
+static std::optional<BasicBlock::iterator>
+getFreezeInsertPt(Value *V, const DominatorTree &DT) {
auto *I = dyn_cast<Instruction>(V);
if (!I)
- return &*DT.getRoot()->getFirstNonPHIOrDbgOrAlloca();
+ return DT.getRoot()->getFirstNonPHIOrDbgOrAlloca()->getIterator();
- auto *Res = I->getInsertionPointAfterDef();
+ std::optional<BasicBlock::iterator> Res = I->getInsertionPointAfterDef();
// If there is no place to add freeze - return std::nullopt.
- if (!Res || !DT.dominates(I, Res))
- return nullptr;
+ if (!Res || !DT.dominates(I, &**Res))
+ return std::nullopt;
+
+ Instruction *ResInst = &**Res;
// If there is a User dominated by original I, then it should be dominated
// by Freeze instruction as well.
if (any_of(I->users(), [&](User *U) {
Instruction *User = cast<Instruction>(U);
- return Res != User && DT.dominates(I, User) && !DT.dominates(Res, User);
+ return ResInst != User && DT.dominates(I, User) &&
+ !DT.dominates(ResInst, User);
}))
- return nullptr;
+ return std::nullopt;
return Res;
}
Value *GuardWideningImpl::freezeAndPush(Value *Orig, Instruction *InsertPt) {
if (isGuaranteedNotToBePoison(Orig, nullptr, InsertPt, &DT))
return Orig;
- Instruction *InsertPtAtDef = getFreezeInsertPt(Orig, DT);
- if (!InsertPtAtDef)
- return new FreezeInst(Orig, "gw.freeze", InsertPt);
- if (isa<Constant>(Orig) || isa<GlobalValue>(Orig))
- return new FreezeInst(Orig, "gw.freeze", InsertPtAtDef);
+ std::optional<BasicBlock::iterator> InsertPtAtDef =
+ getFreezeInsertPt(Orig, DT);
+ if (!InsertPtAtDef) {
+ FreezeInst *FI = new FreezeInst(Orig, "gw.freeze");
+ FI->insertBefore(InsertPt);
+ return FI;
+ }
+ if (isa<Constant>(Orig) || isa<GlobalValue>(Orig)) {
+ BasicBlock::iterator InsertPt = *InsertPtAtDef;
+ FreezeInst *FI = new FreezeInst(Orig, "gw.freeze");
+ FI->insertBefore(*InsertPt->getParent(), InsertPt);
+ return FI;
+ }
SmallSet<Value *, 16> Visited;
SmallVector<Value *, 16> Worklist;
@@ -613,8 +655,10 @@ Value *GuardWideningImpl::freezeAndPush(Value *Orig, Instruction *InsertPt) {
if (Visited.insert(Def).second) {
if (isGuaranteedNotToBePoison(Def, nullptr, InsertPt, &DT))
return true;
- CacheOfFreezes[Def] = new FreezeInst(Def, Def->getName() + ".gw.fr",
- getFreezeInsertPt(Def, DT));
+ BasicBlock::iterator InsertPt = *getFreezeInsertPt(Def, DT);
+ FreezeInst *FI = new FreezeInst(Def, Def->getName() + ".gw.fr");
+ FI->insertBefore(*InsertPt->getParent(), InsertPt);
+ CacheOfFreezes[Def] = FI;
}
if (CacheOfFreezes.count(Def))
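
All the freeze-insertion sites in this hunk share one idiom: dereference the optional iterator (which the earlier canBeHoistedTo / getFreezeInsertPt checks have established is engaged), create the FreezeInst detached, then insert it through the two-argument, iterator-based insertBefore. In isolation:

    BasicBlock::iterator IP = *getFreezeInsertPt(Def, DT); // engaged here
    FreezeInst *FI = new FreezeInst(Def, Def->getName() + ".gw.fr");
    FI->insertBefore(*IP->getParent(), IP); // block + iterator insertion
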
@@ -655,8 +699,9 @@ Value *GuardWideningImpl::freezeAndPush(Value *Orig, Instruction *InsertPt) {
Value *Result = Orig;
for (Value *V : NeedFreeze) {
- auto *FreezeInsertPt = getFreezeInsertPt(V, DT);
- FreezeInst *FI = new FreezeInst(V, V->getName() + ".gw.fr", FreezeInsertPt);
+ BasicBlock::iterator FreezeInsertPt = *getFreezeInsertPt(V, DT);
+ FreezeInst *FI = new FreezeInst(V, V->getName() + ".gw.fr");
+ FI->insertBefore(*FreezeInsertPt->getParent(), FreezeInsertPt);
++FreezeAdded;
if (V == Orig)
Result = FI;
@@ -667,20 +712,25 @@ Value *GuardWideningImpl::freezeAndPush(Value *Orig, Instruction *InsertPt) {
return Result;
}
-bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
- Instruction *InsertPt, Value *&Result,
- bool InvertCondition) {
+std::optional<Value *>
+GuardWideningImpl::mergeChecks(SmallVectorImpl<Value *> &ChecksToHoist,
+ SmallVectorImpl<Value *> &ChecksToWiden,
+ Instruction *InsertPt) {
using namespace llvm::PatternMatch;
+ Value *Result = nullptr;
{
// L >u C0 && L >u C1 -> L >u max(C0, C1)
ConstantInt *RHS0, *RHS1;
Value *LHS;
ICmpInst::Predicate Pred0, Pred1;
- if (match(Cond0, m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) &&
- match(Cond1, m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) {
- if (InvertCondition)
- Pred1 = ICmpInst::getInversePredicate(Pred1);
+ // TODO: Support searching for pairs to merge from both whole lists of
+ // ChecksToHoist and ChecksToWiden.
+ if (ChecksToWiden.size() == 1 && ChecksToHoist.size() == 1 &&
+ match(ChecksToWiden.front(),
+ m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) &&
+ match(ChecksToHoist.front(),
+ m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) {
ConstantRange CR0 =
ConstantRange::makeExactICmpRegion(Pred0, RHS0->getValue());
@@ -697,12 +747,12 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
if (Intersect->getEquivalentICmp(Pred, NewRHSAP)) {
if (InsertPt) {
ConstantInt *NewRHS =
- ConstantInt::get(Cond0->getContext(), NewRHSAP);
+ ConstantInt::get(InsertPt->getContext(), NewRHSAP);
assert(canBeHoistedTo(LHS, InsertPt) && "must be");
makeAvailableAt(LHS, InsertPt);
Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk");
}
- return true;
+ return Result;
}
}
}
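
A worked instance of the ConstantRange merge above, for two checks on the same value L, say `icmp ugt i32 %L, 5` and `icmp ugt i32 %L, 9` (the i32 width and constants are illustrative):

    ConstantRange CR0 = ConstantRange::makeExactICmpRegion(
        ICmpInst::ICMP_UGT, APInt(32, 5)); // {6 .. 2^32-1}
    ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
        ICmpInst::ICMP_UGT, APInt(32, 9)); // {10 .. 2^32-1}
    std::optional<ConstantRange> Intersect = CR0.exactIntersectWith(CR1);
    CmpInst::Predicate Pred;
    APInt NewRHS;
    if (Intersect && Intersect->getEquivalentICmp(Pred, NewRHS)) {
      // Pred == ICMP_UGT, NewRHS == 9: both checks fold to "L >u 9",
      // so the pair is merged for the price of a single compare.
    }
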
@@ -710,12 +760,10 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
{
SmallVector<GuardWideningImpl::RangeCheck, 4> Checks, CombinedChecks;
- // TODO: Support InvertCondition case?
- if (!InvertCondition &&
- parseRangeChecks(Cond0, Checks) && parseRangeChecks(Cond1, Checks) &&
+ if (parseRangeChecks(ChecksToWiden, Checks) &&
+ parseRangeChecks(ChecksToHoist, Checks) &&
combineRangeChecks(Checks, CombinedChecks)) {
if (InsertPt) {
- Result = nullptr;
for (auto &RC : CombinedChecks) {
makeAvailableAt(RC.getCheckInst(), InsertPt);
if (Result)
@@ -728,40 +776,32 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
Result->setName("wide.chk");
Result = freezeAndPush(Result, InsertPt);
}
- return true;
+ return Result;
}
}
+ // We were not able to compute ChecksToHoist AND ChecksToWiden for the price
+ // of one.
+ return std::nullopt;
+}
- // Base case -- just logical-and the two conditions together.
-
- if (InsertPt) {
- makeAvailableAt(Cond0, InsertPt);
- makeAvailableAt(Cond1, InsertPt);
- if (InvertCondition)
- Cond1 = BinaryOperator::CreateNot(Cond1, "inverted", InsertPt);
- Cond1 = freezeAndPush(Cond1, InsertPt);
- Result = BinaryOperator::CreateAnd(Cond0, Cond1, "wide.chk", InsertPt);
- }
-
- // We were not able to compute Cond0 AND Cond1 for the price of one.
- return false;
+Value *GuardWideningImpl::hoistChecks(SmallVectorImpl<Value *> &ChecksToHoist,
+ Value *OldCondition,
+ Instruction *InsertPt) {
+ assert(!ChecksToHoist.empty());
+ IRBuilder<> Builder(InsertPt);
+ makeAvailableAt(ChecksToHoist, InsertPt);
+ makeAvailableAt(OldCondition, InsertPt);
+ Value *Result = Builder.CreateAnd(ChecksToHoist);
+ Result = freezeAndPush(Result, InsertPt);
+ Result = Builder.CreateAnd(OldCondition, Result);
+ Result->setName("wide.chk");
+ return Result;
}
bool GuardWideningImpl::parseRangeChecks(
- Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
- SmallPtrSetImpl<const Value *> &Visited) {
- if (!Visited.insert(CheckCond).second)
- return true;
-
+ Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks) {
using namespace llvm::PatternMatch;
- {
- Value *AndLHS, *AndRHS;
- if (match(CheckCond, m_And(m_Value(AndLHS), m_Value(AndRHS))))
- return parseRangeChecks(AndLHS, Checks) &&
- parseRangeChecks(AndRHS, Checks);
- }
-
auto *IC = dyn_cast<ICmpInst>(CheckCond);
if (!IC || !IC->getOperand(0)->getType()->isIntegerTy() ||
(IC->getPredicate() != ICmpInst::ICMP_ULT &&
@@ -934,6 +974,15 @@ StringRef GuardWideningImpl::scoreTypeToString(WideningScore WS) {
PreservedAnalyses GuardWideningPass::run(Function &F,
FunctionAnalysisManager &AM) {
+ // Avoid requesting analyses if there are no guards or widenable conditions.
+ auto *GuardDecl = F.getParent()->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_guard));
+ bool HasIntrinsicGuards = GuardDecl && !GuardDecl->use_empty();
+ auto *WCDecl = F.getParent()->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_widenable_condition));
+ bool HasWidenableConditions = WCDecl && !WCDecl->use_empty();
+ if (!HasIntrinsicGuards && !HasWidenableConditions)
+ return PreservedAnalyses::all();
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &LI = AM.getResult<LoopAnalysis>(F);
auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
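
The early-out added above relies on a module-level fact: every call to an intrinsic is a use of its declaration, so a declaration with no uses proves there are no guards anywhere in the module, and hence none in F. Reduced to its core:

    Module &M = *F.getParent();
    Function *GuardDecl =
        M.getFunction(Intrinsic::getName(Intrinsic::experimental_guard));
    bool MayHaveGuards = GuardDecl && !GuardDecl->use_empty();
    // The same test runs for Intrinsic::experimental_widenable_condition;
    // if both are unused, the pass returns PreservedAnalyses::all().
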
@@ -976,109 +1025,3 @@ PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM,
PA.preserve<MemorySSAAnalysis>();
return PA;
}
-
-namespace {
-struct GuardWideningLegacyPass : public FunctionPass {
- static char ID;
-
- GuardWideningLegacyPass() : FunctionPass(ID) {
- initializeGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
- std::unique_ptr<MemorySSAUpdater> MSSAU;
- if (MSSAWP)
- MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA());
- return GuardWideningImpl(DT, &PDT, LI, AC, MSSAU ? MSSAU.get() : nullptr,
- DT.getRootNode(),
- [](BasicBlock *) { return true; })
- .run();
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
-};
-
-/// Same as above, but restricted to a single loop at a time. Can be
-/// scheduled with other loop passes w/o breaking out of LPM
-struct LoopGuardWideningLegacyPass : public LoopPass {
- static char ID;
-
- LoopGuardWideningLegacyPass() : LoopPass(ID) {
- initializeLoopGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override {
- if (skipLoop(L))
- return false;
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
- *L->getHeader()->getParent());
- auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
- auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
- auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
- std::unique_ptr<MemorySSAUpdater> MSSAU;
- if (MSSAWP)
- MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA());
-
- BasicBlock *RootBB = L->getLoopPredecessor();
- if (!RootBB)
- RootBB = L->getHeader();
- auto BlockFilter = [&](BasicBlock *BB) {
- return BB == RootBB || L->contains(BB);
- };
- return GuardWideningImpl(DT, PDT, LI, AC, MSSAU ? MSSAU.get() : nullptr,
- DT.getNode(RootBB), BlockFilter)
- .run();
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- getLoopAnalysisUsage(AU);
- AU.addPreserved<PostDominatorTreeWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
-};
-}
-
-char GuardWideningLegacyPass::ID = 0;
-char LoopGuardWideningLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(GuardWideningLegacyPass, "guard-widening", "Widen guards",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_END(GuardWideningLegacyPass, "guard-widening", "Widen guards",
- false, false)
-
-INITIALIZE_PASS_BEGIN(LoopGuardWideningLegacyPass, "loop-guard-widening",
- "Widen guards (within a single loop, as a loop pass)",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_END(LoopGuardWideningLegacyPass, "loop-guard-widening",
- "Widen guards (within a single loop, as a loop pass)",
- false, false)
-
-FunctionPass *llvm::createGuardWideningPass() {
- return new GuardWideningLegacyPass();
-}
-
-Pass *llvm::createLoopGuardWideningPass() {
- return new LoopGuardWideningLegacyPass();
-}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 40475d9563b2..41c4d6236173 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1997,20 +1997,12 @@ bool IndVarSimplify::run(Loop *L) {
TTI, PreHeader->getTerminator()))
continue;
- // Check preconditions for proper SCEVExpander operation. SCEV does not
- // express SCEVExpander's dependencies, such as LoopSimplify. Instead
- // any pass that uses the SCEVExpander must do it. This does not work
- // well for loop passes because SCEVExpander makes assumptions about
- // all loops, while LoopPassManager only forces the current loop to be
- // simplified.
- //
- // FIXME: SCEV expansion has no way to bail out, so the caller must
- // explicitly check any assumptions made by SCEV. Brittle.
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
- if (!AR || AR->getLoop()->getLoopPreheader())
- Changed |= linearFunctionTestReplace(L, ExitingBB,
- ExitCount, IndVar,
- Rewriter);
+ if (!Rewriter.isSafeToExpand(ExitCount))
+ continue;
+
+ Changed |= linearFunctionTestReplace(L, ExitingBB,
+ ExitCount, IndVar,
+ Rewriter);
}
}
// Clear the rewriter cache, because values that are in the rewriter's cache
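
The replacement precondition asks the expander itself, instead of re-deriving SCEVExpander's requirements at the call site. A hedged sketch of the general pattern (variable names illustrative; exact expandCodeFor overloads vary by release):

    SCEVExpander Rewriter(SE, DL, "indvars");
    const SCEV *Count = SE.getExitCount(L, ExitingBB);
    if (!isa<SCEVCouldNotCompute>(Count) && Rewriter.isSafeToExpand(Count)) {
      // Expansion itself cannot bail out, so it must only be reached
      // once the expander has said the expression is safe to materialize.
      Value *CountV = Rewriter.expandCodeFor(
          Count, Count->getType(), L->getLoopPreheader()->getTerminator());
      // ... use CountV in the rewritten loop-exit test ...
    }
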
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index b52589baeee7..9df28747570c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -81,6 +81,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopConstrainer.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
@@ -88,10 +89,8 @@
#include <algorithm>
#include <cassert>
#include <iterator>
-#include <limits>
#include <optional>
#include <utility>
-#include <vector>
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -129,8 +128,6 @@ static cl::opt<bool>
PrintScaledBoundaryRangeChecks("irce-print-scaled-boundary-range-checks",
cl::Hidden, cl::init(false));
-static const char *ClonedLoopTag = "irce.loop.clone";
-
#define DEBUG_TYPE "irce"
namespace {
@@ -241,8 +238,6 @@ public:
SmallVectorImpl<InductiveRangeCheck> &Checks, bool &Changed);
};
-struct LoopStructure;
-
class InductiveRangeCheckElimination {
ScalarEvolution &SE;
BranchProbabilityInfo *BPI;
@@ -554,649 +549,6 @@ void InductiveRangeCheck::extractRangeChecksFromBranch(
Checks, Visited);
}
-// Add metadata to the loop L to disable loop optimizations. Callers need to
-// confirm that optimizing loop L is not beneficial.
-static void DisableAllLoopOptsOnLoop(Loop &L) {
- // We do not care about any existing loopID related metadata for L, since we
- // are setting all loop metadata to false.
- LLVMContext &Context = L.getHeader()->getContext();
- // Reserve first location for self reference to the LoopID metadata node.
- MDNode *Dummy = MDNode::get(Context, {});
- MDNode *DisableUnroll = MDNode::get(
- Context, {MDString::get(Context, "llvm.loop.unroll.disable")});
- Metadata *FalseVal =
- ConstantAsMetadata::get(ConstantInt::get(Type::getInt1Ty(Context), 0));
- MDNode *DisableVectorize = MDNode::get(
- Context,
- {MDString::get(Context, "llvm.loop.vectorize.enable"), FalseVal});
- MDNode *DisableLICMVersioning = MDNode::get(
- Context, {MDString::get(Context, "llvm.loop.licm_versioning.disable")});
- MDNode *DisableDistribution= MDNode::get(
- Context,
- {MDString::get(Context, "llvm.loop.distribute.enable"), FalseVal});
- MDNode *NewLoopID =
- MDNode::get(Context, {Dummy, DisableUnroll, DisableVectorize,
- DisableLICMVersioning, DisableDistribution});
- // Set operand 0 to refer to the loop id itself.
- NewLoopID->replaceOperandWith(0, NewLoopID);
- L.setLoopID(NewLoopID);
-}
-
-namespace {
-
-// Keeps track of the structure of a loop. This is similar to llvm::Loop,
-// except that it is more lightweight and can track the state of a loop through
-// changing and potentially invalid IR. This structure also formalizes the
-// kinds of loops we can deal with -- ones that have a single latch that is also
-// an exiting block *and* have a canonical induction variable.
-struct LoopStructure {
- const char *Tag = "";
-
- BasicBlock *Header = nullptr;
- BasicBlock *Latch = nullptr;
-
- // `Latch's terminator instruction is `LatchBr', and its `LatchBrExitIdx'th
- // successor is `LatchExit', the exit block of the loop.
- BranchInst *LatchBr = nullptr;
- BasicBlock *LatchExit = nullptr;
- unsigned LatchBrExitIdx = std::numeric_limits<unsigned>::max();
-
- // The loop represented by this instance of LoopStructure is semantically
- // equivalent to:
- //
- // intN_ty inc = IndVarIncreasing ? 1 : -1;
- // pred_ty predicate = IndVarIncreasing ? ICMP_SLT : ICMP_SGT;
- //
- // for (intN_ty iv = IndVarStart; predicate(iv, LoopExitAt); iv = IndVarBase)
- // ... body ...
-
- Value *IndVarBase = nullptr;
- Value *IndVarStart = nullptr;
- Value *IndVarStep = nullptr;
- Value *LoopExitAt = nullptr;
- bool IndVarIncreasing = false;
- bool IsSignedPredicate = true;
-
- LoopStructure() = default;
-
- template <typename M> LoopStructure map(M Map) const {
- LoopStructure Result;
- Result.Tag = Tag;
- Result.Header = cast<BasicBlock>(Map(Header));
- Result.Latch = cast<BasicBlock>(Map(Latch));
- Result.LatchBr = cast<BranchInst>(Map(LatchBr));
- Result.LatchExit = cast<BasicBlock>(Map(LatchExit));
- Result.LatchBrExitIdx = LatchBrExitIdx;
- Result.IndVarBase = Map(IndVarBase);
- Result.IndVarStart = Map(IndVarStart);
- Result.IndVarStep = Map(IndVarStep);
- Result.LoopExitAt = Map(LoopExitAt);
- Result.IndVarIncreasing = IndVarIncreasing;
- Result.IsSignedPredicate = IsSignedPredicate;
- return Result;
- }
-
- static std::optional<LoopStructure> parseLoopStructure(ScalarEvolution &,
- Loop &, const char *&);
-};
-
-/// This class is used to constrain loops to run within a given iteration space.
-/// The algorithm this class implements is given a Loop and a range [Begin,
-/// End). The algorithm then tries to break out a "main loop" out of the loop
-/// it is given in a way that the "main loop" runs with the induction variable
-/// in a subset of [Begin, End). The algorithm emits appropriate pre and post
-/// loops to run any remaining iterations. The pre loop runs any iterations in
-/// which the induction variable is < Begin, and the post loop runs any
-/// iterations in which the induction variable is >= End.
-class LoopConstrainer {
- // The representation of a clone of the original loop we started out with.
- struct ClonedLoop {
- // The cloned blocks
- std::vector<BasicBlock *> Blocks;
-
- // `Map` maps values in the clonee into values in the cloned version
- ValueToValueMapTy Map;
-
- // An instance of `LoopStructure` for the cloned loop
- LoopStructure Structure;
- };
-
- // Result of rewriting the range of a loop. See changeIterationSpaceEnd for
- // more details on what these fields mean.
- struct RewrittenRangeInfo {
- BasicBlock *PseudoExit = nullptr;
- BasicBlock *ExitSelector = nullptr;
- std::vector<PHINode *> PHIValuesAtPseudoExit;
- PHINode *IndVarEnd = nullptr;
-
- RewrittenRangeInfo() = default;
- };
-
- // Calculated subranges we restrict the iteration space of the main loop to.
- // See the implementation of `calculateSubRanges' for more details on how
- // these fields are computed. `LowLimit` is std::nullopt if there is no
- // restriction on low end of the restricted iteration space of the main loop.
- // `HighLimit` is std::nullopt if there is no restriction on high end of the
- // restricted iteration space of the main loop.
-
- struct SubRanges {
- std::optional<const SCEV *> LowLimit;
- std::optional<const SCEV *> HighLimit;
- };
-
- // Compute a safe set of limits for the main loop to run in -- effectively the
- // intersection of `Range' and the iteration space of the original loop.
- // Return std::nullopt if unable to compute the set of subranges.
- std::optional<SubRanges> calculateSubRanges(bool IsSignedPredicate) const;
-
- // Clone `OriginalLoop' and return the result in CLResult. The IR after
- // running `cloneLoop' is well formed except for the PHI nodes in CLResult --
- // the PHI nodes say that there is an incoming edge from `OriginalPreheader`
- // but there is no such edge.
- void cloneLoop(ClonedLoop &CLResult, const char *Tag) const;
-
- // Create the appropriate loop structure needed to describe a cloned copy of
- // `Original`. The clone is described by `VM`.
- Loop *createClonedLoopStructure(Loop *Original, Loop *Parent,
- ValueToValueMapTy &VM, bool IsSubloop);
-
- // Rewrite the iteration space of the loop denoted by (LS, Preheader). The
- // iteration space of the rewritten loop ends at ExitLoopAt. The start of the
- // iteration space is not changed. `ExitLoopAt' is assumed to be slt
- // `OriginalHeaderCount'.
- //
- // If there are iterations left to execute, control is made to jump to
- // `ContinuationBlock', otherwise they take the normal loop exit. The
- // returned `RewrittenRangeInfo' object is populated as follows:
- //
- // .PseudoExit is a basic block that unconditionally branches to
- // `ContinuationBlock'.
- //
- // .ExitSelector is a basic block that decides, on exit from the loop,
- // whether to branch to the "true" exit or to `PseudoExit'.
- //
- // .PHIValuesAtPseudoExit are PHINodes in `PseudoExit' that compute the value
- // for each PHINode in the loop header on taking the pseudo exit.
- //
- // After changeIterationSpaceEnd, `Preheader' is no longer a legitimate
- // preheader because it is made to branch to the loop header only
- // conditionally.
- RewrittenRangeInfo
- changeIterationSpaceEnd(const LoopStructure &LS, BasicBlock *Preheader,
- Value *ExitLoopAt,
- BasicBlock *ContinuationBlock) const;
-
- // The loop denoted by `LS' has `OldPreheader' as its preheader. This
- // function creates a new preheader for `LS' and returns it.
- BasicBlock *createPreheader(const LoopStructure &LS, BasicBlock *OldPreheader,
- const char *Tag) const;
-
- // `ContinuationBlockAndPreheader' was the continuation block for some call to
- // `changeIterationSpaceEnd' and is the preheader to the loop denoted by `LS'.
- // This function rewrites the PHI nodes in `LS.Header' to start with the
- // correct value.
- void rewriteIncomingValuesForPHIs(
- LoopStructure &LS, BasicBlock *ContinuationBlockAndPreheader,
- const LoopConstrainer::RewrittenRangeInfo &RRI) const;
-
- // Even though we do not preserve any passes at this time, we at least need to
- // keep the parent loop structure consistent. The `LPPassManager' seems to
- // verify this after running a loop pass. This function adds the list of
- // blocks denoted by BBs to this loop's parent loop if required.
- void addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs);
-
- // Some global state.
- Function &F;
- LLVMContext &Ctx;
- ScalarEvolution &SE;
- DominatorTree &DT;
- LoopInfo &LI;
- function_ref<void(Loop *, bool)> LPMAddNewLoop;
-
- // Information about the original loop we started out with.
- Loop &OriginalLoop;
-
- const IntegerType *ExitCountTy = nullptr;
- BasicBlock *OriginalPreheader = nullptr;
-
- // The preheader of the main loop. This may or may not be different from
- // `OriginalPreheader'.
- BasicBlock *MainLoopPreheader = nullptr;
-
- // The range we need to run the main loop in.
- InductiveRangeCheck::Range Range;
-
- // The structure of the main loop (see comment at the beginning of this class
- // for a definition)
- LoopStructure MainLoopStructure;
-
-public:
- LoopConstrainer(Loop &L, LoopInfo &LI,
- function_ref<void(Loop *, bool)> LPMAddNewLoop,
- const LoopStructure &LS, ScalarEvolution &SE,
- DominatorTree &DT, InductiveRangeCheck::Range R)
- : F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()),
- SE(SE), DT(DT), LI(LI), LPMAddNewLoop(LPMAddNewLoop), OriginalLoop(L),
- Range(R), MainLoopStructure(LS) {}
-
- // Entry point for the algorithm. Returns true on success.
- bool run();
-};
-
-} // end anonymous namespace
-
-/// Given a loop with a decreasing induction variable, is it possible to
-/// safely calculate the bounds of a new loop using the given Predicate.
-static bool isSafeDecreasingBound(const SCEV *Start,
- const SCEV *BoundSCEV, const SCEV *Step,
- ICmpInst::Predicate Pred,
- unsigned LatchBrExitIdx,
- Loop *L, ScalarEvolution &SE) {
- if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_SGT &&
- Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_UGT)
- return false;
-
- if (!SE.isAvailableAtLoopEntry(BoundSCEV, L))
- return false;
-
- assert(SE.isKnownNegative(Step) && "expecting negative step");
-
- LLVM_DEBUG(dbgs() << "irce: isSafeDecreasingBound with:\n");
- LLVM_DEBUG(dbgs() << "irce: Start: " << *Start << "\n");
- LLVM_DEBUG(dbgs() << "irce: Step: " << *Step << "\n");
- LLVM_DEBUG(dbgs() << "irce: BoundSCEV: " << *BoundSCEV << "\n");
- LLVM_DEBUG(dbgs() << "irce: Pred: " << Pred << "\n");
- LLVM_DEBUG(dbgs() << "irce: LatchExitBrIdx: " << LatchBrExitIdx << "\n");
-
- bool IsSigned = ICmpInst::isSigned(Pred);
- // The predicate that we need to check that the induction variable lies
- // within bounds.
- ICmpInst::Predicate BoundPred =
- IsSigned ? CmpInst::ICMP_SGT : CmpInst::ICMP_UGT;
-
- if (LatchBrExitIdx == 1)
- return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, BoundSCEV);
-
- assert(LatchBrExitIdx == 0 &&
- "LatchBrExitIdx should be either 0 or 1");
-
- const SCEV *StepPlusOne = SE.getAddExpr(Step, SE.getOne(Step->getType()));
- unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
- APInt Min = IsSigned ? APInt::getSignedMinValue(BitWidth) :
- APInt::getMinValue(BitWidth);
- const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Min), StepPlusOne);
-
- const SCEV *MinusOne =
- SE.getMinusSCEV(BoundSCEV, SE.getOne(BoundSCEV->getType()));
-
- return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, MinusOne) &&
- SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit);
-
-}
-
-/// Given a loop with an increasing induction variable, is it possible to
-/// safely calculate the bounds of a new loop using the given Predicate.
-static bool isSafeIncreasingBound(const SCEV *Start,
- const SCEV *BoundSCEV, const SCEV *Step,
- ICmpInst::Predicate Pred,
- unsigned LatchBrExitIdx,
- Loop *L, ScalarEvolution &SE) {
- if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_SGT &&
- Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_UGT)
- return false;
-
- if (!SE.isAvailableAtLoopEntry(BoundSCEV, L))
- return false;
-
- LLVM_DEBUG(dbgs() << "irce: isSafeIncreasingBound with:\n");
- LLVM_DEBUG(dbgs() << "irce: Start: " << *Start << "\n");
- LLVM_DEBUG(dbgs() << "irce: Step: " << *Step << "\n");
- LLVM_DEBUG(dbgs() << "irce: BoundSCEV: " << *BoundSCEV << "\n");
- LLVM_DEBUG(dbgs() << "irce: Pred: " << Pred << "\n");
- LLVM_DEBUG(dbgs() << "irce: LatchExitBrIdx: " << LatchBrExitIdx << "\n");
-
- bool IsSigned = ICmpInst::isSigned(Pred);
- // The predicate that we need to check that the induction variable lies
- // within bounds.
- ICmpInst::Predicate BoundPred =
- IsSigned ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
-
- if (LatchBrExitIdx == 1)
- return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, BoundSCEV);
-
- assert(LatchBrExitIdx == 0 && "LatchBrExitIdx should be 0 or 1");
-
- const SCEV *StepMinusOne =
- SE.getMinusSCEV(Step, SE.getOne(Step->getType()));
- unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
- APInt Max = IsSigned ? APInt::getSignedMaxValue(BitWidth) :
- APInt::getMaxValue(BitWidth);
- const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Max), StepMinusOne);
-
- return (SE.isLoopEntryGuardedByCond(L, BoundPred, Start,
- SE.getAddExpr(BoundSCEV, Step)) &&
- SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit));
-}
-
-/// Returns estimate for max latch taken count of the loop of the narrowest
-/// available type. If the latch block has such estimate, it is returned.
-/// Otherwise, we use max exit count of whole loop (that is potentially of wider
-/// type than latch check itself), which is still better than no estimate.
-static const SCEV *getNarrowestLatchMaxTakenCountEstimate(ScalarEvolution &SE,
- const Loop &L) {
- const SCEV *FromBlock =
- SE.getExitCount(&L, L.getLoopLatch(), ScalarEvolution::SymbolicMaximum);
- if (isa<SCEVCouldNotCompute>(FromBlock))
- return SE.getSymbolicMaxBackedgeTakenCount(&L);
- return FromBlock;
-}
-
-std::optional<LoopStructure>
-LoopStructure::parseLoopStructure(ScalarEvolution &SE, Loop &L,
- const char *&FailureReason) {
- if (!L.isLoopSimplifyForm()) {
- FailureReason = "loop not in LoopSimplify form";
- return std::nullopt;
- }
-
- BasicBlock *Latch = L.getLoopLatch();
- assert(Latch && "Simplified loops only have one latch!");
-
- if (Latch->getTerminator()->getMetadata(ClonedLoopTag)) {
- FailureReason = "loop has already been cloned";
- return std::nullopt;
- }
-
- if (!L.isLoopExiting(Latch)) {
- FailureReason = "no loop latch";
- return std::nullopt;
- }
-
- BasicBlock *Header = L.getHeader();
- BasicBlock *Preheader = L.getLoopPreheader();
- if (!Preheader) {
- FailureReason = "no preheader";
- return std::nullopt;
- }
-
- BranchInst *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
- if (!LatchBr || LatchBr->isUnconditional()) {
- FailureReason = "latch terminator not conditional branch";
- return std::nullopt;
- }
-
- unsigned LatchBrExitIdx = LatchBr->getSuccessor(0) == Header ? 1 : 0;
-
- ICmpInst *ICI = dyn_cast<ICmpInst>(LatchBr->getCondition());
- if (!ICI || !isa<IntegerType>(ICI->getOperand(0)->getType())) {
- FailureReason = "latch terminator branch not conditional on integral icmp";
- return std::nullopt;
- }
-
- const SCEV *MaxBETakenCount = getNarrowestLatchMaxTakenCountEstimate(SE, L);
- if (isa<SCEVCouldNotCompute>(MaxBETakenCount)) {
- FailureReason = "could not compute latch count";
- return std::nullopt;
- }
- assert(SE.getLoopDisposition(MaxBETakenCount, &L) ==
- ScalarEvolution::LoopInvariant &&
- "loop variant exit count doesn't make sense!");
-
- ICmpInst::Predicate Pred = ICI->getPredicate();
- Value *LeftValue = ICI->getOperand(0);
- const SCEV *LeftSCEV = SE.getSCEV(LeftValue);
- IntegerType *IndVarTy = cast<IntegerType>(LeftValue->getType());
-
- Value *RightValue = ICI->getOperand(1);
- const SCEV *RightSCEV = SE.getSCEV(RightValue);
-
- // We canonicalize `ICI` such that `LeftSCEV` is an add recurrence.
- if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
- if (isa<SCEVAddRecExpr>(RightSCEV)) {
- std::swap(LeftSCEV, RightSCEV);
- std::swap(LeftValue, RightValue);
- Pred = ICmpInst::getSwappedPredicate(Pred);
- } else {
- FailureReason = "no add recurrences in the icmp";
- return std::nullopt;
- }
- }
-
- auto HasNoSignedWrap = [&](const SCEVAddRecExpr *AR) {
- if (AR->getNoWrapFlags(SCEV::FlagNSW))
- return true;
-
- IntegerType *Ty = cast<IntegerType>(AR->getType());
- IntegerType *WideTy =
- IntegerType::get(Ty->getContext(), Ty->getBitWidth() * 2);
-
- const SCEVAddRecExpr *ExtendAfterOp =
- dyn_cast<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
- if (ExtendAfterOp) {
- const SCEV *ExtendedStart = SE.getSignExtendExpr(AR->getStart(), WideTy);
- const SCEV *ExtendedStep =
- SE.getSignExtendExpr(AR->getStepRecurrence(SE), WideTy);
-
- bool NoSignedWrap = ExtendAfterOp->getStart() == ExtendedStart &&
- ExtendAfterOp->getStepRecurrence(SE) == ExtendedStep;
-
- if (NoSignedWrap)
- return true;
- }
-
- // We may have proved this when computing the sign extension above.
- return AR->getNoWrapFlags(SCEV::FlagNSW) != SCEV::FlagAnyWrap;
- };
-
- // `ICI` is interpreted as taking the backedge if the *next* value of the
- // induction variable satisfies some constraint.
-
- const SCEVAddRecExpr *IndVarBase = cast<SCEVAddRecExpr>(LeftSCEV);
- if (IndVarBase->getLoop() != &L) {
- FailureReason = "LHS in cmp is not an AddRec for this loop";
- return std::nullopt;
- }
- if (!IndVarBase->isAffine()) {
- FailureReason = "LHS in icmp not induction variable";
- return std::nullopt;
- }
- const SCEV* StepRec = IndVarBase->getStepRecurrence(SE);
- if (!isa<SCEVConstant>(StepRec)) {
- FailureReason = "LHS in icmp not induction variable";
- return std::nullopt;
- }
- ConstantInt *StepCI = cast<SCEVConstant>(StepRec)->getValue();
-
- if (ICI->isEquality() && !HasNoSignedWrap(IndVarBase)) {
- FailureReason = "LHS in icmp needs nsw for equality predicates";
- return std::nullopt;
- }
-
- assert(!StepCI->isZero() && "Zero step?");
- bool IsIncreasing = !StepCI->isNegative();
- bool IsSignedPredicate;
- const SCEV *StartNext = IndVarBase->getStart();
- const SCEV *Addend = SE.getNegativeSCEV(IndVarBase->getStepRecurrence(SE));
- const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend);
- const SCEV *Step = SE.getSCEV(StepCI);
-
- const SCEV *FixedRightSCEV = nullptr;
-
- // If RightValue resides within loop (but still being loop invariant),
- // regenerate it as preheader.
- if (auto *I = dyn_cast<Instruction>(RightValue))
- if (L.contains(I->getParent()))
- FixedRightSCEV = RightSCEV;
-
- if (IsIncreasing) {
- bool DecreasedRightValueByOne = false;
- if (StepCI->isOne()) {
- // Try to turn eq/ne predicates to those we can work with.
- if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1)
- // while (++i != len) { while (++i < len) {
- // ... ---> ...
- // } }
- // If both parts are known non-negative, it is profitable to use
- // unsigned comparison in increasing loop. This allows us to make the
- // comparison check against "RightSCEV + 1" more optimistic.
- if (isKnownNonNegativeInLoop(IndVarStart, &L, SE) &&
- isKnownNonNegativeInLoop(RightSCEV, &L, SE))
- Pred = ICmpInst::ICMP_ULT;
- else
- Pred = ICmpInst::ICMP_SLT;
- else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0) {
- // while (true) { while (true) {
- // if (++i == len) ---> if (++i > len - 1)
- // break; break;
- // ... ...
- // } }
- if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) &&
- cannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/false)) {
- Pred = ICmpInst::ICMP_UGT;
- RightSCEV = SE.getMinusSCEV(RightSCEV,
- SE.getOne(RightSCEV->getType()));
- DecreasedRightValueByOne = true;
- } else if (cannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/true)) {
- Pred = ICmpInst::ICMP_SGT;
- RightSCEV = SE.getMinusSCEV(RightSCEV,
- SE.getOne(RightSCEV->getType()));
- DecreasedRightValueByOne = true;
- }
- }
- }
-
- bool LTPred = (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT);
- bool GTPred = (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT);
- bool FoundExpectedPred =
- (LTPred && LatchBrExitIdx == 1) || (GTPred && LatchBrExitIdx == 0);
-
- if (!FoundExpectedPred) {
- FailureReason = "expected icmp slt semantically, found something else";
- return std::nullopt;
- }
-
- IsSignedPredicate = ICmpInst::isSigned(Pred);
- if (!IsSignedPredicate && !AllowUnsignedLatchCondition) {
- FailureReason = "unsigned latch conditions are explicitly prohibited";
- return std::nullopt;
- }
-
- if (!isSafeIncreasingBound(IndVarStart, RightSCEV, Step, Pred,
- LatchBrExitIdx, &L, SE)) {
- FailureReason = "Unsafe loop bounds";
- return std::nullopt;
- }
- if (LatchBrExitIdx == 0) {
- // We need to increase the right value unless we have already decreased
- // it virtually when we replaced EQ with SGT.
- if (!DecreasedRightValueByOne)
- FixedRightSCEV =
- SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
- } else {
- assert(!DecreasedRightValueByOne &&
- "Right value can be decreased only for LatchBrExitIdx == 0!");
- }
- } else {
- bool IncreasedRightValueByOne = false;
- if (StepCI->isMinusOne()) {
- // Try to turn eq/ne predicates to those we can work with.
- if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1)
- // while (--i != len) { while (--i > len) {
- // ... ---> ...
- // } }
- // We intentionally don't turn the predicate into UGT even if we know
- // that both operands are non-negative, because it will only pessimize
- // our check against "RightSCEV - 1".
- Pred = ICmpInst::ICMP_SGT;
- else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0) {
- // while (true) { while (true) {
- // if (--i == len) ---> if (--i < len + 1)
- // break; break;
- // ... ...
- // } }
- if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) &&
- cannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ false)) {
- Pred = ICmpInst::ICMP_ULT;
- RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
- IncreasedRightValueByOne = true;
- } else if (cannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ true)) {
- Pred = ICmpInst::ICMP_SLT;
- RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
- IncreasedRightValueByOne = true;
- }
- }
- }
-
- bool LTPred = (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT);
- bool GTPred = (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT);
-
- bool FoundExpectedPred =
- (GTPred && LatchBrExitIdx == 1) || (LTPred && LatchBrExitIdx == 0);
-
- if (!FoundExpectedPred) {
- FailureReason = "expected icmp sgt semantically, found something else";
- return std::nullopt;
- }
-
- IsSignedPredicate =
- Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGT;
-
- if (!IsSignedPredicate && !AllowUnsignedLatchCondition) {
- FailureReason = "unsigned latch conditions are explicitly prohibited";
- return std::nullopt;
- }
-
- if (!isSafeDecreasingBound(IndVarStart, RightSCEV, Step, Pred,
- LatchBrExitIdx, &L, SE)) {
- FailureReason = "Unsafe bounds";
- return std::nullopt;
- }
-
- if (LatchBrExitIdx == 0) {
- // We need to decrease the right value unless we have already increased
- // it virtually when we replaced EQ with SLT.
- if (!IncreasedRightValueByOne)
- FixedRightSCEV =
- SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType()));
- } else {
- assert(!IncreasedRightValueByOne &&
- "Right value can be increased only for LatchBrExitIdx == 0!");
- }
- }
- BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx);
-
- assert(!L.contains(LatchExit) && "expected an exit block!");
- const DataLayout &DL = Preheader->getModule()->getDataLayout();
- SCEVExpander Expander(SE, DL, "irce");
- Instruction *Ins = Preheader->getTerminator();
-
- if (FixedRightSCEV)
- RightValue =
- Expander.expandCodeFor(FixedRightSCEV, FixedRightSCEV->getType(), Ins);
-
- Value *IndVarStartV = Expander.expandCodeFor(IndVarStart, IndVarTy, Ins);
- IndVarStartV->setName("indvar.start");
-
- LoopStructure Result;
-
- Result.Tag = "main";
- Result.Header = Header;
- Result.Latch = Latch;
- Result.LatchBr = LatchBr;
- Result.LatchExit = LatchExit;
- Result.LatchBrExitIdx = LatchBrExitIdx;
- Result.IndVarStart = IndVarStartV;
- Result.IndVarStep = StepCI;
- Result.IndVarBase = LeftValue;
- Result.IndVarIncreasing = IsIncreasing;
- Result.LoopExitAt = RightValue;
- Result.IsSignedPredicate = IsSignedPredicate;
-
- FailureReason = nullptr;
-
- return Result;
-}
-
/// If the type of \p S matches with \p Ty, return \p S. Otherwise, return
/// signed or unsigned extension of \p S to type \p Ty.
static const SCEV *NoopOrExtend(const SCEV *S, Type *Ty, ScalarEvolution &SE,
@@ -1204,17 +556,23 @@ static const SCEV *NoopOrExtend(const SCEV *S, Type *Ty, ScalarEvolution &SE,
return Signed ? SE.getNoopOrSignExtend(S, Ty) : SE.getNoopOrZeroExtend(S, Ty);
}
-std::optional<LoopConstrainer::SubRanges>
-LoopConstrainer::calculateSubRanges(bool IsSignedPredicate) const {
+// Compute a safe set of limits for the main loop to run in -- effectively the
+// intersection of `Range' and the iteration space of the original loop.
+// Return std::nullopt if unable to compute the set of subranges.
+static std::optional<LoopConstrainer::SubRanges>
+calculateSubRanges(ScalarEvolution &SE, const Loop &L,
+ InductiveRangeCheck::Range &Range,
+ const LoopStructure &MainLoopStructure) {
auto *RTy = cast<IntegerType>(Range.getType());
// We only support wide range checks and narrow latches.
- if (!AllowNarrowLatchCondition && RTy != ExitCountTy)
+ if (!AllowNarrowLatchCondition && RTy != MainLoopStructure.ExitCountTy)
return std::nullopt;
- if (RTy->getBitWidth() < ExitCountTy->getBitWidth())
+ if (RTy->getBitWidth() < MainLoopStructure.ExitCountTy->getBitWidth())
return std::nullopt;
LoopConstrainer::SubRanges Result;
+ bool IsSignedPredicate = MainLoopStructure.IsSignedPredicate;
// I think we can be more aggressive here and make this nuw / nsw if the
// addition that feeds into the icmp for the latch's terminating branch is nuw
// / nsw. In any case, a wrapping 2's complement addition is safe.
@@ -1245,7 +603,7 @@ LoopConstrainer::calculateSubRanges(bool IsSignedPredicate) const {
// `End`, decrementing by one every time.
//
// * if `Smallest` sign-overflows we know `End` is `INT_SMAX`. Since the
- // induction variable is decreasing we know that that the smallest value
+ // induction variable is decreasing we know that the smallest value
// the loop body is actually executed with is `INT_SMIN` == `Smallest`.
//
// * if `Greatest` sign-overflows, we know it can only be `INT_SMIN`. In
@@ -1258,7 +616,7 @@ LoopConstrainer::calculateSubRanges(bool IsSignedPredicate) const {
GreatestSeen = Start;
}
- auto Clamp = [this, Smallest, Greatest, IsSignedPredicate](const SCEV *S) {
+ auto Clamp = [&SE, Smallest, Greatest, IsSignedPredicate](const SCEV *S) {
return IsSignedPredicate
? SE.getSMaxExpr(Smallest, SE.getSMinExpr(Greatest, S))
: SE.getUMaxExpr(Smallest, SE.getUMinExpr(Greatest, S));
@@ -1283,464 +641,6 @@ LoopConstrainer::calculateSubRanges(bool IsSignedPredicate) const {
return Result;
}
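To make the subrange computation concrete, here is a worked example with made-up bounds (illustrative numbers only, not taken from the patch): if the original loop runs the induction variable over [0, 100) and the range check is provably safe only for [10, 50), the clamp above yields a preloop, a main loop with the check elided, and a postloop.

    #include <algorithm>
    #include <cstdio>

    int main() {
      long Smallest = 0, Greatest = 100; // iteration space of the original loop
      long Begin = 10, End = 50;         // values for which the check is safe
      // Signed clamp, mirroring the Clamp lambda above.
      auto Clamp = [&](long S) {
        return std::max(Smallest, std::min(Greatest, S));
      };
      printf("preloop  [%ld, %ld)\n", Smallest, Clamp(Begin));    // [0, 10)
      printf("mainloop [%ld, %ld)\n", Clamp(Begin), Clamp(End));  // [10, 50)
      printf("postloop [%ld, %ld)\n", Clamp(End), Greatest);      // [50, 100)
    }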
-void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result,
- const char *Tag) const {
- for (BasicBlock *BB : OriginalLoop.getBlocks()) {
- BasicBlock *Clone = CloneBasicBlock(BB, Result.Map, Twine(".") + Tag, &F);
- Result.Blocks.push_back(Clone);
- Result.Map[BB] = Clone;
- }
-
- auto GetClonedValue = [&Result](Value *V) {
- assert(V && "null values not in domain!");
- auto It = Result.Map.find(V);
- if (It == Result.Map.end())
- return V;
- return static_cast<Value *>(It->second);
- };
-
- auto *ClonedLatch =
- cast<BasicBlock>(GetClonedValue(OriginalLoop.getLoopLatch()));
- ClonedLatch->getTerminator()->setMetadata(ClonedLoopTag,
- MDNode::get(Ctx, {}));
-
- Result.Structure = MainLoopStructure.map(GetClonedValue);
- Result.Structure.Tag = Tag;
-
- for (unsigned i = 0, e = Result.Blocks.size(); i != e; ++i) {
- BasicBlock *ClonedBB = Result.Blocks[i];
- BasicBlock *OriginalBB = OriginalLoop.getBlocks()[i];
-
- assert(Result.Map[OriginalBB] == ClonedBB && "invariant!");
-
- for (Instruction &I : *ClonedBB)
- RemapInstruction(&I, Result.Map,
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
-
- // Exit blocks will now have one more predecessor and their PHI nodes need
- // to be edited to reflect that. No phi nodes need to be introduced because
- // the loop is in LCSSA.
-
- for (auto *SBB : successors(OriginalBB)) {
- if (OriginalLoop.contains(SBB))
- continue; // not an exit block
-
- for (PHINode &PN : SBB->phis()) {
- Value *OldIncoming = PN.getIncomingValueForBlock(OriginalBB);
- PN.addIncoming(GetClonedValue(OldIncoming), ClonedBB);
- SE.forgetValue(&PN);
- }
- }
- }
-}
-
-LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
- const LoopStructure &LS, BasicBlock *Preheader, Value *ExitSubloopAt,
- BasicBlock *ContinuationBlock) const {
- // We start with a loop with a single latch:
- //
- // +--------------------+
- // | |
- // | preheader |
- // | |
- // +--------+-----------+
- // | ----------------\
- // | / |
- // +--------v----v------+ |
- // | | |
- // | header | |
- // | | |
- // +--------------------+ |
- // |
- // ..... |
- // |
- // +--------------------+ |
- // | | |
- // | latch >----------/
- // | |
- // +-------v------------+
- // |
- // |
- // | +--------------------+
- // | | |
- // +---> original exit |
- // | |
- // +--------------------+
- //
- // We change the control flow to look like
- //
- //
- // +--------------------+
- // | |
- // | preheader >-------------------------+
- // | | |
- // +--------v-----------+ |
- // | /-------------+ |
- // | / | |
- // +--------v--v--------+ | |
- // | | | |
- // | header | | +--------+ |
- // | | | | | |
- // +--------------------+ | | +-----v-----v-----------+
- // | | | |
- // | | | .pseudo.exit |
- // | | | |
- // | | +-----------v-----------+
- // | | |
- // ..... | | |
- // | | +--------v-------------+
- // +--------------------+ | | | |
- // | | | | | ContinuationBlock |
- // | latch >------+ | | |
- // | | | +----------------------+
- // +---------v----------+ |
- // | |
- // | |
- // | +---------------^-----+
- // | | |
- // +-----> .exit.selector |
- // | |
- // +----------v----------+
- // |
- // +--------------------+ |
- // | | |
- // | original exit <----+
- // | |
- // +--------------------+
-
- RewrittenRangeInfo RRI;
-
- BasicBlock *BBInsertLocation = LS.Latch->getNextNode();
- RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector",
- &F, BBInsertLocation);
- RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F,
- BBInsertLocation);
-
- BranchInst *PreheaderJump = cast<BranchInst>(Preheader->getTerminator());
- bool Increasing = LS.IndVarIncreasing;
- bool IsSignedPredicate = LS.IsSignedPredicate;
-
- IRBuilder<> B(PreheaderJump);
- auto *RangeTy = Range.getBegin()->getType();
- auto NoopOrExt = [&](Value *V) {
- if (V->getType() == RangeTy)
- return V;
- return IsSignedPredicate ? B.CreateSExt(V, RangeTy, "wide." + V->getName())
- : B.CreateZExt(V, RangeTy, "wide." + V->getName());
- };
-
- // EnterLoopCond - is it okay to start executing this `LS'?
- Value *EnterLoopCond = nullptr;
- auto Pred =
- Increasing
- ? (IsSignedPredicate ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT)
- : (IsSignedPredicate ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
- Value *IndVarStart = NoopOrExt(LS.IndVarStart);
- EnterLoopCond = B.CreateICmp(Pred, IndVarStart, ExitSubloopAt);
-
- B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit);
- PreheaderJump->eraseFromParent();
-
- LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector);
- B.SetInsertPoint(LS.LatchBr);
- Value *IndVarBase = NoopOrExt(LS.IndVarBase);
- Value *TakeBackedgeLoopCond = B.CreateICmp(Pred, IndVarBase, ExitSubloopAt);
-
- Value *CondForBranch = LS.LatchBrExitIdx == 1
- ? TakeBackedgeLoopCond
- : B.CreateNot(TakeBackedgeLoopCond);
-
- LS.LatchBr->setCondition(CondForBranch);
-
- B.SetInsertPoint(RRI.ExitSelector);
-
- // IterationsLeft - are there any more iterations left, given the original
- // upper bound on the induction variable? If not, we branch to the "real"
- // exit.
- Value *LoopExitAt = NoopOrExt(LS.LoopExitAt);
- Value *IterationsLeft = B.CreateICmp(Pred, IndVarBase, LoopExitAt);
- B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit);
-
- BranchInst *BranchToContinuation =
- BranchInst::Create(ContinuationBlock, RRI.PseudoExit);
-
- // We emit PHI nodes into `RRI.PseudoExit' that compute the "latest" value of
- // each of the PHI nodes in the loop header. This feeds into the initial
- // value of the same PHI nodes if/when we continue execution.
- for (PHINode &PN : LS.Header->phis()) {
- PHINode *NewPHI = PHINode::Create(PN.getType(), 2, PN.getName() + ".copy",
- BranchToContinuation);
-
- NewPHI->addIncoming(PN.getIncomingValueForBlock(Preheader), Preheader);
- NewPHI->addIncoming(PN.getIncomingValueForBlock(LS.Latch),
- RRI.ExitSelector);
- RRI.PHIValuesAtPseudoExit.push_back(NewPHI);
- }
-
- RRI.IndVarEnd = PHINode::Create(IndVarBase->getType(), 2, "indvar.end",
- BranchToContinuation);
- RRI.IndVarEnd->addIncoming(IndVarStart, Preheader);
- RRI.IndVarEnd->addIncoming(IndVarBase, RRI.ExitSelector);
-
- // The latch exit now has a branch from `RRI.ExitSelector' instead of
- // `LS.Latch'. The PHI nodes need to be updated to reflect that.
- LS.LatchExit->replacePhiUsesWith(LS.Latch, RRI.ExitSelector);
-
- return RRI;
-}
-
-void LoopConstrainer::rewriteIncomingValuesForPHIs(
- LoopStructure &LS, BasicBlock *ContinuationBlock,
- const LoopConstrainer::RewrittenRangeInfo &RRI) const {
- unsigned PHIIndex = 0;
- for (PHINode &PN : LS.Header->phis())
- PN.setIncomingValueForBlock(ContinuationBlock,
- RRI.PHIValuesAtPseudoExit[PHIIndex++]);
-
- LS.IndVarStart = RRI.IndVarEnd;
-}
-
-BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS,
- BasicBlock *OldPreheader,
- const char *Tag) const {
- BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header);
- BranchInst::Create(LS.Header, Preheader);
-
- LS.Header->replacePhiUsesWith(OldPreheader, Preheader);
-
- return Preheader;
-}
-
-void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs) {
- Loop *ParentLoop = OriginalLoop.getParentLoop();
- if (!ParentLoop)
- return;
-
- for (BasicBlock *BB : BBs)
- ParentLoop->addBasicBlockToLoop(BB, LI);
-}
-
-Loop *LoopConstrainer::createClonedLoopStructure(Loop *Original, Loop *Parent,
- ValueToValueMapTy &VM,
- bool IsSubloop) {
- Loop &New = *LI.AllocateLoop();
- if (Parent)
- Parent->addChildLoop(&New);
- else
- LI.addTopLevelLoop(&New);
- LPMAddNewLoop(&New, IsSubloop);
-
- // Add all of the blocks in Original to the new loop.
- for (auto *BB : Original->blocks())
- if (LI.getLoopFor(BB) == Original)
- New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), LI);
-
- // Add all of the subloops to the new loop.
- for (Loop *SubLoop : *Original)
- createClonedLoopStructure(SubLoop, &New, VM, /* IsSubloop */ true);
-
- return &New;
-}
-
-bool LoopConstrainer::run() {
- BasicBlock *Preheader = nullptr;
- const SCEV *MaxBETakenCount =
- getNarrowestLatchMaxTakenCountEstimate(SE, OriginalLoop);
- Preheader = OriginalLoop.getLoopPreheader();
- assert(!isa<SCEVCouldNotCompute>(MaxBETakenCount) && Preheader != nullptr &&
- "preconditions!");
- ExitCountTy = cast<IntegerType>(MaxBETakenCount->getType());
-
- OriginalPreheader = Preheader;
- MainLoopPreheader = Preheader;
-
- bool IsSignedPredicate = MainLoopStructure.IsSignedPredicate;
- std::optional<SubRanges> MaybeSR = calculateSubRanges(IsSignedPredicate);
- if (!MaybeSR) {
- LLVM_DEBUG(dbgs() << "irce: could not compute subranges\n");
- return false;
- }
-
- SubRanges SR = *MaybeSR;
- bool Increasing = MainLoopStructure.IndVarIncreasing;
- IntegerType *IVTy =
- cast<IntegerType>(Range.getBegin()->getType());
-
- SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "irce");
- Instruction *InsertPt = OriginalPreheader->getTerminator();
-
- // It would have been better to make `PreLoop' and `PostLoop'
- // `std::optional<ClonedLoop>'s, but `ValueToValueMapTy' does not have a copy
- // constructor.
- ClonedLoop PreLoop, PostLoop;
- bool NeedsPreLoop =
- Increasing ? SR.LowLimit.has_value() : SR.HighLimit.has_value();
- bool NeedsPostLoop =
- Increasing ? SR.HighLimit.has_value() : SR.LowLimit.has_value();
-
- Value *ExitPreLoopAt = nullptr;
- Value *ExitMainLoopAt = nullptr;
- const SCEVConstant *MinusOneS =
- cast<SCEVConstant>(SE.getConstant(IVTy, -1, true /* isSigned */));
-
- if (NeedsPreLoop) {
- const SCEV *ExitPreLoopAtSCEV = nullptr;
-
- if (Increasing)
- ExitPreLoopAtSCEV = *SR.LowLimit;
- else if (cannotBeMinInLoop(*SR.HighLimit, &OriginalLoop, SE,
- IsSignedPredicate))
- ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS);
- else {
- LLVM_DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
- << "preloop exit limit. HighLimit = "
- << *(*SR.HighLimit) << "\n");
- return false;
- }
-
- if (!Expander.isSafeToExpandAt(ExitPreLoopAtSCEV, InsertPt)) {
- LLVM_DEBUG(dbgs() << "irce: could not prove that it is safe to expand the"
- << " preloop exit limit " << *ExitPreLoopAtSCEV
- << " at block " << InsertPt->getParent()->getName()
- << "\n");
- return false;
- }
-
- ExitPreLoopAt = Expander.expandCodeFor(ExitPreLoopAtSCEV, IVTy, InsertPt);
- ExitPreLoopAt->setName("exit.preloop.at");
- }
-
- if (NeedsPostLoop) {
- const SCEV *ExitMainLoopAtSCEV = nullptr;
-
- if (Increasing)
- ExitMainLoopAtSCEV = *SR.HighLimit;
- else if (cannotBeMinInLoop(*SR.LowLimit, &OriginalLoop, SE,
- IsSignedPredicate))
- ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS);
- else {
- LLVM_DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
- << "mainloop exit limit. LowLimit = "
- << *(*SR.LowLimit) << "\n");
- return false;
- }
-
- if (!Expander.isSafeToExpandAt(ExitMainLoopAtSCEV, InsertPt)) {
- LLVM_DEBUG(dbgs() << "irce: could not prove that it is safe to expand the"
- << " main loop exit limit " << *ExitMainLoopAtSCEV
- << " at block " << InsertPt->getParent()->getName()
- << "\n");
- return false;
- }
-
- ExitMainLoopAt = Expander.expandCodeFor(ExitMainLoopAtSCEV, IVTy, InsertPt);
- ExitMainLoopAt->setName("exit.mainloop.at");
- }
-
- // We clone these ahead of time so that we don't have to deal with changing
- // and temporarily invalid IR as we transform the loops.
- if (NeedsPreLoop)
- cloneLoop(PreLoop, "preloop");
- if (NeedsPostLoop)
- cloneLoop(PostLoop, "postloop");
-
- RewrittenRangeInfo PreLoopRRI;
-
- if (NeedsPreLoop) {
- Preheader->getTerminator()->replaceUsesOfWith(MainLoopStructure.Header,
- PreLoop.Structure.Header);
-
- MainLoopPreheader =
- createPreheader(MainLoopStructure, Preheader, "mainloop");
- PreLoopRRI = changeIterationSpaceEnd(PreLoop.Structure, Preheader,
- ExitPreLoopAt, MainLoopPreheader);
- rewriteIncomingValuesForPHIs(MainLoopStructure, MainLoopPreheader,
- PreLoopRRI);
- }
-
- BasicBlock *PostLoopPreheader = nullptr;
- RewrittenRangeInfo PostLoopRRI;
-
- if (NeedsPostLoop) {
- PostLoopPreheader =
- createPreheader(PostLoop.Structure, Preheader, "postloop");
- PostLoopRRI = changeIterationSpaceEnd(MainLoopStructure, MainLoopPreheader,
- ExitMainLoopAt, PostLoopPreheader);
- rewriteIncomingValuesForPHIs(PostLoop.Structure, PostLoopPreheader,
- PostLoopRRI);
- }
-
- BasicBlock *NewMainLoopPreheader =
- MainLoopPreheader != Preheader ? MainLoopPreheader : nullptr;
- BasicBlock *NewBlocks[] = {PostLoopPreheader, PreLoopRRI.PseudoExit,
- PreLoopRRI.ExitSelector, PostLoopRRI.PseudoExit,
- PostLoopRRI.ExitSelector, NewMainLoopPreheader};
-
-  // Some of the above may be nullptr; filter them out before passing to
-  // addToParentLoopIfNeeded.
- auto NewBlocksEnd =
- std::remove(std::begin(NewBlocks), std::end(NewBlocks), nullptr);
-
- addToParentLoopIfNeeded(ArrayRef(std::begin(NewBlocks), NewBlocksEnd));
-
- DT.recalculate(F);
-
- // We need to first add all the pre and post loop blocks into the loop
- // structures (as part of createClonedLoopStructure), and then update the
- // LCSSA form and LoopSimplifyForm. This is necessary for correctly updating
- // LI when LoopSimplifyForm is generated.
- Loop *PreL = nullptr, *PostL = nullptr;
- if (!PreLoop.Blocks.empty()) {
- PreL = createClonedLoopStructure(&OriginalLoop,
- OriginalLoop.getParentLoop(), PreLoop.Map,
- /* IsSubLoop */ false);
- }
-
- if (!PostLoop.Blocks.empty()) {
- PostL =
- createClonedLoopStructure(&OriginalLoop, OriginalLoop.getParentLoop(),
- PostLoop.Map, /* IsSubLoop */ false);
- }
-
- // This function canonicalizes the loop into Loop-Simplify and LCSSA forms.
- auto CanonicalizeLoop = [&] (Loop *L, bool IsOriginalLoop) {
- formLCSSARecursively(*L, DT, &LI, &SE);
- simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr, true);
- // Pre/post loops are slow paths, we do not need to perform any loop
- // optimizations on them.
- if (!IsOriginalLoop)
- DisableAllLoopOptsOnLoop(*L);
- };
- if (PreL)
- CanonicalizeLoop(PreL, false);
- if (PostL)
- CanonicalizeLoop(PostL, false);
- CanonicalizeLoop(&OriginalLoop, true);
-
- /// At this point:
- /// - We've broken a "main loop" out of the loop in a way that the "main loop"
- /// runs with the induction variable in a subset of [Begin, End).
- /// - There is no overflow when computing "main loop" exit limit.
- /// - Max latch taken count of the loop is limited.
-  /// This guarantees that the induction variable will not overflow while
-  /// iterating in the "main loop".
- if (auto BO = dyn_cast<BinaryOperator>(MainLoopStructure.IndVarBase))
- if (IsSignedPredicate)
- BO->setHasNoSignedWrap(true);
- /// TODO: support unsigned predicate.
- /// To add NUW flag we need to prove that both operands of BO are
- /// non-negative. E.g:
- /// ...
- /// %iv.next = add nsw i32 %iv, -1
- /// %cmp = icmp ult i32 %iv.next, %n
- /// br i1 %cmp, label %loopexit, label %loop
- ///
- /// -1 is MAX_UINT in terms of unsigned int. Adding anything but zero will
- /// overflow, therefore NUW flag is not legal here.
-
- return true;
-}
-
/// Computes and returns a range of values for the induction variable (IndVar)
/// in which the range check can be safely elided. If it cannot compute such a
/// range, returns std::nullopt.
@@ -2108,7 +1008,8 @@ bool InductiveRangeCheckElimination::run(
const char *FailureReason = nullptr;
std::optional<LoopStructure> MaybeLoopStructure =
- LoopStructure::parseLoopStructure(SE, *L, FailureReason);
+ LoopStructure::parseLoopStructure(SE, *L, AllowUnsignedLatchCondition,
+ FailureReason);
if (!MaybeLoopStructure) {
LLVM_DEBUG(dbgs() << "irce: could not parse loop structure: "
<< FailureReason << "\n";);
@@ -2147,7 +1048,15 @@ bool InductiveRangeCheckElimination::run(
if (!SafeIterRange)
return Changed;
- LoopConstrainer LC(*L, LI, LPMAddNewLoop, LS, SE, DT, *SafeIterRange);
+ std::optional<LoopConstrainer::SubRanges> MaybeSR =
+ calculateSubRanges(SE, *L, *SafeIterRange, LS);
+ if (!MaybeSR) {
+ LLVM_DEBUG(dbgs() << "irce: could not compute subranges\n");
+ return false;
+ }
+
+ LoopConstrainer LC(*L, LI, LPMAddNewLoop, LS, SE, DT,
+ SafeIterRange->getBegin()->getType(), *MaybeSR);
if (LC.run()) {
Changed = true;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index c2b5a12fd63f..1bf50d79e533 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -164,9 +164,13 @@ class InferAddressSpaces : public FunctionPass {
public:
static char ID;
- InferAddressSpaces() :
- FunctionPass(ID), FlatAddrSpace(UninitializedAddressSpace) {}
- InferAddressSpaces(unsigned AS) : FunctionPass(ID), FlatAddrSpace(AS) {}
+ InferAddressSpaces()
+ : FunctionPass(ID), FlatAddrSpace(UninitializedAddressSpace) {
+ initializeInferAddressSpacesPass(*PassRegistry::getPassRegistry());
+ }
+ InferAddressSpaces(unsigned AS) : FunctionPass(ID), FlatAddrSpace(AS) {
+ initializeInferAddressSpacesPass(*PassRegistry::getPassRegistry());
+ }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -221,8 +225,8 @@ class InferAddressSpacesImpl {
Value *V, PostorderStackTy &PostorderStack,
DenseSet<Value *> &Visited) const;
- bool rewriteIntrinsicOperands(IntrinsicInst *II,
- Value *OldV, Value *NewV) const;
+ bool rewriteIntrinsicOperands(IntrinsicInst *II, Value *OldV,
+ Value *NewV) const;
void collectRewritableIntrinsicOperands(IntrinsicInst *II,
PostorderStackTy &PostorderStack,
DenseSet<Value *> &Visited) const;
@@ -473,7 +477,7 @@ void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
}
// Returns all flat address expressions in function F. The elements are ordered
-// ordered in postorder.
+// in postorder.
std::vector<WeakTrackingVH>
InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
// This function implements a non-recursive postorder traversal of a partial
@@ -483,8 +487,7 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
DenseSet<Value *> Visited;
auto PushPtrOperand = [&](Value *Ptr) {
- appendsFlatAddressExpressionToPostorderStack(Ptr, PostorderStack,
- Visited);
+ appendsFlatAddressExpressionToPostorderStack(Ptr, PostorderStack, Visited);
};
 // Look at operations that may be interesting to accelerate by moving to a known
@@ -519,8 +522,11 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
PushPtrOperand(ASC->getPointerOperand());
} else if (auto *I2P = dyn_cast<IntToPtrInst>(&I)) {
if (isNoopPtrIntCastPair(cast<Operator>(I2P), *DL, TTI))
- PushPtrOperand(
- cast<Operator>(I2P->getOperand(0))->getOperand(0));
+ PushPtrOperand(cast<Operator>(I2P->getOperand(0))->getOperand(0));
+ } else if (auto *RI = dyn_cast<ReturnInst>(&I)) {
+ if (auto *RV = RI->getReturnValue();
+ RV && RV->getType()->isPtrOrPtrVectorTy())
+ PushPtrOperand(RV);
}
}
@@ -923,12 +929,14 @@ bool InferAddressSpacesImpl::updateAddressSpace(
Value *Src1 = Op.getOperand(2);
auto I = InferredAddrSpace.find(Src0);
- unsigned Src0AS = (I != InferredAddrSpace.end()) ?
- I->second : Src0->getType()->getPointerAddressSpace();
+ unsigned Src0AS = (I != InferredAddrSpace.end())
+ ? I->second
+ : Src0->getType()->getPointerAddressSpace();
auto J = InferredAddrSpace.find(Src1);
- unsigned Src1AS = (J != InferredAddrSpace.end()) ?
- J->second : Src1->getType()->getPointerAddressSpace();
+ unsigned Src1AS = (J != InferredAddrSpace.end())
+ ? J->second
+ : Src1->getType()->getPointerAddressSpace();
auto *C0 = dyn_cast<Constant>(Src0);
auto *C1 = dyn_cast<Constant>(Src1);
@@ -1097,7 +1105,8 @@ bool InferAddressSpacesImpl::isSafeToCastConstAddrSpace(Constant *C,
// If we already have a constant addrspacecast, it should be safe to cast it
// off.
if (Op->getOpcode() == Instruction::AddrSpaceCast)
- return isSafeToCastConstAddrSpace(cast<Constant>(Op->getOperand(0)), NewAS);
+ return isSafeToCastConstAddrSpace(cast<Constant>(Op->getOperand(0)),
+ NewAS);
if (Op->getOpcode() == Instruction::IntToPtr &&
Op->getType()->getPointerAddressSpace() == FlatAddrSpace)
@@ -1128,7 +1137,7 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
// construction.
ValueToValueMapTy ValueWithNewAddrSpace;
SmallVector<const Use *, 32> PoisonUsesToFix;
- for (Value* V : Postorder) {
+ for (Value *V : Postorder) {
unsigned NewAddrSpace = InferredAddrSpace.lookup(V);
// In some degenerate cases (e.g. invalid IR in unreachable code), we may
@@ -1161,6 +1170,8 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
}
SmallVector<Instruction *, 16> DeadInstructions;
+ ValueToValueMapTy VMap;
+ ValueMapper VMapper(VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
// Replaces the uses of the old address expressions with the new ones.
for (const WeakTrackingVH &WVH : Postorder) {
@@ -1174,18 +1185,41 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
<< *NewV << '\n');
if (Constant *C = dyn_cast<Constant>(V)) {
- Constant *Replace = ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
- C->getType());
+ Constant *Replace =
+ ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV), C->getType());
if (C != Replace) {
LLVM_DEBUG(dbgs() << "Inserting replacement const cast: " << Replace
<< ": " << *Replace << '\n');
- C->replaceAllUsesWith(Replace);
+ SmallVector<User *, 16> WorkList;
+ for (User *U : make_early_inc_range(C->users())) {
+ if (auto *I = dyn_cast<Instruction>(U)) {
+ if (I->getFunction() == F)
+ I->replaceUsesOfWith(C, Replace);
+ } else {
+ WorkList.append(U->user_begin(), U->user_end());
+ }
+ }
+ if (!WorkList.empty()) {
+ VMap[C] = Replace;
+ DenseSet<User *> Visited{WorkList.begin(), WorkList.end()};
+ while (!WorkList.empty()) {
+ User *U = WorkList.pop_back_val();
+ if (auto *I = dyn_cast<Instruction>(U)) {
+ if (I->getFunction() == F)
+ VMapper.remapInstruction(*I);
+ continue;
+ }
+ for (User *U2 : U->users())
+ if (Visited.insert(U2).second)
+ WorkList.push_back(U2);
+ }
+ }
V = Replace;
}
}
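The worklist above exists because a Constant can be used through arbitrarily nested constant expressions that are shared module-wide, so only instruction users inside F may be rewritten. A small standalone sketch of that traversal pattern, using placeholder node types instead of LLVM's User hierarchy:

    #include <set>
    #include <vector>

    struct Node { bool IsInstrInF; std::vector<Node *> Users; };

    // Walk all transitive users once, touching only instructions in F; the
    // Visited set keeps shared constant expressions from being re-walked.
    static void remapUsers(std::vector<Node *> WorkList) {
      std::set<Node *> Visited(WorkList.begin(), WorkList.end());
      while (!WorkList.empty()) {
        Node *U = WorkList.back();
        WorkList.pop_back();
        if (U->IsInstrInF)
          continue; // here the real pass calls VMapper.remapInstruction(*U)
        for (Node *U2 : U->Users)
          if (Visited.insert(U2).second)
            WorkList.push_back(U2);
      }
    }

    int main() {
      Node I{true, {}}, CE{false, {&I}};
      remapUsers({&CE}); // visits the constant expression, then its instruction user
    }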
Value::use_iterator I, E, Next;
- for (I = V->use_begin(), E = V->use_end(); I != E; ) {
+ for (I = V->use_begin(), E = V->use_end(); I != E;) {
Use &U = *I;
// Some users may see the same pointer operand in multiple operands. Skip
@@ -1205,6 +1239,11 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
// Skip if the current user is the new value itself.
if (CurUser == NewV)
continue;
+
+ if (auto *CurUserI = dyn_cast<Instruction>(CurUser);
+ CurUserI && CurUserI->getFunction() != F)
+ continue;
+
// Handle more complex cases like intrinsic that need to be remangled.
if (auto *MI = dyn_cast<MemIntrinsic>(CurUser)) {
if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, V, NewV))
@@ -1241,8 +1280,8 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
if (auto *KOtherSrc = dyn_cast<Constant>(OtherSrc)) {
if (isSafeToCastConstAddrSpace(KOtherSrc, NewAS)) {
Cmp->setOperand(SrcIdx, NewV);
- Cmp->setOperand(OtherIdx,
- ConstantExpr::getAddrSpaceCast(KOtherSrc, NewV->getType()));
+ Cmp->setOperand(OtherIdx, ConstantExpr::getAddrSpaceCast(
+ KOtherSrc, NewV->getType()));
continue;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAlignment.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAlignment.cpp
new file mode 100644
index 000000000000..b75b8d486fbb
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAlignment.cpp
@@ -0,0 +1,91 @@
+//===- InferAlignment.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Infer alignment for loads, stores and other memory operations based on
+// trailing zero known bits information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/InferAlignment.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+using namespace llvm;
+
+static bool tryToImproveAlign(
+ const DataLayout &DL, Instruction *I,
+ function_ref<Align(Value *PtrOp, Align OldAlign, Align PrefAlign)> Fn) {
+ if (auto *LI = dyn_cast<LoadInst>(I)) {
+ Value *PtrOp = LI->getPointerOperand();
+ Align OldAlign = LI->getAlign();
+ Align NewAlign = Fn(PtrOp, OldAlign, DL.getPrefTypeAlign(LI->getType()));
+ if (NewAlign > OldAlign) {
+ LI->setAlignment(NewAlign);
+ return true;
+ }
+ } else if (auto *SI = dyn_cast<StoreInst>(I)) {
+ Value *PtrOp = SI->getPointerOperand();
+ Value *ValOp = SI->getValueOperand();
+ Align OldAlign = SI->getAlign();
+ Align NewAlign = Fn(PtrOp, OldAlign, DL.getPrefTypeAlign(ValOp->getType()));
+ if (NewAlign > OldAlign) {
+ SI->setAlignment(NewAlign);
+ return true;
+ }
+ }
+ // TODO: Also handle memory intrinsics.
+ return false;
+}
+
+bool inferAlignment(Function &F, AssumptionCache &AC, DominatorTree &DT) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ bool Changed = false;
+
+ // Enforce preferred type alignment if possible. We do this as a separate
+ // pass first, because it may improve the alignments we infer below.
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ Changed |= tryToImproveAlign(
+ DL, &I, [&](Value *PtrOp, Align OldAlign, Align PrefAlign) {
+ if (PrefAlign > OldAlign)
+ return std::max(OldAlign,
+ tryEnforceAlignment(PtrOp, PrefAlign, DL));
+ return OldAlign;
+ });
+ }
+ }
+
+ // Compute alignment from known bits.
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ Changed |= tryToImproveAlign(
+ DL, &I, [&](Value *PtrOp, Align OldAlign, Align PrefAlign) {
+ KnownBits Known = computeKnownBits(PtrOp, DL, 0, &AC, &I, &DT);
+ unsigned TrailZ = std::min(Known.countMinTrailingZeros(),
+ +Value::MaxAlignmentExponent);
+ return Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ));
+ });
+ }
+ }
+
+ return Changed;
+}
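A self-contained sketch of the known-bits step above, on plain integers rather than LLVM's KnownBits type (the cap constant below is a stand-in for Value::MaxAlignmentExponent, whose exact value is not shown in this patch):

    #include <algorithm>
    #include <bit>
    #include <cstdint>
    #include <cstdio>

    // If the low bits of a pointer are provably zero, the pointer is aligned
    // to 1 << (number of known trailing zero bits), capped like the pass does.
    static uint64_t alignFromKnownZeros(uint64_t KnownZeroMask) {
      unsigned MaxExponent = 32; // assumed cap, cf. Value::MaxAlignmentExponent
      unsigned TrailZ = std::countr_one(KnownZeroMask); // low bits known zero
      return uint64_t(1) << std::min(TrailZ, MaxExponent);
    }

    int main() {
      // Low 4 bits known zero -> alignment 16.
      printf("%llu\n", (unsigned long long)alignFromKnownZeros(0xF));
    }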
+
+PreservedAnalyses InferAlignmentPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
+ DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ inferAlignment(F, AC, DT);
+  // Changes to alignment shouldn't invalidate analyses.
+ return PreservedAnalyses::all();
+}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 5b8f1b00dc03..8603c5cf9c02 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -102,11 +102,6 @@ static cl::opt<unsigned> PhiDuplicateThreshold(
cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76),
cl::Hidden);
-static cl::opt<bool> PrintLVIAfterJumpThreading(
- "print-lvi-after-jump-threading",
- cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
- cl::Hidden);
-
static cl::opt<bool> ThreadAcrossLoopHeaders(
"jump-threading-across-loop-headers",
cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
@@ -228,17 +223,15 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
if (BP >= BranchProbability(50, 100))
continue;
- SmallVector<uint32_t, 2> Weights;
+ uint32_t Weights[2];
if (PredBr->getSuccessor(0) == PredOutEdge.second) {
- Weights.push_back(BP.getNumerator());
- Weights.push_back(BP.getCompl().getNumerator());
+ Weights[0] = BP.getNumerator();
+ Weights[1] = BP.getCompl().getNumerator();
} else {
- Weights.push_back(BP.getCompl().getNumerator());
- Weights.push_back(BP.getNumerator());
+ Weights[0] = BP.getCompl().getNumerator();
+ Weights[1] = BP.getNumerator();
}
- PredBr->setMetadata(LLVMContext::MD_prof,
- MDBuilder(PredBr->getParent()->getContext())
- .createBranchWeights(Weights));
+ setBranchWeights(*PredBr, Weights);
}
}
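For concreteness, suppose the recomputed probability of the edge into PredOutEdge.second is 30% (a made-up figure; BranchProbability actually stores a fixed-point numerator, shown here over 100 for readability). The weights attached to PredBr are then {30, 70} when successor 0 is that edge, and {70, 30} otherwise:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t Num = 30, Denom = 100;   // P(edge) = 30/100
      uint32_t Compl = Denom - Num;     // BP.getCompl() -> 70/100
      bool EdgeIsSucc0 = true;          // PredBr->getSuccessor(0) == the edge?
      uint32_t Weights[2] = {EdgeIsSucc0 ? Num : Compl,
                             EdgeIsSucc0 ? Compl : Num};
      printf("!prof weights: {%u, %u}\n", Weights[0], Weights[1]);
    }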
@@ -259,11 +252,6 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
&DT, nullptr, DomTreeUpdater::UpdateStrategy::Lazy),
std::nullopt, std::nullopt);
- if (PrintLVIAfterJumpThreading) {
- dbgs() << "LVI for function '" << F.getName() << "':\n";
- LVI.printLVI(F, getDomTreeUpdater()->getDomTree(), dbgs());
- }
-
if (!Changed)
return PreservedAnalyses::all();
@@ -412,6 +400,10 @@ static bool replaceFoldableUses(Instruction *Cond, Value *ToVal,
if (Cond->getParent() == KnownAtEndOfBB)
Changed |= replaceNonLocalUsesWith(Cond, ToVal);
for (Instruction &I : reverse(*KnownAtEndOfBB)) {
+ // Replace any debug-info record users of Cond with ToVal.
+ for (DPValue &DPV : I.getDbgValueRange())
+ DPV.replaceVariableLocationOp(Cond, ToVal, true);
+
// Reached the Cond whose uses we are trying to replace, so there are no
// more uses.
if (&I == Cond)
@@ -568,6 +560,8 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
Value *V, BasicBlock *BB, PredValueInfo &Result,
ConstantPreference Preference, DenseSet<Value *> &RecursionSet,
Instruction *CxtI) {
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+
// This method walks up use-def chains recursively. Because of this, we could
// get into an infinite loop going around loops in the use-def chain. To
// prevent this, keep track of what (value, block) pairs we've already visited
@@ -635,16 +629,19 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
// Handle Cast instructions.
if (CastInst *CI = dyn_cast<CastInst>(I)) {
Value *Source = CI->getOperand(0);
- computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
+ PredValueInfoTy Vals;
+ computeValueKnownInPredecessorsImpl(Source, BB, Vals, Preference,
RecursionSet, CxtI);
- if (Result.empty())
+ if (Vals.empty())
return false;
// Convert the known values.
- for (auto &R : Result)
- R.first = ConstantExpr::getCast(CI->getOpcode(), R.first, CI->getType());
+ for (auto &Val : Vals)
+ if (Constant *Folded = ConstantFoldCastOperand(CI->getOpcode(), Val.first,
+ CI->getType(), DL))
+ Result.emplace_back(Folded, Val.second);
- return true;
+ return !Result.empty();
}
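The rewritten cast handling folds each per-predecessor constant through the cast and keeps only the ones that fold. With made-up values: if the source is known to be 300 in one predecessor and 5 in another, a trunc to i8 yields 44 and 5 respectively:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t KnownInPreds[2] = {300, 5};     // hypothetical per-predecessor Vals
      for (int32_t V : KnownInPreds)
        printf("%d -> %d\n", V, (int8_t)V);   // trunc i32 -> i8: 44 and 5
    }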
if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
@@ -726,7 +723,6 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
if (Preference != WantInteger)
return false;
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
- const DataLayout &DL = BO->getModule()->getDataLayout();
PredValueInfoTy LHSVals;
computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
WantInteger, RecursionSet, CxtI);
@@ -757,7 +753,10 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
PHINode *PN = dyn_cast<PHINode>(CmpLHS);
if (!PN)
PN = dyn_cast<PHINode>(CmpRHS);
- if (PN && PN->getParent() == BB) {
+ // Do not perform phi translation across a loop header phi, because this
+ // may result in comparison of values from two different loop iterations.
+ // FIXME: This check is broken if LoopHeaders is not populated.
+ if (PN && PN->getParent() == BB && !LoopHeaders.contains(BB)) {
const DataLayout &DL = PN->getModule()->getDataLayout();
// We can do this simplification if any comparisons fold to true or false.
// See if any do.
@@ -1433,8 +1432,8 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
// Create a PHI node at the start of the block for the PRE'd load value.
pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
- PHINode *PN = PHINode::Create(LoadI->getType(), std::distance(PB, PE), "",
- &LoadBB->front());
+ PHINode *PN = PHINode::Create(LoadI->getType(), std::distance(PB, PE), "");
+ PN->insertBefore(LoadBB->begin());
PN->takeName(LoadI);
PN->setDebugLoc(LoadI->getDebugLoc());
@@ -1901,7 +1900,7 @@ bool JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) {
return false;
const Instruction *TI = SinglePred->getTerminator();
- if (TI->isExceptionalTerminator() || TI->getNumSuccessors() != 1 ||
+ if (TI->isSpecialTerminator() || TI->getNumSuccessors() != 1 ||
SinglePred == BB || hasAddressTakenAndUsed(BB))
return false;
@@ -1956,6 +1955,7 @@ void JumpThreadingPass::updateSSA(
SSAUpdater SSAUpdate;
SmallVector<Use *, 16> UsesToRename;
SmallVector<DbgValueInst *, 4> DbgValues;
+ SmallVector<DPValue *, 4> DPValues;
for (Instruction &I : *BB) {
// Scan all uses of this instruction to see if it is used outside of its
@@ -1972,15 +1972,16 @@ void JumpThreadingPass::updateSSA(
}
// Find debug values outside of the block
- findDbgValues(DbgValues, &I);
- DbgValues.erase(remove_if(DbgValues,
- [&](const DbgValueInst *DbgVal) {
- return DbgVal->getParent() == BB;
- }),
- DbgValues.end());
+ findDbgValues(DbgValues, &I, &DPValues);
+ llvm::erase_if(DbgValues, [&](const DbgValueInst *DbgVal) {
+ return DbgVal->getParent() == BB;
+ });
+ llvm::erase_if(DPValues, [&](const DPValue *DPVal) {
+ return DPVal->getParent() == BB;
+ });
// If there are no uses outside the block, we're done with this instruction.
- if (UsesToRename.empty() && DbgValues.empty())
+ if (UsesToRename.empty() && DbgValues.empty() && DPValues.empty())
continue;
LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
@@ -1993,9 +1994,11 @@ void JumpThreadingPass::updateSSA(
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
- if (!DbgValues.empty()) {
+ if (!DbgValues.empty() || !DPValues.empty()) {
SSAUpdate.UpdateDebugValues(&I, DbgValues);
+ SSAUpdate.UpdateDebugValues(&I, DPValues);
DbgValues.clear();
+ DPValues.clear();
}
LLVM_DEBUG(dbgs() << "\n");
@@ -2038,6 +2041,26 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
return true;
};
+ // Duplicate implementation of the above dbg.value code, using DPValues
+ // instead.
+ auto RetargetDPValueIfPossible = [&](DPValue *DPV) {
+ SmallSet<std::pair<Value *, Value *>, 16> OperandsToRemap;
+ for (auto *Op : DPV->location_ops()) {
+ Instruction *OpInst = dyn_cast<Instruction>(Op);
+ if (!OpInst)
+ continue;
+
+ auto I = ValueMapping.find(OpInst);
+ if (I != ValueMapping.end())
+ OperandsToRemap.insert({OpInst, I->second});
+ }
+
+ for (auto &[OldOp, MappedOp] : OperandsToRemap)
+ DPV->replaceVariableLocationOp(OldOp, MappedOp);
+ };
+
+ BasicBlock *RangeBB = BI->getParent();
+
// Clone the phi nodes of the source basic block into NewBB. The resulting
// phi nodes are trivial since NewBB only has one predecessor, but SSAUpdater
// might need to rewrite the operand of the cloned phi.
@@ -2056,6 +2079,12 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
identifyNoAliasScopesToClone(BI, BE, NoAliasScopes);
cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);
+ auto CloneAndRemapDbgInfo = [&](Instruction *NewInst, Instruction *From) {
+ auto DPVRange = NewInst->cloneDebugInfoFrom(From);
+ for (DPValue &DPV : DPVRange)
+ RetargetDPValueIfPossible(&DPV);
+ };
+
// Clone the non-phi instructions of the source basic block into NewBB,
// keeping track of the mapping and using it to remap operands in the cloned
// instructions.
@@ -2066,6 +2095,8 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
ValueMapping[&*BI] = New;
adaptNoAliasScopes(New, ClonedScopes, Context);
+ CloneAndRemapDbgInfo(New, &*BI);
+
if (RetargetDbgValueIfPossible(New))
continue;
@@ -2078,6 +2109,17 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
}
}
+  // There may be DPValues on the terminator; clone directly from marker
+  // to marker, as there isn't an instruction there.
+ if (BE != RangeBB->end() && BE->hasDbgValues()) {
+ // Dump them at the end.
+ DPMarker *Marker = RangeBB->getMarker(BE);
+ DPMarker *EndMarker = NewBB->createMarker(NewBB->end());
+ auto DPVRange = EndMarker->cloneDebugInfoFrom(Marker, std::nullopt);
+ for (DPValue &DPV : DPVRange)
+ RetargetDPValueIfPossible(&DPV);
+ }
+
return ValueMapping;
}
@@ -2247,7 +2289,7 @@ void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
assert(BPI && "It's expected BPI to exist along with BFI");
auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *
BPI->getEdgeProbability(PredPredBB, PredBB);
- BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
+ BFI->setBlockFreq(NewBB, NewBBFreq);
}
// We are going to have to map operands from the original BB block to the new
@@ -2373,7 +2415,7 @@ void JumpThreadingPass::threadEdge(BasicBlock *BB,
assert(BPI && "It's expected BPI to exist along with BFI");
auto NewBBFreq =
BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
- BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
+ BFI->setBlockFreq(NewBB, NewBBFreq);
}
// Copy all the instructions from BB to NewBB except the terminator.
@@ -2458,7 +2500,7 @@ BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
NewBBFreq += FreqMap.lookup(Pred);
}
if (BFI) // Apply the summed frequency to NewBB.
- BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
+ BFI->setBlockFreq(NewBB, NewBBFreq);
}
DTU->applyUpdatesPermissive(Updates);
@@ -2498,7 +2540,7 @@ void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
auto NewBBFreq = BFI->getBlockFreq(NewBB);
auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB);
auto BBNewFreq = BBOrigFreq - NewBBFreq;
- BFI->setBlockFreq(BB, BBNewFreq.getFrequency());
+ BFI->setBlockFreq(BB, BBNewFreq);
// Collect updated outgoing edges' frequencies from BB and use them to update
// edge probabilities.
@@ -2569,9 +2611,7 @@ void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
Weights.push_back(Prob.getNumerator());
auto TI = BB->getTerminator();
- TI->setMetadata(
- LLVMContext::MD_prof,
- MDBuilder(TI->getParent()->getContext()).createBranchWeights(Weights));
+ setBranchWeights(*TI, Weights);
}
}
@@ -2665,6 +2705,9 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
if (!New->mayHaveSideEffects()) {
New->eraseFromParent();
New = nullptr;
+ // Clone debug-info on the elided instruction to the destination
+ // position.
+ OldPredBranch->cloneDebugInfoFrom(&*BI, std::nullopt, true);
}
} else {
ValueMapping[&*BI] = New;
@@ -2672,6 +2715,8 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
if (New) {
// Otherwise, insert the new instruction into the block.
New->setName(BI->getName());
+ // Clone across any debug-info attached to the old instruction.
+ New->cloneDebugInfoFrom(&*BI);
// Update Dominance from simplified New instruction operands.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
if (BasicBlock *SuccBB = dyn_cast<BasicBlock>(New->getOperand(i)))
@@ -2756,7 +2801,7 @@ void JumpThreadingPass::unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,
BranchProbability PredToNewBBProb = BranchProbability::getBranchProbability(
TrueWeight, TrueWeight + FalseWeight);
auto NewBBFreq = BFI->getBlockFreq(Pred) * PredToNewBBProb;
- BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
+ BFI->setBlockFreq(NewBB, NewBBFreq);
}
// The select is now dead.
@@ -2926,7 +2971,9 @@ bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) {
Value *Cond = SI->getCondition();
if (!isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI))
Cond = new FreezeInst(Cond, "cond.fr", SI);
- Instruction *Term = SplitBlockAndInsertIfThen(Cond, SI, false);
+ MDNode *BranchWeights = getBranchWeightMDNode(*SI);
+ Instruction *Term =
+ SplitBlockAndInsertIfThen(Cond, SI, false, BranchWeights);
BasicBlock *SplitBB = SI->getParent();
BasicBlock *NewBB = Term->getParent();
PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
@@ -3061,8 +3108,8 @@ bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard,
if (!isa<PHINode>(&*BI))
ToRemove.push_back(&*BI);
- Instruction *InsertionPoint = &*BB->getFirstInsertionPt();
- assert(InsertionPoint && "Empty block?");
+ BasicBlock::iterator InsertionPoint = BB->getFirstInsertionPt();
+ assert(InsertionPoint != BB->end() && "Empty block?");
// Substitute with Phis & remove.
for (auto *Inst : reverse(ToRemove)) {
if (!Inst->use_empty()) {
@@ -3072,6 +3119,7 @@ bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard,
NewPN->insertBefore(InsertionPoint);
Inst->replaceAllUsesWith(NewPN);
}
+ Inst->dropDbgValues();
Inst->eraseFromParent();
}
return true;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
index f8fab03f151d..9117378568b7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -108,6 +108,8 @@ STATISTIC(NumGEPsHoisted,
"Number of geps reassociated and hoisted out of the loop");
STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated "
"and hoisted out of the loop");
+STATISTIC(NumFPAssociationsHoisted, "Number of invariant FP expressions "
+ "reassociated and hoisted out of the loop");
/// Memory promotion is enabled by default.
static cl::opt<bool>
@@ -127,6 +129,12 @@ static cl::opt<uint32_t> MaxNumUsesTraversed(
cl::desc("Max num uses visited for identifying load "
"invariance in loop using invariant start (default = 8)"));
+static cl::opt<unsigned> FPAssociationUpperLimit(
+ "licm-max-num-fp-reassociations", cl::init(5U), cl::Hidden,
+ cl::desc(
+ "Set upper limit for the number of transformations performed "
+ "during a single round of hoisting the reassociated expressions."));
+
// Experimental option to allow imprecision in LICM in pathological cases, in
// exchange for faster compile. This is to be removed if MemorySSA starts to
// address the same issue. LICM calls MemorySSAWalker's
@@ -185,7 +193,7 @@ static Instruction *cloneInstructionInExitBlock(
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
MemorySSAUpdater &MSSAU);
-static void moveInstructionBefore(Instruction &I, Instruction &Dest,
+static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
ICFLoopSafetyInfo &SafetyInfo,
MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
@@ -473,12 +481,12 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
});
if (!HasCatchSwitch) {
- SmallVector<Instruction *, 8> InsertPts;
+ SmallVector<BasicBlock::iterator, 8> InsertPts;
SmallVector<MemoryAccess *, 8> MSSAInsertPts;
InsertPts.reserve(ExitBlocks.size());
MSSAInsertPts.reserve(ExitBlocks.size());
for (BasicBlock *ExitBlock : ExitBlocks) {
- InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
+ InsertPts.push_back(ExitBlock->getFirstInsertionPt());
MSSAInsertPts.push_back(nullptr);
}
@@ -985,7 +993,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
// loop invariant). If so make them unconditional by moving them to their
// immediate dominator. We iterate through the instructions in reverse order
// which ensures that when we rehoist an instruction we rehoist its operands,
- // and also keep track of where in the block we are rehoisting to to make sure
+ // and also keep track of where in the block we are rehoisting to make sure
// that we rehoist instructions before the instructions that use them.
Instruction *HoistPoint = nullptr;
if (ControlFlowHoisting) {
@@ -1003,7 +1011,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
LLVM_DEBUG(dbgs() << "LICM rehoisting to "
<< HoistPoint->getParent()->getNameOrAsOperand()
<< ": " << *I << "\n");
- moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU, SE);
+ moveInstructionBefore(*I, HoistPoint->getIterator(), *SafetyInfo, MSSAU,
+ SE);
HoistPoint = I;
Changed = true;
}
@@ -1031,7 +1040,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
// invariant.start has no uses.
static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
Loop *CurLoop) {
- Value *Addr = LI->getOperand(0);
+ Value *Addr = LI->getPointerOperand();
const DataLayout &DL = LI->getModule()->getDataLayout();
const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
@@ -1047,20 +1056,6 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
if (LocSizeInBits.isScalable())
return false;
- // if the type is i8 addrspace(x)*, we know this is the type of
- // llvm.invariant.start operand
- auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
- LI->getPointerAddressSpace());
- unsigned BitcastsVisited = 0;
- // Look through bitcasts until we reach the i8* type (this is invariant.start
- // operand type).
- while (Addr->getType() != PtrInt8Ty) {
- auto *BC = dyn_cast<BitCastInst>(Addr);
- // Avoid traversing high number of bitcast uses.
- if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
- return false;
- Addr = BC->getOperand(0);
- }
// If we've ended up at a global/constant, bail. We shouldn't be looking at
// uselists for non-local Values in a loop pass.
if (isa<Constant>(Addr))
@@ -1480,8 +1475,9 @@ static Instruction *cloneInstructionInExitBlock(
if (LI->wouldBeOutOfLoopUseRequiringLCSSA(Op.get(), PN.getParent())) {
auto *OInst = cast<Instruction>(Op.get());
PHINode *OpPN =
- PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
- OInst->getName() + ".lcssa", &ExitBlock.front());
+ PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
+ OInst->getName() + ".lcssa");
+ OpPN->insertBefore(ExitBlock.begin());
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
Op = OpPN;
@@ -1496,16 +1492,17 @@ static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
I.eraseFromParent();
}
-static void moveInstructionBefore(Instruction &I, Instruction &Dest,
+static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
ICFLoopSafetyInfo &SafetyInfo,
MemorySSAUpdater &MSSAU,
ScalarEvolution *SE) {
SafetyInfo.removeInstruction(&I);
- SafetyInfo.insertInstructionTo(&I, Dest.getParent());
- I.moveBefore(&Dest);
+ SafetyInfo.insertInstructionTo(&I, Dest->getParent());
+ I.moveBefore(*Dest->getParent(), Dest);
if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
MSSAU.getMemorySSA()->getMemoryAccess(&I)))
- MSSAU.moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::BeforeTerminator);
+ MSSAU.moveToPlace(OldMemAcc, Dest->getParent(),
+ MemorySSA::BeforeTerminator);
if (SE)
SE->forgetBlockAndLoopDispositions(&I);
}
@@ -1752,10 +1749,11 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
if (isa<PHINode>(I))
// Move the new node to the end of the phi list in the destination block.
- moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU, SE);
+ moveInstructionBefore(I, Dest->getFirstNonPHIIt(), *SafetyInfo, MSSAU, SE);
else
// Move the new node to the destination block, before its terminator.
- moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU, SE);
+ moveInstructionBefore(I, Dest->getTerminator()->getIterator(), *SafetyInfo,
+ MSSAU, SE);
I.updateLocationAfterHoist();
@@ -1799,7 +1797,7 @@ namespace {
class LoopPromoter : public LoadAndStorePromoter {
Value *SomePtr; // Designated pointer to store to.
SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
- SmallVectorImpl<Instruction *> &LoopInsertPts;
+ SmallVectorImpl<BasicBlock::iterator> &LoopInsertPts;
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
PredIteratorCache &PredCache;
MemorySSAUpdater &MSSAU;
@@ -1823,7 +1821,8 @@ class LoopPromoter : public LoadAndStorePromoter {
// We need to create an LCSSA PHI node for the incoming value and
// store that.
PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
- I->getName() + ".lcssa", &BB->front());
+ I->getName() + ".lcssa");
+ PN->insertBefore(BB->begin());
for (BasicBlock *Pred : PredCache.get(BB))
PN->addIncoming(I, Pred);
return PN;
@@ -1832,7 +1831,7 @@ class LoopPromoter : public LoadAndStorePromoter {
public:
LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
SmallVectorImpl<BasicBlock *> &LEB,
- SmallVectorImpl<Instruction *> &LIP,
+ SmallVectorImpl<BasicBlock::iterator> &LIP,
SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
MemorySSAUpdater &MSSAU, LoopInfo &li, DebugLoc dl,
Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
@@ -1855,7 +1854,7 @@ public:
Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
- Instruction *InsertPos = LoopInsertPts[i];
+ BasicBlock::iterator InsertPos = LoopInsertPts[i];
StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
if (UnorderedAtomic)
NewSI->setOrdering(AtomicOrdering::Unordered);
@@ -1934,23 +1933,6 @@ bool isNotVisibleOnUnwindInLoop(const Value *Object, const Loop *L,
isNotCapturedBeforeOrInLoop(Object, L, DT);
}
-// We don't consider globals as writable: While the physical memory is writable,
-// we may not have provenance to perform the write.
-bool isWritableObject(const Value *Object) {
- // TODO: Alloca might not be writable after its lifetime ends.
- // See https://github.com/llvm/llvm-project/issues/51838.
- if (isa<AllocaInst>(Object))
- return true;
-
- // TODO: Also handle sret.
- if (auto *A = dyn_cast<Argument>(Object))
- return A->hasByValAttr();
-
- // TODO: Noalias has nothing to do with writability, this should check for
- // an allocator function.
- return isNoAliasCall(Object);
-}
-
bool isThreadLocalObject(const Value *Object, const Loop *L, DominatorTree *DT,
TargetTransformInfo *TTI) {
// The object must be function-local to start with, and then not captured
@@ -1970,7 +1952,7 @@ bool isThreadLocalObject(const Value *Object, const Loop *L, DominatorTree *DT,
bool llvm::promoteLoopAccessesToScalars(
const SmallSetVector<Value *, 8> &PointerMustAliases,
SmallVectorImpl<BasicBlock *> &ExitBlocks,
- SmallVectorImpl<Instruction *> &InsertPts,
+ SmallVectorImpl<BasicBlock::iterator> &InsertPts,
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
const TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop,
@@ -2192,7 +2174,10 @@ bool llvm::promoteLoopAccessesToScalars(
// violating the memory model.
if (StoreSafety == StoreSafetyUnknown) {
Value *Object = getUnderlyingObject(SomePtr);
- if (isWritableObject(Object) &&
+ bool ExplicitlyDereferenceableOnly;
+ if (isWritableObject(Object, ExplicitlyDereferenceableOnly) &&
+ (!ExplicitlyDereferenceableOnly ||
+ isDereferenceablePointer(SomePtr, AccessTy, MDL)) &&
isThreadLocalObject(Object, CurLoop, DT, TTI))
StoreSafety = StoreSafe;
}
@@ -2511,7 +2496,7 @@ static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
// handle both offsets being non-negative.
const DataLayout &DL = GEP->getModule()->getDataLayout();
auto NonNegative = [&](Value *V) {
- return isKnownNonNegative(V, DL, 0, AC, GEP, DT);
+ return isKnownNonNegative(V, SimplifyQuery(DL, DT, AC, GEP));
};
bool IsInBounds = Src->isInBounds() && GEP->isInBounds() &&
all_of(Src->indices(), NonNegative) &&
@@ -2561,8 +2546,9 @@ static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS,
// we want to avoid this.
auto &DL = L.getHeader()->getModule()->getDataLayout();
bool ProvedNoOverflowAfterReassociate =
- computeOverflowForSignedSub(InvariantRHS, InvariantOp, DL, AC, &ICmp,
- DT) == llvm::OverflowResult::NeverOverflows;
+ computeOverflowForSignedSub(InvariantRHS, InvariantOp,
+ SimplifyQuery(DL, DT, AC, &ICmp)) ==
+ llvm::OverflowResult::NeverOverflows;
if (!ProvedNoOverflowAfterReassociate)
return false;
auto *Preheader = L.getLoopPreheader();
@@ -2612,15 +2598,16 @@ static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS,
// we want to avoid this. Likewise, for "C1 - LV < C2" we need to prove that
// "C1 - C2" does not overflow.
auto &DL = L.getHeader()->getModule()->getDataLayout();
+ SimplifyQuery SQ(DL, DT, AC, &ICmp);
if (VariantSubtracted) {
// C1 - LV < C2 --> LV > C1 - C2
- if (computeOverflowForSignedSub(InvariantOp, InvariantRHS, DL, AC, &ICmp,
- DT) != llvm::OverflowResult::NeverOverflows)
+ if (computeOverflowForSignedSub(InvariantOp, InvariantRHS, SQ) !=
+ llvm::OverflowResult::NeverOverflows)
return false;
} else {
// LV - C1 < C2 --> LV < C1 + C2
- if (computeOverflowForSignedAdd(InvariantOp, InvariantRHS, DL, AC, &ICmp,
- DT) != llvm::OverflowResult::NeverOverflows)
+ if (computeOverflowForSignedAdd(InvariantOp, InvariantRHS, SQ) !=
+ llvm::OverflowResult::NeverOverflows)
return false;
}
auto *Preheader = L.getLoopPreheader();
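These hunks, together with the hoistGEP change above and the LoopFlatten change further below, follow the ValueTracking migration from loose (DL, AC, CtxI, DT) argument lists to a single SimplifyQuery bundle. A sketch of the call-site pattern, assuming the usual analyses are already in scope:

    // Old: computeOverflowForSignedAdd(LHS, RHS, DL, AC, CtxI, DT);
    // New: bundle the context once, then reuse it across queries.
    llvm::SimplifyQuery SQ(DL, DT, AC, CtxI);
    llvm::OverflowResult OR = llvm::computeOverflowForSignedAdd(LHS, RHS, SQ);
    bool NonNeg = llvm::isKnownNonNegative(V, SQ);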
@@ -2674,6 +2661,72 @@ static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
return false;
}
+/// Try to reassociate expressions like ((A1 * B1) + (A2 * B2) + ...) * C where
+/// A1, A2, ... and C are loop invariants into expressions like
+/// ((A1 * C * B1) + (A2 * C * B2) + ...) and hoist the (A1 * C), (A2 * C), ...
+/// invariant expressions. This function returns true only if any hoisting has
+/// actually occurred.
+static bool hoistFPAssociation(Instruction &I, Loop &L,
+ ICFLoopSafetyInfo &SafetyInfo,
+ MemorySSAUpdater &MSSAU, AssumptionCache *AC,
+ DominatorTree *DT) {
+ using namespace PatternMatch;
+ Value *VariantOp = nullptr, *InvariantOp = nullptr;
+
+ if (!match(&I, m_FMul(m_Value(VariantOp), m_Value(InvariantOp))) ||
+ !I.hasAllowReassoc() || !I.hasNoSignedZeros())
+ return false;
+ if (L.isLoopInvariant(VariantOp))
+ std::swap(VariantOp, InvariantOp);
+ if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
+ return false;
+ Value *Factor = InvariantOp;
+
+ // First, we need to make sure we should do the transformation.
+ SmallVector<Use *> Changes;
+ SmallVector<BinaryOperator *> Worklist;
+ if (BinaryOperator *VariantBinOp = dyn_cast<BinaryOperator>(VariantOp))
+ Worklist.push_back(VariantBinOp);
+ while (!Worklist.empty()) {
+ BinaryOperator *BO = Worklist.pop_back_val();
+ if (!BO->hasOneUse() || !BO->hasAllowReassoc() || !BO->hasNoSignedZeros())
+ return false;
+ BinaryOperator *Op0, *Op1;
+ if (match(BO, m_FAdd(m_BinOp(Op0), m_BinOp(Op1)))) {
+ Worklist.push_back(Op0);
+ Worklist.push_back(Op1);
+ continue;
+ }
+ if (BO->getOpcode() != Instruction::FMul || L.isLoopInvariant(BO))
+ return false;
+ Use &U0 = BO->getOperandUse(0);
+ Use &U1 = BO->getOperandUse(1);
+ if (L.isLoopInvariant(U0))
+ Changes.push_back(&U0);
+ else if (L.isLoopInvariant(U1))
+ Changes.push_back(&U1);
+ else
+ return false;
+ if (Changes.size() > FPAssociationUpperLimit)
+ return false;
+ }
+ if (Changes.empty())
+ return false;
+
+ // We know we should do it so let's do the transformation.
+ auto *Preheader = L.getLoopPreheader();
+ assert(Preheader && "Loop is not in simplify form?");
+ IRBuilder<> Builder(Preheader->getTerminator());
+ for (auto *U : Changes) {
+ assert(L.isLoopInvariant(U->get()));
+ Instruction *Ins = cast<Instruction>(U->getUser());
+ U->set(Builder.CreateFMulFMF(U->get(), Factor, Ins, "factor.op.fmul"));
+ }
+ I.replaceAllUsesWith(VariantOp);
+ eraseInstruction(I, SafetyInfo, MSSAU);
+ return true;
+}
+
static bool hoistArithmetics(Instruction &I, Loop &L,
ICFLoopSafetyInfo &SafetyInfo,
MemorySSAUpdater &MSSAU, AssumptionCache *AC,
@@ -2701,6 +2754,12 @@ static bool hoistArithmetics(Instruction &I, Loop &L,
return true;
}
+ if (hoistFPAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
+ ++NumHoisted;
+ ++NumFPAssociationsHoisted;
+ return true;
+ }
+
return false;
}
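To make the new hoistFPAssociation concrete, here is a sketch of its effect on C-like source, assuming A1, A2, and C are loop-invariant and every fadd/fmul in the chain carries the reassoc and nsz fast-math flags the pass checks for:

    // Before: the multiply by C is executed on every iteration.
    for (int i = 0; i < n; ++i)
      out[i] = (A1 * b1[i] + A2 * b2[i]) * C;

    // After: the invariant products are hoisted to the preheader.
    float f1 = A1 * C, f2 = A2 * C;
    for (int i = 0; i < n; ++i)
      out[i] = f1 * b1[i] + f2 * b2[i];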
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp
index 9ae55b9018da..3d3f22d686e3 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp
@@ -20,7 +20,8 @@ PreservedAnalyses LoopAccessInfoPrinterPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &LAIs = AM.getResult<LoopAccessAnalysis>(F);
auto &LI = AM.getResult<LoopAnalysis>(F);
- OS << "Loop access info in function '" << F.getName() << "':\n";
+ OS << "Printing analysis 'Loop Access Analysis' for function '" << F.getName()
+ << "':\n";
SmallPriorityWorklist<Loop *, 4> Worklist;
appendLoopsToWorklist(LI, Worklist);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
index 2b9800f11912..9a27a08c86eb 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
@@ -430,7 +430,7 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
ExitingCond.BI->setSuccessor(1, PostLoopPreHeader);
// Update phi node in exit block of post-loop.
- Builder.SetInsertPoint(&PostLoopPreHeader->front());
+ Builder.SetInsertPoint(PostLoopPreHeader, PostLoopPreHeader->begin());
for (PHINode &PN : PostLoop->getExitBlock()->phis()) {
for (auto i : seq<int>(0, PN.getNumOperands())) {
// Check incoming block is pre-loop's exiting block.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index 7c2770979a90..cc1f56014eee 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -399,7 +399,7 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
continue;
unsigned PtrAddrSpace = NextLSCEV->getType()->getPointerAddressSpace();
- Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), PtrAddrSpace);
+ Type *I8Ptr = PointerType::get(BB->getContext(), PtrAddrSpace);
Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, P.InsertPt);
IRBuilder<> Builder(P.InsertPt);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 27196e46ca56..626888c74bad 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -104,9 +104,9 @@ static cl::opt<unsigned> DistributeSCEVCheckThreshold(
static cl::opt<unsigned> PragmaDistributeSCEVCheckThreshold(
"loop-distribute-scev-check-threshold-with-pragma", cl::init(128),
cl::Hidden,
- cl::desc(
- "The maximum number of SCEV checks allowed for Loop "
- "Distribution for loop marked with #pragma loop distribute(enable)"));
+ cl::desc("The maximum number of SCEV checks allowed for Loop "
+ "Distribution for loop marked with #pragma clang loop "
+ "distribute(enable)"));
static cl::opt<bool> EnableLoopDistribute(
"enable-loop-distribute", cl::Hidden,
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
index edc8a4956dd1..b1add3c42976 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -641,8 +641,9 @@ static OverflowResult checkOverflow(FlattenInfo &FI, DominatorTree *DT,
// Check if the multiply could not overflow due to known ranges of the
// input values.
OverflowResult OR = computeOverflowForUnsignedMul(
- FI.InnerTripCount, FI.OuterTripCount, DL, AC,
- FI.OuterLoop->getLoopPreheader()->getTerminator(), DT);
+ FI.InnerTripCount, FI.OuterTripCount,
+ SimplifyQuery(DL, DT, AC,
+ FI.OuterLoop->getLoopPreheader()->getTerminator()));
if (OR != OverflowResult::MayOverflow)
return OR;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index d35b562be0aa..e0b224d5ef73 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -1411,7 +1411,7 @@ private:
}
// Walk through all uses in FC1. For each use, find the reaching def. If the
- // def is located in FC0 then it is is not safe to fuse.
+ // def is located in FC0 then it is not safe to fuse.
for (BasicBlock *BB : FC1.L->blocks())
for (Instruction &I : *BB)
for (auto &Op : I.operands())
@@ -1473,12 +1473,13 @@ private:
for (Instruction *I : HoistInsts) {
assert(I->getParent() == FC1.Preheader);
- I->moveBefore(FC0.Preheader->getTerminator());
+ I->moveBefore(*FC0.Preheader,
+ FC0.Preheader->getTerminator()->getIterator());
}
// insert instructions in reverse order to maintain dominance relationship
for (Instruction *I : reverse(SinkInsts)) {
assert(I->getParent() == FC1.Preheader);
- I->moveBefore(&*FC1.ExitBlock->getFirstInsertionPt());
+ I->moveBefore(*FC1.ExitBlock, FC1.ExitBlock->getFirstInsertionPt());
}
}
@@ -1491,7 +1492,7 @@ private:
/// 2. The successors of the guard have the same flow into/around the loop.
/// If the compare instructions are identical, then the first successor of the
/// guard must go to the same place (either the preheader of the loop or the
- /// NonLoopBlock). In other words, the the first successor of both loops must
+ /// NonLoopBlock). In other words, the first successor of both loops must
/// both go into the loop (i.e., the preheader) or go around the loop (i.e.,
/// the NonLoopBlock). The same must be true for the second successor.
bool haveIdenticalGuards(const FusionCandidate &FC0,
@@ -1624,7 +1625,7 @@ private:
// first, or undef otherwise. This is sound as exiting the first implies the
// second will exit too, __without__ taking the back-edge. [Their
  // trip-counts are equal after all.]
- // KB: Would this sequence be simpler to just just make FC0.ExitingBlock go
+ // KB: Would this sequence be simpler to just make FC0.ExitingBlock go
// to FC1.Header? I think this is basically what the three sequences are
// trying to accomplish; however, doing this directly in the CFG may mean
// the DT/PDT becomes invalid
@@ -1671,7 +1672,7 @@ private:
// exiting the first and jumping to the header of the second does not break
// the SSA property of the phis originally in the first loop. See also the
// comment above.
- Instruction *L1HeaderIP = &FC1.Header->front();
+ BasicBlock::iterator L1HeaderIP = FC1.Header->begin();
for (PHINode *LCPHI : OriginalFC0PHIs) {
int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch);
assert(L1LatchBBIdx >= 0 &&
@@ -1679,8 +1680,9 @@ private:
Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx);
- PHINode *L1HeaderPHI = PHINode::Create(
- LCV->getType(), 2, LCPHI->getName() + ".afterFC0", L1HeaderIP);
+ PHINode *L1HeaderPHI =
+ PHINode::Create(LCV->getType(), 2, LCPHI->getName() + ".afterFC0");
+ L1HeaderPHI->insertBefore(L1HeaderIP);
L1HeaderPHI->addIncoming(LCV, FC0.Latch);
L1HeaderPHI->addIncoming(UndefValue::get(LCV->getType()),
FC0.ExitingBlock);
@@ -1953,7 +1955,7 @@ private:
// exiting the first and jumping to the header of the second does not break
// the SSA property of the phis originally in the first loop. See also the
// comment above.
- Instruction *L1HeaderIP = &FC1.Header->front();
+ BasicBlock::iterator L1HeaderIP = FC1.Header->begin();
for (PHINode *LCPHI : OriginalFC0PHIs) {
int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch);
assert(L1LatchBBIdx >= 0 &&
@@ -1961,8 +1963,9 @@ private:
Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx);
- PHINode *L1HeaderPHI = PHINode::Create(
- LCV->getType(), 2, LCPHI->getName() + ".afterFC0", L1HeaderIP);
+ PHINode *L1HeaderPHI =
+ PHINode::Create(LCV->getType(), 2, LCPHI->getName() + ".afterFC0");
+ L1HeaderPHI->insertBefore(L1HeaderIP);
L1HeaderPHI->addIncoming(LCV, FC0.Latch);
L1HeaderPHI->addIncoming(UndefValue::get(LCV->getType()),
FC0.ExitingBlock);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 8572a442e784..3721564890dd 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -24,12 +24,6 @@
// memcmp, strlen, etc.
// Future floating point idioms to recognize in -ffast-math mode:
// fpowi
-// Future integer operation idioms to recognize:
-// ctpop
-//
-// Beware that isel's default lowering for ctpop is highly inefficient for
-// i64 and larger types when i64 is legal and the value has few bits set. It
-// would be good to enhance isel to emit a loop for ctpop in this case.
//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
@@ -948,9 +942,13 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
// to be exactly the size of the memset, which is (BECount+1)*StoreSize
const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount);
const SCEVConstant *ConstSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
- if (BECst && ConstSize)
- AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
- ConstSize->getValue()->getZExtValue());
+ if (BECst && ConstSize) {
+ std::optional<uint64_t> BEInt = BECst->getAPInt().tryZExtValue();
+ std::optional<uint64_t> SizeInt = ConstSize->getAPInt().tryZExtValue();
+ // FIXME: Should this check for overflow?
+ if (BEInt && SizeInt)
+ AccessSize = LocationSize::precise((*BEInt + 1) * *SizeInt);
+ }
// TODO: For this to be really effective, we have to dive into the pointer
// operand in the store. Store to &A[i] of 100 will always return may alias
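APInt::tryZExtValue(), used above in place of an unconditional getZExtValue(), returns std::optional<uint64_t> and is empty when the value needs more than 64 bits, so oversized constants now fall back to an imprecise access size instead of hitting an assertion. A small illustration with assumed values:

    llvm::APInt Small(32, 7);
    llvm::APInt Huge = llvm::APInt(128, 1) << 100;
    assert(Small.tryZExtValue() == std::optional<uint64_t>(7)); // fits
    assert(!Huge.tryZExtValue().has_value()); // 101 active bits > 64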
@@ -1023,7 +1021,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
SCEVExpander Expander(*SE, *DL, "loop-idiom");
SCEVExpanderCleaner ExpCleaner(Expander);
- Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
+ Type *DestInt8PtrTy = Builder.getPtrTy(DestAS);
Type *IntIdxTy = DL->getIndexType(DestPtr->getType());
bool Changed = false;
@@ -1107,7 +1105,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
PatternValue, ".memset_pattern");
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these.
GV->setAlignment(Align(16));
- Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
+ Value *PatternPtr = GV;
NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
// Set the TBAA info if present.
@@ -1284,7 +1282,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// feeds the stores. Check for an alias by generating the base address and
// checking everything.
Value *StoreBasePtr = Expander.expandCodeFor(
- StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
+ StrStart, Builder.getPtrTy(StrAS), Preheader->getTerminator());
// From here on out, conservatively report to the pass manager that we've
// changed the IR, even if we later clean up these added instructions. There
@@ -1336,8 +1334,8 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// For a memcpy, we have to make sure that the input array is not being
// mutated by the loop.
- Value *LoadBasePtr = Expander.expandCodeFor(
- LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
+ Value *LoadBasePtr = Expander.expandCodeFor(LdStart, Builder.getPtrTy(LdAS),
+ Preheader->getTerminator());
// If the store is a memcpy instruction, we must check if it will write to
// the load memory locations. So remove it from the ignored stores.
@@ -2026,7 +2024,8 @@ void LoopIdiomRecognize::transformLoopToCountable(
auto *LbBr = cast<BranchInst>(Body->getTerminator());
ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
- PHINode *TcPhi = PHINode::Create(CountTy, 2, "tcphi", &Body->front());
+ PHINode *TcPhi = PHINode::Create(CountTy, 2, "tcphi");
+ TcPhi->insertBefore(Body->begin());
Builder.SetInsertPoint(LbCond);
Instruction *TcDec = cast<Instruction>(Builder.CreateSub(
@@ -2132,7 +2131,8 @@ void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
Type *Ty = TripCnt->getType();
- PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());
+ PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi");
+ TcPhi->insertBefore(Body->begin());
Builder.SetInsertPoint(LbCond);
Instruction *TcDec = cast<Instruction>(
@@ -2411,7 +2411,7 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
  // its use count.
Instruction *InsertPt = nullptr;
if (auto *BitPosI = dyn_cast<Instruction>(BitPos))
- InsertPt = BitPosI->getInsertionPointAfterDef();
+ InsertPt = &**BitPosI->getInsertionPointAfterDef();
else
InsertPt = &*DT->getRoot()->getFirstNonPHIOrDbgOrAlloca();
if (!InsertPt)
@@ -2493,7 +2493,7 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
// Step 4: Rewrite the loop into a countable form, with canonical IV.
// The new canonical induction variable.
- Builder.SetInsertPoint(&LoopHeaderBB->front());
+ Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->begin());
auto *IV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
// The induction itself.
@@ -2817,11 +2817,11 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
// Step 3: Rewrite the loop into a countable form, with canonical IV.
// The new canonical induction variable.
- Builder.SetInsertPoint(&LoopHeaderBB->front());
+ Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->begin());
auto *CIV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
// The induction itself.
- Builder.SetInsertPoint(LoopHeaderBB->getFirstNonPHI());
+ Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->getFirstNonPHIIt());
auto *CIVNext =
Builder.CreateAdd(CIV, ConstantInt::get(Ty, 1), CIV->getName() + ".next",
/*HasNUW=*/true, /*HasNSW=*/Bitwidth != 2);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
index c9798a80978d..cfe069d00bce 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -29,8 +29,6 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -172,46 +170,6 @@ static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI,
return Changed;
}
-namespace {
-
-class LoopInstSimplifyLegacyPass : public LoopPass {
-public:
- static char ID; // Pass ID, replacement for typeid
-
- LoopInstSimplifyLegacyPass() : LoopPass(ID) {
- initializeLoopInstSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override {
- if (skipLoop(L))
- return false;
- DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- AssumptionCache &AC =
- getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
- *L->getHeader()->getParent());
- const TargetLibraryInfo &TLI =
- getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
- *L->getHeader()->getParent());
- MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- MemorySSAUpdater MSSAU(MSSA);
-
- return simplifyLoopInst(*L, DT, LI, AC, TLI, &MSSAU);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.setPreservesCFG();
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- getLoopAnalysisUsage(AU);
- }
-};
-
-} // end anonymous namespace
-
PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
@@ -231,18 +189,3 @@ PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
PA.preserve<MemorySSAAnalysis>();
return PA;
}
-
-char LoopInstSimplifyLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(LoopInstSimplifyLegacyPass, "loop-instsimplify",
- "Simplify instructions in loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LoopPass)
-INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(LoopInstSimplifyLegacyPass, "loop-instsimplify",
- "Simplify instructions in loops", false, false)
-
-Pass *llvm::createLoopInstSimplifyPass() {
- return new LoopInstSimplifyLegacyPass();
-}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 91286ebcea33..277f530ee25f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1374,7 +1374,7 @@ bool LoopInterchangeTransform::transform() {
for (Instruction &I :
make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
std::prev(InnerLoopPreHeader->end()))))
- I.moveBefore(OuterLoopHeader->getTerminator());
+ I.moveBeforePreserving(OuterLoopHeader->getTerminator());
}
Transformed |= adjustLoopLinks();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index 179ccde8d035..5ec387300aac 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -195,7 +195,8 @@ public:
Instruction *Source = Dep.getSource(LAI);
Instruction *Destination = Dep.getDestination(LAI);
- if (Dep.Type == MemoryDepChecker::Dependence::Unknown) {
+ if (Dep.Type == MemoryDepChecker::Dependence::Unknown ||
+ Dep.Type == MemoryDepChecker::Dependence::IndirectUnsafe) {
if (isa<LoadInst>(Source))
LoadsWithUnknownDepedence.insert(Source);
if (isa<LoadInst>(Destination))
@@ -443,8 +444,8 @@ public:
Cand.Load->getType(), InitialPtr, "load_initial",
/* isVolatile */ false, Cand.Load->getAlign(), PH->getTerminator());
- PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded",
- &L->getHeader()->front());
+ PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded");
+ PHI->insertBefore(L->getHeader()->begin());
PHI->addIncoming(Initial, PH);
Type *LoadType = Initial->getType();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
index 2c8a3351281b..a4f2dbf9a582 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -269,11 +269,12 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
PI.pushBeforeNonSkippedPassCallback([&LAR, &LI](StringRef PassID, Any IR) {
if (isSpecialPass(PassID, {"PassManager"}))
return;
- assert(any_cast<const Loop *>(&IR) || any_cast<const LoopNest *>(&IR));
- const Loop **LPtr = any_cast<const Loop *>(&IR);
+ assert(llvm::any_cast<const Loop *>(&IR) ||
+ llvm::any_cast<const LoopNest *>(&IR));
+ const Loop **LPtr = llvm::any_cast<const Loop *>(&IR);
const Loop *L = LPtr ? *LPtr : nullptr;
if (!L)
- L = &any_cast<const LoopNest *>(IR)->getOutermostLoop();
+ L = &llvm::any_cast<const LoopNest *>(IR)->getOutermostLoop();
assert(L && "Loop should be valid for printing");
// Verify the loop structure and LCSSA form before visiting the loop.
@@ -312,7 +313,8 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
if (LAR.MSSA && !PassPA.getChecker<MemorySSAAnalysis>().preserved())
report_fatal_error("Loop pass manager using MemorySSA contains a pass "
- "that does not preserve MemorySSA");
+ "that does not preserve MemorySSA",
+ /*gen_crash_diag*/ false);
#ifndef NDEBUG
// LoopAnalysisResults should always be valid.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index 12852ae5c460..027dbb9c0f71 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -282,7 +282,7 @@ class LoopPredication {
Instruction *findInsertPt(Instruction *User, ArrayRef<Value*> Ops);
/// Same as above, *except* that this uses the SCEV definition of invariant
/// which is that an expression *can be made* invariant via SCEVExpander.
- /// Thus, this version is only suitable for finding an insert point to be be
+ /// Thus, this version is only suitable for finding an insert point to be
/// passed to SCEVExpander!
Instruction *findInsertPt(const SCEVExpander &Expander, Instruction *User,
ArrayRef<const SCEV *> Ops);
@@ -307,8 +307,9 @@ class LoopPredication {
widenICmpRangeCheckDecrementingLoop(LoopICmp LatchCheck, LoopICmp RangeCheck,
SCEVExpander &Expander,
Instruction *Guard);
- unsigned collectChecks(SmallVectorImpl<Value *> &Checks, Value *Condition,
- SCEVExpander &Expander, Instruction *Guard);
+ void widenChecks(SmallVectorImpl<Value *> &Checks,
+ SmallVectorImpl<Value *> &WidenedChecks,
+ SCEVExpander &Expander, Instruction *Guard);
bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);
bool widenWidenableBranchGuardConditions(BranchInst *Guard, SCEVExpander &Expander);
// If the loop always exits through another block in the loop, we should not
@@ -326,49 +327,8 @@ public:
bool runOnLoop(Loop *L);
};
-class LoopPredicationLegacyPass : public LoopPass {
-public:
- static char ID;
- LoopPredicationLegacyPass() : LoopPass(ID) {
- initializeLoopPredicationLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<BranchProbabilityInfoWrapperPass>();
- getLoopAnalysisUsage(AU);
- AU.addPreserved<MemorySSAWrapperPass>();
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override {
- if (skipLoop(L))
- return false;
- auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
- std::unique_ptr<MemorySSAUpdater> MSSAU;
- if (MSSAWP)
- MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA());
- auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- LoopPredication LP(AA, DT, SE, LI, MSSAU ? MSSAU.get() : nullptr);
- return LP.runOnLoop(L);
- }
-};
-
-char LoopPredicationLegacyPass::ID = 0;
} // end namespace
-INITIALIZE_PASS_BEGIN(LoopPredicationLegacyPass, "loop-predication",
- "Loop predication", false, false)
-INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopPass)
-INITIALIZE_PASS_END(LoopPredicationLegacyPass, "loop-predication",
- "Loop predication", false, false)
-
-Pass *llvm::createLoopPredicationPass() {
- return new LoopPredicationLegacyPass();
-}
-
PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &U) {
@@ -754,58 +714,15 @@ LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander,
}
}
-unsigned LoopPredication::collectChecks(SmallVectorImpl<Value *> &Checks,
- Value *Condition,
- SCEVExpander &Expander,
- Instruction *Guard) {
- unsigned NumWidened = 0;
- // The guard condition is expected to be in form of:
- // cond1 && cond2 && cond3 ...
- // Iterate over subconditions looking for icmp conditions which can be
- // widened across loop iterations. Widening these conditions remember the
- // resulting list of subconditions in Checks vector.
- SmallVector<Value *, 4> Worklist(1, Condition);
- SmallPtrSet<Value *, 4> Visited;
- Visited.insert(Condition);
- Value *WideableCond = nullptr;
- do {
- Value *Condition = Worklist.pop_back_val();
- Value *LHS, *RHS;
- using namespace llvm::PatternMatch;
- if (match(Condition, m_And(m_Value(LHS), m_Value(RHS)))) {
- if (Visited.insert(LHS).second)
- Worklist.push_back(LHS);
- if (Visited.insert(RHS).second)
- Worklist.push_back(RHS);
- continue;
- }
-
- if (match(Condition,
- m_Intrinsic<Intrinsic::experimental_widenable_condition>())) {
- // Pick any, we don't care which
- WideableCond = Condition;
- continue;
- }
-
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) {
- if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander,
- Guard)) {
- Checks.push_back(*NewRangeCheck);
- NumWidened++;
- continue;
+void LoopPredication::widenChecks(SmallVectorImpl<Value *> &Checks,
+ SmallVectorImpl<Value *> &WidenedChecks,
+ SCEVExpander &Expander, Instruction *Guard) {
+ for (auto &Check : Checks)
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Check))
+ if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander, Guard)) {
+ WidenedChecks.push_back(Check);
+ Check = *NewRangeCheck;
}
- }
-
- // Save the condition as is if we can't widen it
- Checks.push_back(Condition);
- } while (!Worklist.empty());
- // At the moment, our matching logic for wideable conditions implicitly
- // assumes we preserve the form: (br (and Cond, WC())). FIXME
- // Note that if there were multiple calls to wideable condition in the
- // traversal, we only need to keep one, and which one is arbitrary.
- if (WideableCond)
- Checks.push_back(WideableCond);
- return NumWidened;
}
bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
@@ -815,12 +732,13 @@ bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
TotalConsidered++;
SmallVector<Value *, 4> Checks;
- unsigned NumWidened = collectChecks(Checks, Guard->getOperand(0), Expander,
- Guard);
- if (NumWidened == 0)
+ SmallVector<Value *> WidenedChecks;
+ parseWidenableGuard(Guard, Checks);
+ widenChecks(Checks, WidenedChecks, Expander, Guard);
+ if (WidenedChecks.empty())
return false;
- TotalWidened += NumWidened;
+ TotalWidened += WidenedChecks.size();
// Emit the new guard condition
IRBuilder<> Builder(findInsertPt(Guard, Checks));
@@ -833,7 +751,7 @@ bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
}
RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr /* TLI */, MSSAU);
- LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
+ LLVM_DEBUG(dbgs() << "Widened checks = " << WidenedChecks.size() << "\n");
return true;
}
@@ -843,20 +761,19 @@ bool LoopPredication::widenWidenableBranchGuardConditions(
LLVM_DEBUG(dbgs() << "Processing guard:\n");
LLVM_DEBUG(BI->dump());
- Value *Cond, *WC;
- BasicBlock *IfTrueBB, *IfFalseBB;
- bool Parsed = parseWidenableBranch(BI, Cond, WC, IfTrueBB, IfFalseBB);
- assert(Parsed && "Must be able to parse widenable branch");
- (void)Parsed;
-
TotalConsidered++;
SmallVector<Value *, 4> Checks;
- unsigned NumWidened = collectChecks(Checks, BI->getCondition(),
- Expander, BI);
- if (NumWidened == 0)
+ SmallVector<Value *> WidenedChecks;
+ parseWidenableGuard(BI, Checks);
+  // At the moment, our matching logic for widenable conditions implicitly
+ // assumes we preserve the form: (br (and Cond, WC())). FIXME
+ auto WC = extractWidenableCondition(BI);
+ Checks.push_back(WC);
+ widenChecks(Checks, WidenedChecks, Expander, BI);
+ if (WidenedChecks.empty())
return false;
- TotalWidened += NumWidened;
+ TotalWidened += WidenedChecks.size();
// Emit the new guard condition
IRBuilder<> Builder(findInsertPt(BI, Checks));
@@ -864,17 +781,18 @@ bool LoopPredication::widenWidenableBranchGuardConditions(
auto *OldCond = BI->getCondition();
BI->setCondition(AllChecks);
if (InsertAssumesOfPredicatedGuardsConditions) {
+ BasicBlock *IfTrueBB = BI->getSuccessor(0);
Builder.SetInsertPoint(IfTrueBB, IfTrueBB->getFirstInsertionPt());
// If this block has other predecessors, we might not be able to use Cond.
// In this case, create a Phi where every other input is `true` and input
// from guard block is Cond.
- Value *AssumeCond = Cond;
+ Value *AssumeCond = Builder.CreateAnd(WidenedChecks);
if (!IfTrueBB->getUniquePredecessor()) {
auto *GuardBB = BI->getParent();
- auto *PN = Builder.CreatePHI(Cond->getType(), pred_size(IfTrueBB),
+ auto *PN = Builder.CreatePHI(AssumeCond->getType(), pred_size(IfTrueBB),
"assume.cond");
for (auto *Pred : predecessors(IfTrueBB))
- PN->addIncoming(Pred == GuardBB ? Cond : Builder.getTrue(), Pred);
+ PN->addIncoming(Pred == GuardBB ? AssumeCond : Builder.getTrue(), Pred);
AssumeCond = PN;
}
Builder.CreateAssumption(AssumeCond);
@@ -883,7 +801,7 @@ bool LoopPredication::widenWidenableBranchGuardConditions(
assert(isGuardAsWidenableBranch(BI) &&
"Stopped being a guard after transform?");
- LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
+ LLVM_DEBUG(dbgs() << "Widened checks = " << WidenedChecks.size() << "\n");
return true;
}
@@ -1008,6 +926,9 @@ bool LoopPredication::isLoopProfitableToPredicate() {
Numerator += Weight;
Denominator += Weight;
}
+  // If all weights are zero, act as if there were no profile data
+ if (Denominator == 0)
+ return BranchProbability::getBranchProbability(1, NumSucc);
return BranchProbability::getBranchProbability(Numerator, Denominator);
} else {
assert(LatchBlock != ExitingBlock &&
@@ -1070,13 +991,9 @@ static BranchInst *FindWidenableTerminatorAboveLoop(Loop *L, LoopInfo &LI) {
} while (true);
if (BasicBlock *Pred = BB->getSinglePredecessor()) {
- auto *Term = Pred->getTerminator();
-
- Value *Cond, *WC;
- BasicBlock *IfTrueBB, *IfFalseBB;
- if (parseWidenableBranch(Term, Cond, WC, IfTrueBB, IfFalseBB) &&
- IfTrueBB == BB)
- return cast<BranchInst>(Term);
+ if (auto *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
+ if (BI->getSuccessor(0) == BB && isWidenableBranch(BI))
+ return BI;
}
return nullptr;
}
@@ -1164,13 +1081,13 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
if (!BI)
continue;
- Use *Cond, *WC;
- BasicBlock *IfTrueBB, *IfFalseBB;
- if (parseWidenableBranch(BI, Cond, WC, IfTrueBB, IfFalseBB) &&
- L->contains(IfTrueBB)) {
- WC->set(ConstantInt::getTrue(IfTrueBB->getContext()));
- ChangedLoop = true;
- }
+ if (auto WC = extractWidenableCondition(BI))
+ if (L->contains(BI->getSuccessor(0))) {
+ assert(WC->hasOneUse() && "Not appropriate widenable branch!");
+ WC->user_back()->replaceUsesOfWith(
+ WC, ConstantInt::getTrue(BI->getContext()));
+ ChangedLoop = true;
+ }
}
if (ChangedLoop)
SE->forgetLoop(L);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index 8d59fdff9236..028a487ecdbc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -20,13 +20,11 @@
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
@@ -734,52 +732,3 @@ PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM,
PA.preserve<MemorySSAAnalysis>();
return PA;
}
-
-namespace {
-class LoopSimplifyCFGLegacyPass : public LoopPass {
-public:
- static char ID; // Pass ID, replacement for typeid
- LoopSimplifyCFGLegacyPass() : LoopPass(ID) {
- initializeLoopSimplifyCFGLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override {
- if (skipLoop(L))
- return false;
-
- DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- auto *MSSAA = getAnalysisIfAvailable<MemorySSAWrapperPass>();
- std::optional<MemorySSAUpdater> MSSAU;
- if (MSSAA)
- MSSAU = MemorySSAUpdater(&MSSAA->getMSSA());
- if (MSSAA && VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
- bool DeleteCurrentLoop = false;
- bool Changed = simplifyLoopCFG(*L, DT, LI, SE, MSSAU ? &*MSSAU : nullptr,
- DeleteCurrentLoop);
- if (DeleteCurrentLoop)
- LPM.markLoopAsDeleted(*L);
- return Changed;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addPreserved<MemorySSAWrapperPass>();
- AU.addPreserved<DependenceAnalysisWrapperPass>();
- getLoopAnalysisUsage(AU);
- }
-};
-} // end namespace
-
-char LoopSimplifyCFGLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopSimplifyCFGLegacyPass, "loop-simplifycfg",
- "Simplify loop CFG", false, false)
-INITIALIZE_PASS_DEPENDENCY(LoopPass)
-INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
-INITIALIZE_PASS_END(LoopSimplifyCFGLegacyPass, "loop-simplifycfg",
- "Simplify loop CFG", false, false)
-
-Pass *llvm::createLoopSimplifyCFGPass() {
- return new LoopSimplifyCFGLegacyPass();
-}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp
index 597c159682c5..6eedf95e7575 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp
@@ -36,13 +36,11 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
@@ -79,7 +77,7 @@ static cl::opt<unsigned> MaxNumberOfUseBBsForSinking(
/// AdjustedFreq(BBs) = 99 / SinkFrequencyPercentThreshold%
static BlockFrequency adjustedSumFreq(SmallPtrSetImpl<BasicBlock *> &BBs,
BlockFrequencyInfo &BFI) {
- BlockFrequency T = 0;
+ BlockFrequency T(0);
for (BasicBlock *B : BBs)
T += BFI.getBlockFreq(B);
if (BBs.size() > 1)
@@ -222,9 +220,11 @@ static bool sinkInstruction(
// order. No need to stable sort as the block numbers are a total ordering.
SmallVector<BasicBlock *, 2> SortedBBsToSinkInto;
llvm::append_range(SortedBBsToSinkInto, BBsToSinkInto);
- llvm::sort(SortedBBsToSinkInto, [&](BasicBlock *A, BasicBlock *B) {
- return LoopBlockNumber.find(A)->second < LoopBlockNumber.find(B)->second;
- });
+ if (SortedBBsToSinkInto.size() > 1) {
+ llvm::sort(SortedBBsToSinkInto, [&](BasicBlock *A, BasicBlock *B) {
+ return LoopBlockNumber.find(A)->second < LoopBlockNumber.find(B)->second;
+ });
+ }
BasicBlock *MoveBB = *SortedBBsToSinkInto.begin();
// FIXME: Optimize the efficiency for cloned value replacement. The current
@@ -388,58 +388,3 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
return PA;
}
-
-namespace {
-struct LegacyLoopSinkPass : public LoopPass {
- static char ID;
- LegacyLoopSinkPass() : LoopPass(ID) {
- initializeLegacyLoopSinkPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override {
- if (skipLoop(L))
- return false;
-
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader)
- return false;
-
- // Enable LoopSink only when runtime profile is available.
- // With static profile, the sinking decision may be sub-optimal.
- if (!Preheader->getParent()->hasProfileData())
- return false;
-
- AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
- MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
- auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
- bool Changed = sinkLoopInvariantInstructions(
- *L, AA, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
- getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
- getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(),
- MSSA, SE ? &SE->getSE() : nullptr);
-
- if (VerifyMemorySSA)
- MSSA.verifyMemorySSA();
-
- return Changed;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<BlockFrequencyInfoWrapperPass>();
- getLoopAnalysisUsage(AU);
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
-};
-}
-
-char LegacyLoopSinkPass::ID = 0;
-INITIALIZE_PASS_BEGIN(LegacyLoopSinkPass, "loop-sink", "Loop Sink", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(LoopPass)
-INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
-INITIALIZE_PASS_END(LegacyLoopSinkPass, "loop-sink", "Loop Sink", false, false)
-
-Pass *llvm::createLoopSinkPass() { return new LegacyLoopSinkPass(); }
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index a4369b83e732..39607464dd00 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -67,6 +67,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -188,8 +189,8 @@ static cl::opt<unsigned> SetupCostDepthLimit(
"lsr-setupcost-depth-limit", cl::Hidden, cl::init(7),
cl::desc("The limit on recursion depth for LSRs setup cost"));
-static cl::opt<bool> AllowTerminatingConditionFoldingAfterLSR(
- "lsr-term-fold", cl::Hidden, cl::init(false),
+static cl::opt<cl::boolOrDefault> AllowTerminatingConditionFoldingAfterLSR(
+ "lsr-term-fold", cl::Hidden,
cl::desc("Attempt to replace primary IV with other IV."));
static cl::opt<bool> AllowDropSolutionIfLessProfitable(
@@ -943,12 +944,6 @@ static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
}
}
- // All pointers have the same requirements, so canonicalize them to an
- // arbitrary pointer type to minimize variation.
- if (PointerType *PTy = dyn_cast<PointerType>(AccessTy.MemTy))
- AccessTy.MemTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
- PTy->getAddressSpace());
-
return AccessTy;
}
@@ -2794,18 +2789,6 @@ static Value *getWideOperand(Value *Oper) {
return Oper;
}
-/// Return true if we allow an IV chain to include both types.
-static bool isCompatibleIVType(Value *LVal, Value *RVal) {
- Type *LType = LVal->getType();
- Type *RType = RVal->getType();
- return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy() &&
- // Different address spaces means (possibly)
- // different types of the pointer implementation,
- // e.g. i16 vs i32 so disallow that.
- (LType->getPointerAddressSpace() ==
- RType->getPointerAddressSpace()));
-}
-
/// Return an approximation of this SCEV expression's "base", or NULL for any
/// constant. Returning the expression itself is conservative. Returning a
/// deeper subexpression is more precise and valid as long as it isn't less
@@ -2985,7 +2968,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
continue;
Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
- if (!isCompatibleIVType(PrevIV, NextIV))
+ if (PrevIV->getType() != NextIV->getType())
continue;
// A phi node terminates a chain.
@@ -3279,7 +3262,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain,
// do this if we also found a wide value for the head of the chain.
if (isa<PHINode>(Chain.tailUserInst())) {
for (PHINode &Phi : L->getHeader()->phis()) {
- if (!isCompatibleIVType(&Phi, IVSrc))
+ if (Phi.getType() != IVSrc->getType())
continue;
Instruction *PostIncV = dyn_cast<Instruction>(
Phi.getIncomingValueForBlock(L->getLoopLatch()));
@@ -3488,6 +3471,11 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
SmallPtrSet<const SCEV *, 32> Visited;
+ // Don't collect outside uses if we are favoring postinc - the instructions in
+ // the loop are more important than the ones outside of it.
+ if (AMK == TTI::AMK_PostIndexed)
+ return;
+
while (!Worklist.empty()) {
const SCEV *S = Worklist.pop_back_val();
@@ -5559,10 +5547,12 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
"a scale at the same time!");
Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
-(uint64_t)Offset);
- if (C->getType() != OpTy)
- C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
- OpTy, false),
- C, OpTy);
+ if (C->getType() != OpTy) {
+ C = ConstantFoldCastOperand(
+ CastInst::getCastOpcode(C, false, OpTy, false), C, OpTy,
+ CI->getModule()->getDataLayout());
+ assert(C && "Cast of ConstantInt should have folded");
+ }
CI->setOperand(1, C);
}
@@ -5610,7 +5600,8 @@ void LSRInstance::RewriteForPHI(
.setKeepOneInputPHIs());
} else {
SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DT, &LI);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DTU, &LI);
NewBB = NewBBs[0];
}
// If NewBB==NULL, then SplitCriticalEdge refused to split because all
@@ -6949,7 +6940,19 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
}
}
- if (AllowTerminatingConditionFoldingAfterLSR) {
+ const bool EnableFormTerm = [&] {
+ switch (AllowTerminatingConditionFoldingAfterLSR) {
+ case cl::BOU_TRUE:
+ return true;
+ case cl::BOU_FALSE:
+ return false;
+ case cl::BOU_UNSET:
+ return TTI.shouldFoldTerminatingConditionAfterLSR();
+ }
+ llvm_unreachable("Unhandled cl::boolOrDefault enum");
+ }();
+
+ if (EnableFormTerm) {
if (auto Opt = canFoldTermCondOfLoop(L, SE, DT, LI)) {
auto [ToFold, ToHelpFold, TermValueS, MustDrop] = *Opt;
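With cl::boolOrDefault, -lsr-term-fold becomes a tri-state: an explicit value on the command line forces the behavior, while leaving the flag unset defers to the target hook, as the EnableFormTerm lambda above spells out:

    -lsr-term-fold=true   -> cl::BOU_TRUE  -> always attempt the fold
    -lsr-term-fold=false  -> cl::BOU_FALSE -> never attempt it
    (flag not given)      -> cl::BOU_UNSET -> TTI.shouldFoldTerminatingConditionAfterLSR()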
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 9c6e4ebf62a9..7b4c54370e48 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -111,7 +111,7 @@ static bool hasAnyUnrollPragma(const Loop *L, StringRef Prefix) {
if (!S)
continue;
- if (S->getString().startswith(Prefix))
+ if (S->getString().starts_with(Prefix))
return true;
}
}
@@ -153,9 +153,11 @@ static bool computeUnrollAndJamCount(
LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE,
const SmallPtrSetImpl<const Value *> &EphValues,
OptimizationRemarkEmitter *ORE, unsigned OuterTripCount,
- unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount,
- unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP,
+ unsigned OuterTripMultiple, const UnrollCostEstimator &OuterUCE,
+ unsigned InnerTripCount, unsigned InnerLoopSize,
+ TargetTransformInfo::UnrollingPreferences &UP,
TargetTransformInfo::PeelingPreferences &PP) {
+ unsigned OuterLoopSize = OuterUCE.getRolledLoopSize();
// First up use computeUnrollCount from the loop unroller to get a count
// for unrolling the outer loop, plus any loops requiring explicit
// unrolling we leave to the unroller. This uses UP.Threshold /
@@ -165,7 +167,7 @@ static bool computeUnrollAndJamCount(
bool UseUpperBound = false;
bool ExplicitUnroll = computeUnrollCount(
L, TTI, DT, LI, AC, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
- /*MaxOrZero*/ false, OuterTripMultiple, OuterLoopSize, UP, PP,
+ /*MaxOrZero*/ false, OuterTripMultiple, OuterUCE, UP, PP,
UseUpperBound);
if (ExplicitUnroll || UseUpperBound) {
// If the user explicitly set the loop as unrolled, dont UnJ it. Leave it
@@ -318,39 +320,28 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
}
// Approximate the loop size and collect useful info
- unsigned NumInlineCandidates;
- bool NotDuplicatable;
- bool Convergent;
SmallPtrSet<const Value *, 32> EphValues;
CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
Loop *SubLoop = L->getSubLoops()[0];
- InstructionCost InnerLoopSizeIC =
- ApproximateLoopSize(SubLoop, NumInlineCandidates, NotDuplicatable,
- Convergent, TTI, EphValues, UP.BEInsns);
- InstructionCost OuterLoopSizeIC =
- ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
- TTI, EphValues, UP.BEInsns);
- LLVM_DEBUG(dbgs() << " Outer Loop Size: " << OuterLoopSizeIC << "\n");
- LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSizeIC << "\n");
-
- if (!InnerLoopSizeIC.isValid() || !OuterLoopSizeIC.isValid()) {
+ UnrollCostEstimator InnerUCE(SubLoop, TTI, EphValues, UP.BEInsns);
+ UnrollCostEstimator OuterUCE(L, TTI, EphValues, UP.BEInsns);
+
+ if (!InnerUCE.canUnroll() || !OuterUCE.canUnroll()) {
LLVM_DEBUG(dbgs() << " Not unrolling loop which contains instructions"
- << " with invalid cost.\n");
+ << " which cannot be duplicated or have invalid cost.\n");
return LoopUnrollResult::Unmodified;
}
- unsigned InnerLoopSize = *InnerLoopSizeIC.getValue();
- unsigned OuterLoopSize = *OuterLoopSizeIC.getValue();
- if (NotDuplicatable) {
- LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable "
- "instructions.\n");
- return LoopUnrollResult::Unmodified;
- }
- if (NumInlineCandidates != 0) {
+ unsigned InnerLoopSize = InnerUCE.getRolledLoopSize();
+ LLVM_DEBUG(dbgs() << " Outer Loop Size: " << OuterUCE.getRolledLoopSize()
+ << "\n");
+ LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSize << "\n");
+
+ if (InnerUCE.NumInlineCandidates != 0 || OuterUCE.NumInlineCandidates != 0) {
LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
return LoopUnrollResult::Unmodified;
}
- if (Convergent) {
+ if (InnerUCE.Convergent || OuterUCE.Convergent) {
LLVM_DEBUG(
dbgs() << " Not unrolling loop with convergent instructions.\n");
return LoopUnrollResult::Unmodified;
@@ -379,7 +370,7 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
// Decide if, and by how much, to unroll
bool IsCountSetExplicitly = computeUnrollAndJamCount(
L, SubLoop, TTI, DT, LI, &AC, SE, EphValues, &ORE, OuterTripCount,
- OuterTripMultiple, OuterLoopSize, InnerTripCount, InnerLoopSize, UP, PP);
+ OuterTripMultiple, OuterUCE, InnerTripCount, InnerLoopSize, UP, PP);
if (UP.Count <= 1)
return LoopUnrollResult::Unmodified;
// Unroll factor (Count) must be less or equal to TripCount.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 335b489d3cb2..f14541a1a037 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -662,19 +662,16 @@ static std::optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
unsigned(*RolledDynamicCost.getValue())}};
}
-/// ApproximateLoopSize - Approximate the size of the loop.
-InstructionCost llvm::ApproximateLoopSize(
- const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent,
- const TargetTransformInfo &TTI,
+UnrollCostEstimator::UnrollCostEstimator(
+ const Loop *L, const TargetTransformInfo &TTI,
const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
CodeMetrics Metrics;
for (BasicBlock *BB : L->blocks())
Metrics.analyzeBasicBlock(BB, TTI, EphValues);
- NumCalls = Metrics.NumInlineCandidates;
+ NumInlineCandidates = Metrics.NumInlineCandidates;
NotDuplicatable = Metrics.notDuplicatable;
Convergent = Metrics.convergent;
-
- InstructionCost LoopSize = Metrics.NumInsts;
+ LoopSize = Metrics.NumInsts;
  // Don't allow an estimate of size zero. This would allow unrolling of loops
// with huge iteration counts, which is a compile time problem even if it's
@@ -685,8 +682,17 @@ InstructionCost llvm::ApproximateLoopSize(
if (LoopSize.isValid() && LoopSize < BEInsns + 1)
// This is an open coded max() on InstructionCost
LoopSize = BEInsns + 1;
+}
- return LoopSize;
+uint64_t UnrollCostEstimator::getUnrolledLoopSize(
+ const TargetTransformInfo::UnrollingPreferences &UP,
+ unsigned CountOverwrite) const {
+ unsigned LS = *LoopSize.getValue();
+ assert(LS >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
+ if (CountOverwrite)
+ return static_cast<uint64_t>(LS - UP.BEInsns) * CountOverwrite + UP.BEInsns;
+ else
+ return static_cast<uint64_t>(LS - UP.BEInsns) * UP.Count + UP.BEInsns;
}
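getUnrolledLoopSize models the backedge bookkeeping (UP.BEInsns) as paid once in the unrolled body rather than once per copy: unrolled = (LoopSize - BEInsns) * Count + BEInsns. For example, with an assumed rolled size of 12, BEInsns of 2, and an unroll count of 4, the estimate is (12 - 2) * 4 + 2 = 42 instructions.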
// Returns the loop hint metadata node with the given name (for example,
@@ -746,36 +752,10 @@ static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
return MaxPercentThresholdBoost;
}
-// Produce an estimate of the unrolled cost of the specified loop. This
-// is used to a) produce a cost estimate for partial unrolling and b) to
-// cheaply estimate cost for full unrolling when we don't want to symbolically
-// evaluate all iterations.
-class UnrollCostEstimator {
- const unsigned LoopSize;
-
-public:
- UnrollCostEstimator(Loop &L, unsigned LoopSize) : LoopSize(LoopSize) {}
-
- // Returns loop size estimation for unrolled loop, given the unrolling
- // configuration specified by UP.
- uint64_t
- getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP,
- const unsigned CountOverwrite = 0) const {
- assert(LoopSize >= UP.BEInsns &&
- "LoopSize should not be less than BEInsns!");
- if (CountOverwrite)
- return static_cast<uint64_t>(LoopSize - UP.BEInsns) * CountOverwrite +
- UP.BEInsns;
- else
- return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count +
- UP.BEInsns;
- }
-};
-
static std::optional<unsigned>
shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
const unsigned TripMultiple, const unsigned TripCount,
- const UnrollCostEstimator UCE,
+ unsigned MaxTripCount, const UnrollCostEstimator UCE,
const TargetTransformInfo::UnrollingPreferences &UP) {
// Using unroll pragma
@@ -796,6 +776,10 @@ shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
if (PInfo.PragmaFullUnroll && TripCount != 0)
return TripCount;
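+ // Illustrative example (not from this patch): given
+ //   #pragma clang loop unroll(enable)
+ // on a loop whose exact trip count is unknown but is provably at most 8,
+ // the check below returns that upper bound, provided it does not exceed
+ // UnrollMaxUpperBound.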
+ if (PInfo.PragmaEnableUnroll && !TripCount && MaxTripCount &&
+ MaxTripCount <= UnrollMaxUpperBound)
+ return MaxTripCount;
+
// If we didn't return until here, we should continue to other priorities.
return std::nullopt;
}
@@ -888,14 +872,14 @@ shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
// refactored into its own function.
bool llvm::computeUnrollCount(
Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
- AssumptionCache *AC,
- ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
+ AssumptionCache *AC, ScalarEvolution &SE,
+ const SmallPtrSetImpl<const Value *> &EphValues,
OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount,
- bool MaxOrZero, unsigned TripMultiple, unsigned LoopSize,
+ bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE,
TargetTransformInfo::UnrollingPreferences &UP,
TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
- UnrollCostEstimator UCE(*L, LoopSize);
+ unsigned LoopSize = UCE.getRolledLoopSize();
const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
const bool PragmaFullUnroll = hasUnrollFullPragma(L);
@@ -922,7 +906,7 @@ bool llvm::computeUnrollCount(
// 1st priority is unroll count set by "unroll-count" option.
// 2nd priority is unroll count set by pragma.
if (auto UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount,
- UCE, UP)) {
+ MaxTripCount, UCE, UP)) {
UP.Count = *UnrollFactor;
if (UserUnrollCount || (PragmaCount > 0)) {
@@ -1177,9 +1161,6 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
return LoopUnrollResult::Unmodified;
bool OptForSize = L->getHeader()->getParent()->hasOptSize();
- unsigned NumInlineCandidates;
- bool NotDuplicatable;
- bool Convergent;
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
L, SE, TTI, BFI, PSI, ORE, OptLevel, ProvidedThreshold, ProvidedCount,
ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
@@ -1196,30 +1177,22 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
SmallPtrSet<const Value *, 32> EphValues;
CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
- InstructionCost LoopSizeIC =
- ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
- TTI, EphValues, UP.BEInsns);
- LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSizeIC << "\n");
-
- if (!LoopSizeIC.isValid()) {
+ UnrollCostEstimator UCE(L, TTI, EphValues, UP.BEInsns);
+ if (!UCE.canUnroll()) {
LLVM_DEBUG(dbgs() << " Not unrolling loop which contains instructions"
- << " with invalid cost.\n");
+ << " which cannot be duplicated or have invalid cost.\n");
return LoopUnrollResult::Unmodified;
}
- unsigned LoopSize = *LoopSizeIC.getValue();
- if (NotDuplicatable) {
- LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
- << " instructions.\n");
- return LoopUnrollResult::Unmodified;
- }
+ unsigned LoopSize = UCE.getRolledLoopSize();
+ LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
// When optimizing for size, use LoopSize + 1 as threshold (we use < Threshold
// later), to (fully) unroll loops, if it does not increase code size.
if (OptForSize)
UP.Threshold = std::max(UP.Threshold, LoopSize + 1);
- if (NumInlineCandidates != 0) {
+ if (UCE.NumInlineCandidates != 0) {
LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
return LoopUnrollResult::Unmodified;
}
@@ -1261,7 +1234,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
// Assuming n is the same on all threads, any kind of unrolling is
// safe. But currently llvm's notion of convergence isn't powerful
// enough to express this.
- if (Convergent)
+ if (UCE.Convergent)
UP.AllowRemainder = false;
// Try to find the trip count upper bound if we cannot find the exact trip
@@ -1277,8 +1250,8 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
// fully unroll the loop.
bool UseUpperBound = false;
bool IsCountSetExplicitly = computeUnrollCount(
- L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero,
- TripMultiple, LoopSize, UP, PP, UseUpperBound);
+ L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount,
+ MaxOrZero, TripMultiple, UCE, UP, PP, UseUpperBound);
if (!UP.Count)
return LoopUnrollResult::Unmodified;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 13e06c79d0d7..9d5e6693c0e5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -266,6 +266,11 @@ bool LoopVersioningLICM::legalLoopMemoryAccesses() {
for (const auto &A : AS) {
Value *Ptr = A.getValue();
// Alias tracker should have pointers of same data type.
+ //
+ // FIXME: check no longer effective since opaque pointers?
+ // If the intent is to check that the memory accesses use the
+ // same data type (such that LICM can promote them), then we
+ // can no longer see this from the pointer value types.
TypeCheck = (TypeCheck && (SomePtr->getType() == Ptr->getType()));
}
// At least one alias tracker should have pointers of same data type.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index 454aa56be531..6f87e4d91d2c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -13,7 +13,6 @@
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -21,10 +20,8 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/MisExpect.h"
#include <cmath>
@@ -105,10 +102,7 @@ static bool handleSwitchExpect(SwitchInst &SI) {
misexpect::checkExpectAnnotations(SI, Weights, /*IsFrontend=*/true);
SI.setCondition(ArgValue);
-
- SI.setMetadata(LLVMContext::MD_prof,
- MDBuilder(CI->getContext()).createBranchWeights(Weights));
-
+ setBranchWeights(SI, Weights);
return true;
}
@@ -416,29 +410,3 @@ PreservedAnalyses LowerExpectIntrinsicPass::run(Function &F,
return PreservedAnalyses::all();
}
-
-namespace {
-/// Legacy pass for lowering expect intrinsics out of the IR.
-///
-/// When this pass is run over a function it uses expect intrinsics which feed
-/// branches and switches to provide branch weight metadata for those
-/// terminators. It then removes the expect intrinsics from the IR so the rest
-/// of the optimizer can ignore them.
-class LowerExpectIntrinsic : public FunctionPass {
-public:
- static char ID;
- LowerExpectIntrinsic() : FunctionPass(ID) {
- initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override { return lowerExpectIntrinsic(F); }
-};
-} // namespace
-
-char LowerExpectIntrinsic::ID = 0;
-INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect",
- "Lower 'expect' Intrinsics", false, false)
-
-FunctionPass *llvm::createLowerExpectIntrinsicPass() {
- return new LowerExpectIntrinsic();
-}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
index 8dc037b10cc8..a59ecdda1746 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
@@ -20,25 +20,10 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
using namespace llvm;
-namespace {
-struct LowerGuardIntrinsicLegacyPass : public FunctionPass {
- static char ID;
- LowerGuardIntrinsicLegacyPass() : FunctionPass(ID) {
- initializeLowerGuardIntrinsicLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-};
-}
-
static bool lowerGuardIntrinsic(Function &F) {
// Check if we can cheaply rule out the possibility of not having any work to
// do.
@@ -71,19 +56,6 @@ static bool lowerGuardIntrinsic(Function &F) {
return true;
}
-bool LowerGuardIntrinsicLegacyPass::runOnFunction(Function &F) {
- return lowerGuardIntrinsic(F);
-}
-
-char LowerGuardIntrinsicLegacyPass::ID = 0;
-INITIALIZE_PASS(LowerGuardIntrinsicLegacyPass, "lower-guard-intrinsic",
- "Lower the guard intrinsic to normal control flow", false,
- false)
-
-Pass *llvm::createLowerGuardIntrinsicPass() {
- return new LowerGuardIntrinsicLegacyPass();
-}
-
PreservedAnalyses LowerGuardIntrinsicPass::run(Function &F,
FunctionAnalysisManager &AM) {
if (lowerGuardIntrinsic(F))
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index f46ea6a20afa..72b9db1e73d7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -19,6 +19,7 @@
#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/DomTreeUpdater.h"
@@ -36,12 +37,9 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MatrixBuilder.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/MatrixUtils.h"
@@ -180,7 +178,6 @@ Value *computeVectorAddr(Value *BasePtr, Value *VecIdx, Value *Stride,
assert((!isa<ConstantInt>(Stride) ||
cast<ConstantInt>(Stride)->getZExtValue() >= NumElements) &&
"Stride must be >= the number of elements in the result vector.");
- unsigned AS = cast<PointerType>(BasePtr->getType())->getAddressSpace();
// Compute the start of the vector with index VecIdx as VecIdx * Stride.
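// For example (illustrative): for a column-major 4x4 matrix with Stride = 4,
// the vector holding column 2 starts at element offset 2 * 4 = 8.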
Value *VecStart = Builder.CreateMul(VecIdx, Stride, "vec.start");
@@ -192,11 +189,7 @@ Value *computeVectorAddr(Value *BasePtr, Value *VecIdx, Value *Stride,
else
VecStart = Builder.CreateGEP(EltType, BasePtr, VecStart, "vec.gep");
- // Cast elementwise vector start pointer to a pointer to a vector
- // (EltType x NumElements)*.
- auto *VecType = FixedVectorType::get(EltType, NumElements);
- Type *VecPtrType = PointerType::get(VecType, AS);
- return Builder.CreatePointerCast(VecStart, VecPtrType, "vec.cast");
+ return VecStart;
}
/// LowerMatrixIntrinsics contains the methods used to lower matrix intrinsics.
@@ -1063,13 +1056,6 @@ public:
return Changed;
}
- /// Turns \p BasePtr into an elementwise pointer to \p EltType.
- Value *createElementPtr(Value *BasePtr, Type *EltType, IRBuilder<> &Builder) {
- unsigned AS = cast<PointerType>(BasePtr->getType())->getAddressSpace();
- Type *EltPtrType = PointerType::get(EltType, AS);
- return Builder.CreatePointerCast(BasePtr, EltPtrType);
- }
-
/// Replace intrinsic calls
bool VisitCallInst(CallInst *Inst) {
if (!Inst->getCalledFunction() || !Inst->getCalledFunction()->isIntrinsic())
@@ -1121,7 +1107,7 @@ public:
auto *VType = cast<VectorType>(Ty);
Type *EltTy = VType->getElementType();
Type *VecTy = FixedVectorType::get(EltTy, Shape.getStride());
- Value *EltPtr = createElementPtr(Ptr, EltTy, Builder);
+ Value *EltPtr = Ptr;
MatrixTy Result;
for (unsigned I = 0, E = Shape.getNumVectors(); I < E; ++I) {
Value *GEP = computeVectorAddr(
@@ -1147,17 +1133,11 @@ public:
Value *Offset = Builder.CreateAdd(
Builder.CreateMul(J, Builder.getInt64(MatrixShape.getStride())), I);
- unsigned AS = cast<PointerType>(MatrixPtr->getType())->getAddressSpace();
- Value *EltPtr =
- Builder.CreatePointerCast(MatrixPtr, PointerType::get(EltTy, AS));
- Value *TileStart = Builder.CreateGEP(EltTy, EltPtr, Offset);
+ Value *TileStart = Builder.CreateGEP(EltTy, MatrixPtr, Offset);
auto *TileTy = FixedVectorType::get(EltTy, ResultShape.NumRows *
ResultShape.NumColumns);
- Type *TilePtrTy = PointerType::get(TileTy, AS);
- Value *TilePtr =
- Builder.CreatePointerCast(TileStart, TilePtrTy, "col.cast");
- return loadMatrix(TileTy, TilePtr, Align,
+ return loadMatrix(TileTy, TileStart, Align,
Builder.getInt64(MatrixShape.getStride()), IsVolatile,
ResultShape, Builder);
}
@@ -1193,17 +1173,11 @@ public:
Value *Offset = Builder.CreateAdd(
Builder.CreateMul(J, Builder.getInt64(MatrixShape.getStride())), I);
- unsigned AS = cast<PointerType>(MatrixPtr->getType())->getAddressSpace();
- Value *EltPtr =
- Builder.CreatePointerCast(MatrixPtr, PointerType::get(EltTy, AS));
- Value *TileStart = Builder.CreateGEP(EltTy, EltPtr, Offset);
+ Value *TileStart = Builder.CreateGEP(EltTy, MatrixPtr, Offset);
auto *TileTy = FixedVectorType::get(EltTy, StoreVal.getNumRows() *
StoreVal.getNumColumns());
- Type *TilePtrTy = PointerType::get(TileTy, AS);
- Value *TilePtr =
- Builder.CreatePointerCast(TileStart, TilePtrTy, "col.cast");
- storeMatrix(TileTy, StoreVal, TilePtr, MAlign,
+ storeMatrix(TileTy, StoreVal, TileStart, MAlign,
Builder.getInt64(MatrixShape.getStride()), IsVolatile, Builder);
}
@@ -1213,7 +1187,7 @@ public:
MaybeAlign MAlign, Value *Stride, bool IsVolatile,
IRBuilder<> &Builder) {
auto VType = cast<VectorType>(Ty);
- Value *EltPtr = createElementPtr(Ptr, VType->getElementType(), Builder);
+ Value *EltPtr = Ptr;
for (auto Vec : enumerate(StoreVal.vectors())) {
Value *GEP = computeVectorAddr(
EltPtr,
@@ -2180,7 +2154,7 @@ public:
/// Returns true if \p V is a matrix value in the given subprogram.
bool isMatrix(Value *V) const { return ExprsInSubprogram.count(V); }
- /// If \p V is a matrix value, print its shape as as NumRows x NumColumns to
+ /// If \p V is a matrix value, print its shape as NumRows x NumColumns to
/// \p SS.
void prettyPrintMatrixType(Value *V, raw_string_ostream &SS) {
auto M = Inst2Matrix.find(V);
@@ -2201,7 +2175,7 @@ public:
write("<no called fn>");
else {
StringRef Name = CI->getCalledFunction()->getName();
- if (!Name.startswith("llvm.matrix")) {
+ if (!Name.starts_with("llvm.matrix")) {
write(Name);
return;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp
index e2de322933bc..3c977b816a05 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp
@@ -19,24 +19,10 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
-namespace {
-struct LowerWidenableConditionLegacyPass : public FunctionPass {
- static char ID;
- LowerWidenableConditionLegacyPass() : FunctionPass(ID) {
- initializeLowerWidenableConditionLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-};
-}
-
static bool lowerWidenableCondition(Function &F) {
// Check if we can cheaply rule out the possibility of not having any work to
// do.
@@ -65,19 +51,6 @@ static bool lowerWidenableCondition(Function &F) {
return true;
}
-bool LowerWidenableConditionLegacyPass::runOnFunction(Function &F) {
- return lowerWidenableCondition(F);
-}
-
-char LowerWidenableConditionLegacyPass::ID = 0;
-INITIALIZE_PASS(LowerWidenableConditionLegacyPass, "lower-widenable-condition",
- "Lower the widenable condition to default true value", false,
- false)
-
-Pass *llvm::createLowerWidenableConditionPass() {
- return new LowerWidenableConditionLegacyPass();
-}
-
PreservedAnalyses LowerWidenableConditionPass::run(Function &F,
FunctionAnalysisManager &AM) {
if (lowerWidenableCondition(F))
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
index a3f09a5a33c3..78e474f925b5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
@@ -42,17 +42,6 @@
using namespace llvm;
-namespace {
-struct MakeGuardsExplicitLegacyPass : public FunctionPass {
- static char ID;
- MakeGuardsExplicitLegacyPass() : FunctionPass(ID) {
- initializeMakeGuardsExplicitLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-};
-}
-
static void turnToExplicitForm(CallInst *Guard, Function *DeoptIntrinsic) {
// Replace the guard with an explicit branch (just like in GuardWidening).
BasicBlock *OriginalBB = Guard->getParent();
@@ -89,15 +78,6 @@ static bool explicifyGuards(Function &F) {
return true;
}
-bool MakeGuardsExplicitLegacyPass::runOnFunction(Function &F) {
- return explicifyGuards(F);
-}
-
-char MakeGuardsExplicitLegacyPass::ID = 0;
-INITIALIZE_PASS(MakeGuardsExplicitLegacyPass, "make-guards-explicit",
- "Lower the guard intrinsic to explicit control flow form",
- false, false)
-
PreservedAnalyses MakeGuardsExplicitPass::run(Function &F,
FunctionAnalysisManager &) {
if (explicifyGuards(F))
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index a95d6adf36d6..9d058e0d2483 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -19,12 +19,15 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
@@ -66,9 +69,10 @@ static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
-STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
-STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
-STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
+STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
+STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
+STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
+STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
namespace {
@@ -367,21 +371,13 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
// Keeps track of the last memory use or def before the insertion point for
// the new memset. The new MemoryDef for the inserted memsets will be inserted
- // after MemInsertPoint. It points to either LastMemDef or to the last user
- // before the insertion point of the memset, if there are any such users.
+ // after MemInsertPoint.
MemoryUseOrDef *MemInsertPoint = nullptr;
- // Keeps track of the last MemoryDef between StartInst and the insertion point
- // for the new memset. This will become the defining access of the inserted
- // memsets.
- MemoryDef *LastMemDef = nullptr;
for (++BI; !BI->isTerminator(); ++BI) {
auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
- if (CurrentAcc) {
+ if (CurrentAcc)
MemInsertPoint = CurrentAcc;
- if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
- LastMemDef = CurrentDef;
- }
// Calls that only access inaccessible memory do not block merging
// accessible stores.
@@ -485,16 +481,13 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
if (!Range.TheStores.empty())
AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
- assert(LastMemDef && MemInsertPoint &&
- "Both LastMemDef and MemInsertPoint need to be set");
auto *NewDef =
cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
? MSSAU->createMemoryAccessBefore(
- AMemSet, LastMemDef, MemInsertPoint)
+ AMemSet, nullptr, MemInsertPoint)
: MSSAU->createMemoryAccessAfter(
- AMemSet, LastMemDef, MemInsertPoint));
+ AMemSet, nullptr, MemInsertPoint));
MSSAU->insertDef(NewDef, /*RenameUses=*/true);
- LastMemDef = NewDef;
MemInsertPoint = NewDef;
// Zap all the stores.
@@ -703,7 +696,7 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
auto *LastDef =
cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
- auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
+ auto *NewAccess = MSSAU->createMemoryAccessAfter(M, nullptr, LastDef);
MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
eraseInstruction(SI);
@@ -741,6 +734,23 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
return true;
}
+ // If this is a load-store pair from a stack slot to a stack slot, we
+ // might be able to perform the stack-move optimization just as we do for
+ // memcpys from an alloca to an alloca.
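+ // As a sketch (hypothetical IR, for illustration), a pair such as
+ //   %v = load %struct.T, ptr %src.alloca
+ //   store %struct.T %v, ptr %dst.alloca
+ // where both pointers are allocas is treated like a memcpy of
+ // DL.getTypeStoreSize(%struct.T) bytes between the two stack slots.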
+ if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
+ if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
+ if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
+ DL.getTypeStoreSize(T), BAA)) {
+ // Avoid invalidating the iterator.
+ BBI = SI->getNextNonDebugInstruction()->getIterator();
+ eraseInstruction(SI);
+ eraseInstruction(LI);
+ ++NumMemCpyInstr;
+ return true;
+ }
+ }
+ }
+
return false;
}
@@ -807,7 +817,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// store, so we do not need to rename uses.
auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
auto *NewAccess = MSSAU->createMemoryAccessBefore(
- M, StoreDef->getDefiningAccess(), StoreDef);
+ M, nullptr, StoreDef);
MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
eraseInstruction(SI);
@@ -870,8 +880,11 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
return false;
const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
- uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
- srcArraySize->getZExtValue();
+ TypeSize SrcAllocaSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType());
+ // We can't optimize scalable types.
+ if (SrcAllocaSize.isScalable())
+ return false;
+ uint64_t srcSize = SrcAllocaSize * srcArraySize->getZExtValue();
if (cpySize < srcSize)
return false;
@@ -915,10 +928,12 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
return false;
}
- // Check that accessing the first srcSize bytes of dest will not cause a
- // trap. Otherwise the transform is invalid since it might cause a trap
- // to occur earlier than it otherwise would.
- if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
+ // Check that storing to the first srcSize bytes of dest will not cause a
+ // trap or data race.
+ bool ExplicitlyDereferenceableOnly;
+ if (!isWritableObject(getUnderlyingObject(cpyDest),
+ ExplicitlyDereferenceableOnly) ||
+ !isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
DL, C, AC, DT)) {
LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
return false;
@@ -1033,12 +1048,13 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
// Since we're changing the parameter to the callsite, we need to make sure
// that what would be the new parameter dominates the callsite.
+ bool NeedMoveGEP = false;
if (!DT->dominates(cpyDest, C)) {
// Support moving a constant index GEP before the call.
auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
if (GEP && GEP->hasAllConstantIndices() &&
DT->dominates(GEP->getPointerOperand(), C))
- GEP->moveBefore(C);
+ NeedMoveGEP = true;
else
return false;
}
@@ -1057,29 +1073,19 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
// We can't create address space casts here because we don't know if they're
// safe for the target.
- if (cpySrc->getType()->getPointerAddressSpace() !=
- cpyDest->getType()->getPointerAddressSpace())
+ if (cpySrc->getType() != cpyDest->getType())
return false;
for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
- cpySrc->getType()->getPointerAddressSpace() !=
- C->getArgOperand(ArgI)->getType()->getPointerAddressSpace())
+ cpySrc->getType() != C->getArgOperand(ArgI)->getType())
return false;
// All the checks have passed, so do the transformation.
bool changedArgument = false;
for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
- Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest
- : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
- cpyDest->getName(), C);
changedArgument = true;
- if (C->getArgOperand(ArgI)->getType() == Dest->getType())
- C->setArgOperand(ArgI, Dest);
- else
- C->setArgOperand(ArgI, CastInst::CreatePointerCast(
- Dest, C->getArgOperand(ArgI)->getType(),
- Dest->getName(), C));
+ C->setArgOperand(ArgI, cpyDest);
}
if (!changedArgument)
@@ -1091,6 +1097,11 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
}
+ if (NeedMoveGEP) {
+ auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
+ GEP->moveBefore(C);
+ }
+
if (SkippedLifetimeStart) {
SkippedLifetimeStart->moveBefore(C);
MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
@@ -1189,7 +1200,7 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
- auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
+ auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
// Remove the instruction we're replacing.
@@ -1286,12 +1297,8 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
Value *MemsetLen = Builder.CreateSelect(
Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
- unsigned DestAS = Dest->getType()->getPointerAddressSpace();
Instruction *NewMemSet = Builder.CreateMemSet(
- Builder.CreateGEP(
- Builder.getInt8Ty(),
- Builder.CreatePointerCast(Dest, Builder.getInt8PtrTy(DestAS)),
- SrcSize),
+ Builder.CreateGEP(Builder.getInt8Ty(), Dest, SrcSize),
MemSet->getOperand(1), MemsetLen, Alignment);
assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
@@ -1301,7 +1308,7 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
auto *LastDef =
cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
auto *NewAccess = MSSAU->createMemoryAccessBefore(
- NewMemSet, LastDef->getDefiningAccess(), LastDef);
+ NewMemSet, nullptr, LastDef);
MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
eraseInstruction(MemSet);
@@ -1406,12 +1413,241 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
CopySize, MemCpy->getDestAlign());
auto *LastDef =
cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
- auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
+ auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
return true;
}
+// Attempts to optimize the pattern whereby memory is copied from an alloca to
+// another alloca, where the two allocas don't have conflicting mod/ref. If
+// successful, the two allocas can be merged into one and the transfer can be
+// deleted. This pattern is generated frequently in Rust, due to the ubiquity of
+// move operations in that language.
+//
+// Once we determine that the optimization is safe to perform, we replace all
+// uses of the destination alloca with the source alloca. We also "shrink wrap"
+// the lifetime markers of the single merged alloca to before the first use
+// and after the last use. Note that the "shrink wrapping" procedure is a safe
+// transformation only because we restrict the scope of this optimization to
+// allocas that aren't captured.
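+//
+// For illustration (hypothetical IR, not taken from this patch):
+//
+//   %src = alloca [32 x i8]
+//   %dst = alloca [32 x i8]
+//   ; ... initialize %src ...
+//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 32, i1 false)
+//
+// When neither alloca is captured and their mod/ref ranges do not conflict,
+// every use of %dst can be rewritten to use %src and the memcpy deleted.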
+bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
+ AllocaInst *DestAlloca,
+ AllocaInst *SrcAlloca, TypeSize Size,
+ BatchAAResults &BAA) {
+ LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
+ << *Store << "\n");
+
+ // Make sure the two allocas are in the same address space.
+ if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
+ LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
+ return false;
+ }
+
+ // Check that copy is full with static size.
+ const DataLayout &DL = DestAlloca->getModule()->getDataLayout();
+ std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
+ if (!SrcSize || Size != *SrcSize) {
+ LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
+ return false;
+ }
+ std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
+ if (!DestSize || Size != *DestSize) {
+ LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
+ return false;
+ }
+
+ if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca())
+ return false;
+
+ // Check that src and dest are never-captured, unescaped allocas. Along the
+ // way, collect the full-size lifetime markers so they can be removed later,
+ // and the instructions carrying noalias metadata so that metadata can be
+ // dropped.
+
+ SmallVector<Instruction *, 4> LifetimeMarkers;
+ SmallSet<Instruction *, 4> NoAliasInstrs;
+ bool SrcNotDom = false;
+
+ // Recursively track the users and check whether any modifying alias exists.
+ auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
+ bool CanBeNull, CanBeFreed;
+ return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
+ };
+
+ auto CaptureTrackingWithModRef =
+ [&](Instruction *AI,
+ function_ref<bool(Instruction *)> ModRefCallback) -> bool {
+ SmallVector<Instruction *, 8> Worklist;
+ Worklist.push_back(AI);
+ unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
+ Worklist.reserve(MaxUsesToExplore);
+ SmallSet<const Use *, 20> Visited;
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.back();
+ Worklist.pop_back();
+ for (const Use &U : I->uses()) {
+ auto *UI = cast<Instruction>(U.getUser());
+ // If any use that isn't dominated by SrcAlloca exists, we move src
+ // alloca to the entry before the transformation.
+ if (!DT->dominates(SrcAlloca, UI))
+ SrcNotDom = true;
+
+ if (Visited.size() >= MaxUsesToExplore) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
+ return false;
+ }
+ if (!Visited.insert(&U).second)
+ continue;
+ switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
+ case UseCaptureKind::MAY_CAPTURE:
+ return false;
+ case UseCaptureKind::PASSTHROUGH:
+ // Instructions cannot have non-instruction users.
+ Worklist.push_back(UI);
+ continue;
+ case UseCaptureKind::NO_CAPTURE: {
+ if (UI->isLifetimeStartOrEnd()) {
+ // We note the locations of these intrinsic calls so that we can
+ // delete them later if the optimization succeeds; this is safe
+ // because both llvm.lifetime.start and llvm.lifetime.end in
+ // practice fill all the bytes of the alloca with an undefined
+ // value, even though they only conceptually mark it alive or dead.
+ int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
+ if (Size < 0 || Size == DestSize) {
+ LifetimeMarkers.push_back(UI);
+ continue;
+ }
+ }
+ if (UI->hasMetadata(LLVMContext::MD_noalias))
+ NoAliasInstrs.insert(UI);
+ if (!ModRefCallback(UI))
+ return false;
+ }
+ }
+ }
+ }
+ return true;
+ };
+
+ // Check that dest has no Mod/Ref from the alloca to the Store, except for
+ // full-size lifetime intrinsics, and collect the ModRef instructions for
+ // the reachability check.
+ ModRefInfo DestModRef = ModRefInfo::NoModRef;
+ MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
+ SmallVector<BasicBlock *, 8> ReachabilityWorklist;
+ auto DestModRefCallback = [&](Instruction *UI) -> bool {
+ // We don't care about the store itself.
+ if (UI == Store)
+ return true;
+ ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
+ DestModRef |= Res;
+ if (isModOrRefSet(Res)) {
+ // Instructions reachability checks.
+ // FIXME: adding an Instruction version of isPotentiallyReachableFromMany
+ // to lib/Analysis/CFG.cpp (currently it only handles BasicBlocks) might
+ // be helpful.
+ if (UI->getParent() == Store->getParent()) {
+ // The same block case is special because it's the only time we're
+ // looking within a single block to see which instruction comes first.
+ // Once we start looking at multiple blocks, the first instruction of
+ // the block is reachable, so we only need to determine reachability
+ // between whole blocks.
+ BasicBlock *BB = UI->getParent();
+
+ // If A comes before B, then B is definitively reachable from A.
+ if (UI->comesBefore(Store))
+ return false;
+
+ // If the user's parent block is entry, no predecessor exists.
+ if (BB->isEntryBlock())
+ return true;
+
+ // Otherwise, continue doing the normal per-BB CFG walk.
+ ReachabilityWorklist.append(succ_begin(BB), succ_end(BB));
+ } else {
+ ReachabilityWorklist.push_back(UI->getParent());
+ }
+ }
+ return true;
+ };
+
+ if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
+ return false;
+ // Bailout if Dest may have any ModRef before Store.
+ if (!ReachabilityWorklist.empty() &&
+ isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(),
+ nullptr, DT, nullptr))
+ return false;
+
+ // Check that, from after the Load to the end of the BB,
+ // - if the dest has any Mod, src has no Ref, and
+ // - if the dest has any Ref, src has no Mod except full-sized lifetimes.
+ MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
+
+ auto SrcModRefCallback = [&](Instruction *UI) -> bool {
+ // Any ModRef post-dominated by the Load doesn't matter; the Load and
+ // Store themselves can also be ignored.
+ if (PDT->dominates(Load, UI) || UI == Load || UI == Store)
+ return true;
+ ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
+ if ((isModSet(DestModRef) && isRefSet(Res)) ||
+ (isRefSet(DestModRef) && isModSet(Res)))
+ return false;
+
+ return true;
+ };
+
+ if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
+ return false;
+
+ // We can do the transformation. First, move the SrcAlloca to the start of the
+ // BB.
+ if (SrcNotDom)
+ SrcAlloca->moveBefore(*SrcAlloca->getParent(),
+ SrcAlloca->getParent()->getFirstInsertionPt());
+ // Align the allocas appropriately.
+ SrcAlloca->setAlignment(
+ std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
+
+ // Merge the two allocas.
+ DestAlloca->replaceAllUsesWith(SrcAlloca);
+ eraseInstruction(DestAlloca);
+
+ // Drop metadata on the source alloca.
+ SrcAlloca->dropUnknownNonDebugMetadata();
+
+ // TODO: Reconstruct merged lifetime markers.
+ // Remove all other lifetime markers, if the original lifetime intrinsics
+ // exist.
+ if (!LifetimeMarkers.empty()) {
+ for (Instruction *I : LifetimeMarkers)
+ eraseInstruction(I);
+ }
+
+ // As this transformation can cause memory accesses that didn't previously
+ // alias to begin to alias one another, we remove !noalias metadata from any
+ // uses of either alloca. This is conservative, but more precision doesn't
+ // seem worthwhile right now.
+ for (Instruction *I : NoAliasInstrs)
+ I->setMetadata(LLVMContext::MD_noalias, nullptr);
+
+ LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
+ NumStackMove++;
+ return true;
+}
+
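+// A usage sketch for isZeroSize below (hypothetical values): it returns true
+// for a literal i64 0 length, for an undef or poison length, and for a length
+// instruction that simplifyInstruction folds to zero, such as
+//   %len = sub i64 %n, %n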
+static bool isZeroSize(Value *Size) {
+ if (auto *I = dyn_cast<Instruction>(Size))
+ if (auto *Res = simplifyInstruction(I, I->getModule()->getDataLayout()))
+ Size = Res;
+ // Treat undef/poison size like zero.
+ if (auto *C = dyn_cast<Constant>(Size))
+ return isa<UndefValue>(C) || C->isNullValue();
+ return false;
+}
+
/// Perform simplification of memcpy's. If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
@@ -1428,6 +1664,19 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
return true;
}
+ // If the size is zero, remove the memcpy. This also prevents infinite loops
+ // in processMemSetMemCpyDependence, which is a no-op for zero-length memcpys.
+ if (isZeroSize(M->getLength())) {
+ ++BBI;
+ eraseInstruction(M);
+ return true;
+ }
+
+ MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
+ if (!MA)
+ // Degenerate case: memcpy marked as not accessing memory.
+ return false;
+
// If copying from a constant, try to turn the memcpy into a memset.
if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
if (GV->isConstant() && GV->hasDefinitiveInitializer())
@@ -1436,10 +1685,9 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
IRBuilder<> Builder(M);
Instruction *NewM = Builder.CreateMemSet(
M->getRawDest(), ByteVal, M->getLength(), M->getDestAlign(), false);
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
+ auto *LastDef = cast<MemoryDef>(MA);
auto *NewAccess =
- MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
+ MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
eraseInstruction(M);
@@ -1448,7 +1696,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
}
BatchAAResults BAA(*AA);
- MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
// FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
MemoryAccess *AnyClobber = MA->getDefiningAccess();
MemoryLocation DestLoc = MemoryLocation::getForDest(M);
@@ -1468,13 +1715,14 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
AnyClobber, MemoryLocation::getForSource(M), BAA);
- // There are four possible optimizations we can do for memcpy:
+ // There are five possible optimizations we can do for memcpy:
// a) memcpy-memcpy xform which exposes redundance for DSE.
// b) call-memcpy xform for return slot optimization.
// c) memcpy from freshly alloca'd space or space that has just started
// its lifetime copies undefined data, and we can therefore eliminate
// the memcpy in favor of the data that was already at the destination.
// d) memcpy from a just-memset'd source can be turned into memset.
+ // e) elimination of memcpy via stack-move optimization.
if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
if (Instruction *MI = MD->getMemoryInst()) {
if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
@@ -1493,7 +1741,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
}
}
if (auto *MDep = dyn_cast<MemCpyInst>(MI))
- return processMemCpyMemCpyDependence(M, MDep, BAA);
+ if (processMemCpyMemCpyDependence(M, MDep, BAA))
+ return true;
if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
@@ -1512,6 +1761,27 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
}
}
+ // If the transfer is from a stack slot to a stack slot, then we may be able
+ // to perform the stack-move optimization. See the comments in
+ // performStackMoveOptzn() for more details.
+ auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
+ if (!DestAlloca)
+ return false;
+ auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
+ if (!SrcAlloca)
+ return false;
+ ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
+ if (Len == nullptr)
+ return false;
+ if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
+ TypeSize::getFixed(Len->getZExtValue()), BAA)) {
+ // Avoid invalidating the iterator.
+ BBI = M->getNextNonDebugInstruction()->getIterator();
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
+ return true;
+ }
+
return false;
}
@@ -1583,9 +1853,8 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
DT) < *ByValAlign)
return false;
- // The address space of the memcpy source must match the byval argument
- if (MDep->getSource()->getType()->getPointerAddressSpace() !=
- ByValArg->getType()->getPointerAddressSpace())
+ // The type of the memcpy source must match the byval argument
+ if (MDep->getSource()->getType() != ByValArg->getType())
return false;
// Verify that the copied-from memory doesn't change in between the memcpy and
@@ -1660,9 +1929,8 @@ bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
if (!MDep || MDep->isVolatile() || AI != MDep->getDest())
return false;
- // The address space of the memcpy source must match the immut argument
- if (MDep->getSource()->getType()->getPointerAddressSpace() !=
- ImmutArg->getType()->getPointerAddressSpace())
+ // The type of the memcpy source must match the immut argument
+ if (MDep->getSource()->getType() != ImmutArg->getType())
return false;
// 2-1. The length of the memcpy must be equal to the size of the alloca.
@@ -1758,9 +2026,10 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
auto *AA = &AM.getResult<AAManager>(F);
auto *AC = &AM.getResult<AssumptionAnalysis>(F);
auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
- bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA());
+ bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
if (!MadeChange)
return PreservedAnalyses::all();
@@ -1772,12 +2041,14 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
AliasAnalysis *AA_, AssumptionCache *AC_,
- DominatorTree *DT_, MemorySSA *MSSA_) {
+ DominatorTree *DT_, PostDominatorTree *PDT_,
+ MemorySSA *MSSA_) {
bool MadeChange = false;
TLI = TLI_;
AA = AA_;
AC = AC_;
DT = DT_;
+ PDT = PDT_;
MSSA = MSSA_;
MemorySSAUpdater MSSAU_(MSSA_);
MSSAU = &MSSAU_;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp
index 311a6435ba7c..1e0906717549 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp
@@ -275,7 +275,7 @@ void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis &AA) const {
// Do the actual splitting.
for (Instruction *Inst : reverse(OtherInsts))
- Inst->moveBefore(*NewParent, NewParent->begin());
+ Inst->moveBeforePreserving(*NewParent, NewParent->begin());
}
bool BCECmpBlock::canSplit(AliasAnalysis &AA) const {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 6c5453831ade..d65054a6ff9d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -80,7 +80,6 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
@@ -217,8 +216,8 @@ PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
if (Opd1 == Opd2)
return nullptr;
- auto *NewPN = PHINode::Create(Opd1->getType(), 2, Opd2->getName() + ".sink",
- &BB->front());
+ auto *NewPN = PHINode::Create(Opd1->getType(), 2, Opd2->getName() + ".sink");
+ NewPN->insertBefore(BB->begin());
NewPN->applyMergedLocation(S0->getDebugLoc(), S1->getDebugLoc());
NewPN->addIncoming(Opd1, S0->getParent());
NewPN->addIncoming(Opd2, S1->getParent());
@@ -269,7 +268,7 @@ void MergedLoadStoreMotion::sinkStoresAndGEPs(BasicBlock *BB, StoreInst *S0,
// Create the new store to be inserted at the join point.
StoreInst *SNew = cast<StoreInst>(S0->clone());
- SNew->insertBefore(&*InsertPt);
+ SNew->insertBefore(InsertPt);
// New PHI operand? Use it.
if (PHINode *NewPN = getPHIOperand(BB, S0, S1))
SNew->setOperand(0, NewPN);
@@ -378,52 +377,6 @@ bool MergedLoadStoreMotion::run(Function &F, AliasAnalysis &AA) {
return Changed;
}
-namespace {
-class MergedLoadStoreMotionLegacyPass : public FunctionPass {
- const bool SplitFooterBB;
-public:
- static char ID; // Pass identification, replacement for typeid
- MergedLoadStoreMotionLegacyPass(bool SplitFooterBB = false)
- : FunctionPass(ID), SplitFooterBB(SplitFooterBB) {
- initializeMergedLoadStoreMotionLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- ///
- /// Run the transformation for each function
- ///
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
- MergedLoadStoreMotion Impl(SplitFooterBB);
- return Impl.run(F, getAnalysis<AAResultsWrapperPass>().getAAResults());
- }
-
-private:
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- if (!SplitFooterBB)
- AU.setPreservesCFG();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
-};
-
-char MergedLoadStoreMotionLegacyPass::ID = 0;
-} // anonymous namespace
-
-///
-/// createMergedLoadStoreMotionPass - The public interface to this file.
-///
-FunctionPass *llvm::createMergedLoadStoreMotionPass(bool SplitFooterBB) {
- return new MergedLoadStoreMotionLegacyPass(SplitFooterBB);
-}
-
-INITIALIZE_PASS_BEGIN(MergedLoadStoreMotionLegacyPass, "mldst-motion",
- "MergedLoadStoreMotion", false, false)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(MergedLoadStoreMotionLegacyPass, "mldst-motion",
- "MergedLoadStoreMotion", false, false)
-
PreservedAnalyses
MergedLoadStoreMotionPass::run(Function &F, FunctionAnalysisManager &AM) {
MergedLoadStoreMotion Impl(Options.SplitFooterBB);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
index 9c3e9a2fd018..7fe1a222021e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -359,12 +359,13 @@ bool NaryReassociatePass::requiresSignExtension(Value *Index,
GetElementPtrInst *
NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
unsigned I, Type *IndexedType) {
+ SimplifyQuery SQ(*DL, DT, AC, GEP);
Value *IndexToSplit = GEP->getOperand(I + 1);
if (SExtInst *SExt = dyn_cast<SExtInst>(IndexToSplit)) {
IndexToSplit = SExt->getOperand(0);
} else if (ZExtInst *ZExt = dyn_cast<ZExtInst>(IndexToSplit)) {
// zext can be treated as sext if the source is non-negative.
- if (isKnownNonNegative(ZExt->getOperand(0), *DL, 0, AC, GEP, DT))
+ if (isKnownNonNegative(ZExt->getOperand(0), SQ))
IndexToSplit = ZExt->getOperand(0);
}
@@ -373,8 +374,7 @@ NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
// nsw, we cannot split the add because
// sext(LHS + RHS) != sext(LHS) + sext(RHS).
if (requiresSignExtension(IndexToSplit, GEP) &&
- computeOverflowForSignedAdd(AO, *DL, AC, GEP, DT) !=
- OverflowResult::NeverOverflows)
+ computeOverflowForSignedAdd(AO, SQ) != OverflowResult::NeverOverflows)
return nullptr;
Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1);
@@ -402,7 +402,7 @@ NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
IndexExprs.push_back(SE->getSCEV(Index));
// Replace the I-th index with LHS.
IndexExprs[I] = SE->getSCEV(LHS);
- if (isKnownNonNegative(LHS, *DL, 0, AC, GEP, DT) &&
+ if (isKnownNonNegative(LHS, SimplifyQuery(*DL, DT, AC, GEP)) &&
DL->getTypeSizeInBits(LHS->getType()).getFixedValue() <
DL->getTypeSizeInBits(GEP->getOperand(I)->getType())
.getFixedValue()) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 1af40e2c4e62..19ac9526b5f8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -774,7 +774,7 @@ private:
// Symbolic evaluation.
ExprResult checkExprResults(Expression *, Instruction *, Value *) const;
- ExprResult performSymbolicEvaluation(Value *,
+ ExprResult performSymbolicEvaluation(Instruction *,
SmallPtrSetImpl<Value *> &) const;
const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *,
Instruction *,
@@ -1904,7 +1904,7 @@ NewGVN::ExprResult NewGVN::performSymbolicCmpEvaluation(Instruction *I) const {
LastPredInfo = PI;
// In phi of ops cases, we may have predicate info that we are evaluating
// in a different context.
- if (!DT->dominates(PBranch->To, getBlockForValue(I)))
+ if (!DT->dominates(PBranch->To, I->getParent()))
continue;
// TODO: Along the false edge, we may know more things too, like
// icmp of
@@ -1961,95 +1961,88 @@ NewGVN::ExprResult NewGVN::performSymbolicCmpEvaluation(Instruction *I) const {
return createExpression(I);
}
-// Substitute and symbolize the value before value numbering.
+// Substitute and symbolize the instruction before value numbering.
NewGVN::ExprResult
-NewGVN::performSymbolicEvaluation(Value *V,
+NewGVN::performSymbolicEvaluation(Instruction *I,
SmallPtrSetImpl<Value *> &Visited) const {
const Expression *E = nullptr;
- if (auto *C = dyn_cast<Constant>(V))
- E = createConstantExpression(C);
- else if (isa<Argument>(V) || isa<GlobalVariable>(V)) {
- E = createVariableExpression(V);
- } else {
- // TODO: memory intrinsics.
- // TODO: Some day, we should do the forward propagation and reassociation
- // parts of the algorithm.
- auto *I = cast<Instruction>(V);
- switch (I->getOpcode()) {
- case Instruction::ExtractValue:
- case Instruction::InsertValue:
- E = performSymbolicAggrValueEvaluation(I);
- break;
- case Instruction::PHI: {
- SmallVector<ValPair, 3> Ops;
- auto *PN = cast<PHINode>(I);
- for (unsigned i = 0; i < PN->getNumOperands(); ++i)
- Ops.push_back({PN->getIncomingValue(i), PN->getIncomingBlock(i)});
- // Sort to ensure the invariant createPHIExpression requires is met.
- sortPHIOps(Ops);
- E = performSymbolicPHIEvaluation(Ops, I, getBlockForValue(I));
- } break;
- case Instruction::Call:
- return performSymbolicCallEvaluation(I);
- break;
- case Instruction::Store:
- E = performSymbolicStoreEvaluation(I);
- break;
- case Instruction::Load:
- E = performSymbolicLoadEvaluation(I);
- break;
- case Instruction::BitCast:
- case Instruction::AddrSpaceCast:
- case Instruction::Freeze:
- return createExpression(I);
- break;
- case Instruction::ICmp:
- case Instruction::FCmp:
- return performSymbolicCmpEvaluation(I);
- break;
- case Instruction::FNeg:
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::Select:
- case Instruction::ExtractElement:
- case Instruction::InsertElement:
- case Instruction::GetElementPtr:
- return createExpression(I);
- break;
- case Instruction::ShuffleVector:
- // FIXME: Add support for shufflevector to createExpression.
- return ExprResult::none();
- default:
- return ExprResult::none();
- }
+ // TODO: memory intrinsics.
+ // TODO: Some day, we should do the forward propagation and reassociation
+ // parts of the algorithm.
+ switch (I->getOpcode()) {
+ case Instruction::ExtractValue:
+ case Instruction::InsertValue:
+ E = performSymbolicAggrValueEvaluation(I);
+ break;
+ case Instruction::PHI: {
+ SmallVector<ValPair, 3> Ops;
+ auto *PN = cast<PHINode>(I);
+ for (unsigned i = 0; i < PN->getNumOperands(); ++i)
+ Ops.push_back({PN->getIncomingValue(i), PN->getIncomingBlock(i)});
+ // Sort to ensure the invariant createPHIExpression requires is met.
+ sortPHIOps(Ops);
+ E = performSymbolicPHIEvaluation(Ops, I, getBlockForValue(I));
+ } break;
+ case Instruction::Call:
+ return performSymbolicCallEvaluation(I);
+ break;
+ case Instruction::Store:
+ E = performSymbolicStoreEvaluation(I);
+ break;
+ case Instruction::Load:
+ E = performSymbolicLoadEvaluation(I);
+ break;
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ case Instruction::Freeze:
+ return createExpression(I);
+ break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return performSymbolicCmpEvaluation(I);
+ break;
+ case Instruction::FNeg:
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::Select:
+ case Instruction::ExtractElement:
+ case Instruction::InsertElement:
+ case Instruction::GetElementPtr:
+ return createExpression(I);
+ break;
+ case Instruction::ShuffleVector:
+ // FIXME: Add support for shufflevector to createExpression.
+ return ExprResult::none();
+ default:
+ return ExprResult::none();
}
return ExprResult::some(E);
}
@@ -2772,6 +2765,9 @@ NewGVN::makePossiblePHIOfOps(Instruction *I,
// Clone the instruction, create an expression from it that is
// translated back into the predecessor, and see if we have a leader.
Instruction *ValueOp = I->clone();
+ // Emit the temporary instruction in the predecessor basic block where the
+ // corresponding value is defined.
+ ValueOp->insertBefore(PredBB->getTerminator());
if (MemAccess)
TempToMemory.insert({ValueOp, MemAccess});
bool SafeForPHIOfOps = true;
@@ -2801,7 +2797,7 @@ NewGVN::makePossiblePHIOfOps(Instruction *I,
FoundVal = !SafeForPHIOfOps ? nullptr
: findLeaderForInst(ValueOp, Visited,
MemAccess, I, PredBB);
- ValueOp->deleteValue();
+ ValueOp->eraseFromParent();
if (!FoundVal) {
// We failed to find a leader for the current ValueOp, but this might
// change in case of the translated operands change.
@@ -3542,7 +3538,7 @@ struct NewGVN::ValueDFS {
// the second. We only want it to be less than if the DFS orders are equal.
//
// Each LLVM instruction only produces one value, and thus the lowest-level
- // differentiator that really matters for the stack (and what we use as as a
+ // differentiator that really matters for the stack (and what we use as a
// replacement) is the local dfs number.
// Everything else in the structure is instruction level, and only affects
// the order in which we will replace operands of a given instruction.
@@ -4034,9 +4030,18 @@ bool NewGVN::eliminateInstructions(Function &F) {
// because stores are put in terms of the stored value, we skip
// stored values here. If the stored value is really dead, it will
// still be marked for deletion when we process it in its own class.
- if (!EliminationStack.empty() && Def != EliminationStack.back() &&
- isa<Instruction>(Def) && !FromStore)
- markInstructionForDeletion(cast<Instruction>(Def));
+ auto *DefI = dyn_cast<Instruction>(Def);
+ if (!EliminationStack.empty() && DefI && !FromStore) {
+ Value *DominatingLeader = EliminationStack.back();
+ if (DominatingLeader != Def) {
+ // Even if the instruction is removed, we still need to update
+          // flags/metadata due to downstream users of the leader.
+ if (!match(DefI, m_Intrinsic<Intrinsic::ssa_copy>()))
+ patchReplacementInstruction(DefI, DominatingLeader);
+
+ markInstructionForDeletion(DefI);
+ }
+ }
continue;
}
// At this point, we know it is a Use we are trying to possibly
@@ -4095,9 +4100,12 @@ bool NewGVN::eliminateInstructions(Function &F) {
// For copy instructions, we use their operand as a leader,
// which means we remove a user of the copy and it may become dead.
if (isSSACopy) {
- unsigned &IIUseCount = UseCounts[II];
- if (--IIUseCount == 0)
- ProbablyDead.insert(II);
+ auto It = UseCounts.find(II);
+ if (It != UseCounts.end()) {
+ unsigned &IIUseCount = It->second;
+ if (--IIUseCount == 0)
+ ProbablyDead.insert(II);
+ }
}
++LeaderUseCount;
AnythingReplaced = true;
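
The patchReplacementInstruction() call above matters because the dominating leader inherits every user of the removed def. A simplified sketch of the wrap-flag part of that intersection (the real helper also merges metadata; this is an illustration, not the pass's code):

    #include "llvm/IR/InstrTypes.h"
    static void intersectWrapFlags(llvm::BinaryOperator *Repl,
                                   const llvm::BinaryOperator *Orig) {
      // Keep a poison-generating flag only if both instructions carried it.
      Repl->setHasNoUnsignedWrap(Repl->hasNoUnsignedWrap() &&
                                 Orig->hasNoUnsignedWrap());
      Repl->setHasNoSignedWrap(Repl->hasNoSignedWrap() &&
                               Orig->hasNoSignedWrap());
    }
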
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 40c84e249523..818c7b40d489 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -466,7 +466,8 @@ using RepeatedValue = std::pair<Value*, APInt>;
/// type and thus make the expression bigger.
static bool LinearizeExprTree(Instruction *I,
SmallVectorImpl<RepeatedValue> &Ops,
- ReassociatePass::OrderedSet &ToRedo) {
+ ReassociatePass::OrderedSet &ToRedo,
+ bool &HasNUW) {
assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) &&
"Expected a UnaryOperator or BinaryOperator!");
LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
@@ -515,6 +516,9 @@ static bool LinearizeExprTree(Instruction *I,
std::pair<Instruction*, APInt> P = Worklist.pop_back_val();
I = P.first; // We examine the operands of this binary operator.
+ if (isa<OverflowingBinaryOperator>(I))
+ HasNUW &= I->hasNoUnsignedWrap();
+
for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands.
Value *Op = I->getOperand(OpIdx);
APInt Weight = P.second; // Number of paths to this operand.
@@ -657,7 +661,8 @@ static bool LinearizeExprTree(Instruction *I,
/// Now that the operands for this expression tree are
/// linearized and optimized, emit them in-order.
void ReassociatePass::RewriteExprTree(BinaryOperator *I,
- SmallVectorImpl<ValueEntry> &Ops) {
+ SmallVectorImpl<ValueEntry> &Ops,
+ bool HasNUW) {
assert(Ops.size() > 1 && "Single values should be used directly!");
// Since our optimizations should never increase the number of operations, the
@@ -814,14 +819,20 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
if (ExpressionChangedStart) {
bool ClearFlags = true;
do {
- // Preserve FastMathFlags.
+ // Preserve flags.
if (ClearFlags) {
if (isa<FPMathOperator>(I)) {
FastMathFlags Flags = I->getFastMathFlags();
ExpressionChangedStart->clearSubclassOptionalData();
ExpressionChangedStart->setFastMathFlags(Flags);
- } else
+ } else {
ExpressionChangedStart->clearSubclassOptionalData();
+            // Note that NUW cannot be preserved for mul: if one of the
+            // operands is zero, every intermediate product in the original
+            // chain is zero (so nuw holds trivially), while a reordered
+            // chain may form an intermediate product that wraps.
+ // TODO: We can preserve NUW flag if we prove that all mul operands
+ // are non-zero.
+ if (HasNUW && ExpressionChangedStart->getOpcode() == Instruction::Add)
+ ExpressionChangedStart->setHasNoUnsignedWrap();
+ }
}
if (ExpressionChangedStart == ExpressionChangedEnd)
@@ -921,16 +932,20 @@ static Value *NegateValue(Value *V, Instruction *BI,
TheNeg->getParent()->getParent() != BI->getParent()->getParent())
continue;
- Instruction *InsertPt;
+ BasicBlock::iterator InsertPt;
if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
- InsertPt = InstInput->getInsertionPointAfterDef();
- if (!InsertPt)
+ auto InsertPtOpt = InstInput->getInsertionPointAfterDef();
+ if (!InsertPtOpt)
continue;
+ InsertPt = *InsertPtOpt;
} else {
- InsertPt = &*TheNeg->getFunction()->getEntryBlock().begin();
+ InsertPt = TheNeg->getFunction()
+ ->getEntryBlock()
+ .getFirstNonPHIOrDbg()
+ ->getIterator();
}
- TheNeg->moveBefore(InsertPt);
+ TheNeg->moveBefore(*InsertPt->getParent(), InsertPt);
if (TheNeg->getOpcode() == Instruction::Sub) {
TheNeg->setHasNoUnsignedWrap(false);
TheNeg->setHasNoSignedWrap(false);
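
The switch from Instruction* to BasicBlock::iterator insertion points in this hunk follows the tree-wide move toward iterator-based positions (an iterator can denote an end-of-block slot, which a raw instruction pointer cannot). A sketch of the idiom, assuming the same APIs this hunk uses:

    // getInsertionPointAfterDef() now yields std::optional<BasicBlock::iterator>.
    if (auto InsertPtOpt = InstInput->getInsertionPointAfterDef()) {
      BasicBlock::iterator It = *InsertPtOpt;
      TheNeg->moveBefore(*It->getParent(), It); // (block, iterator) overload
    }
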
@@ -1171,7 +1186,8 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
return nullptr;
SmallVector<RepeatedValue, 8> Tree;
- MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts);
+ bool HasNUW = true;
+ MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts, HasNUW);
SmallVector<ValueEntry, 8> Factors;
Factors.reserve(Tree.size());
for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
@@ -1213,7 +1229,7 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
if (!FoundFactor) {
// Make sure to restore the operands to the expression tree.
- RewriteExprTree(BO, Factors);
+ RewriteExprTree(BO, Factors, HasNUW);
return nullptr;
}
@@ -1225,7 +1241,7 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
RedoInsts.insert(BO);
V = Factors[0].Op;
} else {
- RewriteExprTree(BO, Factors);
+ RewriteExprTree(BO, Factors, HasNUW);
V = BO;
}
@@ -2252,9 +2268,10 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
// with no common bits set, convert it to X+Y.
if (I->getOpcode() == Instruction::Or &&
shouldConvertOrWithNoCommonBitsToAdd(I) && !isLoadCombineCandidate(I) &&
- haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1),
- I->getModule()->getDataLayout(), /*AC=*/nullptr, I,
- /*DT=*/nullptr)) {
+ (cast<PossiblyDisjointInst>(I)->isDisjoint() ||
+ haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1),
+ SimplifyQuery(I->getModule()->getDataLayout(),
+ /*DT=*/nullptr, /*AC=*/nullptr, I)))) {
Instruction *NI = convertOrWithNoCommonBitsToAdd(I);
RedoInsts.insert(I);
MadeChange = true;
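
The new cast<PossiblyDisjointInst>(I)->isDisjoint() early-out works because an `or` whose operands share no set bits produces no carries, so it equals `add`. A self-contained demo of the arithmetic fact:

    #include <cassert>
    int main() {
      unsigned Hi = 0xA0u, Lo = 0x05u; // bit masks are disjoint
      assert((Hi | Lo) == Hi + Lo);    // both give 0xA5
      return 0;
    }
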
@@ -2349,7 +2366,8 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
// First, walk the expression tree, linearizing the tree, collecting the
// operand information.
SmallVector<RepeatedValue, 8> Tree;
- MadeChange |= LinearizeExprTree(I, Tree, RedoInsts);
+ bool HasNUW = true;
+ MadeChange |= LinearizeExprTree(I, Tree, RedoInsts, HasNUW);
SmallVector<ValueEntry, 8> Ops;
Ops.reserve(Tree.size());
for (const RepeatedValue &E : Tree)
@@ -2542,7 +2560,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
dbgs() << '\n');
// Now that we ordered and optimized the expressions, splat them back into
// the expression tree, removing any unneeded nodes.
- RewriteExprTree(I, Ops);
+ RewriteExprTree(I, Ops, HasNUW);
}
void
@@ -2550,7 +2568,7 @@ ReassociatePass::BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT) {
// Make a "pairmap" of how often each operand pair occurs.
for (BasicBlock *BI : RPOT) {
for (Instruction &I : *BI) {
- if (!I.isAssociative())
+ if (!I.isAssociative() || !I.isBinaryOp())
continue;
// Ignore nodes that aren't at the root of trees.
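
Tying the HasNUW plumbing in this file together: the flag starts optimistic, is AND-ed across every overflowing binop met while linearizing, and is consulted when RewriteExprTree rebuilds the adds. A minimal sketch of the accumulation (hypothetical helper, not the pass's exact code):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/Operator.h"
    static bool allNodesHaveNUW(llvm::ArrayRef<llvm::Instruction *> TreeNodes) {
      bool HasNUW = true;
      for (llvm::Instruction *Node : TreeNodes)
        if (llvm::isa<llvm::OverflowingBinaryOperator>(Node))
          HasNUW &= Node->hasNoUnsignedWrap();
      return HasNUW; // RewriteExprTree may re-apply nuw to rebuilt adds
    }
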
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
index db7a1f24660c..6c2b3e9bd4a7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -25,8 +25,6 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -107,36 +105,3 @@ PreservedAnalyses RegToMemPass::run(Function &F, FunctionAnalysisManager &AM) {
PA.preserve<LoopAnalysis>();
return PA;
}
-
-namespace {
-struct RegToMemLegacy : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- RegToMemLegacy() : FunctionPass(ID) {
- initializeRegToMemLegacyPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(BreakCriticalEdgesID);
- AU.addPreservedID(BreakCriticalEdgesID);
- }
-
- bool runOnFunction(Function &F) override {
- if (F.isDeclaration() || skipFunction(F))
- return false;
- return runPass(F);
- }
-};
-} // namespace
-
-char RegToMemLegacy::ID = 0;
-INITIALIZE_PASS_BEGIN(RegToMemLegacy, "reg2mem",
- "Demote all values to stack slots", false, false)
-INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
-INITIALIZE_PASS_END(RegToMemLegacy, "reg2mem",
- "Demote all values to stack slots", false, false)
-
-// createDemoteRegisterToMemory - Provide an entry point to create this pass.
-char &llvm::DemoteRegisterToMemoryID = RegToMemLegacy::ID;
-FunctionPass *llvm::createDemoteRegisterToMemoryPass() {
- return new RegToMemLegacy();
-}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 908bda5709a0..40b4ea92e1ff 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -54,15 +55,12 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
@@ -995,7 +993,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
NewState.meet(OpState);
});
- BDVState OldState = States[BDV];
+ BDVState OldState = Pair.second;
if (OldState != NewState) {
Progress = true;
States[BDV] = NewState;
@@ -1014,8 +1012,44 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
}
#endif
- // Handle all instructions that have a vector BDV, but the instruction itself
- // is of scalar type.
+ // Even though we have identified a concrete base (or a conflict) for all live
+ // pointers at this point, there are cases where the base is of an
+ // incompatible type compared to the original instruction. We conservatively
+ // mark those as conflicts to ensure that corresponding BDVs will be generated
+ // in the next steps.
+
+  // This is a rather explicit check for all cases where we should mark the
+  // state as a conflict to force the latter stages of the algorithm to emit
+  // the BDVs.
+  // TODO: in many cases the instructions emitted for the conflicting states
+  //       will be identical to I itself (when the instructions operate on
+  //       their own BDVs). We should exploit this, but can't do it here since
+  //       it would break the invariant about the BDVs not being known to be
+  //       a base.
+  // TODO: the code also does not handle constants at all - the algorithm
+  //       relies on all constants having the same BDV, so constant-only
+  //       instructions are never in conflict and are skipped by this check.
+  //       If a constant did conflict with its own BDV, the emitted
+  //       instructions would be identical and would get optimized away (as
+  //       in the TODO above).
+ auto MarkConflict = [&](Instruction *I, Value *BaseValue) {
+    // IE and EE mix vector & scalar, so they are always a conflict.
+ if (isa<InsertElementInst>(I) || isa<ExtractElementInst>(I))
+ return true;
+    // Shuffle vector is always a conflict as it creates a new vector from
+ // existing ones.
+ if (isa<ShuffleVectorInst>(I))
+ return true;
+ // Any instructions where the computed base type differs from the
+ // instruction type. An example is where an extract instruction is used by a
+ // select. Here the select's BDV is a vector (because of extract's BDV),
+ // while the select itself is a scalar type. Note that the IE and EE
+    // instruction check is not fully subsumed by the vector<->scalar check at
+    // the end; this is because the BDV algorithm is ignorant of BDV types at
+    // this point.
+ if (!areBothVectorOrScalar(BaseValue, I))
+ return true;
+ return false;
+ };
+
for (auto Pair : States) {
Instruction *I = cast<Instruction>(Pair.first);
BDVState State = Pair.second;
@@ -1028,30 +1062,13 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
"why did it get added?");
assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
- if (!State.isBase() || !isa<VectorType>(BaseValue->getType()))
+    // Since we only mark vector<->scalar instructions as conflicts in the
+    // pass above, our work is done if the instruction already conflicts.
+ if (State.isConflict())
continue;
- // extractelement instructions are a bit special in that we may need to
- // insert an extract even when we know an exact base for the instruction.
- // The problem is that we need to convert from a vector base to a scalar
- // base for the particular indice we're interested in.
- if (isa<ExtractElementInst>(I)) {
- auto *EE = cast<ExtractElementInst>(I);
- // TODO: In many cases, the new instruction is just EE itself. We should
- // exploit this, but can't do it here since it would break the invariant
- // about the BDV not being known to be a base.
- auto *BaseInst = ExtractElementInst::Create(
- State.getBaseValue(), EE->getIndexOperand(), "base_ee", EE);
- BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
- States[I] = BDVState(I, BDVState::Base, BaseInst);
- setKnownBase(BaseInst, /* IsKnownBase */true, KnownBases);
- } else if (!isa<VectorType>(I->getType())) {
- // We need to handle cases that have a vector base but the instruction is
- // a scalar type (these could be phis or selects or any instruction that
- // are of scalar type, but the base can be a vector type). We
- // conservatively set this as conflict. Setting the base value for these
- // conflicts is handled in the next loop which traverses States.
+
+ if (MarkConflict(I, BaseValue))
States[I] = BDVState(I, BDVState::Conflict);
- }
}
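
The conflict rules above funnel into areBothVectorOrScalar; the semantics assumed here are that a base/derived pair is compatible only when neither or both sides are vectors. A sketch of that shape of check:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Value.h"
    static bool bothVectorOrScalar(const llvm::Value *Base,
                                   const llvm::Value *Derived) {
      return llvm::isa<llvm::VectorType>(Base->getType()) ==
             llvm::isa<llvm::VectorType>(Derived->getType());
    }
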
#ifndef NDEBUG
@@ -1234,6 +1251,9 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
VerifyStates();
#endif
+  // Get the data layout to compare the sizes of base/derived pointer values.
+ [[maybe_unused]] auto &DL =
+ cast<llvm::Instruction>(Def)->getModule()->getDataLayout();
// Cache all of our results so we can cheaply reuse them
// NOTE: This is actually two caches: one of the base defining value
// relation and one of the base pointer relation! FIXME
@@ -1241,6 +1261,11 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
auto *BDV = Pair.first;
Value *Base = Pair.second.getBaseValue();
assert(BDV && Base);
+    // Whenever we have a derived pointer, its base pointer must be of the
+    // same size, though not necessarily the same type.
+ assert(DL.getTypeAllocSize(BDV->getType()) ==
+ DL.getTypeAllocSize(Base->getType()) &&
+ "Derived and base values should have same size");
// Only values that do not have known bases or those that have differing
// type (scalar versus vector) from a possible known base should be in the
// lattice.
@@ -1425,14 +1450,15 @@ static constexpr Attribute::AttrKind FnAttrsToStrip[] =
{Attribute::Memory, Attribute::NoSync, Attribute::NoFree};
// Create new attribute set containing only attributes which can be transferred
-// from original call to the safepoint.
-static AttributeList legalizeCallAttributes(LLVMContext &Ctx,
- AttributeList OrigAL,
+// from the original call to the safepoint.
+static AttributeList legalizeCallAttributes(CallBase *Call, bool IsMemIntrinsic,
AttributeList StatepointAL) {
+ AttributeList OrigAL = Call->getAttributes();
if (OrigAL.isEmpty())
return StatepointAL;
// Remove the readonly, readnone, and statepoint function attributes.
+ LLVMContext &Ctx = Call->getContext();
AttrBuilder FnAttrs(Ctx, OrigAL.getFnAttrs());
for (auto Attr : FnAttrsToStrip)
FnAttrs.removeAttribute(Attr);
@@ -1442,8 +1468,24 @@ static AttributeList legalizeCallAttributes(LLVMContext &Ctx,
FnAttrs.removeAttribute(A);
}
- // Just skip parameter and return attributes for now
- return StatepointAL.addFnAttributes(Ctx, FnAttrs);
+ StatepointAL = StatepointAL.addFnAttributes(Ctx, FnAttrs);
+
+  // The memory intrinsics do not have a 1:1 correspondence between the
+  // original call arguments and the produced statepoint arguments. Do not
+  // transfer the argument attributes, to avoid putting them on incorrect
+  // arguments.
+ if (IsMemIntrinsic)
+ return StatepointAL;
+
+  // Attach the argument attributes from the original call to the corresponding
+ // arguments in the statepoint. Note that any argument attributes that are
+ // invalid after lowering are stripped in stripNonValidDataFromBody.
+ for (unsigned I : llvm::seq(Call->arg_size()))
+ StatepointAL = StatepointAL.addParamAttributes(
+ Ctx, GCStatepointInst::CallArgsBeginPos + I,
+ AttrBuilder(Ctx, OrigAL.getParamAttrs(I)));
+
+ // Return attributes are later attached to the gc.result intrinsic.
+ return StatepointAL;
}
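
The parameter-attribute copy above uses the single-argument llvm::seq from the ADT/Sequence.h include added earlier in this file; it iterates the half-open range [0, N). A tiny standalone example:

    #include "llvm/ADT/Sequence.h"
    #include <cstdio>
    int main() {
      for (unsigned I : llvm::seq(3u)) // visits 0, 1, 2
        std::printf("call arg %u -> statepoint param slot %u\n", I, I);
      return 0;
    }
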
/// Helper function to place all gc relocates necessary for the given
@@ -1480,7 +1522,7 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
auto getGCRelocateDecl = [&](Type *Ty) {
assert(isHandledGCPointerType(Ty, GC));
auto AS = Ty->getScalarType()->getPointerAddressSpace();
- Type *NewTy = Type::getInt8PtrTy(M->getContext(), AS);
+ Type *NewTy = PointerType::get(M->getContext(), AS);
if (auto *VT = dyn_cast<VectorType>(Ty))
NewTy = FixedVectorType::get(NewTy,
cast<FixedVectorType>(VT)->getNumElements());
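
Type::getInt8PtrTy giving way to PointerType::get here is the opaque-pointer migration: the pointee type no longer exists, so a pointer type is characterized by its address space alone. The replacement spelling in miniature (hypothetical helper):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    static llvm::Type *relocatedPtrTy(llvm::LLVMContext &Ctx, unsigned AS) {
      return llvm::PointerType::get(Ctx, AS); // prints as `ptr addrspace(AS)`
    }
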
@@ -1633,6 +1675,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
 // with a return value, we lower them as never returning calls to
// __llvm_deoptimize that are followed by unreachable to get better codegen.
bool IsDeoptimize = false;
+ bool IsMemIntrinsic = false;
StatepointDirectives SD =
parseStatepointDirectivesFromAttrs(Call->getAttributes());
@@ -1673,6 +1716,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
IsDeoptimize = true;
} else if (IID == Intrinsic::memcpy_element_unordered_atomic ||
IID == Intrinsic::memmove_element_unordered_atomic) {
+ IsMemIntrinsic = true;
+
// Unordered atomic memcpy and memmove intrinsics which are not explicitly
// marked as "gc-leaf-function" should be lowered in a GC parseable way.
// Specifically, these calls should be lowered to the
@@ -1788,12 +1833,10 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
SPCall->setTailCallKind(CI->getTailCallKind());
SPCall->setCallingConv(CI->getCallingConv());
- // Currently we will fail on parameter attributes and on certain
- // function attributes. In case if we can handle this set of attributes -
- // set up function attrs directly on statepoint and return attrs later for
+ // Set up function attrs directly on statepoint and return attrs later for
// gc_result intrinsic.
- SPCall->setAttributes(legalizeCallAttributes(
- CI->getContext(), CI->getAttributes(), SPCall->getAttributes()));
+ SPCall->setAttributes(
+ legalizeCallAttributes(CI, IsMemIntrinsic, SPCall->getAttributes()));
Token = cast<GCStatepointInst>(SPCall);
@@ -1815,12 +1858,10 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
SPInvoke->setCallingConv(II->getCallingConv());
- // Currently we will fail on parameter attributes and on certain
- // function attributes. In case if we can handle this set of attributes -
- // set up function attrs directly on statepoint and return attrs later for
+ // Set up function attrs directly on statepoint and return attrs later for
// gc_result intrinsic.
- SPInvoke->setAttributes(legalizeCallAttributes(
- II->getContext(), II->getAttributes(), SPInvoke->getAttributes()));
+ SPInvoke->setAttributes(
+ legalizeCallAttributes(II, IsMemIntrinsic, SPInvoke->getAttributes()));
Token = cast<GCStatepointInst>(SPInvoke);
@@ -1830,7 +1871,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
UnwindBlock->getUniquePredecessor() &&
"can't safely insert in this block!");
- Builder.SetInsertPoint(&*UnwindBlock->getFirstInsertionPt());
+ Builder.SetInsertPoint(UnwindBlock, UnwindBlock->getFirstInsertionPt());
Builder.SetCurrentDebugLocation(II->getDebugLoc());
// Attach exceptional gc relocates to the landingpad.
@@ -1845,7 +1886,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
NormalDest->getUniquePredecessor() &&
"can't safely insert in this block!");
- Builder.SetInsertPoint(&*NormalDest->getFirstInsertionPt());
+ Builder.SetInsertPoint(NormalDest, NormalDest->getFirstInsertionPt());
// gc relocates will be generated later as if it were regular call
// statepoint
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp
index fcdc503c54a4..8a491e74b91c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -17,10 +17,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/SCCP.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -49,9 +46,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SCCPSolver.h"
-#include <cassert>
#include <utility>
-#include <vector>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
index 983a75e1d708..24da26c9f0f2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -70,6 +71,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
@@ -91,10 +93,10 @@
#include <string>
#include <tuple>
#include <utility>
+#include <variant>
#include <vector>
using namespace llvm;
-using namespace llvm::sroa;
#define DEBUG_TYPE "sroa"
@@ -123,6 +125,138 @@ static cl::opt<bool> SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false),
cl::Hidden);
namespace {
+class AllocaSliceRewriter;
+class AllocaSlices;
+class Partition;
+
+class SelectHandSpeculativity {
+ unsigned char Storage = 0; // None are speculatable by default.
+  using TrueVal = Bitfield::Element<bool, 0, 1>;  // Bit 0.
+  using FalseVal = Bitfield::Element<bool, 1, 1>; // Bit 1.
+public:
+ SelectHandSpeculativity() = default;
+ SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal);
+ bool isSpeculatable(bool isTrueVal) const;
+ bool areAllSpeculatable() const;
+ bool areAnySpeculatable() const;
+ bool areNoneSpeculatable() const;
+ // For interop as int half of PointerIntPair.
+ explicit operator intptr_t() const { return static_cast<intptr_t>(Storage); }
+ explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
+};
+static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));
+
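
SelectHandSpeculativity above packs two booleans into one byte via llvm::Bitfield (llvm/ADT/Bitfields.h). A standalone sketch of that utility's get/set protocol:

    #include "llvm/ADT/Bitfields.h"
    #include <cassert>
    int main() {
      unsigned char Storage = 0;
      using TrueVal = llvm::Bitfield::Element<bool, 0, 1>;  // bit 0
      using FalseVal = llvm::Bitfield::Element<bool, 1, 1>; // bit 1
      llvm::Bitfield::set<TrueVal>(Storage, true);
      assert(llvm::Bitfield::get<TrueVal>(Storage));
      assert(!llvm::Bitfield::get<FalseVal>(Storage));
      return 0;
    }
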
+using PossiblySpeculatableLoad =
+ PointerIntPair<LoadInst *, 2, SelectHandSpeculativity>;
+using UnspeculatableStore = StoreInst *;
+using RewriteableMemOp =
+ std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
+using RewriteableMemOps = SmallVector<RewriteableMemOp, 2>;
+
+/// An optimization pass providing Scalar Replacement of Aggregates.
+///
+/// This pass takes allocations which can be completely analyzed (that is, they
+/// don't escape) and tries to turn them into scalar SSA values. There are
+/// a few steps to this process.
+///
+/// 1) It takes allocations of aggregates and analyzes the ways in which they
+/// are used to try to split them into smaller allocations, ideally of
+/// a single scalar data type. It will split up memcpy and memset accesses
+/// as necessary and try to isolate individual scalar accesses.
+/// 2) It will transform accesses into forms which are suitable for SSA value
+/// promotion. This can be replacing a memset with a scalar store of an
+/// integer value, or it can involve speculating operations on a PHI or
+/// select to be a PHI or select of the results.
+/// 3) Finally, this will try to detect a pattern of accesses which map cleanly
+/// onto insert and extract operations on a vector value, and convert them to
+/// this form. By doing so, it will enable promotion of vector aggregates to
+/// SSA vector values.
+class SROA {
+ LLVMContext *const C;
+ DomTreeUpdater *const DTU;
+ AssumptionCache *const AC;
+ const bool PreserveCFG;
+
+ /// Worklist of alloca instructions to simplify.
+ ///
+ /// Each alloca in the function is added to this. Each new alloca formed gets
+ /// added to it as well to recursively simplify unless that alloca can be
+  /// directly promoted. Finally, each time we rewrite a use of an alloca other
+  /// than the one being actively rewritten, we add it back onto the list if not
+ /// already present to ensure it is re-visited.
+ SmallSetVector<AllocaInst *, 16> Worklist;
+
+ /// A collection of instructions to delete.
+ /// We try to batch deletions to simplify code and make things a bit more
+  /// efficient. We also make sure there are no dangling pointers.
+ SmallVector<WeakVH, 8> DeadInsts;
+
+ /// Post-promotion worklist.
+ ///
+ /// Sometimes we discover an alloca which has a high probability of becoming
+ /// viable for SROA after a round of promotion takes place. In those cases,
+ /// the alloca is enqueued here for re-processing.
+ ///
+ /// Note that we have to be very careful to clear allocas out of this list in
+ /// the event they are deleted.
+ SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;
+
+ /// A collection of alloca instructions we can directly promote.
+ std::vector<AllocaInst *> PromotableAllocas;
+
+ /// A worklist of PHIs to speculate prior to promoting allocas.
+ ///
+ /// All of these PHIs have been checked for the safety of speculation and by
+ /// being speculated will allow promoting allocas currently in the promotable
+ /// queue.
+ SmallSetVector<PHINode *, 8> SpeculatablePHIs;
+
+ /// A worklist of select instructions to rewrite prior to promoting
+ /// allocas.
+ SmallMapVector<SelectInst *, RewriteableMemOps, 8> SelectsToRewrite;
+
+ /// Select instructions that use an alloca and are subsequently loaded can be
+ /// rewritten to load both input pointers and then select between the result,
+ /// allowing the load of the alloca to be promoted.
+ /// From this:
+ /// %P2 = select i1 %cond, ptr %Alloca, ptr %Other
+ /// %V = load <type>, ptr %P2
+ /// to:
+ /// %V1 = load <type>, ptr %Alloca -> will be mem2reg'd
+ /// %V2 = load <type>, ptr %Other
+ /// %V = select i1 %cond, <type> %V1, <type> %V2
+ ///
+ /// We can do this to a select if its only uses are loads
+ /// and if either the operand to the select can be loaded unconditionally,
+ /// or if we are allowed to perform CFG modifications.
+  /// If an intervening bitcast with a single use of the load is found,
+  /// the promotion is still allowed.
+ static std::optional<RewriteableMemOps>
+ isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG);
+
+public:
+ SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
+ SROAOptions PreserveCFG_)
+ : C(C), DTU(DTU), AC(AC),
+ PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {}
+
+ /// Main run method used by both the SROAPass and by the legacy pass.
+ std::pair<bool /*Changed*/, bool /*CFGChanged*/> runSROA(Function &F);
+
+private:
+ friend class AllocaSliceRewriter;
+
+ bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
+ AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
+ bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
+ std::pair<bool /*Changed*/, bool /*CFGChanged*/> runOnAlloca(AllocaInst &AI);
+ void clobberUse(Use &U);
+ bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
+ bool promoteAllocas(Function &F);
+};
+
+} // end anonymous namespace
+
/// Calculate the fragment of a variable to use when slicing a store
/// based on the slice dimensions, existing fragment, and base storage
/// fragment.
@@ -131,7 +265,9 @@ namespace {
/// UseNoFrag - The new slice already covers the whole variable.
/// Skip - The new alloca slice doesn't include this variable.
/// FIXME: Can we use calculateFragmentIntersect instead?
+namespace {
enum FragCalcResult { UseFrag, UseNoFrag, Skip };
+}
static FragCalcResult
calculateFragment(DILocalVariable *Variable,
uint64_t NewStorageSliceOffsetInBits,
@@ -330,6 +466,8 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
}
}
+namespace {
+
/// A custom IRBuilder inserter which prefixes all names, but only in
/// Assert builds.
class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter {
@@ -422,8 +560,6 @@ public:
bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
};
-} // end anonymous namespace
-
/// Representation of the alloca slices.
///
/// This class represents the slices of an alloca which are formed by its
@@ -431,7 +567,7 @@ public:
/// for the slices used and we reflect that in this structure. The uses are
/// stored, sorted by increasing beginning offset and with unsplittable slices
/// starting at a particular offset before splittable slices.
-class llvm::sroa::AllocaSlices {
+class AllocaSlices {
public:
/// Construct the slices of a particular alloca.
AllocaSlices(const DataLayout &DL, AllocaInst &AI);
@@ -563,7 +699,7 @@ private:
///
/// Objects of this type are produced by traversing the alloca's slices, but
/// are only ephemeral and not persistent.
-class llvm::sroa::Partition {
+class Partition {
private:
friend class AllocaSlices;
friend class AllocaSlices::partition_iterator;
@@ -628,6 +764,8 @@ public:
ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
};
+} // end anonymous namespace
+
/// An iterator over partitions of the alloca's slices.
///
/// This iterator implements the core algorithm for partitioning the alloca's
@@ -1144,6 +1282,7 @@ private:
}
if (II.isLaunderOrStripInvariantGroup()) {
+ insertUse(II, Offset, AllocSize, true);
enqueueUsers(II);
return;
}
@@ -1169,16 +1308,24 @@ private:
std::tie(UsedI, I) = Uses.pop_back_val();
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- Size =
- std::max(Size, DL.getTypeStoreSize(LI->getType()).getFixedValue());
+ TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());
+ if (LoadSize.isScalable()) {
+ PI.setAborted(LI);
+ return nullptr;
+ }
+ Size = std::max(Size, LoadSize.getFixedValue());
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
Value *Op = SI->getOperand(0);
if (Op == UsedI)
return SI;
- Size =
- std::max(Size, DL.getTypeStoreSize(Op->getType()).getFixedValue());
+ TypeSize StoreSize = DL.getTypeStoreSize(Op->getType());
+ if (StoreSize.isScalable()) {
+ PI.setAborted(SI);
+ return nullptr;
+ }
+ Size = std::max(Size, StoreSize.getFixedValue());
continue;
}
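
Both guards above exist because getTypeStoreSize returns a TypeSize, and calling getFixedValue() on a scalable one (from <vscale x N x T> types) is invalid. The pattern in isolation, as a hypothetical helper:

    #include "llvm/Support/TypeSize.h"
    #include <cstdint>
    #include <optional>
    static std::optional<uint64_t> fixedStoreSize(llvm::TypeSize TS) {
      if (TS.isScalable())
        return std::nullopt;     // caller bails out, as with PI.setAborted()
      return TS.getFixedValue(); // safe: a compile-time-constant size
    }
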
@@ -1525,38 +1672,37 @@ static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN) {
PN.eraseFromParent();
}
-sroa::SelectHandSpeculativity &
-sroa::SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {
+SelectHandSpeculativity &
+SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {
if (isTrueVal)
- Bitfield::set<sroa::SelectHandSpeculativity::TrueVal>(Storage, true);
+ Bitfield::set<SelectHandSpeculativity::TrueVal>(Storage, true);
else
- Bitfield::set<sroa::SelectHandSpeculativity::FalseVal>(Storage, true);
+ Bitfield::set<SelectHandSpeculativity::FalseVal>(Storage, true);
return *this;
}
-bool sroa::SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {
- return isTrueVal
- ? Bitfield::get<sroa::SelectHandSpeculativity::TrueVal>(Storage)
- : Bitfield::get<sroa::SelectHandSpeculativity::FalseVal>(Storage);
+bool SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {
+ return isTrueVal ? Bitfield::get<SelectHandSpeculativity::TrueVal>(Storage)
+ : Bitfield::get<SelectHandSpeculativity::FalseVal>(Storage);
}
-bool sroa::SelectHandSpeculativity::areAllSpeculatable() const {
+bool SelectHandSpeculativity::areAllSpeculatable() const {
return isSpeculatable(/*isTrueVal=*/true) &&
isSpeculatable(/*isTrueVal=*/false);
}
-bool sroa::SelectHandSpeculativity::areAnySpeculatable() const {
+bool SelectHandSpeculativity::areAnySpeculatable() const {
return isSpeculatable(/*isTrueVal=*/true) ||
isSpeculatable(/*isTrueVal=*/false);
}
-bool sroa::SelectHandSpeculativity::areNoneSpeculatable() const {
+bool SelectHandSpeculativity::areNoneSpeculatable() const {
return !areAnySpeculatable();
}
-static sroa::SelectHandSpeculativity
+static SelectHandSpeculativity
isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG) {
assert(LI.isSimple() && "Only for simple loads");
- sroa::SelectHandSpeculativity Spec;
+ SelectHandSpeculativity Spec;
const DataLayout &DL = SI.getModule()->getDataLayout();
for (Value *Value : {SI.getTrueValue(), SI.getFalseValue()})
@@ -1569,8 +1715,8 @@ isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG) {
return Spec;
}
-std::optional<sroa::RewriteableMemOps>
-SROAPass::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
+std::optional<RewriteableMemOps>
+SROA::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
RewriteableMemOps Ops;
for (User *U : SI.users()) {
@@ -1604,7 +1750,7 @@ SROAPass::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
continue;
}
- sroa::SelectHandSpeculativity Spec =
+ SelectHandSpeculativity Spec =
isSafeLoadOfSelectToSpeculate(*LI, SI, PreserveCFG);
if (PreserveCFG && !Spec.areAllSpeculatable())
return {}; // Give up on this `select`.
@@ -1655,7 +1801,7 @@ static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI,
template <typename T>
static void rewriteMemOpOfSelect(SelectInst &SI, T &I,
- sroa::SelectHandSpeculativity Spec,
+ SelectHandSpeculativity Spec,
DomTreeUpdater &DTU) {
assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Only for load and store!");
LLVM_DEBUG(dbgs() << " original mem op: " << I << "\n");
@@ -1711,7 +1857,7 @@ static void rewriteMemOpOfSelect(SelectInst &SI, T &I,
}
static void rewriteMemOpOfSelect(SelectInst &SelInst, Instruction &I,
- sroa::SelectHandSpeculativity Spec,
+ SelectHandSpeculativity Spec,
DomTreeUpdater &DTU) {
if (auto *LI = dyn_cast<LoadInst>(&I))
rewriteMemOpOfSelect(SelInst, *LI, Spec, DTU);
@@ -1722,13 +1868,13 @@ static void rewriteMemOpOfSelect(SelectInst &SelInst, Instruction &I,
}
static bool rewriteSelectInstMemOps(SelectInst &SI,
- const sroa::RewriteableMemOps &Ops,
+ const RewriteableMemOps &Ops,
IRBuilderTy &IRB, DomTreeUpdater *DTU) {
bool CFGChanged = false;
LLVM_DEBUG(dbgs() << " original select: " << SI << "\n");
for (const RewriteableMemOp &Op : Ops) {
- sroa::SelectHandSpeculativity Spec;
+ SelectHandSpeculativity Spec;
Instruction *I;
if (auto *const *US = std::get_if<UnspeculatableStore>(&Op)) {
I = *US;
@@ -2421,14 +2567,15 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
return V;
}
+namespace {
+
/// Visitor to rewrite instructions using a particular slice of an alloca
/// to use a new alloca.
///
/// Also implements the rewriting to vector-based accesses when the partition
/// passes the isVectorPromotionViable predicate. Most of the rewriting logic
/// lives here.
-class llvm::sroa::AllocaSliceRewriter
- : public InstVisitor<AllocaSliceRewriter, bool> {
+class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
// Befriend the base class so it can delegate to private visit methods.
friend class InstVisitor<AllocaSliceRewriter, bool>;
@@ -2436,7 +2583,7 @@ class llvm::sroa::AllocaSliceRewriter
const DataLayout &DL;
AllocaSlices &AS;
- SROAPass &Pass;
+ SROA &Pass;
AllocaInst &OldAI, &NewAI;
const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
Type *NewAllocaTy;
@@ -2489,12 +2636,12 @@ class llvm::sroa::AllocaSliceRewriter
if (!IsVolatile || AddrSpace == NewAI.getType()->getPointerAddressSpace())
return &NewAI;
- Type *AccessTy = NewAI.getAllocatedType()->getPointerTo(AddrSpace);
+ Type *AccessTy = IRB.getPtrTy(AddrSpace);
return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
}
public:
- AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROAPass &Pass,
+ AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
AllocaInst &OldAI, AllocaInst &NewAI,
uint64_t NewAllocaBeginOffset,
uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
@@ -2697,7 +2844,7 @@ private:
NewEndOffset == NewAllocaEndOffset &&
(canConvertValue(DL, NewAllocaTy, TargetTy) ||
(IsLoadPastEnd && NewAllocaTy->isIntegerTy() &&
- TargetTy->isIntegerTy()))) {
+ TargetTy->isIntegerTy() && !LI.isVolatile()))) {
Value *NewPtr =
getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr,
@@ -2732,7 +2879,7 @@ private:
"endian_shift");
}
} else {
- Type *LTy = TargetTy->getPointerTo(AS);
+ Type *LTy = IRB.getPtrTy(AS);
LoadInst *NewLI =
IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
getSliceAlign(), LI.isVolatile(), LI.getName());
@@ -2762,9 +2909,9 @@ private:
// basis for the new value. This allows us to replace the uses of LI with
// the computed value, and then replace the placeholder with LI, leaving
// LI only used for this computation.
- Value *Placeholder = new LoadInst(
- LI.getType(), PoisonValue::get(LI.getType()->getPointerTo(AS)), "",
- false, Align(1));
+ Value *Placeholder =
+ new LoadInst(LI.getType(), PoisonValue::get(IRB.getPtrTy(AS)), "",
+ false, Align(1));
V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
"insert");
LI.replaceAllUsesWith(V);
@@ -2875,26 +3022,10 @@ private:
if (IntTy && V->getType()->isIntegerTy())
return rewriteIntegerStore(V, SI, AATags);
- const bool IsStorePastEnd =
- DL.getTypeStoreSize(V->getType()).getFixedValue() > SliceSize;
StoreInst *NewSI;
if (NewBeginOffset == NewAllocaBeginOffset &&
NewEndOffset == NewAllocaEndOffset &&
- (canConvertValue(DL, V->getType(), NewAllocaTy) ||
- (IsStorePastEnd && NewAllocaTy->isIntegerTy() &&
- V->getType()->isIntegerTy()))) {
- // If this is an integer store past the end of slice (and thus the bytes
- // past that point are irrelevant or this is unreachable), truncate the
- // value prior to storing.
- if (auto *VITy = dyn_cast<IntegerType>(V->getType()))
- if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
- if (VITy->getBitWidth() > AITy->getBitWidth()) {
- if (DL.isBigEndian())
- V = IRB.CreateLShr(V, VITy->getBitWidth() - AITy->getBitWidth(),
- "endian_shift");
- V = IRB.CreateTrunc(V, AITy, "load.trunc");
- }
-
+ canConvertValue(DL, V->getType(), NewAllocaTy)) {
V = convertValue(DL, IRB, V, NewAllocaTy);
Value *NewPtr =
getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile());
@@ -2903,7 +3034,7 @@ private:
IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile());
} else {
unsigned AS = SI.getPointerAddressSpace();
- Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo(AS));
+ Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
NewSI =
IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());
}
@@ -3126,8 +3257,7 @@ private:
if (IsDest) {
// Update the address component of linked dbg.assigns.
for (auto *DAI : at::getAssignmentMarkers(&II)) {
- if (any_of(DAI->location_ops(),
- [&](Value *V) { return V == II.getDest(); }) ||
+ if (llvm::is_contained(DAI->location_ops(), II.getDest()) ||
DAI->getAddress() == II.getDest())
DAI->replaceVariableLocationOp(II.getDest(), AdjustedPtr);
}
@@ -3259,7 +3389,6 @@ private:
} else {
OtherTy = NewAllocaTy;
}
- OtherPtrTy = OtherTy->getPointerTo(OtherAS);
Value *AdjPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
OtherPtr->getName() + ".");
@@ -3337,7 +3466,8 @@ private:
}
bool visitIntrinsicInst(IntrinsicInst &II) {
- assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&
+ assert((II.isLifetimeStartOrEnd() || II.isLaunderOrStripInvariantGroup() ||
+ II.isDroppable()) &&
"Unexpected intrinsic!");
LLVM_DEBUG(dbgs() << " original: " << II << "\n");
@@ -3351,6 +3481,9 @@ private:
return true;
}
+ if (II.isLaunderOrStripInvariantGroup())
+ return true;
+
assert(II.getArgOperand(1) == OldPtr);
// Lifetime intrinsics are only promotable if they cover the whole alloca.
// Therefore, we drop lifetime intrinsics which don't cover the whole
@@ -3368,7 +3501,7 @@ private:
NewEndOffset - NewBeginOffset);
// Lifetime intrinsics always expect an i8* so directly get such a pointer
// for the new alloca slice.
- Type *PointerTy = IRB.getInt8PtrTy(OldPtr->getType()->getPointerAddressSpace());
+ Type *PointerTy = IRB.getPtrTy(OldPtr->getType()->getPointerAddressSpace());
Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy);
Value *New;
if (II.getIntrinsicID() == Intrinsic::lifetime_start)
@@ -3422,7 +3555,8 @@ private:
// dominate the PHI.
IRBuilderBase::InsertPointGuard Guard(IRB);
if (isa<PHINode>(OldPtr))
- IRB.SetInsertPoint(&*OldPtr->getParent()->getFirstInsertionPt());
+ IRB.SetInsertPoint(OldPtr->getParent(),
+ OldPtr->getParent()->getFirstInsertionPt());
else
IRB.SetInsertPoint(OldPtr);
IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());
@@ -3472,8 +3606,6 @@ private:
}
};
-namespace {
-
/// Visitor to rewrite aggregate loads and stores as scalar.
///
/// This pass aggressively rewrites all aggregate loads and stores on
@@ -3811,7 +3943,7 @@ private:
SmallVector<Value *, 4> Index(GEPI.indices());
bool IsInBounds = GEPI.isInBounds();
- IRB.SetInsertPoint(GEPI.getParent()->getFirstNonPHI());
+ IRB.SetInsertPoint(GEPI.getParent(), GEPI.getParent()->getFirstNonPHIIt());
PHINode *NewPN = IRB.CreatePHI(GEPI.getType(), PHI->getNumIncomingValues(),
PHI->getName() + ".sroa.phi");
for (unsigned I = 0, E = PHI->getNumIncomingValues(); I != E; ++I) {
@@ -4046,7 +4178,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
/// there all along.
///
/// \returns true if any changes are made.
-bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
+bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n");
// Track the loads and stores which are candidates for pre-splitting here, in
@@ -4268,7 +4400,7 @@ bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
for (;;) {
auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
auto AS = LI->getPointerAddressSpace();
- auto *PartPtrTy = PartTy->getPointerTo(AS);
+ auto *PartPtrTy = LI->getPointerOperandType();
LoadInst *PLoad = IRB.CreateAlignedLoad(
PartTy,
getAdjustedPtr(IRB, DL, BasePtr,
@@ -4323,8 +4455,7 @@ bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
LoadInst *PLoad = SplitLoads[Idx];
uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
- auto *PartPtrTy =
- PLoad->getType()->getPointerTo(SI->getPointerAddressSpace());
+ auto *PartPtrTy = SI->getPointerOperandType();
auto AS = SI->getPointerAddressSpace();
StoreInst *PStore = IRB.CreateAlignedStore(
@@ -4404,8 +4535,8 @@ bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
int Idx = 0, Size = Offsets.Splits.size();
for (;;) {
auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
- auto *LoadPartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace());
- auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace());
+ auto *LoadPartPtrTy = LI->getPointerOperandType();
+ auto *StorePartPtrTy = SI->getPointerOperandType();
// Either lookup a split load or create one.
LoadInst *PLoad;
@@ -4526,8 +4657,8 @@ bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
/// appropriate new offsets. It also evaluates how successful the rewrite was
/// at enabling promotion and if it was successful queues the alloca to be
/// promoted.
-AllocaInst *SROAPass::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
- Partition &P) {
+AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
+ Partition &P) {
// Try to compute a friendly type for this partition of the alloca. This
// won't always succeed, in which case we fall back to a legal integer type
// or an i8 array of an appropriate size.
@@ -4707,9 +4838,39 @@ AllocaInst *SROAPass::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
return NewAI;
}
+static void insertNewDbgInst(DIBuilder &DIB, DbgDeclareInst *Orig,
+ AllocaInst *NewAddr, DIExpression *NewFragmentExpr,
+ Instruction *BeforeInst) {
+ DIB.insertDeclare(NewAddr, Orig->getVariable(), NewFragmentExpr,
+ Orig->getDebugLoc(), BeforeInst);
+}
+static void insertNewDbgInst(DIBuilder &DIB, DbgAssignIntrinsic *Orig,
+ AllocaInst *NewAddr, DIExpression *NewFragmentExpr,
+ Instruction *BeforeInst) {
+ (void)BeforeInst;
+ if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
+ NewAddr->setMetadata(LLVMContext::MD_DIAssignID,
+ DIAssignID::getDistinct(NewAddr->getContext()));
+ }
+ auto *NewAssign = DIB.insertDbgAssign(
+ NewAddr, Orig->getValue(), Orig->getVariable(), NewFragmentExpr, NewAddr,
+ Orig->getAddressExpression(), Orig->getDebugLoc());
+ LLVM_DEBUG(dbgs() << "Created new assign intrinsic: " << *NewAssign << "\n");
+ (void)NewAssign;
+}
+static void insertNewDbgInst(DIBuilder &DIB, DPValue *Orig, AllocaInst *NewAddr,
+ DIExpression *NewFragmentExpr,
+ Instruction *BeforeInst) {
+ (void)DIB;
+ DPValue *New = new DPValue(ValueAsMetadata::get(NewAddr), Orig->getVariable(),
+ NewFragmentExpr, Orig->getDebugLoc(),
+ DPValue::LocationType::Declare);
+ BeforeInst->getParent()->insertDPValueBefore(New, BeforeInst->getIterator());
+}
+
/// Walks the slices of an alloca and form partitions based on them,
/// rewriting each of their uses.
-bool SROAPass::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
+bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
if (AS.begin() == AS.end())
return false;
@@ -4808,12 +4969,7 @@ bool SROAPass::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
// Migrate debug information from the old alloca to the new alloca(s)
// and the individual partitions.
- TinyPtrVector<DbgVariableIntrinsic *> DbgVariables;
- for (auto *DbgDeclare : FindDbgDeclareUses(&AI))
- DbgVariables.push_back(DbgDeclare);
- for (auto *DbgAssign : at::getAssignmentMarkers(&AI))
- DbgVariables.push_back(DbgAssign);
- for (DbgVariableIntrinsic *DbgVariable : DbgVariables) {
+ auto MigrateOne = [&](auto *DbgVariable) {
auto *Expr = DbgVariable->getExpression();
DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
uint64_t AllocaSize =
@@ -4866,41 +5022,39 @@ bool SROAPass::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
// Remove any existing intrinsics on the new alloca describing
// the variable fragment.
- for (DbgDeclareInst *OldDII : FindDbgDeclareUses(Fragment.Alloca)) {
- auto SameVariableFragment = [](const DbgVariableIntrinsic *LHS,
- const DbgVariableIntrinsic *RHS) {
+ SmallVector<DbgDeclareInst *, 1> FragDbgDeclares;
+ SmallVector<DPValue *, 1> FragDPVs;
+ findDbgDeclares(FragDbgDeclares, Fragment.Alloca, &FragDPVs);
+ auto RemoveOne = [DbgVariable](auto *OldDII) {
+ auto SameVariableFragment = [](const auto *LHS, const auto *RHS) {
return LHS->getVariable() == RHS->getVariable() &&
LHS->getDebugLoc()->getInlinedAt() ==
RHS->getDebugLoc()->getInlinedAt();
};
if (SameVariableFragment(OldDII, DbgVariable))
OldDII->eraseFromParent();
- }
+ };
+ for_each(FragDbgDeclares, RemoveOne);
+ for_each(FragDPVs, RemoveOne);
- if (auto *DbgAssign = dyn_cast<DbgAssignIntrinsic>(DbgVariable)) {
- if (!Fragment.Alloca->hasMetadata(LLVMContext::MD_DIAssignID)) {
- Fragment.Alloca->setMetadata(
- LLVMContext::MD_DIAssignID,
- DIAssignID::getDistinct(AI.getContext()));
- }
- auto *NewAssign = DIB.insertDbgAssign(
- Fragment.Alloca, DbgAssign->getValue(), DbgAssign->getVariable(),
- FragmentExpr, Fragment.Alloca, DbgAssign->getAddressExpression(),
- DbgAssign->getDebugLoc());
- NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
- LLVM_DEBUG(dbgs() << "Created new assign intrinsic: " << *NewAssign
- << "\n");
- } else {
- DIB.insertDeclare(Fragment.Alloca, DbgVariable->getVariable(),
- FragmentExpr, DbgVariable->getDebugLoc(), &AI);
- }
+ insertNewDbgInst(DIB, DbgVariable, Fragment.Alloca, FragmentExpr, &AI);
}
- }
+ };
+
+ // Migrate debug information from the old alloca to the new alloca(s)
+ // and the individual partitions.
+ SmallVector<DbgDeclareInst *, 1> DbgDeclares;
+ SmallVector<DPValue *, 1> DPValues;
+ findDbgDeclares(DbgDeclares, &AI, &DPValues);
+ for_each(DbgDeclares, MigrateOne);
+ for_each(DPValues, MigrateOne);
+ for_each(at::getAssignmentMarkers(&AI), MigrateOne);
+
return Changed;
}
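
MigrateOne above is a single generic lambda serving DbgDeclareInst, DbgAssignIntrinsic, and DPValue pointers; each call site resolves to the matching insertNewDbgInst overload at compile time. The dispatch pattern in miniature, with hypothetical stand-in types:

    #include <cstdio>
    struct DeclareLike {};
    struct DPValueLike {};
    static void insertOne(DeclareLike *) { std::puts("intrinsic path"); }
    static void insertOne(DPValueLike *) { std::puts("DPValue path"); }
    int main() {
      auto MigrateOne = [](auto *DbgVariable) { insertOne(DbgVariable); };
      DeclareLike D;
      DPValueLike P;
      MigrateOne(&D); // picks the DeclareLike overload
      MigrateOne(&P); // picks the DPValueLike overload
      return 0;
    }
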
/// Clobber a use with poison, deleting the used value if it becomes dead.
-void SROAPass::clobberUse(Use &U) {
+void SROA::clobberUse(Use &U) {
Value *OldV = U;
  // Replace the use with a poison value.
U = PoisonValue::get(OldV->getType());
@@ -4920,7 +5074,7 @@ void SROAPass::clobberUse(Use &U) {
/// the slices of the alloca, and then hands it off to be split and
/// rewritten as needed.
std::pair<bool /*Changed*/, bool /*CFGChanged*/>
-SROAPass::runOnAlloca(AllocaInst &AI) {
+SROA::runOnAlloca(AllocaInst &AI) {
bool Changed = false;
bool CFGChanged = false;
@@ -5002,7 +5156,7 @@ SROAPass::runOnAlloca(AllocaInst &AI) {
///
/// We also record the alloca instructions deleted here so that they aren't
/// subsequently handed to mem2reg to promote.
-bool SROAPass::deleteDeadInstructions(
+bool SROA::deleteDeadInstructions(
SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
bool Changed = false;
while (!DeadInsts.empty()) {
@@ -5016,7 +5170,12 @@ bool SROAPass::deleteDeadInstructions(
// not be able to find it.
if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
DeletedAllocas.insert(AI);
- for (DbgDeclareInst *OldDII : FindDbgDeclareUses(AI))
+ SmallVector<DbgDeclareInst *, 1> DbgDeclares;
+ SmallVector<DPValue *, 1> DPValues;
+ findDbgDeclares(DbgDeclares, AI, &DPValues);
+ for (DbgDeclareInst *OldDII : DbgDeclares)
+ OldDII->eraseFromParent();
+ for (DPValue *OldDII : DPValues)
OldDII->eraseFromParent();
}
@@ -5043,7 +5202,7 @@ bool SROAPass::deleteDeadInstructions(
/// This attempts to promote whatever allocas have been identified as viable in
/// the PromotableAllocas list. If that list is empty, there is nothing to do.
/// This function returns whether any promotion occurred.
-bool SROAPass::promoteAllocas(Function &F) {
+bool SROA::promoteAllocas(Function &F) {
if (PromotableAllocas.empty())
return false;
@@ -5060,12 +5219,8 @@ bool SROAPass::promoteAllocas(Function &F) {
return true;
}
-PreservedAnalyses SROAPass::runImpl(Function &F, DomTreeUpdater &RunDTU,
- AssumptionCache &RunAC) {
+std::pair<bool /*Changed*/, bool /*CFGChanged*/> SROA::runSROA(Function &F) {
LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
- C = &F.getContext();
- DTU = &RunDTU;
- AC = &RunAC;
const DataLayout &DL = F.getParent()->getDataLayout();
BasicBlock &EntryBB = F.getEntryBlock();
@@ -5116,56 +5271,50 @@ PreservedAnalyses SROAPass::runImpl(Function &F, DomTreeUpdater &RunDTU,
assert((!CFGChanged || !PreserveCFG) &&
"Should not have modified the CFG when told to preserve it.");
- if (!Changed)
- return PreservedAnalyses::all();
-
- if (isAssignmentTrackingEnabled(*F.getParent())) {
+ if (Changed && isAssignmentTrackingEnabled(*F.getParent())) {
for (auto &BB : F)
RemoveRedundantDbgInstrs(&BB);
}
- PreservedAnalyses PA;
- if (!CFGChanged)
- PA.preserveSet<CFGAnalyses>();
- PA.preserve<DominatorTreeAnalysis>();
- return PA;
-}
-
-PreservedAnalyses SROAPass::runImpl(Function &F, DominatorTree &RunDT,
- AssumptionCache &RunAC) {
- DomTreeUpdater DTU(RunDT, DomTreeUpdater::UpdateStrategy::Lazy);
- return runImpl(F, DTU, RunAC);
+ return {Changed, CFGChanged};
}
PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) {
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
- return runImpl(F, DT, AC);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ auto [Changed, CFGChanged] =
+ SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ if (!CFGChanged)
+ PA.preserveSet<CFGAnalyses>();
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
}
void SROAPass::printPipeline(
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
static_cast<PassInfoMixin<SROAPass> *>(this)->printPipeline(
OS, MapClassName2PassName);
- OS << (PreserveCFG ? "<preserve-cfg>" : "<modify-cfg>");
+ OS << (PreserveCFG == SROAOptions::PreserveCFG ? "<preserve-cfg>"
+ : "<modify-cfg>");
}
-SROAPass::SROAPass(SROAOptions PreserveCFG_)
- : PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {}
+SROAPass::SROAPass(SROAOptions PreserveCFG) : PreserveCFG(PreserveCFG) {}
+
+namespace {
/// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
-///
-/// This is in the llvm namespace purely to allow it to be a friend of the \c
-/// SROA pass.
-class llvm::sroa::SROALegacyPass : public FunctionPass {
- /// The SROA implementation.
- SROAPass Impl;
+class SROALegacyPass : public FunctionPass {
+ SROAOptions PreserveCFG;
public:
static char ID;
SROALegacyPass(SROAOptions PreserveCFG = SROAOptions::PreserveCFG)
- : FunctionPass(ID), Impl(PreserveCFG) {
+ : FunctionPass(ID), PreserveCFG(PreserveCFG) {
initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
}
@@ -5173,10 +5322,13 @@ public:
if (skipFunction(F))
return false;
- auto PA = Impl.runImpl(
- F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
- getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
- return !PA.areAllPreserved();
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ AssumptionCache &AC =
+ getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ auto [Changed, _] =
+ SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
+ return Changed;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -5189,6 +5341,8 @@ public:
StringRef getPassName() const override { return "SROA"; }
};
+} // end anonymous namespace
+
char SROALegacyPass::ID = 0;
FunctionPass *llvm::createSROAPass(bool PreserveCFG) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp
index 37b032e4d7c7..4ce6ce93be33 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -21,41 +21,27 @@ using namespace llvm;
void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeConstantHoistingLegacyPassPass(Registry);
initializeDCELegacyPassPass(Registry);
- initializeScalarizerLegacyPassPass(Registry);
- initializeGuardWideningLegacyPassPass(Registry);
- initializeLoopGuardWideningLegacyPassPass(Registry);
initializeGVNLegacyPassPass(Registry);
initializeEarlyCSELegacyPassPass(Registry);
initializeEarlyCSEMemSSALegacyPassPass(Registry);
- initializeMakeGuardsExplicitLegacyPassPass(Registry);
initializeFlattenCFGLegacyPassPass(Registry);
initializeInferAddressSpacesPass(Registry);
initializeInstSimplifyLegacyPassPass(Registry);
initializeLegacyLICMPassPass(Registry);
- initializeLegacyLoopSinkPassPass(Registry);
initializeLoopDataPrefetchLegacyPassPass(Registry);
- initializeLoopInstSimplifyLegacyPassPass(Registry);
- initializeLoopPredicationLegacyPassPass(Registry);
initializeLoopRotateLegacyPassPass(Registry);
initializeLoopStrengthReducePass(Registry);
initializeLoopUnrollPass(Registry);
initializeLowerAtomicLegacyPassPass(Registry);
initializeLowerConstantIntrinsicsPass(Registry);
- initializeLowerExpectIntrinsicPass(Registry);
- initializeLowerGuardIntrinsicLegacyPassPass(Registry);
- initializeLowerWidenableConditionLegacyPassPass(Registry);
initializeMergeICmpsLegacyPassPass(Registry);
- initializeMergedLoadStoreMotionLegacyPassPass(Registry);
initializeNaryReassociateLegacyPassPass(Registry);
initializePartiallyInlineLibCallsLegacyPassPass(Registry);
initializeReassociateLegacyPassPass(Registry);
- initializeRedundantDbgInstEliminationPass(Registry);
- initializeRegToMemLegacyPass(Registry);
initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
initializeSROALegacyPassPass(Registry);
initializeCFGSimplifyPassPass(Registry);
initializeStructurizeCFGLegacyPassPass(Registry);
- initializeSimpleLoopUnswitchLegacyPassPass(Registry);
initializeSinkingLegacyPassPass(Registry);
initializeTailCallElimPass(Registry);
initializeTLSVariableHoistLegacyPassPass(Registry);
@@ -63,5 +49,4 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeSpeculativeExecutionLegacyPassPass(Registry);
initializeStraightLineStrengthReduceLegacyPassPass(Registry);
initializePlaceBackedgeSafepointsLegacyPassPass(Registry);
- initializeLoopSimplifyCFGLegacyPassPass(Registry);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 86b55dfd304a..3eca9ac7c267 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -36,8 +36,6 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -282,12 +280,10 @@ T getWithDefaultOverride(const cl::opt<T> &ClOption,
class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
public:
- ScalarizerVisitor(unsigned ParallelLoopAccessMDKind, DominatorTree *DT,
- ScalarizerPassOptions Options)
- : ParallelLoopAccessMDKind(ParallelLoopAccessMDKind), DT(DT),
- ScalarizeVariableInsertExtract(
- getWithDefaultOverride(ClScalarizeVariableInsertExtract,
- Options.ScalarizeVariableInsertExtract)),
+ ScalarizerVisitor(DominatorTree *DT, ScalarizerPassOptions Options)
+ : DT(DT), ScalarizeVariableInsertExtract(getWithDefaultOverride(
+ ClScalarizeVariableInsertExtract,
+ Options.ScalarizeVariableInsertExtract)),
ScalarizeLoadStore(getWithDefaultOverride(ClScalarizeLoadStore,
Options.ScalarizeLoadStore)),
ScalarizeMinBits(getWithDefaultOverride(ClScalarizeMinBits,
@@ -337,8 +333,6 @@ private:
SmallVector<WeakTrackingVH, 32> PotentiallyDeadInstrs;
- unsigned ParallelLoopAccessMDKind;
-
DominatorTree *DT;
const bool ScalarizeVariableInsertExtract;
@@ -346,31 +340,8 @@ private:
const unsigned ScalarizeMinBits;
};
-class ScalarizerLegacyPass : public FunctionPass {
-public:
- static char ID;
-
- ScalarizerLegacyPass() : FunctionPass(ID) {
- initializeScalarizerLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage& AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- }
-};
-
} // end anonymous namespace
-char ScalarizerLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
- "Scalarize vector operations", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
- "Scalarize vector operations", false, false)
-
Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
const VectorSplit &VS, ValueVector *cachePtr)
: BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
@@ -443,22 +414,6 @@ Value *Scatterer::operator[](unsigned Frag) {
return CV[Frag];
}
-bool ScalarizerLegacyPass::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
-
- Module &M = *F.getParent();
- unsigned ParallelLoopAccessMDKind =
- M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
- DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT, ScalarizerPassOptions());
- return Impl.visit(F);
-}
-
-FunctionPass *llvm::createScalarizerPass() {
- return new ScalarizerLegacyPass();
-}
-
bool ScalarizerVisitor::visit(Function &F) {
assert(Gathered.empty() && Scattered.empty());
@@ -558,7 +513,7 @@ bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
|| Tag == LLVMContext::MD_invariant_load
|| Tag == LLVMContext::MD_alias_scope
|| Tag == LLVMContext::MD_noalias
- || Tag == ParallelLoopAccessMDKind
+ || Tag == LLVMContext::MD_mem_parallel_loop_access
|| Tag == LLVMContext::MD_access_group);
}
@@ -730,7 +685,8 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
// vector type, which is true for all current intrinsics.
for (unsigned I = 0; I != NumArgs; ++I) {
Value *OpI = CI.getOperand(I);
- if (auto *OpVecTy = dyn_cast<FixedVectorType>(OpI->getType())) {
+ if ([[maybe_unused]] auto *OpVecTy =
+ dyn_cast<FixedVectorType>(OpI->getType())) {
assert(OpVecTy->getNumElements() == VS->VecTy->getNumElements());
std::optional<VectorSplit> OpVS = getVectorSplit(OpI->getType());
if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
@@ -1253,11 +1209,8 @@ bool ScalarizerVisitor::finish() {
}
PreservedAnalyses ScalarizerPass::run(Function &F, FunctionAnalysisManager &AM) {
- Module &M = *F.getParent();
- unsigned ParallelLoopAccessMDKind =
- M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
- ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT, Options);
+ ScalarizerVisitor Impl(DT, Options);
bool Changed = Impl.visit(F);
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
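The net effect of the Scalarizer diff above is that the pass stops looking up the "llvm.mem.parallel_loop_access" kind ID by string per module and uses the fixed LLVMContext constant instead. A minimal sketch of the difference, assuming only the standard LLVMContext API:

    #include "llvm/IR/LLVMContext.h"

    unsigned parallelLoopAccessKind(llvm::LLVMContext &Ctx) {
      // Old: a per-context string lookup.
      //   return Ctx.getMDKindID("llvm.mem.parallel_loop_access");
      // New: fixed metadata kinds carry compile-time constant IDs.
      (void)Ctx;
      return llvm::LLVMContext::MD_mem_parallel_loop_access;
    }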
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 89d0b7c33e0d..b8c9d9d100f1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -524,7 +524,7 @@ bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
// FIXME: this does not appear to be covered by any tests
// (with x86/aarch64 backends at least)
if (BO->getOpcode() == Instruction::Or &&
- !haveNoCommonBitsSet(LHS, RHS, DL, nullptr, BO, DT))
+ !haveNoCommonBitsSet(LHS, RHS, SimplifyQuery(DL, DT, /*AC*/ nullptr, BO)))
return false;
// FIXME: We don't currently support constants from the RHS of subs,
@@ -661,15 +661,16 @@ Value *ConstantOffsetExtractor::applyExts(Value *V) {
// in the reversed order.
for (CastInst *I : llvm::reverse(ExtInsts)) {
if (Constant *C = dyn_cast<Constant>(Current)) {
- // If Current is a constant, apply s/zext using ConstantExpr::getCast.
- // ConstantExpr::getCast emits a ConstantInt if C is a ConstantInt.
- Current = ConstantExpr::getCast(I->getOpcode(), C, I->getType());
- } else {
- Instruction *Ext = I->clone();
- Ext->setOperand(0, Current);
- Ext->insertBefore(IP);
- Current = Ext;
+ // Try to constant fold the cast.
+ Current = ConstantFoldCastOperand(I->getOpcode(), C, I->getType(), DL);
+ if (Current)
+ continue;
}
+
+ Instruction *Ext = I->clone();
+ Ext->setOperand(0, Current);
+ Ext->insertBefore(IP);
+ Current = Ext;
}
return Current;
}
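The hunk above moves applyExts to a fold-or-materialize shape: try the DataLayout-aware constant folder first, and only clone a real cast instruction when it declines. A standalone sketch of that shape; the names here are illustrative, not from the patch:

    #include "llvm/Analysis/ConstantFolding.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Instructions.h"

    // Fold the cast if V is a constant and the folder succeeds;
    // otherwise materialize a cloned cast instruction before IP.
    llvm::Value *castOrFold(llvm::CastInst *Template, llvm::Value *V,
                            const llvm::DataLayout &DL, llvm::Instruction *IP) {
      if (auto *C = llvm::dyn_cast<llvm::Constant>(V))
        if (llvm::Constant *Folded = llvm::ConstantFoldCastOperand(
                Template->getOpcode(), C, Template->getType(), DL))
          return Folded;
      llvm::Instruction *Ext = Template->clone();
      Ext->setOperand(0, V);
      Ext->insertBefore(IP);
      return Ext;
    }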
@@ -830,7 +831,7 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
if (GTI.isSequential()) {
// Constant offsets of scalable types are not really constant.
- if (isa<ScalableVectorType>(GTI.getIndexedType()))
+ if (GTI.getIndexedType()->isScalableTy())
continue;
// Tries to extract a constant offset from this GEP index.
@@ -1019,7 +1020,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
if (GTI.isSequential()) {
// Constant offsets of scalable types are not really constant.
- if (isa<ScalableVectorType>(GTI.getIndexedType()))
+ if (GTI.getIndexedType()->isScalableTy())
continue;
// Splits this GEP index into a variadic part and a constant offset, and
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 633d077e6492..7eb0ba1c2c17 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -24,7 +24,6 @@
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
@@ -46,8 +45,6 @@
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -368,10 +365,11 @@ static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
bool FullUnswitch) {
assert(&ExitBB != &UnswitchedBB &&
"Must have different loop exit and unswitched blocks!");
- Instruction *InsertPt = &*UnswitchedBB.begin();
+ BasicBlock::iterator InsertPt = UnswitchedBB.begin();
for (PHINode &PN : ExitBB.phis()) {
auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
- PN.getName() + ".split", InsertPt);
+ PN.getName() + ".split");
+ NewPN->insertBefore(InsertPt);
// Walk backwards over the old PHI node's inputs to minimize the cost of
// removing each one. We have to do this weird loop manually so that we
@@ -609,7 +607,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
UnswitchedBB = LoopExitBB;
} else {
UnswitchedBB =
- SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU);
+ SplitBlock(LoopExitBB, LoopExitBB->begin(), &DT, &LI, MSSAU, "", false);
}
if (MSSAU && VerifyMemorySSA)
@@ -623,7 +621,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
// If fully unswitching, we can use the existing branch instruction.
// Splice it into the old PH to gate reaching the new preheader and re-point
// its successors.
- OldPH->splice(OldPH->end(), BI.getParent(), BI.getIterator());
+ BI.moveBefore(*OldPH, OldPH->end());
BI.setCondition(Cond);
if (MSSAU) {
// Temporarily clone the terminator, to make MSSA update cheaper by
@@ -882,7 +880,7 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
} else {
auto *SplitBB =
- SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI, MSSAU);
+ SplitBlock(DefaultExitBB, DefaultExitBB->begin(), &DT, &LI, MSSAU);
rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
*ParentBB, *OldPH,
/*FullUnswitch*/ true);
@@ -909,7 +907,7 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
if (!SplitExitBB) {
// If this is the first time we see this, do the split and remember it.
- SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
+ SplitExitBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
*ParentBB, *OldPH,
/*FullUnswitch*/ true);
@@ -1210,7 +1208,7 @@ static BasicBlock *buildClonedLoopBlocks(
// place to merge the CFG, so split the exit first. This is always safe to
// do because there cannot be any non-loop predecessors of a loop exit in
// loop simplified form.
- auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
+ auto *MergeBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
// Rearrange the names to make it easier to write test cases by having the
// exit block carry the suffix rather than the merge block carrying the
@@ -1246,8 +1244,8 @@ static BasicBlock *buildClonedLoopBlocks(
SE->forgetValue(&I);
auto *MergePN =
- PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
- &*MergeBB->getFirstInsertionPt());
+ PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi");
+ MergePN->insertBefore(MergeBB->getFirstInsertionPt());
I.replaceAllUsesWith(MergePN);
MergePN->addIncoming(&I, ExitBB);
MergePN->addIncoming(&ClonedI, ClonedExitBB);
@@ -1259,8 +1257,11 @@ static BasicBlock *buildClonedLoopBlocks(
// everything available. Also, we have inserted new instructions which may
// include assume intrinsics, so we update the assumption cache while
// processing this.
+ Module *M = ClonedPH->getParent()->getParent();
for (auto *ClonedBB : NewBlocks)
for (Instruction &I : *ClonedBB) {
+ RemapDPValueRange(M, I.getDbgValueRange(), VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
RemapInstruction(&I, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
if (auto *II = dyn_cast<AssumeInst>(&I))
@@ -1684,13 +1685,12 @@ deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
BB->eraseFromParent();
}
-static void
-deleteDeadBlocksFromLoop(Loop &L,
- SmallVectorImpl<BasicBlock *> &ExitBlocks,
- DominatorTree &DT, LoopInfo &LI,
- MemorySSAUpdater *MSSAU,
- ScalarEvolution *SE,
- function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
+static void deleteDeadBlocksFromLoop(Loop &L,
+ SmallVectorImpl<BasicBlock *> &ExitBlocks,
+ DominatorTree &DT, LoopInfo &LI,
+ MemorySSAUpdater *MSSAU,
+ ScalarEvolution *SE,
+ LPMUpdater &LoopUpdater) {
// Find all the dead blocks tied to this loop, and remove them from their
// successors.
SmallSetVector<BasicBlock *, 8> DeadBlockSet;
@@ -1740,7 +1740,7 @@ deleteDeadBlocksFromLoop(Loop &L,
}) &&
"If the child loop header is dead all blocks in the child loop must "
"be dead as well!");
- DestroyLoopCB(*ChildL, ChildL->getName());
+ LoopUpdater.markLoopAsDeleted(*ChildL, ChildL->getName());
if (SE)
SE->forgetBlockAndLoopDispositions();
LI.destroy(ChildL);
@@ -2084,8 +2084,8 @@ static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
ParentL->removeChildLoop(llvm::find(*ParentL, &L));
else
LI.removeLoop(llvm::find(LI, &L));
- // markLoopAsDeleted for L should be triggered by the caller (it is typically
- // done by using the UnswitchCB callback).
+ // markLoopAsDeleted for L should be triggered by the caller (it is
+ // typically done within postUnswitch).
if (SE)
SE->forgetBlockAndLoopDispositions();
LI.destroy(&L);
@@ -2122,18 +2122,56 @@ void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
} while (!DomWorklist.empty());
}
+void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName,
+ bool CurrentLoopValid, bool PartiallyInvariant,
+ bool InjectedCondition, ArrayRef<Loop *> NewLoops) {
+ // If we did a non-trivial unswitch, we have added new (cloned) loops.
+ if (!NewLoops.empty())
+ U.addSiblingLoops(NewLoops);
+
+ // If the current loop remains valid, we should revisit it to catch any
+ // other unswitch opportunities. Otherwise, we need to mark it as deleted.
+ if (CurrentLoopValid) {
+ if (PartiallyInvariant) {
+ // Mark the new loop as partially unswitched, to avoid unswitching on
+ // the same condition again.
+ auto &Context = L.getHeader()->getContext();
+ MDNode *DisableUnswitchMD = MDNode::get(
+ Context,
+ MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
+ MDNode *NewLoopID = makePostTransformationMetadata(
+ Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
+ {DisableUnswitchMD});
+ L.setLoopID(NewLoopID);
+ } else if (InjectedCondition) {
+ // Do the same for injection of invariant conditions.
+ auto &Context = L.getHeader()->getContext();
+ MDNode *DisableUnswitchMD = MDNode::get(
+ Context,
+ MDString::get(Context, "llvm.loop.unswitch.injection.disable"));
+ MDNode *NewLoopID = makePostTransformationMetadata(
+ Context, L.getLoopID(), {"llvm.loop.unswitch.injection"},
+ {DisableUnswitchMD});
+ L.setLoopID(NewLoopID);
+ } else
+ U.revisitCurrentLoop();
+ } else
+ U.markLoopAsDeleted(L, LoopName);
+}
+
static void unswitchNontrivialInvariants(
Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI,
- AssumptionCache &AC,
- function_ref<void(bool, bool, bool, ArrayRef<Loop *>)> UnswitchCB,
- ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
- function_ref<void(Loop &, StringRef)> DestroyLoopCB, bool InsertFreeze,
- bool InjectedCondition) {
+ AssumptionCache &AC, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+ LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition) {
auto *ParentBB = TI.getParent();
BranchInst *BI = dyn_cast<BranchInst>(&TI);
SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
+ // Save the current loop name in a variable so that we can report it even
+ // after it has been deleted.
+ std::string LoopName(L.getName());
+
// We can only unswitch switches, conditional branches with an invariant
// condition, or combining invariant conditions with an instruction or
// partially invariant instructions.
@@ -2296,7 +2334,7 @@ static void unswitchNontrivialInvariants(
if (FullUnswitch) {
// Splice the terminator from the original loop and rewrite its
// successors.
- SplitBB->splice(SplitBB->end(), ParentBB, TI.getIterator());
+ TI.moveBefore(*SplitBB, SplitBB->end());
// Keep a clone of the terminator for MSSA updates.
Instruction *NewTI = TI.clone();
@@ -2446,7 +2484,7 @@ static void unswitchNontrivialInvariants(
// Now that our cloned loops have been built, we can update the original loop.
// First we delete the dead blocks from it and then we rebuild the loop
// structure taking these deletions into account.
- deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE,DestroyLoopCB);
+ deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE, LoopUpdater);
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
@@ -2582,7 +2620,8 @@ static void unswitchNontrivialInvariants(
for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
if (UpdatedL->getParentLoop() == ParentL)
SibLoops.push_back(UpdatedL);
- UnswitchCB(IsStillLoop, PartiallyInvariant, InjectedCondition, SibLoops);
+ postUnswitch(L, LoopUpdater, LoopName, IsStillLoop, PartiallyInvariant,
+ InjectedCondition, SibLoops);
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
@@ -3429,12 +3468,11 @@ static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT,
Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
}
-static bool unswitchBestCondition(
- Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
- AAResults &AA, TargetTransformInfo &TTI,
- function_ref<void(bool, bool, bool, ArrayRef<Loop *>)> UnswitchCB,
- ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
- function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
+static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
+ AssumptionCache &AC, AAResults &AA,
+ TargetTransformInfo &TTI, ScalarEvolution *SE,
+ MemorySSAUpdater *MSSAU,
+ LPMUpdater &LoopUpdater) {
// Collect all invariant conditions within this loop (as opposed to an inner
// loop which would be handled when visiting that inner loop).
SmallVector<NonTrivialUnswitchCandidate, 4> UnswitchCandidates;
@@ -3497,8 +3535,8 @@ static bool unswitchBestCondition(
LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost
<< ") terminator: " << *Best.TI << "\n");
unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
- LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB,
- InsertFreeze, InjectedCondition);
+ LI, AC, SE, MSSAU, LoopUpdater, InsertFreeze,
+ InjectedCondition);
return true;
}
@@ -3517,20 +3555,18 @@ static bool unswitchBestCondition(
/// true, we will attempt to do non-trivial unswitching as well as trivial
/// unswitching.
///
-/// The `UnswitchCB` callback provided will be run after unswitching is
-/// complete, with the first parameter set to `true` if the provided loop
-/// remains a loop, and a list of new sibling loops created.
+/// The `postUnswitch` function will be run after unswitching is complete
+/// with information on whether or not the provided loop remains a loop and
+/// a list of new sibling loops created.
///
/// If `SE` is non-null, we will update that analysis based on the unswitching
/// done.
-static bool
-unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
- AAResults &AA, TargetTransformInfo &TTI, bool Trivial,
- bool NonTrivial,
- function_ref<void(bool, bool, bool, ArrayRef<Loop *>)> UnswitchCB,
- ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
- ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
- function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
+static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
+ AssumptionCache &AC, AAResults &AA,
+ TargetTransformInfo &TTI, bool Trivial,
+ bool NonTrivial, ScalarEvolution *SE,
+ MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater) {
assert(L.isRecursivelyLCSSAForm(DT, LI) &&
"Loops must be in LCSSA form before unswitching.");
@@ -3542,8 +3578,9 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
// If we unswitched successfully we will want to clean up the loop before
// processing it further so just mark it as unswitched and return.
- UnswitchCB(/*CurrentLoopValid*/ true, /*PartiallyInvariant*/ false,
- /*InjectedCondition*/ false, {});
+ postUnswitch(L, LoopUpdater, L.getName(),
+ /*CurrentLoopValid*/ true, /*PartiallyInvariant*/ false,
+ /*InjectedCondition*/ false, {});
return true;
}
@@ -3612,8 +3649,7 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
// Try to unswitch the best invariant condition. We prefer this full unswitch to
// a partial unswitch when possible below the threshold.
- if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU,
- DestroyLoopCB))
+ if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, SE, MSSAU, LoopUpdater))
return true;
// No other opportunities to unswitch.
@@ -3633,52 +3669,6 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
<< "\n");
- // Save the current loop name in a variable so that we can report it even
- // after it has been deleted.
- std::string LoopName = std::string(L.getName());
-
- auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
- bool PartiallyInvariant,
- bool InjectedCondition,
- ArrayRef<Loop *> NewLoops) {
- // If we did a non-trivial unswitch, we have added new (cloned) loops.
- if (!NewLoops.empty())
- U.addSiblingLoops(NewLoops);
-
- // If the current loop remains valid, we should revisit it to catch any
- // other unswitch opportunities. Otherwise, we need to mark it as deleted.
- if (CurrentLoopValid) {
- if (PartiallyInvariant) {
- // Mark the new loop as partially unswitched, to avoid unswitching on
- // the same condition again.
- auto &Context = L.getHeader()->getContext();
- MDNode *DisableUnswitchMD = MDNode::get(
- Context,
- MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
- MDNode *NewLoopID = makePostTransformationMetadata(
- Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
- {DisableUnswitchMD});
- L.setLoopID(NewLoopID);
- } else if (InjectedCondition) {
- // Do the same for injection of invariant conditions.
- auto &Context = L.getHeader()->getContext();
- MDNode *DisableUnswitchMD = MDNode::get(
- Context,
- MDString::get(Context, "llvm.loop.unswitch.injection.disable"));
- MDNode *NewLoopID = makePostTransformationMetadata(
- Context, L.getLoopID(), {"llvm.loop.unswitch.injection"},
- {DisableUnswitchMD});
- L.setLoopID(NewLoopID);
- } else
- U.revisitCurrentLoop();
- } else
- U.markLoopAsDeleted(L, LoopName);
- };
-
- auto DestroyLoopCB = [&U](Loop &L, StringRef Name) {
- U.markLoopAsDeleted(L, Name);
- };
-
std::optional<MemorySSAUpdater> MSSAU;
if (AR.MSSA) {
MSSAU = MemorySSAUpdater(AR.MSSA);
@@ -3686,8 +3676,7 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
AR.MSSA->verifyMemorySSA();
}
if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
- UnswitchCB, &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI,
- DestroyLoopCB))
+ &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI, U))
return PreservedAnalyses::all();
if (AR.MSSA && VerifyMemorySSA)
@@ -3713,105 +3702,3 @@ void SimpleLoopUnswitchPass::printPipeline(
OS << (Trivial ? "" : "no-") << "trivial";
OS << '>';
}
-
-namespace {
-
-class SimpleLoopUnswitchLegacyPass : public LoopPass {
- bool NonTrivial;
-
-public:
- static char ID; // Pass ID, replacement for typeid
-
- explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
- : LoopPass(ID), NonTrivial(NonTrivial) {
- initializeSimpleLoopUnswitchLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- getLoopAnalysisUsage(AU);
- }
-};
-
-} // end anonymous namespace
-
-bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
- if (skipLoop(L))
- return false;
-
- Function &F = *L->getHeader()->getParent();
-
- LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L
- << "\n");
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
- auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- MemorySSAUpdater MSSAU(MSSA);
-
- auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
- auto *SE = SEWP ? &SEWP->getSE() : nullptr;
-
- auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid, bool PartiallyInvariant,
- bool InjectedCondition,
- ArrayRef<Loop *> NewLoops) {
- // If we did a non-trivial unswitch, we have added new (cloned) loops.
- for (auto *NewL : NewLoops)
- LPM.addLoop(*NewL);
-
- // If the current loop remains valid, re-add it to the queue. This is
- // a little wasteful as we'll finish processing the current loop as well,
- // but it is the best we can do in the old PM.
- if (CurrentLoopValid) {
- // If the current loop has been unswitched using a partially invariant
- // condition or injected invariant condition, we should not re-add the
- // current loop to avoid unswitching on the same condition again.
- if (!PartiallyInvariant && !InjectedCondition)
- LPM.addLoop(*L);
- } else
- LPM.markLoopAsDeleted(*L);
- };
-
- auto DestroyLoopCB = [&LPM](Loop &L, StringRef /* Name */) {
- LPM.markLoopAsDeleted(L);
- };
-
- if (VerifyMemorySSA)
- MSSA->verifyMemorySSA();
- bool Changed =
- unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE,
- &MSSAU, nullptr, nullptr, DestroyLoopCB);
-
- if (VerifyMemorySSA)
- MSSA->verifyMemorySSA();
-
- // Historically this pass has had issues with the dominator tree so verify it
- // in asserts builds.
- assert(DT.verify(DominatorTree::VerificationLevel::Fast));
-
- return Changed;
-}
-
-char SimpleLoopUnswitchLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
- "Simple unswitch loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopPass)
-INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
- "Simple unswitch loops", false, false)
-
-Pass *llvm::createSimpleLoopUnswitchLegacyPass(bool NonTrivial) {
- return new SimpleLoopUnswitchLegacyPass(NonTrivial);
-}
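With the legacy pass deleted, the remaining new-PM entry point reports structural changes directly through LPMUpdater instead of the removed UnswitchCB/DestroyLoopCB callbacks. A minimal sketch of that reporting contract inside some loop pass's run(); `deleteDeadLoop` is hypothetical:

    #include "llvm/Analysis/LoopAnalysisManager.h"
    #include "llvm/Transforms/Scalar/LoopPassManager.h"
    #include <string>

    bool deleteDeadLoop(llvm::Loop &L); // hypothetical transform

    llvm::PreservedAnalyses
    runSketch(llvm::Loop &L, llvm::LoopAnalysisManager &AM,
              llvm::LoopStandardAnalysisResults &AR, llvm::LPMUpdater &U) {
      std::string Name(L.getName()); // capture before L may be destroyed
      if (deleteDeadLoop(L))
        U.markLoopAsDeleted(L, Name); // tell the pass manager L is gone
      else
        U.revisitCurrentLoop();       // queue L for another unswitch attempt
      return llvm::PreservedAnalyses::none();
    }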
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Sink.cpp
index 8b99f73b850b..46bcfd6b41ce 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -67,9 +67,8 @@ static bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo,
assert(Inst && "Instruction to be sunk is null");
assert(SuccToSinkTo && "Candidate sink target is null");
- // It's never legal to sink an instruction into a block which terminates in an
- // EH-pad.
- if (SuccToSinkTo->getTerminator()->isExceptionalTerminator())
+ // It's never legal to sink an instruction into an EH-pad block.
+ if (SuccToSinkTo->isEHPad())
return false;
// If the block has multiple predecessors, this would introduce computation
@@ -131,15 +130,16 @@ static bool SinkInstruction(Instruction *Inst,
for (Use &U : Inst->uses()) {
Instruction *UseInst = cast<Instruction>(U.getUser());
BasicBlock *UseBlock = UseInst->getParent();
- // Don't worry about dead users.
- if (!DT.isReachableFromEntry(UseBlock))
- continue;
if (PHINode *PN = dyn_cast<PHINode>(UseInst)) {
// PHI nodes use the operand in the predecessor block, not the block with
// the PHI.
unsigned Num = PHINode::getIncomingValueNumForOperand(U.getOperandNo());
UseBlock = PN->getIncomingBlock(Num);
}
+ // Don't worry about dead users.
+ if (!DT.isReachableFromEntry(UseBlock))
+ continue;
+
if (SuccToSinkTo)
SuccToSinkTo = DT.findNearestCommonDominator(SuccToSinkTo, UseBlock);
else
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index e866fe681127..7a5318d4404c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -316,7 +316,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo(
auto Current = I;
++I;
if (!NotHoisted.count(&*Current)) {
- Current->moveBefore(ToBlock.getTerminator());
+ Current->moveBeforePreserving(ToBlock.getTerminator());
}
}
return true;
@@ -346,4 +346,14 @@ PreservedAnalyses SpeculativeExecutionPass::run(Function &F,
PA.preserveSet<CFGAnalyses>();
return PA;
}
+
+void SpeculativeExecutionPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<SpeculativeExecutionPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << '<';
+ if (OnlyIfDivergentTarget)
+ OS << "only-if-divergent-target";
+ OS << '>';
+}
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index fdb41cb415df..543469d62fe7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -680,7 +680,7 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
if (BumpWithUglyGEP) {
// C = (char *)Basis + Bump
unsigned AS = Basis.Ins->getType()->getPointerAddressSpace();
- Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS);
+ Type *CharTy = PointerType::get(Basis.Ins->getContext(), AS);
Reduced = Builder.CreateBitCast(Basis.Ins, CharTy);
Reduced =
Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump, "", InBounds);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index fac5695c7bea..7d96a3478858 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -42,6 +42,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
@@ -353,7 +354,6 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
if (SkipUniformRegions)
AU.addRequired<UniformityInfoWrapperPass>();
- AU.addRequiredID(LowerSwitchID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
@@ -368,7 +368,6 @@ char StructurizeCFGLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
"Structurize the CFG", false, false)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
@@ -1173,6 +1172,8 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
this->DT = DT;
Func = R->getEntry()->getParent();
+ assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
+
ParentRegion = R;
orderNodes();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/TLSVariableHoist.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/TLSVariableHoist.cpp
index 4ec7181ad859..58ea5b68d548 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/TLSVariableHoist.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/TLSVariableHoist.cpp
@@ -32,7 +32,6 @@
#include <cassert>
#include <cstdint>
#include <iterator>
-#include <tuple>
#include <utility>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 2031e70bee1d..c6e8505d5ab4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -369,8 +369,14 @@ static bool canTransformAccumulatorRecursion(Instruction *I, CallInst *CI) {
if (!I->isAssociative() || !I->isCommutative())
return false;
- assert(I->getNumOperands() == 2 &&
- "Associative/commutative operations should have 2 args!");
+ assert(I->getNumOperands() >= 2 &&
+ "Associative/commutative operations should have at least 2 args!");
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ // Accumulators must have an identity.
+ if (!ConstantExpr::getIntrinsicIdentity(II->getIntrinsicID(), I->getType()))
+ return false;
+ }
// Exactly one operand should be the result of the call instruction.
if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
@@ -518,10 +524,10 @@ void TailRecursionEliminator::createTailRecurseLoopHeader(CallInst *CI) {
// block, insert a PHI node for each argument of the function.
// For now, we initialize each PHI to only have the real arguments
// which are passed in.
- Instruction *InsertPos = &HeaderBB->front();
+ BasicBlock::iterator InsertPos = HeaderBB->begin();
for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
- PHINode *PN =
- PHINode::Create(I->getType(), 2, I->getName() + ".tr", InsertPos);
+ PHINode *PN = PHINode::Create(I->getType(), 2, I->getName() + ".tr");
+ PN->insertBefore(InsertPos);
I->replaceAllUsesWith(PN); // Everyone uses the PHI node now!
PN->addIncoming(&*I, NewEntry);
ArgumentPHIs.push_back(PN);
@@ -534,8 +540,10 @@ void TailRecursionEliminator::createTailRecurseLoopHeader(CallInst *CI) {
Type *RetType = F.getReturnType();
if (!RetType->isVoidTy()) {
Type *BoolType = Type::getInt1Ty(F.getContext());
- RetPN = PHINode::Create(RetType, 2, "ret.tr", InsertPos);
- RetKnownPN = PHINode::Create(BoolType, 2, "ret.known.tr", InsertPos);
+ RetPN = PHINode::Create(RetType, 2, "ret.tr");
+ RetPN->insertBefore(InsertPos);
+ RetKnownPN = PHINode::Create(BoolType, 2, "ret.known.tr");
+ RetKnownPN->insertBefore(InsertPos);
RetPN->addIncoming(PoisonValue::get(RetType), NewEntry);
RetKnownPN->addIncoming(ConstantInt::getFalse(BoolType), NewEntry);
@@ -555,7 +563,8 @@ void TailRecursionEliminator::insertAccumulator(Instruction *AccRecInstr) {
// Start by inserting a new PHI node for the accumulator.
pred_iterator PB = pred_begin(HeaderBB), PE = pred_end(HeaderBB);
AccPN = PHINode::Create(F.getReturnType(), std::distance(PB, PE) + 1,
- "accumulator.tr", &HeaderBB->front());
+ "accumulator.tr");
+ AccPN->insertBefore(HeaderBB->begin());
// Loop over all of the predecessors of the tail recursion block. For the
// real entry into the function we seed the PHI with the identity constant for
@@ -566,8 +575,8 @@ void TailRecursionEliminator::insertAccumulator(Instruction *AccRecInstr) {
for (pred_iterator PI = PB; PI != PE; ++PI) {
BasicBlock *P = *PI;
if (P == &F.getEntryBlock()) {
- Constant *Identity = ConstantExpr::getBinOpIdentity(
- AccRecInstr->getOpcode(), AccRecInstr->getType());
+ Constant *Identity =
+ ConstantExpr::getIdentity(AccRecInstr, AccRecInstr->getType());
AccPN->addIncoming(Identity, P);
} else {
AccPN->addIncoming(AccPN, P);
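The identity requirement above is what makes accumulator recursion elimination sound: the entry edge of the new loop must be seeded with a value that leaves the result unchanged, which is exactly what intrinsics without an identity cannot provide. A plain C++ illustration (not LLVM code) using multiplication, whose identity is 1:

    #include <cassert>

    // fac(n) = n * fac(n - 1) rewritten as a loop: the accumulator
    // starts at the identity constant of the accumulating operation.
    int factorial(int N) {
      int Acc = 1;     // identity of '*', seeded on the entry edge
      while (N > 1) {  // the former recursive calls, now iterations
        Acc *= N;
        --N;
      }
      return Acc;
    }

    int main() { assert(factorial(5) == 120); }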
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
index 2195406c144c..6ca737df49b9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -153,19 +153,17 @@ static Value *getStrlenWithNull(IRBuilder<> &Builder, Value *Str) {
static Value *callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str,
Value *Length, bool isLast) {
auto Int64Ty = Builder.getInt64Ty();
- auto CharPtrTy = Builder.getInt8PtrTy();
+ auto PtrTy = Builder.getPtrTy();
auto Int32Ty = Builder.getInt32Ty();
auto M = Builder.GetInsertBlock()->getModule();
auto Fn = M->getOrInsertFunction("__ockl_printf_append_string_n", Int64Ty,
- Int64Ty, CharPtrTy, Int64Ty, Int32Ty);
+ Int64Ty, PtrTy, Int64Ty, Int32Ty);
auto IsLastInt32 = Builder.getInt32(isLast);
return Builder.CreateCall(Fn, {Desc, Str, Length, IsLastInt32});
}
static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg,
bool IsLast) {
- Arg = Builder.CreateBitCast(
- Arg, Builder.getInt8PtrTy(Arg->getType()->getPointerAddressSpace()));
auto Length = getStrlenWithNull(Builder, Arg);
return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
}
@@ -299,9 +297,9 @@ static Value *callBufferedPrintfStart(
Builder.getContext(), AttributeList::FunctionIndex, Attribute::NoUnwind);
Type *Tys_alloc[1] = {Builder.getInt32Ty()};
- Type *I8Ptr =
- Builder.getInt8PtrTy(M->getDataLayout().getDefaultGlobalsAddressSpace());
- FunctionType *FTy_alloc = FunctionType::get(I8Ptr, Tys_alloc, false);
+ Type *PtrTy =
+ Builder.getPtrTy(M->getDataLayout().getDefaultGlobalsAddressSpace());
+ FunctionType *FTy_alloc = FunctionType::get(PtrTy, Tys_alloc, false);
auto PrintfAllocFn =
M->getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
index 7d127400651e..f95d5e23c9c8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -63,13 +63,10 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
#include <utility>
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
index 45cf98e65a5a..efa8e874b955 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
@@ -19,7 +19,6 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -587,37 +586,3 @@ PreservedAnalyses AssumeBuilderPass::run(Function &F,
PA.preserveSet<CFGAnalyses>();
return PA;
}
-
-namespace {
-class AssumeBuilderPassLegacyPass : public FunctionPass {
-public:
- static char ID;
-
- AssumeBuilderPassLegacyPass() : FunctionPass(ID) {
- initializeAssumeBuilderPassLegacyPassPass(*PassRegistry::getPassRegistry());
- }
- bool runOnFunction(Function &F) override {
- AssumptionCache &AC =
- getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- for (Instruction &I : instructions(F))
- salvageKnowledge(&I, &AC, DTWP ? &DTWP->getDomTree() : nullptr);
- return true;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
-
- AU.setPreservesAll();
- }
-};
-} // namespace
-
-char AssumeBuilderPassLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(AssumeBuilderPassLegacyPass, "assume-builder",
- "Assume Builder", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_END(AssumeBuilderPassLegacyPass, "assume-builder",
- "Assume Builder", false, false)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index f06ea89cc61d..8b5a6d618412 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -194,7 +194,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
// Don't break unwinding instructions or terminators with other side-effects.
Instruction *PTI = PredBB->getTerminator();
- if (PTI->isExceptionalTerminator() || PTI->mayHaveSideEffects())
+ if (PTI->isSpecialTerminator() || PTI->mayHaveSideEffects())
return false;
// Can't merge if there are multiple distinct successors.
@@ -300,7 +300,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
PredBB->back().eraseFromParent();
// Move terminator instruction.
- PredBB->splice(PredBB->end(), BB);
+ BB->back().moveBeforePreserving(*PredBB, PredBB->end());
// Terminator may be a memory accessing instruction too.
if (MSSAU)
@@ -382,7 +382,51 @@ bool llvm::MergeBlockSuccessorsIntoGivenBlocks(
/// - Check fully overlapping fragments and not only identical fragments.
/// - Support dbg.declare. dbg.label, and possibly other meta instructions being
/// part of the sequence of consecutive instructions.
+static bool DPValuesRemoveRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) {
+ SmallVector<DPValue *, 8> ToBeRemoved;
+ SmallDenseSet<DebugVariable> VariableSet;
+ for (auto &I : reverse(*BB)) {
+ for (DPValue &DPV : reverse(I.getDbgValueRange())) {
+ // Skip declare-type records, as the debug intrinsic method only works
+ // on dbg.value intrinsics.
+ if (DPV.getType() == DPValue::LocationType::Declare) {
+ // The debug intrinsic method treats dbg.declares as "non-debug"
+ // instructions (i.e., a break in a consecutive range of debug
+ // intrinsics). Emulate that to create identical outputs. See
+ // "Possible improvements" above.
+ // FIXME: Delete the line below.
+ VariableSet.clear();
+ continue;
+ }
+
+ DebugVariable Key(DPV.getVariable(), DPV.getExpression(),
+ DPV.getDebugLoc()->getInlinedAt());
+ auto R = VariableSet.insert(Key);
+ // If the same variable fragment is described more than once it is enough
+ // to keep the last one (i.e. the first one found, since we iterate in
+ // reverse).
+ // FIXME: add assignment tracking support (see parallel implementation
+ // below).
+ if (!R.second)
+ ToBeRemoved.push_back(&DPV);
+ continue;
+ }
+ // Sequence with consecutive dbg.value instrs ended. Clear the map to
+ // restart identifying redundant instructions in case we find another
+ // dbg.value sequence.
+ VariableSet.clear();
+ }
+
+ for (auto &DPV : ToBeRemoved)
+ DPV->eraseFromParent();
+
+ return !ToBeRemoved.empty();
+}
+
static bool removeRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) {
+ if (BB->IsNewDbgInfoFormat)
+ return DPValuesRemoveRedundantDbgInstrsUsingBackwardScan(BB);
+
SmallVector<DbgValueInst *, 8> ToBeRemoved;
SmallDenseSet<DebugVariable> VariableSet;
for (auto &I : reverse(*BB)) {
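The new DPValue backward scan above keeps only the final record per variable because the first match seen while walking in reverse is the last one in program order. The same idea in self-contained standard C++, with strings standing in for DebugVariable keys:

    #include <set>
    #include <string>
    #include <utility>
    #include <vector>

    // Walk in reverse, remembering which variables were already seen;
    // the survivors are exactly the last record per variable.
    void keepLastPerVariable(std::vector<std::pair<std::string, int>> &Recs) {
      std::set<std::string> Seen;
      std::vector<std::pair<std::string, int>> Kept;
      for (auto It = Recs.rbegin(); It != Recs.rend(); ++It)
        if (Seen.insert(It->first).second) // first hit in reverse == last overall
          Kept.push_back(*It);
      Recs.assign(Kept.rbegin(), Kept.rend()); // restore original order
    }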
@@ -440,7 +484,40 @@ static bool removeRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) {
///
/// Possible improvements:
/// - Keep track of non-overlapping fragments.
+static bool DPValuesRemoveRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) {
+ SmallVector<DPValue *, 8> ToBeRemoved;
+ DenseMap<DebugVariable, std::pair<SmallVector<Value *, 4>, DIExpression *>>
+ VariableMap;
+ for (auto &I : *BB) {
+ for (DPValue &DPV : I.getDbgValueRange()) {
+ if (DPV.getType() == DPValue::LocationType::Declare)
+ continue;
+ DebugVariable Key(DPV.getVariable(), std::nullopt,
+ DPV.getDebugLoc()->getInlinedAt());
+ auto VMI = VariableMap.find(Key);
+ // Update the map if we found a new value/expression describing the
+ // variable, or if the variable wasn't mapped already.
+ SmallVector<Value *, 4> Values(DPV.location_ops());
+ if (VMI == VariableMap.end() || VMI->second.first != Values ||
+ VMI->second.second != DPV.getExpression()) {
+ VariableMap[Key] = {Values, DPV.getExpression()};
+ continue;
+ }
+ // Found an identical mapping. Remember the instruction for later removal.
+ ToBeRemoved.push_back(&DPV);
+ }
+ }
+
+ for (auto *DPV : ToBeRemoved)
+ DPV->eraseFromParent();
+
+ return !ToBeRemoved.empty();
+}
+
static bool removeRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) {
+ if (BB->IsNewDbgInfoFormat)
+ return DPValuesRemoveRedundantDbgInstrsUsingForwardScan(BB);
+
SmallVector<DbgValueInst *, 8> ToBeRemoved;
DenseMap<DebugVariable, std::pair<SmallVector<Value *, 4>, DIExpression *>>
VariableMap;
@@ -852,9 +929,11 @@ void llvm::createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
continue;
// Otherwise a new PHI is needed. Create one and populate it.
- PHINode *NewPN = PHINode::Create(
- PN.getType(), Preds.size(), "split",
- SplitBB->isLandingPad() ? &SplitBB->front() : SplitBB->getTerminator());
+ PHINode *NewPN = PHINode::Create(PN.getType(), Preds.size(), "split");
+ BasicBlock::iterator InsertPos =
+ SplitBB->isLandingPad() ? SplitBB->begin()
+ : SplitBB->getTerminator()->getIterator();
+ NewPN->insertBefore(InsertPos);
for (BasicBlock *BB : Preds)
NewPN->addIncoming(V, BB);
@@ -877,7 +956,7 @@ llvm::SplitAllCriticalEdges(Function &F,
return NumBroken;
}
-static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt,
+static BasicBlock *SplitBlockImpl(BasicBlock *Old, BasicBlock::iterator SplitPt,
DomTreeUpdater *DTU, DominatorTree *DT,
LoopInfo *LI, MemorySSAUpdater *MSSAU,
const Twine &BBName, bool Before) {
@@ -887,7 +966,7 @@ static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt,
DTU ? DTU : (DT ? &LocalDTU : nullptr), LI, MSSAU,
BBName);
}
- BasicBlock::iterator SplitIt = SplitPt->getIterator();
+ BasicBlock::iterator SplitIt = SplitPt;
while (isa<PHINode>(SplitIt) || SplitIt->isEHPad()) {
++SplitIt;
assert(SplitIt != SplitPt->getParent()->end());
@@ -933,14 +1012,14 @@ static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt,
return New;
}
-BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt,
DominatorTree *DT, LoopInfo *LI,
MemorySSAUpdater *MSSAU, const Twine &BBName,
bool Before) {
return SplitBlockImpl(Old, SplitPt, /*DTU=*/nullptr, DT, LI, MSSAU, BBName,
Before);
}
-BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt,
DomTreeUpdater *DTU, LoopInfo *LI,
MemorySSAUpdater *MSSAU, const Twine &BBName,
bool Before) {
@@ -948,12 +1027,12 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
Before);
}
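For callers, the SplitBlock migration above is mechanical: pass the block's begin() iterator rather than a pointer to its first instruction. A hedged sketch of an updated call site:

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"

    // Split BB at its front using the iterator-taking overload.
    llvm::BasicBlock *splitAtFront(llvm::BasicBlock *BB,
                                   llvm::DominatorTree *DT,
                                   llvm::LoopInfo *LI) {
      // Old: llvm::SplitBlock(BB, &BB->front(), DT, LI, nullptr);
      return llvm::SplitBlock(BB, BB->begin(), DT, LI, /*MSSAU=*/nullptr);
    }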
-BasicBlock *llvm::splitBlockBefore(BasicBlock *Old, Instruction *SplitPt,
+BasicBlock *llvm::splitBlockBefore(BasicBlock *Old, BasicBlock::iterator SplitPt,
DomTreeUpdater *DTU, LoopInfo *LI,
MemorySSAUpdater *MSSAU,
const Twine &BBName) {
- BasicBlock::iterator SplitIt = SplitPt->getIterator();
+ BasicBlock::iterator SplitIt = SplitPt;
while (isa<PHINode>(SplitIt) || SplitIt->isEHPad())
++SplitIt;
std::string Name = BBName.str();
@@ -1137,14 +1216,11 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
// If all incoming values for the new PHI would be the same, just don't
// make a new PHI. Instead, just remove the incoming values from the old
// PHI.
-
- // NOTE! This loop walks backwards for a reason! First off, this minimizes
- // the cost of removal if we end up removing a large number of values, and
- // second off, this ensures that the indices for the incoming values
- // aren't invalidated when we remove one.
- for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i)
- if (PredSet.count(PN->getIncomingBlock(i)))
- PN->removeIncomingValue(i, false);
+ PN->removeIncomingValueIf(
+ [&](unsigned Idx) {
+ return PredSet.contains(PN->getIncomingBlock(Idx));
+ },
+ /* DeletePHIIfEmpty */ false);
// Add an incoming value to the PHI node in the loop for the preheader
// edge.
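The predicate-based removeIncomingValueIf above replaces a manual reverse index loop that had to avoid invalidating incoming-value indices. The same pattern in self-contained standard C++ (C++20 std::erase_if), with ints standing in for block handles:

    #include <algorithm>
    #include <vector>

    // Erase every incoming entry whose predecessor is in Preds;
    // one predicate expresses what the old index loop spelled out.
    void removeIncomingFrom(std::vector<int> &IncomingBlocks,
                            const std::vector<int> &Preds) {
      std::erase_if(IncomingBlocks, [&](int BB) {
        return std::find(Preds.begin(), Preds.end(), BB) != Preds.end();
      });
    }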
@@ -1394,17 +1470,6 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
ArrayRef<BasicBlock *> Preds,
const char *Suffix1, const char *Suffix2,
SmallVectorImpl<BasicBlock *> &NewBBs,
- DominatorTree *DT, LoopInfo *LI,
- MemorySSAUpdater *MSSAU,
- bool PreserveLCSSA) {
- return SplitLandingPadPredecessorsImpl(
- OrigBB, Preds, Suffix1, Suffix2, NewBBs,
- /*DTU=*/nullptr, DT, LI, MSSAU, PreserveLCSSA);
-}
-void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
- ArrayRef<BasicBlock *> Preds,
- const char *Suffix1, const char *Suffix2,
- SmallVectorImpl<BasicBlock *> &NewBBs,
DomTreeUpdater *DTU, LoopInfo *LI,
MemorySSAUpdater *MSSAU,
bool PreserveLCSSA) {
@@ -1472,7 +1537,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
}
Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
- Instruction *SplitBefore,
+ BasicBlock::iterator SplitBefore,
bool Unreachable,
MDNode *BranchWeights,
DomTreeUpdater *DTU, LoopInfo *LI,
@@ -1485,7 +1550,7 @@ Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
}
Instruction *llvm::SplitBlockAndInsertIfElse(Value *Cond,
- Instruction *SplitBefore,
+ BasicBlock::iterator SplitBefore,
bool Unreachable,
MDNode *BranchWeights,
DomTreeUpdater *DTU, LoopInfo *LI,
@@ -1497,7 +1562,7 @@ Instruction *llvm::SplitBlockAndInsertIfElse(Value *Cond,
return ElseBlock->getTerminator();
}
-void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
+void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore,
Instruction **ThenTerm,
Instruction **ElseTerm,
MDNode *BranchWeights,
@@ -1513,7 +1578,7 @@ void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
}
void llvm::SplitBlockAndInsertIfThenElse(
- Value *Cond, Instruction *SplitBefore, BasicBlock **ThenBlock,
+ Value *Cond, BasicBlock::iterator SplitBefore, BasicBlock **ThenBlock,
BasicBlock **ElseBlock, bool UnreachableThen, bool UnreachableElse,
MDNode *BranchWeights, DomTreeUpdater *DTU, LoopInfo *LI) {
assert((ThenBlock || ElseBlock) &&
@@ -1530,7 +1595,7 @@ void llvm::SplitBlockAndInsertIfThenElse(
}
LLVMContext &C = Head->getContext();
- BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
+ BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
BasicBlock *TrueBlock = Tail;
BasicBlock *FalseBlock = Tail;
bool ThenToTailEdge = false;
@@ -2077,3 +2142,25 @@ void llvm::InvertBranch(BranchInst *PBI, IRBuilderBase &Builder) {
PBI->setCondition(NewCond);
PBI->swapSuccessors();
}
+
+bool llvm::hasOnlySimpleTerminator(const Function &F) {
+ for (auto &BB : F) {
+ auto *Term = BB.getTerminator();
+ if (!(isa<ReturnInst>(Term) || isa<UnreachableInst>(Term) ||
+ isa<BranchInst>(Term)))
+ return false;
+ }
+ return true;
+}
+
+bool llvm::isPresplitCoroSuspendExitEdge(const BasicBlock &Src,
+ const BasicBlock &Dest) {
+ assert(Src.getParent() == Dest.getParent());
+ if (!Src.getParent()->isPresplitCoroutine())
+ return false;
+ if (auto *SW = dyn_cast<SwitchInst>(Src.getTerminator()))
+ if (auto *Intr = dyn_cast<IntrinsicInst>(SW->getCondition()))
+ return Intr->getIntrinsicID() == Intrinsic::coro_suspend &&
+ SW->getDefaultDest() == &Dest;
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index ddb35756030f..5fb796cc3db6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -387,7 +387,7 @@ bool llvm::SplitIndirectBrCriticalEdges(Function &F,
if (ShouldUpdateAnalysis) {
// Copy the BFI/BPI from Target to BodyBlock.
BPI->setEdgeProbability(BodyBlock, EdgeProbabilities);
- BFI->setBlockFreq(BodyBlock, BFI->getBlockFreq(Target).getFrequency());
+ BFI->setBlockFreq(BodyBlock, BFI->getBlockFreq(Target));
}
// It's possible Target was its own successor through an indirectbr.
// In this case, the indirectbr now comes from BodyBlock.
@@ -411,10 +411,10 @@ bool llvm::SplitIndirectBrCriticalEdges(Function &F,
BPI->getEdgeProbability(Src, DirectSucc);
}
if (ShouldUpdateAnalysis) {
- BFI->setBlockFreq(DirectSucc, BlockFreqForDirectSucc.getFrequency());
+ BFI->setBlockFreq(DirectSucc, BlockFreqForDirectSucc);
BlockFrequency NewBlockFreqForTarget =
BFI->getBlockFreq(Target) - BlockFreqForDirectSucc;
- BFI->setBlockFreq(Target, NewBlockFreqForTarget.getFrequency());
+ BFI->setBlockFreq(Target, NewBlockFreqForTarget);
}
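These call sites change because BlockFrequencyInfo::setBlockFreq now accepts a BlockFrequency directly, so the raw getFrequency() unwrap disappears. A minimal sketch of the updated usage:

    #include "llvm/Analysis/BlockFrequencyInfo.h"
    #include "llvm/IR/BasicBlock.h"

    // Copy one block's frequency to another; the BlockFrequency value
    // flows through without being converted to uint64_t.
    void copyFreq(llvm::BlockFrequencyInfo &BFI, const llvm::BasicBlock *From,
                  const llvm::BasicBlock *To) {
      BFI.setBlockFreq(To, BFI.getBlockFreq(From));
    }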
// Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
@@ -449,8 +449,8 @@ bool llvm::SplitIndirectBrCriticalEdges(Function &F,
// Create a PHI in the body block, to merge the direct and indirect
// predecessors.
- PHINode *MergePHI =
- PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert);
+ PHINode *MergePHI = PHINode::Create(IndPHI->getType(), 2, "merge");
+ MergePHI->insertBefore(MergeInsert);
MergePHI->addIncoming(NewIndPHI, Target);
MergePHI->addIncoming(DirPHI, DirectSucc);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 5de8ff84de77..12741dc5af5a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1425,11 +1425,6 @@ StringRef llvm::getFloatFn(const Module *M, const TargetLibraryInfo *TLI,
//- Emit LibCalls ------------------------------------------------------------//
-Value *llvm::castToCStr(Value *V, IRBuilderBase &B) {
- unsigned AS = V->getType()->getPointerAddressSpace();
- return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr");
-}
-
static IntegerType *getIntTy(IRBuilderBase &B, const TargetLibraryInfo *TLI) {
return B.getIntNTy(TLI->getIntSize());
}
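castToCStr could be deleted because, under opaque pointers, a pointer already has type 'ptr' in its address space, so the cast it emitted folds away to the input value. A sketch showing why the helper had become an identity:

    #include "llvm/IR/IRBuilder.h"

    // The old helper's body: with opaque pointers, B.getPtrTy(AS) is
    // already V's type, so CreateBitCast just returns V unchanged.
    llvm::Value *asCStr(llvm::Value *V, llvm::IRBuilderBase &B) {
      unsigned AS = V->getType()->getPointerAddressSpace();
      return B.CreateBitCast(V, B.getPtrTy(AS), "cstr");
    }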
@@ -1461,63 +1456,64 @@ static Value *emitLibCall(LibFunc TheLibFunc, Type *ReturnType,
Value *llvm::emitStrLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
+ Type *CharPtrTy = B.getPtrTy();
Type *SizeTTy = getSizeTTy(B, TLI);
- return emitLibCall(LibFunc_strlen, SizeTTy,
- B.getInt8PtrTy(), castToCStr(Ptr, B), B, TLI);
+ return emitLibCall(LibFunc_strlen, SizeTTy, CharPtrTy, Ptr, B, TLI);
}
Value *llvm::emitStrDup(Value *Ptr, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- return emitLibCall(LibFunc_strdup, B.getInt8PtrTy(), B.getInt8PtrTy(),
- castToCStr(Ptr, B), B, TLI);
+ Type *CharPtrTy = B.getPtrTy();
+ return emitLibCall(LibFunc_strdup, CharPtrTy, CharPtrTy, Ptr, B, TLI);
}
Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *CharPtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
- return emitLibCall(LibFunc_strchr, I8Ptr, {I8Ptr, IntTy},
- {castToCStr(Ptr, B), ConstantInt::get(IntTy, C)}, B, TLI);
+ return emitLibCall(LibFunc_strchr, CharPtrTy, {CharPtrTy, IntTy},
+ {Ptr, ConstantInt::get(IntTy, C)}, B, TLI);
}
Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
+ Type *CharPtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
Type *SizeTTy = getSizeTTy(B, TLI);
return emitLibCall(
LibFunc_strncmp, IntTy,
- {B.getInt8PtrTy(), B.getInt8PtrTy(), SizeTTy},
- {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
+ {CharPtrTy, CharPtrTy, SizeTTy},
+ {Ptr1, Ptr2, Len}, B, TLI);
}
Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- Type *I8Ptr = Dst->getType();
- return emitLibCall(LibFunc_strcpy, I8Ptr, {I8Ptr, I8Ptr},
- {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI);
+ Type *CharPtrTy = Dst->getType();
+ return emitLibCall(LibFunc_strcpy, CharPtrTy, {CharPtrTy, CharPtrTy},
+ {Dst, Src}, B, TLI);
}
Value *llvm::emitStpCpy(Value *Dst, Value *Src, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
- return emitLibCall(LibFunc_stpcpy, I8Ptr, {I8Ptr, I8Ptr},
- {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI);
+ Type *CharPtrTy = B.getPtrTy();
+ return emitLibCall(LibFunc_stpcpy, CharPtrTy, {CharPtrTy, CharPtrTy},
+ {Dst, Src}, B, TLI);
}
Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *CharPtrTy = B.getPtrTy();
Type *SizeTTy = getSizeTTy(B, TLI);
- return emitLibCall(LibFunc_strncpy, I8Ptr, {I8Ptr, I8Ptr, SizeTTy},
- {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI);
+ return emitLibCall(LibFunc_strncpy, CharPtrTy, {CharPtrTy, CharPtrTy, SizeTTy},
+ {Dst, Src, Len}, B, TLI);
}
Value *llvm::emitStpNCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *CharPtrTy = B.getPtrTy();
Type *SizeTTy = getSizeTTy(B, TLI);
- return emitLibCall(LibFunc_stpncpy, I8Ptr, {I8Ptr, I8Ptr, SizeTTy},
- {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI);
+ return emitLibCall(LibFunc_stpncpy, CharPtrTy, {CharPtrTy, CharPtrTy, SizeTTy},
+ {Dst, Src, Len}, B, TLI);
}
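
[Editorial sketch, not part of the diff] Underlying every emitter in this hunk: with opaque pointers there is a single `ptr` type per address space, so the bitcast to i8* that castToCStr used to emit is an identity operation, and call sites can pass the pointer through unchanged. A minimal illustration of that reasoning (the helper name is hypothetical):

    #include "llvm/IR/IRBuilder.h"
    #include <cassert>
    using namespace llvm;

    // Under opaque pointers, B.getPtrTy(AS) and the type of any pointer value
    // in address space AS are the same Type, so no cast is ever required.
    static Value *asCStr(Value *V, IRBuilderBase &B) {
      unsigned AS = V->getType()->getPointerAddressSpace();
      assert(V->getType() == B.getPtrTy(AS) && "opaque pointers are uniform");
      return V; // the old castToCStr bitcast would be a no-op
    }
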
Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
@@ -1530,13 +1526,11 @@ Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
AttributeList AS;
AS = AttributeList::get(M->getContext(), AttributeList::FunctionIndex,
Attribute::NoUnwind);
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *VoidPtrTy = B.getPtrTy();
Type *SizeTTy = getSizeTTy(B, TLI);
FunctionCallee MemCpy = getOrInsertLibFunc(M, *TLI, LibFunc_memcpy_chk,
- AttributeList::get(M->getContext(), AS), I8Ptr,
- I8Ptr, I8Ptr, SizeTTy, SizeTTy);
- Dst = castToCStr(Dst, B);
- Src = castToCStr(Src, B);
+ AttributeList::get(M->getContext(), AS), VoidPtrTy,
+ VoidPtrTy, VoidPtrTy, SizeTTy, SizeTTy);
CallInst *CI = B.CreateCall(MemCpy, {Dst, Src, Len, ObjSize});
if (const Function *F =
dyn_cast<Function>(MemCpy.getCallee()->stripPointerCasts()))
@@ -1546,140 +1540,141 @@ Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
Value *llvm::emitMemPCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *VoidPtrTy = B.getPtrTy();
Type *SizeTTy = getSizeTTy(B, TLI);
- return emitLibCall(LibFunc_mempcpy, I8Ptr,
- {I8Ptr, I8Ptr, SizeTTy},
+ return emitLibCall(LibFunc_mempcpy, VoidPtrTy,
+ {VoidPtrTy, VoidPtrTy, SizeTTy},
{Dst, Src, Len}, B, TLI);
}
Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *VoidPtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
Type *SizeTTy = getSizeTTy(B, TLI);
- return emitLibCall(LibFunc_memchr, I8Ptr,
- {I8Ptr, IntTy, SizeTTy},
- {castToCStr(Ptr, B), Val, Len}, B, TLI);
+ return emitLibCall(LibFunc_memchr, VoidPtrTy,
+ {VoidPtrTy, IntTy, SizeTTy},
+ {Ptr, Val, Len}, B, TLI);
}
Value *llvm::emitMemRChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *VoidPtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
Type *SizeTTy = getSizeTTy(B, TLI);
- return emitLibCall(LibFunc_memrchr, I8Ptr,
- {I8Ptr, IntTy, SizeTTy},
- {castToCStr(Ptr, B), Val, Len}, B, TLI);
+ return emitLibCall(LibFunc_memrchr, VoidPtrTy,
+ {VoidPtrTy, IntTy, SizeTTy},
+ {Ptr, Val, Len}, B, TLI);
}
Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *VoidPtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
Type *SizeTTy = getSizeTTy(B, TLI);
return emitLibCall(LibFunc_memcmp, IntTy,
- {I8Ptr, I8Ptr, SizeTTy},
- {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
+ {VoidPtrTy, VoidPtrTy, SizeTTy},
+ {Ptr1, Ptr2, Len}, B, TLI);
}
Value *llvm::emitBCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *VoidPtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
Type *SizeTTy = getSizeTTy(B, TLI);
return emitLibCall(LibFunc_bcmp, IntTy,
- {I8Ptr, I8Ptr, SizeTTy},
- {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
+ {VoidPtrTy, VoidPtrTy, SizeTTy},
+ {Ptr1, Ptr2, Len}, B, TLI);
}
Value *llvm::emitMemCCpy(Value *Ptr1, Value *Ptr2, Value *Val, Value *Len,
IRBuilderBase &B, const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *VoidPtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
Type *SizeTTy = getSizeTTy(B, TLI);
- return emitLibCall(LibFunc_memccpy, I8Ptr,
- {I8Ptr, I8Ptr, IntTy, SizeTTy},
+ return emitLibCall(LibFunc_memccpy, VoidPtrTy,
+ {VoidPtrTy, VoidPtrTy, IntTy, SizeTTy},
{Ptr1, Ptr2, Val, Len}, B, TLI);
}
Value *llvm::emitSNPrintf(Value *Dest, Value *Size, Value *Fmt,
ArrayRef<Value *> VariadicArgs, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *CharPtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
Type *SizeTTy = getSizeTTy(B, TLI);
- SmallVector<Value *, 8> Args{castToCStr(Dest, B), Size, castToCStr(Fmt, B)};
+ SmallVector<Value *, 8> Args{Dest, Size, Fmt};
llvm::append_range(Args, VariadicArgs);
return emitLibCall(LibFunc_snprintf, IntTy,
- {I8Ptr, SizeTTy, I8Ptr},
+ {CharPtrTy, SizeTTy, CharPtrTy},
Args, B, TLI, /*IsVaArgs=*/true);
}
Value *llvm::emitSPrintf(Value *Dest, Value *Fmt,
ArrayRef<Value *> VariadicArgs, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *CharPtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
- SmallVector<Value *, 8> Args{castToCStr(Dest, B), castToCStr(Fmt, B)};
+ SmallVector<Value *, 8> Args{Dest, Fmt};
llvm::append_range(Args, VariadicArgs);
return emitLibCall(LibFunc_sprintf, IntTy,
- {I8Ptr, I8Ptr}, Args, B, TLI,
+ {CharPtrTy, CharPtrTy}, Args, B, TLI,
/*IsVaArgs=*/true);
}
Value *llvm::emitStrCat(Value *Dest, Value *Src, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- return emitLibCall(LibFunc_strcat, B.getInt8PtrTy(),
- {B.getInt8PtrTy(), B.getInt8PtrTy()},
- {castToCStr(Dest, B), castToCStr(Src, B)}, B, TLI);
+ Type *CharPtrTy = B.getPtrTy();
+ return emitLibCall(LibFunc_strcat, CharPtrTy,
+ {CharPtrTy, CharPtrTy},
+ {Dest, Src}, B, TLI);
}
Value *llvm::emitStrLCpy(Value *Dest, Value *Src, Value *Size, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *CharPtrTy = B.getPtrTy();
Type *SizeTTy = getSizeTTy(B, TLI);
return emitLibCall(LibFunc_strlcpy, SizeTTy,
- {I8Ptr, I8Ptr, SizeTTy},
- {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+ {CharPtrTy, CharPtrTy, SizeTTy},
+ {Dest, Src, Size}, B, TLI);
}
Value *llvm::emitStrLCat(Value *Dest, Value *Src, Value *Size, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *CharPtrTy = B.getPtrTy();
Type *SizeTTy = getSizeTTy(B, TLI);
return emitLibCall(LibFunc_strlcat, SizeTTy,
- {I8Ptr, I8Ptr, SizeTTy},
- {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+ {CharPtrTy, CharPtrTy, SizeTTy},
+ {Dest, Src, Size}, B, TLI);
}
Value *llvm::emitStrNCat(Value *Dest, Value *Src, Value *Size, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *CharPtrTy = B.getPtrTy();
Type *SizeTTy = getSizeTTy(B, TLI);
- return emitLibCall(LibFunc_strncat, I8Ptr,
- {I8Ptr, I8Ptr, SizeTTy},
- {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+ return emitLibCall(LibFunc_strncat, CharPtrTy,
+ {CharPtrTy, CharPtrTy, SizeTTy},
+ {Dest, Src, Size}, B, TLI);
}
Value *llvm::emitVSNPrintf(Value *Dest, Value *Size, Value *Fmt, Value *VAList,
IRBuilderBase &B, const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *CharPtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
Type *SizeTTy = getSizeTTy(B, TLI);
return emitLibCall(
LibFunc_vsnprintf, IntTy,
- {I8Ptr, SizeTTy, I8Ptr, VAList->getType()},
- {castToCStr(Dest, B), Size, castToCStr(Fmt, B), VAList}, B, TLI);
+ {CharPtrTy, SizeTTy, CharPtrTy, VAList->getType()},
+ {Dest, Size, Fmt, VAList}, B, TLI);
}
Value *llvm::emitVSPrintf(Value *Dest, Value *Fmt, Value *VAList,
IRBuilderBase &B, const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *CharPtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
return emitLibCall(LibFunc_vsprintf, IntTy,
- {I8Ptr, I8Ptr, VAList->getType()},
- {castToCStr(Dest, B), castToCStr(Fmt, B), VAList}, B, TLI);
+ {CharPtrTy, CharPtrTy, VAList->getType()},
+ {Dest, Fmt, VAList}, B, TLI);
}
/// Append a suffix to the function name according to the type of 'Op'.
@@ -1829,9 +1824,9 @@ Value *llvm::emitPutS(Value *Str, IRBuilderBase &B,
Type *IntTy = getIntTy(B, TLI);
StringRef PutsName = TLI->getName(LibFunc_puts);
FunctionCallee PutS = getOrInsertLibFunc(M, *TLI, LibFunc_puts, IntTy,
- B.getInt8PtrTy());
+ B.getPtrTy());
inferNonMandatoryLibFuncAttrs(M, PutsName, *TLI);
- CallInst *CI = B.CreateCall(PutS, castToCStr(Str, B), PutsName);
+ CallInst *CI = B.CreateCall(PutS, Str, PutsName);
if (const Function *F =
dyn_cast<Function>(PutS.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -1867,10 +1862,10 @@ Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilderBase &B,
Type *IntTy = getIntTy(B, TLI);
StringRef FPutsName = TLI->getName(LibFunc_fputs);
FunctionCallee F = getOrInsertLibFunc(M, *TLI, LibFunc_fputs, IntTy,
- B.getInt8PtrTy(), File->getType());
+ B.getPtrTy(), File->getType());
if (File->getType()->isPointerTy())
inferNonMandatoryLibFuncAttrs(M, FPutsName, *TLI);
- CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, FPutsName);
+ CallInst *CI = B.CreateCall(F, {Str, File}, FPutsName);
if (const Function *Fn =
dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
@@ -1887,13 +1882,13 @@ Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilderBase &B,
Type *SizeTTy = getSizeTTy(B, TLI);
StringRef FWriteName = TLI->getName(LibFunc_fwrite);
FunctionCallee F = getOrInsertLibFunc(M, *TLI, LibFunc_fwrite,
- SizeTTy, B.getInt8PtrTy(), SizeTTy,
+ SizeTTy, B.getPtrTy(), SizeTTy,
SizeTTy, File->getType());
if (File->getType()->isPointerTy())
inferNonMandatoryLibFuncAttrs(M, FWriteName, *TLI);
CallInst *CI =
- B.CreateCall(F, {castToCStr(Ptr, B), Size,
+ B.CreateCall(F, {Ptr, Size,
ConstantInt::get(SizeTTy, 1), File});
if (const Function *Fn =
@@ -1911,7 +1906,7 @@ Value *llvm::emitMalloc(Value *Num, IRBuilderBase &B, const DataLayout &DL,
StringRef MallocName = TLI->getName(LibFunc_malloc);
Type *SizeTTy = getSizeTTy(B, TLI);
FunctionCallee Malloc = getOrInsertLibFunc(M, *TLI, LibFunc_malloc,
- B.getInt8PtrTy(), SizeTTy);
+ B.getPtrTy(), SizeTTy);
inferNonMandatoryLibFuncAttrs(M, MallocName, *TLI);
CallInst *CI = B.CreateCall(Malloc, Num, MallocName);
@@ -1931,7 +1926,7 @@ Value *llvm::emitCalloc(Value *Num, Value *Size, IRBuilderBase &B,
StringRef CallocName = TLI.getName(LibFunc_calloc);
Type *SizeTTy = getSizeTTy(B, &TLI);
FunctionCallee Calloc = getOrInsertLibFunc(M, TLI, LibFunc_calloc,
- B.getInt8PtrTy(), SizeTTy, SizeTTy);
+ B.getPtrTy(), SizeTTy, SizeTTy);
inferNonMandatoryLibFuncAttrs(M, CallocName, TLI);
CallInst *CI = B.CreateCall(Calloc, {Num, Size}, CallocName);
@@ -1950,7 +1945,7 @@ Value *llvm::emitHotColdNew(Value *Num, IRBuilderBase &B,
return nullptr;
StringRef Name = TLI->getName(NewFunc);
- FunctionCallee Func = M->getOrInsertFunction(Name, B.getInt8PtrTy(),
+ FunctionCallee Func = M->getOrInsertFunction(Name, B.getPtrTy(),
Num->getType(), B.getInt8Ty());
inferNonMandatoryLibFuncAttrs(M, Name, *TLI);
CallInst *CI = B.CreateCall(Func, {Num, B.getInt8(HotCold)}, Name);
@@ -1971,7 +1966,7 @@ Value *llvm::emitHotColdNewNoThrow(Value *Num, Value *NoThrow, IRBuilderBase &B,
StringRef Name = TLI->getName(NewFunc);
FunctionCallee Func =
- M->getOrInsertFunction(Name, B.getInt8PtrTy(), Num->getType(),
+ M->getOrInsertFunction(Name, B.getPtrTy(), Num->getType(),
NoThrow->getType(), B.getInt8Ty());
inferNonMandatoryLibFuncAttrs(M, Name, *TLI);
CallInst *CI = B.CreateCall(Func, {Num, NoThrow, B.getInt8(HotCold)}, Name);
@@ -1992,7 +1987,7 @@ Value *llvm::emitHotColdNewAligned(Value *Num, Value *Align, IRBuilderBase &B,
StringRef Name = TLI->getName(NewFunc);
FunctionCallee Func = M->getOrInsertFunction(
- Name, B.getInt8PtrTy(), Num->getType(), Align->getType(), B.getInt8Ty());
+ Name, B.getPtrTy(), Num->getType(), Align->getType(), B.getInt8Ty());
inferNonMandatoryLibFuncAttrs(M, Name, *TLI);
CallInst *CI = B.CreateCall(Func, {Num, Align, B.getInt8(HotCold)}, Name);
@@ -2013,7 +2008,7 @@ Value *llvm::emitHotColdNewAlignedNoThrow(Value *Num, Value *Align,
StringRef Name = TLI->getName(NewFunc);
FunctionCallee Func = M->getOrInsertFunction(
- Name, B.getInt8PtrTy(), Num->getType(), Align->getType(),
+ Name, B.getPtrTy(), Num->getType(), Align->getType(),
NoThrow->getType(), B.getInt8Ty());
inferNonMandatoryLibFuncAttrs(M, Name, *TLI);
CallInst *CI =
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index b488e3bb0cbd..e42cdab64446 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -111,7 +111,7 @@ static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst,
if (OrigInst->getType()->isVoidTy() || OrigInst->use_empty())
return;
- Builder.SetInsertPoint(&MergeBlock->front());
+ Builder.SetInsertPoint(MergeBlock, MergeBlock->begin());
PHINode *Phi = Builder.CreatePHI(OrigInst->getType(), 0);
SmallVector<User *, 16> UsersToUpdate(OrigInst->users());
for (User *U : UsersToUpdate)
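
[Editorial sketch, not part of the diff] The one-line change above swaps a raw `&MergeBlock->front()` insert point for the (block, iterator) overload; my reading is that iterator-based positions are what the surrounding debug-info-record migration standardizes on, since an iterator can also describe a position before a block's leading debug records. The pattern in isolation (the helper name is illustrative):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Position a builder at the very start of a block via the iterator
    // overload rather than a raw Instruction*.
    static PHINode *createHeadPHI(IRBuilder<> &Builder, BasicBlock *BB,
                                  Type *Ty) {
      Builder.SetInsertPoint(BB, BB->begin());
      return Builder.CreatePHI(Ty, /*NumReservedValues=*/0);
    }
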
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
index a1ee3df907ec..fb4d82885377 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
@@ -30,6 +30,7 @@
#include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp
index d55208602b71..c0f333364fa5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -44,6 +44,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
ClonedCodeInfo *CodeInfo,
DebugInfoFinder *DIFinder) {
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
+ NewBB->IsNewDbgInfoFormat = BB->IsNewDbgInfoFormat;
if (BB->hasName())
NewBB->setName(BB->getName() + NameSuffix);
@@ -58,7 +59,10 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
Instruction *NewInst = I.clone();
if (I.hasName())
NewInst->setName(I.getName() + NameSuffix);
- NewInst->insertInto(NewBB, NewBB->end());
+
+ NewInst->insertBefore(*NewBB, NewBB->end());
+ NewInst->cloneDebugInfoFrom(&I);
+
VMap[&I] = NewInst; // Add instruction map to value.
if (isa<CallInst>(I) && !I.isDebugOrPseudoInst()) {
@@ -90,6 +94,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
const char *NameSuffix, ClonedCodeInfo *CodeInfo,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
+ NewFunc->setIsNewDbgInfoFormat(OldFunc->IsNewDbgInfoFormat);
assert(NameSuffix && "NameSuffix cannot be null!");
#ifndef NDEBUG
@@ -267,9 +272,13 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
BB = cast<BasicBlock>(VMap[&OldFunc->front()])->getIterator(),
BE = NewFunc->end();
BB != BE; ++BB)
- // Loop over all instructions, fixing each one as we find it...
- for (Instruction &II : *BB)
+ // Loop over all instructions, fixing each one as we find it, and any
+ // attached debug-info records.
+ for (Instruction &II : *BB) {
RemapInstruction(&II, VMap, RemapFlag, TypeMapper, Materializer);
+ RemapDPValueRange(II.getModule(), II.getDbgValueRange(), VMap, RemapFlag,
+ TypeMapper, Materializer);
+ }
// Only update !llvm.dbg.cu for DifferentModule (not CloneModule). In the
// same module, the compile unit will already be listed (or not). When
@@ -327,6 +336,7 @@ Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap,
// Create the new function...
Function *NewF = Function::Create(FTy, F->getLinkage(), F->getAddressSpace(),
F->getName(), F->getParent());
+ NewF->setIsNewDbgInfoFormat(F->IsNewDbgInfoFormat);
// Loop over the arguments, copying the names of the mapped arguments over...
Function::arg_iterator DestI = NewF->arg_begin();
@@ -472,6 +482,7 @@ void PruningFunctionCloner::CloneBlock(
BasicBlock *NewBB;
Twine NewName(BB->hasName() ? Twine(BB->getName()) + NameSuffix : "");
BBEntry = NewBB = BasicBlock::Create(BB->getContext(), NewName, NewFunc);
+ NewBB->IsNewDbgInfoFormat = BB->IsNewDbgInfoFormat;
// It is only legal to clone a function if a block address within that
// function is never referenced outside of the function. Given that, we
@@ -491,6 +502,22 @@ void PruningFunctionCloner::CloneBlock(
bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
bool hasMemProfMetadata = false;
+ // Keep a cursor pointing at the last place we cloned debug-info records from.
+ BasicBlock::const_iterator DbgCursor = StartingInst;
+ auto CloneDbgRecordsToHere =
+ [NewBB, &DbgCursor](Instruction *NewInst, BasicBlock::const_iterator II) {
+ if (!NewBB->IsNewDbgInfoFormat)
+ return;
+
+ // Clone debug-info records onto this instruction. Iterate through any
+ // source-instructions we've cloned and then subsequently optimised
+ // away, so that their debug-info doesn't go missing.
+ for (; DbgCursor != II; ++DbgCursor)
+ NewInst->cloneDebugInfoFrom(&*DbgCursor, std::nullopt, false);
+ NewInst->cloneDebugInfoFrom(&*II);
+ DbgCursor = std::next(II);
+ };
+
// Loop over all instructions, and copy them over, DCE'ing as we go. This
// loop doesn't include the terminator.
for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); II != IE;
@@ -540,6 +567,8 @@ void PruningFunctionCloner::CloneBlock(
hasMemProfMetadata |= II->hasMetadata(LLVMContext::MD_memprof);
}
+ CloneDbgRecordsToHere(NewInst, II);
+
if (CodeInfo) {
CodeInfo->OrigVMap[&*II] = NewInst;
if (auto *CB = dyn_cast<CallBase>(&*II))
@@ -597,6 +626,9 @@ void PruningFunctionCloner::CloneBlock(
if (OldTI->hasName())
NewInst->setName(OldTI->getName() + NameSuffix);
NewInst->insertInto(NewBB, NewBB->end());
+
+ CloneDbgRecordsToHere(NewInst, OldTI->getIterator());
+
VMap[OldTI] = NewInst; // Add instruction map to value.
if (CodeInfo) {
@@ -608,6 +640,13 @@ void PruningFunctionCloner::CloneBlock(
// Recursively clone any reachable successor blocks.
append_range(ToClone, successors(BB->getTerminator()));
+ } else {
+ // If we didn't create a new terminator, clone DPValues from the old
+ // terminator onto the new terminator.
+ Instruction *NewInst = NewBB->getTerminator();
+ assert(NewInst);
+
+ CloneDbgRecordsToHere(NewInst, OldTI->getIterator());
}
if (CodeInfo) {
@@ -845,12 +884,22 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
TypeMapper, Materializer);
}
+ // Do the same for DPValues, touching all the instructions in the cloned
+ // range of blocks.
+ Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
+ for (BasicBlock &BB : make_range(Begin, NewFunc->end())) {
+ for (Instruction &I : BB) {
+ RemapDPValueRange(I.getModule(), I.getDbgValueRange(), VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer);
+ }
+ }
+
// Simplify conditional branches and switches with a constant operand. We try
// to prune these out when cloning, but if the simplification required
// looking through PHI nodes, those are only available after forming the full
// basic block. That may leave some here, and we still want to prune the dead
// code as early as possible.
- Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
for (BasicBlock &BB : make_range(Begin, NewFunc->end()))
ConstantFoldTerminator(&BB);
@@ -939,10 +988,14 @@ void llvm::CloneAndPruneFunctionInto(
void llvm::remapInstructionsInBlocks(ArrayRef<BasicBlock *> Blocks,
ValueToValueMapTy &VMap) {
// Rewrite the code to refer to itself.
- for (auto *BB : Blocks)
- for (auto &Inst : *BB)
+ for (auto *BB : Blocks) {
+ for (auto &Inst : *BB) {
+ RemapDPValueRange(Inst.getModule(), Inst.getDbgValueRange(), VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
RemapInstruction(&Inst, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ }
+ }
}
/// Clones a loop \p OrigLoop. Returns the loop and the blocks in \p
@@ -1066,6 +1119,7 @@ BasicBlock *llvm::DuplicateInstructionsInSplitBetween(
Instruction *New = BI->clone();
New->setName(BI->getName());
New->insertBefore(NewTerm);
+ New->cloneDebugInfoFrom(&*BI);
ValueMapping[&*BI] = New;
// Remap operands to patch up intra-block references.
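
[Editorial sketch, not part of the diff] The CloneDbgRecordsToHere cursor above is the heart of this hunk set: because the pruning cloner may fold away source instructions, any debug records attached to the skipped range must be re-homed onto the next instruction that does get cloned. A stripped-down sketch of that sweep, assuming the cloneDebugInfoFrom signature used in the diff (the helper name is illustrative):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"
    #include <iterator>
    #include <optional>
    using namespace llvm;

    // Sweep the source range [Cursor, SrcPos), whose instructions were
    // optimised away, copying their attached debug records onto NewInst;
    // then copy the records of the instruction actually being cloned.
    static void cloneDbgRecordsUpTo(BasicBlock::const_iterator &Cursor,
                                    BasicBlock::const_iterator SrcPos,
                                    Instruction *NewInst) {
      for (; Cursor != SrcPos; ++Cursor)
        NewInst->cloneDebugInfoFrom(&*Cursor, std::nullopt,
                                    /*InsertAtHead=*/false);
      NewInst->cloneDebugInfoFrom(&*SrcPos);
      Cursor = std::next(SrcPos);
    }
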
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp
index 55e051298a9a..00e40fe73d90 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -34,6 +34,8 @@ static void copyComdat(GlobalObject *Dst, const GlobalObject *Src) {
/// copies of global variables and functions, and making their (initializers and
/// references, respectively) refer to the right globals.
///
+/// Cloning un-materialized modules is not currently supported, so any
+/// modules initialized via lazy loading should be materialized before cloning.
std::unique_ptr<Module> llvm::CloneModule(const Module &M) {
// Create the value map that maps things from the old module over to the new
// module.
@@ -49,6 +51,9 @@ std::unique_ptr<Module> llvm::CloneModule(const Module &M,
std::unique_ptr<Module> llvm::CloneModule(
const Module &M, ValueToValueMapTy &VMap,
function_ref<bool(const GlobalValue *)> ShouldCloneDefinition) {
+
+ assert(M.isMaterialized() && "Module must be materialized before cloning!");
+
// First off, we need to create the new module.
std::unique_ptr<Module> New =
std::make_unique<Module>(M.getModuleIdentifier(), M.getContext());
@@ -56,6 +61,7 @@ std::unique_ptr<Module> llvm::CloneModule(
New->setDataLayout(M.getDataLayout());
New->setTargetTriple(M.getTargetTriple());
New->setModuleInlineAsm(M.getModuleInlineAsm());
+ New->IsNewDbgInfoFormat = M.IsNewDbgInfoFormat;
// Loop over all of the global variables, making corresponding globals in the
// new module. Here we add them to the VMap and to the new Module. We
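
[Editorial sketch, not part of the diff] Given the new assertion, callers that lazy-load a module (for example via getLazyBitcodeModule) must materialize it before cloning. A minimal sketch with error handling reduced to a fatal error (the helper name is illustrative):

    #include "llvm/IR/Module.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    #include <memory>
    using namespace llvm;

    // Pull in all lazily-deferred function bodies first; CloneModule now
    // asserts M.isMaterialized().
    static std::unique_ptr<Module> cloneFullModule(Module &M) {
      if (Error E = M.materializeAll())
        report_fatal_error(std::move(E));
      return CloneModule(M);
    }
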
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index c390af351a69..f5abed0dd517 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -245,12 +245,13 @@ CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
bool AggregateArgs, BlockFrequencyInfo *BFI,
BranchProbabilityInfo *BPI, AssumptionCache *AC,
bool AllowVarArgs, bool AllowAlloca,
- BasicBlock *AllocationBlock, std::string Suffix)
+ BasicBlock *AllocationBlock, std::string Suffix,
+ bool ArgsInZeroAddressSpace)
: DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
BPI(BPI), AC(AC), AllocationBlock(AllocationBlock),
AllowVarArgs(AllowVarArgs),
Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)),
- Suffix(Suffix) {}
+ Suffix(Suffix), ArgsInZeroAddressSpace(ArgsInZeroAddressSpace) {}
CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
BlockFrequencyInfo *BFI,
@@ -567,7 +568,7 @@ void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache &CEAC,
for (Instruction *I : LifetimeBitcastUsers) {
Module *M = AIFunc->getParent();
LLVMContext &Ctx = M->getContext();
- auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
+ auto *Int8PtrTy = PointerType::getUnqual(Ctx);
CastInst *CastI =
CastInst::CreatePointerCast(AI, Int8PtrTy, "lt.cast", I);
I->replaceUsesOfWith(I->getOperand(1), CastI);
@@ -721,7 +722,8 @@ void CodeExtractor::severSplitPHINodesOfEntry(BasicBlock *&Header) {
// Create a new PHI node in the new region, which has an incoming value
// from OldPred of PN.
PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion,
- PN->getName() + ".ce", &NewBB->front());
+ PN->getName() + ".ce");
+ NewPN->insertBefore(NewBB->begin());
PN->replaceAllUsesWith(NewPN);
NewPN->addIncoming(PN, OldPred);
@@ -766,6 +768,7 @@ void CodeExtractor::severSplitPHINodesOfExits(
NewBB = BasicBlock::Create(ExitBB->getContext(),
ExitBB->getName() + ".split",
ExitBB->getParent(), ExitBB);
+ NewBB->IsNewDbgInfoFormat = ExitBB->IsNewDbgInfoFormat;
SmallVector<BasicBlock *, 4> Preds(predecessors(ExitBB));
for (BasicBlock *PredBB : Preds)
if (Blocks.count(PredBB))
@@ -775,9 +778,9 @@ void CodeExtractor::severSplitPHINodesOfExits(
}
// Split this PHI.
- PHINode *NewPN =
- PHINode::Create(PN.getType(), IncomingVals.size(),
- PN.getName() + ".ce", NewBB->getFirstNonPHI());
+ PHINode *NewPN = PHINode::Create(PN.getType(), IncomingVals.size(),
+ PN.getName() + ".ce");
+ NewPN->insertBefore(NewBB->getFirstNonPHIIt());
for (unsigned i : IncomingVals)
NewPN->addIncoming(PN.getIncomingValue(i), PN.getIncomingBlock(i));
for (unsigned i : reverse(IncomingVals))
@@ -865,7 +868,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
StructType *StructTy = nullptr;
if (AggregateArgs && !AggParamTy.empty()) {
StructTy = StructType::get(M->getContext(), AggParamTy);
- ParamTy.push_back(PointerType::get(StructTy, DL.getAllocaAddrSpace()));
+ ParamTy.push_back(PointerType::get(
+ StructTy, ArgsInZeroAddressSpace ? 0 : DL.getAllocaAddrSpace()));
}
LLVM_DEBUG({
@@ -886,6 +890,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
Function *newFunction = Function::Create(
funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(),
oldFunction->getName() + "." + SuffixToUse, M);
+ newFunction->IsNewDbgInfoFormat = oldFunction->IsNewDbgInfoFormat;
// Inherit all of the target dependent attributes and white-listed
// target independent attributes.
@@ -919,6 +924,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::PresplitCoroutine:
case Attribute::Memory:
case Attribute::NoFPClass:
+ case Attribute::CoroDestroyOnlyWhenComplete:
continue;
// Those attributes should be safe to propagate to the extracted function.
case Attribute::AlwaysInline:
@@ -940,6 +946,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::NoSanitizeBounds:
case Attribute::NoSanitizeCoverage:
case Attribute::NullPointerIsValid:
+ case Attribute::OptimizeForDebugging:
case Attribute::OptForFuzzing:
case Attribute::OptimizeNone:
case Attribute::OptimizeForSize:
@@ -990,6 +997,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::ImmArg:
case Attribute::ByRef:
case Attribute::WriteOnly:
+ case Attribute::Writable:
+ case Attribute::DeadOnUnwind:
// These are not really attributes.
case Attribute::None:
case Attribute::EndAttrKinds:
@@ -1185,8 +1194,15 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg",
AllocationBlock ? &*AllocationBlock->getFirstInsertionPt()
: &codeReplacer->getParent()->front().front());
- params.push_back(Struct);
+ if (ArgsInZeroAddressSpace && DL.getAllocaAddrSpace() != 0) {
+ auto *StructSpaceCast = new AddrSpaceCastInst(
+ Struct, PointerType::get(Context, 0), "structArg.ascast");
+ StructSpaceCast->insertAfter(Struct);
+ params.push_back(StructSpaceCast);
+ } else {
+ params.push_back(Struct);
+ }
// Store aggregated inputs in the struct.
for (unsigned i = 0, e = StructValues.size(); i != e; ++i) {
if (inputs.contains(StructValues[i])) {
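
[Editorial sketch, not part of the diff] The new ArgsInZeroAddressSpace path above matters on targets whose allocas live in a non-zero address space (AMDGPU's AS 5, for instance) while the outlined callee is typed with generic AS-0 pointers. The cast step in isolation, under those assumptions (the helper name is hypothetical):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // If the aggregate argument was alloca'd outside AS 0, bridge it with an
    // addrspacecast placed immediately after the alloca; otherwise pass it
    // through untouched.
    static Value *bridgeToAS0(AllocaInst *Struct, LLVMContext &Ctx) {
      if (Struct->getAddressSpace() == 0)
        return Struct;
      auto *Cast = new AddrSpaceCastInst(Struct, PointerType::get(Ctx, 0),
                                         Struct->getName() + ".ascast");
      Cast->insertAfter(Struct);
      return Cast;
    }
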
@@ -1492,10 +1508,14 @@ void CodeExtractor::calculateNewCallTerminatorWeights(
static void eraseDebugIntrinsicsWithNonLocalRefs(Function &F) {
for (Instruction &I : instructions(F)) {
SmallVector<DbgVariableIntrinsic *, 4> DbgUsers;
- findDbgUsers(DbgUsers, &I);
+ SmallVector<DPValue *, 4> DPValues;
+ findDbgUsers(DbgUsers, &I, &DPValues);
for (DbgVariableIntrinsic *DVI : DbgUsers)
if (DVI->getFunction() != &F)
DVI->eraseFromParent();
+ for (DPValue *DPV : DPValues)
+ if (DPV->getFunction() != &F)
+ DPV->eraseFromParent();
}
}
@@ -1531,6 +1551,16 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
/*LineNo=*/0, SPType, /*ScopeLine=*/0, DINode::FlagZero, SPFlags);
NewFunc.setSubprogram(NewSP);
+ auto IsInvalidLocation = [&NewFunc](Value *Location) {
+ // Location is invalid if it isn't a constant or an instruction, or is an
+ // instruction but isn't in the new function.
+ if (!Location ||
+ (!isa<Constant>(Location) && !isa<Instruction>(Location)))
+ return true;
+ Instruction *LocationInst = dyn_cast<Instruction>(Location);
+ return LocationInst && LocationInst->getFunction() != &NewFunc;
+ };
+
// Debug intrinsics in the new function need to be updated in one of two
// ways:
// 1) They need to be deleted, because they describe a value in the old
@@ -1539,8 +1569,41 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
// point to a variable in the wrong scope.
SmallDenseMap<DINode *, DINode *> RemappedMetadata;
SmallVector<Instruction *, 4> DebugIntrinsicsToDelete;
+ SmallVector<DPValue *, 4> DPVsToDelete;
DenseMap<const MDNode *, MDNode *> Cache;
+
+ auto GetUpdatedDIVariable = [&](DILocalVariable *OldVar) {
+ DINode *&NewVar = RemappedMetadata[OldVar];
+ if (!NewVar) {
+ DILocalScope *NewScope = DILocalScope::cloneScopeForSubprogram(
+ *OldVar->getScope(), *NewSP, Ctx, Cache);
+ NewVar = DIB.createAutoVariable(
+ NewScope, OldVar->getName(), OldVar->getFile(), OldVar->getLine(),
+ OldVar->getType(), /*AlwaysPreserve=*/false, DINode::FlagZero,
+ OldVar->getAlignInBits());
+ }
+ return cast<DILocalVariable>(NewVar);
+ };
+
+ auto UpdateDPValuesOnInst = [&](Instruction &I) -> void {
+ for (auto &DPV : I.getDbgValueRange()) {
+ // Apply the two updates that dbg.values get: invalid operands, and
+ // variable metadata fixup.
+ // FIXME: support dbg.assign form of DPValues.
+ if (any_of(DPV.location_ops(), IsInvalidLocation)) {
+ DPVsToDelete.push_back(&DPV);
+ continue;
+ }
+ if (!DPV.getDebugLoc().getInlinedAt())
+ DPV.setVariable(GetUpdatedDIVariable(DPV.getVariable()));
+ DPV.setDebugLoc(DebugLoc::replaceInlinedAtSubprogram(DPV.getDebugLoc(),
+ *NewSP, Ctx, Cache));
+ }
+ };
+
for (Instruction &I : instructions(NewFunc)) {
+ UpdateDPValuesOnInst(I);
+
auto *DII = dyn_cast<DbgInfoIntrinsic>(&I);
if (!DII)
continue;
@@ -1562,41 +1625,28 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
continue;
}
- auto IsInvalidLocation = [&NewFunc](Value *Location) {
- // Location is invalid if it isn't a constant or an instruction, or is an
- // instruction but isn't in the new function.
- if (!Location ||
- (!isa<Constant>(Location) && !isa<Instruction>(Location)))
- return true;
- Instruction *LocationInst = dyn_cast<Instruction>(Location);
- return LocationInst && LocationInst->getFunction() != &NewFunc;
- };
-
auto *DVI = cast<DbgVariableIntrinsic>(DII);
// If any of the used locations are invalid, delete the intrinsic.
if (any_of(DVI->location_ops(), IsInvalidLocation)) {
DebugIntrinsicsToDelete.push_back(DVI);
continue;
}
+ // DbgAssign intrinsics have an extra Value argument:
+ if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(DVI);
+ DAI && IsInvalidLocation(DAI->getAddress())) {
+ DebugIntrinsicsToDelete.push_back(DVI);
+ continue;
+ }
// If the variable was in the scope of the old function, i.e. it was not
// inlined, point the intrinsic to a fresh variable within the new function.
- if (!DVI->getDebugLoc().getInlinedAt()) {
- DILocalVariable *OldVar = DVI->getVariable();
- DINode *&NewVar = RemappedMetadata[OldVar];
- if (!NewVar) {
- DILocalScope *NewScope = DILocalScope::cloneScopeForSubprogram(
- *OldVar->getScope(), *NewSP, Ctx, Cache);
- NewVar = DIB.createAutoVariable(
- NewScope, OldVar->getName(), OldVar->getFile(), OldVar->getLine(),
- OldVar->getType(), /*AlwaysPreserve=*/false, DINode::FlagZero,
- OldVar->getAlignInBits());
- }
- DVI->setVariable(cast<DILocalVariable>(NewVar));
- }
+ if (!DVI->getDebugLoc().getInlinedAt())
+ DVI->setVariable(GetUpdatedDIVariable(DVI->getVariable()));
}
for (auto *DII : DebugIntrinsicsToDelete)
DII->eraseFromParent();
+ for (auto *DPV : DPVsToDelete)
+ DPV->getMarker()->MarkedInstr->dropOneDbgValue(DPV);
DIB.finalizeSubprogram(NewSP);
// Fix up the scope information attached to the line locations in the new
@@ -1702,11 +1752,14 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
"codeRepl", oldFunction,
header);
+ codeReplacer->IsNewDbgInfoFormat = oldFunction->IsNewDbgInfoFormat;
// The new function needs a root node because other nodes can branch to the
// head of the region, but the entry node of a function cannot have preds.
BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),
"newFuncRoot");
+ newFuncRoot->IsNewDbgInfoFormat = oldFunction->IsNewDbgInfoFormat;
+
auto *BranchI = BranchInst::Create(header);
// If the original function has debug info, we have to add a debug location
// to the new branch instruction from the artificial entry block.
@@ -1772,11 +1825,11 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
// Update the entry count of the function.
if (BFI) {
- auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency());
+ auto Count = BFI->getProfileCountFromFreq(EntryFreq);
if (Count)
newFunction->setEntryCount(
ProfileCount(*Count, Function::PCT_Real)); // FIXME
- BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency());
+ BFI->setBlockFreq(codeReplacer, EntryFreq);
}
CallInst *TheCall =
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeLayout.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeLayout.cpp
index ac74a1c116cc..95edd27c675d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeLayout.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeLayout.cpp
@@ -45,8 +45,11 @@
#include "llvm/Support/Debug.h"
#include <cmath>
+#include <set>
using namespace llvm;
+using namespace llvm::codelayout;
+
#define DEBUG_TYPE "code-layout"
namespace llvm {
@@ -61,8 +64,8 @@ cl::opt<bool> ApplyExtTspWithoutProfile(
cl::init(true), cl::Hidden);
} // namespace llvm
-// Algorithm-specific params. The values are tuned for the best performance
-// of large-scale front-end bound binaries.
+// Algorithm-specific params for Ext-TSP. The values are tuned for the best
+// performance of large-scale front-end bound binaries.
static cl::opt<double> ForwardWeightCond(
"ext-tsp-forward-weight-cond", cl::ReallyHidden, cl::init(0.1),
cl::desc("The weight of conditional forward jumps for ExtTSP value"));
@@ -96,10 +99,10 @@ static cl::opt<unsigned> BackwardDistance(
cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP"));
// The maximum size of a chain created by the algorithm. The size is bounded
-// so that the algorithm can efficiently process extremely large instance.
+// so that the algorithm can efficiently process extremely large instances.
static cl::opt<unsigned>
- MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(4096),
- cl::desc("The maximum size of a chain to create."));
+ MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(512),
+ cl::desc("The maximum size of a chain to create"));
// The maximum size of a chain for splitting. Larger values of the threshold
// may yield better quality at the cost of worse run-time.
@@ -107,11 +110,29 @@ static cl::opt<unsigned> ChainSplitThreshold(
"ext-tsp-chain-split-threshold", cl::ReallyHidden, cl::init(128),
cl::desc("The maximum size of a chain to apply splitting"));
-// The option enables splitting (large) chains along in-coming and out-going
-// jumps. This typically results in a better quality.
-static cl::opt<bool> EnableChainSplitAlongJumps(
- "ext-tsp-enable-chain-split-along-jumps", cl::ReallyHidden, cl::init(true),
- cl::desc("The maximum size of a chain to apply splitting"));
+// The maximum ratio between densities of two chains for merging.
+static cl::opt<double> MaxMergeDensityRatio(
+ "ext-tsp-max-merge-density-ratio", cl::ReallyHidden, cl::init(100),
+ cl::desc("The maximum ratio between densities of two chains for merging"));
+
+// Algorithm-specific options for CDSort.
+static cl::opt<unsigned> CacheEntries("cdsort-cache-entries", cl::ReallyHidden,
+ cl::desc("The size of the cache"));
+
+static cl::opt<unsigned> CacheSize("cdsort-cache-size", cl::ReallyHidden,
+ cl::desc("The size of a line in the cache"));
+
+static cl::opt<unsigned>
+ CDMaxChainSize("cdsort-max-chain-size", cl::ReallyHidden,
+ cl::desc("The maximum size of a chain to create"));
+
+static cl::opt<double> DistancePower(
+ "cdsort-distance-power", cl::ReallyHidden,
+ cl::desc("The power exponent for the distance-based locality"));
+
+static cl::opt<double> FrequencyScale(
+ "cdsort-frequency-scale", cl::ReallyHidden,
+ cl::desc("The scale factor for the frequency-based locality"));
namespace {
@@ -199,11 +220,14 @@ struct NodeT {
NodeT &operator=(const NodeT &) = delete;
NodeT &operator=(NodeT &&) = default;
- explicit NodeT(size_t Index, uint64_t Size, uint64_t EC)
- : Index(Index), Size(Size), ExecutionCount(EC) {}
+ explicit NodeT(size_t Index, uint64_t Size, uint64_t Count)
+ : Index(Index), Size(Size), ExecutionCount(Count) {}
bool isEntry() const { return Index == 0; }
+ // Check if Other is a successor of the node.
+ bool isSuccessor(const NodeT *Other) const;
+
// The total execution count of outgoing jumps.
uint64_t outCount() const;
@@ -267,7 +291,7 @@ struct ChainT {
size_t numBlocks() const { return Nodes.size(); }
- double density() const { return static_cast<double>(ExecutionCount) / Size; }
+ double density() const { return ExecutionCount / Size; }
bool isEntry() const { return Nodes[0]->Index == 0; }
@@ -280,9 +304,9 @@ struct ChainT {
}
ChainEdge *getEdge(ChainT *Other) const {
- for (auto It : Edges) {
- if (It.first == Other)
- return It.second;
+ for (const auto &[Chain, ChainEdge] : Edges) {
+ if (Chain == Other)
+ return ChainEdge;
}
return nullptr;
}
@@ -302,13 +326,13 @@ struct ChainT {
Edges.push_back(std::make_pair(Other, Edge));
}
- void merge(ChainT *Other, const std::vector<NodeT *> &MergedBlocks) {
- Nodes = MergedBlocks;
- // Update the chain's data
+ void merge(ChainT *Other, std::vector<NodeT *> MergedBlocks) {
+ Nodes = std::move(MergedBlocks);
+ // Update the chain's data.
ExecutionCount += Other->ExecutionCount;
Size += Other->Size;
Id = Nodes[0]->Index;
- // Update the node's data
+ // Update the node's data.
for (size_t Idx = 0; Idx < Nodes.size(); Idx++) {
Nodes[Idx]->CurChain = this;
Nodes[Idx]->CurIndex = Idx;
@@ -328,8 +352,9 @@ struct ChainT {
uint64_t Id;
// Cached ext-tsp score for the chain.
double Score{0};
- // The total execution count of the chain.
- uint64_t ExecutionCount{0};
+ // The total execution count of the chain. Since the execution count of
+ // a basic block is uint64_t, using doubles here to avoid overflow.
+ double ExecutionCount{0};
// The total size of the chain.
uint64_t Size{0};
// Nodes of the chain.
@@ -340,7 +365,7 @@ struct ChainT {
/// An edge in the graph representing jumps between two chains.
/// When nodes are merged into chains, the edges are combined too so that
-/// there is always at most one edge between a pair of chains
+/// there is always at most one edge between a pair of chains.
struct ChainEdge {
ChainEdge(const ChainEdge &) = delete;
ChainEdge(ChainEdge &&) = default;
@@ -424,53 +449,57 @@ private:
bool CacheValidBackward{false};
};
+bool NodeT::isSuccessor(const NodeT *Other) const {
+ for (JumpT *Jump : OutJumps)
+ if (Jump->Target == Other)
+ return true;
+ return false;
+}
+
uint64_t NodeT::outCount() const {
uint64_t Count = 0;
- for (JumpT *Jump : OutJumps) {
+ for (JumpT *Jump : OutJumps)
Count += Jump->ExecutionCount;
- }
return Count;
}
uint64_t NodeT::inCount() const {
uint64_t Count = 0;
- for (JumpT *Jump : InJumps) {
+ for (JumpT *Jump : InJumps)
Count += Jump->ExecutionCount;
- }
return Count;
}
void ChainT::mergeEdges(ChainT *Other) {
- // Update edges adjacent to chain Other
- for (auto EdgeIt : Other->Edges) {
- ChainT *DstChain = EdgeIt.first;
- ChainEdge *DstEdge = EdgeIt.second;
+ // Update edges adjacent to chain Other.
+ for (const auto &[DstChain, DstEdge] : Other->Edges) {
ChainT *TargetChain = DstChain == Other ? this : DstChain;
ChainEdge *CurEdge = getEdge(TargetChain);
if (CurEdge == nullptr) {
DstEdge->changeEndpoint(Other, this);
this->addEdge(TargetChain, DstEdge);
- if (DstChain != this && DstChain != Other) {
+ if (DstChain != this && DstChain != Other)
DstChain->addEdge(this, DstEdge);
- }
} else {
CurEdge->moveJumps(DstEdge);
}
- // Cleanup leftover edge
- if (DstChain != Other) {
+ // Cleanup leftover edge.
+ if (DstChain != Other)
DstChain->removeEdge(Other);
- }
}
}
using NodeIter = std::vector<NodeT *>::const_iterator;
-
-/// A wrapper around three chains of nodes; it is used to avoid extra
-/// instantiation of the vectors.
-struct MergedChain {
- MergedChain(NodeIter Begin1, NodeIter End1, NodeIter Begin2 = NodeIter(),
- NodeIter End2 = NodeIter(), NodeIter Begin3 = NodeIter(),
- NodeIter End3 = NodeIter())
+static std::vector<NodeT *> EmptyList;
+
+/// A wrapper around three concatenated vectors (chains) of nodes; it is used
+/// to avoid extra instantiation of the vectors.
+struct MergedNodesT {
+ MergedNodesT(NodeIter Begin1, NodeIter End1,
+ NodeIter Begin2 = EmptyList.begin(),
+ NodeIter End2 = EmptyList.end(),
+ NodeIter Begin3 = EmptyList.begin(),
+ NodeIter End3 = EmptyList.end())
: Begin1(Begin1), End1(End1), Begin2(Begin2), End2(End2), Begin3(Begin3),
End3(End3) {}
@@ -504,15 +533,35 @@ private:
NodeIter End3;
};
+/// A wrapper around two concatenated vectors (chains) of jumps.
+struct MergedJumpsT {
+ MergedJumpsT(const std::vector<JumpT *> *Jumps1,
+ const std::vector<JumpT *> *Jumps2 = nullptr) {
+ assert(!Jumps1->empty() && "cannot merge empty jump list");
+ JumpArray[0] = Jumps1;
+ JumpArray[1] = Jumps2;
+ }
+
+ template <typename F> void forEach(const F &Func) const {
+ for (auto Jumps : JumpArray)
+ if (Jumps != nullptr)
+ for (JumpT *Jump : *Jumps)
+ Func(Jump);
+ }
+
+private:
+ std::array<const std::vector<JumpT *> *, 2> JumpArray{nullptr, nullptr};
+};
+
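
[Editorial sketch, not part of the diff] MergedJumpsT replaces the old pattern of copying two jump vectors into a temporary before scoring: it views up to two lists as one sequence with no allocation. A stand-alone analogue of the same trick, with illustrative names:

    #include <array>
    #include <cstdint>
    #include <vector>

    struct Jump { uint64_t ExecutionCount; };

    // View over up to two jump lists, iterated as one sequence; the second
    // list is optional.
    struct JumpListPair {
      std::array<const std::vector<Jump *> *, 2> Lists{nullptr, nullptr};
      template <typename F> void forEach(const F &Func) const {
        for (const auto *L : Lists)
          if (L)
            for (Jump *J : *L)
              Func(J);
      }
    };

    // Example use: total the execution counts across both lists.
    static uint64_t totalCount(const JumpListPair &Jumps) {
      uint64_t Sum = 0;
      Jumps.forEach([&](const Jump *J) { Sum += J->ExecutionCount; });
      return Sum;
    }
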
/// Merge two chains of nodes respecting a given 'type' and 'offset'.
///
/// If MergeType == 0, then the result is a concatenation of two chains.
/// Otherwise, the first chain is cut into two sub-chains at the offset,
/// and merged using all possible ways of concatenating three chains.
-MergedChain mergeNodes(const std::vector<NodeT *> &X,
- const std::vector<NodeT *> &Y, size_t MergeOffset,
- MergeTypeT MergeType) {
- // Split the first chain, X, into X1 and X2
+MergedNodesT mergeNodes(const std::vector<NodeT *> &X,
+ const std::vector<NodeT *> &Y, size_t MergeOffset,
+ MergeTypeT MergeType) {
+ // Split the first chain, X, into X1 and X2.
NodeIter BeginX1 = X.begin();
NodeIter EndX1 = X.begin() + MergeOffset;
NodeIter BeginX2 = X.begin() + MergeOffset;
@@ -520,18 +569,18 @@ MergedChain mergeNodes(const std::vector<NodeT *> &X,
NodeIter BeginY = Y.begin();
NodeIter EndY = Y.end();
- // Construct a new chain from the three existing ones
+ // Construct a new chain from the three existing ones.
switch (MergeType) {
case MergeTypeT::X_Y:
- return MergedChain(BeginX1, EndX2, BeginY, EndY);
+ return MergedNodesT(BeginX1, EndX2, BeginY, EndY);
case MergeTypeT::Y_X:
- return MergedChain(BeginY, EndY, BeginX1, EndX2);
+ return MergedNodesT(BeginY, EndY, BeginX1, EndX2);
case MergeTypeT::X1_Y_X2:
- return MergedChain(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2);
+ return MergedNodesT(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2);
case MergeTypeT::Y_X2_X1:
- return MergedChain(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1);
+ return MergedNodesT(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1);
case MergeTypeT::X2_X1_Y:
- return MergedChain(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY);
+ return MergedNodesT(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY);
}
llvm_unreachable("unexpected chain merge type");
}
@@ -539,15 +588,14 @@ MergedChain mergeNodes(const std::vector<NodeT *> &X,
/// The implementation of the ExtTSP algorithm.
class ExtTSPImpl {
public:
- ExtTSPImpl(const std::vector<uint64_t> &NodeSizes,
- const std::vector<uint64_t> &NodeCounts,
- const std::vector<EdgeCountT> &EdgeCounts)
+ ExtTSPImpl(ArrayRef<uint64_t> NodeSizes, ArrayRef<uint64_t> NodeCounts,
+ ArrayRef<EdgeCount> EdgeCounts)
: NumNodes(NodeSizes.size()) {
initialize(NodeSizes, NodeCounts, EdgeCounts);
}
/// Run the algorithm and return an optimized ordering of nodes.
- void run(std::vector<uint64_t> &Result) {
+ std::vector<uint64_t> run() {
// Pass 1: Merge nodes with their mutually forced successors
mergeForcedPairs();
@@ -558,78 +606,80 @@ public:
mergeColdChains();
// Collect nodes from all chains
- concatChains(Result);
+ return concatChains();
}
private:
/// Initialize the algorithm's data structures.
- void initialize(const std::vector<uint64_t> &NodeSizes,
- const std::vector<uint64_t> &NodeCounts,
- const std::vector<EdgeCountT> &EdgeCounts) {
- // Initialize nodes
+ void initialize(const ArrayRef<uint64_t> &NodeSizes,
+ const ArrayRef<uint64_t> &NodeCounts,
+ const ArrayRef<EdgeCount> &EdgeCounts) {
+ // Initialize nodes.
AllNodes.reserve(NumNodes);
for (uint64_t Idx = 0; Idx < NumNodes; Idx++) {
uint64_t Size = std::max<uint64_t>(NodeSizes[Idx], 1ULL);
uint64_t ExecutionCount = NodeCounts[Idx];
- // The execution count of the entry node is set to at least one
+ // The execution count of the entry node is set to at least one.
if (Idx == 0 && ExecutionCount == 0)
ExecutionCount = 1;
AllNodes.emplace_back(Idx, Size, ExecutionCount);
}
- // Initialize jumps between nodes
+ // Initialize jumps between the nodes.
SuccNodes.resize(NumNodes);
PredNodes.resize(NumNodes);
std::vector<uint64_t> OutDegree(NumNodes, 0);
AllJumps.reserve(EdgeCounts.size());
- for (auto It : EdgeCounts) {
- uint64_t Pred = It.first.first;
- uint64_t Succ = It.first.second;
- OutDegree[Pred]++;
- // Ignore self-edges
- if (Pred == Succ)
+ for (auto Edge : EdgeCounts) {
+ ++OutDegree[Edge.src];
+ // Ignore self-edges.
+ if (Edge.src == Edge.dst)
continue;
- SuccNodes[Pred].push_back(Succ);
- PredNodes[Succ].push_back(Pred);
- uint64_t ExecutionCount = It.second;
- if (ExecutionCount > 0) {
- NodeT &PredNode = AllNodes[Pred];
- NodeT &SuccNode = AllNodes[Succ];
- AllJumps.emplace_back(&PredNode, &SuccNode, ExecutionCount);
+ SuccNodes[Edge.src].push_back(Edge.dst);
+ PredNodes[Edge.dst].push_back(Edge.src);
+ if (Edge.count > 0) {
+ NodeT &PredNode = AllNodes[Edge.src];
+ NodeT &SuccNode = AllNodes[Edge.dst];
+ AllJumps.emplace_back(&PredNode, &SuccNode, Edge.count);
SuccNode.InJumps.push_back(&AllJumps.back());
PredNode.OutJumps.push_back(&AllJumps.back());
+ // Adjust execution counts.
+ PredNode.ExecutionCount = std::max(PredNode.ExecutionCount, Edge.count);
+ SuccNode.ExecutionCount = std::max(SuccNode.ExecutionCount, Edge.count);
}
}
for (JumpT &Jump : AllJumps) {
- assert(OutDegree[Jump.Source->Index] > 0);
+ assert(OutDegree[Jump.Source->Index] > 0 &&
+ "incorrectly computed out-degree of the block");
Jump.IsConditional = OutDegree[Jump.Source->Index] > 1;
}
- // Initialize chains
+ // Initialize chains.
AllChains.reserve(NumNodes);
HotChains.reserve(NumNodes);
for (NodeT &Node : AllNodes) {
+ // Create a chain.
AllChains.emplace_back(Node.Index, &Node);
Node.CurChain = &AllChains.back();
- if (Node.ExecutionCount > 0) {
+ if (Node.ExecutionCount > 0)
HotChains.push_back(&AllChains.back());
- }
}
- // Initialize chain edges
+ // Initialize chain edges.
AllEdges.reserve(AllJumps.size());
for (NodeT &PredNode : AllNodes) {
for (JumpT *Jump : PredNode.OutJumps) {
+ assert(Jump->ExecutionCount > 0 && "incorrectly initialized jump");
NodeT *SuccNode = Jump->Target;
ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain);
- // this edge is already present in the graph
+ // This edge is already present in the graph.
if (CurEdge != nullptr) {
assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr);
CurEdge->appendJump(Jump);
continue;
}
- // this is a new edge
+ // This is a new edge.
AllEdges.emplace_back(Jump);
PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back());
SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back());
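
[Editorial sketch, not part of the diff] Two things changed in initialize(): the edge list now arrives as codelayout::EdgeCount records (src, dst, count fields, as used above), and node execution counts are raised to at least the count of any adjacent jump, so a block can never look colder than the control flow passing through it. A self-contained sketch of that repair step (the struct here mirrors the fields used in the diff but is a local stand-in):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct EdgeCount { uint64_t src, dst, count; };

    // Profile counts can be mutually inconsistent; clamp each endpoint's
    // node count up to the heaviest jump touching it.
    static void repairNodeCounts(std::vector<uint64_t> &NodeCounts,
                                 const std::vector<EdgeCount> &Edges) {
      for (const EdgeCount &E : Edges) {
        if (E.src == E.dst || E.count == 0)
          continue; // self-edges and zero-weight jumps are skipped above too
        NodeCounts[E.src] = std::max(NodeCounts[E.src], E.count);
        NodeCounts[E.dst] = std::max(NodeCounts[E.dst], E.count);
      }
    }
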
@@ -642,7 +692,7 @@ private:
/// to B are from A. Such nodes should be adjacent in the optimal ordering;
/// the method finds and merges such pairs of nodes.
void mergeForcedPairs() {
- // Find fallthroughs based on edge weights
+ // Find forced pairs of blocks.
for (NodeT &Node : AllNodes) {
if (SuccNodes[Node.Index].size() == 1 &&
PredNodes[SuccNodes[Node.Index][0]].size() == 1 &&
@@ -669,12 +719,12 @@ private:
}
if (SuccNode == nullptr)
continue;
- // Break the cycle
+ // Break the cycle.
AllNodes[Node.ForcedPred->Index].ForcedSucc = nullptr;
Node.ForcedPred = nullptr;
}
- // Merge nodes with their fallthrough successors
+ // Merge nodes with their fallthrough successors.
for (NodeT &Node : AllNodes) {
if (Node.ForcedPred == nullptr && Node.ForcedSucc != nullptr) {
const NodeT *CurBlock = &Node;
@@ -689,33 +739,42 @@ private:
/// Merge pairs of chains while improving the ExtTSP objective.
void mergeChainPairs() {
- /// Deterministically compare pairs of chains
+ /// Deterministically compare pairs of chains.
auto compareChainPairs = [](const ChainT *A1, const ChainT *B1,
const ChainT *A2, const ChainT *B2) {
- if (A1 != A2)
- return A1->Id < A2->Id;
- return B1->Id < B2->Id;
+ return std::make_tuple(A1->Id, B1->Id) < std::make_tuple(A2->Id, B2->Id);
};
while (HotChains.size() > 1) {
ChainT *BestChainPred = nullptr;
ChainT *BestChainSucc = nullptr;
MergeGainT BestGain;
- // Iterate over all pairs of chains
+ // Iterate over all pairs of chains.
for (ChainT *ChainPred : HotChains) {
- // Get candidates for merging with the current chain
- for (auto EdgeIt : ChainPred->Edges) {
- ChainT *ChainSucc = EdgeIt.first;
- ChainEdge *Edge = EdgeIt.second;
- // Ignore loop edges
- if (ChainPred == ChainSucc)
+ // Get candidates for merging with the current chain.
+ for (const auto &[ChainSucc, Edge] : ChainPred->Edges) {
+ // Ignore loop edges.
+ if (Edge->isSelfEdge())
continue;
-
- // Stop early if the combined chain violates the maximum allowed size
+ // Skip the merge if the combined chain violates the maximum specified
+ // size.
if (ChainPred->numBlocks() + ChainSucc->numBlocks() >= MaxChainSize)
continue;
+ // Don't merge the chains if they have vastly different densities.
+ // Skip the merge if the ratio between the densities exceeds
+ // MaxMergeDensityRatio. Smaller values of the option result in fewer
+ // merges, and hence, more chains.
+ const double ChainPredDensity = ChainPred->density();
+ const double ChainSuccDensity = ChainSucc->density();
+ assert(ChainPredDensity > 0.0 && ChainSuccDensity > 0.0 &&
+ "incorrectly computed chain densities");
+ auto [MinDensity, MaxDensity] =
+ std::minmax(ChainPredDensity, ChainSuccDensity);
+ const double Ratio = MaxDensity / MinDensity;
+ if (Ratio > MaxMergeDensityRatio)
+ continue;
- // Compute the gain of merging the two chains
+ // Compute the gain of merging the two chains.
MergeGainT CurGain = getBestMergeGain(ChainPred, ChainSucc, Edge);
if (CurGain.score() <= EPS)
continue;
@@ -731,11 +790,11 @@ private:
}
}
- // Stop merging when there is no improvement
+ // Stop merging when there is no improvement.
if (BestGain.score() <= EPS)
break;
- // Merge the best pair of chains
+ // Merge the best pair of chains.
mergeChains(BestChainPred, BestChainSucc, BestGain.mergeOffset(),
BestGain.mergeType());
}
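
[Editorial sketch, not part of the diff] The density gate added above is the behavior behind ext-tsp-max-merge-density-ratio: two chains merge only if their execution densities (count per byte) are within the configured ratio of each other, which keeps hot and cold code from gluing together. The predicate, isolated:

    #include <algorithm>

    // True if two positive chain densities are close enough to allow merging.
    static bool densitiesMergeable(double DensityA, double DensityB,
                                   double MaxRatio) {
      auto [MinD, MaxD] = std::minmax(DensityA, DensityB);
      return MaxD / MinD <= MaxRatio;
    }
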
@@ -743,7 +802,7 @@ private:
/// Merge remaining nodes into chains w/o taking jump counts into
/// consideration. This allows us to maintain the original node order in the
- /// absence of profile data
+ /// absence of profile data.
void mergeColdChains() {
for (size_t SrcBB = 0; SrcBB < NumNodes; SrcBB++) {
// Iterating in reverse order to make sure original fallthrough jumps are
@@ -764,24 +823,22 @@ private:
}
/// Compute the Ext-TSP score for a given node order and a list of jumps.
- double extTSPScore(const MergedChain &MergedBlocks,
- const std::vector<JumpT *> &Jumps) const {
- if (Jumps.empty())
- return 0.0;
+ double extTSPScore(const MergedNodesT &Nodes,
+ const MergedJumpsT &Jumps) const {
uint64_t CurAddr = 0;
- MergedBlocks.forEach([&](const NodeT *Node) {
+ Nodes.forEach([&](const NodeT *Node) {
Node->EstimatedAddr = CurAddr;
CurAddr += Node->Size;
});
double Score = 0;
- for (JumpT *Jump : Jumps) {
+ Jumps.forEach([&](const JumpT *Jump) {
const NodeT *SrcBlock = Jump->Source;
const NodeT *DstBlock = Jump->Target;
Score += ::extTSPScore(SrcBlock->EstimatedAddr, SrcBlock->Size,
DstBlock->EstimatedAddr, Jump->ExecutionCount,
Jump->IsConditional);
- }
+ });
return Score;
}
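
[Editorial sketch, not part of the diff] extTSPScore works in two passes: it first lays the merged node order out at estimated byte addresses, then scores every jump from the resulting source/target offsets. The address-assignment pass in isolation (the per-jump scoring, which weighs jump direction and distance against the tuned weights above, is elided; names are illustrative):

    #include <cstdint>
    #include <vector>

    struct Block { uint64_t Size = 0, EstimatedAddr = 0; };

    // Walk the proposed layout once, assigning each block its byte offset.
    static void assignEstimatedAddrs(std::vector<Block *> &Order) {
      uint64_t CurAddr = 0;
      for (Block *B : Order) {
        B->EstimatedAddr = CurAddr;
        CurAddr += B->Size;
      }
    }
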
@@ -793,74 +850,76 @@ private:
/// element being the corresponding merging type.
MergeGainT getBestMergeGain(ChainT *ChainPred, ChainT *ChainSucc,
ChainEdge *Edge) const {
- if (Edge->hasCachedMergeGain(ChainPred, ChainSucc)) {
+ if (Edge->hasCachedMergeGain(ChainPred, ChainSucc))
return Edge->getCachedMergeGain(ChainPred, ChainSucc);
- }
- // Precompute jumps between ChainPred and ChainSucc
- auto Jumps = Edge->jumps();
+ assert(!Edge->jumps().empty() && "trying to merge chains w/o jumps");
+ // Precompute jumps between ChainPred and ChainSucc.
ChainEdge *EdgePP = ChainPred->getEdge(ChainPred);
- if (EdgePP != nullptr) {
- Jumps.insert(Jumps.end(), EdgePP->jumps().begin(), EdgePP->jumps().end());
- }
- assert(!Jumps.empty() && "trying to merge chains w/o jumps");
+ MergedJumpsT Jumps(&Edge->jumps(), EdgePP ? &EdgePP->jumps() : nullptr);
- // The object holds the best currently chosen gain of merging the two chains
+ // This object holds the best chosen gain of merging two chains.
MergeGainT Gain = MergeGainT();
/// Given a merge offset and a list of merge types, try to merge two chains
- /// and update Gain with a better alternative
+ /// and update Gain with a better alternative.
auto tryChainMerging = [&](size_t Offset,
const std::vector<MergeTypeT> &MergeTypes) {
- // Skip merging corresponding to concatenation w/o splitting
+ // Skip merging corresponding to concatenation w/o splitting.
if (Offset == 0 || Offset == ChainPred->Nodes.size())
return;
- // Skip merging if it breaks Forced successors
+ // Skip merging if it breaks Forced successors.
NodeT *Node = ChainPred->Nodes[Offset - 1];
if (Node->ForcedSucc != nullptr)
return;
// Apply the merge, compute the corresponding gain, and update the best
- // value, if the merge is beneficial
+ // value, if the merge is beneficial.
for (const MergeTypeT &MergeType : MergeTypes) {
Gain.updateIfLessThan(
computeMergeGain(ChainPred, ChainSucc, Jumps, Offset, MergeType));
}
};
- // Try to concatenate two chains w/o splitting
+ // Try to concatenate two chains w/o splitting.
Gain.updateIfLessThan(
computeMergeGain(ChainPred, ChainSucc, Jumps, 0, MergeTypeT::X_Y));
- if (EnableChainSplitAlongJumps) {
- // Attach (a part of) ChainPred before the first node of ChainSucc
- for (JumpT *Jump : ChainSucc->Nodes.front()->InJumps) {
- const NodeT *SrcBlock = Jump->Source;
- if (SrcBlock->CurChain != ChainPred)
- continue;
- size_t Offset = SrcBlock->CurIndex + 1;
- tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::X2_X1_Y});
- }
+ // Attach (a part of) ChainPred before the first node of ChainSucc.
+ for (JumpT *Jump : ChainSucc->Nodes.front()->InJumps) {
+ const NodeT *SrcBlock = Jump->Source;
+ if (SrcBlock->CurChain != ChainPred)
+ continue;
+ size_t Offset = SrcBlock->CurIndex + 1;
+ tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::X2_X1_Y});
+ }
- // Attach (a part of) ChainPred after the last node of ChainSucc
- for (JumpT *Jump : ChainSucc->Nodes.back()->OutJumps) {
- const NodeT *DstBlock = Jump->Source;
- if (DstBlock->CurChain != ChainPred)
- continue;
- size_t Offset = DstBlock->CurIndex;
- tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1});
- }
+ // Attach (a part of) ChainPred after the last node of ChainSucc.
+ for (JumpT *Jump : ChainSucc->Nodes.back()->OutJumps) {
+ const NodeT *DstBlock = Jump->Target;
+ if (DstBlock->CurChain != ChainPred)
+ continue;
+ size_t Offset = DstBlock->CurIndex;
+ tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1});
}
- // Try to break ChainPred in various ways and concatenate with ChainSucc
+ // Try to break ChainPred in various ways and concatenate with ChainSucc.
if (ChainPred->Nodes.size() <= ChainSplitThreshold) {
for (size_t Offset = 1; Offset < ChainPred->Nodes.size(); Offset++) {
- // Try to split the chain in different ways. In practice, applying
- // X2_Y_X1 merging is almost never provides benefits; thus, we exclude
- // it from consideration to reduce the search space
+ // Do not split the chain along a fall-through jump. One of the two
+ // loops above may still "break" such a jump whenever it results in a
+ // new fall-through.
+ const NodeT *BB = ChainPred->Nodes[Offset - 1];
+ const NodeT *BB2 = ChainPred->Nodes[Offset];
+ if (BB->isSuccessor(BB2))
+ continue;
+
+ // In practice, applying X2_Y_X1 merging almost never provides benefits;
+ // thus, we exclude it from consideration to reduce the search space.
tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1,
MergeTypeT::X2_X1_Y});
}
}
+
Edge->setCachedMergeGain(ChainPred, ChainSucc, Gain);
return Gain;
}
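
The merge types enumerate how ChainPred, split at Offset into halves X1 and X2, can be interleaved with ChainSucc (Y). The real mergeNodes builds a MergedNodesT view over the existing vectors; a minimal eager sketch of one recombination, equivalent in the order it produces:

#include <cstddef>
#include <vector>

// MergeTypeT::X1_Y_X2 as an eager concatenation: X split at Offset, with Y
// placed between the two halves.
template <typename T>
std::vector<T> mergeX1YX2(const std::vector<T> &X, const std::vector<T> &Y,
                          size_t Offset) {
  std::vector<T> Out(X.begin(), X.begin() + Offset);    // X1
  Out.insert(Out.end(), Y.begin(), Y.end());            // Y
  Out.insert(Out.end(), X.begin() + Offset, X.end());   // X2
  return Out;
}
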
@@ -870,19 +929,20 @@ private:
///
/// The two chains are not modified in the method.
MergeGainT computeMergeGain(const ChainT *ChainPred, const ChainT *ChainSucc,
- const std::vector<JumpT *> &Jumps,
- size_t MergeOffset, MergeTypeT MergeType) const {
- auto MergedBlocks =
+ const MergedJumpsT &Jumps, size_t MergeOffset,
+ MergeTypeT MergeType) const {
+ MergedNodesT MergedNodes =
mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType);
- // Do not allow a merge that does not preserve the original entry point
+ // Do not allow a merge that does not preserve the original entry point.
if ((ChainPred->isEntry() || ChainSucc->isEntry()) &&
- !MergedBlocks.getFirstNode()->isEntry())
+ !MergedNodes.getFirstNode()->isEntry())
return MergeGainT();
- // The gain for the new chain
- auto NewGainScore = extTSPScore(MergedBlocks, Jumps) - ChainPred->Score;
- return MergeGainT(NewGainScore, MergeOffset, MergeType);
+ // The gain for the new chain.
+ double NewScore = extTSPScore(MergedNodes, Jumps);
+ double CurScore = ChainPred->Score;
+ return MergeGainT(NewScore - CurScore, MergeOffset, MergeType);
}
/// Merge chain From into chain Into, update the list of active chains,
@@ -891,39 +951,398 @@ private:
MergeTypeT MergeType) {
assert(Into != From && "a chain cannot be merged with itself");
- // Merge the nodes
- MergedChain MergedNodes =
+ // Merge the nodes.
+ MergedNodesT MergedNodes =
mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType);
Into->merge(From, MergedNodes.getNodes());
- // Merge the edges
+ // Merge the edges.
Into->mergeEdges(From);
From->clear();
- // Update cached ext-tsp score for the new chain
+ // Update cached ext-tsp score for the new chain.
ChainEdge *SelfEdge = Into->getEdge(Into);
if (SelfEdge != nullptr) {
- MergedNodes = MergedChain(Into->Nodes.begin(), Into->Nodes.end());
- Into->Score = extTSPScore(MergedNodes, SelfEdge->jumps());
+ MergedNodes = MergedNodesT(Into->Nodes.begin(), Into->Nodes.end());
+ MergedJumpsT MergedJumps(&SelfEdge->jumps());
+ Into->Score = extTSPScore(MergedNodes, MergedJumps);
}
- // Remove the chain from the list of active chains
- llvm::erase_value(HotChains, From);
+ // Remove the chain from the list of active chains.
+ llvm::erase(HotChains, From);
- // Invalidate caches
+ // Invalidate caches.
for (auto EdgeIt : Into->Edges)
EdgeIt.second->invalidateCache();
}
/// Concatenate all chains into the final order.
- void concatChains(std::vector<uint64_t> &Order) {
- // Collect chains and calculate density stats for their sorting
+ std::vector<uint64_t> concatChains() {
+ // Collect non-empty chains.
+ std::vector<const ChainT *> SortedChains;
+ for (ChainT &Chain : AllChains) {
+ if (!Chain.Nodes.empty())
+ SortedChains.push_back(&Chain);
+ }
+
+    // Sorting chains by density in decreasing order.
+ std::sort(SortedChains.begin(), SortedChains.end(),
+ [&](const ChainT *L, const ChainT *R) {
+ // Place the entry point at the beginning of the order.
+ if (L->isEntry() != R->isEntry())
+ return L->isEntry();
+
+ // Compare by density and break ties by chain identifiers.
+ return std::make_tuple(-L->density(), L->Id) <
+ std::make_tuple(-R->density(), R->Id);
+ });
+
+ // Collect the nodes in the order specified by their chains.
+ std::vector<uint64_t> Order;
+ Order.reserve(NumNodes);
+ for (const ChainT *Chain : SortedChains)
+ for (NodeT *Node : Chain->Nodes)
+ Order.push_back(Node->Index);
+ return Order;
+ }
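
The negated-density tuples make std::sort's default ascending order yield a descending-density order, and the Id component makes the comparator a strict total order, which is why plain sort can replace the earlier stable_sort. A tiny standalone check of the trick:

#include <cassert>
#include <tuple>

int main() {
  // Denser chain (15 samples/byte) sorts first because -15.0 < -10.0;
  // equal densities fall through to the deterministic Id tie-break.
  assert(std::make_tuple(-15.0, 2u) < std::make_tuple(-10.0, 1u));
  assert(std::make_tuple(-10.0, 1u) < std::make_tuple(-10.0, 2u));
  return 0;
}
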
+
+private:
+ /// The number of nodes in the graph.
+ const size_t NumNodes;
+
+ /// Successors of each node.
+ std::vector<std::vector<uint64_t>> SuccNodes;
+
+ /// Predecessors of each node.
+ std::vector<std::vector<uint64_t>> PredNodes;
+
+ /// All nodes (basic blocks) in the graph.
+ std::vector<NodeT> AllNodes;
+
+ /// All jumps between the nodes.
+ std::vector<JumpT> AllJumps;
+
+ /// All chains of nodes.
+ std::vector<ChainT> AllChains;
+
+ /// All edges between the chains.
+ std::vector<ChainEdge> AllEdges;
+
+ /// Active chains. The vector gets updated at runtime when chains are merged.
+ std::vector<ChainT *> HotChains;
+};
+
+/// The implementation of the Cache-Directed Sort (CDSort) algorithm for
+/// ordering functions represented by a call graph.
+class CDSortImpl {
+public:
+ CDSortImpl(const CDSortConfig &Config, ArrayRef<uint64_t> NodeSizes,
+ ArrayRef<uint64_t> NodeCounts, ArrayRef<EdgeCount> EdgeCounts,
+ ArrayRef<uint64_t> EdgeOffsets)
+ : Config(Config), NumNodes(NodeSizes.size()) {
+ initialize(NodeSizes, NodeCounts, EdgeCounts, EdgeOffsets);
+ }
+
+ /// Run the algorithm and return an ordered set of function clusters.
+ std::vector<uint64_t> run() {
+ // Merge pairs of chains while improving the objective.
+ mergeChainPairs();
+
+ // Collect nodes from all the chains.
+ return concatChains();
+ }
+
+private:
+ /// Initialize the algorithm's data structures.
+ void initialize(const ArrayRef<uint64_t> &NodeSizes,
+ const ArrayRef<uint64_t> &NodeCounts,
+ const ArrayRef<EdgeCount> &EdgeCounts,
+ const ArrayRef<uint64_t> &EdgeOffsets) {
+ // Initialize nodes.
+ AllNodes.reserve(NumNodes);
+ for (uint64_t Node = 0; Node < NumNodes; Node++) {
+ uint64_t Size = std::max<uint64_t>(NodeSizes[Node], 1ULL);
+ uint64_t ExecutionCount = NodeCounts[Node];
+ AllNodes.emplace_back(Node, Size, ExecutionCount);
+ TotalSamples += ExecutionCount;
+ if (ExecutionCount > 0)
+ TotalSize += Size;
+ }
+
+ // Initialize jumps between the nodes.
+ SuccNodes.resize(NumNodes);
+ PredNodes.resize(NumNodes);
+ AllJumps.reserve(EdgeCounts.size());
+ for (size_t I = 0; I < EdgeCounts.size(); I++) {
+ auto [Pred, Succ, Count] = EdgeCounts[I];
+ // Ignore recursive calls.
+ if (Pred == Succ)
+ continue;
+
+ SuccNodes[Pred].push_back(Succ);
+ PredNodes[Succ].push_back(Pred);
+ if (Count > 0) {
+ NodeT &PredNode = AllNodes[Pred];
+ NodeT &SuccNode = AllNodes[Succ];
+ AllJumps.emplace_back(&PredNode, &SuccNode, Count);
+ AllJumps.back().Offset = EdgeOffsets[I];
+ SuccNode.InJumps.push_back(&AllJumps.back());
+ PredNode.OutJumps.push_back(&AllJumps.back());
+ // Adjust execution counts.
+ PredNode.ExecutionCount = std::max(PredNode.ExecutionCount, Count);
+ SuccNode.ExecutionCount = std::max(SuccNode.ExecutionCount, Count);
+ }
+ }
+
+ // Initialize chains.
+ AllChains.reserve(NumNodes);
+ for (NodeT &Node : AllNodes) {
+ // Adjust execution counts.
+ Node.ExecutionCount = std::max(Node.ExecutionCount, Node.inCount());
+ Node.ExecutionCount = std::max(Node.ExecutionCount, Node.outCount());
+ // Create chain.
+ AllChains.emplace_back(Node.Index, &Node);
+ Node.CurChain = &AllChains.back();
+ }
+
+ // Initialize chain edges.
+ AllEdges.reserve(AllJumps.size());
+ for (NodeT &PredNode : AllNodes) {
+ for (JumpT *Jump : PredNode.OutJumps) {
+ NodeT *SuccNode = Jump->Target;
+ ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain);
+ // This edge is already present in the graph.
+ if (CurEdge != nullptr) {
+ assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr);
+ CurEdge->appendJump(Jump);
+ continue;
+ }
+ // This is a new edge.
+ AllEdges.emplace_back(Jump);
+ PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back());
+ SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back());
+ }
+ }
+ }
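
The structured binding above, and the Edge.src/Edge.dst/Edge.count accesses further down in calcExtTspScore, imply EdgeCount is a plain aggregate along these lines (the real definition lives in the CodeLayout header; this restatement is an assumption):

#include <cstdint>

struct EdgeCount {
  uint64_t src;   // source node: caller, or jump source
  uint64_t dst;   // destination node: callee, or jump target
  uint64_t count; // profile samples observed on the edge
};
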
+
+ /// Merge pairs of chains while there is an improvement in the objective.
+ void mergeChainPairs() {
+ // Create a priority queue containing all edges ordered by the merge gain.
+ auto GainComparator = [](ChainEdge *L, ChainEdge *R) {
+ return std::make_tuple(-L->gain(), L->srcChain()->Id, L->dstChain()->Id) <
+ std::make_tuple(-R->gain(), R->srcChain()->Id, R->dstChain()->Id);
+ };
+ std::set<ChainEdge *, decltype(GainComparator)> Queue(GainComparator);
+
+ // Insert the edges into the queue.
+ [[maybe_unused]] size_t NumActiveChains = 0;
+ for (NodeT &Node : AllNodes) {
+ if (Node.ExecutionCount == 0)
+ continue;
+ ++NumActiveChains;
+ for (const auto &[_, Edge] : Node.CurChain->Edges) {
+ // Ignore self-edges.
+ if (Edge->isSelfEdge())
+ continue;
+ // Ignore already processed edges.
+ if (Edge->gain() != -1.0)
+ continue;
+
+ // Compute the gain of merging the two chains.
+ MergeGainT Gain = getBestMergeGain(Edge);
+ Edge->setMergeGain(Gain);
+
+ if (Edge->gain() > EPS)
+ Queue.insert(Edge);
+ }
+ }
+
+ // Merge the chains while the gain of merging is positive.
+ while (!Queue.empty()) {
+ // Extract the best (top) edge for merging.
+ ChainEdge *BestEdge = *Queue.begin();
+ Queue.erase(Queue.begin());
+ ChainT *BestSrcChain = BestEdge->srcChain();
+ ChainT *BestDstChain = BestEdge->dstChain();
+
+ // Remove outdated edges from the queue.
+ for (const auto &[_, ChainEdge] : BestSrcChain->Edges)
+ Queue.erase(ChainEdge);
+ for (const auto &[_, ChainEdge] : BestDstChain->Edges)
+ Queue.erase(ChainEdge);
+
+ // Merge the best pair of chains.
+ MergeGainT BestGain = BestEdge->getMergeGain();
+ mergeChains(BestSrcChain, BestDstChain, BestGain.mergeOffset(),
+ BestGain.mergeType());
+ --NumActiveChains;
+
+ // Insert newly created edges into the queue.
+ for (const auto &[_, Edge] : BestSrcChain->Edges) {
+      // Ignore self-edges.
+ if (Edge->isSelfEdge())
+ continue;
+ if (Edge->srcChain()->numBlocks() + Edge->dstChain()->numBlocks() >
+ Config.MaxChainSize)
+ continue;
+
+ // Compute the gain of merging the two chains.
+ MergeGainT Gain = getBestMergeGain(Edge);
+ Edge->setMergeGain(Gain);
+
+ if (Edge->gain() > EPS)
+ Queue.insert(Edge);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Cache-directed function sorting reduced the number"
+ << " of chains from " << NumNodes << " to "
+ << NumActiveChains << "\n");
+ }
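
A std::set with a gain-ordered comparator stands in for a priority queue here because merging chains invalidates cached gains: stale edges must be erased while their old keys are still in effect, which std::priority_queue cannot do. The pattern in isolation, with invented names:

#include <set>
#include <tuple>

struct EdgeStub { double Gain = -1.0; int Id = 0; };

struct ByGainDesc {
  bool operator()(const EdgeStub *L, const EdgeStub *R) const {
    return std::make_tuple(-L->Gain, L->Id) <
           std::make_tuple(-R->Gain, R->Id);
  }
};

// Re-keying an element of an ordered set: erase first, mutate, reinsert.
void updateGain(std::set<EdgeStub *, ByGainDesc> &Q, EdgeStub *E,
                double NewGain) {
  Q.erase(E);        // must happen while the old key still orders E
  E->Gain = NewGain;
  Q.insert(E);
}
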
+
+ /// Compute the gain of merging two chains.
+ ///
+ /// The function considers all possible ways of merging two chains and
+ /// computes the one having the largest increase in ExtTSP objective. The
+ /// result is a pair with the first element being the gain and the second
+ /// element being the corresponding merging type.
+ MergeGainT getBestMergeGain(ChainEdge *Edge) const {
+ assert(!Edge->jumps().empty() && "trying to merge chains w/o jumps");
+    // Precompute jumps between the source and destination chains.
+ MergedJumpsT Jumps(&Edge->jumps());
+ ChainT *SrcChain = Edge->srcChain();
+ ChainT *DstChain = Edge->dstChain();
+
+ // This object holds the best currently chosen gain of merging two chains.
+ MergeGainT Gain = MergeGainT();
+
+ /// Given a list of merge types, try to merge two chains and update Gain
+ /// with a better alternative.
+ auto tryChainMerging = [&](const std::vector<MergeTypeT> &MergeTypes) {
+ // Apply the merge, compute the corresponding gain, and update the best
+ // value, if the merge is beneficial.
+ for (const MergeTypeT &MergeType : MergeTypes) {
+ MergeGainT NewGain =
+ computeMergeGain(SrcChain, DstChain, Jumps, MergeType);
+
+ // When forward and backward gains are the same, prioritize merging that
+ // preserves the original order of the functions in the binary.
+ if (std::abs(Gain.score() - NewGain.score()) < EPS) {
+ if ((MergeType == MergeTypeT::X_Y && SrcChain->Id < DstChain->Id) ||
+ (MergeType == MergeTypeT::Y_X && SrcChain->Id > DstChain->Id)) {
+ Gain = NewGain;
+ }
+ } else if (NewGain.score() > Gain.score() + EPS) {
+ Gain = NewGain;
+ }
+ }
+ };
+
+ // Try to concatenate two chains w/o splitting.
+ tryChainMerging({MergeTypeT::X_Y, MergeTypeT::Y_X});
+
+ return Gain;
+ }
+
+ /// Compute the score gain of merging two chains, respecting a given type.
+ ///
+ /// The two chains are not modified in the method.
+ MergeGainT computeMergeGain(ChainT *ChainPred, ChainT *ChainSucc,
+ const MergedJumpsT &Jumps,
+ MergeTypeT MergeType) const {
+    // This doesn't depend on the ordering of the nodes.
+ double FreqGain = freqBasedLocalityGain(ChainPred, ChainSucc);
+
+ // Merge offset is always 0, as the chains are not split.
+ size_t MergeOffset = 0;
+ auto MergedBlocks =
+ mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType);
+ double DistGain = distBasedLocalityGain(MergedBlocks, Jumps);
+
+ double GainScore = DistGain + Config.FrequencyScale * FreqGain;
+ // Scale the result to increase the importance of merging short chains.
+ if (GainScore >= 0.0)
+ GainScore /= std::min(ChainPred->Size, ChainSucc->Size);
+
+ return MergeGainT(GainScore, MergeOffset, MergeType);
+ }
+
+ /// Compute the change of the frequency locality after merging the chains.
+ double freqBasedLocalityGain(ChainT *ChainPred, ChainT *ChainSucc) const {
+ auto missProbability = [&](double ChainDensity) {
+ double PageSamples = ChainDensity * Config.CacheSize;
+ if (PageSamples >= TotalSamples)
+ return 0.0;
+ double P = PageSamples / TotalSamples;
+ return pow(1.0 - P, static_cast<double>(Config.CacheEntries));
+ };
+
+ // Cache misses on the chains before merging.
+ double CurScore =
+ ChainPred->ExecutionCount * missProbability(ChainPred->density()) +
+ ChainSucc->ExecutionCount * missProbability(ChainSucc->density());
+
+    // Cache misses on the merged chain.
+ double MergedCounts = ChainPred->ExecutionCount + ChainSucc->ExecutionCount;
+ double MergedSize = ChainPred->Size + ChainSucc->Size;
+ double MergedDensity = static_cast<double>(MergedCounts) / MergedSize;
+ double NewScore = MergedCounts * missProbability(MergedDensity);
+
+ return CurScore - NewScore;
+ }
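
The missProbability model treats the chain as competing for CacheEntries slots, each holding one page's worth of samples, so merging pays off when the combined chain's density lowers the expected miss count. A worked instance of the formula above, with invented numbers:

#include <cmath>
#include <cstdio>

int main() {
  double PageShare = 0.10; // PageSamples / TotalSamples (hypothetical)
  double Entries = 16.0;   // Config.CacheEntries (hypothetical)
  // Probability that none of the cache entries holds this chain's page.
  double Miss = std::pow(1.0 - PageShare, Entries);
  std::printf("miss probability ~= %.3f\n", Miss); // prints ~0.185
  return 0;
}
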
+
+ /// Compute the distance locality for a jump / call.
+ double distScore(uint64_t SrcAddr, uint64_t DstAddr, uint64_t Count) const {
+ uint64_t Dist = SrcAddr <= DstAddr ? DstAddr - SrcAddr : SrcAddr - DstAddr;
+ double D = Dist == 0 ? 0.1 : static_cast<double>(Dist);
+ return static_cast<double>(Count) * std::pow(D, -Config.DistancePower);
+ }
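
distScore decays polynomially with the call distance, so the distance-based term favors placing hot caller/callee pairs close together. With a hypothetical DistancePower of 0.25, the per-sample weight falls off like this:

#include <cmath>
#include <cstdio>

int main() {
  double Power = 0.25; // hypothetical Config.DistancePower
  for (double Dist : {1.0, 64.0, 4096.0})
    std::printf("distance %6.0f -> weight %.3f\n", Dist,
                std::pow(Dist, -Power)); // 1.000, 0.354, 0.125
  return 0;
}
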
+
+ /// Compute the change of the distance locality after merging the chains.
+ double distBasedLocalityGain(const MergedNodesT &Nodes,
+ const MergedJumpsT &Jumps) const {
+ uint64_t CurAddr = 0;
+ Nodes.forEach([&](const NodeT *Node) {
+ Node->EstimatedAddr = CurAddr;
+ CurAddr += Node->Size;
+ });
+
+ double CurScore = 0;
+ double NewScore = 0;
+ Jumps.forEach([&](const JumpT *Jump) {
+ uint64_t SrcAddr = Jump->Source->EstimatedAddr + Jump->Offset;
+ uint64_t DstAddr = Jump->Target->EstimatedAddr;
+ NewScore += distScore(SrcAddr, DstAddr, Jump->ExecutionCount);
+ CurScore += distScore(0, TotalSize, Jump->ExecutionCount);
+ });
+ return NewScore - CurScore;
+ }
+
+ /// Merge chain From into chain Into, update the list of active chains,
+ /// adjacency information, and the corresponding cached values.
+ void mergeChains(ChainT *Into, ChainT *From, size_t MergeOffset,
+ MergeTypeT MergeType) {
+ assert(Into != From && "a chain cannot be merged with itself");
+
+ // Merge the nodes.
+ MergedNodesT MergedNodes =
+ mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType);
+ Into->merge(From, MergedNodes.getNodes());
+
+ // Merge the edges.
+ Into->mergeEdges(From);
+ From->clear();
+ }
+
+ /// Concatenate all chains into the final order.
+ std::vector<uint64_t> concatChains() {
+ // Collect chains and calculate density stats for their sorting.
std::vector<const ChainT *> SortedChains;
DenseMap<const ChainT *, double> ChainDensity;
for (ChainT &Chain : AllChains) {
if (!Chain.Nodes.empty()) {
SortedChains.push_back(&Chain);
- // Using doubles to avoid overflow of ExecutionCounts
+ // Using doubles to avoid overflow of ExecutionCounts.
double Size = 0;
double ExecutionCount = 0;
for (NodeT *Node : Chain.Nodes) {
@@ -935,30 +1354,29 @@ private:
}
}
- // Sorting chains by density in the decreasing order
- std::stable_sort(SortedChains.begin(), SortedChains.end(),
- [&](const ChainT *L, const ChainT *R) {
- // Make sure the original entry point is at the
- // beginning of the order
- if (L->isEntry() != R->isEntry())
- return L->isEntry();
-
- const double DL = ChainDensity[L];
- const double DR = ChainDensity[R];
- // Compare by density and break ties by chain identifiers
- return (DL != DR) ? (DL > DR) : (L->Id < R->Id);
- });
-
- // Collect the nodes in the order specified by their chains
+    // Sort chains by density in decreasing order.
+ std::sort(SortedChains.begin(), SortedChains.end(),
+ [&](const ChainT *L, const ChainT *R) {
+ const double DL = ChainDensity[L];
+ const double DR = ChainDensity[R];
+ // Compare by density and break ties by chain identifiers.
+ return std::make_tuple(-DL, L->Id) <
+ std::make_tuple(-DR, R->Id);
+ });
+
+ // Collect the nodes in the order specified by their chains.
+ std::vector<uint64_t> Order;
Order.reserve(NumNodes);
- for (const ChainT *Chain : SortedChains) {
- for (NodeT *Node : Chain->Nodes) {
+ for (const ChainT *Chain : SortedChains)
+ for (NodeT *Node : Chain->Nodes)
Order.push_back(Node->Index);
- }
- }
+ return Order;
}
private:
+ /// Config for the algorithm.
+ const CDSortConfig Config;
+
/// The number of nodes in the graph.
const size_t NumNodes;
@@ -968,10 +1386,10 @@ private:
/// Predecessors of each node.
std::vector<std::vector<uint64_t>> PredNodes;
- /// All nodes (basic blocks) in the graph.
+ /// All nodes (functions) in the graph.
std::vector<NodeT> AllNodes;
- /// All jumps between the nodes.
+ /// All jumps (function calls) between the nodes.
std::vector<JumpT> AllJumps;
/// All chains of nodes.
@@ -980,65 +1398,95 @@ private:
/// All edges between the chains.
std::vector<ChainEdge> AllEdges;
- /// Active chains. The vector gets updated at runtime when chains are merged.
- std::vector<ChainT *> HotChains;
+ /// The total number of samples in the graph.
+ uint64_t TotalSamples{0};
+
+ /// The total size of the nodes in the graph.
+ uint64_t TotalSize{0};
};
} // end of anonymous namespace
std::vector<uint64_t>
-llvm::applyExtTspLayout(const std::vector<uint64_t> &NodeSizes,
- const std::vector<uint64_t> &NodeCounts,
- const std::vector<EdgeCountT> &EdgeCounts) {
- // Verify correctness of the input data
+codelayout::computeExtTspLayout(ArrayRef<uint64_t> NodeSizes,
+ ArrayRef<uint64_t> NodeCounts,
+ ArrayRef<EdgeCount> EdgeCounts) {
+ // Verify correctness of the input data.
assert(NodeCounts.size() == NodeSizes.size() && "Incorrect input");
assert(NodeSizes.size() > 2 && "Incorrect input");
- // Apply the reordering algorithm
+ // Apply the reordering algorithm.
ExtTSPImpl Alg(NodeSizes, NodeCounts, EdgeCounts);
- std::vector<uint64_t> Result;
- Alg.run(Result);
+ std::vector<uint64_t> Result = Alg.run();
- // Verify correctness of the output
+ // Verify correctness of the output.
assert(Result.front() == 0 && "Original entry point is not preserved");
assert(Result.size() == NodeSizes.size() && "Incorrect size of layout");
return Result;
}
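
A hedged usage sketch of the renamed entry point; the header path and the aggregate initialization of EdgeCount are assumptions, and all values are invented. Four blocks, where block 0 is the entry and the hot path is 0 -> 1 -> 3:

#include "llvm/Transforms/Utils/CodeLayout.h" // assumed header location
#include <cstdint>
#include <vector>

std::vector<uint64_t> layoutExample() {
  std::vector<uint64_t> Sizes  = {16, 8, 8, 4};      // bytes per block
  std::vector<uint64_t> Counts = {100, 90, 10, 100}; // samples per block
  std::vector<llvm::codelayout::EdgeCount> Jumps = {
      {0, 1, 90}, {0, 2, 10}, {1, 3, 90}, {2, 3, 10}};
  // Returns a permutation of {0..3}; the entry block stays first.
  return llvm::codelayout::computeExtTspLayout(Sizes, Counts, Jumps);
}
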
-double llvm::calcExtTspScore(const std::vector<uint64_t> &Order,
- const std::vector<uint64_t> &NodeSizes,
- const std::vector<uint64_t> &NodeCounts,
- const std::vector<EdgeCountT> &EdgeCounts) {
- // Estimate addresses of the blocks in memory
+double codelayout::calcExtTspScore(ArrayRef<uint64_t> Order,
+ ArrayRef<uint64_t> NodeSizes,
+ ArrayRef<uint64_t> NodeCounts,
+ ArrayRef<EdgeCount> EdgeCounts) {
+ // Estimate addresses of the blocks in memory.
std::vector<uint64_t> Addr(NodeSizes.size(), 0);
for (size_t Idx = 1; Idx < Order.size(); Idx++) {
Addr[Order[Idx]] = Addr[Order[Idx - 1]] + NodeSizes[Order[Idx - 1]];
}
std::vector<uint64_t> OutDegree(NodeSizes.size(), 0);
- for (auto It : EdgeCounts) {
- uint64_t Pred = It.first.first;
- OutDegree[Pred]++;
- }
+ for (auto Edge : EdgeCounts)
+ ++OutDegree[Edge.src];
- // Increase the score for each jump
+ // Increase the score for each jump.
double Score = 0;
- for (auto It : EdgeCounts) {
- uint64_t Pred = It.first.first;
- uint64_t Succ = It.first.second;
- uint64_t Count = It.second;
- bool IsConditional = OutDegree[Pred] > 1;
- Score += ::extTSPScore(Addr[Pred], NodeSizes[Pred], Addr[Succ], Count,
- IsConditional);
+ for (auto Edge : EdgeCounts) {
+ bool IsConditional = OutDegree[Edge.src] > 1;
+ Score += ::extTSPScore(Addr[Edge.src], NodeSizes[Edge.src], Addr[Edge.dst],
+ Edge.count, IsConditional);
}
return Score;
}
-double llvm::calcExtTspScore(const std::vector<uint64_t> &NodeSizes,
- const std::vector<uint64_t> &NodeCounts,
- const std::vector<EdgeCountT> &EdgeCounts) {
+double codelayout::calcExtTspScore(ArrayRef<uint64_t> NodeSizes,
+ ArrayRef<uint64_t> NodeCounts,
+ ArrayRef<EdgeCount> EdgeCounts) {
std::vector<uint64_t> Order(NodeSizes.size());
for (size_t Idx = 0; Idx < NodeSizes.size(); Idx++) {
Order[Idx] = Idx;
}
return calcExtTspScore(Order, NodeSizes, NodeCounts, EdgeCounts);
}
+
+std::vector<uint64_t> codelayout::computeCacheDirectedLayout(
+ const CDSortConfig &Config, ArrayRef<uint64_t> FuncSizes,
+ ArrayRef<uint64_t> FuncCounts, ArrayRef<EdgeCount> CallCounts,
+ ArrayRef<uint64_t> CallOffsets) {
+ // Verify correctness of the input data.
+ assert(FuncCounts.size() == FuncSizes.size() && "Incorrect input");
+
+ // Apply the reordering algorithm.
+ CDSortImpl Alg(Config, FuncSizes, FuncCounts, CallCounts, CallOffsets);
+ std::vector<uint64_t> Result = Alg.run();
+ assert(Result.size() == FuncSizes.size() && "Incorrect size of layout");
+ return Result;
+}
+
+std::vector<uint64_t> codelayout::computeCacheDirectedLayout(
+ ArrayRef<uint64_t> FuncSizes, ArrayRef<uint64_t> FuncCounts,
+ ArrayRef<EdgeCount> CallCounts, ArrayRef<uint64_t> CallOffsets) {
+ CDSortConfig Config;
+ // Populate the config from the command-line options.
+ if (CacheEntries.getNumOccurrences() > 0)
+ Config.CacheEntries = CacheEntries;
+ if (CacheSize.getNumOccurrences() > 0)
+ Config.CacheSize = CacheSize;
+ if (CDMaxChainSize.getNumOccurrences() > 0)
+ Config.MaxChainSize = CDMaxChainSize;
+ if (DistancePower.getNumOccurrences() > 0)
+ Config.DistancePower = DistancePower;
+ if (FrequencyScale.getNumOccurrences() > 0)
+ Config.FrequencyScale = FrequencyScale;
+ return computeCacheDirectedLayout(Config, FuncSizes, FuncCounts, CallCounts,
+ CallOffsets);
+}
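
And a matching sketch for the function-ordering entry point, including an explicit config. Field values are invented; CallOffsets gives the within-caller offset of each call site, matching the Jump->Offset use in distBasedLocalityGain:

#include "llvm/Transforms/Utils/CodeLayout.h" // assumed header location
#include <cstdint>
#include <vector>

std::vector<uint64_t> functionOrderExample() {
  std::vector<uint64_t> Sizes  = {128, 64, 32}; // bytes per function
  std::vector<uint64_t> Counts = {100, 40, 0};  // samples per function
  std::vector<llvm::codelayout::EdgeCount> Calls = {{0, 1, 40}, {1, 2, 5}};
  std::vector<uint64_t> Offsets = {16, 8};      // call-site offsets

  llvm::codelayout::CDSortConfig Config;
  Config.CacheEntries = 16; // hypothetical values for the knobs the
  Config.CacheSize = 2048;  // command-line options above would populate
  return llvm::codelayout::computeCacheDirectedLayout(Config, Sizes, Counts,
                                                      Calls, Offsets);
}
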
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
index 4a6719741719..6a2dae5bab68 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
@@ -417,7 +417,7 @@ void llvm::moveInstructionsToTheBeginning(BasicBlock &FromBB, BasicBlock &ToBB,
Instruction *MovePos = ToBB.getFirstNonPHIOrDbg();
if (isSafeToMoveBefore(I, *MovePos, DT, &PDT, &DI))
- I.moveBefore(MovePos);
+ I.moveBeforePreserving(MovePos);
}
}
@@ -429,7 +429,7 @@ void llvm::moveInstructionsToTheEnd(BasicBlock &FromBB, BasicBlock &ToBB,
while (FromBB.size() > 1) {
Instruction &I = FromBB.front();
if (isSafeToMoveBefore(I, *MovePos, DT, &PDT, &DI))
- I.moveBefore(MovePos);
+ I.moveBeforePreserving(MovePos);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CtorUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CtorUtils.cpp
index e07c92df2265..507729bc5ebc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CtorUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CtorUtils.cpp
@@ -52,12 +52,9 @@ static void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemov
NGV->takeName(GCL);
// Nuke the old list, replacing any uses with the new one.
- if (!GCL->use_empty()) {
- Constant *V = NGV;
- if (V->getType() != GCL->getType())
- V = ConstantExpr::getBitCast(V, GCL->getType());
- GCL->replaceAllUsesWith(V);
- }
+ if (!GCL->use_empty())
+ GCL->replaceAllUsesWith(NGV);
+
GCL->eraseFromParent();
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/DXILUpgrade.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/DXILUpgrade.cpp
new file mode 100644
index 000000000000..735686ddce38
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/DXILUpgrade.cpp
@@ -0,0 +1,36 @@
+//===- DXILUpgrade.cpp - Upgrade DXIL metadata to LLVM constructs ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/DXILUpgrade.h"
+
+using namespace llvm;
+
+static bool handleValVerMetadata(Module &M) {
+ NamedMDNode *ValVer = M.getNamedMetadata("dx.valver");
+ if (!ValVer)
+ return false;
+
+ // We don't need the validation version internally, so we drop it.
+ ValVer->dropAllReferences();
+ ValVer->eraseFromParent();
+ return true;
+}
+
+PreservedAnalyses DXILUpgradePass::run(Module &M, ModuleAnalysisManager &AM) {
+ PreservedAnalyses PA;
+ // We never add, remove, or change functions here.
+ PA.preserve<FunctionAnalysisManagerModuleProxy>();
+ PA.preserveSet<AllAnalysesOn<Function>>();
+
+ bool Changed = false;
+ Changed |= handleValVerMetadata(M);
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+ return PA;
+}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp
index 93cad0888a56..d0cc603426d2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -801,7 +801,15 @@ bool checkDebugifyMetadata(Module &M,
/// legacy module pass manager.
struct DebugifyModulePass : public ModulePass {
bool runOnModule(Module &M) override {
- return applyDebugify(M, Mode, DebugInfoBeforePass, NameOfWrappedPass);
+ bool NewDebugMode = M.IsNewDbgInfoFormat;
+ if (NewDebugMode)
+ M.convertFromNewDbgValues();
+
+ bool Result = applyDebugify(M, Mode, DebugInfoBeforePass, NameOfWrappedPass);
+
+ if (NewDebugMode)
+ M.convertToNewDbgValues();
+ return Result;
}
DebugifyModulePass(enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
@@ -826,7 +834,15 @@ private:
/// single function, used with the legacy module pass manager.
struct DebugifyFunctionPass : public FunctionPass {
bool runOnFunction(Function &F) override {
- return applyDebugify(F, Mode, DebugInfoBeforePass, NameOfWrappedPass);
+ bool NewDebugMode = F.IsNewDbgInfoFormat;
+ if (NewDebugMode)
+ F.convertFromNewDbgValues();
+
+ bool Result = applyDebugify(F, Mode, DebugInfoBeforePass, NameOfWrappedPass);
+
+ if (NewDebugMode)
+ F.convertToNewDbgValues();
+ return Result;
}
DebugifyFunctionPass(
@@ -852,13 +868,24 @@ private:
/// legacy module pass manager.
struct CheckDebugifyModulePass : public ModulePass {
bool runOnModule(Module &M) override {
+ bool NewDebugMode = M.IsNewDbgInfoFormat;
+ if (NewDebugMode)
+ M.convertFromNewDbgValues();
+
+ bool Result;
if (Mode == DebugifyMode::SyntheticDebugInfo)
- return checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass,
+ Result = checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass,
"CheckModuleDebugify", Strip, StatsMap);
- return checkDebugInfoMetadata(
+ else
+ Result = checkDebugInfoMetadata(
M, M.functions(), *DebugInfoBeforePass,
"CheckModuleDebugify (original debuginfo)", NameOfWrappedPass,
OrigDIVerifyBugsReportFilePath);
+
+ if (NewDebugMode)
+ M.convertToNewDbgValues();
+
+ return Result;
}
CheckDebugifyModulePass(
@@ -891,16 +918,26 @@ private:
/// with the legacy module pass manager.
struct CheckDebugifyFunctionPass : public FunctionPass {
bool runOnFunction(Function &F) override {
+ bool NewDebugMode = F.IsNewDbgInfoFormat;
+ if (NewDebugMode)
+ F.convertFromNewDbgValues();
+
Module &M = *F.getParent();
auto FuncIt = F.getIterator();
+ bool Result;
if (Mode == DebugifyMode::SyntheticDebugInfo)
- return checkDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)),
+ Result = checkDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)),
NameOfWrappedPass, "CheckFunctionDebugify",
Strip, StatsMap);
- return checkDebugInfoMetadata(
+ else
+ Result = checkDebugInfoMetadata(
M, make_range(FuncIt, std::next(FuncIt)), *DebugInfoBeforePass,
"CheckFunctionDebugify (original debuginfo)", NameOfWrappedPass,
OrigDIVerifyBugsReportFilePath);
+
+ if (NewDebugMode)
+ F.convertToNewDbgValues();
+ return Result;
}
CheckDebugifyFunctionPass(
@@ -972,6 +1009,10 @@ createDebugifyFunctionPass(enum DebugifyMode Mode,
}
PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) {
+ bool NewDebugMode = M.IsNewDbgInfoFormat;
+ if (NewDebugMode)
+ M.convertFromNewDbgValues();
+
if (Mode == DebugifyMode::SyntheticDebugInfo)
applyDebugifyMetadata(M, M.functions(),
"ModuleDebugify: ", /*ApplyToMF*/ nullptr);
@@ -979,6 +1020,10 @@ PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) {
collectDebugInfoMetadata(M, M.functions(), *DebugInfoBeforePass,
"ModuleDebugify (original debuginfo)",
NameOfWrappedPass);
+
+ if (NewDebugMode)
+ M.convertToNewDbgValues();
+
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
return PA;
@@ -1010,6 +1055,10 @@ FunctionPass *createCheckDebugifyFunctionPass(
PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M,
ModuleAnalysisManager &) {
+ bool NewDebugMode = M.IsNewDbgInfoFormat;
+ if (NewDebugMode)
+ M.convertFromNewDbgValues();
+
if (Mode == DebugifyMode::SyntheticDebugInfo)
checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass,
"CheckModuleDebugify", Strip, StatsMap);
@@ -1018,6 +1067,10 @@ PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M,
M, M.functions(), *DebugInfoBeforePass,
"CheckModuleDebugify (original debuginfo)", NameOfWrappedPass,
OrigDIVerifyBugsReportFilePath);
+
+ if (NewDebugMode)
+ M.convertToNewDbgValues();
+
return PreservedAnalyses::all();
}
@@ -1035,13 +1088,13 @@ void DebugifyEachInstrumentation::registerCallbacks(
return;
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
- if (const auto **CF = any_cast<const Function *>(&IR)) {
+ if (const auto **CF = llvm::any_cast<const Function *>(&IR)) {
Function &F = *const_cast<Function *>(*CF);
applyDebugify(F, Mode, DebugInfoBeforePass, P);
MAM.getResult<FunctionAnalysisManagerModuleProxy>(*F.getParent())
.getManager()
.invalidate(F, PA);
- } else if (const auto **CM = any_cast<const Module *>(&IR)) {
+ } else if (const auto **CM = llvm::any_cast<const Module *>(&IR)) {
Module &M = *const_cast<Module *>(*CM);
applyDebugify(M, Mode, DebugInfoBeforePass, P);
MAM.invalidate(M, PA);
@@ -1053,7 +1106,7 @@ void DebugifyEachInstrumentation::registerCallbacks(
return;
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
- if (const auto **CF = any_cast<const Function *>(&IR)) {
+ if (const auto **CF = llvm::any_cast<const Function *>(&IR)) {
auto &F = *const_cast<Function *>(*CF);
Module &M = *F.getParent();
auto It = F.getIterator();
@@ -1069,7 +1122,7 @@ void DebugifyEachInstrumentation::registerCallbacks(
MAM.getResult<FunctionAnalysisManagerModuleProxy>(*F.getParent())
.getManager()
.invalidate(F, PA);
- } else if (const auto **CM = any_cast<const Module *>(&IR)) {
+ } else if (const auto **CM = llvm::any_cast<const Module *>(&IR)) {
Module &M = *const_cast<Module *>(*CM);
if (Mode == DebugifyMode::SyntheticDebugInfo)
checkDebugifyMetadata(M, M.functions(), P, "CheckModuleDebugify",
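
Each legacy Debugify pass above repeats the same guard: convert a module or function out of the new debug-info format, run the check, then convert back. The same round-trip expressed once as a scope guard; ScopedDbgFormatDowngrade is an invented name, but the Module API calls are the ones used above:

#include "llvm/IR/Module.h"

class ScopedDbgFormatDowngrade {
  llvm::Module &M;
  bool WasNewFormat;

public:
  ScopedDbgFormatDowngrade(llvm::Module &M)
      : M(M), WasNewFormat(M.IsNewDbgInfoFormat) {
    if (WasNewFormat)
      M.convertFromNewDbgValues(); // run the wrapped pass on the old format
  }
  ~ScopedDbgFormatDowngrade() {
    if (WasNewFormat)
      M.convertToNewDbgValues();   // restore the new format on scope exit
  }
};
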
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index d424ebbef99d..092f1799755d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -35,7 +35,7 @@ static void insertCall(Function &CurFn, StringRef Func,
Triple TargetTriple(M.getTargetTriple());
if (TargetTriple.isOSAIX() && Func == "__mcount") {
Type *SizeTy = M.getDataLayout().getIntPtrType(C);
- Type *SizePtrTy = SizeTy->getPointerTo();
+ Type *SizePtrTy = PointerType::getUnqual(C);
GlobalVariable *GV = new GlobalVariable(M, SizeTy, /*isConstant=*/false,
GlobalValue::InternalLinkage,
ConstantInt::get(SizeTy, 0));
@@ -54,7 +54,7 @@ static void insertCall(Function &CurFn, StringRef Func,
}
if (Func == "__cyg_profile_func_enter" || Func == "__cyg_profile_func_exit") {
- Type *ArgTypes[] = {Type::getInt8PtrTy(C), Type::getInt8PtrTy(C)};
+ Type *ArgTypes[] = {PointerType::getUnqual(C), PointerType::getUnqual(C)};
FunctionCallee Fn = M.getOrInsertFunction(
Func, FunctionType::get(Type::getVoidTy(C), ArgTypes, false));
@@ -65,9 +65,7 @@ static void insertCall(Function &CurFn, StringRef Func,
InsertionPt);
RetAddr->setDebugLoc(DL);
- Value *Args[] = {ConstantExpr::getBitCast(&CurFn, Type::getInt8PtrTy(C)),
- RetAddr};
-
+ Value *Args[] = {&CurFn, RetAddr};
CallInst *Call =
CallInst::Create(Fn, ArrayRef<Value *>(Args), "", InsertionPt);
Call->setDebugLoc(DL);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
index 88c838685bca..cc00106fcbfe 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
@@ -70,7 +70,7 @@ IRBuilder<> *EscapeEnumerator::Next() {
// Create a cleanup block.
LLVMContext &C = F.getContext();
BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
- Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C));
+ Type *ExnTy = StructType::get(PointerType::getUnqual(C), Type::getInt32Ty(C));
if (!F.hasPersonalityFn()) {
FunctionCallee PersFn = getDefaultPersonalityFn(F.getParent());
F.setPersonalityFn(cast<Constant>(PersFn.getCallee()));
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp
index dda236167363..11e24d0585be 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp
@@ -87,10 +87,8 @@ struct FixIrreducible : public FunctionPass {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(LowerSwitchID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreservedID(LowerSwitchID);
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
}
@@ -106,7 +104,6 @@ FunctionPass *llvm::createFixIrreduciblePass() { return new FixIrreducible(); }
INITIALIZE_PASS_BEGIN(FixIrreducible, "fix-irreducible",
"Convert irreducible control-flow into natural loops",
false /* Only looks at CFG */, false /* Analysis Pass */)
-INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(FixIrreducible, "fix-irreducible",
@@ -317,6 +314,8 @@ static bool FixIrreducibleImpl(Function &F, LoopInfo &LI, DominatorTree &DT) {
LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: "
<< F.getName() << "\n");
+ assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
+
bool Changed = false;
SmallVector<Loop *, 8> WorkList;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp
index 8daeb92130ba..79ca99d1566c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@@ -160,10 +160,23 @@ int FunctionComparator::cmpAttrs(const AttributeList L,
int FunctionComparator::cmpMetadata(const Metadata *L,
const Metadata *R) const {
   // TODO: the following routine coerces the metadata contents into constants
- // before comparison.
+ // or MDStrings before comparison.
// It ignores any other cases, so that the metadata nodes are considered
// equal even though this is not correct.
// We should structurally compare the metadata nodes to be perfect here.
+
+ auto *MDStringL = dyn_cast<MDString>(L);
+ auto *MDStringR = dyn_cast<MDString>(R);
+ if (MDStringL && MDStringR) {
+ if (MDStringL == MDStringR)
+ return 0;
+ return MDStringL->getString().compare(MDStringR->getString());
+ }
+ if (MDStringR)
+ return -1;
+ if (MDStringL)
+ return 1;
+
auto *CL = dyn_cast<ConstantAsMetadata>(L);
auto *CR = dyn_cast<ConstantAsMetadata>(R);
if (CL == CR)
@@ -820,6 +833,21 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) const {
if (ConstR)
return -1;
+ const MetadataAsValue *MetadataValueL = dyn_cast<MetadataAsValue>(L);
+ const MetadataAsValue *MetadataValueR = dyn_cast<MetadataAsValue>(R);
+ if (MetadataValueL && MetadataValueR) {
+ if (MetadataValueL == MetadataValueR)
+ return 0;
+
+ return cmpMetadata(MetadataValueL->getMetadata(),
+ MetadataValueR->getMetadata());
+ }
+
+ if (MetadataValueL)
+ return 1;
+ if (MetadataValueR)
+ return -1;
+
const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);
@@ -958,67 +986,3 @@ int FunctionComparator::compare() {
}
return 0;
}
-
-namespace {
-
-// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
-// hash of a sequence of 64bit ints, but the entire input does not need to be
-// available at once. This interface is necessary for functionHash because it
-// needs to accumulate the hash as the structure of the function is traversed
-// without saving these values to an intermediate buffer. This form of hashing
-// is not often needed, as usually the object to hash is just read from a
-// buffer.
-class HashAccumulator64 {
- uint64_t Hash;
-
-public:
- // Initialize to random constant, so the state isn't zero.
- HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
-
- void add(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
-
- // No finishing is required, because the entire hash value is used.
- uint64_t getHash() { return Hash; }
-};
-
-} // end anonymous namespace
-
-// A function hash is calculated by considering only the number of arguments and
-// whether a function is varargs, the order of basic blocks (given by the
-// successors of each basic block in depth first order), and the order of
-// opcodes of each instruction within each of these basic blocks. This mirrors
-// the strategy compare() uses to compare functions by walking the BBs in depth
-// first order and comparing each instruction in sequence. Because this hash
-// does not look at the operands, it is insensitive to things such as the
-// target of calls and the constants used in the function, which makes it useful
-// when possibly merging functions which are the same modulo constants and call
-// targets.
-FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
- HashAccumulator64 H;
- H.add(F.isVarArg());
- H.add(F.arg_size());
-
- SmallVector<const BasicBlock *, 8> BBs;
- SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
-
- // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
- // accumulating the hash of the function "structure." (BB and opcode sequence)
- BBs.push_back(&F.getEntryBlock());
- VisitedBBs.insert(BBs[0]);
- while (!BBs.empty()) {
- const BasicBlock *BB = BBs.pop_back_val();
- // This random value acts as a block header, as otherwise the partition of
- // opcodes into BBs wouldn't affect the hash, only the order of the opcodes
- H.add(45798);
- for (const auto &Inst : *BB) {
- H.add(Inst.getOpcode());
- }
- const Instruction *Term = BB->getTerminator();
- for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
- if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
- continue;
- BBs.push_back(Term->getSuccessor(i));
- }
- }
- return H.getHash();
-}
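
The MDString handling added above follows FunctionComparator's global convention: every cmp* helper returns a strict three-way result (-1/0/1), the first nonzero comparison decides the order, and "absent" values sort in a fixed direction relative to "present" ones (only-R-is-an-MDString yields -1). The shape of that convention, distilled with invented names:

#include <cstring>

// Three-way compare with a fixed ordering for missing values: an absent
// value sorts before a present one, mirroring the MDString case above.
int cmpOptionalString(const char *L, const char *R) {
  if (L == R)
    return 0;  // identical (or both absent)
  if (!L)
    return -1; // only R present
  if (!R)
    return 1;  // only L present
  int C = std::strcmp(L, R);
  return (C > 0) - (C < 0);
}
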
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
index dab0be3a9fde..0990c750af55 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
@@ -91,18 +91,16 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) {
Mappings.end());
auto AddVariantDecl = [&](const ElementCount &VF, bool Predicate) {
- const std::string TLIName =
- std::string(TLI.getVectorizedFunction(ScalarName, VF, Predicate));
- if (!TLIName.empty()) {
- std::string MangledName = VFABI::mangleTLIVectorName(
- TLIName, ScalarName, CI.arg_size(), VF, Predicate);
+ const VecDesc *VD = TLI.getVectorMappingInfo(ScalarName, VF, Predicate);
+ if (VD && !VD->getVectorFnName().empty()) {
+ std::string MangledName = VD->getVectorFunctionABIVariantString();
if (!OriginalSetOfMappings.count(MangledName)) {
Mappings.push_back(MangledName);
++NumCallInjected;
}
- Function *VariantF = M->getFunction(TLIName);
+ Function *VariantF = M->getFunction(VD->getVectorFnName());
if (!VariantF)
- addVariantDeclaration(CI, VF, Predicate, TLIName);
+ addVariantDeclaration(CI, VF, Predicate, VD->getVectorFnName());
}
};
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
index f7b93fc8fd06..39d5f6e53c1d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -30,6 +30,7 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -189,20 +190,21 @@ BasicBlock *LandingPadInliningInfo::getInnerResumeDest() {
const unsigned PHICapacity = 2;
// Create corresponding new PHIs for all the PHIs in the outer landing pad.
- Instruction *InsertPoint = &InnerResumeDest->front();
+ BasicBlock::iterator InsertPoint = InnerResumeDest->begin();
BasicBlock::iterator I = OuterResumeDest->begin();
for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
PHINode *OuterPHI = cast<PHINode>(I);
PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity,
- OuterPHI->getName() + ".lpad-body",
- InsertPoint);
+ OuterPHI->getName() + ".lpad-body");
+ InnerPHI->insertBefore(InsertPoint);
OuterPHI->replaceAllUsesWith(InnerPHI);
InnerPHI->addIncoming(OuterPHI, OuterResumeDest);
}
// Create a PHI for the exception values.
- InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity,
- "eh.lpad-body", InsertPoint);
+ InnerEHValuesPHI =
+ PHINode::Create(CallerLPad->getType(), PHICapacity, "eh.lpad-body");
+ InnerEHValuesPHI->insertBefore(InsertPoint);
CallerLPad->replaceAllUsesWith(InnerEHValuesPHI);
InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest);
@@ -1331,38 +1333,51 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
}
}
-static bool MayContainThrowingOrExitingCall(Instruction *Begin,
- Instruction *End) {
+static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
+ ReturnInst *End) {
assert(Begin->getParent() == End->getParent() &&
"Expected to be in same basic block!");
+ auto BeginIt = Begin->getIterator();
+ assert(BeginIt != End->getIterator() && "Non-empty BB has empty iterator");
return !llvm::isGuaranteedToTransferExecutionToSuccessor(
- Begin->getIterator(), End->getIterator(), InlinerAttributeWindow + 1);
+ ++BeginIt, End->getIterator(), InlinerAttributeWindow + 1);
}
-static AttrBuilder IdentifyValidAttributes(CallBase &CB) {
+// Only allow these white listed attributes to be propagated back to the
+// callee. This is because other attributes may only be valid on the call
+// itself, i.e. attributes such as signext and zeroext.
- AttrBuilder AB(CB.getContext(), CB.getAttributes().getRetAttrs());
- if (!AB.hasAttributes())
- return AB;
+// Attributes that are always okay to propagate as if they are violated its
+// immediate UB.
+static AttrBuilder IdentifyValidUBGeneratingAttributes(CallBase &CB) {
AttrBuilder Valid(CB.getContext());
- // Only allow these white listed attributes to be propagated back to the
- // callee. This is because other attributes may only be valid on the call
- // itself, i.e. attributes such as signext and zeroext.
- if (auto DerefBytes = AB.getDereferenceableBytes())
+ if (auto DerefBytes = CB.getRetDereferenceableBytes())
Valid.addDereferenceableAttr(DerefBytes);
- if (auto DerefOrNullBytes = AB.getDereferenceableOrNullBytes())
+ if (auto DerefOrNullBytes = CB.getRetDereferenceableOrNullBytes())
Valid.addDereferenceableOrNullAttr(DerefOrNullBytes);
- if (AB.contains(Attribute::NoAlias))
+ if (CB.hasRetAttr(Attribute::NoAlias))
Valid.addAttribute(Attribute::NoAlias);
- if (AB.contains(Attribute::NonNull))
+ if (CB.hasRetAttr(Attribute::NoUndef))
+ Valid.addAttribute(Attribute::NoUndef);
+ return Valid;
+}
+
+// Attributes that need additional checks as propagating them may change
+// behavior or cause new UB.
+static AttrBuilder IdentifyValidPoisonGeneratingAttributes(CallBase &CB) {
+ AttrBuilder Valid(CB.getContext());
+ if (CB.hasRetAttr(Attribute::NonNull))
Valid.addAttribute(Attribute::NonNull);
+ if (CB.hasRetAttr(Attribute::Alignment))
+ Valid.addAlignmentAttr(CB.getRetAlign());
return Valid;
}
static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) {
- AttrBuilder Valid = IdentifyValidAttributes(CB);
- if (!Valid.hasAttributes())
+ AttrBuilder ValidUB = IdentifyValidUBGeneratingAttributes(CB);
+ AttrBuilder ValidPG = IdentifyValidPoisonGeneratingAttributes(CB);
+ if (!ValidUB.hasAttributes() && !ValidPG.hasAttributes())
return;
auto *CalledFunction = CB.getCalledFunction();
auto &Context = CalledFunction->getContext();
@@ -1397,7 +1412,7 @@ static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) {
// limit the check to both RetVal and RI are in the same basic block and
// there are no throwing/exiting instructions between these instructions.
if (RI->getParent() != RetVal->getParent() ||
- MayContainThrowingOrExitingCall(RetVal, RI))
+ MayContainThrowingOrExitingCallAfterCB(RetVal, RI))
continue;
// Add to the existing attributes of NewRetVal, i.e. the cloned call
// instruction.
@@ -1406,7 +1421,62 @@ static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) {
// existing attribute value (i.e. attributes such as dereferenceable,
// dereferenceable_or_null etc). See AttrBuilder::merge for more details.
AttributeList AL = NewRetVal->getAttributes();
- AttributeList NewAL = AL.addRetAttributes(Context, Valid);
+ if (ValidUB.getDereferenceableBytes() < AL.getRetDereferenceableBytes())
+ ValidUB.removeAttribute(Attribute::Dereferenceable);
+ if (ValidUB.getDereferenceableOrNullBytes() <
+ AL.getRetDereferenceableOrNullBytes())
+ ValidUB.removeAttribute(Attribute::DereferenceableOrNull);
+ AttributeList NewAL = AL.addRetAttributes(Context, ValidUB);
+ // Attributes that may generate poison returns are a bit tricky. If we
+ // propagate them, other uses of the callsite might have their behavior
+  // change or cause UB (if they have noundef) because of the new potential
+ // poison.
+ // Take the following three cases:
+ //
+ // 1)
+ // define nonnull ptr @foo() {
+ // %p = call ptr @bar()
+ // call void @use(ptr %p) willreturn nounwind
+ // ret ptr %p
+ // }
+ //
+ // 2)
+ // define noundef nonnull ptr @foo() {
+ // %p = call ptr @bar()
+ // call void @use(ptr %p) willreturn nounwind
+ // ret ptr %p
+ // }
+ //
+ // 3)
+ // define nonnull ptr @foo() {
+ // %p = call noundef ptr @bar()
+ // ret ptr %p
+ // }
+ //
+  // In case 1, we can't propagate nonnull because a poison value in @use may
+ // change behavior or trigger UB.
+ // In case 2, we don't need to be concerned about propagating nonnull, as
+ // any new poison at @use will trigger UB anyways.
+ // In case 3, we can never propagate nonnull because it may create UB due to
+ // the noundef on @bar.
+ if (ValidPG.getAlignment().valueOrOne() < AL.getRetAlignment().valueOrOne())
+ ValidPG.removeAttribute(Attribute::Alignment);
+ if (ValidPG.hasAttributes()) {
+ // Three checks.
+ // If the callsite has `noundef`, then a poison due to violating the
+ // return attribute will create UB anyways so we can always propagate.
+ // Otherwise, if the return value (callee to be inlined) has `noundef`, we
+ // can't propagate as a new poison return will cause UB.
+ // Finally, check if the return value has no uses whose behavior may
+ // change/may cause UB if we potentially return poison. At the moment this
+ // is implemented overly conservatively with a single-use check.
+ // TODO: Update the single-use check to iterate through uses and only bail
+ // if we have a potentially dangerous use.
+
+ if (CB.hasRetAttr(Attribute::NoUndef) ||
+ (RetVal->hasOneUse() && !RetVal->hasRetAttr(Attribute::NoUndef)))
+ NewAL = NewAL.addRetAttributes(Context, ValidPG);
+ }
NewRetVal->setAttributes(NewAL);
}
}
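
The three commented cases above reduce to a small predicate: a poison-generating return attribute may be copied onto the inlined call only when any poison it could introduce is already guaranteed to be UB, or is provably unobserved. A distilled sketch of that decision; the boolean parameter names are invented:

// Mirrors the final check above: noundef on the outer call site makes
// propagation always safe; otherwise require a single use and no noundef
// on the inlined return value.
bool canPropagatePoisonGeneratingAttr(bool OuterCallHasNoUndef,
                                      bool InnerRetHasNoUndef,
                                      bool RetValHasOneUse) {
  if (OuterCallHasNoUndef)
    return true;  // case 2: new poison is UB at the outer call anyway
  if (InnerRetHasNoUndef)
    return false; // case 3: could newly make the inner call's result UB
  return RetValHasOneUse; // conservative stand-in for "no dangerous uses"
}
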
@@ -1515,10 +1585,10 @@ static Value *HandleByValArgument(Type *ByValType, Value *Arg,
if (ByValAlignment)
Alignment = std::max(Alignment, *ByValAlignment);
- Value *NewAlloca =
- new AllocaInst(ByValType, DL.getAllocaAddrSpace(), nullptr, Alignment,
- Arg->getName(), &*Caller->begin()->begin());
- IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));
+ AllocaInst *NewAlloca = new AllocaInst(ByValType, DL.getAllocaAddrSpace(),
+ nullptr, Alignment, Arg->getName());
+ NewAlloca->insertBefore(Caller->begin()->begin());
+ IFI.StaticAllocas.push_back(NewAlloca);
// Uses of the argument in the function should use our new alloca
// instead.
@@ -1538,8 +1608,8 @@ static bool isUsedByLifetimeMarker(Value *V) {
// lifetime.start or lifetime.end intrinsics.
static bool hasLifetimeMarkers(AllocaInst *AI) {
Type *Ty = AI->getType();
- Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(),
- Ty->getPointerAddressSpace());
+ Type *Int8PtrTy =
+ PointerType::get(Ty->getContext(), Ty->getPointerAddressSpace());
if (Ty == Int8PtrTy)
return isUsedByLifetimeMarker(AI);
@@ -1596,48 +1666,71 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
// the call site location instead.
bool NoInlineLineTables = Fn->hasFnAttribute("no-inline-line-tables");
- for (; FI != Fn->end(); ++FI) {
- for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
- BI != BE; ++BI) {
- // Loop metadata needs to be updated so that the start and end locs
- // reference inlined-at locations.
- auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode,
- &IANodes](Metadata *MD) -> Metadata * {
- if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
- return inlineDebugLoc(Loc, InlinedAtNode, Ctx, IANodes).get();
- return MD;
- };
- updateLoopMetadataDebugLocations(*BI, updateLoopInfoLoc);
-
- if (!NoInlineLineTables)
- if (DebugLoc DL = BI->getDebugLoc()) {
- DebugLoc IDL =
- inlineDebugLoc(DL, InlinedAtNode, BI->getContext(), IANodes);
- BI->setDebugLoc(IDL);
- continue;
- }
+ // Helper-util for updating the metadata attached to an instruction.
+ auto UpdateInst = [&](Instruction &I) {
+ // Loop metadata needs to be updated so that the start and end locs
+ // reference inlined-at locations.
+ auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode,
+ &IANodes](Metadata *MD) -> Metadata * {
+ if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
+ return inlineDebugLoc(Loc, InlinedAtNode, Ctx, IANodes).get();
+ return MD;
+ };
+ updateLoopMetadataDebugLocations(I, updateLoopInfoLoc);
+
+ if (!NoInlineLineTables)
+ if (DebugLoc DL = I.getDebugLoc()) {
+ DebugLoc IDL =
+ inlineDebugLoc(DL, InlinedAtNode, I.getContext(), IANodes);
+ I.setDebugLoc(IDL);
+ return;
+ }
- if (CalleeHasDebugInfo && !NoInlineLineTables)
- continue;
+ if (CalleeHasDebugInfo && !NoInlineLineTables)
+ return;
- // If the inlined instruction has no line number, or if inline info
- // is not being generated, make it look as if it originates from the call
- // location. This is important for ((__always_inline, __nodebug__))
- // functions which must use caller location for all instructions in their
- // function body.
+ // If the inlined instruction has no line number, or if inline info
+ // is not being generated, make it look as if it originates from the call
+ // location. This is important for ((__always_inline, __nodebug__))
+ // functions which must use caller location for all instructions in their
+ // function body.
- // Don't update static allocas, as they may get moved later.
- if (auto *AI = dyn_cast<AllocaInst>(BI))
- if (allocaWouldBeStaticInEntry(AI))
- continue;
+ // Don't update static allocas, as they may get moved later.
+ if (auto *AI = dyn_cast<AllocaInst>(&I))
+ if (allocaWouldBeStaticInEntry(AI))
+ return;
- // Do not force a debug loc for pseudo probes, since they do not need to
- // be debuggable, and also they are expected to have a zero/null dwarf
- // discriminator at this point which could be violated otherwise.
- if (isa<PseudoProbeInst>(BI))
- continue;
+ // Do not force a debug loc for pseudo probes, since they do not need to
+ // be debuggable, and also they are expected to have a zero/null dwarf
+ // discriminator at this point which could be violated otherwise.
+ if (isa<PseudoProbeInst>(I))
+ return;
+
+ I.setDebugLoc(TheCallDL);
+ };
- BI->setDebugLoc(TheCallDL);
+ // Helper-util for updating debug-info records attached to instructions.
+ auto UpdateDPV = [&](DPValue *DPV) {
+ assert(DPV->getDebugLoc() && "Debug Value must have debug loc");
+ if (NoInlineLineTables) {
+ DPV->setDebugLoc(TheCallDL);
+ return;
+ }
+ DebugLoc DL = DPV->getDebugLoc();
+ DebugLoc IDL =
+ inlineDebugLoc(DL, InlinedAtNode,
+ DPV->getMarker()->getParent()->getContext(), IANodes);
+ DPV->setDebugLoc(IDL);
+ };
+
+ // Iterate over all instructions, updating metadata and debug-info records.
+ for (; FI != Fn->end(); ++FI) {
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
+ ++BI) {
+ UpdateInst(*BI);
+ for (DPValue &DPV : BI->getDbgValueRange()) {
+ UpdateDPV(&DPV);
+ }
}
// Remove debug info intrinsics if we're not keeping inline info.
@@ -1647,11 +1740,12 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
if (isa<DbgInfoIntrinsic>(BI)) {
BI = BI->eraseFromParent();
continue;
+ } else {
+ BI->dropDbgValues();
}
++BI;
}
}
-
}
}
@@ -1760,12 +1854,12 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock,
continue;
auto *OrigBB = cast<BasicBlock>(Entry.first);
auto *ClonedBB = cast<BasicBlock>(Entry.second);
- uint64_t Freq = CalleeBFI->getBlockFreq(OrigBB).getFrequency();
+ BlockFrequency Freq = CalleeBFI->getBlockFreq(OrigBB);
if (!ClonedBBs.insert(ClonedBB).second) {
// Multiple blocks in the callee might get mapped to one cloned block in
// the caller since we prune the callee as we clone it. When that happens,
// we want to use the maximum among the original blocks' frequencies.
- uint64_t NewFreq = CallerBFI->getBlockFreq(ClonedBB).getFrequency();
+ BlockFrequency NewFreq = CallerBFI->getBlockFreq(ClonedBB);
if (NewFreq > Freq)
Freq = NewFreq;
}
@@ -1773,8 +1867,7 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock,
}
BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock));
CallerBFI->setBlockFreqAndScale(
- EntryClone, CallerBFI->getBlockFreq(CallSiteBlock).getFrequency(),
- ClonedBBs);
+ EntryClone, CallerBFI->getBlockFreq(CallSiteBlock), ClonedBBs);
}
/// Update the branch metadata for cloned call instructions.
@@ -1882,8 +1975,7 @@ inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
Builder.SetInsertPoint(II);
Function *IFn =
Intrinsic::getDeclaration(Mod, Intrinsic::objc_release);
- Value *BC = Builder.CreateBitCast(RetOpnd, IFn->getArg(0)->getType());
- Builder.CreateCall(IFn, BC, "");
+ Builder.CreateCall(IFn, RetOpnd, "");
}
II->eraseFromParent();
InsertRetainCall = false;
@@ -1918,8 +2010,7 @@ inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
// to objc_retain.
Builder.SetInsertPoint(RI);
Function *IFn = Intrinsic::getDeclaration(Mod, Intrinsic::objc_retain);
- Value *BC = Builder.CreateBitCast(RetOpnd, IFn->getArg(0)->getType());
- Builder.CreateCall(IFn, BC, "");
+ Builder.CreateCall(IFn, RetOpnd, "");
}
}
}
@@ -1953,9 +2044,11 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// The inliner does not know how to inline through calls with operand bundles
// in general ...
+ Value *ConvergenceControlToken = nullptr;
if (CB.hasOperandBundles()) {
for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) {
- uint32_t Tag = CB.getOperandBundleAt(i).getTagID();
+ auto OBUse = CB.getOperandBundleAt(i);
+ uint32_t Tag = OBUse.getTagID();
// ... but it knows how to inline through "deopt" operand bundles ...
if (Tag == LLVMContext::OB_deopt)
continue;
@@ -1966,11 +2059,37 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
continue;
if (Tag == LLVMContext::OB_kcfi)
continue;
+ if (Tag == LLVMContext::OB_convergencectrl) {
+ ConvergenceControlToken = OBUse.Inputs[0].get();
+ continue;
+ }
return InlineResult::failure("unsupported operand bundle");
}
}
+ // FIXME: The check below is redundant and incomplete. According to spec, if a
+ // convergent call is missing a token, then the caller is using uncontrolled
+ // convergence. If the callee has an entry intrinsic, then the callee is using
+ // controlled convergence, and the call cannot be inlined. A proper
+ // implementation of this check requires a whole new analysis that identifies
+ // convergence in every function. For now, we skip that and just do this one
+ // cursory check. The underlying assumption is that in a compiler flow that
+ // fully implements convergence control tokens, there is no mixing of
+ // controlled and uncontrolled convergent operations in the whole program.
+ if (CB.isConvergent()) {
+ auto *I = CalledFunc->getEntryBlock().getFirstNonPHI();
+ if (auto *IntrinsicCall = dyn_cast<IntrinsicInst>(I)) {
+ if (IntrinsicCall->getIntrinsicID() ==
+ Intrinsic::experimental_convergence_entry) {
+ if (!ConvergenceControlToken) {
+ return InlineResult::failure(
+ "convergent call needs convergencectrl operand");
+ }
+ }
+ }
+ }
+
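To make the accepted shape concrete, a convergent call carrying a control token could be constructed roughly as below; createControlledCall and its arguments are illustrative assumptions, not part of this patch:

// Sketch: attach a convergencectrl bundle whose single input is the token
// produced by an llvm.experimental.convergence.* intrinsic.
static CallInst *createControlledCall(Function *Callee, ArrayRef<Value *> Args,
                                      Value *Token,
                                      Instruction *InsertBefore) {
  SmallVector<OperandBundleDef, 1> Bundles;
  Bundles.emplace_back("convergencectrl", std::vector<Value *>{Token});
  return CallInst::Create(Callee->getFunctionType(), Callee, Args, Bundles,
                          "", InsertBefore);
}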
// If the call to the callee cannot throw, set the 'nounwind' flag on any
// calls that we inline.
bool MarkNoUnwind = CB.doesNotThrow();
@@ -2260,6 +2379,17 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
IFI.GetAssumptionCache(*Caller).registerAssumption(II);
}
+ if (ConvergenceControlToken) {
+ auto *I = FirstNewBlock->getFirstNonPHI();
+ if (auto *IntrinsicCall = dyn_cast<IntrinsicInst>(I)) {
+ if (IntrinsicCall->getIntrinsicID() ==
+ Intrinsic::experimental_convergence_entry) {
+ IntrinsicCall->replaceAllUsesWith(ConvergenceControlToken);
+ IntrinsicCall->eraseFromParent();
+ }
+ }
+ }
+
// If there are any alloca instructions in the block that used to be the entry
// block for the callee, move them to the entry block of the caller. First
// calculate which instruction they should be inserted before. We insert the
@@ -2296,6 +2426,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// Transfer all of the allocas over in a block. Using splice means
// that the instructions aren't removed from the symbol table, then
// reinserted.
+ I.setTailBit(true);
Caller->getEntryBlock().splice(InsertPoint, &*FirstNewBlock,
AI->getIterator(), I);
}
@@ -2400,7 +2531,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only.
if ((InsertLifetime || Caller->isPresplitCoroutine()) &&
!IFI.StaticAllocas.empty()) {
- IRBuilder<> builder(&FirstNewBlock->front());
+ IRBuilder<> builder(&*FirstNewBlock, FirstNewBlock->begin());
for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
AllocaInst *AI = IFI.StaticAllocas[ai];
// Don't mark swifterror allocas. They can't have bitcast uses.
@@ -2454,14 +2585,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// If the inlined code contained dynamic alloca instructions, wrap the inlined
// code with llvm.stacksave/llvm.stackrestore intrinsics.
if (InlinedFunctionInfo.ContainsDynamicAllocas) {
- Module *M = Caller->getParent();
- // Get the two intrinsics we care about.
- Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
- Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);
-
// Insert the llvm.stacksave.
CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin())
- .CreateCall(StackSave, {}, "savedstack");
+ .CreateStackSave("savedstack");
// Insert a call to llvm.stackrestore before any return instructions in the
// inlined function.
@@ -2472,7 +2598,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
continue;
if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall())
continue;
- IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr);
+ IRBuilder<>(RI).CreateStackRestore(SavedPtr);
}
}
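The wrap being simplified in this hunk reduces to the pattern below (a sketch reusing the names from the surrounding code; CreateStackSave and CreateStackRestore are the IRBuilder helpers the hunk switches to):

// Sketch: bracket the inlined body so the caller's stack is unwound past
// any dynamic allocas at every return from the inlined code.
CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin())
                         .CreateStackSave("savedstack");
for (ReturnInst *RI : Returns)
  IRBuilder<>(RI).CreateStackRestore(SavedPtr);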
@@ -2574,6 +2700,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
Builder.CreateRetVoid();
else
Builder.CreateRet(NewDeoptCall);
+ // Since the ret type is changed, remove the incompatible attributes.
+ NewDeoptCall->removeRetAttrs(
+ AttributeFuncs::typeIncompatible(NewDeoptCall->getType()));
}
// Leave behind the normal returns so we can merge control flow.
@@ -2704,8 +2833,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
if (IFI.CallerBFI) {
// Copy original BB's block frequency to AfterCallBB
- IFI.CallerBFI->setBlockFreq(
- AfterCallBB, IFI.CallerBFI->getBlockFreq(OrigBB).getFrequency());
+ IFI.CallerBFI->setBlockFreq(AfterCallBB,
+ IFI.CallerBFI->getBlockFreq(OrigBB));
}
// Change the branch that used to go to AfterCallBB to branch to the first
@@ -2731,8 +2860,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// The PHI node should go at the front of the new basic block to merge all
// possible incoming values.
if (!CB.use_empty()) {
- PHI = PHINode::Create(RTy, Returns.size(), CB.getName(),
- &AfterCallBB->front());
+ PHI = PHINode::Create(RTy, Returns.size(), CB.getName());
+ PHI->insertBefore(AfterCallBB->begin());
// Anything that used the result of the function call should now use the
// PHI node as their operand.
CB.replaceAllUsesWith(PHI);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp
index c36b0533580b..5e0c312fe149 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -160,7 +160,8 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
if (SSAUpdate.HasValueForBlock(ExitBB))
continue;
PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB),
- I->getName() + ".lcssa", &ExitBB->front());
+ I->getName() + ".lcssa");
+ PN->insertBefore(ExitBB->begin());
if (InsertedPHIs)
InsertedPHIs->push_back(PN);
// Get the debug location from the original instruction.
@@ -241,7 +242,8 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
}
SmallVector<DbgValueInst *, 4> DbgValues;
- llvm::findDbgValues(DbgValues, I);
+ SmallVector<DPValue *, 4> DPValues;
+ llvm::findDbgValues(DbgValues, I, &DPValues);
// Update pre-existing debug value uses that reside outside the loop.
for (auto *DVI : DbgValues) {
@@ -257,6 +259,21 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
DVI->replaceVariableLocationOp(I, V);
}
+ // RemoveDIs: copy-paste of block above, using non-instruction debug-info
+ // records.
+ for (DPValue *DPV : DPValues) {
+ BasicBlock *UserBB = DPV->getMarker()->getParent();
+ if (InstBB == UserBB || L->contains(UserBB))
+ continue;
+ // We currently only handle debug values residing in blocks that were
+ // traversed while rewriting the uses. If we inserted just a single PHI,
+ // we will handle all relevant debug values.
+ Value *V = AddedPHIs.size() == 1 ? AddedPHIs[0]
+ : SSAUpdate.FindValueForBlock(UserBB);
+ if (V)
+ DPV->replaceVariableLocationOp(I, V);
+ }
+
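As an illustration of the rewrite (a hypothetical loop, not taken from this patch):

// Before LCSSA:                    After LCSSA:
//   loop:                            loop:
//     %i = add i32 ...                 %i = add i32 ...
//   exit:                            exit:
//     dbg.value(%i, ...)               %i.lcssa = phi i32 [ %i, %loop ]
//                                      dbg.value(%i.lcssa, ...)
// The block above applies the same %i -> %i.lcssa rewrite to DPValue
// records, which hang off instructions instead of appearing as intrinsics.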
// SSAUpdater might have inserted phi-nodes inside other loops. We'll need
// to post-process them to keep LCSSA form.
for (PHINode *InsertedPN : LocalInsertedPHIs) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index cdcfb5050bff..6220f8509309 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -101,7 +101,7 @@ private:
float Val) {
Constant *V = ConstantFP::get(BBBuilder.getContext(), APFloat(Val));
if (!Arg->getType()->isFloatTy())
- V = ConstantExpr::getFPExtend(V, Arg->getType());
+ V = ConstantFoldCastInstruction(Instruction::FPExt, V, Arg->getType());
if (BBBuilder.GetInsertBlock()->getParent()->hasFnAttribute(Attribute::StrictFP))
BBBuilder.setIsFPConstrained(true);
return BBBuilder.CreateFCmp(Cmp, Arg, V);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
index eeb0446c1197..a758fb306982 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
@@ -69,6 +69,7 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
@@ -86,6 +87,8 @@
using namespace llvm;
using namespace llvm::PatternMatch;
+extern cl::opt<bool> UseNewDbgInfoFormat;
+
#define DEBUG_TYPE "local"
STATISTIC(NumRemoved, "Number of unreachable basic blocks removed");
@@ -227,9 +230,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// Remove weight for this case.
std::swap(Weights[Idx + 1], Weights.back());
Weights.pop_back();
- SI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(BB->getContext()).
- createBranchWeights(Weights));
+ setBranchWeights(*SI, Weights);
}
// Remove this entry.
BasicBlock *ParentBB = SI->getParent();
@@ -414,7 +415,7 @@ bool llvm::wouldInstructionBeTriviallyDeadOnUnusedPaths(
return wouldInstructionBeTriviallyDead(I, TLI);
}
-bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
+bool llvm::wouldInstructionBeTriviallyDead(const Instruction *I,
const TargetLibraryInfo *TLI) {
if (I->isTerminator())
return false;
@@ -428,7 +429,7 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
if (isa<DbgVariableIntrinsic>(I))
return false;
- if (DbgLabelInst *DLI = dyn_cast<DbgLabelInst>(I)) {
+ if (const DbgLabelInst *DLI = dyn_cast<DbgLabelInst>(I)) {
if (DLI->getLabel())
return false;
return true;
@@ -443,9 +444,16 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
if (!II)
return false;
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::experimental_guard: {
+ // Guards on true are operationally no-ops. In the future we can
+ // consider more sophisticated tradeoffs for guards considering potential
+ // for check widening, but for now we keep things simple.
+ auto *Cond = dyn_cast<ConstantInt>(II->getArgOperand(0));
+ return Cond && Cond->isOne();
+ }
// TODO: These intrinsics are not safe to remove, because this may remove
// a well-defined trap.
- switch (II->getIntrinsicID()) {
case Intrinsic::wasm_trunc_signed:
case Intrinsic::wasm_trunc_unsigned:
case Intrinsic::ptrauth_auth:
@@ -461,7 +469,7 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
// Special case intrinsics that "may have side effects" but can be deleted
// when dead.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
// Safe to delete llvm.stacksave and launder.invariant.group if dead.
if (II->getIntrinsicID() == Intrinsic::stacksave ||
II->getIntrinsicID() == Intrinsic::launder_invariant_group)
@@ -484,13 +492,9 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
return false;
}
- // Assumptions are dead if their condition is trivially true. Guards on
- // true are operationally no-ops. In the future we can consider more
- // sophisticated tradeoffs for guards considering potential for check
- // widening, but for now we keep things simple.
- if ((II->getIntrinsicID() == Intrinsic::assume &&
- isAssumeWithEmptyBundle(cast<AssumeInst>(*II))) ||
- II->getIntrinsicID() == Intrinsic::experimental_guard) {
+ // Assumptions are dead if their condition is trivially true.
+ if (II->getIntrinsicID() == Intrinsic::assume &&
+ isAssumeWithEmptyBundle(cast<AssumeInst>(*II))) {
if (ConstantInt *Cond = dyn_cast<ConstantInt>(II->getArgOperand(0)))
return !Cond->isZero();
@@ -605,10 +609,13 @@ void llvm::RecursivelyDeleteTriviallyDeadInstructions(
bool llvm::replaceDbgUsesWithUndef(Instruction *I) {
SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
- findDbgUsers(DbgUsers, I);
+ SmallVector<DPValue *, 1> DPUsers;
+ findDbgUsers(DbgUsers, I, &DPUsers);
for (auto *DII : DbgUsers)
DII->setKillLocation();
- return !DbgUsers.empty();
+ for (auto *DPV : DPUsers)
+ DPV->setKillLocation();
+ return !DbgUsers.empty() || !DPUsers.empty();
}
/// areAllUsesEqual - Check whether the uses of a value are all the same.
@@ -847,17 +854,17 @@ static bool CanMergeValues(Value *First, Value *Second) {
/// branch to Succ, into Succ.
///
/// Assumption: Succ is the single successor for BB.
-static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
+static bool
+CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ,
+ const SmallPtrSetImpl<BasicBlock *> &BBPreds) {
assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
LLVM_DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into "
<< Succ->getName() << "\n");
// Shortcut, if there is only a single predecessor it must be BB and merging
// is always safe
- if (Succ->getSinglePredecessor()) return true;
-
- // Make a list of the predecessors of BB
- SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
+ if (Succ->getSinglePredecessor())
+ return true;
// Look at all the phi nodes in Succ, to see if they present a conflict when
// merging these blocks
@@ -997,6 +1004,35 @@ static void replaceUndefValuesInPhi(PHINode *PN,
}
}
+// Return true only when BB and Succ share a single common predecessor.
+// This only handles the case where BB cannot be merged away while its
+// predecessors can still be redirected to Succ.
+static bool
+CanRedirectPredsOfEmptyBBToSucc(BasicBlock *BB, BasicBlock *Succ,
+ const SmallPtrSetImpl<BasicBlock *> &BBPreds,
+ const SmallPtrSetImpl<BasicBlock *> &SuccPreds,
+ BasicBlock *&CommonPred) {
+
+ // There must be phis in BB, otherwise BB will be merged into Succ directly
+ if (BB->phis().empty() || Succ->phis().empty())
+ return false;
+
+ // BB must have predecessors of its own, not shared with Succ, that can be
+ // redirected to Succ
+ if (!BB->hasNPredecessorsOrMore(2))
+ return false;
+
+ // Find the single common predecessor of BB and Succ, if any
+ for (BasicBlock *SuccPred : SuccPreds) {
+ if (BBPreds.count(SuccPred)) {
+ if (CommonPred)
+ return false;
+ CommonPred = SuccPred;
+ }
+ }
+
+ return true;
+}
+
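A hypothetical configuration accepted by this predicate, for orientation:

// preds(BB)   = { P1, P2, C }
// preds(Succ) = { BB, C }
// Common predecessor: C (unique), so the predicate returns true.
// P1 and P2 reach only BB and can be redirected straight to Succ, while
// BB itself must survive because C still feeds its phis along the C->BB
// edge.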
/// Replace a value flowing from a block to a phi with
/// potentially multiple instances of that value flowing from the
/// block's predecessors to the phi.
@@ -1004,9 +1040,11 @@ static void replaceUndefValuesInPhi(PHINode *PN,
/// \param BB The block with the value flowing into the phi.
/// \param BBPreds The predecessors of BB.
/// \param PN The phi that we are updating.
+/// \param CommonPred The common predecessor of BB and PN's BasicBlock
static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB,
const PredBlockVector &BBPreds,
- PHINode *PN) {
+ PHINode *PN,
+ BasicBlock *CommonPred) {
Value *OldVal = PN->removeIncomingValue(BB, false);
assert(OldVal && "No entry in PHI for Pred BB!");
@@ -1034,26 +1072,39 @@ static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB,
// will trigger asserts if we try to clean it up now, without also
// simplifying the corresponding conditional branch).
BasicBlock *PredBB = OldValPN->getIncomingBlock(i);
+
+ if (PredBB == CommonPred)
+ continue;
+
Value *PredVal = OldValPN->getIncomingValue(i);
- Value *Selected = selectIncomingValueForBlock(PredVal, PredBB,
- IncomingValues);
+ Value *Selected =
+ selectIncomingValueForBlock(PredVal, PredBB, IncomingValues);
// And add a new incoming value for this predecessor for the
// newly retargeted branch.
PN->addIncoming(Selected, PredBB);
}
+ if (CommonPred)
+ PN->addIncoming(OldValPN->getIncomingValueForBlock(CommonPred), BB);
+
} else {
for (unsigned i = 0, e = BBPreds.size(); i != e; ++i) {
// Update existing incoming values in PN for this
// predecessor of BB.
BasicBlock *PredBB = BBPreds[i];
- Value *Selected = selectIncomingValueForBlock(OldVal, PredBB,
- IncomingValues);
+
+ if (PredBB == CommonPred)
+ continue;
+
+ Value *Selected =
+ selectIncomingValueForBlock(OldVal, PredBB, IncomingValues);
// And add a new incoming value for this predecessor for the
// newly retargeted branch.
PN->addIncoming(Selected, PredBB);
}
+ if (CommonPred)
+ PN->addIncoming(OldVal, BB);
}
replaceUndefValuesInPhi(PN, IncomingValues);
@@ -1064,13 +1115,30 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
assert(BB != &BB->getParent()->getEntryBlock() &&
"TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
- // We can't eliminate infinite loops.
+ // We can't simplify infinite loops.
BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0);
- if (BB == Succ) return false;
+ if (BB == Succ)
+ return false;
+
+ SmallPtrSet<BasicBlock *, 16> BBPreds(pred_begin(BB), pred_end(BB));
+ SmallPtrSet<BasicBlock *, 16> SuccPreds(pred_begin(Succ), pred_end(Succ));
+
+ // The single common predecessor of BB and Succ when BB cannot be killed
+ BasicBlock *CommonPred = nullptr;
+
+ bool BBKillable = CanPropagatePredecessorsForPHIs(BB, Succ, BBPreds);
+
+ // Even if we cannot fold BB into Succ, we may be able to redirect the
+ // predecessors of BB to Succ.
+ bool BBPhisMergeable =
+ BBKillable ||
+ CanRedirectPredsOfEmptyBBToSucc(BB, Succ, BBPreds, SuccPreds, CommonPred);
- // Check to see if merging these blocks would cause conflicts for any of the
- // phi nodes in BB or Succ. If not, we can safely merge.
- if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
+ if (!BBKillable && !BBPhisMergeable)
+ return false;
+
+ // Check to see if merging these blocks/phis would cause conflicts for any of
+ // the phi nodes in BB or Succ. If not, we can safely merge.
// Check for cases where Succ has multiple predecessors and a PHI node in BB
// has uses which will not disappear when the PHI nodes are merged. It is
@@ -1099,6 +1167,11 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
}
}
+ if (BBPhisMergeable && CommonPred)
+ LLVM_DEBUG(dbgs() << "Found Common Predecessor between: " << BB->getName()
+ << " and " << Succ->getName() << " : "
+ << CommonPred->getName() << "\n");
+
  // If 'BB' and 'BB->Pred' are loop latches, bail out to preserve inner loop
// metadata.
//
@@ -1171,25 +1244,37 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
if (PredTI->hasMetadata(LLVMContext::MD_loop))
return false;
- LLVM_DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
+ if (BBKillable)
+ LLVM_DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
+ else if (BBPhisMergeable)
+ LLVM_DEBUG(dbgs() << "Merge Phis in Trivial BB: \n" << *BB);
SmallVector<DominatorTree::UpdateType, 32> Updates;
+
if (DTU) {
// To avoid processing the same predecessor more than once.
SmallPtrSet<BasicBlock *, 8> SeenPreds;
- // All predecessors of BB will be moved to Succ.
- SmallPtrSet<BasicBlock *, 8> PredsOfSucc(pred_begin(Succ), pred_end(Succ));
+ // All predecessors of BB (except the common predecessor) will be moved to
+ // Succ.
Updates.reserve(Updates.size() + 2 * pred_size(BB) + 1);
- for (auto *PredOfBB : predecessors(BB))
- // This predecessor of BB may already have Succ as a successor.
- if (!PredsOfSucc.contains(PredOfBB))
+
+ for (auto *PredOfBB : predecessors(BB)) {
+ // Do not modify the common predecessors of BB and Succ.
+ if (!SuccPreds.contains(PredOfBB))
if (SeenPreds.insert(PredOfBB).second)
Updates.push_back({DominatorTree::Insert, PredOfBB, Succ});
+ }
+
SeenPreds.clear();
+
for (auto *PredOfBB : predecessors(BB))
- if (SeenPreds.insert(PredOfBB).second)
+ // When BB cannot be killed, do not remove the edge between BB and
+ // CommonPred.
+ if (SeenPreds.insert(PredOfBB).second && PredOfBB != CommonPred)
Updates.push_back({DominatorTree::Delete, PredOfBB, BB});
- Updates.push_back({DominatorTree::Delete, BB, Succ});
+
+ if (BBKillable)
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
}
if (isa<PHINode>(Succ->begin())) {
@@ -1201,21 +1286,19 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
// Loop over all of the PHI nodes in the successor of BB.
for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
PHINode *PN = cast<PHINode>(I);
-
- redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN);
+ redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN, CommonPred);
}
}
if (Succ->getSinglePredecessor()) {
// BB is the only predecessor of Succ, so Succ will end up with exactly
// the same predecessors BB had.
-
// Copy over any phi, debug or lifetime instruction.
BB->getTerminator()->eraseFromParent();
- Succ->splice(Succ->getFirstNonPHI()->getIterator(), BB);
+ Succ->splice(Succ->getFirstNonPHIIt(), BB);
} else {
while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
- // We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
+ // We explicitly check for such uses when merging phis.
assert(PN->use_empty() && "There shouldn't be any uses here!");
PN->eraseFromParent();
}
@@ -1228,21 +1311,35 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
for (BasicBlock *Pred : predecessors(BB))
Pred->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopMD);
- // Everything that jumped to BB now goes to Succ.
- BB->replaceAllUsesWith(Succ);
- if (!Succ->hasName()) Succ->takeName(BB);
-
- // Clear the successor list of BB to match updates applying to DTU later.
- if (BB->getTerminator())
- BB->back().eraseFromParent();
- new UnreachableInst(BB->getContext(), BB);
- assert(succ_empty(BB) && "The successor list of BB isn't empty before "
- "applying corresponding DTU updates.");
+ if (BBKillable) {
+ // Everything that jumped to BB now goes to Succ.
+ BB->replaceAllUsesWith(Succ);
+
+ if (!Succ->hasName())
+ Succ->takeName(BB);
+
+ // Clear the successor list of BB to match updates applying to DTU later.
+ if (BB->getTerminator())
+ BB->back().eraseFromParent();
+
+ new UnreachableInst(BB->getContext(), BB);
+ assert(succ_empty(BB) && "The successor list of BB isn't empty before "
+ "applying corresponding DTU updates.");
+ } else if (BBPhisMergeable) {
+ // Everything except CommonPred that jumped to BB now goes to Succ.
+ BB->replaceUsesWithIf(Succ, [BBPreds, CommonPred](Use &U) -> bool {
+ if (Instruction *UseInst = dyn_cast<Instruction>(U.getUser()))
+ return UseInst->getParent() != CommonPred &&
+ BBPreds.contains(UseInst->getParent());
+ return false;
+ });
+ }
if (DTU)
DTU->applyUpdates(Updates);
- DeleteDeadBlock(BB, DTU);
+ if (BBKillable)
+ DeleteDeadBlock(BB, DTU);
return true;
}
@@ -1388,15 +1485,8 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
return Changed;
}
-/// If the specified pointer points to an object that we control, try to modify
-/// the object's alignment to PrefAlign. Returns a minimum known alignment of
-/// the value after the operation, which may be lower than PrefAlign.
-///
-/// Increating value alignment isn't often possible though. If alignment is
-/// important, a more reliable approach is to simply align all global variables
-/// and allocation instructions to their preferred alignment from the beginning.
-static Align tryEnforceAlignment(Value *V, Align PrefAlign,
- const DataLayout &DL) {
+Align llvm::tryEnforceAlignment(Value *V, Align PrefAlign,
+ const DataLayout &DL) {
V = V->stripPointerCasts();
if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
@@ -1480,12 +1570,18 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar,
// is removed by LowerDbgDeclare(), we need to make sure that we are
// not inserting the same dbg.value intrinsic over and over.
SmallVector<DbgValueInst *, 1> DbgValues;
- findDbgValues(DbgValues, APN);
+ SmallVector<DPValue *, 1> DPValues;
+ findDbgValues(DbgValues, APN, &DPValues);
for (auto *DVI : DbgValues) {
assert(is_contained(DVI->getValues(), APN));
if ((DVI->getVariable() == DIVar) && (DVI->getExpression() == DIExpr))
return true;
}
+ for (auto *DPV : DPValues) {
+ assert(is_contained(DPV->location_ops(), APN));
+ if ((DPV->getVariable() == DIVar) && (DPV->getExpression() == DIExpr))
+ return true;
+ }
return false;
}
@@ -1521,6 +1617,67 @@ static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) {
// Could not determine size of variable. Conservatively return false.
return false;
}
+// RemoveDIs: duplicate implementation of the above, using DPValues, the
+// replacement for dbg.values.
+static bool valueCoversEntireFragment(Type *ValTy, DPValue *DPV) {
+ const DataLayout &DL = DPV->getModule()->getDataLayout();
+ TypeSize ValueSize = DL.getTypeAllocSizeInBits(ValTy);
+ if (std::optional<uint64_t> FragmentSize = DPV->getFragmentSizeInBits())
+ return TypeSize::isKnownGE(ValueSize, TypeSize::getFixed(*FragmentSize));
+
+ // We can't always calculate the size of the DI variable (e.g. if it is a
+ // VLA). Try to use the size of the alloca that the dbg intrinsic describes
+ // instead.
+ if (DPV->isAddressOfVariable()) {
+ // DPV should have exactly 1 location when it is an address.
+ assert(DPV->getNumVariableLocationOps() == 1 &&
+ "address of variable must have exactly 1 location operand.");
+ if (auto *AI =
+ dyn_cast_or_null<AllocaInst>(DPV->getVariableLocationOp(0))) {
+ if (std::optional<TypeSize> FragmentSize = AI->getAllocationSizeInBits(DL)) {
+ return TypeSize::isKnownGE(ValueSize, *FragmentSize);
+ }
+ }
+ }
+ // Could not determine size of variable. Conservatively return false.
+ return false;
+}
+
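A worked instance of the coverage test, with hypothetical types and sizes:

// Variable fragment is 64 bits, but the value written is an i32:
//   ValueSize    = DL.getTypeAllocSizeInBits(i32) = 32
//   FragmentSize = 64
//   TypeSize::isKnownGE(32, 64) == false
// so the caller conservatively refuses the direct conversion.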
+static void insertDbgValueOrDPValue(DIBuilder &Builder, Value *DV,
+ DILocalVariable *DIVar,
+ DIExpression *DIExpr,
+ const DebugLoc &NewLoc,
+ BasicBlock::iterator Instr) {
+ if (!UseNewDbgInfoFormat) {
+ auto *DbgVal = Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc,
+ (Instruction *)nullptr);
+ DbgVal->insertBefore(Instr);
+ } else {
+ // RemoveDIs: if we're using the new debug-info format, allocate a
+ // DPValue directly instead of a dbg.value intrinsic.
+ ValueAsMetadata *DVAM = ValueAsMetadata::get(DV);
+ DPValue *NewDPV = new DPValue(DVAM, DIVar, DIExpr, NewLoc.get());
+ Instr->getParent()->insertDPValueBefore(NewDPV, Instr);
+ }
+}
+
+static void insertDbgValueOrDPValueAfter(DIBuilder &Builder, Value *DV,
+ DILocalVariable *DIVar,
+ DIExpression *DIExpr,
+ const DebugLoc &NewLoc,
+ BasicBlock::iterator Instr) {
+ if (!UseNewDbgInfoFormat) {
+ auto *DbgVal = Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc,
+ (Instruction *)nullptr);
+ DbgVal->insertAfter(&*Instr);
+ } else {
+ // RemoveDIs: if we're using the new debug-info format, allocate a
+ // DPValue directly instead of a dbg.value intrinsic.
+ ValueAsMetadata *DVAM = ValueAsMetadata::get(DV);
+ DPValue *NewDPV = new DPValue(DVAM, DIVar, DIExpr, NewLoc.get());
+ Instr->getParent()->insertDPValueAfter(NewDPV, &*Instr);
+ }
+}
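For orientation, the two shapes these helpers emit depending on UseNewDbgInfoFormat (an illustrative rendering, not verbatim output):

// Old format: an intrinsic call appears in the instruction stream.
//   call void @llvm.dbg.value(metadata i32 %v, metadata !var,
//                             metadata !DIExpression()), !dbg !loc
// New format: a DPValue record hangs off a neighbouring instruction's
// DbgMarker, so no call instruction is inserted and plain iteration over
// the block's instructions no longer observes debug-info at all.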
/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
/// that has an associated llvm.dbg.declare intrinsic.
@@ -1550,7 +1707,8 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
DIExpr->isDeref() || (!DIExpr->startsWithDeref() &&
valueCoversEntireFragment(DV->getType(), DII));
if (CanConvert) {
- Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
+ insertDbgValueOrDPValue(Builder, DV, DIVar, DIExpr, NewLoc,
+ SI->getIterator());
return;
}
@@ -1562,8 +1720,23 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
  // know which part) we insert a dbg.value intrinsic to indicate that we
// know nothing about the variable's content.
DV = UndefValue::get(DV->getType());
- Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
+ insertDbgValueOrDPValue(Builder, DV, DIVar, DIExpr, NewLoc,
+ SI->getIterator());
+}
+
+namespace llvm {
+// RemoveDIs: duplicate the getDebugValueLoc method using DPValues instead of
+// dbg.value intrinsics. In llvm namespace so that it overloads the
+// DbgVariableIntrinsic version.
+static DebugLoc getDebugValueLoc(DPValue *DPV) {
+ // Original dbg.declare must have a location.
+ const DebugLoc &DeclareLoc = DPV->getDebugLoc();
+ MDNode *Scope = DeclareLoc.getScope();
+ DILocation *InlinedAt = DeclareLoc.getInlinedAt();
+ // Produce an unknown location with the correct scope / inlinedAt fields.
+ return DILocation::get(DPV->getContext(), 0, 0, Scope, InlinedAt);
}
+} // namespace llvm
/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
/// that has an associated llvm.dbg.declare intrinsic.
@@ -1588,9 +1761,54 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
// future if multi-location support is added to the IR, it might be
// preferable to keep tracking both the loaded value and the original
// address in case the alloca can not be elided.
- Instruction *DbgValue = Builder.insertDbgValueIntrinsic(
- LI, DIVar, DIExpr, NewLoc, (Instruction *)nullptr);
- DbgValue->insertAfter(LI);
+ insertDbgValueOrDPValueAfter(Builder, LI, DIVar, DIExpr, NewLoc,
+ LI->getIterator());
+}
+
+void llvm::ConvertDebugDeclareToDebugValue(DPValue *DPV, StoreInst *SI,
+ DIBuilder &Builder) {
+ assert(DPV->isAddressOfVariable());
+ auto *DIVar = DPV->getVariable();
+ assert(DIVar && "Missing variable");
+ auto *DIExpr = DPV->getExpression();
+ Value *DV = SI->getValueOperand();
+
+ DebugLoc NewLoc = getDebugValueLoc(DPV);
+
+ // If the alloca describes the variable itself, i.e. the expression in the
+ // dbg.declare doesn't start with a dereference, we can perform the
+ // conversion if the value covers the entire fragment of DPV.
+ // If the alloca describes the *address* of DIVar, i.e. DIExpr is
+ // *just* a DW_OP_deref, we use DV as is for the dbg.value.
+ // We conservatively ignore other dereferences, because the following two are
+ // not equivalent:
+ // dbg.declare(alloca, ..., !Expr(deref, plus_uconstant, 2))
+ // dbg.value(DV, ..., !Expr(deref, plus_uconstant, 2))
+ // The former is adding 2 to the address of the variable, whereas the latter
+ // is adding 2 to the value of the variable. As such, we insist on just a
+ // deref expression.
+ bool CanConvert =
+ DIExpr->isDeref() || (!DIExpr->startsWithDeref() &&
+ valueCoversEntireFragment(DV->getType(), DPV));
+ if (CanConvert) {
+ insertDbgValueOrDPValue(Builder, DV, DIVar, DIExpr, NewLoc,
+ SI->getIterator());
+ return;
+ }
+
+ // FIXME: If storing to a part of the variable described by the dbg.declare,
+ // then we want to insert a dbg.value for the corresponding fragment.
+ LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: " << *DPV
+ << '\n');
+ assert(UseNewDbgInfoFormat);
+
+ // For now, when there is a store to parts of the variable (but we do not
+ // know which part) we insert a dbg.value record to indicate that we
+ // know nothing about the variable's content.
+ DV = UndefValue::get(DV->getType());
+ ValueAsMetadata *DVAM = ValueAsMetadata::get(DV);
+ DPValue *NewDPV = new DPValue(DVAM, DIVar, DIExpr, NewLoc.get());
+ SI->getParent()->insertDPValueBefore(NewDPV, SI->getIterator());
}
/// Inserts a llvm.dbg.value intrinsic after a phi that has an associated
@@ -1621,8 +1839,38 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
// The block may be a catchswitch block, which does not have a valid
// insertion point.
// FIXME: Insert dbg.value markers in the successors when appropriate.
- if (InsertionPt != BB->end())
- Builder.insertDbgValueIntrinsic(APN, DIVar, DIExpr, NewLoc, &*InsertionPt);
+ if (InsertionPt != BB->end()) {
+ insertDbgValueOrDPValue(Builder, APN, DIVar, DIExpr, NewLoc, InsertionPt);
+ }
+}
+
+void llvm::ConvertDebugDeclareToDebugValue(DPValue *DPV, LoadInst *LI,
+ DIBuilder &Builder) {
+ auto *DIVar = DPV->getVariable();
+ auto *DIExpr = DPV->getExpression();
+ assert(DIVar && "Missing variable");
+
+ if (!valueCoversEntireFragment(LI->getType(), DPV)) {
+ // FIXME: If only referring to a part of the variable described by the
+ // dbg.declare, then we want to insert a DPValue for the corresponding
+ // fragment.
+ LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to DPValue: " << *DPV
+ << '\n');
+ return;
+ }
+
+ DebugLoc NewLoc = getDebugValueLoc(DPV);
+
+ // We are now tracking the loaded value instead of the address. In the
+ // future if multi-location support is added to the IR, it might be
+ // preferable to keep tracking both the loaded value and the original
+ // address in case the alloca can not be elided.
+ assert(UseNewDbgInfoFormat);
+
+ // Create a DPValue directly and insert.
+ ValueAsMetadata *LIVAM = ValueAsMetadata::get(LI);
+ DPValue *DV = new DPValue(LIVAM, DIVar, DIExpr, NewLoc.get());
+ LI->getParent()->insertDPValueAfter(DV, LI);
}
/// Determine whether this alloca is either a VLA or an array.
@@ -1635,6 +1883,36 @@ static bool isArray(AllocaInst *AI) {
static bool isStructure(AllocaInst *AI) {
return AI->getAllocatedType() && AI->getAllocatedType()->isStructTy();
}
+void llvm::ConvertDebugDeclareToDebugValue(DPValue *DPV, PHINode *APN,
+ DIBuilder &Builder) {
+ auto *DIVar = DPV->getVariable();
+ auto *DIExpr = DPV->getExpression();
+ assert(DIVar && "Missing variable");
+
+ if (PhiHasDebugValue(DIVar, DIExpr, APN))
+ return;
+
+ if (!valueCoversEntireFragment(APN->getType(), DPV)) {
+ // FIXME: If only referring to a part of the variable described by the
+ // dbg.declare, then we want to insert a DPValue for the corresponding
+ // fragment.
+ LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to DPValue: " << *DPV
+ << '\n');
+ return;
+ }
+
+ BasicBlock *BB = APN->getParent();
+ auto InsertionPt = BB->getFirstInsertionPt();
+
+ DebugLoc NewLoc = getDebugValueLoc(DPV);
+
+ // The block may be a catchswitch block, which does not have a valid
+ // insertion point.
+ // FIXME: Insert DPValue markers in the successors when appropriate.
+ if (InsertionPt != BB->end()) {
+ insertDbgValueOrDPValue(Builder, APN, DIVar, DIExpr, NewLoc, InsertionPt);
+ }
+}
/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set
/// of llvm.dbg.value intrinsics.
@@ -1642,17 +1920,24 @@ bool llvm::LowerDbgDeclare(Function &F) {
bool Changed = false;
DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
SmallVector<DbgDeclareInst *, 4> Dbgs;
- for (auto &FI : F)
- for (Instruction &BI : FI)
- if (auto DDI = dyn_cast<DbgDeclareInst>(&BI))
+ SmallVector<DPValue *> DPVs;
+ for (auto &FI : F) {
+ for (Instruction &BI : FI) {
+ if (auto *DDI = dyn_cast<DbgDeclareInst>(&BI))
Dbgs.push_back(DDI);
+ for (DPValue &DPV : BI.getDbgValueRange()) {
+ if (DPV.getType() == DPValue::LocationType::Declare)
+ DPVs.push_back(&DPV);
+ }
+ }
+ }
- if (Dbgs.empty())
+ if (Dbgs.empty() && DPVs.empty())
return Changed;
- for (auto &I : Dbgs) {
- DbgDeclareInst *DDI = I;
- AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress());
+ auto LowerOne = [&](auto *DDI) {
+ AllocaInst *AI =
+ dyn_cast_or_null<AllocaInst>(DDI->getVariableLocationOp(0));
// If this is an alloca for a scalar variable, insert a dbg.value
// at each load and store to the alloca and erase the dbg.declare.
// The dbg.values allow tracking a variable even if it is not
@@ -1660,7 +1945,7 @@ bool llvm::LowerDbgDeclare(Function &F) {
// the stack slot (and at a lexical-scope granularity). Later
// passes will attempt to elide the stack slot.
if (!AI || isArray(AI) || isStructure(AI))
- continue;
+ return;
// A volatile load/store means that the alloca can't be elided anyway.
if (llvm::any_of(AI->users(), [](User *U) -> bool {
@@ -1670,7 +1955,7 @@ bool llvm::LowerDbgDeclare(Function &F) {
return SI->isVolatile();
return false;
}))
- continue;
+ return;
SmallVector<const Value *, 8> WorkList;
WorkList.push_back(AI);
@@ -1691,8 +1976,8 @@ bool llvm::LowerDbgDeclare(Function &F) {
DebugLoc NewLoc = getDebugValueLoc(DDI);
auto *DerefExpr =
DIExpression::append(DDI->getExpression(), dwarf::DW_OP_deref);
- DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr,
- NewLoc, CI);
+ insertDbgValueOrDPValue(DIB, AI, DDI->getVariable(), DerefExpr,
+ NewLoc, CI->getIterator());
}
} else if (BitCastInst *BI = dyn_cast<BitCastInst>(U)) {
if (BI->getType()->isPointerTy())
@@ -1702,15 +1987,81 @@ bool llvm::LowerDbgDeclare(Function &F) {
}
DDI->eraseFromParent();
Changed = true;
- }
+ };
+
+ for_each(Dbgs, LowerOne);
+ for_each(DPVs, LowerOne);
if (Changed)
- for (BasicBlock &BB : F)
- RemoveRedundantDbgInstrs(&BB);
+ for (BasicBlock &BB : F)
+ RemoveRedundantDbgInstrs(&BB);
return Changed;
}
+// RemoveDIs: re-implementation of insertDebugValuesForPHIs that pulls the
+// debug-info out of the block's DPValues rather than dbg.value intrinsics.
+static void insertDPValuesForPHIs(BasicBlock *BB,
+ SmallVectorImpl<PHINode *> &InsertedPHIs) {
+ assert(BB && "No BasicBlock to clone DPValue(s) from.");
+ if (InsertedPHIs.size() == 0)
+ return;
+
+ // Map existing PHI nodes to their DPValues.
+ DenseMap<Value *, DPValue *> DbgValueMap;
+ for (auto &I : *BB) {
+ for (auto &DPV : I.getDbgValueRange()) {
+ for (Value *V : DPV.location_ops())
+ if (auto *Loc = dyn_cast_or_null<PHINode>(V))
+ DbgValueMap.insert({Loc, &DPV});
+ }
+ }
+ if (DbgValueMap.size() == 0)
+ return;
+
+ // Map a pair of the destination BB and old DPValue to the new DPValue,
+ // so that if a DPValue is being rewritten to use more than one of the
+ // inserted PHIs in the same destination BB, we can update the same DPValue
+ // with all the new PHIs instead of creating one copy for each.
+ MapVector<std::pair<BasicBlock *, DPValue *>, DPValue *> NewDbgValueMap;
+ // Then iterate through the new PHIs and look to see if they use one of the
+ // previously mapped PHIs. If so, create a new DPValue that will propagate
+ // the info through the new PHI. If we use more than one new PHI in a single
+ // destination BB with the same old dbg.value, merge the updates so that we
+ // get a single new DPValue with all the new PHIs.
+ for (auto PHI : InsertedPHIs) {
+ BasicBlock *Parent = PHI->getParent();
+ // Avoid inserting a debug-info record into an EH block.
+ if (Parent->getFirstNonPHI()->isEHPad())
+ continue;
+ for (auto VI : PHI->operand_values()) {
+ auto V = DbgValueMap.find(VI);
+ if (V != DbgValueMap.end()) {
+ DPValue *DbgII = cast<DPValue>(V->second);
+ auto NewDI = NewDbgValueMap.find({Parent, DbgII});
+ if (NewDI == NewDbgValueMap.end()) {
+ DPValue *NewDbgII = DbgII->clone();
+ NewDI = NewDbgValueMap.insert({{Parent, DbgII}, NewDbgII}).first;
+ }
+ DPValue *NewDbgII = NewDI->second;
+ // If PHI contains VI as an operand more than once, we may have
+ // already replaced it in NewDbgII; confirm that it is still present.
+ if (is_contained(NewDbgII->location_ops(), VI))
+ NewDbgII->replaceVariableLocationOp(VI, PHI);
+ }
+ }
+ }
+ // Insert the new DPValues into their destination blocks.
+ for (auto DI : NewDbgValueMap) {
+ BasicBlock *Parent = DI.first.first;
+ DPValue *NewDbgII = DI.second;
+ auto InsertionPt = Parent->getFirstInsertionPt();
+ assert(InsertionPt != Parent->end() && "Ill-formed basic block");
+
+ InsertionPt->DbgMarker->insertDPValue(NewDbgII, true);
+ }
+}
+
/// Propagate dbg.value intrinsics through the newly inserted PHIs.
void llvm::insertDebugValuesForPHIs(BasicBlock *BB,
SmallVectorImpl<PHINode *> &InsertedPHIs) {
@@ -1718,6 +2069,8 @@ void llvm::insertDebugValuesForPHIs(BasicBlock *BB,
if (InsertedPHIs.size() == 0)
return;
+ insertDPValuesForPHIs(BB, InsertedPHIs);
+
// Map existing PHI nodes to their dbg.values.
ValueToValueMapTy DbgValueMap;
for (auto &I : *BB) {
@@ -1777,59 +2130,78 @@ void llvm::insertDebugValuesForPHIs(BasicBlock *BB,
bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
DIBuilder &Builder, uint8_t DIExprFlags,
int Offset) {
- auto DbgDeclares = FindDbgDeclareUses(Address);
- for (DbgVariableIntrinsic *DII : DbgDeclares) {
- const DebugLoc &Loc = DII->getDebugLoc();
- auto *DIVar = DII->getVariable();
+ SmallVector<DbgDeclareInst *, 1> DbgDeclares;
+ SmallVector<DPValue *, 1> DPValues;
+ findDbgDeclares(DbgDeclares, Address, &DPValues);
+
+ auto ReplaceOne = [&](auto *DII) {
+ assert(DII->getVariable() && "Missing variable");
auto *DIExpr = DII->getExpression();
- assert(DIVar && "Missing variable");
DIExpr = DIExpression::prepend(DIExpr, DIExprFlags, Offset);
- // Insert llvm.dbg.declare immediately before DII, and remove old
- // llvm.dbg.declare.
- Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, DII);
- DII->eraseFromParent();
- }
- return !DbgDeclares.empty();
+ DII->setExpression(DIExpr);
+ DII->replaceVariableLocationOp(Address, NewAddress);
+ };
+
+ for_each(DbgDeclares, ReplaceOne);
+ for_each(DPValues, ReplaceOne);
+
+ return !DbgDeclares.empty() || !DPValues.empty();
}
-static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
- DIBuilder &Builder, int Offset) {
- const DebugLoc &Loc = DVI->getDebugLoc();
- auto *DIVar = DVI->getVariable();
- auto *DIExpr = DVI->getExpression();
+static void updateOneDbgValueForAlloca(const DebugLoc &Loc,
+ DILocalVariable *DIVar,
+ DIExpression *DIExpr, Value *NewAddress,
+ DbgValueInst *DVI, DPValue *DPV,
+ DIBuilder &Builder, int Offset) {
assert(DIVar && "Missing variable");
- // This is an alloca-based llvm.dbg.value. The first thing it should do with
- // the alloca pointer is dereference it. Otherwise we don't know how to handle
- // it and give up.
+ // This is an alloca-based dbg.value/DPValue. The first thing it should do
+ // with the alloca pointer is dereference it. Otherwise we don't know how to
+ // handle it and give up.
if (!DIExpr || DIExpr->getNumElements() < 1 ||
DIExpr->getElement(0) != dwarf::DW_OP_deref)
return;
// Insert the offset before the first deref.
- // We could just change the offset argument of dbg.value, but it's unsigned...
if (Offset)
DIExpr = DIExpression::prepend(DIExpr, 0, Offset);
- Builder.insertDbgValueIntrinsic(NewAddress, DIVar, DIExpr, Loc, DVI);
- DVI->eraseFromParent();
+ if (DVI) {
+ DVI->setExpression(DIExpr);
+ DVI->replaceVariableLocationOp(0u, NewAddress);
+ } else {
+ assert(DPV);
+ DPV->setExpression(DIExpr);
+ DPV->replaceVariableLocationOp(0u, NewAddress);
+ }
}
void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
DIBuilder &Builder, int Offset) {
- if (auto *L = LocalAsMetadata::getIfExists(AI))
- if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
- for (Use &U : llvm::make_early_inc_range(MDV->uses()))
- if (auto *DVI = dyn_cast<DbgValueInst>(U.getUser()))
- replaceOneDbgValueForAlloca(DVI, NewAllocaAddress, Builder, Offset);
+ SmallVector<DbgValueInst *, 1> DbgUsers;
+ SmallVector<DPValue *, 1> DPUsers;
+ findDbgValues(DbgUsers, AI, &DPUsers);
+
+ // Attempt to replace dbg.values that use this alloca.
+ for (auto *DVI : DbgUsers)
+ updateOneDbgValueForAlloca(DVI->getDebugLoc(), DVI->getVariable(),
+ DVI->getExpression(), NewAllocaAddress, DVI,
+ nullptr, Builder, Offset);
+
+ // Replace any DPValues that use this alloca.
+ for (DPValue *DPV : DPUsers)
+ updateOneDbgValueForAlloca(DPV->getDebugLoc(), DPV->getVariable(),
+ DPV->getExpression(), NewAllocaAddress, nullptr,
+ DPV, Builder, Offset);
}
/// Where possible to salvage debug information for \p I do so.
/// If not possible mark undef.
void llvm::salvageDebugInfo(Instruction &I) {
SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
- findDbgUsers(DbgUsers, &I);
- salvageDebugInfoForDbgValues(I, DbgUsers);
+ SmallVector<DPValue *, 1> DPUsers;
+ findDbgUsers(DbgUsers, &I, &DPUsers);
+ salvageDebugInfoForDbgValues(I, DbgUsers, DPUsers);
}
/// Salvage the address component of \p DAI.
@@ -1867,7 +2239,8 @@ static void salvageDbgAssignAddress(DbgAssignIntrinsic *DAI) {
}
void llvm::salvageDebugInfoForDbgValues(
- Instruction &I, ArrayRef<DbgVariableIntrinsic *> DbgUsers) {
+ Instruction &I, ArrayRef<DbgVariableIntrinsic *> DbgUsers,
+ ArrayRef<DPValue *> DPUsers) {
// These are arbitrary chosen limits on the maximum number of values and the
// maximum size of a debug expression we can salvage up to, used for
// performance reasons.
@@ -1933,12 +2306,70 @@ void llvm::salvageDebugInfoForDbgValues(
LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
Salvaged = true;
}
+ // Duplicate of above block for DPValues.
+ for (auto *DPV : DPUsers) {
+ // Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they
+ // are implicitly pointing out the value as a DWARF memory location
+ // description.
+ bool StackValue = DPV->getType() == DPValue::LocationType::Value;
+ auto DPVLocation = DPV->location_ops();
+ assert(is_contained(DPVLocation, &I) &&
+ "DPValue must use salvaged instruction as its location");
+ SmallVector<Value *, 4> AdditionalValues;
+ // 'I' may appear more than once in DPV's location ops, and each use of 'I'
+ // must be updated in the DIExpression and potentially have additional
+ // values added; thus we call salvageDebugInfoImpl for each 'I' instance in
+ // DPVLocation.
+ Value *Op0 = nullptr;
+ DIExpression *SalvagedExpr = DPV->getExpression();
+ auto LocItr = find(DPVLocation, &I);
+ while (SalvagedExpr && LocItr != DPVLocation.end()) {
+ SmallVector<uint64_t, 16> Ops;
+ unsigned LocNo = std::distance(DPVLocation.begin(), LocItr);
+ uint64_t CurrentLocOps = SalvagedExpr->getNumLocationOperands();
+ Op0 = salvageDebugInfoImpl(I, CurrentLocOps, Ops, AdditionalValues);
+ if (!Op0)
+ break;
+ SalvagedExpr =
+ DIExpression::appendOpsToArg(SalvagedExpr, Ops, LocNo, StackValue);
+ LocItr = std::find(++LocItr, DPVLocation.end(), &I);
+ }
+ // salvageDebugInfoImpl should fail on examining the first location
+ // operand in DPVLocation, or none of them.
+ if (!Op0)
+ break;
+
+ DPV->replaceVariableLocationOp(&I, Op0);
+ bool IsValidSalvageExpr =
+ SalvagedExpr->getNumElements() <= MaxExpressionSize;
+ if (AdditionalValues.empty() && IsValidSalvageExpr) {
+ DPV->setExpression(SalvagedExpr);
+ } else if (DPV->getType() == DPValue::LocationType::Value &&
+ IsValidSalvageExpr &&
+ DPV->getNumVariableLocationOps() + AdditionalValues.size() <=
+ MaxDebugArgs) {
+ DPV->addVariableLocationOps(AdditionalValues, SalvagedExpr);
+ } else {
+ // Do not salvage using DIArgList for dbg.addr/dbg.declare, as it is
+ // currently only valid for stack value expressions.
+ // Also do not salvage if the resulting DIArgList would contain an
+ // unreasonably large number of values.
+ Value *Undef = UndefValue::get(I.getOperand(0)->getType());
+ DPV->replaceVariableLocationOp(I.getOperand(0), Undef);
+ }
+ LLVM_DEBUG(dbgs() << "SALVAGE: " << DPV << '\n');
+ Salvaged = true;
+ }
if (Salvaged)
return;
for (auto *DII : DbgUsers)
DII->setKillLocation();
+
+ for (auto *DPV : DPUsers)
+ DPV->setKillLocation();
}
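A standard salvage case, for intuition (generic to this utility rather than specific to the DPValue path):

// If %y = add i32 %x, 4 becomes dead, a debug use of %y such as
//   dbg.value(%y, !var, !DIExpression())
// is rewritten to use %x with the arithmetic folded into the expression:
//   dbg.value(%x, !var, !DIExpression(DW_OP_plus_uconst, 4))
// The DPUsers loop above performs exactly this rewrite on debug records.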
Value *getSalvageOpsForGEP(GetElementPtrInst *GEP, const DataLayout &DL,
@@ -2153,16 +2584,20 @@ using DbgValReplacement = std::optional<DIExpression *>;
/// changes are made.
static bool rewriteDebugUsers(
Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT,
- function_ref<DbgValReplacement(DbgVariableIntrinsic &DII)> RewriteExpr) {
+ function_ref<DbgValReplacement(DbgVariableIntrinsic &DII)> RewriteExpr,
+ function_ref<DbgValReplacement(DPValue &DPV)> RewriteDPVExpr) {
// Find debug users of From.
SmallVector<DbgVariableIntrinsic *, 1> Users;
- findDbgUsers(Users, &From);
- if (Users.empty())
+ SmallVector<DPValue *, 1> DPUsers;
+ findDbgUsers(Users, &From, &DPUsers);
+ if (Users.empty() && DPUsers.empty())
return false;
// Prevent use-before-def of To.
bool Changed = false;
+
SmallPtrSet<DbgVariableIntrinsic *, 1> UndefOrSalvage;
+ SmallPtrSet<DPValue *, 1> UndefOrSalvageDPV;
if (isa<Instruction>(&To)) {
bool DomPointAfterFrom = From.getNextNonDebugInstruction() == &DomPoint;
@@ -2180,6 +2615,25 @@ static bool rewriteDebugUsers(
UndefOrSalvage.insert(DII);
}
}
+
+ // DPValue implementation of the above.
+ for (auto *DPV : DPUsers) {
+ Instruction *MarkedInstr = DPV->getMarker()->MarkedInstr;
+ Instruction *NextNonDebug = MarkedInstr;
+ // The next instruction might still be a dbg.declare, skip over it.
+ if (isa<DbgVariableIntrinsic>(NextNonDebug))
+ NextNonDebug = NextNonDebug->getNextNonDebugInstruction();
+
+ if (DomPointAfterFrom && NextNonDebug == &DomPoint) {
+ LLVM_DEBUG(dbgs() << "MOVE: " << *DPV << '\n');
+ DPV->removeFromParent();
+ // Ensure there's a marker.
+ DomPoint.getParent()->insertDPValueAfter(DPV, &DomPoint);
+ Changed = true;
+ } else if (!DT.dominates(&DomPoint, MarkedInstr)) {
+ UndefOrSalvageDPV.insert(DPV);
+ }
+ }
}
// Update debug users without use-before-def risk.
@@ -2196,8 +2650,21 @@ static bool rewriteDebugUsers(
LLVM_DEBUG(dbgs() << "REWRITE: " << *DII << '\n');
Changed = true;
}
+ for (auto *DPV : DPUsers) {
+ if (UndefOrSalvageDPV.count(DPV))
+ continue;
+
+ DbgValReplacement DVR = RewriteDPVExpr(*DPV);
+ if (!DVR)
+ continue;
+
+ DPV->replaceVariableLocationOp(&From, &To);
+ DPV->setExpression(*DVR);
+ LLVM_DEBUG(dbgs() << "REWRITE: " << DPV << '\n');
+ Changed = true;
+ }
- if (!UndefOrSalvage.empty()) {
+ if (!UndefOrSalvage.empty() || !UndefOrSalvageDPV.empty()) {
// Try to salvage the remaining debug users.
salvageDebugInfo(From);
Changed = true;
@@ -2245,12 +2712,15 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To,
auto Identity = [&](DbgVariableIntrinsic &DII) -> DbgValReplacement {
return DII.getExpression();
};
+ auto IdentityDPV = [&](DPValue &DPV) -> DbgValReplacement {
+ return DPV.getExpression();
+ };
// Handle no-op conversions.
Module &M = *From.getModule();
const DataLayout &DL = M.getDataLayout();
if (isBitCastSemanticsPreserving(DL, FromTy, ToTy))
- return rewriteDebugUsers(From, To, DomPoint, DT, Identity);
+ return rewriteDebugUsers(From, To, DomPoint, DT, Identity, IdentityDPV);
// Handle integer-to-integer widening and narrowing.
// FIXME: Use DW_OP_convert when it's available everywhere.
@@ -2262,7 +2732,7 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To,
// When the width of the result grows, assume that a debugger will only
// access the low `FromBits` bits when inspecting the source variable.
if (FromBits < ToBits)
- return rewriteDebugUsers(From, To, DomPoint, DT, Identity);
+ return rewriteDebugUsers(From, To, DomPoint, DT, Identity, IdentityDPV);
// The width of the result has shrunk. Use sign/zero extension to describe
// the source variable's high bits.
@@ -2278,7 +2748,22 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To,
return DIExpression::appendExt(DII.getExpression(), ToBits, FromBits,
Signed);
};
- return rewriteDebugUsers(From, To, DomPoint, DT, SignOrZeroExt);
+ // RemoveDIs: duplicate implementation working on DPValues rather than on
+ // dbg.value intrinsics.
+ auto SignOrZeroExtDPV = [&](DPValue &DPV) -> DbgValReplacement {
+ DILocalVariable *Var = DPV.getVariable();
+
+ // Without knowing signedness, sign/zero extension isn't possible.
+ auto Signedness = Var->getSignedness();
+ if (!Signedness)
+ return std::nullopt;
+
+ bool Signed = *Signedness == DIBasicType::Signedness::Signed;
+ return DIExpression::appendExt(DPV.getExpression(), ToBits, FromBits,
+ Signed);
+ };
+ return rewriteDebugUsers(From, To, DomPoint, DT, SignOrZeroExt,
+ SignOrZeroExtDPV);
}
// TODO: Floating-point conversions, vectors.
@@ -2292,12 +2777,17 @@ llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
// Delete the instructions backwards, as it has a reduced likelihood of
// having to update as many def-use and use-def chains.
Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
+ // RemoveDIs: erasing debug-info must be done manually.
+ EndInst->dropDbgValues();
while (EndInst != &BB->front()) {
// Delete the next to last instruction.
Instruction *Inst = &*--EndInst->getIterator();
if (!Inst->use_empty() && !Inst->getType()->isTokenTy())
Inst->replaceAllUsesWith(PoisonValue::get(Inst->getType()));
if (Inst->isEHPad() || Inst->getType()->isTokenTy()) {
+ // EHPads can't have DPValues attached to them, but it might be possible
+ // for things with token type.
+ Inst->dropDbgValues();
EndInst = Inst;
continue;
}
@@ -2305,6 +2795,8 @@ llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
++NumDeadDbgInst;
else
++NumDeadInst;
+ // RemoveDIs: erasing debug-info must be done manually.
+ Inst->dropDbgValues();
Inst->eraseFromParent();
}
return {NumDeadInst, NumDeadDbgInst};
@@ -2346,6 +2838,7 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool PreserveLCSSA,
Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor});
DTU->applyUpdates(Updates);
}
+ BB->flushTerminatorDbgValues();
return NumInstrsRemoved;
}
@@ -2499,9 +2992,9 @@ static bool markAliveBlocks(Function &F,
// If we found a call to a no-return function, insert an unreachable
// instruction after it. Make sure there isn't *already* one there
// though.
- if (!isa<UnreachableInst>(CI->getNextNode())) {
+ if (!isa<UnreachableInst>(CI->getNextNonDebugInstruction())) {
// Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(CI->getNextNode(), false, DTU);
+ changeToUnreachable(CI->getNextNonDebugInstruction(), false, DTU);
Changed = true;
}
break;
@@ -2913,9 +3406,10 @@ static unsigned replaceDominatedUsesWith(Value *From, Value *To,
for (Use &U : llvm::make_early_inc_range(From->uses())) {
if (!Dominates(Root, U))
continue;
+ LLVM_DEBUG(dbgs() << "Replace dominated use of '";
+ From->printAsOperand(dbgs());
+ dbgs() << "' with " << *To << " in " << *U.getUser() << "\n");
U.set(To);
- LLVM_DEBUG(dbgs() << "Replace dominated use of '" << From->getName()
- << "' as " << *To << " in " << *U << "\n");
++Count;
}
return Count;
@@ -3034,9 +3528,12 @@ void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
void llvm::dropDebugUsers(Instruction &I) {
SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
- findDbgUsers(DbgUsers, &I);
+ SmallVector<DPValue *, 1> DPUsers;
+ findDbgUsers(DbgUsers, &I, &DPUsers);
for (auto *DII : DbgUsers)
DII->eraseFromParent();
+ for (auto *DPV : DPUsers)
+ DPV->eraseFromParent();
}
void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
@@ -3068,6 +3565,8 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
I->dropUBImplyingAttrsAndMetadata();
if (I->isUsedByMetadata())
dropDebugUsers(*I);
+ // RemoveDIs: drop debug-info too as the following code does.
+ I->dropDbgValues();
if (I->isDebugOrPseudoInst()) {
// Remove DbgInfo and pseudo probe Intrinsics.
II = I->eraseFromParent();
@@ -3080,6 +3579,41 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
BB->getTerminator()->getIterator());
}
+DIExpression *llvm::getExpressionForConstant(DIBuilder &DIB, const Constant &C,
+ Type &Ty) {
+ // Create integer constant expression.
+ auto createIntegerExpression = [&DIB](const Constant &CV) -> DIExpression * {
+ const APInt &API = cast<ConstantInt>(&CV)->getValue();
+ std::optional<int64_t> InitIntOpt = API.trySExtValue();
+ return InitIntOpt ? DIB.createConstantValueExpression(
+ static_cast<uint64_t>(*InitIntOpt))
+ : nullptr;
+ };
+
+ if (isa<ConstantInt>(C))
+ return createIntegerExpression(C);
+
+ if (Ty.isFloatTy() || Ty.isDoubleTy()) {
+ const APFloat &APF = cast<ConstantFP>(&C)->getValueAPF();
+ return DIB.createConstantValueExpression(
+ APF.bitcastToAPInt().getZExtValue());
+ }
+
+ if (!Ty.isPointerTy())
+ return nullptr;
+
+ if (isa<ConstantPointerNull>(C))
+ return DIB.createConstantValueExpression(0);
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(&C))
+ if (CE->getOpcode() == Instruction::IntToPtr) {
+ const Value *V = CE->getOperand(0);
+ if (auto CI = dyn_cast_or_null<ConstantInt>(V))
+ return createIntegerExpression(*CI);
+ }
+ return nullptr;
+}
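
A hedged usage sketch for the new helper, assuming a live DIBuilder and that the declaration lands in llvm/Transforms/Utils/Local.h alongside this file's other helpers; the wrapper name tryDescribeConstant is invented here:

#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/Transforms/Utils/Local.h"

// Illustrative only: describe a variable whose location was folded to a
// constant. Returns nullptr for constants with no DIExpression encoding
// (anything other than int, float/double, null pointer, inttoptr-of-int).
static llvm::DIExpression *tryDescribeConstant(llvm::DIBuilder &DIB,
                                               const llvm::Constant &C) {
  return llvm::getExpressionForConstant(DIB, C, *C.getType());
}
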
+
namespace {
/// A potential constituent of a bitreverse or bswap expression. See
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopConstrainer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopConstrainer.cpp
new file mode 100644
index 000000000000..ea6d952cfa7d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopConstrainer.cpp
@@ -0,0 +1,904 @@
+#include "llvm/Transforms/Utils/LoopConstrainer.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
+
+using namespace llvm;
+
+static const char *ClonedLoopTag = "loop_constrainer.loop.clone";
+
+#define DEBUG_TYPE "loop-constrainer"
+
+/// Given a loop with a decreasing induction variable, is it possible to
+/// safely calculate the bounds of a new loop using the given Predicate.
+static bool isSafeDecreasingBound(const SCEV *Start, const SCEV *BoundSCEV,
+ const SCEV *Step, ICmpInst::Predicate Pred,
+ unsigned LatchBrExitIdx, Loop *L,
+ ScalarEvolution &SE) {
+ if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_SGT &&
+ Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_UGT)
+ return false;
+
+ if (!SE.isAvailableAtLoopEntry(BoundSCEV, L))
+ return false;
+
+ assert(SE.isKnownNegative(Step) && "expecting negative step");
+
+ LLVM_DEBUG(dbgs() << "isSafeDecreasingBound with:\n");
+ LLVM_DEBUG(dbgs() << "Start: " << *Start << "\n");
+ LLVM_DEBUG(dbgs() << "Step: " << *Step << "\n");
+ LLVM_DEBUG(dbgs() << "BoundSCEV: " << *BoundSCEV << "\n");
+ LLVM_DEBUG(dbgs() << "Pred: " << Pred << "\n");
+ LLVM_DEBUG(dbgs() << "LatchExitBrIdx: " << LatchBrExitIdx << "\n");
+
+ bool IsSigned = ICmpInst::isSigned(Pred);
+ // The predicate that we need to check that the induction variable lies
+ // within bounds.
+ ICmpInst::Predicate BoundPred =
+ IsSigned ? CmpInst::ICMP_SGT : CmpInst::ICMP_UGT;
+
+ if (LatchBrExitIdx == 1)
+ return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, BoundSCEV);
+
+ assert(LatchBrExitIdx == 0 && "LatchBrExitIdx should be either 0 or 1");
+
+ const SCEV *StepPlusOne = SE.getAddExpr(Step, SE.getOne(Step->getType()));
+ unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
+ APInt Min = IsSigned ? APInt::getSignedMinValue(BitWidth)
+ : APInt::getMinValue(BitWidth);
+ const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Min), StepPlusOne);
+
+ const SCEV *MinusOne =
+ SE.getMinusSCEV(BoundSCEV, SE.getOne(BoundSCEV->getType()));
+
+ return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, MinusOne) &&
+ SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit);
+}
+
+/// Given a loop with an increasing induction variable, is it possible to
+/// safely calculate the bounds of a new loop using the given Predicate.
+static bool isSafeIncreasingBound(const SCEV *Start, const SCEV *BoundSCEV,
+ const SCEV *Step, ICmpInst::Predicate Pred,
+ unsigned LatchBrExitIdx, Loop *L,
+ ScalarEvolution &SE) {
+ if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_SGT &&
+ Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_UGT)
+ return false;
+
+ if (!SE.isAvailableAtLoopEntry(BoundSCEV, L))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "isSafeIncreasingBound with:\n");
+ LLVM_DEBUG(dbgs() << "Start: " << *Start << "\n");
+ LLVM_DEBUG(dbgs() << "Step: " << *Step << "\n");
+ LLVM_DEBUG(dbgs() << "BoundSCEV: " << *BoundSCEV << "\n");
+ LLVM_DEBUG(dbgs() << "Pred: " << Pred << "\n");
+ LLVM_DEBUG(dbgs() << "LatchExitBrIdx: " << LatchBrExitIdx << "\n");
+
+ bool IsSigned = ICmpInst::isSigned(Pred);
+ // The predicate that we need to check that the induction variable lies
+ // within bounds.
+ ICmpInst::Predicate BoundPred =
+ IsSigned ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
+
+ if (LatchBrExitIdx == 1)
+ return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, BoundSCEV);
+
+ assert(LatchBrExitIdx == 0 && "LatchBrExitIdx should be 0 or 1");
+
+ const SCEV *StepMinusOne = SE.getMinusSCEV(Step, SE.getOne(Step->getType()));
+ unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
+ APInt Max = IsSigned ? APInt::getSignedMaxValue(BitWidth)
+ : APInt::getMaxValue(BitWidth);
+ const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Max), StepMinusOne);
+
+ return (SE.isLoopEntryGuardedByCond(L, BoundPred, Start,
+ SE.getAddExpr(BoundSCEV, Step)) &&
+ SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit));
+}
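
As a plain-arithmetic illustration of the Limit computation above (illustrative values only; signed 32-bit IV, increasing case with LatchBrExitIdx == 0): the rewritten exit test may evaluate "Bound + Step", so the bound must sit below Max - (Step - 1) to be provably overflow-free.

#include <cstdint>
#include <iostream>
#include <limits>

int main() {
  const int64_t Max = std::numeric_limits<int32_t>::max();
  const int64_t Step = 4;                 // positive step of the AddRec
  const int64_t Limit = Max - (Step - 1); // mirrors the SCEV Limit above
  for (int64_t Bound : {int64_t(1000), Max - 1}) {
    std::cout << "Bound " << Bound << (Bound < Limit ? " is" : " is NOT")
              << " provably overflow-free\n";
  }
}
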
+
+/// Returns an estimate of the max latch taken count of the loop, in the
+/// narrowest available type. If the latch block has such an estimate, it is
+/// returned. Otherwise, we use the max exit count of the whole loop (which is
+/// potentially of a wider type than the latch check itself); that is still
+/// better than no estimate.
+static const SCEV *getNarrowestLatchMaxTakenCountEstimate(ScalarEvolution &SE,
+ const Loop &L) {
+ const SCEV *FromBlock =
+ SE.getExitCount(&L, L.getLoopLatch(), ScalarEvolution::SymbolicMaximum);
+ if (isa<SCEVCouldNotCompute>(FromBlock))
+ return SE.getSymbolicMaxBackedgeTakenCount(&L);
+ return FromBlock;
+}
+
+std::optional<LoopStructure>
+LoopStructure::parseLoopStructure(ScalarEvolution &SE, Loop &L,
+ bool AllowUnsignedLatchCond,
+ const char *&FailureReason) {
+ if (!L.isLoopSimplifyForm()) {
+ FailureReason = "loop not in LoopSimplify form";
+ return std::nullopt;
+ }
+
+ BasicBlock *Latch = L.getLoopLatch();
+ assert(Latch && "Simplified loops only have one latch!");
+
+ if (Latch->getTerminator()->getMetadata(ClonedLoopTag)) {
+ FailureReason = "loop has already been cloned";
+ return std::nullopt;
+ }
+
+ if (!L.isLoopExiting(Latch)) {
+ FailureReason = "no loop latch";
+ return std::nullopt;
+ }
+
+ BasicBlock *Header = L.getHeader();
+ BasicBlock *Preheader = L.getLoopPreheader();
+ if (!Preheader) {
+ FailureReason = "no preheader";
+ return std::nullopt;
+ }
+
+ BranchInst *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!LatchBr || LatchBr->isUnconditional()) {
+ FailureReason = "latch terminator not conditional branch";
+ return std::nullopt;
+ }
+
+ unsigned LatchBrExitIdx = LatchBr->getSuccessor(0) == Header ? 1 : 0;
+
+ ICmpInst *ICI = dyn_cast<ICmpInst>(LatchBr->getCondition());
+ if (!ICI || !isa<IntegerType>(ICI->getOperand(0)->getType())) {
+ FailureReason = "latch terminator branch not conditional on integral icmp";
+ return std::nullopt;
+ }
+
+ const SCEV *MaxBETakenCount = getNarrowestLatchMaxTakenCountEstimate(SE, L);
+ if (isa<SCEVCouldNotCompute>(MaxBETakenCount)) {
+ FailureReason = "could not compute latch count";
+ return std::nullopt;
+ }
+ assert(SE.getLoopDisposition(MaxBETakenCount, &L) ==
+ ScalarEvolution::LoopInvariant &&
+ "loop variant exit count doesn't make sense!");
+
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *LeftValue = ICI->getOperand(0);
+ const SCEV *LeftSCEV = SE.getSCEV(LeftValue);
+ IntegerType *IndVarTy = cast<IntegerType>(LeftValue->getType());
+
+ Value *RightValue = ICI->getOperand(1);
+ const SCEV *RightSCEV = SE.getSCEV(RightValue);
+
+ // We canonicalize `ICI` such that `LeftSCEV` is an add recurrence.
+ if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
+ if (isa<SCEVAddRecExpr>(RightSCEV)) {
+ std::swap(LeftSCEV, RightSCEV);
+ std::swap(LeftValue, RightValue);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ } else {
+ FailureReason = "no add recurrences in the icmp";
+ return std::nullopt;
+ }
+ }
+
+ auto HasNoSignedWrap = [&](const SCEVAddRecExpr *AR) {
+ if (AR->getNoWrapFlags(SCEV::FlagNSW))
+ return true;
+
+ IntegerType *Ty = cast<IntegerType>(AR->getType());
+ IntegerType *WideTy =
+ IntegerType::get(Ty->getContext(), Ty->getBitWidth() * 2);
+
+ const SCEVAddRecExpr *ExtendAfterOp =
+ dyn_cast<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
+ if (ExtendAfterOp) {
+ const SCEV *ExtendedStart = SE.getSignExtendExpr(AR->getStart(), WideTy);
+ const SCEV *ExtendedStep =
+ SE.getSignExtendExpr(AR->getStepRecurrence(SE), WideTy);
+
+ bool NoSignedWrap = ExtendAfterOp->getStart() == ExtendedStart &&
+ ExtendAfterOp->getStepRecurrence(SE) == ExtendedStep;
+
+ if (NoSignedWrap)
+ return true;
+ }
+
+ // We may have proved this when computing the sign extension above.
+ return AR->getNoWrapFlags(SCEV::FlagNSW) != SCEV::FlagAnyWrap;
+ };
+
+ // `ICI` is interpreted as taking the backedge if the *next* value of the
+ // induction variable satisfies some constraint.
+
+ const SCEVAddRecExpr *IndVarBase = cast<SCEVAddRecExpr>(LeftSCEV);
+ if (IndVarBase->getLoop() != &L) {
+ FailureReason = "LHS in cmp is not an AddRec for this loop";
+ return std::nullopt;
+ }
+ if (!IndVarBase->isAffine()) {
+ FailureReason = "LHS in icmp not induction variable";
+ return std::nullopt;
+ }
+ const SCEV *StepRec = IndVarBase->getStepRecurrence(SE);
+ if (!isa<SCEVConstant>(StepRec)) {
+ FailureReason = "LHS in icmp not induction variable";
+ return std::nullopt;
+ }
+ ConstantInt *StepCI = cast<SCEVConstant>(StepRec)->getValue();
+
+ if (ICI->isEquality() && !HasNoSignedWrap(IndVarBase)) {
+ FailureReason = "LHS in icmp needs nsw for equality predicates";
+ return std::nullopt;
+ }
+
+ assert(!StepCI->isZero() && "Zero step?");
+ bool IsIncreasing = !StepCI->isNegative();
+ bool IsSignedPredicate;
+ const SCEV *StartNext = IndVarBase->getStart();
+ const SCEV *Addend = SE.getNegativeSCEV(IndVarBase->getStepRecurrence(SE));
+ const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend);
+ const SCEV *Step = SE.getSCEV(StepCI);
+
+ const SCEV *FixedRightSCEV = nullptr;
+
+  // If RightValue resides within the loop (while still being loop-invariant),
+  // regenerate it in the preheader.
+ if (auto *I = dyn_cast<Instruction>(RightValue))
+ if (L.contains(I->getParent()))
+ FixedRightSCEV = RightSCEV;
+
+ if (IsIncreasing) {
+ bool DecreasedRightValueByOne = false;
+ if (StepCI->isOne()) {
+ // Try to turn eq/ne predicates to those we can work with.
+ if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1)
+ // while (++i != len) { while (++i < len) {
+ // ... ---> ...
+ // } }
+ // If both parts are known non-negative, it is profitable to use
+ // unsigned comparison in increasing loop. This allows us to make the
+ // comparison check against "RightSCEV + 1" more optimistic.
+ if (isKnownNonNegativeInLoop(IndVarStart, &L, SE) &&
+ isKnownNonNegativeInLoop(RightSCEV, &L, SE))
+ Pred = ICmpInst::ICMP_ULT;
+ else
+ Pred = ICmpInst::ICMP_SLT;
+ else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0) {
+ // while (true) { while (true) {
+ // if (++i == len) ---> if (++i > len - 1)
+ // break; break;
+ // ... ...
+ // } }
+ if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) &&
+ cannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/ false)) {
+ Pred = ICmpInst::ICMP_UGT;
+ RightSCEV =
+ SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType()));
+ DecreasedRightValueByOne = true;
+ } else if (cannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/ true)) {
+ Pred = ICmpInst::ICMP_SGT;
+ RightSCEV =
+ SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType()));
+ DecreasedRightValueByOne = true;
+ }
+ }
+ }
+
+ bool LTPred = (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT);
+ bool GTPred = (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT);
+ bool FoundExpectedPred =
+ (LTPred && LatchBrExitIdx == 1) || (GTPred && LatchBrExitIdx == 0);
+
+ if (!FoundExpectedPred) {
+ FailureReason = "expected icmp slt semantically, found something else";
+ return std::nullopt;
+ }
+
+ IsSignedPredicate = ICmpInst::isSigned(Pred);
+ if (!IsSignedPredicate && !AllowUnsignedLatchCond) {
+ FailureReason = "unsigned latch conditions are explicitly prohibited";
+ return std::nullopt;
+ }
+
+ if (!isSafeIncreasingBound(IndVarStart, RightSCEV, Step, Pred,
+ LatchBrExitIdx, &L, SE)) {
+ FailureReason = "Unsafe loop bounds";
+ return std::nullopt;
+ }
+ if (LatchBrExitIdx == 0) {
+ // We need to increase the right value unless we have already decreased
+ // it virtually when we replaced EQ with SGT.
+ if (!DecreasedRightValueByOne)
+ FixedRightSCEV =
+ SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
+ } else {
+ assert(!DecreasedRightValueByOne &&
+ "Right value can be decreased only for LatchBrExitIdx == 0!");
+ }
+ } else {
+ bool IncreasedRightValueByOne = false;
+ if (StepCI->isMinusOne()) {
+ // Try to turn eq/ne predicates to those we can work with.
+ if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1)
+ // while (--i != len) { while (--i > len) {
+ // ... ---> ...
+ // } }
+ // We intentionally don't turn the predicate into UGT even if we know
+ // that both operands are non-negative, because it will only pessimize
+ // our check against "RightSCEV - 1".
+ Pred = ICmpInst::ICMP_SGT;
+ else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0) {
+ // while (true) { while (true) {
+ // if (--i == len) ---> if (--i < len + 1)
+ // break; break;
+ // ... ...
+ // } }
+ if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) &&
+ cannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ false)) {
+ Pred = ICmpInst::ICMP_ULT;
+ RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
+ IncreasedRightValueByOne = true;
+ } else if (cannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ true)) {
+ Pred = ICmpInst::ICMP_SLT;
+ RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
+ IncreasedRightValueByOne = true;
+ }
+ }
+ }
+
+ bool LTPred = (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT);
+ bool GTPred = (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT);
+
+ bool FoundExpectedPred =
+ (GTPred && LatchBrExitIdx == 1) || (LTPred && LatchBrExitIdx == 0);
+
+ if (!FoundExpectedPred) {
+ FailureReason = "expected icmp sgt semantically, found something else";
+ return std::nullopt;
+ }
+
+ IsSignedPredicate =
+ Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGT;
+
+ if (!IsSignedPredicate && !AllowUnsignedLatchCond) {
+ FailureReason = "unsigned latch conditions are explicitly prohibited";
+ return std::nullopt;
+ }
+
+ if (!isSafeDecreasingBound(IndVarStart, RightSCEV, Step, Pred,
+ LatchBrExitIdx, &L, SE)) {
+ FailureReason = "Unsafe bounds";
+ return std::nullopt;
+ }
+
+ if (LatchBrExitIdx == 0) {
+ // We need to decrease the right value unless we have already increased
+ // it virtually when we replaced EQ with SLT.
+ if (!IncreasedRightValueByOne)
+ FixedRightSCEV =
+ SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType()));
+ } else {
+ assert(!IncreasedRightValueByOne &&
+ "Right value can be increased only for LatchBrExitIdx == 0!");
+ }
+ }
+ BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx);
+
+ assert(!L.contains(LatchExit) && "expected an exit block!");
+ const DataLayout &DL = Preheader->getModule()->getDataLayout();
+ SCEVExpander Expander(SE, DL, "loop-constrainer");
+ Instruction *Ins = Preheader->getTerminator();
+
+ if (FixedRightSCEV)
+ RightValue =
+ Expander.expandCodeFor(FixedRightSCEV, FixedRightSCEV->getType(), Ins);
+
+ Value *IndVarStartV = Expander.expandCodeFor(IndVarStart, IndVarTy, Ins);
+ IndVarStartV->setName("indvar.start");
+
+ LoopStructure Result;
+
+ Result.Tag = "main";
+ Result.Header = Header;
+ Result.Latch = Latch;
+ Result.LatchBr = LatchBr;
+ Result.LatchExit = LatchExit;
+ Result.LatchBrExitIdx = LatchBrExitIdx;
+ Result.IndVarStart = IndVarStartV;
+ Result.IndVarStep = StepCI;
+ Result.IndVarBase = LeftValue;
+ Result.IndVarIncreasing = IsIncreasing;
+ Result.LoopExitAt = RightValue;
+ Result.IsSignedPredicate = IsSignedPredicate;
+ Result.ExitCountTy = cast<IntegerType>(MaxBETakenCount->getType());
+
+ FailureReason = nullptr;
+
+ return Result;
+}
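
A minimal sketch of probing a loop with this entry point, using the signature defined above; the wrapper name hasRecognizableStructure is invented here, and L and SE are assumed to come from a running pass:

#include <optional>
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/LoopConstrainer.h"

// Illustrative only: check whether a loop has the shape LoopConstrainer
// understands; FailureReason names the first unsatisfied precondition.
static bool hasRecognizableStructure(llvm::Loop &L, llvm::ScalarEvolution &SE) {
  const char *FailureReason = nullptr;
  std::optional<llvm::LoopStructure> LS =
      llvm::LoopStructure::parseLoopStructure(
          SE, L, /*AllowUnsignedLatchCond=*/true, FailureReason);
  return LS.has_value();
}
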
+
+// Add metadata to the loop L to disable loop optimizations. Callers need to
+// confirm that optimizing loop L is not beneficial.
+static void DisableAllLoopOptsOnLoop(Loop &L) {
+ // We do not care about any existing loopID related metadata for L, since we
+ // are setting all loop metadata to false.
+ LLVMContext &Context = L.getHeader()->getContext();
+ // Reserve first location for self reference to the LoopID metadata node.
+ MDNode *Dummy = MDNode::get(Context, {});
+ MDNode *DisableUnroll = MDNode::get(
+ Context, {MDString::get(Context, "llvm.loop.unroll.disable")});
+ Metadata *FalseVal =
+ ConstantAsMetadata::get(ConstantInt::get(Type::getInt1Ty(Context), 0));
+ MDNode *DisableVectorize = MDNode::get(
+ Context,
+ {MDString::get(Context, "llvm.loop.vectorize.enable"), FalseVal});
+ MDNode *DisableLICMVersioning = MDNode::get(
+ Context, {MDString::get(Context, "llvm.loop.licm_versioning.disable")});
+ MDNode *DisableDistribution = MDNode::get(
+ Context,
+ {MDString::get(Context, "llvm.loop.distribute.enable"), FalseVal});
+ MDNode *NewLoopID =
+ MDNode::get(Context, {Dummy, DisableUnroll, DisableVectorize,
+ DisableLICMVersioning, DisableDistribution});
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+ L.setLoopID(NewLoopID);
+}
+
+LoopConstrainer::LoopConstrainer(Loop &L, LoopInfo &LI,
+ function_ref<void(Loop *, bool)> LPMAddNewLoop,
+ const LoopStructure &LS, ScalarEvolution &SE,
+ DominatorTree &DT, Type *T, SubRanges SR)
+ : F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()), SE(SE),
+ DT(DT), LI(LI), LPMAddNewLoop(LPMAddNewLoop), OriginalLoop(L), RangeTy(T),
+ MainLoopStructure(LS), SR(SR) {}
+
+void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result,
+ const char *Tag) const {
+ for (BasicBlock *BB : OriginalLoop.getBlocks()) {
+ BasicBlock *Clone = CloneBasicBlock(BB, Result.Map, Twine(".") + Tag, &F);
+ Result.Blocks.push_back(Clone);
+ Result.Map[BB] = Clone;
+ }
+
+ auto GetClonedValue = [&Result](Value *V) {
+ assert(V && "null values not in domain!");
+ auto It = Result.Map.find(V);
+ if (It == Result.Map.end())
+ return V;
+ return static_cast<Value *>(It->second);
+ };
+
+ auto *ClonedLatch =
+ cast<BasicBlock>(GetClonedValue(OriginalLoop.getLoopLatch()));
+ ClonedLatch->getTerminator()->setMetadata(ClonedLoopTag,
+ MDNode::get(Ctx, {}));
+
+ Result.Structure = MainLoopStructure.map(GetClonedValue);
+ Result.Structure.Tag = Tag;
+
+ for (unsigned i = 0, e = Result.Blocks.size(); i != e; ++i) {
+ BasicBlock *ClonedBB = Result.Blocks[i];
+ BasicBlock *OriginalBB = OriginalLoop.getBlocks()[i];
+
+ assert(Result.Map[OriginalBB] == ClonedBB && "invariant!");
+
+ for (Instruction &I : *ClonedBB)
+ RemapInstruction(&I, Result.Map,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+
+ // Exit blocks will now have one more predecessor and their PHI nodes need
+ // to be edited to reflect that. No phi nodes need to be introduced because
+ // the loop is in LCSSA.
+
+ for (auto *SBB : successors(OriginalBB)) {
+ if (OriginalLoop.contains(SBB))
+ continue; // not an exit block
+
+ for (PHINode &PN : SBB->phis()) {
+ Value *OldIncoming = PN.getIncomingValueForBlock(OriginalBB);
+ PN.addIncoming(GetClonedValue(OldIncoming), ClonedBB);
+ SE.forgetValue(&PN);
+ }
+ }
+ }
+}
+
+LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
+ const LoopStructure &LS, BasicBlock *Preheader, Value *ExitSubloopAt,
+ BasicBlock *ContinuationBlock) const {
+ // We start with a loop with a single latch:
+ //
+ // +--------------------+
+ // | |
+ // | preheader |
+ // | |
+ // +--------+-----------+
+ // | ----------------\
+ // | / |
+ // +--------v----v------+ |
+ // | | |
+ // | header | |
+ // | | |
+ // +--------------------+ |
+ // |
+ // ..... |
+ // |
+ // +--------------------+ |
+ // | | |
+ // | latch >----------/
+ // | |
+ // +-------v------------+
+ // |
+ // |
+ // | +--------------------+
+ // | | |
+ // +---> original exit |
+ // | |
+ // +--------------------+
+ //
+ // We change the control flow to look like
+ //
+ //
+ // +--------------------+
+ // | |
+ // | preheader >-------------------------+
+ // | | |
+ // +--------v-----------+ |
+ // | /-------------+ |
+ // | / | |
+ // +--------v--v--------+ | |
+ // | | | |
+ // | header | | +--------+ |
+ // | | | | | |
+ // +--------------------+ | | +-----v-----v-----------+
+ // | | | |
+ // | | | .pseudo.exit |
+ // | | | |
+ // | | +-----------v-----------+
+ // | | |
+ // ..... | | |
+ // | | +--------v-------------+
+ // +--------------------+ | | | |
+ // | | | | | ContinuationBlock |
+ // | latch >------+ | | |
+ // | | | +----------------------+
+ // +---------v----------+ |
+ // | |
+ // | |
+ // | +---------------^-----+
+ // | | |
+ // +-----> .exit.selector |
+ // | |
+ // +----------v----------+
+ // |
+ // +--------------------+ |
+ // | | |
+ // | original exit <----+
+ // | |
+ // +--------------------+
+
+ RewrittenRangeInfo RRI;
+
+ BasicBlock *BBInsertLocation = LS.Latch->getNextNode();
+ RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector",
+ &F, BBInsertLocation);
+ RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F,
+ BBInsertLocation);
+
+ BranchInst *PreheaderJump = cast<BranchInst>(Preheader->getTerminator());
+ bool Increasing = LS.IndVarIncreasing;
+ bool IsSignedPredicate = LS.IsSignedPredicate;
+
+ IRBuilder<> B(PreheaderJump);
+ auto NoopOrExt = [&](Value *V) {
+ if (V->getType() == RangeTy)
+ return V;
+ return IsSignedPredicate ? B.CreateSExt(V, RangeTy, "wide." + V->getName())
+ : B.CreateZExt(V, RangeTy, "wide." + V->getName());
+ };
+
+ // EnterLoopCond - is it okay to start executing this `LS'?
+ Value *EnterLoopCond = nullptr;
+ auto Pred =
+ Increasing
+ ? (IsSignedPredicate ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT)
+ : (IsSignedPredicate ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
+ Value *IndVarStart = NoopOrExt(LS.IndVarStart);
+ EnterLoopCond = B.CreateICmp(Pred, IndVarStart, ExitSubloopAt);
+
+ B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit);
+ PreheaderJump->eraseFromParent();
+
+ LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector);
+ B.SetInsertPoint(LS.LatchBr);
+ Value *IndVarBase = NoopOrExt(LS.IndVarBase);
+ Value *TakeBackedgeLoopCond = B.CreateICmp(Pred, IndVarBase, ExitSubloopAt);
+
+ Value *CondForBranch = LS.LatchBrExitIdx == 1
+ ? TakeBackedgeLoopCond
+ : B.CreateNot(TakeBackedgeLoopCond);
+
+ LS.LatchBr->setCondition(CondForBranch);
+
+ B.SetInsertPoint(RRI.ExitSelector);
+
+ // IterationsLeft - are there any more iterations left, given the original
+ // upper bound on the induction variable? If not, we branch to the "real"
+ // exit.
+ Value *LoopExitAt = NoopOrExt(LS.LoopExitAt);
+ Value *IterationsLeft = B.CreateICmp(Pred, IndVarBase, LoopExitAt);
+ B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit);
+
+ BranchInst *BranchToContinuation =
+ BranchInst::Create(ContinuationBlock, RRI.PseudoExit);
+
+ // We emit PHI nodes into `RRI.PseudoExit' that compute the "latest" value of
+ // each of the PHI nodes in the loop header. This feeds into the initial
+ // value of the same PHI nodes if/when we continue execution.
+ for (PHINode &PN : LS.Header->phis()) {
+ PHINode *NewPHI = PHINode::Create(PN.getType(), 2, PN.getName() + ".copy",
+ BranchToContinuation);
+
+ NewPHI->addIncoming(PN.getIncomingValueForBlock(Preheader), Preheader);
+ NewPHI->addIncoming(PN.getIncomingValueForBlock(LS.Latch),
+ RRI.ExitSelector);
+ RRI.PHIValuesAtPseudoExit.push_back(NewPHI);
+ }
+
+ RRI.IndVarEnd = PHINode::Create(IndVarBase->getType(), 2, "indvar.end",
+ BranchToContinuation);
+ RRI.IndVarEnd->addIncoming(IndVarStart, Preheader);
+ RRI.IndVarEnd->addIncoming(IndVarBase, RRI.ExitSelector);
+
+ // The latch exit now has a branch from `RRI.ExitSelector' instead of
+ // `LS.Latch'. The PHI nodes need to be updated to reflect that.
+ LS.LatchExit->replacePhiUsesWith(LS.Latch, RRI.ExitSelector);
+
+ return RRI;
+}
+
+void LoopConstrainer::rewriteIncomingValuesForPHIs(
+ LoopStructure &LS, BasicBlock *ContinuationBlock,
+ const LoopConstrainer::RewrittenRangeInfo &RRI) const {
+ unsigned PHIIndex = 0;
+ for (PHINode &PN : LS.Header->phis())
+ PN.setIncomingValueForBlock(ContinuationBlock,
+ RRI.PHIValuesAtPseudoExit[PHIIndex++]);
+
+ LS.IndVarStart = RRI.IndVarEnd;
+}
+
+BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS,
+ BasicBlock *OldPreheader,
+ const char *Tag) const {
+ BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header);
+ BranchInst::Create(LS.Header, Preheader);
+
+ LS.Header->replacePhiUsesWith(OldPreheader, Preheader);
+
+ return Preheader;
+}
+
+void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs) {
+ Loop *ParentLoop = OriginalLoop.getParentLoop();
+ if (!ParentLoop)
+ return;
+
+ for (BasicBlock *BB : BBs)
+ ParentLoop->addBasicBlockToLoop(BB, LI);
+}
+
+Loop *LoopConstrainer::createClonedLoopStructure(Loop *Original, Loop *Parent,
+ ValueToValueMapTy &VM,
+ bool IsSubloop) {
+ Loop &New = *LI.AllocateLoop();
+ if (Parent)
+ Parent->addChildLoop(&New);
+ else
+ LI.addTopLevelLoop(&New);
+ LPMAddNewLoop(&New, IsSubloop);
+
+ // Add all of the blocks in Original to the new loop.
+ for (auto *BB : Original->blocks())
+ if (LI.getLoopFor(BB) == Original)
+ New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), LI);
+
+ // Add all of the subloops to the new loop.
+ for (Loop *SubLoop : *Original)
+ createClonedLoopStructure(SubLoop, &New, VM, /* IsSubloop */ true);
+
+ return &New;
+}
+
+bool LoopConstrainer::run() {
+ BasicBlock *Preheader = OriginalLoop.getLoopPreheader();
+ assert(Preheader != nullptr && "precondition!");
+
+ OriginalPreheader = Preheader;
+ MainLoopPreheader = Preheader;
+ bool IsSignedPredicate = MainLoopStructure.IsSignedPredicate;
+ bool Increasing = MainLoopStructure.IndVarIncreasing;
+ IntegerType *IVTy = cast<IntegerType>(RangeTy);
+
+ SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "loop-constrainer");
+ Instruction *InsertPt = OriginalPreheader->getTerminator();
+
+ // It would have been better to make `PreLoop' and `PostLoop'
+ // `std::optional<ClonedLoop>'s, but `ValueToValueMapTy' does not have a copy
+ // constructor.
+ ClonedLoop PreLoop, PostLoop;
+ bool NeedsPreLoop =
+ Increasing ? SR.LowLimit.has_value() : SR.HighLimit.has_value();
+ bool NeedsPostLoop =
+ Increasing ? SR.HighLimit.has_value() : SR.LowLimit.has_value();
+
+ Value *ExitPreLoopAt = nullptr;
+ Value *ExitMainLoopAt = nullptr;
+ const SCEVConstant *MinusOneS =
+ cast<SCEVConstant>(SE.getConstant(IVTy, -1, true /* isSigned */));
+
+ if (NeedsPreLoop) {
+ const SCEV *ExitPreLoopAtSCEV = nullptr;
+
+ if (Increasing)
+ ExitPreLoopAtSCEV = *SR.LowLimit;
+ else if (cannotBeMinInLoop(*SR.HighLimit, &OriginalLoop, SE,
+ IsSignedPredicate))
+ ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS);
+ else {
+ LLVM_DEBUG(dbgs() << "could not prove no-overflow when computing "
+ << "preloop exit limit. HighLimit = "
+ << *(*SR.HighLimit) << "\n");
+ return false;
+ }
+
+ if (!Expander.isSafeToExpandAt(ExitPreLoopAtSCEV, InsertPt)) {
+ LLVM_DEBUG(dbgs() << "could not prove that it is safe to expand the"
+ << " preloop exit limit " << *ExitPreLoopAtSCEV
+ << " at block " << InsertPt->getParent()->getName()
+ << "\n");
+ return false;
+ }
+
+ ExitPreLoopAt = Expander.expandCodeFor(ExitPreLoopAtSCEV, IVTy, InsertPt);
+ ExitPreLoopAt->setName("exit.preloop.at");
+ }
+
+ if (NeedsPostLoop) {
+ const SCEV *ExitMainLoopAtSCEV = nullptr;
+
+ if (Increasing)
+ ExitMainLoopAtSCEV = *SR.HighLimit;
+ else if (cannotBeMinInLoop(*SR.LowLimit, &OriginalLoop, SE,
+ IsSignedPredicate))
+ ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS);
+ else {
+ LLVM_DEBUG(dbgs() << "could not prove no-overflow when computing "
+ << "mainloop exit limit. LowLimit = "
+ << *(*SR.LowLimit) << "\n");
+ return false;
+ }
+
+ if (!Expander.isSafeToExpandAt(ExitMainLoopAtSCEV, InsertPt)) {
+ LLVM_DEBUG(dbgs() << "could not prove that it is safe to expand the"
+ << " main loop exit limit " << *ExitMainLoopAtSCEV
+ << " at block " << InsertPt->getParent()->getName()
+ << "\n");
+ return false;
+ }
+
+ ExitMainLoopAt = Expander.expandCodeFor(ExitMainLoopAtSCEV, IVTy, InsertPt);
+ ExitMainLoopAt->setName("exit.mainloop.at");
+ }
+
+ // We clone these ahead of time so that we don't have to deal with changing
+ // and temporarily invalid IR as we transform the loops.
+ if (NeedsPreLoop)
+ cloneLoop(PreLoop, "preloop");
+ if (NeedsPostLoop)
+ cloneLoop(PostLoop, "postloop");
+
+ RewrittenRangeInfo PreLoopRRI;
+
+ if (NeedsPreLoop) {
+ Preheader->getTerminator()->replaceUsesOfWith(MainLoopStructure.Header,
+ PreLoop.Structure.Header);
+
+ MainLoopPreheader =
+ createPreheader(MainLoopStructure, Preheader, "mainloop");
+ PreLoopRRI = changeIterationSpaceEnd(PreLoop.Structure, Preheader,
+ ExitPreLoopAt, MainLoopPreheader);
+ rewriteIncomingValuesForPHIs(MainLoopStructure, MainLoopPreheader,
+ PreLoopRRI);
+ }
+
+ BasicBlock *PostLoopPreheader = nullptr;
+ RewrittenRangeInfo PostLoopRRI;
+
+ if (NeedsPostLoop) {
+ PostLoopPreheader =
+ createPreheader(PostLoop.Structure, Preheader, "postloop");
+ PostLoopRRI = changeIterationSpaceEnd(MainLoopStructure, MainLoopPreheader,
+ ExitMainLoopAt, PostLoopPreheader);
+ rewriteIncomingValuesForPHIs(PostLoop.Structure, PostLoopPreheader,
+ PostLoopRRI);
+ }
+
+ BasicBlock *NewMainLoopPreheader =
+ MainLoopPreheader != Preheader ? MainLoopPreheader : nullptr;
+ BasicBlock *NewBlocks[] = {PostLoopPreheader, PreLoopRRI.PseudoExit,
+ PreLoopRRI.ExitSelector, PostLoopRRI.PseudoExit,
+ PostLoopRRI.ExitSelector, NewMainLoopPreheader};
+
+ // Some of the above may be nullptr, filter them out before passing to
+ // addToParentLoopIfNeeded.
+ auto NewBlocksEnd =
+ std::remove(std::begin(NewBlocks), std::end(NewBlocks), nullptr);
+
+ addToParentLoopIfNeeded(ArrayRef(std::begin(NewBlocks), NewBlocksEnd));
+
+ DT.recalculate(F);
+
+ // We need to first add all the pre and post loop blocks into the loop
+ // structures (as part of createClonedLoopStructure), and then update the
+ // LCSSA form and LoopSimplifyForm. This is necessary for correctly updating
+ // LI when LoopSimplifyForm is generated.
+ Loop *PreL = nullptr, *PostL = nullptr;
+ if (!PreLoop.Blocks.empty()) {
+ PreL = createClonedLoopStructure(&OriginalLoop,
+ OriginalLoop.getParentLoop(), PreLoop.Map,
+ /* IsSubLoop */ false);
+ }
+
+ if (!PostLoop.Blocks.empty()) {
+ PostL =
+ createClonedLoopStructure(&OriginalLoop, OriginalLoop.getParentLoop(),
+ PostLoop.Map, /* IsSubLoop */ false);
+ }
+
+ // This function canonicalizes the loop into Loop-Simplify and LCSSA forms.
+ auto CanonicalizeLoop = [&](Loop *L, bool IsOriginalLoop) {
+ formLCSSARecursively(*L, DT, &LI, &SE);
+ simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr, true);
+    // Pre/post loops are slow paths; we do not need to perform any loop
+    // optimizations on them.
+ if (!IsOriginalLoop)
+ DisableAllLoopOptsOnLoop(*L);
+ };
+ if (PreL)
+ CanonicalizeLoop(PreL, false);
+ if (PostL)
+ CanonicalizeLoop(PostL, false);
+ CanonicalizeLoop(&OriginalLoop, true);
+
+ /// At this point:
+ /// - We've broken a "main loop" out of the loop in a way that the "main loop"
+ /// runs with the induction variable in a subset of [Begin, End).
+  /// - There is no overflow when computing the "main loop" exit limit.
+  /// - The max latch taken count of the loop is limited.
+  /// Together these guarantee that the induction variable will not overflow
+  /// while iterating in the "main loop".
+ if (isa<OverflowingBinaryOperator>(MainLoopStructure.IndVarBase))
+ if (IsSignedPredicate)
+ cast<BinaryOperator>(MainLoopStructure.IndVarBase)
+ ->setHasNoSignedWrap(true);
+ /// TODO: support unsigned predicate.
+ /// To add NUW flag we need to prove that both operands of BO are
+ /// non-negative. E.g:
+ /// ...
+ /// %iv.next = add nsw i32 %iv, -1
+ /// %cmp = icmp ult i32 %iv.next, %n
+ /// br i1 %cmp, label %loopexit, label %loop
+ ///
+ /// -1 is MAX_UINT in terms of unsigned int. Adding anything but zero will
+ /// overflow, therefore NUW flag is not legal here.
+
+ return true;
+}
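
A hedged sketch of the intended call sequence for run(), assuming SubRanges is the struct of optional SCEV bounds declared in LoopConstrainer.h; which limit is set decides whether a pre-loop and/or a post-loop is split off, and the wrapper name constrainLoop is invented here:

#include "llvm/Transforms/Utils/LoopConstrainer.h"

// Illustrative only, not a definitive driver.
static bool constrainLoop(llvm::Loop &L, llvm::LoopInfo &LI,
                          llvm::ScalarEvolution &SE, llvm::DominatorTree &DT,
                          const llvm::LoopStructure &LS, llvm::Type *RangeTy,
                          llvm::LoopConstrainer::SubRanges SR) {
  auto AddLoop = [](llvm::Loop *, bool /*IsSubloop*/) {
    // A real pass would register the new loop with its pass manager here.
  };
  llvm::LoopConstrainer LC(L, LI, AddLoop, LS, SE, DT, RangeTy, SR);
  return LC.run(); // false means the transform bailed out conservatively
}
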
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
index d701cf110154..f76fa3bb6c61 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -351,11 +351,20 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
MaxPeelCount =
std::min((unsigned)SC->getAPInt().getLimitedValue() - 1, MaxPeelCount);
- auto ComputePeelCount = [&](Value *Condition) -> void {
- if (!Condition->getType()->isIntegerTy())
+ const unsigned MaxDepth = 4;
+ std::function<void(Value *, unsigned)> ComputePeelCount =
+ [&](Value *Condition, unsigned Depth) -> void {
+ if (!Condition->getType()->isIntegerTy() || Depth >= MaxDepth)
return;
Value *LeftVal, *RightVal;
+ if (match(Condition, m_And(m_Value(LeftVal), m_Value(RightVal))) ||
+ match(Condition, m_Or(m_Value(LeftVal), m_Value(RightVal)))) {
+ ComputePeelCount(LeftVal, Depth + 1);
+ ComputePeelCount(RightVal, Depth + 1);
+ return;
+ }
+
CmpInst::Predicate Pred;
if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
return;
@@ -443,7 +452,7 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
for (BasicBlock *BB : L.blocks()) {
for (Instruction &I : *BB) {
if (SelectInst *SI = dyn_cast<SelectInst>(&I))
- ComputePeelCount(SI->getCondition());
+ ComputePeelCount(SI->getCondition(), 0);
}
auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
@@ -454,7 +463,7 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
if (L.getLoopLatch() == BB)
continue;
- ComputePeelCount(BI->getCondition());
+ ComputePeelCount(BI->getCondition(), 0);
}
return DesiredPeelCount;
@@ -624,21 +633,24 @@ struct WeightInfo {
/// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to
/// go to exit.
/// Then, Estimated ExitCount = F / E.
-/// For I-th (counting from 0) peeled off iteration we set the the weights for
+/// For I-th (counting from 0) peeled off iteration we set the weights for
/// the peeled exit as (EC - I, 1). It gives us reasonable distribution,
/// The probability to go to exit 1/(EC-I) increases. At the same time
/// the estimated exit count in the remainder loop reduces by I.
/// To avoid dealing with division rounding we can just multiple both part
/// of weights to E and use weight as (F - I * E, E).
static void updateBranchWeights(Instruction *Term, WeightInfo &Info) {
- MDBuilder MDB(Term->getContext());
- Term->setMetadata(LLVMContext::MD_prof,
- MDB.createBranchWeights(Info.Weights));
+ setBranchWeights(*Term, Info.Weights);
for (auto [Idx, SubWeight] : enumerate(Info.SubWeights))
if (SubWeight != 0)
- Info.Weights[Idx] = Info.Weights[Idx] > SubWeight
- ? Info.Weights[Idx] - SubWeight
- : 1;
+ // Don't set the probability of taking the edge from latch to loop header
+ // to less than 1:1 ratio (meaning Weight should not be lower than
+ // SubWeight), as this could significantly reduce the loop's hotness,
+ // which would be incorrect in the case of underestimating the trip count.
+ Info.Weights[Idx] =
+ Info.Weights[Idx] > SubWeight
+ ? std::max(Info.Weights[Idx] - SubWeight, SubWeight)
+ : SubWeight;
}
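
A standalone rendering of this clamped update (illustrative numbers only: F = 100, E = 10, so the estimated exit count is 10): each peeled iteration subtracts E from the fall-through weight but never drops below E, keeping the latch at worst a 1:1 ratio.

#include <algorithm>
#include <cstdint>
#include <iostream>

int main() {
  uint32_t F = 100;      // weight of staying in the loop
  const uint32_t E = 10; // weight of exiting; estimated EC ~= F / E
  for (unsigned I = 0; I < 3; ++I) {
    std::cout << "after peel " << I << ": {" << F << ", " << E << "}\n";
    F = F > E ? std::max(F - E, E) : E; // the clamped update from the patch
  }
}
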
/// Initialize the weights for all exiting blocks.
@@ -685,14 +697,6 @@ static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
}
}
-/// Update the weights of original exiting block after peeling off all
-/// iterations.
-static void fixupBranchWeights(Instruction *Term, const WeightInfo &Info) {
- MDBuilder MDB(Term->getContext());
- Term->setMetadata(LLVMContext::MD_prof,
- MDB.createBranchWeights(Info.Weights));
-}
-
/// Clones the body of the loop L, putting it between \p InsertTop and \p
/// InsertBot.
/// \param IterNumber The serial number of the iteration currently being
@@ -1028,8 +1032,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
}
- for (const auto &[Term, Info] : Weights)
- fixupBranchWeights(Term, Info);
+ for (const auto &[Term, Info] : Weights) {
+ setBranchWeights(*Term, Info.Weights);
+ }
// Update Metadata for count of peeled off iterations.
unsigned AlreadyPeeled = 0;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index d81db5647c60..504f4430dc2c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -25,6 +25,8 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -50,6 +52,9 @@ static cl::opt<bool>
cl::desc("Allow loop rotation multiple times in order to reach "
"a better latch exit"));
+// Probability that a rotated loop has zero trip count / is never entered.
+static constexpr uint32_t ZeroTripCountWeights[] = {1, 127};
+
namespace {
/// A simple loop rotation transformation.
class LoopRotate {
@@ -154,7 +159,8 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
// Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug
// intrinsics.
SmallVector<DbgValueInst *, 1> DbgValues;
- llvm::findDbgValues(DbgValues, OrigHeaderVal);
+ SmallVector<DPValue *, 1> DPValues;
+ llvm::findDbgValues(DbgValues, OrigHeaderVal, &DPValues);
for (auto &DbgValue : DbgValues) {
// The original users in the OrigHeader are already using the original
// definitions.
@@ -175,6 +181,29 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
NewVal = UndefValue::get(OrigHeaderVal->getType());
DbgValue->replaceVariableLocationOp(OrigHeaderVal, NewVal);
}
+
+ // RemoveDIs: duplicate implementation for non-instruction debug-info
+ // storage in DPValues.
+ for (DPValue *DPV : DPValues) {
+ // The original users in the OrigHeader are already using the original
+ // definitions.
+ BasicBlock *UserBB = DPV->getMarker()->getParent();
+ if (UserBB == OrigHeader)
+ continue;
+
+ // Users in the OrigPreHeader need to use the value to which the
+ // original definitions are mapped and anything else can be handled by
+ // the SSAUpdater. To avoid adding PHINodes, check if the value is
+ // available in UserBB, if not substitute undef.
+ Value *NewVal;
+ if (UserBB == OrigPreheader)
+ NewVal = OrigPreHeaderVal;
+ else if (SSA.HasValueForBlock(UserBB))
+ NewVal = SSA.GetValueInMiddleOfBlock(UserBB);
+ else
+ NewVal = UndefValue::get(OrigHeaderVal->getType());
+ DPV->replaceVariableLocationOp(OrigHeaderVal, NewVal);
+ }
}
}
@@ -244,6 +273,123 @@ static bool canRotateDeoptimizingLatchExit(Loop *L) {
return false;
}
+static void updateBranchWeights(BranchInst &PreHeaderBI, BranchInst &LoopBI,
+ bool HasConditionalPreHeader,
+ bool SuccsSwapped) {
+ MDNode *WeightMD = getBranchWeightMDNode(PreHeaderBI);
+ if (WeightMD == nullptr)
+ return;
+
+ // LoopBI should currently be a clone of PreHeaderBI with the same
+ // metadata. But we double check to make sure we don't have a degenerate case
+ // where instsimplify changed the instructions.
+ if (WeightMD != getBranchWeightMDNode(LoopBI))
+ return;
+
+ SmallVector<uint32_t, 2> Weights;
+ extractFromBranchWeightMD(WeightMD, Weights);
+ if (Weights.size() != 2)
+ return;
+ uint32_t OrigLoopExitWeight = Weights[0];
+ uint32_t OrigLoopBackedgeWeight = Weights[1];
+
+ if (SuccsSwapped)
+ std::swap(OrigLoopExitWeight, OrigLoopBackedgeWeight);
+
+ // Update branch weights. Consider the following edge-counts:
+ //
+ // | |-------- |
+ // V V | V
+ // Br i1 ... | Br i1 ...
+ // | | | | |
+ // x| y| | becomes: | y0| |-----
+ // V V | | V V |
+ // Exit Loop | | Loop |
+ // | | | Br i1 ... |
+ // ----- | | | |
+ // x0| x1| y1 | |
+ // V V ----
+ // Exit
+ //
+ // The following must hold:
+ // - x == x0 + x1 # counts to "exit" must stay the same.
+ // - y0 == x - x0 == x1 # how often loop was entered at all.
+ // - y1 == y - y0 # How often loop was repeated (after first iter.).
+ //
+ // We cannot generally deduce how often we had a zero-trip count loop so we
+ // have to make a guess for how to distribute x among the new x0 and x1.
+
+ uint32_t ExitWeight0; // aka x0
+ uint32_t ExitWeight1; // aka x1
+ uint32_t EnterWeight; // aka y0
+ uint32_t LoopBackWeight; // aka y1
+ if (OrigLoopExitWeight > 0 && OrigLoopBackedgeWeight > 0) {
+ ExitWeight0 = 0;
+ if (HasConditionalPreHeader) {
+ // Here we cannot know how many 0-trip count loops we have, so we guess:
+ if (OrigLoopBackedgeWeight >= OrigLoopExitWeight) {
+ // If the loop count is bigger than the exit count then we set
+ // probabilities as if 0-trip count nearly never happens.
+ ExitWeight0 = ZeroTripCountWeights[0];
+ // Scale up counts if necessary so we can match `ZeroTripCountWeights`
+        // for the `ExitWeight0`:`ExitWeight1` (aka `x0`:`x1`) ratio.
+ while (OrigLoopExitWeight < ZeroTripCountWeights[1] + ExitWeight0) {
+ // ... but don't overflow.
+ uint32_t const HighBit = uint32_t{1} << (sizeof(uint32_t) * 8 - 1);
+ if ((OrigLoopBackedgeWeight & HighBit) != 0 ||
+ (OrigLoopExitWeight & HighBit) != 0)
+ break;
+ OrigLoopBackedgeWeight <<= 1;
+ OrigLoopExitWeight <<= 1;
+ }
+ } else {
+ // If there's a higher exit-count than backedge-count then we set
+ // probabilities as if there are only 0-trip and 1-trip cases.
+ ExitWeight0 = OrigLoopExitWeight - OrigLoopBackedgeWeight;
+ }
+ }
+ ExitWeight1 = OrigLoopExitWeight - ExitWeight0;
+ EnterWeight = ExitWeight1;
+ LoopBackWeight = OrigLoopBackedgeWeight - EnterWeight;
+ } else if (OrigLoopExitWeight == 0) {
+ if (OrigLoopBackedgeWeight == 0) {
+ // degenerate case... keep everything zero...
+ ExitWeight0 = 0;
+ ExitWeight1 = 0;
+ EnterWeight = 0;
+ LoopBackWeight = 0;
+ } else {
+      // Special-case "LoopExitWeight == 0" weights, which behave like an
+      // endless loop where we don't want loop-entry (y0) to be the same as
+      // loop-exit (x1).
+ ExitWeight0 = 0;
+ ExitWeight1 = 0;
+ EnterWeight = 1;
+ LoopBackWeight = OrigLoopBackedgeWeight;
+ }
+ } else {
+ // loop is never entered.
+ assert(OrigLoopBackedgeWeight == 0 && "remaining case is backedge zero");
+ ExitWeight0 = 1;
+ ExitWeight1 = 1;
+ EnterWeight = 0;
+ LoopBackWeight = 0;
+ }
+
+ const uint32_t LoopBIWeights[] = {
+ SuccsSwapped ? LoopBackWeight : ExitWeight1,
+ SuccsSwapped ? ExitWeight1 : LoopBackWeight,
+ };
+ setBranchWeights(LoopBI, LoopBIWeights);
+ if (HasConditionalPreHeader) {
+ const uint32_t PreHeaderBIWeights[] = {
+ SuccsSwapped ? EnterWeight : ExitWeight0,
+ SuccsSwapped ? ExitWeight0 : EnterWeight,
+ };
+ setBranchWeights(PreHeaderBI, PreHeaderBIWeights);
+ }
+}
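
A standalone check of one branch of this redistribution (illustrative profile {exit = 12, backedge = 4}; since exits outnumber backedges, the code assumes only 0- and 1-trip executions):

#include <cstdint>
#include <iostream>

int main() {
  uint32_t X = 12, Y = 4; // x (exit) and y (backedge) from the diagram above
  uint32_t X0 = X - Y;    // zero-trip exits, taken straight from the preheader
  uint32_t X1 = X - X0;   // exits taken from inside the rotated loop
  uint32_t Y0 = X1;       // loop entries
  uint32_t Y1 = Y - Y0;   // backedges after the first iteration
  // Invariants from the comment above: X == X0 + X1, Y0 == X1, Y1 == Y - Y0.
  std::cout << "x0=" << X0 << " x1=" << X1 << " y0=" << Y0 << " y1=" << Y1
            << "\n"; // prints x0=8 x1=4 y0=4 y1=0
}
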
+
/// Rotate loop LP. Return true if the loop is rotated.
///
/// \param SimplifiedLatch is true if the latch was just folded into the final
@@ -363,7 +509,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// loop. Otherwise loop is not suitable for rotation.
BasicBlock *Exit = BI->getSuccessor(0);
BasicBlock *NewHeader = BI->getSuccessor(1);
- if (L->contains(Exit))
+ bool BISuccsSwapped = L->contains(Exit);
+ if (BISuccsSwapped)
std::swap(Exit, NewHeader);
assert(NewHeader && "Unable to determine new loop header");
assert(L->contains(NewHeader) && !L->contains(Exit) &&
@@ -394,20 +541,32 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// duplication.
using DbgIntrinsicHash =
std::pair<std::pair<hash_code, DILocalVariable *>, DIExpression *>;
- auto makeHash = [](DbgVariableIntrinsic *D) -> DbgIntrinsicHash {
+ auto makeHash = [](auto *D) -> DbgIntrinsicHash {
auto VarLocOps = D->location_ops();
return {{hash_combine_range(VarLocOps.begin(), VarLocOps.end()),
D->getVariable()},
D->getExpression()};
};
+
SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics;
for (Instruction &I : llvm::drop_begin(llvm::reverse(*OrigPreheader))) {
- if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I))
+ if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I)) {
DbgIntrinsics.insert(makeHash(DII));
- else
+ // Until RemoveDIs supports dbg.declares in DPValue format, we'll need
+ // to collect DPValues attached to any other debug intrinsics.
+ for (const DPValue &DPV : DII->getDbgValueRange())
+ DbgIntrinsics.insert(makeHash(&DPV));
+ } else {
break;
+ }
}
+ // Build DPValue hashes for DPValues attached to the terminator, which isn't
+ // considered in the loop above.
+ for (const DPValue &DPV :
+ OrigPreheader->getTerminator()->getDbgValueRange())
+ DbgIntrinsics.insert(makeHash(&DPV));
+
// Remember the local noalias scope declarations in the header. After the
// rotation, they must be duplicated and the scope must be cloned. This
// avoids unwanted interaction across iterations.
@@ -416,6 +575,29 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
NoAliasDeclInstructions.push_back(Decl);
+ Module *M = OrigHeader->getModule();
+
+ // Track the next DPValue to clone. If we have a sequence where an
+ // instruction is hoisted instead of being cloned:
+ // DPValue blah
+ // %foo = add i32 0, 0
+ // DPValue xyzzy
+ // %bar = call i32 @foobar()
+ // where %foo is hoisted, then the DPValue "blah" will be seen twice, once
+  // attached to %foo, then when %foo is hoisted it will "fall down" onto the
+ // function call:
+ // DPValue blah
+ // DPValue xyzzy
+ // %bar = call i32 @foobar()
+ // causing it to appear attached to the call too.
+ //
+ // To avoid this, cloneDebugInfoFrom takes an optional "start cloning from
+ // here" position to account for this behaviour. We point it at any DPValues
+ // on the next instruction, here labelled xyzzy, before we hoist %foo.
+  // Later, we only clone DPValues from that position (xyzzy) onwards,
+ // which avoids cloning DPValue "blah" multiple times.
+ std::optional<DPValue::self_iterator> NextDbgInst = std::nullopt;
+
while (I != E) {
Instruction *Inst = &*I++;
@@ -428,7 +610,21 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() &&
!Inst->mayWriteToMemory() && !Inst->isTerminator() &&
!isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) {
+
+ if (LoopEntryBranch->getParent()->IsNewDbgInfoFormat) {
+ auto DbgValueRange =
+ LoopEntryBranch->cloneDebugInfoFrom(Inst, NextDbgInst);
+ RemapDPValueRange(M, DbgValueRange, ValueMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ // Erase anything we've seen before.
+ for (DPValue &DPV : make_early_inc_range(DbgValueRange))
+ if (DbgIntrinsics.count(makeHash(&DPV)))
+ DPV.eraseFromParent();
+ }
+
+ NextDbgInst = I->getDbgValueRange().begin();
Inst->moveBefore(LoopEntryBranch);
+
++NumInstrsHoisted;
continue;
}
@@ -439,6 +635,17 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
++NumInstrsDuplicated;
+ if (LoopEntryBranch->getParent()->IsNewDbgInfoFormat) {
+ auto Range = C->cloneDebugInfoFrom(Inst, NextDbgInst);
+ RemapDPValueRange(M, Range, ValueMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ NextDbgInst = std::nullopt;
+ // Erase anything we've seen before.
+ for (DPValue &DPV : make_early_inc_range(Range))
+ if (DbgIntrinsics.count(makeHash(&DPV)))
+ DPV.eraseFromParent();
+ }
+
// Eagerly remap the operands of the instruction.
RemapInstruction(C, ValueMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
@@ -501,12 +708,13 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// as U1'' and U1' scopes will not be compatible wrt to the local restrict
// Clone the llvm.experimental.noalias.decl again for the NewHeader.
- Instruction *NewHeaderInsertionPoint = &(*NewHeader->getFirstNonPHI());
+ BasicBlock::iterator NewHeaderInsertionPoint =
+ NewHeader->getFirstNonPHIIt();
for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions) {
LLVM_DEBUG(dbgs() << " Cloning llvm.experimental.noalias.scope.decl:"
<< *NAD << "\n");
Instruction *NewNAD = NAD->clone();
- NewNAD->insertBefore(NewHeaderInsertionPoint);
+ NewNAD->insertBefore(*NewHeader, NewHeaderInsertionPoint);
}
// Scopes must now be duplicated, once for OrigHeader and once for
@@ -553,6 +761,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// OrigPreHeader's old terminator (the original branch into the loop), and
// remove the corresponding incoming values from the PHI nodes in OrigHeader.
LoopEntryBranch->eraseFromParent();
+ OrigPreheader->flushTerminatorDbgValues();
// Update MemorySSA before the rewrite call below changes the 1:1
// instruction:cloned_instruction_or_value mapping.
@@ -605,9 +814,14 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// to split as many edges.
BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
assert(PHBI->isConditional() && "Should be clone of BI condbr!");
- if (!isa<ConstantInt>(PHBI->getCondition()) ||
- PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) !=
- NewHeader) {
+ const Value *Cond = PHBI->getCondition();
+ const bool HasConditionalPreHeader =
+ !isa<ConstantInt>(Cond) ||
+ PHBI->getSuccessor(cast<ConstantInt>(Cond)->isZero()) != NewHeader;
+
+ updateBranchWeights(*PHBI, *BI, HasConditionalPreHeader, BISuccsSwapped);
+
+ if (HasConditionalPreHeader) {
// The conditional branch can't be folded, handle the general case.
// Split edges as necessary to preserve LoopSimplify form.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index 3e604fdf2e11..07e622b1577f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -429,8 +429,8 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx));
}
// Nuke all entries except the zero'th.
- for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i)
- PN->removeIncomingValue(e-i, false);
+ PN->removeIncomingValueIf([](unsigned Idx) { return Idx != 0; },
+ /* DeletePHIIfEmpty */ false);
// Finally, add the newly constructed PHI node as the entry for the BEBlock.
PN->addIncoming(NewPN, BEBlock);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 511dd61308f9..ee6f7b35750a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -24,7 +24,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist_iterator.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -838,7 +837,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
DTUToUse ? nullptr : DT)) {
// Dest has been folded into Fold. Update our worklists accordingly.
std::replace(Latches.begin(), Latches.end(), Dest, Fold);
- llvm::erase_value(UnrolledLoopBlocks, Dest);
+ llvm::erase(UnrolledLoopBlocks, Dest);
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index 31b8cd34eb24..3c06a6e47a30 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -19,7 +19,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/DomTreeUpdater.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 1e22eca30d2d..612f69970881 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -56,6 +56,17 @@ static cl::opt<bool> UnrollRuntimeOtherExitPredictable(
"unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden,
cl::desc("Assume the non latch exit block to be predictable"));
+// Probability that the loop trip count is so small that after the prolog
+// we do not enter the unrolled loop at all.
+// It is unlikely that the loop trip count is smaller than the unroll factor;
+// other than that, the choice of constant is not tuned yet.
+static const uint32_t UnrolledLoopHeaderWeights[] = {1, 127};
+// Probability that the loop trip count is so small that we skip the unrolled
+// loop completely and immediately enter the epilogue loop.
+// It is unlikely that the loop trip count is smaller than the unroll factor;
+// other than that, the choice of constant is not tuned yet.
+static const uint32_t EpilogHeaderWeights[] = {1, 127};
+
/// Connect the unrolling prolog code to the original loop.
/// The unrolling prolog code contains code to execute the
/// 'extra' iterations if the run-time trip count modulo the
@@ -105,8 +116,8 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
// PrologLatch. When supporting multiple-exiting block loops, we can have
// two or more blocks that have the LatchExit as the target in the
// original loop.
- PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
- PrologExit->getFirstNonPHI());
+ PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr");
+ NewPN->insertBefore(PrologExit->getFirstNonPHIIt());
// Adding a value to the new PHI node from the original loop preheader.
// This is the value that skips all the prolog code.
if (L->contains(&PN)) {
@@ -169,7 +180,14 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
nullptr, PreserveLCSSA);
// Add the branch to the exit block (around the unrolled loop)
- B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
+ MDNode *BranchWeights = nullptr;
+ if (hasBranchWeightMD(*Latch->getTerminator())) {
+ // Assume loop is nearly always entered.
+ MDBuilder MDB(B.getContext());
+ BranchWeights = MDB.createBranchWeights(UnrolledLoopHeaderWeights);
+ }
+ B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader,
+ BranchWeights);
InsertPt->eraseFromParent();
if (DT) {
auto *NewDom = DT->findNearestCommonDominator(OriginalLoopLatchExit,
@@ -194,8 +212,8 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
BasicBlock *Exit, BasicBlock *PreHeader,
BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
ValueToValueMapTy &VMap, DominatorTree *DT,
- LoopInfo *LI, bool PreserveLCSSA,
- ScalarEvolution &SE) {
+ LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE,
+ unsigned Count) {
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Loop must have a latch");
BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
@@ -269,8 +287,8 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
for (PHINode &PN : Succ->phis()) {
// Add new PHI nodes to the loop exit block and update epilog
// PHIs with the new PHI values.
- PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
- NewExit->getFirstNonPHI());
+ PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr");
+ NewPN->insertBefore(NewExit->getFirstNonPHIIt());
// Adding a value to the new PHI node from the unrolling loop preheader.
NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
// Adding a value to the new PHI node from the unrolling loop latch.
@@ -292,7 +310,13 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
PreserveLCSSA);
// Add the branch to the exit block (around the unrolling loop)
- B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
+ MDNode *BranchWeights = nullptr;
+ if (hasBranchWeightMD(*Latch->getTerminator())) {
+ // Assume equal distribution in interval [0, Count).
+ MDBuilder MDB(B.getContext());
+ BranchWeights = MDB.createBranchWeights(1, Count - 1);
+ }
+ B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit, BranchWeights);
InsertPt->eraseFromParent();
if (DT) {
auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit);
@@ -316,8 +340,9 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
const bool UnrollRemainder,
BasicBlock *InsertTop,
BasicBlock *InsertBot, BasicBlock *Preheader,
- std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
- ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
+ std::vector<BasicBlock *> &NewBlocks,
+ LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
+ DominatorTree *DT, LoopInfo *LI, unsigned Count) {
StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
@@ -363,14 +388,34 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
IRBuilder<> Builder(LatchBR);
- PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
- suffix + ".iter",
- FirstLoopBB->getFirstNonPHI());
+ PHINode *NewIdx =
+ PHINode::Create(NewIter->getType(), 2, suffix + ".iter");
+ NewIdx->insertBefore(FirstLoopBB->getFirstNonPHIIt());
auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
auto *One = ConstantInt::get(NewIdx->getType(), 1);
- Value *IdxNext = Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
+ Value *IdxNext =
+ Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
Value *IdxCmp = Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp");
- Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
+ MDNode *BranchWeights = nullptr;
+ if (hasBranchWeightMD(*LatchBR)) {
+ uint32_t ExitWeight;
+ uint32_t BackEdgeWeight;
+ if (Count >= 3) {
+ // Note: We do not enter this loop for zero-remainders. The check
+ // is at the end of the loop. We assume equal distribution between
+ // possible remainders in [1, Count).
+ ExitWeight = 1;
+ BackEdgeWeight = (Count - 2) / 2;
+ } else {
+          // Unnecessary backedge; it should never be taken. The conditional
+          // jump should be optimized away later.
+ ExitWeight = 1;
+ BackEdgeWeight = 0;
+ }
+ MDBuilder MDB(Builder.getContext());
+ BranchWeights = MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
+ }
+ Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot, BranchWeights);
NewIdx->addIncoming(Zero, InsertTop);
NewIdx->addIncoming(IdxNext, NewBB);
LatchBR->eraseFromParent();
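
These weights have a simple closed form: a latch with weights {BackEdgeWeight, ExitWeight} implies a backedge probability of BackEdgeWeight / (BackEdgeWeight + ExitWeight), hence an expected trip count of BackEdgeWeight + 1 when ExitWeight is 1. A sketch of the arithmetic (not part of the patch):

    // Expected remainder-loop trip count implied by the weights above.
    // For Count = 8: BackEdgeWeight = (8 - 2) / 2 = 3, so E[trips] = 4,
    // exactly the mean of the equally likely remainders 1..7.
    unsigned expectedRemainderTrips(unsigned Count) {
      unsigned BackEdgeWeight = (Count - 2) / 2;
      return BackEdgeWeight + 1; // ~= Count / 2
    }
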
@@ -464,32 +509,6 @@ static bool canProfitablyUnrollMultiExitLoop(
// know of kinds of multiexit loops that would benefit from unrolling.
}
-// Assign the maximum possible trip count as the back edge weight for the
-// remainder loop if the original loop comes with a branch weight.
-static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop,
- Loop *RemainderLoop,
- uint64_t UnrollFactor) {
- uint64_t TrueWeight, FalseWeight;
- BranchInst *LatchBR =
- cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator());
- if (!extractBranchWeights(*LatchBR, TrueWeight, FalseWeight))
- return;
- uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader()
- ? FalseWeight
- : TrueWeight;
- assert(UnrollFactor > 1);
- uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight;
- BasicBlock *Header = RemainderLoop->getHeader();
- BasicBlock *Latch = RemainderLoop->getLoopLatch();
- auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator());
- unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1);
- MDBuilder MDB(RemainderLatchBR->getContext());
- MDNode *WeightNode =
- HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
- : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
- RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
-}
-
/// Calculate ModVal = (BECount + 1) % Count on the abstract integer domain
/// accounting for the possibility of unsigned overflow in the 2s complement
/// domain. Preconditions:
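
The overflow caveat is that BECount + 1 can wrap to 0 in the unsigned domain. One well-known special case, sketched here under the assumption that Count is a power of two (the general path must be more careful): masking with Count - 1 stays correct across the wrap, because reducing mod 2^N and then mod 2^k equals reducing mod 2^k whenever k <= N.

    #include <cassert>
    #include <cstdint>

    // (BECount + 1) % Count for Count == 1 << k; wraparound of the addition
    // is harmless since 2^k divides 2^64.
    uint64_t modPow2(uint64_t BECount, uint64_t Count) {
      assert(Count && (Count & (Count - 1)) == 0 && "power of two expected");
      return (BECount + 1) & (Count - 1);
    }
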
@@ -775,7 +794,13 @@ bool llvm::UnrollRuntimeLoopRemainder(
BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
// Branch to either remainder (extra iterations) loop or unrolling loop.
- B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
+ MDNode *BranchWeights = nullptr;
+ if (hasBranchWeightMD(*Latch->getTerminator())) {
+ // Assume loop is nearly always entered.
+ MDBuilder MDB(B.getContext());
+ BranchWeights = MDB.createBranchWeights(EpilogHeaderWeights);
+ }
+ B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop, BranchWeights);
PreHeaderBR->eraseFromParent();
if (DT) {
if (UseEpilogRemainder)
@@ -804,12 +829,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
Loop *remainderLoop = CloneLoopBlocks(
L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot,
- NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
-
- // Assign the maximum possible trip count as the back edge weight for the
- // remainder loop if the original loop comes with a branch weight.
- if (remainderLoop && !UnrollRemainder)
- updateLatchBranchWeightsForRemainderLoop(L, remainderLoop, Count);
+ NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI, Count);
// Insert the cloned blocks into the function.
F->splice(InsertBot->getIterator(), F, NewBlocks[0]->getIterator(), F->end());
@@ -893,9 +913,12 @@ bool llvm::UnrollRuntimeLoopRemainder(
// Rewrite the cloned instruction operands to use the values created when the
// clone is created.
for (BasicBlock *BB : NewBlocks) {
+ Module *M = BB->getModule();
for (Instruction &I : *BB) {
RemapInstruction(&I, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ RemapDPValueRange(M, I.getDbgValueRange(), VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
}
}
@@ -903,7 +926,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
// Connect the epilog code to the original loop and update the
// PHI functions.
ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader,
- NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE);
+ NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count);
// Update counter in loop for unrolling.
// Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
@@ -912,8 +935,8 @@ bool llvm::UnrollRuntimeLoopRemainder(
IRBuilder<> B2(NewPreHeader->getTerminator());
Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
- PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
- Header->getFirstNonPHI());
+ PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter");
+ NewIdx->insertBefore(Header->getFirstNonPHIIt());
B2.SetInsertPoint(LatchBR);
auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
auto *One = ConstantInt::get(NewIdx->getType(), 1);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 7d6662c44f07..59485126b280 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -296,7 +296,7 @@ std::optional<MDNode *> llvm::makeFollowupLoopID(
StringRef AttrName = cast<MDString>(NameMD)->getString();
// Do not inherit excluded attributes.
- return !AttrName.startswith(InheritOptionsExceptPrefix);
+ return !AttrName.starts_with(InheritOptionsExceptPrefix);
};
if (InheritThisAttribute(Op))
@@ -556,12 +556,8 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
// Removes all incoming values from all other exiting blocks (including
// duplicate values from an exiting block).
// Nuke all entries except the zero'th entry which is the preheader entry.
- // NOTE! We need to remove Incoming Values in the reverse order as done
- // below, to keep the indices valid for deletion (removeIncomingValues
- // updates getNumIncomingValues and shifts all values down into the
- // operand being deleted).
- for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i)
- P.removeIncomingValue(e - i, false);
+ P.removeIncomingValueIf([](unsigned Idx) { return Idx != 0; },
+ /* DeletePHIIfEmpty */ false);
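
PHINode::removeIncomingValueIf takes a predicate over the original incoming indices and performs the removal in one pass, so the old requirement to delete in reverse order (to keep indices stable) disappears. Minimal usage, mirroring the call above:

    // Keep only the preheader entry at index 0; all other incoming values,
    // including duplicates from one exiting block, are dropped in one pass.
    P.removeIncomingValueIf([](unsigned Idx) { return Idx != 0; },
                            /*DeletePHIIfEmpty=*/false);
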
assert((P.getNumIncomingValues() == 1 &&
P.getIncomingBlock(PredIndex) == Preheader) &&
@@ -608,6 +604,7 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
// Use a map to unique and a vector to guarantee deterministic ordering.
llvm::SmallDenseSet<DebugVariable, 4> DeadDebugSet;
llvm::SmallVector<DbgVariableIntrinsic *, 4> DeadDebugInst;
+ llvm::SmallVector<DPValue *, 4> DeadDPValues;
if (ExitBlock) {
// Given LCSSA form is satisfied, we should not have users of instructions
@@ -632,6 +629,24 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
"Unexpected user in reachable block");
U.set(Poison);
}
+
+ // RemoveDIs: do the same as below for DPValues.
+ if (Block->IsNewDbgInfoFormat) {
+ for (DPValue &DPV :
+ llvm::make_early_inc_range(I.getDbgValueRange())) {
+ DebugVariable Key(DPV.getVariable(), DPV.getExpression(),
+ DPV.getDebugLoc().get());
+ if (!DeadDebugSet.insert(Key).second)
+ continue;
+          // Unlinks the DPV from its container, for later insertion.
+ DPV.removeFromParent();
+ DeadDPValues.push_back(&DPV);
+ }
+ }
+
+      // For each variable encountered, preserve one debug intrinsic (set
+ // to Poison) and transfer it to the loop exit. This terminates any
+ // variable locations that were set during the loop.
auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
if (!DVI)
continue;
@@ -646,12 +661,22 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
 // be replaced with undef. Loop invariant values will still be available.
// Move dbg.values out the loop so that earlier location ranges are still
// terminated and loop invariant assignments are preserved.
- Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI();
- assert(InsertDbgValueBefore &&
+ DIBuilder DIB(*ExitBlock->getModule());
+ BasicBlock::iterator InsertDbgValueBefore =
+ ExitBlock->getFirstInsertionPt();
+ assert(InsertDbgValueBefore != ExitBlock->end() &&
"There should be a non-PHI instruction in exit block, else these "
"instructions will have no parent.");
+
for (auto *DVI : DeadDebugInst)
- DVI->moveBefore(InsertDbgValueBefore);
+ DVI->moveBefore(*ExitBlock, InsertDbgValueBefore);
+
+ // Due to the "head" bit in BasicBlock::iterator, we're going to insert
+    // each DPValue right at the start of the block, whereas dbg.values would be
+ // repeatedly inserted before the first instruction. To replicate this
+ // behaviour, do it backwards.
+ for (DPValue *DPV : llvm::reverse(DeadDPValues))
+ ExitBlock->insertDPValueBefore(DPV, InsertDbgValueBefore);
}
// Remove the block from the reference counting scheme, so that we can
@@ -937,8 +962,8 @@ CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
}
}
-Value *llvm::createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal,
- RecurKind RK, Value *Left, Value *Right) {
+Value *llvm::createAnyOfOp(IRBuilderBase &Builder, Value *StartVal,
+ RecurKind RK, Value *Left, Value *Right) {
if (auto VTy = dyn_cast<VectorType>(Left->getType()))
StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal);
Value *Cmp =
@@ -1028,14 +1053,12 @@ Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,
return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
}
-Value *llvm::createSelectCmpTargetReduction(IRBuilderBase &Builder,
- const TargetTransformInfo *TTI,
- Value *Src,
- const RecurrenceDescriptor &Desc,
- PHINode *OrigPhi) {
- assert(RecurrenceDescriptor::isSelectCmpRecurrenceKind(
- Desc.getRecurrenceKind()) &&
- "Unexpected reduction kind");
+Value *llvm::createAnyOfTargetReduction(IRBuilderBase &Builder, Value *Src,
+ const RecurrenceDescriptor &Desc,
+ PHINode *OrigPhi) {
+ assert(
+ RecurrenceDescriptor::isAnyOfRecurrenceKind(Desc.getRecurrenceKind()) &&
+ "Unexpected reduction kind");
Value *InitVal = Desc.getRecurrenceStartValue();
Value *NewVal = nullptr;
@@ -1068,9 +1091,8 @@ Value *llvm::createSelectCmpTargetReduction(IRBuilderBase &Builder,
return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select");
}
-Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
- const TargetTransformInfo *TTI,
- Value *Src, RecurKind RdxKind) {
+Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, Value *Src,
+ RecurKind RdxKind) {
auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
switch (RdxKind) {
case RecurKind::Add:
@@ -1111,7 +1133,6 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
}
Value *llvm::createTargetReduction(IRBuilderBase &B,
- const TargetTransformInfo *TTI,
const RecurrenceDescriptor &Desc, Value *Src,
PHINode *OrigPhi) {
// TODO: Support in-order reductions based on the recurrence descriptor.
@@ -1121,10 +1142,10 @@ Value *llvm::createTargetReduction(IRBuilderBase &B,
B.setFastMathFlags(Desc.getFastMathFlags());
RecurKind RK = Desc.getRecurrenceKind();
- if (RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK))
- return createSelectCmpTargetReduction(B, TTI, Src, Desc, OrigPhi);
+ if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
+ return createAnyOfTargetReduction(B, Src, Desc, OrigPhi);
- return createSimpleTargetReduction(B, TTI, Src, RK);
+ return createSimpleTargetReduction(B, Src, RK);
}
Value *llvm::createOrderedReduction(IRBuilderBase &B,
@@ -1453,7 +1474,7 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
// Note that we must not perform expansions until after
// we query *all* the costs, because if we perform temporary expansion
// inbetween, one that we might not intend to keep, said expansion
- // *may* affect cost calculation of the the next SCEV's we'll query,
+  // *may* affect cost calculation of the next SCEVs we'll query,
 // and the next SCEV may erroneously get a smaller cost.
// Collect all the candidate PHINodes to be rewritten.
@@ -1632,42 +1653,92 @@ Loop *llvm::cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
struct PointerBounds {
TrackingVH<Value> Start;
TrackingVH<Value> End;
+ Value *StrideToCheck;
};
/// Expand code for the lower and upper bound of the pointer group \p CG
/// in \p TheLoop. \return the values for the bounds.
static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG,
Loop *TheLoop, Instruction *Loc,
- SCEVExpander &Exp) {
+ SCEVExpander &Exp, bool HoistRuntimeChecks) {
LLVMContext &Ctx = Loc->getContext();
- Type *PtrArithTy = Type::getInt8PtrTy(Ctx, CG->AddressSpace);
+ Type *PtrArithTy = PointerType::get(Ctx, CG->AddressSpace);
Value *Start = nullptr, *End = nullptr;
LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
- Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
- End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
+ const SCEV *Low = CG->Low, *High = CG->High, *Stride = nullptr;
+
+ // If the Low and High values are themselves loop-variant, then we may want
+ // to expand the range to include those covered by the outer loop as well.
+ // There is a trade-off here with the advantage being that creating checks
+ // using the expanded range permits the runtime memory checks to be hoisted
+ // out of the outer loop. This reduces the cost of entering the inner loop,
+ // which can be significant for low trip counts. The disadvantage is that
+ // there is a chance we may now never enter the vectorized inner loop,
+ // whereas using a restricted range check could have allowed us to enter at
+ // least once. This is why the behaviour is not currently the default and is
+ // controlled by the parameter 'HoistRuntimeChecks'.
+ if (HoistRuntimeChecks && TheLoop->getParentLoop() &&
+ isa<SCEVAddRecExpr>(High) && isa<SCEVAddRecExpr>(Low)) {
+ auto *HighAR = cast<SCEVAddRecExpr>(High);
+ auto *LowAR = cast<SCEVAddRecExpr>(Low);
+ const Loop *OuterLoop = TheLoop->getParentLoop();
+ const SCEV *Recur = LowAR->getStepRecurrence(*Exp.getSE());
+ if (Recur == HighAR->getStepRecurrence(*Exp.getSE()) &&
+ HighAR->getLoop() == OuterLoop && LowAR->getLoop() == OuterLoop) {
+ BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
+ const SCEV *OuterExitCount =
+ Exp.getSE()->getExitCount(OuterLoop, OuterLoopLatch);
+ if (!isa<SCEVCouldNotCompute>(OuterExitCount) &&
+ OuterExitCount->getType()->isIntegerTy()) {
+ const SCEV *NewHigh = cast<SCEVAddRecExpr>(High)->evaluateAtIteration(
+ OuterExitCount, *Exp.getSE());
+ if (!isa<SCEVCouldNotCompute>(NewHigh)) {
+ LLVM_DEBUG(dbgs() << "LAA: Expanded RT check for range to include "
+ "outer loop in order to permit hoisting\n");
+ High = NewHigh;
+ Low = cast<SCEVAddRecExpr>(Low)->getStart();
+ // If there is a possibility that the stride is negative then we have
+ // to generate extra checks to ensure the stride is positive.
+ if (!Exp.getSE()->isKnownNonNegative(Recur)) {
+ Stride = Recur;
+ LLVM_DEBUG(dbgs() << "LAA: ... but need to check stride is "
+ "positive: "
+ << *Stride << '\n');
+ }
+ }
+ }
+ }
+ }
+
+ Start = Exp.expandCodeFor(Low, PtrArithTy, Loc);
+ End = Exp.expandCodeFor(High, PtrArithTy, Loc);
if (CG->NeedsFreeze) {
IRBuilder<> Builder(Loc);
Start = Builder.CreateFreeze(Start, Start->getName() + ".fr");
End = Builder.CreateFreeze(End, End->getName() + ".fr");
}
- LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n");
- return {Start, End};
+ Value *StrideVal =
+ Stride ? Exp.expandCodeFor(Stride, Stride->getType(), Loc) : nullptr;
+ LLVM_DEBUG(dbgs() << "Start: " << *Low << " End: " << *High << "\n");
+ return {Start, End, StrideVal};
}
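
To make the trade-off above concrete, consider a nest where the inner loop's accessed range depends on the outer induction variable. A hedged C-style illustration (names hypothetical, not from the patch):

    // Per-iteration bounds for src are [src + i*N, src + i*N + N), which are
    // loop-variant in i, so the memcheck would sit inside the outer loop.
    // Evaluating the AddRec at the outer exit count widens the range to
    // [src, src + M*N): one invariant check hoistable above the i-loop, at
    // the risk of failing even when a single inner trip would have passed.
    void copy2d(int *dst, const int *src, long M, long N) {
      for (long i = 0; i < M; ++i)
        for (long j = 0; j < N; ++j) // vectorization candidate
          dst[i * N + j] = src[i * N + j];
    }
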
/// Turns a collection of checks into a collection of expanded upper and
/// lower bounds for both pointers in the check.
static SmallVector<std::pair<PointerBounds, PointerBounds>, 4>
expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
- Instruction *Loc, SCEVExpander &Exp) {
+ Instruction *Loc, SCEVExpander &Exp, bool HoistRuntimeChecks) {
SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
// Here we're relying on the SCEV Expander's cache to only emit code for the
// same bounds once.
transform(PointerChecks, std::back_inserter(ChecksWithBounds),
[&](const RuntimePointerCheck &Check) {
- PointerBounds First = expandBounds(Check.first, L, Loc, Exp),
- Second = expandBounds(Check.second, L, Loc, Exp);
+ PointerBounds First = expandBounds(Check.first, L, Loc, Exp,
+ HoistRuntimeChecks),
+ Second = expandBounds(Check.second, L, Loc, Exp,
+ HoistRuntimeChecks);
return std::make_pair(First, Second);
});
@@ -1677,10 +1748,11 @@ expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
Value *llvm::addRuntimeChecks(
Instruction *Loc, Loop *TheLoop,
const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
- SCEVExpander &Exp) {
+ SCEVExpander &Exp, bool HoistRuntimeChecks) {
// TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible.
// TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible
- auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, Exp);
+ auto ExpandedChecks =
+ expandBounds(PointerChecks, TheLoop, Loc, Exp, HoistRuntimeChecks);
LLVMContext &Ctx = Loc->getContext();
IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
@@ -1693,21 +1765,13 @@ Value *llvm::addRuntimeChecks(
const PointerBounds &A = Check.first, &B = Check.second;
// Check if two pointers (A and B) conflict where conflict is computed as:
// start(A) <= end(B) && start(B) <= end(A)
- unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
- unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
- assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
- (AS1 == A.End->getType()->getPointerAddressSpace()) &&
+ assert((A.Start->getType()->getPointerAddressSpace() ==
+ B.End->getType()->getPointerAddressSpace()) &&
+ (B.Start->getType()->getPointerAddressSpace() ==
+ A.End->getType()->getPointerAddressSpace()) &&
"Trying to bounds check pointers with different address spaces");
- Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
- Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
-
- Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
- Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
- Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
- Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
-
// [A|B].Start points to the first accessed byte under base [A|B].
// [A|B].End points to the last accessed byte, plus one.
// There is no conflict when the intervals are disjoint:
@@ -1716,9 +1780,21 @@ Value *llvm::addRuntimeChecks(
// bound0 = (B.Start < A.End)
// bound1 = (A.Start < B.End)
// IsConflict = bound0 & bound1
- Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
- Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
+ Value *Cmp0 = ChkBuilder.CreateICmpULT(A.Start, B.End, "bound0");
+ Value *Cmp1 = ChkBuilder.CreateICmpULT(B.Start, A.End, "bound1");
Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
+ if (A.StrideToCheck) {
+ Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
+ A.StrideToCheck, ConstantInt::get(A.StrideToCheck->getType(), 0),
+ "stride.check");
+ IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride);
+ }
+ if (B.StrideToCheck) {
+ Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
+ B.StrideToCheck, ConstantInt::get(B.StrideToCheck->getType(), 0),
+ "stride.check");
+ IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride);
+ }
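
The generated sequence is the classic disjoint-interval test from the comment above, with the optional stride-sign test OR'd in as a further "may conflict" condition. A host-side analogue of the logic (a sketch; the real code compares expanded SCEV bounds):

    #include <cstdint>

    // Half-open byte ranges [AStart, AEnd) and [BStart, BEnd) overlap iff
    // each starts before the other ends ("bound0" and "bound1"); a possibly
    // negative stride invalidates the expanded bounds ("stride.check").
    bool mayConflict(uintptr_t AStart, uintptr_t AEnd, uintptr_t BStart,
                     uintptr_t BEnd, intptr_t Stride) {
      bool Bound0 = BStart < AEnd;
      bool Bound1 = AStart < BEnd;
      return (Bound0 && Bound1) || Stride < 0;
    }
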
if (MemoryRuntimeCheck) {
IsConflict =
ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
@@ -1740,23 +1816,31 @@ Value *llvm::addDiffRuntimeChecks(
// Our instructions might fold to a constant.
Value *MemoryRuntimeCheck = nullptr;
+ auto &SE = *Expander.getSE();
+  // Map to keep track of created compares. The key is the pair of operands for
+ // the compare, to allow detecting and re-using redundant compares.
+ DenseMap<std::pair<Value *, Value *>, Value *> SeenCompares;
for (const auto &C : Checks) {
Type *Ty = C.SinkStart->getType();
// Compute VF * IC * AccessSize.
auto *VFTimesUFTimesSize =
ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
ConstantInt::get(Ty, IC * C.AccessSize));
- Value *Sink = Expander.expandCodeFor(C.SinkStart, Ty, Loc);
- Value *Src = Expander.expandCodeFor(C.SrcStart, Ty, Loc);
- if (C.NeedsFreeze) {
- IRBuilder<> Builder(Loc);
- Sink = Builder.CreateFreeze(Sink, Sink->getName() + ".fr");
- Src = Builder.CreateFreeze(Src, Src->getName() + ".fr");
- }
- Value *Diff = ChkBuilder.CreateSub(Sink, Src);
- Value *IsConflict =
- ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check");
+ Value *Diff = Expander.expandCodeFor(
+ SE.getMinusSCEV(C.SinkStart, C.SrcStart), Ty, Loc);
+
+    // Check if the same compare has already been created. In that case,
+ // there is no need to check it again.
+ Value *IsConflict = SeenCompares.lookup({Diff, VFTimesUFTimesSize});
+ if (IsConflict)
+ continue;
+ IsConflict =
+ ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check");
+ SeenCompares.insert({{Diff, VFTimesUFTimesSize}, IsConflict});
+ if (C.NeedsFreeze)
+ IsConflict =
+ ChkBuilder.CreateFreeze(IsConflict, IsConflict->getName() + ".fr");
if (MemoryRuntimeCheck) {
IsConflict =
ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
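
The reuse works because the distance is now built as a SCEV rather than an IR-level sub: SCEV expressions are uniqued inside ScalarEvolution, so two checks with the same sink/source distance yield the same expression, and the expander's cache will then typically hand back the same Value, letting the SeenCompares lookup hit. A sketch of the property being relied on (value names hypothetical):

    // SCEVs are uniqued: structurally equal distances compare equal by
    // pointer, without any explicit structural comparison.
    const SCEV *D1 = SE.getMinusSCEV(SinkA, SrcA);
    const SCEV *D2 = SE.getMinusSCEV(SinkB, SrcB);
    if (D1 == D2) {
      // Same distance; the earlier "diff.check" compare can be reused.
    }
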
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index 78ebe75c121b..548b0f3c55f0 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -145,8 +145,8 @@ void LoopVersioning::addPHINodes(
}
// If not create it.
if (!PN) {
- PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver",
- &PHIBlock->front());
+ PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver");
+ PN->insertBefore(PHIBlock->begin());
SmallVector<User*, 8> UsersToUpdate;
for (User *U : Inst->users())
if (!VersionedLoop->contains(cast<Instruction>(U)->getParent()))
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp
index 195c274ff18e..4908535cba54 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp
@@ -128,7 +128,7 @@ static bool runImpl(Module &M) {
// extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d);
LLVMContext &C = M.getContext();
- PointerType *VoidStar = Type::getInt8PtrTy(C);
+ PointerType *VoidStar = PointerType::getUnqual(C);
Type *AtExitFuncArgs[] = {VoidStar};
FunctionType *AtExitFuncTy =
FunctionType::get(Type::getVoidTy(C), AtExitFuncArgs,
@@ -140,6 +140,17 @@ static bool runImpl(Module &M) {
{PointerType::get(AtExitFuncTy, 0), VoidStar, VoidStar},
/*isVarArg=*/false));
+ // If __cxa_atexit is defined (e.g. in the case of LTO) and arg0 is not
+  // actually used (i.e. it's a dummy/stub function, as used in emscripten when
+  // the program never exits), we can simply return early and clear out
+ // @llvm.global_dtors.
+ if (auto F = dyn_cast<Function>(AtExit.getCallee())) {
+ if (F && F->hasExactDefinition() && F->getArg(0)->getNumUses() == 0) {
+ GV->eraseFromParent();
+ return true;
+ }
+ }
+
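The case the comment describes looks roughly like the stub below (a sketch in the spirit of emscripten's no-exit mode, not code from the patch): the definition is exact and its first argument has no uses, so nothing registered through it can ever run and @llvm.global_dtors may be erased outright.

    // Dummy __cxa_atexit: ignores the destructor, never fails. Because the
    // function-pointer parameter is unused, registered dtors are unreachable.
    extern "C" int __cxa_atexit(void (*)(void *), void *, void *) {
      return 0;
    }
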
// Declare __dso_local.
Type *DsoHandleTy = Type::getInt8Ty(C);
Constant *DsoHandle = M.getOrInsertGlobal("__dso_handle", DsoHandleTy, [&] {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 906eb71fc2d9..c75de8687879 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -64,17 +64,6 @@ void llvm::createMemCpyLoopKnownSize(
IRBuilder<> PLBuilder(PreLoopBB->getTerminator());
- // Cast the Src and Dst pointers to pointers to the loop operand type (if
- // needed).
- PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
- PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
- if (SrcAddr->getType() != SrcOpType) {
- SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
- }
- if (DstAddr->getType() != DstOpType) {
- DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
- }
-
Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
@@ -137,13 +126,9 @@ void llvm::createMemCpyLoopKnownSize(
uint64_t GepIndex = BytesCopied / OperandSize;
assert(GepIndex * OperandSize == BytesCopied &&
"Division should have no Remainder!");
- // Cast source to operand type and load
- PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS);
- Value *CastedSrc = SrcAddr->getType() == SrcPtrType
- ? SrcAddr
- : RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
+
Value *SrcGEP = RBuilder.CreateInBoundsGEP(
- OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
+ OpTy, SrcAddr, ConstantInt::get(TypeOfCopyLen, GepIndex));
LoadInst *Load =
RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
if (!CanOverlap) {
@@ -151,13 +136,8 @@ void llvm::createMemCpyLoopKnownSize(
Load->setMetadata(LLVMContext::MD_alias_scope,
MDNode::get(Ctx, NewScope));
}
- // Cast destination to operand type and store.
- PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
- Value *CastedDst = DstAddr->getType() == DstPtrType
- ? DstAddr
- : RBuilder.CreateBitCast(DstAddr, DstPtrType);
Value *DstGEP = RBuilder.CreateInBoundsGEP(
- OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex));
+ OpTy, DstAddr, ConstantInt::get(TypeOfCopyLen, GepIndex));
StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
DstIsVolatile);
if (!CanOverlap) {
@@ -206,15 +186,6 @@ void llvm::createMemCpyLoopUnknownSize(
IRBuilder<> PLBuilder(PreLoopBB->getTerminator());
- PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
- PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
- if (SrcAddr->getType() != SrcOpType) {
- SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
- }
- if (DstAddr->getType() != DstOpType) {
- DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
- }
-
// Calculate the loop trip count, and remaining bytes to copy after the loop.
Type *CopyLenType = CopyLen->getType();
IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
@@ -305,13 +276,9 @@ void llvm::createMemCpyLoopUnknownSize(
ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
ResidualIndex->addIncoming(Zero, ResHeaderBB);
- Value *SrcAsResLoopOpType = ResBuilder.CreateBitCast(
- SrcAddr, PointerType::get(ResLoopOpType, SrcAS));
- Value *DstAsResLoopOpType = ResBuilder.CreateBitCast(
- DstAddr, PointerType::get(ResLoopOpType, DstAS));
Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
- Value *SrcGEP = ResBuilder.CreateInBoundsGEP(
- ResLoopOpType, SrcAsResLoopOpType, FullOffset);
+ Value *SrcGEP =
+ ResBuilder.CreateInBoundsGEP(ResLoopOpType, SrcAddr, FullOffset);
LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP,
PartSrcAlign, SrcIsVolatile);
if (!CanOverlap) {
@@ -319,8 +286,8 @@ void llvm::createMemCpyLoopUnknownSize(
Load->setMetadata(LLVMContext::MD_alias_scope,
MDNode::get(Ctx, NewScope));
}
- Value *DstGEP = ResBuilder.CreateInBoundsGEP(
- ResLoopOpType, DstAsResLoopOpType, FullOffset);
+ Value *DstGEP =
+ ResBuilder.CreateInBoundsGEP(ResLoopOpType, DstAddr, FullOffset);
StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
DstIsVolatile);
if (!CanOverlap) {
@@ -479,11 +446,6 @@ static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
IRBuilder<> Builder(OrigBB->getTerminator());
- // Cast pointer to the type of value getting stored
- unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
- DstAddr = Builder.CreateBitCast(DstAddr,
- PointerType::get(SetValue->getType(), dstAS));
-
Builder.CreateCondBr(
Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
LoopBB);
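
These bitcast deletions are part of the opaque-pointer cleanup: there is now a single ptr type per address space, and GEPs, loads and stores name their value type explicitly, so pre-casting Src/Dst to "pointer to loop operand type" no longer does anything. The access pattern reduces to roughly:

    // No pointer bitcasts needed: the GEP carries the element type and the
    // operands are plain 'ptr' values (names as in the surrounding function).
    Value *SrcGEP = Builder.CreateInBoundsGEP(LoopOpType, SrcAddr, Index);
    LoadInst *Load = Builder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                               PartSrcAlign, SrcIsVolatile);
    Value *DstGEP = Builder.CreateInBoundsGEP(LoopOpType, DstAddr, Index);
    Builder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
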
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index 227de425ff85..4131d36b572d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -42,7 +42,6 @@
#include <cassert>
#include <cstdint>
#include <iterator>
-#include <limits>
#include <vector>
using namespace llvm;
@@ -413,7 +412,8 @@ void ProcessSwitchInst(SwitchInst *SI,
// TODO Shouldn't this create a signed range?
ConstantRange KnownBitsRange =
ConstantRange::fromKnownBits(Known, /*IsSigned=*/false);
- const ConstantRange LVIRange = LVI->getConstantRange(Val, SI);
+ const ConstantRange LVIRange =
+ LVI->getConstantRange(Val, SI, /*UndefAllowed*/ false);
ConstantRange ValRange = KnownBitsRange.intersectWith(LVIRange);
// We delegate removal of unreachable non-default cases to other passes. In
// the unlikely event that some of them survived, we just conservatively
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp
index 531b0a624daf..47c6bcbaf26e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp
@@ -321,8 +321,10 @@ void MemoryOpRemark::visitVariable(const Value *V,
bool FoundDI = false;
// Try to get an llvm.dbg.declare, which has a DILocalVariable giving us the
// real debug info name and size of the variable.
- for (const DbgVariableIntrinsic *DVI :
- FindDbgDeclareUses(const_cast<Value *>(V))) {
+ SmallVector<DbgDeclareInst *, 1> DbgDeclares;
+ SmallVector<DPValue *, 1> DPValues;
+ findDbgDeclares(DbgDeclares, const_cast<Value *>(V), &DPValues);
+ auto FindDI = [&](const auto *DVI) {
if (DILocalVariable *DILV = DVI->getVariable()) {
std::optional<uint64_t> DISize = getSizeInBytes(DILV->getSizeInBits());
VariableInfo Var{DILV->getName(), DISize};
@@ -331,7 +333,10 @@ void MemoryOpRemark::visitVariable(const Value *V,
FoundDI = true;
}
}
- }
+ };
+ for_each(DbgDeclares, FindDI);
+ for_each(DPValues, FindDI);
+
if (FoundDI) {
assert(!Result.empty());
return;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp
index 44ac65f265f0..fd0112ae529c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp
@@ -151,7 +151,7 @@ void MetaRename(Module &M,
auto IsNameExcluded = [](StringRef &Name,
SmallVectorImpl<StringRef> &ExcludedPrefixes) {
return any_of(ExcludedPrefixes,
- [&Name](auto &Prefix) { return Name.startswith(Prefix); });
+ [&Name](auto &Prefix) { return Name.starts_with(Prefix); });
};
// Leave library functions alone because their presence or absence could
@@ -159,7 +159,7 @@ void MetaRename(Module &M,
auto ExcludeLibFuncs = [&](Function &F) {
LibFunc Tmp;
StringRef Name = F.getName();
- return Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
+ return Name.starts_with("llvm.") || (!Name.empty() && Name[0] == 1) ||
GetTLI(F).getLibFunc(F, Tmp) ||
IsNameExcluded(Name, ExcludedFuncPrefixes);
};
@@ -177,7 +177,7 @@ void MetaRename(Module &M,
// Rename all aliases
for (GlobalAlias &GA : M.aliases()) {
StringRef Name = GA.getName();
- if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
+ if (Name.starts_with("llvm.") || (!Name.empty() && Name[0] == 1) ||
IsNameExcluded(Name, ExcludedAliasesPrefixes))
continue;
@@ -187,7 +187,7 @@ void MetaRename(Module &M,
// Rename all global variables
for (GlobalVariable &GV : M.globals()) {
StringRef Name = GV.getName();
- if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
+ if (Name.starts_with("llvm.") || (!Name.empty() && Name[0] == 1) ||
IsNameExcluded(Name, ExcludedGlobalsPrefixes))
continue;
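
The startswith -> starts_with renames here and in the other files of this merge track StringRef's adoption of the std::string_view-style spellings; behaviour is unchanged, and the old names were kept as deprecated aliases for a transition period. For example:

    // Same reserved-name test, new spelling.
    static bool isReservedName(StringRef Name) {
      return Name.starts_with("llvm.") || (!Name.empty() && Name[0] == 1);
    }
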
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index 1e243ef74df7..7de0959ca57e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -44,17 +44,17 @@ static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F,
}
GVCtor->eraseFromParent();
} else {
- EltTy = StructType::get(
- IRB.getInt32Ty(), PointerType::get(FnTy, F->getAddressSpace()),
- IRB.getInt8PtrTy());
+ EltTy = StructType::get(IRB.getInt32Ty(),
+ PointerType::get(FnTy, F->getAddressSpace()),
+ IRB.getPtrTy());
}
// Build a 3 field global_ctor entry. We don't take a comdat key.
Constant *CSVals[3];
CSVals[0] = IRB.getInt32(Priority);
CSVals[1] = F;
- CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy())
- : Constant::getNullValue(IRB.getInt8PtrTy());
+ CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getPtrTy())
+ : Constant::getNullValue(IRB.getPtrTy());
Constant *RuntimeCtorInit =
ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements()));
@@ -96,7 +96,7 @@ static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *>
if (GV)
GV->eraseFromParent();
- Type *ArrayEltTy = llvm::Type::getInt8PtrTy(M.getContext());
+ Type *ArrayEltTy = llvm::PointerType::getUnqual(M.getContext());
for (auto *V : Values)
Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy));
@@ -301,7 +301,7 @@ std::string llvm::getUniqueModuleId(Module *M) {
MD5 Md5;
bool ExportsSymbols = false;
auto AddGlobal = [&](GlobalValue &GV) {
- if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
+ if (GV.isDeclaration() || GV.getName().starts_with("llvm.") ||
!GV.hasExternalLinkage() || GV.hasComdat())
return;
ExportsSymbols = true;
@@ -346,7 +346,8 @@ void VFABI::setVectorVariantNames(CallInst *CI,
#ifndef NDEBUG
for (const std::string &VariantMapping : VariantMappings) {
LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << VariantMapping << "'\n");
- std::optional<VFInfo> VI = VFABI::tryDemangleForVFABI(VariantMapping, *M);
+ std::optional<VFInfo> VI =
+ VFABI::tryDemangleForVFABI(VariantMapping, CI->getFunctionType());
assert(VI && "Cannot add an invalid VFABI name.");
assert(M->getNamedValue(VI->VectorName) &&
"Cannot add variant to attribute: "
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/MoveAutoInit.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/MoveAutoInit.cpp
index b0ca0b15c08e..a977ad87b79f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/MoveAutoInit.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/MoveAutoInit.cpp
@@ -14,7 +14,6 @@
#include "llvm/Transforms/Utils/MoveAutoInit.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringSet.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -50,7 +49,7 @@ static std::optional<MemoryLocation> writeToAlloca(const Instruction &I) {
else if (auto *SI = dyn_cast<StoreInst>(&I))
ML = MemoryLocation::get(SI);
else
- assert(false && "memory location set");
+ return std::nullopt;
if (isa<AllocaInst>(getUnderlyingObject(ML.Ptr)))
return ML;
@@ -202,7 +201,7 @@ static bool runMoveAutoInit(Function &F, DominatorTree &DT, MemorySSA &MSSA) {
// if two instructions are moved from the same BB to the same BB, we insert
// the second one in the front, then the first on top of it.
for (auto &Job : reverse(JobList)) {
- Job.first->moveBefore(&*Job.second->getFirstInsertionPt());
+ Job.first->moveBefore(*Job.second, Job.second->getFirstInsertionPt());
MSSAU.moveToPlace(MSSA.getMemoryAccess(Job.first), Job.first->getParent(),
MemorySSA::InsertionPlace::Beginning);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index 1f16ba78bdb0..902977b08d15 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -23,7 +23,6 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
@@ -33,12 +32,6 @@
using namespace llvm;
using namespace PatternMatch;
-INITIALIZE_PASS_BEGIN(PredicateInfoPrinterLegacyPass, "print-predicateinfo",
- "PredicateInfo Printer", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_END(PredicateInfoPrinterLegacyPass, "print-predicateinfo",
- "PredicateInfo Printer", false, false)
static cl::opt<bool> VerifyPredicateInfo(
"verify-predicateinfo", cl::init(false), cl::Hidden,
cl::desc("Verify PredicateInfo in legacy printer pass."));
@@ -835,20 +828,6 @@ std::optional<PredicateConstraint> PredicateBase::getConstraint() const {
void PredicateInfo::verifyPredicateInfo() const {}
-char PredicateInfoPrinterLegacyPass::ID = 0;
-
-PredicateInfoPrinterLegacyPass::PredicateInfoPrinterLegacyPass()
- : FunctionPass(ID) {
- initializePredicateInfoPrinterLegacyPassPass(
- *PassRegistry::getPassRegistry());
-}
-
-void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequiredTransitive<DominatorTreeWrapperPass>();
- AU.addRequired<AssumptionCacheTracker>();
-}
-
// Replace ssa_copy calls created by PredicateInfo with their operand.
static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) {
for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) {
@@ -862,18 +841,6 @@ static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) {
}
}
-bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto PredInfo = std::make_unique<PredicateInfo>(F, DT, AC);
- PredInfo->print(dbgs());
- if (VerifyPredicateInfo)
- PredInfo->verifyPredicateInfo();
-
- replaceCreatedSSACopys(*PredInfo, F);
- return false;
-}
-
PreservedAnalyses PredicateInfoPrinterPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 2e5f40d39912..717b6d301c8c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -31,6 +31,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugProgramInstruction.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -172,6 +173,7 @@ public:
struct AllocaInfo {
using DbgUserVec = SmallVector<DbgVariableIntrinsic *, 1>;
+ using DPUserVec = SmallVector<DPValue *, 1>;
SmallVector<BasicBlock *, 32> DefiningBlocks;
SmallVector<BasicBlock *, 32> UsingBlocks;
@@ -182,6 +184,7 @@ struct AllocaInfo {
/// Debug users of the alloca - does not include dbg.assign intrinsics.
DbgUserVec DbgUsers;
+ DPUserVec DPUsers;
/// Helper to update assignment tracking debug info.
AssignmentTrackingInfo AssignmentTracking;
@@ -192,6 +195,7 @@ struct AllocaInfo {
OnlyBlock = nullptr;
OnlyUsedInOneBlock = true;
DbgUsers.clear();
+ DPUsers.clear();
AssignmentTracking.clear();
}
@@ -225,7 +229,7 @@ struct AllocaInfo {
}
}
DbgUserVec AllDbgUsers;
- findDbgUsers(AllDbgUsers, AI);
+ findDbgUsers(AllDbgUsers, AI, &DPUsers);
std::copy_if(AllDbgUsers.begin(), AllDbgUsers.end(),
std::back_inserter(DbgUsers), [](DbgVariableIntrinsic *DII) {
return !isa<DbgAssignIntrinsic>(DII);
@@ -329,6 +333,7 @@ struct PromoteMem2Reg {
/// describes it, if any, so that we can convert it to a dbg.value
/// intrinsic if the alloca gets promoted.
SmallVector<AllocaInfo::DbgUserVec, 8> AllocaDbgUsers;
+ SmallVector<AllocaInfo::DPUserVec, 8> AllocaDPUsers;
/// For each alloca, keep an instance of a helper class that gives us an easy
/// way to update assignment tracking debug info if the alloca is promoted.
@@ -525,14 +530,18 @@ static bool rewriteSingleStoreAlloca(
// Record debuginfo for the store and remove the declaration's
// debuginfo.
- for (DbgVariableIntrinsic *DII : Info.DbgUsers) {
- if (DII->isAddressOfVariable()) {
- ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB);
- DII->eraseFromParent();
- } else if (DII->getExpression()->startsWithDeref()) {
- DII->eraseFromParent();
+ auto ConvertDebugInfoForStore = [&](auto &Container) {
+ for (auto *DbgItem : Container) {
+ if (DbgItem->isAddressOfVariable()) {
+ ConvertDebugDeclareToDebugValue(DbgItem, Info.OnlyStore, DIB);
+ DbgItem->eraseFromParent();
+ } else if (DbgItem->getExpression()->startsWithDeref()) {
+ DbgItem->eraseFromParent();
+ }
}
- }
+ };
+ ConvertDebugInfoForStore(Info.DbgUsers);
+ ConvertDebugInfoForStore(Info.DPUsers);
// Remove dbg.assigns linked to the alloca as these are now redundant.
at::deleteAssignmentMarkers(AI);
@@ -629,12 +638,18 @@ static bool promoteSingleBlockAlloca(
StoreInst *SI = cast<StoreInst>(AI->user_back());
// Update assignment tracking info for the store we're going to delete.
Info.AssignmentTracking.updateForDeletedStore(SI, DIB, DbgAssignsToDelete);
+
// Record debuginfo for the store before removing it.
- for (DbgVariableIntrinsic *DII : Info.DbgUsers) {
- if (DII->isAddressOfVariable()) {
- ConvertDebugDeclareToDebugValue(DII, SI, DIB);
+ auto DbgUpdateForStore = [&](auto &Container) {
+ for (auto *DbgItem : Container) {
+ if (DbgItem->isAddressOfVariable()) {
+ ConvertDebugDeclareToDebugValue(DbgItem, SI, DIB);
+ }
}
- }
+ };
+ DbgUpdateForStore(Info.DbgUsers);
+ DbgUpdateForStore(Info.DPUsers);
+
SI->eraseFromParent();
LBI.deleteValue(SI);
}
@@ -644,9 +659,14 @@ static bool promoteSingleBlockAlloca(
AI->eraseFromParent();
// The alloca's debuginfo can be removed as well.
- for (DbgVariableIntrinsic *DII : Info.DbgUsers)
- if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref())
- DII->eraseFromParent();
+ auto DbgUpdateForAlloca = [&](auto &Container) {
+ for (auto *DbgItem : Container)
+ if (DbgItem->isAddressOfVariable() ||
+ DbgItem->getExpression()->startsWithDeref())
+ DbgItem->eraseFromParent();
+ };
+ DbgUpdateForAlloca(Info.DbgUsers);
+ DbgUpdateForAlloca(Info.DPUsers);
++NumLocalPromoted;
return true;
@@ -657,6 +677,7 @@ void PromoteMem2Reg::run() {
AllocaDbgUsers.resize(Allocas.size());
AllocaATInfo.resize(Allocas.size());
+ AllocaDPUsers.resize(Allocas.size());
AllocaInfo Info;
LargeBlockInfo LBI;
@@ -720,6 +741,8 @@ void PromoteMem2Reg::run() {
AllocaDbgUsers[AllocaNum] = Info.DbgUsers;
if (!Info.AssignmentTracking.empty())
AllocaATInfo[AllocaNum] = Info.AssignmentTracking;
+ if (!Info.DPUsers.empty())
+ AllocaDPUsers[AllocaNum] = Info.DPUsers;
// Keep the reverse mapping of the 'Allocas' array for the rename pass.
AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
@@ -795,11 +818,16 @@ void PromoteMem2Reg::run() {
}
// Remove alloca's dbg.declare intrinsics from the function.
- for (auto &DbgUsers : AllocaDbgUsers) {
- for (auto *DII : DbgUsers)
- if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref())
- DII->eraseFromParent();
- }
+ auto RemoveDbgDeclares = [&](auto &Container) {
+ for (auto &DbgUsers : Container) {
+ for (auto *DbgItem : DbgUsers)
+ if (DbgItem->isAddressOfVariable() ||
+ DbgItem->getExpression()->startsWithDeref())
+ DbgItem->eraseFromParent();
+ }
+ };
+ RemoveDbgDeclares(AllocaDbgUsers);
+ RemoveDbgDeclares(AllocaDPUsers);
// Loop over all of the PHI nodes and see if there are any that we can get
// rid of because they merge all of the same incoming values. This can
@@ -981,8 +1009,8 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
// Create a PhiNode using the dereferenced type... and add the phi-node to the
// BasicBlock.
PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
- Allocas[AllocaNo]->getName() + "." + Twine(Version++),
- &BB->front());
+ Allocas[AllocaNo]->getName() + "." + Twine(Version++));
+ PN->insertBefore(BB->begin());
++NumPHIInsert;
PhiToAllocaMap[PN] = AllocaNo;
return true;
@@ -1041,9 +1069,13 @@ NextIteration:
// The currently active variable for this block is now the PHI.
IncomingVals[AllocaNo] = APN;
AllocaATInfo[AllocaNo].updateForNewPhi(APN, DIB);
- for (DbgVariableIntrinsic *DII : AllocaDbgUsers[AllocaNo])
- if (DII->isAddressOfVariable())
- ConvertDebugDeclareToDebugValue(DII, APN, DIB);
+ auto ConvertDbgDeclares = [&](auto &Container) {
+ for (auto *DbgItem : Container)
+ if (DbgItem->isAddressOfVariable())
+ ConvertDebugDeclareToDebugValue(DbgItem, APN, DIB);
+ };
+ ConvertDbgDeclares(AllocaDbgUsers[AllocaNo]);
+ ConvertDbgDeclares(AllocaDPUsers[AllocaNo]);
// Get the next phi node.
++PNI;
@@ -1098,9 +1130,13 @@ NextIteration:
IncomingLocs[AllocaNo] = SI->getDebugLoc();
AllocaATInfo[AllocaNo].updateForDeletedStore(SI, DIB,
&DbgAssignsToDelete);
- for (DbgVariableIntrinsic *DII : AllocaDbgUsers[ai->second])
- if (DII->isAddressOfVariable())
- ConvertDebugDeclareToDebugValue(DII, SI, DIB);
+ auto ConvertDbgDeclares = [&](auto &Container) {
+ for (auto *DbgItem : Container)
+ if (DbgItem->isAddressOfVariable())
+ ConvertDebugDeclareToDebugValue(DbgItem, SI, DIB);
+ };
+ ConvertDbgDeclares(AllocaDbgUsers[ai->second]);
+ ConvertDbgDeclares(AllocaDPUsers[ai->second]);
SI->eraseFromParent();
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
index c9ff94dc9744..ea628d7c3d7d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
@@ -153,17 +153,12 @@ static void convertToRelLookupTable(GlobalVariable &LookupTable) {
Builder.SetInsertPoint(Load);
Function *LoadRelIntrinsic = llvm::Intrinsic::getDeclaration(
&M, Intrinsic::load_relative, {Index->getType()});
- Value *Base = Builder.CreateBitCast(RelLookupTable, Builder.getInt8PtrTy());
// Create a call to load.relative intrinsic that computes the target address
// by adding base address (lookup table address) and relative offset.
- Value *Result = Builder.CreateCall(LoadRelIntrinsic, {Base, Offset},
+ Value *Result = Builder.CreateCall(LoadRelIntrinsic, {RelLookupTable, Offset},
"reltable.intrinsic");
- // Create a bitcast instruction if necessary.
- if (Load->getType() != Builder.getInt8PtrTy())
- Result = Builder.CreateBitCast(Result, Load->getType(), "reltable.bitcast");
-
// Replace load instruction with the new generated instruction sequence.
Load->replaceAllUsesWith(Result);
// Remove Load and GEP instructions.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index de3626a24212..ab95698abc43 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -107,9 +107,7 @@ bool SCCPSolver::tryToReplaceWithConstant(Value *V) {
static bool refineInstruction(SCCPSolver &Solver,
const SmallPtrSetImpl<Value *> &InsertedValues,
Instruction &Inst) {
- if (!isa<OverflowingBinaryOperator>(Inst))
- return false;
-
+ bool Changed = false;
auto GetRange = [&Solver, &InsertedValues](Value *Op) {
if (auto *Const = dyn_cast<ConstantInt>(Op))
return ConstantRange(Const->getValue());
@@ -120,23 +118,32 @@ static bool refineInstruction(SCCPSolver &Solver,
return getConstantRange(Solver.getLatticeValueFor(Op), Op->getType(),
/*UndefAllowed=*/false);
};
- auto RangeA = GetRange(Inst.getOperand(0));
- auto RangeB = GetRange(Inst.getOperand(1));
- bool Changed = false;
- if (!Inst.hasNoUnsignedWrap()) {
- auto NUWRange = ConstantRange::makeGuaranteedNoWrapRegion(
- Instruction::BinaryOps(Inst.getOpcode()), RangeB,
- OverflowingBinaryOperator::NoUnsignedWrap);
- if (NUWRange.contains(RangeA)) {
- Inst.setHasNoUnsignedWrap();
- Changed = true;
+
+ if (isa<OverflowingBinaryOperator>(Inst)) {
+ auto RangeA = GetRange(Inst.getOperand(0));
+ auto RangeB = GetRange(Inst.getOperand(1));
+ if (!Inst.hasNoUnsignedWrap()) {
+ auto NUWRange = ConstantRange::makeGuaranteedNoWrapRegion(
+ Instruction::BinaryOps(Inst.getOpcode()), RangeB,
+ OverflowingBinaryOperator::NoUnsignedWrap);
+ if (NUWRange.contains(RangeA)) {
+ Inst.setHasNoUnsignedWrap();
+ Changed = true;
+ }
}
- }
- if (!Inst.hasNoSignedWrap()) {
- auto NSWRange = ConstantRange::makeGuaranteedNoWrapRegion(
- Instruction::BinaryOps(Inst.getOpcode()), RangeB, OverflowingBinaryOperator::NoSignedWrap);
- if (NSWRange.contains(RangeA)) {
- Inst.setHasNoSignedWrap();
+ if (!Inst.hasNoSignedWrap()) {
+ auto NSWRange = ConstantRange::makeGuaranteedNoWrapRegion(
+ Instruction::BinaryOps(Inst.getOpcode()), RangeB,
+ OverflowingBinaryOperator::NoSignedWrap);
+ if (NSWRange.contains(RangeA)) {
+ Inst.setHasNoSignedWrap();
+ Changed = true;
+ }
+ }
+ } else if (isa<ZExtInst>(Inst) && !Inst.hasNonNeg()) {
+ auto Range = GetRange(Inst.getOperand(0));
+ if (Range.isAllNonNegative()) {
+ Inst.setNonNeg();
Changed = true;
}
}
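
The new branch exploits the nneg flag on zext, added during the LLVM 18 cycle: when the operand is provably non-negative, zero- and sign-extension produce identical bits, and the flag records that so later passes may treat the zext as a sext for free. The same flag is set when replaceSignedInst (below) rewrites a sext, roughly:

    // %r = sext i32 %x to i64 with %x known non-negative becomes a
    // zero-extend that remembers the fact.
    auto *NewZExt = new ZExtInst(Op0, Inst.getType(), "", &Inst);
    NewZExt->setNonNeg();
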
@@ -171,6 +178,7 @@ static bool replaceSignedInst(SCCPSolver &Solver,
if (InsertedValues.count(Op0) || !isNonNegative(Op0))
return false;
NewInst = new ZExtInst(Op0, Inst.getType(), "", &Inst);
+ NewInst->setNonNeg();
break;
}
case Instruction::AShr: {
@@ -179,6 +187,7 @@ static bool replaceSignedInst(SCCPSolver &Solver,
if (InsertedValues.count(Op0) || !isNonNegative(Op0))
return false;
NewInst = BinaryOperator::CreateLShr(Op0, Inst.getOperand(1), "", &Inst);
+ NewInst->setIsExact(Inst.isExact());
break;
}
case Instruction::SDiv:
@@ -191,6 +200,8 @@ static bool replaceSignedInst(SCCPSolver &Solver,
auto NewOpcode = Inst.getOpcode() == Instruction::SDiv ? Instruction::UDiv
: Instruction::URem;
NewInst = BinaryOperator::Create(NewOpcode, Op0, Op1, "", &Inst);
+ if (Inst.getOpcode() == Instruction::SDiv)
+ NewInst->setIsExact(Inst.isExact());
break;
}
default:
@@ -1029,8 +1040,9 @@ void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI,
return;
}
- // Unwinding instructions successors are always executable.
- if (TI.isExceptionalTerminator()) {
+ // We cannot analyze special terminators, so consider all successors
+ // executable.
+ if (TI.isSpecialTerminator()) {
Succs.assign(TI.getNumSuccessors(), true);
return;
}
@@ -1098,13 +1110,6 @@ void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI,
return;
}
- // In case of callbr, we pessimistically assume that all successors are
- // feasible.
- if (isa<CallBrInst>(&TI)) {
- Succs.assign(TI.getNumSuccessors(), true);
- return;
- }
-
LLVM_DEBUG(dbgs() << "Unknown terminator instruction: " << TI << '\n');
llvm_unreachable("SCCP: Don't know how to handle this terminator!");
}
@@ -1231,10 +1236,12 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
if (Constant *OpC = getConstant(OpSt, I.getOperand(0)->getType())) {
// Fold the constant as we build.
- Constant *C = ConstantFoldCastOperand(I.getOpcode(), OpC, I.getType(), DL);
- markConstant(&I, C);
- } else if (I.getDestTy()->isIntegerTy() &&
- I.getSrcTy()->isIntOrIntVectorTy()) {
+ if (Constant *C =
+ ConstantFoldCastOperand(I.getOpcode(), OpC, I.getType(), DL))
+ return (void)markConstant(&I, C);
+ }
+
+ if (I.getDestTy()->isIntegerTy() && I.getSrcTy()->isIntOrIntVectorTy()) {
auto &LV = getValueState(&I);
ConstantRange OpRange = getConstantRange(OpSt, I.getSrcTy());
@@ -1539,11 +1546,8 @@ void SCCPInstVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
return (void)markOverdefined(&I);
}
- Constant *Ptr = Operands[0];
- auto Indices = ArrayRef(Operands.begin() + 1, Operands.end());
- Constant *C =
- ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, Indices);
- markConstant(&I, C);
+ if (Constant *C = ConstantFoldInstOperands(&I, Operands, DL))
+ markConstant(&I, C);
}
void SCCPInstVisitor::visitStoreInst(StoreInst &SI) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index ebe9cb27f5ab..fc21fb552137 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -156,8 +156,9 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
}
// Ok, we have no way out, insert a new one now.
- PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(),
- ProtoName, &BB->front());
+ PHINode *InsertedPHI =
+ PHINode::Create(ProtoType, PredValues.size(), ProtoName);
+ InsertedPHI->insertBefore(BB->begin());
// Fill in all the predecessors of the PHI.
for (const auto &PredValue : PredValues)
@@ -198,12 +199,18 @@ void SSAUpdater::RewriteUse(Use &U) {
void SSAUpdater::UpdateDebugValues(Instruction *I) {
SmallVector<DbgValueInst *, 4> DbgValues;
- llvm::findDbgValues(DbgValues, I);
+ SmallVector<DPValue *, 4> DPValues;
+ llvm::findDbgValues(DbgValues, I, &DPValues);
for (auto &DbgValue : DbgValues) {
if (DbgValue->getParent() == I->getParent())
continue;
UpdateDebugValue(I, DbgValue);
}
+ for (auto &DPV : DPValues) {
+ if (DPV->getParent() == I->getParent())
+ continue;
+ UpdateDebugValue(I, DPV);
+ }
}
void SSAUpdater::UpdateDebugValues(Instruction *I,
@@ -213,16 +220,31 @@ void SSAUpdater::UpdateDebugValues(Instruction *I,
}
}
+void SSAUpdater::UpdateDebugValues(Instruction *I,
+ SmallVectorImpl<DPValue *> &DPValues) {
+ for (auto &DPV : DPValues) {
+ UpdateDebugValue(I, DPV);
+ }
+}
+
void SSAUpdater::UpdateDebugValue(Instruction *I, DbgValueInst *DbgValue) {
BasicBlock *UserBB = DbgValue->getParent();
if (HasValueForBlock(UserBB)) {
Value *NewVal = GetValueAtEndOfBlock(UserBB);
DbgValue->replaceVariableLocationOp(I, NewVal);
- }
- else
+ } else
DbgValue->setKillLocation();
}
+void SSAUpdater::UpdateDebugValue(Instruction *I, DPValue *DPV) {
+ BasicBlock *UserBB = DPV->getParent();
+ if (HasValueForBlock(UserBB)) {
+ Value *NewVal = GetValueAtEndOfBlock(UserBB);
+ DPV->replaceVariableLocationOp(I, NewVal);
+ } else
+ DPV->setKillLocation();
+}
+
void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
Instruction *User = cast<Instruction>(U.getUser());
@@ -295,8 +317,9 @@ public:
/// Reserve space for the operands but do not fill them in yet.
static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds,
SSAUpdater *Updater) {
- PHINode *PHI = PHINode::Create(Updater->ProtoType, NumPreds,
- Updater->ProtoName, &BB->front());
+ PHINode *PHI =
+ PHINode::Create(Updater->ProtoType, NumPreds, Updater->ProtoName);
+ PHI->insertBefore(BB->begin());
return PHI;
}
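
For illustration: both SSAUpdater hunks switch from the PHINode::Create overload that takes an insert-before Instruction* to creating the node detached and inserting at a BasicBlock::iterator. A minimal sketch of the pattern, assuming the iterator-taking insertBefore available in this tree; the function and names are illustrative only.

    // Sketch: iterator-based insertion as used in the hunks above. begin()
    // is valid even for an empty block (where &BB->front() would not be)
    // and carries the insertion-position information the debug-info work
    // in this merge relies on.
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static PHINode *makeBlockPHI(BasicBlock *BB, Type *Ty, unsigned NumPreds) {
      PHINode *PN = PHINode::Create(Ty, NumPreds, "sketch.phi");
      PN->insertBefore(BB->begin()); // insert at the top of the block
      return PN;
    }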
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
index 31d62fbf0618..101b70d8def4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
@@ -159,7 +159,7 @@ public:
/// Get the total flow from a given source node.
/// Returns a list of pairs (target node, amount of flow to the target).
- const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
+ std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
std::vector<std::pair<uint64_t, int64_t>> Flow;
for (const auto &Edge : Edges[Src]) {
if (Edge.Flow > 0)
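
For illustration: the getFlow change above drops a top-level const on a by-value return, which is never useful and pessimizes callers, since a const rvalue cannot bind to an rvalue reference and so cannot be moved from. A standalone demo of the language rule (not LLVM code):

    // Prints "copy-assign" then "move-assign": the const-qualified return
    // forces the copy path, which is why the qualifier was removed.
    #include <cstdio>

    struct Probe {
      Probe() = default;
      Probe(const Probe &) { std::puts("copy-construct"); }
      Probe(Probe &&) noexcept { std::puts("move-construct"); }
      Probe &operator=(const Probe &) { std::puts("copy-assign"); return *this; }
      Probe &operator=(Probe &&) noexcept { std::puts("move-assign"); return *this; }
    };

    const Probe makeConst() { return Probe(); }
    Probe makePlain() { return Probe(); }

    int main() {
      Probe P;
      P = makeConst(); // const rvalue can't bind to Probe&&
      P = makePlain(); // moves as expected
    }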
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SanitizerStats.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SanitizerStats.cpp
index fd21ee4cc408..b80c5a6f9d68 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SanitizerStats.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SanitizerStats.cpp
@@ -21,7 +21,7 @@
using namespace llvm;
SanitizerStatReport::SanitizerStatReport(Module *M) : M(M) {
- StatTy = ArrayType::get(Type::getInt8PtrTy(M->getContext()), 2);
+ StatTy = ArrayType::get(PointerType::getUnqual(M->getContext()), 2);
EmptyModuleStatsTy = makeModuleStatsTy();
ModuleStatsGV = new GlobalVariable(*M, EmptyModuleStatsTy, false,
@@ -33,28 +33,28 @@ ArrayType *SanitizerStatReport::makeModuleStatsArrayTy() {
}
StructType *SanitizerStatReport::makeModuleStatsTy() {
- return StructType::get(M->getContext(), {Type::getInt8PtrTy(M->getContext()),
- Type::getInt32Ty(M->getContext()),
- makeModuleStatsArrayTy()});
+ return StructType::get(M->getContext(),
+ {PointerType::getUnqual(M->getContext()),
+ Type::getInt32Ty(M->getContext()),
+ makeModuleStatsArrayTy()});
}
void SanitizerStatReport::create(IRBuilder<> &B, SanitizerStatKind SK) {
Function *F = B.GetInsertBlock()->getParent();
Module *M = F->getParent();
- PointerType *Int8PtrTy = B.getInt8PtrTy();
+ PointerType *PtrTy = B.getPtrTy();
IntegerType *IntPtrTy = B.getIntPtrTy(M->getDataLayout());
- ArrayType *StatTy = ArrayType::get(Int8PtrTy, 2);
+ ArrayType *StatTy = ArrayType::get(PtrTy, 2);
Inits.push_back(ConstantArray::get(
StatTy,
- {Constant::getNullValue(Int8PtrTy),
+ {Constant::getNullValue(PtrTy),
ConstantExpr::getIntToPtr(
ConstantInt::get(IntPtrTy, uint64_t(SK) << (IntPtrTy->getBitWidth() -
kSanitizerStatKindBits)),
- Int8PtrTy)}));
+ PtrTy)}));
- FunctionType *StatReportTy =
- FunctionType::get(B.getVoidTy(), Int8PtrTy, false);
+ FunctionType *StatReportTy = FunctionType::get(B.getVoidTy(), PtrTy, false);
FunctionCallee StatReport =
M->getOrInsertFunction("__sanitizer_stat_report", StatReportTy);
@@ -64,7 +64,7 @@ void SanitizerStatReport::create(IRBuilder<> &B, SanitizerStatKind SK) {
ConstantInt::get(IntPtrTy, 0), ConstantInt::get(B.getInt32Ty(), 2),
ConstantInt::get(IntPtrTy, Inits.size() - 1),
});
- B.CreateCall(StatReport, ConstantExpr::getBitCast(InitAddr, Int8PtrTy));
+ B.CreateCall(StatReport, InitAddr);
}
void SanitizerStatReport::finish() {
@@ -73,7 +73,7 @@ void SanitizerStatReport::finish() {
return;
}
- PointerType *Int8PtrTy = Type::getInt8PtrTy(M->getContext());
+ PointerType *Int8PtrTy = PointerType::getUnqual(M->getContext());
IntegerType *Int32Ty = Type::getInt32Ty(M->getContext());
Type *VoidTy = Type::getVoidTy(M->getContext());
@@ -85,8 +85,7 @@ void SanitizerStatReport::finish() {
{Constant::getNullValue(Int8PtrTy),
ConstantInt::get(Int32Ty, Inits.size()),
ConstantArray::get(makeModuleStatsArrayTy(), Inits)}));
- ModuleStatsGV->replaceAllUsesWith(
- ConstantExpr::getBitCast(NewModuleStatsGV, ModuleStatsGV->getType()));
+ ModuleStatsGV->replaceAllUsesWith(NewModuleStatsGV);
ModuleStatsGV->eraseFromParent();
// Create a global constructor to register NewModuleStatsGV.
@@ -99,7 +98,7 @@ void SanitizerStatReport::finish() {
FunctionCallee StatInit =
M->getOrInsertFunction("__sanitizer_stat_init", StatInitTy);
- B.CreateCall(StatInit, ConstantExpr::getBitCast(NewModuleStatsGV, Int8PtrTy));
+ B.CreateCall(StatInit, NewModuleStatsGV);
B.CreateRetVoid();
appendToGlobalCtors(*M, F, 0);
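
For illustration: the SanitizerStats hunks are mechanical fallout of the opaque-pointer migration. There is now a single `ptr` type per address space, Type::getInt8PtrTy gives way to PointerType::getUnqual / IRBuilder::getPtrTy, and pointer-to-pointer bitcasts become no-ops that can simply be dropped, as in the CreateCall changes above. A minimal sketch of building the same stat-entry type, assuming LLVM-18-era headers:

    // Sketch: with opaque pointers the [2 x ptr] stat entry needs no
    // element type, and no bitcast is needed to pass it as `ptr`.
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    static ArrayType *makeStatEntryTy(LLVMContext &Ctx) {
      PointerType *PtrTy = PointerType::getUnqual(Ctx); // opaque `ptr`
      return ArrayType::get(PtrTy, 2);                  // [2 x ptr]
    }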
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 20844271b943..cd3ac317cd23 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -170,11 +170,10 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
if (Op == Instruction::IntToPtr) {
auto *PtrTy = cast<PointerType>(Ty);
if (DL.isNonIntegralPointerType(PtrTy)) {
- auto *Int8PtrTy = Builder.getInt8PtrTy(PtrTy->getAddressSpace());
assert(DL.getTypeAllocSize(Builder.getInt8Ty()) == 1 &&
"alloc size of i8 must by 1 byte for the GEP to be correct");
return Builder.CreateGEP(
- Builder.getInt8Ty(), Constant::getNullValue(Int8PtrTy), V, "scevgep");
+ Builder.getInt8Ty(), Constant::getNullValue(PtrTy), V, "scevgep");
}
}
// Short-circuit unnecessary bitcasts.
@@ -313,11 +312,11 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
/// loop-invariant portions of expressions, after considering what
/// can be folded using target addressing modes.
///
-Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Type *Ty, Value *V) {
+Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V) {
assert(!isa<Instruction>(V) ||
SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
- Value *Idx = expandCodeForImpl(Offset, Ty);
+ Value *Idx = expand(Offset);
// Fold a GEP with constant operands.
if (Constant *CLHS = dyn_cast<Constant>(V))
@@ -339,7 +338,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Type *Ty, Value *V) {
if (IP->getOpcode() == Instruction::GetElementPtr &&
IP->getOperand(0) == V && IP->getOperand(1) == Idx &&
cast<GEPOperator>(&*IP)->getSourceElementType() ==
- Type::getInt8Ty(Ty->getContext()))
+ Builder.getInt8Ty())
return &*IP;
if (IP == BlockBegin) break;
}
@@ -457,8 +456,6 @@ public:
}
Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
- Type *Ty = SE.getEffectiveSCEVType(S->getType());
-
// Collect all the add operands in a loop, along with their associated loops.
// Iterate in reverse so that constants are emitted last, all else equal, and
// so that pointer operands are inserted first, which the code below relies on
@@ -498,20 +495,19 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
X = SE.getSCEV(U->getValue());
NewOps.push_back(X);
}
- Sum = expandAddToGEP(SE.getAddExpr(NewOps), Ty, Sum);
+ Sum = expandAddToGEP(SE.getAddExpr(NewOps), Sum);
} else if (Op->isNonConstantNegative()) {
// Instead of doing a negate and add, just do a subtract.
- Value *W = expandCodeForImpl(SE.getNegativeSCEV(Op), Ty);
- Sum = InsertNoopCastOfTo(Sum, Ty);
+ Value *W = expand(SE.getNegativeSCEV(Op));
Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap,
/*IsSafeToHoist*/ true);
++I;
} else {
// A simple add.
- Value *W = expandCodeForImpl(Op, Ty);
- Sum = InsertNoopCastOfTo(Sum, Ty);
+ Value *W = expand(Op);
// Canonicalize a constant to the RHS.
- if (isa<Constant>(Sum)) std::swap(Sum, W);
+ if (isa<Constant>(Sum))
+ std::swap(Sum, W);
Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(),
/*IsSafeToHoist*/ true);
++I;
@@ -522,7 +518,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
}
Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
- Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Type *Ty = S->getType();
// Collect all the mul operands in a loop, along with their associated loops.
// Iterate in reverse so that constants are emitted last, all else equal.
@@ -541,7 +537,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
// Expand the calculation of X pow N in the following manner:
// Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
// X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
- const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops, &Ty]() {
+ const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops]() {
auto E = I;
// Calculate how many times the same operand from the same loop is included
// into this power.
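
For illustration, the decomposition the comment above describes is ordinary square-and-multiply, shown here as a standalone C++ sketch with integers standing in for the expanded SCEV operands:

    // Write N = P1 + P2 + ... + PK with each Pi a power of two; then
    // X^N = X^P1 * X^P2 * ... * X^PK, computed by repeated squaring.
    #include <cstdint>
    #include <cstdio>

    static uint64_t ipow(uint64_t X, unsigned N) {
      uint64_t Result = 1;
      // Invariant: X holds X^(2^i) at step i; multiply it into Result
      // whenever bit i of N is set.
      while (N) {
        if (N & 1)
          Result *= X;
        X *= X;
        N >>= 1;
      }
      return Result;
    }

    int main() { std::printf("%llu\n", (unsigned long long)ipow(3, 13)); }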
@@ -559,7 +555,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
// Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them
// that are needed into the result.
- Value *P = expandCodeForImpl(I->second, Ty);
+ Value *P = expand(I->second);
Value *Result = nullptr;
if (Exponent & 1)
Result = P;
@@ -584,14 +580,12 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
Prod = ExpandOpBinPowN();
} else if (I->second->isAllOnesValue()) {
// Instead of doing a multiply by negative one, just do a negate.
- Prod = InsertNoopCastOfTo(Prod, Ty);
Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod,
SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
++I;
} else {
// A simple mul.
Value *W = ExpandOpBinPowN();
- Prod = InsertNoopCastOfTo(Prod, Ty);
// Canonicalize a constant to the RHS.
if (isa<Constant>(Prod)) std::swap(Prod, W);
const APInt *RHS;
@@ -616,18 +610,16 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
}
Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
- Type *Ty = SE.getEffectiveSCEVType(S->getType());
-
- Value *LHS = expandCodeForImpl(S->getLHS(), Ty);
+ Value *LHS = expand(S->getLHS());
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
const APInt &RHS = SC->getAPInt();
if (RHS.isPowerOf2())
return InsertBinop(Instruction::LShr, LHS,
- ConstantInt::get(Ty, RHS.logBase2()),
+ ConstantInt::get(SC->getType(), RHS.logBase2()),
SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
}
- Value *RHS = expandCodeForImpl(S->getRHS(), Ty);
+ Value *RHS = expand(S->getRHS());
return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
/*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
}
@@ -803,12 +795,11 @@ bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
/// need to materialize IV increments elsewhere to handle difficult situations.
Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
- Type *ExpandTy, Type *IntTy,
bool useSubtract) {
Value *IncV;
// If the PHI is a pointer, use a GEP, otherwise use an add or sub.
- if (ExpandTy->isPointerTy()) {
- IncV = expandAddToGEP(SE.getSCEV(StepV), IntTy, PN);
+ if (PN->getType()->isPointerTy()) {
+ IncV = expandAddToGEP(SE.getSCEV(StepV), PN);
} else {
IncV = useSubtract ?
Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
@@ -824,12 +815,11 @@ static bool canBeCheaplyTransformed(ScalarEvolution &SE,
const SCEVAddRecExpr *Requested,
bool &InvertStep) {
// We can't transform to match a pointer PHI.
- if (Phi->getType()->isPointerTy())
+ Type *PhiTy = Phi->getType();
+ Type *RequestedTy = Requested->getType();
+ if (PhiTy->isPointerTy() || RequestedTy->isPointerTy())
return false;
- Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType());
- Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType());
-
if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
return false;
@@ -886,12 +876,10 @@ static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
/// values, and return the PHI.
PHINode *
SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
- const Loop *L,
- Type *ExpandTy,
- Type *IntTy,
- Type *&TruncTy,
+ const Loop *L, Type *&TruncTy,
bool &InvertStep) {
- assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");
+ assert((!IVIncInsertLoop || IVIncInsertPos) &&
+ "Uninitialized insert position");
// Reuse a previously-inserted PHI, if present.
BasicBlock *LatchBlock = L->getLoopLatch();
@@ -962,7 +950,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// later.
AddRecPhiMatch = &PN;
IncV = TempIncV;
- TruncTy = SE.getEffectiveSCEVType(Normalized->getType());
+ TruncTy = Normalized->getType();
}
}
@@ -996,8 +984,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
assert(L->getLoopPreheader() &&
"Can't expand add recurrences without a loop preheader!");
Value *StartV =
- expandCodeForImpl(Normalized->getStart(), ExpandTy,
- L->getLoopPreheader()->getTerminator());
+ expand(Normalized->getStart(), L->getLoopPreheader()->getTerminator());
// StartV must have been inserted into L's preheader to dominate the new
// phi.
@@ -1008,6 +995,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// Expand code for the step value. Do this before creating the PHI so that PHI
// reuse code doesn't see an incomplete PHI.
const SCEV *Step = Normalized->getStepRecurrence(SE);
+ Type *ExpandTy = Normalized->getType();
// If the stride is negative, insert a sub instead of an add for the increment
// (unless it's a constant, because subtracts of constants are canonicalized
// to adds).
@@ -1015,8 +1003,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
if (useSubtract)
Step = SE.getNegativeSCEV(Step);
// Expand the step somewhere that dominates the loop header.
- Value *StepV = expandCodeForImpl(
- Step, IntTy, &*L->getHeader()->getFirstInsertionPt());
+ Value *StepV = expand(Step, L->getHeader()->getFirstInsertionPt());
// The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
// we actually do emit an addition. It does not apply if we emit a
@@ -1047,7 +1034,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
Instruction *InsertPos = L == IVIncInsertLoop ?
IVIncInsertPos : Pred->getTerminator();
Builder.SetInsertPoint(InsertPos);
- Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+ Value *IncV = expandIVInc(PN, StepV, L, useSubtract);
if (isa<OverflowingBinaryOperator>(IncV)) {
if (IncrementIsNUW)
@@ -1070,8 +1057,6 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
}
Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
- Type *STy = S->getType();
- Type *IntTy = SE.getEffectiveSCEVType(STy);
const Loop *L = S->getLoop();
// Determine a normalized form of this expression, which is the expression
@@ -1084,51 +1069,17 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
normalizeForPostIncUse(S, Loops, SE, /*CheckInvertible=*/false));
}
- // Strip off any non-loop-dominating component from the addrec start.
- const SCEV *Start = Normalized->getStart();
- const SCEV *PostLoopOffset = nullptr;
- if (!SE.properlyDominates(Start, L->getHeader())) {
- PostLoopOffset = Start;
- Start = SE.getConstant(Normalized->getType(), 0);
- Normalized = cast<SCEVAddRecExpr>(
- SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE),
- Normalized->getLoop(),
- Normalized->getNoWrapFlags(SCEV::FlagNW)));
- }
-
- // Strip off any non-loop-dominating component from the addrec step.
+ [[maybe_unused]] const SCEV *Start = Normalized->getStart();
const SCEV *Step = Normalized->getStepRecurrence(SE);
- const SCEV *PostLoopScale = nullptr;
- if (!SE.dominates(Step, L->getHeader())) {
- PostLoopScale = Step;
- Step = SE.getConstant(Normalized->getType(), 1);
- if (!Start->isZero()) {
- // The normalization below assumes that Start is constant zero, so if
- // it isn't re-associate Start to PostLoopOffset.
- assert(!PostLoopOffset && "Start not-null but PostLoopOffset set?");
- PostLoopOffset = Start;
- Start = SE.getConstant(Normalized->getType(), 0);
- }
- Normalized =
- cast<SCEVAddRecExpr>(SE.getAddRecExpr(
- Start, Step, Normalized->getLoop(),
- Normalized->getNoWrapFlags(SCEV::FlagNW)));
- }
-
- // Expand the core addrec. If we need post-loop scaling, force it to
- // expand to an integer type to avoid the need for additional casting.
- Type *ExpandTy = PostLoopScale ? IntTy : STy;
- // We can't use a pointer type for the addrec if the pointer type is
- // non-integral.
- Type *AddRecPHIExpandTy =
- DL.isNonIntegralPointerType(STy) ? Normalized->getType() : ExpandTy;
+ assert(SE.properlyDominates(Start, L->getHeader()) &&
+ "Start does not properly dominate loop header");
+ assert(SE.dominates(Step, L->getHeader()) && "Step does not dominate loop header");
// In some cases, we decide to reuse an existing phi node but need to truncate
// it and/or invert the step.
Type *TruncTy = nullptr;
bool InvertStep = false;
- PHINode *PN = getAddRecExprPHILiterally(Normalized, L, AddRecPHIExpandTy,
- IntTy, TruncTy, InvertStep);
+ PHINode *PN = getAddRecExprPHILiterally(Normalized, L, TruncTy, InvertStep);
// Accommodate post-inc mode, if necessary.
Value *Result;
@@ -1167,59 +1118,29 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// inserting an extra IV increment. StepV might fold into PostLoopOffset,
// but hopefully expandCodeFor handles that.
bool useSubtract =
- !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
+ !S->getType()->isPointerTy() && Step->isNonConstantNegative();
if (useSubtract)
Step = SE.getNegativeSCEV(Step);
Value *StepV;
{
// Expand the step somewhere that dominates the loop header.
SCEVInsertPointGuard Guard(Builder, this);
- StepV = expandCodeForImpl(
- Step, IntTy, &*L->getHeader()->getFirstInsertionPt());
+ StepV = expand(Step, L->getHeader()->getFirstInsertionPt());
}
- Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+ Result = expandIVInc(PN, StepV, L, useSubtract);
}
}
// We have decided to reuse an induction variable of a dominating loop. Apply
// truncation and/or inversion of the step.
if (TruncTy) {
- Type *ResTy = Result->getType();
- // Normalize the result type.
- if (ResTy != SE.getEffectiveSCEVType(ResTy))
- Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy));
// Truncate the result.
if (TruncTy != Result->getType())
Result = Builder.CreateTrunc(Result, TruncTy);
// Invert the result.
if (InvertStep)
- Result = Builder.CreateSub(
- expandCodeForImpl(Normalized->getStart(), TruncTy), Result);
- }
-
- // Re-apply any non-loop-dominating scale.
- if (PostLoopScale) {
- assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
- Result = InsertNoopCastOfTo(Result, IntTy);
- Result = Builder.CreateMul(Result,
- expandCodeForImpl(PostLoopScale, IntTy));
- }
-
- // Re-apply any non-loop-dominating offset.
- if (PostLoopOffset) {
- if (isa<PointerType>(ExpandTy)) {
- if (Result->getType()->isIntegerTy()) {
- Value *Base = expandCodeForImpl(PostLoopOffset, ExpandTy);
- Result = expandAddToGEP(SE.getUnknown(Result), IntTy, Base);
- } else {
- Result = expandAddToGEP(PostLoopOffset, IntTy, Result);
- }
- } else {
- Result = InsertNoopCastOfTo(Result, IntTy);
- Result = Builder.CreateAdd(
- Result, expandCodeForImpl(PostLoopOffset, IntTy));
- }
+ Result = Builder.CreateSub(expand(Normalized->getStart()), Result);
}
return Result;
@@ -1260,8 +1181,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
S->getNoWrapFlags(SCEV::FlagNW)));
BasicBlock::iterator NewInsertPt =
findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint());
- V = expandCodeForImpl(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr,
- &*NewInsertPt);
+ V = expand(SE.getTruncateExpr(SE.getUnknown(V), Ty), NewInsertPt);
return V;
}
@@ -1269,7 +1189,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
if (!S->getStart()->isZero()) {
if (isa<PointerType>(S->getType())) {
Value *StartV = expand(SE.getPointerBase(S));
- return expandAddToGEP(SE.removePointerBase(S), Ty, StartV);
+ return expandAddToGEP(SE.removePointerBase(S), StartV);
}
SmallVector<const SCEV *, 4> NewOps(S->operands());
@@ -1292,8 +1212,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// specified loop.
BasicBlock *Header = L->getHeader();
pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
- CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar",
- &Header->front());
+ CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar");
+ CanonicalIV->insertBefore(Header->begin());
rememberInstruction(CanonicalIV);
SmallSet<BasicBlock *, 4> PredSeen;
@@ -1361,34 +1281,25 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
}
Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) {
- Value *V =
- expandCodeForImpl(S->getOperand(), S->getOperand()->getType());
+ Value *V = expand(S->getOperand());
return ReuseOrCreateCast(V, S->getType(), CastInst::PtrToInt,
GetOptimalInsertionPointForCastOf(V));
}
Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
- Type *Ty = SE.getEffectiveSCEVType(S->getType());
- Value *V = expandCodeForImpl(
- S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType())
- );
- return Builder.CreateTrunc(V, Ty);
+ Value *V = expand(S->getOperand());
+ return Builder.CreateTrunc(V, S->getType());
}
Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
- Type *Ty = SE.getEffectiveSCEVType(S->getType());
- Value *V = expandCodeForImpl(
- S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType())
- );
- return Builder.CreateZExt(V, Ty);
+ Value *V = expand(S->getOperand());
+ return Builder.CreateZExt(V, S->getType(), "",
+ SE.isKnownNonNegative(S->getOperand()));
}
Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
- Type *Ty = SE.getEffectiveSCEVType(S->getType());
- Value *V = expandCodeForImpl(
- S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType())
- );
- return Builder.CreateSExt(V, Ty);
+ Value *V = expand(S->getOperand());
+ return Builder.CreateSExt(V, S->getType());
}
Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
@@ -1399,7 +1310,7 @@ Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
if (IsSequential)
LHS = Builder.CreateFreeze(LHS);
for (int i = S->getNumOperands() - 2; i >= 0; --i) {
- Value *RHS = expandCodeForImpl(S->getOperand(i), Ty);
+ Value *RHS = expand(S->getOperand(i));
if (IsSequential && i != 0)
RHS = Builder.CreateFreeze(RHS);
Value *Sel;
@@ -1440,14 +1351,14 @@ Value *SCEVExpander::visitVScale(const SCEVVScale *S) {
return Builder.CreateVScale(ConstantInt::get(S->getType(), 1));
}
-Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty,
- Instruction *IP) {
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
+ BasicBlock::iterator IP) {
setInsertPoint(IP);
- Value *V = expandCodeForImpl(SH, Ty);
+ Value *V = expandCodeFor(SH, Ty);
return V;
}
-Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty) {
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
// Expand the code for this SCEV.
Value *V = expand(SH);
@@ -1459,8 +1370,64 @@ Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty) {
return V;
}
-Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S,
- const Instruction *InsertPt) {
+static bool
+canReuseInstruction(ScalarEvolution &SE, const SCEV *S, Instruction *I,
+ SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
+ // If the instruction cannot be poison, it's always safe to reuse.
+ if (programUndefinedIfPoison(I))
+ return true;
+
+ // Otherwise, it is possible that I is more poisonous than S. Collect the
+ // poison-contributors of S, and then check whether I has any additional
+ // poison-contributors. Poison that is contributed through poison-generating
+ // flags is handled by dropping those flags instead.
+ SmallPtrSet<const Value *, 8> PoisonVals;
+ SE.getPoisonGeneratingValues(PoisonVals, S);
+
+ SmallVector<Value *> Worklist;
+ SmallPtrSet<Value *, 8> Visited;
+ Worklist.push_back(I);
+ while (!Worklist.empty()) {
+ Value *V = Worklist.pop_back_val();
+ if (!Visited.insert(V).second)
+ continue;
+
+ // Avoid walking large instruction graphs.
+ if (Visited.size() > 16)
+ return false;
+
+ // Either the value can't be poison, or S would also be poison if it
+ // is.
+ if (PoisonVals.contains(V) || isGuaranteedNotToBePoison(V))
+ continue;
+
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ // FIXME: Ignore vscale, even though it technically could be poison. Do this
+ // because SCEV currently assumes it can't be poison. Remove this special
+ // case once we properly model when vscale can be poison.
+ if (auto *II = dyn_cast<IntrinsicInst>(I);
+ II && II->getIntrinsicID() == Intrinsic::vscale)
+ continue;
+
+ if (canCreatePoison(cast<Operator>(I), /*ConsiderFlagsAndMetadata*/ false))
+ return false;
+
+ // If the instruction can't create poison, we can recurse to its operands.
+ if (I->hasPoisonGeneratingFlagsOrMetadata())
+ DropPoisonGeneratingInsts.push_back(I);
+
+ for (Value *Op : I->operands())
+ Worklist.push_back(Op);
+ }
+ return true;
+}
+
+Value *SCEVExpander::FindValueInExprValueMap(
+ const SCEV *S, const Instruction *InsertPt,
+ SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
// If the expansion is not in CanonicalMode, and the SCEV contains any
// sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
if (!CanonicalMode && SE.containsAddRecurrence(S))
@@ -1470,20 +1437,24 @@ Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S,
if (isa<SCEVConstant>(S))
return nullptr;
- // Choose a Value from the set which dominates the InsertPt.
- // InsertPt should be inside the Value's parent loop so as not to break
- // the LCSSA form.
for (Value *V : SE.getSCEVValues(S)) {
Instruction *EntInst = dyn_cast<Instruction>(V);
if (!EntInst)
continue;
+ // Choose a Value from the set which dominates the InsertPt.
+ // InsertPt should be inside the Value's parent loop so as not to break
+ // the LCSSA form.
assert(EntInst->getFunction() == InsertPt->getFunction());
- if (S->getType() == V->getType() &&
- SE.DT.dominates(EntInst, InsertPt) &&
- (SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
- SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
+ if (S->getType() != V->getType() || !SE.DT.dominates(EntInst, InsertPt) ||
+ !(SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
+ SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
+ continue;
+
+ // Make sure reusing the instruction is poison-safe.
+ if (canReuseInstruction(SE, S, EntInst, DropPoisonGeneratingInsts))
return V;
+ DropPoisonGeneratingInsts.clear();
}
return nullptr;
}
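
For illustration: the DropPoisonGeneratingInsts plumbing exists because reusing a cached instruction for a SCEV is effectively CSE between two expressions with potentially different poison guarantees. If the map offers `%x = add nuw i64 %a, %b` for the SCEV (a + b), the SCEV made no no-wrap promise, so new users of %x must not observe the extra poison; canReuseInstruction records such instructions and the caller strips their flags. A minimal sketch of the stripping step, using the API the patch itself calls:

    // Sketch: make a to-be-reused instruction no more poisonous than the
    // SCEV it stands for, mirroring the loop in expand() below.
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    static void makeReuseSafe(Instruction *I) {
      // Drops nuw/nsw/exact and poison-generating metadata such as !range.
      I->dropPoisonGeneratingFlagsAndMetadata();
    }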
@@ -1497,7 +1468,7 @@ Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S,
Value *SCEVExpander::expand(const SCEV *S) {
// Compute an insertion point for this SCEV object. Hoist the instructions
// as far out in the loop nest as possible.
- Instruction *InsertPt = &*Builder.GetInsertPoint();
+ BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
// We can move the insertion point only if there are no div or rem operations;
// otherwise we risk moving it over the check for a zero denominator.
@@ -1521,24 +1492,25 @@ Value *SCEVExpander::expand(const SCEV *S) {
L = L->getParentLoop()) {
if (SE.isLoopInvariant(S, L)) {
if (!L) break;
- if (BasicBlock *Preheader = L->getLoopPreheader())
- InsertPt = Preheader->getTerminator();
- else
+ if (BasicBlock *Preheader = L->getLoopPreheader()) {
+ InsertPt = Preheader->getTerminator()->getIterator();
+ } else {
// LSR sets the insertion point for AddRec start/step values to the
// block start to simplify value reuse, even though it's an invalid
// position. SCEVExpander must correct for this in all cases.
- InsertPt = &*L->getHeader()->getFirstInsertionPt();
+ InsertPt = L->getHeader()->getFirstInsertionPt();
+ }
} else {
// If the SCEV is computable at this level, insert it into the header
// after the PHIs (and after any other instructions that we've inserted
// there) so that it is guaranteed to dominate any user inside the loop.
if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
- InsertPt = &*L->getHeader()->getFirstInsertionPt();
+ InsertPt = L->getHeader()->getFirstInsertionPt();
- while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
- (isInsertedInstruction(InsertPt) ||
- isa<DbgInfoIntrinsic>(InsertPt))) {
- InsertPt = &*std::next(InsertPt->getIterator());
+ while (InsertPt != Builder.GetInsertPoint() &&
+ (isInsertedInstruction(&*InsertPt) ||
+ isa<DbgInfoIntrinsic>(&*InsertPt))) {
+ InsertPt = std::next(InsertPt);
}
break;
}
@@ -1546,26 +1518,40 @@ Value *SCEVExpander::expand(const SCEV *S) {
}
// Check to see if we already expanded this here.
- auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));
+ auto I = InsertedExpressions.find(std::make_pair(S, &*InsertPt));
if (I != InsertedExpressions.end())
return I->second;
SCEVInsertPointGuard Guard(Builder, this);
- Builder.SetInsertPoint(InsertPt);
+ Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
// Expand the expression into instructions.
- Value *V = FindValueInExprValueMap(S, InsertPt);
+ SmallVector<Instruction *> DropPoisonGeneratingInsts;
+ Value *V = FindValueInExprValueMap(S, &*InsertPt, DropPoisonGeneratingInsts);
if (!V) {
V = visit(S);
V = fixupLCSSAFormFor(V);
} else {
- // If we're reusing an existing instruction, we are effectively CSEing two
- // copies of the instruction (with potentially different flags). As such,
- // we need to drop any poison generating flags unless we can prove that
- // said flags must be valid for all new users.
- if (auto *I = dyn_cast<Instruction>(V))
- if (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I))
- I->dropPoisonGeneratingFlags();
+ for (Instruction *I : DropPoisonGeneratingInsts) {
+ I->dropPoisonGeneratingFlagsAndMetadata();
+ // See if we can re-infer from first principles any of the flags we just
+ // dropped.
+ if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
+ if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
+ auto *BO = cast<BinaryOperator>(I);
+ BO->setHasNoUnsignedWrap(
+ ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW);
+ BO->setHasNoSignedWrap(
+ ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW);
+ }
+ if (auto *NNI = dyn_cast<PossiblyNonNegInst>(I)) {
+ auto *Src = NNI->getOperand(0);
+ if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src,
+ Constant::getNullValue(Src->getType()), I,
+ DL).value_or(false))
+ NNI->setNonNeg(true);
+ }
+ }
}
// Remember the expanded value for this SCEV at this location.
//
@@ -1573,7 +1559,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
// the expression at this insertion point. If the mapped value happened to be
// a postinc expansion, it could be reused by a non-postinc user, but only if
// its insertion point was already at the head of the loop.
- InsertedExpressions[std::make_pair(S, InsertPt)] = V;
+ InsertedExpressions[std::make_pair(S, &*InsertPt)] = V;
return V;
}
@@ -1710,13 +1696,13 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
<< *IsomorphicInc << '\n');
Value *NewInc = OrigInc;
if (OrigInc->getType() != IsomorphicInc->getType()) {
- Instruction *IP = nullptr;
+ BasicBlock::iterator IP;
if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
- IP = &*PN->getParent()->getFirstInsertionPt();
+ IP = PN->getParent()->getFirstInsertionPt();
else
- IP = OrigInc->getNextNode();
+ IP = OrigInc->getNextNonDebugInstruction()->getIterator();
- IRBuilder<> Builder(IP);
+ IRBuilder<> Builder(IP->getParent(), IP);
Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
NewInc = Builder.CreateTruncOrBitCast(
OrigInc, IsomorphicInc->getType(), IVName);
@@ -1734,7 +1720,8 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
++NumElim;
Value *NewIV = OrigPhiRef;
if (OrigPhiRef->getType() != Phi->getType()) {
- IRBuilder<> Builder(&*L->getHeader()->getFirstInsertionPt());
+ IRBuilder<> Builder(L->getHeader(),
+ L->getHeader()->getFirstInsertionPt());
Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
}
@@ -1744,9 +1731,9 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
return NumElim;
}
-Value *SCEVExpander::getRelatedExistingExpansion(const SCEV *S,
- const Instruction *At,
- Loop *L) {
+bool SCEVExpander::hasRelatedExistingExpansion(const SCEV *S,
+ const Instruction *At,
+ Loop *L) {
using namespace llvm::PatternMatch;
SmallVector<BasicBlock *, 4> ExitingBlocks;
@@ -1763,17 +1750,18 @@ Value *SCEVExpander::getRelatedExistingExpansion(const SCEV *S,
continue;
if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
- return LHS;
+ return true;
if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
- return RHS;
+ return true;
}
// Use expand's logic which is used for reusing a previous Value in
// ExprValueMap. Note that we don't currently model the cost of
// needing to drop poison generating flags on the instruction if we
// want to reuse it. We effectively assume that has zero cost.
- return FindValueInExprValueMap(S, At);
+ SmallVector<Instruction *> DropPoisonGeneratingInsts;
+ return FindValueInExprValueMap(S, At, DropPoisonGeneratingInsts) != nullptr;
}
template<typename T> static InstructionCost costAndCollectOperands(
@@ -1951,7 +1939,7 @@ bool SCEVExpander::isHighCostExpansionHelper(
// If we can find an existing value for this scev available at the point "At"
// then consider the expression cheap.
- if (getRelatedExistingExpansion(S, &At, L))
+ if (hasRelatedExistingExpansion(S, &At, L))
return false; // Consider the expression to be free.
TargetTransformInfo::TargetCostKind CostKind =
@@ -1993,7 +1981,7 @@ bool SCEVExpander::isHighCostExpansionHelper(
// At the beginning of this function we already tried to find existing
// value for plain 'S'. Now try to lookup 'S + 1' since it is common
// pattern involving division. This is just a simple search heuristic.
- if (getRelatedExistingExpansion(
+ if (hasRelatedExistingExpansion(
SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L))
return false; // Consider it to be free.
@@ -2045,10 +2033,8 @@ Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
Value *SCEVExpander::expandComparePredicate(const SCEVComparePredicate *Pred,
Instruction *IP) {
- Value *Expr0 =
- expandCodeForImpl(Pred->getLHS(), Pred->getLHS()->getType(), IP);
- Value *Expr1 =
- expandCodeForImpl(Pred->getRHS(), Pred->getRHS()->getType(), IP);
+ Value *Expr0 = expand(Pred->getLHS(), IP);
+ Value *Expr1 = expand(Pred->getRHS(), IP);
Builder.SetInsertPoint(IP);
auto InvPred = ICmpInst::getInversePredicate(Pred->getPredicate());
@@ -2080,17 +2066,15 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
// Step >= 0, Start + |Step| * Backedge > Start
// and |Step| * Backedge doesn't unsigned overflow.
- IntegerType *CountTy = IntegerType::get(Loc->getContext(), SrcBits);
Builder.SetInsertPoint(Loc);
- Value *TripCountVal = expandCodeForImpl(ExitCount, CountTy, Loc);
+ Value *TripCountVal = expand(ExitCount, Loc);
IntegerType *Ty =
IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
- Value *StepValue = expandCodeForImpl(Step, Ty, Loc);
- Value *NegStepValue =
- expandCodeForImpl(SE.getNegativeSCEV(Step), Ty, Loc);
- Value *StartValue = expandCodeForImpl(Start, ARTy, Loc);
+ Value *StepValue = expand(Step, Loc);
+ Value *NegStepValue = expand(SE.getNegativeSCEV(Step), Loc);
+ Value *StartValue = expand(Start, Loc);
ConstantInt *Zero =
ConstantInt::get(Loc->getContext(), APInt::getZero(DstBits));
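
For illustration, the positive-step half of the condition stated in the comment above can be modeled in plain C++ at a fixed 64-bit width (the generated IR works at the AR's own bit width and also emits the mirror-image check for negative steps):

    // {Start,+,Step} does not overflow iff Step * BackedgeTakenCount does
    // not unsigned-overflow and Start + Step * BackedgeTakenCount >= Start.
    #include <cstdint>

    static bool addRecMayOverflowU64(uint64_t Start, uint64_t Step,
                                     uint64_t BackedgeTakenCount) {
      uint64_t Mul = Step * BackedgeTakenCount; // wraps modulo 2^64
      bool MulOv = Step != 0 && Mul / Step != BackedgeTakenCount;
      uint64_t End = Start + Mul;               // wraps modulo 2^64
      bool AddOv = End < Start;
      return MulOv || AddOv;
    }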
@@ -2136,9 +2120,7 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
bool NeedPosCheck = !SE.isKnownNegative(Step);
bool NeedNegCheck = !SE.isKnownPositive(Step);
- if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
- StartValue = InsertNoopCastOfTo(
- StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace()));
+ if (isa<PointerType>(ARTy)) {
Value *NegMulV = Builder.CreateNeg(MulV);
if (NeedPosCheck)
Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
@@ -2171,7 +2153,7 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
// If the backedge taken count type is larger than the AR type,
// check that we don't drop any bits by truncating it. If we are
// dropping bits, then we have overflow (unless the step is zero).
- if (SE.getTypeSizeInBits(CountTy) > SE.getTypeSizeInBits(Ty)) {
+ if (SrcBits > DstBits) {
auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits);
auto *BackedgeCheck =
Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal,
@@ -2244,7 +2226,7 @@ Value *SCEVExpander::fixupLCSSAFormFor(Value *V) {
// instruction.
Type *ToTy;
if (DefI->getType()->isIntegerTy())
- ToTy = DefI->getType()->getPointerTo();
+ ToTy = PointerType::get(DefI->getContext(), 0);
else
ToTy = Type::getInt32Ty(DefI->getContext());
Instruction *User =
@@ -2306,12 +2288,6 @@ struct SCEVFindUnsafe {
}
}
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- const SCEV *Step = AR->getStepRecurrence(SE);
- if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) {
- IsUnsafe = true;
- return false;
- }
-
// For non-affine addrecs or in non-canonical mode we need a preheader
// to insert into.
if (!AR->getLoop()->getLoopPreheader() &&
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index bd7ab7c98781..89494a7f6497 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
@@ -271,7 +270,10 @@ class SimplifyCFGOpt {
bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
IRBuilder<> &Builder);
- bool HoistThenElseCodeToIf(BranchInst *BI, bool EqTermsOnly);
+ bool hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly);
+ bool hoistSuccIdenticalTerminatorToSwitchOrIf(
+ Instruction *TI, Instruction *I1,
+ SmallVectorImpl<Instruction *> &OtherSuccTIs);
bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
BasicBlock *TrueBB, BasicBlock *FalseBB,
@@ -499,7 +501,7 @@ static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
return CI;
else
return cast<ConstantInt>(
- ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false));
+ ConstantFoldIntegerCast(CI, PtrTy, /*isSigned=*/false, DL));
}
return nullptr;
}
@@ -819,7 +821,7 @@ BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
static void
EliminateBlockCases(BasicBlock *BB,
std::vector<ValueEqualityComparisonCase> &Cases) {
- llvm::erase_value(Cases, BB);
+ llvm::erase(Cases, BB);
}
/// Return true if there are any keys in C1 that exist in C2 as well.
@@ -1098,12 +1100,13 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
// Note that there may be multiple predecessor blocks, so we cannot move
// bonus instructions to a predecessor block.
for (Instruction &BonusInst : *BB) {
- if (isa<DbgInfoIntrinsic>(BonusInst) || BonusInst.isTerminator())
+ if (BonusInst.isTerminator())
continue;
Instruction *NewBonusInst = BonusInst.clone();
- if (PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
+ if (!isa<DbgInfoIntrinsic>(BonusInst) &&
+ PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
// Unless the instruction has the same !dbg location as the original
// branch, drop it. When we fold the bonus instructions we want to make
// sure we reset their debug locations in order to avoid stepping on
@@ -1113,7 +1116,6 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
RemapInstruction(NewBonusInst, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- VMap[&BonusInst] = NewBonusInst;
// If we speculated an instruction, we need to drop any metadata that may
// result in undefined behavior, as the metadata might have been valid
@@ -1123,8 +1125,16 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
NewBonusInst->dropUBImplyingAttrsAndMetadata();
NewBonusInst->insertInto(PredBlock, PTI->getIterator());
+ auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
+ RemapDPValueRange(NewBonusInst->getModule(), Range, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+
+ if (isa<DbgInfoIntrinsic>(BonusInst))
+ continue;
+
NewBonusInst->takeName(&BonusInst);
BonusInst.setName(NewBonusInst->getName() + ".old");
+ VMap[&BonusInst] = NewBonusInst;
// Update (liveout) uses of bonus instructions,
// now that the bonus instruction has been cloned into predecessor.
@@ -1303,7 +1313,7 @@ bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding(
}
for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
NewSuccessors) {
- for (auto I : seq(0, NewSuccessor.second)) {
+ for (auto I : seq(NewSuccessor.second)) {
(void)I;
AddPredecessorToBlock(NewSuccessor.first, Pred, BB);
}
@@ -1408,8 +1418,9 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
}
// If we would need to insert a select that uses the value of this invoke
-// (comments in HoistThenElseCodeToIf explain why we would need to do this), we
-// can't hoist the invoke, as there is nowhere to put the select in this case.
+// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
+// need to do this), we can't hoist the invoke, as there is nowhere to put the
+// select in this case.
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
Instruction *I1, Instruction *I2) {
for (BasicBlock *Succ : successors(BB1)) {
@@ -1424,9 +1435,9 @@ static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
return true;
}
-// Get interesting characteristics of instructions that `HoistThenElseCodeToIf`
-// didn't hoist. They restrict what kind of instructions can be reordered
-// across.
+// Get interesting characteristics of instructions that
+// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
+// instructions can be reordered across.
enum SkipFlags {
SkipReadMem = 1,
SkipSideEffect = 2,
@@ -1484,7 +1495,7 @@ static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
-/// Helper function for HoistThenElseCodeToIf. Return true if identical
+/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
/// instructions \p I1 and \p I2 can and should be hoisted.
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2,
const TargetTransformInfo &TTI) {
@@ -1515,62 +1526,51 @@ static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2,
return true;
}
-/// Given a conditional branch that goes to BB1 and BB2, hoist any common code
-/// in the two blocks up into the branch block. The caller of this function
-/// guarantees that BI's block dominates BB1 and BB2. If EqTermsOnly is given,
-/// only perform hoisting in case both blocks only contain a terminator. In that
-/// case, only the original BI will be replaced and selects for PHIs are added.
-bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, bool EqTermsOnly) {
+/// Hoist any common code in the successor blocks up into the block. This
+/// function guarantees that BB dominates all successors. If EqTermsOnly is
+/// given, only perform hoisting in case both blocks only contain a terminator.
+/// In that case, only the original BI will be replaced and selects for PHIs are
+/// added.
+bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
+ bool EqTermsOnly) {
// This does very trivial matching, with limited scanning, to find identical
- // instructions in the two blocks. In particular, we don't want to get into
- // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
+ // instructions in the two blocks. In particular, we don't want to get into
+ // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
// such, we currently just scan for obviously identical instructions in an
// identical order, possibly separated by the same number of non-identical
// instructions.
- BasicBlock *BB1 = BI->getSuccessor(0); // The true destination.
- BasicBlock *BB2 = BI->getSuccessor(1); // The false destination
+ unsigned int SuccSize = succ_size(BB);
+ if (SuccSize < 2)
+ return false;
// If either of the blocks has its address taken, then we can't do this fold,
// because the code we'd hoist would no longer run when we jump into the block
// by its address.
- if (BB1->hasAddressTaken() || BB2->hasAddressTaken())
- return false;
+ for (auto *Succ : successors(BB))
+ if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
+ return false;
- BasicBlock::iterator BB1_Itr = BB1->begin();
- BasicBlock::iterator BB2_Itr = BB2->begin();
+ auto *TI = BB->getTerminator();
- Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++;
- // Skip debug info if it is not identical.
- DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
- DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
- if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
- while (isa<DbgInfoIntrinsic>(I1))
- I1 = &*BB1_Itr++;
- while (isa<DbgInfoIntrinsic>(I2))
- I2 = &*BB2_Itr++;
+ // The second of pair is a SkipFlags bitmask.
+ using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
+ SmallVector<SuccIterPair, 8> SuccIterPairs;
+ for (auto *Succ : successors(BB)) {
+ BasicBlock::iterator SuccItr = Succ->begin();
+ if (isa<PHINode>(*SuccItr))
+ return false;
+ SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
}
- if (isa<PHINode>(I1))
- return false;
-
- BasicBlock *BIParent = BI->getParent();
-
- bool Changed = false;
-
- auto _ = make_scope_exit([&]() {
- if (Changed)
- ++NumHoistCommonCode;
- });
// Check if only hoisting terminators is allowed. This does not add new
// instructions to the hoist location.
if (EqTermsOnly) {
// Skip any debug intrinsics, as they are free to hoist.
- auto *I1NonDbg = &*skipDebugIntrinsics(I1->getIterator());
- auto *I2NonDbg = &*skipDebugIntrinsics(I2->getIterator());
- if (!I1NonDbg->isIdenticalToWhenDefined(I2NonDbg))
- return false;
- if (!I1NonDbg->isTerminator())
- return false;
+ for (auto &SuccIter : make_first_range(SuccIterPairs)) {
+ auto *INonDbg = &*skipDebugIntrinsics(SuccIter);
+ if (!INonDbg->isTerminator())
+ return false;
+ }
// Now we know that we only need to hoist debug intrinsics and the
// terminator. Let the loop below handle those 2 cases.
}
@@ -1579,153 +1579,235 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, bool EqTermsOnly) {
// many instructions we skip, serving as a compilation time control as well as
// preventing excessive increase of life ranges.
unsigned NumSkipped = 0;
+ // If we find an unreachable instruction at the beginning of a basic block, we
+ // can still hoist instructions from the rest of the basic blocks.
+ if (SuccIterPairs.size() > 2) {
+ erase_if(SuccIterPairs,
+ [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
+ if (SuccIterPairs.size() < 2)
+ return false;
+ }
- // Record any skipped instuctions that may read memory, write memory or have
- // side effects, or have implicit control flow.
- unsigned SkipFlagsBB1 = 0;
- unsigned SkipFlagsBB2 = 0;
+ bool Changed = false;
for (;;) {
+ auto *SuccIterPairBegin = SuccIterPairs.begin();
+ auto &BB1ItrPair = *SuccIterPairBegin++;
+ auto OtherSuccIterPairRange =
+ iterator_range(SuccIterPairBegin, SuccIterPairs.end());
+ auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
+
+ Instruction *I1 = &*BB1ItrPair.first;
+ auto *BB1 = I1->getParent();
+
+ // Skip debug info if it is not identical.
+ bool AllDbgInstsAreIdentical = all_of(OtherSuccIterRange, [I1](auto &Iter) {
+ Instruction *I2 = &*Iter;
+ return I1->isIdenticalToWhenDefined(I2);
+ });
+ if (!AllDbgInstsAreIdentical) {
+ while (isa<DbgInfoIntrinsic>(I1))
+ I1 = &*++BB1ItrPair.first;
+ for (auto &SuccIter : OtherSuccIterRange) {
+ Instruction *I2 = &*SuccIter;
+ while (isa<DbgInfoIntrinsic>(I2))
+ I2 = &*++SuccIter;
+ }
+ }
+
+ bool AllInstsAreIdentical = true;
+ bool HasTerminator = I1->isTerminator();
+ for (auto &SuccIter : OtherSuccIterRange) {
+ Instruction *I2 = &*SuccIter;
+ HasTerminator |= I2->isTerminator();
+ if (AllInstsAreIdentical && !I1->isIdenticalToWhenDefined(I2))
+ AllInstsAreIdentical = false;
+ }
+
// If we are hoisting the terminator instruction, don't move one (making a
// broken BB), instead clone it, and remove BI.
- if (I1->isTerminator() || I2->isTerminator()) {
+ if (HasTerminator) {
+ // Even if BB, which contains only one unreachable instruction, is ignored
+ // at the beginning of the loop, we can hoist the terminator instruction.
// If any instructions remain in the block, we cannot hoist terminators.
- if (NumSkipped || !I1->isIdenticalToWhenDefined(I2))
+ if (NumSkipped || !AllInstsAreIdentical)
return Changed;
- goto HoistTerminator;
+ SmallVector<Instruction *, 8> Insts;
+ for (auto &SuccIter : OtherSuccIterRange)
+ Insts.push_back(&*SuccIter);
+ return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, Insts) || Changed;
}
- if (I1->isIdenticalToWhenDefined(I2) &&
- // Even if the instructions are identical, it may not be safe to hoist
- // them if we have skipped over instructions with side effects or their
- // operands weren't hoisted.
- isSafeToHoistInstr(I1, SkipFlagsBB1) &&
- isSafeToHoistInstr(I2, SkipFlagsBB2) &&
- shouldHoistCommonInstructions(I1, I2, TTI)) {
- if (isa<DbgInfoIntrinsic>(I1) || isa<DbgInfoIntrinsic>(I2)) {
- assert(isa<DbgInfoIntrinsic>(I1) && isa<DbgInfoIntrinsic>(I2));
+ if (AllInstsAreIdentical) {
+ unsigned SkipFlagsBB1 = BB1ItrPair.second;
+ AllInstsAreIdentical =
+ isSafeToHoistInstr(I1, SkipFlagsBB1) &&
+ all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
+ Instruction *I2 = &*Pair.first;
+ unsigned SkipFlagsBB2 = Pair.second;
+ // Even if the instructions are identical, it may not
+ // be safe to hoist them if we have skipped over
+ // instructions with side effects or their operands
+ // weren't hoisted.
+ return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
+ shouldHoistCommonInstructions(I1, I2, TTI);
+ });
+ }
+
+ if (AllInstsAreIdentical) {
+ BB1ItrPair.first++;
+ if (isa<DbgInfoIntrinsic>(I1)) {
// The debug location is an integral part of a debug info intrinsic
// and can't be separated from it or replaced. Instead of attempting
// to merge locations, simply hoist both copies of the intrinsic.
- BIParent->splice(BI->getIterator(), BB1, I1->getIterator());
- BIParent->splice(BI->getIterator(), BB2, I2->getIterator());
+ I1->moveBeforePreserving(TI);
+ for (auto &SuccIter : OtherSuccIterRange) {
+ auto *I2 = &*SuccIter++;
+ assert(isa<DbgInfoIntrinsic>(I2));
+ I2->moveBeforePreserving(TI);
+ }
} else {
// For a normal instruction, we just move one to right before the
// branch, then replace all uses of the other with the first. Finally,
// we remove the now redundant second instruction.
- BIParent->splice(BI->getIterator(), BB1, I1->getIterator());
- if (!I2->use_empty())
- I2->replaceAllUsesWith(I1);
- I1->andIRFlags(I2);
- combineMetadataForCSE(I1, I2, true);
-
- // I1 and I2 are being combined into a single instruction. Its debug
- // location is the merged locations of the original instructions.
- I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
-
- I2->eraseFromParent();
+ I1->moveBeforePreserving(TI);
+ BB->splice(TI->getIterator(), BB1, I1->getIterator());
+ for (auto &SuccIter : OtherSuccIterRange) {
+ Instruction *I2 = &*SuccIter++;
+ assert(I2 != I1);
+ if (!I2->use_empty())
+ I2->replaceAllUsesWith(I1);
+ I1->andIRFlags(I2);
+ combineMetadataForCSE(I1, I2, true);
+ // I1 and I2 are being combined into a single instruction. Its debug
+ // location is the merged locations of the original instructions.
+ I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
+ I2->eraseFromParent();
+ }
}
+ if (!Changed)
+ NumHoistCommonCode += SuccIterPairs.size();
Changed = true;
- ++NumHoistCommonInstrs;
+ NumHoistCommonInstrs += SuccIterPairs.size();
} else {
if (NumSkipped >= HoistCommonSkipLimit)
return Changed;
// We are about to skip over a pair of non-identical instructions. Record
// if any have characteristics that would prevent reordering instructions
// across them.
- SkipFlagsBB1 |= skippedInstrFlags(I1);
- SkipFlagsBB2 |= skippedInstrFlags(I2);
+ for (auto &SuccIterPair : SuccIterPairs) {
+ Instruction *I = &*SuccIterPair.first++;
+ SuccIterPair.second |= skippedInstrFlags(I);
+ }
++NumSkipped;
}
-
- I1 = &*BB1_Itr++;
- I2 = &*BB2_Itr++;
- // Skip debug info if it is not identical.
- DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
- DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
- if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
- while (isa<DbgInfoIntrinsic>(I1))
- I1 = &*BB1_Itr++;
- while (isa<DbgInfoIntrinsic>(I2))
- I2 = &*BB2_Itr++;
- }
}
+}
- return Changed;
+bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
+ Instruction *TI, Instruction *I1,
+ SmallVectorImpl<Instruction *> &OtherSuccTIs) {
-HoistTerminator:
- // It may not be possible to hoist an invoke.
+ auto *BI = dyn_cast<BranchInst>(TI);
+
+ bool Changed = false;
+ BasicBlock *TIParent = TI->getParent();
+ BasicBlock *BB1 = I1->getParent();
+
+ // Use only for an if statement.
+ auto *I2 = *OtherSuccTIs.begin();
+ auto *BB2 = I2->getParent();
+ if (BI) {
+ assert(OtherSuccTIs.size() == 1);
+ assert(BI->getSuccessor(0) == I1->getParent());
+ assert(BI->getSuccessor(1) == I2->getParent());
+ }
+
+ // In the case of an if statement, we try to hoist an invoke.
// FIXME: Can we define a safety predicate for CallBr?
- if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
- return Changed;
+ // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
+ // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
+ if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
+ return false;
// TODO: callbr hoisting currently disabled pending further study.
if (isa<CallBrInst>(I1))
- return Changed;
+ return false;
for (BasicBlock *Succ : successors(BB1)) {
for (PHINode &PN : Succ->phis()) {
Value *BB1V = PN.getIncomingValueForBlock(BB1);
- Value *BB2V = PN.getIncomingValueForBlock(BB2);
- if (BB1V == BB2V)
- continue;
+ for (Instruction *OtherSuccTI : OtherSuccTIs) {
+ Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
+ if (BB1V == BB2V)
+ continue;
- // Check for passingValueIsAlwaysUndefined here because we would rather
- // eliminate undefined control flow then converting it to a select.
- if (passingValueIsAlwaysUndefined(BB1V, &PN) ||
- passingValueIsAlwaysUndefined(BB2V, &PN))
- return Changed;
+ // In the case of an if statement, check for
+ // passingValueIsAlwaysUndefined here because we would rather eliminate
+ // undefined control flow than convert it to a select.
+ if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
+ passingValueIsAlwaysUndefined(BB2V, &PN))
+ return false;
+ }
}
}
// Okay, it is safe to hoist the terminator.
Instruction *NT = I1->clone();
- NT->insertInto(BIParent, BI->getIterator());
+ NT->insertInto(TIParent, TI->getIterator());
if (!NT->getType()->isVoidTy()) {
I1->replaceAllUsesWith(NT);
- I2->replaceAllUsesWith(NT);
+ for (Instruction *OtherSuccTI : OtherSuccTIs)
+ OtherSuccTI->replaceAllUsesWith(NT);
NT->takeName(I1);
}
Changed = true;
- ++NumHoistCommonInstrs;
+ NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
// Ensure terminator gets a debug location, even an unknown one, in case
// it involves inlinable calls.
- NT->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
+ SmallVector<DILocation *, 4> Locs;
+ Locs.push_back(I1->getDebugLoc());
+ for (auto *OtherSuccTI : OtherSuccTIs)
+ Locs.push_back(OtherSuccTI->getDebugLoc());
+ NT->setDebugLoc(DILocation::getMergedLocations(Locs));
// PHIs created below will adopt NT's merged DebugLoc.
IRBuilder<NoFolder> Builder(NT);
- // Hoisting one of the terminators from our successor is a great thing.
- // Unfortunately, the successors of the if/else blocks may have PHI nodes in
- // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI
- // nodes, so we insert select instruction to compute the final result.
- std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
- for (BasicBlock *Succ : successors(BB1)) {
- for (PHINode &PN : Succ->phis()) {
- Value *BB1V = PN.getIncomingValueForBlock(BB1);
- Value *BB2V = PN.getIncomingValueForBlock(BB2);
- if (BB1V == BB2V)
- continue;
+ // In the case of an if statement, hoisting one of the terminators from our
+ // successor is a great thing. Unfortunately, the successors of the if/else
+ // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
+ // must agree for all PHI nodes, so we insert a select instruction to compute
+ // the final result.
+ if (BI) {
+ std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
+ for (BasicBlock *Succ : successors(BB1)) {
+ for (PHINode &PN : Succ->phis()) {
+ Value *BB1V = PN.getIncomingValueForBlock(BB1);
+ Value *BB2V = PN.getIncomingValueForBlock(BB2);
+ if (BB1V == BB2V)
+ continue;
- // These values do not agree. Insert a select instruction before NT
- // that determines the right value.
- SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
- if (!SI) {
- // Propagate fast-math-flags from phi node to its replacement select.
- IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
- if (isa<FPMathOperator>(PN))
- Builder.setFastMathFlags(PN.getFastMathFlags());
-
- SI = cast<SelectInst>(
- Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
- BB1V->getName() + "." + BB2V->getName(), BI));
- }
+ // These values do not agree. Insert a select instruction before NT
+ // that determines the right value.
+ SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
+ if (!SI) {
+ // Propagate fast-math-flags from phi node to its replacement select.
+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+ if (isa<FPMathOperator>(PN))
+ Builder.setFastMathFlags(PN.getFastMathFlags());
+
+ SI = cast<SelectInst>(Builder.CreateSelect(
+ BI->getCondition(), BB1V, BB2V,
+ BB1V->getName() + "." + BB2V->getName(), BI));
+ }
- // Make the PHI node use the select for all incoming values for BB1/BB2
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
- if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
- PN.setIncomingValue(i, SI);
+ // Make the PHI node use the select for all incoming values for BB1/BB2
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
+ PN.setIncomingValue(i, SI);
+ }
}
}
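For the if-statement case above, the PHI reconciliation reduces to a single pattern: disagreeing incoming values from BB1/BB2 collapse into one select keyed on the branch condition. A minimal sketch under assumed LLVM headers (function name hypothetical):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/NoFolder.h"
using namespace llvm;

Value *mergePhiInputs(IRBuilder<NoFolder> &Builder, BranchInst *BI,
                      Value *BB1V, Value *BB2V) {
  if (BB1V == BB2V)
    return BB1V; // The PHI entries already agree; no select is needed.
  // Disagreeing entries are computed by one select on the branch condition.
  return Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
                              BB1V->getName() + "." + BB2V->getName());
}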
@@ -1733,16 +1815,16 @@ HoistTerminator:
// Update any PHI nodes in our new successors.
for (BasicBlock *Succ : successors(BB1)) {
- AddPredecessorToBlock(Succ, BIParent, BB1);
+ AddPredecessorToBlock(Succ, TIParent, BB1);
if (DTU)
- Updates.push_back({DominatorTree::Insert, BIParent, Succ});
+ Updates.push_back({DominatorTree::Insert, TIParent, Succ});
}
if (DTU)
- for (BasicBlock *Succ : successors(BI))
- Updates.push_back({DominatorTree::Delete, BIParent, Succ});
+ for (BasicBlock *Succ : successors(TI))
+ Updates.push_back({DominatorTree::Delete, TIParent, Succ});
- EraseTerminatorAndDCECond(BI);
+ EraseTerminatorAndDCECond(TI);
if (DTU)
DTU->applyUpdates(Updates);
return Changed;
@@ -1808,10 +1890,19 @@ static bool canSinkInstructions(
}
const Instruction *I0 = Insts.front();
- for (auto *I : Insts)
+ for (auto *I : Insts) {
if (!I->isSameOperationAs(I0))
return false;
+ // swifterror pointers can only be used by a load or store; sinking a load
+ // or store would require introducing a select for the pointer operand,
+ // which isn't allowed for swifterror pointers.
+ if (isa<StoreInst>(I) && I->getOperand(1)->isSwiftError())
+ return false;
+ if (isa<LoadInst>(I) && I->getOperand(0)->isSwiftError())
+ return false;
+ }
+
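Restated as a standalone predicate (a sketch, not the exact source): the pointer operand of a swifterror load or store may not be replaced by a select or PHI, so any such instruction disqualifies the whole group from sinking.

#include "llvm/IR/Instructions.h"
using namespace llvm;

static bool touchesSwiftErrorPointer(const Instruction *I) {
  // Stores access the pointer through operand 1, loads through operand 0.
  if (auto *SI = dyn_cast<StoreInst>(I))
    return SI->getPointerOperand()->isSwiftError();
  if (auto *LI = dyn_cast<LoadInst>(I))
    return LI->getPointerOperand()->isSwiftError();
  return false;
}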
// All instructions in Insts are known to be the same opcode. If they have a
// use, check that the only user is a PHI or in the same block as the
// instruction, because if a user is in the same block as an instruction we're
@@ -1952,8 +2043,9 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
// Create a new PHI in the successor block and populate it.
auto *Op = I0->getOperand(O);
assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
- auto *PN = PHINode::Create(Op->getType(), Insts.size(),
- Op->getName() + ".sink", &BBEnd->front());
+ auto *PN =
+ PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
+ PN->insertBefore(BBEnd->begin());
for (auto *I : Insts)
PN->addIncoming(I->getOperand(O), I->getParent());
NewOperands.push_back(PN);
@@ -1963,7 +2055,8 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
// and move it to the start of the successor block.
for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
I0->getOperandUse(O).set(NewOperands[O]);
- I0->moveBefore(&*BBEnd->getFirstInsertionPt());
+
+ I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
// Update metadata and IR flags, and merge debug locations.
for (auto *I : Insts)
@@ -2765,8 +2858,8 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
Value *OrigV = PN.getIncomingValueForBlock(BB);
Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
- // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf.
- // Skip PHIs which are trivial.
+ // FIXME: Try to remove some of the duplication with
+ // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
if (ThenV == OrigV)
continue;
@@ -3009,7 +3102,7 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI,
// store %merge, %x.dest, !DIAssignID !2
// dbg.assign %merge, "x", ..., !2
for (auto *DAI : at::getAssignmentMarkers(SpeculatedStore)) {
- if (any_of(DAI->location_ops(), [&](Value *V) { return V == OrigV; }))
+ if (llvm::is_contained(DAI->location_ops(), OrigV))
DAI->replaceVariableLocationOp(OrigV, S);
}
}
@@ -3036,6 +3129,11 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI,
}
// Hoist the instructions.
+ // In "RemoveDIs" non-instr debug-info mode, drop DPValues attached to these
+ // instructions, in the same way that dbg.value intrinsics are dropped at the
+ // end of this block.
+ for (auto &It : make_range(ThenBB->begin(), ThenBB->end()))
+ It.dropDbgValues();
BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
std::prev(ThenBB->end()));
@@ -3207,6 +3305,10 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
DenseMap<Value *, Value *> TranslateMap; // Track translated values.
TranslateMap[Cond] = CB;
+
+ // RemoveDIs: track instructions that we optimise away while folding, so
+ // that we can copy DPValues from them later.
+ BasicBlock::iterator SrcDbgCursor = BB->begin();
for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
@@ -3241,6 +3343,15 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
TranslateMap[&*BBI] = N;
}
if (N) {
+        // Copy all debug-info attached to instructions from the last one we
+        // successfully cloned, up to this instruction (they might have been
+        // folded away).
+ for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
+ N->cloneDebugInfoFrom(&*SrcDbgCursor);
+ SrcDbgCursor = std::next(BBI);
+ // Clone debug-info on this instruction too.
+ N->cloneDebugInfoFrom(&*BBI);
+
// Register the new instruction with the assumption cache if necessary.
if (auto *Assume = dyn_cast<AssumeInst>(N))
if (AC)
@@ -3248,6 +3359,10 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
}
}
+ for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
+ InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
+ InsertPt->cloneDebugInfoFrom(BI);
+
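The cursor logic above amounts to the following helper (illustrative; Cursor/Upto stand in for SrcDbgCursor and the current iterator): every source instruction folded away since the last successful clone donates its attached debug records to the surviving instruction.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static void cloneSkippedDebugInfo(Instruction *NewI,
                                  BasicBlock::iterator &Cursor,
                                  BasicBlock::iterator Upto) {
  // Instructions in [Cursor, Upto) were folded away; keep their attached
  // debug records alive by copying them onto the surviving clone.
  for (; Cursor != Upto; ++Cursor)
    NewI->cloneDebugInfoFrom(&*Cursor);
}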
BB->removePredecessor(EdgeBB);
BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
EdgeBI->setSuccessor(0, RealDest);
@@ -3652,22 +3767,22 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
ValueToValueMapTy VMap; // maps original values to cloned values
CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);
+ Module *M = BB->getModule();
+
+ if (PredBlock->IsNewDbgInfoFormat) {
+ PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
+ for (DPValue &DPV : PredBlock->getTerminator()->getDbgValueRange()) {
+ RemapDPValue(M, &DPV, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ }
+ }
+
// Now that the Cond was cloned into the predecessor basic block,
// or/and the two conditions together.
Value *BICond = VMap[BI->getCondition()];
PBI->setCondition(
createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
- // Copy any debug value intrinsics into the end of PredBlock.
- for (Instruction &I : *BB) {
- if (isa<DbgInfoIntrinsic>(I)) {
- Instruction *NewI = I.clone();
- RemapInstruction(NewI, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- NewI->insertBefore(PBI);
- }
- }
-
++NumFoldBranchToCommonDest;
return true;
}
@@ -3867,7 +3982,8 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
(!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
return V;
- PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front());
+ PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
+ PHI->insertBefore(Succ->begin());
PHI->addIncoming(V, BB);
for (BasicBlock *PredBB : predecessors(Succ))
if (PredBB != BB)
@@ -3991,7 +4107,9 @@ static bool mergeConditionalStoreToAddress(
Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(),
QStore->getParent(), PPHI);
- IRBuilder<> QB(&*PostBB->getFirstInsertionPt());
+ BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
+ IRBuilder<> QB(PostBB, PostBBFirst);
+ QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
@@ -4002,9 +4120,11 @@ static bool mergeConditionalStoreToAddress(
QPred = QB.CreateNot(QPred);
Value *CombinedPred = QB.CreateOr(PPred, QPred);
- auto *T = SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(),
+ BasicBlock::iterator InsertPt = QB.GetInsertPoint();
+ auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
/*Unreachable=*/false,
/*BranchWeights=*/nullptr, DTU);
+
QB.SetInsertPoint(T);
StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
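In scalar terms, the merged conditional store built here has this shape (a plain sketch; PPred/QPred are the possibly negated conditions computed above):

// Two stores guarded by separate conditions collapse into one store
// guarded by their disjunction.
void mergedConditionalStore(int *Address, int QPHI, bool PPred, bool QPred) {
  if (PPred || QPred)
    *Address = QPHI;
}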
@@ -4140,10 +4260,10 @@ static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// 2) We can sink side-effecting instructions into BI's fallthrough
//    successor provided they don't contribute to the computation of
// BI's condition.
- Value *CondWB, *WC;
- BasicBlock *IfTrueBB, *IfFalseBB;
- if (!parseWidenableBranch(PBI, CondWB, WC, IfTrueBB, IfFalseBB) ||
- IfTrueBB != BI->getParent() || !BI->getParent()->getSinglePredecessor())
+ BasicBlock *IfTrueBB = PBI->getSuccessor(0);
+ BasicBlock *IfFalseBB = PBI->getSuccessor(1);
+ if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
+ !BI->getParent()->getSinglePredecessor())
return false;
if (!IfFalseBB->phis().empty())
return false; // TODO
@@ -4256,6 +4376,21 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
if (PBI->getSuccessor(PBIOp) == BB)
return false;
+ // If the predecessor's branch probability to BB is too low, don't merge the
+ // branches.
+ SmallVector<uint32_t, 2> PredWeights;
+ if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
+ extractBranchWeights(*PBI, PredWeights) &&
+ (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
+
+ BranchProbability CommonDestProb = BranchProbability::getBranchProbability(
+ PredWeights[PBIOp],
+ static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
+
+ BranchProbability Likely = TTI.getPredictableBranchThreshold();
+ if (CommonDestProb >= Likely)
+ return false;
+ }
+
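A worked example of this gate, under assumed profile weights: with !prof weights {1, 1000} and PBIOp selecting the first weight, CommonDestProb is 1/1001, far below any realistic predictable-branch threshold, so the merge proceeds; weights of {1000, 1} would block it. The computation reduces to:

#include "llvm/Support/BranchProbability.h"
#include <cstdint>
using namespace llvm;

BranchProbability commonDestProb(uint32_t W0, uint32_t W1, unsigned PBIOp) {
  // The guard above ensures the denominator is nonzero.
  uint64_t Total = static_cast<uint64_t>(W0) + W1;
  return BranchProbability::getBranchProbability(PBIOp == 0 ? W0 : W1, Total);
}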
// Do not perform this transformation if it would require
// insertion of a large number of select instructions. For targets
// without predication/cmovs, this is a big pessimization.
@@ -5088,6 +5223,15 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
bool Changed = false;
+ // Ensure that any debug-info records that used to occur after the Unreachable
+ // are moved in front of it -- otherwise they'll "dangle" at the end of
+ // the block.
+ BB->flushTerminatorDbgValues();
+
+ // Debug-info records on the unreachable inst itself should be deleted, as
+ // below we delete everything past the final executable instruction.
+ UI->dropDbgValues();
+
// If there are any instructions immediately before the unreachable that can
// be removed, do so.
while (UI->getIterator() != BB->begin()) {
@@ -5104,6 +5248,10 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
// block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
// and we can therefore guarantee this block will be erased.
+ // If we're deleting this, we're deleting any subsequent dbg.values, so
+ // delete DPValue records of variable information.
+ BBI->dropDbgValues();
+
// Delete this instruction (any uses are guaranteed to be dead)
BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
BBI->eraseFromParent();
@@ -5667,7 +5815,7 @@ getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
if (I.isTerminator()) {
// If the terminator is a simple branch, continue to the next block.
- if (I.getNumSuccessors() != 1 || I.isExceptionalTerminator())
+ if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
return false;
Pred = CaseDest;
CaseDest = I.getSuccessor(0);
@@ -5890,8 +6038,8 @@ static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI,
// Remove the switch.
- while (PHI->getBasicBlockIndex(SelectBB) >= 0)
- PHI->removeIncomingValue(SelectBB);
+ PHI->removeIncomingValueIf(
+ [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
PHI->addIncoming(SelectValue, SelectBB);
SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
@@ -6507,9 +6655,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// If the default destination is unreachable, or if the lookup table covers
// all values of the conditional variable, branch directly to the lookup table
// BB. Otherwise, check that the condition is within the case range.
- const bool DefaultIsReachable =
+ bool DefaultIsReachable =
!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
- const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
// Create the BB that does the lookups.
Module &Mod = *CommonDest->getParent()->getParent();
@@ -6540,6 +6687,28 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
BranchInst *RangeCheckBranch = nullptr;
+ // Grow the table to cover all possible index values to avoid the range
+ // check. The default result will be used to fill in the table holes later,
+ // so make sure it exists.
+ if (UseSwitchConditionAsTableIndex && HasDefaultResults) {
+ ConstantRange CR = computeConstantRange(TableIndex, /* ForSigned */ false);
+ // Growing the table shouldn't have any size impact, as guarded by the
+ // WouldFitInRegister check below.
+ // TODO: Consider growing the table even when it doesn't fit in a register
+ // if optsize is not enabled.
+ const uint64_t UpperBound = CR.getUpper().getLimitedValue();
+ if (!CR.isUpperWrapped() && all_of(ResultTypes, [&](const auto &KV) {
+ return SwitchLookupTable::WouldFitInRegister(
+ DL, UpperBound, KV.second /* ResultType */);
+ })) {
+ // The default branch is unreachable after we enlarge the lookup table.
+ // Adjust DefaultIsReachable to reuse code path.
+ TableSize = UpperBound;
+ DefaultIsReachable = false;
+ }
+ }
+
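For intuition, under assumed numbers: if the largest case value would give MaxTableSize = 10 but computeConstantRange proves the index lies in [0, 12) and every result column still fits in a register at 12 entries, TableSize grows to 12 and the range check below disappears. The guard is roughly:

#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/IR/ConstantRange.h"
using namespace llvm;

static bool canCoverAllIndices(const ConstantRange &CR,
                               function_ref<bool(uint64_t)> FitsAtSize) {
  if (CR.isUpperWrapped()) // no usable upper bound on the index
    return false;
  return FitsAtSize(CR.getUpper().getLimitedValue());
}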
+ const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
Builder.CreateBr(LookupBB);
if (DTU)
@@ -6701,9 +6870,6 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
// This transform can be done speculatively because it is so cheap - it
// results in a single rotate operation being inserted.
- // FIXME: It's possible that optimizing a switch on powers of two might also
- // be beneficial - flag values are often powers of two and we could use a CLZ
- // as the key function.
// countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
// one element and LLVM disallows duplicate cases, Shift is guaranteed to be
@@ -6748,6 +6914,80 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
return true;
}
+/// Tries to transform switch of powers of two to reduce switch range.
+/// For example, switch like:
+/// switch (C) { case 1: case 2: case 64: case 128: }
+/// will be transformed to:
+/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
+///
+/// This transformation enables better lowering and may also allow converting
+/// the switch into a lookup table.
+static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
+ const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
+ Value *Condition = SI->getCondition();
+ LLVMContext &Context = SI->getContext();
+ auto *CondTy = cast<IntegerType>(Condition->getType());
+
+ if (CondTy->getIntegerBitWidth() > 64 ||
+ !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
+ return false;
+
+ const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
+ IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
+ {Condition, ConstantInt::getTrue(Context)}),
+ TTI::TCK_SizeAndLatency);
+
+ if (CttzIntrinsicCost > TTI::TCC_Basic)
+   // Inserting the intrinsic is too expensive.
+ return false;
+
+ // Only bother with this optimization if there are more than 3 switch cases.
+ // SDAG will only bother creating jump tables for 4 or more cases.
+ if (SI->getNumCases() < 4)
+ return false;
+
+ // We perform this optimization only for switches with an unreachable
+ // default case. This assumption saves us from checking whether `Condition`
+ // is a power of two.
+ if (!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()))
+ return false;
+
+ // Check that switch cases are powers of two.
+ SmallVector<uint64_t, 4> Values;
+ for (const auto &Case : SI->cases()) {
+ uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
+ if (llvm::has_single_bit(CaseValue))
+ Values.push_back(CaseValue);
+ else
+ return false;
+ }
+
+ // isSwitchDense requires the case values to be sorted.
+ llvm::sort(Values);
+ if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
+ llvm::countr_zero(Values.front()) + 1))
+   // The transform is unable to generate a dense switch.
+ return false;
+
+ Builder.SetInsertPoint(SI);
+
+ // Replace each case value with its number of trailing zeros.
+ for (auto &Case : SI->cases()) {
+ auto *OrigValue = Case.getCaseValue();
+ Case.setValue(ConstantInt::get(OrigValue->getType(),
+ OrigValue->getValue().countr_zero()));
+ }
+
+ // Replace the condition with its number of trailing zeros.
+ auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
+ Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
+
+ SI->setCondition(ConditionTrailingZeros);
+
+ return true;
+}
+
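End to end, the case rewrite can be exercised in isolation (standalone C++20 sketch, not the LLVM code): power-of-two case values map to their trailing-zero counts, and the switch condition is replaced by cttz of the original condition.

#include <bit>
#include <cstdint>
#include <vector>

// {1, 2, 64, 128} -> {0, 1, 6, 7}; the rewritten switch then selects on
// cttz(Condition) instead of Condition itself.
std::vector<uint64_t>
rewritePowerOfTwoCases(const std::vector<uint64_t> &Cases) {
  std::vector<uint64_t> TrailingZeros;
  for (uint64_t C : Cases)
    TrailingZeros.push_back(std::countr_zero(C)); // each C is a power of two
  return TrailingZeros;
}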
bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
BasicBlock *BB = SI->getParent();
@@ -6795,9 +7035,16 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
SwitchToLookupTable(SI, Builder, DTU, DL, TTI))
return requestResimplify();
+ if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
+ return requestResimplify();
+
if (ReduceSwitchRange(SI, Builder, DL, TTI))
return requestResimplify();
+ if (HoistCommon &&
+ hoistCommonCodeFromSuccessors(SI->getParent(), !Options.HoistCommonInsts))
+ return requestResimplify();
+
return false;
}
@@ -6982,7 +7229,8 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
// branches to us and our successor, fold the comparison into the
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
- if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
+ if (Options.SpeculateBlocks &&
+ FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
Options.BonusInstThreshold))
return requestResimplify();
return false;
@@ -7052,7 +7300,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
- if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
+ if (Options.SpeculateBlocks &&
+ FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
Options.BonusInstThreshold))
return requestResimplify();
@@ -7062,7 +7311,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// can hoist it up to the branching block.
if (BI->getSuccessor(0)->getSinglePredecessor()) {
if (BI->getSuccessor(1)->getSinglePredecessor()) {
- if (HoistCommon && HoistThenElseCodeToIf(BI, !Options.HoistCommonInsts))
+ if (HoistCommon && hoistCommonCodeFromSuccessors(
+ BI->getParent(), !Options.HoistCommonInsts))
return requestResimplify();
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index a28916bc9baf..722ed03db3de 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -539,7 +539,8 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
for (auto *ICI : ICmpUsers) {
bool IsSwapped = L->isLoopInvariant(ICI->getOperand(0));
auto *Op1 = IsSwapped ? ICI->getOperand(0) : ICI->getOperand(1);
- Instruction *Ext = nullptr;
+ IRBuilder<> Builder(ICI);
+ Value *Ext = nullptr;
// For signed/unsigned predicate, replace the old comparison with comparison
// of immediate IV against sext/zext of the invariant argument. If we can
// use either sext or zext (i.e. we are dealing with equality predicate),
@@ -550,18 +551,18 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
if (IsSwapped) Pred = ICmpInst::getSwappedPredicate(Pred);
if (CanUseZExt(ICI)) {
assert(DoesZExtCollapse && "Unprofitable zext?");
- Ext = new ZExtInst(Op1, IVTy, "zext", ICI);
+ Ext = Builder.CreateZExt(Op1, IVTy, "zext");
Pred = ICmpInst::getUnsignedPredicate(Pred);
} else {
assert(DoesSExtCollapse && "Unprofitable sext?");
- Ext = new SExtInst(Op1, IVTy, "sext", ICI);
+ Ext = Builder.CreateSExt(Op1, IVTy, "sext");
assert(Pred == ICmpInst::getSignedPredicate(Pred) && "Must be signed!");
}
bool Changed;
L->makeLoopInvariant(Ext, Changed);
(void)Changed;
- ICmpInst *NewICI = new ICmpInst(ICI, Pred, IV, Ext);
- ICI->replaceAllUsesWith(NewICI);
+ auto *NewCmp = Builder.CreateICmp(Pred, IV, Ext);
+ ICI->replaceAllUsesWith(NewCmp);
DeadInsts.emplace_back(ICI);
}
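The move from `new ZExtInst(...)` / `new SExtInst(...)` to IRBuilder calls matters because the builder may constant-fold, so the result is only guaranteed to be a Value, not an Instruction (hence Ext's type change above). The pattern, condensed:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

Value *extendForCompare(IRBuilder<> &Builder, Value *Op, Type *IVTy,
                        bool UseZExt) {
  // IRBuilder may fold the extension of a constant operand, so callers
  // must not assume the result is an Instruction.
  return UseZExt ? Builder.CreateZExt(Op, IVTy, "zext")
                 : Builder.CreateSExt(Op, IVTy, "sext");
}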
@@ -659,12 +660,12 @@ bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) {
Instruction *IVOperand = cast<Instruction>(UseInst->getOperand(0));
// Get the symbolic expression for this instruction.
const SCEV *IV = SE->getSCEV(IVOperand);
- unsigned MaskBits;
+ int MaskBits;
if (UseInst->getOpcode() == CastInst::SIToFP)
- MaskBits = SE->getSignedRange(IV).getMinSignedBits();
+ MaskBits = (int)SE->getSignedRange(IV).getMinSignedBits();
else
- MaskBits = SE->getUnsignedRange(IV).getActiveBits();
- unsigned DestNumSigBits = UseInst->getType()->getFPMantissaWidth();
+ MaskBits = (int)SE->getUnsignedRange(IV).getActiveBits();
+ int DestNumSigBits = UseInst->getType()->getFPMantissaWidth();
if (MaskBits <= DestNumSigBits) {
for (User *U : UseInst->users()) {
// Match for fptosi/fptoui of sitofp and with same type.
@@ -908,8 +909,9 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
if (replaceIVUserWithLoopInvariant(UseInst))
continue;
- // Go further for the bitcast ''prtoint ptr to i64'
- if (isa<PtrToIntInst>(UseInst))
+    // Go further for the cast 'ptrtoint ptr to i64' or if the cast is done
+    // by truncation.
+    if (isa<PtrToIntInst>(UseInst) || isa<TruncInst>(UseInst))
for (Use &U : UseInst->uses()) {
Instruction *User = cast<Instruction>(U.getUser());
if (replaceIVUserWithLoopInvariant(User))
@@ -1373,16 +1375,32 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) {
DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
- const SCEV *ExtendOperExpr = nullptr;
const OverflowingBinaryOperator *OBO =
cast<OverflowingBinaryOperator>(DU.NarrowUse);
ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
- if (ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap())
- ExtendOperExpr = SE->getSignExtendExpr(
- SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
- else if (ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap())
- ExtendOperExpr = SE->getZeroExtendExpr(
- SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ if (!(ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap()) &&
+ !(ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap())) {
+ ExtKind = ExtendKind::Unknown;
+
+ // For a non-negative NarrowDef, we can choose either type of
+ // extension. We want to use the current extend kind if legal
+ // (see above), and we only hit this code if we need to check
+ // the opposite case.
+ if (DU.NeverNegative) {
+ if (OBO->hasNoSignedWrap()) {
+ ExtKind = ExtendKind::Sign;
+ } else if (OBO->hasNoUnsignedWrap()) {
+ ExtKind = ExtendKind::Zero;
+ }
+ }
+ }
+
+ const SCEV *ExtendOperExpr =
+ SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx));
+ if (ExtKind == ExtendKind::Sign)
+ ExtendOperExpr = SE->getSignExtendExpr(ExtendOperExpr, WideType);
+ else if (ExtKind == ExtendKind::Zero)
+ ExtendOperExpr = SE->getZeroExtendExpr(ExtendOperExpr, WideType);
else
return {nullptr, ExtendKind::Unknown};
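The NeverNegative path reads as a small decision table; the key fact is that sext and zext agree on non-negative values, so either wrap flag suffices. A sketch with a stand-in for the file's ExtendKind enum:

enum class ExtendKind { Zero, Sign, Unknown }; // stand-in for the real enum

ExtendKind chooseExtendKind(ExtendKind Current, bool HasNSW, bool HasNUW,
                            bool NeverNegative) {
  if ((Current == ExtendKind::Sign && HasNSW) ||
      (Current == ExtendKind::Zero && HasNUW))
    return Current;           // the current extension kind is already legal
  if (NeverNegative && HasNSW)
    return ExtendKind::Sign;  // sext(x) == zext(x) for non-negative x
  if (NeverNegative && HasNUW)
    return ExtendKind::Zero;
  return ExtendKind::Unknown; // give up; the caller bails out
}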
@@ -1493,10 +1511,6 @@ bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) {
assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
// Widen the compare instruction.
- auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
- if (!InsertPt)
- return false;
- IRBuilder<> Builder(InsertPt);
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
// Widen the other operand of the compare, if necessary.
@@ -1673,7 +1687,8 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
assert(LoopExitingBlock && L->contains(LoopExitingBlock) &&
"Not a LCSSA Phi?");
WidePN->addIncoming(WideBO, LoopExitingBlock);
- Builder.SetInsertPoint(&*User->getParent()->getFirstInsertionPt());
+ Builder.SetInsertPoint(User->getParent(),
+ User->getParent()->getFirstInsertionPt());
auto *TruncPN = Builder.CreateTrunc(WidePN, User->getType());
User->replaceAllUsesWith(TruncPN);
DeadInsts.emplace_back(User);
@@ -1726,7 +1741,8 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewri
PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
UsePhi);
WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
- IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
+ BasicBlock *WidePhiBB = WidePhi->getParent();
+ IRBuilder<> Builder(WidePhiBB, WidePhiBB->getFirstInsertionPt());
Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
UsePhi->replaceAllUsesWith(Trunc);
DeadInsts.emplace_back(UsePhi);
@@ -1786,65 +1802,70 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewri
return nullptr;
}
- // Does this user itself evaluate to a recurrence after widening?
- WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
- if (!WideAddRec.first)
- WideAddRec = getWideRecurrence(DU);
-
- assert((WideAddRec.first == nullptr) ==
- (WideAddRec.second == ExtendKind::Unknown));
- if (!WideAddRec.first) {
- // If use is a loop condition, try to promote the condition instead of
- // truncating the IV first.
- if (widenLoopCompare(DU))
+ auto tryAddRecExpansion = [&]() -> Instruction* {
+ // Does this user itself evaluate to a recurrence after widening?
+ WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
+ if (!WideAddRec.first)
+ WideAddRec = getWideRecurrence(DU);
+ assert((WideAddRec.first == nullptr) ==
+ (WideAddRec.second == ExtendKind::Unknown));
+ if (!WideAddRec.first)
return nullptr;
- // We are here about to generate a truncate instruction that may hurt
- // performance because the scalar evolution expression computed earlier
- // in WideAddRec.first does not indicate a polynomial induction expression.
- // In that case, look at the operands of the use instruction to determine
- // if we can still widen the use instead of truncating its operand.
- if (widenWithVariantUse(DU))
+ // Reuse the IV increment that SCEVExpander created as long as it dominates
+ // NarrowUse.
+ Instruction *WideUse = nullptr;
+ if (WideAddRec.first == WideIncExpr &&
+ Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
+ WideUse = WideInc;
+ else {
+ WideUse = cloneIVUser(DU, WideAddRec.first);
+ if (!WideUse)
+ return nullptr;
+ }
+ // Evaluation of WideAddRec ensured that the narrow expression could be
+ // extended outside the loop without overflow. This suggests that the wide use
+ // evaluates to the same expression as the extended narrow use, but doesn't
+ // absolutely guarantee it. Hence the following failsafe check. In rare cases
+ // where it fails, we simply throw away the newly created wide use.
+ if (WideAddRec.first != SE->getSCEV(WideUse)) {
+ LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
+ << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
+ << "\n");
+ DeadInsts.emplace_back(WideUse);
return nullptr;
+ };
- // This user does not evaluate to a recurrence after widening, so don't
- // follow it. Instead insert a Trunc to kill off the original use,
- // eventually isolating the original narrow IV so it can be removed.
- truncateIVUse(DU, DT, LI);
- return nullptr;
- }
+    // If we reached this point, we are going to replace DU.NarrowUse with
+    // WideUse, so reattach the debug values now.
+ replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
- // Reuse the IV increment that SCEVExpander created as long as it dominates
- // NarrowUse.
- Instruction *WideUse = nullptr;
- if (WideAddRec.first == WideIncExpr &&
- Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
- WideUse = WideInc;
- else {
- WideUse = cloneIVUser(DU, WideAddRec.first);
- if (!WideUse)
- return nullptr;
- }
- // Evaluation of WideAddRec ensured that the narrow expression could be
- // extended outside the loop without overflow. This suggests that the wide use
- // evaluates to the same expression as the extended narrow use, but doesn't
- // absolutely guarantee it. Hence the following failsafe check. In rare cases
- // where it fails, we simply throw away the newly created wide use.
- if (WideAddRec.first != SE->getSCEV(WideUse)) {
- LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
- << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
- << "\n");
- DeadInsts.emplace_back(WideUse);
+ ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
+ // Returning WideUse pushes it on the worklist.
+ return WideUse;
+ };
+
+ if (auto *I = tryAddRecExpansion())
+ return I;
+
+ // If use is a loop condition, try to promote the condition instead of
+ // truncating the IV first.
+ if (widenLoopCompare(DU))
return nullptr;
- }
- // if we reached this point then we are going to replace
- // DU.NarrowUse with WideUse. Reattach DbgValue then.
- replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
+ // We are here about to generate a truncate instruction that may hurt
+ // performance because the scalar evolution expression computed earlier
+ // in WideAddRec.first does not indicate a polynomial induction expression.
+ // In that case, look at the operands of the use instruction to determine
+ // if we can still widen the use instead of truncating its operand.
+ if (widenWithVariantUse(DU))
+ return nullptr;
- ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
- // Returning WideUse pushes it on the worklist.
- return WideUse;
+ // This user does not evaluate to a recurrence after widening, so don't
+ // follow it. Instead insert a Trunc to kill off the original use,
+ // eventually isolating the original narrow IV so it can be removed.
+ truncateIVUse(DU, DT, LI);
+ return nullptr;
}
/// Add eligible users of NarrowDef to NarrowIVUsers.
@@ -1944,13 +1965,15 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
// SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
if (BasicBlock *LatchBlock = L->getLoopLatch()) {
WideInc =
- cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
- WideIncExpr = SE->getSCEV(WideInc);
- // Propagate the debug location associated with the original loop increment
- // to the new (widened) increment.
- auto *OrigInc =
- cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
- WideInc->setDebugLoc(OrigInc->getDebugLoc());
+ dyn_cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
+ if (WideInc) {
+ WideIncExpr = SE->getSCEV(WideInc);
+ // Propagate the debug location associated with the original loop
+ // increment to the new (widened) increment.
+ auto *OrigInc =
+ cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
+ WideInc->setDebugLoc(OrigInc->getDebugLoc());
+ }
}
LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 3ad97613fe7a..760a626c8b6f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1148,7 +1148,7 @@ Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilderBase &B) {
Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) {
// fold strstr(x, x) -> x.
if (CI->getArgOperand(0) == CI->getArgOperand(1))
- return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+ return CI->getArgOperand(0);
// fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
if (isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
@@ -1176,7 +1176,7 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) {
// fold strstr(x, "") -> x.
if (HasStr2 && ToFindStr.empty())
- return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+ return CI->getArgOperand(0);
// If both strings are known, constant fold it.
if (HasStr1 && HasStr2) {
@@ -1186,16 +1186,13 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) {
return Constant::getNullValue(CI->getType());
// strstr("abcd", "bc") -> gep((char*)"abcd", 1)
- Value *Result = castToCStr(CI->getArgOperand(0), B);
- Result =
- B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), Result, Offset, "strstr");
- return B.CreateBitCast(Result, CI->getType());
+ return B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), CI->getArgOperand(0),
+ Offset, "strstr");
}
// fold strstr(x, "y") -> strchr(x, 'y').
if (HasStr2 && ToFindStr.size() == 1) {
- Value *StrChr = emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI);
- return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr;
+ return emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI);
}
annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
@@ -1392,7 +1389,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
if (isOnlyUsedInEqualityComparison(CI, SrcStr))
// S is dereferenceable so it's safe to load from it and fold
// memchr(S, C, N) == S to N && *S == C for any C and N.
- // TODO: This is safe even even for nonconstant S.
+ // TODO: This is safe even for nonconstant S.
return memChrToCharCompare(CI, Size, B, DL);
// From now on we need a constant length and constant array.
@@ -1534,12 +1531,10 @@ static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
// memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
if (Len == 1) {
- Value *LHSV =
- B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(LHS, B), "lhsc"),
- CI->getType(), "lhsv");
- Value *RHSV =
- B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(RHS, B), "rhsc"),
- CI->getType(), "rhsv");
+ Value *LHSV = B.CreateZExt(B.CreateLoad(B.getInt8Ty(), LHS, "lhsc"),
+ CI->getType(), "lhsv");
+ Value *RHSV = B.CreateZExt(B.CreateLoad(B.getInt8Ty(), RHS, "rhsc"),
+ CI->getType(), "rhsv");
return B.CreateSub(LHSV, RHSV, "chardiff");
}
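With opaque pointers the removed castToCStr calls were no-ops; the Len == 1 fold is the classic byte difference, shown here as a scalar sketch:

// memcmp(LHS, RHS, 1) -> *(unsigned char *)LHS - *(unsigned char *)RHS
int memcmpLen1(const unsigned char *LHS, const unsigned char *RHS) {
  return static_cast<int>(*LHS) - static_cast<int>(*RHS);
}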
@@ -1845,7 +1840,7 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilderBase &B,
StringRef CallerName = CI->getFunction()->getName();
if (!CallerName.empty() && CallerName.back() == 'f' &&
CallerName.size() == (CalleeName.size() + 1) &&
- CallerName.startswith(CalleeName))
+ CallerName.starts_with(CalleeName))
return nullptr;
}
@@ -2380,8 +2375,8 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilderBase &B) {
FMF.setNoSignedZeros();
B.setFastMathFlags(FMF);
- Intrinsic::ID IID = Callee->getName().startswith("fmin") ? Intrinsic::minnum
- : Intrinsic::maxnum;
+ Intrinsic::ID IID = Callee->getName().starts_with("fmin") ? Intrinsic::minnum
+ : Intrinsic::maxnum;
Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, CI->getType());
return copyFlags(
*CI, B.CreateCall(F, {CI->getArgOperand(0), CI->getArgOperand(1)}));
@@ -3078,7 +3073,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
if (!CI->getArgOperand(2)->getType()->isIntegerTy())
return nullptr;
Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
- Value *Ptr = castToCStr(Dest, B);
+ Value *Ptr = Dest;
B.CreateStore(V, Ptr);
Ptr = B.CreateInBoundsGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
B.CreateStore(B.getInt8(0), Ptr);
@@ -3105,9 +3100,6 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
return ConstantInt::get(CI->getType(), SrcLen - 1);
} else if (Value *V = emitStpCpy(Dest, CI->getArgOperand(2), B, TLI)) {
// sprintf(dest, "%s", str) -> stpcpy(dest, str) - dest
- // Handle mismatched pointer types (goes away with typeless pointers?).
- V = B.CreatePointerCast(V, B.getInt8PtrTy());
- Dest = B.CreatePointerCast(Dest, B.getInt8PtrTy());
Value *PtrDiff = B.CreatePtrDiff(B.getInt8Ty(), V, Dest);
return B.CreateIntCast(PtrDiff, CI->getType(), false);
}
@@ -3273,7 +3265,7 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI,
if (!CI->getArgOperand(3)->getType()->isIntegerTy())
return nullptr;
Value *V = B.CreateTrunc(CI->getArgOperand(3), B.getInt8Ty(), "char");
- Value *Ptr = castToCStr(DstArg, B);
+ Value *Ptr = DstArg;
B.CreateStore(V, Ptr);
Ptr = B.CreateInBoundsGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
B.CreateStore(B.getInt8(0), Ptr);
@@ -3409,8 +3401,7 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilderBase &B) {
// If this is writing one byte, turn it into fputc.
// This optimisation is only valid, if the return value is unused.
if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
- Value *Char = B.CreateLoad(B.getInt8Ty(),
- castToCStr(CI->getArgOperand(0), B), "char");
+ Value *Char = B.CreateLoad(B.getInt8Ty(), CI->getArgOperand(0), "char");
Type *IntTy = B.getIntNTy(TLI->getIntSize());
Value *Cast = B.CreateIntCast(Char, IntTy, /*isSigned*/ true, "chari");
Value *NewCI = emitFPutC(Cast, CI->getArgOperand(3), B, TLI);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
index 0ff88e8b4612..6094f36a77f4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
@@ -18,8 +18,6 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Statepoint.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
using namespace llvm;
@@ -66,21 +64,3 @@ PreservedAnalyses StripGCRelocates::run(Function &F,
PA.preserveSet<CFGAnalyses>();
return PA;
}
-
-namespace {
-struct StripGCRelocatesLegacy : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- StripGCRelocatesLegacy() : FunctionPass(ID) {
- initializeStripGCRelocatesLegacyPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &Info) const override {}
-
- bool runOnFunction(Function &F) override { return ::stripGCRelocates(F); }
-};
-char StripGCRelocatesLegacy::ID = 0;
-} // namespace
-
-INITIALIZE_PASS(StripGCRelocatesLegacy, "strip-gc-relocates",
- "Strip gc.relocates inserted through RewriteStatepointsForGC",
- true, false)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
index c3ae43e567b0..8b4f34209e85 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -68,8 +68,6 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 2b706858cbed..d5468909dd4e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -16,33 +16,9 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Utils.h"
using namespace llvm;
-char UnifyFunctionExitNodesLegacyPass::ID = 0;
-
-UnifyFunctionExitNodesLegacyPass::UnifyFunctionExitNodesLegacyPass()
- : FunctionPass(ID) {
- initializeUnifyFunctionExitNodesLegacyPassPass(
- *PassRegistry::getPassRegistry());
-}
-
-INITIALIZE_PASS(UnifyFunctionExitNodesLegacyPass, "mergereturn",
- "Unify function exit nodes", false, false)
-
-Pass *llvm::createUnifyFunctionExitNodesPass() {
- return new UnifyFunctionExitNodesLegacyPass();
-}
-
-void UnifyFunctionExitNodesLegacyPass::getAnalysisUsage(
- AnalysisUsage &AU) const {
- // We preserve the non-critical-edgeness property
- AU.addPreservedID(BreakCriticalEdgesID);
- // This is a cluster of orthogonal Transforms
- AU.addPreservedID(LowerSwitchID);
-}
-
namespace {
bool unifyUnreachableBlocks(Function &F) {
@@ -110,16 +86,6 @@ bool unifyReturnBlocks(Function &F) {
}
} // namespace
-// Unify all exit nodes of the CFG by creating a new BasicBlock, and converting
-// all returns to unconditional branches to this new basic block. Also, unify
-// all unreachable blocks.
-bool UnifyFunctionExitNodesLegacyPass::runOnFunction(Function &F) {
- bool Changed = false;
- Changed |= unifyUnreachableBlocks(F);
- Changed |= unifyReturnBlocks(F);
- return Changed;
-}
-
PreservedAnalyses UnifyFunctionExitNodesPass::run(Function &F,
FunctionAnalysisManager &AM) {
bool Changed = false;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
index 8c781f59ff5a..2f37f7f972cb 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
@@ -44,10 +44,8 @@ struct UnifyLoopExitsLegacyPass : public FunctionPass {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(LowerSwitchID);
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreservedID(LowerSwitchID);
AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
}
@@ -65,7 +63,6 @@ FunctionPass *llvm::createUnifyLoopExitsPass() {
INITIALIZE_PASS_BEGIN(UnifyLoopExitsLegacyPass, "unify-loop-exits",
"Fixup each natural loop to have a single exit block",
false /* Only looks at CFG */, false /* Analysis Pass */)
-INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(UnifyLoopExitsLegacyPass, "unify-loop-exits",
@@ -234,6 +231,8 @@ bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) {
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
+
return runImpl(LI, DT);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp
index 91c743f17764..51e1e824dd26 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp
@@ -21,7 +21,6 @@ using namespace llvm;
/// initializeTransformUtils - Initialize all passes in the TransformUtils
/// library.
void llvm::initializeTransformUtils(PassRegistry &Registry) {
- initializeAssumeBuilderPassLegacyPassPass(Registry);
initializeBreakCriticalEdgesPass(Registry);
initializeCanonicalizeFreezeInLoopsPass(Registry);
initializeLCSSAWrapperPassPass(Registry);
@@ -30,9 +29,6 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) {
initializeLowerInvokeLegacyPassPass(Registry);
initializeLowerSwitchLegacyPassPass(Registry);
initializePromoteLegacyPassPass(Registry);
- initializeUnifyFunctionExitNodesLegacyPassPass(Registry);
- initializeStripGCRelocatesLegacyPass(Registry);
- initializePredicateInfoPrinterLegacyPassPass(Registry);
initializeFixIrreduciblePass(Registry);
initializeUnifyLoopExitsLegacyPassPass(Registry);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp
index 3446e31cc2ef..71d0f09e4771 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -31,6 +31,7 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
@@ -145,6 +146,7 @@ public:
Value *mapValue(const Value *V);
void remapInstruction(Instruction *I);
void remapFunction(Function &F);
+ void remapDPValue(DPValue &DPV);
Constant *mapConstant(const Constant *C) {
return cast_or_null<Constant>(mapValue(C));
@@ -535,6 +537,39 @@ Value *Mapper::mapValue(const Value *V) {
return getVM()[V] = ConstantPointerNull::get(cast<PointerType>(NewTy));
}
+void Mapper::remapDPValue(DPValue &V) {
+ // Remap variables and DILocations.
+ auto *MappedVar = mapMetadata(V.getVariable());
+ auto *MappedDILoc = mapMetadata(V.getDebugLoc());
+ V.setVariable(cast<DILocalVariable>(MappedVar));
+ V.setDebugLoc(DebugLoc(cast<DILocation>(MappedDILoc)));
+
+ // Find Value operands and remap those.
+ SmallVector<Value *, 4> Vals, NewVals;
+ for (Value *Val : V.location_ops())
+ Vals.push_back(Val);
+ for (Value *Val : Vals)
+ NewVals.push_back(mapValue(Val));
+
+ // If there are no changes to the Value operands, we're finished.
+ if (Vals == NewVals)
+ return;
+
+ bool IgnoreMissingLocals = Flags & RF_IgnoreMissingLocals;
+
+ // Otherwise, do some replacement.
+ if (!IgnoreMissingLocals &&
+ llvm::any_of(NewVals, [&](Value *V) { return V == nullptr; })) {
+ V.setKillLocation();
+ } else {
+ // Either we have all non-empty NewVals, or we're permitted to ignore
+ // missing locals.
+ for (unsigned int I = 0; I < Vals.size(); ++I)
+ if (NewVals[I])
+ V.replaceVariableLocationOp(I, NewVals[I]);
+ }
+}
+
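The operand-remap policy above condenses to this helper (a restatement for illustration): without RF_IgnoreMissingLocals, any operand that fails to map kills the whole location; otherwise unmapped operands are simply left in place.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/DebugProgramInstruction.h"
using namespace llvm;

static void applyMappedOperands(DPValue &V, ArrayRef<Value *> NewVals,
                                bool IgnoreMissingLocals) {
  if (!IgnoreMissingLocals && llvm::is_contained(NewVals, nullptr)) {
    V.setKillLocation(); // one lost local poisons the entire location
    return;
  }
  for (unsigned I = 0, E = NewVals.size(); I != E; ++I)
    if (NewVals[I]) // leave operands that failed to map untouched
      V.replaceVariableLocationOp(I, NewVals[I]);
}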
Value *Mapper::mapBlockAddress(const BlockAddress &BA) {
Function *F = cast<Function>(mapValue(BA.getFunction()));
@@ -1179,6 +1214,17 @@ void ValueMapper::remapInstruction(Instruction &I) {
FlushingMapper(pImpl)->remapInstruction(&I);
}
+void ValueMapper::remapDPValue(Module *M, DPValue &V) {
+ FlushingMapper(pImpl)->remapDPValue(V);
+}
+
+void ValueMapper::remapDPValueRange(
+ Module *M, iterator_range<DPValue::self_iterator> Range) {
+ for (DPValue &DPV : Range) {
+ remapDPValue(M, DPV);
+ }
+}
+
void ValueMapper::remapFunction(Function &F) {
FlushingMapper(pImpl)->remapFunction(F);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 260d7889906b..fa2459d1ca02 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -103,13 +103,11 @@
#include "llvm/Support/ModRef.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iterator>
-#include <limits>
#include <numeric>
#include <optional>
#include <tuple>
@@ -900,9 +898,9 @@ bool Vectorizer::vectorizeChain(Chain &C) {
// Chain is in offset order, so C[0] is the instr with the lowest offset,
// i.e. the root of the vector.
- Value *Bitcast = Builder.CreateBitCast(
- getLoadStorePointerOperand(C[0].Inst), VecTy->getPointerTo(AS));
- VecInst = Builder.CreateAlignedLoad(VecTy, Bitcast, Alignment);
+ VecInst = Builder.CreateAlignedLoad(VecTy,
+ getLoadStorePointerOperand(C[0].Inst),
+ Alignment);
unsigned VecIdx = 0;
for (const ChainElem &E : C) {
@@ -976,8 +974,7 @@ bool Vectorizer::vectorizeChain(Chain &C) {
// i.e. the root of the vector.
VecInst = Builder.CreateAlignedStore(
Vec,
- Builder.CreateBitCast(getLoadStorePointerOperand(C[0].Inst),
- VecTy->getPointerTo(AS)),
+ getLoadStorePointerOperand(C[0].Inst),
Alignment);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index f923f0be6621..37a356c43e29 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -289,7 +289,7 @@ void LoopVectorizeHints::getHintsFromMetadata() {
}
void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
- if (!Name.startswith(Prefix()))
+ if (!Name.starts_with(Prefix()))
return;
Name = Name.substr(Prefix().size(), StringRef::npos);
@@ -943,6 +943,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
}
}
+ // If we found a vectorized variant of a function, note that so LV can
+ // make better decisions about maximum VF.
+ if (CI && !VFDatabase::getMappings(*CI).empty())
+ VecCallVariantsFound = true;
+
// Check that the instruction return type is vectorizable.
// Also, we can't vectorize extractelement instructions.
if ((!VectorType::isValidElementType(I.getType()) &&
@@ -1242,13 +1247,12 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
bool LoopVectorizationLegality::blockCanBePredicated(
BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
- SmallPtrSetImpl<const Instruction *> &MaskedOp,
- SmallPtrSetImpl<Instruction *> &ConditionalAssumes) const {
+ SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
for (Instruction &I : *BB) {
// We can predicate blocks with calls to assume, as long as we drop them in
// case we flatten the CFG via predication.
if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
- ConditionalAssumes.insert(&I);
+ MaskedOp.insert(&I);
continue;
}
@@ -1345,16 +1349,13 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
}
// We must be able to predicate all blocks that need to be predicated.
- if (blockNeedsPredication(BB)) {
- if (!blockCanBePredicated(BB, SafePointers, MaskedOp,
- ConditionalAssumes)) {
- reportVectorizationFailure(
- "Control flow cannot be substituted for a select",
- "control flow cannot be substituted for a select",
- "NoCFGForSelect", ORE, TheLoop,
- BB->getTerminator());
- return false;
- }
+ if (blockNeedsPredication(BB) &&
+ !blockCanBePredicated(BB, SafePointers, MaskedOp)) {
+ reportVectorizationFailure(
+ "Control flow cannot be substituted for a select",
+ "control flow cannot be substituted for a select", "NoCFGForSelect",
+ ORE, TheLoop, BB->getTerminator());
+ return false;
}
}
@@ -1554,14 +1555,14 @@ bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
// The list of pointers that we can safely read and write to remains empty.
SmallPtrSet<Value *, 8> SafePointers;
+ // Collect masked ops in temporary set first to avoid partially populating
+ // MaskedOp if a block cannot be predicated.
SmallPtrSet<const Instruction *, 8> TmpMaskedOp;
- SmallPtrSet<Instruction *, 8> TmpConditionalAssumes;
// Check and mark all blocks for predication, including those that ordinarily
// do not need predication such as the header block.
for (BasicBlock *BB : TheLoop->blocks()) {
- if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp,
- TmpConditionalAssumes)) {
+ if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as requested.\n");
return false;
}
@@ -1570,9 +1571,6 @@ bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
MaskedOp.insert(TmpMaskedOp.begin(), TmpMaskedOp.end());
- ConditionalAssumes.insert(TmpConditionalAssumes.begin(),
- TmpConditionalAssumes.end());
-
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 13357cb06c55..577ce8000de2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -31,6 +31,7 @@
namespace llvm {
class LoopInfo;
+class DominatorTree;
class LoopVectorizationLegality;
class LoopVectorizationCostModel;
class PredicatedScalarEvolution;
@@ -45,13 +46,17 @@ class VPBuilder {
VPBasicBlock *BB = nullptr;
VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator();
+ /// Insert \p VPI in BB at InsertPt if BB is set.
+ VPInstruction *tryInsertInstruction(VPInstruction *VPI) {
+ if (BB)
+ BB->insert(VPI, InsertPt);
+ return VPI;
+ }
+
VPInstruction *createInstruction(unsigned Opcode,
ArrayRef<VPValue *> Operands, DebugLoc DL,
const Twine &Name = "") {
- VPInstruction *Instr = new VPInstruction(Opcode, Operands, DL, Name);
- if (BB)
- BB->insert(Instr, InsertPt);
- return Instr;
+ return tryInsertInstruction(new VPInstruction(Opcode, Operands, DL, Name));
}
VPInstruction *createInstruction(unsigned Opcode,
@@ -62,6 +67,7 @@ class VPBuilder {
public:
VPBuilder() = default;
+ VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
/// Clear the insertion point: created instructions will not be inserted into
/// a block.
@@ -116,10 +122,11 @@ public:
InsertPt = IP;
}
- /// Insert and return the specified instruction.
- VPInstruction *insert(VPInstruction *I) const {
- BB->insert(I, InsertPt);
- return I;
+ /// This specifies that created instructions should be inserted at the
+ /// specified point.
+ void setInsertPoint(VPRecipeBase *IP) {
+ BB = IP->getParent();
+ InsertPt = IP->getIterator();
}
/// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
@@ -138,6 +145,13 @@ public:
return createInstruction(Opcode, Operands, DL, Name);
}
+ VPInstruction *createOverflowingOp(unsigned Opcode,
+ std::initializer_list<VPValue *> Operands,
+ VPRecipeWithIRFlags::WrapFlagsTy WrapFlags,
+ DebugLoc DL, const Twine &Name = "") {
+ return tryInsertInstruction(
+ new VPInstruction(Opcode, Operands, WrapFlags, DL, Name));
+ }
VPValue *createNot(VPValue *Operand, DebugLoc DL, const Twine &Name = "") {
return createInstruction(VPInstruction::Not, {Operand}, DL, Name);
}
@@ -158,6 +172,12 @@ public:
Name);
}
+ /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
+ /// and \p B.
+ /// TODO: add createFCmp when needed.
+ VPValue *createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
+ DebugLoc DL = {}, const Twine &Name = "");
+
//===--------------------------------------------------------------------===//
// RAII helpers.
//===--------------------------------------------------------------------===//
@@ -268,6 +288,9 @@ class LoopVectorizationPlanner {
/// Loop Info analysis.
LoopInfo *LI;
+ /// The dominator tree.
+ DominatorTree *DT;
+
/// Target Library Info.
const TargetLibraryInfo *TLI;
@@ -298,16 +321,14 @@ class LoopVectorizationPlanner {
VPBuilder Builder;
public:
- LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI,
- const TargetTransformInfo &TTI,
- LoopVectorizationLegality *Legal,
- LoopVectorizationCostModel &CM,
- InterleavedAccessInfo &IAI,
- PredicatedScalarEvolution &PSE,
- const LoopVectorizeHints &Hints,
- OptimizationRemarkEmitter *ORE)
- : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), IAI(IAI),
- PSE(PSE), Hints(Hints), ORE(ORE) {}
+ LoopVectorizationPlanner(
+ Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal,
+ LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI,
+ PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints,
+ OptimizationRemarkEmitter *ORE)
+ : OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
+ IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
/// Plan how to best vectorize, return the best VF and its cost, or
/// std::nullopt if vectorization and interleaving should be avoided up front.
@@ -333,7 +354,7 @@ public:
executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
InnerLoopVectorizer &LB, DominatorTree *DT,
bool IsEpilogueVectorization,
- DenseMap<const SCEV *, Value *> *ExpandedSCEVs = nullptr);
+ const DenseMap<const SCEV *, Value *> *ExpandedSCEVs = nullptr);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void printPlans(raw_ostream &O);
@@ -377,8 +398,7 @@ private:
/// returned VPlan is valid for. If no VPlan can be built for the input range,
/// set the largest included VF to the maximum VF for which no plan could be
/// built.
- std::optional<VPlanPtr> tryToBuildVPlanWithVPRecipes(
- VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions);
+ VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range);
/// Build VPlans for power-of-2 VFs between \p MinVF and \p MaxVF inclusive,
/// according to the information gathered by Legal when it checked if it is
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b603bbe55dc9..f82e161fb846 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -27,7 +27,7 @@
//
// There is a development effort going on to migrate loop vectorizer to the
// VPlan infrastructure and to introduce outer loop vectorization support (see
-// docs/Proposal/VectorizationPlan.rst and
+// docs/VectorizationPlan.rst and
// http://lists.llvm.org/pipermail/llvm-dev/2017-December/119523.html). For this
// purpose, we temporarily introduced the VPlan-native vectorization path: an
// alternative vectorization path that is natively implemented on top of the
@@ -57,6 +57,7 @@
#include "LoopVectorizationPlanner.h"
#include "VPRecipeBuilder.h"
#include "VPlan.h"
+#include "VPlanAnalysis.h"
#include "VPlanHCFGBuilder.h"
#include "VPlanTransforms.h"
#include "llvm/ADT/APInt.h"
@@ -111,10 +112,12 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
@@ -390,6 +393,21 @@ static cl::opt<cl::boolOrDefault> ForceSafeDivisor(
cl::desc(
"Override cost based safe divisor widening for div/rem instructions"));
+static cl::opt<bool> UseWiderVFIfCallVariantsPresent(
+ "vectorizer-maximize-bandwidth-for-vector-calls", cl::init(true),
+ cl::Hidden,
+ cl::desc("Try wider VFs if they enable the use of vector variants"));
+
+// Likelihood of bypassing the vectorized loop because assumptions about SCEV
+// variables not overflowing do not hold. See `emitSCEVChecks`.
+static constexpr uint32_t SCEVCheckBypassWeights[] = {1, 127};
+// Likelihood of bypassing the vectorized loop because pointers overlap. See
+// `emitMemRuntimeChecks`.
+static constexpr uint32_t MemCheckBypassWeights[] = {1, 127};
+// Likelihood of bypassing the vectorized loop because there are zero trips left
+// after prolog. See `emitIterationCountCheck`.
+static constexpr uint32_t MinItersBypassWeights[] = {1, 127};
+
/// A helper function that returns true if the given type is irregular. The
/// type is irregular if its allocated size doesn't equal the store size of an
/// element of the corresponding vector type.
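For context on the weight constants above: branch_weights metadata encodes relative edge frequencies, so {1, 127} assumes the bypass edge is taken once per 128 executions, roughly 0.8%. A standalone sketch of what the setBranchWeights calls introduced by this patch boil down to (the helper function and its BranchInst argument are hypothetical):

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/MDBuilder.h"
    static void tagBypassBranch(llvm::BranchInst &BI) {
      llvm::MDBuilder MDB(BI.getContext());
      // First weight: bypass edge taken; second: continue on the vector path.
      BI.setMetadata(llvm::LLVMContext::MD_prof,
                     MDB.createBranchWeights(1, 127));
    }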
@@ -408,13 +426,6 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
/// we always assume predicated blocks have a 50% chance of executing.
static unsigned getReciprocalPredBlockProb() { return 2; }
-/// A helper function that returns an integer or floating-point constant with
-/// value C.
-static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
- return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
- : ConstantFP::get(Ty, C);
-}
-
/// Returns "best known" trip count for the specified loop \p L as defined by
/// the following procedure:
/// 1) Returns exact trip count if it is known.
@@ -556,10 +567,6 @@ public:
const VPIteration &Instance,
VPTransformState &State);
- /// Construct the vector value of a scalarized value \p V one lane at a time.
- void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance,
- VPTransformState &State);
-
/// Try to vectorize interleaved access group \p Group with the base address
/// given in \p Addr, optionally masking the vector operations if \p
/// BlockInMask is non-null. Use \p State to translate given VPValues to IR
@@ -634,10 +641,6 @@ protected:
/// the block that was created for it.
void sinkScalarOperands(Instruction *PredInst);
- /// Shrinks vector element sizes to the smallest bitwidth they can be legally
- /// represented as.
- void truncateToMinimalBitwidths(VPTransformState &State);
-
/// Returns (and creates if needed) the trip count of the widened loop.
Value *getOrCreateVectorTripCount(BasicBlock *InsertBlock);
@@ -943,21 +946,21 @@ protected:
/// Look for a meaningful debug location on the instruction or its
/// operands.
-static Instruction *getDebugLocFromInstOrOperands(Instruction *I) {
+static DebugLoc getDebugLocFromInstOrOperands(Instruction *I) {
if (!I)
- return I;
+ return DebugLoc();
DebugLoc Empty;
if (I->getDebugLoc() != Empty)
- return I;
+ return I->getDebugLoc();
for (Use &Op : I->operands()) {
if (Instruction *OpInst = dyn_cast<Instruction>(Op))
if (OpInst->getDebugLoc() != Empty)
- return OpInst;
+ return OpInst->getDebugLoc();
}
- return I;
+ return I->getDebugLoc();
}
/// Write a \p DebugMsg about vectorization to the debug output stream. If \p I
@@ -1021,14 +1024,6 @@ const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
return SE.getTripCountFromExitCount(BackedgeTakenCount, IdxTy, OrigLoop);
}
-static Value *getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy,
- ElementCount VF) {
- assert(FTy->isFloatingPointTy() && "Expected floating point type!");
- Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
- Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
- return B.CreateUIToFP(RuntimeVF, FTy);
-}
-
void reportVectorizationFailure(const StringRef DebugMsg,
const StringRef OREMsg, const StringRef ORETag,
OptimizationRemarkEmitter *ORE, Loop *TheLoop,
@@ -1050,6 +1045,23 @@ void reportVectorizationInfo(const StringRef Msg, const StringRef ORETag,
<< Msg);
}
+/// Report successful vectorization of the loop. In case an outer loop is
+/// vectorized, prepend "outer" to the vectorization remark.
+static void reportVectorization(OptimizationRemarkEmitter *ORE, Loop *TheLoop,
+ VectorizationFactor VF, unsigned IC) {
+ LLVM_DEBUG(debugVectorizationMessage(
+ "Vectorizing: ", TheLoop->isInnermost() ? "innermost loop" : "outer loop",
+ nullptr));
+ StringRef LoopType = TheLoop->isInnermost() ? "" : "outer ";
+ ORE->emit([&]() {
+ return OptimizationRemark(LV_NAME, "Vectorized", TheLoop->getStartLoc(),
+ TheLoop->getHeader())
+ << "vectorized " << LoopType << "loop (vectorization width: "
+ << ore::NV("VectorizationFactor", VF.Width)
+ << ", interleaved count: " << ore::NV("InterleaveCount", IC) << ")";
+ });
+}
+
} // end namespace llvm
#ifndef NDEBUG
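For illustration, with a vectorization width of 4 and an interleave count of 2 the new reportVectorization remark renders as shown below; the file name and location are hypothetical:

    remark: foo.c:12:3: vectorized loop (vectorization width: 4, interleaved count: 2)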
@@ -1104,7 +1116,8 @@ void InnerLoopVectorizer::collectPoisonGeneratingRecipes(
if (auto *RecWithFlags = dyn_cast<VPRecipeWithIRFlags>(CurRec)) {
RecWithFlags->dropPoisonGeneratingFlags();
} else {
- Instruction *Instr = CurRec->getUnderlyingInstr();
+ Instruction *Instr = dyn_cast_or_null<Instruction>(
+ CurRec->getVPSingleValue()->getUnderlyingValue());
(void)Instr;
assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
"found instruction with poison generating flags not covered by "
@@ -1247,6 +1260,13 @@ public:
/// avoid redundant calculations.
void setCostBasedWideningDecision(ElementCount VF);
+ /// A call may be vectorized in different ways depending on whether we have
+ /// vectorized variants available and whether the target supports masking.
+ /// This function analyzes all calls in the function at the supplied VF,
+ /// makes a decision based on the costs of available options, and stores that
+ /// decision in a map for use in planning and plan execution.
+ void setVectorizedCallDecision(ElementCount VF);
+
/// A struct that represents some properties of the register usage
/// of a loop.
struct RegisterUsage {
@@ -1270,7 +1290,7 @@ public:
void collectElementTypesForWidening();
/// Split reductions into those that happen in the loop, and those that happen
- /// outside. In loop reductions are collected into InLoopReductionChains.
+ /// outside. In loop reductions are collected into InLoopReductions.
void collectInLoopReductions();
/// Returns true if we should use strict in-order reductions for the given
@@ -1358,7 +1378,9 @@ public:
CM_Widen_Reverse, // For consecutive accesses with stride -1.
CM_Interleave,
CM_GatherScatter,
- CM_Scalarize
+ CM_Scalarize,
+ CM_VectorCall,
+ CM_IntrinsicCall
};
/// Save vectorization decision \p W and \p Cost taken by the cost model for
@@ -1414,6 +1436,29 @@ public:
return WideningDecisions[InstOnVF].second;
}
+ struct CallWideningDecision {
+ InstWidening Kind;
+ Function *Variant;
+ Intrinsic::ID IID;
+ std::optional<unsigned> MaskPos;
+ InstructionCost Cost;
+ };
+
+ void setCallWideningDecision(CallInst *CI, ElementCount VF, InstWidening Kind,
+ Function *Variant, Intrinsic::ID IID,
+ std::optional<unsigned> MaskPos,
+ InstructionCost Cost) {
+ assert(!VF.isScalar() && "Expected vector VF");
+ CallWideningDecisions[std::make_pair(CI, VF)] = {Kind, Variant, IID,
+ MaskPos, Cost};
+ }
+
+ CallWideningDecision getCallWideningDecision(CallInst *CI,
+ ElementCount VF) const {
+ assert(!VF.isScalar() && "Expected vector VF");
+ return CallWideningDecisions.at(std::make_pair(CI, VF));
+ }
+
/// Return True if instruction \p I is an optimizable truncate whose operand
/// is an induction variable. Such a truncate will be removed by adding a new
/// induction variable with the destination type.
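A hypothetical round trip through the new call-widening API above, assuming the surrounding LoopVectorize.cpp context; CM, CI, VecFn, and Cost stand in for values the cost model already has, and Intrinsic::not_intrinsic marks the absence of an intrinsic mapping:

    static void recordVectorCall(LoopVectorizationCostModel &CM, CallInst *CI,
                                 Function *VecFn, InstructionCost Cost) {
      ElementCount VF = ElementCount::getFixed(4);
      CM.setCallWideningDecision(CI, VF,
                                 LoopVectorizationCostModel::CM_VectorCall,
                                 VecFn, Intrinsic::not_intrinsic,
                                 /*MaskPos=*/std::nullopt, Cost);
      // Planning later retrieves exactly what was stored.
      auto Dec = CM.getCallWideningDecision(CI, VF);
      assert(Dec.Kind == LoopVectorizationCostModel::CM_VectorCall);
      (void)Dec;
    }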
@@ -1447,11 +1492,15 @@ public:
/// Collect Uniform and Scalar values for the given \p VF.
/// The sets depend on CM decision for Load/Store instructions
/// that may be vectorized as interleave, gather-scatter or scalarized.
+ /// Also make a decision on what to do about call instructions in the loop
+ /// at that VF -- scalarize, call a known vector routine, or call a
+ /// vector intrinsic.
void collectUniformsAndScalars(ElementCount VF) {
// Do the analysis once.
if (VF.isScalar() || Uniforms.contains(VF))
return;
setCostBasedWideningDecision(VF);
+ setVectorizedCallDecision(VF);
collectLoopUniforms(VF);
collectLoopScalars(VF);
}
@@ -1606,20 +1655,9 @@ public:
return foldTailByMasking() || Legal->blockNeedsPredication(BB);
}
- /// A SmallMapVector to store the InLoop reduction op chains, mapping phi
- /// nodes to the chain of instructions representing the reductions. Uses a
- /// MapVector to ensure deterministic iteration order.
- using ReductionChainMap =
- SmallMapVector<PHINode *, SmallVector<Instruction *, 4>, 4>;
-
- /// Return the chain of instructions representing an inloop reduction.
- const ReductionChainMap &getInLoopReductionChains() const {
- return InLoopReductionChains;
- }
-
/// Returns true if the Phi is part of an inloop reduction.
bool isInLoopReduction(PHINode *Phi) const {
- return InLoopReductionChains.count(Phi);
+ return InLoopReductions.contains(Phi);
}
/// Estimate cost of an intrinsic call instruction CI if it were vectorized
@@ -1629,16 +1667,13 @@ public:
/// Estimate cost of a call instruction CI if it were vectorized with factor
/// VF. Return the cost of the instruction, including scalarization overhead
- /// if it's needed. The flag NeedToScalarize shows if the call needs to be
- /// scalarized -
- /// i.e. either vector version isn't available, or is too expensive.
- InstructionCost getVectorCallCost(CallInst *CI, ElementCount VF,
- Function **Variant,
- bool *NeedsMask = nullptr) const;
+ /// if it's needed.
+ InstructionCost getVectorCallCost(CallInst *CI, ElementCount VF) const;
/// Invalidates decisions already taken by the cost model.
void invalidateCostModelingDecisions() {
WideningDecisions.clear();
+ CallWideningDecisions.clear();
Uniforms.clear();
Scalars.clear();
}
@@ -1675,14 +1710,14 @@ private:
/// elements is a power-of-2 larger than zero. If scalable vectorization is
/// disabled or unsupported, then the scalable part will be equal to
/// ElementCount::getScalable(0).
- FixedScalableVFPair computeFeasibleMaxVF(unsigned ConstTripCount,
+ FixedScalableVFPair computeFeasibleMaxVF(unsigned MaxTripCount,
ElementCount UserVF,
bool FoldTailByMasking);
/// \return the maximized element count based on the targets vector
/// registers and the loop trip-count, but limited to a maximum safe VF.
/// This is a helper function of computeFeasibleMaxVF.
- ElementCount getMaximizedVFForTarget(unsigned ConstTripCount,
+ ElementCount getMaximizedVFForTarget(unsigned MaxTripCount,
unsigned SmallestType,
unsigned WidestType,
ElementCount MaxSafeVF,
@@ -1705,7 +1740,7 @@ private:
/// part of that pattern.
std::optional<InstructionCost>
getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy,
- TTI::TargetCostKind CostKind);
+ TTI::TargetCostKind CostKind) const;
/// Calculate vectorization cost of memory instruction \p I.
InstructionCost getMemoryInstructionCost(Instruction *I, ElementCount VF);
@@ -1783,15 +1818,12 @@ private:
/// scalarized.
DenseMap<ElementCount, SmallPtrSet<Instruction *, 4>> ForcedScalars;
- /// PHINodes of the reductions that should be expanded in-loop along with
- /// their associated chains of reduction operations, in program order from top
- /// (PHI) to bottom
- ReductionChainMap InLoopReductionChains;
+ /// PHINodes of the reductions that should be expanded in-loop.
+ SmallPtrSet<PHINode *, 4> InLoopReductions;
/// A Map of inloop reduction operations and their immediate chain operand.
/// FIXME: This can be removed once reductions can be costed correctly in
- /// vplan. This was added to allow quick lookup to the inloop operations,
- /// without having to loop through InLoopReductionChains.
+ /// VPlan. This was added to allow quick lookup of the inloop operations.
DenseMap<Instruction *, Instruction *> InLoopReductionImmediateChains;
/// Returns the expected difference in cost from scalarizing the expression
@@ -1830,6 +1862,11 @@ private:
DecisionList WideningDecisions;
+ using CallDecisionList =
+ DenseMap<std::pair<CallInst *, ElementCount>, CallWideningDecision>;
+
+ CallDecisionList CallWideningDecisions;
+
/// Returns true if \p V is expected to be vectorized and it needs to be
/// extracted.
bool needsExtract(Value *V, ElementCount VF) const {
@@ -1933,12 +1970,14 @@ class GeneratedRTChecks {
SCEVExpander MemCheckExp;
bool CostTooHigh = false;
+ const bool AddBranchWeights;
public:
GeneratedRTChecks(ScalarEvolution &SE, DominatorTree *DT, LoopInfo *LI,
- TargetTransformInfo *TTI, const DataLayout &DL)
+ TargetTransformInfo *TTI, const DataLayout &DL,
+ bool AddBranchWeights)
: DT(DT), LI(LI), TTI(TTI), SCEVExp(SE, DL, "scev.check"),
- MemCheckExp(SE, DL, "scev.check") {}
+ MemCheckExp(SE, DL, "scev.check"), AddBranchWeights(AddBranchWeights) {}
/// Generate runtime checks in SCEVCheckBlock and MemCheckBlock, so we can
/// accurately estimate the cost of the runtime checks. The blocks are
@@ -1990,9 +2029,9 @@ public:
},
IC);
} else {
- MemRuntimeCheckCond =
- addRuntimeChecks(MemCheckBlock->getTerminator(), L,
- RtPtrChecking.getChecks(), MemCheckExp);
+ MemRuntimeCheckCond = addRuntimeChecks(
+ MemCheckBlock->getTerminator(), L, RtPtrChecking.getChecks(),
+ MemCheckExp, VectorizerParams::HoistRuntimeChecks);
}
assert(MemRuntimeCheckCond &&
"no RT checks generated although RtPtrChecking "
@@ -2131,8 +2170,10 @@ public:
DT->addNewBlock(SCEVCheckBlock, Pred);
DT->changeImmediateDominator(LoopVectorPreHeader, SCEVCheckBlock);
- ReplaceInstWithInst(SCEVCheckBlock->getTerminator(),
- BranchInst::Create(Bypass, LoopVectorPreHeader, Cond));
+ BranchInst &BI = *BranchInst::Create(Bypass, LoopVectorPreHeader, Cond);
+ if (AddBranchWeights)
+ setBranchWeights(BI, SCEVCheckBypassWeights);
+ ReplaceInstWithInst(SCEVCheckBlock->getTerminator(), &BI);
return SCEVCheckBlock;
}
@@ -2156,9 +2197,12 @@ public:
if (auto *PL = LI->getLoopFor(LoopVectorPreHeader))
PL->addBasicBlockToLoop(MemCheckBlock, *LI);
- ReplaceInstWithInst(
- MemCheckBlock->getTerminator(),
- BranchInst::Create(Bypass, LoopVectorPreHeader, MemRuntimeCheckCond));
+ BranchInst &BI =
+ *BranchInst::Create(Bypass, LoopVectorPreHeader, MemRuntimeCheckCond);
+ if (AddBranchWeights) {
+ setBranchWeights(BI, MemCheckBypassWeights);
+ }
+ ReplaceInstWithInst(MemCheckBlock->getTerminator(), &BI);
MemCheckBlock->getTerminator()->setDebugLoc(
Pred->getTerminator()->getDebugLoc());
@@ -2252,157 +2296,17 @@ static void collectSupportedLoops(Loop &L, LoopInfo *LI,
// LoopVectorizationCostModel and LoopVectorizationPlanner.
//===----------------------------------------------------------------------===//
-/// This function adds
-/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
-/// to each vector element of Val. The sequence starts at StartIndex.
-/// \p Opcode is relevant for FP induction variable.
-static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
- Instruction::BinaryOps BinOp, ElementCount VF,
- IRBuilderBase &Builder) {
- assert(VF.isVector() && "only vector VFs are supported");
-
- // Create and check the types.
- auto *ValVTy = cast<VectorType>(Val->getType());
- ElementCount VLen = ValVTy->getElementCount();
-
- Type *STy = Val->getType()->getScalarType();
- assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
- "Induction Step must be an integer or FP");
- assert(Step->getType() == STy && "Step has wrong type");
-
- SmallVector<Constant *, 8> Indices;
-
- // Create a vector of consecutive numbers from zero to VF.
- VectorType *InitVecValVTy = ValVTy;
- if (STy->isFloatingPointTy()) {
- Type *InitVecValSTy =
- IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
- InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
- }
- Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
-
- // Splat the StartIdx
- Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);
-
- if (STy->isIntegerTy()) {
- InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
- Step = Builder.CreateVectorSplat(VLen, Step);
- assert(Step->getType() == Val->getType() && "Invalid step vec");
- // FIXME: The newly created binary instructions should contain nsw/nuw
- // flags, which can be found from the original scalar operations.
- Step = Builder.CreateMul(InitVec, Step);
- return Builder.CreateAdd(Val, Step, "induction");
- }
-
- // Floating point induction.
- assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
- "Binary Opcode should be specified for FP induction");
- InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
- InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);
-
- Step = Builder.CreateVectorSplat(VLen, Step);
- Value *MulOp = Builder.CreateFMul(InitVec, Step);
- return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
-}
-
-/// Compute scalar induction steps. \p ScalarIV is the scalar induction
-/// variable on which to base the steps, \p Step is the size of the step.
-static void buildScalarSteps(Value *ScalarIV, Value *Step,
- const InductionDescriptor &ID, VPValue *Def,
- VPTransformState &State) {
- IRBuilderBase &Builder = State.Builder;
-
- // Ensure step has the same type as that of scalar IV.
- Type *ScalarIVTy = ScalarIV->getType()->getScalarType();
- if (ScalarIVTy != Step->getType()) {
- // TODO: Also use VPDerivedIVRecipe when only the step needs truncating, to
- // avoid separate truncate here.
- assert(Step->getType()->isIntegerTy() &&
- "Truncation requires an integer step");
- Step = State.Builder.CreateTrunc(Step, ScalarIVTy);
- }
-
- // We build scalar steps for both integer and floating-point induction
- // variables. Here, we determine the kind of arithmetic we will perform.
- Instruction::BinaryOps AddOp;
- Instruction::BinaryOps MulOp;
- if (ScalarIVTy->isIntegerTy()) {
- AddOp = Instruction::Add;
- MulOp = Instruction::Mul;
- } else {
- AddOp = ID.getInductionOpcode();
- MulOp = Instruction::FMul;
- }
-
- // Determine the number of scalars we need to generate for each unroll
- // iteration.
- bool FirstLaneOnly = vputils::onlyFirstLaneUsed(Def);
- // Compute the scalar steps and save the results in State.
- Type *IntStepTy = IntegerType::get(ScalarIVTy->getContext(),
- ScalarIVTy->getScalarSizeInBits());
- Type *VecIVTy = nullptr;
- Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
- if (!FirstLaneOnly && State.VF.isScalable()) {
- VecIVTy = VectorType::get(ScalarIVTy, State.VF);
- UnitStepVec =
- Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
- SplatStep = Builder.CreateVectorSplat(State.VF, Step);
- SplatIV = Builder.CreateVectorSplat(State.VF, ScalarIV);
- }
-
- unsigned StartPart = 0;
- unsigned EndPart = State.UF;
- unsigned StartLane = 0;
- unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
- if (State.Instance) {
- StartPart = State.Instance->Part;
- EndPart = StartPart + 1;
- StartLane = State.Instance->Lane.getKnownLane();
- EndLane = StartLane + 1;
- }
- for (unsigned Part = StartPart; Part < EndPart; ++Part) {
- Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);
-
- if (!FirstLaneOnly && State.VF.isScalable()) {
- auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
- auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
- if (ScalarIVTy->isFloatingPointTy())
- InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
- auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
- auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
- State.set(Def, Add, Part);
- // It's useful to record the lane values too for the known minimum number
- // of elements so we do those below. This improves the code quality when
- // trying to extract the first element, for example.
- }
-
- if (ScalarIVTy->isFloatingPointTy())
- StartIdx0 = Builder.CreateSIToFP(StartIdx0, ScalarIVTy);
-
- for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
- Value *StartIdx = Builder.CreateBinOp(
- AddOp, StartIdx0, getSignedIntOrFpConstant(ScalarIVTy, Lane));
- // The step returned by `createStepForVF` is a runtime-evaluated value
- // when VF is scalable. Otherwise, it should be folded into a Constant.
- assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
- "Expected StartIdx to be folded to a constant when VF is not "
- "scalable");
- auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
- auto *Add = Builder.CreateBinOp(AddOp, ScalarIV, Mul);
- State.set(Def, Add, VPIteration(Part, Lane));
- }
- }
-}
-
/// Compute the transformed value of Index at offset StartValue using step
/// StepValue.
/// For integer induction, returns StartValue + Index * StepValue.
/// For pointer induction, returns StartValue[Index * StepValue].
/// FIXME: The newly created binary instructions should contain nsw/nuw
/// flags, which can be found from the original scalar operations.
-static Value *emitTransformedIndex(IRBuilderBase &B, Value *Index,
- Value *StartValue, Value *Step,
- const InductionDescriptor &ID) {
+static Value *
+emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue,
+ Value *Step,
+ InductionDescriptor::InductionKind InductionKind,
+ const BinaryOperator *InductionBinOp) {
Type *StepTy = Step->getType();
Value *CastedIndex = StepTy->isIntegerTy()
? B.CreateSExtOrTrunc(Index, StepTy)
@@ -2446,7 +2350,7 @@ static Value *emitTransformedIndex(IRBuilderBase &B, Value *Index,
return B.CreateMul(X, Y);
};
- switch (ID.getKind()) {
+ switch (InductionKind) {
case InductionDescriptor::IK_IntInduction: {
assert(!isa<VectorType>(Index->getType()) &&
"Vector indices not supported for integer inductions yet");
@@ -2464,7 +2368,6 @@ static Value *emitTransformedIndex(IRBuilderBase &B, Value *Index,
assert(!isa<VectorType>(Index->getType()) &&
"Vector indices not supported for FP inductions yet");
assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
- auto InductionBinOp = ID.getInductionBinOp();
assert(InductionBinOp &&
(InductionBinOp->getOpcode() == Instruction::FAdd ||
InductionBinOp->getOpcode() == Instruction::FSub) &&
@@ -2524,17 +2427,6 @@ static bool isIndvarOverflowCheckKnownFalse(
return false;
}
-void InnerLoopVectorizer::packScalarIntoVectorValue(VPValue *Def,
- const VPIteration &Instance,
- VPTransformState &State) {
- Value *ScalarInst = State.get(Def, Instance);
- Value *VectorValue = State.get(Def, Instance.Part);
- VectorValue = Builder.CreateInsertElement(
- VectorValue, ScalarInst,
- Instance.Lane.getAsRuntimeExpr(State.Builder, VF));
- State.set(Def, VectorValue, Instance.Part);
-}
-
// Return whether we allow using masked interleave-groups (for dealing with
// strided loads/stores that reside in predicated blocks, or for dealing
// with gaps).
@@ -2612,7 +2504,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
for (unsigned Part = 0; Part < UF; Part++) {
Value *AddrPart = State.get(Addr, VPIteration(Part, 0));
- State.setDebugLocFromInst(AddrPart);
+ if (auto *I = dyn_cast<Instruction>(AddrPart))
+ State.setDebugLocFrom(I->getDebugLoc());
// Note that the current instruction may access a member at any index; we need
// to adjust the address to the member at index 0.
@@ -2630,14 +2523,10 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
if (auto *gep = dyn_cast<GetElementPtrInst>(AddrPart->stripPointerCasts()))
InBounds = gep->isInBounds();
AddrPart = Builder.CreateGEP(ScalarTy, AddrPart, Idx, "", InBounds);
-
- // Cast to the vector pointer type.
- unsigned AddressSpace = AddrPart->getType()->getPointerAddressSpace();
- Type *PtrTy = VecTy->getPointerTo(AddressSpace);
- AddrParts.push_back(Builder.CreateBitCast(AddrPart, PtrTy));
+ AddrParts.push_back(AddrPart);
}
- State.setDebugLocFromInst(Instr);
+ State.setDebugLocFrom(Instr->getDebugLoc());
Value *PoisonVec = PoisonValue::get(VecTy);
auto CreateGroupMask = [this, &BlockInMask, &State, &InterleaveFactor](
@@ -2835,13 +2724,20 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
bool IsVoidRetTy = Instr->getType()->isVoidTy();
Instruction *Cloned = Instr->clone();
- if (!IsVoidRetTy)
+ if (!IsVoidRetTy) {
Cloned->setName(Instr->getName() + ".cloned");
+#if !defined(NDEBUG)
+ // Verify that VPlan type inference results agree with the type of the
+ // generated values.
+ assert(State.TypeAnalysis.inferScalarType(RepRecipe) == Cloned->getType() &&
+ "inferred type and type from generated instructions do not match");
+#endif
+ }
RepRecipe->setFlags(Cloned);
- if (Instr->getDebugLoc())
- State.setDebugLocFromInst(Instr);
+ if (auto DL = Instr->getDebugLoc())
+ State.setDebugLocFrom(DL);
// Replace the operands of the cloned instructions with their scalar
// equivalents in the new loop.
@@ -3019,9 +2915,11 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
// dominator of the exit blocks.
DT->changeImmediateDominator(LoopExitBlock, TCCheckBlock);
- ReplaceInstWithInst(
- TCCheckBlock->getTerminator(),
- BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters));
+ BranchInst &BI =
+ *BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters);
+ if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()))
+ setBranchWeights(BI, MinItersBypassWeights);
+ ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
LoopBypassBlocks.push_back(TCCheckBlock);
}
@@ -3151,15 +3049,17 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
if (II.getInductionBinOp() && isa<FPMathOperator>(II.getInductionBinOp()))
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
- EndValue =
- emitTransformedIndex(B, VectorTripCount, II.getStartValue(), Step, II);
+ EndValue = emitTransformedIndex(B, VectorTripCount, II.getStartValue(),
+ Step, II.getKind(), II.getInductionBinOp());
EndValue->setName("ind.end");
// Compute the end value for the additional bypass (if applicable).
if (AdditionalBypass.first) {
- B.SetInsertPoint(&(*AdditionalBypass.first->getFirstInsertionPt()));
- EndValueFromAdditionalBypass = emitTransformedIndex(
- B, AdditionalBypass.second, II.getStartValue(), Step, II);
+ B.SetInsertPoint(AdditionalBypass.first,
+ AdditionalBypass.first->getFirstInsertionPt());
+ EndValueFromAdditionalBypass =
+ emitTransformedIndex(B, AdditionalBypass.second, II.getStartValue(),
+ Step, II.getKind(), II.getInductionBinOp());
EndValueFromAdditionalBypass->setName("ind.end");
}
}
@@ -3240,16 +3140,25 @@ BasicBlock *InnerLoopVectorizer::completeLoopSkeleton() {
// 3) Otherwise, construct a runtime check.
if (!Cost->requiresScalarEpilogue(VF.isVector()) &&
!Cost->foldTailByMasking()) {
- Instruction *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
- Count, VectorTripCount, "cmp.n",
- LoopMiddleBlock->getTerminator());
-
// Here we use the same DebugLoc as the scalar loop latch terminator instead
// of the corresponding compare because they may have ended up with
// different line numbers and we want to avoid awkward line stepping while
// debugging. E.g. if the compare got a line number inside the loop.
- CmpN->setDebugLoc(ScalarLatchTerm->getDebugLoc());
- cast<BranchInst>(LoopMiddleBlock->getTerminator())->setCondition(CmpN);
+ // TODO: At the moment, CreateICmpEQ will simplify conditions with constant
+ // operands. Perform simplification directly on VPlan once the branch is
+ // modeled there.
+ IRBuilder<> B(LoopMiddleBlock->getTerminator());
+ B.SetCurrentDebugLocation(ScalarLatchTerm->getDebugLoc());
+ Value *CmpN = B.CreateICmpEQ(Count, VectorTripCount, "cmp.n");
+ BranchInst &BI = *cast<BranchInst>(LoopMiddleBlock->getTerminator());
+ BI.setCondition(CmpN);
+ if (hasBranchWeightMD(*ScalarLatchTerm)) {
+ // Assume that `Count % VectorTripCount` is equally distributed.
+ unsigned TripCount = UF * VF.getKnownMinValue();
+ assert(TripCount > 0 && "trip count should not be zero");
+ const uint32_t Weights[] = {1, TripCount - 1};
+ setBranchWeights(BI, Weights);
+ }
}
#ifdef EXPENSIVE_CHECKS
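Worked example for the weights above, under the stated uniformity assumption: with VF = 4 and UF = 2, TripCount = 8 and the weights are {1, 7}; Count == VectorTripCount (no scalar iterations remain) is expected in 1 of 8 executions, and the branch into the scalar remainder loop in the other 7.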
@@ -3373,7 +3282,8 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
: State.get(StepVPV, {0, 0});
Value *Escape =
- emitTransformedIndex(B, CountMinusOne, II.getStartValue(), Step, II);
+ emitTransformedIndex(B, CountMinusOne, II.getStartValue(), Step,
+ II.getKind(), II.getInductionBinOp());
Escape->setName("ind.escape");
MissingVals[UI] = Escape;
}
@@ -3445,76 +3355,33 @@ static void cse(BasicBlock *BB) {
}
}
-InstructionCost LoopVectorizationCostModel::getVectorCallCost(
- CallInst *CI, ElementCount VF, Function **Variant, bool *NeedsMask) const {
- Function *F = CI->getCalledFunction();
- Type *ScalarRetTy = CI->getType();
- SmallVector<Type *, 4> Tys, ScalarTys;
- bool MaskRequired = Legal->isMaskRequired(CI);
- for (auto &ArgOp : CI->args())
- ScalarTys.push_back(ArgOp->getType());
+InstructionCost
+LoopVectorizationCostModel::getVectorCallCost(CallInst *CI,
+ ElementCount VF) const {
+ // We only need to calculate a cost if the VF is scalar; for actual vectors
+ // we should already have a pre-calculated cost at each VF.
+ if (!VF.isScalar())
+ return CallWideningDecisions.at(std::make_pair(CI, VF)).Cost;
- // Estimate cost of scalarized vector call. The source operands are assumed
- // to be vectors, so we need to extract individual elements from there,
- // execute VF scalar calls, and then gather the result into the vector return
- // value.
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
- InstructionCost ScalarCallCost =
- TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys, CostKind);
- if (VF.isScalar())
- return ScalarCallCost;
-
- // Compute corresponding vector type for return value and arguments.
- Type *RetTy = ToVectorTy(ScalarRetTy, VF);
- for (Type *ScalarTy : ScalarTys)
- Tys.push_back(ToVectorTy(ScalarTy, VF));
-
- // Compute costs of unpacking argument values for the scalar calls and
- // packing the return values to a vector.
- InstructionCost ScalarizationCost =
- getScalarizationOverhead(CI, VF, CostKind);
+ Type *RetTy = CI->getType();
+ if (RecurrenceDescriptor::isFMulAddIntrinsic(CI))
+ if (auto RedCost = getReductionPatternCost(CI, VF, RetTy, CostKind))
+ return *RedCost;
- InstructionCost Cost =
- ScalarCallCost * VF.getKnownMinValue() + ScalarizationCost;
-
- // If we can't emit a vector call for this function, then the currently found
- // cost is the cost we need to return.
- InstructionCost MaskCost = 0;
- VFShape Shape = VFShape::get(*CI, VF, MaskRequired);
- if (NeedsMask)
- *NeedsMask = MaskRequired;
- Function *VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape);
- // If we want an unmasked vector function but can't find one matching the VF,
- // maybe we can find vector function that does use a mask and synthesize
- // an all-true mask.
- if (!VecFunc && !MaskRequired) {
- Shape = VFShape::get(*CI, VF, /*HasGlobalPred=*/true);
- VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape);
- // If we found one, add in the cost of creating a mask
- if (VecFunc) {
- if (NeedsMask)
- *NeedsMask = true;
- MaskCost = TTI.getShuffleCost(
- TargetTransformInfo::SK_Broadcast,
- VectorType::get(
- IntegerType::getInt1Ty(VecFunc->getFunctionType()->getContext()),
- VF));
- }
- }
+ SmallVector<Type *, 4> Tys;
+ for (auto &ArgOp : CI->args())
+ Tys.push_back(ArgOp->getType());
- // We don't support masked function calls yet, but we can scalarize a
- // masked call with branches (unless VF is scalable).
- if (!TLI || CI->isNoBuiltin() || !VecFunc)
- return VF.isScalable() ? InstructionCost::getInvalid() : Cost;
+ InstructionCost ScalarCallCost =
+ TTI.getCallInstrCost(CI->getCalledFunction(), RetTy, Tys, CostKind);
- // If the corresponding vector cost is cheaper, return its cost.
- InstructionCost VectorCallCost =
- TTI.getCallInstrCost(nullptr, RetTy, Tys, CostKind) + MaskCost;
- if (VectorCallCost < Cost) {
- *Variant = VecFunc;
- Cost = VectorCallCost;
+ // If this is an intrinsic we may have a lower cost for it.
+ if (getVectorIntrinsicIDForCall(CI, TLI)) {
+ InstructionCost IntrinsicCost = getVectorIntrinsicCost(CI, VF);
+ return std::min(ScalarCallCost, IntrinsicCost);
}
- return Cost;
+ return ScalarCallCost;
}
static Type *MaybeVectorizeType(Type *Elt, ElementCount VF) {
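Arithmetic example for the rewritten getVectorCallCost, with assumed costs: at a scalar VF where TTI reports a call cost of 10 but a recognized vector intrinsic costs 6, the std::min above returns 6; for any vector VF the answer is simply the Cost field precomputed by setVectorizedCallDecision.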
@@ -3558,146 +3425,8 @@ static Type *largestIntegerVectorType(Type *T1, Type *T2) {
return I1->getBitWidth() > I2->getBitWidth() ? T1 : T2;
}
-void InnerLoopVectorizer::truncateToMinimalBitwidths(VPTransformState &State) {
- // For every instruction `I` in MinBWs, truncate the operands, create a
- // truncated version of `I` and reextend its result. InstCombine runs
- // later and will remove any ext/trunc pairs.
- SmallPtrSet<Value *, 4> Erased;
- for (const auto &KV : Cost->getMinimalBitwidths()) {
- // If the value wasn't vectorized, we must maintain the original scalar
- // type. The absence of the value from State indicates that it
- // wasn't vectorized.
- // FIXME: Should not rely on getVPValue at this point.
- VPValue *Def = State.Plan->getVPValue(KV.first, true);
- if (!State.hasAnyVectorValue(Def))
- continue;
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *I = State.get(Def, Part);
- if (Erased.count(I) || I->use_empty() || !isa<Instruction>(I))
- continue;
- Type *OriginalTy = I->getType();
- Type *ScalarTruncatedTy =
- IntegerType::get(OriginalTy->getContext(), KV.second);
- auto *TruncatedTy = VectorType::get(
- ScalarTruncatedTy, cast<VectorType>(OriginalTy)->getElementCount());
- if (TruncatedTy == OriginalTy)
- continue;
-
- IRBuilder<> B(cast<Instruction>(I));
- auto ShrinkOperand = [&](Value *V) -> Value * {
- if (auto *ZI = dyn_cast<ZExtInst>(V))
- if (ZI->getSrcTy() == TruncatedTy)
- return ZI->getOperand(0);
- return B.CreateZExtOrTrunc(V, TruncatedTy);
- };
-
- // The actual instruction modification depends on the instruction type,
- // unfortunately.
- Value *NewI = nullptr;
- if (auto *BO = dyn_cast<BinaryOperator>(I)) {
- NewI = B.CreateBinOp(BO->getOpcode(), ShrinkOperand(BO->getOperand(0)),
- ShrinkOperand(BO->getOperand(1)));
-
- // Any wrapping introduced by shrinking this operation shouldn't be
- // considered undefined behavior. So, we can't unconditionally copy
- // arithmetic wrapping flags to NewI.
- cast<BinaryOperator>(NewI)->copyIRFlags(I, /*IncludeWrapFlags=*/false);
- } else if (auto *CI = dyn_cast<ICmpInst>(I)) {
- NewI =
- B.CreateICmp(CI->getPredicate(), ShrinkOperand(CI->getOperand(0)),
- ShrinkOperand(CI->getOperand(1)));
- } else if (auto *SI = dyn_cast<SelectInst>(I)) {
- NewI = B.CreateSelect(SI->getCondition(),
- ShrinkOperand(SI->getTrueValue()),
- ShrinkOperand(SI->getFalseValue()));
- } else if (auto *CI = dyn_cast<CastInst>(I)) {
- switch (CI->getOpcode()) {
- default:
- llvm_unreachable("Unhandled cast!");
- case Instruction::Trunc:
- NewI = ShrinkOperand(CI->getOperand(0));
- break;
- case Instruction::SExt:
- NewI = B.CreateSExtOrTrunc(
- CI->getOperand(0),
- smallestIntegerVectorType(OriginalTy, TruncatedTy));
- break;
- case Instruction::ZExt:
- NewI = B.CreateZExtOrTrunc(
- CI->getOperand(0),
- smallestIntegerVectorType(OriginalTy, TruncatedTy));
- break;
- }
- } else if (auto *SI = dyn_cast<ShuffleVectorInst>(I)) {
- auto Elements0 =
- cast<VectorType>(SI->getOperand(0)->getType())->getElementCount();
- auto *O0 = B.CreateZExtOrTrunc(
- SI->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements0));
- auto Elements1 =
- cast<VectorType>(SI->getOperand(1)->getType())->getElementCount();
- auto *O1 = B.CreateZExtOrTrunc(
- SI->getOperand(1), VectorType::get(ScalarTruncatedTy, Elements1));
-
- NewI = B.CreateShuffleVector(O0, O1, SI->getShuffleMask());
- } else if (isa<LoadInst>(I) || isa<PHINode>(I)) {
- // Don't do anything with the operands, just extend the result.
- continue;
- } else if (auto *IE = dyn_cast<InsertElementInst>(I)) {
- auto Elements =
- cast<VectorType>(IE->getOperand(0)->getType())->getElementCount();
- auto *O0 = B.CreateZExtOrTrunc(
- IE->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements));
- auto *O1 = B.CreateZExtOrTrunc(IE->getOperand(1), ScalarTruncatedTy);
- NewI = B.CreateInsertElement(O0, O1, IE->getOperand(2));
- } else if (auto *EE = dyn_cast<ExtractElementInst>(I)) {
- auto Elements =
- cast<VectorType>(EE->getOperand(0)->getType())->getElementCount();
- auto *O0 = B.CreateZExtOrTrunc(
- EE->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements));
- NewI = B.CreateExtractElement(O0, EE->getOperand(2));
- } else {
- // If we don't know what to do, be conservative and don't do anything.
- continue;
- }
-
- // Lastly, extend the result.
- NewI->takeName(cast<Instruction>(I));
- Value *Res = B.CreateZExtOrTrunc(NewI, OriginalTy);
- I->replaceAllUsesWith(Res);
- cast<Instruction>(I)->eraseFromParent();
- Erased.insert(I);
- State.reset(Def, Res, Part);
- }
- }
-
- // We'll have created a bunch of ZExts that are now parentless. Clean up.
- for (const auto &KV : Cost->getMinimalBitwidths()) {
- // If the value wasn't vectorized, we must maintain the original scalar
- // type. The absence of the value from State indicates that it
- // wasn't vectorized.
- // FIXME: Should not rely on getVPValue at this point.
- VPValue *Def = State.Plan->getVPValue(KV.first, true);
- if (!State.hasAnyVectorValue(Def))
- continue;
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *I = State.get(Def, Part);
- ZExtInst *Inst = dyn_cast<ZExtInst>(I);
- if (Inst && Inst->use_empty()) {
- Value *NewI = Inst->getOperand(0);
- Inst->eraseFromParent();
- State.reset(Def, NewI, Part);
- }
- }
- }
-}
-
void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
VPlan &Plan) {
- // Insert truncates and extends for any truncated instructions as hints to
- // InstCombine.
- if (VF.isVector())
- truncateToMinimalBitwidths(State);
-
// Fix widened non-induction PHIs by setting up the PHI operands.
if (EnableVPlanNativePath)
fixNonInductionPHIs(Plan, State);
@@ -3710,6 +3439,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
// Forget the original basic block.
PSE.getSE()->forgetLoop(OrigLoop);
+ PSE.getSE()->forgetBlockAndLoopDispositions();
// After vectorization, the exit blocks of the original loop will have
// additional predecessors. Invalidate SCEVs for the exit phis in case SE
@@ -3718,7 +3448,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
OrigLoop->getExitBlocks(ExitBlocks);
for (BasicBlock *Exit : ExitBlocks)
for (PHINode &PN : Exit->phis())
- PSE.getSE()->forgetValue(&PN);
+ PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
VPBasicBlock *LatchVPBB = Plan.getVectorLoopRegion()->getExitingBasicBlock();
Loop *VectorLoop = LI->getLoopFor(State.CFG.VPBB2IRBB[LatchVPBB]);
@@ -3744,7 +3474,8 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
// Fix LCSSA phis not already fixed earlier. Extracts may need to be generated
// in the exit block, so update the builder.
- State.Builder.SetInsertPoint(State.CFG.ExitBB->getFirstNonPHI());
+ State.Builder.SetInsertPoint(State.CFG.ExitBB,
+ State.CFG.ExitBB->getFirstNonPHIIt());
for (const auto &KV : Plan.getLiveOuts())
KV.second->fixPhi(Plan, State);
@@ -3782,40 +3513,10 @@ void InnerLoopVectorizer::fixCrossIterationPHIs(VPTransformState &State) {
VPBasicBlock *Header =
State.Plan->getVectorLoopRegion()->getEntryBasicBlock();
- // Gather all VPReductionPHIRecipe and sort them so that Intermediate stores
- // sank outside of the loop would keep the same order as they had in the
- // original loop.
- SmallVector<VPReductionPHIRecipe *> ReductionPHIList;
for (VPRecipeBase &R : Header->phis()) {
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R))
- ReductionPHIList.emplace_back(ReductionPhi);
+ fixReduction(ReductionPhi, State);
}
- stable_sort(ReductionPHIList, [this](const VPReductionPHIRecipe *R1,
- const VPReductionPHIRecipe *R2) {
- auto *IS1 = R1->getRecurrenceDescriptor().IntermediateStore;
- auto *IS2 = R2->getRecurrenceDescriptor().IntermediateStore;
-
- // If neither of the recipes has an intermediate store, keep the order the
- // same.
- if (!IS1 && !IS2)
- return false;
-
- // If only one of the recipes has an intermediate store, then move it
- // towards the beginning of the list.
- if (IS1 && !IS2)
- return true;
-
- if (!IS1 && IS2)
- return false;
-
- // If both recipes have an intermediate store, then the recipe with the
- // later store should be processed earlier. So it should go to the beginning
- // of the list.
- return DT->dominates(IS2, IS1);
- });
-
- for (VPReductionPHIRecipe *ReductionPhi : ReductionPHIList)
- fixReduction(ReductionPhi, State);
for (VPRecipeBase &R : Header->phis()) {
if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
@@ -3929,7 +3630,7 @@ void InnerLoopVectorizer::fixFixedOrderRecurrence(
}
// Fix the initial value of the original recurrence in the scalar loop.
- Builder.SetInsertPoint(&*LoopScalarPreHeader->begin());
+ Builder.SetInsertPoint(LoopScalarPreHeader, LoopScalarPreHeader->begin());
PHINode *Phi = cast<PHINode>(PhiR->getUnderlyingValue());
auto *Start = Builder.CreatePHI(Phi->getType(), 2, "scalar.recur.init");
auto *ScalarInit = PhiR->getStartValue()->getLiveInIRValue();
@@ -3953,90 +3654,56 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
RecurKind RK = RdxDesc.getRecurrenceKind();
TrackingVH<Value> ReductionStartValue = RdxDesc.getRecurrenceStartValue();
Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
- State.setDebugLocFromInst(ReductionStartValue);
+ if (auto *I = dyn_cast<Instruction>(&*ReductionStartValue))
+ State.setDebugLocFrom(I->getDebugLoc());
VPValue *LoopExitInstDef = PhiR->getBackedgeValue();
- // This is the vector-clone of the value that leaves the loop.
- Type *VecTy = State.get(LoopExitInstDef, 0)->getType();
// Before each round, move the insertion point right between
// the PHIs and the values we are going to write.
// This allows us to write both PHINodes and the extractelement
// instructions.
- Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt());
+ Builder.SetInsertPoint(LoopMiddleBlock,
+ LoopMiddleBlock->getFirstInsertionPt());
- State.setDebugLocFromInst(LoopExitInst);
+ State.setDebugLocFrom(LoopExitInst->getDebugLoc());
Type *PhiTy = OrigPhi->getType();
-
- VPBasicBlock *LatchVPBB =
- PhiR->getParent()->getEnclosingLoopRegion()->getExitingBasicBlock();
- BasicBlock *VectorLoopLatch = State.CFG.VPBB2IRBB[LatchVPBB];
// If tail is folded by masking, the vector value to leave the loop should be
// a Select choosing between the vectorized LoopExitInst and vectorized Phi,
// instead of the former. For an inloop reduction the reduction will already
// be predicated, and does not need to be handled here.
if (Cost->foldTailByMasking() && !PhiR->isInLoop()) {
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *VecLoopExitInst = State.get(LoopExitInstDef, Part);
- SelectInst *Sel = nullptr;
- for (User *U : VecLoopExitInst->users()) {
- if (isa<SelectInst>(U)) {
- assert(!Sel && "Reduction exit feeding two selects");
- Sel = cast<SelectInst>(U);
- } else
- assert(isa<PHINode>(U) && "Reduction exit must feed Phi's or select");
- }
- assert(Sel && "Reduction exit feeds no select");
- State.reset(LoopExitInstDef, Sel, Part);
-
- if (isa<FPMathOperator>(Sel))
- Sel->setFastMathFlags(RdxDesc.getFastMathFlags());
-
- // If the target can create a predicated operator for the reduction at no
- // extra cost in the loop (for example a predicated vadd), it can be
- // cheaper for the select to remain in the loop than be sunk out of it,
- // and so use the select value for the phi instead of the old
- // LoopExitValue.
- if (PreferPredicatedReductionSelect ||
- TTI->preferPredicatedReductionSelect(
- RdxDesc.getOpcode(), PhiTy,
- TargetTransformInfo::ReductionFlags())) {
- auto *VecRdxPhi =
- cast<PHINode>(State.get(PhiR, Part));
- VecRdxPhi->setIncomingValueForBlock(VectorLoopLatch, Sel);
+ VPValue *Def = nullptr;
+ for (VPUser *U : LoopExitInstDef->users()) {
+ auto *S = dyn_cast<VPInstruction>(U);
+ if (S && S->getOpcode() == Instruction::Select) {
+ Def = S;
+ break;
}
}
+ if (Def)
+ LoopExitInstDef = Def;
}
+ VectorParts RdxParts(UF);
+ for (unsigned Part = 0; Part < UF; ++Part)
+ RdxParts[Part] = State.get(LoopExitInstDef, Part);
+
// If the vector reduction can be performed in a smaller type, we truncate
// then extend the loop exit value to enable InstCombine to evaluate the
// entire expression in the smaller type.
if (VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
- assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
+ Builder.SetInsertPoint(LoopMiddleBlock,
+ LoopMiddleBlock->getFirstInsertionPt());
Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF);
- Builder.SetInsertPoint(VectorLoopLatch->getTerminator());
- VectorParts RdxParts(UF);
- for (unsigned Part = 0; Part < UF; ++Part) {
- RdxParts[Part] = State.get(LoopExitInstDef, Part);
- Value *Trunc = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
- Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy)
- : Builder.CreateZExt(Trunc, VecTy);
- for (User *U : llvm::make_early_inc_range(RdxParts[Part]->users()))
- if (U != Trunc) {
- U->replaceUsesOfWith(RdxParts[Part], Extnd);
- RdxParts[Part] = Extnd;
- }
- }
- Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt());
for (unsigned Part = 0; Part < UF; ++Part) {
RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
- State.reset(LoopExitInstDef, RdxParts[Part], Part);
}
}
// Reduce all of the unrolled parts into a single vector.
- Value *ReducedPartRdx = State.get(LoopExitInstDef, 0);
+ Value *ReducedPartRdx = RdxParts[0];
unsigned Op = RecurrenceDescriptor::getOpcode(RK);
// The middle block terminator has already been assigned a DebugLoc here (the
@@ -4046,21 +3713,21 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
// conditional branch, and (c) other passes may add new predecessors which
// terminate on this line. This is the easiest way to ensure we don't
// accidentally cause an extra step back into the loop while debugging.
- State.setDebugLocFromInst(LoopMiddleBlock->getTerminator());
+ State.setDebugLocFrom(LoopMiddleBlock->getTerminator()->getDebugLoc());
if (PhiR->isOrdered())
- ReducedPartRdx = State.get(LoopExitInstDef, UF - 1);
+ ReducedPartRdx = RdxParts[UF - 1];
else {
// Floating-point operations should have some FMF to enable the reduction.
IRBuilderBase::FastMathFlagGuard FMFG(Builder);
Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
for (unsigned Part = 1; Part < UF; ++Part) {
- Value *RdxPart = State.get(LoopExitInstDef, Part);
- if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
+ Value *RdxPart = RdxParts[Part];
+ if (Op != Instruction::ICmp && Op != Instruction::FCmp)
ReducedPartRdx = Builder.CreateBinOp(
(Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
- } else if (RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK))
- ReducedPartRdx = createSelectCmpOp(Builder, ReductionStartValue, RK,
- ReducedPartRdx, RdxPart);
+ else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
+ ReducedPartRdx = createAnyOfOp(Builder, ReductionStartValue, RK,
+ ReducedPartRdx, RdxPart);
else
ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
}
@@ -4070,7 +3737,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
// target reduction in the loop using a Reduction recipe.
if (VF.isVector() && !PhiR->isInLoop()) {
ReducedPartRdx =
- createTargetReduction(Builder, TTI, RdxDesc, ReducedPartRdx, OrigPhi);
+ createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
// If the reduction can be performed in a smaller type, we need to extend
// the reduction to the wider type before we branch to the original loop.
if (PhiTy != RdxDesc.getRecurrenceType())
@@ -4107,7 +3774,8 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
// inside the loop, create the final store here.
if (StoreInst *SI = RdxDesc.IntermediateStore) {
StoreInst *NewSI =
- Builder.CreateStore(ReducedPartRdx, SI->getPointerOperand());
+ Builder.CreateAlignedStore(ReducedPartRdx, SI->getPointerOperand(),
+ SI->getAlign());
propagateMetadata(NewSI, SI);
// If the reduction value is used in other places,
@@ -4436,7 +4104,10 @@ bool LoopVectorizationCostModel::isScalarWithPredication(
default:
return true;
case Instruction::Call:
- return !VFDatabase::hasMaskedVariant(*(cast<CallInst>(I)), VF);
+ if (VF.isScalar())
+ return true;
+ return CallWideningDecisions.at(std::make_pair(cast<CallInst>(I), VF))
+ .Kind == CM_Scalarize;
case Instruction::Load:
case Instruction::Store: {
auto *Ptr = getLoadStorePointerOperand(I);
@@ -4988,7 +4659,7 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
}
FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
- unsigned ConstTripCount, ElementCount UserVF, bool FoldTailByMasking) {
+ unsigned MaxTripCount, ElementCount UserVF, bool FoldTailByMasking) {
MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
unsigned SmallestType, WidestType;
std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes();
@@ -5076,12 +4747,12 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
FixedScalableVFPair Result(ElementCount::getFixed(1),
ElementCount::getScalable(0));
if (auto MaxVF =
- getMaximizedVFForTarget(ConstTripCount, SmallestType, WidestType,
+ getMaximizedVFForTarget(MaxTripCount, SmallestType, WidestType,
MaxSafeFixedVF, FoldTailByMasking))
Result.FixedVF = MaxVF;
if (auto MaxVF =
- getMaximizedVFForTarget(ConstTripCount, SmallestType, WidestType,
+ getMaximizedVFForTarget(MaxTripCount, SmallestType, WidestType,
MaxSafeScalableVF, FoldTailByMasking))
if (MaxVF.isScalable()) {
Result.ScalableVF = MaxVF;
@@ -5105,6 +4776,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
}
unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
+ unsigned MaxTC = PSE.getSE()->getSmallConstantMaxTripCount(TheLoop);
LLVM_DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
if (TC == 1) {
reportVectorizationFailure("Single iteration (non) loop",
@@ -5115,7 +4787,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
switch (ScalarEpilogueStatus) {
case CM_ScalarEpilogueAllowed:
- return computeFeasibleMaxVF(TC, UserVF, false);
+ return computeFeasibleMaxVF(MaxTC, UserVF, false);
case CM_ScalarEpilogueNotAllowedUsePredicate:
[[fallthrough]];
case CM_ScalarEpilogueNotNeededUsePredicate:
@@ -5153,7 +4825,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a "
"scalar epilogue instead.\n");
ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
- return computeFeasibleMaxVF(TC, UserVF, false);
+ return computeFeasibleMaxVF(MaxTC, UserVF, false);
}
return FixedScalableVFPair::getNone();
}
@@ -5170,7 +4842,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
}
- FixedScalableVFPair MaxFactors = computeFeasibleMaxVF(TC, UserVF, true);
+ FixedScalableVFPair MaxFactors = computeFeasibleMaxVF(MaxTC, UserVF, true);
// Avoid tail folding if the trip count is known to be a multiple of any VF
// we choose.
@@ -5246,7 +4918,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
}
ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
- unsigned ConstTripCount, unsigned SmallestType, unsigned WidestType,
+ unsigned MaxTripCount, unsigned SmallestType, unsigned WidestType,
ElementCount MaxSafeVF, bool FoldTailByMasking) {
bool ComputeScalableMaxVF = MaxSafeVF.isScalable();
const TypeSize WidestRegister = TTI.getRegisterBitWidth(
@@ -5285,31 +4957,35 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
}
// When a scalar epilogue is required, at least one iteration of the scalar
- // loop has to execute. Adjust ConstTripCount accordingly to avoid picking a
+ // loop has to execute. Adjust MaxTripCount accordingly to avoid picking a
// max VF that results in a dead vector loop.
- if (ConstTripCount > 0 && requiresScalarEpilogue(true))
- ConstTripCount -= 1;
-
- if (ConstTripCount && ConstTripCount <= WidestRegisterMinEC &&
- (!FoldTailByMasking || isPowerOf2_32(ConstTripCount))) {
- // If loop trip count (TC) is known at compile time there is no point in
- // choosing VF greater than TC (as done in the loop below). Select maximum
- // power of two which doesn't exceed TC.
- // If MaxVectorElementCount is scalable, we only fall back on a fixed VF
- // when the TC is less than or equal to the known number of lanes.
- auto ClampedConstTripCount = llvm::bit_floor(ConstTripCount);
+ if (MaxTripCount > 0 && requiresScalarEpilogue(true))
+ MaxTripCount -= 1;
+
+ if (MaxTripCount && MaxTripCount <= WidestRegisterMinEC &&
+ (!FoldTailByMasking || isPowerOf2_32(MaxTripCount))) {
+ // If an upper bound for the loop trip count (TC) is known at compile time,
+ // there is no point in choosing a VF greater than TC (as done in the loop
+ // below). Select the maximum power of two which doesn't exceed TC. If
+ // MaxVectorElementCount is scalable, we only fall back on a fixed VF when
+ // the TC is less than or equal to the known number of lanes.
+ auto ClampedUpperTripCount = llvm::bit_floor(MaxTripCount);
LLVM_DEBUG(dbgs() << "LV: Clamping the MaxVF to maximum power of two not "
"exceeding the constant trip count: "
- << ClampedConstTripCount << "\n");
- return ElementCount::getFixed(ClampedConstTripCount);
+ << ClampedUpperTripCount << "\n");
+ return ElementCount::get(
+ ClampedUpperTripCount,
+ FoldTailByMasking ? MaxVectorElementCount.isScalable() : false);
}
TargetTransformInfo::RegisterKind RegKind =
ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
: TargetTransformInfo::RGK_FixedWidthVector;
ElementCount MaxVF = MaxVectorElementCount;
- if (MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences() == 0 &&
- TTI.shouldMaximizeVectorBandwidth(RegKind))) {
+ if (MaximizeBandwidth ||
+ (MaximizeBandwidth.getNumOccurrences() == 0 &&
+ (TTI.shouldMaximizeVectorBandwidth(RegKind) ||
+ (UseWiderVFIfCallVariantsPresent && Legal->hasVectorCallVariants())))) {
auto MaxVectorElementCountMaxBW = ElementCount::get(
llvm::bit_floor(WidestRegister.getKnownMinValue() / SmallestType),
ComputeScalableMaxVF);
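The clamp above keeps the chosen VF from outrunning a known trip-count bound. A minimal standalone sketch of that rule, with made-up numbers rather than the cost model's real inputs:

    #include <bit>
    #include <cstdio>

    // Hedged illustration only: mirrors the bit_floor clamp above.
    static unsigned clampVF(unsigned MaxTripCount, bool NeedsScalarEpilogue) {
      if (MaxTripCount > 0 && NeedsScalarEpilogue)
        MaxTripCount -= 1; // keep at least one scalar iteration alive
      return std::bit_floor(MaxTripCount); // largest power of two <= bound
    }

    int main() {
      // A bound of 17 with a required scalar epilogue becomes 16 -> VF 16;
      // a bound of 9 without one clamps to VF 8.
      std::printf("%u %u\n", clampVF(17, true), clampVF(9, false));
    }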
@@ -5981,7 +5657,7 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
HasReductions &&
any_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool {
const RecurrenceDescriptor &RdxDesc = Reduction.second;
- return RecurrenceDescriptor::isSelectCmpRecurrenceKind(
+ return RecurrenceDescriptor::isAnyOfRecurrenceKind(
RdxDesc.getRecurrenceKind());
});
if (HasSelectCmpReductions) {
@@ -6149,6 +5825,8 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
if (ValuesToIgnore.count(I))
continue;
+ collectInLoopReductions();
+
// For each VF find the maximum usage of registers.
for (unsigned j = 0, e = VFs.size(); j < e; ++j) {
// Count the number of registers used, per register class, given all open
@@ -6668,10 +6346,11 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
std::optional<InstructionCost>
LoopVectorizationCostModel::getReductionPatternCost(
- Instruction *I, ElementCount VF, Type *Ty, TTI::TargetCostKind CostKind) {
+ Instruction *I, ElementCount VF, Type *Ty,
+ TTI::TargetCostKind CostKind) const {
using namespace llvm::PatternMatch;
// Early exit for no inloop reductions
- if (InLoopReductionChains.empty() || VF.isScalar() || !isa<VectorType>(Ty))
+ if (InLoopReductions.empty() || VF.isScalar() || !isa<VectorType>(Ty))
return std::nullopt;
auto *VectorTy = cast<VectorType>(Ty);
@@ -6706,10 +6385,10 @@ LoopVectorizationCostModel::getReductionPatternCost(
// Find the reduction this chain is a part of and calculate the basic cost of
// the reduction on its own.
- Instruction *LastChain = InLoopReductionImmediateChains[RetI];
+ Instruction *LastChain = InLoopReductionImmediateChains.at(RetI);
Instruction *ReductionPhi = LastChain;
while (!isa<PHINode>(ReductionPhi))
- ReductionPhi = InLoopReductionImmediateChains[ReductionPhi];
+ ReductionPhi = InLoopReductionImmediateChains.at(ReductionPhi);
const RecurrenceDescriptor &RdxDesc =
Legal->getReductionVars().find(cast<PHINode>(ReductionPhi))->second;
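The walk above follows the immediate-chain map from an instruction back to its reduction phi. A small model of that walk, with std::map and string names standing in for the DenseMap of instructions; the switch from operator[] to at() matters because the function is now const, and at() is the lookup that cannot insert:

    #include <cstdio>
    #include <map>
    #include <string>

    int main() {
      // Hypothetical chain: add2 -> add1 -> phi.
      const std::map<std::string, std::string> ImmediateChains{
          {"add2", "add1"}, {"add1", "phi"}};
      std::string Cur = "add2";
      while (Cur != "phi")
        Cur = ImmediateChains.at(Cur); // const-safe lookup, no insertion
      std::printf("reached %s\n", Cur.c_str());
    }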
@@ -7127,6 +6806,168 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
}
}
+void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
+ assert(!VF.isScalar() &&
+ "Trying to set a vectorization decision for a scalar VF");
+
+ for (BasicBlock *BB : TheLoop->blocks()) {
+ // For each instruction in the old loop.
+ for (Instruction &I : *BB) {
+ CallInst *CI = dyn_cast<CallInst>(&I);
+
+ if (!CI)
+ continue;
+
+ InstructionCost ScalarCost = InstructionCost::getInvalid();
+ InstructionCost VectorCost = InstructionCost::getInvalid();
+ InstructionCost IntrinsicCost = InstructionCost::getInvalid();
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+
+ Function *ScalarFunc = CI->getCalledFunction();
+ Type *ScalarRetTy = CI->getType();
+ SmallVector<Type *, 4> Tys, ScalarTys;
+ bool MaskRequired = Legal->isMaskRequired(CI);
+ for (auto &ArgOp : CI->args())
+ ScalarTys.push_back(ArgOp->getType());
+
+ // Compute corresponding vector type for return value and arguments.
+ Type *RetTy = ToVectorTy(ScalarRetTy, VF);
+ for (Type *ScalarTy : ScalarTys)
+ Tys.push_back(ToVectorTy(ScalarTy, VF));
+
+ // An in-loop reduction using an fmuladd intrinsic is a special case;
+ // we don't want the normal cost for that intrinsic.
+ if (RecurrenceDescriptor::isFMulAddIntrinsic(CI))
+ if (auto RedCost = getReductionPatternCost(CI, VF, RetTy, CostKind)) {
+ setCallWideningDecision(CI, VF, CM_IntrinsicCall, nullptr,
+ getVectorIntrinsicIDForCall(CI, TLI),
+ std::nullopt, *RedCost);
+ continue;
+ }
+
+ // Estimate cost of scalarized vector call. The source operands are
+ // assumed to be vectors, so we need to extract individual elements from
+ // there, execute VF scalar calls, and then gather the result into the
+ // vector return value.
+ InstructionCost ScalarCallCost =
+ TTI.getCallInstrCost(ScalarFunc, ScalarRetTy, ScalarTys, CostKind);
+
+ // Compute costs of unpacking argument values for the scalar calls and
+ // packing the return values to a vector.
+ InstructionCost ScalarizationCost =
+ getScalarizationOverhead(CI, VF, CostKind);
+
+ ScalarCost = ScalarCallCost * VF.getKnownMinValue() + ScalarizationCost;
+
+ // Find the cost of vectorizing the call, if we can find a suitable
+ // vector variant of the function.
+ bool UsesMask = false;
+ VFInfo FuncInfo;
+ Function *VecFunc = nullptr;
+ // Search through any available variants for one we can use at this VF.
+ for (VFInfo &Info : VFDatabase::getMappings(*CI)) {
+ // Must match requested VF.
+ if (Info.Shape.VF != VF)
+ continue;
+
+ // Must take a mask argument if one is required
+ if (MaskRequired && !Info.isMasked())
+ continue;
+
+ // Check that all parameter kinds are supported
+ bool ParamsOk = true;
+ for (VFParameter Param : Info.Shape.Parameters) {
+ switch (Param.ParamKind) {
+ case VFParamKind::Vector:
+ break;
+ case VFParamKind::OMP_Uniform: {
+ Value *ScalarParam = CI->getArgOperand(Param.ParamPos);
+ // Make sure the scalar parameter in the loop is invariant.
+ if (!PSE.getSE()->isLoopInvariant(PSE.getSCEV(ScalarParam),
+ TheLoop))
+ ParamsOk = false;
+ break;
+ }
+ case VFParamKind::OMP_Linear: {
+ Value *ScalarParam = CI->getArgOperand(Param.ParamPos);
+ // Find the stride for the scalar parameter in this loop and see if
+ // it matches the stride for the variant.
+ // TODO: do we need to figure out the cost of an extract to get the
+ // first lane? Or do we hope that it will be folded away?
+ ScalarEvolution *SE = PSE.getSE();
+ const auto *SAR =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(ScalarParam));
+
+ if (!SAR || SAR->getLoop() != TheLoop) {
+ ParamsOk = false;
+ break;
+ }
+
+ const SCEVConstant *Step =
+ dyn_cast<SCEVConstant>(SAR->getStepRecurrence(*SE));
+
+ if (!Step ||
+ Step->getAPInt().getSExtValue() != Param.LinearStepOrPos)
+ ParamsOk = false;
+
+ break;
+ }
+ case VFParamKind::GlobalPredicate:
+ UsesMask = true;
+ break;
+ default:
+ ParamsOk = false;
+ break;
+ }
+ }
+
+ if (!ParamsOk)
+ continue;
+
+ // Found a suitable candidate, stop here.
+ VecFunc = CI->getModule()->getFunction(Info.VectorName);
+ FuncInfo = Info;
+ break;
+ }
+
+ // Add in the cost of synthesizing a mask if one wasn't required.
+ InstructionCost MaskCost = 0;
+ if (VecFunc && UsesMask && !MaskRequired)
+ MaskCost = TTI.getShuffleCost(
+ TargetTransformInfo::SK_Broadcast,
+ VectorType::get(IntegerType::getInt1Ty(
+ VecFunc->getFunctionType()->getContext()),
+ VF));
+
+ if (TLI && VecFunc && !CI->isNoBuiltin())
+ VectorCost =
+ TTI.getCallInstrCost(nullptr, RetTy, Tys, CostKind) + MaskCost;
+
+ // Find the cost of an intrinsic; some targets may have instructions that
+ // perform the operation without needing an actual call.
+ Intrinsic::ID IID = getVectorIntrinsicIDForCall(CI, TLI);
+ if (IID != Intrinsic::not_intrinsic)
+ IntrinsicCost = getVectorIntrinsicCost(CI, VF);
+
+ InstructionCost Cost = ScalarCost;
+ InstWidening Decision = CM_Scalarize;
+
+ if (VectorCost <= Cost) {
+ Cost = VectorCost;
+ Decision = CM_VectorCall;
+ }
+
+ if (IntrinsicCost <= Cost) {
+ Cost = IntrinsicCost;
+ Decision = CM_IntrinsicCall;
+ }
+
+ setCallWideningDecision(CI, VF, Decision, VecFunc, IID,
+ FuncInfo.getParamIndexForOptionalMask(), Cost);
+ }
+ }
+}
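The three-way comparison at the end of this new function reduces to a simple rule; here is a hypothetical stand-alone model of it (not the patch's API). In LLVM an invalid InstructionCost compares greater than any valid one, so an option with no feasible lowering is never chosen; LONG_MAX plays that role below:

    #include <climits>
    #include <cstdio>

    enum class Widening { Scalarize, VectorCall, IntrinsicCall };

    // "Invalid" costs are modelled here as LONG_MAX for illustration.
    static Widening pickCallWidening(long Scalar, long Vector, long Intrinsic) {
      long Cost = Scalar;
      Widening Decision = Widening::Scalarize;
      if (Vector <= Cost) { Cost = Vector; Decision = Widening::VectorCall; }
      if (Intrinsic <= Cost) { Decision = Widening::IntrinsicCall; }
      return Decision;
    }

    int main() {
      // No vector variant (invalid), but a cheap intrinsic beats scalarizing.
      auto D = pickCallWidening(/*Scalar=*/20, /*Vector=*/LONG_MAX,
                                /*Intrinsic=*/6);
      std::printf("%d\n", static_cast<int>(D)); // 2 == IntrinsicCall
    }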
+
InstructionCost
LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
Type *&VectorTy) {
@@ -7156,7 +6997,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
// With the exception of GEPs and PHIs, after scalarization there should
// only be one copy of the instruction generated in the loop. This is
// because the VF is either 1, or any instructions that need scalarizing
- // have already been dealt with by the the time we get here. As a result,
+ // have already been dealt with by the time we get here. As a result,
// it means we don't have to multiply the instruction cost by VF.
assert(I->getOpcode() == Instruction::GetElementPtr ||
I->getOpcode() == Instruction::PHI ||
@@ -7384,6 +7225,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
return TTI::CastContextHint::Reversed;
case LoopVectorizationCostModel::CM_Unknown:
llvm_unreachable("Instr did not go through cost modelling?");
+ case LoopVectorizationCostModel::CM_VectorCall:
+ case LoopVectorizationCostModel::CM_IntrinsicCall:
+ llvm_unreachable_internal("Instr has invalid widening decision");
}
llvm_unreachable("Unhandled case!");
@@ -7441,19 +7285,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
return TTI.getCastInstrCost(Opcode, VectorTy, SrcVecTy, CCH, CostKind, I);
}
- case Instruction::Call: {
- if (RecurrenceDescriptor::isFMulAddIntrinsic(I))
- if (auto RedCost = getReductionPatternCost(I, VF, VectorTy, CostKind))
- return *RedCost;
- Function *Variant;
- CallInst *CI = cast<CallInst>(I);
- InstructionCost CallCost = getVectorCallCost(CI, VF, &Variant);
- if (getVectorIntrinsicIDForCall(CI, TLI)) {
- InstructionCost IntrinsicCost = getVectorIntrinsicCost(CI, VF);
- return std::min(CallCost, IntrinsicCost);
- }
- return CallCost;
- }
+ case Instruction::Call:
+ return getVectorCallCost(cast<CallInst>(I), VF);
case Instruction::ExtractValue:
return TTI.getInstructionCost(I, TTI::TCK_RecipThroughput);
case Instruction::Alloca:
@@ -7521,8 +7354,9 @@ void LoopVectorizationCostModel::collectInLoopReductions() {
SmallVector<Instruction *, 4> ReductionOperations =
RdxDesc.getReductionOpChain(Phi, TheLoop);
bool InLoop = !ReductionOperations.empty();
+
if (InLoop) {
- InLoopReductionChains[Phi] = ReductionOperations;
+ InLoopReductions.insert(Phi);
// Add the elements to InLoopReductionImmediateChains for cost modelling.
Instruction *LastChain = Phi;
for (auto *I : ReductionOperations) {
@@ -7535,21 +7369,38 @@ void LoopVectorizationCostModel::collectInLoopReductions() {
}
}
+VPValue *VPBuilder::createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
+ DebugLoc DL, const Twine &Name) {
+ assert(Pred >= CmpInst::FIRST_ICMP_PREDICATE &&
+ Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
+ return tryInsertInstruction(
+ new VPInstruction(Instruction::ICmp, Pred, A, B, DL, Name));
+}
+
+// This function will select a scalable VF if the target supports scalable
+// vectors and a fixed one otherwise.
// TODO: we could return a pair of values that specify the max VF and
// min VF, to be used in `buildVPlans(MinVF, MaxVF)` instead of
// `buildVPlans(VF, VF)`. We cannot do it because VPLAN at the moment
// doesn't have a cost model that can choose which plan to execute if
// more than one is generated.
-static unsigned determineVPlanVF(const unsigned WidestVectorRegBits,
- LoopVectorizationCostModel &CM) {
+static ElementCount determineVPlanVF(const TargetTransformInfo &TTI,
+ LoopVectorizationCostModel &CM) {
unsigned WidestType;
std::tie(std::ignore, WidestType) = CM.getSmallestAndWidestTypes();
- return WidestVectorRegBits / WidestType;
+
+ TargetTransformInfo::RegisterKind RegKind =
+ TTI.enableScalableVectorization()
+ ? TargetTransformInfo::RGK_ScalableVector
+ : TargetTransformInfo::RGK_FixedWidthVector;
+
+ TypeSize RegSize = TTI.getRegisterBitWidth(RegKind);
+ unsigned N = RegSize.getKnownMinValue() / WidestType;
+ return ElementCount::get(N, RegSize.isScalable());
}
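As a numeric illustration of the computation above, with assumed target parameters rather than real TTI queries: a scalable target with a 128-bit known-minimum register and a 32-bit widest loop type yields "vscale x 4":

    #include <cstdio>

    int main() {
      // Hypothetical target numbers, not taken from the patch.
      unsigned RegMinBits = 128; // known minimum vector register width
      unsigned WidestType = 32;  // widest scalar type used by the loop
      bool Scalable = true;      // TTI.enableScalableVectorization()
      unsigned Lanes = RegMinBits / WidestType; // 128 / 32 == 4
      std::printf("VPlan VF = %s%u\n", Scalable ? "vscale x " : "", Lanes);
    }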
VectorizationFactor
LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
- assert(!UserVF.isScalable() && "scalable vectors not yet supported");
ElementCount VF = UserVF;
// Outer loop handling: They may require CFG and instruction level
// transformations before even evaluating whether vectorization is profitable.
@@ -7559,10 +7410,7 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
// If the user doesn't provide a vectorization factor, determine a
// reasonable one.
if (UserVF.isZero()) {
- VF = ElementCount::getFixed(determineVPlanVF(
- TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
- .getFixedValue(),
- CM));
+ VF = determineVPlanVF(TTI, CM);
LLVM_DEBUG(dbgs() << "LV: VPlan computed VF " << VF << ".\n");
// Make sure we have a VF > 1 for stress testing.
@@ -7571,6 +7419,17 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
<< "overriding computed VF.\n");
VF = ElementCount::getFixed(4);
}
+ } else if (UserVF.isScalable() && !TTI.supportsScalableVectors() &&
+ !ForceTargetSupportsScalableVectors) {
+ LLVM_DEBUG(dbgs() << "LV: Not vectorizing. Scalable VF requested, but "
+ << "not supported by the target.\n");
+ reportVectorizationFailure(
+ "Scalable vectorization requested but not supported by the target",
+ "the scalable user-specified vectorization width for outer-loop "
+ "vectorization cannot be used because the target does not support "
+ "scalable vectors.",
+ "ScalableVFUnfeasible", ORE, OrigLoop);
+ return VectorizationFactor::Disabled();
}
assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
assert(isPowerOf2_32(VF.getKnownMinValue()) &&
@@ -7624,9 +7483,9 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
"VF needs to be a power of two");
// Collect the instructions (and their associated costs) that will be more
// profitable to scalarize.
+ CM.collectInLoopReductions();
if (CM.selectUserVectorizationFactor(UserVF)) {
LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
- CM.collectInLoopReductions();
buildVPlansWithVPRecipes(UserVF, UserVF);
if (!hasPlanWithVF(UserVF)) {
LLVM_DEBUG(dbgs() << "LV: No VPlan could be built for " << UserVF
@@ -7650,6 +7509,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
ElementCount::isKnownLE(VF, MaxFactors.ScalableVF); VF *= 2)
VFCandidates.insert(VF);
+ CM.collectInLoopReductions();
for (const auto &VF : VFCandidates) {
// Collect Uniform and Scalar instructions after vectorization with VF.
CM.collectUniformsAndScalars(VF);
@@ -7660,7 +7520,6 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
CM.collectInstsToScalarize(VF);
}
- CM.collectInLoopReductions();
buildVPlansWithVPRecipes(ElementCount::getFixed(1), MaxFactors.FixedVF);
buildVPlansWithVPRecipes(ElementCount::getScalable(1), MaxFactors.ScalableVF);
@@ -7705,7 +7564,7 @@ static void AddRuntimeUnrollDisableMetaData(Loop *L) {
if (MD) {
const auto *S = dyn_cast<MDString>(MD->getOperand(0));
IsUnrollMetadata =
- S && S->getString().startswith("llvm.loop.unroll.disable");
+ S && S->getString().starts_with("llvm.loop.unroll.disable");
}
MDs.push_back(LoopID->getOperand(i));
}
@@ -7729,7 +7588,7 @@ static void AddRuntimeUnrollDisableMetaData(Loop *L) {
SCEV2ValueTy LoopVectorizationPlanner::executePlan(
ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan,
InnerLoopVectorizer &ILV, DominatorTree *DT, bool IsEpilogueVectorization,
- DenseMap<const SCEV *, Value *> *ExpandedSCEVs) {
+ const DenseMap<const SCEV *, Value *> *ExpandedSCEVs) {
assert(BestVPlan.hasVF(BestVF) &&
"Trying to execute plan with unsupported VF");
assert(BestVPlan.hasUF(BestUF) &&
@@ -7745,7 +7604,8 @@ SCEV2ValueTy LoopVectorizationPlanner::executePlan(
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
// Perform the actual loop transformation.
- VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan};
+ VPTransformState State(BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan,
+ OrigLoop->getHeader()->getContext());
// 0. Generate SCEV-dependent code into the preheader, including TripCount,
// before making any changes to the CFG.
@@ -7798,9 +7658,9 @@ SCEV2ValueTy LoopVectorizationPlanner::executePlan(
//===------------------------------------------------===//
// 2. Copy and widen instructions from the old loop into the new loop.
- BestVPlan.prepareToExecute(
- ILV.getTripCount(), ILV.getOrCreateVectorTripCount(nullptr),
- CanonicalIVStartValue, State, IsEpilogueVectorization);
+ BestVPlan.prepareToExecute(ILV.getTripCount(),
+ ILV.getOrCreateVectorTripCount(nullptr),
+ CanonicalIVStartValue, State);
BestVPlan.execute(&State);
@@ -7964,9 +7824,11 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
EPI.TripCount = Count;
}
- ReplaceInstWithInst(
- TCCheckBlock->getTerminator(),
- BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters));
+ BranchInst &BI =
+ *BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters);
+ if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()))
+ setBranchWeights(BI, MinItersBypassWeights);
+ ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
return TCCheckBlock;
}
@@ -8064,8 +7926,8 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
// Generate a resume induction for the vector epilogue and put it in the
// vector epilogue preheader
Type *IdxTy = Legal->getWidestInductionType();
- PHINode *EPResumeVal = PHINode::Create(IdxTy, 2, "vec.epilog.resume.val",
- LoopVectorPreHeader->getFirstNonPHI());
+ PHINode *EPResumeVal = PHINode::Create(IdxTy, 2, "vec.epilog.resume.val");
+ EPResumeVal->insertBefore(LoopVectorPreHeader->getFirstNonPHIIt());
EPResumeVal->addIncoming(EPI.VectorTripCount, VecEpilogueIterationCountCheck);
EPResumeVal->addIncoming(ConstantInt::get(IdxTy, 0),
EPI.MainLoopIterationCountCheck);
@@ -8110,9 +7972,22 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
EPI.EpilogueVF, EPI.EpilogueUF),
"min.epilog.iters.check");
- ReplaceInstWithInst(
- Insert->getTerminator(),
- BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters));
+ BranchInst &BI =
+ *BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters);
+ if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
+ unsigned MainLoopStep = UF * VF.getKnownMinValue();
+ unsigned EpilogueLoopStep =
+ EPI.EpilogueUF * EPI.EpilogueVF.getKnownMinValue();
+ // We assume the remaining `Count` is equally distributed in
+ // [0, MainLoopStep), so the probability of `Count < EpilogueLoopStep`
+ // is min(MainLoopStep, EpilogueLoopStep) / MainLoopStep.
+ unsigned EstimatedSkipCount = std::min(MainLoopStep, EpilogueLoopStep);
+ const uint32_t Weights[] = {EstimatedSkipCount,
+ MainLoopStep - EstimatedSkipCount};
+ setBranchWeights(BI, Weights);
+ }
+ ReplaceInstWithInst(Insert->getTerminator(), &BI);
LoopBypassBlocks.push_back(Insert);
return Insert;
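A worked example of the weight formula above, using assumed vectorization factors: with a main-loop step of 8 and an epilogue step of 2, the bypass edge gets weight 2 against 6, i.e. an estimated 25% chance that the epilogue vector loop is skipped:

    #include <algorithm>
    #include <cstdio>

    int main() {
      // Assumed factors, for illustration only.
      unsigned MainLoopStep = 4 * 2;     // VF=4, UF=2 in the main vector loop
      unsigned EpilogueLoopStep = 2 * 1; // EpilogueVF=2, EpilogueUF=1
      // Remainder assumed uniform in [0, MainLoopStep), so the bypass
      // probability is min(MainLoopStep, EpilogueLoopStep) / MainLoopStep.
      unsigned Skip = std::min(MainLoopStep, EpilogueLoopStep);
      std::printf("weights = {%u, %u}\n", Skip, MainLoopStep - Skip); // {2, 6}
    }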
@@ -8206,6 +8081,33 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst,
return EdgeMaskCache[Edge] = EdgeMask;
}
+void VPRecipeBuilder::createHeaderMask(VPlan &Plan) {
+ BasicBlock *Header = OrigLoop->getHeader();
+
+ // When not folding the tail, use nullptr to model all-true mask.
+ if (!CM.foldTailByMasking()) {
+ BlockMaskCache[Header] = nullptr;
+ return;
+ }
+
+ // Introduce the early-exit compare IV <= BTC to form header block mask.
+ // This is used instead of IV < TC because TC may wrap, unlike BTC. Start by
+ // constructing the desired canonical IV in the header block as its first
+ // non-phi instruction.
+
+ VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+ auto NewInsertionPoint = HeaderVPBB->getFirstNonPhi();
+ auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV());
+ HeaderVPBB->insert(IV, NewInsertionPoint);
+
+ VPBuilder::InsertPointGuard Guard(Builder);
+ Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint);
+ VPValue *BlockMask = nullptr;
+ VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
+ BlockMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
+ BlockMaskCache[Header] = BlockMask;
+}
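The comment above is the reason the mask compares against the backedge-taken count. A sketch in assumed 8-bit arithmetic shows how IV < TC misbehaves when TC wraps, while IV <= BTC still yields the intended all-true mask:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // 8-bit arithmetic chosen for compactness; the principle is the same
      // for the real induction type.
      uint8_t TC = 0;    // a trip count of 256 wraps to 0 in 8 bits
      uint8_t BTC = 255; // the backedge-taken count 255 does not wrap
      uint8_t IV = 17;   // any lane index of the canonical IV
      std::printf("IV < TC: %d, IV <= BTC: %d\n", IV < TC, IV <= BTC); // 0, 1
    }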
+
VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) {
assert(OrigLoop->contains(BB) && "Block is not a part of a loop");
@@ -8214,45 +8116,12 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) {
if (BCEntryIt != BlockMaskCache.end())
return BCEntryIt->second;
+ assert(OrigLoop->getHeader() != BB &&
+ "Loop header must have cached block mask");
+
// All-one mask is modelled as no-mask following the convention for masked
// load/store/gather/scatter. Initialize BlockMask to no-mask.
VPValue *BlockMask = nullptr;
-
- if (OrigLoop->getHeader() == BB) {
- if (!CM.blockNeedsPredicationForAnyReason(BB))
- return BlockMaskCache[BB] = BlockMask; // Loop incoming mask is all-one.
-
- assert(CM.foldTailByMasking() && "must fold the tail");
-
- // If we're using the active lane mask for control flow, then we get the
- // mask from the active lane mask PHI that is cached in the VPlan.
- TailFoldingStyle TFStyle = CM.getTailFoldingStyle();
- if (useActiveLaneMaskForControlFlow(TFStyle))
- return BlockMaskCache[BB] = Plan.getActiveLaneMaskPhi();
-
- // Introduce the early-exit compare IV <= BTC to form header block mask.
- // This is used instead of IV < TC because TC may wrap, unlike BTC. Start by
- // constructing the desired canonical IV in the header block as its first
- // non-phi instructions.
-
- VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
- auto NewInsertionPoint = HeaderVPBB->getFirstNonPhi();
- auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV());
- HeaderVPBB->insert(IV, HeaderVPBB->getFirstNonPhi());
-
- VPBuilder::InsertPointGuard Guard(Builder);
- Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint);
- if (useActiveLaneMask(TFStyle)) {
- VPValue *TC = Plan.getTripCount();
- BlockMask = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {IV, TC},
- nullptr, "active.lane.mask");
- } else {
- VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
- BlockMask = Builder.createNaryOp(VPInstruction::ICmpULE, {IV, BTC});
- }
- return BlockMaskCache[BB] = BlockMask;
- }
-
// This is the block mask. We OR all incoming edges.
for (auto *Predecessor : predecessors(BB)) {
VPValue *EdgeMask = createEdgeMask(Predecessor, BB, Plan);
@@ -8458,22 +8327,15 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
bool ShouldUseVectorIntrinsic =
ID && LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) -> bool {
- Function *Variant;
- // Is it beneficial to perform intrinsic call compared to lib
- // call?
- InstructionCost CallCost =
- CM.getVectorCallCost(CI, VF, &Variant);
- InstructionCost IntrinsicCost =
- CM.getVectorIntrinsicCost(CI, VF);
- return IntrinsicCost <= CallCost;
+ return CM.getCallWideningDecision(CI, VF).Kind ==
+ LoopVectorizationCostModel::CM_IntrinsicCall;
},
Range);
if (ShouldUseVectorIntrinsic)
return new VPWidenCallRecipe(*CI, make_range(Ops.begin(), Ops.end()), ID);
Function *Variant = nullptr;
- ElementCount VariantVF;
- bool NeedsMask = false;
+ std::optional<unsigned> MaskPos;
// Is it better to call a vectorized version of the function than to
// scalarize the call?
auto ShouldUseVectorCall = LoopVectorizationPlanner::getDecisionAndClampRange(
@@ -8492,16 +8354,19 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
// finds a valid variant.
if (Variant)
return false;
- CM.getVectorCallCost(CI, VF, &Variant, &NeedsMask);
- // If we found a valid vector variant at this VF, then store the VF
- // in case we need to generate a mask.
- if (Variant)
- VariantVF = VF;
- return Variant != nullptr;
+ LoopVectorizationCostModel::CallWideningDecision Decision =
+ CM.getCallWideningDecision(CI, VF);
+ if (Decision.Kind == LoopVectorizationCostModel::CM_VectorCall) {
+ Variant = Decision.Variant;
+ MaskPos = Decision.MaskPos;
+ return true;
+ }
+
+ return false;
},
Range);
if (ShouldUseVectorCall) {
- if (NeedsMask) {
+ if (MaskPos.has_value()) {
// We have 2 cases that would require a mask:
// 1) The block needs to be predicated, either due to a conditional
// in the scalar loop or use of an active lane mask with
@@ -8516,17 +8381,7 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
Mask = Plan->getVPValueOrAddLiveIn(ConstantInt::getTrue(
IntegerType::getInt1Ty(Variant->getFunctionType()->getContext())));
- VFShape Shape = VFShape::get(*CI, VariantVF, /*HasGlobalPred=*/true);
- unsigned MaskPos = 0;
-
- for (const VFInfo &Info : VFDatabase::getMappings(*CI))
- if (Info.Shape == Shape) {
- assert(Info.isMasked() && "Vector function info shape mismatch");
- MaskPos = Info.getParamIndexForOptionalMask().value();
- break;
- }
-
- Ops.insert(Ops.begin() + MaskPos, Mask);
+ Ops.insert(Ops.begin() + *MaskPos, Mask);
}
return new VPWidenCallRecipe(*CI, make_range(Ops.begin(), Ops.end()),
@@ -8747,8 +8602,8 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
}
if (auto *CI = dyn_cast<CastInst>(Instr)) {
- return toVPRecipeResult(
- new VPWidenCastRecipe(CI->getOpcode(), Operands[0], CI->getType(), CI));
+ return toVPRecipeResult(new VPWidenCastRecipe(CI->getOpcode(), Operands[0],
+ CI->getType(), *CI));
}
return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan));
@@ -8758,27 +8613,26 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
ElementCount MaxVF) {
assert(OrigLoop->isInnermost() && "Inner loop expected.");
- // Add assume instructions we need to drop to DeadInstructions, to prevent
- // them from being added to the VPlan.
- // TODO: We only need to drop assumes in blocks that get flattend. If the
- // control flow is preserved, we should keep them.
- SmallPtrSet<Instruction *, 4> DeadInstructions;
- auto &ConditionalAssumes = Legal->getConditionalAssumes();
- DeadInstructions.insert(ConditionalAssumes.begin(), ConditionalAssumes.end());
-
auto MaxVFTimes2 = MaxVF * 2;
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
VFRange SubRange = {VF, MaxVFTimes2};
- if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange, DeadInstructions))
- VPlans.push_back(std::move(*Plan));
+ if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange)) {
+ // Now optimize the initial VPlan.
+ if (!Plan->hasVF(ElementCount::getFixed(1)))
+ VPlanTransforms::truncateToMinimalBitwidths(
+ *Plan, CM.getMinimalBitwidths(), PSE.getSE()->getContext());
+ VPlanTransforms::optimize(*Plan, *PSE.getSE());
+ assert(VPlanVerifier::verifyPlanIsValid(*Plan) && "VPlan is invalid");
+ VPlans.push_back(std::move(Plan));
+ }
VF = SubRange.End;
}
}
// Add the necessary canonical IV and branch recipes required to control the
// loop.
-static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL,
- TailFoldingStyle Style) {
+static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
+ DebugLoc DL) {
Value *StartIdx = ConstantInt::get(IdxTy, 0);
auto *StartV = Plan.getVPValueOrAddLiveIn(StartIdx);
@@ -8790,102 +8644,24 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL,
// Add a CanonicalIVIncrement{NUW} VPInstruction to increment the scalar
// IV by VF * UF.
- bool HasNUW = Style == TailFoldingStyle::None;
auto *CanonicalIVIncrement =
- new VPInstruction(HasNUW ? VPInstruction::CanonicalIVIncrementNUW
- : VPInstruction::CanonicalIVIncrement,
- {CanonicalIVPHI}, DL, "index.next");
+ new VPInstruction(Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()},
+ {HasNUW, false}, DL, "index.next");
CanonicalIVPHI->addOperand(CanonicalIVIncrement);
VPBasicBlock *EB = TopRegion->getExitingBasicBlock();
- if (useActiveLaneMaskForControlFlow(Style)) {
- // Create the active lane mask instruction in the vplan preheader.
- VPBasicBlock *VecPreheader =
- cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor());
-
- // We can't use StartV directly in the ActiveLaneMask VPInstruction, since
- // we have to take unrolling into account. Each part needs to start at
- // Part * VF
- auto *CanonicalIVIncrementParts =
- new VPInstruction(HasNUW ? VPInstruction::CanonicalIVIncrementForPartNUW
- : VPInstruction::CanonicalIVIncrementForPart,
- {StartV}, DL, "index.part.next");
- VecPreheader->appendRecipe(CanonicalIVIncrementParts);
-
- // Create the ActiveLaneMask instruction using the correct start values.
- VPValue *TC = Plan.getTripCount();
-
- VPValue *TripCount, *IncrementValue;
- if (Style == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) {
- // When avoiding a runtime check, the active.lane.mask inside the loop
- // uses a modified trip count and the induction variable increment is
- // done after the active.lane.mask intrinsic is called.
- auto *TCMinusVF =
- new VPInstruction(VPInstruction::CalculateTripCountMinusVF, {TC}, DL);
- VecPreheader->appendRecipe(TCMinusVF);
- IncrementValue = CanonicalIVPHI;
- TripCount = TCMinusVF;
- } else {
- // When the loop is guarded by a runtime overflow check for the loop
- // induction variable increment by VF, we can increment the value before
- // the get.active.lane mask and use the unmodified tripcount.
- EB->appendRecipe(CanonicalIVIncrement);
- IncrementValue = CanonicalIVIncrement;
- TripCount = TC;
- }
-
- auto *EntryALM = new VPInstruction(VPInstruction::ActiveLaneMask,
- {CanonicalIVIncrementParts, TC}, DL,
- "active.lane.mask.entry");
- VecPreheader->appendRecipe(EntryALM);
-
- // Now create the ActiveLaneMaskPhi recipe in the main loop using the
- // preheader ActiveLaneMask instruction.
- auto *LaneMaskPhi = new VPActiveLaneMaskPHIRecipe(EntryALM, DebugLoc());
- Header->insert(LaneMaskPhi, Header->getFirstNonPhi());
-
- // Create the active lane mask for the next iteration of the loop.
- CanonicalIVIncrementParts =
- new VPInstruction(HasNUW ? VPInstruction::CanonicalIVIncrementForPartNUW
- : VPInstruction::CanonicalIVIncrementForPart,
- {IncrementValue}, DL);
- EB->appendRecipe(CanonicalIVIncrementParts);
-
- auto *ALM = new VPInstruction(VPInstruction::ActiveLaneMask,
- {CanonicalIVIncrementParts, TripCount}, DL,
- "active.lane.mask.next");
- EB->appendRecipe(ALM);
- LaneMaskPhi->addOperand(ALM);
-
- if (Style == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) {
- // Do the increment of the canonical IV after the active.lane.mask, because
- // that value is still based off %CanonicalIVPHI
- EB->appendRecipe(CanonicalIVIncrement);
- }
-
- // We have to invert the mask here because a true condition means jumping
- // to the exit block.
- auto *NotMask = new VPInstruction(VPInstruction::Not, ALM, DL);
- EB->appendRecipe(NotMask);
-
- VPInstruction *BranchBack =
- new VPInstruction(VPInstruction::BranchOnCond, {NotMask}, DL);
- EB->appendRecipe(BranchBack);
- } else {
- EB->appendRecipe(CanonicalIVIncrement);
+ EB->appendRecipe(CanonicalIVIncrement);
- // Add the BranchOnCount VPInstruction to the latch.
- VPInstruction *BranchBack = new VPInstruction(
- VPInstruction::BranchOnCount,
- {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
- EB->appendRecipe(BranchBack);
- }
+ // Add the BranchOnCount VPInstruction to the latch.
+ VPInstruction *BranchBack =
+ new VPInstruction(VPInstruction::BranchOnCount,
+ {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
+ EB->appendRecipe(BranchBack);
}
// Add exit values to \p Plan. VPLiveOuts are added for each LCSSA phi in the
// original exit block.
-static void addUsersInExitBlock(VPBasicBlock *HeaderVPBB,
- VPBasicBlock *MiddleVPBB, Loop *OrigLoop,
+static void addUsersInExitBlock(VPBasicBlock *HeaderVPBB, Loop *OrigLoop,
VPlan &Plan) {
BasicBlock *ExitBB = OrigLoop->getUniqueExitBlock();
BasicBlock *ExitingBB = OrigLoop->getExitingBlock();
@@ -8902,8 +8678,8 @@ static void addUsersInExitBlock(VPBasicBlock *HeaderVPBB,
}
}
-std::optional<VPlanPtr> LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
- VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions) {
+VPlanPtr
+LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
@@ -8914,24 +8690,6 @@ std::optional<VPlanPtr> LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// process after constructing the initial VPlan.
// ---------------------------------------------------------------------------
- for (const auto &Reduction : CM.getInLoopReductionChains()) {
- PHINode *Phi = Reduction.first;
- RecurKind Kind =
- Legal->getReductionVars().find(Phi)->second.getRecurrenceKind();
- const SmallVector<Instruction *, 4> &ReductionOperations = Reduction.second;
-
- RecipeBuilder.recordRecipeOf(Phi);
- for (const auto &R : ReductionOperations) {
- RecipeBuilder.recordRecipeOf(R);
- // For min/max reductions, where we have a pair of icmp/select, we also
- // need to record the ICmp recipe, so it can be removed later.
- assert(!RecurrenceDescriptor::isSelectCmpRecurrenceKind(Kind) &&
- "Only min/max recurrences allowed for inloop reductions");
- if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
- RecipeBuilder.recordRecipeOf(cast<Instruction>(R->getOperand(0)));
- }
- }
-
// For each interleave group which is relevant for this (possibly trimmed)
// Range, add it to the set of groups to be later applied to the VPlan and add
// placeholders for its members' Recipes which we'll be replacing with a
@@ -8972,23 +8730,27 @@ std::optional<VPlanPtr> LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body");
VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch");
VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB);
- auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop");
- VPBlockUtils::insertBlockAfter(TopRegion, Plan->getEntry());
- VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block");
- VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
+ Plan->getVectorLoopRegion()->setEntry(HeaderVPBB);
+ Plan->getVectorLoopRegion()->setExiting(LatchVPBB);
// Don't use getDecisionAndClampRange here, because we don't know the UF
// so this function is better to be conservative, rather than to split
// it up into different VPlans.
+ // TODO: Consider using getDecisionAndClampRange here to split up VPlans.
bool IVUpdateMayOverflow = false;
for (ElementCount VF : Range)
IVUpdateMayOverflow |= !isIndvarOverflowCheckKnownFalse(&CM, VF);
- Instruction *DLInst =
- getDebugLocFromInstOrOperands(Legal->getPrimaryInduction());
- addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(),
- DLInst ? DLInst->getDebugLoc() : DebugLoc(),
- CM.getTailFoldingStyle(IVUpdateMayOverflow));
+ DebugLoc DL = getDebugLocFromInstOrOperands(Legal->getPrimaryInduction());
+ TailFoldingStyle Style = CM.getTailFoldingStyle(IVUpdateMayOverflow);
+ // When not folding the tail, we know that the induction increment will not
+ // overflow.
+ bool HasNUW = Style == TailFoldingStyle::None;
+ addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DL);
+
+ // Proactively create header mask. Masks for other blocks are created on
+ // demand.
+ RecipeBuilder.createHeaderMask(*Plan);
// Scan the body of the loop in a topological order to visit each basic block
// after having visited its predecessor basic blocks.
@@ -9005,14 +8767,8 @@ std::optional<VPlanPtr> LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Introduce each ingredient into VPlan.
// TODO: Model and preserve debug intrinsics in VPlan.
- for (Instruction &I : BB->instructionsWithoutDebug(false)) {
+ for (Instruction &I : drop_end(BB->instructionsWithoutDebug(false))) {
Instruction *Instr = &I;
-
- // First filter out irrelevant instructions, to ensure no recipes are
- // built for them.
- if (isa<BranchInst>(Instr) || DeadInstructions.count(Instr))
- continue;
-
SmallVector<VPValue *, 4> Operands;
auto *Phi = dyn_cast<PHINode>(Instr);
if (Phi && Phi->getParent() == OrigLoop->getHeader()) {
@@ -9052,11 +8808,18 @@ std::optional<VPlanPtr> LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
}
RecipeBuilder.setRecipe(Instr, Recipe);
- if (isa<VPWidenIntOrFpInductionRecipe>(Recipe) &&
- HeaderVPBB->getFirstNonPhi() != VPBB->end()) {
- // Move VPWidenIntOrFpInductionRecipes for optimized truncates to the
- // phi section of HeaderVPBB.
- assert(isa<TruncInst>(Instr));
+ if (isa<VPHeaderPHIRecipe>(Recipe)) {
+ // VPHeaderPHIRecipes must be kept in the phi section of HeaderVPBB. In
+ // the following cases, VPHeaderPHIRecipes may be created after non-phi
+ // recipes and need to be moved to the phi section of HeaderVPBB:
+ // * tail-folding (non-phi recipes computing the header mask are
+ // introduced earlier than regular header phi recipes, and should appear
+ // after them)
+ // * Optimizing truncates to VPWidenIntOrFpInductionRecipe.
+
+ assert((HeaderVPBB->getFirstNonPhi() == VPBB->end() ||
+ CM.foldTailByMasking() || isa<TruncInst>(Instr)) &&
+ "unexpected recipe needs moving");
Recipe->insertBefore(*HeaderVPBB, HeaderVPBB->getFirstNonPhi());
} else
VPBB->appendRecipe(Recipe);
@@ -9074,7 +8837,7 @@ std::optional<VPlanPtr> LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// and there is nothing to fix from vector loop; phis should have incoming
// from scalar loop only.
} else
- addUsersInExitBlock(HeaderVPBB, MiddleVPBB, OrigLoop, *Plan);
+ addUsersInExitBlock(HeaderVPBB, OrigLoop, *Plan);
assert(isa<VPRegionBlock>(Plan->getVectorLoopRegion()) &&
!Plan->getVectorLoopRegion()->getEntryBasicBlock()->empty() &&
@@ -9088,8 +8851,7 @@ std::optional<VPlanPtr> LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// ---------------------------------------------------------------------------
// Adjust the recipes for any inloop reductions.
- adjustRecipesForReductions(cast<VPBasicBlock>(TopRegion->getExiting()), Plan,
- RecipeBuilder, Range.Start);
+ adjustRecipesForReductions(LatchVPBB, Plan, RecipeBuilder, Range.Start);
// Interleave memory: for each Interleave Group we marked earlier as relevant
// for this VPlan, replace the Recipes widening its memory instructions with a
@@ -9150,21 +8912,18 @@ std::optional<VPlanPtr> LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Sink users of fixed-order recurrence past the recipe defining the previous
// value and introduce FirstOrderRecurrenceSplice VPInstructions.
if (!VPlanTransforms::adjustFixedOrderRecurrences(*Plan, Builder))
- return std::nullopt;
-
- VPlanTransforms::removeRedundantCanonicalIVs(*Plan);
- VPlanTransforms::removeRedundantInductionCasts(*Plan);
-
- VPlanTransforms::optimizeInductions(*Plan, *PSE.getSE());
- VPlanTransforms::removeDeadRecipes(*Plan);
-
- VPlanTransforms::createAndOptimizeReplicateRegions(*Plan);
-
- VPlanTransforms::removeRedundantExpandSCEVRecipes(*Plan);
- VPlanTransforms::mergeBlocksIntoPredecessors(*Plan);
+ return nullptr;
- assert(VPlanVerifier::verifyPlanIsValid(*Plan) && "VPlan is invalid");
- return std::make_optional(std::move(Plan));
+ if (useActiveLaneMask(Style)) {
+ // TODO: Move checks to VPlanTransforms::addActiveLaneMask once
+ // TailFoldingStyle is visible there.
+ bool ForControlFlow = useActiveLaneMaskForControlFlow(Style);
+ bool WithoutRuntimeCheck =
+ Style == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck;
+ VPlanTransforms::addActiveLaneMask(*Plan, ForControlFlow,
+ WithoutRuntimeCheck);
+ }
+ return Plan;
}
VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
@@ -9198,8 +8957,11 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
Plan->getVectorLoopRegion()->getExitingBasicBlock()->getTerminator();
Term->eraseFromParent();
- addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), DebugLoc(),
- CM.getTailFoldingStyle());
+ // Tail folding is not supported for outer loops, so the induction increment
+ // is guaranteed to not wrap.
+ bool HasNUW = true;
+ addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW,
+ DebugLoc());
return Plan;
}
@@ -9211,105 +8973,211 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
void LoopVectorizationPlanner::adjustRecipesForReductions(
VPBasicBlock *LatchVPBB, VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder,
ElementCount MinVF) {
- for (const auto &Reduction : CM.getInLoopReductionChains()) {
- PHINode *Phi = Reduction.first;
- const RecurrenceDescriptor &RdxDesc =
- Legal->getReductionVars().find(Phi)->second;
- const SmallVector<Instruction *, 4> &ReductionOperations = Reduction.second;
-
- if (MinVF.isScalar() && !CM.useOrderedReductions(RdxDesc))
+ VPBasicBlock *Header = Plan->getVectorLoopRegion()->getEntryBasicBlock();
+ // Gather all VPReductionPHIRecipes and sort them so that intermediate
+ // stores sunk outside of the loop keep the same order as they had in the
+ // original loop.
+ SmallVector<VPReductionPHIRecipe *> ReductionPHIList;
+ for (VPRecipeBase &R : Header->phis()) {
+ if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R))
+ ReductionPHIList.emplace_back(ReductionPhi);
+ }
+ bool HasIntermediateStore = false;
+ stable_sort(ReductionPHIList,
+ [this, &HasIntermediateStore](const VPReductionPHIRecipe *R1,
+ const VPReductionPHIRecipe *R2) {
+ auto *IS1 = R1->getRecurrenceDescriptor().IntermediateStore;
+ auto *IS2 = R2->getRecurrenceDescriptor().IntermediateStore;
+ HasIntermediateStore |= IS1 || IS2;
+
+ // If neither of the recipes has an intermediate store, keep the
+ // order the same.
+ if (!IS1 && !IS2)
+ return false;
+
+ // If only one of the recipes has an intermediate store, then
+ // move it towards the beginning of the list.
+ if (IS1 && !IS2)
+ return true;
+
+ if (!IS1 && IS2)
+ return false;
+
+ // If both recipes have an intermediate store, then the recipe
+ // with the later store should be processed earlier. So it
+ // should go to the beginning of the list.
+ return DT->dominates(IS2, IS1);
+ });
+
+ if (HasIntermediateStore && ReductionPHIList.size() > 1)
+ for (VPRecipeBase *R : ReductionPHIList)
+ R->moveBefore(*Header, Header->getFirstNonPhi());
+
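To see what the comparator above does, here is a stand-in model where an optional position replaces the dominance query (not the real recipes): phis whose intermediate store comes later are moved to the front, and store-less phis keep their relative order thanks to stable_sort:

    #include <algorithm>
    #include <cstdio>
    #include <optional>
    #include <vector>

    int main() {
      // nullopt == phi without an intermediate store; the integer stands in
      // for the store's position, larger meaning later (dominated).
      std::vector<std::optional<int>> Phis{{}, 3, {}, 7};
      std::stable_sort(Phis.begin(), Phis.end(),
                       [](const std::optional<int> &A,
                          const std::optional<int> &B) {
                         if (!A && !B) return false; // neither stores
                         if (A && !B) return true;   // stores sort first
                         if (!A) return false;
                         return *A > *B; // later store processed earlier
                       });
      for (const auto &P : Phis)
        P ? std::printf("%d ", *P) : std::printf("- ");
      std::printf("\n"); // prints "7 3 - -"
    }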
+ SmallVector<VPReductionPHIRecipe *> InLoopReductionPhis;
+ for (VPRecipeBase &R : Header->phis()) {
+ auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
+ if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered()))
continue;
+ InLoopReductionPhis.push_back(PhiR);
+ }
+
+ for (VPReductionPHIRecipe *PhiR : InLoopReductionPhis) {
+ const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
+ RecurKind Kind = RdxDesc.getRecurrenceKind();
+ assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
+ "AnyOf reductions are not allowed for in-loop reductions");
+
+ // Collect the chain of "link" recipes for the reduction starting at PhiR.
+ SetVector<VPRecipeBase *> Worklist;
+ Worklist.insert(PhiR);
+ for (unsigned I = 0; I != Worklist.size(); ++I) {
+ VPRecipeBase *Cur = Worklist[I];
+ for (VPUser *U : Cur->getVPSingleValue()->users()) {
+ auto *UserRecipe = dyn_cast<VPRecipeBase>(U);
+ if (!UserRecipe)
+ continue;
+ assert(UserRecipe->getNumDefinedValues() == 1 &&
+ "recipes must define exactly one result value");
+ Worklist.insert(UserRecipe);
+ }
+ }
+
+ // Visit operation "Links" along the reduction chain top-down, starting from
+ // the phi and ending at LoopExitValue. We keep track of the previous item
+ // (PreviousLink) to tell which of the two operands of a Link will remain
+ // scalar and which will be reduced. For min/max reductions implemented as
+ // select(cmp), the Link will be the select instruction.
+ VPRecipeBase *PreviousLink = PhiR; // Aka Worklist[0].
+ for (VPRecipeBase *CurrentLink : Worklist.getArrayRef().drop_front()) {
+ VPValue *PreviousLinkV = PreviousLink->getVPSingleValue();
+
+ Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr();
- // ReductionOperations are orders top-down from the phi's use to the
- // LoopExitValue. We keep a track of the previous item (the Chain) to tell
- // which of the two operands will remain scalar and which will be reduced.
- // For minmax the chain will be the select instructions.
- Instruction *Chain = Phi;
- for (Instruction *R : ReductionOperations) {
- VPRecipeBase *WidenRecipe = RecipeBuilder.getRecipe(R);
- RecurKind Kind = RdxDesc.getRecurrenceKind();
-
- VPValue *ChainOp = Plan->getVPValue(Chain);
- unsigned FirstOpId;
- assert(!RecurrenceDescriptor::isSelectCmpRecurrenceKind(Kind) &&
- "Only min/max recurrences allowed for inloop reductions");
+ // Index of the first operand which holds a non-mask vector operand.
+ unsigned IndexOfFirstOperand;
// Recognize a call to the llvm.fmuladd intrinsic.
bool IsFMulAdd = (Kind == RecurKind::FMulAdd);
- assert((!IsFMulAdd || RecurrenceDescriptor::isFMulAddIntrinsic(R)) &&
- "Expected instruction to be a call to the llvm.fmuladd intrinsic");
- if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
- assert(isa<VPWidenSelectRecipe>(WidenRecipe) &&
- "Expected to replace a VPWidenSelectSC");
- FirstOpId = 1;
+ VPValue *VecOp;
+ VPBasicBlock *LinkVPBB = CurrentLink->getParent();
+ if (IsFMulAdd) {
+ assert(
+ RecurrenceDescriptor::isFMulAddIntrinsic(CurrentLinkI) &&
+ "Expected instruction to be a call to the llvm.fmuladd intrinsic");
+ assert(((MinVF.isScalar() && isa<VPReplicateRecipe>(CurrentLink)) ||
+ isa<VPWidenCallRecipe>(CurrentLink)) &&
+ CurrentLink->getOperand(2) == PreviousLinkV &&
+ "expected a call where the previous link is the added operand");
+
+ // If the instruction is a call to the llvm.fmuladd intrinsic then we
+ // need to create an fmul recipe (multiplying the first two operands of
+ // the fmuladd together) to use as the vector operand for the fadd
+ // reduction.
+ VPInstruction *FMulRecipe = new VPInstruction(
+ Instruction::FMul,
+ {CurrentLink->getOperand(0), CurrentLink->getOperand(1)},
+ CurrentLinkI->getFastMathFlags());
+ LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator());
+ VecOp = FMulRecipe;
} else {
- assert((MinVF.isScalar() || isa<VPWidenRecipe>(WidenRecipe) ||
- (IsFMulAdd && isa<VPWidenCallRecipe>(WidenRecipe))) &&
- "Expected to replace a VPWidenSC");
- FirstOpId = 0;
+ if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
+ if (isa<VPWidenRecipe>(CurrentLink)) {
+ assert(isa<CmpInst>(CurrentLinkI) &&
+ "need to have the compare of the select");
+ continue;
+ }
+ assert(isa<VPWidenSelectRecipe>(CurrentLink) &&
+ "must be a select recipe");
+ IndexOfFirstOperand = 1;
+ } else {
+ assert((MinVF.isScalar() || isa<VPWidenRecipe>(CurrentLink)) &&
+ "Expected to replace a VPWidenSC");
+ IndexOfFirstOperand = 0;
+ }
+ // Note that for non-commutative operands (cmp-selects), the semantics of
+ // the cmp-select are captured in the recurrence kind.
+ unsigned VecOpId =
+ CurrentLink->getOperand(IndexOfFirstOperand) == PreviousLinkV
+ ? IndexOfFirstOperand + 1
+ : IndexOfFirstOperand;
+ VecOp = CurrentLink->getOperand(VecOpId);
+ assert(VecOp != PreviousLinkV &&
+ CurrentLink->getOperand(CurrentLink->getNumOperands() - 1 -
+ (VecOpId - IndexOfFirstOperand)) ==
+ PreviousLinkV &&
+ "PreviousLinkV must be the operand other than VecOp");
}
- unsigned VecOpId =
- R->getOperand(FirstOpId) == Chain ? FirstOpId + 1 : FirstOpId;
- VPValue *VecOp = Plan->getVPValue(R->getOperand(VecOpId));
+ BasicBlock *BB = CurrentLinkI->getParent();
VPValue *CondOp = nullptr;
- if (CM.blockNeedsPredicationForAnyReason(R->getParent())) {
+ if (CM.blockNeedsPredicationForAnyReason(BB)) {
VPBuilder::InsertPointGuard Guard(Builder);
- Builder.setInsertPoint(WidenRecipe->getParent(),
- WidenRecipe->getIterator());
- CondOp = RecipeBuilder.createBlockInMask(R->getParent(), *Plan);
+ Builder.setInsertPoint(CurrentLink);
+ CondOp = RecipeBuilder.createBlockInMask(BB, *Plan);
}
- if (IsFMulAdd) {
- // If the instruction is a call to the llvm.fmuladd intrinsic then we
- // need to create an fmul recipe to use as the vector operand for the
- // fadd reduction.
- VPInstruction *FMulRecipe = new VPInstruction(
- Instruction::FMul, {VecOp, Plan->getVPValue(R->getOperand(1))});
- FMulRecipe->setFastMathFlags(R->getFastMathFlags());
- WidenRecipe->getParent()->insert(FMulRecipe,
- WidenRecipe->getIterator());
- VecOp = FMulRecipe;
- }
- VPReductionRecipe *RedRecipe =
- new VPReductionRecipe(&RdxDesc, R, ChainOp, VecOp, CondOp, &TTI);
- WidenRecipe->getVPSingleValue()->replaceAllUsesWith(RedRecipe);
- Plan->removeVPValueFor(R);
- Plan->addVPValue(R, RedRecipe);
+ VPReductionRecipe *RedRecipe = new VPReductionRecipe(
+ RdxDesc, CurrentLinkI, PreviousLinkV, VecOp, CondOp);
// Append the recipe to the end of the VPBasicBlock because we need to
// ensure that it comes after all of its inputs, including CondOp.
- WidenRecipe->getParent()->appendRecipe(RedRecipe);
- WidenRecipe->getVPSingleValue()->replaceAllUsesWith(RedRecipe);
- WidenRecipe->eraseFromParent();
-
- if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
- VPRecipeBase *CompareRecipe =
- RecipeBuilder.getRecipe(cast<Instruction>(R->getOperand(0)));
- assert(isa<VPWidenRecipe>(CompareRecipe) &&
- "Expected to replace a VPWidenSC");
- assert(cast<VPWidenRecipe>(CompareRecipe)->getNumUsers() == 0 &&
- "Expected no remaining users");
- CompareRecipe->eraseFromParent();
- }
- Chain = R;
+ // Note that this transformation may leave over dead recipes (including
+ // CurrentLink), which will be cleaned by a later VPlan transform.
+ LinkVPBB->appendRecipe(RedRecipe);
+ CurrentLink->getVPSingleValue()->replaceAllUsesWith(RedRecipe);
+ PreviousLink = RedRecipe;
}
}
-
- // If tail is folded by masking, introduce selects between the phi
- // and the live-out instruction of each reduction, at the beginning of the
- // dedicated latch block.
- if (CM.foldTailByMasking()) {
- Builder.setInsertPoint(LatchVPBB, LatchVPBB->begin());
+ Builder.setInsertPoint(&*LatchVPBB->begin());
for (VPRecipeBase &R :
Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
- VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
- if (!PhiR || PhiR->isInLoop())
- continue;
+ VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
+ if (!PhiR || PhiR->isInLoop())
+ continue;
+
+ const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
+ auto *Result = PhiR->getBackedgeValue()->getDefiningRecipe();
+ // If tail is folded by masking, introduce selects between the phi
+ // and the live-out instruction of each reduction, at the beginning of the
+ // dedicated latch block.
+ if (CM.foldTailByMasking()) {
VPValue *Cond =
RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), *Plan);
VPValue *Red = PhiR->getBackedgeValue();
assert(Red->getDefiningRecipe()->getParent() != LatchVPBB &&
"reduction recipe must be defined before latch");
- Builder.createNaryOp(Instruction::Select, {Cond, Red, PhiR});
+ FastMathFlags FMFs = RdxDesc.getFastMathFlags();
+ Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType();
+ Result =
+ PhiTy->isFloatingPointTy()
+ ? new VPInstruction(Instruction::Select, {Cond, Red, PhiR}, FMFs)
+ : new VPInstruction(Instruction::Select, {Cond, Red, PhiR});
+ Result->insertBefore(&*Builder.getInsertPoint());
+ Red->replaceUsesWithIf(
+ Result->getVPSingleValue(),
+ [](VPUser &U, unsigned) { return isa<VPLiveOut>(&U); });
+ if (PreferPredicatedReductionSelect ||
+ TTI.preferPredicatedReductionSelect(
+ PhiR->getRecurrenceDescriptor().getOpcode(), PhiTy,
+ TargetTransformInfo::ReductionFlags()))
+ PhiR->setOperand(1, Result->getVPSingleValue());
+ }
+ // If the vector reduction can be performed in a smaller type, we truncate
+ // then extend the loop exit value to enable InstCombine to evaluate the
+ // entire expression in the smaller type.
+ Type *PhiTy = PhiR->getStartValue()->getLiveInIRValue()->getType();
+ if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
+ assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
+ Type *RdxTy = RdxDesc.getRecurrenceType();
+ auto *Trunc = new VPWidenCastRecipe(Instruction::Trunc,
+ Result->getVPSingleValue(), RdxTy);
+ auto *Extnd =
+ RdxDesc.isSigned()
+ ? new VPWidenCastRecipe(Instruction::SExt, Trunc, PhiTy)
+ : new VPWidenCastRecipe(Instruction::ZExt, Trunc, PhiTy);
+
+ Trunc->insertAfter(Result);
+ Extnd->insertAfter(Trunc);
+ Result->getVPSingleValue()->replaceAllUsesWith(Extnd);
+ Trunc->setOperand(0, Result->getVPSingleValue());
}
}
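A scalar sketch of the truncate-then-extend step above, assuming an i32 phi with an i8 recurrence type: only the final exit value is widened back, which is what lets InstCombine keep the rest of the expression in the narrow type:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Assumed types for illustration: phi type i32, recurrence type i8.
      int32_t ExitValue = 0x1234;                        // value in phi type
      int8_t Truncated = static_cast<int8_t>(ExitValue); // Instruction::Trunc
      int32_t Extended = static_cast<int32_t>(Truncated); // SExt (signed rdx)
      std::printf("%d -> %d -> %d\n", ExitValue, Truncated, Extended);
    }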
@@ -9347,107 +9215,6 @@ void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
-void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
- assert(!State.Instance && "Int or FP induction being replicated.");
-
- Value *Start = getStartValue()->getLiveInIRValue();
- const InductionDescriptor &ID = getInductionDescriptor();
- TruncInst *Trunc = getTruncInst();
- IRBuilderBase &Builder = State.Builder;
- assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
- assert(State.VF.isVector() && "must have vector VF");
-
- // The value from the original loop to which we are mapping the new induction
- // variable.
- Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;
-
- // Fast-math-flags propagate from the original induction instruction.
- IRBuilder<>::FastMathFlagGuard FMFG(Builder);
- if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
- Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
-
- // Now do the actual transformations, and start with fetching the step value.
- Value *Step = State.get(getStepValue(), VPIteration(0, 0));
-
- assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
- "Expected either an induction phi-node or a truncate of it!");
-
- // Construct the initial value of the vector IV in the vector loop preheader
- auto CurrIP = Builder.saveIP();
- BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
- Builder.SetInsertPoint(VectorPH->getTerminator());
- if (isa<TruncInst>(EntryVal)) {
- assert(Start->getType()->isIntegerTy() &&
- "Truncation requires an integer type");
- auto *TruncType = cast<IntegerType>(EntryVal->getType());
- Step = Builder.CreateTrunc(Step, TruncType);
- Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
- }
-
- Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
- Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
- Value *SteppedStart = getStepVector(
- SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);
-
- // We create vector phi nodes for both integer and floating-point induction
- // variables. Here, we determine the kind of arithmetic we will perform.
- Instruction::BinaryOps AddOp;
- Instruction::BinaryOps MulOp;
- if (Step->getType()->isIntegerTy()) {
- AddOp = Instruction::Add;
- MulOp = Instruction::Mul;
- } else {
- AddOp = ID.getInductionOpcode();
- MulOp = Instruction::FMul;
- }
-
- // Multiply the vectorization factor by the step using integer or
- // floating-point arithmetic as appropriate.
- Type *StepType = Step->getType();
- Value *RuntimeVF;
- if (Step->getType()->isFloatingPointTy())
- RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
- else
- RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
- Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
-
- // Create a vector splat to use in the induction update.
- //
- // FIXME: If the step is non-constant, we create the vector splat with
- // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
- // handle a constant vector splat.
- Value *SplatVF = isa<Constant>(Mul)
- ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
- : Builder.CreateVectorSplat(State.VF, Mul);
- Builder.restoreIP(CurrIP);
-
- // We may need to add the step a number of times, depending on the unroll
- // factor. The last of those goes into the PHI.
- PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind",
- &*State.CFG.PrevBB->getFirstInsertionPt());
- VecInd->setDebugLoc(EntryVal->getDebugLoc());
- Instruction *LastInduction = VecInd;
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- State.set(this, LastInduction, Part);
-
- if (isa<TruncInst>(EntryVal))
- State.addMetadata(LastInduction, EntryVal);
-
- LastInduction = cast<Instruction>(
- Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
- LastInduction->setDebugLoc(EntryVal->getDebugLoc());
- }
-
- LastInduction->setName("vec.ind.next");
- VecInd->addIncoming(SteppedStart, VectorPH);
- // Add induction update using an incorrect block temporarily. The phi node
- // will be fixed after VPlan execution. Note that at this point the latch
- // block cannot be used, as it does not exist yet.
- // TODO: Model increment value in VPlan, by turning the recipe into a
- // multi-def and a subclass of VPHeaderPHIRecipe.
- VecInd->addIncoming(LastInduction, VectorPH);
-}
-
void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
assert(IndDesc.getKind() == InductionDescriptor::IK_PtrInduction &&
"Not a pointer induction according to InductionDescriptor!");
@@ -9480,7 +9247,8 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
Value *Step = State.get(getOperand(1), VPIteration(Part, Lane));
Value *SclrGep = emitTransformedIndex(
- State.Builder, GlobalIdx, IndDesc.getStartValue(), Step, IndDesc);
+ State.Builder, GlobalIdx, IndDesc.getStartValue(), Step,
+ IndDesc.getKind(), IndDesc.getInductionBinOp());
SclrGep->setName("next.gep");
State.set(this, SclrGep, VPIteration(Part, Lane));
}
@@ -9547,41 +9315,26 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
// Fast-math-flags propagate from the original induction instruction.
IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
- if (IndDesc.getInductionBinOp() &&
- isa<FPMathOperator>(IndDesc.getInductionBinOp()))
- State.Builder.setFastMathFlags(
- IndDesc.getInductionBinOp()->getFastMathFlags());
+ if (FPBinOp)
+ State.Builder.setFastMathFlags(FPBinOp->getFastMathFlags());
Value *Step = State.get(getStepValue(), VPIteration(0, 0));
Value *CanonicalIV = State.get(getCanonicalIV(), VPIteration(0, 0));
- Value *DerivedIV =
- emitTransformedIndex(State.Builder, CanonicalIV,
- getStartValue()->getLiveInIRValue(), Step, IndDesc);
+ Value *DerivedIV = emitTransformedIndex(
+ State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step,
+ Kind, cast_if_present<BinaryOperator>(FPBinOp));
DerivedIV->setName("offset.idx");
- if (ResultTy != DerivedIV->getType()) {
- assert(Step->getType()->isIntegerTy() &&
+ if (TruncResultTy) {
+ assert(TruncResultTy != DerivedIV->getType() &&
+ Step->getType()->isIntegerTy() &&
"Truncation requires an integer step");
- DerivedIV = State.Builder.CreateTrunc(DerivedIV, ResultTy);
+ DerivedIV = State.Builder.CreateTrunc(DerivedIV, TruncResultTy);
}
assert(DerivedIV != CanonicalIV && "IV didn't need transforming?");
State.set(this, DerivedIV, VPIteration(0, 0));
}
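
The emitTransformedIndex calls above derive the IV from the canonical induction
variable; for integer and pointer inductions this amounts to Start + Index * Step,
while FP inductions apply the recorded fadd/fsub with its fast-math flags. An
illustrative stand-in for the integer case (not the real helper's signature):

    #include <cassert>
    #include <cstdint>

    // derived = Start + Index * Step; the FP case would instead apply the
    // induction's fadd/fsub binary operator.
    static int64_t transformedIndex(int64_t Start, int64_t Index, int64_t Step) {
      return Start + Index * Step;
    }

    int main() {
      // Canonical IV 0,1,2,... mapped onto an IV starting at 100 with step -2.
      assert(transformedIndex(100, 0, -2) == 100);
      assert(transformedIndex(100, 5, -2) == 90);
    }
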
-void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
- // Fast-math-flags propagate from the original induction instruction.
- IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
- if (IndDesc.getInductionBinOp() &&
- isa<FPMathOperator>(IndDesc.getInductionBinOp()))
- State.Builder.setFastMathFlags(
- IndDesc.getInductionBinOp()->getFastMathFlags());
-
- Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
- Value *Step = State.get(getStepValue(), VPIteration(0, 0));
-
- buildScalarSteps(BaseIV, Step, IndDesc, this, State);
-}
-
void VPInterleaveRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Interleave group being replicated.");
State.ILV->vectorizeInterleaveGroup(IG, definedValues(), State, getAddr(),
@@ -9592,48 +9345,51 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
void VPReductionRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Reduction being replicated.");
Value *PrevInChain = State.get(getChainOp(), 0);
- RecurKind Kind = RdxDesc->getRecurrenceKind();
- bool IsOrdered = State.ILV->useOrderedReductions(*RdxDesc);
+ RecurKind Kind = RdxDesc.getRecurrenceKind();
+ bool IsOrdered = State.ILV->useOrderedReductions(RdxDesc);
// Propagate the fast-math flags carried by the underlying instruction.
IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
- State.Builder.setFastMathFlags(RdxDesc->getFastMathFlags());
+ State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *NewVecOp = State.get(getVecOp(), Part);
if (VPValue *Cond = getCondOp()) {
- Value *NewCond = State.get(Cond, Part);
- VectorType *VecTy = cast<VectorType>(NewVecOp->getType());
- Value *Iden = RdxDesc->getRecurrenceIdentity(
- Kind, VecTy->getElementType(), RdxDesc->getFastMathFlags());
- Value *IdenVec =
- State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
- Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, IdenVec);
+ Value *NewCond = State.VF.isVector() ? State.get(Cond, Part)
+ : State.get(Cond, {Part, 0});
+ VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
+ Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
+ Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
+ RdxDesc.getFastMathFlags());
+ if (State.VF.isVector()) {
+ Iden =
+ State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
+ }
+
+ Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
NewVecOp = Select;
}
Value *NewRed;
Value *NextInChain;
if (IsOrdered) {
if (State.VF.isVector())
- NewRed = createOrderedReduction(State.Builder, *RdxDesc, NewVecOp,
+ NewRed = createOrderedReduction(State.Builder, RdxDesc, NewVecOp,
PrevInChain);
else
NewRed = State.Builder.CreateBinOp(
- (Instruction::BinaryOps)RdxDesc->getOpcode(Kind), PrevInChain,
+ (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain,
NewVecOp);
PrevInChain = NewRed;
} else {
PrevInChain = State.get(getChainOp(), Part);
- NewRed = createTargetReduction(State.Builder, TTI, *RdxDesc, NewVecOp);
+ NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp);
}
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
- NextInChain =
- createMinMaxOp(State.Builder, RdxDesc->getRecurrenceKind(),
- NewRed, PrevInChain);
+ NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
+ NewRed, PrevInChain);
} else if (IsOrdered)
NextInChain = NewRed;
else
NextInChain = State.Builder.CreateBinOp(
- (Instruction::BinaryOps)RdxDesc->getOpcode(Kind), NewRed,
- PrevInChain);
+ (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain);
State.set(this, NextInChain, Part);
}
}
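
The select-with-identity in the hunk above keeps masked-off lanes from perturbing
the reduction: inactive lanes contribute the recurrence identity, and with a
scalar VF the identity no longer needs to be splatted. A toy sketch of the idea
for an add reduction (assumed values):

    #include <cstdio>

    // select(mask, val, identity) folded into a scalar loop: lanes with a
    // false mask contribute the identity of RecurKind::Add, i.e. 0.
    static int maskedAddReduce(const int *Vals, const bool *Mask, int N) {
      int Acc = 0; // identity element for add
      for (int I = 0; I < N; ++I)
        Acc += Mask[I] ? Vals[I] : 0;
      return Acc;
    }

    int main() {
      int V[] = {1, 2, 3, 4};
      bool M[] = {true, false, true, true};
      printf("%d\n", maskedAddReduce(V, M, 4)); // prints 8
    }
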
@@ -9652,7 +9408,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
VectorType::get(UI->getType(), State.VF));
State.set(this, Poison, State.Instance->Part);
}
- State.ILV->packScalarIntoVectorValue(this, *State.Instance, State);
+ State.packScalarIntoVectorValue(this, *State.Instance);
}
return;
}
@@ -9718,9 +9474,16 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
auto &Builder = State.Builder;
InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
bool isMaskRequired = getMask();
- if (isMaskRequired)
- for (unsigned Part = 0; Part < State.UF; ++Part)
- BlockInMaskParts[Part] = State.get(getMask(), Part);
+ if (isMaskRequired) {
+    // Mask reversal is only needed for non-all-one (null) masks, as reverse of a
+ // null all-one mask is a null mask.
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *Mask = State.get(getMask(), Part);
+ if (isReverse())
+ Mask = Builder.CreateVectorReverse(Mask, "reverse");
+ BlockInMaskParts[Part] = Mask;
+ }
+ }
const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * {
// Calculate the pointer for the specific unroll-part.
@@ -9731,7 +9494,8 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
const DataLayout &DL =
Builder.GetInsertBlock()->getModule()->getDataLayout();
Type *IndexTy = State.VF.isScalable() && (isReverse() || Part > 0)
- ? DL.getIndexType(ScalarDataTy->getPointerTo())
+ ? DL.getIndexType(PointerType::getUnqual(
+ ScalarDataTy->getContext()))
: Builder.getInt32Ty();
bool InBounds = false;
if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
@@ -9751,21 +9515,17 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, NumElt, "", InBounds);
PartPtr =
Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane, "", InBounds);
- if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
- BlockInMaskParts[Part] =
- Builder.CreateVectorReverse(BlockInMaskParts[Part], "reverse");
} else {
Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, Increment, "", InBounds);
}
- unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
- return Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
+ return PartPtr;
};
// Handle Stores:
if (SI) {
- State.setDebugLocFromInst(SI);
+ State.setDebugLocFrom(SI->getDebugLoc());
for (unsigned Part = 0; Part < State.UF; ++Part) {
Instruction *NewSI = nullptr;
@@ -9798,7 +9558,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
// Handle loads.
assert(LI && "Must have a load instruction");
- State.setDebugLocFromInst(LI);
+ State.setDebugLocFrom(LI->getDebugLoc());
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *NewLI;
if (CreateGatherScatter) {
@@ -9877,95 +9637,6 @@ static ScalarEpilogueLowering getScalarEpilogueLowering(
return CM_ScalarEpilogueAllowed;
}
-Value *VPTransformState::get(VPValue *Def, unsigned Part) {
- // If Values have been set for this Def return the one relevant for \p Part.
- if (hasVectorValue(Def, Part))
- return Data.PerPartOutput[Def][Part];
-
- auto GetBroadcastInstrs = [this, Def](Value *V) {
- bool SafeToHoist = Def->isDefinedOutsideVectorRegions();
- if (VF.isScalar())
- return V;
- // Place the code for broadcasting invariant variables in the new preheader.
- IRBuilder<>::InsertPointGuard Guard(Builder);
- if (SafeToHoist) {
- BasicBlock *LoopVectorPreHeader = CFG.VPBB2IRBB[cast<VPBasicBlock>(
- Plan->getVectorLoopRegion()->getSinglePredecessor())];
- if (LoopVectorPreHeader)
- Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
- }
-
- // Place the code for broadcasting invariant variables in the new preheader.
- // Broadcast the scalar into all locations in the vector.
- Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
-
- return Shuf;
- };
-
- if (!hasScalarValue(Def, {Part, 0})) {
- Value *IRV = Def->getLiveInIRValue();
- Value *B = GetBroadcastInstrs(IRV);
- set(Def, B, Part);
- return B;
- }
-
- Value *ScalarValue = get(Def, {Part, 0});
- // If we aren't vectorizing, we can just copy the scalar map values over
- // to the vector map.
- if (VF.isScalar()) {
- set(Def, ScalarValue, Part);
- return ScalarValue;
- }
-
- bool IsUniform = vputils::isUniformAfterVectorization(Def);
-
- unsigned LastLane = IsUniform ? 0 : VF.getKnownMinValue() - 1;
- // Check if there is a scalar value for the selected lane.
- if (!hasScalarValue(Def, {Part, LastLane})) {
- // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
- // VPExpandSCEVRecipes can also be uniform.
- assert((isa<VPWidenIntOrFpInductionRecipe>(Def->getDefiningRecipe()) ||
- isa<VPScalarIVStepsRecipe>(Def->getDefiningRecipe()) ||
- isa<VPExpandSCEVRecipe>(Def->getDefiningRecipe())) &&
- "unexpected recipe found to be invariant");
- IsUniform = true;
- LastLane = 0;
- }
-
- auto *LastInst = cast<Instruction>(get(Def, {Part, LastLane}));
- // Set the insert point after the last scalarized instruction or after the
- // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
- // will directly follow the scalar definitions.
- auto OldIP = Builder.saveIP();
- auto NewIP =
- isa<PHINode>(LastInst)
- ? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI())
- : std::next(BasicBlock::iterator(LastInst));
- Builder.SetInsertPoint(&*NewIP);
-
- // However, if we are vectorizing, we need to construct the vector values.
- // If the value is known to be uniform after vectorization, we can just
- // broadcast the scalar value corresponding to lane zero for each unroll
- // iteration. Otherwise, we construct the vector values using
- // insertelement instructions. Since the resulting vectors are stored in
- // State, we will only generate the insertelements once.
- Value *VectorValue = nullptr;
- if (IsUniform) {
- VectorValue = GetBroadcastInstrs(ScalarValue);
- set(Def, VectorValue, Part);
- } else {
- // Initialize packing with insertelements to start from undef.
- assert(!VF.isScalable() && "VF is assumed to be non scalable.");
- Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF));
- set(Def, Undef, Part);
- for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
- ILV->packScalarIntoVectorValue(Def, {Part, Lane}, *this);
- VectorValue = get(Def, Part);
- }
- Builder.restoreIP(OldIP);
- return VectorValue;
-}
-
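
The VPTransformState::get implementation removed above follows a three-way
policy: return the cached vector for the requested part, broadcast lane 0 when
the value is uniform after vectorization, or pack the per-lane scalars and
memoize the result. A compressed sketch of that policy with toy containers
(ints stand in for IR values; not the LLVM data structures):

    #include <map>
    #include <vector>

    struct ToyState {
      unsigned VF = 4;
      std::map<std::pair<int, unsigned>, std::vector<int>> VecCache; // (def, part)
      std::map<std::pair<int, unsigned>, int> Lanes; // (def, part * VF + lane)

      std::vector<int> get(int Def, unsigned Part, bool Uniform) {
        if (auto It = VecCache.find({Def, Part}); It != VecCache.end())
          return It->second;                     // already materialized
        std::vector<int> V(VF);
        if (Uniform)
          V.assign(VF, Lanes[{Def, Part * VF}]); // broadcast lane 0
        else
          for (unsigned L = 0; L < VF; ++L)      // insertelement-style packing
            V[L] = Lanes[{Def, Part * VF + L}];
        VecCache[{Def, Part}] = V;
        return V;
      }
    };

    int main() { return ToyState().get(0, 0, /*Uniform=*/true).size() == 4 ? 0 : 1; }
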
// Process the loop in the VPlan-native vectorization path. This path builds
// VPlan upfront in the vectorization pipeline, which allows applying
// VPlan-to-VPlan transformations from the very beginning without modifying the
@@ -9994,7 +9665,8 @@ static bool processLoopInVPlanNativePath(
// Use the planner for outer loop vectorization.
// TODO: CM is not used at this point inside the planner. Turn CM into an
// optional argument if we don't need it in the future.
- LoopVectorizationPlanner LVP(L, LI, TLI, *TTI, LVL, CM, IAI, PSE, Hints, ORE);
+ LoopVectorizationPlanner LVP(L, LI, DT, TLI, *TTI, LVL, CM, IAI, PSE, Hints,
+ ORE);
// Get user vectorization factor.
ElementCount UserVF = Hints.getWidth();
@@ -10013,8 +9685,10 @@ static bool processLoopInVPlanNativePath(
VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
{
+ bool AddBranchWeights =
+ hasBranchWeightMD(*L->getLoopLatch()->getTerminator());
GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, TTI,
- F->getParent()->getDataLayout());
+ F->getParent()->getDataLayout(), AddBranchWeights);
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
VF.Width, 1, LVL, &CM, BFI, PSI, Checks);
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
@@ -10022,6 +9696,8 @@ static bool processLoopInVPlanNativePath(
LVP.executePlan(VF.Width, 1, BestPlan, LB, DT, false);
}
+ reportVectorization(ORE, L, VF, 1);
+
// Mark the loop as already vectorized to avoid vectorizing again.
Hints.setAlreadyVectorized();
assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()));
@@ -10076,7 +9752,8 @@ static void checkMixedPrecision(Loop *L, OptimizationRemarkEmitter *ORE) {
static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks,
VectorizationFactor &VF,
std::optional<unsigned> VScale, Loop *L,
- ScalarEvolution &SE) {
+ ScalarEvolution &SE,
+ ScalarEpilogueLowering SEL) {
InstructionCost CheckCost = Checks.getCost();
if (!CheckCost.isValid())
return false;
@@ -10146,11 +9823,13 @@ static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks,
// RtC < ScalarC * TC * (1 / X) ==> RtC * X / ScalarC < TC
double MinTC2 = RtC * 10 / ScalarC;
- // Now pick the larger minimum. If it is not a multiple of VF, choose the
- // next closest multiple of VF. This should partly compensate for ignoring
- // the epilogue cost.
+ // Now pick the larger minimum. If it is not a multiple of VF and a scalar
+ // epilogue is allowed, choose the next closest multiple of VF. This should
+ // partly compensate for ignoring the epilogue cost.
uint64_t MinTC = std::ceil(std::max(MinTC1, MinTC2));
- VF.MinProfitableTripCount = ElementCount::getFixed(alignTo(MinTC, IntVF));
+ if (SEL == CM_ScalarEpilogueAllowed)
+ MinTC = alignTo(MinTC, IntVF);
+ VF.MinProfitableTripCount = ElementCount::getFixed(MinTC);
LLVM_DEBUG(
dbgs() << "LV: Minimum required TC for runtime checks to be profitable:"
@@ -10270,7 +9949,14 @@ bool LoopVectorizePass::processLoop(Loop *L) {
else {
if (*ExpectedTC > TTI->getMinTripCountTailFoldingThreshold()) {
LLVM_DEBUG(dbgs() << "\n");
- SEL = CM_ScalarEpilogueNotAllowedLowTripLoop;
+        // Predicated tail-folded loops are efficient even when the loop
+ // iteration count is low. However, setting the epilogue policy to
+ // `CM_ScalarEpilogueNotAllowedLowTripLoop` prevents vectorizing loops
+ // with runtime checks. It's more effective to let
+ // `areRuntimeChecksProfitable` determine if vectorization is beneficial
+ // for the loop.
+ if (SEL != CM_ScalarEpilogueNotNeededUsePredicate)
+ SEL = CM_ScalarEpilogueNotAllowedLowTripLoop;
} else {
LLVM_DEBUG(dbgs() << " But the target considers the trip count too "
"small to consider vectorizing.\n");
@@ -10334,7 +10020,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
LoopVectorizationCostModel CM(SEL, L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE,
F, &Hints, IAI);
// Use the planner for vectorization.
- LoopVectorizationPlanner LVP(L, LI, TLI, *TTI, &LVL, CM, IAI, PSE, Hints,
+ LoopVectorizationPlanner LVP(L, LI, DT, TLI, *TTI, &LVL, CM, IAI, PSE, Hints,
ORE);
// Get user vectorization factor and interleave count.
@@ -10347,8 +10033,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VectorizationFactor VF = VectorizationFactor::Disabled();
unsigned IC = 1;
+ bool AddBranchWeights =
+ hasBranchWeightMD(*L->getLoopLatch()->getTerminator());
GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, TTI,
- F->getParent()->getDataLayout());
+ F->getParent()->getDataLayout(), AddBranchWeights);
if (MaybeVF) {
VF = *MaybeVF;
// Select the interleave count.
@@ -10365,7 +10053,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
Hints.getForce() == LoopVectorizeHints::FK_Enabled;
if (!ForceVectorization &&
!areRuntimeChecksProfitable(Checks, VF, getVScaleForTuning(L, *TTI), L,
- *PSE.getSE())) {
+ *PSE.getSE(), SEL)) {
ORE->emit([&]() {
return OptimizationRemarkAnalysisAliasing(
DEBUG_TYPE, "CantReorderMemOps", L->getStartLoc(),
@@ -10587,13 +10275,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
DisableRuntimeUnroll = true;
}
// Report the vectorization decision.
- ORE->emit([&]() {
- return OptimizationRemark(LV_NAME, "Vectorized", L->getStartLoc(),
- L->getHeader())
- << "vectorized loop (vectorization width: "
- << NV("VectorizationFactor", VF.Width)
- << ", interleaved count: " << NV("InterleaveCount", IC) << ")";
- });
+ reportVectorization(ORE, L, VF, IC);
}
if (ORE->allowExtraAnalysis(LV_NAME))
@@ -10676,8 +10358,14 @@ LoopVectorizeResult LoopVectorizePass::runImpl(
Changed |= CFGChanged |= processLoop(L);
- if (Changed)
+ if (Changed) {
LAIs->clear();
+
+#ifndef NDEBUG
+ if (VerifySCEV)
+ SE->verify();
+#endif
+ }
}
// Process each loop nest in the function.
@@ -10725,10 +10413,6 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
PA.preserve<LoopAnalysis>();
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<ScalarEvolutionAnalysis>();
-
-#ifdef EXPENSIVE_CHECKS
- SE.verify();
-#endif
}
if (Result.MadeCFGChange) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 9870ffbb586c..9d799124074c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -19,7 +19,6 @@
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
@@ -34,6 +33,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/IVDescriptors.h"
@@ -97,7 +97,6 @@
#include <string>
#include <tuple>
#include <utility>
-#include <vector>
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -108,8 +107,9 @@ using namespace slpvectorizer;
STATISTIC(NumVectorInstructions, "Number of vector instructions generated");
-cl::opt<bool> RunSLPVectorization("vectorize-slp", cl::init(true), cl::Hidden,
- cl::desc("Run the SLP vectorization passes"));
+static cl::opt<bool>
+ RunSLPVectorization("vectorize-slp", cl::init(true), cl::Hidden,
+ cl::desc("Run the SLP vectorization passes"));
static cl::opt<int>
SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
@@ -140,10 +140,6 @@ static cl::opt<unsigned>
MaxVFOption("slp-max-vf", cl::init(0), cl::Hidden,
cl::desc("Maximum SLP vectorization factor (0=unlimited)"));
-static cl::opt<int>
-MaxStoreLookup("slp-max-store-lookup", cl::init(32), cl::Hidden,
- cl::desc("Maximum depth of the lookup for consecutive stores."));
-
/// Limits the size of scheduling regions in a block.
/// It avoid long compile times for _very_ large blocks where vector
/// instructions are spread over a wide range.
@@ -232,6 +228,17 @@ static bool isVectorLikeInstWithConstOps(Value *V) {
return isConstant(I->getOperand(2));
}
+#if !defined(NDEBUG)
+/// Print a short descriptor of the instruction bundle suitable for debug output.
+static std::string shortBundleName(ArrayRef<Value *> VL) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+ OS << "n=" << VL.size() << " [" << *VL.front() << ", ..]";
+ OS.flush();
+ return Result;
+}
+#endif
+
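
A standalone analogue of shortBundleName for plain strings, just to show the
intended "n=<count> [<first>, ..]" format (the real helper streams the first
IR value through raw_string_ostream):

    #include <cstdio>
    #include <sstream>
    #include <string>
    #include <vector>

    static std::string shortBundleNameDemo(const std::vector<std::string> &VL) {
      std::ostringstream OS;
      OS << "n=" << VL.size() << " [" << VL.front() << ", ..]";
      return OS.str();
    }

    int main() {
      std::vector<std::string> VL = {"%a = add i32 %x, 1", "%b = add i32 %y, 1"};
      printf("%s\n", shortBundleNameDemo(VL).c_str()); // n=2 [%a = add i32 %x, 1, ..]
    }
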
/// \returns true if all of the instructions in \p VL are in the same block or
/// false otherwise.
static bool allSameBlock(ArrayRef<Value *> VL) {
@@ -384,8 +391,10 @@ static SmallBitVector isUndefVector(const Value *V,
if (isa<T>(II->getOperand(1)))
continue;
std::optional<unsigned> Idx = getInsertIndex(II);
- if (!Idx)
- continue;
+ if (!Idx) {
+ Res.reset();
+ return Res;
+ }
if (*Idx < UseMask.size() && !UseMask.test(*Idx))
Res.reset(*Idx);
}
@@ -429,26 +438,6 @@ static SmallBitVector isUndefVector(const Value *V,
/// i32 6>
/// %2 = mul <4 x i8> %1, %1
/// ret <4 x i8> %2
-/// We convert this initially to something like:
-/// %x0 = extractelement <4 x i8> %x, i32 0
-/// %x3 = extractelement <4 x i8> %x, i32 3
-/// %y1 = extractelement <4 x i8> %y, i32 1
-/// %y2 = extractelement <4 x i8> %y, i32 2
-/// %1 = insertelement <4 x i8> poison, i8 %x0, i32 0
-/// %2 = insertelement <4 x i8> %1, i8 %x3, i32 1
-/// %3 = insertelement <4 x i8> %2, i8 %y1, i32 2
-/// %4 = insertelement <4 x i8> %3, i8 %y2, i32 3
-/// %5 = mul <4 x i8> %4, %4
-/// %6 = extractelement <4 x i8> %5, i32 0
-/// %ins1 = insertelement <4 x i8> poison, i8 %6, i32 0
-/// %7 = extractelement <4 x i8> %5, i32 1
-/// %ins2 = insertelement <4 x i8> %ins1, i8 %7, i32 1
-/// %8 = extractelement <4 x i8> %5, i32 2
-/// %ins3 = insertelement <4 x i8> %ins2, i8 %8, i32 2
-/// %9 = extractelement <4 x i8> %5, i32 3
-/// %ins4 = insertelement <4 x i8> %ins3, i8 %9, i32 3
-/// ret <4 x i8> %ins4
-/// InstCombiner transforms this into a shuffle and vector mul
/// Mask will return the Shuffle Mask equivalent to the extracted elements.
/// TODO: Can we split off and reuse the shuffle mask detection from
/// ShuffleVectorInst/getShuffleCost?
@@ -539,117 +528,6 @@ static std::optional<unsigned> getExtractIndex(Instruction *E) {
return *EI->idx_begin();
}
-/// Tries to find extractelement instructions with constant indices from fixed
-/// vector type and gather such instructions into a bunch, which highly likely
-/// might be detected as a shuffle of 1 or 2 input vectors. If this attempt was
-/// successful, the matched scalars are replaced by poison values in \p VL for
-/// future analysis.
-static std::optional<TTI::ShuffleKind>
-tryToGatherExtractElements(SmallVectorImpl<Value *> &VL,
- SmallVectorImpl<int> &Mask) {
- // Scan list of gathered scalars for extractelements that can be represented
- // as shuffles.
- MapVector<Value *, SmallVector<int>> VectorOpToIdx;
- SmallVector<int> UndefVectorExtracts;
- for (int I = 0, E = VL.size(); I < E; ++I) {
- auto *EI = dyn_cast<ExtractElementInst>(VL[I]);
- if (!EI) {
- if (isa<UndefValue>(VL[I]))
- UndefVectorExtracts.push_back(I);
- continue;
- }
- auto *VecTy = dyn_cast<FixedVectorType>(EI->getVectorOperandType());
- if (!VecTy || !isa<ConstantInt, UndefValue>(EI->getIndexOperand()))
- continue;
- std::optional<unsigned> Idx = getExtractIndex(EI);
- // Undefined index.
- if (!Idx) {
- UndefVectorExtracts.push_back(I);
- continue;
- }
- SmallBitVector ExtractMask(VecTy->getNumElements(), true);
- ExtractMask.reset(*Idx);
- if (isUndefVector(EI->getVectorOperand(), ExtractMask).all()) {
- UndefVectorExtracts.push_back(I);
- continue;
- }
- VectorOpToIdx[EI->getVectorOperand()].push_back(I);
- }
- // Sort the vector operands by the maximum number of uses in extractelements.
- MapVector<unsigned, SmallVector<Value *>> VFToVector;
- for (const auto &Data : VectorOpToIdx)
- VFToVector[cast<FixedVectorType>(Data.first->getType())->getNumElements()]
- .push_back(Data.first);
- for (auto &Data : VFToVector) {
- stable_sort(Data.second, [&VectorOpToIdx](Value *V1, Value *V2) {
- return VectorOpToIdx.find(V1)->second.size() >
- VectorOpToIdx.find(V2)->second.size();
- });
- }
- // Find the best pair of the vectors with the same number of elements or a
- // single vector.
- const int UndefSz = UndefVectorExtracts.size();
- unsigned SingleMax = 0;
- Value *SingleVec = nullptr;
- unsigned PairMax = 0;
- std::pair<Value *, Value *> PairVec(nullptr, nullptr);
- for (auto &Data : VFToVector) {
- Value *V1 = Data.second.front();
- if (SingleMax < VectorOpToIdx[V1].size() + UndefSz) {
- SingleMax = VectorOpToIdx[V1].size() + UndefSz;
- SingleVec = V1;
- }
- Value *V2 = nullptr;
- if (Data.second.size() > 1)
- V2 = *std::next(Data.second.begin());
- if (V2 && PairMax < VectorOpToIdx[V1].size() + VectorOpToIdx[V2].size() +
- UndefSz) {
- PairMax = VectorOpToIdx[V1].size() + VectorOpToIdx[V2].size() + UndefSz;
- PairVec = std::make_pair(V1, V2);
- }
- }
- if (SingleMax == 0 && PairMax == 0 && UndefSz == 0)
- return std::nullopt;
- // Check if better to perform a shuffle of 2 vectors or just of a single
- // vector.
- SmallVector<Value *> SavedVL(VL.begin(), VL.end());
- SmallVector<Value *> GatheredExtracts(
- VL.size(), PoisonValue::get(VL.front()->getType()));
- if (SingleMax >= PairMax && SingleMax) {
- for (int Idx : VectorOpToIdx[SingleVec])
- std::swap(GatheredExtracts[Idx], VL[Idx]);
- } else {
- for (Value *V : {PairVec.first, PairVec.second})
- for (int Idx : VectorOpToIdx[V])
- std::swap(GatheredExtracts[Idx], VL[Idx]);
- }
- // Add extracts from undefs too.
- for (int Idx : UndefVectorExtracts)
- std::swap(GatheredExtracts[Idx], VL[Idx]);
- // Check that gather of extractelements can be represented as just a
- // shuffle of a single/two vectors the scalars are extracted from.
- std::optional<TTI::ShuffleKind> Res =
- isFixedVectorShuffle(GatheredExtracts, Mask);
- if (!Res) {
- // TODO: try to check other subsets if possible.
- // Restore the original VL if attempt was not successful.
- VL.swap(SavedVL);
- return std::nullopt;
- }
- // Restore unused scalars from mask, if some of the extractelements were not
- // selected for shuffle.
- for (int I = 0, E = GatheredExtracts.size(); I < E; ++I) {
- auto *EI = dyn_cast<ExtractElementInst>(VL[I]);
- if (!EI || !isa<FixedVectorType>(EI->getVectorOperandType()) ||
- !isa<ConstantInt, UndefValue>(EI->getIndexOperand()) ||
- is_contained(UndefVectorExtracts, I))
- continue;
- if (Mask[I] == PoisonMaskElem && !isa<PoisonValue>(GatheredExtracts[I]))
- std::swap(VL[I], GatheredExtracts[I]);
- }
- return Res;
-}
-
namespace {
/// Main data required for vectorization of instructions.
@@ -695,7 +573,7 @@ static Value *isOneOf(const InstructionsState &S, Value *Op) {
return S.OpValue;
}
-/// \returns true if \p Opcode is allowed as part of of the main/alternate
+/// \returns true if \p Opcode is allowed as part of the main/alternate
/// instruction for SLP vectorization.
///
/// Example of unsupported opcode is SDIV that can potentially cause UB if the
@@ -889,18 +767,14 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
/// \returns true if all of the values in \p VL have the same type or false
/// otherwise.
static bool allSameType(ArrayRef<Value *> VL) {
- Type *Ty = VL[0]->getType();
- for (int i = 1, e = VL.size(); i < e; i++)
- if (VL[i]->getType() != Ty)
- return false;
-
- return true;
+ Type *Ty = VL.front()->getType();
+ return all_of(VL.drop_front(), [&](Value *V) { return V->getType() == Ty; });
}
/// \returns True if in-tree use also needs extract. This refers to
/// possible scalar operand in vectorized instruction.
-static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
- TargetLibraryInfo *TLI) {
+static bool doesInTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
+ TargetLibraryInfo *TLI) {
unsigned Opcode = UserInst->getOpcode();
switch (Opcode) {
case Instruction::Load: {
@@ -914,11 +788,10 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
case Instruction::Call: {
CallInst *CI = cast<CallInst>(UserInst);
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
- for (unsigned i = 0, e = CI->arg_size(); i != e; ++i) {
- if (isVectorIntrinsicWithScalarOpAtArg(ID, i))
- return (CI->getArgOperand(i) == Scalar);
- }
- [[fallthrough]];
+ return any_of(enumerate(CI->args()), [&](auto &&Arg) {
+ return isVectorIntrinsicWithScalarOpAtArg(ID, Arg.index()) &&
+ Arg.value().get() == Scalar;
+ });
}
default:
return false;
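
The rewrite above folds the call case into a single predicate and drops the
old fallthrough: the in-tree user needs an extract exactly when some argument
slot that must stay scalar holds the value. The same shape without the LLVM
helpers (isScalarOpAt is a made-up stand-in for
isVectorIntrinsicWithScalarOpAtArg):

    #include <cstddef>
    #include <vector>

    static bool isScalarOpAt(size_t ArgIdx) { return ArgIdx == 1; } // toy rule

    // True iff an argument position that must remain scalar holds Scalar,
    // mirroring the any_of(enumerate(CI->args()), ...) form above.
    static bool needsExtract(const std::vector<int> &Args, int Scalar) {
      for (size_t I = 0; I != Args.size(); ++I)
        if (isScalarOpAt(I) && Args[I] == Scalar)
          return true;
      return false;
    }

    int main() { return needsExtract({7, 42, 9}, 42) ? 0 : 1; }
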
@@ -1181,6 +1054,7 @@ public:
void deleteTree() {
VectorizableTree.clear();
ScalarToTreeEntry.clear();
+ MultiNodeScalars.clear();
MustGather.clear();
EntryToLastInstruction.clear();
ExternalUses.clear();
@@ -1273,7 +1147,7 @@ public:
/// {{{i16, i16}, {i16, i16}}, {{i16, i16}, {i16, i16}}} and so on.
///
/// \returns number of elements in vector if isomorphism exists, 0 otherwise.
- unsigned canMapToVector(Type *T, const DataLayout &DL) const;
+ unsigned canMapToVector(Type *T) const;
/// \returns True if the VectorizableTree is both tiny and not fully
/// vectorizable. We do not vectorize such trees.
@@ -1324,6 +1198,9 @@ public:
}
LLVM_DUMP_METHOD void dump() const { dump(dbgs()); }
#endif
+  bool operator==(const EdgeInfo &Other) const {
+ return UserTE == Other.UserTE && EdgeIdx == Other.EdgeIdx;
+ }
};
/// A helper class used for scoring candidates for two consecutive lanes.
@@ -1764,7 +1641,7 @@ public:
auto *IdxLaneI = dyn_cast<Instruction>(IdxLaneV);
if (!IdxLaneI || !isa<Instruction>(OpIdxLaneV))
return 0;
- return R.areAllUsersVectorized(IdxLaneI, std::nullopt)
+ return R.areAllUsersVectorized(IdxLaneI)
? LookAheadHeuristics::ScoreAllUserVectorized
: 0;
}
@@ -1941,7 +1818,7 @@ public:
HashMap[NumFreeOpsHash.Hash] = std::make_pair(1, Lane);
} else if (NumFreeOpsHash.NumOfAPOs == Min &&
NumFreeOpsHash.NumOpsWithSameOpcodeParent == SameOpNumber) {
- auto It = HashMap.find(NumFreeOpsHash.Hash);
+ auto *It = HashMap.find(NumFreeOpsHash.Hash);
if (It == HashMap.end())
HashMap[NumFreeOpsHash.Hash] = std::make_pair(1, Lane);
else
@@ -2203,7 +2080,7 @@ public:
for (int Pass = 0; Pass != 2; ++Pass) {
// Check if no need to reorder operands since they're are perfect or
// shuffled diamond match.
- // Need to to do it to avoid extra external use cost counting for
+ // Need to do it to avoid extra external use cost counting for
// shuffled matches, which may cause regressions.
if (SkipReordering())
break;
@@ -2388,6 +2265,18 @@ public:
~BoUpSLP();
private:
+  /// Determine if a vectorized value \p V can be demoted to
+ /// a smaller type with a truncation. We collect the values that will be
+ /// demoted in ToDemote and additional roots that require investigating in
+ /// Roots.
+  /// \param DemotedConsts list of Instruction/OperandIndex pairs identifying
+  /// constant operands to be demoted. Required to correctly identify constant
+  /// nodes to be demoted.
+ bool collectValuesToDemote(
+ Value *V, SmallVectorImpl<Value *> &ToDemote,
+ DenseMap<Instruction *, SmallVector<unsigned>> &DemotedConsts,
+ SmallVectorImpl<Value *> &Roots, DenseSet<Value *> &Visited) const;
+
  /// Check if the operands on the edges \p Edges of the \p UserTE allow
  /// reordering (i.e. the operands can be reordered because they have only one
  /// user and are reorderable).
@@ -2410,12 +2299,25 @@ private:
TreeEntry *getVectorizedOperand(TreeEntry *UserTE, unsigned OpIdx) {
ArrayRef<Value *> VL = UserTE->getOperand(OpIdx);
TreeEntry *TE = nullptr;
- const auto *It = find_if(VL, [this, &TE](Value *V) {
+ const auto *It = find_if(VL, [&](Value *V) {
TE = getTreeEntry(V);
- return TE;
+ if (TE && is_contained(TE->UserTreeIndices, EdgeInfo(UserTE, OpIdx)))
+ return true;
+ auto It = MultiNodeScalars.find(V);
+ if (It != MultiNodeScalars.end()) {
+ for (TreeEntry *E : It->second) {
+ if (is_contained(E->UserTreeIndices, EdgeInfo(UserTE, OpIdx))) {
+ TE = E;
+ return true;
+ }
+ }
+ }
+ return false;
});
- if (It != VL.end() && TE->isSame(VL))
+ if (It != VL.end()) {
+ assert(TE->isSame(VL) && "Expected same scalars.");
return TE;
+ }
return nullptr;
}
@@ -2428,13 +2330,16 @@ private:
}
/// Checks if all users of \p I are the part of the vectorization tree.
- bool areAllUsersVectorized(Instruction *I,
- ArrayRef<Value *> VectorizedVals) const;
+ bool areAllUsersVectorized(
+ Instruction *I,
+ const SmallDenseSet<Value *> *VectorizedVals = nullptr) const;
/// Return information about the vector formed for the specified index
/// of a vector of (the same) instruction.
- TargetTransformInfo::OperandValueInfo getOperandInfo(ArrayRef<Value *> VL,
- unsigned OpIdx);
+ TargetTransformInfo::OperandValueInfo getOperandInfo(ArrayRef<Value *> Ops);
+
+  /// \returns the graph entry for the \p Idx operand of the \p E entry.
+ const TreeEntry *getOperandEntry(const TreeEntry *E, unsigned Idx) const;
/// \returns the cost of the vectorizable entry.
InstructionCost getEntryCost(const TreeEntry *E,
@@ -2450,15 +2355,22 @@ private:
/// vector) and sets \p CurrentOrder to the identity permutation; otherwise
/// returns false, setting \p CurrentOrder to either an empty vector or a
/// non-identity permutation that allows to reuse extract instructions.
+ /// \param ResizeAllowed indicates whether it is allowed to handle subvector
+ /// extract order.
bool canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
- SmallVectorImpl<unsigned> &CurrentOrder) const;
+ SmallVectorImpl<unsigned> &CurrentOrder,
+ bool ResizeAllowed = false) const;
/// Vectorize a single entry in the tree.
- Value *vectorizeTree(TreeEntry *E);
+  /// \param PostponedPHIs true if emission of phi nodes needs to be postponed
+  /// to avoid issues with def-use order.
+ Value *vectorizeTree(TreeEntry *E, bool PostponedPHIs);
/// Vectorize a single entry in the tree, the \p Idx-th operand of the entry
/// \p E.
- Value *vectorizeOperand(TreeEntry *E, unsigned NodeIdx);
+  /// \param PostponedPHIs true if emission of phi nodes needs to be postponed
+  /// to avoid issues with def-use order.
+ Value *vectorizeOperand(TreeEntry *E, unsigned NodeIdx, bool PostponedPHIs);
/// Create a new vector from a list of scalar values. Produces a sequence
/// which exploits values reused across lanes, and arranges the inserts
@@ -2477,17 +2389,50 @@ private:
/// instruction in the list).
Instruction &getLastInstructionInBundle(const TreeEntry *E);
- /// Checks if the gathered \p VL can be represented as shuffle(s) of previous
- /// tree entries.
+ /// Tries to find extractelement instructions with constant indices from fixed
+ /// vector type and gather such instructions into a bunch, which highly likely
+ /// might be detected as a shuffle of 1 or 2 input vectors. If this attempt
+ /// was successful, the matched scalars are replaced by poison values in \p VL
+ /// for future analysis.
+ std::optional<TargetTransformInfo::ShuffleKind>
+ tryToGatherSingleRegisterExtractElements(MutableArrayRef<Value *> VL,
+ SmallVectorImpl<int> &Mask) const;
+
+ /// Tries to find extractelement instructions with constant indices from fixed
+ /// vector type and gather such instructions into a bunch, which highly likely
+ /// might be detected as a shuffle of 1 or 2 input vectors. If this attempt
+ /// was successful, the matched scalars are replaced by poison values in \p VL
+ /// for future analysis.
+ SmallVector<std::optional<TargetTransformInfo::ShuffleKind>>
+ tryToGatherExtractElements(SmallVectorImpl<Value *> &VL,
+ SmallVectorImpl<int> &Mask,
+ unsigned NumParts) const;
+
+ /// Checks if the gathered \p VL can be represented as a single register
+ /// shuffle(s) of previous tree entries.
/// \param TE Tree entry checked for permutation.
/// \param VL List of scalars (a subset of the TE scalar), checked for
- /// permutations.
+  /// permutations. Must form a single-register vector.
/// \returns ShuffleKind, if gathered values can be represented as shuffles of
- /// previous tree entries. \p Mask is filled with the shuffle mask.
+ /// previous tree entries. \p Part of \p Mask is filled with the shuffle mask.
std::optional<TargetTransformInfo::ShuffleKind>
- isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
- SmallVectorImpl<int> &Mask,
- SmallVectorImpl<const TreeEntry *> &Entries);
+ isGatherShuffledSingleRegisterEntry(
+ const TreeEntry *TE, ArrayRef<Value *> VL, MutableArrayRef<int> Mask,
+ SmallVectorImpl<const TreeEntry *> &Entries, unsigned Part);
+
+ /// Checks if the gathered \p VL can be represented as multi-register
+ /// shuffle(s) of previous tree entries.
+ /// \param TE Tree entry checked for permutation.
+ /// \param VL List of scalars (a subset of the TE scalar), checked for
+ /// permutations.
+ /// \returns per-register series of ShuffleKind, if gathered values can be
+ /// represented as shuffles of previous tree entries. \p Mask is filled with
+  /// the shuffle mask (also on a per-register basis).
+ SmallVector<std::optional<TargetTransformInfo::ShuffleKind>>
+ isGatherShuffledEntry(
+ const TreeEntry *TE, ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask,
+ SmallVectorImpl<SmallVector<const TreeEntry *>> &Entries,
+ unsigned NumParts);
/// \returns the scalarization cost for this list of values. Assuming that
/// this subtree gets vectorized, we may need to extract the values from the
@@ -2517,14 +2462,14 @@ private:
/// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
/// users of \p TE and collects the stores. It returns the map from the store
/// pointers to the collected stores.
- DenseMap<Value *, SmallVector<StoreInst *, 4>>
+ DenseMap<Value *, SmallVector<StoreInst *>>
collectUserStores(const BoUpSLP::TreeEntry *TE) const;
/// Helper for `findExternalStoreUsersReorderIndices()`. It checks if the
- /// stores in \p StoresVec can form a vector instruction. If so it returns true
- /// and populates \p ReorderIndices with the shuffle indices of the the stores
- /// when compared to the sorted vector.
- bool canFormVector(const SmallVector<StoreInst *, 4> &StoresVec,
+ /// stores in \p StoresVec can form a vector instruction. If so it returns
+ /// true and populates \p ReorderIndices with the shuffle indices of the
+ /// stores when compared to the sorted vector.
+ bool canFormVector(ArrayRef<StoreInst *> StoresVec,
OrdersType &ReorderIndices) const;
/// Iterates through the users of \p TE, looking for scalar stores that can be
@@ -2621,10 +2566,18 @@ private:
/// The Scalars are vectorized into this value. It is initialized to Null.
WeakTrackingVH VectorizedValue = nullptr;
+ /// New vector phi instructions emitted for the vectorized phi nodes.
+ PHINode *PHI = nullptr;
+
/// Do we need to gather this sequence or vectorize it
/// (either with vector instruction or with scatter/gather
/// intrinsics for store/load)?
- enum EntryState { Vectorize, ScatterVectorize, NeedToGather };
+ enum EntryState {
+ Vectorize,
+ ScatterVectorize,
+ PossibleStridedVectorize,
+ NeedToGather
+ };
EntryState State;
/// Does this sequence require some shuffling?
@@ -2772,6 +2725,14 @@ private:
return FoundLane;
}
+    /// Build a shuffle mask for a graph entry that represents a merge of main
+ /// and alternate operations.
+ void
+ buildAltOpShuffleMask(const function_ref<bool(Instruction *)> IsAltOp,
+ SmallVectorImpl<int> &Mask,
+ SmallVectorImpl<Value *> *OpScalars = nullptr,
+ SmallVectorImpl<Value *> *AltScalars = nullptr) const;
+
#ifndef NDEBUG
/// Debug printer.
LLVM_DUMP_METHOD void dump() const {
@@ -2792,6 +2753,9 @@ private:
case ScatterVectorize:
dbgs() << "ScatterVectorize\n";
break;
+ case PossibleStridedVectorize:
+ dbgs() << "PossibleStridedVectorize\n";
+ break;
case NeedToGather:
dbgs() << "NeedToGather\n";
break;
@@ -2892,7 +2856,14 @@ private:
}
if (Last->State != TreeEntry::NeedToGather) {
for (Value *V : VL) {
- assert(!getTreeEntry(V) && "Scalar already in tree!");
+ const TreeEntry *TE = getTreeEntry(V);
+ assert((!TE || TE == Last || doesNotNeedToBeScheduled(V)) &&
+ "Scalar already in tree!");
+ if (TE) {
+ if (TE != Last)
+ MultiNodeScalars.try_emplace(V).first->getSecond().push_back(Last);
+ continue;
+ }
ScalarToTreeEntry[V] = Last;
}
// Update the scheduler bundle to point to this TreeEntry.
@@ -2905,7 +2876,8 @@ private:
for (Value *V : VL) {
if (doesNotNeedToBeScheduled(V))
continue;
- assert(BundleMember && "Unexpected end of bundle.");
+ if (!BundleMember)
+ continue;
BundleMember->TE = Last;
BundleMember = BundleMember->NextInBundle;
}
@@ -2913,6 +2885,10 @@ private:
assert(!BundleMember && "Bundle and VL out of sync");
} else {
MustGather.insert(VL.begin(), VL.end());
+      // Build a map from gathered scalars to the nodes where they are used.
+ for (Value *V : VL)
+ if (!isConstant(V))
+ ValueToGatherNodes.try_emplace(V).first->getSecond().insert(Last);
}
if (UserTreeIdx.UserTE)
@@ -2950,6 +2926,10 @@ private:
/// Maps a specific scalar to its tree entry.
SmallDenseMap<Value *, TreeEntry *> ScalarToTreeEntry;
+  /// Maps scalars that are used in several vectorize nodes to the list of
+  /// those nodes.
+ SmallDenseMap<Value *, SmallVector<TreeEntry *>> MultiNodeScalars;
+
/// Maps a value to the proposed vectorizable size.
SmallDenseMap<Value *, unsigned> InstrElementSize;
@@ -2995,25 +2975,25 @@ private:
/// is invariant in the calling loop.
bool isAliased(const MemoryLocation &Loc1, Instruction *Inst1,
Instruction *Inst2) {
+ if (!Loc1.Ptr || !isSimple(Inst1) || !isSimple(Inst2))
+ return true;
// First check if the result is already in the cache.
- AliasCacheKey key = std::make_pair(Inst1, Inst2);
- std::optional<bool> &result = AliasCache[key];
- if (result) {
- return *result;
- }
- bool aliased = true;
- if (Loc1.Ptr && isSimple(Inst1))
- aliased = isModOrRefSet(BatchAA.getModRefInfo(Inst2, Loc1));
+ AliasCacheKey Key = std::make_pair(Inst1, Inst2);
+ auto It = AliasCache.find(Key);
+ if (It != AliasCache.end())
+ return It->second;
+ bool Aliased = isModOrRefSet(BatchAA.getModRefInfo(Inst2, Loc1));
// Store the result in the cache.
- result = aliased;
- return aliased;
+ AliasCache.try_emplace(Key, Aliased);
+ AliasCache.try_emplace(std::make_pair(Inst2, Inst1), Aliased);
+ return Aliased;
}
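
Aliasing of two instructions is symmetric, so the rewritten isAliased fills
the cache for both key orders from one query. A minimal sketch of that
memoization (expensiveCheck is a placeholder for the BatchAA call):

    #include <map>
    #include <utility>

    struct AliasCacheDemo {
      std::map<std::pair<int, int>, bool> Cache;

      static bool expensiveCheck(int A, int B) { return ((A ^ B) & 1) != 0; }

      bool query(int A, int B) {
        if (auto It = Cache.find({A, B}); It != Cache.end())
          return It->second;                  // hit for either argument order
        bool Aliased = expensiveCheck(A, B);
        Cache.try_emplace({A, B}, Aliased);
        Cache.try_emplace({B, A}, Aliased);   // symmetric entry
        return Aliased;
      }
    };

    int main() {
      AliasCacheDemo C;
      bool First = C.query(1, 2);
      return C.query(2, 1) == First ? 0 : 1;  // second lookup is a cache hit
    }
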
using AliasCacheKey = std::pair<Instruction *, Instruction *>;
/// Cache for alias results.
/// TODO: consider moving this to the AliasAnalysis itself.
- DenseMap<AliasCacheKey, std::optional<bool>> AliasCache;
+ DenseMap<AliasCacheKey, bool> AliasCache;
// Cache for pointerMayBeCaptured calls inside AA. This is preserved
// globally through SLP because we don't perform any action which
@@ -3047,7 +3027,7 @@ private:
SetVector<Instruction *> GatherShuffleExtractSeq;
/// A list of blocks that we are going to CSE.
- SetVector<BasicBlock *> CSEBlocks;
+ DenseSet<BasicBlock *> CSEBlocks;
/// Contains all scheduling relevant data for an instruction.
/// A ScheduleData either represents a single instruction or a member of an
@@ -3497,7 +3477,7 @@ private:
BasicBlock *BB;
/// Simple memory allocation for ScheduleData.
- std::vector<std::unique_ptr<ScheduleData[]>> ScheduleDataChunks;
+ SmallVector<std::unique_ptr<ScheduleData[]>> ScheduleDataChunks;
/// The size of a ScheduleData array in ScheduleDataChunks.
int ChunkSize;
@@ -3607,7 +3587,7 @@ private:
/// where "width" indicates the minimum bit width and "signed" is True if the
/// value must be signed-extended, rather than zero-extended, back to its
/// original width.
- MapVector<Value *, std::pair<uint64_t, bool>> MinBWs;
+ DenseMap<const TreeEntry *, std::pair<uint64_t, bool>> MinBWs;
};
} // end namespace slpvectorizer
@@ -3676,7 +3656,7 @@ template <> struct GraphTraits<BoUpSLP *> {
template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
using TreeEntry = BoUpSLP::TreeEntry;
- DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
+ DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
std::string getNodeLabel(const TreeEntry *Entry, const BoUpSLP *R) {
std::string Str;
@@ -3699,7 +3679,8 @@ template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
const BoUpSLP *) {
if (Entry->State == TreeEntry::NeedToGather)
return "color=red";
- if (Entry->State == TreeEntry::ScatterVectorize)
+ if (Entry->State == TreeEntry::ScatterVectorize ||
+ Entry->State == TreeEntry::PossibleStridedVectorize)
return "color=blue";
return "";
}
@@ -3761,7 +3742,7 @@ static void reorderOrder(SmallVectorImpl<unsigned> &Order, ArrayRef<int> Mask) {
inversePermutation(Order, MaskOrder);
}
reorderReuses(MaskOrder, Mask);
- if (ShuffleVectorInst::isIdentityMask(MaskOrder)) {
+ if (ShuffleVectorInst::isIdentityMask(MaskOrder, MaskOrder.size())) {
Order.clear();
return;
}
@@ -3779,7 +3760,40 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
OrdersType CurrentOrder(NumScalars, NumScalars);
SmallVector<int> Positions;
SmallBitVector UsedPositions(NumScalars);
- const TreeEntry *STE = nullptr;
+ DenseMap<const TreeEntry *, unsigned> UsedEntries;
+ DenseMap<Value *, std::pair<const TreeEntry *, unsigned>> ValueToEntryPos;
+ for (Value *V : TE.Scalars) {
+ if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V))
+ continue;
+ const auto *LocalSTE = getTreeEntry(V);
+ if (!LocalSTE)
+ continue;
+ unsigned Lane =
+ std::distance(LocalSTE->Scalars.begin(), find(LocalSTE->Scalars, V));
+ if (Lane >= NumScalars)
+ continue;
+ ++UsedEntries.try_emplace(LocalSTE, 0).first->getSecond();
+ ValueToEntryPos.try_emplace(V, LocalSTE, Lane);
+ }
+ if (UsedEntries.empty())
+ return std::nullopt;
+ const TreeEntry &BestSTE =
+ *std::max_element(UsedEntries.begin(), UsedEntries.end(),
+ [](const std::pair<const TreeEntry *, unsigned> &P1,
+ const std::pair<const TreeEntry *, unsigned> &P2) {
+ return P1.second < P2.second;
+ })
+ ->first;
+ UsedEntries.erase(&BestSTE);
+ const TreeEntry *SecondBestSTE = nullptr;
+ if (!UsedEntries.empty())
+ SecondBestSTE =
+ std::max_element(UsedEntries.begin(), UsedEntries.end(),
+ [](const std::pair<const TreeEntry *, unsigned> &P1,
+ const std::pair<const TreeEntry *, unsigned> &P2) {
+ return P1.second < P2.second;
+ })
+ ->first;
  // Try to find all gathered scalars that get vectorized in other
  // vectorize nodes. Here we can have only a single tree vector node to
  // correctly identify the order of the gathered scalars.
@@ -3787,58 +3801,56 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
Value *V = TE.Scalars[I];
if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V))
continue;
- if (const auto *LocalSTE = getTreeEntry(V)) {
- if (!STE)
- STE = LocalSTE;
- else if (STE != LocalSTE)
- // Take the order only from the single vector node.
- return std::nullopt;
- unsigned Lane =
- std::distance(STE->Scalars.begin(), find(STE->Scalars, V));
- if (Lane >= NumScalars)
- return std::nullopt;
- if (CurrentOrder[Lane] != NumScalars) {
- if (Lane != I)
- continue;
- UsedPositions.reset(CurrentOrder[Lane]);
- }
- // The partial identity (where only some elements of the gather node are
- // in the identity order) is good.
- CurrentOrder[Lane] = I;
- UsedPositions.set(I);
+ const auto [LocalSTE, Lane] = ValueToEntryPos.lookup(V);
+ if (!LocalSTE || (LocalSTE != &BestSTE && LocalSTE != SecondBestSTE))
+ continue;
+ if (CurrentOrder[Lane] != NumScalars) {
+ if ((CurrentOrder[Lane] >= BestSTE.Scalars.size() ||
+ BestSTE.Scalars[CurrentOrder[Lane]] == V) &&
+ (Lane != I || LocalSTE == SecondBestSTE))
+ continue;
+ UsedPositions.reset(CurrentOrder[Lane]);
}
+ // The partial identity (where only some elements of the gather node are
+ // in the identity order) is good.
+ CurrentOrder[Lane] = I;
+ UsedPositions.set(I);
}
// Need to keep the order if we have a vector entry and at least 2 scalars or
// the vectorized entry has just 2 scalars.
- if (STE && (UsedPositions.count() > 1 || STE->Scalars.size() == 2)) {
- auto &&IsIdentityOrder = [NumScalars](ArrayRef<unsigned> CurrentOrder) {
- for (unsigned I = 0; I < NumScalars; ++I)
- if (CurrentOrder[I] != I && CurrentOrder[I] != NumScalars)
- return false;
- return true;
- };
- if (IsIdentityOrder(CurrentOrder))
- return OrdersType();
- auto *It = CurrentOrder.begin();
- for (unsigned I = 0; I < NumScalars;) {
- if (UsedPositions.test(I)) {
- ++I;
- continue;
- }
- if (*It == NumScalars) {
- *It = I;
- ++I;
- }
- ++It;
+ if (BestSTE.Scalars.size() != 2 && UsedPositions.count() <= 1)
+ return std::nullopt;
+ auto IsIdentityOrder = [&](ArrayRef<unsigned> CurrentOrder) {
+ for (unsigned I = 0; I < NumScalars; ++I)
+ if (CurrentOrder[I] != I && CurrentOrder[I] != NumScalars)
+ return false;
+ return true;
+ };
+ if (IsIdentityOrder(CurrentOrder))
+ return OrdersType();
+ auto *It = CurrentOrder.begin();
+ for (unsigned I = 0; I < NumScalars;) {
+ if (UsedPositions.test(I)) {
+ ++I;
+ continue;
}
- return std::move(CurrentOrder);
+ if (*It == NumScalars) {
+ *It = I;
+ ++I;
+ }
+ ++It;
}
- return std::nullopt;
+ return std::move(CurrentOrder);
}
namespace {
/// Tracks the state in which we can represent the loads in the given sequence.
-enum class LoadsState { Gather, Vectorize, ScatterVectorize };
+enum class LoadsState {
+ Gather,
+ Vectorize,
+ ScatterVectorize,
+ PossibleStridedVectorize
+};
} // anonymous namespace
static bool arePointersCompatible(Value *Ptr1, Value *Ptr2,
@@ -3898,6 +3910,7 @@ static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
if (IsSorted || all_of(PointerOps, [&](Value *P) {
return arePointersCompatible(P, PointerOps.front(), TLI);
})) {
+ bool IsPossibleStrided = false;
if (IsSorted) {
Value *Ptr0;
Value *PtrN;
@@ -3913,6 +3926,8 @@ static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
// Check that the sorted loads are consecutive.
if (static_cast<unsigned>(*Diff) == VL.size() - 1)
return LoadsState::Vectorize;
+ // Simple check if not a strided access - clear order.
+ IsPossibleStrided = *Diff % (VL.size() - 1) == 0;
}
// TODO: need to improve analysis of the pointers, if not all of them are
// GEPs or have > 2 operands, we end up with a gather node, which just
@@ -3934,7 +3949,8 @@ static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
if (TTI.isLegalMaskedGather(VecTy, CommonAlignment) &&
!TTI.forceScalarizeMaskedGather(VecTy, CommonAlignment))
- return LoadsState::ScatterVectorize;
+ return IsPossibleStrided ? LoadsState::PossibleStridedVectorize
+ : LoadsState::ScatterVectorize;
}
}
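
A worked instance of the new strided-access test: Diff is the element distance
between the first and last sorted pointer, so N consecutive loads give
Diff == N - 1, while a constant stride S gives Diff == S * (N - 1), hence the
Diff % (VL.size() - 1) == 0 check. For loads at assumed element offsets
{0, 2, 4, 6}:

    #include <cstdio>

    int main() {
      int Diff = 6, N = 4; // sorted loads at offsets {0, 2, 4, 6}
      bool Consecutive = (Diff == N - 1);           // -> LoadsState::Vectorize
      bool PossiblyStrided = (Diff % (N - 1) == 0); // -> PossibleStridedVectorize
      printf("consecutive=%d possiblyStrided=%d stride=%d\n", Consecutive ? 1 : 0,
             PossiblyStrided ? 1 : 0, PossiblyStrided ? Diff / (N - 1) : 0);
    }
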
@@ -4050,7 +4066,8 @@ static bool areTwoInsertFromSameBuildVector(
// Go through the vector operand of insertelement instructions trying to find
// either VU as the original vector for IE2 or V as the original vector for
// IE1.
- SmallSet<int, 8> ReusedIdx;
+ SmallBitVector ReusedIdx(
+ cast<VectorType>(VU->getType())->getElementCount().getKnownMinValue());
bool IsReusedIdx = false;
do {
if (IE2 == VU && !IE1)
@@ -4058,16 +4075,18 @@ static bool areTwoInsertFromSameBuildVector(
if (IE1 == V && !IE2)
return V->hasOneUse();
if (IE1 && IE1 != V) {
- IsReusedIdx |=
- !ReusedIdx.insert(getInsertIndex(IE1).value_or(*Idx2)).second;
+ unsigned Idx1 = getInsertIndex(IE1).value_or(*Idx2);
+ IsReusedIdx |= ReusedIdx.test(Idx1);
+ ReusedIdx.set(Idx1);
if ((IE1 != VU && !IE1->hasOneUse()) || IsReusedIdx)
IE1 = nullptr;
else
IE1 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE1));
}
if (IE2 && IE2 != VU) {
- IsReusedIdx |=
- !ReusedIdx.insert(getInsertIndex(IE2).value_or(*Idx1)).second;
+ unsigned Idx2 = getInsertIndex(IE2).value_or(*Idx1);
+ IsReusedIdx |= ReusedIdx.test(Idx2);
+ ReusedIdx.set(Idx2);
if ((IE2 != V && !IE2->hasOneUse()) || IsReusedIdx)
IE2 = nullptr;
else
@@ -4135,13 +4154,16 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
return std::nullopt; // No need to reorder.
return std::move(ResOrder);
}
- if (TE.State == TreeEntry::Vectorize &&
+ if ((TE.State == TreeEntry::Vectorize ||
+ TE.State == TreeEntry::PossibleStridedVectorize) &&
(isa<LoadInst, ExtractElementInst, ExtractValueInst>(TE.getMainOp()) ||
(TopToBottom && isa<StoreInst, InsertElementInst>(TE.getMainOp()))) &&
!TE.isAltShuffle())
return TE.ReorderIndices;
if (TE.State == TreeEntry::Vectorize && TE.getOpcode() == Instruction::PHI) {
- auto PHICompare = [](llvm::Value *V1, llvm::Value *V2) {
+ auto PHICompare = [&](unsigned I1, unsigned I2) {
+ Value *V1 = TE.Scalars[I1];
+ Value *V2 = TE.Scalars[I2];
if (V1 == V2)
return false;
if (!V1->hasOneUse() || !V2->hasOneUse())
@@ -4180,14 +4202,13 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
};
if (!TE.ReorderIndices.empty())
return TE.ReorderIndices;
- DenseMap<Value *, unsigned> PhiToId;
- SmallVector<Value *, 4> Phis;
+ DenseMap<unsigned, unsigned> PhiToId;
+ SmallVector<unsigned> Phis(TE.Scalars.size());
+ std::iota(Phis.begin(), Phis.end(), 0);
OrdersType ResOrder(TE.Scalars.size());
- for (unsigned Id = 0, Sz = TE.Scalars.size(); Id < Sz; ++Id) {
- PhiToId[TE.Scalars[Id]] = Id;
- Phis.push_back(TE.Scalars[Id]);
- }
- llvm::stable_sort(Phis, PHICompare);
+ for (unsigned Id = 0, Sz = TE.Scalars.size(); Id < Sz; ++Id)
+ PhiToId[Id] = Id;
+ stable_sort(Phis, PHICompare);
for (unsigned Id = 0, Sz = Phis.size(); Id < Sz; ++Id)
ResOrder[Id] = PhiToId[Phis[Id]];
if (IsIdentityOrder(ResOrder))
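
The PHI reordering above now sorts indices instead of values: iota the indices
0..n-1, stable_sort them with a comparator that looks through to the scalars,
then read the permutation back. A stripped-down version of the idiom:

    #include <algorithm>
    #include <cstdio>
    #include <numeric>
    #include <vector>

    int main() {
      std::vector<int> Scalars = {30, 10, 20}; // stand-ins for the PHI nodes
      std::vector<unsigned> Order(Scalars.size());
      std::iota(Order.begin(), Order.end(), 0u); // 0, 1, 2
      std::stable_sort(Order.begin(), Order.end(), [&](unsigned A, unsigned B) {
        return Scalars[A] < Scalars[B];          // PHICompare analogue
      });
      for (unsigned I : Order)
        printf("%u ", I);                        // prints: 1 2 0
      printf("\n");
    }
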
@@ -4214,7 +4235,8 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
// Check that gather of extractelements can be represented as
// just a shuffle of a single vector.
OrdersType CurrentOrder;
- bool Reuse = canReuseExtract(TE.Scalars, TE.getMainOp(), CurrentOrder);
+ bool Reuse = canReuseExtract(TE.Scalars, TE.getMainOp(), CurrentOrder,
+ /*ResizeAllowed=*/true);
if (Reuse || !CurrentOrder.empty()) {
if (!CurrentOrder.empty())
fixupOrderingIndices(CurrentOrder);
@@ -4270,7 +4292,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
static bool isRepeatedNonIdentityClusteredMask(ArrayRef<int> Mask,
unsigned Sz) {
ArrayRef<int> FirstCluster = Mask.slice(0, Sz);
- if (ShuffleVectorInst::isIdentityMask(FirstCluster))
+ if (ShuffleVectorInst::isIdentityMask(FirstCluster, Sz))
return false;
for (unsigned I = Sz, E = Mask.size(); I < E; I += Sz) {
ArrayRef<int> Cluster = Mask.slice(I, Sz);
@@ -4386,7 +4408,9 @@ void BoUpSLP::reorderTopToBottom() {
++Cnt;
}
VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get());
- if (TE->State != TreeEntry::Vectorize || !TE->ReuseShuffleIndices.empty())
+ if (!(TE->State == TreeEntry::Vectorize ||
+ TE->State == TreeEntry::PossibleStridedVectorize) ||
+ !TE->ReuseShuffleIndices.empty())
GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
if (TE->State == TreeEntry::Vectorize &&
TE->getOpcode() == Instruction::PHI)
@@ -4409,6 +4433,9 @@ void BoUpSLP::reorderTopToBottom() {
MapVector<OrdersType, unsigned,
DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>
OrdersUses;
+  // Last chance orders - scatter vectorize. Try to use their orders if there
+  // are no other orders or if the order is counted already.
+ SmallVector<OrdersType> StridedVectorizeOrders;
SmallPtrSet<const TreeEntry *, 4> VisitedOps;
for (const TreeEntry *OpTE : OrderedEntries) {
      // No need to reorder these nodes, still need to extend and to use shuffle,
@@ -4455,6 +4482,11 @@ void BoUpSLP::reorderTopToBottom() {
if (Order.empty())
continue;
}
+ // Postpone scatter orders.
+ if (OpTE->State == TreeEntry::PossibleStridedVectorize) {
+ StridedVectorizeOrders.push_back(Order);
+ continue;
+ }
// Stores actually store the mask, not the order, need to invert.
if (OpTE->State == TreeEntry::Vectorize && !OpTE->isAltShuffle() &&
OpTE->getOpcode() == Instruction::Store && !Order.empty()) {
@@ -4472,8 +4504,21 @@ void BoUpSLP::reorderTopToBottom() {
}
}
// Set order of the user node.
- if (OrdersUses.empty())
- continue;
+ if (OrdersUses.empty()) {
+ if (StridedVectorizeOrders.empty())
+ continue;
+ // Add (potentially!) strided vectorize orders.
+ for (OrdersType &Order : StridedVectorizeOrders)
+ ++OrdersUses.insert(std::make_pair(Order, 0)).first->second;
+ } else {
+        // Account (potentially!) strided vectorize orders only if they were
+        // used already.
+ for (OrdersType &Order : StridedVectorizeOrders) {
+ auto *It = OrdersUses.find(Order);
+ if (It != OrdersUses.end())
+ ++It->second;
+ }
+ }
// Choose the most used order.
ArrayRef<unsigned> BestOrder = OrdersUses.front().first;
unsigned Cnt = OrdersUses.front().second;
@@ -4514,7 +4559,8 @@ void BoUpSLP::reorderTopToBottom() {
}
continue;
}
- if (TE->State == TreeEntry::Vectorize &&
+ if ((TE->State == TreeEntry::Vectorize ||
+ TE->State == TreeEntry::PossibleStridedVectorize) &&
isa<ExtractElementInst, ExtractValueInst, LoadInst, StoreInst,
InsertElementInst>(TE->getMainOp()) &&
!TE->isAltShuffle()) {
@@ -4555,6 +4601,10 @@ bool BoUpSLP::canReorderOperands(
}))
continue;
if (TreeEntry *TE = getVectorizedOperand(UserTE, I)) {
+      // FIXME: Do not reorder (possible!) strided vectorized nodes, as they
+      // require reordering of the operands, which is not implemented yet.
+ if (TE->State == TreeEntry::PossibleStridedVectorize)
+ return false;
// Do not reorder if operand node is used by many user nodes.
if (any_of(TE->UserTreeIndices,
[UserTE](const EdgeInfo &EI) { return EI.UserTE != UserTE; }))
@@ -4567,7 +4617,8 @@ bool BoUpSLP::canReorderOperands(
// simply add to the list of gathered ops.
// If there are reused scalars, process this node as a regular vectorize
// node, just reorder reuses mask.
- if (TE->State != TreeEntry::Vectorize && TE->ReuseShuffleIndices.empty())
+ if (TE->State != TreeEntry::Vectorize &&
+ TE->ReuseShuffleIndices.empty() && TE->ReorderIndices.empty())
GatherOps.push_back(TE);
continue;
}
@@ -4602,18 +4653,19 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
  // Currently these are vectorized loads, extracts without alternate operands,
  // plus some gathering of extracts.
SmallVector<TreeEntry *> NonVectorized;
- for_each(VectorizableTree, [this, &OrderedEntries, &GathersToOrders,
- &NonVectorized](
- const std::unique_ptr<TreeEntry> &TE) {
- if (TE->State != TreeEntry::Vectorize)
+ for (const std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
+ if (TE->State != TreeEntry::Vectorize &&
+ TE->State != TreeEntry::PossibleStridedVectorize)
NonVectorized.push_back(TE.get());
if (std::optional<OrdersType> CurrentOrder =
getReorderingData(*TE, /*TopToBottom=*/false)) {
OrderedEntries.insert(TE.get());
- if (TE->State != TreeEntry::Vectorize || !TE->ReuseShuffleIndices.empty())
+ if (!(TE->State == TreeEntry::Vectorize ||
+ TE->State == TreeEntry::PossibleStridedVectorize) ||
+ !TE->ReuseShuffleIndices.empty())
GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
}
- });
+ }
// 1. Propagate order to the graph nodes, which use only reordered nodes.
  // I.e., if the node has operands that are reordered, try to make at least
@@ -4627,6 +4679,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
SmallVector<TreeEntry *> Filtered;
for (TreeEntry *TE : OrderedEntries) {
if (!(TE->State == TreeEntry::Vectorize ||
+ TE->State == TreeEntry::PossibleStridedVectorize ||
(TE->State == TreeEntry::NeedToGather &&
GathersToOrders.count(TE))) ||
TE->UserTreeIndices.empty() || !TE->ReuseShuffleIndices.empty() ||
@@ -4649,8 +4702,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
}
}
// Erase filtered entries.
- for_each(Filtered,
- [&OrderedEntries](TreeEntry *TE) { OrderedEntries.remove(TE); });
+ for (TreeEntry *TE : Filtered)
+ OrderedEntries.remove(TE);
SmallVector<
std::pair<TreeEntry *, SmallVector<std::pair<unsigned, TreeEntry *>>>>
UsersVec(Users.begin(), Users.end());
@@ -4662,10 +4715,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
SmallVector<TreeEntry *> GatherOps;
if (!canReorderOperands(Data.first, Data.second, NonVectorized,
GatherOps)) {
- for_each(Data.second,
- [&OrderedEntries](const std::pair<unsigned, TreeEntry *> &Op) {
- OrderedEntries.remove(Op.second);
- });
+ for (const std::pair<unsigned, TreeEntry *> &Op : Data.second)
+ OrderedEntries.remove(Op.second);
continue;
}
// All operands are reordered and used only in this node - propagate the
@@ -4673,6 +4724,9 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
MapVector<OrdersType, unsigned,
DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>
OrdersUses;
+    // Last-chance orders - scatter vectorize. Try to use their orders if there
+    // are no other orders or the order is already counted.
+ SmallVector<std::pair<OrdersType, unsigned>> StridedVectorizeOrders;
    // Do the analysis for each tree entry only once, otherwise the order of
    // the same node may be considered several times, though it might not be
    // profitable.
@@ -4694,6 +4748,11 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
Data.second, [OpTE](const std::pair<unsigned, TreeEntry *> &P) {
return P.second == OpTE;
});
+ // Postpone scatter orders.
+ if (OpTE->State == TreeEntry::PossibleStridedVectorize) {
+ StridedVectorizeOrders.emplace_back(Order, NumOps);
+ continue;
+ }
      // Stores actually store the mask, not the order; we need to invert it.
if (OpTE->State == TreeEntry::Vectorize && !OpTE->isAltShuffle() &&
OpTE->getOpcode() == Instruction::Store && !Order.empty()) {
@@ -4754,11 +4813,27 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
}
// If no orders - skip current nodes and jump to the next one, if any.
if (OrdersUses.empty()) {
- for_each(Data.second,
- [&OrderedEntries](const std::pair<unsigned, TreeEntry *> &Op) {
- OrderedEntries.remove(Op.second);
- });
- continue;
+ if (StridedVectorizeOrders.empty() ||
+ (Data.first->ReorderIndices.empty() &&
+ Data.first->ReuseShuffleIndices.empty() &&
+ !(IgnoreReorder &&
+ Data.first == VectorizableTree.front().get()))) {
+ for (const std::pair<unsigned, TreeEntry *> &Op : Data.second)
+ OrderedEntries.remove(Op.second);
+ continue;
+ }
+ // Add (potentially!) strided vectorize orders.
+ for (std::pair<OrdersType, unsigned> &Pair : StridedVectorizeOrders)
+ OrdersUses.insert(std::make_pair(Pair.first, 0)).first->second +=
+ Pair.second;
+ } else {
+      // Account for (potentially!) strided vectorize orders only if the order
+      // was already used.
+ for (std::pair<OrdersType, unsigned> &Pair : StridedVectorizeOrders) {
+ auto *It = OrdersUses.find(Pair.first);
+ if (It != OrdersUses.end())
+ It->second += Pair.second;
+ }
}
// Choose the best order.
ArrayRef<unsigned> BestOrder = OrdersUses.front().first;
@@ -4771,10 +4846,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
}
// Set order of the user node (reordering of operands and user nodes).
if (BestOrder.empty()) {
- for_each(Data.second,
- [&OrderedEntries](const std::pair<unsigned, TreeEntry *> &Op) {
- OrderedEntries.remove(Op.second);
- });
+ for (const std::pair<unsigned, TreeEntry *> &Op : Data.second)
+ OrderedEntries.remove(Op.second);
continue;
}
// Erase operands from OrderedEntries list and adjust their orders.
@@ -4796,7 +4869,10 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
continue;
}
// Gathers are processed separately.
- if (TE->State != TreeEntry::Vectorize)
+ if (TE->State != TreeEntry::Vectorize &&
+ TE->State != TreeEntry::PossibleStridedVectorize &&
+ (TE->State != TreeEntry::ScatterVectorize ||
+ TE->ReorderIndices.empty()))
continue;
assert((BestOrder.size() == TE->ReorderIndices.size() ||
TE->ReorderIndices.empty()) &&
@@ -4825,7 +4901,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
Data.first->isAltShuffle())
Data.first->reorderOperands(Mask);
if (!isa<InsertElementInst, StoreInst>(Data.first->getMainOp()) ||
- Data.first->isAltShuffle()) {
+ Data.first->isAltShuffle() ||
+ Data.first->State == TreeEntry::PossibleStridedVectorize) {
reorderScalars(Data.first->Scalars, Mask);
reorderOrder(Data.first->ReorderIndices, MaskOrder);
if (Data.first->ReuseShuffleIndices.empty() &&
@@ -4859,10 +4936,12 @@ void BoUpSLP::buildExternalUses(
// For each lane:
for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
Value *Scalar = Entry->Scalars[Lane];
+ if (!isa<Instruction>(Scalar))
+ continue;
int FoundLane = Entry->findLaneForValue(Scalar);
// Check if the scalar is externally used as an extra arg.
- auto ExtI = ExternallyUsedValues.find(Scalar);
+ const auto *ExtI = ExternallyUsedValues.find(Scalar);
if (ExtI != ExternallyUsedValues.end()) {
LLVM_DEBUG(dbgs() << "SLP: Need to extract: Extra arg from lane "
<< Lane << " from " << *Scalar << ".\n");
@@ -4886,7 +4965,8 @@ void BoUpSLP::buildExternalUses(
// be used.
if (UseScalar != U ||
UseEntry->State == TreeEntry::ScatterVectorize ||
- !InTreeUserNeedToExtract(Scalar, UserInst, TLI)) {
+ UseEntry->State == TreeEntry::PossibleStridedVectorize ||
+ !doesInTreeUserNeedToExtract(Scalar, UserInst, TLI)) {
LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U
<< ".\n");
assert(UseEntry->State != TreeEntry::NeedToGather && "Bad state");
@@ -4906,9 +4986,9 @@ void BoUpSLP::buildExternalUses(
}
}
-DenseMap<Value *, SmallVector<StoreInst *, 4>>
+DenseMap<Value *, SmallVector<StoreInst *>>
BoUpSLP::collectUserStores(const BoUpSLP::TreeEntry *TE) const {
- DenseMap<Value *, SmallVector<StoreInst *, 4>> PtrToStoresMap;
+ DenseMap<Value *, SmallVector<StoreInst *>> PtrToStoresMap;
for (unsigned Lane : seq<unsigned>(0, TE->Scalars.size())) {
Value *V = TE->Scalars[Lane];
// To save compilation time we don't visit if we have too many users.
@@ -4947,14 +5027,14 @@ BoUpSLP::collectUserStores(const BoUpSLP::TreeEntry *TE) const {
return PtrToStoresMap;
}
-bool BoUpSLP::canFormVector(const SmallVector<StoreInst *, 4> &StoresVec,
+bool BoUpSLP::canFormVector(ArrayRef<StoreInst *> StoresVec,
OrdersType &ReorderIndices) const {
  // We check whether the stores in StoresVec can form a vector by sorting them
// and checking whether they are consecutive.
// To avoid calling getPointersDiff() while sorting we create a vector of
// pairs {store, offset from first} and sort this instead.
- SmallVector<std::pair<StoreInst *, int>, 4> StoreOffsetVec(StoresVec.size());
+ SmallVector<std::pair<StoreInst *, int>> StoreOffsetVec(StoresVec.size());
StoreInst *S0 = StoresVec[0];
StoreOffsetVec[0] = {S0, 0};
Type *S0Ty = S0->getValueOperand()->getType();
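canFormVector sorts {store, offset} pairs instead of repeatedly calling getPointersDiff while sorting. A plain-C++ sketch of the consecutiveness test it performs after sorting (the offsets and store size are illustrative):

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

static bool canFormConsecutiveVector(std::vector<int> Offsets, int StoreSize) {
  std::sort(Offsets.begin(), Offsets.end());
  for (std::size_t I = 1; I < Offsets.size(); ++I)
    if (Offsets[I] - Offsets[I - 1] != StoreSize)
      return false; // a gap or a duplicate offset breaks the vector
  return true;
}

int main() {
  assert(canFormConsecutiveVector({8, 0, 4, 12}, 4));  // 0,4,8,12
  assert(!canFormConsecutiveVector({8, 0, 4, 16}, 4)); // hole at 12
}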
@@ -5023,7 +5103,7 @@ SmallVector<BoUpSLP::OrdersType, 1>
BoUpSLP::findExternalStoreUsersReorderIndices(TreeEntry *TE) const {
unsigned NumLanes = TE->Scalars.size();
- DenseMap<Value *, SmallVector<StoreInst *, 4>> PtrToStoresMap =
+ DenseMap<Value *, SmallVector<StoreInst *>> PtrToStoresMap =
collectUserStores(TE);
// Holds the reorder indices for each candidate store vector that is a user of
@@ -5244,6 +5324,8 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
return TreeEntry::Vectorize;
case LoadsState::ScatterVectorize:
return TreeEntry::ScatterVectorize;
+ case LoadsState::PossibleStridedVectorize:
+ return TreeEntry::PossibleStridedVectorize;
case LoadsState::Gather:
#ifndef NDEBUG
Type *ScalarTy = VL0->getType();
@@ -5416,7 +5498,8 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
VFShape Shape = VFShape::get(
- *CI, ElementCount::getFixed(static_cast<unsigned int>(VL.size())),
+ CI->getFunctionType(),
+ ElementCount::getFixed(static_cast<unsigned int>(VL.size())),
false /*HasGlobalPred*/);
Function *VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape);
@@ -5488,9 +5571,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
SmallVector<int> ReuseShuffleIndicies;
SmallVector<Value *> UniqueValues;
- auto &&TryToFindDuplicates = [&VL, &ReuseShuffleIndicies, &UniqueValues,
- &UserTreeIdx,
- this](const InstructionsState &S) {
+ SmallVector<Value *> NonUniqueValueVL;
+ auto TryToFindDuplicates = [&](const InstructionsState &S,
+ bool DoNotFail = false) {
// Check that every instruction appears once in this bundle.
DenseMap<Value *, unsigned> UniquePositions(VL.size());
for (Value *V : VL) {
@@ -5517,6 +5600,24 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
!isConstant(V);
})) ||
!llvm::has_single_bit<uint32_t>(NumUniqueScalarValues)) {
+ if (DoNotFail && UniquePositions.size() > 1 &&
+ NumUniqueScalarValues > 1 && S.MainOp->isSafeToRemove() &&
+ all_of(UniqueValues, [=](Value *V) {
+ return isa<ExtractElementInst>(V) ||
+ areAllUsersVectorized(cast<Instruction>(V),
+ UserIgnoreList);
+ })) {
+ unsigned PWSz = PowerOf2Ceil(UniqueValues.size());
+ if (PWSz == VL.size()) {
+ ReuseShuffleIndicies.clear();
+ } else {
+ NonUniqueValueVL.assign(UniqueValues.begin(), UniqueValues.end());
+ NonUniqueValueVL.append(PWSz - UniqueValues.size(),
+ UniqueValues.back());
+ VL = NonUniqueValueVL;
+ }
+ return true;
+ }
LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
return false;
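The new DoNotFail path above keeps a bundle alive by deduplicating the scalars and padding them with copies of the last unique value up to the next power of two. A std-only sketch of that shaping step (powerOf2Ceil mirrors llvm::PowerOf2Ceil; the reuse-mask bookkeeping is omitted):

#include <cassert>
#include <cstddef>
#include <unordered_set>
#include <vector>

static std::size_t powerOf2Ceil(std::size_t X) {
  std::size_t P = 1;
  while (P < X)
    P *= 2;
  return P;
}

static std::vector<int> dedupAndPad(const std::vector<int> &VL) {
  std::vector<int> Unique;
  std::unordered_set<int> Seen;
  for (int V : VL)
    if (Seen.insert(V).second)
      Unique.push_back(V);
  std::size_t PWSz = powerOf2Ceil(Unique.size());
  if (PWSz == VL.size())
    return VL; // duplicates stay; only the reuse mask would be dropped
  Unique.resize(PWSz, Unique.back()); // pad with the last unique value
  return Unique;
}

int main() {
  std::vector<int> Padded = dedupAndPad({7, 7, 3, 5, 5});
  assert((Padded == std::vector<int>{7, 3, 5, 5}));
}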
@@ -5528,6 +5629,18 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
InstructionsState S = getSameOpcode(VL, *TLI);
+ // Don't vectorize ephemeral values.
+ if (!EphValues.empty()) {
+ for (Value *V : VL) {
+ if (EphValues.count(V)) {
+ LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
+ << ") is ephemeral.\n");
+ newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
+ return;
+ }
+ }
+ }
+
// Gather if we hit the RecursionMaxDepth, unless this is a load (or z/sext of
// a load), in which case peek through to include it in the tree, without
// ballooning over-budget.
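The ephemeral-value check is now hoisted to run before any per-block analysis: a single hit in EphValues turns the whole bundle into a gather node immediately. A toy model of that early exit (plain C++, with integers standing in for Values):

#include <cassert>
#include <set>
#include <vector>

int main() {
  std::set<int> EphValues = {42};
  std::vector<int> VL = {1, 42, 3};
  bool Gather = false;
  for (int V : VL)
    if (EphValues.count(V)) {
      Gather = true; // newTreeEntry(..., not vectorized) in the real code
      break;
    }
  assert(Gather);
}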
@@ -5633,7 +5746,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
BasicBlock *BB = nullptr;
bool IsScatterVectorizeUserTE =
UserTreeIdx.UserTE &&
- UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;
+ (UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize ||
+ UserTreeIdx.UserTE->State == TreeEntry::PossibleStridedVectorize);
bool AreAllSameInsts =
(S.getOpcode() && allSameBlock(VL)) ||
(S.OpValue->getType()->isPointerTy() && IsScatterVectorizeUserTE &&
@@ -5665,39 +5779,44 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// We now know that this is a vector of instructions of the same type from
// the same block.
- // Don't vectorize ephemeral values.
- if (!EphValues.empty()) {
- for (Value *V : VL) {
- if (EphValues.count(V)) {
- LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
- << ") is ephemeral.\n");
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
- return;
- }
- }
- }
-
// Check if this is a duplicate of another entry.
if (TreeEntry *E = getTreeEntry(S.OpValue)) {
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
if (!E->isSame(VL)) {
- LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
- if (TryToFindDuplicates(S))
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
+ auto It = MultiNodeScalars.find(S.OpValue);
+ if (It != MultiNodeScalars.end()) {
+ auto *TEIt = find_if(It->getSecond(),
+ [&](TreeEntry *ME) { return ME->isSame(VL); });
+ if (TEIt != It->getSecond().end())
+ E = *TEIt;
+ else
+ E = nullptr;
+ } else {
+ E = nullptr;
+ }
+ }
+ if (!E) {
+ if (!doesNotNeedToBeScheduled(S.OpValue)) {
+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
+ if (TryToFindDuplicates(S))
+ newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
+ return;
+ }
+ } else {
+      // Record the reuse of the tree node. FIXME: currently this is only used
+ // to properly draw the graph rather than for the actual vectorization.
+ E->UserTreeIndices.push_back(UserTreeIdx);
+ LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
+ << ".\n");
return;
}
- // Record the reuse of the tree node. FIXME, currently this is only used to
- // properly draw the graph rather than for the actual vectorization.
- E->UserTreeIndices.push_back(UserTreeIdx);
- LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
- << ".\n");
- return;
}
// Check that none of the instructions in the bundle are already in the tree.
for (Value *V : VL) {
- if (!IsScatterVectorizeUserTE && !isa<Instruction>(V))
+ if ((!IsScatterVectorizeUserTE && !isa<Instruction>(V)) ||
+ doesNotNeedToBeScheduled(V))
continue;
if (getTreeEntry(V)) {
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
@@ -5725,7 +5844,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Special processing for sorted pointers for ScatterVectorize node with
  // constant indices only.
if (AreAllSameInsts && UserTreeIdx.UserTE &&
- UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize &&
+ (UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize ||
+ UserTreeIdx.UserTE->State == TreeEntry::PossibleStridedVectorize) &&
!(S.getOpcode() && allSameBlock(VL))) {
assert(S.OpValue->getType()->isPointerTy() &&
count_if(VL, [](Value *V) { return isa<GetElementPtrInst>(V); }) >=
@@ -5760,7 +5880,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
// Check that every instruction appears once in this bundle.
- if (!TryToFindDuplicates(S))
+ if (!TryToFindDuplicates(S, /*DoNotFail=*/true))
return;
// Perform specific checks for each particular instruction kind.
@@ -5780,7 +5900,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
BlockScheduling &BS = *BSRef;
- std::optional<ScheduleData *> Bundle = BS.tryScheduleBundle(VL, this, S);
+ std::optional<ScheduleData *> Bundle =
+ BS.tryScheduleBundle(UniqueValues, this, S);
#ifdef EXPENSIVE_CHECKS
// Make sure we didn't break any internal invariants
BS.verify();
@@ -5905,6 +6026,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// from such a struct, we read/write packed bits disagreeing with the
// unvectorized version.
TreeEntry *TE = nullptr;
+ fixupOrderingIndices(CurrentOrder);
switch (State) {
case TreeEntry::Vectorize:
if (CurrentOrder.empty()) {
@@ -5913,7 +6035,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
} else {
- fixupOrderingIndices(CurrentOrder);
// Need to reorder.
TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies, CurrentOrder);
@@ -5921,6 +6042,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
TE->setOperandsInOrder();
break;
+ case TreeEntry::PossibleStridedVectorize:
+ // Vectorizing non-consecutive loads with `llvm.masked.gather`.
+ if (CurrentOrder.empty()) {
+ TE = newTreeEntry(VL, TreeEntry::PossibleStridedVectorize, Bundle, S,
+ UserTreeIdx, ReuseShuffleIndicies);
+ } else {
+ TE = newTreeEntry(VL, TreeEntry::PossibleStridedVectorize, Bundle, S,
+ UserTreeIdx, ReuseShuffleIndicies, CurrentOrder);
+ }
+ TE->setOperandsInOrder();
+ buildTree_rec(PointerOps, Depth + 1, {TE, 0});
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
+ break;
case TreeEntry::ScatterVectorize:
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
@@ -5951,13 +6085,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
TE->setOperandsInOrder();
- for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
+ for (unsigned I : seq<unsigned>(0, VL0->getNumOperands())) {
ValueList Operands;
// Prepare the operand vector.
for (Value *V : VL)
- Operands.push_back(cast<Instruction>(V)->getOperand(i));
+ Operands.push_back(cast<Instruction>(V)->getOperand(I));
- buildTree_rec(Operands, Depth + 1, {TE, i});
+ buildTree_rec(Operands, Depth + 1, {TE, I});
}
return;
}
@@ -6031,13 +6165,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
TE->setOperandsInOrder();
- for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
+ for (unsigned I : seq<unsigned>(0, VL0->getNumOperands())) {
ValueList Operands;
// Prepare the operand vector.
for (Value *V : VL)
- Operands.push_back(cast<Instruction>(V)->getOperand(i));
+ Operands.push_back(cast<Instruction>(V)->getOperand(I));
- buildTree_rec(Operands, Depth + 1, {TE, i});
+ buildTree_rec(Operands, Depth + 1, {TE, I});
}
return;
}
@@ -6087,8 +6221,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (!CI)
Operands.back().push_back(Op);
else
- Operands.back().push_back(ConstantExpr::getIntegerCast(
- CI, Ty, CI->getValue().isSignBitSet()));
+ Operands.back().push_back(ConstantFoldIntegerCast(
+ CI, Ty, CI->getValue().isSignBitSet(), *DL));
}
TE->setOperand(IndexIdx, Operands.back());
@@ -6132,18 +6266,18 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
TE->setOperandsInOrder();
- for (unsigned i = 0, e = CI->arg_size(); i != e; ++i) {
- // For scalar operands no need to to create an entry since no need to
+ for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
+ // For scalar operands no need to create an entry since no need to
// vectorize it.
- if (isVectorIntrinsicWithScalarOpAtArg(ID, i))
+ if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
continue;
ValueList Operands;
// Prepare the operand vector.
for (Value *V : VL) {
auto *CI2 = cast<CallInst>(V);
- Operands.push_back(CI2->getArgOperand(i));
+ Operands.push_back(CI2->getArgOperand(I));
}
- buildTree_rec(Operands, Depth + 1, {TE, i});
+ buildTree_rec(Operands, Depth + 1, {TE, I});
}
return;
}
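For calls, the operand walk above builds one bundle per argument but skips arguments that the intrinsic requires to stay scalar. A small std-only sketch of that filtering (the ScalarArgs set is a stand-in for isVectorIntrinsicWithScalarOpAtArg):

#include <cassert>
#include <set>
#include <vector>

int main() {
  unsigned NumArgs = 3;
  std::set<unsigned> ScalarArgs = {1}; // e.g. a shift-amount-style operand
  std::vector<unsigned> VectorizedArgs;
  for (unsigned I = 0; I < NumArgs; ++I) {
    if (ScalarArgs.count(I))
      continue; // no entry needed, the scalar is used as-is
    VectorizedArgs.push_back(I);
  }
  assert((VectorizedArgs == std::vector<unsigned>{0, 2}));
}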
@@ -6194,13 +6328,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
TE->setOperandsInOrder();
- for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
+ for (unsigned I : seq<unsigned>(0, VL0->getNumOperands())) {
ValueList Operands;
// Prepare the operand vector.
for (Value *V : VL)
- Operands.push_back(cast<Instruction>(V)->getOperand(i));
+ Operands.push_back(cast<Instruction>(V)->getOperand(I));
- buildTree_rec(Operands, Depth + 1, {TE, i});
+ buildTree_rec(Operands, Depth + 1, {TE, I});
}
return;
}
@@ -6210,7 +6344,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
llvm_unreachable("Unexpected vectorization of the instructions.");
}
-unsigned BoUpSLP::canMapToVector(Type *T, const DataLayout &DL) const {
+unsigned BoUpSLP::canMapToVector(Type *T) const {
unsigned N = 1;
Type *EltTy = T;
@@ -6234,15 +6368,16 @@ unsigned BoUpSLP::canMapToVector(Type *T, const DataLayout &DL) const {
if (!isValidElementType(EltTy))
return 0;
- uint64_t VTSize = DL.getTypeStoreSizeInBits(FixedVectorType::get(EltTy, N));
+ uint64_t VTSize = DL->getTypeStoreSizeInBits(FixedVectorType::get(EltTy, N));
if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize ||
- VTSize != DL.getTypeStoreSizeInBits(T))
+ VTSize != DL->getTypeStoreSizeInBits(T))
return 0;
return N;
}
bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
- SmallVectorImpl<unsigned> &CurrentOrder) const {
+ SmallVectorImpl<unsigned> &CurrentOrder,
+ bool ResizeAllowed) const {
const auto *It = find_if(VL, [](Value *V) {
return isa<ExtractElementInst, ExtractValueInst>(V);
});
@@ -6263,8 +6398,7 @@ bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
// We have to extract from a vector/aggregate with the same number of elements.
unsigned NElts;
if (E0->getOpcode() == Instruction::ExtractValue) {
- const DataLayout &DL = E0->getModule()->getDataLayout();
- NElts = canMapToVector(Vec->getType(), DL);
+ NElts = canMapToVector(Vec->getType());
if (!NElts)
return false;
// Check if load can be rewritten as load of vector.
@@ -6275,46 +6409,55 @@ bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
NElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
}
- if (NElts != VL.size())
- return false;
-
- // Check that all of the indices extract from the correct offset.
- bool ShouldKeepOrder = true;
unsigned E = VL.size();
- // Assign to all items the initial value E + 1 so we can check if the extract
- // instruction index was used already.
- // Also, later we can check that all the indices are used and we have a
- // consecutive access in the extract instructions, by checking that no
- // element of CurrentOrder still has value E + 1.
- CurrentOrder.assign(E, E);
- unsigned I = 0;
- for (; I < E; ++I) {
- auto *Inst = dyn_cast<Instruction>(VL[I]);
+ if (!ResizeAllowed && NElts != E)
+ return false;
+ SmallVector<int> Indices(E, PoisonMaskElem);
+ unsigned MinIdx = NElts, MaxIdx = 0;
+ for (auto [I, V] : enumerate(VL)) {
+ auto *Inst = dyn_cast<Instruction>(V);
if (!Inst)
continue;
if (Inst->getOperand(0) != Vec)
- break;
+ return false;
if (auto *EE = dyn_cast<ExtractElementInst>(Inst))
if (isa<UndefValue>(EE->getIndexOperand()))
continue;
std::optional<unsigned> Idx = getExtractIndex(Inst);
if (!Idx)
- break;
+ return false;
const unsigned ExtIdx = *Idx;
- if (ExtIdx != I) {
- if (ExtIdx >= E || CurrentOrder[ExtIdx] != E)
- break;
- ShouldKeepOrder = false;
- CurrentOrder[ExtIdx] = I;
- } else {
- if (CurrentOrder[I] != E)
- break;
- CurrentOrder[I] = I;
- }
+ if (ExtIdx >= NElts)
+ continue;
+ Indices[I] = ExtIdx;
+ if (MinIdx > ExtIdx)
+ MinIdx = ExtIdx;
+ if (MaxIdx < ExtIdx)
+ MaxIdx = ExtIdx;
}
- if (I < E) {
- CurrentOrder.clear();
+ if (MaxIdx - MinIdx + 1 > E)
return false;
+ if (MaxIdx + 1 <= E)
+ MinIdx = 0;
+
+ // Check that all of the indices extract from the correct offset.
+ bool ShouldKeepOrder = true;
+  // Assign to all items the initial value E so we can check if the extract
+ // instruction index was used already.
+ // Also, later we can check that all the indices are used and we have a
+ // consecutive access in the extract instructions, by checking that no
+  // element of CurrentOrder still has the value E.
+ CurrentOrder.assign(E, E);
+ for (unsigned I = 0; I < E; ++I) {
+ if (Indices[I] == PoisonMaskElem)
+ continue;
+ const unsigned ExtIdx = Indices[I] - MinIdx;
+ if (CurrentOrder[ExtIdx] != E) {
+ CurrentOrder.clear();
+ return false;
+ }
+ ShouldKeepOrder &= ExtIdx == I;
+ CurrentOrder[ExtIdx] = I;
}
if (ShouldKeepOrder)
CurrentOrder.clear();
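The reworked canReuseExtract above first collects extract indices into a poison-tolerant Indices array and only then builds CurrentOrder with the sentinel E. A standalone sketch of that second phase (plain C++; -1 plays the role of PoisonMaskElem, and the MinIdx rebasing is assumed to have already happened):

#include <cassert>
#include <vector>

static bool buildExtractOrder(const std::vector<int> &Indices, unsigned E,
                              std::vector<unsigned> &CurrentOrder) {
  bool ShouldKeepOrder = true;
  CurrentOrder.assign(E, E); // E is the "unused" sentinel
  for (unsigned I = 0; I < E; ++I) {
    if (Indices[I] < 0) // poison/undef lane, leave as sentinel
      continue;
    unsigned ExtIdx = static_cast<unsigned>(Indices[I]);
    if (CurrentOrder[ExtIdx] != E) { // index used twice -> reject
      CurrentOrder.clear();
      return false;
    }
    ShouldKeepOrder &= ExtIdx == I;
    CurrentOrder[ExtIdx] = I;
  }
  if (ShouldKeepOrder) // identity order needs no reordering data
    CurrentOrder.clear();
  return ShouldKeepOrder;
}

int main() {
  std::vector<unsigned> Order;
  // Identity extracts: the order can be dropped entirely.
  assert(buildExtractOrder({0, 1, 2, 3}, 4, Order) && Order.empty());
  // Permuted extracts: reordering data is produced.
  assert(!buildExtractOrder({1, 0, 2, 3}, 4, Order) && Order.size() == 4);
  // A repeated extract index invalidates the order.
  assert(!buildExtractOrder({0, 0, 2, 3}, 4, Order) && Order.empty());
}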
@@ -6322,9 +6465,9 @@ bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
return ShouldKeepOrder;
}
-bool BoUpSLP::areAllUsersVectorized(Instruction *I,
- ArrayRef<Value *> VectorizedVals) const {
- return (I->hasOneUse() && is_contained(VectorizedVals, I)) ||
+bool BoUpSLP::areAllUsersVectorized(
+ Instruction *I, const SmallDenseSet<Value *> *VectorizedVals) const {
+ return (I->hasOneUse() && (!VectorizedVals || VectorizedVals->contains(I))) ||
all_of(I->users(), [this](User *U) {
return ScalarToTreeEntry.count(U) > 0 ||
isVectorLikeInstWithConstOps(U) ||
@@ -6351,8 +6494,8 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
auto IntrinsicCost =
TTI->getIntrinsicInstrCost(CostAttrs, TTI::TCK_RecipThroughput);
- auto Shape = VFShape::get(*CI, ElementCount::getFixed(static_cast<unsigned>(
- VecTy->getNumElements())),
+ auto Shape = VFShape::get(CI->getFunctionType(),
+ ElementCount::getFixed(VecTy->getNumElements()),
false /*HasGlobalPred*/);
Function *VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape);
auto LibCost = IntrinsicCost;
@@ -6365,16 +6508,11 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
return {IntrinsicCost, LibCost};
}
-/// Build shuffle mask for shuffle graph entries and lists of main and alternate
-/// operations operands.
-static void
-buildShuffleEntryMask(ArrayRef<Value *> VL, ArrayRef<unsigned> ReorderIndices,
- ArrayRef<int> ReusesIndices,
- const function_ref<bool(Instruction *)> IsAltOp,
- SmallVectorImpl<int> &Mask,
- SmallVectorImpl<Value *> *OpScalars = nullptr,
- SmallVectorImpl<Value *> *AltScalars = nullptr) {
- unsigned Sz = VL.size();
+void BoUpSLP::TreeEntry::buildAltOpShuffleMask(
+ const function_ref<bool(Instruction *)> IsAltOp, SmallVectorImpl<int> &Mask,
+ SmallVectorImpl<Value *> *OpScalars,
+ SmallVectorImpl<Value *> *AltScalars) const {
+ unsigned Sz = Scalars.size();
Mask.assign(Sz, PoisonMaskElem);
SmallVector<int> OrderMask;
if (!ReorderIndices.empty())
@@ -6383,7 +6521,7 @@ buildShuffleEntryMask(ArrayRef<Value *> VL, ArrayRef<unsigned> ReorderIndices,
unsigned Idx = I;
if (!ReorderIndices.empty())
Idx = OrderMask[I];
- auto *OpInst = cast<Instruction>(VL[Idx]);
+ auto *OpInst = cast<Instruction>(Scalars[Idx]);
if (IsAltOp(OpInst)) {
Mask[I] = Sz + Idx;
if (AltScalars)
@@ -6394,9 +6532,9 @@ buildShuffleEntryMask(ArrayRef<Value *> VL, ArrayRef<unsigned> ReorderIndices,
OpScalars->push_back(OpInst);
}
}
- if (!ReusesIndices.empty()) {
- SmallVector<int> NewMask(ReusesIndices.size(), PoisonMaskElem);
- transform(ReusesIndices, NewMask.begin(), [&Mask](int Idx) {
+ if (!ReuseShuffleIndices.empty()) {
+ SmallVector<int> NewMask(ReuseShuffleIndices.size(), PoisonMaskElem);
+ transform(ReuseShuffleIndices, NewMask.begin(), [&Mask](int Idx) {
return Idx != PoisonMaskElem ? Mask[Idx] : PoisonMaskElem;
});
Mask.swap(NewMask);
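buildAltOpShuffleMask, now a TreeEntry member, encodes the alternate-opcode lanes as indices into a second source vector: lane I gets Sz + I when it uses the alternate operation. A minimal illustration (std-only; the reorder and reuse remapping is omitted):

#include <cassert>
#include <functional>
#include <vector>

static std::vector<int>
buildAltMask(unsigned Sz, const std::function<bool(unsigned)> &IsAltLane) {
  std::vector<int> Mask(Sz, -1);
  for (unsigned I = 0; I < Sz; ++I)
    Mask[I] = IsAltLane(I) ? static_cast<int>(Sz + I) : static_cast<int>(I);
  return Mask;
}

int main() {
  // add/sub alternation over 4 lanes: odd lanes take the alternate op.
  std::vector<int> Mask = buildAltMask(4, [](unsigned I) { return I % 2; });
  assert((Mask == std::vector<int>{0, 5, 2, 7}));
}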
@@ -6429,52 +6567,27 @@ static bool isAlternateInstruction(const Instruction *I,
return I->getOpcode() == AltOp->getOpcode();
}
-TTI::OperandValueInfo BoUpSLP::getOperandInfo(ArrayRef<Value *> VL,
- unsigned OpIdx) {
- assert(!VL.empty());
- const auto *I0 = cast<Instruction>(*find_if(VL, Instruction::classof));
- const auto *Op0 = I0->getOperand(OpIdx);
+TTI::OperandValueInfo BoUpSLP::getOperandInfo(ArrayRef<Value *> Ops) {
+ assert(!Ops.empty());
+ const auto *Op0 = Ops.front();
- const bool IsConstant = all_of(VL, [&](Value *V) {
+ const bool IsConstant = all_of(Ops, [](Value *V) {
// TODO: We should allow undef elements here
- const auto *I = dyn_cast<Instruction>(V);
- if (!I)
- return true;
- auto *Op = I->getOperand(OpIdx);
- return isConstant(Op) && !isa<UndefValue>(Op);
+ return isConstant(V) && !isa<UndefValue>(V);
});
- const bool IsUniform = all_of(VL, [&](Value *V) {
+ const bool IsUniform = all_of(Ops, [=](Value *V) {
// TODO: We should allow undef elements here
- const auto *I = dyn_cast<Instruction>(V);
- if (!I)
- return false;
- return I->getOperand(OpIdx) == Op0;
+ return V == Op0;
});
- const bool IsPowerOfTwo = all_of(VL, [&](Value *V) {
+ const bool IsPowerOfTwo = all_of(Ops, [](Value *V) {
// TODO: We should allow undef elements here
- const auto *I = dyn_cast<Instruction>(V);
- if (!I) {
- assert((isa<UndefValue>(V) ||
- I0->getOpcode() == Instruction::GetElementPtr) &&
- "Expected undef or GEP.");
- return true;
- }
- auto *Op = I->getOperand(OpIdx);
- if (auto *CI = dyn_cast<ConstantInt>(Op))
+ if (auto *CI = dyn_cast<ConstantInt>(V))
return CI->getValue().isPowerOf2();
return false;
});
- const bool IsNegatedPowerOfTwo = all_of(VL, [&](Value *V) {
+ const bool IsNegatedPowerOfTwo = all_of(Ops, [](Value *V) {
// TODO: We should allow undef elements here
- const auto *I = dyn_cast<Instruction>(V);
- if (!I) {
- assert((isa<UndefValue>(V) ||
- I0->getOpcode() == Instruction::GetElementPtr) &&
- "Expected undef or GEP.");
- return true;
- }
- const auto *Op = I->getOperand(OpIdx);
- if (auto *CI = dyn_cast<ConstantInt>(Op))
+ if (auto *CI = dyn_cast<ConstantInt>(V))
return CI->getValue().isNegatedPowerOf2();
return false;
});
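Once callers pass the operand list itself, the per-lane getOperand calls above collapse into simple predicates over the values. A reduced std-only model of the classification (long stands in for ConstantInt lanes; the real code also tracks constness and rejects undefs):

#include <cassert>
#include <vector>

struct OperandInfo {
  bool IsUniform = true;
  bool IsPowerOfTwo = true;
  bool IsNegatedPowerOfTwo = true;
};

static OperandInfo classifyOperands(const std::vector<long> &Ops) {
  OperandInfo R;
  for (long V : Ops) {
    R.IsUniform &= V == Ops.front();
    R.IsPowerOfTwo &= V > 0 && (V & (V - 1)) == 0;
    R.IsNegatedPowerOfTwo &= V < 0 && (-V & (-V - 1)) == 0;
  }
  return R;
}

int main() {
  OperandInfo Info = classifyOperands({4, 8, 2, 16});
  assert(!Info.IsUniform && Info.IsPowerOfTwo && !Info.IsNegatedPowerOfTwo);
  Info = classifyOperands({-4, -4, -4, -4});
  assert(Info.IsUniform && Info.IsNegatedPowerOfTwo);
}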
@@ -6505,9 +6618,24 @@ protected:
bool IsStrict) {
int Limit = Mask.size();
int VF = VecTy->getNumElements();
- return (VF == Limit || !IsStrict) &&
- all_of(Mask, [Limit](int Idx) { return Idx < Limit; }) &&
- ShuffleVectorInst::isIdentityMask(Mask);
+ int Index = -1;
+ if (VF == Limit && ShuffleVectorInst::isIdentityMask(Mask, Limit))
+ return true;
+ if (!IsStrict) {
+ // Consider extract subvector starting from index 0.
+ if (ShuffleVectorInst::isExtractSubvectorMask(Mask, VF, Index) &&
+ Index == 0)
+ return true;
+ // All VF-size submasks are identity (e.g.
+ // <poison,poison,poison,poison,0,1,2,poison,poison,1,2,3> etc. for VF 4).
+ if (Limit % VF == 0 && all_of(seq<int>(0, Limit / VF), [=](int Idx) {
+ ArrayRef<int> Slice = Mask.slice(Idx * VF, VF);
+ return all_of(Slice, [](int I) { return I == PoisonMaskElem; }) ||
+ ShuffleVectorInst::isIdentityMask(Slice, VF);
+ }))
+ return true;
+ }
+ return false;
}
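The relaxed identity test above accepts an exact identity mask and, in non-strict mode, also an extract of the leading subvector or a mask whose VF-wide slices are each all-poison or identity-shaped. A std-only sketch of the slice-based part (-1 as poison; the extract-subvector case is left out):

#include <cassert>
#include <vector>

static bool isIdentityLike(const std::vector<int> &Mask, int VF,
                           bool IsStrict) {
  int Limit = static_cast<int>(Mask.size());
  auto SliceIsIdentity = [&](int Begin, int Len) {
    bool AllPoison = true, Identity = true;
    for (int I = 0; I < Len; ++I) {
      AllPoison &= Mask[Begin + I] == -1;
      Identity &= Mask[Begin + I] == -1 || Mask[Begin + I] == I;
    }
    return AllPoison || Identity;
  };
  if (Limit == VF && SliceIsIdentity(0, Limit))
    return true;
  if (!IsStrict && Limit % VF == 0) {
    for (int Begin = 0; Begin < Limit; Begin += VF)
      if (!SliceIsIdentity(Begin, VF))
        return false;
    return true;
  }
  return false;
}

int main() {
  assert(isIdentityLike({0, 1, 2, 3}, 4, /*IsStrict=*/true));
  // Two identity-shaped slices of width 4 pass only in non-strict mode.
  assert(isIdentityLike({-1, -1, -1, -1, 0, 1, 2, -1}, 4, false));
  assert(!isIdentityLike({-1, -1, -1, -1, 0, 1, 2, -1}, 4, true));
}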
/// Tries to combine 2 different masks into single one.
@@ -6577,7 +6705,8 @@ protected:
if (isIdentityMask(Mask, SVTy, /*IsStrict=*/false)) {
if (!IdentityOp || !SinglePermute ||
(isIdentityMask(Mask, SVTy, /*IsStrict=*/true) &&
- !ShuffleVectorInst::isZeroEltSplatMask(IdentityMask))) {
+ !ShuffleVectorInst::isZeroEltSplatMask(IdentityMask,
+ IdentityMask.size()))) {
IdentityOp = SV;
          // Store the current mask in the IdentityMask so we do not lose
// this info if IdentityOp is selected as the best candidate for the
@@ -6647,7 +6776,7 @@ protected:
}
if (auto *OpTy = dyn_cast<FixedVectorType>(Op->getType());
!OpTy || !isIdentityMask(Mask, OpTy, SinglePermute) ||
- ShuffleVectorInst::isZeroEltSplatMask(Mask)) {
+ ShuffleVectorInst::isZeroEltSplatMask(Mask, Mask.size())) {
if (IdentityOp) {
V = IdentityOp;
assert(Mask.size() == IdentityMask.size() &&
@@ -6663,7 +6792,7 @@ protected:
/*IsStrict=*/true) ||
(Shuffle && Mask.size() == Shuffle->getShuffleMask().size() &&
Shuffle->isZeroEltSplat() &&
- ShuffleVectorInst::isZeroEltSplatMask(Mask)));
+ ShuffleVectorInst::isZeroEltSplatMask(Mask, Mask.size())));
}
V = Op;
return false;
@@ -6768,11 +6897,9 @@ protected:
CombinedMask1[I] = CombinedMask2[I] + (Op1 == Op2 ? 0 : VF);
}
}
- const int Limit = CombinedMask1.size() * 2;
- if (Op1 == Op2 && Limit == 2 * VF &&
- all_of(CombinedMask1, [=](int Idx) { return Idx < Limit; }) &&
- (ShuffleVectorInst::isIdentityMask(CombinedMask1) ||
- (ShuffleVectorInst::isZeroEltSplatMask(CombinedMask1) &&
+ if (Op1 == Op2 &&
+ (ShuffleVectorInst::isIdentityMask(CombinedMask1, VF) ||
+ (ShuffleVectorInst::isZeroEltSplatMask(CombinedMask1, VF) &&
isa<ShuffleVectorInst>(Op1) &&
cast<ShuffleVectorInst>(Op1)->getShuffleMask() ==
ArrayRef(CombinedMask1))))
@@ -6807,10 +6934,29 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
SmallVector<PointerUnion<Value *, const TreeEntry *>, 2> InVectors;
const TargetTransformInfo &TTI;
InstructionCost Cost = 0;
- ArrayRef<Value *> VectorizedVals;
+ SmallDenseSet<Value *> VectorizedVals;
BoUpSLP &R;
SmallPtrSetImpl<Value *> &CheckedExtracts;
constexpr static TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+  /// While set, we are still trying to estimate the cost for the same nodes
+  /// and can delay the actual cost estimation (virtual shuffle instruction
+  /// emission). This may help to better estimate the cost if the same nodes
+  /// must be permuted, and allows moving most of the long shuffle cost
+  /// estimation to TTI.
+ bool SameNodesEstimated = true;
+
+ static Constant *getAllOnesValue(const DataLayout &DL, Type *Ty) {
+ if (Ty->getScalarType()->isPointerTy()) {
+ Constant *Res = ConstantExpr::getIntToPtr(
+ ConstantInt::getAllOnesValue(
+ IntegerType::get(Ty->getContext(),
+ DL.getTypeStoreSizeInBits(Ty->getScalarType()))),
+ Ty->getScalarType());
+ if (auto *VTy = dyn_cast<VectorType>(Ty))
+ Res = ConstantVector::getSplat(VTy->getElementCount(), Res);
+ return Res;
+ }
+ return Constant::getAllOnesValue(Ty);
+ }
InstructionCost getBuildVectorCost(ArrayRef<Value *> VL, Value *Root) {
if ((!Root && allConstant(VL)) || all_of(VL, UndefValue::classof))
@@ -6821,20 +6967,35 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
// Improve gather cost for gather of loads, if we can group some of the
// loads into vector loads.
InstructionsState S = getSameOpcode(VL, *R.TLI);
- if (VL.size() > 2 && S.getOpcode() == Instruction::Load &&
- !S.isAltShuffle() &&
+ const unsigned Sz = R.DL->getTypeSizeInBits(VL.front()->getType());
+ unsigned MinVF = R.getMinVF(2 * Sz);
+ if (VL.size() > 2 &&
+ ((S.getOpcode() == Instruction::Load && !S.isAltShuffle()) ||
+ (InVectors.empty() &&
+ any_of(seq<unsigned>(0, VL.size() / MinVF),
+ [&](unsigned Idx) {
+ ArrayRef<Value *> SubVL = VL.slice(Idx * MinVF, MinVF);
+ InstructionsState S = getSameOpcode(SubVL, *R.TLI);
+ return S.getOpcode() == Instruction::Load &&
+ !S.isAltShuffle();
+ }))) &&
!all_of(Gathers, [&](Value *V) { return R.getTreeEntry(V); }) &&
!isSplat(Gathers)) {
- BoUpSLP::ValueSet VectorizedLoads;
+ SetVector<Value *> VectorizedLoads;
+ SmallVector<LoadInst *> VectorizedStarts;
+ SmallVector<std::pair<unsigned, unsigned>> ScatterVectorized;
unsigned StartIdx = 0;
unsigned VF = VL.size() / 2;
- unsigned VectorizedCnt = 0;
- unsigned ScatterVectorizeCnt = 0;
- const unsigned Sz = R.DL->getTypeSizeInBits(S.MainOp->getType());
- for (unsigned MinVF = R.getMinVF(2 * Sz); VF >= MinVF; VF /= 2) {
+ for (; VF >= MinVF; VF /= 2) {
for (unsigned Cnt = StartIdx, End = VL.size(); Cnt + VF <= End;
Cnt += VF) {
ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
+ if (S.getOpcode() != Instruction::Load || S.isAltShuffle()) {
+ InstructionsState SliceS = getSameOpcode(Slice, *R.TLI);
+ if (SliceS.getOpcode() != Instruction::Load ||
+ SliceS.isAltShuffle())
+ continue;
+ }
if (!VectorizedLoads.count(Slice.front()) &&
!VectorizedLoads.count(Slice.back()) && allSameBlock(Slice)) {
SmallVector<Value *> PointerOps;
@@ -6845,12 +7006,14 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
switch (LS) {
case LoadsState::Vectorize:
case LoadsState::ScatterVectorize:
+ case LoadsState::PossibleStridedVectorize:
// Mark the vectorized loads so that we don't vectorize them
// again.
- if (LS == LoadsState::Vectorize)
- ++VectorizedCnt;
+ // TODO: better handling of loads with reorders.
+ if (LS == LoadsState::Vectorize && CurrentOrder.empty())
+ VectorizedStarts.push_back(cast<LoadInst>(Slice.front()));
else
- ++ScatterVectorizeCnt;
+ ScatterVectorized.emplace_back(Cnt, VF);
VectorizedLoads.insert(Slice.begin(), Slice.end());
// If we vectorized initial block, no need to try to vectorize
// it again.
@@ -6881,8 +7044,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
// Exclude potentially vectorized loads from list of gathered
// scalars.
- auto *LI = cast<LoadInst>(S.MainOp);
- Gathers.assign(Gathers.size(), PoisonValue::get(LI->getType()));
+ Gathers.assign(Gathers.size(), PoisonValue::get(VL.front()->getType()));
// The cost for vectorized loads.
InstructionCost ScalarsCost = 0;
for (Value *V : VectorizedLoads) {
@@ -6892,17 +7054,24 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
LI->getAlign(), LI->getPointerAddressSpace(),
CostKind, TTI::OperandValueInfo(), LI);
}
- auto *LoadTy = FixedVectorType::get(LI->getType(), VF);
- Align Alignment = LI->getAlign();
- GatherCost +=
- VectorizedCnt *
- TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment,
- LI->getPointerAddressSpace(), CostKind,
- TTI::OperandValueInfo(), LI);
- GatherCost += ScatterVectorizeCnt *
- TTI.getGatherScatterOpCost(
- Instruction::Load, LoadTy, LI->getPointerOperand(),
- /*VariableMask=*/false, Alignment, CostKind, LI);
+ auto *LoadTy = FixedVectorType::get(VL.front()->getType(), VF);
+ for (LoadInst *LI : VectorizedStarts) {
+ Align Alignment = LI->getAlign();
+ GatherCost +=
+ TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment,
+ LI->getPointerAddressSpace(), CostKind,
+ TTI::OperandValueInfo(), LI);
+ }
+ for (std::pair<unsigned, unsigned> P : ScatterVectorized) {
+ auto *LI0 = cast<LoadInst>(VL[P.first]);
+ Align CommonAlignment = LI0->getAlign();
+ for (Value *V : VL.slice(P.first + 1, VF - 1))
+ CommonAlignment =
+ std::min(CommonAlignment, cast<LoadInst>(V)->getAlign());
+ GatherCost += TTI.getGatherScatterOpCost(
+ Instruction::Load, LoadTy, LI0->getPointerOperand(),
+ /*VariableMask=*/false, CommonAlignment, CostKind, LI0);
+ }
if (NeedInsertSubvectorAnalysis) {
// Add the cost for the subvectors insert.
for (int I = VF, E = VL.size(); I < E; I += VF)
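For the scatter/gather cost the code above no longer assumes a single representative load: it takes the minimum alignment across the group before asking TTI for the gather cost. The computation itself in isolation (illustrative numbers):

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<unsigned> Aligns = {16, 8, 4, 8}; // per-load alignments
  unsigned CommonAlignment = Aligns.front();
  for (unsigned A : Aligns)
    CommonAlignment = std::min(CommonAlignment, A);
  assert(CommonAlignment == 4);
}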
@@ -6938,77 +7107,137 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
: R.getGatherCost(Gathers, !Root && VL.equals(Gathers)));
};
- /// Compute the cost of creating a vector of type \p VecTy containing the
- /// extracted values from \p VL.
- InstructionCost computeExtractCost(ArrayRef<Value *> VL, ArrayRef<int> Mask,
- TTI::ShuffleKind ShuffleKind) {
- auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size());
- unsigned NumOfParts = TTI.getNumberOfParts(VecTy);
-
- if (ShuffleKind != TargetTransformInfo::SK_PermuteSingleSrc ||
- !NumOfParts || VecTy->getNumElements() < NumOfParts)
- return TTI.getShuffleCost(ShuffleKind, VecTy, Mask);
-
- bool AllConsecutive = true;
- unsigned EltsPerVector = VecTy->getNumElements() / NumOfParts;
- unsigned Idx = -1;
+ /// Compute the cost of creating a vector containing the extracted values from
+ /// \p VL.
+ InstructionCost
+ computeExtractCost(ArrayRef<Value *> VL, ArrayRef<int> Mask,
+ ArrayRef<std::optional<TTI::ShuffleKind>> ShuffleKinds,
+ unsigned NumParts) {
+ assert(VL.size() > NumParts && "Unexpected scalarized shuffle.");
+ unsigned NumElts =
+ std::accumulate(VL.begin(), VL.end(), 0, [](unsigned Sz, Value *V) {
+ auto *EE = dyn_cast<ExtractElementInst>(V);
+ if (!EE)
+ return Sz;
+ auto *VecTy = cast<FixedVectorType>(EE->getVectorOperandType());
+ return std::max(Sz, VecTy->getNumElements());
+ });
+ unsigned NumSrcRegs = TTI.getNumberOfParts(
+ FixedVectorType::get(VL.front()->getType(), NumElts));
+ if (NumSrcRegs == 0)
+ NumSrcRegs = 1;
+ // FIXME: this must be moved to TTI for better estimation.
+ unsigned EltsPerVector = PowerOf2Ceil(std::max(
+ divideCeil(VL.size(), NumParts), divideCeil(NumElts, NumSrcRegs)));
+ auto CheckPerRegistersShuffle =
+ [&](MutableArrayRef<int> Mask) -> std::optional<TTI::ShuffleKind> {
+ DenseSet<int> RegIndices;
+      // Check whether we are trying to permute the same single or 2 input
+      // vectors.
+ TTI::ShuffleKind ShuffleKind = TTI::SK_PermuteSingleSrc;
+ int FirstRegId = -1;
+ for (int &I : Mask) {
+ if (I == PoisonMaskElem)
+ continue;
+ int RegId = (I / NumElts) * NumParts + (I % NumElts) / EltsPerVector;
+ if (FirstRegId < 0)
+ FirstRegId = RegId;
+ RegIndices.insert(RegId);
+ if (RegIndices.size() > 2)
+ return std::nullopt;
+ if (RegIndices.size() == 2)
+ ShuffleKind = TTI::SK_PermuteTwoSrc;
+ I = (I % NumElts) % EltsPerVector +
+ (RegId == FirstRegId ? 0 : EltsPerVector);
+ }
+ return ShuffleKind;
+ };
InstructionCost Cost = 0;
// Process extracts in blocks of EltsPerVector to check if the source vector
// operand can be re-used directly. If not, add the cost of creating a
// shuffle to extract the values into a vector register.
- SmallVector<int> RegMask(EltsPerVector, PoisonMaskElem);
- for (auto *V : VL) {
- ++Idx;
-
- // Reached the start of a new vector registers.
- if (Idx % EltsPerVector == 0) {
- RegMask.assign(EltsPerVector, PoisonMaskElem);
- AllConsecutive = true;
+ for (unsigned Part = 0; Part < NumParts; ++Part) {
+ if (!ShuffleKinds[Part])
continue;
- }
-
- // Need to exclude undefs from analysis.
- if (isa<UndefValue>(V) || Mask[Idx] == PoisonMaskElem)
+ ArrayRef<int> MaskSlice =
+ Mask.slice(Part * EltsPerVector,
+ (Part == NumParts - 1 && Mask.size() % EltsPerVector != 0)
+ ? Mask.size() % EltsPerVector
+ : EltsPerVector);
+ SmallVector<int> SubMask(EltsPerVector, PoisonMaskElem);
+ copy(MaskSlice, SubMask.begin());
+ std::optional<TTI::ShuffleKind> RegShuffleKind =
+ CheckPerRegistersShuffle(SubMask);
+ if (!RegShuffleKind) {
+ Cost += TTI.getShuffleCost(
+ *ShuffleKinds[Part],
+ FixedVectorType::get(VL.front()->getType(), NumElts), MaskSlice);
continue;
-
- // Check all extracts for a vector register on the target directly
- // extract values in order.
- unsigned CurrentIdx = *getExtractIndex(cast<Instruction>(V));
- if (!isa<UndefValue>(VL[Idx - 1]) && Mask[Idx - 1] != PoisonMaskElem) {
- unsigned PrevIdx = *getExtractIndex(cast<Instruction>(VL[Idx - 1]));
- AllConsecutive &= PrevIdx + 1 == CurrentIdx &&
- CurrentIdx % EltsPerVector == Idx % EltsPerVector;
- RegMask[Idx % EltsPerVector] = CurrentIdx % EltsPerVector;
}
-
- if (AllConsecutive)
- continue;
-
- // Skip all indices, except for the last index per vector block.
- if ((Idx + 1) % EltsPerVector != 0 && Idx + 1 != VL.size())
- continue;
-
- // If we have a series of extracts which are not consecutive and hence
- // cannot re-use the source vector register directly, compute the shuffle
- // cost to extract the vector with EltsPerVector elements.
- Cost += TTI.getShuffleCost(
- TargetTransformInfo::SK_PermuteSingleSrc,
- FixedVectorType::get(VecTy->getElementType(), EltsPerVector),
- RegMask);
+ if (*RegShuffleKind != TTI::SK_PermuteSingleSrc ||
+ !ShuffleVectorInst::isIdentityMask(SubMask, EltsPerVector)) {
+ Cost += TTI.getShuffleCost(
+ *RegShuffleKind,
+ FixedVectorType::get(VL.front()->getType(), EltsPerVector),
+ SubMask);
+ }
}
return Cost;
}
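CheckPerRegistersShuffle above decides whether a submask can be done as one register-level shuffle: mask elements are mapped to source registers, more than two distinct registers disqualify the submask, two registers make it a two-source permute, and surviving indices are rebased to per-register lanes. A standalone simplification (flat index space instead of the two-source RegId formula; -1 as poison):

#include <cassert>
#include <optional>
#include <set>
#include <vector>

enum class Kind { SingleSrc, TwoSrc };

static std::optional<Kind> perRegisterShuffle(std::vector<int> &Mask,
                                              int EltsPerVector) {
  std::set<int> Regs;
  Kind K = Kind::SingleSrc;
  int FirstReg = -1;
  for (int &I : Mask) {
    if (I == -1) // poison lane
      continue;
    int Reg = I / EltsPerVector;
    if (FirstReg < 0)
      FirstReg = Reg;
    Regs.insert(Reg);
    if (Regs.size() > 2)
      return std::nullopt; // needs more than one shuffle
    if (Regs.size() == 2)
      K = Kind::TwoSrc;
    I = I % EltsPerVector + (Reg == FirstReg ? 0 : EltsPerVector);
  }
  return K;
}

int main() {
  std::vector<int> M = {4, 5, 0, 1}; // reads registers 1 and 0 (width 4)
  auto K = perRegisterShuffle(M, 4);
  assert(K && *K == Kind::TwoSrc);
  assert((M == std::vector<int>{0, 1, 4, 5})); // rebased per-register lanes
}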
+  /// Transforms the mask \p CommonMask per the given \p Mask to produce the
+  /// proper mask after the shuffle emission.
+ static void transformMaskAfterShuffle(MutableArrayRef<int> CommonMask,
+ ArrayRef<int> Mask) {
+ for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
+ if (Mask[Idx] != PoisonMaskElem)
+ CommonMask[Idx] = Idx;
+ }
+  /// Adds the cost of reshuffling \p E1 and \p E2 (if present), using the
+  /// given mask \p Mask and register number \p Part, which includes
+  /// \p SliceSize elements.
+ void estimateNodesPermuteCost(const TreeEntry &E1, const TreeEntry *E2,
+ ArrayRef<int> Mask, unsigned Part,
+ unsigned SliceSize) {
+ if (SameNodesEstimated) {
+      // Delay the cost estimation if the same nodes are being reshuffled.
+      // If we already requested the cost of reshuffling E1 and E2 before,
+      // there is no need to estimate another cost with the sub-Mask; instead,
+      // include this sub-Mask into the CommonMask to estimate it later and
+      // avoid double cost estimation.
+ if ((InVectors.size() == 2 &&
+ InVectors.front().get<const TreeEntry *>() == &E1 &&
+ InVectors.back().get<const TreeEntry *>() == E2) ||
+ (!E2 && InVectors.front().get<const TreeEntry *>() == &E1)) {
+ assert(all_of(ArrayRef(CommonMask).slice(Part * SliceSize, SliceSize),
+ [](int Idx) { return Idx == PoisonMaskElem; }) &&
+ "Expected all poisoned elements.");
+ ArrayRef<int> SubMask =
+ ArrayRef(Mask).slice(Part * SliceSize, SliceSize);
+ copy(SubMask, std::next(CommonMask.begin(), SliceSize * Part));
+ return;
+ }
+      // Found non-matching nodes - need to estimate the cost for the matched
+      // nodes and transform the mask.
+ Cost += createShuffle(InVectors.front(),
+ InVectors.size() == 1 ? nullptr : InVectors.back(),
+ CommonMask);
+ transformMaskAfterShuffle(CommonMask, CommonMask);
+ }
+ SameNodesEstimated = false;
+ Cost += createShuffle(&E1, E2, Mask);
+ transformMaskAfterShuffle(CommonMask, Mask);
+ }
class ShuffleCostBuilder {
const TargetTransformInfo &TTI;
static bool isEmptyOrIdentity(ArrayRef<int> Mask, unsigned VF) {
- int Limit = 2 * VF;
+ int Index = -1;
return Mask.empty() ||
(VF == Mask.size() &&
- all_of(Mask, [Limit](int Idx) { return Idx < Limit; }) &&
- ShuffleVectorInst::isIdentityMask(Mask));
+ ShuffleVectorInst::isIdentityMask(Mask, VF)) ||
+ (ShuffleVectorInst::isExtractSubvectorMask(Mask, VF, Index) &&
+ Index == 0);
}
public:
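isEmptyOrIdentity above treats three mask shapes as free: an empty mask, a full identity over the vector, and an extract of the leading subvector (Index == 0). A std-only predicate capturing the same idea (-1 as poison):

#include <cassert>
#include <cstddef>
#include <vector>

static bool isFreeShuffle(const std::vector<int> &Mask, unsigned VF) {
  if (Mask.empty())
    return true;
  bool Identity = Mask.size() == VF;
  for (std::size_t I = 0; I < Mask.size() && Identity; ++I)
    Identity = Mask[I] == -1 || Mask[I] == static_cast<int>(I);
  if (Identity)
    return true;
  // Extract-subvector from index 0: mask is 0..K-1 with K <= VF.
  for (std::size_t I = 0; I < Mask.size(); ++I)
    if (Mask[I] != static_cast<int>(I))
      return false;
  return Mask.size() <= VF;
}

int main() {
  assert(isFreeShuffle({}, 8));
  assert(isFreeShuffle({0, 1, 2, 3}, 4));  // identity
  assert(isFreeShuffle({0, 1}, 4));        // leading subvector
  assert(!isFreeShuffle({1, 0, 2, 3}, 4)); // real permutation
}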
@@ -7021,21 +7250,17 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
cast<VectorType>(V1->getType())->getElementCount().getKnownMinValue();
if (isEmptyOrIdentity(Mask, VF))
return TTI::TCC_Free;
- return TTI.getShuffleCost(
- TTI::SK_PermuteTwoSrc,
- FixedVectorType::get(
- cast<VectorType>(V1->getType())->getElementType(), Mask.size()),
- Mask);
+ return TTI.getShuffleCost(TTI::SK_PermuteTwoSrc,
+ cast<VectorType>(V1->getType()), Mask);
}
InstructionCost createShuffleVector(Value *V1, ArrayRef<int> Mask) const {
// Empty mask or identity mask are free.
- if (isEmptyOrIdentity(Mask, Mask.size()))
+ unsigned VF =
+ cast<VectorType>(V1->getType())->getElementCount().getKnownMinValue();
+ if (isEmptyOrIdentity(Mask, VF))
return TTI::TCC_Free;
- return TTI.getShuffleCost(
- TTI::SK_PermuteSingleSrc,
- FixedVectorType::get(
- cast<VectorType>(V1->getType())->getElementType(), Mask.size()),
- Mask);
+ return TTI.getShuffleCost(TTI::SK_PermuteSingleSrc,
+ cast<VectorType>(V1->getType()), Mask);
}
InstructionCost createIdentity(Value *) const { return TTI::TCC_Free; }
InstructionCost createPoison(Type *Ty, unsigned VF) const {
@@ -7052,139 +7277,226 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
const PointerUnion<Value *, const TreeEntry *> &P2,
ArrayRef<int> Mask) {
ShuffleCostBuilder Builder(TTI);
+ SmallVector<int> CommonMask(Mask.begin(), Mask.end());
Value *V1 = P1.dyn_cast<Value *>(), *V2 = P2.dyn_cast<Value *>();
- unsigned CommonVF = 0;
- if (!V1) {
+ unsigned CommonVF = Mask.size();
+ if (!V1 && !V2 && !P2.isNull()) {
+ // Shuffle 2 entry nodes.
const TreeEntry *E = P1.get<const TreeEntry *>();
unsigned VF = E->getVectorFactor();
- if (V2) {
- unsigned V2VF = cast<FixedVectorType>(V2->getType())->getNumElements();
- if (V2VF != VF && V2VF == E->Scalars.size())
- VF = E->Scalars.size();
- } else if (!P2.isNull()) {
- const TreeEntry *E2 = P2.get<const TreeEntry *>();
- if (E->Scalars.size() == E2->Scalars.size())
- CommonVF = VF = E->Scalars.size();
- } else {
- // P2 is empty, check that we have same node + reshuffle (if any).
- if (E->Scalars.size() == Mask.size() && VF != Mask.size()) {
- VF = E->Scalars.size();
- SmallVector<int> CommonMask(Mask.begin(), Mask.end());
- ::addMask(CommonMask, E->getCommonMask());
- V1 = Constant::getNullValue(
- FixedVectorType::get(E->Scalars.front()->getType(), VF));
- return BaseShuffleAnalysis::createShuffle<InstructionCost>(
- V1, nullptr, CommonMask, Builder);
+ const TreeEntry *E2 = P2.get<const TreeEntry *>();
+ CommonVF = std::max(VF, E2->getVectorFactor());
+ assert(all_of(Mask,
+ [=](int Idx) {
+ return Idx < 2 * static_cast<int>(CommonVF);
+ }) &&
+ "All elements in mask must be less than 2 * CommonVF.");
+ if (E->Scalars.size() == E2->Scalars.size()) {
+ SmallVector<int> EMask = E->getCommonMask();
+ SmallVector<int> E2Mask = E2->getCommonMask();
+ if (!EMask.empty() || !E2Mask.empty()) {
+ for (int &Idx : CommonMask) {
+ if (Idx == PoisonMaskElem)
+ continue;
+ if (Idx < static_cast<int>(CommonVF) && !EMask.empty())
+ Idx = EMask[Idx];
+ else if (Idx >= static_cast<int>(CommonVF))
+ Idx = (E2Mask.empty() ? Idx - CommonVF : E2Mask[Idx - CommonVF]) +
+ E->Scalars.size();
+ }
}
+ CommonVF = E->Scalars.size();
}
V1 = Constant::getNullValue(
- FixedVectorType::get(E->Scalars.front()->getType(), VF));
- }
- if (!V2 && !P2.isNull()) {
- const TreeEntry *E = P2.get<const TreeEntry *>();
+ FixedVectorType::get(E->Scalars.front()->getType(), CommonVF));
+ V2 = getAllOnesValue(
+ *R.DL, FixedVectorType::get(E->Scalars.front()->getType(), CommonVF));
+ } else if (!V1 && P2.isNull()) {
+ // Shuffle single entry node.
+ const TreeEntry *E = P1.get<const TreeEntry *>();
unsigned VF = E->getVectorFactor();
- unsigned V1VF = cast<FixedVectorType>(V1->getType())->getNumElements();
- if (!CommonVF && V1VF == E->Scalars.size())
+ CommonVF = VF;
+ assert(
+ all_of(Mask,
+ [=](int Idx) { return Idx < static_cast<int>(CommonVF); }) &&
+ "All elements in mask must be less than CommonVF.");
+ if (E->Scalars.size() == Mask.size() && VF != Mask.size()) {
+ SmallVector<int> EMask = E->getCommonMask();
+ assert(!EMask.empty() && "Expected non-empty common mask.");
+ for (int &Idx : CommonMask) {
+ if (Idx != PoisonMaskElem)
+ Idx = EMask[Idx];
+ }
CommonVF = E->Scalars.size();
- if (CommonVF)
- VF = CommonVF;
- V2 = Constant::getNullValue(
- FixedVectorType::get(E->Scalars.front()->getType(), VF));
- }
- return BaseShuffleAnalysis::createShuffle<InstructionCost>(V1, V2, Mask,
- Builder);
+ }
+ V1 = Constant::getNullValue(
+ FixedVectorType::get(E->Scalars.front()->getType(), CommonVF));
+ } else if (V1 && P2.isNull()) {
+ // Shuffle single vector.
+ CommonVF = cast<FixedVectorType>(V1->getType())->getNumElements();
+ assert(
+ all_of(Mask,
+ [=](int Idx) { return Idx < static_cast<int>(CommonVF); }) &&
+ "All elements in mask must be less than CommonVF.");
+ } else if (V1 && !V2) {
+ // Shuffle vector and tree node.
+ unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
+ const TreeEntry *E2 = P2.get<const TreeEntry *>();
+ CommonVF = std::max(VF, E2->getVectorFactor());
+ assert(all_of(Mask,
+ [=](int Idx) {
+ return Idx < 2 * static_cast<int>(CommonVF);
+ }) &&
+ "All elements in mask must be less than 2 * CommonVF.");
+ if (E2->Scalars.size() == VF && VF != CommonVF) {
+ SmallVector<int> E2Mask = E2->getCommonMask();
+ assert(!E2Mask.empty() && "Expected non-empty common mask.");
+ for (int &Idx : CommonMask) {
+ if (Idx == PoisonMaskElem)
+ continue;
+ if (Idx >= static_cast<int>(CommonVF))
+ Idx = E2Mask[Idx - CommonVF] + VF;
+ }
+ CommonVF = VF;
+ }
+ V1 = Constant::getNullValue(
+ FixedVectorType::get(E2->Scalars.front()->getType(), CommonVF));
+ V2 = getAllOnesValue(
+ *R.DL,
+ FixedVectorType::get(E2->Scalars.front()->getType(), CommonVF));
+ } else if (!V1 && V2) {
+ // Shuffle vector and tree node.
+ unsigned VF = cast<FixedVectorType>(V2->getType())->getNumElements();
+ const TreeEntry *E1 = P1.get<const TreeEntry *>();
+ CommonVF = std::max(VF, E1->getVectorFactor());
+ assert(all_of(Mask,
+ [=](int Idx) {
+ return Idx < 2 * static_cast<int>(CommonVF);
+ }) &&
+ "All elements in mask must be less than 2 * CommonVF.");
+ if (E1->Scalars.size() == VF && VF != CommonVF) {
+ SmallVector<int> E1Mask = E1->getCommonMask();
+ assert(!E1Mask.empty() && "Expected non-empty common mask.");
+ for (int &Idx : CommonMask) {
+ if (Idx == PoisonMaskElem)
+ continue;
+ if (Idx >= static_cast<int>(CommonVF))
+ Idx = E1Mask[Idx - CommonVF] + VF;
+ }
+ CommonVF = VF;
+ }
+ V1 = Constant::getNullValue(
+ FixedVectorType::get(E1->Scalars.front()->getType(), CommonVF));
+ V2 = getAllOnesValue(
+ *R.DL,
+ FixedVectorType::get(E1->Scalars.front()->getType(), CommonVF));
+ } else {
+ assert(V1 && V2 && "Expected both vectors.");
+ unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
+ CommonVF =
+ std::max(VF, cast<FixedVectorType>(V2->getType())->getNumElements());
+ assert(all_of(Mask,
+ [=](int Idx) {
+ return Idx < 2 * static_cast<int>(CommonVF);
+ }) &&
+ "All elements in mask must be less than 2 * CommonVF.");
+ if (V1->getType() != V2->getType()) {
+ V1 = Constant::getNullValue(FixedVectorType::get(
+ cast<FixedVectorType>(V1->getType())->getElementType(), CommonVF));
+ V2 = getAllOnesValue(
+ *R.DL, FixedVectorType::get(
+ cast<FixedVectorType>(V1->getType())->getElementType(),
+ CommonVF));
+ }
+ }
+ InVectors.front() = Constant::getNullValue(FixedVectorType::get(
+ cast<FixedVectorType>(V1->getType())->getElementType(),
+ CommonMask.size()));
+ if (InVectors.size() == 2)
+ InVectors.pop_back();
+ return BaseShuffleAnalysis::createShuffle<InstructionCost>(
+ V1, V2, CommonMask, Builder);
}
public:
ShuffleCostEstimator(TargetTransformInfo &TTI,
ArrayRef<Value *> VectorizedVals, BoUpSLP &R,
SmallPtrSetImpl<Value *> &CheckedExtracts)
- : TTI(TTI), VectorizedVals(VectorizedVals), R(R),
- CheckedExtracts(CheckedExtracts) {}
- Value *adjustExtracts(const TreeEntry *E, ArrayRef<int> Mask,
- TTI::ShuffleKind ShuffleKind) {
+ : TTI(TTI), VectorizedVals(VectorizedVals.begin(), VectorizedVals.end()),
+ R(R), CheckedExtracts(CheckedExtracts) {}
+ Value *adjustExtracts(const TreeEntry *E, MutableArrayRef<int> Mask,
+ ArrayRef<std::optional<TTI::ShuffleKind>> ShuffleKinds,
+ unsigned NumParts, bool &UseVecBaseAsInput) {
+ UseVecBaseAsInput = false;
if (Mask.empty())
return nullptr;
Value *VecBase = nullptr;
ArrayRef<Value *> VL = E->Scalars;
- auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size());
// If the resulting type is scalarized, do not adjust the cost.
- unsigned VecNumParts = TTI.getNumberOfParts(VecTy);
- if (VecNumParts == VecTy->getNumElements())
+ if (NumParts == VL.size())
return nullptr;
- DenseMap<Value *, int> ExtractVectorsTys;
- for (auto [I, V] : enumerate(VL)) {
- // Ignore non-extractelement scalars.
- if (isa<UndefValue>(V) || (!Mask.empty() && Mask[I] == PoisonMaskElem))
- continue;
- // If all users of instruction are going to be vectorized and this
- // instruction itself is not going to be vectorized, consider this
- // instruction as dead and remove its cost from the final cost of the
- // vectorized tree.
- // Also, avoid adjusting the cost for extractelements with multiple uses
- // in different graph entries.
- const TreeEntry *VE = R.getTreeEntry(V);
- if (!CheckedExtracts.insert(V).second ||
- !R.areAllUsersVectorized(cast<Instruction>(V), VectorizedVals) ||
- (VE && VE != E))
- continue;
- auto *EE = cast<ExtractElementInst>(V);
- VecBase = EE->getVectorOperand();
- std::optional<unsigned> EEIdx = getExtractIndex(EE);
- if (!EEIdx)
- continue;
- unsigned Idx = *EEIdx;
- if (VecNumParts != TTI.getNumberOfParts(EE->getVectorOperandType())) {
- auto It =
- ExtractVectorsTys.try_emplace(EE->getVectorOperand(), Idx).first;
- It->getSecond() = std::min<int>(It->second, Idx);
- }
- // Take credit for instruction that will become dead.
- if (EE->hasOneUse()) {
- Instruction *Ext = EE->user_back();
- if (isa<SExtInst, ZExtInst>(Ext) && all_of(Ext->users(), [](User *U) {
- return isa<GetElementPtrInst>(U);
- })) {
- // Use getExtractWithExtendCost() to calculate the cost of
- // extractelement/ext pair.
- Cost -= TTI.getExtractWithExtendCost(Ext->getOpcode(), Ext->getType(),
- EE->getVectorOperandType(), Idx);
- // Add back the cost of s|zext which is subtracted separately.
- Cost += TTI.getCastInstrCost(
- Ext->getOpcode(), Ext->getType(), EE->getType(),
- TTI::getCastContextHint(Ext), CostKind, Ext);
+    // Check if it can be considered reused if the same extractelements were
+    // already vectorized.
+ bool PrevNodeFound = any_of(
+ ArrayRef(R.VectorizableTree).take_front(E->Idx),
+ [&](const std::unique_ptr<TreeEntry> &TE) {
+ return ((!TE->isAltShuffle() &&
+ TE->getOpcode() == Instruction::ExtractElement) ||
+ TE->State == TreeEntry::NeedToGather) &&
+ all_of(enumerate(TE->Scalars), [&](auto &&Data) {
+ return VL.size() > Data.index() &&
+ (Mask[Data.index()] == PoisonMaskElem ||
+ isa<UndefValue>(VL[Data.index()]) ||
+ Data.value() == VL[Data.index()]);
+ });
+ });
+ SmallPtrSet<Value *, 4> UniqueBases;
+ unsigned SliceSize = VL.size() / NumParts;
+ for (unsigned Part = 0; Part < NumParts; ++Part) {
+ ArrayRef<int> SubMask = Mask.slice(Part * SliceSize, SliceSize);
+ for (auto [I, V] : enumerate(VL.slice(Part * SliceSize, SliceSize))) {
+ // Ignore non-extractelement scalars.
+ if (isa<UndefValue>(V) ||
+ (!SubMask.empty() && SubMask[I] == PoisonMaskElem))
continue;
- }
- }
- Cost -= TTI.getVectorInstrCost(*EE, EE->getVectorOperandType(), CostKind,
- Idx);
- }
- // Add a cost for subvector extracts/inserts if required.
- for (const auto &Data : ExtractVectorsTys) {
- auto *EEVTy = cast<FixedVectorType>(Data.first->getType());
- unsigned NumElts = VecTy->getNumElements();
- if (Data.second % NumElts == 0)
- continue;
- if (TTI.getNumberOfParts(EEVTy) > VecNumParts) {
- unsigned Idx = (Data.second / NumElts) * NumElts;
- unsigned EENumElts = EEVTy->getNumElements();
- if (Idx % NumElts == 0)
+ // If all users of instruction are going to be vectorized and this
+ // instruction itself is not going to be vectorized, consider this
+ // instruction as dead and remove its cost from the final cost of the
+ // vectorized tree.
+ // Also, avoid adjusting the cost for extractelements with multiple uses
+ // in different graph entries.
+ auto *EE = cast<ExtractElementInst>(V);
+ VecBase = EE->getVectorOperand();
+ UniqueBases.insert(VecBase);
+ const TreeEntry *VE = R.getTreeEntry(V);
+ if (!CheckedExtracts.insert(V).second ||
+ !R.areAllUsersVectorized(cast<Instruction>(V), &VectorizedVals) ||
+ (VE && VE != E))
continue;
- if (Idx + NumElts <= EENumElts) {
- Cost += TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
- EEVTy, std::nullopt, CostKind, Idx, VecTy);
- } else {
- // Need to round up the subvector type vectorization factor to avoid a
- // crash in cost model functions. Make SubVT so that Idx + VF of SubVT
- // <= EENumElts.
- auto *SubVT =
- FixedVectorType::get(VecTy->getElementType(), EENumElts - Idx);
- Cost += TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
- EEVTy, std::nullopt, CostKind, Idx, SubVT);
+ std::optional<unsigned> EEIdx = getExtractIndex(EE);
+ if (!EEIdx)
+ continue;
+ unsigned Idx = *EEIdx;
+ // Take credit for instruction that will become dead.
+ if (EE->hasOneUse() || !PrevNodeFound) {
+ Instruction *Ext = EE->user_back();
+ if (isa<SExtInst, ZExtInst>(Ext) && all_of(Ext->users(), [](User *U) {
+ return isa<GetElementPtrInst>(U);
+ })) {
+ // Use getExtractWithExtendCost() to calculate the cost of
+ // extractelement/ext pair.
+ Cost -=
+ TTI.getExtractWithExtendCost(Ext->getOpcode(), Ext->getType(),
+ EE->getVectorOperandType(), Idx);
+ // Add back the cost of s|zext which is subtracted separately.
+ Cost += TTI.getCastInstrCost(
+ Ext->getOpcode(), Ext->getType(), EE->getType(),
+ TTI::getCastContextHint(Ext), CostKind, Ext);
+ continue;
+ }
}
- } else {
- Cost += TTI.getShuffleCost(TargetTransformInfo::SK_InsertSubvector,
- VecTy, std::nullopt, CostKind, 0, EEVTy);
+ Cost -= TTI.getVectorInstrCost(*EE, EE->getVectorOperandType(),
+ CostKind, Idx);
}
}
// Check that gather of extractelements can be represented as just a
@@ -7192,31 +7504,152 @@ public:
// Found the bunch of extractelement instructions that must be gathered
    // into a vector and can be represented as a permutation of elements in a
// single input vector or of 2 input vectors.
- Cost += computeExtractCost(VL, Mask, ShuffleKind);
+    // This is skipped when the same extractelements were already vectorized,
+    // since the shuffle then reuses that node.
+ if (!PrevNodeFound)
+ Cost += computeExtractCost(VL, Mask, ShuffleKinds, NumParts);
+ InVectors.assign(1, E);
+ CommonMask.assign(Mask.begin(), Mask.end());
+ transformMaskAfterShuffle(CommonMask, CommonMask);
+ SameNodesEstimated = false;
+ if (NumParts != 1 && UniqueBases.size() != 1) {
+ UseVecBaseAsInput = true;
+ VecBase = Constant::getNullValue(
+ FixedVectorType::get(VL.front()->getType(), CommonMask.size()));
+ }
return VecBase;
}
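  // How the credit above works, as a minimal sketch with made-up TTI costs:
  // for `%e = extractelement <4 x i32> %v, i32 2` followed by
  // `%s = sext i32 %e to i64` used only by GEPs,
  //   Cost -= TTI.getExtractWithExtendCost(...); // the extract/sext pair dies
  //   Cost += TTI.getCastInstrCost(SExt, ...);   // sext re-added, since it is
  //                                              // subtracted elsewhere
  // while a plain extract takes only the simpler
  //   Cost -= TTI.getVectorInstrCost(...);
  // branch.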
- void add(const TreeEntry *E1, const TreeEntry *E2, ArrayRef<int> Mask) {
- CommonMask.assign(Mask.begin(), Mask.end());
- InVectors.assign({E1, E2});
+ /// Checks if the specified entry \p E needs to be delayed because of its
+ /// dependency nodes.
+ std::optional<InstructionCost>
+ needToDelay(const TreeEntry *,
+ ArrayRef<SmallVector<const TreeEntry *>>) const {
+ // No need to delay the cost estimation during analysis.
+ return std::nullopt;
}
- void add(const TreeEntry *E1, ArrayRef<int> Mask) {
- CommonMask.assign(Mask.begin(), Mask.end());
- InVectors.assign(1, E1);
+ void add(const TreeEntry &E1, const TreeEntry &E2, ArrayRef<int> Mask) {
+ if (&E1 == &E2) {
+ assert(all_of(Mask,
+ [&](int Idx) {
+ return Idx < static_cast<int>(E1.getVectorFactor());
+ }) &&
+ "Expected single vector shuffle mask.");
+ add(E1, Mask);
+ return;
+ }
+ if (InVectors.empty()) {
+ CommonMask.assign(Mask.begin(), Mask.end());
+ InVectors.assign({&E1, &E2});
+ return;
+ }
+ assert(!CommonMask.empty() && "Expected non-empty common mask.");
+ auto *MaskVecTy =
+ FixedVectorType::get(E1.Scalars.front()->getType(), Mask.size());
+ unsigned NumParts = TTI.getNumberOfParts(MaskVecTy);
+ if (NumParts == 0 || NumParts >= Mask.size())
+ NumParts = 1;
+ unsigned SliceSize = Mask.size() / NumParts;
+ const auto *It =
+ find_if(Mask, [](int Idx) { return Idx != PoisonMaskElem; });
+ unsigned Part = std::distance(Mask.begin(), It) / SliceSize;
+ estimateNodesPermuteCost(E1, &E2, Mask, Part, SliceSize);
+ }
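  // Worked sketch of the Part computation above, assuming an 8-lane mask on
  // a target whose registers hold 4 lanes: SliceSize = 8 / 2 = 4, and if the
  // first non-poison mask element sits at index 5, Part = 5 / 4 = 1, so only
  // the second register's sub-shuffle is re-estimated.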
+ void add(const TreeEntry &E1, ArrayRef<int> Mask) {
+ if (InVectors.empty()) {
+ CommonMask.assign(Mask.begin(), Mask.end());
+ InVectors.assign(1, &E1);
+ return;
+ }
+ assert(!CommonMask.empty() && "Expected non-empty common mask.");
+ auto *MaskVecTy =
+ FixedVectorType::get(E1.Scalars.front()->getType(), Mask.size());
+ unsigned NumParts = TTI.getNumberOfParts(MaskVecTy);
+ if (NumParts == 0 || NumParts >= Mask.size())
+ NumParts = 1;
+ unsigned SliceSize = Mask.size() / NumParts;
+ const auto *It =
+ find_if(Mask, [](int Idx) { return Idx != PoisonMaskElem; });
+ unsigned Part = std::distance(Mask.begin(), It) / SliceSize;
+ estimateNodesPermuteCost(E1, nullptr, Mask, Part, SliceSize);
+ if (!SameNodesEstimated && InVectors.size() == 1)
+ InVectors.emplace_back(&E1);
+ }
+ /// Adds 2 input vectors and the mask for their shuffling.
+ void add(Value *V1, Value *V2, ArrayRef<int> Mask) {
+    // This can be reached only when shuffling 2 vectors of extractelements,
+    // which was already handled in adjustExtracts.
+ assert(InVectors.size() == 1 &&
+ all_of(enumerate(CommonMask),
+ [&](auto P) {
+ if (P.value() == PoisonMaskElem)
+ return Mask[P.index()] == PoisonMaskElem;
+ auto *EI =
+ cast<ExtractElementInst>(InVectors.front()
+ .get<const TreeEntry *>()
+ ->Scalars[P.index()]);
+ return EI->getVectorOperand() == V1 ||
+ EI->getVectorOperand() == V2;
+ }) &&
+ "Expected extractelement vectors.");
}
/// Adds another one input vector and the mask for the shuffling.
- void add(Value *V1, ArrayRef<int> Mask) {
- assert(CommonMask.empty() && InVectors.empty() &&
- "Expected empty input mask/vectors.");
- CommonMask.assign(Mask.begin(), Mask.end());
- InVectors.assign(1, V1);
+ void add(Value *V1, ArrayRef<int> Mask, bool ForExtracts = false) {
+ if (InVectors.empty()) {
+ assert(CommonMask.empty() && !ForExtracts &&
+ "Expected empty input mask/vectors.");
+ CommonMask.assign(Mask.begin(), Mask.end());
+ InVectors.assign(1, V1);
+ return;
+ }
+ if (ForExtracts) {
+ // No need to add vectors here, already handled them in adjustExtracts.
+ assert(InVectors.size() == 1 &&
+ InVectors.front().is<const TreeEntry *>() && !CommonMask.empty() &&
+ all_of(enumerate(CommonMask),
+ [&](auto P) {
+ Value *Scalar = InVectors.front()
+ .get<const TreeEntry *>()
+ ->Scalars[P.index()];
+ if (P.value() == PoisonMaskElem)
+ return P.value() == Mask[P.index()] ||
+ isa<UndefValue>(Scalar);
+ if (isa<Constant>(V1))
+ return true;
+ auto *EI = cast<ExtractElementInst>(Scalar);
+ return EI->getVectorOperand() == V1;
+ }) &&
+ "Expected only tree entry for extractelement vectors.");
+ return;
+ }
+ assert(!InVectors.empty() && !CommonMask.empty() &&
+ "Expected only tree entries from extracts/reused buildvectors.");
+ unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
+ if (InVectors.size() == 2) {
+ Cost += createShuffle(InVectors.front(), InVectors.back(), CommonMask);
+ transformMaskAfterShuffle(CommonMask, CommonMask);
+ VF = std::max<unsigned>(VF, CommonMask.size());
+ } else if (const auto *InTE =
+ InVectors.front().dyn_cast<const TreeEntry *>()) {
+ VF = std::max(VF, InTE->getVectorFactor());
+ } else {
+ VF = std::max(
+ VF, cast<FixedVectorType>(InVectors.front().get<Value *>()->getType())
+ ->getNumElements());
+ }
+ InVectors.push_back(V1);
+ for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
+ if (Mask[Idx] != PoisonMaskElem && CommonMask[Idx] == PoisonMaskElem)
+ CommonMask[Idx] = Mask[Idx] + VF;
}
- Value *gather(ArrayRef<Value *> VL, Value *Root = nullptr) {
+ Value *gather(ArrayRef<Value *> VL, unsigned MaskVF = 0,
+ Value *Root = nullptr) {
Cost += getBuildVectorCost(VL, Root);
if (!Root) {
- assert(InVectors.empty() && "Unexpected input vectors for buildvector.");
// FIXME: Need to find a way to avoid use of getNullValue here.
SmallVector<Constant *> Vals;
- for (Value *V : VL) {
+ unsigned VF = VL.size();
+ if (MaskVF != 0)
+ VF = std::min(VF, MaskVF);
+ for (Value *V : VL.take_front(VF)) {
if (isa<UndefValue>(V)) {
Vals.push_back(cast<Constant>(V));
continue;
@@ -7226,9 +7659,11 @@ public:
return ConstantVector::get(Vals);
}
return ConstantVector::getSplat(
- ElementCount::getFixed(VL.size()),
- Constant::getNullValue(VL.front()->getType()));
+ ElementCount::getFixed(
+ cast<FixedVectorType>(Root->getType())->getNumElements()),
+ getAllOnesValue(*R.DL, VL.front()->getType()));
}
+ InstructionCost createFreeze(InstructionCost Cost) { return Cost; }
/// Finalize emission of the shuffles.
InstructionCost
finalize(ArrayRef<int> ExtMask, unsigned VF = 0,
@@ -7236,31 +7671,24 @@ public:
IsFinalized = true;
if (Action) {
const PointerUnion<Value *, const TreeEntry *> &Vec = InVectors.front();
- if (InVectors.size() == 2) {
+ if (InVectors.size() == 2)
Cost += createShuffle(Vec, InVectors.back(), CommonMask);
- InVectors.pop_back();
- } else {
+ else
Cost += createShuffle(Vec, nullptr, CommonMask);
- }
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
if (CommonMask[Idx] != PoisonMaskElem)
CommonMask[Idx] = Idx;
assert(VF > 0 &&
"Expected vector length for the final value before action.");
- Value *V = Vec.dyn_cast<Value *>();
- if (!Vec.isNull() && !V)
- V = Constant::getNullValue(FixedVectorType::get(
- Vec.get<const TreeEntry *>()->Scalars.front()->getType(),
- CommonMask.size()));
+ Value *V = Vec.get<Value *>();
Action(V, CommonMask);
+ InVectors.front() = V;
}
::addMask(CommonMask, ExtMask, /*ExtendingManyInputs=*/true);
- if (CommonMask.empty())
- return Cost;
- int Limit = CommonMask.size() * 2;
- if (all_of(CommonMask, [=](int Idx) { return Idx < Limit; }) &&
- ShuffleVectorInst::isIdentityMask(CommonMask))
+ if (CommonMask.empty()) {
+ assert(InVectors.size() == 1 && "Expected only one vector with no mask");
return Cost;
+ }
return Cost +
createShuffle(InVectors.front(),
InVectors.size() == 2 ? InVectors.back() : nullptr,
@@ -7273,28 +7701,63 @@ public:
}
};
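// Typical driving sequence for the estimator above, as a minimal sketch (the
// real driver is the processBuildVector call further down; all names here
// are placeholders):
//   ShuffleCostEstimator Estimator(TTI, VectorizedVals, R, CheckedExtracts);
//   bool UseVecBase;
//   Value *Base = Estimator.adjustExtracts(E, ExtractMask, Kinds, NumParts,
//                                          UseVecBase);
//   Estimator.add(*E1, Mask);               // queue inputs and their masks
//   InstructionCost C = Estimator.finalize(E->ReuseShuffleIndices);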
+const BoUpSLP::TreeEntry *BoUpSLP::getOperandEntry(const TreeEntry *E,
+ unsigned Idx) const {
+ Value *Op = E->getOperand(Idx).front();
+ if (const TreeEntry *TE = getTreeEntry(Op)) {
+ if (find_if(E->UserTreeIndices, [&](const EdgeInfo &EI) {
+ return EI.EdgeIdx == Idx && EI.UserTE == E;
+ }) != TE->UserTreeIndices.end())
+ return TE;
+ auto MIt = MultiNodeScalars.find(Op);
+ if (MIt != MultiNodeScalars.end()) {
+ for (const TreeEntry *TE : MIt->second) {
+ if (find_if(TE->UserTreeIndices, [&](const EdgeInfo &EI) {
+ return EI.EdgeIdx == Idx && EI.UserTE == E;
+ }) != TE->UserTreeIndices.end())
+ return TE;
+ }
+ }
+ }
+ const auto *It =
+ find_if(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
+ return TE->State == TreeEntry::NeedToGather &&
+ find_if(TE->UserTreeIndices, [&](const EdgeInfo &EI) {
+ return EI.EdgeIdx == Idx && EI.UserTE == E;
+ }) != TE->UserTreeIndices.end();
+ });
+ assert(It != VectorizableTree.end() && "Expected vectorizable entry.");
+ return It->get();
+}
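// Summary of the lookup order above: (1) the vectorized entry registered for
// the operand's first scalar, accepted only if one of its user edges is
// (E, Idx); (2) the split entries recorded in MultiNodeScalars, with the
// same edge test; (3) otherwise the gather node hanging off that edge. The
// final assert holds because every operand edge of a vectorized node is
// backed by some tree entry.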
+
InstructionCost
BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
SmallPtrSetImpl<Value *> &CheckedExtracts) {
ArrayRef<Value *> VL = E->Scalars;
Type *ScalarTy = VL[0]->getType();
- if (auto *SI = dyn_cast<StoreInst>(VL[0]))
- ScalarTy = SI->getValueOperand()->getType();
- else if (auto *CI = dyn_cast<CmpInst>(VL[0]))
- ScalarTy = CI->getOperand(0)->getType();
- else if (auto *IE = dyn_cast<InsertElementInst>(VL[0]))
- ScalarTy = IE->getOperand(1)->getType();
+ if (E->State != TreeEntry::NeedToGather) {
+ if (auto *SI = dyn_cast<StoreInst>(VL[0]))
+ ScalarTy = SI->getValueOperand()->getType();
+ else if (auto *CI = dyn_cast<CmpInst>(VL[0]))
+ ScalarTy = CI->getOperand(0)->getType();
+ else if (auto *IE = dyn_cast<InsertElementInst>(VL[0]))
+ ScalarTy = IE->getOperand(1)->getType();
+ }
+ if (!FixedVectorType::isValidElementType(ScalarTy))
+ return InstructionCost::getInvalid();
auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
// If we have computed a smaller type for the expression, update VecTy so
// that the costs will be accurate.
- if (MinBWs.count(VL[0]))
- VecTy = FixedVectorType::get(
- IntegerType::get(F->getContext(), MinBWs[VL[0]].first), VL.size());
+ auto It = MinBWs.find(E);
+ if (It != MinBWs.end()) {
+ ScalarTy = IntegerType::get(F->getContext(), It->second.first);
+ VecTy = FixedVectorType::get(ScalarTy, VL.size());
+ }
unsigned EntryVF = E->getVectorFactor();
- auto *FinalVecTy = FixedVectorType::get(VecTy->getElementType(), EntryVF);
+ auto *FinalVecTy = FixedVectorType::get(ScalarTy, EntryVF);
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
if (E->State == TreeEntry::NeedToGather) {
@@ -7302,121 +7765,13 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
return 0;
if (isa<InsertElementInst>(VL[0]))
return InstructionCost::getInvalid();
- ShuffleCostEstimator Estimator(*TTI, VectorizedVals, *this,
- CheckedExtracts);
- unsigned VF = E->getVectorFactor();
- SmallVector<int> ReuseShuffleIndicies(E->ReuseShuffleIndices.begin(),
- E->ReuseShuffleIndices.end());
- SmallVector<Value *> GatheredScalars(E->Scalars.begin(), E->Scalars.end());
- // Build a mask out of the reorder indices and reorder scalars per this
- // mask.
- SmallVector<int> ReorderMask;
- inversePermutation(E->ReorderIndices, ReorderMask);
- if (!ReorderMask.empty())
- reorderScalars(GatheredScalars, ReorderMask);
- SmallVector<int> Mask;
- SmallVector<int> ExtractMask;
- std::optional<TargetTransformInfo::ShuffleKind> ExtractShuffle;
- std::optional<TargetTransformInfo::ShuffleKind> GatherShuffle;
- SmallVector<const TreeEntry *> Entries;
- Type *ScalarTy = GatheredScalars.front()->getType();
- // Check for gathered extracts.
- ExtractShuffle = tryToGatherExtractElements(GatheredScalars, ExtractMask);
- SmallVector<Value *> IgnoredVals;
- if (UserIgnoreList)
- IgnoredVals.assign(UserIgnoreList->begin(), UserIgnoreList->end());
-
- bool Resized = false;
- if (Value *VecBase = Estimator.adjustExtracts(
- E, ExtractMask, ExtractShuffle.value_or(TTI::SK_PermuteTwoSrc)))
- if (auto *VecBaseTy = dyn_cast<FixedVectorType>(VecBase->getType()))
- if (VF == VecBaseTy->getNumElements() && GatheredScalars.size() != VF) {
- Resized = true;
- GatheredScalars.append(VF - GatheredScalars.size(),
- PoisonValue::get(ScalarTy));
- }
-
- // Do not try to look for reshuffled loads for gathered loads (they will be
- // handled later), for vectorized scalars, and cases, which are definitely
- // not profitable (splats and small gather nodes.)
- if (ExtractShuffle || E->getOpcode() != Instruction::Load ||
- E->isAltShuffle() ||
- all_of(E->Scalars, [this](Value *V) { return getTreeEntry(V); }) ||
- isSplat(E->Scalars) ||
- (E->Scalars != GatheredScalars && GatheredScalars.size() <= 2))
- GatherShuffle = isGatherShuffledEntry(E, GatheredScalars, Mask, Entries);
- if (GatherShuffle) {
- assert((Entries.size() == 1 || Entries.size() == 2) &&
- "Expected shuffle of 1 or 2 entries.");
- if (*GatherShuffle == TTI::SK_PermuteSingleSrc &&
- Entries.front()->isSame(E->Scalars)) {
- // Perfect match in the graph, will reuse the previously vectorized
- // node. Cost is 0.
- LLVM_DEBUG(
- dbgs()
- << "SLP: perfect diamond match for gather bundle that starts with "
- << *VL.front() << ".\n");
- // Restore the mask for previous partially matched values.
- for (auto [I, V] : enumerate(E->Scalars)) {
- if (isa<PoisonValue>(V)) {
- Mask[I] = PoisonMaskElem;
- continue;
- }
- if (Mask[I] == PoisonMaskElem)
- Mask[I] = Entries.front()->findLaneForValue(V);
- }
- Estimator.add(Entries.front(), Mask);
- return Estimator.finalize(E->ReuseShuffleIndices);
- }
- if (!Resized) {
- unsigned VF1 = Entries.front()->getVectorFactor();
- unsigned VF2 = Entries.back()->getVectorFactor();
- if ((VF == VF1 || VF == VF2) && GatheredScalars.size() != VF)
- GatheredScalars.append(VF - GatheredScalars.size(),
- PoisonValue::get(ScalarTy));
- }
- // Remove shuffled elements from list of gathers.
- for (int I = 0, Sz = Mask.size(); I < Sz; ++I) {
- if (Mask[I] != PoisonMaskElem)
- GatheredScalars[I] = PoisonValue::get(ScalarTy);
- }
- LLVM_DEBUG(dbgs() << "SLP: shuffled " << Entries.size()
- << " entries for bundle that starts with "
- << *VL.front() << ".\n";);
- if (Entries.size() == 1)
- Estimator.add(Entries.front(), Mask);
- else
- Estimator.add(Entries.front(), Entries.back(), Mask);
- if (all_of(GatheredScalars, PoisonValue ::classof))
- return Estimator.finalize(E->ReuseShuffleIndices);
- return Estimator.finalize(
- E->ReuseShuffleIndices, E->Scalars.size(),
- [&](Value *&Vec, SmallVectorImpl<int> &Mask) {
- Vec = Estimator.gather(GatheredScalars,
- Constant::getNullValue(FixedVectorType::get(
- GatheredScalars.front()->getType(),
- GatheredScalars.size())));
- });
- }
- if (!all_of(GatheredScalars, PoisonValue::classof)) {
- auto Gathers = ArrayRef(GatheredScalars).take_front(VL.size());
- bool SameGathers = VL.equals(Gathers);
- Value *BV = Estimator.gather(
- Gathers, SameGathers ? nullptr
- : Constant::getNullValue(FixedVectorType::get(
- GatheredScalars.front()->getType(),
- GatheredScalars.size())));
- SmallVector<int> ReuseMask(Gathers.size(), PoisonMaskElem);
- std::iota(ReuseMask.begin(), ReuseMask.end(), 0);
- Estimator.add(BV, ReuseMask);
- }
- if (ExtractShuffle)
- Estimator.add(E, std::nullopt);
- return Estimator.finalize(E->ReuseShuffleIndices);
+ return processBuildVector<ShuffleCostEstimator, InstructionCost>(
+ E, *TTI, VectorizedVals, *this, CheckedExtracts);
}
InstructionCost CommonCost = 0;
SmallVector<int> Mask;
- if (!E->ReorderIndices.empty()) {
+ if (!E->ReorderIndices.empty() &&
+ E->State != TreeEntry::PossibleStridedVectorize) {
SmallVector<int> NewMask;
if (E->getOpcode() == Instruction::Store) {
// For stores the order is actually a mask.
@@ -7429,11 +7784,12 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
}
if (NeedToShuffleReuses)
::addMask(Mask, E->ReuseShuffleIndices);
- if (!Mask.empty() && !ShuffleVectorInst::isIdentityMask(Mask))
+ if (!Mask.empty() && !ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
CommonCost =
TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, FinalVecTy, Mask);
assert((E->State == TreeEntry::Vectorize ||
- E->State == TreeEntry::ScatterVectorize) &&
+ E->State == TreeEntry::ScatterVectorize ||
+ E->State == TreeEntry::PossibleStridedVectorize) &&
"Unhandled state");
assert(E->getOpcode() &&
((allSameType(VL) && allSameBlock(VL)) ||
@@ -7443,7 +7799,34 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
Instruction *VL0 = E->getMainOp();
unsigned ShuffleOrOp =
E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode();
- const unsigned Sz = VL.size();
+ SetVector<Value *> UniqueValues(VL.begin(), VL.end());
+ const unsigned Sz = UniqueValues.size();
+ SmallBitVector UsedScalars(Sz, false);
+ for (unsigned I = 0; I < Sz; ++I) {
+ if (getTreeEntry(UniqueValues[I]) == E)
+ continue;
+ UsedScalars.set(I);
+ }
+ auto GetCastContextHint = [&](Value *V) {
+ if (const TreeEntry *OpTE = getTreeEntry(V)) {
+ if (OpTE->State == TreeEntry::ScatterVectorize)
+ return TTI::CastContextHint::GatherScatter;
+ if (OpTE->State == TreeEntry::Vectorize &&
+ OpTE->getOpcode() == Instruction::Load && !OpTE->isAltShuffle()) {
+ if (OpTE->ReorderIndices.empty())
+ return TTI::CastContextHint::Normal;
+ SmallVector<int> Mask;
+ inversePermutation(OpTE->ReorderIndices, Mask);
+ if (ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
+ return TTI::CastContextHint::Reversed;
+ }
+ } else {
+ InstructionsState SrcState = getSameOpcode(E->getOperand(0), *TLI);
+ if (SrcState.getOpcode() == Instruction::Load && !SrcState.isAltShuffle())
+ return TTI::CastContextHint::GatherScatter;
+ }
+ return TTI::CastContextHint::None;
+ };
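  // Sketch of the hint selection above: a ScatterVectorize operand yields
  // CastContextHint::GatherScatter; a plain vectorized load without
  // reordering yields Normal; a load whose inverted ReorderIndices form a
  // reverse mask (e.g. {3, 2, 1, 0}) yields Reversed; everything else falls
  // through to None.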
auto GetCostDiff =
[=](function_ref<InstructionCost(unsigned)> ScalarEltCost,
function_ref<InstructionCost(InstructionCost)> VectorCost) {
@@ -7453,13 +7836,49 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
// For some of the instructions no need to calculate cost for each
// particular instruction, we can use the cost of the single
// instruction x total number of scalar instructions.
- ScalarCost = Sz * ScalarEltCost(0);
+ ScalarCost = (Sz - UsedScalars.count()) * ScalarEltCost(0);
} else {
- for (unsigned I = 0; I < Sz; ++I)
+ for (unsigned I = 0; I < Sz; ++I) {
+ if (UsedScalars.test(I))
+ continue;
ScalarCost += ScalarEltCost(I);
+ }
}
InstructionCost VecCost = VectorCost(CommonCost);
+      // Check whether the current node must be resized when the parent node
+      // is not resized.
+ if (!UnaryInstruction::isCast(E->getOpcode()) && E->Idx != 0) {
+ const EdgeInfo &EI = E->UserTreeIndices.front();
+ if ((EI.UserTE->getOpcode() != Instruction::Select ||
+ EI.EdgeIdx != 0) &&
+ It != MinBWs.end()) {
+ auto UserBWIt = MinBWs.find(EI.UserTE);
+ Type *UserScalarTy =
+ EI.UserTE->getOperand(EI.EdgeIdx).front()->getType();
+ if (UserBWIt != MinBWs.end())
+ UserScalarTy = IntegerType::get(ScalarTy->getContext(),
+ UserBWIt->second.first);
+ if (ScalarTy != UserScalarTy) {
+ unsigned BWSz = DL->getTypeSizeInBits(ScalarTy);
+ unsigned SrcBWSz = DL->getTypeSizeInBits(UserScalarTy);
+ unsigned VecOpcode;
+ auto *SrcVecTy =
+ FixedVectorType::get(UserScalarTy, E->getVectorFactor());
+ if (BWSz > SrcBWSz)
+ VecOpcode = Instruction::Trunc;
+ else
+ VecOpcode =
+ It->second.second ? Instruction::SExt : Instruction::ZExt;
+ TTI::CastContextHint CCH = GetCastContextHint(VL0);
+ VecCost += TTI->getCastInstrCost(VecOpcode, VecTy, SrcVecTy, CCH,
+ CostKind);
+ ScalarCost +=
+ Sz * TTI->getCastInstrCost(VecOpcode, ScalarTy, UserScalarTy,
+ CCH, CostKind);
+ }
+ }
+ }
LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost - CommonCost,
ScalarCost, "Calculated costs for Tree"));
return VecCost - ScalarCost;
@@ -7550,7 +7969,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
// Count reused scalars.
InstructionCost ScalarCost = 0;
SmallPtrSet<const TreeEntry *, 4> CountedOps;
- for (Value *V : VL) {
+ for (Value *V : UniqueValues) {
auto *PHI = dyn_cast<PHINode>(V);
if (!PHI)
continue;
@@ -7571,8 +7990,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
}
case Instruction::ExtractValue:
case Instruction::ExtractElement: {
- auto GetScalarCost = [=](unsigned Idx) {
- auto *I = cast<Instruction>(VL[Idx]);
+ auto GetScalarCost = [&](unsigned Idx) {
+ auto *I = cast<Instruction>(UniqueValues[Idx]);
VectorType *SrcVecTy;
if (ShuffleOrOp == Instruction::ExtractElement) {
auto *EE = cast<ExtractElementInst>(I);
@@ -7680,8 +8099,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
// need to shift the vector.
// Do not calculate the cost if the actual size is the register size and
// we can merge this shuffle with the following SK_Select.
- auto *InsertVecTy =
- FixedVectorType::get(SrcVecTy->getElementType(), InsertVecSz);
+ auto *InsertVecTy = FixedVectorType::get(ScalarTy, InsertVecSz);
if (!IsIdentity)
Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
InsertVecTy, Mask);
@@ -7697,8 +8115,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
buildUseMask(NumElts, InsertMask, UseMask::UndefsAsMask));
if (!InMask.all() && NumScalars != NumElts && !IsWholeSubvector) {
if (InsertVecSz != VecSz) {
- auto *ActualVecTy =
- FixedVectorType::get(SrcVecTy->getElementType(), VecSz);
+ auto *ActualVecTy = FixedVectorType::get(ScalarTy, VecSz);
Cost += TTI->getShuffleCost(TTI::SK_InsertSubvector, ActualVecTy,
std::nullopt, CostKind, OffsetBeg - Offset,
InsertVecTy);
@@ -7729,22 +8146,52 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
- auto GetScalarCost = [=](unsigned Idx) {
- auto *VI = cast<Instruction>(VL[Idx]);
- return TTI->getCastInstrCost(E->getOpcode(), ScalarTy,
- VI->getOperand(0)->getType(),
+ auto SrcIt = MinBWs.find(getOperandEntry(E, 0));
+ Type *SrcScalarTy = VL0->getOperand(0)->getType();
+ auto *SrcVecTy = FixedVectorType::get(SrcScalarTy, VL.size());
+ unsigned Opcode = ShuffleOrOp;
+ unsigned VecOpcode = Opcode;
+ if (!ScalarTy->isFloatingPointTy() && !SrcScalarTy->isFloatingPointTy() &&
+ (SrcIt != MinBWs.end() || It != MinBWs.end())) {
+ // Check if the values are candidates to demote.
+ unsigned SrcBWSz = DL->getTypeSizeInBits(SrcScalarTy);
+ if (SrcIt != MinBWs.end()) {
+ SrcBWSz = SrcIt->second.first;
+ SrcScalarTy = IntegerType::get(F->getContext(), SrcBWSz);
+ SrcVecTy = FixedVectorType::get(SrcScalarTy, VL.size());
+ }
+ unsigned BWSz = DL->getTypeSizeInBits(ScalarTy);
+ if (BWSz == SrcBWSz) {
+ VecOpcode = Instruction::BitCast;
+ } else if (BWSz < SrcBWSz) {
+ VecOpcode = Instruction::Trunc;
+ } else if (It != MinBWs.end()) {
+ assert(BWSz > SrcBWSz && "Invalid cast!");
+ VecOpcode = It->second.second ? Instruction::SExt : Instruction::ZExt;
+ }
+ }
+ auto GetScalarCost = [&](unsigned Idx) -> InstructionCost {
+ // Do not count cost here if minimum bitwidth is in effect and it is just
+ // a bitcast (here it is just a noop).
+ if (VecOpcode != Opcode && VecOpcode == Instruction::BitCast)
+ return TTI::TCC_Free;
+ auto *VI = VL0->getOpcode() == Opcode
+ ? cast<Instruction>(UniqueValues[Idx])
+ : nullptr;
+ return TTI->getCastInstrCost(Opcode, VL0->getType(),
+ VL0->getOperand(0)->getType(),
TTI::getCastContextHint(VI), CostKind, VI);
};
auto GetVectorCost = [=](InstructionCost CommonCost) {
- Type *SrcTy = VL0->getOperand(0)->getType();
- auto *SrcVecTy = FixedVectorType::get(SrcTy, VL.size());
- InstructionCost VecCost = CommonCost;
- // Check if the values are candidates to demote.
- if (!MinBWs.count(VL0) || VecTy != SrcVecTy)
- VecCost +=
- TTI->getCastInstrCost(E->getOpcode(), VecTy, SrcVecTy,
- TTI::getCastContextHint(VL0), CostKind, VL0);
- return VecCost;
+ // Do not count cost here if minimum bitwidth is in effect and it is just
+ // a bitcast (here it is just a noop).
+ if (VecOpcode != Opcode && VecOpcode == Instruction::BitCast)
+ return CommonCost;
+ auto *VI = VL0->getOpcode() == Opcode ? VL0 : nullptr;
+ TTI::CastContextHint CCH = GetCastContextHint(VL0->getOperand(0));
+ return CommonCost +
+ TTI->getCastInstrCost(VecOpcode, VecTy, SrcVecTy, CCH, CostKind,
+ VecOpcode == Opcode ? VI : nullptr);
};
return GetCostDiff(GetScalarCost, GetVectorCost);
}
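  // Worked sketch of the demotion logic in the cast case above, with
  // hypothetical MinBWs data: both sides demoted to i16 (BWSz == SrcBWSz)
  // turns the vector cast into a free BitCast; an i16 destination with an
  // i32 source (BWSz < SrcBWSz) picks Trunc; an i32 destination with an i16
  // source picks SExt or ZExt depending on the recorded signedness bit
  // (It->second.second).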
@@ -7761,7 +8208,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
? CmpInst::BAD_FCMP_PREDICATE
: CmpInst::BAD_ICMP_PREDICATE;
auto GetScalarCost = [&](unsigned Idx) {
- auto *VI = cast<Instruction>(VL[Idx]);
+ auto *VI = cast<Instruction>(UniqueValues[Idx]);
CmpInst::Predicate CurrentPred = ScalarTy->isFloatingPointTy()
? CmpInst::BAD_FCMP_PREDICATE
: CmpInst::BAD_ICMP_PREDICATE;
@@ -7821,8 +8268,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- auto GetScalarCost = [=](unsigned Idx) {
- auto *VI = cast<Instruction>(VL[Idx]);
+ auto GetScalarCost = [&](unsigned Idx) {
+ auto *VI = cast<Instruction>(UniqueValues[Idx]);
unsigned OpIdx = isa<UnaryOperator>(VI) ? 0 : 1;
TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(VI->getOperand(0));
TTI::OperandValueInfo Op2Info =
@@ -7833,8 +8280,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
};
auto GetVectorCost = [=](InstructionCost CommonCost) {
unsigned OpIdx = isa<UnaryOperator>(VL0) ? 0 : 1;
- TTI::OperandValueInfo Op1Info = getOperandInfo(VL, 0);
- TTI::OperandValueInfo Op2Info = getOperandInfo(VL, OpIdx);
+ TTI::OperandValueInfo Op1Info = getOperandInfo(E->getOperand(0));
+ TTI::OperandValueInfo Op2Info = getOperandInfo(E->getOperand(OpIdx));
return TTI->getArithmeticInstrCost(ShuffleOrOp, VecTy, CostKind, Op1Info,
Op2Info) +
CommonCost;
@@ -7845,23 +8292,25 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
return CommonCost + GetGEPCostDiff(VL, VL0);
}
case Instruction::Load: {
- auto GetScalarCost = [=](unsigned Idx) {
- auto *VI = cast<LoadInst>(VL[Idx]);
+ auto GetScalarCost = [&](unsigned Idx) {
+ auto *VI = cast<LoadInst>(UniqueValues[Idx]);
return TTI->getMemoryOpCost(Instruction::Load, ScalarTy, VI->getAlign(),
VI->getPointerAddressSpace(), CostKind,
TTI::OperandValueInfo(), VI);
};
auto *LI0 = cast<LoadInst>(VL0);
- auto GetVectorCost = [=](InstructionCost CommonCost) {
+ auto GetVectorCost = [&](InstructionCost CommonCost) {
InstructionCost VecLdCost;
if (E->State == TreeEntry::Vectorize) {
VecLdCost = TTI->getMemoryOpCost(
Instruction::Load, VecTy, LI0->getAlign(),
LI0->getPointerAddressSpace(), CostKind, TTI::OperandValueInfo());
} else {
- assert(E->State == TreeEntry::ScatterVectorize && "Unknown EntryState");
+ assert((E->State == TreeEntry::ScatterVectorize ||
+ E->State == TreeEntry::PossibleStridedVectorize) &&
+ "Unknown EntryState");
Align CommonAlignment = LI0->getAlign();
- for (Value *V : VL)
+ for (Value *V : UniqueValues)
CommonAlignment =
std::min(CommonAlignment, cast<LoadInst>(V)->getAlign());
VecLdCost = TTI->getGatherScatterOpCost(
@@ -7874,7 +8323,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
InstructionCost Cost = GetCostDiff(GetScalarCost, GetVectorCost);
// If this node generates masked gather load then it is not a terminal node.
// Hence address operand cost is estimated separately.
- if (E->State == TreeEntry::ScatterVectorize)
+ if (E->State == TreeEntry::ScatterVectorize ||
+ E->State == TreeEntry::PossibleStridedVectorize)
return Cost;
// Estimate cost of GEPs since this tree node is a terminator.
@@ -7887,7 +8337,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
bool IsReorder = !E->ReorderIndices.empty();
auto GetScalarCost = [=](unsigned Idx) {
auto *VI = cast<StoreInst>(VL[Idx]);
- TTI::OperandValueInfo OpInfo = getOperandInfo(VI, 0);
+ TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(VI->getValueOperand());
return TTI->getMemoryOpCost(Instruction::Store, ScalarTy, VI->getAlign(),
VI->getPointerAddressSpace(), CostKind,
OpInfo, VI);
@@ -7896,7 +8346,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
cast<StoreInst>(IsReorder ? VL[E->ReorderIndices.front()] : VL0);
auto GetVectorCost = [=](InstructionCost CommonCost) {
// We know that we can merge the stores. Calculate the cost.
- TTI::OperandValueInfo OpInfo = getOperandInfo(VL, 0);
+ TTI::OperandValueInfo OpInfo = getOperandInfo(E->getOperand(0));
return TTI->getMemoryOpCost(Instruction::Store, VecTy, BaseSI->getAlign(),
BaseSI->getPointerAddressSpace(), CostKind,
OpInfo) +
@@ -7912,8 +8362,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
GetGEPCostDiff(PointerOps, BaseSI->getPointerOperand());
}
case Instruction::Call: {
- auto GetScalarCost = [=](unsigned Idx) {
- auto *CI = cast<CallInst>(VL[Idx]);
+ auto GetScalarCost = [&](unsigned Idx) {
+ auto *CI = cast<CallInst>(UniqueValues[Idx]);
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
if (ID != Intrinsic::not_intrinsic) {
IntrinsicCostAttributes CostAttrs(ID, *CI, 1);
@@ -7954,8 +8404,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
}
return false;
};
- auto GetScalarCost = [=](unsigned Idx) {
- auto *VI = cast<Instruction>(VL[Idx]);
+ auto GetScalarCost = [&](unsigned Idx) {
+ auto *VI = cast<Instruction>(UniqueValues[Idx]);
assert(E->isOpcodeOrAlt(VI) && "Unexpected main/alternate opcode");
(void)E;
return TTI->getInstructionCost(VI, CostKind);
@@ -7995,21 +8445,15 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty,
TTI::CastContextHint::None, CostKind);
}
- if (E->ReuseShuffleIndices.empty()) {
- VecCost +=
- TTI->getShuffleCost(TargetTransformInfo::SK_Select, FinalVecTy);
- } else {
- SmallVector<int> Mask;
- buildShuffleEntryMask(
- E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
- [E](Instruction *I) {
- assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
- return I->getOpcode() == E->getAltOpcode();
- },
- Mask);
- VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
- FinalVecTy, Mask);
- }
+ SmallVector<int> Mask;
+ E->buildAltOpShuffleMask(
+ [E](Instruction *I) {
+ assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
+ return I->getOpcode() == E->getAltOpcode();
+ },
+ Mask);
+ VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
+ FinalVecTy, Mask);
return VecCost;
};
return GetCostDiff(GetScalarCost, GetVectorCost);
@@ -8065,7 +8509,8 @@ bool BoUpSLP::isFullyVectorizableTinyTree(bool ForReduction) const {
// Gathering cost would be too much for tiny trees.
if (VectorizableTree[0]->State == TreeEntry::NeedToGather ||
(VectorizableTree[1]->State == TreeEntry::NeedToGather &&
- VectorizableTree[0]->State != TreeEntry::ScatterVectorize))
+ VectorizableTree[0]->State != TreeEntry::ScatterVectorize &&
+ VectorizableTree[0]->State != TreeEntry::PossibleStridedVectorize))
return false;
return true;
@@ -8144,6 +8589,23 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
allConstant(VectorizableTree[1]->Scalars))))
return true;
+  // If the graph includes only PHI nodes and gathers, it is definitely not
+  // profitable for vectorization; we can skip it if the cost threshold is
+  // the default. The cost of vectorized PHI nodes is almost always 0 plus
+  // the cost of gathers/buildvectors.
+ constexpr int Limit = 4;
+ if (!ForReduction && !SLPCostThreshold.getNumOccurrences() &&
+ !VectorizableTree.empty() &&
+ all_of(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
+ return (TE->State == TreeEntry::NeedToGather &&
+ TE->getOpcode() != Instruction::ExtractElement &&
+ count_if(TE->Scalars,
+ [](Value *V) { return isa<ExtractElementInst>(V); }) <=
+ Limit) ||
+ TE->getOpcode() == Instruction::PHI;
+ }))
+ return true;
+
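  // Minimal sketch of the heuristic above: a tree made of one PHI bundle
  // plus two gather operands, each holding at most Limit (4)
  // extractelements, costs roughly 0 for the PHIs but pays the full
  // gather/buildvector price, so it is rejected up front; explicitly setting
  // the SLP cost threshold (getNumOccurrences() != 0) disables the shortcut.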
// We can vectorize the tree if its size is greater than or equal to the
// minimum size specified by the MinTreeSize command line option.
if (VectorizableTree.size() >= MinTreeSize)
@@ -8435,16 +8897,6 @@ static T *performExtractsShuffleAction(
}
InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
- // Build a map for gathered scalars to the nodes where they are used.
- ValueToGatherNodes.clear();
- for (const std::unique_ptr<TreeEntry> &EntryPtr : VectorizableTree) {
- if (EntryPtr->State != TreeEntry::NeedToGather)
- continue;
- for (Value *V : EntryPtr->Scalars)
- if (!isConstant(V))
- ValueToGatherNodes.try_emplace(V).first->getSecond().insert(
- EntryPtr.get());
- }
InstructionCost Cost = 0;
LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
<< VectorizableTree.size() << ".\n");
@@ -8460,8 +8912,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
E->isSame(TE.Scalars)) {
// Some gather nodes might be absolutely the same as some vectorizable
// nodes after reordering, need to handle it.
- LLVM_DEBUG(dbgs() << "SLP: Adding cost 0 for bundle that starts with "
- << *TE.Scalars[0] << ".\n"
+ LLVM_DEBUG(dbgs() << "SLP: Adding cost 0 for bundle "
+ << shortBundleName(TE.Scalars) << ".\n"
<< "SLP: Current total cost = " << Cost << "\n");
continue;
}
@@ -8469,9 +8921,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
InstructionCost C = getEntryCost(&TE, VectorizedVals, CheckedExtracts);
Cost += C;
- LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
- << " for bundle that starts with " << *TE.Scalars[0]
- << ".\n"
+ LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C << " for bundle "
+ << shortBundleName(TE.Scalars) << ".\n"
<< "SLP: Current total cost = " << Cost << "\n");
}
@@ -8480,6 +8931,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
SmallVector<MapVector<const TreeEntry *, SmallVector<int>>> ShuffleMasks;
SmallVector<std::pair<Value *, const TreeEntry *>> FirstUsers;
SmallVector<APInt> DemandedElts;
+ SmallDenseSet<Value *, 4> UsedInserts;
+ DenseSet<Value *> VectorCasts;
for (ExternalUser &EU : ExternalUses) {
// We only add extract cost once for the same scalar.
if (!isa_and_nonnull<InsertElementInst>(EU.User) &&
@@ -8500,6 +8953,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
// to detect it as a final shuffled/identity match.
if (auto *VU = dyn_cast_or_null<InsertElementInst>(EU.User)) {
if (auto *FTy = dyn_cast<FixedVectorType>(VU->getType())) {
+ if (!UsedInserts.insert(VU).second)
+ continue;
std::optional<unsigned> InsertIdx = getInsertIndex(VU);
if (InsertIdx) {
const TreeEntry *ScalarTE = getTreeEntry(EU.Scalar);
@@ -8546,6 +9001,28 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
FirstUsers.emplace_back(VU, ScalarTE);
DemandedElts.push_back(APInt::getZero(FTy->getNumElements()));
VecId = FirstUsers.size() - 1;
+ auto It = MinBWs.find(ScalarTE);
+ if (It != MinBWs.end() && VectorCasts.insert(EU.Scalar).second) {
+ unsigned BWSz = It->second.second;
+ unsigned SrcBWSz = DL->getTypeSizeInBits(FTy->getElementType());
+ unsigned VecOpcode;
+ if (BWSz < SrcBWSz)
+ VecOpcode = Instruction::Trunc;
+ else
+ VecOpcode =
+ It->second.second ? Instruction::SExt : Instruction::ZExt;
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ InstructionCost C = TTI->getCastInstrCost(
+ VecOpcode, FTy,
+ FixedVectorType::get(
+ IntegerType::get(FTy->getContext(), It->second.first),
+ FTy->getNumElements()),
+ TTI::CastContextHint::None, CostKind);
+ LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
+ << " for extending externally used vector with "
+ "non-equal minimum bitwidth.\n");
+ Cost += C;
+ }
} else {
if (isFirstInsertElement(VU, cast<InsertElementInst>(It->first)))
It->first = VU;
@@ -8567,11 +9044,11 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
// for the extract and the added cost of the sign extend if needed.
auto *VecTy = FixedVectorType::get(EU.Scalar->getType(), BundleWidth);
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
- auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
- if (MinBWs.count(ScalarRoot)) {
- auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
- auto Extend =
- MinBWs[ScalarRoot].second ? Instruction::SExt : Instruction::ZExt;
+ auto It = MinBWs.find(getTreeEntry(EU.Scalar));
+ if (It != MinBWs.end()) {
+ auto *MinTy = IntegerType::get(F->getContext(), It->second.first);
+ unsigned Extend =
+ It->second.second ? Instruction::SExt : Instruction::ZExt;
VecTy = FixedVectorType::get(MinTy, BundleWidth);
ExtractCost += TTI->getExtractWithExtendCost(Extend, EU.Scalar->getType(),
VecTy, EU.Lane);
@@ -8580,6 +9057,21 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
CostKind, EU.Lane);
}
}
+ // Add reduced value cost, if resized.
+ if (!VectorizedVals.empty()) {
+ auto BWIt = MinBWs.find(VectorizableTree.front().get());
+ if (BWIt != MinBWs.end()) {
+ Type *DstTy = VectorizableTree.front()->Scalars.front()->getType();
+ unsigned OriginalSz = DL->getTypeSizeInBits(DstTy);
+ unsigned Opcode = Instruction::Trunc;
+ if (OriginalSz < BWIt->second.first)
+ Opcode = BWIt->second.second ? Instruction::SExt : Instruction::ZExt;
+ Type *SrcTy = IntegerType::get(DstTy->getContext(), BWIt->second.first);
+ Cost += TTI->getCastInstrCost(Opcode, DstTy, SrcTy,
+ TTI::CastContextHint::None,
+ TTI::TCK_RecipThroughput);
+ }
+ }
InstructionCost SpillCost = getSpillCost();
Cost += SpillCost + ExtractCost;
@@ -8590,9 +9082,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
unsigned VecVF = TE->getVectorFactor();
if (VF != VecVF &&
(any_of(Mask, [VF](int Idx) { return Idx >= static_cast<int>(VF); }) ||
- (all_of(Mask,
- [VF](int Idx) { return Idx < 2 * static_cast<int>(VF); }) &&
- !ShuffleVectorInst::isIdentityMask(Mask)))) {
+ !ShuffleVectorInst::isIdentityMask(Mask, VF))) {
SmallVector<int> OrigMask(VecVF, PoisonMaskElem);
std::copy(Mask.begin(), std::next(Mask.begin(), std::min(VF, VecVF)),
OrigMask.begin());
@@ -8611,19 +9101,23 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
// Calculate the cost of the reshuffled vectors, if any.
for (int I = 0, E = FirstUsers.size(); I < E; ++I) {
Value *Base = cast<Instruction>(FirstUsers[I].first)->getOperand(0);
- unsigned VF = ShuffleMasks[I].begin()->second.size();
- auto *FTy = FixedVectorType::get(
- cast<VectorType>(FirstUsers[I].first->getType())->getElementType(), VF);
auto Vector = ShuffleMasks[I].takeVector();
- auto &&EstimateShufflesCost = [this, FTy,
- &Cost](ArrayRef<int> Mask,
- ArrayRef<const TreeEntry *> TEs) {
+ unsigned VF = 0;
+ auto EstimateShufflesCost = [&](ArrayRef<int> Mask,
+ ArrayRef<const TreeEntry *> TEs) {
assert((TEs.size() == 1 || TEs.size() == 2) &&
"Expected exactly 1 or 2 tree entries.");
if (TEs.size() == 1) {
- int Limit = 2 * Mask.size();
- if (!all_of(Mask, [Limit](int Idx) { return Idx < Limit; }) ||
- !ShuffleVectorInst::isIdentityMask(Mask)) {
+ if (VF == 0)
+ VF = TEs.front()->getVectorFactor();
+ auto *FTy =
+ FixedVectorType::get(TEs.back()->Scalars.front()->getType(), VF);
+ if (!ShuffleVectorInst::isIdentityMask(Mask, VF) &&
+ !all_of(enumerate(Mask), [=](const auto &Data) {
+ return Data.value() == PoisonMaskElem ||
+ (Data.index() < VF &&
+ static_cast<int>(Data.index()) == Data.value());
+ })) {
InstructionCost C =
TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, FTy, Mask);
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
@@ -8634,6 +9128,15 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
Cost += C;
}
} else {
+ if (VF == 0) {
+ if (TEs.front() &&
+ TEs.front()->getVectorFactor() == TEs.back()->getVectorFactor())
+ VF = TEs.front()->getVectorFactor();
+ else
+ VF = Mask.size();
+ }
+ auto *FTy =
+ FixedVectorType::get(TEs.back()->Scalars.front()->getType(), VF);
InstructionCost C =
TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, FTy, Mask);
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
@@ -8643,6 +9146,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
dbgs() << "SLP: Current total cost = " << Cost << "\n");
Cost += C;
}
+ VF = Mask.size();
return TEs.back();
};
(void)performExtractsShuffleAction<const TreeEntry>(
@@ -8671,54 +9175,198 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
return Cost;
}
-std::optional<TargetTransformInfo::ShuffleKind>
-BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
- SmallVectorImpl<int> &Mask,
- SmallVectorImpl<const TreeEntry *> &Entries) {
- Entries.clear();
- // No need to check for the topmost gather node.
- if (TE == VectorizableTree.front().get())
+/// Tries to find extractelement instructions with constant indices from fixed
+/// vector type and gather such instructions into a group that can most
+/// likely be represented as a shuffle of 1 or 2 input vectors. If the
+/// attempt succeeds, the matched scalars are replaced by poison values in
+/// \p VL for future analysis.
+std::optional<TTI::ShuffleKind>
+BoUpSLP::tryToGatherSingleRegisterExtractElements(
+ MutableArrayRef<Value *> VL, SmallVectorImpl<int> &Mask) const {
+ // Scan list of gathered scalars for extractelements that can be represented
+ // as shuffles.
+ MapVector<Value *, SmallVector<int>> VectorOpToIdx;
+ SmallVector<int> UndefVectorExtracts;
+ for (int I = 0, E = VL.size(); I < E; ++I) {
+ auto *EI = dyn_cast<ExtractElementInst>(VL[I]);
+ if (!EI) {
+ if (isa<UndefValue>(VL[I]))
+ UndefVectorExtracts.push_back(I);
+ continue;
+ }
+ auto *VecTy = dyn_cast<FixedVectorType>(EI->getVectorOperandType());
+ if (!VecTy || !isa<ConstantInt, UndefValue>(EI->getIndexOperand()))
+ continue;
+ std::optional<unsigned> Idx = getExtractIndex(EI);
+ // Undefined index.
+ if (!Idx) {
+ UndefVectorExtracts.push_back(I);
+ continue;
+ }
+ SmallBitVector ExtractMask(VecTy->getNumElements(), true);
+ ExtractMask.reset(*Idx);
+ if (isUndefVector(EI->getVectorOperand(), ExtractMask).all()) {
+ UndefVectorExtracts.push_back(I);
+ continue;
+ }
+ VectorOpToIdx[EI->getVectorOperand()].push_back(I);
+ }
+ // Sort the vector operands by the maximum number of uses in extractelements.
+ MapVector<unsigned, SmallVector<Value *>> VFToVector;
+ for (const auto &Data : VectorOpToIdx)
+ VFToVector[cast<FixedVectorType>(Data.first->getType())->getNumElements()]
+ .push_back(Data.first);
+ for (auto &Data : VFToVector) {
+ stable_sort(Data.second, [&VectorOpToIdx](Value *V1, Value *V2) {
+ return VectorOpToIdx.find(V1)->second.size() >
+ VectorOpToIdx.find(V2)->second.size();
+ });
+ }
+ // Find the best pair of the vectors with the same number of elements or a
+ // single vector.
+ const int UndefSz = UndefVectorExtracts.size();
+ unsigned SingleMax = 0;
+ Value *SingleVec = nullptr;
+ unsigned PairMax = 0;
+ std::pair<Value *, Value *> PairVec(nullptr, nullptr);
+ for (auto &Data : VFToVector) {
+ Value *V1 = Data.second.front();
+ if (SingleMax < VectorOpToIdx[V1].size() + UndefSz) {
+ SingleMax = VectorOpToIdx[V1].size() + UndefSz;
+ SingleVec = V1;
+ }
+ Value *V2 = nullptr;
+ if (Data.second.size() > 1)
+ V2 = *std::next(Data.second.begin());
+ if (V2 && PairMax < VectorOpToIdx[V1].size() + VectorOpToIdx[V2].size() +
+ UndefSz) {
+ PairMax = VectorOpToIdx[V1].size() + VectorOpToIdx[V2].size() + UndefSz;
+ PairVec = std::make_pair(V1, V2);
+ }
+ }
+ if (SingleMax == 0 && PairMax == 0 && UndefSz == 0)
+ return std::nullopt;
+  // Check whether it is better to perform a shuffle of 2 vectors or just of
+  // a single vector.
+ SmallVector<Value *> SavedVL(VL.begin(), VL.end());
+ SmallVector<Value *> GatheredExtracts(
+ VL.size(), PoisonValue::get(VL.front()->getType()));
+ if (SingleMax >= PairMax && SingleMax) {
+ for (int Idx : VectorOpToIdx[SingleVec])
+ std::swap(GatheredExtracts[Idx], VL[Idx]);
+ } else {
+ for (Value *V : {PairVec.first, PairVec.second})
+ for (int Idx : VectorOpToIdx[V])
+ std::swap(GatheredExtracts[Idx], VL[Idx]);
+ }
+ // Add extracts from undefs too.
+ for (int Idx : UndefVectorExtracts)
+ std::swap(GatheredExtracts[Idx], VL[Idx]);
+  // Check that the gather of extractelements can be represented as just a
+  // shuffle of one or two vectors from which the scalars are extracted.
+ std::optional<TTI::ShuffleKind> Res =
+ isFixedVectorShuffle(GatheredExtracts, Mask);
+ if (!Res) {
+ // TODO: try to check other subsets if possible.
+ // Restore the original VL if attempt was not successful.
+ copy(SavedVL, VL.begin());
return std::nullopt;
+ }
+ // Restore unused scalars from mask, if some of the extractelements were not
+ // selected for shuffle.
+ for (int I = 0, E = GatheredExtracts.size(); I < E; ++I) {
+ if (Mask[I] == PoisonMaskElem && !isa<PoisonValue>(GatheredExtracts[I]) &&
+ isa<UndefValue>(GatheredExtracts[I])) {
+ std::swap(VL[I], GatheredExtracts[I]);
+ continue;
+ }
+ auto *EI = dyn_cast<ExtractElementInst>(VL[I]);
+ if (!EI || !isa<FixedVectorType>(EI->getVectorOperandType()) ||
+ !isa<ConstantInt, UndefValue>(EI->getIndexOperand()) ||
+ is_contained(UndefVectorExtracts, I))
+ continue;
+ }
+ return Res;
+}
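// Worked sketch of the single-vs-pair choice above, with hypothetical
// counts: extracts {a, a, b, undef} give VectorOpToIdx[a].size() = 2,
// VectorOpToIdx[b].size() = 1 and UndefSz = 1, so SingleMax = 2 + 1 = 3 and
// PairMax = 2 + 1 + 1 = 4; the pair (a, b) wins, its lanes are swapped into
// GatheredExtracts, and isFixedVectorShuffle then decides whether a real
// 1- or 2-source mask exists.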
+
+/// Tries to find extractelement instructions with constant indices from fixed
+/// vector type and gather such instructions into a group that can most
+/// likely be represented as a shuffle of 1 or 2 input vectors. If the
+/// attempt succeeds, the matched scalars are replaced by poison values in
+/// \p VL for future analysis.
+SmallVector<std::optional<TTI::ShuffleKind>>
+BoUpSLP::tryToGatherExtractElements(SmallVectorImpl<Value *> &VL,
+ SmallVectorImpl<int> &Mask,
+ unsigned NumParts) const {
+  assert(NumParts > 0 && "NumParts expected to be greater than or equal to 1.");
+ SmallVector<std::optional<TTI::ShuffleKind>> ShufflesRes(NumParts);
Mask.assign(VL.size(), PoisonMaskElem);
- assert(TE->UserTreeIndices.size() == 1 &&
- "Expected only single user of the gather node.");
+ unsigned SliceSize = VL.size() / NumParts;
+ for (unsigned Part = 0; Part < NumParts; ++Part) {
+    // Scan the list of gathered scalars for extractelements that can be
+    // represented as shuffles.
+ MutableArrayRef<Value *> SubVL =
+ MutableArrayRef(VL).slice(Part * SliceSize, SliceSize);
+ SmallVector<int> SubMask;
+ std::optional<TTI::ShuffleKind> Res =
+ tryToGatherSingleRegisterExtractElements(SubVL, SubMask);
+ ShufflesRes[Part] = Res;
+ copy(SubMask, std::next(Mask.begin(), Part * SliceSize));
+ }
+ if (none_of(ShufflesRes, [](const std::optional<TTI::ShuffleKind> &Res) {
+ return Res.has_value();
+ }))
+ ShufflesRes.clear();
+ return ShufflesRes;
+}
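// Per-register splitting in the wrapper above, as a minimal sketch: with
// VL.size() = 8 and NumParts = 2, lanes 0-3 and 4-7 are matched
// independently, each part writing its own optional ShuffleKind into
// ShufflesRes and its SubMask into the matching slice of Mask; if no part
// matches, ShufflesRes is cleared so callers see that no extracts were
// found.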
+
+std::optional<TargetTransformInfo::ShuffleKind>
+BoUpSLP::isGatherShuffledSingleRegisterEntry(
+ const TreeEntry *TE, ArrayRef<Value *> VL, MutableArrayRef<int> Mask,
+ SmallVectorImpl<const TreeEntry *> &Entries, unsigned Part) {
+ Entries.clear();
// TODO: currently checking only for Scalars in the tree entry, need to count
// reused elements too for better cost estimation.
- Instruction &UserInst =
- getLastInstructionInBundle(TE->UserTreeIndices.front().UserTE);
- BasicBlock *ParentBB = nullptr;
+ const EdgeInfo &TEUseEI = TE->UserTreeIndices.front();
+ const Instruction *TEInsertPt = &getLastInstructionInBundle(TEUseEI.UserTE);
+ const BasicBlock *TEInsertBlock = nullptr;
// Main node of PHI entries keeps the correct order of operands/incoming
// blocks.
- if (auto *PHI =
- dyn_cast<PHINode>(TE->UserTreeIndices.front().UserTE->getMainOp())) {
- ParentBB = PHI->getIncomingBlock(TE->UserTreeIndices.front().EdgeIdx);
+ if (auto *PHI = dyn_cast<PHINode>(TEUseEI.UserTE->getMainOp())) {
+ TEInsertBlock = PHI->getIncomingBlock(TEUseEI.EdgeIdx);
+ TEInsertPt = TEInsertBlock->getTerminator();
} else {
- ParentBB = UserInst.getParent();
+ TEInsertBlock = TEInsertPt->getParent();
}
- auto *NodeUI = DT->getNode(ParentBB);
+ auto *NodeUI = DT->getNode(TEInsertBlock);
assert(NodeUI && "Should only process reachable instructions");
SmallPtrSet<Value *, 4> GatheredScalars(VL.begin(), VL.end());
- auto CheckOrdering = [&](Instruction *LastEI) {
- // Check if the user node of the TE comes after user node of EntryPtr,
- // otherwise EntryPtr depends on TE.
- // Gather nodes usually are not scheduled and inserted before their first
- // user node. So, instead of checking dependency between the gather nodes
- // themselves, we check the dependency between their user nodes.
- // If one user node comes before the second one, we cannot use the second
- // gather node as the source vector for the first gather node, because in
- // the list of instructions it will be emitted later.
- auto *EntryParent = LastEI->getParent();
- auto *NodeEUI = DT->getNode(EntryParent);
+ auto CheckOrdering = [&](const Instruction *InsertPt) {
+ // Argument InsertPt is an instruction where vector code for some other
+ // tree entry (one that shares one or more scalars with TE) is going to be
+ // generated. This lambda returns true if insertion point of vector code
+ // for the TE dominates that point (otherwise dependency is the other way
+ // around). The other node is not limited to be of a gather kind. Gather
+ // nodes are not scheduled and their vector code is inserted before their
+ // first user. If user is PHI, that is supposed to be at the end of a
+ // predecessor block. Otherwise it is the last instruction among scalars of
+ // the user node. So, instead of checking dependency between instructions
+ // themselves, we check dependency between their insertion points for vector
+ // code (since each scalar instruction ends up as a lane of a vector
+ // instruction).
+ const BasicBlock *InsertBlock = InsertPt->getParent();
+ auto *NodeEUI = DT->getNode(InsertBlock);
if (!NodeEUI)
return false;
assert((NodeUI == NodeEUI) ==
(NodeUI->getDFSNumIn() == NodeEUI->getDFSNumIn()) &&
"Different nodes should have different DFS numbers");
// Check the order of the gather nodes users.
- if (UserInst.getParent() != EntryParent &&
+ if (TEInsertPt->getParent() != InsertBlock &&
(DT->dominates(NodeUI, NodeEUI) || !DT->dominates(NodeEUI, NodeUI)))
return false;
- if (UserInst.getParent() == EntryParent && UserInst.comesBefore(LastEI))
+ if (TEInsertPt->getParent() == InsertBlock &&
+ TEInsertPt->comesBefore(InsertPt))
return false;
return true;
};
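  // Sketch of the dominance test above: let TE's vector code be inserted in
  // block A and the candidate's in block B. When A != B, the candidate is
  // usable only if B properly dominates A, i.e. its vector value is
  // guaranteed to exist first; when A == B, plain instruction order decides,
  // so a candidate whose insertion point follows TEInsertPt is rejected.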
@@ -8743,43 +9391,42 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
[&](Value *V) { return GatheredScalars.contains(V); }) &&
"Must contain at least single gathered value.");
assert(TEPtr->UserTreeIndices.size() == 1 &&
- "Expected only single user of the gather node.");
- PHINode *EntryPHI =
- dyn_cast<PHINode>(TEPtr->UserTreeIndices.front().UserTE->getMainOp());
- Instruction *EntryUserInst =
- EntryPHI ? nullptr
- : &getLastInstructionInBundle(
- TEPtr->UserTreeIndices.front().UserTE);
- if (&UserInst == EntryUserInst) {
- assert(!EntryPHI && "Unexpected phi node entry.");
- // If 2 gathers are operands of the same entry, compare operands
- // indices, use the earlier one as the base.
- if (TE->UserTreeIndices.front().UserTE ==
- TEPtr->UserTreeIndices.front().UserTE &&
- TE->UserTreeIndices.front().EdgeIdx <
- TEPtr->UserTreeIndices.front().EdgeIdx)
+ "Expected only single user of a gather node.");
+ const EdgeInfo &UseEI = TEPtr->UserTreeIndices.front();
+
+ PHINode *UserPHI = dyn_cast<PHINode>(UseEI.UserTE->getMainOp());
+ const Instruction *InsertPt =
+ UserPHI ? UserPHI->getIncomingBlock(UseEI.EdgeIdx)->getTerminator()
+ : &getLastInstructionInBundle(UseEI.UserTE);
+ if (TEInsertPt == InsertPt) {
+        // If 2 gathers are operands of the same entry (regardless of whether
+        // the user is a PHI or not), compare operand indices and use the
+        // earlier one as the base.
+ if (TEUseEI.UserTE == UseEI.UserTE && TEUseEI.EdgeIdx < UseEI.EdgeIdx)
+ continue;
+        // If the user instruction appears in different vectorized nodes for
+        // some reason, break the tie by node index.
+ if (TEUseEI.UserTE != UseEI.UserTE &&
+ TEUseEI.UserTE->Idx < UseEI.UserTE->Idx)
continue;
}
- // Check if the user node of the TE comes after user node of EntryPtr,
- // otherwise EntryPtr depends on TE.
- auto *EntryI =
- EntryPHI
- ? EntryPHI
- ->getIncomingBlock(TEPtr->UserTreeIndices.front().EdgeIdx)
- ->getTerminator()
- : EntryUserInst;
- if ((ParentBB != EntryI->getParent() ||
- TE->UserTreeIndices.front().EdgeIdx <
- TEPtr->UserTreeIndices.front().EdgeIdx ||
- TE->UserTreeIndices.front().UserTE !=
- TEPtr->UserTreeIndices.front().UserTE) &&
- !CheckOrdering(EntryI))
+
+ // Check if the user node of the TE comes after user node of TEPtr,
+ // otherwise TEPtr depends on TE.
+ if ((TEInsertBlock != InsertPt->getParent() ||
+ TEUseEI.EdgeIdx < UseEI.EdgeIdx || TEUseEI.UserTE != UseEI.UserTE) &&
+ !CheckOrdering(InsertPt))
continue;
VToTEs.insert(TEPtr);
}
if (const TreeEntry *VTE = getTreeEntry(V)) {
- Instruction &EntryUserInst = getLastInstructionInBundle(VTE);
- if (&EntryUserInst == &UserInst || !CheckOrdering(&EntryUserInst))
+ Instruction &LastBundleInst = getLastInstructionInBundle(VTE);
+ if (&LastBundleInst == TEInsertPt || !CheckOrdering(&LastBundleInst))
+ continue;
+ auto It = MinBWs.find(VTE);
+      // If the vectorized node is demoted, do not match.
+ if (It != MinBWs.end() &&
+ It->second.first != DL->getTypeSizeInBits(V->getType()))
continue;
VToTEs.insert(VTE);
}
@@ -8823,8 +9470,10 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
}
}
- if (UsedTEs.empty())
+ if (UsedTEs.empty()) {
+ Entries.clear();
return std::nullopt;
+ }
unsigned VF = 0;
if (UsedTEs.size() == 1) {
@@ -8838,9 +9487,19 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
auto *It = find_if(FirstEntries, [=](const TreeEntry *EntryPtr) {
return EntryPtr->isSame(VL) || EntryPtr->isSame(TE->Scalars);
});
- if (It != FirstEntries.end() && (*It)->getVectorFactor() == VL.size()) {
+ if (It != FirstEntries.end() &&
+ ((*It)->getVectorFactor() == VL.size() ||
+ ((*It)->getVectorFactor() == TE->Scalars.size() &&
+ TE->ReuseShuffleIndices.size() == VL.size() &&
+ (*It)->isSame(TE->Scalars)))) {
Entries.push_back(*It);
- std::iota(Mask.begin(), Mask.end(), 0);
+ if ((*It)->getVectorFactor() == VL.size()) {
+ std::iota(std::next(Mask.begin(), Part * VL.size()),
+ std::next(Mask.begin(), (Part + 1) * VL.size()), 0);
+ } else {
+ SmallVector<int> CommonMask = TE->getCommonMask();
+ copy(CommonMask, Mask.begin());
+ }
// Clear undef scalars.
for (int I = 0, Sz = VL.size(); I < Sz; ++I)
if (isa<PoisonValue>(VL[I]))
@@ -8923,12 +9582,9 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
// by extractelements processing) or may form vector node in future.
auto MightBeIgnored = [=](Value *V) {
auto *I = dyn_cast<Instruction>(V);
- SmallVector<Value *> IgnoredVals;
- if (UserIgnoreList)
- IgnoredVals.assign(UserIgnoreList->begin(), UserIgnoreList->end());
return I && !IsSplatOrUndefs && !ScalarToTreeEntry.count(I) &&
!isVectorLikeInstWithConstOps(I) &&
- !areAllUsersVectorized(I, IgnoredVals) && isSimple(I);
+ !areAllUsersVectorized(I, UserIgnoreList) && isSimple(I);
};
// Check that the neighbor instruction may form a full vector node with the
// current instruction V. It is possible, if they have same/alternate opcode
@@ -8980,7 +9636,10 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
TempEntries.push_back(Entries[I]);
}
Entries.swap(TempEntries);
- if (EntryLanes.size() == Entries.size() && !VL.equals(TE->Scalars)) {
+ if (EntryLanes.size() == Entries.size() &&
+ !VL.equals(ArrayRef(TE->Scalars)
+ .slice(Part * VL.size(),
+ std::min<int>(VL.size(), TE->Scalars.size())))) {
// We may have here 1 or 2 entries only. If the number of scalars is equal
// to the number of entries, no need to do the analysis, it is not very
// profitable. Since VL is not the same as TE->Scalars, it means we already
@@ -8993,9 +9652,10 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
// Pair.first is the offset to the vector, while Pair.second is the index of
// scalar in the list.
for (const std::pair<unsigned, int> &Pair : EntryLanes) {
- Mask[Pair.second] = Pair.first * VF +
- Entries[Pair.first]->findLaneForValue(VL[Pair.second]);
- IsIdentity &= Mask[Pair.second] == Pair.second;
+ unsigned Idx = Part * VL.size() + Pair.second;
+ Mask[Idx] = Pair.first * VF +
+ Entries[Pair.first]->findLaneForValue(VL[Pair.second]);
+ IsIdentity &= Mask[Idx] == Pair.second;
}
switch (Entries.size()) {
case 1:
@@ -9010,9 +9670,64 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
break;
}
Entries.clear();
+ // Clear the corresponding mask elements.
+ std::fill(std::next(Mask.begin(), Part * VL.size()),
+ std::next(Mask.begin(), (Part + 1) * VL.size()), PoisonMaskElem);
return std::nullopt;
}
+SmallVector<std::optional<TargetTransformInfo::ShuffleKind>>
+BoUpSLP::isGatherShuffledEntry(
+ const TreeEntry *TE, ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask,
+ SmallVectorImpl<SmallVector<const TreeEntry *>> &Entries,
+ unsigned NumParts) {
+ assert(NumParts > 0 && NumParts < VL.size() &&
+ "Expected positive number of registers.");
+ Entries.clear();
+ // No need to check for the topmost gather node.
+ if (TE == VectorizableTree.front().get())
+ return {};
+ Mask.assign(VL.size(), PoisonMaskElem);
+ assert(TE->UserTreeIndices.size() == 1 &&
+ "Expected only single user of the gather node.");
+ assert(VL.size() % NumParts == 0 &&
+ "Number of scalars must be divisible by NumParts.");
+ unsigned SliceSize = VL.size() / NumParts;
+ SmallVector<std::optional<TTI::ShuffleKind>> Res;
+ for (unsigned Part = 0; Part < NumParts; ++Part) {
+ ArrayRef<Value *> SubVL = VL.slice(Part * SliceSize, SliceSize);
+ SmallVectorImpl<const TreeEntry *> &SubEntries = Entries.emplace_back();
+ std::optional<TTI::ShuffleKind> SubRes =
+ isGatherShuffledSingleRegisterEntry(TE, SubVL, Mask, SubEntries, Part);
+ if (!SubRes)
+ SubEntries.clear();
+ Res.push_back(SubRes);
+ if (SubEntries.size() == 1 && *SubRes == TTI::SK_PermuteSingleSrc &&
+ SubEntries.front()->getVectorFactor() == VL.size() &&
+ (SubEntries.front()->isSame(TE->Scalars) ||
+ SubEntries.front()->isSame(VL))) {
+ SmallVector<const TreeEntry *> LocalSubEntries;
+ LocalSubEntries.swap(SubEntries);
+ Entries.clear();
+ Res.clear();
+ std::iota(Mask.begin(), Mask.end(), 0);
+ // Clear undef scalars.
+ for (int I = 0, Sz = VL.size(); I < Sz; ++I)
+ if (isa<PoisonValue>(VL[I]))
+ Mask[I] = PoisonMaskElem;
+ Entries.emplace_back(1, LocalSubEntries.front());
+ Res.push_back(TargetTransformInfo::SK_PermuteSingleSrc);
+ return Res;
+ }
+ }
+ if (all_of(Res,
+ [](const std::optional<TTI::ShuffleKind> &SK) { return !SK; })) {
+ Entries.clear();
+ return {};
+ }
+ return Res;
+}
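[Editorial note] The NumParts logic above is easier to follow as a standalone sketch. The following plain C++ (with a hypothetical MatchSlice callback standing in for isGatherShuffledSingleRegisterEntry) shows the core idea: cut the scalar list into equal per-register slices and match each slice independently, collecting one optional shuffle kind per slice.

#include <cassert>
#include <cstddef>
#include <optional>
#include <vector>

enum class ShuffleKind { PermuteSingleSrc, PermuteTwoSrc };

// Splits VL into NumParts equal slices and matches each slice on its own;
// a std::nullopt entry means that slice has to be gathered from scratch.
template <typename Scalar, typename MatchFn>
std::vector<std::optional<ShuffleKind>>
matchPerRegister(const std::vector<Scalar> &VL, unsigned NumParts,
                 MatchFn MatchSlice) {
  assert(NumParts > 0 && VL.size() % NumParts == 0 &&
         "Scalar count must be divisible by NumParts.");
  const std::size_t SliceSize = VL.size() / NumParts;
  std::vector<std::optional<ShuffleKind>> Res;
  for (unsigned Part = 0; Part < NumParts; ++Part) {
    auto First = VL.begin() + Part * SliceSize;
    Res.push_back(MatchSlice(First, First + SliceSize));
  }
  return Res;
}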
+
InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL,
bool ForPoisonSrc) const {
// Find the type of the operands in VL.
@@ -9224,18 +9939,20 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
auto *Front = E->getMainOp();
Instruction *LastInst = &getLastInstructionInBundle(E);
assert(LastInst && "Failed to find last instruction in bundle");
+ BasicBlock::iterator LastInstIt = LastInst->getIterator();
// If the instruction is PHI, set the insert point after all the PHIs.
bool IsPHI = isa<PHINode>(LastInst);
if (IsPHI)
- LastInst = LastInst->getParent()->getFirstNonPHI();
+ LastInstIt = LastInst->getParent()->getFirstNonPHIIt();
if (IsPHI || (E->State != TreeEntry::NeedToGather &&
doesNotNeedToSchedule(E->Scalars))) {
- Builder.SetInsertPoint(LastInst);
+ Builder.SetInsertPoint(LastInst->getParent(), LastInstIt);
} else {
// Set the insertion point after the last instruction in the bundle. Set the
// debug location to Front.
- Builder.SetInsertPoint(LastInst->getParent(),
- std::next(LastInst->getIterator()));
+ Builder.SetInsertPoint(
+ LastInst->getParent(),
+ LastInst->getNextNonDebugInstruction()->getIterator());
}
Builder.SetCurrentDebugLocation(Front->getDebugLoc());
}
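[Editorial note] A hedged illustration of the insertion-point rule just patched in (not the verbatim BoUpSLP member): for PHI bundles the builder skips past the PHI section, otherwise it lands just after the bundle's last instruction, stepping over debug intrinsics. It assumes LastInst is not the block terminator in the non-PHI case, which the surrounding code guarantees.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

void setInsertAfterBundle(IRBuilderBase &Builder, Instruction *LastInst) {
  BasicBlock *BB = LastInst->getParent();
  if (isa<PHINode>(LastInst)) {
    // PHIs must stay grouped at the block head; insert after all of them.
    Builder.SetInsertPoint(BB, BB->getFirstNonPHIIt());
  } else {
    // Skip debug intrinsics so the vector code lands after the real bundle.
    Builder.SetInsertPoint(
        BB, LastInst->getNextNonDebugInstruction()->getIterator());
  }
}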
@@ -9271,10 +9988,12 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root) {
GatherShuffleExtractSeq.insert(InsElt);
CSEBlocks.insert(InsElt->getParent());
// Add to our 'need-to-extract' list.
- if (TreeEntry *Entry = getTreeEntry(V)) {
- // Find which lane we need to extract.
- unsigned FoundLane = Entry->findLaneForValue(V);
- ExternalUses.emplace_back(V, InsElt, FoundLane);
+ if (isa<Instruction>(V)) {
+ if (TreeEntry *Entry = getTreeEntry(V)) {
+ // Find which lane we need to extract.
+ unsigned FoundLane = Entry->findLaneForValue(V);
+ ExternalUses.emplace_back(V, InsElt, FoundLane);
+ }
}
return Vec;
};
@@ -9367,12 +10086,12 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
/// Holds all of the instructions that we gathered.
SetVector<Instruction *> &GatherShuffleExtractSeq;
/// A list of blocks that we are going to CSE.
- SetVector<BasicBlock *> &CSEBlocks;
+ DenseSet<BasicBlock *> &CSEBlocks;
public:
ShuffleIRBuilder(IRBuilderBase &Builder,
SetVector<Instruction *> &GatherShuffleExtractSeq,
- SetVector<BasicBlock *> &CSEBlocks)
+ DenseSet<BasicBlock *> &CSEBlocks)
: Builder(Builder), GatherShuffleExtractSeq(GatherShuffleExtractSeq),
CSEBlocks(CSEBlocks) {}
~ShuffleIRBuilder() = default;
@@ -9392,7 +10111,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
return V1;
unsigned VF = Mask.size();
unsigned LocalVF = cast<FixedVectorType>(V1->getType())->getNumElements();
- if (VF == LocalVF && ShuffleVectorInst::isIdentityMask(Mask))
+ if (VF == LocalVF && ShuffleVectorInst::isIdentityMask(Mask, VF))
return V1;
Value *Vec = Builder.CreateShuffleVector(V1, Mask);
if (auto *I = dyn_cast<Instruction>(Vec)) {
@@ -9455,7 +10174,11 @@ public:
: Builder(Builder), R(R) {}
/// Adjusts extractelements after reusing them.
- Value *adjustExtracts(const TreeEntry *E, ArrayRef<int> Mask) {
+ Value *adjustExtracts(const TreeEntry *E, MutableArrayRef<int> Mask,
+ ArrayRef<std::optional<TTI::ShuffleKind>> ShuffleKinds,
+ unsigned NumParts, bool &UseVecBaseAsInput) {
+ UseVecBaseAsInput = false;
+ SmallPtrSet<Value *, 4> UniqueBases;
Value *VecBase = nullptr;
for (int I = 0, Sz = Mask.size(); I < Sz; ++I) {
int Idx = Mask[I];
@@ -9463,6 +10186,10 @@ public:
continue;
auto *EI = cast<ExtractElementInst>(E->Scalars[I]);
VecBase = EI->getVectorOperand();
+ if (const TreeEntry *TE = R.getTreeEntry(VecBase))
+ VecBase = TE->VectorizedValue;
+ assert(VecBase && "Expected vectorized value.");
+ UniqueBases.insert(VecBase);
// If the only one use is vectorized - can delete the extractelement
// itself.
if (!EI->hasOneUse() || any_of(EI->users(), [&](User *U) {
@@ -9471,14 +10198,97 @@ public:
continue;
R.eraseInstruction(EI);
}
- return VecBase;
+ if (NumParts == 1 || UniqueBases.size() == 1)
+ return VecBase;
+ UseVecBaseAsInput = true;
+ auto TransformToIdentity = [](MutableArrayRef<int> Mask) {
+ for (auto [I, Idx] : enumerate(Mask))
+ if (Idx != PoisonMaskElem)
+ Idx = I;
+ };
+ // Perform a multi-register vector shuffle, joining the parts into a
+ // single virtual long vector.
+ // Each part is shuffled independently, and then all the parts are
+ // inserted into a long virtual vector register, forming the original
+ // vector.
+ Value *Vec = nullptr;
+ SmallVector<int> VecMask(Mask.size(), PoisonMaskElem);
+ unsigned SliceSize = E->Scalars.size() / NumParts;
+ for (unsigned Part = 0; Part < NumParts; ++Part) {
+ ArrayRef<Value *> VL =
+ ArrayRef(E->Scalars).slice(Part * SliceSize, SliceSize);
+ MutableArrayRef<int> SubMask = Mask.slice(Part * SliceSize, SliceSize);
+ constexpr int MaxBases = 2;
+ SmallVector<Value *, MaxBases> Bases(MaxBases);
+#ifndef NDEBUG
+ int PrevSize = 0;
+#endif // NDEBUG
+ for (const auto [I, V]: enumerate(VL)) {
+ if (SubMask[I] == PoisonMaskElem)
+ continue;
+ Value *VecOp = cast<ExtractElementInst>(V)->getVectorOperand();
+ if (const TreeEntry *TE = R.getTreeEntry(VecOp))
+ VecOp = TE->VectorizedValue;
+ assert(VecOp && "Expected vectorized value.");
+ const int Size =
+ cast<FixedVectorType>(VecOp->getType())->getNumElements();
+#ifndef NDEBUG
+ assert((PrevSize == Size || PrevSize == 0) &&
+ "Expected vectors of the same size.");
+ PrevSize = Size;
+#endif // NDEBUG
+ Bases[SubMask[I] < Size ? 0 : 1] = VecOp;
+ }
+ if (!Bases.front())
+ continue;
+ Value *SubVec;
+ if (Bases.back()) {
+ SubVec = createShuffle(Bases.front(), Bases.back(), SubMask);
+ TransformToIdentity(SubMask);
+ } else {
+ SubVec = Bases.front();
+ }
+ if (!Vec) {
+ Vec = SubVec;
+ assert((Part == 0 || all_of(seq<unsigned>(0, Part),
+ [&](unsigned P) {
+ ArrayRef<int> SubMask =
+ Mask.slice(P * SliceSize, SliceSize);
+ return all_of(SubMask, [](int Idx) {
+ return Idx == PoisonMaskElem;
+ });
+ })) &&
+ "Expected first part or all previous parts masked.");
+ copy(SubMask, std::next(VecMask.begin(), Part * SliceSize));
+ } else {
+ unsigned VF = cast<FixedVectorType>(Vec->getType())->getNumElements();
+ if (Vec->getType() != SubVec->getType()) {
+ unsigned SubVecVF =
+ cast<FixedVectorType>(SubVec->getType())->getNumElements();
+ VF = std::max(VF, SubVecVF);
+ }
+ // Adjust SubMask.
+ for (auto [I, Idx] : enumerate(SubMask))
+ if (Idx != PoisonMaskElem)
+ Idx += VF;
+ copy(SubMask, std::next(VecMask.begin(), Part * SliceSize));
+ Vec = createShuffle(Vec, SubVec, VecMask);
+ TransformToIdentity(VecMask);
+ }
+ }
+ copy(VecMask, Mask.begin());
+ return Vec;
}
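[Editorial note] The mask arithmetic in the loop above follows the standard two-operand shufflevector convention; here is a small plain-C++ sketch of the accumulate step, with illustrative names.

#include <vector>

constexpr int PoisonMaskElem = -1;

// When joining an accumulated vector of AccVF lanes with the next
// sub-vector, lanes taken from the new part select from the second shuffle
// operand, so their indices are shifted past the first operand's lanes.
std::vector<int> shiftSubMask(std::vector<int> SubMask, unsigned AccVF) {
  for (int &Idx : SubMask)
    if (Idx != PoisonMaskElem)
      Idx += AccVF;
  return SubMask;
}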
/// Checks if the specified entry \p E needs to be delayed because of its
/// dependency nodes.
- Value *needToDelay(const TreeEntry *E, ArrayRef<const TreeEntry *> Deps) {
+ std::optional<Value *>
+ needToDelay(const TreeEntry *E,
+ ArrayRef<SmallVector<const TreeEntry *>> Deps) const {
// No need to delay emission if all deps are ready.
- if (all_of(Deps, [](const TreeEntry *TE) { return TE->VectorizedValue; }))
- return nullptr;
+ if (all_of(Deps, [](ArrayRef<const TreeEntry *> TEs) {
+ return all_of(
+ TEs, [](const TreeEntry *TE) { return TE->VectorizedValue; });
+ }))
+ return std::nullopt;
// Postpone gather emission, will be emitted after the end of the
// process to keep correct order.
auto *VecTy = FixedVectorType::get(E->Scalars.front()->getType(),
@@ -9487,6 +10297,16 @@ public:
VecTy, PoisonValue::get(PointerType::getUnqual(VecTy->getContext())),
MaybeAlign());
}
+ /// Adds 2 input vectors (in the form of tree entries) and the mask for
+ /// their shuffling.
+ void add(const TreeEntry &E1, const TreeEntry &E2, ArrayRef<int> Mask) {
+ add(E1.VectorizedValue, E2.VectorizedValue, Mask);
+ }
+ /// Adds a single input vector (in the form of a tree entry) and the mask
+ /// for its shuffling.
+ void add(const TreeEntry &E1, ArrayRef<int> Mask) {
+ add(E1.VectorizedValue, Mask);
+ }
/// Adds 2 input vectors and the mask for their shuffling.
void add(Value *V1, Value *V2, ArrayRef<int> Mask) {
assert(V1 && V2 && !Mask.empty() && "Expected non-empty input vectors.");
@@ -9516,7 +10336,7 @@ public:
InVectors.push_back(V1);
}
/// Adds another one input vector and the mask for the shuffling.
- void add(Value *V1, ArrayRef<int> Mask) {
+ void add(Value *V1, ArrayRef<int> Mask, bool = false) {
if (InVectors.empty()) {
if (!isa<FixedVectorType>(V1->getType())) {
V1 = createShuffle(V1, nullptr, CommonMask);
@@ -9578,7 +10398,8 @@ public:
inversePermutation(Order, NewMask);
add(V1, NewMask);
}
- Value *gather(ArrayRef<Value *> VL, Value *Root = nullptr) {
+ Value *gather(ArrayRef<Value *> VL, unsigned MaskVF = 0,
+ Value *Root = nullptr) {
return R.gather(VL, Root);
}
Value *createFreeze(Value *V) { return Builder.CreateFreeze(V); }
@@ -9639,8 +10460,14 @@ public:
}
};
-Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) {
- ArrayRef<Value *> VL = E->getOperand(NodeIdx);
+Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx,
+ bool PostponedPHIs) {
+ ValueList &VL = E->getOperand(NodeIdx);
+ if (E->State == TreeEntry::PossibleStridedVectorize &&
+ !E->ReorderIndices.empty()) {
+ SmallVector<int> Mask(E->ReorderIndices.begin(), E->ReorderIndices.end());
+ reorderScalars(VL, Mask);
+ }
const unsigned VF = VL.size();
InstructionsState S = getSameOpcode(VL, *TLI);
// Special processing for GEPs bundle, which may include non-gep values.
@@ -9651,23 +10478,39 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) {
S = getSameOpcode(*It, *TLI);
}
if (S.getOpcode()) {
- if (TreeEntry *VE = getTreeEntry(S.OpValue);
- VE && VE->isSame(VL) &&
- (any_of(VE->UserTreeIndices,
- [E, NodeIdx](const EdgeInfo &EI) {
- return EI.UserTE == E && EI.EdgeIdx == NodeIdx;
- }) ||
- any_of(VectorizableTree,
- [E, NodeIdx, VE](const std::unique_ptr<TreeEntry> &TE) {
- return TE->isOperandGatherNode({E, NodeIdx}) &&
- VE->isSame(TE->Scalars);
- }))) {
+ auto CheckSameVE = [&](const TreeEntry *VE) {
+ return VE->isSame(VL) &&
+ (any_of(VE->UserTreeIndices,
+ [E, NodeIdx](const EdgeInfo &EI) {
+ return EI.UserTE == E && EI.EdgeIdx == NodeIdx;
+ }) ||
+ any_of(VectorizableTree,
+ [E, NodeIdx, VE](const std::unique_ptr<TreeEntry> &TE) {
+ return TE->isOperandGatherNode({E, NodeIdx}) &&
+ VE->isSame(TE->Scalars);
+ }));
+ };
+ TreeEntry *VE = getTreeEntry(S.OpValue);
+ bool IsSameVE = VE && CheckSameVE(VE);
+ if (!IsSameVE) {
+ auto It = MultiNodeScalars.find(S.OpValue);
+ if (It != MultiNodeScalars.end()) {
+ auto *I = find_if(It->getSecond(), [&](const TreeEntry *TE) {
+ return TE != VE && CheckSameVE(TE);
+ });
+ if (I != It->getSecond().end()) {
+ VE = *I;
+ IsSameVE = true;
+ }
+ }
+ }
+ if (IsSameVE) {
auto FinalShuffle = [&](Value *V, ArrayRef<int> Mask) {
ShuffleInstructionBuilder ShuffleBuilder(Builder, *this);
ShuffleBuilder.add(V, Mask);
return ShuffleBuilder.finalize(std::nullopt);
};
- Value *V = vectorizeTree(VE);
+ Value *V = vectorizeTree(VE, PostponedPHIs);
if (VF != cast<FixedVectorType>(V->getType())->getNumElements()) {
if (!VE->ReuseShuffleIndices.empty()) {
// Reshuffle to get only unique values.
@@ -9740,14 +10583,7 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) {
assert(I->get()->UserTreeIndices.size() == 1 &&
"Expected only single user for the gather node.");
assert(I->get()->isSame(VL) && "Expected same list of scalars.");
- IRBuilder<>::InsertPointGuard Guard(Builder);
- if (E->getOpcode() != Instruction::InsertElement &&
- E->getOpcode() != Instruction::PHI) {
- Instruction *LastInst = &getLastInstructionInBundle(E);
- assert(LastInst && "Failed to find last instruction in bundle");
- Builder.SetInsertPoint(LastInst);
- }
- return vectorizeTree(I->get());
+ return vectorizeTree(I->get(), PostponedPHIs);
}
template <typename BVTy, typename ResTy, typename... Args>
@@ -9765,7 +10601,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
inversePermutation(E->ReorderIndices, ReorderMask);
if (!ReorderMask.empty())
reorderScalars(GatheredScalars, ReorderMask);
- auto FindReusedSplat = [&](SmallVectorImpl<int> &Mask) {
+ auto FindReusedSplat = [&](MutableArrayRef<int> Mask, unsigned InputVF) {
if (!isSplat(E->Scalars) || none_of(E->Scalars, [](Value *V) {
return isa<UndefValue>(V) && !isa<PoisonValue>(V);
}))
@@ -9782,70 +10618,102 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
});
if (It == VectorizableTree.end())
return false;
- unsigned I =
- *find_if_not(Mask, [](int Idx) { return Idx == PoisonMaskElem; });
- int Sz = Mask.size();
- if (all_of(Mask, [Sz](int Idx) { return Idx < 2 * Sz; }) &&
- ShuffleVectorInst::isIdentityMask(Mask))
+ int Idx;
+ if ((Mask.size() < InputVF &&
+ ShuffleVectorInst::isExtractSubvectorMask(Mask, InputVF, Idx) &&
+ Idx == 0) ||
+ (Mask.size() == InputVF &&
+ ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))) {
std::iota(Mask.begin(), Mask.end(), 0);
- else
+ } else {
+ unsigned I =
+ *find_if_not(Mask, [](int Idx) { return Idx == PoisonMaskElem; });
std::fill(Mask.begin(), Mask.end(), I);
+ }
return true;
};
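[Editorial note] For reference, the rewritten check accepts two mask shapes. With InputVF == 8, {0, 1, 2, 3} is an extract-subvector mask at index 0, and {0, 1, 2, 3, 4, 5, 6, 7} is a full identity; both let the splat search rewrite the mask as a plain iota. A standalone predicate for the identity case (a sketch, not the LLVM helper itself):

#include <cstddef>
#include <vector>

constexpr int PoisonMaskElem = -1;

// True if every non-poison lane of Mask selects source lane I into lane I.
bool isIdentityLikeMask(const std::vector<int> &Mask) {
  for (std::size_t I = 0; I < Mask.size(); ++I)
    if (Mask[I] != PoisonMaskElem && Mask[I] != static_cast<int>(I))
      return false;
  return true;
}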
BVTy ShuffleBuilder(Params...);
ResTy Res = ResTy();
SmallVector<int> Mask;
- SmallVector<int> ExtractMask;
- std::optional<TargetTransformInfo::ShuffleKind> ExtractShuffle;
- std::optional<TargetTransformInfo::ShuffleKind> GatherShuffle;
- SmallVector<const TreeEntry *> Entries;
+ SmallVector<int> ExtractMask(GatheredScalars.size(), PoisonMaskElem);
+ SmallVector<std::optional<TTI::ShuffleKind>> ExtractShuffles;
+ Value *ExtractVecBase = nullptr;
+ bool UseVecBaseAsInput = false;
+ SmallVector<std::optional<TargetTransformInfo::ShuffleKind>> GatherShuffles;
+ SmallVector<SmallVector<const TreeEntry *>> Entries;
Type *ScalarTy = GatheredScalars.front()->getType();
+ auto *VecTy = FixedVectorType::get(ScalarTy, GatheredScalars.size());
+ unsigned NumParts = TTI->getNumberOfParts(VecTy);
+ if (NumParts == 0 || NumParts >= GatheredScalars.size())
+ NumParts = 1;
if (!all_of(GatheredScalars, UndefValue::classof)) {
// Check for gathered extracts.
- ExtractShuffle = tryToGatherExtractElements(GatheredScalars, ExtractMask);
- SmallVector<Value *> IgnoredVals;
- if (UserIgnoreList)
- IgnoredVals.assign(UserIgnoreList->begin(), UserIgnoreList->end());
bool Resized = false;
- if (Value *VecBase = ShuffleBuilder.adjustExtracts(E, ExtractMask))
- if (auto *VecBaseTy = dyn_cast<FixedVectorType>(VecBase->getType()))
- if (VF == VecBaseTy->getNumElements() && GatheredScalars.size() != VF) {
- Resized = true;
- GatheredScalars.append(VF - GatheredScalars.size(),
- PoisonValue::get(ScalarTy));
- }
+ ExtractShuffles =
+ tryToGatherExtractElements(GatheredScalars, ExtractMask, NumParts);
+ if (!ExtractShuffles.empty()) {
+ SmallVector<const TreeEntry *> ExtractEntries;
+ for (auto [Idx, I] : enumerate(ExtractMask)) {
+ if (I == PoisonMaskElem)
+ continue;
+ if (const auto *TE = getTreeEntry(
+ cast<ExtractElementInst>(E->Scalars[Idx])->getVectorOperand()))
+ ExtractEntries.push_back(TE);
+ }
+ if (std::optional<ResTy> Delayed =
+ ShuffleBuilder.needToDelay(E, ExtractEntries)) {
+ // Delay emission of gathers which are not ready yet.
+ PostponedGathers.insert(E);
+ // Postpone gather emission, will be emitted after the end of the
+ // process to keep correct order.
+ return *Delayed;
+ }
+ if (Value *VecBase = ShuffleBuilder.adjustExtracts(
+ E, ExtractMask, ExtractShuffles, NumParts, UseVecBaseAsInput)) {
+ ExtractVecBase = VecBase;
+ if (auto *VecBaseTy = dyn_cast<FixedVectorType>(VecBase->getType()))
+ if (VF == VecBaseTy->getNumElements() &&
+ GatheredScalars.size() != VF) {
+ Resized = true;
+ GatheredScalars.append(VF - GatheredScalars.size(),
+ PoisonValue::get(ScalarTy));
+ }
+ }
+ }
// Gather extracts after we check for full matched gathers only.
- if (ExtractShuffle || E->getOpcode() != Instruction::Load ||
+ if (!ExtractShuffles.empty() || E->getOpcode() != Instruction::Load ||
E->isAltShuffle() ||
all_of(E->Scalars, [this](Value *V) { return getTreeEntry(V); }) ||
isSplat(E->Scalars) ||
(E->Scalars != GatheredScalars && GatheredScalars.size() <= 2)) {
- GatherShuffle = isGatherShuffledEntry(E, GatheredScalars, Mask, Entries);
+ GatherShuffles =
+ isGatherShuffledEntry(E, GatheredScalars, Mask, Entries, NumParts);
}
- if (GatherShuffle) {
- if (Value *Delayed = ShuffleBuilder.needToDelay(E, Entries)) {
+ if (!GatherShuffles.empty()) {
+ if (std::optional<ResTy> Delayed =
+ ShuffleBuilder.needToDelay(E, Entries)) {
// Delay emission of gathers which are not ready yet.
PostponedGathers.insert(E);
// Postpone gather emission, will be emitted after the end of the
// process to keep correct order.
- return Delayed;
+ return *Delayed;
}
- assert((Entries.size() == 1 || Entries.size() == 2) &&
- "Expected shuffle of 1 or 2 entries.");
- if (*GatherShuffle == TTI::SK_PermuteSingleSrc &&
- Entries.front()->isSame(E->Scalars)) {
+ if (GatherShuffles.size() == 1 &&
+ *GatherShuffles.front() == TTI::SK_PermuteSingleSrc &&
+ Entries.front().front()->isSame(E->Scalars)) {
// Perfect match in the graph, will reuse the previously vectorized
// node. Cost is 0.
LLVM_DEBUG(
dbgs()
- << "SLP: perfect diamond match for gather bundle that starts with "
- << *E->Scalars.front() << ".\n");
+ << "SLP: perfect diamond match for gather bundle "
+ << shortBundleName(E->Scalars) << ".\n");
// Restore the mask for previous partially matched values.
- if (Entries.front()->ReorderIndices.empty() &&
- ((Entries.front()->ReuseShuffleIndices.empty() &&
- E->Scalars.size() == Entries.front()->Scalars.size()) ||
- (E->Scalars.size() ==
- Entries.front()->ReuseShuffleIndices.size()))) {
+ Mask.resize(E->Scalars.size());
+ const TreeEntry *FrontTE = Entries.front().front();
+ if (FrontTE->ReorderIndices.empty() &&
+ ((FrontTE->ReuseShuffleIndices.empty() &&
+ E->Scalars.size() == FrontTE->Scalars.size()) ||
+ (E->Scalars.size() == FrontTE->ReuseShuffleIndices.size()))) {
std::iota(Mask.begin(), Mask.end(), 0);
} else {
for (auto [I, V] : enumerate(E->Scalars)) {
@@ -9853,17 +10721,20 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
Mask[I] = PoisonMaskElem;
continue;
}
- Mask[I] = Entries.front()->findLaneForValue(V);
+ Mask[I] = FrontTE->findLaneForValue(V);
}
}
- ShuffleBuilder.add(Entries.front()->VectorizedValue, Mask);
+ ShuffleBuilder.add(*FrontTE, Mask);
Res = ShuffleBuilder.finalize(E->getCommonMask());
return Res;
}
if (!Resized) {
- unsigned VF1 = Entries.front()->getVectorFactor();
- unsigned VF2 = Entries.back()->getVectorFactor();
- if ((VF == VF1 || VF == VF2) && GatheredScalars.size() != VF)
+ if (GatheredScalars.size() != VF &&
+ any_of(Entries, [&](ArrayRef<const TreeEntry *> TEs) {
+ return any_of(TEs, [&](const TreeEntry *TE) {
+ return TE->getVectorFactor() == VF;
+ });
+ }))
GatheredScalars.append(VF - GatheredScalars.size(),
PoisonValue::get(ScalarTy));
}
@@ -9943,78 +10814,108 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
if (It != Scalars.end()) {
// Replace undefs by the non-poisoned scalars and emit broadcast.
int Pos = std::distance(Scalars.begin(), It);
- for_each(UndefPos, [&](int I) {
+ for (int I : UndefPos) {
// Set the undef position to the non-poisoned scalar.
ReuseMask[I] = Pos;
// Replace the undef by the poison, in the mask it is replaced by
// non-poisoned scalar already.
if (I != Pos)
Scalars[I] = PoisonValue::get(ScalarTy);
- });
+ }
} else {
// Replace undefs by the poisons, emit broadcast and then emit
// freeze.
- for_each(UndefPos, [&](int I) {
+ for (int I : UndefPos) {
ReuseMask[I] = PoisonMaskElem;
if (isa<UndefValue>(Scalars[I]))
Scalars[I] = PoisonValue::get(ScalarTy);
- });
+ }
NeedFreeze = true;
}
}
};
- if (ExtractShuffle || GatherShuffle) {
+ if (!ExtractShuffles.empty() || !GatherShuffles.empty()) {
bool IsNonPoisoned = true;
- bool IsUsedInExpr = false;
+ bool IsUsedInExpr = true;
Value *Vec1 = nullptr;
- if (ExtractShuffle) {
+ if (!ExtractShuffles.empty()) {
// Gather of extractelements can be represented as just a shuffle of
// a single/two vectors the scalars are extracted from.
// Find input vectors.
Value *Vec2 = nullptr;
for (unsigned I = 0, Sz = ExtractMask.size(); I < Sz; ++I) {
- if (ExtractMask[I] == PoisonMaskElem ||
- (!Mask.empty() && Mask[I] != PoisonMaskElem)) {
+ if (!Mask.empty() && Mask[I] != PoisonMaskElem)
ExtractMask[I] = PoisonMaskElem;
- continue;
- }
- if (isa<UndefValue>(E->Scalars[I]))
- continue;
- auto *EI = cast<ExtractElementInst>(E->Scalars[I]);
- if (!Vec1) {
- Vec1 = EI->getVectorOperand();
- } else if (Vec1 != EI->getVectorOperand()) {
- assert((!Vec2 || Vec2 == EI->getVectorOperand()) &&
- "Expected only 1 or 2 vectors shuffle.");
- Vec2 = EI->getVectorOperand();
+ }
+ if (UseVecBaseAsInput) {
+ Vec1 = ExtractVecBase;
+ } else {
+ for (unsigned I = 0, Sz = ExtractMask.size(); I < Sz; ++I) {
+ if (ExtractMask[I] == PoisonMaskElem)
+ continue;
+ if (isa<UndefValue>(E->Scalars[I]))
+ continue;
+ auto *EI = cast<ExtractElementInst>(E->Scalars[I]);
+ Value *VecOp = EI->getVectorOperand();
+ if (const auto *TE = getTreeEntry(VecOp))
+ if (TE->VectorizedValue)
+ VecOp = TE->VectorizedValue;
+ if (!Vec1) {
+ Vec1 = VecOp;
+ } else if (Vec1 != EI->getVectorOperand()) {
+ assert((!Vec2 || Vec2 == EI->getVectorOperand()) &&
+ "Expected only 1 or 2 vectors shuffle.");
+ Vec2 = VecOp;
+ }
}
}
if (Vec2) {
+ IsUsedInExpr = false;
IsNonPoisoned &=
isGuaranteedNotToBePoison(Vec1) && isGuaranteedNotToBePoison(Vec2);
ShuffleBuilder.add(Vec1, Vec2, ExtractMask);
} else if (Vec1) {
- IsUsedInExpr = FindReusedSplat(ExtractMask);
- ShuffleBuilder.add(Vec1, ExtractMask);
+ IsUsedInExpr &= FindReusedSplat(
+ ExtractMask,
+ cast<FixedVectorType>(Vec1->getType())->getNumElements());
+ ShuffleBuilder.add(Vec1, ExtractMask, /*ForExtracts=*/true);
IsNonPoisoned &= isGuaranteedNotToBePoison(Vec1);
} else {
+ IsUsedInExpr = false;
ShuffleBuilder.add(PoisonValue::get(FixedVectorType::get(
ScalarTy, GatheredScalars.size())),
- ExtractMask);
+ ExtractMask, /*ForExtracts=*/true);
}
}
- if (GatherShuffle) {
- if (Entries.size() == 1) {
- IsUsedInExpr = FindReusedSplat(Mask);
- ShuffleBuilder.add(Entries.front()->VectorizedValue, Mask);
- IsNonPoisoned &=
- isGuaranteedNotToBePoison(Entries.front()->VectorizedValue);
- } else {
- ShuffleBuilder.add(Entries.front()->VectorizedValue,
- Entries.back()->VectorizedValue, Mask);
- IsNonPoisoned &=
- isGuaranteedNotToBePoison(Entries.front()->VectorizedValue) &&
- isGuaranteedNotToBePoison(Entries.back()->VectorizedValue);
+ if (!GatherShuffles.empty()) {
+ unsigned SliceSize = E->Scalars.size() / NumParts;
+ SmallVector<int> VecMask(Mask.size(), PoisonMaskElem);
+ for (const auto [I, TEs] : enumerate(Entries)) {
+ if (TEs.empty()) {
+ assert(!GatherShuffles[I] &&
+ "No shuffles with empty entries list expected.");
+ continue;
+ }
+ assert((TEs.size() == 1 || TEs.size() == 2) &&
+ "Expected shuffle of 1 or 2 entries.");
+ auto SubMask = ArrayRef(Mask).slice(I * SliceSize, SliceSize);
+ VecMask.assign(VecMask.size(), PoisonMaskElem);
+ copy(SubMask, std::next(VecMask.begin(), I * SliceSize));
+ if (TEs.size() == 1) {
+ IsUsedInExpr &=
+ FindReusedSplat(VecMask, TEs.front()->getVectorFactor());
+ ShuffleBuilder.add(*TEs.front(), VecMask);
+ if (TEs.front()->VectorizedValue)
+ IsNonPoisoned &=
+ isGuaranteedNotToBePoison(TEs.front()->VectorizedValue);
+ } else {
+ IsUsedInExpr = false;
+ ShuffleBuilder.add(*TEs.front(), *TEs.back(), VecMask);
+ if (TEs.front()->VectorizedValue && TEs.back()->VectorizedValue)
+ IsNonPoisoned &=
+ isGuaranteedNotToBePoison(TEs.front()->VectorizedValue) &&
+ isGuaranteedNotToBePoison(TEs.back()->VectorizedValue);
+ }
}
}
// Try to figure out best way to combine values: build a shuffle and insert
@@ -10025,16 +10926,24 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
int MSz = Mask.size();
// Try to build constant vector and shuffle with it only if currently we
// have a single permutation and more than 1 scalar constants.
- bool IsSingleShuffle = !ExtractShuffle || !GatherShuffle;
+ bool IsSingleShuffle = ExtractShuffles.empty() || GatherShuffles.empty();
bool IsIdentityShuffle =
- (ExtractShuffle.value_or(TTI::SK_PermuteTwoSrc) ==
- TTI::SK_PermuteSingleSrc &&
+ ((UseVecBaseAsInput ||
+ all_of(ExtractShuffles,
+ [](const std::optional<TTI::ShuffleKind> &SK) {
+ return SK.value_or(TTI::SK_PermuteTwoSrc) ==
+ TTI::SK_PermuteSingleSrc;
+ })) &&
none_of(ExtractMask, [&](int I) { return I >= EMSz; }) &&
- ShuffleVectorInst::isIdentityMask(ExtractMask)) ||
- (GatherShuffle.value_or(TTI::SK_PermuteTwoSrc) ==
- TTI::SK_PermuteSingleSrc &&
+ ShuffleVectorInst::isIdentityMask(ExtractMask, EMSz)) ||
+ (!GatherShuffles.empty() &&
+ all_of(GatherShuffles,
+ [](const std::optional<TTI::ShuffleKind> &SK) {
+ return SK.value_or(TTI::SK_PermuteTwoSrc) ==
+ TTI::SK_PermuteSingleSrc;
+ }) &&
none_of(Mask, [&](int I) { return I >= MSz; }) &&
- ShuffleVectorInst::isIdentityMask(Mask));
+ ShuffleVectorInst::isIdentityMask(Mask, MSz));
bool EnoughConstsForShuffle =
IsSingleShuffle &&
(none_of(GatheredScalars,
@@ -10064,7 +10973,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
if (!all_of(GatheredScalars, PoisonValue::classof)) {
SmallVector<int> BVMask(GatheredScalars.size(), PoisonMaskElem);
TryPackScalars(GatheredScalars, BVMask, /*IsRootPoison=*/true);
- Value *BV = ShuffleBuilder.gather(GatheredScalars);
+ Value *BV = ShuffleBuilder.gather(GatheredScalars, BVMask.size());
ShuffleBuilder.add(BV, BVMask);
}
if (all_of(NonConstants, [=](Value *V) {
@@ -10078,13 +10987,13 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
E->ReuseShuffleIndices, E->Scalars.size(),
[&](Value *&Vec, SmallVectorImpl<int> &Mask) {
TryPackScalars(NonConstants, Mask, /*IsRootPoison=*/false);
- Vec = ShuffleBuilder.gather(NonConstants, Vec);
+ Vec = ShuffleBuilder.gather(NonConstants, Mask.size(), Vec);
});
} else if (!allConstant(GatheredScalars)) {
// Gather unique scalars and all constants.
SmallVector<int> ReuseMask(GatheredScalars.size(), PoisonMaskElem);
TryPackScalars(GatheredScalars, ReuseMask, /*IsRootPoison=*/true);
- Value *BV = ShuffleBuilder.gather(GatheredScalars);
+ Value *BV = ShuffleBuilder.gather(GatheredScalars, ReuseMask.size());
ShuffleBuilder.add(BV, ReuseMask);
Res = ShuffleBuilder.finalize(E->ReuseShuffleIndices);
} else {
@@ -10109,10 +11018,12 @@ Value *BoUpSLP::createBuildVector(const TreeEntry *E) {
*this);
}
-Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
+Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
IRBuilder<>::InsertPointGuard Guard(Builder);
- if (E->VectorizedValue) {
+ if (E->VectorizedValue &&
+ (E->State != TreeEntry::Vectorize || E->getOpcode() != Instruction::PHI ||
+ E->isAltShuffle())) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n");
return E->VectorizedValue;
}
@@ -10126,13 +11037,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return Vec;
}
- auto FinalShuffle = [&](Value *V, const TreeEntry *E) {
+ auto FinalShuffle = [&](Value *V, const TreeEntry *E, VectorType *VecTy,
+ bool IsSigned) {
+ if (V->getType() != VecTy)
+ V = Builder.CreateIntCast(V, VecTy, IsSigned);
ShuffleInstructionBuilder ShuffleBuilder(Builder, *this);
if (E->getOpcode() == Instruction::Store) {
ArrayRef<int> Mask =
ArrayRef(reinterpret_cast<const int *>(E->ReorderIndices.begin()),
E->ReorderIndices.size());
ShuffleBuilder.add(V, Mask);
+ } else if (E->State == TreeEntry::PossibleStridedVectorize) {
+ ShuffleBuilder.addOrdered(V, std::nullopt);
} else {
ShuffleBuilder.addOrdered(V, E->ReorderIndices);
}
@@ -10140,7 +11056,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
};
assert((E->State == TreeEntry::Vectorize ||
- E->State == TreeEntry::ScatterVectorize) &&
+ E->State == TreeEntry::ScatterVectorize ||
+ E->State == TreeEntry::PossibleStridedVectorize) &&
"Unhandled state");
unsigned ShuffleOrOp =
E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode();
@@ -10150,6 +11067,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
ScalarTy = Store->getValueOperand()->getType();
else if (auto *IE = dyn_cast<InsertElementInst>(VL0))
ScalarTy = IE->getOperand(1)->getType();
+ bool IsSigned = false;
+ auto It = MinBWs.find(E);
+ if (It != MinBWs.end()) {
+ ScalarTy = IntegerType::get(F->getContext(), It->second.first);
+ IsSigned = It->second.second;
+ }
auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size());
switch (ShuffleOrOp) {
case Instruction::PHI: {
@@ -10157,32 +11080,45 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
E != VectorizableTree.front().get() ||
!E->UserTreeIndices.empty()) &&
"PHI reordering is free.");
+ if (PostponedPHIs && E->VectorizedValue)
+ return E->VectorizedValue;
auto *PH = cast<PHINode>(VL0);
- Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI());
- Builder.SetCurrentDebugLocation(PH->getDebugLoc());
- PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues());
- Value *V = NewPhi;
-
- // Adjust insertion point once all PHI's have been generated.
- Builder.SetInsertPoint(&*PH->getParent()->getFirstInsertionPt());
+ Builder.SetInsertPoint(PH->getParent(),
+ PH->getParent()->getFirstNonPHIIt());
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
+ if (PostponedPHIs || !E->VectorizedValue) {
+ PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues());
+ E->PHI = NewPhi;
+ Value *V = NewPhi;
+
+ // Adjust insertion point once all PHI's have been generated.
+ Builder.SetInsertPoint(PH->getParent(),
+ PH->getParent()->getFirstInsertionPt());
+ Builder.SetCurrentDebugLocation(PH->getDebugLoc());
- V = FinalShuffle(V, E);
+ V = FinalShuffle(V, E, VecTy, IsSigned);
- E->VectorizedValue = V;
+ E->VectorizedValue = V;
+ if (PostponedPHIs)
+ return V;
+ }
+ PHINode *NewPhi = cast<PHINode>(E->PHI);
+ // If phi node is fully emitted - exit.
+ if (NewPhi->getNumIncomingValues() != 0)
+ return NewPhi;
// PHINodes may have multiple entries from the same block. We want to
// visit every block once.
SmallPtrSet<BasicBlock *, 4> VisitedBBs;
- for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
+ for (unsigned I : seq<unsigned>(0, PH->getNumIncomingValues())) {
ValueList Operands;
- BasicBlock *IBB = PH->getIncomingBlock(i);
+ BasicBlock *IBB = PH->getIncomingBlock(I);
// Stop emission if all incoming values are generated.
if (NewPhi->getNumIncomingValues() == PH->getNumIncomingValues()) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
- return V;
+ return NewPhi;
}
if (!VisitedBBs.insert(IBB).second) {
@@ -10192,37 +11128,54 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Builder.SetInsertPoint(IBB->getTerminator());
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
- Value *Vec = vectorizeOperand(E, i);
+ Value *Vec = vectorizeOperand(E, I, /*PostponedPHIs=*/true);
+ if (VecTy != Vec->getType()) {
+ assert(MinBWs.contains(getOperandEntry(E, I)) &&
+ "Expected item in MinBWs.");
+ Vec = Builder.CreateIntCast(Vec, VecTy, It->second.second);
+ }
NewPhi->addIncoming(Vec, IBB);
}
assert(NewPhi->getNumIncomingValues() == PH->getNumIncomingValues() &&
"Invalid number of incoming values");
- return V;
+ return NewPhi;
}
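[Editorial note] The PHI case above is easier to see as a two-phase scheme; here is a hedged sketch with illustrative helper names (not the BoUpSLP members). Pass 1 creates the vector PHI with no incoming values, so cyclic operand chains through PHIs never recurse into an unfinished node; pass 2 revisits the PHI and fills in one incoming vector per predecessor edge. The caller is assumed to position the builder (e.g. at each predecessor's terminator) before invoking VectorizeIncoming.

#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Pass 1: emit the empty vector PHI at the head of the scalar PHI's block.
PHINode *emitEmptyVecPhi(IRBuilderBase &Builder, PHINode *ScalarPH,
                         Type *VecTy) {
  Builder.SetInsertPoint(ScalarPH->getParent(),
                         ScalarPH->getParent()->getFirstNonPHIIt());
  return Builder.CreatePHI(VecTy, ScalarPH->getNumIncomingValues());
}

// Pass 2: add one incoming vector per predecessor edge.
void fillVecPhi(PHINode *NewPhi, PHINode *ScalarPH,
                function_ref<Value *(BasicBlock *)> VectorizeIncoming) {
  SmallPtrSet<BasicBlock *, 4> Visited;
  for (unsigned I = 0, E = ScalarPH->getNumIncomingValues(); I != E; ++I) {
    BasicBlock *IBB = ScalarPH->getIncomingBlock(I);
    if (!Visited.insert(IBB).second) {
      // A block may reach the PHI through several edges; reuse its value.
      NewPhi->addIncoming(NewPhi->getIncomingValueForBlock(IBB), IBB);
      continue;
    }
    NewPhi->addIncoming(VectorizeIncoming(IBB), IBB);
  }
}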
case Instruction::ExtractElement: {
Value *V = E->getSingleOperand(0);
setInsertPointAfterBundle(E);
- V = FinalShuffle(V, E);
+ V = FinalShuffle(V, E, VecTy, IsSigned);
E->VectorizedValue = V;
return V;
}
case Instruction::ExtractValue: {
auto *LI = cast<LoadInst>(E->getSingleOperand(0));
Builder.SetInsertPoint(LI);
- auto *PtrTy = PointerType::get(VecTy, LI->getPointerAddressSpace());
- Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy);
+ Value *Ptr = LI->getPointerOperand();
LoadInst *V = Builder.CreateAlignedLoad(VecTy, Ptr, LI->getAlign());
Value *NewV = propagateMetadata(V, E->Scalars);
- NewV = FinalShuffle(NewV, E);
+ NewV = FinalShuffle(NewV, E, VecTy, IsSigned);
E->VectorizedValue = NewV;
return NewV;
}
case Instruction::InsertElement: {
assert(E->ReuseShuffleIndices.empty() && "All inserts should be unique");
Builder.SetInsertPoint(cast<Instruction>(E->Scalars.back()));
- Value *V = vectorizeOperand(E, 1);
+ Value *V = vectorizeOperand(E, 1, PostponedPHIs);
+ ArrayRef<Value *> Op = E->getOperand(1);
+ Type *ScalarTy = Op.front()->getType();
+ if (cast<VectorType>(V->getType())->getElementType() != ScalarTy) {
+ assert(ScalarTy->isIntegerTy() && "Expected item in MinBWs.");
+ std::pair<unsigned, bool> Res = MinBWs.lookup(getOperandEntry(E, 1));
+ assert(Res.first > 0 && "Expected item in MinBWs.");
+ V = Builder.CreateIntCast(
+ V,
+ FixedVectorType::get(
+ ScalarTy,
+ cast<FixedVectorType>(V->getType())->getNumElements()),
+ Res.second);
+ }
// Create InsertVector shuffle if necessary
auto *FirstInsert = cast<Instruction>(*find_if(E->Scalars, [E](Value *V) {
@@ -10255,7 +11208,57 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Mask[InsertIdx - Offset] = I;
}
if (!IsIdentity || NumElts != NumScalars) {
- V = Builder.CreateShuffleVector(V, Mask);
+ Value *V2 = nullptr;
+ bool IsVNonPoisonous = isGuaranteedNotToBePoison(V) && !isConstant(V);
+ SmallVector<int> InsertMask(Mask);
+ if (NumElts != NumScalars && Offset == 0) {
+ // Follow all insert element instructions from the current buildvector
+ // sequence.
+ InsertElementInst *Ins = cast<InsertElementInst>(VL0);
+ do {
+ std::optional<unsigned> InsertIdx = getInsertIndex(Ins);
+ if (!InsertIdx)
+ break;
+ if (InsertMask[*InsertIdx] == PoisonMaskElem)
+ InsertMask[*InsertIdx] = *InsertIdx;
+ if (!Ins->hasOneUse())
+ break;
+ Ins = dyn_cast_or_null<InsertElementInst>(
+ Ins->getUniqueUndroppableUser());
+ } while (Ins);
+ SmallBitVector UseMask =
+ buildUseMask(NumElts, InsertMask, UseMask::UndefsAsMask);
+ SmallBitVector IsFirstPoison =
+ isUndefVector<true>(FirstInsert->getOperand(0), UseMask);
+ SmallBitVector IsFirstUndef =
+ isUndefVector(FirstInsert->getOperand(0), UseMask);
+ if (!IsFirstPoison.all()) {
+ unsigned Idx = 0;
+ for (unsigned I = 0; I < NumElts; I++) {
+ if (InsertMask[I] == PoisonMaskElem && !IsFirstPoison.test(I) &&
+ IsFirstUndef.test(I)) {
+ if (IsVNonPoisonous) {
+ InsertMask[I] = I < NumScalars ? I : 0;
+ continue;
+ }
+ if (!V2)
+ V2 = UndefValue::get(V->getType());
+ if (Idx >= NumScalars)
+ Idx = NumScalars - 1;
+ InsertMask[I] = NumScalars + Idx;
+ ++Idx;
+ } else if (InsertMask[I] != PoisonMaskElem &&
+ Mask[I] == PoisonMaskElem) {
+ InsertMask[I] = PoisonMaskElem;
+ }
+ }
+ } else {
+ InsertMask = Mask;
+ }
+ }
+ if (!V2)
+ V2 = PoisonValue::get(V->getType());
+ V = Builder.CreateShuffleVector(V, V2, InsertMask);
if (auto *I = dyn_cast<Instruction>(V)) {
GatherShuffleExtractSeq.insert(I);
CSEBlocks.insert(I->getParent());
@@ -10274,15 +11277,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if ((!IsIdentity || Offset != 0 || !IsFirstUndef.all()) &&
NumElts != NumScalars) {
if (IsFirstUndef.all()) {
- if (!ShuffleVectorInst::isIdentityMask(InsertMask)) {
- SmallBitVector IsFirstPoison =
- isUndefVector<true>(FirstInsert->getOperand(0), UseMask);
- if (!IsFirstPoison.all()) {
- for (unsigned I = 0; I < NumElts; I++) {
- if (InsertMask[I] == PoisonMaskElem && !IsFirstPoison.test(I))
- InsertMask[I] = I + NumElts;
+ if (!ShuffleVectorInst::isIdentityMask(InsertMask, NumElts)) {
+ SmallBitVector IsFirstPoison =
+ isUndefVector<true>(FirstInsert->getOperand(0), UseMask);
+ if (!IsFirstPoison.all()) {
+ for (unsigned I = 0; I < NumElts; I++) {
+ if (InsertMask[I] == PoisonMaskElem && !IsFirstPoison.test(I))
+ InsertMask[I] = I + NumElts;
+ }
}
- }
V = Builder.CreateShuffleVector(
V,
IsFirstPoison.all() ? PoisonValue::get(V->getType())
@@ -10330,15 +11333,36 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::BitCast: {
setInsertPointAfterBundle(E);
- Value *InVec = vectorizeOperand(E, 0);
+ Value *InVec = vectorizeOperand(E, 0, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
auto *CI = cast<CastInst>(VL0);
- Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
- V = FinalShuffle(V, E);
+ Instruction::CastOps VecOpcode = CI->getOpcode();
+ Type *SrcScalarTy = VL0->getOperand(0)->getType();
+ auto SrcIt = MinBWs.find(getOperandEntry(E, 0));
+ if (!ScalarTy->isFloatingPointTy() && !SrcScalarTy->isFloatingPointTy() &&
+ (SrcIt != MinBWs.end() || It != MinBWs.end())) {
+ // Check if the values are candidates to demote.
+ unsigned SrcBWSz = DL->getTypeSizeInBits(SrcScalarTy);
+ if (SrcIt != MinBWs.end())
+ SrcBWSz = SrcIt->second.first;
+ unsigned BWSz = DL->getTypeSizeInBits(ScalarTy);
+ if (BWSz == SrcBWSz) {
+ VecOpcode = Instruction::BitCast;
+ } else if (BWSz < SrcBWSz) {
+ VecOpcode = Instruction::Trunc;
+ } else if (It != MinBWs.end()) {
+ assert(BWSz > SrcBWSz && "Invalid cast!");
+ VecOpcode = It->second.second ? Instruction::SExt : Instruction::ZExt;
+ }
+ }
+ Value *V = (VecOpcode != ShuffleOrOp && VecOpcode == Instruction::BitCast)
+ ? InVec
+ : Builder.CreateCast(VecOpcode, InVec, VecTy);
+ V = FinalShuffle(V, E, VecTy, IsSigned);
E->VectorizedValue = V;
++NumVectorInstructions;
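[Editorial note] The opcode selection above for demoted integer values reduces to a simple rule: equal bit widths degrade the cast to a bitcast (which the patch then elides entirely), a narrower destination becomes a trunc, and a wider one becomes sext or zext depending on the recorded signedness. A minimal sketch with illustrative names:

#include <cstdint>

enum class CastOp { BitCast, Trunc, SExt, ZExt };

// Picks the integer cast opcode given the effective (possibly demoted)
// destination and source bit widths from the MinBWs analysis.
CastOp pickIntCast(unsigned DstBits, unsigned SrcBits, bool DstIsSigned) {
  if (DstBits == SrcBits)
    return CastOp::BitCast;
  if (DstBits < SrcBits)
    return CastOp::Trunc;
  return DstIsSigned ? CastOp::SExt : CastOp::ZExt;
}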
@@ -10348,21 +11372,30 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::ICmp: {
setInsertPointAfterBundle(E);
- Value *L = vectorizeOperand(E, 0);
+ Value *L = vectorizeOperand(E, 0, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
- Value *R = vectorizeOperand(E, 1);
+ Value *R = vectorizeOperand(E, 1, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
+ if (L->getType() != R->getType()) {
+ assert((MinBWs.contains(getOperandEntry(E, 0)) ||
+ MinBWs.contains(getOperandEntry(E, 1))) &&
+ "Expected item in MinBWs.");
+ L = Builder.CreateIntCast(L, VecTy, IsSigned);
+ R = Builder.CreateIntCast(R, VecTy, IsSigned);
+ }
CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
Value *V = Builder.CreateCmp(P0, L, R);
propagateIRFlags(V, E->Scalars, VL0);
- V = FinalShuffle(V, E);
+ // Do not cast for cmps.
+ VecTy = cast<FixedVectorType>(V->getType());
+ V = FinalShuffle(V, E, VecTy, IsSigned);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -10371,24 +11404,31 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::Select: {
setInsertPointAfterBundle(E);
- Value *Cond = vectorizeOperand(E, 0);
+ Value *Cond = vectorizeOperand(E, 0, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
- Value *True = vectorizeOperand(E, 1);
+ Value *True = vectorizeOperand(E, 1, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
- Value *False = vectorizeOperand(E, 2);
+ Value *False = vectorizeOperand(E, 2, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
+ if (True->getType() != False->getType()) {
+ assert((MinBWs.contains(getOperandEntry(E, 1)) ||
+ MinBWs.contains(getOperandEntry(E, 2))) &&
+ "Expected item in MinBWs.");
+ True = Builder.CreateIntCast(True, VecTy, IsSigned);
+ False = Builder.CreateIntCast(False, VecTy, IsSigned);
+ }
Value *V = Builder.CreateSelect(Cond, True, False);
- V = FinalShuffle(V, E);
+ V = FinalShuffle(V, E, VecTy, IsSigned);
E->VectorizedValue = V;
++NumVectorInstructions;
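[Editorial note] The same operand-width reconciliation recurs in the cmp, select, and binary-op cases: when min-bitwidth analysis demoted only one operand tree, both sides are cast to the common vector type before the operation is built. A hedged sketch of that shared step; note that IRBuilderBase::CreateIntCast returns its input unchanged when the types already match.

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Brings both operands to the common vector type recorded for the node.
void unifyOperandTypes(IRBuilderBase &Builder, Value *&L, Value *&R,
                       VectorType *VecTy, bool IsSigned) {
  if (L->getType() != R->getType()) {
    L = Builder.CreateIntCast(L, VecTy, IsSigned);
    R = Builder.CreateIntCast(R, VecTy, IsSigned);
  }
}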
@@ -10397,7 +11437,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::FNeg: {
setInsertPointAfterBundle(E);
- Value *Op = vectorizeOperand(E, 0);
+ Value *Op = vectorizeOperand(E, 0, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
@@ -10410,7 +11450,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (auto *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
- V = FinalShuffle(V, E);
+ V = FinalShuffle(V, E, VecTy, IsSigned);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -10437,16 +11477,23 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::Xor: {
setInsertPointAfterBundle(E);
- Value *LHS = vectorizeOperand(E, 0);
+ Value *LHS = vectorizeOperand(E, 0, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
- Value *RHS = vectorizeOperand(E, 1);
+ Value *RHS = vectorizeOperand(E, 1, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
+ if (LHS->getType() != RHS->getType()) {
+ assert((MinBWs.contains(getOperandEntry(E, 0)) ||
+ MinBWs.contains(getOperandEntry(E, 1))) &&
+ "Expected item in MinBWs.");
+ LHS = Builder.CreateIntCast(LHS, VecTy, IsSigned);
+ RHS = Builder.CreateIntCast(RHS, VecTy, IsSigned);
+ }
Value *V = Builder.CreateBinOp(
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
@@ -10455,7 +11502,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (auto *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
- V = FinalShuffle(V, E);
+ V = FinalShuffle(V, E, VecTy, IsSigned);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -10476,14 +11523,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// The pointer operand uses an in-tree scalar so we add the new
// LoadInst to ExternalUses list to make sure that an extract will
// be generated in the future.
- if (TreeEntry *Entry = getTreeEntry(PO)) {
- // Find which lane we need to extract.
- unsigned FoundLane = Entry->findLaneForValue(PO);
- ExternalUses.emplace_back(PO, NewLI, FoundLane);
+ if (isa<Instruction>(PO)) {
+ if (TreeEntry *Entry = getTreeEntry(PO)) {
+ // Find which lane we need to extract.
+ unsigned FoundLane = Entry->findLaneForValue(PO);
+ ExternalUses.emplace_back(PO, NewLI, FoundLane);
+ }
}
} else {
- assert(E->State == TreeEntry::ScatterVectorize && "Unhandled state");
- Value *VecPtr = vectorizeOperand(E, 0);
+ assert((E->State == TreeEntry::ScatterVectorize ||
+ E->State == TreeEntry::PossibleStridedVectorize) &&
+ "Unhandled state");
+ Value *VecPtr = vectorizeOperand(E, 0, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
@@ -10497,35 +11548,32 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
Value *V = propagateMetadata(NewLI, E->Scalars);
- V = FinalShuffle(V, E);
+ V = FinalShuffle(V, E, VecTy, IsSigned);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
}
case Instruction::Store: {
auto *SI = cast<StoreInst>(VL0);
- unsigned AS = SI->getPointerAddressSpace();
setInsertPointAfterBundle(E);
- Value *VecValue = vectorizeOperand(E, 0);
- VecValue = FinalShuffle(VecValue, E);
+ Value *VecValue = vectorizeOperand(E, 0, PostponedPHIs);
+ VecValue = FinalShuffle(VecValue, E, VecTy, IsSigned);
- Value *ScalarPtr = SI->getPointerOperand();
- Value *VecPtr = Builder.CreateBitCast(
- ScalarPtr, VecValue->getType()->getPointerTo(AS));
+ Value *Ptr = SI->getPointerOperand();
StoreInst *ST =
- Builder.CreateAlignedStore(VecValue, VecPtr, SI->getAlign());
+ Builder.CreateAlignedStore(VecValue, Ptr, SI->getAlign());
- // The pointer operand uses an in-tree scalar, so add the new BitCast or
- // StoreInst to ExternalUses to make sure that an extract will be
- // generated in the future.
- if (TreeEntry *Entry = getTreeEntry(ScalarPtr)) {
- // Find which lane we need to extract.
- unsigned FoundLane = Entry->findLaneForValue(ScalarPtr);
- ExternalUses.push_back(ExternalUser(
- ScalarPtr, ScalarPtr != VecPtr ? cast<User>(VecPtr) : ST,
- FoundLane));
+ // The pointer operand uses an in-tree scalar, so add the new StoreInst to
+ // ExternalUses to make sure that an extract will be generated in the
+ // future.
+ if (isa<Instruction>(Ptr)) {
+ if (TreeEntry *Entry = getTreeEntry(Ptr)) {
+ // Find which lane we need to extract.
+ unsigned FoundLane = Entry->findLaneForValue(Ptr);
+ ExternalUses.push_back(ExternalUser(Ptr, ST, FoundLane));
+ }
}
Value *V = propagateMetadata(ST, E->Scalars);
@@ -10538,7 +11586,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
auto *GEP0 = cast<GetElementPtrInst>(VL0);
setInsertPointAfterBundle(E);
- Value *Op0 = vectorizeOperand(E, 0);
+ Value *Op0 = vectorizeOperand(E, 0, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
@@ -10546,7 +11594,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
SmallVector<Value *> OpVecs;
for (int J = 1, N = GEP0->getNumOperands(); J < N; ++J) {
- Value *OpVec = vectorizeOperand(E, J);
+ Value *OpVec = vectorizeOperand(E, J, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
@@ -10564,7 +11612,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
V = propagateMetadata(I, GEPs);
}
- V = FinalShuffle(V, E);
+ V = FinalShuffle(V, E, VecTy, IsSigned);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -10586,41 +11634,42 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
VecCallCosts.first <= VecCallCosts.second;
Value *ScalarArg = nullptr;
- std::vector<Value *> OpVecs;
+ SmallVector<Value *> OpVecs;
SmallVector<Type *, 2> TysForDecl;
// Add return type if intrinsic is overloaded on it.
if (isVectorIntrinsicWithOverloadTypeAtArg(IID, -1))
TysForDecl.push_back(
FixedVectorType::get(CI->getType(), E->Scalars.size()));
- for (int j = 0, e = CI->arg_size(); j < e; ++j) {
+ for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
ValueList OpVL;
// Some intrinsics have scalar arguments. This argument should not be
// vectorized.
- if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(IID, j)) {
+ if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(IID, I)) {
CallInst *CEI = cast<CallInst>(VL0);
- ScalarArg = CEI->getArgOperand(j);
- OpVecs.push_back(CEI->getArgOperand(j));
- if (isVectorIntrinsicWithOverloadTypeAtArg(IID, j))
+ ScalarArg = CEI->getArgOperand(I);
+ OpVecs.push_back(CEI->getArgOperand(I));
+ if (isVectorIntrinsicWithOverloadTypeAtArg(IID, I))
TysForDecl.push_back(ScalarArg->getType());
continue;
}
- Value *OpVec = vectorizeOperand(E, j);
+ Value *OpVec = vectorizeOperand(E, I, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
- LLVM_DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n");
+ LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n");
OpVecs.push_back(OpVec);
- if (isVectorIntrinsicWithOverloadTypeAtArg(IID, j))
+ if (isVectorIntrinsicWithOverloadTypeAtArg(IID, I))
TysForDecl.push_back(OpVec->getType());
}
Function *CF;
if (!UseIntrinsic) {
VFShape Shape =
- VFShape::get(*CI, ElementCount::getFixed(static_cast<unsigned>(
- VecTy->getNumElements())),
+ VFShape::get(CI->getFunctionType(),
+ ElementCount::getFixed(
+ static_cast<unsigned>(VecTy->getNumElements())),
false /*HasGlobalPred*/);
CF = VFDatabase(*CI).getVectorizedFunction(Shape);
} else {
@@ -10634,7 +11683,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// The scalar argument uses an in-tree scalar so we add the new vectorized
// call to ExternalUses list to make sure that an extract will be
// generated in the future.
- if (ScalarArg) {
+ if (isa_and_present<Instruction>(ScalarArg)) {
if (TreeEntry *Entry = getTreeEntry(ScalarArg)) {
// Find which lane we need to extract.
unsigned FoundLane = Entry->findLaneForValue(ScalarArg);
@@ -10644,7 +11693,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
propagateIRFlags(V, E->Scalars, VL0);
- V = FinalShuffle(V, E);
+ V = FinalShuffle(V, E, VecTy, IsSigned);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -10662,20 +11711,27 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *LHS = nullptr, *RHS = nullptr;
if (Instruction::isBinaryOp(E->getOpcode()) || isa<CmpInst>(VL0)) {
setInsertPointAfterBundle(E);
- LHS = vectorizeOperand(E, 0);
+ LHS = vectorizeOperand(E, 0, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
- RHS = vectorizeOperand(E, 1);
+ RHS = vectorizeOperand(E, 1, PostponedPHIs);
} else {
setInsertPointAfterBundle(E);
- LHS = vectorizeOperand(E, 0);
+ LHS = vectorizeOperand(E, 0, PostponedPHIs);
}
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
+ if (LHS && RHS && LHS->getType() != RHS->getType()) {
+ assert((MinBWs.contains(getOperandEntry(E, 0)) ||
+ MinBWs.contains(getOperandEntry(E, 1))) &&
+ "Expected item in MinBWs.");
+ LHS = Builder.CreateIntCast(LHS, VecTy, IsSigned);
+ RHS = Builder.CreateIntCast(RHS, VecTy, IsSigned);
+ }
Value *V0, *V1;
if (Instruction::isBinaryOp(E->getOpcode())) {
@@ -10708,8 +11764,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// each vector operation.
ValueList OpScalars, AltScalars;
SmallVector<int> Mask;
- buildShuffleEntryMask(
- E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
+ E->buildAltOpShuffleMask(
[E, this](Instruction *I) {
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
return isAlternateInstruction(I, E->getMainOp(), E->getAltOp(),
@@ -10727,6 +11782,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
CSEBlocks.insert(I->getParent());
}
+ if (V->getType() != VecTy && !isa<CmpInst>(VL0))
+ V = Builder.CreateIntCast(
+ V, FixedVectorType::get(ScalarTy, E->getVectorFactor()), IsSigned);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -10767,9 +11825,19 @@ Value *BoUpSLP::vectorizeTree(
// need to rebuild it.
EntryToLastInstruction.clear();
- Builder.SetInsertPoint(ReductionRoot ? ReductionRoot
- : &F->getEntryBlock().front());
- auto *VectorRoot = vectorizeTree(VectorizableTree[0].get());
+ if (ReductionRoot)
+ Builder.SetInsertPoint(ReductionRoot->getParent(),
+ ReductionRoot->getIterator());
+ else
+ Builder.SetInsertPoint(&F->getEntryBlock(), F->getEntryBlock().begin());
+
+ // Postpone emission of PHI operands to avoid cyclic dependency issues.
+ (void)vectorizeTree(VectorizableTree[0].get(), /*PostponedPHIs=*/true);
+ for (const std::unique_ptr<TreeEntry> &TE : VectorizableTree)
+ if (TE->State == TreeEntry::Vectorize &&
+ TE->getOpcode() == Instruction::PHI && !TE->isAltShuffle() &&
+ TE->VectorizedValue)
+ (void)vectorizeTree(TE.get(), /*PostponedPHIs=*/false);
// Run through the list of postponed gathers and emit them, replacing the temp
// emitted allocas with actual vector instructions.
ArrayRef<const TreeEntry *> PostponedNodes = PostponedGathers.getArrayRef();
@@ -10786,9 +11854,32 @@ Value *BoUpSLP::vectorizeTree(
TE->VectorizedValue = nullptr;
auto *UserI =
cast<Instruction>(TE->UserTreeIndices.front().UserTE->VectorizedValue);
- Builder.SetInsertPoint(PrevVec);
+ // If the user is a PHI node, its vector code has to be inserted right
+ // before the block terminator. Since the node was delayed, there were some
+ // unresolved dependencies at the moment the stub instruction was emitted.
+ // If any of these dependencies turns out to be an operand of another PHI
+ // coming from this same block, the position of the stub instruction becomes
+ // invalid. This is because the source vector that is supposed to feed this
+ // gather node was inserted at the end of the block [after the stub
+ // instruction]. So we need to adjust the insertion point again, to the end
+ // of the block.
+ if (isa<PHINode>(UserI)) {
+ // Insert before all users.
+ Instruction *InsertPt = PrevVec->getParent()->getTerminator();
+ for (User *U : PrevVec->users()) {
+ if (U == UserI)
+ continue;
+ auto *UI = dyn_cast<Instruction>(U);
+ if (!UI || isa<PHINode>(UI) || UI->getParent() != InsertPt->getParent())
+ continue;
+ if (UI->comesBefore(InsertPt))
+ InsertPt = UI;
+ }
+ Builder.SetInsertPoint(InsertPt);
+ } else {
+ Builder.SetInsertPoint(PrevVec);
+ }
Builder.SetCurrentDebugLocation(UserI->getDebugLoc());
- Value *Vec = vectorizeTree(TE);
+ Value *Vec = vectorizeTree(TE, /*PostponedPHIs=*/false);
PrevVec->replaceAllUsesWith(Vec);
PostponedValues.try_emplace(Vec).first->second.push_back(TE);
// Replace the stub vector node, if it was used before for one of the
@@ -10801,26 +11892,6 @@ Value *BoUpSLP::vectorizeTree(
eraseInstruction(PrevVec);
}
- // If the vectorized tree can be rewritten in a smaller type, we truncate the
- // vectorized root. InstCombine will then rewrite the entire expression. We
- // sign extend the extracted values below.
- auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
- if (MinBWs.count(ScalarRoot)) {
- if (auto *I = dyn_cast<Instruction>(VectorRoot)) {
- // If current instr is a phi and not the last phi, insert it after the
- // last phi node.
- if (isa<PHINode>(I))
- Builder.SetInsertPoint(&*I->getParent()->getFirstInsertionPt());
- else
- Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
- }
- auto BundleWidth = VectorizableTree[0]->Scalars.size();
- auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
- auto *VecTy = FixedVectorType::get(MinTy, BundleWidth);
- auto *Trunc = Builder.CreateTrunc(VectorRoot, VecTy);
- VectorizableTree[0]->VectorizedValue = Trunc;
- }
-
LLVM_DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size()
<< " values .\n");
@@ -10830,6 +11901,8 @@ Value *BoUpSLP::vectorizeTree(
// Maps extract Scalar to the corresponding extractelement instruction in the
// basic block. Only one extractelement per block should be emitted.
DenseMap<Value *, DenseMap<BasicBlock *, Instruction *>> ScalarToEEs;
+ SmallDenseSet<Value *, 4> UsedInserts;
+ DenseMap<Value *, Value *> VectorCasts;
// Extract all of the elements with the external uses.
for (const auto &ExternalUse : ExternalUses) {
Value *Scalar = ExternalUse.Scalar;
@@ -10864,7 +11937,8 @@ Value *BoUpSLP::vectorizeTree(
Instruction *I = EEIt->second;
if (Builder.GetInsertPoint() != Builder.GetInsertBlock()->end() &&
Builder.GetInsertPoint()->comesBefore(I))
- I->moveBefore(&*Builder.GetInsertPoint());
+ I->moveBefore(*Builder.GetInsertPoint()->getParent(),
+ Builder.GetInsertPoint());
Ex = I;
}
}
@@ -10887,11 +11961,10 @@ Value *BoUpSLP::vectorizeTree(
}
// If necessary, sign-extend or zero-extend ScalarRoot
// to the larger type.
- if (!MinBWs.count(ScalarRoot))
- return Ex;
- if (MinBWs[ScalarRoot].second)
- return Builder.CreateSExt(Ex, Scalar->getType());
- return Builder.CreateZExt(Ex, Scalar->getType());
+ if (Scalar->getType() != Ex->getType())
+ return Builder.CreateIntCast(Ex, Scalar->getType(),
+ MinBWs.find(E)->second.second);
+ return Ex;
}
assert(isa<FixedVectorType>(Scalar->getType()) &&
isa<InsertElementInst>(Scalar) &&
@@ -10909,12 +11982,13 @@ Value *BoUpSLP::vectorizeTree(
"ExternallyUsedValues map");
if (auto *VecI = dyn_cast<Instruction>(Vec)) {
if (auto *PHI = dyn_cast<PHINode>(VecI))
- Builder.SetInsertPoint(PHI->getParent()->getFirstNonPHI());
+ Builder.SetInsertPoint(PHI->getParent(),
+ PHI->getParent()->getFirstNonPHIIt());
else
Builder.SetInsertPoint(VecI->getParent(),
std::next(VecI->getIterator()));
} else {
- Builder.SetInsertPoint(&F->getEntryBlock().front());
+ Builder.SetInsertPoint(&F->getEntryBlock(), F->getEntryBlock().begin());
}
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
// Required to update internally referenced instructions.
@@ -10927,12 +12001,26 @@ Value *BoUpSLP::vectorizeTree(
// Skip if the scalar is another vector op or Vec is not an instruction.
if (!Scalar->getType()->isVectorTy() && isa<Instruction>(Vec)) {
if (auto *FTy = dyn_cast<FixedVectorType>(User->getType())) {
+ if (!UsedInserts.insert(VU).second)
+ continue;
+ // Need to use the original vector if the root is truncated.
+ auto BWIt = MinBWs.find(E);
+ if (BWIt != MinBWs.end() && Vec->getType() != VU->getType()) {
+ auto VecIt = VectorCasts.find(Scalar);
+ if (VecIt == VectorCasts.end()) {
+ IRBuilder<>::InsertPointGuard Guard(Builder);
+ if (auto *IVec = dyn_cast<Instruction>(Vec))
+ Builder.SetInsertPoint(IVec->getNextNonDebugInstruction());
+ Vec = Builder.CreateIntCast(Vec, VU->getType(),
+ BWIt->second.second);
+ VectorCasts.try_emplace(Scalar, Vec);
+ } else {
+ Vec = VecIt->second;
+ }
+ }
+
std::optional<unsigned> InsertIdx = getInsertIndex(VU);
if (InsertIdx) {
- // Need to use original vector, if the root is truncated.
- if (MinBWs.count(Scalar) &&
- VectorizableTree[0]->VectorizedValue == Vec)
- Vec = VectorRoot;
auto *It =
find_if(ShuffledInserts, [VU](const ShuffledInsertData &Data) {
// Checks if 2 insertelements are from the same buildvector.
@@ -10992,18 +12080,18 @@ Value *BoUpSLP::vectorizeTree(
// Find the insertion point for the extractelement lane.
if (auto *VecI = dyn_cast<Instruction>(Vec)) {
if (PHINode *PH = dyn_cast<PHINode>(User)) {
- for (int i = 0, e = PH->getNumIncomingValues(); i != e; ++i) {
- if (PH->getIncomingValue(i) == Scalar) {
+ for (unsigned I : seq<unsigned>(0, PH->getNumIncomingValues())) {
+ if (PH->getIncomingValue(I) == Scalar) {
Instruction *IncomingTerminator =
- PH->getIncomingBlock(i)->getTerminator();
+ PH->getIncomingBlock(I)->getTerminator();
if (isa<CatchSwitchInst>(IncomingTerminator)) {
Builder.SetInsertPoint(VecI->getParent(),
std::next(VecI->getIterator()));
} else {
- Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator());
+ Builder.SetInsertPoint(PH->getIncomingBlock(I)->getTerminator());
}
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
- PH->setOperand(i, NewInst);
+ PH->setOperand(I, NewInst);
}
}
} else {
@@ -11012,7 +12100,7 @@ Value *BoUpSLP::vectorizeTree(
User->replaceUsesOfWith(Scalar, NewInst);
}
} else {
- Builder.SetInsertPoint(&F->getEntryBlock().front());
+ Builder.SetInsertPoint(&F->getEntryBlock(), F->getEntryBlock().begin());
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
User->replaceUsesOfWith(Scalar, NewInst);
}
@@ -11085,7 +12173,7 @@ Value *BoUpSLP::vectorizeTree(
// non-resizing mask.
if (Mask.size() != cast<FixedVectorType>(Vals.front()->getType())
->getNumElements() ||
- !ShuffleVectorInst::isIdentityMask(Mask))
+ !ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
return CreateShuffle(Vals.front(), nullptr, Mask);
return Vals.front();
}
@@ -11676,7 +12764,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
}
}
- auto makeControlDependent = [&](Instruction *I) {
+ auto MakeControlDependent = [&](Instruction *I) {
auto *DepDest = getScheduleData(I);
assert(DepDest && "must be in schedule window");
DepDest->ControlDependencies.push_back(BundleMember);
@@ -11698,7 +12786,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
continue;
// Add the dependency
- makeControlDependent(I);
+ MakeControlDependent(I);
if (!isGuaranteedToTransferExecutionToSuccessor(I))
// Everything past here must be control dependent on I.
@@ -11724,7 +12812,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
continue;
// Add the dependency
- makeControlDependent(I);
+ MakeControlDependent(I);
}
}
@@ -11742,7 +12830,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
continue;
// Add the dependency
- makeControlDependent(I);
+ MakeControlDependent(I);
break;
}
}
@@ -11757,7 +12845,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
"NextLoadStore list for non memory effecting bundle?");
MemoryLocation SrcLoc = getLocation(SrcInst);
bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory();
- unsigned numAliased = 0;
+ unsigned NumAliased = 0;
unsigned DistToSrc = 1;
for (; DepDest; DepDest = DepDest->NextLoadStore) {
@@ -11772,13 +12860,13 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
// check this limit even between two read-only instructions.
if (DistToSrc >= MaxMemDepDistance ||
((SrcMayWrite || DepDest->Inst->mayWriteToMemory()) &&
- (numAliased >= AliasedCheckLimit ||
+ (NumAliased >= AliasedCheckLimit ||
SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)))) {
// We increment the counter only if the locations are aliased
// (instead of counting all alias checks). This gives a better
// balance between reduced runtime and accurate dependencies.
- numAliased++;
+ NumAliased++;
DepDest->MemoryDependencies.push_back(BundleMember);
BundleMember->Dependencies++;
@@ -11880,20 +12968,20 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
// Do the "real" scheduling.
while (!ReadyInsts.empty()) {
- ScheduleData *picked = *ReadyInsts.begin();
+ ScheduleData *Picked = *ReadyInsts.begin();
ReadyInsts.erase(ReadyInsts.begin());
// Move the scheduled instruction(s) to their dedicated places, if not
// there yet.
- for (ScheduleData *BundleMember = picked; BundleMember;
+ for (ScheduleData *BundleMember = Picked; BundleMember;
BundleMember = BundleMember->NextInBundle) {
- Instruction *pickedInst = BundleMember->Inst;
- if (pickedInst->getNextNode() != LastScheduledInst)
- pickedInst->moveBefore(LastScheduledInst);
- LastScheduledInst = pickedInst;
+ Instruction *PickedInst = BundleMember->Inst;
+ if (PickedInst->getNextNode() != LastScheduledInst)
+ PickedInst->moveBefore(LastScheduledInst);
+ LastScheduledInst = PickedInst;
}
- BS->schedule(picked, ReadyInsts);
+ BS->schedule(Picked, ReadyInsts);
}
// Check that we didn't break any of our invariants.
@@ -11994,21 +13082,22 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
// Determine if a value V in a vectorizable expression Expr can be demoted to a
// smaller type with a truncation. We collect the values that will be demoted
// in ToDemote and additional roots that require investigating in Roots.
-static bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr,
- SmallVectorImpl<Value *> &ToDemote,
- SmallVectorImpl<Value *> &Roots) {
+bool BoUpSLP::collectValuesToDemote(
+ Value *V, SmallVectorImpl<Value *> &ToDemote,
+ DenseMap<Instruction *, SmallVector<unsigned>> &DemotedConsts,
+ SmallVectorImpl<Value *> &Roots, DenseSet<Value *> &Visited) const {
// We can always demote constants.
- if (isa<Constant>(V)) {
- ToDemote.push_back(V);
+ if (isa<Constant>(V))
return true;
- }
- // If the value is not an instruction in the expression with only one use, it
- // cannot be demoted.
+ // If the value is not a vectorized instruction in the expression with only
+ // one use, it cannot be demoted.
auto *I = dyn_cast<Instruction>(V);
- if (!I || !I->hasOneUse() || !Expr.count(I))
+ if (!I || !I->hasOneUse() || !getTreeEntry(I) || !Visited.insert(I).second)
return false;
+ unsigned Start = 0;
+ unsigned End = I->getNumOperands();
switch (I->getOpcode()) {
// We can always demote truncations and extensions. Since truncations can
@@ -12030,16 +13119,21 @@ static bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr,
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
- if (!collectValuesToDemote(I->getOperand(0), Expr, ToDemote, Roots) ||
- !collectValuesToDemote(I->getOperand(1), Expr, ToDemote, Roots))
+ if (!collectValuesToDemote(I->getOperand(0), ToDemote, DemotedConsts, Roots,
+ Visited) ||
+ !collectValuesToDemote(I->getOperand(1), ToDemote, DemotedConsts, Roots,
+ Visited))
return false;
break;
// We can demote selects if we can demote their true and false values.
case Instruction::Select: {
+ Start = 1;
SelectInst *SI = cast<SelectInst>(I);
- if (!collectValuesToDemote(SI->getTrueValue(), Expr, ToDemote, Roots) ||
- !collectValuesToDemote(SI->getFalseValue(), Expr, ToDemote, Roots))
+ if (!collectValuesToDemote(SI->getTrueValue(), ToDemote, DemotedConsts,
+ Roots, Visited) ||
+ !collectValuesToDemote(SI->getFalseValue(), ToDemote, DemotedConsts,
+ Roots, Visited))
return false;
break;
}
@@ -12049,7 +13143,8 @@ static bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr,
case Instruction::PHI: {
PHINode *PN = cast<PHINode>(I);
for (Value *IncValue : PN->incoming_values())
- if (!collectValuesToDemote(IncValue, Expr, ToDemote, Roots))
+ if (!collectValuesToDemote(IncValue, ToDemote, DemotedConsts, Roots,
+ Visited))
return false;
break;
}
@@ -12059,6 +13154,10 @@ static bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr,
return false;
}
+ // Gather demoted constant operands.
+ for (unsigned Idx : seq<unsigned>(Start, End))
+ if (isa<Constant>(I->getOperand(Idx)))
+ DemotedConsts.try_emplace(I).first->getSecond().push_back(Idx);
// Record the value that we can demote.
ToDemote.push_back(V);
return true;
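
The recursion above is a standard bottom-up "can this value be narrowed" walk: constants always qualify, and an instruction qualifies only if it is vectorized, has a single use, has not been visited before, and all of its value-producing operands qualify. A stripped-down sketch of the same shape (toy node type, not the pass's data structures):

    #include <unordered_set>
    #include <vector>

    struct Val {
      bool IsConst = false;
      bool SingleUse = true;
      bool Vectorized = true;
      std::vector<Val *> Ops;
    };

    bool canDemote(Val *V, std::unordered_set<Val *> &Visited) {
      if (V->IsConst)
        return true;                  // constants always demote
      if (!V->SingleUse || !V->Vectorized || !Visited.insert(V).second)
        return false;                 // multi-use or revisited: give up
      for (Val *Op : V->Ops)          // all operands must demote too
        if (!canDemote(Op, Visited))
          return false;
      return true;
    }
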
@@ -12076,44 +13175,26 @@ void BoUpSLP::computeMinimumValueSizes() {
if (!TreeRootIT)
return;
- // If the expression is not rooted by a store, these roots should have
- // external uses. We will rely on InstCombine to rewrite the expression in
- // the narrower type. However, InstCombine only rewrites single-use values.
- // This means that if a tree entry other than a root is used externally, it
- // must have multiple uses and InstCombine will not rewrite it. The code
- // below ensures that only the roots are used externally.
- SmallPtrSet<Value *, 32> Expr(TreeRoot.begin(), TreeRoot.end());
- for (auto &EU : ExternalUses)
- if (!Expr.erase(EU.Scalar))
- return;
- if (!Expr.empty())
+ // Ensure the roots of the vectorizable tree don't form a cycle.
+ if (!VectorizableTree.front()->UserTreeIndices.empty())
return;
- // Collect the scalar values of the vectorizable expression. We will use this
- // context to determine which values can be demoted. If we see a truncation,
- // we mark it as seeding another demotion.
- for (auto &EntryPtr : VectorizableTree)
- Expr.insert(EntryPtr->Scalars.begin(), EntryPtr->Scalars.end());
-
- // Ensure the roots of the vectorizable tree don't form a cycle. They must
- // have a single external user that is not in the vectorizable tree.
- for (auto *Root : TreeRoot)
- if (!Root->hasOneUse() || Expr.count(*Root->user_begin()))
- return;
-
// Conservatively determine if we can actually truncate the roots of the
// expression. Collect the values that can be demoted in ToDemote and
// additional roots that require investigating in Roots.
SmallVector<Value *, 32> ToDemote;
+ DenseMap<Instruction *, SmallVector<unsigned>> DemotedConsts;
SmallVector<Value *, 4> Roots;
- for (auto *Root : TreeRoot)
- if (!collectValuesToDemote(Root, Expr, ToDemote, Roots))
+ for (auto *Root : TreeRoot) {
+ DenseSet<Value *> Visited;
+ if (!collectValuesToDemote(Root, ToDemote, DemotedConsts, Roots, Visited))
return;
+ }
// The maximum bit width required to represent all the values that can be
// demoted without loss of precision. It would be safe to truncate the roots
// of the expression to this width.
- auto MaxBitWidth = 8u;
+ auto MaxBitWidth = 1u;
// We first check if all the bits of the roots are demanded. If they're not,
// we can truncate the roots to this narrower type.
@@ -12138,9 +13219,9 @@ void BoUpSLP::computeMinimumValueSizes() {
// maximum bit width required to store the scalar by using ValueTracking to
// compute the number of high-order bits we can truncate.
if (MaxBitWidth == DL->getTypeSizeInBits(TreeRoot[0]->getType()) &&
- llvm::all_of(TreeRoot, [](Value *R) {
- assert(R->hasOneUse() && "Root should have only one use!");
- return isa<GetElementPtrInst>(R->user_back());
+ all_of(TreeRoot, [](Value *V) {
+ return all_of(V->users(),
+ [](User *U) { return isa<GetElementPtrInst>(U); });
})) {
MaxBitWidth = 8u;
@@ -12189,12 +13270,39 @@ void BoUpSLP::computeMinimumValueSizes() {
// If we can truncate the root, we must collect additional values that might
// be demoted as a result. That is, those seeded by truncations we will
// modify.
- while (!Roots.empty())
- collectValuesToDemote(Roots.pop_back_val(), Expr, ToDemote, Roots);
+ while (!Roots.empty()) {
+ DenseSet<Value *> Visited;
+ collectValuesToDemote(Roots.pop_back_val(), ToDemote, DemotedConsts, Roots,
+ Visited);
+ }
// Finally, map the values we can demote to the maximum bit with we computed.
- for (auto *Scalar : ToDemote)
- MinBWs[Scalar] = std::make_pair(MaxBitWidth, !IsKnownPositive);
+ for (auto *Scalar : ToDemote) {
+ auto *TE = getTreeEntry(Scalar);
+ assert(TE && "Expected vectorized scalar.");
+ if (MinBWs.contains(TE))
+ continue;
+ bool IsSigned = any_of(TE->Scalars, [&](Value *R) {
+ KnownBits Known = computeKnownBits(R, *DL);
+ return !Known.isNonNegative();
+ });
+ MinBWs.try_emplace(TE, MaxBitWidth, IsSigned);
+ const auto *I = cast<Instruction>(Scalar);
+ auto DCIt = DemotedConsts.find(I);
+ if (DCIt != DemotedConsts.end()) {
+ for (unsigned Idx : DCIt->getSecond()) {
+ // Check that the operand at this index is a demoted constant in all
+ // the scalars of the entry.
+ if (all_of(TE->Scalars, [&](Value *V) {
+ auto SIt = DemotedConsts.find(cast<Instruction>(V));
+ return SIt != DemotedConsts.end() &&
+ is_contained(SIt->getSecond(), Idx);
+ })) {
+ const TreeEntry *CTE = getOperandEntry(TE, Idx);
+ MinBWs.try_emplace(CTE, MaxBitWidth, IsSigned);
+ }
+ }
+ }
+ }
}
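
The per-entry MinBWs computation boils down to two steps: pick signedness from known bits (signed as soon as any lane may be negative), then take the widest number of significant bits over all lanes. A self-contained sketch of that arithmetic on concrete integers (C++20; illustrative only, the pass applies additional legality rounding on top):

    #include <algorithm>
    #include <bit>
    #include <cstdint>
    #include <vector>

    struct MinBW { unsigned Bits; bool IsSigned; };

    MinBW computeMinBitWidth(const std::vector<int64_t> &Lanes) {
      bool IsSigned = std::any_of(Lanes.begin(), Lanes.end(),
                                  [](int64_t V) { return V < 0; });
      unsigned Bits = 1;
      for (int64_t V : Lanes) {
        // Magnitude bits of the lane; -1 needs 0 magnitude bits, 3 needs 2.
        uint64_t Mag = V < 0 ? ~static_cast<uint64_t>(V)
                             : static_cast<uint64_t>(V);
        unsigned Need = std::bit_width(Mag) + (IsSigned ? 1 : 0);
        Bits = std::max(Bits, std::max(Need, 1u));
      }
      // Round up to a power of two; the real pass has its own rules here.
      return {std::bit_ceil(Bits), IsSigned};
    }
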
PreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &AM) {
@@ -12348,139 +13456,206 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
BoUpSLP::ValueSet VectorizedStores;
bool Changed = false;
- int E = Stores.size();
- SmallBitVector Tails(E, false);
- int MaxIter = MaxStoreLookup.getValue();
- SmallVector<std::pair<int, int>, 16> ConsecutiveChain(
- E, std::make_pair(E, INT_MAX));
- SmallVector<SmallBitVector, 4> CheckedPairs(E, SmallBitVector(E, false));
- int IterCnt;
- auto &&FindConsecutiveAccess = [this, &Stores, &Tails, &IterCnt, MaxIter,
- &CheckedPairs,
- &ConsecutiveChain](int K, int Idx) {
- if (IterCnt >= MaxIter)
- return true;
- if (CheckedPairs[Idx].test(K))
- return ConsecutiveChain[K].second == 1 &&
- ConsecutiveChain[K].first == Idx;
- ++IterCnt;
- CheckedPairs[Idx].set(K);
- CheckedPairs[K].set(Idx);
- std::optional<int> Diff = getPointersDiff(
- Stores[K]->getValueOperand()->getType(), Stores[K]->getPointerOperand(),
- Stores[Idx]->getValueOperand()->getType(),
- Stores[Idx]->getPointerOperand(), *DL, *SE, /*StrictCheck=*/true);
- if (!Diff || *Diff == 0)
- return false;
- int Val = *Diff;
- if (Val < 0) {
- if (ConsecutiveChain[Idx].second > -Val) {
- Tails.set(K);
- ConsecutiveChain[Idx] = std::make_pair(K, -Val);
- }
- return false;
+ // Stores the pairs of stores (first_store, last_store) in a range for which
+ // vectorization was already attempted. Allows skipping store ranges where
+ // previous vectorization attempts were unsuccessful.
+ DenseSet<std::pair<Value *, Value *>> TriedSequences;
+ struct StoreDistCompare {
+ bool operator()(const std::pair<unsigned, int> &Op1,
+ const std::pair<unsigned, int> &Op2) const {
+ return Op1.second < Op2.second;
}
- if (ConsecutiveChain[K].second <= Val)
- return false;
-
- Tails.set(Idx);
- ConsecutiveChain[K] = std::make_pair(Idx, Val);
- return Val == 1;
};
- // Do a quadratic search on all of the given stores in reverse order and find
- // all of the pairs of stores that follow each other.
- for (int Idx = E - 1; Idx >= 0; --Idx) {
- // If a store has multiple consecutive store candidates, search according
- // to the sequence: Idx-1, Idx+1, Idx-2, Idx+2, ...
- // This is because usually pairing with immediate succeeding or preceding
- // candidate create the best chance to find slp vectorization opportunity.
- const int MaxLookDepth = std::max(E - Idx, Idx + 1);
- IterCnt = 0;
- for (int Offset = 1, F = MaxLookDepth; Offset < F; ++Offset)
- if ((Idx >= Offset && FindConsecutiveAccess(Idx - Offset, Idx)) ||
- (Idx + Offset < E && FindConsecutiveAccess(Idx + Offset, Idx)))
- break;
- }
-
- // Tracks if we tried to vectorize stores starting from the given tail
- // already.
- SmallBitVector TriedTails(E, false);
- // For stores that start but don't end a link in the chain:
- for (int Cnt = E; Cnt > 0; --Cnt) {
- int I = Cnt - 1;
- if (ConsecutiveChain[I].first == E || Tails.test(I))
- continue;
- // We found a store instr that starts a chain. Now follow the chain and try
- // to vectorize it.
+ // A set of pairs (index of a store in the Stores array ref, distance of the
+ // store address relative to the base store address, in units).
+ using StoreIndexToDistSet =
+ std::set<std::pair<unsigned, int>, StoreDistCompare>;
+ auto TryToVectorize = [&](const StoreIndexToDistSet &Set) {
+ int PrevDist = -1;
BoUpSLP::ValueList Operands;
// Collect the chain into a list.
- while (I != E && !VectorizedStores.count(Stores[I])) {
- Operands.push_back(Stores[I]);
- Tails.set(I);
- if (ConsecutiveChain[I].second != 1) {
- // Mark the new end in the chain and go back, if required. It might be
- // required if the original stores come in reversed order, for example.
- if (ConsecutiveChain[I].first != E &&
- Tails.test(ConsecutiveChain[I].first) && !TriedTails.test(I) &&
- !VectorizedStores.count(Stores[ConsecutiveChain[I].first])) {
- TriedTails.set(I);
- Tails.reset(ConsecutiveChain[I].first);
- if (Cnt < ConsecutiveChain[I].first + 2)
- Cnt = ConsecutiveChain[I].first + 2;
+ for (auto [Idx, Data] : enumerate(Set)) {
+ if (Operands.empty() || Data.second - PrevDist == 1) {
+ Operands.push_back(Stores[Data.first]);
+ PrevDist = Data.second;
+ if (Idx != Set.size() - 1)
+ continue;
+ }
+ if (Operands.size() <= 1) {
+ Operands.clear();
+ Operands.push_back(Stores[Data.first]);
+ PrevDist = Data.second;
+ continue;
+ }
+
+ unsigned MaxVecRegSize = R.getMaxVecRegSize();
+ unsigned EltSize = R.getVectorElementSize(Operands[0]);
+ unsigned MaxElts = llvm::bit_floor(MaxVecRegSize / EltSize);
+
+ unsigned MaxVF =
+ std::min(R.getMaximumVF(EltSize, Instruction::Store), MaxElts);
+ auto *Store = cast<StoreInst>(Operands[0]);
+ Type *StoreTy = Store->getValueOperand()->getType();
+ Type *ValueTy = StoreTy;
+ if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
+ ValueTy = Trunc->getSrcTy();
+ unsigned MinVF = TTI->getStoreMinimumVF(
+ R.getMinVF(DL->getTypeSizeInBits(ValueTy)), StoreTy, ValueTy);
+
+ if (MaxVF <= MinVF) {
+ LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
+ << ") <= "
+ << "MinVF (" << MinVF << ")\n");
+ }
+
+ // FIXME: Is division-by-2 the correct step? Should we assert that the
+ // register size is a power-of-2?
+ unsigned StartIdx = 0;
+ for (unsigned Size = MaxVF; Size >= MinVF; Size /= 2) {
+ for (unsigned Cnt = StartIdx, E = Operands.size(); Cnt + Size <= E;) {
+ ArrayRef<Value *> Slice = ArrayRef(Operands).slice(Cnt, Size);
+ assert(
+ all_of(
+ Slice,
+ [&](Value *V) {
+ return cast<StoreInst>(V)->getValueOperand()->getType() ==
+ cast<StoreInst>(Slice.front())
+ ->getValueOperand()
+ ->getType();
+ }) &&
+ "Expected all operands of same type.");
+ if (!VectorizedStores.count(Slice.front()) &&
+ !VectorizedStores.count(Slice.back()) &&
+ TriedSequences.insert(std::make_pair(Slice.front(), Slice.back()))
+ .second &&
+ vectorizeStoreChain(Slice, R, Cnt, MinVF)) {
+ // Mark the vectorized stores so that we don't vectorize them again.
+ VectorizedStores.insert(Slice.begin(), Slice.end());
+ Changed = true;
+ // If we vectorized the initial block, there is no need to try to
+ // vectorize it again.
+ if (Cnt == StartIdx)
+ StartIdx += Size;
+ Cnt += Size;
+ continue;
+ }
+ ++Cnt;
}
- break;
+ // Check if the whole array was vectorized already - exit.
+ if (StartIdx >= Operands.size())
+ break;
}
- // Move to the next value in the chain.
- I = ConsecutiveChain[I].first;
+ Operands.clear();
+ Operands.push_back(Stores[Data.first]);
+ PrevDist = Data.second;
}
- assert(!Operands.empty() && "Expected non-empty list of stores.");
+ };
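
TryToVectorize, sketched abstractly: because the set is ordered by distance, consecutive stores appear adjacent, so the loop collects maximal runs whose distances step by exactly one element and hands each run of two or more stores to the chain vectorizer. A compact model of that walk (illustrative, not the pass's types):

    #include <cstdio>
    #include <set>
    #include <vector>

    using IdxDist = std::pair<unsigned, int>; // (store index, dist to base)
    struct ByDist {
      bool operator()(const IdxDist &A, const IdxDist &B) const {
        return A.second < B.second;
      }
    };

    void collectRuns(const std::set<IdxDist, ByDist> &Set) {
      std::vector<unsigned> Run;
      int PrevDist = 0;
      for (const IdxDist &P : Set) {
        if (!Run.empty() && P.second != PrevDist + 1) {
          if (Run.size() >= 2)  // a vectorizable chain of consecutive stores
            std::printf("chain of %zu consecutive stores\n", Run.size());
          Run.clear();
        }
        Run.push_back(P.first);
        PrevDist = P.second;
      }
      if (Run.size() >= 2)
        std::printf("chain of %zu consecutive stores\n", Run.size());
    }
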
- unsigned MaxVecRegSize = R.getMaxVecRegSize();
- unsigned EltSize = R.getVectorElementSize(Operands[0]);
- unsigned MaxElts = llvm::bit_floor(MaxVecRegSize / EltSize);
-
- unsigned MaxVF = std::min(R.getMaximumVF(EltSize, Instruction::Store),
- MaxElts);
- auto *Store = cast<StoreInst>(Operands[0]);
- Type *StoreTy = Store->getValueOperand()->getType();
- Type *ValueTy = StoreTy;
- if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
- ValueTy = Trunc->getSrcTy();
- unsigned MinVF = TTI->getStoreMinimumVF(
- R.getMinVF(DL->getTypeSizeInBits(ValueTy)), StoreTy, ValueTy);
-
- if (MaxVF <= MinVF) {
- LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF << ") <= "
- << "MinVF (" << MinVF << ")\n");
- }
-
- // FIXME: Is division-by-2 the correct step? Should we assert that the
- // register size is a power-of-2?
- unsigned StartIdx = 0;
- for (unsigned Size = MaxVF; Size >= MinVF; Size /= 2) {
- for (unsigned Cnt = StartIdx, E = Operands.size(); Cnt + Size <= E;) {
- ArrayRef<Value *> Slice = ArrayRef(Operands).slice(Cnt, Size);
- if (!VectorizedStores.count(Slice.front()) &&
- !VectorizedStores.count(Slice.back()) &&
- vectorizeStoreChain(Slice, R, Cnt, MinVF)) {
- // Mark the vectorized stores so that we don't vectorize them again.
- VectorizedStores.insert(Slice.begin(), Slice.end());
- Changed = true;
- // If we vectorized initial block, no need to try to vectorize it
- // again.
- if (Cnt == StartIdx)
- StartIdx += Size;
- Cnt += Size;
- continue;
- }
- ++Cnt;
+ // Stores pairs (first: index of the store in the Stores array ref, whose
+ // address is taken as the base; second: sorted set of pairs {index, dist},
+ // which are the indices of stores in the set and their store location
+ // distances relative to the base address).
+
+ // Need to store the index of the very first store separately, since the set
+ // may be reordered after insertion and the first store may be moved. This
+ // container allows reducing the number of calls to getPointersDiff().
+ SmallVector<std::pair<unsigned, StoreIndexToDistSet>> SortedStores;
+ // Inserts the specified store SI with the given index Idx into the set of
+ // stores. If a store with the same distance is already present, stop the
+ // insertion and try to vectorize the stores found so far. If some stores
+ // from this sequence were not vectorized, try to vectorize them together
+ // with the new store later. This logic is applied only to the stores that
+ // come before the previous store with the same distance.
+ // Example:
+ // 1. store x, %p
+ // 2. store y, %p+1
+ // 3. store z, %p+2
+ // 4. store a, %p
+ // 5. store b, %p+3
+ // - Scan this from the last to the first store. The very first bunch of
+ // stores is {5, {{4, -3}, {2, -2}, {3, -1}, {5, 0}}} (the element in the
+ // SortedStores vector).
+ // - The next store in the list - #1 - has the same distance from store #5 as
+ // store #4.
+ // - Try to vectorize the sequence of stores 4,2,3,5.
+ // - If all these stores are vectorized - just drop them.
+ // - If some of them are not vectorized (say, #3 and #5), do extra analysis.
+ // - Start a new stores sequence.
+ // The new bunch of stores is {1, {1, 0}}.
+ // - Add the stores from the previous sequence that were not vectorized.
+ // Here we consider the stores in the reversed order, rather than the order
+ // in which they are used in the IR (Stores are reversed already, see the
+ // vectorizeStoreChains() function).
+ // Store #3 can be added -> it comes after store #4 with the same distance
+ // as store #1.
+ // Store #5 cannot be added - it comes before store #4.
+ // This logic improves compile time: we assume that the stores after the
+ // previous store with the same distance most likely have memory dependencies
+ // and there is no need to waste compile time trying to vectorize them.
+ // - Try to vectorize the sequence {1, {1, 0}, {3, 2}}.
+ auto FillStoresSet = [&](unsigned Idx, StoreInst *SI) {
+ for (std::pair<unsigned, StoreIndexToDistSet> &Set : SortedStores) {
+ std::optional<int> Diff = getPointersDiff(
+ Stores[Set.first]->getValueOperand()->getType(),
+ Stores[Set.first]->getPointerOperand(),
+ SI->getValueOperand()->getType(), SI->getPointerOperand(), *DL, *SE,
+ /*StrictCheck=*/true);
+ if (!Diff)
+ continue;
+ auto It = Set.second.find(std::make_pair(Idx, *Diff));
+ if (It == Set.second.end()) {
+ Set.second.emplace(Idx, *Diff);
+ return;
}
- // Check if the whole array was vectorized already - exit.
- if (StartIdx >= Operands.size())
- break;
+ // Try to vectorize the first found set to avoid duplicate analysis.
+ TryToVectorize(Set.second);
+ StoreIndexToDistSet PrevSet;
+ PrevSet.swap(Set.second);
+ Set.first = Idx;
+ Set.second.emplace(Idx, 0);
+ // Insert the stores that followed the previous match, to try to vectorize
+ // them together with this store.
+ unsigned StartIdx = It->first + 1;
+ SmallBitVector UsedStores(Idx - StartIdx);
+ // Distances to the previously found duplicate store (or to this store,
+ // since they store to the same address).
+ SmallVector<int> Dists(Idx - StartIdx, 0);
+ for (const std::pair<unsigned, int> &Pair : reverse(PrevSet)) {
+ // Do not try to vectorize sequences we have already tried.
+ if (Pair.first <= It->first ||
+ VectorizedStores.contains(Stores[Pair.first]))
+ break;
+ unsigned BI = Pair.first - StartIdx;
+ UsedStores.set(BI);
+ Dists[BI] = Pair.second - It->second;
+ }
+ for (unsigned I = StartIdx; I < Idx; ++I) {
+ unsigned BI = I - StartIdx;
+ if (UsedStores.test(BI))
+ Set.second.emplace(I, Dists[BI]);
+ }
+ return;
}
+ auto &Res = SortedStores.emplace_back();
+ Res.first = Idx;
+ Res.second.emplace(Idx, 0);
+ };
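
Running the five-store example from the comment above through this scheme shows that the duplicate distance is what triggers the early vectorization attempt. A small self-contained driver over the same (index, distance) pairs (assumed names, comparing by distance only, as in StoreDistCompare):

    #include <cstdio>
    #include <set>

    int main() {
      using IdxDist = std::pair<unsigned, int>; // (store #, dist to base)
      auto ByDist = [](const IdxDist &A, const IdxDist &B) {
        return A.second < B.second;
      };
      std::set<IdxDist, decltype(ByDist)> Sorted(ByDist);
      // Bottom-up scan as in the comment: base is store #5 at %p+3, then
      // #4 (%p) -> -3, #3 (%p+2) -> -1, #2 (%p+1) -> -2.
      int Dists[][2] = {{5, 0}, {4, -3}, {3, -1}, {2, -2}};
      for (auto [Idx, D] : Dists)
        Sorted.emplace(Idx, D);
      // Store #1 also stores to %p -> distance -3, same as store #4:
      if (!Sorted.emplace(1, -3).second)
        std::printf("store #1 duplicates store #4 -> try set {4,2,3,5}\n");
    }
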
+ StoreInst *PrevStore = Stores.front();
+ for (auto [I, SI] : enumerate(Stores)) {
+ // Check that we do not try to vectorize stores of different types.
+ if (PrevStore->getValueOperand()->getType() !=
+ SI->getValueOperand()->getType()) {
+ for (auto &Set : SortedStores)
+ TryToVectorize(Set.second);
+ SortedStores.clear();
+ PrevStore = SI;
+ }
+ FillStoresSet(I, SI);
}
+ // Final vectorization attempt.
+ for (auto &Set : SortedStores)
+ TryToVectorize(Set.second);
+
return Changed;
}
@@ -12507,8 +13682,10 @@ void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
// constant index, or a pointer operand that doesn't point to a scalar
// type.
else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
- auto Idx = GEP->idx_begin()->get();
- if (GEP->getNumIndices() > 1 || isa<Constant>(Idx))
+ if (GEP->getNumIndices() != 1)
+ continue;
+ Value *Idx = GEP->idx_begin()->get();
+ if (isa<Constant>(Idx))
continue;
if (!isValidElementType(Idx->getType()))
continue;
@@ -12542,8 +13719,8 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
// NOTE: the following will give user internal llvm type name, which may
// not be useful.
R.getORE()->emit([&]() {
- std::string type_str;
- llvm::raw_string_ostream rso(type_str);
+ std::string TypeStr;
+ llvm::raw_string_ostream rso(TypeStr);
Ty->print(rso);
return OptimizationRemarkMissed(SV_NAME, "UnsupportedType", I0)
<< "Cannot SLP vectorize list: type "
@@ -12878,10 +14055,12 @@ class HorizontalReduction {
static Value *createOp(IRBuilder<> &Builder, RecurKind RdxKind, Value *LHS,
Value *RHS, const Twine &Name,
const ReductionOpsListType &ReductionOps) {
- bool UseSelect = ReductionOps.size() == 2 ||
- // Logical or/and.
- (ReductionOps.size() == 1 &&
- isa<SelectInst>(ReductionOps.front().front()));
+ bool UseSelect =
+ ReductionOps.size() == 2 ||
+ // Logical or/and.
+ (ReductionOps.size() == 1 && any_of(ReductionOps.front(), [](Value *V) {
+ return isa<SelectInst>(V);
+ }));
assert((!UseSelect || ReductionOps.size() != 2 ||
isa<SelectInst>(ReductionOps[1][0])) &&
"Expected cmp + select pairs for reduction");
@@ -13315,12 +14494,26 @@ public:
// Update the final value in the reduction.
Builder.SetCurrentDebugLocation(
cast<Instruction>(ReductionOps.front().front())->getDebugLoc());
+ if ((isa<PoisonValue>(VectorizedTree) && !isa<PoisonValue>(Res)) ||
+ (isGuaranteedNotToBePoison(Res) &&
+ !isGuaranteedNotToBePoison(VectorizedTree))) {
+ auto It = ReducedValsToOps.find(Res);
+ if (It != ReducedValsToOps.end() &&
+ any_of(It->getSecond(),
+ [](Instruction *I) { return isBoolLogicOp(I); }))
+ std::swap(VectorizedTree, Res);
+ }
+
return createOp(Builder, RdxKind, VectorizedTree, Res, "op.rdx",
ReductionOps);
}
// Initialize the final value in the reduction.
return Res;
};
+ bool AnyBoolLogicOp =
+ any_of(ReductionOps.back(), [](Value *V) {
+ return isBoolLogicOp(cast<Instruction>(V));
+ });
// The reduction root is used as the insertion point for new instructions,
// so set it as externally used to prevent it from being deleted.
ExternallyUsedValues[ReductionRoot];
@@ -13364,10 +14557,12 @@ public:
// Check if the reduction value was not overriden by the extractelement
// instruction because of the vectorization and exclude it, if it is not
// compatible with other values.
- if (auto *Inst = dyn_cast<Instruction>(RdxVal))
- if (isVectorLikeInstWithConstOps(Inst) &&
- (!S.getOpcode() || !S.isOpcodeOrAlt(Inst)))
- continue;
+ // Also check if the instruction was folded to constant/other value.
+ auto *Inst = dyn_cast<Instruction>(RdxVal);
+ if ((Inst && isVectorLikeInstWithConstOps(Inst) &&
+ (!S.getOpcode() || !S.isOpcodeOrAlt(Inst))) ||
+ (S.getOpcode() && !Inst))
+ continue;
Candidates.push_back(RdxVal);
TrackedToOrig.try_emplace(RdxVal, OrigReducedVals[Cnt]);
}
@@ -13543,11 +14738,9 @@ public:
for (unsigned Cnt = 0, Sz = ReducedVals.size(); Cnt < Sz; ++Cnt) {
if (Cnt == I || (ShuffledExtracts && Cnt == I - 1))
continue;
- for_each(ReducedVals[Cnt],
- [&LocalExternallyUsedValues, &TrackedVals](Value *V) {
- if (isa<Instruction>(V))
- LocalExternallyUsedValues[TrackedVals[V]];
- });
+ for (Value *V : ReducedVals[Cnt])
+ if (isa<Instruction>(V))
+ LocalExternallyUsedValues[TrackedVals[V]];
}
if (!IsSupportedHorRdxIdentityOp) {
// Number of uses of the candidates in the vector of values.
@@ -13591,7 +14784,7 @@ public:
// Update LocalExternallyUsedValues for the scalar, replaced by
// extractelement instructions.
for (const std::pair<Value *, Value *> &Pair : ReplacedExternals) {
- auto It = ExternallyUsedValues.find(Pair.first);
+ auto *It = ExternallyUsedValues.find(Pair.first);
if (It == ExternallyUsedValues.end())
continue;
LocalExternallyUsedValues[Pair.second].append(It->second);
@@ -13605,7 +14798,8 @@ public:
InstructionCost ReductionCost =
getReductionCost(TTI, VL, IsCmpSelMinMax, ReduxWidth, RdxFMF);
InstructionCost Cost = TreeCost + ReductionCost;
- LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost << " for reduction\n");
+ LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost
+ << " for reduction\n");
if (!Cost.isValid())
return nullptr;
if (Cost >= -SLPCostThreshold) {
@@ -13652,7 +14846,9 @@ public:
// To prevent poison from leaking across what used to be sequential,
// safe, scalar boolean logic operations, the reduction operand must be
// frozen.
- if (isBoolLogicOp(RdxRootInst))
+ if ((isBoolLogicOp(RdxRootInst) ||
+ (AnyBoolLogicOp && VL.size() != TrackedVals.size())) &&
+ !isGuaranteedNotToBePoison(VectorizedRoot))
VectorizedRoot = Builder.CreateFreeze(VectorizedRoot);
// Emit code to correctly handle reused reduced values, if required.
@@ -13664,6 +14860,16 @@ public:
Value *ReducedSubTree =
emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
+ if (ReducedSubTree->getType() != VL.front()->getType()) {
+ ReducedSubTree = Builder.CreateIntCast(
+ ReducedSubTree, VL.front()->getType(), any_of(VL, [&](Value *R) {
+ KnownBits Known = computeKnownBits(
+ R, cast<Instruction>(ReductionOps.front().front())
+ ->getModule()
+ ->getDataLayout());
+ return !Known.isNonNegative();
+ }));
+ }
// Improved analysis for add/fadd/xor reductions with same scale factor
// for all operands of reductions. We can emit scalar ops for them
@@ -13716,31 +14922,33 @@ public:
// RedOp2 = select i1 ?, i1 RHS, i1 false
// Then, we must freeze LHS in the new op.
- auto &&FixBoolLogicalOps =
- [&Builder, VectorizedTree](Value *&LHS, Value *&RHS,
- Instruction *RedOp1, Instruction *RedOp2) {
- if (!isBoolLogicOp(RedOp1))
- return;
- if (LHS == VectorizedTree || getRdxOperand(RedOp1, 0) == LHS ||
- isGuaranteedNotToBePoison(LHS))
- return;
- if (!isBoolLogicOp(RedOp2))
- return;
- if (RHS == VectorizedTree || getRdxOperand(RedOp2, 0) == RHS ||
- isGuaranteedNotToBePoison(RHS)) {
- std::swap(LHS, RHS);
- return;
- }
- LHS = Builder.CreateFreeze(LHS);
- };
+ auto FixBoolLogicalOps = [&, VectorizedTree](Value *&LHS, Value *&RHS,
+ Instruction *RedOp1,
+ Instruction *RedOp2,
+ bool InitStep) {
+ if (!AnyBoolLogicOp)
+ return;
+ if (isBoolLogicOp(RedOp1) &&
+ ((!InitStep && LHS == VectorizedTree) ||
+ getRdxOperand(RedOp1, 0) == LHS || isGuaranteedNotToBePoison(LHS)))
+ return;
+ if (isBoolLogicOp(RedOp2) && ((!InitStep && RHS == VectorizedTree) ||
+ getRdxOperand(RedOp2, 0) == RHS ||
+ isGuaranteedNotToBePoison(RHS))) {
+ std::swap(LHS, RHS);
+ return;
+ }
+ if (LHS != VectorizedTree)
+ LHS = Builder.CreateFreeze(LHS);
+ };
// Finish the reduction.
// Need to add extra arguments and not vectorized possible reduction
// values.
// Try to avoid dependencies between the scalar remainders after
// reductions.
- auto &&FinalGen =
- [this, &Builder, &TrackedVals, &FixBoolLogicalOps](
- ArrayRef<std::pair<Instruction *, Value *>> InstVals) {
+ auto FinalGen =
+ [&](ArrayRef<std::pair<Instruction *, Value *>> InstVals,
+ bool InitStep) {
unsigned Sz = InstVals.size();
SmallVector<std::pair<Instruction *, Value *>> ExtraReds(Sz / 2 +
Sz % 2);
@@ -13761,7 +14969,7 @@ public:
// sequential, safe, scalar boolean logic operations, the
// reduction operand must be frozen.
FixBoolLogicalOps(StableRdxVal1, StableRdxVal2, InstVals[I].first,
- RedOp);
+ RedOp, InitStep);
Value *ExtraRed = createOp(Builder, RdxKind, StableRdxVal1,
StableRdxVal2, "op.rdx", ReductionOps);
ExtraReds[I / 2] = std::make_pair(InstVals[I].first, ExtraRed);
@@ -13791,11 +14999,13 @@ public:
ExtraReductions.emplace_back(I, Pair.first);
}
// Iterate through all not-vectorized reduction values/extra arguments.
+ bool InitStep = true;
while (ExtraReductions.size() > 1) {
VectorizedTree = ExtraReductions.front().second;
SmallVector<std::pair<Instruction *, Value *>> NewReds =
- FinalGen(ExtraReductions);
+ FinalGen(ExtraReductions, InitStep);
ExtraReductions.swap(NewReds);
+ InitStep = false;
}
VectorizedTree = ExtraReductions.front().second;
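
The remainder folding driven by FinalGen is a log-depth pairwise reduction: each round combines adjacent entries and halves the list until one value remains, with InitStep true only for the first round. A sketch of the loop shape on plain integers (illustrative types):

    #include <functional>
    #include <vector>

    int reduceRemainders(std::vector<int> Vals,
                         const std::function<int(int, int)> &Op) {
      while (Vals.size() > 1) {
        std::vector<int> Next((Vals.size() + 1) / 2);
        for (size_t I = 0; I + 1 < Vals.size(); I += 2)
          Next[I / 2] = Op(Vals[I], Vals[I + 1]);
        if (Vals.size() % 2)   // odd trailing element moves up unchanged
          Next.back() = Vals.back();
        Vals = std::move(Next);
      }
      return Vals.front();
    }
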
@@ -13842,8 +15052,7 @@ private:
bool IsCmpSelMinMax, unsigned ReduxWidth,
FastMathFlags FMF) {
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
- Value *FirstReducedVal = ReducedVals.front();
- Type *ScalarTy = FirstReducedVal->getType();
+ Type *ScalarTy = ReducedVals.front()->getType();
FixedVectorType *VectorTy = FixedVectorType::get(ScalarTy, ReduxWidth);
InstructionCost VectorCost = 0, ScalarCost;
// If all of the reduced values are constant, the vector cost is 0, since
@@ -13917,7 +15126,7 @@ private:
}
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << VectorCost - ScalarCost
- << " for reduction that starts with " << *FirstReducedVal
+ << " for reduction of " << shortBundleName(ReducedVals)
<< " (It is a splitting reduction)\n");
return VectorCost - ScalarCost;
}
@@ -13932,7 +15141,7 @@ private:
"A call to the llvm.fmuladd intrinsic is not handled yet");
++NumVectorInstructions;
- return createSimpleTargetReduction(Builder, TTI, VectorizedValue, RdxKind);
+ return createSimpleTargetReduction(Builder, VectorizedValue, RdxKind);
}
/// Emits optimized code for unique scalar value reused \p Cnt times.
@@ -13979,8 +15188,8 @@ private:
case RecurKind::Mul:
case RecurKind::FMul:
case RecurKind::FMulAdd:
- case RecurKind::SelectICmp:
- case RecurKind::SelectFCmp:
+ case RecurKind::IAnyOf:
+ case RecurKind::FAnyOf:
case RecurKind::None:
llvm_unreachable("Unexpected reduction kind for repeated scalar.");
}
@@ -14068,8 +15277,8 @@ private:
case RecurKind::Mul:
case RecurKind::FMul:
case RecurKind::FMulAdd:
- case RecurKind::SelectICmp:
- case RecurKind::SelectFCmp:
+ case RecurKind::IAnyOf:
+ case RecurKind::FAnyOf:
case RecurKind::None:
llvm_unreachable("Unexpected reduction kind for reused scalars.");
}
@@ -14164,8 +15373,8 @@ static bool findBuildAggregate(Instruction *LastInsertInst,
InsertElts.resize(*AggregateSize);
findBuildAggregate_rec(LastInsertInst, TTI, BuildVectorOpds, InsertElts, 0);
- llvm::erase_value(BuildVectorOpds, nullptr);
- llvm::erase_value(InsertElts, nullptr);
+ llvm::erase(BuildVectorOpds, nullptr);
+ llvm::erase(InsertElts, nullptr);
if (BuildVectorOpds.size() >= 2)
return true;
@@ -14401,8 +15610,7 @@ bool SLPVectorizerPass::tryToVectorize(ArrayRef<WeakTrackingVH> Insts,
bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
BasicBlock *BB, BoUpSLP &R) {
- const DataLayout &DL = BB->getModule()->getDataLayout();
- if (!R.canMapToVector(IVI->getType(), DL))
+ if (!R.canMapToVector(IVI->getType()))
return false;
SmallVector<Value *, 16> BuildVectorOpds;
@@ -14541,11 +15749,11 @@ static bool compareCmp(Value *V, Value *V2, TargetLibraryInfo &TLI,
if (BasePred1 > BasePred2)
return false;
// Compare operands.
- bool LEPreds = Pred1 <= Pred2;
- bool GEPreds = Pred1 >= Pred2;
+ bool CI1Preds = Pred1 == BasePred1;
+ bool CI2Preds = Pred2 == BasePred1;
for (int I = 0, E = CI1->getNumOperands(); I < E; ++I) {
- auto *Op1 = CI1->getOperand(LEPreds ? I : E - I - 1);
- auto *Op2 = CI2->getOperand(GEPreds ? I : E - I - 1);
+ auto *Op1 = CI1->getOperand(CI1Preds ? I : E - I - 1);
+ auto *Op2 = CI2->getOperand(CI2Preds ? I : E - I - 1);
if (Op1->getValueID() < Op2->getValueID())
return !IsCompatibility;
if (Op1->getValueID() > Op2->getValueID())
@@ -14691,14 +15899,20 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
return true;
if (Opcodes1.size() > Opcodes2.size())
return false;
- std::optional<bool> ConstOrder;
for (int I = 0, E = Opcodes1.size(); I < E; ++I) {
// Undefs are compatible with any other value.
if (isa<UndefValue>(Opcodes1[I]) || isa<UndefValue>(Opcodes2[I])) {
- if (!ConstOrder)
- ConstOrder =
- !isa<UndefValue>(Opcodes1[I]) && isa<UndefValue>(Opcodes2[I]);
- continue;
+ if (isa<Instruction>(Opcodes1[I]))
+ return true;
+ if (isa<Instruction>(Opcodes2[I]))
+ return false;
+ if (isa<Constant>(Opcodes1[I]) && !isa<UndefValue>(Opcodes1[I]))
+ return true;
+ if (isa<Constant>(Opcodes2[I]) && !isa<UndefValue>(Opcodes2[I]))
+ return false;
+ if (isa<UndefValue>(Opcodes1[I]) && isa<UndefValue>(Opcodes2[I]))
+ continue;
+ return isa<UndefValue>(Opcodes2[I]);
}
if (auto *I1 = dyn_cast<Instruction>(Opcodes1[I]))
if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
@@ -14714,21 +15928,26 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
if (NodeI1 != NodeI2)
return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
InstructionsState S = getSameOpcode({I1, I2}, *TLI);
- if (S.getOpcode())
+ if (S.getOpcode() && !S.isAltShuffle())
continue;
return I1->getOpcode() < I2->getOpcode();
}
- if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I])) {
- if (!ConstOrder)
- ConstOrder = Opcodes1[I]->getValueID() < Opcodes2[I]->getValueID();
- continue;
- }
+ if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I]))
+ return Opcodes1[I]->getValueID() < Opcodes2[I]->getValueID();
+ if (isa<Instruction>(Opcodes1[I]))
+ return true;
+ if (isa<Instruction>(Opcodes2[I]))
+ return false;
+ if (isa<Constant>(Opcodes1[I]))
+ return true;
+ if (isa<Constant>(Opcodes2[I]))
+ return false;
if (Opcodes1[I]->getValueID() < Opcodes2[I]->getValueID())
return true;
if (Opcodes1[I]->getValueID() > Opcodes2[I]->getValueID())
return false;
}
- return ConstOrder && *ConstOrder;
+ return false;
};
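
The comparator's tie-breaking above encodes a strict priority among operand kinds: instructions sort first, then non-undef constants, then undefs, with value IDs as the final tie-break. The same rule as a standalone total-order sketch (hypothetical enum, standing in for the isa<> tests):

    enum class Kind { Instruction = 0, Constant = 1, Undef = 2 };

    // Returns true when A should sort before B; equal kinds fall back to an
    // opaque value ID, mirroring getValueID() in the pass.
    bool opcodeLess(Kind A, unsigned IdA, Kind B, unsigned IdB) {
      if (A != B)
        return static_cast<int>(A) < static_cast<int>(B);
      return IdA < IdB;
    }
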
auto AreCompatiblePHIs = [&PHIToOpcodes, this](Value *V1, Value *V2) {
if (V1 == V2)
@@ -14776,6 +15995,9 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
Incoming.push_back(P);
}
+ if (Incoming.size() <= 1)
+ break;
+
// Find the corresponding non-phi nodes for better matching when trying to
// build the tree.
for (Value *V : Incoming) {
@@ -14838,41 +16060,41 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
return I->use_empty() &&
(I->getType()->isVoidTy() || isa<CallInst, InvokeInst>(I));
};
- for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ for (BasicBlock::iterator It = BB->begin(), E = BB->end(); It != E; ++It) {
// Skip instructions with scalable type. The num of elements is unknown at
// compile-time for scalable type.
- if (isa<ScalableVectorType>(it->getType()))
+ if (isa<ScalableVectorType>(It->getType()))
continue;
// Skip instructions marked for the deletion.
- if (R.isDeleted(&*it))
+ if (R.isDeleted(&*It))
continue;
// We may go through BB multiple times so skip the one we have checked.
- if (!VisitedInstrs.insert(&*it).second) {
- if (HasNoUsers(&*it) &&
- VectorizeInsertsAndCmps(/*VectorizeCmps=*/it->isTerminator())) {
+ if (!VisitedInstrs.insert(&*It).second) {
+ if (HasNoUsers(&*It) &&
+ VectorizeInsertsAndCmps(/*VectorizeCmps=*/It->isTerminator())) {
// We would like to start over since some instructions are deleted
// and the iterator may become invalid value.
Changed = true;
- it = BB->begin();
- e = BB->end();
+ It = BB->begin();
+ E = BB->end();
}
continue;
}
- if (isa<DbgInfoIntrinsic>(it))
+ if (isa<DbgInfoIntrinsic>(It))
continue;
// Try to vectorize reductions that use PHINodes.
- if (PHINode *P = dyn_cast<PHINode>(it)) {
+ if (PHINode *P = dyn_cast<PHINode>(It)) {
// Check that the PHI is a reduction PHI.
if (P->getNumIncomingValues() == 2) {
// Try to match and vectorize a horizontal reduction.
Instruction *Root = getReductionInstr(DT, P, BB, LI);
if (Root && vectorizeRootInstruction(P, Root, BB, R, TTI)) {
Changed = true;
- it = BB->begin();
- e = BB->end();
+ It = BB->begin();
+ E = BB->end();
continue;
}
}
@@ -14897,23 +16119,23 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
continue;
}
- if (HasNoUsers(&*it)) {
+ if (HasNoUsers(&*It)) {
bool OpsChanged = false;
- auto *SI = dyn_cast<StoreInst>(it);
+ auto *SI = dyn_cast<StoreInst>(It);
bool TryToVectorizeRoot = ShouldStartVectorizeHorAtStore || !SI;
if (SI) {
- auto I = Stores.find(getUnderlyingObject(SI->getPointerOperand()));
+ auto *I = Stores.find(getUnderlyingObject(SI->getPointerOperand()));
// Try to vectorize chain in store, if this is the only store to the
// address in the block.
// TODO: This is just a temporarily solution to save compile time. Need
// to investigate if we can safely turn on slp-vectorize-hor-store
// instead to allow lookup for reduction chains in all non-vectorized
// stores (need to check side effects and compile time).
- TryToVectorizeRoot = (I == Stores.end() || I->second.size() == 1) &&
- SI->getValueOperand()->hasOneUse();
+ TryToVectorizeRoot |= (I == Stores.end() || I->second.size() == 1) &&
+ SI->getValueOperand()->hasOneUse();
}
if (TryToVectorizeRoot) {
- for (auto *V : it->operand_values()) {
+ for (auto *V : It->operand_values()) {
// Postponed instructions should not be vectorized here, delay their
// vectorization.
if (auto *VI = dyn_cast<Instruction>(V);
@@ -14926,21 +16148,21 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// top-tree instructions to try to vectorize as many instructions as
// possible.
OpsChanged |=
- VectorizeInsertsAndCmps(/*VectorizeCmps=*/it->isTerminator());
+ VectorizeInsertsAndCmps(/*VectorizeCmps=*/It->isTerminator());
if (OpsChanged) {
// We would like to start over since some instructions are deleted
// and the iterator may become invalid value.
Changed = true;
- it = BB->begin();
- e = BB->end();
+ It = BB->begin();
+ E = BB->end();
continue;
}
}
- if (isa<InsertElementInst, InsertValueInst>(it))
- PostProcessInserts.insert(&*it);
- else if (isa<CmpInst>(it))
- PostProcessCmps.insert(cast<CmpInst>(&*it));
+ if (isa<InsertElementInst, InsertValueInst>(It))
+ PostProcessInserts.insert(&*It);
+ else if (isa<CmpInst>(It))
+ PostProcessCmps.insert(cast<CmpInst>(&*It));
}
return Changed;
@@ -15044,6 +16266,12 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
// compatible (have the same opcode, same parent), otherwise it is
// definitely not profitable to try to vectorize them.
auto &&StoreSorter = [this](StoreInst *V, StoreInst *V2) {
+ if (V->getValueOperand()->getType()->getTypeID() <
+ V2->getValueOperand()->getType()->getTypeID())
+ return true;
+ if (V->getValueOperand()->getType()->getTypeID() >
+ V2->getValueOperand()->getType()->getTypeID())
+ return false;
if (V->getPointerOperandType()->getTypeID() <
V2->getPointerOperandType()->getTypeID())
return true;
@@ -15082,6 +16310,8 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
auto &&AreCompatibleStores = [this](StoreInst *V1, StoreInst *V2) {
if (V1 == V2)
return true;
+ if (V1->getValueOperand()->getType() != V2->getValueOperand()->getType())
+ return false;
if (V1->getPointerOperandType() != V2->getPointerOperandType())
return false;
// Undefs are compatible with any other value.
@@ -15113,8 +16343,13 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
if (!isValidElementType(Pair.second.front()->getValueOperand()->getType()))
continue;
+ // Reverse the stores to do bottom-to-top analysis. This is important if
+ // values are stored to the same addresses several times, in which case we
+ // need to follow the store order (reversed, to meet the memory
+ // dependencies).
+ SmallVector<StoreInst *> ReversedStores(Pair.second.rbegin(),
+ Pair.second.rend());
Changed |= tryToVectorizeSequence<StoreInst>(
- Pair.second, StoreSorter, AreCompatibleStores,
+ ReversedStores, StoreSorter, AreCompatibleStores,
[this, &R](ArrayRef<StoreInst *> Candidates, bool) {
return vectorizeStores(Candidates, R);
},
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 1271d1424c03..7ff6749a0908 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -133,9 +133,12 @@ public:
Ingredient2Recipe[I] = R;
}
+ /// Create the mask for the vector loop header block.
+ void createHeaderMask(VPlan &Plan);
+
/// A helper function that computes the predicate of the block BB, assuming
- /// that the header block of the loop is set to True. It returns the *entry*
- /// mask for the block BB.
+ /// that the header block of the loop is set to True or the loop mask when
+ /// tail folding. It returns the *entry* mask for the block BB.
VPValue *createBlockInMask(BasicBlock *BB, VPlan &Plan);
/// A helper function that computes the predicate of the edge between SRC
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp
index e81b88fd8099..1d7df9c9575a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -19,7 +19,6 @@
#include "VPlan.h"
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
@@ -234,6 +233,99 @@ Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
// set(Def, Extract, Instance);
return Extract;
}
+
+Value *VPTransformState::get(VPValue *Def, unsigned Part) {
+ // If Values have been set for this Def return the one relevant for \p Part.
+ if (hasVectorValue(Def, Part))
+ return Data.PerPartOutput[Def][Part];
+
+ auto GetBroadcastInstrs = [this, Def](Value *V) {
+ bool SafeToHoist = Def->isDefinedOutsideVectorRegions();
+ if (VF.isScalar())
+ return V;
+ // Place the code for broadcasting invariant variables in the new preheader.
+ IRBuilder<>::InsertPointGuard Guard(Builder);
+ if (SafeToHoist) {
+ BasicBlock *LoopVectorPreHeader = CFG.VPBB2IRBB[cast<VPBasicBlock>(
+ Plan->getVectorLoopRegion()->getSinglePredecessor())];
+ if (LoopVectorPreHeader)
+ Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
+ }
+
+ // Broadcast the scalar into all locations in the vector.
+ Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
+
+ return Shuf;
+ };
+
+ if (!hasScalarValue(Def, {Part, 0})) {
+ assert(Def->isLiveIn() && "expected a live-in");
+ if (Part != 0)
+ return get(Def, 0);
+ Value *IRV = Def->getLiveInIRValue();
+ Value *B = GetBroadcastInstrs(IRV);
+ set(Def, B, Part);
+ return B;
+ }
+
+ Value *ScalarValue = get(Def, {Part, 0});
+ // If we aren't vectorizing, we can just copy the scalar map values over
+ // to the vector map.
+ if (VF.isScalar()) {
+ set(Def, ScalarValue, Part);
+ return ScalarValue;
+ }
+
+ bool IsUniform = vputils::isUniformAfterVectorization(Def);
+
+ unsigned LastLane = IsUniform ? 0 : VF.getKnownMinValue() - 1;
+ // Check if there is a scalar value for the selected lane.
+ if (!hasScalarValue(Def, {Part, LastLane})) {
+ // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
+ // VPExpandSCEVRecipes can also be uniform.
+ assert((isa<VPWidenIntOrFpInductionRecipe>(Def->getDefiningRecipe()) ||
+ isa<VPScalarIVStepsRecipe>(Def->getDefiningRecipe()) ||
+ isa<VPExpandSCEVRecipe>(Def->getDefiningRecipe())) &&
+ "unexpected recipe found to be invariant");
+ IsUniform = true;
+ LastLane = 0;
+ }
+
+ auto *LastInst = cast<Instruction>(get(Def, {Part, LastLane}));
+ // Set the insert point after the last scalarized instruction or after the
+ // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
+ // will directly follow the scalar definitions.
+ auto OldIP = Builder.saveIP();
+ auto NewIP =
+ isa<PHINode>(LastInst)
+ ? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI())
+ : std::next(BasicBlock::iterator(LastInst));
+ Builder.SetInsertPoint(&*NewIP);
+
+ // However, if we are vectorizing, we need to construct the vector values.
+ // If the value is known to be uniform after vectorization, we can just
+ // broadcast the scalar value corresponding to lane zero for each unroll
+ // iteration. Otherwise, we construct the vector values using
+ // insertelement instructions. Since the resulting vectors are stored in
+ // State, we will only generate the insertelements once.
+ Value *VectorValue = nullptr;
+ if (IsUniform) {
+ VectorValue = GetBroadcastInstrs(ScalarValue);
+ set(Def, VectorValue, Part);
+ } else {
+ // Initialize packing with insertelements to start from poison.
+ assert(!VF.isScalable() && "VF is assumed to be non scalable.");
+ Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF));
+ set(Def, Undef, Part);
+ for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
+ packScalarIntoVectorValue(Def, {Part, Lane});
+ VectorValue = get(Def, Part);
+ }
+ Builder.restoreIP(OldIP);
+ return VectorValue;
+}
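
The fallback path of VPTransformState::get is a per-(definition, part) cache with two materialization strategies: a uniform definition is broadcast from lane 0, anything else is packed lane by lane with insertelement. A toy cache with the same shape (std::vector standing in for an IR vector value; names are illustrative):

    #include <map>
    #include <vector>

    struct PartCache {
      std::map<std::pair<int, unsigned>, std::vector<int>> PerPart;

      std::vector<int> &get(int Def, unsigned Part, bool IsUniform,
                            const std::vector<int> &Lanes) {
        auto It = PerPart.find({Def, Part});
        if (It != PerPart.end())
          return It->second;  // already materialized for this part
        std::vector<int> V = IsUniform
                                 ? std::vector<int>(Lanes.size(), Lanes[0])
                                 : Lanes;  // "pack" one lane at a time
        return PerPart.emplace(std::pair{Def, Part}, std::move(V))
            .first->second;
      }
    };
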
+
BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
@@ -267,18 +359,15 @@ void VPTransformState::addMetadata(ArrayRef<Value *> To, Instruction *From) {
}
}
-void VPTransformState::setDebugLocFromInst(const Value *V) {
- const Instruction *Inst = dyn_cast<Instruction>(V);
- if (!Inst) {
- Builder.SetCurrentDebugLocation(DebugLoc());
- return;
- }
-
- const DILocation *DIL = Inst->getDebugLoc();
+void VPTransformState::setDebugLocFrom(DebugLoc DL) {
+ const DILocation *DIL = DL;
// When a FSDiscriminator is enabled, we don't need to add the multiply
// factors to the discriminators.
- if (DIL && Inst->getFunction()->shouldEmitDebugInfoForProfiling() &&
- !Inst->isDebugOrPseudoInst() && !EnableFSDiscriminator) {
+ if (DIL &&
+ Builder.GetInsertBlock()
+ ->getParent()
+ ->shouldEmitDebugInfoForProfiling() &&
+ !EnableFSDiscriminator) {
// FIXME: For scalable vectors, assume vscale=1.
auto NewDIL =
DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
@@ -291,6 +380,15 @@ void VPTransformState::setDebugLocFromInst(const Value *V) {
Builder.SetCurrentDebugLocation(DIL);
}
+void VPTransformState::packScalarIntoVectorValue(VPValue *Def,
+ const VPIteration &Instance) {
+ Value *ScalarInst = get(Def, Instance);
+ Value *VectorValue = get(Def, Instance.Part);
+ VectorValue = Builder.CreateInsertElement(
+ VectorValue, ScalarInst, Instance.Lane.getAsRuntimeExpr(Builder, VF));
+ set(Def, VectorValue, Instance.Part);
+}
+
BasicBlock *
VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
// BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
@@ -616,22 +714,17 @@ VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE) {
auto Plan = std::make_unique<VPlan>(Preheader, VecPreheader);
Plan->TripCount =
vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE);
+ // Create empty VPRegionBlock, to be filled during processing later.
+ auto *TopRegion = new VPRegionBlock("vector loop", false /*isReplicator*/);
+ VPBlockUtils::insertBlockAfter(TopRegion, VecPreheader);
+ VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block");
+ VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
return Plan;
}
-VPActiveLaneMaskPHIRecipe *VPlan::getActiveLaneMaskPhi() {
- VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
- for (VPRecipeBase &R : Header->phis()) {
- if (isa<VPActiveLaneMaskPHIRecipe>(&R))
- return cast<VPActiveLaneMaskPHIRecipe>(&R);
- }
- return nullptr;
-}
-
void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
Value *CanonicalIVStartValue,
- VPTransformState &State,
- bool IsEpilogueVectorization) {
+ VPTransformState &State) {
// Check if the backedge taken count is needed, and if so build it.
if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
@@ -648,6 +741,12 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
State.set(&VectorTripCount, VectorTripCountV, Part);
+ IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
+ // FIXME: Model VF * UF computation completely in VPlan.
+ State.set(&VFxUF,
+ createStepForVF(Builder, TripCountV->getType(), State.VF, State.UF),
+ 0);
+
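createStepForVF folds VF * UF into a single runtime value. A rough standalone equivalent of what it computes, assuming nothing beyond the IRBuilder API (names illustrative):

    #include "llvm/IR/IRBuilder.h"

    using namespace llvm;

    // For a fixed VF this folds to a constant; for a scalable VF it scales
    // vscale by the known minimum lane count before multiplying in UF.
    static Value *computeVFxUF(IRBuilderBase &B, Type *Ty, ElementCount VF,
                               unsigned UF) {
      uint64_t MinLanes = VF.getKnownMinValue();
      if (!VF.isScalable())
        return ConstantInt::get(Ty, MinLanes * UF);
      Value *VScale = B.CreateVScale(ConstantInt::get(Ty, MinLanes));
      return B.CreateMul(VScale, ConstantInt::get(Ty, UF));
    }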
// When vectorizing the epilogue loop, the canonical induction start value
// needs to be changed from zero to the value after the main vector loop.
// FIXME: Improve modeling for canonical IV start values in the epilogue loop.
@@ -656,16 +755,12 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
auto *IV = getCanonicalIV();
assert(all_of(IV->users(),
[](const VPUser *U) {
- if (isa<VPScalarIVStepsRecipe>(U) ||
- isa<VPDerivedIVRecipe>(U))
- return true;
- auto *VPI = cast<VPInstruction>(U);
- return VPI->getOpcode() ==
- VPInstruction::CanonicalIVIncrement ||
- VPI->getOpcode() ==
- VPInstruction::CanonicalIVIncrementNUW;
+ return isa<VPScalarIVStepsRecipe>(U) ||
+ isa<VPDerivedIVRecipe>(U) ||
+ cast<VPInstruction>(U)->getOpcode() ==
+ Instruction::Add;
}) &&
- "the canonical IV should only be used by its increments or "
+ "the canonical IV should only be used by its increment or "
"ScalarIVSteps when resetting the start value");
IV->setOperand(0, VPV);
}
@@ -754,11 +849,14 @@ void VPlan::execute(VPTransformState *State) {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD
-void VPlan::print(raw_ostream &O) const {
+void VPlan::printLiveIns(raw_ostream &O) const {
VPSlotTracker SlotTracker(this);
- O << "VPlan '" << getName() << "' {";
+ if (VFxUF.getNumUsers() > 0) {
+ O << "\nLive-in ";
+ VFxUF.printAsOperand(O, SlotTracker);
+ O << " = VF * UF";
+ }
if (VectorTripCount.getNumUsers() > 0) {
O << "\nLive-in ";
@@ -778,6 +876,15 @@ void VPlan::print(raw_ostream &O) const {
TripCount->printAsOperand(O, SlotTracker);
O << " = original trip-count";
O << "\n";
+}
+
+LLVM_DUMP_METHOD
+void VPlan::print(raw_ostream &O) const {
+ VPSlotTracker SlotTracker(this);
+
+ O << "VPlan '" << getName() << "' {";
+
+ printLiveIns(O);
if (!getPreheader()->empty()) {
O << "\n";
@@ -895,11 +1002,18 @@ void VPlanPrinter::dump() {
OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
if (!Plan.getName().empty())
OS << "\\n" << DOT::EscapeString(Plan.getName());
- if (Plan.BackedgeTakenCount) {
- OS << ", where:\\n";
- Plan.BackedgeTakenCount->print(OS, SlotTracker);
- OS << " := BackedgeTakenCount";
+
+ {
+ // Print live-ins.
+ std::string Str;
+ raw_string_ostream SS(Str);
+ Plan.printLiveIns(SS);
+ SmallVector<StringRef, 0> Lines;
+ StringRef(Str).rtrim('\n').split(Lines, "\n");
+ for (auto Line : Lines)
+ OS << DOT::EscapeString(Line.str()) << "\\n";
}
+
OS << "\"]\n";
OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
OS << "edge [fontname=Courier, fontsize=30]\n";
@@ -1021,16 +1135,43 @@ void VPlanIngredient::print(raw_ostream &O) const {
template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
void VPValue::replaceAllUsesWith(VPValue *New) {
+ if (this == New)
+ return;
for (unsigned J = 0; J < getNumUsers();) {
VPUser *User = Users[J];
- unsigned NumUsers = getNumUsers();
+ bool RemovedUser = false;
for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
- if (User->getOperand(I) == this)
+ if (User->getOperand(I) == this) {
User->setOperand(I, New);
+ RemovedUser = true;
+ }
// If a user got removed after updating the current user, the next user to
// update will be moved to the current position, so we only need to
// increment the index if the number of users did not change.
- if (NumUsers == getNumUsers())
+ if (!RemovedUser)
+ J++;
+ }
+}
+
+void VPValue::replaceUsesWithIf(
+ VPValue *New,
+ llvm::function_ref<bool(VPUser &U, unsigned Idx)> ShouldReplace) {
+ if (this == New)
+ return;
+ for (unsigned J = 0; J < getNumUsers();) {
+ VPUser *User = Users[J];
+ bool RemovedUser = false;
+ for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) {
+ if (User->getOperand(I) != this || !ShouldReplace(*User, I))
+ continue;
+
+ RemovedUser = true;
+ User->setOperand(I, New);
+ }
+ // If a user got removed after updating the current user, the next user to
+ // update will be moved to the current position, so we only need to
+ // increment the index if the number of users did not change.
+ if (!RemovedUser)
J++;
}
}
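A hedged usage sketch for the new predicate-based variant; OldV and NewV are placeholders, and the filter shown (restricting replacement to blend recipes) is just one arbitrary example:

    #include "VPlan.h"

    using namespace llvm;

    // Rewrite only the uses of OldV that sit inside VPBlendRecipes; all
    // other users keep the old value. Both RAUW variants bail out early
    // when New == this, so self-replacement is a no-op.
    static void retargetBlendUses(VPValue *OldV, VPValue *NewV) {
      OldV->replaceUsesWithIf(NewV, [](VPUser &U, unsigned Idx) {
        return isa<VPBlendRecipe>(&U);
      });
    }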
@@ -1116,6 +1257,8 @@ void VPSlotTracker::assignSlot(const VPValue *V) {
}
void VPSlotTracker::assignSlots(const VPlan &Plan) {
+ if (Plan.VFxUF.getNumUsers() > 0)
+ assignSlot(&Plan.VFxUF);
assignSlot(&Plan.VectorTripCount);
if (Plan.BackedgeTakenCount)
assignSlot(Plan.BackedgeTakenCount);
@@ -1139,6 +1282,11 @@ bool vputils::onlyFirstLaneUsed(VPValue *Def) {
[Def](VPUser *U) { return U->onlyFirstLaneUsed(Def); });
}
+bool vputils::onlyFirstPartUsed(VPValue *Def) {
+ return all_of(Def->users(),
+ [Def](VPUser *U) { return U->onlyFirstPartUsed(Def); });
+}
+
VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
ScalarEvolution &SE) {
if (auto *Expanded = Plan.getSCEVExpansion(Expr))
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
index 73313465adea..94cb76889813 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -23,6 +23,7 @@
#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
+#include "VPlanAnalysis.h"
#include "VPlanValue.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
@@ -233,9 +234,9 @@ struct VPIteration {
struct VPTransformState {
VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
DominatorTree *DT, IRBuilderBase &Builder,
- InnerLoopVectorizer *ILV, VPlan *Plan)
+ InnerLoopVectorizer *ILV, VPlan *Plan, LLVMContext &Ctx)
: VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ILV(ILV), Plan(Plan),
- LVer(nullptr) {}
+ LVer(nullptr), TypeAnalysis(Ctx) {}
/// The chosen Vectorization and Unroll Factors of the loop being vectorized.
ElementCount VF;
@@ -274,10 +275,6 @@ struct VPTransformState {
I->second[Part];
}
- bool hasAnyVectorValue(VPValue *Def) const {
- return Data.PerPartOutput.contains(Def);
- }
-
bool hasScalarValue(VPValue *Def, VPIteration Instance) {
auto I = Data.PerPartScalars.find(Def);
if (I == Data.PerPartScalars.end())
@@ -349,8 +346,11 @@ struct VPTransformState {
/// vector of instructions.
void addMetadata(ArrayRef<Value *> To, Instruction *From);
- /// Set the debug location in the builder using the debug location in \p V.
- void setDebugLocFromInst(const Value *V);
+ /// Set the debug location in the builder using the debug location \p DL.
+ void setDebugLocFrom(DebugLoc DL);
+
+ /// Construct the vector value of a scalarized value \p V one lane at a time.
+ void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance);
/// Hold state information used when constructing the CFG of the output IR,
/// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
@@ -410,6 +410,9 @@ struct VPTransformState {
/// Map SCEVs to their expanded values. Populated when executing
/// VPExpandSCEVRecipes.
DenseMap<const SCEV *, Value *> ExpandedSCEVs;
+
+ /// VPlan-based type analysis.
+ VPTypeAnalysis TypeAnalysis;
};
/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
@@ -582,6 +585,8 @@ public:
/// This VPBlockBase must have no successors.
void setOneSuccessor(VPBlockBase *Successor) {
assert(Successors.empty() && "Setting one successor when others exist.");
+ assert(Successor->getParent() == getParent() &&
+ "connected blocks must have the same parent");
appendSuccessor(Successor);
}
@@ -693,7 +698,7 @@ public:
};
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
-/// instructions. VPRecipeBase owns the the VPValues it defines through VPDef
+/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
/// VPRecipeBases that also inherit from VPValue must make sure to inherit from
/// VPRecipeBase before VPValue.
@@ -706,13 +711,18 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
/// Each VPRecipe belongs to a single VPBasicBlock.
VPBasicBlock *Parent = nullptr;
+ /// The debug location for the recipe.
+ DebugLoc DL;
+
public:
- VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands)
- : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe) {}
+ VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
+ DebugLoc DL = {})
+ : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {}
template <typename IterT>
- VPRecipeBase(const unsigned char SC, iterator_range<IterT> Operands)
- : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe) {}
+ VPRecipeBase(const unsigned char SC, iterator_range<IterT> Operands,
+ DebugLoc DL = {})
+ : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {}
virtual ~VPRecipeBase() = default;
/// \return the VPBasicBlock which this VPRecipe belongs to.
@@ -789,6 +799,9 @@ public:
bool mayReadOrWriteMemory() const {
return mayReadFromMemory() || mayWriteToMemory();
}
+
+ /// Returns the debug location of the recipe.
+ DebugLoc getDebugLoc() const { return DL; }
};
// Helper macro to define common classof implementations for recipes.
@@ -808,153 +821,30 @@ public:
return R->getVPDefID() == VPDefID; \
}
-/// This is a concrete Recipe that models a single VPlan-level instruction.
-/// While as any Recipe it may generate a sequence of IR instructions when
-/// executed, these instructions would always form a single-def expression as
-/// the VPInstruction is also a single def-use vertex.
-class VPInstruction : public VPRecipeBase, public VPValue {
- friend class VPlanSlp;
-
-public:
- /// VPlan opcodes, extending LLVM IR with idiomatics instructions.
- enum {
- FirstOrderRecurrenceSplice =
- Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
- // values of a first-order recurrence.
- Not,
- ICmpULE,
- SLPLoad,
- SLPStore,
- ActiveLaneMask,
- CalculateTripCountMinusVF,
- CanonicalIVIncrement,
- CanonicalIVIncrementNUW,
- // The next two are similar to the above, but instead increment the
- // canonical IV separately for each unrolled part.
- CanonicalIVIncrementForPart,
- CanonicalIVIncrementForPartNUW,
- BranchOnCount,
- BranchOnCond
- };
-
-private:
- typedef unsigned char OpcodeTy;
- OpcodeTy Opcode;
- FastMathFlags FMF;
- DebugLoc DL;
-
- /// An optional name that can be used for the generated IR instruction.
- const std::string Name;
-
- /// Utility method serving execute(): generates a single instance of the
- /// modeled instruction. \returns the generated value for \p Part.
- /// In some cases an existing value is returned rather than a generated
- /// one.
- Value *generateInstruction(VPTransformState &State, unsigned Part);
-
-protected:
- void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }
-
-public:
- VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL,
- const Twine &Name = "")
- : VPRecipeBase(VPDef::VPInstructionSC, Operands), VPValue(this),
- Opcode(Opcode), DL(DL), Name(Name.str()) {}
-
- VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
- DebugLoc DL = {}, const Twine &Name = "")
- : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name) {}
-
- VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
-
- VPInstruction *clone() const {
- SmallVector<VPValue *, 2> Operands(operands());
- return new VPInstruction(Opcode, Operands, DL, Name);
- }
-
- unsigned getOpcode() const { return Opcode; }
-
- /// Generate the instruction.
- /// TODO: We currently execute only per-part unless a specific instance is
- /// provided.
- void execute(VPTransformState &State) override;
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// Print the VPInstruction to \p O.
- void print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const override;
-
- /// Print the VPInstruction to dbgs() (for debugging).
- LLVM_DUMP_METHOD void dump() const;
-#endif
-
- /// Return true if this instruction may modify memory.
- bool mayWriteToMemory() const {
- // TODO: we can use attributes of the called function to rule out memory
- // modifications.
- return Opcode == Instruction::Store || Opcode == Instruction::Call ||
- Opcode == Instruction::Invoke || Opcode == SLPStore;
- }
-
- bool hasResult() const {
- // CallInst may or may not have a result, depending on the called function.
- // Conservatively return calls have results for now.
- switch (getOpcode()) {
- case Instruction::Ret:
- case Instruction::Br:
- case Instruction::Store:
- case Instruction::Switch:
- case Instruction::IndirectBr:
- case Instruction::Resume:
- case Instruction::CatchRet:
- case Instruction::Unreachable:
- case Instruction::Fence:
- case Instruction::AtomicRMW:
- case VPInstruction::BranchOnCond:
- case VPInstruction::BranchOnCount:
- return false;
- default:
- return true;
- }
- }
-
- /// Set the fast-math flags.
- void setFastMathFlags(FastMathFlags FMFNew);
-
- /// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
- assert(is_contained(operands(), Op) &&
- "Op must be an operand of the recipe");
- if (getOperand(0) != Op)
- return false;
- switch (getOpcode()) {
- default:
- return false;
- case VPInstruction::ActiveLaneMask:
- case VPInstruction::CalculateTripCountMinusVF:
- case VPInstruction::CanonicalIVIncrement:
- case VPInstruction::CanonicalIVIncrementNUW:
- case VPInstruction::CanonicalIVIncrementForPart:
- case VPInstruction::CanonicalIVIncrementForPartNUW:
- case VPInstruction::BranchOnCount:
- return true;
- };
- llvm_unreachable("switch should return");
- }
-};
-
/// Class to record LLVM IR flag for a recipe along with it.
class VPRecipeWithIRFlags : public VPRecipeBase {
enum class OperationType : unsigned char {
+ Cmp,
OverflowingBinOp,
+ DisjointOp,
PossiblyExactOp,
GEPOp,
FPMathOp,
+ NonNegOp,
Other
};
+
+public:
struct WrapFlagsTy {
char HasNUW : 1;
char HasNSW : 1;
+
+ WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
+ };
+
+private:
+ struct DisjointFlagsTy {
+ char IsDisjoint : 1;
};
struct ExactFlagsTy {
char IsExact : 1;
@@ -962,6 +852,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
struct GEPFlagsTy {
char IsInBounds : 1;
};
+ struct NonNegFlagsTy {
+ char NonNeg : 1;
+ };
struct FastMathFlagsTy {
char AllowReassoc : 1;
char NoNaNs : 1;
@@ -970,56 +863,81 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
char AllowReciprocal : 1;
char AllowContract : 1;
char ApproxFunc : 1;
+
+ FastMathFlagsTy(const FastMathFlags &FMF);
};
OperationType OpType;
union {
+ CmpInst::Predicate CmpPredicate;
WrapFlagsTy WrapFlags;
+ DisjointFlagsTy DisjointFlags;
ExactFlagsTy ExactFlags;
GEPFlagsTy GEPFlags;
+ NonNegFlagsTy NonNegFlags;
FastMathFlagsTy FMFs;
- unsigned char AllFlags;
+ unsigned AllFlags;
};
public:
template <typename IterT>
- VPRecipeWithIRFlags(const unsigned char SC, iterator_range<IterT> Operands)
- : VPRecipeBase(SC, Operands) {
+ VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
+ : VPRecipeBase(SC, Operands, DL) {
OpType = OperationType::Other;
AllFlags = 0;
}
template <typename IterT>
- VPRecipeWithIRFlags(const unsigned char SC, iterator_range<IterT> Operands,
- Instruction &I)
- : VPRecipeWithIRFlags(SC, Operands) {
- if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
+ VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
+ : VPRecipeWithIRFlags(SC, Operands, I.getDebugLoc()) {
+ if (auto *Op = dyn_cast<CmpInst>(&I)) {
+ OpType = OperationType::Cmp;
+ CmpPredicate = Op->getPredicate();
+ } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
+ OpType = OperationType::DisjointOp;
+ DisjointFlags.IsDisjoint = Op->isDisjoint();
+ } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
OpType = OperationType::OverflowingBinOp;
- WrapFlags.HasNUW = Op->hasNoUnsignedWrap();
- WrapFlags.HasNSW = Op->hasNoSignedWrap();
+ WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
} else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
OpType = OperationType::PossiblyExactOp;
ExactFlags.IsExact = Op->isExact();
} else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
OpType = OperationType::GEPOp;
GEPFlags.IsInBounds = GEP->isInBounds();
+ } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
+ OpType = OperationType::NonNegOp;
+ NonNegFlags.NonNeg = PNNI->hasNonNeg();
} else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
OpType = OperationType::FPMathOp;
- FastMathFlags FMF = Op->getFastMathFlags();
- FMFs.AllowReassoc = FMF.allowReassoc();
- FMFs.NoNaNs = FMF.noNaNs();
- FMFs.NoInfs = FMF.noInfs();
- FMFs.NoSignedZeros = FMF.noSignedZeros();
- FMFs.AllowReciprocal = FMF.allowReciprocal();
- FMFs.AllowContract = FMF.allowContract();
- FMFs.ApproxFunc = FMF.approxFunc();
+ FMFs = Op->getFastMathFlags();
}
}
+ template <typename IterT>
+ VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
+ CmpInst::Predicate Pred, DebugLoc DL = {})
+ : VPRecipeBase(SC, Operands, DL), OpType(OperationType::Cmp),
+ CmpPredicate(Pred) {}
+
+ template <typename IterT>
+ VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
+ WrapFlagsTy WrapFlags, DebugLoc DL = {})
+ : VPRecipeBase(SC, Operands, DL), OpType(OperationType::OverflowingBinOp),
+ WrapFlags(WrapFlags) {}
+
+ template <typename IterT>
+ VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
+ FastMathFlags FMFs, DebugLoc DL = {})
+ : VPRecipeBase(SC, Operands, DL), OpType(OperationType::FPMathOp),
+ FMFs(FMFs) {}
+
static inline bool classof(const VPRecipeBase *R) {
- return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
+ return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenSC ||
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
R->getVPDefID() == VPRecipeBase::VPReplicateSC;
}
@@ -1032,6 +950,9 @@ public:
WrapFlags.HasNUW = false;
WrapFlags.HasNSW = false;
break;
+ case OperationType::DisjointOp:
+ DisjointFlags.IsDisjoint = false;
+ break;
case OperationType::PossiblyExactOp:
ExactFlags.IsExact = false;
break;
@@ -1042,6 +963,10 @@ public:
FMFs.NoNaNs = false;
FMFs.NoInfs = false;
break;
+ case OperationType::NonNegOp:
+ NonNegFlags.NonNeg = false;
+ break;
+ case OperationType::Cmp:
case OperationType::Other:
break;
}
@@ -1054,6 +979,9 @@ public:
I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
I->setHasNoSignedWrap(WrapFlags.HasNSW);
break;
+ case OperationType::DisjointOp:
+ cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);
+ break;
case OperationType::PossiblyExactOp:
I->setIsExact(ExactFlags.IsExact);
break;
@@ -1069,43 +997,209 @@ public:
I->setHasAllowContract(FMFs.AllowContract);
I->setHasApproxFunc(FMFs.ApproxFunc);
break;
+ case OperationType::NonNegOp:
+ I->setNonNeg(NonNegFlags.NonNeg);
+ break;
+ case OperationType::Cmp:
case OperationType::Other:
break;
}
}
+ CmpInst::Predicate getPredicate() const {
+ assert(OpType == OperationType::Cmp &&
+ "recipe doesn't have a compare predicate");
+ return CmpPredicate;
+ }
+
bool isInBounds() const {
assert(OpType == OperationType::GEPOp &&
"recipe doesn't have inbounds flag");
return GEPFlags.IsInBounds;
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- FastMathFlags getFastMathFlags() const {
- FastMathFlags Res;
- Res.setAllowReassoc(FMFs.AllowReassoc);
- Res.setNoNaNs(FMFs.NoNaNs);
- Res.setNoInfs(FMFs.NoInfs);
- Res.setNoSignedZeros(FMFs.NoSignedZeros);
- Res.setAllowReciprocal(FMFs.AllowReciprocal);
- Res.setAllowContract(FMFs.AllowContract);
- Res.setApproxFunc(FMFs.ApproxFunc);
- return Res;
+ /// Returns true if the recipe has fast-math flags.
+ bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
+
+ FastMathFlags getFastMathFlags() const;
+
+ bool hasNoUnsignedWrap() const {
+ assert(OpType == OperationType::OverflowingBinOp &&
+ "recipe doesn't have a NUW flag");
+ return WrapFlags.HasNUW;
}
+ bool hasNoSignedWrap() const {
+ assert(OpType == OperationType::OverflowingBinOp &&
+ "recipe doesn't have a NSW flag");
+ return WrapFlags.HasNSW;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void printFlags(raw_ostream &O) const;
#endif
};
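The flag storage above is a tagged union of one-bit bitfield structs, so a recipe only pays for the flag set matching its operation kind. A self-contained analogue of the pattern, independent of the LLVM types:

    #include <cassert>

    // A discriminated union of tiny flag structs: the enum tag records which
    // union member is live, and accessors assert on it before reading.
    class FlagStore {
      enum class Kind : unsigned char { Wrap, Exact, None };
      struct WrapFlags { char HasNUW : 1; char HasNSW : 1; };
      struct ExactFlags { char IsExact : 1; };
      Kind K = Kind::None;
      union {
        WrapFlags Wrap;
        ExactFlags Exact;
        unsigned All = 0; // zero-initializes the widest member
      };

    public:
      void setWrap(bool NUW, bool NSW) {
        K = Kind::Wrap;
        Wrap = {char(NUW), char(NSW)};
      }
      bool hasNoUnsignedWrap() const {
        assert(K == Kind::Wrap && "no wrap flags stored");
        return Wrap.HasNUW;
      }
    };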
+/// This is a concrete Recipe that models a single VPlan-level instruction.
+/// While, like any Recipe, it may generate a sequence of IR instructions when
+/// executed, these instructions would always form a single-def expression as
+/// the VPInstruction is also a single def-use vertex.
+class VPInstruction : public VPRecipeWithIRFlags, public VPValue {
+ friend class VPlanSlp;
+
+public:
+ /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
+ enum {
+ FirstOrderRecurrenceSplice =
+ Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
+ // values of a first-order recurrence.
+ Not,
+ SLPLoad,
+ SLPStore,
+ ActiveLaneMask,
+ CalculateTripCountMinusVF,
+ // Increment the canonical IV separately for each unrolled part.
+ CanonicalIVIncrementForPart,
+ BranchOnCount,
+ BranchOnCond
+ };
+
+private:
+ typedef unsigned char OpcodeTy;
+ OpcodeTy Opcode;
+
+ /// An optional name that can be used for the generated IR instruction.
+ const std::string Name;
+
+ /// Utility method serving execute(): generates a single instance of the
+ /// modeled instruction. \returns the generated value for \p Part.
+ /// In some cases an existing value is returned rather than a generated
+ /// one.
+ Value *generateInstruction(VPTransformState &State, unsigned Part);
+
+#if !defined(NDEBUG)
+ /// Return true if the VPInstruction is a floating point math operation, i.e.
+ /// has fast-math flags.
+ bool isFPMathOp() const;
+#endif
+
+protected:
+ void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }
+
+public:
+ VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL,
+ const Twine &Name = "")
+ : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
+ VPValue(this), Opcode(Opcode), Name(Name.str()) {}
+
+ VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
+ DebugLoc DL = {}, const Twine &Name = "")
+ : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name) {}
+
+ VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, VPValue *A,
+ VPValue *B, DebugLoc DL = {}, const Twine &Name = "");
+
+ VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
+ WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")
+ : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
+ VPValue(this), Opcode(Opcode), Name(Name.str()) {}
+
+ VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
+ FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");
+
+ VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
+
+ unsigned getOpcode() const { return Opcode; }
+
+ /// Generate the instruction.
+ /// TODO: We currently execute only per-part unless a specific instance is
+ /// provided.
+ void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the VPInstruction to \p O.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+
+ /// Print the VPInstruction to dbgs() (for debugging).
+ LLVM_DUMP_METHOD void dump() const;
+#endif
+
+ /// Return true if this instruction may modify memory.
+ bool mayWriteToMemory() const {
+ // TODO: we can use attributes of the called function to rule out memory
+ // modifications.
+ return Opcode == Instruction::Store || Opcode == Instruction::Call ||
+ Opcode == Instruction::Invoke || Opcode == SLPStore;
+ }
+
+ bool hasResult() const {
+ // CallInst may or may not have a result, depending on the called function.
+ // Conservatively assume calls have results for now.
+ switch (getOpcode()) {
+ case Instruction::Ret:
+ case Instruction::Br:
+ case Instruction::Store:
+ case Instruction::Switch:
+ case Instruction::IndirectBr:
+ case Instruction::Resume:
+ case Instruction::CatchRet:
+ case Instruction::Unreachable:
+ case Instruction::Fence:
+ case Instruction::AtomicRMW:
+ case VPInstruction::BranchOnCond:
+ case VPInstruction::BranchOnCount:
+ return false;
+ default:
+ return true;
+ }
+ }
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ if (getOperand(0) != Op)
+ return false;
+ switch (getOpcode()) {
+ default:
+ return false;
+ case VPInstruction::ActiveLaneMask:
+ case VPInstruction::CalculateTripCountMinusVF:
+ case VPInstruction::CanonicalIVIncrementForPart:
+ case VPInstruction::BranchOnCount:
+ return true;
+ };
+ llvm_unreachable("switch should return");
+ }
+
+ /// Returns true if the recipe only uses the first part of operand \p Op.
+ bool onlyFirstPartUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ if (getOperand(0) != Op)
+ return false;
+ switch (getOpcode()) {
+ default:
+ return false;
+ case VPInstruction::BranchOnCount:
+ return true;
+ };
+ llvm_unreachable("switch should return");
+ }
+};
+
/// VPWidenRecipe is a recipe for producing a copy of vector type its
/// ingredient. This recipe covers most of the traditional vectorization cases
/// where each ingredient transforms into a vectorized version of itself.
class VPWidenRecipe : public VPRecipeWithIRFlags, public VPValue {
+ unsigned Opcode;
public:
template <typename IterT>
VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
- : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), VPValue(this, &I) {}
+ : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), VPValue(this, &I),
+ Opcode(I.getOpcode()) {}
~VPWidenRecipe() override = default;
@@ -1114,6 +1208,8 @@ public:
/// Produce widened copies of all Ingredients.
void execute(VPTransformState &State) override;
+ unsigned getOpcode() const { return Opcode; }
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
@@ -1122,7 +1218,7 @@ public:
};
/// VPWidenCastRecipe is a recipe to create vector cast instructions.
-class VPWidenCastRecipe : public VPRecipeBase, public VPValue {
+class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPValue {
/// Cast instruction opcode.
Instruction::CastOps Opcode;
@@ -1131,15 +1227,19 @@ class VPWidenCastRecipe : public VPRecipeBase, public VPValue {
public:
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
- CastInst *UI = nullptr)
- : VPRecipeBase(VPDef::VPWidenCastSC, Op), VPValue(this, UI),
+ CastInst &UI)
+ : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), VPValue(this, &UI),
Opcode(Opcode), ResultTy(ResultTy) {
- assert((!UI || UI->getOpcode() == Opcode) &&
+ assert(UI.getOpcode() == Opcode &&
"opcode of underlying cast doesn't match");
- assert((!UI || UI->getType() == ResultTy) &&
+ assert(UI.getType() == ResultTy &&
"result type of underlying cast doesn't match");
}
+ VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
+ : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), VPValue(this, nullptr),
+ Opcode(Opcode), ResultTy(ResultTy) {}
+
~VPWidenCastRecipe() override = default;
VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
@@ -1196,7 +1296,8 @@ public:
struct VPWidenSelectRecipe : public VPRecipeBase, public VPValue {
template <typename IterT>
VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands)
- : VPRecipeBase(VPDef::VPWidenSelectSC, Operands), VPValue(this, &I) {}
+ : VPRecipeBase(VPDef::VPWidenSelectSC, Operands, I.getDebugLoc()),
+ VPValue(this, &I) {}
~VPWidenSelectRecipe() override = default;
@@ -1282,8 +1383,8 @@ public:
class VPHeaderPHIRecipe : public VPRecipeBase, public VPValue {
protected:
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
- VPValue *Start = nullptr)
- : VPRecipeBase(VPDefID, {}), VPValue(this, UnderlyingInstr) {
+ VPValue *Start = nullptr, DebugLoc DL = {})
+ : VPRecipeBase(VPDefID, {}, DL), VPValue(this, UnderlyingInstr) {
if (Start)
addOperand(Start);
}
@@ -1404,7 +1505,7 @@ public:
bool isCanonical() const;
/// Returns the scalar type of the induction.
- const Type *getScalarType() const {
+ Type *getScalarType() const {
return Trunc ? Trunc->getType() : IV->getType();
}
};
@@ -1565,14 +1666,13 @@ public:
/// A recipe for vectorizing a phi-node as a sequence of mask-based select
/// instructions.
class VPBlendRecipe : public VPRecipeBase, public VPValue {
- PHINode *Phi;
-
public:
/// The blend operation is a User of the incoming values and of their
/// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value
/// might be incoming with a full mask for which there is no VPValue.
VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
- : VPRecipeBase(VPDef::VPBlendSC, Operands), VPValue(this, Phi), Phi(Phi) {
+ : VPRecipeBase(VPDef::VPBlendSC, Operands, Phi->getDebugLoc()),
+ VPValue(this, Phi) {
assert(Operands.size() > 0 &&
((Operands.size() == 1) || (Operands.size() % 2 == 0)) &&
"Expected either a single incoming value or a positive even number "
@@ -1701,16 +1801,13 @@ public:
/// The Operands are {ChainOp, VecOp, [Condition]}.
class VPReductionRecipe : public VPRecipeBase, public VPValue {
/// The recurrence descriptor for the reduction in question.
- const RecurrenceDescriptor *RdxDesc;
- /// Pointer to the TTI, needed to create the target reduction
- const TargetTransformInfo *TTI;
+ const RecurrenceDescriptor &RdxDesc;
public:
- VPReductionRecipe(const RecurrenceDescriptor *R, Instruction *I,
- VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
- const TargetTransformInfo *TTI)
+ VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
+ VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp)
: VPRecipeBase(VPDef::VPReductionSC, {ChainOp, VecOp}), VPValue(this, I),
- RdxDesc(R), TTI(TTI) {
+ RdxDesc(R) {
if (CondOp)
addOperand(CondOp);
}
@@ -2008,11 +2105,9 @@ public:
/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
/// canonical induction variable.
class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
- DebugLoc DL;
-
public:
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
- : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV), DL(DL) {}
+ : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
~VPCanonicalIVPHIRecipe() override = default;
@@ -2032,8 +2127,8 @@ public:
#endif
/// Returns the scalar type of the induction.
- const Type *getScalarType() const {
- return getOperand(0)->getLiveInIRValue()->getType();
+ Type *getScalarType() const {
+ return getStartValue()->getLiveInIRValue()->getType();
}
/// Returns true if the recipe only uses the first lane of operand \p Op.
@@ -2043,6 +2138,13 @@ public:
return true;
}
+ /// Returns true if the recipe only uses the first part of operand \p Op.
+ bool onlyFirstPartUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
+
/// Check if the induction described by \p Kind, /p Start and \p Step is
/// canonical, i.e. has the same start, step (of 1), and type as the
/// canonical IV.
@@ -2055,12 +2157,10 @@ public:
/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
/// remove VPActiveLaneMaskPHIRecipe.
class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
- DebugLoc DL;
-
public:
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
- : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask),
- DL(DL) {}
+ : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
+ DL) {}
~VPActiveLaneMaskPHIRecipe() override = default;
@@ -2113,19 +2213,24 @@ public:
/// an IV with different start and step values, using Start + CanonicalIV *
/// Step.
class VPDerivedIVRecipe : public VPRecipeBase, public VPValue {
- /// The type of the result value. It may be smaller than the type of the
- /// induction and in this case it will get truncated to ResultTy.
- Type *ResultTy;
+ /// If not nullptr, the result of the induction will get truncated to
+ /// TruncResultTy.
+ Type *TruncResultTy;
- /// Induction descriptor for the induction the canonical IV is transformed to.
- const InductionDescriptor &IndDesc;
+ /// Kind of the induction.
+ const InductionDescriptor::InductionKind Kind;
+ /// If not nullptr, the floating point induction binary operator. Must be set
+ /// for floating point inductions.
+ const FPMathOperator *FPBinOp;
public:
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
- Type *ResultTy)
+ Type *TruncResultTy)
: VPRecipeBase(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
- VPValue(this), ResultTy(ResultTy), IndDesc(IndDesc) {}
+ VPValue(this), TruncResultTy(TruncResultTy), Kind(IndDesc.getKind()),
+ FPBinOp(dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())) {
+ }
~VPDerivedIVRecipe() override = default;
@@ -2141,6 +2246,11 @@ public:
VPSlotTracker &SlotTracker) const override;
#endif
+ Type *getScalarType() const {
+ return TruncResultTy ? TruncResultTy
+ : getStartValue()->getLiveInIRValue()->getType();
+ }
+
VPValue *getStartValue() const { return getOperand(0); }
VPValue *getCanonicalIV() const { return getOperand(1); }
VPValue *getStepValue() const { return getOperand(2); }
@@ -2155,14 +2265,23 @@ public:
/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their scalar values.
-class VPScalarIVStepsRecipe : public VPRecipeBase, public VPValue {
- const InductionDescriptor &IndDesc;
+class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags, public VPValue {
+ Instruction::BinaryOps InductionOpcode;
public:
+ VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step,
+ Instruction::BinaryOps Opcode, FastMathFlags FMFs)
+ : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
+ ArrayRef<VPValue *>({IV, Step}), FMFs),
+ VPValue(this), InductionOpcode(Opcode) {}
+
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV,
VPValue *Step)
- : VPRecipeBase(VPDef::VPScalarIVStepsSC, {IV, Step}), VPValue(this),
- IndDesc(IndDesc) {}
+ : VPScalarIVStepsRecipe(
+ IV, Step, IndDesc.getInductionOpcode(),
+ dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
+ ? IndDesc.getInductionBinOp()->getFastMathFlags()
+ : FastMathFlags()) {}
~VPScalarIVStepsRecipe() override = default;
@@ -2445,6 +2564,9 @@ class VPlan {
/// Represents the vector trip count.
VPValue VectorTripCount;
+ /// Represents the loop-invariant VF * UF of the vector loop region.
+ VPValue VFxUF;
+
/// Holds a mapping between Values and their corresponding VPValue inside
/// VPlan.
Value2VPValueTy Value2VPValue;
@@ -2490,15 +2612,17 @@ public:
~VPlan();
- /// Create an initial VPlan with preheader and entry blocks. Creates a
- /// VPExpandSCEVRecipe for \p TripCount and uses it as plan's trip count.
+ /// Create initial VPlan skeleton, having an "entry" VPBasicBlock (wrapping
+ /// original scalar pre-header) which contains SCEV expansions that need to
+ /// happen before the CFG is modified; a VPBasicBlock for the vector
+ /// pre-header, followed by a region for the vector loop, followed by the
+ /// middle VPBasicBlock.
static VPlanPtr createInitialVPlan(const SCEV *TripCount,
ScalarEvolution &PSE);
/// Prepare the plan for execution, setting up the required live-in values.
void prepareToExecute(Value *TripCount, Value *VectorTripCount,
- Value *CanonicalIVStartValue, VPTransformState &State,
- bool IsEpilogueVectorization);
+ Value *CanonicalIVStartValue, VPTransformState &State);
/// Generate the IR code for this VPlan.
void execute(VPTransformState *State);
@@ -2522,6 +2646,9 @@ public:
/// The vector trip count.
VPValue &getVectorTripCount() { return VectorTripCount; }
+ /// Returns VF * UF of the vector loop region.
+ VPValue &getVFxUF() { return VFxUF; }
+
/// Mark the plan to indicate that using Value2VPValue is not safe any
/// longer, because it may be stale.
void disableValue2VPValue() { Value2VPValueEnabled = false; }
@@ -2583,13 +2710,10 @@ public:
return getVPValue(V);
}
- void removeVPValueFor(Value *V) {
- assert(Value2VPValueEnabled &&
- "IR value to VPValue mapping may be out of date!");
- Value2VPValue.erase(V);
- }
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the live-ins of this VPlan to \p O.
+ void printLiveIns(raw_ostream &O) const;
+
/// Print this VPlan to \p O.
void print(raw_ostream &O) const;
@@ -2628,10 +2752,6 @@ public:
return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
}
- /// Find and return the VPActiveLaneMaskPHIRecipe from the header - there
- /// be only one at most. If there isn't one, then return nullptr.
- VPActiveLaneMaskPHIRecipe *getActiveLaneMaskPhi();
-
void addLiveOut(PHINode *PN, VPValue *V);
void removeLiveOut(PHINode *PN) {
@@ -2959,6 +3079,9 @@ namespace vputils {
/// Returns true if only the first lane of \p Def is used.
bool onlyFirstLaneUsed(VPValue *Def);
+/// Returns true if only the first part of \p Def is used.
+bool onlyFirstPartUsed(VPValue *Def);
+
/// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p
/// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in
/// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
new file mode 100644
index 000000000000..97a8a1803bbf
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -0,0 +1,237 @@
+//===- VPlanAnalysis.cpp - Various Analyses working on VPlan ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "VPlanAnalysis.h"
+#include "VPlan.h"
+#include "llvm/ADT/TypeSwitch.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "vplan"
+
+Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPBlendRecipe *R) {
+ Type *ResTy = inferScalarType(R->getIncomingValue(0));
+ for (unsigned I = 1, E = R->getNumIncomingValues(); I != E; ++I) {
+ VPValue *Inc = R->getIncomingValue(I);
+ assert(inferScalarType(Inc) == ResTy &&
+ "different types inferred for different incoming values");
+ CachedTypes[Inc] = ResTy;
+ }
+ return ResTy;
+}
+
+Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
+ switch (R->getOpcode()) {
+ case Instruction::Select: {
+ Type *ResTy = inferScalarType(R->getOperand(1));
+ VPValue *OtherV = R->getOperand(2);
+ assert(inferScalarType(OtherV) == ResTy &&
+ "different types inferred for different operands");
+ CachedTypes[OtherV] = ResTy;
+ return ResTy;
+ }
+ case VPInstruction::FirstOrderRecurrenceSplice: {
+ Type *ResTy = inferScalarType(R->getOperand(0));
+ VPValue *OtherV = R->getOperand(1);
+ assert(inferScalarType(OtherV) == ResTy &&
+ "different types inferred for different operands");
+ CachedTypes[OtherV] = ResTy;
+ return ResTy;
+ }
+ default:
+ break;
+ }
+ // Type inference not implemented for opcode.
+ LLVM_DEBUG({
+ dbgs() << "LV: Found unhandled opcode for: ";
+ R->getVPSingleValue()->dump();
+ });
+ llvm_unreachable("Unhandled opcode!");
+}
+
+Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenRecipe *R) {
+ unsigned Opcode = R->getOpcode();
+ switch (Opcode) {
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return IntegerType::get(Ctx, 1);
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ Type *ResTy = inferScalarType(R->getOperand(0));
+ assert(ResTy == inferScalarType(R->getOperand(1)) &&
+ "types for both operands must match for binary op");
+ CachedTypes[R->getOperand(1)] = ResTy;
+ return ResTy;
+ }
+ case Instruction::FNeg:
+ case Instruction::Freeze:
+ return inferScalarType(R->getOperand(0));
+ default:
+ break;
+ }
+
+ // Type inference not implemented for opcode.
+ LLVM_DEBUG({
+ dbgs() << "LV: Found unhandled opcode for: ";
+ R->getVPSingleValue()->dump();
+ });
+ llvm_unreachable("Unhandled opcode!");
+}
+
+Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
+ auto &CI = *cast<CallInst>(R->getUnderlyingInstr());
+ return CI.getType();
+}
+
+Type *VPTypeAnalysis::inferScalarTypeForRecipe(
+ const VPWidenMemoryInstructionRecipe *R) {
+ assert(!R->isStore() && "Store recipes should not define any values");
+ return cast<LoadInst>(&R->getIngredient())->getType();
+}
+
+Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenSelectRecipe *R) {
+ Type *ResTy = inferScalarType(R->getOperand(1));
+ VPValue *OtherV = R->getOperand(2);
+ assert(inferScalarType(OtherV) == ResTy &&
+ "different types inferred for different operands");
+ CachedTypes[OtherV] = ResTy;
+ return ResTy;
+}
+
+Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPReplicateRecipe *R) {
+ switch (R->getUnderlyingInstr()->getOpcode()) {
+ case Instruction::Call: {
+ unsigned CallIdx = R->getNumOperands() - (R->isPredicated() ? 2 : 1);
+ return cast<Function>(R->getOperand(CallIdx)->getLiveInIRValue())
+ ->getReturnType();
+ }
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ Type *ResTy = inferScalarType(R->getOperand(0));
+ assert(ResTy == inferScalarType(R->getOperand(1)) &&
+ "inferred types for operands of binary op don't match");
+ CachedTypes[R->getOperand(1)] = ResTy;
+ return ResTy;
+ }
+ case Instruction::Select: {
+ Type *ResTy = inferScalarType(R->getOperand(1));
+ assert(ResTy == inferScalarType(R->getOperand(2)) &&
+ "inferred types for operands of select op don't match");
+ CachedTypes[R->getOperand(2)] = ResTy;
+ return ResTy;
+ }
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return IntegerType::get(Ctx, 1);
+ case Instruction::Alloca:
+ case Instruction::BitCast:
+ case Instruction::Trunc:
+ case Instruction::SExt:
+ case Instruction::ZExt:
+ case Instruction::FPExt:
+ case Instruction::FPTrunc:
+ case Instruction::ExtractValue:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::FPToSI:
+ case Instruction::FPToUI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ return R->getUnderlyingInstr()->getType();
+ case Instruction::Freeze:
+ case Instruction::FNeg:
+ case Instruction::GetElementPtr:
+ return inferScalarType(R->getOperand(0));
+ case Instruction::Load:
+ return cast<LoadInst>(R->getUnderlyingInstr())->getType();
+ case Instruction::Store:
+ // FIXME: VPReplicateRecipes with store opcodes still define a result
+ // VPValue, so we need to handle them here. Remove the code here once this
+ // is modeled accurately in VPlan.
+ return Type::getVoidTy(Ctx);
+ default:
+ break;
+ }
+ // Type inference not implemented for opcode.
+ LLVM_DEBUG({
+ dbgs() << "LV: Found unhandled opcode for: ";
+ R->getVPSingleValue()->dump();
+ });
+ llvm_unreachable("Unhandled opcode");
+}
+
+Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
+ if (Type *CachedTy = CachedTypes.lookup(V))
+ return CachedTy;
+
+ if (V->isLiveIn())
+ return V->getLiveInIRValue()->getType();
+
+ Type *ResultTy =
+ TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe())
+ .Case<VPCanonicalIVPHIRecipe, VPFirstOrderRecurrencePHIRecipe,
+ VPReductionPHIRecipe, VPWidenPointerInductionRecipe>(
+ [this](const auto *R) {
+ // Handle header phi recipes, except VPWidenIntOrFpInduction,
+ // which needs special handling because it may be truncated.
+ // TODO: consider inferring/caching type of siblings, e.g.,
+ // backedge value, here and in cases below.
+ return inferScalarType(R->getStartValue());
+ })
+ .Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
+ [](const auto *R) { return R->getScalarType(); })
+ .Case<VPPredInstPHIRecipe, VPWidenPHIRecipe, VPScalarIVStepsRecipe,
+ VPWidenGEPRecipe>([this](const VPRecipeBase *R) {
+ return inferScalarType(R->getOperand(0));
+ })
+ .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
+ VPWidenCallRecipe, VPWidenMemoryInstructionRecipe,
+ VPWidenSelectRecipe>(
+ [this](const auto *R) { return inferScalarTypeForRecipe(R); })
+ .Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
+ // TODO: Use info from interleave group.
+ return V->getUnderlyingValue()->getType();
+ })
+ .Case<VPWidenCastRecipe>(
+ [](const VPWidenCastRecipe *R) { return R->getResultType(); });
+ assert(ResultTy && "could not infer type for the given VPValue");
+ CachedTypes[V] = ResultTy;
+ return ResultTy;
+}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
new file mode 100644
index 000000000000..7276641551ae
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
@@ -0,0 +1,61 @@
+//===- VPlanAnalysis.h - Various Analyses working on VPlan ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANANALYSIS_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLANANALYSIS_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+
+class LLVMContext;
+class VPValue;
+class VPBlendRecipe;
+class VPInstruction;
+class VPWidenRecipe;
+class VPWidenCallRecipe;
+class VPWidenIntOrFpInductionRecipe;
+class VPWidenMemoryInstructionRecipe;
+struct VPWidenSelectRecipe;
+class VPReplicateRecipe;
+class Type;
+
+/// An analysis for type-inference for VPValues.
+/// It infers the scalar type for a given VPValue by traversing bottom-up
+/// through defining recipes until root nodes with known types are reached (e.g.
+/// live-ins or load recipes). The types are then propagated top down through
+/// operations.
+/// Note that the analysis caches the inferred types. A new analysis object must
+/// be constructed once a VPlan has been modified in a way that invalidates any
+/// of the previously inferred types.
+class VPTypeAnalysis {
+ DenseMap<const VPValue *, Type *> CachedTypes;
+ LLVMContext &Ctx;
+
+ Type *inferScalarTypeForRecipe(const VPBlendRecipe *R);
+ Type *inferScalarTypeForRecipe(const VPInstruction *R);
+ Type *inferScalarTypeForRecipe(const VPWidenCallRecipe *R);
+ Type *inferScalarTypeForRecipe(const VPWidenRecipe *R);
+ Type *inferScalarTypeForRecipe(const VPWidenIntOrFpInductionRecipe *R);
+ Type *inferScalarTypeForRecipe(const VPWidenMemoryInstructionRecipe *R);
+ Type *inferScalarTypeForRecipe(const VPWidenSelectRecipe *R);
+ Type *inferScalarTypeForRecipe(const VPReplicateRecipe *R);
+
+public:
+ VPTypeAnalysis(LLVMContext &Ctx) : Ctx(Ctx) {}
+
+ /// Infer the type of \p V. Returns the scalar type of \p V.
+ Type *inferScalarType(const VPValue *V);
+
+ /// Return the LLVMContext used by the analysis.
+ LLVMContext &getContext() { return Ctx; }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANANALYSIS_H
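A hedged usage sketch for the analysis; Def and Ctx stand in for values available at a real call site:

    #include "VPlan.h"
    #include "VPlanAnalysis.h"

    using namespace llvm;

    // Query the scalar type of a VPValue. Results are cached, so construct a
    // fresh VPTypeAnalysis after transforming the VPlan, not before.
    static Type *inspectType(VPValue *Def, LLVMContext &Ctx) {
      VPTypeAnalysis TypeAnalysis(Ctx);
      return TypeAnalysis.inferScalarType(Def);
    }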
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index f6e3a2a16db8..f950d4740e41 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -61,6 +61,7 @@ private:
// Utility functions.
void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
+ void setRegionPredsFromBB(VPRegionBlock *VPBB, BasicBlock *BB);
void fixPhiNodes();
VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
#ifndef NDEBUG
@@ -81,14 +82,43 @@ public:
// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
// must have no predecessors.
void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
- SmallVector<VPBlockBase *, 8> VPBBPreds;
+ auto GetLatchOfExit = [this](BasicBlock *BB) -> BasicBlock * {
+ auto *SinglePred = BB->getSinglePredecessor();
+ Loop *LoopForBB = LI->getLoopFor(BB);
+ if (!SinglePred || LI->getLoopFor(SinglePred) == LoopForBB)
+ return nullptr;
+ // The input IR must be in loop-simplify form, ensuring a single predecessor
+ // for exit blocks.
+ assert(SinglePred == LI->getLoopFor(SinglePred)->getLoopLatch() &&
+ "SinglePred must be the only loop latch");
+ return SinglePred;
+ };
+ if (auto *LatchBB = GetLatchOfExit(BB)) {
+ auto *PredRegion = getOrCreateVPBB(LatchBB)->getParent();
+ assert(VPBB == cast<VPBasicBlock>(PredRegion->getSingleSuccessor()) &&
+ "successor must already be set for PredRegion; it must have VPBB "
+ "as single successor");
+ VPBB->setPredecessors({PredRegion});
+ return;
+ }
// Collect VPBB predecessors.
+ SmallVector<VPBlockBase *, 2> VPBBPreds;
for (BasicBlock *Pred : predecessors(BB))
VPBBPreds.push_back(getOrCreateVPBB(Pred));
-
VPBB->setPredecessors(VPBBPreds);
}
+static bool isHeaderBB(BasicBlock *BB, Loop *L) {
+ return L && BB == L->getHeader();
+}
+
+void PlainCFGBuilder::setRegionPredsFromBB(VPRegionBlock *Region,
+ BasicBlock *BB) {
+ // BB is a loop header block. Connect the region to the loop preheader.
+ Loop *LoopOfBB = LI->getLoopFor(BB);
+ Region->setPredecessors({getOrCreateVPBB(LoopOfBB->getLoopPredecessor())});
+}
+
// Add operands to VPInstructions representing phi nodes from the input IR.
void PlainCFGBuilder::fixPhiNodes() {
for (auto *Phi : PhisToFix) {
@@ -100,38 +130,85 @@ void PlainCFGBuilder::fixPhiNodes() {
assert(VPPhi->getNumOperands() == 0 &&
"Expected VPInstruction with no operands.");
+ Loop *L = LI->getLoopFor(Phi->getParent());
+ if (isHeaderBB(Phi->getParent(), L)) {
+ // For header phis, make sure the incoming value from the loop
+ // predecessor is the first operand of the recipe.
+ assert(Phi->getNumOperands() == 2);
+ BasicBlock *LoopPred = L->getLoopPredecessor();
+ VPPhi->addIncoming(
+ getOrCreateVPOperand(Phi->getIncomingValueForBlock(LoopPred)),
+ BB2VPBB[LoopPred]);
+ BasicBlock *LoopLatch = L->getLoopLatch();
+ VPPhi->addIncoming(
+ getOrCreateVPOperand(Phi->getIncomingValueForBlock(LoopLatch)),
+ BB2VPBB[LoopLatch]);
+ continue;
+ }
+
for (unsigned I = 0; I != Phi->getNumOperands(); ++I)
VPPhi->addIncoming(getOrCreateVPOperand(Phi->getIncomingValue(I)),
BB2VPBB[Phi->getIncomingBlock(I)]);
}
}
+static bool isHeaderVPBB(VPBasicBlock *VPBB) {
+ return VPBB->getParent() && VPBB->getParent()->getEntry() == VPBB;
+}
+
+/// Return true if loop \p L is contained within \p OuterLoop.
+static bool doesContainLoop(const Loop *L, const Loop *OuterLoop) {
+ if (L->getLoopDepth() < OuterLoop->getLoopDepth())
+ return false;
+ const Loop *P = L;
+ while (P) {
+ if (P == OuterLoop)
+ return true;
+ P = P->getParentLoop();
+ }
+ return false;
+}
+
// Create a new empty VPBasicBlock for an incoming BasicBlock in the region
// corresponding to the containing loop or retrieve an existing one if it was
// already created. If no region exists yet for the loop containing \p BB, a new
// one is created.
VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
- auto BlockIt = BB2VPBB.find(BB);
- if (BlockIt != BB2VPBB.end())
+ if (auto *VPBB = BB2VPBB.lookup(BB)) {
// Retrieve existing VPBB.
- return BlockIt->second;
-
- // Get or create a region for the loop containing BB.
- Loop *CurrentLoop = LI->getLoopFor(BB);
- VPRegionBlock *ParentR = nullptr;
- if (CurrentLoop) {
- auto Iter = Loop2Region.insert({CurrentLoop, nullptr});
- if (Iter.second)
- Iter.first->second = new VPRegionBlock(
- CurrentLoop->getHeader()->getName().str(), false /*isReplicator*/);
- ParentR = Iter.first->second;
+ return VPBB;
}
// Create new VPBB.
- LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << BB->getName() << "\n");
- VPBasicBlock *VPBB = new VPBasicBlock(BB->getName());
+ StringRef Name = isHeaderBB(BB, TheLoop) ? "vector.body" : BB->getName();
+ LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
+ VPBasicBlock *VPBB = new VPBasicBlock(Name);
BB2VPBB[BB] = VPBB;
- VPBB->setParent(ParentR);
+
+ // Get or create a region for the loop containing BB.
+ Loop *LoopOfBB = LI->getLoopFor(BB);
+ if (!LoopOfBB || !doesContainLoop(LoopOfBB, TheLoop))
+ return VPBB;
+
+ auto *RegionOfVPBB = Loop2Region.lookup(LoopOfBB);
+ if (!isHeaderBB(BB, LoopOfBB)) {
+ assert(RegionOfVPBB &&
+ "Region should have been created by visiting header earlier");
+ VPBB->setParent(RegionOfVPBB);
+ return VPBB;
+ }
+
+ assert(!RegionOfVPBB &&
+ "First visit of a header basic block expects to register its region.");
+ // Handle a header - take care of its Region.
+ if (LoopOfBB == TheLoop) {
+ RegionOfVPBB = Plan.getVectorLoopRegion();
+ } else {
+ RegionOfVPBB = new VPRegionBlock(Name.str(), false /*isReplicator*/);
+ RegionOfVPBB->setParent(Loop2Region[LoopOfBB->getParentLoop()]);
+ }
+ RegionOfVPBB->setEntry(VPBB);
+ Loop2Region[LoopOfBB] = RegionOfVPBB;
return VPBB;
}
@@ -254,6 +331,25 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
// Main interface to build the plain CFG.
void PlainCFGBuilder::buildPlainCFG() {
+ // 0. Reuse the top-level region, vector-preheader and exit VPBBs from the
+ // skeleton. These were created directly rather than via getOrCreateVPBB(),
+ // revisit them now to update BB2VPBB. Note that header/entry and
+ // latch/exiting VPBBs of the top-level region have yet to be created.
+ VPRegionBlock *TheRegion = Plan.getVectorLoopRegion();
+ BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader();
+ assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) &&
+ "Unexpected loop preheader");
+ auto *VectorPreheaderVPBB =
+ cast<VPBasicBlock>(TheRegion->getSinglePredecessor());
+ // ThePreheaderBB conceptually corresponds to both Plan.getPreheader() (which
+ // wraps the original preheader BB) and Plan.getEntry() (which represents the
+ // new vector preheader); here we're interested in setting BB2VPBB to the
+ // latter.
+ BB2VPBB[ThePreheaderBB] = VectorPreheaderVPBB;
+ BasicBlock *LoopExitBB = TheLoop->getUniqueExitBlock();
+ assert(LoopExitBB && "Loops with multiple exits are not supported.");
+ BB2VPBB[LoopExitBB] = cast<VPBasicBlock>(TheRegion->getSingleSuccessor());
+
// 1. Scan the body of the loop in a topological order to visit each basic
// block after having visited its predecessor basic blocks. Create a VPBB for
// each BB and link it to its successor and predecessor VPBBs. Note that
@@ -263,21 +359,11 @@ void PlainCFGBuilder::buildPlainCFG() {
// Loop PH needs to be explicitly visited since it's not taken into account by
// LoopBlocksDFS.
- BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader();
- assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) &&
- "Unexpected loop preheader");
- VPBasicBlock *ThePreheaderVPBB = Plan.getEntry();
- BB2VPBB[ThePreheaderBB] = ThePreheaderVPBB;
- ThePreheaderVPBB->setName("vector.ph");
for (auto &I : *ThePreheaderBB) {
if (I.getType()->isVoidTy())
continue;
IRDef2VPValue[&I] = Plan.getVPValueOrAddLiveIn(&I);
}
- // Create empty VPBB for Loop H so that we can link PH->H.
- VPBlockBase *HeaderVPBB = getOrCreateVPBB(TheLoop->getHeader());
- HeaderVPBB->setName("vector.body");
- ThePreheaderVPBB->setOneSuccessor(HeaderVPBB);
LoopBlocksRPO RPO(TheLoop);
RPO.perform(LI);
@@ -286,88 +372,55 @@ void PlainCFGBuilder::buildPlainCFG() {
// Create or retrieve the VPBasicBlock for this BB and create its
// VPInstructions.
VPBasicBlock *VPBB = getOrCreateVPBB(BB);
+ VPRegionBlock *Region = VPBB->getParent();
createVPInstructionsForVPBB(VPBB, BB);
+ Loop *LoopForBB = LI->getLoopFor(BB);
+ // Set VPBB predecessors in the same order as they are in the incoming BB.
+ if (!isHeaderBB(BB, LoopForBB)) {
+ setVPBBPredsFromBB(VPBB, BB);
+ } else {
+ // BB is a loop header, set the predecessor for the region, except for the
+ // top region, whose predecessor was set when creating VPlan's skeleton.
+ assert(isHeaderVPBB(VPBB) && "isHeaderBB and isHeaderVPBB disagree");
+ if (TheRegion != Region)
+ setRegionPredsFromBB(Region, BB);
+ }
// Set VPBB successors. We create empty VPBBs for successors if they don't
// exist already. Recipes will be created when the successor is visited
// during the RPO traversal.
- Instruction *TI = BB->getTerminator();
- assert(TI && "Terminator expected.");
- unsigned NumSuccs = TI->getNumSuccessors();
-
+ auto *BI = cast<BranchInst>(BB->getTerminator());
+ unsigned NumSuccs = succ_size(BB);
if (NumSuccs == 1) {
- VPBasicBlock *SuccVPBB = getOrCreateVPBB(TI->getSuccessor(0));
- assert(SuccVPBB && "VPBB Successor not found.");
- VPBB->setOneSuccessor(SuccVPBB);
- } else if (NumSuccs == 2) {
- VPBasicBlock *SuccVPBB0 = getOrCreateVPBB(TI->getSuccessor(0));
- assert(SuccVPBB0 && "Successor 0 not found.");
- VPBasicBlock *SuccVPBB1 = getOrCreateVPBB(TI->getSuccessor(1));
- assert(SuccVPBB1 && "Successor 1 not found.");
-
- // Get VPBB's condition bit.
- assert(isa<BranchInst>(TI) && "Unsupported terminator!");
- // Look up the branch condition to get the corresponding VPValue
- // representing the condition bit in VPlan (which may be in another VPBB).
- assert(IRDef2VPValue.count(cast<BranchInst>(TI)->getCondition()) &&
- "Missing condition bit in IRDef2VPValue!");
-
- // Link successors.
- VPBB->setTwoSuccessors(SuccVPBB0, SuccVPBB1);
- } else
- llvm_unreachable("Number of successors not supported.");
-
- // Set VPBB predecessors in the same order as they are in the incoming BB.
- setVPBBPredsFromBB(VPBB, BB);
+ auto *Successor = getOrCreateVPBB(BB->getSingleSuccessor());
+ VPBB->setOneSuccessor(isHeaderVPBB(Successor)
+ ? Successor->getParent()
+ : static_cast<VPBlockBase *>(Successor));
+ continue;
+ }
+ assert(BI->isConditional() && NumSuccs == 2 &&
+        "block must have conditional branch with 2 successors");
+ // Look up the branch condition to get the corresponding VPValue
+ // representing the condition bit in VPlan (which may be in another VPBB).
+ assert(IRDef2VPValue.contains(BI->getCondition()) &&
+ "Missing condition bit in IRDef2VPValue!");
+ VPBasicBlock *Successor0 = getOrCreateVPBB(BI->getSuccessor(0));
+ VPBasicBlock *Successor1 = getOrCreateVPBB(BI->getSuccessor(1));
+ if (!LoopForBB || BB != LoopForBB->getLoopLatch()) {
+ VPBB->setTwoSuccessors(Successor0, Successor1);
+ continue;
+ }
+ // For a latch, the successor must be set on the region rather than on the
+ // VPBB, and it must be the exit (i.e., the non-header) successor. The top
+ // region is the exception: its successor was set when creating VPlan's
+ // skeleton.
+ if (TheRegion != Region)
+ Region->setOneSuccessor(isHeaderVPBB(Successor0) ? Successor1
+ : Successor0);
+ Region->setExiting(VPBB);
}
- // 2. Process outermost loop exit. We created an empty VPBB for the loop
- // single exit BB during the RPO traversal of the loop body but Instructions
- // weren't visited because it's not part of the loop.
- BasicBlock *LoopExitBB = TheLoop->getUniqueExitBlock();
- assert(LoopExitBB && "Loops with multiple exits are not supported.");
- VPBasicBlock *LoopExitVPBB = BB2VPBB[LoopExitBB];
- // Loop exit was already set as successor of the loop exiting BB.
- // We only set its predecessor VPBB now.
- setVPBBPredsFromBB(LoopExitVPBB, LoopExitBB);
-
- // 3. Fix up region blocks for loops. For each loop,
- // * use the header block as entry to the corresponding region,
- // * use the latch block as exit of the corresponding region,
- // * set the region as successor of the loop pre-header, and
- // * set the exit block as successor to the region.
- SmallVector<Loop *> LoopWorkList;
- LoopWorkList.push_back(TheLoop);
- while (!LoopWorkList.empty()) {
- Loop *L = LoopWorkList.pop_back_val();
- BasicBlock *Header = L->getHeader();
- BasicBlock *Exiting = L->getLoopLatch();
- assert(Exiting == L->getExitingBlock() &&
- "Latch must be the only exiting block");
- VPRegionBlock *Region = Loop2Region[L];
- VPBasicBlock *HeaderVPBB = getOrCreateVPBB(Header);
- VPBasicBlock *ExitingVPBB = getOrCreateVPBB(Exiting);
-
- // Disconnect backedge and pre-header from header.
- VPBasicBlock *PreheaderVPBB = getOrCreateVPBB(L->getLoopPreheader());
- VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB);
- VPBlockUtils::disconnectBlocks(ExitingVPBB, HeaderVPBB);
-
- Region->setParent(PreheaderVPBB->getParent());
- Region->setEntry(HeaderVPBB);
- VPBlockUtils::connectBlocks(PreheaderVPBB, Region);
-
- // Disconnect exit block from exiting (=latch) block, set exiting block and
- // connect region to exit block.
- VPBasicBlock *ExitVPBB = getOrCreateVPBB(L->getExitBlock());
- VPBlockUtils::disconnectBlocks(ExitingVPBB, ExitVPBB);
- Region->setExiting(ExitingVPBB);
- VPBlockUtils::connectBlocks(Region, ExitVPBB);
-
- // Queue sub-loops for processing.
- LoopWorkList.append(L->begin(), L->end());
- }
- // 4. The whole CFG has been built at this point so all the input Values must
+ // 2. The whole CFG has been built at this point so all the input Values must
// have a VPlan counterpart. Fix VPlan phi nodes by adding their corresponding
// VPlan operands.
fixPhiNodes();
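
The traversal above relies on reverse post-order visiting every block only after all of its predecessors, back edges aside, which is what lets predecessor VPBBs be looked up when each block is reached. A self-contained sketch of computing an RPO by reversing a DFS post-order, over a hypothetical four-block diamond rather than LLVM's LoopBlocksRPO:

#include <algorithm>
#include <cstdio>
#include <vector>

// Diamond CFG: 0 -> {1,2}, 1 -> 3, 2 -> 3.
static const std::vector<std::vector<int>> Succs = {{1, 2}, {3}, {3}, {}};

static void postOrder(int N, std::vector<bool> &Seen, std::vector<int> &Out) {
  Seen[N] = true;
  for (int S : Succs[N])
    if (!Seen[S])
      postOrder(S, Seen, Out);
  Out.push_back(N); // emitted only after all successors
}

int main() {
  std::vector<bool> Seen(Succs.size(), false);
  std::vector<int> PO;
  postOrder(0, Seen, PO);
  std::reverse(PO.begin(), PO.end()); // reversed post-order = RPO
  for (int N : PO)
    std::printf("%d ", N); // prints 0 first and 3 last: preds before succs
  std::printf("\n");
}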
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 26c309eed800..02e400d590be 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "VPlan.h"
+#include "VPlanAnalysis.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
@@ -43,6 +44,8 @@ extern cl::opt<bool> EnableVPlanNativePath;
bool VPRecipeBase::mayWriteToMemory() const {
switch (getVPDefID()) {
+ case VPInterleaveSC:
+ return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
case VPWidenMemoryInstructionSC: {
return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
}
@@ -114,6 +117,16 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPDerivedIVSC:
case VPPredInstPHISC:
return false;
+ case VPInstructionSC:
+ switch (cast<VPInstruction>(this)->getOpcode()) {
+ case Instruction::ICmp:
+ case VPInstruction::Not:
+ case VPInstruction::CalculateTripCountMinusVF:
+ case VPInstruction::CanonicalIVIncrementForPart:
+ return false;
+ default:
+ return true;
+ }
case VPWidenCallSC:
return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
->mayHaveSideEffects();
@@ -135,6 +148,8 @@ bool VPRecipeBase::mayHaveSideEffects() const {
"underlying instruction has side-effects");
return false;
}
+ case VPInterleaveSC:
+ return mayWriteToMemory();
case VPWidenMemoryInstructionSC:
assert(cast<VPWidenMemoryInstructionRecipe>(this)
->getIngredient()
@@ -156,8 +171,13 @@ void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
VPValue *ExitValue = getOperand(0);
if (vputils::isUniformAfterVectorization(ExitValue))
Lane = VPLane::getFirstLane();
+ VPBasicBlock *MiddleVPBB =
+ cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
+ assert(MiddleVPBB->getNumSuccessors() == 0 &&
+ "the middle block must not have any successors");
+ BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB];
Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
- State.Builder.GetInsertBlock());
+ MiddleBB);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -216,15 +236,55 @@ void VPRecipeBase::moveBefore(VPBasicBlock &BB,
insertBefore(BB, I);
}
+FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
+ assert(OpType == OperationType::FPMathOp &&
+ "recipe doesn't have fast math flags");
+ FastMathFlags Res;
+ Res.setAllowReassoc(FMFs.AllowReassoc);
+ Res.setNoNaNs(FMFs.NoNaNs);
+ Res.setNoInfs(FMFs.NoInfs);
+ Res.setNoSignedZeros(FMFs.NoSignedZeros);
+ Res.setAllowReciprocal(FMFs.AllowReciprocal);
+ Res.setAllowContract(FMFs.AllowContract);
+ Res.setApproxFunc(FMFs.ApproxFunc);
+ return Res;
+}
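
The flags round trip above (pack at construction time, rehydrate in getFastMathFlags) can be illustrated in isolation. A sketch with plain stand-in types, not the LLVM FastMathFlags API, and only three of the seven flags for brevity:

#include <cassert>

// Stand-in for llvm::FastMathFlags: just named booleans.
struct FMF {
  bool AllowReassoc = false, NoNaNs = false, NoInfs = false;
};

// Stand-in for the recipe's packed storage: one bit per flag.
struct PackedFMF {
  unsigned AllowReassoc : 1;
  unsigned NoNaNs : 1;
  unsigned NoInfs : 1;
  explicit PackedFMF(const FMF &F)
      : AllowReassoc(F.AllowReassoc), NoNaNs(F.NoNaNs), NoInfs(F.NoInfs) {}
  FMF unpack() const {
    return {(bool)AllowReassoc, (bool)NoNaNs, (bool)NoInfs};
  }
};

int main() {
  FMF In{true, false, true};
  FMF Out = PackedFMF(In).unpack();
  assert(Out.AllowReassoc && !Out.NoNaNs && Out.NoInfs); // lossless round trip
}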
+
+VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
+ VPValue *A, VPValue *B, DebugLoc DL,
+ const Twine &Name)
+ : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
+ Pred, DL),
+ VPValue(this), Opcode(Opcode), Name(Name.str()) {
+ assert(Opcode == Instruction::ICmp &&
+ "only ICmp predicates supported at the moment");
+}
+
+VPInstruction::VPInstruction(unsigned Opcode,
+ std::initializer_list<VPValue *> Operands,
+ FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
+ : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
+ VPValue(this), Opcode(Opcode), Name(Name.str()) {
+ // Make sure the VPInstruction is a floating-point operation.
+ assert(isFPMathOp() && "this op can't take fast-math flags");
+}
+
Value *VPInstruction::generateInstruction(VPTransformState &State,
unsigned Part) {
IRBuilderBase &Builder = State.Builder;
- Builder.SetCurrentDebugLocation(DL);
+ Builder.SetCurrentDebugLocation(getDebugLoc());
if (Instruction::isBinaryOp(getOpcode())) {
+ if (Part != 0 && vputils::onlyFirstPartUsed(this))
+ return State.get(this, 0);
+
Value *A = State.get(getOperand(0), Part);
Value *B = State.get(getOperand(1), Part);
- return Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
+ auto *Res =
+ Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
+ if (auto *I = dyn_cast<Instruction>(Res))
+ setFlags(I);
+ return Res;
}
switch (getOpcode()) {
@@ -232,10 +292,10 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
Value *A = State.get(getOperand(0), Part);
return Builder.CreateNot(A, Name);
}
- case VPInstruction::ICmpULE: {
- Value *IV = State.get(getOperand(0), Part);
- Value *TC = State.get(getOperand(1), Part);
- return Builder.CreateICmpULE(IV, TC, Name);
+ case Instruction::ICmp: {
+ Value *A = State.get(getOperand(0), Part);
+ Value *B = State.get(getOperand(1), Part);
+ return Builder.CreateCmp(getPredicate(), A, B, Name);
}
case Instruction::Select: {
Value *Cond = State.get(getOperand(0), Part);
@@ -285,23 +345,7 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
return Builder.CreateSelect(Cmp, Sub, Zero);
}
- case VPInstruction::CanonicalIVIncrement:
- case VPInstruction::CanonicalIVIncrementNUW: {
- if (Part == 0) {
- bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
- auto *Phi = State.get(getOperand(0), 0);
- // The loop step is equal to the vectorization factor (num of SIMD
- // elements) times the unroll factor (num of SIMD instructions).
- Value *Step =
- createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
- return Builder.CreateAdd(Phi, Step, Name, IsNUW, false);
- }
- return State.get(this, 0);
- }
-
- case VPInstruction::CanonicalIVIncrementForPart:
- case VPInstruction::CanonicalIVIncrementForPartNUW: {
- bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementForPartNUW;
+ case VPInstruction::CanonicalIVIncrementForPart: {
auto *IV = State.get(getOperand(0), VPIteration(0, 0));
if (Part == 0)
return IV;
@@ -309,7 +353,8 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
// The canonical IV is incremented by the vectorization factor (num of SIMD
// elements) times the unroll part.
Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
- return Builder.CreateAdd(IV, Step, Name, IsNUW, false);
+ return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
+ hasNoSignedWrap());
}
case VPInstruction::BranchOnCond: {
if (Part != 0)
@@ -361,10 +406,25 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
}
}
+#if !defined(NDEBUG)
+bool VPInstruction::isFPMathOp() const {
+  // Inspired by FPMathOperator::classof. Notable differences are that we don't
+  // support Call and PHI opcodes here yet.
+ return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
+ Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
+ Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
+ Opcode == Instruction::FCmp || Opcode == Instruction::Select;
+}
+#endif
+
void VPInstruction::execute(VPTransformState &State) {
assert(!State.Instance && "VPInstruction executing an Instance");
IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
- State.Builder.setFastMathFlags(FMF);
+ assert((hasFastMathFlags() == isFPMathOp() ||
+ getOpcode() == Instruction::Select) &&
+ "Recipe not a FPMathOp but has fast-math flags?");
+ if (hasFastMathFlags())
+ State.Builder.setFastMathFlags(getFastMathFlags());
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *GeneratedValue = generateInstruction(State, Part);
if (!hasResult())
@@ -393,9 +453,6 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::Not:
O << "not";
break;
- case VPInstruction::ICmpULE:
- O << "icmp ule";
- break;
case VPInstruction::SLPLoad:
O << "combined load";
break;
@@ -408,12 +465,6 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::FirstOrderRecurrenceSplice:
O << "first-order splice";
break;
- case VPInstruction::CanonicalIVIncrement:
- O << "VF * UF + ";
- break;
- case VPInstruction::CanonicalIVIncrementNUW:
- O << "VF * UF +(nuw) ";
- break;
case VPInstruction::BranchOnCond:
O << "branch-on-cond";
break;
@@ -421,49 +472,35 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
O << "TC > VF ? TC - VF : 0";
break;
case VPInstruction::CanonicalIVIncrementForPart:
- O << "VF * Part + ";
- break;
- case VPInstruction::CanonicalIVIncrementForPartNUW:
- O << "VF * Part +(nuw) ";
+ O << "VF * Part +";
break;
case VPInstruction::BranchOnCount:
- O << "branch-on-count ";
+ O << "branch-on-count";
break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
- O << FMF;
-
- for (const VPValue *Operand : operands()) {
- O << " ";
- Operand->printAsOperand(O, SlotTracker);
- }
+ printFlags(O);
+ printOperands(O, SlotTracker);
- if (DL) {
+ if (auto DL = getDebugLoc()) {
O << ", !dbg ";
DL.print(O);
}
}
#endif
-void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
- // Make sure the VPInstruction is a floating-point operation.
- assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
- Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
- Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
- Opcode == Instruction::FCmp) &&
- "this op can't take fast-math flags");
- FMF = FMFNew;
-}
-
void VPWidenCallRecipe::execute(VPTransformState &State) {
assert(State.VF.isVector() && "not widening");
auto &CI = *cast<CallInst>(getUnderlyingInstr());
assert(!isa<DbgInfoIntrinsic>(CI) &&
"DbgInfoIntrinsic should have been dropped during VPlan construction");
- State.setDebugLocFromInst(&CI);
+ State.setDebugLocFrom(CI.getDebugLoc());
+ FunctionType *VFTy = nullptr;
+ if (Variant)
+ VFTy = Variant->getFunctionType();
for (unsigned Part = 0; Part < State.UF; ++Part) {
SmallVector<Type *, 2> TysForDecl;
// Add return type if intrinsic is overloaded on it.
@@ -475,12 +512,15 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
for (const auto &I : enumerate(operands())) {
// Some intrinsics have a scalar argument - don't replace it with a
// vector.
+ // Some vectorized function variants may also take a scalar argument,
+ // e.g. linear parameters for pointers.
Value *Arg;
- if (VectorIntrinsicID == Intrinsic::not_intrinsic ||
- !isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
- Arg = State.get(I.value(), Part);
- else
+ if ((VFTy && !VFTy->getParamType(I.index())->isVectorTy()) ||
+ (VectorIntrinsicID != Intrinsic::not_intrinsic &&
+ isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index())))
Arg = State.get(I.value(), VPIteration(0, 0));
+ else
+ Arg = State.get(I.value(), Part);
if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
TysForDecl.push_back(Arg->getType());
Args.push_back(Arg);
@@ -553,8 +593,7 @@ void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
#endif
void VPWidenSelectRecipe::execute(VPTransformState &State) {
- auto &I = *cast<SelectInst>(getUnderlyingInstr());
- State.setDebugLocFromInst(&I);
+ State.setDebugLocFrom(getDebugLoc());
// The condition can be loop invariant but still defined inside the
// loop. This means that we can't just use the original 'cond' value.
@@ -569,13 +608,31 @@ void VPWidenSelectRecipe::execute(VPTransformState &State) {
Value *Op1 = State.get(getOperand(2), Part);
Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
State.set(this, Sel, Part);
- State.addMetadata(Sel, &I);
+ State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
}
}
+VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
+ const FastMathFlags &FMF) {
+ AllowReassoc = FMF.allowReassoc();
+ NoNaNs = FMF.noNaNs();
+ NoInfs = FMF.noInfs();
+ NoSignedZeros = FMF.noSignedZeros();
+ AllowReciprocal = FMF.allowReciprocal();
+ AllowContract = FMF.allowContract();
+ ApproxFunc = FMF.approxFunc();
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
switch (OpType) {
+ case OperationType::Cmp:
+ O << " " << CmpInst::getPredicateName(getPredicate());
+ break;
+ case OperationType::DisjointOp:
+ if (DisjointFlags.IsDisjoint)
+ O << " disjoint";
+ break;
case OperationType::PossiblyExactOp:
if (ExactFlags.IsExact)
O << " exact";
@@ -593,17 +650,22 @@ void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
if (GEPFlags.IsInBounds)
O << " inbounds";
break;
+ case OperationType::NonNegOp:
+ if (NonNegFlags.NonNeg)
+ O << " nneg";
+ break;
case OperationType::Other:
break;
}
- O << " ";
+ if (getNumOperands() > 0)
+ O << " ";
}
#endif
void VPWidenRecipe::execute(VPTransformState &State) {
- auto &I = *cast<Instruction>(getUnderlyingValue());
+ State.setDebugLocFrom(getDebugLoc());
auto &Builder = State.Builder;
- switch (I.getOpcode()) {
+ switch (Opcode) {
case Instruction::Call:
case Instruction::Br:
case Instruction::PHI:
@@ -630,28 +692,24 @@ void VPWidenRecipe::execute(VPTransformState &State) {
case Instruction::Or:
case Instruction::Xor: {
// Just widen unops and binops.
- State.setDebugLocFromInst(&I);
-
for (unsigned Part = 0; Part < State.UF; ++Part) {
SmallVector<Value *, 2> Ops;
for (VPValue *VPOp : operands())
Ops.push_back(State.get(VPOp, Part));
- Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
+ Value *V = Builder.CreateNAryOp(Opcode, Ops);
if (auto *VecOp = dyn_cast<Instruction>(V))
setFlags(VecOp);
// Use this vector value for all users of the original instruction.
State.set(this, V, Part);
- State.addMetadata(V, &I);
+ State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
}
break;
}
case Instruction::Freeze: {
- State.setDebugLocFromInst(&I);
-
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *Op = State.get(getOperand(0), Part);
@@ -663,9 +721,7 @@ void VPWidenRecipe::execute(VPTransformState &State) {
case Instruction::ICmp:
case Instruction::FCmp: {
// Widen compares. Generate vector compares.
- bool FCmp = (I.getOpcode() == Instruction::FCmp);
- auto *Cmp = cast<CmpInst>(&I);
- State.setDebugLocFromInst(Cmp);
+ bool FCmp = Opcode == Instruction::FCmp;
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *A = State.get(getOperand(0), Part);
Value *B = State.get(getOperand(1), Part);
@@ -673,51 +729,64 @@ void VPWidenRecipe::execute(VPTransformState &State) {
if (FCmp) {
// Propagate fast math flags.
IRBuilder<>::FastMathFlagGuard FMFG(Builder);
- Builder.setFastMathFlags(Cmp->getFastMathFlags());
- C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
+ if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
+ Builder.setFastMathFlags(I->getFastMathFlags());
+ C = Builder.CreateFCmp(getPredicate(), A, B);
} else {
- C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
+ C = Builder.CreateICmp(getPredicate(), A, B);
}
State.set(this, C, Part);
- State.addMetadata(C, &I);
+ State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
}
break;
}
default:
// This instruction is not vectorized by simple widening.
- LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
+ LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
+ << Instruction::getOpcodeName(Opcode));
llvm_unreachable("Unhandled instruction!");
} // end of switch.
+
+#if !defined(NDEBUG)
+ // Verify that VPlan type inference results agree with the type of the
+ // generated values.
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ assert(VectorType::get(State.TypeAnalysis.inferScalarType(this),
+ State.VF) == State.get(this, Part)->getType() &&
+ "inferred type and type from generated instructions do not match");
+ }
+#endif
}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN ";
printAsOperand(O, SlotTracker);
- const Instruction *UI = getUnderlyingInstr();
- O << " = " << UI->getOpcodeName();
+ O << " = " << Instruction::getOpcodeName(Opcode);
printFlags(O);
- if (auto *Cmp = dyn_cast<CmpInst>(UI))
- O << Cmp->getPredicate() << " ";
printOperands(O, SlotTracker);
}
#endif
void VPWidenCastRecipe::execute(VPTransformState &State) {
- auto *I = cast_or_null<Instruction>(getUnderlyingValue());
- if (I)
- State.setDebugLocFromInst(I);
+ State.setDebugLocFrom(getDebugLoc());
auto &Builder = State.Builder;
// Vectorize casts.
assert(State.VF.isVector() && "Not vectorizing?");
Type *DestTy = VectorType::get(getResultType(), State.VF);
-
+ VPValue *Op = getOperand(0);
for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *A = State.get(getOperand(0), Part);
+ if (Part > 0 && Op->isLiveIn()) {
+ // FIXME: Remove once explicit unrolling is implemented using VPlan.
+ State.set(this, State.get(this, 0), Part);
+ continue;
+ }
+ Value *A = State.get(Op, Part);
Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
State.set(this, Cast, Part);
- State.addMetadata(Cast, I);
+ State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
}
}
@@ -727,10 +796,182 @@ void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
O << Indent << "WIDEN-CAST ";
printAsOperand(O, SlotTracker);
O << " = " << Instruction::getOpcodeName(Opcode) << " ";
+ printFlags(O);
printOperands(O, SlotTracker);
O << " to " << *getResultType();
}
+#endif
+
+/// This function adds
+/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
+/// to each vector element of Val; the sequence starts at \p StartIdx.
+/// \p BinOp is only relevant for floating-point induction variables.
+static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
+ Instruction::BinaryOps BinOp, ElementCount VF,
+ IRBuilderBase &Builder) {
+ assert(VF.isVector() && "only vector VFs are supported");
+
+ // Create and check the types.
+ auto *ValVTy = cast<VectorType>(Val->getType());
+ ElementCount VLen = ValVTy->getElementCount();
+
+ Type *STy = Val->getType()->getScalarType();
+ assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
+ "Induction Step must be an integer or FP");
+ assert(Step->getType() == STy && "Step has wrong type");
+
+ SmallVector<Constant *, 8> Indices;
+
+ // Create a vector of consecutive numbers from zero to VF.
+ VectorType *InitVecValVTy = ValVTy;
+ if (STy->isFloatingPointTy()) {
+ Type *InitVecValSTy =
+ IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
+ InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
+ }
+ Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
+
+ // Splat the StartIdx
+ Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);
+
+ if (STy->isIntegerTy()) {
+ InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
+ Step = Builder.CreateVectorSplat(VLen, Step);
+ assert(Step->getType() == Val->getType() && "Invalid step vec");
+ // FIXME: The newly created binary instructions should contain nsw/nuw
+ // flags, which can be found from the original scalar operations.
+ Step = Builder.CreateMul(InitVec, Step);
+ return Builder.CreateAdd(Val, Step, "induction");
+ }
+
+ // Floating point induction.
+ assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
+ "Binary Opcode should be specified for FP induction");
+ InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
+ InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);
+
+ Step = Builder.CreateVectorSplat(VLen, Step);
+ Value *MulOp = Builder.CreateFMul(InitVec, Step);
+ return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
+}
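
Reduced to scalars, the integer path of getStepVector computes lane i as Val[i] + (StartIdx + i) * Step. A small sketch with a fixed VF and hypothetical values:

#include <array>
#include <cassert>

constexpr int VF = 4;

// Integer path of getStepVector: Val + (StartIdx + <0,1,2,...>) * Step.
std::array<int, VF> stepVector(std::array<int, VF> Val, int StartIdx,
                               int Step) {
  std::array<int, VF> Res;
  for (int I = 0; I < VF; ++I)
    Res[I] = Val[I] + (StartIdx + I) * Step;
  return Res;
}

int main() {
  // Splat of start value 10, step 3, starting index 0:
  auto R = stepVector({10, 10, 10, 10}, 0, 3);
  assert(R == (std::array<int, VF>{10, 13, 16, 19}));
}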
+
+/// A helper function that returns an integer or floating-point constant with
+/// value C.
+static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
+ return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
+ : ConstantFP::get(Ty, C);
+}
+
+static Value *getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy,
+ ElementCount VF) {
+ assert(FTy->isFloatingPointTy() && "Expected floating point type!");
+ Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
+ Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
+ return B.CreateUIToFP(RuntimeVF, FTy);
+}
+
+void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
+ assert(!State.Instance && "Int or FP induction being replicated.");
+
+ Value *Start = getStartValue()->getLiveInIRValue();
+ const InductionDescriptor &ID = getInductionDescriptor();
+ TruncInst *Trunc = getTruncInst();
+ IRBuilderBase &Builder = State.Builder;
+ assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
+ assert(State.VF.isVector() && "must have vector VF");
+
+ // The value from the original loop to which we are mapping the new induction
+ // variable.
+ Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;
+
+ // Fast-math-flags propagate from the original induction instruction.
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
+ Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
+
+ // Now do the actual transformations, and start with fetching the step value.
+ Value *Step = State.get(getStepValue(), VPIteration(0, 0));
+
+ assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
+ "Expected either an induction phi-node or a truncate of it!");
+
+ // Construct the initial value of the vector IV in the vector loop preheader
+ auto CurrIP = Builder.saveIP();
+ BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
+ Builder.SetInsertPoint(VectorPH->getTerminator());
+ if (isa<TruncInst>(EntryVal)) {
+ assert(Start->getType()->isIntegerTy() &&
+ "Truncation requires an integer type");
+ auto *TruncType = cast<IntegerType>(EntryVal->getType());
+ Step = Builder.CreateTrunc(Step, TruncType);
+ Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
+ }
+
+ Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
+ Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
+ Value *SteppedStart = getStepVector(
+ SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);
+
+ // We create vector phi nodes for both integer and floating-point induction
+ // variables. Here, we determine the kind of arithmetic we will perform.
+ Instruction::BinaryOps AddOp;
+ Instruction::BinaryOps MulOp;
+ if (Step->getType()->isIntegerTy()) {
+ AddOp = Instruction::Add;
+ MulOp = Instruction::Mul;
+ } else {
+ AddOp = ID.getInductionOpcode();
+ MulOp = Instruction::FMul;
+ }
+
+ // Multiply the vectorization factor by the step using integer or
+ // floating-point arithmetic as appropriate.
+ Type *StepType = Step->getType();
+ Value *RuntimeVF;
+ if (Step->getType()->isFloatingPointTy())
+ RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
+ else
+ RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
+ Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
+
+ // Create a vector splat to use in the induction update.
+ //
+ // FIXME: If the step is non-constant, we create the vector splat with
+ // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
+ // handle a constant vector splat.
+ Value *SplatVF = isa<Constant>(Mul)
+ ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
+ : Builder.CreateVectorSplat(State.VF, Mul);
+ Builder.restoreIP(CurrIP);
+
+ // We may need to add the step a number of times, depending on the unroll
+ // factor. The last of those goes into the PHI.
+ PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
+ VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
+ VecInd->setDebugLoc(EntryVal->getDebugLoc());
+ Instruction *LastInduction = VecInd;
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ State.set(this, LastInduction, Part);
+
+ if (isa<TruncInst>(EntryVal))
+ State.addMetadata(LastInduction, EntryVal);
+ LastInduction = cast<Instruction>(
+ Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
+ LastInduction->setDebugLoc(EntryVal->getDebugLoc());
+ }
+
+ LastInduction->setName("vec.ind.next");
+ VecInd->addIncoming(SteppedStart, VectorPH);
+ // Add induction update using an incorrect block temporarily. The phi node
+ // will be fixed after VPlan execution. Note that at this point the latch
+ // block cannot be used, as it does not exist yet.
+ // TODO: Model increment value in VPlan, by turning the recipe into a
+ // multi-def and a subclass of VPHeaderPHIRecipe.
+ VecInd->addIncoming(LastInduction, VectorPH);
+}
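
The unroll handling above amounts to: part 0 receives the stepped start, each following part adds a splat of VF * Step, and the value produced after the last part ("vec.ind.next") is what flows back into the vector phi. A scalarized sketch with a hypothetical VF and UF:

#include <array>
#include <cassert>
#include <vector>

constexpr int VF = 4, UF = 2;

int main() {
  int Start = 0, Step = 1;
  // SteppedStart: lane i = Start + i * Step.
  std::array<int, VF> Ind;
  for (int I = 0; I < VF; ++I)
    Ind[I] = Start + I * Step;

  std::vector<std::array<int, VF>> Parts;
  for (int Part = 0; Part < UF; ++Part) {
    Parts.push_back(Ind); // State.set(this, LastInduction, Part)
    for (int &Lane : Ind)
      Lane += VF * Step;  // the "step.add" update: splat of VF * Step
  }
  // Lanes across parts form one contiguous range 0..VF*UF-1; the final Ind
  // is what the phi receives on the backedge.
  assert(Parts[1][3] == 7 && Ind[0] == VF * UF);
}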
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-INDUCTION";
@@ -770,17 +1011,112 @@ void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,
O << " * ";
getStepValue()->printAsOperand(O, SlotTracker);
- if (IndDesc.getStep()->getType() != ResultTy)
- O << " (truncated to " << *ResultTy << ")";
+ if (TruncResultTy)
+ O << " (truncated to " << *TruncResultTy << ")";
}
#endif
+void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
+ // Fast-math-flags propagate from the original induction instruction.
+ IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
+ if (hasFastMathFlags())
+ State.Builder.setFastMathFlags(getFastMathFlags());
+
+  // Compute scalar induction steps. BaseIV is the scalar induction variable
+  // on which to base the steps; Step is the size of the step.
+
+ Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
+ Value *Step = State.get(getStepValue(), VPIteration(0, 0));
+ IRBuilderBase &Builder = State.Builder;
+
+ // Ensure step has the same type as that of scalar IV.
+ Type *BaseIVTy = BaseIV->getType()->getScalarType();
+ if (BaseIVTy != Step->getType()) {
+ // TODO: Also use VPDerivedIVRecipe when only the step needs truncating, to
+ // avoid separate truncate here.
+ assert(Step->getType()->isIntegerTy() &&
+ "Truncation requires an integer step");
+ Step = State.Builder.CreateTrunc(Step, BaseIVTy);
+ }
+
+ // We build scalar steps for both integer and floating-point induction
+ // variables. Here, we determine the kind of arithmetic we will perform.
+ Instruction::BinaryOps AddOp;
+ Instruction::BinaryOps MulOp;
+ if (BaseIVTy->isIntegerTy()) {
+ AddOp = Instruction::Add;
+ MulOp = Instruction::Mul;
+ } else {
+ AddOp = InductionOpcode;
+ MulOp = Instruction::FMul;
+ }
+
+ // Determine the number of scalars we need to generate for each unroll
+ // iteration.
+ bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
+ // Compute the scalar steps and save the results in State.
+ Type *IntStepTy =
+ IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
+ Type *VecIVTy = nullptr;
+ Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
+ if (!FirstLaneOnly && State.VF.isScalable()) {
+ VecIVTy = VectorType::get(BaseIVTy, State.VF);
+ UnitStepVec =
+ Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
+ SplatStep = Builder.CreateVectorSplat(State.VF, Step);
+ SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
+ }
+
+ unsigned StartPart = 0;
+ unsigned EndPart = State.UF;
+ unsigned StartLane = 0;
+ unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
+ if (State.Instance) {
+ StartPart = State.Instance->Part;
+ EndPart = StartPart + 1;
+ StartLane = State.Instance->Lane.getKnownLane();
+ EndLane = StartLane + 1;
+ }
+ for (unsigned Part = StartPart; Part < EndPart; ++Part) {
+ Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);
+
+ if (!FirstLaneOnly && State.VF.isScalable()) {
+ auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
+ auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
+ if (BaseIVTy->isFloatingPointTy())
+ InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
+ auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
+ auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
+ State.set(this, Add, Part);
+ // It's useful to record the lane values too for the known minimum number
+ // of elements so we do those below. This improves the code quality when
+ // trying to extract the first element, for example.
+ }
+
+ if (BaseIVTy->isFloatingPointTy())
+ StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
+
+ for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
+ Value *StartIdx = Builder.CreateBinOp(
+ AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
+ // The step returned by `createStepForVF` is a runtime-evaluated value
+ // when VF is scalable. Otherwise, it should be folded into a Constant.
+ assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
+ "Expected StartIdx to be folded to a constant when VF is not "
+ "scalable");
+ auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
+ auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
+ State.set(this, Add, VPIteration(Part, Lane));
+ }
+ }
+}
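
In the fixed-VF case modeled above, lane L of part P receives BaseIV + (P * VF + L) * Step. A minimal integer sketch of that indexing (hypothetical VF/UF, not the scalable-VF path):

#include <cassert>

int main() {
  const int VF = 4, UF = 2;
  int BaseIV = 100, Step = 2;
  for (int Part = 0; Part < UF; ++Part) {
    int StartIdx0 = Part * VF; // createStepForVF for a fixed VF
    for (int Lane = 0; Lane < VF; ++Lane) {
      int Val = BaseIV + (StartIdx0 + Lane) * Step;
      // Part 1, lane 2 corresponds to scalar iteration 6: 100 + 6*2.
      if (Part == 1 && Lane == 2)
        assert(Val == 112);
    }
  }
}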
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent;
printAsOperand(O, SlotTracker);
- O << Indent << "= SCALAR-STEPS ";
+ O << " = SCALAR-STEPS ";
printOperands(O, SlotTracker);
}
#endif
@@ -874,7 +1210,7 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
#endif
void VPBlendRecipe::execute(VPTransformState &State) {
- State.setDebugLocFromInst(Phi);
+ State.setDebugLocFrom(getDebugLoc());
// We know that all PHIs in non-header blocks are converted into
// selects, so we don't have to worry about the insertion order and we
// can just use the builder.
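
Since non-header phis become select chains, a three-way blend reduces to two nested selects. A scalar sketch with hypothetical masks and values:

#include <cassert>

// Three-way blend as nested selects: later masks take precedence.
int blend3(bool M1, bool M2, int V0, int V1, int V2) {
  int Sel = M1 ? V1 : V0; // select(mask1, v1, v0)
  return M2 ? V2 : Sel;   // select(mask2, v2, previous)
}

int main() {
  assert(blend3(false, false, 10, 20, 30) == 10);
  assert(blend3(true, false, 10, 20, 30) == 20);
  assert(blend3(true, true, 10, 20, 30) == 30);
}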
@@ -916,7 +1252,7 @@ void VPBlendRecipe::execute(VPTransformState &State) {
void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "BLEND ";
- Phi->printAsOperand(O, false);
+ printAsOperand(O, SlotTracker);
O << " =";
if (getNumIncomingValues() == 1) {
// Not a User of any mask: not really blending, this is a
@@ -942,14 +1278,14 @@ void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
O << " +";
if (isa<FPMathOperator>(getUnderlyingInstr()))
O << getUnderlyingInstr()->getFastMathFlags();
- O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
+ O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
getVecOp()->printAsOperand(O, SlotTracker);
if (getCondOp()) {
O << ", ";
getCondOp()->printAsOperand(O, SlotTracker);
}
O << ")";
- if (RdxDesc->IntermediateStore)
+ if (RdxDesc.IntermediateStore)
O << " (with final reduction value stored in invariant address sank "
"outside of loop)";
}
@@ -1093,12 +1429,12 @@ void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
Value *Start = getStartValue()->getLiveInIRValue();
- PHINode *EntryPart = PHINode::Create(
- Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());
+ PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
+ EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
EntryPart->addIncoming(Start, VectorPH);
- EntryPart->setDebugLoc(DL);
+ EntryPart->setDebugLoc(getDebugLoc());
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
State.set(this, EntryPart, Part);
}
@@ -1108,7 +1444,8 @@ void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "EMIT ";
printAsOperand(O, SlotTracker);
- O << " = CANONICAL-INDUCTION";
+ O << " = CANONICAL-INDUCTION ";
+ printOperands(O, SlotTracker);
}
#endif
@@ -1221,8 +1558,8 @@ void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
}
// Create a phi node for the new recurrence.
- PHINode *EntryPart = PHINode::Create(
- VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
+ PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur");
+ EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
EntryPart->addIncoming(VectorInit, VectorPH);
State.set(this, EntryPart, 0);
}
@@ -1254,8 +1591,8 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
"recipe must be in the vector loop header");
unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
- Value *EntryPart =
- PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
+ Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
+ EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
State.set(this, EntryPart, Part);
}
@@ -1269,8 +1606,8 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
Value *Iden = nullptr;
RecurKind RK = RdxDesc.getRecurrenceKind();
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
- RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
- // MinMax reduction have the start value as their identify.
+ RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
+ // MinMax and AnyOf reductions have the start value as their identity.
if (ScalarPHI) {
Iden = StartV;
} else {
@@ -1316,23 +1653,7 @@ void VPWidenPHIRecipe::execute(VPTransformState &State) {
assert(EnableVPlanNativePath &&
"Non-native vplans are not expected to have VPWidenPHIRecipes.");
- // Currently we enter here in the VPlan-native path for non-induction
- // PHIs where all control flow is uniform. We simply widen these PHIs.
- // Create a vector phi with no operands - the vector phi operands will be
- // set at the end of vector code generation.
- VPBasicBlock *Parent = getParent();
- VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion();
- unsigned StartIdx = 0;
- // For phis in header blocks of loop regions, use the index of the value
- // coming from the preheader.
- if (LoopRegion->getEntryBasicBlock() == Parent) {
- for (unsigned I = 0; I < getNumOperands(); ++I) {
- if (getIncomingBlock(I) ==
- LoopRegion->getSinglePredecessor()->getExitingBasicBlock())
- StartIdx = I;
- }
- }
- Value *Op0 = State.get(getOperand(StartIdx), 0);
+ Value *Op0 = State.get(getOperand(0), 0);
Type *VecTy = Op0->getType();
Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
State.set(this, VecPhi, 0);
@@ -1368,7 +1689,7 @@ void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
PHINode *EntryPart =
State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
EntryPart->addIncoming(StartMask, VectorPH);
- EntryPart->setDebugLoc(DL);
+ EntryPart->setDebugLoc(getDebugLoc());
State.set(this, EntryPart, Part);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 83bfdfd09d19..33132880d5a4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -12,17 +12,22 @@
//===----------------------------------------------------------------------===//
#include "VPlanTransforms.h"
-#include "VPlanDominatorTree.h"
#include "VPRecipeBuilder.h"
+#include "VPlanAnalysis.h"
#include "VPlanCFG.h"
+#include "VPlanDominatorTree.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/PatternMatch.h"
using namespace llvm;
+using namespace llvm::PatternMatch;
+
void VPlanTransforms::VPInstructionsToVPRecipes(
VPlanPtr &Plan,
function_ref<const InductionDescriptor *(PHINode *)>
@@ -76,7 +81,7 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands());
} else if (auto *CI = dyn_cast<CastInst>(Inst)) {
NewRecipe = new VPWidenCastRecipe(
- CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI);
+ CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), *CI);
} else {
NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands());
}
@@ -158,17 +163,10 @@ static bool sinkScalarOperands(VPlan &Plan) {
// TODO: add ".cloned" suffix to name of Clone's VPValue.
Clone->insertBefore(SinkCandidate);
- for (auto *U : to_vector(SinkCandidate->getVPSingleValue()->users())) {
- auto *UI = cast<VPRecipeBase>(U);
- if (UI->getParent() == SinkTo)
- continue;
-
- for (unsigned Idx = 0; Idx != UI->getNumOperands(); Idx++) {
- if (UI->getOperand(Idx) != SinkCandidate->getVPSingleValue())
- continue;
- UI->setOperand(Idx, Clone);
- }
- }
+ SinkCandidate->getVPSingleValue()->replaceUsesWithIf(
+ Clone, [SinkTo](VPUser &U, unsigned) {
+ return cast<VPRecipeBase>(&U)->getParent() != SinkTo;
+ });
}
SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi());
for (VPValue *Op : SinkCandidate->operands())
@@ -273,16 +271,10 @@ static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan) {
VPValue *PredInst1 =
cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0);
VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
- for (VPUser *U : to_vector(Phi1ToMoveV->users())) {
- auto *UI = dyn_cast<VPRecipeBase>(U);
- if (!UI || UI->getParent() != Then2)
- continue;
- for (unsigned I = 0, E = U->getNumOperands(); I != E; ++I) {
- if (Phi1ToMoveV != U->getOperand(I))
- continue;
- U->setOperand(I, PredInst1);
- }
- }
+ Phi1ToMoveV->replaceUsesWithIf(PredInst1, [Then2](VPUser &U, unsigned) {
+ auto *UI = dyn_cast<VPRecipeBase>(&U);
+ return UI && UI->getParent() == Then2;
+ });
Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
}
@@ -479,15 +471,45 @@ void VPlanTransforms::removeDeadRecipes(VPlan &Plan) {
// The recipes in the block are processed in reverse order, to catch chains
// of dead recipes.
for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
- if (R.mayHaveSideEffects() || any_of(R.definedValues(), [](VPValue *V) {
- return V->getNumUsers() > 0;
- }))
+ // A user keeps R alive:
+ if (any_of(R.definedValues(),
+ [](VPValue *V) { return V->getNumUsers(); }))
+ continue;
+
+ // Having side effects keeps R alive, but do remove conditional assume
+ // instructions as their conditions may be flattened.
+ auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
+ bool IsConditionalAssume =
+ RepR && RepR->isPredicated() &&
+ match(RepR->getUnderlyingInstr(), m_Intrinsic<Intrinsic::assume>());
+ if (R.mayHaveSideEffects() && !IsConditionalAssume)
continue;
+
R.eraseFromParent();
}
}
}
+static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
+ ScalarEvolution &SE, Instruction *TruncI,
+ Type *IVTy, VPValue *StartV,
+ VPValue *Step) {
+ VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+ auto IP = HeaderVPBB->getFirstNonPhi();
+ VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
+ Type *TruncTy = TruncI ? TruncI->getType() : IVTy;
+ VPValue *BaseIV = CanonicalIV;
+ if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step, TruncTy)) {
+ BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step,
+ TruncI ? TruncI->getType() : nullptr);
+ HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP);
+ }
+
+ VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step);
+ HeaderVPBB->insert(Steps, IP);
+ return Steps;
+}
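
The helper above materializes a VPDerivedIVRecipe only when the canonical IV cannot serve directly; the derived value is Start + CanonicalIV * Step (possibly truncated). A scalar sketch of that decision for the integer case, ignoring the kind/type checks isCanonical also performs:

#include <cassert>

// The canonical IV counts 0, 1, 2, ... A derived IV with start S and step D
// is only needed when (S, D) != (0, 1); otherwise the canonical IV is reused.
int deriveIV(int CanonicalIV, int Start, int Step) {
  if (Start == 0 && Step == 1)
    return CanonicalIV;              // already canonical, no extra recipe
  return Start + CanonicalIV * Step; // VPDerivedIVRecipe's computation
}

int main() {
  assert(deriveIV(5, 0, 1) == 5);  // reuse
  assert(deriveIV(5, 7, 3) == 22); // derived: 7 + 5*3
}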
+
void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
SmallVector<VPRecipeBase *> ToRemove;
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
@@ -501,36 +523,18 @@ void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
}))
continue;
- auto IP = HeaderVPBB->getFirstNonPhi();
- VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
- Type *ResultTy = WideIV->getPHINode()->getType();
- if (Instruction *TruncI = WideIV->getTruncInst())
- ResultTy = TruncI->getType();
const InductionDescriptor &ID = WideIV->getInductionDescriptor();
- VPValue *Step = WideIV->getStepValue();
- VPValue *BaseIV = CanonicalIV;
- if (!CanonicalIV->isCanonical(ID.getKind(), WideIV->getStartValue(), Step,
- ResultTy)) {
- BaseIV = new VPDerivedIVRecipe(ID, WideIV->getStartValue(), CanonicalIV,
- Step, ResultTy);
- HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP);
- }
-
- VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step);
- HeaderVPBB->insert(Steps, IP);
+ VPValue *Steps = createScalarIVSteps(
+ Plan, ID, SE, WideIV->getTruncInst(), WideIV->getPHINode()->getType(),
+ WideIV->getStartValue(), WideIV->getStepValue());
- // Update scalar users of IV to use Step instead. Use SetVector to ensure
- // the list of users doesn't contain duplicates.
- SetVector<VPUser *> Users(WideIV->user_begin(), WideIV->user_end());
- for (VPUser *U : Users) {
- if (HasOnlyVectorVFs && !U->usesScalars(WideIV))
- continue;
- for (unsigned I = 0, E = U->getNumOperands(); I != E; I++) {
- if (U->getOperand(I) != WideIV)
- continue;
- U->setOperand(I, Steps);
- }
- }
+ // Update scalar users of IV to use Step instead.
+ if (!HasOnlyVectorVFs)
+ WideIV->replaceAllUsesWith(Steps);
+ else
+ WideIV->replaceUsesWithIf(Steps, [WideIV](VPUser &U, unsigned) {
+ return U.usesScalars(WideIV);
+ });
}
}
@@ -778,3 +782,375 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
}
}
}
+
+/// Returns true if \p V is a constant one.
+static bool isConstantOne(VPValue *V) {
+ if (!V->isLiveIn())
+ return false;
+ auto *C = dyn_cast<ConstantInt>(V->getLiveInIRValue());
+ return C && C->isOne();
+}
+
+/// Returns the llvm::Instruction opcode for \p R, or 0 if \p R has no
+/// corresponding instruction opcode.
+static unsigned getOpcodeForRecipe(VPRecipeBase &R) {
+ if (auto *WidenR = dyn_cast<VPWidenRecipe>(&R))
+ return WidenR->getUnderlyingInstr()->getOpcode();
+ if (auto *WidenC = dyn_cast<VPWidenCastRecipe>(&R))
+ return WidenC->getOpcode();
+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R))
+ return RepR->getUnderlyingInstr()->getOpcode();
+ if (auto *VPI = dyn_cast<VPInstruction>(&R))
+ return VPI->getOpcode();
+ return 0;
+}
+
+/// Try to simplify recipe \p R.
+static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
+ switch (getOpcodeForRecipe(R)) {
+ case Instruction::Mul: {
+ VPValue *A = R.getOperand(0);
+ VPValue *B = R.getOperand(1);
+ if (isConstantOne(A))
+ return R.getVPSingleValue()->replaceAllUsesWith(B);
+ if (isConstantOne(B))
+ return R.getVPSingleValue()->replaceAllUsesWith(A);
+ break;
+ }
+ case Instruction::Trunc: {
+ VPRecipeBase *Ext = R.getOperand(0)->getDefiningRecipe();
+ if (!Ext)
+ break;
+ unsigned ExtOpcode = getOpcodeForRecipe(*Ext);
+ if (ExtOpcode != Instruction::ZExt && ExtOpcode != Instruction::SExt)
+ break;
+ VPValue *A = Ext->getOperand(0);
+ VPValue *Trunc = R.getVPSingleValue();
+ Type *TruncTy = TypeInfo.inferScalarType(Trunc);
+ Type *ATy = TypeInfo.inferScalarType(A);
+ if (TruncTy == ATy) {
+ Trunc->replaceAllUsesWith(A);
+ } else if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
+ auto *VPC =
+ new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
+ VPC->insertBefore(&R);
+ Trunc->replaceAllUsesWith(VPC);
+ } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
+ auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
+ VPC->insertBefore(&R);
+ Trunc->replaceAllUsesWith(VPC);
+ }
+#ifndef NDEBUG
+  // Verify that the cached type info for both A and its users is still
+  // accurate by comparing it to freshly computed types.
+ VPTypeAnalysis TypeInfo2(TypeInfo.getContext());
+ assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
+ for (VPUser *U : A->users()) {
+ auto *R = dyn_cast<VPRecipeBase>(U);
+ if (!R)
+ continue;
+ for (VPValue *VPV : R->definedValues())
+ assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
+ }
+#endif
+ break;
+ }
+ default:
+ break;
+ }
+}
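
The Trunc case folds trunc(ext(x)) purely by comparing bit widths: equal widths yield x itself, a narrower source re-extends with the original opcode, and a wider source needs only a single trunc. A width-only sketch with a hypothetical enum in place of the recipes:

#include <cassert>

enum class Cast { None, Extend, Truncate };

// Decide what trunc(ext(x)) simplifies to, given the bit width of x (SrcBits)
// and of the final truncated value (DstBits).
Cast foldTruncExt(unsigned SrcBits, unsigned DstBits) {
  if (SrcBits == DstBits)
    return Cast::None;   // trunc(ext x) == x
  if (SrcBits < DstBits)
    return Cast::Extend; // re-extend x with the original zext/sext opcode
  return Cast::Truncate; // x is wider: a single trunc of x suffices
}

int main() {
  assert(foldTruncExt(32, 32) == Cast::None);     // trunc(i64 ext(i32)) -> i32
  assert(foldTruncExt(8, 16) == Cast::Extend);    // trunc(i64 ext(i8)) -> i16
  assert(foldTruncExt(64, 16) == Cast::Truncate); // wider source
}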
+
+/// Try to simplify the recipes in \p Plan.
+static void simplifyRecipes(VPlan &Plan, LLVMContext &Ctx) {
+ ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
+ Plan.getEntry());
+ VPTypeAnalysis TypeInfo(Ctx);
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+ simplifyRecipe(R, TypeInfo);
+ }
+ }
+}
+
+void VPlanTransforms::truncateToMinimalBitwidths(
+ VPlan &Plan, const MapVector<Instruction *, uint64_t> &MinBWs,
+ LLVMContext &Ctx) {
+#ifndef NDEBUG
+ // Count the processed recipes and cross check the count later with MinBWs
+ // size, to make sure all entries in MinBWs have been handled.
+ unsigned NumProcessedRecipes = 0;
+#endif
+ // Keep track of created truncates, so they can be re-used. Note that we
+  // cannot use RAUW after creating a new truncate, as this could make
+ // other uses have different types for their operands, making them invalidly
+ // typed.
+ DenseMap<VPValue *, VPWidenCastRecipe *> ProcessedTruncs;
+ VPTypeAnalysis TypeInfo(Ctx);
+ VPBasicBlock *PH = Plan.getEntry();
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_deep(Plan.getVectorLoopRegion()))) {
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+ if (!isa<VPWidenRecipe, VPWidenCastRecipe, VPReplicateRecipe,
+ VPWidenSelectRecipe>(&R))
+ continue;
+
+ VPValue *ResultVPV = R.getVPSingleValue();
+ auto *UI = cast_or_null<Instruction>(ResultVPV->getUnderlyingValue());
+ unsigned NewResSizeInBits = MinBWs.lookup(UI);
+ if (!NewResSizeInBits)
+ continue;
+
+#ifndef NDEBUG
+ NumProcessedRecipes++;
+#endif
+ // If the value wasn't vectorized, we must maintain the original scalar
+ // type. Skip those here, after incrementing NumProcessedRecipes. Also
+ // skip casts which do not need to be handled explicitly here, as
+ // redundant casts will be removed during recipe simplification.
+ if (isa<VPReplicateRecipe, VPWidenCastRecipe>(&R)) {
+#ifndef NDEBUG
+ // If any of the operands is a live-in and not used by VPWidenRecipe or
+ // VPWidenSelectRecipe, but in MinBWs, make sure it is counted as
+      // processed as well. As MinBWs is currently constructed, there is no
+      // information about whether recipes are widened or replicated, and in
+      // case they are replicated the operands are not truncated. Counting
+      // them here ensures we do not miss any recipes in MinBWs.
+ // TODO: Remove once the analysis is done on VPlan.
+ for (VPValue *Op : R.operands()) {
+ if (!Op->isLiveIn())
+ continue;
+ auto *UV = dyn_cast_or_null<Instruction>(Op->getUnderlyingValue());
+ if (UV && MinBWs.contains(UV) && !ProcessedTruncs.contains(Op) &&
+ all_of(Op->users(), [](VPUser *U) {
+ return !isa<VPWidenRecipe, VPWidenSelectRecipe>(U);
+ })) {
+ // Add an entry to ProcessedTruncs to avoid counting the same
+ // operand multiple times.
+ ProcessedTruncs[Op] = nullptr;
+ NumProcessedRecipes += 1;
+ }
+ }
+#endif
+ continue;
+ }
+
+ Type *OldResTy = TypeInfo.inferScalarType(ResultVPV);
+ unsigned OldResSizeInBits = OldResTy->getScalarSizeInBits();
+ assert(OldResTy->isIntegerTy() && "only integer types supported");
+ if (OldResSizeInBits == NewResSizeInBits)
+ continue;
+ assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
+ (void)OldResSizeInBits;
+
+ auto *NewResTy = IntegerType::get(Ctx, NewResSizeInBits);
+
+ // Shrink operands by introducing truncates as needed.
+ unsigned StartIdx = isa<VPWidenSelectRecipe>(&R) ? 1 : 0;
+ for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
+ auto *Op = R.getOperand(Idx);
+ unsigned OpSizeInBits =
+ TypeInfo.inferScalarType(Op)->getScalarSizeInBits();
+ if (OpSizeInBits == NewResSizeInBits)
+ continue;
+ assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
+ auto [ProcessedIter, IterIsEmpty] =
+ ProcessedTruncs.insert({Op, nullptr});
+ VPWidenCastRecipe *NewOp =
+ IterIsEmpty
+ ? new VPWidenCastRecipe(Instruction::Trunc, Op, NewResTy)
+ : ProcessedIter->second;
+ R.setOperand(Idx, NewOp);
+ if (!IterIsEmpty)
+ continue;
+ ProcessedIter->second = NewOp;
+ if (!Op->isLiveIn()) {
+ NewOp->insertBefore(&R);
+ } else {
+ PH->appendRecipe(NewOp);
+#ifndef NDEBUG
+ auto *OpInst = dyn_cast<Instruction>(Op->getLiveInIRValue());
+ bool IsContained = MinBWs.contains(OpInst);
+ NumProcessedRecipes += IsContained;
+#endif
+ }
+ }
+
+ // Any wrapping introduced by shrinking this operation shouldn't be
+ // considered undefined behavior. So, we can't unconditionally copy
+ // arithmetic wrapping flags to VPW.
+ if (auto *VPW = dyn_cast<VPRecipeWithIRFlags>(&R))
+ VPW->dropPoisonGeneratingFlags();
+
+ // Extend result to original width.
+ auto *Ext = new VPWidenCastRecipe(Instruction::ZExt, ResultVPV, OldResTy);
+ Ext->insertAfter(&R);
+ ResultVPV->replaceAllUsesWith(Ext);
+ Ext->setOperand(0, ResultVPV);
+ }
+ }
+
+ assert(MinBWs.size() == NumProcessedRecipes &&
+ "some entries in MinBWs haven't been processed");
+}
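
The correctness intuition behind the shrinking above: if only the low N bits of a result are demanded, computing on truncated operands and zero-extending back gives the same low bits, which is also why wrapping flags must be dropped. A check of that identity for 8-bit adds, using plain integers rather than VPlan:

#include <cassert>
#include <cstdint>

// Compute in 8 bits and widen back; compare against masking the wide result.
uint32_t narrowAdd(uint32_t A, uint32_t B) {
  uint8_t NA = static_cast<uint8_t>(A); // trunc
  uint8_t NB = static_cast<uint8_t>(B);
  // The 8-bit add may wrap, which must not be treated as UB here.
  return static_cast<uint32_t>(static_cast<uint8_t>(NA + NB)); // zext
}

int main() {
  for (uint32_t A : {0u, 1u, 200u, 0xABCDu, 0xFFFFFFFFu})
    for (uint32_t B : {0u, 99u, 0x1234u, 0xFFFFFF01u})
      assert(narrowAdd(A, B) == ((A + B) & 0xFFu)); // low 8 bits agree
}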
+
+void VPlanTransforms::optimize(VPlan &Plan, ScalarEvolution &SE) {
+ removeRedundantCanonicalIVs(Plan);
+ removeRedundantInductionCasts(Plan);
+
+ optimizeInductions(Plan, SE);
+ simplifyRecipes(Plan, SE.getContext());
+ removeDeadRecipes(Plan);
+
+ createAndOptimizeReplicateRegions(Plan);
+
+ removeRedundantExpandSCEVRecipes(Plan);
+ mergeBlocksIntoPredecessors(Plan);
+}
+
+// Add a VPActiveLaneMaskPHIRecipe and related recipes to \p Plan and replace
+// the loop terminator with a branch-on-cond recipe with the negated
+// active-lane-mask as operand. Note that this turns the loop into an
+// uncountable one. Only the existing terminator is replaced; all other existing
+// recipes/users remain unchanged, except for poison-generating flags being
+// dropped from the canonical IV increment. Return the created
+// VPActiveLaneMaskPHIRecipe.
+//
+// The function uses the following definitions:
+//
+// %TripCount = DataWithControlFlowWithoutRuntimeCheck ?
+// calculate-trip-count-minus-VF (original TC) : original TC
+// %IncrementValue = DataWithControlFlowWithoutRuntimeCheck ?
+// CanonicalIVPhi : CanonicalIVIncrement
+// %StartV is the canonical induction start value.
+//
+// The function adds the following recipes:
+//
+// vector.ph:
+// %TripCount = calculate-trip-count-minus-VF (original TC)
+// [if DataWithControlFlowWithoutRuntimeCheck]
+// %EntryInc = canonical-iv-increment-for-part %StartV
+// %EntryALM = active-lane-mask %EntryInc, %TripCount
+//
+// vector.body:
+// ...
+// %P = active-lane-mask-phi [ %EntryALM, %vector.ph ], [ %ALM, %vector.body ]
+// ...
+// %InLoopInc = canonical-iv-increment-for-part %IncrementValue
+// %ALM = active-lane-mask %InLoopInc, TripCount
+// %Negated = Not %ALM
+// branch-on-cond %Negated
+//
+static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
+ VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck) {
+ VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
+ VPBasicBlock *EB = TopRegion->getExitingBasicBlock();
+ auto *CanonicalIVPHI = Plan.getCanonicalIV();
+ VPValue *StartV = CanonicalIVPHI->getStartValue();
+
+ auto *CanonicalIVIncrement =
+ cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());
+ // TODO: Check if dropping the flags is needed if
+ // !DataAndControlFlowWithoutRuntimeCheck.
+ CanonicalIVIncrement->dropPoisonGeneratingFlags();
+ DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
+ // We can't use StartV directly in the ActiveLaneMask VPInstruction, since
+ // we have to take unrolling into account. Each part needs to start at
+ // Part * VF.
+ auto *VecPreheader = cast<VPBasicBlock>(TopRegion->getSinglePredecessor());
+ VPBuilder Builder(VecPreheader);
+
+ // Create the ActiveLaneMask instruction using the correct start values.
+ VPValue *TC = Plan.getTripCount();
+
+ VPValue *TripCount, *IncrementValue;
+ if (!DataAndControlFlowWithoutRuntimeCheck) {
+ // When the loop is guarded by a runtime overflow check for the loop
+ // induction variable increment by VF, we can increment the value before
+ // the get.active.lane.mask intrinsic and use the unmodified trip count.
+ IncrementValue = CanonicalIVIncrement;
+ TripCount = TC;
+ } else {
+ // When avoiding a runtime check, the active.lane.mask inside the loop
+ // uses a modified trip count and the induction variable increment is
+ // done after the active.lane.mask intrinsic is called.
+ IncrementValue = CanonicalIVPHI;
+ TripCount = Builder.createNaryOp(VPInstruction::CalculateTripCountMinusVF,
+ {TC}, DL);
+ }
+ auto *EntryIncrement = Builder.createOverflowingOp(
+ VPInstruction::CanonicalIVIncrementForPart, {StartV}, {false, false}, DL,
+ "index.part.next");
+
+ // Create the active lane mask instruction in the VPlan preheader.
+ auto *EntryALM =
+ Builder.createNaryOp(VPInstruction::ActiveLaneMask, {EntryIncrement, TC},
+ DL, "active.lane.mask.entry");
+
+ // Now create the ActiveLaneMaskPhi recipe in the main loop using the
+ // preheader ActiveLaneMask instruction.
+ auto *LaneMaskPhi = new VPActiveLaneMaskPHIRecipe(EntryALM, DebugLoc());
+ LaneMaskPhi->insertAfter(CanonicalIVPHI);
+
+ // Create the active lane mask for the next iteration of the loop before the
+ // original terminator.
+ VPRecipeBase *OriginalTerminator = EB->getTerminator();
+ Builder.setInsertPoint(OriginalTerminator);
+ auto *InLoopIncrement =
+ Builder.createOverflowingOp(VPInstruction::CanonicalIVIncrementForPart,
+ {IncrementValue}, {false, false}, DL);
+ auto *ALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask,
+ {InLoopIncrement, TripCount}, DL,
+ "active.lane.mask.next");
+ LaneMaskPhi->addOperand(ALM);
+
+ // Replace the original terminator with BranchOnCond. We have to invert the
+ // mask here because a true condition means jumping to the exit block.
+ auto *NotMask = Builder.createNot(ALM, DL);
+ Builder.createNaryOp(VPInstruction::BranchOnCond, {NotMask}, DL);
+ OriginalTerminator->eraseFromParent();
+ return LaneMaskPhi;
+}
+
+void VPlanTransforms::addActiveLaneMask(
+ VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
+ bool DataAndControlFlowWithoutRuntimeCheck) {
+ assert((!DataAndControlFlowWithoutRuntimeCheck ||
+ UseActiveLaneMaskForControlFlow) &&
+ "DataAndControlFlowWithoutRuntimeCheck implies "
+ "UseActiveLaneMaskForControlFlow");
+
+ auto FoundWidenCanonicalIVUser =
+ find_if(Plan.getCanonicalIV()->users(),
+ [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
+ assert(FoundWidenCanonicalIVUser != Plan.getCanonicalIV()->users().end() &&
+ "Must have widened canonical IV when tail folding!");
+ auto *WideCanonicalIV =
+ cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
+ VPRecipeBase *LaneMask;
+ if (UseActiveLaneMaskForControlFlow) {
+ LaneMask = addVPLaneMaskPhiAndUpdateExitBranch(
+ Plan, DataAndControlFlowWithoutRuntimeCheck);
+ } else {
+ LaneMask = new VPInstruction(VPInstruction::ActiveLaneMask,
+ {WideCanonicalIV, Plan.getTripCount()},
+ nullptr, "active.lane.mask");
+ LaneMask->insertAfter(WideCanonicalIV);
+ }
+
+ // Walk users of WideCanonicalIV and replace all compares of the form
+ // (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with an
+ // active-lane-mask.
+ VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
+ for (VPUser *U : SmallVector<VPUser *>(WideCanonicalIV->users())) {
+ auto *CompareToReplace = dyn_cast<VPInstruction>(U);
+ if (!CompareToReplace ||
+ CompareToReplace->getOpcode() != Instruction::ICmp ||
+ CompareToReplace->getPredicate() != CmpInst::ICMP_ULE ||
+ CompareToReplace->getOperand(1) != BTC)
+ continue;
+
+ assert(CompareToReplace->getOperand(0) == WideCanonicalIV &&
+ "WidenCanonicalIV must be the first operand of the compare");
+ CompareToReplace->replaceAllUsesWith(LaneMask->getVPSingleValue());
+ CompareToReplace->eraseFromParent();
+ }
+}
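A usage note on the entry point above: the sketch below is a hypothetical wrapper, not code from this commit, and it assumes Plan already contains a VPWidenCanonicalIVRecipe, which the assertion in addActiveLaneMask requires when tail folding.

#include "VPlanTransforms.h" // build-local header, path assumed

using namespace llvm;

// Hypothetical helper: request tail folding by active lane mask. Because
// DataAndControlFlowWithoutRuntimeCheck implies
// UseActiveLaneMaskForControlFlow, only forward SkipRuntimeCheck when the
// mask also drives control flow.
static void foldTailWithLaneMask(VPlan &Plan, bool MaskControlsFlow,
                                 bool SkipRuntimeCheck) {
  VPlanTransforms::addActiveLaneMask(Plan, MaskControlsFlow,
                                     MaskControlsFlow && SkipRuntimeCheck);
}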
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 3eccf6e9600d..3bf91115debb 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -22,11 +22,9 @@ class InductionDescriptor;
class Instruction;
class PHINode;
class ScalarEvolution;
-class Loop;
class PredicatedScalarEvolution;
class TargetLibraryInfo;
class VPBuilder;
-class VPRecipeBuilder;
struct VPlanTransforms {
/// Replaces the VPInstructions in \p Plan with corresponding
@@ -37,12 +35,56 @@ struct VPlanTransforms {
GetIntOrFpInductionDescriptor,
ScalarEvolution &SE, const TargetLibraryInfo &TLI);
+ /// Sink users of fixed-order recurrences after the recipe defining their
+ /// previous value. Then introduce FirstOrderRecurrenceSplice VPInstructions
+ /// to combine the value from the recurrence phis and previous values. The
+ /// current implementation assumes all users can be sunk after the previous
+ /// value, which is enforced by earlier legality checks.
+ /// \returns true if all users of fixed-order recurrences could be re-arranged
+ /// as needed or false if it is not possible. In the latter case, \p Plan is
+ /// not valid.
+ static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);
+
+ /// Clear NSW/NUW flags from reduction instructions if necessary.
+ static void clearReductionWrapFlags(VPlan &Plan);
+
+ /// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
+ /// resulting plan to \p BestVF and \p BestUF.
+ static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
+ unsigned BestUF,
+ PredicatedScalarEvolution &PSE);
+
+ /// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
+ /// optimizations, dead recipe removal, replicate region optimizations and
+ /// block merging.
+ static void optimize(VPlan &Plan, ScalarEvolution &SE);
+
/// Wrap predicated VPReplicateRecipes with a mask operand in an if-then
/// region block and remove the mask operand. Optimize the created regions by
/// iteratively sinking scalar operands into the region, followed by merging
/// regions until no improvements remain.
static void createAndOptimizeReplicateRegions(VPlan &Plan);
+ /// Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with
+ /// active-lane-mask recipes operating on (wide canonical IV, trip-count). If
+ /// \p UseActiveLaneMaskForControlFlow is true, introduce a
+ /// VPActiveLaneMaskPHIRecipe. If \p DataAndControlFlowWithoutRuntimeCheck is
+ /// true, no minimum-iteration runtime check will be created (during skeleton
+ /// creation) and instead the tail is handled using the active-lane-mask. \p
+ /// DataAndControlFlowWithoutRuntimeCheck implies \p
+ /// UseActiveLaneMaskForControlFlow.
+ static void addActiveLaneMask(VPlan &Plan,
+ bool UseActiveLaneMaskForControlFlow,
+ bool DataAndControlFlowWithoutRuntimeCheck);
+
+ /// Insert truncates and extends for any truncated recipe. Redundant casts
+ /// will be folded later.
+ static void
+ truncateToMinimalBitwidths(VPlan &Plan,
+ const MapVector<Instruction *, uint64_t> &MinBWs,
+ LLVMContext &Ctx);
+
+private:
/// Remove redundant VPBasicBlocks by merging them into their predecessor if
/// the predecessor has a single successor.
static bool mergeBlocksIntoPredecessors(VPlan &Plan);
@@ -71,24 +113,6 @@ struct VPlanTransforms {
/// them with already existing recipes expanding the same SCEV expression.
static void removeRedundantExpandSCEVRecipes(VPlan &Plan);
- /// Sink users of fixed-order recurrences after the recipe defining their
- /// previous value. Then introduce FirstOrderRecurrenceSplice VPInstructions
- /// to combine the value from the recurrence phis and previous values. The
- /// current implementation assumes all users can be sunk after the previous
- /// value, which is enforced by earlier legality checks.
- /// \returns true if all users of fixed-order recurrences could be re-arranged
- /// as needed or false if it is not possible. In the latter case, \p Plan is
- /// not valid.
- static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);
-
- /// Clear NSW/NUW flags from reduction instructions if necessary.
- static void clearReductionWrapFlags(VPlan &Plan);
-
- /// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
- /// resulting plan to \p BestVF and \p BestUF.
- static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
- unsigned BestUF,
- PredicatedScalarEvolution &PSE);
};
} // namespace llvm
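For orientation, a rough driver sketch (hypothetical; the real call sites live in LoopVectorize and may differ) showing one plausible ordering of the public entry points declared above: narrowing first, so the cleanup inside optimize() can fold the redundant casts it leaves behind. MinBWs is assumed to come from a demanded-bits style analysis.

#include "VPlanTransforms.h" // build-local header, path assumed
#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

static void shapeVPlan(VPlan &Plan, ScalarEvolution &SE,
                       const MapVector<Instruction *, uint64_t> &MinBWs,
                       LLVMContext &Ctx) {
  if (!MinBWs.empty())
    VPlanTransforms::truncateToMinimalBitwidths(Plan, MinBWs, Ctx);
  // optimize() runs simplifyRecipes/removeDeadRecipes, which fold the
  // redundant truncs/extends inserted by the narrowing step above.
  VPlanTransforms::optimize(Plan, SE);
}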
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h
index ac110bb3b0ef..116acad8e8f3 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -121,18 +121,11 @@ public:
/// Remove a single \p User from the list of users.
void removeUser(VPUser &User) {
- bool Found = false;
// The same user can be added multiple times, e.g. because the same VPValue
// is used twice by the same VPUser. Remove a single one.
- erase_if(Users, [&User, &Found](VPUser *Other) {
- if (Found)
- return false;
- if (Other == &User) {
- Found = true;
- return true;
- }
- return false;
- });
+ auto *I = find(Users, &User);
+ if (I != Users.end())
+ Users.erase(I);
}
typedef SmallVectorImpl<VPUser *>::iterator user_iterator;
@@ -163,6 +156,13 @@ public:
void replaceAllUsesWith(VPValue *New);
+ /// Go through the uses list for this VPValue and make each use point to \p
+ /// New if the callback \p ShouldReplace returns true for the given use,
+ /// specified as a (VPUser, use index) pair.
+ void replaceUsesWithIf(
+ VPValue *New,
+ llvm::function_ref<bool(VPUser &U, unsigned Idx)> ShouldReplace);
+
/// Returns the recipe defining this VPValue or nullptr if it is not defined
/// by a recipe, i.e. is a live-in.
VPRecipeBase *getDefiningRecipe();
@@ -296,6 +296,14 @@ public:
"Op must be an operand of the recipe");
return false;
}
+
+ /// Returns true if the VPUser only uses the first part of operand \p Op.
+ /// Conservatively returns false.
+ virtual bool onlyFirstPartUsed(const VPValue *Op) const {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return false;
+ }
};
/// This class augments a recipe with a set of VPValues defined by the recipe.
@@ -325,7 +333,7 @@ class VPDef {
assert(V->Def == this && "can only remove VPValue linked with this VPDef");
assert(is_contained(DefinedValues, V) &&
"VPValue to remove must be in DefinedValues");
- erase_value(DefinedValues, V);
+ llvm::erase(DefinedValues, V);
V->Def = nullptr;
}
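A hypothetical caller of the new replaceUsesWithIf() hook (names are illustrative, not from this commit): retarget only the uses held by one specific VPUser, leaving every other user of the value untouched. The onlyFirstPartUsed() hook added above follows the same shape as the existing onlyFirstLaneUsed(): a conservative base implementation that individual recipes override.

using namespace llvm;

static void retargetUsesIn(VPValue *OldV, VPValue *NewV, VPUser &OnlyUser) {
  OldV->replaceUsesWithIf(NewV, [&OnlyUser](VPUser &U, unsigned /*OpIdx*/) {
    return &U == &OnlyUser; // replace only this user's operands
  });
}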
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 13464c9d3496..f18711ba30b7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -13,6 +13,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Vectorize/VectorCombine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -28,6 +30,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
#include <numeric>
+#include <queue>
#define DEBUG_TYPE "vector-combine"
#include "llvm/Transforms/Utils/InstructionWorklist.h"
@@ -100,8 +103,9 @@ private:
Instruction &I);
bool foldExtractExtract(Instruction &I);
bool foldInsExtFNeg(Instruction &I);
- bool foldBitcastShuf(Instruction &I);
+ bool foldBitcastShuffle(Instruction &I);
bool scalarizeBinopOrCmp(Instruction &I);
+ bool scalarizeVPIntrinsic(Instruction &I);
bool foldExtractedCmps(Instruction &I);
bool foldSingleElementStore(Instruction &I);
bool scalarizeLoadExtract(Instruction &I);
@@ -258,8 +262,8 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
// It is safe and potentially profitable to load a vector directly:
// inselt undef, load Scalar, 0 --> load VecPtr
IRBuilder<> Builder(Load);
- Value *CastedPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
- SrcPtr, MinVecTy->getPointerTo(AS));
+ Value *CastedPtr =
+ Builder.CreatePointerBitCastOrAddrSpaceCast(SrcPtr, Builder.getPtrTy(AS));
Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
VecLd = Builder.CreateShuffleVector(VecLd, Mask);
@@ -321,7 +325,7 @@ bool VectorCombine::widenSubvectorLoad(Instruction &I) {
IRBuilder<> Builder(Load);
Value *CastedPtr =
- Builder.CreatePointerBitCastOrAddrSpaceCast(SrcPtr, Ty->getPointerTo(AS));
+ Builder.CreatePointerBitCastOrAddrSpaceCast(SrcPtr, Builder.getPtrTy(AS));
Value *VecLd = Builder.CreateAlignedLoad(Ty, CastedPtr, Alignment);
replaceValue(I, *VecLd);
++NumVecLoad;
@@ -677,7 +681,7 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
/// If this is a bitcast of a shuffle, try to bitcast the source vector to the
/// destination type followed by shuffle. This can enable further transforms by
/// moving bitcasts or shuffles together.
-bool VectorCombine::foldBitcastShuf(Instruction &I) {
+bool VectorCombine::foldBitcastShuffle(Instruction &I) {
Value *V;
ArrayRef<int> Mask;
if (!match(&I, m_BitCast(
@@ -687,35 +691,43 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) {
// 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for
// scalable type is unknown; second, we cannot reason about whether the
// narrowed shuffle mask for a scalable type is a splat.
- // 2) Disallow non-vector casts and length-changing shuffles.
+ // 2) Disallow non-vector casts.
// TODO: We could allow any shuffle.
+ auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
- if (!SrcTy || I.getOperand(0)->getType() != SrcTy)
+ if (!DestTy || !SrcTy)
+ return false;
+
+ unsigned DestEltSize = DestTy->getScalarSizeInBits();
+ unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
+ if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
return false;
- auto *DestTy = cast<FixedVectorType>(I.getType());
- unsigned DestNumElts = DestTy->getNumElements();
- unsigned SrcNumElts = SrcTy->getNumElements();
SmallVector<int, 16> NewMask;
- if (SrcNumElts <= DestNumElts) {
+ if (DestEltSize <= SrcEltSize) {
// The bitcast is from wide to narrow/equal elements. The shuffle mask can
// always be expanded to the equivalent form choosing narrower elements.
- assert(DestNumElts % SrcNumElts == 0 && "Unexpected shuffle mask");
- unsigned ScaleFactor = DestNumElts / SrcNumElts;
+ assert(SrcEltSize % DestEltSize == 0 && "Unexpected shuffle mask");
+ unsigned ScaleFactor = SrcEltSize / DestEltSize;
narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);
} else {
// The bitcast is from narrow elements to wide elements. The shuffle mask
// must choose consecutive elements to allow casting first.
- assert(SrcNumElts % DestNumElts == 0 && "Unexpected shuffle mask");
- unsigned ScaleFactor = SrcNumElts / DestNumElts;
+ assert(DestEltSize % SrcEltSize == 0 && "Unexpected shuffle mask");
+ unsigned ScaleFactor = DestEltSize / SrcEltSize;
if (!widenShuffleMaskElts(ScaleFactor, Mask, NewMask))
return false;
}
+ // Bitcast the shuffle src - keep its original width but use the destination
+ // scalar type.
+ unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
+ auto *ShuffleTy = FixedVectorType::get(DestTy->getScalarType(), NumSrcElts);
+
// The new shuffle must not cost more than the old shuffle. The bitcast is
// moved ahead of the shuffle, so assume that it has the same cost as before.
InstructionCost DestCost = TTI.getShuffleCost(
- TargetTransformInfo::SK_PermuteSingleSrc, DestTy, NewMask);
+ TargetTransformInfo::SK_PermuteSingleSrc, ShuffleTy, NewMask);
InstructionCost SrcCost =
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy, Mask);
if (DestCost > SrcCost || !DestCost.isValid())
@@ -723,12 +735,131 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) {
// bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
++NumShufOfBitcast;
- Value *CastV = Builder.CreateBitCast(V, DestTy);
+ Value *CastV = Builder.CreateBitCast(V, ShuffleTy);
Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask);
replaceValue(I, *Shuf);
return true;
}
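To make the mask rescaling above concrete, a worked example (hypothetical values) with the same VectorUtils helper used on the narrow-element path:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"

using namespace llvm;

// bitcast (shuffle <2 x i64> %v, <2 x i64> poison, {1, 0}) to <4 x i32>:
// DestEltSize (32) divides SrcEltSize (64), so ScaleFactor = 2 and each wide
// index expands into two consecutive narrow indices.
static void rescaleMaskExample() {
  int WideMask[] = {1, 0};
  SmallVector<int, 16> NarrowMask;
  narrowShuffleMaskElts(/*Scale=*/2, WideMask, NarrowMask);
  // NarrowMask is now {2, 3, 0, 1}: the equivalent single-source permute of
  // the source bitcast to <4 x i32>.
}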
+/// VP Intrinsics whose vector operands are both splat values may be simplified
+/// into the scalar version of the operation, with the result splatted back
+/// into a vector. This can enable further scalarization down the line.
+bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
+ if (!isa<VPIntrinsic>(I))
+ return false;
+ VPIntrinsic &VPI = cast<VPIntrinsic>(I);
+ Value *Op0 = VPI.getArgOperand(0);
+ Value *Op1 = VPI.getArgOperand(1);
+
+ if (!isSplatValue(Op0) || !isSplatValue(Op1))
+ return false;
+
+ // Check getSplatValue early in this function, to avoid doing unnecessary
+ // work.
+ Value *ScalarOp0 = getSplatValue(Op0);
+ Value *ScalarOp1 = getSplatValue(Op1);
+ if (!ScalarOp0 || !ScalarOp1)
+ return false;
+
+ // For the binary VP intrinsics supported here, the result on disabled lanes
+ // is a poison value. For now, only do this simplification if all lanes
+ // are active.
+ // TODO: Relax the condition that all lanes are active by using insertelement
+ // on inactive lanes.
+ auto IsAllTrueMask = [](Value *MaskVal) {
+ if (Value *SplattedVal = getSplatValue(MaskVal))
+ if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
+ return ConstValue->isAllOnesValue();
+ return false;
+ };
+ if (!IsAllTrueMask(VPI.getArgOperand(2)))
+ return false;
+
+ // Check to make sure we support scalarization of the intrinsic
+ Intrinsic::ID IntrID = VPI.getIntrinsicID();
+ if (!VPBinOpIntrinsic::isVPBinOp(IntrID))
+ return false;
+
+ // Calculate cost of splatting both operands into vectors and the vector
+ // intrinsic
+ VectorType *VecTy = cast<VectorType>(VPI.getType());
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ InstructionCost SplatCost =
+ TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, CostKind, 0) +
+ TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
+
+ // Calculate the cost of the VP Intrinsic
+ SmallVector<Type *, 4> Args;
+ for (Value *V : VPI.args())
+ Args.push_back(V->getType());
+ IntrinsicCostAttributes Attrs(IntrID, VecTy, Args);
+ InstructionCost VectorOpCost = TTI.getIntrinsicInstrCost(Attrs, CostKind);
+ InstructionCost OldCost = 2 * SplatCost + VectorOpCost;
+
+ // Determine scalar opcode
+ std::optional<unsigned> FunctionalOpcode =
+ VPI.getFunctionalOpcode();
+ std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
+ if (!FunctionalOpcode) {
+ ScalarIntrID = VPI.getFunctionalIntrinsicID();
+ if (!ScalarIntrID)
+ return false;
+ }
+
+ // Calculate cost of scalarizing
+ InstructionCost ScalarOpCost = 0;
+ if (ScalarIntrID) {
+ IntrinsicCostAttributes Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);
+ ScalarOpCost = TTI.getIntrinsicInstrCost(Attrs, CostKind);
+ } else {
+ ScalarOpCost =
+ TTI.getArithmeticInstrCost(*FunctionalOpcode, VecTy->getScalarType());
+ }
+
+ // The existing splats may be kept around if other instructions use them.
+ InstructionCost CostToKeepSplats =
+ (SplatCost * !Op0->hasOneUse()) + (SplatCost * !Op1->hasOneUse());
+ InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;
+
+ LLVM_DEBUG(dbgs() << "Found a VP Intrinsic to scalarize: " << VPI
+ << "\n");
+ LLVM_DEBUG(dbgs() << "Cost of Intrinsic: " << OldCost
+ << ", Cost of scalarizing:" << NewCost << "\n");
+
+ // We want to scalarize unless the vector variant actually has lower cost.
+ if (OldCost < NewCost || !NewCost.isValid())
+ return false;
+
+ // Scalarize the intrinsic
+ ElementCount EC = cast<VectorType>(Op0->getType())->getElementCount();
+ Value *EVL = VPI.getArgOperand(3);
+ const DataLayout &DL = VPI.getModule()->getDataLayout();
+
+ // If the VP op might introduce UB or poison, we can only scalarize it when
+ // we know the EVL > 0: if the EVL is zero, the original VP op is a no-op
+ // (and thus never UB), so unconditionally executing the scalar op could
+ // introduce UB the vector form did not have.
+ bool SafeToSpeculate;
+ if (ScalarIntrID)
+ SafeToSpeculate = Intrinsic::getAttributes(I.getContext(), *ScalarIntrID)
+ .hasFnAttr(Attribute::AttrKind::Speculatable);
+ else
+ SafeToSpeculate = isSafeToSpeculativelyExecuteWithOpcode(
+ *FunctionalOpcode, &VPI, nullptr, &AC, &DT);
+ if (!SafeToSpeculate && !isKnownNonZero(EVL, DL, 0, &AC, &VPI, &DT))
+ return false;
+
+ Value *ScalarVal =
+ ScalarIntrID
+ ? Builder.CreateIntrinsic(VecTy->getScalarType(), *ScalarIntrID,
+ {ScalarOp0, ScalarOp1})
+ : Builder.CreateBinOp((Instruction::BinaryOps)(*FunctionalOpcode),
+ ScalarOp0, ScalarOp1);
+
+ replaceValue(VPI, *Builder.CreateVectorSplat(EC, ScalarVal));
+ return true;
+}
+
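The scalar-form lookup in scalarizeVPIntrinsic condenses to the predicate below (a hypothetical helper, using only VPIntrinsic APIs the function above already calls): a supported binary VP intrinsic must map either to a plain IR opcode (e.g. vp.add to Instruction::Add) or to a matching non-VP intrinsic.

#include "llvm/IR/IntrinsicInst.h"

using namespace llvm;

static bool hasScalarEquivalent(VPIntrinsic &VPI) {
  if (!VPBinOpIntrinsic::isVPBinOp(VPI.getIntrinsicID()))
    return false;
  return VPI.getFunctionalOpcode().has_value() ||
         VPI.getFunctionalIntrinsicID().has_value();
}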
/// Match a vector binop or compare instruction with at least one inserted
/// scalar operand and convert to scalar binop/cmp followed by insertelement.
bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
@@ -1013,19 +1144,24 @@ public:
/// Check if it is legal to scalarize a memory access to \p VecTy at index \p
/// Idx. \p Idx must access a valid vector element.
-static ScalarizationResult canScalarizeAccess(FixedVectorType *VecTy,
- Value *Idx, Instruction *CtxI,
+static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx,
+ Instruction *CtxI,
AssumptionCache &AC,
const DominatorTree &DT) {
+ // Both fixed and scalable vector types are handled here: NumElements is the
+ // exact element count of a fixed vector type, or the minimum element count
+ // of a scalable vector type.
+ uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
+
if (auto *C = dyn_cast<ConstantInt>(Idx)) {
- if (C->getValue().ult(VecTy->getNumElements()))
+ if (C->getValue().ult(NumElements))
return ScalarizationResult::safe();
return ScalarizationResult::unsafe();
}
unsigned IntWidth = Idx->getType()->getScalarSizeInBits();
APInt Zero(IntWidth, 0);
- APInt MaxElts(IntWidth, VecTy->getNumElements());
+ APInt MaxElts(IntWidth, NumElements);
ConstantRange ValidIndices(Zero, MaxElts);
ConstantRange IdxRange(IntWidth, true);
@@ -1074,8 +1210,7 @@ static Align computeAlignmentAfterScalarization(Align VectorAlignment,
// store i32 %b, i32* %1
bool VectorCombine::foldSingleElementStore(Instruction &I) {
auto *SI = cast<StoreInst>(&I);
- if (!SI->isSimple() ||
- !isa<FixedVectorType>(SI->getValueOperand()->getType()))
+ if (!SI->isSimple() || !isa<VectorType>(SI->getValueOperand()->getType()))
return false;
// TODO: Combine more complicated patterns (multiple insert) by referencing
@@ -1089,13 +1224,13 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) {
return false;
if (auto *Load = dyn_cast<LoadInst>(Source)) {
- auto VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
+ auto VecTy = cast<VectorType>(SI->getValueOperand()->getType());
const DataLayout &DL = I.getModule()->getDataLayout();
Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
// Don't optimize for atomic/volatile load or store. Ensure memory is not
// modified between, vector type matches store size, and index is inbounds.
if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
- !DL.typeSizeEqualsStoreSize(Load->getType()) ||
+ !DL.typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
SrcAddr != SI->getPointerOperand()->stripPointerCasts())
return false;
@@ -1130,19 +1265,26 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
if (!match(&I, m_Load(m_Value(Ptr))))
return false;
- auto *FixedVT = cast<FixedVectorType>(I.getType());
+ auto *VecTy = cast<VectorType>(I.getType());
auto *LI = cast<LoadInst>(&I);
const DataLayout &DL = I.getModule()->getDataLayout();
- if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(FixedVT))
+ if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(VecTy->getScalarType()))
return false;
InstructionCost OriginalCost =
- TTI.getMemoryOpCost(Instruction::Load, FixedVT, LI->getAlign(),
+ TTI.getMemoryOpCost(Instruction::Load, VecTy, LI->getAlign(),
LI->getPointerAddressSpace());
InstructionCost ScalarizedCost = 0;
Instruction *LastCheckedInst = LI;
unsigned NumInstChecked = 0;
+ DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
+ auto FailureGuard = make_scope_exit([&]() {
+ // If the transform is aborted, discard the ScalarizationResults.
+ for (auto &Pair : NeedFreeze)
+ Pair.second.discard();
+ });
+
// Check if all users of the load are extracts with no memory modifications
// between the load and the extract. Compute the cost of both the original
// code and the scalarized version.
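The FailureGuard above is llvm::make_scope_exit from llvm/ADT/ScopeExit.h: its callback runs when the guard is destroyed unless release() is called first, so every early `return false` in the scan below automatically discards the pending ScalarizationResults. A standalone sketch of the idiom (hypothetical function):

#include "llvm/ADT/ScopeExit.h"

static bool tryRewrite(bool &Committed) {
  auto Rollback = llvm::make_scope_exit([&] { Committed = false; });
  // ... early `return false` paths here still trigger Rollback ...
  Committed = true;
  Rollback.release(); // success: disarm the cleanup
  return true;
}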
@@ -1151,9 +1293,6 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
if (!UI || UI->getParent() != LI->getParent())
return false;
- if (!isGuaranteedNotToBePoison(UI->getOperand(1), &AC, LI, &DT))
- return false;
-
// Check if any instruction between the load and the extract may modify
// memory.
if (LastCheckedInst->comesBefore(UI)) {
@@ -1168,22 +1307,23 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
LastCheckedInst = UI;
}
- auto ScalarIdx = canScalarizeAccess(FixedVT, UI->getOperand(1), &I, AC, DT);
- if (!ScalarIdx.isSafe()) {
- // TODO: Freeze index if it is safe to do so.
- ScalarIdx.discard();
+ auto ScalarIdx = canScalarizeAccess(VecTy, UI->getOperand(1), &I, AC, DT);
+ if (ScalarIdx.isUnsafe())
return false;
+ if (ScalarIdx.isSafeWithFreeze()) {
+ NeedFreeze.try_emplace(UI, ScalarIdx);
+ ScalarIdx.discard();
}
auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
OriginalCost +=
- TTI.getVectorInstrCost(Instruction::ExtractElement, FixedVT, CostKind,
+ TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, CostKind,
Index ? Index->getZExtValue() : -1);
ScalarizedCost +=
- TTI.getMemoryOpCost(Instruction::Load, FixedVT->getElementType(),
+ TTI.getMemoryOpCost(Instruction::Load, VecTy->getElementType(),
Align(1), LI->getPointerAddressSpace());
- ScalarizedCost += TTI.getAddressComputationCost(FixedVT->getElementType());
+ ScalarizedCost += TTI.getAddressComputationCost(VecTy->getElementType());
}
if (ScalarizedCost >= OriginalCost)
@@ -1192,21 +1332,27 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
// Replace extracts with narrow scalar loads.
for (User *U : LI->users()) {
auto *EI = cast<ExtractElementInst>(U);
- Builder.SetInsertPoint(EI);
-
Value *Idx = EI->getOperand(1);
+
+ // Insert a 'freeze' for indices that may be poison.
+ auto It = NeedFreeze.find(EI);
+ if (It != NeedFreeze.end())
+ It->second.freeze(Builder, *cast<Instruction>(Idx));
+
+ Builder.SetInsertPoint(EI);
Value *GEP =
- Builder.CreateInBoundsGEP(FixedVT, Ptr, {Builder.getInt32(0), Idx});
+ Builder.CreateInBoundsGEP(VecTy, Ptr, {Builder.getInt32(0), Idx});
auto *NewLoad = cast<LoadInst>(Builder.CreateLoad(
- FixedVT->getElementType(), GEP, EI->getName() + ".scalar"));
+ VecTy->getElementType(), GEP, EI->getName() + ".scalar"));
Align ScalarOpAlignment = computeAlignmentAfterScalarization(
- LI->getAlign(), FixedVT->getElementType(), Idx, DL);
+ LI->getAlign(), VecTy->getElementType(), Idx, DL);
NewLoad->setAlignment(ScalarOpAlignment);
replaceValue(*EI, *NewLoad);
}
+ FailureGuard.release();
return true;
}
@@ -1340,21 +1486,28 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
dyn_cast<FixedVectorType>(Shuffle->getOperand(0)->getType());
if (!ShuffleInputType)
return false;
- int NumInputElts = ShuffleInputType->getNumElements();
+ unsigned NumInputElts = ShuffleInputType->getNumElements();
// Find the mask from sorting the lanes into order. This is most likely to
// become an identity or concat mask. Undef elements are pushed to the end.
SmallVector<int> ConcatMask;
Shuffle->getShuffleMask(ConcatMask);
sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
+ // In the case of a truncating shuffle it's possible for the mask
+ // to have an index greater than the size of the resulting vector.
+ // This requires special handling.
+ bool IsTruncatingShuffle = VecType->getNumElements() < NumInputElts;
bool UsesSecondVec =
- any_of(ConcatMask, [&](int M) { return M >= NumInputElts; });
+ any_of(ConcatMask, [&](int M) { return M >= (int)NumInputElts; });
+
+ FixedVectorType *VecTyForCost =
+ (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType;
InstructionCost OldCost = TTI.getShuffleCost(
- UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc, VecType,
- Shuffle->getShuffleMask());
+ UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc,
+ VecTyForCost, Shuffle->getShuffleMask());
InstructionCost NewCost = TTI.getShuffleCost(
- UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc, VecType,
- ConcatMask);
+ UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc,
+ VecTyForCost, ConcatMask);
LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
<< "\n");
@@ -1657,16 +1810,16 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
return SSV->getOperand(Op);
return SV->getOperand(Op);
};
- Builder.SetInsertPoint(SVI0A->getInsertionPointAfterDef());
+ Builder.SetInsertPoint(*SVI0A->getInsertionPointAfterDef());
Value *NSV0A = Builder.CreateShuffleVector(GetShuffleOperand(SVI0A, 0),
GetShuffleOperand(SVI0A, 1), V1A);
- Builder.SetInsertPoint(SVI0B->getInsertionPointAfterDef());
+ Builder.SetInsertPoint(*SVI0B->getInsertionPointAfterDef());
Value *NSV0B = Builder.CreateShuffleVector(GetShuffleOperand(SVI0B, 0),
GetShuffleOperand(SVI0B, 1), V1B);
- Builder.SetInsertPoint(SVI1A->getInsertionPointAfterDef());
+ Builder.SetInsertPoint(*SVI1A->getInsertionPointAfterDef());
Value *NSV1A = Builder.CreateShuffleVector(GetShuffleOperand(SVI1A, 0),
GetShuffleOperand(SVI1A, 1), V2A);
- Builder.SetInsertPoint(SVI1B->getInsertionPointAfterDef());
+ Builder.SetInsertPoint(*SVI1B->getInsertionPointAfterDef());
Value *NSV1B = Builder.CreateShuffleVector(GetShuffleOperand(SVI1B, 0),
GetShuffleOperand(SVI1B, 1), V2B);
Builder.SetInsertPoint(Op0);
@@ -1723,9 +1876,6 @@ bool VectorCombine::run() {
case Instruction::ShuffleVector:
MadeChange |= widenSubvectorLoad(I);
break;
- case Instruction::Load:
- MadeChange |= scalarizeLoadExtract(I);
- break;
default:
break;
}
@@ -1733,13 +1883,15 @@ bool VectorCombine::run() {
// This transform works with scalable and fixed vectors
// TODO: Identify and allow other scalable transforms
- if (isa<VectorType>(I.getType()))
+ if (isa<VectorType>(I.getType())) {
MadeChange |= scalarizeBinopOrCmp(I);
+ MadeChange |= scalarizeLoadExtract(I);
+ MadeChange |= scalarizeVPIntrinsic(I);
+ }
if (Opcode == Instruction::Store)
MadeChange |= foldSingleElementStore(I);
-
// If this is an early pipeline invocation of this pass, we are done.
if (TryEarlyFoldsOnly)
return;
@@ -1758,7 +1910,7 @@ bool VectorCombine::run() {
MadeChange |= foldSelectShuffle(I);
break;
case Instruction::BitCast:
- MadeChange |= foldBitcastShuf(I);
+ MadeChange |= foldBitcastShuffle(I);
break;
}
} else {
diff --git a/contrib/llvm-project/llvm/lib/XRay/BlockVerifier.cpp b/contrib/llvm-project/llvm/lib/XRay/BlockVerifier.cpp
index 9fb49fa9a860..99f255ea6688 100644
--- a/contrib/llvm-project/llvm/lib/XRay/BlockVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/XRay/BlockVerifier.cpp
@@ -8,6 +8,8 @@
#include "llvm/XRay/BlockVerifier.h"
#include "llvm/Support/Error.h"
+#include <bitset>
+
namespace llvm {
namespace xray {
namespace {
diff --git a/contrib/llvm-project/llvm/lib/XRay/FDRTraceWriter.cpp b/contrib/llvm-project/llvm/lib/XRay/FDRTraceWriter.cpp
index 2b80740ed436..8e67a8f98433 100644
--- a/contrib/llvm-project/llvm/lib/XRay/FDRTraceWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/XRay/FDRTraceWriter.cpp
@@ -57,7 +57,7 @@ Error writeMetadata(support::endian::Writer &OS, Values &&... Ds) {
} // namespace
FDRTraceWriter::FDRTraceWriter(raw_ostream &O, const XRayFileHeader &H)
- : OS(O, support::endianness::native) {
+ : OS(O, llvm::endianness::native) {
// We need to re-construct a header, by writing the fields we care about for
// traces, in the format that the runtime would have written.
uint32_t BitField =